aboutsummaryrefslogtreecommitdiffstatshomepage
path: root/tools/testing/selftests
diff options
context:
space:
mode:
Diffstat (limited to 'tools/testing/selftests')
-rw-r--r--tools/testing/selftests/.gitignore1
-rw-r--r--tools/testing/selftests/Makefile173
-rw-r--r--tools/testing/selftests/alsa/.gitignore3
-rw-r--r--tools/testing/selftests/alsa/Makefile27
-rw-r--r--tools/testing/selftests/alsa/alsa-local.h37
-rw-r--r--tools/testing/selftests/alsa/conf.c475
-rw-r--r--tools/testing/selftests/alsa/conf.d/Lenovo_ThinkPad_P1_Gen2.conf84
-rw-r--r--tools/testing/selftests/alsa/mixer-test.c1112
-rw-r--r--tools/testing/selftests/alsa/pcm-test.c641
-rw-r--r--tools/testing/selftests/alsa/pcm-test.conf63
-rw-r--r--tools/testing/selftests/alsa/test-pcmtest-driver.c330
-rw-r--r--tools/testing/selftests/amd-pstate/Makefile18
-rwxr-xr-xtools/testing/selftests/amd-pstate/basic.sh38
-rw-r--r--tools/testing/selftests/amd-pstate/config1
-rwxr-xr-xtools/testing/selftests/amd-pstate/gitsource.sh359
-rwxr-xr-xtools/testing/selftests/amd-pstate/run.sh396
-rwxr-xr-xtools/testing/selftests/amd-pstate/tbench.sh339
-rw-r--r--tools/testing/selftests/android/Makefile39
-rw-r--r--tools/testing/selftests/android/config5
-rw-r--r--tools/testing/selftests/android/ion/Makefile20
-rw-r--r--tools/testing/selftests/android/ion/README101
-rw-r--r--tools/testing/selftests/android/ion/ion.h134
-rwxr-xr-xtools/testing/selftests/android/ion/ion_test.sh58
-rw-r--r--tools/testing/selftests/android/ion/ionapp_export.c127
-rw-r--r--tools/testing/selftests/android/ion/ionapp_import.c79
-rw-r--r--tools/testing/selftests/android/ion/ionmap_test.c136
-rw-r--r--tools/testing/selftests/android/ion/ionutils.c253
-rw-r--r--tools/testing/selftests/android/ion/ionutils.h55
-rw-r--r--tools/testing/selftests/android/ion/ipcsocket.c227
-rw-r--r--tools/testing/selftests/android/ion/ipcsocket.h35
-rwxr-xr-xtools/testing/selftests/android/run.sh3
-rw-r--r--tools/testing/selftests/arm64/Makefile15
-rw-r--r--tools/testing/selftests/arm64/abi/.gitignore4
-rw-r--r--tools/testing/selftests/arm64/abi/Makefile15
-rw-r--r--tools/testing/selftests/arm64/abi/hwcap.c995
-rw-r--r--tools/testing/selftests/arm64/abi/ptrace.c271
-rw-r--r--tools/testing/selftests/arm64/abi/syscall-abi-asm.S362
-rw-r--r--tools/testing/selftests/arm64/abi/syscall-abi.c565
-rw-r--r--tools/testing/selftests/arm64/abi/syscall-abi.h15
-rw-r--r--tools/testing/selftests/arm64/abi/tpidr2.c302
-rw-r--r--tools/testing/selftests/arm64/bti/.gitignore2
-rw-r--r--tools/testing/selftests/arm64/bti/Makefile56
-rw-r--r--tools/testing/selftests/arm64/bti/assembler.h80
-rw-r--r--tools/testing/selftests/arm64/bti/btitest.h23
-rw-r--r--tools/testing/selftests/arm64/bti/signal.c37
-rw-r--r--tools/testing/selftests/arm64/bti/signal.h21
-rw-r--r--tools/testing/selftests/arm64/bti/start.S14
-rw-r--r--tools/testing/selftests/arm64/bti/syscall.S23
-rw-r--r--tools/testing/selftests/arm64/bti/system.c20
-rw-r--r--tools/testing/selftests/arm64/bti/system.h28
-rw-r--r--tools/testing/selftests/arm64/bti/test.c229
-rw-r--r--tools/testing/selftests/arm64/bti/teststubs.S39
-rw-r--r--tools/testing/selftests/arm64/bti/trampoline.S29
-rw-r--r--tools/testing/selftests/arm64/fp/.gitignore17
-rw-r--r--tools/testing/selftests/arm64/fp/Makefile53
-rw-r--r--tools/testing/selftests/arm64/fp/README100
-rw-r--r--tools/testing/selftests/arm64/fp/TODO7
-rw-r--r--tools/testing/selftests/arm64/fp/asm-offsets.h12
-rw-r--r--tools/testing/selftests/arm64/fp/asm-utils.S172
-rw-r--r--tools/testing/selftests/arm64/fp/assembler.h68
-rw-r--r--tools/testing/selftests/arm64/fp/fp-pidbench.S70
-rw-r--r--tools/testing/selftests/arm64/fp/fp-ptrace-asm.S279
-rw-r--r--tools/testing/selftests/arm64/fp/fp-ptrace.c1503
-rw-r--r--tools/testing/selftests/arm64/fp/fp-ptrace.h13
-rw-r--r--tools/testing/selftests/arm64/fp/fp-stress.c649
-rwxr-xr-xtools/testing/selftests/arm64/fp/fpsimd-stress60
-rw-r--r--tools/testing/selftests/arm64/fp/fpsimd-test.S332
-rw-r--r--tools/testing/selftests/arm64/fp/rdvl-sme.c14
-rw-r--r--tools/testing/selftests/arm64/fp/rdvl-sve.c14
-rw-r--r--tools/testing/selftests/arm64/fp/rdvl.S20
-rw-r--r--tools/testing/selftests/arm64/fp/rdvl.h9
-rw-r--r--tools/testing/selftests/arm64/fp/sme-inst.h71
-rw-r--r--tools/testing/selftests/arm64/fp/ssve-stress59
-rw-r--r--tools/testing/selftests/arm64/fp/sve-probe-vls.c63
-rw-r--r--tools/testing/selftests/arm64/fp/sve-ptrace.c764
-rwxr-xr-xtools/testing/selftests/arm64/fp/sve-stress59
-rw-r--r--tools/testing/selftests/arm64/fp/sve-test.S584
-rw-r--r--tools/testing/selftests/arm64/fp/vec-syscfg.c797
-rw-r--r--tools/testing/selftests/arm64/fp/vlset.c161
-rw-r--r--tools/testing/selftests/arm64/fp/za-fork-asm.S61
-rw-r--r--tools/testing/selftests/arm64/fp/za-fork.c100
-rw-r--r--tools/testing/selftests/arm64/fp/za-ptrace.c366
-rw-r--r--tools/testing/selftests/arm64/fp/za-stress59
-rw-r--r--tools/testing/selftests/arm64/fp/za-test.S403
-rw-r--r--tools/testing/selftests/arm64/fp/zt-ptrace.c365
-rw-r--r--tools/testing/selftests/arm64/fp/zt-test.S321
-rw-r--r--tools/testing/selftests/arm64/mte/.gitignore8
-rw-r--r--tools/testing/selftests/arm64/mte/Makefile41
-rw-r--r--tools/testing/selftests/arm64/mte/check_buffer_fill.c478
-rw-r--r--tools/testing/selftests/arm64/mte/check_child_memory.c198
-rw-r--r--tools/testing/selftests/arm64/mte/check_gcr_el1_cswitch.c143
-rw-r--r--tools/testing/selftests/arm64/mte/check_ksm_options.c166
-rw-r--r--tools/testing/selftests/arm64/mte/check_mmap_options.c265
-rw-r--r--tools/testing/selftests/arm64/mte/check_prctl.c119
-rw-r--r--tools/testing/selftests/arm64/mte/check_tags_inclusion.c200
-rw-r--r--tools/testing/selftests/arm64/mte/check_user_mem.c243
-rw-r--r--tools/testing/selftests/arm64/mte/mte_common_util.c368
-rw-r--r--tools/testing/selftests/arm64/mte/mte_common_util.h129
-rw-r--r--tools/testing/selftests/arm64/mte/mte_def.h60
-rw-r--r--tools/testing/selftests/arm64/mte/mte_helper.S130
-rw-r--r--tools/testing/selftests/arm64/pauth/.gitignore2
-rw-r--r--tools/testing/selftests/arm64/pauth/Makefile39
-rw-r--r--tools/testing/selftests/arm64/pauth/exec_target.c34
-rw-r--r--tools/testing/selftests/arm64/pauth/helper.c39
-rw-r--r--tools/testing/selftests/arm64/pauth/helper.h28
-rw-r--r--tools/testing/selftests/arm64/pauth/pac.c370
-rw-r--r--tools/testing/selftests/arm64/pauth/pac_corruptor.S19
-rw-r--r--tools/testing/selftests/arm64/signal/.gitignore7
-rw-r--r--tools/testing/selftests/arm64/signal/Makefile13
-rw-r--r--tools/testing/selftests/arm64/signal/test_signals.c4
-rw-r--r--tools/testing/selftests/arm64/signal/test_signals.h13
-rw-r--r--tools/testing/selftests/arm64/signal/test_signals_utils.c129
-rw-r--r--tools/testing/selftests/arm64/signal/test_signals_utils.h34
-rw-r--r--tools/testing/selftests/arm64/signal/testcases/TODO1
-rw-r--r--tools/testing/selftests/arm64/signal/testcases/fake_sigreturn_bad_magic.c2
-rw-r--r--tools/testing/selftests/arm64/signal/testcases/fake_sigreturn_bad_size.c2
-rw-r--r--tools/testing/selftests/arm64/signal/testcases/fake_sigreturn_bad_size_for_magic0.c2
-rw-r--r--tools/testing/selftests/arm64/signal/testcases/fake_sigreturn_duplicated_fpsimd.c2
-rw-r--r--tools/testing/selftests/arm64/signal/testcases/fake_sigreturn_misaligned_sp.c2
-rw-r--r--tools/testing/selftests/arm64/signal/testcases/fake_sigreturn_missing_fpsimd.c2
-rw-r--r--tools/testing/selftests/arm64/signal/testcases/fake_sigreturn_sme_change_vl.c92
-rw-r--r--tools/testing/selftests/arm64/signal/testcases/fake_sigreturn_sve_change_vl.c94
-rw-r--r--tools/testing/selftests/arm64/signal/testcases/fpmr_siginfo.c82
-rw-r--r--tools/testing/selftests/arm64/signal/testcases/sme_trap_no_sm.c38
-rw-r--r--tools/testing/selftests/arm64/signal/testcases/sme_trap_non_streaming.c45
-rw-r--r--tools/testing/selftests/arm64/signal/testcases/sme_trap_za.c36
-rw-r--r--tools/testing/selftests/arm64/signal/testcases/sme_vl.c68
-rw-r--r--tools/testing/selftests/arm64/signal/testcases/ssve_regs.c132
-rw-r--r--tools/testing/selftests/arm64/signal/testcases/ssve_za_regs.c161
-rw-r--r--tools/testing/selftests/arm64/signal/testcases/sve_regs.c121
-rw-r--r--tools/testing/selftests/arm64/signal/testcases/sve_vl.c68
-rw-r--r--tools/testing/selftests/arm64/signal/testcases/testcases.c177
-rw-r--r--tools/testing/selftests/arm64/signal/testcases/testcases.h14
-rw-r--r--tools/testing/selftests/arm64/signal/testcases/tpidr2_restore.c86
-rw-r--r--tools/testing/selftests/arm64/signal/testcases/tpidr2_siginfo.c90
-rw-r--r--tools/testing/selftests/arm64/signal/testcases/za_no_regs.c119
-rw-r--r--tools/testing/selftests/arm64/signal/testcases/za_regs.c138
-rw-r--r--tools/testing/selftests/arm64/signal/testcases/zt_no_regs.c51
-rw-r--r--tools/testing/selftests/arm64/signal/testcases/zt_regs.c86
-rw-r--r--tools/testing/selftests/arm64/tags/Makefile2
-rw-r--r--tools/testing/selftests/bpf/.gitignore29
-rw-r--r--tools/testing/selftests/bpf/DENYLIST7
-rw-r--r--tools/testing/selftests/bpf/DENYLIST.aarch6414
-rw-r--r--tools/testing/selftests/bpf/DENYLIST.s390x8
-rw-r--r--tools/testing/selftests/bpf/Makefile637
-rw-r--r--tools/testing/selftests/bpf/Makefile.docs83
-rw-r--r--tools/testing/selftests/bpf/README.rst272
-rw-r--r--tools/testing/selftests/bpf/autoconf_helper.h9
-rw-r--r--tools/testing/selftests/bpf/bench.c333
-rw-r--r--tools/testing/selftests/bpf/bench.h38
-rw-r--r--tools/testing/selftests/bpf/benchs/bench_bloom_filter_map.c477
-rw-r--r--tools/testing/selftests/bpf/benchs/bench_bpf_hashmap_full_update.c89
-rw-r--r--tools/testing/selftests/bpf/benchs/bench_bpf_hashmap_lookup.c277
-rw-r--r--tools/testing/selftests/bpf/benchs/bench_bpf_loop.c100
-rw-r--r--tools/testing/selftests/bpf/benchs/bench_count.c16
-rw-r--r--tools/testing/selftests/bpf/benchs/bench_htab_mem.c351
-rw-r--r--tools/testing/selftests/bpf/benchs/bench_local_storage.c282
-rw-r--r--tools/testing/selftests/bpf/benchs/bench_local_storage_create.c258
-rw-r--r--tools/testing/selftests/bpf/benchs/bench_local_storage_rcu_tasks_trace.c263
-rw-r--r--tools/testing/selftests/bpf/benchs/bench_rename.c50
-rw-r--r--tools/testing/selftests/bpf/benchs/bench_ringbufs.c40
-rw-r--r--tools/testing/selftests/bpf/benchs/bench_strncmp.c156
-rw-r--r--tools/testing/selftests/bpf/benchs/bench_trigger.c286
-rwxr-xr-xtools/testing/selftests/bpf/benchs/run_bench_bloom_filter_map.sh45
-rwxr-xr-xtools/testing/selftests/bpf/benchs/run_bench_bpf_hashmap_full_update.sh11
-rwxr-xr-xtools/testing/selftests/bpf/benchs/run_bench_bpf_loop.sh15
-rwxr-xr-xtools/testing/selftests/bpf/benchs/run_bench_htab_mem.sh40
-rwxr-xr-xtools/testing/selftests/bpf/benchs/run_bench_local_storage.sh24
-rwxr-xr-xtools/testing/selftests/bpf/benchs/run_bench_local_storage_rcu_tasks_trace.sh11
-rwxr-xr-xtools/testing/selftests/bpf/benchs/run_bench_rename.sh2
-rwxr-xr-xtools/testing/selftests/bpf/benchs/run_bench_ringbufs.sh54
-rwxr-xr-xtools/testing/selftests/bpf/benchs/run_bench_strncmp.sh12
-rwxr-xr-xtools/testing/selftests/bpf/benchs/run_bench_uprobes.sh9
-rw-r--r--tools/testing/selftests/bpf/benchs/run_common.sh92
-rw-r--r--tools/testing/selftests/bpf/bpf_arena_alloc.h67
-rw-r--r--tools/testing/selftests/bpf/bpf_arena_common.h70
-rw-r--r--tools/testing/selftests/bpf/bpf_arena_htab.h100
-rw-r--r--tools/testing/selftests/bpf/bpf_arena_list.h92
-rw-r--r--tools/testing/selftests/bpf/bpf_experimental.h462
-rw-r--r--tools/testing/selftests/bpf/bpf_kfuncs.h78
-rw-r--r--tools/testing/selftests/bpf/bpf_legacy.h40
-rw-r--r--tools/testing/selftests/bpf/bpf_rlimit.h28
-rw-r--r--tools/testing/selftests/bpf/bpf_sockopt_helpers.h21
-rw-r--r--tools/testing/selftests/bpf/bpf_tcp_helpers.h68
-rw-r--r--tools/testing/selftests/bpf/bpf_test_no_cfi/Makefile19
-rw-r--r--tools/testing/selftests/bpf/bpf_test_no_cfi/bpf_test_no_cfi.c84
-rw-r--r--tools/testing/selftests/bpf/bpf_testmod/.gitignore6
-rw-r--r--tools/testing/selftests/bpf/bpf_testmod/Makefile20
-rw-r--r--tools/testing/selftests/bpf/bpf_testmod/bpf_testmod-events.h57
-rw-r--r--tools/testing/selftests/bpf/bpf_testmod/bpf_testmod.c674
-rw-r--r--tools/testing/selftests/bpf/bpf_testmod/bpf_testmod.h96
-rw-r--r--tools/testing/selftests/bpf/bpf_testmod/bpf_testmod_kfunc.h109
-rw-r--r--tools/testing/selftests/bpf/bpf_util.h19
-rw-r--r--tools/testing/selftests/bpf/btf_helpers.c292
-rw-r--r--tools/testing/selftests/bpf/btf_helpers.h19
-rw-r--r--tools/testing/selftests/bpf/cap_helpers.c67
-rw-r--r--tools/testing/selftests/bpf/cap_helpers.h19
-rw-r--r--tools/testing/selftests/bpf/cgroup_getset_retval_hooks.h25
-rw-r--r--tools/testing/selftests/bpf/cgroup_helpers.c533
-rw-r--r--tools/testing/selftests/bpf/cgroup_helpers.h31
-rw-r--r--tools/testing/selftests/bpf/cgroup_tcp_skb.h35
-rw-r--r--tools/testing/selftests/bpf/config115
-rw-r--r--tools/testing/selftests/bpf/config.aarch64167
-rw-r--r--tools/testing/selftests/bpf/config.s390x137
-rw-r--r--tools/testing/selftests/bpf/config.vm12
-rw-r--r--tools/testing/selftests/bpf/config.x86_64233
l---------tools/testing/selftests/bpf/disasm.c1
l---------tools/testing/selftests/bpf/disasm.h1
-rw-r--r--tools/testing/selftests/bpf/flow_dissector_load.c18
-rw-r--r--tools/testing/selftests/bpf/flow_dissector_load.h7
-rwxr-xr-xtools/testing/selftests/bpf/generate_udp_fragments.py90
-rw-r--r--tools/testing/selftests/bpf/get_cgroup_id_user.c38
-rw-r--r--tools/testing/selftests/bpf/gnu/stubs.h2
-rwxr-xr-xtools/testing/selftests/bpf/ima_setup.sh156
-rw-r--r--tools/testing/selftests/bpf/ip_check_defrag_frags.h57
l---------tools/testing/selftests/bpf/json_writer.c1
l---------tools/testing/selftests/bpf/json_writer.h1
-rw-r--r--tools/testing/selftests/bpf/liburandom_read.map15
-rw-r--r--tools/testing/selftests/bpf/map_tests/array_map_batch_ops.c118
-rw-r--r--tools/testing/selftests/bpf/map_tests/htab_map_batch_ops.c15
-rw-r--r--tools/testing/selftests/bpf/map_tests/lpm_trie_map_batch_ops.c155
-rw-r--r--tools/testing/selftests/bpf/map_tests/map_in_map_batch_ops.c252
-rw-r--r--tools/testing/selftests/bpf/map_tests/map_percpu_stats.c470
-rw-r--r--tools/testing/selftests/bpf/map_tests/sk_storage_map.c88
-rw-r--r--tools/testing/selftests/bpf/map_tests/task_storage_map.c127
-rw-r--r--tools/testing/selftests/bpf/netcnt_common.h38
-rw-r--r--tools/testing/selftests/bpf/netlink_helpers.c358
-rw-r--r--tools/testing/selftests/bpf/netlink_helpers.h46
-rw-r--r--tools/testing/selftests/bpf/network_helpers.c468
-rw-r--r--tools/testing/selftests/bpf/network_helpers.h85
-rw-r--r--tools/testing/selftests/bpf/prog_tests/access_variable_array.c16
-rw-r--r--tools/testing/selftests/bpf/prog_tests/align.c285
-rw-r--r--tools/testing/selftests/bpf/prog_tests/arena_htab.c88
-rw-r--r--tools/testing/selftests/bpf/prog_tests/arena_list.c68
-rw-r--r--tools/testing/selftests/bpf/prog_tests/arg_parsing.c175
-rw-r--r--tools/testing/selftests/bpf/prog_tests/assign_reuse.c199
-rw-r--r--tools/testing/selftests/bpf/prog_tests/async_stack_depth.c9
-rw-r--r--tools/testing/selftests/bpf/prog_tests/atomic_bounds.c17
-rw-r--r--tools/testing/selftests/bpf/prog_tests/atomics.c198
-rw-r--r--tools/testing/selftests/bpf/prog_tests/attach_probe.c354
-rw-r--r--tools/testing/selftests/bpf/prog_tests/autoattach.c30
-rw-r--r--tools/testing/selftests/bpf/prog_tests/autoload.c41
-rw-r--r--tools/testing/selftests/bpf/prog_tests/bad_struct_ops.c67
-rw-r--r--tools/testing/selftests/bpf/prog_tests/bind_perm.c93
-rw-r--r--tools/testing/selftests/bpf/prog_tests/bloom_filter_map.c213
-rw-r--r--tools/testing/selftests/bpf/prog_tests/bpf_cookie.c606
-rw-r--r--tools/testing/selftests/bpf/prog_tests/bpf_iter.c1484
-rw-r--r--tools/testing/selftests/bpf/prog_tests/bpf_iter_setsockopt.c226
-rw-r--r--tools/testing/selftests/bpf/prog_tests/bpf_iter_setsockopt_unix.c100
-rw-r--r--tools/testing/selftests/bpf/prog_tests/bpf_loop.c207
-rw-r--r--tools/testing/selftests/bpf/prog_tests/bpf_mod_race.c206
-rw-r--r--tools/testing/selftests/bpf/prog_tests/bpf_nf.c182
-rw-r--r--tools/testing/selftests/bpf/prog_tests/bpf_obj_id.c232
-rw-r--r--tools/testing/selftests/bpf/prog_tests/bpf_obj_pinning.c269
-rw-r--r--tools/testing/selftests/bpf/prog_tests/bpf_tcp_ca.c439
-rw-r--r--tools/testing/selftests/bpf/prog_tests/bpf_verif_scale.c268
-rw-r--r--tools/testing/selftests/bpf/prog_tests/btf.c (renamed from tools/testing/selftests/bpf/test_btf.c)2255
-rw-r--r--tools/testing/selftests/bpf/prog_tests/btf_dedup_split.c453
-rw-r--r--tools/testing/selftests/bpf/prog_tests/btf_dump.c817
-rw-r--r--tools/testing/selftests/bpf/prog_tests/btf_endian.c99
-rw-r--r--tools/testing/selftests/bpf/prog_tests/btf_map_in_map.c153
-rw-r--r--tools/testing/selftests/bpf/prog_tests/btf_module.c34
-rw-r--r--tools/testing/selftests/bpf/prog_tests/btf_skc_cls_ingress.c221
-rw-r--r--tools/testing/selftests/bpf/prog_tests/btf_split.c97
-rw-r--r--tools/testing/selftests/bpf/prog_tests/btf_tag.c249
-rw-r--r--tools/testing/selftests/bpf/prog_tests/btf_write.c506
-rw-r--r--tools/testing/selftests/bpf/prog_tests/cb_refs.c48
-rw-r--r--tools/testing/selftests/bpf/prog_tests/cg_storage_multi.c393
-rw-r--r--tools/testing/selftests/bpf/prog_tests/cgroup1_hierarchy.c158
-rw-r--r--tools/testing/selftests/bpf/prog_tests/cgroup_attach_autodetach.c6
-rw-r--r--tools/testing/selftests/bpf/prog_tests/cgroup_attach_multi.c36
-rw-r--r--tools/testing/selftests/bpf/prog_tests/cgroup_attach_override.c6
-rw-r--r--tools/testing/selftests/bpf/prog_tests/cgroup_getset_retval.c549
-rw-r--r--tools/testing/selftests/bpf/prog_tests/cgroup_hierarchical_stats.c339
-rw-r--r--tools/testing/selftests/bpf/prog_tests/cgroup_iter.c333
-rw-r--r--tools/testing/selftests/bpf/prog_tests/cgroup_link.c49
-rw-r--r--tools/testing/selftests/bpf/prog_tests/cgroup_skb_sk_lookup.c14
-rw-r--r--tools/testing/selftests/bpf/prog_tests/cgroup_tcp_skb.c344
-rw-r--r--tools/testing/selftests/bpf/prog_tests/cgroup_v1v2.c79
-rw-r--r--tools/testing/selftests/bpf/prog_tests/cgrp_kfunc.c109
-rw-r--r--tools/testing/selftests/bpf/prog_tests/cgrp_local_storage.c361
-rw-r--r--tools/testing/selftests/bpf/prog_tests/check_mtu.c206
-rw-r--r--tools/testing/selftests/bpf/prog_tests/cls_redirect.c106
-rw-r--r--tools/testing/selftests/bpf/prog_tests/connect_force_port.c45
-rw-r--r--tools/testing/selftests/bpf/prog_tests/connect_ping.c178
-rw-r--r--tools/testing/selftests/bpf/prog_tests/core_autosize.c223
-rw-r--r--tools/testing/selftests/bpf/prog_tests/core_extern.c17
-rw-r--r--tools/testing/selftests/bpf/prog_tests/core_kern.c28
-rw-r--r--tools/testing/selftests/bpf/prog_tests/core_kern_overflow.c13
-rw-r--r--tools/testing/selftests/bpf/prog_tests/core_read_macros.c64
-rw-r--r--tools/testing/selftests/bpf/prog_tests/core_reloc.c781
-rw-r--r--tools/testing/selftests/bpf/prog_tests/core_retro.c38
-rw-r--r--tools/testing/selftests/bpf/prog_tests/cpumask.c77
-rw-r--r--tools/testing/selftests/bpf/prog_tests/ctx_rewrite.c873
-rw-r--r--tools/testing/selftests/bpf/prog_tests/custom_sec_handlers.c176
-rw-r--r--tools/testing/selftests/bpf/prog_tests/d_path.c208
-rw-r--r--tools/testing/selftests/bpf/prog_tests/decap_sanity.c77
-rw-r--r--tools/testing/selftests/bpf/prog_tests/deny_namespace.c102
-rw-r--r--tools/testing/selftests/bpf/prog_tests/dummy_st_ops.c161
-rw-r--r--tools/testing/selftests/bpf/prog_tests/dynptr.c110
-rw-r--r--tools/testing/selftests/bpf/prog_tests/empty_skb.c149
-rw-r--r--tools/testing/selftests/bpf/prog_tests/enable_stats.c2
-rw-r--r--tools/testing/selftests/bpf/prog_tests/endian.c53
-rw-r--r--tools/testing/selftests/bpf/prog_tests/exceptions.c409
-rw-r--r--tools/testing/selftests/bpf/prog_tests/exhandler.c43
-rw-r--r--tools/testing/selftests/bpf/prog_tests/fentry_fexit.c46
-rw-r--r--tools/testing/selftests/bpf/prog_tests/fentry_test.c98
-rw-r--r--tools/testing/selftests/bpf/prog_tests/fexit_bpf2bpf.c582
-rw-r--r--tools/testing/selftests/bpf/prog_tests/fexit_sleep.c82
-rw-r--r--tools/testing/selftests/bpf/prog_tests/fexit_stress.c79
-rw-r--r--tools/testing/selftests/bpf/prog_tests/fexit_test.c98
-rw-r--r--tools/testing/selftests/bpf/prog_tests/fib_lookup.c303
-rw-r--r--tools/testing/selftests/bpf/prog_tests/fill_link_info.c622
-rw-r--r--tools/testing/selftests/bpf/prog_tests/find_vma.c127
-rw-r--r--tools/testing/selftests/bpf/prog_tests/flow_dissector.c105
-rw-r--r--tools/testing/selftests/bpf/prog_tests/flow_dissector_load_bytes.c28
-rw-r--r--tools/testing/selftests/bpf/prog_tests/flow_dissector_reattach.c26
-rw-r--r--tools/testing/selftests/bpf/prog_tests/for_each.c154
-rw-r--r--tools/testing/selftests/bpf/prog_tests/fs_kfuncs.c142
-rw-r--r--tools/testing/selftests/bpf/prog_tests/get_branch_snapshot.c130
-rw-r--r--tools/testing/selftests/bpf/prog_tests/get_func_args_test.c44
-rw-r--r--tools/testing/selftests/bpf/prog_tests/get_func_ip_test.c139
-rw-r--r--tools/testing/selftests/bpf/prog_tests/get_stack_raw_tp.c42
-rw-r--r--tools/testing/selftests/bpf/prog_tests/get_stackid_cannot_attach.c89
-rw-r--r--tools/testing/selftests/bpf/prog_tests/global_data.c55
-rw-r--r--tools/testing/selftests/bpf/prog_tests/global_data_init.c9
-rw-r--r--tools/testing/selftests/bpf/prog_tests/global_func_args.c62
-rw-r--r--tools/testing/selftests/bpf/prog_tests/global_func_dead_code.c60
-rw-r--r--tools/testing/selftests/bpf/prog_tests/global_map_resize.c235
-rw-r--r--tools/testing/selftests/bpf/prog_tests/hash_large_key.c43
-rw-r--r--tools/testing/selftests/bpf/prog_tests/hashmap.c199
-rw-r--r--tools/testing/selftests/bpf/prog_tests/helper_restricted.c31
-rw-r--r--tools/testing/selftests/bpf/prog_tests/htab_reuse.c101
-rw-r--r--tools/testing/selftests/bpf/prog_tests/htab_update.c126
-rw-r--r--tools/testing/selftests/bpf/prog_tests/inner_array_lookup.c31
-rw-r--r--tools/testing/selftests/bpf/prog_tests/ip_check_defrag.c283
-rw-r--r--tools/testing/selftests/bpf/prog_tests/iters.c316
-rw-r--r--tools/testing/selftests/bpf/prog_tests/jeq_infer_not_null.c9
-rw-r--r--tools/testing/selftests/bpf/prog_tests/jit_probe_mem.c28
-rw-r--r--tools/testing/selftests/bpf/prog_tests/kfree_skb.c92
-rw-r--r--tools/testing/selftests/bpf/prog_tests/kfunc_call.c322
-rw-r--r--tools/testing/selftests/bpf/prog_tests/kfunc_dynptr_param.c119
-rw-r--r--tools/testing/selftests/bpf/prog_tests/kprobe_multi_test.c541
-rw-r--r--tools/testing/selftests/bpf/prog_tests/kprobe_multi_testmod_test.c95
-rw-r--r--tools/testing/selftests/bpf/prog_tests/kptr_xchg_inline.c52
-rw-r--r--tools/testing/selftests/bpf/prog_tests/ksyms.c61
-rw-r--r--tools/testing/selftests/bpf/prog_tests/ksyms_btf.c191
-rw-r--r--tools/testing/selftests/bpf/prog_tests/ksyms_module.c69
-rw-r--r--tools/testing/selftests/bpf/prog_tests/l4lb_all.c48
-rw-r--r--tools/testing/selftests/bpf/prog_tests/legacy_printk.c65
-rw-r--r--tools/testing/selftests/bpf/prog_tests/libbpf_get_fd_by_id_opts.c87
-rw-r--r--tools/testing/selftests/bpf/prog_tests/libbpf_probes.c128
-rw-r--r--tools/testing/selftests/bpf/prog_tests/libbpf_str.c225
-rw-r--r--tools/testing/selftests/bpf/prog_tests/link_pinning.c7
-rw-r--r--tools/testing/selftests/bpf/prog_tests/linked_funcs.c48
-rw-r--r--tools/testing/selftests/bpf/prog_tests/linked_list.c795
-rw-r--r--tools/testing/selftests/bpf/prog_tests/linked_maps.c30
-rw-r--r--tools/testing/selftests/bpf/prog_tests/linked_vars.c43
-rw-r--r--tools/testing/selftests/bpf/prog_tests/load_bytes_relative.c8
-rw-r--r--tools/testing/selftests/bpf/prog_tests/local_kptr_stash.c147
-rw-r--r--tools/testing/selftests/bpf/prog_tests/log_buf.c276
-rw-r--r--tools/testing/selftests/bpf/prog_tests/log_fixup.c181
-rw-r--r--tools/testing/selftests/bpf/prog_tests/lookup_and_delete.c291
-rw-r--r--tools/testing/selftests/bpf/prog_tests/lookup_key.c112
-rw-r--r--tools/testing/selftests/bpf/prog_tests/lru_bug.c21
-rw-r--r--tools/testing/selftests/bpf/prog_tests/lsm_cgroup.c323
-rw-r--r--tools/testing/selftests/bpf/prog_tests/lwt_helpers.h138
-rw-r--r--tools/testing/selftests/bpf/prog_tests/lwt_redirect.c332
-rw-r--r--tools/testing/selftests/bpf/prog_tests/lwt_reroute.c263
-rw-r--r--tools/testing/selftests/bpf/prog_tests/map_btf.c98
-rw-r--r--tools/testing/selftests/bpf/prog_tests/map_in_map.c141
-rw-r--r--tools/testing/selftests/bpf/prog_tests/map_init.c214
-rw-r--r--tools/testing/selftests/bpf/prog_tests/map_kptr.c163
-rw-r--r--tools/testing/selftests/bpf/prog_tests/map_lock.c21
-rw-r--r--tools/testing/selftests/bpf/prog_tests/map_lookup_percpu_elem.c58
-rw-r--r--tools/testing/selftests/bpf/prog_tests/map_ops.c162
-rw-r--r--tools/testing/selftests/bpf/prog_tests/map_ptr.c45
-rw-r--r--tools/testing/selftests/bpf/prog_tests/metadata.c141
-rw-r--r--tools/testing/selftests/bpf/prog_tests/migrate_reuseport.c559
-rw-r--r--tools/testing/selftests/bpf/prog_tests/missed.c138
-rw-r--r--tools/testing/selftests/bpf/prog_tests/mmap.c45
-rw-r--r--tools/testing/selftests/bpf/prog_tests/modify_return.c44
-rw-r--r--tools/testing/selftests/bpf/prog_tests/module_attach.c111
-rw-r--r--tools/testing/selftests/bpf/prog_tests/module_fentry_shadow.c133
-rw-r--r--tools/testing/selftests/bpf/prog_tests/mptcp.c331
-rw-r--r--tools/testing/selftests/bpf/prog_tests/nested_trust.c12
-rw-r--r--tools/testing/selftests/bpf/prog_tests/netcnt.c82
-rw-r--r--tools/testing/selftests/bpf/prog_tests/netfilter_link_attach.c86
-rw-r--r--tools/testing/selftests/bpf/prog_tests/netns_cookie.c80
-rw-r--r--tools/testing/selftests/bpf/prog_tests/ns_current_pid_tgid.c119
-rw-r--r--tools/testing/selftests/bpf/prog_tests/obj_name.c10
-rw-r--r--tools/testing/selftests/bpf/prog_tests/parse_tcp_hdr_opt.c93
-rw-r--r--tools/testing/selftests/bpf/prog_tests/pe_preserve_elems.c66
-rw-r--r--tools/testing/selftests/bpf/prog_tests/percpu_alloc.c128
-rw-r--r--tools/testing/selftests/bpf/prog_tests/perf_branches.c8
-rw-r--r--tools/testing/selftests/bpf/prog_tests/perf_buffer.c113
-rw-r--r--tools/testing/selftests/bpf/prog_tests/perf_event_stackmap.c116
-rw-r--r--tools/testing/selftests/bpf/prog_tests/perf_link.c90
-rw-r--r--tools/testing/selftests/bpf/prog_tests/pinning.c64
-rw-r--r--tools/testing/selftests/bpf/prog_tests/pkt_access.c30
-rw-r--r--tools/testing/selftests/bpf/prog_tests/pkt_md_access.c18
-rw-r--r--tools/testing/selftests/bpf/prog_tests/preempted_bpf_ma_op.c89
-rw-r--r--tools/testing/selftests/bpf/prog_tests/probe_read_user_str.c71
-rw-r--r--tools/testing/selftests/bpf/prog_tests/probe_user.c43
-rw-r--r--tools/testing/selftests/bpf/prog_tests/prog_array_init.c32
-rw-r--r--tools/testing/selftests/bpf/prog_tests/prog_run_opts.c77
-rw-r--r--tools/testing/selftests/bpf/prog_tests/prog_run_xattr.c50
-rw-r--r--tools/testing/selftests/bpf/prog_tests/prog_tests_framework.c56
-rw-r--r--tools/testing/selftests/bpf/prog_tests/ptr_untrusted.c36
-rw-r--r--tools/testing/selftests/bpf/prog_tests/queue_stack_map.c58
-rw-r--r--tools/testing/selftests/bpf/prog_tests/raw_tp_test_run.c86
-rw-r--r--tools/testing/selftests/bpf/prog_tests/raw_tp_writable_reject_nbd_invalid.c14
-rw-r--r--tools/testing/selftests/bpf/prog_tests/raw_tp_writable_test_run.c48
-rw-r--r--tools/testing/selftests/bpf/prog_tests/rbtree.c142
-rw-r--r--tools/testing/selftests/bpf/prog_tests/rcu_read_lock.c154
-rw-r--r--tools/testing/selftests/bpf/prog_tests/rdonly_maps.c11
-rw-r--r--tools/testing/selftests/bpf/prog_tests/read_vsyscall.c57
-rw-r--r--tools/testing/selftests/bpf/prog_tests/recursion.c41
-rw-r--r--tools/testing/selftests/bpf/prog_tests/recursive_attach.c151
-rw-r--r--tools/testing/selftests/bpf/prog_tests/refcounted_kptr.c46
-rw-r--r--tools/testing/selftests/bpf/prog_tests/reference_tracking.c47
-rw-r--r--tools/testing/selftests/bpf/prog_tests/reg_bounds.c2131
-rw-r--r--tools/testing/selftests/bpf/prog_tests/resolve_btfids.c167
-rw-r--r--tools/testing/selftests/bpf/prog_tests/ringbuf.c178
-rw-r--r--tools/testing/selftests/bpf/prog_tests/ringbuf_multi.c53
-rw-r--r--tools/testing/selftests/bpf/prog_tests/section_names.c47
-rw-r--r--tools/testing/selftests/bpf/prog_tests/select_reuseport.c81
-rw-r--r--tools/testing/selftests/bpf/prog_tests/send_signal.c89
-rw-r--r--tools/testing/selftests/bpf/prog_tests/send_signal_sched_switch.c3
-rw-r--r--tools/testing/selftests/bpf/prog_tests/setget_sockopt.c198
-rw-r--r--tools/testing/selftests/bpf/prog_tests/signal_pending.c25
-rw-r--r--tools/testing/selftests/bpf/prog_tests/sk_assign.c34
-rw-r--r--tools/testing/selftests/bpf/prog_tests/sk_lookup.c1413
-rw-r--r--tools/testing/selftests/bpf/prog_tests/sk_storage_omem_uncharge.c56
-rw-r--r--tools/testing/selftests/bpf/prog_tests/sk_storage_tracing.c135
-rw-r--r--tools/testing/selftests/bpf/prog_tests/skb_ctx.c78
-rw-r--r--tools/testing/selftests/bpf/prog_tests/skb_helpers.c16
-rw-r--r--tools/testing/selftests/bpf/prog_tests/skb_load_bytes.c45
-rw-r--r--tools/testing/selftests/bpf/prog_tests/skc_to_unix_sock.c54
-rw-r--r--tools/testing/selftests/bpf/prog_tests/skeleton.c58
-rw-r--r--tools/testing/selftests/bpf/prog_tests/snprintf.c127
-rw-r--r--tools/testing/selftests/bpf/prog_tests/snprintf_btf.c60
-rw-r--r--tools/testing/selftests/bpf/prog_tests/sock_addr.c612
-rw-r--r--tools/testing/selftests/bpf/prog_tests/sock_destroy.c221
-rw-r--r--tools/testing/selftests/bpf/prog_tests/sock_fields.c402
-rw-r--r--tools/testing/selftests/bpf/prog_tests/sock_iter_batch.c135
-rw-r--r--tools/testing/selftests/bpf/prog_tests/socket_cookie.c76
-rw-r--r--tools/testing/selftests/bpf/prog_tests/sockmap_basic.c743
-rw-r--r--tools/testing/selftests/bpf/prog_tests/sockmap_helpers.h416
-rw-r--r--tools/testing/selftests/bpf/prog_tests/sockmap_ktls.c123
-rw-r--r--tools/testing/selftests/bpf/prog_tests/sockmap_listen.c1060
-rw-r--r--tools/testing/selftests/bpf/prog_tests/sockopt.c234
-rw-r--r--tools/testing/selftests/bpf/prog_tests/sockopt_inherit.c91
-rw-r--r--tools/testing/selftests/bpf/prog_tests/sockopt_multi.c123
-rw-r--r--tools/testing/selftests/bpf/prog_tests/sockopt_qos_to_cc.c72
-rw-r--r--tools/testing/selftests/bpf/prog_tests/sockopt_sk.c121
-rw-r--r--tools/testing/selftests/bpf/prog_tests/spin_lock.c171
-rw-r--r--tools/testing/selftests/bpf/prog_tests/spinlock.c43
-rw-r--r--tools/testing/selftests/bpf/prog_tests/stack_var_off.c35
-rw-r--r--tools/testing/selftests/bpf/prog_tests/stacktrace_build_id.c27
-rw-r--r--tools/testing/selftests/bpf/prog_tests/stacktrace_build_id_nmi.c46
-rw-r--r--tools/testing/selftests/bpf/prog_tests/stacktrace_map.c10
-rw-r--r--tools/testing/selftests/bpf/prog_tests/stacktrace_map_raw_tp.c13
-rw-r--r--tools/testing/selftests/bpf/prog_tests/stacktrace_map_skip.c63
-rw-r--r--tools/testing/selftests/bpf/prog_tests/static_linked.c35
-rw-r--r--tools/testing/selftests/bpf/prog_tests/struct_ops_autocreate.c159
-rw-r--r--tools/testing/selftests/bpf/prog_tests/subprogs.c96
-rw-r--r--tools/testing/selftests/bpf/prog_tests/subprogs_extable.c29
-rw-r--r--tools/testing/selftests/bpf/prog_tests/subskeleton.c78
-rw-r--r--tools/testing/selftests/bpf/prog_tests/syscall.c81
-rw-r--r--tools/testing/selftests/bpf/prog_tests/tailcalls.c993
-rw-r--r--tools/testing/selftests/bpf/prog_tests/task_fd_query_rawtp.c4
-rw-r--r--tools/testing/selftests/bpf/prog_tests/task_fd_query_tp.c15
-rw-r--r--tools/testing/selftests/bpf/prog_tests/task_kfunc.c98
-rw-r--r--tools/testing/selftests/bpf/prog_tests/task_local_storage.c240
-rw-r--r--tools/testing/selftests/bpf/prog_tests/task_pt_regs.c50
-rw-r--r--tools/testing/selftests/bpf/prog_tests/task_under_cgroup.c60
-rw-r--r--tools/testing/selftests/bpf/prog_tests/tc_bpf.c429
-rw-r--r--tools/testing/selftests/bpf/prog_tests/tc_helpers.h78
-rw-r--r--tools/testing/selftests/bpf/prog_tests/tc_links.c1901
-rw-r--r--tools/testing/selftests/bpf/prog_tests/tc_netkit.c687
-rw-r--r--tools/testing/selftests/bpf/prog_tests/tc_opts.c2814
-rw-r--r--tools/testing/selftests/bpf/prog_tests/tc_redirect.c1290
-rw-r--r--tools/testing/selftests/bpf/prog_tests/tcp_custom_syncookie.c150
-rw-r--r--tools/testing/selftests/bpf/prog_tests/tcp_estats.c8
-rw-r--r--tools/testing/selftests/bpf/prog_tests/tcp_hdr_options.c563
-rw-r--r--tools/testing/selftests/bpf/prog_tests/tcp_rtt.c38
-rw-r--r--tools/testing/selftests/bpf/prog_tests/tcpbpf_user.c137
-rw-r--r--tools/testing/selftests/bpf/prog_tests/test_bpf_ma.c74
-rw-r--r--tools/testing/selftests/bpf/prog_tests/test_bpf_syscall_macro.c90
-rw-r--r--tools/testing/selftests/bpf/prog_tests/test_bpffs.c164
-rw-r--r--tools/testing/selftests/bpf/prog_tests/test_bprm_opts.c108
-rw-r--r--tools/testing/selftests/bpf/prog_tests/test_global_funcs.c210
-rw-r--r--tools/testing/selftests/bpf/prog_tests/test_ima.c240
-rw-r--r--tools/testing/selftests/bpf/prog_tests/test_ldsx_insn.c139
-rw-r--r--tools/testing/selftests/bpf/prog_tests/test_local_storage.c172
-rw-r--r--tools/testing/selftests/bpf/prog_tests/test_lsm.c64
-rw-r--r--tools/testing/selftests/bpf/prog_tests/test_overhead.c48
-rw-r--r--tools/testing/selftests/bpf/prog_tests/test_profiler.c72
-rw-r--r--tools/testing/selftests/bpf/prog_tests/test_skb_pkt_end.c44
-rw-r--r--tools/testing/selftests/bpf/prog_tests/test_strncmp.c148
-rw-r--r--tools/testing/selftests/bpf/prog_tests/test_struct_ops_maybe_null.c46
-rw-r--r--tools/testing/selftests/bpf/prog_tests/test_struct_ops_module.c101
-rw-r--r--tools/testing/selftests/bpf/prog_tests/test_struct_ops_multi_pages.c30
-rw-r--r--tools/testing/selftests/bpf/prog_tests/test_struct_ops_no_cfi.c35
-rw-r--r--tools/testing/selftests/bpf/prog_tests/test_tunnel.c722
-rw-r--r--tools/testing/selftests/bpf/prog_tests/time_tai.c74
-rw-r--r--tools/testing/selftests/bpf/prog_tests/timer.c97
-rw-r--r--tools/testing/selftests/bpf/prog_tests/timer_crash.c32
-rw-r--r--tools/testing/selftests/bpf/prog_tests/timer_mim.c76
-rw-r--r--tools/testing/selftests/bpf/prog_tests/token.c1052
-rw-r--r--tools/testing/selftests/bpf/prog_tests/tp_attach_query.c20
-rw-r--r--tools/testing/selftests/bpf/prog_tests/trace_ext.c115
-rw-r--r--tools/testing/selftests/bpf/prog_tests/trace_printk.c76
-rw-r--r--tools/testing/selftests/bpf/prog_tests/trace_vprintk.c72
-rw-r--r--tools/testing/selftests/bpf/prog_tests/tracing_failure.c37
-rw-r--r--tools/testing/selftests/bpf/prog_tests/tracing_struct.c85
-rw-r--r--tools/testing/selftests/bpf/prog_tests/trampoline_count.c155
-rw-r--r--tools/testing/selftests/bpf/prog_tests/type_cast.c114
-rw-r--r--tools/testing/selftests/bpf/prog_tests/udp_limit.c70
-rw-r--r--tools/testing/selftests/bpf/prog_tests/uninit_stack.c9
-rw-r--r--tools/testing/selftests/bpf/prog_tests/unpriv_bpf_disabled.c316
-rw-r--r--tools/testing/selftests/bpf/prog_tests/uprobe.c95
-rw-r--r--tools/testing/selftests/bpf/prog_tests/uprobe_autoattach.c74
-rw-r--r--tools/testing/selftests/bpf/prog_tests/uprobe_multi_test.c588
-rw-r--r--tools/testing/selftests/bpf/prog_tests/usdt.c420
-rw-r--r--tools/testing/selftests/bpf/prog_tests/user_ringbuf.c696
-rw-r--r--tools/testing/selftests/bpf/prog_tests/varlen.c75
-rw-r--r--tools/testing/selftests/bpf/prog_tests/verif_stats.c29
-rw-r--r--tools/testing/selftests/bpf/prog_tests/verifier.c246
-rw-r--r--tools/testing/selftests/bpf/prog_tests/verifier_log.c450
-rw-r--r--tools/testing/selftests/bpf/prog_tests/verify_pkcs7_sig.c565
-rw-r--r--tools/testing/selftests/bpf/prog_tests/vmlinux.c16
-rw-r--r--tools/testing/selftests/bpf/prog_tests/vrf_socket_lookup.c312
-rw-r--r--tools/testing/selftests/bpf/prog_tests/xdp.c43
-rw-r--r--tools/testing/selftests/bpf/prog_tests/xdp_adjust_frags.c146
-rw-r--r--tools/testing/selftests/bpf/prog_tests/xdp_adjust_tail.c275
-rw-r--r--tools/testing/selftests/bpf/prog_tests/xdp_attach.c115
-rw-r--r--tools/testing/selftests/bpf/prog_tests/xdp_bonding.c691
-rw-r--r--tools/testing/selftests/bpf/prog_tests/xdp_bpf2bpf.c146
-rw-r--r--tools/testing/selftests/bpf/prog_tests/xdp_context_test_run.c105
-rw-r--r--tools/testing/selftests/bpf/prog_tests/xdp_cpumap_attach.c121
-rw-r--r--tools/testing/selftests/bpf/prog_tests/xdp_dev_bound_only.c61
-rw-r--r--tools/testing/selftests/bpf/prog_tests/xdp_devmap_attach.c106
-rw-r--r--tools/testing/selftests/bpf/prog_tests/xdp_do_redirect.c248
-rw-r--r--tools/testing/selftests/bpf/prog_tests/xdp_info.c30
-rw-r--r--tools/testing/selftests/bpf/prog_tests/xdp_link.c152
-rw-r--r--tools/testing/selftests/bpf/prog_tests/xdp_metadata.c525
-rw-r--r--tools/testing/selftests/bpf/prog_tests/xdp_noinline.c75
-rw-r--r--tools/testing/selftests/bpf/prog_tests/xdp_perf.c23
-rw-r--r--tools/testing/selftests/bpf/prog_tests/xdp_synproxy.c178
-rw-r--r--tools/testing/selftests/bpf/prog_tests/xdpwall.c15
-rw-r--r--tools/testing/selftests/bpf/prog_tests/xfrm_info.c347
-rw-r--r--tools/testing/selftests/bpf/progs/access_map_in_map.c93
-rw-r--r--tools/testing/selftests/bpf/progs/arena_htab.c48
-rw-r--r--tools/testing/selftests/bpf/progs/arena_htab_asm.c5
-rw-r--r--tools/testing/selftests/bpf/progs/arena_list.c87
-rw-r--r--tools/testing/selftests/bpf/progs/async_stack_depth.c61
-rw-r--r--tools/testing/selftests/bpf/progs/atomic_bounds.c24
-rw-r--r--tools/testing/selftests/bpf/progs/atomics.c170
-rw-r--r--tools/testing/selftests/bpf/progs/bad_struct_ops.c25
-rw-r--r--tools/testing/selftests/bpf/progs/bad_struct_ops2.c14
-rw-r--r--tools/testing/selftests/bpf/progs/bench_local_storage_create.c82
-rw-r--r--tools/testing/selftests/bpf/progs/bind4_prog.c159
-rw-r--r--tools/testing/selftests/bpf/progs/bind6_prog.c176
-rw-r--r--tools/testing/selftests/bpf/progs/bind_perm.c45
-rw-r--r--tools/testing/selftests/bpf/progs/bloom_filter_bench.c154
-rw-r--r--tools/testing/selftests/bpf/progs/bloom_filter_map.c83
-rw-r--r--tools/testing/selftests/bpf/progs/bpf_compiler.h33
-rw-r--r--tools/testing/selftests/bpf/progs/bpf_cubic.c53
-rw-r--r--tools/testing/selftests/bpf/progs/bpf_dctcp.c58
-rw-r--r--tools/testing/selftests/bpf/progs/bpf_dctcp_release.c26
-rw-r--r--tools/testing/selftests/bpf/progs/bpf_flow.c34
-rw-r--r--tools/testing/selftests/bpf/progs/bpf_hashmap_full_update_bench.c40
-rw-r--r--tools/testing/selftests/bpf/progs/bpf_hashmap_lookup.c63
-rw-r--r--tools/testing/selftests/bpf/progs/bpf_iter.h167
-rw-r--r--tools/testing/selftests/bpf/progs/bpf_iter_bpf_array_map.c59
-rw-r--r--tools/testing/selftests/bpf/progs/bpf_iter_bpf_hash_map.c123
-rw-r--r--tools/testing/selftests/bpf/progs/bpf_iter_bpf_link.c21
-rw-r--r--tools/testing/selftests/bpf/progs/bpf_iter_bpf_map.c21
-rw-r--r--tools/testing/selftests/bpf/progs/bpf_iter_bpf_percpu_array_map.c46
-rw-r--r--tools/testing/selftests/bpf/progs/bpf_iter_bpf_percpu_hash_map.c50
-rw-r--r--tools/testing/selftests/bpf/progs/bpf_iter_bpf_sk_storage_helpers.c65
-rw-r--r--tools/testing/selftests/bpf/progs/bpf_iter_bpf_sk_storage_map.c52
-rw-r--r--tools/testing/selftests/bpf/progs/bpf_iter_ipv6_route.c26
-rw-r--r--tools/testing/selftests/bpf/progs/bpf_iter_ksym.c71
-rw-r--r--tools/testing/selftests/bpf/progs/bpf_iter_netlink.c29
-rw-r--r--tools/testing/selftests/bpf/progs/bpf_iter_setsockopt.c71
-rw-r--r--tools/testing/selftests/bpf/progs/bpf_iter_setsockopt_unix.c60
-rw-r--r--tools/testing/selftests/bpf/progs/bpf_iter_sockmap.c59
-rw-r--r--tools/testing/selftests/bpf/progs/bpf_iter_task.c41
-rw-r--r--tools/testing/selftests/bpf/progs/bpf_iter_task_btf.c49
-rw-r--r--tools/testing/selftests/bpf/progs/bpf_iter_task_file.c38
-rw-r--r--tools/testing/selftests/bpf/progs/bpf_iter_task_stack.c68
-rw-r--r--tools/testing/selftests/bpf/progs/bpf_iter_task_vmas.c62
-rw-r--r--tools/testing/selftests/bpf/progs/bpf_iter_tasks.c88
-rw-r--r--tools/testing/selftests/bpf/progs/bpf_iter_tcp4.c233
-rw-r--r--tools/testing/selftests/bpf/progs/bpf_iter_tcp6.c249
-rw-r--r--tools/testing/selftests/bpf/progs/bpf_iter_test_kern3.c17
-rw-r--r--tools/testing/selftests/bpf/progs/bpf_iter_test_kern4.c23
-rw-r--r--tools/testing/selftests/bpf/progs/bpf_iter_test_kern5.c35
-rw-r--r--tools/testing/selftests/bpf/progs/bpf_iter_test_kern6.c21
-rw-r--r--tools/testing/selftests/bpf/progs/bpf_iter_test_kern_common.h18
-rw-r--r--tools/testing/selftests/bpf/progs/bpf_iter_udp4.c70
-rw-r--r--tools/testing/selftests/bpf/progs/bpf_iter_udp6.c78
-rw-r--r--tools/testing/selftests/bpf/progs/bpf_iter_unix.c80
-rw-r--r--tools/testing/selftests/bpf/progs/bpf_iter_vma_offset.c37
-rw-r--r--tools/testing/selftests/bpf/progs/bpf_loop.c225
-rw-r--r--tools/testing/selftests/bpf/progs/bpf_loop_bench.c30
-rw-r--r--tools/testing/selftests/bpf/progs/bpf_misc.h138
-rw-r--r--tools/testing/selftests/bpf/progs/bpf_mod_race.c100
-rw-r--r--tools/testing/selftests/bpf/progs/bpf_syscall_macro.c110
-rw-r--r--tools/testing/selftests/bpf/progs/bpf_tcp_nogpl.c19
-rw-r--r--tools/testing/selftests/bpf/progs/bpf_tracing_net.h122
-rw-r--r--tools/testing/selftests/bpf/progs/bprm_opts.c34
-rw-r--r--tools/testing/selftests/bpf/progs/btf__core_reloc_enum64val.c3
-rw-r--r--tools/testing/selftests/bpf/progs/btf__core_reloc_enum64val___diff.c3
-rw-r--r--tools/testing/selftests/bpf/progs/btf__core_reloc_enum64val___err_missing.c3
-rw-r--r--tools/testing/selftests/bpf/progs/btf__core_reloc_enum64val___val3_missing.c3
-rw-r--r--tools/testing/selftests/bpf/progs/btf__core_reloc_enumval.c3
-rw-r--r--tools/testing/selftests/bpf/progs/btf__core_reloc_enumval___diff.c3
-rw-r--r--tools/testing/selftests/bpf/progs/btf__core_reloc_enumval___err_missing.c3
-rw-r--r--tools/testing/selftests/bpf/progs/btf__core_reloc_enumval___val3_missing.c3
-rw-r--r--tools/testing/selftests/bpf/progs/btf__core_reloc_existence___err_wrong_arr_kind.c3
-rw-r--r--tools/testing/selftests/bpf/progs/btf__core_reloc_existence___err_wrong_arr_value_type.c3
-rw-r--r--tools/testing/selftests/bpf/progs/btf__core_reloc_existence___err_wrong_int_kind.c3
-rw-r--r--tools/testing/selftests/bpf/progs/btf__core_reloc_existence___err_wrong_int_sz.c3
-rw-r--r--tools/testing/selftests/bpf/progs/btf__core_reloc_existence___err_wrong_int_type.c3
-rw-r--r--tools/testing/selftests/bpf/progs/btf__core_reloc_existence___err_wrong_struct_type.c3
-rw-r--r--tools/testing/selftests/bpf/progs/btf__core_reloc_existence___wrong_field_defs.c3
-rw-r--r--tools/testing/selftests/bpf/progs/btf__core_reloc_size___diff_offs.c3
-rw-r--r--tools/testing/selftests/bpf/progs/btf__core_reloc_size___err_ambiguous.c4
-rw-r--r--tools/testing/selftests/bpf/progs/btf__core_reloc_type_based.c3
-rw-r--r--tools/testing/selftests/bpf/progs/btf__core_reloc_type_based___all_missing.c3
-rw-r--r--tools/testing/selftests/bpf/progs/btf__core_reloc_type_based___diff.c3
-rw-r--r--tools/testing/selftests/bpf/progs/btf__core_reloc_type_based___diff_sz.c3
-rw-r--r--tools/testing/selftests/bpf/progs/btf__core_reloc_type_based___fn_wrong_args.c3
-rw-r--r--tools/testing/selftests/bpf/progs/btf__core_reloc_type_based___incompat.c3
-rw-r--r--tools/testing/selftests/bpf/progs/btf__core_reloc_type_id.c3
-rw-r--r--tools/testing/selftests/bpf/progs/btf__core_reloc_type_id___missing_targets.c3
-rw-r--r--tools/testing/selftests/bpf/progs/btf_data.c50
-rw-r--r--tools/testing/selftests/bpf/progs/btf_dump_test_case_bitfields.c12
-rw-r--r--tools/testing/selftests/bpf/progs/btf_dump_test_case_packing.c84
-rw-r--r--tools/testing/selftests/bpf/progs/btf_dump_test_case_padding.c173
-rw-r--r--tools/testing/selftests/bpf/progs/btf_dump_test_case_syntax.c57
-rw-r--r--tools/testing/selftests/bpf/progs/btf_ptr.h27
-rw-r--r--tools/testing/selftests/bpf/progs/btf_type_tag.c25
-rw-r--r--tools/testing/selftests/bpf/progs/btf_type_tag_percpu.c67
-rw-r--r--tools/testing/selftests/bpf/progs/btf_type_tag_user.c40
-rw-r--r--tools/testing/selftests/bpf/progs/cb_refs.c114
-rw-r--r--tools/testing/selftests/bpf/progs/cg_storage_multi.h13
-rw-r--r--tools/testing/selftests/bpf/progs/cg_storage_multi_egress_only.c33
-rw-r--r--tools/testing/selftests/bpf/progs/cg_storage_multi_isolated.c57
-rw-r--r--tools/testing/selftests/bpf/progs/cg_storage_multi_shared.c57
-rw-r--r--tools/testing/selftests/bpf/progs/cgroup_getset_retval_getsockopt.c58
-rw-r--r--tools/testing/selftests/bpf/progs/cgroup_getset_retval_hooks.c16
-rw-r--r--tools/testing/selftests/bpf/progs/cgroup_getset_retval_setsockopt.c69
-rw-r--r--tools/testing/selftests/bpf/progs/cgroup_hierarchical_stats.c155
-rw-r--r--tools/testing/selftests/bpf/progs/cgroup_iter.c39
-rw-r--r--tools/testing/selftests/bpf/progs/cgroup_skb_sk_lookup_kern.c2
-rw-r--r--tools/testing/selftests/bpf/progs/cgroup_tcp_skb.c382
-rw-r--r--tools/testing/selftests/bpf/progs/cgrp_kfunc_common.h79
-rw-r--r--tools/testing/selftests/bpf/progs/cgrp_kfunc_failure.c247
-rw-r--r--tools/testing/selftests/bpf/progs/cgrp_kfunc_success.c223
-rw-r--r--tools/testing/selftests/bpf/progs/cgrp_ls_attach_cgroup.c100
-rw-r--r--tools/testing/selftests/bpf/progs/cgrp_ls_negative.c26
-rw-r--r--tools/testing/selftests/bpf/progs/cgrp_ls_recursion.c94
-rw-r--r--tools/testing/selftests/bpf/progs/cgrp_ls_sleepable.c126
-rw-r--r--tools/testing/selftests/bpf/progs/cgrp_ls_tp_btf.c126
-rw-r--r--tools/testing/selftests/bpf/progs/connect4_dropper.c26
-rw-r--r--tools/testing/selftests/bpf/progs/connect4_prog.c55
-rw-r--r--tools/testing/selftests/bpf/progs/connect6_prog.c2
-rw-r--r--tools/testing/selftests/bpf/progs/connect_force_port4.c9
-rw-r--r--tools/testing/selftests/bpf/progs/connect_force_port6.c9
-rw-r--r--tools/testing/selftests/bpf/progs/connect_ping.c53
-rw-r--r--tools/testing/selftests/bpf/progs/connect_unix_prog.c39
-rw-r--r--tools/testing/selftests/bpf/progs/core_kern.c120
-rw-r--r--tools/testing/selftests/bpf/progs/core_kern_overflow.c22
-rw-r--r--tools/testing/selftests/bpf/progs/core_reloc_types.h649
-rw-r--r--tools/testing/selftests/bpf/progs/cpumask_common.h123
-rw-r--r--tools/testing/selftests/bpf/progs/cpumask_failure.c192
-rw-r--r--tools/testing/selftests/bpf/progs/cpumask_success.c527
-rw-r--r--tools/testing/selftests/bpf/progs/decap_sanity.c68
-rw-r--r--tools/testing/selftests/bpf/progs/dev_cgroup.c1
-rw-r--r--tools/testing/selftests/bpf/progs/dummy_st_ops_fail.c27
-rw-r--r--tools/testing/selftests/bpf/progs/dummy_st_ops_success.c47
-rw-r--r--tools/testing/selftests/bpf/progs/dynptr_fail.c1688
-rw-r--r--tools/testing/selftests/bpf/progs/dynptr_success.c546
-rw-r--r--tools/testing/selftests/bpf/progs/empty_skb.c37
-rw-r--r--tools/testing/selftests/bpf/progs/err.h18
-rw-r--r--tools/testing/selftests/bpf/progs/exceptions.c368
-rw-r--r--tools/testing/selftests/bpf/progs/exceptions_assert.c135
-rw-r--r--tools/testing/selftests/bpf/progs/exceptions_ext.c72
-rw-r--r--tools/testing/selftests/bpf/progs/exceptions_fail.c349
-rw-r--r--tools/testing/selftests/bpf/progs/exhandler_kern.c52
-rw-r--r--tools/testing/selftests/bpf/progs/fentry_many_args.c39
-rw-r--r--tools/testing/selftests/bpf/progs/fentry_recursive.c14
-rw-r--r--tools/testing/selftests/bpf/progs/fentry_recursive_target.c25
-rw-r--r--tools/testing/selftests/bpf/progs/fentry_test.c2
-rw-r--r--tools/testing/selftests/bpf/progs/fexit_bpf2bpf.c37
-rw-r--r--tools/testing/selftests/bpf/progs/fexit_many_args.c40
-rw-r--r--tools/testing/selftests/bpf/progs/fexit_sleep.c32
-rw-r--r--tools/testing/selftests/bpf/progs/fexit_test.c4
-rw-r--r--tools/testing/selftests/bpf/progs/fib_lookup.c22
-rw-r--r--tools/testing/selftests/bpf/progs/find_vma.c69
-rw-r--r--tools/testing/selftests/bpf/progs/find_vma_fail1.c30
-rw-r--r--tools/testing/selftests/bpf/progs/find_vma_fail2.c29
-rw-r--r--tools/testing/selftests/bpf/progs/fmod_ret_freplace.c14
-rw-r--r--tools/testing/selftests/bpf/progs/for_each_array_map_elem.c73
-rw-r--r--tools/testing/selftests/bpf/progs/for_each_hash_map_elem.c95
-rw-r--r--tools/testing/selftests/bpf/progs/for_each_map_elem_write_key.c27
-rw-r--r--tools/testing/selftests/bpf/progs/freplace_attach_probe.c40
-rw-r--r--tools/testing/selftests/bpf/progs/freplace_cls_redirect.c34
-rw-r--r--tools/testing/selftests/bpf/progs/freplace_connect_v4_prog.c19
-rw-r--r--tools/testing/selftests/bpf/progs/freplace_dead_global_func.c11
-rw-r--r--tools/testing/selftests/bpf/progs/freplace_get_constant.c15
-rw-r--r--tools/testing/selftests/bpf/progs/freplace_global_func.c18
-rw-r--r--tools/testing/selftests/bpf/progs/freplace_progmap.c24
-rw-r--r--tools/testing/selftests/bpf/progs/freplace_unreliable_prog.c20
-rw-r--r--tools/testing/selftests/bpf/progs/get_branch_snapshot.c40
-rw-r--r--tools/testing/selftests/bpf/progs/get_cgroup_id_kern.c1
-rw-r--r--tools/testing/selftests/bpf/progs/get_func_args_test.c123
-rw-r--r--tools/testing/selftests/bpf/progs/get_func_ip_test.c106
-rw-r--r--tools/testing/selftests/bpf/progs/get_func_ip_uprobe_test.c18
-rw-r--r--tools/testing/selftests/bpf/progs/getpeername_unix_prog.c38
-rw-r--r--tools/testing/selftests/bpf/progs/getsockname_unix_prog.c38
-rw-r--r--tools/testing/selftests/bpf/progs/htab_mem_bench.c105
-rw-r--r--tools/testing/selftests/bpf/progs/htab_reuse.c19
-rw-r--r--tools/testing/selftests/bpf/progs/htab_update.c29
-rw-r--r--tools/testing/selftests/bpf/progs/ima.c103
-rw-r--r--tools/testing/selftests/bpf/progs/inner_array_lookup.c45
-rw-r--r--tools/testing/selftests/bpf/progs/ip_check_defrag.c104
-rw-r--r--tools/testing/selftests/bpf/progs/iters.c1437
-rw-r--r--tools/testing/selftests/bpf/progs/iters_css.c72
-rw-r--r--tools/testing/selftests/bpf/progs/iters_css_task.c102
-rw-r--r--tools/testing/selftests/bpf/progs/iters_looping.c163
-rw-r--r--tools/testing/selftests/bpf/progs/iters_num.c242
-rw-r--r--tools/testing/selftests/bpf/progs/iters_state_safety.c426
-rw-r--r--tools/testing/selftests/bpf/progs/iters_task.c51
-rw-r--r--tools/testing/selftests/bpf/progs/iters_task_failure.c105
-rw-r--r--tools/testing/selftests/bpf/progs/iters_task_vma.c43
-rw-r--r--tools/testing/selftests/bpf/progs/iters_testmod_seq.c79
-rw-r--r--tools/testing/selftests/bpf/progs/jeq_infer_not_null_fail.c42
-rw-r--r--tools/testing/selftests/bpf/progs/jit_probe_mem.c59
-rw-r--r--tools/testing/selftests/bpf/progs/kfree_skb.c8
-rw-r--r--tools/testing/selftests/bpf/progs/kfunc_call_destructive.c13
-rw-r--r--tools/testing/selftests/bpf/progs/kfunc_call_fail.c153
-rw-r--r--tools/testing/selftests/bpf/progs/kfunc_call_race.c13
-rw-r--r--tools/testing/selftests/bpf/progs/kfunc_call_test.c180
-rw-r--r--tools/testing/selftests/bpf/progs/kfunc_call_test_subprog.c37
-rw-r--r--tools/testing/selftests/bpf/progs/kprobe_multi.c162
-rw-r--r--tools/testing/selftests/bpf/progs/kprobe_multi_empty.c12
-rw-r--r--tools/testing/selftests/bpf/progs/kprobe_multi_override.c13
-rw-r--r--tools/testing/selftests/bpf/progs/kptr_xchg_inline.c48
-rw-r--r--tools/testing/selftests/bpf/progs/ksym_race.c13
-rw-r--r--tools/testing/selftests/bpf/progs/linked_funcs1.c89
-rw-r--r--tools/testing/selftests/bpf/progs/linked_funcs2.c89
-rw-r--r--tools/testing/selftests/bpf/progs/linked_list.c381
-rw-r--r--tools/testing/selftests/bpf/progs/linked_list.h56
-rw-r--r--tools/testing/selftests/bpf/progs/linked_list_fail.c612
-rw-r--r--tools/testing/selftests/bpf/progs/linked_maps1.c82
-rw-r--r--tools/testing/selftests/bpf/progs/linked_maps2.c76
-rw-r--r--tools/testing/selftests/bpf/progs/linked_vars1.c54
-rw-r--r--tools/testing/selftests/bpf/progs/linked_vars2.c55
-rw-r--r--tools/testing/selftests/bpf/progs/local_kptr_stash.c259
-rw-r--r--tools/testing/selftests/bpf/progs/local_kptr_stash_fail.c85
-rw-r--r--tools/testing/selftests/bpf/progs/local_storage.c226
-rw-r--r--tools/testing/selftests/bpf/progs/local_storage_bench.c104
-rw-r--r--tools/testing/selftests/bpf/progs/local_storage_rcu_tasks_trace_bench.c67
-rw-r--r--tools/testing/selftests/bpf/progs/loop3.c4
-rw-r--r--tools/testing/selftests/bpf/progs/loop4.c4
-rw-r--r--tools/testing/selftests/bpf/progs/loop5.c1
-rw-r--r--tools/testing/selftests/bpf/progs/loop6.c102
-rw-r--r--tools/testing/selftests/bpf/progs/lru_bug.c49
-rw-r--r--tools/testing/selftests/bpf/progs/lsm.c139
-rw-r--r--tools/testing/selftests/bpf/progs/lsm_cgroup.c188
-rw-r--r--tools/testing/selftests/bpf/progs/lsm_cgroup_nonvoid.c14
-rw-r--r--tools/testing/selftests/bpf/progs/map_in_map_btf.c73
-rw-r--r--tools/testing/selftests/bpf/progs/map_kptr.c530
-rw-r--r--tools/testing/selftests/bpf/progs/map_kptr_fail.c388
-rw-r--r--tools/testing/selftests/bpf/progs/map_percpu_stats.c24
-rw-r--r--tools/testing/selftests/bpf/progs/map_ptr_kern.c703
-rw-r--r--tools/testing/selftests/bpf/progs/metadata_unused.c15
-rw-r--r--tools/testing/selftests/bpf/progs/metadata_used.c15
-rw-r--r--tools/testing/selftests/bpf/progs/missed_kprobe.c30
-rw-r--r--tools/testing/selftests/bpf/progs/missed_kprobe_recursion.c48
-rw-r--r--tools/testing/selftests/bpf/progs/missed_tp_recursion.c41
-rw-r--r--tools/testing/selftests/bpf/progs/modify_return.c40
-rw-r--r--tools/testing/selftests/bpf/progs/mptcp_sock.c88
-rw-r--r--tools/testing/selftests/bpf/progs/mptcpify.c20
-rw-r--r--tools/testing/selftests/bpf/progs/nested_trust_common.h12
-rw-r--r--tools/testing/selftests/bpf/progs/nested_trust_failure.c49
-rw-r--r--tools/testing/selftests/bpf/progs/nested_trust_success.c34
-rw-r--r--tools/testing/selftests/bpf/progs/netcnt_prog.c10
-rw-r--r--tools/testing/selftests/bpf/progs/netif_receive_skb.c255
-rw-r--r--tools/testing/selftests/bpf/progs/netns_cookie_prog.c84
-rw-r--r--tools/testing/selftests/bpf/progs/normal_map_btf.c56
-rw-r--r--tools/testing/selftests/bpf/progs/percpu_alloc_array.c190
-rw-r--r--tools/testing/selftests/bpf/progs/percpu_alloc_cgrp_local_storage.c109
-rw-r--r--tools/testing/selftests/bpf/progs/percpu_alloc_fail.c182
-rw-r--r--tools/testing/selftests/bpf/progs/perf_event_stackmap.c59
-rw-r--r--tools/testing/selftests/bpf/progs/perfbuf_bench.c4
-rw-r--r--tools/testing/selftests/bpf/progs/preempted_bpf_ma_op.c106
-rw-r--r--tools/testing/selftests/bpf/progs/priv_map.c13
-rw-r--r--tools/testing/selftests/bpf/progs/priv_prog.c13
-rw-r--r--tools/testing/selftests/bpf/progs/profiler.h177
-rw-r--r--tools/testing/selftests/bpf/progs/profiler.inc.h963
-rw-r--r--tools/testing/selftests/bpf/progs/profiler1.c5
-rw-r--r--tools/testing/selftests/bpf/progs/profiler2.c6
-rw-r--r--tools/testing/selftests/bpf/progs/profiler3.c6
-rw-r--r--tools/testing/selftests/bpf/progs/pyperf.h99
-rw-r--r--tools/testing/selftests/bpf/progs/pyperf180.c22
-rw-r--r--tools/testing/selftests/bpf/progs/pyperf600.c11
-rw-r--r--tools/testing/selftests/bpf/progs/pyperf600_bpf_loop.c6
-rw-r--r--tools/testing/selftests/bpf/progs/pyperf600_iter.c7
-rw-r--r--tools/testing/selftests/bpf/progs/pyperf600_nounroll.c3
-rw-r--r--tools/testing/selftests/bpf/progs/pyperf_subprogs.c5
-rw-r--r--tools/testing/selftests/bpf/progs/rbtree.c246
-rw-r--r--tools/testing/selftests/bpf/progs/rbtree_btf_fail__add_wrong_type.c52
-rw-r--r--tools/testing/selftests/bpf/progs/rbtree_btf_fail__wrong_node_type.c38
-rw-r--r--tools/testing/selftests/bpf/progs/rbtree_fail.c303
-rw-r--r--tools/testing/selftests/bpf/progs/rcu_read_lock.c441
-rw-r--r--tools/testing/selftests/bpf/progs/rcu_tasks_trace_gp.c36
-rw-r--r--tools/testing/selftests/bpf/progs/read_bpf_task_storage_busy.c38
-rw-r--r--tools/testing/selftests/bpf/progs/read_vsyscall.c45
-rw-r--r--tools/testing/selftests/bpf/progs/recursion.c43
-rw-r--r--tools/testing/selftests/bpf/progs/recvmsg4_prog.c40
-rw-r--r--tools/testing/selftests/bpf/progs/recvmsg6_prog.c46
-rw-r--r--tools/testing/selftests/bpf/progs/recvmsg_unix_prog.c38
-rw-r--r--tools/testing/selftests/bpf/progs/refcounted_kptr.c571
-rw-r--r--tools/testing/selftests/bpf/progs/refcounted_kptr_fail.c121
-rw-r--r--tools/testing/selftests/bpf/progs/ringbuf_bench.c3
-rw-r--r--tools/testing/selftests/bpf/progs/sample_map_ret0.c24
-rw-r--r--tools/testing/selftests/bpf/progs/sendmsg4_prog.c7
-rw-r--r--tools/testing/selftests/bpf/progs/sendmsg6_prog.c7
-rw-r--r--tools/testing/selftests/bpf/progs/sendmsg_unix_prog.c39
-rw-r--r--tools/testing/selftests/bpf/progs/setget_sockopt.c403
-rw-r--r--tools/testing/selftests/bpf/progs/sk_storage_omem_uncharge.c59
-rw-r--r--tools/testing/selftests/bpf/progs/skb_load_bytes.c19
-rw-r--r--tools/testing/selftests/bpf/progs/skb_pkt_end.c53
-rw-r--r--tools/testing/selftests/bpf/progs/sock_destroy_prog.c145
-rw-r--r--tools/testing/selftests/bpf/progs/sock_destroy_prog_fail.c22
-rw-r--r--tools/testing/selftests/bpf/progs/sock_iter_batch.c91
-rw-r--r--tools/testing/selftests/bpf/progs/socket_cookie_prog.c47
-rw-r--r--tools/testing/selftests/bpf/progs/sockmap_parse_prog.c4
-rw-r--r--tools/testing/selftests/bpf/progs/sockmap_tcp_msg_prog.c14
-rw-r--r--tools/testing/selftests/bpf/progs/sockmap_verdict_prog.c18
-rw-r--r--tools/testing/selftests/bpf/progs/sockopt_inherit.c19
-rw-r--r--tools/testing/selftests/bpf/progs/sockopt_multi.c31
-rw-r--r--tools/testing/selftests/bpf/progs/sockopt_qos_to_cc.c47
-rw-r--r--tools/testing/selftests/bpf/progs/sockopt_sk.c108
-rw-r--r--tools/testing/selftests/bpf/progs/stacktrace_map_skip.c68
-rw-r--r--tools/testing/selftests/bpf/progs/strncmp_bench.c50
-rw-r--r--tools/testing/selftests/bpf/progs/strncmp_test.c54
-rw-r--r--tools/testing/selftests/bpf/progs/strobemeta.h194
-rw-r--r--tools/testing/selftests/bpf/progs/strobemeta_bpf_loop.c9
-rw-r--r--tools/testing/selftests/bpf/progs/strobemeta_subprogs.c10
-rw-r--r--tools/testing/selftests/bpf/progs/struct_ops_autocreate.c52
-rw-r--r--tools/testing/selftests/bpf/progs/struct_ops_autocreate2.c32
-rw-r--r--tools/testing/selftests/bpf/progs/struct_ops_maybe_null.c29
-rw-r--r--tools/testing/selftests/bpf/progs/struct_ops_maybe_null_fail.c24
-rw-r--r--tools/testing/selftests/bpf/progs/struct_ops_module.c56
-rw-r--r--tools/testing/selftests/bpf/progs/struct_ops_multi_pages.c102
-rw-r--r--tools/testing/selftests/bpf/progs/syscall.c207
-rw-r--r--tools/testing/selftests/bpf/progs/tailcall1.c31
-rw-r--r--tools/testing/selftests/bpf/progs/tailcall2.c37
-rw-r--r--tools/testing/selftests/bpf/progs/tailcall3.c13
-rw-r--r--tools/testing/selftests/bpf/progs/tailcall4.c9
-rw-r--r--tools/testing/selftests/bpf/progs/tailcall5.c9
-rw-r--r--tools/testing/selftests/bpf/progs/tailcall6.c34
-rw-r--r--tools/testing/selftests/bpf/progs/tailcall_bpf2bpf1.c37
-rw-r--r--tools/testing/selftests/bpf/progs/tailcall_bpf2bpf2.c40
-rw-r--r--tools/testing/selftests/bpf/progs/tailcall_bpf2bpf3.c71
-rw-r--r--tools/testing/selftests/bpf/progs/tailcall_bpf2bpf4.c78
-rw-r--r--tools/testing/selftests/bpf/progs/tailcall_bpf2bpf6.c45
-rw-r--r--tools/testing/selftests/bpf/progs/tailcall_bpf2bpf_fentry.c18
-rw-r--r--tools/testing/selftests/bpf/progs/tailcall_bpf2bpf_fexit.c18
-rw-r--r--tools/testing/selftests/bpf/progs/tailcall_poke.c32
-rw-r--r--tools/testing/selftests/bpf/progs/task_kfunc_common.h76
-rw-r--r--tools/testing/selftests/bpf/progs/task_kfunc_failure.c326
-rw-r--r--tools/testing/selftests/bpf/progs/task_kfunc_success.c316
-rw-r--r--tools/testing/selftests/bpf/progs/task_local_storage.c64
-rw-r--r--tools/testing/selftests/bpf/progs/task_local_storage_exit_creds.c35
-rw-r--r--tools/testing/selftests/bpf/progs/task_ls_recursion.c90
-rw-r--r--tools/testing/selftests/bpf/progs/task_storage_nodeadlock.c47
-rw-r--r--tools/testing/selftests/bpf/progs/tcp_ca_incompl_cong_ops.c35
-rw-r--r--tools/testing/selftests/bpf/progs/tcp_ca_unsupp_cong_op.c21
-rw-r--r--tools/testing/selftests/bpf/progs/tcp_ca_update.c80
-rw-r--r--tools/testing/selftests/bpf/progs/tcp_ca_write_sk_pacing.c71
-rw-r--r--tools/testing/selftests/bpf/progs/tcp_rtt.c1
-rw-r--r--tools/testing/selftests/bpf/progs/test_access_variable_array.c19
-rw-r--r--tools/testing/selftests/bpf/progs/test_assign_reuse.c142
-rw-r--r--tools/testing/selftests/bpf/progs/test_attach_kprobe_sleepable.c23
-rw-r--r--tools/testing/selftests/bpf/progs/test_attach_probe.c119
-rw-r--r--tools/testing/selftests/bpf/progs/test_attach_probe_manual.c53
-rw-r--r--tools/testing/selftests/bpf/progs/test_autoattach.c23
-rw-r--r--tools/testing/selftests/bpf/progs/test_autoload.c40
-rw-r--r--tools/testing/selftests/bpf/progs/test_bpf_cookie.c121
-rw-r--r--tools/testing/selftests/bpf/progs/test_bpf_ma.c289
-rw-r--r--tools/testing/selftests/bpf/progs/test_bpf_nf.c237
-rw-r--r--tools/testing/selftests/bpf/progs/test_bpf_nf_fail.c148
-rw-r--r--tools/testing/selftests/bpf/progs/test_btf_decl_tag.c50
-rw-r--r--tools/testing/selftests/bpf/progs/test_btf_haskv.c50
-rw-r--r--tools/testing/selftests/bpf/progs/test_btf_map_in_map.c80
-rw-r--r--tools/testing/selftests/bpf/progs/test_btf_newkv.c17
-rw-r--r--tools/testing/selftests/bpf/progs/test_btf_nokv.c14
-rw-r--r--tools/testing/selftests/bpf/progs/test_btf_skc_cls_ingress.c174
-rw-r--r--tools/testing/selftests/bpf/progs/test_cgroup1_hierarchy.c71
-rw-r--r--tools/testing/selftests/bpf/progs/test_cgroup_link.c4
-rw-r--r--tools/testing/selftests/bpf/progs/test_check_mtu.c290
-rw-r--r--tools/testing/selftests/bpf/progs/test_cls_redirect.c131
-rw-r--r--tools/testing/selftests/bpf/progs/test_cls_redirect.h9
-rw-r--r--tools/testing/selftests/bpf/progs/test_cls_redirect_dynptr.c981
-rw-r--r--tools/testing/selftests/bpf/progs/test_cls_redirect_subprogs.c2
-rw-r--r--tools/testing/selftests/bpf/progs/test_core_autosize.c182
-rw-r--r--tools/testing/selftests/bpf/progs/test_core_extern.c3
-rw-r--r--tools/testing/selftests/bpf/progs/test_core_read_macros.c50
-rw-r--r--tools/testing/selftests/bpf/progs/test_core_reloc_bitfields_probed.c1
-rw-r--r--tools/testing/selftests/bpf/progs/test_core_reloc_enum64val.c70
-rw-r--r--tools/testing/selftests/bpf/progs/test_core_reloc_enumval.c72
-rw-r--r--tools/testing/selftests/bpf/progs/test_core_reloc_existence.c11
-rw-r--r--tools/testing/selftests/bpf/progs/test_core_reloc_kernel.c23
-rw-r--r--tools/testing/selftests/bpf/progs/test_core_reloc_mods.c9
-rw-r--r--tools/testing/selftests/bpf/progs/test_core_reloc_module.c104
-rw-r--r--tools/testing/selftests/bpf/progs/test_core_reloc_size.c32
-rw-r--r--tools/testing/selftests/bpf/progs/test_core_reloc_type_based.c157
-rw-r--r--tools/testing/selftests/bpf/progs/test_core_reloc_type_id.c115
-rw-r--r--tools/testing/selftests/bpf/progs/test_core_retro.c43
-rw-r--r--tools/testing/selftests/bpf/progs/test_custom_sec_handlers.c63
-rw-r--r--tools/testing/selftests/bpf/progs/test_d_path.c65
-rw-r--r--tools/testing/selftests/bpf/progs/test_d_path_check_rdonly_mem.c28
-rw-r--r--tools/testing/selftests/bpf/progs/test_d_path_check_types.c32
-rw-r--r--tools/testing/selftests/bpf/progs/test_deny_namespace.c30
-rw-r--r--tools/testing/selftests/bpf/progs/test_enable_stats.c2
-rw-r--r--tools/testing/selftests/bpf/progs/test_endian.c37
-rw-r--r--tools/testing/selftests/bpf/progs/test_fill_link_info.c54
-rw-r--r--tools/testing/selftests/bpf/progs/test_fsverity.c48
-rw-r--r--tools/testing/selftests/bpf/progs/test_get_stack_rawtp.c3
-rw-r--r--tools/testing/selftests/bpf/progs/test_get_xattr.c37
-rw-r--r--tools/testing/selftests/bpf/progs/test_global_data.c2
-rw-r--r--tools/testing/selftests/bpf/progs/test_global_func1.c20
-rw-r--r--tools/testing/selftests/bpf/progs/test_global_func10.c31
-rw-r--r--tools/testing/selftests/bpf/progs/test_global_func11.c21
-rw-r--r--tools/testing/selftests/bpf/progs/test_global_func12.c25
-rw-r--r--tools/testing/selftests/bpf/progs/test_global_func13.c26
-rw-r--r--tools/testing/selftests/bpf/progs/test_global_func14.c23
-rw-r--r--tools/testing/selftests/bpf/progs/test_global_func15.c56
-rw-r--r--tools/testing/selftests/bpf/progs/test_global_func16.c24
-rw-r--r--tools/testing/selftests/bpf/progs/test_global_func17.c19
-rw-r--r--tools/testing/selftests/bpf/progs/test_global_func2.c47
-rw-r--r--tools/testing/selftests/bpf/progs/test_global_func3.c12
-rw-r--r--tools/testing/selftests/bpf/progs/test_global_func4.c55
-rw-r--r--tools/testing/selftests/bpf/progs/test_global_func5.c6
-rw-r--r--tools/testing/selftests/bpf/progs/test_global_func6.c6
-rw-r--r--tools/testing/selftests/bpf/progs/test_global_func7.c6
-rw-r--r--tools/testing/selftests/bpf/progs/test_global_func8.c21
-rw-r--r--tools/testing/selftests/bpf/progs/test_global_func9.c134
-rw-r--r--tools/testing/selftests/bpf/progs/test_global_func_args.c91
-rw-r--r--tools/testing/selftests/bpf/progs/test_global_func_ctx_args.c172
-rw-r--r--tools/testing/selftests/bpf/progs/test_global_map_resize.c62
-rw-r--r--tools/testing/selftests/bpf/progs/test_hash_large_key.c44
-rw-r--r--tools/testing/selftests/bpf/progs/test_helper_restricted.c123
-rw-r--r--tools/testing/selftests/bpf/progs/test_jhash.h31
-rw-r--r--tools/testing/selftests/bpf/progs/test_kfunc_dynptr_param.c86
-rw-r--r--tools/testing/selftests/bpf/progs/test_ksyms.c32
-rw-r--r--tools/testing/selftests/bpf/progs/test_ksyms_btf.c55
-rw-r--r--tools/testing/selftests/bpf/progs/test_ksyms_btf_null_check.c31
-rw-r--r--tools/testing/selftests/bpf/progs/test_ksyms_btf_write_check.c44
-rw-r--r--tools/testing/selftests/bpf/progs/test_ksyms_module.c50
-rw-r--r--tools/testing/selftests/bpf/progs/test_ksyms_weak.c71
-rw-r--r--tools/testing/selftests/bpf/progs/test_l4lb.c4
-rw-r--r--tools/testing/selftests/bpf/progs/test_l4lb_noinline.c45
-rw-r--r--tools/testing/selftests/bpf/progs/test_l4lb_noinline_dynptr.c487
-rw-r--r--tools/testing/selftests/bpf/progs/test_ldsx_insn.c125
-rw-r--r--tools/testing/selftests/bpf/progs/test_legacy_printk.c73
-rw-r--r--tools/testing/selftests/bpf/progs/test_libbpf_get_fd_by_id_opts.c36
-rw-r--r--tools/testing/selftests/bpf/progs/test_log_buf.c24
-rw-r--r--tools/testing/selftests/bpf/progs/test_log_fixup.c74
-rw-r--r--tools/testing/selftests/bpf/progs/test_lookup_and_delete.c26
-rw-r--r--tools/testing/selftests/bpf/progs/test_lookup_key.c46
-rw-r--r--tools/testing/selftests/bpf/progs/test_lwt_redirect.c90
-rw-r--r--tools/testing/selftests/bpf/progs/test_lwt_reroute.c36
-rw-r--r--tools/testing/selftests/bpf/progs/test_lwt_seg6local.c6
-rw-r--r--tools/testing/selftests/bpf/progs/test_map_in_map.c39
-rw-r--r--tools/testing/selftests/bpf/progs/test_map_in_map_invalid.c26
-rw-r--r--tools/testing/selftests/bpf/progs/test_map_init.c33
-rw-r--r--tools/testing/selftests/bpf/progs/test_map_lock.c4
-rw-r--r--tools/testing/selftests/bpf/progs/test_map_lookup_percpu_elem.c76
-rw-r--r--tools/testing/selftests/bpf/progs/test_map_ops.c138
-rw-r--r--tools/testing/selftests/bpf/progs/test_migrate_reuseport.c135
-rw-r--r--tools/testing/selftests/bpf/progs/test_misc_tcp_hdr_options.c329
-rw-r--r--tools/testing/selftests/bpf/progs/test_mmap.c2
-rw-r--r--tools/testing/selftests/bpf/progs/test_module_attach.c119
-rw-r--r--tools/testing/selftests/bpf/progs/test_netfilter_link_attach.c14
-rw-r--r--tools/testing/selftests/bpf/progs/test_ns_current_pid_tgid.c28
-rw-r--r--tools/testing/selftests/bpf/progs/test_obj_id.c2
-rw-r--r--tools/testing/selftests/bpf/progs/test_overhead.c6
-rw-r--r--tools/testing/selftests/bpf/progs/test_parse_tcp_hdr_opt.c118
-rw-r--r--tools/testing/selftests/bpf/progs/test_parse_tcp_hdr_opt_dynptr.c114
-rw-r--r--tools/testing/selftests/bpf/progs/test_pe_preserve_elems.c38
-rw-r--r--tools/testing/selftests/bpf/progs/test_perf_buffer.c26
-rw-r--r--tools/testing/selftests/bpf/progs/test_perf_link.c16
-rw-r--r--tools/testing/selftests/bpf/progs/test_pinning.c2
-rw-r--r--tools/testing/selftests/bpf/progs/test_pinning_invalid.c2
-rw-r--r--tools/testing/selftests/bpf/progs/test_pkt_access.c30
-rw-r--r--tools/testing/selftests/bpf/progs/test_pkt_md_access.c4
-rw-r--r--tools/testing/selftests/bpf/progs/test_probe_read_user_str.c25
-rw-r--r--tools/testing/selftests/bpf/progs/test_probe_user.c43
-rw-r--r--tools/testing/selftests/bpf/progs/test_prog_array_init.c39
-rw-r--r--tools/testing/selftests/bpf/progs/test_ptr_untrusted.c29
-rw-r--r--tools/testing/selftests/bpf/progs/test_queue_stack_map.h4
-rw-r--r--tools/testing/selftests/bpf/progs/test_raw_tp_test_run.c24
-rw-r--r--tools/testing/selftests/bpf/progs/test_rdonly_maps.c6
-rw-r--r--tools/testing/selftests/bpf/progs/test_ringbuf.c7
-rw-r--r--tools/testing/selftests/bpf/progs/test_ringbuf_map_key.c71
-rw-r--r--tools/testing/selftests/bpf/progs/test_ringbuf_multi.c15
-rw-r--r--tools/testing/selftests/bpf/progs/test_seg6_loop.c4
-rw-r--r--tools/testing/selftests/bpf/progs/test_select_reuseport_kern.c8
-rw-r--r--tools/testing/selftests/bpf/progs/test_send_signal_kern.c2
-rw-r--r--tools/testing/selftests/bpf/progs/test_sig_in_xattr.c83
-rw-r--r--tools/testing/selftests/bpf/progs/test_siphash.h64
-rw-r--r--tools/testing/selftests/bpf/progs/test_sk_assign.c22
-rw-r--r--tools/testing/selftests/bpf/progs/test_sk_assign_libbpf.c3
-rw-r--r--tools/testing/selftests/bpf/progs/test_sk_lookup.c660
-rw-r--r--tools/testing/selftests/bpf/progs/test_sk_lookup_kern.c41
-rw-r--r--tools/testing/selftests/bpf/progs/test_sk_storage_trace_itself.c29
-rw-r--r--tools/testing/selftests/bpf/progs/test_sk_storage_tracing.c111
-rw-r--r--tools/testing/selftests/bpf/progs/test_skb_cgroup_id_kern.c14
-rw-r--r--tools/testing/selftests/bpf/progs/test_skb_ctx.c13
-rw-r--r--tools/testing/selftests/bpf/progs/test_skb_helpers.c2
-rw-r--r--tools/testing/selftests/bpf/progs/test_skc_to_unix_sock.c40
-rw-r--r--tools/testing/selftests/bpf/progs/test_skeleton.c47
-rw-r--r--tools/testing/selftests/bpf/progs/test_snprintf.c77
-rw-r--r--tools/testing/selftests/bpf/progs/test_snprintf_single.c20
-rw-r--r--tools/testing/selftests/bpf/progs/test_sock_fields.c (renamed from tools/testing/selftests/bpf/progs/test_sock_fields_kern.c)230
-rw-r--r--tools/testing/selftests/bpf/progs/test_sockmap_drop_prog.c32
-rw-r--r--tools/testing/selftests/bpf/progs/test_sockmap_invalid_update.c23
-rw-r--r--tools/testing/selftests/bpf/progs/test_sockmap_kern.h81
-rw-r--r--tools/testing/selftests/bpf/progs/test_sockmap_listen.c50
-rw-r--r--tools/testing/selftests/bpf/progs/test_sockmap_pass_prog.c32
-rw-r--r--tools/testing/selftests/bpf/progs/test_sockmap_progs_query.c24
-rw-r--r--tools/testing/selftests/bpf/progs/test_sockmap_skb_verdict_attach.c18
-rw-r--r--tools/testing/selftests/bpf/progs/test_sockmap_update.c48
-rw-r--r--tools/testing/selftests/bpf/progs/test_spin_lock.c72
-rw-r--r--tools/testing/selftests/bpf/progs/test_spin_lock_fail.c248
-rw-r--r--tools/testing/selftests/bpf/progs/test_stack_var_off.c51
-rw-r--r--tools/testing/selftests/bpf/progs/test_stacktrace_build_id.c17
-rw-r--r--tools/testing/selftests/bpf/progs/test_stacktrace_map.c12
-rw-r--r--tools/testing/selftests/bpf/progs/test_static_linked1.c30
-rw-r--r--tools/testing/selftests/bpf/progs/test_static_linked2.c31
-rw-r--r--tools/testing/selftests/bpf/progs/test_subprogs.c124
-rw-r--r--tools/testing/selftests/bpf/progs/test_subprogs_extable.c51
-rw-r--r--tools/testing/selftests/bpf/progs/test_subprogs_unused.c21
-rw-r--r--tools/testing/selftests/bpf/progs/test_subskeleton.c28
-rw-r--r--tools/testing/selftests/bpf/progs/test_subskeleton_lib.c61
-rw-r--r--tools/testing/selftests/bpf/progs/test_subskeleton_lib2.c16
-rw-r--r--tools/testing/selftests/bpf/progs/test_sysctl_loop1.c10
-rw-r--r--tools/testing/selftests/bpf/progs/test_sysctl_loop2.c10
-rw-r--r--tools/testing/selftests/bpf/progs/test_sysctl_prog.c10
-rw-r--r--tools/testing/selftests/bpf/progs/test_task_pt_regs.c36
-rw-r--r--tools/testing/selftests/bpf/progs/test_task_under_cgroup.c77
-rw-r--r--tools/testing/selftests/bpf/progs/test_tc_bpf.c25
-rw-r--r--tools/testing/selftests/bpf/progs/test_tc_dtime.c397
-rw-r--r--tools/testing/selftests/bpf/progs/test_tc_edt.c12
-rw-r--r--tools/testing/selftests/bpf/progs/test_tc_link.c69
-rw-r--r--tools/testing/selftests/bpf/progs/test_tc_neigh.c136
-rw-r--r--tools/testing/selftests/bpf/progs/test_tc_neigh_fib.c158
-rw-r--r--tools/testing/selftests/bpf/progs/test_tc_peer.c63
-rw-r--r--tools/testing/selftests/bpf/progs/test_tc_tunnel.c210
-rw-r--r--tools/testing/selftests/bpf/progs/test_tcp_check_syncookie_kern.c16
-rw-r--r--tools/testing/selftests/bpf/progs/test_tcp_custom_syncookie.c595
-rw-r--r--tools/testing/selftests/bpf/progs/test_tcp_custom_syncookie.h140
-rw-r--r--tools/testing/selftests/bpf/progs/test_tcp_estats.c3
-rw-r--r--tools/testing/selftests/bpf/progs/test_tcp_hdr_options.c626
-rw-r--r--tools/testing/selftests/bpf/progs/test_tcpbpf_kern.c159
-rw-r--r--tools/testing/selftests/bpf/progs/test_tcpnotify_kern.c6
-rw-r--r--tools/testing/selftests/bpf/progs/test_time_tai.c24
-rw-r--r--tools/testing/selftests/bpf/progs/test_trace_ext.c18
-rw-r--r--tools/testing/selftests/bpf/progs/test_trace_ext_tracing.c25
-rw-r--r--tools/testing/selftests/bpf/progs/test_tracepoint.c9
-rw-r--r--tools/testing/selftests/bpf/progs/test_trampoline_count.c16
-rw-r--r--tools/testing/selftests/bpf/progs/test_tunnel_kern.c731
-rw-r--r--tools/testing/selftests/bpf/progs/test_unpriv_bpf_disabled.c83
-rw-r--r--tools/testing/selftests/bpf/progs/test_uprobe.c61
-rw-r--r--tools/testing/selftests/bpf/progs/test_uprobe_autoattach.c117
-rw-r--r--tools/testing/selftests/bpf/progs/test_urandom_usdt.c70
-rw-r--r--tools/testing/selftests/bpf/progs/test_usdt.c96
-rw-r--r--tools/testing/selftests/bpf/progs/test_usdt_multispec.c30
-rw-r--r--tools/testing/selftests/bpf/progs/test_user_ringbuf.h35
-rw-r--r--tools/testing/selftests/bpf/progs/test_varlen.c163
-rw-r--r--tools/testing/selftests/bpf/progs/test_verif_scale1.c4
-rw-r--r--tools/testing/selftests/bpf/progs/test_verif_scale2.c6
-rw-r--r--tools/testing/selftests/bpf/progs/test_verif_scale3.c4
-rw-r--r--tools/testing/selftests/bpf/progs/test_verify_pkcs7_sig.c88
-rw-r--r--tools/testing/selftests/bpf/progs/test_vmlinux.c36
-rw-r--r--tools/testing/selftests/bpf/progs/test_xdp.c7
-rw-r--r--tools/testing/selftests/bpf/progs/test_xdp_adjust_tail_grow.c20
-rw-r--r--tools/testing/selftests/bpf/progs/test_xdp_adjust_tail_shrink.c36
-rw-r--r--tools/testing/selftests/bpf/progs/test_xdp_attach_fail.c54
-rw-r--r--tools/testing/selftests/bpf/progs/test_xdp_bpf2bpf.c8
-rw-r--r--tools/testing/selftests/bpf/progs/test_xdp_context_test_run.c20
-rw-r--r--tools/testing/selftests/bpf/progs/test_xdp_devmap_helpers.c2
-rw-r--r--tools/testing/selftests/bpf/progs/test_xdp_do_redirect.c116
-rw-r--r--tools/testing/selftests/bpf/progs/test_xdp_dynptr.c256
-rw-r--r--tools/testing/selftests/bpf/progs/test_xdp_link.c18
-rw-r--r--tools/testing/selftests/bpf/progs/test_xdp_loop.c7
-rw-r--r--tools/testing/selftests/bpf/progs/test_xdp_noinline.c118
-rw-r--r--tools/testing/selftests/bpf/progs/test_xdp_redirect.c2
-rw-r--r--tools/testing/selftests/bpf/progs/test_xdp_update_frags.c42
-rw-r--r--tools/testing/selftests/bpf/progs/test_xdp_vlan.c17
-rw-r--r--tools/testing/selftests/bpf/progs/test_xdp_with_cpumap_frags_helpers.c27
-rw-r--r--tools/testing/selftests/bpf/progs/test_xdp_with_cpumap_helpers.c42
-rw-r--r--tools/testing/selftests/bpf/progs/test_xdp_with_devmap_frags_helpers.c27
-rw-r--r--tools/testing/selftests/bpf/progs/test_xdp_with_devmap_helpers.c13
-rw-r--r--tools/testing/selftests/bpf/progs/timer.c424
-rw-r--r--tools/testing/selftests/bpf/progs/timer_crash.c54
-rw-r--r--tools/testing/selftests/bpf/progs/timer_failure.c68
-rw-r--r--tools/testing/selftests/bpf/progs/timer_mim.c88
-rw-r--r--tools/testing/selftests/bpf/progs/timer_mim_reject.c74
-rw-r--r--tools/testing/selftests/bpf/progs/token_lsm.c32
-rw-r--r--tools/testing/selftests/bpf/progs/trace_dummy_st_ops.c21
-rw-r--r--tools/testing/selftests/bpf/progs/trace_printk.c22
-rw-r--r--tools/testing/selftests/bpf/progs/trace_vprintk.c34
-rw-r--r--tools/testing/selftests/bpf/progs/tracing_failure.c20
-rw-r--r--tools/testing/selftests/bpf/progs/tracing_struct.c187
-rw-r--r--tools/testing/selftests/bpf/progs/trigger_bench.c49
-rw-r--r--tools/testing/selftests/bpf/progs/twfw.c58
-rw-r--r--tools/testing/selftests/bpf/progs/type_cast.c79
-rw-r--r--tools/testing/selftests/bpf/progs/udp_limit.c59
-rw-r--r--tools/testing/selftests/bpf/progs/uninit_stack.c87
-rw-r--r--tools/testing/selftests/bpf/progs/uprobe_multi.c101
-rw-r--r--tools/testing/selftests/bpf/progs/uprobe_multi_bench.c15
-rw-r--r--tools/testing/selftests/bpf/progs/uprobe_multi_usdt.c16
-rw-r--r--tools/testing/selftests/bpf/progs/user_ringbuf_fail.c223
-rw-r--r--tools/testing/selftests/bpf/progs/user_ringbuf_success.c212
-rw-r--r--tools/testing/selftests/bpf/progs/verifier_and.c107
-rw-r--r--tools/testing/selftests/bpf/progs/verifier_arena.c146
-rw-r--r--tools/testing/selftests/bpf/progs/verifier_array_access.c529
-rw-r--r--tools/testing/selftests/bpf/progs/verifier_basic_stack.c100
-rw-r--r--tools/testing/selftests/bpf/progs/verifier_bitfield_write.c100
-rw-r--r--tools/testing/selftests/bpf/progs/verifier_bounds.c1140
-rw-r--r--tools/testing/selftests/bpf/progs/verifier_bounds_deduction.c171
-rw-r--r--tools/testing/selftests/bpf/progs/verifier_bounds_deduction_non_const.c639
-rw-r--r--tools/testing/selftests/bpf/progs/verifier_bounds_mix_sign_unsign.c554
-rw-r--r--tools/testing/selftests/bpf/progs/verifier_bpf_get_stack.c124
-rw-r--r--tools/testing/selftests/bpf/progs/verifier_bswap.c63
-rw-r--r--tools/testing/selftests/bpf/progs/verifier_btf_ctx_access.c32
-rw-r--r--tools/testing/selftests/bpf/progs/verifier_btf_unreliable_prog.c20
-rw-r--r--tools/testing/selftests/bpf/progs/verifier_cfg.c162
-rw-r--r--tools/testing/selftests/bpf/progs/verifier_cgroup_inv_retcode.c89
-rw-r--r--tools/testing/selftests/bpf/progs/verifier_cgroup_skb.c227
-rw-r--r--tools/testing/selftests/bpf/progs/verifier_cgroup_storage.c308
-rw-r--r--tools/testing/selftests/bpf/progs/verifier_const_or.c82
-rw-r--r--tools/testing/selftests/bpf/progs/verifier_ctx.c221
-rw-r--r--tools/testing/selftests/bpf/progs/verifier_ctx_sk_msg.c228
-rw-r--r--tools/testing/selftests/bpf/progs/verifier_d_path.c48
-rw-r--r--tools/testing/selftests/bpf/progs/verifier_direct_packet_access.c803
-rw-r--r--tools/testing/selftests/bpf/progs/verifier_direct_stack_access_wraparound.c56
-rw-r--r--tools/testing/selftests/bpf/progs/verifier_div0.c213
-rw-r--r--tools/testing/selftests/bpf/progs/verifier_div_overflow.c144
-rw-r--r--tools/testing/selftests/bpf/progs/verifier_global_ptr_args.c182
-rw-r--r--tools/testing/selftests/bpf/progs/verifier_global_subprogs.c379
-rw-r--r--tools/testing/selftests/bpf/progs/verifier_gotol.c67
-rw-r--r--tools/testing/selftests/bpf/progs/verifier_helper_access_var_len.c825
-rw-r--r--tools/testing/selftests/bpf/progs/verifier_helper_packet_access.c550
-rw-r--r--tools/testing/selftests/bpf/progs/verifier_helper_restricted.c279
-rw-r--r--tools/testing/selftests/bpf/progs/verifier_helper_value_access.c1282
-rw-r--r--tools/testing/selftests/bpf/progs/verifier_int_ptr.c158
-rw-r--r--tools/testing/selftests/bpf/progs/verifier_iterating_callbacks.c409
-rw-r--r--tools/testing/selftests/bpf/progs/verifier_jeq_infer_not_null.c213
-rw-r--r--tools/testing/selftests/bpf/progs/verifier_ld_ind.c110
-rw-r--r--tools/testing/selftests/bpf/progs/verifier_ldsx.c159
-rw-r--r--tools/testing/selftests/bpf/progs/verifier_leak_ptr.c92
-rw-r--r--tools/testing/selftests/bpf/progs/verifier_loops1.c286
-rw-r--r--tools/testing/selftests/bpf/progs/verifier_lwt.c234
-rw-r--r--tools/testing/selftests/bpf/progs/verifier_map_in_map.c142
-rw-r--r--tools/testing/selftests/bpf/progs/verifier_map_ptr.c159
-rw-r--r--tools/testing/selftests/bpf/progs/verifier_map_ptr_mixing.c265
-rw-r--r--tools/testing/selftests/bpf/progs/verifier_map_ret_val.c110
-rw-r--r--tools/testing/selftests/bpf/progs/verifier_masking.c410
-rw-r--r--tools/testing/selftests/bpf/progs/verifier_meta_access.c284
-rw-r--r--tools/testing/selftests/bpf/progs/verifier_movsx.c239
-rw-r--r--tools/testing/selftests/bpf/progs/verifier_netfilter_ctx.c121
-rw-r--r--tools/testing/selftests/bpf/progs/verifier_netfilter_retcode.c49
-rw-r--r--tools/testing/selftests/bpf/progs/verifier_precision.c133
-rw-r--r--tools/testing/selftests/bpf/progs/verifier_prevent_map_lookup.c61
-rw-r--r--tools/testing/selftests/bpf/progs/verifier_raw_stack.c372
-rw-r--r--tools/testing/selftests/bpf/progs/verifier_raw_tp_writable.c50
-rw-r--r--tools/testing/selftests/bpf/progs/verifier_ref_tracking.c1495
-rw-r--r--tools/testing/selftests/bpf/progs/verifier_reg_equal.c58
-rw-r--r--tools/testing/selftests/bpf/progs/verifier_regalloc.c364
-rw-r--r--tools/testing/selftests/bpf/progs/verifier_ringbuf.c131
-rw-r--r--tools/testing/selftests/bpf/progs/verifier_runtime_jit.c360
-rw-r--r--tools/testing/selftests/bpf/progs/verifier_scalar_ids.c659
-rw-r--r--tools/testing/selftests/bpf/progs/verifier_sdiv.c785
-rw-r--r--tools/testing/selftests/bpf/progs/verifier_search_pruning.c339
-rw-r--r--tools/testing/selftests/bpf/progs/verifier_sock.c980
-rw-r--r--tools/testing/selftests/bpf/progs/verifier_spill_fill.c1247
-rw-r--r--tools/testing/selftests/bpf/progs/verifier_spin_lock.c533
-rw-r--r--tools/testing/selftests/bpf/progs/verifier_stack_ptr.c484
-rw-r--r--tools/testing/selftests/bpf/progs/verifier_subprog_precision.c709
-rw-r--r--tools/testing/selftests/bpf/progs/verifier_subreg.c673
-rw-r--r--tools/testing/selftests/bpf/progs/verifier_typedef.c23
-rw-r--r--tools/testing/selftests/bpf/progs/verifier_uninit.c61
-rw-r--r--tools/testing/selftests/bpf/progs/verifier_unpriv.c726
-rw-r--r--tools/testing/selftests/bpf/progs/verifier_unpriv_perf.c34
-rw-r--r--tools/testing/selftests/bpf/progs/verifier_value.c158
-rw-r--r--tools/testing/selftests/bpf/progs/verifier_value_adj_spill.c78
-rw-r--r--tools/testing/selftests/bpf/progs/verifier_value_illegal_alu.c168
-rw-r--r--tools/testing/selftests/bpf/progs/verifier_value_or_null.c288
-rw-r--r--tools/testing/selftests/bpf/progs/verifier_value_ptr_arith.c1423
-rw-r--r--tools/testing/selftests/bpf/progs/verifier_var_off.c418
-rw-r--r--tools/testing/selftests/bpf/progs/verifier_xadd.c124
-rw-r--r--tools/testing/selftests/bpf/progs/verifier_xdp.c24
-rw-r--r--tools/testing/selftests/bpf/progs/verifier_xdp_direct_packet_access.c1722
-rw-r--r--tools/testing/selftests/bpf/progs/vrf_socket_lookup.c89
-rw-r--r--tools/testing/selftests/bpf/progs/xdp_dummy.c2
-rw-r--r--tools/testing/selftests/bpf/progs/xdp_features.c268
-rw-r--r--tools/testing/selftests/bpf/progs/xdp_hw_metadata.c117
-rw-r--r--tools/testing/selftests/bpf/progs/xdp_metadata.c98
-rw-r--r--tools/testing/selftests/bpf/progs/xdp_metadata2.c24
-rw-r--r--tools/testing/selftests/bpf/progs/xdp_redirect_multi_kern.c94
-rw-r--r--tools/testing/selftests/bpf/progs/xdp_synproxy_kern.c865
-rw-r--r--tools/testing/selftests/bpf/progs/xdp_tx.c2
-rw-r--r--tools/testing/selftests/bpf/progs/xdping_kern.c9
-rw-r--r--tools/testing/selftests/bpf/progs/xdpwall.c364
-rw-r--r--tools/testing/selftests/bpf/progs/xfrm_info.c40
-rw-r--r--tools/testing/selftests/bpf/progs/xsk_xdp_progs.c73
-rw-r--r--tools/testing/selftests/bpf/sdt-config.h6
-rw-r--r--tools/testing/selftests/bpf/sdt.h513
-rw-r--r--tools/testing/selftests/bpf/settings (renamed from tools/testing/selftests/powerpc/dscr/settings)0
-rw-r--r--tools/testing/selftests/bpf/task_local_storage_helpers.h22
-rwxr-xr-xtools/testing/selftests/bpf/tcp_client.py50
-rwxr-xr-xtools/testing/selftests/bpf/tcp_server.py80
-rw-r--r--tools/testing/selftests/bpf/test_bpftool.py22
-rwxr-xr-xtools/testing/selftests/bpf/test_bpftool.sh6
-rwxr-xr-xtools/testing/selftests/bpf/test_bpftool_build.sh6
-rwxr-xr-xtools/testing/selftests/bpf/test_bpftool_metadata.sh85
-rwxr-xr-xtools/testing/selftests/bpf/test_bpftool_synctypes.py627
-rw-r--r--tools/testing/selftests/bpf/test_btf.h12
-rw-r--r--tools/testing/selftests/bpf/test_cgroup_storage.c39
-rw-r--r--tools/testing/selftests/bpf/test_cpp.cpp104
-rw-r--r--tools/testing/selftests/bpf/test_current_pid_tgid_new_ns.c159
-rw-r--r--tools/testing/selftests/bpf/test_dev_cgroup.c24
-rwxr-xr-xtools/testing/selftests/bpf/test_doc_build.sh20
-rw-r--r--tools/testing/selftests/bpf/test_flow_dissector.c2
-rwxr-xr-xtools/testing/selftests/bpf/test_flow_dissector.sh22
-rwxr-xr-xtools/testing/selftests/bpf/test_ftrace.sh7
-rwxr-xr-xtools/testing/selftests/bpf/test_kmod.sh30
-rwxr-xr-xtools/testing/selftests/bpf/test_lirc_mode2.sh5
-rw-r--r--tools/testing/selftests/bpf/test_lirc_mode2_user.c7
-rw-r--r--tools/testing/selftests/bpf/test_loader.c736
-rw-r--r--tools/testing/selftests/bpf/test_lpm_map.c91
-rw-r--r--tools/testing/selftests/bpf/test_lru_map.c118
-rwxr-xr-xtools/testing/selftests/bpf/test_lwt_ip_encap.sh40
-rwxr-xr-xtools/testing/selftests/bpf/test_lwt_seg6local.sh173
-rw-r--r--tools/testing/selftests/bpf/test_maps.c541
-rw-r--r--tools/testing/selftests/bpf/test_maps.h7
-rw-r--r--tools/testing/selftests/bpf/test_netcnt.c161
-rwxr-xr-xtools/testing/selftests/bpf/test_offload.py107
-rw-r--r--tools/testing/selftests/bpf/test_progs.c1622
-rw-r--r--tools/testing/selftests/bpf/test_progs.h353
-rwxr-xr-xtools/testing/selftests/bpf/test_skb_cgroup_id.sh2
-rw-r--r--tools/testing/selftests/bpf/test_skb_cgroup_id_user.c12
-rw-r--r--tools/testing/selftests/bpf/test_sock.c405
-rw-r--r--tools/testing/selftests/bpf/test_sock_addr.c345
-rw-r--r--tools/testing/selftests/bpf/test_sock_fields.c490
-rw-r--r--tools/testing/selftests/bpf/test_socket_cookie.c214
-rw-r--r--tools/testing/selftests/bpf/test_sockmap.c245
-rw-r--r--tools/testing/selftests/bpf/test_stub.c44
-rw-r--r--tools/testing/selftests/bpf/test_sysctl.c45
-rw-r--r--tools/testing/selftests/bpf/test_tag.c12
-rwxr-xr-xtools/testing/selftests/bpf/test_tc_edt.sh3
-rwxr-xr-xtools/testing/selftests/bpf/test_tc_tunnel.sh37
-rwxr-xr-xtools/testing/selftests/bpf/test_tcp_check_syncookie.sh11
-rw-r--r--tools/testing/selftests/bpf/test_tcp_check_syncookie_user.c84
-rw-r--r--tools/testing/selftests/bpf/test_tcp_hdr_options.h153
-rw-r--r--tools/testing/selftests/bpf/test_tcpbpf.h4
-rw-r--r--tools/testing/selftests/bpf/test_tcpbpf_user.c171
-rw-r--r--tools/testing/selftests/bpf/test_tcpnotify_user.c38
-rwxr-xr-xtools/testing/selftests/bpf/test_tunnel.sh264
-rw-r--r--tools/testing/selftests/bpf/test_verifier.c992
-rw-r--r--tools/testing/selftests/bpf/test_verifier_log.c174
-rwxr-xr-xtools/testing/selftests/bpf/test_xdp_features.sh107
-rwxr-xr-xtools/testing/selftests/bpf/test_xdp_meta.sh44
-rwxr-xr-xtools/testing/selftests/bpf/test_xdp_redirect.sh90
-rwxr-xr-xtools/testing/selftests/bpf/test_xdp_redirect_multi.sh214
-rwxr-xr-xtools/testing/selftests/bpf/test_xdp_veth.sh47
-rwxr-xr-xtools/testing/selftests/bpf/test_xdp_vlan.sh77
-rwxr-xr-xtools/testing/selftests/bpf/test_xdping.sh4
-rwxr-xr-xtools/testing/selftests/bpf/test_xsk.sh244
-rw-r--r--tools/testing/selftests/bpf/testing_helpers.c413
-rw-r--r--tools/testing/selftests/bpf/testing_helpers.h54
-rw-r--r--tools/testing/selftests/bpf/trace_helpers.c328
-rw-r--r--tools/testing/selftests/bpf/trace_helpers.h18
-rw-r--r--tools/testing/selftests/bpf/unpriv_helpers.c57
-rw-r--r--tools/testing/selftests/bpf/unpriv_helpers.h7
-rw-r--r--tools/testing/selftests/bpf/uprobe_multi.c91
-rw-r--r--tools/testing/selftests/bpf/urandom_read.c78
-rw-r--r--tools/testing/selftests/bpf/urandom_read_aux.c9
-rw-r--r--tools/testing/selftests/bpf/urandom_read_lib1.c35
-rw-r--r--tools/testing/selftests/bpf/urandom_read_lib2.c8
-rw-r--r--tools/testing/selftests/bpf/verifier/and.c50
-rw-r--r--tools/testing/selftests/bpf/verifier/array_access.c378
-rw-r--r--tools/testing/selftests/bpf/verifier/atomic_and.c100
-rw-r--r--tools/testing/selftests/bpf/verifier/atomic_bounds.c27
-rw-r--r--tools/testing/selftests/bpf/verifier/atomic_cmpxchg.c235
-rw-r--r--tools/testing/selftests/bpf/verifier/atomic_fetch.c151
-rw-r--r--tools/testing/selftests/bpf/verifier/atomic_fetch_add.c106
-rw-r--r--tools/testing/selftests/bpf/verifier/atomic_invalid.c25
-rw-r--r--tools/testing/selftests/bpf/verifier/atomic_or.c102
-rw-r--r--tools/testing/selftests/bpf/verifier/atomic_xchg.c46
-rw-r--r--tools/testing/selftests/bpf/verifier/atomic_xor.c77
-rw-r--r--tools/testing/selftests/bpf/verifier/basic.c2
-rw-r--r--tools/testing/selftests/bpf/verifier/basic_instr.c6
-rw-r--r--tools/testing/selftests/bpf/verifier/basic_stack.c64
-rw-r--r--tools/testing/selftests/bpf/verifier/bounds.c559
-rw-r--r--tools/testing/selftests/bpf/verifier/bounds_deduction.c124
-rw-r--r--tools/testing/selftests/bpf/verifier/bounds_mix_sign_unsign.c406
-rw-r--r--tools/testing/selftests/bpf/verifier/bpf_get_stack.c44
-rw-r--r--tools/testing/selftests/bpf/verifier/bpf_loop_inline.c270
-rw-r--r--tools/testing/selftests/bpf/verifier/bpf_st_mem.c99
-rw-r--r--tools/testing/selftests/bpf/verifier/calls.c408
-rw-r--r--tools/testing/selftests/bpf/verifier/cfg.c73
-rw-r--r--tools/testing/selftests/bpf/verifier/cgroup_inv_retcode.c72
-rw-r--r--tools/testing/selftests/bpf/verifier/cgroup_skb.c197
-rw-r--r--tools/testing/selftests/bpf/verifier/cgroup_storage.c220
-rw-r--r--tools/testing/selftests/bpf/verifier/const_or.c60
-rw-r--r--tools/testing/selftests/bpf/verifier/ctx.c198
-rw-r--r--tools/testing/selftests/bpf/verifier/ctx_sk_lookup.c532
-rw-r--r--tools/testing/selftests/bpf/verifier/ctx_sk_msg.c181
-rw-r--r--tools/testing/selftests/bpf/verifier/ctx_skb.c118
-rw-r--r--tools/testing/selftests/bpf/verifier/dead_code.c24
-rw-r--r--tools/testing/selftests/bpf/verifier/direct_packet_access.c656
-rw-r--r--tools/testing/selftests/bpf/verifier/direct_stack_access_wraparound.c40
-rw-r--r--tools/testing/selftests/bpf/verifier/direct_value_access.c3
-rw-r--r--tools/testing/selftests/bpf/verifier/div0.c184
-rw-r--r--tools/testing/selftests/bpf/verifier/div_overflow.c110
-rw-r--r--tools/testing/selftests/bpf/verifier/event_output.c1
-rw-r--r--tools/testing/selftests/bpf/verifier/helper_access_var_len.c616
-rw-r--r--tools/testing/selftests/bpf/verifier/helper_packet_access.c460
-rw-r--r--tools/testing/selftests/bpf/verifier/helper_value_access.c953
-rw-r--r--tools/testing/selftests/bpf/verifier/int_ptr.c160
-rw-r--r--tools/testing/selftests/bpf/verifier/jit.c117
-rw-r--r--tools/testing/selftests/bpf/verifier/jmp32.c51
-rw-r--r--tools/testing/selftests/bpf/verifier/jset.c10
-rw-r--r--tools/testing/selftests/bpf/verifier/jump.c22
-rw-r--r--tools/testing/selftests/bpf/verifier/ld_imm64.c16
-rw-r--r--tools/testing/selftests/bpf/verifier/ld_ind.c72
-rw-r--r--tools/testing/selftests/bpf/verifier/leak_ptr.c67
-rw-r--r--tools/testing/selftests/bpf/verifier/loops1.c206
-rw-r--r--tools/testing/selftests/bpf/verifier/lwt.c189
-rw-r--r--tools/testing/selftests/bpf/verifier/map_in_map.c62
-rw-r--r--tools/testing/selftests/bpf/verifier/map_kptr.c444
-rw-r--r--tools/testing/selftests/bpf/verifier/map_ptr_mixing.c100
-rw-r--r--tools/testing/selftests/bpf/verifier/map_ret_val.c65
-rw-r--r--tools/testing/selftests/bpf/verifier/masking.c322
-rw-r--r--tools/testing/selftests/bpf/verifier/meta_access.c235
-rw-r--r--tools/testing/selftests/bpf/verifier/perf_event_sample_period.c6
-rw-r--r--tools/testing/selftests/bpf/verifier/precise.c180
-rw-r--r--tools/testing/selftests/bpf/verifier/prevent_map_lookup.c29
-rw-r--r--tools/testing/selftests/bpf/verifier/raw_stack.c305
-rw-r--r--tools/testing/selftests/bpf/verifier/raw_tp_writable.c34
-rw-r--r--tools/testing/selftests/bpf/verifier/ref_tracking.c856
-rw-r--r--tools/testing/selftests/bpf/verifier/runtime_jit.c231
-rw-r--r--tools/testing/selftests/bpf/verifier/search_pruning.c156
-rw-r--r--tools/testing/selftests/bpf/verifier/sleepable.c91
-rw-r--r--tools/testing/selftests/bpf/verifier/sock.c633
-rw-r--r--tools/testing/selftests/bpf/verifier/spill_fill.c76
-rw-r--r--tools/testing/selftests/bpf/verifier/spin_lock.c333
-rw-r--r--tools/testing/selftests/bpf/verifier/stack_ptr.c357
-rw-r--r--tools/testing/selftests/bpf/verifier/subreg.c533
-rw-r--r--tools/testing/selftests/bpf/verifier/uninit.c39
-rw-r--r--tools/testing/selftests/bpf/verifier/unpriv.c522
-rw-r--r--tools/testing/selftests/bpf/verifier/value.c104
-rw-r--r--tools/testing/selftests/bpf/verifier/value_adj_spill.c43
-rw-r--r--tools/testing/selftests/bpf/verifier/value_illegal_alu.c95
-rw-r--r--tools/testing/selftests/bpf/verifier/value_or_null.c171
-rw-r--r--tools/testing/selftests/bpf/verifier/value_ptr_arith.c838
-rw-r--r--tools/testing/selftests/bpf/verifier/var_off.c248
-rw-r--r--tools/testing/selftests/bpf/verifier/wide_access.c46
-rw-r--r--tools/testing/selftests/bpf/verifier/xadd.c97
-rw-r--r--tools/testing/selftests/bpf/verifier/xdp.c14
-rw-r--r--tools/testing/selftests/bpf/verifier/xdp_direct_packet_access.c900
-rwxr-xr-xtools/testing/selftests/bpf/verify_sig_setup.sh129
-rw-r--r--tools/testing/selftests/bpf/veristat.c2163
-rw-r--r--tools/testing/selftests/bpf/veristat.cfg17
-rwxr-xr-xtools/testing/selftests/bpf/vmtest.sh436
-rw-r--r--tools/testing/selftests/bpf/xdp_features.c718
-rw-r--r--tools/testing/selftests/bpf/xdp_features.h20
-rw-r--r--tools/testing/selftests/bpf/xdp_hw_metadata.c747
-rw-r--r--tools/testing/selftests/bpf/xdp_metadata.h52
-rw-r--r--tools/testing/selftests/bpf/xdp_redirect_multi.c226
-rw-r--r--tools/testing/selftests/bpf/xdp_synproxy.c471
-rw-r--r--tools/testing/selftests/bpf/xdping.c24
-rw-r--r--tools/testing/selftests/bpf/xsk.c781
-rw-r--r--tools/testing/selftests/bpf/xsk.h249
-rwxr-xr-xtools/testing/selftests/bpf/xsk_prereqs.sh93
-rw-r--r--tools/testing/selftests/bpf/xsk_xdp_common.h12
-rw-r--r--tools/testing/selftests/bpf/xskxceiver.c2595
-rw-r--r--tools/testing/selftests/bpf/xskxceiver.h176
-rw-r--r--tools/testing/selftests/breakpoints/breakpoint_test.c4
-rw-r--r--tools/testing/selftests/breakpoints/breakpoint_test_arm64.c4
-rw-r--r--tools/testing/selftests/breakpoints/step_after_suspend_test.c55
-rw-r--r--tools/testing/selftests/cachestat/.gitignore (renamed from tools/testing/selftests/android/ion/.gitignore)4
-rw-r--r--tools/testing/selftests/cachestat/Makefile8
-rw-r--r--tools/testing/selftests/cachestat/test_cachestat.c318
-rw-r--r--tools/testing/selftests/capabilities/Makefile2
-rw-r--r--tools/testing/selftests/capabilities/test_execve.c14
-rw-r--r--tools/testing/selftests/capabilities/validate_cap.c8
-rw-r--r--tools/testing/selftests/cgroup/.gitignore7
-rw-r--r--tools/testing/selftests/cgroup/Makefile25
-rw-r--r--tools/testing/selftests/cgroup/cgroup_util.c135
-rw-r--r--tools/testing/selftests/cgroup/cgroup_util.h14
-rw-r--r--tools/testing/selftests/cgroup/config7
-rw-r--r--tools/testing/selftests/cgroup/memcg_protection.m89
-rw-r--r--tools/testing/selftests/cgroup/test_core.c165
-rw-r--r--tools/testing/selftests/cgroup/test_cpu.c726
-rw-r--r--tools/testing/selftests/cgroup/test_cpuset.c275
-rwxr-xr-xtools/testing/selftests/cgroup/test_cpuset_prs.sh936
-rw-r--r--tools/testing/selftests/cgroup/test_freezer.c59
-rw-r--r--tools/testing/selftests/cgroup/test_hugetlb_memcg.c234
-rw-r--r--tools/testing/selftests/cgroup/test_kill.c297
-rw-r--r--tools/testing/selftests/cgroup/test_kmem.c453
-rw-r--r--tools/testing/selftests/cgroup/test_memcontrol.c507
-rwxr-xr-xtools/testing/selftests/cgroup/test_stress.sh2
-rw-r--r--tools/testing/selftests/cgroup/test_zswap.c476
-rw-r--r--tools/testing/selftests/cgroup/wait_inotify.c87
-rw-r--r--tools/testing/selftests/clone3/.gitignore1
-rw-r--r--tools/testing/selftests/clone3/Makefile6
-rw-r--r--tools/testing/selftests/clone3/clone3.c295
-rw-r--r--tools/testing/selftests/clone3/clone3_cap_checkpoint_restore.c180
-rw-r--r--tools/testing/selftests/clone3/clone3_clear_sighand.c9
-rw-r--r--tools/testing/selftests/clone3/clone3_selftests.h19
-rw-r--r--tools/testing/selftests/clone3/clone3_set_tid.c8
-rw-r--r--tools/testing/selftests/connector/.gitignore1
-rw-r--r--tools/testing/selftests/connector/Makefile6
-rw-r--r--tools/testing/selftests/connector/proc_filter.c310
-rw-r--r--tools/testing/selftests/core/.gitignore1
-rw-r--r--tools/testing/selftests/core/Makefile7
-rw-r--r--tools/testing/selftests/core/close_range_test.c539
-rw-r--r--tools/testing/selftests/cpu-hotplug/Makefile2
-rw-r--r--tools/testing/selftests/cpu-hotplug/config1
-rwxr-xr-xtools/testing/selftests/cpu-hotplug/cpu-on-off-test.sh140
-rw-r--r--tools/testing/selftests/cpufreq/config8
-rwxr-xr-xtools/testing/selftests/cpufreq/main.sh2
-rw-r--r--tools/testing/selftests/damon/.gitignore5
-rw-r--r--tools/testing/selftests/damon/Makefile22
-rw-r--r--tools/testing/selftests/damon/_chk_dependency.sh52
-rw-r--r--tools/testing/selftests/damon/_damon_sysfs.py385
-rw-r--r--tools/testing/selftests/damon/_debugfs_common.sh59
-rw-r--r--tools/testing/selftests/damon/access_memory.c41
-rw-r--r--tools/testing/selftests/damon/config7
-rw-r--r--tools/testing/selftests/damon/damos_apply_interval.py67
-rw-r--r--tools/testing/selftests/damon/damos_quota.py67
-rwxr-xr-xtools/testing/selftests/damon/debugfs_attrs.sh17
-rwxr-xr-xtools/testing/selftests/damon/debugfs_duplicate_context_creation.sh27
-rwxr-xr-xtools/testing/selftests/damon/debugfs_empty_targets.sh21
-rwxr-xr-xtools/testing/selftests/damon/debugfs_huge_count_read_write.sh22
-rwxr-xr-xtools/testing/selftests/damon/debugfs_rm_non_contexts.sh19
-rwxr-xr-xtools/testing/selftests/damon/debugfs_schemes.sh19
-rwxr-xr-xtools/testing/selftests/damon/debugfs_target_ids.sh19
-rw-r--r--tools/testing/selftests/damon/debugfs_target_ids_pid_leak.c68
-rw-r--r--tools/testing/selftests/damon/debugfs_target_ids_pid_leak.sh22
-rw-r--r--tools/testing/selftests/damon/debugfs_target_ids_read_before_terminate_race.c80
-rw-r--r--tools/testing/selftests/damon/debugfs_target_ids_read_before_terminate_race.sh14
-rw-r--r--tools/testing/selftests/damon/huge_count_read_write.c48
-rwxr-xr-xtools/testing/selftests/damon/lru_sort.sh41
-rwxr-xr-xtools/testing/selftests/damon/reclaim.sh42
-rwxr-xr-xtools/testing/selftests/damon/sysfs.sh377
-rwxr-xr-xtools/testing/selftests/damon/sysfs_update_removed_scheme_dir.sh58
-rw-r--r--tools/testing/selftests/damon/sysfs_update_schemes_tried_regions_hang.py33
-rw-r--r--tools/testing/selftests/damon/sysfs_update_schemes_tried_regions_wss_estimation.py55
-rw-r--r--tools/testing/selftests/devices/Makefile4
-rw-r--r--tools/testing/selftests/devices/boards/Dell Inc.,XPS 13 9300.yaml40
-rw-r--r--tools/testing/selftests/devices/boards/google,spherion.yaml50
-rw-r--r--tools/testing/selftests/devices/ksft.py90
-rwxr-xr-xtools/testing/selftests/devices/test_discoverable_devices.py318
-rw-r--r--tools/testing/selftests/dma/Makefile7
-rw-r--r--tools/testing/selftests/dma/config1
-rw-r--r--tools/testing/selftests/dma/dma_map_benchmark.c129
-rw-r--r--tools/testing/selftests/dmabuf-heaps/.gitignore1
-rw-r--r--tools/testing/selftests/dmabuf-heaps/Makefile2
-rw-r--r--tools/testing/selftests/dmabuf-heaps/dmabuf-heap.c152
-rw-r--r--tools/testing/selftests/drivers/.gitignore1
-rw-r--r--tools/testing/selftests/drivers/dma-buf/Makefile2
-rw-r--r--tools/testing/selftests/drivers/dma-buf/udmabuf.c8
-rwxr-xr-xtools/testing/selftests/drivers/gpu/drm_mm.sh4
-rw-r--r--tools/testing/selftests/drivers/net/bonding/Makefile24
-rwxr-xr-xtools/testing/selftests/drivers/net/bonding/bond-arp-interval-causes-panic.sh46
-rwxr-xr-xtools/testing/selftests/drivers/net/bonding/bond-break-lacpdu-tx.sh80
-rwxr-xr-xtools/testing/selftests/drivers/net/bonding/bond-eth-type-change.sh84
-rwxr-xr-xtools/testing/selftests/drivers/net/bonding/bond-lladdr-target.sh80
-rwxr-xr-xtools/testing/selftests/drivers/net/bonding/bond_macvlan.sh99
-rwxr-xr-xtools/testing/selftests/drivers/net/bonding/bond_options.sh333
-rw-r--r--tools/testing/selftests/drivers/net/bonding/bond_topo_2d1c.sh158
-rw-r--r--tools/testing/selftests/drivers/net/bonding/bond_topo_3d1c.sh43
-rw-r--r--tools/testing/selftests/drivers/net/bonding/config10
-rwxr-xr-xtools/testing/selftests/drivers/net/bonding/dev_addr_lists.sh109
-rw-r--r--tools/testing/selftests/drivers/net/bonding/lag_lib.sh177
-rwxr-xr-xtools/testing/selftests/drivers/net/bonding/mode-1-recovery-updelay.sh45
-rwxr-xr-xtools/testing/selftests/drivers/net/bonding/mode-2-recovery-updelay.sh45
-rw-r--r--tools/testing/selftests/drivers/net/bonding/settings1
-rw-r--r--tools/testing/selftests/drivers/net/dsa/Makefile32
l---------tools/testing/selftests/drivers/net/dsa/bridge_locked_port.sh1
l---------tools/testing/selftests/drivers/net/dsa/bridge_mdb.sh1
l---------tools/testing/selftests/drivers/net/dsa/bridge_mld.sh1
l---------tools/testing/selftests/drivers/net/dsa/bridge_vlan_aware.sh1
l---------tools/testing/selftests/drivers/net/dsa/bridge_vlan_mcast.sh1
l---------tools/testing/selftests/drivers/net/dsa/bridge_vlan_unaware.sh1
-rw-r--r--tools/testing/selftests/drivers/net/dsa/forwarding.config2
l---------tools/testing/selftests/drivers/net/dsa/local_termination.sh1
l---------tools/testing/selftests/drivers/net/dsa/no_forwarding.sh1
-rwxr-xr-xtools/testing/selftests/drivers/net/dsa/run_net_forwarding_test.sh9
l---------tools/testing/selftests/drivers/net/dsa/tc_actions.sh1
-rwxr-xr-xtools/testing/selftests/drivers/net/dsa/test_bridge_fdb_stress.sh47
-rwxr-xr-xtools/testing/selftests/drivers/net/mlxsw/devlink_linecard.sh334
-rwxr-xr-xtools/testing/selftests/drivers/net/mlxsw/devlink_trap_control.sh29
-rwxr-xr-xtools/testing/selftests/drivers/net/mlxsw/devlink_trap_l2_drops.sh105
-rwxr-xr-xtools/testing/selftests/drivers/net/mlxsw/devlink_trap_l3_drops.sh39
-rwxr-xr-xtools/testing/selftests/drivers/net/mlxsw/devlink_trap_l3_exceptions.sh34
-rwxr-xr-xtools/testing/selftests/drivers/net/mlxsw/devlink_trap_policer.sh65
-rwxr-xr-xtools/testing/selftests/drivers/net/mlxsw/devlink_trap_tunnel_ipip.sh50
-rwxr-xr-xtools/testing/selftests/drivers/net/mlxsw/devlink_trap_tunnel_ipip6.sh250
-rwxr-xr-xtools/testing/selftests/drivers/net/mlxsw/devlink_trap_tunnel_vxlan.sh7
-rwxr-xr-xtools/testing/selftests/drivers/net/mlxsw/devlink_trap_tunnel_vxlan_ipv6.sh342
-rwxr-xr-xtools/testing/selftests/drivers/net/mlxsw/egress_vid_classification.sh272
-rwxr-xr-xtools/testing/selftests/drivers/net/mlxsw/ethtool_lanes.sh187
-rwxr-xr-xtools/testing/selftests/drivers/net/mlxsw/extack.sh24
-rwxr-xr-xtools/testing/selftests/drivers/net/mlxsw/fib.sh14
-rwxr-xr-xtools/testing/selftests/drivers/net/mlxsw/hw_stats_l3.sh31
-rwxr-xr-xtools/testing/selftests/drivers/net/mlxsw/ingress_rif_conf_1d.sh263
-rwxr-xr-xtools/testing/selftests/drivers/net/mlxsw/ingress_rif_conf_1q.sh264
-rwxr-xr-xtools/testing/selftests/drivers/net/mlxsw/ingress_rif_conf_vxlan.sh311
-rw-r--r--tools/testing/selftests/drivers/net/mlxsw/mirror_gre_scale.sh4
-rw-r--r--tools/testing/selftests/drivers/net/mlxsw/mlxsw_lib.sh64
-rwxr-xr-xtools/testing/selftests/drivers/net/mlxsw/one_armed_router.sh3
-rwxr-xr-xtools/testing/selftests/drivers/net/mlxsw/pci_reset.sh58
-rwxr-xr-xtools/testing/selftests/drivers/net/mlxsw/port_range_occ.sh111
-rw-r--r--tools/testing/selftests/drivers/net/mlxsw/port_range_scale.sh95
-rw-r--r--tools/testing/selftests/drivers/net/mlxsw/port_scale.sh62
-rwxr-xr-xtools/testing/selftests/drivers/net/mlxsw/q_in_q_veto.sh304
-rwxr-xr-xtools/testing/selftests/drivers/net/mlxsw/qos_defprio.sh68
-rwxr-xr-xtools/testing/selftests/drivers/net/mlxsw/qos_dscp_bridge.sh26
-rwxr-xr-xtools/testing/selftests/drivers/net/mlxsw/qos_dscp_router.sh27
-rwxr-xr-xtools/testing/selftests/drivers/net/mlxsw/qos_ets_strict.sh22
-rwxr-xr-xtools/testing/selftests/drivers/net/mlxsw/qos_headroom.sh379
-rw-r--r--tools/testing/selftests/drivers/net/mlxsw/qos_lib.sh28
-rwxr-xr-xtools/testing/selftests/drivers/net/mlxsw/qos_max_descriptors.sh282
-rwxr-xr-xtools/testing/selftests/drivers/net/mlxsw/qos_mc_aware.sh16
-rwxr-xr-xtools/testing/selftests/drivers/net/mlxsw/qos_pfc.sh417
-rwxr-xr-xtools/testing/selftests/drivers/net/mlxsw/rif_bridge.sh183
-rw-r--r--tools/testing/selftests/drivers/net/mlxsw/rif_counter_scale.sh107
-rwxr-xr-xtools/testing/selftests/drivers/net/mlxsw/rif_lag.sh136
-rwxr-xr-xtools/testing/selftests/drivers/net/mlxsw/rif_lag_vlan.sh146
-rw-r--r--tools/testing/selftests/drivers/net/mlxsw/rif_mac_profile_scale.sh72
-rwxr-xr-xtools/testing/selftests/drivers/net/mlxsw/rif_mac_profiles.sh213
-rwxr-xr-xtools/testing/selftests/drivers/net/mlxsw/rif_mac_profiles_occ.sh147
-rwxr-xr-xtools/testing/selftests/drivers/net/mlxsw/router_bridge_lag.sh50
-rw-r--r--tools/testing/selftests/drivers/net/mlxsw/router_scale.sh2
-rwxr-xr-xtools/testing/selftests/drivers/net/mlxsw/rtnetlink.sh485
-rwxr-xr-xtools/testing/selftests/drivers/net/mlxsw/sch_ets.sh24
-rwxr-xr-xtools/testing/selftests/drivers/net/mlxsw/sch_offload.sh290
-rw-r--r--tools/testing/selftests/drivers/net/mlxsw/sch_red_core.sh281
-rwxr-xr-xtools/testing/selftests/drivers/net/mlxsw/sch_red_ets.sh100
-rwxr-xr-xtools/testing/selftests/drivers/net/mlxsw/sch_red_root.sh25
-rwxr-xr-xtools/testing/selftests/drivers/net/mlxsw/sch_tbf_ets.sh6
-rwxr-xr-xtools/testing/selftests/drivers/net/mlxsw/sch_tbf_prio.sh6
-rwxr-xr-xtools/testing/selftests/drivers/net/mlxsw/sch_tbf_root.sh6
-rwxr-xr-xtools/testing/selftests/drivers/net/mlxsw/sharedbuffer.sh16
-rwxr-xr-xtools/testing/selftests/drivers/net/mlxsw/sharedbuffer_configuration.py2
l---------tools/testing/selftests/drivers/net/mlxsw/spectrum-2/port_range_scale.sh1
-rw-r--r--tools/testing/selftests/drivers/net/mlxsw/spectrum-2/port_scale.sh16
-rwxr-xr-xtools/testing/selftests/drivers/net/mlxsw/spectrum-2/resource_scale.sh43
l---------tools/testing/selftests/drivers/net/mlxsw/spectrum-2/rif_counter_scale.sh1
-rw-r--r--tools/testing/selftests/drivers/net/mlxsw/spectrum-2/rif_mac_profile_scale.sh16
-rwxr-xr-xtools/testing/selftests/drivers/net/mlxsw/spectrum-2/tc_flower.sh108
-rw-r--r--tools/testing/selftests/drivers/net/mlxsw/spectrum-2/tc_flower_scale.sh15
-rw-r--r--tools/testing/selftests/drivers/net/mlxsw/spectrum-2/tc_police_scale.sh16
-rwxr-xr-xtools/testing/selftests/drivers/net/mlxsw/spectrum-2/vxlan_flooding_ipv6.sh339
-rw-r--r--tools/testing/selftests/drivers/net/mlxsw/spectrum/devlink_lib_spectrum.sh6
-rw-r--r--tools/testing/selftests/drivers/net/mlxsw/spectrum/port_range_scale.sh16
-rw-r--r--tools/testing/selftests/drivers/net/mlxsw/spectrum/port_scale.sh16
-rwxr-xr-xtools/testing/selftests/drivers/net/mlxsw/spectrum/q_in_vni_veto.sh67
-rwxr-xr-xtools/testing/selftests/drivers/net/mlxsw/spectrum/resource_scale.sh34
-rw-r--r--tools/testing/selftests/drivers/net/mlxsw/spectrum/rif_counter_scale.sh34
-rw-r--r--tools/testing/selftests/drivers/net/mlxsw/spectrum/rif_mac_profile_scale.sh16
-rw-r--r--tools/testing/selftests/drivers/net/mlxsw/spectrum/tc_police_scale.sh16
-rwxr-xr-xtools/testing/selftests/drivers/net/mlxsw/spectrum/vxlan_flooding_ipv6.sh334
-rw-r--r--tools/testing/selftests/drivers/net/mlxsw/tc_flower_scale.sh23
-rwxr-xr-xtools/testing/selftests/drivers/net/mlxsw/tc_police_occ.sh108
-rw-r--r--tools/testing/selftests/drivers/net/mlxsw/tc_police_scale.sh101
-rwxr-xr-xtools/testing/selftests/drivers/net/mlxsw/tc_restrictions.sh98
-rwxr-xr-xtools/testing/selftests/drivers/net/mlxsw/tc_sample.sh658
-rwxr-xr-xtools/testing/selftests/drivers/net/mlxsw/vxlan.sh283
-rwxr-xr-xtools/testing/selftests/drivers/net/mlxsw/vxlan_fdb_veto.sh39
-rwxr-xr-xtools/testing/selftests/drivers/net/mlxsw/vxlan_fdb_veto_ipv6.sh12
-rwxr-xr-xtools/testing/selftests/drivers/net/mlxsw/vxlan_flooding.sh17
-rwxr-xr-xtools/testing/selftests/drivers/net/mlxsw/vxlan_ipv6.sh65
-rw-r--r--tools/testing/selftests/drivers/net/netdevsim/Makefile18
-rw-r--r--tools/testing/selftests/drivers/net/netdevsim/config10
-rwxr-xr-xtools/testing/selftests/drivers/net/netdevsim/devlink.sh221
-rwxr-xr-xtools/testing/selftests/drivers/net/netdevsim/devlink_trap.sh27
-rwxr-xr-xtools/testing/selftests/drivers/net/netdevsim/ethtool-coalesce.sh132
-rw-r--r--tools/testing/selftests/drivers/net/netdevsim/ethtool-common.sh57
-rwxr-xr-xtools/testing/selftests/drivers/net/netdevsim/ethtool-fec.sh114
-rwxr-xr-xtools/testing/selftests/drivers/net/netdevsim/ethtool-pause.sh49
-rwxr-xr-xtools/testing/selftests/drivers/net/netdevsim/ethtool-ring.sh85
-rwxr-xr-xtools/testing/selftests/drivers/net/netdevsim/fib.sh65
-rwxr-xr-xtools/testing/selftests/drivers/net/netdevsim/fib_notifications.sh430
-rwxr-xr-xtools/testing/selftests/drivers/net/netdevsim/hw_stats_l3.sh421
-rwxr-xr-xtools/testing/selftests/drivers/net/netdevsim/nexthop.sh1058
-rwxr-xr-xtools/testing/selftests/drivers/net/netdevsim/peer.sh143
-rwxr-xr-xtools/testing/selftests/drivers/net/netdevsim/psample.sh183
-rwxr-xr-xtools/testing/selftests/drivers/net/netdevsim/tc-mq-visibility.sh77
-rwxr-xr-xtools/testing/selftests/drivers/net/netdevsim/udp_tunnel_nic.sh963
-rwxr-xr-xtools/testing/selftests/drivers/net/ocelot/basic_qos.sh253
-rwxr-xr-xtools/testing/selftests/drivers/net/ocelot/psfp.sh327
-rwxr-xr-xtools/testing/selftests/drivers/net/ocelot/tc_flower_chains.sh352
-rw-r--r--tools/testing/selftests/drivers/net/team/Makefile11
-rw-r--r--tools/testing/selftests/drivers/net/team/config5
-rwxr-xr-xtools/testing/selftests/drivers/net/team/dev_addr_lists.sh51
-rw-r--r--tools/testing/selftests/drivers/s390x/uvdevice/Makefile20
-rw-r--r--tools/testing/selftests/drivers/s390x/uvdevice/config1
-rw-r--r--tools/testing/selftests/drivers/s390x/uvdevice/test_uvdevice.c276
-rwxr-xr-xtools/testing/selftests/drivers/sdsi/sdsi.sh25
-rw-r--r--tools/testing/selftests/drivers/sdsi/sdsi_test.py226
-rw-r--r--tools/testing/selftests/dt/.gitignore1
-rw-r--r--tools/testing/selftests/dt/Makefile21
-rw-r--r--tools/testing/selftests/dt/compatible_ignore_list1
-rwxr-xr-xtools/testing/selftests/dt/test_unprobed_devices.sh80
-rw-r--r--tools/testing/selftests/efivarfs/create-read.c2
-rwxr-xr-xtools/testing/selftests/efivarfs/efivarfs.sh5
-rw-r--r--tools/testing/selftests/exec/.gitignore4
-rw-r--r--tools/testing/selftests/exec/Makefile17
-rwxr-xr-xtools/testing/selftests/exec/binfmt_script.py (renamed from tools/testing/selftests/exec/binfmt_script)0
-rw-r--r--tools/testing/selftests/exec/execveat.c89
-rw-r--r--tools/testing/selftests/exec/load_address.c68
-rw-r--r--tools/testing/selftests/exec/non-regular.c196
-rw-r--r--tools/testing/selftests/exec/null-argv.c78
-rw-r--r--tools/testing/selftests/fchmodat2/.gitignore (renamed from tools/testing/selftests/rcutorture/formal/srcu-cbmc/.gitignore)2
-rw-r--r--tools/testing/selftests/fchmodat2/Makefile6
-rw-r--r--tools/testing/selftests/fchmodat2/fchmodat2_test.c142
-rw-r--r--tools/testing/selftests/filelock/Makefile5
-rw-r--r--tools/testing/selftests/filelock/ofdlocks.c132
-rw-r--r--tools/testing/selftests/filesystems/Makefile2
-rw-r--r--tools/testing/selftests/filesystems/binderfs/Makefile2
-rw-r--r--tools/testing/selftests/filesystems/binderfs/binderfs_test.c309
-rw-r--r--tools/testing/selftests/filesystems/binderfs/config1
-rw-r--r--tools/testing/selftests/filesystems/epoll/Makefile2
-rw-r--r--tools/testing/selftests/filesystems/epoll/epoll_wakeup_test.c211
-rw-r--r--tools/testing/selftests/filesystems/eventfd/.gitignore2
-rw-r--r--tools/testing/selftests/filesystems/eventfd/Makefile7
-rw-r--r--tools/testing/selftests/filesystems/eventfd/eventfd_test.c186
-rw-r--r--tools/testing/selftests/filesystems/fat/.gitignore2
-rw-r--r--tools/testing/selftests/filesystems/fat/Makefile7
-rw-r--r--tools/testing/selftests/filesystems/fat/config2
-rw-r--r--tools/testing/selftests/filesystems/fat/rename_exchange.c37
-rwxr-xr-xtools/testing/selftests/filesystems/fat/run_fat_tests.sh82
-rw-r--r--tools/testing/selftests/filesystems/overlayfs/.gitignore2
-rw-r--r--tools/testing/selftests/filesystems/overlayfs/Makefile7
-rw-r--r--tools/testing/selftests/filesystems/overlayfs/dev_in_maps.c184
-rw-r--r--tools/testing/selftests/filesystems/overlayfs/log.h26
-rw-r--r--tools/testing/selftests/filesystems/statmount/.gitignore (renamed from tools/testing/selftests/rcutorture/formal/srcu-cbmc/include/linux/.gitignore)2
-rw-r--r--tools/testing/selftests/filesystems/statmount/Makefile6
-rw-r--r--tools/testing/selftests/filesystems/statmount/statmount_test.c612
-rw-r--r--tools/testing/selftests/firmware/.gitignore2
-rw-r--r--tools/testing/selftests/firmware/Makefile2
-rw-r--r--tools/testing/selftests/firmware/config1
-rwxr-xr-xtools/testing/selftests/firmware/fw_filesystem.sh249
-rwxr-xr-xtools/testing/selftests/firmware/fw_lib.sh19
-rw-r--r--tools/testing/selftests/firmware/fw_namespace.c9
-rwxr-xr-xtools/testing/selftests/firmware/fw_run_tests.sh4
-rwxr-xr-xtools/testing/selftests/firmware/fw_upload.sh214
-rw-r--r--tools/testing/selftests/firmware/settings8
-rw-r--r--tools/testing/selftests/fpu/.gitignore2
-rw-r--r--tools/testing/selftests/fpu/Makefile9
-rwxr-xr-xtools/testing/selftests/fpu/run_test_fpu.sh46
-rw-r--r--tools/testing/selftests/fpu/test_fpu.c61
-rw-r--r--tools/testing/selftests/ftrace/Makefile3
-rwxr-xr-xtools/testing/selftests/ftrace/ftracetest83
-rwxr-xr-xtools/testing/selftests/ftrace/ftracetest-ktap8
-rw-r--r--tools/testing/selftests/ftrace/test.d/00basic/ringbuffer_subbuf_size.tc95
-rw-r--r--tools/testing/selftests/ftrace/test.d/00basic/snapshot1.tc31
-rw-r--r--tools/testing/selftests/ftrace/test.d/00basic/test_ownership.tc114
-rw-r--r--tools/testing/selftests/ftrace/test.d/00basic/trace_marker.tc82
-rw-r--r--tools/testing/selftests/ftrace/test.d/dynevent/add_remove_btfarg.tc78
-rw-r--r--tools/testing/selftests/ftrace/test.d/dynevent/add_remove_eprobe.tc97
-rw-r--r--tools/testing/selftests/ftrace/test.d/dynevent/add_remove_fprobe.tc26
-rw-r--r--tools/testing/selftests/ftrace/test.d/dynevent/add_remove_kprobe.tc9
-rw-r--r--tools/testing/selftests/ftrace/test.d/dynevent/add_remove_tprobe.tc27
-rw-r--r--tools/testing/selftests/ftrace/test.d/dynevent/clear_select_events.tc2
-rw-r--r--tools/testing/selftests/ftrace/test.d/dynevent/eprobes_syntax_errors.tc29
-rw-r--r--tools/testing/selftests/ftrace/test.d/dynevent/fprobe_entry_arg.tc18
-rw-r--r--tools/testing/selftests/ftrace/test.d/dynevent/fprobe_syntax_errors.tc123
-rw-r--r--tools/testing/selftests/ftrace/test.d/dynevent/generic_clear_event.tc2
-rw-r--r--tools/testing/selftests/ftrace/test.d/dynevent/test_duplicates.tc38
-rw-r--r--tools/testing/selftests/ftrace/test.d/dynevent/tprobe_syntax_errors.tc82
-rw-r--r--tools/testing/selftests/ftrace/test.d/event/event-no-pid.tc7
-rw-r--r--tools/testing/selftests/ftrace/test.d/filter/event-filter-function.tc67
-rw-r--r--tools/testing/selftests/ftrace/test.d/ftrace/fgraph-retval.tc44
-rw-r--r--tools/testing/selftests/ftrace/test.d/ftrace/func-filter-notrace-pid.tc2
-rw-r--r--tools/testing/selftests/ftrace/test.d/ftrace/func-filter-pid.tc2
-rw-r--r--tools/testing/selftests/ftrace/test.d/ftrace/func-filter-stacktrace.tc4
-rw-r--r--tools/testing/selftests/ftrace/test.d/ftrace/func_event_triggers.tc17
-rw-r--r--tools/testing/selftests/ftrace/test.d/ftrace/func_hotplug.tc42
-rw-r--r--tools/testing/selftests/ftrace/test.d/ftrace/func_set_ftrace_file.tc2
-rw-r--r--tools/testing/selftests/ftrace/test.d/functions51
-rw-r--r--tools/testing/selftests/ftrace/test.d/instances/instance-event.tc2
-rw-r--r--tools/testing/selftests/ftrace/test.d/kprobe/add_and_remove.tc2
-rw-r--r--tools/testing/selftests/ftrace/test.d/kprobe/busy_check.tc2
-rw-r--r--tools/testing/selftests/ftrace/test.d/kprobe/kprobe_args.tc4
-rw-r--r--tools/testing/selftests/ftrace/test.d/kprobe/kprobe_args_char.tc60
-rw-r--r--tools/testing/selftests/ftrace/test.d/kprobe/kprobe_args_comm.tc2
-rw-r--r--tools/testing/selftests/ftrace/test.d/kprobe/kprobe_args_string.tc27
-rw-r--r--tools/testing/selftests/ftrace/test.d/kprobe/kprobe_args_symbol.tc10
-rw-r--r--tools/testing/selftests/ftrace/test.d/kprobe/kprobe_args_syntax.tc16
-rw-r--r--tools/testing/selftests/ftrace/test.d/kprobe/kprobe_args_type.tc2
-rw-r--r--tools/testing/selftests/ftrace/test.d/kprobe/kprobe_args_user.tc4
-rw-r--r--tools/testing/selftests/ftrace/test.d/kprobe/kprobe_ftrace.tc14
-rw-r--r--tools/testing/selftests/ftrace/test.d/kprobe/kprobe_insn_boundary.tc19
-rw-r--r--tools/testing/selftests/ftrace/test.d/kprobe/kprobe_multiprobe.tc2
-rw-r--r--tools/testing/selftests/ftrace/test.d/kprobe/kprobe_non_uniq_symbol.tc13
-rw-r--r--tools/testing/selftests/ftrace/test.d/kprobe/kprobe_opt_types.tc34
-rw-r--r--tools/testing/selftests/ftrace/test.d/kprobe/kprobe_syntax_errors.tc38
-rw-r--r--tools/testing/selftests/ftrace/test.d/kprobe/kretprobe_args.tc4
-rw-r--r--tools/testing/selftests/ftrace/test.d/kprobe/kretprobe_entry_arg.tc18
-rw-r--r--tools/testing/selftests/ftrace/test.d/kprobe/kretprobe_return_suffix.tc21
-rw-r--r--tools/testing/selftests/ftrace/test.d/kprobe/multiple_kprobes.tc2
-rw-r--r--tools/testing/selftests/ftrace/test.d/kprobe/probepoint.tc2
-rw-r--r--tools/testing/selftests/ftrace/test.d/kprobe/profile.tc4
-rw-r--r--tools/testing/selftests/ftrace/test.d/kprobe/uprobe_syntax_errors.tc11
-rw-r--r--tools/testing/selftests/ftrace/test.d/preemptirq/irqsoff_tracer.tc8
-rw-r--r--tools/testing/selftests/ftrace/test.d/tracer/wakeup.tc7
-rw-r--r--tools/testing/selftests/ftrace/test.d/tracer/wakeup_rt.tc7
-rw-r--r--tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-field-variable-support.tc2
-rw-r--r--tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-inter-event-combined-hist.tc10
-rw-r--r--tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-onchange-action-hist.tc2
-rw-r--r--tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-onmatch-action-hist.tc2
-rw-r--r--tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-onmatch-onmax-action-hist.tc2
-rw-r--r--tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-onmax-action-hist.tc2
-rw-r--r--tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-snapshot-action-hist.tc2
-rw-r--r--tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-synthetic-eprobe.tc53
-rw-r--r--tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-synthetic-event-dynstring.tc31
-rw-r--r--tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-synthetic-event-stack-legacy.tc24
-rw-r--r--tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-synthetic-event-stack.tc23
-rw-r--r--tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-synthetic-event-syntax.tc10
-rw-r--r--tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-synthetic_event_syntax_errors.tc38
-rw-r--r--tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-trace-action-hist.tc2
-rw-r--r--tools/testing/selftests/ftrace/test.d/trigger/trigger-hist-expressions.tc63
-rw-r--r--tools/testing/selftests/ftrace/test.d/trigger/trigger-hist-mod.tc2
-rw-r--r--tools/testing/selftests/ftrace/test.d/trigger/trigger-hist.tc18
-rw-r--r--tools/testing/selftests/futex/Makefile4
-rw-r--r--tools/testing/selftests/futex/functional/.gitignore3
-rw-r--r--tools/testing/selftests/futex/functional/Makefile17
-rw-r--r--tools/testing/selftests/futex/functional/futex_requeue.c136
-rw-r--r--tools/testing/selftests/futex/functional/futex_requeue_pi.c13
-rw-r--r--tools/testing/selftests/futex/functional/futex_requeue_pi_signal_restart.c2
-rw-r--r--tools/testing/selftests/futex/functional/futex_wait.c171
-rw-r--r--tools/testing/selftests/futex/functional/futex_wait_timeout.c152
-rw-r--r--tools/testing/selftests/futex/functional/futex_wait_wouldblock.c41
-rw-r--r--tools/testing/selftests/futex/functional/futex_waitv.c237
-rwxr-xr-xtools/testing/selftests/futex/functional/run.sh9
-rw-r--r--tools/testing/selftests/futex/include/futex2test.h22
-rw-r--r--tools/testing/selftests/gpio/.gitignore4
-rw-r--r--tools/testing/selftests/gpio/Makefile30
-rw-r--r--tools/testing/selftests/gpio/config2
-rw-r--r--tools/testing/selftests/gpio/gpio-chip-info.c57
-rw-r--r--tools/testing/selftests/gpio/gpio-line-name.c55
-rw-r--r--tools/testing/selftests/gpio/gpio-mockup-cdev.c198
-rw-r--r--tools/testing/selftests/gpio/gpio-mockup-chardev.c323
-rwxr-xr-xtools/testing/selftests/gpio/gpio-mockup-sysfs.sh168
-rwxr-xr-xtools/testing/selftests/gpio/gpio-mockup.sh494
-rwxr-xr-xtools/testing/selftests/gpio/gpio-sim.sh399
-rw-r--r--tools/testing/selftests/hid/.gitignore5
-rw-r--r--tools/testing/selftests/hid/Makefile241
-rw-r--r--tools/testing/selftests/hid/config32
-rw-r--r--tools/testing/selftests/hid/config.common240
-rw-r--r--tools/testing/selftests/hid/config.x86_644
-rwxr-xr-xtools/testing/selftests/hid/hid-apple.sh7
-rwxr-xr-xtools/testing/selftests/hid/hid-core.sh7
-rwxr-xr-xtools/testing/selftests/hid/hid-gamepad.sh7
-rwxr-xr-xtools/testing/selftests/hid/hid-ite.sh7
-rwxr-xr-xtools/testing/selftests/hid/hid-keyboard.sh7
-rwxr-xr-xtools/testing/selftests/hid/hid-mouse.sh7
-rwxr-xr-xtools/testing/selftests/hid/hid-multitouch.sh7
-rwxr-xr-xtools/testing/selftests/hid/hid-sony.sh7
-rwxr-xr-xtools/testing/selftests/hid/hid-tablet.sh7
-rwxr-xr-xtools/testing/selftests/hid/hid-usb_crash.sh7
-rwxr-xr-xtools/testing/selftests/hid/hid-wacom.sh7
-rw-r--r--tools/testing/selftests/hid/hid_bpf.c869
-rw-r--r--tools/testing/selftests/hid/progs/hid.c206
-rw-r--r--tools/testing/selftests/hid/progs/hid_bpf_helpers.h98
-rwxr-xr-xtools/testing/selftests/hid/run-hid-tools-tests.sh28
-rw-r--r--tools/testing/selftests/hid/settings3
-rw-r--r--tools/testing/selftests/hid/tests/__init__.py2
-rw-r--r--tools/testing/selftests/hid/tests/base.py344
-rw-r--r--tools/testing/selftests/hid/tests/conftest.py81
-rw-r--r--tools/testing/selftests/hid/tests/descriptors_wacom.py1360
-rw-r--r--tools/testing/selftests/hid/tests/test_apple_keyboard.py440
-rw-r--r--tools/testing/selftests/hid/tests/test_gamepad.py209
-rw-r--r--tools/testing/selftests/hid/tests/test_hid_core.py154
-rw-r--r--tools/testing/selftests/hid/tests/test_ite_keyboard.py166
-rw-r--r--tools/testing/selftests/hid/tests/test_keyboard.py485
-rw-r--r--tools/testing/selftests/hid/tests/test_mouse.py977
-rw-r--r--tools/testing/selftests/hid/tests/test_multitouch.py2088
-rw-r--r--tools/testing/selftests/hid/tests/test_sony.py342
-rw-r--r--tools/testing/selftests/hid/tests/test_tablet.py1164
-rw-r--r--tools/testing/selftests/hid/tests/test_usb_crash.py103
-rw-r--r--tools/testing/selftests/hid/tests/test_wacom_generic.py1198
-rwxr-xr-xtools/testing/selftests/hid/vmtest.sh296
-rw-r--r--tools/testing/selftests/intel_pstate/Makefile6
-rw-r--r--tools/testing/selftests/intel_pstate/aperf.c22
-rw-r--r--tools/testing/selftests/iommu/.gitignore3
-rw-r--r--tools/testing/selftests/iommu/Makefile11
-rw-r--r--tools/testing/selftests/iommu/config3
-rw-r--r--tools/testing/selftests/iommu/iommufd.c2349
-rw-r--r--tools/testing/selftests/iommu/iommufd_fail_nth.c630
-rw-r--r--tools/testing/selftests/iommu/iommufd_utils.h686
-rw-r--r--tools/testing/selftests/ipc/Makefile2
-rw-r--r--tools/testing/selftests/ipc/msgque.c6
-rw-r--r--tools/testing/selftests/ir/ir_loopback.c11
-rwxr-xr-xtools/testing/selftests/ir/ir_loopback.sh2
-rw-r--r--tools/testing/selftests/kcmp/Makefile2
-rw-r--r--tools/testing/selftests/kcmp/kcmp_test.c6
-rw-r--r--tools/testing/selftests/kexec/Makefile6
-rwxr-xr-xtools/testing/selftests/kexec/kexec_common_lib.sh67
-rwxr-xr-xtools/testing/selftests/kexec/test_kexec_file_load.sh13
-rwxr-xr-xtools/testing/selftests/kmod/kmod.sh55
-rw-r--r--tools/testing/selftests/kselftest.h231
-rw-r--r--tools/testing/selftests/kselftest/ktap_helpers.sh111
-rwxr-xr-xtools/testing/selftests/kselftest/prefix.pl2
-rw-r--r--tools/testing/selftests/kselftest/runner.sh117
-rwxr-xr-xtools/testing/selftests/kselftest_deps.sh91
-rw-r--r--tools/testing/selftests/kselftest_harness.h576
-rw-r--r--tools/testing/selftests/kselftest_module.h22
-rw-r--r--tools/testing/selftests/kvm/.gitignore32
-rw-r--r--tools/testing/selftests/kvm/Makefile307
-rw-r--r--tools/testing/selftests/kvm/aarch64/aarch32_id_regs.c167
-rw-r--r--tools/testing/selftests/kvm/aarch64/arch_timer.c225
-rw-r--r--tools/testing/selftests/kvm/aarch64/debug-exceptions.c607
-rw-r--r--tools/testing/selftests/kvm/aarch64/get-reg-list.c757
-rw-r--r--tools/testing/selftests/kvm/aarch64/hypercalls.c308
-rw-r--r--tools/testing/selftests/kvm/aarch64/page_fault_test.c1136
-rw-r--r--tools/testing/selftests/kvm/aarch64/psci_test.c198
-rw-r--r--tools/testing/selftests/kvm/aarch64/set_id_regs.c485
-rw-r--r--tools/testing/selftests/kvm/aarch64/smccc_filter.c268
-rw-r--r--tools/testing/selftests/kvm/aarch64/vcpu_width_config.c121
-rw-r--r--tools/testing/selftests/kvm/aarch64/vgic_init.c716
-rw-r--r--tools/testing/selftests/kvm/aarch64/vgic_irq.c855
-rw-r--r--tools/testing/selftests/kvm/aarch64/vpmu_counter_access.c653
-rw-r--r--tools/testing/selftests/kvm/access_tracking_perf_test.c394
-rw-r--r--tools/testing/selftests/kvm/arch_timer.c259
-rw-r--r--tools/testing/selftests/kvm/clear_dirty_log_test.c6
-rw-r--r--tools/testing/selftests/kvm/config2
-rw-r--r--tools/testing/selftests/kvm/demand_paging_test.c673
-rw-r--r--tools/testing/selftests/kvm/dirty_log_perf_test.c435
-rw-r--r--tools/testing/selftests/kvm/dirty_log_test.c783
-rw-r--r--tools/testing/selftests/kvm/get-reg-list.c402
-rw-r--r--tools/testing/selftests/kvm/guest_memfd_test.c201
-rw-r--r--tools/testing/selftests/kvm/guest_print_test.c219
-rw-r--r--tools/testing/selftests/kvm/hardware_disable_test.c184
-rw-r--r--tools/testing/selftests/kvm/include/aarch64/arch_timer.h142
-rw-r--r--tools/testing/selftests/kvm/include/aarch64/delay.h25
-rw-r--r--tools/testing/selftests/kvm/include/aarch64/gic.h47
-rw-r--r--tools/testing/selftests/kvm/include/aarch64/gic_v3.h82
-rw-r--r--tools/testing/selftests/kvm/include/aarch64/kvm_util_arch.h7
-rw-r--r--tools/testing/selftests/kvm/include/aarch64/processor.h220
-rw-r--r--tools/testing/selftests/kvm/include/aarch64/spinlock.h13
-rw-r--r--tools/testing/selftests/kvm/include/aarch64/ucall.h20
-rw-r--r--tools/testing/selftests/kvm/include/aarch64/vgic.h36
-rw-r--r--tools/testing/selftests/kvm/include/guest_modes.h21
-rw-r--r--tools/testing/selftests/kvm/include/kvm_test_harness.h36
-rw-r--r--tools/testing/selftests/kvm/include/kvm_util.h334
-rw-r--r--tools/testing/selftests/kvm/include/kvm_util_base.h1135
-rw-r--r--tools/testing/selftests/kvm/include/memstress.h83
-rw-r--r--tools/testing/selftests/kvm/include/numaif.h55
-rw-r--r--tools/testing/selftests/kvm/include/riscv/arch_timer.h71
-rw-r--r--tools/testing/selftests/kvm/include/riscv/kvm_util_arch.h7
-rw-r--r--tools/testing/selftests/kvm/include/riscv/processor.h206
-rw-r--r--tools/testing/selftests/kvm/include/riscv/ucall.h20
-rw-r--r--tools/testing/selftests/kvm/include/s390x/diag318_test_handler.h13
-rw-r--r--tools/testing/selftests/kvm/include/s390x/kvm_util_arch.h7
-rw-r--r--tools/testing/selftests/kvm/include/s390x/processor.h8
-rw-r--r--tools/testing/selftests/kvm/include/s390x/ucall.h19
-rw-r--r--tools/testing/selftests/kvm/include/sparsebit.h56
-rw-r--r--tools/testing/selftests/kvm/include/test_util.h162
-rw-r--r--tools/testing/selftests/kvm/include/timer_test.h45
-rw-r--r--tools/testing/selftests/kvm/include/ucall_common.h116
-rw-r--r--tools/testing/selftests/kvm/include/userfaultfd_util.h45
-rw-r--r--tools/testing/selftests/kvm/include/x86_64/apic.h93
-rw-r--r--tools/testing/selftests/kvm/include/x86_64/evmcs.h (renamed from tools/testing/selftests/kvm/include/evmcs.h)247
-rw-r--r--tools/testing/selftests/kvm/include/x86_64/hyperv.h346
-rw-r--r--tools/testing/selftests/kvm/include/x86_64/kvm_util_arch.h23
-rw-r--r--tools/testing/selftests/kvm/include/x86_64/mce.h25
-rw-r--r--tools/testing/selftests/kvm/include/x86_64/pmu.h97
-rw-r--r--tools/testing/selftests/kvm/include/x86_64/processor.h1150
-rw-r--r--tools/testing/selftests/kvm/include/x86_64/sev.h107
-rw-r--r--tools/testing/selftests/kvm/include/x86_64/svm.h35
-rw-r--r--tools/testing/selftests/kvm/include/x86_64/svm_util.h48
-rw-r--r--tools/testing/selftests/kvm/include/x86_64/ucall.h13
-rw-r--r--tools/testing/selftests/kvm/include/x86_64/vmx.h104
-rw-r--r--tools/testing/selftests/kvm/kvm_binary_stats_test.c275
-rw-r--r--tools/testing/selftests/kvm/kvm_create_max_vcpus.c34
-rw-r--r--tools/testing/selftests/kvm/kvm_page_table_test.c479
-rw-r--r--tools/testing/selftests/kvm/lib/aarch64/gic.c161
-rw-r--r--tools/testing/selftests/kvm/lib/aarch64/gic_private.h32
-rw-r--r--tools/testing/selftests/kvm/lib/aarch64/gic_v3.c398
-rw-r--r--tools/testing/selftests/kvm/lib/aarch64/handlers.S126
-rw-r--r--tools/testing/selftests/kvm/lib/aarch64/processor.c477
-rw-r--r--tools/testing/selftests/kvm/lib/aarch64/spinlock.c27
-rw-r--r--tools/testing/selftests/kvm/lib/aarch64/ucall.c102
-rw-r--r--tools/testing/selftests/kvm/lib/aarch64/vgic.c170
-rw-r--r--tools/testing/selftests/kvm/lib/assert.c24
-rw-r--r--tools/testing/selftests/kvm/lib/elf.c17
-rw-r--r--tools/testing/selftests/kvm/lib/guest_modes.c129
-rw-r--r--tools/testing/selftests/kvm/lib/guest_sprintf.c314
-rw-r--r--tools/testing/selftests/kvm/lib/kvm_util.c2026
-rw-r--r--tools/testing/selftests/kvm/lib/kvm_util_internal.h111
-rw-r--r--tools/testing/selftests/kvm/lib/memstress.c398
-rw-r--r--tools/testing/selftests/kvm/lib/rbtree.c1
-rw-r--r--tools/testing/selftests/kvm/lib/riscv/handlers.S101
-rw-r--r--tools/testing/selftests/kvm/lib/riscv/processor.c504
-rw-r--r--tools/testing/selftests/kvm/lib/riscv/ucall.c31
-rw-r--r--tools/testing/selftests/kvm/lib/s390x/diag318_test_handler.c80
-rw-r--r--tools/testing/selftests/kvm/lib/s390x/processor.c88
-rw-r--r--tools/testing/selftests/kvm/lib/s390x/ucall.c42
-rw-r--r--tools/testing/selftests/kvm/lib/sparsebit.c52
-rw-r--r--tools/testing/selftests/kvm/lib/string_override.c48
-rw-r--r--tools/testing/selftests/kvm/lib/test_util.c330
-rw-r--r--tools/testing/selftests/kvm/lib/ucall_common.c160
-rw-r--r--tools/testing/selftests/kvm/lib/userfaultfd_util.c186
-rw-r--r--tools/testing/selftests/kvm/lib/x86_64/apic.c43
-rw-r--r--tools/testing/selftests/kvm/lib/x86_64/handlers.S81
-rw-r--r--tools/testing/selftests/kvm/lib/x86_64/hyperv.c46
-rw-r--r--tools/testing/selftests/kvm/lib/x86_64/memstress.c112
-rw-r--r--tools/testing/selftests/kvm/lib/x86_64/pmu.c31
-rw-r--r--tools/testing/selftests/kvm/lib/x86_64/processor.c1450
-rw-r--r--tools/testing/selftests/kvm/lib/x86_64/sev.c114
-rw-r--r--tools/testing/selftests/kvm/lib/x86_64/svm.c47
-rw-r--r--tools/testing/selftests/kvm/lib/x86_64/ucall.c72
-rw-r--r--tools/testing/selftests/kvm/lib/x86_64/vmx.c266
-rw-r--r--tools/testing/selftests/kvm/max_guest_memory_test.c294
-rw-r--r--tools/testing/selftests/kvm/memslot_modification_stress_test.c182
-rw-r--r--tools/testing/selftests/kvm/memslot_perf_test.c1129
-rw-r--r--tools/testing/selftests/kvm/riscv/arch_timer.c111
-rw-r--r--tools/testing/selftests/kvm/riscv/get-reg-list.c1034
-rw-r--r--tools/testing/selftests/kvm/rseq_test.c268
-rw-r--r--tools/testing/selftests/kvm/s390x/cmma_test.c695
-rw-r--r--tools/testing/selftests/kvm/s390x/debug_test.c160
-rw-r--r--tools/testing/selftests/kvm/s390x/memop.c1252
-rw-r--r--tools/testing/selftests/kvm/s390x/resets.c184
-rw-r--r--tools/testing/selftests/kvm/s390x/sync_regs_test.c169
-rw-r--r--tools/testing/selftests/kvm/s390x/tprot.c244
-rw-r--r--tools/testing/selftests/kvm/set_memory_region_test.c309
-rw-r--r--tools/testing/selftests/kvm/settings1
-rw-r--r--tools/testing/selftests/kvm/steal_time.c270
-rw-r--r--tools/testing/selftests/kvm/system_counter_offset_test.c127
-rw-r--r--tools/testing/selftests/kvm/x86_64/amx_test.c334
-rw-r--r--tools/testing/selftests/kvm/x86_64/cpuid_test.c208
-rw-r--r--tools/testing/selftests/kvm/x86_64/cr4_cpuid_sync_test.c56
-rw-r--r--tools/testing/selftests/kvm/x86_64/debug_regs.c110
-rw-r--r--tools/testing/selftests/kvm/x86_64/dirty_log_page_splitting_test.c262
-rw-r--r--tools/testing/selftests/kvm/x86_64/evmcs_test.c166
-rw-r--r--tools/testing/selftests/kvm/x86_64/exit_on_emulation_failure_test.c42
-rw-r--r--tools/testing/selftests/kvm/x86_64/fix_hypercall_test.c144
-rw-r--r--tools/testing/selftests/kvm/x86_64/flds_emulation.h52
-rw-r--r--tools/testing/selftests/kvm/x86_64/get_msr_index_features.c35
-rw-r--r--tools/testing/selftests/kvm/x86_64/hwcr_msr_test.c47
-rw-r--r--tools/testing/selftests/kvm/x86_64/hyperv_clock.c263
-rw-r--r--tools/testing/selftests/kvm/x86_64/hyperv_cpuid.c134
-rw-r--r--tools/testing/selftests/kvm/x86_64/hyperv_evmcs.c310
-rw-r--r--tools/testing/selftests/kvm/x86_64/hyperv_extended_hypercalls.c98
-rw-r--r--tools/testing/selftests/kvm/x86_64/hyperv_features.c701
-rw-r--r--tools/testing/selftests/kvm/x86_64/hyperv_ipi.c313
-rw-r--r--tools/testing/selftests/kvm/x86_64/hyperv_svm_test.c200
-rw-r--r--tools/testing/selftests/kvm/x86_64/hyperv_tlb_flush.c682
-rw-r--r--tools/testing/selftests/kvm/x86_64/kvm_clock_test.c156
-rw-r--r--tools/testing/selftests/kvm/x86_64/kvm_pv_test.c154
-rw-r--r--tools/testing/selftests/kvm/x86_64/max_vcpuid_cap_test.c44
-rw-r--r--tools/testing/selftests/kvm/x86_64/mmio_warning_test.c126
-rw-r--r--tools/testing/selftests/kvm/x86_64/monitor_mwait_test.c131
-rw-r--r--tools/testing/selftests/kvm/x86_64/nested_exceptions_test.c290
-rw-r--r--tools/testing/selftests/kvm/x86_64/nx_huge_pages_test.c269
-rwxr-xr-xtools/testing/selftests/kvm/x86_64/nx_huge_pages_test.sh58
-rw-r--r--tools/testing/selftests/kvm/x86_64/platform_info_test.c66
-rw-r--r--tools/testing/selftests/kvm/x86_64/pmu_counters_test.c620
-rw-r--r--tools/testing/selftests/kvm/x86_64/pmu_event_filter_test.c910
-rw-r--r--tools/testing/selftests/kvm/x86_64/private_mem_conversions_test.c484
-rw-r--r--tools/testing/selftests/kvm/x86_64/private_mem_kvm_exits_test.c120
-rw-r--r--tools/testing/selftests/kvm/x86_64/recalc_apic_map_test.c74
-rw-r--r--tools/testing/selftests/kvm/x86_64/set_boot_cpu_id.c131
-rw-r--r--tools/testing/selftests/kvm/x86_64/set_sregs_test.c108
-rw-r--r--tools/testing/selftests/kvm/x86_64/sev_migrate_tests.c397
-rw-r--r--tools/testing/selftests/kvm/x86_64/sev_smoke_test.c88
-rw-r--r--tools/testing/selftests/kvm/x86_64/smaller_maxphyaddr_emulation_test.c111
-rw-r--r--tools/testing/selftests/kvm/x86_64/smm_test.c118
-rw-r--r--tools/testing/selftests/kvm/x86_64/state_test.c154
-rw-r--r--tools/testing/selftests/kvm/x86_64/svm_int_ctl_test.c121
-rw-r--r--tools/testing/selftests/kvm/x86_64/svm_nested_shutdown_test.c62
-rw-r--r--tools/testing/selftests/kvm/x86_64/svm_nested_soft_inject_test.c213
-rw-r--r--tools/testing/selftests/kvm/x86_64/svm_vmcall_test.c25
-rw-r--r--tools/testing/selftests/kvm/x86_64/sync_regs_test.c312
-rw-r--r--tools/testing/selftests/kvm/x86_64/triple_fault_event_test.c124
-rw-r--r--tools/testing/selftests/kvm/x86_64/tsc_msrs_test.c161
-rw-r--r--tools/testing/selftests/kvm/x86_64/tsc_scaling_sync.c110
-rw-r--r--tools/testing/selftests/kvm/x86_64/ucna_injection_test.c302
-rw-r--r--tools/testing/selftests/kvm/x86_64/userspace_io_test.c103
-rw-r--r--tools/testing/selftests/kvm/x86_64/userspace_msr_exit_test.c780
-rw-r--r--tools/testing/selftests/kvm/x86_64/vmx_apic_access_test.c124
-rw-r--r--tools/testing/selftests/kvm/x86_64/vmx_close_while_nested_test.c31
-rw-r--r--tools/testing/selftests/kvm/x86_64/vmx_dirty_log_test.c51
-rw-r--r--tools/testing/selftests/kvm/x86_64/vmx_exception_with_invalid_guest_state.c145
-rw-r--r--tools/testing/selftests/kvm/x86_64/vmx_invalid_nested_guest_state.c103
-rw-r--r--tools/testing/selftests/kvm/x86_64/vmx_msrs_test.c131
-rw-r--r--tools/testing/selftests/kvm/x86_64/vmx_nested_tsc_scaling_test.c206
-rw-r--r--tools/testing/selftests/kvm/x86_64/vmx_pmu_caps_test.c228
-rw-r--r--tools/testing/selftests/kvm/x86_64/vmx_preemption_timer_test.c53
-rw-r--r--tools/testing/selftests/kvm/x86_64/vmx_set_nested_state_test.c138
-rw-r--r--tools/testing/selftests/kvm/x86_64/vmx_tsc_adjust_test.c30
-rw-r--r--tools/testing/selftests/kvm/x86_64/xapic_ipi_test.c491
-rw-r--r--tools/testing/selftests/kvm/x86_64/xapic_state_test.c215
-rw-r--r--tools/testing/selftests/kvm/x86_64/xcr0_cpuid_test.c137
-rw-r--r--tools/testing/selftests/kvm/x86_64/xen_shinfo_test.c1156
-rw-r--r--tools/testing/selftests/kvm/x86_64/xen_vmcall_test.c142
-rw-r--r--tools/testing/selftests/kvm/x86_64/xss_msr_test.c58
-rw-r--r--tools/testing/selftests/landlock/.gitignore2
-rw-r--r--tools/testing/selftests/landlock/Makefile23
-rw-r--r--tools/testing/selftests/landlock/base_test.c329
-rw-r--r--tools/testing/selftests/landlock/common.h228
-rw-r--r--tools/testing/selftests/landlock/config14
-rw-r--r--tools/testing/selftests/landlock/config.um1
-rw-r--r--tools/testing/selftests/landlock/fs_test.c4877
-rw-r--r--tools/testing/selftests/landlock/net_test.c1804
-rw-r--r--tools/testing/selftests/landlock/ptrace_test.c439
-rw-r--r--tools/testing/selftests/landlock/true.c5
-rw-r--r--tools/testing/selftests/lib.mk149
-rw-r--r--tools/testing/selftests/lib/Makefile2
-rw-r--r--tools/testing/selftests/lib/config1
-rwxr-xr-xtools/testing/selftests/lib/scanf.sh4
-rw-r--r--tools/testing/selftests/livepatch/.gitignore1
-rw-r--r--tools/testing/selftests/livepatch/Makefile6
-rw-r--r--tools/testing/selftests/livepatch/README41
-rw-r--r--tools/testing/selftests/livepatch/config1
-rw-r--r--tools/testing/selftests/livepatch/functions.sh136
-rwxr-xr-xtools/testing/selftests/livepatch/test-callbacks.sh130
-rwxr-xr-xtools/testing/selftests/livepatch/test-ftrace.sh15
-rwxr-xr-xtools/testing/selftests/livepatch/test-livepatch.sh22
-rwxr-xr-xtools/testing/selftests/livepatch/test-shadow-vars.sh87
-rwxr-xr-xtools/testing/selftests/livepatch/test-state.sh39
-rwxr-xr-xtools/testing/selftests/livepatch/test-syscall.sh53
-rwxr-xr-xtools/testing/selftests/livepatch/test-sysfs.sh86
-rw-r--r--tools/testing/selftests/livepatch/test_klp-call_getpid.c44
-rw-r--r--tools/testing/selftests/livepatch/test_modules/Makefile26
-rw-r--r--tools/testing/selftests/livepatch/test_modules/test_klp_atomic_replace.c57
-rw-r--r--tools/testing/selftests/livepatch/test_modules/test_klp_callbacks_busy.c70
-rw-r--r--tools/testing/selftests/livepatch/test_modules/test_klp_callbacks_demo.c121
-rw-r--r--tools/testing/selftests/livepatch/test_modules/test_klp_callbacks_demo2.c93
-rw-r--r--tools/testing/selftests/livepatch/test_modules/test_klp_callbacks_mod.c24
-rw-r--r--tools/testing/selftests/livepatch/test_modules/test_klp_livepatch.c51
-rw-r--r--tools/testing/selftests/livepatch/test_modules/test_klp_shadow_vars.c301
-rw-r--r--tools/testing/selftests/livepatch/test_modules/test_klp_state.c162
-rw-r--r--tools/testing/selftests/livepatch/test_modules/test_klp_state2.c191
-rw-r--r--tools/testing/selftests/livepatch/test_modules/test_klp_state3.c5
-rw-r--r--tools/testing/selftests/livepatch/test_modules/test_klp_syscall.c116
-rw-r--r--tools/testing/selftests/lkdtm/.gitignore1
-rw-r--r--tools/testing/selftests/lkdtm/Makefile1
-rw-r--r--tools/testing/selftests/lkdtm/config13
-rwxr-xr-xtools/testing/selftests/lkdtm/run.sh28
-rwxr-xr-xtools/testing/selftests/lkdtm/stack-entropy.sh51
-rw-r--r--tools/testing/selftests/lkdtm/tests.txt33
-rw-r--r--tools/testing/selftests/lsm/.gitignore1
-rw-r--r--tools/testing/selftests/lsm/Makefile17
-rw-r--r--tools/testing/selftests/lsm/common.c89
-rw-r--r--tools/testing/selftests/lsm/common.h33
-rw-r--r--tools/testing/selftests/lsm/config3
-rw-r--r--tools/testing/selftests/lsm/lsm_get_self_attr_test.c275
-rw-r--r--tools/testing/selftests/lsm/lsm_list_modules_test.c143
-rw-r--r--tools/testing/selftests/lsm/lsm_set_self_attr_test.c74
-rw-r--r--tools/testing/selftests/media_tests/Makefile2
-rw-r--r--tools/testing/selftests/media_tests/video_device_test.c111
-rw-r--r--tools/testing/selftests/membarrier/Makefile2
-rw-r--r--tools/testing/selftests/membarrier/membarrier_test_impl.h33
-rw-r--r--tools/testing/selftests/membarrier/membarrier_test_multi_thread.c2
-rw-r--r--tools/testing/selftests/membarrier/membarrier_test_single_thread.c6
-rw-r--r--tools/testing/selftests/memfd/Makefile4
-rw-r--r--tools/testing/selftests/memfd/fuse_test.c3
-rw-r--r--tools/testing/selftests/memfd/memfd_test.c551
-rw-r--r--tools/testing/selftests/memory-hotplug/config1
-rwxr-xr-xtools/testing/selftests/memory-hotplug/mem-on-off-test.sh35
-rw-r--r--tools/testing/selftests/mincore/.gitignore2
-rw-r--r--tools/testing/selftests/mincore/Makefile6
-rw-r--r--tools/testing/selftests/mincore/mincore_selftest.c368
-rw-r--r--tools/testing/selftests/mm/.gitignore49
-rw-r--r--tools/testing/selftests/mm/Makefile204
-rwxr-xr-x[-rw-r--r--]tools/testing/selftests/mm/charge_reserved_hugetlb.sh (renamed from tools/testing/selftests/vm/charge_reserved_hugetlb.sh)45
-rwxr-xr-xtools/testing/selftests/mm/check_config.sh31
-rw-r--r--tools/testing/selftests/mm/compaction_test.c (renamed from tools/testing/selftests/vm/compaction_test.c)107
-rw-r--r--tools/testing/selftests/mm/config (renamed from tools/testing/selftests/vm/config)4
-rw-r--r--tools/testing/selftests/mm/cow.c1783
-rw-r--r--tools/testing/selftests/mm/gup_longterm.c460
-rw-r--r--tools/testing/selftests/mm/gup_test.c271
-rw-r--r--tools/testing/selftests/mm/hmm-tests.c (renamed from tools/testing/selftests/vm/hmm-tests.c)784
-rw-r--r--tools/testing/selftests/mm/hugepage-mmap.c (renamed from tools/testing/selftests/vm/hugepage-mmap.c)31
-rw-r--r--tools/testing/selftests/mm/hugepage-mremap.c168
-rw-r--r--tools/testing/selftests/mm/hugepage-shm.c (renamed from tools/testing/selftests/vm/hugepage-shm.c)4
-rw-r--r--tools/testing/selftests/mm/hugepage-vmemmap.c147
-rw-r--r--tools/testing/selftests/mm/hugetlb-madvise.c368
-rw-r--r--tools/testing/selftests/mm/hugetlb-read-hwpoison.c322
-rw-r--r--tools/testing/selftests/mm/hugetlb_fault_after_madv.c73
-rw-r--r--tools/testing/selftests/mm/hugetlb_madv_vs_map.c124
-rwxr-xr-x[-rw-r--r--]tools/testing/selftests/mm/hugetlb_reparenting_test.sh (renamed from tools/testing/selftests/vm/hugetlb_reparenting_test.sh)35
-rw-r--r--tools/testing/selftests/mm/khugepaged.c1285
-rw-r--r--tools/testing/selftests/mm/ksm_functional_tests.c650
-rw-r--r--tools/testing/selftests/mm/ksm_tests.c948
-rw-r--r--tools/testing/selftests/mm/madv_populate.c311
-rw-r--r--tools/testing/selftests/mm/map_fixed_noreplace.c (renamed from tools/testing/selftests/vm/map_fixed_noreplace.c)123
-rw-r--r--tools/testing/selftests/mm/map_hugetlb.c (renamed from tools/testing/selftests/vm/map_hugetlb.c)61
-rw-r--r--tools/testing/selftests/mm/map_populate.c (renamed from tools/testing/selftests/vm/map_populate.c)43
-rw-r--r--tools/testing/selftests/mm/mdwe_test.c303
-rw-r--r--tools/testing/selftests/mm/memfd_secret.c299
-rw-r--r--tools/testing/selftests/mm/migration.c202
-rw-r--r--tools/testing/selftests/mm/mkdirty.c379
-rw-r--r--tools/testing/selftests/mm/mlock-random-test.c (renamed from tools/testing/selftests/vm/mlock-random-test.c)137
-rw-r--r--tools/testing/selftests/mm/mlock2-tests.c (renamed from tools/testing/selftests/vm/mlock2-tests.c)283
-rw-r--r--tools/testing/selftests/mm/mlock2.h (renamed from tools/testing/selftests/vm/mlock2.h)19
-rw-r--r--tools/testing/selftests/mm/mrelease_test.c184
-rw-r--r--tools/testing/selftests/mm/mremap_dontunmap.c (renamed from tools/testing/selftests/vm/mremap_dontunmap.c)89
-rw-r--r--tools/testing/selftests/mm/mremap_test.c762
-rw-r--r--tools/testing/selftests/mm/on-fault-limit.c42
-rw-r--r--tools/testing/selftests/mm/pagemap_ioctl.c1664
-rw-r--r--tools/testing/selftests/mm/pkey-helpers.h (renamed from tools/testing/selftests/vm/pkey-helpers.h)5
-rw-r--r--tools/testing/selftests/mm/pkey-powerpc.h (renamed from tools/testing/selftests/vm/pkey-powerpc.h)3
-rw-r--r--tools/testing/selftests/mm/pkey-x86.h (renamed from tools/testing/selftests/vm/pkey-x86.h)54
-rw-r--r--tools/testing/selftests/mm/protection_keys.c (renamed from tools/testing/selftests/vm/protection_keys.c)269
-rwxr-xr-xtools/testing/selftests/mm/run_vmtests.sh445
-rw-r--r--tools/testing/selftests/mm/settings1
-rw-r--r--tools/testing/selftests/mm/soft-dirty.c213
-rw-r--r--tools/testing/selftests/mm/split_huge_page_test.c446
-rwxr-xr-xtools/testing/selftests/mm/test_hmm.sh (renamed from tools/testing/selftests/vm/test_hmm.sh)24
-rwxr-xr-xtools/testing/selftests/mm/test_vmalloc.sh (renamed from tools/testing/selftests/vm/test_vmalloc.sh)21
-rw-r--r--tools/testing/selftests/mm/thp_settings.c349
-rw-r--r--tools/testing/selftests/mm/thp_settings.h80
-rw-r--r--tools/testing/selftests/mm/thuge-gen.c (renamed from tools/testing/selftests/vm/thuge-gen.c)169
-rw-r--r--tools/testing/selftests/mm/transhuge-stress.c138
-rw-r--r--tools/testing/selftests/mm/uffd-common.c715
-rw-r--r--tools/testing/selftests/mm/uffd-common.h134
-rw-r--r--tools/testing/selftests/mm/uffd-stress.c480
-rw-r--r--tools/testing/selftests/mm/uffd-unit-tests.c1556
-rw-r--r--tools/testing/selftests/mm/va_high_addr_switch.c (renamed from tools/testing/selftests/vm/va_128TBswitch.c)51
-rwxr-xr-xtools/testing/selftests/mm/va_high_addr_switch.sh64
-rw-r--r--tools/testing/selftests/mm/virtual_address_range.c (renamed from tools/testing/selftests/vm/virtual_address_range.c)64
-rw-r--r--tools/testing/selftests/mm/vm_util.c364
-rw-r--r--tools/testing/selftests/mm/vm_util.h63
-rwxr-xr-x[-rw-r--r--]tools/testing/selftests/mm/write_hugetlb_memory.sh (renamed from tools/testing/selftests/vm/write_hugetlb_memory.sh)4
-rw-r--r--tools/testing/selftests/mm/write_to_hugetlbfs.c (renamed from tools/testing/selftests/vm/write_to_hugetlbfs.c)0
-rw-r--r--tools/testing/selftests/mount/.gitignore1
-rw-r--r--tools/testing/selftests/mount/Makefile4
-rw-r--r--tools/testing/selftests/mount/nosymfollow-test.c218
-rwxr-xr-xtools/testing/selftests/mount/run_nosymfollow.sh4
-rwxr-xr-xtools/testing/selftests/mount/run_unprivileged_remount.sh (renamed from tools/testing/selftests/mount/run_tests.sh)0
-rw-r--r--tools/testing/selftests/mount/unprivileged-remount-test.c4
-rw-r--r--tools/testing/selftests/mount_setattr/.gitignore1
-rw-r--r--tools/testing/selftests/mount_setattr/Makefile7
-rw-r--r--tools/testing/selftests/mount_setattr/config1
-rw-r--r--tools/testing/selftests/mount_setattr/mount_setattr_test.c1500
-rw-r--r--tools/testing/selftests/move_mount_set_group/.gitignore1
-rw-r--r--tools/testing/selftests/move_mount_set_group/Makefile7
-rw-r--r--tools/testing/selftests/move_mount_set_group/config1
-rw-r--r--tools/testing/selftests/move_mount_set_group/move_mount_set_group_test.c375
-rw-r--r--tools/testing/selftests/mqueue/mq_perf_tests.c29
-rw-r--r--tools/testing/selftests/mqueue/setting1
-rw-r--r--tools/testing/selftests/nci/.gitignore1
-rw-r--r--tools/testing/selftests/nci/Makefile6
-rw-r--r--tools/testing/selftests/nci/config3
-rw-r--r--tools/testing/selftests/nci/nci_dev.c904
-rw-r--r--tools/testing/selftests/net/.gitignore59
-rw-r--r--tools/testing/selftests/net/Makefile133
-rw-r--r--tools/testing/selftests/net/af_unix/Makefile4
-rw-r--r--tools/testing/selftests/net/af_unix/diag_uid.c177
-rw-r--r--tools/testing/selftests/net/af_unix/scm_pidfd.c429
-rw-r--r--tools/testing/selftests/net/af_unix/test_unix_oob.c436
-rw-r--r--tools/testing/selftests/net/af_unix/unix_connect.c148
-rwxr-xr-xtools/testing/selftests/net/altnames.sh2
-rwxr-xr-xtools/testing/selftests/net/amt.sh284
-rwxr-xr-xtools/testing/selftests/net/arp_ndisc_evict_nocarrier.sh213
-rwxr-xr-xtools/testing/selftests/net/arp_ndisc_untracked_subnets.sh302
-rwxr-xr-xtools/testing/selftests/net/bareudp.sh546
-rwxr-xr-xtools/testing/selftests/net/big_tcp.sh182
-rw-r--r--tools/testing/selftests/net/bind_bhash.c144
-rwxr-xr-xtools/testing/selftests/net/bind_bhash.sh68
-rw-r--r--tools/testing/selftests/net/bind_timewait.c92
-rw-r--r--tools/testing/selftests/net/bind_wildcard.c160
-rwxr-xr-xtools/testing/selftests/net/cmsg_ipv6.sh154
-rw-r--r--tools/testing/selftests/net/cmsg_sender.c522
-rwxr-xr-xtools/testing/selftests/net/cmsg_so_mark.sh78
-rwxr-xr-xtools/testing/selftests/net/cmsg_time.sh84
-rw-r--r--tools/testing/selftests/net/config73
-rw-r--r--tools/testing/selftests/net/csum.c988
-rwxr-xr-xtools/testing/selftests/net/devlink_port_split.py309
-rwxr-xr-xtools/testing/selftests/net/drop_monitor_tests.sh216
-rwxr-xr-xtools/testing/selftests/net/fcnal-test.sh949
-rwxr-xr-xtools/testing/selftests/net/fdb_flush.sh813
-rwxr-xr-xtools/testing/selftests/net/fib-onlink-tests.sh9
-rwxr-xr-xtools/testing/selftests/net/fib_nexthop_multiprefix.sh98
-rwxr-xr-xtools/testing/selftests/net/fib_nexthop_nongw.sh115
-rwxr-xr-xtools/testing/selftests/net/fib_nexthops.sh1005
-rwxr-xr-xtools/testing/selftests/net/fib_rule_tests.sh260
-rwxr-xr-xtools/testing/selftests/net/fib_tests.sh1043
-rw-r--r--tools/testing/selftests/net/forwarding/Makefile64
-rwxr-xr-xtools/testing/selftests/net/forwarding/bridge_fdb_learning_limit.sh283
-rwxr-xr-xtools/testing/selftests/net/forwarding/bridge_igmp.sh494
-rwxr-xr-xtools/testing/selftests/net/forwarding/bridge_locked_port.sh365
-rwxr-xr-xtools/testing/selftests/net/forwarding/bridge_mdb.sh1371
-rwxr-xr-xtools/testing/selftests/net/forwarding/bridge_mdb_host.sh103
-rwxr-xr-xtools/testing/selftests/net/forwarding/bridge_mdb_max.sh1347
-rwxr-xr-xtools/testing/selftests/net/forwarding/bridge_mdb_port_down.sh118
-rwxr-xr-xtools/testing/selftests/net/forwarding/bridge_mld.sh564
-rwxr-xr-xtools/testing/selftests/net/forwarding/bridge_vlan_aware.sh5
-rwxr-xr-xtools/testing/selftests/net/forwarding/bridge_vlan_mcast.sh546
-rwxr-xr-xtools/testing/selftests/net/forwarding/bridge_vlan_unaware.sh5
-rw-r--r--tools/testing/selftests/net/forwarding/config40
-rwxr-xr-xtools/testing/selftests/net/forwarding/custom_multipath_hash.sh372
-rw-r--r--tools/testing/selftests/net/forwarding/devlink_lib.sh150
-rwxr-xr-xtools/testing/selftests/net/forwarding/dual_vxlan_bridge.sh367
-rwxr-xr-xtools/testing/selftests/net/forwarding/ethtool.sh21
-rwxr-xr-xtools/testing/selftests/net/forwarding/ethtool_extended_state.sh117
-rw-r--r--tools/testing/selftests/net/forwarding/ethtool_lib.sh51
-rwxr-xr-xtools/testing/selftests/net/forwarding/ethtool_mm.sh340
-rwxr-xr-xtools/testing/selftests/net/forwarding/ethtool_rmon.sh143
-rw-r--r--tools/testing/selftests/net/forwarding/fib_offload_lib.sh14
-rw-r--r--tools/testing/selftests/net/forwarding/forwarding.config.sample15
-rwxr-xr-xtools/testing/selftests/net/forwarding/gre_custom_multipath_hash.sh464
-rwxr-xr-xtools/testing/selftests/net/forwarding/gre_inner_v4_multipath.sh2
-rwxr-xr-xtools/testing/selftests/net/forwarding/gre_inner_v6_multipath.sh6
-rwxr-xr-xtools/testing/selftests/net/forwarding/gre_multipath.sh2
-rwxr-xr-xtools/testing/selftests/net/forwarding/gre_multipath_nh.sh319
-rwxr-xr-xtools/testing/selftests/net/forwarding/gre_multipath_nh_res.sh323
-rwxr-xr-xtools/testing/selftests/net/forwarding/hw_stats_l3.sh340
-rwxr-xr-xtools/testing/selftests/net/forwarding/hw_stats_l3_gre.sh111
-rwxr-xr-xtools/testing/selftests/net/forwarding/ip6_forward_instats_vrf.sh174
-rwxr-xr-xtools/testing/selftests/net/forwarding/ip6gre_custom_multipath_hash.sh466
-rwxr-xr-xtools/testing/selftests/net/forwarding/ip6gre_flat.sh65
-rwxr-xr-xtools/testing/selftests/net/forwarding/ip6gre_flat_key.sh65
-rwxr-xr-xtools/testing/selftests/net/forwarding/ip6gre_flat_keys.sh65
-rwxr-xr-xtools/testing/selftests/net/forwarding/ip6gre_hier.sh65
-rwxr-xr-xtools/testing/selftests/net/forwarding/ip6gre_hier_key.sh65
-rwxr-xr-xtools/testing/selftests/net/forwarding/ip6gre_hier_keys.sh65
-rwxr-xr-xtools/testing/selftests/net/forwarding/ip6gre_inner_v4_multipath.sh2
-rwxr-xr-xtools/testing/selftests/net/forwarding/ip6gre_inner_v6_multipath.sh6
-rw-r--r--tools/testing/selftests/net/forwarding/ip6gre_lib.sh438
-rw-r--r--tools/testing/selftests/net/forwarding/lib.sh953
-rwxr-xr-xtools/testing/selftests/net/forwarding/local_termination.sh299
-rwxr-xr-xtools/testing/selftests/net/forwarding/mirror_gre_bound.sh1
-rwxr-xr-xtools/testing/selftests/net/forwarding/mirror_gre_bridge_1d.sh7
-rwxr-xr-xtools/testing/selftests/net/forwarding/mirror_gre_bridge_1d_vlan.sh14
-rwxr-xr-xtools/testing/selftests/net/forwarding/mirror_gre_bridge_1q.sh7
-rwxr-xr-xtools/testing/selftests/net/forwarding/mirror_gre_bridge_1q_lag.sh10
-rwxr-xr-xtools/testing/selftests/net/forwarding/mirror_gre_changes.sh5
-rw-r--r--tools/testing/selftests/net/forwarding/mirror_gre_lib.sh2
-rw-r--r--tools/testing/selftests/net/forwarding/mirror_gre_topo_lib.sh2
-rwxr-xr-xtools/testing/selftests/net/forwarding/mirror_gre_vlan_bridge_1q.sh15
-rw-r--r--tools/testing/selftests/net/forwarding/mirror_lib.sh22
-rw-r--r--tools/testing/selftests/net/forwarding/mirror_topo_lib.sh1
-rwxr-xr-xtools/testing/selftests/net/forwarding/mirror_vlan.sh4
-rwxr-xr-xtools/testing/selftests/net/forwarding/no_forwarding.sh261
-rwxr-xr-xtools/testing/selftests/net/forwarding/pedit_dsfield.sh6
-rwxr-xr-xtools/testing/selftests/net/forwarding/pedit_ip.sh201
-rwxr-xr-xtools/testing/selftests/net/forwarding/pedit_l4port.sh200
-rwxr-xr-xtools/testing/selftests/net/forwarding/q_in_vni.sh348
-rwxr-xr-xtools/testing/selftests/net/forwarding/q_in_vni_ipv6.sh347
-rwxr-xr-xtools/testing/selftests/net/forwarding/router.sh18
-rwxr-xr-xtools/testing/selftests/net/forwarding/router_bridge.sh79
-rwxr-xr-xtools/testing/selftests/net/forwarding/router_bridge_1d.sh185
-rwxr-xr-xtools/testing/selftests/net/forwarding/router_bridge_1d_lag.sh408
-rwxr-xr-xtools/testing/selftests/net/forwarding/router_bridge_lag.sh323
-rwxr-xr-xtools/testing/selftests/net/forwarding/router_bridge_pvid_vlan_upper.sh155
-rwxr-xr-xtools/testing/selftests/net/forwarding/router_bridge_vlan.sh102
-rwxr-xr-xtools/testing/selftests/net/forwarding/router_bridge_vlan_upper.sh169
-rwxr-xr-xtools/testing/selftests/net/forwarding/router_bridge_vlan_upper_pvid.sh171
-rwxr-xr-xtools/testing/selftests/net/forwarding/router_mpath_nh.sh124
-rw-r--r--tools/testing/selftests/net/forwarding/router_mpath_nh_lib.sh129
-rwxr-xr-xtools/testing/selftests/net/forwarding/router_mpath_nh_res.sh413
-rwxr-xr-xtools/testing/selftests/net/forwarding/router_multicast.sh92
-rwxr-xr-xtools/testing/selftests/net/forwarding/router_multipath.sh45
-rwxr-xr-xtools/testing/selftests/net/forwarding/router_nh.sh160
-rwxr-xr-xtools/testing/selftests/net/forwarding/router_vid_1.sh27
-rwxr-xr-xtools/testing/selftests/net/forwarding/sch_red.sh493
-rw-r--r--tools/testing/selftests/net/forwarding/sch_tbf_etsprio.sh32
-rwxr-xr-xtools/testing/selftests/net/forwarding/sch_tbf_root.sh4
-rw-r--r--tools/testing/selftests/net/forwarding/settings1
-rwxr-xr-xtools/testing/selftests/net/forwarding/skbedit_priority.sh6
-rwxr-xr-xtools/testing/selftests/net/forwarding/tc_actions.sh99
-rwxr-xr-xtools/testing/selftests/net/forwarding/tc_chains.sh4
-rw-r--r--tools/testing/selftests/net/forwarding/tc_common.sh10
-rwxr-xr-xtools/testing/selftests/net/forwarding/tc_flower.sh352
-rwxr-xr-xtools/testing/selftests/net/forwarding/tc_flower_cfm.sh206
-rwxr-xr-xtools/testing/selftests/net/forwarding/tc_flower_l2_miss.sh357
-rwxr-xr-xtools/testing/selftests/net/forwarding/tc_flower_port_range.sh228
-rwxr-xr-xtools/testing/selftests/net/forwarding/tc_mpls_l2vpn.sh192
-rwxr-xr-xtools/testing/selftests/net/forwarding/tc_police.sh441
-rwxr-xr-xtools/testing/selftests/net/forwarding/tc_tunnel_key.sh164
-rw-r--r--tools/testing/selftests/net/forwarding/tsn_lib.sh249
-rwxr-xr-xtools/testing/selftests/net/forwarding/vxlan_asymmetric.sh12
-rwxr-xr-xtools/testing/selftests/net/forwarding/vxlan_asymmetric_ipv6.sh504
-rwxr-xr-xtools/testing/selftests/net/forwarding/vxlan_bridge_1d.sh17
-rwxr-xr-xtools/testing/selftests/net/forwarding/vxlan_bridge_1d_ipv6.sh804
-rwxr-xr-xtools/testing/selftests/net/forwarding/vxlan_bridge_1d_port_8472_ipv6.sh11
-rwxr-xr-xtools/testing/selftests/net/forwarding/vxlan_bridge_1q.sh30
-rwxr-xr-xtools/testing/selftests/net/forwarding/vxlan_bridge_1q_ipv6.sh837
-rwxr-xr-xtools/testing/selftests/net/forwarding/vxlan_bridge_1q_port_8472_ipv6.sh11
-rwxr-xr-xtools/testing/selftests/net/forwarding/vxlan_symmetric.sh10
-rwxr-xr-xtools/testing/selftests/net/forwarding/vxlan_symmetric_ipv6.sh563
-rwxr-xr-xtools/testing/selftests/net/fq_band_pktlimit.sh59
-rwxr-xr-xtools/testing/selftests/net/gre_gso.sh235
-rw-r--r--tools/testing/selftests/net/gro.c1187
-rwxr-xr-xtools/testing/selftests/net/gro.sh104
-rw-r--r--tools/testing/selftests/net/hsr/Makefile7
-rw-r--r--tools/testing/selftests/net/hsr/config4
-rwxr-xr-xtools/testing/selftests/net/hsr/hsr_ping.sh276
-rw-r--r--tools/testing/selftests/net/hwtstamp_config.c6
-rwxr-xr-xtools/testing/selftests/net/icmp.sh72
-rwxr-xr-xtools/testing/selftests/net/icmp_redirect.sh195
-rw-r--r--tools/testing/selftests/net/io_uring_zerocopy_tx.c320
-rwxr-xr-xtools/testing/selftests/net/io_uring_zerocopy_tx.sh126
-rwxr-xr-xtools/testing/selftests/net/ioam6.sh771
-rw-r--r--tools/testing/selftests/net/ioam6_parser.c674
-rwxr-xr-xtools/testing/selftests/net/ip_defrag.sh2
-rw-r--r--tools/testing/selftests/net/ip_local_port_range.c463
-rwxr-xr-xtools/testing/selftests/net/ip_local_port_range.sh5
-rw-r--r--tools/testing/selftests/net/ipsec.c2341
-rw-r--r--tools/testing/selftests/net/ipv6_flowlabel.c75
-rwxr-xr-xtools/testing/selftests/net/ipv6_flowlabel.sh16
-rwxr-xr-xtools/testing/selftests/net/l2_tos_ttl_inherit.sh446
-rwxr-xr-xtools/testing/selftests/net/l2tp.sh130
-rw-r--r--tools/testing/selftests/net/lib.sh96
-rw-r--r--tools/testing/selftests/net/mptcp/.gitignore2
-rw-r--r--tools/testing/selftests/net/mptcp/Makefile10
-rw-r--r--tools/testing/selftests/net/mptcp/config30
-rwxr-xr-xtools/testing/selftests/net/mptcp/diag.sh308
-rw-r--r--tools/testing/selftests/net/mptcp/mptcp_connect.c860
-rwxr-xr-xtools/testing/selftests/net/mptcp/mptcp_connect.sh709
-rw-r--r--tools/testing/selftests/net/mptcp/mptcp_inq.c599
-rwxr-xr-xtools/testing/selftests/net/mptcp/mptcp_join.sh3803
-rw-r--r--tools/testing/selftests/net/mptcp/mptcp_lib.sh507
-rw-r--r--tools/testing/selftests/net/mptcp/mptcp_sockopt.c866
-rwxr-xr-xtools/testing/selftests/net/mptcp/mptcp_sockopt.sh338
-rwxr-xr-xtools/testing/selftests/net/mptcp/pm_netlink.sh157
-rw-r--r--tools/testing/selftests/net/mptcp/pm_nl_ctl.c989
-rw-r--r--tools/testing/selftests/net/mptcp/settings2
-rwxr-xr-xtools/testing/selftests/net/mptcp/simult_flows.sh294
-rwxr-xr-xtools/testing/selftests/net/mptcp/userspace_pm.sh897
-rw-r--r--tools/testing/selftests/net/msg_zerocopy.c5
-rwxr-xr-xtools/testing/selftests/net/msg_zerocopy.sh9
-rw-r--r--tools/testing/selftests/net/nat6to4.c285
-rwxr-xr-xtools/testing/selftests/net/ndisc_unsolicited_na_test.sh249
-rw-r--r--tools/testing/selftests/net/net_helper.sh25
-rwxr-xr-xtools/testing/selftests/net/netns-name.sh87
-rw-r--r--tools/testing/selftests/net/nettest.c811
-rw-r--r--tools/testing/selftests/net/openvswitch/Makefile13
-rwxr-xr-xtools/testing/selftests/net/openvswitch/openvswitch.sh722
-rw-r--r--tools/testing/selftests/net/openvswitch/ovs-dpctl.py2236
-rwxr-xr-xtools/testing/selftests/net/pmtu.sh931
-rw-r--r--tools/testing/selftests/net/psock_fanout.c84
-rw-r--r--tools/testing/selftests/net/psock_lib.h4
-rw-r--r--tools/testing/selftests/net/psock_snd.c2
-rwxr-xr-xtools/testing/selftests/net/psock_snd.sh17
-rw-r--r--tools/testing/selftests/net/reuseaddr_ports_exhausted.c32
-rw-r--r--tools/testing/selftests/net/reuseport_bpf.c6
-rw-r--r--tools/testing/selftests/net/reuseport_bpf_numa.c6
-rwxr-xr-xtools/testing/selftests/net/route_localnet.sh6
-rwxr-xr-xtools/testing/selftests/net/rps_default_mask.sh79
-rwxr-xr-xtools/testing/selftests/net/rtnetlink.sh1246
-rwxr-xr-xtools/testing/selftests/net/run_afpackettests5
-rw-r--r--tools/testing/selftests/net/rxtimestamp.c127
-rwxr-xr-xtools/testing/selftests/net/rxtimestamp.sh4
-rw-r--r--tools/testing/selftests/net/sctp_hello.c137
-rwxr-xr-xtools/testing/selftests/net/sctp_vrf.sh172
-rw-r--r--tools/testing/selftests/net/settings1
-rw-r--r--tools/testing/selftests/net/setup_loopback.sh120
-rw-r--r--tools/testing/selftests/net/setup_veth.sh44
-rw-r--r--tools/testing/selftests/net/sk_bind_sendto_listen.c80
-rw-r--r--tools/testing/selftests/net/sk_connect_zero_addr.c62
-rw-r--r--tools/testing/selftests/net/so_incoming_cpu.c274
-rw-r--r--tools/testing/selftests/net/so_netns_cookie.c61
-rw-r--r--tools/testing/selftests/net/so_txtime.c256
-rwxr-xr-xtools/testing/selftests/net/so_txtime.sh120
-rw-r--r--tools/testing/selftests/net/socket.c3
-rwxr-xr-xtools/testing/selftests/net/srv6_end_dt46_l3vpn_test.sh573
-rwxr-xr-xtools/testing/selftests/net/srv6_end_dt4_l3vpn_test.sh496
-rwxr-xr-xtools/testing/selftests/net/srv6_end_dt6_l3vpn_test.sh501
-rwxr-xr-xtools/testing/selftests/net/srv6_end_flavors_test.sh869
-rwxr-xr-xtools/testing/selftests/net/srv6_end_next_csid_l3vpn_test.sh1145
-rwxr-xr-xtools/testing/selftests/net/srv6_end_x_next_csid_l3vpn_test.sh1213
-rwxr-xr-xtools/testing/selftests/net/srv6_hencap_red_l3vpn_test.sh879
-rwxr-xr-xtools/testing/selftests/net/srv6_hl2encap_red_l2vpn_test.sh821
-rw-r--r--tools/testing/selftests/net/stress_reuseport_listen.c105
-rwxr-xr-xtools/testing/selftests/net/stress_reuseport_listen.sh25
-rw-r--r--tools/testing/selftests/net/tap.c434
-rw-r--r--tools/testing/selftests/net/tcp_ao/.gitignore2
-rw-r--r--tools/testing/selftests/net/tcp_ao/Makefile56
-rw-r--r--tools/testing/selftests/net/tcp_ao/bench-lookups.c360
-rw-r--r--tools/testing/selftests/net/tcp_ao/config10
-rw-r--r--tools/testing/selftests/net/tcp_ao/connect-deny.c264
-rw-r--r--tools/testing/selftests/net/tcp_ao/connect.c90
l---------tools/testing/selftests/net/tcp_ao/icmps-accept.c1
-rw-r--r--tools/testing/selftests/net/tcp_ao/icmps-discard.c449
-rw-r--r--tools/testing/selftests/net/tcp_ao/key-management.c1186
-rw-r--r--tools/testing/selftests/net/tcp_ao/lib/aolib.h605
-rw-r--r--tools/testing/selftests/net/tcp_ao/lib/kconfig.c148
-rw-r--r--tools/testing/selftests/net/tcp_ao/lib/netlink.c413
-rw-r--r--tools/testing/selftests/net/tcp_ao/lib/proc.c273
-rw-r--r--tools/testing/selftests/net/tcp_ao/lib/repair.c254
-rw-r--r--tools/testing/selftests/net/tcp_ao/lib/setup.c361
-rw-r--r--tools/testing/selftests/net/tcp_ao/lib/sock.c596
-rw-r--r--tools/testing/selftests/net/tcp_ao/lib/utils.c30
-rw-r--r--tools/testing/selftests/net/tcp_ao/restore.c236
-rw-r--r--tools/testing/selftests/net/tcp_ao/rst.c457
-rw-r--r--tools/testing/selftests/net/tcp_ao/self-connect.c197
-rw-r--r--tools/testing/selftests/net/tcp_ao/seq-ext.c245
-rw-r--r--tools/testing/selftests/net/tcp_ao/setsockopt-closed.c835
-rw-r--r--tools/testing/selftests/net/tcp_ao/settings1
-rw-r--r--tools/testing/selftests/net/tcp_ao/unsigned-md5.c741
-rw-r--r--tools/testing/selftests/net/tcp_fastopen_backup_key.c6
-rw-r--r--tools/testing/selftests/net/tcp_mmap.c169
-rwxr-xr-xtools/testing/selftests/net/test_bridge_backup_port.sh775
-rwxr-xr-xtools/testing/selftests/net/test_bridge_neigh_suppress.sh855
-rw-r--r--tools/testing/selftests/net/test_ingress_egress_chaining.sh79
-rwxr-xr-xtools/testing/selftests/net/test_vxlan_mdb.sh2511
-rwxr-xr-xtools/testing/selftests/net/test_vxlan_nolocalbypass.sh238
-rwxr-xr-xtools/testing/selftests/net/test_vxlan_under_vrf.sh72
-rwxr-xr-xtools/testing/selftests/net/test_vxlan_vnifiltering.sh607
-rw-r--r--tools/testing/selftests/net/timestamping.c102
-rw-r--r--tools/testing/selftests/net/tls.c1153
-rw-r--r--tools/testing/selftests/net/toeplitz.c589
-rwxr-xr-xtools/testing/selftests/net/toeplitz.sh199
-rwxr-xr-xtools/testing/selftests/net/toeplitz_client.sh28
-rwxr-xr-xtools/testing/selftests/net/traceroute.sh82
-rw-r--r--tools/testing/selftests/net/tun.c162
-rw-r--r--tools/testing/selftests/net/txtimestamp.c25
-rwxr-xr-xtools/testing/selftests/net/txtimestamp.sh14
-rwxr-xr-xtools/testing/selftests/net/udpgro.sh55
-rwxr-xr-xtools/testing/selftests/net/udpgro_bench.sh13
-rwxr-xr-xtools/testing/selftests/net/udpgro_frglist.sh104
-rwxr-xr-xtools/testing/selftests/net/udpgro_fwd.sh271
-rw-r--r--tools/testing/selftests/net/udpgso.c146
-rwxr-xr-xtools/testing/selftests/net/udpgso.sh49
-rwxr-xr-xtools/testing/selftests/net/udpgso_bench.sh26
-rw-r--r--tools/testing/selftests/net/udpgso_bench_rx.c26
-rw-r--r--tools/testing/selftests/net/udpgso_bench_tx.c44
-rwxr-xr-xtools/testing/selftests/net/unicast_extensions.sh225
-rwxr-xr-xtools/testing/selftests/net/veth.sh392
-rwxr-xr-xtools/testing/selftests/net/vlan_hw_filter.sh29
-rwxr-xr-xtools/testing/selftests/net/vrf-xfrm-tests.sh109
-rwxr-xr-xtools/testing/selftests/net/vrf_route_leaking.sh617
-rwxr-xr-xtools/testing/selftests/net/vrf_strict_mode_test.sh426
-rw-r--r--tools/testing/selftests/net/xdp_dummy.c13
-rwxr-xr-xtools/testing/selftests/net/xfrm_policy.sh181
-rw-r--r--tools/testing/selftests/netfilter/.gitignore6
-rw-r--r--tools/testing/selftests/netfilter/Makefile17
-rw-r--r--tools/testing/selftests/netfilter/audit_logread.c165
-rw-r--r--tools/testing/selftests/netfilter/bridge_netfilter.sh188
-rw-r--r--tools/testing/selftests/netfilter/config1
-rw-r--r--tools/testing/selftests/netfilter/connect_close.c136
-rw-r--r--tools/testing/selftests/netfilter/conntrack_dump_flush.c471
-rwxr-xr-xtools/testing/selftests/netfilter/conntrack_icmp_related.sh36
-rwxr-xr-xtools/testing/selftests/netfilter/conntrack_sctp_collision.sh89
-rwxr-xr-xtools/testing/selftests/netfilter/conntrack_tcp_unreplied.sh167
-rwxr-xr-xtools/testing/selftests/netfilter/conntrack_vrf.sh241
-rwxr-xr-xtools/testing/selftests/netfilter/ipip-conntrack-mtu.sh207
-rw-r--r--tools/testing/selftests/netfilter/nf-queue.c61
-rwxr-xr-xtools/testing/selftests/netfilter/nf_nat_edemux.sh127
-rwxr-xr-xtools/testing/selftests/netfilter/nft_audit.sh245
-rwxr-xr-xtools/testing/selftests/netfilter/nft_concat_range.sh161
-rwxr-xr-xtools/testing/selftests/netfilter/nft_conntrack_helper.sh48
-rwxr-xr-xtools/testing/selftests/netfilter/nft_fib.sh273
-rwxr-xr-xtools/testing/selftests/netfilter/nft_flowtable.sh581
-rwxr-xr-xtools/testing/selftests/netfilter/nft_meta.sh142
-rwxr-xr-xtools/testing/selftests/netfilter/nft_nat.sh355
-rwxr-xr-xtools/testing/selftests/netfilter/nft_nat_zones.sh309
-rwxr-xr-xtools/testing/selftests/netfilter/nft_queue.sh143
-rwxr-xr-xtools/testing/selftests/netfilter/nft_synproxy.sh117
-rwxr-xr-xtools/testing/selftests/netfilter/nft_trans_stress.sh91
-rwxr-xr-xtools/testing/selftests/netfilter/nft_zones_many.sh163
-rwxr-xr-xtools/testing/selftests/netfilter/rpath.sh169
-rw-r--r--tools/testing/selftests/netfilter/sctp_collision.c99
-rw-r--r--tools/testing/selftests/netfilter/settings1
-rwxr-xr-xtools/testing/selftests/netfilter/xt_string.sh128
-rw-r--r--tools/testing/selftests/nolibc/.gitignore7
-rw-r--r--tools/testing/selftests/nolibc/Makefile281
-rw-r--r--tools/testing/selftests/nolibc/nolibc-test-linkage.c26
-rw-r--r--tools/testing/selftests/nolibc/nolibc-test-linkage.h9
-rw-r--r--tools/testing/selftests/nolibc/nolibc-test.c1438
-rwxr-xr-xtools/testing/selftests/nolibc/run-tests.sh169
-rw-r--r--tools/testing/selftests/openat2/Makefile4
-rw-r--r--tools/testing/selftests/openat2/helpers.h12
-rw-r--r--tools/testing/selftests/openat2/openat2_test.c31
-rw-r--r--tools/testing/selftests/perf_events/.gitignore3
-rw-r--r--tools/testing/selftests/perf_events/Makefile6
-rw-r--r--tools/testing/selftests/perf_events/config1
-rw-r--r--tools/testing/selftests/perf_events/remove_on_exec.c260
-rw-r--r--tools/testing/selftests/perf_events/settings1
-rw-r--r--tools/testing/selftests/perf_events/sigtrap_threads.c240
-rw-r--r--tools/testing/selftests/pid_namespace/Makefile8
-rw-r--r--tools/testing/selftests/pid_namespace/regression_enomem.c1
-rw-r--r--tools/testing/selftests/pidfd/Makefile2
-rw-r--r--tools/testing/selftests/pidfd/config1
-rw-r--r--tools/testing/selftests/pidfd/pidfd.h20
-rw-r--r--tools/testing/selftests/pidfd/pidfd_fdinfo_test.c25
-rw-r--r--tools/testing/selftests/pidfd/pidfd_getfd_test.c37
-rw-r--r--tools/testing/selftests/pidfd/pidfd_open_test.c1
-rw-r--r--tools/testing/selftests/pidfd/pidfd_poll_test.c1
-rw-r--r--tools/testing/selftests/pidfd/pidfd_setns_test.c80
-rw-r--r--tools/testing/selftests/pidfd/pidfd_test.c82
-rw-r--r--tools/testing/selftests/pidfd/pidfd_wait.c320
-rw-r--r--tools/testing/selftests/power_supply/Makefile4
-rw-r--r--tools/testing/selftests/power_supply/helpers.sh178
-rwxr-xr-xtools/testing/selftests/power_supply/test_power_supply_properties.sh114
-rw-r--r--tools/testing/selftests/powerpc/Makefile22
-rw-r--r--tools/testing/selftests/powerpc/alignment/alignment_handler.c166
-rw-r--r--tools/testing/selftests/powerpc/benchmarks/context_switch.c27
-rw-r--r--tools/testing/selftests/powerpc/benchmarks/gettimeofday.c6
-rw-r--r--tools/testing/selftests/powerpc/benchmarks/null_syscall.c3
-rw-r--r--tools/testing/selftests/powerpc/copyloops/.gitignore3
-rw-r--r--tools/testing/selftests/powerpc/copyloops/Makefile15
-rw-r--r--tools/testing/selftests/powerpc/copyloops/asm/ppc_asm.h14
l---------tools/testing/selftests/powerpc/copyloops/copy_mc_64.S1
-rw-r--r--tools/testing/selftests/powerpc/copyloops/linux/export.h (renamed from tools/testing/selftests/powerpc/copyloops/asm/export.h)0
l---------tools/testing/selftests/powerpc/copyloops/mem_64.S1
l---------tools/testing/selftests/powerpc/copyloops/memcpy_mcsafe_64.S1
-rw-r--r--tools/testing/selftests/powerpc/copyloops/memcpy_stubs.S8
-rw-r--r--tools/testing/selftests/powerpc/copyloops/memmove_validate.c58
-rw-r--r--tools/testing/selftests/powerpc/dexcr/.gitignore2
-rw-r--r--tools/testing/selftests/powerpc/dexcr/Makefile9
-rw-r--r--tools/testing/selftests/powerpc/dexcr/dexcr.c132
-rw-r--r--tools/testing/selftests/powerpc/dexcr/dexcr.h49
-rw-r--r--tools/testing/selftests/powerpc/dexcr/hashchk_test.c227
-rw-r--r--tools/testing/selftests/powerpc/dexcr/lsdexcr.c141
-rw-r--r--tools/testing/selftests/powerpc/dscr/Makefile5
-rw-r--r--tools/testing/selftests/powerpc/dscr/dscr.h55
-rw-r--r--tools/testing/selftests/powerpc/dscr/dscr_default_test.c205
-rw-r--r--tools/testing/selftests/powerpc/dscr/dscr_explicit_test.c171
-rw-r--r--tools/testing/selftests/powerpc/dscr/dscr_inherit_exec_test.c2
-rw-r--r--tools/testing/selftests/powerpc/dscr/dscr_inherit_test.c6
-rw-r--r--tools/testing/selftests/powerpc/dscr/dscr_sysfs_test.c41
-rw-r--r--tools/testing/selftests/powerpc/dscr/dscr_sysfs_thread_test.c2
-rw-r--r--tools/testing/selftests/powerpc/dscr/dscr_user_test.c6
-rw-r--r--tools/testing/selftests/powerpc/eeh/Makefile2
-rwxr-xr-xtools/testing/selftests/powerpc/eeh/eeh-basic.sh40
-rw-r--r--[-rwxr-xr-x]tools/testing/selftests/powerpc/eeh/eeh-functions.sh179
-rwxr-xr-xtools/testing/selftests/powerpc/eeh/eeh-vf-aware.sh45
-rwxr-xr-xtools/testing/selftests/powerpc/eeh/eeh-vf-unaware.sh35
-rw-r--r--tools/testing/selftests/powerpc/eeh/settings1
-rw-r--r--tools/testing/selftests/powerpc/harness.c4
-rw-r--r--tools/testing/selftests/powerpc/include/basic_asm.h63
-rw-r--r--tools/testing/selftests/powerpc/include/instructions.h77
-rw-r--r--tools/testing/selftests/powerpc/include/pkeys.h136
-rw-r--r--tools/testing/selftests/powerpc/include/reg.h87
-rw-r--r--tools/testing/selftests/powerpc/include/subunit.h16
-rw-r--r--tools/testing/selftests/powerpc/include/utils.h107
-rw-r--r--tools/testing/selftests/powerpc/lib/reg.S107
-rw-r--r--tools/testing/selftests/powerpc/math/.gitignore2
-rw-r--r--tools/testing/selftests/powerpc/math/Makefile12
-rw-r--r--tools/testing/selftests/powerpc/math/fpu.h25
-rw-r--r--tools/testing/selftests/powerpc/math/fpu_asm.S48
-rw-r--r--tools/testing/selftests/powerpc/math/fpu_denormal.c38
-rw-r--r--tools/testing/selftests/powerpc/math/fpu_preempt.c30
-rw-r--r--tools/testing/selftests/powerpc/math/fpu_signal.c16
-rw-r--r--tools/testing/selftests/powerpc/math/fpu_syscall.c8
-rw-r--r--tools/testing/selftests/powerpc/math/mma.S36
-rw-r--r--tools/testing/selftests/powerpc/math/mma.c48
-rw-r--r--tools/testing/selftests/powerpc/math/vmx_preempt.c13
-rw-r--r--tools/testing/selftests/powerpc/math/vmx_signal.c4
-rw-r--r--tools/testing/selftests/powerpc/math/vmx_syscall.c7
-rw-r--r--tools/testing/selftests/powerpc/math/vsx_preempt.c2
-rw-r--r--tools/testing/selftests/powerpc/mce/.gitignore1
-rw-r--r--tools/testing/selftests/powerpc/mce/Makefile7
-rw-r--r--tools/testing/selftests/powerpc/mce/inject-ra-err.c65
l---------tools/testing/selftests/powerpc/mce/vas-api.h1
-rw-r--r--tools/testing/selftests/powerpc/mm/.gitignore16
-rw-r--r--tools/testing/selftests/powerpc/mm/Makefile20
-rw-r--r--tools/testing/selftests/powerpc/mm/bad_accesses.c31
-rw-r--r--tools/testing/selftests/powerpc/mm/exec_prot.c231
-rw-r--r--tools/testing/selftests/powerpc/mm/large_vm_gpr_corruption.c158
-rw-r--r--tools/testing/selftests/powerpc/mm/pkey_exec_prot.c294
-rw-r--r--tools/testing/selftests/powerpc/mm/pkey_siginfo.c333
-rw-r--r--tools/testing/selftests/powerpc/mm/prot_sao.c10
-rw-r--r--tools/testing/selftests/powerpc/mm/stack_expansion_ldst.c202
-rw-r--r--tools/testing/selftests/powerpc/mm/stack_expansion_signal.c118
-rwxr-xr-xtools/testing/selftests/powerpc/mm/stress_code_patching.sh49
-rw-r--r--tools/testing/selftests/powerpc/mm/tlbie_test.c1
-rw-r--r--tools/testing/selftests/powerpc/nx-gzip/.gitignore3
-rw-r--r--tools/testing/selftests/powerpc/nx-gzip/Makefile4
-rw-r--r--tools/testing/selftests/powerpc/nx-gzip/gunz_test.c4
-rw-r--r--tools/testing/selftests/powerpc/nx-gzip/gzfht_test.c75
-rw-r--r--tools/testing/selftests/powerpc/papr_attributes/.gitignore (renamed from tools/testing/selftests/rcutorture/formal/srcu-cbmc/tests/store_buffering/.gitignore)2
-rw-r--r--tools/testing/selftests/powerpc/papr_attributes/Makefile7
-rw-r--r--tools/testing/selftests/powerpc/papr_attributes/attr_test.c113
-rw-r--r--tools/testing/selftests/powerpc/papr_sysparm/.gitignore1
-rw-r--r--tools/testing/selftests/powerpc/papr_sysparm/Makefile12
-rw-r--r--tools/testing/selftests/powerpc/papr_sysparm/papr_sysparm.c196
-rw-r--r--tools/testing/selftests/powerpc/papr_vpd/.gitignore1
-rw-r--r--tools/testing/selftests/powerpc/papr_vpd/Makefile12
-rw-r--r--tools/testing/selftests/powerpc/papr_vpd/papr_vpd.c352
-rw-r--r--tools/testing/selftests/powerpc/pmu/Makefile40
-rw-r--r--tools/testing/selftests/powerpc/pmu/branch_loops.S28
-rw-r--r--tools/testing/selftests/powerpc/pmu/count_stcx_fail.c3
-rw-r--r--tools/testing/selftests/powerpc/pmu/ebb/.gitignore1
-rw-r--r--tools/testing/selftests/powerpc/pmu/ebb/Makefile6
-rw-r--r--tools/testing/selftests/powerpc/pmu/ebb/back_to_back_ebbs_test.c2
-rw-r--r--tools/testing/selftests/powerpc/pmu/ebb/cpu_event_pinned_vs_ebb_test.c3
-rw-r--r--tools/testing/selftests/powerpc/pmu/ebb/cpu_event_vs_ebb_test.c3
-rw-r--r--tools/testing/selftests/powerpc/pmu/ebb/cycles_test.c2
-rw-r--r--tools/testing/selftests/powerpc/pmu/ebb/cycles_with_freeze_test.c2
-rw-r--r--tools/testing/selftests/powerpc/pmu/ebb/cycles_with_mmcr2_test.c3
-rw-r--r--tools/testing/selftests/powerpc/pmu/ebb/ebb.c2
-rw-r--r--tools/testing/selftests/powerpc/pmu/ebb/ebb.h2
-rw-r--r--tools/testing/selftests/powerpc/pmu/ebb/ebb_on_willing_child_test.c2
-rw-r--r--tools/testing/selftests/powerpc/pmu/ebb/ebb_vs_cpu_event_test.c3
-rw-r--r--tools/testing/selftests/powerpc/pmu/ebb/fixed_instruction_loop.S43
-rw-r--r--tools/testing/selftests/powerpc/pmu/ebb/lost_exception_test.c1
-rw-r--r--tools/testing/selftests/powerpc/pmu/ebb/multi_counter_test.c7
-rw-r--r--tools/testing/selftests/powerpc/pmu/ebb/multi_ebb_procs_test.c8
-rw-r--r--tools/testing/selftests/powerpc/pmu/ebb/no_handler_test.c2
-rw-r--r--tools/testing/selftests/powerpc/pmu/ebb/pmae_handling_test.c2
-rw-r--r--tools/testing/selftests/powerpc/pmu/ebb/pmc56_overflow_test.c2
-rw-r--r--tools/testing/selftests/powerpc/pmu/ebb/regs_access_pmccext_test.c63
-rw-r--r--tools/testing/selftests/powerpc/pmu/event.c19
-rw-r--r--tools/testing/selftests/powerpc/pmu/event.h6
-rw-r--r--tools/testing/selftests/powerpc/pmu/event_code_tests/.gitignore20
-rw-r--r--tools/testing/selftests/powerpc/pmu/event_code_tests/Makefile15
-rw-r--r--tools/testing/selftests/powerpc/pmu/event_code_tests/blacklisted_events_test.c132
-rw-r--r--tools/testing/selftests/powerpc/pmu/event_code_tests/event_alternatives_tests_p10.c109
-rw-r--r--tools/testing/selftests/powerpc/pmu/event_code_tests/event_alternatives_tests_p9.c116
-rw-r--r--tools/testing/selftests/powerpc/pmu/event_code_tests/generic_events_valid_test.c130
-rw-r--r--tools/testing/selftests/powerpc/pmu/event_code_tests/group_constraint_cache_test.c60
-rw-r--r--tools/testing/selftests/powerpc/pmu/event_code_tests/group_constraint_l2l3_sel_test.c64
-rw-r--r--tools/testing/selftests/powerpc/pmu/event_code_tests/group_constraint_mmcra_sample_test.c54
-rw-r--r--tools/testing/selftests/powerpc/pmu/event_code_tests/group_constraint_pmc56_test.c63
-rw-r--r--tools/testing/selftests/powerpc/pmu/event_code_tests/group_constraint_pmc_count_test.c70
-rw-r--r--tools/testing/selftests/powerpc/pmu/event_code_tests/group_constraint_radix_scope_qual_test.c56
-rw-r--r--tools/testing/selftests/powerpc/pmu/event_code_tests/group_constraint_repeat_test.c56
-rw-r--r--tools/testing/selftests/powerpc/pmu/event_code_tests/group_constraint_thresh_cmp_test.c96
-rw-r--r--tools/testing/selftests/powerpc/pmu/event_code_tests/group_constraint_thresh_ctl_test.c64
-rw-r--r--tools/testing/selftests/powerpc/pmu/event_code_tests/group_constraint_thresh_sel_test.c63
-rw-r--r--tools/testing/selftests/powerpc/pmu/event_code_tests/group_constraint_unit_test.c74
-rw-r--r--tools/testing/selftests/powerpc/pmu/event_code_tests/group_pmc56_exclude_constraints_test.c64
-rw-r--r--tools/testing/selftests/powerpc/pmu/event_code_tests/hw_cache_event_type_test.c88
-rw-r--r--tools/testing/selftests/powerpc/pmu/event_code_tests/invalid_event_code_test.c67
-rw-r--r--tools/testing/selftests/powerpc/pmu/event_code_tests/reserved_bits_mmcra_sample_elig_mode_test.c77
-rw-r--r--tools/testing/selftests/powerpc/pmu/event_code_tests/reserved_bits_mmcra_thresh_ctl_test.c44
-rw-r--r--tools/testing/selftests/powerpc/pmu/l3_bank_test.c3
-rw-r--r--tools/testing/selftests/powerpc/pmu/lib.c53
-rw-r--r--tools/testing/selftests/powerpc/pmu/lib.h2
-rw-r--r--tools/testing/selftests/powerpc/pmu/per_event_excludes.c5
-rw-r--r--tools/testing/selftests/powerpc/pmu/sampling_tests/.gitignore21
-rw-r--r--tools/testing/selftests/powerpc/pmu/sampling_tests/Makefile15
-rw-r--r--tools/testing/selftests/powerpc/pmu/sampling_tests/bhrb_filter_map_test.c114
-rw-r--r--tools/testing/selftests/powerpc/pmu/sampling_tests/bhrb_no_crash_wo_pmu_test.c59
-rw-r--r--tools/testing/selftests/powerpc/pmu/sampling_tests/intr_regs_no_crash_wo_pmu_test.c57
-rw-r--r--tools/testing/selftests/powerpc/pmu/sampling_tests/misc.c537
-rw-r--r--tools/testing/selftests/powerpc/pmu/sampling_tests/misc.h232
-rw-r--r--tools/testing/selftests/powerpc/pmu/sampling_tests/mmcr0_cc56run_test.c59
-rw-r--r--tools/testing/selftests/powerpc/pmu/sampling_tests/mmcr0_exceptionbits_test.c59
-rw-r--r--tools/testing/selftests/powerpc/pmu/sampling_tests/mmcr0_fc56_pmc1ce_test.c59
-rw-r--r--tools/testing/selftests/powerpc/pmu/sampling_tests/mmcr0_fc56_pmc56_test.c58
-rw-r--r--tools/testing/selftests/powerpc/pmu/sampling_tests/mmcr0_pmccext_test.c59
-rw-r--r--tools/testing/selftests/powerpc/pmu/sampling_tests/mmcr0_pmcjce_test.c58
-rw-r--r--tools/testing/selftests/powerpc/pmu/sampling_tests/mmcr1_comb_test.c66
-rw-r--r--tools/testing/selftests/powerpc/pmu/sampling_tests/mmcr1_sel_unit_cache_test.c77
-rw-r--r--tools/testing/selftests/powerpc/pmu/sampling_tests/mmcr2_fcs_fch_test.c85
-rw-r--r--tools/testing/selftests/powerpc/pmu/sampling_tests/mmcr2_l2l3_test.c74
-rw-r--r--tools/testing/selftests/powerpc/pmu/sampling_tests/mmcr3_src_test.c67
-rw-r--r--tools/testing/selftests/powerpc/pmu/sampling_tests/mmcra_bhrb_any_test.c65
-rw-r--r--tools/testing/selftests/powerpc/pmu/sampling_tests/mmcra_bhrb_cond_test.c69
-rw-r--r--tools/testing/selftests/powerpc/pmu/sampling_tests/mmcra_bhrb_disable_no_branch_test.c64
-rw-r--r--tools/testing/selftests/powerpc/pmu/sampling_tests/mmcra_bhrb_disable_test.c66
-rw-r--r--tools/testing/selftests/powerpc/pmu/sampling_tests/mmcra_bhrb_ind_call_test.c69
-rw-r--r--tools/testing/selftests/powerpc/pmu/sampling_tests/mmcra_thresh_cmp_test.c74
-rw-r--r--tools/testing/selftests/powerpc/pmu/sampling_tests/mmcra_thresh_marked_sample_test.c80
l---------tools/testing/selftests/powerpc/primitives/asm/extable.h1
-rw-r--r--tools/testing/selftests/powerpc/primitives/linux/bitops.h (renamed from tools/testing/selftests/rcutorture/formal/srcu-cbmc/empty_includes/linux/delay.h)0
l---------tools/testing/selftests/powerpc/primitives/linux/wordpart.h1
-rw-r--r--tools/testing/selftests/powerpc/ptrace/.gitignore1
-rw-r--r--tools/testing/selftests/powerpc/ptrace/Makefile41
-rw-r--r--tools/testing/selftests/powerpc/ptrace/child.h4
-rw-r--r--tools/testing/selftests/powerpc/ptrace/core-pkey.c34
-rw-r--r--tools/testing/selftests/powerpc/ptrace/perf-hwbreak.c671
-rw-r--r--tools/testing/selftests/powerpc/ptrace/ptrace-gpr.S52
-rw-r--r--tools/testing/selftests/powerpc/ptrace/ptrace-gpr.c125
-rw-r--r--tools/testing/selftests/powerpc/ptrace/ptrace-gpr.h14
-rw-r--r--tools/testing/selftests/powerpc/ptrace/ptrace-hwbreak.c159
-rw-r--r--tools/testing/selftests/powerpc/ptrace/ptrace-perf-asm.S33
-rw-r--r--tools/testing/selftests/powerpc/ptrace/ptrace-perf-hwbreak.c445
-rw-r--r--tools/testing/selftests/powerpc/ptrace/ptrace-pkey.c59
-rw-r--r--tools/testing/selftests/powerpc/ptrace/ptrace-tar.c3
-rw-r--r--tools/testing/selftests/powerpc/ptrace/ptrace-tm-gpr.c23
-rw-r--r--tools/testing/selftests/powerpc/ptrace/ptrace-tm-spd-gpr.c25
-rw-r--r--tools/testing/selftests/powerpc/ptrace/ptrace-tm-spd-tar.c3
-rw-r--r--tools/testing/selftests/powerpc/ptrace/ptrace-tm-spd-vsx.c3
-rw-r--r--tools/testing/selftests/powerpc/ptrace/ptrace-tm-spr.c3
-rw-r--r--tools/testing/selftests/powerpc/ptrace/ptrace-tm-tar.c3
-rw-r--r--tools/testing/selftests/powerpc/ptrace/ptrace-tm-vsx.c3
-rw-r--r--tools/testing/selftests/powerpc/ptrace/ptrace-vsx.c2
-rw-r--r--tools/testing/selftests/powerpc/ptrace/ptrace.h84
-rwxr-xr-xtools/testing/selftests/powerpc/scripts/hmi.sh2
-rw-r--r--tools/testing/selftests/powerpc/security/.gitignore3
-rw-r--r--tools/testing/selftests/powerpc/security/Makefile9
-rw-r--r--tools/testing/selftests/powerpc/security/entry_flush.c139
-rw-r--r--tools/testing/selftests/powerpc/security/flush_utils.c84
-rw-r--r--tools/testing/selftests/powerpc/security/flush_utils.h24
-rwxr-xr-xtools/testing/selftests/powerpc/security/mitigation-patching.sh78
-rw-r--r--tools/testing/selftests/powerpc/security/rfi_flush.c68
-rw-r--r--tools/testing/selftests/powerpc/security/spectre_v2.c29
-rw-r--r--tools/testing/selftests/powerpc/security/uaccess_flush.c158
-rw-r--r--tools/testing/selftests/powerpc/signal/.gitignore3
-rw-r--r--tools/testing/selftests/powerpc/signal/Makefile2
-rw-r--r--tools/testing/selftests/powerpc/signal/signal_tm.c1
-rw-r--r--tools/testing/selftests/powerpc/signal/sigreturn_kernel.c132
-rw-r--r--tools/testing/selftests/powerpc/signal/sigreturn_unaligned.c43
-rw-r--r--tools/testing/selftests/powerpc/stringloops/Makefile2
-rw-r--r--tools/testing/selftests/powerpc/stringloops/asm/ppc_asm.h1
-rw-r--r--tools/testing/selftests/powerpc/stringloops/linux/export.h (renamed from tools/testing/selftests/powerpc/stringloops/asm/export.h)0
-rw-r--r--tools/testing/selftests/powerpc/stringloops/memcmp.c46
-rw-r--r--tools/testing/selftests/powerpc/switch_endian/switch_endian_test.S23
-rw-r--r--tools/testing/selftests/powerpc/syscalls/.gitignore1
-rw-r--r--tools/testing/selftests/powerpc/syscalls/Makefile6
-rw-r--r--tools/testing/selftests/powerpc/syscalls/rtas_filter.c224
-rw-r--r--tools/testing/selftests/powerpc/tm/Makefile2
-rw-r--r--tools/testing/selftests/powerpc/tm/tm-exec.c1
-rw-r--r--tools/testing/selftests/powerpc/tm/tm-fork.c1
-rw-r--r--tools/testing/selftests/powerpc/tm/tm-poison.c13
-rw-r--r--tools/testing/selftests/powerpc/tm/tm-resched-dscr.c1
-rw-r--r--tools/testing/selftests/powerpc/tm/tm-signal-context-chk-fpu.c1
-rw-r--r--tools/testing/selftests/powerpc/tm/tm-signal-context-chk-gpr.c1
-rw-r--r--tools/testing/selftests/powerpc/tm/tm-signal-context-chk-vmx.c1
-rw-r--r--tools/testing/selftests/powerpc/tm/tm-signal-context-chk-vsx.c1
-rw-r--r--tools/testing/selftests/powerpc/tm/tm-signal-pagefault.c1
-rw-r--r--tools/testing/selftests/powerpc/tm/tm-signal-sigreturn-nt.c1
-rw-r--r--tools/testing/selftests/powerpc/tm/tm-signal-stack.c1
-rw-r--r--tools/testing/selftests/powerpc/tm/tm-sigreturn.c1
-rw-r--r--tools/testing/selftests/powerpc/tm/tm-syscall-asm.S37
-rw-r--r--tools/testing/selftests/powerpc/tm/tm-syscall.c38
-rw-r--r--tools/testing/selftests/powerpc/tm/tm-tar.c1
-rw-r--r--tools/testing/selftests/powerpc/tm/tm-tmspr.c11
-rw-r--r--tools/testing/selftests/powerpc/tm/tm-trap.c15
-rw-r--r--tools/testing/selftests/powerpc/tm/tm-unavailable.c10
-rw-r--r--tools/testing/selftests/powerpc/tm/tm-vmx-unavail.c2
-rw-r--r--tools/testing/selftests/powerpc/tm/tm-vmxcopy.c1
-rw-r--r--tools/testing/selftests/powerpc/tm/tm.h39
-rw-r--r--tools/testing/selftests/powerpc/utils.c539
l---------tools/testing/selftests/powerpc/vphn/asm/lppaca.h1
l---------tools/testing/selftests/powerpc/vphn/asm/vphn.h1
-rw-r--r--tools/testing/selftests/prctl/.gitignore2
-rw-r--r--tools/testing/selftests/prctl/Makefile4
-rw-r--r--tools/testing/selftests/prctl/config1
-rw-r--r--tools/testing/selftests/prctl/disable-tsc-ctxt-sw-stress-test.c2
-rw-r--r--tools/testing/selftests/prctl/disable-tsc-on-off-stress-test.c2
-rw-r--r--tools/testing/selftests/prctl/set-anon-vma-name-test.c104
-rw-r--r--tools/testing/selftests/prctl/set-process-name.c94
-rw-r--r--tools/testing/selftests/proc/.gitignore3
-rw-r--r--tools/testing/selftests/proc/Makefile4
-rw-r--r--tools/testing/selftests/proc/proc-empty-vm.c544
-rw-r--r--tools/testing/selftests/proc/proc-loadavg-001.c1
-rw-r--r--tools/testing/selftests/proc/proc-pid-vm.c64
-rw-r--r--tools/testing/selftests/proc/proc-self-syscall.c1
-rw-r--r--tools/testing/selftests/proc/proc-subset-pid.c121
-rw-r--r--tools/testing/selftests/proc/proc-tid0.c81
-rw-r--r--tools/testing/selftests/proc/proc-uptime-001.c25
-rw-r--r--tools/testing/selftests/proc/proc-uptime-002.c31
-rw-r--r--tools/testing/selftests/proc/proc-uptime.h28
-rw-r--r--tools/testing/selftests/proc/read.c4
-rw-r--r--tools/testing/selftests/ptp/Makefile9
-rw-r--r--tools/testing/selftests/ptp/ptpchmaskfmt.sh14
-rw-r--r--tools/testing/selftests/ptp/testptp.c208
-rw-r--r--tools/testing/selftests/ptrace/.gitignore2
-rw-r--r--tools/testing/selftests/ptrace/Makefile4
-rw-r--r--tools/testing/selftests/ptrace/get_set_sud.c72
-rw-r--r--tools/testing/selftests/ptrace/peeksiginfo.c14
-rwxr-xr-xtools/testing/selftests/rcutorture/bin/config2csv.sh66
-rwxr-xr-xtools/testing/selftests/rcutorture/bin/config_override.sh3
-rwxr-xr-xtools/testing/selftests/rcutorture/bin/configcheck.sh65
-rwxr-xr-xtools/testing/selftests/rcutorture/bin/configinit.sh7
-rwxr-xr-xtools/testing/selftests/rcutorture/bin/console-badness.sh18
-rwxr-xr-xtools/testing/selftests/rcutorture/bin/cpus2use.sh1
-rwxr-xr-x[-rw-r--r--]tools/testing/selftests/rcutorture/bin/functions.sh99
-rwxr-xr-xtools/testing/selftests/rcutorture/bin/jitter.sh26
-rw-r--r--tools/testing/selftests/rcutorture/bin/jitterstart.sh37
-rw-r--r--tools/testing/selftests/rcutorture/bin/jitterstop.sh23
-rwxr-xr-xtools/testing/selftests/rcutorture/bin/kcsan-collapse.sh2
-rwxr-xr-xtools/testing/selftests/rcutorture/bin/kvm-again.sh221
-rwxr-xr-xtools/testing/selftests/rcutorture/bin/kvm-assign-cpus.sh105
-rwxr-xr-xtools/testing/selftests/rcutorture/bin/kvm-build.sh19
-rwxr-xr-xtools/testing/selftests/rcutorture/bin/kvm-check-branches.sh102
-rwxr-xr-xtools/testing/selftests/rcutorture/bin/kvm-end-run-stats.sh39
-rwxr-xr-xtools/testing/selftests/rcutorture/bin/kvm-find-errors.sh21
-rwxr-xr-xtools/testing/selftests/rcutorture/bin/kvm-get-cpus-script.sh88
-rwxr-xr-xtools/testing/selftests/rcutorture/bin/kvm-recheck-lock.sh2
-rwxr-xr-xtools/testing/selftests/rcutorture/bin/kvm-recheck-rcu.sh4
-rwxr-xr-xtools/testing/selftests/rcutorture/bin/kvm-recheck-rcuscale-ftrace.sh (renamed from tools/testing/selftests/rcutorture/bin/kvm-recheck-rcuperf-ftrace.sh)6
-rwxr-xr-xtools/testing/selftests/rcutorture/bin/kvm-recheck-rcuscale.sh (renamed from tools/testing/selftests/rcutorture/bin/kvm-recheck-rcuperf.sh)24
-rwxr-xr-xtools/testing/selftests/rcutorture/bin/kvm-recheck-refscale.sh71
-rwxr-xr-xtools/testing/selftests/rcutorture/bin/kvm-recheck-scf.sh38
-rwxr-xr-xtools/testing/selftests/rcutorture/bin/kvm-recheck.sh69
-rwxr-xr-xtools/testing/selftests/rcutorture/bin/kvm-remote-noreap.sh30
-rwxr-xr-xtools/testing/selftests/rcutorture/bin/kvm-remote.sh280
-rwxr-xr-xtools/testing/selftests/rcutorture/bin/kvm-test-1-run-batch.sh90
-rwxr-xr-xtools/testing/selftests/rcutorture/bin/kvm-test-1-run-qemu.sh184
-rwxr-xr-xtools/testing/selftests/rcutorture/bin/kvm-test-1-run.sh196
-rwxr-xr-xtools/testing/selftests/rcutorture/bin/kvm-transform.sh139
-rwxr-xr-xtools/testing/selftests/rcutorture/bin/kvm.sh358
-rwxr-xr-xtools/testing/selftests/rcutorture/bin/mkinitrd.sh22
-rwxr-xr-xtools/testing/selftests/rcutorture/bin/parse-build.sh8
-rwxr-xr-xtools/testing/selftests/rcutorture/bin/parse-console.sh61
-rwxr-xr-xtools/testing/selftests/rcutorture/bin/srcu_lockdep.sh78
-rwxr-xr-xtools/testing/selftests/rcutorture/bin/torture.sh695
-rw-r--r--tools/testing/selftests/rcutorture/configs/lock/CFLIST2
-rw-r--r--tools/testing/selftests/rcutorture/configs/lock/LOCK086
-rw-r--r--tools/testing/selftests/rcutorture/configs/lock/LOCK08.boot1
-rw-r--r--tools/testing/selftests/rcutorture/configs/lock/LOCK096
-rw-r--r--tools/testing/selftests/rcutorture/configs/lock/LOCK09.boot1
-rw-r--r--tools/testing/selftests/rcutorture/configs/lock/ver_functions.sh5
-rw-r--r--tools/testing/selftests/rcutorture/configs/rcu/BUSTED-BOOST17
-rw-r--r--tools/testing/selftests/rcutorture/configs/rcu/BUSTED-BOOST.boot8
-rw-r--r--tools/testing/selftests/rcutorture/configs/rcu/CFLIST4
-rw-r--r--tools/testing/selftests/rcutorture/configs/rcu/RUDE014
-rw-r--r--tools/testing/selftests/rcutorture/configs/rcu/RUDE01.boot1
-rw-r--r--tools/testing/selftests/rcutorture/configs/rcu/SRCU-N2
-rw-r--r--tools/testing/selftests/rcutorture/configs/rcu/SRCU-N.boot1
-rw-r--r--tools/testing/selftests/rcutorture/configs/rcu/SRCU-P.boot2
-rw-r--r--tools/testing/selftests/rcutorture/configs/rcu/SRCU-T (renamed from tools/testing/selftests/rcutorture/configs/rcu/SRCU-t)4
-rw-r--r--tools/testing/selftests/rcutorture/configs/rcu/SRCU-T.boot (renamed from tools/testing/selftests/rcutorture/configs/rcu/SRCU-t.boot)0
-rw-r--r--tools/testing/selftests/rcutorture/configs/rcu/SRCU-U (renamed from tools/testing/selftests/rcutorture/configs/rcu/SRCU-u)4
-rw-r--r--tools/testing/selftests/rcutorture/configs/rcu/SRCU-U.boot (renamed from tools/testing/selftests/rcutorture/configs/rcu/SRCU-u.boot)0
-rw-r--r--tools/testing/selftests/rcutorture/configs/rcu/TASKS013
-rw-r--r--tools/testing/selftests/rcutorture/configs/rcu/TASKS01.boot2
-rw-r--r--tools/testing/selftests/rcutorture/configs/rcu/TASKS024
-rw-r--r--tools/testing/selftests/rcutorture/configs/rcu/TASKS02.boot1
-rw-r--r--tools/testing/selftests/rcutorture/configs/rcu/TASKS035
-rw-r--r--tools/testing/selftests/rcutorture/configs/rcu/TINY011
-rw-r--r--tools/testing/selftests/rcutorture/configs/rcu/TINY021
-rw-r--r--tools/testing/selftests/rcutorture/configs/rcu/TRACE0111
-rw-r--r--tools/testing/selftests/rcutorture/configs/rcu/TRACE01.boot1
-rw-r--r--tools/testing/selftests/rcutorture/configs/rcu/TRACE029
-rw-r--r--tools/testing/selftests/rcutorture/configs/rcu/TRACE02.boot1
-rw-r--r--tools/testing/selftests/rcutorture/configs/rcu/TREE011
-rw-r--r--tools/testing/selftests/rcutorture/configs/rcu/TREE01.boot4
-rw-r--r--tools/testing/selftests/rcutorture/configs/rcu/TREE021
-rw-r--r--tools/testing/selftests/rcutorture/configs/rcu/TREE02.boot1
-rw-r--r--tools/testing/selftests/rcutorture/configs/rcu/TREE03.boot1
-rw-r--r--tools/testing/selftests/rcutorture/configs/rcu/TREE047
-rw-r--r--tools/testing/selftests/rcutorture/configs/rcu/TREE04.boot2
-rw-r--r--tools/testing/selftests/rcutorture/configs/rcu/TREE052
-rw-r--r--tools/testing/selftests/rcutorture/configs/rcu/TREE061
-rw-r--r--tools/testing/selftests/rcutorture/configs/rcu/TREE072
-rw-r--r--tools/testing/selftests/rcutorture/configs/rcu/TREE07.boot3
-rw-r--r--tools/testing/selftests/rcutorture/configs/rcu/TREE081
-rw-r--r--tools/testing/selftests/rcutorture/configs/rcu/TREE08.boot2
-rw-r--r--tools/testing/selftests/rcutorture/configs/rcu/TREE092
-rw-r--r--tools/testing/selftests/rcutorture/configs/rcu/TREE102
-rw-r--r--tools/testing/selftests/rcutorture/configs/rcu/TREE10.boot1
-rw-r--r--tools/testing/selftests/rcutorture/configs/rcu/ver_functions.sh21
-rw-r--r--tools/testing/selftests/rcutorture/configs/rcuperf/CFcommon2
-rw-r--r--tools/testing/selftests/rcutorture/configs/rcuscale/CFLIST (renamed from tools/testing/selftests/rcutorture/configs/rcuperf/CFLIST)0
-rw-r--r--tools/testing/selftests/rcutorture/configs/rcuscale/CFcommon8
-rw-r--r--tools/testing/selftests/rcutorture/configs/rcuscale/TINY (renamed from tools/testing/selftests/rcutorture/configs/rcuperf/TINY)2
-rw-r--r--tools/testing/selftests/rcutorture/configs/rcuscale/TRACE0116
-rw-r--r--tools/testing/selftests/rcutorture/configs/rcuscale/TRACE01.boot1
-rw-r--r--tools/testing/selftests/rcutorture/configs/rcuscale/TREE (renamed from tools/testing/selftests/rcutorture/configs/rcuperf/TREE)5
-rw-r--r--tools/testing/selftests/rcutorture/configs/rcuscale/TREE54 (renamed from tools/testing/selftests/rcutorture/configs/rcuperf/TREE54)3
-rw-r--r--tools/testing/selftests/rcutorture/configs/rcuscale/ver_functions.sh (renamed from tools/testing/selftests/rcutorture/configs/rcuperf/ver_functions.sh)5
-rw-r--r--tools/testing/selftests/rcutorture/configs/refscale/CFLIST2
-rw-r--r--tools/testing/selftests/rcutorture/configs/refscale/CFcommon6
-rw-r--r--tools/testing/selftests/rcutorture/configs/refscale/NOPREEMPT20
-rw-r--r--tools/testing/selftests/rcutorture/configs/refscale/PREEMPT17
-rw-r--r--tools/testing/selftests/rcutorture/configs/refscale/ver_functions.sh17
-rw-r--r--tools/testing/selftests/rcutorture/configs/scf/CFLIST2
-rw-r--r--tools/testing/selftests/rcutorture/configs/scf/CFcommon2
-rw-r--r--tools/testing/selftests/rcutorture/configs/scf/NOPREEMPT13
-rw-r--r--tools/testing/selftests/rcutorture/configs/scf/NOPREEMPT.boot1
-rw-r--r--tools/testing/selftests/rcutorture/configs/scf/PREEMPT10
-rw-r--r--tools/testing/selftests/rcutorture/configs/scf/ver_functions.sh30
-rw-r--r--tools/testing/selftests/rcutorture/doc/TREE_RCU-kconfig.txt5
-rw-r--r--tools/testing/selftests/rcutorture/doc/initrd.txt36
-rw-r--r--tools/testing/selftests/rcutorture/doc/rcu-test-image.txt41
-rw-r--r--tools/testing/selftests/rcutorture/formal/srcu-cbmc/Makefile17
-rw-r--r--tools/testing/selftests/rcutorture/formal/srcu-cbmc/empty_includes/linux/export.h0
-rw-r--r--tools/testing/selftests/rcutorture/formal/srcu-cbmc/empty_includes/linux/mutex.h0
-rw-r--r--tools/testing/selftests/rcutorture/formal/srcu-cbmc/empty_includes/linux/percpu.h0
-rw-r--r--tools/testing/selftests/rcutorture/formal/srcu-cbmc/empty_includes/linux/preempt.h0
-rw-r--r--tools/testing/selftests/rcutorture/formal/srcu-cbmc/empty_includes/linux/rcupdate.h0
-rw-r--r--tools/testing/selftests/rcutorture/formal/srcu-cbmc/empty_includes/linux/sched.h0
-rw-r--r--tools/testing/selftests/rcutorture/formal/srcu-cbmc/empty_includes/linux/smp.h0
-rw-r--r--tools/testing/selftests/rcutorture/formal/srcu-cbmc/empty_includes/linux/workqueue.h0
-rw-r--r--tools/testing/selftests/rcutorture/formal/srcu-cbmc/empty_includes/uapi/linux/types.h0
-rw-r--r--tools/testing/selftests/rcutorture/formal/srcu-cbmc/include/linux/kconfig.h1
-rw-r--r--tools/testing/selftests/rcutorture/formal/srcu-cbmc/include/linux/types.h152
-rwxr-xr-xtools/testing/selftests/rcutorture/formal/srcu-cbmc/modify_srcu.awk376
-rw-r--r--tools/testing/selftests/rcutorture/formal/srcu-cbmc/src/assume.h17
-rw-r--r--tools/testing/selftests/rcutorture/formal/srcu-cbmc/src/barriers.h41
-rw-r--r--tools/testing/selftests/rcutorture/formal/srcu-cbmc/src/bug_on.h14
-rw-r--r--tools/testing/selftests/rcutorture/formal/srcu-cbmc/src/combined_source.c14
-rw-r--r--tools/testing/selftests/rcutorture/formal/srcu-cbmc/src/config.h28
-rw-r--r--tools/testing/selftests/rcutorture/formal/srcu-cbmc/src/include_srcu.c32
-rw-r--r--tools/testing/selftests/rcutorture/formal/srcu-cbmc/src/int_typedefs.h34
-rw-r--r--tools/testing/selftests/rcutorture/formal/srcu-cbmc/src/locks.h221
-rw-r--r--tools/testing/selftests/rcutorture/formal/srcu-cbmc/src/misc.c12
-rw-r--r--tools/testing/selftests/rcutorture/formal/srcu-cbmc/src/misc.h58
-rw-r--r--tools/testing/selftests/rcutorture/formal/srcu-cbmc/src/percpu.h93
-rw-r--r--tools/testing/selftests/rcutorture/formal/srcu-cbmc/src/preempt.c79
-rw-r--r--tools/testing/selftests/rcutorture/formal/srcu-cbmc/src/preempt.h59
-rw-r--r--tools/testing/selftests/rcutorture/formal/srcu-cbmc/src/simple_sync_srcu.c51
-rw-r--r--tools/testing/selftests/rcutorture/formal/srcu-cbmc/src/workqueues.h103
-rw-r--r--tools/testing/selftests/rcutorture/formal/srcu-cbmc/tests/store_buffering/Makefile12
-rw-r--r--tools/testing/selftests/rcutorture/formal/srcu-cbmc/tests/store_buffering/assert_end.fail1
-rw-r--r--tools/testing/selftests/rcutorture/formal/srcu-cbmc/tests/store_buffering/force.fail1
-rw-r--r--tools/testing/selftests/rcutorture/formal/srcu-cbmc/tests/store_buffering/force2.fail1
-rw-r--r--tools/testing/selftests/rcutorture/formal/srcu-cbmc/tests/store_buffering/force3.fail1
-rw-r--r--tools/testing/selftests/rcutorture/formal/srcu-cbmc/tests/store_buffering/main.pass0
-rw-r--r--tools/testing/selftests/rcutorture/formal/srcu-cbmc/tests/store_buffering/test.c73
-rwxr-xr-xtools/testing/selftests/rcutorture/formal/srcu-cbmc/tests/test_script.sh103
-rw-r--r--tools/testing/selftests/resctrl/.gitignore2
-rw-r--r--tools/testing/selftests/resctrl/Makefile19
-rw-r--r--tools/testing/selftests/resctrl/README43
-rw-r--r--tools/testing/selftests/resctrl/cache.c265
-rw-r--r--tools/testing/selftests/resctrl/cat_test.c454
-rw-r--r--tools/testing/selftests/resctrl/cmt_test.c181
-rw-r--r--tools/testing/selftests/resctrl/config2
-rw-r--r--tools/testing/selftests/resctrl/cqm_test.c176
-rw-r--r--tools/testing/selftests/resctrl/fill_buf.c182
-rw-r--r--tools/testing/selftests/resctrl/mba_test.c106
-rw-r--r--tools/testing/selftests/resctrl/mbm_test.c106
-rw-r--r--tools/testing/selftests/resctrl/resctrl.h180
-rw-r--r--tools/testing/selftests/resctrl/resctrl_tests.c314
-rw-r--r--tools/testing/selftests/resctrl/resctrl_val.c345
-rw-r--r--tools/testing/selftests/resctrl/resctrlfs.c584
-rw-r--r--tools/testing/selftests/resctrl/settings3
-rw-r--r--tools/testing/selftests/riscv/Makefile58
-rw-r--r--tools/testing/selftests/riscv/hwprobe/.gitignore1
-rw-r--r--tools/testing/selftests/riscv/hwprobe/Makefile18
-rw-r--r--tools/testing/selftests/riscv/hwprobe/cbo.c226
-rw-r--r--tools/testing/selftests/riscv/hwprobe/hwprobe.c66
-rw-r--r--tools/testing/selftests/riscv/hwprobe/hwprobe.h15
-rw-r--r--tools/testing/selftests/riscv/hwprobe/sys_hwprobe.S12
-rw-r--r--tools/testing/selftests/riscv/hwprobe/which-cpus.c154
-rw-r--r--tools/testing/selftests/riscv/mm/.gitignore2
-rw-r--r--tools/testing/selftests/riscv/mm/Makefile15
-rw-r--r--tools/testing/selftests/riscv/mm/mmap_bottomup.c35
-rw-r--r--tools/testing/selftests/riscv/mm/mmap_default.c35
-rw-r--r--tools/testing/selftests/riscv/mm/mmap_test.h67
-rwxr-xr-xtools/testing/selftests/riscv/mm/run_mmap.sh12
-rw-r--r--tools/testing/selftests/riscv/vector/.gitignore3
-rw-r--r--tools/testing/selftests/riscv/vector/Makefile19
-rw-r--r--tools/testing/selftests/riscv/vector/v_initval_nolibc.c68
-rw-r--r--tools/testing/selftests/riscv/vector/vstate_exec_nolibc.c112
-rw-r--r--tools/testing/selftests/riscv/vector/vstate_prctl.c181
-rw-r--r--tools/testing/selftests/rlimits/.gitignore2
-rw-r--r--tools/testing/selftests/rlimits/Makefile6
-rw-r--r--tools/testing/selftests/rlimits/config1
-rw-r--r--tools/testing/selftests/rlimits/rlimits-per-userns.c161
-rw-r--r--tools/testing/selftests/rseq/.gitignore4
-rw-r--r--tools/testing/selftests/rseq/Makefile28
-rw-r--r--tools/testing/selftests/rseq/basic_percpu_ops_test.c63
-rw-r--r--tools/testing/selftests/rseq/basic_test.c4
-rw-r--r--tools/testing/selftests/rseq/compiler.h62
-rw-r--r--tools/testing/selftests/rseq/param_test.c407
-rw-r--r--tools/testing/selftests/rseq/rseq-abi.h173
-rw-r--r--tools/testing/selftests/rseq/rseq-arm-bits.h505
-rw-r--r--tools/testing/selftests/rseq/rseq-arm.h703
-rw-r--r--tools/testing/selftests/rseq/rseq-arm64-bits.h392
-rw-r--r--tools/testing/selftests/rseq/rseq-arm64.h545
-rw-r--r--tools/testing/selftests/rseq/rseq-bits-reset.h11
-rw-r--r--tools/testing/selftests/rseq/rseq-bits-template.h41
-rw-r--r--tools/testing/selftests/rseq/rseq-generic-thread-pointer.h25
-rw-r--r--tools/testing/selftests/rseq/rseq-mips-bits.h462
-rw-r--r--tools/testing/selftests/rseq/rseq-mips.h687
-rw-r--r--tools/testing/selftests/rseq/rseq-ppc-bits.h454
-rw-r--r--tools/testing/selftests/rseq/rseq-ppc-thread-pointer.h30
-rw-r--r--tools/testing/selftests/rseq/rseq-ppc.h637
-rw-r--r--tools/testing/selftests/rseq/rseq-riscv-bits.h410
-rw-r--r--tools/testing/selftests/rseq/rseq-riscv.h198
-rw-r--r--tools/testing/selftests/rseq/rseq-s390-bits.h474
-rw-r--r--tools/testing/selftests/rseq/rseq-s390.h470
-rw-r--r--tools/testing/selftests/rseq/rseq-skip.h65
-rw-r--r--tools/testing/selftests/rseq/rseq-thread-pointer.h19
-rw-r--r--tools/testing/selftests/rseq/rseq-x86-bits.h993
-rw-r--r--tools/testing/selftests/rseq/rseq-x86-thread-pointer.h40
-rw-r--r--tools/testing/selftests/rseq/rseq-x86.h1076
-rw-r--r--tools/testing/selftests/rseq/rseq.c239
-rw-r--r--tools/testing/selftests/rseq/rseq.h239
-rwxr-xr-xtools/testing/selftests/rseq/run_param_test.sh7
-rw-r--r--tools/testing/selftests/rtc/rtctest.c99
-rw-r--r--tools/testing/selftests/rtc/settings2
-rwxr-xr-xtools/testing/selftests/run_kselftest.sh106
-rw-r--r--tools/testing/selftests/rust/Makefile4
-rw-r--r--tools/testing/selftests/rust/config5
-rwxr-xr-xtools/testing/selftests/rust/test_probe_samples.sh41
-rw-r--r--tools/testing/selftests/safesetid/Makefile2
-rw-r--r--tools/testing/selftests/safesetid/safesetid-test.c295
-rw-r--r--tools/testing/selftests/sched/.gitignore1
-rw-r--r--tools/testing/selftests/sched/Makefile14
-rw-r--r--tools/testing/selftests/sched/config1
-rw-r--r--tools/testing/selftests/sched/cs_prctl_test.c359
-rw-r--r--tools/testing/selftests/seccomp/Makefile3
-rw-r--r--tools/testing/selftests/seccomp/config2
-rw-r--r--tools/testing/selftests/seccomp/seccomp_benchmark.c261
-rw-r--r--tools/testing/selftests/seccomp/seccomp_bpf.c1774
-rw-r--r--tools/testing/selftests/seccomp/settings1
-rw-r--r--tools/testing/selftests/sgx/.gitignore2
-rw-r--r--tools/testing/selftests/sgx/Makefile60
-rw-r--r--tools/testing/selftests/sgx/call.S44
-rw-r--r--tools/testing/selftests/sgx/defines.h81
-rw-r--r--tools/testing/selftests/sgx/load.c370
-rw-r--r--tools/testing/selftests/sgx/main.c1993
-rw-r--r--tools/testing/selftests/sgx/main.h46
-rw-r--r--tools/testing/selftests/sgx/sign_key.S12
-rw-r--r--tools/testing/selftests/sgx/sign_key.pem39
-rw-r--r--tools/testing/selftests/sgx/sigstruct.c391
-rw-r--r--tools/testing/selftests/sgx/test_encl.c162
-rw-r--r--tools/testing/selftests/sgx/test_encl.lds41
-rw-r--r--tools/testing/selftests/sgx/test_encl_bootstrap.S92
-rw-r--r--tools/testing/selftests/sigaltstack/current_stack_pointer.h23
-rw-r--r--tools/testing/selftests/sigaltstack/sas.c31
-rw-r--r--tools/testing/selftests/sparc64/drivers/adi-test.c4
-rw-r--r--tools/testing/selftests/splice/.gitignore1
-rw-r--r--tools/testing/selftests/splice/Makefile4
-rw-r--r--tools/testing/selftests/splice/config1
-rw-r--r--tools/testing/selftests/splice/settings1
-rwxr-xr-xtools/testing/selftests/splice/short_splice_read.sh133
-rw-r--r--tools/testing/selftests/splice/splice_read.c57
-rwxr-xr-xtools/testing/selftests/static_keys/test_static_keys.sh8
-rw-r--r--tools/testing/selftests/sync/Makefile2
-rw-r--r--tools/testing/selftests/sync/config2
-rw-r--r--tools/testing/selftests/sync/sync_test.c2
-rw-r--r--tools/testing/selftests/syscall_user_dispatch/.gitignore3
-rw-r--r--tools/testing/selftests/syscall_user_dispatch/Makefile9
-rw-r--r--tools/testing/selftests/syscall_user_dispatch/config1
-rw-r--r--tools/testing/selftests/syscall_user_dispatch/sud_benchmark.c202
-rw-r--r--tools/testing/selftests/syscall_user_dispatch/sud_test.c312
-rwxr-xr-xtools/testing/selftests/sysctl/sysctl.sh270
-rw-r--r--tools/testing/selftests/tc-testing/.gitignore1
-rw-r--r--tools/testing/selftests/tc-testing/Makefile6
-rw-r--r--tools/testing/selftests/tc-testing/README67
-rw-r--r--tools/testing/selftests/tc-testing/TdcPlugin.py4
-rw-r--r--tools/testing/selftests/tc-testing/TdcResults.py3
-rw-r--r--tools/testing/selftests/tc-testing/action-ebpfbin0 -> 856 bytes
-rw-r--r--tools/testing/selftests/tc-testing/action.c (renamed from tools/testing/selftests/tc-testing/bpf/action.c)0
-rw-r--r--tools/testing/selftests/tc-testing/bpf/Makefile30
-rw-r--r--tools/testing/selftests/tc-testing/config53
-rw-r--r--tools/testing/selftests/tc-testing/creating-testcases/AddingTestCases.txt2
-rw-r--r--tools/testing/selftests/tc-testing/plugin-lib/buildebpfPlugin.py67
-rw-r--r--tools/testing/selftests/tc-testing/plugin-lib/nsPlugin.py242
-rw-r--r--tools/testing/selftests/tc-testing/plugin-lib/rootPlugin.py4
-rw-r--r--tools/testing/selftests/tc-testing/plugin-lib/scapyPlugin.py42
-rw-r--r--tools/testing/selftests/tc-testing/plugin-lib/valgrindPlugin.py5
-rwxr-xr-xtools/testing/selftests/tc-testing/scripts/taprio_wait_for_admin.sh16
-rw-r--r--tools/testing/selftests/tc-testing/settings1
-rw-r--r--tools/testing/selftests/tc-testing/tc-tests/actions/bpf.json14
-rw-r--r--tools/testing/selftests/tc-testing/tc-tests/actions/connmark.json95
-rw-r--r--tools/testing/selftests/tc-testing/tc-tests/actions/csum.json69
-rw-r--r--tools/testing/selftests/tc-testing/tc-tests/actions/ct.json99
-rw-r--r--tools/testing/selftests/tc-testing/tc-tests/actions/ctinfo.json352
-rw-r--r--tools/testing/selftests/tc-testing/tc-tests/actions/gact.json152
-rw-r--r--tools/testing/selftests/tc-testing/tc-tests/actions/gate.json351
-rw-r--r--tools/testing/selftests/tc-testing/tc-tests/actions/ife.json194
-rw-r--r--tools/testing/selftests/tc-testing/tc-tests/actions/mirred.json475
-rw-r--r--tools/testing/selftests/tc-testing/tc-tests/actions/mpls.json159
-rw-r--r--tools/testing/selftests/tc-testing/tc-tests/actions/nat.json131
-rw-r--r--tools/testing/selftests/tc-testing/tc-tests/actions/pedit.json198
-rw-r--r--tools/testing/selftests/tc-testing/tc-tests/actions/police.json174
-rw-r--r--tools/testing/selftests/tc-testing/tc-tests/actions/sample.json137
-rw-r--r--tools/testing/selftests/tc-testing/tc-tests/actions/simple.json110
-rw-r--r--tools/testing/selftests/tc-testing/tc-tests/actions/skbedit.json90
-rw-r--r--tools/testing/selftests/tc-testing/tc-tests/actions/skbmod.json78
-rw-r--r--tools/testing/selftests/tc-testing/tc-tests/actions/tunnel_key.json192
-rw-r--r--tools/testing/selftests/tc-testing/tc-tests/actions/vlan.json136
-rw-r--r--tools/testing/selftests/tc-testing/tc-tests/filters/basic.json47
-rw-r--r--tools/testing/selftests/tc-testing/tc-tests/filters/bpf.json175
-rw-r--r--tools/testing/selftests/tc-testing/tc-tests/filters/cgroup.json1236
-rw-r--r--tools/testing/selftests/tc-testing/tc-tests/filters/flow.json623
-rw-r--r--tools/testing/selftests/tc-testing/tc-tests/filters/flower.json (renamed from tools/testing/selftests/tc-testing/tc-tests/filters/concurrency.json)98
-rw-r--r--tools/testing/selftests/tc-testing/tc-tests/filters/fw.json315
-rw-r--r--tools/testing/selftests/tc-testing/tc-tests/filters/matchall.json222
-rw-r--r--tools/testing/selftests/tc-testing/tc-tests/filters/route.json206
-rw-r--r--tools/testing/selftests/tc-testing/tc-tests/filters/tests.json129
-rw-r--r--tools/testing/selftests/tc-testing/tc-tests/filters/u32.json128
-rw-r--r--tools/testing/selftests/tc-testing/tc-tests/infra/actions.json416
-rw-r--r--tools/testing/selftests/tc-testing/tc-tests/infra/filter.json26
-rw-r--r--tools/testing/selftests/tc-testing/tc-tests/qdiscs/cake.json445
-rw-r--r--tools/testing/selftests/tc-testing/tc-tests/qdiscs/cbs.json214
-rw-r--r--tools/testing/selftests/tc-testing/tc-tests/qdiscs/choke.json172
-rw-r--r--tools/testing/selftests/tc-testing/tc-tests/qdiscs/codel.json193
-rw-r--r--tools/testing/selftests/tc-testing/tc-tests/qdiscs/drr.json65
-rw-r--r--tools/testing/selftests/tc-testing/tc-tests/qdiscs/etf.json107
-rw-r--r--tools/testing/selftests/tc-testing/tc-tests/qdiscs/ets.json284
-rw-r--r--tools/testing/selftests/tc-testing/tc-tests/qdiscs/fifo.json98
-rw-r--r--tools/testing/selftests/tc-testing/tc-tests/qdiscs/fq.json381
-rw-r--r--tools/testing/selftests/tc-testing/tc-tests/qdiscs/fq_codel.json298
-rw-r--r--tools/testing/selftests/tc-testing/tc-tests/qdiscs/fq_pie.json13
-rw-r--r--tools/testing/selftests/tc-testing/tc-tests/qdiscs/gred.json150
-rw-r--r--tools/testing/selftests/tc-testing/tc-tests/qdiscs/hfsc.json173
-rw-r--r--tools/testing/selftests/tc-testing/tc-tests/qdiscs/hhf.json192
-rw-r--r--tools/testing/selftests/tc-testing/tc-tests/qdiscs/htb.json261
-rw-r--r--tools/testing/selftests/tc-testing/tc-tests/qdiscs/ingress.json50
-rw-r--r--tools/testing/selftests/tc-testing/tc-tests/qdiscs/mq.json182
-rw-r--r--tools/testing/selftests/tc-testing/tc-tests/qdiscs/mqprio.json114
-rw-r--r--tools/testing/selftests/tc-testing/tc-tests/qdiscs/multiq.json114
-rw-r--r--tools/testing/selftests/tc-testing/tc-tests/qdiscs/netem.json340
-rw-r--r--tools/testing/selftests/tc-testing/tc-tests/qdiscs/pfifo_fast.json109
-rw-r--r--tools/testing/selftests/tc-testing/tc-tests/qdiscs/plug.json172
-rw-r--r--tools/testing/selftests/tc-testing/tc-tests/qdiscs/prio.json99
-rw-r--r--tools/testing/selftests/tc-testing/tc-tests/qdiscs/qfq.json280
-rw-r--r--tools/testing/selftests/tc-testing/tc-tests/qdiscs/red.json51
-rw-r--r--tools/testing/selftests/tc-testing/tc-tests/qdiscs/sfb.json255
-rw-r--r--tools/testing/selftests/tc-testing/tc-tests/qdiscs/sfq.json212
-rw-r--r--tools/testing/selftests/tc-testing/tc-tests/qdiscs/skbprio.json87
-rw-r--r--tools/testing/selftests/tc-testing/tc-tests/qdiscs/taprio.json262
-rw-r--r--tools/testing/selftests/tc-testing/tc-tests/qdiscs/tbf.json193
-rw-r--r--tools/testing/selftests/tc-testing/tc-tests/qdiscs/teql.json85
-rwxr-xr-xtools/testing/selftests/tc-testing/tdc.py396
-rwxr-xr-xtools/testing/selftests/tc-testing/tdc.sh66
-rwxr-xr-xtools/testing/selftests/tc-testing/tdc_batch.py2
-rw-r--r--tools/testing/selftests/tc-testing/tdc_config.py5
-rwxr-xr-xtools/testing/selftests/tc-testing/tdc_multibatch.py2
-rw-r--r--tools/testing/selftests/tdx/.gitignore1
-rw-r--r--tools/testing/selftests/tdx/Makefile7
-rw-r--r--tools/testing/selftests/tdx/config1
-rw-r--r--tools/testing/selftests/tdx/tdx_guest_test.c163
-rw-r--r--tools/testing/selftests/thermal/intel/power_floor/.gitignore1
-rw-r--r--tools/testing/selftests/thermal/intel/power_floor/Makefile12
-rw-r--r--tools/testing/selftests/thermal/intel/power_floor/power_floor_test.c108
-rw-r--r--tools/testing/selftests/thermal/intel/workload_hint/.gitignore1
-rw-r--r--tools/testing/selftests/thermal/intel/workload_hint/Makefile12
-rw-r--r--tools/testing/selftests/thermal/intel/workload_hint/workload_hint_test.c157
-rw-r--r--tools/testing/selftests/timens/.gitignore2
-rw-r--r--tools/testing/selftests/timens/Makefile2
-rw-r--r--tools/testing/selftests/timens/futex.c110
-rw-r--r--tools/testing/selftests/timens/gettime_perf.c8
-rw-r--r--tools/testing/selftests/timens/procfs.c60
-rw-r--r--tools/testing/selftests/timens/timens.c2
-rw-r--r--tools/testing/selftests/timens/vfork_exec.c139
-rw-r--r--tools/testing/selftests/timers/Makefile1
-rw-r--r--tools/testing/selftests/timers/adjtick.c2
-rw-r--r--tools/testing/selftests/timers/alarmtimer-suspend.c4
-rw-r--r--tools/testing/selftests/timers/change_skew.c2
-rw-r--r--tools/testing/selftests/timers/clocksource-switch.c75
-rw-r--r--tools/testing/selftests/timers/inconsistency-check.c34
-rw-r--r--tools/testing/selftests/timers/leap-a-day.c2
-rw-r--r--tools/testing/selftests/timers/leapcrash.c4
-rw-r--r--tools/testing/selftests/timers/nanosleep.c18
-rw-r--r--tools/testing/selftests/timers/nsleep-lat.c26
-rw-r--r--tools/testing/selftests/timers/posix_timers.c136
-rw-r--r--tools/testing/selftests/timers/raw_skew.c5
-rw-r--r--tools/testing/selftests/timers/rtcpie.c10
-rw-r--r--tools/testing/selftests/timers/settings1
-rw-r--r--tools/testing/selftests/timers/skew_consistency.c2
-rw-r--r--tools/testing/selftests/timers/threadtest.c2
-rw-r--r--tools/testing/selftests/timers/valid-adjtimex.c2
-rw-r--r--tools/testing/selftests/tpm2/Makefile2
-rw-r--r--tools/testing/selftests/tpm2/settings1
-rwxr-xr-xtools/testing/selftests/tpm2/test_async.sh10
-rwxr-xr-xtools/testing/selftests/tpm2/test_smoke.sh12
-rwxr-xr-xtools/testing/selftests/tpm2/test_space.sh6
-rw-r--r--tools/testing/selftests/tpm2/tpm2.py93
-rw-r--r--tools/testing/selftests/tpm2/tpm2_tests.py80
-rw-r--r--tools/testing/selftests/tty/.gitignore2
-rw-r--r--tools/testing/selftests/tty/Makefile5
-rw-r--r--tools/testing/selftests/tty/tty_tstamp_update.c88
-rw-r--r--tools/testing/selftests/uevent/.gitignore1
-rw-r--r--tools/testing/selftests/uevent/uevent_filtering.c9
-rw-r--r--tools/testing/selftests/user_events/.gitignore4
-rw-r--r--tools/testing/selftests/user_events/Makefile9
-rw-r--r--tools/testing/selftests/user_events/abi_test.c423
-rw-r--r--tools/testing/selftests/user_events/config1
-rw-r--r--tools/testing/selftests/user_events/dyn_test.c294
-rw-r--r--tools/testing/selftests/user_events/ftrace_test.c589
-rw-r--r--tools/testing/selftests/user_events/perf_test.c254
-rw-r--r--tools/testing/selftests/user_events/settings1
-rw-r--r--tools/testing/selftests/user_events/user_events_selftests.h114
-rw-r--r--tools/testing/selftests/vDSO/.gitignore3
-rw-r--r--tools/testing/selftests/vDSO/Makefile16
-rw-r--r--tools/testing/selftests/vDSO/vdso_config.h90
-rw-r--r--tools/testing/selftests/vDSO/vdso_test_abi.c239
-rw-r--r--tools/testing/selftests/vDSO/vdso_test_clock_getres.c124
-rw-r--r--tools/testing/selftests/vDSO/vdso_test_correctness.c (renamed from tools/testing/selftests/x86/test_vdso.c)118
-rw-r--r--tools/testing/selftests/vDSO/vdso_test_getcpu.c4
-rw-r--r--tools/testing/selftests/vDSO/vdso_test_gettimeofday.c3
-rw-r--r--tools/testing/selftests/vm/.gitignore22
-rw-r--r--tools/testing/selftests/vm/Makefile115
-rw-r--r--tools/testing/selftests/vm/gup_benchmark.c137
-rw-r--r--tools/testing/selftests/vm/khugepaged.c1035
-rw-r--r--tools/testing/selftests/vm/on-fault-limit.c48
-rwxr-xr-xtools/testing/selftests/vm/run_vmtests326
-rw-r--r--tools/testing/selftests/vm/transhuge-stress.c144
-rw-r--r--tools/testing/selftests/vm/userfaultfd.c1479
-rw-r--r--tools/testing/selftests/watchdog/watchdog-test.c106
-rwxr-xr-xtools/testing/selftests/wireguard/netns.sh96
-rw-r--r--tools/testing/selftests/wireguard/qemu/.gitignore1
-rw-r--r--tools/testing/selftests/wireguard/qemu/Makefile254
-rw-r--r--tools/testing/selftests/wireguard/qemu/arch/aarch64.config5
-rw-r--r--tools/testing/selftests/wireguard/qemu/arch/aarch64_be.config5
-rw-r--r--tools/testing/selftests/wireguard/qemu/arch/arm.config6
-rw-r--r--tools/testing/selftests/wireguard/qemu/arch/armeb.config6
-rw-r--r--tools/testing/selftests/wireguard/qemu/arch/i686.config7
-rw-r--r--tools/testing/selftests/wireguard/qemu/arch/m68k.config10
-rw-r--r--tools/testing/selftests/wireguard/qemu/arch/mips.config3
-rw-r--r--tools/testing/selftests/wireguard/qemu/arch/mips64.config2
-rw-r--r--tools/testing/selftests/wireguard/qemu/arch/mips64el.config2
-rw-r--r--tools/testing/selftests/wireguard/qemu/arch/mipsel.config3
-rw-r--r--tools/testing/selftests/wireguard/qemu/arch/powerpc.config3
-rw-r--r--tools/testing/selftests/wireguard/qemu/arch/powerpc64.config13
-rw-r--r--tools/testing/selftests/wireguard/qemu/arch/powerpc64le.config2
-rw-r--r--tools/testing/selftests/wireguard/qemu/arch/riscv32.config14
-rw-r--r--tools/testing/selftests/wireguard/qemu/arch/riscv64.config13
-rw-r--r--tools/testing/selftests/wireguard/qemu/arch/s390x.config6
-rw-r--r--tools/testing/selftests/wireguard/qemu/arch/um.config3
-rw-r--r--tools/testing/selftests/wireguard/qemu/arch/x86_64.config6
-rw-r--r--tools/testing/selftests/wireguard/qemu/debug.config8
-rw-r--r--tools/testing/selftests/wireguard/qemu/init.c46
-rw-r--r--tools/testing/selftests/wireguard/qemu/kernel.config8
-rw-r--r--tools/testing/selftests/x86/Makefile27
-rw-r--r--tools/testing/selftests/x86/amx.c955
-rwxr-xr-xtools/testing/selftests/x86/check_cc.sh2
-rw-r--r--tools/testing/selftests/x86/corrupt_xstate_header.c102
-rw-r--r--tools/testing/selftests/x86/fsgsbase.c108
-rw-r--r--tools/testing/selftests/x86/fsgsbase_restore.c245
-rw-r--r--tools/testing/selftests/x86/helpers.h25
-rw-r--r--tools/testing/selftests/x86/iopl.c78
-rw-r--r--tools/testing/selftests/x86/lam.c1241
-rw-r--r--tools/testing/selftests/x86/ldt_gdt.c2
-rw-r--r--tools/testing/selftests/x86/mov_ss_trap.c4
-rw-r--r--tools/testing/selftests/x86/nx_stack.c212
-rw-r--r--tools/testing/selftests/x86/raw_syscall_helper_32.S2
-rw-r--r--tools/testing/selftests/x86/sigaltstack.c128
-rw-r--r--tools/testing/selftests/x86/sigreturn.c7
-rw-r--r--tools/testing/selftests/x86/single_step_syscall.c21
-rw-r--r--tools/testing/selftests/x86/syscall_arg_fault.c54
-rw-r--r--tools/testing/selftests/x86/syscall_nt.c47
-rw-r--r--tools/testing/selftests/x86/syscall_numbering.c491
-rw-r--r--tools/testing/selftests/x86/test_shadow_stack.c884
-rw-r--r--tools/testing/selftests/x86/test_vsyscall.c46
-rw-r--r--tools/testing/selftests/x86/thunks.S2
-rw-r--r--tools/testing/selftests/x86/thunks_32.S2
-rw-r--r--tools/testing/selftests/x86/unwind_vdso.c23
-rwxr-xr-xtools/testing/selftests/zram/zram.sh15
-rwxr-xr-xtools/testing/selftests/zram/zram01.sh33
-rwxr-xr-xtools/testing/selftests/zram/zram02.sh1
-rwxr-xr-xtools/testing/selftests/zram/zram_lib.sh134
3248 files changed, 446841 insertions, 50508 deletions
diff --git a/tools/testing/selftests/.gitignore b/tools/testing/selftests/.gitignore
index 055a5019b13c..cb24124ac5b9 100644
--- a/tools/testing/selftests/.gitignore
+++ b/tools/testing/selftests/.gitignore
@@ -3,6 +3,7 @@ gpiogpio-event-mon
gpiogpio-hammer
gpioinclude/
gpiolsgpio
+kselftest_install/
tpm2/SpaceTest.log
# Python bytecode and cache
diff --git a/tools/testing/selftests/Makefile b/tools/testing/selftests/Makefile
index 1195bd85af38..e1504833654d 100644
--- a/tools/testing/selftests/Makefile
+++ b/tools/testing/selftests/Makefile
@@ -1,67 +1,112 @@
# SPDX-License-Identifier: GPL-2.0
-TARGETS = android
+TARGETS += alsa
+TARGETS += amd-pstate
TARGETS += arm64
TARGETS += bpf
TARGETS += breakpoints
+TARGETS += cachestat
TARGETS += capabilities
TARGETS += cgroup
TARGETS += clone3
+TARGETS += connector
+TARGETS += core
TARGETS += cpufreq
TARGETS += cpu-hotplug
+TARGETS += damon
+TARGETS += devices
+TARGETS += dmabuf-heaps
TARGETS += drivers/dma-buf
+TARGETS += drivers/s390x/uvdevice
+TARGETS += drivers/net/bonding
+TARGETS += drivers/net/team
+TARGETS += dt
TARGETS += efivarfs
TARGETS += exec
+TARGETS += fchmodat2
TARGETS += filesystems
TARGETS += filesystems/binderfs
TARGETS += filesystems/epoll
+TARGETS += filesystems/fat
+TARGETS += filesystems/overlayfs
+TARGETS += filesystems/statmount
TARGETS += firmware
+TARGETS += fpu
TARGETS += ftrace
TARGETS += futex
TARGETS += gpio
+TARGETS += hid
TARGETS += intel_pstate
+TARGETS += iommu
TARGETS += ipc
TARGETS += ir
TARGETS += kcmp
TARGETS += kexec
TARGETS += kvm
+TARGETS += landlock
TARGETS += lib
TARGETS += livepatch
TARGETS += lkdtm
+TARGETS += lsm
TARGETS += membarrier
TARGETS += memfd
TARGETS += memory-hotplug
+TARGETS += mincore
TARGETS += mount
+TARGETS += mount_setattr
+TARGETS += move_mount_set_group
TARGETS += mqueue
+TARGETS += nci
TARGETS += net
+TARGETS += net/af_unix
TARGETS += net/forwarding
+TARGETS += net/hsr
TARGETS += net/mptcp
+TARGETS += net/openvswitch
+TARGETS += net/tcp_ao
TARGETS += netfilter
TARGETS += nsfs
+TARGETS += perf_events
TARGETS += pidfd
TARGETS += pid_namespace
+TARGETS += power_supply
TARGETS += powerpc
+TARGETS += prctl
TARGETS += proc
TARGETS += pstore
TARGETS += ptrace
TARGETS += openat2
+TARGETS += resctrl
+TARGETS += riscv
+TARGETS += rlimits
TARGETS += rseq
TARGETS += rtc
+TARGETS += rust
TARGETS += seccomp
+TARGETS += sgx
TARGETS += sigaltstack
TARGETS += size
TARGETS += sparc64
TARGETS += splice
TARGETS += static_keys
TARGETS += sync
+TARGETS += syscall_user_dispatch
TARGETS += sysctl
+TARGETS += tc-testing
+TARGETS += tdx
+TARGETS += thermal/intel/power_floor
+TARGETS += thermal/intel/workload_hint
TARGETS += timens
ifneq (1, $(quicktest))
TARGETS += timers
endif
TARGETS += tmpfs
TARGETS += tpm2
+TARGETS += tty
+TARGETS += uevent
TARGETS += user
-TARGETS += vm
+TARGETS += user_events
+TARGETS += vDSO
+TARGETS += mm
TARGETS += x86
TARGETS += zram
#Please keep the TARGETS list alphabetically sorted
@@ -71,8 +116,10 @@ TARGETS += zram
TARGETS_HOTPLUG = cpu-hotplug
TARGETS_HOTPLUG += memory-hotplug
-# User can optionally provide a TARGETS skiplist.
-SKIP_TARGETS ?=
+# User can optionally provide a TARGETS skiplist. By default we skip
+# BPF since it has cutting edge build time dependencies which require
+# more effort to install.
+SKIP_TARGETS ?= bpf
ifneq ($(SKIP_TARGETS),)
TMP := $(filter-out $(SKIP_TARGETS), $(TARGETS))
override TARGETS := $(TMP)
@@ -84,10 +131,10 @@ endif
# of the targets gets built.
FORCE_TARGETS ?=
-# Clear LDFLAGS and MAKEFLAGS if called from main
-# Makefile to avoid test build failures when test
-# Makefile doesn't have explicit build rules.
-ifeq (1,$(MAKELEVEL))
+# Clear LDFLAGS and MAKEFLAGS when implicit rules are missing. This provides
+# implicit rules to sub-test Makefiles which avoids build failures in test
+# Makefile that don't have explicit build rules.
+ifeq (,$(LINK.c))
override LDFLAGS =
override MAKEFLAGS =
endif
@@ -99,66 +146,46 @@ ifdef building_out_of_srctree
override LDFLAGS =
endif
-ifneq ($(O),)
- BUILD := $(O)/kselftest
+top_srcdir ?= ../../..
+
+ifeq ("$(origin O)", "command line")
+ KBUILD_OUTPUT := $(O)
+endif
+
+ifneq ($(KBUILD_OUTPUT),)
+ # Make's built-in functions such as $(abspath ...), $(realpath ...) cannot
+ # expand a shell special character '~'. We use a somewhat tedious way here.
+ abs_objtree := $(shell cd $(top_srcdir) && mkdir -p $(KBUILD_OUTPUT) && cd $(KBUILD_OUTPUT) && pwd)
+ $(if $(abs_objtree),, \
+ $(error failed to create output directory "$(KBUILD_OUTPUT)"))
+ # $(realpath ...) resolves symlinks
+ abs_objtree := $(realpath $(abs_objtree))
+ BUILD := $(abs_objtree)/kselftest
+ KHDR_INCLUDES := -isystem ${abs_objtree}/usr/include
else
- ifneq ($(KBUILD_OUTPUT),)
- BUILD := $(KBUILD_OUTPUT)/kselftest
- else
- BUILD := $(shell pwd)
- DEFAULT_INSTALL_HDR_PATH := 1
- endif
+ BUILD := $(CURDIR)
+ abs_srctree := $(shell cd $(top_srcdir) && pwd)
+ KHDR_INCLUDES := -isystem ${abs_srctree}/usr/include
+ DEFAULT_INSTALL_HDR_PATH := 1
endif
# Prepare for headers install
-top_srcdir ?= ../../..
include $(top_srcdir)/scripts/subarch.include
ARCH ?= $(SUBARCH)
-export KSFT_KHDR_INSTALL_DONE := 1
export BUILD
-
-# build and run gpio when output directory is the src dir.
-# gpio has dependency on tools/gpio and builds tools/gpio
-# objects in the src directory in all cases making the src
-# repo dirty even when objects are relocated.
-ifneq (1,$(DEFAULT_INSTALL_HDR_PATH))
- TMP := $(filter-out gpio, $(TARGETS))
- TARGETS := $(TMP)
-endif
+export KHDR_INCLUDES
# set default goal to all, so make without a target runs all, even when
# all isn't the first target in the file.
.DEFAULT_GOAL := all
-# Install headers here once for all tests. KSFT_KHDR_INSTALL_DONE
-# is used to avoid running headers_install from lib.mk.
-# Invoke headers install with --no-builtin-rules to avoid circular
-# dependency in "make kselftest" case. In this case, second level
-# make inherits builtin-rules which will use the rule generate
-# Makefile.o and runs into
-# "Circular Makefile.o <- prepare dependency dropped."
-# and headers_install fails and test compile fails.
-#
-# O= KBUILD_OUTPUT cases don't run into this error, since main Makefile
-# invokes them as sub-makes and --no-builtin-rules is not necessary,
-# but doesn't cause any failures. Keep it simple and use the same
-# flags in both cases.
-# Local build cases: "make kselftest", "make -C" - headers are installed
-# in the default INSTALL_HDR_PATH usr/include.
-khdr:
-ifeq (1,$(DEFAULT_INSTALL_HDR_PATH))
- $(MAKE) --no-builtin-rules ARCH=$(ARCH) -C $(top_srcdir) headers_install
-else
- $(MAKE) --no-builtin-rules INSTALL_HDR_PATH=$$BUILD/usr \
- ARCH=$(ARCH) -C $(top_srcdir) headers_install
-endif
-
-all: khdr
+all:
@ret=1; \
for TARGET in $(TARGETS); do \
BUILD_TARGET=$$BUILD/$$TARGET; \
mkdir $$BUILD_TARGET -p; \
$(MAKE) OUTPUT=$$BUILD_TARGET -C $$TARGET \
+ O=$(abs_objtree) \
$(if $(FORCE_TARGETS),|| exit); \
ret=$$((ret * $$?)); \
done; exit $$ret;
@@ -166,7 +193,10 @@ all: khdr
run_tests: all
@for TARGET in $(TARGETS); do \
BUILD_TARGET=$$BUILD/$$TARGET; \
- $(MAKE) OUTPUT=$$BUILD_TARGET -C $$TARGET run_tests;\
+ $(MAKE) OUTPUT=$$BUILD_TARGET -C $$TARGET run_tests \
+ SRC_PATH=$(shell readlink -e $$(pwd)) \
+ OBJ_PATH=$(BUILD) \
+ O=$(abs_objtree); \
done;
hotplug:
@@ -202,6 +232,7 @@ KSFT_INSTALL_PATH := $(abspath $(KSFT_INSTALL_PATH))
# Avoid changing the rest of the logic here and lib.mk.
INSTALL_PATH := $(KSFT_INSTALL_PATH)
ALL_SCRIPT := $(INSTALL_PATH)/run_kselftest.sh
+TEST_LIST := $(INSTALL_PATH)/kselftest-list.txt
install: all
ifdef INSTALL_PATH
@@ -210,41 +241,33 @@ ifdef INSTALL_PATH
install -m 744 kselftest/module.sh $(INSTALL_PATH)/kselftest/
install -m 744 kselftest/runner.sh $(INSTALL_PATH)/kselftest/
install -m 744 kselftest/prefix.pl $(INSTALL_PATH)/kselftest/
+ install -m 744 kselftest/ktap_helpers.sh $(INSTALL_PATH)/kselftest/
+ install -m 744 run_kselftest.sh $(INSTALL_PATH)/
+ rm -f $(TEST_LIST)
@ret=1; \
for TARGET in $(TARGETS); do \
BUILD_TARGET=$$BUILD/$$TARGET; \
- $(MAKE) OUTPUT=$$BUILD_TARGET -C $$TARGET INSTALL_PATH=$(INSTALL_PATH)/$$TARGET install \
+ $(MAKE) OUTPUT=$$BUILD_TARGET -C $$TARGET install \
+ INSTALL_PATH=$(INSTALL_PATH)/$$TARGET \
+ SRC_PATH=$(shell readlink -e $$(pwd)) \
+ OBJ_PATH=$(INSTALL_PATH) \
+ O=$(abs_objtree) \
$(if $(FORCE_TARGETS),|| exit); \
ret=$$((ret * $$?)); \
done; exit $$ret;
- @# Ask all targets to emit their test scripts
- echo "#!/bin/sh" > $(ALL_SCRIPT)
- echo "BASE_DIR=\$$(realpath \$$(dirname \$$0))" >> $(ALL_SCRIPT)
- echo "cd \$$BASE_DIR" >> $(ALL_SCRIPT)
- echo ". ./kselftest/runner.sh" >> $(ALL_SCRIPT)
- echo "ROOT=\$$PWD" >> $(ALL_SCRIPT)
- echo "if [ \"\$$1\" = \"--summary\" ]; then" >> $(ALL_SCRIPT)
- echo " logfile=\$$BASE_DIR/output.log" >> $(ALL_SCRIPT)
- echo " cat /dev/null > \$$logfile" >> $(ALL_SCRIPT)
- echo "fi" >> $(ALL_SCRIPT)
- @# While building run_kselftest.sh skip also non-existent TARGET dirs:
+ @# Ask all targets to emit their test scripts
+ @# While building kselftest-list.text skip also non-existent TARGET dirs:
@# they could be the result of a build failure and should NOT be
@# included in the generated runlist.
for TARGET in $(TARGETS); do \
BUILD_TARGET=$$BUILD/$$TARGET; \
- [ ! -d $(INSTALL_PATH)/$$TARGET ] && echo "Skipping non-existent dir: $$TARGET" && continue; \
- echo "[ -w /dev/kmsg ] && echo \"kselftest: Running tests in $$TARGET\" >> /dev/kmsg" >> $(ALL_SCRIPT); \
- echo "cd $$TARGET" >> $(ALL_SCRIPT); \
- echo -n "run_many" >> $(ALL_SCRIPT); \
- echo -n "Emit Tests for $$TARGET\n"; \
- $(MAKE) -s --no-print-directory OUTPUT=$$BUILD_TARGET -C $$TARGET emit_tests >> $(ALL_SCRIPT); \
- echo "" >> $(ALL_SCRIPT); \
- echo "cd \$$ROOT" >> $(ALL_SCRIPT); \
+ [ ! -d $(INSTALL_PATH)/$$TARGET ] && printf "Skipping non-existent dir: $$TARGET\n" && continue; \
+ printf "Emit Tests for $$TARGET\n"; \
+ $(MAKE) -s --no-print-directory OUTPUT=$$BUILD_TARGET COLLECTION=$$TARGET \
+ -C $$TARGET emit_tests >> $(TEST_LIST); \
done;
-
- chmod u+x $(ALL_SCRIPT)
else
$(error Error: set INSTALL_PATH to use install)
endif
@@ -262,4 +285,4 @@ clean:
$(MAKE) OUTPUT=$$BUILD_TARGET -C $$TARGET clean;\
done;
-.PHONY: khdr all run_tests hotplug run_hotplug clean_hotplug run_pstore_crash install clean gen_tar
+.PHONY: all run_tests hotplug run_hotplug clean_hotplug run_pstore_crash install clean gen_tar
diff --git a/tools/testing/selftests/alsa/.gitignore b/tools/testing/selftests/alsa/.gitignore
new file mode 100644
index 000000000000..12dc3fcd3456
--- /dev/null
+++ b/tools/testing/selftests/alsa/.gitignore
@@ -0,0 +1,3 @@
+mixer-test
+pcm-test
+test-pcmtest-driver
diff --git a/tools/testing/selftests/alsa/Makefile b/tools/testing/selftests/alsa/Makefile
new file mode 100644
index 000000000000..5af9ba8a4645
--- /dev/null
+++ b/tools/testing/selftests/alsa/Makefile
@@ -0,0 +1,27 @@
+# SPDX-License-Identifier: GPL-2.0
+#
+
+CFLAGS += $(shell pkg-config --cflags alsa)
+LDLIBS += $(shell pkg-config --libs alsa)
+ifeq ($(LDLIBS),)
+LDLIBS += -lasound
+endif
+CFLAGS += -L$(OUTPUT) -Wl,-rpath=./
+
+LDLIBS+=-lpthread
+
+OVERRIDE_TARGETS = 1
+
+TEST_GEN_PROGS := mixer-test pcm-test test-pcmtest-driver
+
+TEST_GEN_PROGS_EXTENDED := libatest.so
+
+TEST_FILES := conf.d pcm-test.conf
+
+include ../lib.mk
+
+$(OUTPUT)/libatest.so: conf.c alsa-local.h
+ $(CC) $(CFLAGS) -shared -fPIC $< $(LDLIBS) -o $@
+
+$(OUTPUT)/%: %.c $(TEST_GEN_PROGS_EXTENDED) alsa-local.h
+ $(CC) $(CFLAGS) $< $(LDLIBS) -latest -o $@
diff --git a/tools/testing/selftests/alsa/alsa-local.h b/tools/testing/selftests/alsa/alsa-local.h
new file mode 100644
index 000000000000..29143ef52101
--- /dev/null
+++ b/tools/testing/selftests/alsa/alsa-local.h
@@ -0,0 +1,37 @@
+// SPDX-License-Identifier: GPL-2.0
+//
+// kselftest configuration helpers for the hw specific configuration
+//
+// Original author: Jaroslav Kysela <perex@perex.cz>
+// Copyright (c) 2022 Red Hat Inc.
+
+#ifndef __ALSA_LOCAL_H
+#define __ALSA_LOCAL_H
+
+#include <alsa/asoundlib.h>
+
+snd_config_t *get_alsalib_config(void);
+
+snd_config_t *conf_load_from_file(const char *filename);
+void conf_load(void);
+void conf_free(void);
+snd_config_t *conf_by_card(int card);
+snd_config_t *conf_get_subtree(snd_config_t *root, const char *key1, const char *key2);
+int conf_get_count(snd_config_t *root, const char *key1, const char *key2);
+const char *conf_get_string(snd_config_t *root, const char *key1, const char *key2, const char *def);
+long conf_get_long(snd_config_t *root, const char *key1, const char *key2, long def);
+int conf_get_bool(snd_config_t *root, const char *key1, const char *key2, int def);
+void conf_get_string_array(snd_config_t *root, const char *key1, const char *key2,
+ const char **array, int array_size, const char *def);
+
+struct card_cfg_data {
+ int card;
+ snd_config_t *config;
+ const char *filename;
+ const char *config_id;
+ struct card_cfg_data *next;
+};
+
+extern struct card_cfg_data *conf_cards;
+
+#endif /* __ALSA_LOCAL_H */
diff --git a/tools/testing/selftests/alsa/conf.c b/tools/testing/selftests/alsa/conf.c
new file mode 100644
index 000000000000..89e3656a042d
--- /dev/null
+++ b/tools/testing/selftests/alsa/conf.c
@@ -0,0 +1,475 @@
+// SPDX-License-Identifier: GPL-2.0
+//
+// kselftest configuration helpers for the hw specific configuration
+//
+// Original author: Jaroslav Kysela <perex@perex.cz>
+// Copyright (c) 2022 Red Hat Inc.
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <stdbool.h>
+#include <errno.h>
+#include <assert.h>
+#include <dirent.h>
+#include <regex.h>
+#include <sys/stat.h>
+
+#include "../kselftest.h"
+#include "alsa-local.h"
+
+#define SYSFS_ROOT "/sys"
+
+struct card_cfg_data *conf_cards;
+
+static const char *alsa_config =
+"ctl.hw {\n"
+" @args [ CARD ]\n"
+" @args.CARD.type string\n"
+" type hw\n"
+" card $CARD\n"
+"}\n"
+"pcm.hw {\n"
+" @args [ CARD DEV SUBDEV ]\n"
+" @args.CARD.type string\n"
+" @args.DEV.type integer\n"
+" @args.SUBDEV.type integer\n"
+" type hw\n"
+" card $CARD\n"
+" device $DEV\n"
+" subdevice $SUBDEV\n"
+"}\n"
+;
+
+#ifdef SND_LIB_VER
+#if SND_LIB_VERSION >= SND_LIB_VER(1, 2, 6)
+#define LIB_HAS_LOAD_STRING
+#endif
+#endif
+
+#ifndef LIB_HAS_LOAD_STRING
+static int snd_config_load_string(snd_config_t **config, const char *s,
+ size_t size)
+{
+ snd_input_t *input;
+ snd_config_t *dst;
+ int err;
+
+ assert(config && s);
+ if (size == 0)
+ size = strlen(s);
+ err = snd_input_buffer_open(&input, s, size);
+ if (err < 0)
+ return err;
+ err = snd_config_top(&dst);
+ if (err < 0) {
+ snd_input_close(input);
+ return err;
+ }
+ err = snd_config_load(dst, input);
+ snd_input_close(input);
+ if (err < 0) {
+ snd_config_delete(dst);
+ return err;
+ }
+ *config = dst;
+ return 0;
+}
+#endif
+
+snd_config_t *get_alsalib_config(void)
+{
+ snd_config_t *config;
+ int err;
+
+ err = snd_config_load_string(&config, alsa_config, strlen(alsa_config));
+ if (err < 0) {
+ ksft_print_msg("Unable to parse custom alsa-lib configuration: %s\n",
+ snd_strerror(err));
+ ksft_exit_fail();
+ }
+ return config;
+}
+
+static struct card_cfg_data *conf_data_by_card(int card, bool msg)
+{
+ struct card_cfg_data *conf;
+
+ for (conf = conf_cards; conf; conf = conf->next) {
+ if (conf->card == card) {
+ if (msg)
+ ksft_print_msg("using hw card config %s for card %d\n",
+ conf->filename, card);
+ return conf;
+ }
+ }
+ return NULL;
+}
+
+static int dump_config_tree(snd_config_t *top)
+{
+ snd_output_t *out;
+ int err;
+
+ err = snd_output_stdio_attach(&out, stdout, 0);
+ if (err < 0)
+ ksft_exit_fail_msg("stdout attach\n");
+ if (snd_config_save(top, out))
+ ksft_exit_fail_msg("config save\n");
+ snd_output_close(out);
+}
+
+snd_config_t *conf_load_from_file(const char *filename)
+{
+ snd_config_t *dst;
+ snd_input_t *input;
+ int err;
+
+ err = snd_input_stdio_open(&input, filename, "r");
+ if (err < 0)
+ ksft_exit_fail_msg("Unable to parse filename %s\n", filename);
+ err = snd_config_top(&dst);
+ if (err < 0)
+ ksft_exit_fail_msg("Out of memory\n");
+ err = snd_config_load(dst, input);
+ snd_input_close(input);
+ if (err < 0)
+ ksft_exit_fail_msg("Unable to parse filename %s\n", filename);
+ return dst;
+}
+
+static char *sysfs_get(const char *sysfs_root, const char *id)
+{
+ char path[PATH_MAX], link[PATH_MAX + 1];
+ struct stat sb;
+ ssize_t len;
+ char *e;
+ int fd;
+
+ if (id[0] == '/')
+ id++;
+ snprintf(path, sizeof(path), "%s/%s", sysfs_root, id);
+ if (lstat(path, &sb) != 0)
+ return NULL;
+ if (S_ISLNK(sb.st_mode)) {
+ len = readlink(path, link, sizeof(link) - 1);
+ if (len <= 0) {
+ ksft_exit_fail_msg("sysfs: cannot read link '%s': %s\n",
+ path, strerror(errno));
+ return NULL;
+ }
+ link[len] = '\0';
+ e = strrchr(link, '/');
+ if (e)
+ return strdup(e + 1);
+ return NULL;
+ }
+ if (S_ISDIR(sb.st_mode))
+ return NULL;
+ if ((sb.st_mode & S_IRUSR) == 0)
+ return NULL;
+
+ fd = open(path, O_RDONLY);
+ if (fd < 0) {
+ if (errno == ENOENT)
+ return NULL;
+ ksft_exit_fail_msg("sysfs: open failed for '%s': %s\n",
+ path, strerror(errno));
+ }
+ len = read(fd, path, sizeof(path)-1);
+ close(fd);
+ if (len < 0)
+ ksft_exit_fail_msg("sysfs: unable to read value '%s': %s\n",
+ path, strerror(errno));
+ while (len > 0 && path[len-1] == '\n')
+ len--;
+ path[len] = '\0';
+ e = strdup(path);
+ if (e == NULL)
+ ksft_exit_fail_msg("Out of memory\n");
+ return e;
+}
+
+static bool sysfs_match(const char *sysfs_root, snd_config_t *config)
+{
+ snd_config_t *node, *path_config, *regex_config;
+ snd_config_iterator_t i, next;
+ const char *path_string, *regex_string, *v;
+ regex_t re;
+ regmatch_t match[1];
+ int iter = 0, ret;
+
+ snd_config_for_each(i, next, config) {
+ node = snd_config_iterator_entry(i);
+ if (snd_config_search(node, "path", &path_config))
+ ksft_exit_fail_msg("Missing path field in the sysfs block\n");
+ if (snd_config_search(node, "regex", &regex_config))
+ ksft_exit_fail_msg("Missing regex field in the sysfs block\n");
+ if (snd_config_get_string(path_config, &path_string))
+ ksft_exit_fail_msg("Path field in the sysfs block is not a string\n");
+ if (snd_config_get_string(regex_config, &regex_string))
+ ksft_exit_fail_msg("Regex field in the sysfs block is not a string\n");
+ iter++;
+ v = sysfs_get(sysfs_root, path_string);
+ if (!v)
+ return false;
+ if (regcomp(&re, regex_string, REG_EXTENDED))
+ ksft_exit_fail_msg("Wrong regex '%s'\n", regex_string);
+ ret = regexec(&re, v, 1, match, 0);
+ regfree(&re);
+ if (ret)
+ return false;
+ }
+ return iter > 0;
+}
+
+static void assign_card_config(int card, const char *sysfs_card_root)
+{
+ struct card_cfg_data *data;
+ snd_config_t *sysfs_card_config;
+
+ for (data = conf_cards; data; data = data->next) {
+ snd_config_search(data->config, "sysfs", &sysfs_card_config);
+ if (!sysfs_match(sysfs_card_root, sysfs_card_config))
+ continue;
+
+ data->card = card;
+ break;
+ }
+}
+
+static void assign_card_configs(void)
+{
+ char fn[128];
+ int card;
+
+ for (card = 0; card < 32; card++) {
+ snprintf(fn, sizeof(fn), "%s/class/sound/card%d", SYSFS_ROOT, card);
+ if (access(fn, R_OK) == 0)
+ assign_card_config(card, fn);
+ }
+}
+
+static int filename_filter(const struct dirent *dirent)
+{
+ size_t flen;
+
+ if (dirent == NULL)
+ return 0;
+ if (dirent->d_type == DT_DIR)
+ return 0;
+ flen = strlen(dirent->d_name);
+ if (flen <= 5)
+ return 0;
+ if (strncmp(&dirent->d_name[flen-5], ".conf", 5) == 0)
+ return 1;
+ return 0;
+}
+
+static bool match_config(const char *filename)
+{
+ struct card_cfg_data *data;
+ snd_config_t *config, *sysfs_config, *card_config, *sysfs_card_config, *node;
+ snd_config_iterator_t i, next;
+
+ config = conf_load_from_file(filename);
+ if (snd_config_search(config, "sysfs", &sysfs_config) ||
+ snd_config_get_type(sysfs_config) != SND_CONFIG_TYPE_COMPOUND)
+ ksft_exit_fail_msg("Missing global sysfs block in filename %s\n", filename);
+ if (snd_config_search(config, "card", &card_config) ||
+ snd_config_get_type(card_config) != SND_CONFIG_TYPE_COMPOUND)
+ ksft_exit_fail_msg("Missing global card block in filename %s\n", filename);
+ if (!sysfs_match(SYSFS_ROOT, sysfs_config))
+ return false;
+ snd_config_for_each(i, next, card_config) {
+ node = snd_config_iterator_entry(i);
+ if (snd_config_search(node, "sysfs", &sysfs_card_config) ||
+ snd_config_get_type(sysfs_card_config) != SND_CONFIG_TYPE_COMPOUND)
+ ksft_exit_fail_msg("Missing card sysfs block in filename %s\n", filename);
+
+ data = malloc(sizeof(*data));
+ if (!data)
+ ksft_exit_fail_msg("Out of memory\n");
+ data->filename = filename;
+ data->config = node;
+ data->card = -1;
+ if (snd_config_get_id(node, &data->config_id))
+ ksft_exit_fail_msg("snd_config_get_id failed for card\n");
+ data->next = conf_cards;
+ conf_cards = data;
+ }
+ return true;
+}
+
+void conf_load(void)
+{
+ const char *fn = "conf.d";
+ struct dirent **namelist;
+ int n, j;
+
+ n = scandir(fn, &namelist, filename_filter, alphasort);
+ if (n < 0)
+ ksft_exit_fail_msg("scandir: %s\n", strerror(errno));
+ for (j = 0; j < n; j++) {
+ size_t sl = strlen(fn) + strlen(namelist[j]->d_name) + 2;
+ char *filename = malloc(sl);
+ if (filename == NULL)
+ ksft_exit_fail_msg("Out of memory\n");
+ sprintf(filename, "%s/%s", fn, namelist[j]->d_name);
+ if (match_config(filename))
+ filename = NULL;
+ free(filename);
+ free(namelist[j]);
+ }
+ free(namelist);
+
+ assign_card_configs();
+}
+
+void conf_free(void)
+{
+ struct card_cfg_data *conf;
+
+ while (conf_cards) {
+ conf = conf_cards;
+ conf_cards = conf->next;
+ snd_config_delete(conf->config);
+ }
+}
+
+snd_config_t *conf_by_card(int card)
+{
+ struct card_cfg_data *conf;
+
+ conf = conf_data_by_card(card, true);
+ if (conf)
+ return conf->config;
+ return NULL;
+}
+
+static int conf_get_by_keys(snd_config_t *root, const char *key1,
+ const char *key2, snd_config_t **result)
+{
+ int ret;
+
+ if (key1) {
+ ret = snd_config_search(root, key1, &root);
+ if (ret != -ENOENT && ret < 0)
+ return ret;
+ }
+ if (key2)
+ ret = snd_config_search(root, key2, &root);
+ if (ret >= 0)
+ *result = root;
+ return ret;
+}
+
+snd_config_t *conf_get_subtree(snd_config_t *root, const char *key1, const char *key2)
+{
+ int ret;
+
+ if (!root)
+ return NULL;
+ ret = conf_get_by_keys(root, key1, key2, &root);
+ if (ret == -ENOENT)
+ return NULL;
+ if (ret < 0)
+ ksft_exit_fail_msg("key '%s'.'%s' search error: %s\n", key1, key2, snd_strerror(ret));
+ return root;
+}
+
+int conf_get_count(snd_config_t *root, const char *key1, const char *key2)
+{
+ snd_config_t *cfg;
+ snd_config_iterator_t i, next;
+ int count, ret;
+
+ if (!root)
+ return -1;
+ ret = conf_get_by_keys(root, key1, key2, &cfg);
+ if (ret == -ENOENT)
+ return -1;
+ if (ret < 0)
+ ksft_exit_fail_msg("key '%s'.'%s' search error: %s\n", key1, key2, snd_strerror(ret));
+ if (snd_config_get_type(cfg) != SND_CONFIG_TYPE_COMPOUND)
+ ksft_exit_fail_msg("key '%s'.'%s' is not a compound\n", key1, key2);
+ count = 0;
+ snd_config_for_each(i, next, cfg)
+ count++;
+ return count;
+}
+
+const char *conf_get_string(snd_config_t *root, const char *key1, const char *key2, const char *def)
+{
+ snd_config_t *cfg;
+ const char *s;
+ int ret;
+
+ if (!root)
+ return def;
+ ret = conf_get_by_keys(root, key1, key2, &cfg);
+ if (ret == -ENOENT)
+ return def;
+ if (ret < 0)
+ ksft_exit_fail_msg("key '%s'.'%s' search error: %s\n", key1, key2, snd_strerror(ret));
+ if (snd_config_get_string(cfg, &s))
+ ksft_exit_fail_msg("key '%s'.'%s' is not a string\n", key1, key2);
+ return s;
+}
+
+long conf_get_long(snd_config_t *root, const char *key1, const char *key2, long def)
+{
+ snd_config_t *cfg;
+ long l;
+ int ret;
+
+ if (!root)
+ return def;
+ ret = conf_get_by_keys(root, key1, key2, &cfg);
+ if (ret == -ENOENT)
+ return def;
+ if (ret < 0)
+ ksft_exit_fail_msg("key '%s'.'%s' search error: %s\n", key1, key2, snd_strerror(ret));
+ if (snd_config_get_integer(cfg, &l))
+ ksft_exit_fail_msg("key '%s'.'%s' is not an integer\n", key1, key2);
+ return l;
+}
+
+int conf_get_bool(snd_config_t *root, const char *key1, const char *key2, int def)
+{
+ snd_config_t *cfg;
+ int ret;
+
+ if (!root)
+ return def;
+ ret = conf_get_by_keys(root, key1, key2, &cfg);
+ if (ret == -ENOENT)
+ return def;
+ if (ret < 0)
+ ksft_exit_fail_msg("key '%s'.'%s' search error: %s\n", key1, key2, snd_strerror(ret));
+ ret = snd_config_get_bool(cfg);
+ if (ret < 0)
+ ksft_exit_fail_msg("key '%s'.'%s' is not an bool\n", key1, key2);
+ return !!ret;
+}
+
+void conf_get_string_array(snd_config_t *root, const char *key1, const char *key2,
+ const char **array, int array_size, const char *def)
+{
+ snd_config_t *cfg;
+ char buf[16];
+ int ret, index;
+
+ ret = conf_get_by_keys(root, key1, key2, &cfg);
+ if (ret == -ENOENT)
+ cfg = NULL;
+ else if (ret < 0)
+ ksft_exit_fail_msg("key '%s'.'%s' search error: %s\n", key1, key2, snd_strerror(ret));
+ for (index = 0; index < array_size; index++) {
+ if (cfg == NULL) {
+ array[index] = def;
+ } else {
+ sprintf(buf, "%i", index);
+ array[index] = conf_get_string(cfg, buf, NULL, def);
+ }
+ }
+}
diff --git a/tools/testing/selftests/alsa/conf.d/Lenovo_ThinkPad_P1_Gen2.conf b/tools/testing/selftests/alsa/conf.d/Lenovo_ThinkPad_P1_Gen2.conf
new file mode 100644
index 000000000000..5b40a916295d
--- /dev/null
+++ b/tools/testing/selftests/alsa/conf.d/Lenovo_ThinkPad_P1_Gen2.conf
@@ -0,0 +1,84 @@
+#
+# Example configuration for Lenovo ThinkPad P1 Gen2
+#
+
+#
+# Use regex match for the string read from the given sysfs path
+#
+# The sysfs root directory (/sys) is hardwired in the test code
+# (may be changed on demand).
+#
+# All strings must match.
+#
+sysfs [
+ {
+ path "class/dmi/id/product_sku"
+ regex "LENOVO_MT_20QU_BU_Think_FM_ThinkPad P1 Gen 2"
+ }
+]
+
+card.hda {
+ #
+ # Use regex match for the /sys/class/sound/card*/ tree (relative)
+ #
+ sysfs [
+ {
+ path "device/subsystem_device"
+ regex "0x229e"
+ }
+ {
+ path "device/subsystem_vendor"
+ regex "0x17aa"
+ }
+ ]
+
+ #
+ # PCM configuration
+ #
+ # pcm.0.0 - device 0 subdevice 0
+ #
+ pcm.0.0 {
+ PLAYBACK {
+ test.time1 {
+ access RW_INTERLEAVED # can be omitted - default
+ format S16_LE # can be omitted - default
+ rate 48000 # can be omitted - default
+ channels 2 # can be omitted - default
+ period_size 512
+ buffer_size 4096
+ }
+ test.time2 {
+ access RW_INTERLEAVED
+ format S16_LE
+ rate 48000
+ channels 2
+ period_size 24000
+ buffer_size 192000
+ }
+ test.time3 {
+ access RW_INTERLEAVED
+ format S16_LE
+ rate 44100
+ channels 2
+ period_size 24000
+ buffer_size 192000
+ }
+ }
+ CAPTURE {
+ # use default tests, check for the presence
+ }
+ }
+ #
+ # uncomment to force the missing device checks
+ #
+ #pcm.0.2 {
+ # PLAYBACK {
+ # # check for the presence
+ # }
+ #}
+ #pcm.0.3 {
+ # CAPTURE {
+ # # check for the presence
+ # }
+ #}
+}
diff --git a/tools/testing/selftests/alsa/mixer-test.c b/tools/testing/selftests/alsa/mixer-test.c
new file mode 100644
index 000000000000..1c04e5f638a0
--- /dev/null
+++ b/tools/testing/selftests/alsa/mixer-test.c
@@ -0,0 +1,1112 @@
+// SPDX-License-Identifier: GPL-2.0
+//
+// kselftest for the ALSA mixer API
+//
+// Original author: Mark Brown <broonie@kernel.org>
+// Copyright (c) 2021-2 Arm Limited
+
+// This test will iterate over all cards detected in the system, exercising
+// every mixer control it can find. This may conflict with other system
+// software if there is audio activity so is best run on a system with a
+// minimal active userspace.
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <stdbool.h>
+#include <limits.h>
+#include <string.h>
+#include <getopt.h>
+#include <stdarg.h>
+#include <ctype.h>
+#include <math.h>
+#include <errno.h>
+#include <assert.h>
+#include <alsa/asoundlib.h>
+#include <poll.h>
+#include <stdint.h>
+
+#include "../kselftest.h"
+#include "alsa-local.h"
+
+#define TESTS_PER_CONTROL 7
+
+struct card_data {
+ snd_ctl_t *handle;
+ int card;
+ struct pollfd pollfd;
+ int num_ctls;
+ snd_ctl_elem_list_t *ctls;
+ struct card_data *next;
+};
+
+struct ctl_data {
+ const char *name;
+ snd_ctl_elem_id_t *id;
+ snd_ctl_elem_info_t *info;
+ snd_ctl_elem_value_t *def_val;
+ int elem;
+ int event_missing;
+ int event_spurious;
+ struct card_data *card;
+ struct ctl_data *next;
+};
+
+int num_cards = 0;
+int num_controls = 0;
+struct card_data *card_list = NULL;
+struct ctl_data *ctl_list = NULL;
+
+static void find_controls(void)
+{
+ char name[32];
+ int card, ctl, err;
+ struct card_data *card_data;
+ struct ctl_data *ctl_data;
+ snd_config_t *config;
+ char *card_name, *card_longname;
+
+ card = -1;
+ if (snd_card_next(&card) < 0 || card < 0)
+ return;
+
+ config = get_alsalib_config();
+
+ while (card >= 0) {
+ sprintf(name, "hw:%d", card);
+
+ card_data = malloc(sizeof(*card_data));
+ if (!card_data)
+ ksft_exit_fail_msg("Out of memory\n");
+
+ err = snd_ctl_open_lconf(&card_data->handle, name, 0, config);
+ if (err < 0) {
+ ksft_print_msg("Failed to get hctl for card %d: %s\n",
+ card, snd_strerror(err));
+ goto next_card;
+ }
+
+ err = snd_card_get_name(card, &card_name);
+ if (err != 0)
+ card_name = "Unknown";
+ err = snd_card_get_longname(card, &card_longname);
+ if (err != 0)
+ card_longname = "Unknown";
+ ksft_print_msg("Card %d - %s (%s)\n", card,
+ card_name, card_longname);
+
+ /* Count controls */
+ snd_ctl_elem_list_malloc(&card_data->ctls);
+ snd_ctl_elem_list(card_data->handle, card_data->ctls);
+ card_data->num_ctls = snd_ctl_elem_list_get_count(card_data->ctls);
+
+ /* Enumerate control information */
+ snd_ctl_elem_list_alloc_space(card_data->ctls, card_data->num_ctls);
+ snd_ctl_elem_list(card_data->handle, card_data->ctls);
+
+ card_data->card = num_cards++;
+ card_data->next = card_list;
+ card_list = card_data;
+
+ num_controls += card_data->num_ctls;
+
+ for (ctl = 0; ctl < card_data->num_ctls; ctl++) {
+ ctl_data = malloc(sizeof(*ctl_data));
+ if (!ctl_data)
+ ksft_exit_fail_msg("Out of memory\n");
+
+ memset(ctl_data, 0, sizeof(*ctl_data));
+ ctl_data->card = card_data;
+ ctl_data->elem = ctl;
+ ctl_data->name = snd_ctl_elem_list_get_name(card_data->ctls,
+ ctl);
+
+ err = snd_ctl_elem_id_malloc(&ctl_data->id);
+ if (err < 0)
+ ksft_exit_fail_msg("Out of memory\n");
+
+ err = snd_ctl_elem_info_malloc(&ctl_data->info);
+ if (err < 0)
+ ksft_exit_fail_msg("Out of memory\n");
+
+ err = snd_ctl_elem_value_malloc(&ctl_data->def_val);
+ if (err < 0)
+ ksft_exit_fail_msg("Out of memory\n");
+
+ snd_ctl_elem_list_get_id(card_data->ctls, ctl,
+ ctl_data->id);
+ snd_ctl_elem_info_set_id(ctl_data->info, ctl_data->id);
+ err = snd_ctl_elem_info(card_data->handle,
+ ctl_data->info);
+ if (err < 0) {
+ ksft_print_msg("%s getting info for %s\n",
+ snd_strerror(err),
+ ctl_data->name);
+ }
+
+ snd_ctl_elem_value_set_id(ctl_data->def_val,
+ ctl_data->id);
+
+ ctl_data->next = ctl_list;
+ ctl_list = ctl_data;
+ }
+
+ /* Set up for events */
+ err = snd_ctl_subscribe_events(card_data->handle, true);
+ if (err < 0) {
+ ksft_exit_fail_msg("snd_ctl_subscribe_events() failed for card %d: %d\n",
+ card, err);
+ }
+
+ err = snd_ctl_poll_descriptors_count(card_data->handle);
+ if (err != 1) {
+ ksft_exit_fail_msg("Unexpected descriptor count %d for card %d\n",
+ err, card);
+ }
+
+ err = snd_ctl_poll_descriptors(card_data->handle,
+ &card_data->pollfd, 1);
+ if (err != 1) {
+ ksft_exit_fail_msg("snd_ctl_poll_descriptors() failed for card %d: %d\n",
+ card, err);
+ }
+
+ next_card:
+ if (snd_card_next(&card) < 0) {
+ ksft_print_msg("snd_card_next");
+ break;
+ }
+ }
+
+ snd_config_delete(config);
+}
+
+/*
+ * Block for up to timeout ms for an event, returns a negative value
+ * on error, 0 for no event and 1 for an event.
+ */
+static int wait_for_event(struct ctl_data *ctl, int timeout)
+{
+ unsigned short revents;
+ snd_ctl_event_t *event;
+ int err;
+ unsigned int mask = 0;
+ unsigned int ev_id;
+
+ snd_ctl_event_alloca(&event);
+
+ do {
+ err = poll(&(ctl->card->pollfd), 1, timeout);
+ if (err < 0) {
+ ksft_print_msg("poll() failed for %s: %s (%d)\n",
+ ctl->name, strerror(errno), errno);
+ return -1;
+ }
+ /* Timeout */
+ if (err == 0)
+ return 0;
+
+ err = snd_ctl_poll_descriptors_revents(ctl->card->handle,
+ &(ctl->card->pollfd),
+ 1, &revents);
+ if (err < 0) {
+ ksft_print_msg("snd_ctl_poll_descriptors_revents() failed for %s: %d\n",
+ ctl->name, err);
+ return err;
+ }
+ if (revents & POLLERR) {
+ ksft_print_msg("snd_ctl_poll_descriptors_revents() reported POLLERR for %s\n",
+ ctl->name);
+ return -1;
+ }
+ /* No read events */
+ if (!(revents & POLLIN)) {
+ ksft_print_msg("No POLLIN\n");
+ continue;
+ }
+
+ err = snd_ctl_read(ctl->card->handle, event);
+ if (err < 0) {
+ ksft_print_msg("snd_ctl_read() failed for %s: %d\n",
+ ctl->name, err);
+ return err;
+ }
+
+ if (snd_ctl_event_get_type(event) != SND_CTL_EVENT_ELEM)
+ continue;
+
+ /* The ID returned from the event is 1 less than numid */
+ mask = snd_ctl_event_elem_get_mask(event);
+ ev_id = snd_ctl_event_elem_get_numid(event);
+ if (ev_id != snd_ctl_elem_info_get_numid(ctl->info)) {
+ ksft_print_msg("Event for unexpected ctl %s\n",
+ snd_ctl_event_elem_get_name(event));
+ continue;
+ }
+
+ if ((mask & SND_CTL_EVENT_MASK_REMOVE) == SND_CTL_EVENT_MASK_REMOVE) {
+ ksft_print_msg("Removal event for %s\n",
+ ctl->name);
+ return -1;
+ }
+ } while ((mask & SND_CTL_EVENT_MASK_VALUE) != SND_CTL_EVENT_MASK_VALUE);
+
+ return 1;
+}
+
+static bool ctl_value_index_valid(struct ctl_data *ctl,
+ snd_ctl_elem_value_t *val,
+ int index)
+{
+ long int_val;
+ long long int64_val;
+
+ switch (snd_ctl_elem_info_get_type(ctl->info)) {
+ case SND_CTL_ELEM_TYPE_NONE:
+ ksft_print_msg("%s.%d Invalid control type NONE\n",
+ ctl->name, index);
+ return false;
+
+ case SND_CTL_ELEM_TYPE_BOOLEAN:
+ int_val = snd_ctl_elem_value_get_boolean(val, index);
+ switch (int_val) {
+ case 0:
+ case 1:
+ break;
+ default:
+ ksft_print_msg("%s.%d Invalid boolean value %ld\n",
+ ctl->name, index, int_val);
+ return false;
+ }
+ break;
+
+ case SND_CTL_ELEM_TYPE_INTEGER:
+ int_val = snd_ctl_elem_value_get_integer(val, index);
+
+ if (int_val < snd_ctl_elem_info_get_min(ctl->info)) {
+ ksft_print_msg("%s.%d value %ld less than minimum %ld\n",
+ ctl->name, index, int_val,
+ snd_ctl_elem_info_get_min(ctl->info));
+ return false;
+ }
+
+ if (int_val > snd_ctl_elem_info_get_max(ctl->info)) {
+ ksft_print_msg("%s.%d value %ld more than maximum %ld\n",
+ ctl->name, index, int_val,
+ snd_ctl_elem_info_get_max(ctl->info));
+ return false;
+ }
+
+ /* Only check step size if there is one and we're in bounds */
+ if (snd_ctl_elem_info_get_step(ctl->info) &&
+ (int_val - snd_ctl_elem_info_get_min(ctl->info) %
+ snd_ctl_elem_info_get_step(ctl->info))) {
+ ksft_print_msg("%s.%d value %ld invalid for step %ld minimum %ld\n",
+ ctl->name, index, int_val,
+ snd_ctl_elem_info_get_step(ctl->info),
+ snd_ctl_elem_info_get_min(ctl->info));
+ return false;
+ }
+ break;
+
+ case SND_CTL_ELEM_TYPE_INTEGER64:
+ int64_val = snd_ctl_elem_value_get_integer64(val, index);
+
+ if (int64_val < snd_ctl_elem_info_get_min64(ctl->info)) {
+ ksft_print_msg("%s.%d value %lld less than minimum %lld\n",
+ ctl->name, index, int64_val,
+ snd_ctl_elem_info_get_min64(ctl->info));
+ return false;
+ }
+
+ if (int64_val > snd_ctl_elem_info_get_max64(ctl->info)) {
+ ksft_print_msg("%s.%d value %lld more than maximum %ld\n",
+ ctl->name, index, int64_val,
+ snd_ctl_elem_info_get_max(ctl->info));
+ return false;
+ }
+
+ /* Only check step size if there is one and we're in bounds */
+ if (snd_ctl_elem_info_get_step64(ctl->info) &&
+ (int64_val - snd_ctl_elem_info_get_min64(ctl->info)) %
+ snd_ctl_elem_info_get_step64(ctl->info)) {
+ ksft_print_msg("%s.%d value %lld invalid for step %lld minimum %lld\n",
+ ctl->name, index, int64_val,
+ snd_ctl_elem_info_get_step64(ctl->info),
+ snd_ctl_elem_info_get_min64(ctl->info));
+ return false;
+ }
+ break;
+
+ case SND_CTL_ELEM_TYPE_ENUMERATED:
+ int_val = snd_ctl_elem_value_get_enumerated(val, index);
+
+ if (int_val < 0) {
+ ksft_print_msg("%s.%d negative value %ld for enumeration\n",
+ ctl->name, index, int_val);
+ return false;
+ }
+
+ if (int_val >= snd_ctl_elem_info_get_items(ctl->info)) {
+ ksft_print_msg("%s.%d value %ld more than item count %u\n",
+ ctl->name, index, int_val,
+ snd_ctl_elem_info_get_items(ctl->info));
+ return false;
+ }
+ break;
+
+ default:
+ /* No tests for other types */
+ break;
+ }
+
+ return true;
+}
+
+/*
+ * Check that the provided value meets the constraints for the
+ * provided control.
+ */
+static bool ctl_value_valid(struct ctl_data *ctl, snd_ctl_elem_value_t *val)
+{
+ int i;
+ bool valid = true;
+
+ for (i = 0; i < snd_ctl_elem_info_get_count(ctl->info); i++)
+ if (!ctl_value_index_valid(ctl, val, i))
+ valid = false;
+
+ return valid;
+}
+
+/*
+ * Check that we can read the default value and it is valid. Write
+ * tests use the read value to restore the default.
+ */
+static void test_ctl_get_value(struct ctl_data *ctl)
+{
+ int err;
+
+ /* If the control is turned off let's be polite */
+ if (snd_ctl_elem_info_is_inactive(ctl->info)) {
+ ksft_print_msg("%s is inactive\n", ctl->name);
+ ksft_test_result_skip("get_value.%d.%d\n",
+ ctl->card->card, ctl->elem);
+ return;
+ }
+
+ /* Can't test reading on an unreadable control */
+ if (!snd_ctl_elem_info_is_readable(ctl->info)) {
+ ksft_print_msg("%s is not readable\n", ctl->name);
+ ksft_test_result_skip("get_value.%d.%d\n",
+ ctl->card->card, ctl->elem);
+ return;
+ }
+
+ err = snd_ctl_elem_read(ctl->card->handle, ctl->def_val);
+ if (err < 0) {
+ ksft_print_msg("snd_ctl_elem_read() failed: %s\n",
+ snd_strerror(err));
+ goto out;
+ }
+
+ if (!ctl_value_valid(ctl, ctl->def_val))
+ err = -EINVAL;
+
+out:
+ ksft_test_result(err >= 0, "get_value.%d.%d\n",
+ ctl->card->card, ctl->elem);
+}
+
+static bool strend(const char *haystack, const char *needle)
+{
+ size_t haystack_len = strlen(haystack);
+ size_t needle_len = strlen(needle);
+
+ if (needle_len > haystack_len)
+ return false;
+ return strcmp(haystack + haystack_len - needle_len, needle) == 0;
+}
+
+static void test_ctl_name(struct ctl_data *ctl)
+{
+ bool name_ok = true;
+
+ ksft_print_msg("%d.%d %s\n", ctl->card->card, ctl->elem,
+ ctl->name);
+
+ /* Only boolean controls should end in Switch */
+ if (strend(ctl->name, " Switch")) {
+ if (snd_ctl_elem_info_get_type(ctl->info) != SND_CTL_ELEM_TYPE_BOOLEAN) {
+ ksft_print_msg("%d.%d %s ends in Switch but is not boolean\n",
+ ctl->card->card, ctl->elem, ctl->name);
+ name_ok = false;
+ }
+ }
+
+ /* Writeable boolean controls should end in Switch */
+ if (snd_ctl_elem_info_get_type(ctl->info) == SND_CTL_ELEM_TYPE_BOOLEAN &&
+ snd_ctl_elem_info_is_writable(ctl->info)) {
+ if (!strend(ctl->name, " Switch")) {
+ ksft_print_msg("%d.%d %s is a writeable boolean but not a Switch\n",
+ ctl->card->card, ctl->elem, ctl->name);
+ name_ok = false;
+ }
+ }
+
+ ksft_test_result(name_ok, "name.%d.%d\n",
+ ctl->card->card, ctl->elem);
+}
+
+static void show_values(struct ctl_data *ctl, snd_ctl_elem_value_t *orig_val,
+ snd_ctl_elem_value_t *read_val)
+{
+ long long orig_int, read_int;
+ int i;
+
+ for (i = 0; i < snd_ctl_elem_info_get_count(ctl->info); i++) {
+ switch (snd_ctl_elem_info_get_type(ctl->info)) {
+ case SND_CTL_ELEM_TYPE_BOOLEAN:
+ orig_int = snd_ctl_elem_value_get_boolean(orig_val, i);
+ read_int = snd_ctl_elem_value_get_boolean(read_val, i);
+ break;
+
+ case SND_CTL_ELEM_TYPE_INTEGER:
+ orig_int = snd_ctl_elem_value_get_integer(orig_val, i);
+ read_int = snd_ctl_elem_value_get_integer(read_val, i);
+ break;
+
+ case SND_CTL_ELEM_TYPE_INTEGER64:
+ orig_int = snd_ctl_elem_value_get_integer64(orig_val,
+ i);
+ read_int = snd_ctl_elem_value_get_integer64(read_val,
+ i);
+ break;
+
+ case SND_CTL_ELEM_TYPE_ENUMERATED:
+ orig_int = snd_ctl_elem_value_get_enumerated(orig_val,
+ i);
+ read_int = snd_ctl_elem_value_get_enumerated(read_val,
+ i);
+ break;
+
+ default:
+ return;
+ }
+
+ ksft_print_msg("%s.%d orig %lld read %lld, is_volatile %d\n",
+ ctl->name, i, orig_int, read_int,
+ snd_ctl_elem_info_is_volatile(ctl->info));
+ }
+}
+
+static bool show_mismatch(struct ctl_data *ctl, int index,
+ snd_ctl_elem_value_t *read_val,
+ snd_ctl_elem_value_t *expected_val)
+{
+ long long expected_int, read_int;
+
+ /*
+ * We factor out the code to compare values representable as
+ * integers, ensure that check doesn't log otherwise.
+ */
+ expected_int = 0;
+ read_int = 0;
+
+ switch (snd_ctl_elem_info_get_type(ctl->info)) {
+ case SND_CTL_ELEM_TYPE_BOOLEAN:
+ expected_int = snd_ctl_elem_value_get_boolean(expected_val,
+ index);
+ read_int = snd_ctl_elem_value_get_boolean(read_val, index);
+ break;
+
+ case SND_CTL_ELEM_TYPE_INTEGER:
+ expected_int = snd_ctl_elem_value_get_integer(expected_val,
+ index);
+ read_int = snd_ctl_elem_value_get_integer(read_val, index);
+ break;
+
+ case SND_CTL_ELEM_TYPE_INTEGER64:
+ expected_int = snd_ctl_elem_value_get_integer64(expected_val,
+ index);
+ read_int = snd_ctl_elem_value_get_integer64(read_val,
+ index);
+ break;
+
+ case SND_CTL_ELEM_TYPE_ENUMERATED:
+ expected_int = snd_ctl_elem_value_get_enumerated(expected_val,
+ index);
+ read_int = snd_ctl_elem_value_get_enumerated(read_val,
+ index);
+ break;
+
+ default:
+ break;
+ }
+
+ if (expected_int != read_int) {
+ /*
+ * NOTE: The volatile attribute means that the hardware
+ * can voluntarily change the state of control element
+ * independent of any operation by software.
+ */
+ bool is_volatile = snd_ctl_elem_info_is_volatile(ctl->info);
+ ksft_print_msg("%s.%d expected %lld but read %lld, is_volatile %d\n",
+ ctl->name, index, expected_int, read_int, is_volatile);
+ return !is_volatile;
+ } else {
+ return false;
+ }
+}
+
+/*
+ * Write a value then if possible verify that we get the expected
+ * result. An optional expected value can be provided if we expect
+ * the write to fail, for verifying that invalid writes don't corrupt
+ * anything.
+ */
+static int write_and_verify(struct ctl_data *ctl,
+ snd_ctl_elem_value_t *write_val,
+ snd_ctl_elem_value_t *expected_val)
+{
+ int err, i;
+ bool error_expected, mismatch_shown;
+ snd_ctl_elem_value_t *initial_val, *read_val, *w_val;
+ snd_ctl_elem_value_alloca(&initial_val);
+ snd_ctl_elem_value_alloca(&read_val);
+ snd_ctl_elem_value_alloca(&w_val);
+
+ /*
+ * We need to copy the write value since writing can modify
+ * the value which causes surprises, and allocate an expected
+ * value if we expect to read back what we wrote.
+ */
+ snd_ctl_elem_value_copy(w_val, write_val);
+ if (expected_val) {
+ error_expected = true;
+ } else {
+ error_expected = false;
+ snd_ctl_elem_value_alloca(&expected_val);
+ snd_ctl_elem_value_copy(expected_val, write_val);
+ }
+
+ /* Store the value before we write */
+ if (snd_ctl_elem_info_is_readable(ctl->info)) {
+ snd_ctl_elem_value_set_id(initial_val, ctl->id);
+
+ err = snd_ctl_elem_read(ctl->card->handle, initial_val);
+ if (err < 0) {
+ ksft_print_msg("snd_ctl_elem_read() failed: %s\n",
+ snd_strerror(err));
+ return err;
+ }
+ }
+
+ /*
+ * Do the write, if we have an expected value ignore the error
+ * and carry on to validate the expected value.
+ */
+ err = snd_ctl_elem_write(ctl->card->handle, w_val);
+ if (err < 0 && !error_expected) {
+ ksft_print_msg("snd_ctl_elem_write() failed: %s\n",
+ snd_strerror(err));
+ return err;
+ }
+
+ /* Can we do the verification part? */
+ if (!snd_ctl_elem_info_is_readable(ctl->info))
+ return err;
+
+ snd_ctl_elem_value_set_id(read_val, ctl->id);
+
+ err = snd_ctl_elem_read(ctl->card->handle, read_val);
+ if (err < 0) {
+ ksft_print_msg("snd_ctl_elem_read() failed: %s\n",
+ snd_strerror(err));
+ return err;
+ }
+
+ /*
+ * Check for an event if the value changed, or confirm that
+ * there was none if it didn't. We rely on the kernel
+ * generating the notification before it returns from the
+ * write, this is currently true, should that ever change this
+ * will most likely break and need updating.
+ */
+ if (!snd_ctl_elem_info_is_volatile(ctl->info)) {
+ err = wait_for_event(ctl, 0);
+ if (snd_ctl_elem_value_compare(initial_val, read_val)) {
+ if (err < 1) {
+ ksft_print_msg("No event generated for %s\n",
+ ctl->name);
+ show_values(ctl, initial_val, read_val);
+ ctl->event_missing++;
+ }
+ } else {
+ if (err != 0) {
+ ksft_print_msg("Spurious event generated for %s\n",
+ ctl->name);
+ show_values(ctl, initial_val, read_val);
+ ctl->event_spurious++;
+ }
+ }
+ }
+
+ /*
+ * Use the libray to compare values, if there's a mismatch
+ * carry on and try to provide a more useful diagnostic than
+ * just "mismatch".
+ */
+ if (!snd_ctl_elem_value_compare(expected_val, read_val))
+ return 0;
+
+ mismatch_shown = false;
+ for (i = 0; i < snd_ctl_elem_info_get_count(ctl->info); i++)
+ if (show_mismatch(ctl, i, read_val, expected_val))
+ mismatch_shown = true;
+
+ if (!mismatch_shown)
+ ksft_print_msg("%s read and written values differ\n",
+ ctl->name);
+
+ return -1;
+}
+
+/*
+ * Make sure we can write the default value back to the control, this
+ * should validate that at least some write works.
+ */
+static void test_ctl_write_default(struct ctl_data *ctl)
+{
+ int err;
+
+ /* If the control is turned off let's be polite */
+ if (snd_ctl_elem_info_is_inactive(ctl->info)) {
+ ksft_print_msg("%s is inactive\n", ctl->name);
+ ksft_test_result_skip("write_default.%d.%d\n",
+ ctl->card->card, ctl->elem);
+ return;
+ }
+
+ if (!snd_ctl_elem_info_is_writable(ctl->info)) {
+ ksft_print_msg("%s is not writeable\n", ctl->name);
+ ksft_test_result_skip("write_default.%d.%d\n",
+ ctl->card->card, ctl->elem);
+ return;
+ }
+
+ /* No idea what the default was for unreadable controls */
+ if (!snd_ctl_elem_info_is_readable(ctl->info)) {
+ ksft_print_msg("%s couldn't read default\n", ctl->name);
+ ksft_test_result_skip("write_default.%d.%d\n",
+ ctl->card->card, ctl->elem);
+ return;
+ }
+
+ err = write_and_verify(ctl, ctl->def_val, NULL);
+
+ ksft_test_result(err >= 0, "write_default.%d.%d\n",
+ ctl->card->card, ctl->elem);
+}
+
+static bool test_ctl_write_valid_boolean(struct ctl_data *ctl)
+{
+ int err, i, j;
+ bool fail = false;
+ snd_ctl_elem_value_t *val;
+ snd_ctl_elem_value_alloca(&val);
+
+ snd_ctl_elem_value_set_id(val, ctl->id);
+
+ for (i = 0; i < snd_ctl_elem_info_get_count(ctl->info); i++) {
+ for (j = 0; j < 2; j++) {
+ snd_ctl_elem_value_set_boolean(val, i, j);
+ err = write_and_verify(ctl, val, NULL);
+ if (err != 0)
+ fail = true;
+ }
+ }
+
+ return !fail;
+}
+
+static bool test_ctl_write_valid_integer(struct ctl_data *ctl)
+{
+ int err;
+ int i;
+ long j, step;
+ bool fail = false;
+ snd_ctl_elem_value_t *val;
+ snd_ctl_elem_value_alloca(&val);
+
+ snd_ctl_elem_value_set_id(val, ctl->id);
+
+ step = snd_ctl_elem_info_get_step(ctl->info);
+ if (!step)
+ step = 1;
+
+ for (i = 0; i < snd_ctl_elem_info_get_count(ctl->info); i++) {
+ for (j = snd_ctl_elem_info_get_min(ctl->info);
+ j <= snd_ctl_elem_info_get_max(ctl->info); j += step) {
+
+ snd_ctl_elem_value_set_integer(val, i, j);
+ err = write_and_verify(ctl, val, NULL);
+ if (err != 0)
+ fail = true;
+ }
+ }
+
+
+ return !fail;
+}
+
+static bool test_ctl_write_valid_integer64(struct ctl_data *ctl)
+{
+ int err, i;
+ long long j, step;
+ bool fail = false;
+ snd_ctl_elem_value_t *val;
+ snd_ctl_elem_value_alloca(&val);
+
+ snd_ctl_elem_value_set_id(val, ctl->id);
+
+ step = snd_ctl_elem_info_get_step64(ctl->info);
+ if (!step)
+ step = 1;
+
+ for (i = 0; i < snd_ctl_elem_info_get_count(ctl->info); i++) {
+ for (j = snd_ctl_elem_info_get_min64(ctl->info);
+ j <= snd_ctl_elem_info_get_max64(ctl->info); j += step) {
+
+ snd_ctl_elem_value_set_integer64(val, i, j);
+ err = write_and_verify(ctl, val, NULL);
+ if (err != 0)
+ fail = true;
+ }
+ }
+
+ return !fail;
+}
+
+static bool test_ctl_write_valid_enumerated(struct ctl_data *ctl)
+{
+ int err, i, j;
+ bool fail = false;
+ snd_ctl_elem_value_t *val;
+ snd_ctl_elem_value_alloca(&val);
+
+ snd_ctl_elem_value_set_id(val, ctl->id);
+
+ for (i = 0; i < snd_ctl_elem_info_get_count(ctl->info); i++) {
+ for (j = 0; j < snd_ctl_elem_info_get_items(ctl->info); j++) {
+ snd_ctl_elem_value_set_enumerated(val, i, j);
+ err = write_and_verify(ctl, val, NULL);
+ if (err != 0)
+ fail = true;
+ }
+ }
+
+ return !fail;
+}
+
+static void test_ctl_write_valid(struct ctl_data *ctl)
+{
+ bool pass;
+
+ /* If the control is turned off let's be polite */
+ if (snd_ctl_elem_info_is_inactive(ctl->info)) {
+ ksft_print_msg("%s is inactive\n", ctl->name);
+ ksft_test_result_skip("write_valid.%d.%d\n",
+ ctl->card->card, ctl->elem);
+ return;
+ }
+
+ if (!snd_ctl_elem_info_is_writable(ctl->info)) {
+ ksft_print_msg("%s is not writeable\n", ctl->name);
+ ksft_test_result_skip("write_valid.%d.%d\n",
+ ctl->card->card, ctl->elem);
+ return;
+ }
+
+ switch (snd_ctl_elem_info_get_type(ctl->info)) {
+ case SND_CTL_ELEM_TYPE_BOOLEAN:
+ pass = test_ctl_write_valid_boolean(ctl);
+ break;
+
+ case SND_CTL_ELEM_TYPE_INTEGER:
+ pass = test_ctl_write_valid_integer(ctl);
+ break;
+
+ case SND_CTL_ELEM_TYPE_INTEGER64:
+ pass = test_ctl_write_valid_integer64(ctl);
+ break;
+
+ case SND_CTL_ELEM_TYPE_ENUMERATED:
+ pass = test_ctl_write_valid_enumerated(ctl);
+ break;
+
+ default:
+ /* No tests for this yet */
+ ksft_test_result_skip("write_valid.%d.%d\n",
+ ctl->card->card, ctl->elem);
+ return;
+ }
+
+ /* Restore the default value to minimise disruption */
+ write_and_verify(ctl, ctl->def_val, NULL);
+
+ ksft_test_result(pass, "write_valid.%d.%d\n",
+ ctl->card->card, ctl->elem);
+}
+
+static bool test_ctl_write_invalid_value(struct ctl_data *ctl,
+ snd_ctl_elem_value_t *val)
+{
+ int err;
+
+ /* Ideally this will fail... */
+ err = snd_ctl_elem_write(ctl->card->handle, val);
+ if (err < 0)
+ return false;
+
+ /* ...but some devices will clamp to an in range value */
+ err = snd_ctl_elem_read(ctl->card->handle, val);
+ if (err < 0) {
+ ksft_print_msg("%s failed to read: %s\n",
+ ctl->name, snd_strerror(err));
+ return true;
+ }
+
+ return !ctl_value_valid(ctl, val);
+}
+
+static bool test_ctl_write_invalid_boolean(struct ctl_data *ctl)
+{
+ int i;
+ bool fail = false;
+ snd_ctl_elem_value_t *val;
+ snd_ctl_elem_value_alloca(&val);
+
+ for (i = 0; i < snd_ctl_elem_info_get_count(ctl->info); i++) {
+ snd_ctl_elem_value_copy(val, ctl->def_val);
+ snd_ctl_elem_value_set_boolean(val, i, 2);
+
+ if (test_ctl_write_invalid_value(ctl, val))
+ fail = true;
+ }
+
+ return !fail;
+}
+
+static bool test_ctl_write_invalid_integer(struct ctl_data *ctl)
+{
+ int i;
+ bool fail = false;
+ snd_ctl_elem_value_t *val;
+ snd_ctl_elem_value_alloca(&val);
+
+ for (i = 0; i < snd_ctl_elem_info_get_count(ctl->info); i++) {
+ if (snd_ctl_elem_info_get_min(ctl->info) != LONG_MIN) {
+ /* Just under range */
+ snd_ctl_elem_value_copy(val, ctl->def_val);
+ snd_ctl_elem_value_set_integer(val, i,
+ snd_ctl_elem_info_get_min(ctl->info) - 1);
+
+ if (test_ctl_write_invalid_value(ctl, val))
+ fail = true;
+
+ /* Minimum representable value */
+ snd_ctl_elem_value_copy(val, ctl->def_val);
+ snd_ctl_elem_value_set_integer(val, i, LONG_MIN);
+
+ if (test_ctl_write_invalid_value(ctl, val))
+ fail = true;
+ }
+
+ if (snd_ctl_elem_info_get_max(ctl->info) != LONG_MAX) {
+ /* Just over range */
+ snd_ctl_elem_value_copy(val, ctl->def_val);
+ snd_ctl_elem_value_set_integer(val, i,
+ snd_ctl_elem_info_get_max(ctl->info) + 1);
+
+ if (test_ctl_write_invalid_value(ctl, val))
+ fail = true;
+
+ /* Maximum representable value */
+ snd_ctl_elem_value_copy(val, ctl->def_val);
+ snd_ctl_elem_value_set_integer(val, i, LONG_MAX);
+
+ if (test_ctl_write_invalid_value(ctl, val))
+ fail = true;
+ }
+ }
+
+ return !fail;
+}
+
+static bool test_ctl_write_invalid_integer64(struct ctl_data *ctl)
+{
+ int i;
+ bool fail = false;
+ snd_ctl_elem_value_t *val;
+ snd_ctl_elem_value_alloca(&val);
+
+ for (i = 0; i < snd_ctl_elem_info_get_count(ctl->info); i++) {
+ if (snd_ctl_elem_info_get_min64(ctl->info) != LLONG_MIN) {
+ /* Just under range */
+ snd_ctl_elem_value_copy(val, ctl->def_val);
+ snd_ctl_elem_value_set_integer64(val, i,
+ snd_ctl_elem_info_get_min64(ctl->info) - 1);
+
+ if (test_ctl_write_invalid_value(ctl, val))
+ fail = true;
+
+ /* Minimum representable value */
+ snd_ctl_elem_value_copy(val, ctl->def_val);
+ snd_ctl_elem_value_set_integer64(val, i, LLONG_MIN);
+
+ if (test_ctl_write_invalid_value(ctl, val))
+ fail = true;
+ }
+
+ if (snd_ctl_elem_info_get_max64(ctl->info) != LLONG_MAX) {
+ /* Just over range */
+ snd_ctl_elem_value_copy(val, ctl->def_val);
+ snd_ctl_elem_value_set_integer64(val, i,
+ snd_ctl_elem_info_get_max64(ctl->info) + 1);
+
+ if (test_ctl_write_invalid_value(ctl, val))
+ fail = true;
+
+ /* Maximum representable value */
+ snd_ctl_elem_value_copy(val, ctl->def_val);
+ snd_ctl_elem_value_set_integer64(val, i, LLONG_MAX);
+
+ if (test_ctl_write_invalid_value(ctl, val))
+ fail = true;
+ }
+ }
+
+ return !fail;
+}
+
+static bool test_ctl_write_invalid_enumerated(struct ctl_data *ctl)
+{
+ int i;
+ bool fail = false;
+ snd_ctl_elem_value_t *val;
+ snd_ctl_elem_value_alloca(&val);
+
+ snd_ctl_elem_value_set_id(val, ctl->id);
+
+ for (i = 0; i < snd_ctl_elem_info_get_count(ctl->info); i++) {
+ /* One beyond maximum */
+ snd_ctl_elem_value_copy(val, ctl->def_val);
+ snd_ctl_elem_value_set_enumerated(val, i,
+ snd_ctl_elem_info_get_items(ctl->info));
+
+ if (test_ctl_write_invalid_value(ctl, val))
+ fail = true;
+
+ /* Maximum representable value */
+ snd_ctl_elem_value_copy(val, ctl->def_val);
+ snd_ctl_elem_value_set_enumerated(val, i, UINT_MAX);
+
+ if (test_ctl_write_invalid_value(ctl, val))
+ fail = true;
+
+ }
+
+ return !fail;
+}
+
+
+static void test_ctl_write_invalid(struct ctl_data *ctl)
+{
+ bool pass;
+
+ /* If the control is turned off let's be polite */
+ if (snd_ctl_elem_info_is_inactive(ctl->info)) {
+ ksft_print_msg("%s is inactive\n", ctl->name);
+ ksft_test_result_skip("write_invalid.%d.%d\n",
+ ctl->card->card, ctl->elem);
+ return;
+ }
+
+ if (!snd_ctl_elem_info_is_writable(ctl->info)) {
+ ksft_print_msg("%s is not writeable\n", ctl->name);
+ ksft_test_result_skip("write_invalid.%d.%d\n",
+ ctl->card->card, ctl->elem);
+ return;
+ }
+
+ switch (snd_ctl_elem_info_get_type(ctl->info)) {
+ case SND_CTL_ELEM_TYPE_BOOLEAN:
+ pass = test_ctl_write_invalid_boolean(ctl);
+ break;
+
+ case SND_CTL_ELEM_TYPE_INTEGER:
+ pass = test_ctl_write_invalid_integer(ctl);
+ break;
+
+ case SND_CTL_ELEM_TYPE_INTEGER64:
+ pass = test_ctl_write_invalid_integer64(ctl);
+ break;
+
+ case SND_CTL_ELEM_TYPE_ENUMERATED:
+ pass = test_ctl_write_invalid_enumerated(ctl);
+ break;
+
+ default:
+ /* No tests for this yet */
+ ksft_test_result_skip("write_invalid.%d.%d\n",
+ ctl->card->card, ctl->elem);
+ return;
+ }
+
+ /* Restore the default value to minimise disruption */
+ write_and_verify(ctl, ctl->def_val, NULL);
+
+ ksft_test_result(pass, "write_invalid.%d.%d\n",
+ ctl->card->card, ctl->elem);
+}
+
+static void test_ctl_event_missing(struct ctl_data *ctl)
+{
+ ksft_test_result(!ctl->event_missing, "event_missing.%d.%d\n",
+ ctl->card->card, ctl->elem);
+}
+
+static void test_ctl_event_spurious(struct ctl_data *ctl)
+{
+ ksft_test_result(!ctl->event_spurious, "event_spurious.%d.%d\n",
+ ctl->card->card, ctl->elem);
+}
+
+int main(void)
+{
+ struct ctl_data *ctl;
+
+ ksft_print_header();
+
+ find_controls();
+
+ ksft_set_plan(num_controls * TESTS_PER_CONTROL);
+
+ for (ctl = ctl_list; ctl != NULL; ctl = ctl->next) {
+ /*
+ * Must test get_value() before we write anything, the
+ * test stores the default value for later cleanup.
+ */
+ test_ctl_get_value(ctl);
+ test_ctl_name(ctl);
+ test_ctl_write_default(ctl);
+ test_ctl_write_valid(ctl);
+ test_ctl_write_invalid(ctl);
+ test_ctl_event_missing(ctl);
+ test_ctl_event_spurious(ctl);
+ }
+
+ ksft_exit_pass();
+
+ return 0;
+}
diff --git a/tools/testing/selftests/alsa/pcm-test.c b/tools/testing/selftests/alsa/pcm-test.c
new file mode 100644
index 000000000000..de664dedb541
--- /dev/null
+++ b/tools/testing/selftests/alsa/pcm-test.c
@@ -0,0 +1,641 @@
+// SPDX-License-Identifier: GPL-2.0
+//
+// kselftest for the ALSA PCM API
+//
+// Original author: Jaroslav Kysela <perex@perex.cz>
+// Copyright (c) 2022 Red Hat Inc.
+
+// This test will iterate over all cards detected in the system, exercising
+// every PCM device it can find. This may conflict with other system
+// software if there is audio activity so is best run on a system with a
+// minimal active userspace.
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <stdbool.h>
+#include <errno.h>
+#include <assert.h>
+#include <pthread.h>
+
+#include "../kselftest.h"
+#include "alsa-local.h"
+
+typedef struct timespec timestamp_t;
+
+struct card_data {
+ int card;
+ pthread_t thread;
+ struct card_data *next;
+};
+
+struct card_data *card_list = NULL;
+
+struct pcm_data {
+ snd_pcm_t *handle;
+ int card;
+ int device;
+ int subdevice;
+ snd_pcm_stream_t stream;
+ snd_config_t *pcm_config;
+ struct pcm_data *next;
+};
+
+struct pcm_data *pcm_list = NULL;
+
+int num_missing = 0;
+struct pcm_data *pcm_missing = NULL;
+
+snd_config_t *default_pcm_config;
+
+/* Lock while reporting results since kselftest doesn't */
+pthread_mutex_t results_lock = PTHREAD_MUTEX_INITIALIZER;
+
+enum test_class {
+ TEST_CLASS_DEFAULT,
+ TEST_CLASS_SYSTEM,
+};
+
+void timestamp_now(timestamp_t *tstamp)
+{
+ if (clock_gettime(CLOCK_MONOTONIC_RAW, tstamp))
+ ksft_exit_fail_msg("clock_get_time\n");
+}
+
+long long timestamp_diff_ms(timestamp_t *tstamp)
+{
+ timestamp_t now, diff;
+ timestamp_now(&now);
+ if (tstamp->tv_nsec > now.tv_nsec) {
+ diff.tv_sec = now.tv_sec - tstamp->tv_sec - 1;
+ diff.tv_nsec = (now.tv_nsec + 1000000000L) - tstamp->tv_nsec;
+ } else {
+ diff.tv_sec = now.tv_sec - tstamp->tv_sec;
+ diff.tv_nsec = now.tv_nsec - tstamp->tv_nsec;
+ }
+ return (diff.tv_sec * 1000) + ((diff.tv_nsec + 500000L) / 1000000L);
+}
+
+static long device_from_id(snd_config_t *node)
+{
+ const char *id;
+ char *end;
+ long v;
+
+ if (snd_config_get_id(node, &id))
+ ksft_exit_fail_msg("snd_config_get_id\n");
+ errno = 0;
+ v = strtol(id, &end, 10);
+ if (errno || *end)
+ return -1;
+ return v;
+}
+
+static void missing_device(int card, int device, int subdevice, snd_pcm_stream_t stream)
+{
+ struct pcm_data *pcm_data;
+
+ for (pcm_data = pcm_list; pcm_data != NULL; pcm_data = pcm_data->next) {
+ if (pcm_data->card != card)
+ continue;
+ if (pcm_data->device != device)
+ continue;
+ if (pcm_data->subdevice != subdevice)
+ continue;
+ if (pcm_data->stream != stream)
+ continue;
+ return;
+ }
+ pcm_data = calloc(1, sizeof(*pcm_data));
+ if (!pcm_data)
+ ksft_exit_fail_msg("Out of memory\n");
+ pcm_data->card = card;
+ pcm_data->device = device;
+ pcm_data->subdevice = subdevice;
+ pcm_data->stream = stream;
+ pcm_data->next = pcm_missing;
+ pcm_missing = pcm_data;
+ num_missing++;
+}
+
+static void missing_devices(int card, snd_config_t *card_config)
+{
+ snd_config_t *pcm_config, *node1, *node2;
+ snd_config_iterator_t i1, i2, next1, next2;
+ int device, subdevice;
+
+ pcm_config = conf_get_subtree(card_config, "pcm", NULL);
+ if (!pcm_config)
+ return;
+ snd_config_for_each(i1, next1, pcm_config) {
+ node1 = snd_config_iterator_entry(i1);
+ device = device_from_id(node1);
+ if (device < 0)
+ continue;
+ if (snd_config_get_type(node1) != SND_CONFIG_TYPE_COMPOUND)
+ continue;
+ snd_config_for_each(i2, next2, node1) {
+ node2 = snd_config_iterator_entry(i2);
+ subdevice = device_from_id(node2);
+ if (subdevice < 0)
+ continue;
+ if (conf_get_subtree(node2, "PLAYBACK", NULL))
+ missing_device(card, device, subdevice, SND_PCM_STREAM_PLAYBACK);
+ if (conf_get_subtree(node2, "CAPTURE", NULL))
+ missing_device(card, device, subdevice, SND_PCM_STREAM_CAPTURE);
+ }
+ }
+}
+
+static void find_pcms(void)
+{
+ char name[32], key[64];
+ char *card_name, *card_longname;
+ int card, dev, subdev, count, direction, err;
+ snd_pcm_stream_t stream;
+ struct pcm_data *pcm_data;
+ snd_ctl_t *handle;
+ snd_pcm_info_t *pcm_info;
+ snd_config_t *config, *card_config, *pcm_config;
+ struct card_data *card_data;
+
+ snd_pcm_info_alloca(&pcm_info);
+
+ card = -1;
+ if (snd_card_next(&card) < 0 || card < 0)
+ return;
+
+ config = get_alsalib_config();
+
+ while (card >= 0) {
+ sprintf(name, "hw:%d", card);
+
+ err = snd_ctl_open_lconf(&handle, name, 0, config);
+ if (err < 0) {
+ ksft_print_msg("Failed to get hctl for card %d: %s\n",
+ card, snd_strerror(err));
+ goto next_card;
+ }
+
+ err = snd_card_get_name(card, &card_name);
+ if (err != 0)
+ card_name = "Unknown";
+ err = snd_card_get_longname(card, &card_longname);
+ if (err != 0)
+ card_longname = "Unknown";
+ ksft_print_msg("Card %d - %s (%s)\n", card,
+ card_name, card_longname);
+
+ card_config = conf_by_card(card);
+
+ card_data = calloc(1, sizeof(*card_data));
+ if (!card_data)
+ ksft_exit_fail_msg("Out of memory\n");
+ card_data->card = card;
+ card_data->next = card_list;
+ card_list = card_data;
+
+ dev = -1;
+ while (1) {
+ if (snd_ctl_pcm_next_device(handle, &dev) < 0)
+ ksft_exit_fail_msg("snd_ctl_pcm_next_device\n");
+ if (dev < 0)
+ break;
+
+ for (direction = 0; direction < 2; direction++) {
+ stream = direction ? SND_PCM_STREAM_CAPTURE : SND_PCM_STREAM_PLAYBACK;
+ sprintf(key, "pcm.%d.%s", dev, snd_pcm_stream_name(stream));
+ pcm_config = conf_get_subtree(card_config, key, NULL);
+ if (conf_get_bool(card_config, key, "skip", false)) {
+ ksft_print_msg("skipping pcm %d.%d.%s\n", card, dev, snd_pcm_stream_name(stream));
+ continue;
+ }
+ snd_pcm_info_set_device(pcm_info, dev);
+ snd_pcm_info_set_subdevice(pcm_info, 0);
+ snd_pcm_info_set_stream(pcm_info, stream);
+ err = snd_ctl_pcm_info(handle, pcm_info);
+ if (err == -ENOENT)
+ continue;
+ if (err < 0)
+ ksft_exit_fail_msg("snd_ctl_pcm_info: %d:%d:%d\n",
+ dev, 0, stream);
+ count = snd_pcm_info_get_subdevices_count(pcm_info);
+ for (subdev = 0; subdev < count; subdev++) {
+ sprintf(key, "pcm.%d.%d.%s", dev, subdev, snd_pcm_stream_name(stream));
+ if (conf_get_bool(card_config, key, "skip", false)) {
+ ksft_print_msg("skipping pcm %d.%d.%d.%s\n", card, dev,
+ subdev, snd_pcm_stream_name(stream));
+ continue;
+ }
+ pcm_data = calloc(1, sizeof(*pcm_data));
+ if (!pcm_data)
+ ksft_exit_fail_msg("Out of memory\n");
+ pcm_data->card = card;
+ pcm_data->device = dev;
+ pcm_data->subdevice = subdev;
+ pcm_data->stream = stream;
+ pcm_data->pcm_config = conf_get_subtree(card_config, key, NULL);
+ pcm_data->next = pcm_list;
+ pcm_list = pcm_data;
+ }
+ }
+ }
+
+ /* check for missing devices */
+ missing_devices(card, card_config);
+
+ next_card:
+ snd_ctl_close(handle);
+ if (snd_card_next(&card) < 0) {
+ ksft_print_msg("snd_card_next");
+ break;
+ }
+ }
+
+ snd_config_delete(config);
+}
+
+static void test_pcm_time(struct pcm_data *data, enum test_class class,
+ const char *test_name, snd_config_t *pcm_cfg)
+{
+ char name[64], msg[256];
+ const int duration_s = 2, margin_ms = 100;
+ const int duration_ms = duration_s * 1000;
+ const char *cs;
+ int i, err;
+ snd_pcm_t *handle = NULL;
+ snd_pcm_access_t access = SND_PCM_ACCESS_RW_INTERLEAVED;
+ snd_pcm_format_t format, old_format;
+ const char *alt_formats[8];
+ unsigned char *samples = NULL;
+ snd_pcm_sframes_t frames;
+ long long ms;
+ long rate, channels, period_size, buffer_size;
+ unsigned int rrate;
+ snd_pcm_uframes_t rperiod_size, rbuffer_size, start_threshold;
+ timestamp_t tstamp;
+ bool pass = false;
+ snd_pcm_hw_params_t *hw_params;
+ snd_pcm_sw_params_t *sw_params;
+ const char *test_class_name;
+ bool skip = true;
+ const char *desc;
+
+ switch (class) {
+ case TEST_CLASS_DEFAULT:
+ test_class_name = "default";
+ break;
+ case TEST_CLASS_SYSTEM:
+ test_class_name = "system";
+ break;
+ default:
+ ksft_exit_fail_msg("Unknown test class %d\n", class);
+ break;
+ }
+
+ desc = conf_get_string(pcm_cfg, "description", NULL, NULL);
+ if (desc)
+ ksft_print_msg("%s.%s.%d.%d.%d.%s - %s\n",
+ test_class_name, test_name,
+ data->card, data->device, data->subdevice,
+ snd_pcm_stream_name(data->stream),
+ desc);
+
+
+ snd_pcm_hw_params_alloca(&hw_params);
+ snd_pcm_sw_params_alloca(&sw_params);
+
+ cs = conf_get_string(pcm_cfg, "format", NULL, "S16_LE");
+ format = snd_pcm_format_value(cs);
+ if (format == SND_PCM_FORMAT_UNKNOWN)
+ ksft_exit_fail_msg("Wrong format '%s'\n", cs);
+ conf_get_string_array(pcm_cfg, "alt_formats", NULL,
+ alt_formats, ARRAY_SIZE(alt_formats), NULL);
+ rate = conf_get_long(pcm_cfg, "rate", NULL, 48000);
+ channels = conf_get_long(pcm_cfg, "channels", NULL, 2);
+ period_size = conf_get_long(pcm_cfg, "period_size", NULL, 4096);
+ buffer_size = conf_get_long(pcm_cfg, "buffer_size", NULL, 16384);
+
+ samples = malloc((rate * channels * snd_pcm_format_physical_width(format)) / 8);
+ if (!samples)
+ ksft_exit_fail_msg("Out of memory\n");
+ snd_pcm_format_set_silence(format, samples, rate * channels);
+
+ sprintf(name, "hw:%d,%d,%d", data->card, data->device, data->subdevice);
+ err = snd_pcm_open(&handle, name, data->stream, 0);
+ if (err < 0) {
+ snprintf(msg, sizeof(msg), "Failed to get pcm handle: %s", snd_strerror(err));
+ goto __close;
+ }
+
+ err = snd_pcm_hw_params_any(handle, hw_params);
+ if (err < 0) {
+ snprintf(msg, sizeof(msg), "snd_pcm_hw_params_any: %s", snd_strerror(err));
+ goto __close;
+ }
+ err = snd_pcm_hw_params_set_rate_resample(handle, hw_params, 0);
+ if (err < 0) {
+ snprintf(msg, sizeof(msg), "snd_pcm_hw_params_set_rate_resample: %s", snd_strerror(err));
+ goto __close;
+ }
+ err = snd_pcm_hw_params_set_access(handle, hw_params, access);
+ if (err < 0) {
+ snprintf(msg, sizeof(msg), "snd_pcm_hw_params_set_access %s: %s",
+ snd_pcm_access_name(access), snd_strerror(err));
+ goto __close;
+ }
+ i = -1;
+__format:
+ err = snd_pcm_hw_params_set_format(handle, hw_params, format);
+ if (err < 0) {
+ i++;
+ if (i < ARRAY_SIZE(alt_formats) && alt_formats[i]) {
+ old_format = format;
+ format = snd_pcm_format_value(alt_formats[i]);
+ if (format != SND_PCM_FORMAT_UNKNOWN) {
+ ksft_print_msg("%s.%d.%d.%d.%s.%s format %s -> %s\n",
+ test_name,
+ data->card, data->device, data->subdevice,
+ snd_pcm_stream_name(data->stream),
+ snd_pcm_access_name(access),
+ snd_pcm_format_name(old_format),
+ snd_pcm_format_name(format));
+ samples = realloc(samples, (rate * channels *
+ snd_pcm_format_physical_width(format)) / 8);
+ if (!samples)
+ ksft_exit_fail_msg("Out of memory\n");
+ snd_pcm_format_set_silence(format, samples, rate * channels);
+ goto __format;
+ }
+ }
+ snprintf(msg, sizeof(msg), "snd_pcm_hw_params_set_format %s: %s",
+ snd_pcm_format_name(format), snd_strerror(err));
+ goto __close;
+ }
+ err = snd_pcm_hw_params_set_channels(handle, hw_params, channels);
+ if (err < 0) {
+ snprintf(msg, sizeof(msg), "snd_pcm_hw_params_set_channels %ld: %s", channels, snd_strerror(err));
+ goto __close;
+ }
+ rrate = rate;
+ err = snd_pcm_hw_params_set_rate_near(handle, hw_params, &rrate, 0);
+ if (err < 0) {
+ snprintf(msg, sizeof(msg), "snd_pcm_hw_params_set_rate %ld: %s", rate, snd_strerror(err));
+ goto __close;
+ }
+ if (rrate != rate) {
+ snprintf(msg, sizeof(msg), "rate mismatch %ld != %d", rate, rrate);
+ goto __close;
+ }
+ rperiod_size = period_size;
+ err = snd_pcm_hw_params_set_period_size_near(handle, hw_params, &rperiod_size, 0);
+ if (err < 0) {
+ snprintf(msg, sizeof(msg), "snd_pcm_hw_params_set_period_size %ld: %s", period_size, snd_strerror(err));
+ goto __close;
+ }
+ rbuffer_size = buffer_size;
+ err = snd_pcm_hw_params_set_buffer_size_near(handle, hw_params, &rbuffer_size);
+ if (err < 0) {
+ snprintf(msg, sizeof(msg), "snd_pcm_hw_params_set_buffer_size %ld: %s", buffer_size, snd_strerror(err));
+ goto __close;
+ }
+ err = snd_pcm_hw_params(handle, hw_params);
+ if (err < 0) {
+ snprintf(msg, sizeof(msg), "snd_pcm_hw_params: %s", snd_strerror(err));
+ goto __close;
+ }
+
+ err = snd_pcm_sw_params_current(handle, sw_params);
+ if (err < 0) {
+ snprintf(msg, sizeof(msg), "snd_pcm_sw_params_current: %s", snd_strerror(err));
+ goto __close;
+ }
+ if (data->stream == SND_PCM_STREAM_PLAYBACK) {
+ start_threshold = (rbuffer_size / rperiod_size) * rperiod_size;
+ } else {
+ start_threshold = rperiod_size;
+ }
+ err = snd_pcm_sw_params_set_start_threshold(handle, sw_params, start_threshold);
+ if (err < 0) {
+ snprintf(msg, sizeof(msg), "snd_pcm_sw_params_set_start_threshold %ld: %s", (long)start_threshold, snd_strerror(err));
+ goto __close;
+ }
+ err = snd_pcm_sw_params_set_avail_min(handle, sw_params, rperiod_size);
+ if (err < 0) {
+ snprintf(msg, sizeof(msg), "snd_pcm_sw_params_set_avail_min %ld: %s", (long)rperiod_size, snd_strerror(err));
+ goto __close;
+ }
+ err = snd_pcm_sw_params(handle, sw_params);
+ if (err < 0) {
+ snprintf(msg, sizeof(msg), "snd_pcm_sw_params: %s", snd_strerror(err));
+ goto __close;
+ }
+
+ ksft_print_msg("%s.%s.%d.%d.%d.%s hw_params.%s.%s.%ld.%ld.%ld.%ld sw_params.%ld\n",
+ test_class_name, test_name,
+ data->card, data->device, data->subdevice,
+ snd_pcm_stream_name(data->stream),
+ snd_pcm_access_name(access),
+ snd_pcm_format_name(format),
+ (long)rate, (long)channels,
+ (long)rperiod_size, (long)rbuffer_size,
+ (long)start_threshold);
+
+ /* Set all the params, actually run the test */
+ skip = false;
+
+ timestamp_now(&tstamp);
+ for (i = 0; i < duration_s; i++) {
+ if (data->stream == SND_PCM_STREAM_PLAYBACK) {
+ frames = snd_pcm_writei(handle, samples, rate);
+ if (frames < 0) {
+ snprintf(msg, sizeof(msg),
+ "Write failed: expected %ld, wrote %li", rate, frames);
+ goto __close;
+ }
+ if (frames < rate) {
+ snprintf(msg, sizeof(msg),
+ "expected %ld, wrote %li", rate, frames);
+ goto __close;
+ }
+ } else {
+ frames = snd_pcm_readi(handle, samples, rate);
+ if (frames < 0) {
+ snprintf(msg, sizeof(msg),
+ "expected %ld, wrote %li", rate, frames);
+ goto __close;
+ }
+ if (frames < rate) {
+ snprintf(msg, sizeof(msg),
+ "expected %ld, wrote %li", rate, frames);
+ goto __close;
+ }
+ }
+ }
+
+ snd_pcm_drain(handle);
+ ms = timestamp_diff_ms(&tstamp);
+ if (ms < duration_ms - margin_ms || ms > duration_ms + margin_ms) {
+ snprintf(msg, sizeof(msg), "time mismatch: expected %dms got %lld", duration_ms, ms);
+ goto __close;
+ }
+
+ msg[0] = '\0';
+ pass = true;
+__close:
+ pthread_mutex_lock(&results_lock);
+
+ switch (class) {
+ case TEST_CLASS_SYSTEM:
+ test_class_name = "system";
+ /*
+ * Anything specified as specific to this system
+ * should always be supported.
+ */
+ ksft_test_result(!skip, "%s.%s.%d.%d.%d.%s.params\n",
+ test_class_name, test_name,
+ data->card, data->device, data->subdevice,
+ snd_pcm_stream_name(data->stream));
+ break;
+ default:
+ break;
+ }
+
+ if (!skip)
+ ksft_test_result(pass, "%s.%s.%d.%d.%d.%s\n",
+ test_class_name, test_name,
+ data->card, data->device, data->subdevice,
+ snd_pcm_stream_name(data->stream));
+ else
+ ksft_test_result_skip("%s.%s.%d.%d.%d.%s\n",
+ test_class_name, test_name,
+ data->card, data->device, data->subdevice,
+ snd_pcm_stream_name(data->stream));
+
+ if (msg[0])
+ ksft_print_msg("%s\n", msg);
+
+ pthread_mutex_unlock(&results_lock);
+
+ free(samples);
+ if (handle)
+ snd_pcm_close(handle);
+}
+
+void run_time_tests(struct pcm_data *pcm, enum test_class class,
+ snd_config_t *cfg)
+{
+ const char *test_name, *test_type;
+ snd_config_t *pcm_cfg;
+ snd_config_iterator_t i, next;
+
+ if (!cfg)
+ return;
+
+ cfg = conf_get_subtree(cfg, "test", NULL);
+ if (cfg == NULL)
+ return;
+
+ snd_config_for_each(i, next, cfg) {
+ pcm_cfg = snd_config_iterator_entry(i);
+ if (snd_config_get_id(pcm_cfg, &test_name) < 0)
+ ksft_exit_fail_msg("snd_config_get_id\n");
+ test_type = conf_get_string(pcm_cfg, "type", NULL, "time");
+ if (strcmp(test_type, "time") == 0)
+ test_pcm_time(pcm, class, test_name, pcm_cfg);
+ else
+ ksft_exit_fail_msg("unknown test type '%s'\n", test_type);
+ }
+}
+
+void *card_thread(void *data)
+{
+ struct card_data *card = data;
+ struct pcm_data *pcm;
+
+ for (pcm = pcm_list; pcm != NULL; pcm = pcm->next) {
+ if (pcm->card != card->card)
+ continue;
+
+ run_time_tests(pcm, TEST_CLASS_DEFAULT, default_pcm_config);
+ run_time_tests(pcm, TEST_CLASS_SYSTEM, pcm->pcm_config);
+ }
+
+ return 0;
+}
+
+int main(void)
+{
+ struct card_data *card;
+ struct card_cfg_data *conf;
+ struct pcm_data *pcm;
+ snd_config_t *global_config, *cfg;
+ int num_pcm_tests = 0, num_tests, num_std_pcm_tests;
+ int ret;
+ void *thread_ret;
+
+ ksft_print_header();
+
+ global_config = conf_load_from_file("pcm-test.conf");
+ default_pcm_config = conf_get_subtree(global_config, "pcm", NULL);
+ if (default_pcm_config == NULL)
+ ksft_exit_fail_msg("default pcm test configuration (pcm compound) is missing\n");
+
+ conf_load();
+
+ find_pcms();
+
+ for (conf = conf_cards; conf; conf = conf->next)
+ if (conf->card < 0)
+ num_missing++;
+
+ num_std_pcm_tests = conf_get_count(default_pcm_config, "test", NULL);
+
+ for (pcm = pcm_list; pcm != NULL; pcm = pcm->next) {
+ num_pcm_tests += num_std_pcm_tests;
+ cfg = pcm->pcm_config;
+ if (cfg == NULL)
+ continue;
+ /* Setting params is reported as a separate test */
+ num_tests = conf_get_count(cfg, "test", NULL) * 2;
+ if (num_tests > 0)
+ num_pcm_tests += num_tests;
+ }
+
+ ksft_set_plan(num_missing + num_pcm_tests);
+
+ for (conf = conf_cards; conf; conf = conf->next)
+ if (conf->card < 0)
+ ksft_test_result_fail("test.missing.%s.%s\n",
+ conf->filename, conf->config_id);
+
+ for (pcm = pcm_missing; pcm != NULL; pcm = pcm->next) {
+ ksft_test_result(false, "test.missing.%d.%d.%d.%s\n",
+ pcm->card, pcm->device, pcm->subdevice,
+ snd_pcm_stream_name(pcm->stream));
+ }
+
+ for (card = card_list; card != NULL; card = card->next) {
+ ret = pthread_create(&card->thread, NULL, card_thread, card);
+ if (ret != 0) {
+ ksft_exit_fail_msg("Failed to create card %d thread: %d (%s)\n",
+ card->card, ret,
+ strerror(errno));
+ }
+ }
+
+ for (card = card_list; card != NULL; card = card->next) {
+ ret = pthread_join(card->thread, &thread_ret);
+ if (ret != 0) {
+ ksft_exit_fail_msg("Failed to join card %d thread: %d (%s)\n",
+ card->card, ret,
+ strerror(errno));
+ }
+ }
+
+ snd_config_delete(global_config);
+ conf_free();
+
+ ksft_exit_pass();
+
+ return 0;
+}
diff --git a/tools/testing/selftests/alsa/pcm-test.conf b/tools/testing/selftests/alsa/pcm-test.conf
new file mode 100644
index 000000000000..71bd3f78a6f2
--- /dev/null
+++ b/tools/testing/selftests/alsa/pcm-test.conf
@@ -0,0 +1,63 @@
+pcm.test.time1 {
+ description "8kHz mono large periods"
+ format S16_LE
+ alt_formats [ S32_LE ]
+ rate 8000
+ channels 1
+ period_size 8000
+ buffer_size 32000
+}
+pcm.test.time2 {
+ description "8kHz stereo large periods"
+ format S16_LE
+ alt_formats [ S32_LE ]
+ rate 8000
+ channels 2
+ period_size 8000
+ buffer_size 32000
+}
+pcm.test.time3 {
+ description "44.1kHz stereo large periods"
+ format S16_LE
+ alt_formats [ S32_LE ]
+ rate 44100
+ channels 2
+ period_size 22500
+ buffer_size 192000
+}
+pcm.test.time4 {
+ description "48kHz stereo small periods"
+ format S16_LE
+ alt_formats [ S32_LE ]
+ rate 48000
+ channels 2
+ period_size 512
+ buffer_size 4096
+}
+pcm.test.time5 {
+ description "48kHz stereo large periods"
+ format S16_LE
+ alt_formats [ S32_LE ]
+ rate 48000
+ channels 2
+ period_size 24000
+ buffer_size 192000
+}
+pcm.test.time6 {
+ description "48kHz 6 channel large periods"
+ format S16_LE
+ alt_formats [ S32_LE ]
+ rate 48000
+ channels 2
+ period_size 48000
+ buffer_size 576000
+}
+pcm.test.time7 {
+ description "96kHz stereo large periods"
+ format S16_LE
+ alt_formats [ S32_LE ]
+ rate 96000
+ channels 2
+ period_size 48000
+ buffer_size 192000
+}
diff --git a/tools/testing/selftests/alsa/test-pcmtest-driver.c b/tools/testing/selftests/alsa/test-pcmtest-driver.c
new file mode 100644
index 000000000000..ca81afa4ee90
--- /dev/null
+++ b/tools/testing/selftests/alsa/test-pcmtest-driver.c
@@ -0,0 +1,330 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * This is the test which covers PCM middle layer data transferring using
+ * the virtual pcm test driver (snd-pcmtest).
+ *
+ * Copyright 2023 Ivan Orlov <ivan.orlov0322@gmail.com>
+ */
+#include <string.h>
+#include <alsa/asoundlib.h>
+#include "../kselftest_harness.h"
+
+#define CH_NUM 4
+
+struct pattern_buf {
+ char buf[1024];
+ int len;
+};
+
+struct pattern_buf patterns[CH_NUM];
+
+struct pcmtest_test_params {
+ unsigned long buffer_size;
+ unsigned long period_size;
+ unsigned long channels;
+ unsigned int rate;
+ snd_pcm_access_t access;
+ size_t sec_buf_len;
+ size_t sample_size;
+ int time;
+ snd_pcm_format_t format;
+};
+
+static int read_patterns(void)
+{
+ FILE *fp, *fpl;
+ int i;
+ char pf[64];
+ char plf[64];
+
+ for (i = 0; i < CH_NUM; i++) {
+ sprintf(plf, "/sys/kernel/debug/pcmtest/fill_pattern%d_len", i);
+ fpl = fopen(plf, "r");
+ if (!fpl)
+ return -1;
+ fscanf(fpl, "%u", &patterns[i].len);
+ fclose(fpl);
+
+ sprintf(pf, "/sys/kernel/debug/pcmtest/fill_pattern%d", i);
+ fp = fopen(pf, "r");
+ if (!fp)
+ return -1;
+ fread(patterns[i].buf, 1, patterns[i].len, fp);
+ fclose(fp);
+ }
+
+ return 0;
+}
+
+static int get_test_results(char *debug_name)
+{
+ int result;
+ FILE *f;
+ char fname[128];
+
+ sprintf(fname, "/sys/kernel/debug/pcmtest/%s", debug_name);
+
+ f = fopen(fname, "r");
+ if (!f) {
+ printf("Failed to open file\n");
+ return -1;
+ }
+ fscanf(f, "%d", &result);
+ fclose(f);
+
+ return result;
+}
+
+static size_t get_sec_buf_len(unsigned int rate, unsigned long channels, snd_pcm_format_t format)
+{
+ return rate * channels * snd_pcm_format_physical_width(format) / 8;
+}
+
+static int setup_handle(snd_pcm_t **handle, snd_pcm_sw_params_t *swparams,
+ snd_pcm_hw_params_t *hwparams, struct pcmtest_test_params *params,
+ int card, snd_pcm_stream_t stream)
+{
+ char pcm_name[32];
+ int err;
+
+ sprintf(pcm_name, "hw:%d,0,0", card);
+ err = snd_pcm_open(handle, pcm_name, stream, 0);
+ if (err < 0)
+ return err;
+ snd_pcm_hw_params_any(*handle, hwparams);
+ snd_pcm_hw_params_set_rate_resample(*handle, hwparams, 0);
+ snd_pcm_hw_params_set_access(*handle, hwparams, params->access);
+ snd_pcm_hw_params_set_format(*handle, hwparams, params->format);
+ snd_pcm_hw_params_set_channels(*handle, hwparams, params->channels);
+ snd_pcm_hw_params_set_rate_near(*handle, hwparams, &params->rate, 0);
+ snd_pcm_hw_params_set_period_size_near(*handle, hwparams, &params->period_size, 0);
+ snd_pcm_hw_params_set_buffer_size_near(*handle, hwparams, &params->buffer_size);
+ snd_pcm_hw_params(*handle, hwparams);
+ snd_pcm_sw_params_current(*handle, swparams);
+
+ snd_pcm_hw_params_set_rate_resample(*handle, hwparams, 0);
+ snd_pcm_sw_params_set_avail_min(*handle, swparams, params->period_size);
+ snd_pcm_hw_params_set_buffer_size_near(*handle, hwparams, &params->buffer_size);
+ snd_pcm_hw_params_set_period_size_near(*handle, hwparams, &params->period_size, 0);
+ snd_pcm_sw_params(*handle, swparams);
+ snd_pcm_hw_params(*handle, hwparams);
+
+ return 0;
+}
+
+FIXTURE(pcmtest) {
+ int card;
+ snd_pcm_sw_params_t *swparams;
+ snd_pcm_hw_params_t *hwparams;
+ struct pcmtest_test_params params;
+};
+
+FIXTURE_TEARDOWN(pcmtest) {
+}
+
+FIXTURE_SETUP(pcmtest) {
+ char *card_name;
+ int err;
+
+ if (geteuid())
+ SKIP(return, "This test needs root to run!");
+
+ err = read_patterns();
+ if (err)
+ SKIP(return, "Can't read patterns. Probably, module isn't loaded");
+
+ card_name = malloc(127);
+ ASSERT_NE(card_name, NULL);
+ self->params.buffer_size = 16384;
+ self->params.period_size = 4096;
+ self->params.channels = CH_NUM;
+ self->params.rate = 8000;
+ self->params.access = SND_PCM_ACCESS_RW_INTERLEAVED;
+ self->params.format = SND_PCM_FORMAT_S16_LE;
+ self->card = -1;
+ self->params.sample_size = snd_pcm_format_physical_width(self->params.format) / 8;
+
+ self->params.sec_buf_len = get_sec_buf_len(self->params.rate, self->params.channels,
+ self->params.format);
+ self->params.time = 4;
+
+ while (snd_card_next(&self->card) >= 0) {
+ if (self->card == -1)
+ break;
+ snd_card_get_name(self->card, &card_name);
+ if (!strcmp(card_name, "PCM-Test"))
+ break;
+ }
+ free(card_name);
+ ASSERT_NE(self->card, -1);
+}
+
+/*
+ * Here we are trying to send the looped monotonically increasing sequence of bytes to the driver.
+ * If our data isn't corrupted, the driver will set the content of 'pc_test' debugfs file to '1'
+ */
+TEST_F(pcmtest, playback) {
+ snd_pcm_t *handle;
+ unsigned char *it;
+ size_t write_res;
+ int test_results;
+ int i, cur_ch, pos_in_ch;
+ void *samples;
+ struct pcmtest_test_params *params = &self->params;
+
+ samples = calloc(self->params.sec_buf_len * self->params.time, 1);
+ ASSERT_NE(samples, NULL);
+
+ snd_pcm_sw_params_alloca(&self->swparams);
+ snd_pcm_hw_params_alloca(&self->hwparams);
+
+ ASSERT_EQ(setup_handle(&handle, self->swparams, self->hwparams, params,
+ self->card, SND_PCM_STREAM_PLAYBACK), 0);
+ snd_pcm_format_set_silence(params->format, samples,
+ params->rate * params->channels * params->time);
+ it = samples;
+ for (i = 0; i < self->params.sec_buf_len * params->time; i++) {
+ cur_ch = (i / params->sample_size) % CH_NUM;
+ pos_in_ch = i / params->sample_size / CH_NUM * params->sample_size
+ + (i % params->sample_size);
+ it[i] = patterns[cur_ch].buf[pos_in_ch % patterns[cur_ch].len];
+ }
+ write_res = snd_pcm_writei(handle, samples, params->rate * params->time);
+ ASSERT_GE(write_res, 0);
+
+ snd_pcm_close(handle);
+ free(samples);
+ test_results = get_test_results("pc_test");
+ ASSERT_EQ(test_results, 1);
+}
+
+/*
+ * Here we test that the virtual alsa driver returns looped and monotonically increasing sequence
+ * of bytes. In the interleaved mode the buffer will contain samples in the following order:
+ * C0, C1, C2, C3, C0, C1, ...
+ */
+TEST_F(pcmtest, capture) {
+ snd_pcm_t *handle;
+ unsigned char *it;
+ size_t read_res;
+ int i, cur_ch, pos_in_ch;
+ void *samples;
+ struct pcmtest_test_params *params = &self->params;
+
+ samples = calloc(self->params.sec_buf_len * self->params.time, 1);
+ ASSERT_NE(samples, NULL);
+
+ snd_pcm_sw_params_alloca(&self->swparams);
+ snd_pcm_hw_params_alloca(&self->hwparams);
+
+ ASSERT_EQ(setup_handle(&handle, self->swparams, self->hwparams,
+ params, self->card, SND_PCM_STREAM_CAPTURE), 0);
+ snd_pcm_format_set_silence(params->format, samples,
+ params->rate * params->channels * params->time);
+ read_res = snd_pcm_readi(handle, samples, params->rate * params->time);
+ ASSERT_GE(read_res, 0);
+ snd_pcm_close(handle);
+ it = (unsigned char *)samples;
+ for (i = 0; i < self->params.sec_buf_len * self->params.time; i++) {
+ cur_ch = (i / params->sample_size) % CH_NUM;
+ pos_in_ch = i / params->sample_size / CH_NUM * params->sample_size
+ + (i % params->sample_size);
+ ASSERT_EQ(it[i], patterns[cur_ch].buf[pos_in_ch % patterns[cur_ch].len]);
+ }
+ free(samples);
+}
+
+// Test capture in the non-interleaved access mode. The are buffers for each recorded channel
+TEST_F(pcmtest, ni_capture) {
+ snd_pcm_t *handle;
+ struct pcmtest_test_params params = self->params;
+ char **chan_samples;
+ size_t i, j, read_res;
+
+ chan_samples = calloc(CH_NUM, sizeof(*chan_samples));
+ ASSERT_NE(chan_samples, NULL);
+
+ snd_pcm_sw_params_alloca(&self->swparams);
+ snd_pcm_hw_params_alloca(&self->hwparams);
+
+ params.access = SND_PCM_ACCESS_RW_NONINTERLEAVED;
+
+ ASSERT_EQ(setup_handle(&handle, self->swparams, self->hwparams,
+ &params, self->card, SND_PCM_STREAM_CAPTURE), 0);
+
+ for (i = 0; i < CH_NUM; i++)
+ chan_samples[i] = calloc(params.sec_buf_len * params.time, 1);
+
+ for (i = 0; i < 1; i++) {
+ read_res = snd_pcm_readn(handle, (void **)chan_samples, params.rate * params.time);
+ ASSERT_GE(read_res, 0);
+ }
+ snd_pcm_close(handle);
+
+ for (i = 0; i < CH_NUM; i++) {
+ for (j = 0; j < params.rate * params.time; j++)
+ ASSERT_EQ(chan_samples[i][j], patterns[i].buf[j % patterns[i].len]);
+ free(chan_samples[i]);
+ }
+ free(chan_samples);
+}
+
+TEST_F(pcmtest, ni_playback) {
+ snd_pcm_t *handle;
+ struct pcmtest_test_params params = self->params;
+ char **chan_samples;
+ size_t i, j, read_res;
+ int test_res;
+
+ chan_samples = calloc(CH_NUM, sizeof(*chan_samples));
+ ASSERT_NE(chan_samples, NULL);
+
+ snd_pcm_sw_params_alloca(&self->swparams);
+ snd_pcm_hw_params_alloca(&self->hwparams);
+
+ params.access = SND_PCM_ACCESS_RW_NONINTERLEAVED;
+
+ ASSERT_EQ(setup_handle(&handle, self->swparams, self->hwparams,
+ &params, self->card, SND_PCM_STREAM_PLAYBACK), 0);
+
+ for (i = 0; i < CH_NUM; i++) {
+ chan_samples[i] = calloc(params.sec_buf_len * params.time, 1);
+ for (j = 0; j < params.sec_buf_len * params.time; j++)
+ chan_samples[i][j] = patterns[i].buf[j % patterns[i].len];
+ }
+
+ for (i = 0; i < 1; i++) {
+ read_res = snd_pcm_writen(handle, (void **)chan_samples, params.rate * params.time);
+ ASSERT_GE(read_res, 0);
+ }
+
+ snd_pcm_close(handle);
+ test_res = get_test_results("pc_test");
+ ASSERT_EQ(test_res, 1);
+
+ for (i = 0; i < CH_NUM; i++)
+ free(chan_samples[i]);
+ free(chan_samples);
+}
+
+/*
+ * Here we are testing the custom ioctl definition inside the virtual driver. If it triggers
+ * successfully, the driver sets the content of 'ioctl_test' debugfs file to '1'.
+ */
+TEST_F(pcmtest, reset_ioctl) {
+ snd_pcm_t *handle;
+ int test_res;
+ struct pcmtest_test_params *params = &self->params;
+
+ snd_pcm_sw_params_alloca(&self->swparams);
+ snd_pcm_hw_params_alloca(&self->hwparams);
+
+ ASSERT_EQ(setup_handle(&handle, self->swparams, self->hwparams, params,
+ self->card, SND_PCM_STREAM_CAPTURE), 0);
+ snd_pcm_reset(handle);
+ test_res = get_test_results("ioctl_test");
+ ASSERT_EQ(test_res, 1);
+ snd_pcm_close(handle);
+}
+
+TEST_HARNESS_MAIN
diff --git a/tools/testing/selftests/amd-pstate/Makefile b/tools/testing/selftests/amd-pstate/Makefile
new file mode 100644
index 000000000000..c382f579fe94
--- /dev/null
+++ b/tools/testing/selftests/amd-pstate/Makefile
@@ -0,0 +1,18 @@
+# SPDX-License-Identifier: GPL-2.0-only
+# Makefile for amd-pstate/ function selftests
+
+# No binaries, but make sure arg-less "make" doesn't trigger "run_tests"
+all:
+
+ARCH ?= $(shell uname -m 2>/dev/null || echo not)
+ARCH := $(shell echo $(ARCH) | sed -e s/i.86/x86/ -e s/x86_64/x86/)
+
+ifeq (x86,$(ARCH))
+TEST_FILES += ../../../power/x86/amd_pstate_tracer/amd_pstate_trace.py
+TEST_FILES += ../../../power/x86/intel_pstate_tracer/intel_pstate_tracer.py
+endif
+
+TEST_PROGS += run.sh
+TEST_FILES += basic.sh tbench.sh gitsource.sh
+
+include ../lib.mk
diff --git a/tools/testing/selftests/amd-pstate/basic.sh b/tools/testing/selftests/amd-pstate/basic.sh
new file mode 100755
index 000000000000..e4c43193e4a3
--- /dev/null
+++ b/tools/testing/selftests/amd-pstate/basic.sh
@@ -0,0 +1,38 @@
+#!/bin/sh
+# SPDX-License-Identifier: GPL-2.0
+
+# amd-pstate-ut is a test module for testing the amd-pstate driver.
+# It can only run on x86 architectures and current cpufreq driver
+# must be amd-pstate.
+# (1) It can help all users to verify their processor support
+# (SBIOS/Firmware or Hardware).
+# (2) Kernel can have a basic function test to avoid the kernel
+# regression during the update.
+# (3) We can introduce more functional or performance tests to align
+# the result together, it will benefit power and performance scale optimization.
+
+# protect against multiple inclusion
+if [ $FILE_BASIC ]; then
+ return 0
+else
+ FILE_BASIC=DONE
+fi
+
+amd_pstate_basic()
+{
+ printf "\n---------------------------------------------\n"
+ printf "*** Running AMD P-state ut ***"
+ printf "\n---------------------------------------------\n"
+
+ if ! /sbin/modprobe -q -n amd-pstate-ut; then
+ echo "amd-pstate-ut: module amd-pstate-ut is not found [SKIP]"
+ exit $ksft_skip
+ fi
+ if /sbin/modprobe -q amd-pstate-ut; then
+ /sbin/modprobe -q -r amd-pstate-ut
+ echo "amd-pstate-basic: ok"
+ else
+ echo "amd-pstate-basic: [FAIL]"
+ exit 1
+ fi
+}
diff --git a/tools/testing/selftests/amd-pstate/config b/tools/testing/selftests/amd-pstate/config
new file mode 100644
index 000000000000..f43103c9adc4
--- /dev/null
+++ b/tools/testing/selftests/amd-pstate/config
@@ -0,0 +1 @@
+CONFIG_X86_AMD_PSTATE_UT=m
diff --git a/tools/testing/selftests/amd-pstate/gitsource.sh b/tools/testing/selftests/amd-pstate/gitsource.sh
new file mode 100755
index 000000000000..4cde62f90468
--- /dev/null
+++ b/tools/testing/selftests/amd-pstate/gitsource.sh
@@ -0,0 +1,359 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+# Testing and monitor the cpu desire performance, frequency, load,
+# power consumption and throughput etc. when this script trigger
+# gitsource test.
+# 1) Download and tar gitsource codes.
+# 2) Run gitsource benchmark on specific governors, ondemand or schedutil.
+# 3) Run tbench benchmark comparative test on acpi-cpufreq kernel driver.
+# 4) Get desire performance, frequency, load by perf.
+# 5) Get power consumption and throughput by amd_pstate_trace.py.
+# 6) Get run time by /usr/bin/time.
+# 7) Analyse test results and save it in file selftest.gitsource.csv.
+#8) Plot png images about time, energy and performance per watt for each test.
+
+# protect against multiple inclusion
+if [ $FILE_GITSOURCE ]; then
+ return 0
+else
+ FILE_GITSOURCE=DONE
+fi
+
+git_name="git-2.15.1"
+git_tar="$git_name.tar.gz"
+gitsource_url="https://github.com/git/git/archive/refs/tags/v2.15.1.tar.gz"
+gitsource_governors=("ondemand" "schedutil")
+
+# $1: governor, $2: round, $3: des-perf, $4: freq, $5: load, $6: time $7: energy, $8: PPW
+store_csv_gitsource()
+{
+ echo "$1, $2, $3, $4, $5, $6, $7, $8" | tee -a $OUTFILE_GIT.csv > /dev/null 2>&1
+}
+
+# clear some special lines
+clear_csv_gitsource()
+{
+ if [ -f $OUTFILE_GIT.csv ]; then
+ sed -i '/Comprison(%)/d' $OUTFILE_GIT.csv
+ sed -i "/$(scaling_name)/d" $OUTFILE_GIT.csv
+ fi
+}
+
+# find string $1 in file csv and get the number of lines
+get_lines_csv_gitsource()
+{
+ if [ -f $OUTFILE_GIT.csv ]; then
+ return `grep -c "$1" $OUTFILE_GIT.csv`
+ else
+ return 0
+ fi
+}
+
+pre_clear_gitsource()
+{
+ post_clear_gitsource
+ rm -rf gitsource_*.png
+ clear_csv_gitsource
+}
+
+post_clear_gitsource()
+{
+ rm -rf results/tracer-gitsource*
+ rm -rf $OUTFILE_GIT*.log
+ rm -rf $OUTFILE_GIT*.result
+}
+
+install_gitsource()
+{
+ if [ ! -d $SCRIPTDIR/$git_name ]; then
+ pushd $(pwd) > /dev/null 2>&1
+ cd $SCRIPTDIR
+ printf "Download gitsource, please wait a moment ...\n\n"
+ wget -O $git_tar $gitsource_url > /dev/null 2>&1
+
+ printf "Tar gitsource ...\n\n"
+ tar -xzf $git_tar
+ popd > /dev/null 2>&1
+ fi
+}
+
+# $1: governor, $2: loop
+run_gitsource()
+{
+ echo "Launching amd pstate tracer for $1 #$2 tracer_interval: $TRACER_INTERVAL"
+ $TRACER -n tracer-gitsource-$1-$2 -i $TRACER_INTERVAL > /dev/null 2>&1 &
+
+ printf "Make and test gitsource for $1 #$2 make_cpus: $MAKE_CPUS\n"
+ BACKUP_DIR=$(pwd)
+ pushd $BACKUP_DIR > /dev/null 2>&1
+ cd $SCRIPTDIR/$git_name
+ $PERF stat -a --per-socket -I 1000 -e power/energy-pkg/ /usr/bin/time -o $BACKUP_DIR/$OUTFILE_GIT.time-gitsource-$1-$2.log make test -j$MAKE_CPUS > $BACKUP_DIR/$OUTFILE_GIT-perf-$1-$2.log 2>&1
+ popd > /dev/null 2>&1
+
+ for job in `jobs -p`
+ do
+ echo "Waiting for job id $job"
+ wait $job
+ done
+}
+
+# $1: governor, $2: loop
+parse_gitsource()
+{
+ awk '{print $5}' results/tracer-gitsource-$1-$2/cpu.csv | sed -e '1d' | sed s/,// > $OUTFILE_GIT-des-perf-$1-$2.log
+ avg_des_perf=$(awk 'BEGIN {i=0; sum=0};{i++; sum += $1};END {print sum/i}' $OUTFILE_GIT-des-perf-$1-$2.log)
+ printf "Gitsource-$1-#$2 avg des perf: $avg_des_perf\n" | tee -a $OUTFILE_GIT.result
+
+ awk '{print $7}' results/tracer-gitsource-$1-$2/cpu.csv | sed -e '1d' | sed s/,// > $OUTFILE_GIT-freq-$1-$2.log
+ avg_freq=$(awk 'BEGIN {i=0; sum=0};{i++; sum += $1};END {print sum/i}' $OUTFILE_GIT-freq-$1-$2.log)
+ printf "Gitsource-$1-#$2 avg freq: $avg_freq\n" | tee -a $OUTFILE_GIT.result
+
+ awk '{print $11}' results/tracer-gitsource-$1-$2/cpu.csv | sed -e '1d' | sed s/,// > $OUTFILE_GIT-load-$1-$2.log
+ avg_load=$(awk 'BEGIN {i=0; sum=0};{i++; sum += $1};END {print sum/i}' $OUTFILE_GIT-load-$1-$2.log)
+ printf "Gitsource-$1-#$2 avg load: $avg_load\n" | tee -a $OUTFILE_GIT.result
+
+ grep user $OUTFILE_GIT.time-gitsource-$1-$2.log | awk '{print $1}' | sed -e 's/user//' > $OUTFILE_GIT-time-$1-$2.log
+ time_sum=$(awk 'BEGIN {sum=0};{sum += $1};END {print sum}' $OUTFILE_GIT-time-$1-$2.log)
+ printf "Gitsource-$1-#$2 user time(s): $time_sum\n" | tee -a $OUTFILE_GIT.result
+
+ grep Joules $OUTFILE_GIT-perf-$1-$2.log | awk '{print $4}' > $OUTFILE_GIT-energy-$1-$2.log
+ en_sum=$(awk 'BEGIN {sum=0};{sum += $1};END {print sum}' $OUTFILE_GIT-energy-$1-$2.log)
+ printf "Gitsource-$1-#$2 power consumption(J): $en_sum\n" | tee -a $OUTFILE_GIT.result
+
+ # Permance is the number of run gitsource per second, denoted 1/t, where 1 is the number of run gitsource in t
+ # seconds. It is well known that P=E/t, where P is power measured in watts(W), E is energy measured in joules(J),
+ # and t is time measured in seconds(s). This means that performance per watt becomes
+ # 1/t 1/t 1
+ # ----- = ----- = ---
+ # P E/t E
+ # with unit given by 1 per joule.
+ ppw=`echo "scale=9;1/$en_sum" | bc | awk '{printf "%.9f", $0}'`
+ printf "Gitsource-$1-#$2 performance per watt(1/J): $ppw\n" | tee -a $OUTFILE_GIT.result
+ printf "\n" | tee -a $OUTFILE_GIT.result
+
+ driver_name=`echo $(scaling_name)`
+ store_csv_gitsource "$driver_name-$1" $2 $avg_des_perf $avg_freq $avg_load $time_sum $en_sum $ppw
+}
+
+# $1: governor
+loop_gitsource()
+{
+ printf "\nGitsource total test times is $LOOP_TIMES for $1\n\n"
+ for i in `seq 1 $LOOP_TIMES`
+ do
+ run_gitsource $1 $i
+ parse_gitsource $1 $i
+ done
+}
+
+# $1: governor
+gather_gitsource()
+{
+ printf "Gitsource test result for $1 (loops:$LOOP_TIMES)" | tee -a $OUTFILE_GIT.result
+ printf "\n--------------------------------------------------\n" | tee -a $OUTFILE_GIT.result
+
+ grep "Gitsource-$1-#" $OUTFILE_GIT.result | grep "avg des perf:" | awk '{print $NF}' > $OUTFILE_GIT-des-perf-$1.log
+ avg_des_perf=$(awk 'BEGIN {sum=0};{sum += $1};END {print sum/'$LOOP_TIMES'}' $OUTFILE_GIT-des-perf-$1.log)
+ printf "Gitsource-$1 avg des perf: $avg_des_perf\n" | tee -a $OUTFILE_GIT.result
+
+ grep "Gitsource-$1-#" $OUTFILE_GIT.result | grep "avg freq:" | awk '{print $NF}' > $OUTFILE_GIT-freq-$1.log
+ avg_freq=$(awk 'BEGIN {sum=0};{sum += $1};END {print sum/'$LOOP_TIMES'}' $OUTFILE_GIT-freq-$1.log)
+ printf "Gitsource-$1 avg freq: $avg_freq\n" | tee -a $OUTFILE_GIT.result
+
+ grep "Gitsource-$1-#" $OUTFILE_GIT.result | grep "avg load:" | awk '{print $NF}' > $OUTFILE_GIT-load-$1.log
+ avg_load=$(awk 'BEGIN {sum=0};{sum += $1};END {print sum/'$LOOP_TIMES'}' $OUTFILE_GIT-load-$1.log)
+ printf "Gitsource-$1 avg load: $avg_load\n" | tee -a $OUTFILE_GIT.result
+
+ grep "Gitsource-$1-#" $OUTFILE_GIT.result | grep "user time(s):" | awk '{print $NF}' > $OUTFILE_GIT-time-$1.log
+ time_sum=$(awk 'BEGIN {sum=0};{sum += $1};END {print sum}' $OUTFILE_GIT-time-$1.log)
+ printf "Gitsource-$1 total user time(s): $time_sum\n" | tee -a $OUTFILE_GIT.result
+
+ avg_time=$(awk 'BEGIN {sum=0};{sum += $1};END {print sum/'$LOOP_TIMES'}' $OUTFILE_GIT-time-$1.log)
+ printf "Gitsource-$1 avg user times(s): $avg_time\n" | tee -a $OUTFILE_GIT.result
+
+ grep "Gitsource-$1-#" $OUTFILE_GIT.result | grep "power consumption(J):" | awk '{print $NF}' > $OUTFILE_GIT-energy-$1.log
+ en_sum=$(awk 'BEGIN {sum=0};{sum += $1};END {print sum}' $OUTFILE_GIT-energy-$1.log)
+ printf "Gitsource-$1 total power consumption(J): $en_sum\n" | tee -a $OUTFILE_GIT.result
+
+ avg_en=$(awk 'BEGIN {sum=0};{sum += $1};END {print sum/'$LOOP_TIMES'}' $OUTFILE_GIT-energy-$1.log)
+ printf "Gitsource-$1 avg power consumption(J): $avg_en\n" | tee -a $OUTFILE_GIT.result
+
+ # Permance is the number of run gitsource per second, denoted 1/t, where 1 is the number of run gitsource in t
+ # seconds. It is well known that P=E/t, where P is power measured in watts(W), E is energy measured in joules(J),
+ # and t is time measured in seconds(s). This means that performance per watt becomes
+ # 1/t 1/t 1
+ # ----- = ----- = ---
+ # P E/t E
+ # with unit given by 1 per joule.
+ ppw=`echo "scale=9;1/$avg_en" | bc | awk '{printf "%.9f", $0}'`
+ printf "Gitsource-$1 performance per watt(1/J): $ppw\n" | tee -a $OUTFILE_GIT.result
+ printf "\n" | tee -a $OUTFILE_GIT.result
+
+ driver_name=`echo $(scaling_name)`
+ store_csv_gitsource "$driver_name-$1" "Average" $avg_des_perf $avg_freq $avg_load $avg_time $avg_en $ppw
+}
+
+# $1: base scaling_driver $2: base governor $3: comparison scaling_driver $4: comparison governor
+__calc_comp_gitsource()
+{
+ base=`grep "$1-$2" $OUTFILE_GIT.csv | grep "Average"`
+ comp=`grep "$3-$4" $OUTFILE_GIT.csv | grep "Average"`
+
+ if [ -n "$base" -a -n "$comp" ]; then
+ printf "\n==================================================\n" | tee -a $OUTFILE_GIT.result
+ printf "Gitsource comparison $1-$2 VS $3-$4" | tee -a $OUTFILE_GIT.result
+ printf "\n==================================================\n" | tee -a $OUTFILE_GIT.result
+
+ # get the base values
+ des_perf_base=`echo "$base" | awk '{print $3}' | sed s/,//`
+ freq_base=`echo "$base" | awk '{print $4}' | sed s/,//`
+ load_base=`echo "$base" | awk '{print $5}' | sed s/,//`
+ time_base=`echo "$base" | awk '{print $6}' | sed s/,//`
+ energy_base=`echo "$base" | awk '{print $7}' | sed s/,//`
+ ppw_base=`echo "$base" | awk '{print $8}' | sed s/,//`
+
+ # get the comparison values
+ des_perf_comp=`echo "$comp" | awk '{print $3}' | sed s/,//`
+ freq_comp=`echo "$comp" | awk '{print $4}' | sed s/,//`
+ load_comp=`echo "$comp" | awk '{print $5}' | sed s/,//`
+ time_comp=`echo "$comp" | awk '{print $6}' | sed s/,//`
+ energy_comp=`echo "$comp" | awk '{print $7}' | sed s/,//`
+ ppw_comp=`echo "$comp" | awk '{print $8}' | sed s/,//`
+
+ # compare the base and comp values
+ des_perf_drop=`echo "scale=4;($des_perf_comp-$des_perf_base)*100/$des_perf_base" | bc | awk '{printf "%.4f", $0}'`
+ printf "Gitsource-$1 des perf base: $des_perf_base comprison: $des_perf_comp percent: $des_perf_drop\n" | tee -a $OUTFILE_GIT.result
+
+ freq_drop=`echo "scale=4;($freq_comp-$freq_base)*100/$freq_base" | bc | awk '{printf "%.4f", $0}'`
+ printf "Gitsource-$1 freq base: $freq_base comprison: $freq_comp percent: $freq_drop\n" | tee -a $OUTFILE_GIT.result
+
+ load_drop=`echo "scale=4;($load_comp-$load_base)*100/$load_base" | bc | awk '{printf "%.4f", $0}'`
+ printf "Gitsource-$1 load base: $load_base comprison: $load_comp percent: $load_drop\n" | tee -a $OUTFILE_GIT.result
+
+ time_drop=`echo "scale=4;($time_comp-$time_base)*100/$time_base" | bc | awk '{printf "%.4f", $0}'`
+ printf "Gitsource-$1 time base: $time_base comprison: $time_comp percent: $time_drop\n" | tee -a $OUTFILE_GIT.result
+
+ energy_drop=`echo "scale=4;($energy_comp-$energy_base)*100/$energy_base" | bc | awk '{printf "%.4f", $0}'`
+ printf "Gitsource-$1 energy base: $energy_base comprison: $energy_comp percent: $energy_drop\n" | tee -a $OUTFILE_GIT.result
+
+ ppw_drop=`echo "scale=4;($ppw_comp-$ppw_base)*100/$ppw_base" | bc | awk '{printf "%.4f", $0}'`
+ printf "Gitsource-$1 performance per watt base: $ppw_base comprison: $ppw_comp percent: $ppw_drop\n" | tee -a $OUTFILE_GIT.result
+ printf "\n" | tee -a $OUTFILE_GIT.result
+
+ store_csv_gitsource "$1-$2 VS $3-$4" "Comprison(%)" "$des_perf_drop" "$freq_drop" "$load_drop" "$time_drop" "$energy_drop" "$ppw_drop"
+ fi
+}
+
+# calculate the comparison(%)
+calc_comp_gitsource()
+{
+ # acpi-cpufreq-ondemand VS acpi-cpufreq-schedutil
+ __calc_comp_gitsource ${all_scaling_names[0]} ${gitsource_governors[0]} ${all_scaling_names[0]} ${gitsource_governors[1]}
+
+ # amd-pstate-ondemand VS amd-pstate-schedutil
+ __calc_comp_gitsource ${all_scaling_names[1]} ${gitsource_governors[0]} ${all_scaling_names[1]} ${gitsource_governors[1]}
+
+ # acpi-cpufreq-ondemand VS amd-pstate-ondemand
+ __calc_comp_gitsource ${all_scaling_names[0]} ${gitsource_governors[0]} ${all_scaling_names[1]} ${gitsource_governors[0]}
+
+ # acpi-cpufreq-schedutil VS amd-pstate-schedutil
+ __calc_comp_gitsource ${all_scaling_names[0]} ${gitsource_governors[1]} ${all_scaling_names[1]} ${gitsource_governors[1]}
+}
+
+# $1: file_name, $2: title, $3: ylable, $4: column
+plot_png_gitsource()
+{
+ # all_scaling_names[1] all_scaling_names[0] flag
+ # amd-pstate acpi-cpufreq
+ # N N 0
+ # N Y 1
+ # Y N 2
+ # Y Y 3
+ ret=`grep -c "${all_scaling_names[1]}" $OUTFILE_GIT.csv`
+ if [ $ret -eq 0 ]; then
+ ret=`grep -c "${all_scaling_names[0]}" $OUTFILE_GIT.csv`
+ if [ $ret -eq 0 ]; then
+ flag=0
+ else
+ flag=1
+ fi
+ else
+ ret=`grep -c "${all_scaling_names[0]}" $OUTFILE_GIT.csv`
+ if [ $ret -eq 0 ]; then
+ flag=2
+ else
+ flag=3
+ fi
+ fi
+
+ gnuplot << EOF
+ set term png
+ set output "$1"
+
+ set title "$2"
+ set xlabel "Test Cycles (round)"
+ set ylabel "$3"
+
+ set grid
+ set style data histogram
+ set style fill solid 0.5 border
+ set boxwidth 0.8
+
+ if ($flag == 1) {
+ plot \
+ "<(sed -n -e 's/,//g' -e '/${all_scaling_names[0]}-${gitsource_governors[0]}/p' $OUTFILE_GIT.csv)" using $4:xtic(2) title "${all_scaling_names[0]}-${gitsource_governors[0]}", \
+ "<(sed -n -e 's/,//g' -e '/${all_scaling_names[0]}-${gitsource_governors[1]}/p' $OUTFILE_GIT.csv)" using $4:xtic(2) title "${all_scaling_names[0]}-${gitsource_governors[1]}"
+ } else {
+ if ($flag == 2) {
+ plot \
+ "<(sed -n -e 's/,//g' -e '/${all_scaling_names[1]}-${gitsource_governors[0]}/p' $OUTFILE_GIT.csv)" using $4:xtic(2) title "${all_scaling_names[1]}-${gitsource_governors[0]}", \
+ "<(sed -n -e 's/,//g' -e '/${all_scaling_names[1]}-${gitsource_governors[1]}/p' $OUTFILE_GIT.csv)" using $4:xtic(2) title "${all_scaling_names[1]}-${gitsource_governors[1]}"
+ } else {
+ if ($flag == 3 ) {
+ plot \
+ "<(sed -n -e 's/,//g' -e '/${all_scaling_names[0]}-${gitsource_governors[0]}/p' $OUTFILE_GIT.csv)" using $4:xtic(2) title "${all_scaling_names[0]}-${gitsource_governors[0]}", \
+ "<(sed -n -e 's/,//g' -e '/${all_scaling_names[0]}-${gitsource_governors[1]}/p' $OUTFILE_GIT.csv)" using $4:xtic(2) title "${all_scaling_names[0]}-${gitsource_governors[1]}", \
+ "<(sed -n -e 's/,//g' -e '/${all_scaling_names[1]}-${gitsource_governors[0]}/p' $OUTFILE_GIT.csv)" using $4:xtic(2) title "${all_scaling_names[1]}-${gitsource_governors[0]}", \
+ "<(sed -n -e 's/,//g' -e '/${all_scaling_names[1]}-${gitsource_governors[1]}/p' $OUTFILE_GIT.csv)" using $4:xtic(2) title "${all_scaling_names[1]}-${gitsource_governors[1]}"
+ }
+ }
+ }
+ quit
+EOF
+}
+
+amd_pstate_gitsource()
+{
+ printf "\n---------------------------------------------\n"
+ printf "*** Running gitsource ***"
+ printf "\n---------------------------------------------\n"
+
+ pre_clear_gitsource
+
+ install_gitsource
+
+ get_lines_csv_gitsource "Governor"
+ if [ $? -eq 0 ]; then
+ # add titles and unit for csv file
+ store_csv_gitsource "Governor" "Round" "Des-perf" "Freq" "Load" "Time" "Energy" "Performance Per Watt"
+ store_csv_gitsource "Unit" "" "" "GHz" "" "s" "J" "1/J"
+ fi
+
+ backup_governor
+ for governor in ${gitsource_governors[*]} ; do
+ printf "\nSpecified governor is $governor\n\n"
+ switch_governor $governor
+ loop_gitsource $governor
+ gather_gitsource $governor
+ done
+ restore_governor
+
+ plot_png_gitsource "gitsource_time.png" "Gitsource Benchmark Time" "Time (s)" 6
+ plot_png_gitsource "gitsource_energy.png" "Gitsource Benchmark Energy" "Energy (J)" 7
+ plot_png_gitsource "gitsource_ppw.png" "Gitsource Benchmark Performance Per Watt" "Performance Per Watt (1/J)" 8
+
+ calc_comp_gitsource
+
+ post_clear_gitsource
+}
diff --git a/tools/testing/selftests/amd-pstate/run.sh b/tools/testing/selftests/amd-pstate/run.sh
new file mode 100755
index 000000000000..b053eea8bb19
--- /dev/null
+++ b/tools/testing/selftests/amd-pstate/run.sh
@@ -0,0 +1,396 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+# protect against multiple inclusion
+if [ $FILE_MAIN ]; then
+ return 0
+else
+ FILE_MAIN=DONE
+fi
+
+SCRIPTDIR=`dirname "$0"`
+TRACER=$SCRIPTDIR/../../../power/x86/amd_pstate_tracer/amd_pstate_trace.py
+
+source $SCRIPTDIR/basic.sh
+source $SCRIPTDIR/tbench.sh
+source $SCRIPTDIR/gitsource.sh
+
+# amd-pstate-ut only run on x86/x86_64 AMD systems.
+ARCH=$(uname -m 2>/dev/null | sed -e 's/i.86/x86/' -e 's/x86_64/x86/')
+VENDOR=$(cat /proc/cpuinfo | grep -m 1 'vendor_id' | awk '{print $NF}')
+
+msg="Skip all tests:"
+FUNC=all
+OUTFILE=selftest
+OUTFILE_TBENCH="$OUTFILE.tbench"
+OUTFILE_GIT="$OUTFILE.gitsource"
+
+PERF=/usr/bin/perf
+SYSFS=
+CPUROOT=
+CPUFREQROOT=
+MAKE_CPUS=
+
+TIME_LIMIT=100
+PROCESS_NUM=128
+LOOP_TIMES=3
+TRACER_INTERVAL=10
+CURRENT_TEST=amd-pstate
+COMPARATIVE_TEST=
+
+# Kselftest framework requirement - SKIP code is 4.
+ksft_skip=4
+all_scaling_names=("acpi-cpufreq" "amd-pstate")
+
+# Get current cpufreq scaling driver name
+scaling_name()
+{
+ if [ "$COMPARATIVE_TEST" = "" ]; then
+ echo "$CURRENT_TEST"
+ else
+ echo "$COMPARATIVE_TEST"
+ fi
+}
+
+# Counts CPUs with cpufreq directories
+count_cpus()
+{
+ count=0;
+
+ for cpu in `ls $CPUROOT | grep "cpu[0-9].*"`; do
+ if [ -d $CPUROOT/$cpu/cpufreq ]; then
+ let count=count+1;
+ fi
+ done
+
+ echo $count;
+}
+
+# $1: policy
+find_current_governor()
+{
+ cat $CPUFREQROOT/$1/scaling_governor
+}
+
+backup_governor()
+{
+ policies=$(ls $CPUFREQROOT| grep "policy[0-9].*")
+ for policy in $policies; do
+ cur_gov=$(find_current_governor $policy)
+ echo "$policy $cur_gov" >> $OUTFILE.backup_governor.log
+ done
+
+ printf "Governor $cur_gov backup done.\n"
+}
+
+restore_governor()
+{
+ i=0;
+
+ policies=$(awk '{print $1}' $OUTFILE.backup_governor.log)
+ for policy in $policies; do
+ let i++;
+ governor=$(sed -n ''$i'p' $OUTFILE.backup_governor.log | awk '{print $2}')
+
+ # switch governor
+ echo $governor > $CPUFREQROOT/$policy/scaling_governor
+ done
+
+ printf "Governor restored to $governor.\n"
+}
+
+# $1: governor
+switch_governor()
+{
+ policies=$(ls $CPUFREQROOT| grep "policy[0-9].*")
+ for policy in $policies; do
+ filepath=$CPUFREQROOT/$policy/scaling_available_governors
+
+ # Exit if cpu isn't managed by cpufreq core
+ if [ ! -f $filepath ]; then
+ return;
+ fi
+
+ echo $1 > $CPUFREQROOT/$policy/scaling_governor
+ done
+
+ printf "Switched governor to $1.\n"
+}
+
+# All amd-pstate tests
+amd_pstate_all()
+{
+ printf "\n=============================================\n"
+ printf "***** Running AMD P-state Sanity Tests *****\n"
+ printf "=============================================\n\n"
+
+ count=$(count_cpus)
+ if [ $count = 0 ]; then
+ printf "No cpu is managed by cpufreq core, exiting\n"
+ exit;
+ else
+ printf "AMD P-state manages: $count CPUs\n"
+ fi
+
+ # unit test for amd-pstate kernel driver
+ amd_pstate_basic
+
+ # tbench
+ amd_pstate_tbench
+
+ # gitsource
+ amd_pstate_gitsource
+}
+
+help()
+{
+ printf "Usage: $0 [OPTION...]
+ [-h <help>]
+ [-o <output-file-for-dump>]
+ [-c <all: All testing,
+ basic: Basic testing,
+ tbench: Tbench testing,
+ gitsource: Gitsource testing.>]
+ [-t <tbench time limit>]
+ [-p <tbench process number>]
+ [-l <loop times for tbench>]
+ [-i <amd tracer interval>]
+ [-b <perf binary>]
+ [-m <comparative test: acpi-cpufreq>]
+ \n"
+ exit 2
+}
+
+parse_arguments()
+{
+ while getopts ho:c:t:p:l:i:b:m: arg
+ do
+ case $arg in
+ h) # --help
+ help
+ ;;
+
+ c) # --func_type (Function to perform: basic, tbench, gitsource (default: all))
+ FUNC=$OPTARG
+ ;;
+
+ o) # --output-file (Output file to store dumps)
+ OUTFILE=$OPTARG
+ ;;
+
+ t) # --tbench-time-limit
+ TIME_LIMIT=$OPTARG
+ ;;
+
+ p) # --tbench-process-number
+ PROCESS_NUM=$OPTARG
+ ;;
+
+ l) # --tbench/gitsource-loop-times
+ LOOP_TIMES=$OPTARG
+ ;;
+
+ i) # --amd-tracer-interval
+ TRACER_INTERVAL=$OPTARG
+ ;;
+
+ b) # --perf-binary
+ PERF=`realpath $OPTARG`
+ ;;
+
+ m) # --comparative-test
+ COMPARATIVE_TEST=$OPTARG
+ ;;
+
+ *)
+ help
+ ;;
+ esac
+ done
+}
+
+command_perf()
+{
+ if ! $PERF -v; then
+ echo $msg please install perf or provide perf binary path as argument >&2
+ exit $ksft_skip
+ fi
+}
+
+command_tbench()
+{
+ if ! command -v tbench > /dev/null; then
+ if apt policy dbench > /dev/null 2>&1; then
+ echo $msg apt install dbench >&2
+ exit $ksft_skip
+ elif yum list available | grep dbench > /dev/null 2>&1; then
+ echo $msg yum install dbench >&2
+ exit $ksft_skip
+ fi
+ fi
+
+ if ! command -v tbench > /dev/null; then
+ echo $msg please install tbench. >&2
+ exit $ksft_skip
+ fi
+}
+
+prerequisite()
+{
+ if ! echo "$ARCH" | grep -q x86; then
+ echo "$0 # Skipped: Test can only run on x86 architectures."
+ exit $ksft_skip
+ fi
+
+ if ! echo "$VENDOR" | grep -iq amd; then
+ echo "$0 # Skipped: Test can only run on AMD CPU."
+ echo "$0 # Current cpu vendor is $VENDOR."
+ exit $ksft_skip
+ fi
+
+ scaling_driver=$(cat /sys/devices/system/cpu/cpufreq/policy0/scaling_driver)
+ if [ "$COMPARATIVE_TEST" = "" ]; then
+ if [ "$scaling_driver" != "$CURRENT_TEST" ]; then
+ echo "$0 # Skipped: Test can only run on $CURRENT_TEST driver or run comparative test."
+ echo "$0 # Please set X86_AMD_PSTATE enabled or run comparative test."
+ echo "$0 # Current cpufreq scaling driver is $scaling_driver."
+ exit $ksft_skip
+ fi
+ else
+ case "$FUNC" in
+ "tbench" | "gitsource")
+ if [ "$scaling_driver" != "$COMPARATIVE_TEST" ]; then
+ echo "$0 # Skipped: Comparison test can only run on $COMPARISON_TEST driver."
+ echo "$0 # Current cpufreq scaling driver is $scaling_driver."
+ exit $ksft_skip
+ fi
+ ;;
+
+ *)
+ echo "$0 # Skipped: Comparison test are only for tbench or gitsource."
+ echo "$0 # Current comparative test is for $FUNC."
+ exit $ksft_skip
+ ;;
+ esac
+ fi
+
+ if [ ! -w /dev ]; then
+ echo $msg please run this as root >&2
+ exit $ksft_skip
+ fi
+
+ case "$FUNC" in
+ "all")
+ command_perf
+ command_tbench
+ ;;
+
+ "tbench")
+ command_perf
+ command_tbench
+ ;;
+
+ "gitsource")
+ command_perf
+ ;;
+ esac
+
+ SYSFS=`mount -t sysfs | head -1 | awk '{ print $3 }'`
+
+ if [ ! -d "$SYSFS" ]; then
+ echo $msg sysfs is not mounted >&2
+ exit 2
+ fi
+
+ CPUROOT=$SYSFS/devices/system/cpu
+ CPUFREQROOT="$CPUROOT/cpufreq"
+
+ if ! ls $CPUROOT/cpu* > /dev/null 2>&1; then
+ echo $msg cpus not available in sysfs >&2
+ exit 2
+ fi
+
+ if ! ls $CPUROOT/cpufreq > /dev/null 2>&1; then
+ echo $msg cpufreq directory not available in sysfs >&2
+ exit 2
+ fi
+}
+
+do_test()
+{
+ # Check if CPUs are managed by cpufreq or not
+ count=$(count_cpus)
+ MAKE_CPUS=$((count*2))
+
+ if [ $count = 0 ]; then
+ echo "No cpu is managed by cpufreq core, exiting"
+ exit 2;
+ fi
+
+ case "$FUNC" in
+ "all")
+ amd_pstate_all
+ ;;
+
+ "basic")
+ amd_pstate_basic
+ ;;
+
+ "tbench")
+ amd_pstate_tbench
+ ;;
+
+ "gitsource")
+ amd_pstate_gitsource
+ ;;
+
+ *)
+ echo "Invalid [-f] function type"
+ help
+ ;;
+ esac
+}
+
+# clear dumps
+pre_clear_dumps()
+{
+ case "$FUNC" in
+ "all")
+ rm -rf $OUTFILE.log
+ rm -rf $OUTFILE.backup_governor.log
+ rm -rf *.png
+ ;;
+
+ "tbench")
+ rm -rf $OUTFILE.log
+ rm -rf $OUTFILE.backup_governor.log
+ rm -rf tbench_*.png
+ ;;
+
+ "gitsource")
+ rm -rf $OUTFILE.log
+ rm -rf $OUTFILE.backup_governor.log
+ rm -rf gitsource_*.png
+ ;;
+
+ *)
+ ;;
+ esac
+}
+
+post_clear_dumps()
+{
+ rm -rf $OUTFILE.log
+ rm -rf $OUTFILE.backup_governor.log
+}
+
+# Parse arguments
+parse_arguments $@
+
+# Make sure all requirements are met
+prerequisite
+
+# Run requested functions
+pre_clear_dumps
+do_test | tee -a $OUTFILE.log
+post_clear_dumps
diff --git a/tools/testing/selftests/amd-pstate/tbench.sh b/tools/testing/selftests/amd-pstate/tbench.sh
new file mode 100755
index 000000000000..2a98d9c9202e
--- /dev/null
+++ b/tools/testing/selftests/amd-pstate/tbench.sh
@@ -0,0 +1,339 @@
+#!/bin/sh
+# SPDX-License-Identifier: GPL-2.0
+
+# Testing and monitor the cpu desire performance, frequency, load,
+# power consumption and throughput etc.when this script trigger tbench
+# test cases.
+# 1) Run tbench benchmark on specific governors, ondemand or schedutil.
+# 2) Run tbench benchmark comparative test on acpi-cpufreq kernel driver.
+# 3) Get desire performance, frequency, load by perf.
+# 4) Get power consumption and throughput by amd_pstate_trace.py.
+# 5) Analyse test results and save it in file selftest.tbench.csv.
+# 6) Plot png images about performance, energy and performance per watt for each test.
+
+# protect against multiple inclusion
+if [ $FILE_TBENCH ]; then
+ return 0
+else
+ FILE_TBENCH=DONE
+fi
+
+tbench_governors=("ondemand" "schedutil")
+
+# $1: governor, $2: round, $3: des-perf, $4: freq, $5: load, $6: performance, $7: energy, $8: performance per watt
+store_csv_tbench()
+{
+ echo "$1, $2, $3, $4, $5, $6, $7, $8" | tee -a $OUTFILE_TBENCH.csv > /dev/null 2>&1
+}
+
+# clear some special lines
+clear_csv_tbench()
+{
+ if [ -f $OUTFILE_TBENCH.csv ]; then
+ sed -i '/Comprison(%)/d' $OUTFILE_TBENCH.csv
+ sed -i "/$(scaling_name)/d" $OUTFILE_TBENCH.csv
+ fi
+}
+
+# find string $1 in file csv and get the number of lines
+get_lines_csv_tbench()
+{
+ if [ -f $OUTFILE_TBENCH.csv ]; then
+ return `grep -c "$1" $OUTFILE_TBENCH.csv`
+ else
+ return 0
+ fi
+}
+
+pre_clear_tbench()
+{
+ post_clear_tbench
+ rm -rf tbench_*.png
+ clear_csv_tbench
+}
+
+post_clear_tbench()
+{
+ rm -rf results/tracer-tbench*
+ rm -rf $OUTFILE_TBENCH*.log
+ rm -rf $OUTFILE_TBENCH*.result
+
+}
+
+# $1: governor, $2: loop
+run_tbench()
+{
+ echo "Launching amd pstate tracer for $1 #$2 tracer_interval: $TRACER_INTERVAL"
+ $TRACER -n tracer-tbench-$1-$2 -i $TRACER_INTERVAL > /dev/null 2>&1 &
+
+ printf "Test tbench for $1 #$2 time_limit: $TIME_LIMIT procs_num: $PROCESS_NUM\n"
+ tbench_srv > /dev/null 2>&1 &
+ $PERF stat -a --per-socket -I 1000 -e power/energy-pkg/ tbench -t $TIME_LIMIT $PROCESS_NUM > $OUTFILE_TBENCH-perf-$1-$2.log 2>&1
+
+ pid=`pidof tbench_srv`
+ kill $pid
+
+ for job in `jobs -p`
+ do
+ echo "Waiting for job id $job"
+ wait $job
+ done
+}
+
+# $1: governor, $2: loop
+parse_tbench()
+{
+ awk '{print $5}' results/tracer-tbench-$1-$2/cpu.csv | sed -e '1d' | sed s/,// > $OUTFILE_TBENCH-des-perf-$1-$2.log
+ avg_des_perf=$(awk 'BEGIN {i=0; sum=0};{i++; sum += $1};END {print sum/i}' $OUTFILE_TBENCH-des-perf-$1-$2.log)
+ printf "Tbench-$1-#$2 avg des perf: $avg_des_perf\n" | tee -a $OUTFILE_TBENCH.result
+
+ awk '{print $7}' results/tracer-tbench-$1-$2/cpu.csv | sed -e '1d' | sed s/,// > $OUTFILE_TBENCH-freq-$1-$2.log
+ avg_freq=$(awk 'BEGIN {i=0; sum=0};{i++; sum += $1};END {print sum/i}' $OUTFILE_TBENCH-freq-$1-$2.log)
+ printf "Tbench-$1-#$2 avg freq: $avg_freq\n" | tee -a $OUTFILE_TBENCH.result
+
+ awk '{print $11}' results/tracer-tbench-$1-$2/cpu.csv | sed -e '1d' | sed s/,// > $OUTFILE_TBENCH-load-$1-$2.log
+ avg_load=$(awk 'BEGIN {i=0; sum=0};{i++; sum += $1};END {print sum/i}' $OUTFILE_TBENCH-load-$1-$2.log)
+ printf "Tbench-$1-#$2 avg load: $avg_load\n" | tee -a $OUTFILE_TBENCH.result
+
+ grep Throughput $OUTFILE_TBENCH-perf-$1-$2.log | awk '{print $2}' > $OUTFILE_TBENCH-throughput-$1-$2.log
+ tp_sum=$(awk 'BEGIN {sum=0};{sum += $1};END {print sum}' $OUTFILE_TBENCH-throughput-$1-$2.log)
+ printf "Tbench-$1-#$2 throughput(MB/s): $tp_sum\n" | tee -a $OUTFILE_TBENCH.result
+
+ grep Joules $OUTFILE_TBENCH-perf-$1-$2.log | awk '{print $4}' > $OUTFILE_TBENCH-energy-$1-$2.log
+ en_sum=$(awk 'BEGIN {sum=0};{sum += $1};END {print sum}' $OUTFILE_TBENCH-energy-$1-$2.log)
+ printf "Tbench-$1-#$2 power consumption(J): $en_sum\n" | tee -a $OUTFILE_TBENCH.result
+
+ # Permance is throughput per second, denoted T/t, where T is throught rendered in t seconds.
+ # It is well known that P=E/t, where P is power measured in watts(W), E is energy measured in joules(J),
+ # and t is time measured in seconds(s). This means that performance per watt becomes
+ # T/t T/t T
+ # --- = --- = ---
+ # P E/t E
+ # with unit given by MB per joule.
+ ppw=`echo "scale=4;($TIME_LIMIT-1)*$tp_sum/$en_sum" | bc | awk '{printf "%.4f", $0}'`
+ printf "Tbench-$1-#$2 performance per watt(MB/J): $ppw\n" | tee -a $OUTFILE_TBENCH.result
+ printf "\n" | tee -a $OUTFILE_TBENCH.result
+
+ driver_name=`echo $(scaling_name)`
+ store_csv_tbench "$driver_name-$1" $2 $avg_des_perf $avg_freq $avg_load $tp_sum $en_sum $ppw
+}
+
+# $1: governor
+loop_tbench()
+{
+ printf "\nTbench total test times is $LOOP_TIMES for $1\n\n"
+ for i in `seq 1 $LOOP_TIMES`
+ do
+ run_tbench $1 $i
+ parse_tbench $1 $i
+ done
+}
+
+# $1: governor
+gather_tbench()
+{
+ printf "Tbench test result for $1 (loops:$LOOP_TIMES)" | tee -a $OUTFILE_TBENCH.result
+ printf "\n--------------------------------------------------\n" | tee -a $OUTFILE_TBENCH.result
+
+ grep "Tbench-$1-#" $OUTFILE_TBENCH.result | grep "avg des perf:" | awk '{print $NF}' > $OUTFILE_TBENCH-des-perf-$1.log
+ avg_des_perf=$(awk 'BEGIN {sum=0};{sum += $1};END {print sum/'$LOOP_TIMES'}' $OUTFILE_TBENCH-des-perf-$1.log)
+ printf "Tbench-$1 avg des perf: $avg_des_perf\n" | tee -a $OUTFILE_TBENCH.result
+
+ grep "Tbench-$1-#" $OUTFILE_TBENCH.result | grep "avg freq:" | awk '{print $NF}' > $OUTFILE_TBENCH-freq-$1.log
+ avg_freq=$(awk 'BEGIN {sum=0};{sum += $1};END {print sum/'$LOOP_TIMES'}' $OUTFILE_TBENCH-freq-$1.log)
+ printf "Tbench-$1 avg freq: $avg_freq\n" | tee -a $OUTFILE_TBENCH.result
+
+ grep "Tbench-$1-#" $OUTFILE_TBENCH.result | grep "avg load:" | awk '{print $NF}' > $OUTFILE_TBENCH-load-$1.log
+ avg_load=$(awk 'BEGIN {sum=0};{sum += $1};END {print sum/'$LOOP_TIMES'}' $OUTFILE_TBENCH-load-$1.log)
+ printf "Tbench-$1 avg load: $avg_load\n" | tee -a $OUTFILE_TBENCH.result
+
+ grep "Tbench-$1-#" $OUTFILE_TBENCH.result | grep "throughput(MB/s):" | awk '{print $NF}' > $OUTFILE_TBENCH-throughput-$1.log
+ tp_sum=$(awk 'BEGIN {sum=0};{sum += $1};END {print sum}' $OUTFILE_TBENCH-throughput-$1.log)
+ printf "Tbench-$1 total throughput(MB/s): $tp_sum\n" | tee -a $OUTFILE_TBENCH.result
+
+ avg_tp=$(awk 'BEGIN {sum=0};{sum += $1};END {print sum/'$LOOP_TIMES'}' $OUTFILE_TBENCH-throughput-$1.log)
+ printf "Tbench-$1 avg throughput(MB/s): $avg_tp\n" | tee -a $OUTFILE_TBENCH.result
+
+ grep "Tbench-$1-#" $OUTFILE_TBENCH.result | grep "power consumption(J):" | awk '{print $NF}' > $OUTFILE_TBENCH-energy-$1.log
+ en_sum=$(awk 'BEGIN {sum=0};{sum += $1};END {print sum}' $OUTFILE_TBENCH-energy-$1.log)
+ printf "Tbench-$1 total power consumption(J): $en_sum\n" | tee -a $OUTFILE_TBENCH.result
+
+ avg_en=$(awk 'BEGIN {sum=0};{sum += $1};END {print sum/'$LOOP_TIMES'}' $OUTFILE_TBENCH-energy-$1.log)
+ printf "Tbench-$1 avg power consumption(J): $avg_en\n" | tee -a $OUTFILE_TBENCH.result
+
+ # Permance is throughput per second, denoted T/t, where T is throught rendered in t seconds.
+ # It is well known that P=E/t, where P is power measured in watts(W), E is energy measured in joules(J),
+ # and t is time measured in seconds(s). This means that performance per watt becomes
+ # T/t T/t T
+ # --- = --- = ---
+ # P E/t E
+ # with unit given by MB per joule.
+ ppw=`echo "scale=4;($TIME_LIMIT-1)*$avg_tp/$avg_en" | bc | awk '{printf "%.4f", $0}'`
+ printf "Tbench-$1 performance per watt(MB/J): $ppw\n" | tee -a $OUTFILE_TBENCH.result
+ printf "\n" | tee -a $OUTFILE_TBENCH.result
+
+ driver_name=`echo $(scaling_name)`
+ store_csv_tbench "$driver_name-$1" "Average" $avg_des_perf $avg_freq $avg_load $avg_tp $avg_en $ppw
+}
+
+# $1: base scaling_driver $2: base governor $3: comparative scaling_driver $4: comparative governor
+__calc_comp_tbench()
+{
+ base=`grep "$1-$2" $OUTFILE_TBENCH.csv | grep "Average"`
+ comp=`grep "$3-$4" $OUTFILE_TBENCH.csv | grep "Average"`
+
+ if [ -n "$base" -a -n "$comp" ]; then
+ printf "\n==================================================\n" | tee -a $OUTFILE_TBENCH.result
+ printf "Tbench comparison $1-$2 VS $3-$4" | tee -a $OUTFILE_TBENCH.result
+ printf "\n==================================================\n" | tee -a $OUTFILE_TBENCH.result
+
+ # get the base values
+ des_perf_base=`echo "$base" | awk '{print $3}' | sed s/,//`
+ freq_base=`echo "$base" | awk '{print $4}' | sed s/,//`
+ load_base=`echo "$base" | awk '{print $5}' | sed s/,//`
+ perf_base=`echo "$base" | awk '{print $6}' | sed s/,//`
+ energy_base=`echo "$base" | awk '{print $7}' | sed s/,//`
+ ppw_base=`echo "$base" | awk '{print $8}' | sed s/,//`
+
+ # get the comparative values
+ des_perf_comp=`echo "$comp" | awk '{print $3}' | sed s/,//`
+ freq_comp=`echo "$comp" | awk '{print $4}' | sed s/,//`
+ load_comp=`echo "$comp" | awk '{print $5}' | sed s/,//`
+ perf_comp=`echo "$comp" | awk '{print $6}' | sed s/,//`
+ energy_comp=`echo "$comp" | awk '{print $7}' | sed s/,//`
+ ppw_comp=`echo "$comp" | awk '{print $8}' | sed s/,//`
+
+ # compare the base and comp values
+ des_perf_drop=`echo "scale=4;($des_perf_comp-$des_perf_base)*100/$des_perf_base" | bc | awk '{printf "%.4f", $0}'`
+ printf "Tbench-$1 des perf base: $des_perf_base comprison: $des_perf_comp percent: $des_perf_drop\n" | tee -a $OUTFILE_TBENCH.result
+
+ freq_drop=`echo "scale=4;($freq_comp-$freq_base)*100/$freq_base" | bc | awk '{printf "%.4f", $0}'`
+ printf "Tbench-$1 freq base: $freq_base comprison: $freq_comp percent: $freq_drop\n" | tee -a $OUTFILE_TBENCH.result
+
+ load_drop=`echo "scale=4;($load_comp-$load_base)*100/$load_base" | bc | awk '{printf "%.4f", $0}'`
+ printf "Tbench-$1 load base: $load_base comprison: $load_comp percent: $load_drop\n" | tee -a $OUTFILE_TBENCH.result
+
+ perf_drop=`echo "scale=4;($perf_comp-$perf_base)*100/$perf_base" | bc | awk '{printf "%.4f", $0}'`
+ printf "Tbench-$1 perf base: $perf_base comprison: $perf_comp percent: $perf_drop\n" | tee -a $OUTFILE_TBENCH.result
+
+ energy_drop=`echo "scale=4;($energy_comp-$energy_base)*100/$energy_base" | bc | awk '{printf "%.4f", $0}'`
+ printf "Tbench-$1 energy base: $energy_base comprison: $energy_comp percent: $energy_drop\n" | tee -a $OUTFILE_TBENCH.result
+
+ ppw_drop=`echo "scale=4;($ppw_comp-$ppw_base)*100/$ppw_base" | bc | awk '{printf "%.4f", $0}'`
+ printf "Tbench-$1 performance per watt base: $ppw_base comprison: $ppw_comp percent: $ppw_drop\n" | tee -a $OUTFILE_TBENCH.result
+ printf "\n" | tee -a $OUTFILE_TBENCH.result
+
+ store_csv_tbench "$1-$2 VS $3-$4" "Comprison(%)" "$des_perf_drop" "$freq_drop" "$load_drop" "$perf_drop" "$energy_drop" "$ppw_drop"
+ fi
+}
+
+# calculate the comparison(%)
+calc_comp_tbench()
+{
+ # acpi-cpufreq-ondemand VS acpi-cpufreq-schedutil
+ __calc_comp_tbench ${all_scaling_names[0]} ${tbench_governors[0]} ${all_scaling_names[0]} ${tbench_governors[1]}
+
+ # amd-pstate-ondemand VS amd-pstate-schedutil
+ __calc_comp_tbench ${all_scaling_names[1]} ${tbench_governors[0]} ${all_scaling_names[1]} ${tbench_governors[1]}
+
+ # acpi-cpufreq-ondemand VS amd-pstate-ondemand
+ __calc_comp_tbench ${all_scaling_names[0]} ${tbench_governors[0]} ${all_scaling_names[1]} ${tbench_governors[0]}
+
+ # acpi-cpufreq-schedutil VS amd-pstate-schedutil
+ __calc_comp_tbench ${all_scaling_names[0]} ${tbench_governors[1]} ${all_scaling_names[1]} ${tbench_governors[1]}
+}
+
+# $1: file_name, $2: title, $3: ylable, $4: column
+plot_png_tbench()
+{
+ # all_scaling_names[1] all_scaling_names[0] flag
+ # amd-pstate acpi-cpufreq
+ # N N 0
+ # N Y 1
+ # Y N 2
+ # Y Y 3
+ ret=`grep -c "${all_scaling_names[1]}" $OUTFILE_TBENCH.csv`
+ if [ $ret -eq 0 ]; then
+ ret=`grep -c "${all_scaling_names[0]}" $OUTFILE_TBENCH.csv`
+ if [ $ret -eq 0 ]; then
+ flag=0
+ else
+ flag=1
+ fi
+ else
+ ret=`grep -c "${all_scaling_names[0]}" $OUTFILE_TBENCH.csv`
+ if [ $ret -eq 0 ]; then
+ flag=2
+ else
+ flag=3
+ fi
+ fi
+
+ gnuplot << EOF
+ set term png
+ set output "$1"
+
+ set title "$2"
+ set xlabel "Test Cycles (round)"
+ set ylabel "$3"
+
+ set grid
+ set style data histogram
+ set style fill solid 0.5 border
+ set boxwidth 0.8
+
+ if ($flag == 1) {
+ plot \
+ "<(sed -n -e 's/,//g' -e '/${all_scaling_names[0]}-${tbench_governors[0]}/p' $OUTFILE_TBENCH.csv)" using $4:xtic(2) title "${all_scaling_names[0]}-${tbench_governors[0]}", \
+ "<(sed -n -e 's/,//g' -e '/${all_scaling_names[0]}-${tbench_governors[1]}/p' $OUTFILE_TBENCH.csv)" using $4:xtic(2) title "${all_scaling_names[0]}-${tbench_governors[1]}"
+ } else {
+ if ($flag == 2) {
+ plot \
+ "<(sed -n -e 's/,//g' -e '/${all_scaling_names[1]}-${tbench_governors[0]}/p' $OUTFILE_TBENCH.csv)" using $4:xtic(2) title "${all_scaling_names[1]}-${tbench_governors[0]}", \
+ "<(sed -n -e 's/,//g' -e '/${all_scaling_names[1]}-${tbench_governors[1]}/p' $OUTFILE_TBENCH.csv)" using $4:xtic(2) title "${all_scaling_names[1]}-${tbench_governors[1]}"
+ } else {
+ if ($flag == 3 ) {
+ plot \
+ "<(sed -n -e 's/,//g' -e '/${all_scaling_names[0]}-${tbench_governors[0]}/p' $OUTFILE_TBENCH.csv)" using $4:xtic(2) title "${all_scaling_names[0]}-${tbench_governors[0]}", \
+ "<(sed -n -e 's/,//g' -e '/${all_scaling_names[0]}-${tbench_governors[1]}/p' $OUTFILE_TBENCH.csv)" using $4:xtic(2) title "${all_scaling_names[0]}-${tbench_governors[1]}", \
+ "<(sed -n -e 's/,//g' -e '/${all_scaling_names[1]}-${tbench_governors[0]}/p' $OUTFILE_TBENCH.csv)" using $4:xtic(2) title "${all_scaling_names[1]}-${tbench_governors[0]}", \
+ "<(sed -n -e 's/,//g' -e '/${all_scaling_names[1]}-${tbench_governors[1]}/p' $OUTFILE_TBENCH.csv)" using $4:xtic(2) title "${all_scaling_names[1]}-${tbench_governors[1]}"
+ }
+ }
+ }
+ quit
+EOF
+}
+
+amd_pstate_tbench()
+{
+ printf "\n---------------------------------------------\n"
+ printf "*** Running tbench ***"
+ printf "\n---------------------------------------------\n"
+
+ pre_clear_tbench
+
+ get_lines_csv_tbench "Governor"
+ if [ $? -eq 0 ]; then
+ # add titles and unit for csv file
+ store_csv_tbench "Governor" "Round" "Des-perf" "Freq" "Load" "Performance" "Energy" "Performance Per Watt"
+ store_csv_tbench "Unit" "" "" "GHz" "" "MB/s" "J" "MB/J"
+ fi
+
+ backup_governor
+ for governor in ${tbench_governors[*]} ; do
+ printf "\nSpecified governor is $governor\n\n"
+ switch_governor $governor
+ loop_tbench $governor
+ gather_tbench $governor
+ done
+ restore_governor
+
+ plot_png_tbench "tbench_perfromance.png" "Tbench Benchmark Performance" "Performance" 6
+ plot_png_tbench "tbench_energy.png" "Tbench Benchmark Energy" "Energy (J)" 7
+ plot_png_tbench "tbench_ppw.png" "Tbench Benchmark Performance Per Watt" "Performance Per Watt (MB/J)" 8
+
+ calc_comp_tbench
+
+ post_clear_tbench
+}
diff --git a/tools/testing/selftests/android/Makefile b/tools/testing/selftests/android/Makefile
deleted file mode 100644
index 9258306cafe9..000000000000
--- a/tools/testing/selftests/android/Makefile
+++ /dev/null
@@ -1,39 +0,0 @@
-# SPDX-License-Identifier: GPL-2.0-only
-SUBDIRS := ion
-
-TEST_PROGS := run.sh
-
-.PHONY: all clean
-
-include ../lib.mk
-
-all:
- @for DIR in $(SUBDIRS); do \
- BUILD_TARGET=$(OUTPUT)/$$DIR; \
- mkdir $$BUILD_TARGET -p; \
- make OUTPUT=$$BUILD_TARGET -C $$DIR $@;\
- #SUBDIR test prog name should be in the form: SUBDIR_test.sh \
- TEST=$$DIR"_test.sh"; \
- if [ -e $$DIR/$$TEST ]; then \
- rsync -a $$DIR/$$TEST $$BUILD_TARGET/; \
- fi \
- done
-
-override define INSTALL_RULE
- mkdir -p $(INSTALL_PATH)
-install -t $(INSTALL_PATH) $(TEST_PROGS) $(TEST_PROGS_EXTENDED) $(TEST_FILES) $(TEST_GEN_PROGS) $(TEST_CUSTOM_PROGS) $(TEST_GEN_PROGS_EXTENDED) $(TEST_GEN_FILES)
-
- @for SUBDIR in $(SUBDIRS); do \
- BUILD_TARGET=$(OUTPUT)/$$SUBDIR; \
- mkdir $$BUILD_TARGET -p; \
- $(MAKE) OUTPUT=$$BUILD_TARGET -C $$SUBDIR INSTALL_PATH=$(INSTALL_PATH)/$$SUBDIR install; \
- done;
-endef
-
-override define CLEAN
- @for DIR in $(SUBDIRS); do \
- BUILD_TARGET=$(OUTPUT)/$$DIR; \
- mkdir $$BUILD_TARGET -p; \
- make OUTPUT=$$BUILD_TARGET -C $$DIR $@;\
- done
-endef
diff --git a/tools/testing/selftests/android/config b/tools/testing/selftests/android/config
deleted file mode 100644
index b4ad748a9dd9..000000000000
--- a/tools/testing/selftests/android/config
+++ /dev/null
@@ -1,5 +0,0 @@
-CONFIG_ANDROID=y
-CONFIG_STAGING=y
-CONFIG_ION=y
-CONFIG_ION_SYSTEM_HEAP=y
-CONFIG_DRM_VGEM=y
diff --git a/tools/testing/selftests/android/ion/Makefile b/tools/testing/selftests/android/ion/Makefile
deleted file mode 100644
index 42b71f005332..000000000000
--- a/tools/testing/selftests/android/ion/Makefile
+++ /dev/null
@@ -1,20 +0,0 @@
-# SPDX-License-Identifier: GPL-2.0-only
-
-INCLUDEDIR := -I. -I../../../../../drivers/staging/android/uapi/ -I../../../../../usr/include/
-CFLAGS := $(CFLAGS) $(INCLUDEDIR) -Wall -O2 -g
-
-TEST_GEN_FILES := ionapp_export ionapp_import ionmap_test
-
-all: $(TEST_GEN_FILES)
-
-$(TEST_GEN_FILES): ipcsocket.c ionutils.c
-
-TEST_PROGS := ion_test.sh
-
-KSFT_KHDR_INSTALL := 1
-top_srcdir = ../../../../..
-include ../../lib.mk
-
-$(OUTPUT)/ionapp_export: ionapp_export.c ipcsocket.c ionutils.c
-$(OUTPUT)/ionapp_import: ionapp_import.c ipcsocket.c ionutils.c
-$(OUTPUT)/ionmap_test: ionmap_test.c ionutils.c ipcsocket.c
diff --git a/tools/testing/selftests/android/ion/README b/tools/testing/selftests/android/ion/README
deleted file mode 100644
index 21783e9c451e..000000000000
--- a/tools/testing/selftests/android/ion/README
+++ /dev/null
@@ -1,101 +0,0 @@
-ION BUFFER SHARING UTILITY
-==========================
-File: ion_test.sh : Utility to test ION driver buffer sharing mechanism.
-Author: Pintu Kumar <pintu.ping@gmail.com>
-
-Introduction:
--------------
-This is a test utility to verify ION buffer sharing in user space
-between 2 independent processes.
-It uses unix domain socket (with SCM_RIGHTS) as IPC to transfer an FD to
-another process to share the same buffer.
-This utility demonstrates how ION buffer sharing can be implemented between
-two user space processes, using various heap types.
-The following heap types are supported by ION driver.
-ION_HEAP_TYPE_SYSTEM (0)
-ION_HEAP_TYPE_SYSTEM_CONTIG (1)
-ION_HEAP_TYPE_CARVEOUT (2)
-ION_HEAP_TYPE_CHUNK (3)
-ION_HEAP_TYPE_DMA (4)
-
-By default only the SYSTEM and SYSTEM_CONTIG heaps are supported.
-Each heap is associated with the respective heap id.
-This utility is designed in the form of client/server program.
-The server part (ionapp_export) is the exporter of the buffer.
-It is responsible for creating an ION client, allocating the buffer based on
-the heap id, writing some data to this buffer and then exporting the FD
-(associated with this buffer) to another process using socket IPC.
-This FD is called as buffer FD (which is different than the ION client FD).
-
-The client part (ionapp_import) is the importer of the buffer.
-It retrives the FD from the socket data and installs into its address space.
-This new FD internally points to the same kernel buffer.
-So first it reads the data that is stored in this buffer and prints it.
-Then it writes the different size of data (it could be different data) to the
-same buffer.
-Finally the buffer FD must be closed by both the exporter and importer.
-Thus the same kernel buffer is shared among two user space processes using
-ION driver and only one time allocation.
-
-Prerequisite:
--------------
-This utility works only if /dev/ion interface is present.
-The following configs needs to be enabled in kernel to include ion driver.
-CONFIG_ANDROID=y
-CONFIG_STAGING=y
-CONFIG_ION=y
-CONFIG_ION_SYSTEM_HEAP=y
-
-This utility requires to be run as root user.
-
-
-Compile and test:
------------------
-This utility is made to be run as part of kselftest framework in kernel.
-To compile and run using kselftest you can simply do the following from the
-kernel top directory.
-linux$ make TARGETS=android kselftest
-Or you can also use:
-linux$ make -C tools/testing/selftests TARGETS=android run_tests
-Using the selftest it can directly execute the ion_test.sh script to test the
-buffer sharing using ion system heap.
-Currently the heap size is hard coded as just 10 bytes inside this script.
-You need to be a root user to run under selftest.
-
-You can also compile and test manually using the following steps:
-ion$ make
-These will generate 2 executable: ionapp_export, ionapp_import
-Now you can run the export and import manually by specifying the heap type
-and the heap size.
-You can also directly execute the shell script to run the test automatically.
-Simply use the following command to run the test.
-ion$ sudo ./ion_test.sh
-
-Test Results:
--------------
-The utility is verified on Ubuntu-32 bit system with Linux Kernel 4.14.
-Here is the snapshot of the test result using kselftest.
-
-linux# make TARGETS=android kselftest
-heap_type: 0, heap_size: 10
---------------------------------------
-heap type: 0
- heap id: 1
-heap name: ion_system_heap
---------------------------------------
-Fill buffer content:
-0xfd 0xfd 0xfd 0xfd 0xfd 0xfd 0xfd 0xfd 0xfd 0xfd
-Sharing fd: 6, Client fd: 5
-<ion_close_buffer_fd>: buffer release successfully....
-Received buffer fd: 4
-Read buffer content:
-0xfd 0xfd 0xfd 0xfd 0xfd 0xfd 0xfd 0xfd 0xfd 0xfd 0x0 0x0 0x0 0x0 0x0 0x0
-0x0 0x0 0x0 0x0 0x0 0x0 0x0 0x0 0x0 0x0 0x0 0x0 0x0 0x0 0x0 0x0
-Fill buffer content:
-0xfd 0xfd 0xfd 0xfd 0xfd 0xfd 0xfd 0xfd 0xfd 0xfd 0xfd 0xfd 0xfd 0xfd 0xfd
-0xfd 0xfd 0xfd 0xfd 0xfd 0xfd 0xfd 0xfd 0xfd 0xfd 0xfd 0xfd 0xfd 0xfd 0xfd
-0xfd 0xfd
-<ion_close_buffer_fd>: buffer release successfully....
-ion_test.sh: heap_type: 0 - [PASS]
-
-ion_test.sh: done
diff --git a/tools/testing/selftests/android/ion/ion.h b/tools/testing/selftests/android/ion/ion.h
deleted file mode 100644
index 33db23018abf..000000000000
--- a/tools/testing/selftests/android/ion/ion.h
+++ /dev/null
@@ -1,134 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0-only */
-/*
- * ion.h
- *
- * Copyright (C) 2011 Google, Inc.
- */
-
-/* This file is copied from drivers/staging/android/uapi/ion.h
- * This local copy is required for the selftest to pass, when build
- * outside the kernel source tree.
- * Please keep this file in sync with its original file until the
- * ion driver is moved outside the staging tree.
- */
-
-#ifndef _UAPI_LINUX_ION_H
-#define _UAPI_LINUX_ION_H
-
-#include <linux/ioctl.h>
-#include <linux/types.h>
-
-/**
- * enum ion_heap_types - list of all possible types of heaps
- * @ION_HEAP_TYPE_SYSTEM: memory allocated via vmalloc
- * @ION_HEAP_TYPE_SYSTEM_CONTIG: memory allocated via kmalloc
- * @ION_HEAP_TYPE_CARVEOUT: memory allocated from a prereserved
- * carveout heap, allocations are physically
- * contiguous
- * @ION_HEAP_TYPE_DMA: memory allocated via DMA API
- * @ION_NUM_HEAPS: helper for iterating over heaps, a bit mask
- * is used to identify the heaps, so only 32
- * total heap types are supported
- */
-enum ion_heap_type {
- ION_HEAP_TYPE_SYSTEM,
- ION_HEAP_TYPE_SYSTEM_CONTIG,
- ION_HEAP_TYPE_CARVEOUT,
- ION_HEAP_TYPE_CHUNK,
- ION_HEAP_TYPE_DMA,
- ION_HEAP_TYPE_CUSTOM, /*
- * must be last so device specific heaps always
- * are at the end of this enum
- */
-};
-
-#define ION_NUM_HEAP_IDS (sizeof(unsigned int) * 8)
-
-/**
- * allocation flags - the lower 16 bits are used by core ion, the upper 16
- * bits are reserved for use by the heaps themselves.
- */
-
-/*
- * mappings of this buffer should be cached, ion will do cache maintenance
- * when the buffer is mapped for dma
- */
-#define ION_FLAG_CACHED 1
-
-/**
- * DOC: Ion Userspace API
- *
- * create a client by opening /dev/ion
- * most operations handled via following ioctls
- *
- */
-
-/**
- * struct ion_allocation_data - metadata passed from userspace for allocations
- * @len: size of the allocation
- * @heap_id_mask: mask of heap ids to allocate from
- * @flags: flags passed to heap
- * @handle: pointer that will be populated with a cookie to use to
- * refer to this allocation
- *
- * Provided by userspace as an argument to the ioctl
- */
-struct ion_allocation_data {
- __u64 len;
- __u32 heap_id_mask;
- __u32 flags;
- __u32 fd;
- __u32 unused;
-};
-
-#define MAX_HEAP_NAME 32
-
-/**
- * struct ion_heap_data - data about a heap
- * @name - first 32 characters of the heap name
- * @type - heap type
- * @heap_id - heap id for the heap
- */
-struct ion_heap_data {
- char name[MAX_HEAP_NAME];
- __u32 type;
- __u32 heap_id;
- __u32 reserved0;
- __u32 reserved1;
- __u32 reserved2;
-};
-
-/**
- * struct ion_heap_query - collection of data about all heaps
- * @cnt - total number of heaps to be copied
- * @heaps - buffer to copy heap data
- */
-struct ion_heap_query {
- __u32 cnt; /* Total number of heaps to be copied */
- __u32 reserved0; /* align to 64bits */
- __u64 heaps; /* buffer to be populated */
- __u32 reserved1;
- __u32 reserved2;
-};
-
-#define ION_IOC_MAGIC 'I'
-
-/**
- * DOC: ION_IOC_ALLOC - allocate memory
- *
- * Takes an ion_allocation_data struct and returns it with the handle field
- * populated with the opaque handle for the allocation.
- */
-#define ION_IOC_ALLOC _IOWR(ION_IOC_MAGIC, 0, \
- struct ion_allocation_data)
-
-/**
- * DOC: ION_IOC_HEAP_QUERY - information about available heaps
- *
- * Takes an ion_heap_query structure and populates information about
- * available Ion heaps.
- */
-#define ION_IOC_HEAP_QUERY _IOWR(ION_IOC_MAGIC, 8, \
- struct ion_heap_query)
-
-#endif /* _UAPI_LINUX_ION_H */
diff --git a/tools/testing/selftests/android/ion/ion_test.sh b/tools/testing/selftests/android/ion/ion_test.sh
deleted file mode 100755
index 69e676cfc94e..000000000000
--- a/tools/testing/selftests/android/ion/ion_test.sh
+++ /dev/null
@@ -1,58 +0,0 @@
-#!/bin/bash
-
-heapsize=4096
-TCID="ion_test.sh"
-errcode=0
-
-# Kselftest framework requirement - SKIP code is 4.
-ksft_skip=4
-
-run_test()
-{
- heaptype=$1
- ./ionapp_export -i $heaptype -s $heapsize &
- sleep 1
- ./ionapp_import
- if [ $? -ne 0 ]; then
- echo "$TCID: heap_type: $heaptype - [FAIL]"
- errcode=1
- else
- echo "$TCID: heap_type: $heaptype - [PASS]"
- fi
- sleep 1
- echo ""
-}
-
-check_root()
-{
- uid=$(id -u)
- if [ $uid -ne 0 ]; then
- echo $TCID: must be run as root >&2
- exit $ksft_skip
- fi
-}
-
-check_device()
-{
- DEVICE=/dev/ion
- if [ ! -e $DEVICE ]; then
- echo $TCID: No $DEVICE device found >&2
- echo $TCID: May be CONFIG_ION is not set >&2
- exit $ksft_skip
- fi
-}
-
-main_function()
-{
- check_device
- check_root
-
- # ION_SYSTEM_HEAP TEST
- run_test 0
- # ION_SYSTEM_CONTIG_HEAP TEST
- run_test 1
-}
-
-main_function
-echo "$TCID: done"
-exit $errcode
diff --git a/tools/testing/selftests/android/ion/ionapp_export.c b/tools/testing/selftests/android/ion/ionapp_export.c
deleted file mode 100644
index 063b7830d1bd..000000000000
--- a/tools/testing/selftests/android/ion/ionapp_export.c
+++ /dev/null
@@ -1,127 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-only
-/*
- * ionapp_export.c
- *
- * It is a user space utility to create and export android
- * ion memory buffer fd to another process using unix domain socket as IPC.
- * This acts like a server for ionapp_import(client).
- * So, this server has to be started first before the client.
- *
- * Copyright (C) 2017 Pintu Kumar <pintu.ping@gmail.com>
- */
-
-#include <stdio.h>
-#include <stdlib.h>
-#include <string.h>
-#include <unistd.h>
-#include <errno.h>
-#include <sys/time.h>
-#include "ionutils.h"
-#include "ipcsocket.h"
-
-
-void print_usage(int argc, char *argv[])
-{
- printf("Usage: %s [-h <help>] [-i <heap id>] [-s <size in bytes>]\n",
- argv[0]);
-}
-
-int main(int argc, char *argv[])
-{
- int opt, ret, status, heapid;
- int sockfd, client_fd, shared_fd;
- unsigned char *map_buf;
- unsigned long map_len, heap_type, heap_size, flags;
- struct ion_buffer_info info;
- struct socket_info skinfo;
-
- if (argc < 2) {
- print_usage(argc, argv);
- return -1;
- }
-
- heap_size = 0;
- flags = 0;
- heap_type = ION_HEAP_TYPE_SYSTEM;
-
- while ((opt = getopt(argc, argv, "hi:s:")) != -1) {
- switch (opt) {
- case 'h':
- print_usage(argc, argv);
- exit(0);
- break;
- case 'i':
- heapid = atoi(optarg);
- switch (heapid) {
- case 0:
- heap_type = ION_HEAP_TYPE_SYSTEM;
- break;
- case 1:
- heap_type = ION_HEAP_TYPE_SYSTEM_CONTIG;
- break;
- default:
- printf("ERROR: heap type not supported\n");
- exit(1);
- }
- break;
- case 's':
- heap_size = atoi(optarg);
- break;
- default:
- print_usage(argc, argv);
- exit(1);
- break;
- }
- }
-
- if (heap_size <= 0) {
- printf("heap_size cannot be 0\n");
- print_usage(argc, argv);
- exit(1);
- }
-
- printf("heap_type: %ld, heap_size: %ld\n", heap_type, heap_size);
- info.heap_type = heap_type;
- info.heap_size = heap_size;
- info.flag_type = flags;
-
- /* This is server: open the socket connection first */
- /* Here; 1 indicates server or exporter */
- status = opensocket(&sockfd, SOCKET_NAME, 1);
- if (status < 0) {
- fprintf(stderr, "<%s>: Failed opensocket.\n", __func__);
- goto err_socket;
- }
- skinfo.sockfd = sockfd;
-
- ret = ion_export_buffer_fd(&info);
- if (ret < 0) {
- fprintf(stderr, "FAILED: ion_get_buffer_fd\n");
- goto err_export;
- }
- client_fd = info.ionfd;
- shared_fd = info.buffd;
- map_buf = info.buffer;
- map_len = info.buflen;
- write_buffer(map_buf, map_len);
-
- /* share ion buf fd with other user process */
- printf("Sharing fd: %d, Client fd: %d\n", shared_fd, client_fd);
- skinfo.datafd = shared_fd;
- skinfo.buflen = map_len;
-
- ret = socket_send_fd(&skinfo);
- if (ret < 0) {
- fprintf(stderr, "FAILED: socket_send_fd\n");
- goto err_send;
- }
-
-err_send:
-err_export:
- ion_close_buffer_fd(&info);
-
-err_socket:
- closesocket(sockfd, SOCKET_NAME);
-
- return 0;
-}
diff --git a/tools/testing/selftests/android/ion/ionapp_import.c b/tools/testing/selftests/android/ion/ionapp_import.c
deleted file mode 100644
index 54b580cb04f6..000000000000
--- a/tools/testing/selftests/android/ion/ionapp_import.c
+++ /dev/null
@@ -1,79 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-only
-/*
- * ionapp_import.c
- *
- * It is a user space utility to receive android ion memory buffer fd
- * over unix domain socket IPC that can be exported by ionapp_export.
- * This acts like a client for ionapp_export.
- *
- * Copyright (C) 2017 Pintu Kumar <pintu.ping@gmail.com>
- */
-
-#include <stdio.h>
-#include <stdlib.h>
-#include <unistd.h>
-#include <string.h>
-#include "ionutils.h"
-#include "ipcsocket.h"
-
-
-int main(void)
-{
- int ret, status;
- int sockfd, shared_fd;
- unsigned char *map_buf;
- unsigned long map_len;
- struct ion_buffer_info info;
- struct socket_info skinfo;
-
- /* This is the client part. Here 0 means client or importer */
- status = opensocket(&sockfd, SOCKET_NAME, 0);
- if (status < 0) {
- fprintf(stderr, "No exporter exists...\n");
- ret = status;
- goto err_socket;
- }
-
- skinfo.sockfd = sockfd;
-
- ret = socket_receive_fd(&skinfo);
- if (ret < 0) {
- fprintf(stderr, "Failed: socket_receive_fd\n");
- goto err_recv;
- }
-
- shared_fd = skinfo.datafd;
- printf("Received buffer fd: %d\n", shared_fd);
- if (shared_fd <= 0) {
- fprintf(stderr, "ERROR: improper buf fd\n");
- ret = -1;
- goto err_fd;
- }
-
- memset(&info, 0, sizeof(info));
- info.buffd = shared_fd;
- info.buflen = ION_BUFFER_LEN;
-
- ret = ion_import_buffer_fd(&info);
- if (ret < 0) {
- fprintf(stderr, "Failed: ion_use_buffer_fd\n");
- goto err_import;
- }
-
- map_buf = info.buffer;
- map_len = info.buflen;
- read_buffer(map_buf, map_len);
-
- /* Write probably new data to the same buffer again */
- map_len = ION_BUFFER_LEN;
- write_buffer(map_buf, map_len);
-
-err_import:
- ion_close_buffer_fd(&info);
-err_fd:
-err_recv:
-err_socket:
- closesocket(sockfd, SOCKET_NAME);
-
- return ret;
-}
diff --git a/tools/testing/selftests/android/ion/ionmap_test.c b/tools/testing/selftests/android/ion/ionmap_test.c
deleted file mode 100644
index dab36b06b37d..000000000000
--- a/tools/testing/selftests/android/ion/ionmap_test.c
+++ /dev/null
@@ -1,136 +0,0 @@
-#include <errno.h>
-#include <fcntl.h>
-#include <stdio.h>
-#include <stdint.h>
-#include <string.h>
-#include <unistd.h>
-
-#include <sys/ioctl.h>
-#include <sys/types.h>
-#include <sys/stat.h>
-
-#include <linux/dma-buf.h>
-
-#include <drm/drm.h>
-
-#include "ion.h"
-#include "ionutils.h"
-
-int check_vgem(int fd)
-{
- drm_version_t version = { 0 };
- char name[5];
- int ret;
-
- version.name_len = 4;
- version.name = name;
-
- ret = ioctl(fd, DRM_IOCTL_VERSION, &version);
- if (ret)
- return 1;
-
- return strcmp(name, "vgem");
-}
-
-int open_vgem(void)
-{
- int i, fd;
- const char *drmstr = "/dev/dri/card";
-
- fd = -1;
- for (i = 0; i < 16; i++) {
- char name[80];
-
- sprintf(name, "%s%u", drmstr, i);
-
- fd = open(name, O_RDWR);
- if (fd < 0)
- continue;
-
- if (check_vgem(fd)) {
- close(fd);
- continue;
- } else {
- break;
- }
-
- }
- return fd;
-}
-
-int import_vgem_fd(int vgem_fd, int dma_buf_fd, uint32_t *handle)
-{
- struct drm_prime_handle import_handle = { 0 };
- int ret;
-
- import_handle.fd = dma_buf_fd;
- import_handle.flags = 0;
- import_handle.handle = 0;
-
- ret = ioctl(vgem_fd, DRM_IOCTL_PRIME_FD_TO_HANDLE, &import_handle);
- if (ret == 0)
- *handle = import_handle.handle;
- return ret;
-}
-
-void close_handle(int vgem_fd, uint32_t handle)
-{
- struct drm_gem_close close = { 0 };
-
- close.handle = handle;
- ioctl(vgem_fd, DRM_IOCTL_GEM_CLOSE, &close);
-}
-
-int main()
-{
- int ret, vgem_fd;
- struct ion_buffer_info info;
- uint32_t handle = 0;
- struct dma_buf_sync sync = { 0 };
-
- info.heap_type = ION_HEAP_TYPE_SYSTEM;
- info.heap_size = 4096;
- info.flag_type = ION_FLAG_CACHED;
-
- ret = ion_export_buffer_fd(&info);
- if (ret < 0) {
- printf("ion buffer alloc failed\n");
- return -1;
- }
-
- vgem_fd = open_vgem();
- if (vgem_fd < 0) {
- ret = vgem_fd;
- printf("Failed to open vgem\n");
- goto out_ion;
- }
-
- ret = import_vgem_fd(vgem_fd, info.buffd, &handle);
-
- if (ret < 0) {
- printf("Failed to import buffer\n");
- goto out_vgem;
- }
-
- sync.flags = DMA_BUF_SYNC_START | DMA_BUF_SYNC_RW;
- ret = ioctl(info.buffd, DMA_BUF_IOCTL_SYNC, &sync);
- if (ret)
- printf("sync start failed %d\n", errno);
-
- memset(info.buffer, 0xff, 4096);
-
- sync.flags = DMA_BUF_SYNC_END | DMA_BUF_SYNC_RW;
- ret = ioctl(info.buffd, DMA_BUF_IOCTL_SYNC, &sync);
- if (ret)
- printf("sync end failed %d\n", errno);
-
- close_handle(vgem_fd, handle);
- ret = 0;
-
-out_vgem:
- close(vgem_fd);
-out_ion:
- ion_close_buffer_fd(&info);
- printf("done.\n");
- return ret;
-}
diff --git a/tools/testing/selftests/android/ion/ionutils.c b/tools/testing/selftests/android/ion/ionutils.c
deleted file mode 100644
index 7d1d37c4ef6a..000000000000
--- a/tools/testing/selftests/android/ion/ionutils.c
+++ /dev/null
@@ -1,253 +0,0 @@
-#include <stdio.h>
-#include <string.h>
-#include <unistd.h>
-#include <fcntl.h>
-#include <errno.h>
-//#include <stdint.h>
-#include <sys/ioctl.h>
-#include <sys/mman.h>
-#include "ionutils.h"
-#include "ipcsocket.h"
-
-
-void write_buffer(void *buffer, unsigned long len)
-{
- int i;
- unsigned char *ptr = (unsigned char *)buffer;
-
- if (!ptr) {
- fprintf(stderr, "<%s>: Invalid buffer...\n", __func__);
- return;
- }
-
- printf("Fill buffer content:\n");
- memset(ptr, 0xfd, len);
- for (i = 0; i < len; i++)
- printf("0x%x ", ptr[i]);
- printf("\n");
-}
-
-void read_buffer(void *buffer, unsigned long len)
-{
- int i;
- unsigned char *ptr = (unsigned char *)buffer;
-
- if (!ptr) {
- fprintf(stderr, "<%s>: Invalid buffer...\n", __func__);
- return;
- }
-
- printf("Read buffer content:\n");
- for (i = 0; i < len; i++)
- printf("0x%x ", ptr[i]);
- printf("\n");
-}
-
-int ion_export_buffer_fd(struct ion_buffer_info *ion_info)
-{
- int i, ret, ionfd, buffer_fd;
- unsigned int heap_id;
- unsigned long maplen;
- unsigned char *map_buffer;
- struct ion_allocation_data alloc_data;
- struct ion_heap_query query;
- struct ion_heap_data heap_data[MAX_HEAP_COUNT];
-
- if (!ion_info) {
- fprintf(stderr, "<%s>: Invalid ion info\n", __func__);
- return -1;
- }
-
- /* Create an ION client */
- ionfd = open(ION_DEVICE, O_RDWR);
- if (ionfd < 0) {
- fprintf(stderr, "<%s>: Failed to open ion client: %s\n",
- __func__, strerror(errno));
- return -1;
- }
-
- memset(&query, 0, sizeof(query));
- query.cnt = MAX_HEAP_COUNT;
- query.heaps = (unsigned long int)&heap_data[0];
- /* Query ION heap_id_mask from ION heap */
- ret = ioctl(ionfd, ION_IOC_HEAP_QUERY, &query);
- if (ret < 0) {
- fprintf(stderr, "<%s>: Failed: ION_IOC_HEAP_QUERY: %s\n",
- __func__, strerror(errno));
- goto err_query;
- }
-
- heap_id = MAX_HEAP_COUNT + 1;
- for (i = 0; i < query.cnt; i++) {
- if (heap_data[i].type == ion_info->heap_type) {
- heap_id = heap_data[i].heap_id;
- break;
- }
- }
-
- if (heap_id > MAX_HEAP_COUNT) {
- fprintf(stderr, "<%s>: ERROR: heap type does not exists\n",
- __func__);
- goto err_heap;
- }
-
- alloc_data.len = ion_info->heap_size;
- alloc_data.heap_id_mask = 1 << heap_id;
- alloc_data.flags = ion_info->flag_type;
-
- /* Allocate memory for this ION client as per heap_type */
- ret = ioctl(ionfd, ION_IOC_ALLOC, &alloc_data);
- if (ret < 0) {
- fprintf(stderr, "<%s>: Failed: ION_IOC_ALLOC: %s\n",
- __func__, strerror(errno));
- goto err_alloc;
- }
-
- /* This will return a valid buffer fd */
- buffer_fd = alloc_data.fd;
- maplen = alloc_data.len;
-
- if (buffer_fd < 0 || maplen <= 0) {
- fprintf(stderr, "<%s>: Invalid map data, fd: %d, len: %ld\n",
- __func__, buffer_fd, maplen);
- goto err_fd_data;
- }
-
- /* Create memory mapped buffer for the buffer fd */
- map_buffer = (unsigned char *)mmap(NULL, maplen, PROT_READ|PROT_WRITE,
- MAP_SHARED, buffer_fd, 0);
- if (map_buffer == MAP_FAILED) {
- fprintf(stderr, "<%s>: Failed: mmap: %s\n",
- __func__, strerror(errno));
- goto err_mmap;
- }
-
- ion_info->ionfd = ionfd;
- ion_info->buffd = buffer_fd;
- ion_info->buffer = map_buffer;
- ion_info->buflen = maplen;
-
- return 0;
-
- munmap(map_buffer, maplen);
-
-err_fd_data:
-err_mmap:
- /* in case of error: close the buffer fd */
- if (buffer_fd)
- close(buffer_fd);
-
-err_query:
-err_heap:
-err_alloc:
- /* In case of error: close the ion client fd */
- if (ionfd)
- close(ionfd);
-
- return -1;
-}
-
-int ion_import_buffer_fd(struct ion_buffer_info *ion_info)
-{
- int buffd;
- unsigned char *map_buf;
- unsigned long map_len;
-
- if (!ion_info) {
- fprintf(stderr, "<%s>: Invalid ion info\n", __func__);
- return -1;
- }
-
- map_len = ion_info->buflen;
- buffd = ion_info->buffd;
-
- if (buffd < 0 || map_len <= 0) {
- fprintf(stderr, "<%s>: Invalid map data, fd: %d, len: %ld\n",
- __func__, buffd, map_len);
- goto err_buffd;
- }
-
- map_buf = (unsigned char *)mmap(NULL, map_len, PROT_READ|PROT_WRITE,
- MAP_SHARED, buffd, 0);
- if (map_buf == MAP_FAILED) {
- printf("<%s>: Failed - mmap: %s\n",
- __func__, strerror(errno));
- goto err_mmap;
- }
-
- ion_info->buffer = map_buf;
- ion_info->buflen = map_len;
-
- return 0;
-
-err_mmap:
- if (buffd)
- close(buffd);
-
-err_buffd:
- return -1;
-}
-
-void ion_close_buffer_fd(struct ion_buffer_info *ion_info)
-{
- if (ion_info) {
- /* unmap the buffer properly in the end */
- munmap(ion_info->buffer, ion_info->buflen);
- /* close the buffer fd */
- if (ion_info->buffd > 0)
- close(ion_info->buffd);
- /* Finally, close the client fd */
- if (ion_info->ionfd > 0)
- close(ion_info->ionfd);
- }
-}
-
-int socket_send_fd(struct socket_info *info)
-{
- int status;
- int fd, sockfd;
- struct socketdata skdata;
-
- if (!info) {
- fprintf(stderr, "<%s>: Invalid socket info\n", __func__);
- return -1;
- }
-
- sockfd = info->sockfd;
- fd = info->datafd;
- memset(&skdata, 0, sizeof(skdata));
- skdata.data = fd;
- skdata.len = sizeof(skdata.data);
- status = sendtosocket(sockfd, &skdata);
- if (status < 0) {
- fprintf(stderr, "<%s>: Failed: sendtosocket\n", __func__);
- return -1;
- }
-
- return 0;
-}
-
-int socket_receive_fd(struct socket_info *info)
-{
- int status;
- int fd, sockfd;
- struct socketdata skdata;
-
- if (!info) {
- fprintf(stderr, "<%s>: Invalid socket info\n", __func__);
- return -1;
- }
-
- sockfd = info->sockfd;
- memset(&skdata, 0, sizeof(skdata));
- status = receivefromsocket(sockfd, &skdata);
- if (status < 0) {
- fprintf(stderr, "<%s>: Failed: receivefromsocket\n", __func__);
- return -1;
- }
-
- fd = (int)skdata.data;
- info->datafd = fd;
-
- return status;
-}
diff --git a/tools/testing/selftests/android/ion/ionutils.h b/tools/testing/selftests/android/ion/ionutils.h
deleted file mode 100644
index 9941eb858576..000000000000
--- a/tools/testing/selftests/android/ion/ionutils.h
+++ /dev/null
@@ -1,55 +0,0 @@
-#ifndef __ION_UTILS_H
-#define __ION_UTILS_H
-
-#include "ion.h"
-
-#define SOCKET_NAME "ion_socket"
-#define ION_DEVICE "/dev/ion"
-
-#define ION_BUFFER_LEN 4096
-#define MAX_HEAP_COUNT ION_HEAP_TYPE_CUSTOM
-
-struct socket_info {
- int sockfd;
- int datafd;
- unsigned long buflen;
-};
-
-struct ion_buffer_info {
- int ionfd;
- int buffd;
- unsigned int heap_type;
- unsigned int flag_type;
- unsigned long heap_size;
- unsigned long buflen;
- unsigned char *buffer;
-};
-
-
-/* This is used to fill the data into the mapped buffer */
-void write_buffer(void *buffer, unsigned long len);
-
-/* This is used to read the data from the exported buffer */
-void read_buffer(void *buffer, unsigned long len);
-
-/* This is used to create an ION buffer FD for the kernel buffer
- * So you can export this same buffer to others in the form of FD
- */
-int ion_export_buffer_fd(struct ion_buffer_info *ion_info);
-
-/* This is used to import or map an exported FD.
- * So we point to same buffer without making a copy. Hence zero-copy.
- */
-int ion_import_buffer_fd(struct ion_buffer_info *ion_info);
-
-/* This is used to close all references for the ION client */
-void ion_close_buffer_fd(struct ion_buffer_info *ion_info);
-
-/* This is used to send FD to another process using socket IPC */
-int socket_send_fd(struct socket_info *skinfo);
-
-/* This is used to receive FD from another process using socket IPC */
-int socket_receive_fd(struct socket_info *skinfo);
-
-
-#endif
diff --git a/tools/testing/selftests/android/ion/ipcsocket.c b/tools/testing/selftests/android/ion/ipcsocket.c
deleted file mode 100644
index 7dc521002095..000000000000
--- a/tools/testing/selftests/android/ion/ipcsocket.c
+++ /dev/null
@@ -1,227 +0,0 @@
-#include <stdio.h>
-#include <stdlib.h>
-#include <string.h>
-#include <unistd.h>
-#include <sys/types.h>
-#include <sys/socket.h>
-#include <sys/time.h>
-#include <sys/un.h>
-#include <errno.h>
-
-#include "ipcsocket.h"
-
-
-int opensocket(int *sockfd, const char *name, int connecttype)
-{
- int ret, temp = 1;
-
- if (!name || strlen(name) > MAX_SOCK_NAME_LEN) {
- fprintf(stderr, "<%s>: Invalid socket name.\n", __func__);
- return -1;
- }
-
- ret = socket(PF_LOCAL, SOCK_STREAM, 0);
- if (ret < 0) {
- fprintf(stderr, "<%s>: Failed socket: <%s>\n",
- __func__, strerror(errno));
- return ret;
- }
-
- *sockfd = ret;
- if (setsockopt(*sockfd, SOL_SOCKET, SO_REUSEADDR,
- (char *)&temp, sizeof(int)) < 0) {
- fprintf(stderr, "<%s>: Failed setsockopt: <%s>\n",
- __func__, strerror(errno));
- goto err;
- }
-
- sprintf(sock_name, "/tmp/%s", name);
-
- if (connecttype == 1) {
- /* This is for Server connection */
- struct sockaddr_un skaddr;
- int clientfd;
- socklen_t sklen;
-
- unlink(sock_name);
- memset(&skaddr, 0, sizeof(skaddr));
- skaddr.sun_family = AF_LOCAL;
- strcpy(skaddr.sun_path, sock_name);
-
- ret = bind(*sockfd, (struct sockaddr *)&skaddr,
- SUN_LEN(&skaddr));
- if (ret < 0) {
- fprintf(stderr, "<%s>: Failed bind: <%s>\n",
- __func__, strerror(errno));
- goto err;
- }
-
- ret = listen(*sockfd, 5);
- if (ret < 0) {
- fprintf(stderr, "<%s>: Failed listen: <%s>\n",
- __func__, strerror(errno));
- goto err;
- }
-
- memset(&skaddr, 0, sizeof(skaddr));
- sklen = sizeof(skaddr);
-
- ret = accept(*sockfd, (struct sockaddr *)&skaddr,
- (socklen_t *)&sklen);
- if (ret < 0) {
- fprintf(stderr, "<%s>: Failed accept: <%s>\n",
- __func__, strerror(errno));
- goto err;
- }
-
- clientfd = ret;
- *sockfd = clientfd;
- } else {
- /* This is for client connection */
- struct sockaddr_un skaddr;
-
- memset(&skaddr, 0, sizeof(skaddr));
- skaddr.sun_family = AF_LOCAL;
- strcpy(skaddr.sun_path, sock_name);
-
- ret = connect(*sockfd, (struct sockaddr *)&skaddr,
- SUN_LEN(&skaddr));
- if (ret < 0) {
- fprintf(stderr, "<%s>: Failed connect: <%s>\n",
- __func__, strerror(errno));
- goto err;
- }
- }
-
- return 0;
-
-err:
- if (*sockfd)
- close(*sockfd);
-
- return ret;
-}
-
-int sendtosocket(int sockfd, struct socketdata *skdata)
-{
- int ret, buffd;
- unsigned int len;
- char cmsg_b[CMSG_SPACE(sizeof(int))];
- struct cmsghdr *cmsg;
- struct msghdr msgh;
- struct iovec iov;
- struct timeval timeout;
- fd_set selFDs;
-
- if (!skdata) {
- fprintf(stderr, "<%s>: socketdata is NULL\n", __func__);
- return -1;
- }
-
- FD_ZERO(&selFDs);
- FD_SET(0, &selFDs);
- FD_SET(sockfd, &selFDs);
- timeout.tv_sec = 20;
- timeout.tv_usec = 0;
-
- ret = select(sockfd+1, NULL, &selFDs, NULL, &timeout);
- if (ret < 0) {
- fprintf(stderr, "<%s>: Failed select: <%s>\n",
- __func__, strerror(errno));
- return -1;
- }
-
- if (FD_ISSET(sockfd, &selFDs)) {
- buffd = skdata->data;
- len = skdata->len;
- memset(&msgh, 0, sizeof(msgh));
- msgh.msg_control = &cmsg_b;
- msgh.msg_controllen = CMSG_LEN(len);
- iov.iov_base = "OK";
- iov.iov_len = 2;
- msgh.msg_iov = &iov;
- msgh.msg_iovlen = 1;
- cmsg = CMSG_FIRSTHDR(&msgh);
- cmsg->cmsg_level = SOL_SOCKET;
- cmsg->cmsg_type = SCM_RIGHTS;
- cmsg->cmsg_len = CMSG_LEN(len);
- memcpy(CMSG_DATA(cmsg), &buffd, len);
-
- ret = sendmsg(sockfd, &msgh, MSG_DONTWAIT);
- if (ret < 0) {
- fprintf(stderr, "<%s>: Failed sendmsg: <%s>\n",
- __func__, strerror(errno));
- return -1;
- }
- }
-
- return 0;
-}
-
-int receivefromsocket(int sockfd, struct socketdata *skdata)
-{
- int ret, buffd;
- unsigned int len = 0;
- char cmsg_b[CMSG_SPACE(sizeof(int))];
- struct cmsghdr *cmsg;
- struct msghdr msgh;
- struct iovec iov;
- fd_set recvFDs;
- char data[32];
-
- if (!skdata) {
- fprintf(stderr, "<%s>: socketdata is NULL\n", __func__);
- return -1;
- }
-
- FD_ZERO(&recvFDs);
- FD_SET(0, &recvFDs);
- FD_SET(sockfd, &recvFDs);
-
- ret = select(sockfd+1, &recvFDs, NULL, NULL, NULL);
- if (ret < 0) {
- fprintf(stderr, "<%s>: Failed select: <%s>\n",
- __func__, strerror(errno));
- return -1;
- }
-
- if (FD_ISSET(sockfd, &recvFDs)) {
- len = sizeof(buffd);
- memset(&msgh, 0, sizeof(msgh));
- msgh.msg_control = &cmsg_b;
- msgh.msg_controllen = CMSG_LEN(len);
- iov.iov_base = data;
- iov.iov_len = sizeof(data)-1;
- msgh.msg_iov = &iov;
- msgh.msg_iovlen = 1;
- cmsg = CMSG_FIRSTHDR(&msgh);
- cmsg->cmsg_level = SOL_SOCKET;
- cmsg->cmsg_type = SCM_RIGHTS;
- cmsg->cmsg_len = CMSG_LEN(len);
-
- ret = recvmsg(sockfd, &msgh, MSG_DONTWAIT);
- if (ret < 0) {
- fprintf(stderr, "<%s>: Failed recvmsg: <%s>\n",
- __func__, strerror(errno));
- return -1;
- }
-
- memcpy(&buffd, CMSG_DATA(cmsg), len);
- skdata->data = buffd;
- skdata->len = len;
- }
- return 0;
-}
-
-int closesocket(int sockfd, char *name)
-{
- char sockname[MAX_SOCK_NAME_LEN];
-
- if (sockfd)
- close(sockfd);
- sprintf(sockname, "/tmp/%s", name);
- unlink(sockname);
- shutdown(sockfd, 2);
-
- return 0;
-}
diff --git a/tools/testing/selftests/android/ion/ipcsocket.h b/tools/testing/selftests/android/ion/ipcsocket.h
deleted file mode 100644
index b3e84498a8a1..000000000000
--- a/tools/testing/selftests/android/ion/ipcsocket.h
+++ /dev/null
@@ -1,35 +0,0 @@
-
-#ifndef _IPCSOCKET_H
-#define _IPCSOCKET_H
-
-
-#define MAX_SOCK_NAME_LEN 64
-
-char sock_name[MAX_SOCK_NAME_LEN];
-
-/* This structure is responsible for holding the IPC data
- * data: hold the buffer fd
- * len: just the length of 32-bit integer fd
- */
-struct socketdata {
- int data;
- unsigned int len;
-};
-
-/* This API is used to open the IPC socket connection
- * name: implies a unique socket name in the system
- * connecttype: implies server(0) or client(1)
- */
-int opensocket(int *sockfd, const char *name, int connecttype);
-
-/* This is the API to send socket data over IPC socket */
-int sendtosocket(int sockfd, struct socketdata *data);
-
-/* This is the API to receive socket data over IPC socket */
-int receivefromsocket(int sockfd, struct socketdata *data);
-
-/* This is the API to close the socket connection */
-int closesocket(int sockfd, char *name);
-
-
-#endif
diff --git a/tools/testing/selftests/android/run.sh b/tools/testing/selftests/android/run.sh
deleted file mode 100755
index dd8edf291454..000000000000
--- a/tools/testing/selftests/android/run.sh
+++ /dev/null
@@ -1,3 +0,0 @@
-#!/bin/sh
-
-(cd ion; ./ion_test.sh)
diff --git a/tools/testing/selftests/arm64/Makefile b/tools/testing/selftests/arm64/Makefile
index 93b567d23c8b..28b93cab8c0d 100644
--- a/tools/testing/selftests/arm64/Makefile
+++ b/tools/testing/selftests/arm64/Makefile
@@ -4,7 +4,7 @@
ARCH ?= $(shell uname -m 2>/dev/null || echo not)
ifneq (,$(filter $(ARCH),aarch64 arm64))
-ARM64_SUBTARGETS ?= tags signal
+ARM64_SUBTARGETS ?= tags signal pauth fp mte bti abi
else
ARM64_SUBTARGETS :=
endif
@@ -17,16 +17,9 @@ top_srcdir = $(realpath ../../../../)
# Additional include paths needed by kselftest.h and local headers
CFLAGS += -I$(top_srcdir)/tools/testing/selftests/
-# Guessing where the Kernel headers could have been installed
-# depending on ENV config
-ifeq ($(KBUILD_OUTPUT),)
-khdr_dir = $(top_srcdir)/usr/include
-else
-# the KSFT preferred location when KBUILD_OUTPUT is set
-khdr_dir = $(KBUILD_OUTPUT)/kselftest/usr/include
-endif
+CFLAGS += $(KHDR_INCLUDES)
-CFLAGS += -I$(khdr_dir)
+CFLAGS += -I$(top_srcdir)/tools/include
export CFLAGS
export top_srcdir
@@ -51,7 +44,7 @@ run_tests: all
done
# Avoid any output on non arm64 on emit_tests
-emit_tests: all
+emit_tests:
@for DIR in $(ARM64_SUBTARGETS); do \
BUILD_TARGET=$(OUTPUT)/$$DIR; \
make OUTPUT=$$BUILD_TARGET -C $$DIR $@; \
diff --git a/tools/testing/selftests/arm64/abi/.gitignore b/tools/testing/selftests/arm64/abi/.gitignore
new file mode 100644
index 000000000000..44f8b80f37e3
--- /dev/null
+++ b/tools/testing/selftests/arm64/abi/.gitignore
@@ -0,0 +1,4 @@
+hwcap
+ptrace
+syscall-abi
+tpidr2
diff --git a/tools/testing/selftests/arm64/abi/Makefile b/tools/testing/selftests/arm64/abi/Makefile
new file mode 100644
index 000000000000..a6d30c620908
--- /dev/null
+++ b/tools/testing/selftests/arm64/abi/Makefile
@@ -0,0 +1,15 @@
+# SPDX-License-Identifier: GPL-2.0
+# Copyright (C) 2021 ARM Limited
+
+TEST_GEN_PROGS := hwcap ptrace syscall-abi tpidr2
+
+include ../../lib.mk
+
+$(OUTPUT)/syscall-abi: syscall-abi.c syscall-abi-asm.S
+
+# Build with nolibc since TPIDR2 is intended to be actively managed by
+# libc and we're trying to test the functionality that it depends on here.
+$(OUTPUT)/tpidr2: tpidr2.c
+ $(CC) -fno-asynchronous-unwind-tables -fno-ident -s -Os -nostdlib \
+ -static -include ../../../../include/nolibc/nolibc.h \
+ -ffreestanding -Wall $^ -o $@ -lgcc
diff --git a/tools/testing/selftests/arm64/abi/hwcap.c b/tools/testing/selftests/arm64/abi/hwcap.c
new file mode 100644
index 000000000000..d8909b2b535a
--- /dev/null
+++ b/tools/testing/selftests/arm64/abi/hwcap.c
@@ -0,0 +1,995 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright (C) 2022 ARM Limited.
+ */
+
+#include <errno.h>
+#include <signal.h>
+#include <stdbool.h>
+#include <stddef.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+#include <sys/auxv.h>
+#include <sys/prctl.h>
+#include <asm/hwcap.h>
+#include <asm/sigcontext.h>
+#include <asm/unistd.h>
+
+#include "../../kselftest.h"
+
+#define TESTS_PER_HWCAP 3
+
+/*
+ * Function expected to generate exception when the feature is not
+ * supported and return when it is supported. If the specific exception
+ * is generated then the handler must be able to skip over the
+ * instruction safely.
+ *
+ * Note that it is expected that for many architecture extensions
+ * there are no specific traps due to no architecture state being
+ * added so we may not fault if running on a kernel which doesn't know
+ * to add the hwcap.
+ */
+typedef void (*sig_fn)(void);
+
+static void aes_sigill(void)
+{
+ /* AESE V0.16B, V0.16B */
+ asm volatile(".inst 0x4e284800" : : : );
+}
+
+static void atomics_sigill(void)
+{
+ /* STADD W0, [SP] */
+ asm volatile(".inst 0xb82003ff" : : : );
+}
+
+static void crc32_sigill(void)
+{
+ /* CRC32W W0, W0, W1 */
+ asm volatile(".inst 0x1ac14800" : : : );
+}
+
+static void cssc_sigill(void)
+{
+ /* CNT x0, x0 */
+ asm volatile(".inst 0xdac01c00" : : : "x0");
+}
+
+static void f8cvt_sigill(void)
+{
+ /* FSCALE V0.4H, V0.4H, V0.4H */
+ asm volatile(".inst 0x2ec03c00");
+}
+
+static void f8dp2_sigill(void)
+{
+ /* FDOT V0.4H, V0.4H, V0.5H */
+ asm volatile(".inst 0xe40fc00");
+}
+
+static void f8dp4_sigill(void)
+{
+ /* FDOT V0.2S, V0.2S, V0.2S */
+ asm volatile(".inst 0xe00fc00");
+}
+
+static void f8fma_sigill(void)
+{
+ /* FMLALB V0.8H, V0.16B, V0.16B */
+ asm volatile(".inst 0xec0fc00");
+}
+
+static void faminmax_sigill(void)
+{
+ /* FAMIN V0.4H, V0.4H, V0.4H */
+ asm volatile(".inst 0x2ec01c00");
+}
+
+static void fp_sigill(void)
+{
+ asm volatile("fmov s0, #1");
+}
+
+static void fpmr_sigill(void)
+{
+ asm volatile("mrs x0, S3_3_C4_C4_2" : : : "x0");
+}
+
+static void ilrcpc_sigill(void)
+{
+ /* LDAPUR W0, [SP, #8] */
+ asm volatile(".inst 0x994083e0" : : : );
+}
+
+static void jscvt_sigill(void)
+{
+ /* FJCVTZS W0, D0 */
+ asm volatile(".inst 0x1e7e0000" : : : );
+}
+
+static void lrcpc_sigill(void)
+{
+ /* LDAPR W0, [SP, #0] */
+ asm volatile(".inst 0xb8bfc3e0" : : : );
+}
+
+static void lse128_sigill(void)
+{
+ u64 __attribute__ ((aligned (16))) mem[2] = { 10, 20 };
+ register u64 *memp asm ("x0") = mem;
+ register u64 val0 asm ("x1") = 5;
+ register u64 val1 asm ("x2") = 4;
+
+ /* SWPP X1, X2, [X0] */
+ asm volatile(".inst 0x19228001"
+ : "+r" (memp), "+r" (val0), "+r" (val1)
+ :
+ : "cc", "memory");
+}
+
+static void lut_sigill(void)
+{
+ /* LUTI2 V0.16B, { V0.16B }, V[0] */
+ asm volatile(".inst 0x4e801000");
+}
+
+static void mops_sigill(void)
+{
+ char dst[1], src[1];
+ register char *dstp asm ("x0") = dst;
+ register char *srcp asm ("x1") = src;
+ register long size asm ("x2") = 1;
+
+ /* CPYP [x0]!, [x1]!, x2! */
+ asm volatile(".inst 0x1d010440"
+ : "+r" (dstp), "+r" (srcp), "+r" (size)
+ :
+ : "cc", "memory");
+}
+
+static void pmull_sigill(void)
+{
+ /* PMULL V0.1Q, V0.1D, V0.1D */
+ asm volatile(".inst 0x0ee0e000" : : : );
+}
+
+static void rng_sigill(void)
+{
+ asm volatile("mrs x0, S3_3_C2_C4_0" : : : "x0");
+}
+
+static void sha1_sigill(void)
+{
+ /* SHA1H S0, S0 */
+ asm volatile(".inst 0x5e280800" : : : );
+}
+
+static void sha2_sigill(void)
+{
+ /* SHA256H Q0, Q0, V0.4S */
+ asm volatile(".inst 0x5e004000" : : : );
+}
+
+static void sha512_sigill(void)
+{
+ /* SHA512H Q0, Q0, V0.2D */
+ asm volatile(".inst 0xce608000" : : : );
+}
+
+static void sme_sigill(void)
+{
+ /* RDSVL x0, #0 */
+ asm volatile(".inst 0x04bf5800" : : : "x0");
+}
+
+static void sme2_sigill(void)
+{
+ /* SMSTART ZA */
+ asm volatile("msr S0_3_C4_C5_3, xzr" : : : );
+
+ /* ZERO ZT0 */
+ asm volatile(".inst 0xc0480001" : : : );
+
+ /* SMSTOP */
+ asm volatile("msr S0_3_C4_C6_3, xzr" : : : );
+}
+
+static void sme2p1_sigill(void)
+{
+ /* SMSTART SM */
+ asm volatile("msr S0_3_C4_C3_3, xzr" : : : );
+
+ /* BFCLAMP { Z0.H - Z1.H }, Z0.H, Z0.H */
+ asm volatile(".inst 0xc120C000" : : : );
+
+ /* SMSTOP */
+ asm volatile("msr S0_3_C4_C6_3, xzr" : : : );
+}
+
+static void smei16i32_sigill(void)
+{
+ /* SMSTART */
+ asm volatile("msr S0_3_C4_C7_3, xzr" : : : );
+
+ /* SMOPA ZA0.S, P0/M, P0/M, Z0.B, Z0.B */
+ asm volatile(".inst 0xa0800000" : : : );
+
+ /* SMSTOP */
+ asm volatile("msr S0_3_C4_C6_3, xzr" : : : );
+}
+
+static void smebi32i32_sigill(void)
+{
+ /* SMSTART */
+ asm volatile("msr S0_3_C4_C7_3, xzr" : : : );
+
+ /* BMOPA ZA0.S, P0/M, P0/M, Z0.B, Z0.B */
+ asm volatile(".inst 0x80800008" : : : );
+
+ /* SMSTOP */
+ asm volatile("msr S0_3_C4_C6_3, xzr" : : : );
+}
+
+static void smeb16b16_sigill(void)
+{
+ /* SMSTART */
+ asm volatile("msr S0_3_C4_C7_3, xzr" : : : );
+
+ /* BFADD ZA.H[W0, 0], {Z0.H-Z1.H} */
+ asm volatile(".inst 0xC1E41C00" : : : );
+
+ /* SMSTOP */
+ asm volatile("msr S0_3_C4_C6_3, xzr" : : : );
+}
+
+static void smef16f16_sigill(void)
+{
+ /* SMSTART */
+ asm volatile("msr S0_3_C4_C7_3, xzr" : : : );
+
+ /* FADD ZA.H[W0, 0], { Z0.H-Z1.H } */
+ asm volatile(".inst 0xc1a41C00" : : : );
+
+ /* SMSTOP */
+ asm volatile("msr S0_3_C4_C6_3, xzr" : : : );
+}
+
+static void smef8f16_sigill(void)
+{
+ /* SMSTART */
+ asm volatile("msr S0_3_C4_C7_3, xzr" : : : );
+
+ /* FDOT ZA.H[W0, 0], Z0.B-Z1.B, Z0.B-Z1.B */
+ asm volatile(".inst 0xc1a01020" : : : );
+
+ /* SMSTOP */
+ asm volatile("msr S0_3_C4_C6_3, xzr" : : : );
+}
+
+static void smef8f32_sigill(void)
+{
+ /* SMSTART */
+ asm volatile("msr S0_3_C4_C7_3, xzr" : : : );
+
+ /* FDOT ZA.S[W0, 0], { Z0.B-Z1.B }, Z0.B[0] */
+ asm volatile(".inst 0xc1500038" : : : );
+
+ /* SMSTOP */
+ asm volatile("msr S0_3_C4_C6_3, xzr" : : : );
+}
+
+static void smelutv2_sigill(void)
+{
+ /* SMSTART */
+ asm volatile("msr S0_3_C4_C7_3, xzr" : : : );
+
+ /* LUTI4 { Z0.B-Z3.B }, ZT0, { Z0-Z1 } */
+ asm volatile(".inst 0xc08b0000" : : : );
+
+ /* SMSTOP */
+ asm volatile("msr S0_3_C4_C6_3, xzr" : : : );
+}
+
+static void smesf8dp2_sigill(void)
+{
+ /* SMSTART */
+ asm volatile("msr S0_3_C4_C7_3, xzr" : : : );
+
+ /* FDOT Z0.H, Z0.B, Z0.B[0] */
+ asm volatile(".inst 0x64204400" : : : );
+
+ /* SMSTOP */
+ asm volatile("msr S0_3_C4_C6_3, xzr" : : : );
+}
+
+static void smesf8dp4_sigill(void)
+{
+ /* SMSTART */
+ asm volatile("msr S0_3_C4_C7_3, xzr" : : : );
+
+ /* FDOT Z0.S, Z0.B, Z0.B[0] */
+ asm volatile(".inst 0xc1a41C00" : : : );
+
+ /* SMSTOP */
+ asm volatile("msr S0_3_C4_C6_3, xzr" : : : );
+}
+
+static void smesf8fma_sigill(void)
+{
+ /* SMSTART */
+ asm volatile("msr S0_3_C4_C7_3, xzr" : : : );
+
+ /* FMLALB V0.8H, V0.16B, V0.16B */
+ asm volatile(".inst 0xec0fc00");
+
+ /* SMSTOP */
+ asm volatile("msr S0_3_C4_C6_3, xzr" : : : );
+}
+
+static void sve_sigill(void)
+{
+ /* RDVL x0, #0 */
+ asm volatile(".inst 0x04bf5000" : : : "x0");
+}
+
+static void sve2_sigill(void)
+{
+ /* SQABS Z0.b, P0/M, Z0.B */
+ asm volatile(".inst 0x4408A000" : : : "z0");
+}
+
+static void sve2p1_sigill(void)
+{
+ /* BFADD Z0.H, Z0.H, Z0.H */
+ asm volatile(".inst 0x65000000" : : : "z0");
+}
+
+static void sveaes_sigill(void)
+{
+ /* AESD z0.b, z0.b, z0.b */
+ asm volatile(".inst 0x4522e400" : : : "z0");
+}
+
+static void sveb16b16_sigill(void)
+{
+ /* BFADD ZA.H[W0, 0], {Z0.H-Z1.H} */
+ asm volatile(".inst 0xC1E41C00" : : : );
+}
+
+static void svepmull_sigill(void)
+{
+ /* PMULLB Z0.Q, Z0.D, Z0.D */
+ asm volatile(".inst 0x45006800" : : : "z0");
+}
+
+static void svebitperm_sigill(void)
+{
+ /* BDEP Z0.B, Z0.B, Z0.B */
+ asm volatile(".inst 0x4500b400" : : : "z0");
+}
+
+static void svesha3_sigill(void)
+{
+ /* EOR3 Z0.D, Z0.D, Z0.D, Z0.D */
+ asm volatile(".inst 0x4203800" : : : "z0");
+}
+
+static void svesm4_sigill(void)
+{
+ /* SM4E Z0.S, Z0.S, Z0.S */
+ asm volatile(".inst 0x4523e000" : : : "z0");
+}
+
+static void svei8mm_sigill(void)
+{
+ /* USDOT Z0.S, Z0.B, Z0.B[0] */
+ asm volatile(".inst 0x44a01800" : : : "z0");
+}
+
+static void svef32mm_sigill(void)
+{
+ /* FMMLA Z0.S, Z0.S, Z0.S */
+ asm volatile(".inst 0x64a0e400" : : : "z0");
+}
+
+static void svef64mm_sigill(void)
+{
+ /* FMMLA Z0.D, Z0.D, Z0.D */
+ asm volatile(".inst 0x64e0e400" : : : "z0");
+}
+
+static void svebf16_sigill(void)
+{
+ /* BFCVT Z0.H, P0/M, Z0.S */
+ asm volatile(".inst 0x658aa000" : : : "z0");
+}
+
+static void hbc_sigill(void)
+{
+ /* BC.EQ +4 */
+ asm volatile("cmp xzr, xzr\n"
+ ".inst 0x54000030" : : : "cc");
+}
+
+static void uscat_sigbus(void)
+{
+ /* unaligned atomic access */
+ asm volatile("ADD x1, sp, #2" : : : );
+ /* STADD W0, [X1] */
+ asm volatile(".inst 0xb820003f" : : : );
+}
+
+static void lrcpc3_sigill(void)
+{
+ int data[2] = { 1, 2 };
+
+ register int *src asm ("x0") = data;
+ register int data0 asm ("w2") = 0;
+ register int data1 asm ("w3") = 0;
+
+ /* LDIAPP w2, w3, [x0] */
+ asm volatile(".inst 0x99431802"
+ : "=r" (data0), "=r" (data1) : "r" (src) :);
+}
+
+static const struct hwcap_data {
+ const char *name;
+ unsigned long at_hwcap;
+ unsigned long hwcap_bit;
+ const char *cpuinfo;
+ sig_fn sigill_fn;
+ bool sigill_reliable;
+ sig_fn sigbus_fn;
+ bool sigbus_reliable;
+} hwcaps[] = {
+ {
+ .name = "AES",
+ .at_hwcap = AT_HWCAP,
+ .hwcap_bit = HWCAP_AES,
+ .cpuinfo = "aes",
+ .sigill_fn = aes_sigill,
+ },
+ {
+ .name = "CRC32",
+ .at_hwcap = AT_HWCAP,
+ .hwcap_bit = HWCAP_CRC32,
+ .cpuinfo = "crc32",
+ .sigill_fn = crc32_sigill,
+ },
+ {
+ .name = "CSSC",
+ .at_hwcap = AT_HWCAP2,
+ .hwcap_bit = HWCAP2_CSSC,
+ .cpuinfo = "cssc",
+ .sigill_fn = cssc_sigill,
+ },
+ {
+ .name = "F8CVT",
+ .at_hwcap = AT_HWCAP2,
+ .hwcap_bit = HWCAP2_F8CVT,
+ .cpuinfo = "f8cvt",
+ .sigill_fn = f8cvt_sigill,
+ },
+ {
+ .name = "F8DP4",
+ .at_hwcap = AT_HWCAP2,
+ .hwcap_bit = HWCAP2_F8DP4,
+ .cpuinfo = "f8dp4",
+ .sigill_fn = f8dp4_sigill,
+ },
+ {
+ .name = "F8DP2",
+ .at_hwcap = AT_HWCAP2,
+ .hwcap_bit = HWCAP2_F8DP2,
+ .cpuinfo = "f8dp4",
+ .sigill_fn = f8dp2_sigill,
+ },
+ {
+ .name = "F8E5M2",
+ .at_hwcap = AT_HWCAP2,
+ .hwcap_bit = HWCAP2_F8E5M2,
+ .cpuinfo = "f8e5m2",
+ },
+ {
+ .name = "F8E4M3",
+ .at_hwcap = AT_HWCAP2,
+ .hwcap_bit = HWCAP2_F8E4M3,
+ .cpuinfo = "f8e4m3",
+ },
+ {
+ .name = "F8FMA",
+ .at_hwcap = AT_HWCAP2,
+ .hwcap_bit = HWCAP2_F8FMA,
+ .cpuinfo = "f8fma",
+ .sigill_fn = f8fma_sigill,
+ },
+ {
+ .name = "FAMINMAX",
+ .at_hwcap = AT_HWCAP2,
+ .hwcap_bit = HWCAP2_FAMINMAX,
+ .cpuinfo = "faminmax",
+ .sigill_fn = faminmax_sigill,
+ },
+ {
+ .name = "FP",
+ .at_hwcap = AT_HWCAP,
+ .hwcap_bit = HWCAP_FP,
+ .cpuinfo = "fp",
+ .sigill_fn = fp_sigill,
+ },
+ {
+ .name = "FPMR",
+ .at_hwcap = AT_HWCAP2,
+ .hwcap_bit = HWCAP2_FPMR,
+ .cpuinfo = "fpmr",
+ .sigill_fn = fpmr_sigill,
+ .sigill_reliable = true,
+ },
+ {
+ .name = "JSCVT",
+ .at_hwcap = AT_HWCAP,
+ .hwcap_bit = HWCAP_JSCVT,
+ .cpuinfo = "jscvt",
+ .sigill_fn = jscvt_sigill,
+ },
+ {
+ .name = "LRCPC",
+ .at_hwcap = AT_HWCAP,
+ .hwcap_bit = HWCAP_LRCPC,
+ .cpuinfo = "lrcpc",
+ .sigill_fn = lrcpc_sigill,
+ },
+ {
+ .name = "LRCPC2",
+ .at_hwcap = AT_HWCAP,
+ .hwcap_bit = HWCAP_ILRCPC,
+ .cpuinfo = "ilrcpc",
+ .sigill_fn = ilrcpc_sigill,
+ },
+ {
+ .name = "LRCPC3",
+ .at_hwcap = AT_HWCAP2,
+ .hwcap_bit = HWCAP2_LRCPC3,
+ .cpuinfo = "lrcpc3",
+ .sigill_fn = lrcpc3_sigill,
+ },
+ {
+ .name = "LSE",
+ .at_hwcap = AT_HWCAP,
+ .hwcap_bit = HWCAP_ATOMICS,
+ .cpuinfo = "atomics",
+ .sigill_fn = atomics_sigill,
+ },
+ {
+ .name = "LSE2",
+ .at_hwcap = AT_HWCAP,
+ .hwcap_bit = HWCAP_USCAT,
+ .cpuinfo = "uscat",
+ .sigill_fn = atomics_sigill,
+ .sigbus_fn = uscat_sigbus,
+ .sigbus_reliable = true,
+ },
+ {
+ .name = "LSE128",
+ .at_hwcap = AT_HWCAP2,
+ .hwcap_bit = HWCAP2_LSE128,
+ .cpuinfo = "lse128",
+ .sigill_fn = lse128_sigill,
+ },
+ {
+ .name = "LUT",
+ .at_hwcap = AT_HWCAP2,
+ .hwcap_bit = HWCAP2_LUT,
+ .cpuinfo = "lut",
+ .sigill_fn = lut_sigill,
+ },
+ {
+ .name = "MOPS",
+ .at_hwcap = AT_HWCAP2,
+ .hwcap_bit = HWCAP2_MOPS,
+ .cpuinfo = "mops",
+ .sigill_fn = mops_sigill,
+ .sigill_reliable = true,
+ },
+ {
+ .name = "PMULL",
+ .at_hwcap = AT_HWCAP,
+ .hwcap_bit = HWCAP_PMULL,
+ .cpuinfo = "pmull",
+ .sigill_fn = pmull_sigill,
+ },
+ {
+ .name = "RNG",
+ .at_hwcap = AT_HWCAP2,
+ .hwcap_bit = HWCAP2_RNG,
+ .cpuinfo = "rng",
+ .sigill_fn = rng_sigill,
+ },
+ {
+ .name = "RPRFM",
+ .at_hwcap = AT_HWCAP2,
+ .hwcap_bit = HWCAP2_RPRFM,
+ .cpuinfo = "rprfm",
+ },
+ {
+ .name = "SHA1",
+ .at_hwcap = AT_HWCAP,
+ .hwcap_bit = HWCAP_SHA1,
+ .cpuinfo = "sha1",
+ .sigill_fn = sha1_sigill,
+ },
+ {
+ .name = "SHA2",
+ .at_hwcap = AT_HWCAP,
+ .hwcap_bit = HWCAP_SHA2,
+ .cpuinfo = "sha2",
+ .sigill_fn = sha2_sigill,
+ },
+ {
+ .name = "SHA512",
+ .at_hwcap = AT_HWCAP,
+ .hwcap_bit = HWCAP_SHA512,
+ .cpuinfo = "sha512",
+ .sigill_fn = sha512_sigill,
+ },
+ {
+ .name = "SME",
+ .at_hwcap = AT_HWCAP2,
+ .hwcap_bit = HWCAP2_SME,
+ .cpuinfo = "sme",
+ .sigill_fn = sme_sigill,
+ .sigill_reliable = true,
+ },
+ {
+ .name = "SME2",
+ .at_hwcap = AT_HWCAP2,
+ .hwcap_bit = HWCAP2_SME2,
+ .cpuinfo = "sme2",
+ .sigill_fn = sme2_sigill,
+ .sigill_reliable = true,
+ },
+ {
+ .name = "SME 2.1",
+ .at_hwcap = AT_HWCAP2,
+ .hwcap_bit = HWCAP2_SME2P1,
+ .cpuinfo = "sme2p1",
+ .sigill_fn = sme2p1_sigill,
+ },
+ {
+ .name = "SME I16I32",
+ .at_hwcap = AT_HWCAP2,
+ .hwcap_bit = HWCAP2_SME_I16I32,
+ .cpuinfo = "smei16i32",
+ .sigill_fn = smei16i32_sigill,
+ },
+ {
+ .name = "SME BI32I32",
+ .at_hwcap = AT_HWCAP2,
+ .hwcap_bit = HWCAP2_SME_BI32I32,
+ .cpuinfo = "smebi32i32",
+ .sigill_fn = smebi32i32_sigill,
+ },
+ {
+ .name = "SME B16B16",
+ .at_hwcap = AT_HWCAP2,
+ .hwcap_bit = HWCAP2_SME_B16B16,
+ .cpuinfo = "smeb16b16",
+ .sigill_fn = smeb16b16_sigill,
+ },
+ {
+ .name = "SME F16F16",
+ .at_hwcap = AT_HWCAP2,
+ .hwcap_bit = HWCAP2_SME_F16F16,
+ .cpuinfo = "smef16f16",
+ .sigill_fn = smef16f16_sigill,
+ },
+ {
+ .name = "SME F8F16",
+ .at_hwcap = AT_HWCAP2,
+ .hwcap_bit = HWCAP2_SME_F8F16,
+ .cpuinfo = "smef8f16",
+ .sigill_fn = smef8f16_sigill,
+ },
+ {
+ .name = "SME F8F32",
+ .at_hwcap = AT_HWCAP2,
+ .hwcap_bit = HWCAP2_SME_F8F32,
+ .cpuinfo = "smef8f32",
+ .sigill_fn = smef8f32_sigill,
+ },
+ {
+ .name = "SME LUTV2",
+ .at_hwcap = AT_HWCAP2,
+ .hwcap_bit = HWCAP2_SME_LUTV2,
+ .cpuinfo = "smelutv2",
+ .sigill_fn = smelutv2_sigill,
+ },
+ {
+ .name = "SME SF8FMA",
+ .at_hwcap = AT_HWCAP2,
+ .hwcap_bit = HWCAP2_SME_SF8FMA,
+ .cpuinfo = "smesf8fma",
+ .sigill_fn = smesf8fma_sigill,
+ },
+ {
+ .name = "SME SF8DP2",
+ .at_hwcap = AT_HWCAP2,
+ .hwcap_bit = HWCAP2_SME_SF8DP2,
+ .cpuinfo = "smesf8dp2",
+ .sigill_fn = smesf8dp2_sigill,
+ },
+ {
+ .name = "SME SF8DP4",
+ .at_hwcap = AT_HWCAP2,
+ .hwcap_bit = HWCAP2_SME_SF8DP4,
+ .cpuinfo = "smesf8dp4",
+ .sigill_fn = smesf8dp4_sigill,
+ },
+ {
+ .name = "SVE",
+ .at_hwcap = AT_HWCAP,
+ .hwcap_bit = HWCAP_SVE,
+ .cpuinfo = "sve",
+ .sigill_fn = sve_sigill,
+ .sigill_reliable = true,
+ },
+ {
+ .name = "SVE 2",
+ .at_hwcap = AT_HWCAP2,
+ .hwcap_bit = HWCAP2_SVE2,
+ .cpuinfo = "sve2",
+ .sigill_fn = sve2_sigill,
+ },
+ {
+ .name = "SVE 2.1",
+ .at_hwcap = AT_HWCAP2,
+ .hwcap_bit = HWCAP2_SVE2P1,
+ .cpuinfo = "sve2p1",
+ .sigill_fn = sve2p1_sigill,
+ },
+ {
+ .name = "SVE AES",
+ .at_hwcap = AT_HWCAP2,
+ .hwcap_bit = HWCAP2_SVEAES,
+ .cpuinfo = "sveaes",
+ .sigill_fn = sveaes_sigill,
+ },
+ {
+ .name = "SVE2 B16B16",
+ .at_hwcap = AT_HWCAP2,
+ .hwcap_bit = HWCAP2_SVE_B16B16,
+ .cpuinfo = "sveb16b16",
+ .sigill_fn = sveb16b16_sigill,
+ },
+ {
+ .name = "SVE2 PMULL",
+ .at_hwcap = AT_HWCAP2,
+ .hwcap_bit = HWCAP2_SVEPMULL,
+ .cpuinfo = "svepmull",
+ .sigill_fn = svepmull_sigill,
+ },
+ {
+ .name = "SVE2 BITPERM",
+ .at_hwcap = AT_HWCAP2,
+ .hwcap_bit = HWCAP2_SVEBITPERM,
+ .cpuinfo = "svebitperm",
+ .sigill_fn = svebitperm_sigill,
+ },
+ {
+ .name = "SVE2 SHA3",
+ .at_hwcap = AT_HWCAP2,
+ .hwcap_bit = HWCAP2_SVESHA3,
+ .cpuinfo = "svesha3",
+ .sigill_fn = svesha3_sigill,
+ },
+ {
+ .name = "SVE2 SM4",
+ .at_hwcap = AT_HWCAP2,
+ .hwcap_bit = HWCAP2_SVESM4,
+ .cpuinfo = "svesm4",
+ .sigill_fn = svesm4_sigill,
+ },
+ {
+ .name = "SVE2 I8MM",
+ .at_hwcap = AT_HWCAP2,
+ .hwcap_bit = HWCAP2_SVEI8MM,
+ .cpuinfo = "svei8mm",
+ .sigill_fn = svei8mm_sigill,
+ },
+ {
+ .name = "SVE2 F32MM",
+ .at_hwcap = AT_HWCAP2,
+ .hwcap_bit = HWCAP2_SVEF32MM,
+ .cpuinfo = "svef32mm",
+ .sigill_fn = svef32mm_sigill,
+ },
+ {
+ .name = "SVE2 F64MM",
+ .at_hwcap = AT_HWCAP2,
+ .hwcap_bit = HWCAP2_SVEF64MM,
+ .cpuinfo = "svef64mm",
+ .sigill_fn = svef64mm_sigill,
+ },
+ {
+ .name = "SVE2 BF16",
+ .at_hwcap = AT_HWCAP2,
+ .hwcap_bit = HWCAP2_SVEBF16,
+ .cpuinfo = "svebf16",
+ .sigill_fn = svebf16_sigill,
+ },
+ {
+ .name = "SVE2 EBF16",
+ .at_hwcap = AT_HWCAP2,
+ .hwcap_bit = HWCAP2_SVE_EBF16,
+ .cpuinfo = "sveebf16",
+ },
+ {
+ .name = "HBC",
+ .at_hwcap = AT_HWCAP2,
+ .hwcap_bit = HWCAP2_HBC,
+ .cpuinfo = "hbc",
+ .sigill_fn = hbc_sigill,
+ .sigill_reliable = true,
+ },
+};
+
+typedef void (*sighandler_fn)(int, siginfo_t *, void *);
+
+#define DEF_SIGHANDLER_FUNC(SIG, NUM) \
+static bool seen_##SIG; \
+static void handle_##SIG(int sig, siginfo_t *info, void *context) \
+{ \
+ ucontext_t *uc = context; \
+ \
+ seen_##SIG = true; \
+ /* Skip over the offending instruction */ \
+ uc->uc_mcontext.pc += 4; \
+}
+
+DEF_SIGHANDLER_FUNC(sigill, SIGILL);
+DEF_SIGHANDLER_FUNC(sigbus, SIGBUS);
+
+bool cpuinfo_present(const char *name)
+{
+ FILE *f;
+ char buf[2048], name_space[30], name_newline[30];
+ char *s;
+
+ /*
+ * The feature should appear with a leading space and either a
+ * trailing space or a newline.
+ */
+ snprintf(name_space, sizeof(name_space), " %s ", name);
+ snprintf(name_newline, sizeof(name_newline), " %s\n", name);
+
+ f = fopen("/proc/cpuinfo", "r");
+ if (!f) {
+ ksft_print_msg("Failed to open /proc/cpuinfo\n");
+ return false;
+ }
+
+ while (fgets(buf, sizeof(buf), f)) {
+ /* Features: line? */
+ if (strncmp(buf, "Features\t:", strlen("Features\t:")) != 0)
+ continue;
+
+ /* All CPUs should be symmetric, don't read any more */
+ fclose(f);
+
+ s = strstr(buf, name_space);
+ if (s)
+ return true;
+ s = strstr(buf, name_newline);
+ if (s)
+ return true;
+
+ return false;
+ }
+
+ ksft_print_msg("Failed to find Features in /proc/cpuinfo\n");
+ fclose(f);
+ return false;
+}
+
+static int install_sigaction(int signum, sighandler_fn handler)
+{
+ int ret;
+ struct sigaction sa;
+
+ memset(&sa, 0, sizeof(sa));
+ sa.sa_sigaction = handler;
+ sa.sa_flags = SA_RESTART | SA_SIGINFO;
+ sigemptyset(&sa.sa_mask);
+ ret = sigaction(signum, &sa, NULL);
+ if (ret < 0)
+ ksft_exit_fail_msg("Failed to install SIGNAL handler: %s (%d)\n",
+ strerror(errno), errno);
+
+ return ret;
+}
+
+static void uninstall_sigaction(int signum)
+{
+ if (sigaction(signum, NULL, NULL) < 0)
+ ksft_exit_fail_msg("Failed to uninstall SIGNAL handler: %s (%d)\n",
+ strerror(errno), errno);
+}
+
+#define DEF_INST_RAISE_SIG(SIG, NUM) \
+static bool inst_raise_##SIG(const struct hwcap_data *hwcap, \
+ bool have_hwcap) \
+{ \
+ if (!hwcap->SIG##_fn) { \
+ ksft_test_result_skip(#SIG"_%s\n", hwcap->name); \
+ /* assume that it would raise exception in default */ \
+ return true; \
+ } \
+ \
+ install_sigaction(NUM, handle_##SIG); \
+ \
+ seen_##SIG = false; \
+ hwcap->SIG##_fn(); \
+ \
+ if (have_hwcap) { \
+ /* Should be able to use the extension */ \
+ ksft_test_result(!seen_##SIG, \
+ #SIG"_%s\n", hwcap->name); \
+ } else if (hwcap->SIG##_reliable) { \
+ /* Guaranteed a SIGNAL */ \
+ ksft_test_result(seen_##SIG, \
+ #SIG"_%s\n", hwcap->name); \
+ } else { \
+ /* Missing SIGNAL might be fine */ \
+ ksft_print_msg(#SIG"_%sreported for %s\n", \
+ seen_##SIG ? "" : "not ", \
+ hwcap->name); \
+ ksft_test_result_skip(#SIG"_%s\n", \
+ hwcap->name); \
+ } \
+ \
+ uninstall_sigaction(NUM); \
+ return seen_##SIG; \
+}
+
+DEF_INST_RAISE_SIG(sigill, SIGILL);
+DEF_INST_RAISE_SIG(sigbus, SIGBUS);
+
+int main(void)
+{
+ int i;
+ const struct hwcap_data *hwcap;
+ bool have_cpuinfo, have_hwcap, raise_sigill;
+
+ ksft_print_header();
+ ksft_set_plan(ARRAY_SIZE(hwcaps) * TESTS_PER_HWCAP);
+
+ for (i = 0; i < ARRAY_SIZE(hwcaps); i++) {
+ hwcap = &hwcaps[i];
+
+ have_hwcap = getauxval(hwcap->at_hwcap) & hwcap->hwcap_bit;
+ have_cpuinfo = cpuinfo_present(hwcap->cpuinfo);
+
+ if (have_hwcap)
+ ksft_print_msg("%s present\n", hwcap->name);
+
+ ksft_test_result(have_hwcap == have_cpuinfo,
+ "cpuinfo_match_%s\n", hwcap->name);
+
+ /*
+ * Testing for SIGBUS only makes sense after make sure
+ * that the instruction does not cause a SIGILL signal.
+ */
+ raise_sigill = inst_raise_sigill(hwcap, have_hwcap);
+ if (!raise_sigill)
+ inst_raise_sigbus(hwcap, have_hwcap);
+ else
+ ksft_test_result_skip("sigbus_%s\n", hwcap->name);
+ }
+
+ ksft_print_cnts();
+
+ return 0;
+}
diff --git a/tools/testing/selftests/arm64/abi/ptrace.c b/tools/testing/selftests/arm64/abi/ptrace.c
new file mode 100644
index 000000000000..abe4d58d731d
--- /dev/null
+++ b/tools/testing/selftests/arm64/abi/ptrace.c
@@ -0,0 +1,271 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright (C) 2022 ARM Limited.
+ */
+#include <errno.h>
+#include <stdbool.h>
+#include <stddef.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+#include <sys/auxv.h>
+#include <sys/prctl.h>
+#include <sys/ptrace.h>
+#include <sys/types.h>
+#include <sys/uio.h>
+#include <sys/wait.h>
+#include <asm/sigcontext.h>
+#include <asm/ptrace.h>
+
+#include "../../kselftest.h"
+
+#define EXPECTED_TESTS 11
+
+#define MAX_TPIDRS 2
+
+static bool have_sme(void)
+{
+ return getauxval(AT_HWCAP2) & HWCAP2_SME;
+}
+
+static void test_tpidr(pid_t child)
+{
+ uint64_t read_val[MAX_TPIDRS];
+ uint64_t write_val[MAX_TPIDRS];
+ struct iovec read_iov, write_iov;
+ bool test_tpidr2 = false;
+ int ret, i;
+
+ read_iov.iov_base = read_val;
+ write_iov.iov_base = write_val;
+
+ /* Should be able to read a single TPIDR... */
+ read_iov.iov_len = sizeof(uint64_t);
+ ret = ptrace(PTRACE_GETREGSET, child, NT_ARM_TLS, &read_iov);
+ ksft_test_result(ret == 0, "read_tpidr_one\n");
+
+ /* ...write a new value.. */
+ write_iov.iov_len = sizeof(uint64_t);
+ write_val[0] = read_val[0]++;
+ ret = ptrace(PTRACE_SETREGSET, child, NT_ARM_TLS, &write_iov);
+ ksft_test_result(ret == 0, "write_tpidr_one\n");
+
+ /* ...then read it back */
+ ret = ptrace(PTRACE_GETREGSET, child, NT_ARM_TLS, &read_iov);
+ ksft_test_result(ret == 0 && write_val[0] == read_val[0],
+ "verify_tpidr_one\n");
+
+ /* If we have TPIDR2 we should be able to read it */
+ read_iov.iov_len = sizeof(read_val);
+ ret = ptrace(PTRACE_GETREGSET, child, NT_ARM_TLS, &read_iov);
+ if (ret == 0) {
+ /* If we have SME there should be two TPIDRs */
+ if (read_iov.iov_len >= sizeof(read_val))
+ test_tpidr2 = true;
+
+ if (have_sme() && test_tpidr2) {
+ ksft_test_result(test_tpidr2, "count_tpidrs\n");
+ } else {
+ ksft_test_result(read_iov.iov_len % sizeof(uint64_t) == 0,
+ "count_tpidrs\n");
+ }
+ } else {
+ ksft_test_result_fail("count_tpidrs\n");
+ }
+
+ if (test_tpidr2) {
+ /* Try to write new values to all known TPIDRs... */
+ write_iov.iov_len = sizeof(write_val);
+ for (i = 0; i < MAX_TPIDRS; i++)
+ write_val[i] = read_val[i] + 1;
+ ret = ptrace(PTRACE_SETREGSET, child, NT_ARM_TLS, &write_iov);
+
+ ksft_test_result(ret == 0 &&
+ write_iov.iov_len == sizeof(write_val),
+ "tpidr2_write\n");
+
+ /* ...then read them back */
+ read_iov.iov_len = sizeof(read_val);
+ ret = ptrace(PTRACE_GETREGSET, child, NT_ARM_TLS, &read_iov);
+
+ if (have_sme()) {
+ /* Should read back the written value */
+ ksft_test_result(ret == 0 &&
+ read_iov.iov_len >= sizeof(read_val) &&
+ memcmp(read_val, write_val,
+ sizeof(read_val)) == 0,
+ "tpidr2_read\n");
+ } else {
+ /* TPIDR2 should read as zero */
+ ksft_test_result(ret == 0 &&
+ read_iov.iov_len >= sizeof(read_val) &&
+ read_val[0] == write_val[0] &&
+ read_val[1] == 0,
+ "tpidr2_read\n");
+ }
+
+ /* Writing only TPIDR... */
+ write_iov.iov_len = sizeof(uint64_t);
+ memcpy(write_val, read_val, sizeof(read_val));
+ write_val[0] += 1;
+ ret = ptrace(PTRACE_SETREGSET, child, NT_ARM_TLS, &write_iov);
+
+ if (ret == 0) {
+ /* ...should leave TPIDR2 untouched */
+ read_iov.iov_len = sizeof(read_val);
+ ret = ptrace(PTRACE_GETREGSET, child, NT_ARM_TLS,
+ &read_iov);
+
+ ksft_test_result(ret == 0 &&
+ read_iov.iov_len >= sizeof(read_val) &&
+ memcmp(read_val, write_val,
+ sizeof(read_val)) == 0,
+ "write_tpidr_only\n");
+ } else {
+ ksft_test_result_fail("write_tpidr_only\n");
+ }
+ } else {
+ ksft_test_result_skip("tpidr2_write\n");
+ ksft_test_result_skip("tpidr2_read\n");
+ ksft_test_result_skip("write_tpidr_only\n");
+ }
+}
+
+static void test_hw_debug(pid_t child, int type, const char *type_name)
+{
+ struct user_hwdebug_state state;
+ struct iovec iov;
+ int slots, arch, ret;
+
+ iov.iov_len = sizeof(state);
+ iov.iov_base = &state;
+
+ /* Should be able to read the values */
+ ret = ptrace(PTRACE_GETREGSET, child, type, &iov);
+ ksft_test_result(ret == 0, "read_%s\n", type_name);
+
+ if (ret == 0) {
+ /* Low 8 bits is the number of slots, next 4 bits the arch */
+ slots = state.dbg_info & 0xff;
+ arch = (state.dbg_info >> 8) & 0xf;
+
+ ksft_print_msg("%s version %d with %d slots\n", type_name,
+ arch, slots);
+
+ /* Zero is not currently architecturally valid */
+ ksft_test_result(arch, "%s_arch_set\n", type_name);
+ } else {
+ ksft_test_result_skip("%s_arch_set\n");
+ }
+}
+
+static int do_child(void)
+{
+ if (ptrace(PTRACE_TRACEME, -1, NULL, NULL))
+ ksft_exit_fail_msg("PTRACE_TRACEME", strerror(errno));
+
+ if (raise(SIGSTOP))
+ ksft_exit_fail_msg("raise(SIGSTOP)", strerror(errno));
+
+ return EXIT_SUCCESS;
+}
+
+static int do_parent(pid_t child)
+{
+ int ret = EXIT_FAILURE;
+ pid_t pid;
+ int status;
+ siginfo_t si;
+
+ /* Attach to the child */
+ while (1) {
+ int sig;
+
+ pid = wait(&status);
+ if (pid == -1) {
+ perror("wait");
+ goto error;
+ }
+
+ /*
+ * This should never happen but it's hard to flag in
+ * the framework.
+ */
+ if (pid != child)
+ continue;
+
+ if (WIFEXITED(status) || WIFSIGNALED(status))
+ ksft_exit_fail_msg("Child died unexpectedly\n");
+
+ if (!WIFSTOPPED(status))
+ goto error;
+
+ sig = WSTOPSIG(status);
+
+ if (ptrace(PTRACE_GETSIGINFO, pid, NULL, &si)) {
+ if (errno == ESRCH)
+ goto disappeared;
+
+ if (errno == EINVAL) {
+ sig = 0; /* bust group-stop */
+ goto cont;
+ }
+
+ ksft_test_result_fail("PTRACE_GETSIGINFO: %s\n",
+ strerror(errno));
+ goto error;
+ }
+
+ if (sig == SIGSTOP && si.si_code == SI_TKILL &&
+ si.si_pid == pid)
+ break;
+
+ cont:
+ if (ptrace(PTRACE_CONT, pid, NULL, sig)) {
+ if (errno == ESRCH)
+ goto disappeared;
+
+ ksft_test_result_fail("PTRACE_CONT: %s\n",
+ strerror(errno));
+ goto error;
+ }
+ }
+
+ ksft_print_msg("Parent is %d, child is %d\n", getpid(), child);
+
+ test_tpidr(child);
+ test_hw_debug(child, NT_ARM_HW_WATCH, "NT_ARM_HW_WATCH");
+ test_hw_debug(child, NT_ARM_HW_BREAK, "NT_ARM_HW_BREAK");
+
+ ret = EXIT_SUCCESS;
+
+error:
+ kill(child, SIGKILL);
+
+disappeared:
+ return ret;
+}
+
+int main(void)
+{
+ int ret = EXIT_SUCCESS;
+ pid_t child;
+
+ srandom(getpid());
+
+ ksft_print_header();
+
+ ksft_set_plan(EXPECTED_TESTS);
+
+ child = fork();
+ if (!child)
+ return do_child();
+
+ if (do_parent(child))
+ ret = EXIT_FAILURE;
+
+ ksft_print_cnts();
+
+ return ret;
+}
diff --git a/tools/testing/selftests/arm64/abi/syscall-abi-asm.S b/tools/testing/selftests/arm64/abi/syscall-abi-asm.S
new file mode 100644
index 000000000000..df3230fdac39
--- /dev/null
+++ b/tools/testing/selftests/arm64/abi/syscall-abi-asm.S
@@ -0,0 +1,362 @@
+// SPDX-License-Identifier: GPL-2.0-only
+// Copyright (C) 2021 ARM Limited.
+//
+// Assembly portion of the syscall ABI test
+
+//
+// Load values from memory into registers, invoke a syscall and save the
+// register values back to memory for later checking. The syscall to be
+// invoked is configured in x8 of the input GPR data.
+//
+// x0: SVE VL, 0 for FP only
+// x1: SME VL
+//
+// GPRs: gpr_in, gpr_out
+// FPRs: fpr_in, fpr_out
+// Zn: z_in, z_out
+// Pn: p_in, p_out
+// FFR: ffr_in, ffr_out
+// ZA: za_in, za_out
+// SVCR: svcr_in, svcr_out
+
+#include "syscall-abi.h"
+
+.arch_extension sve
+
+#define ID_AA64SMFR0_EL1_SMEver_SHIFT 56
+#define ID_AA64SMFR0_EL1_SMEver_WIDTH 4
+
+/*
+ * LDR (vector to ZA array):
+ * LDR ZA[\nw, #\offset], [X\nxbase, #\offset, MUL VL]
+ */
+.macro _ldr_za nw, nxbase, offset=0
+ .inst 0xe1000000 \
+ | (((\nw) & 3) << 13) \
+ | ((\nxbase) << 5) \
+ | ((\offset) & 7)
+.endm
+
+/*
+ * STR (vector from ZA array):
+ * STR ZA[\nw, #\offset], [X\nxbase, #\offset, MUL VL]
+ */
+.macro _str_za nw, nxbase, offset=0
+ .inst 0xe1200000 \
+ | (((\nw) & 3) << 13) \
+ | ((\nxbase) << 5) \
+ | ((\offset) & 7)
+.endm
+
+/*
+ * LDR (ZT0)
+ *
+ * LDR ZT0, nx
+ */
+.macro _ldr_zt nx
+ .inst 0xe11f8000 \
+ | (((\nx) & 0x1f) << 5)
+.endm
+
+/*
+ * STR (ZT0)
+ *
+ * STR ZT0, nx
+ */
+.macro _str_zt nx
+ .inst 0xe13f8000 \
+ | (((\nx) & 0x1f) << 5)
+.endm
+
+.globl do_syscall
+do_syscall:
+ // Store callee saved registers x19-x29 (80 bytes) plus x0 and x1
+ stp x29, x30, [sp, #-112]!
+ mov x29, sp
+ stp x0, x1, [sp, #16]
+ stp x19, x20, [sp, #32]
+ stp x21, x22, [sp, #48]
+ stp x23, x24, [sp, #64]
+ stp x25, x26, [sp, #80]
+ stp x27, x28, [sp, #96]
+
+ // Set SVCR if we're doing SME
+ cbz x1, 1f
+ adrp x2, svcr_in
+ ldr x2, [x2, :lo12:svcr_in]
+ msr S3_3_C4_C2_2, x2
+1:
+
+ // Load ZA and ZT0 if enabled - uses x12 as scratch due to SME LDR
+ tbz x2, #SVCR_ZA_SHIFT, 1f
+ mov w12, #0
+ ldr x2, =za_in
+2: _ldr_za 12, 2
+ add x2, x2, x1
+ add x12, x12, #1
+ cmp x1, x12
+ bne 2b
+
+ // ZT0
+ mrs x2, S3_0_C0_C4_5 // ID_AA64SMFR0_EL1
+ ubfx x2, x2, #ID_AA64SMFR0_EL1_SMEver_SHIFT, \
+ #ID_AA64SMFR0_EL1_SMEver_WIDTH
+ cbz x2, 1f
+ adrp x2, zt_in
+ add x2, x2, :lo12:zt_in
+ _ldr_zt 2
+1:
+
+ // Load GPRs x8-x28, and save our SP/FP for later comparison
+ ldr x2, =gpr_in
+ add x2, x2, #64
+ ldp x8, x9, [x2], #16
+ ldp x10, x11, [x2], #16
+ ldp x12, x13, [x2], #16
+ ldp x14, x15, [x2], #16
+ ldp x16, x17, [x2], #16
+ ldp x18, x19, [x2], #16
+ ldp x20, x21, [x2], #16
+ ldp x22, x23, [x2], #16
+ ldp x24, x25, [x2], #16
+ ldp x26, x27, [x2], #16
+ ldr x28, [x2], #8
+ str x29, [x2], #8 // FP
+ str x30, [x2], #8 // LR
+
+ // Load FPRs if we're not doing neither SVE nor streaming SVE
+ cbnz x0, 1f
+ ldr x2, =svcr_in
+ tbnz x2, #SVCR_SM_SHIFT, 1f
+
+ ldr x2, =fpr_in
+ ldp q0, q1, [x2]
+ ldp q2, q3, [x2, #16 * 2]
+ ldp q4, q5, [x2, #16 * 4]
+ ldp q6, q7, [x2, #16 * 6]
+ ldp q8, q9, [x2, #16 * 8]
+ ldp q10, q11, [x2, #16 * 10]
+ ldp q12, q13, [x2, #16 * 12]
+ ldp q14, q15, [x2, #16 * 14]
+ ldp q16, q17, [x2, #16 * 16]
+ ldp q18, q19, [x2, #16 * 18]
+ ldp q20, q21, [x2, #16 * 20]
+ ldp q22, q23, [x2, #16 * 22]
+ ldp q24, q25, [x2, #16 * 24]
+ ldp q26, q27, [x2, #16 * 26]
+ ldp q28, q29, [x2, #16 * 28]
+ ldp q30, q31, [x2, #16 * 30]
+
+ b 2f
+1:
+
+ // Load the SVE registers if we're doing SVE/SME
+
+ ldr x2, =z_in
+ ldr z0, [x2, #0, MUL VL]
+ ldr z1, [x2, #1, MUL VL]
+ ldr z2, [x2, #2, MUL VL]
+ ldr z3, [x2, #3, MUL VL]
+ ldr z4, [x2, #4, MUL VL]
+ ldr z5, [x2, #5, MUL VL]
+ ldr z6, [x2, #6, MUL VL]
+ ldr z7, [x2, #7, MUL VL]
+ ldr z8, [x2, #8, MUL VL]
+ ldr z9, [x2, #9, MUL VL]
+ ldr z10, [x2, #10, MUL VL]
+ ldr z11, [x2, #11, MUL VL]
+ ldr z12, [x2, #12, MUL VL]
+ ldr z13, [x2, #13, MUL VL]
+ ldr z14, [x2, #14, MUL VL]
+ ldr z15, [x2, #15, MUL VL]
+ ldr z16, [x2, #16, MUL VL]
+ ldr z17, [x2, #17, MUL VL]
+ ldr z18, [x2, #18, MUL VL]
+ ldr z19, [x2, #19, MUL VL]
+ ldr z20, [x2, #20, MUL VL]
+ ldr z21, [x2, #21, MUL VL]
+ ldr z22, [x2, #22, MUL VL]
+ ldr z23, [x2, #23, MUL VL]
+ ldr z24, [x2, #24, MUL VL]
+ ldr z25, [x2, #25, MUL VL]
+ ldr z26, [x2, #26, MUL VL]
+ ldr z27, [x2, #27, MUL VL]
+ ldr z28, [x2, #28, MUL VL]
+ ldr z29, [x2, #29, MUL VL]
+ ldr z30, [x2, #30, MUL VL]
+ ldr z31, [x2, #31, MUL VL]
+
+ // Only set a non-zero FFR, test patterns must be zero since the
+ // syscall should clear it - this lets us handle FA64.
+ ldr x2, =ffr_in
+ ldr p0, [x2]
+ ldr x2, [x2, #0]
+ cbz x2, 1f
+ wrffr p0.b
+1:
+
+ ldr x2, =p_in
+ ldr p0, [x2, #0, MUL VL]
+ ldr p1, [x2, #1, MUL VL]
+ ldr p2, [x2, #2, MUL VL]
+ ldr p3, [x2, #3, MUL VL]
+ ldr p4, [x2, #4, MUL VL]
+ ldr p5, [x2, #5, MUL VL]
+ ldr p6, [x2, #6, MUL VL]
+ ldr p7, [x2, #7, MUL VL]
+ ldr p8, [x2, #8, MUL VL]
+ ldr p9, [x2, #9, MUL VL]
+ ldr p10, [x2, #10, MUL VL]
+ ldr p11, [x2, #11, MUL VL]
+ ldr p12, [x2, #12, MUL VL]
+ ldr p13, [x2, #13, MUL VL]
+ ldr p14, [x2, #14, MUL VL]
+ ldr p15, [x2, #15, MUL VL]
+2:
+
+ // Do the syscall
+ svc #0
+
+ // Save GPRs x8-x30
+ ldr x2, =gpr_out
+ add x2, x2, #64
+ stp x8, x9, [x2], #16
+ stp x10, x11, [x2], #16
+ stp x12, x13, [x2], #16
+ stp x14, x15, [x2], #16
+ stp x16, x17, [x2], #16
+ stp x18, x19, [x2], #16
+ stp x20, x21, [x2], #16
+ stp x22, x23, [x2], #16
+ stp x24, x25, [x2], #16
+ stp x26, x27, [x2], #16
+ stp x28, x29, [x2], #16
+ str x30, [x2]
+
+ // Restore x0 and x1 for feature checks
+ ldp x0, x1, [sp, #16]
+
+ // Save FPSIMD state
+ ldr x2, =fpr_out
+ stp q0, q1, [x2]
+ stp q2, q3, [x2, #16 * 2]
+ stp q4, q5, [x2, #16 * 4]
+ stp q6, q7, [x2, #16 * 6]
+ stp q8, q9, [x2, #16 * 8]
+ stp q10, q11, [x2, #16 * 10]
+ stp q12, q13, [x2, #16 * 12]
+ stp q14, q15, [x2, #16 * 14]
+ stp q16, q17, [x2, #16 * 16]
+ stp q18, q19, [x2, #16 * 18]
+ stp q20, q21, [x2, #16 * 20]
+ stp q22, q23, [x2, #16 * 22]
+ stp q24, q25, [x2, #16 * 24]
+ stp q26, q27, [x2, #16 * 26]
+ stp q28, q29, [x2, #16 * 28]
+ stp q30, q31, [x2, #16 * 30]
+
+ // Save SVCR if we're doing SME
+ cbz x1, 1f
+ mrs x2, S3_3_C4_C2_2
+ adrp x3, svcr_out
+ str x2, [x3, :lo12:svcr_out]
+1:
+
+ // Save ZA if it's enabled - uses x12 as scratch due to SME STR
+ tbz x2, #SVCR_ZA_SHIFT, 1f
+ mov w12, #0
+ ldr x2, =za_out
+2: _str_za 12, 2
+ add x2, x2, x1
+ add x12, x12, #1
+ cmp x1, x12
+ bne 2b
+
+ // ZT0
+ mrs x2, S3_0_C0_C4_5 // ID_AA64SMFR0_EL1
+ ubfx x2, x2, #ID_AA64SMFR0_EL1_SMEver_SHIFT, \
+ #ID_AA64SMFR0_EL1_SMEver_WIDTH
+ cbz x2, 1f
+ adrp x2, zt_out
+ add x2, x2, :lo12:zt_out
+ _str_zt 2
+1:
+
+ // Save the SVE state if we have some
+ cbz x0, 1f
+
+ ldr x2, =z_out
+ str z0, [x2, #0, MUL VL]
+ str z1, [x2, #1, MUL VL]
+ str z2, [x2, #2, MUL VL]
+ str z3, [x2, #3, MUL VL]
+ str z4, [x2, #4, MUL VL]
+ str z5, [x2, #5, MUL VL]
+ str z6, [x2, #6, MUL VL]
+ str z7, [x2, #7, MUL VL]
+ str z8, [x2, #8, MUL VL]
+ str z9, [x2, #9, MUL VL]
+ str z10, [x2, #10, MUL VL]
+ str z11, [x2, #11, MUL VL]
+ str z12, [x2, #12, MUL VL]
+ str z13, [x2, #13, MUL VL]
+ str z14, [x2, #14, MUL VL]
+ str z15, [x2, #15, MUL VL]
+ str z16, [x2, #16, MUL VL]
+ str z17, [x2, #17, MUL VL]
+ str z18, [x2, #18, MUL VL]
+ str z19, [x2, #19, MUL VL]
+ str z20, [x2, #20, MUL VL]
+ str z21, [x2, #21, MUL VL]
+ str z22, [x2, #22, MUL VL]
+ str z23, [x2, #23, MUL VL]
+ str z24, [x2, #24, MUL VL]
+ str z25, [x2, #25, MUL VL]
+ str z26, [x2, #26, MUL VL]
+ str z27, [x2, #27, MUL VL]
+ str z28, [x2, #28, MUL VL]
+ str z29, [x2, #29, MUL VL]
+ str z30, [x2, #30, MUL VL]
+ str z31, [x2, #31, MUL VL]
+
+ ldr x2, =p_out
+ str p0, [x2, #0, MUL VL]
+ str p1, [x2, #1, MUL VL]
+ str p2, [x2, #2, MUL VL]
+ str p3, [x2, #3, MUL VL]
+ str p4, [x2, #4, MUL VL]
+ str p5, [x2, #5, MUL VL]
+ str p6, [x2, #6, MUL VL]
+ str p7, [x2, #7, MUL VL]
+ str p8, [x2, #8, MUL VL]
+ str p9, [x2, #9, MUL VL]
+ str p10, [x2, #10, MUL VL]
+ str p11, [x2, #11, MUL VL]
+ str p12, [x2, #12, MUL VL]
+ str p13, [x2, #13, MUL VL]
+ str p14, [x2, #14, MUL VL]
+ str p15, [x2, #15, MUL VL]
+
+ // Only save FFR if we wrote a value for SME
+ ldr x2, =ffr_in
+ ldr x2, [x2, #0]
+ cbz x2, 1f
+ ldr x2, =ffr_out
+ rdffr p0.b
+ str p0, [x2]
+1:
+
+ // Restore callee saved registers x19-x30
+ ldp x19, x20, [sp, #32]
+ ldp x21, x22, [sp, #48]
+ ldp x23, x24, [sp, #64]
+ ldp x25, x26, [sp, #80]
+ ldp x27, x28, [sp, #96]
+ ldp x29, x30, [sp], #112
+
+ // Clear SVCR if we were doing SME so future tests don't have ZA
+ cbz x1, 1f
+ msr S3_3_C4_C2_2, xzr
+1:
+
+ ret
diff --git a/tools/testing/selftests/arm64/abi/syscall-abi.c b/tools/testing/selftests/arm64/abi/syscall-abi.c
new file mode 100644
index 000000000000..d704511a0955
--- /dev/null
+++ b/tools/testing/selftests/arm64/abi/syscall-abi.c
@@ -0,0 +1,565 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright (C) 2021 ARM Limited.
+ */
+
+#include <errno.h>
+#include <stdbool.h>
+#include <stddef.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+#include <sys/auxv.h>
+#include <sys/prctl.h>
+#include <asm/hwcap.h>
+#include <asm/sigcontext.h>
+#include <asm/unistd.h>
+
+#include "../../kselftest.h"
+
+#include "syscall-abi.h"
+
+/*
+ * The kernel defines a much larger SVE_VQ_MAX than is expressable in
+ * the architecture, this creates a *lot* of overhead filling the
+ * buffers (especially ZA) on emulated platforms so use the actual
+ * architectural maximum instead.
+ */
+#define ARCH_SVE_VQ_MAX 16
+
+static int default_sme_vl;
+
+static int sve_vl_count;
+static unsigned int sve_vls[ARCH_SVE_VQ_MAX];
+static int sme_vl_count;
+static unsigned int sme_vls[ARCH_SVE_VQ_MAX];
+
+extern void do_syscall(int sve_vl, int sme_vl);
+
+static void fill_random(void *buf, size_t size)
+{
+ int i;
+ uint32_t *lbuf = buf;
+
+ /* random() returns a 32 bit number regardless of the size of long */
+ for (i = 0; i < size / sizeof(uint32_t); i++)
+ lbuf[i] = random();
+}
+
+/*
+ * We also repeat the test for several syscalls to try to expose different
+ * behaviour.
+ */
+static struct syscall_cfg {
+ int syscall_nr;
+ const char *name;
+} syscalls[] = {
+ { __NR_getpid, "getpid()" },
+ { __NR_sched_yield, "sched_yield()" },
+};
+
+#define NUM_GPR 31
+uint64_t gpr_in[NUM_GPR];
+uint64_t gpr_out[NUM_GPR];
+
+static void setup_gpr(struct syscall_cfg *cfg, int sve_vl, int sme_vl,
+ uint64_t svcr)
+{
+ fill_random(gpr_in, sizeof(gpr_in));
+ gpr_in[8] = cfg->syscall_nr;
+ memset(gpr_out, 0, sizeof(gpr_out));
+}
+
+static int check_gpr(struct syscall_cfg *cfg, int sve_vl, int sme_vl, uint64_t svcr)
+{
+ int errors = 0;
+ int i;
+
+ /*
+ * GPR x0-x7 may be clobbered, and all others should be preserved.
+ */
+ for (i = 9; i < ARRAY_SIZE(gpr_in); i++) {
+ if (gpr_in[i] != gpr_out[i]) {
+ ksft_print_msg("%s SVE VL %d mismatch in GPR %d: %llx != %llx\n",
+ cfg->name, sve_vl, i,
+ gpr_in[i], gpr_out[i]);
+ errors++;
+ }
+ }
+
+ return errors;
+}
+
+#define NUM_FPR 32
+uint64_t fpr_in[NUM_FPR * 2];
+uint64_t fpr_out[NUM_FPR * 2];
+uint64_t fpr_zero[NUM_FPR * 2];
+
+static void setup_fpr(struct syscall_cfg *cfg, int sve_vl, int sme_vl,
+ uint64_t svcr)
+{
+ fill_random(fpr_in, sizeof(fpr_in));
+ memset(fpr_out, 0, sizeof(fpr_out));
+}
+
+static int check_fpr(struct syscall_cfg *cfg, int sve_vl, int sme_vl,
+ uint64_t svcr)
+{
+ int errors = 0;
+ int i;
+
+ if (!sve_vl && !(svcr & SVCR_SM_MASK)) {
+ for (i = 0; i < ARRAY_SIZE(fpr_in); i++) {
+ if (fpr_in[i] != fpr_out[i]) {
+ ksft_print_msg("%s Q%d/%d mismatch %llx != %llx\n",
+ cfg->name,
+ i / 2, i % 2,
+ fpr_in[i], fpr_out[i]);
+ errors++;
+ }
+ }
+ }
+
+ /*
+ * In streaming mode the whole register set should be cleared
+ * by the transition out of streaming mode.
+ */
+ if (svcr & SVCR_SM_MASK) {
+ if (memcmp(fpr_zero, fpr_out, sizeof(fpr_out)) != 0) {
+ ksft_print_msg("%s FPSIMD registers non-zero exiting SM\n",
+ cfg->name);
+ errors++;
+ }
+ }
+
+ return errors;
+}
+
+#define SVE_Z_SHARED_BYTES (128 / 8)
+
+static uint8_t z_zero[__SVE_ZREG_SIZE(ARCH_SVE_VQ_MAX)];
+uint8_t z_in[SVE_NUM_ZREGS * __SVE_ZREG_SIZE(ARCH_SVE_VQ_MAX)];
+uint8_t z_out[SVE_NUM_ZREGS * __SVE_ZREG_SIZE(ARCH_SVE_VQ_MAX)];
+
+static void setup_z(struct syscall_cfg *cfg, int sve_vl, int sme_vl,
+ uint64_t svcr)
+{
+ fill_random(z_in, sizeof(z_in));
+ fill_random(z_out, sizeof(z_out));
+}
+
+static int check_z(struct syscall_cfg *cfg, int sve_vl, int sme_vl,
+ uint64_t svcr)
+{
+ size_t reg_size = sve_vl;
+ int errors = 0;
+ int i;
+
+ if (!sve_vl)
+ return 0;
+
+ for (i = 0; i < SVE_NUM_ZREGS; i++) {
+ uint8_t *in = &z_in[reg_size * i];
+ uint8_t *out = &z_out[reg_size * i];
+
+ if (svcr & SVCR_SM_MASK) {
+ /*
+ * In streaming mode the whole register should
+ * be cleared by the transition out of
+ * streaming mode.
+ */
+ if (memcmp(z_zero, out, reg_size) != 0) {
+ ksft_print_msg("%s SVE VL %d Z%d non-zero\n",
+ cfg->name, sve_vl, i);
+ errors++;
+ }
+ } else {
+ /*
+ * For standard SVE the low 128 bits should be
+ * preserved and any additional bits cleared.
+ */
+ if (memcmp(in, out, SVE_Z_SHARED_BYTES) != 0) {
+ ksft_print_msg("%s SVE VL %d Z%d low 128 bits changed\n",
+ cfg->name, sve_vl, i);
+ errors++;
+ }
+
+ if (reg_size > SVE_Z_SHARED_BYTES &&
+ (memcmp(z_zero, out + SVE_Z_SHARED_BYTES,
+ reg_size - SVE_Z_SHARED_BYTES) != 0)) {
+ ksft_print_msg("%s SVE VL %d Z%d high bits non-zero\n",
+ cfg->name, sve_vl, i);
+ errors++;
+ }
+ }
+ }
+
+ return errors;
+}
+
+uint8_t p_in[SVE_NUM_PREGS * __SVE_PREG_SIZE(ARCH_SVE_VQ_MAX)];
+uint8_t p_out[SVE_NUM_PREGS * __SVE_PREG_SIZE(ARCH_SVE_VQ_MAX)];
+
+static void setup_p(struct syscall_cfg *cfg, int sve_vl, int sme_vl,
+ uint64_t svcr)
+{
+ fill_random(p_in, sizeof(p_in));
+ fill_random(p_out, sizeof(p_out));
+}
+
+static int check_p(struct syscall_cfg *cfg, int sve_vl, int sme_vl,
+ uint64_t svcr)
+{
+ size_t reg_size = sve_vq_from_vl(sve_vl) * 2; /* 1 bit per VL byte */
+
+ int errors = 0;
+ int i;
+
+ if (!sve_vl)
+ return 0;
+
+ /* After a syscall the P registers should be zeroed */
+ for (i = 0; i < SVE_NUM_PREGS * reg_size; i++)
+ if (p_out[i])
+ errors++;
+ if (errors)
+ ksft_print_msg("%s SVE VL %d predicate registers non-zero\n",
+ cfg->name, sve_vl);
+
+ return errors;
+}
+
+uint8_t ffr_in[__SVE_PREG_SIZE(ARCH_SVE_VQ_MAX)];
+uint8_t ffr_out[__SVE_PREG_SIZE(ARCH_SVE_VQ_MAX)];
+
+static void setup_ffr(struct syscall_cfg *cfg, int sve_vl, int sme_vl,
+ uint64_t svcr)
+{
+ /*
+ * If we are in streaming mode and do not have FA64 then FFR
+ * is unavailable.
+ */
+ if ((svcr & SVCR_SM_MASK) &&
+ !(getauxval(AT_HWCAP2) & HWCAP2_SME_FA64)) {
+ memset(&ffr_in, 0, sizeof(ffr_in));
+ return;
+ }
+
+ /*
+ * It is only valid to set a contiguous set of bits starting
+ * at 0. For now since we're expecting this to be cleared by
+ * a syscall just set all bits.
+ */
+ memset(ffr_in, 0xff, sizeof(ffr_in));
+ fill_random(ffr_out, sizeof(ffr_out));
+}
+
+static int check_ffr(struct syscall_cfg *cfg, int sve_vl, int sme_vl,
+ uint64_t svcr)
+{
+ size_t reg_size = sve_vq_from_vl(sve_vl) * 2; /* 1 bit per VL byte */
+ int errors = 0;
+ int i;
+
+ if (!sve_vl)
+ return 0;
+
+ if ((svcr & SVCR_SM_MASK) &&
+ !(getauxval(AT_HWCAP2) & HWCAP2_SME_FA64))
+ return 0;
+
+ /* After a syscall FFR should be zeroed */
+ for (i = 0; i < reg_size; i++)
+ if (ffr_out[i])
+ errors++;
+ if (errors)
+ ksft_print_msg("%s SVE VL %d FFR non-zero\n",
+ cfg->name, sve_vl);
+
+ return errors;
+}
+
+uint64_t svcr_in, svcr_out;
+
+static void setup_svcr(struct syscall_cfg *cfg, int sve_vl, int sme_vl,
+ uint64_t svcr)
+{
+ svcr_in = svcr;
+}
+
+static int check_svcr(struct syscall_cfg *cfg, int sve_vl, int sme_vl,
+ uint64_t svcr)
+{
+ int errors = 0;
+
+ if (svcr_out & SVCR_SM_MASK) {
+ ksft_print_msg("%s Still in SM, SVCR %llx\n",
+ cfg->name, svcr_out);
+ errors++;
+ }
+
+ if ((svcr_in & SVCR_ZA_MASK) != (svcr_out & SVCR_ZA_MASK)) {
+ ksft_print_msg("%s PSTATE.ZA changed, SVCR %llx != %llx\n",
+ cfg->name, svcr_in, svcr_out);
+ errors++;
+ }
+
+ return errors;
+}
+
+uint8_t za_in[ZA_SIG_REGS_SIZE(ARCH_SVE_VQ_MAX)];
+uint8_t za_out[ZA_SIG_REGS_SIZE(ARCH_SVE_VQ_MAX)];
+
+static void setup_za(struct syscall_cfg *cfg, int sve_vl, int sme_vl,
+ uint64_t svcr)
+{
+ fill_random(za_in, sizeof(za_in));
+ memset(za_out, 0, sizeof(za_out));
+}
+
+static int check_za(struct syscall_cfg *cfg, int sve_vl, int sme_vl,
+ uint64_t svcr)
+{
+ size_t reg_size = sme_vl * sme_vl;
+ int errors = 0;
+
+ if (!(svcr & SVCR_ZA_MASK))
+ return 0;
+
+ if (memcmp(za_in, za_out, reg_size) != 0) {
+ ksft_print_msg("SME VL %d ZA does not match\n", sme_vl);
+ errors++;
+ }
+
+ return errors;
+}
+
+uint8_t zt_in[ZT_SIG_REG_BYTES] __attribute__((aligned(16)));
+uint8_t zt_out[ZT_SIG_REG_BYTES] __attribute__((aligned(16)));
+
+static void setup_zt(struct syscall_cfg *cfg, int sve_vl, int sme_vl,
+ uint64_t svcr)
+{
+ fill_random(zt_in, sizeof(zt_in));
+ memset(zt_out, 0, sizeof(zt_out));
+}
+
+static int check_zt(struct syscall_cfg *cfg, int sve_vl, int sme_vl,
+ uint64_t svcr)
+{
+ int errors = 0;
+
+ if (!(getauxval(AT_HWCAP2) & HWCAP2_SME2))
+ return 0;
+
+ if (!(svcr & SVCR_ZA_MASK))
+ return 0;
+
+ if (memcmp(zt_in, zt_out, sizeof(zt_in)) != 0) {
+ ksft_print_msg("SME VL %d ZT does not match\n", sme_vl);
+ errors++;
+ }
+
+ return errors;
+}
+
+typedef void (*setup_fn)(struct syscall_cfg *cfg, int sve_vl, int sme_vl,
+ uint64_t svcr);
+typedef int (*check_fn)(struct syscall_cfg *cfg, int sve_vl, int sme_vl,
+ uint64_t svcr);
+
+/*
+ * Each set of registers has a setup function which is called before
+ * the syscall to fill values in a global variable for loading by the
+ * test code and a check function which validates that the results are
+ * as expected. Vector lengths are passed everywhere, a vector length
+ * of 0 should be treated as do not test.
+ */
+static struct {
+ setup_fn setup;
+ check_fn check;
+} regset[] = {
+ { setup_gpr, check_gpr },
+ { setup_fpr, check_fpr },
+ { setup_z, check_z },
+ { setup_p, check_p },
+ { setup_ffr, check_ffr },
+ { setup_svcr, check_svcr },
+ { setup_za, check_za },
+ { setup_zt, check_zt },
+};
+
+static bool do_test(struct syscall_cfg *cfg, int sve_vl, int sme_vl,
+ uint64_t svcr)
+{
+ int errors = 0;
+ int i;
+
+ for (i = 0; i < ARRAY_SIZE(regset); i++)
+ regset[i].setup(cfg, sve_vl, sme_vl, svcr);
+
+ do_syscall(sve_vl, sme_vl);
+
+ for (i = 0; i < ARRAY_SIZE(regset); i++)
+ errors += regset[i].check(cfg, sve_vl, sme_vl, svcr);
+
+ return errors == 0;
+}
+
+static void test_one_syscall(struct syscall_cfg *cfg)
+{
+ int sve, sme;
+ int ret;
+
+ /* FPSIMD only case */
+ ksft_test_result(do_test(cfg, 0, default_sme_vl, 0),
+ "%s FPSIMD\n", cfg->name);
+
+ for (sve = 0; sve < sve_vl_count; sve++) {
+ ret = prctl(PR_SVE_SET_VL, sve_vls[sve]);
+ if (ret == -1)
+ ksft_exit_fail_msg("PR_SVE_SET_VL failed: %s (%d)\n",
+ strerror(errno), errno);
+
+ ksft_test_result(do_test(cfg, sve_vls[sve], default_sme_vl, 0),
+ "%s SVE VL %d\n", cfg->name, sve_vls[sve]);
+
+ for (sme = 0; sme < sme_vl_count; sme++) {
+ ret = prctl(PR_SME_SET_VL, sme_vls[sme]);
+ if (ret == -1)
+ ksft_exit_fail_msg("PR_SME_SET_VL failed: %s (%d)\n",
+ strerror(errno), errno);
+
+ ksft_test_result(do_test(cfg, sve_vls[sve],
+ sme_vls[sme],
+ SVCR_ZA_MASK | SVCR_SM_MASK),
+ "%s SVE VL %d/SME VL %d SM+ZA\n",
+ cfg->name, sve_vls[sve],
+ sme_vls[sme]);
+ ksft_test_result(do_test(cfg, sve_vls[sve],
+ sme_vls[sme], SVCR_SM_MASK),
+ "%s SVE VL %d/SME VL %d SM\n",
+ cfg->name, sve_vls[sve],
+ sme_vls[sme]);
+ ksft_test_result(do_test(cfg, sve_vls[sve],
+ sme_vls[sme], SVCR_ZA_MASK),
+ "%s SVE VL %d/SME VL %d ZA\n",
+ cfg->name, sve_vls[sve],
+ sme_vls[sme]);
+ }
+ }
+
+ for (sme = 0; sme < sme_vl_count; sme++) {
+ ret = prctl(PR_SME_SET_VL, sme_vls[sme]);
+ if (ret == -1)
+ ksft_exit_fail_msg("PR_SME_SET_VL failed: %s (%d)\n",
+ strerror(errno), errno);
+
+ ksft_test_result(do_test(cfg, 0, sme_vls[sme],
+ SVCR_ZA_MASK | SVCR_SM_MASK),
+ "%s SME VL %d SM+ZA\n",
+ cfg->name, sme_vls[sme]);
+ ksft_test_result(do_test(cfg, 0, sme_vls[sme], SVCR_SM_MASK),
+ "%s SME VL %d SM\n",
+ cfg->name, sme_vls[sme]);
+ ksft_test_result(do_test(cfg, 0, sme_vls[sme], SVCR_ZA_MASK),
+ "%s SME VL %d ZA\n",
+ cfg->name, sme_vls[sme]);
+ }
+}
+
+void sve_count_vls(void)
+{
+ unsigned int vq;
+ int vl;
+
+ if (!(getauxval(AT_HWCAP) & HWCAP_SVE))
+ return;
+
+ /*
+ * Enumerate up to ARCH_SVE_VQ_MAX vector lengths
+ */
+ for (vq = ARCH_SVE_VQ_MAX; vq > 0; vq /= 2) {
+ vl = prctl(PR_SVE_SET_VL, vq * 16);
+ if (vl == -1)
+ ksft_exit_fail_msg("PR_SVE_SET_VL failed: %s (%d)\n",
+ strerror(errno), errno);
+
+ vl &= PR_SVE_VL_LEN_MASK;
+
+ if (vq != sve_vq_from_vl(vl))
+ vq = sve_vq_from_vl(vl);
+
+ sve_vls[sve_vl_count++] = vl;
+ }
+}
+
+void sme_count_vls(void)
+{
+ unsigned int vq;
+ int vl;
+
+ if (!(getauxval(AT_HWCAP2) & HWCAP2_SME))
+ return;
+
+ /*
+ * Enumerate up to ARCH_SVE_VQ_MAX vector lengths
+ */
+ for (vq = ARCH_SVE_VQ_MAX; vq > 0; vq /= 2) {
+ vl = prctl(PR_SME_SET_VL, vq * 16);
+ if (vl == -1)
+ ksft_exit_fail_msg("PR_SME_SET_VL failed: %s (%d)\n",
+ strerror(errno), errno);
+
+ vl &= PR_SME_VL_LEN_MASK;
+
+ /* Found lowest VL */
+ if (sve_vq_from_vl(vl) > vq)
+ break;
+
+ if (vq != sve_vq_from_vl(vl))
+ vq = sve_vq_from_vl(vl);
+
+ sme_vls[sme_vl_count++] = vl;
+ }
+
+ /* Ensure we configure a SME VL, used to flag if SVCR is set */
+ default_sme_vl = sme_vls[0];
+}
+
+int main(void)
+{
+ int i;
+ int tests = 1; /* FPSIMD */
+ int sme_ver;
+
+ srandom(getpid());
+
+ ksft_print_header();
+
+ sve_count_vls();
+ sme_count_vls();
+
+ tests += sve_vl_count;
+ tests += sme_vl_count * 3;
+ tests += (sve_vl_count * sme_vl_count) * 3;
+ ksft_set_plan(ARRAY_SIZE(syscalls) * tests);
+
+ if (getauxval(AT_HWCAP2) & HWCAP2_SME2)
+ sme_ver = 2;
+ else
+ sme_ver = 1;
+
+ if (getauxval(AT_HWCAP2) & HWCAP2_SME_FA64)
+ ksft_print_msg("SME%d with FA64\n", sme_ver);
+ else if (getauxval(AT_HWCAP2) & HWCAP2_SME)
+ ksft_print_msg("SME%d without FA64\n", sme_ver);
+
+ for (i = 0; i < ARRAY_SIZE(syscalls); i++)
+ test_one_syscall(&syscalls[i]);
+
+ ksft_print_cnts();
+
+ return 0;
+}
diff --git a/tools/testing/selftests/arm64/abi/syscall-abi.h b/tools/testing/selftests/arm64/abi/syscall-abi.h
new file mode 100644
index 000000000000..bda5a87ad381
--- /dev/null
+++ b/tools/testing/selftests/arm64/abi/syscall-abi.h
@@ -0,0 +1,15 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * Copyright (C) 2021 ARM Limited.
+ */
+
+#ifndef SYSCALL_ABI_H
+#define SYSCALL_ABI_H
+
+#define SVCR_ZA_MASK 2
+#define SVCR_SM_MASK 1
+
+#define SVCR_ZA_SHIFT 1
+#define SVCR_SM_SHIFT 0
+
+#endif
diff --git a/tools/testing/selftests/arm64/abi/tpidr2.c b/tools/testing/selftests/arm64/abi/tpidr2.c
new file mode 100644
index 000000000000..02ee3a91b780
--- /dev/null
+++ b/tools/testing/selftests/arm64/abi/tpidr2.c
@@ -0,0 +1,302 @@
+// SPDX-License-Identifier: GPL-2.0-only
+
+#include <linux/sched.h>
+#include <linux/wait.h>
+
+#define SYS_TPIDR2 "S3_3_C13_C0_5"
+
+#define EXPECTED_TESTS 5
+
+static void putstr(const char *str)
+{
+ write(1, str, strlen(str));
+}
+
+static void putnum(unsigned int num)
+{
+ char c;
+
+ if (num / 10)
+ putnum(num / 10);
+
+ c = '0' + (num % 10);
+ write(1, &c, 1);
+}
+
+static int tests_run;
+static int tests_passed;
+static int tests_failed;
+static int tests_skipped;
+
+static void set_tpidr2(uint64_t val)
+{
+ asm volatile (
+ "msr " SYS_TPIDR2 ", %0\n"
+ :
+ : "r"(val)
+ : "cc");
+}
+
+static uint64_t get_tpidr2(void)
+{
+ uint64_t val;
+
+ asm volatile (
+ "mrs %0, " SYS_TPIDR2 "\n"
+ : "=r"(val)
+ :
+ : "cc");
+
+ return val;
+}
+
+static void print_summary(void)
+{
+ if (tests_passed + tests_failed + tests_skipped != EXPECTED_TESTS)
+ putstr("# UNEXPECTED TEST COUNT: ");
+
+ putstr("# Totals: pass:");
+ putnum(tests_passed);
+ putstr(" fail:");
+ putnum(tests_failed);
+ putstr(" xfail:0 xpass:0 skip:");
+ putnum(tests_skipped);
+ putstr(" error:0\n");
+}
+
+/* Processes should start with TPIDR2 == 0 */
+static int default_value(void)
+{
+ return get_tpidr2() == 0;
+}
+
+/* If we set TPIDR2 we should read that value */
+static int write_read(void)
+{
+ set_tpidr2(getpid());
+
+ return getpid() == get_tpidr2();
+}
+
+/* If we set a value we should read the same value after scheduling out */
+static int write_sleep_read(void)
+{
+ set_tpidr2(getpid());
+
+ msleep(100);
+
+ return getpid() == get_tpidr2();
+}
+
+/*
+ * If we fork the value in the parent should be unchanged and the
+ * child should start with the same value and be able to set its own
+ * value.
+ */
+static int write_fork_read(void)
+{
+ pid_t newpid, waiting, oldpid;
+ int status;
+
+ set_tpidr2(getpid());
+
+ oldpid = getpid();
+ newpid = fork();
+ if (newpid == 0) {
+ /* In child */
+ if (get_tpidr2() != oldpid) {
+ putstr("# TPIDR2 changed in child: ");
+ putnum(get_tpidr2());
+ putstr("\n");
+ exit(0);
+ }
+
+ set_tpidr2(getpid());
+ if (get_tpidr2() == getpid()) {
+ exit(1);
+ } else {
+ putstr("# Failed to set TPIDR2 in child\n");
+ exit(0);
+ }
+ }
+ if (newpid < 0) {
+ putstr("# fork() failed: -");
+ putnum(-newpid);
+ putstr("\n");
+ return 0;
+ }
+
+ for (;;) {
+ waiting = waitpid(newpid, &status, 0);
+
+ if (waiting < 0) {
+ if (errno == EINTR)
+ continue;
+ putstr("# waitpid() failed: ");
+ putnum(errno);
+ putstr("\n");
+ return 0;
+ }
+ if (waiting != newpid) {
+ putstr("# waitpid() returned wrong PID\n");
+ return 0;
+ }
+
+ if (!WIFEXITED(status)) {
+ putstr("# child did not exit\n");
+ return 0;
+ }
+
+ if (getpid() != get_tpidr2()) {
+ putstr("# TPIDR2 corrupted in parent\n");
+ return 0;
+ }
+
+ return WEXITSTATUS(status);
+ }
+}
+
+/*
+ * sys_clone() has a lot of per architecture variation so just define
+ * it here rather than adding it to nolibc, plus the raw API is a
+ * little more convenient for this test.
+ */
+static int sys_clone(unsigned long clone_flags, unsigned long newsp,
+ int *parent_tidptr, unsigned long tls,
+ int *child_tidptr)
+{
+ return my_syscall5(__NR_clone, clone_flags, newsp, parent_tidptr, tls,
+ child_tidptr);
+}
+
+/*
+ * If we clone with CLONE_SETTLS then the value in the parent should
+ * be unchanged and the child should start with zero and be able to
+ * set its own value.
+ */
+static int write_clone_read(void)
+{
+ int parent_tid, child_tid;
+ pid_t parent, waiting;
+ int ret, status;
+
+ parent = getpid();
+ set_tpidr2(parent);
+
+ ret = sys_clone(CLONE_SETTLS, 0, &parent_tid, 0, &child_tid);
+ if (ret == -1) {
+ putstr("# clone() failed\n");
+ putnum(errno);
+ putstr("\n");
+ return 0;
+ }
+
+ if (ret == 0) {
+ /* In child */
+ if (get_tpidr2() != 0) {
+ putstr("# TPIDR2 non-zero in child: ");
+ putnum(get_tpidr2());
+ putstr("\n");
+ exit(0);
+ }
+
+ if (gettid() == 0)
+ putstr("# Child TID==0\n");
+ set_tpidr2(gettid());
+ if (get_tpidr2() == gettid()) {
+ exit(1);
+ } else {
+ putstr("# Failed to set TPIDR2 in child\n");
+ exit(0);
+ }
+ }
+
+ for (;;) {
+ waiting = wait4(ret, &status, __WCLONE, NULL);
+
+ if (waiting < 0) {
+ if (errno == EINTR)
+ continue;
+ putstr("# wait4() failed: ");
+ putnum(errno);
+ putstr("\n");
+ return 0;
+ }
+ if (waiting != ret) {
+ putstr("# wait4() returned wrong PID ");
+ putnum(waiting);
+ putstr("\n");
+ return 0;
+ }
+
+ if (!WIFEXITED(status)) {
+ putstr("# child did not exit\n");
+ return 0;
+ }
+
+ if (parent != get_tpidr2()) {
+ putstr("# TPIDR2 corrupted in parent\n");
+ return 0;
+ }
+
+ return WEXITSTATUS(status);
+ }
+}
+
+#define run_test(name) \
+ if (name()) { \
+ tests_passed++; \
+ } else { \
+ tests_failed++; \
+ putstr("not "); \
+ } \
+ putstr("ok "); \
+ putnum(++tests_run); \
+ putstr(" " #name "\n");
+
+#define skip_test(name) \
+ tests_skipped++; \
+ putstr("ok "); \
+ putnum(++tests_run); \
+ putstr(" # SKIP " #name "\n");
+
+int main(int argc, char **argv)
+{
+ int ret, i;
+
+ putstr("TAP version 13\n");
+ putstr("1..");
+ putnum(EXPECTED_TESTS);
+ putstr("\n");
+
+ putstr("# PID: ");
+ putnum(getpid());
+ putstr("\n");
+
+ /*
+ * This test is run with nolibc which doesn't support hwcap and
+ * it's probably disproportionate to implement so instead check
+ * for the default vector length configuration in /proc.
+ */
+ ret = open("/proc/sys/abi/sme_default_vector_length", O_RDONLY, 0);
+ if (ret >= 0) {
+ run_test(default_value);
+ run_test(write_read);
+ run_test(write_sleep_read);
+ run_test(write_fork_read);
+ run_test(write_clone_read);
+
+ } else {
+ putstr("# SME support not present\n");
+
+ skip_test(default_value);
+ skip_test(write_read);
+ skip_test(write_sleep_read);
+ skip_test(write_fork_read);
+ skip_test(write_clone_read);
+ }
+
+ print_summary();
+
+ return 0;
+}
diff --git a/tools/testing/selftests/arm64/bti/.gitignore b/tools/testing/selftests/arm64/bti/.gitignore
new file mode 100644
index 000000000000..73869fabada4
--- /dev/null
+++ b/tools/testing/selftests/arm64/bti/.gitignore
@@ -0,0 +1,2 @@
+btitest
+nobtitest
diff --git a/tools/testing/selftests/arm64/bti/Makefile b/tools/testing/selftests/arm64/bti/Makefile
new file mode 100644
index 000000000000..05e4ee523a53
--- /dev/null
+++ b/tools/testing/selftests/arm64/bti/Makefile
@@ -0,0 +1,56 @@
+# SPDX-License-Identifier: GPL-2.0
+
+TEST_GEN_PROGS := btitest nobtitest
+
+# These tests are built as freestanding binaries since otherwise BTI
+# support in ld.so is required which is not currently widespread; when
+# it is available it will still be useful to test this separately as the
+# cases for statically linked and dynamically lined binaries are
+# slightly different.
+
+CFLAGS_NOBTI = -mbranch-protection=none -DBTI=0
+CFLAGS_BTI = -mbranch-protection=standard -DBTI=1
+
+CFLAGS_COMMON = -ffreestanding -Wall -Wextra $(CFLAGS)
+
+BTI_CC_COMMAND = $(CC) $(CFLAGS_BTI) $(CFLAGS_COMMON) -c -o $@ $<
+NOBTI_CC_COMMAND = $(CC) $(CFLAGS_NOBTI) $(CFLAGS_COMMON) -c -o $@ $<
+
+$(OUTPUT)/%-bti.o: %.c
+ $(BTI_CC_COMMAND)
+
+$(OUTPUT)/%-bti.o: %.S
+ $(BTI_CC_COMMAND)
+
+$(OUTPUT)/%-nobti.o: %.c
+ $(NOBTI_CC_COMMAND)
+
+$(OUTPUT)/%-nobti.o: %.S
+ $(NOBTI_CC_COMMAND)
+
+BTI_OBJS = \
+ $(OUTPUT)/test-bti.o \
+ $(OUTPUT)/signal-bti.o \
+ $(OUTPUT)/start-bti.o \
+ $(OUTPUT)/syscall-bti.o \
+ $(OUTPUT)/system-bti.o \
+ $(OUTPUT)/teststubs-bti.o \
+ $(OUTPUT)/trampoline-bti.o
+$(OUTPUT)/btitest: $(BTI_OBJS)
+ $(CC) $(CFLAGS_BTI) $(CFLAGS_COMMON) -nostdlib -static -o $@ $^
+
+NOBTI_OBJS = \
+ $(OUTPUT)/test-nobti.o \
+ $(OUTPUT)/signal-nobti.o \
+ $(OUTPUT)/start-nobti.o \
+ $(OUTPUT)/syscall-nobti.o \
+ $(OUTPUT)/system-nobti.o \
+ $(OUTPUT)/teststubs-nobti.o \
+ $(OUTPUT)/trampoline-nobti.o
+$(OUTPUT)/nobtitest: $(NOBTI_OBJS)
+ $(CC) $(CFLAGS_BTI) $(CFLAGS_COMMON) -nostdlib -static -o $@ $^
+
+# Including KSFT lib.mk here will also mangle the TEST_GEN_PROGS list
+# to account for any OUTPUT target-dirs optionally provided by
+# the toplevel makefile
+include ../../lib.mk
diff --git a/tools/testing/selftests/arm64/bti/assembler.h b/tools/testing/selftests/arm64/bti/assembler.h
new file mode 100644
index 000000000000..04e7b72880ef
--- /dev/null
+++ b/tools/testing/selftests/arm64/bti/assembler.h
@@ -0,0 +1,80 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Copyright (C) 2019 Arm Limited
+ * Original author: Dave Martin <Dave.Martin@arm.com>
+ */
+
+#ifndef ASSEMBLER_H
+#define ASSEMBLER_H
+
+#define NT_GNU_PROPERTY_TYPE_0 5
+#define GNU_PROPERTY_AARCH64_FEATURE_1_AND 0xc0000000
+
+/* Bits for GNU_PROPERTY_AARCH64_FEATURE_1_BTI */
+#define GNU_PROPERTY_AARCH64_FEATURE_1_BTI (1U << 0)
+#define GNU_PROPERTY_AARCH64_FEATURE_1_PAC (1U << 1)
+
+
+.macro startfn name:req
+ .globl \name
+\name:
+ .macro endfn
+ .size \name, . - \name
+ .type \name, @function
+ .purgem endfn
+ .endm
+.endm
+
+.macro emit_aarch64_feature_1_and
+ .pushsection .note.gnu.property, "a"
+ .align 3
+ .long 2f - 1f
+ .long 6f - 3f
+ .long NT_GNU_PROPERTY_TYPE_0
+1: .string "GNU"
+2:
+ .align 3
+3: .long GNU_PROPERTY_AARCH64_FEATURE_1_AND
+ .long 5f - 4f
+4:
+#if BTI
+ .long GNU_PROPERTY_AARCH64_FEATURE_1_PAC | \
+ GNU_PROPERTY_AARCH64_FEATURE_1_BTI
+#else
+ .long 0
+#endif
+5:
+ .align 3
+6:
+ .popsection
+.endm
+
+.macro paciasp
+ hint 0x19
+.endm
+
+.macro autiasp
+ hint 0x1d
+.endm
+
+.macro __bti_
+ hint 0x20
+.endm
+
+.macro __bti_c
+ hint 0x22
+.endm
+
+.macro __bti_j
+ hint 0x24
+.endm
+
+.macro __bti_jc
+ hint 0x26
+.endm
+
+.macro bti what=
+ __bti_\what
+.endm
+
+#endif /* ! ASSEMBLER_H */
diff --git a/tools/testing/selftests/arm64/bti/btitest.h b/tools/testing/selftests/arm64/bti/btitest.h
new file mode 100644
index 000000000000..2aff9b10336e
--- /dev/null
+++ b/tools/testing/selftests/arm64/bti/btitest.h
@@ -0,0 +1,23 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Copyright (C) 2019 Arm Limited
+ * Original author: Dave Martin <Dave.Martin@arm.com>
+ */
+
+#ifndef BTITEST_H
+#define BTITEST_H
+
+/* Trampolines for calling the test stubs: */
+void call_using_br_x0(void (*)(void));
+void call_using_br_x16(void (*)(void));
+void call_using_blr(void (*)(void));
+
+/* Test stubs: */
+void nohint_func(void);
+void bti_none_func(void);
+void bti_c_func(void);
+void bti_j_func(void);
+void bti_jc_func(void);
+void paciasp_func(void);
+
+#endif /* !BTITEST_H */
diff --git a/tools/testing/selftests/arm64/bti/signal.c b/tools/testing/selftests/arm64/bti/signal.c
new file mode 100644
index 000000000000..f3fd29b91141
--- /dev/null
+++ b/tools/testing/selftests/arm64/bti/signal.c
@@ -0,0 +1,37 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (C) 2019 Arm Limited
+ * Original author: Dave Martin <Dave.Martin@arm.com>
+ */
+
+#include "system.h"
+#include "signal.h"
+
+int sigemptyset(sigset_t *s)
+{
+ unsigned int i;
+
+ for (i = 0; i < _NSIG_WORDS; ++i)
+ s->sig[i] = 0;
+
+ return 0;
+}
+
+int sigaddset(sigset_t *s, int n)
+{
+ if (n < 1 || n > _NSIG)
+ return -EINVAL;
+
+ s->sig[(n - 1) / _NSIG_BPW] |= 1UL << (n - 1) % _NSIG_BPW;
+ return 0;
+}
+
+int sigaction(int n, struct sigaction *sa, const struct sigaction *old)
+{
+ return syscall(__NR_rt_sigaction, n, sa, old, sizeof(sa->sa_mask));
+}
+
+int sigprocmask(int how, const sigset_t *mask, sigset_t *old)
+{
+ return syscall(__NR_rt_sigprocmask, how, mask, old, sizeof(*mask));
+}
diff --git a/tools/testing/selftests/arm64/bti/signal.h b/tools/testing/selftests/arm64/bti/signal.h
new file mode 100644
index 000000000000..103457dc880e
--- /dev/null
+++ b/tools/testing/selftests/arm64/bti/signal.h
@@ -0,0 +1,21 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Copyright (C) 2019 Arm Limited
+ * Original author: Dave Martin <Dave.Martin@arm.com>
+ */
+
+#ifndef SIGNAL_H
+#define SIGNAL_H
+
+#include <linux/signal.h>
+
+#include "system.h"
+
+typedef __sighandler_t sighandler_t;
+
+int sigemptyset(sigset_t *s);
+int sigaddset(sigset_t *s, int n);
+int sigaction(int n, struct sigaction *sa, const struct sigaction *old);
+int sigprocmask(int how, const sigset_t *mask, sigset_t *old);
+
+#endif /* ! SIGNAL_H */
diff --git a/tools/testing/selftests/arm64/bti/start.S b/tools/testing/selftests/arm64/bti/start.S
new file mode 100644
index 000000000000..831f952e0572
--- /dev/null
+++ b/tools/testing/selftests/arm64/bti/start.S
@@ -0,0 +1,14 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Copyright (C) 2019 Arm Limited
+ * Original author: Dave Martin <Dave.Martin@arm.com>
+ */
+
+#include "assembler.h"
+
+startfn _start
+ mov x0, sp
+ b start
+endfn
+
+emit_aarch64_feature_1_and
diff --git a/tools/testing/selftests/arm64/bti/syscall.S b/tools/testing/selftests/arm64/bti/syscall.S
new file mode 100644
index 000000000000..8dde8b6f3db1
--- /dev/null
+++ b/tools/testing/selftests/arm64/bti/syscall.S
@@ -0,0 +1,23 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Copyright (C) 2019 Arm Limited
+ * Original author: Dave Martin <Dave.Martin@arm.com>
+ */
+
+#include "assembler.h"
+
+startfn syscall
+ bti c
+ mov w8, w0
+ mov x0, x1
+ mov x1, x2
+ mov x2, x3
+ mov x3, x4
+ mov x4, x5
+ mov x5, x6
+ mov x6, x7
+ svc #0
+ ret
+endfn
+
+emit_aarch64_feature_1_and
diff --git a/tools/testing/selftests/arm64/bti/system.c b/tools/testing/selftests/arm64/bti/system.c
new file mode 100644
index 000000000000..93d772b00bfe
--- /dev/null
+++ b/tools/testing/selftests/arm64/bti/system.c
@@ -0,0 +1,20 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (C) 2019 Arm Limited
+ * Original author: Dave Martin <Dave.Martin@arm.com>
+ */
+
+#include "system.h"
+
+#include <asm/unistd.h>
+
+void __noreturn exit(int n)
+{
+ syscall(__NR_exit, n);
+ unreachable();
+}
+
+ssize_t write(int fd, const void *buf, size_t size)
+{
+ return syscall(__NR_write, fd, buf, size);
+}
diff --git a/tools/testing/selftests/arm64/bti/system.h b/tools/testing/selftests/arm64/bti/system.h
new file mode 100644
index 000000000000..2e9ee1284a0c
--- /dev/null
+++ b/tools/testing/selftests/arm64/bti/system.h
@@ -0,0 +1,28 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Copyright (C) 2019 Arm Limited
+ * Original author: Dave Martin <Dave.Martin@arm.com>
+ */
+
+#ifndef SYSTEM_H
+#define SYSTEM_H
+
+#include <linux/types.h>
+#include <linux/stddef.h>
+
+typedef __kernel_size_t size_t;
+typedef __kernel_ssize_t ssize_t;
+
+#include <linux/errno.h>
+#include <linux/compiler.h>
+
+#include <asm/hwcap.h>
+#include <asm/ptrace.h>
+#include <asm/unistd.h>
+
+long syscall(int nr, ...);
+
+void __noreturn exit(int n);
+ssize_t write(int fd, const void *buf, size_t size);
+
+#endif /* ! SYSTEM_H */
diff --git a/tools/testing/selftests/arm64/bti/test.c b/tools/testing/selftests/arm64/bti/test.c
new file mode 100644
index 000000000000..28a8e8a28a84
--- /dev/null
+++ b/tools/testing/selftests/arm64/bti/test.c
@@ -0,0 +1,229 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (C) 2019,2021 Arm Limited
+ * Original author: Dave Martin <Dave.Martin@arm.com>
+ */
+
+#include "system.h"
+
+#include <stdbool.h>
+#include <stddef.h>
+#include <linux/errno.h>
+#include <linux/auxvec.h>
+#include <linux/signal.h>
+#include <asm/sigcontext.h>
+#include <asm/ucontext.h>
+
+typedef struct ucontext ucontext_t;
+
+#include "btitest.h"
+#include "signal.h"
+
+#define EXPECTED_TESTS 18
+
+static volatile unsigned int test_num = 1;
+static unsigned int test_passed;
+static unsigned int test_failed;
+static unsigned int test_skipped;
+
+static void fdputs(int fd, const char *str)
+{
+ size_t len = 0;
+ const char *p = str;
+
+ while (*p++)
+ ++len;
+
+ write(fd, str, len);
+}
+
+static void putstr(const char *str)
+{
+ fdputs(1, str);
+}
+
+static void putnum(unsigned int num)
+{
+ char c;
+
+ if (num / 10)
+ putnum(num / 10);
+
+ c = '0' + (num % 10);
+ write(1, &c, 1);
+}
+
+#define puttestname(test_name, trampoline_name) do { \
+ putstr(test_name); \
+ putstr("/"); \
+ putstr(trampoline_name); \
+} while (0)
+
+void print_summary(void)
+{
+ putstr("# Totals: pass:");
+ putnum(test_passed);
+ putstr(" fail:");
+ putnum(test_failed);
+ putstr(" xfail:0 xpass:0 skip:");
+ putnum(test_skipped);
+ putstr(" error:0\n");
+}
+
+static const char *volatile current_test_name;
+static const char *volatile current_trampoline_name;
+static volatile int sigill_expected, sigill_received;
+
+static void handler(int n, siginfo_t *si __always_unused,
+ void *uc_ __always_unused)
+{
+ ucontext_t *uc = uc_;
+
+ putstr("# \t[SIGILL in ");
+ puttestname(current_test_name, current_trampoline_name);
+ putstr(", BTYPE=");
+ write(1, &"00011011"[((uc->uc_mcontext.pstate & PSR_BTYPE_MASK)
+ >> PSR_BTYPE_SHIFT) * 2], 2);
+ if (!sigill_expected) {
+ putstr("]\n");
+ putstr("not ok ");
+ putnum(test_num);
+ putstr(" ");
+ puttestname(current_test_name, current_trampoline_name);
+ putstr("(unexpected SIGILL)\n");
+ print_summary();
+ exit(128 + n);
+ }
+
+ putstr(" (expected)]\n");
+ sigill_received = 1;
+ /* zap BTYPE so that resuming the faulting code will work */
+ uc->uc_mcontext.pstate &= ~PSR_BTYPE_MASK;
+}
+
+/* Does the system have BTI? */
+static bool have_bti;
+
+static void __do_test(void (*trampoline)(void (*)(void)),
+ void (*fn)(void),
+ const char *trampoline_name,
+ const char *name,
+ int expect_sigill)
+{
+ /*
+ * Branch Target exceptions should only happen for BTI
+ * binaries running on a system with BTI:
+ */
+ if (!BTI || !have_bti)
+ expect_sigill = 0;
+
+ sigill_expected = expect_sigill;
+ sigill_received = 0;
+ current_test_name = name;
+ current_trampoline_name = trampoline_name;
+
+ trampoline(fn);
+
+ if (expect_sigill && !sigill_received) {
+ putstr("not ok ");
+ test_failed++;
+ } else {
+ putstr("ok ");
+ test_passed++;
+ }
+ putnum(test_num++);
+ putstr(" ");
+ puttestname(name, trampoline_name);
+ putstr("\n");
+}
+
+#define do_test(expect_sigill_br_x0, \
+ expect_sigill_br_x16, \
+ expect_sigill_blr, \
+ name) \
+do { \
+ __do_test(call_using_br_x0, name, "call_using_br_x0", #name, \
+ expect_sigill_br_x0); \
+ __do_test(call_using_br_x16, name, "call_using_br_x16", #name, \
+ expect_sigill_br_x16); \
+ __do_test(call_using_blr, name, "call_using_blr", #name, \
+ expect_sigill_blr); \
+} while (0)
+
+void start(int *argcp)
+{
+ struct sigaction sa;
+ void *const *p;
+ const struct auxv_entry {
+ unsigned long type;
+ unsigned long val;
+ } *auxv;
+ unsigned long hwcap = 0, hwcap2 = 0;
+
+ putstr("TAP version 13\n");
+ putstr("1..");
+ putnum(EXPECTED_TESTS);
+ putstr("\n");
+
+ /* Gross hack for finding AT_HWCAP2 from the initial process stack: */
+ p = (void *const *)argcp + 1 + *argcp + 1; /* start of environment */
+ /* step over environment */
+ while (*p++)
+ ;
+ for (auxv = (const struct auxv_entry *)p; auxv->type != AT_NULL; ++auxv) {
+ switch (auxv->type) {
+ case AT_HWCAP:
+ hwcap = auxv->val;
+ break;
+ case AT_HWCAP2:
+ hwcap2 = auxv->val;
+ break;
+ default:
+ break;
+ }
+ }
+
+ if (hwcap & HWCAP_PACA)
+ putstr("# HWCAP_PACA present\n");
+ else
+ putstr("# HWCAP_PACA not present\n");
+
+ if (hwcap2 & HWCAP2_BTI) {
+ putstr("# HWCAP2_BTI present\n");
+ if (!(hwcap & HWCAP_PACA))
+ putstr("# Bad hardware? Expect problems.\n");
+ have_bti = true;
+ } else {
+ putstr("# HWCAP2_BTI not present\n");
+ have_bti = false;
+ }
+
+ putstr("# Test binary");
+ if (!BTI)
+ putstr(" not");
+ putstr(" built for BTI\n");
+
+ sa.sa_handler = (sighandler_t)(void *)handler;
+ sa.sa_flags = SA_SIGINFO;
+ sigemptyset(&sa.sa_mask);
+ sigaction(SIGILL, &sa, NULL);
+ sigaddset(&sa.sa_mask, SIGILL);
+ sigprocmask(SIG_UNBLOCK, &sa.sa_mask, NULL);
+
+ do_test(1, 1, 1, nohint_func);
+ do_test(1, 1, 1, bti_none_func);
+ do_test(1, 0, 0, bti_c_func);
+ do_test(0, 0, 1, bti_j_func);
+ do_test(0, 0, 0, bti_jc_func);
+ do_test(1, 0, 0, paciasp_func);
+
+ print_summary();
+
+ if (test_num - 1 != EXPECTED_TESTS)
+ putstr("# WARNING - EXPECTED TEST COUNT WRONG\n");
+
+ if (test_failed)
+ exit(1);
+ else
+ exit(0);
+}
diff --git a/tools/testing/selftests/arm64/bti/teststubs.S b/tools/testing/selftests/arm64/bti/teststubs.S
new file mode 100644
index 000000000000..b62c8c35f67e
--- /dev/null
+++ b/tools/testing/selftests/arm64/bti/teststubs.S
@@ -0,0 +1,39 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Copyright (C) 2019 Arm Limited
+ * Original author: Dave Martin <Dave.Martin@arm.com>
+ */
+
+#include "assembler.h"
+
+startfn bti_none_func
+ bti
+ ret
+endfn
+
+startfn bti_c_func
+ bti c
+ ret
+endfn
+
+startfn bti_j_func
+ bti j
+ ret
+endfn
+
+startfn bti_jc_func
+ bti jc
+ ret
+endfn
+
+startfn paciasp_func
+ paciasp
+ autiasp
+ ret
+endfn
+
+startfn nohint_func
+ ret
+endfn
+
+emit_aarch64_feature_1_and
diff --git a/tools/testing/selftests/arm64/bti/trampoline.S b/tools/testing/selftests/arm64/bti/trampoline.S
new file mode 100644
index 000000000000..09beb3f361f1
--- /dev/null
+++ b/tools/testing/selftests/arm64/bti/trampoline.S
@@ -0,0 +1,29 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Copyright (C) 2019 Arm Limited
+ * Original author: Dave Martin <Dave.Martin@arm.com>
+ */
+
+#include "assembler.h"
+
+startfn call_using_br_x0
+ bti c
+ br x0
+endfn
+
+startfn call_using_br_x16
+ bti c
+ mov x16, x0
+ br x16
+endfn
+
+startfn call_using_blr
+ paciasp
+ stp x29, x30, [sp, #-16]!
+ blr x0
+ ldp x29, x30, [sp], #16
+ autiasp
+ ret
+endfn
+
+emit_aarch64_feature_1_and
diff --git a/tools/testing/selftests/arm64/fp/.gitignore b/tools/testing/selftests/arm64/fp/.gitignore
new file mode 100644
index 000000000000..00e52c966281
--- /dev/null
+++ b/tools/testing/selftests/arm64/fp/.gitignore
@@ -0,0 +1,17 @@
+fp-pidbench
+fp-ptrace
+fp-stress
+fpsimd-test
+rdvl-sme
+rdvl-sve
+sve-probe-vls
+sve-ptrace
+sve-test
+ssve-test
+vec-syscfg
+vlset
+za-fork
+za-ptrace
+za-test
+zt-ptrace
+zt-test
diff --git a/tools/testing/selftests/arm64/fp/Makefile b/tools/testing/selftests/arm64/fp/Makefile
new file mode 100644
index 000000000000..55d4f00d9e8e
--- /dev/null
+++ b/tools/testing/selftests/arm64/fp/Makefile
@@ -0,0 +1,53 @@
+# SPDX-License-Identifier: GPL-2.0
+
+# A proper top_srcdir is needed by KSFT(lib.mk)
+top_srcdir = $(realpath ../../../../../)
+
+CFLAGS += $(KHDR_INCLUDES)
+
+TEST_GEN_PROGS := \
+ fp-ptrace \
+ fp-stress \
+ sve-ptrace sve-probe-vls \
+ vec-syscfg \
+ za-fork za-ptrace
+TEST_GEN_PROGS_EXTENDED := fp-pidbench fpsimd-test \
+ rdvl-sme rdvl-sve \
+ sve-test \
+ ssve-test \
+ za-test \
+ zt-ptrace \
+ zt-test \
+ vlset
+TEST_PROGS_EXTENDED := fpsimd-stress sve-stress ssve-stress za-stress
+
+EXTRA_CLEAN += $(OUTPUT)/asm-utils.o $(OUTPUT)/rdvl.o $(OUTPUT)/za-fork-asm.o
+
+# Build with nolibc to avoid effects due to libc's clone() support
+$(OUTPUT)/fp-pidbench: fp-pidbench.S $(OUTPUT)/asm-utils.o
+ $(CC) -nostdlib $^ -o $@
+$(OUTPUT)/fp-ptrace: fp-ptrace.c fp-ptrace-asm.S
+$(OUTPUT)/fpsimd-test: fpsimd-test.S $(OUTPUT)/asm-utils.o
+ $(CC) -nostdlib $^ -o $@
+$(OUTPUT)/rdvl-sve: rdvl-sve.c $(OUTPUT)/rdvl.o
+$(OUTPUT)/rdvl-sme: rdvl-sme.c $(OUTPUT)/rdvl.o
+$(OUTPUT)/sve-ptrace: sve-ptrace.c
+$(OUTPUT)/sve-probe-vls: sve-probe-vls.c $(OUTPUT)/rdvl.o
+$(OUTPUT)/sve-test: sve-test.S $(OUTPUT)/asm-utils.o
+ $(CC) -nostdlib $^ -o $@
+$(OUTPUT)/ssve-test: sve-test.S $(OUTPUT)/asm-utils.o
+ $(CC) -DSSVE -nostdlib $^ -o $@
+$(OUTPUT)/vec-syscfg: vec-syscfg.c $(OUTPUT)/rdvl.o
+$(OUTPUT)/vlset: vlset.c
+$(OUTPUT)/za-fork: za-fork.c $(OUTPUT)/za-fork-asm.o
+ $(CC) -fno-asynchronous-unwind-tables -fno-ident -s -Os -nostdlib \
+ -include ../../../../include/nolibc/nolibc.h -I../..\
+ -static -ffreestanding -Wall $^ -o $@
+$(OUTPUT)/za-ptrace: za-ptrace.c
+$(OUTPUT)/za-test: za-test.S $(OUTPUT)/asm-utils.o
+ $(CC) -nostdlib $^ -o $@
+$(OUTPUT)/zt-ptrace: zt-ptrace.c
+$(OUTPUT)/zt-test: zt-test.S $(OUTPUT)/asm-utils.o
+ $(CC) -nostdlib $^ -o $@
+
+include ../../lib.mk
diff --git a/tools/testing/selftests/arm64/fp/README b/tools/testing/selftests/arm64/fp/README
new file mode 100644
index 000000000000..03e3dad865d8
--- /dev/null
+++ b/tools/testing/selftests/arm64/fp/README
@@ -0,0 +1,100 @@
+This directory contains a mix of tests integrated with kselftest and
+standalone stress tests.
+
+kselftest tests
+===============
+
+sve-probe-vls - Checks the SVE vector length enumeration interface
+sve-ptrace - Checks the SVE ptrace interface
+
+Running the non-kselftest tests
+===============================
+
+sve-stress performs an SVE context switch stress test, as described
+below.
+
+(The fpsimd-stress test works the same way; just substitute "fpsimd" for
+"sve" in the following commands.)
+
+
+The test runs until killed by the user.
+
+If no context switch error was detected, you will see output such as
+the following:
+
+$ ./sve-stress
+(wait for some time)
+^C
+Vector length: 512 bits
+PID: 1573
+Terminated by signal 15, no error, iterations=9467, signals=1014
+Vector length: 512 bits
+PID: 1575
+Terminated by signal 15, no error, iterations=9448, signals=1028
+Vector length: 512 bits
+PID: 1577
+Terminated by signal 15, no error, iterations=9436, signals=1039
+Vector length: 512 bits
+PID: 1579
+Terminated by signal 15, no error, iterations=9421, signals=1039
+Vector length: 512 bits
+PID: 1581
+Terminated by signal 15, no error, iterations=9403, signals=1039
+Vector length: 512 bits
+PID: 1583
+Terminated by signal 15, no error, iterations=9385, signals=1036
+Vector length: 512 bits
+PID: 1585
+Terminated by signal 15, no error, iterations=9376, signals=1039
+Vector length: 512 bits
+PID: 1587
+Terminated by signal 15, no error, iterations=9361, signals=1039
+Vector length: 512 bits
+PID: 1589
+Terminated by signal 15, no error, iterations=9350, signals=1039
+
+
+If an error was detected, details of the mismatch will be printed
+instead of "no error".
+
+Ideally, the test should be allowed to run for many minutes or hours
+to maximise test coverage.
+
+
+KVM stress testing
+==================
+
+To try to reproduce the bugs that we have been observing, sve-stress
+should be run in parallel in two KVM guests, while simultaneously
+running on the host.
+
+1) Start 2 guests, using the following command for each:
+
+$ lkvm run --console=virtio -pconsole=hvc0 --sve Image
+
+(Depending on the hardware GIC implementation, you may also need
+--irqchip=gicv3. New kvmtool defaults to that if appropriate, but I
+can't remember whether my branch is new enough for that. Try without
+the option first.)
+
+Kvmtool occupies the terminal until you kill it (Ctrl+A x),
+or until the guest terminates. It is therefore recommended to run
+each instance in separate terminal (use screen or ssh etc.) This
+allows multiple guests to be run in parallel while running other
+commands on the host.
+
+Within the guest, the host filesystem is accessible, mounted on /host.
+
+2) Run the sve-stress on *each* guest with the Vector-Length set to 32:
+guest$ ./vlset --inherit 32 ./sve-stress
+
+3) Run the sve-stress on the host with the maximum Vector-Length:
+host$ ./vlset --inherit --max ./sve-stress
+
+
+Again, the test should be allowed to run for many minutes or hours to
+maximise test coverage.
+
+If no error is detected, you will see output from each sve-stress
+instance similar to that illustrated above; otherwise details of the
+observed mismatches will be printed.
diff --git a/tools/testing/selftests/arm64/fp/TODO b/tools/testing/selftests/arm64/fp/TODO
new file mode 100644
index 000000000000..44004e53da33
--- /dev/null
+++ b/tools/testing/selftests/arm64/fp/TODO
@@ -0,0 +1,7 @@
+- Test unsupported values in the ABIs.
+- More coverage for ptrace:
+ - Get/set of FFR.
+ - Ensure ptraced processes actually see the register state visible through
+ the ptrace interface.
+ - Big endian.
+- Test PR_SVE_VL_INHERIT after a double fork.
diff --git a/tools/testing/selftests/arm64/fp/asm-offsets.h b/tools/testing/selftests/arm64/fp/asm-offsets.h
new file mode 100644
index 000000000000..757b2fd75dd7
--- /dev/null
+++ b/tools/testing/selftests/arm64/fp/asm-offsets.h
@@ -0,0 +1,12 @@
+#define sa_sz 32
+#define sa_flags 8
+#define sa_handler 0
+#define sa_mask_sz 8
+#define SIGUSR1 10
+#define SIGUSR2 12
+#define SIGTERM 15
+#define SIGINT 2
+#define SIGABRT 6
+#define SA_NODEFER 1073741824
+#define SA_SIGINFO 4
+#define ucontext_regs 184
diff --git a/tools/testing/selftests/arm64/fp/asm-utils.S b/tools/testing/selftests/arm64/fp/asm-utils.S
new file mode 100644
index 000000000000..4b9728efc18d
--- /dev/null
+++ b/tools/testing/selftests/arm64/fp/asm-utils.S
@@ -0,0 +1,172 @@
+// SPDX-License-Identifier: GPL-2.0-only
+// Copyright (C) 2015-2021 ARM Limited.
+// Original author: Dave Martin <Dave.Martin@arm.com>
+//
+// Utility functions for assembly code.
+
+#include <asm/unistd.h>
+#include "assembler.h"
+
+// Print a single character x0 to stdout
+// Clobbers x0-x2,x8
+function putc
+ str x0, [sp, #-16]!
+
+ mov x0, #1 // STDOUT_FILENO
+ mov x1, sp
+ mov x2, #1
+ mov x8, #__NR_write
+ svc #0
+
+ add sp, sp, #16
+ ret
+endfunction
+.globl putc
+
+// Print a NUL-terminated string starting at address x0 to stdout
+// Clobbers x0-x3,x8
+function puts
+ mov x1, x0
+
+ mov x2, #0
+0: ldrb w3, [x0], #1
+ cbz w3, 1f
+ add x2, x2, #1
+ b 0b
+
+1: mov w0, #1 // STDOUT_FILENO
+ mov x8, #__NR_write
+ svc #0
+
+ ret
+endfunction
+.globl puts
+
+// Print an unsigned decimal number x0 to stdout
+// Clobbers x0-x4,x8
+function putdec
+ mov x1, sp
+ str x30, [sp, #-32]! // Result can't be > 20 digits
+
+ mov x2, #0
+ strb w2, [x1, #-1]! // Write the NUL terminator
+
+ mov x2, #10
+0: udiv x3, x0, x2 // div-mod loop to generate the digits
+ msub x0, x3, x2, x0
+ add w0, w0, #'0'
+ strb w0, [x1, #-1]!
+ mov x0, x3
+ cbnz x3, 0b
+
+ ldrb w0, [x1]
+ cbnz w0, 1f
+ mov w0, #'0' // Print "0" for 0, not ""
+ strb w0, [x1, #-1]!
+
+1: mov x0, x1
+ bl puts
+
+ ldr x30, [sp], #32
+ ret
+endfunction
+.globl putdec
+
+// Print an unsigned decimal number x0 to stdout, followed by a newline
+// Clobbers x0-x5,x8
+function putdecn
+ mov x5, x30
+
+ bl putdec
+ mov x0, #'\n'
+ bl putc
+
+ ret x5
+endfunction
+.globl putdecn
+
+// Clobbers x0-x3,x8
+function puthexb
+ str x30, [sp, #-0x10]!
+
+ mov w3, w0
+ lsr w0, w0, #4
+ bl puthexnibble
+ mov w0, w3
+
+ ldr x30, [sp], #0x10
+ // fall through to puthexnibble
+endfunction
+.globl puthexb
+
+// Clobbers x0-x2,x8
+function puthexnibble
+ and w0, w0, #0xf
+ cmp w0, #10
+ blo 1f
+ add w0, w0, #'a' - ('9' + 1)
+1: add w0, w0, #'0'
+ b putc
+endfunction
+.globl puthexnibble
+
+// x0=data in, x1=size in, clobbers x0-x5,x8
+function dumphex
+ str x30, [sp, #-0x10]!
+
+ mov x4, x0
+ mov x5, x1
+
+0: subs x5, x5, #1
+ b.lo 1f
+ ldrb w0, [x4], #1
+ bl puthexb
+ b 0b
+
+1: ldr x30, [sp], #0x10
+ ret
+endfunction
+.globl dumphex
+
+ // Trivial memory copy: copy x2 bytes, starting at address x1, to address x0.
+// Clobbers x0-x3
+function memcpy
+ cmp x2, #0
+ b.eq 1f
+0: ldrb w3, [x1], #1
+ strb w3, [x0], #1
+ subs x2, x2, #1
+ b.ne 0b
+1: ret
+endfunction
+.globl memcpy
+
+// Fill x1 bytes starting at x0 with 0xae (for canary purposes)
+// Clobbers x1, x2.
+function memfill_ae
+ mov w2, #0xae
+ b memfill
+endfunction
+.globl memfill_ae
+
+// Fill x1 bytes starting at x0 with 0.
+// Clobbers x1, x2.
+function memclr
+ mov w2, #0
+endfunction
+.globl memclr
+ // fall through to memfill
+
+// Trivial memory fill: fill x1 bytes starting at address x0 with byte w2
+// Clobbers x1
+function memfill
+ cmp x1, #0
+ b.eq 1f
+
+0: strb w2, [x0], #1
+ subs x1, x1, #1
+ b.ne 0b
+
+1: ret
+endfunction
+.globl memfill
diff --git a/tools/testing/selftests/arm64/fp/assembler.h b/tools/testing/selftests/arm64/fp/assembler.h
new file mode 100644
index 000000000000..9b38a0da407d
--- /dev/null
+++ b/tools/testing/selftests/arm64/fp/assembler.h
@@ -0,0 +1,68 @@
+// SPDX-License-Identifier: GPL-2.0-only
+// Copyright (C) 2015-2019 ARM Limited.
+// Original author: Dave Martin <Dave.Martin@arm.com>
+
+#ifndef ASSEMBLER_H
+#define ASSEMBLER_H
+
+.macro __for from:req, to:req
+ .if (\from) == (\to)
+ _for__body %\from
+ .else
+ __for \from, %(\from) + ((\to) - (\from)) / 2
+ __for %(\from) + ((\to) - (\from)) / 2 + 1, \to
+ .endif
+.endm
+
+.macro _for var:req, from:req, to:req, insn:vararg
+ .macro _for__body \var:req
+ .noaltmacro
+ \insn
+ .altmacro
+ .endm
+
+ .altmacro
+ __for \from, \to
+ .noaltmacro
+
+ .purgem _for__body
+.endm
+
+.macro function name
+ .macro endfunction
+ .type \name, @function
+ .purgem endfunction
+ .endm
+\name:
+.endm
+
+.macro define_accessor name, num, insn
+ .macro \name\()_entry n
+ \insn \n, 1
+ ret
+ .endm
+
+function \name
+ adr x2, .L__accessor_tbl\@
+ add x2, x2, x0, lsl #3
+ br x2
+
+.L__accessor_tbl\@:
+ _for x, 0, (\num) - 1, \name\()_entry \x
+endfunction
+
+ .purgem \name\()_entry
+.endm
+
+// Utility macro to print a literal string
+// Clobbers x0-x4,x8
+.macro puts string
+ .pushsection .rodata.str1.1, "aMS", @progbits, 1
+.L__puts_literal\@: .string "\string"
+ .popsection
+
+ ldr x0, =.L__puts_literal\@
+ bl puts
+.endm
+
+#endif /* ! ASSEMBLER_H */
diff --git a/tools/testing/selftests/arm64/fp/fp-pidbench.S b/tools/testing/selftests/arm64/fp/fp-pidbench.S
new file mode 100644
index 000000000000..73830f6bc99b
--- /dev/null
+++ b/tools/testing/selftests/arm64/fp/fp-pidbench.S
@@ -0,0 +1,70 @@
+// SPDX-License-Identifier: GPL-2.0-only
+// Copyright (C) 2021 ARM Limited.
+// Original author: Mark Brown <broonie@kernel.org>
+//
+// Trivial syscall overhead benchmark.
+//
+// This is implemented in asm to ensure that we don't have any issues with
+// system libraries using instructions that disrupt the test.
+
+#include <asm/unistd.h>
+#include "assembler.h"
+
+.arch_extension sve
+
+.macro test_loop per_loop
+ mov x10, x20
+ mov x8, #__NR_getpid
+ mrs x11, CNTVCT_EL0
+1:
+ \per_loop
+ svc #0
+ sub x10, x10, #1
+ cbnz x10, 1b
+
+ mrs x12, CNTVCT_EL0
+ sub x0, x12, x11
+ bl putdec
+ puts "\n"
+.endm
+
+// Main program entry point
+.globl _start
+function _start
+ puts "Iterations per test: "
+ mov x20, #10000
+ lsl x20, x20, #8
+ mov x0, x20
+ bl putdec
+ puts "\n"
+
+ // Test having never used SVE
+ puts "No SVE: "
+ test_loop
+
+ // Check for SVE support - should use hwcap but that's hard in asm
+ mrs x0, ID_AA64PFR0_EL1
+ ubfx x0, x0, #32, #4
+ cbnz x0, 1f
+ puts "System does not support SVE\n"
+ b out
+1:
+
+ // Execute a SVE instruction
+ puts "SVE VL: "
+ rdvl x0, #8
+ bl putdec
+ puts "\n"
+
+ puts "SVE used once: "
+ test_loop
+
+ // Use SVE per syscall
+ puts "SVE used per syscall: "
+ test_loop "rdvl x0, #8"
+
+ // And we're done
+out:
+ mov x0, #0
+ mov x8, #__NR_exit
+ svc #0
diff --git a/tools/testing/selftests/arm64/fp/fp-ptrace-asm.S b/tools/testing/selftests/arm64/fp/fp-ptrace-asm.S
new file mode 100644
index 000000000000..7ad59d92d02b
--- /dev/null
+++ b/tools/testing/selftests/arm64/fp/fp-ptrace-asm.S
@@ -0,0 +1,279 @@
+// SPDX-License-Identifier: GPL-2.0-only
+// Copyright (C) 2021-3 ARM Limited.
+//
+// Assembly portion of the FP ptrace test
+
+//
+// Load values from memory into registers, break on a breakpoint, then
+// break on a further breakpoint
+//
+
+#include "fp-ptrace.h"
+#include "sme-inst.h"
+
+.arch_extension sve
+
+// Load and save register values with pauses for ptrace
+//
+// x0 - SVE in use
+// x1 - SME in use
+// x2 - SME2 in use
+// x3 - FA64 supported
+
+.globl load_and_save
+load_and_save:
+ stp x11, x12, [sp, #-0x10]!
+
+ // This should be redundant in the SVE case
+ ldr x7, =v_in
+ ldp q0, q1, [x7]
+ ldp q2, q3, [x7, #16 * 2]
+ ldp q4, q5, [x7, #16 * 4]
+ ldp q6, q7, [x7, #16 * 6]
+ ldp q8, q9, [x7, #16 * 8]
+ ldp q10, q11, [x7, #16 * 10]
+ ldp q12, q13, [x7, #16 * 12]
+ ldp q14, q15, [x7, #16 * 14]
+ ldp q16, q17, [x7, #16 * 16]
+ ldp q18, q19, [x7, #16 * 18]
+ ldp q20, q21, [x7, #16 * 20]
+ ldp q22, q23, [x7, #16 * 22]
+ ldp q24, q25, [x7, #16 * 24]
+ ldp q26, q27, [x7, #16 * 26]
+ ldp q28, q29, [x7, #16 * 28]
+ ldp q30, q31, [x7, #16 * 30]
+
+ // SME?
+ cbz x1, check_sve_in
+
+ adrp x7, svcr_in
+ ldr x7, [x7, :lo12:svcr_in]
+ // SVCR is 0 by default, avoid triggering SME if not in use
+ cbz x7, check_sve_in
+ msr S3_3_C4_C2_2, x7
+
+ // ZA?
+ tbz x7, #SVCR_ZA_SHIFT, check_sm_in
+ rdsvl 11, 1
+ mov w12, #0
+ ldr x6, =za_in
+1: _ldr_za 12, 6
+ add x6, x6, x11
+ add x12, x12, #1
+ cmp x11, x12
+ bne 1b
+
+ // ZT?
+ cbz x2, check_sm_in
+ adrp x6, zt_in
+ add x6, x6, :lo12:zt_in
+ _ldr_zt 6
+
+ // In streaming mode?
+check_sm_in:
+ tbz x7, #SVCR_SM_SHIFT, check_sve_in
+ mov x4, x3 // Load FFR if we have FA64
+ b load_sve
+
+ // SVE?
+check_sve_in:
+ cbz x0, wait_for_writes
+ mov x4, #1
+
+load_sve:
+ ldr x7, =z_in
+ ldr z0, [x7, #0, MUL VL]
+ ldr z1, [x7, #1, MUL VL]
+ ldr z2, [x7, #2, MUL VL]
+ ldr z3, [x7, #3, MUL VL]
+ ldr z4, [x7, #4, MUL VL]
+ ldr z5, [x7, #5, MUL VL]
+ ldr z6, [x7, #6, MUL VL]
+ ldr z7, [x7, #7, MUL VL]
+ ldr z8, [x7, #8, MUL VL]
+ ldr z9, [x7, #9, MUL VL]
+ ldr z10, [x7, #10, MUL VL]
+ ldr z11, [x7, #11, MUL VL]
+ ldr z12, [x7, #12, MUL VL]
+ ldr z13, [x7, #13, MUL VL]
+ ldr z14, [x7, #14, MUL VL]
+ ldr z15, [x7, #15, MUL VL]
+ ldr z16, [x7, #16, MUL VL]
+ ldr z17, [x7, #17, MUL VL]
+ ldr z18, [x7, #18, MUL VL]
+ ldr z19, [x7, #19, MUL VL]
+ ldr z20, [x7, #20, MUL VL]
+ ldr z21, [x7, #21, MUL VL]
+ ldr z22, [x7, #22, MUL VL]
+ ldr z23, [x7, #23, MUL VL]
+ ldr z24, [x7, #24, MUL VL]
+ ldr z25, [x7, #25, MUL VL]
+ ldr z26, [x7, #26, MUL VL]
+ ldr z27, [x7, #27, MUL VL]
+ ldr z28, [x7, #28, MUL VL]
+ ldr z29, [x7, #29, MUL VL]
+ ldr z30, [x7, #30, MUL VL]
+ ldr z31, [x7, #31, MUL VL]
+
+ // FFR is not present in base SME
+ cbz x4, 1f
+ ldr x7, =ffr_in
+ ldr p0, [x7]
+ ldr x7, [x7, #0]
+ cbz x7, 1f
+ wrffr p0.b
+1:
+
+ ldr x7, =p_in
+ ldr p0, [x7, #0, MUL VL]
+ ldr p1, [x7, #1, MUL VL]
+ ldr p2, [x7, #2, MUL VL]
+ ldr p3, [x7, #3, MUL VL]
+ ldr p4, [x7, #4, MUL VL]
+ ldr p5, [x7, #5, MUL VL]
+ ldr p6, [x7, #6, MUL VL]
+ ldr p7, [x7, #7, MUL VL]
+ ldr p8, [x7, #8, MUL VL]
+ ldr p9, [x7, #9, MUL VL]
+ ldr p10, [x7, #10, MUL VL]
+ ldr p11, [x7, #11, MUL VL]
+ ldr p12, [x7, #12, MUL VL]
+ ldr p13, [x7, #13, MUL VL]
+ ldr p14, [x7, #14, MUL VL]
+ ldr p15, [x7, #15, MUL VL]
+
+wait_for_writes:
+ // Wait for the parent
+ brk #0
+
+ // Save values
+ ldr x7, =v_out
+ stp q0, q1, [x7]
+ stp q2, q3, [x7, #16 * 2]
+ stp q4, q5, [x7, #16 * 4]
+ stp q6, q7, [x7, #16 * 6]
+ stp q8, q9, [x7, #16 * 8]
+ stp q10, q11, [x7, #16 * 10]
+ stp q12, q13, [x7, #16 * 12]
+ stp q14, q15, [x7, #16 * 14]
+ stp q16, q17, [x7, #16 * 16]
+ stp q18, q19, [x7, #16 * 18]
+ stp q20, q21, [x7, #16 * 20]
+ stp q22, q23, [x7, #16 * 22]
+ stp q24, q25, [x7, #16 * 24]
+ stp q26, q27, [x7, #16 * 26]
+ stp q28, q29, [x7, #16 * 28]
+ stp q30, q31, [x7, #16 * 30]
+
+ // SME?
+ cbz x1, check_sve_out
+
+ rdsvl 11, 1
+ adrp x6, sme_vl_out
+ str x11, [x6, :lo12:sme_vl_out]
+
+ mrs x7, S3_3_C4_C2_2
+ adrp x6, svcr_out
+ str x7, [x6, :lo12:svcr_out]
+
+ // ZA?
+ tbz x7, #SVCR_ZA_SHIFT, check_sm_out
+ mov w12, #0
+ ldr x6, =za_out
+1: _str_za 12, 6
+ add x6, x6, x11
+ add x12, x12, #1
+ cmp x11, x12
+ bne 1b
+
+ // ZT?
+ cbz x2, check_sm_out
+ adrp x6, zt_out
+ add x6, x6, :lo12:zt_out
+ _str_zt 6
+
+ // In streaming mode?
+check_sm_out:
+ tbz x7, #SVCR_SM_SHIFT, check_sve_out
+ mov x4, x3 // FFR?
+ b read_sve
+
+ // SVE?
+check_sve_out:
+ cbz x0, wait_for_reads
+ mov x4, #1
+
+ rdvl x7, #1
+ adrp x6, sve_vl_out
+ str x7, [x6, :lo12:sve_vl_out]
+
+read_sve:
+ ldr x7, =z_out
+ str z0, [x7, #0, MUL VL]
+ str z1, [x7, #1, MUL VL]
+ str z2, [x7, #2, MUL VL]
+ str z3, [x7, #3, MUL VL]
+ str z4, [x7, #4, MUL VL]
+ str z5, [x7, #5, MUL VL]
+ str z6, [x7, #6, MUL VL]
+ str z7, [x7, #7, MUL VL]
+ str z8, [x7, #8, MUL VL]
+ str z9, [x7, #9, MUL VL]
+ str z10, [x7, #10, MUL VL]
+ str z11, [x7, #11, MUL VL]
+ str z12, [x7, #12, MUL VL]
+ str z13, [x7, #13, MUL VL]
+ str z14, [x7, #14, MUL VL]
+ str z15, [x7, #15, MUL VL]
+ str z16, [x7, #16, MUL VL]
+ str z17, [x7, #17, MUL VL]
+ str z18, [x7, #18, MUL VL]
+ str z19, [x7, #19, MUL VL]
+ str z20, [x7, #20, MUL VL]
+ str z21, [x7, #21, MUL VL]
+ str z22, [x7, #22, MUL VL]
+ str z23, [x7, #23, MUL VL]
+ str z24, [x7, #24, MUL VL]
+ str z25, [x7, #25, MUL VL]
+ str z26, [x7, #26, MUL VL]
+ str z27, [x7, #27, MUL VL]
+ str z28, [x7, #28, MUL VL]
+ str z29, [x7, #29, MUL VL]
+ str z30, [x7, #30, MUL VL]
+ str z31, [x7, #31, MUL VL]
+
+ ldr x7, =p_out
+ str p0, [x7, #0, MUL VL]
+ str p1, [x7, #1, MUL VL]
+ str p2, [x7, #2, MUL VL]
+ str p3, [x7, #3, MUL VL]
+ str p4, [x7, #4, MUL VL]
+ str p5, [x7, #5, MUL VL]
+ str p6, [x7, #6, MUL VL]
+ str p7, [x7, #7, MUL VL]
+ str p8, [x7, #8, MUL VL]
+ str p9, [x7, #9, MUL VL]
+ str p10, [x7, #10, MUL VL]
+ str p11, [x7, #11, MUL VL]
+ str p12, [x7, #12, MUL VL]
+ str p13, [x7, #13, MUL VL]
+ str p14, [x7, #14, MUL VL]
+ str p15, [x7, #15, MUL VL]
+
+ // Only save FFR if it exists
+ cbz x4, wait_for_reads
+ ldr x7, =ffr_out
+ rdffr p0.b
+ str p0, [x7]
+
+wait_for_reads:
+ // Wait for the parent
+ brk #0
+
+ // Ensure we don't leave ourselves in streaming mode
+ cbz x1, out
+ msr S3_3_C4_C2_2, xzr
+
+out:
+ ldp x11, x12, [sp, #-0x10]
+ ret
diff --git a/tools/testing/selftests/arm64/fp/fp-ptrace.c b/tools/testing/selftests/arm64/fp/fp-ptrace.c
new file mode 100644
index 000000000000..c7ceafe5f471
--- /dev/null
+++ b/tools/testing/selftests/arm64/fp/fp-ptrace.c
@@ -0,0 +1,1503 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright (C) 2023 ARM Limited.
+ * Original author: Mark Brown <broonie@kernel.org>
+ */
+
+#define _GNU_SOURCE
+
+#include <errno.h>
+#include <stdbool.h>
+#include <stddef.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+
+#include <sys/auxv.h>
+#include <sys/prctl.h>
+#include <sys/ptrace.h>
+#include <sys/types.h>
+#include <sys/uio.h>
+#include <sys/wait.h>
+
+#include <linux/kernel.h>
+
+#include <asm/sigcontext.h>
+#include <asm/sve_context.h>
+#include <asm/ptrace.h>
+
+#include "../../kselftest.h"
+
+#include "fp-ptrace.h"
+
+/* <linux/elf.h> and <sys/auxv.h> don't like each other, so: */
+#ifndef NT_ARM_SVE
+#define NT_ARM_SVE 0x405
+#endif
+
+#ifndef NT_ARM_SSVE
+#define NT_ARM_SSVE 0x40b
+#endif
+
+#ifndef NT_ARM_ZA
+#define NT_ARM_ZA 0x40c
+#endif
+
+#ifndef NT_ARM_ZT
+#define NT_ARM_ZT 0x40d
+#endif
+
+#define ARCH_VQ_MAX 256
+
+/* VL 128..2048 in powers of 2 */
+#define MAX_NUM_VLS 5
+
+#define NUM_FPR 32
+__uint128_t v_in[NUM_FPR];
+__uint128_t v_expected[NUM_FPR];
+__uint128_t v_out[NUM_FPR];
+
+char z_in[__SVE_ZREGS_SIZE(ARCH_VQ_MAX)];
+char z_expected[__SVE_ZREGS_SIZE(ARCH_VQ_MAX)];
+char z_out[__SVE_ZREGS_SIZE(ARCH_VQ_MAX)];
+
+char p_in[__SVE_PREGS_SIZE(ARCH_VQ_MAX)];
+char p_expected[__SVE_PREGS_SIZE(ARCH_VQ_MAX)];
+char p_out[__SVE_PREGS_SIZE(ARCH_VQ_MAX)];
+
+char ffr_in[__SVE_PREG_SIZE(ARCH_VQ_MAX)];
+char ffr_expected[__SVE_PREG_SIZE(ARCH_VQ_MAX)];
+char ffr_out[__SVE_PREG_SIZE(ARCH_VQ_MAX)];
+
+char za_in[ZA_SIG_REGS_SIZE(ARCH_VQ_MAX)];
+char za_expected[ZA_SIG_REGS_SIZE(ARCH_VQ_MAX)];
+char za_out[ZA_SIG_REGS_SIZE(ARCH_VQ_MAX)];
+
+char zt_in[ZT_SIG_REG_BYTES];
+char zt_expected[ZT_SIG_REG_BYTES];
+char zt_out[ZT_SIG_REG_BYTES];
+
+uint64_t sve_vl_out;
+uint64_t sme_vl_out;
+uint64_t svcr_in, svcr_expected, svcr_out;
+
+void load_and_save(int sve, int sme, int sme2, int fa64);
+
+static bool got_alarm;
+
+static void handle_alarm(int sig, siginfo_t *info, void *context)
+{
+ got_alarm = true;
+}
+
+#ifdef CONFIG_CPU_BIG_ENDIAN
+static __uint128_t arm64_cpu_to_le128(__uint128_t x)
+{
+ u64 a = swab64(x);
+ u64 b = swab64(x >> 64);
+
+ return ((__uint128_t)a << 64) | b;
+}
+#else
+static __uint128_t arm64_cpu_to_le128(__uint128_t x)
+{
+ return x;
+}
+#endif
+
+#define arm64_le128_to_cpu(x) arm64_cpu_to_le128(x)
+
+static bool sve_supported(void)
+{
+ return getauxval(AT_HWCAP) & HWCAP_SVE;
+}
+
+static bool sme_supported(void)
+{
+ return getauxval(AT_HWCAP2) & HWCAP2_SME;
+}
+
+static bool sme2_supported(void)
+{
+ return getauxval(AT_HWCAP2) & HWCAP2_SME2;
+}
+
+static bool fa64_supported(void)
+{
+ return getauxval(AT_HWCAP2) & HWCAP2_SME_FA64;
+}
+
+static bool compare_buffer(const char *name, void *out,
+ void *expected, size_t size)
+{
+ void *tmp;
+
+ if (memcmp(out, expected, size) == 0)
+ return true;
+
+ ksft_print_msg("Mismatch in %s\n", name);
+
+ /* Did we just get zeros back? */
+ tmp = malloc(size);
+ if (!tmp) {
+ ksft_print_msg("OOM allocating %lu bytes for %s\n",
+ size, name);
+ ksft_exit_fail();
+ }
+ memset(tmp, 0, size);
+
+ if (memcmp(out, tmp, size) == 0)
+ ksft_print_msg("%s is zero\n", name);
+
+ free(tmp);
+
+ return false;
+}
+
+struct test_config {
+ int sve_vl_in;
+ int sve_vl_expected;
+ int sme_vl_in;
+ int sme_vl_expected;
+ int svcr_in;
+ int svcr_expected;
+};
+
+struct test_definition {
+ const char *name;
+ bool sve_vl_change;
+ bool (*supported)(struct test_config *config);
+ void (*set_expected_values)(struct test_config *config);
+ void (*modify_values)(pid_t child, struct test_config *test_config);
+};
+
+static int vl_in(struct test_config *config)
+{
+ int vl;
+
+ if (config->svcr_in & SVCR_SM)
+ vl = config->sme_vl_in;
+ else
+ vl = config->sve_vl_in;
+
+ return vl;
+}
+
+static int vl_expected(struct test_config *config)
+{
+ int vl;
+
+ if (config->svcr_expected & SVCR_SM)
+ vl = config->sme_vl_expected;
+ else
+ vl = config->sve_vl_expected;
+
+ return vl;
+}
+
+static void run_child(struct test_config *config)
+{
+ int ret;
+
+ /* Let the parent attach to us */
+ ret = ptrace(PTRACE_TRACEME, 0, 0, 0);
+ if (ret < 0)
+ ksft_exit_fail_msg("PTRACE_TRACEME failed: %s (%d)\n",
+ strerror(errno), errno);
+
+ /* VL setup */
+ if (sve_supported()) {
+ ret = prctl(PR_SVE_SET_VL, config->sve_vl_in);
+ if (ret != config->sve_vl_in) {
+ ksft_print_msg("Failed to set SVE VL %d: %d\n",
+ config->sve_vl_in, ret);
+ }
+ }
+
+ if (sme_supported()) {
+ ret = prctl(PR_SME_SET_VL, config->sme_vl_in);
+ if (ret != config->sme_vl_in) {
+ ksft_print_msg("Failed to set SME VL %d: %d\n",
+ config->sme_vl_in, ret);
+ }
+ }
+
+ /* Load values and wait for the parent */
+ load_and_save(sve_supported(), sme_supported(),
+ sme2_supported(), fa64_supported());
+
+ exit(0);
+}
+
+static void read_one_child_regs(pid_t child, char *name,
+ struct iovec *iov_parent,
+ struct iovec *iov_child)
+{
+ int len = iov_parent->iov_len;
+ int ret;
+
+ ret = process_vm_readv(child, iov_parent, 1, iov_child, 1, 0);
+ if (ret == -1)
+ ksft_print_msg("%s read failed: %s (%d)\n",
+ name, strerror(errno), errno);
+ else if (ret != len)
+ ksft_print_msg("Short read of %s: %d\n", name, ret);
+}
+
+static void read_child_regs(pid_t child)
+{
+ struct iovec iov_parent, iov_child;
+
+ /*
+ * Since the child fork()ed from us the buffer addresses are
+ * the same in parent and child.
+ */
+ iov_parent.iov_base = &v_out;
+ iov_parent.iov_len = sizeof(v_out);
+ iov_child.iov_base = &v_out;
+ iov_child.iov_len = sizeof(v_out);
+ read_one_child_regs(child, "FPSIMD", &iov_parent, &iov_child);
+
+ if (sve_supported() || sme_supported()) {
+ iov_parent.iov_base = &sve_vl_out;
+ iov_parent.iov_len = sizeof(sve_vl_out);
+ iov_child.iov_base = &sve_vl_out;
+ iov_child.iov_len = sizeof(sve_vl_out);
+ read_one_child_regs(child, "SVE VL", &iov_parent, &iov_child);
+
+ iov_parent.iov_base = &z_out;
+ iov_parent.iov_len = sizeof(z_out);
+ iov_child.iov_base = &z_out;
+ iov_child.iov_len = sizeof(z_out);
+ read_one_child_regs(child, "Z", &iov_parent, &iov_child);
+
+ iov_parent.iov_base = &p_out;
+ iov_parent.iov_len = sizeof(p_out);
+ iov_child.iov_base = &p_out;
+ iov_child.iov_len = sizeof(p_out);
+ read_one_child_regs(child, "P", &iov_parent, &iov_child);
+
+ iov_parent.iov_base = &ffr_out;
+ iov_parent.iov_len = sizeof(ffr_out);
+ iov_child.iov_base = &ffr_out;
+ iov_child.iov_len = sizeof(ffr_out);
+ read_one_child_regs(child, "FFR", &iov_parent, &iov_child);
+ }
+
+ if (sme_supported()) {
+ iov_parent.iov_base = &sme_vl_out;
+ iov_parent.iov_len = sizeof(sme_vl_out);
+ iov_child.iov_base = &sme_vl_out;
+ iov_child.iov_len = sizeof(sme_vl_out);
+ read_one_child_regs(child, "SME VL", &iov_parent, &iov_child);
+
+ iov_parent.iov_base = &svcr_out;
+ iov_parent.iov_len = sizeof(svcr_out);
+ iov_child.iov_base = &svcr_out;
+ iov_child.iov_len = sizeof(svcr_out);
+ read_one_child_regs(child, "SVCR", &iov_parent, &iov_child);
+
+ iov_parent.iov_base = &za_out;
+ iov_parent.iov_len = sizeof(za_out);
+ iov_child.iov_base = &za_out;
+ iov_child.iov_len = sizeof(za_out);
+ read_one_child_regs(child, "ZA", &iov_parent, &iov_child);
+ }
+
+ if (sme2_supported()) {
+ iov_parent.iov_base = &zt_out;
+ iov_parent.iov_len = sizeof(zt_out);
+ iov_child.iov_base = &zt_out;
+ iov_child.iov_len = sizeof(zt_out);
+ read_one_child_regs(child, "ZT", &iov_parent, &iov_child);
+ }
+}
+
+static bool continue_breakpoint(pid_t child,
+ enum __ptrace_request restart_type)
+{
+ struct user_pt_regs pt_regs;
+ struct iovec iov;
+ int ret;
+
+ /* Get PC */
+ iov.iov_base = &pt_regs;
+ iov.iov_len = sizeof(pt_regs);
+ ret = ptrace(PTRACE_GETREGSET, child, NT_PRSTATUS, &iov);
+ if (ret < 0) {
+ ksft_print_msg("Failed to get PC: %s (%d)\n",
+ strerror(errno), errno);
+ return false;
+ }
+
+ /* Skip over the BRK */
+ pt_regs.pc += 4;
+ ret = ptrace(PTRACE_SETREGSET, child, NT_PRSTATUS, &iov);
+ if (ret < 0) {
+ ksft_print_msg("Failed to skip BRK: %s (%d)\n",
+ strerror(errno), errno);
+ return false;
+ }
+
+ /* Restart */
+ ret = ptrace(restart_type, child, 0, 0);
+ if (ret < 0) {
+ ksft_print_msg("Failed to restart child: %s (%d)\n",
+ strerror(errno), errno);
+ return false;
+ }
+
+ return true;
+}
+
+static bool check_ptrace_values_sve(pid_t child, struct test_config *config)
+{
+ struct user_sve_header *sve;
+ struct user_fpsimd_state *fpsimd;
+ struct iovec iov;
+ int ret, vq;
+ bool pass = true;
+
+ if (!sve_supported())
+ return true;
+
+ vq = __sve_vq_from_vl(config->sve_vl_in);
+
+ iov.iov_len = SVE_PT_SVE_OFFSET + SVE_PT_SVE_SIZE(vq, SVE_PT_REGS_SVE);
+ iov.iov_base = malloc(iov.iov_len);
+ if (!iov.iov_base) {
+ ksft_print_msg("OOM allocating %lu byte SVE buffer\n",
+ iov.iov_len);
+ return false;
+ }
+
+ ret = ptrace(PTRACE_GETREGSET, child, NT_ARM_SVE, &iov);
+ if (ret != 0) {
+ ksft_print_msg("Failed to read initial SVE: %s (%d)\n",
+ strerror(errno), errno);
+ pass = false;
+ goto out;
+ }
+
+ sve = iov.iov_base;
+
+ if (sve->vl != config->sve_vl_in) {
+ ksft_print_msg("Mismatch in initial SVE VL: %d != %d\n",
+ sve->vl, config->sve_vl_in);
+ pass = false;
+ }
+
+ /* If we are in streaming mode we should just read FPSIMD */
+ if ((config->svcr_in & SVCR_SM) && (sve->flags & SVE_PT_REGS_SVE)) {
+ ksft_print_msg("NT_ARM_SVE reports SVE with PSTATE.SM\n");
+ pass = false;
+ }
+
+ if (sve->size != SVE_PT_SIZE(vq, sve->flags)) {
+ ksft_print_msg("Mismatch in SVE header size: %d != %lu\n",
+ sve->size, SVE_PT_SIZE(vq, sve->flags));
+ pass = false;
+ }
+
+ /* The registers might be in completely different formats! */
+ if (sve->flags & SVE_PT_REGS_SVE) {
+ if (!compare_buffer("initial SVE Z",
+ iov.iov_base + SVE_PT_SVE_ZREG_OFFSET(vq, 0),
+ z_in, SVE_PT_SVE_ZREGS_SIZE(vq)))
+ pass = false;
+
+ if (!compare_buffer("initial SVE P",
+ iov.iov_base + SVE_PT_SVE_PREG_OFFSET(vq, 0),
+ p_in, SVE_PT_SVE_PREGS_SIZE(vq)))
+ pass = false;
+
+ if (!compare_buffer("initial SVE FFR",
+ iov.iov_base + SVE_PT_SVE_FFR_OFFSET(vq),
+ ffr_in, SVE_PT_SVE_PREG_SIZE(vq)))
+ pass = false;
+ } else {
+ fpsimd = iov.iov_base + SVE_PT_FPSIMD_OFFSET;
+ if (!compare_buffer("initial V via SVE", &fpsimd->vregs[0],
+ v_in, sizeof(v_in)))
+ pass = false;
+ }
+
+out:
+ free(iov.iov_base);
+ return pass;
+}
+
+static bool check_ptrace_values_ssve(pid_t child, struct test_config *config)
+{
+ struct user_sve_header *sve;
+ struct user_fpsimd_state *fpsimd;
+ struct iovec iov;
+ int ret, vq;
+ bool pass = true;
+
+ if (!sme_supported())
+ return true;
+
+ vq = __sve_vq_from_vl(config->sme_vl_in);
+
+ iov.iov_len = SVE_PT_SVE_OFFSET + SVE_PT_SVE_SIZE(vq, SVE_PT_REGS_SVE);
+ iov.iov_base = malloc(iov.iov_len);
+ if (!iov.iov_base) {
+ ksft_print_msg("OOM allocating %lu byte SSVE buffer\n",
+ iov.iov_len);
+ return false;
+ }
+
+ ret = ptrace(PTRACE_GETREGSET, child, NT_ARM_SSVE, &iov);
+ if (ret != 0) {
+ ksft_print_msg("Failed to read initial SSVE: %s (%d)\n",
+ strerror(errno), errno);
+ pass = false;
+ goto out;
+ }
+
+ sve = iov.iov_base;
+
+ if (sve->vl != config->sme_vl_in) {
+ ksft_print_msg("Mismatch in initial SSVE VL: %d != %d\n",
+ sve->vl, config->sme_vl_in);
+ pass = false;
+ }
+
+ if ((config->svcr_in & SVCR_SM) && !(sve->flags & SVE_PT_REGS_SVE)) {
+ ksft_print_msg("NT_ARM_SSVE reports FPSIMD with PSTATE.SM\n");
+ pass = false;
+ }
+
+ if (sve->size != SVE_PT_SIZE(vq, sve->flags)) {
+ ksft_print_msg("Mismatch in SSVE header size: %d != %lu\n",
+ sve->size, SVE_PT_SIZE(vq, sve->flags));
+ pass = false;
+ }
+
+ /* The registers might be in completely different formats! */
+ if (sve->flags & SVE_PT_REGS_SVE) {
+ if (!compare_buffer("initial SSVE Z",
+ iov.iov_base + SVE_PT_SVE_ZREG_OFFSET(vq, 0),
+ z_in, SVE_PT_SVE_ZREGS_SIZE(vq)))
+ pass = false;
+
+ if (!compare_buffer("initial SSVE P",
+ iov.iov_base + SVE_PT_SVE_PREG_OFFSET(vq, 0),
+ p_in, SVE_PT_SVE_PREGS_SIZE(vq)))
+ pass = false;
+
+ if (!compare_buffer("initial SSVE FFR",
+ iov.iov_base + SVE_PT_SVE_FFR_OFFSET(vq),
+ ffr_in, SVE_PT_SVE_PREG_SIZE(vq)))
+ pass = false;
+ } else {
+ fpsimd = iov.iov_base + SVE_PT_FPSIMD_OFFSET;
+ if (!compare_buffer("initial V via SSVE",
+ &fpsimd->vregs[0], v_in, sizeof(v_in)))
+ pass = false;
+ }
+
+out:
+ free(iov.iov_base);
+ return pass;
+}
+
+static bool check_ptrace_values_za(pid_t child, struct test_config *config)
+{
+ struct user_za_header *za;
+ struct iovec iov;
+ int ret, vq;
+ bool pass = true;
+
+ if (!sme_supported())
+ return true;
+
+ vq = __sve_vq_from_vl(config->sme_vl_in);
+
+ iov.iov_len = ZA_SIG_CONTEXT_SIZE(vq);
+ iov.iov_base = malloc(iov.iov_len);
+ if (!iov.iov_base) {
+ ksft_print_msg("OOM allocating %lu byte ZA buffer\n",
+ iov.iov_len);
+ return false;
+ }
+
+ ret = ptrace(PTRACE_GETREGSET, child, NT_ARM_ZA, &iov);
+ if (ret != 0) {
+ ksft_print_msg("Failed to read initial ZA: %s (%d)\n",
+ strerror(errno), errno);
+ pass = false;
+ goto out;
+ }
+
+ za = iov.iov_base;
+
+ if (za->vl != config->sme_vl_in) {
+ ksft_print_msg("Mismatch in initial SME VL: %d != %d\n",
+ za->vl, config->sme_vl_in);
+ pass = false;
+ }
+
+ /* If PSTATE.ZA is not set we should just read the header */
+ if (config->svcr_in & SVCR_ZA) {
+ if (za->size != ZA_PT_SIZE(vq)) {
+ ksft_print_msg("Unexpected ZA ptrace read size: %d != %lu\n",
+ za->size, ZA_PT_SIZE(vq));
+ pass = false;
+ }
+
+ if (!compare_buffer("initial ZA",
+ iov.iov_base + ZA_PT_ZA_OFFSET,
+ za_in, ZA_PT_ZA_SIZE(vq)))
+ pass = false;
+ } else {
+ if (za->size != sizeof(*za)) {
+ ksft_print_msg("Unexpected ZA ptrace read size: %d != %lu\n",
+ za->size, sizeof(*za));
+ pass = false;
+ }
+ }
+
+out:
+ free(iov.iov_base);
+ return pass;
+}
+
+static bool check_ptrace_values_zt(pid_t child, struct test_config *config)
+{
+ uint8_t buf[512];
+ struct iovec iov;
+ int ret;
+
+ if (!sme2_supported())
+ return true;
+
+ iov.iov_base = &buf;
+ iov.iov_len = ZT_SIG_REG_BYTES;
+ ret = ptrace(PTRACE_GETREGSET, child, NT_ARM_ZT, &iov);
+ if (ret != 0) {
+ ksft_print_msg("Failed to read initial ZT: %s (%d)\n",
+ strerror(errno), errno);
+ return false;
+ }
+
+ return compare_buffer("initial ZT", buf, zt_in, ZT_SIG_REG_BYTES);
+}
+
+
+static bool check_ptrace_values(pid_t child, struct test_config *config)
+{
+ bool pass = true;
+ struct user_fpsimd_state fpsimd;
+ struct iovec iov;
+ int ret;
+
+ iov.iov_base = &fpsimd;
+ iov.iov_len = sizeof(fpsimd);
+ ret = ptrace(PTRACE_GETREGSET, child, NT_PRFPREG, &iov);
+ if (ret == 0) {
+ if (!compare_buffer("initial V", &fpsimd.vregs, v_in,
+ sizeof(v_in))) {
+ pass = false;
+ }
+ } else {
+ ksft_print_msg("Failed to read initial V: %s (%d)\n",
+ strerror(errno), errno);
+ pass = false;
+ }
+
+ if (!check_ptrace_values_sve(child, config))
+ pass = false;
+
+ if (!check_ptrace_values_ssve(child, config))
+ pass = false;
+
+ if (!check_ptrace_values_za(child, config))
+ pass = false;
+
+ if (!check_ptrace_values_zt(child, config))
+ pass = false;
+
+ return pass;
+}
+
+static bool run_parent(pid_t child, struct test_definition *test,
+ struct test_config *config)
+{
+ int wait_status, ret;
+ pid_t pid;
+ bool pass;
+
+ /* Initial attach */
+ while (1) {
+ pid = waitpid(child, &wait_status, 0);
+ if (pid < 0) {
+ if (errno == EINTR)
+ continue;
+ ksft_exit_fail_msg("waitpid() failed: %s (%d)\n",
+ strerror(errno), errno);
+ }
+
+ if (pid == child)
+ break;
+ }
+
+ if (WIFEXITED(wait_status)) {
+ ksft_print_msg("Child exited loading values with status %d\n",
+ WEXITSTATUS(wait_status));
+ pass = false;
+ goto out;
+ }
+
+ if (WIFSIGNALED(wait_status)) {
+ ksft_print_msg("Child died from signal %d loading values\n",
+ WTERMSIG(wait_status));
+ pass = false;
+ goto out;
+ }
+
+ /* Read initial values via ptrace */
+ pass = check_ptrace_values(child, config);
+
+ /* Do whatever writes we want to do */
+ if (test->modify_values)
+ test->modify_values(child, config);
+
+ if (!continue_breakpoint(child, PTRACE_CONT))
+ goto cleanup;
+
+ while (1) {
+ pid = waitpid(child, &wait_status, 0);
+ if (pid < 0) {
+ if (errno == EINTR)
+ continue;
+ ksft_exit_fail_msg("waitpid() failed: %s (%d)\n",
+ strerror(errno), errno);
+ }
+
+ if (pid == child)
+ break;
+ }
+
+ if (WIFEXITED(wait_status)) {
+ ksft_print_msg("Child exited saving values with status %d\n",
+ WEXITSTATUS(wait_status));
+ pass = false;
+ goto out;
+ }
+
+ if (WIFSIGNALED(wait_status)) {
+ ksft_print_msg("Child died from signal %d saving values\n",
+ WTERMSIG(wait_status));
+ pass = false;
+ goto out;
+ }
+
+ /* See what happened as a result */
+ read_child_regs(child);
+
+ if (!continue_breakpoint(child, PTRACE_DETACH))
+ goto cleanup;
+
+ /* The child should exit cleanly */
+ got_alarm = false;
+ alarm(1);
+ while (1) {
+ if (got_alarm) {
+ ksft_print_msg("Wait for child timed out\n");
+ goto cleanup;
+ }
+
+ pid = waitpid(child, &wait_status, 0);
+ if (pid < 0) {
+ if (errno == EINTR)
+ continue;
+ ksft_exit_fail_msg("waitpid() failed: %s (%d)\n",
+ strerror(errno), errno);
+ }
+
+ if (pid == child)
+ break;
+ }
+ alarm(0);
+
+ if (got_alarm) {
+ ksft_print_msg("Timed out waiting for child\n");
+ pass = false;
+ goto cleanup;
+ }
+
+ if (pid == child && WIFSIGNALED(wait_status)) {
+ ksft_print_msg("Child died from signal %d cleaning up\n",
+ WTERMSIG(wait_status));
+ pass = false;
+ goto out;
+ }
+
+ if (pid == child && WIFEXITED(wait_status)) {
+ if (WEXITSTATUS(wait_status) != 0) {
+ ksft_print_msg("Child exited with error %d\n",
+ WEXITSTATUS(wait_status));
+ pass = false;
+ }
+ } else {
+ ksft_print_msg("Child did not exit cleanly\n");
+ pass = false;
+ goto cleanup;
+ }
+
+ goto out;
+
+cleanup:
+ ret = kill(child, SIGKILL);
+ if (ret != 0) {
+ ksft_print_msg("kill() failed: %s (%d)\n",
+ strerror(errno), errno);
+ return false;
+ }
+
+ while (1) {
+ pid = waitpid(child, &wait_status, 0);
+ if (pid < 0) {
+ if (errno == EINTR)
+ continue;
+ ksft_exit_fail_msg("waitpid() failed: %s (%d)\n",
+ strerror(errno), errno);
+ }
+
+ if (pid == child)
+ break;
+ }
+
+out:
+ return pass;
+}
+
+static void fill_random(void *buf, size_t size)
+{
+ int i;
+ uint32_t *lbuf = buf;
+
+ /* random() returns a 32 bit number regardless of the size of long */
+ for (i = 0; i < size / sizeof(uint32_t); i++)
+ lbuf[i] = random();
+}
+
+static void fill_random_ffr(void *buf, size_t vq)
+{
+ uint8_t *lbuf = buf;
+ int bits, i;
+
+ /*
+ * Only values with a continuous set of 0..n bits set are
+ * valid for FFR, set all bits then clear a random number of
+ * high bits.
+ */
+ memset(buf, 0, __SVE_FFR_SIZE(vq));
+
+ bits = random() % (__SVE_FFR_SIZE(vq) * 8);
+ for (i = 0; i < bits / 8; i++)
+ lbuf[i] = 0xff;
+ if (bits / 8 != __SVE_FFR_SIZE(vq))
+ lbuf[i] = (1 << (bits % 8)) - 1;
+}
+
+static void fpsimd_to_sve(__uint128_t *v, char *z, int vl)
+{
+ int vq = __sve_vq_from_vl(vl);
+ int i;
+ __uint128_t *p;
+
+ if (!vl)
+ return;
+
+ for (i = 0; i < __SVE_NUM_ZREGS; i++) {
+ p = (__uint128_t *)&z[__SVE_ZREG_OFFSET(vq, i)];
+ *p = arm64_cpu_to_le128(v[i]);
+ }
+}
+
+static void set_initial_values(struct test_config *config)
+{
+ int vq = __sve_vq_from_vl(vl_in(config));
+ int sme_vq = __sve_vq_from_vl(config->sme_vl_in);
+
+ svcr_in = config->svcr_in;
+ svcr_expected = config->svcr_expected;
+ svcr_out = 0;
+
+ fill_random(&v_in, sizeof(v_in));
+ memcpy(v_expected, v_in, sizeof(v_in));
+ memset(v_out, 0, sizeof(v_out));
+
+ /* Changes will be handled in the test case */
+ if (sve_supported() || (config->svcr_in & SVCR_SM)) {
+ /* The low 128 bits of Z are shared with the V registers */
+ fill_random(&z_in, __SVE_ZREGS_SIZE(vq));
+ fpsimd_to_sve(v_in, z_in, vl_in(config));
+ memcpy(z_expected, z_in, __SVE_ZREGS_SIZE(vq));
+ memset(z_out, 0, sizeof(z_out));
+
+ fill_random(&p_in, __SVE_PREGS_SIZE(vq));
+ memcpy(p_expected, p_in, __SVE_PREGS_SIZE(vq));
+ memset(p_out, 0, sizeof(p_out));
+
+ if ((config->svcr_in & SVCR_SM) && !fa64_supported())
+ memset(ffr_in, 0, __SVE_PREG_SIZE(vq));
+ else
+ fill_random_ffr(&ffr_in, vq);
+ memcpy(ffr_expected, ffr_in, __SVE_PREG_SIZE(vq));
+ memset(ffr_out, 0, __SVE_PREG_SIZE(vq));
+ }
+
+ if (config->svcr_in & SVCR_ZA)
+ fill_random(za_in, ZA_SIG_REGS_SIZE(sme_vq));
+ else
+ memset(za_in, 0, ZA_SIG_REGS_SIZE(sme_vq));
+ if (config->svcr_expected & SVCR_ZA)
+ memcpy(za_expected, za_in, ZA_SIG_REGS_SIZE(sme_vq));
+ else
+ memset(za_expected, 0, ZA_SIG_REGS_SIZE(sme_vq));
+ if (sme_supported())
+ memset(za_out, 0, sizeof(za_out));
+
+ if (sme2_supported()) {
+ if (config->svcr_in & SVCR_ZA)
+ fill_random(zt_in, ZT_SIG_REG_BYTES);
+ else
+ memset(zt_in, 0, ZT_SIG_REG_BYTES);
+ if (config->svcr_expected & SVCR_ZA)
+ memcpy(zt_expected, zt_in, ZT_SIG_REG_BYTES);
+ else
+ memset(zt_expected, 0, ZT_SIG_REG_BYTES);
+ memset(zt_out, 0, sizeof(zt_out));
+ }
+}
+
+static bool check_memory_values(struct test_config *config)
+{
+ bool pass = true;
+ int vq, sme_vq;
+
+ if (!compare_buffer("saved V", v_out, v_expected, sizeof(v_out)))
+ pass = false;
+
+ vq = __sve_vq_from_vl(vl_expected(config));
+ sme_vq = __sve_vq_from_vl(config->sme_vl_expected);
+
+ if (svcr_out != svcr_expected) {
+ ksft_print_msg("Mismatch in saved SVCR %lx != %lx\n",
+ svcr_out, svcr_expected);
+ pass = false;
+ }
+
+ if (sve_vl_out != config->sve_vl_expected) {
+ ksft_print_msg("Mismatch in SVE VL: %ld != %d\n",
+ sve_vl_out, config->sve_vl_expected);
+ pass = false;
+ }
+
+ if (sme_vl_out != config->sme_vl_expected) {
+ ksft_print_msg("Mismatch in SME VL: %ld != %d\n",
+ sme_vl_out, config->sme_vl_expected);
+ pass = false;
+ }
+
+ if (!compare_buffer("saved Z", z_out, z_expected,
+ __SVE_ZREGS_SIZE(vq)))
+ pass = false;
+
+ if (!compare_buffer("saved P", p_out, p_expected,
+ __SVE_PREGS_SIZE(vq)))
+ pass = false;
+
+ if (!compare_buffer("saved FFR", ffr_out, ffr_expected,
+ __SVE_PREG_SIZE(vq)))
+ pass = false;
+
+ if (!compare_buffer("saved ZA", za_out, za_expected,
+ ZA_PT_ZA_SIZE(sme_vq)))
+ pass = false;
+
+ if (!compare_buffer("saved ZT", zt_out, zt_expected, ZT_SIG_REG_BYTES))
+ pass = false;
+
+ return pass;
+}
+
+static bool sve_sme_same(struct test_config *config)
+{
+ if (config->sve_vl_in != config->sve_vl_expected)
+ return false;
+
+ if (config->sme_vl_in != config->sme_vl_expected)
+ return false;
+
+ if (config->svcr_in != config->svcr_expected)
+ return false;
+
+ return true;
+}
+
+static bool sve_write_supported(struct test_config *config)
+{
+ if (!sve_supported() && !sme_supported())
+ return false;
+
+ if ((config->svcr_in & SVCR_ZA) != (config->svcr_expected & SVCR_ZA))
+ return false;
+
+ if (config->svcr_expected & SVCR_SM) {
+ if (config->sve_vl_in != config->sve_vl_expected) {
+ return false;
+ }
+
+ /* Changing the SME VL disables ZA */
+ if ((config->svcr_expected & SVCR_ZA) &&
+ (config->sme_vl_in != config->sme_vl_expected)) {
+ return false;
+ }
+ } else {
+ if (config->sme_vl_in != config->sme_vl_expected) {
+ return false;
+ }
+ }
+
+ return true;
+}
+
+static void fpsimd_write_expected(struct test_config *config)
+{
+ int vl;
+
+ fill_random(&v_expected, sizeof(v_expected));
+
+ /* The SVE registers are flushed by a FPSIMD write */
+ vl = vl_expected(config);
+
+ memset(z_expected, 0, __SVE_ZREGS_SIZE(__sve_vq_from_vl(vl)));
+ memset(p_expected, 0, __SVE_PREGS_SIZE(__sve_vq_from_vl(vl)));
+ memset(ffr_expected, 0, __SVE_PREG_SIZE(__sve_vq_from_vl(vl)));
+
+ fpsimd_to_sve(v_expected, z_expected, vl);
+}
+
+static void fpsimd_write(pid_t child, struct test_config *test_config)
+{
+ struct user_fpsimd_state fpsimd;
+ struct iovec iov;
+ int ret;
+
+ memset(&fpsimd, 0, sizeof(fpsimd));
+ memcpy(&fpsimd.vregs, v_expected, sizeof(v_expected));
+
+ iov.iov_base = &fpsimd;
+ iov.iov_len = sizeof(fpsimd);
+ ret = ptrace(PTRACE_SETREGSET, child, NT_PRFPREG, &iov);
+ if (ret == -1)
+ ksft_print_msg("FPSIMD set failed: (%s) %d\n",
+ strerror(errno), errno);
+}
+
+static void sve_write_expected(struct test_config *config)
+{
+ int vl = vl_expected(config);
+ int sme_vq = __sve_vq_from_vl(config->sme_vl_expected);
+
+ fill_random(z_expected, __SVE_ZREGS_SIZE(__sve_vq_from_vl(vl)));
+ fill_random(p_expected, __SVE_PREGS_SIZE(__sve_vq_from_vl(vl)));
+
+ if ((svcr_expected & SVCR_SM) && !fa64_supported())
+ memset(ffr_expected, 0, __SVE_PREG_SIZE(sme_vq));
+ else
+ fill_random_ffr(ffr_expected, __sve_vq_from_vl(vl));
+
+ /* Share the low bits of Z with V */
+ fill_random(&v_expected, sizeof(v_expected));
+ fpsimd_to_sve(v_expected, z_expected, vl);
+
+ if (config->sme_vl_in != config->sme_vl_expected) {
+ memset(za_expected, 0, ZA_PT_ZA_SIZE(sme_vq));
+ memset(zt_expected, 0, sizeof(zt_expected));
+ }
+}
+
+static void sve_write(pid_t child, struct test_config *config)
+{
+ struct user_sve_header *sve;
+ struct iovec iov;
+ int ret, vl, vq, regset;
+
+ vl = vl_expected(config);
+ vq = __sve_vq_from_vl(vl);
+
+ iov.iov_len = SVE_PT_SVE_OFFSET + SVE_PT_SVE_SIZE(vq, SVE_PT_REGS_SVE);
+ iov.iov_base = malloc(iov.iov_len);
+ if (!iov.iov_base) {
+ ksft_print_msg("Failed allocating %lu byte SVE write buffer\n",
+ iov.iov_len);
+ return;
+ }
+ memset(iov.iov_base, 0, iov.iov_len);
+
+ sve = iov.iov_base;
+ sve->size = iov.iov_len;
+ sve->flags = SVE_PT_REGS_SVE;
+ sve->vl = vl;
+
+ memcpy(iov.iov_base + SVE_PT_SVE_ZREG_OFFSET(vq, 0),
+ z_expected, SVE_PT_SVE_ZREGS_SIZE(vq));
+ memcpy(iov.iov_base + SVE_PT_SVE_PREG_OFFSET(vq, 0),
+ p_expected, SVE_PT_SVE_PREGS_SIZE(vq));
+ memcpy(iov.iov_base + SVE_PT_SVE_FFR_OFFSET(vq),
+ ffr_expected, SVE_PT_SVE_PREG_SIZE(vq));
+
+ if (svcr_expected & SVCR_SM)
+ regset = NT_ARM_SSVE;
+ else
+ regset = NT_ARM_SVE;
+
+ ret = ptrace(PTRACE_SETREGSET, child, regset, &iov);
+ if (ret != 0)
+ ksft_print_msg("Failed to write SVE: %s (%d)\n",
+ strerror(errno), errno);
+
+ free(iov.iov_base);
+}
+
+static bool za_write_supported(struct test_config *config)
+{
+ if (config->svcr_expected & SVCR_SM) {
+ if (!(config->svcr_in & SVCR_SM))
+ return false;
+
+ /* Changing the SME VL exits streaming mode */
+ if (config->sme_vl_in != config->sme_vl_expected) {
+ return false;
+ }
+ }
+
+ /* Can't disable SM outside a VL change */
+ if ((config->svcr_in & SVCR_SM) &&
+ !(config->svcr_expected & SVCR_SM))
+ return false;
+
+ return true;
+}
+
+static void za_write_expected(struct test_config *config)
+{
+ int sme_vq, sve_vq;
+
+ sme_vq = __sve_vq_from_vl(config->sme_vl_expected);
+
+ if (config->svcr_expected & SVCR_ZA) {
+ fill_random(za_expected, ZA_PT_ZA_SIZE(sme_vq));
+ } else {
+ memset(za_expected, 0, ZA_PT_ZA_SIZE(sme_vq));
+ memset(zt_expected, 0, sizeof(zt_expected));
+ }
+
+ /* Changing the SME VL flushes ZT, SVE state and exits SM */
+ if (config->sme_vl_in != config->sme_vl_expected) {
+ svcr_expected &= ~SVCR_SM;
+
+ sve_vq = __sve_vq_from_vl(vl_expected(config));
+ memset(z_expected, 0, __SVE_ZREGS_SIZE(sve_vq));
+ memset(p_expected, 0, __SVE_PREGS_SIZE(sve_vq));
+ memset(ffr_expected, 0, __SVE_PREG_SIZE(sve_vq));
+ memset(zt_expected, 0, sizeof(zt_expected));
+
+ fpsimd_to_sve(v_expected, z_expected, vl_expected(config));
+ }
+}
+
+static void za_write(pid_t child, struct test_config *config)
+{
+ struct user_za_header *za;
+ struct iovec iov;
+ int ret, vq;
+
+ vq = __sve_vq_from_vl(config->sme_vl_expected);
+
+ if (config->svcr_expected & SVCR_ZA)
+ iov.iov_len = ZA_PT_SIZE(vq);
+ else
+ iov.iov_len = sizeof(*za);
+ iov.iov_base = malloc(iov.iov_len);
+ if (!iov.iov_base) {
+ ksft_print_msg("Failed allocating %lu byte ZA write buffer\n",
+ iov.iov_len);
+ return;
+ }
+ memset(iov.iov_base, 0, iov.iov_len);
+
+ za = iov.iov_base;
+ za->size = iov.iov_len;
+ za->vl = config->sme_vl_expected;
+ if (config->svcr_expected & SVCR_ZA)
+ memcpy(iov.iov_base + ZA_PT_ZA_OFFSET, za_expected,
+ ZA_PT_ZA_SIZE(vq));
+
+ ret = ptrace(PTRACE_SETREGSET, child, NT_ARM_ZA, &iov);
+ if (ret != 0)
+ ksft_print_msg("Failed to write ZA: %s (%d)\n",
+ strerror(errno), errno);
+
+ free(iov.iov_base);
+}
+
+static bool zt_write_supported(struct test_config *config)
+{
+ if (!sme2_supported())
+ return false;
+ if (config->sme_vl_in != config->sme_vl_expected)
+ return false;
+ if (!(config->svcr_expected & SVCR_ZA))
+ return false;
+ if ((config->svcr_in & SVCR_SM) != (config->svcr_expected & SVCR_SM))
+ return false;
+
+ return true;
+}
+
+static void zt_write_expected(struct test_config *config)
+{
+ int sme_vq;
+
+ sme_vq = __sve_vq_from_vl(config->sme_vl_expected);
+
+ if (config->svcr_expected & SVCR_ZA) {
+ fill_random(zt_expected, sizeof(zt_expected));
+ } else {
+ memset(za_expected, 0, ZA_PT_ZA_SIZE(sme_vq));
+ memset(zt_expected, 0, sizeof(zt_expected));
+ }
+}
+
+static void zt_write(pid_t child, struct test_config *config)
+{
+ struct iovec iov;
+ int ret;
+
+ iov.iov_len = ZT_SIG_REG_BYTES;
+ iov.iov_base = zt_expected;
+ ret = ptrace(PTRACE_SETREGSET, child, NT_ARM_ZT, &iov);
+ if (ret != 0)
+ ksft_print_msg("Failed to write ZT: %s (%d)\n",
+ strerror(errno), errno);
+}
+
+/* Actually run a test */
+static void run_test(struct test_definition *test, struct test_config *config)
+{
+ pid_t child;
+ char name[1024];
+ bool pass;
+
+ if (sve_supported() && sme_supported())
+ snprintf(name, sizeof(name), "%s, SVE %d->%d, SME %d/%x->%d/%x",
+ test->name,
+ config->sve_vl_in, config->sve_vl_expected,
+ config->sme_vl_in, config->svcr_in,
+ config->sme_vl_expected, config->svcr_expected);
+ else if (sve_supported())
+ snprintf(name, sizeof(name), "%s, SVE %d->%d", test->name,
+ config->sve_vl_in, config->sve_vl_expected);
+ else if (sme_supported())
+ snprintf(name, sizeof(name), "%s, SME %d/%x->%d/%x",
+ test->name,
+ config->sme_vl_in, config->svcr_in,
+ config->sme_vl_expected, config->svcr_expected);
+ else
+ snprintf(name, sizeof(name), "%s", test->name);
+
+ if (test->supported && !test->supported(config)) {
+ ksft_test_result_skip("%s\n", name);
+ return;
+ }
+
+ set_initial_values(config);
+
+ if (test->set_expected_values)
+ test->set_expected_values(config);
+
+ child = fork();
+ if (child < 0)
+ ksft_exit_fail_msg("fork() failed: %s (%d)\n",
+ strerror(errno), errno);
+ /* run_child() never returns */
+ if (child == 0)
+ run_child(config);
+
+ pass = run_parent(child, test, config);
+ if (!check_memory_values(config))
+ pass = false;
+
+ ksft_test_result(pass, "%s\n", name);
+}
+
+static void run_tests(struct test_definition defs[], int count,
+ struct test_config *config)
+{
+ int i;
+
+ for (i = 0; i < count; i++)
+ run_test(&defs[i], config);
+}
+
+static struct test_definition base_test_defs[] = {
+ {
+ .name = "No writes",
+ .supported = sve_sme_same,
+ },
+ {
+ .name = "FPSIMD write",
+ .supported = sve_sme_same,
+ .set_expected_values = fpsimd_write_expected,
+ .modify_values = fpsimd_write,
+ },
+};
+
+static struct test_definition sve_test_defs[] = {
+ {
+ .name = "SVE write",
+ .supported = sve_write_supported,
+ .set_expected_values = sve_write_expected,
+ .modify_values = sve_write,
+ },
+};
+
+static struct test_definition za_test_defs[] = {
+ {
+ .name = "ZA write",
+ .supported = za_write_supported,
+ .set_expected_values = za_write_expected,
+ .modify_values = za_write,
+ },
+};
+
+static struct test_definition zt_test_defs[] = {
+ {
+ .name = "ZT write",
+ .supported = zt_write_supported,
+ .set_expected_values = zt_write_expected,
+ .modify_values = zt_write,
+ },
+};
+
+static int sve_vls[MAX_NUM_VLS], sme_vls[MAX_NUM_VLS];
+static int sve_vl_count, sme_vl_count;
+
+static void probe_vls(const char *name, int vls[], int *vl_count, int set_vl)
+{
+ unsigned int vq;
+ int vl;
+
+ *vl_count = 0;
+
+ for (vq = ARCH_VQ_MAX; vq > 0; vq /= 2) {
+ vl = prctl(set_vl, vq * 16);
+ if (vl == -1)
+ ksft_exit_fail_msg("SET_VL failed: %s (%d)\n",
+ strerror(errno), errno);
+
+ vl &= PR_SVE_VL_LEN_MASK;
+
+ if (*vl_count && (vl == vls[*vl_count - 1]))
+ break;
+
+ vq = sve_vq_from_vl(vl);
+
+ vls[*vl_count] = vl;
+ *vl_count += 1;
+ }
+
+ if (*vl_count > 2) {
+ /* Just use the minimum and maximum */
+ vls[1] = vls[*vl_count - 1];
+ ksft_print_msg("%d %s VLs, using %d and %d\n",
+ *vl_count, name, vls[0], vls[1]);
+ *vl_count = 2;
+ } else {
+ ksft_print_msg("%d %s VLs\n", *vl_count, name);
+ }
+}
+
+static struct {
+ int svcr_in, svcr_expected;
+} svcr_combinations[] = {
+ { .svcr_in = 0, .svcr_expected = 0, },
+ { .svcr_in = 0, .svcr_expected = SVCR_SM, },
+ { .svcr_in = 0, .svcr_expected = SVCR_ZA, },
+ /* Can't enable both SM and ZA with a single ptrace write */
+
+ { .svcr_in = SVCR_SM, .svcr_expected = 0, },
+ { .svcr_in = SVCR_SM, .svcr_expected = SVCR_SM, },
+ { .svcr_in = SVCR_SM, .svcr_expected = SVCR_ZA, },
+ { .svcr_in = SVCR_SM, .svcr_expected = SVCR_SM | SVCR_ZA, },
+
+ { .svcr_in = SVCR_ZA, .svcr_expected = 0, },
+ { .svcr_in = SVCR_ZA, .svcr_expected = SVCR_SM, },
+ { .svcr_in = SVCR_ZA, .svcr_expected = SVCR_ZA, },
+ { .svcr_in = SVCR_ZA, .svcr_expected = SVCR_SM | SVCR_ZA, },
+
+ { .svcr_in = SVCR_SM | SVCR_ZA, .svcr_expected = 0, },
+ { .svcr_in = SVCR_SM | SVCR_ZA, .svcr_expected = SVCR_SM, },
+ { .svcr_in = SVCR_SM | SVCR_ZA, .svcr_expected = SVCR_ZA, },
+ { .svcr_in = SVCR_SM | SVCR_ZA, .svcr_expected = SVCR_SM | SVCR_ZA, },
+};
+
+static void run_sve_tests(void)
+{
+ struct test_config test_config;
+ int i, j;
+
+ if (!sve_supported())
+ return;
+
+ test_config.sme_vl_in = sme_vls[0];
+ test_config.sme_vl_expected = sme_vls[0];
+ test_config.svcr_in = 0;
+ test_config.svcr_expected = 0;
+
+ for (i = 0; i < sve_vl_count; i++) {
+ test_config.sve_vl_in = sve_vls[i];
+
+ for (j = 0; j < sve_vl_count; j++) {
+ test_config.sve_vl_expected = sve_vls[j];
+
+ run_tests(base_test_defs,
+ ARRAY_SIZE(base_test_defs),
+ &test_config);
+ if (sve_supported())
+ run_tests(sve_test_defs,
+ ARRAY_SIZE(sve_test_defs),
+ &test_config);
+ }
+ }
+
+}
+
+static void run_sme_tests(void)
+{
+ struct test_config test_config;
+ int i, j, k;
+
+ if (!sme_supported())
+ return;
+
+ test_config.sve_vl_in = sve_vls[0];
+ test_config.sve_vl_expected = sve_vls[0];
+
+ /*
+ * Every SME VL/SVCR combination
+ */
+ for (i = 0; i < sme_vl_count; i++) {
+ test_config.sme_vl_in = sme_vls[i];
+
+ for (j = 0; j < sme_vl_count; j++) {
+ test_config.sme_vl_expected = sme_vls[j];
+
+ for (k = 0; k < ARRAY_SIZE(svcr_combinations); k++) {
+ test_config.svcr_in = svcr_combinations[k].svcr_in;
+ test_config.svcr_expected = svcr_combinations[k].svcr_expected;
+
+ run_tests(base_test_defs,
+ ARRAY_SIZE(base_test_defs),
+ &test_config);
+ run_tests(sve_test_defs,
+ ARRAY_SIZE(sve_test_defs),
+ &test_config);
+ run_tests(za_test_defs,
+ ARRAY_SIZE(za_test_defs),
+ &test_config);
+
+ if (sme2_supported())
+ run_tests(zt_test_defs,
+ ARRAY_SIZE(zt_test_defs),
+ &test_config);
+ }
+ }
+ }
+}
+
+int main(void)
+{
+ struct test_config test_config;
+ struct sigaction sa;
+ int tests, ret, tmp;
+
+ srandom(getpid());
+
+ ksft_print_header();
+
+ if (sve_supported()) {
+ probe_vls("SVE", sve_vls, &sve_vl_count, PR_SVE_SET_VL);
+
+ tests = ARRAY_SIZE(base_test_defs) +
+ ARRAY_SIZE(sve_test_defs);
+ tests *= sve_vl_count * sve_vl_count;
+ } else {
+ /* Only run the FPSIMD tests */
+ sve_vl_count = 1;
+ tests = ARRAY_SIZE(base_test_defs);
+ }
+
+ if (sme_supported()) {
+ probe_vls("SME", sme_vls, &sme_vl_count, PR_SME_SET_VL);
+
+ tmp = ARRAY_SIZE(base_test_defs) + ARRAY_SIZE(sve_test_defs)
+ + ARRAY_SIZE(za_test_defs);
+
+ if (sme2_supported())
+ tmp += ARRAY_SIZE(zt_test_defs);
+
+ tmp *= sme_vl_count * sme_vl_count;
+ tmp *= ARRAY_SIZE(svcr_combinations);
+ tests += tmp;
+ } else {
+ sme_vl_count = 1;
+ }
+
+ if (sme2_supported())
+ ksft_print_msg("SME2 supported\n");
+
+ if (fa64_supported())
+ ksft_print_msg("FA64 supported\n");
+
+ ksft_set_plan(tests);
+
+ /* Get signal handers ready before we start any children */
+ memset(&sa, 0, sizeof(sa));
+ sa.sa_sigaction = handle_alarm;
+ sa.sa_flags = SA_RESTART | SA_SIGINFO;
+ sigemptyset(&sa.sa_mask);
+ ret = sigaction(SIGALRM, &sa, NULL);
+ if (ret < 0)
+ ksft_print_msg("Failed to install SIGALRM handler: %s (%d)\n",
+ strerror(errno), errno);
+
+ /*
+ * Run the test set if there is no SVE or SME, with those we
+ * have to pick a VL for each run.
+ */
+ if (!sve_supported()) {
+ test_config.sve_vl_in = 0;
+ test_config.sve_vl_expected = 0;
+ test_config.sme_vl_in = 0;
+ test_config.sme_vl_expected = 0;
+ test_config.svcr_in = 0;
+ test_config.svcr_expected = 0;
+
+ run_tests(base_test_defs, ARRAY_SIZE(base_test_defs),
+ &test_config);
+ }
+
+ run_sve_tests();
+ run_sme_tests();
+
+ ksft_finished();
+}
diff --git a/tools/testing/selftests/arm64/fp/fp-ptrace.h b/tools/testing/selftests/arm64/fp/fp-ptrace.h
new file mode 100644
index 000000000000..db4f2c4d750c
--- /dev/null
+++ b/tools/testing/selftests/arm64/fp/fp-ptrace.h
@@ -0,0 +1,13 @@
+// SPDX-License-Identifier: GPL-2.0-only
+// Copyright (C) 2021-3 ARM Limited.
+
+#ifndef FP_PTRACE_H
+#define FP_PTRACE_H
+
+#define SVCR_SM_SHIFT 0
+#define SVCR_ZA_SHIFT 1
+
+#define SVCR_SM (1 << SVCR_SM_SHIFT)
+#define SVCR_ZA (1 << SVCR_ZA_SHIFT)
+
+#endif
diff --git a/tools/testing/selftests/arm64/fp/fp-stress.c b/tools/testing/selftests/arm64/fp/fp-stress.c
new file mode 100644
index 000000000000..dd31647b00a2
--- /dev/null
+++ b/tools/testing/selftests/arm64/fp/fp-stress.c
@@ -0,0 +1,649 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright (C) 2022 ARM Limited.
+ */
+
+#define _GNU_SOURCE
+#define _POSIX_C_SOURCE 199309L
+
+#include <errno.h>
+#include <getopt.h>
+#include <poll.h>
+#include <signal.h>
+#include <stdbool.h>
+#include <stddef.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+#include <sys/auxv.h>
+#include <sys/epoll.h>
+#include <sys/prctl.h>
+#include <sys/types.h>
+#include <sys/uio.h>
+#include <sys/wait.h>
+#include <asm/hwcap.h>
+
+#include "../../kselftest.h"
+
+#define MAX_VLS 16
+
+struct child_data {
+ char *name, *output;
+ pid_t pid;
+ int stdout;
+ bool output_seen;
+ bool exited;
+ int exit_status;
+};
+
+static int epoll_fd;
+static struct child_data *children;
+static struct epoll_event *evs;
+static int tests;
+static int num_children;
+static bool terminate;
+
+static int startup_pipe[2];
+
+static int num_processors(void)
+{
+ long nproc = sysconf(_SC_NPROCESSORS_CONF);
+ if (nproc < 0) {
+ perror("Unable to read number of processors\n");
+ exit(EXIT_FAILURE);
+ }
+
+ return nproc;
+}
+
+static void child_start(struct child_data *child, const char *program)
+{
+ int ret, pipefd[2], i;
+ struct epoll_event ev;
+
+ ret = pipe(pipefd);
+ if (ret != 0)
+ ksft_exit_fail_msg("Failed to create stdout pipe: %s (%d)\n",
+ strerror(errno), errno);
+
+ child->pid = fork();
+ if (child->pid == -1)
+ ksft_exit_fail_msg("fork() failed: %s (%d)\n",
+ strerror(errno), errno);
+
+ if (!child->pid) {
+ /*
+ * In child, replace stdout with the pipe, errors to
+ * stderr from here as kselftest prints to stdout.
+ */
+ ret = dup2(pipefd[1], 1);
+ if (ret == -1) {
+ fprintf(stderr, "dup2() %d\n", errno);
+ exit(EXIT_FAILURE);
+ }
+
+ /*
+ * Duplicate the read side of the startup pipe to
+ * FD 3 so we can close everything else.
+ */
+ ret = dup2(startup_pipe[0], 3);
+ if (ret == -1) {
+ fprintf(stderr, "dup2() %d\n", errno);
+ exit(EXIT_FAILURE);
+ }
+
+ /*
+ * Very dumb mechanism to clean open FDs other than
+ * stdio. We don't want O_CLOEXEC for the pipes...
+ */
+ for (i = 4; i < 8192; i++)
+ close(i);
+
+ /*
+ * Read from the startup pipe, there should be no data
+ * and we should block until it is closed. We just
+ * carry on on error since this isn't super critical.
+ */
+ ret = read(3, &i, sizeof(i));
+ if (ret < 0)
+ fprintf(stderr, "read(startp pipe) failed: %s (%d)\n",
+ strerror(errno), errno);
+ if (ret > 0)
+ fprintf(stderr, "%d bytes of data on startup pipe\n",
+ ret);
+ close(3);
+
+ ret = execl(program, program, NULL);
+ fprintf(stderr, "execl(%s) failed: %d (%s)\n",
+ program, errno, strerror(errno));
+
+ exit(EXIT_FAILURE);
+ } else {
+ /*
+ * In parent, remember the child and close our copy of the
+ * write side of stdout.
+ */
+ close(pipefd[1]);
+ child->stdout = pipefd[0];
+ child->output = NULL;
+ child->exited = false;
+ child->output_seen = false;
+
+ ev.events = EPOLLIN | EPOLLHUP;
+ ev.data.ptr = child;
+
+ ret = epoll_ctl(epoll_fd, EPOLL_CTL_ADD, child->stdout, &ev);
+ if (ret < 0) {
+ ksft_exit_fail_msg("%s EPOLL_CTL_ADD failed: %s (%d)\n",
+ child->name, strerror(errno), errno);
+ }
+ }
+}
+
+static bool child_output_read(struct child_data *child)
+{
+ char read_data[1024];
+ char work[1024];
+ int ret, len, cur_work, cur_read;
+
+ ret = read(child->stdout, read_data, sizeof(read_data));
+ if (ret < 0) {
+ if (errno == EINTR)
+ return true;
+
+ ksft_print_msg("%s: read() failed: %s (%d)\n",
+ child->name, strerror(errno),
+ errno);
+ return false;
+ }
+ len = ret;
+
+ child->output_seen = true;
+
+ /* Pick up any partial read */
+ if (child->output) {
+ strncpy(work, child->output, sizeof(work) - 1);
+ cur_work = strnlen(work, sizeof(work));
+ free(child->output);
+ child->output = NULL;
+ } else {
+ cur_work = 0;
+ }
+
+ cur_read = 0;
+ while (cur_read < len) {
+ work[cur_work] = read_data[cur_read++];
+
+ if (work[cur_work] == '\n') {
+ work[cur_work] = '\0';
+ ksft_print_msg("%s: %s\n", child->name, work);
+ cur_work = 0;
+ } else {
+ cur_work++;
+ }
+ }
+
+ if (cur_work) {
+ work[cur_work] = '\0';
+ ret = asprintf(&child->output, "%s", work);
+ if (ret == -1)
+ ksft_exit_fail_msg("Out of memory\n");
+ }
+
+ return false;
+}
+
+static void child_output(struct child_data *child, uint32_t events,
+ bool flush)
+{
+ bool read_more;
+
+ if (events & EPOLLIN) {
+ do {
+ read_more = child_output_read(child);
+ } while (read_more);
+ }
+
+ if (events & EPOLLHUP) {
+ close(child->stdout);
+ child->stdout = -1;
+ flush = true;
+ }
+
+ if (flush && child->output) {
+ ksft_print_msg("%s: %s<EOF>\n", child->name, child->output);
+ free(child->output);
+ child->output = NULL;
+ }
+}
+
+static void child_tickle(struct child_data *child)
+{
+ if (child->output_seen && !child->exited)
+ kill(child->pid, SIGUSR2);
+}
+
+static void child_stop(struct child_data *child)
+{
+ if (!child->exited)
+ kill(child->pid, SIGTERM);
+}
+
+static void child_cleanup(struct child_data *child)
+{
+ pid_t ret;
+ int status;
+ bool fail = false;
+
+ if (!child->exited) {
+ do {
+ ret = waitpid(child->pid, &status, 0);
+ if (ret == -1 && errno == EINTR)
+ continue;
+
+ if (ret == -1) {
+ ksft_print_msg("waitpid(%d) failed: %s (%d)\n",
+ child->pid, strerror(errno),
+ errno);
+ fail = true;
+ break;
+ }
+ } while (!WIFEXITED(status));
+ child->exit_status = WEXITSTATUS(status);
+ }
+
+ if (!child->output_seen) {
+ ksft_print_msg("%s no output seen\n", child->name);
+ fail = true;
+ }
+
+ if (child->exit_status != 0) {
+ ksft_print_msg("%s exited with error code %d\n",
+ child->name, child->exit_status);
+ fail = true;
+ }
+
+ ksft_test_result(!fail, "%s\n", child->name);
+}
+
+static void handle_child_signal(int sig, siginfo_t *info, void *context)
+{
+ int i;
+ bool found = false;
+
+ for (i = 0; i < num_children; i++) {
+ if (children[i].pid == info->si_pid) {
+ children[i].exited = true;
+ children[i].exit_status = info->si_status;
+ found = true;
+ break;
+ }
+ }
+
+ if (!found)
+ ksft_print_msg("SIGCHLD for unknown PID %d with status %d\n",
+ info->si_pid, info->si_status);
+}
+
+static void handle_exit_signal(int sig, siginfo_t *info, void *context)
+{
+ int i;
+
+ /* If we're already exiting then don't signal again */
+ if (terminate)
+ return;
+
+ ksft_print_msg("Got signal, exiting...\n");
+
+ terminate = true;
+
+ /*
+ * This should be redundant, the main loop should clean up
+ * after us, but for safety stop everything we can here.
+ */
+ for (i = 0; i < num_children; i++)
+ child_stop(&children[i]);
+}
+
+static void start_fpsimd(struct child_data *child, int cpu, int copy)
+{
+ int ret;
+
+ ret = asprintf(&child->name, "FPSIMD-%d-%d", cpu, copy);
+ if (ret == -1)
+ ksft_exit_fail_msg("asprintf() failed\n");
+
+ child_start(child, "./fpsimd-test");
+
+ ksft_print_msg("Started %s\n", child->name);
+}
+
+static void start_sve(struct child_data *child, int vl, int cpu)
+{
+ int ret;
+
+ ret = prctl(PR_SVE_SET_VL, vl | PR_SVE_VL_INHERIT);
+ if (ret < 0)
+ ksft_exit_fail_msg("Failed to set SVE VL %d\n", vl);
+
+ ret = asprintf(&child->name, "SVE-VL-%d-%d", vl, cpu);
+ if (ret == -1)
+ ksft_exit_fail_msg("asprintf() failed\n");
+
+ child_start(child, "./sve-test");
+
+ ksft_print_msg("Started %s\n", child->name);
+}
+
+static void start_ssve(struct child_data *child, int vl, int cpu)
+{
+ int ret;
+
+ ret = asprintf(&child->name, "SSVE-VL-%d-%d", vl, cpu);
+ if (ret == -1)
+ ksft_exit_fail_msg("asprintf() failed\n");
+
+ ret = prctl(PR_SME_SET_VL, vl | PR_SME_VL_INHERIT);
+ if (ret < 0)
+ ksft_exit_fail_msg("Failed to set SME VL %d\n", ret);
+
+ child_start(child, "./ssve-test");
+
+ ksft_print_msg("Started %s\n", child->name);
+}
+
+static void start_za(struct child_data *child, int vl, int cpu)
+{
+ int ret;
+
+ ret = prctl(PR_SME_SET_VL, vl | PR_SVE_VL_INHERIT);
+ if (ret < 0)
+ ksft_exit_fail_msg("Failed to set SME VL %d\n", ret);
+
+ ret = asprintf(&child->name, "ZA-VL-%d-%d", vl, cpu);
+ if (ret == -1)
+ ksft_exit_fail_msg("asprintf() failed\n");
+
+ child_start(child, "./za-test");
+
+ ksft_print_msg("Started %s\n", child->name);
+}
+
+static void start_zt(struct child_data *child, int cpu)
+{
+ int ret;
+
+ ret = asprintf(&child->name, "ZT-%d", cpu);
+ if (ret == -1)
+ ksft_exit_fail_msg("asprintf() failed\n");
+
+ child_start(child, "./zt-test");
+
+ ksft_print_msg("Started %s\n", child->name);
+}
+
+static void probe_vls(int vls[], int *vl_count, int set_vl)
+{
+ unsigned int vq;
+ int vl;
+
+ *vl_count = 0;
+
+ for (vq = SVE_VQ_MAX; vq > 0; vq /= 2) {
+ vl = prctl(set_vl, vq * 16);
+ if (vl == -1)
+ ksft_exit_fail_msg("SET_VL failed: %s (%d)\n",
+ strerror(errno), errno);
+
+ vl &= PR_SVE_VL_LEN_MASK;
+
+ if (*vl_count && (vl == vls[*vl_count - 1]))
+ break;
+
+ vq = sve_vq_from_vl(vl);
+
+ vls[*vl_count] = vl;
+ *vl_count += 1;
+ }
+}
+
+/* Handle any pending output without blocking */
+static void drain_output(bool flush)
+{
+ int ret = 1;
+ int i;
+
+ while (ret > 0) {
+ ret = epoll_wait(epoll_fd, evs, tests, 0);
+ if (ret < 0) {
+ if (errno == EINTR)
+ continue;
+ ksft_print_msg("epoll_wait() failed: %s (%d)\n",
+ strerror(errno), errno);
+ }
+
+ for (i = 0; i < ret; i++)
+ child_output(evs[i].data.ptr, evs[i].events, flush);
+ }
+}
+
+static const struct option options[] = {
+ { "timeout", required_argument, NULL, 't' },
+ { }
+};
+
+int main(int argc, char **argv)
+{
+ int ret;
+ int timeout = 10;
+ int cpus, i, j, c;
+ int sve_vl_count, sme_vl_count, fpsimd_per_cpu;
+ bool all_children_started = false;
+ int seen_children;
+ int sve_vls[MAX_VLS], sme_vls[MAX_VLS];
+ bool have_sme2;
+ struct sigaction sa;
+
+ while ((c = getopt_long(argc, argv, "t:", options, NULL)) != -1) {
+ switch (c) {
+ case 't':
+ ret = sscanf(optarg, "%d", &timeout);
+ if (ret != 1)
+ ksft_exit_fail_msg("Failed to parse timeout %s\n",
+ optarg);
+ break;
+ default:
+ ksft_exit_fail_msg("Unknown argument\n");
+ }
+ }
+
+ cpus = num_processors();
+ tests = 0;
+
+ if (getauxval(AT_HWCAP) & HWCAP_SVE) {
+ probe_vls(sve_vls, &sve_vl_count, PR_SVE_SET_VL);
+ tests += sve_vl_count * cpus;
+ } else {
+ sve_vl_count = 0;
+ }
+
+ if (getauxval(AT_HWCAP2) & HWCAP2_SME) {
+ probe_vls(sme_vls, &sme_vl_count, PR_SME_SET_VL);
+ tests += sme_vl_count * cpus * 2;
+ } else {
+ sme_vl_count = 0;
+ }
+
+ if (getauxval(AT_HWCAP2) & HWCAP2_SME2) {
+ tests += cpus;
+ have_sme2 = true;
+ } else {
+ have_sme2 = false;
+ }
+
+ /* Force context switching if we only have FPSIMD */
+ if (!sve_vl_count && !sme_vl_count)
+ fpsimd_per_cpu = 2;
+ else
+ fpsimd_per_cpu = 1;
+ tests += cpus * fpsimd_per_cpu;
+
+ ksft_print_header();
+ ksft_set_plan(tests);
+
+ ksft_print_msg("%d CPUs, %d SVE VLs, %d SME VLs, SME2 %s\n",
+ cpus, sve_vl_count, sme_vl_count,
+ have_sme2 ? "present" : "absent");
+
+ if (timeout > 0)
+ ksft_print_msg("Will run for %ds\n", timeout);
+ else
+ ksft_print_msg("Will run until terminated\n");
+
+ children = calloc(sizeof(*children), tests);
+ if (!children)
+ ksft_exit_fail_msg("Unable to allocate child data\n");
+
+ ret = epoll_create1(EPOLL_CLOEXEC);
+ if (ret < 0)
+ ksft_exit_fail_msg("epoll_create1() failed: %s (%d)\n",
+ strerror(errno), ret);
+ epoll_fd = ret;
+
+ /* Create a pipe which children will block on before execing */
+ ret = pipe(startup_pipe);
+ if (ret != 0)
+ ksft_exit_fail_msg("Failed to create startup pipe: %s (%d)\n",
+ strerror(errno), errno);
+
+ /* Get signal handers ready before we start any children */
+ memset(&sa, 0, sizeof(sa));
+ sa.sa_sigaction = handle_exit_signal;
+ sa.sa_flags = SA_RESTART | SA_SIGINFO;
+ sigemptyset(&sa.sa_mask);
+ ret = sigaction(SIGINT, &sa, NULL);
+ if (ret < 0)
+ ksft_print_msg("Failed to install SIGINT handler: %s (%d)\n",
+ strerror(errno), errno);
+ ret = sigaction(SIGTERM, &sa, NULL);
+ if (ret < 0)
+ ksft_print_msg("Failed to install SIGTERM handler: %s (%d)\n",
+ strerror(errno), errno);
+ sa.sa_sigaction = handle_child_signal;
+ ret = sigaction(SIGCHLD, &sa, NULL);
+ if (ret < 0)
+ ksft_print_msg("Failed to install SIGCHLD handler: %s (%d)\n",
+ strerror(errno), errno);
+
+ evs = calloc(tests, sizeof(*evs));
+ if (!evs)
+ ksft_exit_fail_msg("Failed to allocated %d epoll events\n",
+ tests);
+
+ for (i = 0; i < cpus; i++) {
+ for (j = 0; j < fpsimd_per_cpu; j++)
+ start_fpsimd(&children[num_children++], i, j);
+
+ for (j = 0; j < sve_vl_count; j++)
+ start_sve(&children[num_children++], sve_vls[j], i);
+
+ for (j = 0; j < sme_vl_count; j++) {
+ start_ssve(&children[num_children++], sme_vls[j], i);
+ start_za(&children[num_children++], sme_vls[j], i);
+ }
+
+ if (have_sme2)
+ start_zt(&children[num_children++], i);
+ }
+
+ /*
+ * All children started, close the startup pipe and let them
+ * run.
+ */
+ close(startup_pipe[0]);
+ close(startup_pipe[1]);
+
+ for (;;) {
+ /* Did we get a signal asking us to exit? */
+ if (terminate)
+ break;
+
+ /*
+ * Timeout is counted in seconds with no output, the
+ * tests print during startup then are silent when
+ * running so this should ensure they all ran enough
+ * to install the signal handler, this is especially
+ * useful in emulation where we will both be slow and
+ * likely to have a large set of VLs.
+ */
+ ret = epoll_wait(epoll_fd, evs, tests, 1000);
+ if (ret < 0) {
+ if (errno == EINTR)
+ continue;
+ ksft_exit_fail_msg("epoll_wait() failed: %s (%d)\n",
+ strerror(errno), errno);
+ }
+
+ /* Output? */
+ if (ret > 0) {
+ for (i = 0; i < ret; i++) {
+ child_output(evs[i].data.ptr, evs[i].events,
+ false);
+ }
+ continue;
+ }
+
+ /* Otherwise epoll_wait() timed out */
+
+ /*
+ * If the child processes have not produced output they
+ * aren't actually running the tests yet .
+ */
+ if (!all_children_started) {
+ seen_children = 0;
+
+ for (i = 0; i < num_children; i++)
+ if (children[i].output_seen ||
+ children[i].exited)
+ seen_children++;
+
+ if (seen_children != num_children) {
+ ksft_print_msg("Waiting for %d children\n",
+ num_children - seen_children);
+ continue;
+ }
+
+ all_children_started = true;
+ }
+
+ ksft_print_msg("Sending signals, timeout remaining: %d\n",
+ timeout);
+
+ for (i = 0; i < num_children; i++)
+ child_tickle(&children[i]);
+
+ /* Negative timeout means run indefinitely */
+ if (timeout < 0)
+ continue;
+ if (--timeout == 0)
+ break;
+ }
+
+ ksft_print_msg("Finishing up...\n");
+ terminate = true;
+
+ for (i = 0; i < tests; i++)
+ child_stop(&children[i]);
+
+ drain_output(false);
+
+ for (i = 0; i < tests; i++)
+ child_cleanup(&children[i]);
+
+ drain_output(true);
+
+ ksft_print_cnts();
+
+ return 0;
+}
diff --git a/tools/testing/selftests/arm64/fp/fpsimd-stress b/tools/testing/selftests/arm64/fp/fpsimd-stress
new file mode 100755
index 000000000000..781b5b022eaf
--- /dev/null
+++ b/tools/testing/selftests/arm64/fp/fpsimd-stress
@@ -0,0 +1,60 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0-only
+# Copyright (C) 2015-2019 ARM Limited.
+# Original author: Dave Martin <Dave.Martin@arm.com>
+
+set -ue
+
+NR_CPUS=`nproc`
+
+pids=
+logs=
+
+cleanup () {
+ trap - INT TERM CHLD
+ set +e
+
+ if [ -n "$pids" ]; then
+ kill $pids
+ wait $pids
+ pids=
+ fi
+
+ if [ -n "$logs" ]; then
+ cat $logs
+ rm $logs
+ logs=
+ fi
+}
+
+interrupt () {
+ cleanup
+ exit 0
+}
+
+child_died () {
+ cleanup
+ exit 1
+}
+
+trap interrupt INT TERM EXIT
+trap child_died CHLD
+
+for x in `seq 0 $((NR_CPUS * 4))`; do
+ log=`mktemp`
+ logs=$logs\ $log
+ ./fpsimd-test >$log &
+ pids=$pids\ $!
+done
+
+# Wait for all child processes to be created:
+sleep 10
+
+while :; do
+ kill -USR1 $pids
+done &
+pids=$pids\ $!
+
+wait
+
+exit 1
diff --git a/tools/testing/selftests/arm64/fp/fpsimd-test.S b/tools/testing/selftests/arm64/fp/fpsimd-test.S
new file mode 100644
index 000000000000..8b960d01ed2e
--- /dev/null
+++ b/tools/testing/selftests/arm64/fp/fpsimd-test.S
@@ -0,0 +1,332 @@
+// SPDX-License-Identifier: GPL-2.0-only
+// Copyright (C) 2015-2019 ARM Limited.
+// Original author: Dave Martin <Dave.Martin@arm.com>
+//
+// Simple FPSIMD context switch test
+// Repeatedly writes unique test patterns into each FPSIMD register
+// and reads them back to verify integrity.
+//
+// for x in `seq 1 NR_CPUS`; do fpsimd-test & pids=$pids\ $! ; done
+// (leave it running for as long as you want...)
+// kill $pids
+
+#include <asm/unistd.h>
+#include "assembler.h"
+#include "asm-offsets.h"
+
+#define NVR 32
+#define MAXVL_B (128 / 8)
+
+.macro _vldr Vn:req, Xt:req
+ ld1 {v\Vn\().2d}, [x\Xt]
+.endm
+
+.macro _vstr Vn:req, Xt:req
+ st1 {v\Vn\().2d}, [x\Xt]
+.endm
+
+// Generate accessor functions to read/write programmatically selected
+// FPSIMD registers.
+// x0 is the register index to access
+// x1 is the memory address to read from (getv,setp) or store to (setv,setp)
+// All clobber x0-x2
+define_accessor setv, NVR, _vldr
+define_accessor getv, NVR, _vstr
+
+// Declare some storate space to shadow the SVE register contents:
+.pushsection .text
+.data
+.align 4
+vref:
+ .space MAXVL_B * NVR
+scratch:
+ .space MAXVL_B
+.popsection
+
+// Generate a test pattern for storage in SVE registers
+// x0: pid (16 bits)
+// x1: register number (6 bits)
+// x2: generation (4 bits)
+function pattern
+ orr w1, w0, w1, lsl #16
+ orr w2, w1, w2, lsl #28
+
+ ldr x0, =scratch
+ mov w1, #MAXVL_B / 4
+
+0: str w2, [x0], #4
+ add w2, w2, #(1 << 22)
+ subs w1, w1, #1
+ bne 0b
+
+ ret
+endfunction
+
+// Get the address of shadow data for FPSIMD V-register V<xn>
+.macro _adrv xd, xn, nrtmp
+ ldr \xd, =vref
+ mov x\nrtmp, #16
+ madd \xd, x\nrtmp, \xn, \xd
+.endm
+
+// Set up test pattern in a FPSIMD V-register
+// x0: pid
+// x1: register number
+// x2: generation
+function setup_vreg
+ mov x4, x30
+
+ mov x6, x1
+ bl pattern
+ _adrv x0, x6, 2
+ mov x5, x0
+ ldr x1, =scratch
+ bl memcpy
+
+ mov x0, x6
+ mov x1, x5
+ bl setv
+
+ ret x4
+endfunction
+
+// Trivial memory compare: compare x2 bytes starting at address x0 with
+// bytes starting at address x1.
+// Returns only if all bytes match; otherwise, the program is aborted.
+// Clobbers x0-x5.
+function memcmp
+ cbz x2, 1f
+
+ mov x5, #0
+0: ldrb w3, [x0, x5]
+ ldrb w4, [x1, x5]
+ add x5, x5, #1
+ cmp w3, w4
+ b.ne barf
+ subs x2, x2, #1
+ b.ne 0b
+
+1: ret
+endfunction
+
+// Verify that a FPSIMD V-register matches its shadow in memory, else abort
+// x0: reg number
+// Clobbers x0-x5.
+function check_vreg
+ mov x3, x30
+
+ _adrv x5, x0, 6
+ mov x4, x0
+ ldr x7, =scratch
+
+ mov x0, x7
+ mov x1, x6
+ bl memfill_ae
+
+ mov x0, x4
+ mov x1, x7
+ bl getv
+
+ mov x0, x5
+ mov x1, x7
+ mov x2, x6
+ mov x30, x3
+ b memcmp
+endfunction
+
+// Any SVE register modified here can cause corruption in the main
+// thread -- but *only* the registers modified here.
+function irritator_handler
+ // Increment the irritation signal count (x23):
+ ldr x0, [x2, #ucontext_regs + 8 * 23]
+ add x0, x0, #1
+ str x0, [x2, #ucontext_regs + 8 * 23]
+
+ // Corrupt some random V-regs
+ adr x0, .text + (irritator_handler - .text) / 16 * 16
+ movi v0.8b, #7
+ movi v9.16b, #9
+ movi v31.8b, #31
+
+ ret
+endfunction
+
+function tickle_handler
+ // Increment the signal count (x23):
+ ldr x0, [x2, #ucontext_regs + 8 * 23]
+ add x0, x0, #1
+ str x0, [x2, #ucontext_regs + 8 * 23]
+
+ ret
+endfunction
+
+function terminate_handler
+ mov w21, w0
+ mov x20, x2
+
+ puts "Terminated by signal "
+ mov w0, w21
+ bl putdec
+ puts ", no error, iterations="
+ ldr x0, [x20, #ucontext_regs + 8 * 22]
+ bl putdec
+ puts ", signals="
+ ldr x0, [x20, #ucontext_regs + 8 * 23]
+ bl putdecn
+
+ mov x0, #0
+ mov x8, #__NR_exit
+ svc #0
+endfunction
+
+// w0: signal number
+// x1: sa_action
+// w2: sa_flags
+// Clobbers x0-x6,x8
+function setsignal
+ str x30, [sp, #-((sa_sz + 15) / 16 * 16 + 16)]!
+
+ mov w4, w0
+ mov x5, x1
+ mov w6, w2
+
+ add x0, sp, #16
+ mov x1, #sa_sz
+ bl memclr
+
+ mov w0, w4
+ add x1, sp, #16
+ str w6, [x1, #sa_flags]
+ str x5, [x1, #sa_handler]
+ mov x2, #0
+ mov x3, #sa_mask_sz
+ mov x8, #__NR_rt_sigaction
+ svc #0
+
+ cbz w0, 1f
+
+ puts "sigaction failure\n"
+ b .Labort
+
+1: ldr x30, [sp], #((sa_sz + 15) / 16 * 16 + 16)
+ ret
+endfunction
+
+// Main program entry point
+.globl _start
+function _start
+ mov x23, #0 // signal count
+
+ mov w0, #SIGINT
+ adr x1, terminate_handler
+ mov w2, #SA_SIGINFO
+ bl setsignal
+
+ mov w0, #SIGTERM
+ adr x1, terminate_handler
+ mov w2, #SA_SIGINFO
+ bl setsignal
+
+ mov w0, #SIGUSR1
+ adr x1, irritator_handler
+ mov w2, #SA_SIGINFO
+ orr w2, w2, #SA_NODEFER
+ bl setsignal
+
+ mov w0, #SIGUSR2
+ adr x1, tickle_handler
+ mov w2, #SA_SIGINFO
+ orr w2, w2, #SA_NODEFER
+ bl setsignal
+
+ // Sanity-check and report the vector length
+
+ mov x19, #128
+ cmp x19, #128
+ b.lo 1f
+ cmp x19, #2048
+ b.hi 1f
+ tst x19, #(8 - 1)
+ b.eq 2f
+
+1: puts "Bad vector length: "
+ mov x0, x19
+ bl putdecn
+ b .Labort
+
+2: puts "Vector length:\t"
+ mov x0, x19
+ bl putdec
+ puts " bits\n"
+
+ // Obtain our PID, to ensure test pattern uniqueness between processes
+
+ mov x8, #__NR_getpid
+ svc #0
+ mov x20, x0
+
+ puts "PID:\t"
+ mov x0, x20
+ bl putdecn
+
+ mov x22, #0 // generation number, increments per iteration
+.Ltest_loop:
+
+ mov x21, #0 // Set up V-regs & shadow with test pattern
+0: mov x0, x20
+ mov x1, x21
+ and x2, x22, #0xf
+ bl setup_vreg
+ add x21, x21, #1
+ cmp x21, #NVR
+ b.lo 0b
+
+// Can't do this when SVE state is volatile across SVC:
+ mov x8, #__NR_sched_yield // Encourage preemption
+ svc #0
+
+ mov x21, #0
+0: mov x0, x21
+ bl check_vreg
+ add x21, x21, #1
+ cmp x21, #NVR
+ b.lo 0b
+
+ add x22, x22, #1
+ b .Ltest_loop
+
+.Labort:
+ mov x0, #0
+ mov x1, #SIGABRT
+ mov x8, #__NR_kill
+ svc #0
+endfunction
+
+function barf
+ mov x10, x0 // expected data
+ mov x11, x1 // actual data
+ mov x12, x2 // data size
+
+ puts "Mismatch: PID="
+ mov x0, x20
+ bl putdec
+ puts ", iteration="
+ mov x0, x22
+ bl putdec
+ puts ", reg="
+ mov x0, x21
+ bl putdecn
+ puts "\tExpected ["
+ mov x0, x10
+ mov x1, x12
+ bl dumphex
+ puts "]\n\tGot ["
+ mov x0, x11
+ mov x1, x12
+ bl dumphex
+ puts "]\n"
+
+ mov x8, #__NR_exit
+ mov x1, #1
+ svc #0
+endfunction
diff --git a/tools/testing/selftests/arm64/fp/rdvl-sme.c b/tools/testing/selftests/arm64/fp/rdvl-sme.c
new file mode 100644
index 000000000000..49b0b2e08bac
--- /dev/null
+++ b/tools/testing/selftests/arm64/fp/rdvl-sme.c
@@ -0,0 +1,14 @@
+// SPDX-License-Identifier: GPL-2.0-only
+
+#include <stdio.h>
+
+#include "rdvl.h"
+
+int main(void)
+{
+ int vl = rdvl_sme();
+
+ printf("%d\n", vl);
+
+ return 0;
+}
diff --git a/tools/testing/selftests/arm64/fp/rdvl-sve.c b/tools/testing/selftests/arm64/fp/rdvl-sve.c
new file mode 100644
index 000000000000..7f8a13a18f5d
--- /dev/null
+++ b/tools/testing/selftests/arm64/fp/rdvl-sve.c
@@ -0,0 +1,14 @@
+// SPDX-License-Identifier: GPL-2.0-only
+
+#include <stdio.h>
+
+#include "rdvl.h"
+
+int main(void)
+{
+ int vl = rdvl_sve();
+
+ printf("%d\n", vl);
+
+ return 0;
+}
diff --git a/tools/testing/selftests/arm64/fp/rdvl.S b/tools/testing/selftests/arm64/fp/rdvl.S
new file mode 100644
index 000000000000..20dc29996dc6
--- /dev/null
+++ b/tools/testing/selftests/arm64/fp/rdvl.S
@@ -0,0 +1,20 @@
+// SPDX-License-Identifier: GPL-2.0-only
+// Copyright (C) 2021 ARM Limited.
+
+#include "sme-inst.h"
+
+.arch_extension sve
+
+.globl rdvl_sve
+rdvl_sve:
+ hint 34 // BTI C
+ rdvl x0, #1
+ ret
+
+.globl rdvl_sme
+rdvl_sme:
+ hint 34 // BTI C
+
+ rdsvl 0, 1
+
+ ret
diff --git a/tools/testing/selftests/arm64/fp/rdvl.h b/tools/testing/selftests/arm64/fp/rdvl.h
new file mode 100644
index 000000000000..5d323679fbc9
--- /dev/null
+++ b/tools/testing/selftests/arm64/fp/rdvl.h
@@ -0,0 +1,9 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+
+#ifndef RDVL_H
+#define RDVL_H
+
+int rdvl_sme(void);
+int rdvl_sve(void);
+
+#endif
diff --git a/tools/testing/selftests/arm64/fp/sme-inst.h b/tools/testing/selftests/arm64/fp/sme-inst.h
new file mode 100644
index 000000000000..9292bba5400b
--- /dev/null
+++ b/tools/testing/selftests/arm64/fp/sme-inst.h
@@ -0,0 +1,71 @@
+// SPDX-License-Identifier: GPL-2.0-only
+// Copyright (C) 2021-2 ARM Limited.
+// Original author: Mark Brown <broonie@kernel.org>
+
+#ifndef SME_INST_H
+#define SME_INST_H
+
+/*
+ * RDSVL X\nx, #\imm
+ */
+.macro rdsvl nx, imm
+ .inst 0x4bf5800 \
+ | (\imm << 5) \
+ | (\nx)
+.endm
+
+.macro smstop
+ msr S0_3_C4_C6_3, xzr
+.endm
+
+.macro smstart_za
+ msr S0_3_C4_C5_3, xzr
+.endm
+
+.macro smstart_sm
+ msr S0_3_C4_C3_3, xzr
+.endm
+
+/*
+ * LDR (vector to ZA array):
+ * LDR ZA[\nw, #\offset], [X\nxbase, #\offset, MUL VL]
+ */
+.macro _ldr_za nw, nxbase, offset=0
+ .inst 0xe1000000 \
+ | (((\nw) & 3) << 13) \
+ | ((\nxbase) << 5) \
+ | ((\offset) & 7)
+.endm
+
+/*
+ * STR (vector from ZA array):
+ * STR ZA[\nw, #\offset], [X\nxbase, #\offset, MUL VL]
+ */
+.macro _str_za nw, nxbase, offset=0
+ .inst 0xe1200000 \
+ | (((\nw) & 3) << 13) \
+ | ((\nxbase) << 5) \
+ | ((\offset) & 7)
+.endm
+
+/*
+ * LDR (ZT0)
+ *
+ * LDR ZT0, nx
+ */
+.macro _ldr_zt nx
+ .inst 0xe11f8000 \
+ | (((\nx) & 0x1f) << 5)
+.endm
+
+/*
+ * STR (ZT0)
+ *
+ * STR ZT0, nx
+ */
+.macro _str_zt nx
+ .inst 0xe13f8000 \
+ | (((\nx) & 0x1f) << 5)
+.endm
+
+#endif
diff --git a/tools/testing/selftests/arm64/fp/ssve-stress b/tools/testing/selftests/arm64/fp/ssve-stress
new file mode 100644
index 000000000000..e2bd2cc184ad
--- /dev/null
+++ b/tools/testing/selftests/arm64/fp/ssve-stress
@@ -0,0 +1,59 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0-only
+# Copyright (C) 2015-2019 ARM Limited.
+# Original author: Dave Martin <Dave.Martin@arm.com>
+
+set -ue
+
+NR_CPUS=`nproc`
+
+pids=
+logs=
+
+cleanup () {
+ trap - INT TERM CHLD
+ set +e
+
+ if [ -n "$pids" ]; then
+ kill $pids
+ wait $pids
+ pids=
+ fi
+
+ if [ -n "$logs" ]; then
+ cat $logs
+ rm $logs
+ logs=
+ fi
+}
+
+interrupt () {
+ cleanup
+ exit 0
+}
+
+child_died () {
+ cleanup
+ exit 1
+}
+
+trap interrupt INT TERM EXIT
+
+for x in `seq 0 $((NR_CPUS * 4))`; do
+ log=`mktemp`
+ logs=$logs\ $log
+ ./ssve-test >$log &
+ pids=$pids\ $!
+done
+
+# Wait for all child processes to be created:
+sleep 10
+
+while :; do
+ kill -USR1 $pids
+done &
+pids=$pids\ $!
+
+wait
+
+exit 1
diff --git a/tools/testing/selftests/arm64/fp/sve-probe-vls.c b/tools/testing/selftests/arm64/fp/sve-probe-vls.c
new file mode 100644
index 000000000000..a24eca7a4ecb
--- /dev/null
+++ b/tools/testing/selftests/arm64/fp/sve-probe-vls.c
@@ -0,0 +1,63 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright (C) 2015-2020 ARM Limited.
+ * Original author: Dave Martin <Dave.Martin@arm.com>
+ */
+#include <assert.h>
+#include <errno.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/auxv.h>
+#include <sys/prctl.h>
+#include <asm/sigcontext.h>
+
+#include "../../kselftest.h"
+#include "rdvl.h"
+
+int main(int argc, char **argv)
+{
+ unsigned int vq;
+ int vl;
+ static unsigned int vqs[SVE_VQ_MAX];
+ unsigned int nvqs = 0;
+
+ ksft_print_header();
+ ksft_set_plan(2);
+
+ if (!(getauxval(AT_HWCAP) & HWCAP_SVE))
+ ksft_exit_skip("SVE not available\n");
+
+ /*
+ * Enumerate up to SVE_VQ_MAX vector lengths
+ */
+ for (vq = SVE_VQ_MAX; vq > 0; --vq) {
+ vl = prctl(PR_SVE_SET_VL, vq * 16);
+ if (vl == -1)
+ ksft_exit_fail_msg("PR_SVE_SET_VL failed: %s (%d)\n",
+ strerror(errno), errno);
+
+ vl &= PR_SVE_VL_LEN_MASK;
+
+ if (rdvl_sve() != vl)
+ ksft_exit_fail_msg("PR_SVE_SET_VL reports %d, RDVL %d\n",
+ vl, rdvl_sve());
+
+ if (!sve_vl_valid(vl))
+ ksft_exit_fail_msg("VL %d invalid\n", vl);
+ vq = sve_vq_from_vl(vl);
+
+ if (!(nvqs < SVE_VQ_MAX))
+ ksft_exit_fail_msg("Too many VLs %u >= SVE_VQ_MAX\n",
+ nvqs);
+ vqs[nvqs++] = vq;
+ }
+ ksft_test_result_pass("Enumerated %d vector lengths\n", nvqs);
+ ksft_test_result_pass("All vector lengths valid\n");
+
+ /* Print out the vector lengths in ascending order: */
+ while (nvqs--)
+ ksft_print_msg("%u\n", 16 * vqs[nvqs]);
+
+ ksft_exit_pass();
+}
diff --git a/tools/testing/selftests/arm64/fp/sve-ptrace.c b/tools/testing/selftests/arm64/fp/sve-ptrace.c
new file mode 100644
index 000000000000..6d61992fe8a0
--- /dev/null
+++ b/tools/testing/selftests/arm64/fp/sve-ptrace.c
@@ -0,0 +1,764 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright (C) 2015-2021 ARM Limited.
+ * Original author: Dave Martin <Dave.Martin@arm.com>
+ */
+#include <errno.h>
+#include <stdbool.h>
+#include <stddef.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+#include <sys/auxv.h>
+#include <sys/prctl.h>
+#include <sys/ptrace.h>
+#include <sys/types.h>
+#include <sys/uio.h>
+#include <sys/wait.h>
+#include <asm/sigcontext.h>
+#include <asm/ptrace.h>
+
+#include "../../kselftest.h"
+
+/* <linux/elf.h> and <sys/auxv.h> don't like each other, so: */
+#ifndef NT_ARM_SVE
+#define NT_ARM_SVE 0x405
+#endif
+
+#ifndef NT_ARM_SSVE
+#define NT_ARM_SSVE 0x40b
+#endif
+
+/*
+ * The architecture defines the maximum VQ as 16 but for extensibility
+ * the kernel specifies the SVE_VQ_MAX as 512 resulting in us running
+ * a *lot* more tests than are useful if we use it. Until the
+ * architecture is extended let's limit our coverage to what is
+ * currently allowed, plus one extra to ensure we cover constraining
+ * the VL as expected.
+ */
+#define TEST_VQ_MAX 17
+
+struct vec_type {
+ const char *name;
+ unsigned long hwcap_type;
+ unsigned long hwcap;
+ int regset;
+ int prctl_set;
+};
+
+static const struct vec_type vec_types[] = {
+ {
+ .name = "SVE",
+ .hwcap_type = AT_HWCAP,
+ .hwcap = HWCAP_SVE,
+ .regset = NT_ARM_SVE,
+ .prctl_set = PR_SVE_SET_VL,
+ },
+ {
+ .name = "Streaming SVE",
+ .hwcap_type = AT_HWCAP2,
+ .hwcap = HWCAP2_SME,
+ .regset = NT_ARM_SSVE,
+ .prctl_set = PR_SME_SET_VL,
+ },
+};
+
+#define VL_TESTS (((TEST_VQ_MAX - SVE_VQ_MIN) + 1) * 4)
+#define FLAG_TESTS 2
+#define FPSIMD_TESTS 2
+
+#define EXPECTED_TESTS ((VL_TESTS + FLAG_TESTS + FPSIMD_TESTS) * ARRAY_SIZE(vec_types))
+
+static void fill_buf(char *buf, size_t size)
+{
+ int i;
+
+ for (i = 0; i < size; i++)
+ buf[i] = random();
+}
+
+static int do_child(void)
+{
+ if (ptrace(PTRACE_TRACEME, -1, NULL, NULL))
+ ksft_exit_fail_msg("PTRACE_TRACEME", strerror(errno));
+
+ if (raise(SIGSTOP))
+ ksft_exit_fail_msg("raise(SIGSTOP)", strerror(errno));
+
+ return EXIT_SUCCESS;
+}
+
+static int get_fpsimd(pid_t pid, struct user_fpsimd_state *fpsimd)
+{
+ struct iovec iov;
+
+ iov.iov_base = fpsimd;
+ iov.iov_len = sizeof(*fpsimd);
+ return ptrace(PTRACE_GETREGSET, pid, NT_PRFPREG, &iov);
+}
+
+static int set_fpsimd(pid_t pid, struct user_fpsimd_state *fpsimd)
+{
+ struct iovec iov;
+
+ iov.iov_base = fpsimd;
+ iov.iov_len = sizeof(*fpsimd);
+ return ptrace(PTRACE_SETREGSET, pid, NT_PRFPREG, &iov);
+}
+
+static struct user_sve_header *get_sve(pid_t pid, const struct vec_type *type,
+ void **buf, size_t *size)
+{
+ struct user_sve_header *sve;
+ void *p;
+ size_t sz = sizeof *sve;
+ struct iovec iov;
+
+ while (1) {
+ if (*size < sz) {
+ p = realloc(*buf, sz);
+ if (!p) {
+ errno = ENOMEM;
+ goto error;
+ }
+
+ *buf = p;
+ *size = sz;
+ }
+
+ iov.iov_base = *buf;
+ iov.iov_len = sz;
+ if (ptrace(PTRACE_GETREGSET, pid, type->regset, &iov))
+ goto error;
+
+ sve = *buf;
+ if (sve->size <= sz)
+ break;
+
+ sz = sve->size;
+ }
+
+ return sve;
+
+error:
+ return NULL;
+}
+
+static int set_sve(pid_t pid, const struct vec_type *type,
+ const struct user_sve_header *sve)
+{
+ struct iovec iov;
+
+ iov.iov_base = (void *)sve;
+ iov.iov_len = sve->size;
+ return ptrace(PTRACE_SETREGSET, pid, type->regset, &iov);
+}
+
+/* Validate setting and getting the inherit flag */
+static void ptrace_set_get_inherit(pid_t child, const struct vec_type *type)
+{
+ struct user_sve_header sve;
+ struct user_sve_header *new_sve = NULL;
+ size_t new_sve_size = 0;
+ int ret;
+
+ /* First set the flag */
+ memset(&sve, 0, sizeof(sve));
+ sve.size = sizeof(sve);
+ sve.vl = sve_vl_from_vq(SVE_VQ_MIN);
+ sve.flags = SVE_PT_VL_INHERIT;
+ ret = set_sve(child, type, &sve);
+ if (ret != 0) {
+ ksft_test_result_fail("Failed to set %s SVE_PT_VL_INHERIT\n",
+ type->name);
+ return;
+ }
+
+ /*
+ * Read back the new register state and verify that we have
+ * set the flags we expected.
+ */
+ if (!get_sve(child, type, (void **)&new_sve, &new_sve_size)) {
+ ksft_test_result_fail("Failed to read %s SVE flags\n",
+ type->name);
+ return;
+ }
+
+ ksft_test_result(new_sve->flags & SVE_PT_VL_INHERIT,
+ "%s SVE_PT_VL_INHERIT set\n", type->name);
+
+ /* Now clear */
+ sve.flags &= ~SVE_PT_VL_INHERIT;
+ ret = set_sve(child, type, &sve);
+ if (ret != 0) {
+ ksft_test_result_fail("Failed to clear %s SVE_PT_VL_INHERIT\n",
+ type->name);
+ return;
+ }
+
+ if (!get_sve(child, type, (void **)&new_sve, &new_sve_size)) {
+ ksft_test_result_fail("Failed to read %s SVE flags\n",
+ type->name);
+ return;
+ }
+
+ ksft_test_result(!(new_sve->flags & SVE_PT_VL_INHERIT),
+ "%s SVE_PT_VL_INHERIT cleared\n", type->name);
+
+ free(new_sve);
+}
+
+/* Validate attempting to set the specfied VL via ptrace */
+static void ptrace_set_get_vl(pid_t child, const struct vec_type *type,
+ unsigned int vl, bool *supported)
+{
+ struct user_sve_header sve;
+ struct user_sve_header *new_sve = NULL;
+ size_t new_sve_size = 0;
+ int ret, prctl_vl;
+
+ *supported = false;
+
+ /* Check if the VL is supported in this process */
+ prctl_vl = prctl(type->prctl_set, vl);
+ if (prctl_vl == -1)
+ ksft_exit_fail_msg("prctl(PR_%s_SET_VL) failed: %s (%d)\n",
+ type->name, strerror(errno), errno);
+
+ /* If the VL is not supported then a supported VL will be returned */
+ *supported = (prctl_vl == vl);
+
+ /* Set the VL by doing a set with no register payload */
+ memset(&sve, 0, sizeof(sve));
+ sve.size = sizeof(sve);
+ sve.vl = vl;
+ ret = set_sve(child, type, &sve);
+ if (ret != 0) {
+ ksft_test_result_fail("Failed to set %s VL %u\n",
+ type->name, vl);
+ return;
+ }
+
+ /*
+ * Read back the new register state and verify that we have the
+ * same VL that we got from prctl() on ourselves.
+ */
+ if (!get_sve(child, type, (void **)&new_sve, &new_sve_size)) {
+ ksft_test_result_fail("Failed to read %s VL %u\n",
+ type->name, vl);
+ return;
+ }
+
+ ksft_test_result(new_sve->vl = prctl_vl, "Set %s VL %u\n",
+ type->name, vl);
+
+ free(new_sve);
+}
+
+static void check_u32(unsigned int vl, const char *reg,
+ uint32_t *in, uint32_t *out, int *errors)
+{
+ if (*in != *out) {
+ printf("# VL %d %s wrote %x read %x\n",
+ vl, reg, *in, *out);
+ (*errors)++;
+ }
+}
+
+/* Access the FPSIMD registers via the SVE regset */
+static void ptrace_sve_fpsimd(pid_t child, const struct vec_type *type)
+{
+ void *svebuf;
+ struct user_sve_header *sve;
+ struct user_fpsimd_state *fpsimd, new_fpsimd;
+ unsigned int i, j;
+ unsigned char *p;
+ int ret;
+
+ svebuf = malloc(SVE_PT_SIZE(0, SVE_PT_REGS_FPSIMD));
+ if (!svebuf) {
+ ksft_test_result_fail("Failed to allocate FPSIMD buffer\n");
+ return;
+ }
+
+ memset(svebuf, 0, SVE_PT_SIZE(0, SVE_PT_REGS_FPSIMD));
+ sve = svebuf;
+ sve->flags = SVE_PT_REGS_FPSIMD;
+ sve->size = SVE_PT_SIZE(0, SVE_PT_REGS_FPSIMD);
+ sve->vl = 16; /* We don't care what the VL is */
+
+ /* Try to set a known FPSIMD state via PT_REGS_SVE */
+ fpsimd = (struct user_fpsimd_state *)((char *)sve +
+ SVE_PT_FPSIMD_OFFSET);
+ for (i = 0; i < 32; ++i) {
+ p = (unsigned char *)&fpsimd->vregs[i];
+
+ for (j = 0; j < sizeof(fpsimd->vregs[i]); ++j)
+ p[j] = j;
+ }
+
+ ret = set_sve(child, type, sve);
+ ksft_test_result(ret == 0, "%s FPSIMD set via SVE: %d\n",
+ type->name, ret);
+ if (ret)
+ goto out;
+
+ /* Verify via the FPSIMD regset */
+ if (get_fpsimd(child, &new_fpsimd)) {
+ ksft_test_result_fail("get_fpsimd(): %s\n",
+ strerror(errno));
+ goto out;
+ }
+ if (memcmp(fpsimd, &new_fpsimd, sizeof(*fpsimd)) == 0)
+ ksft_test_result_pass("%s get_fpsimd() gave same state\n",
+ type->name);
+ else
+ ksft_test_result_fail("%s get_fpsimd() gave different state\n",
+ type->name);
+
+out:
+ free(svebuf);
+}
+
+/* Validate attempting to set SVE data and read SVE data */
+static void ptrace_set_sve_get_sve_data(pid_t child,
+ const struct vec_type *type,
+ unsigned int vl)
+{
+ void *write_buf;
+ void *read_buf = NULL;
+ struct user_sve_header *write_sve;
+ struct user_sve_header *read_sve;
+ size_t read_sve_size = 0;
+ unsigned int vq = sve_vq_from_vl(vl);
+ int ret, i;
+ size_t data_size;
+ int errors = 0;
+
+ data_size = SVE_PT_SVE_OFFSET + SVE_PT_SVE_SIZE(vq, SVE_PT_REGS_SVE);
+ write_buf = malloc(data_size);
+ if (!write_buf) {
+ ksft_test_result_fail("Error allocating %d byte buffer for %s VL %u\n",
+ data_size, type->name, vl);
+ return;
+ }
+ write_sve = write_buf;
+
+ /* Set up some data and write it out */
+ memset(write_sve, 0, data_size);
+ write_sve->size = data_size;
+ write_sve->vl = vl;
+ write_sve->flags = SVE_PT_REGS_SVE;
+
+ for (i = 0; i < __SVE_NUM_ZREGS; i++)
+ fill_buf(write_buf + SVE_PT_SVE_ZREG_OFFSET(vq, i),
+ SVE_PT_SVE_ZREG_SIZE(vq));
+
+ for (i = 0; i < __SVE_NUM_PREGS; i++)
+ fill_buf(write_buf + SVE_PT_SVE_PREG_OFFSET(vq, i),
+ SVE_PT_SVE_PREG_SIZE(vq));
+
+ fill_buf(write_buf + SVE_PT_SVE_FPSR_OFFSET(vq), SVE_PT_SVE_FPSR_SIZE);
+ fill_buf(write_buf + SVE_PT_SVE_FPCR_OFFSET(vq), SVE_PT_SVE_FPCR_SIZE);
+
+ /* TODO: Generate a valid FFR pattern */
+
+ ret = set_sve(child, type, write_sve);
+ if (ret != 0) {
+ ksft_test_result_fail("Failed to set %s VL %u data\n",
+ type->name, vl);
+ goto out;
+ }
+
+ /* Read the data back */
+ if (!get_sve(child, type, (void **)&read_buf, &read_sve_size)) {
+ ksft_test_result_fail("Failed to read %s VL %u data\n",
+ type->name, vl);
+ goto out;
+ }
+ read_sve = read_buf;
+
+ /* We might read more data if there's extensions we don't know */
+ if (read_sve->size < write_sve->size) {
+ ksft_test_result_fail("%s wrote %d bytes, only read %d\n",
+ type->name, write_sve->size,
+ read_sve->size);
+ goto out_read;
+ }
+
+ for (i = 0; i < __SVE_NUM_ZREGS; i++) {
+ if (memcmp(write_buf + SVE_PT_SVE_ZREG_OFFSET(vq, i),
+ read_buf + SVE_PT_SVE_ZREG_OFFSET(vq, i),
+ SVE_PT_SVE_ZREG_SIZE(vq)) != 0) {
+ printf("# Mismatch in %u Z%d\n", vl, i);
+ errors++;
+ }
+ }
+
+ for (i = 0; i < __SVE_NUM_PREGS; i++) {
+ if (memcmp(write_buf + SVE_PT_SVE_PREG_OFFSET(vq, i),
+ read_buf + SVE_PT_SVE_PREG_OFFSET(vq, i),
+ SVE_PT_SVE_PREG_SIZE(vq)) != 0) {
+ printf("# Mismatch in %u P%d\n", vl, i);
+ errors++;
+ }
+ }
+
+ check_u32(vl, "FPSR", write_buf + SVE_PT_SVE_FPSR_OFFSET(vq),
+ read_buf + SVE_PT_SVE_FPSR_OFFSET(vq), &errors);
+ check_u32(vl, "FPCR", write_buf + SVE_PT_SVE_FPCR_OFFSET(vq),
+ read_buf + SVE_PT_SVE_FPCR_OFFSET(vq), &errors);
+
+ ksft_test_result(errors == 0, "Set and get %s data for VL %u\n",
+ type->name, vl);
+
+out_read:
+ free(read_buf);
+out:
+ free(write_buf);
+}
+
+/* Validate attempting to set SVE data and read it via the FPSIMD regset */
+static void ptrace_set_sve_get_fpsimd_data(pid_t child,
+ const struct vec_type *type,
+ unsigned int vl)
+{
+ void *write_buf;
+ struct user_sve_header *write_sve;
+ unsigned int vq = sve_vq_from_vl(vl);
+ struct user_fpsimd_state fpsimd_state;
+ int ret, i;
+ size_t data_size;
+ int errors = 0;
+
+ if (__BYTE_ORDER == __BIG_ENDIAN) {
+ ksft_test_result_skip("Big endian not supported\n");
+ return;
+ }
+
+ data_size = SVE_PT_SVE_OFFSET + SVE_PT_SVE_SIZE(vq, SVE_PT_REGS_SVE);
+ write_buf = malloc(data_size);
+ if (!write_buf) {
+ ksft_test_result_fail("Error allocating %d byte buffer for %s VL %u\n",
+ data_size, type->name, vl);
+ return;
+ }
+ write_sve = write_buf;
+
+ /* Set up some data and write it out */
+ memset(write_sve, 0, data_size);
+ write_sve->size = data_size;
+ write_sve->vl = vl;
+ write_sve->flags = SVE_PT_REGS_SVE;
+
+ for (i = 0; i < __SVE_NUM_ZREGS; i++)
+ fill_buf(write_buf + SVE_PT_SVE_ZREG_OFFSET(vq, i),
+ SVE_PT_SVE_ZREG_SIZE(vq));
+
+ fill_buf(write_buf + SVE_PT_SVE_FPSR_OFFSET(vq), SVE_PT_SVE_FPSR_SIZE);
+ fill_buf(write_buf + SVE_PT_SVE_FPCR_OFFSET(vq), SVE_PT_SVE_FPCR_SIZE);
+
+ ret = set_sve(child, type, write_sve);
+ if (ret != 0) {
+ ksft_test_result_fail("Failed to set %s VL %u data\n",
+ type->name, vl);
+ goto out;
+ }
+
+ /* Read the data back */
+ if (get_fpsimd(child, &fpsimd_state)) {
+ ksft_test_result_fail("Failed to read %s VL %u FPSIMD data\n",
+ type->name, vl);
+ goto out;
+ }
+
+ for (i = 0; i < __SVE_NUM_ZREGS; i++) {
+ __uint128_t tmp = 0;
+
+ /*
+ * Z regs are stored endianness invariant, this won't
+ * work for big endian
+ */
+ memcpy(&tmp, write_buf + SVE_PT_SVE_ZREG_OFFSET(vq, i),
+ sizeof(tmp));
+
+ if (tmp != fpsimd_state.vregs[i]) {
+ printf("# Mismatch in FPSIMD for %s VL %u Z%d\n",
+ type->name, vl, i);
+ errors++;
+ }
+ }
+
+ check_u32(vl, "FPSR", write_buf + SVE_PT_SVE_FPSR_OFFSET(vq),
+ &fpsimd_state.fpsr, &errors);
+ check_u32(vl, "FPCR", write_buf + SVE_PT_SVE_FPCR_OFFSET(vq),
+ &fpsimd_state.fpcr, &errors);
+
+ ksft_test_result(errors == 0, "Set and get FPSIMD data for %s VL %u\n",
+ type->name, vl);
+
+out:
+ free(write_buf);
+}
+
+/* Validate attempting to set FPSIMD data and read it via the SVE regset */
+static void ptrace_set_fpsimd_get_sve_data(pid_t child,
+ const struct vec_type *type,
+ unsigned int vl)
+{
+ void *read_buf = NULL;
+ unsigned char *p;
+ struct user_sve_header *read_sve;
+ unsigned int vq = sve_vq_from_vl(vl);
+ struct user_fpsimd_state write_fpsimd;
+ int ret, i, j;
+ size_t read_sve_size = 0;
+ size_t expected_size;
+ int errors = 0;
+
+ if (__BYTE_ORDER == __BIG_ENDIAN) {
+ ksft_test_result_skip("Big endian not supported\n");
+ return;
+ }
+
+ for (i = 0; i < 32; ++i) {
+ p = (unsigned char *)&write_fpsimd.vregs[i];
+
+ for (j = 0; j < sizeof(write_fpsimd.vregs[i]); ++j)
+ p[j] = j;
+ }
+
+ ret = set_fpsimd(child, &write_fpsimd);
+ if (ret != 0) {
+ ksft_test_result_fail("Failed to set FPSIMD state: %d\n)",
+ ret);
+ return;
+ }
+
+ if (!get_sve(child, type, (void **)&read_buf, &read_sve_size)) {
+ ksft_test_result_fail("Failed to read %s VL %u data\n",
+ type->name, vl);
+ return;
+ }
+ read_sve = read_buf;
+
+ if (read_sve->vl != vl) {
+ ksft_test_result_fail("Child VL != expected VL %d\n",
+ read_sve->vl, vl);
+ goto out;
+ }
+
+ /* The kernel may return either SVE or FPSIMD format */
+ switch (read_sve->flags & SVE_PT_REGS_MASK) {
+ case SVE_PT_REGS_FPSIMD:
+ expected_size = SVE_PT_FPSIMD_SIZE(vq, SVE_PT_REGS_FPSIMD);
+ if (read_sve_size < expected_size) {
+ ksft_test_result_fail("Read %d bytes, expected %d\n",
+ read_sve_size, expected_size);
+ goto out;
+ }
+
+ ret = memcmp(&write_fpsimd, read_buf + SVE_PT_FPSIMD_OFFSET,
+ sizeof(write_fpsimd));
+ if (ret != 0) {
+ ksft_print_msg("Read FPSIMD data mismatch\n");
+ errors++;
+ }
+ break;
+
+ case SVE_PT_REGS_SVE:
+ expected_size = SVE_PT_SVE_SIZE(vq, SVE_PT_REGS_SVE);
+ if (read_sve_size < expected_size) {
+ ksft_test_result_fail("Read %d bytes, expected %d\n",
+ read_sve_size, expected_size);
+ goto out;
+ }
+
+ for (i = 0; i < __SVE_NUM_ZREGS; i++) {
+ __uint128_t tmp = 0;
+
+ /*
+ * Z regs are stored endianness invariant, this won't
+ * work for big endian
+ */
+ memcpy(&tmp, read_buf + SVE_PT_SVE_ZREG_OFFSET(vq, i),
+ sizeof(tmp));
+
+ if (tmp != write_fpsimd.vregs[i]) {
+ ksft_print_msg("Mismatch in FPSIMD for %s VL %u Z%d/V%d\n",
+ type->name, vl, i, i);
+ errors++;
+ }
+ }
+
+ check_u32(vl, "FPSR", &write_fpsimd.fpsr,
+ read_buf + SVE_PT_SVE_FPSR_OFFSET(vq), &errors);
+ check_u32(vl, "FPCR", &write_fpsimd.fpcr,
+ read_buf + SVE_PT_SVE_FPCR_OFFSET(vq), &errors);
+ break;
+ default:
+ ksft_print_msg("Unexpected regs type %d\n",
+ read_sve->flags & SVE_PT_REGS_MASK);
+ errors++;
+ break;
+ }
+
+ ksft_test_result(errors == 0, "Set FPSIMD, read via SVE for %s VL %u\n",
+ type->name, vl);
+
+out:
+ free(read_buf);
+}
+
+static int do_parent(pid_t child)
+{
+ int ret = EXIT_FAILURE;
+ pid_t pid;
+ int status, i;
+ siginfo_t si;
+ unsigned int vq, vl;
+ bool vl_supported;
+
+ ksft_print_msg("Parent is %d, child is %d\n", getpid(), child);
+
+ /* Attach to the child */
+ while (1) {
+ int sig;
+
+ pid = wait(&status);
+ if (pid == -1) {
+ perror("wait");
+ goto error;
+ }
+
+ /*
+ * This should never happen but it's hard to flag in
+ * the framework.
+ */
+ if (pid != child)
+ continue;
+
+ if (WIFEXITED(status) || WIFSIGNALED(status))
+ ksft_exit_fail_msg("Child died unexpectedly\n");
+
+ if (!WIFSTOPPED(status))
+ goto error;
+
+ sig = WSTOPSIG(status);
+
+ if (ptrace(PTRACE_GETSIGINFO, pid, NULL, &si)) {
+ if (errno == ESRCH)
+ goto disappeared;
+
+ if (errno == EINVAL) {
+ sig = 0; /* bust group-stop */
+ goto cont;
+ }
+
+ ksft_test_result_fail("PTRACE_GETSIGINFO: %s\n",
+ strerror(errno));
+ goto error;
+ }
+
+ if (sig == SIGSTOP && si.si_code == SI_TKILL &&
+ si.si_pid == pid)
+ break;
+
+ cont:
+ if (ptrace(PTRACE_CONT, pid, NULL, sig)) {
+ if (errno == ESRCH)
+ goto disappeared;
+
+ ksft_test_result_fail("PTRACE_CONT: %s\n",
+ strerror(errno));
+ goto error;
+ }
+ }
+
+ for (i = 0; i < ARRAY_SIZE(vec_types); i++) {
+ /* FPSIMD via SVE regset */
+ if (getauxval(vec_types[i].hwcap_type) & vec_types[i].hwcap) {
+ ptrace_sve_fpsimd(child, &vec_types[i]);
+ } else {
+ ksft_test_result_skip("%s FPSIMD set via SVE\n",
+ vec_types[i].name);
+ ksft_test_result_skip("%s FPSIMD read\n",
+ vec_types[i].name);
+ }
+
+ /* prctl() flags */
+ if (getauxval(vec_types[i].hwcap_type) & vec_types[i].hwcap) {
+ ptrace_set_get_inherit(child, &vec_types[i]);
+ } else {
+ ksft_test_result_skip("%s SVE_PT_VL_INHERIT set\n",
+ vec_types[i].name);
+ ksft_test_result_skip("%s SVE_PT_VL_INHERIT cleared\n",
+ vec_types[i].name);
+ }
+
+ /* Step through every possible VQ */
+ for (vq = SVE_VQ_MIN; vq <= TEST_VQ_MAX; vq++) {
+ vl = sve_vl_from_vq(vq);
+
+ /* First, try to set this vector length */
+ if (getauxval(vec_types[i].hwcap_type) &
+ vec_types[i].hwcap) {
+ ptrace_set_get_vl(child, &vec_types[i], vl,
+ &vl_supported);
+ } else {
+ ksft_test_result_skip("%s get/set VL %d\n",
+ vec_types[i].name, vl);
+ vl_supported = false;
+ }
+
+ /* If the VL is supported validate data set/get */
+ if (vl_supported) {
+ ptrace_set_sve_get_sve_data(child, &vec_types[i], vl);
+ ptrace_set_sve_get_fpsimd_data(child, &vec_types[i], vl);
+ ptrace_set_fpsimd_get_sve_data(child, &vec_types[i], vl);
+ } else {
+ ksft_test_result_skip("%s set SVE get SVE for VL %d\n",
+ vec_types[i].name, vl);
+ ksft_test_result_skip("%s set SVE get FPSIMD for VL %d\n",
+ vec_types[i].name, vl);
+ ksft_test_result_skip("%s set FPSIMD get SVE for VL %d\n",
+ vec_types[i].name, vl);
+ }
+ }
+ }
+
+ ret = EXIT_SUCCESS;
+
+error:
+ kill(child, SIGKILL);
+
+disappeared:
+ return ret;
+}
+
+int main(void)
+{
+ int ret = EXIT_SUCCESS;
+ pid_t child;
+
+ srandom(getpid());
+
+ ksft_print_header();
+ ksft_set_plan(EXPECTED_TESTS);
+
+ if (!(getauxval(AT_HWCAP) & HWCAP_SVE))
+ ksft_exit_skip("SVE not available\n");
+
+ child = fork();
+ if (!child)
+ return do_child();
+
+ if (do_parent(child))
+ ret = EXIT_FAILURE;
+
+ ksft_print_cnts();
+
+ return ret;
+}
diff --git a/tools/testing/selftests/arm64/fp/sve-stress b/tools/testing/selftests/arm64/fp/sve-stress
new file mode 100755
index 000000000000..24dd0922cc02
--- /dev/null
+++ b/tools/testing/selftests/arm64/fp/sve-stress
@@ -0,0 +1,59 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0-only
+# Copyright (C) 2015-2019 ARM Limited.
+# Original author: Dave Martin <Dave.Martin@arm.com>
+
+set -ue
+
+NR_CPUS=`nproc`
+
+pids=
+logs=
+
+cleanup () {
+ trap - INT TERM CHLD
+ set +e
+
+ if [ -n "$pids" ]; then
+ kill $pids
+ wait $pids
+ pids=
+ fi
+
+ if [ -n "$logs" ]; then
+ cat $logs
+ rm $logs
+ logs=
+ fi
+}
+
+interrupt () {
+ cleanup
+ exit 0
+}
+
+child_died () {
+ cleanup
+ exit 1
+}
+
+trap interrupt INT TERM EXIT
+
+for x in `seq 0 $((NR_CPUS * 4))`; do
+ log=`mktemp`
+ logs=$logs\ $log
+ ./sve-test >$log &
+ pids=$pids\ $!
+done
+
+# Wait for all child processes to be created:
+sleep 10
+
+while :; do
+ kill -USR1 $pids
+done &
+pids=$pids\ $!
+
+wait
+
+exit 1
diff --git a/tools/testing/selftests/arm64/fp/sve-test.S b/tools/testing/selftests/arm64/fp/sve-test.S
new file mode 100644
index 000000000000..fff60e2a25ad
--- /dev/null
+++ b/tools/testing/selftests/arm64/fp/sve-test.S
@@ -0,0 +1,584 @@
+// SPDX-License-Identifier: GPL-2.0-only
+// Copyright (C) 2015-2019 ARM Limited.
+// Original author: Dave Martin <Dave.Martin@arm.com>
+//
+// Simple Scalable Vector Extension context switch test
+// Repeatedly writes unique test patterns into each SVE register
+// and reads them back to verify integrity.
+//
+// for x in `seq 1 NR_CPUS`; do sve-test & pids=$pids\ $! ; done
+// (leave it running for as long as you want...)
+// kill $pids
+
+#include <asm/unistd.h>
+#include "assembler.h"
+#include "asm-offsets.h"
+#include "sme-inst.h"
+
+#define NZR 32
+#define NPR 16
+#define MAXVL_B (2048 / 8)
+
+.arch_extension sve
+
+.macro _sve_ldr_v zt, xn
+ ldr z\zt, [x\xn]
+.endm
+
+.macro _sve_str_v zt, xn
+ str z\zt, [x\xn]
+.endm
+
+.macro _sve_ldr_p pt, xn
+ ldr p\pt, [x\xn]
+.endm
+
+.macro _sve_str_p pt, xn
+ str p\pt, [x\xn]
+.endm
+
+// Generate accessor functions to read/write programmatically selected
+// SVE registers.
+// x0 is the register index to access
+// x1 is the memory address to read from (getz,setp) or store to (setz,setp)
+// All clobber x0-x2
+define_accessor setz, NZR, _sve_ldr_v
+define_accessor getz, NZR, _sve_str_v
+define_accessor setp, NPR, _sve_ldr_p
+define_accessor getp, NPR, _sve_str_p
+
+// Declare some storate space to shadow the SVE register contents:
+.pushsection .text
+.data
+.align 4
+zref:
+ .space MAXVL_B * NZR
+pref:
+ .space MAXVL_B / 8 * NPR
+ffrref:
+ .space MAXVL_B / 8
+scratch:
+ .space MAXVL_B
+.popsection
+
+// Generate a test pattern for storage in SVE registers
+// x0: pid (16 bits)
+// x1: register number (6 bits)
+// x2: generation (4 bits)
+
+// These values are used to constuct a 32-bit pattern that is repeated in the
+// scratch buffer as many times as will fit:
+// bits 31:28 generation number (increments once per test_loop)
+// bits 27:22 32-bit lane index
+// bits 21:16 register number
+// bits 15: 0 pid
+
+function pattern
+ orr w1, w0, w1, lsl #16
+ orr w2, w1, w2, lsl #28
+
+ ldr x0, =scratch
+ mov w1, #MAXVL_B / 4
+
+0: str w2, [x0], #4
+ add w2, w2, #(1 << 22)
+ subs w1, w1, #1
+ bne 0b
+
+ ret
+endfunction
+
+// Get the address of shadow data for SVE Z-register Z<xn>
+.macro _adrz xd, xn, nrtmp
+ ldr \xd, =zref
+ rdvl x\nrtmp, #1
+ madd \xd, x\nrtmp, \xn, \xd
+.endm
+
+// Get the address of shadow data for SVE P-register P<xn - NZR>
+.macro _adrp xd, xn, nrtmp
+ ldr \xd, =pref
+ rdvl x\nrtmp, #1
+ lsr x\nrtmp, x\nrtmp, #3
+ sub \xn, \xn, #NZR
+ madd \xd, x\nrtmp, \xn, \xd
+.endm
+
+// Set up test pattern in a SVE Z-register
+// x0: pid
+// x1: register number
+// x2: generation
+function setup_zreg
+ mov x4, x30
+
+ mov x6, x1
+ bl pattern
+ _adrz x0, x6, 2
+ mov x5, x0
+ ldr x1, =scratch
+ bl memcpy
+
+ mov x0, x6
+ mov x1, x5
+ bl setz
+
+ ret x4
+endfunction
+
+// Set up test pattern in a SVE P-register
+// x0: pid
+// x1: register number
+// x2: generation
+function setup_preg
+ mov x4, x30
+
+ mov x6, x1
+ bl pattern
+ _adrp x0, x6, 2
+ mov x5, x0
+ ldr x1, =scratch
+ bl memcpy
+
+ mov x0, x6
+ mov x1, x5
+ bl setp
+
+ ret x4
+endfunction
+
+// Set up test pattern in the FFR
+// x0: pid
+// x2: generation
+//
+// We need to generate a canonical FFR value, which consists of a number of
+// low "1" bits, followed by a number of zeros. This gives us 17 unique values
+// per 16 bits of FFR, so we create a 4 bit signature out of the PID and
+// generation, and use that as the initial number of ones in the pattern.
+// We fill the upper lanes of FFR with zeros.
+// Beware: corrupts P0.
+function setup_ffr
+#ifndef SSVE
+ mov x4, x30
+
+ and w0, w0, #0x3
+ bfi w0, w2, #2, #2
+ mov w1, #1
+ lsl w1, w1, w0
+ sub w1, w1, #1
+
+ ldr x0, =ffrref
+ strh w1, [x0], 2
+ rdvl x1, #1
+ lsr x1, x1, #3
+ sub x1, x1, #2
+ bl memclr
+
+ mov x0, #0
+ ldr x1, =ffrref
+ bl setp
+
+ wrffr p0.b
+
+ ret x4
+#else
+ ret
+#endif
+endfunction
+
+// Trivial memory compare: compare x2 bytes starting at address x0 with
+// bytes starting at address x1.
+// Returns only if all bytes match; otherwise, the program is aborted.
+// Clobbers x0-x5.
+function memcmp
+ cbz x2, 2f
+
+ stp x0, x1, [sp, #-0x20]!
+ str x2, [sp, #0x10]
+
+ mov x5, #0
+0: ldrb w3, [x0, x5]
+ ldrb w4, [x1, x5]
+ add x5, x5, #1
+ cmp w3, w4
+ b.ne 1f
+ subs x2, x2, #1
+ b.ne 0b
+
+1: ldr x2, [sp, #0x10]
+ ldp x0, x1, [sp], #0x20
+ b.ne barf
+
+2: ret
+endfunction
+
+// Verify that a SVE Z-register matches its shadow in memory, else abort
+// x0: reg number
+// Clobbers x0-x7.
+function check_zreg
+ mov x3, x30
+
+ _adrz x5, x0, 6
+ mov x4, x0
+ ldr x7, =scratch
+
+ mov x0, x7
+ mov x1, x6
+ bl memfill_ae
+
+ mov x0, x4
+ mov x1, x7
+ bl getz
+
+ mov x0, x5
+ mov x1, x7
+ mov x2, x6
+ mov x30, x3
+ b memcmp
+endfunction
+
+// Verify that a SVE P-register matches its shadow in memory, else abort
+// x0: reg number
+// Clobbers x0-x7.
+function check_preg
+ mov x3, x30
+
+ _adrp x5, x0, 6
+ mov x4, x0
+ ldr x7, =scratch
+
+ mov x0, x7
+ mov x1, x6
+ bl memfill_ae
+
+ mov x0, x4
+ mov x1, x7
+ bl getp
+
+ mov x0, x5
+ mov x1, x7
+ mov x2, x6
+ mov x30, x3
+ b memcmp
+endfunction
+
+// Verify that the FFR matches its shadow in memory, else abort
+// Beware -- corrupts P0.
+// Clobbers x0-x5.
+function check_ffr
+#ifndef SSVE
+ mov x3, x30
+
+ ldr x4, =scratch
+ rdvl x5, #1
+ lsr x5, x5, #3
+
+ mov x0, x4
+ mov x1, x5
+ bl memfill_ae
+
+ rdffr p0.b
+ mov x0, #0
+ mov x1, x4
+ bl getp
+
+ ldr x0, =ffrref
+ mov x1, x4
+ mov x2, x5
+ mov x30, x3
+ b memcmp
+#else
+ ret
+#endif
+endfunction
+
+// Any SVE register modified here can cause corruption in the main
+// thread -- but *only* the registers modified here.
+function irritator_handler
+ // Increment the irritation signal count (x23):
+ ldr x0, [x2, #ucontext_regs + 8 * 23]
+ add x0, x0, #1
+ str x0, [x2, #ucontext_regs + 8 * 23]
+
+ // Corrupt some random Z-regs
+ adr x0, .text + (irritator_handler - .text) / 16 * 16
+ movi v0.8b, #1
+ movi v9.16b, #2
+ movi v31.8b, #3
+#ifndef SSVE
+ // And P0
+ rdffr p0.b
+ // And FFR
+ wrffr p15.b
+#endif
+
+ ret
+endfunction
+
+function tickle_handler
+ // Increment the signal count (x23):
+ ldr x0, [x2, #ucontext_regs + 8 * 23]
+ add x0, x0, #1
+ str x0, [x2, #ucontext_regs + 8 * 23]
+
+ ret
+endfunction
+
+function terminate_handler
+ mov w21, w0
+ mov x20, x2
+
+ puts "Terminated by signal "
+ mov w0, w21
+ bl putdec
+ puts ", no error, iterations="
+ ldr x0, [x20, #ucontext_regs + 8 * 22]
+ bl putdec
+ puts ", signals="
+ ldr x0, [x20, #ucontext_regs + 8 * 23]
+ bl putdecn
+
+ mov x0, #0
+ mov x8, #__NR_exit
+ svc #0
+endfunction
+
+// w0: signal number
+// x1: sa_action
+// w2: sa_flags
+// Clobbers x0-x6,x8
+function setsignal
+ str x30, [sp, #-((sa_sz + 15) / 16 * 16 + 16)]!
+
+ mov w4, w0
+ mov x5, x1
+ mov w6, w2
+
+ add x0, sp, #16
+ mov x1, #sa_sz
+ bl memclr
+
+ mov w0, w4
+ add x1, sp, #16
+ str w6, [x1, #sa_flags]
+ str x5, [x1, #sa_handler]
+ mov x2, #0
+ mov x3, #sa_mask_sz
+ mov x8, #__NR_rt_sigaction
+ svc #0
+
+ cbz w0, 1f
+
+ puts "sigaction failure\n"
+ b .Labort
+
+1: ldr x30, [sp], #((sa_sz + 15) / 16 * 16 + 16)
+ ret
+endfunction
+
+// Main program entry point
+.globl _start
+function _start
+ mov x23, #0 // Irritation signal count
+
+ mov w0, #SIGINT
+ adr x1, terminate_handler
+ mov w2, #SA_SIGINFO
+ bl setsignal
+
+ mov w0, #SIGTERM
+ adr x1, terminate_handler
+ mov w2, #SA_SIGINFO
+ bl setsignal
+
+ mov w0, #SIGUSR1
+ adr x1, irritator_handler
+ mov w2, #SA_SIGINFO
+ orr w2, w2, #SA_NODEFER
+ bl setsignal
+
+ mov w0, #SIGUSR2
+ adr x1, tickle_handler
+ mov w2, #SA_SIGINFO
+ orr w2, w2, #SA_NODEFER
+ bl setsignal
+
+#ifdef SSVE
+ puts "Streaming mode "
+ smstart_sm
+#endif
+
+ // Sanity-check and report the vector length
+
+ rdvl x19, #8
+ cmp x19, #128
+ b.lo 1f
+ cmp x19, #2048
+ b.hi 1f
+ tst x19, #(8 - 1)
+ b.eq 2f
+
+1: puts "Bad vector length: "
+ mov x0, x19
+ bl putdecn
+ b .Labort
+
+2: puts "Vector length:\t"
+ mov x0, x19
+ bl putdec
+ puts " bits\n"
+
+ // Obtain our PID, to ensure test pattern uniqueness between processes
+
+ mov x8, #__NR_getpid
+ svc #0
+ mov x20, x0
+
+ puts "PID:\t"
+ mov x0, x20
+ bl putdecn
+
+#ifdef SSVE
+ smstart_sm // syscalls will have exited streaming mode
+#endif
+
+ mov x22, #0 // generation number, increments per iteration
+.Ltest_loop:
+ rdvl x0, #8
+ cmp x0, x19
+ b.ne vl_barf
+
+ mov x21, #0 // Set up Z-regs & shadow with test pattern
+0: mov x0, x20
+ mov x1, x21
+ and x2, x22, #0xf
+ bl setup_zreg
+ add x21, x21, #1
+ cmp x21, #NZR
+ b.lo 0b
+
+ mov x0, x20 // Set up FFR & shadow with test pattern
+ mov x1, #NZR + NPR
+ and x2, x22, #0xf
+ bl setup_ffr
+
+0: mov x0, x20 // Set up P-regs & shadow with test pattern
+ mov x1, x21
+ and x2, x22, #0xf
+ bl setup_preg
+ add x21, x21, #1
+ cmp x21, #NZR + NPR
+ b.lo 0b
+
+// Can't do this when SVE state is volatile across SVC:
+// mov x8, #__NR_sched_yield // Encourage preemption
+// svc #0
+
+#ifdef SSVE
+ mrs x0, S3_3_C4_C2_2 // SVCR should have ZA=0,SM=1
+ and x1, x0, #3
+ cmp x1, #1
+ b.ne svcr_barf
+#endif
+
+ mov x21, #0
+0: mov x0, x21
+ bl check_zreg
+ add x21, x21, #1
+ cmp x21, #NZR
+ b.lo 0b
+
+0: mov x0, x21
+ bl check_preg
+ add x21, x21, #1
+ cmp x21, #NZR + NPR
+ b.lo 0b
+
+ bl check_ffr
+
+ add x22, x22, #1
+ b .Ltest_loop
+
+.Labort:
+ mov x0, #0
+ mov x1, #SIGABRT
+ mov x8, #__NR_kill
+ svc #0
+endfunction
+
+function barf
+// fpsimd.c acitivty log dump hack
+// ldr w0, =0xdeadc0de
+// mov w8, #__NR_exit
+// svc #0
+// end hack
+ mov x10, x0 // expected data
+ mov x11, x1 // actual data
+ mov x12, x2 // data size
+
+#ifdef SSVE
+ mrs x13, S3_3_C4_C2_2
+#endif
+
+ puts "Mismatch: PID="
+ mov x0, x20
+ bl putdec
+ puts ", iteration="
+ mov x0, x22
+ bl putdec
+ puts ", reg="
+ mov x0, x21
+ bl putdecn
+ puts "\tExpected ["
+ mov x0, x10
+ mov x1, x12
+ bl dumphex
+ puts "]\n\tGot ["
+ mov x0, x11
+ mov x1, x12
+ bl dumphex
+ puts "]\n"
+
+#ifdef SSVE
+ puts "\tSVCR: "
+ mov x0, x13
+ bl putdecn
+#endif
+
+ mov x8, #__NR_getpid
+ svc #0
+// fpsimd.c acitivty log dump hack
+// ldr w0, =0xdeadc0de
+// mov w8, #__NR_exit
+// svc #0
+// ^ end of hack
+ mov x1, #SIGABRT
+ mov x8, #__NR_kill
+ svc #0
+// mov x8, #__NR_exit
+// mov x1, #1
+// svc #0
+endfunction
+
+function vl_barf
+ mov x10, x0
+
+ puts "Bad active VL: "
+ mov x0, x10
+ bl putdecn
+
+ mov x8, #__NR_exit
+ mov x1, #1
+ svc #0
+endfunction
+
+function svcr_barf
+ mov x10, x0
+
+ puts "Bad SVCR: "
+ mov x0, x10
+ bl putdecn
+
+ mov x8, #__NR_exit
+ mov x1, #1
+ svc #0
+endfunction
diff --git a/tools/testing/selftests/arm64/fp/vec-syscfg.c b/tools/testing/selftests/arm64/fp/vec-syscfg.c
new file mode 100644
index 000000000000..ea9c7d47790f
--- /dev/null
+++ b/tools/testing/selftests/arm64/fp/vec-syscfg.c
@@ -0,0 +1,797 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright (C) 2021 ARM Limited.
+ * Original author: Mark Brown <broonie@kernel.org>
+ */
+#include <assert.h>
+#include <errno.h>
+#include <fcntl.h>
+#include <stdbool.h>
+#include <stddef.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+#include <sys/auxv.h>
+#include <sys/prctl.h>
+#include <sys/types.h>
+#include <sys/wait.h>
+#include <asm/sigcontext.h>
+#include <asm/hwcap.h>
+
+#include "../../kselftest.h"
+#include "rdvl.h"
+
+#define ARCH_MIN_VL SVE_VL_MIN
+
+struct vec_data {
+ const char *name;
+ unsigned long hwcap_type;
+ unsigned long hwcap;
+ const char *rdvl_binary;
+ int (*rdvl)(void);
+
+ int prctl_get;
+ int prctl_set;
+ const char *default_vl_file;
+
+ int default_vl;
+ int min_vl;
+ int max_vl;
+};
+
+#define VEC_SVE 0
+#define VEC_SME 1
+
+static struct vec_data vec_data[] = {
+ [VEC_SVE] = {
+ .name = "SVE",
+ .hwcap_type = AT_HWCAP,
+ .hwcap = HWCAP_SVE,
+ .rdvl = rdvl_sve,
+ .rdvl_binary = "./rdvl-sve",
+ .prctl_get = PR_SVE_GET_VL,
+ .prctl_set = PR_SVE_SET_VL,
+ .default_vl_file = "/proc/sys/abi/sve_default_vector_length",
+ },
+ [VEC_SME] = {
+ .name = "SME",
+ .hwcap_type = AT_HWCAP2,
+ .hwcap = HWCAP2_SME,
+ .rdvl = rdvl_sme,
+ .rdvl_binary = "./rdvl-sme",
+ .prctl_get = PR_SME_GET_VL,
+ .prctl_set = PR_SME_SET_VL,
+ .default_vl_file = "/proc/sys/abi/sme_default_vector_length",
+ },
+};
+
+static bool vec_type_supported(struct vec_data *data)
+{
+ return getauxval(data->hwcap_type) & data->hwcap;
+}
+
+static int stdio_read_integer(FILE *f, const char *what, int *val)
+{
+ int n = 0;
+ int ret;
+
+ ret = fscanf(f, "%d%*1[\n]%n", val, &n);
+ if (ret < 1 || n < 1) {
+ ksft_print_msg("failed to parse integer from %s\n", what);
+ return -1;
+ }
+
+ return 0;
+}
+
+/* Start a new process and return the vector length it sees */
+static int get_child_rdvl(struct vec_data *data)
+{
+ FILE *out;
+ int pipefd[2];
+ pid_t pid, child;
+ int read_vl, ret;
+
+ ret = pipe(pipefd);
+ if (ret == -1) {
+ ksft_print_msg("pipe() failed: %d (%s)\n",
+ errno, strerror(errno));
+ return -1;
+ }
+
+ fflush(stdout);
+
+ child = fork();
+ if (child == -1) {
+ ksft_print_msg("fork() failed: %d (%s)\n",
+ errno, strerror(errno));
+ close(pipefd[0]);
+ close(pipefd[1]);
+ return -1;
+ }
+
+ /* Child: put vector length on the pipe */
+ if (child == 0) {
+ /*
+ * Replace stdout with the pipe, errors to stderr from
+ * here as kselftest prints to stdout.
+ */
+ ret = dup2(pipefd[1], 1);
+ if (ret == -1) {
+ fprintf(stderr, "dup2() %d\n", errno);
+ exit(EXIT_FAILURE);
+ }
+
+ /* exec() a new binary which puts the VL on stdout */
+ ret = execl(data->rdvl_binary, data->rdvl_binary, NULL);
+ fprintf(stderr, "execl(%s) failed: %d (%s)\n",
+ data->rdvl_binary, errno, strerror(errno));
+
+ exit(EXIT_FAILURE);
+ }
+
+ close(pipefd[1]);
+
+ /* Parent; wait for the exit status from the child & verify it */
+ do {
+ pid = wait(&ret);
+ if (pid == -1) {
+ ksft_print_msg("wait() failed: %d (%s)\n",
+ errno, strerror(errno));
+ close(pipefd[0]);
+ return -1;
+ }
+ } while (pid != child);
+
+ assert(pid == child);
+
+ if (!WIFEXITED(ret)) {
+ ksft_print_msg("child exited abnormally\n");
+ close(pipefd[0]);
+ return -1;
+ }
+
+ if (WEXITSTATUS(ret) != 0) {
+ ksft_print_msg("child returned error %d\n",
+ WEXITSTATUS(ret));
+ close(pipefd[0]);
+ return -1;
+ }
+
+ out = fdopen(pipefd[0], "r");
+ if (!out) {
+ ksft_print_msg("failed to open child stdout\n");
+ close(pipefd[0]);
+ return -1;
+ }
+
+ ret = stdio_read_integer(out, "child", &read_vl);
+ fclose(out);
+ if (ret != 0)
+ return ret;
+
+ return read_vl;
+}
+
+static int file_read_integer(const char *name, int *val)
+{
+ FILE *f;
+ int ret;
+
+ f = fopen(name, "r");
+ if (!f) {
+ ksft_test_result_fail("Unable to open %s: %d (%s)\n",
+ name, errno,
+ strerror(errno));
+ return -1;
+ }
+
+ ret = stdio_read_integer(f, name, val);
+ fclose(f);
+
+ return ret;
+}
+
+static int file_write_integer(const char *name, int val)
+{
+ FILE *f;
+
+ f = fopen(name, "w");
+ if (!f) {
+ ksft_test_result_fail("Unable to open %s: %d (%s)\n",
+ name, errno,
+ strerror(errno));
+ return -1;
+ }
+
+ fprintf(f, "%d", val);
+ fclose(f);
+
+ return 0;
+}
+
+/*
+ * Verify that we can read the default VL via proc, checking that it
+ * is set in a freshly spawned child.
+ */
+static void proc_read_default(struct vec_data *data)
+{
+ int default_vl, child_vl, ret;
+
+ ret = file_read_integer(data->default_vl_file, &default_vl);
+ if (ret != 0)
+ return;
+
+ /* Is this the actual default seen by new processes? */
+ child_vl = get_child_rdvl(data);
+ if (child_vl != default_vl) {
+ ksft_test_result_fail("%s is %d but child VL is %d\n",
+ data->default_vl_file,
+ default_vl, child_vl);
+ return;
+ }
+
+ ksft_test_result_pass("%s default vector length %d\n", data->name,
+ default_vl);
+ data->default_vl = default_vl;
+}
+
+/* Verify that we can write a minimum value and have it take effect */
+static void proc_write_min(struct vec_data *data)
+{
+ int ret, new_default, child_vl;
+
+ if (geteuid() != 0) {
+ ksft_test_result_skip("Need to be root to write to /proc\n");
+ return;
+ }
+
+ ret = file_write_integer(data->default_vl_file, ARCH_MIN_VL);
+ if (ret != 0)
+ return;
+
+ /* What was the new value? */
+ ret = file_read_integer(data->default_vl_file, &new_default);
+ if (ret != 0)
+ return;
+
+ /* Did it take effect in a new process? */
+ child_vl = get_child_rdvl(data);
+ if (child_vl != new_default) {
+ ksft_test_result_fail("%s is %d but child VL is %d\n",
+ data->default_vl_file,
+ new_default, child_vl);
+ return;
+ }
+
+ ksft_test_result_pass("%s minimum vector length %d\n", data->name,
+ new_default);
+ data->min_vl = new_default;
+
+ file_write_integer(data->default_vl_file, data->default_vl);
+}
+
+/* Verify that we can write a maximum value and have it take effect */
+static void proc_write_max(struct vec_data *data)
+{
+ int ret, new_default, child_vl;
+
+ if (geteuid() != 0) {
+ ksft_test_result_skip("Need to be root to write to /proc\n");
+ return;
+ }
+
+ /* -1 is accepted by the /proc interface as the maximum VL */
+ ret = file_write_integer(data->default_vl_file, -1);
+ if (ret != 0)
+ return;
+
+ /* What was the new value? */
+ ret = file_read_integer(data->default_vl_file, &new_default);
+ if (ret != 0)
+ return;
+
+ /* Did it take effect in a new process? */
+ child_vl = get_child_rdvl(data);
+ if (child_vl != new_default) {
+ ksft_test_result_fail("%s is %d but child VL is %d\n",
+ data->default_vl_file,
+ new_default, child_vl);
+ return;
+ }
+
+ ksft_test_result_pass("%s maximum vector length %d\n", data->name,
+ new_default);
+ data->max_vl = new_default;
+
+ file_write_integer(data->default_vl_file, data->default_vl);
+}
+
+/* Can we read back a VL from prctl? */
+static void prctl_get(struct vec_data *data)
+{
+ int ret;
+
+ ret = prctl(data->prctl_get);
+ if (ret == -1) {
+ ksft_test_result_fail("%s prctl() read failed: %d (%s)\n",
+ data->name, errno, strerror(errno));
+ return;
+ }
+
+ /* Mask out any flags */
+ ret &= PR_SVE_VL_LEN_MASK;
+
+ /* Is that what we can read back directly? */
+ if (ret == data->rdvl())
+ ksft_test_result_pass("%s current VL is %d\n",
+ data->name, ret);
+ else
+ ksft_test_result_fail("%s prctl() VL %d but RDVL is %d\n",
+ data->name, ret, data->rdvl());
+}
+
+/* Does the prctl let us set the VL we already have? */
+static void prctl_set_same(struct vec_data *data)
+{
+ int cur_vl = data->rdvl();
+ int ret;
+
+ ret = prctl(data->prctl_set, cur_vl);
+ if (ret < 0) {
+ ksft_test_result_fail("%s prctl set failed: %d (%s)\n",
+ data->name, errno, strerror(errno));
+ return;
+ }
+
+ ksft_test_result(cur_vl == data->rdvl(),
+ "%s set VL %d and have VL %d\n",
+ data->name, cur_vl, data->rdvl());
+}
+
+/* Can we set a new VL for this process? */
+static void prctl_set(struct vec_data *data)
+{
+ int ret;
+
+ if (data->min_vl == data->max_vl) {
+ ksft_test_result_skip("%s only one VL supported\n",
+ data->name);
+ return;
+ }
+
+ /* Try to set the minimum VL */
+ ret = prctl(data->prctl_set, data->min_vl);
+ if (ret < 0) {
+ ksft_test_result_fail("%s prctl set failed for %d: %d (%s)\n",
+ data->name, data->min_vl,
+ errno, strerror(errno));
+ return;
+ }
+
+ if ((ret & PR_SVE_VL_LEN_MASK) != data->min_vl) {
+ ksft_test_result_fail("%s prctl set %d but return value is %d\n",
+ data->name, data->min_vl, data->rdvl());
+ return;
+ }
+
+ if (data->rdvl() != data->min_vl) {
+ ksft_test_result_fail("%s set %d but RDVL is %d\n",
+ data->name, data->min_vl, data->rdvl());
+ return;
+ }
+
+ /* Try to set the maximum VL */
+ ret = prctl(data->prctl_set, data->max_vl);
+ if (ret < 0) {
+ ksft_test_result_fail("%s prctl set failed for %d: %d (%s)\n",
+ data->name, data->max_vl,
+ errno, strerror(errno));
+ return;
+ }
+
+ if ((ret & PR_SVE_VL_LEN_MASK) != data->max_vl) {
+ ksft_test_result_fail("%s prctl() set %d but return value is %d\n",
+ data->name, data->max_vl, data->rdvl());
+ return;
+ }
+
+ /* The _INHERIT flag should not be present when we read the VL */
+ ret = prctl(data->prctl_get);
+ if (ret == -1) {
+ ksft_test_result_fail("%s prctl() read failed: %d (%s)\n",
+ data->name, errno, strerror(errno));
+ return;
+ }
+
+ if (ret & PR_SVE_VL_INHERIT) {
+ ksft_test_result_fail("%s prctl() reports _INHERIT\n",
+ data->name);
+ return;
+ }
+
+ ksft_test_result_pass("%s prctl() set min/max\n", data->name);
+}
+
+/* If we didn't request it a new VL shouldn't affect the child */
+static void prctl_set_no_child(struct vec_data *data)
+{
+ int ret, child_vl;
+
+ if (data->min_vl == data->max_vl) {
+ ksft_test_result_skip("%s only one VL supported\n",
+ data->name);
+ return;
+ }
+
+ ret = prctl(data->prctl_set, data->min_vl);
+ if (ret < 0) {
+ ksft_test_result_fail("%s prctl set failed for %d: %d (%s)\n",
+ data->name, data->min_vl,
+ errno, strerror(errno));
+ return;
+ }
+
+ /* Ensure the default VL is different */
+ ret = file_write_integer(data->default_vl_file, data->max_vl);
+ if (ret != 0)
+ return;
+
+ /* Check that the child has the default we just set */
+ child_vl = get_child_rdvl(data);
+ if (child_vl != data->max_vl) {
+ ksft_test_result_fail("%s is %d but child VL is %d\n",
+ data->default_vl_file,
+ data->max_vl, child_vl);
+ return;
+ }
+
+ ksft_test_result_pass("%s vector length used default\n", data->name);
+
+ file_write_integer(data->default_vl_file, data->default_vl);
+}
+
+/* If we didn't request it a new VL shouldn't affect the child */
+static void prctl_set_for_child(struct vec_data *data)
+{
+ int ret, child_vl;
+
+ if (data->min_vl == data->max_vl) {
+ ksft_test_result_skip("%s only one VL supported\n",
+ data->name);
+ return;
+ }
+
+ ret = prctl(data->prctl_set, data->min_vl | PR_SVE_VL_INHERIT);
+ if (ret < 0) {
+ ksft_test_result_fail("%s prctl set failed for %d: %d (%s)\n",
+ data->name, data->min_vl,
+ errno, strerror(errno));
+ return;
+ }
+
+ /* The _INHERIT flag should be present when we read the VL */
+ ret = prctl(data->prctl_get);
+ if (ret == -1) {
+ ksft_test_result_fail("%s prctl() read failed: %d (%s)\n",
+ data->name, errno, strerror(errno));
+ return;
+ }
+ if (!(ret & PR_SVE_VL_INHERIT)) {
+ ksft_test_result_fail("%s prctl() does not report _INHERIT\n",
+ data->name);
+ return;
+ }
+
+ /* Ensure the default VL is different */
+ ret = file_write_integer(data->default_vl_file, data->max_vl);
+ if (ret != 0)
+ return;
+
+ /* Check that the child inherited our VL */
+ child_vl = get_child_rdvl(data);
+ if (child_vl != data->min_vl) {
+ ksft_test_result_fail("%s is %d but child VL is %d\n",
+ data->default_vl_file,
+ data->min_vl, child_vl);
+ return;
+ }
+
+ ksft_test_result_pass("%s vector length was inherited\n", data->name);
+
+ file_write_integer(data->default_vl_file, data->default_vl);
+}
+
+/* _ONEXEC takes effect only in the child process */
+static void prctl_set_onexec(struct vec_data *data)
+{
+ int ret, child_vl;
+
+ if (data->min_vl == data->max_vl) {
+ ksft_test_result_skip("%s only one VL supported\n",
+ data->name);
+ return;
+ }
+
+ /* Set a known value for the default and our current VL */
+ ret = file_write_integer(data->default_vl_file, data->max_vl);
+ if (ret != 0)
+ return;
+
+ ret = prctl(data->prctl_set, data->max_vl);
+ if (ret < 0) {
+ ksft_test_result_fail("%s prctl set failed for %d: %d (%s)\n",
+ data->name, data->min_vl,
+ errno, strerror(errno));
+ return;
+ }
+
+ /* Set a different value for the child to have on exec */
+ ret = prctl(data->prctl_set, data->min_vl | PR_SVE_SET_VL_ONEXEC);
+ if (ret < 0) {
+ ksft_test_result_fail("%s prctl set failed for %d: %d (%s)\n",
+ data->name, data->min_vl,
+ errno, strerror(errno));
+ return;
+ }
+
+ /* Our current VL should stay the same */
+ if (data->rdvl() != data->max_vl) {
+ ksft_test_result_fail("%s VL changed by _ONEXEC prctl()\n",
+ data->name);
+ return;
+ }
+
+ /* Check that the child inherited our VL */
+ child_vl = get_child_rdvl(data);
+ if (child_vl != data->min_vl) {
+ ksft_test_result_fail("Set %d _ONEXEC but child VL is %d\n",
+ data->min_vl, child_vl);
+ return;
+ }
+
+ ksft_test_result_pass("%s vector length set on exec\n", data->name);
+
+ file_write_integer(data->default_vl_file, data->default_vl);
+}
+
+/* For each VQ verify that setting via prctl() does the right thing */
+static void prctl_set_all_vqs(struct vec_data *data)
+{
+ int ret, vq, vl, new_vl, i;
+ int orig_vls[ARRAY_SIZE(vec_data)];
+ int errors = 0;
+
+ if (!data->min_vl || !data->max_vl) {
+ ksft_test_result_skip("%s Failed to enumerate VLs, not testing VL setting\n",
+ data->name);
+ return;
+ }
+
+ for (i = 0; i < ARRAY_SIZE(vec_data); i++) {
+ if (!vec_type_supported(&vec_data[i]))
+ continue;
+ orig_vls[i] = vec_data[i].rdvl();
+ }
+
+ for (vq = SVE_VQ_MIN; vq <= SVE_VQ_MAX; vq++) {
+ vl = sve_vl_from_vq(vq);
+
+ /* Attempt to set the VL */
+ ret = prctl(data->prctl_set, vl);
+ if (ret < 0) {
+ errors++;
+ ksft_print_msg("%s prctl set failed for %d: %d (%s)\n",
+ data->name, vl,
+ errno, strerror(errno));
+ continue;
+ }
+
+ new_vl = ret & PR_SVE_VL_LEN_MASK;
+
+ /* Check that we actually have the reported new VL */
+ if (data->rdvl() != new_vl) {
+ ksft_print_msg("Set %s VL %d but RDVL reports %d\n",
+ data->name, new_vl, data->rdvl());
+ errors++;
+ }
+
+ /* Did any other VLs change? */
+ for (i = 0; i < ARRAY_SIZE(vec_data); i++) {
+ if (&vec_data[i] == data)
+ continue;
+
+ if (!vec_type_supported(&vec_data[i]))
+ continue;
+
+ if (vec_data[i].rdvl() != orig_vls[i]) {
+ ksft_print_msg("%s VL changed from %d to %d\n",
+ vec_data[i].name, orig_vls[i],
+ vec_data[i].rdvl());
+ errors++;
+ }
+ }
+
+ /* Was that the VL we asked for? */
+ if (new_vl == vl)
+ continue;
+
+ /* Should round up to the minimum VL if below it */
+ if (vl < data->min_vl) {
+ if (new_vl != data->min_vl) {
+ ksft_print_msg("%s VL %d returned %d not minimum %d\n",
+ data->name, vl, new_vl,
+ data->min_vl);
+ errors++;
+ }
+
+ continue;
+ }
+
+ /* Should round down to maximum VL if above it */
+ if (vl > data->max_vl) {
+ if (new_vl != data->max_vl) {
+ ksft_print_msg("%s VL %d returned %d not maximum %d\n",
+ data->name, vl, new_vl,
+ data->max_vl);
+ errors++;
+ }
+
+ continue;
+ }
+
+ /* Otherwise we should've rounded down */
+ if (!(new_vl < vl)) {
+ ksft_print_msg("%s VL %d returned %d, did not round down\n",
+ data->name, vl, new_vl);
+ errors++;
+
+ continue;
+ }
+ }
+
+ ksft_test_result(errors == 0, "%s prctl() set all VLs, %d errors\n",
+ data->name, errors);
+}
+
+typedef void (*test_type)(struct vec_data *);
+
+static const test_type tests[] = {
+ /*
+ * The default/min/max tests must be first and in this order
+ * to provide data for other tests.
+ */
+ proc_read_default,
+ proc_write_min,
+ proc_write_max,
+
+ prctl_get,
+ prctl_set_same,
+ prctl_set,
+ prctl_set_no_child,
+ prctl_set_for_child,
+ prctl_set_onexec,
+ prctl_set_all_vqs,
+};
+
+static inline void smstart(void)
+{
+ asm volatile("msr S0_3_C4_C7_3, xzr");
+}
+
+static inline void smstart_sm(void)
+{
+ asm volatile("msr S0_3_C4_C3_3, xzr");
+}
+
+static inline void smstop(void)
+{
+ asm volatile("msr S0_3_C4_C6_3, xzr");
+}
+
+
+/*
+ * Verify we can change the SVE vector length while SME is active and
+ * continue to use SME afterwards.
+ */
+static void change_sve_with_za(void)
+{
+ struct vec_data *sve_data = &vec_data[VEC_SVE];
+ bool pass = true;
+ int ret, i;
+
+ if (sve_data->min_vl == sve_data->max_vl) {
+ ksft_print_msg("Only one SVE VL supported, can't change\n");
+ ksft_test_result_skip("change_sve_while_sme\n");
+ return;
+ }
+
+ /* Ensure we will trigger a change when we set the maximum */
+ ret = prctl(sve_data->prctl_set, sve_data->min_vl);
+ if (ret != sve_data->min_vl) {
+ ksft_print_msg("Failed to set SVE VL %d: %d\n",
+ sve_data->min_vl, ret);
+ pass = false;
+ }
+
+ /* Enable SM and ZA */
+ smstart();
+
+ /* Trigger another VL change */
+ ret = prctl(sve_data->prctl_set, sve_data->max_vl);
+ if (ret != sve_data->max_vl) {
+ ksft_print_msg("Failed to set SVE VL %d: %d\n",
+ sve_data->max_vl, ret);
+ pass = false;
+ }
+
+ /*
+ * Spin for a bit with SM enabled to try to trigger another
+ * save/restore. We can't use syscalls without exiting
+ * streaming mode.
+ */
+ for (i = 0; i < 100000000; i++)
+ smstart_sm();
+
+ /*
+ * TODO: Verify that ZA was preserved over the VL change and
+ * spin.
+ */
+
+ /* Clean up after ourselves */
+ smstop();
+ ret = prctl(sve_data->prctl_set, sve_data->default_vl);
+ if (ret != sve_data->default_vl) {
+ ksft_print_msg("Failed to restore SVE VL %d: %d\n",
+ sve_data->default_vl, ret);
+ pass = false;
+ }
+
+ ksft_test_result(pass, "change_sve_with_za\n");
+}
+
+typedef void (*test_all_type)(void);
+
+static const struct {
+ const char *name;
+ test_all_type test;
+} all_types_tests[] = {
+ { "change_sve_with_za", change_sve_with_za },
+};
+
+int main(void)
+{
+ bool all_supported = true;
+ int i, j;
+
+ ksft_print_header();
+ ksft_set_plan(ARRAY_SIZE(tests) * ARRAY_SIZE(vec_data) +
+ ARRAY_SIZE(all_types_tests));
+
+ for (i = 0; i < ARRAY_SIZE(vec_data); i++) {
+ struct vec_data *data = &vec_data[i];
+ unsigned long supported;
+
+ supported = vec_type_supported(data);
+ if (!supported)
+ all_supported = false;
+
+ for (j = 0; j < ARRAY_SIZE(tests); j++) {
+ if (supported)
+ tests[j](data);
+ else
+ ksft_test_result_skip("%s not supported\n",
+ data->name);
+ }
+ }
+
+ for (i = 0; i < ARRAY_SIZE(all_types_tests); i++) {
+ if (all_supported)
+ all_types_tests[i].test();
+ else
+ ksft_test_result_skip("%s\n", all_types_tests[i].name);
+ }
+
+ ksft_exit_pass();
+}
diff --git a/tools/testing/selftests/arm64/fp/vlset.c b/tools/testing/selftests/arm64/fp/vlset.c
new file mode 100644
index 000000000000..76912a581a95
--- /dev/null
+++ b/tools/testing/selftests/arm64/fp/vlset.c
@@ -0,0 +1,161 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright (C) 2015-2019 ARM Limited.
+ * Original author: Dave Martin <Dave.Martin@arm.com>
+ */
+#define _GNU_SOURCE
+#include <assert.h>
+#include <errno.h>
+#include <limits.h>
+#include <stddef.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <getopt.h>
+#include <unistd.h>
+#include <sys/auxv.h>
+#include <sys/prctl.h>
+#include <asm/hwcap.h>
+#include <asm/sigcontext.h>
+
+static int inherit = 0;
+static int no_inherit = 0;
+static int force = 0;
+static unsigned long vl;
+static int set_ctl = PR_SVE_SET_VL;
+static int get_ctl = PR_SVE_GET_VL;
+
+static const struct option options[] = {
+ { "force", no_argument, NULL, 'f' },
+ { "inherit", no_argument, NULL, 'i' },
+ { "max", no_argument, NULL, 'M' },
+ { "no-inherit", no_argument, &no_inherit, 1 },
+ { "sme", no_argument, NULL, 's' },
+ { "help", no_argument, NULL, '?' },
+ {}
+};
+
+static char const *program_name;
+
+static int parse_options(int argc, char **argv)
+{
+ int c;
+ char *rest;
+
+ program_name = strrchr(argv[0], '/');
+ if (program_name)
+ ++program_name;
+ else
+ program_name = argv[0];
+
+ while ((c = getopt_long(argc, argv, "Mfhi", options, NULL)) != -1)
+ switch (c) {
+ case 'M': vl = SVE_VL_MAX; break;
+ case 'f': force = 1; break;
+ case 'i': inherit = 1; break;
+ case 's': set_ctl = PR_SME_SET_VL;
+ get_ctl = PR_SME_GET_VL;
+ break;
+ case 0: break;
+ default: goto error;
+ }
+
+ if (inherit && no_inherit)
+ goto error;
+
+ if (!vl) {
+ /* vector length */
+ if (optind >= argc)
+ goto error;
+
+ errno = 0;
+ vl = strtoul(argv[optind], &rest, 0);
+ if (*rest) {
+ vl = ULONG_MAX;
+ errno = EINVAL;
+ }
+ if (vl == ULONG_MAX && errno) {
+ fprintf(stderr, "%s: %s: %s\n",
+ program_name, argv[optind], strerror(errno));
+ goto error;
+ }
+
+ ++optind;
+ }
+
+ /* command */
+ if (optind >= argc)
+ goto error;
+
+ return 0;
+
+error:
+ fprintf(stderr,
+ "Usage: %s [-f | --force] "
+ "[-i | --inherit | --no-inherit] "
+ "{-M | --max | <vector length>} "
+ "<command> [<arguments> ...]\n",
+ program_name);
+ return -1;
+}
+
+int main(int argc, char **argv)
+{
+ int ret = 126; /* same as sh(1) command-not-executable error */
+ long flags;
+ char *path;
+ int t, e;
+
+ if (parse_options(argc, argv))
+ return 2; /* same as sh(1) builtin incorrect-usage */
+
+ if (vl & ~(vl & PR_SVE_VL_LEN_MASK)) {
+ fprintf(stderr, "%s: Invalid vector length %lu\n",
+ program_name, vl);
+ return 2; /* same as sh(1) builtin incorrect-usage */
+ }
+
+ if (!(getauxval(AT_HWCAP) & HWCAP_SVE)) {
+ fprintf(stderr, "%s: Scalable Vector Extension not present\n",
+ program_name);
+
+ if (!force)
+ goto error;
+
+ fputs("Going ahead anyway (--force): "
+ "This is a debug option. Don't rely on it.\n",
+ stderr);
+ }
+
+ flags = PR_SVE_SET_VL_ONEXEC;
+ if (inherit)
+ flags |= PR_SVE_VL_INHERIT;
+
+ t = prctl(set_ctl, vl | flags);
+ if (t < 0) {
+ fprintf(stderr, "%s: PR_SVE_SET_VL: %s\n",
+ program_name, strerror(errno));
+ goto error;
+ }
+
+ t = prctl(get_ctl);
+ if (t == -1) {
+ fprintf(stderr, "%s: PR_SVE_GET_VL: %s\n",
+ program_name, strerror(errno));
+ goto error;
+ }
+ flags = PR_SVE_VL_LEN_MASK;
+ flags = t & ~flags;
+
+ assert(optind < argc);
+ path = argv[optind];
+
+ execvp(path, &argv[optind]);
+ e = errno;
+ if (errno == ENOENT)
+ ret = 127; /* same as sh(1) not-found error */
+ fprintf(stderr, "%s: %s: %s\n", program_name, path, strerror(e));
+
+error:
+ return ret; /* same as sh(1) not-executable error */
+}
diff --git a/tools/testing/selftests/arm64/fp/za-fork-asm.S b/tools/testing/selftests/arm64/fp/za-fork-asm.S
new file mode 100644
index 000000000000..2fafadd491c3
--- /dev/null
+++ b/tools/testing/selftests/arm64/fp/za-fork-asm.S
@@ -0,0 +1,61 @@
+// SPDX-License-Identifier: GPL-2.0-only
+// Copyright (C) 2021 ARM Limited.
+
+#include "sme-inst.h"
+
+.arch_extension sve
+
+#define MAGIC 42
+
+#define MAXVL 2048
+#define MAXVL_B (MAXVL / 8)
+
+.pushsection .text
+.data
+.align 4
+scratch:
+ .space MAXVL_B
+.popsection
+
+.globl fork_test
+fork_test:
+ smstart_za
+
+ // For simplicity just set one word in one vector, other tests
+ // cover general data corruption issues.
+ ldr x0, =scratch
+ mov x1, #MAGIC
+ str x1, [x0]
+ mov w12, wzr
+ _ldr_za 12, 0 // ZA.H[W12] loaded from [X0]
+
+ // Tail call into the C portion that does the fork & verify
+ b fork_test_c
+
+.globl verify_fork
+verify_fork:
+ // SVCR should have ZA=1, SM=0
+ mrs x0, S3_3_C4_C2_2
+ and x1, x0, #3
+ cmp x1, #2
+ beq 1f
+ mov x0, xzr
+ b 100f
+1:
+
+ // ZA should still have the value we loaded
+ ldr x0, =scratch
+ mov w12, wzr
+ _str_za 12, 0 // ZA.H[W12] stored to [X0]
+ ldr x1, [x0]
+ cmp x1, #MAGIC
+ beq 2f
+ mov x0, xzr
+ b 100f
+
+2:
+ // All tests passed
+ mov x0, #1
+100:
+ ret
+
diff --git a/tools/testing/selftests/arm64/fp/za-fork.c b/tools/testing/selftests/arm64/fp/za-fork.c
new file mode 100644
index 000000000000..587b94648222
--- /dev/null
+++ b/tools/testing/selftests/arm64/fp/za-fork.c
@@ -0,0 +1,100 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright (C) 2022 ARM Limited.
+ * Original author: Mark Brown <broonie@kernel.org>
+ */
+
+// SPDX-License-Identifier: GPL-2.0-only
+
+#include <linux/sched.h>
+#include <linux/wait.h>
+
+#include "kselftest.h"
+
+#define EXPECTED_TESTS 1
+
+int fork_test(void);
+int verify_fork(void);
+
+/*
+ * If we fork the value in the parent should be unchanged and the
+ * child should start with the same value. This is called from the
+ * fork_test() asm function.
+ */
+int fork_test_c(void)
+{
+ pid_t newpid, waiting;
+ int child_status, parent_result;
+
+ newpid = fork();
+ if (newpid == 0) {
+ /* In child */
+ if (!verify_fork()) {
+ ksft_print_msg("ZA state invalid in child\n");
+ exit(0);
+ } else {
+ exit(1);
+ }
+ }
+ if (newpid < 0) {
+ ksft_print_msg("fork() failed: %d\n", newpid);
+
+ return 0;
+ }
+
+ parent_result = verify_fork();
+ if (!parent_result)
+ ksft_print_msg("ZA state invalid in parent\n");
+
+ for (;;) {
+ waiting = waitpid(newpid, &child_status, 0);
+
+ if (waiting < 0) {
+ if (errno == EINTR)
+ continue;
+ ksft_print_msg("waitpid() failed: %d\n", errno);
+ return 0;
+ }
+ if (waiting != newpid) {
+ ksft_print_msg("waitpid() returned wrong PID\n");
+ return 0;
+ }
+
+ if (!WIFEXITED(child_status)) {
+ ksft_print_msg("child did not exit\n");
+ return 0;
+ }
+
+ return WEXITSTATUS(child_status) && parent_result;
+ }
+}
+
+int main(int argc, char **argv)
+{
+ int ret, i;
+
+ ksft_print_header();
+ ksft_set_plan(EXPECTED_TESTS);
+
+ ksft_print_msg("PID: %d\n", getpid());
+
+ /*
+ * This test is run with nolibc which doesn't support hwcap and
+ * it's probably disproportionate to implement so instead check
+ * for the default vector length configuration in /proc.
+ */
+ ret = open("/proc/sys/abi/sme_default_vector_length", O_RDONLY, 0);
+ if (ret >= 0) {
+ ksft_test_result(fork_test(), "fork_test\n");
+
+ } else {
+ ksft_print_msg("SME not supported\n");
+ for (i = 0; i < EXPECTED_TESTS; i++) {
+ ksft_test_result_skip("fork_test\n");
+ }
+ }
+
+ ksft_finished();
+
+ return 0;
+}
diff --git a/tools/testing/selftests/arm64/fp/za-ptrace.c b/tools/testing/selftests/arm64/fp/za-ptrace.c
new file mode 100644
index 000000000000..ac27d87396fc
--- /dev/null
+++ b/tools/testing/selftests/arm64/fp/za-ptrace.c
@@ -0,0 +1,366 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright (C) 2021 ARM Limited.
+ */
+#include <errno.h>
+#include <stdbool.h>
+#include <stddef.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+#include <sys/auxv.h>
+#include <sys/prctl.h>
+#include <sys/ptrace.h>
+#include <sys/types.h>
+#include <sys/uio.h>
+#include <sys/wait.h>
+#include <asm/sigcontext.h>
+#include <asm/ptrace.h>
+
+#include "../../kselftest.h"
+
+/* <linux/elf.h> and <sys/auxv.h> don't like each other, so: */
+#ifndef NT_ARM_ZA
+#define NT_ARM_ZA 0x40c
+#endif
+
+/*
+ * The architecture defines the maximum VQ as 16 but for extensibility
+ * the kernel specifies the SVE_VQ_MAX as 512 resulting in us running
+ * a *lot* more tests than are useful if we use it. Until the
+ * architecture is extended let's limit our coverage to what is
+ * currently allowed, plus one extra to ensure we cover constraining
+ * the VL as expected.
+ */
+#define TEST_VQ_MAX 17
+
+#define EXPECTED_TESTS (((TEST_VQ_MAX - SVE_VQ_MIN) + 1) * 3)
+
+static void fill_buf(char *buf, size_t size)
+{
+ int i;
+
+ for (i = 0; i < size; i++)
+ buf[i] = random();
+}
+
+static int do_child(void)
+{
+ if (ptrace(PTRACE_TRACEME, -1, NULL, NULL))
+ ksft_exit_fail_msg("PTRACE_TRACEME", strerror(errno));
+
+ if (raise(SIGSTOP))
+ ksft_exit_fail_msg("raise(SIGSTOP)", strerror(errno));
+
+ return EXIT_SUCCESS;
+}
+
+static struct user_za_header *get_za(pid_t pid, void **buf, size_t *size)
+{
+ struct user_za_header *za;
+ void *p;
+ size_t sz = sizeof(*za);
+ struct iovec iov;
+
+ while (1) {
+ if (*size < sz) {
+ p = realloc(*buf, sz);
+ if (!p) {
+ errno = ENOMEM;
+ goto error;
+ }
+
+ *buf = p;
+ *size = sz;
+ }
+
+ iov.iov_base = *buf;
+ iov.iov_len = sz;
+ if (ptrace(PTRACE_GETREGSET, pid, NT_ARM_ZA, &iov))
+ goto error;
+
+ za = *buf;
+ if (za->size <= sz)
+ break;
+
+ sz = za->size;
+ }
+
+ return za;
+
+error:
+ return NULL;
+}
+
+static int set_za(pid_t pid, const struct user_za_header *za)
+{
+ struct iovec iov;
+
+ iov.iov_base = (void *)za;
+ iov.iov_len = za->size;
+ return ptrace(PTRACE_SETREGSET, pid, NT_ARM_ZA, &iov);
+}
+
+/* Validate attempting to set the specfied VL via ptrace */
+static void ptrace_set_get_vl(pid_t child, unsigned int vl, bool *supported)
+{
+ struct user_za_header za;
+ struct user_za_header *new_za = NULL;
+ size_t new_za_size = 0;
+ int ret, prctl_vl;
+
+ *supported = false;
+
+ /* Check if the VL is supported in this process */
+ prctl_vl = prctl(PR_SME_SET_VL, vl);
+ if (prctl_vl == -1)
+ ksft_exit_fail_msg("prctl(PR_SME_SET_VL) failed: %s (%d)\n",
+ strerror(errno), errno);
+
+ /* If the VL is not supported then a supported VL will be returned */
+ *supported = (prctl_vl == vl);
+
+ /* Set the VL by doing a set with no register payload */
+ memset(&za, 0, sizeof(za));
+ za.size = sizeof(za);
+ za.vl = vl;
+ ret = set_za(child, &za);
+ if (ret != 0) {
+ ksft_test_result_fail("Failed to set VL %u\n", vl);
+ return;
+ }
+
+ /*
+ * Read back the new register state and verify that we have the
+ * same VL that we got from prctl() on ourselves.
+ */
+ if (!get_za(child, (void **)&new_za, &new_za_size)) {
+ ksft_test_result_fail("Failed to read VL %u\n", vl);
+ return;
+ }
+
+ ksft_test_result(new_za->vl = prctl_vl, "Set VL %u\n", vl);
+
+ free(new_za);
+}
+
+/* Validate attempting to set no ZA data and read it back */
+static void ptrace_set_no_data(pid_t child, unsigned int vl)
+{
+ void *read_buf = NULL;
+ struct user_za_header write_za;
+ struct user_za_header *read_za;
+ size_t read_za_size = 0;
+ int ret;
+
+ /* Set up some data and write it out */
+ memset(&write_za, 0, sizeof(write_za));
+ write_za.size = ZA_PT_ZA_OFFSET;
+ write_za.vl = vl;
+
+ ret = set_za(child, &write_za);
+ if (ret != 0) {
+ ksft_test_result_fail("Failed to set VL %u no data\n", vl);
+ return;
+ }
+
+ /* Read the data back */
+ if (!get_za(child, (void **)&read_buf, &read_za_size)) {
+ ksft_test_result_fail("Failed to read VL %u no data\n", vl);
+ return;
+ }
+ read_za = read_buf;
+
+ /* We might read more data if there's extensions we don't know */
+ if (read_za->size < write_za.size) {
+ ksft_test_result_fail("VL %u wrote %d bytes, only read %d\n",
+ vl, write_za.size, read_za->size);
+ goto out_read;
+ }
+
+ ksft_test_result(read_za->size == write_za.size,
+ "Disabled ZA for VL %u\n", vl);
+
+out_read:
+ free(read_buf);
+}
+
+/* Validate attempting to set data and read it back */
+static void ptrace_set_get_data(pid_t child, unsigned int vl)
+{
+ void *write_buf;
+ void *read_buf = NULL;
+ struct user_za_header *write_za;
+ struct user_za_header *read_za;
+ size_t read_za_size = 0;
+ unsigned int vq = sve_vq_from_vl(vl);
+ int ret;
+ size_t data_size;
+
+ data_size = ZA_PT_SIZE(vq);
+ write_buf = malloc(data_size);
+ if (!write_buf) {
+ ksft_test_result_fail("Error allocating %d byte buffer for VL %u\n",
+ data_size, vl);
+ return;
+ }
+ write_za = write_buf;
+
+ /* Set up some data and write it out */
+ memset(write_za, 0, data_size);
+ write_za->size = data_size;
+ write_za->vl = vl;
+
+ fill_buf(write_buf + ZA_PT_ZA_OFFSET, ZA_PT_ZA_SIZE(vq));
+
+ ret = set_za(child, write_za);
+ if (ret != 0) {
+ ksft_test_result_fail("Failed to set VL %u data\n", vl);
+ goto out;
+ }
+
+ /* Read the data back */
+ if (!get_za(child, (void **)&read_buf, &read_za_size)) {
+ ksft_test_result_fail("Failed to read VL %u data\n", vl);
+ goto out;
+ }
+ read_za = read_buf;
+
+ /* We might read more data if there's extensions we don't know */
+ if (read_za->size < write_za->size) {
+ ksft_test_result_fail("VL %u wrote %d bytes, only read %d\n",
+ vl, write_za->size, read_za->size);
+ goto out_read;
+ }
+
+ ksft_test_result(memcmp(write_buf + ZA_PT_ZA_OFFSET,
+ read_buf + ZA_PT_ZA_OFFSET,
+ ZA_PT_ZA_SIZE(vq)) == 0,
+ "Data match for VL %u\n", vl);
+
+out_read:
+ free(read_buf);
+out:
+ free(write_buf);
+}
+
+static int do_parent(pid_t child)
+{
+ int ret = EXIT_FAILURE;
+ pid_t pid;
+ int status;
+ siginfo_t si;
+ unsigned int vq, vl;
+ bool vl_supported;
+
+ /* Attach to the child */
+ while (1) {
+ int sig;
+
+ pid = wait(&status);
+ if (pid == -1) {
+ perror("wait");
+ goto error;
+ }
+
+ /*
+ * This should never happen but it's hard to flag in
+ * the framework.
+ */
+ if (pid != child)
+ continue;
+
+ if (WIFEXITED(status) || WIFSIGNALED(status))
+ ksft_exit_fail_msg("Child died unexpectedly\n");
+
+ if (!WIFSTOPPED(status))
+ goto error;
+
+ sig = WSTOPSIG(status);
+
+ if (ptrace(PTRACE_GETSIGINFO, pid, NULL, &si)) {
+ if (errno == ESRCH)
+ goto disappeared;
+
+ if (errno == EINVAL) {
+ sig = 0; /* bust group-stop */
+ goto cont;
+ }
+
+ ksft_test_result_fail("PTRACE_GETSIGINFO: %s\n",
+ strerror(errno));
+ goto error;
+ }
+
+ if (sig == SIGSTOP && si.si_code == SI_TKILL &&
+ si.si_pid == pid)
+ break;
+
+ cont:
+ if (ptrace(PTRACE_CONT, pid, NULL, sig)) {
+ if (errno == ESRCH)
+ goto disappeared;
+
+ ksft_test_result_fail("PTRACE_CONT: %s\n",
+ strerror(errno));
+ goto error;
+ }
+ }
+
+ ksft_print_msg("Parent is %d, child is %d\n", getpid(), child);
+
+ /* Step through every possible VQ */
+ for (vq = SVE_VQ_MIN; vq <= TEST_VQ_MAX; vq++) {
+ vl = sve_vl_from_vq(vq);
+
+ /* First, try to set this vector length */
+ ptrace_set_get_vl(child, vl, &vl_supported);
+
+ /* If the VL is supported validate data set/get */
+ if (vl_supported) {
+ ptrace_set_no_data(child, vl);
+ ptrace_set_get_data(child, vl);
+ } else {
+ ksft_test_result_skip("Disabled ZA for VL %u\n", vl);
+ ksft_test_result_skip("Get and set data for VL %u\n",
+ vl);
+ }
+ }
+
+ ret = EXIT_SUCCESS;
+
+error:
+ kill(child, SIGKILL);
+
+disappeared:
+ return ret;
+}
+
+int main(void)
+{
+ int ret = EXIT_SUCCESS;
+ pid_t child;
+
+ srandom(getpid());
+
+ ksft_print_header();
+
+ if (!(getauxval(AT_HWCAP2) & HWCAP2_SME)) {
+ ksft_set_plan(1);
+ ksft_exit_skip("SME not available\n");
+ }
+
+ ksft_set_plan(EXPECTED_TESTS);
+
+ child = fork();
+ if (!child)
+ return do_child();
+
+ if (do_parent(child))
+ ret = EXIT_FAILURE;
+
+ ksft_print_cnts();
+
+ return ret;
+}
diff --git a/tools/testing/selftests/arm64/fp/za-stress b/tools/testing/selftests/arm64/fp/za-stress
new file mode 100644
index 000000000000..5ac386b55b95
--- /dev/null
+++ b/tools/testing/selftests/arm64/fp/za-stress
@@ -0,0 +1,59 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0-only
+# Copyright (C) 2015-2019 ARM Limited.
+# Original author: Dave Martin <Dave.Martin@arm.com>
+
+set -ue
+
+NR_CPUS=`nproc`
+
+pids=
+logs=
+
+cleanup () {
+ trap - INT TERM CHLD
+ set +e
+
+ if [ -n "$pids" ]; then
+ kill $pids
+ wait $pids
+ pids=
+ fi
+
+ if [ -n "$logs" ]; then
+ cat $logs
+ rm $logs
+ logs=
+ fi
+}
+
+interrupt () {
+ cleanup
+ exit 0
+}
+
+child_died () {
+ cleanup
+ exit 1
+}
+
+trap interrupt INT TERM EXIT
+
+for x in `seq 0 $((NR_CPUS * 4))`; do
+ log=`mktemp`
+ logs=$logs\ $log
+ ./za-test >$log &
+ pids=$pids\ $!
+done
+
+# Wait for all child processes to be created:
+sleep 10
+
+while :; do
+ kill -USR1 $pids
+done &
+pids=$pids\ $!
+
+wait
+
+exit 1
diff --git a/tools/testing/selftests/arm64/fp/za-test.S b/tools/testing/selftests/arm64/fp/za-test.S
new file mode 100644
index 000000000000..095b45531640
--- /dev/null
+++ b/tools/testing/selftests/arm64/fp/za-test.S
@@ -0,0 +1,403 @@
+// SPDX-License-Identifier: GPL-2.0-only
+// Copyright (C) 2021 ARM Limited.
+// Original author: Mark Brown <broonie@kernel.org>
+//
+// Scalable Matrix Extension ZA context switch test
+// Repeatedly writes unique test patterns into each ZA tile
+// and reads them back to verify integrity.
+//
+// for x in `seq 1 NR_CPUS`; do sve-test & pids=$pids\ $! ; done
+// (leave it running for as long as you want...)
+// kill $pids
+
+#include <asm/unistd.h>
+#include "assembler.h"
+#include "asm-offsets.h"
+#include "sme-inst.h"
+
+.arch_extension sve
+
+#define MAXVL 2048
+#define MAXVL_B (MAXVL / 8)
+
+// Declare some storage space to shadow ZA register contents and a
+// scratch buffer for a vector.
+.pushsection .text
+.data
+.align 4
+zaref:
+ .space MAXVL_B * MAXVL_B
+scratch:
+ .space MAXVL_B
+.popsection
+
+// Trivial memory copy: copy x2 bytes, starting at address x1, to address x0.
+// Clobbers x0-x3
+function memcpy
+ cmp x2, #0
+ b.eq 1f
+0: ldrb w3, [x1], #1
+ strb w3, [x0], #1
+ subs x2, x2, #1
+ b.ne 0b
+1: ret
+endfunction
+
+// Generate a test pattern for storage in ZA
+// x0: pid
+// x1: row in ZA
+// x2: generation
+
+// These values are used to constuct a 32-bit pattern that is repeated in the
+// scratch buffer as many times as will fit:
+// bits 31:28 generation number (increments once per test_loop)
+// bits 27:16 pid
+// bits 15: 8 row number
+// bits 7: 0 32-bit lane index
+
+function pattern
+ mov w3, wzr
+ bfi w3, w0, #16, #12 // PID
+ bfi w3, w1, #8, #8 // Row
+ bfi w3, w2, #28, #4 // Generation
+
+ ldr x0, =scratch
+ mov w1, #MAXVL_B / 4
+
+0: str w3, [x0], #4
+ add w3, w3, #1 // Lane
+ subs w1, w1, #1
+ b.ne 0b
+
+ ret
+endfunction
+
+// Get the address of shadow data for ZA horizontal vector xn
+.macro _adrza xd, xn, nrtmp
+ ldr \xd, =zaref
+ rdsvl \nrtmp, 1
+ madd \xd, x\nrtmp, \xn, \xd
+.endm
+
+// Set up test pattern in a ZA horizontal vector
+// x0: pid
+// x1: row number
+// x2: generation
+function setup_za
+ mov x4, x30
+ mov x12, x1 // Use x12 for vector select
+
+ bl pattern // Get pattern in scratch buffer
+ _adrza x0, x12, 2 // Shadow buffer pointer to x0 and x5
+ mov x5, x0
+ ldr x1, =scratch
+ bl memcpy // length set up in x2 by _adrza
+
+ _ldr_za 12, 5 // load vector w12 from pointer x5
+
+ ret x4
+endfunction
+
+// Trivial memory compare: compare x2 bytes starting at address x0 with
+// bytes starting at address x1.
+// Returns only if all bytes match; otherwise, the program is aborted.
+// Clobbers x0-x5.
+function memcmp
+ cbz x2, 2f
+
+ stp x0, x1, [sp, #-0x20]!
+ str x2, [sp, #0x10]
+
+ mov x5, #0
+0: ldrb w3, [x0, x5]
+ ldrb w4, [x1, x5]
+ add x5, x5, #1
+ cmp w3, w4
+ b.ne 1f
+ subs x2, x2, #1
+ b.ne 0b
+
+1: ldr x2, [sp, #0x10]
+ ldp x0, x1, [sp], #0x20
+ b.ne barf
+
+2: ret
+endfunction
+
+// Verify that a ZA vector matches its shadow in memory, else abort
+// x0: row number
+// Clobbers x0-x7 and x12.
+function check_za
+ mov x3, x30
+
+ mov x12, x0
+ _adrza x5, x0, 6 // pointer to expected value in x5
+ mov x4, x0
+ ldr x7, =scratch // x7 is scratch
+
+ mov x0, x7 // Poison scratch
+ mov x1, x6
+ bl memfill_ae
+
+ _str_za 12, 7 // save vector w12 to pointer x7
+
+ mov x0, x5
+ mov x1, x7
+ mov x2, x6
+ mov x30, x3
+ b memcmp
+endfunction
+
+// Any SME register modified here can cause corruption in the main
+// thread -- but *only* the locations modified here.
+function irritator_handler
+ // Increment the irritation signal count (x23):
+ ldr x0, [x2, #ucontext_regs + 8 * 23]
+ add x0, x0, #1
+ str x0, [x2, #ucontext_regs + 8 * 23]
+
+ // Corrupt some random ZA data
+#if 0
+ adr x0, .text + (irritator_handler - .text) / 16 * 16
+ movi v0.8b, #1
+ movi v9.16b, #2
+ movi v31.8b, #3
+#endif
+
+ ret
+endfunction
+
+function tickle_handler
+ // Increment the signal count (x23):
+ ldr x0, [x2, #ucontext_regs + 8 * 23]
+ add x0, x0, #1
+ str x0, [x2, #ucontext_regs + 8 * 23]
+
+ ret
+endfunction
+
+function terminate_handler
+ mov w21, w0
+ mov x20, x2
+
+ puts "Terminated by signal "
+ mov w0, w21
+ bl putdec
+ puts ", no error, iterations="
+ ldr x0, [x20, #ucontext_regs + 8 * 22]
+ bl putdec
+ puts ", signals="
+ ldr x0, [x20, #ucontext_regs + 8 * 23]
+ bl putdecn
+
+ mov x0, #0
+ mov x8, #__NR_exit
+ svc #0
+endfunction
+
+// w0: signal number
+// x1: sa_action
+// w2: sa_flags
+// Clobbers x0-x6,x8
+function setsignal
+ str x30, [sp, #-((sa_sz + 15) / 16 * 16 + 16)]!
+
+ mov w4, w0
+ mov x5, x1
+ mov w6, w2
+
+ add x0, sp, #16
+ mov x1, #sa_sz
+ bl memclr
+
+ mov w0, w4
+ add x1, sp, #16
+ str w6, [x1, #sa_flags]
+ str x5, [x1, #sa_handler]
+ mov x2, #0
+ mov x3, #sa_mask_sz
+ mov x8, #__NR_rt_sigaction
+ svc #0
+
+ cbz w0, 1f
+
+ puts "sigaction failure\n"
+ b .Labort
+
+1: ldr x30, [sp], #((sa_sz + 15) / 16 * 16 + 16)
+ ret
+endfunction
+
+// Main program entry point
+.globl _start
+function _start
+ mov x23, #0 // signal count
+
+ mov w0, #SIGINT
+ adr x1, terminate_handler
+ mov w2, #SA_SIGINFO
+ bl setsignal
+
+ mov w0, #SIGTERM
+ adr x1, terminate_handler
+ mov w2, #SA_SIGINFO
+ bl setsignal
+
+ mov w0, #SIGUSR1
+ adr x1, irritator_handler
+ mov w2, #SA_SIGINFO
+ orr w2, w2, #SA_NODEFER
+ bl setsignal
+
+ mov w0, #SIGUSR2
+ adr x1, tickle_handler
+ mov w2, #SA_SIGINFO
+ orr w2, w2, #SA_NODEFER
+ bl setsignal
+
+ puts "Streaming mode "
+ smstart_za
+
+ // Sanity-check and report the vector length
+
+ rdsvl 19, 8
+ cmp x19, #128
+ b.lo 1f
+ cmp x19, #2048
+ b.hi 1f
+ tst x19, #(8 - 1)
+ b.eq 2f
+
+1: puts "bad vector length: "
+ mov x0, x19
+ bl putdecn
+ b .Labort
+
+2: puts "vector length:\t"
+ mov x0, x19
+ bl putdec
+ puts " bits\n"
+
+ // Obtain our PID, to ensure test pattern uniqueness between processes
+ mov x8, #__NR_getpid
+ svc #0
+ mov x20, x0
+
+ puts "PID:\t"
+ mov x0, x20
+ bl putdecn
+
+ mov x22, #0 // generation number, increments per iteration
+.Ltest_loop:
+ rdsvl 0, 8
+ cmp x0, x19
+ b.ne vl_barf
+
+ rdsvl 21, 1 // Set up ZA & shadow with test pattern
+0: mov x0, x20
+ sub x1, x21, #1
+ mov x2, x22
+ bl setup_za
+ subs x21, x21, #1
+ b.ne 0b
+
+ mov x8, #__NR_sched_yield // encourage preemption
+1:
+ svc #0
+
+ mrs x0, S3_3_C4_C2_2 // SVCR should have ZA=1,SM=0
+ and x1, x0, #3
+ cmp x1, #2
+ b.ne svcr_barf
+
+ rdsvl 21, 1 // Verify that the data made it through
+ rdsvl 24, 1 // Verify that the data made it through
+0: sub x0, x24, x21
+ bl check_za
+ subs x21, x21, #1
+ bne 0b
+
+ add x22, x22, #1 // Everything still working
+ b .Ltest_loop
+
+.Labort:
+ mov x0, #0
+ mov x1, #SIGABRT
+ mov x8, #__NR_kill
+ svc #0
+endfunction
+
+function barf
+// fpsimd.c acitivty log dump hack
+// ldr w0, =0xdeadc0de
+// mov w8, #__NR_exit
+// svc #0
+// end hack
+
+ mrs x13, S3_3_C4_C2_2
+
+ smstop
+ mov x10, x0 // expected data
+ mov x11, x1 // actual data
+ mov x12, x2 // data size
+
+ puts "Mismatch: PID="
+ mov x0, x20
+ bl putdec
+ puts ", iteration="
+ mov x0, x22
+ bl putdec
+ puts ", row="
+ mov x0, x21
+ bl putdecn
+ puts "\tExpected ["
+ mov x0, x10
+ mov x1, x12
+ bl dumphex
+ puts "]\n\tGot ["
+ mov x0, x11
+ mov x1, x12
+ bl dumphex
+ puts "]\n"
+ puts "\tSVCR: "
+ mov x0, x13
+ bl putdecn
+
+ mov x8, #__NR_getpid
+ svc #0
+// fpsimd.c acitivty log dump hack
+// ldr w0, =0xdeadc0de
+// mov w8, #__NR_exit
+// svc #0
+// ^ end of hack
+ mov x1, #SIGABRT
+ mov x8, #__NR_kill
+ svc #0
+// mov x8, #__NR_exit
+// mov x1, #1
+// svc #0
+endfunction
+
+function vl_barf
+ mov x10, x0
+
+ puts "Bad active VL: "
+ mov x0, x10
+ bl putdecn
+
+ mov x8, #__NR_exit
+ mov x1, #1
+ svc #0
+endfunction
+
+function svcr_barf
+ mov x10, x0
+
+ puts "Bad SVCR: "
+ mov x0, x10
+ bl putdecn
+
+ mov x8, #__NR_exit
+ mov x1, #1
+ svc #0
+endfunction
diff --git a/tools/testing/selftests/arm64/fp/zt-ptrace.c b/tools/testing/selftests/arm64/fp/zt-ptrace.c
new file mode 100644
index 000000000000..996d9614a131
--- /dev/null
+++ b/tools/testing/selftests/arm64/fp/zt-ptrace.c
@@ -0,0 +1,365 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright (C) 2021 ARM Limited.
+ */
+#include <errno.h>
+#include <stdbool.h>
+#include <stddef.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+#include <sys/auxv.h>
+#include <sys/prctl.h>
+#include <sys/ptrace.h>
+#include <sys/types.h>
+#include <sys/uio.h>
+#include <sys/wait.h>
+#include <asm/sigcontext.h>
+#include <asm/ptrace.h>
+
+#include "../../kselftest.h"
+
+/* <linux/elf.h> and <sys/auxv.h> don't like each other, so: */
+#ifndef NT_ARM_ZA
+#define NT_ARM_ZA 0x40c
+#endif
+#ifndef NT_ARM_ZT
+#define NT_ARM_ZT 0x40d
+#endif
+
+#define EXPECTED_TESTS 3
+
+static int sme_vl;
+
+static void fill_buf(char *buf, size_t size)
+{
+ int i;
+
+ for (i = 0; i < size; i++)
+ buf[i] = random();
+}
+
+static int do_child(void)
+{
+ if (ptrace(PTRACE_TRACEME, -1, NULL, NULL))
+ ksft_exit_fail_msg("PTRACE_TRACEME", strerror(errno));
+
+ if (raise(SIGSTOP))
+ ksft_exit_fail_msg("raise(SIGSTOP)", strerror(errno));
+
+ return EXIT_SUCCESS;
+}
+
+static struct user_za_header *get_za(pid_t pid, void **buf, size_t *size)
+{
+ struct user_za_header *za;
+ void *p;
+ size_t sz = sizeof(*za);
+ struct iovec iov;
+
+ while (1) {
+ if (*size < sz) {
+ p = realloc(*buf, sz);
+ if (!p) {
+ errno = ENOMEM;
+ goto error;
+ }
+
+ *buf = p;
+ *size = sz;
+ }
+
+ iov.iov_base = *buf;
+ iov.iov_len = sz;
+ if (ptrace(PTRACE_GETREGSET, pid, NT_ARM_ZA, &iov))
+ goto error;
+
+ za = *buf;
+ if (za->size <= sz)
+ break;
+
+ sz = za->size;
+ }
+
+ return za;
+
+error:
+ return NULL;
+}
+
+static int set_za(pid_t pid, const struct user_za_header *za)
+{
+ struct iovec iov;
+
+ iov.iov_base = (void *)za;
+ iov.iov_len = za->size;
+ return ptrace(PTRACE_SETREGSET, pid, NT_ARM_ZA, &iov);
+}
+
+static int get_zt(pid_t pid, char zt[ZT_SIG_REG_BYTES])
+{
+ struct iovec iov;
+
+ iov.iov_base = zt;
+ iov.iov_len = ZT_SIG_REG_BYTES;
+ return ptrace(PTRACE_GETREGSET, pid, NT_ARM_ZT, &iov);
+}
+
+
+static int set_zt(pid_t pid, const char zt[ZT_SIG_REG_BYTES])
+{
+ struct iovec iov;
+
+ iov.iov_base = (void *)zt;
+ iov.iov_len = ZT_SIG_REG_BYTES;
+ return ptrace(PTRACE_SETREGSET, pid, NT_ARM_ZT, &iov);
+}
+
+/* Reading with ZA disabled returns all zeros */
+static void ptrace_za_disabled_read_zt(pid_t child)
+{
+ struct user_za_header za;
+ char zt[ZT_SIG_REG_BYTES];
+ int ret, i;
+ bool fail = false;
+
+ /* Disable PSTATE.ZA using the ZA interface */
+ memset(&za, 0, sizeof(za));
+ za.vl = sme_vl;
+ za.size = sizeof(za);
+
+ ret = set_za(child, &za);
+ if (ret != 0) {
+ ksft_print_msg("Failed to disable ZA\n");
+ fail = true;
+ }
+
+ /* Read back ZT */
+ ret = get_zt(child, zt);
+ if (ret != 0) {
+ ksft_print_msg("Failed to read ZT\n");
+ fail = true;
+ }
+
+ for (i = 0; i < ARRAY_SIZE(zt); i++) {
+ if (zt[i]) {
+ ksft_print_msg("zt[%d]: 0x%x != 0\n", i, zt[i]);
+ fail = true;
+ }
+ }
+
+ ksft_test_result(!fail, "ptrace_za_disabled_read_zt\n");
+}
+
+/* Writing then reading ZT should return the data written */
+static void ptrace_set_get_zt(pid_t child)
+{
+ char zt_in[ZT_SIG_REG_BYTES];
+ char zt_out[ZT_SIG_REG_BYTES];
+ int ret, i;
+ bool fail = false;
+
+ fill_buf(zt_in, sizeof(zt_in));
+
+ ret = set_zt(child, zt_in);
+ if (ret != 0) {
+ ksft_print_msg("Failed to set ZT\n");
+ fail = true;
+ }
+
+ ret = get_zt(child, zt_out);
+ if (ret != 0) {
+ ksft_print_msg("Failed to read ZT\n");
+ fail = true;
+ }
+
+ for (i = 0; i < ARRAY_SIZE(zt_in); i++) {
+ if (zt_in[i] != zt_out[i]) {
+ ksft_print_msg("zt[%d]: 0x%x != 0x%x\n", i,
+ zt_in[i], zt_out[i]);
+ fail = true;
+ }
+ }
+
+ ksft_test_result(!fail, "ptrace_set_get_zt\n");
+}
+
+/* Writing ZT should set PSTATE.ZA */
+static void ptrace_enable_za_via_zt(pid_t child)
+{
+ struct user_za_header za_in;
+ struct user_za_header *za_out;
+ char zt[ZT_SIG_REG_BYTES];
+ char *za_data;
+ size_t za_out_size;
+ int ret, i, vq;
+ bool fail = false;
+
+ /* Disable PSTATE.ZA using the ZA interface */
+ memset(&za_in, 0, sizeof(za_in));
+ za_in.vl = sme_vl;
+ za_in.size = sizeof(za_in);
+
+ ret = set_za(child, &za_in);
+ if (ret != 0) {
+ ksft_print_msg("Failed to disable ZA\n");
+ fail = true;
+ }
+
+ /* Write ZT */
+ fill_buf(zt, sizeof(zt));
+ ret = set_zt(child, zt);
+ if (ret != 0) {
+ ksft_print_msg("Failed to set ZT\n");
+ fail = true;
+ }
+
+ /* Read back ZA and check for register data */
+ za_out = NULL;
+ za_out_size = 0;
+ if (get_za(child, (void **)&za_out, &za_out_size)) {
+ /* Should have an unchanged VL */
+ if (za_out->vl != sme_vl) {
+ ksft_print_msg("VL changed from %d to %d\n",
+ sme_vl, za_out->vl);
+ fail = true;
+ }
+ vq = __sve_vq_from_vl(za_out->vl);
+ za_data = (char *)za_out + ZA_PT_ZA_OFFSET;
+
+ /* Should have register data */
+ if (za_out->size < ZA_PT_SIZE(vq)) {
+ ksft_print_msg("ZA data less than expected: %u < %u\n",
+ za_out->size, ZA_PT_SIZE(vq));
+ fail = true;
+ vq = 0;
+ }
+
+ /* That register data should be non-zero */
+ for (i = 0; i < ZA_PT_ZA_SIZE(vq); i++) {
+ if (za_data[i]) {
+ ksft_print_msg("ZA byte %d is %x\n",
+ i, za_data[i]);
+ fail = true;
+ }
+ }
+ } else {
+ ksft_print_msg("Failed to read ZA\n");
+ fail = true;
+ }
+
+ ksft_test_result(!fail, "ptrace_enable_za_via_zt\n");
+}
+
+static int do_parent(pid_t child)
+{
+ int ret = EXIT_FAILURE;
+ pid_t pid;
+ int status;
+ siginfo_t si;
+
+ /* Attach to the child */
+ while (1) {
+ int sig;
+
+ pid = wait(&status);
+ if (pid == -1) {
+ perror("wait");
+ goto error;
+ }
+
+ /*
+ * This should never happen but it's hard to flag in
+ * the framework.
+ */
+ if (pid != child)
+ continue;
+
+ if (WIFEXITED(status) || WIFSIGNALED(status))
+ ksft_exit_fail_msg("Child died unexpectedly\n");
+
+ if (!WIFSTOPPED(status))
+ goto error;
+
+ sig = WSTOPSIG(status);
+
+ if (ptrace(PTRACE_GETSIGINFO, pid, NULL, &si)) {
+ if (errno == ESRCH)
+ goto disappeared;
+
+ if (errno == EINVAL) {
+ sig = 0; /* bust group-stop */
+ goto cont;
+ }
+
+ ksft_test_result_fail("PTRACE_GETSIGINFO: %s\n",
+ strerror(errno));
+ goto error;
+ }
+
+ if (sig == SIGSTOP && si.si_code == SI_TKILL &&
+ si.si_pid == pid)
+ break;
+
+ cont:
+ if (ptrace(PTRACE_CONT, pid, NULL, sig)) {
+ if (errno == ESRCH)
+ goto disappeared;
+
+ ksft_test_result_fail("PTRACE_CONT: %s\n",
+ strerror(errno));
+ goto error;
+ }
+ }
+
+ ksft_print_msg("Parent is %d, child is %d\n", getpid(), child);
+
+ ptrace_za_disabled_read_zt(child);
+ ptrace_set_get_zt(child);
+ ptrace_enable_za_via_zt(child);
+
+ ret = EXIT_SUCCESS;
+
+error:
+ kill(child, SIGKILL);
+
+disappeared:
+ return ret;
+}
+
+int main(void)
+{
+ int ret = EXIT_SUCCESS;
+ pid_t child;
+
+ srandom(getpid());
+
+ ksft_print_header();
+
+ if (!(getauxval(AT_HWCAP2) & HWCAP2_SME2)) {
+ ksft_set_plan(1);
+ ksft_exit_skip("SME2 not available\n");
+ }
+
+ /* We need a valid SME VL to enable/disable ZA */
+ sme_vl = prctl(PR_SME_GET_VL);
+ if (sme_vl == -1) {
+ ksft_set_plan(1);
+ ksft_exit_skip("Failed to read SME VL: %d (%s)\n",
+ errno, strerror(errno));
+ }
+
+ ksft_set_plan(EXPECTED_TESTS);
+
+ child = fork();
+ if (!child)
+ return do_child();
+
+ if (do_parent(child))
+ ret = EXIT_FAILURE;
+
+ ksft_print_cnts();
+
+ return ret;
+}
diff --git a/tools/testing/selftests/arm64/fp/zt-test.S b/tools/testing/selftests/arm64/fp/zt-test.S
new file mode 100644
index 000000000000..b5c81e81a379
--- /dev/null
+++ b/tools/testing/selftests/arm64/fp/zt-test.S
@@ -0,0 +1,321 @@
+// SPDX-License-Identifier: GPL-2.0-only
+// Copyright (C) 2021-2 ARM Limited.
+// Original author: Mark Brown <broonie@kernel.org>
+//
+// Scalable Matrix Extension ZT context switch test
+// Repeatedly writes unique test patterns into ZT0
+// and reads them back to verify integrity.
+
+#include <asm/unistd.h>
+#include "assembler.h"
+#include "asm-offsets.h"
+#include "sme-inst.h"
+
+.arch_extension sve
+
+#define ZT_SZ 512
+#define ZT_B (ZT_SZ / 8)
+
+// Declare some storage space to shadow ZT register contents and a
+// scratch buffer.
+.pushsection .text
+.data
+.align 4
+ztref:
+ .space ZT_B
+scratch:
+ .space ZT_B
+.popsection
+
+
+// Generate a test pattern for storage in ZT
+// x0: pid
+// x1: generation
+
+// These values are used to construct a 32-bit pattern that is repeated in the
+// scratch buffer as many times as will fit:
+// bits 31:24 generation number (increments once per test_loop)
+// bits 23: 8 pid
+// bits 7: 0 32-bit lane index
+
+function pattern
+ mov w3, wzr
+ bfi w3, w0, #8, #16 // PID
+ bfi w3, w1, #24, #8 // Generation
+
+ ldr x0, =scratch
+ mov w1, #ZT_B / 4
+
+0: str w3, [x0], #4
+ add w3, w3, #1 // Lane
+ subs w1, w1, #1
+ b.ne 0b
+
+ ret
+endfunction
+
+// Set up test pattern in a ZT horizontal vector
+// x0: pid
+// x1: generation
+function setup_zt
+ mov x4, x30
+
+ bl pattern // Get pattern in scratch buffer
+ ldr x0, =ztref
+ ldr x1, =scratch
+ mov x2, #ZT_B
+ bl memcpy
+
+ ldr x0, =ztref
+ _ldr_zt 0 // load zt0 from pointer x0
+
+ ret x4
+endfunction
+
+// Trivial memory compare: compare x2 bytes starting at address x0 with
+// bytes starting at address x1.
+// Returns only if all bytes match; otherwise, the program is aborted.
+// Clobbers x0-x5.
+function memcmp
+ cbz x2, 2f
+
+ stp x0, x1, [sp, #-0x20]!
+ str x2, [sp, #0x10]
+
+ mov x5, #0
+0: ldrb w3, [x0, x5]
+ ldrb w4, [x1, x5]
+ add x5, x5, #1
+ cmp w3, w4
+ b.ne 1f
+ subs x2, x2, #1
+ b.ne 0b
+
+1: ldr x2, [sp, #0x10]
+ ldp x0, x1, [sp], #0x20
+ b.ne barf
+
+2: ret
+endfunction
+
+// Verify that a ZT vector matches its shadow in memory, else abort
+// Clobbers x0-x3
+function check_zt
+ mov x3, x30
+
+ ldr x0, =scratch // Poison scratch
+ mov x1, #ZT_B
+ bl memfill_ae
+
+ ldr x0, =scratch
+ _str_zt 0
+
+ ldr x0, =ztref
+ ldr x1, =scratch
+ mov x2, #ZT_B
+ mov x30, x3
+ b memcmp
+endfunction
+
+// Any SME register modified here can cause corruption in the main
+// thread -- but *only* the locations modified here.
+function irritator_handler
+ // Increment the irritation signal count (x23):
+ ldr x0, [x2, #ucontext_regs + 8 * 23]
+ add x0, x0, #1
+ str x0, [x2, #ucontext_regs + 8 * 23]
+
+ // Corrupt some random ZT data
+#if 0
+ adr x0, .text + (irritator_handler - .text) / 16 * 16
+ movi v0.8b, #1
+ movi v9.16b, #2
+ movi v31.8b, #3
+#endif
+
+ ret
+endfunction
+
+function tickle_handler
+ // Increment the signal count (x23):
+ ldr x0, [x2, #ucontext_regs + 8 * 23]
+ add x0, x0, #1
+ str x0, [x2, #ucontext_regs + 8 * 23]
+
+ ret
+endfunction
+
+function terminate_handler
+ mov w21, w0
+ mov x20, x2
+
+ puts "Terminated by signal "
+ mov w0, w21
+ bl putdec
+ puts ", no error, iterations="
+ ldr x0, [x20, #ucontext_regs + 8 * 22]
+ bl putdec
+ puts ", signals="
+ ldr x0, [x20, #ucontext_regs + 8 * 23]
+ bl putdecn
+
+ mov x0, #0
+ mov x8, #__NR_exit
+ svc #0
+endfunction
+
+// w0: signal number
+// x1: sa_action
+// w2: sa_flags
+// Clobbers x0-x6,x8
+function setsignal
+ str x30, [sp, #-((sa_sz + 15) / 16 * 16 + 16)]!
+
+ mov w4, w0
+ mov x5, x1
+ mov w6, w2
+
+ add x0, sp, #16
+ mov x1, #sa_sz
+ bl memclr
+
+ mov w0, w4
+ add x1, sp, #16
+ str w6, [x1, #sa_flags]
+ str x5, [x1, #sa_handler]
+ mov x2, #0
+ mov x3, #sa_mask_sz
+ mov x8, #__NR_rt_sigaction
+ svc #0
+
+ cbz w0, 1f
+
+ puts "sigaction failure\n"
+ b .Labort
+
+1: ldr x30, [sp], #((sa_sz + 15) / 16 * 16 + 16)
+ ret
+endfunction
+
+// Main program entry point
+.globl _start
+function _start
+ mov x23, #0 // signal count
+
+ mov w0, #SIGINT
+ adr x1, terminate_handler
+ mov w2, #SA_SIGINFO
+ bl setsignal
+
+ mov w0, #SIGTERM
+ adr x1, terminate_handler
+ mov w2, #SA_SIGINFO
+ bl setsignal
+
+ mov w0, #SIGUSR1
+ adr x1, irritator_handler
+ mov w2, #SA_SIGINFO
+ orr w2, w2, #SA_NODEFER
+ bl setsignal
+
+ mov w0, #SIGUSR2
+ adr x1, tickle_handler
+ mov w2, #SA_SIGINFO
+ orr w2, w2, #SA_NODEFER
+ bl setsignal
+
+ smstart_za
+
+ // Obtain our PID, to ensure test pattern uniqueness between processes
+ mov x8, #__NR_getpid
+ svc #0
+ mov x20, x0
+
+ puts "PID:\t"
+ mov x0, x20
+ bl putdecn
+
+ mov x22, #0 // generation number, increments per iteration
+.Ltest_loop:
+ mov x0, x20
+ mov x1, x22
+ bl setup_zt
+
+ mov x8, #__NR_sched_yield // Encourage preemption
+ svc #0
+
+ mrs x0, S3_3_C4_C2_2 // SVCR should have ZA=1,SM=0
+ and x1, x0, #3
+ cmp x1, #2
+ b.ne svcr_barf
+
+ bl check_zt
+
+ add x22, x22, #1 // Everything still working
+ b .Ltest_loop
+
+.Labort:
+ mov x0, #0
+ mov x1, #SIGABRT
+ mov x8, #__NR_kill
+ svc #0
+endfunction
+
+function barf
+// fpsimd.c acitivty log dump hack
+// ldr w0, =0xdeadc0de
+// mov w8, #__NR_exit
+// svc #0
+// end hack
+
+ mrs x13, S3_3_C4_C2_2
+ smstop
+ mov x10, x0 // expected data
+ mov x11, x1 // actual data
+ mov x12, x2 // data size
+
+ puts "Mismatch: PID="
+ mov x0, x20
+ bl putdec
+ puts ", iteration="
+ mov x0, x22
+ bl putdec
+ puts "\tExpected ["
+ mov x0, x10
+ mov x1, x12
+ bl dumphex
+ puts "]\n\tGot ["
+ mov x0, x11
+ mov x1, x12
+ bl dumphex
+ puts "]\n"
+ puts "\tSVCR: "
+ mov x0, x13
+ bl putdecn
+
+ mov x8, #__NR_getpid
+ svc #0
+// fpsimd.c acitivty log dump hack
+// ldr w0, =0xdeadc0de
+// mov w8, #__NR_exit
+// svc #0
+// ^ end of hack
+ mov x1, #SIGABRT
+ mov x8, #__NR_kill
+ svc #0
+// mov x8, #__NR_exit
+// mov x1, #1
+// svc #0
+endfunction
+
+function svcr_barf
+ mov x10, x0
+
+ puts "Bad SVCR: "
+ mov x0, x10
+ bl putdecn
+
+ mov x8, #__NR_exit
+ mov x1, #1
+ svc #0
+endfunction
diff --git a/tools/testing/selftests/arm64/mte/.gitignore b/tools/testing/selftests/arm64/mte/.gitignore
new file mode 100644
index 000000000000..052d0f9f92b3
--- /dev/null
+++ b/tools/testing/selftests/arm64/mte/.gitignore
@@ -0,0 +1,8 @@
+check_buffer_fill
+check_gcr_el1_cswitch
+check_tags_inclusion
+check_child_memory
+check_mmap_options
+check_prctl
+check_ksm_options
+check_user_mem
diff --git a/tools/testing/selftests/arm64/mte/Makefile b/tools/testing/selftests/arm64/mte/Makefile
new file mode 100644
index 000000000000..0d7ac3db8390
--- /dev/null
+++ b/tools/testing/selftests/arm64/mte/Makefile
@@ -0,0 +1,41 @@
+# SPDX-License-Identifier: GPL-2.0
+# Copyright (C) 2020 ARM Limited
+
+CFLAGS += -std=gnu99 -I. -pthread
+LDFLAGS += -pthread
+SRCS := $(filter-out mte_common_util.c,$(wildcard *.c))
+PROGS := $(patsubst %.c,%,$(SRCS))
+
+ifeq ($(LLVM),)
+# For GCC check that the toolchain has MTE support.
+
+# preserve CC value from top level Makefile
+ifeq ($(CC),cc)
+CC := $(CROSS_COMPILE)gcc
+endif
+
+#check if the compiler works well
+mte_cc_support := $(shell if ($(CC) $(CFLAGS) -march=armv8.5-a+memtag -E -x c /dev/null -o /dev/null 2>&1) then echo "1"; fi)
+
+else
+
+# All supported clang versions also support MTE.
+mte_cc_support := 1
+
+endif
+
+ifeq ($(mte_cc_support),1)
+# Generated binaries to be installed by top KSFT script
+TEST_GEN_PROGS := $(PROGS)
+
+else
+ $(warning compiler "$(CC)" does not support the ARMv8.5 MTE extension.)
+ $(warning test program "mte" will not be created.)
+endif
+
+# Include KSFT lib.mk.
+include ../../lib.mk
+
+ifeq ($(mte_cc_support),1)
+$(TEST_GEN_PROGS): mte_common_util.c mte_helper.S
+endif
diff --git a/tools/testing/selftests/arm64/mte/check_buffer_fill.c b/tools/testing/selftests/arm64/mte/check_buffer_fill.c
new file mode 100644
index 000000000000..1dbbbd47dd50
--- /dev/null
+++ b/tools/testing/selftests/arm64/mte/check_buffer_fill.c
@@ -0,0 +1,478 @@
+// SPDX-License-Identifier: GPL-2.0
+// Copyright (C) 2020 ARM Limited
+
+#define _GNU_SOURCE
+
+#include <stddef.h>
+#include <stdio.h>
+#include <string.h>
+
+#include "kselftest.h"
+#include "mte_common_util.h"
+#include "mte_def.h"
+
+#define OVERFLOW_RANGE MT_GRANULE_SIZE
+
+static int sizes[] = {
+ 1, 555, 1033, MT_GRANULE_SIZE - 1, MT_GRANULE_SIZE,
+ /* page size - 1*/ 0, /* page_size */ 0, /* page size + 1 */ 0
+};
+
+enum mte_block_test_alloc {
+ UNTAGGED_TAGGED,
+ TAGGED_UNTAGGED,
+ TAGGED_TAGGED,
+ BLOCK_ALLOC_MAX,
+};
+
+static int check_buffer_by_byte(int mem_type, int mode)
+{
+ char *ptr;
+ int i, j, item;
+ bool err;
+
+ mte_switch_mode(mode, MTE_ALLOW_NON_ZERO_TAG);
+ item = ARRAY_SIZE(sizes);
+
+ for (i = 0; i < item; i++) {
+ ptr = (char *)mte_allocate_memory(sizes[i], mem_type, 0, true);
+ if (check_allocated_memory(ptr, sizes[i], mem_type, true) != KSFT_PASS)
+ return KSFT_FAIL;
+ mte_initialize_current_context(mode, (uintptr_t)ptr, sizes[i]);
+ /* Set some value in tagged memory */
+ for (j = 0; j < sizes[i]; j++)
+ ptr[j] = '1';
+ mte_wait_after_trig();
+ err = cur_mte_cxt.fault_valid;
+ /* Check the buffer whether it is filled. */
+ for (j = 0; j < sizes[i] && !err; j++) {
+ if (ptr[j] != '1')
+ err = true;
+ }
+ mte_free_memory((void *)ptr, sizes[i], mem_type, true);
+
+ if (err)
+ break;
+ }
+ if (!err)
+ return KSFT_PASS;
+ else
+ return KSFT_FAIL;
+}
+
+static int check_buffer_underflow_by_byte(int mem_type, int mode,
+ int underflow_range)
+{
+ char *ptr;
+ int i, j, item, last_index;
+ bool err;
+ char *und_ptr = NULL;
+
+ mte_switch_mode(mode, MTE_ALLOW_NON_ZERO_TAG);
+ item = ARRAY_SIZE(sizes);
+ for (i = 0; i < item; i++) {
+ ptr = (char *)mte_allocate_memory_tag_range(sizes[i], mem_type, 0,
+ underflow_range, 0);
+ if (check_allocated_memory_range(ptr, sizes[i], mem_type,
+ underflow_range, 0) != KSFT_PASS)
+ return KSFT_FAIL;
+
+ mte_initialize_current_context(mode, (uintptr_t)ptr, -underflow_range);
+ last_index = 0;
+ /* Set some value in tagged memory and make the buffer underflow */
+ for (j = sizes[i] - 1; (j >= -underflow_range) &&
+ (!cur_mte_cxt.fault_valid); j--) {
+ ptr[j] = '1';
+ last_index = j;
+ }
+ mte_wait_after_trig();
+ err = false;
+ /* Check whether the buffer is filled */
+ for (j = 0; j < sizes[i]; j++) {
+ if (ptr[j] != '1') {
+ err = true;
+ ksft_print_msg("Buffer is not filled at index:%d of ptr:0x%lx\n",
+ j, ptr);
+ break;
+ }
+ }
+ if (err)
+ goto check_buffer_underflow_by_byte_err;
+
+ switch (mode) {
+ case MTE_NONE_ERR:
+ if (cur_mte_cxt.fault_valid == true || last_index != -underflow_range) {
+ err = true;
+ break;
+ }
+ /* There were no fault so the underflow area should be filled */
+ und_ptr = (char *) MT_CLEAR_TAG((size_t) ptr - underflow_range);
+ for (j = 0 ; j < underflow_range; j++) {
+ if (und_ptr[j] != '1') {
+ err = true;
+ break;
+ }
+ }
+ break;
+ case MTE_ASYNC_ERR:
+ /* Imprecise fault should occur otherwise return error */
+ if (cur_mte_cxt.fault_valid == false) {
+ err = true;
+ break;
+ }
+ /*
+ * The imprecise fault is checked after the write to the buffer,
+ * so the underflow area before the fault should be filled.
+ */
+ und_ptr = (char *) MT_CLEAR_TAG((size_t) ptr);
+ for (j = last_index ; j < 0 ; j++) {
+ if (und_ptr[j] != '1') {
+ err = true;
+ break;
+ }
+ }
+ break;
+ case MTE_SYNC_ERR:
+ /* Precise fault should occur otherwise return error */
+ if (!cur_mte_cxt.fault_valid || (last_index != (-1))) {
+ err = true;
+ break;
+ }
+ /* Underflow area should not be filled */
+ und_ptr = (char *) MT_CLEAR_TAG((size_t) ptr);
+ if (und_ptr[-1] == '1')
+ err = true;
+ break;
+ default:
+ err = true;
+ break;
+ }
+check_buffer_underflow_by_byte_err:
+ mte_free_memory_tag_range((void *)ptr, sizes[i], mem_type, underflow_range, 0);
+ if (err)
+ break;
+ }
+ return (err ? KSFT_FAIL : KSFT_PASS);
+}
+
+static int check_buffer_overflow_by_byte(int mem_type, int mode,
+ int overflow_range)
+{
+ char *ptr;
+ int i, j, item, last_index;
+ bool err;
+ size_t tagged_size, overflow_size;
+ char *over_ptr = NULL;
+
+ mte_switch_mode(mode, MTE_ALLOW_NON_ZERO_TAG);
+ item = ARRAY_SIZE(sizes);
+ for (i = 0; i < item; i++) {
+ ptr = (char *)mte_allocate_memory_tag_range(sizes[i], mem_type, 0,
+ 0, overflow_range);
+ if (check_allocated_memory_range(ptr, sizes[i], mem_type,
+ 0, overflow_range) != KSFT_PASS)
+ return KSFT_FAIL;
+
+ tagged_size = MT_ALIGN_UP(sizes[i]);
+
+ mte_initialize_current_context(mode, (uintptr_t)ptr, sizes[i] + overflow_range);
+
+ /* Set some value in tagged memory and make the buffer underflow */
+ for (j = 0, last_index = 0 ; (j < (sizes[i] + overflow_range)) &&
+ (cur_mte_cxt.fault_valid == false); j++) {
+ ptr[j] = '1';
+ last_index = j;
+ }
+ mte_wait_after_trig();
+ err = false;
+ /* Check whether the buffer is filled */
+ for (j = 0; j < sizes[i]; j++) {
+ if (ptr[j] != '1') {
+ err = true;
+ ksft_print_msg("Buffer is not filled at index:%d of ptr:0x%lx\n",
+ j, ptr);
+ break;
+ }
+ }
+ if (err)
+ goto check_buffer_overflow_by_byte_err;
+
+ overflow_size = overflow_range - (tagged_size - sizes[i]);
+
+ switch (mode) {
+ case MTE_NONE_ERR:
+ if ((cur_mte_cxt.fault_valid == true) ||
+ (last_index != (sizes[i] + overflow_range - 1))) {
+ err = true;
+ break;
+ }
+ /* There were no fault so the overflow area should be filled */
+ over_ptr = (char *) MT_CLEAR_TAG((size_t) ptr + tagged_size);
+ for (j = 0 ; j < overflow_size; j++) {
+ if (over_ptr[j] != '1') {
+ err = true;
+ break;
+ }
+ }
+ break;
+ case MTE_ASYNC_ERR:
+ /* Imprecise fault should occur otherwise return error */
+ if (cur_mte_cxt.fault_valid == false) {
+ err = true;
+ break;
+ }
+ /*
+ * The imprecise fault is checked after the write to the buffer,
+ * so the overflow area should be filled before the fault.
+ */
+ over_ptr = (char *) MT_CLEAR_TAG((size_t) ptr);
+ for (j = tagged_size ; j < last_index; j++) {
+ if (over_ptr[j] != '1') {
+ err = true;
+ break;
+ }
+ }
+ break;
+ case MTE_SYNC_ERR:
+ /* Precise fault should occur otherwise return error */
+ if (!cur_mte_cxt.fault_valid || (last_index != tagged_size)) {
+ err = true;
+ break;
+ }
+ /* Underflow area should not be filled */
+ over_ptr = (char *) MT_CLEAR_TAG((size_t) ptr + tagged_size);
+ for (j = 0 ; j < overflow_size; j++) {
+ if (over_ptr[j] == '1')
+ err = true;
+ }
+ break;
+ default:
+ err = true;
+ break;
+ }
+check_buffer_overflow_by_byte_err:
+ mte_free_memory_tag_range((void *)ptr, sizes[i], mem_type, 0, overflow_range);
+ if (err)
+ break;
+ }
+ return (err ? KSFT_FAIL : KSFT_PASS);
+}
+
+static int check_buffer_by_block_iterate(int mem_type, int mode, size_t size)
+{
+ char *src, *dst;
+ int j, result = KSFT_PASS;
+ enum mte_block_test_alloc alloc_type = UNTAGGED_TAGGED;
+
+ for (alloc_type = UNTAGGED_TAGGED; alloc_type < (int) BLOCK_ALLOC_MAX; alloc_type++) {
+ switch (alloc_type) {
+ case UNTAGGED_TAGGED:
+ src = (char *)mte_allocate_memory(size, mem_type, 0, false);
+ if (check_allocated_memory(src, size, mem_type, false) != KSFT_PASS)
+ return KSFT_FAIL;
+
+ dst = (char *)mte_allocate_memory(size, mem_type, 0, true);
+ if (check_allocated_memory(dst, size, mem_type, true) != KSFT_PASS) {
+ mte_free_memory((void *)src, size, mem_type, false);
+ return KSFT_FAIL;
+ }
+
+ break;
+ case TAGGED_UNTAGGED:
+ dst = (char *)mte_allocate_memory(size, mem_type, 0, false);
+ if (check_allocated_memory(dst, size, mem_type, false) != KSFT_PASS)
+ return KSFT_FAIL;
+
+ src = (char *)mte_allocate_memory(size, mem_type, 0, true);
+ if (check_allocated_memory(src, size, mem_type, true) != KSFT_PASS) {
+ mte_free_memory((void *)dst, size, mem_type, false);
+ return KSFT_FAIL;
+ }
+ break;
+ case TAGGED_TAGGED:
+ src = (char *)mte_allocate_memory(size, mem_type, 0, true);
+ if (check_allocated_memory(src, size, mem_type, true) != KSFT_PASS)
+ return KSFT_FAIL;
+
+ dst = (char *)mte_allocate_memory(size, mem_type, 0, true);
+ if (check_allocated_memory(dst, size, mem_type, true) != KSFT_PASS) {
+ mte_free_memory((void *)src, size, mem_type, true);
+ return KSFT_FAIL;
+ }
+ break;
+ default:
+ return KSFT_FAIL;
+ }
+
+ cur_mte_cxt.fault_valid = false;
+ result = KSFT_PASS;
+ mte_initialize_current_context(mode, (uintptr_t)dst, size);
+ /* Set some value in memory and copy*/
+ memset((void *)src, (int)'1', size);
+ memcpy((void *)dst, (void *)src, size);
+ mte_wait_after_trig();
+ if (cur_mte_cxt.fault_valid) {
+ result = KSFT_FAIL;
+ goto check_buffer_by_block_err;
+ }
+ /* Check the buffer whether it is filled. */
+ for (j = 0; j < size; j++) {
+ if (src[j] != dst[j] || src[j] != '1') {
+ result = KSFT_FAIL;
+ break;
+ }
+ }
+check_buffer_by_block_err:
+ mte_free_memory((void *)src, size, mem_type,
+ MT_FETCH_TAG((uintptr_t)src) ? true : false);
+ mte_free_memory((void *)dst, size, mem_type,
+ MT_FETCH_TAG((uintptr_t)dst) ? true : false);
+ if (result != KSFT_PASS)
+ return result;
+ }
+ return result;
+}
+
+static int check_buffer_by_block(int mem_type, int mode)
+{
+ int i, item, result = KSFT_PASS;
+
+ mte_switch_mode(mode, MTE_ALLOW_NON_ZERO_TAG);
+ item = ARRAY_SIZE(sizes);
+ cur_mte_cxt.fault_valid = false;
+ for (i = 0; i < item; i++) {
+ result = check_buffer_by_block_iterate(mem_type, mode, sizes[i]);
+ if (result != KSFT_PASS)
+ break;
+ }
+ return result;
+}
+
+static int compare_memory_tags(char *ptr, size_t size, int tag)
+{
+ int i, new_tag;
+
+ for (i = 0 ; i < size ; i += MT_GRANULE_SIZE) {
+ new_tag = MT_FETCH_TAG((uintptr_t)(mte_get_tag_address(ptr + i)));
+ if (tag != new_tag) {
+ ksft_print_msg("FAIL: child mte tag mismatch\n");
+ return KSFT_FAIL;
+ }
+ }
+ return KSFT_PASS;
+}
+
+static int check_memory_initial_tags(int mem_type, int mode, int mapping)
+{
+ char *ptr;
+ int run, fd;
+ int total = ARRAY_SIZE(sizes);
+
+ mte_switch_mode(mode, MTE_ALLOW_NON_ZERO_TAG);
+ for (run = 0; run < total; run++) {
+ /* check initial tags for anonymous mmap */
+ ptr = (char *)mte_allocate_memory(sizes[run], mem_type, mapping, false);
+ if (check_allocated_memory(ptr, sizes[run], mem_type, false) != KSFT_PASS)
+ return KSFT_FAIL;
+ if (compare_memory_tags(ptr, sizes[run], 0) != KSFT_PASS) {
+ mte_free_memory((void *)ptr, sizes[run], mem_type, false);
+ return KSFT_FAIL;
+ }
+ mte_free_memory((void *)ptr, sizes[run], mem_type, false);
+
+ /* check initial tags for file mmap */
+ fd = create_temp_file();
+ if (fd == -1)
+ return KSFT_FAIL;
+ ptr = (char *)mte_allocate_file_memory(sizes[run], mem_type, mapping, false, fd);
+ if (check_allocated_memory(ptr, sizes[run], mem_type, false) != KSFT_PASS) {
+ close(fd);
+ return KSFT_FAIL;
+ }
+ if (compare_memory_tags(ptr, sizes[run], 0) != KSFT_PASS) {
+ mte_free_memory((void *)ptr, sizes[run], mem_type, false);
+ close(fd);
+ return KSFT_FAIL;
+ }
+ mte_free_memory((void *)ptr, sizes[run], mem_type, false);
+ close(fd);
+ }
+ return KSFT_PASS;
+}
+
+int main(int argc, char *argv[])
+{
+ int err;
+ size_t page_size = getpagesize();
+ int item = ARRAY_SIZE(sizes);
+
+ sizes[item - 3] = page_size - 1;
+ sizes[item - 2] = page_size;
+ sizes[item - 1] = page_size + 1;
+
+ err = mte_default_setup();
+ if (err)
+ return err;
+
+ /* Register SIGSEGV handler */
+ mte_register_signal(SIGSEGV, mte_default_handler);
+
+ /* Set test plan */
+ ksft_set_plan(20);
+
+ /* Buffer by byte tests */
+ evaluate_test(check_buffer_by_byte(USE_MMAP, MTE_SYNC_ERR),
+ "Check buffer correctness by byte with sync err mode and mmap memory\n");
+ evaluate_test(check_buffer_by_byte(USE_MMAP, MTE_ASYNC_ERR),
+ "Check buffer correctness by byte with async err mode and mmap memory\n");
+ evaluate_test(check_buffer_by_byte(USE_MPROTECT, MTE_SYNC_ERR),
+ "Check buffer correctness by byte with sync err mode and mmap/mprotect memory\n");
+ evaluate_test(check_buffer_by_byte(USE_MPROTECT, MTE_ASYNC_ERR),
+ "Check buffer correctness by byte with async err mode and mmap/mprotect memory\n");
+
+ /* Check buffer underflow with underflow size as 16 */
+ evaluate_test(check_buffer_underflow_by_byte(USE_MMAP, MTE_SYNC_ERR, MT_GRANULE_SIZE),
+ "Check buffer write underflow by byte with sync mode and mmap memory\n");
+ evaluate_test(check_buffer_underflow_by_byte(USE_MMAP, MTE_ASYNC_ERR, MT_GRANULE_SIZE),
+ "Check buffer write underflow by byte with async mode and mmap memory\n");
+ evaluate_test(check_buffer_underflow_by_byte(USE_MMAP, MTE_NONE_ERR, MT_GRANULE_SIZE),
+ "Check buffer write underflow by byte with tag check fault ignore and mmap memory\n");
+
+ /* Check buffer underflow with underflow size as page size */
+ evaluate_test(check_buffer_underflow_by_byte(USE_MMAP, MTE_SYNC_ERR, page_size),
+ "Check buffer write underflow by byte with sync mode and mmap memory\n");
+ evaluate_test(check_buffer_underflow_by_byte(USE_MMAP, MTE_ASYNC_ERR, page_size),
+ "Check buffer write underflow by byte with async mode and mmap memory\n");
+ evaluate_test(check_buffer_underflow_by_byte(USE_MMAP, MTE_NONE_ERR, page_size),
+ "Check buffer write underflow by byte with tag check fault ignore and mmap memory\n");
+
+ /* Check buffer overflow with overflow size as 16 */
+ evaluate_test(check_buffer_overflow_by_byte(USE_MMAP, MTE_SYNC_ERR, MT_GRANULE_SIZE),
+ "Check buffer write overflow by byte with sync mode and mmap memory\n");
+ evaluate_test(check_buffer_overflow_by_byte(USE_MMAP, MTE_ASYNC_ERR, MT_GRANULE_SIZE),
+ "Check buffer write overflow by byte with async mode and mmap memory\n");
+ evaluate_test(check_buffer_overflow_by_byte(USE_MMAP, MTE_NONE_ERR, MT_GRANULE_SIZE),
+ "Check buffer write overflow by byte with tag fault ignore mode and mmap memory\n");
+
+ /* Buffer by block tests */
+ evaluate_test(check_buffer_by_block(USE_MMAP, MTE_SYNC_ERR),
+ "Check buffer write correctness by block with sync mode and mmap memory\n");
+ evaluate_test(check_buffer_by_block(USE_MMAP, MTE_ASYNC_ERR),
+ "Check buffer write correctness by block with async mode and mmap memory\n");
+ evaluate_test(check_buffer_by_block(USE_MMAP, MTE_NONE_ERR),
+ "Check buffer write correctness by block with tag fault ignore and mmap memory\n");
+
+ /* Initial tags are supposed to be 0 */
+ evaluate_test(check_memory_initial_tags(USE_MMAP, MTE_SYNC_ERR, MAP_PRIVATE),
+ "Check initial tags with private mapping, sync error mode and mmap memory\n");
+ evaluate_test(check_memory_initial_tags(USE_MPROTECT, MTE_SYNC_ERR, MAP_PRIVATE),
+ "Check initial tags with private mapping, sync error mode and mmap/mprotect memory\n");
+ evaluate_test(check_memory_initial_tags(USE_MMAP, MTE_SYNC_ERR, MAP_SHARED),
+ "Check initial tags with shared mapping, sync error mode and mmap memory\n");
+ evaluate_test(check_memory_initial_tags(USE_MPROTECT, MTE_SYNC_ERR, MAP_SHARED),
+ "Check initial tags with shared mapping, sync error mode and mmap/mprotect memory\n");
+
+ mte_restore_setup();
+ ksft_print_cnts();
+ return ksft_get_fail_cnt() == 0 ? KSFT_PASS : KSFT_FAIL;
+}
diff --git a/tools/testing/selftests/arm64/mte/check_child_memory.c b/tools/testing/selftests/arm64/mte/check_child_memory.c
new file mode 100644
index 000000000000..7597fc632cad
--- /dev/null
+++ b/tools/testing/selftests/arm64/mte/check_child_memory.c
@@ -0,0 +1,198 @@
+// SPDX-License-Identifier: GPL-2.0
+// Copyright (C) 2020 ARM Limited
+
+#define _GNU_SOURCE
+
+#include <errno.h>
+#include <signal.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <ucontext.h>
+#include <sys/wait.h>
+
+#include "kselftest.h"
+#include "mte_common_util.h"
+#include "mte_def.h"
+
+#define BUFFER_SIZE (5 * MT_GRANULE_SIZE)
+#define RUNS (MT_TAG_COUNT)
+#define UNDERFLOW MT_GRANULE_SIZE
+#define OVERFLOW MT_GRANULE_SIZE
+
+static size_t page_size;
+static int sizes[] = {
+ 1, 537, 989, 1269, MT_GRANULE_SIZE - 1, MT_GRANULE_SIZE,
+ /* page size - 1*/ 0, /* page_size */ 0, /* page size + 1 */ 0
+};
+
+static int check_child_tag_inheritance(char *ptr, int size, int mode)
+{
+ int i, parent_tag, child_tag, fault, child_status;
+ pid_t child;
+
+ parent_tag = MT_FETCH_TAG((uintptr_t)ptr);
+ fault = 0;
+
+ child = fork();
+ if (child == -1) {
+ ksft_print_msg("FAIL: child process creation\n");
+ return KSFT_FAIL;
+ } else if (child == 0) {
+ mte_initialize_current_context(mode, (uintptr_t)ptr, size);
+ /* Do copy on write */
+ memset(ptr, '1', size);
+ mte_wait_after_trig();
+ if (cur_mte_cxt.fault_valid == true) {
+ fault = 1;
+ goto check_child_tag_inheritance_err;
+ }
+ for (i = 0 ; i < size ; i += MT_GRANULE_SIZE) {
+ child_tag = MT_FETCH_TAG((uintptr_t)(mte_get_tag_address(ptr + i)));
+ if (parent_tag != child_tag) {
+ ksft_print_msg("FAIL: child mte tag mismatch\n");
+ fault = 1;
+ goto check_child_tag_inheritance_err;
+ }
+ }
+ mte_initialize_current_context(mode, (uintptr_t)ptr, -UNDERFLOW);
+ memset(ptr - UNDERFLOW, '2', UNDERFLOW);
+ mte_wait_after_trig();
+ if (cur_mte_cxt.fault_valid == false) {
+ fault = 1;
+ goto check_child_tag_inheritance_err;
+ }
+ mte_initialize_current_context(mode, (uintptr_t)ptr, size + OVERFLOW);
+ memset(ptr + size, '3', OVERFLOW);
+ mte_wait_after_trig();
+ if (cur_mte_cxt.fault_valid == false) {
+ fault = 1;
+ goto check_child_tag_inheritance_err;
+ }
+check_child_tag_inheritance_err:
+ _exit(fault);
+ }
+ /* Wait for child process to terminate */
+ wait(&child_status);
+ if (WIFEXITED(child_status))
+ fault = WEXITSTATUS(child_status);
+ else
+ fault = 1;
+ return (fault) ? KSFT_FAIL : KSFT_PASS;
+}
+
+static int check_child_memory_mapping(int mem_type, int mode, int mapping)
+{
+ char *ptr;
+ int run, result;
+ int item = ARRAY_SIZE(sizes);
+
+ item = ARRAY_SIZE(sizes);
+ mte_switch_mode(mode, MTE_ALLOW_NON_ZERO_TAG);
+ for (run = 0; run < item; run++) {
+ ptr = (char *)mte_allocate_memory_tag_range(sizes[run], mem_type, mapping,
+ UNDERFLOW, OVERFLOW);
+ if (check_allocated_memory_range(ptr, sizes[run], mem_type,
+ UNDERFLOW, OVERFLOW) != KSFT_PASS)
+ return KSFT_FAIL;
+ result = check_child_tag_inheritance(ptr, sizes[run], mode);
+ mte_free_memory_tag_range((void *)ptr, sizes[run], mem_type, UNDERFLOW, OVERFLOW);
+ if (result == KSFT_FAIL)
+ return result;
+ }
+ return KSFT_PASS;
+}
+
+static int check_child_file_mapping(int mem_type, int mode, int mapping)
+{
+ char *ptr, *map_ptr;
+ int run, fd, map_size, result = KSFT_PASS;
+ int total = ARRAY_SIZE(sizes);
+
+ mte_switch_mode(mode, MTE_ALLOW_NON_ZERO_TAG);
+ for (run = 0; run < total; run++) {
+ fd = create_temp_file();
+ if (fd == -1)
+ return KSFT_FAIL;
+
+ map_size = sizes[run] + OVERFLOW + UNDERFLOW;
+ map_ptr = (char *)mte_allocate_file_memory(map_size, mem_type, mapping, false, fd);
+ if (check_allocated_memory(map_ptr, map_size, mem_type, false) != KSFT_PASS) {
+ close(fd);
+ return KSFT_FAIL;
+ }
+ ptr = map_ptr + UNDERFLOW;
+ mte_initialize_current_context(mode, (uintptr_t)ptr, sizes[run]);
+ /* Only mte enabled memory will allow tag insertion */
+ ptr = mte_insert_tags((void *)ptr, sizes[run]);
+ if (!ptr || cur_mte_cxt.fault_valid == true) {
+ ksft_print_msg("FAIL: Insert tags on file based memory\n");
+ munmap((void *)map_ptr, map_size);
+ close(fd);
+ return KSFT_FAIL;
+ }
+ result = check_child_tag_inheritance(ptr, sizes[run], mode);
+ mte_clear_tags((void *)ptr, sizes[run]);
+ munmap((void *)map_ptr, map_size);
+ close(fd);
+ if (result != KSFT_PASS)
+ return KSFT_FAIL;
+ }
+ return KSFT_PASS;
+}
+
+int main(int argc, char *argv[])
+{
+ int err;
+ int item = ARRAY_SIZE(sizes);
+
+ page_size = getpagesize();
+ if (!page_size) {
+ ksft_print_msg("ERR: Unable to get page size\n");
+ return KSFT_FAIL;
+ }
+ sizes[item - 3] = page_size - 1;
+ sizes[item - 2] = page_size;
+ sizes[item - 1] = page_size + 1;
+
+ err = mte_default_setup();
+ if (err)
+ return err;
+
+ /* Register SIGSEGV handler */
+ mte_register_signal(SIGSEGV, mte_default_handler);
+ mte_register_signal(SIGBUS, mte_default_handler);
+
+ /* Set test plan */
+ ksft_set_plan(12);
+
+ evaluate_test(check_child_memory_mapping(USE_MMAP, MTE_SYNC_ERR, MAP_PRIVATE),
+ "Check child anonymous memory with private mapping, precise mode and mmap memory\n");
+ evaluate_test(check_child_memory_mapping(USE_MMAP, MTE_SYNC_ERR, MAP_SHARED),
+ "Check child anonymous memory with shared mapping, precise mode and mmap memory\n");
+ evaluate_test(check_child_memory_mapping(USE_MMAP, MTE_ASYNC_ERR, MAP_PRIVATE),
+ "Check child anonymous memory with private mapping, imprecise mode and mmap memory\n");
+ evaluate_test(check_child_memory_mapping(USE_MMAP, MTE_ASYNC_ERR, MAP_SHARED),
+ "Check child anonymous memory with shared mapping, imprecise mode and mmap memory\n");
+ evaluate_test(check_child_memory_mapping(USE_MPROTECT, MTE_SYNC_ERR, MAP_PRIVATE),
+ "Check child anonymous memory with private mapping, precise mode and mmap/mprotect memory\n");
+ evaluate_test(check_child_memory_mapping(USE_MPROTECT, MTE_SYNC_ERR, MAP_SHARED),
+ "Check child anonymous memory with shared mapping, precise mode and mmap/mprotect memory\n");
+
+ evaluate_test(check_child_file_mapping(USE_MMAP, MTE_SYNC_ERR, MAP_PRIVATE),
+ "Check child file memory with private mapping, precise mode and mmap memory\n");
+ evaluate_test(check_child_file_mapping(USE_MMAP, MTE_SYNC_ERR, MAP_SHARED),
+ "Check child file memory with shared mapping, precise mode and mmap memory\n");
+ evaluate_test(check_child_memory_mapping(USE_MMAP, MTE_ASYNC_ERR, MAP_PRIVATE),
+ "Check child file memory with private mapping, imprecise mode and mmap memory\n");
+ evaluate_test(check_child_memory_mapping(USE_MMAP, MTE_ASYNC_ERR, MAP_SHARED),
+ "Check child file memory with shared mapping, imprecise mode and mmap memory\n");
+ evaluate_test(check_child_memory_mapping(USE_MPROTECT, MTE_SYNC_ERR, MAP_PRIVATE),
+ "Check child file memory with private mapping, precise mode and mmap/mprotect memory\n");
+ evaluate_test(check_child_memory_mapping(USE_MPROTECT, MTE_SYNC_ERR, MAP_SHARED),
+ "Check child file memory with shared mapping, precise mode and mmap/mprotect memory\n");
+
+ mte_restore_setup();
+ ksft_print_cnts();
+ return ksft_get_fail_cnt() == 0 ? KSFT_PASS : KSFT_FAIL;
+}
diff --git a/tools/testing/selftests/arm64/mte/check_gcr_el1_cswitch.c b/tools/testing/selftests/arm64/mte/check_gcr_el1_cswitch.c
new file mode 100644
index 000000000000..325bca0de0f6
--- /dev/null
+++ b/tools/testing/selftests/arm64/mte/check_gcr_el1_cswitch.c
@@ -0,0 +1,143 @@
+// SPDX-License-Identifier: GPL-2.0
+// Copyright (C) 2020 ARM Limited
+
+#define _GNU_SOURCE
+
+#include <errno.h>
+#include <pthread.h>
+#include <stdint.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <time.h>
+#include <unistd.h>
+#include <sys/auxv.h>
+#include <sys/mman.h>
+#include <sys/prctl.h>
+#include <sys/types.h>
+#include <sys/wait.h>
+
+#include "kselftest.h"
+#include "mte_common_util.h"
+
+#include "mte_def.h"
+
+#define NUM_ITERATIONS 1024
+#define MAX_THREADS 5
+#define THREAD_ITERATIONS 1000
+
+void *execute_thread(void *x)
+{
+ pid_t pid = *((pid_t *)x);
+ pid_t tid = gettid();
+ uint64_t prctl_tag_mask;
+ uint64_t prctl_set;
+ uint64_t prctl_get;
+ uint64_t prctl_tcf;
+
+ srand(time(NULL) ^ (pid << 16) ^ (tid << 16));
+
+ prctl_tag_mask = rand() & 0xffff;
+
+ if (prctl_tag_mask % 2)
+ prctl_tcf = PR_MTE_TCF_SYNC;
+ else
+ prctl_tcf = PR_MTE_TCF_ASYNC;
+
+ prctl_set = PR_TAGGED_ADDR_ENABLE | prctl_tcf | (prctl_tag_mask << PR_MTE_TAG_SHIFT);
+
+ for (int j = 0; j < THREAD_ITERATIONS; j++) {
+ if (prctl(PR_SET_TAGGED_ADDR_CTRL, prctl_set, 0, 0, 0)) {
+ perror("prctl() failed");
+ goto fail;
+ }
+
+ prctl_get = prctl(PR_GET_TAGGED_ADDR_CTRL, 0, 0, 0, 0);
+
+ if (prctl_set != prctl_get) {
+ ksft_print_msg("Error: prctl_set: 0x%lx != prctl_get: 0x%lx\n",
+ prctl_set, prctl_get);
+ goto fail;
+ }
+ }
+
+ return (void *)KSFT_PASS;
+
+fail:
+ return (void *)KSFT_FAIL;
+}
+
+int execute_test(pid_t pid)
+{
+ pthread_t thread_id[MAX_THREADS];
+ int thread_data[MAX_THREADS];
+
+ for (int i = 0; i < MAX_THREADS; i++)
+ pthread_create(&thread_id[i], NULL,
+ execute_thread, (void *)&pid);
+
+ for (int i = 0; i < MAX_THREADS; i++)
+ pthread_join(thread_id[i], (void *)&thread_data[i]);
+
+ for (int i = 0; i < MAX_THREADS; i++)
+ if (thread_data[i] == KSFT_FAIL)
+ return KSFT_FAIL;
+
+ return KSFT_PASS;
+}
+
+int mte_gcr_fork_test(void)
+{
+ pid_t pid;
+ int results[NUM_ITERATIONS];
+ pid_t cpid;
+ int res;
+
+ for (int i = 0; i < NUM_ITERATIONS; i++) {
+ pid = fork();
+
+ if (pid < 0)
+ return KSFT_FAIL;
+
+ if (pid == 0) {
+ cpid = getpid();
+
+ res = execute_test(cpid);
+
+ exit(res);
+ }
+ }
+
+ for (int i = 0; i < NUM_ITERATIONS; i++) {
+ wait(&res);
+
+ if (WIFEXITED(res))
+ results[i] = WEXITSTATUS(res);
+ else
+ --i;
+ }
+
+ for (int i = 0; i < NUM_ITERATIONS; i++)
+ if (results[i] == KSFT_FAIL)
+ return KSFT_FAIL;
+
+ return KSFT_PASS;
+}
+
+int main(int argc, char *argv[])
+{
+ int err;
+
+ err = mte_default_setup();
+ if (err)
+ return err;
+
+ ksft_set_plan(1);
+
+ evaluate_test(mte_gcr_fork_test(),
+ "Verify that GCR_EL1 is set correctly on context switch\n");
+
+ mte_restore_setup();
+ ksft_print_cnts();
+
+ return ksft_get_fail_cnt() == 0 ? KSFT_PASS : KSFT_FAIL;
+}
diff --git a/tools/testing/selftests/arm64/mte/check_ksm_options.c b/tools/testing/selftests/arm64/mte/check_ksm_options.c
new file mode 100644
index 000000000000..88c74bc46d4f
--- /dev/null
+++ b/tools/testing/selftests/arm64/mte/check_ksm_options.c
@@ -0,0 +1,166 @@
+// SPDX-License-Identifier: GPL-2.0
+// Copyright (C) 2020 ARM Limited
+
+#define _GNU_SOURCE
+
+#include <errno.h>
+#include <fcntl.h>
+#include <signal.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <ucontext.h>
+#include <sys/mman.h>
+
+#include "kselftest.h"
+#include "mte_common_util.h"
+#include "mte_def.h"
+
+#define TEST_UNIT 10
+#define PATH_KSM "/sys/kernel/mm/ksm/"
+#define MAX_LOOP 4
+
+static size_t page_sz;
+static unsigned long ksm_sysfs[5];
+
+static unsigned long read_sysfs(char *str)
+{
+ FILE *f;
+ unsigned long val = 0;
+
+ f = fopen(str, "r");
+ if (!f) {
+ ksft_print_msg("ERR: missing %s\n", str);
+ return 0;
+ }
+ if (fscanf(f, "%lu", &val) != 1) {
+ ksft_print_msg("ERR: parsing %s\n", str);
+ val = 0;
+ }
+ fclose(f);
+ return val;
+}
+
+static void write_sysfs(char *str, unsigned long val)
+{
+ FILE *f;
+
+ f = fopen(str, "w");
+ if (!f) {
+ ksft_print_msg("ERR: missing %s\n", str);
+ return;
+ }
+ fprintf(f, "%lu", val);
+ fclose(f);
+}
+
+static void mte_ksm_setup(void)
+{
+ ksm_sysfs[0] = read_sysfs(PATH_KSM "merge_across_nodes");
+ write_sysfs(PATH_KSM "merge_across_nodes", 1);
+ ksm_sysfs[1] = read_sysfs(PATH_KSM "sleep_millisecs");
+ write_sysfs(PATH_KSM "sleep_millisecs", 0);
+ ksm_sysfs[2] = read_sysfs(PATH_KSM "run");
+ write_sysfs(PATH_KSM "run", 1);
+ ksm_sysfs[3] = read_sysfs(PATH_KSM "max_page_sharing");
+ write_sysfs(PATH_KSM "max_page_sharing", ksm_sysfs[3] + TEST_UNIT);
+ ksm_sysfs[4] = read_sysfs(PATH_KSM "pages_to_scan");
+ write_sysfs(PATH_KSM "pages_to_scan", ksm_sysfs[4] + TEST_UNIT);
+}
+
+static void mte_ksm_restore(void)
+{
+ write_sysfs(PATH_KSM "merge_across_nodes", ksm_sysfs[0]);
+ write_sysfs(PATH_KSM "sleep_millisecs", ksm_sysfs[1]);
+ write_sysfs(PATH_KSM "run", ksm_sysfs[2]);
+ write_sysfs(PATH_KSM "max_page_sharing", ksm_sysfs[3]);
+ write_sysfs(PATH_KSM "pages_to_scan", ksm_sysfs[4]);
+}
+
+static void mte_ksm_scan(void)
+{
+ int cur_count = read_sysfs(PATH_KSM "full_scans");
+ int scan_count = cur_count + 1;
+ int max_loop_count = MAX_LOOP;
+
+ while ((cur_count < scan_count) && max_loop_count) {
+ sleep(1);
+ cur_count = read_sysfs(PATH_KSM "full_scans");
+ max_loop_count--;
+ }
+#ifdef DEBUG
+ ksft_print_msg("INFO: pages_shared=%lu pages_sharing=%lu\n",
+ read_sysfs(PATH_KSM "pages_shared"),
+ read_sysfs(PATH_KSM "pages_sharing"));
+#endif
+}
+
+static int check_madvise_options(int mem_type, int mode, int mapping)
+{
+ char *ptr;
+ int err, ret;
+
+ err = KSFT_FAIL;
+ if (access(PATH_KSM, F_OK) == -1) {
+ ksft_print_msg("ERR: Kernel KSM config not enabled\n");
+ return err;
+ }
+
+ mte_switch_mode(mode, MTE_ALLOW_NON_ZERO_TAG);
+ ptr = mte_allocate_memory(TEST_UNIT * page_sz, mem_type, mapping, true);
+ if (check_allocated_memory(ptr, TEST_UNIT * page_sz, mem_type, false) != KSFT_PASS)
+ return KSFT_FAIL;
+
+ /* Insert same data in all the pages */
+ memset(ptr, 'A', TEST_UNIT * page_sz);
+ ret = madvise(ptr, TEST_UNIT * page_sz, MADV_MERGEABLE);
+ if (ret) {
+ ksft_print_msg("ERR: madvise failed to set MADV_UNMERGEABLE\n");
+ goto madvise_err;
+ }
+ mte_ksm_scan();
+ /* Tagged pages should not merge */
+ if ((read_sysfs(PATH_KSM "pages_shared") < 1) ||
+ (read_sysfs(PATH_KSM "pages_sharing") < (TEST_UNIT - 1)))
+ err = KSFT_PASS;
+madvise_err:
+ mte_free_memory(ptr, TEST_UNIT * page_sz, mem_type, true);
+ return err;
+}
+
+int main(int argc, char *argv[])
+{
+ int err;
+
+ err = mte_default_setup();
+ if (err)
+ return err;
+ page_sz = getpagesize();
+ if (!page_sz) {
+ ksft_print_msg("ERR: Unable to get page size\n");
+ return KSFT_FAIL;
+ }
+ /* Register signal handlers */
+ mte_register_signal(SIGBUS, mte_default_handler);
+ mte_register_signal(SIGSEGV, mte_default_handler);
+
+ /* Set test plan */
+ ksft_set_plan(4);
+
+ /* Enable KSM */
+ mte_ksm_setup();
+
+ evaluate_test(check_madvise_options(USE_MMAP, MTE_SYNC_ERR, MAP_PRIVATE),
+ "Check KSM mte page merge for private mapping, sync mode and mmap memory\n");
+ evaluate_test(check_madvise_options(USE_MMAP, MTE_ASYNC_ERR, MAP_PRIVATE),
+ "Check KSM mte page merge for private mapping, async mode and mmap memory\n");
+ evaluate_test(check_madvise_options(USE_MMAP, MTE_SYNC_ERR, MAP_SHARED),
+ "Check KSM mte page merge for shared mapping, sync mode and mmap memory\n");
+ evaluate_test(check_madvise_options(USE_MMAP, MTE_ASYNC_ERR, MAP_SHARED),
+ "Check KSM mte page merge for shared mapping, async mode and mmap memory\n");
+
+ mte_ksm_restore();
+ mte_restore_setup();
+ ksft_print_cnts();
+ return ksft_get_fail_cnt() == 0 ? KSFT_PASS : KSFT_FAIL;
+}
diff --git a/tools/testing/selftests/arm64/mte/check_mmap_options.c b/tools/testing/selftests/arm64/mte/check_mmap_options.c
new file mode 100644
index 000000000000..17694caaff53
--- /dev/null
+++ b/tools/testing/selftests/arm64/mte/check_mmap_options.c
@@ -0,0 +1,265 @@
+// SPDX-License-Identifier: GPL-2.0
+// Copyright (C) 2020 ARM Limited
+
+#define _GNU_SOURCE
+
+#include <errno.h>
+#include <fcntl.h>
+#include <signal.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <ucontext.h>
+#include <sys/mman.h>
+#include <sys/stat.h>
+#include <sys/types.h>
+
+#include "kselftest.h"
+#include "mte_common_util.h"
+#include "mte_def.h"
+
+#define RUNS (MT_TAG_COUNT)
+#define UNDERFLOW MT_GRANULE_SIZE
+#define OVERFLOW MT_GRANULE_SIZE
+#define TAG_CHECK_ON 0
+#define TAG_CHECK_OFF 1
+
+static size_t page_size;
+static int sizes[] = {
+ 1, 537, 989, 1269, MT_GRANULE_SIZE - 1, MT_GRANULE_SIZE,
+ /* page size - 1*/ 0, /* page_size */ 0, /* page size + 1 */ 0
+};
+
+static int check_mte_memory(char *ptr, int size, int mode, int tag_check)
+{
+ mte_initialize_current_context(mode, (uintptr_t)ptr, size);
+ memset(ptr, '1', size);
+ mte_wait_after_trig();
+ if (cur_mte_cxt.fault_valid == true)
+ return KSFT_FAIL;
+
+ mte_initialize_current_context(mode, (uintptr_t)ptr, -UNDERFLOW);
+ memset(ptr - UNDERFLOW, '2', UNDERFLOW);
+ mte_wait_after_trig();
+ if (cur_mte_cxt.fault_valid == false && tag_check == TAG_CHECK_ON)
+ return KSFT_FAIL;
+ if (cur_mte_cxt.fault_valid == true && tag_check == TAG_CHECK_OFF)
+ return KSFT_FAIL;
+
+ mte_initialize_current_context(mode, (uintptr_t)ptr, size + OVERFLOW);
+ memset(ptr + size, '3', OVERFLOW);
+ mte_wait_after_trig();
+ if (cur_mte_cxt.fault_valid == false && tag_check == TAG_CHECK_ON)
+ return KSFT_FAIL;
+ if (cur_mte_cxt.fault_valid == true && tag_check == TAG_CHECK_OFF)
+ return KSFT_FAIL;
+
+ return KSFT_PASS;
+}
+
+static int check_anonymous_memory_mapping(int mem_type, int mode, int mapping, int tag_check)
+{
+ char *ptr, *map_ptr;
+ int run, result, map_size;
+ int item = ARRAY_SIZE(sizes);
+
+ mte_switch_mode(mode, MTE_ALLOW_NON_ZERO_TAG);
+ for (run = 0; run < item; run++) {
+ map_size = sizes[run] + OVERFLOW + UNDERFLOW;
+ map_ptr = (char *)mte_allocate_memory(map_size, mem_type, mapping, false);
+ if (check_allocated_memory(map_ptr, map_size, mem_type, false) != KSFT_PASS)
+ return KSFT_FAIL;
+
+ ptr = map_ptr + UNDERFLOW;
+ mte_initialize_current_context(mode, (uintptr_t)ptr, sizes[run]);
+ /* Only mte enabled memory will allow tag insertion */
+ ptr = mte_insert_tags((void *)ptr, sizes[run]);
+ if (!ptr || cur_mte_cxt.fault_valid == true) {
+ ksft_print_msg("FAIL: Insert tags on anonymous mmap memory\n");
+ munmap((void *)map_ptr, map_size);
+ return KSFT_FAIL;
+ }
+ result = check_mte_memory(ptr, sizes[run], mode, tag_check);
+ mte_clear_tags((void *)ptr, sizes[run]);
+ mte_free_memory((void *)map_ptr, map_size, mem_type, false);
+ if (result == KSFT_FAIL)
+ return KSFT_FAIL;
+ }
+ return KSFT_PASS;
+}
+
+static int check_file_memory_mapping(int mem_type, int mode, int mapping, int tag_check)
+{
+ char *ptr, *map_ptr;
+ int run, fd, map_size;
+ int total = ARRAY_SIZE(sizes);
+ int result = KSFT_PASS;
+
+ mte_switch_mode(mode, MTE_ALLOW_NON_ZERO_TAG);
+ for (run = 0; run < total; run++) {
+ fd = create_temp_file();
+ if (fd == -1)
+ return KSFT_FAIL;
+
+ map_size = sizes[run] + UNDERFLOW + OVERFLOW;
+ map_ptr = (char *)mte_allocate_file_memory(map_size, mem_type, mapping, false, fd);
+ if (check_allocated_memory(map_ptr, map_size, mem_type, false) != KSFT_PASS) {
+ close(fd);
+ return KSFT_FAIL;
+ }
+ ptr = map_ptr + UNDERFLOW;
+ mte_initialize_current_context(mode, (uintptr_t)ptr, sizes[run]);
+ /* Only mte enabled memory will allow tag insertion */
+ ptr = mte_insert_tags((void *)ptr, sizes[run]);
+ if (!ptr || cur_mte_cxt.fault_valid == true) {
+ ksft_print_msg("FAIL: Insert tags on file based memory\n");
+ munmap((void *)map_ptr, map_size);
+ close(fd);
+ return KSFT_FAIL;
+ }
+ result = check_mte_memory(ptr, sizes[run], mode, tag_check);
+ mte_clear_tags((void *)ptr, sizes[run]);
+ munmap((void *)map_ptr, map_size);
+ close(fd);
+ if (result == KSFT_FAIL)
+ break;
+ }
+ return result;
+}
+
+static int check_clear_prot_mte_flag(int mem_type, int mode, int mapping)
+{
+ char *ptr, *map_ptr;
+ int run, prot_flag, result, fd, map_size;
+ int total = ARRAY_SIZE(sizes);
+
+ prot_flag = PROT_READ | PROT_WRITE;
+ mte_switch_mode(mode, MTE_ALLOW_NON_ZERO_TAG);
+ for (run = 0; run < total; run++) {
+ map_size = sizes[run] + OVERFLOW + UNDERFLOW;
+ ptr = (char *)mte_allocate_memory_tag_range(sizes[run], mem_type, mapping,
+ UNDERFLOW, OVERFLOW);
+ if (check_allocated_memory_range(ptr, sizes[run], mem_type,
+ UNDERFLOW, OVERFLOW) != KSFT_PASS)
+ return KSFT_FAIL;
+ map_ptr = ptr - UNDERFLOW;
+ /* Try to clear PROT_MTE property and verify it by tag checking */
+ if (mprotect(map_ptr, map_size, prot_flag)) {
+ mte_free_memory_tag_range((void *)ptr, sizes[run], mem_type,
+ UNDERFLOW, OVERFLOW);
+ ksft_print_msg("FAIL: mprotect not ignoring clear PROT_MTE property\n");
+ return KSFT_FAIL;
+ }
+ result = check_mte_memory(ptr, sizes[run], mode, TAG_CHECK_ON);
+ mte_free_memory_tag_range((void *)ptr, sizes[run], mem_type, UNDERFLOW, OVERFLOW);
+ if (result != KSFT_PASS)
+ return KSFT_FAIL;
+
+ fd = create_temp_file();
+ if (fd == -1)
+ return KSFT_FAIL;
+ ptr = (char *)mte_allocate_file_memory_tag_range(sizes[run], mem_type, mapping,
+ UNDERFLOW, OVERFLOW, fd);
+ if (check_allocated_memory_range(ptr, sizes[run], mem_type,
+ UNDERFLOW, OVERFLOW) != KSFT_PASS) {
+ close(fd);
+ return KSFT_FAIL;
+ }
+ map_ptr = ptr - UNDERFLOW;
+ /* Try to clear PROT_MTE property and verify it by tag checking */
+ if (mprotect(map_ptr, map_size, prot_flag)) {
+ ksft_print_msg("FAIL: mprotect not ignoring clear PROT_MTE property\n");
+ mte_free_memory_tag_range((void *)ptr, sizes[run], mem_type,
+ UNDERFLOW, OVERFLOW);
+ close(fd);
+ return KSFT_FAIL;
+ }
+ result = check_mte_memory(ptr, sizes[run], mode, TAG_CHECK_ON);
+ mte_free_memory_tag_range((void *)ptr, sizes[run], mem_type, UNDERFLOW, OVERFLOW);
+ close(fd);
+ if (result != KSFT_PASS)
+ return KSFT_FAIL;
+ }
+ return KSFT_PASS;
+}
+
+int main(int argc, char *argv[])
+{
+ int err;
+ int item = ARRAY_SIZE(sizes);
+
+ err = mte_default_setup();
+ if (err)
+ return err;
+ page_size = getpagesize();
+ if (!page_size) {
+ ksft_print_msg("ERR: Unable to get page size\n");
+ return KSFT_FAIL;
+ }
+ sizes[item - 3] = page_size - 1;
+ sizes[item - 2] = page_size;
+ sizes[item - 1] = page_size + 1;
+
+ /* Register signal handlers */
+ mte_register_signal(SIGBUS, mte_default_handler);
+ mte_register_signal(SIGSEGV, mte_default_handler);
+
+ /* Set test plan */
+ ksft_set_plan(22);
+
+ mte_enable_pstate_tco();
+
+ evaluate_test(check_anonymous_memory_mapping(USE_MMAP, MTE_SYNC_ERR, MAP_PRIVATE, TAG_CHECK_OFF),
+ "Check anonymous memory with private mapping, sync error mode, mmap memory and tag check off\n");
+ evaluate_test(check_file_memory_mapping(USE_MPROTECT, MTE_SYNC_ERR, MAP_PRIVATE, TAG_CHECK_OFF),
+ "Check file memory with private mapping, sync error mode, mmap/mprotect memory and tag check off\n");
+
+ mte_disable_pstate_tco();
+ evaluate_test(check_anonymous_memory_mapping(USE_MMAP, MTE_NONE_ERR, MAP_PRIVATE, TAG_CHECK_OFF),
+ "Check anonymous memory with private mapping, no error mode, mmap memory and tag check off\n");
+ evaluate_test(check_file_memory_mapping(USE_MPROTECT, MTE_NONE_ERR, MAP_PRIVATE, TAG_CHECK_OFF),
+ "Check file memory with private mapping, no error mode, mmap/mprotect memory and tag check off\n");
+
+ evaluate_test(check_anonymous_memory_mapping(USE_MMAP, MTE_SYNC_ERR, MAP_PRIVATE, TAG_CHECK_ON),
+ "Check anonymous memory with private mapping, sync error mode, mmap memory and tag check on\n");
+ evaluate_test(check_anonymous_memory_mapping(USE_MPROTECT, MTE_SYNC_ERR, MAP_PRIVATE, TAG_CHECK_ON),
+ "Check anonymous memory with private mapping, sync error mode, mmap/mprotect memory and tag check on\n");
+ evaluate_test(check_anonymous_memory_mapping(USE_MMAP, MTE_SYNC_ERR, MAP_SHARED, TAG_CHECK_ON),
+ "Check anonymous memory with shared mapping, sync error mode, mmap memory and tag check on\n");
+ evaluate_test(check_anonymous_memory_mapping(USE_MPROTECT, MTE_SYNC_ERR, MAP_SHARED, TAG_CHECK_ON),
+ "Check anonymous memory with shared mapping, sync error mode, mmap/mprotect memory and tag check on\n");
+ evaluate_test(check_anonymous_memory_mapping(USE_MMAP, MTE_ASYNC_ERR, MAP_PRIVATE, TAG_CHECK_ON),
+ "Check anonymous memory with private mapping, async error mode, mmap memory and tag check on\n");
+ evaluate_test(check_anonymous_memory_mapping(USE_MPROTECT, MTE_ASYNC_ERR, MAP_PRIVATE, TAG_CHECK_ON),
+ "Check anonymous memory with private mapping, async error mode, mmap/mprotect memory and tag check on\n");
+ evaluate_test(check_anonymous_memory_mapping(USE_MMAP, MTE_ASYNC_ERR, MAP_SHARED, TAG_CHECK_ON),
+ "Check anonymous memory with shared mapping, async error mode, mmap memory and tag check on\n");
+ evaluate_test(check_anonymous_memory_mapping(USE_MPROTECT, MTE_ASYNC_ERR, MAP_SHARED, TAG_CHECK_ON),
+ "Check anonymous memory with shared mapping, async error mode, mmap/mprotect memory and tag check on\n");
+
+ evaluate_test(check_file_memory_mapping(USE_MMAP, MTE_SYNC_ERR, MAP_PRIVATE, TAG_CHECK_ON),
+ "Check file memory with private mapping, sync error mode, mmap memory and tag check on\n");
+ evaluate_test(check_file_memory_mapping(USE_MPROTECT, MTE_SYNC_ERR, MAP_PRIVATE, TAG_CHECK_ON),
+ "Check file memory with private mapping, sync error mode, mmap/mprotect memory and tag check on\n");
+ evaluate_test(check_file_memory_mapping(USE_MMAP, MTE_SYNC_ERR, MAP_SHARED, TAG_CHECK_ON),
+ "Check file memory with shared mapping, sync error mode, mmap memory and tag check on\n");
+ evaluate_test(check_file_memory_mapping(USE_MPROTECT, MTE_SYNC_ERR, MAP_SHARED, TAG_CHECK_ON),
+ "Check file memory with shared mapping, sync error mode, mmap/mprotect memory and tag check on\n");
+ evaluate_test(check_file_memory_mapping(USE_MMAP, MTE_ASYNC_ERR, MAP_PRIVATE, TAG_CHECK_ON),
+ "Check file memory with private mapping, async error mode, mmap memory and tag check on\n");
+ evaluate_test(check_file_memory_mapping(USE_MPROTECT, MTE_ASYNC_ERR, MAP_PRIVATE, TAG_CHECK_ON),
+ "Check file memory with private mapping, async error mode, mmap/mprotect memory and tag check on\n");
+ evaluate_test(check_file_memory_mapping(USE_MMAP, MTE_ASYNC_ERR, MAP_SHARED, TAG_CHECK_ON),
+ "Check file memory with shared mapping, async error mode, mmap memory and tag check on\n");
+ evaluate_test(check_file_memory_mapping(USE_MPROTECT, MTE_ASYNC_ERR, MAP_SHARED, TAG_CHECK_ON),
+ "Check file memory with shared mapping, async error mode, mmap/mprotect memory and tag check on\n");
+
+ evaluate_test(check_clear_prot_mte_flag(USE_MMAP, MTE_SYNC_ERR, MAP_PRIVATE),
+ "Check clear PROT_MTE flags with private mapping, sync error mode and mmap memory\n");
+ evaluate_test(check_clear_prot_mte_flag(USE_MPROTECT, MTE_SYNC_ERR, MAP_PRIVATE),
+ "Check clear PROT_MTE flags with private mapping and sync error mode and mmap/mprotect memory\n");
+
+ mte_restore_setup();
+ ksft_print_cnts();
+ return ksft_get_fail_cnt() == 0 ? KSFT_PASS : KSFT_FAIL;
+}
diff --git a/tools/testing/selftests/arm64/mte/check_prctl.c b/tools/testing/selftests/arm64/mte/check_prctl.c
new file mode 100644
index 000000000000..f139a33a43ef
--- /dev/null
+++ b/tools/testing/selftests/arm64/mte/check_prctl.c
@@ -0,0 +1,119 @@
+// SPDX-License-Identifier: GPL-2.0
+// Copyright (C) 2022 ARM Limited
+
+#include <stdbool.h>
+#include <stdio.h>
+#include <string.h>
+
+#include <sys/auxv.h>
+#include <sys/prctl.h>
+
+#include <asm/hwcap.h>
+
+#include "kselftest.h"
+
+static int set_tagged_addr_ctrl(int val)
+{
+ int ret;
+
+ ret = prctl(PR_SET_TAGGED_ADDR_CTRL, val, 0, 0, 0);
+ if (ret < 0)
+ ksft_print_msg("PR_SET_TAGGED_ADDR_CTRL: failed %d %d (%s)\n",
+ ret, errno, strerror(errno));
+ return ret;
+}
+
+static int get_tagged_addr_ctrl(void)
+{
+ int ret;
+
+ ret = prctl(PR_GET_TAGGED_ADDR_CTRL, 0, 0, 0, 0);
+ if (ret < 0)
+ ksft_print_msg("PR_GET_TAGGED_ADDR_CTRL failed: %d %d (%s)\n",
+ ret, errno, strerror(errno));
+ return ret;
+}
+
+/*
+ * Read the current mode without having done any configuration, should
+ * run first.
+ */
+void check_basic_read(void)
+{
+ int ret;
+
+ ret = get_tagged_addr_ctrl();
+ if (ret < 0) {
+ ksft_test_result_fail("check_basic_read\n");
+ return;
+ }
+
+ if (ret & PR_MTE_TCF_SYNC)
+ ksft_print_msg("SYNC enabled\n");
+ if (ret & PR_MTE_TCF_ASYNC)
+ ksft_print_msg("ASYNC enabled\n");
+
+ /* Any configuration is valid */
+ ksft_test_result_pass("check_basic_read\n");
+}
+
+/*
+ * Attempt to set a specified combination of modes.
+ */
+void set_mode_test(const char *name, int hwcap2, int mask)
+{
+ int ret;
+
+ if ((getauxval(AT_HWCAP2) & hwcap2) != hwcap2) {
+ ksft_test_result_skip("%s\n", name);
+ return;
+ }
+
+ ret = set_tagged_addr_ctrl(mask);
+ if (ret < 0) {
+ ksft_test_result_fail("%s\n", name);
+ return;
+ }
+
+ ret = get_tagged_addr_ctrl();
+ if (ret < 0) {
+ ksft_test_result_fail("%s\n", name);
+ return;
+ }
+
+ if ((ret & PR_MTE_TCF_MASK) == mask) {
+ ksft_test_result_pass("%s\n", name);
+ } else {
+ ksft_print_msg("Got %x, expected %x\n",
+ (ret & PR_MTE_TCF_MASK), mask);
+ ksft_test_result_fail("%s\n", name);
+ }
+}
+
+struct mte_mode {
+ int mask;
+ int hwcap2;
+ const char *name;
+} mte_modes[] = {
+ { PR_MTE_TCF_NONE, 0, "NONE" },
+ { PR_MTE_TCF_SYNC, HWCAP2_MTE, "SYNC" },
+ { PR_MTE_TCF_ASYNC, HWCAP2_MTE, "ASYNC" },
+ { PR_MTE_TCF_SYNC | PR_MTE_TCF_ASYNC, HWCAP2_MTE, "SYNC+ASYNC" },
+};
+
+int main(void)
+{
+ int i;
+
+ ksft_print_header();
+ ksft_set_plan(5);
+
+ check_basic_read();
+ for (i = 0; i < ARRAY_SIZE(mte_modes); i++)
+ set_mode_test(mte_modes[i].name, mte_modes[i].hwcap2,
+ mte_modes[i].mask);
+
+ ksft_print_cnts();
+
+ return 0;
+}
diff --git a/tools/testing/selftests/arm64/mte/check_tags_inclusion.c b/tools/testing/selftests/arm64/mte/check_tags_inclusion.c
new file mode 100644
index 000000000000..2b1425b92b69
--- /dev/null
+++ b/tools/testing/selftests/arm64/mte/check_tags_inclusion.c
@@ -0,0 +1,200 @@
+// SPDX-License-Identifier: GPL-2.0
+// Copyright (C) 2020 ARM Limited
+
+#define _GNU_SOURCE
+
+#include <errno.h>
+#include <signal.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <ucontext.h>
+#include <sys/wait.h>
+
+#include "kselftest.h"
+#include "mte_common_util.h"
+#include "mte_def.h"
+
+#define BUFFER_SIZE (5 * MT_GRANULE_SIZE)
+#define RUNS (MT_TAG_COUNT * 2)
+#define MTE_LAST_TAG_MASK (0x7FFF)
+
+static int verify_mte_pointer_validity(char *ptr, int mode)
+{
+ mte_initialize_current_context(mode, (uintptr_t)ptr, BUFFER_SIZE);
+ /* Check the validity of the tagged pointer */
+ memset(ptr, '1', BUFFER_SIZE);
+ mte_wait_after_trig();
+ if (cur_mte_cxt.fault_valid) {
+ ksft_print_msg("Unexpected fault recorded for %p-%p in mode %x\n",
+ ptr, ptr + BUFFER_SIZE, mode);
+ return KSFT_FAIL;
+ }
+ /* Proceed further for nonzero tags */
+ if (!MT_FETCH_TAG((uintptr_t)ptr))
+ return KSFT_PASS;
+ mte_initialize_current_context(mode, (uintptr_t)ptr, BUFFER_SIZE + 1);
+ /* Check the validity outside the range */
+ ptr[BUFFER_SIZE] = '2';
+ mte_wait_after_trig();
+ if (!cur_mte_cxt.fault_valid) {
+ ksft_print_msg("No valid fault recorded for %p in mode %x\n",
+ ptr, mode);
+ return KSFT_FAIL;
+ } else {
+ return KSFT_PASS;
+ }
+}
+
+static int check_single_included_tags(int mem_type, int mode)
+{
+ char *ptr;
+ int tag, run, ret, result = KSFT_PASS;
+
+ ptr = mte_allocate_memory(BUFFER_SIZE + MT_GRANULE_SIZE, mem_type, 0, false);
+ if (check_allocated_memory(ptr, BUFFER_SIZE + MT_GRANULE_SIZE,
+ mem_type, false) != KSFT_PASS)
+ return KSFT_FAIL;
+
+ for (tag = 0; (tag < MT_TAG_COUNT) && (result == KSFT_PASS); tag++) {
+ ret = mte_switch_mode(mode, MT_INCLUDE_VALID_TAG(tag));
+ if (ret != 0)
+ result = KSFT_FAIL;
+ /* Try to catch a excluded tag by a number of tries. */
+ for (run = 0; (run < RUNS) && (result == KSFT_PASS); run++) {
+ ptr = mte_insert_tags(ptr, BUFFER_SIZE);
+ /* Check tag value */
+ if (MT_FETCH_TAG((uintptr_t)ptr) == tag) {
+ ksft_print_msg("FAIL: wrong tag = 0x%x with include mask=0x%x\n",
+ MT_FETCH_TAG((uintptr_t)ptr),
+ MT_INCLUDE_VALID_TAG(tag));
+ result = KSFT_FAIL;
+ break;
+ }
+ result = verify_mte_pointer_validity(ptr, mode);
+ }
+ }
+ mte_free_memory_tag_range(ptr, BUFFER_SIZE, mem_type, 0, MT_GRANULE_SIZE);
+ return result;
+}
+
+static int check_multiple_included_tags(int mem_type, int mode)
+{
+ char *ptr;
+ int tag, run, result = KSFT_PASS;
+ unsigned long excl_mask = 0;
+
+ ptr = mte_allocate_memory(BUFFER_SIZE + MT_GRANULE_SIZE, mem_type, 0, false);
+ if (check_allocated_memory(ptr, BUFFER_SIZE + MT_GRANULE_SIZE,
+ mem_type, false) != KSFT_PASS)
+ return KSFT_FAIL;
+
+ for (tag = 0; (tag < MT_TAG_COUNT - 1) && (result == KSFT_PASS); tag++) {
+ excl_mask |= 1 << tag;
+ mte_switch_mode(mode, MT_INCLUDE_VALID_TAGS(excl_mask));
+ /* Try to catch a excluded tag by a number of tries. */
+ for (run = 0; (run < RUNS) && (result == KSFT_PASS); run++) {
+ ptr = mte_insert_tags(ptr, BUFFER_SIZE);
+ /* Check tag value */
+ if (MT_FETCH_TAG((uintptr_t)ptr) < tag) {
+ ksft_print_msg("FAIL: wrong tag = 0x%x with include mask=0x%x\n",
+ MT_FETCH_TAG((uintptr_t)ptr),
+ MT_INCLUDE_VALID_TAGS(excl_mask));
+ result = KSFT_FAIL;
+ break;
+ }
+ result = verify_mte_pointer_validity(ptr, mode);
+ }
+ }
+ mte_free_memory_tag_range(ptr, BUFFER_SIZE, mem_type, 0, MT_GRANULE_SIZE);
+ return result;
+}
+
+static int check_all_included_tags(int mem_type, int mode)
+{
+ char *ptr;
+ int run, ret, result = KSFT_PASS;
+
+ ptr = mte_allocate_memory(BUFFER_SIZE + MT_GRANULE_SIZE, mem_type, 0, false);
+ if (check_allocated_memory(ptr, BUFFER_SIZE + MT_GRANULE_SIZE,
+ mem_type, false) != KSFT_PASS)
+ return KSFT_FAIL;
+
+ ret = mte_switch_mode(mode, MT_INCLUDE_TAG_MASK);
+ if (ret != 0)
+ return KSFT_FAIL;
+ /* Try to catch a excluded tag by a number of tries. */
+ for (run = 0; (run < RUNS) && (result == KSFT_PASS); run++) {
+ ptr = (char *)mte_insert_tags(ptr, BUFFER_SIZE);
+ /*
+ * Here tag byte can be between 0x0 to 0xF (full allowed range)
+ * so no need to match so just verify if it is writable.
+ */
+ result = verify_mte_pointer_validity(ptr, mode);
+ }
+ mte_free_memory_tag_range(ptr, BUFFER_SIZE, mem_type, 0, MT_GRANULE_SIZE);
+ return result;
+}
+
+static int check_none_included_tags(int mem_type, int mode)
+{
+ char *ptr;
+ int run, ret;
+
+ ptr = mte_allocate_memory(BUFFER_SIZE, mem_type, 0, false);
+ if (check_allocated_memory(ptr, BUFFER_SIZE, mem_type, false) != KSFT_PASS)
+ return KSFT_FAIL;
+
+ ret = mte_switch_mode(mode, MT_EXCLUDE_TAG_MASK);
+ if (ret != 0)
+ return KSFT_FAIL;
+ /* Try to catch a excluded tag by a number of tries. */
+ for (run = 0; run < RUNS; run++) {
+ ptr = (char *)mte_insert_tags(ptr, BUFFER_SIZE);
+ /* Here all tags exluded so tag value generated should be 0 */
+ if (MT_FETCH_TAG((uintptr_t)ptr)) {
+ ksft_print_msg("FAIL: included tag value found\n");
+ mte_free_memory((void *)ptr, BUFFER_SIZE, mem_type, true);
+ return KSFT_FAIL;
+ }
+ mte_initialize_current_context(mode, (uintptr_t)ptr, BUFFER_SIZE);
+ /* Check the write validity of the untagged pointer */
+ memset(ptr, '1', BUFFER_SIZE);
+ mte_wait_after_trig();
+ if (cur_mte_cxt.fault_valid)
+ break;
+ }
+ mte_free_memory(ptr, BUFFER_SIZE, mem_type, false);
+ if (cur_mte_cxt.fault_valid)
+ return KSFT_FAIL;
+ else
+ return KSFT_PASS;
+}
+
+int main(int argc, char *argv[])
+{
+ int err;
+
+ err = mte_default_setup();
+ if (err)
+ return err;
+
+ /* Register SIGSEGV handler */
+ mte_register_signal(SIGSEGV, mte_default_handler);
+
+ /* Set test plan */
+ ksft_set_plan(4);
+
+ evaluate_test(check_single_included_tags(USE_MMAP, MTE_SYNC_ERR),
+ "Check an included tag value with sync mode\n");
+ evaluate_test(check_multiple_included_tags(USE_MMAP, MTE_SYNC_ERR),
+ "Check different included tags value with sync mode\n");
+ evaluate_test(check_none_included_tags(USE_MMAP, MTE_SYNC_ERR),
+ "Check none included tags value with sync mode\n");
+ evaluate_test(check_all_included_tags(USE_MMAP, MTE_SYNC_ERR),
+ "Check all included tags value with sync mode\n");
+
+ mte_restore_setup();
+ ksft_print_cnts();
+ return ksft_get_fail_cnt() == 0 ? KSFT_PASS : KSFT_FAIL;
+}
diff --git a/tools/testing/selftests/arm64/mte/check_user_mem.c b/tools/testing/selftests/arm64/mte/check_user_mem.c
new file mode 100644
index 000000000000..f4ae5f87a3b7
--- /dev/null
+++ b/tools/testing/selftests/arm64/mte/check_user_mem.c
@@ -0,0 +1,243 @@
+// SPDX-License-Identifier: GPL-2.0
+// Copyright (C) 2020 ARM Limited
+
+#define _GNU_SOURCE
+
+#include <assert.h>
+#include <errno.h>
+#include <fcntl.h>
+#include <signal.h>
+#include <stdlib.h>
+#include <stdio.h>
+#include <string.h>
+#include <ucontext.h>
+#include <unistd.h>
+#include <sys/uio.h>
+#include <sys/mman.h>
+
+#include "kselftest.h"
+#include "mte_common_util.h"
+#include "mte_def.h"
+
+static size_t page_sz;
+
+#define TEST_NAME_MAX 100
+
+enum test_type {
+ READ_TEST,
+ WRITE_TEST,
+ READV_TEST,
+ WRITEV_TEST,
+ LAST_TEST,
+};
+
+static int check_usermem_access_fault(int mem_type, int mode, int mapping,
+ int tag_offset, int tag_len,
+ enum test_type test_type)
+{
+ int fd, i, err;
+ char val = 'A';
+ ssize_t len, syscall_len;
+ void *ptr, *ptr_next;
+ int fileoff, ptroff, size;
+ int sizes[] = {1, 2, 3, 8, 16, 32, 4096, page_sz};
+
+ err = KSFT_PASS;
+ len = 2 * page_sz;
+ mte_switch_mode(mode, MTE_ALLOW_NON_ZERO_TAG);
+ fd = create_temp_file();
+ if (fd == -1)
+ return KSFT_FAIL;
+ for (i = 0; i < len; i++)
+ if (write(fd, &val, sizeof(val)) != sizeof(val))
+ return KSFT_FAIL;
+ lseek(fd, 0, 0);
+ ptr = mte_allocate_memory(len, mem_type, mapping, true);
+ if (check_allocated_memory(ptr, len, mem_type, true) != KSFT_PASS) {
+ close(fd);
+ return KSFT_FAIL;
+ }
+ mte_initialize_current_context(mode, (uintptr_t)ptr, len);
+ /* Copy from file into buffer with valid tag */
+ syscall_len = read(fd, ptr, len);
+ mte_wait_after_trig();
+ if (cur_mte_cxt.fault_valid || syscall_len < len)
+ goto usermem_acc_err;
+ /* Verify same pattern is read */
+ for (i = 0; i < len; i++)
+ if (*(char *)(ptr + i) != val)
+ break;
+ if (i < len)
+ goto usermem_acc_err;
+
+ if (!tag_len)
+ tag_len = len - tag_offset;
+ /* Tag a part of memory with different value */
+ ptr_next = (void *)((unsigned long)ptr + tag_offset);
+ ptr_next = mte_insert_new_tag(ptr_next);
+ mte_set_tag_address_range(ptr_next, tag_len);
+
+ for (fileoff = 0; fileoff < 16; fileoff++) {
+ for (ptroff = 0; ptroff < 16; ptroff++) {
+ for (i = 0; i < ARRAY_SIZE(sizes); i++) {
+ size = sizes[i];
+ lseek(fd, 0, 0);
+
+ /* perform file operation on buffer with invalid tag */
+ switch (test_type) {
+ case READ_TEST:
+ syscall_len = read(fd, ptr + ptroff, size);
+ break;
+ case WRITE_TEST:
+ syscall_len = write(fd, ptr + ptroff, size);
+ break;
+ case READV_TEST: {
+ struct iovec iov[1];
+ iov[0].iov_base = ptr + ptroff;
+ iov[0].iov_len = size;
+ syscall_len = readv(fd, iov, 1);
+ break;
+ }
+ case WRITEV_TEST: {
+ struct iovec iov[1];
+ iov[0].iov_base = ptr + ptroff;
+ iov[0].iov_len = size;
+ syscall_len = writev(fd, iov, 1);
+ break;
+ }
+ case LAST_TEST:
+ goto usermem_acc_err;
+ }
+
+ mte_wait_after_trig();
+ /*
+ * Accessing user memory in kernel with invalid tag should fail in sync
+ * mode without fault but may not fail in async mode as per the
+ * implemented MTE userspace support in Arm64 kernel.
+ */
+ if (cur_mte_cxt.fault_valid) {
+ goto usermem_acc_err;
+ }
+ if (mode == MTE_SYNC_ERR && syscall_len < len) {
+ /* test passed */
+ } else if (mode == MTE_ASYNC_ERR && syscall_len == size) {
+ /* test passed */
+ } else {
+ goto usermem_acc_err;
+ }
+ }
+ }
+ }
+
+ goto exit;
+
+usermem_acc_err:
+ err = KSFT_FAIL;
+exit:
+ mte_free_memory((void *)ptr, len, mem_type, true);
+ close(fd);
+ return err;
+}
+
+void format_test_name(char* name, int name_len, int type, int sync, int map, int len, int offset) {
+ const char* test_type;
+ const char* mte_type;
+ const char* map_type;
+
+ switch (type) {
+ case READ_TEST:
+ test_type = "read";
+ break;
+ case WRITE_TEST:
+ test_type = "write";
+ break;
+ case READV_TEST:
+ test_type = "readv";
+ break;
+ case WRITEV_TEST:
+ test_type = "writev";
+ break;
+ default:
+ assert(0);
+ break;
+ }
+
+ switch (sync) {
+ case MTE_SYNC_ERR:
+ mte_type = "MTE_SYNC_ERR";
+ break;
+ case MTE_ASYNC_ERR:
+ mte_type = "MTE_ASYNC_ERR";
+ break;
+ default:
+ assert(0);
+ break;
+ }
+
+ switch (map) {
+ case MAP_SHARED:
+ map_type = "MAP_SHARED";
+ break;
+ case MAP_PRIVATE:
+ map_type = "MAP_PRIVATE";
+ break;
+ default:
+ assert(0);
+ break;
+ }
+
+ snprintf(name, name_len,
+ "test type: %s, %s, %s, tag len: %d, tag offset: %d\n",
+ test_type, mte_type, map_type, len, offset);
+}
+
+int main(int argc, char *argv[])
+{
+ int err;
+ int t, s, m, l, o;
+ int mte_sync[] = {MTE_SYNC_ERR, MTE_ASYNC_ERR};
+ int maps[] = {MAP_SHARED, MAP_PRIVATE};
+ int tag_lens[] = {0, MT_GRANULE_SIZE};
+ int tag_offsets[] = {page_sz, MT_GRANULE_SIZE};
+ char test_name[TEST_NAME_MAX];
+
+ page_sz = getpagesize();
+ if (!page_sz) {
+ ksft_print_msg("ERR: Unable to get page size\n");
+ return KSFT_FAIL;
+ }
+ err = mte_default_setup();
+ if (err)
+ return err;
+
+ /* Register signal handlers */
+ mte_register_signal(SIGSEGV, mte_default_handler);
+
+ /* Set test plan */
+ ksft_set_plan(64);
+
+ for (t = 0; t < LAST_TEST; t++) {
+ for (s = 0; s < ARRAY_SIZE(mte_sync); s++) {
+ for (m = 0; m < ARRAY_SIZE(maps); m++) {
+ for (l = 0; l < ARRAY_SIZE(tag_lens); l++) {
+ for (o = 0; o < ARRAY_SIZE(tag_offsets); o++) {
+ int sync = mte_sync[s];
+ int map = maps[m];
+ int offset = tag_offsets[o];
+ int tag_len = tag_lens[l];
+ int res = check_usermem_access_fault(USE_MMAP, sync,
+ map, offset,
+ tag_len, t);
+ format_test_name(test_name, TEST_NAME_MAX,
+ t, sync, map, tag_len, offset);
+ evaluate_test(res, test_name);
+ }
+ }
+ }
+ }
+ }
+
+ mte_restore_setup();
+ ksft_print_cnts();
+ return ksft_get_fail_cnt() == 0 ? KSFT_PASS : KSFT_FAIL;
+}
diff --git a/tools/testing/selftests/arm64/mte/mte_common_util.c b/tools/testing/selftests/arm64/mte/mte_common_util.c
new file mode 100644
index 000000000000..00ffd34c66d3
--- /dev/null
+++ b/tools/testing/selftests/arm64/mte/mte_common_util.c
@@ -0,0 +1,368 @@
+// SPDX-License-Identifier: GPL-2.0
+// Copyright (C) 2020 ARM Limited
+
+#include <fcntl.h>
+#include <sched.h>
+#include <signal.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <unistd.h>
+
+#include <linux/auxvec.h>
+#include <sys/auxv.h>
+#include <sys/mman.h>
+#include <sys/prctl.h>
+
+#include <asm/hwcap.h>
+
+#include "kselftest.h"
+#include "mte_common_util.h"
+#include "mte_def.h"
+
+#define INIT_BUFFER_SIZE 256
+
+struct mte_fault_cxt cur_mte_cxt;
+static unsigned int mte_cur_mode;
+static unsigned int mte_cur_pstate_tco;
+
+void mte_default_handler(int signum, siginfo_t *si, void *uc)
+{
+ unsigned long addr = (unsigned long)si->si_addr;
+
+ if (signum == SIGSEGV) {
+#ifdef DEBUG
+ ksft_print_msg("INFO: SIGSEGV signal at pc=%lx, fault addr=%lx, si_code=%lx\n",
+ ((ucontext_t *)uc)->uc_mcontext.pc, addr, si->si_code);
+#endif
+ if (si->si_code == SEGV_MTEAERR) {
+ if (cur_mte_cxt.trig_si_code == si->si_code)
+ cur_mte_cxt.fault_valid = true;
+ else
+ ksft_print_msg("Got unexpected SEGV_MTEAERR at pc=$lx, fault addr=%lx\n",
+ ((ucontext_t *)uc)->uc_mcontext.pc,
+ addr);
+ return;
+ }
+ /* Compare the context for precise error */
+ else if (si->si_code == SEGV_MTESERR) {
+ if (cur_mte_cxt.trig_si_code == si->si_code &&
+ ((cur_mte_cxt.trig_range >= 0 &&
+ addr >= MT_CLEAR_TAG(cur_mte_cxt.trig_addr) &&
+ addr <= (MT_CLEAR_TAG(cur_mte_cxt.trig_addr) + cur_mte_cxt.trig_range)) ||
+ (cur_mte_cxt.trig_range < 0 &&
+ addr <= MT_CLEAR_TAG(cur_mte_cxt.trig_addr) &&
+ addr >= (MT_CLEAR_TAG(cur_mte_cxt.trig_addr) + cur_mte_cxt.trig_range)))) {
+ cur_mte_cxt.fault_valid = true;
+ /* Adjust the pc by 4 */
+ ((ucontext_t *)uc)->uc_mcontext.pc += 4;
+ } else {
+ ksft_print_msg("Invalid MTE synchronous exception caught!\n");
+ exit(1);
+ }
+ } else {
+ ksft_print_msg("Unknown SIGSEGV exception caught!\n");
+ exit(1);
+ }
+ } else if (signum == SIGBUS) {
+ ksft_print_msg("INFO: SIGBUS signal at pc=%lx, fault addr=%lx, si_code=%lx\n",
+ ((ucontext_t *)uc)->uc_mcontext.pc, addr, si->si_code);
+ if ((cur_mte_cxt.trig_range >= 0 &&
+ addr >= MT_CLEAR_TAG(cur_mte_cxt.trig_addr) &&
+ addr <= (MT_CLEAR_TAG(cur_mte_cxt.trig_addr) + cur_mte_cxt.trig_range)) ||
+ (cur_mte_cxt.trig_range < 0 &&
+ addr <= MT_CLEAR_TAG(cur_mte_cxt.trig_addr) &&
+ addr >= (MT_CLEAR_TAG(cur_mte_cxt.trig_addr) + cur_mte_cxt.trig_range))) {
+ cur_mte_cxt.fault_valid = true;
+ /* Adjust the pc by 4 */
+ ((ucontext_t *)uc)->uc_mcontext.pc += 4;
+ }
+ }
+}
+
+void mte_register_signal(int signal, void (*handler)(int, siginfo_t *, void *))
+{
+ struct sigaction sa;
+
+ sa.sa_sigaction = handler;
+ sa.sa_flags = SA_SIGINFO;
+ sigemptyset(&sa.sa_mask);
+ sigaction(signal, &sa, NULL);
+}
+
+void mte_wait_after_trig(void)
+{
+ sched_yield();
+}
+
+void *mte_insert_tags(void *ptr, size_t size)
+{
+ void *tag_ptr;
+ int align_size;
+
+ if (!ptr || (unsigned long)(ptr) & MT_ALIGN_GRANULE) {
+ ksft_print_msg("FAIL: Addr=%lx: invalid\n", ptr);
+ return NULL;
+ }
+ align_size = MT_ALIGN_UP(size);
+ tag_ptr = mte_insert_random_tag(ptr);
+ mte_set_tag_address_range(tag_ptr, align_size);
+ return tag_ptr;
+}
+
+void mte_clear_tags(void *ptr, size_t size)
+{
+ if (!ptr || (unsigned long)(ptr) & MT_ALIGN_GRANULE) {
+ ksft_print_msg("FAIL: Addr=%lx: invalid\n", ptr);
+ return;
+ }
+ size = MT_ALIGN_UP(size);
+ ptr = (void *)MT_CLEAR_TAG((unsigned long)ptr);
+ mte_clear_tag_address_range(ptr, size);
+}
+
+static void *__mte_allocate_memory_range(size_t size, int mem_type, int mapping,
+ size_t range_before, size_t range_after,
+ bool tags, int fd)
+{
+ void *ptr;
+ int prot_flag, map_flag;
+ size_t entire_size = size + range_before + range_after;
+
+ switch (mem_type) {
+ case USE_MALLOC:
+ return malloc(entire_size) + range_before;
+ case USE_MMAP:
+ case USE_MPROTECT:
+ break;
+ default:
+ ksft_print_msg("FAIL: Invalid allocate request\n");
+ return NULL;
+ }
+
+ prot_flag = PROT_READ | PROT_WRITE;
+ if (mem_type == USE_MMAP)
+ prot_flag |= PROT_MTE;
+
+ map_flag = mapping;
+ if (fd == -1)
+ map_flag = MAP_ANONYMOUS | map_flag;
+ if (!(mapping & MAP_SHARED))
+ map_flag |= MAP_PRIVATE;
+ ptr = mmap(NULL, entire_size, prot_flag, map_flag, fd, 0);
+ if (ptr == MAP_FAILED) {
+ ksft_print_msg("FAIL: mmap allocation\n");
+ return NULL;
+ }
+ if (mem_type == USE_MPROTECT) {
+ if (mprotect(ptr, entire_size, prot_flag | PROT_MTE)) {
+ munmap(ptr, size);
+ ksft_print_msg("FAIL: mprotect PROT_MTE property\n");
+ return NULL;
+ }
+ }
+ if (tags)
+ ptr = mte_insert_tags(ptr + range_before, size);
+ return ptr;
+}
+
+void *mte_allocate_memory_tag_range(size_t size, int mem_type, int mapping,
+ size_t range_before, size_t range_after)
+{
+ return __mte_allocate_memory_range(size, mem_type, mapping, range_before,
+ range_after, true, -1);
+}
+
+void *mte_allocate_memory(size_t size, int mem_type, int mapping, bool tags)
+{
+ return __mte_allocate_memory_range(size, mem_type, mapping, 0, 0, tags, -1);
+}
+
+void *mte_allocate_file_memory(size_t size, int mem_type, int mapping, bool tags, int fd)
+{
+ int index;
+ char buffer[INIT_BUFFER_SIZE];
+
+ if (mem_type != USE_MPROTECT && mem_type != USE_MMAP) {
+ ksft_print_msg("FAIL: Invalid mmap file request\n");
+ return NULL;
+ }
+ /* Initialize the file for mappable size */
+ lseek(fd, 0, SEEK_SET);
+ for (index = INIT_BUFFER_SIZE; index < size; index += INIT_BUFFER_SIZE) {
+ if (write(fd, buffer, INIT_BUFFER_SIZE) != INIT_BUFFER_SIZE) {
+ perror("initialising buffer");
+ return NULL;
+ }
+ }
+ index -= INIT_BUFFER_SIZE;
+ if (write(fd, buffer, size - index) != size - index) {
+ perror("initialising buffer");
+ return NULL;
+ }
+ return __mte_allocate_memory_range(size, mem_type, mapping, 0, 0, tags, fd);
+}
+
+void *mte_allocate_file_memory_tag_range(size_t size, int mem_type, int mapping,
+ size_t range_before, size_t range_after, int fd)
+{
+ int index;
+ char buffer[INIT_BUFFER_SIZE];
+ int map_size = size + range_before + range_after;
+
+ if (mem_type != USE_MPROTECT && mem_type != USE_MMAP) {
+ ksft_print_msg("FAIL: Invalid mmap file request\n");
+ return NULL;
+ }
+ /* Initialize the file for mappable size */
+ lseek(fd, 0, SEEK_SET);
+ for (index = INIT_BUFFER_SIZE; index < map_size; index += INIT_BUFFER_SIZE)
+ if (write(fd, buffer, INIT_BUFFER_SIZE) != INIT_BUFFER_SIZE) {
+ perror("initialising buffer");
+ return NULL;
+ }
+ index -= INIT_BUFFER_SIZE;
+ if (write(fd, buffer, map_size - index) != map_size - index) {
+ perror("initialising buffer");
+ return NULL;
+ }
+ return __mte_allocate_memory_range(size, mem_type, mapping, range_before,
+ range_after, true, fd);
+}
+
+static void __mte_free_memory_range(void *ptr, size_t size, int mem_type,
+ size_t range_before, size_t range_after, bool tags)
+{
+ switch (mem_type) {
+ case USE_MALLOC:
+ free(ptr - range_before);
+ break;
+ case USE_MMAP:
+ case USE_MPROTECT:
+ if (tags)
+ mte_clear_tags(ptr, size);
+ munmap(ptr - range_before, size + range_before + range_after);
+ break;
+ default:
+ ksft_print_msg("FAIL: Invalid free request\n");
+ break;
+ }
+}
+
+void mte_free_memory_tag_range(void *ptr, size_t size, int mem_type,
+ size_t range_before, size_t range_after)
+{
+ __mte_free_memory_range(ptr, size, mem_type, range_before, range_after, true);
+}
+
+void mte_free_memory(void *ptr, size_t size, int mem_type, bool tags)
+{
+ __mte_free_memory_range(ptr, size, mem_type, 0, 0, tags);
+}
+
+void mte_initialize_current_context(int mode, uintptr_t ptr, ssize_t range)
+{
+ cur_mte_cxt.fault_valid = false;
+ cur_mte_cxt.trig_addr = ptr;
+ cur_mte_cxt.trig_range = range;
+ if (mode == MTE_SYNC_ERR)
+ cur_mte_cxt.trig_si_code = SEGV_MTESERR;
+ else if (mode == MTE_ASYNC_ERR)
+ cur_mte_cxt.trig_si_code = SEGV_MTEAERR;
+ else
+ cur_mte_cxt.trig_si_code = 0;
+}
+
+int mte_switch_mode(int mte_option, unsigned long incl_mask)
+{
+ unsigned long en = 0;
+
+ switch (mte_option) {
+ case MTE_NONE_ERR:
+ case MTE_SYNC_ERR:
+ case MTE_ASYNC_ERR:
+ break;
+ default:
+ ksft_print_msg("FAIL: Invalid MTE option %x\n", mte_option);
+ return -EINVAL;
+ }
+
+ if (incl_mask & ~MT_INCLUDE_TAG_MASK) {
+ ksft_print_msg("FAIL: Invalid incl_mask %lx\n", incl_mask);
+ return -EINVAL;
+ }
+
+ en = PR_TAGGED_ADDR_ENABLE;
+ switch (mte_option) {
+ case MTE_SYNC_ERR:
+ en |= PR_MTE_TCF_SYNC;
+ break;
+ case MTE_ASYNC_ERR:
+ en |= PR_MTE_TCF_ASYNC;
+ break;
+ case MTE_NONE_ERR:
+ en |= PR_MTE_TCF_NONE;
+ break;
+ }
+
+ en |= (incl_mask << PR_MTE_TAG_SHIFT);
+ /* Enable address tagging ABI, mte error reporting mode and tag inclusion mask. */
+ if (prctl(PR_SET_TAGGED_ADDR_CTRL, en, 0, 0, 0) != 0) {
+ ksft_print_msg("FAIL:prctl PR_SET_TAGGED_ADDR_CTRL for mte mode\n");
+ return -EINVAL;
+ }
+ return 0;
+}
+
+int mte_default_setup(void)
+{
+ unsigned long hwcaps2 = getauxval(AT_HWCAP2);
+ unsigned long en = 0;
+ int ret;
+
+ if (!(hwcaps2 & HWCAP2_MTE)) {
+ ksft_print_msg("SKIP: MTE features unavailable\n");
+ return KSFT_SKIP;
+ }
+ /* Get current mte mode */
+ ret = prctl(PR_GET_TAGGED_ADDR_CTRL, en, 0, 0, 0);
+ if (ret < 0) {
+ ksft_print_msg("FAIL:prctl PR_GET_TAGGED_ADDR_CTRL with error =%d\n", ret);
+ return KSFT_FAIL;
+ }
+ if (ret & PR_MTE_TCF_SYNC)
+ mte_cur_mode = MTE_SYNC_ERR;
+ else if (ret & PR_MTE_TCF_ASYNC)
+ mte_cur_mode = MTE_ASYNC_ERR;
+ else if (ret & PR_MTE_TCF_NONE)
+ mte_cur_mode = MTE_NONE_ERR;
+
+ mte_cur_pstate_tco = mte_get_pstate_tco();
+ /* Disable PSTATE.TCO */
+ mte_disable_pstate_tco();
+ return 0;
+}
+
+void mte_restore_setup(void)
+{
+ mte_switch_mode(mte_cur_mode, MTE_ALLOW_NON_ZERO_TAG);
+ if (mte_cur_pstate_tco == MT_PSTATE_TCO_EN)
+ mte_enable_pstate_tco();
+ else if (mte_cur_pstate_tco == MT_PSTATE_TCO_DIS)
+ mte_disable_pstate_tco();
+}
+
+int create_temp_file(void)
+{
+ int fd;
+ char filename[] = "/dev/shm/tmp_XXXXXX";
+
+ /* Create a file in the tmpfs filesystem */
+ fd = mkstemp(&filename[0]);
+ if (fd == -1) {
+ perror(filename);
+ ksft_print_msg("FAIL: Unable to open temporary file\n");
+ return 0;
+ }
+ unlink(&filename[0]);
+ return fd;
+}
diff --git a/tools/testing/selftests/arm64/mte/mte_common_util.h b/tools/testing/selftests/arm64/mte/mte_common_util.h
new file mode 100644
index 000000000000..2d3e71724e55
--- /dev/null
+++ b/tools/testing/selftests/arm64/mte/mte_common_util.h
@@ -0,0 +1,129 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* Copyright (C) 2020 ARM Limited */
+
+#ifndef _MTE_COMMON_UTIL_H
+#define _MTE_COMMON_UTIL_H
+
+#include <signal.h>
+#include <stdbool.h>
+#include <stdlib.h>
+#include <sys/auxv.h>
+#include <sys/mman.h>
+#include <sys/prctl.h>
+#include "mte_def.h"
+#include "kselftest.h"
+
+enum mte_mem_type {
+ USE_MALLOC,
+ USE_MMAP,
+ USE_MPROTECT,
+};
+
+enum mte_mode {
+ MTE_NONE_ERR,
+ MTE_SYNC_ERR,
+ MTE_ASYNC_ERR,
+};
+
+struct mte_fault_cxt {
+ /* Address start which triggers mte tag fault */
+ unsigned long trig_addr;
+ /* Address range for mte tag fault and negative value means underflow */
+ ssize_t trig_range;
+ /* siginfo si code */
+ unsigned long trig_si_code;
+ /* Flag to denote if correct fault caught */
+ bool fault_valid;
+};
+
+extern struct mte_fault_cxt cur_mte_cxt;
+
+/* MTE utility functions */
+void mte_default_handler(int signum, siginfo_t *si, void *uc);
+void mte_register_signal(int signal, void (*handler)(int, siginfo_t *, void *));
+void mte_wait_after_trig(void);
+void *mte_allocate_memory(size_t size, int mem_type, int mapping, bool tags);
+void *mte_allocate_memory_tag_range(size_t size, int mem_type, int mapping,
+ size_t range_before, size_t range_after);
+void *mte_allocate_file_memory(size_t size, int mem_type, int mapping,
+ bool tags, int fd);
+void *mte_allocate_file_memory_tag_range(size_t size, int mem_type, int mapping,
+ size_t range_before, size_t range_after, int fd);
+void mte_free_memory(void *ptr, size_t size, int mem_type, bool tags);
+void mte_free_memory_tag_range(void *ptr, size_t size, int mem_type,
+ size_t range_before, size_t range_after);
+void *mte_insert_tags(void *ptr, size_t size);
+void mte_clear_tags(void *ptr, size_t size);
+int mte_default_setup(void);
+void mte_restore_setup(void);
+int mte_switch_mode(int mte_option, unsigned long incl_mask);
+void mte_initialize_current_context(int mode, uintptr_t ptr, ssize_t range);
+
+/* Common utility functions */
+int create_temp_file(void);
+
+/* Assembly MTE utility functions */
+void *mte_insert_random_tag(void *ptr);
+void *mte_insert_new_tag(void *ptr);
+void *mte_get_tag_address(void *ptr);
+void mte_set_tag_address_range(void *ptr, int range);
+void mte_clear_tag_address_range(void *ptr, int range);
+void mte_disable_pstate_tco(void);
+void mte_enable_pstate_tco(void);
+unsigned int mte_get_pstate_tco(void);
+
+/* Test framework static inline functions/macros */
+static inline void evaluate_test(int err, const char *msg)
+{
+ switch (err) {
+ case KSFT_PASS:
+ ksft_test_result_pass(msg);
+ break;
+ case KSFT_FAIL:
+ ksft_test_result_fail(msg);
+ break;
+ case KSFT_SKIP:
+ ksft_test_result_skip(msg);
+ break;
+ default:
+ ksft_test_result_error("Unknown return code %d from %s",
+ err, msg);
+ break;
+ }
+}
+
+static inline int check_allocated_memory(void *ptr, size_t size,
+ int mem_type, bool tags)
+{
+ if (ptr == NULL) {
+ ksft_print_msg("FAIL: memory allocation\n");
+ return KSFT_FAIL;
+ }
+
+ if (tags && !MT_FETCH_TAG((uintptr_t)ptr)) {
+ ksft_print_msg("FAIL: tag not found at addr(%p)\n", ptr);
+ mte_free_memory((void *)ptr, size, mem_type, false);
+ return KSFT_FAIL;
+ }
+
+ return KSFT_PASS;
+}
+
+static inline int check_allocated_memory_range(void *ptr, size_t size, int mem_type,
+ size_t range_before, size_t range_after)
+{
+ if (ptr == NULL) {
+ ksft_print_msg("FAIL: memory allocation\n");
+ return KSFT_FAIL;
+ }
+
+ if (!MT_FETCH_TAG((uintptr_t)ptr)) {
+ ksft_print_msg("FAIL: tag not found at addr(%p)\n", ptr);
+ mte_free_memory_tag_range((void *)ptr, size, mem_type, range_before,
+ range_after);
+ return KSFT_FAIL;
+ }
+ return KSFT_PASS;
+}
+
+#endif /* _MTE_COMMON_UTIL_H */
diff --git a/tools/testing/selftests/arm64/mte/mte_def.h b/tools/testing/selftests/arm64/mte/mte_def.h
new file mode 100644
index 000000000000..9b188254b61a
--- /dev/null
+++ b/tools/testing/selftests/arm64/mte/mte_def.h
@@ -0,0 +1,60 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* Copyright (C) 2020 ARM Limited */
+
+/*
+ * Below definitions may be found in kernel headers, However, they are
+ * redefined here to decouple the MTE selftests compilations from them.
+ */
+#ifndef SEGV_MTEAERR
+#define SEGV_MTEAERR 8
+#endif
+#ifndef SEGV_MTESERR
+#define SEGV_MTESERR 9
+#endif
+#ifndef PROT_MTE
+#define PROT_MTE 0x20
+#endif
+#ifndef HWCAP2_MTE
+#define HWCAP2_MTE (1 << 18)
+#endif
+
+#ifndef PR_MTE_TCF_SHIFT
+#define PR_MTE_TCF_SHIFT 1
+#endif
+#ifndef PR_MTE_TCF_NONE
+#define PR_MTE_TCF_NONE (0UL << PR_MTE_TCF_SHIFT)
+#endif
+#ifndef PR_MTE_TCF_SYNC
+#define PR_MTE_TCF_SYNC (1UL << PR_MTE_TCF_SHIFT)
+#endif
+#ifndef PR_MTE_TCF_ASYNC
+#define PR_MTE_TCF_ASYNC (2UL << PR_MTE_TCF_SHIFT)
+#endif
+#ifndef PR_MTE_TAG_SHIFT
+#define PR_MTE_TAG_SHIFT 3
+#endif
+
+/* MTE Hardware feature definitions below. */
+#define MT_TAG_SHIFT 56
+#define MT_TAG_MASK 0xFUL
+#define MT_FREE_TAG 0x0UL
+#define MT_GRANULE_SIZE 16
+#define MT_TAG_COUNT 16
+#define MT_INCLUDE_TAG_MASK 0xFFFF
+#define MT_EXCLUDE_TAG_MASK 0x0
+
+#define MT_ALIGN_GRANULE (MT_GRANULE_SIZE - 1)
+#define MT_CLEAR_TAG(x) ((x) & ~(MT_TAG_MASK << MT_TAG_SHIFT))
+#define MT_SET_TAG(x, y) ((x) | (y << MT_TAG_SHIFT))
+#define MT_FETCH_TAG(x) ((x >> MT_TAG_SHIFT) & (MT_TAG_MASK))
+#define MT_ALIGN_UP(x) ((x + MT_ALIGN_GRANULE) & ~(MT_ALIGN_GRANULE))
+
+#define MT_PSTATE_TCO_SHIFT 25
+#define MT_PSTATE_TCO_MASK ~(0x1 << MT_PSTATE_TCO_SHIFT)
+#define MT_PSTATE_TCO_EN 1
+#define MT_PSTATE_TCO_DIS 0
+
+#define MT_EXCLUDE_TAG(x) (1 << (x))
+#define MT_INCLUDE_VALID_TAG(x) (MT_INCLUDE_TAG_MASK ^ MT_EXCLUDE_TAG(x))
+#define MT_INCLUDE_VALID_TAGS(x) (MT_INCLUDE_TAG_MASK ^ (x))
+#define MTE_ALLOW_NON_ZERO_TAG MT_INCLUDE_VALID_TAG(0)
diff --git a/tools/testing/selftests/arm64/mte/mte_helper.S b/tools/testing/selftests/arm64/mte/mte_helper.S
new file mode 100644
index 000000000000..a55dbbc56ed1
--- /dev/null
+++ b/tools/testing/selftests/arm64/mte/mte_helper.S
@@ -0,0 +1,130 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* Copyright (C) 2020 ARM Limited */
+
+#include "mte_def.h"
+
+.arch armv8.5-a+memtag
+
+#define ENTRY(name) \
+ .globl name ;\
+ .p2align 2;\
+ .type name, @function ;\
+name:
+
+#define ENDPROC(name) \
+ .size name, .-name ;
+
+ .text
+/*
+ * mte_insert_random_tag: Insert random tag and might be same as the source tag if
+ * the source pointer has it.
+ * Input:
+ * x0 - source pointer with a tag/no-tag
+ * Return:
+ * x0 - pointer with random tag
+ */
+ENTRY(mte_insert_random_tag)
+ irg x0, x0, xzr
+ ret
+ENDPROC(mte_insert_random_tag)
+
+/*
+ * mte_insert_new_tag: Insert new tag and different from the source tag if
+ * source pointer has it.
+ * Input:
+ * x0 - source pointer with a tag/no-tag
+ * Return:
+ * x0 - pointer with random tag
+ */
+ENTRY(mte_insert_new_tag)
+ gmi x1, x0, xzr
+ irg x0, x0, x1
+ ret
+ENDPROC(mte_insert_new_tag)
+
+/*
+ * mte_get_tag_address: Get the tag from given address.
+ * Input:
+ * x0 - source pointer
+ * Return:
+ * x0 - pointer with appended tag
+ */
+ENTRY(mte_get_tag_address)
+ ldg x0, [x0]
+ ret
+ENDPROC(mte_get_tag_address)
+
+/*
+ * mte_set_tag_address_range: Set the tag range from the given address
+ * Input:
+ * x0 - source pointer with tag data
+ * x1 - range
+ * Return:
+ * none
+ */
+ENTRY(mte_set_tag_address_range)
+ cbz x1, 2f
+1:
+ stg x0, [x0, #0x0]
+ add x0, x0, #MT_GRANULE_SIZE
+ sub x1, x1, #MT_GRANULE_SIZE
+ cbnz x1, 1b
+2:
+ ret
+ENDPROC(mte_set_tag_address_range)
+
+/*
+ * mt_clear_tag_address_range: Clear the tag range from the given address
+ * Input:
+ * x0 - source pointer with tag data
+ * x1 - range
+ * Return:
+ * none
+ */
+ENTRY(mte_clear_tag_address_range)
+ cbz x1, 2f
+1:
+ stzg x0, [x0, #0x0]
+ add x0, x0, #MT_GRANULE_SIZE
+ sub x1, x1, #MT_GRANULE_SIZE
+ cbnz x1, 1b
+2:
+ ret
+ENDPROC(mte_clear_tag_address_range)
+
+/*
+ * mte_enable_pstate_tco: Enable PSTATE.TCO (tag check override) field
+ * Input:
+ * none
+ * Return:
+ * none
+ */
+ENTRY(mte_enable_pstate_tco)
+ msr tco, #MT_PSTATE_TCO_EN
+ ret
+ENDPROC(mte_enable_pstate_tco)
+
+/*
+ * mte_disable_pstate_tco: Disable PSTATE.TCO (tag check override) field
+ * Input:
+ * none
+ * Return:
+ * none
+ */
+ENTRY(mte_disable_pstate_tco)
+ msr tco, #MT_PSTATE_TCO_DIS
+ ret
+ENDPROC(mte_disable_pstate_tco)
+
+/*
+ * mte_get_pstate_tco: Get PSTATE.TCO (tag check override) field
+ * Input:
+ * none
+ * Return:
+ * x0
+ */
+ENTRY(mte_get_pstate_tco)
+ mrs x0, tco
+ ubfx x0, x0, #MT_PSTATE_TCO_SHIFT, #1
+ ret
+ENDPROC(mte_get_pstate_tco)
diff --git a/tools/testing/selftests/arm64/pauth/.gitignore b/tools/testing/selftests/arm64/pauth/.gitignore
new file mode 100644
index 000000000000..155137d92722
--- /dev/null
+++ b/tools/testing/selftests/arm64/pauth/.gitignore
@@ -0,0 +1,2 @@
+exec_target
+pac
diff --git a/tools/testing/selftests/arm64/pauth/Makefile b/tools/testing/selftests/arm64/pauth/Makefile
new file mode 100644
index 000000000000..72e290b0b10c
--- /dev/null
+++ b/tools/testing/selftests/arm64/pauth/Makefile
@@ -0,0 +1,39 @@
+# SPDX-License-Identifier: GPL-2.0
+# Copyright (C) 2020 ARM Limited
+
+# preserve CC value from top level Makefile
+ifeq ($(CC),cc)
+CC := $(CROSS_COMPILE)gcc
+endif
+
+CFLAGS += -mbranch-protection=pac-ret
+# check if the compiler supports ARMv8.3 and branch protection with PAuth
+pauth_cc_support := $(shell if ($(CC) $(CFLAGS) -march=armv8.3-a -E -x c /dev/null -o /dev/null 2>&1) then echo "1"; fi)
+
+ifeq ($(pauth_cc_support),1)
+TEST_GEN_PROGS := pac
+TEST_GEN_FILES := pac_corruptor.o helper.o
+TEST_GEN_PROGS_EXTENDED := exec_target
+endif
+
+include ../../lib.mk
+
+ifeq ($(pauth_cc_support),1)
+# pac* and aut* instructions are not available on architectures berfore
+# ARMv8.3. Therefore target ARMv8.3 wherever they are used directly
+$(OUTPUT)/pac_corruptor.o: pac_corruptor.S
+ $(CC) -c $^ -o $@ $(CFLAGS) -march=armv8.3-a
+
+$(OUTPUT)/helper.o: helper.c
+ $(CC) -c $^ -o $@ $(CFLAGS) -march=armv8.3-a
+
+# when -mbranch-protection is enabled and the target architecture is ARMv8.3 or
+# greater, gcc emits pac* instructions which are not in HINT NOP space,
+# preventing the tests from occurring at all. Compile for ARMv8.2 so tests can
+# run on earlier targets and print a meaningful error messages
+$(OUTPUT)/exec_target: exec_target.c $(OUTPUT)/helper.o
+ $(CC) $^ -o $@ $(CFLAGS) -march=armv8.2-a
+
+$(OUTPUT)/pac: pac.c $(OUTPUT)/pac_corruptor.o $(OUTPUT)/helper.o
+ $(CC) $^ -o $@ $(CFLAGS) -march=armv8.2-a
+endif
diff --git a/tools/testing/selftests/arm64/pauth/exec_target.c b/tools/testing/selftests/arm64/pauth/exec_target.c
new file mode 100644
index 000000000000..4435600ca400
--- /dev/null
+++ b/tools/testing/selftests/arm64/pauth/exec_target.c
@@ -0,0 +1,34 @@
+// SPDX-License-Identifier: GPL-2.0
+// Copyright (C) 2020 ARM Limited
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <sys/auxv.h>
+
+#include "helper.h"
+
+int main(void)
+{
+ struct signatures signed_vals;
+ unsigned long hwcaps;
+ size_t val;
+
+ fread(&val, sizeof(size_t), 1, stdin);
+
+ /* don't try to execute illegal (unimplemented) instructions) caller
+ * should have checked this and keep worker simple
+ */
+ hwcaps = getauxval(AT_HWCAP);
+
+ if (hwcaps & HWCAP_PACA) {
+ signed_vals.keyia = keyia_sign(val);
+ signed_vals.keyib = keyib_sign(val);
+ signed_vals.keyda = keyda_sign(val);
+ signed_vals.keydb = keydb_sign(val);
+ }
+ signed_vals.keyg = (hwcaps & HWCAP_PACG) ? keyg_sign(val) : 0;
+
+ fwrite(&signed_vals, sizeof(struct signatures), 1, stdout);
+
+ return 0;
+}
diff --git a/tools/testing/selftests/arm64/pauth/helper.c b/tools/testing/selftests/arm64/pauth/helper.c
new file mode 100644
index 000000000000..2c201e7d0d50
--- /dev/null
+++ b/tools/testing/selftests/arm64/pauth/helper.c
@@ -0,0 +1,39 @@
+// SPDX-License-Identifier: GPL-2.0
+// Copyright (C) 2020 ARM Limited
+
+#include "helper.h"
+
+size_t keyia_sign(size_t ptr)
+{
+ asm volatile("paciza %0" : "+r" (ptr));
+ return ptr;
+}
+
+size_t keyib_sign(size_t ptr)
+{
+ asm volatile("pacizb %0" : "+r" (ptr));
+ return ptr;
+}
+
+size_t keyda_sign(size_t ptr)
+{
+ asm volatile("pacdza %0" : "+r" (ptr));
+ return ptr;
+}
+
+size_t keydb_sign(size_t ptr)
+{
+ asm volatile("pacdzb %0" : "+r" (ptr));
+ return ptr;
+}
+
+size_t keyg_sign(size_t ptr)
+{
+ /* output is encoded in the upper 32 bits */
+ size_t dest = 0;
+ size_t modifier = 0;
+
+ asm volatile("pacga %0, %1, %2" : "=r" (dest) : "r" (ptr), "r" (modifier));
+
+ return dest;
+}
diff --git a/tools/testing/selftests/arm64/pauth/helper.h b/tools/testing/selftests/arm64/pauth/helper.h
new file mode 100644
index 000000000000..652496c7b411
--- /dev/null
+++ b/tools/testing/selftests/arm64/pauth/helper.h
@@ -0,0 +1,28 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* Copyright (C) 2020 ARM Limited */
+
+#ifndef _HELPER_H_
+#define _HELPER_H_
+
+#include <stdlib.h>
+
+#define NKEYS 5
+
+struct signatures {
+ size_t keyia;
+ size_t keyib;
+ size_t keyda;
+ size_t keydb;
+ size_t keyg;
+};
+
+void pac_corruptor(void);
+
+/* PAuth sign a value with key ia and modifier value 0 */
+size_t keyia_sign(size_t val);
+size_t keyib_sign(size_t val);
+size_t keyda_sign(size_t val);
+size_t keydb_sign(size_t val);
+size_t keyg_sign(size_t val);
+
+#endif
diff --git a/tools/testing/selftests/arm64/pauth/pac.c b/tools/testing/selftests/arm64/pauth/pac.c
new file mode 100644
index 000000000000..b743daa772f5
--- /dev/null
+++ b/tools/testing/selftests/arm64/pauth/pac.c
@@ -0,0 +1,370 @@
+// SPDX-License-Identifier: GPL-2.0
+// Copyright (C) 2020 ARM Limited
+
+#define _GNU_SOURCE
+
+#include <sys/auxv.h>
+#include <sys/types.h>
+#include <sys/wait.h>
+#include <signal.h>
+#include <setjmp.h>
+#include <sched.h>
+
+#include "../../kselftest_harness.h"
+#include "helper.h"
+
+#define PAC_COLLISION_ATTEMPTS 10
+/*
+ * The kernel sets TBID by default. So bits 55 and above should remain
+ * untouched no matter what.
+ * The VA space size is 48 bits. Bigger is opt-in.
+ */
+#define PAC_MASK (~0xff80ffffffffffff)
+#define ARBITRARY_VALUE (0x1234)
+#define ASSERT_PAUTH_ENABLED() \
+do { \
+ unsigned long hwcaps = getauxval(AT_HWCAP); \
+ /* data key instructions are not in NOP space. This prevents a SIGILL */ \
+ if (!(hwcaps & HWCAP_PACA)) \
+ SKIP(return, "PAUTH not enabled"); \
+} while (0)
+#define ASSERT_GENERIC_PAUTH_ENABLED() \
+do { \
+ unsigned long hwcaps = getauxval(AT_HWCAP); \
+ /* generic key instructions are not in NOP space. This prevents a SIGILL */ \
+ if (!(hwcaps & HWCAP_PACG)) \
+ SKIP(return, "Generic PAUTH not enabled"); \
+} while (0)
+
+void sign_specific(struct signatures *sign, size_t val)
+{
+ sign->keyia = keyia_sign(val);
+ sign->keyib = keyib_sign(val);
+ sign->keyda = keyda_sign(val);
+ sign->keydb = keydb_sign(val);
+}
+
+void sign_all(struct signatures *sign, size_t val)
+{
+ sign->keyia = keyia_sign(val);
+ sign->keyib = keyib_sign(val);
+ sign->keyda = keyda_sign(val);
+ sign->keydb = keydb_sign(val);
+ sign->keyg = keyg_sign(val);
+}
+
+int n_same(struct signatures *old, struct signatures *new, int nkeys)
+{
+ int res = 0;
+
+ res += old->keyia == new->keyia;
+ res += old->keyib == new->keyib;
+ res += old->keyda == new->keyda;
+ res += old->keydb == new->keydb;
+ if (nkeys == NKEYS)
+ res += old->keyg == new->keyg;
+
+ return res;
+}
+
+int n_same_single_set(struct signatures *sign, int nkeys)
+{
+ size_t vals[nkeys];
+ int same = 0;
+
+ vals[0] = sign->keyia & PAC_MASK;
+ vals[1] = sign->keyib & PAC_MASK;
+ vals[2] = sign->keyda & PAC_MASK;
+ vals[3] = sign->keydb & PAC_MASK;
+
+ if (nkeys >= 4)
+ vals[4] = sign->keyg & PAC_MASK;
+
+ for (int i = 0; i < nkeys - 1; i++) {
+ for (int j = i + 1; j < nkeys; j++) {
+ if (vals[i] == vals[j])
+ same += 1;
+ }
+ }
+ return same;
+}
+
+int exec_sign_all(struct signatures *signed_vals, size_t val)
+{
+ int new_stdin[2];
+ int new_stdout[2];
+ int status;
+ int i;
+ ssize_t ret;
+ pid_t pid;
+ cpu_set_t mask;
+
+ ret = pipe(new_stdin);
+ if (ret == -1) {
+ perror("pipe returned error");
+ return -1;
+ }
+
+ ret = pipe(new_stdout);
+ if (ret == -1) {
+ perror("pipe returned error");
+ return -1;
+ }
+
+ /*
+ * pin this process and all its children to a single CPU, so it can also
+ * guarantee a context switch with its child
+ */
+ sched_getaffinity(0, sizeof(mask), &mask);
+
+ for (i = 0; i < sizeof(cpu_set_t); i++)
+ if (CPU_ISSET(i, &mask))
+ break;
+
+ CPU_ZERO(&mask);
+ CPU_SET(i, &mask);
+ sched_setaffinity(0, sizeof(mask), &mask);
+
+ pid = fork();
+ // child
+ if (pid == 0) {
+ dup2(new_stdin[0], STDIN_FILENO);
+ if (ret == -1) {
+ perror("dup2 returned error");
+ exit(1);
+ }
+
+ dup2(new_stdout[1], STDOUT_FILENO);
+ if (ret == -1) {
+ perror("dup2 returned error");
+ exit(1);
+ }
+
+ close(new_stdin[0]);
+ close(new_stdin[1]);
+ close(new_stdout[0]);
+ close(new_stdout[1]);
+
+ ret = execl("exec_target", "exec_target", (char *)NULL);
+ if (ret == -1) {
+ perror("exec returned error");
+ exit(1);
+ }
+ }
+
+ close(new_stdin[0]);
+ close(new_stdout[1]);
+
+ ret = write(new_stdin[1], &val, sizeof(size_t));
+ if (ret == -1) {
+ perror("write returned error");
+ return -1;
+ }
+
+ /*
+ * wait for the worker to finish, so that read() reads all data
+ * will also context switch with worker so that this function can be used
+ * for context switch tests
+ */
+ waitpid(pid, &status, 0);
+ if (WIFEXITED(status) == 0) {
+ fprintf(stderr, "worker exited unexpectedly\n");
+ return -1;
+ }
+ if (WEXITSTATUS(status) != 0) {
+ fprintf(stderr, "worker exited with error\n");
+ return -1;
+ }
+
+ ret = read(new_stdout[0], signed_vals, sizeof(struct signatures));
+ if (ret == -1) {
+ perror("read returned error");
+ return -1;
+ }
+
+ return 0;
+}
+
+sigjmp_buf jmpbuf;
+void pac_signal_handler(int signum, siginfo_t *si, void *uc)
+{
+ if (signum == SIGSEGV || signum == SIGILL)
+ siglongjmp(jmpbuf, 1);
+}
+
+/* check that a corrupted PAC results in SIGSEGV or SIGILL */
+TEST(corrupt_pac)
+{
+ struct sigaction sa;
+
+ ASSERT_PAUTH_ENABLED();
+ if (sigsetjmp(jmpbuf, 1) == 0) {
+ sa.sa_sigaction = pac_signal_handler;
+ sa.sa_flags = SA_SIGINFO | SA_RESETHAND;
+ sigemptyset(&sa.sa_mask);
+
+ sigaction(SIGSEGV, &sa, NULL);
+ sigaction(SIGILL, &sa, NULL);
+
+ pac_corruptor();
+ ASSERT_TRUE(0) TH_LOG("SIGSEGV/SIGILL signal did not occur");
+ }
+}
+
+/*
+ * There are no separate pac* and aut* controls so checking only the pac*
+ * instructions is sufficient
+ */
+TEST(pac_instructions_not_nop)
+{
+ size_t keyia = 0;
+ size_t keyib = 0;
+ size_t keyda = 0;
+ size_t keydb = 0;
+
+ ASSERT_PAUTH_ENABLED();
+
+ for (int i = 0; i < PAC_COLLISION_ATTEMPTS; i++) {
+ keyia |= keyia_sign(i) & PAC_MASK;
+ keyib |= keyib_sign(i) & PAC_MASK;
+ keyda |= keyda_sign(i) & PAC_MASK;
+ keydb |= keydb_sign(i) & PAC_MASK;
+ }
+
+ ASSERT_NE(0, keyia) TH_LOG("keyia instructions did nothing");
+ ASSERT_NE(0, keyib) TH_LOG("keyib instructions did nothing");
+ ASSERT_NE(0, keyda) TH_LOG("keyda instructions did nothing");
+ ASSERT_NE(0, keydb) TH_LOG("keydb instructions did nothing");
+}
+
+TEST(pac_instructions_not_nop_generic)
+{
+ size_t keyg = 0;
+
+ ASSERT_GENERIC_PAUTH_ENABLED();
+
+ for (int i = 0; i < PAC_COLLISION_ATTEMPTS; i++)
+ keyg |= keyg_sign(i) & PAC_MASK;
+
+ ASSERT_NE(0, keyg) TH_LOG("keyg instructions did nothing");
+}
+
+TEST(single_thread_different_keys)
+{
+ int same = 10;
+ int nkeys = NKEYS;
+ int tmp;
+ struct signatures signed_vals;
+ unsigned long hwcaps = getauxval(AT_HWCAP);
+
+ /* generic and data key instructions are not in NOP space. This prevents a SIGILL */
+ ASSERT_PAUTH_ENABLED();
+ if (!(hwcaps & HWCAP_PACG)) {
+ TH_LOG("WARNING: Generic PAUTH not enabled. Skipping generic key checks");
+ nkeys = NKEYS - 1;
+ }
+
+ /*
+ * In Linux the PAC field can be up to 7 bits wide. Even if keys are
+ * different, there is about 5% chance for PACs to collide with
+ * different addresses. This chance rapidly increases with fewer bits
+ * allocated for the PAC (e.g. wider address). A comparison of the keys
+ * directly will be more reliable.
+ * All signed values need to be different at least once out of n
+ * attempts to be certain that the keys are different
+ */
+ for (int i = 0; i < PAC_COLLISION_ATTEMPTS; i++) {
+ if (nkeys == NKEYS)
+ sign_all(&signed_vals, i);
+ else
+ sign_specific(&signed_vals, i);
+
+ tmp = n_same_single_set(&signed_vals, nkeys);
+ if (tmp < same)
+ same = tmp;
+ }
+
+ ASSERT_EQ(0, same) TH_LOG("%d keys clashed every time", same);
+}
+
+/*
+ * fork() does not change keys. Only exec() does so call a worker program.
+ * Its only job is to sign a value and report back the resutls
+ */
+TEST(exec_changed_keys)
+{
+ struct signatures new_keys;
+ struct signatures old_keys;
+ int ret;
+ int same = 10;
+ int nkeys = NKEYS;
+ unsigned long hwcaps = getauxval(AT_HWCAP);
+
+ /* generic and data key instructions are not in NOP space. This prevents a SIGILL */
+ ASSERT_PAUTH_ENABLED();
+ if (!(hwcaps & HWCAP_PACG)) {
+ TH_LOG("WARNING: Generic PAUTH not enabled. Skipping generic key checks");
+ nkeys = NKEYS - 1;
+ }
+
+ for (int i = 0; i < PAC_COLLISION_ATTEMPTS; i++) {
+ ret = exec_sign_all(&new_keys, i);
+ ASSERT_EQ(0, ret) TH_LOG("failed to run worker");
+
+ if (nkeys == NKEYS)
+ sign_all(&old_keys, i);
+ else
+ sign_specific(&old_keys, i);
+
+ ret = n_same(&old_keys, &new_keys, nkeys);
+ if (ret < same)
+ same = ret;
+ }
+
+ ASSERT_EQ(0, same) TH_LOG("exec() did not change %d keys", same);
+}
+
+TEST(context_switch_keep_keys)
+{
+ int ret;
+ struct signatures trash;
+ struct signatures before;
+ struct signatures after;
+
+ ASSERT_PAUTH_ENABLED();
+
+ sign_specific(&before, ARBITRARY_VALUE);
+
+ /* will context switch with a process with different keys at least once */
+ ret = exec_sign_all(&trash, ARBITRARY_VALUE);
+ ASSERT_EQ(0, ret) TH_LOG("failed to run worker");
+
+ sign_specific(&after, ARBITRARY_VALUE);
+
+ ASSERT_EQ(before.keyia, after.keyia) TH_LOG("keyia changed after context switching");
+ ASSERT_EQ(before.keyib, after.keyib) TH_LOG("keyib changed after context switching");
+ ASSERT_EQ(before.keyda, after.keyda) TH_LOG("keyda changed after context switching");
+ ASSERT_EQ(before.keydb, after.keydb) TH_LOG("keydb changed after context switching");
+}
+
+TEST(context_switch_keep_keys_generic)
+{
+ int ret;
+ struct signatures trash;
+ size_t before;
+ size_t after;
+
+ ASSERT_GENERIC_PAUTH_ENABLED();
+
+ before = keyg_sign(ARBITRARY_VALUE);
+
+ /* will context switch with a process with different keys at least once */
+ ret = exec_sign_all(&trash, ARBITRARY_VALUE);
+ ASSERT_EQ(0, ret) TH_LOG("failed to run worker");
+
+ after = keyg_sign(ARBITRARY_VALUE);
+
+ ASSERT_EQ(before, after) TH_LOG("keyg changed after context switching");
+}
+
+TEST_HARNESS_MAIN
diff --git a/tools/testing/selftests/arm64/pauth/pac_corruptor.S b/tools/testing/selftests/arm64/pauth/pac_corruptor.S
new file mode 100644
index 000000000000..aa6588050752
--- /dev/null
+++ b/tools/testing/selftests/arm64/pauth/pac_corruptor.S
@@ -0,0 +1,19 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* Copyright (C) 2020 ARM Limited */
+
+.global pac_corruptor
+
+.text
+/*
+ * Corrupting a single bit of the PAC ensures the authentication will fail. It
+ * also guarantees no possible collision. TCR_EL1.TBI0 is set by default so no
+ * top byte PAC is tested
+ */
+ pac_corruptor:
+ paciasp
+
+ /* corrupt the top bit of the PAC */
+ eor lr, lr, #1 << 53
+
+ autiasp
+ ret
diff --git a/tools/testing/selftests/arm64/signal/.gitignore b/tools/testing/selftests/arm64/signal/.gitignore
index 78c902045ca7..1ce5b5eac386 100644
--- a/tools/testing/selftests/arm64/signal/.gitignore
+++ b/tools/testing/selftests/arm64/signal/.gitignore
@@ -1,4 +1,11 @@
# SPDX-License-Identifier: GPL-2.0-only
mangle_*
fake_sigreturn_*
+fpmr_*
+sme_*
+ssve_*
+sve_*
+tpidr2_*
+za_*
+zt_*
!*.[ch]
diff --git a/tools/testing/selftests/arm64/signal/Makefile b/tools/testing/selftests/arm64/signal/Makefile
index b497cfea4643..8f5febaf1a9a 100644
--- a/tools/testing/selftests/arm64/signal/Makefile
+++ b/tools/testing/selftests/arm64/signal/Makefile
@@ -11,7 +11,6 @@ PROGS := $(patsubst %.c,%,$(SRCS))
TEST_GEN_PROGS := $(notdir $(PROGS))
# Get Kernel headers installed and use them.
-KSFT_KHDR_INSTALL := 1
# Including KSFT lib.mk here will also mangle the TEST_GEN_PROGS list
# to account for any OUTPUT target-dirs optionally provided by
@@ -21,12 +20,12 @@ include ../../lib.mk
$(TEST_GEN_PROGS): $(PROGS)
cp $(PROGS) $(OUTPUT)/
-clean:
- $(CLEAN)
- rm -f $(PROGS)
-
# Common test-unit targets to build common-layout test-cases executables
# Needs secondary expansion to properly include the testcase c-file in pre-reqs
+COMMON_SOURCES := test_signals.c test_signals_utils.c testcases/testcases.c \
+ signals.S
+COMMON_HEADERS := test_signals.h test_signals_utils.h testcases/testcases.h
+
.SECONDEXPANSION:
-$(PROGS): test_signals.c test_signals_utils.c testcases/testcases.c signals.S $$@.c test_signals.h test_signals_utils.h testcases/testcases.h
- $(CC) $(CFLAGS) $^ -o $@
+$(PROGS): $$@.c ${COMMON_SOURCES} ${COMMON_HEADERS}
+ $(CC) $(CFLAGS) ${@}.c ${COMMON_SOURCES} -o $@
diff --git a/tools/testing/selftests/arm64/signal/test_signals.c b/tools/testing/selftests/arm64/signal/test_signals.c
index 416b1ff43199..00051b40d71e 100644
--- a/tools/testing/selftests/arm64/signal/test_signals.c
+++ b/tools/testing/selftests/arm64/signal/test_signals.c
@@ -12,12 +12,10 @@
#include "test_signals.h"
#include "test_signals_utils.h"
-struct tdescr *current;
+struct tdescr *current = &tde;
int main(int argc, char *argv[])
{
- current = &tde;
-
ksft_print_msg("%s :: %s\n", current->name, current->descr);
if (test_setup(current) && test_init(current)) {
test_run(current);
diff --git a/tools/testing/selftests/arm64/signal/test_signals.h b/tools/testing/selftests/arm64/signal/test_signals.h
index f96baf1cef1a..1e6273d81575 100644
--- a/tools/testing/selftests/arm64/signal/test_signals.h
+++ b/tools/testing/selftests/arm64/signal/test_signals.h
@@ -9,9 +9,7 @@
#include <ucontext.h>
/*
- * Using ARCH specific and sanitized Kernel headers installed by KSFT
- * framework since we asked for it by setting flag KSFT_KHDR_INSTALL
- * in our Makefile.
+ * Using ARCH specific and sanitized Kernel headers from the tree.
*/
#include <asm/ptrace.h>
#include <asm/hwcap.h>
@@ -33,10 +31,18 @@
*/
enum {
FSSBS_BIT,
+ FSVE_BIT,
+ FSME_BIT,
+ FSME_FA64_BIT,
+ FSME2_BIT,
FMAX_END
};
#define FEAT_SSBS (1UL << FSSBS_BIT)
+#define FEAT_SVE (1UL << FSVE_BIT)
+#define FEAT_SME (1UL << FSME_BIT)
+#define FEAT_SME_FA64 (1UL << FSME_FA64_BIT)
+#define FEAT_SME2 (1UL << FSME2_BIT)
/*
* A descriptor used to describe and configure a test case.
@@ -51,6 +57,7 @@ struct tdescr {
char *name;
char *descr;
unsigned long feats_required;
+ unsigned long feats_incompatible;
/* bitmask of effectively supported feats: populated at run-time */
unsigned long feats_supported;
bool initialized;
diff --git a/tools/testing/selftests/arm64/signal/test_signals_utils.c b/tools/testing/selftests/arm64/signal/test_signals_utils.c
index 2de6e5ed5e25..0dc948db3a4a 100644
--- a/tools/testing/selftests/arm64/signal/test_signals_utils.c
+++ b/tools/testing/selftests/arm64/signal/test_signals_utils.c
@@ -26,6 +26,10 @@ static int sig_copyctx = SIGTRAP;
static char const *const feats_names[FMAX_END] = {
" SSBS ",
+ " SVE ",
+ " SME ",
+ " FA64 ",
+ " SME2 ",
};
#define MAX_FEATS_SZ 128
@@ -35,6 +39,8 @@ static inline char *feats_to_string(unsigned long feats)
{
size_t flen = MAX_FEATS_SZ - 1;
+ feats_string[0] = '\0';
+
for (int i = 0; i < FMAX_END; i++) {
if (feats & (1UL << i)) {
size_t tlen = strlen(feats_names[i]);
@@ -160,15 +166,66 @@ static bool handle_signal_ok(struct tdescr *td,
}
static bool handle_signal_copyctx(struct tdescr *td,
- siginfo_t *si, void *uc)
+ siginfo_t *si, void *uc_in)
{
+ ucontext_t *uc = uc_in;
+ struct _aarch64_ctx *head;
+ struct extra_context *extra, *copied_extra;
+ size_t offset = 0;
+ size_t to_copy;
+
+ ASSERT_GOOD_CONTEXT(uc);
+
/* Mangling PC to avoid loops on original BRK instr */
- ((ucontext_t *)uc)->uc_mcontext.pc += 4;
- memcpy(td->live_uc, uc, td->live_sz);
- ASSERT_GOOD_CONTEXT(td->live_uc);
+ uc->uc_mcontext.pc += 4;
+
+ /*
+ * Check for an preserve any extra data too with fixups.
+ */
+ head = (struct _aarch64_ctx *)uc->uc_mcontext.__reserved;
+ head = get_header(head, EXTRA_MAGIC, td->live_sz, &offset);
+ if (head) {
+ extra = (struct extra_context *)head;
+
+ /*
+ * The extra buffer must be immediately after the
+ * extra_context and a 16 byte terminator. Include it
+ * in the copy, this was previously validated in
+ * ASSERT_GOOD_CONTEXT().
+ */
+ to_copy = __builtin_offsetof(ucontext_t,
+ uc_mcontext.__reserved);
+ to_copy += offset + sizeof(struct extra_context) + 16;
+ to_copy += extra->size;
+ copied_extra = (struct extra_context *)&(td->live_uc->uc_mcontext.__reserved[offset]);
+ } else {
+ copied_extra = NULL;
+ to_copy = sizeof(ucontext_t);
+ }
+
+ if (to_copy > td->live_sz) {
+ fprintf(stderr,
+ "Not enough space to grab context, %lu/%lu bytes\n",
+ td->live_sz, to_copy);
+ return false;
+ }
+
+ memcpy(td->live_uc, uc, to_copy);
+
+ /*
+ * If there was any EXTRA_CONTEXT fix up the size to be the
+ * struct extra_context and the following terminator record,
+ * this means that the rest of the code does not need to have
+ * special handling for the record and we don't need to fix up
+ * datap for the new location.
+ */
+ if (copied_extra)
+ copied_extra->head.size = sizeof(*copied_extra) + 16;
+
td->live_uc_valid = 1;
fprintf(stderr,
- "GOOD CONTEXT grabbed from sig_copyctx handler\n");
+ "%lu byte GOOD CONTEXT grabbed from sig_copyctx handler\n",
+ to_copy);
return true;
}
@@ -192,7 +249,8 @@ static void default_handler(int signum, siginfo_t *si, void *uc)
fprintf(stderr, "-- Timeout !\n");
} else {
fprintf(stderr,
- "-- RX UNEXPECTED SIGNAL: %d\n", signum);
+ "-- RX UNEXPECTED SIGNAL: %d code %d address %p\n",
+ signum, si->si_code, si->si_addr);
}
default_result(current, 1);
}
@@ -255,7 +313,7 @@ int test_init(struct tdescr *td)
td->minsigstksz = MINSIGSTKSZ;
fprintf(stderr, "Detected MINSTKSIGSZ:%d\n", td->minsigstksz);
- if (td->feats_required) {
+ if (td->feats_required || td->feats_incompatible) {
td->feats_supported = 0;
/*
* Checking for CPU required features using both the
@@ -263,16 +321,41 @@ int test_init(struct tdescr *td)
*/
if (getauxval(AT_HWCAP) & HWCAP_SSBS)
td->feats_supported |= FEAT_SSBS;
- if (feats_ok(td))
- fprintf(stderr,
- "Required Features: [%s] supported\n",
- feats_to_string(td->feats_required &
- td->feats_supported));
- else
- fprintf(stderr,
- "Required Features: [%s] NOT supported\n",
- feats_to_string(td->feats_required &
- ~td->feats_supported));
+ if (getauxval(AT_HWCAP) & HWCAP_SVE)
+ td->feats_supported |= FEAT_SVE;
+ if (getauxval(AT_HWCAP2) & HWCAP2_SME)
+ td->feats_supported |= FEAT_SME;
+ if (getauxval(AT_HWCAP2) & HWCAP2_SME_FA64)
+ td->feats_supported |= FEAT_SME_FA64;
+ if (getauxval(AT_HWCAP2) & HWCAP2_SME2)
+ td->feats_supported |= FEAT_SME2;
+ if (feats_ok(td)) {
+ if (td->feats_required & td->feats_supported)
+ fprintf(stderr,
+ "Required Features: [%s] supported\n",
+ feats_to_string(td->feats_required &
+ td->feats_supported));
+ if (!(td->feats_incompatible & td->feats_supported))
+ fprintf(stderr,
+ "Incompatible Features: [%s] absent\n",
+ feats_to_string(td->feats_incompatible));
+ } else {
+ if ((td->feats_required & td->feats_supported) !=
+ td->feats_supported)
+ fprintf(stderr,
+ "Required Features: [%s] NOT supported\n",
+ feats_to_string(td->feats_required &
+ ~td->feats_supported));
+ if (td->feats_incompatible & td->feats_supported)
+ fprintf(stderr,
+ "Incompatible Features: [%s] supported\n",
+ feats_to_string(td->feats_incompatible &
+ ~td->feats_supported));
+
+
+ td->result = KSFT_SKIP;
+ return 0;
+ }
}
/* Perform test specific additional initialization */
@@ -304,14 +387,12 @@ int test_setup(struct tdescr *td)
int test_run(struct tdescr *td)
{
- if (td->sig_trig) {
- if (td->trigger)
- return td->trigger(td);
- else
- return default_trigger(td);
- } else {
+ if (td->trigger)
+ return td->trigger(td);
+ else if (td->sig_trig)
+ return default_trigger(td);
+ else
return td->run(td, NULL, NULL);
- }
}
void test_result(struct tdescr *td)
diff --git a/tools/testing/selftests/arm64/signal/test_signals_utils.h b/tools/testing/selftests/arm64/signal/test_signals_utils.h
index 6772b5c8d274..762c8fe9c54a 100644
--- a/tools/testing/selftests/arm64/signal/test_signals_utils.h
+++ b/tools/testing/selftests/arm64/signal/test_signals_utils.h
@@ -8,6 +8,8 @@
#include <stdio.h>
#include <string.h>
+#include <linux/compiler.h>
+
#include "test_signals.h"
int test_init(struct tdescr *td);
@@ -18,6 +20,8 @@ void test_result(struct tdescr *td);
static inline bool feats_ok(struct tdescr *td)
{
+ if (td->feats_incompatible & td->feats_supported)
+ return false;
return (td->feats_required & td->feats_supported) == td->feats_required;
}
@@ -54,16 +58,29 @@ static inline bool feats_ok(struct tdescr *td)
* at sizeof(ucontext_t).
*/
static __always_inline bool get_current_context(struct tdescr *td,
- ucontext_t *dest_uc)
+ ucontext_t *dest_uc,
+ size_t dest_sz)
{
static volatile bool seen_already;
+ int i;
+ char *uc = (char *)dest_uc;
assert(td && dest_uc);
/* it's a genuine invocation..reinit */
seen_already = 0;
td->live_uc_valid = 0;
- td->live_sz = sizeof(*dest_uc);
- memset(dest_uc, 0x00, td->live_sz);
+ td->live_sz = dest_sz;
+
+ /*
+ * This is a memset() but we don't want the compiler to
+ * optimise it into either instructions or a library call
+ * which might be incompatible with streaming mode.
+ */
+ for (i = 0; i < td->live_sz; i++) {
+ uc[i] = 0;
+ OPTIMIZER_HIDE_VAR(uc[0]);
+ }
+
td->live_uc = dest_uc;
/*
* Grab ucontext_t triggering a SIGTRAP.
@@ -101,6 +118,17 @@ static __always_inline bool get_current_context(struct tdescr *td,
: "memory");
/*
+ * If we were grabbing a streaming mode context then we may
+ * have entered streaming mode behind the system's back and
+ * libc or compiler generated code might decide to do
+ * something invalid in streaming mode, or potentially even
+ * the state of ZA. Issue a SMSTOP to exit both now we have
+ * grabbed the state.
+ */
+ if (td->feats_supported & FEAT_SME)
+ asm volatile("msr S0_3_C4_C6_3, xzr");
+
+ /*
* If we get here with seen_already==1 it implies the td->live_uc
* context has been used to get back here....this probably means
* a test has failed to cause a SEGV...anyway live_uc does not
diff --git a/tools/testing/selftests/arm64/signal/testcases/TODO b/tools/testing/selftests/arm64/signal/testcases/TODO
new file mode 100644
index 000000000000..1f7fba8194fe
--- /dev/null
+++ b/tools/testing/selftests/arm64/signal/testcases/TODO
@@ -0,0 +1 @@
+- Validate that register contents are saved and restored as expected.
diff --git a/tools/testing/selftests/arm64/signal/testcases/fake_sigreturn_bad_magic.c b/tools/testing/selftests/arm64/signal/testcases/fake_sigreturn_bad_magic.c
index 8dc600a7d4fd..8c7f00ea9823 100644
--- a/tools/testing/selftests/arm64/signal/testcases/fake_sigreturn_bad_magic.c
+++ b/tools/testing/selftests/arm64/signal/testcases/fake_sigreturn_bad_magic.c
@@ -21,7 +21,7 @@ static int fake_sigreturn_bad_magic_run(struct tdescr *td,
struct _aarch64_ctx *shead = GET_SF_RESV_HEAD(sf), *head;
/* just to fill the ucontext_t with something real */
- if (!get_current_context(td, &sf.uc))
+ if (!get_current_context(td, &sf.uc, sizeof(sf.uc)))
return 1;
/* need at least 2*HDR_SZ space: KSFT_BAD_MAGIC + terminator. */
diff --git a/tools/testing/selftests/arm64/signal/testcases/fake_sigreturn_bad_size.c b/tools/testing/selftests/arm64/signal/testcases/fake_sigreturn_bad_size.c
index b3c362100666..1c03f6b638e0 100644
--- a/tools/testing/selftests/arm64/signal/testcases/fake_sigreturn_bad_size.c
+++ b/tools/testing/selftests/arm64/signal/testcases/fake_sigreturn_bad_size.c
@@ -24,7 +24,7 @@ static int fake_sigreturn_bad_size_run(struct tdescr *td,
struct _aarch64_ctx *shead = GET_SF_RESV_HEAD(sf), *head;
/* just to fill the ucontext_t with something real */
- if (!get_current_context(td, &sf.uc))
+ if (!get_current_context(td, &sf.uc, sizeof(sf.uc)))
return 1;
resv_sz = GET_SF_RESV_SIZE(sf);
diff --git a/tools/testing/selftests/arm64/signal/testcases/fake_sigreturn_bad_size_for_magic0.c b/tools/testing/selftests/arm64/signal/testcases/fake_sigreturn_bad_size_for_magic0.c
index a44b88bfc81a..bc22f64b544e 100644
--- a/tools/testing/selftests/arm64/signal/testcases/fake_sigreturn_bad_size_for_magic0.c
+++ b/tools/testing/selftests/arm64/signal/testcases/fake_sigreturn_bad_size_for_magic0.c
@@ -21,7 +21,7 @@ static int fake_sigreturn_bad_size_for_magic0_run(struct tdescr *td,
struct _aarch64_ctx *shead = GET_SF_RESV_HEAD(sf), *head;
/* just to fill the ucontext_t with something real */
- if (!get_current_context(td, &sf.uc))
+ if (!get_current_context(td, &sf.uc, sizeof(sf.uc)))
return 1;
/* at least HDR_SZ for the badly sized terminator. */
diff --git a/tools/testing/selftests/arm64/signal/testcases/fake_sigreturn_duplicated_fpsimd.c b/tools/testing/selftests/arm64/signal/testcases/fake_sigreturn_duplicated_fpsimd.c
index afe8915f0998..63e3906b631c 100644
--- a/tools/testing/selftests/arm64/signal/testcases/fake_sigreturn_duplicated_fpsimd.c
+++ b/tools/testing/selftests/arm64/signal/testcases/fake_sigreturn_duplicated_fpsimd.c
@@ -21,7 +21,7 @@ static int fake_sigreturn_duplicated_fpsimd_run(struct tdescr *td,
struct _aarch64_ctx *shead = GET_SF_RESV_HEAD(sf), *head;
/* just to fill the ucontext_t with something real */
- if (!get_current_context(td, &sf.uc))
+ if (!get_current_context(td, &sf.uc, sizeof(sf.uc)))
return 1;
head = get_starting_head(shead, sizeof(struct fpsimd_context) + HDR_SZ,
diff --git a/tools/testing/selftests/arm64/signal/testcases/fake_sigreturn_misaligned_sp.c b/tools/testing/selftests/arm64/signal/testcases/fake_sigreturn_misaligned_sp.c
index 1e089e66f9f3..d00625ff12c2 100644
--- a/tools/testing/selftests/arm64/signal/testcases/fake_sigreturn_misaligned_sp.c
+++ b/tools/testing/selftests/arm64/signal/testcases/fake_sigreturn_misaligned_sp.c
@@ -19,7 +19,7 @@ static int fake_sigreturn_misaligned_run(struct tdescr *td,
siginfo_t *si, ucontext_t *uc)
{
/* just to fill the ucontext_t with something real */
- if (!get_current_context(td, &sf.uc))
+ if (!get_current_context(td, &sf.uc, sizeof(sf.uc)))
return 1;
/* Forcing sigframe on misaligned SP (16 + 3) */
diff --git a/tools/testing/selftests/arm64/signal/testcases/fake_sigreturn_missing_fpsimd.c b/tools/testing/selftests/arm64/signal/testcases/fake_sigreturn_missing_fpsimd.c
index 08ecd8073a1a..f805138cb20d 100644
--- a/tools/testing/selftests/arm64/signal/testcases/fake_sigreturn_missing_fpsimd.c
+++ b/tools/testing/selftests/arm64/signal/testcases/fake_sigreturn_missing_fpsimd.c
@@ -23,7 +23,7 @@ static int fake_sigreturn_missing_fpsimd_run(struct tdescr *td,
struct _aarch64_ctx *head = GET_SF_RESV_HEAD(sf);
/* just to fill the ucontext_t with something real */
- if (!get_current_context(td, &sf.uc))
+ if (!get_current_context(td, &sf.uc, sizeof(sf.uc)))
return 1;
resv_sz = GET_SF_RESV_SIZE(sf);
diff --git a/tools/testing/selftests/arm64/signal/testcases/fake_sigreturn_sme_change_vl.c b/tools/testing/selftests/arm64/signal/testcases/fake_sigreturn_sme_change_vl.c
new file mode 100644
index 000000000000..ebd5815b54bb
--- /dev/null
+++ b/tools/testing/selftests/arm64/signal/testcases/fake_sigreturn_sme_change_vl.c
@@ -0,0 +1,92 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (C) 2021 ARM Limited
+ *
+ * Attempt to change the streaming SVE vector length in a signal
+ * handler, this is not supported and is expected to segfault.
+ */
+
+#include <signal.h>
+#include <ucontext.h>
+#include <sys/prctl.h>
+
+#include "test_signals_utils.h"
+#include "testcases.h"
+
+struct fake_sigframe sf;
+static unsigned int vls[SVE_VQ_MAX];
+unsigned int nvls = 0;
+
+static bool sme_get_vls(struct tdescr *td)
+{
+ int vq, vl;
+
+ /*
+ * Enumerate up to SVE_VQ_MAX vector lengths
+ */
+ for (vq = SVE_VQ_MAX; vq > 0; --vq) {
+ vl = prctl(PR_SVE_SET_VL, vq * 16);
+ if (vl == -1)
+ return false;
+
+ vl &= PR_SME_VL_LEN_MASK;
+
+ /* Skip missing VLs */
+ vq = sve_vq_from_vl(vl);
+
+ vls[nvls++] = vl;
+ }
+
+ /* We need at least two VLs */
+ if (nvls < 2) {
+ fprintf(stderr, "Only %d VL supported\n", nvls);
+ return false;
+ }
+
+ return true;
+}
+
+static int fake_sigreturn_ssve_change_vl(struct tdescr *td,
+ siginfo_t *si, ucontext_t *uc)
+{
+ size_t resv_sz, offset;
+ struct _aarch64_ctx *head = GET_SF_RESV_HEAD(sf);
+ struct sve_context *sve;
+
+ /* Get a signal context with a SME ZA frame in it */
+ if (!get_current_context(td, &sf.uc, sizeof(sf.uc)))
+ return 1;
+
+ resv_sz = GET_SF_RESV_SIZE(sf);
+ head = get_header(head, SVE_MAGIC, resv_sz, &offset);
+ if (!head) {
+ fprintf(stderr, "No SVE context\n");
+ return 1;
+ }
+
+ if (head->size != sizeof(struct sve_context)) {
+ fprintf(stderr, "Register data present, aborting\n");
+ return 1;
+ }
+
+ sve = (struct sve_context *)head;
+
+ /* No changes are supported; init left us at minimum VL so go to max */
+ fprintf(stderr, "Attempting to change VL from %d to %d\n",
+ sve->vl, vls[0]);
+ sve->vl = vls[0];
+
+ fake_sigreturn(&sf, sizeof(sf), 0);
+
+ return 1;
+}
+
+struct tdescr tde = {
+ .name = "FAKE_SIGRETURN_SSVE_CHANGE",
+ .descr = "Attempt to change Streaming SVE VL",
+ .feats_required = FEAT_SME,
+ .sig_ok = SIGSEGV,
+ .timeout = 3,
+ .init = sme_get_vls,
+ .run = fake_sigreturn_ssve_change_vl,
+};
diff --git a/tools/testing/selftests/arm64/signal/testcases/fake_sigreturn_sve_change_vl.c b/tools/testing/selftests/arm64/signal/testcases/fake_sigreturn_sve_change_vl.c
new file mode 100644
index 000000000000..e2a452190511
--- /dev/null
+++ b/tools/testing/selftests/arm64/signal/testcases/fake_sigreturn_sve_change_vl.c
@@ -0,0 +1,94 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (C) 2021 ARM Limited
+ *
+ * Attempt to change the SVE vector length in a signal hander, this is not
+ * supported and is expected to segfault.
+ */
+
+#include <kselftest.h>
+#include <signal.h>
+#include <ucontext.h>
+#include <sys/prctl.h>
+
+#include "test_signals_utils.h"
+#include "testcases.h"
+
+struct fake_sigframe sf;
+static unsigned int vls[SVE_VQ_MAX];
+unsigned int nvls = 0;
+
+static bool sve_get_vls(struct tdescr *td)
+{
+ int vq, vl;
+
+ /*
+ * Enumerate up to SVE_VQ_MAX vector lengths
+ */
+ for (vq = SVE_VQ_MAX; vq > 0; --vq) {
+ vl = prctl(PR_SVE_SET_VL, vq * 16);
+ if (vl == -1)
+ return false;
+
+ vl &= PR_SVE_VL_LEN_MASK;
+
+ /* Skip missing VLs */
+ vq = sve_vq_from_vl(vl);
+
+ vls[nvls++] = vl;
+ }
+
+ /* We need at least two VLs */
+ if (nvls < 2) {
+ fprintf(stderr, "Only %d VL supported\n", nvls);
+ td->result = KSFT_SKIP;
+ return false;
+ }
+
+ return true;
+}
+
+static int fake_sigreturn_sve_change_vl(struct tdescr *td,
+ siginfo_t *si, ucontext_t *uc)
+{
+ size_t resv_sz, offset;
+ struct _aarch64_ctx *head = GET_SF_RESV_HEAD(sf);
+ struct sve_context *sve;
+
+ /* Get a signal context with a SVE frame in it */
+ if (!get_current_context(td, &sf.uc, sizeof(sf.uc)))
+ return 1;
+
+ resv_sz = GET_SF_RESV_SIZE(sf);
+ head = get_header(head, SVE_MAGIC, resv_sz, &offset);
+ if (!head) {
+ fprintf(stderr, "No SVE context\n");
+ return 1;
+ }
+
+ if (head->size != sizeof(struct sve_context)) {
+ fprintf(stderr, "SVE register state active, skipping\n");
+ return 1;
+ }
+
+ sve = (struct sve_context *)head;
+
+ /* No changes are supported; init left us at minimum VL so go to max */
+ fprintf(stderr, "Attempting to change VL from %d to %d\n",
+ sve->vl, vls[0]);
+ sve->vl = vls[0];
+
+ fake_sigreturn(&sf, sizeof(sf), 0);
+
+ return 1;
+}
+
+struct tdescr tde = {
+ .name = "FAKE_SIGRETURN_SVE_CHANGE",
+ .descr = "Attempt to change SVE VL",
+ .feats_required = FEAT_SVE,
+ .sig_ok = SIGSEGV,
+ .timeout = 3,
+ .init = sve_get_vls,
+ .run = fake_sigreturn_sve_change_vl,
+};
diff --git a/tools/testing/selftests/arm64/signal/testcases/fpmr_siginfo.c b/tools/testing/selftests/arm64/signal/testcases/fpmr_siginfo.c
new file mode 100644
index 000000000000..e9d24685e741
--- /dev/null
+++ b/tools/testing/selftests/arm64/signal/testcases/fpmr_siginfo.c
@@ -0,0 +1,82 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (C) 2023 ARM Limited
+ *
+ * Verify that the FPMR register context in signal frames is set up as
+ * expected.
+ */
+
+#include <signal.h>
+#include <ucontext.h>
+#include <sys/auxv.h>
+#include <sys/prctl.h>
+#include <unistd.h>
+#include <asm/sigcontext.h>
+
+#include "test_signals_utils.h"
+#include "testcases.h"
+
+static union {
+ ucontext_t uc;
+ char buf[1024 * 128];
+} context;
+
+#define SYS_FPMR "S3_3_C4_C4_2"
+
+static uint64_t get_fpmr(void)
+{
+ uint64_t val;
+
+ asm volatile (
+ "mrs %0, " SYS_FPMR "\n"
+ : "=r"(val)
+ :
+ : "cc");
+
+ return val;
+}
+
+int fpmr_present(struct tdescr *td, siginfo_t *si, ucontext_t *uc)
+{
+ struct _aarch64_ctx *head = GET_BUF_RESV_HEAD(context);
+ struct fpmr_context *fpmr_ctx;
+ size_t offset;
+ bool in_sigframe;
+ bool have_fpmr;
+ __u64 orig_fpmr;
+
+ have_fpmr = getauxval(AT_HWCAP2) & HWCAP2_FPMR;
+ if (have_fpmr)
+ orig_fpmr = get_fpmr();
+
+ if (!get_current_context(td, &context.uc, sizeof(context)))
+ return 1;
+
+ fpmr_ctx = (struct fpmr_context *)
+ get_header(head, FPMR_MAGIC, td->live_sz, &offset);
+
+ in_sigframe = fpmr_ctx != NULL;
+
+ fprintf(stderr, "FPMR sigframe %s on system %s FPMR\n",
+ in_sigframe ? "present" : "absent",
+ have_fpmr ? "with" : "without");
+
+ td->pass = (in_sigframe == have_fpmr);
+
+ if (have_fpmr && fpmr_ctx) {
+ if (fpmr_ctx->fpmr != orig_fpmr) {
+ fprintf(stderr, "FPMR in frame is %llx, was %llx\n",
+ fpmr_ctx->fpmr, orig_fpmr);
+ td->pass = false;
+ }
+ }
+
+ return 0;
+}
+
+struct tdescr tde = {
+ .name = "FPMR",
+ .descr = "Validate that FPMR is present as expected",
+ .timeout = 3,
+ .run = fpmr_present,
+};
diff --git a/tools/testing/selftests/arm64/signal/testcases/sme_trap_no_sm.c b/tools/testing/selftests/arm64/signal/testcases/sme_trap_no_sm.c
new file mode 100644
index 000000000000..f9d76ae32bba
--- /dev/null
+++ b/tools/testing/selftests/arm64/signal/testcases/sme_trap_no_sm.c
@@ -0,0 +1,38 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (C) 2021 ARM Limited
+ *
+ * Verify that using a streaming mode instruction without enabling it
+ * generates a SIGILL.
+ */
+
+#include <signal.h>
+#include <ucontext.h>
+#include <sys/prctl.h>
+
+#include "test_signals_utils.h"
+#include "testcases.h"
+
+int sme_trap_no_sm_trigger(struct tdescr *td)
+{
+ /* SMSTART ZA ; ADDHA ZA0.S, P0/M, P0/M, Z0.S */
+ asm volatile(".inst 0xd503457f ; .inst 0xc0900000");
+
+ return 0;
+}
+
+int sme_trap_no_sm_run(struct tdescr *td, siginfo_t *si, ucontext_t *uc)
+{
+ return 1;
+}
+
+struct tdescr tde = {
+ .name = "SME trap without SM",
+ .descr = "Check that we get a SIGILL if we use streaming mode without enabling it",
+ .timeout = 3,
+ .feats_required = FEAT_SME, /* We need a SMSTART ZA */
+ .sanity_disabled = true,
+ .trigger = sme_trap_no_sm_trigger,
+ .run = sme_trap_no_sm_run,
+ .sig_ok = SIGILL,
+};
diff --git a/tools/testing/selftests/arm64/signal/testcases/sme_trap_non_streaming.c b/tools/testing/selftests/arm64/signal/testcases/sme_trap_non_streaming.c
new file mode 100644
index 000000000000..e469ae5348e3
--- /dev/null
+++ b/tools/testing/selftests/arm64/signal/testcases/sme_trap_non_streaming.c
@@ -0,0 +1,45 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (C) 2021 ARM Limited
+ *
+ * Verify that using an instruction not supported in streaming mode
+ * traps when in streaming mode.
+ */
+
+#include <signal.h>
+#include <ucontext.h>
+#include <sys/prctl.h>
+
+#include "test_signals_utils.h"
+#include "testcases.h"
+
+int sme_trap_non_streaming_trigger(struct tdescr *td)
+{
+ /*
+ * The framework will handle SIGILL so we need to exit SM to
+ * stop any other code triggering a further SIGILL down the
+ * line from using a streaming-illegal instruction.
+ */
+ asm volatile(".inst 0xd503437f; /* SMSTART ZA */ \
+ cnt v0.16b, v0.16b; \
+ .inst 0xd503447f /* SMSTOP ZA */");
+
+ return 0;
+}
+
+int sme_trap_non_streaming_run(struct tdescr *td, siginfo_t *si, ucontext_t *uc)
+{
+ return 1;
+}
+
+struct tdescr tde = {
+ .name = "SME SM trap unsupported instruction",
+ .descr = "Check that we get a SIGILL if we use an unsupported instruction in streaming mode",
+ .feats_required = FEAT_SME,
+ .feats_incompatible = FEAT_SME_FA64,
+ .timeout = 3,
+ .sanity_disabled = true,
+ .trigger = sme_trap_non_streaming_trigger,
+ .run = sme_trap_non_streaming_run,
+ .sig_ok = SIGILL,
+};
diff --git a/tools/testing/selftests/arm64/signal/testcases/sme_trap_za.c b/tools/testing/selftests/arm64/signal/testcases/sme_trap_za.c
new file mode 100644
index 000000000000..3a7747af4715
--- /dev/null
+++ b/tools/testing/selftests/arm64/signal/testcases/sme_trap_za.c
@@ -0,0 +1,36 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (C) 2021 ARM Limited
+ *
+ * Verify that accessing ZA without enabling it generates a SIGILL.
+ */
+
+#include <signal.h>
+#include <ucontext.h>
+#include <sys/prctl.h>
+
+#include "test_signals_utils.h"
+#include "testcases.h"
+
+int sme_trap_za_trigger(struct tdescr *td)
+{
+ /* ZERO ZA */
+ asm volatile(".inst 0xc00800ff");
+
+ return 0;
+}
+
+int sme_trap_za_run(struct tdescr *td, siginfo_t *si, ucontext_t *uc)
+{
+ return 1;
+}
+
+struct tdescr tde = {
+ .name = "SME ZA trap",
+ .descr = "Check that we get a SIGILL if we access ZA without enabling",
+ .timeout = 3,
+ .sanity_disabled = true,
+ .trigger = sme_trap_za_trigger,
+ .run = sme_trap_za_run,
+ .sig_ok = SIGILL,
+};
diff --git a/tools/testing/selftests/arm64/signal/testcases/sme_vl.c b/tools/testing/selftests/arm64/signal/testcases/sme_vl.c
new file mode 100644
index 000000000000..75f387f2db81
--- /dev/null
+++ b/tools/testing/selftests/arm64/signal/testcases/sme_vl.c
@@ -0,0 +1,68 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (C) 2021 ARM Limited
+ *
+ * Check that the SME vector length reported in signal contexts is the
+ * expected one.
+ */
+
+#include <signal.h>
+#include <ucontext.h>
+#include <sys/prctl.h>
+
+#include "test_signals_utils.h"
+#include "testcases.h"
+
+struct fake_sigframe sf;
+unsigned int vl;
+
+static bool get_sme_vl(struct tdescr *td)
+{
+ int ret = prctl(PR_SME_GET_VL);
+ if (ret == -1)
+ return false;
+
+ vl = ret;
+
+ return true;
+}
+
+static int sme_vl(struct tdescr *td, siginfo_t *si, ucontext_t *uc)
+{
+ size_t resv_sz, offset;
+ struct _aarch64_ctx *head = GET_SF_RESV_HEAD(sf);
+ struct za_context *za;
+
+ /* Get a signal context which should have a ZA frame in it */
+ if (!get_current_context(td, &sf.uc, sizeof(sf.uc)))
+ return 1;
+
+ resv_sz = GET_SF_RESV_SIZE(sf);
+ head = get_header(head, ZA_MAGIC, resv_sz, &offset);
+ if (!head) {
+ fprintf(stderr, "No ZA context\n");
+ return 1;
+ }
+ za = (struct za_context *)head;
+
+ if (za->vl != vl) {
+ fprintf(stderr, "ZA sigframe VL %u, expected %u\n",
+ za->vl, vl);
+ return 1;
+ } else {
+ fprintf(stderr, "got expected VL %u\n", vl);
+ }
+
+ td->pass = 1;
+
+ return 0;
+}
+
+struct tdescr tde = {
+ .name = "SME VL",
+ .descr = "Check that we get the right SME VL reported",
+ .feats_required = FEAT_SME,
+ .timeout = 3,
+ .init = get_sme_vl,
+ .run = sme_vl,
+};
diff --git a/tools/testing/selftests/arm64/signal/testcases/ssve_regs.c b/tools/testing/selftests/arm64/signal/testcases/ssve_regs.c
new file mode 100644
index 000000000000..3d37daafcff5
--- /dev/null
+++ b/tools/testing/selftests/arm64/signal/testcases/ssve_regs.c
@@ -0,0 +1,132 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (C) 2021 ARM Limited
+ *
+ * Verify that the streaming SVE register context in signal frames is
+ * set up as expected.
+ */
+
+#include <signal.h>
+#include <ucontext.h>
+#include <sys/prctl.h>
+
+#include "test_signals_utils.h"
+#include "testcases.h"
+
+static union {
+ ucontext_t uc;
+ char buf[1024 * 64];
+} context;
+static unsigned int vls[SVE_VQ_MAX];
+unsigned int nvls = 0;
+
+static bool sme_get_vls(struct tdescr *td)
+{
+ int vq, vl;
+
+ /*
+ * Enumerate up to SVE_VQ_MAX vector lengths
+ */
+ for (vq = SVE_VQ_MAX; vq > 0; --vq) {
+ vl = prctl(PR_SME_SET_VL, vq * 16);
+ if (vl == -1)
+ return false;
+
+ vl &= PR_SME_VL_LEN_MASK;
+
+ /* Did we find the lowest supported VL? */
+ if (vq < sve_vq_from_vl(vl))
+ break;
+
+ /* Skip missing VLs */
+ vq = sve_vq_from_vl(vl);
+
+ vls[nvls++] = vl;
+ }
+
+ /* We need at least one VL */
+ if (nvls < 1) {
+ fprintf(stderr, "Only %d VL supported\n", nvls);
+ return false;
+ }
+
+ return true;
+}
+
+static void setup_ssve_regs(void)
+{
+ /* smstart sm; real data is TODO */
+ asm volatile(".inst 0xd503437f" : : : );
+}
+
+static int do_one_sme_vl(struct tdescr *td, siginfo_t *si, ucontext_t *uc,
+ unsigned int vl)
+{
+ size_t offset;
+ struct _aarch64_ctx *head = GET_BUF_RESV_HEAD(context);
+ struct sve_context *ssve;
+ int ret;
+
+ fprintf(stderr, "Testing VL %d\n", vl);
+
+ ret = prctl(PR_SME_SET_VL, vl);
+ if (ret != vl) {
+ fprintf(stderr, "Failed to set VL, got %d\n", ret);
+ return 1;
+ }
+
+ /*
+ * Get a signal context which should have a SVE frame and registers
+ * in it.
+ */
+ setup_ssve_regs();
+ if (!get_current_context(td, &context.uc, sizeof(context)))
+ return 1;
+
+ head = get_header(head, SVE_MAGIC, GET_BUF_RESV_SIZE(context),
+ &offset);
+ if (!head) {
+ fprintf(stderr, "No SVE context\n");
+ return 1;
+ }
+
+ ssve = (struct sve_context *)head;
+ if (ssve->vl != vl) {
+ fprintf(stderr, "Got VL %d, expected %d\n", ssve->vl, vl);
+ return 1;
+ }
+
+ if (!(ssve->flags & SVE_SIG_FLAG_SM)) {
+ fprintf(stderr, "SVE_SIG_FLAG_SM not set in SVE record\n");
+ return 1;
+ }
+
+ /* The actual size validation is done in get_current_context() */
+ fprintf(stderr, "Got expected size %u and VL %d\n",
+ head->size, ssve->vl);
+
+ return 0;
+}
+
+static int sme_regs(struct tdescr *td, siginfo_t *si, ucontext_t *uc)
+{
+ int i;
+
+ for (i = 0; i < nvls; i++) {
+ if (do_one_sme_vl(td, si, uc, vls[i]))
+ return 1;
+ }
+
+ td->pass = 1;
+
+ return 0;
+}
+
+struct tdescr tde = {
+ .name = "Streaming SVE registers",
+ .descr = "Check that we get the right Streaming SVE registers reported",
+ .feats_required = FEAT_SME,
+ .timeout = 3,
+ .init = sme_get_vls,
+ .run = sme_regs,
+};
diff --git a/tools/testing/selftests/arm64/signal/testcases/ssve_za_regs.c b/tools/testing/selftests/arm64/signal/testcases/ssve_za_regs.c
new file mode 100644
index 000000000000..9dc5f128bbc0
--- /dev/null
+++ b/tools/testing/selftests/arm64/signal/testcases/ssve_za_regs.c
@@ -0,0 +1,161 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (C) 2021 ARM Limited
+ *
+ * Verify that both the streaming SVE and ZA register context in
+ * signal frames is set up as expected when enabled simultaneously.
+ */
+
+#include <signal.h>
+#include <ucontext.h>
+#include <sys/prctl.h>
+
+#include "test_signals_utils.h"
+#include "testcases.h"
+
+static union {
+ ucontext_t uc;
+ char buf[1024 * 128];
+} context;
+static unsigned int vls[SVE_VQ_MAX];
+unsigned int nvls = 0;
+
+static bool sme_get_vls(struct tdescr *td)
+{
+ int vq, vl;
+
+ /*
+ * Enumerate up to SVE_VQ_MAX vector lengths
+ */
+ for (vq = SVE_VQ_MAX; vq > 0; --vq) {
+ vl = prctl(PR_SME_SET_VL, vq * 16);
+ if (vl == -1)
+ return false;
+
+ vl &= PR_SME_VL_LEN_MASK;
+
+ /* Did we find the lowest supported VL? */
+ if (vq < sve_vq_from_vl(vl))
+ break;
+
+ /* Skip missing VLs */
+ vq = sve_vq_from_vl(vl);
+
+ vls[nvls++] = vl;
+ }
+
+ /* We need at least one VL */
+ if (nvls < 1) {
+ fprintf(stderr, "Only %d VL supported\n", nvls);
+ return false;
+ }
+
+ return true;
+}
+
+static void setup_regs(void)
+{
+ /* smstart sm; real data is TODO */
+ asm volatile(".inst 0xd503437f" : : : );
+
+ /* smstart za; real data is TODO */
+ asm volatile(".inst 0xd503457f" : : : );
+}
+
+static char zeros[ZA_SIG_REGS_SIZE(SVE_VQ_MAX)];
+
+static int do_one_sme_vl(struct tdescr *td, siginfo_t *si, ucontext_t *uc,
+ unsigned int vl)
+{
+ size_t offset;
+ struct _aarch64_ctx *head = GET_BUF_RESV_HEAD(context);
+ struct _aarch64_ctx *regs;
+ struct sve_context *ssve;
+ struct za_context *za;
+ int ret;
+
+ fprintf(stderr, "Testing VL %d\n", vl);
+
+ ret = prctl(PR_SME_SET_VL, vl);
+ if (ret != vl) {
+ fprintf(stderr, "Failed to set VL, got %d\n", ret);
+ return 1;
+ }
+
+ /*
+ * Get a signal context which should have the SVE and ZA
+ * frames in it.
+ */
+ setup_regs();
+ if (!get_current_context(td, &context.uc, sizeof(context)))
+ return 1;
+
+ regs = get_header(head, SVE_MAGIC, GET_BUF_RESV_SIZE(context),
+ &offset);
+ if (!regs) {
+ fprintf(stderr, "No SVE context\n");
+ return 1;
+ }
+
+ ssve = (struct sve_context *)regs;
+ if (ssve->vl != vl) {
+ fprintf(stderr, "Got SSVE VL %d, expected %d\n", ssve->vl, vl);
+ return 1;
+ }
+
+ if (!(ssve->flags & SVE_SIG_FLAG_SM)) {
+ fprintf(stderr, "SVE_SIG_FLAG_SM not set in SVE record\n");
+ return 1;
+ }
+
+ fprintf(stderr, "Got expected SSVE size %u and VL %d\n",
+ regs->size, ssve->vl);
+
+ regs = get_header(head, ZA_MAGIC, GET_BUF_RESV_SIZE(context),
+ &offset);
+ if (!regs) {
+ fprintf(stderr, "No ZA context\n");
+ return 1;
+ }
+
+ za = (struct za_context *)regs;
+ if (za->vl != vl) {
+ fprintf(stderr, "Got ZA VL %d, expected %d\n", za->vl, vl);
+ return 1;
+ }
+
+ fprintf(stderr, "Got expected ZA size %u and VL %d\n",
+ regs->size, za->vl);
+
+ /* We didn't load any data into ZA so it should be all zeros */
+ if (memcmp(zeros, (char *)za + ZA_SIG_REGS_OFFSET,
+ ZA_SIG_REGS_SIZE(sve_vq_from_vl(za->vl))) != 0) {
+ fprintf(stderr, "ZA data invalid\n");
+ return 1;
+ }
+
+ return 0;
+}
+
+static int sme_regs(struct tdescr *td, siginfo_t *si, ucontext_t *uc)
+{
+ int i;
+
+ for (i = 0; i < nvls; i++) {
+ if (do_one_sme_vl(td, si, uc, vls[i]))
+ return 1;
+ }
+
+ td->pass = 1;
+
+ return 0;
+}
+
+struct tdescr tde = {
+ .name = "Streaming SVE registers",
+ .descr = "Check that we get the right Streaming SVE registers reported",
+ .feats_required = FEAT_SME,
+ .timeout = 3,
+ .init = sme_get_vls,
+ .run = sme_regs,
+};
diff --git a/tools/testing/selftests/arm64/signal/testcases/sve_regs.c b/tools/testing/selftests/arm64/signal/testcases/sve_regs.c
new file mode 100644
index 000000000000..8b16eabbb769
--- /dev/null
+++ b/tools/testing/selftests/arm64/signal/testcases/sve_regs.c
@@ -0,0 +1,121 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (C) 2021 ARM Limited
+ *
+ * Verify that the SVE register context in signal frames is set up as
+ * expected.
+ */
+
+#include <signal.h>
+#include <ucontext.h>
+#include <sys/prctl.h>
+
+#include "test_signals_utils.h"
+#include "testcases.h"
+
+static union {
+ ucontext_t uc;
+ char buf[1024 * 64];
+} context;
+static unsigned int vls[SVE_VQ_MAX];
+unsigned int nvls = 0;
+
+static bool sve_get_vls(struct tdescr *td)
+{
+ int vq, vl;
+
+ /*
+ * Enumerate up to SVE_VQ_MAX vector lengths
+ */
+ for (vq = SVE_VQ_MAX; vq > 0; --vq) {
+ vl = prctl(PR_SVE_SET_VL, vq * 16);
+ if (vl == -1)
+ return false;
+
+ vl &= PR_SVE_VL_LEN_MASK;
+
+ /* Skip missing VLs */
+ vq = sve_vq_from_vl(vl);
+
+ vls[nvls++] = vl;
+ }
+
+ /* We need at least one VL */
+ if (nvls < 1) {
+ fprintf(stderr, "Only %d VL supported\n", nvls);
+ return false;
+ }
+
+ return true;
+}
+
+static void setup_sve_regs(void)
+{
+ /* RDVL x16, #1 so we should have SVE regs; real data is TODO */
+ asm volatile(".inst 0x04bf5030" : : : "x16" );
+}
+
+static int do_one_sve_vl(struct tdescr *td, siginfo_t *si, ucontext_t *uc,
+ unsigned int vl)
+{
+ size_t offset;
+ struct _aarch64_ctx *head = GET_BUF_RESV_HEAD(context);
+ struct sve_context *sve;
+
+ fprintf(stderr, "Testing VL %d\n", vl);
+
+ if (prctl(PR_SVE_SET_VL, vl) == -1) {
+ fprintf(stderr, "Failed to set VL\n");
+ return 1;
+ }
+
+ /*
+ * Get a signal context which should have a SVE frame and registers
+ * in it.
+ */
+ setup_sve_regs();
+ if (!get_current_context(td, &context.uc, sizeof(context)))
+ return 1;
+
+ head = get_header(head, SVE_MAGIC, GET_BUF_RESV_SIZE(context),
+ &offset);
+ if (!head) {
+ fprintf(stderr, "No SVE context\n");
+ return 1;
+ }
+
+ sve = (struct sve_context *)head;
+ if (sve->vl != vl) {
+ fprintf(stderr, "Got VL %d, expected %d\n", sve->vl, vl);
+ return 1;
+ }
+
+ /* The actual size validation is done in get_current_context() */
+ fprintf(stderr, "Got expected size %u and VL %d\n",
+ head->size, sve->vl);
+
+ return 0;
+}
+
+static int sve_regs(struct tdescr *td, siginfo_t *si, ucontext_t *uc)
+{
+ int i;
+
+ for (i = 0; i < nvls; i++) {
+ if (do_one_sve_vl(td, si, uc, vls[i]))
+ return 1;
+ }
+
+ td->pass = 1;
+
+ return 0;
+}
+
+struct tdescr tde = {
+ .name = "SVE registers",
+ .descr = "Check that we get the right SVE registers reported",
+ .feats_required = FEAT_SVE,
+ .timeout = 3,
+ .init = sve_get_vls,
+ .run = sve_regs,
+};
diff --git a/tools/testing/selftests/arm64/signal/testcases/sve_vl.c b/tools/testing/selftests/arm64/signal/testcases/sve_vl.c
new file mode 100644
index 000000000000..aa835acec062
--- /dev/null
+++ b/tools/testing/selftests/arm64/signal/testcases/sve_vl.c
@@ -0,0 +1,68 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (C) 2021 ARM Limited
+ *
+ * Check that the SVE vector length reported in signal contexts is the
+ * expected one.
+ */
+
+#include <signal.h>
+#include <ucontext.h>
+#include <sys/prctl.h>
+
+#include "test_signals_utils.h"
+#include "testcases.h"
+
+struct fake_sigframe sf;
+unsigned int vl;
+
+static bool get_sve_vl(struct tdescr *td)
+{
+ int ret = prctl(PR_SVE_GET_VL);
+ if (ret == -1)
+ return false;
+
+ vl = ret;
+
+ return true;
+}
+
+static int sve_vl(struct tdescr *td, siginfo_t *si, ucontext_t *uc)
+{
+ size_t resv_sz, offset;
+ struct _aarch64_ctx *head = GET_SF_RESV_HEAD(sf);
+ struct sve_context *sve;
+
+ /* Get a signal context which should have a SVE frame in it */
+ if (!get_current_context(td, &sf.uc, sizeof(sf.uc)))
+ return 1;
+
+ resv_sz = GET_SF_RESV_SIZE(sf);
+ head = get_header(head, SVE_MAGIC, resv_sz, &offset);
+ if (!head) {
+ fprintf(stderr, "No SVE context\n");
+ return 1;
+ }
+ sve = (struct sve_context *)head;
+
+ if (sve->vl != vl) {
+ fprintf(stderr, "sigframe VL %u, expected %u\n",
+ sve->vl, vl);
+ return 1;
+ } else {
+ fprintf(stderr, "got expected VL %u\n", vl);
+ }
+
+ td->pass = 1;
+
+ return 0;
+}
+
+struct tdescr tde = {
+ .name = "SVE VL",
+ .descr = "Check that we get the right SVE VL reported",
+ .feats_required = FEAT_SVE,
+ .timeout = 3,
+ .init = get_sve_vl,
+ .run = sve_vl,
+};
diff --git a/tools/testing/selftests/arm64/signal/testcases/testcases.c b/tools/testing/selftests/arm64/signal/testcases/testcases.c
index 61ebcdf63831..674b88cc8c39 100644
--- a/tools/testing/selftests/arm64/signal/testcases/testcases.c
+++ b/tools/testing/selftests/arm64/signal/testcases/testcases.c
@@ -1,5 +1,9 @@
// SPDX-License-Identifier: GPL-2.0
/* Copyright (C) 2019 ARM Limited */
+
+#include <ctype.h>
+#include <string.h>
+
#include "testcases.h"
struct _aarch64_ctx *get_header(struct _aarch64_ctx *head, uint32_t magic,
@@ -25,7 +29,8 @@ struct _aarch64_ctx *get_header(struct _aarch64_ctx *head, uint32_t magic,
return found;
}
-bool validate_extra_context(struct extra_context *extra, char **err)
+bool validate_extra_context(struct extra_context *extra, char **err,
+ void **extra_data, size_t *extra_size)
{
struct _aarch64_ctx *term;
@@ -33,7 +38,7 @@ bool validate_extra_context(struct extra_context *extra, char **err)
return false;
fprintf(stderr, "Validating EXTRA...\n");
- term = GET_RESV_NEXT_HEAD(extra);
+ term = GET_RESV_NEXT_HEAD(&extra->head);
if (!term || term->magic || term->size) {
*err = "Missing terminator after EXTRA context";
return false;
@@ -42,11 +47,84 @@ bool validate_extra_context(struct extra_context *extra, char **err)
*err = "Extra DATAP misaligned";
else if (extra->size & 0x0fUL)
*err = "Extra SIZE misaligned";
- else if (extra->datap != (uint64_t)term + sizeof(*term))
+ else if (extra->datap != (uint64_t)term + 0x10UL)
*err = "Extra DATAP misplaced (not contiguous)";
if (*err)
return false;
+ *extra_data = (void *)extra->datap;
+ *extra_size = extra->size;
+
+ return true;
+}
+
+bool validate_sve_context(struct sve_context *sve, char **err)
+{
+ /* Size will be rounded up to a multiple of 16 bytes */
+ size_t regs_size
+ = ((SVE_SIG_CONTEXT_SIZE(sve_vq_from_vl(sve->vl)) + 15) / 16) * 16;
+
+ if (!sve || !err)
+ return false;
+
+ /* Either a bare sve_context or a sve_context followed by regs data */
+ if ((sve->head.size != sizeof(struct sve_context)) &&
+ (sve->head.size != regs_size)) {
+ *err = "bad size for SVE context";
+ return false;
+ }
+
+ if (!sve_vl_valid(sve->vl)) {
+ *err = "SVE VL invalid";
+
+ return false;
+ }
+
+ return true;
+}
+
+bool validate_za_context(struct za_context *za, char **err)
+{
+ /* Size will be rounded up to a multiple of 16 bytes */
+ size_t regs_size
+ = ((ZA_SIG_CONTEXT_SIZE(sve_vq_from_vl(za->vl)) + 15) / 16) * 16;
+
+ if (!za || !err)
+ return false;
+
+ /* Either a bare za_context or a za_context followed by regs data */
+ if ((za->head.size != sizeof(struct za_context)) &&
+ (za->head.size != regs_size)) {
+ *err = "bad size for ZA context";
+ return false;
+ }
+
+ if (!sve_vl_valid(za->vl)) {
+ *err = "SME VL in ZA context invalid";
+
+ return false;
+ }
+
+ return true;
+}
+
+bool validate_zt_context(struct zt_context *zt, char **err)
+{
+ if (!zt || !err)
+ return false;
+
+ /* If the context is present there should be at least one register */
+ if (zt->nregs == 0) {
+ *err = "no registers";
+ return false;
+ }
+
+ /* Size should agree with the number of registers */
+ if (zt->head.size != ZT_SIG_CONTEXT_SIZE(zt->nregs)) {
+ *err = "register count does not match size";
+ return false;
+ }
+
return true;
}
@@ -55,9 +133,16 @@ bool validate_reserved(ucontext_t *uc, size_t resv_sz, char **err)
bool terminated = false;
size_t offs = 0;
int flags = 0;
+ int new_flags, i;
struct extra_context *extra = NULL;
+ struct sve_context *sve = NULL;
+ struct za_context *za = NULL;
+ struct zt_context *zt = NULL;
struct _aarch64_ctx *head =
(struct _aarch64_ctx *)uc->uc_mcontext.__reserved;
+ void *extra_data = NULL;
+ size_t extra_sz = 0;
+ char magic[4];
if (!err)
return false;
@@ -68,12 +153,24 @@ bool validate_reserved(ucontext_t *uc, size_t resv_sz, char **err)
return false;
}
+ new_flags = 0;
+
switch (head->magic) {
case 0:
- if (head->size)
+ if (head->size) {
*err = "Bad size for terminator";
- else
+ } else if (extra_data) {
+ /* End of main data, walking the extra data */
+ head = extra_data;
+ resv_sz = extra_sz;
+ offs = 0;
+
+ extra_data = NULL;
+ extra_sz = 0;
+ continue;
+ } else {
terminated = true;
+ }
break;
case FPSIMD_MAGIC:
if (flags & FPSIMD_CTX)
@@ -81,19 +178,44 @@ bool validate_reserved(ucontext_t *uc, size_t resv_sz, char **err)
else if (head->size !=
sizeof(struct fpsimd_context))
*err = "Bad size for fpsimd_context";
- flags |= FPSIMD_CTX;
+ new_flags |= FPSIMD_CTX;
break;
case ESR_MAGIC:
if (head->size != sizeof(struct esr_context))
*err = "Bad size for esr_context";
break;
+ case TPIDR2_MAGIC:
+ if (head->size != sizeof(struct tpidr2_context))
+ *err = "Bad size for tpidr2_context";
+ break;
case SVE_MAGIC:
if (flags & SVE_CTX)
*err = "Multiple SVE_MAGIC";
+ /* Size is validated in validate_sve_context() */
+ sve = (struct sve_context *)head;
+ new_flags |= SVE_CTX;
+ break;
+ case ZA_MAGIC:
+ if (flags & ZA_CTX)
+ *err = "Multiple ZA_MAGIC";
+ /* Size is validated in validate_za_context() */
+ za = (struct za_context *)head;
+ new_flags |= ZA_CTX;
+ break;
+ case ZT_MAGIC:
+ if (flags & ZT_CTX)
+ *err = "Multiple ZT_MAGIC";
+ /* Size is validated in validate_za_context() */
+ zt = (struct zt_context *)head;
+ new_flags |= ZT_CTX;
+ break;
+ case FPMR_MAGIC:
+ if (flags & FPMR_CTX)
+ *err = "Multiple FPMR_MAGIC";
else if (head->size !=
- sizeof(struct sve_context))
- *err = "Bad size for sve_context";
- flags |= SVE_CTX;
+ sizeof(struct fpmr_context))
+ *err = "Bad size for fpmr_context";
+ new_flags |= FPMR_CTX;
break;
case EXTRA_MAGIC:
if (flags & EXTRA_CTX)
@@ -101,7 +223,7 @@ bool validate_reserved(ucontext_t *uc, size_t resv_sz, char **err)
else if (head->size !=
sizeof(struct extra_context))
*err = "Bad size for extra_context";
- flags |= EXTRA_CTX;
+ new_flags |= EXTRA_CTX;
extra = (struct extra_context *)head;
break;
case KSFT_BAD_MAGIC:
@@ -117,11 +239,19 @@ bool validate_reserved(ucontext_t *uc, size_t resv_sz, char **err)
/*
* A still unknown Magic: potentially freshly added
* to the Kernel code and still unknown to the
- * tests.
+ * tests. Magic numbers are supposed to be allocated
+ * as somewhat meaningful ASCII strings so try to
+ * print as such as well as the raw number.
*/
+ memcpy(magic, &head->magic, sizeof(magic));
+ for (i = 0; i < sizeof(magic); i++)
+ if (!isalnum(magic[i]))
+ magic[i] = '?';
+
fprintf(stdout,
- "SKIP Unknown MAGIC: 0x%X - Is KSFT arm64/signal up to date ?\n",
- head->magic);
+ "SKIP Unknown MAGIC: 0x%X (%c%c%c%c) - Is KSFT arm64/signal up to date ?\n",
+ head->magic,
+ magic[3], magic[2], magic[1], magic[0]);
break;
}
@@ -134,9 +264,21 @@ bool validate_reserved(ucontext_t *uc, size_t resv_sz, char **err)
return false;
}
- if (flags & EXTRA_CTX)
- if (!validate_extra_context(extra, err))
+ if (new_flags & EXTRA_CTX)
+ if (!validate_extra_context(extra, err,
+ &extra_data, &extra_sz))
return false;
+ if (new_flags & SVE_CTX)
+ if (!validate_sve_context(sve, err))
+ return false;
+ if (new_flags & ZA_CTX)
+ if (!validate_za_context(za, err))
+ return false;
+ if (new_flags & ZT_CTX)
+ if (!validate_zt_context(zt, err))
+ return false;
+
+ flags |= new_flags;
head = GET_RESV_NEXT_HEAD(head);
}
@@ -146,6 +288,11 @@ bool validate_reserved(ucontext_t *uc, size_t resv_sz, char **err)
return false;
}
+ if (terminated && (flags & ZT_CTX) && !(flags & ZA_CTX)) {
+ *err = "ZT context but no ZA context";
+ return false;
+ }
+
return true;
}
diff --git a/tools/testing/selftests/arm64/signal/testcases/testcases.h b/tools/testing/selftests/arm64/signal/testcases/testcases.h
index ad884c135314..7727126347e0 100644
--- a/tools/testing/selftests/arm64/signal/testcases/testcases.h
+++ b/tools/testing/selftests/arm64/signal/testcases/testcases.h
@@ -16,7 +16,10 @@
#define FPSIMD_CTX (1 << 0)
#define SVE_CTX (1 << 1)
-#define EXTRA_CTX (1 << 2)
+#define ZA_CTX (1 << 2)
+#define EXTRA_CTX (1 << 3)
+#define ZT_CTX (1 << 4)
+#define FPMR_CTX (1 << 5)
#define KSFT_BAD_MAGIC 0xdeadbeef
@@ -29,6 +32,13 @@
#define GET_SF_RESV_SIZE(sf) \
sizeof((sf).uc.uc_mcontext.__reserved)
+#define GET_BUF_RESV_HEAD(buf) \
+ (struct _aarch64_ctx *)(&(buf).uc.uc_mcontext.__reserved)
+
+#define GET_BUF_RESV_SIZE(buf) \
+ (sizeof(buf) - sizeof(buf.uc) + \
+ sizeof((buf).uc.uc_mcontext.__reserved))
+
#define GET_UCP_RESV_SIZE(ucp) \
sizeof((ucp)->uc_mcontext.__reserved)
@@ -78,8 +88,6 @@ struct fake_sigframe {
bool validate_reserved(ucontext_t *uc, size_t resv_sz, char **err);
-bool validate_extra_context(struct extra_context *extra, char **err);
-
struct _aarch64_ctx *get_header(struct _aarch64_ctx *head, uint32_t magic,
size_t resv_sz, size_t *offset);
diff --git a/tools/testing/selftests/arm64/signal/testcases/tpidr2_restore.c b/tools/testing/selftests/arm64/signal/testcases/tpidr2_restore.c
new file mode 100644
index 000000000000..f9a86c00c28c
--- /dev/null
+++ b/tools/testing/selftests/arm64/signal/testcases/tpidr2_restore.c
@@ -0,0 +1,86 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (C) 2023 ARM Limited
+ *
+ * Verify that the TPIDR2 register context in signal frames is restored.
+ */
+
+#include <signal.h>
+#include <ucontext.h>
+#include <sys/auxv.h>
+#include <sys/prctl.h>
+#include <unistd.h>
+#include <asm/sigcontext.h>
+
+#include "test_signals_utils.h"
+#include "testcases.h"
+
+#define SYS_TPIDR2 "S3_3_C13_C0_5"
+
+static uint64_t get_tpidr2(void)
+{
+ uint64_t val;
+
+ asm volatile (
+ "mrs %0, " SYS_TPIDR2 "\n"
+ : "=r"(val)
+ :
+ : "cc");
+
+ return val;
+}
+
+static void set_tpidr2(uint64_t val)
+{
+ asm volatile (
+ "msr " SYS_TPIDR2 ", %0\n"
+ :
+ : "r"(val)
+ : "cc");
+}
+
+
+static uint64_t initial_tpidr2;
+
+static bool save_tpidr2(struct tdescr *td)
+{
+ initial_tpidr2 = get_tpidr2();
+ fprintf(stderr, "Initial TPIDR2: %lx\n", initial_tpidr2);
+
+ return true;
+}
+
+static int modify_tpidr2(struct tdescr *td, siginfo_t *si, ucontext_t *uc)
+{
+ uint64_t my_tpidr2 = get_tpidr2();
+
+ my_tpidr2++;
+ fprintf(stderr, "Setting TPIDR2 to %lx\n", my_tpidr2);
+ set_tpidr2(my_tpidr2);
+
+ return 0;
+}
+
+static void check_tpidr2(struct tdescr *td)
+{
+ uint64_t tpidr2 = get_tpidr2();
+
+ td->pass = tpidr2 == initial_tpidr2;
+
+ if (td->pass)
+ fprintf(stderr, "TPIDR2 restored\n");
+ else
+ fprintf(stderr, "TPIDR2 was %lx but is now %lx\n",
+ initial_tpidr2, tpidr2);
+}
+
+struct tdescr tde = {
+ .name = "TPIDR2 restore",
+ .descr = "Validate that TPIDR2 is restored from the sigframe",
+ .feats_required = FEAT_SME,
+ .timeout = 3,
+ .sig_trig = SIGUSR1,
+ .init = save_tpidr2,
+ .run = modify_tpidr2,
+ .check_result = check_tpidr2,
+};
diff --git a/tools/testing/selftests/arm64/signal/testcases/tpidr2_siginfo.c b/tools/testing/selftests/arm64/signal/testcases/tpidr2_siginfo.c
new file mode 100644
index 000000000000..6a2c82bf7ead
--- /dev/null
+++ b/tools/testing/selftests/arm64/signal/testcases/tpidr2_siginfo.c
@@ -0,0 +1,90 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (C) 2022 ARM Limited
+ *
+ * Verify that the TPIDR2 register context in signal frames is set up as
+ * expected.
+ */
+
+#include <signal.h>
+#include <ucontext.h>
+#include <sys/auxv.h>
+#include <sys/prctl.h>
+#include <unistd.h>
+#include <asm/sigcontext.h>
+
+#include "test_signals_utils.h"
+#include "testcases.h"
+
+static union {
+ ucontext_t uc;
+ char buf[1024 * 128];
+} context;
+
+#define SYS_TPIDR2 "S3_3_C13_C0_5"
+
+static uint64_t get_tpidr2(void)
+{
+ uint64_t val;
+
+ asm volatile (
+ "mrs %0, " SYS_TPIDR2 "\n"
+ : "=r"(val)
+ :
+ : "cc");
+
+ return val;
+}
+
+int tpidr2_present(struct tdescr *td, siginfo_t *si, ucontext_t *uc)
+{
+ struct _aarch64_ctx *head = GET_BUF_RESV_HEAD(context);
+ struct tpidr2_context *tpidr2_ctx;
+ size_t offset;
+ bool in_sigframe;
+ bool have_sme;
+ __u64 orig_tpidr2;
+
+ have_sme = getauxval(AT_HWCAP2) & HWCAP2_SME;
+ if (have_sme)
+ orig_tpidr2 = get_tpidr2();
+
+ if (!get_current_context(td, &context.uc, sizeof(context)))
+ return 1;
+
+ tpidr2_ctx = (struct tpidr2_context *)
+ get_header(head, TPIDR2_MAGIC, td->live_sz, &offset);
+
+ in_sigframe = tpidr2_ctx != NULL;
+
+ fprintf(stderr, "TPIDR2 sigframe %s on system %s SME\n",
+ in_sigframe ? "present" : "absent",
+ have_sme ? "with" : "without");
+
+ td->pass = (in_sigframe == have_sme);
+
+ /*
+ * Check that the value we read back was the one present at
+ * the time that the signal was triggered. TPIDR2 is owned by
+ * libc so we can't safely choose the value and it is possible
+ * that we may need to revisit this in future if something
+ * starts deciding to set a new TPIDR2 between us reading and
+ * the signal.
+ */
+ if (have_sme && tpidr2_ctx) {
+ if (tpidr2_ctx->tpidr2 != orig_tpidr2) {
+ fprintf(stderr, "TPIDR2 in frame is %llx, was %llx\n",
+ tpidr2_ctx->tpidr2, orig_tpidr2);
+ td->pass = false;
+ }
+ }
+
+ return 0;
+}
+
+struct tdescr tde = {
+ .name = "TPIDR2",
+ .descr = "Validate that TPIDR2 is present as expected",
+ .timeout = 3,
+ .run = tpidr2_present,
+};
diff --git a/tools/testing/selftests/arm64/signal/testcases/za_no_regs.c b/tools/testing/selftests/arm64/signal/testcases/za_no_regs.c
new file mode 100644
index 000000000000..4d6f94b6178f
--- /dev/null
+++ b/tools/testing/selftests/arm64/signal/testcases/za_no_regs.c
@@ -0,0 +1,119 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (C) 2021 ARM Limited
+ *
+ * Verify that the ZA register context in signal frames is set up as
+ * expected.
+ */
+
+#include <signal.h>
+#include <ucontext.h>
+#include <sys/prctl.h>
+
+#include "test_signals_utils.h"
+#include "testcases.h"
+
+static union {
+ ucontext_t uc;
+ char buf[1024 * 128];
+} context;
+static unsigned int vls[SVE_VQ_MAX];
+unsigned int nvls = 0;
+
+static bool sme_get_vls(struct tdescr *td)
+{
+ int vq, vl;
+
+ /*
+ * Enumerate up to SME_VQ_MAX vector lengths
+ */
+ for (vq = SVE_VQ_MAX; vq > 0; --vq) {
+ vl = prctl(PR_SME_SET_VL, vq * 16);
+ if (vl == -1)
+ return false;
+
+ vl &= PR_SME_VL_LEN_MASK;
+
+ /* Skip missing VLs */
+ vq = sve_vq_from_vl(vl);
+
+ vls[nvls++] = vl;
+ }
+
+ /* We need at least one VL */
+ if (nvls < 1) {
+ fprintf(stderr, "Only %d VL supported\n", nvls);
+ return false;
+ }
+
+ return true;
+}
+
+static int do_one_sme_vl(struct tdescr *td, siginfo_t *si, ucontext_t *uc,
+ unsigned int vl)
+{
+ size_t offset;
+ struct _aarch64_ctx *head = GET_BUF_RESV_HEAD(context);
+ struct za_context *za;
+
+ fprintf(stderr, "Testing VL %d\n", vl);
+
+ if (prctl(PR_SME_SET_VL, vl) != vl) {
+ fprintf(stderr, "Failed to set VL\n");
+ return 1;
+ }
+
+ /*
+ * Get a signal context which should have a SVE frame and registers
+ * in it.
+ */
+ if (!get_current_context(td, &context.uc, sizeof(context)))
+ return 1;
+
+ head = get_header(head, ZA_MAGIC, GET_BUF_RESV_SIZE(context), &offset);
+ if (!head) {
+ fprintf(stderr, "No ZA context\n");
+ return 1;
+ }
+
+ za = (struct za_context *)head;
+ if (za->vl != vl) {
+ fprintf(stderr, "Got VL %d, expected %d\n", za->vl, vl);
+ return 1;
+ }
+
+ if (head->size != ZA_SIG_REGS_OFFSET) {
+ fprintf(stderr, "Context size %u, expected %lu\n",
+ head->size, ZA_SIG_REGS_OFFSET);
+ return 1;
+ }
+
+ /* The actual size validation is done in get_current_context() */
+ fprintf(stderr, "Got expected size %u and VL %d\n",
+ head->size, za->vl);
+
+ return 0;
+}
+
+static int sme_regs(struct tdescr *td, siginfo_t *si, ucontext_t *uc)
+{
+ int i;
+
+ for (i = 0; i < nvls; i++) {
+ if (do_one_sme_vl(td, si, uc, vls[i]))
+ return 1;
+ }
+
+ td->pass = 1;
+
+ return 0;
+}
+
+struct tdescr tde = {
+ .name = "ZA registers - ZA disabled",
+ .descr = "Check ZA context with ZA disabled",
+ .feats_required = FEAT_SME,
+ .timeout = 3,
+ .init = sme_get_vls,
+ .run = sme_regs,
+};
diff --git a/tools/testing/selftests/arm64/signal/testcases/za_regs.c b/tools/testing/selftests/arm64/signal/testcases/za_regs.c
new file mode 100644
index 000000000000..174ad6656696
--- /dev/null
+++ b/tools/testing/selftests/arm64/signal/testcases/za_regs.c
@@ -0,0 +1,138 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (C) 2021 ARM Limited
+ *
+ * Verify that the ZA register context in signal frames is set up as
+ * expected.
+ */
+
+#include <signal.h>
+#include <ucontext.h>
+#include <sys/prctl.h>
+
+#include "test_signals_utils.h"
+#include "testcases.h"
+
+static union {
+ ucontext_t uc;
+ char buf[1024 * 128];
+} context;
+static unsigned int vls[SVE_VQ_MAX];
+unsigned int nvls = 0;
+
+static bool sme_get_vls(struct tdescr *td)
+{
+ int vq, vl;
+
+ /*
+ * Enumerate up to SME_VQ_MAX vector lengths
+ */
+ for (vq = SVE_VQ_MAX; vq > 0; --vq) {
+ vl = prctl(PR_SME_SET_VL, vq * 16);
+ if (vl == -1)
+ return false;
+
+ vl &= PR_SME_VL_LEN_MASK;
+
+ /* Did we find the lowest supported VL? */
+ if (vq < sve_vq_from_vl(vl))
+ break;
+
+ /* Skip missing VLs */
+ vq = sve_vq_from_vl(vl);
+
+ vls[nvls++] = vl;
+ }
+
+ /* We need at least one VL */
+ if (nvls < 1) {
+ fprintf(stderr, "Only %d VL supported\n", nvls);
+ return false;
+ }
+
+ return true;
+}
+
+static void setup_za_regs(void)
+{
+ /* smstart za; real data is TODO */
+ asm volatile(".inst 0xd503457f" : : : );
+}
+
+static char zeros[ZA_SIG_REGS_SIZE(SVE_VQ_MAX)];
+
+static int do_one_sme_vl(struct tdescr *td, siginfo_t *si, ucontext_t *uc,
+ unsigned int vl)
+{
+ size_t offset;
+ struct _aarch64_ctx *head = GET_BUF_RESV_HEAD(context);
+ struct za_context *za;
+
+ fprintf(stderr, "Testing VL %d\n", vl);
+
+ if (prctl(PR_SME_SET_VL, vl) != vl) {
+ fprintf(stderr, "Failed to set VL\n");
+ return 1;
+ }
+
+ /*
+ * Get a signal context which should have a SVE frame and registers
+ * in it.
+ */
+ setup_za_regs();
+ if (!get_current_context(td, &context.uc, sizeof(context)))
+ return 1;
+
+ head = get_header(head, ZA_MAGIC, GET_BUF_RESV_SIZE(context), &offset);
+ if (!head) {
+ fprintf(stderr, "No ZA context\n");
+ return 1;
+ }
+
+ za = (struct za_context *)head;
+ if (za->vl != vl) {
+ fprintf(stderr, "Got VL %d, expected %d\n", za->vl, vl);
+ return 1;
+ }
+
+ if (head->size != ZA_SIG_CONTEXT_SIZE(sve_vq_from_vl(vl))) {
+ fprintf(stderr, "ZA context size %u, expected %lu\n",
+ head->size, ZA_SIG_CONTEXT_SIZE(sve_vq_from_vl(vl)));
+ return 1;
+ }
+
+ fprintf(stderr, "Got expected size %u and VL %d\n",
+ head->size, za->vl);
+
+ /* We didn't load any data into ZA so it should be all zeros */
+ if (memcmp(zeros, (char *)za + ZA_SIG_REGS_OFFSET,
+ ZA_SIG_REGS_SIZE(sve_vq_from_vl(za->vl))) != 0) {
+ fprintf(stderr, "ZA data invalid\n");
+ return 1;
+ }
+
+ return 0;
+}
+
+static int sme_regs(struct tdescr *td, siginfo_t *si, ucontext_t *uc)
+{
+ int i;
+
+ for (i = 0; i < nvls; i++) {
+ if (do_one_sme_vl(td, si, uc, vls[i]))
+ return 1;
+ }
+
+ td->pass = 1;
+
+ return 0;
+}
+
+struct tdescr tde = {
+ .name = "ZA register",
+ .descr = "Check that we get the right ZA registers reported",
+ .feats_required = FEAT_SME,
+ .timeout = 3,
+ .init = sme_get_vls,
+ .run = sme_regs,
+};
diff --git a/tools/testing/selftests/arm64/signal/testcases/zt_no_regs.c b/tools/testing/selftests/arm64/signal/testcases/zt_no_regs.c
new file mode 100644
index 000000000000..34f69bcf821e
--- /dev/null
+++ b/tools/testing/selftests/arm64/signal/testcases/zt_no_regs.c
@@ -0,0 +1,51 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (C) 2021 ARM Limited
+ *
+ * Verify that using an instruction not supported in streaming mode
+ * traps when in streaming mode.
+ */
+
+#include <signal.h>
+#include <ucontext.h>
+#include <sys/prctl.h>
+
+#include "test_signals_utils.h"
+#include "testcases.h"
+
+static union {
+ ucontext_t uc;
+ char buf[1024 * 128];
+} context;
+
+int zt_no_regs_run(struct tdescr *td, siginfo_t *si, ucontext_t *uc)
+{
+ size_t offset;
+ struct _aarch64_ctx *head = GET_BUF_RESV_HEAD(context);
+
+ /*
+ * Get a signal context which should not have a ZT frame and
+ * registers in it.
+ */
+ if (!get_current_context(td, &context.uc, sizeof(context)))
+ return 1;
+
+ head = get_header(head, ZT_MAGIC, GET_BUF_RESV_SIZE(context), &offset);
+ if (head) {
+ fprintf(stderr, "Got unexpected ZT context\n");
+ return 1;
+ }
+
+ td->pass = 1;
+
+ return 0;
+}
+
+struct tdescr tde = {
+ .name = "ZT register data not present",
+ .descr = "Validate that ZT is not present when ZA is disabled",
+ .feats_required = FEAT_SME2,
+ .timeout = 3,
+ .sanity_disabled = true,
+ .run = zt_no_regs_run,
+};
diff --git a/tools/testing/selftests/arm64/signal/testcases/zt_regs.c b/tools/testing/selftests/arm64/signal/testcases/zt_regs.c
new file mode 100644
index 000000000000..2e384d731618
--- /dev/null
+++ b/tools/testing/selftests/arm64/signal/testcases/zt_regs.c
@@ -0,0 +1,86 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (C) 2021 ARM Limited
+ *
+ * Verify that using an instruction not supported in streaming mode
+ * traps when in streaming mode.
+ */
+
+#include <signal.h>
+#include <ucontext.h>
+#include <sys/prctl.h>
+
+#include "test_signals_utils.h"
+#include "testcases.h"
+
+static union {
+ ucontext_t uc;
+ char buf[1024 * 128];
+} context;
+
+static void enable_za(void)
+{
+ /* smstart za; real data is TODO */
+ asm volatile(".inst 0xd503457f" : : : );
+}
+
+int zt_regs_run(struct tdescr *td, siginfo_t *si, ucontext_t *uc)
+{
+ size_t offset;
+ struct _aarch64_ctx *head = GET_BUF_RESV_HEAD(context);
+ struct zt_context *zt;
+ char *zeros;
+
+ /*
+ * Get a signal context which should have a ZT frame and registers
+ * in it.
+ */
+ enable_za();
+ if (!get_current_context(td, &context.uc, sizeof(context)))
+ return 1;
+
+ head = get_header(head, ZT_MAGIC, GET_BUF_RESV_SIZE(context), &offset);
+ if (!head) {
+ fprintf(stderr, "No ZT context\n");
+ return 1;
+ }
+
+ zt = (struct zt_context *)head;
+ if (zt->nregs == 0) {
+ fprintf(stderr, "Got context with no registers\n");
+ return 1;
+ }
+
+ fprintf(stderr, "Got expected size %u for %d registers\n",
+ head->size, zt->nregs);
+
+ /* We didn't load any data into ZT so it should be all zeros */
+ zeros = malloc(ZT_SIG_REGS_SIZE(zt->nregs));
+ if (!zeros) {
+ fprintf(stderr, "Out of memory, nregs=%u\n", zt->nregs);
+ return 1;
+ }
+ memset(zeros, 0, ZT_SIG_REGS_SIZE(zt->nregs));
+
+ if (memcmp(zeros, (char *)zt + ZT_SIG_REGS_OFFSET,
+ ZT_SIG_REGS_SIZE(zt->nregs)) != 0) {
+ fprintf(stderr, "ZT data invalid\n");
+ free(zeros);
+ return 1;
+ }
+
+ free(zeros);
+
+ td->pass = 1;
+
+ return 0;
+}
+
+struct tdescr tde = {
+ .name = "ZT register data",
+ .descr = "Validate that ZT is present and has data when ZA is enabled",
+ .feats_required = FEAT_SME2,
+ .timeout = 3,
+ .sanity_disabled = true,
+ .run = zt_regs_run,
+};
diff --git a/tools/testing/selftests/arm64/tags/Makefile b/tools/testing/selftests/arm64/tags/Makefile
index 41cb75070511..6d29cfde43a2 100644
--- a/tools/testing/selftests/arm64/tags/Makefile
+++ b/tools/testing/selftests/arm64/tags/Makefile
@@ -1,6 +1,6 @@
# SPDX-License-Identifier: GPL-2.0
-CFLAGS += -I../../../../../usr/include/
+CFLAGS += $(KHDR_INCLUDES)
TEST_GEN_PROGS := tags_test
TEST_PROGS := run_tags_test.sh
diff --git a/tools/testing/selftests/bpf/.gitignore b/tools/testing/selftests/bpf/.gitignore
index 1bb204cee853..f1aebabfb017 100644
--- a/tools/testing/selftests/bpf/.gitignore
+++ b/tools/testing/selftests/bpf/.gitignore
@@ -1,4 +1,7 @@
# SPDX-License-Identifier: GPL-2.0-only
+bpftool
+bpf-helpers*
+bpf-syscall*
test_verifier
test_maps
test_lru_map
@@ -6,36 +9,46 @@ test_lpm_map
test_tag
FEATURE-DUMP.libbpf
fixdep
-test_align
test_dev_cgroup
-/test_progs*
-test_tcpbpf_user
+/test_progs
+/test_progs-no_alu32
+/test_progs-bpf_gcc
+/test_progs-cpuv4
test_verifier_log
feature
test_sock
test_sock_addr
-test_sock_fields
urandom_read
-test_btf
test_sockmap
test_lirc_mode2_user
get_cgroup_id_user
test_skb_cgroup_id_user
-test_socket_cookie
test_cgroup_storage
test_flow_dissector
flow_dissector_load
-test_netcnt
test_tcpnotify_user
test_libbpf
test_tcp_check_syncookie_user
test_sysctl
-test_current_pid_tgid_new_ns
xdping
test_cpp
+*.subskel.h
*.skel.h
+*.lskel.h
/no_alu32
/bpf_gcc
+/cpuv4
+/host-tools
/tools
/runqslower
/bench
+/veristat
+/sign-file
+/uprobe_multi
+*.ko
+*.tmp
+xskxceiver
+xdp_redirect_multi
+xdp_synproxy
+xdp_hw_metadata
+xdp_features
diff --git a/tools/testing/selftests/bpf/DENYLIST b/tools/testing/selftests/bpf/DENYLIST
new file mode 100644
index 000000000000..f748f2c33b22
--- /dev/null
+++ b/tools/testing/selftests/bpf/DENYLIST
@@ -0,0 +1,7 @@
+# TEMPORARY
+# Alphabetical order
+get_stack_raw_tp # spams with kernel warnings until next bpf -> bpf-next merge
+stacktrace_build_id
+stacktrace_build_id_nmi
+task_fd_query_rawtp
+varlen
diff --git a/tools/testing/selftests/bpf/DENYLIST.aarch64 b/tools/testing/selftests/bpf/DENYLIST.aarch64
new file mode 100644
index 000000000000..d8ade15e2789
--- /dev/null
+++ b/tools/testing/selftests/bpf/DENYLIST.aarch64
@@ -0,0 +1,14 @@
+bpf_cookie/multi_kprobe_attach_api # kprobe_multi_link_api_subtest:FAIL:fentry_raw_skel_load unexpected error: -3
+bpf_cookie/multi_kprobe_link_api # kprobe_multi_link_api_subtest:FAIL:fentry_raw_skel_load unexpected error: -3
+fexit_sleep # The test never returns. The remaining tests cannot start.
+kprobe_multi_bench_attach # needs CONFIG_FPROBE
+kprobe_multi_test # needs CONFIG_FPROBE
+module_attach # prog 'kprobe_multi': failed to auto-attach: -95
+fentry_test/fentry_many_args # fentry_many_args:FAIL:fentry_many_args_attach unexpected error: -524
+fexit_test/fexit_many_args # fexit_many_args:FAIL:fexit_many_args_attach unexpected error: -524
+fill_link_info/kprobe_multi_link_info # bpf_program__attach_kprobe_multi_opts unexpected error: -95
+fill_link_info/kretprobe_multi_link_info # bpf_program__attach_kprobe_multi_opts unexpected error: -95
+fill_link_info/kprobe_multi_invalid_ubuff # bpf_program__attach_kprobe_multi_opts unexpected error: -95
+missed/kprobe_recursion # missed_kprobe_recursion__attach unexpected error: -95 (errno 95)
+verifier_arena # JIT does not support arena
+arena_htab # JIT does not support arena
diff --git a/tools/testing/selftests/bpf/DENYLIST.s390x b/tools/testing/selftests/bpf/DENYLIST.s390x
new file mode 100644
index 000000000000..f4a2f66a683d
--- /dev/null
+++ b/tools/testing/selftests/bpf/DENYLIST.s390x
@@ -0,0 +1,8 @@
+# TEMPORARY
+# Alphabetical order
+exceptions # JIT does not support calling kfunc bpf_throw (exceptions)
+get_stack_raw_tp # user_stack corrupted user stack (no backchain userspace)
+stacktrace_build_id # compare_map_keys stackid_hmap vs. stackmap err -2 errno 2 (?)
+verifier_iterating_callbacks
+verifier_arena # JIT does not support arena
+arena_htab # JIT does not support arena
diff --git a/tools/testing/selftests/bpf/Makefile b/tools/testing/selftests/bpf/Makefile
index 22aaec74ea0a..3b9eb40d6343 100644
--- a/tools/testing/selftests/bpf/Makefile
+++ b/tools/testing/selftests/bpf/Makefile
@@ -1,6 +1,7 @@
# SPDX-License-Identifier: GPL-2.0
-include ../../../../scripts/Kbuild.include
+include ../../../build/Build.include
include ../../../scripts/Makefile.arch
+include ../../../scripts/Makefile.include
CXX ?= $(CROSS_COMPILE)g++
@@ -11,46 +12,94 @@ BPFDIR := $(LIBDIR)/bpf
TOOLSINCDIR := $(TOOLSDIR)/include
BPFTOOLDIR := $(TOOLSDIR)/bpf/bpftool
APIDIR := $(TOOLSINCDIR)/uapi
+ifneq ($(O),)
+GENDIR := $(O)/include/generated
+else
GENDIR := $(abspath ../../../../include/generated)
+endif
GENHDR := $(GENDIR)/autoconf.h
+PKG_CONFIG ?= $(CROSS_COMPILE)pkg-config
ifneq ($(wildcard $(GENHDR)),)
GENFLAGS := -DHAVE_GENHDR
endif
-CLANG ?= clang
-LLC ?= llc
-LLVM_OBJCOPY ?= llvm-objcopy
BPF_GCC ?= $(shell command -v bpf-gcc;)
SAN_CFLAGS ?=
-CFLAGS += -g -rdynamic -Wall -O2 $(GENFLAGS) $(SAN_CFLAGS) \
+SAN_LDFLAGS ?= $(SAN_CFLAGS)
+RELEASE ?=
+OPT_FLAGS ?= $(if $(RELEASE),-O2,-O0)
+
+LIBELF_CFLAGS := $(shell $(PKG_CONFIG) libelf --cflags 2>/dev/null)
+LIBELF_LIBS := $(shell $(PKG_CONFIG) libelf --libs 2>/dev/null || echo -lelf)
+
+CFLAGS += -g $(OPT_FLAGS) -rdynamic \
+ -Wall -Werror -fno-omit-frame-pointer \
+ $(GENFLAGS) $(SAN_CFLAGS) $(LIBELF_CFLAGS) \
-I$(CURDIR) -I$(INCLUDE_DIR) -I$(GENDIR) -I$(LIBDIR) \
- -I$(TOOLSINCDIR) -I$(APIDIR) \
- -Dbpf_prog_load=bpf_prog_test_load \
- -Dbpf_load_program=bpf_test_load_program
-LDLIBS += -lcap -lelf -lz -lrt -lpthread
+ -I$(TOOLSINCDIR) -I$(APIDIR) -I$(OUTPUT)
+LDFLAGS += $(SAN_LDFLAGS)
+LDLIBS += $(LIBELF_LIBS) -lz -lrt -lpthread
+
+# The following tests perform type punning and they may break strict
+# aliasing rules, which are exploited by both GCC and clang by default
+# while optimizing. This can lead to broken programs.
+progs/bind4_prog.c-CFLAGS := -fno-strict-aliasing
+progs/bind6_prog.c-CFLAGS := -fno-strict-aliasing
+progs/dynptr_fail.c-CFLAGS := -fno-strict-aliasing
+progs/linked_list_fail.c-CFLAGS := -fno-strict-aliasing
+progs/map_kptr_fail.c-CFLAGS := -fno-strict-aliasing
+progs/syscall.c-CFLAGS := -fno-strict-aliasing
+progs/test_pkt_md_access.c-CFLAGS := -fno-strict-aliasing
+progs/test_sk_lookup.c-CFLAGS := -fno-strict-aliasing
+progs/timer_crash.c-CFLAGS := -fno-strict-aliasing
+
+ifneq ($(LLVM),)
+# Silence some warnings when compiled with clang
+CFLAGS += -Wno-unused-command-line-argument
+endif
+
+# Check whether bpf cpu=v4 is supported or not by clang
+ifneq ($(shell $(CLANG) --target=bpf -mcpu=help 2>&1 | grep 'v4'),)
+CLANG_CPUV4 := 1
+endif
# Order correspond to 'make run_tests' order
TEST_GEN_PROGS = test_verifier test_tag test_maps test_lru_map test_lpm_map test_progs \
- test_align test_verifier_log test_dev_cgroup test_tcpbpf_user \
- test_sock test_btf test_sockmap get_cgroup_id_user test_socket_cookie \
+ test_dev_cgroup \
+ test_sock test_sockmap get_cgroup_id_user \
test_cgroup_storage \
- test_netcnt test_tcpnotify_user test_sock_fields test_sysctl \
- test_progs-no_alu32 \
- test_current_pid_tgid_new_ns
+ test_tcpnotify_user test_sysctl \
+ test_progs-no_alu32
+TEST_INST_SUBDIRS := no_alu32
# Also test bpf-gcc, if present
ifneq ($(BPF_GCC),)
TEST_GEN_PROGS += test_progs-bpf_gcc
+TEST_INST_SUBDIRS += bpf_gcc
+
+# The following tests contain C code that, although technically legal,
+# triggers GCC warnings that cannot be disabled: declaration of
+# anonymous struct types in function parameter lists.
+progs/btf_dump_test_case_bitfields.c-CFLAGS := -Wno-error
+progs/btf_dump_test_case_namespacing.c-CFLAGS := -Wno-error
+progs/btf_dump_test_case_packing.c-CFLAGS := -Wno-error
+progs/btf_dump_test_case_padding.c-CFLAGS := -Wno-error
+progs/btf_dump_test_case_syntax.c-CFLAGS := -Wno-error
+endif
+
+ifneq ($(CLANG_CPUV4),)
+TEST_GEN_PROGS += test_progs-cpuv4
+TEST_INST_SUBDIRS += cpuv4
endif
-TEST_GEN_FILES =
-TEST_FILES = test_lwt_ip_encap.o \
- test_tc_edt.o
+TEST_GEN_FILES = test_lwt_ip_encap.bpf.o test_tc_edt.bpf.o
+TEST_FILES = xsk_prereqs.sh $(wildcard progs/btf_dump_test_case_*.c)
# Order correspond to 'make run_tests' order
TEST_PROGS := test_kmod.sh \
test_xdp_redirect.sh \
+ test_xdp_redirect_multi.sh \
test_xdp_meta.sh \
test_xdp_veth.sh \
test_offload.py \
@@ -68,20 +117,24 @@ TEST_PROGS := test_kmod.sh \
test_tc_edt.sh \
test_xdping.sh \
test_bpftool_build.sh \
- test_bpftool.sh
+ test_bpftool.sh \
+ test_bpftool_metadata.sh \
+ test_doc_build.sh \
+ test_xsk.sh \
+ test_xdp_features.sh
TEST_PROGS_EXTENDED := with_addr.sh \
- with_tunnels.sh \
- tcp_client.py \
- tcp_server.py \
- test_xdp_vlan.sh
+ with_tunnels.sh ima_setup.sh verify_sig_setup.sh \
+ test_xdp_vlan.sh test_bpftool.py
# Compile but not part of 'make run_tests'
TEST_GEN_PROGS_EXTENDED = test_sock_addr test_skb_cgroup_id_user \
flow_dissector_load test_flow_dissector test_tcp_check_syncookie_user \
- test_lirc_mode2_user xdping test_cpp runqslower bench
+ test_lirc_mode2_user xdping test_cpp runqslower bench bpf_testmod.ko \
+ xskxceiver xdp_redirect_multi xdp_synproxy veristat xdp_hw_metadata \
+ xdp_features bpf_test_no_cfi.ko
-TEST_CUSTOM_PROGS = urandom_read
+TEST_GEN_FILES += liburandom_read.so urandom_read sign-file uprobe_multi
# Emit succinct information message describing current building step
# $1 - generic step name (e.g., CC, LINK, etc);
@@ -102,7 +155,12 @@ endif
OVERRIDE_TARGETS := 1
override define CLEAN
$(call msg,CLEAN)
- $(RM) -r $(TEST_GEN_PROGS) $(TEST_GEN_PROGS_EXTENDED) $(TEST_GEN_FILES) $(EXTRA_CLEAN)
+ $(Q)$(RM) -r $(TEST_GEN_PROGS)
+ $(Q)$(RM) -r $(TEST_GEN_PROGS_EXTENDED)
+ $(Q)$(RM) -r $(TEST_GEN_FILES)
+ $(Q)$(RM) -r $(EXTRA_CLEAN)
+ $(Q)$(MAKE) -C bpf_testmod clean
+ $(Q)$(MAKE) docs-clean
endef
include ../lib.mk
@@ -111,89 +169,232 @@ SCRATCH_DIR := $(OUTPUT)/tools
BUILD_DIR := $(SCRATCH_DIR)/build
INCLUDE_DIR := $(SCRATCH_DIR)/include
BPFOBJ := $(BUILD_DIR)/libbpf/libbpf.a
+ifneq ($(CROSS_COMPILE),)
+HOST_BUILD_DIR := $(BUILD_DIR)/host
+HOST_SCRATCH_DIR := $(OUTPUT)/host-tools
+HOST_INCLUDE_DIR := $(HOST_SCRATCH_DIR)/include
+else
+HOST_BUILD_DIR := $(BUILD_DIR)
+HOST_SCRATCH_DIR := $(SCRATCH_DIR)
+HOST_INCLUDE_DIR := $(INCLUDE_DIR)
+endif
+HOST_BPFOBJ := $(HOST_BUILD_DIR)/libbpf/libbpf.a
+RESOLVE_BTFIDS := $(HOST_BUILD_DIR)/resolve_btfids/resolve_btfids
+RUNQSLOWER_OUTPUT := $(BUILD_DIR)/runqslower/
+
+VMLINUX_BTF_PATHS ?= $(if $(O),$(O)/vmlinux) \
+ $(if $(KBUILD_OUTPUT),$(KBUILD_OUTPUT)/vmlinux) \
+ ../../../../vmlinux \
+ /sys/kernel/btf/vmlinux \
+ /boot/vmlinux-$(shell uname -r)
+VMLINUX_BTF ?= $(abspath $(firstword $(wildcard $(VMLINUX_BTF_PATHS))))
+ifeq ($(VMLINUX_BTF),)
+$(error Cannot find a vmlinux for VMLINUX_BTF at any of "$(VMLINUX_BTF_PATHS)")
+endif
# Define simple and short `make test_progs`, `make test_sysctl`, etc targets
# to build individual tests.
# NOTE: Semicolon at the end is critical to override lib.mk's default static
# rule for binaries.
$(notdir $(TEST_GEN_PROGS) \
- $(TEST_PROGS) \
- $(TEST_PROGS_EXTENDED) \
- $(TEST_GEN_PROGS_EXTENDED) \
- $(TEST_CUSTOM_PROGS)): %: $(OUTPUT)/% ;
+ $(TEST_GEN_PROGS_EXTENDED)): %: $(OUTPUT)/% ;
+
+# sort removes libbpf duplicates when not cross-building
+MAKE_DIRS := $(sort $(BUILD_DIR)/libbpf $(HOST_BUILD_DIR)/libbpf \
+ $(BUILD_DIR)/bpftool $(HOST_BUILD_DIR)/bpftool \
+ $(HOST_BUILD_DIR)/resolve_btfids \
+ $(RUNQSLOWER_OUTPUT) $(INCLUDE_DIR))
+$(MAKE_DIRS):
+ $(call msg,MKDIR,,$@)
+ $(Q)mkdir -p $@
+
+$(OUTPUT)/%.o: %.c
+ $(call msg,CC,,$@)
+ $(Q)$(CC) $(CFLAGS) -c $(filter %.c,$^) $(LDLIBS) -o $@
$(OUTPUT)/%:%.c
$(call msg,BINARY,,$@)
- $(LINK.c) $^ $(LDLIBS) -o $@
+ $(Q)$(LINK.c) $^ $(LDLIBS) -o $@
-$(OUTPUT)/urandom_read: urandom_read.c
+# LLVM's ld.lld doesn't support all the architectures, so use it only on x86
+ifeq ($(SRCARCH),$(filter $(SRCARCH),x86 riscv))
+LLD := lld
+else
+LLD := ld
+endif
+
+# Filter out -static for liburandom_read.so and its dependent targets so that static builds
+# do not fail. Static builds leave urandom_read relying on system-wide shared libraries.
+$(OUTPUT)/liburandom_read.so: urandom_read_lib1.c urandom_read_lib2.c liburandom_read.map
+ $(call msg,LIB,,$@)
+ $(Q)$(CLANG) $(CLANG_TARGET_ARCH) \
+ $(filter-out -static,$(CFLAGS) $(LDFLAGS)) \
+ $(filter %.c,$^) $(filter-out -static,$(LDLIBS)) \
+ -fuse-ld=$(LLD) -Wl,-znoseparate-code -Wl,--build-id=sha1 \
+ -Wl,--version-script=liburandom_read.map \
+ -fPIC -shared -o $@
+
+$(OUTPUT)/urandom_read: urandom_read.c urandom_read_aux.c $(OUTPUT)/liburandom_read.so
$(call msg,BINARY,,$@)
- $(CC) $(LDFLAGS) -o $@ $< $(LDLIBS) -Wl,--build-id
+ $(Q)$(CLANG) $(CLANG_TARGET_ARCH) \
+ $(filter-out -static,$(CFLAGS) $(LDFLAGS)) $(filter %.c,$^) \
+ -lurandom_read $(filter-out -static,$(LDLIBS)) -L$(OUTPUT) \
+ -fuse-ld=$(LLD) -Wl,-znoseparate-code -Wl,--build-id=sha1 \
+ -Wl,-rpath=. -o $@
+
+$(OUTPUT)/sign-file: ../../../../scripts/sign-file.c
+ $(call msg,SIGN-FILE,,$@)
+ $(Q)$(CC) $(shell $(PKG_CONFIG) --cflags libcrypto 2> /dev/null) \
+ $< -o $@ \
+ $(shell $(PKG_CONFIG) --libs libcrypto 2> /dev/null || echo -lcrypto)
+
+$(OUTPUT)/bpf_testmod.ko: $(VMLINUX_BTF) $(RESOLVE_BTFIDS) $(wildcard bpf_testmod/Makefile bpf_testmod/*.[ch])
+ $(call msg,MOD,,$@)
+ $(Q)$(RM) bpf_testmod/bpf_testmod.ko # force re-compilation
+ $(Q)$(MAKE) $(submake_extras) RESOLVE_BTFIDS=$(RESOLVE_BTFIDS) -C bpf_testmod
+ $(Q)cp bpf_testmod/bpf_testmod.ko $@
+
+$(OUTPUT)/bpf_test_no_cfi.ko: $(VMLINUX_BTF) $(RESOLVE_BTFIDS) $(wildcard bpf_test_no_cfi/Makefile bpf_test_no_cfi/*.[ch])
+ $(call msg,MOD,,$@)
+ $(Q)$(RM) bpf_test_no_cfi/bpf_test_no_cfi.ko # force re-compilation
+ $(Q)$(MAKE) $(submake_extras) RESOLVE_BTFIDS=$(RESOLVE_BTFIDS) -C bpf_test_no_cfi
+ $(Q)cp bpf_test_no_cfi/bpf_test_no_cfi.ko $@
+
+DEFAULT_BPFTOOL := $(HOST_SCRATCH_DIR)/sbin/bpftool
+ifneq ($(CROSS_COMPILE),)
+CROSS_BPFTOOL := $(SCRATCH_DIR)/sbin/bpftool
+TRUNNER_BPFTOOL := $(CROSS_BPFTOOL)
+USE_BOOTSTRAP := ""
+else
+TRUNNER_BPFTOOL := $(DEFAULT_BPFTOOL)
+USE_BOOTSTRAP := "bootstrap/"
+endif
-$(OUTPUT)/test_stub.o: test_stub.c $(BPFOBJ)
- $(call msg,CC,,$@)
- $(CC) -c $(CFLAGS) -o $@ $<
+$(OUTPUT)/runqslower: $(BPFOBJ) | $(DEFAULT_BPFTOOL) $(RUNQSLOWER_OUTPUT)
+ $(Q)$(MAKE) $(submake_extras) -C $(TOOLSDIR)/bpf/runqslower \
+ OUTPUT=$(RUNQSLOWER_OUTPUT) VMLINUX_BTF=$(VMLINUX_BTF) \
+ BPFTOOL_OUTPUT=$(HOST_BUILD_DIR)/bpftool/ \
+ BPFOBJ_OUTPUT=$(BUILD_DIR)/libbpf \
+ BPFOBJ=$(BPFOBJ) BPF_INCLUDE=$(INCLUDE_DIR) \
+ EXTRA_CFLAGS='-g $(OPT_FLAGS) $(SAN_CFLAGS)' \
+ EXTRA_LDFLAGS='$(SAN_LDFLAGS)' && \
+ cp $(RUNQSLOWER_OUTPUT)runqslower $@
+
+TEST_GEN_PROGS_EXTENDED += $(TRUNNER_BPFTOOL)
+
+$(TEST_GEN_PROGS) $(TEST_GEN_PROGS_EXTENDED): $(BPFOBJ)
+
+TESTING_HELPERS := $(OUTPUT)/testing_helpers.o
+CGROUP_HELPERS := $(OUTPUT)/cgroup_helpers.o
+UNPRIV_HELPERS := $(OUTPUT)/unpriv_helpers.o
+TRACE_HELPERS := $(OUTPUT)/trace_helpers.o
+JSON_WRITER := $(OUTPUT)/json_writer.o
+CAP_HELPERS := $(OUTPUT)/cap_helpers.o
+
+$(OUTPUT)/test_dev_cgroup: $(CGROUP_HELPERS) $(TESTING_HELPERS)
+$(OUTPUT)/test_skb_cgroup_id_user: $(CGROUP_HELPERS) $(TESTING_HELPERS)
+$(OUTPUT)/test_sock: $(CGROUP_HELPERS) $(TESTING_HELPERS)
+$(OUTPUT)/test_sock_addr: $(CGROUP_HELPERS) $(TESTING_HELPERS)
+$(OUTPUT)/test_sockmap: $(CGROUP_HELPERS) $(TESTING_HELPERS)
+$(OUTPUT)/test_tcpnotify_user: $(CGROUP_HELPERS) $(TESTING_HELPERS) $(TRACE_HELPERS)
+$(OUTPUT)/get_cgroup_id_user: $(CGROUP_HELPERS) $(TESTING_HELPERS)
+$(OUTPUT)/test_cgroup_storage: $(CGROUP_HELPERS) $(TESTING_HELPERS)
+$(OUTPUT)/test_sock_fields: $(CGROUP_HELPERS) $(TESTING_HELPERS)
+$(OUTPUT)/test_sysctl: $(CGROUP_HELPERS) $(TESTING_HELPERS)
+$(OUTPUT)/test_tag: $(TESTING_HELPERS)
+$(OUTPUT)/test_lirc_mode2_user: $(TESTING_HELPERS)
+$(OUTPUT)/xdping: $(TESTING_HELPERS)
+$(OUTPUT)/flow_dissector_load: $(TESTING_HELPERS)
+$(OUTPUT)/test_maps: $(TESTING_HELPERS)
+$(OUTPUT)/test_verifier: $(TESTING_HELPERS) $(CAP_HELPERS) $(UNPRIV_HELPERS)
+$(OUTPUT)/xsk.o: $(BPFOBJ)
-VMLINUX_BTF_PATHS := $(if $(O),$(O)/vmlinux) \
- $(if $(KBUILD_OUTPUT),$(KBUILD_OUTPUT)/vmlinux) \
- ../../../../vmlinux \
- /sys/kernel/btf/vmlinux \
- /boot/vmlinux-$(shell uname -r)
-VMLINUX_BTF := $(abspath $(firstword $(wildcard $(VMLINUX_BTF_PATHS))))
-
-$(OUTPUT)/runqslower: $(BPFOBJ)
- $(Q)$(MAKE) $(submake_extras) -C $(TOOLSDIR)/bpf/runqslower \
- OUTPUT=$(SCRATCH_DIR)/ VMLINUX_BTF=$(VMLINUX_BTF) \
- BPFOBJ=$(BPFOBJ) BPF_INCLUDE=$(INCLUDE_DIR) && \
- cp $(SCRATCH_DIR)/runqslower $@
-
-$(TEST_GEN_PROGS) $(TEST_GEN_PROGS_EXTENDED): $(OUTPUT)/test_stub.o $(BPFOBJ)
-
-$(OUTPUT)/test_dev_cgroup: cgroup_helpers.c
-$(OUTPUT)/test_skb_cgroup_id_user: cgroup_helpers.c
-$(OUTPUT)/test_sock: cgroup_helpers.c
-$(OUTPUT)/test_sock_addr: cgroup_helpers.c
-$(OUTPUT)/test_socket_cookie: cgroup_helpers.c
-$(OUTPUT)/test_sockmap: cgroup_helpers.c
-$(OUTPUT)/test_tcpbpf_user: cgroup_helpers.c
-$(OUTPUT)/test_tcpnotify_user: cgroup_helpers.c trace_helpers.c
-$(OUTPUT)/get_cgroup_id_user: cgroup_helpers.c
-$(OUTPUT)/test_cgroup_storage: cgroup_helpers.c
-$(OUTPUT)/test_netcnt: cgroup_helpers.c
-$(OUTPUT)/test_sock_fields: cgroup_helpers.c
-$(OUTPUT)/test_sysctl: cgroup_helpers.c
-
-DEFAULT_BPFTOOL := $(SCRATCH_DIR)/sbin/bpftool
BPFTOOL ?= $(DEFAULT_BPFTOOL)
$(DEFAULT_BPFTOOL): $(wildcard $(BPFTOOLDIR)/*.[ch] $(BPFTOOLDIR)/Makefile) \
- $(BPFOBJ) | $(BUILD_DIR)/bpftool
+ $(HOST_BPFOBJ) | $(HOST_BUILD_DIR)/bpftool
$(Q)$(MAKE) $(submake_extras) -C $(BPFTOOLDIR) \
- OUTPUT=$(BUILD_DIR)/bpftool/ \
- prefix= DESTDIR=$(SCRATCH_DIR)/ install
+ ARCH= CROSS_COMPILE= CC="$(HOSTCC)" LD="$(HOSTLD)" \
+ EXTRA_CFLAGS='-g $(OPT_FLAGS)' \
+ OUTPUT=$(HOST_BUILD_DIR)/bpftool/ \
+ LIBBPF_OUTPUT=$(HOST_BUILD_DIR)/libbpf/ \
+ LIBBPF_DESTDIR=$(HOST_SCRATCH_DIR)/ \
+ prefix= DESTDIR=$(HOST_SCRATCH_DIR)/ install-bin
+
+ifneq ($(CROSS_COMPILE),)
+$(CROSS_BPFTOOL): $(wildcard $(BPFTOOLDIR)/*.[ch] $(BPFTOOLDIR)/Makefile) \
+ $(BPFOBJ) | $(BUILD_DIR)/bpftool
+ $(Q)$(MAKE) $(submake_extras) -C $(BPFTOOLDIR) \
+ ARCH=$(ARCH) CROSS_COMPILE=$(CROSS_COMPILE) \
+ EXTRA_CFLAGS='-g $(OPT_FLAGS)' \
+ OUTPUT=$(BUILD_DIR)/bpftool/ \
+ LIBBPF_OUTPUT=$(BUILD_DIR)/libbpf/ \
+ LIBBPF_DESTDIR=$(SCRATCH_DIR)/ \
+ prefix= DESTDIR=$(SCRATCH_DIR)/ install-bin
+endif
+
+all: docs
+
+docs:
+ $(Q)RST2MAN_OPTS="--exit-status=1" $(MAKE) $(submake_extras) \
+ -f Makefile.docs \
+ prefix= OUTPUT=$(OUTPUT)/ DESTDIR=$(OUTPUT)/ $@
+
+docs-clean:
+ $(Q)$(MAKE) $(submake_extras) \
+ -f Makefile.docs \
+ prefix= OUTPUT=$(OUTPUT)/ DESTDIR=$(OUTPUT)/ $@
$(BPFOBJ): $(wildcard $(BPFDIR)/*.[ch] $(BPFDIR)/Makefile) \
- ../../../include/uapi/linux/bpf.h \
- | $(INCLUDE_DIR) $(BUILD_DIR)/libbpf
+ $(APIDIR)/linux/bpf.h \
+ | $(BUILD_DIR)/libbpf
$(Q)$(MAKE) $(submake_extras) -C $(BPFDIR) OUTPUT=$(BUILD_DIR)/libbpf/ \
+ EXTRA_CFLAGS='-g $(OPT_FLAGS) $(SAN_CFLAGS)' \
+ EXTRA_LDFLAGS='$(SAN_LDFLAGS)' \
DESTDIR=$(SCRATCH_DIR) prefix= all install_headers
-$(BUILD_DIR)/libbpf $(BUILD_DIR)/bpftool $(INCLUDE_DIR):
- $(call msg,MKDIR,,$@)
- mkdir -p $@
+ifneq ($(BPFOBJ),$(HOST_BPFOBJ))
+$(HOST_BPFOBJ): $(wildcard $(BPFDIR)/*.[ch] $(BPFDIR)/Makefile) \
+ $(APIDIR)/linux/bpf.h \
+ | $(HOST_BUILD_DIR)/libbpf
+ $(Q)$(MAKE) $(submake_extras) -C $(BPFDIR) \
+ EXTRA_CFLAGS='-g $(OPT_FLAGS)' ARCH= CROSS_COMPILE= \
+ OUTPUT=$(HOST_BUILD_DIR)/libbpf/ \
+ CC="$(HOSTCC)" LD="$(HOSTLD)" \
+ DESTDIR=$(HOST_SCRATCH_DIR)/ prefix= all install_headers
+endif
-$(INCLUDE_DIR)/vmlinux.h: $(VMLINUX_BTF) | $(BPFTOOL) $(INCLUDE_DIR)
+$(INCLUDE_DIR)/vmlinux.h: $(VMLINUX_BTF) $(BPFTOOL) | $(INCLUDE_DIR)
+ifeq ($(VMLINUX_H),)
$(call msg,GEN,,$@)
- $(BPFTOOL) btf dump file $(VMLINUX_BTF) format c > $@
+ $(Q)$(BPFTOOL) btf dump file $(VMLINUX_BTF) format c > $@
+else
+ $(call msg,CP,,$@)
+ $(Q)cp "$(VMLINUX_H)" $@
+endif
+
+$(RESOLVE_BTFIDS): $(HOST_BPFOBJ) | $(HOST_BUILD_DIR)/resolve_btfids \
+ $(TOOLSDIR)/bpf/resolve_btfids/main.c \
+ $(TOOLSDIR)/lib/rbtree.c \
+ $(TOOLSDIR)/lib/zalloc.c \
+ $(TOOLSDIR)/lib/string.c \
+ $(TOOLSDIR)/lib/ctype.c \
+ $(TOOLSDIR)/lib/str_error_r.c
+ $(Q)$(MAKE) $(submake_extras) -C $(TOOLSDIR)/bpf/resolve_btfids \
+ CC="$(HOSTCC)" LD="$(HOSTLD)" AR="$(HOSTAR)" \
+ LIBBPF_INCLUDE=$(HOST_INCLUDE_DIR) \
+ OUTPUT=$(HOST_BUILD_DIR)/resolve_btfids/ BPFOBJ=$(HOST_BPFOBJ)
# Get Clang's default includes on this system, as opposed to those seen by
-# '-target bpf'. This fixes "missing" files on some architectures/distros,
+# '--target=bpf'. This fixes "missing" files on some architectures/distros,
# such as asm/byteorder.h, asm/socket.h, asm/sockios.h, sys/cdefs.h etc.
#
# Use '-idirafter': Don't interfere with include mechanics except where the
# build would have failed anyways.
define get_sys_includes
-$(shell $(1) -v -E - </dev/null 2>&1 \
- | sed -n '/<...> search starts here:/,/End of search list./{ s| \(/.*\)|-idirafter \1|p }')
+$(shell $(1) $(2) -v -E - </dev/null 2>&1 \
+ | sed -n '/<...> search starts here:/,/End of search list./{ s| \(/.*\)|-idirafter \1|p }') \
+$(shell $(1) $(2) -dM -E - </dev/null | grep '__riscv_xlen ' | awk '{printf("-D__riscv_xlen=%d -D__BITS_PER_LONG=%d", $$3, $$3)}') \
+$(shell $(1) $(2) -dM -E - </dev/null | grep '__loongarch_grlen ' | awk '{printf("-D__BITS_PER_LONG=%d", $$3)}')
endef
# Determine target endianness.
@@ -201,57 +402,86 @@ IS_LITTLE_ENDIAN = $(shell $(CC) -dM -E - </dev/null | \
grep 'define __BYTE_ORDER__ __ORDER_LITTLE_ENDIAN__')
MENDIAN=$(if $(IS_LITTLE_ENDIAN),-mlittle-endian,-mbig-endian)
-CLANG_SYS_INCLUDES = $(call get_sys_includes,$(CLANG))
-BPF_CFLAGS = -g -D__TARGET_ARCH_$(SRCARCH) $(MENDIAN) \
+ifneq ($(CROSS_COMPILE),)
+CLANG_TARGET_ARCH = --target=$(notdir $(CROSS_COMPILE:%-=%))
+endif
+
+CLANG_SYS_INCLUDES = $(call get_sys_includes,$(CLANG),$(CLANG_TARGET_ARCH))
+BPF_CFLAGS = -g -Wall -Werror -D__TARGET_ARCH_$(SRCARCH) $(MENDIAN) \
-I$(INCLUDE_DIR) -I$(CURDIR) -I$(APIDIR) \
- -I$(abspath $(OUTPUT)/../usr/include)
+ -I$(abspath $(OUTPUT)/../usr/include) \
+ -Wno-compare-distinct-pointer-types
+# TODO: enable me -Wsign-compare
-CLANG_CFLAGS = $(CLANG_SYS_INCLUDES) \
- -Wno-compare-distinct-pointer-types
+CLANG_CFLAGS = $(CLANG_SYS_INCLUDES)
$(OUTPUT)/test_l4lb_noinline.o: BPF_CFLAGS += -fno-inline
$(OUTPUT)/test_xdp_noinline.o: BPF_CFLAGS += -fno-inline
$(OUTPUT)/flow_dissector_load.o: flow_dissector_load.h
+$(OUTPUT)/cgroup_getset_retval_hooks.o: cgroup_getset_retval_hooks.h
# Build BPF object using Clang
# $1 - input .c file
# $2 - output .o file
# $3 - CFLAGS
-# $4 - LDFLAGS
define CLANG_BPF_BUILD_RULE
- $(call msg,CLNG-LLC,$(TRUNNER_BINARY),$2)
- ($(CLANG) $3 -O2 -target bpf -emit-llvm \
- -c $1 -o - || echo "BPF obj compilation failed") | \
- $(LLC) -mattr=dwarfris -march=bpf -mcpu=v3 $4 -filetype=obj -o $2
+ $(call msg,CLNG-BPF,$(TRUNNER_BINARY),$2)
+ $(Q)$(CLANG) $3 -O2 --target=bpf -c $1 -mcpu=v3 -o $2
endef
# Similar to CLANG_BPF_BUILD_RULE, but with disabled alu32
define CLANG_NOALU32_BPF_BUILD_RULE
- $(call msg,CLNG-LLC,$(TRUNNER_BINARY),$2)
- ($(CLANG) $3 -O2 -target bpf -emit-llvm \
- -c $1 -o - || echo "BPF obj compilation failed") | \
- $(LLC) -march=bpf -mcpu=v2 $4 -filetype=obj -o $2
+ $(call msg,CLNG-BPF,$(TRUNNER_BINARY),$2)
+ $(Q)$(CLANG) $3 -O2 --target=bpf -c $1 -mcpu=v2 -o $2
endef
-# Similar to CLANG_BPF_BUILD_RULE, but using native Clang and bpf LLC
-define CLANG_NATIVE_BPF_BUILD_RULE
+# Similar to CLANG_BPF_BUILD_RULE, but with cpu-v4
+define CLANG_CPUV4_BPF_BUILD_RULE
$(call msg,CLNG-BPF,$(TRUNNER_BINARY),$2)
- ($(CLANG) $3 -O2 -emit-llvm \
- -c $1 -o - || echo "BPF obj compilation failed") | \
- $(LLC) -march=bpf -mcpu=v3 $4 -filetype=obj -o $2
+ $(Q)$(CLANG) $3 -O2 --target=bpf -c $1 -mcpu=v4 -o $2
endef
# Build BPF object using GCC
define GCC_BPF_BUILD_RULE
$(call msg,GCC-BPF,$(TRUNNER_BINARY),$2)
- $(BPF_GCC) $3 $4 -O2 -c $1 -o $2
+ $(Q)$(BPF_GCC) $3 -O2 -c $1 -o $2
endef
SKEL_BLACKLIST := btf__% test_pinning_invalid.c test_sk_assign.c
+LINKED_SKELS := test_static_linked.skel.h linked_funcs.skel.h \
+ linked_vars.skel.h linked_maps.skel.h \
+ test_subskeleton.skel.h test_subskeleton_lib.skel.h \
+ test_usdt.skel.h
+
+LSKELS := fentry_test.c fexit_test.c fexit_sleep.c atomics.c \
+ trace_printk.c trace_vprintk.c map_ptr_kern.c \
+ core_kern.c core_kern_overflow.c test_ringbuf.c \
+ test_ringbuf_map_key.c
+
+# Generate both light skeleton and libbpf skeleton for these
+LSKELS_EXTRA := test_ksyms_module.c test_ksyms_weak.c kfunc_call_test.c \
+ kfunc_call_test_subprog.c
+SKEL_BLACKLIST += $$(LSKELS)
+
+test_static_linked.skel.h-deps := test_static_linked1.bpf.o test_static_linked2.bpf.o
+linked_funcs.skel.h-deps := linked_funcs1.bpf.o linked_funcs2.bpf.o
+linked_vars.skel.h-deps := linked_vars1.bpf.o linked_vars2.bpf.o
+linked_maps.skel.h-deps := linked_maps1.bpf.o linked_maps2.bpf.o
+# In the subskeleton case, we want the test_subskeleton_lib.subskel.h file
+# but that's created as a side-effect of the skel.h generation.
+test_subskeleton.skel.h-deps := test_subskeleton_lib2.bpf.o test_subskeleton_lib.bpf.o test_subskeleton.bpf.o
+test_subskeleton_lib.skel.h-deps := test_subskeleton_lib2.bpf.o test_subskeleton_lib.bpf.o
+test_usdt.skel.h-deps := test_usdt.bpf.o test_usdt_multispec.bpf.o
+xsk_xdp_progs.skel.h-deps := xsk_xdp_progs.bpf.o
+xdp_hw_metadata.skel.h-deps := xdp_hw_metadata.bpf.o
+xdp_features.skel.h-deps := xdp_features.bpf.o
+
+LINKED_BPF_SRCS := $(patsubst %.bpf.o,%.c,$(foreach skel,$(LINKED_SKELS),$($(skel)-deps)))
+
# Set up extra TRUNNER_XXX "temporary" variables in the environment (relies on
# $eval()) and pass control to DEFINE_TEST_RUNNER_RULES.
# Parameters:
# $1 - test runner base binary name (e.g., test_progs)
-# $2 - test runner extra "flavor" (e.g., no_alu32, gcc-bpf, etc)
+# $2 - test runner extra "flavor" (e.g., no_alu32, cpuv4, gcc-bpf, etc)
define DEFINE_TEST_RUNNER
TRUNNER_OUTPUT := $(OUTPUT)$(if $2,/)$2
@@ -263,10 +493,12 @@ TRUNNER_EXTRA_OBJS := $$(patsubst %.c,$$(TRUNNER_OUTPUT)/%.o, \
TRUNNER_EXTRA_HDRS := $$(filter %.h,$(TRUNNER_EXTRA_SOURCES))
TRUNNER_TESTS_HDR := $(TRUNNER_TESTS_DIR)/tests.h
TRUNNER_BPF_SRCS := $$(notdir $$(wildcard $(TRUNNER_BPF_PROGS_DIR)/*.c))
-TRUNNER_BPF_OBJS := $$(patsubst %.c,$$(TRUNNER_OUTPUT)/%.o, $$(TRUNNER_BPF_SRCS))
+TRUNNER_BPF_OBJS := $$(patsubst %.c,$$(TRUNNER_OUTPUT)/%.bpf.o, $$(TRUNNER_BPF_SRCS))
TRUNNER_BPF_SKELS := $$(patsubst %.c,$$(TRUNNER_OUTPUT)/%.skel.h, \
- $$(filter-out $(SKEL_BLACKLIST), \
+ $$(filter-out $(SKEL_BLACKLIST) $(LINKED_BPF_SRCS),\
$$(TRUNNER_BPF_SRCS)))
+TRUNNER_BPF_LSKELS := $$(patsubst %.c,$$(TRUNNER_OUTPUT)/%.lskel.h, $$(LSKELS) $$(LSKELS_EXTRA))
+TRUNNER_BPF_SKELS_LINKED := $$(addprefix $$(TRUNNER_OUTPUT)/,$(LINKED_SKELS))
TEST_GEN_FILES += $$(TRUNNER_BPF_OBJS)
# Evaluate rules now with extra TRUNNER_XXX variables above already defined
@@ -277,34 +509,60 @@ endef
# Using TRUNNER_XXX variables, provided by callers of DEFINE_TEST_RUNNER and
# set up by DEFINE_TEST_RUNNER itself, create test runner build rules with:
# $1 - test runner base binary name (e.g., test_progs)
-# $2 - test runner extra "flavor" (e.g., no_alu32, gcc-bpf, etc)
+# $2 - test runner extra "flavor" (e.g., no_alu32, cpuv4, gcc-bpf, etc)
define DEFINE_TEST_RUNNER_RULES
ifeq ($($(TRUNNER_OUTPUT)-dir),)
$(TRUNNER_OUTPUT)-dir := y
$(TRUNNER_OUTPUT):
$$(call msg,MKDIR,,$$@)
- mkdir -p $$@
+ $(Q)mkdir -p $$@
endif
# ensure we set up BPF objects generation rule just once for a given
# input/output directory combination
ifeq ($($(TRUNNER_BPF_PROGS_DIR)$(if $2,-)$2-bpfobjs),)
$(TRUNNER_BPF_PROGS_DIR)$(if $2,-)$2-bpfobjs := y
-$(TRUNNER_BPF_OBJS): $(TRUNNER_OUTPUT)/%.o: \
+$(TRUNNER_BPF_OBJS): $(TRUNNER_OUTPUT)/%.bpf.o: \
$(TRUNNER_BPF_PROGS_DIR)/%.c \
$(TRUNNER_BPF_PROGS_DIR)/*.h \
$$(INCLUDE_DIR)/vmlinux.h \
- $$(BPFOBJ) | $(TRUNNER_OUTPUT)
+ $(wildcard $(BPFDIR)/bpf_*.h) \
+ $(wildcard $(BPFDIR)/*.bpf.h) \
+ | $(TRUNNER_OUTPUT) $$(BPFOBJ)
$$(call $(TRUNNER_BPF_BUILD_RULE),$$<,$$@, \
- $(TRUNNER_BPF_CFLAGS), \
- $(TRUNNER_BPF_LDFLAGS))
+ $(TRUNNER_BPF_CFLAGS) \
+ $$($$<-CFLAGS))
-$(TRUNNER_BPF_SKELS): $(TRUNNER_OUTPUT)/%.skel.h: \
- $(TRUNNER_OUTPUT)/%.o \
- | $(BPFTOOL) $(TRUNNER_OUTPUT)
+$(TRUNNER_BPF_SKELS): %.skel.h: %.bpf.o $(BPFTOOL) | $(TRUNNER_OUTPUT)
+ $$(call msg,GEN-SKEL,$(TRUNNER_BINARY),$$@)
+ $(Q)$$(BPFTOOL) gen object $$(<:.o=.linked1.o) $$<
+ $(Q)$$(BPFTOOL) gen object $$(<:.o=.linked2.o) $$(<:.o=.linked1.o)
+ $(Q)$$(BPFTOOL) gen object $$(<:.o=.linked3.o) $$(<:.o=.linked2.o)
+ $(Q)diff $$(<:.o=.linked2.o) $$(<:.o=.linked3.o)
+ $(Q)$$(BPFTOOL) gen skeleton $$(<:.o=.linked3.o) name $$(notdir $$(<:.bpf.o=)) > $$@
+ $(Q)$$(BPFTOOL) gen subskeleton $$(<:.o=.linked3.o) name $$(notdir $$(<:.bpf.o=)) > $$(@:.skel.h=.subskel.h)
+ $(Q)rm -f $$(<:.o=.linked1.o) $$(<:.o=.linked2.o) $$(<:.o=.linked3.o)
+
+$(TRUNNER_BPF_LSKELS): %.lskel.h: %.bpf.o $(BPFTOOL) | $(TRUNNER_OUTPUT)
+ $$(call msg,GEN-SKEL,$(TRUNNER_BINARY),$$@)
+ $(Q)$$(BPFTOOL) gen object $$(<:.o=.llinked1.o) $$<
+ $(Q)$$(BPFTOOL) gen object $$(<:.o=.llinked2.o) $$(<:.o=.llinked1.o)
+ $(Q)$$(BPFTOOL) gen object $$(<:.o=.llinked3.o) $$(<:.o=.llinked2.o)
+ $(Q)diff $$(<:.o=.llinked2.o) $$(<:.o=.llinked3.o)
+ $(Q)$$(BPFTOOL) gen skeleton -L $$(<:.o=.llinked3.o) name $$(notdir $$(<:.bpf.o=_lskel)) > $$@
+ $(Q)rm -f $$(<:.o=.llinked1.o) $$(<:.o=.llinked2.o) $$(<:.o=.llinked3.o)
+
+$(TRUNNER_BPF_SKELS_LINKED): $(TRUNNER_BPF_OBJS) $(BPFTOOL) | $(TRUNNER_OUTPUT)
+ $$(call msg,LINK-BPF,$(TRUNNER_BINARY),$$(@:.skel.h=.bpf.o))
+ $(Q)$$(BPFTOOL) gen object $$(@:.skel.h=.linked1.o) $$(addprefix $(TRUNNER_OUTPUT)/,$$($$(@F)-deps))
+ $(Q)$$(BPFTOOL) gen object $$(@:.skel.h=.linked2.o) $$(@:.skel.h=.linked1.o)
+ $(Q)$$(BPFTOOL) gen object $$(@:.skel.h=.linked3.o) $$(@:.skel.h=.linked2.o)
+ $(Q)diff $$(@:.skel.h=.linked2.o) $$(@:.skel.h=.linked3.o)
$$(call msg,GEN-SKEL,$(TRUNNER_BINARY),$$@)
- $$(BPFTOOL) gen skeleton $$< > $$@
+ $(Q)$$(BPFTOOL) gen skeleton $$(@:.skel.h=.linked3.o) name $$(notdir $$(@:.skel.h=)) > $$@
+ $(Q)$$(BPFTOOL) gen subskeleton $$(@:.skel.h=.linked3.o) name $$(notdir $$(@:.skel.h=)) > $$(@:.skel.h=.subskel.h)
+ $(Q)rm -f $$(@:.skel.h=.linked1.o) $$(@:.skel.h=.linked2.o) $$(@:.skel.h=.linked3.o)
endif
# ensure we set up tests.h header generation rule just once
@@ -312,10 +570,9 @@ ifeq ($($(TRUNNER_TESTS_DIR)-tests-hdr),)
$(TRUNNER_TESTS_DIR)-tests-hdr := y
$(TRUNNER_TESTS_HDR): $(TRUNNER_TESTS_DIR)/*.c
$$(call msg,TEST-HDR,$(TRUNNER_BINARY),$$@)
- $$(shell ( cd $(TRUNNER_TESTS_DIR); \
- echo '/* Generated header, do not edit */'; \
- ls *.c 2> /dev/null | \
- sed -e 's@\([^\.]*\)\.c@DEFINE_TEST(\1)@'; \
+ $$(shell (echo '/* Generated header, do not edit */'; \
+ sed -n -E 's/^void (serial_)?test_([a-zA-Z0-9_]+)\((void)?\).*/DEFINE_TEST(\2)/p' \
+ $(TRUNNER_TESTS_DIR)/*.c | sort ; \
) > $$@)
endif
@@ -326,9 +583,11 @@ $(TRUNNER_TEST_OBJS): $(TRUNNER_OUTPUT)/%.test.o: \
$(TRUNNER_EXTRA_HDRS) \
$(TRUNNER_BPF_OBJS) \
$(TRUNNER_BPF_SKELS) \
+ $(TRUNNER_BPF_LSKELS) \
+ $(TRUNNER_BPF_SKELS_LINKED) \
$$(BPFOBJ) | $(TRUNNER_OUTPUT)
$$(call msg,TEST-OBJ,$(TRUNNER_BINARY),$$@)
- cd $$(@D) && $$(CC) -I. $$(CFLAGS) -c $(CURDIR)/$$< $$(LDLIBS) -o $$(@F)
+ $(Q)cd $$(@D) && $$(CC) -I. $$(CFLAGS) -c $(CURDIR)/$$< $$(LDLIBS) -o $$(@F)
$(TRUNNER_EXTRA_OBJS): $(TRUNNER_OUTPUT)/%.o: \
%.c \
@@ -336,46 +595,77 @@ $(TRUNNER_EXTRA_OBJS): $(TRUNNER_OUTPUT)/%.o: \
$(TRUNNER_TESTS_HDR) \
$$(BPFOBJ) | $(TRUNNER_OUTPUT)
$$(call msg,EXT-OBJ,$(TRUNNER_BINARY),$$@)
- $$(CC) $$(CFLAGS) -c $$< $$(LDLIBS) -o $$@
+ $(Q)$$(CC) $$(CFLAGS) -c $$< $$(LDLIBS) -o $$@
-# only copy extra resources if in flavored build
+# non-flavored in-srctree builds receive special treatment, in particular, we
+# do not need to copy extra resources (see e.g. test_btf_dump_case())
$(TRUNNER_BINARY)-extras: $(TRUNNER_EXTRA_FILES) | $(TRUNNER_OUTPUT)
-ifneq ($2,)
+ifneq ($2:$(OUTPUT),:$(shell pwd))
$$(call msg,EXT-COPY,$(TRUNNER_BINARY),$(TRUNNER_EXTRA_FILES))
- cp -a $$^ $(TRUNNER_OUTPUT)/
+ $(Q)rsync -aq $$^ $(TRUNNER_OUTPUT)/
endif
$(OUTPUT)/$(TRUNNER_BINARY): $(TRUNNER_TEST_OBJS) \
$(TRUNNER_EXTRA_OBJS) $$(BPFOBJ) \
+ $(RESOLVE_BTFIDS) \
+ $(TRUNNER_BPFTOOL) \
| $(TRUNNER_BINARY)-extras
$$(call msg,BINARY,,$$@)
- $$(CC) $$(CFLAGS) $$(filter %.a %.o,$$^) $$(LDLIBS) -o $$@
+ $(Q)$$(CC) $$(CFLAGS) $$(filter %.a %.o,$$^) $$(LDLIBS) -o $$@
+ $(Q)$(RESOLVE_BTFIDS) --btf $(TRUNNER_OUTPUT)/btf_data.bpf.o $$@
+ $(Q)ln -sf $(if $2,..,.)/tools/build/bpftool/$(USE_BOOTSTRAP)bpftool \
+ $(OUTPUT)/$(if $2,$2/)bpftool
endef
# Define test_progs test runner.
TRUNNER_TESTS_DIR := prog_tests
TRUNNER_BPF_PROGS_DIR := progs
-TRUNNER_EXTRA_SOURCES := test_progs.c cgroup_helpers.c trace_helpers.c \
- network_helpers.c testing_helpers.c \
- flow_dissector_load.h
-TRUNNER_EXTRA_FILES := $(OUTPUT)/urandom_read \
- $(wildcard progs/btf_dump_test_case_*.c)
+TRUNNER_EXTRA_SOURCES := test_progs.c \
+ cgroup_helpers.c \
+ trace_helpers.c \
+ network_helpers.c \
+ testing_helpers.c \
+ btf_helpers.c \
+ cap_helpers.c \
+ unpriv_helpers.c \
+ netlink_helpers.c \
+ test_loader.c \
+ xsk.c \
+ disasm.c \
+ json_writer.c \
+ flow_dissector_load.h \
+ ip_check_defrag_frags.h
+TRUNNER_EXTRA_FILES := $(OUTPUT)/urandom_read $(OUTPUT)/bpf_testmod.ko \
+ $(OUTPUT)/bpf_test_no_cfi.ko \
+ $(OUTPUT)/liburandom_read.so \
+ $(OUTPUT)/xdp_synproxy \
+ $(OUTPUT)/sign-file \
+ $(OUTPUT)/uprobe_multi \
+ ima_setup.sh \
+ verify_sig_setup.sh \
+ $(wildcard progs/btf_dump_test_case_*.c) \
+ $(wildcard progs/*.bpf.o)
TRUNNER_BPF_BUILD_RULE := CLANG_BPF_BUILD_RULE
-TRUNNER_BPF_CFLAGS := $(BPF_CFLAGS) $(CLANG_CFLAGS)
-TRUNNER_BPF_LDFLAGS := -mattr=+alu32
+TRUNNER_BPF_CFLAGS := $(BPF_CFLAGS) $(CLANG_CFLAGS) -DENABLE_ATOMICS_TESTS
$(eval $(call DEFINE_TEST_RUNNER,test_progs))
# Define test_progs-no_alu32 test runner.
TRUNNER_BPF_BUILD_RULE := CLANG_NOALU32_BPF_BUILD_RULE
-TRUNNER_BPF_LDFLAGS :=
+TRUNNER_BPF_CFLAGS := $(BPF_CFLAGS) $(CLANG_CFLAGS)
$(eval $(call DEFINE_TEST_RUNNER,test_progs,no_alu32))
+# Define test_progs-cpuv4 test runner.
+ifneq ($(CLANG_CPUV4),)
+TRUNNER_BPF_BUILD_RULE := CLANG_CPUV4_BPF_BUILD_RULE
+TRUNNER_BPF_CFLAGS := $(BPF_CFLAGS) $(CLANG_CFLAGS)
+$(eval $(call DEFINE_TEST_RUNNER,test_progs,cpuv4))
+endif
+
# Define test_progs BPF-GCC-flavored test runner.
ifneq ($(BPF_GCC),)
TRUNNER_BPF_BUILD_RULE := GCC_BPF_BUILD_RULE
-TRUNNER_BPF_CFLAGS := $(BPF_CFLAGS) $(call get_sys_includes,gcc)
-TRUNNER_BPF_LDFLAGS :=
+TRUNNER_BPF_CFLAGS := $(BPF_CFLAGS) $(call get_sys_includes,gcc,)
$(eval $(call DEFINE_TEST_RUNNER,test_progs,bpf_gcc))
endif
@@ -386,7 +676,6 @@ TRUNNER_EXTRA_SOURCES := test_maps.c
TRUNNER_EXTRA_FILES :=
TRUNNER_BPF_BUILD_RULE := $$(error no BPF objects should be built)
TRUNNER_BPF_CFLAGS :=
-TRUNNER_BPF_LDFLAGS :=
$(eval $(call DEFINE_TEST_RUNNER,test_maps))
# Define test_verifier test runner.
@@ -402,32 +691,94 @@ verifier/tests.h: verifier/*.c
) > verifier/tests.h)
$(OUTPUT)/test_verifier: test_verifier.c verifier/tests.h $(BPFOBJ) | $(OUTPUT)
$(call msg,BINARY,,$@)
- $(CC) $(CFLAGS) $(filter %.a %.o %.c,$^) $(LDLIBS) -o $@
+ $(Q)$(CC) $(CFLAGS) $(filter %.a %.o %.c,$^) $(LDLIBS) -o $@
+
+# Include find_bit.c to compile xskxceiver.
+EXTRA_SRC := $(TOOLSDIR)/lib/find_bit.c
+$(OUTPUT)/xskxceiver: $(EXTRA_SRC) xskxceiver.c xskxceiver.h $(OUTPUT)/xsk.o $(OUTPUT)/xsk_xdp_progs.skel.h $(BPFOBJ) | $(OUTPUT)
+ $(call msg,BINARY,,$@)
+ $(Q)$(CC) $(CFLAGS) $(filter %.a %.o %.c,$^) $(LDLIBS) -o $@
+
+$(OUTPUT)/xdp_hw_metadata: xdp_hw_metadata.c $(OUTPUT)/network_helpers.o $(OUTPUT)/xsk.o $(OUTPUT)/xdp_hw_metadata.skel.h | $(OUTPUT)
+ $(call msg,BINARY,,$@)
+ $(Q)$(CC) $(CFLAGS) $(filter %.a %.o %.c,$^) $(LDLIBS) -o $@
+
+$(OUTPUT)/xdp_features: xdp_features.c $(OUTPUT)/network_helpers.o $(OUTPUT)/xdp_features.skel.h | $(OUTPUT)
+ $(call msg,BINARY,,$@)
+ $(Q)$(CC) $(CFLAGS) $(filter %.a %.o %.c,$^) $(LDLIBS) -o $@
# Make sure we are able to include and link libbpf against c++.
$(OUTPUT)/test_cpp: test_cpp.cpp $(OUTPUT)/test_core_extern.skel.h $(BPFOBJ)
$(call msg,CXX,,$@)
- $(CXX) $(CFLAGS) $^ $(LDLIBS) -o $@
+ $(Q)$(CXX) $(CFLAGS) $(filter %.a %.o %.cpp,$^) $(LDLIBS) -o $@
# Benchmark runner
-$(OUTPUT)/bench_%.o: benchs/bench_%.c bench.h
+$(OUTPUT)/bench_%.o: benchs/bench_%.c bench.h $(BPFOBJ)
$(call msg,CC,,$@)
- $(CC) $(CFLAGS) -c $(filter %.c,$^) $(LDLIBS) -o $@
+ $(Q)$(CC) $(CFLAGS) -O2 -c $(filter %.c,$^) $(LDLIBS) -o $@
$(OUTPUT)/bench_rename.o: $(OUTPUT)/test_overhead.skel.h
$(OUTPUT)/bench_trigger.o: $(OUTPUT)/trigger_bench.skel.h
$(OUTPUT)/bench_ringbufs.o: $(OUTPUT)/ringbuf_bench.skel.h \
$(OUTPUT)/perfbuf_bench.skel.h
-$(OUTPUT)/bench.o: bench.h testing_helpers.h
+$(OUTPUT)/bench_bloom_filter_map.o: $(OUTPUT)/bloom_filter_bench.skel.h
+$(OUTPUT)/bench_bpf_loop.o: $(OUTPUT)/bpf_loop_bench.skel.h
+$(OUTPUT)/bench_strncmp.o: $(OUTPUT)/strncmp_bench.skel.h
+$(OUTPUT)/bench_bpf_hashmap_full_update.o: $(OUTPUT)/bpf_hashmap_full_update_bench.skel.h
+$(OUTPUT)/bench_local_storage.o: $(OUTPUT)/local_storage_bench.skel.h
+$(OUTPUT)/bench_local_storage_rcu_tasks_trace.o: $(OUTPUT)/local_storage_rcu_tasks_trace_bench.skel.h
+$(OUTPUT)/bench_local_storage_create.o: $(OUTPUT)/bench_local_storage_create.skel.h
+$(OUTPUT)/bench_bpf_hashmap_lookup.o: $(OUTPUT)/bpf_hashmap_lookup.skel.h
+$(OUTPUT)/bench_htab_mem.o: $(OUTPUT)/htab_mem_bench.skel.h
+$(OUTPUT)/bench.o: bench.h testing_helpers.h $(BPFOBJ)
$(OUTPUT)/bench: LDLIBS += -lm
-$(OUTPUT)/bench: $(OUTPUT)/bench.o $(OUTPUT)/testing_helpers.o \
+$(OUTPUT)/bench: $(OUTPUT)/bench.o \
+ $(TESTING_HELPERS) \
+ $(TRACE_HELPERS) \
+ $(CGROUP_HELPERS) \
$(OUTPUT)/bench_count.o \
$(OUTPUT)/bench_rename.o \
$(OUTPUT)/bench_trigger.o \
- $(OUTPUT)/bench_ringbufs.o
+ $(OUTPUT)/bench_ringbufs.o \
+ $(OUTPUT)/bench_bloom_filter_map.o \
+ $(OUTPUT)/bench_bpf_loop.o \
+ $(OUTPUT)/bench_strncmp.o \
+ $(OUTPUT)/bench_bpf_hashmap_full_update.o \
+ $(OUTPUT)/bench_local_storage.o \
+ $(OUTPUT)/bench_local_storage_rcu_tasks_trace.o \
+ $(OUTPUT)/bench_bpf_hashmap_lookup.o \
+ $(OUTPUT)/bench_local_storage_create.o \
+ $(OUTPUT)/bench_htab_mem.o \
+ #
+ $(call msg,BINARY,,$@)
+ $(Q)$(CC) $(CFLAGS) $(LDFLAGS) $(filter %.a %.o,$^) $(LDLIBS) -o $@
+
+$(OUTPUT)/veristat.o: $(BPFOBJ)
+$(OUTPUT)/veristat: $(OUTPUT)/veristat.o
+ $(call msg,BINARY,,$@)
+ $(Q)$(CC) $(CFLAGS) $(LDFLAGS) $(filter %.a %.o,$^) $(LDLIBS) -o $@
+
+$(OUTPUT)/uprobe_multi: uprobe_multi.c
$(call msg,BINARY,,$@)
- $(CC) $(LDFLAGS) -o $@ $(filter %.a %.o,$^) $(LDLIBS)
+ $(Q)$(CC) $(CFLAGS) $(LDFLAGS) $^ $(LDLIBS) -o $@
-EXTRA_CLEAN := $(TEST_CUSTOM_PROGS) $(SCRATCH_DIR) \
+EXTRA_CLEAN := $(SCRATCH_DIR) $(HOST_SCRATCH_DIR) \
prog_tests/tests.h map_tests/tests.h verifier/tests.h \
- feature \
- $(addprefix $(OUTPUT)/,*.o *.skel.h no_alu32 bpf_gcc)
+ feature bpftool \
+ $(addprefix $(OUTPUT)/,*.o *.skel.h *.lskel.h *.subskel.h \
+ no_alu32 cpuv4 bpf_gcc bpf_testmod.ko \
+ bpf_test_no_cfi.ko \
+ liburandom_read.so)
+
+.PHONY: docs docs-clean
+
+# Delete partially updated (corrupted) files on error
+.DELETE_ON_ERROR:
+
+DEFAULT_INSTALL_RULE := $(INSTALL_RULE)
+override define INSTALL_RULE
+ $(DEFAULT_INSTALL_RULE)
+ @for DIR in $(TEST_INST_SUBDIRS); do \
+ mkdir -p $(INSTALL_PATH)/$$DIR; \
+ rsync -a $(OUTPUT)/$$DIR/*.bpf.o $(INSTALL_PATH)/$$DIR;\
+ done
+endef
diff --git a/tools/testing/selftests/bpf/Makefile.docs b/tools/testing/selftests/bpf/Makefile.docs
new file mode 100644
index 000000000000..eb6a4fea8c79
--- /dev/null
+++ b/tools/testing/selftests/bpf/Makefile.docs
@@ -0,0 +1,83 @@
+# SPDX-License-Identifier: GPL-2.0-only
+
+include ../../../scripts/Makefile.include
+include ../../../scripts/utilities.mak
+
+INSTALL ?= install
+RM ?= rm -f
+RMDIR ?= rmdir --ignore-fail-on-non-empty
+
+ifeq ($(V),1)
+ Q =
+else
+ Q = @
+endif
+
+prefix ?= /usr/local
+mandir ?= $(prefix)/man
+man2dir = $(mandir)/man2
+man7dir = $(mandir)/man7
+
+SYSCALL_RST = bpf-syscall.rst
+MAN2_RST = $(SYSCALL_RST)
+
+HELPERS_RST = bpf-helpers.rst
+MAN7_RST = $(HELPERS_RST)
+
+_DOC_MAN2 = $(patsubst %.rst,%.2,$(MAN2_RST))
+DOC_MAN2 = $(addprefix $(OUTPUT),$(_DOC_MAN2))
+
+_DOC_MAN7 = $(patsubst %.rst,%.7,$(MAN7_RST))
+DOC_MAN7 = $(addprefix $(OUTPUT),$(_DOC_MAN7))
+
+DOCTARGETS := helpers syscall
+
+docs: $(DOCTARGETS)
+syscall: man2
+helpers: man7
+man2: $(DOC_MAN2)
+man7: $(DOC_MAN7)
+
+RST2MAN_DEP := $(shell command -v rst2man 2>/dev/null)
+
+# Configure make rules for the man page bpf-$1.$2.
+# $1 - target for scripts/bpf_doc.py
+# $2 - man page section to generate the troff file
+define DOCS_RULES =
+$(OUTPUT)bpf-$1.rst: ../../../../include/uapi/linux/bpf.h
+ $$(QUIET_GEN)../../../../scripts/bpf_doc.py $1 \
+ --filename $$< > $$@
+
+$(OUTPUT)%.$2: $(OUTPUT)%.rst
+ifndef RST2MAN_DEP
+ $$(error "rst2man not found, but required to generate man pages")
+endif
+ $$(QUIET_GEN)rst2man --exit-status=1 $$< > $$@.tmp
+ $$(QUIET_GEN)mv $$@.tmp $$@
+
+docs-clean-$1:
+ $$(call QUIET_CLEAN, eBPF_$1-manpage)
+ $(Q)$(RM) $$(DOC_MAN$2) $(OUTPUT)bpf-$1.rst
+
+docs-install-$1: docs
+ $$(call QUIET_INSTALL, eBPF_$1-manpage)
+ $(Q)$(INSTALL) -d -m 755 $(DESTDIR)$$(man$2dir)
+ $(Q)$(INSTALL) -m 644 $$(DOC_MAN$2) $(DESTDIR)$$(man$2dir)
+
+docs-uninstall-$1:
+ $$(call QUIET_UNINST, eBPF_$1-manpage)
+ $(Q)$(RM) $$(addprefix $(DESTDIR)$$(man$2dir)/,$$(_DOC_MAN$2))
+ $(Q)$(RMDIR) $(DESTDIR)$$(man$2dir)
+
+.PHONY: $1 docs-clean-$1 docs-install-$1 docs-uninstall-$1
+endef
+
+# Create the make targets to generate manual pages by name and section
+$(eval $(call DOCS_RULES,helpers,7))
+$(eval $(call DOCS_RULES,syscall,2))
+
+docs-clean: $(foreach doctarget,$(DOCTARGETS), docs-clean-$(doctarget))
+docs-install: $(foreach doctarget,$(DOCTARGETS), docs-install-$(doctarget))
+docs-uninstall: $(foreach doctarget,$(DOCTARGETS), docs-uninstall-$(doctarget))
+
+.PHONY: docs docs-clean docs-install docs-uninstall man2 man7
diff --git a/tools/testing/selftests/bpf/README.rst b/tools/testing/selftests/bpf/README.rst
index e885d351595f..9b974e425af3 100644
--- a/tools/testing/selftests/bpf/README.rst
+++ b/tools/testing/selftests/bpf/README.rst
@@ -2,11 +2,133 @@
BPF Selftest Notes
==================
General instructions on running selftests can be found in
-`Documentation/bpf/bpf_devel_QA.rst`_.
+`Documentation/bpf/bpf_devel_QA.rst`__.
+
+__ /Documentation/bpf/bpf_devel_QA.rst#q-how-to-run-bpf-selftests
+
+=============
+BPF CI System
+=============
+
+BPF employs a continuous integration (CI) system to check patch submission in an
+automated fashion. The system runs selftests for each patch in a series. Results
+are propagated to patchwork, where failures are highlighted similar to
+violations of other checks (such as additional warnings being emitted or a
+``scripts/checkpatch.pl`` reported deficiency):
+
+ https://patchwork.kernel.org/project/netdevbpf/list/?delegate=121173
+
+The CI system executes tests on multiple architectures. It uses a kernel
+configuration derived from both the generic and architecture specific config
+file fragments below ``tools/testing/selftests/bpf/`` (e.g., ``config`` and
+``config.x86_64``).
+
+Denylisting Tests
+=================
+
+It is possible for some architectures to not have support for all BPF features.
+In such a case tests in CI may fail. An example of such a shortcoming is BPF
+trampoline support on IBM's s390x architecture. For cases like this, an in-tree
+deny list file, located at ``tools/testing/selftests/bpf/DENYLIST.<arch>``, can
+be used to prevent the test from running on such an architecture.
+
+In addition to that, the generic ``tools/testing/selftests/bpf/DENYLIST`` is
+honored on every architecture running tests.
+
+These files are organized in three columns. The first column lists the test in
+question. This can be the name of a test suite or of an individual test. The
+remaining two columns provide additional meta data that helps identify and
+classify the entry: column two is a copy and paste of the error being reported
+when running the test in the setting in question. The third column, if
+available, summarizes the underlying problem. A value of ``trampoline``, for
+example, indicates that lack of trampoline support is causing the test to fail.
+This last entry helps identify tests that can be re-enabled once such support is
+added.
+
+=========================
+Running Selftests in a VM
+=========================
+
+It's now possible to run the selftests using ``tools/testing/selftests/bpf/vmtest.sh``.
+The script tries to ensure that the tests are run with the same environment as they
+would be run post-submit in the CI used by the Maintainers, with the exception
+that deny lists are not automatically honored.
+
+This script uses the in-tree kernel configuration and downloads a VM userspace
+image from the system used by the CI. It builds the kernel (without overwriting
+your existing Kconfig), recompiles the bpf selftests, runs them (by default
+``tools/testing/selftests/bpf/test_progs``) and saves the resulting output (by
+default in ``~/.bpf_selftests``).
+
+Script dependencies:
+- clang (preferably built from sources, https://github.com/llvm/llvm-project);
+- pahole (preferably built from sources, https://git.kernel.org/pub/scm/devel/pahole/pahole.git/);
+- qemu;
+- docutils (for ``rst2man``);
+- libcap-devel.
+
+For more information about using the script, run:
+
+.. code-block:: console
+
+ $ tools/testing/selftests/bpf/vmtest.sh -h
+
+In case of linker errors when running selftests, try using static linking:
+
+.. code-block:: console
+
+ $ LDLIBS=-static PKG_CONFIG='pkg-config --static' vmtest.sh
+
+.. note:: Some distros may not support static linking.
+
+.. note:: The script uses pahole and clang based on host environment setting.
+ If you want to change pahole and llvm, you can change `PATH` environment
+ variable in the beginning of script.
+
+.. note:: The script currently only supports x86_64 and s390x architectures.
Additional information about selftest failures are
documented here.
+profiler[23] test failures with clang/llvm <12.0.0
+==================================================
+
+With clang/llvm <12.0.0, the profiler[23] test may fail.
+The symptom looks like
+
+.. code-block:: c
+
+ // r9 is a pointer to map_value
+ // r7 is a scalar
+ 17: bf 96 00 00 00 00 00 00 r6 = r9
+ 18: 0f 76 00 00 00 00 00 00 r6 += r7
+ math between map_value pointer and register with unbounded min value is not allowed
+
+ // the instructions below will not be seen in the verifier log
+ 19: a5 07 01 00 01 01 00 00 if r7 < 257 goto +1
+ 20: bf 96 00 00 00 00 00 00 r6 = r9
+ // r6 is used here
+
+The verifier will reject such code with above error.
+At insn 18 the r7 is indeed unbounded. The later insn 19 checks the bounds and
+the insn 20 undoes map_value addition. It is currently impossible for the
+verifier to understand such speculative pointer arithmetic.
+Hence `this patch`__ addresses it on the compiler side. It was committed on llvm 12.
+
+__ https://github.com/llvm/llvm-project/commit/ddf1864ace484035e3cde5e83b3a31ac81e059c6
+
+The corresponding C code
+
+.. code-block:: c
+
+ for (int i = 0; i < MAX_CGROUPS_PATH_DEPTH; i++) {
+ filepart_length = bpf_probe_read_str(payload, ...);
+ if (filepart_length <= MAX_PATH) {
+ barrier_var(filepart_length); // workaround
+ payload += filepart_length;
+ }
+ }
+
bpf_iter test failures with clang/llvm 10.0.0
=============================================
@@ -39,7 +161,149 @@ The symptom for ``bpf_iter/netlink`` looks like
17: (7b) *(u64 *)(r7 +0) = r2
only read is supported
-This is due to a llvm BPF backend bug. The fix
- https://reviews.llvm.org/D78466
+This is due to a llvm BPF backend bug. `The fix`__
has been pushed to llvm 10.x release branch and will be
-available in 10.0.1. The fix is available in llvm 11.0.0 trunk.
+available in 10.0.1. The patch is available in llvm 11.0.0 trunk.
+
+__ https://github.com/llvm/llvm-project/commit/3cb7e7bf959dcd3b8080986c62e10a75c7af43f0
+
+bpf_verif_scale/loop6.bpf.o test failure with Clang 12
+======================================================
+
+With Clang 12, the following bpf_verif_scale test failed:
+ * ``bpf_verif_scale/loop6.bpf.o``
+
+The verifier output looks like
+
+.. code-block:: c
+
+ R1 type=ctx expected=fp
+ The sequence of 8193 jumps is too complex.
+
+The reason is compiler generating the following code
+
+.. code-block:: c
+
+ ; for (i = 0; (i < VIRTIO_MAX_SGS) && (i < num); i++) {
+ 14: 16 05 40 00 00 00 00 00 if w5 == 0 goto +64 <LBB0_6>
+ 15: bc 51 00 00 00 00 00 00 w1 = w5
+ 16: 04 01 00 00 ff ff ff ff w1 += -1
+ 17: 67 05 00 00 20 00 00 00 r5 <<= 32
+ 18: 77 05 00 00 20 00 00 00 r5 >>= 32
+ 19: a6 01 01 00 05 00 00 00 if w1 < 5 goto +1 <LBB0_4>
+ 20: b7 05 00 00 06 00 00 00 r5 = 6
+ 00000000000000a8 <LBB0_4>:
+ 21: b7 02 00 00 00 00 00 00 r2 = 0
+ 22: b7 01 00 00 00 00 00 00 r1 = 0
+ ; for (i = 0; (i < VIRTIO_MAX_SGS) && (i < num); i++) {
+ 23: 7b 1a e0 ff 00 00 00 00 *(u64 *)(r10 - 32) = r1
+ 24: 7b 5a c0 ff 00 00 00 00 *(u64 *)(r10 - 64) = r5
+
+Note that insn #15 has w1 = w5 and w1 is refined later but
+r5(w5) is eventually saved on stack at insn #24 for later use.
+This cause later verifier failure. The bug has been `fixed`__ in
+Clang 13.
+
+__ https://github.com/llvm/llvm-project/commit/1959ead525b8830cc8a345f45e1c3ef9902d3229
+
+BPF CO-RE-based tests and Clang version
+=======================================
+
+A set of selftests use BPF target-specific built-ins, which might require
+bleeding-edge Clang versions (Clang 12 nightly at this time).
+
+Few sub-tests of core_reloc test suit (part of test_progs test runner) require
+the following built-ins, listed with corresponding Clang diffs introducing
+them to Clang/LLVM. These sub-tests are going to be skipped if Clang is too
+old to support them, they shouldn't cause build failures or runtime test
+failures:
+
+- __builtin_btf_type_id() [0_, 1_, 2_];
+- __builtin_preserve_type_info(), __builtin_preserve_enum_value() [3_, 4_].
+
+.. _0: https://github.com/llvm/llvm-project/commit/6b01b465388b204d543da3cf49efd6080db094a9
+.. _1: https://github.com/llvm/llvm-project/commit/072cde03aaa13a2c57acf62d79876bf79aa1919f
+.. _2: https://github.com/llvm/llvm-project/commit/00602ee7ef0bf6c68d690a2bd729c12b95c95c99
+.. _3: https://github.com/llvm/llvm-project/commit/6d218b4adb093ff2e9764febbbc89f429412006c
+.. _4: https://github.com/llvm/llvm-project/commit/6d6750696400e7ce988d66a1a00e1d0cb32815f8
+
+Floating-point tests and Clang version
+======================================
+
+Certain selftests, e.g. core_reloc, require support for the floating-point
+types, which was introduced in `Clang 13`__. The older Clang versions will
+either crash when compiling these tests, or generate an incorrect BTF.
+
+__ https://github.com/llvm/llvm-project/commit/a7137b238a07d9399d3ae96c0b461571bd5aa8b2
+
+Kernel function call test and Clang version
+===========================================
+
+Some selftests (e.g. kfunc_call and bpf_tcp_ca) require a LLVM support
+to generate extern function in BTF. It was introduced in `Clang 13`__.
+
+Without it, the error from compiling bpf selftests looks like:
+
+.. code-block:: console
+
+ libbpf: failed to find BTF for extern 'tcp_slow_start' [25] section: -2
+
+__ https://github.com/llvm/llvm-project/commit/886f9ff53155075bd5f1e994f17b85d1e1b7470c
+
+btf_tag test and Clang version
+==============================
+
+The btf_tag selftest requires LLVM support to recognize the btf_decl_tag and
+btf_type_tag attributes. They are introduced in `Clang 14` [0_, 1_].
+The subtests ``btf_type_tag_user_{mod1, mod2, vmlinux}`` also requires
+pahole version ``1.23``.
+
+Without them, the btf_tag selftest will be skipped and you will observe:
+
+.. code-block:: console
+
+ #<test_num> btf_tag:SKIP
+
+.. _0: https://github.com/llvm/llvm-project/commit/a162b67c98066218d0d00aa13b99afb95d9bb5e6
+.. _1: https://github.com/llvm/llvm-project/commit/3466e00716e12e32fdb100e3fcfca5c2b3e8d784
+
+Clang dependencies for static linking tests
+===========================================
+
+linked_vars, linked_maps, and linked_funcs tests depend on `Clang fix`__ to
+generate valid BTF information for weak variables. Please make sure you use
+Clang that contains the fix.
+
+__ https://github.com/llvm/llvm-project/commit/968292cb93198442138128d850fd54dc7edc0035
+
+Clang relocation changes
+========================
+
+Clang 13 patch `clang reloc patch`_ made some changes on relocations such
+that existing relocation types are broken into more types and
+each new type corresponds to only one way to resolve relocation.
+See `kernel llvm reloc`_ for more explanation and some examples.
+Using clang 13 to compile old libbpf which has static linker support,
+there will be a compilation failure::
+
+ libbpf: ELF relo #0 in section #6 has unexpected type 2 in .../bpf_tcp_nogpl.bpf.o
+
+Here, ``type 2`` refers to new relocation type ``R_BPF_64_ABS64``.
+To fix this issue, user newer libbpf.
+
+.. Links
+.. _clang reloc patch: https://github.com/llvm/llvm-project/commit/6a2ea84600ba4bd3b2733bd8f08f5115eb32164b
+.. _kernel llvm reloc: /Documentation/bpf/llvm_reloc.rst
+
+Clang dependencies for the u32 spill test (xdpwall)
+===================================================
+The xdpwall selftest requires a change in `Clang 14`__.
+
+Without it, the xdpwall selftest will fail and the error message
+from running test_progs will look like:
+
+.. code-block:: console
+
+ test_xdpwall:FAIL:Does LLVM have https://github.com/llvm/llvm-project/commit/ea72b0319d7b0f0c2fcf41d121afa5d031b319d5? unexpected error: -4007
+
+__ https://github.com/llvm/llvm-project/commit/ea72b0319d7b0f0c2fcf41d121afa5d031b319d5
diff --git a/tools/testing/selftests/bpf/autoconf_helper.h b/tools/testing/selftests/bpf/autoconf_helper.h
new file mode 100644
index 000000000000..5b243b9cdf8c
--- /dev/null
+++ b/tools/testing/selftests/bpf/autoconf_helper.h
@@ -0,0 +1,9 @@
+// SPDX-License-Identifier: GPL-2.0-only
+
+#ifdef HAVE_GENHDR
+# include "autoconf.h"
+#else
+# if defined(__i386) || defined(__x86_64) || defined(__s390x__) || defined(__aarch64__)
+# define CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS 1
+# endif
+#endif
diff --git a/tools/testing/selftests/bpf/bench.c b/tools/testing/selftests/bpf/bench.c
index 944ad4721c83..b2b4c391eb0a 100644
--- a/tools/testing/selftests/bpf/bench.c
+++ b/tools/testing/selftests/bpf/bench.c
@@ -8,7 +8,6 @@
#include <fcntl.h>
#include <pthread.h>
#include <sys/sysinfo.h>
-#include <sys/resource.h>
#include <signal.h>
#include "bench.h"
#include "testing_helpers.h"
@@ -17,7 +16,8 @@ struct env env = {
.warmup_sec = 1,
.duration_sec = 5,
.affinity = false,
- .consumer_cnt = 1,
+ .quiet = false,
+ .consumer_cnt = 0,
.producer_cnt = 1,
};
@@ -29,25 +29,39 @@ static int libbpf_print_fn(enum libbpf_print_level level,
return vfprintf(stderr, format, args);
}
-static int bump_memlock_rlimit(void)
+void setup_libbpf(void)
{
- struct rlimit rlim_new = {
- .rlim_cur = RLIM_INFINITY,
- .rlim_max = RLIM_INFINITY,
- };
+ libbpf_set_strict_mode(LIBBPF_STRICT_ALL);
+ libbpf_set_print(libbpf_print_fn);
+}
+
+void false_hits_report_progress(int iter, struct bench_res *res, long delta_ns)
+{
+ long total = res->false_hits + res->hits + res->drops;
+
+ printf("Iter %3d (%7.3lfus): ",
+ iter, (delta_ns - 1000000000) / 1000.0);
- return setrlimit(RLIMIT_MEMLOCK, &rlim_new);
+ printf("%ld false hits of %ld total operations. Percentage = %2.2f %%\n",
+ res->false_hits, total, ((float)res->false_hits / total) * 100);
}
-void setup_libbpf()
+void false_hits_report_final(struct bench_res res[], int res_cnt)
{
- int err;
+ long total_hits = 0, total_drops = 0, total_false_hits = 0, total_ops = 0;
+ int i;
- libbpf_set_print(libbpf_print_fn);
+ for (i = 0; i < res_cnt; i++) {
+ total_hits += res[i].hits;
+ total_false_hits += res[i].false_hits;
+ total_drops += res[i].drops;
+ }
+ total_ops = total_hits + total_false_hits + total_drops;
- err = bump_memlock_rlimit();
- if (err)
- fprintf(stderr, "failed to increase RLIMIT_MEMLOCK: %d", err);
+ printf("Summary: %ld false hits of %ld total operations. ",
+ total_false_hits, total_ops);
+ printf("Percentage = %2.2f %%\n",
+ ((float)total_false_hits / total_ops) * 100);
}
void hits_drops_report_progress(int iter, struct bench_res *res, long delta_ns)
@@ -62,20 +76,59 @@ void hits_drops_report_progress(int iter, struct bench_res *res, long delta_ns)
printf("Iter %3d (%7.3lfus): ",
iter, (delta_ns - 1000000000) / 1000.0);
- printf("hits %8.3lfM/s (%7.3lfM/prod), drops %8.3lfM/s\n",
- hits_per_sec, hits_per_prod, drops_per_sec);
+ printf("hits %8.3lfM/s (%7.3lfM/prod), drops %8.3lfM/s, total operations %8.3lfM/s\n",
+ hits_per_sec, hits_per_prod, drops_per_sec, hits_per_sec + drops_per_sec);
+}
+
+void
+grace_period_latency_basic_stats(struct bench_res res[], int res_cnt, struct basic_stats *gp_stat)
+{
+ int i;
+
+ memset(gp_stat, 0, sizeof(struct basic_stats));
+
+ for (i = 0; i < res_cnt; i++)
+ gp_stat->mean += res[i].gp_ns / 1000.0 / (double)res[i].gp_ct / (0.0 + res_cnt);
+
+#define IT_MEAN_DIFF (res[i].gp_ns / 1000.0 / (double)res[i].gp_ct - gp_stat->mean)
+ if (res_cnt > 1) {
+ for (i = 0; i < res_cnt; i++)
+ gp_stat->stddev += (IT_MEAN_DIFF * IT_MEAN_DIFF) / (res_cnt - 1.0);
+ }
+ gp_stat->stddev = sqrt(gp_stat->stddev);
+#undef IT_MEAN_DIFF
+}
+
+void
+grace_period_ticks_basic_stats(struct bench_res res[], int res_cnt, struct basic_stats *gp_stat)
+{
+ int i;
+
+ memset(gp_stat, 0, sizeof(struct basic_stats));
+ for (i = 0; i < res_cnt; i++)
+ gp_stat->mean += res[i].stime / (double)res[i].gp_ct / (0.0 + res_cnt);
+
+#define IT_MEAN_DIFF (res[i].stime / (double)res[i].gp_ct - gp_stat->mean)
+ if (res_cnt > 1) {
+ for (i = 0; i < res_cnt; i++)
+ gp_stat->stddev += (IT_MEAN_DIFF * IT_MEAN_DIFF) / (res_cnt - 1.0);
+ }
+ gp_stat->stddev = sqrt(gp_stat->stddev);
+#undef IT_MEAN_DIFF
}
void hits_drops_report_final(struct bench_res res[], int res_cnt)
{
int i;
- double hits_mean = 0.0, drops_mean = 0.0;
- double hits_stddev = 0.0, drops_stddev = 0.0;
+ double hits_mean = 0.0, drops_mean = 0.0, total_ops_mean = 0.0;
+ double hits_stddev = 0.0, drops_stddev = 0.0, total_ops_stddev = 0.0;
+ double total_ops;
for (i = 0; i < res_cnt; i++) {
hits_mean += res[i].hits / 1000000.0 / (0.0 + res_cnt);
drops_mean += res[i].drops / 1000000.0 / (0.0 + res_cnt);
}
+ total_ops_mean = hits_mean + drops_mean;
if (res_cnt > 1) {
for (i = 0; i < res_cnt; i++) {
@@ -85,14 +138,101 @@ void hits_drops_report_final(struct bench_res res[], int res_cnt)
drops_stddev += (drops_mean - res[i].drops / 1000000.0) *
(drops_mean - res[i].drops / 1000000.0) /
(res_cnt - 1.0);
+ total_ops = res[i].hits + res[i].drops;
+ total_ops_stddev += (total_ops_mean - total_ops / 1000000.0) *
+ (total_ops_mean - total_ops / 1000000.0) /
+ (res_cnt - 1.0);
}
hits_stddev = sqrt(hits_stddev);
drops_stddev = sqrt(drops_stddev);
+ total_ops_stddev = sqrt(total_ops_stddev);
}
printf("Summary: hits %8.3lf \u00B1 %5.3lfM/s (%7.3lfM/prod), ",
hits_mean, hits_stddev, hits_mean / env.producer_cnt);
- printf("drops %8.3lf \u00B1 %5.3lfM/s\n",
+ printf("drops %8.3lf \u00B1 %5.3lfM/s, ",
drops_mean, drops_stddev);
+ printf("total operations %8.3lf \u00B1 %5.3lfM/s\n",
+ total_ops_mean, total_ops_stddev);
+}
+
+void ops_report_progress(int iter, struct bench_res *res, long delta_ns)
+{
+ double hits_per_sec, hits_per_prod;
+
+ hits_per_sec = res->hits / 1000000.0 / (delta_ns / 1000000000.0);
+ hits_per_prod = hits_per_sec / env.producer_cnt;
+
+ printf("Iter %3d (%7.3lfus): ", iter, (delta_ns - 1000000000) / 1000.0);
+
+ printf("hits %8.3lfM/s (%7.3lfM/prod)\n", hits_per_sec, hits_per_prod);
+}
+
+void ops_report_final(struct bench_res res[], int res_cnt)
+{
+ double hits_mean = 0.0, hits_stddev = 0.0;
+ int i;
+
+ for (i = 0; i < res_cnt; i++)
+ hits_mean += res[i].hits / 1000000.0 / (0.0 + res_cnt);
+
+ if (res_cnt > 1) {
+ for (i = 0; i < res_cnt; i++)
+ hits_stddev += (hits_mean - res[i].hits / 1000000.0) *
+ (hits_mean - res[i].hits / 1000000.0) /
+ (res_cnt - 1.0);
+
+ hits_stddev = sqrt(hits_stddev);
+ }
+ printf("Summary: throughput %8.3lf \u00B1 %5.3lf M ops/s (%7.3lfM ops/prod), ",
+ hits_mean, hits_stddev, hits_mean / env.producer_cnt);
+ printf("latency %8.3lf ns/op\n", 1000.0 / hits_mean * env.producer_cnt);
+}
+
+void local_storage_report_progress(int iter, struct bench_res *res,
+ long delta_ns)
+{
+ double important_hits_per_sec, hits_per_sec;
+ double delta_sec = delta_ns / 1000000000.0;
+
+ hits_per_sec = res->hits / 1000000.0 / delta_sec;
+ important_hits_per_sec = res->important_hits / 1000000.0 / delta_sec;
+
+ printf("Iter %3d (%7.3lfus): ", iter, (delta_ns - 1000000000) / 1000.0);
+
+ printf("hits %8.3lfM/s ", hits_per_sec);
+ printf("important_hits %8.3lfM/s\n", important_hits_per_sec);
+}
+
+void local_storage_report_final(struct bench_res res[], int res_cnt)
+{
+ double important_hits_mean = 0.0, important_hits_stddev = 0.0;
+ double hits_mean = 0.0, hits_stddev = 0.0;
+ int i;
+
+ for (i = 0; i < res_cnt; i++) {
+ hits_mean += res[i].hits / 1000000.0 / (0.0 + res_cnt);
+ important_hits_mean += res[i].important_hits / 1000000.0 / (0.0 + res_cnt);
+ }
+
+ if (res_cnt > 1) {
+ for (i = 0; i < res_cnt; i++) {
+ hits_stddev += (hits_mean - res[i].hits / 1000000.0) *
+ (hits_mean - res[i].hits / 1000000.0) /
+ (res_cnt - 1.0);
+ important_hits_stddev +=
+ (important_hits_mean - res[i].important_hits / 1000000.0) *
+ (important_hits_mean - res[i].important_hits / 1000000.0) /
+ (res_cnt - 1.0);
+ }
+
+ hits_stddev = sqrt(hits_stddev);
+ important_hits_stddev = sqrt(important_hits_stddev);
+ }
+ printf("Summary: hits throughput %8.3lf \u00B1 %5.3lf M ops/s, ",
+ hits_mean, hits_stddev);
+ printf("hits latency %8.3lf ns/op, ", 1000.0 / hits_mean);
+ printf("important_hits throughput %8.3lf \u00B1 %5.3lf M ops/s\n",
+ important_hits_mean, important_hits_stddev);
}
const char *argp_program_version = "benchmark";
@@ -123,6 +263,7 @@ static const struct argp_option opts[] = {
{ "consumers", 'c', "NUM", 0, "Number of consumer threads"},
{ "verbose", 'v', NULL, 0, "Verbose debug output"},
{ "affinity", 'a', NULL, 0, "Set consumer/producer thread affinity"},
+ { "quiet", 'q', NULL, 0, "Be more quiet"},
{ "prod-affinity", ARG_PROD_AFFINITY_SET, "CPUSET", 0,
"Set of CPUs for producer threads; implies --affinity"},
{ "cons-affinity", ARG_CONS_AFFINITY_SET, "CPUSET", 0,
@@ -131,16 +272,34 @@ static const struct argp_option opts[] = {
};
extern struct argp bench_ringbufs_argp;
+extern struct argp bench_bloom_map_argp;
+extern struct argp bench_bpf_loop_argp;
+extern struct argp bench_local_storage_argp;
+extern struct argp bench_local_storage_rcu_tasks_trace_argp;
+extern struct argp bench_strncmp_argp;
+extern struct argp bench_hashmap_lookup_argp;
+extern struct argp bench_local_storage_create_argp;
+extern struct argp bench_htab_mem_argp;
static const struct argp_child bench_parsers[] = {
{ &bench_ringbufs_argp, 0, "Ring buffers benchmark", 0 },
+ { &bench_bloom_map_argp, 0, "Bloom filter map benchmark", 0 },
+ { &bench_bpf_loop_argp, 0, "bpf_loop helper benchmark", 0 },
+ { &bench_local_storage_argp, 0, "local_storage benchmark", 0 },
+ { &bench_strncmp_argp, 0, "bpf_strncmp helper benchmark", 0 },
+ { &bench_local_storage_rcu_tasks_trace_argp, 0,
+ "local_storage RCU Tasks Trace slowdown benchmark", 0 },
+ { &bench_hashmap_lookup_argp, 0, "Hashmap lookup benchmark", 0 },
+ { &bench_local_storage_create_argp, 0, "local-storage-create benchmark", 0 },
+ { &bench_htab_mem_argp, 0, "hash map memory benchmark", 0 },
{},
};
+/* Make pos_args global, so that we can run argp_parse twice, if necessary */
+static int pos_args;
+
static error_t parse_arg(int key, char *arg, struct argp_state *state)
{
- static int pos_args;
-
switch (key) {
case 'v':
env.verbose = true;
@@ -164,14 +323,14 @@ static error_t parse_arg(int key, char *arg, struct argp_state *state)
break;
case 'p':
env.producer_cnt = strtol(arg, NULL, 10);
- if (env.producer_cnt <= 0) {
+ if (env.producer_cnt < 0) {
fprintf(stderr, "Invalid producer count: %s\n", arg);
argp_usage(state);
}
break;
case 'c':
env.consumer_cnt = strtol(arg, NULL, 10);
- if (env.consumer_cnt <= 0) {
+ if (env.consumer_cnt < 0) {
fprintf(stderr, "Invalid consumer count: %s\n", arg);
argp_usage(state);
}
@@ -179,6 +338,9 @@ static error_t parse_arg(int key, char *arg, struct argp_state *state)
case 'a':
env.affinity = true;
break;
+ case 'q':
+ env.quiet = true;
+ break;
case ARG_PROD_AFFINITY_SET:
env.affinity = true;
if (parse_num_list(arg, &env.prod_cpus.cpus,
@@ -209,7 +371,7 @@ static error_t parse_arg(int key, char *arg, struct argp_state *state)
return 0;
}
-static void parse_cmdline_args(int argc, char **argv)
+static void parse_cmdline_args_init(int argc, char **argv)
{
static const struct argp argp = {
.options = opts,
@@ -219,9 +381,25 @@ static void parse_cmdline_args(int argc, char **argv)
};
if (argp_parse(&argp, argc, argv, 0, NULL, NULL))
exit(1);
- if (!env.list && !env.bench_name) {
- argp_help(&argp, stderr, ARGP_HELP_DOC, "bench");
- exit(1);
+}
+
+static void parse_cmdline_args_final(int argc, char **argv)
+{
+ struct argp_child bench_parsers[2] = {};
+ const struct argp argp = {
+ .options = opts,
+ .parser = parse_arg,
+ .doc = argp_program_doc,
+ .children = bench_parsers,
+ };
+
+ /* Parse arguments the second time with the correct set of parsers */
+ if (bench->argp) {
+ bench_parsers[0].argp = bench->argp;
+ bench_parsers[0].header = bench->name;
+ pos_args = 0;
+ if (argp_parse(&argp, argc, argv, 0, NULL, NULL))
+ exit(1);
}
}
@@ -265,12 +443,14 @@ static void setup_timer()
static void set_thread_affinity(pthread_t thread, int cpu)
{
cpu_set_t cpuset;
+ int err;
CPU_ZERO(&cpuset);
CPU_SET(cpu, &cpuset);
- if (pthread_setaffinity_np(thread, sizeof(cpuset), &cpuset)) {
+ err = pthread_setaffinity_np(thread, sizeof(cpuset), &cpuset);
+ if (err) {
fprintf(stderr, "setting affinity to CPU #%d failed: %d\n",
- cpu, errno);
+ cpu, -err);
exit(1);
}
}
@@ -291,7 +471,7 @@ static int next_cpu(struct cpu_set *cpu_set)
exit(1);
}
- return cpu_set->next_cpu++;
+ return cpu_set->next_cpu++ % env.nr_cpus;
}
static struct bench_state {
@@ -311,17 +491,44 @@ extern const struct bench bench_rename_kretprobe;
extern const struct bench bench_rename_rawtp;
extern const struct bench bench_rename_fentry;
extern const struct bench bench_rename_fexit;
-extern const struct bench bench_rename_fmodret;
extern const struct bench bench_trig_base;
extern const struct bench bench_trig_tp;
extern const struct bench bench_trig_rawtp;
extern const struct bench bench_trig_kprobe;
+extern const struct bench bench_trig_kretprobe;
+extern const struct bench bench_trig_kprobe_multi;
+extern const struct bench bench_trig_kretprobe_multi;
extern const struct bench bench_trig_fentry;
+extern const struct bench bench_trig_fexit;
+extern const struct bench bench_trig_fentry_sleep;
extern const struct bench bench_trig_fmodret;
+extern const struct bench bench_trig_uprobe_base;
+extern const struct bench bench_trig_uprobe_nop;
+extern const struct bench bench_trig_uretprobe_nop;
+extern const struct bench bench_trig_uprobe_push;
+extern const struct bench bench_trig_uretprobe_push;
+extern const struct bench bench_trig_uprobe_ret;
+extern const struct bench bench_trig_uretprobe_ret;
extern const struct bench bench_rb_libbpf;
extern const struct bench bench_rb_custom;
extern const struct bench bench_pb_libbpf;
extern const struct bench bench_pb_custom;
+extern const struct bench bench_bloom_lookup;
+extern const struct bench bench_bloom_update;
+extern const struct bench bench_bloom_false_positive;
+extern const struct bench bench_hashmap_without_bloom;
+extern const struct bench bench_hashmap_with_bloom;
+extern const struct bench bench_bpf_loop;
+extern const struct bench bench_strncmp_no_helper;
+extern const struct bench bench_strncmp_helper;
+extern const struct bench bench_bpf_hashmap_full_update;
+extern const struct bench bench_local_storage_cache_seq_get;
+extern const struct bench bench_local_storage_cache_interleaved_get;
+extern const struct bench bench_local_storage_cache_hashmap_control;
+extern const struct bench bench_local_storage_tasks_trace;
+extern const struct bench bench_bpf_hashmap_lookup;
+extern const struct bench bench_local_storage_create;
+extern const struct bench bench_htab_mem;
static const struct bench *benchs[] = {
&bench_count_global,
@@ -332,28 +539,54 @@ static const struct bench *benchs[] = {
&bench_rename_rawtp,
&bench_rename_fentry,
&bench_rename_fexit,
- &bench_rename_fmodret,
&bench_trig_base,
&bench_trig_tp,
&bench_trig_rawtp,
&bench_trig_kprobe,
+ &bench_trig_kretprobe,
+ &bench_trig_kprobe_multi,
+ &bench_trig_kretprobe_multi,
&bench_trig_fentry,
+ &bench_trig_fexit,
+ &bench_trig_fentry_sleep,
&bench_trig_fmodret,
+ &bench_trig_uprobe_base,
+ &bench_trig_uprobe_nop,
+ &bench_trig_uretprobe_nop,
+ &bench_trig_uprobe_push,
+ &bench_trig_uretprobe_push,
+ &bench_trig_uprobe_ret,
+ &bench_trig_uretprobe_ret,
&bench_rb_libbpf,
&bench_rb_custom,
&bench_pb_libbpf,
&bench_pb_custom,
+ &bench_bloom_lookup,
+ &bench_bloom_update,
+ &bench_bloom_false_positive,
+ &bench_hashmap_without_bloom,
+ &bench_hashmap_with_bloom,
+ &bench_bpf_loop,
+ &bench_strncmp_no_helper,
+ &bench_strncmp_helper,
+ &bench_bpf_hashmap_full_update,
+ &bench_local_storage_cache_seq_get,
+ &bench_local_storage_cache_interleaved_get,
+ &bench_local_storage_cache_hashmap_control,
+ &bench_local_storage_tasks_trace,
+ &bench_bpf_hashmap_lookup,
+ &bench_local_storage_create,
+ &bench_htab_mem,
};
-static void setup_benchmark()
+static void find_benchmark(void)
{
- int i, err;
+ int i;
if (!env.bench_name) {
fprintf(stderr, "benchmark name is not specified\n");
exit(1);
}
-
for (i = 0; i < ARRAY_SIZE(benchs); i++) {
if (strcmp(benchs[i]->name, env.bench_name) == 0) {
bench = benchs[i];
@@ -364,8 +597,14 @@ static void setup_benchmark()
fprintf(stderr, "benchmark '%s' not found\n", env.bench_name);
exit(1);
}
+}
+
+static void setup_benchmark(void)
+{
+ int i, err;
- printf("Setting up benchmark '%s'...\n", bench->name);
+ if (!env.quiet)
+ printf("Setting up benchmark '%s'...\n", bench->name);
state.producers = calloc(env.producer_cnt, sizeof(*state.producers));
state.consumers = calloc(env.consumer_cnt, sizeof(*state.consumers));
@@ -380,11 +619,15 @@ static void setup_benchmark()
bench->setup();
for (i = 0; i < env.consumer_cnt; i++) {
+ if (!bench->consumer_thread) {
+ fprintf(stderr, "benchmark doesn't support consumers!\n");
+ exit(1);
+ }
err = pthread_create(&state.consumers[i], NULL,
bench->consumer_thread, (void *)(long)i);
if (err) {
fprintf(stderr, "failed to create consumer thread #%d: %d\n",
- i, -errno);
+ i, -err);
exit(1);
}
if (env.affinity)
@@ -399,11 +642,15 @@ static void setup_benchmark()
env.prod_cpus.next_cpu = env.cons_cpus.next_cpu;
for (i = 0; i < env.producer_cnt; i++) {
+ if (!bench->producer_thread) {
+ fprintf(stderr, "benchmark doesn't support producers!\n");
+ exit(1);
+ }
err = pthread_create(&state.producers[i], NULL,
bench->producer_thread, (void *)(long)i);
if (err) {
fprintf(stderr, "failed to create producer thread #%d: %d\n",
- i, -errno);
+ i, -err);
exit(1);
}
if (env.affinity)
@@ -411,7 +658,8 @@ static void setup_benchmark()
next_cpu(&env.prod_cpus));
}
- printf("Benchmark '%s' started.\n", bench->name);
+ if (!env.quiet)
+ printf("Benchmark '%s' started.\n", bench->name);
}
static pthread_mutex_t bench_done_mtx = PTHREAD_MUTEX_INITIALIZER;
@@ -435,7 +683,8 @@ static void collect_measurements(long delta_ns) {
int main(int argc, char **argv)
{
- parse_cmdline_args(argc, argv);
+ env.nr_cpus = get_nprocs();
+ parse_cmdline_args_init(argc, argv);
if (env.list) {
int i;
@@ -447,6 +696,9 @@ int main(int argc, char **argv)
return 0;
}
+ find_benchmark();
+ parse_cmdline_args_final(argc, argv);
+
setup_benchmark();
setup_timer();
@@ -462,4 +714,3 @@ int main(int argc, char **argv)
return 0;
}
-
diff --git a/tools/testing/selftests/bpf/bench.h b/tools/testing/selftests/bpf/bench.h
index c1f48a473b02..68180d8f8558 100644
--- a/tools/testing/selftests/bpf/bench.h
+++ b/tools/testing/selftests/bpf/bench.h
@@ -24,21 +24,34 @@ struct env {
bool verbose;
bool list;
bool affinity;
+ bool quiet;
int consumer_cnt;
int producer_cnt;
+ int nr_cpus;
struct cpu_set prod_cpus;
struct cpu_set cons_cpus;
};
+struct basic_stats {
+ double mean;
+ double stddev;
+};
+
struct bench_res {
long hits;
long drops;
+ long false_hits;
+ long important_hits;
+ unsigned long gp_ns;
+ unsigned long gp_ct;
+ unsigned int stime;
};
struct bench {
const char *name;
- void (*validate)();
- void (*setup)();
+ const struct argp *argp;
+ void (*validate)(void);
+ void (*setup)(void);
void *(*producer_thread)(void *ctx);
void *(*consumer_thread)(void *ctx);
void (*measure)(struct bench_res* res);
@@ -53,17 +66,20 @@ struct counter {
extern struct env env;
extern const struct bench *bench;
-void setup_libbpf();
+void setup_libbpf(void);
void hits_drops_report_progress(int iter, struct bench_res *res, long delta_ns);
void hits_drops_report_final(struct bench_res res[], int res_cnt);
-
-static inline __u64 get_time_ns() {
- struct timespec t;
-
- clock_gettime(CLOCK_MONOTONIC, &t);
-
- return (u64)t.tv_sec * 1000000000 + t.tv_nsec;
-}
+void false_hits_report_progress(int iter, struct bench_res *res, long delta_ns);
+void false_hits_report_final(struct bench_res res[], int res_cnt);
+void ops_report_progress(int iter, struct bench_res *res, long delta_ns);
+void ops_report_final(struct bench_res res[], int res_cnt);
+void local_storage_report_progress(int iter, struct bench_res *res,
+ long delta_ns);
+void local_storage_report_final(struct bench_res res[], int res_cnt);
+void grace_period_latency_basic_stats(struct bench_res res[], int res_cnt,
+ struct basic_stats *gp_stat);
+void grace_period_ticks_basic_stats(struct bench_res res[], int res_cnt,
+ struct basic_stats *gp_stat);
static inline void atomic_inc(long *value)
{
diff --git a/tools/testing/selftests/bpf/benchs/bench_bloom_filter_map.c b/tools/testing/selftests/bpf/benchs/bench_bloom_filter_map.c
new file mode 100644
index 000000000000..e289dd1a14ee
--- /dev/null
+++ b/tools/testing/selftests/bpf/benchs/bench_bloom_filter_map.c
@@ -0,0 +1,477 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2021 Facebook */
+
+#include <argp.h>
+#include <linux/log2.h>
+#include <pthread.h>
+#include "bench.h"
+#include "bloom_filter_bench.skel.h"
+#include "bpf_util.h"
+
+static struct ctx {
+ bool use_array_map;
+ bool use_hashmap;
+ bool hashmap_use_bloom;
+ bool count_false_hits;
+
+ struct bloom_filter_bench *skel;
+
+ int bloom_fd;
+ int hashmap_fd;
+ int array_map_fd;
+
+ pthread_mutex_t map_done_mtx;
+ pthread_cond_t map_done_cv;
+ bool map_done;
+ bool map_prepare_err;
+
+ __u32 next_map_idx;
+} ctx = {
+ .map_done_mtx = PTHREAD_MUTEX_INITIALIZER,
+ .map_done_cv = PTHREAD_COND_INITIALIZER,
+};
+
+struct stat {
+ __u32 stats[3];
+};
+
+static struct {
+ __u32 nr_entries;
+ __u8 nr_hash_funcs;
+ __u8 value_size;
+} args = {
+ .nr_entries = 1000,
+ .nr_hash_funcs = 3,
+ .value_size = 8,
+};
+
+enum {
+ ARG_NR_ENTRIES = 3000,
+ ARG_NR_HASH_FUNCS = 3001,
+ ARG_VALUE_SIZE = 3002,
+};
+
+static const struct argp_option opts[] = {
+ { "nr_entries", ARG_NR_ENTRIES, "NR_ENTRIES", 0,
+ "Set number of expected unique entries in the bloom filter"},
+ { "nr_hash_funcs", ARG_NR_HASH_FUNCS, "NR_HASH_FUNCS", 0,
+ "Set number of hash functions in the bloom filter"},
+ { "value_size", ARG_VALUE_SIZE, "VALUE_SIZE", 0,
+ "Set value size (in bytes) of bloom filter entries"},
+ {},
+};
+
+static error_t parse_arg(int key, char *arg, struct argp_state *state)
+{
+ long ret;
+
+ switch (key) {
+ case ARG_NR_ENTRIES:
+ ret = strtol(arg, NULL, 10);
+ if (ret < 1 || ret > UINT_MAX) {
+ fprintf(stderr, "Invalid nr_entries count.");
+ argp_usage(state);
+ }
+ args.nr_entries = ret;
+ break;
+ case ARG_NR_HASH_FUNCS:
+ ret = strtol(arg, NULL, 10);
+ if (ret < 1 || ret > 15) {
+ fprintf(stderr,
+ "The bloom filter must use 1 to 15 hash functions.");
+ argp_usage(state);
+ }
+ args.nr_hash_funcs = ret;
+ break;
+ case ARG_VALUE_SIZE:
+ ret = strtol(arg, NULL, 10);
+ if (ret < 2 || ret > 256) {
+ fprintf(stderr,
+ "Invalid value size. Must be between 2 and 256 bytes");
+ argp_usage(state);
+ }
+ args.value_size = ret;
+ break;
+ default:
+ return ARGP_ERR_UNKNOWN;
+ }
+
+ return 0;
+}
+
+/* exported into benchmark runner */
+const struct argp bench_bloom_map_argp = {
+ .options = opts,
+ .parser = parse_arg,
+};
+
+static void validate(void)
+{
+ if (env.consumer_cnt != 0) {
+ fprintf(stderr,
+ "The bloom filter benchmarks do not support consumer\n");
+ exit(1);
+ }
+}
+
+static inline void trigger_bpf_program(void)
+{
+ syscall(__NR_getpgid);
+}
+
+static void *producer(void *input)
+{
+ while (true)
+ trigger_bpf_program();
+
+ return NULL;
+}
+
+static void *map_prepare_thread(void *arg)
+{
+ __u32 val_size, i;
+ void *val = NULL;
+ int err;
+
+ val_size = args.value_size;
+ val = malloc(val_size);
+ if (!val) {
+ ctx.map_prepare_err = true;
+ goto done;
+ }
+
+ while (true) {
+ i = __atomic_add_fetch(&ctx.next_map_idx, 1, __ATOMIC_RELAXED);
+ if (i > args.nr_entries)
+ break;
+
+again:
+ /* Populate hashmap, bloom filter map, and array map with the same
+ * random values
+ */
+ err = syscall(__NR_getrandom, val, val_size, 0);
+ if (err != val_size) {
+ ctx.map_prepare_err = true;
+ fprintf(stderr, "failed to get random value: %d\n", -errno);
+ break;
+ }
+
+ if (ctx.use_hashmap) {
+ err = bpf_map_update_elem(ctx.hashmap_fd, val, val, BPF_NOEXIST);
+ if (err) {
+ if (err != -EEXIST) {
+ ctx.map_prepare_err = true;
+ fprintf(stderr, "failed to add elem to hashmap: %d\n",
+ -errno);
+ break;
+ }
+ goto again;
+ }
+ }
+
+ i--;
+
+ if (ctx.use_array_map) {
+ err = bpf_map_update_elem(ctx.array_map_fd, &i, val, 0);
+ if (err) {
+ ctx.map_prepare_err = true;
+ fprintf(stderr, "failed to add elem to array map: %d\n", -errno);
+ break;
+ }
+ }
+
+ if (ctx.use_hashmap && !ctx.hashmap_use_bloom)
+ continue;
+
+ err = bpf_map_update_elem(ctx.bloom_fd, NULL, val, 0);
+ if (err) {
+ ctx.map_prepare_err = true;
+ fprintf(stderr,
+ "failed to add elem to bloom filter map: %d\n", -errno);
+ break;
+ }
+ }
+done:
+ pthread_mutex_lock(&ctx.map_done_mtx);
+ ctx.map_done = true;
+ pthread_cond_signal(&ctx.map_done_cv);
+ pthread_mutex_unlock(&ctx.map_done_mtx);
+
+ if (val)
+ free(val);
+
+ return NULL;
+}
+
+static void populate_maps(void)
+{
+ unsigned int nr_cpus = bpf_num_possible_cpus();
+ pthread_t map_thread;
+ int i, err, nr_rand_bytes;
+
+ ctx.bloom_fd = bpf_map__fd(ctx.skel->maps.bloom_map);
+ ctx.hashmap_fd = bpf_map__fd(ctx.skel->maps.hashmap);
+ ctx.array_map_fd = bpf_map__fd(ctx.skel->maps.array_map);
+
+ for (i = 0; i < nr_cpus; i++) {
+ err = pthread_create(&map_thread, NULL, map_prepare_thread,
+ NULL);
+ if (err) {
+ fprintf(stderr, "failed to create pthread: %d\n", -errno);
+ exit(1);
+ }
+ }
+
+ pthread_mutex_lock(&ctx.map_done_mtx);
+ while (!ctx.map_done)
+ pthread_cond_wait(&ctx.map_done_cv, &ctx.map_done_mtx);
+ pthread_mutex_unlock(&ctx.map_done_mtx);
+
+ if (ctx.map_prepare_err)
+ exit(1);
+
+ nr_rand_bytes = syscall(__NR_getrandom, ctx.skel->bss->rand_vals,
+ ctx.skel->rodata->nr_rand_bytes, 0);
+ if (nr_rand_bytes != ctx.skel->rodata->nr_rand_bytes) {
+ fprintf(stderr, "failed to get random bytes\n");
+ exit(1);
+ }
+}
+
+static void check_args(void)
+{
+ if (args.value_size < 8) {
+ __u64 nr_unique_entries = 1ULL << (args.value_size * 8);
+
+ if (args.nr_entries > nr_unique_entries) {
+ fprintf(stderr,
+ "Not enough unique values for the nr_entries requested\n");
+ exit(1);
+ }
+ }
+}
+
+static struct bloom_filter_bench *setup_skeleton(void)
+{
+ struct bloom_filter_bench *skel;
+
+ check_args();
+
+ setup_libbpf();
+
+ skel = bloom_filter_bench__open();
+ if (!skel) {
+ fprintf(stderr, "failed to open skeleton\n");
+ exit(1);
+ }
+
+ skel->rodata->hashmap_use_bloom = ctx.hashmap_use_bloom;
+ skel->rodata->count_false_hits = ctx.count_false_hits;
+
+ /* Resize number of entries */
+ bpf_map__set_max_entries(skel->maps.hashmap, args.nr_entries);
+
+ bpf_map__set_max_entries(skel->maps.array_map, args.nr_entries);
+
+ bpf_map__set_max_entries(skel->maps.bloom_map, args.nr_entries);
+
+ /* Set value size */
+ bpf_map__set_value_size(skel->maps.array_map, args.value_size);
+
+ bpf_map__set_value_size(skel->maps.bloom_map, args.value_size);
+
+ bpf_map__set_value_size(skel->maps.hashmap, args.value_size);
+
+ /* For the hashmap, we use the value as the key as well */
+ bpf_map__set_key_size(skel->maps.hashmap, args.value_size);
+
+ skel->bss->value_size = args.value_size;
+
+ /* Set number of hash functions */
+ bpf_map__set_map_extra(skel->maps.bloom_map, args.nr_hash_funcs);
+
+ if (bloom_filter_bench__load(skel)) {
+ fprintf(stderr, "failed to load skeleton\n");
+ exit(1);
+ }
+
+ return skel;
+}
+
+static void bloom_lookup_setup(void)
+{
+ struct bpf_link *link;
+
+ ctx.use_array_map = true;
+
+ ctx.skel = setup_skeleton();
+
+ populate_maps();
+
+ link = bpf_program__attach(ctx.skel->progs.bloom_lookup);
+ if (!link) {
+ fprintf(stderr, "failed to attach program!\n");
+ exit(1);
+ }
+}
+
+static void bloom_update_setup(void)
+{
+ struct bpf_link *link;
+
+ ctx.use_array_map = true;
+
+ ctx.skel = setup_skeleton();
+
+ populate_maps();
+
+ link = bpf_program__attach(ctx.skel->progs.bloom_update);
+ if (!link) {
+ fprintf(stderr, "failed to attach program!\n");
+ exit(1);
+ }
+}
+
+static void false_positive_setup(void)
+{
+ struct bpf_link *link;
+
+ ctx.use_hashmap = true;
+ ctx.hashmap_use_bloom = true;
+ ctx.count_false_hits = true;
+
+ ctx.skel = setup_skeleton();
+
+ populate_maps();
+
+ link = bpf_program__attach(ctx.skel->progs.bloom_hashmap_lookup);
+ if (!link) {
+ fprintf(stderr, "failed to attach program!\n");
+ exit(1);
+ }
+}
+
+static void hashmap_with_bloom_setup(void)
+{
+ struct bpf_link *link;
+
+ ctx.use_hashmap = true;
+ ctx.hashmap_use_bloom = true;
+
+ ctx.skel = setup_skeleton();
+
+ populate_maps();
+
+ link = bpf_program__attach(ctx.skel->progs.bloom_hashmap_lookup);
+ if (!link) {
+ fprintf(stderr, "failed to attach program!\n");
+ exit(1);
+ }
+}
+
+static void hashmap_no_bloom_setup(void)
+{
+ struct bpf_link *link;
+
+ ctx.use_hashmap = true;
+
+ ctx.skel = setup_skeleton();
+
+ populate_maps();
+
+ link = bpf_program__attach(ctx.skel->progs.bloom_hashmap_lookup);
+ if (!link) {
+ fprintf(stderr, "failed to attach program!\n");
+ exit(1);
+ }
+}
+
+static void measure(struct bench_res *res)
+{
+ unsigned long total_hits = 0, total_drops = 0, total_false_hits = 0;
+ static unsigned long last_hits, last_drops, last_false_hits;
+ unsigned int nr_cpus = bpf_num_possible_cpus();
+ int hit_key, drop_key, false_hit_key;
+ int i;
+
+ hit_key = ctx.skel->rodata->hit_key;
+ drop_key = ctx.skel->rodata->drop_key;
+ false_hit_key = ctx.skel->rodata->false_hit_key;
+
+ if (ctx.skel->bss->error != 0) {
+ fprintf(stderr, "error (%d) when searching the bloom filter\n",
+ ctx.skel->bss->error);
+ exit(1);
+ }
+
+ for (i = 0; i < nr_cpus; i++) {
+ struct stat *s = (void *)&ctx.skel->bss->percpu_stats[i];
+
+ total_hits += s->stats[hit_key];
+ total_drops += s->stats[drop_key];
+ total_false_hits += s->stats[false_hit_key];
+ }
+
+ res->hits = total_hits - last_hits;
+ res->drops = total_drops - last_drops;
+ res->false_hits = total_false_hits - last_false_hits;
+
+ last_hits = total_hits;
+ last_drops = total_drops;
+ last_false_hits = total_false_hits;
+}
+
+const struct bench bench_bloom_lookup = {
+ .name = "bloom-lookup",
+ .argp = &bench_bloom_map_argp,
+ .validate = validate,
+ .setup = bloom_lookup_setup,
+ .producer_thread = producer,
+ .measure = measure,
+ .report_progress = hits_drops_report_progress,
+ .report_final = hits_drops_report_final,
+};
+
+const struct bench bench_bloom_update = {
+ .name = "bloom-update",
+ .argp = &bench_bloom_map_argp,
+ .validate = validate,
+ .setup = bloom_update_setup,
+ .producer_thread = producer,
+ .measure = measure,
+ .report_progress = hits_drops_report_progress,
+ .report_final = hits_drops_report_final,
+};
+
+const struct bench bench_bloom_false_positive = {
+ .name = "bloom-false-positive",
+ .argp = &bench_bloom_map_argp,
+ .validate = validate,
+ .setup = false_positive_setup,
+ .producer_thread = producer,
+ .measure = measure,
+ .report_progress = false_hits_report_progress,
+ .report_final = false_hits_report_final,
+};
+
+const struct bench bench_hashmap_without_bloom = {
+ .name = "hashmap-without-bloom",
+ .argp = &bench_bloom_map_argp,
+ .validate = validate,
+ .setup = hashmap_no_bloom_setup,
+ .producer_thread = producer,
+ .measure = measure,
+ .report_progress = hits_drops_report_progress,
+ .report_final = hits_drops_report_final,
+};
+
+const struct bench bench_hashmap_with_bloom = {
+ .name = "hashmap-with-bloom",
+ .argp = &bench_bloom_map_argp,
+ .validate = validate,
+ .setup = hashmap_with_bloom_setup,
+ .producer_thread = producer,
+ .measure = measure,
+ .report_progress = hits_drops_report_progress,
+ .report_final = hits_drops_report_final,
+};
diff --git a/tools/testing/selftests/bpf/benchs/bench_bpf_hashmap_full_update.c b/tools/testing/selftests/bpf/benchs/bench_bpf_hashmap_full_update.c
new file mode 100644
index 000000000000..ee1dc12c5e5e
--- /dev/null
+++ b/tools/testing/selftests/bpf/benchs/bench_bpf_hashmap_full_update.c
@@ -0,0 +1,89 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2022 Bytedance */
+
+#include "bench.h"
+#include "bpf_hashmap_full_update_bench.skel.h"
+#include "bpf_util.h"
+
+/* BPF triggering benchmarks */
+static struct ctx {
+ struct bpf_hashmap_full_update_bench *skel;
+} ctx;
+
+#define MAX_LOOP_NUM 10000
+
+static void validate(void)
+{
+ if (env.consumer_cnt != 0) {
+ fprintf(stderr, "benchmark doesn't support consumer!\n");
+ exit(1);
+ }
+}
+
+static void *producer(void *input)
+{
+ while (true) {
+ /* trigger the bpf program */
+ syscall(__NR_getpgid);
+ }
+
+ return NULL;
+}
+
+static void measure(struct bench_res *res)
+{
+}
+
+static void setup(void)
+{
+ struct bpf_link *link;
+ int map_fd, i, max_entries;
+
+ setup_libbpf();
+
+ ctx.skel = bpf_hashmap_full_update_bench__open_and_load();
+ if (!ctx.skel) {
+ fprintf(stderr, "failed to open skeleton\n");
+ exit(1);
+ }
+
+ ctx.skel->bss->nr_loops = MAX_LOOP_NUM;
+
+ link = bpf_program__attach(ctx.skel->progs.benchmark);
+ if (!link) {
+ fprintf(stderr, "failed to attach program!\n");
+ exit(1);
+ }
+
+ /* fill hash_map */
+ map_fd = bpf_map__fd(ctx.skel->maps.hash_map_bench);
+ max_entries = bpf_map__max_entries(ctx.skel->maps.hash_map_bench);
+ for (i = 0; i < max_entries; i++)
+ bpf_map_update_elem(map_fd, &i, &i, BPF_ANY);
+}
+
+static void hashmap_report_final(struct bench_res res[], int res_cnt)
+{
+ unsigned int nr_cpus = bpf_num_possible_cpus();
+ int i;
+
+ for (i = 0; i < nr_cpus; i++) {
+ u64 time = ctx.skel->bss->percpu_time[i];
+
+ if (!time)
+ continue;
+
+ printf("%d:hash_map_full_perf %lld events per sec\n",
+ i, ctx.skel->bss->nr_loops * 1000000000ll / time);
+ }
+}
+
+const struct bench bench_bpf_hashmap_full_update = {
+ .name = "bpf-hashmap-full-update",
+ .validate = validate,
+ .setup = setup,
+ .producer_thread = producer,
+ .measure = measure,
+ .report_progress = NULL,
+ .report_final = hashmap_report_final,
+};
diff --git a/tools/testing/selftests/bpf/benchs/bench_bpf_hashmap_lookup.c b/tools/testing/selftests/bpf/benchs/bench_bpf_hashmap_lookup.c
new file mode 100644
index 000000000000..279ff1b8b5b2
--- /dev/null
+++ b/tools/testing/selftests/bpf/benchs/bench_bpf_hashmap_lookup.c
@@ -0,0 +1,277 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2023 Isovalent */
+
+#include <sys/random.h>
+#include <argp.h>
+#include "bench.h"
+#include "bpf_hashmap_lookup.skel.h"
+#include "bpf_util.h"
+
+/* BPF triggering benchmarks */
+static struct ctx {
+ struct bpf_hashmap_lookup *skel;
+} ctx;
+
+/* only available to kernel, so define it here */
+#define BPF_MAX_LOOPS (1<<23)
+
+#define MAX_KEY_SIZE 1024 /* the size of the key map */
+
+static struct {
+ __u32 key_size;
+ __u32 map_flags;
+ __u32 max_entries;
+ __u32 nr_entries;
+ __u32 nr_loops;
+} args = {
+ .key_size = 4,
+ .map_flags = 0,
+ .max_entries = 1000,
+ .nr_entries = 500,
+ .nr_loops = 1000000,
+};
+
+enum {
+ ARG_KEY_SIZE = 8001,
+ ARG_MAP_FLAGS,
+ ARG_MAX_ENTRIES,
+ ARG_NR_ENTRIES,
+ ARG_NR_LOOPS,
+};
+
+static const struct argp_option opts[] = {
+ { "key_size", ARG_KEY_SIZE, "KEY_SIZE", 0,
+ "The hashmap key size (max 1024)"},
+ { "map_flags", ARG_MAP_FLAGS, "MAP_FLAGS", 0,
+ "The hashmap flags passed to BPF_MAP_CREATE"},
+ { "max_entries", ARG_MAX_ENTRIES, "MAX_ENTRIES", 0,
+ "The hashmap max entries"},
+ { "nr_entries", ARG_NR_ENTRIES, "NR_ENTRIES", 0,
+ "The number of entries to insert/lookup"},
+ { "nr_loops", ARG_NR_LOOPS, "NR_LOOPS", 0,
+ "The number of loops for the benchmark"},
+ {},
+};
+
+static error_t parse_arg(int key, char *arg, struct argp_state *state)
+{
+ long ret;
+
+ switch (key) {
+ case ARG_KEY_SIZE:
+ ret = strtol(arg, NULL, 10);
+ if (ret < 1 || ret > MAX_KEY_SIZE) {
+ fprintf(stderr, "invalid key_size");
+ argp_usage(state);
+ }
+ args.key_size = ret;
+ break;
+ case ARG_MAP_FLAGS:
+ ret = strtol(arg, NULL, 0);
+ if (ret < 0 || ret > UINT_MAX) {
+ fprintf(stderr, "invalid map_flags");
+ argp_usage(state);
+ }
+ args.map_flags = ret;
+ break;
+ case ARG_MAX_ENTRIES:
+ ret = strtol(arg, NULL, 10);
+ if (ret < 1 || ret > UINT_MAX) {
+ fprintf(stderr, "invalid max_entries");
+ argp_usage(state);
+ }
+ args.max_entries = ret;
+ break;
+ case ARG_NR_ENTRIES:
+ ret = strtol(arg, NULL, 10);
+ if (ret < 1 || ret > UINT_MAX) {
+ fprintf(stderr, "invalid nr_entries");
+ argp_usage(state);
+ }
+ args.nr_entries = ret;
+ break;
+ case ARG_NR_LOOPS:
+ ret = strtol(arg, NULL, 10);
+ if (ret < 1 || ret > BPF_MAX_LOOPS) {
+ fprintf(stderr, "invalid nr_loops: %ld (min=1 max=%u)\n",
+ ret, BPF_MAX_LOOPS);
+ argp_usage(state);
+ }
+ args.nr_loops = ret;
+ break;
+ default:
+ return ARGP_ERR_UNKNOWN;
+ }
+
+ return 0;
+}
+
+const struct argp bench_hashmap_lookup_argp = {
+ .options = opts,
+ .parser = parse_arg,
+};
+
+static void validate(void)
+{
+ if (env.consumer_cnt != 0) {
+ fprintf(stderr, "benchmark doesn't support consumer!\n");
+ exit(1);
+ }
+
+ if (args.nr_entries > args.max_entries) {
+ fprintf(stderr, "args.nr_entries is too big! (max %u, got %u)\n",
+ args.max_entries, args.nr_entries);
+ exit(1);
+ }
+}
+
+static void *producer(void *input)
+{
+ while (true) {
+ /* trigger the bpf program */
+ syscall(__NR_getpgid);
+ }
+ return NULL;
+}
+
+static void measure(struct bench_res *res)
+{
+}
+
+static inline void patch_key(u32 i, u32 *key)
+{
+#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
+ *key = i + 1;
+#else
+ *key = __builtin_bswap32(i + 1);
+#endif
+ /* the rest of key is random */
+}
+
+static void setup(void)
+{
+ struct bpf_link *link;
+ int map_fd;
+ int ret;
+ int i;
+
+ setup_libbpf();
+
+ ctx.skel = bpf_hashmap_lookup__open();
+ if (!ctx.skel) {
+ fprintf(stderr, "failed to open skeleton\n");
+ exit(1);
+ }
+
+ bpf_map__set_max_entries(ctx.skel->maps.hash_map_bench, args.max_entries);
+ bpf_map__set_key_size(ctx.skel->maps.hash_map_bench, args.key_size);
+ bpf_map__set_value_size(ctx.skel->maps.hash_map_bench, 8);
+ bpf_map__set_map_flags(ctx.skel->maps.hash_map_bench, args.map_flags);
+
+ ctx.skel->bss->nr_entries = args.nr_entries;
+ ctx.skel->bss->nr_loops = args.nr_loops / args.nr_entries;
+
+ if (args.key_size > 4) {
+ for (i = 1; i < args.key_size/4; i++)
+ ctx.skel->bss->key[i] = 2654435761 * i;
+ }
+
+ ret = bpf_hashmap_lookup__load(ctx.skel);
+ if (ret) {
+ bpf_hashmap_lookup__destroy(ctx.skel);
+ fprintf(stderr, "failed to load map: %s", strerror(-ret));
+ exit(1);
+ }
+
+ /* fill in the hash_map */
+ map_fd = bpf_map__fd(ctx.skel->maps.hash_map_bench);
+ for (u64 i = 0; i < args.nr_entries; i++) {
+ patch_key(i, ctx.skel->bss->key);
+ bpf_map_update_elem(map_fd, ctx.skel->bss->key, &i, BPF_ANY);
+ }
+
+ link = bpf_program__attach(ctx.skel->progs.benchmark);
+ if (!link) {
+ fprintf(stderr, "failed to attach program!\n");
+ exit(1);
+ }
+}
+
+static inline double events_from_time(u64 time)
+{
+ if (time)
+ return args.nr_loops * 1000000000llu / time / 1000000.0L;
+
+ return 0;
+}
+
+static int compute_events(u64 *times, double *events_mean, double *events_stddev, u64 *mean_time)
+{
+ int i, n = 0;
+
+ *events_mean = 0;
+ *events_stddev = 0;
+ *mean_time = 0;
+
+ for (i = 0; i < 32; i++) {
+ if (!times[i])
+ break;
+ *mean_time += times[i];
+ *events_mean += events_from_time(times[i]);
+ n += 1;
+ }
+ if (!n)
+ return 0;
+
+ *mean_time /= n;
+ *events_mean /= n;
+
+ if (n > 1) {
+ for (i = 0; i < n; i++) {
+ double events_i = *events_mean - events_from_time(times[i]);
+ *events_stddev += events_i * events_i / (n - 1);
+ }
+ *events_stddev = sqrt(*events_stddev);
+ }
+
+ return n;
+}
+
+static void hashmap_report_final(struct bench_res res[], int res_cnt)
+{
+ unsigned int nr_cpus = bpf_num_possible_cpus();
+ double events_mean, events_stddev;
+ u64 mean_time;
+ int i, n;
+
+ for (i = 0; i < nr_cpus; i++) {
+ n = compute_events(ctx.skel->bss->percpu_times[i], &events_mean,
+ &events_stddev, &mean_time);
+ if (n == 0)
+ continue;
+
+ if (env.quiet) {
+ /* we expect only one cpu to be present */
+ if (env.affinity)
+ printf("%.3lf\n", events_mean);
+ else
+ printf("cpu%02d %.3lf\n", i, events_mean);
+ } else {
+ printf("cpu%02d: lookup %.3lfM ± %.3lfM events/sec"
+ " (approximated from %d samples of ~%lums)\n",
+ i, events_mean, 2*events_stddev,
+ n, mean_time / 1000000);
+ }
+ }
+}
+
+const struct bench bench_bpf_hashmap_lookup = {
+ .name = "bpf-hashmap-lookup",
+ .argp = &bench_hashmap_lookup_argp,
+ .validate = validate,
+ .setup = setup,
+ .producer_thread = producer,
+ .measure = measure,
+ .report_progress = NULL,
+ .report_final = hashmap_report_final,
+};
diff --git a/tools/testing/selftests/bpf/benchs/bench_bpf_loop.c b/tools/testing/selftests/bpf/benchs/bench_bpf_loop.c
new file mode 100644
index 000000000000..a705cfb2bccc
--- /dev/null
+++ b/tools/testing/selftests/bpf/benchs/bench_bpf_loop.c
@@ -0,0 +1,100 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2021 Facebook */
+
+#include <argp.h>
+#include "bench.h"
+#include "bpf_loop_bench.skel.h"
+
+/* BPF triggering benchmarks */
+static struct ctx {
+ struct bpf_loop_bench *skel;
+} ctx;
+
+static struct {
+ __u32 nr_loops;
+} args = {
+ .nr_loops = 10,
+};
+
+enum {
+ ARG_NR_LOOPS = 4000,
+};
+
+static const struct argp_option opts[] = {
+ { "nr_loops", ARG_NR_LOOPS, "nr_loops", 0,
+ "Set number of loops for the bpf_loop helper"},
+ {},
+};
+
+static error_t parse_arg(int key, char *arg, struct argp_state *state)
+{
+ switch (key) {
+ case ARG_NR_LOOPS:
+ args.nr_loops = strtol(arg, NULL, 10);
+ break;
+ default:
+ return ARGP_ERR_UNKNOWN;
+ }
+
+ return 0;
+}
+
+/* exported into benchmark runner */
+const struct argp bench_bpf_loop_argp = {
+ .options = opts,
+ .parser = parse_arg,
+};
+
+static void validate(void)
+{
+ if (env.consumer_cnt != 0) {
+ fprintf(stderr, "benchmark doesn't support consumer!\n");
+ exit(1);
+ }
+}
+
+static void *producer(void *input)
+{
+ while (true)
+ /* trigger the bpf program */
+ syscall(__NR_getpgid);
+
+ return NULL;
+}
+
+static void measure(struct bench_res *res)
+{
+ res->hits = atomic_swap(&ctx.skel->bss->hits, 0);
+}
+
+static void setup(void)
+{
+ struct bpf_link *link;
+
+ setup_libbpf();
+
+ ctx.skel = bpf_loop_bench__open_and_load();
+ if (!ctx.skel) {
+ fprintf(stderr, "failed to open skeleton\n");
+ exit(1);
+ }
+
+ link = bpf_program__attach(ctx.skel->progs.benchmark);
+ if (!link) {
+ fprintf(stderr, "failed to attach program!\n");
+ exit(1);
+ }
+
+ ctx.skel->bss->nr_loops = args.nr_loops;
+}
+
+const struct bench bench_bpf_loop = {
+ .name = "bpf-loop",
+ .argp = &bench_bpf_loop_argp,
+ .validate = validate,
+ .setup = setup,
+ .producer_thread = producer,
+ .measure = measure,
+ .report_progress = ops_report_progress,
+ .report_final = ops_report_final,
+};
diff --git a/tools/testing/selftests/bpf/benchs/bench_count.c b/tools/testing/selftests/bpf/benchs/bench_count.c
index befba7a82643..ba89ed3936b7 100644
--- a/tools/testing/selftests/bpf/benchs/bench_count.c
+++ b/tools/testing/selftests/bpf/benchs/bench_count.c
@@ -18,11 +18,6 @@ static void *count_global_producer(void *input)
return NULL;
}
-static void *count_global_consumer(void *input)
-{
- return NULL;
-}
-
static void count_global_measure(struct bench_res *res)
{
struct count_global_ctx *ctx = &count_global_ctx;
@@ -36,11 +31,11 @@ static struct count_local_ctx {
struct counter *hits;
} count_local_ctx;
-static void count_local_setup()
+static void count_local_setup(void)
{
struct count_local_ctx *ctx = &count_local_ctx;
- ctx->hits = calloc(env.consumer_cnt, sizeof(*ctx->hits));
+ ctx->hits = calloc(env.producer_cnt, sizeof(*ctx->hits));
if (!ctx->hits)
exit(1);
}
@@ -56,11 +51,6 @@ static void *count_local_producer(void *input)
return NULL;
}
-static void *count_local_consumer(void *input)
-{
- return NULL;
-}
-
static void count_local_measure(struct bench_res *res)
{
struct count_local_ctx *ctx = &count_local_ctx;
@@ -74,7 +64,6 @@ static void count_local_measure(struct bench_res *res)
const struct bench bench_count_global = {
.name = "count-global",
.producer_thread = count_global_producer,
- .consumer_thread = count_global_consumer,
.measure = count_global_measure,
.report_progress = hits_drops_report_progress,
.report_final = hits_drops_report_final,
@@ -84,7 +73,6 @@ const struct bench bench_count_local = {
.name = "count-local",
.setup = count_local_setup,
.producer_thread = count_local_producer,
- .consumer_thread = count_local_consumer,
.measure = count_local_measure,
.report_progress = hits_drops_report_progress,
.report_final = hits_drops_report_final,
diff --git a/tools/testing/selftests/bpf/benchs/bench_htab_mem.c b/tools/testing/selftests/bpf/benchs/bench_htab_mem.c
new file mode 100644
index 000000000000..926ee822143e
--- /dev/null
+++ b/tools/testing/selftests/bpf/benchs/bench_htab_mem.c
@@ -0,0 +1,351 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (C) 2023. Huawei Technologies Co., Ltd */
+#include <argp.h>
+#include <stdbool.h>
+#include <pthread.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <sys/param.h>
+#include <fcntl.h>
+
+#include "bench.h"
+#include "bpf_util.h"
+#include "cgroup_helpers.h"
+#include "htab_mem_bench.skel.h"
+
+struct htab_mem_use_case {
+ const char *name;
+ const char **progs;
+ /* Do synchronization between addition thread and deletion thread */
+ bool need_sync;
+};
+
+static struct htab_mem_ctx {
+ const struct htab_mem_use_case *uc;
+ struct htab_mem_bench *skel;
+ pthread_barrier_t *notify;
+ int fd;
+} ctx;
+
+const char *ow_progs[] = {"overwrite", NULL};
+const char *batch_progs[] = {"batch_add_batch_del", NULL};
+const char *add_del_progs[] = {"add_only", "del_only", NULL};
+const static struct htab_mem_use_case use_cases[] = {
+ { .name = "overwrite", .progs = ow_progs },
+ { .name = "batch_add_batch_del", .progs = batch_progs },
+ { .name = "add_del_on_diff_cpu", .progs = add_del_progs, .need_sync = true },
+};
+
+static struct htab_mem_args {
+ u32 value_size;
+ const char *use_case;
+ bool preallocated;
+} args = {
+ .value_size = 8,
+ .use_case = "overwrite",
+ .preallocated = false,
+};
+
+enum {
+ ARG_VALUE_SIZE = 10000,
+ ARG_USE_CASE = 10001,
+ ARG_PREALLOCATED = 10002,
+};
+
+static const struct argp_option opts[] = {
+ { "value-size", ARG_VALUE_SIZE, "VALUE_SIZE", 0,
+ "Set the value size of hash map (default 8)" },
+ { "use-case", ARG_USE_CASE, "USE_CASE", 0,
+ "Set the use case of hash map: overwrite|batch_add_batch_del|add_del_on_diff_cpu" },
+ { "preallocated", ARG_PREALLOCATED, NULL, 0, "use preallocated hash map" },
+ {},
+};
+
+static error_t htab_mem_parse_arg(int key, char *arg, struct argp_state *state)
+{
+ switch (key) {
+ case ARG_VALUE_SIZE:
+ args.value_size = strtoul(arg, NULL, 10);
+ if (args.value_size > 4096) {
+ fprintf(stderr, "too big value size %u\n", args.value_size);
+ argp_usage(state);
+ }
+ break;
+ case ARG_USE_CASE:
+ args.use_case = strdup(arg);
+ if (!args.use_case) {
+ fprintf(stderr, "no mem for use-case\n");
+ argp_usage(state);
+ }
+ break;
+ case ARG_PREALLOCATED:
+ args.preallocated = true;
+ break;
+ default:
+ return ARGP_ERR_UNKNOWN;
+ }
+
+ return 0;
+}
+
+const struct argp bench_htab_mem_argp = {
+ .options = opts,
+ .parser = htab_mem_parse_arg,
+};
+
+static void htab_mem_validate(void)
+{
+ if (!strcmp(use_cases[2].name, args.use_case) && env.producer_cnt % 2) {
+ fprintf(stderr, "%s needs an even number of producers\n", args.use_case);
+ exit(1);
+ }
+}
+
+static int htab_mem_bench_init_barriers(void)
+{
+ pthread_barrier_t *barriers;
+ unsigned int i, nr;
+
+ if (!ctx.uc->need_sync)
+ return 0;
+
+ nr = (env.producer_cnt + 1) / 2;
+ barriers = calloc(nr, sizeof(*barriers));
+ if (!barriers)
+ return -1;
+
+ /* Used for synchronization between two threads */
+ for (i = 0; i < nr; i++)
+ pthread_barrier_init(&barriers[i], NULL, 2);
+
+ ctx.notify = barriers;
+ return 0;
+}
+
+static void htab_mem_bench_exit_barriers(void)
+{
+ unsigned int i, nr;
+
+ if (!ctx.notify)
+ return;
+
+ nr = (env.producer_cnt + 1) / 2;
+ for (i = 0; i < nr; i++)
+ pthread_barrier_destroy(&ctx.notify[i]);
+ free(ctx.notify);
+}
+
+static const struct htab_mem_use_case *htab_mem_find_use_case_or_exit(const char *name)
+{
+ unsigned int i;
+
+ for (i = 0; i < ARRAY_SIZE(use_cases); i++) {
+ if (!strcmp(name, use_cases[i].name))
+ return &use_cases[i];
+ }
+
+ fprintf(stderr, "no such use-case: %s\n", name);
+ fprintf(stderr, "available use case:");
+ for (i = 0; i < ARRAY_SIZE(use_cases); i++)
+ fprintf(stderr, " %s", use_cases[i].name);
+ fprintf(stderr, "\n");
+ exit(1);
+}
+
+static void htab_mem_setup(void)
+{
+ struct bpf_map *map;
+ const char **names;
+ int err;
+
+ setup_libbpf();
+
+ ctx.uc = htab_mem_find_use_case_or_exit(args.use_case);
+ err = htab_mem_bench_init_barriers();
+ if (err) {
+ fprintf(stderr, "failed to init barrier\n");
+ exit(1);
+ }
+
+ ctx.fd = cgroup_setup_and_join("/htab_mem");
+ if (ctx.fd < 0)
+ goto cleanup;
+
+ ctx.skel = htab_mem_bench__open();
+ if (!ctx.skel) {
+ fprintf(stderr, "failed to open skeleton\n");
+ goto cleanup;
+ }
+
+ map = ctx.skel->maps.htab;
+ bpf_map__set_value_size(map, args.value_size);
+ /* Ensure that different CPUs can operate on different subset */
+ bpf_map__set_max_entries(map, MAX(8192, 64 * env.nr_cpus));
+ if (args.preallocated)
+ bpf_map__set_map_flags(map, bpf_map__map_flags(map) & ~BPF_F_NO_PREALLOC);
+
+ names = ctx.uc->progs;
+ while (*names) {
+ struct bpf_program *prog;
+
+ prog = bpf_object__find_program_by_name(ctx.skel->obj, *names);
+ if (!prog) {
+ fprintf(stderr, "no such program %s\n", *names);
+ goto cleanup;
+ }
+ bpf_program__set_autoload(prog, true);
+ names++;
+ }
+ ctx.skel->bss->nr_thread = env.producer_cnt;
+
+ err = htab_mem_bench__load(ctx.skel);
+ if (err) {
+ fprintf(stderr, "failed to load skeleton\n");
+ goto cleanup;
+ }
+ err = htab_mem_bench__attach(ctx.skel);
+ if (err) {
+ fprintf(stderr, "failed to attach skeleton\n");
+ goto cleanup;
+ }
+ return;
+
+cleanup:
+ htab_mem_bench__destroy(ctx.skel);
+ htab_mem_bench_exit_barriers();
+ if (ctx.fd >= 0) {
+ close(ctx.fd);
+ cleanup_cgroup_environment();
+ }
+ exit(1);
+}
+
+static void htab_mem_add_fn(pthread_barrier_t *notify)
+{
+ while (true) {
+ /* Do addition */
+ (void)syscall(__NR_getpgid, 0);
+ /* Notify deletion thread to do deletion */
+ pthread_barrier_wait(notify);
+ /* Wait for deletion to complete */
+ pthread_barrier_wait(notify);
+ }
+}
+
+static void htab_mem_delete_fn(pthread_barrier_t *notify)
+{
+ while (true) {
+ /* Wait for addition to complete */
+ pthread_barrier_wait(notify);
+ /* Do deletion */
+ (void)syscall(__NR_getppid);
+ /* Notify addition thread to do addition */
+ pthread_barrier_wait(notify);
+ }
+}
+
+static void *htab_mem_producer(void *arg)
+{
+ pthread_barrier_t *notify;
+ int seq;
+
+ if (!ctx.uc->need_sync) {
+ while (true)
+ (void)syscall(__NR_getpgid, 0);
+ return NULL;
+ }
+
+ seq = (long)arg;
+ notify = &ctx.notify[seq / 2];
+ if (seq & 1)
+ htab_mem_delete_fn(notify);
+ else
+ htab_mem_add_fn(notify);
+ return NULL;
+}
+
+static void htab_mem_read_mem_cgrp_file(const char *name, unsigned long *value)
+{
+ char buf[32];
+ ssize_t got;
+ int fd;
+
+ fd = openat(ctx.fd, name, O_RDONLY);
+ if (fd < 0) {
+ /* cgroup v1 ? */
+ fprintf(stderr, "no %s\n", name);
+ *value = 0;
+ return;
+ }
+
+ got = read(fd, buf, sizeof(buf) - 1);
+ if (got <= 0) {
+ *value = 0;
+ return;
+ }
+ buf[got] = 0;
+
+ *value = strtoull(buf, NULL, 0);
+
+ close(fd);
+}
+
+static void htab_mem_measure(struct bench_res *res)
+{
+ res->hits = atomic_swap(&ctx.skel->bss->op_cnt, 0) / env.producer_cnt;
+ htab_mem_read_mem_cgrp_file("memory.current", &res->gp_ct);
+}
+
+static void htab_mem_report_progress(int iter, struct bench_res *res, long delta_ns)
+{
+ double loop, mem;
+
+ loop = res->hits / 1000.0 / (delta_ns / 1000000000.0);
+ mem = res->gp_ct / 1048576.0;
+ printf("Iter %3d (%7.3lfus): ", iter, (delta_ns - 1000000000) / 1000.0);
+ printf("per-prod-op %7.2lfk/s, memory usage %7.2lfMiB\n", loop, mem);
+}
+
+static void htab_mem_report_final(struct bench_res res[], int res_cnt)
+{
+ double mem_mean = 0.0, mem_stddev = 0.0;
+ double loop_mean = 0.0, loop_stddev = 0.0;
+ unsigned long peak_mem;
+ int i;
+
+ for (i = 0; i < res_cnt; i++) {
+ loop_mean += res[i].hits / 1000.0 / (0.0 + res_cnt);
+ mem_mean += res[i].gp_ct / 1048576.0 / (0.0 + res_cnt);
+ }
+ if (res_cnt > 1) {
+ for (i = 0; i < res_cnt; i++) {
+ loop_stddev += (loop_mean - res[i].hits / 1000.0) *
+ (loop_mean - res[i].hits / 1000.0) /
+ (res_cnt - 1.0);
+ mem_stddev += (mem_mean - res[i].gp_ct / 1048576.0) *
+ (mem_mean - res[i].gp_ct / 1048576.0) /
+ (res_cnt - 1.0);
+ }
+ loop_stddev = sqrt(loop_stddev);
+ mem_stddev = sqrt(mem_stddev);
+ }
+
+ htab_mem_read_mem_cgrp_file("memory.peak", &peak_mem);
+ printf("Summary: per-prod-op %7.2lf \u00B1 %7.2lfk/s, memory usage %7.2lf \u00B1 %7.2lfMiB,"
+ " peak memory usage %7.2lfMiB\n",
+ loop_mean, loop_stddev, mem_mean, mem_stddev, peak_mem / 1048576.0);
+
+ close(ctx.fd);
+ cleanup_cgroup_environment();
+}
+
+const struct bench bench_htab_mem = {
+ .name = "htab-mem",
+ .argp = &bench_htab_mem_argp,
+ .validate = htab_mem_validate,
+ .setup = htab_mem_setup,
+ .producer_thread = htab_mem_producer,
+ .measure = htab_mem_measure,
+ .report_progress = htab_mem_report_progress,
+ .report_final = htab_mem_report_final,
+};
diff --git a/tools/testing/selftests/bpf/benchs/bench_local_storage.c b/tools/testing/selftests/bpf/benchs/bench_local_storage.c
new file mode 100644
index 000000000000..452499428ceb
--- /dev/null
+++ b/tools/testing/selftests/bpf/benchs/bench_local_storage.c
@@ -0,0 +1,282 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2022 Meta Platforms, Inc. and affiliates. */
+
+#include <argp.h>
+#include <linux/btf.h>
+
+#include "local_storage_bench.skel.h"
+#include "bench.h"
+
+#include <test_btf.h>
+
+static struct {
+ __u32 nr_maps;
+ __u32 hashmap_nr_keys_used;
+} args = {
+ .nr_maps = 1000,
+ .hashmap_nr_keys_used = 1000,
+};
+
+enum {
+ ARG_NR_MAPS = 6000,
+ ARG_HASHMAP_NR_KEYS_USED = 6001,
+};
+
+static const struct argp_option opts[] = {
+ { "nr_maps", ARG_NR_MAPS, "NR_MAPS", 0,
+ "Set number of local_storage maps"},
+ { "hashmap_nr_keys_used", ARG_HASHMAP_NR_KEYS_USED, "NR_KEYS",
+ 0, "When doing hashmap test, set number of hashmap keys test uses"},
+ {},
+};
+
+static error_t parse_arg(int key, char *arg, struct argp_state *state)
+{
+ long ret;
+
+ switch (key) {
+ case ARG_NR_MAPS:
+ ret = strtol(arg, NULL, 10);
+ if (ret < 1 || ret > UINT_MAX) {
+ fprintf(stderr, "invalid nr_maps");
+ argp_usage(state);
+ }
+ args.nr_maps = ret;
+ break;
+ case ARG_HASHMAP_NR_KEYS_USED:
+ ret = strtol(arg, NULL, 10);
+ if (ret < 1 || ret > UINT_MAX) {
+ fprintf(stderr, "invalid hashmap_nr_keys_used");
+ argp_usage(state);
+ }
+ args.hashmap_nr_keys_used = ret;
+ break;
+ default:
+ return ARGP_ERR_UNKNOWN;
+ }
+
+ return 0;
+}
+
+const struct argp bench_local_storage_argp = {
+ .options = opts,
+ .parser = parse_arg,
+};
+
+/* Keep in sync w/ array of maps in bpf */
+#define MAX_NR_MAPS 1000
+/* keep in sync w/ same define in bpf */
+#define HASHMAP_SZ 4194304
+
+static void validate(void)
+{
+ if (env.producer_cnt != 1) {
+ fprintf(stderr, "benchmark doesn't support multi-producer!\n");
+ exit(1);
+ }
+ if (env.consumer_cnt != 0) {
+ fprintf(stderr, "benchmark doesn't support consumer!\n");
+ exit(1);
+ }
+
+ if (args.nr_maps > MAX_NR_MAPS) {
+ fprintf(stderr, "nr_maps must be <= 1000\n");
+ exit(1);
+ }
+
+ if (args.hashmap_nr_keys_used > HASHMAP_SZ) {
+ fprintf(stderr, "hashmap_nr_keys_used must be <= %u\n", HASHMAP_SZ);
+ exit(1);
+ }
+}
+
+static struct {
+ struct local_storage_bench *skel;
+ void *bpf_obj;
+ struct bpf_map *array_of_maps;
+} ctx;
+
+static void prepopulate_hashmap(int fd)
+{
+ int i, key, val;
+
+ /* local_storage gets will have BPF_LOCAL_STORAGE_GET_F_CREATE flag set, so
+ * populate the hashmap for a similar comparison
+ */
+ for (i = 0; i < HASHMAP_SZ; i++) {
+ key = val = i;
+ if (bpf_map_update_elem(fd, &key, &val, 0)) {
+ fprintf(stderr, "Error prepopulating hashmap (key %d)\n", key);
+ exit(1);
+ }
+ }
+}
+
+static void __setup(struct bpf_program *prog, bool hashmap)
+{
+ struct bpf_map *inner_map;
+ int i, fd, mim_fd, err;
+
+ LIBBPF_OPTS(bpf_map_create_opts, create_opts);
+
+ if (!hashmap)
+ create_opts.map_flags = BPF_F_NO_PREALLOC;
+
+ ctx.skel->rodata->num_maps = args.nr_maps;
+ ctx.skel->rodata->hashmap_num_keys = args.hashmap_nr_keys_used;
+ inner_map = bpf_map__inner_map(ctx.array_of_maps);
+ create_opts.btf_key_type_id = bpf_map__btf_key_type_id(inner_map);
+ create_opts.btf_value_type_id = bpf_map__btf_value_type_id(inner_map);
+
+ err = local_storage_bench__load(ctx.skel);
+ if (err) {
+ fprintf(stderr, "Error loading skeleton\n");
+ goto err_out;
+ }
+
+ create_opts.btf_fd = bpf_object__btf_fd(ctx.skel->obj);
+
+ mim_fd = bpf_map__fd(ctx.array_of_maps);
+ if (mim_fd < 0) {
+ fprintf(stderr, "Error getting map_in_map fd\n");
+ goto err_out;
+ }
+
+ for (i = 0; i < args.nr_maps; i++) {
+ if (hashmap)
+ fd = bpf_map_create(BPF_MAP_TYPE_HASH, NULL, sizeof(int),
+ sizeof(int), HASHMAP_SZ, &create_opts);
+ else
+ fd = bpf_map_create(BPF_MAP_TYPE_TASK_STORAGE, NULL, sizeof(int),
+ sizeof(int), 0, &create_opts);
+ if (fd < 0) {
+ fprintf(stderr, "Error creating map %d: %d\n", i, fd);
+ goto err_out;
+ }
+
+ if (hashmap)
+ prepopulate_hashmap(fd);
+
+ err = bpf_map_update_elem(mim_fd, &i, &fd, 0);
+ if (err) {
+ fprintf(stderr, "Error updating array-of-maps w/ map %d\n", i);
+ goto err_out;
+ }
+ }
+
+ if (!bpf_program__attach(prog)) {
+ fprintf(stderr, "Error attaching bpf program\n");
+ goto err_out;
+ }
+
+ return;
+err_out:
+ exit(1);
+}
+
+static void hashmap_setup(void)
+{
+ struct local_storage_bench *skel;
+
+ setup_libbpf();
+
+ skel = local_storage_bench__open();
+ ctx.skel = skel;
+ ctx.array_of_maps = skel->maps.array_of_hash_maps;
+ skel->rodata->use_hashmap = 1;
+ skel->rodata->interleave = 0;
+
+ __setup(skel->progs.get_local, true);
+}
+
+static void local_storage_cache_get_setup(void)
+{
+ struct local_storage_bench *skel;
+
+ setup_libbpf();
+
+ skel = local_storage_bench__open();
+ ctx.skel = skel;
+ ctx.array_of_maps = skel->maps.array_of_local_storage_maps;
+ skel->rodata->use_hashmap = 0;
+ skel->rodata->interleave = 0;
+
+ __setup(skel->progs.get_local, false);
+}
+
+static void local_storage_cache_get_interleaved_setup(void)
+{
+ struct local_storage_bench *skel;
+
+ setup_libbpf();
+
+ skel = local_storage_bench__open();
+ ctx.skel = skel;
+ ctx.array_of_maps = skel->maps.array_of_local_storage_maps;
+ skel->rodata->use_hashmap = 0;
+ skel->rodata->interleave = 1;
+
+ __setup(skel->progs.get_local, false);
+}
+
+static void measure(struct bench_res *res)
+{
+ res->hits = atomic_swap(&ctx.skel->bss->hits, 0);
+ res->important_hits = atomic_swap(&ctx.skel->bss->important_hits, 0);
+}
+
+static inline void trigger_bpf_program(void)
+{
+ syscall(__NR_getpgid);
+}
+
+static void *producer(void *input)
+{
+ while (true)
+ trigger_bpf_program();
+
+ return NULL;
+}
+
+/* cache sequential and interleaved get benchs test local_storage get
+ * performance, specifically they demonstrate performance cliff of
+ * current list-plus-cache local_storage model.
+ *
+ * cache sequential get: call bpf_task_storage_get on n maps in order
+ * cache interleaved get: like "sequential get", but interleave 4 calls to the
+ * 'important' map (idx 0 in array_of_maps) for every 10 calls. Goal
+ * is to mimic environment where many progs are accessing their local_storage
+ * maps, with 'our' prog needing to access its map more often than others
+ */
+const struct bench bench_local_storage_cache_seq_get = {
+ .name = "local-storage-cache-seq-get",
+ .argp = &bench_local_storage_argp,
+ .validate = validate,
+ .setup = local_storage_cache_get_setup,
+ .producer_thread = producer,
+ .measure = measure,
+ .report_progress = local_storage_report_progress,
+ .report_final = local_storage_report_final,
+};
+
+const struct bench bench_local_storage_cache_interleaved_get = {
+ .name = "local-storage-cache-int-get",
+ .argp = &bench_local_storage_argp,
+ .validate = validate,
+ .setup = local_storage_cache_get_interleaved_setup,
+ .producer_thread = producer,
+ .measure = measure,
+ .report_progress = local_storage_report_progress,
+ .report_final = local_storage_report_final,
+};
+
+const struct bench bench_local_storage_cache_hashmap_control = {
+ .name = "local-storage-cache-hashmap-control",
+ .argp = &bench_local_storage_argp,
+ .validate = validate,
+ .setup = hashmap_setup,
+ .producer_thread = producer,
+ .measure = measure,
+ .report_progress = local_storage_report_progress,
+ .report_final = local_storage_report_final,
+};
diff --git a/tools/testing/selftests/bpf/benchs/bench_local_storage_create.c b/tools/testing/selftests/bpf/benchs/bench_local_storage_create.c
new file mode 100644
index 000000000000..b36de42ee4d9
--- /dev/null
+++ b/tools/testing/selftests/bpf/benchs/bench_local_storage_create.c
@@ -0,0 +1,258 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2023 Meta Platforms, Inc. and affiliates. */
+
+#include <sys/types.h>
+#include <sys/socket.h>
+#include <pthread.h>
+#include <argp.h>
+
+#include "bench.h"
+#include "bench_local_storage_create.skel.h"
+
+struct thread {
+ int *fds;
+ pthread_t *pthds;
+ int *pthd_results;
+};
+
+static struct bench_local_storage_create *skel;
+static struct thread *threads;
+static long create_owner_errs;
+static int storage_type = BPF_MAP_TYPE_SK_STORAGE;
+static int batch_sz = 32;
+
+enum {
+ ARG_BATCH_SZ = 9000,
+ ARG_STORAGE_TYPE = 9001,
+};
+
+static const struct argp_option opts[] = {
+ { "batch-size", ARG_BATCH_SZ, "BATCH_SIZE", 0,
+ "The number of storage creations in each batch" },
+ { "storage-type", ARG_STORAGE_TYPE, "STORAGE_TYPE", 0,
+ "The type of local storage to test (socket or task)" },
+ {},
+};
+
+static error_t parse_arg(int key, char *arg, struct argp_state *state)
+{
+ int ret;
+
+ switch (key) {
+ case ARG_BATCH_SZ:
+ ret = atoi(arg);
+ if (ret < 1) {
+ fprintf(stderr, "invalid batch-size\n");
+ argp_usage(state);
+ }
+ batch_sz = ret;
+ break;
+ case ARG_STORAGE_TYPE:
+ if (!strcmp(arg, "task")) {
+ storage_type = BPF_MAP_TYPE_TASK_STORAGE;
+ } else if (!strcmp(arg, "socket")) {
+ storage_type = BPF_MAP_TYPE_SK_STORAGE;
+ } else {
+ fprintf(stderr, "invalid storage-type (socket or task)\n");
+ argp_usage(state);
+ }
+ break;
+ default:
+ return ARGP_ERR_UNKNOWN;
+ }
+
+ return 0;
+}
+
+const struct argp bench_local_storage_create_argp = {
+ .options = opts,
+ .parser = parse_arg,
+};
+
+static void validate(void)
+{
+ if (env.consumer_cnt != 0) {
+ fprintf(stderr,
+ "local-storage-create benchmark does not need consumer\n");
+ exit(1);
+ }
+}
+
+static void setup(void)
+{
+ int i;
+
+ skel = bench_local_storage_create__open_and_load();
+ if (!skel) {
+ fprintf(stderr, "error loading skel\n");
+ exit(1);
+ }
+
+ skel->bss->bench_pid = getpid();
+ if (storage_type == BPF_MAP_TYPE_SK_STORAGE) {
+ if (!bpf_program__attach(skel->progs.socket_post_create)) {
+ fprintf(stderr, "Error attaching bpf program\n");
+ exit(1);
+ }
+ } else {
+ if (!bpf_program__attach(skel->progs.sched_process_fork)) {
+ fprintf(stderr, "Error attaching bpf program\n");
+ exit(1);
+ }
+ }
+
+ if (!bpf_program__attach(skel->progs.kmalloc)) {
+ fprintf(stderr, "Error attaching bpf program\n");
+ exit(1);
+ }
+
+ threads = calloc(env.producer_cnt, sizeof(*threads));
+
+ if (!threads) {
+ fprintf(stderr, "cannot alloc thread_res\n");
+ exit(1);
+ }
+
+ for (i = 0; i < env.producer_cnt; i++) {
+ struct thread *t = &threads[i];
+
+ if (storage_type == BPF_MAP_TYPE_SK_STORAGE) {
+ t->fds = malloc(batch_sz * sizeof(*t->fds));
+ if (!t->fds) {
+ fprintf(stderr, "cannot alloc t->fds\n");
+ exit(1);
+ }
+ } else {
+ t->pthds = malloc(batch_sz * sizeof(*t->pthds));
+ if (!t->pthds) {
+ fprintf(stderr, "cannot alloc t->pthds\n");
+ exit(1);
+ }
+ t->pthd_results = malloc(batch_sz * sizeof(*t->pthd_results));
+ if (!t->pthd_results) {
+ fprintf(stderr, "cannot alloc t->pthd_results\n");
+ exit(1);
+ }
+ }
+ }
+}
+
+static void measure(struct bench_res *res)
+{
+ res->hits = atomic_swap(&skel->bss->create_cnts, 0);
+ res->drops = atomic_swap(&skel->bss->kmalloc_cnts, 0);
+}
+
+static void *sk_producer(void *input)
+{
+ struct thread *t = &threads[(long)(input)];
+ int *fds = t->fds;
+ int i;
+
+ while (true) {
+ for (i = 0; i < batch_sz; i++) {
+ fds[i] = socket(AF_INET6, SOCK_DGRAM, 0);
+ if (fds[i] == -1)
+ atomic_inc(&create_owner_errs);
+ }
+
+ for (i = 0; i < batch_sz; i++) {
+ if (fds[i] != -1)
+ close(fds[i]);
+ }
+ }
+
+ return NULL;
+}
+
+static void *thread_func(void *arg)
+{
+ return NULL;
+}
+
+static void *task_producer(void *input)
+{
+ struct thread *t = &threads[(long)(input)];
+ pthread_t *pthds = t->pthds;
+ int *pthd_results = t->pthd_results;
+ int i;
+
+ while (true) {
+ for (i = 0; i < batch_sz; i++) {
+ pthd_results[i] = pthread_create(&pthds[i], NULL, thread_func, NULL);
+ if (pthd_results[i])
+ atomic_inc(&create_owner_errs);
+ }
+
+ for (i = 0; i < batch_sz; i++) {
+ if (!pthd_results[i])
+ pthread_join(pthds[i], NULL);;
+ }
+ }
+
+ return NULL;
+}
+
+static void *producer(void *input)
+{
+ if (storage_type == BPF_MAP_TYPE_SK_STORAGE)
+ return sk_producer(input);
+ else
+ return task_producer(input);
+}
+
+static void report_progress(int iter, struct bench_res *res, long delta_ns)
+{
+ double creates_per_sec, kmallocs_per_create;
+
+ creates_per_sec = res->hits / 1000.0 / (delta_ns / 1000000000.0);
+ kmallocs_per_create = (double)res->drops / res->hits;
+
+ printf("Iter %3d (%7.3lfus): ",
+ iter, (delta_ns - 1000000000) / 1000.0);
+ printf("creates %8.3lfk/s (%7.3lfk/prod), ",
+ creates_per_sec, creates_per_sec / env.producer_cnt);
+ printf("%3.2lf kmallocs/create\n", kmallocs_per_create);
+}
+
+static void report_final(struct bench_res res[], int res_cnt)
+{
+ double creates_mean = 0.0, creates_stddev = 0.0;
+ long total_creates = 0, total_kmallocs = 0;
+ int i;
+
+ for (i = 0; i < res_cnt; i++) {
+ creates_mean += res[i].hits / 1000.0 / (0.0 + res_cnt);
+ total_creates += res[i].hits;
+ total_kmallocs += res[i].drops;
+ }
+
+ if (res_cnt > 1) {
+ for (i = 0; i < res_cnt; i++)
+ creates_stddev += (creates_mean - res[i].hits / 1000.0) *
+ (creates_mean - res[i].hits / 1000.0) /
+ (res_cnt - 1.0);
+ creates_stddev = sqrt(creates_stddev);
+ }
+ printf("Summary: creates %8.3lf \u00B1 %5.3lfk/s (%7.3lfk/prod), ",
+ creates_mean, creates_stddev, creates_mean / env.producer_cnt);
+ printf("%4.2lf kmallocs/create\n", (double)total_kmallocs / total_creates);
+ if (create_owner_errs || skel->bss->create_errs)
+ printf("%s() errors %ld create_errs %ld\n",
+ storage_type == BPF_MAP_TYPE_SK_STORAGE ?
+ "socket" : "pthread_create",
+ create_owner_errs,
+ skel->bss->create_errs);
+}
+
+/* Benchmark performance of creating bpf local storage */
+const struct bench bench_local_storage_create = {
+ .name = "local-storage-create",
+ .argp = &bench_local_storage_create_argp,
+ .validate = validate,
+ .setup = setup,
+ .producer_thread = producer,
+ .measure = measure,
+ .report_progress = report_progress,
+ .report_final = report_final,
+};
diff --git a/tools/testing/selftests/bpf/benchs/bench_local_storage_rcu_tasks_trace.c b/tools/testing/selftests/bpf/benchs/bench_local_storage_rcu_tasks_trace.c
new file mode 100644
index 000000000000..edf0b00418c1
--- /dev/null
+++ b/tools/testing/selftests/bpf/benchs/bench_local_storage_rcu_tasks_trace.c
@@ -0,0 +1,263 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2022 Meta Platforms, Inc. and affiliates. */
+
+#include <argp.h>
+
+#include <sys/prctl.h>
+#include "local_storage_rcu_tasks_trace_bench.skel.h"
+#include "bench.h"
+
+#include <signal.h>
+
+static struct {
+ __u32 nr_procs;
+ __u32 kthread_pid;
+} args = {
+ .nr_procs = 1000,
+ .kthread_pid = 0,
+};
+
+enum {
+ ARG_NR_PROCS = 7000,
+ ARG_KTHREAD_PID = 7001,
+};
+
+static const struct argp_option opts[] = {
+ { "nr_procs", ARG_NR_PROCS, "NR_PROCS", 0,
+ "Set number of user processes to spin up"},
+ { "kthread_pid", ARG_KTHREAD_PID, "PID", 0,
+ "Pid of rcu_tasks_trace kthread for ticks tracking"},
+ {},
+};
+
+static error_t parse_arg(int key, char *arg, struct argp_state *state)
+{
+ long ret;
+
+ switch (key) {
+ case ARG_NR_PROCS:
+ ret = strtol(arg, NULL, 10);
+ if (ret < 1 || ret > UINT_MAX) {
+ fprintf(stderr, "invalid nr_procs\n");
+ argp_usage(state);
+ }
+ args.nr_procs = ret;
+ break;
+ case ARG_KTHREAD_PID:
+ ret = strtol(arg, NULL, 10);
+ if (ret < 1) {
+ fprintf(stderr, "invalid kthread_pid\n");
+ argp_usage(state);
+ }
+ args.kthread_pid = ret;
+ break;
+break;
+ default:
+ return ARGP_ERR_UNKNOWN;
+ }
+
+ return 0;
+}
+
+const struct argp bench_local_storage_rcu_tasks_trace_argp = {
+ .options = opts,
+ .parser = parse_arg,
+};
+
+#define MAX_SLEEP_PROCS 150000
+
+static void validate(void)
+{
+ if (env.producer_cnt != 1) {
+ fprintf(stderr, "benchmark doesn't support multi-producer!\n");
+ exit(1);
+ }
+ if (env.consumer_cnt != 0) {
+ fprintf(stderr, "benchmark doesn't support consumer!\n");
+ exit(1);
+ }
+
+ if (args.nr_procs > MAX_SLEEP_PROCS) {
+ fprintf(stderr, "benchmark supports up to %u sleeper procs!\n",
+ MAX_SLEEP_PROCS);
+ exit(1);
+ }
+}
+
+static long kthread_pid_ticks(void)
+{
+ char procfs_path[100];
+ long stime;
+ FILE *f;
+
+ if (!args.kthread_pid)
+ return -1;
+
+ sprintf(procfs_path, "/proc/%u/stat", args.kthread_pid);
+ f = fopen(procfs_path, "r");
+ if (!f) {
+ fprintf(stderr, "couldn't open %s, exiting\n", procfs_path);
+ goto err_out;
+ }
+ if (fscanf(f, "%*s %*s %*s %*s %*s %*s %*s %*s %*s %*s %*s %*s %*s %*s %ld", &stime) != 1) {
+ fprintf(stderr, "fscanf of %s failed, exiting\n", procfs_path);
+ goto err_out;
+ }
+ fclose(f);
+ return stime;
+
+err_out:
+ if (f)
+ fclose(f);
+ exit(1);
+ return 0;
+}
+
+static struct {
+ struct local_storage_rcu_tasks_trace_bench *skel;
+ long prev_kthread_stime;
+} ctx;
+
+static void sleep_and_loop(void)
+{
+ while (true) {
+ sleep(rand() % 4);
+ syscall(__NR_getpgid);
+ }
+}
+
+static void local_storage_tasks_trace_setup(void)
+{
+ int i, err, forkret, runner_pid;
+
+ runner_pid = getpid();
+
+ for (i = 0; i < args.nr_procs; i++) {
+ forkret = fork();
+ if (forkret < 0) {
+ fprintf(stderr, "Error forking sleeper proc %u of %u, exiting\n", i,
+ args.nr_procs);
+ goto err_out;
+ }
+
+ if (!forkret) {
+ err = prctl(PR_SET_PDEATHSIG, SIGKILL);
+ if (err < 0) {
+ fprintf(stderr, "prctl failed with err %d, exiting\n", errno);
+ goto err_out;
+ }
+
+ if (getppid() != runner_pid) {
+ fprintf(stderr, "Runner died while spinning up procs, exiting\n");
+ goto err_out;
+ }
+ sleep_and_loop();
+ }
+ }
+ printf("Spun up %u procs (our pid %d)\n", args.nr_procs, runner_pid);
+
+ setup_libbpf();
+
+ ctx.skel = local_storage_rcu_tasks_trace_bench__open_and_load();
+ if (!ctx.skel) {
+ fprintf(stderr, "Error doing open_and_load, exiting\n");
+ goto err_out;
+ }
+
+ ctx.prev_kthread_stime = kthread_pid_ticks();
+
+ if (!bpf_program__attach(ctx.skel->progs.get_local)) {
+ fprintf(stderr, "Error attaching bpf program\n");
+ goto err_out;
+ }
+
+ if (!bpf_program__attach(ctx.skel->progs.pregp_step)) {
+ fprintf(stderr, "Error attaching bpf program\n");
+ goto err_out;
+ }
+
+ if (!bpf_program__attach(ctx.skel->progs.postgp)) {
+ fprintf(stderr, "Error attaching bpf program\n");
+ goto err_out;
+ }
+
+ return;
+err_out:
+ exit(1);
+}
+
+static void measure(struct bench_res *res)
+{
+ long ticks;
+
+ res->gp_ct = atomic_swap(&ctx.skel->bss->gp_hits, 0);
+ res->gp_ns = atomic_swap(&ctx.skel->bss->gp_times, 0);
+ ticks = kthread_pid_ticks();
+ res->stime = ticks - ctx.prev_kthread_stime;
+ ctx.prev_kthread_stime = ticks;
+}
+
+static void *producer(void *input)
+{
+ while (true)
+ syscall(__NR_getpgid);
+ return NULL;
+}
+
+static void report_progress(int iter, struct bench_res *res, long delta_ns)
+{
+ if (ctx.skel->bss->unexpected) {
+ fprintf(stderr, "Error: Unexpected order of bpf prog calls (postgp after pregp).");
+ fprintf(stderr, "Data can't be trusted, exiting\n");
+ exit(1);
+ }
+
+ if (env.quiet)
+ return;
+
+ printf("Iter %d\t avg tasks_trace grace period latency\t%lf ns\n",
+ iter, res->gp_ns / (double)res->gp_ct);
+ printf("Iter %d\t avg ticks per tasks_trace grace period\t%lf\n",
+ iter, res->stime / (double)res->gp_ct);
+}
+
+static void report_final(struct bench_res res[], int res_cnt)
+{
+ struct basic_stats gp_stat;
+
+ grace_period_latency_basic_stats(res, res_cnt, &gp_stat);
+ printf("SUMMARY tasks_trace grace period latency");
+ printf("\tavg %.3lf us\tstddev %.3lf us\n", gp_stat.mean, gp_stat.stddev);
+ grace_period_ticks_basic_stats(res, res_cnt, &gp_stat);
+ printf("SUMMARY ticks per tasks_trace grace period");
+ printf("\tavg %.3lf\tstddev %.3lf\n", gp_stat.mean, gp_stat.stddev);
+}
+
+/* local-storage-tasks-trace: Benchmark performance of BPF local_storage's use
+ * of RCU Tasks-Trace.
+ *
+ * Stress RCU Tasks Trace by forking many tasks, all of which do no work aside
+ * from sleep() loop, and creating/destroying BPF task-local storage on wakeup.
+ * The number of forked tasks is configurable.
+ *
+ * exercising code paths which call call_rcu_tasks_trace while there are many
+ * thousands of tasks on the system should result in RCU Tasks-Trace having to
+ * do a noticeable amount of work.
+ *
+ * This should be observable by measuring rcu_tasks_trace_kthread CPU usage
+ * after the grace period has ended, or by measuring grace period latency.
+ *
+ * This benchmark uses both approaches, attaching to rcu_tasks_trace_pregp_step
+ * and rcu_tasks_trace_postgp functions to measure grace period latency and
+ * using /proc/PID/stat to measure rcu_tasks_trace_kthread kernel ticks
+ */
+const struct bench bench_local_storage_tasks_trace = {
+ .name = "local-storage-tasks-trace",
+ .argp = &bench_local_storage_rcu_tasks_trace_argp,
+ .validate = validate,
+ .setup = local_storage_tasks_trace_setup,
+ .producer_thread = producer,
+ .measure = measure,
+ .report_progress = report_progress,
+ .report_final = report_final,
+};
diff --git a/tools/testing/selftests/bpf/benchs/bench_rename.c b/tools/testing/selftests/bpf/benchs/bench_rename.c
index e74cff40f4fe..bf66893c7a33 100644
--- a/tools/testing/selftests/bpf/benchs/bench_rename.c
+++ b/tools/testing/selftests/bpf/benchs/bench_rename.c
@@ -11,14 +11,14 @@ static struct ctx {
int fd;
} ctx;
-static void validate()
+static void validate(void)
{
if (env.producer_cnt != 1) {
fprintf(stderr, "benchmark doesn't support multi-producer!\n");
exit(1);
}
- if (env.consumer_cnt != 1) {
- fprintf(stderr, "benchmark doesn't support multi-consumer!\n");
+ if (env.consumer_cnt != 0) {
+ fprintf(stderr, "benchmark doesn't support consumer!\n");
exit(1);
}
}
@@ -43,7 +43,7 @@ static void measure(struct bench_res *res)
res->hits = atomic_swap(&ctx.hits.value, 0);
}
-static void setup_ctx()
+static void setup_ctx(void)
{
setup_libbpf();
@@ -65,64 +65,52 @@ static void attach_bpf(struct bpf_program *prog)
struct bpf_link *link;
link = bpf_program__attach(prog);
- if (IS_ERR(link)) {
+ if (!link) {
fprintf(stderr, "failed to attach program!\n");
exit(1);
}
}
-static void setup_base()
+static void setup_base(void)
{
setup_ctx();
}
-static void setup_kprobe()
+static void setup_kprobe(void)
{
setup_ctx();
attach_bpf(ctx.skel->progs.prog1);
}
-static void setup_kretprobe()
+static void setup_kretprobe(void)
{
setup_ctx();
attach_bpf(ctx.skel->progs.prog2);
}
-static void setup_rawtp()
+static void setup_rawtp(void)
{
setup_ctx();
attach_bpf(ctx.skel->progs.prog3);
}
-static void setup_fentry()
+static void setup_fentry(void)
{
setup_ctx();
attach_bpf(ctx.skel->progs.prog4);
}
-static void setup_fexit()
+static void setup_fexit(void)
{
setup_ctx();
attach_bpf(ctx.skel->progs.prog5);
}
-static void setup_fmodret()
-{
- setup_ctx();
- attach_bpf(ctx.skel->progs.prog6);
-}
-
-static void *consumer(void *input)
-{
- return NULL;
-}
-
const struct bench bench_rename_base = {
.name = "rename-base",
.validate = validate,
.setup = setup_base,
.producer_thread = producer,
- .consumer_thread = consumer,
.measure = measure,
.report_progress = hits_drops_report_progress,
.report_final = hits_drops_report_final,
@@ -133,7 +121,6 @@ const struct bench bench_rename_kprobe = {
.validate = validate,
.setup = setup_kprobe,
.producer_thread = producer,
- .consumer_thread = consumer,
.measure = measure,
.report_progress = hits_drops_report_progress,
.report_final = hits_drops_report_final,
@@ -144,7 +131,6 @@ const struct bench bench_rename_kretprobe = {
.validate = validate,
.setup = setup_kretprobe,
.producer_thread = producer,
- .consumer_thread = consumer,
.measure = measure,
.report_progress = hits_drops_report_progress,
.report_final = hits_drops_report_final,
@@ -155,7 +141,6 @@ const struct bench bench_rename_rawtp = {
.validate = validate,
.setup = setup_rawtp,
.producer_thread = producer,
- .consumer_thread = consumer,
.measure = measure,
.report_progress = hits_drops_report_progress,
.report_final = hits_drops_report_final,
@@ -166,7 +151,6 @@ const struct bench bench_rename_fentry = {
.validate = validate,
.setup = setup_fentry,
.producer_thread = producer,
- .consumer_thread = consumer,
.measure = measure,
.report_progress = hits_drops_report_progress,
.report_final = hits_drops_report_final,
@@ -177,18 +161,6 @@ const struct bench bench_rename_fexit = {
.validate = validate,
.setup = setup_fexit,
.producer_thread = producer,
- .consumer_thread = consumer,
- .measure = measure,
- .report_progress = hits_drops_report_progress,
- .report_final = hits_drops_report_final,
-};
-
-const struct bench bench_rename_fmodret = {
- .name = "rename-fmodret",
- .validate = validate,
- .setup = setup_fmodret,
- .producer_thread = producer,
- .consumer_thread = consumer,
.measure = measure,
.report_progress = hits_drops_report_progress,
.report_final = hits_drops_report_final,
diff --git a/tools/testing/selftests/bpf/benchs/bench_ringbufs.c b/tools/testing/selftests/bpf/benchs/bench_ringbufs.c
index da87c7f31891..e1ee979e6acc 100644
--- a/tools/testing/selftests/bpf/benchs/bench_ringbufs.c
+++ b/tools/testing/selftests/bpf/benchs/bench_ringbufs.c
@@ -88,15 +88,15 @@ const struct argp bench_ringbufs_argp = {
static struct counter buf_hits;
-static inline void bufs_trigger_batch()
+static inline void bufs_trigger_batch(void)
{
(void)syscall(__NR_getpgid);
}
-static void bufs_validate()
+static void bufs_validate(void)
{
if (env.consumer_cnt != 1) {
- fprintf(stderr, "rb-libbpf benchmark doesn't support multi-consumer!\n");
+ fprintf(stderr, "rb-libbpf benchmark needs one consumer!\n");
exit(1);
}
@@ -132,7 +132,7 @@ static void ringbuf_libbpf_measure(struct bench_res *res)
res->drops = atomic_swap(&ctx->skel->bss->dropped, 0);
}
-static struct ringbuf_bench *ringbuf_setup_skeleton()
+static struct ringbuf_bench *ringbuf_setup_skeleton(void)
{
struct ringbuf_bench *skel;
@@ -151,7 +151,7 @@ static struct ringbuf_bench *ringbuf_setup_skeleton()
/* record data + header take 16 bytes */
skel->rodata->wakeup_data_size = args.sample_rate * 16;
- bpf_map__resize(skel->maps.ringbuf, args.ringbuf_sz);
+ bpf_map__set_max_entries(skel->maps.ringbuf, args.ringbuf_sz);
if (ringbuf_bench__load(skel)) {
fprintf(stderr, "failed to load skeleton\n");
@@ -167,7 +167,7 @@ static int buf_process_sample(void *ctx, void *data, size_t len)
return 0;
}
-static void ringbuf_libbpf_setup()
+static void ringbuf_libbpf_setup(void)
{
struct ringbuf_libbpf_ctx *ctx = &ringbuf_libbpf_ctx;
struct bpf_link *link;
@@ -181,7 +181,7 @@ static void ringbuf_libbpf_setup()
}
link = bpf_program__attach(ctx->skel->progs.bench_ringbuf);
- if (IS_ERR(link)) {
+ if (!link) {
fprintf(stderr, "failed to attach program!\n");
exit(1);
}
@@ -223,7 +223,7 @@ static void ringbuf_custom_measure(struct bench_res *res)
res->drops = atomic_swap(&ctx->skel->bss->dropped, 0);
}
-static void ringbuf_custom_setup()
+static void ringbuf_custom_setup(void)
{
struct ringbuf_custom_ctx *ctx = &ringbuf_custom_ctx;
const size_t page_size = getpagesize();
@@ -271,7 +271,7 @@ static void ringbuf_custom_setup()
}
link = bpf_program__attach(ctx->skel->progs.bench_ringbuf);
- if (IS_ERR(link)) {
+ if (!link) {
fprintf(stderr, "failed to attach program\n");
exit(1);
}
@@ -319,7 +319,7 @@ static void ringbuf_custom_process_ring(struct ringbuf_custom *r)
smp_store_release(r->consumer_pos, cons_pos);
else
break;
- };
+ }
}
static void *ringbuf_custom_consumer(void *input)
@@ -352,7 +352,7 @@ static void perfbuf_measure(struct bench_res *res)
res->drops = atomic_swap(&ctx->skel->bss->dropped, 0);
}
-static struct perfbuf_bench *perfbuf_setup_skeleton()
+static struct perfbuf_bench *perfbuf_setup_skeleton(void)
{
struct perfbuf_bench *skel;
@@ -390,21 +390,16 @@ perfbuf_process_sample_raw(void *input_ctx, int cpu,
return LIBBPF_PERF_EVENT_CONT;
}
-static void perfbuf_libbpf_setup()
+static void perfbuf_libbpf_setup(void)
{
struct perfbuf_libbpf_ctx *ctx = &perfbuf_libbpf_ctx;
struct perf_event_attr attr;
- struct perf_buffer_raw_opts pb_opts = {
- .event_cb = perfbuf_process_sample_raw,
- .ctx = (void *)(long)0,
- .attr = &attr,
- };
struct bpf_link *link;
ctx->skel = perfbuf_setup_skeleton();
memset(&attr, 0, sizeof(attr));
- attr.config = PERF_COUNT_SW_BPF_OUTPUT,
+ attr.config = PERF_COUNT_SW_BPF_OUTPUT;
attr.type = PERF_TYPE_SOFTWARE;
attr.sample_type = PERF_SAMPLE_RAW;
/* notify only every Nth sample */
@@ -423,14 +418,15 @@ static void perfbuf_libbpf_setup()
}
ctx->perfbuf = perf_buffer__new_raw(bpf_map__fd(ctx->skel->maps.perfbuf),
- args.perfbuf_sz, &pb_opts);
+ args.perfbuf_sz, &attr,
+ perfbuf_process_sample_raw, NULL, NULL);
if (!ctx->perfbuf) {
fprintf(stderr, "failed to create perfbuf\n");
exit(1);
}
link = bpf_program__attach(ctx->skel->progs.bench_perfbuf);
- if (IS_ERR(link)) {
+ if (!link) {
fprintf(stderr, "failed to attach program\n");
exit(1);
}
@@ -522,6 +518,7 @@ static void *perfbuf_custom_consumer(void *input)
const struct bench bench_rb_libbpf = {
.name = "rb-libbpf",
+ .argp = &bench_ringbufs_argp,
.validate = bufs_validate,
.setup = ringbuf_libbpf_setup,
.producer_thread = bufs_sample_producer,
@@ -533,6 +530,7 @@ const struct bench bench_rb_libbpf = {
const struct bench bench_rb_custom = {
.name = "rb-custom",
+ .argp = &bench_ringbufs_argp,
.validate = bufs_validate,
.setup = ringbuf_custom_setup,
.producer_thread = bufs_sample_producer,
@@ -544,6 +542,7 @@ const struct bench bench_rb_custom = {
const struct bench bench_pb_libbpf = {
.name = "pb-libbpf",
+ .argp = &bench_ringbufs_argp,
.validate = bufs_validate,
.setup = perfbuf_libbpf_setup,
.producer_thread = bufs_sample_producer,
@@ -555,6 +554,7 @@ const struct bench bench_pb_libbpf = {
const struct bench bench_pb_custom = {
.name = "pb-custom",
+ .argp = &bench_ringbufs_argp,
.validate = bufs_validate,
.setup = perfbuf_libbpf_setup,
.producer_thread = bufs_sample_producer,
diff --git a/tools/testing/selftests/bpf/benchs/bench_strncmp.c b/tools/testing/selftests/bpf/benchs/bench_strncmp.c
new file mode 100644
index 000000000000..a5e1428fd7a0
--- /dev/null
+++ b/tools/testing/selftests/bpf/benchs/bench_strncmp.c
@@ -0,0 +1,156 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (C) 2021. Huawei Technologies Co., Ltd */
+#include <argp.h>
+#include "bench.h"
+#include "strncmp_bench.skel.h"
+
+static struct strncmp_ctx {
+ struct strncmp_bench *skel;
+} ctx;
+
+static struct strncmp_args {
+ u32 cmp_str_len;
+} args = {
+ .cmp_str_len = 32,
+};
+
+enum {
+ ARG_CMP_STR_LEN = 5000,
+};
+
+static const struct argp_option opts[] = {
+ { "cmp-str-len", ARG_CMP_STR_LEN, "CMP_STR_LEN", 0,
+ "Set the length of compared string" },
+ {},
+};
+
+static error_t strncmp_parse_arg(int key, char *arg, struct argp_state *state)
+{
+ switch (key) {
+ case ARG_CMP_STR_LEN:
+ args.cmp_str_len = strtoul(arg, NULL, 10);
+ if (!args.cmp_str_len ||
+ args.cmp_str_len >= sizeof(ctx.skel->bss->str)) {
+ fprintf(stderr, "Invalid cmp str len (limit %zu)\n",
+ sizeof(ctx.skel->bss->str));
+ argp_usage(state);
+ }
+ break;
+ default:
+ return ARGP_ERR_UNKNOWN;
+ }
+
+ return 0;
+}
+
+const struct argp bench_strncmp_argp = {
+ .options = opts,
+ .parser = strncmp_parse_arg,
+};
+
+static void strncmp_validate(void)
+{
+ if (env.consumer_cnt != 0) {
+ fprintf(stderr, "strncmp benchmark doesn't support consumer!\n");
+ exit(1);
+ }
+}
+
+static void strncmp_setup(void)
+{
+ int err;
+ char *target;
+ size_t i, sz;
+
+ sz = sizeof(ctx.skel->rodata->target);
+ if (!sz || sz < sizeof(ctx.skel->bss->str)) {
+ fprintf(stderr, "invalid string size (target %zu, src %zu)\n",
+ sz, sizeof(ctx.skel->bss->str));
+ exit(1);
+ }
+
+ setup_libbpf();
+
+ ctx.skel = strncmp_bench__open();
+ if (!ctx.skel) {
+ fprintf(stderr, "failed to open skeleton\n");
+ exit(1);
+ }
+
+ srandom(time(NULL));
+ target = ctx.skel->rodata->target;
+ for (i = 0; i < sz - 1; i++)
+ target[i] = '1' + random() % 9;
+ target[sz - 1] = '\0';
+
+ ctx.skel->rodata->cmp_str_len = args.cmp_str_len;
+
+ memcpy(ctx.skel->bss->str, target, args.cmp_str_len);
+ ctx.skel->bss->str[args.cmp_str_len] = '\0';
+ /* Make bss->str < rodata->target */
+ ctx.skel->bss->str[args.cmp_str_len - 1] -= 1;
+
+ err = strncmp_bench__load(ctx.skel);
+ if (err) {
+ fprintf(stderr, "failed to load skeleton\n");
+ strncmp_bench__destroy(ctx.skel);
+ exit(1);
+ }
+}
+
+static void strncmp_attach_prog(struct bpf_program *prog)
+{
+ struct bpf_link *link;
+
+ link = bpf_program__attach(prog);
+ if (!link) {
+ fprintf(stderr, "failed to attach program!\n");
+ exit(1);
+ }
+}
+
+static void strncmp_no_helper_setup(void)
+{
+ strncmp_setup();
+ strncmp_attach_prog(ctx.skel->progs.strncmp_no_helper);
+}
+
+static void strncmp_helper_setup(void)
+{
+ strncmp_setup();
+ strncmp_attach_prog(ctx.skel->progs.strncmp_helper);
+}
+
+static void *strncmp_producer(void *ctx)
+{
+ while (true)
+ (void)syscall(__NR_getpgid);
+ return NULL;
+}
+
+static void strncmp_measure(struct bench_res *res)
+{
+ res->hits = atomic_swap(&ctx.skel->bss->hits, 0);
+}
+
+const struct bench bench_strncmp_no_helper = {
+ .name = "strncmp-no-helper",
+ .argp = &bench_strncmp_argp,
+ .validate = strncmp_validate,
+ .setup = strncmp_no_helper_setup,
+ .producer_thread = strncmp_producer,
+ .measure = strncmp_measure,
+ .report_progress = hits_drops_report_progress,
+ .report_final = hits_drops_report_final,
+};
+
+const struct bench bench_strncmp_helper = {
+ .name = "strncmp-helper",
+ .argp = &bench_strncmp_argp,
+ .validate = strncmp_validate,
+ .setup = strncmp_helper_setup,
+ .producer_thread = strncmp_producer,
+ .measure = strncmp_measure,
+ .report_progress = hits_drops_report_progress,
+ .report_final = hits_drops_report_final,
+};
diff --git a/tools/testing/selftests/bpf/benchs/bench_trigger.c b/tools/testing/selftests/bpf/benchs/bench_trigger.c
index 49c22832f216..ace0d1011a8e 100644
--- a/tools/testing/selftests/bpf/benchs/bench_trigger.c
+++ b/tools/testing/selftests/bpf/benchs/bench_trigger.c
@@ -2,6 +2,7 @@
/* Copyright (c) 2020 Facebook */
#include "bench.h"
#include "trigger_bench.skel.h"
+#include "trace_helpers.h"
/* BPF triggering benchmarks */
static struct trigger_ctx {
@@ -10,10 +11,10 @@ static struct trigger_ctx {
static struct counter base_hits;
-static void trigger_validate()
+static void trigger_validate(void)
{
- if (env.consumer_cnt != 1) {
- fprintf(stderr, "benchmark doesn't support multi-consumer!\n");
+ if (env.consumer_cnt != 0) {
+ fprintf(stderr, "benchmark doesn't support consumer!\n");
exit(1);
}
}
@@ -44,7 +45,7 @@ static void trigger_measure(struct bench_res *res)
res->hits = atomic_swap(&ctx.skel->bss->hits, 0);
}
-static void setup_ctx()
+static void setup_ctx(void)
{
setup_libbpf();
@@ -60,52 +61,195 @@ static void attach_bpf(struct bpf_program *prog)
struct bpf_link *link;
link = bpf_program__attach(prog);
- if (IS_ERR(link)) {
+ if (!link) {
fprintf(stderr, "failed to attach program!\n");
exit(1);
}
}
-static void trigger_tp_setup()
+static void trigger_tp_setup(void)
{
setup_ctx();
attach_bpf(ctx.skel->progs.bench_trigger_tp);
}
-static void trigger_rawtp_setup()
+static void trigger_rawtp_setup(void)
{
setup_ctx();
attach_bpf(ctx.skel->progs.bench_trigger_raw_tp);
}
-static void trigger_kprobe_setup()
+static void trigger_kprobe_setup(void)
{
setup_ctx();
attach_bpf(ctx.skel->progs.bench_trigger_kprobe);
}
-static void trigger_fentry_setup()
+static void trigger_kretprobe_setup(void)
+{
+ setup_ctx();
+ attach_bpf(ctx.skel->progs.bench_trigger_kretprobe);
+}
+
+static void trigger_kprobe_multi_setup(void)
+{
+ setup_ctx();
+ attach_bpf(ctx.skel->progs.bench_trigger_kprobe_multi);
+}
+
+static void trigger_kretprobe_multi_setup(void)
+{
+ setup_ctx();
+ attach_bpf(ctx.skel->progs.bench_trigger_kretprobe_multi);
+}
+
+static void trigger_fentry_setup(void)
{
setup_ctx();
attach_bpf(ctx.skel->progs.bench_trigger_fentry);
}
-static void trigger_fmodret_setup()
+static void trigger_fexit_setup(void)
+{
+ setup_ctx();
+ attach_bpf(ctx.skel->progs.bench_trigger_fexit);
+}
+
+static void trigger_fentry_sleep_setup(void)
+{
+ setup_ctx();
+ attach_bpf(ctx.skel->progs.bench_trigger_fentry_sleep);
+}
+
+static void trigger_fmodret_setup(void)
{
setup_ctx();
attach_bpf(ctx.skel->progs.bench_trigger_fmodret);
}
-static void *trigger_consumer(void *input)
+/* make sure call is not inlined and not avoided by compiler, so __weak and
+ * inline asm volatile in the body of the function
+ *
+ * There is a performance difference between uprobing at nop location vs other
+ * instructions. So use two different targets, one of which starts with nop
+ * and another doesn't.
+ *
+ * GCC doesn't generate stack setup preample for these functions due to them
+ * having no input arguments and doing nothing in the body.
+ */
+__weak void uprobe_target_nop(void)
{
+ asm volatile ("nop");
+}
+
+__weak void opaque_noop_func(void)
+{
+}
+
+__weak int uprobe_target_push(void)
+{
+ /* overhead of function call is negligible compared to uprobe
+ * triggering, so this shouldn't affect benchmark results much
+ */
+ opaque_noop_func();
+ return 1;
+}
+
+__weak void uprobe_target_ret(void)
+{
+ asm volatile ("");
+}
+
+static void *uprobe_base_producer(void *input)
+{
+ while (true) {
+ uprobe_target_nop();
+ atomic_inc(&base_hits.value);
+ }
return NULL;
}
+static void *uprobe_producer_nop(void *input)
+{
+ while (true)
+ uprobe_target_nop();
+ return NULL;
+}
+
+static void *uprobe_producer_push(void *input)
+{
+ while (true)
+ uprobe_target_push();
+ return NULL;
+}
+
+static void *uprobe_producer_ret(void *input)
+{
+ while (true)
+ uprobe_target_ret();
+ return NULL;
+}
+
+static void usetup(bool use_retprobe, void *target_addr)
+{
+ size_t uprobe_offset;
+ struct bpf_link *link;
+
+ setup_libbpf();
+
+ ctx.skel = trigger_bench__open_and_load();
+ if (!ctx.skel) {
+ fprintf(stderr, "failed to open skeleton\n");
+ exit(1);
+ }
+
+ uprobe_offset = get_uprobe_offset(target_addr);
+ link = bpf_program__attach_uprobe(ctx.skel->progs.bench_trigger_uprobe,
+ use_retprobe,
+ -1 /* all PIDs */,
+ "/proc/self/exe",
+ uprobe_offset);
+ if (!link) {
+ fprintf(stderr, "failed to attach uprobe!\n");
+ exit(1);
+ }
+ ctx.skel->links.bench_trigger_uprobe = link;
+}
+
+static void uprobe_setup_nop(void)
+{
+ usetup(false, &uprobe_target_nop);
+}
+
+static void uretprobe_setup_nop(void)
+{
+ usetup(true, &uprobe_target_nop);
+}
+
+static void uprobe_setup_push(void)
+{
+ usetup(false, &uprobe_target_push);
+}
+
+static void uretprobe_setup_push(void)
+{
+ usetup(true, &uprobe_target_push);
+}
+
+static void uprobe_setup_ret(void)
+{
+ usetup(false, &uprobe_target_ret);
+}
+
+static void uretprobe_setup_ret(void)
+{
+ usetup(true, &uprobe_target_ret);
+}
+
const struct bench bench_trig_base = {
.name = "trig-base",
.validate = trigger_validate,
.producer_thread = trigger_base_producer,
- .consumer_thread = trigger_consumer,
.measure = trigger_base_measure,
.report_progress = hits_drops_report_progress,
.report_final = hits_drops_report_final,
@@ -116,7 +260,6 @@ const struct bench bench_trig_tp = {
.validate = trigger_validate,
.setup = trigger_tp_setup,
.producer_thread = trigger_producer,
- .consumer_thread = trigger_consumer,
.measure = trigger_measure,
.report_progress = hits_drops_report_progress,
.report_final = hits_drops_report_final,
@@ -127,7 +270,6 @@ const struct bench bench_trig_rawtp = {
.validate = trigger_validate,
.setup = trigger_rawtp_setup,
.producer_thread = trigger_producer,
- .consumer_thread = trigger_consumer,
.measure = trigger_measure,
.report_progress = hits_drops_report_progress,
.report_final = hits_drops_report_final,
@@ -138,7 +280,36 @@ const struct bench bench_trig_kprobe = {
.validate = trigger_validate,
.setup = trigger_kprobe_setup,
.producer_thread = trigger_producer,
- .consumer_thread = trigger_consumer,
+ .measure = trigger_measure,
+ .report_progress = hits_drops_report_progress,
+ .report_final = hits_drops_report_final,
+};
+
+const struct bench bench_trig_kretprobe = {
+ .name = "trig-kretprobe",
+ .validate = trigger_validate,
+ .setup = trigger_kretprobe_setup,
+ .producer_thread = trigger_producer,
+ .measure = trigger_measure,
+ .report_progress = hits_drops_report_progress,
+ .report_final = hits_drops_report_final,
+};
+
+const struct bench bench_trig_kprobe_multi = {
+ .name = "trig-kprobe-multi",
+ .validate = trigger_validate,
+ .setup = trigger_kprobe_multi_setup,
+ .producer_thread = trigger_producer,
+ .measure = trigger_measure,
+ .report_progress = hits_drops_report_progress,
+ .report_final = hits_drops_report_final,
+};
+
+const struct bench bench_trig_kretprobe_multi = {
+ .name = "trig-kretprobe-multi",
+ .validate = trigger_validate,
+ .setup = trigger_kretprobe_multi_setup,
+ .producer_thread = trigger_producer,
.measure = trigger_measure,
.report_progress = hits_drops_report_progress,
.report_final = hits_drops_report_final,
@@ -149,7 +320,26 @@ const struct bench bench_trig_fentry = {
.validate = trigger_validate,
.setup = trigger_fentry_setup,
.producer_thread = trigger_producer,
- .consumer_thread = trigger_consumer,
+ .measure = trigger_measure,
+ .report_progress = hits_drops_report_progress,
+ .report_final = hits_drops_report_final,
+};
+
+const struct bench bench_trig_fexit = {
+ .name = "trig-fexit",
+ .validate = trigger_validate,
+ .setup = trigger_fexit_setup,
+ .producer_thread = trigger_producer,
+ .measure = trigger_measure,
+ .report_progress = hits_drops_report_progress,
+ .report_final = hits_drops_report_final,
+};
+
+const struct bench bench_trig_fentry_sleep = {
+ .name = "trig-fentry-sleep",
+ .validate = trigger_validate,
+ .setup = trigger_fentry_sleep_setup,
+ .producer_thread = trigger_producer,
.measure = trigger_measure,
.report_progress = hits_drops_report_progress,
.report_final = hits_drops_report_final,
@@ -160,7 +350,69 @@ const struct bench bench_trig_fmodret = {
.validate = trigger_validate,
.setup = trigger_fmodret_setup,
.producer_thread = trigger_producer,
- .consumer_thread = trigger_consumer,
+ .measure = trigger_measure,
+ .report_progress = hits_drops_report_progress,
+ .report_final = hits_drops_report_final,
+};
+
+const struct bench bench_trig_uprobe_base = {
+ .name = "trig-uprobe-base",
+ .setup = NULL, /* no uprobe/uretprobe is attached */
+ .producer_thread = uprobe_base_producer,
+ .measure = trigger_base_measure,
+ .report_progress = hits_drops_report_progress,
+ .report_final = hits_drops_report_final,
+};
+
+const struct bench bench_trig_uprobe_nop = {
+ .name = "trig-uprobe-nop",
+ .setup = uprobe_setup_nop,
+ .producer_thread = uprobe_producer_nop,
+ .measure = trigger_measure,
+ .report_progress = hits_drops_report_progress,
+ .report_final = hits_drops_report_final,
+};
+
+const struct bench bench_trig_uretprobe_nop = {
+ .name = "trig-uretprobe-nop",
+ .setup = uretprobe_setup_nop,
+ .producer_thread = uprobe_producer_nop,
+ .measure = trigger_measure,
+ .report_progress = hits_drops_report_progress,
+ .report_final = hits_drops_report_final,
+};
+
+const struct bench bench_trig_uprobe_push = {
+ .name = "trig-uprobe-push",
+ .setup = uprobe_setup_push,
+ .producer_thread = uprobe_producer_push,
+ .measure = trigger_measure,
+ .report_progress = hits_drops_report_progress,
+ .report_final = hits_drops_report_final,
+};
+
+const struct bench bench_trig_uretprobe_push = {
+ .name = "trig-uretprobe-push",
+ .setup = uretprobe_setup_push,
+ .producer_thread = uprobe_producer_push,
+ .measure = trigger_measure,
+ .report_progress = hits_drops_report_progress,
+ .report_final = hits_drops_report_final,
+};
+
+const struct bench bench_trig_uprobe_ret = {
+ .name = "trig-uprobe-ret",
+ .setup = uprobe_setup_ret,
+ .producer_thread = uprobe_producer_ret,
+ .measure = trigger_measure,
+ .report_progress = hits_drops_report_progress,
+ .report_final = hits_drops_report_final,
+};
+
+const struct bench bench_trig_uretprobe_ret = {
+ .name = "trig-uretprobe-ret",
+ .setup = uretprobe_setup_ret,
+ .producer_thread = uprobe_producer_ret,
.measure = trigger_measure,
.report_progress = hits_drops_report_progress,
.report_final = hits_drops_report_final,
diff --git a/tools/testing/selftests/bpf/benchs/run_bench_bloom_filter_map.sh b/tools/testing/selftests/bpf/benchs/run_bench_bloom_filter_map.sh
new file mode 100755
index 000000000000..8ffd385ab2f4
--- /dev/null
+++ b/tools/testing/selftests/bpf/benchs/run_bench_bloom_filter_map.sh
@@ -0,0 +1,45 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+source ./benchs/run_common.sh
+
+set -eufo pipefail
+
+header "Bloom filter map"
+for v in 2 4 8 16 40; do
+for t in 1 4 8 12 16; do
+for h in {1..10}; do
+subtitle "value_size: $v bytes, # threads: $t, # hashes: $h"
+ for e in 10000 50000 75000 100000 250000 500000 750000 1000000 2500000 5000000; do
+ printf "%'d entries -\n" $e
+ printf "\t"
+ summarize "Lookups, total operations: " \
+ "$($RUN_BENCH -p $t --nr_hash_funcs $h --nr_entries $e --value_size $v bloom-lookup)"
+ printf "\t"
+ summarize "Updates, total operations: " \
+ "$($RUN_BENCH -p $t --nr_hash_funcs $h --nr_entries $e --value_size $v bloom-update)"
+ printf "\t"
+ summarize_percentage "False positive rate: " \
+ "$($RUN_BENCH -p $t --nr_hash_funcs $h --nr_entries $e --value_size $v bloom-false-positive)"
+ done
+ printf "\n"
+done
+done
+done
+
+header "Hashmap without bloom filter vs. hashmap with bloom filter (throughput, 8 threads)"
+for v in 2 4 8 16 40; do
+for h in {1..10}; do
+subtitle "value_size: $v, # hashes: $h"
+ for e in 10000 50000 75000 100000 250000 500000 750000 1000000 2500000 5000000; do
+ printf "%'d entries -\n" $e
+ printf "\t"
+ summarize_total "Hashmap without bloom filter: " \
+ "$($RUN_BENCH --nr_hash_funcs $h --nr_entries $e --value_size $v -p 8 hashmap-without-bloom)"
+ printf "\t"
+ summarize_total "Hashmap with bloom filter: " \
+ "$($RUN_BENCH --nr_hash_funcs $h --nr_entries $e --value_size $v -p 8 hashmap-with-bloom)"
+ done
+ printf "\n"
+done
+done
diff --git a/tools/testing/selftests/bpf/benchs/run_bench_bpf_hashmap_full_update.sh b/tools/testing/selftests/bpf/benchs/run_bench_bpf_hashmap_full_update.sh
new file mode 100755
index 000000000000..cd2efd3fdef3
--- /dev/null
+++ b/tools/testing/selftests/bpf/benchs/run_bench_bpf_hashmap_full_update.sh
@@ -0,0 +1,11 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+source ./benchs/run_common.sh
+
+set -eufo pipefail
+
+nr_threads=`expr $(cat /proc/cpuinfo | grep "processor"| wc -l) - 1`
+summary=$($RUN_BENCH -p $nr_threads bpf-hashmap-full-update)
+printf "$summary"
+printf "\n"
diff --git a/tools/testing/selftests/bpf/benchs/run_bench_bpf_loop.sh b/tools/testing/selftests/bpf/benchs/run_bench_bpf_loop.sh
new file mode 100755
index 000000000000..d4f5f73b356b
--- /dev/null
+++ b/tools/testing/selftests/bpf/benchs/run_bench_bpf_loop.sh
@@ -0,0 +1,15 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+source ./benchs/run_common.sh
+
+set -eufo pipefail
+
+for t in 1 4 8 12 16; do
+for i in 10 100 500 1000 5000 10000 50000 100000 500000 1000000; do
+subtitle "nr_loops: $i, nr_threads: $t"
+ summarize_ops "bpf_loop: " \
+ "$($RUN_BENCH -p $t --nr_loops $i bpf-loop)"
+ printf "\n"
+done
+done
diff --git a/tools/testing/selftests/bpf/benchs/run_bench_htab_mem.sh b/tools/testing/selftests/bpf/benchs/run_bench_htab_mem.sh
new file mode 100755
index 000000000000..9ff5832463a2
--- /dev/null
+++ b/tools/testing/selftests/bpf/benchs/run_bench_htab_mem.sh
@@ -0,0 +1,40 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+source ./benchs/run_common.sh
+
+set -eufo pipefail
+
+htab_mem()
+{
+ echo -n "per-prod-op: "
+ echo -n "$*" | sed -E "s/.* per-prod-op\s+([0-9]+\.[0-9]+ ± [0-9]+\.[0-9]+k\/s).*/\1/"
+ echo -n -e ", avg mem: "
+ echo -n "$*" | sed -E "s/.* memory usage\s+([0-9]+\.[0-9]+ ± [0-9]+\.[0-9]+MiB).*/\1/"
+ echo -n ", peak mem: "
+ echo "$*" | sed -E "s/.* peak memory usage\s+([0-9]+\.[0-9]+MiB).*/\1/"
+}
+
+summarize_htab_mem()
+{
+ local bench="$1"
+ local summary=$(echo $2 | tail -n1)
+
+ printf "%-20s %s\n" "$bench" "$(htab_mem $summary)"
+}
+
+htab_mem_bench()
+{
+ local name
+
+ for name in overwrite batch_add_batch_del add_del_on_diff_cpu
+ do
+ summarize_htab_mem "$name" "$($RUN_BENCH htab-mem --use-case $name -p8 "$@")"
+ done
+}
+
+header "preallocated"
+htab_mem_bench "--preallocated"
+
+header "normal bpf ma"
+htab_mem_bench
diff --git a/tools/testing/selftests/bpf/benchs/run_bench_local_storage.sh b/tools/testing/selftests/bpf/benchs/run_bench_local_storage.sh
new file mode 100755
index 000000000000..2eb2b513a173
--- /dev/null
+++ b/tools/testing/selftests/bpf/benchs/run_bench_local_storage.sh
@@ -0,0 +1,24 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+source ./benchs/run_common.sh
+
+set -eufo pipefail
+
+header "Hashmap Control"
+for i in 10 1000 10000 100000 4194304; do
+subtitle "num keys: $i"
+ summarize_local_storage "hashmap (control) sequential get: "\
+ "$(./bench --nr_maps 1 --hashmap_nr_keys_used=$i local-storage-cache-hashmap-control)"
+ printf "\n"
+done
+
+header "Local Storage"
+for i in 1 10 16 17 24 32 100 1000; do
+subtitle "num_maps: $i"
+ summarize_local_storage "local_storage cache sequential get: "\
+ "$(./bench --nr_maps $i local-storage-cache-seq-get)"
+ summarize_local_storage "local_storage cache interleaved get: "\
+ "$(./bench --nr_maps $i local-storage-cache-int-get)"
+ printf "\n"
+done
diff --git a/tools/testing/selftests/bpf/benchs/run_bench_local_storage_rcu_tasks_trace.sh b/tools/testing/selftests/bpf/benchs/run_bench_local_storage_rcu_tasks_trace.sh
new file mode 100755
index 000000000000..3e8a969f2096
--- /dev/null
+++ b/tools/testing/selftests/bpf/benchs/run_bench_local_storage_rcu_tasks_trace.sh
@@ -0,0 +1,11 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+kthread_pid=`pgrep rcu_tasks_trace_kthread`
+
+if [ -z $kthread_pid ]; then
+ echo "error: Couldn't find rcu_tasks_trace_kthread"
+ exit 1
+fi
+
+./bench --nr_procs 15000 --kthread_pid $kthread_pid -d 600 --quiet local-storage-tasks-trace
diff --git a/tools/testing/selftests/bpf/benchs/run_bench_rename.sh b/tools/testing/selftests/bpf/benchs/run_bench_rename.sh
index 16f774b1cdbe..7b281dbe4165 100755
--- a/tools/testing/selftests/bpf/benchs/run_bench_rename.sh
+++ b/tools/testing/selftests/bpf/benchs/run_bench_rename.sh
@@ -2,7 +2,7 @@
set -eufo pipefail
-for i in base kprobe kretprobe rawtp fentry fexit fmodret
+for i in base kprobe kretprobe rawtp fentry fexit
do
summary=$(sudo ./bench -w2 -d5 -a rename-$i | tail -n1 | cut -d'(' -f1 | cut -d' ' -f3-)
printf "%-10s: %s\n" $i "$summary"
diff --git a/tools/testing/selftests/bpf/benchs/run_bench_ringbufs.sh b/tools/testing/selftests/bpf/benchs/run_bench_ringbufs.sh
index af4aa04caba6..91e3567962ff 100755
--- a/tools/testing/selftests/bpf/benchs/run_bench_ringbufs.sh
+++ b/tools/testing/selftests/bpf/benchs/run_bench_ringbufs.sh
@@ -1,75 +1,51 @@
#!/bin/bash
-set -eufo pipefail
-
-RUN_BENCH="sudo ./bench -w3 -d10 -a"
-
-function hits()
-{
- echo "$*" | sed -E "s/.*hits\s+([0-9]+\.[0-9]+ ± [0-9]+\.[0-9]+M\/s).*/\1/"
-}
+source ./benchs/run_common.sh
-function drops()
-{
- echo "$*" | sed -E "s/.*drops\s+([0-9]+\.[0-9]+ ± [0-9]+\.[0-9]+M\/s).*/\1/"
-}
-
-function header()
-{
- local len=${#1}
-
- printf "\n%s\n" "$1"
- for i in $(seq 1 $len); do printf '='; done
- printf '\n'
-}
+set -eufo pipefail
-function summarize()
-{
- bench="$1"
- summary=$(echo $2 | tail -n1)
- printf "%-20s %s (drops %s)\n" "$bench" "$(hits $summary)" "$(drops $summary)"
-}
+RUN_RB_BENCH="$RUN_BENCH -c1"
header "Single-producer, parallel producer"
for b in rb-libbpf rb-custom pb-libbpf pb-custom; do
- summarize $b "$($RUN_BENCH $b)"
+ summarize $b "$($RUN_RB_BENCH $b)"
done
header "Single-producer, parallel producer, sampled notification"
for b in rb-libbpf rb-custom pb-libbpf pb-custom; do
- summarize $b "$($RUN_BENCH --rb-sampled $b)"
+ summarize $b "$($RUN_RB_BENCH --rb-sampled $b)"
done
header "Single-producer, back-to-back mode"
for b in rb-libbpf rb-custom pb-libbpf pb-custom; do
- summarize $b "$($RUN_BENCH --rb-b2b $b)"
- summarize $b-sampled "$($RUN_BENCH --rb-sampled --rb-b2b $b)"
+ summarize $b "$($RUN_RB_BENCH --rb-b2b $b)"
+ summarize $b-sampled "$($RUN_RB_BENCH --rb-sampled --rb-b2b $b)"
done
header "Ringbuf back-to-back, effect of sample rate"
for b in 1 5 10 25 50 100 250 500 1000 2000 3000; do
- summarize "rb-sampled-$b" "$($RUN_BENCH --rb-b2b --rb-batch-cnt $b --rb-sampled --rb-sample-rate $b rb-custom)"
+ summarize "rb-sampled-$b" "$($RUN_RB_BENCH --rb-b2b --rb-batch-cnt $b --rb-sampled --rb-sample-rate $b rb-custom)"
done
header "Perfbuf back-to-back, effect of sample rate"
for b in 1 5 10 25 50 100 250 500 1000 2000 3000; do
- summarize "pb-sampled-$b" "$($RUN_BENCH --rb-b2b --rb-batch-cnt $b --rb-sampled --rb-sample-rate $b pb-custom)"
+ summarize "pb-sampled-$b" "$($RUN_RB_BENCH --rb-b2b --rb-batch-cnt $b --rb-sampled --rb-sample-rate $b pb-custom)"
done
header "Ringbuf back-to-back, reserve+commit vs output"
-summarize "reserve" "$($RUN_BENCH --rb-b2b rb-custom)"
-summarize "output" "$($RUN_BENCH --rb-b2b --rb-use-output rb-custom)"
+summarize "reserve" "$($RUN_RB_BENCH --rb-b2b rb-custom)"
+summarize "output" "$($RUN_RB_BENCH --rb-b2b --rb-use-output rb-custom)"
header "Ringbuf sampled, reserve+commit vs output"
-summarize "reserve-sampled" "$($RUN_BENCH --rb-sampled rb-custom)"
-summarize "output-sampled" "$($RUN_BENCH --rb-sampled --rb-use-output rb-custom)"
+summarize "reserve-sampled" "$($RUN_RB_BENCH --rb-sampled rb-custom)"
+summarize "output-sampled" "$($RUN_RB_BENCH --rb-sampled --rb-use-output rb-custom)"
header "Single-producer, consumer/producer competing on the same CPU, low batch count"
for b in rb-libbpf rb-custom pb-libbpf pb-custom; do
- summarize $b "$($RUN_BENCH --rb-batch-cnt 1 --rb-sample-rate 1 --prod-affinity 0 --cons-affinity 0 $b)"
+ summarize $b "$($RUN_RB_BENCH --rb-batch-cnt 1 --rb-sample-rate 1 --prod-affinity 0 --cons-affinity 0 $b)"
done
header "Ringbuf, multi-producer contention"
for b in 1 2 3 4 8 12 16 20 24 28 32 36 40 44 48 52; do
- summarize "rb-libbpf nr_prod $b" "$($RUN_BENCH -p$b --rb-batch-cnt 50 rb-libbpf)"
+ summarize "rb-libbpf nr_prod $b" "$($RUN_RB_BENCH -p$b --rb-batch-cnt 50 rb-libbpf)"
done
diff --git a/tools/testing/selftests/bpf/benchs/run_bench_strncmp.sh b/tools/testing/selftests/bpf/benchs/run_bench_strncmp.sh
new file mode 100755
index 000000000000..142697284b45
--- /dev/null
+++ b/tools/testing/selftests/bpf/benchs/run_bench_strncmp.sh
@@ -0,0 +1,12 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+source ./benchs/run_common.sh
+
+set -eufo pipefail
+
+for s in 1 8 64 512 2048 4095; do
+ for b in no-helper helper; do
+ summarize ${b}-${s} "$($RUN_BENCH --cmp-str-len=$s strncmp-${b})"
+ done
+done
diff --git a/tools/testing/selftests/bpf/benchs/run_bench_uprobes.sh b/tools/testing/selftests/bpf/benchs/run_bench_uprobes.sh
new file mode 100755
index 000000000000..9bdcc74e03a4
--- /dev/null
+++ b/tools/testing/selftests/bpf/benchs/run_bench_uprobes.sh
@@ -0,0 +1,9 @@
+#!/bin/bash
+
+set -eufo pipefail
+
+for i in base {uprobe,uretprobe}-{nop,push,ret}
+do
+ summary=$(sudo ./bench -w2 -d5 -a trig-$i | tail -n1 | cut -d'(' -f1 | cut -d' ' -f3-)
+ printf "%-15s: %s\n" $i "$summary"
+done
diff --git a/tools/testing/selftests/bpf/benchs/run_common.sh b/tools/testing/selftests/bpf/benchs/run_common.sh
new file mode 100644
index 000000000000..d9f40af82006
--- /dev/null
+++ b/tools/testing/selftests/bpf/benchs/run_common.sh
@@ -0,0 +1,92 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+RUN_BENCH="sudo ./bench -w3 -d10 -a"
+
+function header()
+{
+ local len=${#1}
+
+ printf "\n%s\n" "$1"
+ for i in $(seq 1 $len); do printf '='; done
+ printf '\n'
+}
+
+function subtitle()
+{
+ local len=${#1}
+ printf "\t%s\n" "$1"
+}
+
+function hits()
+{
+ echo "$*" | sed -E "s/.*hits\s+([0-9]+\.[0-9]+ ± [0-9]+\.[0-9]+M\/s).*/\1/"
+}
+
+function drops()
+{
+ echo "$*" | sed -E "s/.*drops\s+([0-9]+\.[0-9]+ ± [0-9]+\.[0-9]+M\/s).*/\1/"
+}
+
+function percentage()
+{
+ echo "$*" | sed -E "s/.*Percentage\s=\s+([0-9]+\.[0-9]+).*/\1/"
+}
+
+function ops()
+{
+ echo -n "throughput: "
+ echo -n "$*" | sed -E "s/.*throughput\s+([0-9]+\.[0-9]+ ± [0-9]+\.[0-9]+\sM\sops\/s).*/\1/"
+ echo -n -e ", latency: "
+ echo "$*" | sed -E "s/.*latency\s+([0-9]+\.[0-9]+\sns\/op).*/\1/"
+}
+
+function local_storage()
+{
+ echo -n "hits throughput: "
+ echo -n "$*" | sed -E "s/.* hits throughput\s+([0-9]+\.[0-9]+ ± [0-9]+\.[0-9]+\sM\sops\/s).*/\1/"
+ echo -n -e ", hits latency: "
+ echo -n "$*" | sed -E "s/.* hits latency\s+([0-9]+\.[0-9]+\sns\/op).*/\1/"
+ echo -n ", important_hits throughput: "
+ echo "$*" | sed -E "s/.*important_hits throughput\s+([0-9]+\.[0-9]+ ± [0-9]+\.[0-9]+\sM\sops\/s).*/\1/"
+}
+
+function total()
+{
+ echo "$*" | sed -E "s/.*total operations\s+([0-9]+\.[0-9]+ ± [0-9]+\.[0-9]+M\/s).*/\1/"
+}
+
+function summarize()
+{
+ bench="$1"
+ summary=$(echo $2 | tail -n1)
+ printf "%-20s %s (drops %s)\n" "$bench" "$(hits $summary)" "$(drops $summary)"
+}
+
+function summarize_percentage()
+{
+ bench="$1"
+ summary=$(echo $2 | tail -n1)
+ printf "%-20s %s%%\n" "$bench" "$(percentage $summary)"
+}
+
+function summarize_ops()
+{
+ bench="$1"
+ summary=$(echo $2 | tail -n1)
+ printf "%-20s %s\n" "$bench" "$(ops $summary)"
+}
+
+function summarize_local_storage()
+{
+ bench="$1"
+ summary=$(echo $2 | tail -n1)
+ printf "%-20s %s\n" "$bench" "$(local_storage $summary)"
+}
+
+function summarize_total()
+{
+ bench="$1"
+ summary=$(echo $2 | tail -n1)
+ printf "%-20s %s\n" "$bench" "$(total $summary)"
+}
diff --git a/tools/testing/selftests/bpf/bpf_arena_alloc.h b/tools/testing/selftests/bpf/bpf_arena_alloc.h
new file mode 100644
index 000000000000..c27678299e0c
--- /dev/null
+++ b/tools/testing/selftests/bpf/bpf_arena_alloc.h
@@ -0,0 +1,67 @@
+/* SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) */
+/* Copyright (c) 2024 Meta Platforms, Inc. and affiliates. */
+#pragma once
+#include "bpf_arena_common.h"
+
+#ifndef __round_mask
+#define __round_mask(x, y) ((__typeof__(x))((y)-1))
+#endif
+#ifndef round_up
+#define round_up(x, y) ((((x)-1) | __round_mask(x, y))+1)
+#endif
+
+#ifdef __BPF__
+#define NR_CPUS (sizeof(struct cpumask) * 8)
+
+static void __arena * __arena page_frag_cur_page[NR_CPUS];
+static int __arena page_frag_cur_offset[NR_CPUS];
+
+/* Simple page_frag allocator */
+static inline void __arena* bpf_alloc(unsigned int size)
+{
+ __u64 __arena *obj_cnt;
+ __u32 cpu = bpf_get_smp_processor_id();
+ void __arena *page = page_frag_cur_page[cpu];
+ int __arena *cur_offset = &page_frag_cur_offset[cpu];
+ int offset;
+
+ size = round_up(size, 8);
+ if (size >= PAGE_SIZE - 8)
+ return NULL;
+ if (!page) {
+refill:
+ page = bpf_arena_alloc_pages(&arena, NULL, 1, NUMA_NO_NODE, 0);
+ if (!page)
+ return NULL;
+ cast_kern(page);
+ page_frag_cur_page[cpu] = page;
+ *cur_offset = PAGE_SIZE - 8;
+ obj_cnt = page + PAGE_SIZE - 8;
+ *obj_cnt = 0;
+ } else {
+ cast_kern(page);
+ obj_cnt = page + PAGE_SIZE - 8;
+ }
+
+ offset = *cur_offset - size;
+ if (offset < 0)
+ goto refill;
+
+ (*obj_cnt)++;
+ *cur_offset = offset;
+ return page + offset;
+}
+
+static inline void bpf_free(void __arena *addr)
+{
+ __u64 __arena *obj_cnt;
+
+ addr = (void __arena *)(((long)addr) & ~(PAGE_SIZE - 1));
+ obj_cnt = addr + PAGE_SIZE - 8;
+ if (--(*obj_cnt) == 0)
+ bpf_arena_free_pages(&arena, addr, 1);
+}
+#else
+static inline void __arena* bpf_alloc(unsigned int size) { return NULL; }
+static inline void bpf_free(void __arena *addr) {}
+#endif
diff --git a/tools/testing/selftests/bpf/bpf_arena_common.h b/tools/testing/selftests/bpf/bpf_arena_common.h
new file mode 100644
index 000000000000..bcf195c64a45
--- /dev/null
+++ b/tools/testing/selftests/bpf/bpf_arena_common.h
@@ -0,0 +1,70 @@
+/* SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) */
+/* Copyright (c) 2024 Meta Platforms, Inc. and affiliates. */
+#pragma once
+
+#ifndef WRITE_ONCE
+#define WRITE_ONCE(x, val) ((*(volatile typeof(x) *) &(x)) = (val))
+#endif
+
+#ifndef NUMA_NO_NODE
+#define NUMA_NO_NODE (-1)
+#endif
+
+#ifndef arena_container_of
+#define arena_container_of(ptr, type, member) \
+ ({ \
+ void __arena *__mptr = (void __arena *)(ptr); \
+ ((type *)(__mptr - offsetof(type, member))); \
+ })
+#endif
+
+#ifdef __BPF__ /* when compiled as bpf program */
+
+#ifndef PAGE_SIZE
+#define PAGE_SIZE __PAGE_SIZE
+/*
+ * for older kernels try sizeof(struct genradix_node)
+ * or flexible:
+ * static inline long __bpf_page_size(void) {
+ * return bpf_core_enum_value(enum page_size_enum___l, __PAGE_SIZE___l) ?: sizeof(struct genradix_node);
+ * }
+ * but generated code is not great.
+ */
+#endif
+
+#if defined(__BPF_FEATURE_ARENA_CAST) && !defined(BPF_ARENA_FORCE_ASM)
+#define __arena __attribute__((address_space(1)))
+#define cast_kern(ptr) /* nop for bpf prog. emitted by LLVM */
+#define cast_user(ptr) /* nop for bpf prog. emitted by LLVM */
+#else
+#define __arena
+#define cast_kern(ptr) bpf_addr_space_cast(ptr, 0, 1)
+#define cast_user(ptr) bpf_addr_space_cast(ptr, 1, 0)
+#endif
+
+void __arena* bpf_arena_alloc_pages(void *map, void __arena *addr, __u32 page_cnt,
+ int node_id, __u64 flags) __ksym __weak;
+void bpf_arena_free_pages(void *map, void __arena *ptr, __u32 page_cnt) __ksym __weak;
+
+#else /* when compiled as user space code */
+
+#define __arena
+#define __arg_arena
+#define cast_kern(ptr) /* nop for user space */
+#define cast_user(ptr) /* nop for user space */
+__weak char arena[1];
+
+#ifndef offsetof
+#define offsetof(type, member) ((unsigned long)&((type *)0)->member)
+#endif
+
+static inline void __arena* bpf_arena_alloc_pages(void *map, void *addr, __u32 page_cnt,
+ int node_id, __u64 flags)
+{
+ return NULL;
+}
+static inline void bpf_arena_free_pages(void *map, void __arena *ptr, __u32 page_cnt)
+{
+}
+
+#endif
diff --git a/tools/testing/selftests/bpf/bpf_arena_htab.h b/tools/testing/selftests/bpf/bpf_arena_htab.h
new file mode 100644
index 000000000000..acc01a876668
--- /dev/null
+++ b/tools/testing/selftests/bpf/bpf_arena_htab.h
@@ -0,0 +1,100 @@
+/* SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) */
+/* Copyright (c) 2024 Meta Platforms, Inc. and affiliates. */
+#pragma once
+#include <errno.h>
+#include "bpf_arena_alloc.h"
+#include "bpf_arena_list.h"
+
+struct htab_bucket {
+ struct arena_list_head head;
+};
+typedef struct htab_bucket __arena htab_bucket_t;
+
+struct htab {
+ htab_bucket_t *buckets;
+ int n_buckets;
+};
+typedef struct htab __arena htab_t;
+
+static inline htab_bucket_t *__select_bucket(htab_t *htab, __u32 hash)
+{
+ htab_bucket_t *b = htab->buckets;
+
+ cast_kern(b);
+ return &b[hash & (htab->n_buckets - 1)];
+}
+
+static inline arena_list_head_t *select_bucket(htab_t *htab, __u32 hash)
+{
+ return &__select_bucket(htab, hash)->head;
+}
+
+struct hashtab_elem {
+ int hash;
+ int key;
+ int value;
+ struct arena_list_node hash_node;
+};
+typedef struct hashtab_elem __arena hashtab_elem_t;
+
+static hashtab_elem_t *lookup_elem_raw(arena_list_head_t *head, __u32 hash, int key)
+{
+ hashtab_elem_t *l;
+
+ list_for_each_entry(l, head, hash_node)
+ if (l->hash == hash && l->key == key)
+ return l;
+
+ return NULL;
+}
+
+static int htab_hash(int key)
+{
+ return key;
+}
+
+__weak int htab_lookup_elem(htab_t *htab __arg_arena, int key)
+{
+ hashtab_elem_t *l_old;
+ arena_list_head_t *head;
+
+ cast_kern(htab);
+ head = select_bucket(htab, key);
+ l_old = lookup_elem_raw(head, htab_hash(key), key);
+ if (l_old)
+ return l_old->value;
+ return 0;
+}
+
+__weak int htab_update_elem(htab_t *htab __arg_arena, int key, int value)
+{
+ hashtab_elem_t *l_new = NULL, *l_old;
+ arena_list_head_t *head;
+
+ cast_kern(htab);
+ head = select_bucket(htab, key);
+ l_old = lookup_elem_raw(head, htab_hash(key), key);
+
+ l_new = bpf_alloc(sizeof(*l_new));
+ if (!l_new)
+ return -ENOMEM;
+ l_new->key = key;
+ l_new->hash = htab_hash(key);
+ l_new->value = value;
+
+ list_add_head(&l_new->hash_node, head);
+ if (l_old) {
+ list_del(&l_old->hash_node);
+ bpf_free(l_old);
+ }
+ return 0;
+}
+
+void htab_init(htab_t *htab)
+{
+ void __arena *buckets = bpf_arena_alloc_pages(&arena, NULL, 2, NUMA_NO_NODE, 0);
+
+ cast_user(buckets);
+ htab->buckets = buckets;
+ htab->n_buckets = 2 * PAGE_SIZE / sizeof(struct htab_bucket);
+}
diff --git a/tools/testing/selftests/bpf/bpf_arena_list.h b/tools/testing/selftests/bpf/bpf_arena_list.h
new file mode 100644
index 000000000000..b99b9f408eff
--- /dev/null
+++ b/tools/testing/selftests/bpf/bpf_arena_list.h
@@ -0,0 +1,92 @@
+/* SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) */
+/* Copyright (c) 2024 Meta Platforms, Inc. and affiliates. */
+#pragma once
+#include "bpf_arena_common.h"
+
+struct arena_list_node;
+
+typedef struct arena_list_node __arena arena_list_node_t;
+
+struct arena_list_node {
+ arena_list_node_t *next;
+ arena_list_node_t * __arena *pprev;
+};
+
+struct arena_list_head {
+ struct arena_list_node __arena *first;
+};
+typedef struct arena_list_head __arena arena_list_head_t;
+
+#define list_entry(ptr, type, member) arena_container_of(ptr, type, member)
+
+#define list_entry_safe(ptr, type, member) \
+ ({ typeof(*ptr) * ___ptr = (ptr); \
+ ___ptr ? ({ cast_kern(___ptr); list_entry(___ptr, type, member); }) : NULL; \
+ })
+
+#ifndef __BPF__
+static inline void *bpf_iter_num_new(struct bpf_iter_num *it, int i, int j) { return NULL; }
+static inline void bpf_iter_num_destroy(struct bpf_iter_num *it) {}
+static inline bool bpf_iter_num_next(struct bpf_iter_num *it) { return true; }
+#define cond_break ({})
+#endif
+
+/* Safely walk link list elements. Deletion of elements is allowed. */
+#define list_for_each_entry(pos, head, member) \
+ for (void * ___tmp = (pos = list_entry_safe((head)->first, \
+ typeof(*(pos)), member), \
+ (void *)0); \
+ pos && ({ ___tmp = (void *)pos->member.next; 1; }); \
+ cond_break, \
+ pos = list_entry_safe((void __arena *)___tmp, typeof(*(pos)), member))
+
+static inline void list_add_head(arena_list_node_t *n, arena_list_head_t *h)
+{
+ arena_list_node_t *first = h->first, * __arena *tmp;
+
+ cast_user(first);
+ cast_kern(n);
+ WRITE_ONCE(n->next, first);
+ cast_kern(first);
+ if (first) {
+ tmp = &n->next;
+ cast_user(tmp);
+ WRITE_ONCE(first->pprev, tmp);
+ }
+ cast_user(n);
+ WRITE_ONCE(h->first, n);
+
+ tmp = &h->first;
+ cast_user(tmp);
+ cast_kern(n);
+ WRITE_ONCE(n->pprev, tmp);
+}
+
+static inline void __list_del(arena_list_node_t *n)
+{
+ arena_list_node_t *next = n->next, *tmp;
+ arena_list_node_t * __arena *pprev = n->pprev;
+
+ cast_user(next);
+ cast_kern(pprev);
+ tmp = *pprev;
+ cast_kern(tmp);
+ WRITE_ONCE(tmp, next);
+ if (next) {
+ cast_user(pprev);
+ cast_kern(next);
+ WRITE_ONCE(next->pprev, pprev);
+ }
+}
+
+#define POISON_POINTER_DELTA 0
+
+#define LIST_POISON1 ((void __arena *) 0x100 + POISON_POINTER_DELTA)
+#define LIST_POISON2 ((void __arena *) 0x122 + POISON_POINTER_DELTA)
+
+static inline void list_del(arena_list_node_t *n)
+{
+ __list_del(n);
+ n->next = LIST_POISON1;
+ n->pprev = LIST_POISON2;
+}
diff --git a/tools/testing/selftests/bpf/bpf_experimental.h b/tools/testing/selftests/bpf/bpf_experimental.h
new file mode 100644
index 000000000000..a5b9df38c162
--- /dev/null
+++ b/tools/testing/selftests/bpf/bpf_experimental.h
@@ -0,0 +1,462 @@
+#ifndef __BPF_EXPERIMENTAL__
+#define __BPF_EXPERIMENTAL__
+
+#include <vmlinux.h>
+#include <bpf/bpf_tracing.h>
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_core_read.h>
+
+#define __contains(name, node) __attribute__((btf_decl_tag("contains:" #name ":" #node)))
+
+/* Description
+ * Allocates an object of the type represented by 'local_type_id' in
+ * program BTF. User may use the bpf_core_type_id_local macro to pass the
+ * type ID of a struct in program BTF.
+ *
+ * The 'local_type_id' parameter must be a known constant.
+ * The 'meta' parameter is rewritten by the verifier, no need for BPF
+ * program to set it.
+ * Returns
+ * A pointer to an object of the type corresponding to the passed in
+ * 'local_type_id', or NULL on failure.
+ */
+extern void *bpf_obj_new_impl(__u64 local_type_id, void *meta) __ksym;
+
+/* Convenience macro to wrap over bpf_obj_new_impl */
+#define bpf_obj_new(type) ((type *)bpf_obj_new_impl(bpf_core_type_id_local(type), NULL))
+
+/* Description
+ * Free an allocated object. All fields of the object that require
+ * destruction will be destructed before the storage is freed.
+ *
+ * The 'meta' parameter is rewritten by the verifier, no need for BPF
+ * program to set it.
+ * Returns
+ * Void.
+ */
+extern void bpf_obj_drop_impl(void *kptr, void *meta) __ksym;
+
+/* Convenience macro to wrap over bpf_obj_drop_impl */
+#define bpf_obj_drop(kptr) bpf_obj_drop_impl(kptr, NULL)
+
+/* Description
+ * Increment the refcount on a refcounted local kptr, turning the
+ * non-owning reference input into an owning reference in the process.
+ *
+ * The 'meta' parameter is rewritten by the verifier, no need for BPF
+ * program to set it.
+ * Returns
+ * An owning reference to the object pointed to by 'kptr'
+ */
+extern void *bpf_refcount_acquire_impl(void *kptr, void *meta) __ksym;
+
+/* Convenience macro to wrap over bpf_refcount_acquire_impl */
+#define bpf_refcount_acquire(kptr) bpf_refcount_acquire_impl(kptr, NULL)
+
+/* Description
+ * Add a new entry to the beginning of the BPF linked list.
+ *
+ * The 'meta' and 'off' parameters are rewritten by the verifier, no need
+ * for BPF programs to set them
+ * Returns
+ * 0 if the node was successfully added
+ * -EINVAL if the node wasn't added because it's already in a list
+ */
+extern int bpf_list_push_front_impl(struct bpf_list_head *head,
+ struct bpf_list_node *node,
+ void *meta, __u64 off) __ksym;
+
+/* Convenience macro to wrap over bpf_list_push_front_impl */
+#define bpf_list_push_front(head, node) bpf_list_push_front_impl(head, node, NULL, 0)
+
+/* Description
+ * Add a new entry to the end of the BPF linked list.
+ *
+ * The 'meta' and 'off' parameters are rewritten by the verifier, no need
+ * for BPF programs to set them
+ * Returns
+ * 0 if the node was successfully added
+ * -EINVAL if the node wasn't added because it's already in a list
+ */
+extern int bpf_list_push_back_impl(struct bpf_list_head *head,
+ struct bpf_list_node *node,
+ void *meta, __u64 off) __ksym;
+
+/* Convenience macro to wrap over bpf_list_push_back_impl */
+#define bpf_list_push_back(head, node) bpf_list_push_back_impl(head, node, NULL, 0)
+
+/* Description
+ * Remove the entry at the beginning of the BPF linked list.
+ * Returns
+ * Pointer to bpf_list_node of deleted entry, or NULL if list is empty.
+ */
+extern struct bpf_list_node *bpf_list_pop_front(struct bpf_list_head *head) __ksym;
+
+/* Description
+ * Remove the entry at the end of the BPF linked list.
+ * Returns
+ * Pointer to bpf_list_node of deleted entry, or NULL if list is empty.
+ */
+extern struct bpf_list_node *bpf_list_pop_back(struct bpf_list_head *head) __ksym;
+
+/* Description
+ * Remove 'node' from rbtree with root 'root'
+ * Returns
+ * Pointer to the removed node, or NULL if 'root' didn't contain 'node'
+ */
+extern struct bpf_rb_node *bpf_rbtree_remove(struct bpf_rb_root *root,
+ struct bpf_rb_node *node) __ksym;
+
+/* Description
+ * Add 'node' to rbtree with root 'root' using comparator 'less'
+ *
+ * The 'meta' and 'off' parameters are rewritten by the verifier, no need
+ * for BPF programs to set them
+ * Returns
+ * 0 if the node was successfully added
+ * -EINVAL if the node wasn't added because it's already in a tree
+ */
+extern int bpf_rbtree_add_impl(struct bpf_rb_root *root, struct bpf_rb_node *node,
+ bool (less)(struct bpf_rb_node *a, const struct bpf_rb_node *b),
+ void *meta, __u64 off) __ksym;
+
+/* Convenience macro to wrap over bpf_rbtree_add_impl */
+#define bpf_rbtree_add(head, node, less) bpf_rbtree_add_impl(head, node, less, NULL, 0)
+
+/* Description
+ * Return the first (leftmost) node in input tree
+ * Returns
+ * Pointer to the node, which is _not_ removed from the tree. If the tree
+ * contains no nodes, returns NULL.
+ */
+extern struct bpf_rb_node *bpf_rbtree_first(struct bpf_rb_root *root) __ksym;
+
+/* Description
+ * Allocates a percpu object of the type represented by 'local_type_id' in
+ * program BTF. User may use the bpf_core_type_id_local macro to pass the
+ * type ID of a struct in program BTF.
+ *
+ * The 'local_type_id' parameter must be a known constant.
+ * The 'meta' parameter is rewritten by the verifier, no need for BPF
+ * program to set it.
+ * Returns
+ * A pointer to a percpu object of the type corresponding to the passed in
+ * 'local_type_id', or NULL on failure.
+ */
+extern void *bpf_percpu_obj_new_impl(__u64 local_type_id, void *meta) __ksym;
+
+/* Convenience macro to wrap over bpf_percpu_obj_new_impl */
+#define bpf_percpu_obj_new(type) ((type __percpu_kptr *)bpf_percpu_obj_new_impl(bpf_core_type_id_local(type), NULL))
+
+/* Description
+ * Free an allocated percpu object. All fields of the object that require
+ * destruction will be destructed before the storage is freed.
+ *
+ * The 'meta' parameter is rewritten by the verifier, no need for BPF
+ * program to set it.
+ * Returns
+ * Void.
+ */
+extern void bpf_percpu_obj_drop_impl(void *kptr, void *meta) __ksym;
+
+struct bpf_iter_task_vma;
+
+extern int bpf_iter_task_vma_new(struct bpf_iter_task_vma *it,
+ struct task_struct *task,
+ unsigned long addr) __ksym;
+extern struct vm_area_struct *bpf_iter_task_vma_next(struct bpf_iter_task_vma *it) __ksym;
+extern void bpf_iter_task_vma_destroy(struct bpf_iter_task_vma *it) __ksym;
+
+/* Convenience macro to wrap over bpf_obj_drop_impl */
+#define bpf_percpu_obj_drop(kptr) bpf_percpu_obj_drop_impl(kptr, NULL)
+
+/* Description
+ * Throw a BPF exception from the program, immediately terminating its
+ * execution and unwinding the stack. The supplied 'cookie' parameter
+ * will be the return value of the program when an exception is thrown,
+ * and the default exception callback is used. Otherwise, if an exception
+ * callback is set using the '__exception_cb(callback)' declaration tag
+ * on the main program, the 'cookie' parameter will be the callback's only
+ * input argument.
+ *
+ * Thus, in case of default exception callback, 'cookie' is subjected to
+ * constraints on the program's return value (as with R0 on exit).
+ * Otherwise, the return value of the marked exception callback will be
+ * subjected to the same checks.
+ *
+ * Note that throwing an exception with lingering resources (locks,
+ * references, etc.) will lead to a verification error.
+ *
+ * Note that callbacks *cannot* call this helper.
+ * Returns
+ * Never.
+ * Throws
+ * An exception with the specified 'cookie' value.
+ */
+extern void bpf_throw(u64 cookie) __ksym;
+
+/* This macro must be used to mark the exception callback corresponding to the
+ * main program. For example:
+ *
+ * int exception_cb(u64 cookie) {
+ * return cookie;
+ * }
+ *
+ * SEC("tc")
+ * __exception_cb(exception_cb)
+ * int main_prog(struct __sk_buff *ctx) {
+ * ...
+ * return TC_ACT_OK;
+ * }
+ *
+ * Here, exception callback for the main program will be 'exception_cb'. Note
+ * that this attribute can only be used once, and multiple exception callbacks
+ * specified for the main program will lead to verification error.
+ */
+#define __exception_cb(name) __attribute__((btf_decl_tag("exception_callback:" #name)))
+
+#define __bpf_assert_signed(x) _Generic((x), \
+ unsigned long: 0, \
+ unsigned long long: 0, \
+ signed long: 1, \
+ signed long long: 1 \
+)
+
+#define __bpf_assert_check(LHS, op, RHS) \
+ _Static_assert(sizeof(&(LHS)), "1st argument must be an lvalue expression"); \
+ _Static_assert(sizeof(LHS) == 8, "Only 8-byte integers are supported\n"); \
+ _Static_assert(__builtin_constant_p(__bpf_assert_signed(LHS)), "internal static assert"); \
+ _Static_assert(__builtin_constant_p((RHS)), "2nd argument must be a constant expression")
+
+#define __bpf_assert(LHS, op, cons, RHS, VAL) \
+ ({ \
+ (void)bpf_throw; \
+ asm volatile ("if %[lhs] " op " %[rhs] goto +2; r1 = %[value]; call bpf_throw" \
+ : : [lhs] "r"(LHS), [rhs] cons(RHS), [value] "ri"(VAL) : ); \
+ })
+
+#define __bpf_assert_op_sign(LHS, op, cons, RHS, VAL, supp_sign) \
+ ({ \
+ __bpf_assert_check(LHS, op, RHS); \
+ if (__bpf_assert_signed(LHS) && !(supp_sign)) \
+ __bpf_assert(LHS, "s" #op, cons, RHS, VAL); \
+ else \
+ __bpf_assert(LHS, #op, cons, RHS, VAL); \
+ })
+
+#define __bpf_assert_op(LHS, op, RHS, VAL, supp_sign) \
+ ({ \
+ if (sizeof(typeof(RHS)) == 8) { \
+ const typeof(RHS) rhs_var = (RHS); \
+ __bpf_assert_op_sign(LHS, op, "r", rhs_var, VAL, supp_sign); \
+ } else { \
+ __bpf_assert_op_sign(LHS, op, "i", RHS, VAL, supp_sign); \
+ } \
+ })
+
+#define __cmp_cannot_be_signed(x) \
+ __builtin_strcmp(#x, "==") == 0 || __builtin_strcmp(#x, "!=") == 0 || \
+ __builtin_strcmp(#x, "&") == 0
+
+#define __is_signed_type(type) (((type)(-1)) < (type)1)
+
+#define __bpf_cmp(LHS, OP, PRED, RHS, DEFAULT) \
+ ({ \
+ __label__ l_true; \
+ bool ret = DEFAULT; \
+ asm volatile goto("if %[lhs] " OP " %[rhs] goto %l[l_true]" \
+ :: [lhs] "r"((short)LHS), [rhs] PRED (RHS) :: l_true); \
+ ret = !DEFAULT; \
+l_true: \
+ ret; \
+ })
+
+/* C type conversions coupled with comparison operator are tricky.
+ * Make sure BPF program is compiled with -Wsign-compare then
+ * __lhs OP __rhs below will catch the mistake.
+ * Be aware that we check only __lhs to figure out the sign of compare.
+ */
+#define _bpf_cmp(LHS, OP, RHS, UNLIKELY) \
+ ({ \
+ typeof(LHS) __lhs = (LHS); \
+ typeof(RHS) __rhs = (RHS); \
+ bool ret; \
+ _Static_assert(sizeof(&(LHS)), "1st argument must be an lvalue expression"); \
+ (void)(__lhs OP __rhs); \
+ if (__cmp_cannot_be_signed(OP) || !__is_signed_type(typeof(__lhs))) { \
+ if (sizeof(__rhs) == 8) \
+ /* "i" will truncate 64-bit constant into s32, \
+ * so we have to use extra register via "r". \
+ */ \
+ ret = __bpf_cmp(__lhs, #OP, "r", __rhs, UNLIKELY); \
+ else \
+ ret = __bpf_cmp(__lhs, #OP, "ri", __rhs, UNLIKELY); \
+ } else { \
+ if (sizeof(__rhs) == 8) \
+ ret = __bpf_cmp(__lhs, "s"#OP, "r", __rhs, UNLIKELY); \
+ else \
+ ret = __bpf_cmp(__lhs, "s"#OP, "ri", __rhs, UNLIKELY); \
+ } \
+ ret; \
+ })
+
+#ifndef bpf_cmp_unlikely
+#define bpf_cmp_unlikely(LHS, OP, RHS) _bpf_cmp(LHS, OP, RHS, true)
+#endif
+
+#ifndef bpf_cmp_likely
+#define bpf_cmp_likely(LHS, OP, RHS) \
+ ({ \
+ bool ret = 0; \
+ if (__builtin_strcmp(#OP, "==") == 0) \
+ ret = _bpf_cmp(LHS, !=, RHS, false); \
+ else if (__builtin_strcmp(#OP, "!=") == 0) \
+ ret = _bpf_cmp(LHS, ==, RHS, false); \
+ else if (__builtin_strcmp(#OP, "<=") == 0) \
+ ret = _bpf_cmp(LHS, >, RHS, false); \
+ else if (__builtin_strcmp(#OP, "<") == 0) \
+ ret = _bpf_cmp(LHS, >=, RHS, false); \
+ else if (__builtin_strcmp(#OP, ">") == 0) \
+ ret = _bpf_cmp(LHS, <=, RHS, false); \
+ else if (__builtin_strcmp(#OP, ">=") == 0) \
+ ret = _bpf_cmp(LHS, <, RHS, false); \
+ else \
+ asm volatile("r0 " #OP " invalid compare"); \
+ ret; \
+ })
+#endif
+
+#define cond_break \
+ ({ __label__ l_break, l_continue; \
+ asm volatile goto("1:.byte 0xe5; \
+ .byte 0; \
+ .long ((%l[l_break] - 1b - 8) / 8) & 0xffff; \
+ .short 0" \
+ :::: l_break); \
+ goto l_continue; \
+ l_break: break; \
+ l_continue:; \
+ })
+
+#ifndef bpf_nop_mov
+#define bpf_nop_mov(var) \
+ asm volatile("%[reg]=%[reg]"::[reg]"r"((short)var))
+#endif
+
+/* emit instruction:
+ * rX = rX .off = BPF_ADDR_SPACE_CAST .imm32 = (dst_as << 16) | src_as
+ */
+#ifndef bpf_addr_space_cast
+#define bpf_addr_space_cast(var, dst_as, src_as)\
+ asm volatile(".byte 0xBF; \
+ .ifc %[reg], r0; \
+ .byte 0x00; \
+ .endif; \
+ .ifc %[reg], r1; \
+ .byte 0x11; \
+ .endif; \
+ .ifc %[reg], r2; \
+ .byte 0x22; \
+ .endif; \
+ .ifc %[reg], r3; \
+ .byte 0x33; \
+ .endif; \
+ .ifc %[reg], r4; \
+ .byte 0x44; \
+ .endif; \
+ .ifc %[reg], r5; \
+ .byte 0x55; \
+ .endif; \
+ .ifc %[reg], r6; \
+ .byte 0x66; \
+ .endif; \
+ .ifc %[reg], r7; \
+ .byte 0x77; \
+ .endif; \
+ .ifc %[reg], r8; \
+ .byte 0x88; \
+ .endif; \
+ .ifc %[reg], r9; \
+ .byte 0x99; \
+ .endif; \
+ .short %[off]; \
+ .long %[as]" \
+ : [reg]"+r"(var) \
+ : [off]"i"(BPF_ADDR_SPACE_CAST) \
+ , [as]"i"((dst_as << 16) | src_as));
+#endif
+
+/* Description
+ * Assert that a conditional expression is true.
+ * Returns
+ * Void.
+ * Throws
+ * An exception with the value zero when the assertion fails.
+ */
+#define bpf_assert(cond) if (!(cond)) bpf_throw(0);
+
+/* Description
+ * Assert that a conditional expression is true.
+ * Returns
+ * Void.
+ * Throws
+ * An exception with the specified value when the assertion fails.
+ */
+#define bpf_assert_with(cond, value) if (!(cond)) bpf_throw(value);
+
+/* Description
+ * Assert that LHS is in the range [BEG, END] (inclusive of both). This
+ * statement updates the known bounds of LHS during verification. Note
+ * that both BEG and END must be constant values, and must fit within the
+ * data type of LHS.
+ * Returns
+ * Void.
+ * Throws
+ * An exception with the value zero when the assertion fails.
+ */
+#define bpf_assert_range(LHS, BEG, END) \
+ ({ \
+ _Static_assert(BEG <= END, "BEG must be <= END"); \
+ barrier_var(LHS); \
+ __bpf_assert_op(LHS, >=, BEG, 0, false); \
+ __bpf_assert_op(LHS, <=, END, 0, false); \
+ })
+
+/* Description
+ * Assert that LHS is in the range [BEG, END] (inclusive of both). This
+ * statement updates the known bounds of LHS during verification. Note
+ * that both BEG and END must be constant values, and must fit within the
+ * data type of LHS.
+ * Returns
+ * Void.
+ * Throws
+ * An exception with the specified value when the assertion fails.
+ */
+#define bpf_assert_range_with(LHS, BEG, END, value) \
+ ({ \
+ _Static_assert(BEG <= END, "BEG must be <= END"); \
+ barrier_var(LHS); \
+ __bpf_assert_op(LHS, >=, BEG, value, false); \
+ __bpf_assert_op(LHS, <=, END, value, false); \
+ })
+
+struct bpf_iter_css_task;
+struct cgroup_subsys_state;
+extern int bpf_iter_css_task_new(struct bpf_iter_css_task *it,
+ struct cgroup_subsys_state *css, unsigned int flags) __weak __ksym;
+extern struct task_struct *bpf_iter_css_task_next(struct bpf_iter_css_task *it) __weak __ksym;
+extern void bpf_iter_css_task_destroy(struct bpf_iter_css_task *it) __weak __ksym;
+
+struct bpf_iter_task;
+extern int bpf_iter_task_new(struct bpf_iter_task *it,
+ struct task_struct *task, unsigned int flags) __weak __ksym;
+extern struct task_struct *bpf_iter_task_next(struct bpf_iter_task *it) __weak __ksym;
+extern void bpf_iter_task_destroy(struct bpf_iter_task *it) __weak __ksym;
+
+struct bpf_iter_css;
+extern int bpf_iter_css_new(struct bpf_iter_css *it,
+ struct cgroup_subsys_state *start, unsigned int flags) __weak __ksym;
+extern struct cgroup_subsys_state *bpf_iter_css_next(struct bpf_iter_css *it) __weak __ksym;
+extern void bpf_iter_css_destroy(struct bpf_iter_css *it) __weak __ksym;
+
+#endif
diff --git a/tools/testing/selftests/bpf/bpf_kfuncs.h b/tools/testing/selftests/bpf/bpf_kfuncs.h
new file mode 100644
index 000000000000..14ebe7d9e1a3
--- /dev/null
+++ b/tools/testing/selftests/bpf/bpf_kfuncs.h
@@ -0,0 +1,78 @@
+#ifndef __BPF_KFUNCS__
+#define __BPF_KFUNCS__
+
+struct bpf_sock_addr_kern;
+
+/* Description
+ * Initializes an skb-type dynptr
+ * Returns
+ * Error code
+ */
+extern int bpf_dynptr_from_skb(struct __sk_buff *skb, __u64 flags,
+ struct bpf_dynptr *ptr__uninit) __ksym __weak;
+
+/* Description
+ * Initializes an xdp-type dynptr
+ * Returns
+ * Error code
+ */
+extern int bpf_dynptr_from_xdp(struct xdp_md *xdp, __u64 flags,
+ struct bpf_dynptr *ptr__uninit) __ksym __weak;
+
+/* Description
+ * Obtain a read-only pointer to the dynptr's data
+ * Returns
+ * Either a direct pointer to the dynptr data or a pointer to the user-provided
+ * buffer if unable to obtain a direct pointer
+ */
+extern void *bpf_dynptr_slice(const struct bpf_dynptr *ptr, __u32 offset,
+ void *buffer, __u32 buffer__szk) __ksym __weak;
+
+/* Description
+ * Obtain a read-write pointer to the dynptr's data
+ * Returns
+ * Either a direct pointer to the dynptr data or a pointer to the user-provided
+ * buffer if unable to obtain a direct pointer
+ */
+extern void *bpf_dynptr_slice_rdwr(const struct bpf_dynptr *ptr, __u32 offset,
+ void *buffer, __u32 buffer__szk) __ksym __weak;
+
+extern int bpf_dynptr_adjust(const struct bpf_dynptr *ptr, __u32 start, __u32 end) __ksym __weak;
+extern bool bpf_dynptr_is_null(const struct bpf_dynptr *ptr) __ksym __weak;
+extern bool bpf_dynptr_is_rdonly(const struct bpf_dynptr *ptr) __ksym __weak;
+extern __u32 bpf_dynptr_size(const struct bpf_dynptr *ptr) __ksym __weak;
+extern int bpf_dynptr_clone(const struct bpf_dynptr *ptr, struct bpf_dynptr *clone__init) __ksym __weak;
+
+/* Description
+ * Modify the address of a AF_UNIX sockaddr.
+ * Returns__bpf_kfunc
+ * -EINVAL if the address size is too big or, 0 if the sockaddr was successfully modified.
+ */
+extern int bpf_sock_addr_set_sun_path(struct bpf_sock_addr_kern *sa_kern,
+ const __u8 *sun_path, __u32 sun_path__sz) __ksym;
+
+/* Description
+ * Allocate and configure a reqsk and link it with a listener and skb.
+ * Returns
+ * Error code
+ */
+struct sock;
+struct bpf_tcp_req_attrs;
+extern int bpf_sk_assign_tcp_reqsk(struct __sk_buff *skb, struct sock *sk,
+ struct bpf_tcp_req_attrs *attrs, int attrs__sz) __ksym;
+
+void *bpf_cast_to_kern_ctx(void *) __ksym;
+
+extern void *bpf_rdonly_cast(const void *obj, __u32 btf_id) __ksym __weak;
+
+extern int bpf_get_file_xattr(struct file *file, const char *name,
+ struct bpf_dynptr *value_ptr) __ksym;
+extern int bpf_get_fsverity_digest(struct file *file, struct bpf_dynptr *digest_ptr) __ksym;
+
+extern struct bpf_key *bpf_lookup_user_key(__u32 serial, __u64 flags) __ksym;
+extern struct bpf_key *bpf_lookup_system_key(__u64 id) __ksym;
+extern void bpf_key_put(struct bpf_key *key) __ksym;
+extern int bpf_verify_pkcs7_signature(struct bpf_dynptr *data_ptr,
+ struct bpf_dynptr *sig_ptr,
+ struct bpf_key *trusted_keyring) __ksym;
+#endif
diff --git a/tools/testing/selftests/bpf/bpf_legacy.h b/tools/testing/selftests/bpf/bpf_legacy.h
index 6f8988738bc1..bc4555a003a7 100644
--- a/tools/testing/selftests/bpf/bpf_legacy.h
+++ b/tools/testing/selftests/bpf/bpf_legacy.h
@@ -2,38 +2,22 @@
#ifndef __BPF_LEGACY__
#define __BPF_LEGACY__
-/*
- * legacy bpf_map_def with extra fields supported only by bpf_load(), do not
- * use outside of samples/bpf
+#if __GNUC__ && !__clang__
+/* Functions to emit BPF_LD_ABS and BPF_LD_IND instructions. We
+ * provide the "standard" names as synonyms of the corresponding GCC
+ * builtins. Note how the SKB argument is ignored.
*/
-struct bpf_map_def_legacy {
- unsigned int type;
- unsigned int key_size;
- unsigned int value_size;
- unsigned int max_entries;
- unsigned int map_flags;
- unsigned int inner_map_idx;
- unsigned int numa_node;
-};
-
-#define BPF_ANNOTATE_KV_PAIR(name, type_key, type_val) \
- struct ____btf_map_##name { \
- type_key key; \
- type_val value; \
- }; \
- struct ____btf_map_##name \
- __attribute__ ((section(".maps." #name), used)) \
- ____btf_map_##name = { }
-
+#define load_byte(skb, off) __builtin_bpf_load_byte(off)
+#define load_half(skb, off) __builtin_bpf_load_half(off)
+#define load_word(skb, off) __builtin_bpf_load_word(off)
+#else
/* llvm builtin functions that eBPF C program may use to
* emit BPF_LD_ABS and BPF_LD_IND instructions
*/
-unsigned long long load_byte(void *skb,
- unsigned long long off) asm("llvm.bpf.load.byte");
-unsigned long long load_half(void *skb,
- unsigned long long off) asm("llvm.bpf.load.half");
-unsigned long long load_word(void *skb,
- unsigned long long off) asm("llvm.bpf.load.word");
+unsigned long long load_byte(void *skb, unsigned long long off) asm("llvm.bpf.load.byte");
+unsigned long long load_half(void *skb, unsigned long long off) asm("llvm.bpf.load.half");
+unsigned long long load_word(void *skb, unsigned long long off) asm("llvm.bpf.load.word");
+#endif
#endif
diff --git a/tools/testing/selftests/bpf/bpf_rlimit.h b/tools/testing/selftests/bpf/bpf_rlimit.h
deleted file mode 100644
index 9dac9b30f8ef..000000000000
--- a/tools/testing/selftests/bpf/bpf_rlimit.h
+++ /dev/null
@@ -1,28 +0,0 @@
-#include <sys/resource.h>
-#include <stdio.h>
-
-static __attribute__((constructor)) void bpf_rlimit_ctor(void)
-{
- struct rlimit rlim_old, rlim_new = {
- .rlim_cur = RLIM_INFINITY,
- .rlim_max = RLIM_INFINITY,
- };
-
- getrlimit(RLIMIT_MEMLOCK, &rlim_old);
- /* For the sake of running the test cases, we temporarily
- * set rlimit to infinity in order for kernel to focus on
- * errors from actual test cases and not getting noise
- * from hitting memlock limits. The limit is on per-process
- * basis and not a global one, hence destructor not really
- * needed here.
- */
- if (setrlimit(RLIMIT_MEMLOCK, &rlim_new) < 0) {
- perror("Unable to lift memlock rlimit");
- /* Trying out lower limit, but expect potential test
- * case failures from this!
- */
- rlim_new.rlim_cur = rlim_old.rlim_cur + (1UL << 20);
- rlim_new.rlim_max = rlim_old.rlim_max + (1UL << 20);
- setrlimit(RLIMIT_MEMLOCK, &rlim_new);
- }
-}
diff --git a/tools/testing/selftests/bpf/bpf_sockopt_helpers.h b/tools/testing/selftests/bpf/bpf_sockopt_helpers.h
new file mode 100644
index 000000000000..11f3a0976174
--- /dev/null
+++ b/tools/testing/selftests/bpf/bpf_sockopt_helpers.h
@@ -0,0 +1,21 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+
+#include <sys/socket.h>
+#include <bpf/bpf_helpers.h>
+
+int get_set_sk_priority(void *ctx)
+{
+ int prio;
+
+ /* Verify that context allows calling bpf_getsockopt and
+ * bpf_setsockopt by reading and writing back socket
+ * priority.
+ */
+
+ if (bpf_getsockopt(ctx, SOL_SOCKET, SO_PRIORITY, &prio, sizeof(prio)))
+ return 0;
+ if (bpf_setsockopt(ctx, SOL_SOCKET, SO_PRIORITY, &prio, sizeof(prio)))
+ return 0;
+
+ return 1;
+}
diff --git a/tools/testing/selftests/bpf/bpf_tcp_helpers.h b/tools/testing/selftests/bpf/bpf_tcp_helpers.h
index 5bf2fe9b1efa..82a7c9de95f9 100644
--- a/tools/testing/selftests/bpf/bpf_tcp_helpers.h
+++ b/tools/testing/selftests/bpf/bpf_tcp_helpers.h
@@ -12,10 +12,19 @@
SEC("struct_ops/"#name) \
BPF_PROG(name, args)
+#ifndef SOL_TCP
+#define SOL_TCP 6
+#endif
+
+#ifndef TCP_CA_NAME_MAX
+#define TCP_CA_NAME_MAX 16
+#endif
+
#define tcp_jiffies32 ((__u32)bpf_jiffies64())
struct sock_common {
unsigned char skc_state;
+ __u16 skc_num;
} __attribute__((preserve_access_index));
enum sk_pacing {
@@ -26,6 +35,7 @@ enum sk_pacing {
struct sock {
struct sock_common __sk_common;
+#define sk_state __sk_common.skc_state
unsigned long sk_pacing_rate;
__u32 sk_pacing_status; /* see enum sk_pacing */
} __attribute__((preserve_access_index));
@@ -45,12 +55,17 @@ struct inet_connection_sock {
__u64 icsk_ca_priv[104 / sizeof(__u64)];
} __attribute__((preserve_access_index));
+struct request_sock {
+ struct sock_common __req_common;
+} __attribute__((preserve_access_index));
+
struct tcp_sock {
struct inet_connection_sock inet_conn;
__u32 rcv_nxt;
__u32 snd_nxt;
__u32 snd_una;
+ __u32 window_clamp;
__u8 ecn_flags;
__u32 delivered;
__u32 delivered_ce;
@@ -70,6 +85,7 @@ struct tcp_sock {
__u32 lsndtime;
__u32 prior_cwnd;
__u64 tcp_mstamp; /* most recent packet received/sent */
+ bool is_mptcp;
} __attribute__((preserve_access_index));
static __always_inline struct inet_connection_sock *inet_csk(const struct sock *sk)
@@ -115,14 +131,6 @@ enum tcp_ca_event {
CA_EVENT_ECN_IS_CE = 5,
};
-enum tcp_ca_state {
- TCP_CA_Open = 0,
- TCP_CA_Disorder = 1,
- TCP_CA_CWR = 2,
- TCP_CA_Recovery = 3,
- TCP_CA_Loss = 4
-};
-
struct ack_sample {
__u32 pkts_acked;
__s32 rtt_us;
@@ -179,6 +187,7 @@ struct tcp_congestion_ops {
* after all the ca_state processing. (optional)
*/
void (*cong_control)(struct sock *sk, const struct rate_sample *rs);
+ void *owner;
};
#define min(a, b) ((a) < (b) ? (a) : (b))
@@ -188,16 +197,6 @@ struct tcp_congestion_ops {
typeof(y) __y = (y); \
__x == 0 ? __y : ((__y == 0) ? __x : min(__x, __y)); })
-static __always_inline __u32 tcp_slow_start(struct tcp_sock *tp, __u32 acked)
-{
- __u32 cwnd = min(tp->snd_cwnd + acked, tp->snd_ssthresh);
-
- acked -= cwnd - tp->snd_cwnd;
- tp->snd_cwnd = min(cwnd, tp->snd_cwnd_clamp);
-
- return acked;
-}
-
static __always_inline bool tcp_in_slow_start(const struct tcp_sock *tp)
{
return tp->snd_cwnd < tp->snd_ssthresh;
@@ -214,22 +213,29 @@ static __always_inline bool tcp_is_cwnd_limited(const struct sock *sk)
return !!BPF_CORE_READ_BITFIELD(tp, is_cwnd_limited);
}
-static __always_inline void tcp_cong_avoid_ai(struct tcp_sock *tp, __u32 w, __u32 acked)
+static __always_inline bool tcp_cc_eq(const char *a, const char *b)
{
- /* If credits accumulated at a higher w, apply them gently now. */
- if (tp->snd_cwnd_cnt >= w) {
- tp->snd_cwnd_cnt = 0;
- tp->snd_cwnd++;
- }
-
- tp->snd_cwnd_cnt += acked;
- if (tp->snd_cwnd_cnt >= w) {
- __u32 delta = tp->snd_cwnd_cnt / w;
+ int i;
- tp->snd_cwnd_cnt -= delta * w;
- tp->snd_cwnd += delta;
+ for (i = 0; i < TCP_CA_NAME_MAX; i++) {
+ if (a[i] != b[i])
+ return false;
+ if (!a[i])
+ break;
}
- tp->snd_cwnd = min(tp->snd_cwnd, tp->snd_cwnd_clamp);
+
+ return true;
}
+extern __u32 tcp_slow_start(struct tcp_sock *tp, __u32 acked) __ksym;
+extern void tcp_cong_avoid_ai(struct tcp_sock *tp, __u32 w, __u32 acked) __ksym;
+
+struct mptcp_sock {
+ struct inet_connection_sock sk;
+
+ __u32 token;
+ struct sock *first;
+ char ca_name[TCP_CA_NAME_MAX];
+} __attribute__((preserve_access_index));
+
#endif
diff --git a/tools/testing/selftests/bpf/bpf_test_no_cfi/Makefile b/tools/testing/selftests/bpf/bpf_test_no_cfi/Makefile
new file mode 100644
index 000000000000..ed5143b79edf
--- /dev/null
+++ b/tools/testing/selftests/bpf/bpf_test_no_cfi/Makefile
@@ -0,0 +1,19 @@
+BPF_TEST_NO_CFI_DIR := $(realpath $(dir $(abspath $(lastword $(MAKEFILE_LIST)))))
+KDIR ?= $(abspath $(BPF_TEST_NO_CFI_DIR)/../../../../..)
+
+ifeq ($(V),1)
+Q =
+else
+Q = @
+endif
+
+MODULES = bpf_test_no_cfi.ko
+
+obj-m += bpf_test_no_cfi.o
+
+all:
+ +$(Q)make -C $(KDIR) M=$(BPF_TEST_NO_CFI_DIR) modules
+
+clean:
+ +$(Q)make -C $(KDIR) M=$(BPF_TEST_NO_CFI_DIR) clean
+
diff --git a/tools/testing/selftests/bpf/bpf_test_no_cfi/bpf_test_no_cfi.c b/tools/testing/selftests/bpf/bpf_test_no_cfi/bpf_test_no_cfi.c
new file mode 100644
index 000000000000..b1dd889d5d7d
--- /dev/null
+++ b/tools/testing/selftests/bpf/bpf_test_no_cfi/bpf_test_no_cfi.c
@@ -0,0 +1,84 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2024 Meta Platforms, Inc. and affiliates. */
+#include <linux/bpf.h>
+#include <linux/btf.h>
+#include <linux/init.h>
+#include <linux/module.h>
+
+struct bpf_test_no_cfi_ops {
+ void (*fn_1)(void);
+ void (*fn_2)(void);
+};
+
+static int dummy_init(struct btf *btf)
+{
+ return 0;
+}
+
+static int dummy_init_member(const struct btf_type *t,
+ const struct btf_member *member,
+ void *kdata, const void *udata)
+{
+ return 0;
+}
+
+static int dummy_reg(void *kdata)
+{
+ return 0;
+}
+
+static void dummy_unreg(void *kdata)
+{
+}
+
+static const struct bpf_verifier_ops dummy_verifier_ops;
+
+static void bpf_test_no_cfi_ops__fn_1(void)
+{
+}
+
+static void bpf_test_no_cfi_ops__fn_2(void)
+{
+}
+
+static struct bpf_test_no_cfi_ops __test_no_cif_ops = {
+ .fn_1 = bpf_test_no_cfi_ops__fn_1,
+ .fn_2 = bpf_test_no_cfi_ops__fn_2,
+};
+
+static struct bpf_struct_ops test_no_cif_ops = {
+ .verifier_ops = &dummy_verifier_ops,
+ .init = dummy_init,
+ .init_member = dummy_init_member,
+ .reg = dummy_reg,
+ .unreg = dummy_unreg,
+ .name = "bpf_test_no_cfi_ops",
+ .owner = THIS_MODULE,
+};
+
+static int bpf_test_no_cfi_init(void)
+{
+ int ret;
+
+ ret = register_bpf_struct_ops(&test_no_cif_ops,
+ bpf_test_no_cfi_ops);
+ if (!ret)
+ return -EINVAL;
+
+ test_no_cif_ops.cfi_stubs = &__test_no_cif_ops;
+ ret = register_bpf_struct_ops(&test_no_cif_ops,
+ bpf_test_no_cfi_ops);
+ return ret;
+}
+
+static void bpf_test_no_cfi_exit(void)
+{
+}
+
+module_init(bpf_test_no_cfi_init);
+module_exit(bpf_test_no_cfi_exit);
+
+MODULE_AUTHOR("Kuifeng Lee");
+MODULE_DESCRIPTION("BPF no cfi_stubs test module");
+MODULE_LICENSE("Dual BSD/GPL");
+
diff --git a/tools/testing/selftests/bpf/bpf_testmod/.gitignore b/tools/testing/selftests/bpf/bpf_testmod/.gitignore
new file mode 100644
index 000000000000..ded513777281
--- /dev/null
+++ b/tools/testing/selftests/bpf/bpf_testmod/.gitignore
@@ -0,0 +1,6 @@
+*.mod
+*.mod.c
+*.o
+.ko
+/Module.symvers
+/modules.order
diff --git a/tools/testing/selftests/bpf/bpf_testmod/Makefile b/tools/testing/selftests/bpf/bpf_testmod/Makefile
new file mode 100644
index 000000000000..15cb36c4483a
--- /dev/null
+++ b/tools/testing/selftests/bpf/bpf_testmod/Makefile
@@ -0,0 +1,20 @@
+BPF_TESTMOD_DIR := $(realpath $(dir $(abspath $(lastword $(MAKEFILE_LIST)))))
+KDIR ?= $(abspath $(BPF_TESTMOD_DIR)/../../../../..)
+
+ifeq ($(V),1)
+Q =
+else
+Q = @
+endif
+
+MODULES = bpf_testmod.ko
+
+obj-m += bpf_testmod.o
+CFLAGS_bpf_testmod.o = -I$(src)
+
+all:
+ +$(Q)make -C $(KDIR) M=$(BPF_TESTMOD_DIR) modules
+
+clean:
+ +$(Q)make -C $(KDIR) M=$(BPF_TESTMOD_DIR) clean
+
diff --git a/tools/testing/selftests/bpf/bpf_testmod/bpf_testmod-events.h b/tools/testing/selftests/bpf/bpf_testmod/bpf_testmod-events.h
new file mode 100644
index 000000000000..11ee801e75e7
--- /dev/null
+++ b/tools/testing/selftests/bpf/bpf_testmod/bpf_testmod-events.h
@@ -0,0 +1,57 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* Copyright (c) 2020 Facebook */
+#undef TRACE_SYSTEM
+#define TRACE_SYSTEM bpf_testmod
+
+#if !defined(_BPF_TESTMOD_EVENTS_H) || defined(TRACE_HEADER_MULTI_READ)
+#define _BPF_TESTMOD_EVENTS_H
+
+#include <linux/tracepoint.h>
+#include "bpf_testmod.h"
+
+TRACE_EVENT(bpf_testmod_test_read,
+ TP_PROTO(struct task_struct *task, struct bpf_testmod_test_read_ctx *ctx),
+ TP_ARGS(task, ctx),
+ TP_STRUCT__entry(
+ __field(pid_t, pid)
+ __array(char, comm, TASK_COMM_LEN)
+ __field(loff_t, off)
+ __field(size_t, len)
+ ),
+ TP_fast_assign(
+ __entry->pid = task->pid;
+ memcpy(__entry->comm, task->comm, TASK_COMM_LEN);
+ __entry->off = ctx->off;
+ __entry->len = ctx->len;
+ ),
+ TP_printk("pid=%d comm=%s off=%llu len=%zu",
+ __entry->pid, __entry->comm, __entry->off, __entry->len)
+);
+
+/* A bare tracepoint with no event associated with it */
+DECLARE_TRACE(bpf_testmod_test_write_bare,
+ TP_PROTO(struct task_struct *task, struct bpf_testmod_test_write_ctx *ctx),
+ TP_ARGS(task, ctx)
+);
+
+#undef BPF_TESTMOD_DECLARE_TRACE
+#ifdef DECLARE_TRACE_WRITABLE
+#define BPF_TESTMOD_DECLARE_TRACE(call, proto, args, size) \
+ DECLARE_TRACE_WRITABLE(call, PARAMS(proto), PARAMS(args), size)
+#else
+#define BPF_TESTMOD_DECLARE_TRACE(call, proto, args, size) \
+ DECLARE_TRACE(call, PARAMS(proto), PARAMS(args))
+#endif
+
+BPF_TESTMOD_DECLARE_TRACE(bpf_testmod_test_writable_bare,
+ TP_PROTO(struct bpf_testmod_test_writable_ctx *ctx),
+ TP_ARGS(ctx),
+ sizeof(struct bpf_testmod_test_writable_ctx)
+);
+
+#endif /* _BPF_TESTMOD_EVENTS_H */
+
+#undef TRACE_INCLUDE_PATH
+#define TRACE_INCLUDE_PATH .
+#define TRACE_INCLUDE_FILE bpf_testmod-events
+#include <trace/define_trace.h>
diff --git a/tools/testing/selftests/bpf/bpf_testmod/bpf_testmod.c b/tools/testing/selftests/bpf/bpf_testmod/bpf_testmod.c
new file mode 100644
index 000000000000..39ad96a18123
--- /dev/null
+++ b/tools/testing/selftests/bpf/bpf_testmod/bpf_testmod.c
@@ -0,0 +1,674 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2020 Facebook */
+#include <linux/bpf.h>
+#include <linux/btf.h>
+#include <linux/btf_ids.h>
+#include <linux/delay.h>
+#include <linux/error-injection.h>
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/percpu-defs.h>
+#include <linux/sysfs.h>
+#include <linux/tracepoint.h>
+#include "bpf_testmod.h"
+#include "bpf_testmod_kfunc.h"
+
+#define CREATE_TRACE_POINTS
+#include "bpf_testmod-events.h"
+
+typedef int (*func_proto_typedef)(long);
+typedef int (*func_proto_typedef_nested1)(func_proto_typedef);
+typedef int (*func_proto_typedef_nested2)(func_proto_typedef_nested1);
+
+DEFINE_PER_CPU(int, bpf_testmod_ksym_percpu) = 123;
+long bpf_testmod_test_struct_arg_result;
+
+struct bpf_testmod_struct_arg_1 {
+ int a;
+};
+struct bpf_testmod_struct_arg_2 {
+ long a;
+ long b;
+};
+
+struct bpf_testmod_struct_arg_3 {
+ int a;
+ int b[];
+};
+
+struct bpf_testmod_struct_arg_4 {
+ u64 a;
+ int b;
+};
+
+__bpf_hook_start();
+
+noinline int
+bpf_testmod_test_struct_arg_1(struct bpf_testmod_struct_arg_2 a, int b, int c) {
+ bpf_testmod_test_struct_arg_result = a.a + a.b + b + c;
+ return bpf_testmod_test_struct_arg_result;
+}
+
+noinline int
+bpf_testmod_test_struct_arg_2(int a, struct bpf_testmod_struct_arg_2 b, int c) {
+ bpf_testmod_test_struct_arg_result = a + b.a + b.b + c;
+ return bpf_testmod_test_struct_arg_result;
+}
+
+noinline int
+bpf_testmod_test_struct_arg_3(int a, int b, struct bpf_testmod_struct_arg_2 c) {
+ bpf_testmod_test_struct_arg_result = a + b + c.a + c.b;
+ return bpf_testmod_test_struct_arg_result;
+}
+
+noinline int
+bpf_testmod_test_struct_arg_4(struct bpf_testmod_struct_arg_1 a, int b,
+ int c, int d, struct bpf_testmod_struct_arg_2 e) {
+ bpf_testmod_test_struct_arg_result = a.a + b + c + d + e.a + e.b;
+ return bpf_testmod_test_struct_arg_result;
+}
+
+noinline int
+bpf_testmod_test_struct_arg_5(void) {
+ bpf_testmod_test_struct_arg_result = 1;
+ return bpf_testmod_test_struct_arg_result;
+}
+
+noinline int
+bpf_testmod_test_struct_arg_6(struct bpf_testmod_struct_arg_3 *a) {
+ bpf_testmod_test_struct_arg_result = a->b[0];
+ return bpf_testmod_test_struct_arg_result;
+}
+
+noinline int
+bpf_testmod_test_struct_arg_7(u64 a, void *b, short c, int d, void *e,
+ struct bpf_testmod_struct_arg_4 f)
+{
+ bpf_testmod_test_struct_arg_result = a + (long)b + c + d +
+ (long)e + f.a + f.b;
+ return bpf_testmod_test_struct_arg_result;
+}
+
+noinline int
+bpf_testmod_test_struct_arg_8(u64 a, void *b, short c, int d, void *e,
+ struct bpf_testmod_struct_arg_4 f, int g)
+{
+ bpf_testmod_test_struct_arg_result = a + (long)b + c + d +
+ (long)e + f.a + f.b + g;
+ return bpf_testmod_test_struct_arg_result;
+}
+
+noinline int
+bpf_testmod_test_arg_ptr_to_struct(struct bpf_testmod_struct_arg_1 *a) {
+ bpf_testmod_test_struct_arg_result = a->a;
+ return bpf_testmod_test_struct_arg_result;
+}
+
+__bpf_kfunc void
+bpf_testmod_test_mod_kfunc(int i)
+{
+ *(int *)this_cpu_ptr(&bpf_testmod_ksym_percpu) = i;
+}
+
+__bpf_kfunc int bpf_iter_testmod_seq_new(struct bpf_iter_testmod_seq *it, s64 value, int cnt)
+{
+ if (cnt < 0) {
+ it->cnt = 0;
+ return -EINVAL;
+ }
+
+ it->value = value;
+ it->cnt = cnt;
+
+ return 0;
+}
+
+__bpf_kfunc s64 *bpf_iter_testmod_seq_next(struct bpf_iter_testmod_seq* it)
+{
+ if (it->cnt <= 0)
+ return NULL;
+
+ it->cnt--;
+
+ return &it->value;
+}
+
+__bpf_kfunc void bpf_iter_testmod_seq_destroy(struct bpf_iter_testmod_seq *it)
+{
+ it->cnt = 0;
+}
+
+__bpf_kfunc void bpf_kfunc_common_test(void)
+{
+}
+
+struct bpf_testmod_btf_type_tag_1 {
+ int a;
+};
+
+struct bpf_testmod_btf_type_tag_2 {
+ struct bpf_testmod_btf_type_tag_1 __user *p;
+};
+
+struct bpf_testmod_btf_type_tag_3 {
+ struct bpf_testmod_btf_type_tag_1 __percpu *p;
+};
+
+noinline int
+bpf_testmod_test_btf_type_tag_user_1(struct bpf_testmod_btf_type_tag_1 __user *arg) {
+ BTF_TYPE_EMIT(func_proto_typedef);
+ BTF_TYPE_EMIT(func_proto_typedef_nested1);
+ BTF_TYPE_EMIT(func_proto_typedef_nested2);
+ return arg->a;
+}
+
+noinline int
+bpf_testmod_test_btf_type_tag_user_2(struct bpf_testmod_btf_type_tag_2 *arg) {
+ return arg->p->a;
+}
+
+noinline int
+bpf_testmod_test_btf_type_tag_percpu_1(struct bpf_testmod_btf_type_tag_1 __percpu *arg) {
+ return arg->a;
+}
+
+noinline int
+bpf_testmod_test_btf_type_tag_percpu_2(struct bpf_testmod_btf_type_tag_3 *arg) {
+ return arg->p->a;
+}
+
+noinline int bpf_testmod_loop_test(int n)
+{
+ /* Make sum volatile, so smart compilers, such as clang, will not
+ * optimize the code by removing the loop.
+ */
+ volatile int sum = 0;
+ int i;
+
+ /* the primary goal of this test is to test LBR. Create a lot of
+ * branches in the function, so we can catch it easily.
+ */
+ for (i = 0; i < n; i++)
+ sum += i;
+ return sum;
+}
+
+__weak noinline struct file *bpf_testmod_return_ptr(int arg)
+{
+ static struct file f = {};
+
+ switch (arg) {
+ case 1: return (void *)EINVAL; /* user addr */
+ case 2: return (void *)0xcafe4a11; /* user addr */
+ case 3: return (void *)-EINVAL; /* canonical, but invalid */
+ case 4: return (void *)(1ull << 60); /* non-canonical and invalid */
+ case 5: return (void *)~(1ull << 30); /* trigger extable */
+ case 6: return &f; /* valid addr */
+ case 7: return (void *)((long)&f | 1); /* kernel tricks */
+ default: return NULL;
+ }
+}
+
+noinline int bpf_testmod_fentry_test1(int a)
+{
+ return a + 1;
+}
+
+noinline int bpf_testmod_fentry_test2(int a, u64 b)
+{
+ return a + b;
+}
+
+noinline int bpf_testmod_fentry_test3(char a, int b, u64 c)
+{
+ return a + b + c;
+}
+
+noinline int bpf_testmod_fentry_test7(u64 a, void *b, short c, int d,
+ void *e, char f, int g)
+{
+ return a + (long)b + c + d + (long)e + f + g;
+}
+
+noinline int bpf_testmod_fentry_test11(u64 a, void *b, short c, int d,
+ void *e, char f, int g,
+ unsigned int h, long i, __u64 j,
+ unsigned long k)
+{
+ return a + (long)b + c + d + (long)e + f + g + h + i + j + k;
+}
+
+int bpf_testmod_fentry_ok;
+
+noinline ssize_t
+bpf_testmod_test_read(struct file *file, struct kobject *kobj,
+ struct bin_attribute *bin_attr,
+ char *buf, loff_t off, size_t len)
+{
+ struct bpf_testmod_test_read_ctx ctx = {
+ .buf = buf,
+ .off = off,
+ .len = len,
+ };
+ struct bpf_testmod_struct_arg_1 struct_arg1 = {10}, struct_arg1_2 = {-1};
+ struct bpf_testmod_struct_arg_2 struct_arg2 = {2, 3};
+ struct bpf_testmod_struct_arg_3 *struct_arg3;
+ struct bpf_testmod_struct_arg_4 struct_arg4 = {21, 22};
+ int i = 1;
+
+ while (bpf_testmod_return_ptr(i))
+ i++;
+
+ (void)bpf_testmod_test_struct_arg_1(struct_arg2, 1, 4);
+ (void)bpf_testmod_test_struct_arg_2(1, struct_arg2, 4);
+ (void)bpf_testmod_test_struct_arg_3(1, 4, struct_arg2);
+ (void)bpf_testmod_test_struct_arg_4(struct_arg1, 1, 2, 3, struct_arg2);
+ (void)bpf_testmod_test_struct_arg_5();
+ (void)bpf_testmod_test_struct_arg_7(16, (void *)17, 18, 19,
+ (void *)20, struct_arg4);
+ (void)bpf_testmod_test_struct_arg_8(16, (void *)17, 18, 19,
+ (void *)20, struct_arg4, 23);
+
+ (void)bpf_testmod_test_arg_ptr_to_struct(&struct_arg1_2);
+
+ struct_arg3 = kmalloc((sizeof(struct bpf_testmod_struct_arg_3) +
+ sizeof(int)), GFP_KERNEL);
+ if (struct_arg3 != NULL) {
+ struct_arg3->b[0] = 1;
+ (void)bpf_testmod_test_struct_arg_6(struct_arg3);
+ kfree(struct_arg3);
+ }
+
+ /* This is always true. Use the check to make sure the compiler
+ * doesn't remove bpf_testmod_loop_test.
+ */
+ if (bpf_testmod_loop_test(101) > 100)
+ trace_bpf_testmod_test_read(current, &ctx);
+
+ /* Magic number to enable writable tp */
+ if (len == 64) {
+ struct bpf_testmod_test_writable_ctx writable = {
+ .val = 1024,
+ };
+ trace_bpf_testmod_test_writable_bare(&writable);
+ if (writable.early_ret)
+ return snprintf(buf, len, "%d\n", writable.val);
+ }
+
+ if (bpf_testmod_fentry_test1(1) != 2 ||
+ bpf_testmod_fentry_test2(2, 3) != 5 ||
+ bpf_testmod_fentry_test3(4, 5, 6) != 15 ||
+ bpf_testmod_fentry_test7(16, (void *)17, 18, 19, (void *)20,
+ 21, 22) != 133 ||
+ bpf_testmod_fentry_test11(16, (void *)17, 18, 19, (void *)20,
+ 21, 22, 23, 24, 25, 26) != 231)
+ goto out;
+
+ bpf_testmod_fentry_ok = 1;
+out:
+ return -EIO; /* always fail */
+}
+EXPORT_SYMBOL(bpf_testmod_test_read);
+ALLOW_ERROR_INJECTION(bpf_testmod_test_read, ERRNO);
+
+noinline ssize_t
+bpf_testmod_test_write(struct file *file, struct kobject *kobj,
+ struct bin_attribute *bin_attr,
+ char *buf, loff_t off, size_t len)
+{
+ struct bpf_testmod_test_write_ctx ctx = {
+ .buf = buf,
+ .off = off,
+ .len = len,
+ };
+
+ trace_bpf_testmod_test_write_bare(current, &ctx);
+
+ return -EIO; /* always fail */
+}
+EXPORT_SYMBOL(bpf_testmod_test_write);
+ALLOW_ERROR_INJECTION(bpf_testmod_test_write, ERRNO);
+
+noinline int bpf_fentry_shadow_test(int a)
+{
+ return a + 2;
+}
+EXPORT_SYMBOL_GPL(bpf_fentry_shadow_test);
+
+__bpf_hook_end();
+
+static struct bin_attribute bin_attr_bpf_testmod_file __ro_after_init = {
+ .attr = { .name = "bpf_testmod", .mode = 0666, },
+ .read = bpf_testmod_test_read,
+ .write = bpf_testmod_test_write,
+};
+
+BTF_KFUNCS_START(bpf_testmod_common_kfunc_ids)
+BTF_ID_FLAGS(func, bpf_iter_testmod_seq_new, KF_ITER_NEW)
+BTF_ID_FLAGS(func, bpf_iter_testmod_seq_next, KF_ITER_NEXT | KF_RET_NULL)
+BTF_ID_FLAGS(func, bpf_iter_testmod_seq_destroy, KF_ITER_DESTROY)
+BTF_ID_FLAGS(func, bpf_kfunc_common_test)
+BTF_KFUNCS_END(bpf_testmod_common_kfunc_ids)
+
+static const struct btf_kfunc_id_set bpf_testmod_common_kfunc_set = {
+ .owner = THIS_MODULE,
+ .set = &bpf_testmod_common_kfunc_ids,
+};
+
+__bpf_kfunc u64 bpf_kfunc_call_test1(struct sock *sk, u32 a, u64 b, u32 c, u64 d)
+{
+ return a + b + c + d;
+}
+
+__bpf_kfunc int bpf_kfunc_call_test2(struct sock *sk, u32 a, u32 b)
+{
+ return a + b;
+}
+
+__bpf_kfunc struct sock *bpf_kfunc_call_test3(struct sock *sk)
+{
+ return sk;
+}
+
+__bpf_kfunc long noinline bpf_kfunc_call_test4(signed char a, short b, int c, long d)
+{
+ /* Provoke the compiler to assume that the caller has sign-extended a,
+ * b and c on platforms where this is required (e.g. s390x).
+ */
+ return (long)a + (long)b + (long)c + d;
+}
+
+static struct prog_test_ref_kfunc prog_test_struct = {
+ .a = 42,
+ .b = 108,
+ .next = &prog_test_struct,
+ .cnt = REFCOUNT_INIT(1),
+};
+
+__bpf_kfunc struct prog_test_ref_kfunc *
+bpf_kfunc_call_test_acquire(unsigned long *scalar_ptr)
+{
+ refcount_inc(&prog_test_struct.cnt);
+ return &prog_test_struct;
+}
+
+__bpf_kfunc void bpf_kfunc_call_test_offset(struct prog_test_ref_kfunc *p)
+{
+ WARN_ON_ONCE(1);
+}
+
+__bpf_kfunc struct prog_test_member *
+bpf_kfunc_call_memb_acquire(void)
+{
+ WARN_ON_ONCE(1);
+ return NULL;
+}
+
+__bpf_kfunc void bpf_kfunc_call_memb1_release(struct prog_test_member1 *p)
+{
+ WARN_ON_ONCE(1);
+}
+
+static int *__bpf_kfunc_call_test_get_mem(struct prog_test_ref_kfunc *p, const int size)
+{
+ if (size > 2 * sizeof(int))
+ return NULL;
+
+ return (int *)p;
+}
+
+__bpf_kfunc int *bpf_kfunc_call_test_get_rdwr_mem(struct prog_test_ref_kfunc *p,
+ const int rdwr_buf_size)
+{
+ return __bpf_kfunc_call_test_get_mem(p, rdwr_buf_size);
+}
+
+__bpf_kfunc int *bpf_kfunc_call_test_get_rdonly_mem(struct prog_test_ref_kfunc *p,
+ const int rdonly_buf_size)
+{
+ return __bpf_kfunc_call_test_get_mem(p, rdonly_buf_size);
+}
+
+/* the next 2 ones can't be really used for testing expect to ensure
+ * that the verifier rejects the call.
+ * Acquire functions must return struct pointers, so these ones are
+ * failing.
+ */
+__bpf_kfunc int *bpf_kfunc_call_test_acq_rdonly_mem(struct prog_test_ref_kfunc *p,
+ const int rdonly_buf_size)
+{
+ return __bpf_kfunc_call_test_get_mem(p, rdonly_buf_size);
+}
+
+__bpf_kfunc void bpf_kfunc_call_int_mem_release(int *p)
+{
+}
+
+__bpf_kfunc void bpf_kfunc_call_test_pass_ctx(struct __sk_buff *skb)
+{
+}
+
+__bpf_kfunc void bpf_kfunc_call_test_pass1(struct prog_test_pass1 *p)
+{
+}
+
+__bpf_kfunc void bpf_kfunc_call_test_pass2(struct prog_test_pass2 *p)
+{
+}
+
+__bpf_kfunc void bpf_kfunc_call_test_fail1(struct prog_test_fail1 *p)
+{
+}
+
+__bpf_kfunc void bpf_kfunc_call_test_fail2(struct prog_test_fail2 *p)
+{
+}
+
+__bpf_kfunc void bpf_kfunc_call_test_fail3(struct prog_test_fail3 *p)
+{
+}
+
+__bpf_kfunc void bpf_kfunc_call_test_mem_len_pass1(void *mem, int mem__sz)
+{
+}
+
+__bpf_kfunc void bpf_kfunc_call_test_mem_len_fail1(void *mem, int len)
+{
+}
+
+__bpf_kfunc void bpf_kfunc_call_test_mem_len_fail2(u64 *mem, int len)
+{
+}
+
+__bpf_kfunc void bpf_kfunc_call_test_ref(struct prog_test_ref_kfunc *p)
+{
+ /* p != NULL, but p->cnt could be 0 */
+}
+
+__bpf_kfunc void bpf_kfunc_call_test_destructive(void)
+{
+}
+
+__bpf_kfunc static u32 bpf_kfunc_call_test_static_unused_arg(u32 arg, u32 unused)
+{
+ return arg;
+}
+
+BTF_KFUNCS_START(bpf_testmod_check_kfunc_ids)
+BTF_ID_FLAGS(func, bpf_testmod_test_mod_kfunc)
+BTF_ID_FLAGS(func, bpf_kfunc_call_test1)
+BTF_ID_FLAGS(func, bpf_kfunc_call_test2)
+BTF_ID_FLAGS(func, bpf_kfunc_call_test3)
+BTF_ID_FLAGS(func, bpf_kfunc_call_test4)
+BTF_ID_FLAGS(func, bpf_kfunc_call_test_mem_len_pass1)
+BTF_ID_FLAGS(func, bpf_kfunc_call_test_mem_len_fail1)
+BTF_ID_FLAGS(func, bpf_kfunc_call_test_mem_len_fail2)
+BTF_ID_FLAGS(func, bpf_kfunc_call_test_acquire, KF_ACQUIRE | KF_RET_NULL)
+BTF_ID_FLAGS(func, bpf_kfunc_call_memb_acquire, KF_ACQUIRE | KF_RET_NULL)
+BTF_ID_FLAGS(func, bpf_kfunc_call_memb1_release, KF_RELEASE)
+BTF_ID_FLAGS(func, bpf_kfunc_call_test_get_rdwr_mem, KF_RET_NULL)
+BTF_ID_FLAGS(func, bpf_kfunc_call_test_get_rdonly_mem, KF_RET_NULL)
+BTF_ID_FLAGS(func, bpf_kfunc_call_test_acq_rdonly_mem, KF_ACQUIRE | KF_RET_NULL)
+BTF_ID_FLAGS(func, bpf_kfunc_call_int_mem_release, KF_RELEASE)
+BTF_ID_FLAGS(func, bpf_kfunc_call_test_pass_ctx)
+BTF_ID_FLAGS(func, bpf_kfunc_call_test_pass1)
+BTF_ID_FLAGS(func, bpf_kfunc_call_test_pass2)
+BTF_ID_FLAGS(func, bpf_kfunc_call_test_fail1)
+BTF_ID_FLAGS(func, bpf_kfunc_call_test_fail2)
+BTF_ID_FLAGS(func, bpf_kfunc_call_test_fail3)
+BTF_ID_FLAGS(func, bpf_kfunc_call_test_ref, KF_TRUSTED_ARGS | KF_RCU)
+BTF_ID_FLAGS(func, bpf_kfunc_call_test_destructive, KF_DESTRUCTIVE)
+BTF_ID_FLAGS(func, bpf_kfunc_call_test_static_unused_arg)
+BTF_ID_FLAGS(func, bpf_kfunc_call_test_offset)
+BTF_KFUNCS_END(bpf_testmod_check_kfunc_ids)
+
+static int bpf_testmod_ops_init(struct btf *btf)
+{
+ return 0;
+}
+
+static bool bpf_testmod_ops_is_valid_access(int off, int size,
+ enum bpf_access_type type,
+ const struct bpf_prog *prog,
+ struct bpf_insn_access_aux *info)
+{
+ return bpf_tracing_btf_ctx_access(off, size, type, prog, info);
+}
+
+static int bpf_testmod_ops_init_member(const struct btf_type *t,
+ const struct btf_member *member,
+ void *kdata, const void *udata)
+{
+ if (member->offset == offsetof(struct bpf_testmod_ops, data) * 8) {
+ /* For data fields, this function has to copy it and return
+ * 1 to indicate that the data has been handled by the
+ * struct_ops type, or the verifier will reject the map if
+ * the value of the data field is not zero.
+ */
+ ((struct bpf_testmod_ops *)kdata)->data = ((struct bpf_testmod_ops *)udata)->data;
+ return 1;
+ }
+ return 0;
+}
+
+static const struct btf_kfunc_id_set bpf_testmod_kfunc_set = {
+ .owner = THIS_MODULE,
+ .set = &bpf_testmod_check_kfunc_ids,
+};
+
+static const struct bpf_verifier_ops bpf_testmod_verifier_ops = {
+ .is_valid_access = bpf_testmod_ops_is_valid_access,
+};
+
+static int bpf_dummy_reg(void *kdata)
+{
+ struct bpf_testmod_ops *ops = kdata;
+
+ if (ops->test_1)
+ ops->test_1();
+ /* Some test cases (ex. struct_ops_maybe_null) may not have test_2
+ * initialized, so we need to check for NULL.
+ */
+ if (ops->test_2)
+ ops->test_2(4, ops->data);
+
+ return 0;
+}
+
+static void bpf_dummy_unreg(void *kdata)
+{
+}
+
+static int bpf_testmod_test_1(void)
+{
+ return 0;
+}
+
+static void bpf_testmod_test_2(int a, int b)
+{
+}
+
+static int bpf_testmod_ops__test_maybe_null(int dummy,
+ struct task_struct *task__nullable)
+{
+ return 0;
+}
+
+static struct bpf_testmod_ops __bpf_testmod_ops = {
+ .test_1 = bpf_testmod_test_1,
+ .test_2 = bpf_testmod_test_2,
+ .test_maybe_null = bpf_testmod_ops__test_maybe_null,
+};
+
+struct bpf_struct_ops bpf_bpf_testmod_ops = {
+ .verifier_ops = &bpf_testmod_verifier_ops,
+ .init = bpf_testmod_ops_init,
+ .init_member = bpf_testmod_ops_init_member,
+ .reg = bpf_dummy_reg,
+ .unreg = bpf_dummy_unreg,
+ .cfi_stubs = &__bpf_testmod_ops,
+ .name = "bpf_testmod_ops",
+ .owner = THIS_MODULE,
+};
+
+static int bpf_dummy_reg2(void *kdata)
+{
+ struct bpf_testmod_ops2 *ops = kdata;
+
+ ops->test_1();
+ return 0;
+}
+
+static struct bpf_testmod_ops2 __bpf_testmod_ops2 = {
+ .test_1 = bpf_testmod_test_1,
+};
+
+struct bpf_struct_ops bpf_testmod_ops2 = {
+ .verifier_ops = &bpf_testmod_verifier_ops,
+ .init = bpf_testmod_ops_init,
+ .init_member = bpf_testmod_ops_init_member,
+ .reg = bpf_dummy_reg2,
+ .unreg = bpf_dummy_unreg,
+ .cfi_stubs = &__bpf_testmod_ops2,
+ .name = "bpf_testmod_ops2",
+ .owner = THIS_MODULE,
+};
+
+extern int bpf_fentry_test1(int a);
+
+static int bpf_testmod_init(void)
+{
+ int ret;
+
+ ret = register_btf_kfunc_id_set(BPF_PROG_TYPE_UNSPEC, &bpf_testmod_common_kfunc_set);
+ ret = ret ?: register_btf_kfunc_id_set(BPF_PROG_TYPE_SCHED_CLS, &bpf_testmod_kfunc_set);
+ ret = ret ?: register_btf_kfunc_id_set(BPF_PROG_TYPE_TRACING, &bpf_testmod_kfunc_set);
+ ret = ret ?: register_btf_kfunc_id_set(BPF_PROG_TYPE_SYSCALL, &bpf_testmod_kfunc_set);
+ ret = ret ?: register_bpf_struct_ops(&bpf_bpf_testmod_ops, bpf_testmod_ops);
+ ret = ret ?: register_bpf_struct_ops(&bpf_testmod_ops2, bpf_testmod_ops2);
+ if (ret < 0)
+ return ret;
+ if (bpf_fentry_test1(0) < 0)
+ return -EINVAL;
+ return sysfs_create_bin_file(kernel_kobj, &bin_attr_bpf_testmod_file);
+}
+
+static void bpf_testmod_exit(void)
+{
+ /* Need to wait for all references to be dropped because
+ * bpf_kfunc_call_test_release() which currently resides in kernel can
+ * be called after bpf_testmod is unloaded. Once release function is
+ * moved into the module this wait can be removed.
+ */
+ while (refcount_read(&prog_test_struct.cnt) > 1)
+ msleep(20);
+
+ sysfs_remove_bin_file(kernel_kobj, &bin_attr_bpf_testmod_file);
+}
+
+module_init(bpf_testmod_init);
+module_exit(bpf_testmod_exit);
+
+MODULE_AUTHOR("Andrii Nakryiko");
+MODULE_DESCRIPTION("BPF selftests module");
+MODULE_LICENSE("Dual BSD/GPL");
diff --git a/tools/testing/selftests/bpf/bpf_testmod/bpf_testmod.h b/tools/testing/selftests/bpf/bpf_testmod/bpf_testmod.h
new file mode 100644
index 000000000000..23fa1872ee67
--- /dev/null
+++ b/tools/testing/selftests/bpf/bpf_testmod/bpf_testmod.h
@@ -0,0 +1,96 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* Copyright (c) 2020 Facebook */
+#ifndef _BPF_TESTMOD_H
+#define _BPF_TESTMOD_H
+
+#include <linux/types.h>
+
+struct task_struct;
+
+struct bpf_testmod_test_read_ctx {
+ char *buf;
+ loff_t off;
+ size_t len;
+};
+
+struct bpf_testmod_test_write_ctx {
+ char *buf;
+ loff_t off;
+ size_t len;
+};
+
+struct bpf_testmod_test_writable_ctx {
+ bool early_ret;
+ int val;
+};
+
+/* BPF iter that returns *value* *n* times in a row */
+struct bpf_iter_testmod_seq {
+ s64 value;
+ int cnt;
+};
+
+struct bpf_testmod_ops {
+ int (*test_1)(void);
+ void (*test_2)(int a, int b);
+ /* Used to test nullable arguments. */
+ int (*test_maybe_null)(int dummy, struct task_struct *task);
+
+ /* The following fields are used to test shadow copies. */
+ char onebyte;
+ struct {
+ int a;
+ int b;
+ } unsupported;
+ int data;
+
+ /* The following pointers are used to test the maps having multiple
+ * pages of trampolines.
+ */
+ int (*tramp_1)(int value);
+ int (*tramp_2)(int value);
+ int (*tramp_3)(int value);
+ int (*tramp_4)(int value);
+ int (*tramp_5)(int value);
+ int (*tramp_6)(int value);
+ int (*tramp_7)(int value);
+ int (*tramp_8)(int value);
+ int (*tramp_9)(int value);
+ int (*tramp_10)(int value);
+ int (*tramp_11)(int value);
+ int (*tramp_12)(int value);
+ int (*tramp_13)(int value);
+ int (*tramp_14)(int value);
+ int (*tramp_15)(int value);
+ int (*tramp_16)(int value);
+ int (*tramp_17)(int value);
+ int (*tramp_18)(int value);
+ int (*tramp_19)(int value);
+ int (*tramp_20)(int value);
+ int (*tramp_21)(int value);
+ int (*tramp_22)(int value);
+ int (*tramp_23)(int value);
+ int (*tramp_24)(int value);
+ int (*tramp_25)(int value);
+ int (*tramp_26)(int value);
+ int (*tramp_27)(int value);
+ int (*tramp_28)(int value);
+ int (*tramp_29)(int value);
+ int (*tramp_30)(int value);
+ int (*tramp_31)(int value);
+ int (*tramp_32)(int value);
+ int (*tramp_33)(int value);
+ int (*tramp_34)(int value);
+ int (*tramp_35)(int value);
+ int (*tramp_36)(int value);
+ int (*tramp_37)(int value);
+ int (*tramp_38)(int value);
+ int (*tramp_39)(int value);
+ int (*tramp_40)(int value);
+};
+
+struct bpf_testmod_ops2 {
+ int (*test_1)(void);
+};
+
+#endif /* _BPF_TESTMOD_H */
diff --git a/tools/testing/selftests/bpf/bpf_testmod/bpf_testmod_kfunc.h b/tools/testing/selftests/bpf/bpf_testmod/bpf_testmod_kfunc.h
new file mode 100644
index 000000000000..7c664dd61059
--- /dev/null
+++ b/tools/testing/selftests/bpf/bpf_testmod/bpf_testmod_kfunc.h
@@ -0,0 +1,109 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+
+#ifndef _BPF_TESTMOD_KFUNC_H
+#define _BPF_TESTMOD_KFUNC_H
+
+#ifndef __KERNEL__
+#include <vmlinux.h>
+#include <bpf/bpf_helpers.h>
+#else
+#define __ksym
+struct prog_test_member1 {
+ int a;
+};
+
+struct prog_test_member {
+ struct prog_test_member1 m;
+ int c;
+};
+
+struct prog_test_ref_kfunc {
+ int a;
+ int b;
+ struct prog_test_member memb;
+ struct prog_test_ref_kfunc *next;
+ refcount_t cnt;
+};
+#endif
+
+struct prog_test_pass1 {
+ int x0;
+ struct {
+ int x1;
+ struct {
+ int x2;
+ struct {
+ int x3;
+ };
+ };
+ };
+};
+
+struct prog_test_pass2 {
+ int len;
+ short arr1[4];
+ struct {
+ char arr2[4];
+ unsigned long arr3[8];
+ } x;
+};
+
+struct prog_test_fail1 {
+ void *p;
+ int x;
+};
+
+struct prog_test_fail2 {
+ int x8;
+ struct prog_test_pass1 x;
+};
+
+struct prog_test_fail3 {
+ int len;
+ char arr1[2];
+ char arr2[];
+};
+
+struct prog_test_ref_kfunc *
+bpf_kfunc_call_test_acquire(unsigned long *scalar_ptr) __ksym;
+void bpf_kfunc_call_test_release(struct prog_test_ref_kfunc *p) __ksym;
+void bpf_kfunc_call_test_ref(struct prog_test_ref_kfunc *p) __ksym;
+
+void bpf_kfunc_call_test_mem_len_pass1(void *mem, int len) __ksym;
+int *bpf_kfunc_call_test_get_rdwr_mem(struct prog_test_ref_kfunc *p, const int rdwr_buf_size) __ksym;
+int *bpf_kfunc_call_test_get_rdonly_mem(struct prog_test_ref_kfunc *p, const int rdonly_buf_size) __ksym;
+int *bpf_kfunc_call_test_acq_rdonly_mem(struct prog_test_ref_kfunc *p, const int rdonly_buf_size) __ksym;
+void bpf_kfunc_call_int_mem_release(int *p) __ksym;
+
+/* The bpf_kfunc_call_test_static_unused_arg is defined as static,
+ * but bpf program compilation needs to see it as global symbol.
+ */
+#ifndef __KERNEL__
+u32 bpf_kfunc_call_test_static_unused_arg(u32 arg, u32 unused) __ksym;
+#endif
+
+void bpf_testmod_test_mod_kfunc(int i) __ksym;
+
+__u64 bpf_kfunc_call_test1(struct sock *sk, __u32 a, __u64 b,
+ __u32 c, __u64 d) __ksym;
+int bpf_kfunc_call_test2(struct sock *sk, __u32 a, __u32 b) __ksym;
+struct sock *bpf_kfunc_call_test3(struct sock *sk) __ksym;
+long bpf_kfunc_call_test4(signed char a, short b, int c, long d) __ksym;
+
+void bpf_kfunc_call_test_pass_ctx(struct __sk_buff *skb) __ksym;
+void bpf_kfunc_call_test_pass1(struct prog_test_pass1 *p) __ksym;
+void bpf_kfunc_call_test_pass2(struct prog_test_pass2 *p) __ksym;
+void bpf_kfunc_call_test_mem_len_fail2(__u64 *mem, int len) __ksym;
+
+void bpf_kfunc_call_test_destructive(void) __ksym;
+
+void bpf_kfunc_call_test_offset(struct prog_test_ref_kfunc *p);
+struct prog_test_member *bpf_kfunc_call_memb_acquire(void);
+void bpf_kfunc_call_memb1_release(struct prog_test_member1 *p);
+void bpf_kfunc_call_test_fail1(struct prog_test_fail1 *p);
+void bpf_kfunc_call_test_fail2(struct prog_test_fail2 *p);
+void bpf_kfunc_call_test_fail3(struct prog_test_fail3 *p);
+void bpf_kfunc_call_test_mem_len_fail1(void *mem, int len);
+
+void bpf_kfunc_common_test(void) __ksym;
+#endif /* _BPF_TESTMOD_KFUNC_H */
diff --git a/tools/testing/selftests/bpf/bpf_util.h b/tools/testing/selftests/bpf/bpf_util.h
index a3352a64c067..10587a29b967 100644
--- a/tools/testing/selftests/bpf/bpf_util.h
+++ b/tools/testing/selftests/bpf/bpf_util.h
@@ -20,6 +20,25 @@ static inline unsigned int bpf_num_possible_cpus(void)
return possible_cpus;
}
+/* Copy up to sz - 1 bytes from zero-terminated src string and ensure that dst
+ * is zero-terminated string no matter what (unless sz == 0, in which case
+ * it's a no-op). It's conceptually close to FreeBSD's strlcpy(), but differs
+ * in what is returned. Given this is internal helper, it's trivial to extend
+ * this, when necessary. Use this instead of strncpy inside libbpf source code.
+ */
+static inline void bpf_strlcpy(char *dst, const char *src, size_t sz)
+{
+ size_t i;
+
+ if (sz == 0)
+ return;
+
+ sz--;
+ for (i = 0; i < sz && src[i]; i++)
+ dst[i] = src[i];
+ dst[i] = '\0';
+}
+
#define __bpf_percpu_val_align __attribute__((__aligned__(8)))
#define BPF_DECLARE_PERCPU(type, name) \
diff --git a/tools/testing/selftests/bpf/btf_helpers.c b/tools/testing/selftests/bpf/btf_helpers.c
new file mode 100644
index 000000000000..1c1c2c26690a
--- /dev/null
+++ b/tools/testing/selftests/bpf/btf_helpers.c
@@ -0,0 +1,292 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2020 Facebook */
+#include <stdio.h>
+#include <errno.h>
+#include <bpf/btf.h>
+#include <bpf/libbpf.h>
+#include "test_progs.h"
+
+static const char * const btf_kind_str_mapping[] = {
+ [BTF_KIND_UNKN] = "UNKNOWN",
+ [BTF_KIND_INT] = "INT",
+ [BTF_KIND_PTR] = "PTR",
+ [BTF_KIND_ARRAY] = "ARRAY",
+ [BTF_KIND_STRUCT] = "STRUCT",
+ [BTF_KIND_UNION] = "UNION",
+ [BTF_KIND_ENUM] = "ENUM",
+ [BTF_KIND_FWD] = "FWD",
+ [BTF_KIND_TYPEDEF] = "TYPEDEF",
+ [BTF_KIND_VOLATILE] = "VOLATILE",
+ [BTF_KIND_CONST] = "CONST",
+ [BTF_KIND_RESTRICT] = "RESTRICT",
+ [BTF_KIND_FUNC] = "FUNC",
+ [BTF_KIND_FUNC_PROTO] = "FUNC_PROTO",
+ [BTF_KIND_VAR] = "VAR",
+ [BTF_KIND_DATASEC] = "DATASEC",
+ [BTF_KIND_FLOAT] = "FLOAT",
+ [BTF_KIND_DECL_TAG] = "DECL_TAG",
+ [BTF_KIND_TYPE_TAG] = "TYPE_TAG",
+ [BTF_KIND_ENUM64] = "ENUM64",
+};
+
+static const char *btf_kind_str(__u16 kind)
+{
+ if (kind > BTF_KIND_ENUM64)
+ return "UNKNOWN";
+ return btf_kind_str_mapping[kind];
+}
+
+static const char *btf_int_enc_str(__u8 encoding)
+{
+ switch (encoding) {
+ case 0:
+ return "(none)";
+ case BTF_INT_SIGNED:
+ return "SIGNED";
+ case BTF_INT_CHAR:
+ return "CHAR";
+ case BTF_INT_BOOL:
+ return "BOOL";
+ default:
+ return "UNKN";
+ }
+}
+
+static const char *btf_var_linkage_str(__u32 linkage)
+{
+ switch (linkage) {
+ case BTF_VAR_STATIC:
+ return "static";
+ case BTF_VAR_GLOBAL_ALLOCATED:
+ return "global-alloc";
+ default:
+ return "(unknown)";
+ }
+}
+
+static const char *btf_func_linkage_str(const struct btf_type *t)
+{
+ switch (btf_vlen(t)) {
+ case BTF_FUNC_STATIC:
+ return "static";
+ case BTF_FUNC_GLOBAL:
+ return "global";
+ case BTF_FUNC_EXTERN:
+ return "extern";
+ default:
+ return "(unknown)";
+ }
+}
+
+static const char *btf_str(const struct btf *btf, __u32 off)
+{
+ if (!off)
+ return "(anon)";
+ return btf__str_by_offset(btf, off) ?: "(invalid)";
+}
+
+int fprintf_btf_type_raw(FILE *out, const struct btf *btf, __u32 id)
+{
+ const struct btf_type *t;
+ int kind, i;
+ __u32 vlen;
+
+ t = btf__type_by_id(btf, id);
+ if (!t)
+ return -EINVAL;
+
+ vlen = btf_vlen(t);
+ kind = btf_kind(t);
+
+ fprintf(out, "[%u] %s '%s'", id, btf_kind_str(kind), btf_str(btf, t->name_off));
+
+ switch (kind) {
+ case BTF_KIND_INT:
+ fprintf(out, " size=%u bits_offset=%u nr_bits=%u encoding=%s",
+ t->size, btf_int_offset(t), btf_int_bits(t),
+ btf_int_enc_str(btf_int_encoding(t)));
+ break;
+ case BTF_KIND_PTR:
+ case BTF_KIND_CONST:
+ case BTF_KIND_VOLATILE:
+ case BTF_KIND_RESTRICT:
+ case BTF_KIND_TYPEDEF:
+ case BTF_KIND_TYPE_TAG:
+ fprintf(out, " type_id=%u", t->type);
+ break;
+ case BTF_KIND_ARRAY: {
+ const struct btf_array *arr = btf_array(t);
+
+ fprintf(out, " type_id=%u index_type_id=%u nr_elems=%u",
+ arr->type, arr->index_type, arr->nelems);
+ break;
+ }
+ case BTF_KIND_STRUCT:
+ case BTF_KIND_UNION: {
+ const struct btf_member *m = btf_members(t);
+
+ fprintf(out, " size=%u vlen=%u", t->size, vlen);
+ for (i = 0; i < vlen; i++, m++) {
+ __u32 bit_off, bit_sz;
+
+ bit_off = btf_member_bit_offset(t, i);
+ bit_sz = btf_member_bitfield_size(t, i);
+ fprintf(out, "\n\t'%s' type_id=%u bits_offset=%u",
+ btf_str(btf, m->name_off), m->type, bit_off);
+ if (bit_sz)
+ fprintf(out, " bitfield_size=%u", bit_sz);
+ }
+ break;
+ }
+ case BTF_KIND_ENUM: {
+ const struct btf_enum *v = btf_enum(t);
+ const char *fmt_str;
+
+ fmt_str = btf_kflag(t) ? "\n\t'%s' val=%d" : "\n\t'%s' val=%u";
+ fprintf(out, " encoding=%s size=%u vlen=%u",
+ btf_kflag(t) ? "SIGNED" : "UNSIGNED", t->size, vlen);
+ for (i = 0; i < vlen; i++, v++) {
+ fprintf(out, fmt_str,
+ btf_str(btf, v->name_off), v->val);
+ }
+ break;
+ }
+ case BTF_KIND_ENUM64: {
+ const struct btf_enum64 *v = btf_enum64(t);
+ const char *fmt_str;
+
+ fmt_str = btf_kflag(t) ? "\n\t'%s' val=%lld" : "\n\t'%s' val=%llu";
+
+ fprintf(out, " encoding=%s size=%u vlen=%u",
+ btf_kflag(t) ? "SIGNED" : "UNSIGNED", t->size, vlen);
+ for (i = 0; i < vlen; i++, v++) {
+ fprintf(out, fmt_str,
+ btf_str(btf, v->name_off),
+ ((__u64)v->val_hi32 << 32) | v->val_lo32);
+ }
+ break;
+ }
+ case BTF_KIND_FWD:
+ fprintf(out, " fwd_kind=%s", btf_kflag(t) ? "union" : "struct");
+ break;
+ case BTF_KIND_FUNC:
+ fprintf(out, " type_id=%u linkage=%s", t->type, btf_func_linkage_str(t));
+ break;
+ case BTF_KIND_FUNC_PROTO: {
+ const struct btf_param *p = btf_params(t);
+
+ fprintf(out, " ret_type_id=%u vlen=%u", t->type, vlen);
+ for (i = 0; i < vlen; i++, p++) {
+ fprintf(out, "\n\t'%s' type_id=%u",
+ btf_str(btf, p->name_off), p->type);
+ }
+ break;
+ }
+ case BTF_KIND_VAR:
+ fprintf(out, " type_id=%u, linkage=%s",
+ t->type, btf_var_linkage_str(btf_var(t)->linkage));
+ break;
+ case BTF_KIND_DATASEC: {
+ const struct btf_var_secinfo *v = btf_var_secinfos(t);
+
+ fprintf(out, " size=%u vlen=%u", t->size, vlen);
+ for (i = 0; i < vlen; i++, v++) {
+ fprintf(out, "\n\ttype_id=%u offset=%u size=%u",
+ v->type, v->offset, v->size);
+ }
+ break;
+ }
+ case BTF_KIND_FLOAT:
+ fprintf(out, " size=%u", t->size);
+ break;
+ case BTF_KIND_DECL_TAG:
+ fprintf(out, " type_id=%u component_idx=%d",
+ t->type, btf_decl_tag(t)->component_idx);
+ break;
+ default:
+ break;
+ }
+
+ return 0;
+}
+
+/* Print raw BTF type dump into a local buffer and return string pointer back.
+ * Buffer *will* be overwritten by subsequent btf_type_raw_dump() calls
+ */
+const char *btf_type_raw_dump(const struct btf *btf, int type_id)
+{
+ static char buf[16 * 1024];
+ FILE *buf_file;
+
+ buf_file = fmemopen(buf, sizeof(buf) - 1, "w");
+ if (!buf_file) {
+ fprintf(stderr, "Failed to open memstream: %d\n", errno);
+ return NULL;
+ }
+
+ fprintf_btf_type_raw(buf_file, btf, type_id);
+ fflush(buf_file);
+ fclose(buf_file);
+
+ return buf;
+}
+
+int btf_validate_raw(struct btf *btf, int nr_types, const char *exp_types[])
+{
+ int i;
+ bool ok = true;
+
+ ASSERT_EQ(btf__type_cnt(btf) - 1, nr_types, "btf_nr_types");
+
+ for (i = 1; i <= nr_types; i++) {
+ if (!ASSERT_STREQ(btf_type_raw_dump(btf, i), exp_types[i - 1], "raw_dump"))
+ ok = false;
+ }
+
+ return ok;
+}
+
+static void btf_dump_printf(void *ctx, const char *fmt, va_list args)
+{
+ vfprintf(ctx, fmt, args);
+}
+
+/* Print BTF-to-C dump into a local buffer and return string pointer back.
+ * Buffer *will* be overwritten by subsequent btf_type_raw_dump() calls
+ */
+const char *btf_type_c_dump(const struct btf *btf)
+{
+ static char buf[16 * 1024];
+ FILE *buf_file;
+ struct btf_dump *d = NULL;
+ int err, i;
+
+ buf_file = fmemopen(buf, sizeof(buf) - 1, "w");
+ if (!buf_file) {
+ fprintf(stderr, "Failed to open memstream: %d\n", errno);
+ return NULL;
+ }
+
+ d = btf_dump__new(btf, btf_dump_printf, buf_file, NULL);
+ if (libbpf_get_error(d)) {
+ fprintf(stderr, "Failed to create btf_dump instance: %ld\n", libbpf_get_error(d));
+ goto err_out;
+ }
+
+ for (i = 1; i < btf__type_cnt(btf); i++) {
+ err = btf_dump__dump_type(d, i);
+ if (err) {
+ fprintf(stderr, "Failed to dump type [%d]: %d\n", i, err);
+ goto err_out;
+ }
+ }
+
+ btf_dump__free(d);
+ fflush(buf_file);
+ fclose(buf_file);
+ return buf;
+err_out:
+ btf_dump__free(d);
+ fclose(buf_file);
+ return NULL;
+}
diff --git a/tools/testing/selftests/bpf/btf_helpers.h b/tools/testing/selftests/bpf/btf_helpers.h
new file mode 100644
index 000000000000..295c0137d9bd
--- /dev/null
+++ b/tools/testing/selftests/bpf/btf_helpers.h
@@ -0,0 +1,19 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* Copyright (c) 2020 Facebook */
+#ifndef __BTF_HELPERS_H
+#define __BTF_HELPERS_H
+
+#include <stdio.h>
+#include <bpf/btf.h>
+
+int fprintf_btf_type_raw(FILE *out, const struct btf *btf, __u32 id);
+const char *btf_type_raw_dump(const struct btf *btf, int type_id);
+int btf_validate_raw(struct btf *btf, int nr_types, const char *exp_types[]);
+
+#define VALIDATE_RAW_BTF(btf, raw_types...) \
+ btf_validate_raw(btf, \
+ sizeof((const char *[]){raw_types})/sizeof(void *),\
+ (const char *[]){raw_types})
+
+const char *btf_type_c_dump(const struct btf *btf);
+#endif
diff --git a/tools/testing/selftests/bpf/cap_helpers.c b/tools/testing/selftests/bpf/cap_helpers.c
new file mode 100644
index 000000000000..d5ac507401d7
--- /dev/null
+++ b/tools/testing/selftests/bpf/cap_helpers.c
@@ -0,0 +1,67 @@
+// SPDX-License-Identifier: GPL-2.0
+#include "cap_helpers.h"
+
+/* Avoid including <sys/capability.h> from the libcap-devel package,
+ * so directly declare them here and use them from glibc.
+ */
+int capget(cap_user_header_t header, cap_user_data_t data);
+int capset(cap_user_header_t header, const cap_user_data_t data);
+
+int cap_enable_effective(__u64 caps, __u64 *old_caps)
+{
+ struct __user_cap_data_struct data[_LINUX_CAPABILITY_U32S_3];
+ struct __user_cap_header_struct hdr = {
+ .version = _LINUX_CAPABILITY_VERSION_3,
+ };
+ __u32 cap0 = caps;
+ __u32 cap1 = caps >> 32;
+ int err;
+
+ err = capget(&hdr, data);
+ if (err)
+ return err;
+
+ if (old_caps)
+ *old_caps = (__u64)(data[1].effective) << 32 | data[0].effective;
+
+ if ((data[0].effective & cap0) == cap0 &&
+ (data[1].effective & cap1) == cap1)
+ return 0;
+
+ data[0].effective |= cap0;
+ data[1].effective |= cap1;
+ err = capset(&hdr, data);
+ if (err)
+ return err;
+
+ return 0;
+}
+
+int cap_disable_effective(__u64 caps, __u64 *old_caps)
+{
+ struct __user_cap_data_struct data[_LINUX_CAPABILITY_U32S_3];
+ struct __user_cap_header_struct hdr = {
+ .version = _LINUX_CAPABILITY_VERSION_3,
+ };
+ __u32 cap0 = caps;
+ __u32 cap1 = caps >> 32;
+ int err;
+
+ err = capget(&hdr, data);
+ if (err)
+ return err;
+
+ if (old_caps)
+ *old_caps = (__u64)(data[1].effective) << 32 | data[0].effective;
+
+ if (!(data[0].effective & cap0) && !(data[1].effective & cap1))
+ return 0;
+
+ data[0].effective &= ~cap0;
+ data[1].effective &= ~cap1;
+ err = capset(&hdr, data);
+ if (err)
+ return err;
+
+ return 0;
+}
diff --git a/tools/testing/selftests/bpf/cap_helpers.h b/tools/testing/selftests/bpf/cap_helpers.h
new file mode 100644
index 000000000000..6d163530cb0f
--- /dev/null
+++ b/tools/testing/selftests/bpf/cap_helpers.h
@@ -0,0 +1,19 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef __CAP_HELPERS_H
+#define __CAP_HELPERS_H
+
+#include <linux/types.h>
+#include <linux/capability.h>
+
+#ifndef CAP_PERFMON
+#define CAP_PERFMON 38
+#endif
+
+#ifndef CAP_BPF
+#define CAP_BPF 39
+#endif
+
+int cap_enable_effective(__u64 caps, __u64 *old_caps);
+int cap_disable_effective(__u64 caps, __u64 *old_caps);
+
+#endif
diff --git a/tools/testing/selftests/bpf/cgroup_getset_retval_hooks.h b/tools/testing/selftests/bpf/cgroup_getset_retval_hooks.h
new file mode 100644
index 000000000000..a525d3544fd7
--- /dev/null
+++ b/tools/testing/selftests/bpf/cgroup_getset_retval_hooks.h
@@ -0,0 +1,25 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+
+BPF_RETVAL_HOOK(ingress, "cgroup_skb/ingress", __sk_buff, -EINVAL)
+BPF_RETVAL_HOOK(egress, "cgroup_skb/egress", __sk_buff, -EINVAL)
+BPF_RETVAL_HOOK(sock_create, "cgroup/sock_create", bpf_sock, 0)
+BPF_RETVAL_HOOK(sock_ops, "sockops", bpf_sock_ops, -EINVAL)
+BPF_RETVAL_HOOK(dev, "cgroup/dev", bpf_cgroup_dev_ctx, 0)
+BPF_RETVAL_HOOK(bind4, "cgroup/bind4", bpf_sock_addr, 0)
+BPF_RETVAL_HOOK(bind6, "cgroup/bind6", bpf_sock_addr, 0)
+BPF_RETVAL_HOOK(connect4, "cgroup/connect4", bpf_sock_addr, 0)
+BPF_RETVAL_HOOK(connect6, "cgroup/connect6", bpf_sock_addr, 0)
+BPF_RETVAL_HOOK(post_bind4, "cgroup/post_bind4", bpf_sock_addr, 0)
+BPF_RETVAL_HOOK(post_bind6, "cgroup/post_bind6", bpf_sock_addr, 0)
+BPF_RETVAL_HOOK(sendmsg4, "cgroup/sendmsg4", bpf_sock_addr, 0)
+BPF_RETVAL_HOOK(sendmsg6, "cgroup/sendmsg6", bpf_sock_addr, 0)
+BPF_RETVAL_HOOK(sysctl, "cgroup/sysctl", bpf_sysctl, 0)
+BPF_RETVAL_HOOK(recvmsg4, "cgroup/recvmsg4", bpf_sock_addr, -EINVAL)
+BPF_RETVAL_HOOK(recvmsg6, "cgroup/recvmsg6", bpf_sock_addr, -EINVAL)
+BPF_RETVAL_HOOK(getsockopt, "cgroup/getsockopt", bpf_sockopt, 0)
+BPF_RETVAL_HOOK(setsockopt, "cgroup/setsockopt", bpf_sockopt, 0)
+BPF_RETVAL_HOOK(getpeername4, "cgroup/getpeername4", bpf_sock_addr, -EINVAL)
+BPF_RETVAL_HOOK(getpeername6, "cgroup/getpeername6", bpf_sock_addr, -EINVAL)
+BPF_RETVAL_HOOK(getsockname4, "cgroup/getsockname4", bpf_sock_addr, -EINVAL)
+BPF_RETVAL_HOOK(getsockname6, "cgroup/getsockname6", bpf_sock_addr, -EINVAL)
+BPF_RETVAL_HOOK(sock_release, "cgroup/sock_release", bpf_sock, 0)
diff --git a/tools/testing/selftests/bpf/cgroup_helpers.c b/tools/testing/selftests/bpf/cgroup_helpers.c
index 0fb910df5387..19be9c63d5e8 100644
--- a/tools/testing/selftests/bpf/cgroup_helpers.c
+++ b/tools/testing/selftests/bpf/cgroup_helpers.c
@@ -12,62 +12,81 @@
#include <unistd.h>
#include <ftw.h>
-
#include "cgroup_helpers.h"
+#include "bpf_util.h"
/*
* To avoid relying on the system setup, when setup_cgroup_env is called
- * we create a new mount namespace, and cgroup namespace. The cgroup2
- * root is mounted at CGROUP_MOUNT_PATH
- *
- * Unfortunately, most people don't have cgroupv2 enabled at this point in time.
- * It's easier to create our own mount namespace and manage it ourselves.
+ * we create a new mount namespace, and cgroup namespace. The cgroupv2
+ * root is mounted at CGROUP_MOUNT_PATH. Unfortunately, most people don't
+ * have cgroupv2 enabled at this point in time. It's easier to create our
+ * own mount namespace and manage it ourselves. We assume /mnt exists.
*
- * We assume /mnt exists.
+ * Related cgroupv1 helpers are named *classid*(), since we only use the
+ * net_cls controller for tagging net_cls.classid. We assume the default
+ * mount under /sys/fs/cgroup/net_cls, which should be the case for the
+ * vast majority of users.
*/
#define WALK_FD_LIMIT 16
+
#define CGROUP_MOUNT_PATH "/mnt"
+#define CGROUP_MOUNT_DFLT "/sys/fs/cgroup"
+#define NETCLS_MOUNT_PATH CGROUP_MOUNT_DFLT "/net_cls"
#define CGROUP_WORK_DIR "/cgroup-test-work-dir"
+
+#define format_cgroup_path_pid(buf, path, pid) \
+ snprintf(buf, sizeof(buf), "%s%s%d%s", CGROUP_MOUNT_PATH, \
+ CGROUP_WORK_DIR, pid, path)
+
#define format_cgroup_path(buf, path) \
- snprintf(buf, sizeof(buf), "%s%s%s", CGROUP_MOUNT_PATH, \
- CGROUP_WORK_DIR, path)
+ format_cgroup_path_pid(buf, path, getpid())
-/**
- * enable_all_controllers() - Enable all available cgroup v2 controllers
- *
- * Enable all available cgroup v2 controllers in order to increase
- * the code coverage.
- *
- * If successful, 0 is returned.
- */
-static int enable_all_controllers(char *cgroup_path)
+#define format_parent_cgroup_path(buf, path) \
+ format_cgroup_path_pid(buf, path, getppid())
+
+#define format_classid_path_pid(buf, pid) \
+ snprintf(buf, sizeof(buf), "%s%s%d", NETCLS_MOUNT_PATH, \
+ CGROUP_WORK_DIR, pid)
+
+#define format_classid_path(buf) \
+ format_classid_path_pid(buf, getpid())
+
+static __thread bool cgroup_workdir_mounted;
+
+static void __cleanup_cgroup_environment(void);
+
+static int __enable_controllers(const char *cgroup_path, const char *controllers)
{
char path[PATH_MAX + 1];
- char buf[PATH_MAX];
+ char enable[PATH_MAX + 1];
char *c, *c2;
int fd, cfd;
ssize_t len;
- snprintf(path, sizeof(path), "%s/cgroup.controllers", cgroup_path);
- fd = open(path, O_RDONLY);
- if (fd < 0) {
- log_err("Opening cgroup.controllers: %s", path);
- return 1;
- }
-
- len = read(fd, buf, sizeof(buf) - 1);
- if (len < 0) {
+ /* If not controllers are passed, enable all available controllers */
+ if (!controllers) {
+ snprintf(path, sizeof(path), "%s/cgroup.controllers",
+ cgroup_path);
+ fd = open(path, O_RDONLY);
+ if (fd < 0) {
+ log_err("Opening cgroup.controllers: %s", path);
+ return 1;
+ }
+ len = read(fd, enable, sizeof(enable) - 1);
+ if (len < 0) {
+ close(fd);
+ log_err("Reading cgroup.controllers: %s", path);
+ return 1;
+ } else if (len == 0) { /* No controllers to enable */
+ close(fd);
+ return 0;
+ }
+ enable[len] = 0;
close(fd);
- log_err("Reading cgroup.controllers: %s", path);
- return 1;
+ } else {
+ bpf_strlcpy(enable, controllers, sizeof(enable));
}
- buf[len] = 0;
- close(fd);
-
- /* No controllers available? We're probably on cgroup v1. */
- if (len == 0)
- return 0;
snprintf(path, sizeof(path), "%s/cgroup.subtree_control", cgroup_path);
cfd = open(path, O_RDWR);
@@ -76,7 +95,7 @@ static int enable_all_controllers(char *cgroup_path)
return 1;
}
- for (c = strtok_r(buf, " ", &c2); c; c = strtok_r(NULL, " ", &c2)) {
+ for (c = strtok_r(enable, " ", &c2); c; c = strtok_r(NULL, " ", &c2)) {
if (dprintf(cfd, "+%s\n", c) <= 0) {
log_err("Enabling controller %s: %s", c, path);
close(cfd);
@@ -88,6 +107,87 @@ static int enable_all_controllers(char *cgroup_path)
}
/**
+ * enable_controllers() - Enable cgroup v2 controllers
+ * @relative_path: The cgroup path, relative to the workdir
+ * @controllers: List of controllers to enable in cgroup.controllers format
+ *
+ *
+ * Enable given cgroup v2 controllers, if @controllers is NULL, enable all
+ * available controllers.
+ *
+ * If successful, 0 is returned.
+ */
+int enable_controllers(const char *relative_path, const char *controllers)
+{
+ char cgroup_path[PATH_MAX + 1];
+
+ format_cgroup_path(cgroup_path, relative_path);
+ return __enable_controllers(cgroup_path, controllers);
+}
+
+static int __write_cgroup_file(const char *cgroup_path, const char *file,
+ const char *buf)
+{
+ char file_path[PATH_MAX + 1];
+ int fd;
+
+ snprintf(file_path, sizeof(file_path), "%s/%s", cgroup_path, file);
+ fd = open(file_path, O_RDWR);
+ if (fd < 0) {
+ log_err("Opening %s", file_path);
+ return 1;
+ }
+
+ if (dprintf(fd, "%s", buf) <= 0) {
+ log_err("Writing to %s", file_path);
+ close(fd);
+ return 1;
+ }
+ close(fd);
+ return 0;
+}
+
+/**
+ * write_cgroup_file() - Write to a cgroup file
+ * @relative_path: The cgroup path, relative to the workdir
+ * @file: The name of the file in cgroupfs to write to
+ * @buf: Buffer to write to the file
+ *
+ * Write to a file in the given cgroup's directory.
+ *
+ * If successful, 0 is returned.
+ */
+int write_cgroup_file(const char *relative_path, const char *file,
+ const char *buf)
+{
+ char cgroup_path[PATH_MAX - 24];
+
+ format_cgroup_path(cgroup_path, relative_path);
+ return __write_cgroup_file(cgroup_path, file, buf);
+}
+
+/**
+ * write_cgroup_file_parent() - Write to a cgroup file in the parent process
+ * workdir
+ * @relative_path: The cgroup path, relative to the parent process workdir
+ * @file: The name of the file in cgroupfs to write to
+ * @buf: Buffer to write to the file
+ *
+ * Write to a file in the given cgroup's directory under the parent process
+ * workdir.
+ *
+ * If successful, 0 is returned.
+ */
+int write_cgroup_file_parent(const char *relative_path, const char *file,
+ const char *buf)
+{
+ char cgroup_path[PATH_MAX - 24];
+
+ format_parent_cgroup_path(cgroup_path, relative_path);
+ return __write_cgroup_file(cgroup_path, file, buf);
+}
+
+/**
* setup_cgroup_environment() - Setup the cgroup environment
*
* After calling this function, cleanup_cgroup_environment should be called
@@ -102,6 +202,11 @@ int setup_cgroup_environment(void)
format_cgroup_path(cgroup_workdir, "");
+ if (mkdir(CGROUP_MOUNT_PATH, 0777) && errno != EEXIST) {
+ log_err("mkdir mount");
+ return 1;
+ }
+
if (unshare(CLONE_NEWNS)) {
log_err("unshare");
return 1;
@@ -116,16 +221,19 @@ int setup_cgroup_environment(void)
log_err("mount cgroup2");
return 1;
}
+ cgroup_workdir_mounted = true;
/* Cleanup existing failed runs, now that the environment is setup */
- cleanup_cgroup_environment();
+ __cleanup_cgroup_environment();
if (mkdir(cgroup_workdir, 0777) && errno != EEXIST) {
log_err("mkdir cgroup work dir");
return 1;
}
- if (enable_all_controllers(cgroup_workdir))
+ /* Enable all available controllers to increase test coverage */
+ if (__enable_controllers(CGROUP_MOUNT_PATH, NULL) ||
+ __enable_controllers(cgroup_workdir, NULL))
return 1;
return 0;
@@ -139,8 +247,7 @@ static int nftwfunc(const char *filename, const struct stat *statptr,
return 0;
}
-
-static int join_cgroup_from_top(char *cgroup_path)
+static int join_cgroup_from_top(const char *cgroup_path)
{
char cgroup_procs_path[PATH_MAX + 1];
pid_t pid = getpid();
@@ -166,7 +273,7 @@ static int join_cgroup_from_top(char *cgroup_path)
/**
* join_cgroup() - Join a cgroup
- * @path: The cgroup path, relative to the workdir, to join
+ * @relative_path: The cgroup path, relative to the workdir, to join
*
* This function expects a cgroup to already be created, relative to the cgroup
* work dir, and it joins it. For example, passing "/my-cgroup" as the path
@@ -175,19 +282,62 @@ static int join_cgroup_from_top(char *cgroup_path)
*
* On success, it returns 0, otherwise on failure it returns 1.
*/
-int join_cgroup(const char *path)
+int join_cgroup(const char *relative_path)
+{
+ char cgroup_path[PATH_MAX + 1];
+
+ format_cgroup_path(cgroup_path, relative_path);
+ return join_cgroup_from_top(cgroup_path);
+}
+
+/**
+ * join_root_cgroup() - Join the root cgroup
+ *
+ * This function joins the root cgroup.
+ *
+ * On success, it returns 0, otherwise on failure it returns 1.
+ */
+int join_root_cgroup(void)
+{
+ return join_cgroup_from_top(CGROUP_MOUNT_PATH);
+}
+
+/**
+ * join_parent_cgroup() - Join a cgroup in the parent process workdir
+ * @relative_path: The cgroup path, relative to parent process workdir, to join
+ *
+ * See join_cgroup().
+ *
+ * On success, it returns 0, otherwise on failure it returns 1.
+ */
+int join_parent_cgroup(const char *relative_path)
{
char cgroup_path[PATH_MAX + 1];
- format_cgroup_path(cgroup_path, path);
+ format_parent_cgroup_path(cgroup_path, relative_path);
return join_cgroup_from_top(cgroup_path);
}
/**
+ * __cleanup_cgroup_environment() - Delete temporary cgroups
+ *
+ * This is a helper for cleanup_cgroup_environment() that is responsible for
+ * deletion of all temporary cgroups that have been created during the test.
+ */
+static void __cleanup_cgroup_environment(void)
+{
+ char cgroup_workdir[PATH_MAX + 1];
+
+ format_cgroup_path(cgroup_workdir, "");
+ join_cgroup_from_top(CGROUP_MOUNT_PATH);
+ nftw(cgroup_workdir, nftwfunc, WALK_FD_LIMIT, FTW_DEPTH | FTW_MOUNT);
+}
+
+/**
* cleanup_cgroup_environment() - Cleanup Cgroup Testing Environment
*
* This is an idempotent function to delete all temporary cgroups that
- * have been created during the test, including the cgroup testing work
+ * have been created during the test and unmount the cgroup testing work
* directory.
*
* At call time, it moves the calling process to the root cgroup, and then
@@ -198,16 +348,52 @@ int join_cgroup(const char *path)
*/
void cleanup_cgroup_environment(void)
{
- char cgroup_workdir[PATH_MAX + 1];
+ __cleanup_cgroup_environment();
+ if (cgroup_workdir_mounted && umount(CGROUP_MOUNT_PATH))
+ log_err("umount cgroup2");
+ cgroup_workdir_mounted = false;
+}
- format_cgroup_path(cgroup_workdir, "");
- join_cgroup_from_top(CGROUP_MOUNT_PATH);
- nftw(cgroup_workdir, nftwfunc, WALK_FD_LIMIT, FTW_DEPTH | FTW_MOUNT);
+/**
+ * get_root_cgroup() - Get the FD of the root cgroup
+ *
+ * On success, it returns the file descriptor. On failure, it returns -1.
+ * If there is a failure, it prints the error to stderr.
+ */
+int get_root_cgroup(void)
+{
+ int fd;
+
+ fd = open(CGROUP_MOUNT_PATH, O_RDONLY);
+ if (fd < 0) {
+ log_err("Opening root cgroup");
+ return -1;
+ }
+ return fd;
+}
+
+/*
+ * remove_cgroup() - Remove a cgroup
+ * @relative_path: The cgroup path, relative to the workdir, to remove
+ *
+ * This function expects a cgroup to already be created, relative to the cgroup
+ * work dir. It also expects the cgroup doesn't have any children or live
+ * processes and it removes the cgroup.
+ *
+ * On failure, it will print an error to stderr.
+ */
+void remove_cgroup(const char *relative_path)
+{
+ char cgroup_path[PATH_MAX + 1];
+
+ format_cgroup_path(cgroup_path, relative_path);
+ if (rmdir(cgroup_path))
+ log_err("rmdiring cgroup %s .. %s", relative_path, cgroup_path);
}
/**
* create_and_get_cgroup() - Create a cgroup, relative to workdir, and get the FD
- * @path: The cgroup path, relative to the workdir, to join
+ * @relative_path: The cgroup path, relative to the workdir, to join
*
* This function creates a cgroup under the top level workdir and returns the
* file descriptor. It is idempotent.
@@ -215,14 +401,14 @@ void cleanup_cgroup_environment(void)
* On success, it returns the file descriptor. On failure it returns -1.
* If there is a failure, it prints the error to stderr.
*/
-int create_and_get_cgroup(const char *path)
+int create_and_get_cgroup(const char *relative_path)
{
char cgroup_path[PATH_MAX + 1];
int fd;
- format_cgroup_path(cgroup_path, path);
+ format_cgroup_path(cgroup_path, relative_path);
if (mkdir(cgroup_path, 0777) && errno != EEXIST) {
- log_err("mkdiring cgroup %s .. %s", path, cgroup_path);
+ log_err("mkdiring cgroup %s .. %s", relative_path, cgroup_path);
return -1;
}
@@ -236,26 +422,23 @@ int create_and_get_cgroup(const char *path)
}
/**
- * get_cgroup_id() - Get cgroup id for a particular cgroup path
- * @path: The cgroup path, relative to the workdir, to join
+ * get_cgroup_id_from_path - Get cgroup id for a particular cgroup path
+ * @cgroup_workdir: The absolute cgroup path
*
* On success, it returns the cgroup id. On failure it returns 0,
* which is an invalid cgroup id.
* If there is a failure, it prints the error to stderr.
*/
-unsigned long long get_cgroup_id(const char *path)
+unsigned long long get_cgroup_id_from_path(const char *cgroup_workdir)
{
int dirfd, err, flags, mount_id, fhsize;
union {
unsigned long long cgid;
unsigned char raw_bytes[8];
} id;
- char cgroup_workdir[PATH_MAX + 1];
struct file_handle *fhp, *fhp2;
unsigned long long ret = 0;
- format_cgroup_path(cgroup_workdir, path);
-
dirfd = AT_FDCWD;
flags = 0;
fhsize = sizeof(*fhp);
@@ -290,3 +473,235 @@ free_mem:
free(fhp);
return ret;
}
+
+unsigned long long get_cgroup_id(const char *relative_path)
+{
+ char cgroup_workdir[PATH_MAX + 1];
+
+ format_cgroup_path(cgroup_workdir, relative_path);
+ return get_cgroup_id_from_path(cgroup_workdir);
+}
+
+int cgroup_setup_and_join(const char *path) {
+ int cg_fd;
+
+ if (setup_cgroup_environment()) {
+ fprintf(stderr, "Failed to setup cgroup environment\n");
+ return -EINVAL;
+ }
+
+ cg_fd = create_and_get_cgroup(path);
+ if (cg_fd < 0) {
+ fprintf(stderr, "Failed to create test cgroup\n");
+ cleanup_cgroup_environment();
+ return cg_fd;
+ }
+
+ if (join_cgroup(path)) {
+ fprintf(stderr, "Failed to join cgroup\n");
+ cleanup_cgroup_environment();
+ return -EINVAL;
+ }
+ return cg_fd;
+}
+
+/**
+ * setup_classid_environment() - Setup the cgroupv1 net_cls environment
+ *
+ * After calling this function, cleanup_classid_environment should be called
+ * once testing is complete.
+ *
+ * This function will print an error to stderr and return 1 if it is unable
+ * to setup the cgroup environment. If setup is successful, 0 is returned.
+ */
+int setup_classid_environment(void)
+{
+ char cgroup_workdir[PATH_MAX + 1];
+
+ format_classid_path(cgroup_workdir);
+
+ if (mount("tmpfs", CGROUP_MOUNT_DFLT, "tmpfs", 0, NULL) &&
+ errno != EBUSY) {
+ log_err("mount cgroup base");
+ return 1;
+ }
+
+ if (mkdir(NETCLS_MOUNT_PATH, 0777) && errno != EEXIST) {
+ log_err("mkdir cgroup net_cls");
+ return 1;
+ }
+
+ if (mount("net_cls", NETCLS_MOUNT_PATH, "cgroup", 0, "net_cls")) {
+ if (errno != EBUSY) {
+ log_err("mount cgroup net_cls");
+ return 1;
+ }
+
+ if (rmdir(NETCLS_MOUNT_PATH)) {
+ log_err("rmdir cgroup net_cls");
+ return 1;
+ }
+ if (umount(CGROUP_MOUNT_DFLT)) {
+ log_err("umount cgroup base");
+ return 1;
+ }
+ }
+
+ cleanup_classid_environment();
+
+ if (mkdir(cgroup_workdir, 0777) && errno != EEXIST) {
+ log_err("mkdir cgroup work dir");
+ return 1;
+ }
+
+ return 0;
+}
+
+/**
+ * set_classid() - Set a cgroupv1 net_cls classid
+ *
+ * Writes the classid into the cgroup work dir's net_cls.classid
+ * file in order to later on trigger socket tagging.
+ *
+ * We leverage the current pid as the classid, ensuring unique identification.
+ *
+ * On success, it returns 0, otherwise on failure it returns 1. If there
+ * is a failure, it prints the error to stderr.
+ */
+int set_classid(void)
+{
+ char cgroup_workdir[PATH_MAX - 42];
+ char cgroup_classid_path[PATH_MAX + 1];
+ int fd, rc = 0;
+
+ format_classid_path(cgroup_workdir);
+ snprintf(cgroup_classid_path, sizeof(cgroup_classid_path),
+ "%s/net_cls.classid", cgroup_workdir);
+
+ fd = open(cgroup_classid_path, O_WRONLY);
+ if (fd < 0) {
+ log_err("Opening cgroup classid: %s", cgroup_classid_path);
+ return 1;
+ }
+
+ if (dprintf(fd, "%u\n", getpid()) < 0) {
+ log_err("Setting cgroup classid");
+ rc = 1;
+ }
+
+ close(fd);
+ return rc;
+}
+
+/**
+ * join_classid() - Join a cgroupv1 net_cls classid
+ *
+ * This function expects the cgroup work dir to be already created, as we
+ * join it here. This causes the process sockets to be tagged with the given
+ * net_cls classid.
+ *
+ * On success, it returns 0, otherwise on failure it returns 1.
+ */
+int join_classid(void)
+{
+ char cgroup_workdir[PATH_MAX + 1];
+
+ format_classid_path(cgroup_workdir);
+ return join_cgroup_from_top(cgroup_workdir);
+}
+
+/**
+ * cleanup_classid_environment() - Cleanup the cgroupv1 net_cls environment
+ *
+ * At call time, it moves the calling process to the root cgroup, and then
+ * runs the deletion process.
+ *
+ * On failure, it will print an error to stderr, and try to continue.
+ */
+void cleanup_classid_environment(void)
+{
+ char cgroup_workdir[PATH_MAX + 1];
+
+ format_classid_path(cgroup_workdir);
+ join_cgroup_from_top(NETCLS_MOUNT_PATH);
+ nftw(cgroup_workdir, nftwfunc, WALK_FD_LIMIT, FTW_DEPTH | FTW_MOUNT);
+}
+
+/**
+ * get_classid_cgroup_id - Get the cgroup id of a net_cls cgroup
+ */
+unsigned long long get_classid_cgroup_id(void)
+{
+ char cgroup_workdir[PATH_MAX + 1];
+
+ format_classid_path(cgroup_workdir);
+ return get_cgroup_id_from_path(cgroup_workdir);
+}
+
+/**
+ * get_cgroup1_hierarchy_id - Retrieves the ID of a cgroup1 hierarchy from the cgroup1 subsys name.
+ * @subsys_name: The cgroup1 subsys name, which can be retrieved from /proc/self/cgroup. It can be
+ * a named cgroup like "name=systemd", a controller name like "net_cls", or multi-contollers like
+ * "net_cls,net_prio".
+ */
+int get_cgroup1_hierarchy_id(const char *subsys_name)
+{
+ char *c, *c2, *c3, *c4;
+ bool found = false;
+ char line[1024];
+ FILE *file;
+ int i, id;
+
+ if (!subsys_name)
+ return -1;
+
+ file = fopen("/proc/self/cgroup", "r");
+ if (!file) {
+ log_err("fopen /proc/self/cgroup");
+ return -1;
+ }
+
+ while (fgets(line, 1024, file)) {
+ i = 0;
+ for (c = strtok_r(line, ":", &c2); c && i < 2; c = strtok_r(NULL, ":", &c2)) {
+ if (i == 0) {
+ id = strtol(c, NULL, 10);
+ } else if (i == 1) {
+ if (!strcmp(c, subsys_name)) {
+ found = true;
+ break;
+ }
+
+ /* Multiple subsystems may share one single mount point */
+ for (c3 = strtok_r(c, ",", &c4); c3;
+ c3 = strtok_r(NULL, ",", &c4)) {
+ if (!strcmp(c, subsys_name)) {
+ found = true;
+ break;
+ }
+ }
+ }
+ i++;
+ }
+ if (found)
+ break;
+ }
+ fclose(file);
+ return found ? id : -1;
+}
+
+/**
+ * open_classid() - Open a cgroupv1 net_cls classid
+ *
+ * This function expects the cgroup work dir to be already created, as we
+ * open it here.
+ *
+ * On success, it returns the file descriptor. On failure it returns -1.
+ */
+int open_classid(void)
+{
+ char cgroup_workdir[PATH_MAX + 1];
+
+ format_classid_path(cgroup_workdir);
+ return open(cgroup_workdir, O_RDONLY);
+}
diff --git a/tools/testing/selftests/bpf/cgroup_helpers.h b/tools/testing/selftests/bpf/cgroup_helpers.h
index d64bb8957090..502845160d88 100644
--- a/tools/testing/selftests/bpf/cgroup_helpers.h
+++ b/tools/testing/selftests/bpf/cgroup_helpers.h
@@ -1,6 +1,7 @@
/* SPDX-License-Identifier: GPL-2.0 */
#ifndef __CGROUP_HELPERS_H
#define __CGROUP_HELPERS_H
+
#include <errno.h>
#include <string.h>
@@ -8,11 +9,33 @@
#define log_err(MSG, ...) fprintf(stderr, "(%s:%d: errno: %s) " MSG "\n", \
__FILE__, __LINE__, clean_errno(), ##__VA_ARGS__)
+/* cgroupv2 related */
+int enable_controllers(const char *relative_path, const char *controllers);
+int write_cgroup_file(const char *relative_path, const char *file,
+ const char *buf);
+int write_cgroup_file_parent(const char *relative_path, const char *file,
+ const char *buf);
+int cgroup_setup_and_join(const char *relative_path);
+int get_root_cgroup(void);
+int create_and_get_cgroup(const char *relative_path);
+void remove_cgroup(const char *relative_path);
+unsigned long long get_cgroup_id(const char *relative_path);
+int get_cgroup1_hierarchy_id(const char *subsys_name);
+
+int join_cgroup(const char *relative_path);
+int join_root_cgroup(void);
+int join_parent_cgroup(const char *relative_path);
-int create_and_get_cgroup(const char *path);
-int join_cgroup(const char *path);
int setup_cgroup_environment(void);
void cleanup_cgroup_environment(void);
-unsigned long long get_cgroup_id(const char *path);
-#endif
+/* cgroupv1 related */
+int set_classid(void);
+int join_classid(void);
+unsigned long long get_classid_cgroup_id(void);
+int open_classid(void);
+
+int setup_classid_environment(void);
+void cleanup_classid_environment(void);
+
+#endif /* __CGROUP_HELPERS_H */
diff --git a/tools/testing/selftests/bpf/cgroup_tcp_skb.h b/tools/testing/selftests/bpf/cgroup_tcp_skb.h
new file mode 100644
index 000000000000..7f6b24f102fb
--- /dev/null
+++ b/tools/testing/selftests/bpf/cgroup_tcp_skb.h
@@ -0,0 +1,35 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* Copyright (c) 2023 Meta Platforms, Inc. and affiliates. */
+
+/* Define states of a socket to tracking messages sending to and from the
+ * socket.
+ *
+ * These states are based on rfc9293 with some modifications to support
+ * tracking of messages sent out from a socket. For example, when a SYN is
+ * received, a new socket is transiting to the SYN_RECV state defined in
+ * rfc9293. But, we put it in SYN_RECV_SENDING_SYN_ACK state and when
+ * SYN-ACK is sent out, it moves to SYN_RECV state. With this modification,
+ * we can track the message sent out from a socket.
+ */
+
+#ifndef __CGROUP_TCP_SKB_H__
+#define __CGROUP_TCP_SKB_H__
+
+enum {
+ INIT,
+ CLOSED,
+ SYN_SENT,
+ SYN_RECV_SENDING_SYN_ACK,
+ SYN_RECV,
+ ESTABLISHED,
+ FIN_WAIT1,
+ FIN_WAIT2,
+ CLOSE_WAIT_SENDING_ACK,
+ CLOSE_WAIT,
+ CLOSING,
+ LAST_ACK,
+ TIME_WAIT_SENDING_ACK,
+ TIME_WAIT,
+};
+
+#endif /* __CGROUP_TCP_SKB_H__ */
diff --git a/tools/testing/selftests/bpf/config b/tools/testing/selftests/bpf/config
index 2118e23ac07a..01f241ea2c67 100644
--- a/tools/testing/selftests/bpf/config
+++ b/tools/testing/selftests/bpf/config
@@ -1,41 +1,90 @@
+CONFIG_BLK_DEV_LOOP=y
+CONFIG_BOOTPARAM_HARDLOCKUP_PANIC=y
+CONFIG_BOOTPARAM_SOFTLOCKUP_PANIC=y
CONFIG_BPF=y
-CONFIG_BPF_SYSCALL=y
-CONFIG_NET_CLS_BPF=m
CONFIG_BPF_EVENTS=y
-CONFIG_TEST_BPF=m
-CONFIG_CGROUP_BPF=y
-CONFIG_NETDEVSIM=m
-CONFIG_NET_CLS_ACT=y
-CONFIG_NET_SCHED=y
-CONFIG_NET_SCH_INGRESS=y
-CONFIG_NET_IPIP=y
-CONFIG_IPV6=y
-CONFIG_NET_IPGRE_DEMUX=y
-CONFIG_NET_IPGRE=y
-CONFIG_IPV6_GRE=y
-CONFIG_CRYPTO_USER_API_HASH=m
-CONFIG_CRYPTO_HMAC=m
-CONFIG_CRYPTO_SHA256=m
-CONFIG_VXLAN=y
-CONFIG_GENEVE=y
-CONFIG_NET_CLS_FLOWER=m
-CONFIG_LWTUNNEL=y
+CONFIG_BPF_JIT=y
+CONFIG_BPF_KPROBE_OVERRIDE=y
+CONFIG_BPF_LIRC_MODE2=y
+CONFIG_BPF_LSM=y
CONFIG_BPF_STREAM_PARSER=y
-CONFIG_XDP_SOCKETS=y
+CONFIG_BPF_SYSCALL=y
+# CONFIG_BPF_UNPRIV_DEFAULT_OFF is not set
+CONFIG_CGROUP_BPF=y
+CONFIG_CRYPTO_HMAC=y
+CONFIG_CRYPTO_SHA256=y
+CONFIG_CRYPTO_USER_API_HASH=y
+CONFIG_DEBUG_INFO=y
+CONFIG_DEBUG_INFO_BTF=y
+CONFIG_DEBUG_INFO_DWARF4=y
+CONFIG_DUMMY=y
+CONFIG_DYNAMIC_FTRACE=y
+CONFIG_FPROBE=y
CONFIG_FTRACE_SYSCALLS=y
-CONFIG_IPV6_TUNNEL=y
+CONFIG_FUNCTION_ERROR_INJECTION=y
+CONFIG_FUNCTION_TRACER=y
+CONFIG_FS_VERITY=y
+CONFIG_GENEVE=y
+CONFIG_IKCONFIG=y
+CONFIG_IKCONFIG_PROC=y
+CONFIG_IMA=y
+CONFIG_IMA_READ_POLICY=y
+CONFIG_IMA_WRITE_POLICY=y
+CONFIG_INET_ESP=y
+CONFIG_IP_NF_FILTER=y
+CONFIG_IP_NF_RAW=y
+CONFIG_IP_NF_TARGET_SYNPROXY=y
+CONFIG_IPV6=y
+CONFIG_IPV6_FOU=y
+CONFIG_IPV6_FOU_TUNNEL=y
CONFIG_IPV6_GRE=y
CONFIG_IPV6_SEG6_BPF=y
-CONFIG_NET_FOU=m
-CONFIG_NET_FOU_IP_TUNNELS=y
-CONFIG_IPV6_FOU=m
-CONFIG_IPV6_FOU_TUNNEL=m
+CONFIG_IPV6_SIT=y
+CONFIG_IPV6_TUNNEL=y
+CONFIG_KEYS=y
+CONFIG_LIRC=y
+CONFIG_LWTUNNEL=y
+CONFIG_MODULE_SIG=y
+CONFIG_MODULE_SRCVERSION_ALL=y
+CONFIG_MODULE_UNLOAD=y
+CONFIG_MODULES=y
+CONFIG_MODVERSIONS=y
CONFIG_MPLS=y
-CONFIG_NET_MPLS_GSO=m
-CONFIG_MPLS_ROUTING=m
-CONFIG_MPLS_IPTUNNEL=m
-CONFIG_IPV6_SIT=m
-CONFIG_BPF_JIT=y
-CONFIG_BPF_LSM=y
+CONFIG_MPLS_IPTUNNEL=y
+CONFIG_MPLS_ROUTING=y
+CONFIG_MPTCP=y
+CONFIG_NET_CLS_ACT=y
+CONFIG_NET_CLS_BPF=y
+CONFIG_NET_CLS_FLOWER=y
+CONFIG_NET_FOU=y
+CONFIG_NET_FOU_IP_TUNNELS=y
+CONFIG_NET_IPGRE=y
+CONFIG_NET_IPGRE_DEMUX=y
+CONFIG_NET_IPIP=y
+CONFIG_NET_MPLS_GSO=y
+CONFIG_NET_SCH_FQ=y
+CONFIG_NET_SCH_INGRESS=y
+CONFIG_NET_SCHED=y
+CONFIG_NETDEVSIM=y
+CONFIG_NETFILTER=y
+CONFIG_NETFILTER_ADVANCED=y
+CONFIG_NETFILTER_SYNPROXY=y
+CONFIG_NETFILTER_XT_CONNMARK=y
+CONFIG_NETFILTER_XT_MATCH_STATE=y
+CONFIG_NETFILTER_XT_TARGET_CT=y
+CONFIG_NETKIT=y
+CONFIG_NF_CONNTRACK=y
+CONFIG_NF_CONNTRACK_MARK=y
+CONFIG_NF_DEFRAG_IPV4=y
+CONFIG_NF_DEFRAG_IPV6=y
+CONFIG_NF_NAT=y
+CONFIG_RC_CORE=y
CONFIG_SECURITY=y
-CONFIG_LIRC=y
+CONFIG_SECURITYFS=y
+CONFIG_SYN_COOKIES=y
+CONFIG_TEST_BPF=m
+CONFIG_USERFAULTFD=y
+CONFIG_VSOCKETS=y
+CONFIG_VXLAN=y
+CONFIG_XDP_SOCKETS=y
+CONFIG_XFRM_INTERFACE=y
diff --git a/tools/testing/selftests/bpf/config.aarch64 b/tools/testing/selftests/bpf/config.aarch64
new file mode 100644
index 000000000000..3720b7611523
--- /dev/null
+++ b/tools/testing/selftests/bpf/config.aarch64
@@ -0,0 +1,167 @@
+CONFIG_ARCH_VEXPRESS=y
+CONFIG_ARCH_WANT_DEFAULT_BPF_JIT=y
+CONFIG_ARM_SMMU_V3=y
+CONFIG_ATA=y
+CONFIG_AUDIT=y
+CONFIG_BINFMT_MISC=y
+CONFIG_BLK_CGROUP=y
+CONFIG_BLK_DEV_BSGLIB=y
+CONFIG_BLK_DEV_INITRD=y
+CONFIG_BLK_DEV_IO_TRACE=y
+CONFIG_BLK_DEV_RAM=y
+CONFIG_BLK_DEV_SD=y
+CONFIG_BONDING=y
+CONFIG_BPF_JIT_ALWAYS_ON=y
+CONFIG_BPF_JIT_DEFAULT_ON=y
+CONFIG_BPF_PRELOAD_UMD=y
+CONFIG_BPF_PRELOAD=y
+CONFIG_BRIDGE=m
+CONFIG_CGROUP_CPUACCT=y
+CONFIG_CGROUP_DEVICE=y
+CONFIG_CGROUP_FREEZER=y
+CONFIG_CGROUP_HUGETLB=y
+CONFIG_CGROUP_NET_CLASSID=y
+CONFIG_CGROUP_PERF=y
+CONFIG_CGROUP_PIDS=y
+CONFIG_CGROUP_SCHED=y
+CONFIG_CGROUPS=y
+CONFIG_CHECKPOINT_RESTORE=y
+CONFIG_CHR_DEV_SG=y
+CONFIG_COMPAT=y
+CONFIG_CPUSETS=y
+CONFIG_CRASH_DUMP=y
+CONFIG_CRYPTO_USER_API_RNG=y
+CONFIG_CRYPTO_USER_API_SKCIPHER=y
+CONFIG_DEBUG_ATOMIC_SLEEP=y
+CONFIG_DEBUG_INFO_BTF=y
+CONFIG_DEBUG_INFO_DWARF4=y
+CONFIG_DEBUG_INFO_REDUCED=n
+CONFIG_DEBUG_LIST=y
+CONFIG_DEBUG_LOCKDEP=y
+CONFIG_DEBUG_NOTIFIERS=y
+CONFIG_DEBUG_PAGEALLOC=y
+CONFIG_DEBUG_SECTION_MISMATCH=y
+CONFIG_DEBUG_SG=y
+CONFIG_DETECT_HUNG_TASK=y
+CONFIG_DEVTMPFS_MOUNT=y
+CONFIG_DEVTMPFS=y
+CONFIG_DRM=y
+CONFIG_DUMMY=y
+CONFIG_EXPERT=y
+CONFIG_EXT4_FS_POSIX_ACL=y
+CONFIG_EXT4_FS_SECURITY=y
+CONFIG_EXT4_FS=y
+CONFIG_FANOTIFY=y
+CONFIG_FB=y
+CONFIG_FUNCTION_PROFILER=y
+CONFIG_FUSE_FS=y
+CONFIG_FW_CFG_SYSFS_CMDLINE=y
+CONFIG_FW_CFG_SYSFS=y
+CONFIG_GDB_SCRIPTS=y
+CONFIG_HAVE_EBPF_JIT=y
+CONFIG_HAVE_KPROBES_ON_FTRACE=y
+CONFIG_HAVE_KPROBES=y
+CONFIG_HAVE_KRETPROBES=y
+CONFIG_HEADERS_INSTALL=y
+CONFIG_HIGH_RES_TIMERS=y
+CONFIG_HUGETLBFS=y
+CONFIG_HW_RANDOM=y
+CONFIG_HZ_100=y
+CONFIG_IDLE_PAGE_TRACKING=y
+CONFIG_IKHEADERS=y
+CONFIG_INET6_ESP=y
+CONFIG_INET_ESP=y
+CONFIG_INET=y
+CONFIG_INPUT_EVDEV=y
+CONFIG_IP_ADVANCED_ROUTER=y
+CONFIG_IP_MULTICAST=y
+CONFIG_IP_MULTIPLE_TABLES=y
+CONFIG_IP_NF_IPTABLES=y
+CONFIG_IPV6_SEG6_LWTUNNEL=y
+CONFIG_IPVLAN=y
+CONFIG_JUMP_LABEL=y
+CONFIG_KERNEL_UNCOMPRESSED=y
+CONFIG_KPROBES_ON_FTRACE=y
+CONFIG_KPROBES=y
+CONFIG_KRETPROBES=y
+CONFIG_KSM=y
+CONFIG_LATENCYTOP=y
+CONFIG_LIVEPATCH=y
+CONFIG_LOCK_STAT=y
+CONFIG_MACVLAN=y
+CONFIG_MACVTAP=y
+CONFIG_MAGIC_SYSRQ=y
+CONFIG_MAILBOX=y
+CONFIG_MEMCG=y
+CONFIG_MEMORY_HOTPLUG=y
+CONFIG_MEMORY_HOTREMOVE=y
+CONFIG_NAMESPACES=y
+CONFIG_NET_ACT_BPF=y
+CONFIG_NET_ACT_GACT=y
+CONFIG_NETDEVICES=y
+CONFIG_NETFILTER_XT_MATCH_BPF=y
+CONFIG_NETFILTER_XT_TARGET_MARK=y
+CONFIG_NET_KEY=y
+CONFIG_NET_SCH_FQ=y
+CONFIG_NET_VRF=y
+CONFIG_NET=y
+CONFIG_NF_TABLES=y
+CONFIG_NLMON=y
+CONFIG_NO_HZ_IDLE=y
+CONFIG_NR_CPUS=256
+CONFIG_NUMA=y
+CONFIG_OVERLAY_FS=y
+CONFIG_PACKET_DIAG=y
+CONFIG_PACKET=y
+CONFIG_PANIC_ON_OOPS=y
+CONFIG_PARTITION_ADVANCED=y
+CONFIG_PCI_HOST_GENERIC=y
+CONFIG_PCI=y
+CONFIG_PL320_MBOX=y
+CONFIG_POSIX_MQUEUE=y
+CONFIG_PROC_KCORE=y
+CONFIG_PROFILING=y
+CONFIG_PROVE_LOCKING=y
+CONFIG_PTDUMP_DEBUGFS=y
+CONFIG_RC_DEVICES=y
+CONFIG_RC_LOOPBACK=y
+CONFIG_RTC_CLASS=y
+CONFIG_RTC_DRV_PL031=y
+CONFIG_RT_GROUP_SCHED=y
+CONFIG_SAMPLE_SECCOMP=y
+CONFIG_SAMPLES=y
+CONFIG_SCHED_AUTOGROUP=y
+CONFIG_SCHED_TRACER=y
+CONFIG_SCSI_CONSTANTS=y
+CONFIG_SCSI_LOGGING=y
+CONFIG_SCSI_SCAN_ASYNC=y
+CONFIG_SCSI=y
+CONFIG_SECURITY_NETWORK=y
+CONFIG_SERIAL_AMBA_PL011_CONSOLE=y
+CONFIG_SERIAL_AMBA_PL011=y
+CONFIG_STACK_TRACER=y
+CONFIG_STATIC_KEYS_SELFTEST=y
+CONFIG_SYSVIPC=y
+CONFIG_TASK_DELAY_ACCT=y
+CONFIG_TASK_IO_ACCOUNTING=y
+CONFIG_TASKSTATS=y
+CONFIG_TASK_XACCT=y
+CONFIG_TCG_TIS=y
+CONFIG_TCG_TPM=y
+CONFIG_TCP_CONG_ADVANCED=y
+CONFIG_TCP_CONG_DCTCP=y
+CONFIG_TLS=y
+CONFIG_TMPFS_POSIX_ACL=y
+CONFIG_TMPFS=y
+CONFIG_TRACER_SNAPSHOT_PER_CPU_SWAP=y
+CONFIG_TRANSPARENT_HUGEPAGE=y
+CONFIG_TUN=y
+CONFIG_UNIX=y
+CONFIG_UPROBES=y
+CONFIG_USELIB=y
+CONFIG_USER_NS=y
+CONFIG_VETH=y
+CONFIG_VLAN_8021Q=y
+CONFIG_VSOCKETS=y
+CONFIG_VSOCKETS_LOOPBACK=y
+CONFIG_XFRM_USER=y
diff --git a/tools/testing/selftests/bpf/config.s390x b/tools/testing/selftests/bpf/config.s390x
new file mode 100644
index 000000000000..706931a8c2c6
--- /dev/null
+++ b/tools/testing/selftests/bpf/config.s390x
@@ -0,0 +1,137 @@
+CONFIG_ARCH_WANT_DEFAULT_BPF_JIT=y
+CONFIG_AUDIT=y
+CONFIG_BLK_CGROUP=y
+CONFIG_BLK_DEV_INITRD=y
+CONFIG_BLK_DEV_IO_TRACE=y
+CONFIG_BLK_DEV_RAM=y
+CONFIG_BONDING=y
+CONFIG_BPF_JIT_ALWAYS_ON=y
+CONFIG_BPF_JIT_DEFAULT_ON=y
+CONFIG_BPF_PRELOAD=y
+CONFIG_BPF_PRELOAD_UMD=y
+CONFIG_CGROUP_CPUACCT=y
+CONFIG_CGROUP_DEVICE=y
+CONFIG_CGROUP_FREEZER=y
+CONFIG_CGROUP_HUGETLB=y
+CONFIG_CGROUP_NET_CLASSID=y
+CONFIG_CGROUP_PERF=y
+CONFIG_CGROUP_PIDS=y
+CONFIG_CGROUP_SCHED=y
+CONFIG_CGROUPS=y
+CONFIG_CHECKPOINT_RESTORE=y
+CONFIG_CPUSETS=y
+CONFIG_CRASH_DUMP=y
+CONFIG_CRYPTO_USER_API_RNG=y
+CONFIG_CRYPTO_USER_API_SKCIPHER=y
+CONFIG_DEBUG_ATOMIC_SLEEP=y
+CONFIG_DEBUG_INFO_BTF=y
+CONFIG_DEBUG_INFO_DWARF4=y
+CONFIG_DEBUG_LIST=y
+CONFIG_DEBUG_LOCKDEP=y
+CONFIG_DEBUG_NOTIFIERS=y
+CONFIG_DEBUG_PAGEALLOC=y
+CONFIG_DEBUG_SECTION_MISMATCH=y
+CONFIG_DEBUG_SG=y
+CONFIG_DETECT_HUNG_TASK=y
+CONFIG_DEVTMPFS=y
+CONFIG_EXPERT=y
+CONFIG_EXT4_FS=y
+CONFIG_EXT4_FS_POSIX_ACL=y
+CONFIG_EXT4_FS_SECURITY=y
+CONFIG_FANOTIFY=y
+CONFIG_FUNCTION_PROFILER=y
+CONFIG_GDB_SCRIPTS=y
+CONFIG_HAVE_EBPF_JIT=y
+CONFIG_HAVE_KPROBES=y
+CONFIG_HAVE_KPROBES_ON_FTRACE=y
+CONFIG_HAVE_KRETPROBES=y
+CONFIG_HAVE_MARCH_Z10_FEATURES=y
+CONFIG_HAVE_MARCH_Z196_FEATURES=y
+CONFIG_HEADERS_INSTALL=y
+CONFIG_HIGH_RES_TIMERS=y
+CONFIG_HUGETLBFS=y
+CONFIG_HW_RANDOM=y
+CONFIG_HZ_100=y
+CONFIG_IDLE_PAGE_TRACKING=y
+CONFIG_IKHEADERS=y
+CONFIG_INET6_ESP=y
+CONFIG_INET=y
+CONFIG_INET_ESP=y
+CONFIG_IP_ADVANCED_ROUTER=y
+CONFIG_IP_MULTICAST=y
+CONFIG_IP_MULTIPLE_TABLES=y
+CONFIG_IP_NF_IPTABLES=y
+CONFIG_IPV6_SEG6_LWTUNNEL=y
+CONFIG_IPVLAN=y
+CONFIG_JUMP_LABEL=y
+CONFIG_KERNEL_UNCOMPRESSED=y
+CONFIG_KPROBES=y
+CONFIG_KPROBES_ON_FTRACE=y
+CONFIG_KRETPROBES=y
+CONFIG_KSM=y
+CONFIG_LATENCYTOP=y
+CONFIG_LIVEPATCH=y
+CONFIG_LOCK_STAT=y
+CONFIG_MACVLAN=y
+CONFIG_MACVTAP=y
+CONFIG_MAGIC_SYSRQ=y
+CONFIG_MARCH_Z196=y
+CONFIG_MARCH_Z196_TUNE=y
+CONFIG_MEMCG=y
+CONFIG_MEMORY_HOTPLUG=y
+CONFIG_MEMORY_HOTREMOVE=y
+CONFIG_NAMESPACES=y
+CONFIG_NET=y
+CONFIG_NET_ACT_BPF=y
+CONFIG_NET_ACT_GACT=y
+CONFIG_NET_KEY=y
+CONFIG_NET_SCH_FQ=y
+CONFIG_NET_VRF=y
+CONFIG_NETDEVICES=y
+CONFIG_NETFILTER_XT_MATCH_BPF=y
+CONFIG_NETFILTER_XT_TARGET_MARK=y
+CONFIG_NF_TABLES=y
+CONFIG_NO_HZ_IDLE=y
+CONFIG_NR_CPUS=256
+CONFIG_NUMA=y
+CONFIG_PACKET=y
+CONFIG_PANIC_ON_OOPS=y
+CONFIG_PARTITION_ADVANCED=y
+CONFIG_PCI=y
+CONFIG_POSIX_MQUEUE=y
+CONFIG_PROC_KCORE=y
+CONFIG_PROFILING=y
+CONFIG_PROVE_LOCKING=y
+CONFIG_PTDUMP_DEBUGFS=y
+CONFIG_RC_DEVICES=y
+CONFIG_RC_LOOPBACK=y
+CONFIG_RT_GROUP_SCHED=y
+CONFIG_SAMPLE_SECCOMP=y
+CONFIG_SAMPLES=y
+CONFIG_SCHED_TRACER=y
+CONFIG_SCSI=y
+CONFIG_SECURITY_NETWORK=y
+CONFIG_STACK_TRACER=y
+CONFIG_STATIC_KEYS_SELFTEST=y
+CONFIG_SYSVIPC=y
+CONFIG_TASK_DELAY_ACCT=y
+CONFIG_TASK_IO_ACCOUNTING=y
+CONFIG_TASK_XACCT=y
+CONFIG_TASKSTATS=y
+CONFIG_TCP_CONG_ADVANCED=y
+CONFIG_TCP_CONG_DCTCP=y
+CONFIG_TLS=y
+CONFIG_TMPFS=y
+CONFIG_TMPFS_POSIX_ACL=y
+CONFIG_TRACER_SNAPSHOT_PER_CPU_SWAP=y
+CONFIG_TRANSPARENT_HUGEPAGE=y
+CONFIG_TUN=y
+CONFIG_UNIX=y
+CONFIG_UPROBES=y
+CONFIG_USELIB=y
+CONFIG_USER_NS=y
+CONFIG_VETH=y
+CONFIG_VLAN_8021Q=y
+CONFIG_VSOCKETS=y
+CONFIG_VSOCKETS_LOOPBACK=y
+CONFIG_XFRM_USER=y
diff --git a/tools/testing/selftests/bpf/config.vm b/tools/testing/selftests/bpf/config.vm
new file mode 100644
index 000000000000..a9746ca78777
--- /dev/null
+++ b/tools/testing/selftests/bpf/config.vm
@@ -0,0 +1,12 @@
+CONFIG_9P_FS=y
+CONFIG_9P_FS_POSIX_ACL=y
+CONFIG_9P_FS_SECURITY=y
+CONFIG_CRYPTO_DEV_VIRTIO=y
+CONFIG_NET_9P=y
+CONFIG_NET_9P_VIRTIO=y
+CONFIG_VIRTIO_BALLOON=y
+CONFIG_VIRTIO_BLK=y
+CONFIG_VIRTIO_CONSOLE=y
+CONFIG_VIRTIO_NET=y
+CONFIG_VIRTIO_PCI=y
+CONFIG_VIRTIO_VSOCKETS_COMMON=y
diff --git a/tools/testing/selftests/bpf/config.x86_64 b/tools/testing/selftests/bpf/config.x86_64
new file mode 100644
index 000000000000..5680befae8c6
--- /dev/null
+++ b/tools/testing/selftests/bpf/config.x86_64
@@ -0,0 +1,233 @@
+CONFIG_AGP=y
+CONFIG_AGP_AMD64=y
+CONFIG_AGP_INTEL=y
+CONFIG_AGP_SIS=y
+CONFIG_AGP_VIA=y
+CONFIG_AMIGA_PARTITION=y
+CONFIG_AUDIT=y
+CONFIG_BACKLIGHT_CLASS_DEVICE=y
+CONFIG_BINFMT_MISC=y
+CONFIG_BLK_CGROUP=y
+CONFIG_BLK_CGROUP_IOLATENCY=y
+CONFIG_BLK_DEV_BSGLIB=y
+CONFIG_BLK_DEV_IO_TRACE=y
+CONFIG_BLK_DEV_RAM=y
+CONFIG_BLK_DEV_RAM_SIZE=16384
+CONFIG_BLK_DEV_THROTTLING=y
+CONFIG_BONDING=y
+CONFIG_BOOTTIME_TRACING=y
+CONFIG_BPF_JIT_ALWAYS_ON=y
+CONFIG_BPF_PRELOAD=y
+CONFIG_BPF_PRELOAD_UMD=y
+CONFIG_BSD_DISKLABEL=y
+CONFIG_BSD_PROCESS_ACCT=y
+CONFIG_CFS_BANDWIDTH=y
+CONFIG_CGROUP_CPUACCT=y
+CONFIG_CGROUP_DEVICE=y
+CONFIG_CGROUP_FREEZER=y
+CONFIG_CGROUP_HUGETLB=y
+CONFIG_CGROUP_PERF=y
+CONFIG_CGROUP_SCHED=y
+CONFIG_CGROUPS=y
+CONFIG_CMA=y
+CONFIG_CMA_AREAS=7
+CONFIG_COMPAT_32BIT_TIME=y
+CONFIG_CPU_FREQ_DEFAULT_GOV_PERFORMANCE=y
+CONFIG_CPU_FREQ_GOV_CONSERVATIVE=y
+CONFIG_CPU_FREQ_GOV_ONDEMAND=y
+CONFIG_CPU_FREQ_GOV_USERSPACE=y
+CONFIG_CPU_FREQ_STAT=y
+CONFIG_CPU_IDLE_GOV_LADDER=y
+CONFIG_CPUSETS=y
+CONFIG_CRC_T10DIF=y
+CONFIG_CRYPTO_BLAKE2B=y
+CONFIG_CRYPTO_SEQIV=y
+CONFIG_CRYPTO_XXHASH=y
+CONFIG_DCB=y
+CONFIG_DEBUG_ATOMIC_SLEEP=y
+CONFIG_DEBUG_INFO_BTF=y
+CONFIG_DEBUG_INFO_DWARF_TOOLCHAIN_DEFAULT=y
+CONFIG_DEBUG_MEMORY_INIT=y
+CONFIG_DEFAULT_FQ_CODEL=y
+CONFIG_DEFAULT_RENO=y
+CONFIG_DEFAULT_SECURITY_DAC=y
+CONFIG_DEVTMPFS=y
+CONFIG_DEVTMPFS_MOUNT=y
+CONFIG_DMA_CMA=y
+CONFIG_DNS_RESOLVER=y
+CONFIG_EFI=y
+CONFIG_EFI_STUB=y
+CONFIG_EXPERT=y
+CONFIG_EXT4_FS=y
+CONFIG_EXT4_FS_POSIX_ACL=y
+CONFIG_EXT4_FS_SECURITY=y
+CONFIG_FAIL_FUNCTION=y
+CONFIG_FAULT_INJECTION=y
+CONFIG_FAULT_INJECTION_DEBUG_FS=y
+CONFIG_FB=y
+CONFIG_FB_MODE_HELPERS=y
+CONFIG_FB_TILEBLITTING=y
+CONFIG_FB_VESA=y
+CONFIG_FONT_8x16=y
+CONFIG_FONT_MINI_4x6=y
+CONFIG_FONTS=y
+CONFIG_FRAMEBUFFER_CONSOLE=y
+CONFIG_FRAMEBUFFER_CONSOLE_DETECT_PRIMARY=y
+CONFIG_FRAMEBUFFER_CONSOLE_ROTATION=y
+CONFIG_FW_LOADER_USER_HELPER=y
+CONFIG_GART_IOMMU=y
+CONFIG_GENERIC_PHY=y
+CONFIG_HARDLOCKUP_DETECTOR=y
+CONFIG_HID_A4TECH=y
+CONFIG_HID_BELKIN=y
+CONFIG_HID_CHERRY=y
+CONFIG_HID_CYPRESS=y
+CONFIG_HID_DRAGONRISE=y
+CONFIG_HID_EZKEY=y
+CONFIG_HID_GREENASIA=y
+CONFIG_HID_GYRATION=y
+CONFIG_HID_KENSINGTON=y
+CONFIG_HID_KYE=y
+CONFIG_HID_MICROSOFT=y
+CONFIG_HID_MONTEREY=y
+CONFIG_HID_PANTHERLORD=y
+CONFIG_HID_PETALYNX=y
+CONFIG_HID_SMARTJOYPLUS=y
+CONFIG_HID_SUNPLUS=y
+CONFIG_HID_TOPSEED=y
+CONFIG_HID_TWINHAN=y
+CONFIG_HID_ZEROPLUS=y
+CONFIG_HIGH_RES_TIMERS=y
+CONFIG_HPET=y
+CONFIG_HUGETLBFS=y
+CONFIG_HWPOISON_INJECT=y
+CONFIG_HZ_1000=y
+CONFIG_INET=y
+CONFIG_INPUT_EVDEV=y
+CONFIG_INTEL_POWERCLAMP=y
+CONFIG_IP6_NF_IPTABLES=y
+CONFIG_IP_ADVANCED_ROUTER=y
+CONFIG_IP_MROUTE=y
+CONFIG_IP_MULTICAST=y
+CONFIG_IP_MULTIPLE_TABLES=y
+CONFIG_IP_NF_IPTABLES=y
+CONFIG_IP_PIMSM_V1=y
+CONFIG_IP_PIMSM_V2=y
+CONFIG_IP_ROUTE_MULTIPATH=y
+CONFIG_IP_ROUTE_VERBOSE=y
+CONFIG_IPV6_MIP6=y
+CONFIG_IPV6_ROUTE_INFO=y
+CONFIG_IPV6_ROUTER_PREF=y
+CONFIG_IPV6_SEG6_LWTUNNEL=y
+CONFIG_IPV6_SUBTREES=y
+CONFIG_IRQ_POLL=y
+CONFIG_JUMP_LABEL=y
+CONFIG_KARMA_PARTITION=y
+CONFIG_KEXEC=y
+CONFIG_KPROBES=y
+CONFIG_KSM=y
+CONFIG_LEGACY_VSYSCALL_NONE=y
+CONFIG_LOG_BUF_SHIFT=21
+CONFIG_LOG_CPU_MAX_BUF_SHIFT=0
+CONFIG_LOGO=y
+CONFIG_LSM="selinux,bpf,integrity"
+CONFIG_MAC_PARTITION=y
+CONFIG_MAGIC_SYSRQ=y
+CONFIG_MCORE2=y
+CONFIG_MEMCG=y
+CONFIG_MEMORY_FAILURE=y
+CONFIG_MINIX_SUBPARTITION=y
+CONFIG_NAMESPACES=y
+CONFIG_NET=y
+CONFIG_NET_ACT_BPF=y
+CONFIG_NET_CLS_CGROUP=y
+CONFIG_NET_EMATCH=y
+CONFIG_NET_IPGRE_BROADCAST=y
+CONFIG_NET_L3_MASTER_DEV=y
+CONFIG_NET_SCH_DEFAULT=y
+CONFIG_NET_SCH_FQ_CODEL=y
+CONFIG_NET_TC_SKB_EXT=y
+CONFIG_NET_VRF=y
+CONFIG_NETDEVICES=y
+CONFIG_NETFILTER_NETLINK_LOG=y
+CONFIG_NETFILTER_NETLINK_QUEUE=y
+CONFIG_NETFILTER_XT_MATCH_BPF=y
+CONFIG_NETFILTER_XT_MATCH_STATISTIC=y
+CONFIG_NETLABEL=y
+CONFIG_NLS_ASCII=y
+CONFIG_NLS_CODEPAGE_437=y
+CONFIG_NLS_DEFAULT="utf8"
+CONFIG_NO_HZ=y
+CONFIG_NR_CPUS=128
+CONFIG_NUMA=y
+CONFIG_NUMA_BALANCING=y
+CONFIG_NVMEM=y
+CONFIG_OSF_PARTITION=y
+CONFIG_PACKET=y
+CONFIG_PANIC_ON_OOPS=y
+CONFIG_PARTITION_ADVANCED=y
+CONFIG_PCI=y
+CONFIG_PCI_IOV=y
+CONFIG_PCI_MSI=y
+CONFIG_PCIEPORTBUS=y
+CONFIG_PHYSICAL_ALIGN=0x1000000
+CONFIG_POSIX_MQUEUE=y
+CONFIG_POWER_SUPPLY=y
+CONFIG_PREEMPT=y
+CONFIG_PRINTK_TIME=y
+CONFIG_PROC_KCORE=y
+CONFIG_PROFILING=y
+CONFIG_PROVE_LOCKING=y
+CONFIG_PTP_1588_CLOCK=y
+CONFIG_RC_DEVICES=y
+CONFIG_RC_LOOPBACK=y
+CONFIG_RCU_CPU_STALL_TIMEOUT=60
+CONFIG_SCHED_STACK_END_CHECK=y
+CONFIG_SCHEDSTATS=y
+CONFIG_SECURITY_NETWORK=y
+CONFIG_SECURITY_SELINUX=y
+CONFIG_SERIAL_8250=y
+CONFIG_SERIAL_8250_CONSOLE=y
+CONFIG_SERIAL_8250_DETECT_IRQ=y
+CONFIG_SERIAL_8250_EXTENDED=y
+CONFIG_SERIAL_8250_MANY_PORTS=y
+CONFIG_SERIAL_8250_NR_UARTS=32
+CONFIG_SERIAL_8250_RSA=y
+CONFIG_SERIAL_8250_SHARE_IRQ=y
+CONFIG_SERIAL_NONSTANDARD=y
+CONFIG_SERIO_LIBPS2=y
+CONFIG_SGI_PARTITION=y
+CONFIG_SMP=y
+CONFIG_SOLARIS_X86_PARTITION=y
+CONFIG_SUN_PARTITION=y
+CONFIG_SYNC_FILE=y
+CONFIG_SYSVIPC=y
+CONFIG_TASK_DELAY_ACCT=y
+CONFIG_TASK_IO_ACCOUNTING=y
+CONFIG_TASK_XACCT=y
+CONFIG_TASKSTATS=y
+CONFIG_TCP_CONG_ADVANCED=y
+CONFIG_TCP_MD5SIG=y
+CONFIG_TLS=y
+CONFIG_TMPFS=y
+CONFIG_TMPFS_POSIX_ACL=y
+CONFIG_TRANSPARENT_HUGEPAGE=y
+CONFIG_TRANSPARENT_HUGEPAGE_MADVISE=y
+CONFIG_TUN=y
+CONFIG_UNIX=y
+CONFIG_UNIXWARE_DISKLABEL=y
+CONFIG_USER_NS=y
+CONFIG_VALIDATE_FS_PARSER=y
+CONFIG_VETH=y
+CONFIG_VIRT_DRIVERS=y
+CONFIG_VLAN_8021Q=y
+CONFIG_VSOCKETS=y
+CONFIG_VSOCKETS_LOOPBACK=y
+CONFIG_X86_ACPI_CPUFREQ=y
+CONFIG_X86_CPUID=y
+CONFIG_X86_MSR=y
+CONFIG_X86_POWERNOW_K8=y
+CONFIG_XDP_SOCKETS_DIAG=y
+CONFIG_XFRM_SUB_POLICY=y
+CONFIG_XFRM_USER=y
+CONFIG_ZEROPLUS_FF=y
diff --git a/tools/testing/selftests/bpf/disasm.c b/tools/testing/selftests/bpf/disasm.c
new file mode 120000
index 000000000000..b1571927bd54
--- /dev/null
+++ b/tools/testing/selftests/bpf/disasm.c
@@ -0,0 +1 @@
+../../../../kernel/bpf/disasm.c \ No newline at end of file
diff --git a/tools/testing/selftests/bpf/disasm.h b/tools/testing/selftests/bpf/disasm.h
new file mode 120000
index 000000000000..8054fd497340
--- /dev/null
+++ b/tools/testing/selftests/bpf/disasm.h
@@ -0,0 +1 @@
+../../../../kernel/bpf/disasm.h \ No newline at end of file
diff --git a/tools/testing/selftests/bpf/flow_dissector_load.c b/tools/testing/selftests/bpf/flow_dissector_load.c
index 3fd83b9dc1bf..c8be6406777f 100644
--- a/tools/testing/selftests/bpf/flow_dissector_load.c
+++ b/tools/testing/selftests/bpf/flow_dissector_load.c
@@ -11,13 +11,12 @@
#include <bpf/bpf.h>
#include <bpf/libbpf.h>
-#include "bpf_rlimit.h"
#include "flow_dissector_load.h"
const char *cfg_pin_path = "/sys/fs/bpf/flow_dissector";
const char *cfg_map_name = "jmp_table";
bool cfg_attach = true;
-char *cfg_section_name;
+char *cfg_prog_name;
char *cfg_path_name;
static void load_and_attach_program(void)
@@ -25,7 +24,10 @@ static void load_and_attach_program(void)
int prog_fd, ret;
struct bpf_object *obj;
- ret = bpf_flow_load(&obj, cfg_path_name, cfg_section_name,
+ /* Use libbpf 1.0 API mode */
+ libbpf_set_strict_mode(LIBBPF_STRICT_ALL);
+
+ ret = bpf_flow_load(&obj, cfg_path_name, cfg_prog_name,
cfg_map_name, NULL, &prog_fd, NULL);
if (ret)
error(1, 0, "bpf_flow_load %s", cfg_path_name);
@@ -75,15 +77,15 @@ static void parse_opts(int argc, char **argv)
break;
case 'p':
if (cfg_path_name)
- error(1, 0, "only one prog name can be given");
+ error(1, 0, "only one path can be given");
cfg_path_name = optarg;
break;
case 's':
- if (cfg_section_name)
- error(1, 0, "only one section can be given");
+ if (cfg_prog_name)
+ error(1, 0, "only one prog can be given");
- cfg_section_name = optarg;
+ cfg_prog_name = optarg;
break;
}
}
@@ -94,7 +96,7 @@ static void parse_opts(int argc, char **argv)
if (cfg_attach && !cfg_path_name)
error(1, 0, "must provide a path to the BPF program");
- if (cfg_attach && !cfg_section_name)
+ if (cfg_attach && !cfg_prog_name)
error(1, 0, "must provide a section name");
}
diff --git a/tools/testing/selftests/bpf/flow_dissector_load.h b/tools/testing/selftests/bpf/flow_dissector_load.h
index daeaeb518894..f40b585f4e7e 100644
--- a/tools/testing/selftests/bpf/flow_dissector_load.h
+++ b/tools/testing/selftests/bpf/flow_dissector_load.h
@@ -4,10 +4,11 @@
#include <bpf/bpf.h>
#include <bpf/libbpf.h>
+#include "testing_helpers.h"
static inline int bpf_flow_load(struct bpf_object **obj,
const char *path,
- const char *section_name,
+ const char *prog_name,
const char *map_name,
const char *keys_map_name,
int *prog_fd,
@@ -18,12 +19,12 @@ static inline int bpf_flow_load(struct bpf_object **obj,
int prog_array_fd;
int ret, fd, i;
- ret = bpf_prog_load(path, BPF_PROG_TYPE_FLOW_DISSECTOR, obj,
+ ret = bpf_prog_test_load(path, BPF_PROG_TYPE_FLOW_DISSECTOR, obj,
prog_fd);
if (ret)
return ret;
- main_prog = bpf_object__find_program_by_title(*obj, section_name);
+ main_prog = bpf_object__find_program_by_name(*obj, prog_name);
if (!main_prog)
return -1;
diff --git a/tools/testing/selftests/bpf/generate_udp_fragments.py b/tools/testing/selftests/bpf/generate_udp_fragments.py
new file mode 100755
index 000000000000..2b8a1187991c
--- /dev/null
+++ b/tools/testing/selftests/bpf/generate_udp_fragments.py
@@ -0,0 +1,90 @@
+#!/bin/env python3
+# SPDX-License-Identifier: GPL-2.0
+
+"""
+This script helps generate fragmented UDP packets.
+
+While it is technically possible to dynamically generate
+fragmented packets in C, it is much harder to read and write
+said code. `scapy` is relatively industry standard and really
+easy to read / write.
+
+So we choose to write this script that generates a valid C
+header. Rerun script and commit generated file after any
+modifications.
+"""
+
+import argparse
+import os
+
+from scapy.all import *
+
+
+# These constants must stay in sync with `ip_check_defrag.c`
+VETH1_ADDR = "172.16.1.200"
+VETH0_ADDR6 = "fc00::100"
+VETH1_ADDR6 = "fc00::200"
+CLIENT_PORT = 48878
+SERVER_PORT = 48879
+MAGIC_MESSAGE = "THIS IS THE ORIGINAL MESSAGE, PLEASE REASSEMBLE ME"
+
+
+def print_header(f):
+ f.write("// SPDX-License-Identifier: GPL-2.0\n")
+ f.write("/* DO NOT EDIT -- this file is generated */\n")
+ f.write("\n")
+ f.write("#ifndef _IP_CHECK_DEFRAG_FRAGS_H\n")
+ f.write("#define _IP_CHECK_DEFRAG_FRAGS_H\n")
+ f.write("\n")
+ f.write("#include <stdint.h>\n")
+ f.write("\n")
+
+
+def print_frags(f, frags, v6):
+ for idx, frag in enumerate(frags):
+ # 10 bytes per line to keep width in check
+ chunks = [frag[i : i + 10] for i in range(0, len(frag), 10)]
+ chunks_fmted = [", ".join([str(hex(b)) for b in chunk]) for chunk in chunks]
+ suffix = "6" if v6 else ""
+
+ f.write(f"static uint8_t frag{suffix}_{idx}[] = {{\n")
+ for chunk in chunks_fmted:
+ f.write(f"\t{chunk},\n")
+ f.write(f"}};\n")
+
+
+def print_trailer(f):
+ f.write("\n")
+ f.write("#endif /* _IP_CHECK_DEFRAG_FRAGS_H */\n")
+
+
+def main(f):
+ # srcip of 0 is filled in by IP_HDRINCL
+ sip = "0.0.0.0"
+ sip6 = VETH0_ADDR6
+ dip = VETH1_ADDR
+ dip6 = VETH1_ADDR6
+ sport = CLIENT_PORT
+ dport = SERVER_PORT
+ payload = MAGIC_MESSAGE.encode()
+
+ # Disable UDPv4 checksums to keep code simpler
+ pkt = IP(src=sip,dst=dip) / UDP(sport=sport,dport=dport,chksum=0) / Raw(load=payload)
+ # UDPv6 requires a checksum
+ # Also pin the ipv6 fragment header ID, otherwise it's a random value
+ pkt6 = IPv6(src=sip6,dst=dip6) / IPv6ExtHdrFragment(id=0xBEEF) / UDP(sport=sport,dport=dport) / Raw(load=payload)
+
+ frags = [f.build() for f in pkt.fragment(24)]
+ frags6 = [f.build() for f in fragment6(pkt6, 72)]
+
+ print_header(f)
+ print_frags(f, frags, False)
+ print_frags(f, frags6, True)
+ print_trailer(f)
+
+
+if __name__ == "__main__":
+ dir = os.path.dirname(os.path.realpath(__file__))
+ header = f"{dir}/ip_check_defrag_frags.h"
+ with open(header, "w") as f:
+ main(f)
diff --git a/tools/testing/selftests/bpf/get_cgroup_id_user.c b/tools/testing/selftests/bpf/get_cgroup_id_user.c
index e8da7b39158d..aefd83ebdcd7 100644
--- a/tools/testing/selftests/bpf/get_cgroup_id_user.c
+++ b/tools/testing/selftests/bpf/get_cgroup_id_user.c
@@ -19,7 +19,7 @@
#include <bpf/libbpf.h>
#include "cgroup_helpers.h"
-#include "bpf_rlimit.h"
+#include "testing_helpers.h"
#define CHECK(condition, tag, format...) ({ \
int __ret = !!(condition); \
@@ -48,7 +48,7 @@ static int bpf_find_map(const char *test, struct bpf_object *obj,
int main(int argc, char **argv)
{
const char *probe_name = "syscalls/sys_enter_nanosleep";
- const char *file = "get_cgroup_id_kern.o";
+ const char *file = "get_cgroup_id_kern.bpf.o";
int err, bytes, efd, prog_fd, pmu_fd;
int cgroup_fd, cgidmap_fd, pidmap_fd;
struct perf_event_attr attr = {};
@@ -57,23 +57,20 @@ int main(int argc, char **argv)
__u32 key = 0, pid;
int exit_code = 1;
char buf[256];
+ const struct timespec req = {
+ .tv_sec = 1,
+ .tv_nsec = 0,
+ };
- err = setup_cgroup_environment();
- if (CHECK(err, "setup_cgroup_environment", "err %d errno %d\n", err,
- errno))
+ cgroup_fd = cgroup_setup_and_join(TEST_CGROUP);
+ if (CHECK(cgroup_fd < 0, "cgroup_setup_and_join", "err %d errno %d\n", cgroup_fd, errno))
return 1;
- cgroup_fd = create_and_get_cgroup(TEST_CGROUP);
- if (CHECK(cgroup_fd < 0, "create_and_get_cgroup", "err %d errno %d\n",
- cgroup_fd, errno))
- goto cleanup_cgroup_env;
-
- err = join_cgroup(TEST_CGROUP);
- if (CHECK(err, "join_cgroup", "err %d errno %d\n", err, errno))
- goto cleanup_cgroup_env;
+ /* Use libbpf 1.0 API mode */
+ libbpf_set_strict_mode(LIBBPF_STRICT_ALL);
- err = bpf_prog_load(file, BPF_PROG_TYPE_TRACEPOINT, &obj, &prog_fd);
- if (CHECK(err, "bpf_prog_load", "err %d errno %d\n", err, errno))
+ err = bpf_prog_test_load(file, BPF_PROG_TYPE_TRACEPOINT, &obj, &prog_fd);
+ if (CHECK(err, "bpf_prog_test_load", "err %d errno %d\n", err, errno))
goto cleanup_cgroup_env;
cgidmap_fd = bpf_find_map(__func__, obj, "cg_ids");
@@ -89,8 +86,13 @@ int main(int argc, char **argv)
pid = getpid();
bpf_map_update_elem(pidmap_fd, &key, &pid, 0);
- snprintf(buf, sizeof(buf),
- "/sys/kernel/debug/tracing/events/%s/id", probe_name);
+ if (access("/sys/kernel/tracing/trace", F_OK) == 0) {
+ snprintf(buf, sizeof(buf),
+ "/sys/kernel/tracing/events/%s/id", probe_name);
+ } else {
+ snprintf(buf, sizeof(buf),
+ "/sys/kernel/debug/tracing/events/%s/id", probe_name);
+ }
efd = open(buf, O_RDONLY, 0);
if (CHECK(efd < 0, "open", "err %d errno %d\n", efd, errno))
goto close_prog;
@@ -125,7 +127,7 @@ int main(int argc, char **argv)
goto close_pmu;
/* trigger some syscalls */
- sleep(1);
+ syscall(__NR_nanosleep, &req, NULL);
err = bpf_map_lookup_elem(cgidmap_fd, &key, &kcgid);
if (CHECK(err, "bpf_map_lookup_elem", "err %d errno %d\n", err, errno))
diff --git a/tools/testing/selftests/bpf/gnu/stubs.h b/tools/testing/selftests/bpf/gnu/stubs.h
index 719225b16626..1c638d9dce1a 100644
--- a/tools/testing/selftests/bpf/gnu/stubs.h
+++ b/tools/testing/selftests/bpf/gnu/stubs.h
@@ -1 +1 @@
-/* dummy .h to trick /usr/include/features.h to work with 'clang -target bpf' */
+/* dummy .h to trick /usr/include/features.h to work with 'clang --target=bpf' */
diff --git a/tools/testing/selftests/bpf/ima_setup.sh b/tools/testing/selftests/bpf/ima_setup.sh
new file mode 100755
index 000000000000..8ecead4ccad0
--- /dev/null
+++ b/tools/testing/selftests/bpf/ima_setup.sh
@@ -0,0 +1,156 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+set -e
+set -u
+set -o pipefail
+
+IMA_POLICY_FILE="/sys/kernel/security/ima/policy"
+TEST_BINARY="/bin/true"
+VERBOSE="${SELFTESTS_VERBOSE:=0}"
+LOG_FILE="$(mktemp /tmp/ima_setup.XXXX.log)"
+
+usage()
+{
+ echo "Usage: $0 <setup|cleanup|run|modify-bin|restore-bin|load-policy> <existing_tmp_dir>"
+ exit 1
+}
+
+ensure_mount_securityfs()
+{
+ local securityfs_dir=$(grep "securityfs" /proc/mounts | awk '{print $2}')
+
+ if [ -z "${securityfs_dir}" ]; then
+ securityfs_dir=/sys/kernel/security
+ mount -t securityfs security "${securityfs_dir}"
+ fi
+
+ if [ ! -d "${securityfs_dir}" ]; then
+ echo "${securityfs_dir}: securityfs is not mounted" && exit 1
+ fi
+}
+
+setup()
+{
+ local tmp_dir="$1"
+ local mount_img="${tmp_dir}/test.img"
+ local mount_dir="${tmp_dir}/mnt"
+ local copied_bin_path="${mount_dir}/$(basename ${TEST_BINARY})"
+ mkdir -p ${mount_dir}
+
+ dd if=/dev/zero of="${mount_img}" bs=1M count=10
+
+ losetup -f "${mount_img}"
+ local loop_device=$(losetup -a | grep ${mount_img:?} | cut -d ":" -f1)
+
+ mkfs.ext2 "${loop_device:?}"
+ mount "${loop_device}" "${mount_dir}"
+
+ cp "${TEST_BINARY}" "${mount_dir}"
+ local mount_uuid="$(blkid ${loop_device} | sed 's/.*UUID="\([^"]*\)".*/\1/')"
+
+ ensure_mount_securityfs
+ echo "measure func=BPRM_CHECK fsuuid=${mount_uuid}" > ${IMA_POLICY_FILE}
+ echo "measure func=BPRM_CHECK fsuuid=${mount_uuid}" > ${mount_dir}/policy_test
+}
+
+cleanup() {
+ local tmp_dir="$1"
+ local mount_img="${tmp_dir}/test.img"
+ local mount_dir="${tmp_dir}/mnt"
+
+ local loop_devices=$(losetup -a | grep ${mount_img:?} | cut -d ":" -f1)
+
+ for loop_dev in "${loop_devices}"; do
+ losetup -d $loop_dev
+ done
+
+ umount ${mount_dir}
+ rm -rf ${tmp_dir}
+}
+
+run()
+{
+ local tmp_dir="$1"
+ local mount_dir="${tmp_dir}/mnt"
+ local copied_bin_path="${mount_dir}/$(basename ${TEST_BINARY})"
+
+ exec "${copied_bin_path}"
+}
+
+modify_bin()
+{
+ local tmp_dir="$1"
+ local mount_dir="${tmp_dir}/mnt"
+ local copied_bin_path="${mount_dir}/$(basename ${TEST_BINARY})"
+
+ echo "mod" >> "${copied_bin_path}"
+}
+
+restore_bin()
+{
+ local tmp_dir="$1"
+ local mount_dir="${tmp_dir}/mnt"
+ local copied_bin_path="${mount_dir}/$(basename ${TEST_BINARY})"
+
+ truncate -s -4 "${copied_bin_path}"
+}
+
+load_policy()
+{
+ local tmp_dir="$1"
+ local mount_dir="${tmp_dir}/mnt"
+
+ echo ${mount_dir}/policy_test > ${IMA_POLICY_FILE} 2> /dev/null
+}
+
+catch()
+{
+ local exit_code="$1"
+ local log_file="$2"
+
+ if [[ "${exit_code}" -ne 0 ]]; then
+ cat "${log_file}" >&3
+ fi
+
+ rm -f "${log_file}"
+ exit ${exit_code}
+}
+
+main()
+{
+ [[ $# -ne 2 ]] && usage
+
+ local action="$1"
+ local tmp_dir="$2"
+
+ [[ ! -d "${tmp_dir}" ]] && echo "Directory ${tmp_dir} doesn't exist" && exit 1
+
+ if [[ "${action}" == "setup" ]]; then
+ setup "${tmp_dir}"
+ elif [[ "${action}" == "cleanup" ]]; then
+ cleanup "${tmp_dir}"
+ elif [[ "${action}" == "run" ]]; then
+ run "${tmp_dir}"
+ elif [[ "${action}" == "modify-bin" ]]; then
+ modify_bin "${tmp_dir}"
+ elif [[ "${action}" == "restore-bin" ]]; then
+ restore_bin "${tmp_dir}"
+ elif [[ "${action}" == "load-policy" ]]; then
+ load_policy "${tmp_dir}"
+ else
+ echo "Unknown action: ${action}"
+ exit 1
+ fi
+}
+
+trap 'catch "$?" "${LOG_FILE}"' EXIT
+
+if [[ "${VERBOSE}" -eq 0 ]]; then
+ # Save the stderr to 3 so that we can output back to
+ # it incase of an error.
+ exec 3>&2 1>"${LOG_FILE}" 2>&1
+fi
+
+main "$@"
+rm -f "${LOG_FILE}"
diff --git a/tools/testing/selftests/bpf/ip_check_defrag_frags.h b/tools/testing/selftests/bpf/ip_check_defrag_frags.h
new file mode 100644
index 000000000000..70ab7e9fa22b
--- /dev/null
+++ b/tools/testing/selftests/bpf/ip_check_defrag_frags.h
@@ -0,0 +1,57 @@
+// SPDX-License-Identifier: GPL-2.0
+/* DO NOT EDIT -- this file is generated */
+
+#ifndef _IP_CHECK_DEFRAG_FRAGS_H
+#define _IP_CHECK_DEFRAG_FRAGS_H
+
+#include <stdint.h>
+
+static uint8_t frag_0[] = {
+ 0x45, 0x0, 0x0, 0x2c, 0x0, 0x1, 0x20, 0x0, 0x40, 0x11,
+ 0xac, 0xe8, 0x0, 0x0, 0x0, 0x0, 0xac, 0x10, 0x1, 0xc8,
+ 0xbe, 0xee, 0xbe, 0xef, 0x0, 0x3a, 0x0, 0x0, 0x54, 0x48,
+ 0x49, 0x53, 0x20, 0x49, 0x53, 0x20, 0x54, 0x48, 0x45, 0x20,
+ 0x4f, 0x52, 0x49, 0x47,
+};
+static uint8_t frag_1[] = {
+ 0x45, 0x0, 0x0, 0x2c, 0x0, 0x1, 0x20, 0x3, 0x40, 0x11,
+ 0xac, 0xe5, 0x0, 0x0, 0x0, 0x0, 0xac, 0x10, 0x1, 0xc8,
+ 0x49, 0x4e, 0x41, 0x4c, 0x20, 0x4d, 0x45, 0x53, 0x53, 0x41,
+ 0x47, 0x45, 0x2c, 0x20, 0x50, 0x4c, 0x45, 0x41, 0x53, 0x45,
+ 0x20, 0x52, 0x45, 0x41,
+};
+static uint8_t frag_2[] = {
+ 0x45, 0x0, 0x0, 0x1e, 0x0, 0x1, 0x0, 0x6, 0x40, 0x11,
+ 0xcc, 0xf0, 0x0, 0x0, 0x0, 0x0, 0xac, 0x10, 0x1, 0xc8,
+ 0x53, 0x53, 0x45, 0x4d, 0x42, 0x4c, 0x45, 0x20, 0x4d, 0x45,
+};
+static uint8_t frag6_0[] = {
+ 0x60, 0x0, 0x0, 0x0, 0x0, 0x20, 0x2c, 0x40, 0xfc, 0x0,
+ 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0,
+ 0x0, 0x0, 0x1, 0x0, 0xfc, 0x0, 0x0, 0x0, 0x0, 0x0,
+ 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x2, 0x0,
+ 0x11, 0x0, 0x0, 0x1, 0x0, 0x0, 0xbe, 0xef, 0xbe, 0xee,
+ 0xbe, 0xef, 0x0, 0x3a, 0xd0, 0xf8, 0x54, 0x48, 0x49, 0x53,
+ 0x20, 0x49, 0x53, 0x20, 0x54, 0x48, 0x45, 0x20, 0x4f, 0x52,
+ 0x49, 0x47,
+};
+static uint8_t frag6_1[] = {
+ 0x60, 0x0, 0x0, 0x0, 0x0, 0x20, 0x2c, 0x40, 0xfc, 0x0,
+ 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0,
+ 0x0, 0x0, 0x1, 0x0, 0xfc, 0x0, 0x0, 0x0, 0x0, 0x0,
+ 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x2, 0x0,
+ 0x11, 0x0, 0x0, 0x19, 0x0, 0x0, 0xbe, 0xef, 0x49, 0x4e,
+ 0x41, 0x4c, 0x20, 0x4d, 0x45, 0x53, 0x53, 0x41, 0x47, 0x45,
+ 0x2c, 0x20, 0x50, 0x4c, 0x45, 0x41, 0x53, 0x45, 0x20, 0x52,
+ 0x45, 0x41,
+};
+static uint8_t frag6_2[] = {
+ 0x60, 0x0, 0x0, 0x0, 0x0, 0x12, 0x2c, 0x40, 0xfc, 0x0,
+ 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0,
+ 0x0, 0x0, 0x1, 0x0, 0xfc, 0x0, 0x0, 0x0, 0x0, 0x0,
+ 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x2, 0x0,
+ 0x11, 0x0, 0x0, 0x30, 0x0, 0x0, 0xbe, 0xef, 0x53, 0x53,
+ 0x45, 0x4d, 0x42, 0x4c, 0x45, 0x20, 0x4d, 0x45,
+};
+
+#endif /* _IP_CHECK_DEFRAG_FRAGS_H */
diff --git a/tools/testing/selftests/bpf/json_writer.c b/tools/testing/selftests/bpf/json_writer.c
new file mode 120000
index 000000000000..5effa31e2f39
--- /dev/null
+++ b/tools/testing/selftests/bpf/json_writer.c
@@ -0,0 +1 @@
+../../../bpf/bpftool/json_writer.c \ No newline at end of file
diff --git a/tools/testing/selftests/bpf/json_writer.h b/tools/testing/selftests/bpf/json_writer.h
new file mode 120000
index 000000000000..e0a264c26752
--- /dev/null
+++ b/tools/testing/selftests/bpf/json_writer.h
@@ -0,0 +1 @@
+../../../bpf/bpftool/json_writer.h \ No newline at end of file
diff --git a/tools/testing/selftests/bpf/liburandom_read.map b/tools/testing/selftests/bpf/liburandom_read.map
new file mode 100644
index 000000000000..38a97a419a04
--- /dev/null
+++ b/tools/testing/selftests/bpf/liburandom_read.map
@@ -0,0 +1,15 @@
+LIBURANDOM_READ_1.0.0 {
+ global:
+ urandlib_api;
+ urandlib_api_sameoffset;
+ urandlib_read_without_sema;
+ urandlib_read_with_sema;
+ urandlib_read_with_sema_semaphore;
+ local:
+ *;
+};
+
+LIBURANDOM_READ_2.0.0 {
+ global:
+ urandlib_api;
+} LIBURANDOM_READ_1.0.0;
diff --git a/tools/testing/selftests/bpf/map_tests/array_map_batch_ops.c b/tools/testing/selftests/bpf/map_tests/array_map_batch_ops.c
index f0a64d8ac59a..b595556315bc 100644
--- a/tools/testing/selftests/bpf/map_tests/array_map_batch_ops.c
+++ b/tools/testing/selftests/bpf/map_tests/array_map_batch_ops.c
@@ -3,16 +3,20 @@
#include <stdio.h>
#include <errno.h>
#include <string.h>
+#include <unistd.h>
#include <bpf/bpf.h>
#include <bpf/libbpf.h>
#include <test_maps.h>
+static int nr_cpus;
+
static void map_batch_update(int map_fd, __u32 max_entries, int *keys,
- int *values)
+ __s64 *values, bool is_pcpu)
{
- int i, err;
+ int i, j, err;
+ int cpu_offset = 0;
DECLARE_LIBBPF_OPTS(bpf_map_batch_opts, opts,
.elem_flags = 0,
.flags = 0,
@@ -20,22 +24,41 @@ static void map_batch_update(int map_fd, __u32 max_entries, int *keys,
for (i = 0; i < max_entries; i++) {
keys[i] = i;
- values[i] = i + 1;
+ if (is_pcpu) {
+ cpu_offset = i * nr_cpus;
+ for (j = 0; j < nr_cpus; j++)
+ (values + cpu_offset)[j] = i + 1 + j;
+ } else {
+ values[i] = i + 1;
+ }
}
err = bpf_map_update_batch(map_fd, keys, values, &max_entries, &opts);
CHECK(err, "bpf_map_update_batch()", "error:%s\n", strerror(errno));
}
-static void map_batch_verify(int *visited, __u32 max_entries,
- int *keys, int *values)
+static void map_batch_verify(int *visited, __u32 max_entries, int *keys,
+ __s64 *values, bool is_pcpu)
{
- int i;
+ int i, j;
+ int cpu_offset = 0;
memset(visited, 0, max_entries * sizeof(*visited));
for (i = 0; i < max_entries; i++) {
- CHECK(keys[i] + 1 != values[i], "key/value checking",
- "error: i %d key %d value %d\n", i, keys[i], values[i]);
+ if (is_pcpu) {
+ cpu_offset = i * nr_cpus;
+ for (j = 0; j < nr_cpus; j++) {
+ __s64 value = (values + cpu_offset)[j];
+ CHECK(keys[i] + j + 1 != value,
+ "key/value checking",
+ "error: i %d j %d key %d value %lld\n", i,
+ j, keys[i], value);
+ }
+ } else {
+ CHECK(keys[i] + 1 != values[i], "key/value checking",
+ "error: i %d key %d value %lld\n", i, keys[i],
+ values[i]);
+ }
visited[i] = 1;
}
for (i = 0; i < max_entries; i++) {
@@ -44,59 +67,53 @@ static void map_batch_verify(int *visited, __u32 max_entries,
}
}
-void test_array_map_batch_ops(void)
+static void __test_map_lookup_and_update_batch(bool is_pcpu)
{
- struct bpf_create_map_attr xattr = {
- .name = "array_map",
- .map_type = BPF_MAP_TYPE_ARRAY,
- .key_size = sizeof(int),
- .value_size = sizeof(int),
- };
- int map_fd, *keys, *values, *visited;
+ int map_fd, *keys, *visited;
__u32 count, total, total_success;
const __u32 max_entries = 10;
- bool nospace_err;
__u64 batch = 0;
- int err, step;
+ int err, step, value_size;
+ void *values;
DECLARE_LIBBPF_OPTS(bpf_map_batch_opts, opts,
.elem_flags = 0,
.flags = 0,
);
- xattr.max_entries = max_entries;
- map_fd = bpf_create_map_xattr(&xattr);
+ map_fd = bpf_map_create(is_pcpu ? BPF_MAP_TYPE_PERCPU_ARRAY : BPF_MAP_TYPE_ARRAY,
+ "array_map", sizeof(int), sizeof(__s64), max_entries, NULL);
CHECK(map_fd == -1,
- "bpf_create_map_xattr()", "error:%s\n", strerror(errno));
+ "bpf_map_create()", "error:%s\n", strerror(errno));
+
+ value_size = sizeof(__s64);
+ if (is_pcpu)
+ value_size *= nr_cpus;
- keys = malloc(max_entries * sizeof(int));
- values = malloc(max_entries * sizeof(int));
- visited = malloc(max_entries * sizeof(int));
+ keys = calloc(max_entries, sizeof(*keys));
+ values = calloc(max_entries, value_size);
+ visited = calloc(max_entries, sizeof(*visited));
CHECK(!keys || !values || !visited, "malloc()", "error:%s\n",
strerror(errno));
- /* populate elements to the map */
- map_batch_update(map_fd, max_entries, keys, values);
-
/* test 1: lookup in a loop with various steps. */
total_success = 0;
for (step = 1; step < max_entries; step++) {
- map_batch_update(map_fd, max_entries, keys, values);
- map_batch_verify(visited, max_entries, keys, values);
+ map_batch_update(map_fd, max_entries, keys, values, is_pcpu);
+ map_batch_verify(visited, max_entries, keys, values, is_pcpu);
memset(keys, 0, max_entries * sizeof(*keys));
- memset(values, 0, max_entries * sizeof(*values));
+ memset(values, 0, max_entries * value_size);
batch = 0;
total = 0;
/* iteratively lookup/delete elements with 'step'
* elements each.
*/
count = step;
- nospace_err = false;
while (true) {
err = bpf_map_lookup_batch(map_fd,
- total ? &batch : NULL, &batch,
- keys + total,
- values + total,
- &count, &opts);
+ total ? &batch : NULL,
+ &batch, keys + total,
+ values + total * value_size,
+ &count, &opts);
CHECK((err && errno != ENOENT), "lookup with steps",
"error: %s\n", strerror(errno));
@@ -107,13 +124,10 @@ void test_array_map_batch_ops(void)
}
- if (nospace_err == true)
- continue;
-
CHECK(total != max_entries, "lookup with steps",
"total = %u, max_entries = %u\n", total, max_entries);
- map_batch_verify(visited, max_entries, keys, values);
+ map_batch_verify(visited, max_entries, keys, values, is_pcpu);
total_success++;
}
@@ -121,9 +135,31 @@ void test_array_map_batch_ops(void)
CHECK(total_success == 0, "check total_success",
"unexpected failure\n");
- printf("%s:PASS\n", __func__);
-
free(keys);
free(values);
free(visited);
+ close(map_fd);
+}
+
+static void array_map_batch_ops(void)
+{
+ __test_map_lookup_and_update_batch(false);
+ printf("test_%s:PASS\n", __func__);
+}
+
+static void array_percpu_map_batch_ops(void)
+{
+ __test_map_lookup_and_update_batch(true);
+ printf("test_%s:PASS\n", __func__);
+}
+
+void test_array_map_batch_ops(void)
+{
+ nr_cpus = libbpf_num_possible_cpus();
+
+ CHECK(nr_cpus < 0, "nr_cpus checking",
+ "error: get possible cpus failed");
+
+ array_map_batch_ops();
+ array_percpu_map_batch_ops();
}
diff --git a/tools/testing/selftests/bpf/map_tests/htab_map_batch_ops.c b/tools/testing/selftests/bpf/map_tests/htab_map_batch_ops.c
index 976bf415fbdd..1230ccf90128 100644
--- a/tools/testing/selftests/bpf/map_tests/htab_map_batch_ops.c
+++ b/tools/testing/selftests/bpf/map_tests/htab_map_batch_ops.c
@@ -3,6 +3,7 @@
#include <stdio.h>
#include <errno.h>
#include <string.h>
+#include <unistd.h>
#include <bpf/bpf.h>
#include <bpf/libbpf.h>
@@ -83,22 +84,15 @@ void __test_map_lookup_and_delete_batch(bool is_pcpu)
int err, step, value_size;
bool nospace_err;
void *values;
- struct bpf_create_map_attr xattr = {
- .name = "hash_map",
- .map_type = is_pcpu ? BPF_MAP_TYPE_PERCPU_HASH :
- BPF_MAP_TYPE_HASH,
- .key_size = sizeof(int),
- .value_size = sizeof(int),
- };
DECLARE_LIBBPF_OPTS(bpf_map_batch_opts, opts,
.elem_flags = 0,
.flags = 0,
);
- xattr.max_entries = max_entries;
- map_fd = bpf_create_map_xattr(&xattr);
+ map_fd = bpf_map_create(is_pcpu ? BPF_MAP_TYPE_PERCPU_HASH : BPF_MAP_TYPE_HASH,
+ "hash_map", sizeof(int), sizeof(int), max_entries, NULL);
CHECK(map_fd == -1,
- "bpf_create_map_xattr()", "error:%s\n", strerror(errno));
+ "bpf_map_create()", "error:%s\n", strerror(errno));
value_size = is_pcpu ? sizeof(value) : sizeof(int);
keys = malloc(max_entries * sizeof(int));
@@ -262,6 +256,7 @@ void __test_map_lookup_and_delete_batch(bool is_pcpu)
free(visited);
if (!is_pcpu)
free(values);
+ close(map_fd);
}
void htab_map_batch_ops(void)
diff --git a/tools/testing/selftests/bpf/map_tests/lpm_trie_map_batch_ops.c b/tools/testing/selftests/bpf/map_tests/lpm_trie_map_batch_ops.c
new file mode 100644
index 000000000000..b66d56ddb7ef
--- /dev/null
+++ b/tools/testing/selftests/bpf/map_tests/lpm_trie_map_batch_ops.c
@@ -0,0 +1,155 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include <arpa/inet.h>
+#include <linux/bpf.h>
+#include <netinet/in.h>
+#include <stdio.h>
+#include <errno.h>
+#include <string.h>
+#include <stdlib.h>
+#include <unistd.h>
+
+#include <bpf/bpf.h>
+#include <bpf/libbpf.h>
+
+#include <test_maps.h>
+
+struct test_lpm_key {
+ __u32 prefix;
+ struct in_addr ipv4;
+};
+
+static void map_batch_update(int map_fd, __u32 max_entries,
+ struct test_lpm_key *keys, int *values)
+{
+ __u32 i;
+ int err;
+ char buff[16] = { 0 };
+ DECLARE_LIBBPF_OPTS(bpf_map_batch_opts, opts,
+ .elem_flags = 0,
+ .flags = 0,
+ );
+
+ for (i = 0; i < max_entries; i++) {
+ keys[i].prefix = 32;
+ snprintf(buff, 16, "192.168.1.%d", i + 1);
+ inet_pton(AF_INET, buff, &keys[i].ipv4);
+ values[i] = i + 1;
+ }
+
+ err = bpf_map_update_batch(map_fd, keys, values, &max_entries, &opts);
+ CHECK(err, "bpf_map_update_batch()", "error:%s\n", strerror(errno));
+}
+
+static void map_batch_verify(int *visited, __u32 max_entries,
+ struct test_lpm_key *keys, int *values)
+{
+ char buff[16] = { 0 };
+ int lower_byte = 0;
+ __u32 i;
+
+ memset(visited, 0, max_entries * sizeof(*visited));
+ for (i = 0; i < max_entries; i++) {
+ inet_ntop(AF_INET, &keys[i].ipv4, buff, 32);
+ CHECK(sscanf(buff, "192.168.1.%d", &lower_byte) == EOF,
+ "sscanf()", "error: i %d\n", i);
+ CHECK(lower_byte != values[i], "key/value checking",
+ "error: i %d key %s value %d\n", i, buff, values[i]);
+ visited[i] = 1;
+ }
+ for (i = 0; i < max_entries; i++) {
+ CHECK(visited[i] != 1, "visited checking",
+ "error: keys array at index %d missing\n", i);
+ }
+}
+
+void test_lpm_trie_map_batch_ops(void)
+{
+ LIBBPF_OPTS(bpf_map_create_opts, create_opts, .map_flags = BPF_F_NO_PREALLOC);
+ struct test_lpm_key *keys, key;
+ int map_fd, *values, *visited;
+ __u32 step, count, total, total_success;
+ const __u32 max_entries = 10;
+ __u64 batch = 0;
+ int err;
+ DECLARE_LIBBPF_OPTS(bpf_map_batch_opts, opts,
+ .elem_flags = 0,
+ .flags = 0,
+ );
+
+ map_fd = bpf_map_create(BPF_MAP_TYPE_LPM_TRIE, "lpm_trie_map",
+ sizeof(struct test_lpm_key), sizeof(int),
+ max_entries, &create_opts);
+ CHECK(map_fd == -1, "bpf_map_create()", "error:%s\n",
+ strerror(errno));
+
+ keys = malloc(max_entries * sizeof(struct test_lpm_key));
+ values = malloc(max_entries * sizeof(int));
+ visited = malloc(max_entries * sizeof(int));
+ CHECK(!keys || !values || !visited, "malloc()", "error:%s\n",
+ strerror(errno));
+
+ total_success = 0;
+ for (step = 1; step < max_entries; step++) {
+ map_batch_update(map_fd, max_entries, keys, values);
+ map_batch_verify(visited, max_entries, keys, values);
+ memset(keys, 0, max_entries * sizeof(*keys));
+ memset(values, 0, max_entries * sizeof(*values));
+ batch = 0;
+ total = 0;
+ /* iteratively lookup/delete elements with 'step'
+ * elements each.
+ */
+ count = step;
+ while (true) {
+ err = bpf_map_lookup_batch(map_fd,
+ total ? &batch : NULL, &batch,
+ keys + total, values + total, &count, &opts);
+
+ CHECK((err && errno != ENOENT), "lookup with steps",
+ "error: %s\n", strerror(errno));
+
+ total += count;
+ if (err)
+ break;
+ }
+
+ CHECK(total != max_entries, "lookup with steps",
+ "total = %u, max_entries = %u\n", total, max_entries);
+
+ map_batch_verify(visited, max_entries, keys, values);
+
+ total = 0;
+ count = step;
+ while (total < max_entries) {
+ if (max_entries - total < step)
+ count = max_entries - total;
+ err = bpf_map_delete_batch(map_fd, keys + total, &count,
+ &opts);
+ CHECK((err && errno != ENOENT), "delete batch",
+ "error: %s\n", strerror(errno));
+ total += count;
+ if (err)
+ break;
+ }
+ CHECK(total != max_entries, "delete with steps",
+ "total = %u, max_entries = %u\n", total, max_entries);
+
+ /* check map is empty, errono == ENOENT */
+ err = bpf_map_get_next_key(map_fd, NULL, &key);
+ CHECK(!err || errno != ENOENT, "bpf_map_get_next_key()",
+ "error: %s\n", strerror(errno));
+
+ total_success++;
+ }
+
+ CHECK(total_success == 0, "check total_success",
+ "unexpected failure\n");
+
+ printf("%s:PASS\n", __func__);
+
+ free(keys);
+ free(values);
+ free(visited);
+ close(map_fd);
+}
diff --git a/tools/testing/selftests/bpf/map_tests/map_in_map_batch_ops.c b/tools/testing/selftests/bpf/map_tests/map_in_map_batch_ops.c
new file mode 100644
index 000000000000..66191ae9863c
--- /dev/null
+++ b/tools/testing/selftests/bpf/map_tests/map_in_map_batch_ops.c
@@ -0,0 +1,252 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include <stdio.h>
+#include <errno.h>
+#include <string.h>
+#include <unistd.h>
+
+#include <bpf/bpf.h>
+#include <bpf/libbpf.h>
+
+#include <test_maps.h>
+
+#define OUTER_MAP_ENTRIES 10
+
+static __u32 get_map_id_from_fd(int map_fd)
+{
+ struct bpf_map_info map_info = {};
+ uint32_t info_len = sizeof(map_info);
+ int ret;
+
+ ret = bpf_map_get_info_by_fd(map_fd, &map_info, &info_len);
+ CHECK(ret < 0, "Finding map info failed", "error:%s\n",
+ strerror(errno));
+
+ return map_info.id;
+}
+
+/* This creates number of OUTER_MAP_ENTRIES maps that will be stored
+ * in outer map and return the created map_fds
+ */
+static void create_inner_maps(enum bpf_map_type map_type,
+ __u32 *inner_map_fds)
+{
+ int map_fd, map_index, ret;
+ __u32 map_key = 0, map_id;
+ char map_name[16];
+
+ for (map_index = 0; map_index < OUTER_MAP_ENTRIES; map_index++) {
+ memset(map_name, 0, sizeof(map_name));
+ snprintf(map_name, sizeof(map_name), "inner_map_fd_%d", map_index);
+ map_fd = bpf_map_create(map_type, map_name, sizeof(__u32),
+ sizeof(__u32), 1, NULL);
+ CHECK(map_fd < 0,
+ "inner bpf_map_create() failed",
+ "map_type=(%d) map_name(%s), error:%s\n",
+ map_type, map_name, strerror(errno));
+
+ /* keep track of the inner map fd as it is required
+ * to add records in outer map
+ */
+ inner_map_fds[map_index] = map_fd;
+
+ /* Add entry into this created map
+ * eg: map1 key = 0, value = map1's map id
+ * map2 key = 0, value = map2's map id
+ */
+ map_id = get_map_id_from_fd(map_fd);
+ ret = bpf_map_update_elem(map_fd, &map_key, &map_id, 0);
+ CHECK(ret != 0,
+ "bpf_map_update_elem failed",
+ "map_type=(%d) map_name(%s), error:%s\n",
+ map_type, map_name, strerror(errno));
+ }
+}
+
+static int create_outer_map(enum bpf_map_type map_type, __u32 inner_map_fd)
+{
+ int outer_map_fd;
+ LIBBPF_OPTS(bpf_map_create_opts, attr);
+
+ attr.inner_map_fd = inner_map_fd;
+ outer_map_fd = bpf_map_create(map_type, "outer_map", sizeof(__u32),
+ sizeof(__u32), OUTER_MAP_ENTRIES,
+ &attr);
+ CHECK(outer_map_fd < 0,
+ "outer bpf_map_create()",
+ "map_type=(%d), error:%s\n",
+ map_type, strerror(errno));
+
+ return outer_map_fd;
+}
+
+static void validate_fetch_results(int outer_map_fd,
+ __u32 *fetched_keys, __u32 *fetched_values,
+ __u32 max_entries_fetched)
+{
+ __u32 inner_map_key, inner_map_value;
+ int inner_map_fd, entry, err;
+ __u32 outer_map_value;
+
+ for (entry = 0; entry < max_entries_fetched; ++entry) {
+ outer_map_value = fetched_values[entry];
+ inner_map_fd = bpf_map_get_fd_by_id(outer_map_value);
+ CHECK(inner_map_fd < 0,
+ "Failed to get inner map fd",
+ "from id(%d), error=%s\n",
+ outer_map_value, strerror(errno));
+ err = bpf_map_get_next_key(inner_map_fd, NULL, &inner_map_key);
+ CHECK(err != 0,
+ "Failed to get inner map key",
+ "error=%s\n", strerror(errno));
+
+ err = bpf_map_lookup_elem(inner_map_fd, &inner_map_key,
+ &inner_map_value);
+
+ close(inner_map_fd);
+
+ CHECK(err != 0,
+ "Failed to get inner map value",
+ "for key(%d), error=%s\n",
+ inner_map_key, strerror(errno));
+
+ /* Actual value validation */
+ CHECK(outer_map_value != inner_map_value,
+ "Failed to validate inner map value",
+ "fetched(%d) and lookedup(%d)!\n",
+ outer_map_value, inner_map_value);
+ }
+}
+
+static void fetch_and_validate(int outer_map_fd,
+ struct bpf_map_batch_opts *opts,
+ __u32 batch_size, bool delete_entries)
+{
+ __u32 *fetched_keys, *fetched_values, total_fetched = 0;
+ __u32 batch_key = 0, fetch_count, step_size;
+ int err, max_entries = OUTER_MAP_ENTRIES;
+ __u32 value_size = sizeof(__u32);
+
+ /* Total entries needs to be fetched */
+ fetched_keys = calloc(max_entries, value_size);
+ fetched_values = calloc(max_entries, value_size);
+ CHECK((!fetched_keys || !fetched_values),
+ "Memory allocation failed for fetched_keys or fetched_values",
+ "error=%s\n", strerror(errno));
+
+ for (step_size = batch_size;
+ step_size <= max_entries;
+ step_size += batch_size) {
+ fetch_count = step_size;
+ err = delete_entries
+ ? bpf_map_lookup_and_delete_batch(outer_map_fd,
+ total_fetched ? &batch_key : NULL,
+ &batch_key,
+ fetched_keys + total_fetched,
+ fetched_values + total_fetched,
+ &fetch_count, opts)
+ : bpf_map_lookup_batch(outer_map_fd,
+ total_fetched ? &batch_key : NULL,
+ &batch_key,
+ fetched_keys + total_fetched,
+ fetched_values + total_fetched,
+ &fetch_count, opts);
+
+ if (err && errno == ENOSPC) {
+ /* Fetch again with higher batch size */
+ total_fetched = 0;
+ continue;
+ }
+
+ CHECK((err < 0 && (errno != ENOENT)),
+ "lookup with steps failed",
+ "error: %s\n", strerror(errno));
+
+ /* Update the total fetched number */
+ total_fetched += fetch_count;
+ if (err)
+ break;
+ }
+
+ CHECK((total_fetched != max_entries),
+ "Unable to fetch expected entries !",
+ "total_fetched(%d) and max_entries(%d) error: (%d):%s\n",
+ total_fetched, max_entries, errno, strerror(errno));
+
+ /* validate the fetched entries */
+ validate_fetch_results(outer_map_fd, fetched_keys,
+ fetched_values, total_fetched);
+ printf("batch_op(%s) is successful with batch_size(%d)\n",
+ delete_entries ? "LOOKUP_AND_DELETE" : "LOOKUP", batch_size);
+
+ free(fetched_keys);
+ free(fetched_values);
+}
+
+static void _map_in_map_batch_ops(enum bpf_map_type outer_map_type,
+ enum bpf_map_type inner_map_type)
+{
+ __u32 *outer_map_keys, *inner_map_fds;
+ __u32 max_entries = OUTER_MAP_ENTRIES;
+ LIBBPF_OPTS(bpf_map_batch_opts, opts);
+ __u32 value_size = sizeof(__u32);
+ int batch_size[2] = {5, 10};
+ __u32 map_index, op_index;
+ int outer_map_fd, ret;
+
+ outer_map_keys = calloc(max_entries, value_size);
+ inner_map_fds = calloc(max_entries, value_size);
+ CHECK((!outer_map_keys || !inner_map_fds),
+ "Memory allocation failed for outer_map_keys or inner_map_fds",
+ "error=%s\n", strerror(errno));
+
+ create_inner_maps(inner_map_type, inner_map_fds);
+
+ outer_map_fd = create_outer_map(outer_map_type, *inner_map_fds);
+ /* create outer map keys */
+ for (map_index = 0; map_index < max_entries; map_index++)
+ outer_map_keys[map_index] =
+ ((outer_map_type == BPF_MAP_TYPE_ARRAY_OF_MAPS)
+ ? 9 : 1000) - map_index;
+
+ /* batch operation - map_update */
+ ret = bpf_map_update_batch(outer_map_fd, outer_map_keys,
+ inner_map_fds, &max_entries, &opts);
+ CHECK(ret != 0,
+ "Failed to update the outer map batch ops",
+ "error=%s\n", strerror(errno));
+
+ /* batch operation - map_lookup */
+ for (op_index = 0; op_index < 2; ++op_index)
+ fetch_and_validate(outer_map_fd, &opts,
+ batch_size[op_index], false);
+
+ /* batch operation - map_lookup_delete */
+ if (outer_map_type == BPF_MAP_TYPE_HASH_OF_MAPS)
+ fetch_and_validate(outer_map_fd, &opts,
+ max_entries, true /*delete*/);
+
+ /* close all map fds */
+ for (map_index = 0; map_index < max_entries; map_index++)
+ close(inner_map_fds[map_index]);
+ close(outer_map_fd);
+
+ free(inner_map_fds);
+ free(outer_map_keys);
+}
+
+void test_map_in_map_batch_ops_array(void)
+{
+ _map_in_map_batch_ops(BPF_MAP_TYPE_ARRAY_OF_MAPS, BPF_MAP_TYPE_ARRAY);
+ printf("%s:PASS with inner ARRAY map\n", __func__);
+ _map_in_map_batch_ops(BPF_MAP_TYPE_ARRAY_OF_MAPS, BPF_MAP_TYPE_HASH);
+ printf("%s:PASS with inner HASH map\n", __func__);
+}
+
+void test_map_in_map_batch_ops_hash(void)
+{
+ _map_in_map_batch_ops(BPF_MAP_TYPE_HASH_OF_MAPS, BPF_MAP_TYPE_ARRAY);
+ printf("%s:PASS with inner ARRAY map\n", __func__);
+ _map_in_map_batch_ops(BPF_MAP_TYPE_HASH_OF_MAPS, BPF_MAP_TYPE_HASH);
+ printf("%s:PASS with inner HASH map\n", __func__);
+}
diff --git a/tools/testing/selftests/bpf/map_tests/map_percpu_stats.c b/tools/testing/selftests/bpf/map_tests/map_percpu_stats.c
new file mode 100644
index 000000000000..2ea36408816b
--- /dev/null
+++ b/tools/testing/selftests/bpf/map_tests/map_percpu_stats.c
@@ -0,0 +1,470 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2023 Isovalent */
+
+#include <errno.h>
+#include <unistd.h>
+#include <pthread.h>
+
+#include <bpf/bpf.h>
+#include <bpf/libbpf.h>
+
+#include <bpf_util.h>
+#include <test_maps.h>
+
+#include "map_percpu_stats.skel.h"
+
+#define MAX_ENTRIES 16384
+#define MAX_ENTRIES_HASH_OF_MAPS 64
+#define N_THREADS 8
+#define MAX_MAP_KEY_SIZE 4
+
+static void map_info(int map_fd, struct bpf_map_info *info)
+{
+ __u32 len = sizeof(*info);
+ int ret;
+
+ memset(info, 0, sizeof(*info));
+
+ ret = bpf_obj_get_info_by_fd(map_fd, info, &len);
+ CHECK(ret < 0, "bpf_obj_get_info_by_fd", "error: %s\n", strerror(errno));
+}
+
+static const char *map_type_to_s(__u32 type)
+{
+ switch (type) {
+ case BPF_MAP_TYPE_HASH:
+ return "HASH";
+ case BPF_MAP_TYPE_PERCPU_HASH:
+ return "PERCPU_HASH";
+ case BPF_MAP_TYPE_LRU_HASH:
+ return "LRU_HASH";
+ case BPF_MAP_TYPE_LRU_PERCPU_HASH:
+ return "LRU_PERCPU_HASH";
+ case BPF_MAP_TYPE_HASH_OF_MAPS:
+ return "BPF_MAP_TYPE_HASH_OF_MAPS";
+ default:
+ return "<define-me>";
+ }
+}
+
+static __u32 map_count_elements(__u32 type, int map_fd)
+{
+ __u32 key = -1;
+ int n = 0;
+
+ while (!bpf_map_get_next_key(map_fd, &key, &key))
+ n++;
+ return n;
+}
+
+#define BATCH true
+
+static void delete_and_lookup_batch(int map_fd, void *keys, __u32 count)
+{
+ static __u8 values[(8 << 10) * MAX_ENTRIES];
+ void *in_batch = NULL, *out_batch;
+ __u32 save_count = count;
+ int ret;
+
+ ret = bpf_map_lookup_and_delete_batch(map_fd,
+ &in_batch, &out_batch,
+ keys, values, &count,
+ NULL);
+
+ /*
+ * Despite what uapi header says, lookup_and_delete_batch will return
+ * -ENOENT in case we successfully have deleted all elements, so check
+ * this separately
+ */
+ CHECK(ret < 0 && (errno != ENOENT || !count), "bpf_map_lookup_and_delete_batch",
+ "error: %s\n", strerror(errno));
+
+ CHECK(count != save_count,
+ "bpf_map_lookup_and_delete_batch",
+ "deleted not all elements: removed=%u expected=%u\n",
+ count, save_count);
+}
+
+static void delete_all_elements(__u32 type, int map_fd, bool batch)
+{
+ static __u8 val[8 << 10]; /* enough for 1024 CPUs */
+ __u32 key = -1;
+ void *keys;
+ __u32 i, n;
+ int ret;
+
+ keys = calloc(MAX_MAP_KEY_SIZE, MAX_ENTRIES);
+ CHECK(!keys, "calloc", "error: %s\n", strerror(errno));
+
+ for (n = 0; !bpf_map_get_next_key(map_fd, &key, &key); n++)
+ memcpy(keys + n*MAX_MAP_KEY_SIZE, &key, MAX_MAP_KEY_SIZE);
+
+ if (batch) {
+ /* Can't mix delete_batch and delete_and_lookup_batch because
+ * they have different semantics in relation to the keys
+ * argument. However, delete_batch utilize map_delete_elem,
+ * so we actually test it in non-batch scenario */
+ delete_and_lookup_batch(map_fd, keys, n);
+ } else {
+ /* Intentionally mix delete and lookup_and_delete so we can test both */
+ for (i = 0; i < n; i++) {
+ void *keyp = keys + i*MAX_MAP_KEY_SIZE;
+
+ if (i % 2 || type == BPF_MAP_TYPE_HASH_OF_MAPS) {
+ ret = bpf_map_delete_elem(map_fd, keyp);
+ CHECK(ret < 0, "bpf_map_delete_elem",
+ "error: key %u: %s\n", i, strerror(errno));
+ } else {
+ ret = bpf_map_lookup_and_delete_elem(map_fd, keyp, val);
+ CHECK(ret < 0, "bpf_map_lookup_and_delete_elem",
+ "error: key %u: %s\n", i, strerror(errno));
+ }
+ }
+ }
+
+ free(keys);
+}
+
+static bool is_lru(__u32 map_type)
+{
+ return map_type == BPF_MAP_TYPE_LRU_HASH ||
+ map_type == BPF_MAP_TYPE_LRU_PERCPU_HASH;
+}
+
+static bool is_percpu(__u32 map_type)
+{
+ return map_type == BPF_MAP_TYPE_PERCPU_HASH ||
+ map_type == BPF_MAP_TYPE_LRU_PERCPU_HASH;
+}
+
+struct upsert_opts {
+ __u32 map_type;
+ int map_fd;
+ __u32 n;
+ bool retry_for_nomem;
+};
+
+static int create_small_hash(void)
+{
+ int map_fd;
+
+ map_fd = bpf_map_create(BPF_MAP_TYPE_HASH, "small", 4, 4, 4, NULL);
+ CHECK(map_fd < 0, "bpf_map_create()", "error:%s (name=%s)\n",
+ strerror(errno), "small");
+
+ return map_fd;
+}
+
+static bool retry_for_nomem_fn(int err)
+{
+ return err == ENOMEM;
+}
+
+static void *patch_map_thread(void *arg)
+{
+ /* 8KB is enough for 1024 CPUs. And it is shared between N_THREADS. */
+ static __u8 blob[8 << 10];
+ struct upsert_opts *opts = arg;
+ void *val_ptr;
+ int val;
+ int ret;
+ int i;
+
+ for (i = 0; i < opts->n; i++) {
+ if (opts->map_type == BPF_MAP_TYPE_HASH_OF_MAPS) {
+ val = create_small_hash();
+ val_ptr = &val;
+ } else if (is_percpu(opts->map_type)) {
+ val_ptr = blob;
+ } else {
+ val = rand();
+ val_ptr = &val;
+ }
+
+ /* 2 seconds may be enough ? */
+ if (opts->retry_for_nomem)
+ ret = map_update_retriable(opts->map_fd, &i, val_ptr, 0,
+ 40, retry_for_nomem_fn);
+ else
+ ret = bpf_map_update_elem(opts->map_fd, &i, val_ptr, 0);
+ CHECK(ret < 0, "bpf_map_update_elem", "key=%d error: %s\n", i, strerror(errno));
+
+ if (opts->map_type == BPF_MAP_TYPE_HASH_OF_MAPS)
+ close(val);
+ }
+ return NULL;
+}
+
+static void upsert_elements(struct upsert_opts *opts)
+{
+ pthread_t threads[N_THREADS];
+ int ret;
+ int i;
+
+ for (i = 0; i < ARRAY_SIZE(threads); i++) {
+ ret = pthread_create(&i[threads], NULL, patch_map_thread, opts);
+ CHECK(ret != 0, "pthread_create", "error: %s\n", strerror(ret));
+ }
+
+ for (i = 0; i < ARRAY_SIZE(threads); i++) {
+ ret = pthread_join(i[threads], NULL);
+ CHECK(ret != 0, "pthread_join", "error: %s\n", strerror(ret));
+ }
+}
+
+static __u32 read_cur_elements(int iter_fd)
+{
+ char buf[64];
+ ssize_t n;
+ __u32 ret;
+
+ n = read(iter_fd, buf, sizeof(buf)-1);
+ CHECK(n <= 0, "read", "error: %s\n", strerror(errno));
+ buf[n] = '\0';
+
+ errno = 0;
+ ret = (__u32)strtol(buf, NULL, 10);
+ CHECK(errno != 0, "strtol", "error: %s\n", strerror(errno));
+
+ return ret;
+}
+
+static __u32 get_cur_elements(int map_id)
+{
+ struct map_percpu_stats *skel;
+ struct bpf_link *link;
+ __u32 n_elements;
+ int iter_fd;
+ int ret;
+
+ skel = map_percpu_stats__open();
+ CHECK(skel == NULL, "map_percpu_stats__open", "error: %s", strerror(errno));
+
+ skel->bss->target_id = map_id;
+
+ ret = map_percpu_stats__load(skel);
+ CHECK(ret != 0, "map_percpu_stats__load", "error: %s", strerror(errno));
+
+ link = bpf_program__attach_iter(skel->progs.dump_bpf_map, NULL);
+ CHECK(!link, "bpf_program__attach_iter", "error: %s\n", strerror(errno));
+
+ iter_fd = bpf_iter_create(bpf_link__fd(link));
+ CHECK(iter_fd < 0, "bpf_iter_create", "error: %s\n", strerror(errno));
+
+ n_elements = read_cur_elements(iter_fd);
+
+ close(iter_fd);
+ bpf_link__destroy(link);
+ map_percpu_stats__destroy(skel);
+
+ return n_elements;
+}
+
+static void check_expected_number_elements(__u32 n_inserted, int map_fd,
+ struct bpf_map_info *info)
+{
+ __u32 n_real;
+ __u32 n_iter;
+
+ /* Count the current number of elements in the map by iterating through
+ * all the map keys via bpf_get_next_key
+ */
+ n_real = map_count_elements(info->type, map_fd);
+
+ /* The "real" number of elements should be the same as the inserted
+ * number of elements in all cases except LRU maps, where some elements
+ * may have been evicted
+ */
+ if (n_inserted == 0 || !is_lru(info->type))
+ CHECK(n_inserted != n_real, "map_count_elements",
+ "n_real(%u) != n_inserted(%u)\n", n_real, n_inserted);
+
+ /* Count the current number of elements in the map using an iterator */
+ n_iter = get_cur_elements(info->id);
+
+ /* Both counts should be the same, as all updates are over */
+ CHECK(n_iter != n_real, "get_cur_elements",
+ "n_iter=%u, expected %u (map_type=%s,map_flags=%08x)\n",
+ n_iter, n_real, map_type_to_s(info->type), info->map_flags);
+}
+
+static void __test(int map_fd)
+{
+ struct upsert_opts opts = {
+ .map_fd = map_fd,
+ };
+ struct bpf_map_info info;
+
+ map_info(map_fd, &info);
+ opts.map_type = info.type;
+ opts.n = info.max_entries;
+
+ /* Reduce the number of elements we are updating such that we don't
+ * bump into -E2BIG from non-preallocated hash maps, but still will
+ * have some evictions for LRU maps */
+ if (opts.map_type != BPF_MAP_TYPE_HASH_OF_MAPS)
+ opts.n -= 512;
+ else
+ opts.n /= 2;
+
+ /* per-cpu bpf memory allocator may not be able to allocate per-cpu
+ * pointer successfully and it can not refill free llist timely, and
+ * bpf_map_update_elem() will return -ENOMEM. so just retry to mitigate
+ * the problem temporarily.
+ */
+ opts.retry_for_nomem = is_percpu(opts.map_type) && (info.map_flags & BPF_F_NO_PREALLOC);
+
+ /*
+ * Upsert keys [0, n) under some competition: with random values from
+ * N_THREADS threads. Check values, then delete all elements and check
+ * values again.
+ */
+ upsert_elements(&opts);
+ check_expected_number_elements(opts.n, map_fd, &info);
+ delete_all_elements(info.type, map_fd, !BATCH);
+ check_expected_number_elements(0, map_fd, &info);
+
+ /* Now do the same, but using batch delete operations */
+ upsert_elements(&opts);
+ check_expected_number_elements(opts.n, map_fd, &info);
+ delete_all_elements(info.type, map_fd, BATCH);
+ check_expected_number_elements(0, map_fd, &info);
+
+ close(map_fd);
+}
+
+static int map_create_opts(__u32 type, const char *name,
+ struct bpf_map_create_opts *map_opts,
+ __u32 key_size, __u32 val_size)
+{
+ int max_entries;
+ int map_fd;
+
+ if (type == BPF_MAP_TYPE_HASH_OF_MAPS)
+ max_entries = MAX_ENTRIES_HASH_OF_MAPS;
+ else
+ max_entries = MAX_ENTRIES;
+
+ map_fd = bpf_map_create(type, name, key_size, val_size, max_entries, map_opts);
+ CHECK(map_fd < 0, "bpf_map_create()", "error:%s (name=%s)\n",
+ strerror(errno), name);
+
+ return map_fd;
+}
+
+static int map_create(__u32 type, const char *name, struct bpf_map_create_opts *map_opts)
+{
+ return map_create_opts(type, name, map_opts, sizeof(int), sizeof(int));
+}
+
+static int create_hash(void)
+{
+ LIBBPF_OPTS(bpf_map_create_opts, map_opts, .map_flags = BPF_F_NO_PREALLOC);
+
+ return map_create(BPF_MAP_TYPE_HASH, "hash", &map_opts);
+}
+
+static int create_percpu_hash(void)
+{
+ LIBBPF_OPTS(bpf_map_create_opts, map_opts, .map_flags = BPF_F_NO_PREALLOC);
+
+ return map_create(BPF_MAP_TYPE_PERCPU_HASH, "percpu_hash", &map_opts);
+}
+
+static int create_hash_prealloc(void)
+{
+ return map_create(BPF_MAP_TYPE_HASH, "hash", NULL);
+}
+
+static int create_percpu_hash_prealloc(void)
+{
+ return map_create(BPF_MAP_TYPE_PERCPU_HASH, "percpu_hash_prealloc", NULL);
+}
+
+static int create_lru_hash(__u32 type, __u32 map_flags)
+{
+ LIBBPF_OPTS(bpf_map_create_opts, map_opts, .map_flags = map_flags);
+
+ return map_create(type, "lru_hash", &map_opts);
+}
+
+static int create_hash_of_maps(void)
+{
+ LIBBPF_OPTS(bpf_map_create_opts, map_opts,
+ .map_flags = BPF_F_NO_PREALLOC,
+ .inner_map_fd = create_small_hash(),
+ );
+ int ret;
+
+ ret = map_create_opts(BPF_MAP_TYPE_HASH_OF_MAPS, "hash_of_maps",
+ &map_opts, sizeof(int), sizeof(int));
+ close(map_opts.inner_map_fd);
+ return ret;
+}
+
+static void map_percpu_stats_hash(void)
+{
+ __test(create_hash());
+ printf("test_%s:PASS\n", __func__);
+}
+
+static void map_percpu_stats_percpu_hash(void)
+{
+ __test(create_percpu_hash());
+ printf("test_%s:PASS\n", __func__);
+}
+
+static void map_percpu_stats_hash_prealloc(void)
+{
+ __test(create_hash_prealloc());
+ printf("test_%s:PASS\n", __func__);
+}
+
+static void map_percpu_stats_percpu_hash_prealloc(void)
+{
+ __test(create_percpu_hash_prealloc());
+ printf("test_%s:PASS\n", __func__);
+}
+
+static void map_percpu_stats_lru_hash(void)
+{
+ __test(create_lru_hash(BPF_MAP_TYPE_LRU_HASH, 0));
+ printf("test_%s:PASS\n", __func__);
+}
+
+static void map_percpu_stats_lru_hash_no_common(void)
+{
+ __test(create_lru_hash(BPF_MAP_TYPE_LRU_HASH, BPF_F_NO_COMMON_LRU));
+ printf("test_%s:PASS\n", __func__);
+}
+
+static void map_percpu_stats_percpu_lru_hash(void)
+{
+ __test(create_lru_hash(BPF_MAP_TYPE_LRU_PERCPU_HASH, 0));
+ printf("test_%s:PASS\n", __func__);
+}
+
+static void map_percpu_stats_percpu_lru_hash_no_common(void)
+{
+ __test(create_lru_hash(BPF_MAP_TYPE_LRU_PERCPU_HASH, BPF_F_NO_COMMON_LRU));
+ printf("test_%s:PASS\n", __func__);
+}
+
+static void map_percpu_stats_hash_of_maps(void)
+{
+ __test(create_hash_of_maps());
+ printf("test_%s:PASS\n", __func__);
+}
+
+void test_map_percpu_stats(void)
+{
+ map_percpu_stats_hash();
+ map_percpu_stats_percpu_hash();
+ map_percpu_stats_hash_prealloc();
+ map_percpu_stats_percpu_hash_prealloc();
+ map_percpu_stats_lru_hash();
+ map_percpu_stats_lru_hash_no_common();
+ map_percpu_stats_percpu_lru_hash();
+ map_percpu_stats_percpu_lru_hash_no_common();
+ map_percpu_stats_hash_of_maps();
+}
diff --git a/tools/testing/selftests/bpf/map_tests/sk_storage_map.c b/tools/testing/selftests/bpf/map_tests/sk_storage_map.c
index e569edc679d8..18405c3b7cee 100644
--- a/tools/testing/selftests/bpf/map_tests/sk_storage_map.c
+++ b/tools/testing/selftests/bpf/map_tests/sk_storage_map.c
@@ -19,16 +19,12 @@
#include <test_btf.h>
#include <test_maps.h>
-static struct bpf_create_map_attr xattr = {
- .name = "sk_storage_map",
- .map_type = BPF_MAP_TYPE_SK_STORAGE,
- .map_flags = BPF_F_NO_PREALLOC,
- .max_entries = 0,
- .key_size = 4,
- .value_size = 8,
+static struct bpf_map_create_opts map_opts = {
+ .sz = sizeof(map_opts),
.btf_key_type_id = 1,
.btf_value_type_id = 3,
.btf_fd = -1,
+ .map_flags = BPF_F_NO_PREALLOC,
};
static unsigned int nr_sk_threads_done;
@@ -140,7 +136,7 @@ static int load_btf(void)
memcpy(raw_btf + sizeof(btf_hdr) + sizeof(btf_raw_types),
btf_str_sec, sizeof(btf_str_sec));
- return bpf_load_btf(raw_btf, sizeof(raw_btf), 0, 0, 0);
+ return bpf_btf_load(raw_btf, sizeof(raw_btf), NULL);
}
static int create_sk_storage_map(void)
@@ -150,13 +146,13 @@ static int create_sk_storage_map(void)
btf_fd = load_btf();
CHECK(btf_fd == -1, "bpf_load_btf", "btf_fd:%d errno:%d\n",
btf_fd, errno);
- xattr.btf_fd = btf_fd;
+ map_opts.btf_fd = btf_fd;
- map_fd = bpf_create_map_xattr(&xattr);
- xattr.btf_fd = -1;
+ map_fd = bpf_map_create(BPF_MAP_TYPE_SK_STORAGE, "sk_storage_map", 4, 8, 0, &map_opts);
+ map_opts.btf_fd = -1;
close(btf_fd);
CHECK(map_fd == -1,
- "bpf_create_map_xattr()", "errno:%d\n", errno);
+ "bpf_map_create()", "errno:%d\n", errno);
return map_fd;
}
@@ -462,21 +458,21 @@ static void test_sk_storage_map_basic(void)
struct {
int cnt;
int lock;
- } value = { .cnt = 0xeB9f, .lock = 0, }, lookup_value;
- struct bpf_create_map_attr bad_xattr;
+ } value = { .cnt = 0xeB9f, .lock = 1, }, lookup_value;
+ struct bpf_map_create_opts bad_xattr;
int btf_fd, map_fd, sk_fd, err;
btf_fd = load_btf();
CHECK(btf_fd == -1, "bpf_load_btf", "btf_fd:%d errno:%d\n",
btf_fd, errno);
- xattr.btf_fd = btf_fd;
+ map_opts.btf_fd = btf_fd;
sk_fd = socket(AF_INET6, SOCK_STREAM, 0);
CHECK(sk_fd == -1, "socket()", "sk_fd:%d errno:%d\n",
sk_fd, errno);
- map_fd = bpf_create_map_xattr(&xattr);
- CHECK(map_fd == -1, "bpf_create_map_xattr(good_xattr)",
+ map_fd = bpf_map_create(BPF_MAP_TYPE_SK_STORAGE, "sk_storage_map", 4, 8, 0, &map_opts);
+ CHECK(map_fd == -1, "bpf_map_create(good_xattr)",
"map_fd:%d errno:%d\n", map_fd, errno);
/* Add new elem */
@@ -487,38 +483,41 @@ static void test_sk_storage_map_basic(void)
"err:%d errno:%d\n", err, errno);
err = bpf_map_lookup_elem_flags(map_fd, &sk_fd, &lookup_value,
BPF_F_LOCK);
- CHECK(err || lookup_value.cnt != value.cnt,
+ CHECK(err || lookup_value.lock || lookup_value.cnt != value.cnt,
"bpf_map_lookup_elem_flags(BPF_F_LOCK)",
- "err:%d errno:%d cnt:%x(%x)\n",
- err, errno, lookup_value.cnt, value.cnt);
+ "err:%d errno:%d lock:%x cnt:%x(%x)\n",
+ err, errno, lookup_value.lock, lookup_value.cnt, value.cnt);
/* Bump the cnt and update with BPF_EXIST | BPF_F_LOCK */
value.cnt += 1;
+ value.lock = 2;
err = bpf_map_update_elem(map_fd, &sk_fd, &value,
BPF_EXIST | BPF_F_LOCK);
CHECK(err, "bpf_map_update_elem(BPF_EXIST|BPF_F_LOCK)",
"err:%d errno:%d\n", err, errno);
err = bpf_map_lookup_elem_flags(map_fd, &sk_fd, &lookup_value,
BPF_F_LOCK);
- CHECK(err || lookup_value.cnt != value.cnt,
+ CHECK(err || lookup_value.lock || lookup_value.cnt != value.cnt,
"bpf_map_lookup_elem_flags(BPF_F_LOCK)",
- "err:%d errno:%d cnt:%x(%x)\n",
- err, errno, lookup_value.cnt, value.cnt);
+ "err:%d errno:%d lock:%x cnt:%x(%x)\n",
+ err, errno, lookup_value.lock, lookup_value.cnt, value.cnt);
/* Bump the cnt and update with BPF_EXIST */
value.cnt += 1;
+ value.lock = 2;
err = bpf_map_update_elem(map_fd, &sk_fd, &value, BPF_EXIST);
CHECK(err, "bpf_map_update_elem(BPF_EXIST)",
"err:%d errno:%d\n", err, errno);
err = bpf_map_lookup_elem_flags(map_fd, &sk_fd, &lookup_value,
BPF_F_LOCK);
- CHECK(err || lookup_value.cnt != value.cnt,
+ CHECK(err || lookup_value.lock || lookup_value.cnt != value.cnt,
"bpf_map_lookup_elem_flags(BPF_F_LOCK)",
- "err:%d errno:%d cnt:%x(%x)\n",
- err, errno, lookup_value.cnt, value.cnt);
+ "err:%d errno:%d lock:%x cnt:%x(%x)\n",
+ err, errno, lookup_value.lock, lookup_value.cnt, value.cnt);
/* Update with BPF_NOEXIST */
value.cnt += 1;
+ value.lock = 2;
err = bpf_map_update_elem(map_fd, &sk_fd, &value,
BPF_NOEXIST | BPF_F_LOCK);
CHECK(!err || errno != EEXIST,
@@ -530,22 +529,23 @@ static void test_sk_storage_map_basic(void)
value.cnt -= 1;
err = bpf_map_lookup_elem_flags(map_fd, &sk_fd, &lookup_value,
BPF_F_LOCK);
- CHECK(err || lookup_value.cnt != value.cnt,
+ CHECK(err || lookup_value.lock || lookup_value.cnt != value.cnt,
"bpf_map_lookup_elem_flags(BPF_F_LOCK)",
- "err:%d errno:%d cnt:%x(%x)\n",
- err, errno, lookup_value.cnt, value.cnt);
+ "err:%d errno:%d lock:%x cnt:%x(%x)\n",
+ err, errno, lookup_value.lock, lookup_value.cnt, value.cnt);
/* Bump the cnt again and update with map_flags == 0 */
value.cnt += 1;
+ value.lock = 2;
err = bpf_map_update_elem(map_fd, &sk_fd, &value, 0);
CHECK(err, "bpf_map_update_elem()", "err:%d errno:%d\n",
err, errno);
err = bpf_map_lookup_elem_flags(map_fd, &sk_fd, &lookup_value,
BPF_F_LOCK);
- CHECK(err || lookup_value.cnt != value.cnt,
+ CHECK(err || lookup_value.lock || lookup_value.cnt != value.cnt,
"bpf_map_lookup_elem_flags(BPF_F_LOCK)",
- "err:%d errno:%d cnt:%x(%x)\n",
- err, errno, lookup_value.cnt, value.cnt);
+ "err:%d errno:%d lock:%x cnt:%x(%x)\n",
+ err, errno, lookup_value.lock, lookup_value.cnt, value.cnt);
/* Test delete elem */
err = bpf_map_delete_elem(map_fd, &sk_fd);
@@ -560,31 +560,29 @@ static void test_sk_storage_map_basic(void)
CHECK(!err || errno != ENOENT, "bpf_map_delete_elem()",
"err:%d errno:%d\n", err, errno);
- memcpy(&bad_xattr, &xattr, sizeof(xattr));
+ memcpy(&bad_xattr, &map_opts, sizeof(map_opts));
bad_xattr.btf_key_type_id = 0;
- err = bpf_create_map_xattr(&bad_xattr);
- CHECK(!err || errno != EINVAL, "bap_create_map_xattr(bad_xattr)",
+ err = bpf_map_create(BPF_MAP_TYPE_SK_STORAGE, "sk_storage_map", 4, 8, 0, &bad_xattr);
+ CHECK(!err || errno != EINVAL, "bpf_map_create(bad_xattr)",
"err:%d errno:%d\n", err, errno);
- memcpy(&bad_xattr, &xattr, sizeof(xattr));
+ memcpy(&bad_xattr, &map_opts, sizeof(map_opts));
bad_xattr.btf_key_type_id = 3;
- err = bpf_create_map_xattr(&bad_xattr);
- CHECK(!err || errno != EINVAL, "bap_create_map_xattr(bad_xattr)",
+ err = bpf_map_create(BPF_MAP_TYPE_SK_STORAGE, "sk_storage_map", 4, 8, 0, &bad_xattr);
+ CHECK(!err || errno != EINVAL, "bpf_map_create(bad_xattr)",
"err:%d errno:%d\n", err, errno);
- memcpy(&bad_xattr, &xattr, sizeof(xattr));
- bad_xattr.max_entries = 1;
- err = bpf_create_map_xattr(&bad_xattr);
- CHECK(!err || errno != EINVAL, "bap_create_map_xattr(bad_xattr)",
+ err = bpf_map_create(BPF_MAP_TYPE_SK_STORAGE, "sk_storage_map", 4, 8, 1, &map_opts);
+ CHECK(!err || errno != EINVAL, "bpf_map_create(bad_xattr)",
"err:%d errno:%d\n", err, errno);
- memcpy(&bad_xattr, &xattr, sizeof(xattr));
+ memcpy(&bad_xattr, &map_opts, sizeof(map_opts));
bad_xattr.map_flags = 0;
- err = bpf_create_map_xattr(&bad_xattr);
+ err = bpf_map_create(BPF_MAP_TYPE_SK_STORAGE, "sk_storage_map", 4, 8, 0, &bad_xattr);
CHECK(!err || errno != EINVAL, "bap_create_map_xattr(bad_xattr)",
"err:%d errno:%d\n", err, errno);
- xattr.btf_fd = -1;
+ map_opts.btf_fd = -1;
close(btf_fd);
close(map_fd);
close(sk_fd);
diff --git a/tools/testing/selftests/bpf/map_tests/task_storage_map.c b/tools/testing/selftests/bpf/map_tests/task_storage_map.c
new file mode 100644
index 000000000000..7d050364efca
--- /dev/null
+++ b/tools/testing/selftests/bpf/map_tests/task_storage_map.c
@@ -0,0 +1,127 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (C) 2022. Huawei Technologies Co., Ltd */
+#define _GNU_SOURCE
+#include <sched.h>
+#include <unistd.h>
+#include <stdlib.h>
+#include <stdbool.h>
+#include <errno.h>
+#include <string.h>
+#include <pthread.h>
+
+#include <bpf/bpf.h>
+#include <bpf/libbpf.h>
+
+#include "test_maps.h"
+#include "task_local_storage_helpers.h"
+#include "read_bpf_task_storage_busy.skel.h"
+
+struct lookup_ctx {
+ bool start;
+ bool stop;
+ int pid_fd;
+ int map_fd;
+ int loop;
+};
+
+static void *lookup_fn(void *arg)
+{
+ struct lookup_ctx *ctx = arg;
+ long value;
+ int i = 0;
+
+ while (!ctx->start)
+ usleep(1);
+
+ while (!ctx->stop && i++ < ctx->loop)
+ bpf_map_lookup_elem(ctx->map_fd, &ctx->pid_fd, &value);
+ return NULL;
+}
+
+static void abort_lookup(struct lookup_ctx *ctx, pthread_t *tids, unsigned int nr)
+{
+ unsigned int i;
+
+ ctx->stop = true;
+ ctx->start = true;
+ for (i = 0; i < nr; i++)
+ pthread_join(tids[i], NULL);
+}
+
+void test_task_storage_map_stress_lookup(void)
+{
+#define MAX_NR_THREAD 4096
+ unsigned int i, nr = 256, loop = 8192, cpu = 0;
+ struct read_bpf_task_storage_busy *skel;
+ pthread_t tids[MAX_NR_THREAD];
+ struct lookup_ctx ctx;
+ cpu_set_t old, new;
+ const char *cfg;
+ int err;
+
+ cfg = getenv("TASK_STORAGE_MAP_NR_THREAD");
+ if (cfg) {
+ nr = atoi(cfg);
+ if (nr > MAX_NR_THREAD)
+ nr = MAX_NR_THREAD;
+ }
+ cfg = getenv("TASK_STORAGE_MAP_NR_LOOP");
+ if (cfg)
+ loop = atoi(cfg);
+ cfg = getenv("TASK_STORAGE_MAP_PIN_CPU");
+ if (cfg)
+ cpu = atoi(cfg);
+
+ skel = read_bpf_task_storage_busy__open_and_load();
+ err = libbpf_get_error(skel);
+ CHECK(err, "open_and_load", "error %d\n", err);
+
+ /* Only for a fully preemptible kernel */
+ if (!skel->kconfig->CONFIG_PREEMPT) {
+ printf("%s SKIP (no CONFIG_PREEMPT)\n", __func__);
+ read_bpf_task_storage_busy__destroy(skel);
+ skips++;
+ return;
+ }
+
+ /* Save the old affinity setting */
+ sched_getaffinity(getpid(), sizeof(old), &old);
+
+ /* Pinned on a specific CPU */
+ CPU_ZERO(&new);
+ CPU_SET(cpu, &new);
+ sched_setaffinity(getpid(), sizeof(new), &new);
+
+ ctx.start = false;
+ ctx.stop = false;
+ ctx.pid_fd = sys_pidfd_open(getpid(), 0);
+ ctx.map_fd = bpf_map__fd(skel->maps.task);
+ ctx.loop = loop;
+ for (i = 0; i < nr; i++) {
+ err = pthread_create(&tids[i], NULL, lookup_fn, &ctx);
+ if (err) {
+ abort_lookup(&ctx, tids, i);
+ CHECK(err, "pthread_create", "error %d\n", err);
+ goto out;
+ }
+ }
+
+ ctx.start = true;
+ for (i = 0; i < nr; i++)
+ pthread_join(tids[i], NULL);
+
+ skel->bss->pid = getpid();
+ err = read_bpf_task_storage_busy__attach(skel);
+ CHECK(err, "attach", "error %d\n", err);
+
+ /* Trigger program */
+ syscall(SYS_gettid);
+ skel->bss->pid = 0;
+
+ CHECK(skel->bss->busy != 0, "bad bpf_task_storage_busy", "got %d\n", skel->bss->busy);
+out:
+ read_bpf_task_storage_busy__destroy(skel);
+ /* Restore affinity setting */
+ sched_setaffinity(getpid(), sizeof(old), &old);
+ printf("%s:PASS\n", __func__);
+}
diff --git a/tools/testing/selftests/bpf/netcnt_common.h b/tools/testing/selftests/bpf/netcnt_common.h
index 81084c1c2c23..2d4a58e4e39c 100644
--- a/tools/testing/selftests/bpf/netcnt_common.h
+++ b/tools/testing/selftests/bpf/netcnt_common.h
@@ -6,19 +6,39 @@
#define MAX_PERCPU_PACKETS 32
-struct percpu_net_cnt {
- __u64 packets;
- __u64 bytes;
+/* sizeof(struct bpf_local_storage_elem):
+ *
+ * It is about 128 bytes on x86_64 and 512 bytes on s390x, but allocate more to
+ * account for possible layout changes, different architectures, etc.
+ * The kernel will wrap up to PAGE_SIZE internally anyway.
+ */
+#define SIZEOF_BPF_LOCAL_STORAGE_ELEM 768
- __u64 prev_ts;
+/* Try to estimate kernel's BPF_LOCAL_STORAGE_MAX_VALUE_SIZE: */
+#define BPF_LOCAL_STORAGE_MAX_VALUE_SIZE (0xFFFF - \
+ SIZEOF_BPF_LOCAL_STORAGE_ELEM)
- __u64 prev_packets;
- __u64 prev_bytes;
+#define PCPU_MIN_UNIT_SIZE 32768
+
+union percpu_net_cnt {
+ struct {
+ __u64 packets;
+ __u64 bytes;
+
+ __u64 prev_ts;
+
+ __u64 prev_packets;
+ __u64 prev_bytes;
+ };
+ __u8 data[PCPU_MIN_UNIT_SIZE];
};
-struct net_cnt {
- __u64 packets;
- __u64 bytes;
+union net_cnt {
+ struct {
+ __u64 packets;
+ __u64 bytes;
+ };
+ __u8 data[BPF_LOCAL_STORAGE_MAX_VALUE_SIZE];
};
#endif
diff --git a/tools/testing/selftests/bpf/netlink_helpers.c b/tools/testing/selftests/bpf/netlink_helpers.c
new file mode 100644
index 000000000000..caf36eb1d032
--- /dev/null
+++ b/tools/testing/selftests/bpf/netlink_helpers.c
@@ -0,0 +1,358 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/* Taken & modified from iproute2's libnetlink.c
+ * Authors: Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
+ */
+#include <stdio.h>
+#include <stdlib.h>
+#include <unistd.h>
+#include <errno.h>
+#include <time.h>
+#include <sys/socket.h>
+
+#include "netlink_helpers.h"
+
+static int rcvbuf = 1024 * 1024;
+
+void rtnl_close(struct rtnl_handle *rth)
+{
+ if (rth->fd >= 0) {
+ close(rth->fd);
+ rth->fd = -1;
+ }
+}
+
+int rtnl_open_byproto(struct rtnl_handle *rth, unsigned int subscriptions,
+ int protocol)
+{
+ socklen_t addr_len;
+ int sndbuf = 32768;
+ int one = 1;
+
+ memset(rth, 0, sizeof(*rth));
+ rth->proto = protocol;
+ rth->fd = socket(AF_NETLINK, SOCK_RAW | SOCK_CLOEXEC, protocol);
+ if (rth->fd < 0) {
+ perror("Cannot open netlink socket");
+ return -1;
+ }
+ if (setsockopt(rth->fd, SOL_SOCKET, SO_SNDBUF,
+ &sndbuf, sizeof(sndbuf)) < 0) {
+ perror("SO_SNDBUF");
+ goto err;
+ }
+ if (setsockopt(rth->fd, SOL_SOCKET, SO_RCVBUF,
+ &rcvbuf, sizeof(rcvbuf)) < 0) {
+ perror("SO_RCVBUF");
+ goto err;
+ }
+
+ /* Older kernels may no support extended ACK reporting */
+ setsockopt(rth->fd, SOL_NETLINK, NETLINK_EXT_ACK,
+ &one, sizeof(one));
+
+ memset(&rth->local, 0, sizeof(rth->local));
+ rth->local.nl_family = AF_NETLINK;
+ rth->local.nl_groups = subscriptions;
+
+ if (bind(rth->fd, (struct sockaddr *)&rth->local,
+ sizeof(rth->local)) < 0) {
+ perror("Cannot bind netlink socket");
+ goto err;
+ }
+ addr_len = sizeof(rth->local);
+ if (getsockname(rth->fd, (struct sockaddr *)&rth->local,
+ &addr_len) < 0) {
+ perror("Cannot getsockname");
+ goto err;
+ }
+ if (addr_len != sizeof(rth->local)) {
+ fprintf(stderr, "Wrong address length %d\n", addr_len);
+ goto err;
+ }
+ if (rth->local.nl_family != AF_NETLINK) {
+ fprintf(stderr, "Wrong address family %d\n",
+ rth->local.nl_family);
+ goto err;
+ }
+ rth->seq = time(NULL);
+ return 0;
+err:
+ rtnl_close(rth);
+ return -1;
+}
+
+int rtnl_open(struct rtnl_handle *rth, unsigned int subscriptions)
+{
+ return rtnl_open_byproto(rth, subscriptions, NETLINK_ROUTE);
+}
+
+static int __rtnl_recvmsg(int fd, struct msghdr *msg, int flags)
+{
+ int len;
+
+ do {
+ len = recvmsg(fd, msg, flags);
+ } while (len < 0 && (errno == EINTR || errno == EAGAIN));
+ if (len < 0) {
+ fprintf(stderr, "netlink receive error %s (%d)\n",
+ strerror(errno), errno);
+ return -errno;
+ }
+ if (len == 0) {
+ fprintf(stderr, "EOF on netlink\n");
+ return -ENODATA;
+ }
+ return len;
+}
+
+static int rtnl_recvmsg(int fd, struct msghdr *msg, char **answer)
+{
+ struct iovec *iov = msg->msg_iov;
+ char *buf;
+ int len;
+
+ iov->iov_base = NULL;
+ iov->iov_len = 0;
+
+ len = __rtnl_recvmsg(fd, msg, MSG_PEEK | MSG_TRUNC);
+ if (len < 0)
+ return len;
+ if (len < 32768)
+ len = 32768;
+ buf = malloc(len);
+ if (!buf) {
+ fprintf(stderr, "malloc error: not enough buffer\n");
+ return -ENOMEM;
+ }
+ iov->iov_base = buf;
+ iov->iov_len = len;
+ len = __rtnl_recvmsg(fd, msg, 0);
+ if (len < 0) {
+ free(buf);
+ return len;
+ }
+ if (answer)
+ *answer = buf;
+ else
+ free(buf);
+ return len;
+}
+
+static void rtnl_talk_error(struct nlmsghdr *h, struct nlmsgerr *err,
+ nl_ext_ack_fn_t errfn)
+{
+ fprintf(stderr, "RTNETLINK answers: %s\n",
+ strerror(-err->error));
+}
+
+static int __rtnl_talk_iov(struct rtnl_handle *rtnl, struct iovec *iov,
+ size_t iovlen, struct nlmsghdr **answer,
+ bool show_rtnl_err, nl_ext_ack_fn_t errfn)
+{
+ struct sockaddr_nl nladdr = { .nl_family = AF_NETLINK };
+ struct iovec riov;
+ struct msghdr msg = {
+ .msg_name = &nladdr,
+ .msg_namelen = sizeof(nladdr),
+ .msg_iov = iov,
+ .msg_iovlen = iovlen,
+ };
+ unsigned int seq = 0;
+ struct nlmsghdr *h;
+ int i, status;
+ char *buf;
+
+ for (i = 0; i < iovlen; i++) {
+ h = iov[i].iov_base;
+ h->nlmsg_seq = seq = ++rtnl->seq;
+ if (answer == NULL)
+ h->nlmsg_flags |= NLM_F_ACK;
+ }
+ status = sendmsg(rtnl->fd, &msg, 0);
+ if (status < 0) {
+ perror("Cannot talk to rtnetlink");
+ return -1;
+ }
+ /* change msg to use the response iov */
+ msg.msg_iov = &riov;
+ msg.msg_iovlen = 1;
+ i = 0;
+ while (1) {
+next:
+ status = rtnl_recvmsg(rtnl->fd, &msg, &buf);
+ ++i;
+ if (status < 0)
+ return status;
+ if (msg.msg_namelen != sizeof(nladdr)) {
+ fprintf(stderr,
+ "Sender address length == %d!\n",
+ msg.msg_namelen);
+ exit(1);
+ }
+ for (h = (struct nlmsghdr *)buf; status >= sizeof(*h); ) {
+ int len = h->nlmsg_len;
+ int l = len - sizeof(*h);
+
+ if (l < 0 || len > status) {
+ if (msg.msg_flags & MSG_TRUNC) {
+ fprintf(stderr, "Truncated message!\n");
+ free(buf);
+ return -1;
+ }
+ fprintf(stderr,
+ "Malformed message: len=%d!\n",
+ len);
+ exit(1);
+ }
+ if (nladdr.nl_pid != 0 ||
+ h->nlmsg_pid != rtnl->local.nl_pid ||
+ h->nlmsg_seq > seq || h->nlmsg_seq < seq - iovlen) {
+ /* Don't forget to skip that message. */
+ status -= NLMSG_ALIGN(len);
+ h = (struct nlmsghdr *)((char *)h + NLMSG_ALIGN(len));
+ continue;
+ }
+ if (h->nlmsg_type == NLMSG_ERROR) {
+ struct nlmsgerr *err = (struct nlmsgerr *)NLMSG_DATA(h);
+ int error = err->error;
+
+ if (l < sizeof(struct nlmsgerr)) {
+ fprintf(stderr, "ERROR truncated\n");
+ free(buf);
+ return -1;
+ }
+ if (error) {
+ errno = -error;
+ if (rtnl->proto != NETLINK_SOCK_DIAG &&
+ show_rtnl_err)
+ rtnl_talk_error(h, err, errfn);
+ }
+ if (i < iovlen) {
+ free(buf);
+ goto next;
+ }
+ if (error) {
+ free(buf);
+ return -i;
+ }
+ if (answer)
+ *answer = (struct nlmsghdr *)buf;
+ else
+ free(buf);
+ return 0;
+ }
+ if (answer) {
+ *answer = (struct nlmsghdr *)buf;
+ return 0;
+ }
+ fprintf(stderr, "Unexpected reply!\n");
+ status -= NLMSG_ALIGN(len);
+ h = (struct nlmsghdr *)((char *)h + NLMSG_ALIGN(len));
+ }
+ free(buf);
+ if (msg.msg_flags & MSG_TRUNC) {
+ fprintf(stderr, "Message truncated!\n");
+ continue;
+ }
+ if (status) {
+ fprintf(stderr, "Remnant of size %d!\n", status);
+ exit(1);
+ }
+ }
+}
+
+static int __rtnl_talk(struct rtnl_handle *rtnl, struct nlmsghdr *n,
+ struct nlmsghdr **answer, bool show_rtnl_err,
+ nl_ext_ack_fn_t errfn)
+{
+ struct iovec iov = {
+ .iov_base = n,
+ .iov_len = n->nlmsg_len,
+ };
+
+ return __rtnl_talk_iov(rtnl, &iov, 1, answer, show_rtnl_err, errfn);
+}
+
+int rtnl_talk(struct rtnl_handle *rtnl, struct nlmsghdr *n,
+ struct nlmsghdr **answer)
+{
+ return __rtnl_talk(rtnl, n, answer, true, NULL);
+}
+
+int addattr(struct nlmsghdr *n, int maxlen, int type)
+{
+ return addattr_l(n, maxlen, type, NULL, 0);
+}
+
+int addattr8(struct nlmsghdr *n, int maxlen, int type, __u8 data)
+{
+ return addattr_l(n, maxlen, type, &data, sizeof(__u8));
+}
+
+int addattr16(struct nlmsghdr *n, int maxlen, int type, __u16 data)
+{
+ return addattr_l(n, maxlen, type, &data, sizeof(__u16));
+}
+
+int addattr32(struct nlmsghdr *n, int maxlen, int type, __u32 data)
+{
+ return addattr_l(n, maxlen, type, &data, sizeof(__u32));
+}
+
+int addattr64(struct nlmsghdr *n, int maxlen, int type, __u64 data)
+{
+ return addattr_l(n, maxlen, type, &data, sizeof(__u64));
+}
+
+int addattrstrz(struct nlmsghdr *n, int maxlen, int type, const char *str)
+{
+ return addattr_l(n, maxlen, type, str, strlen(str)+1);
+}
+
+int addattr_l(struct nlmsghdr *n, int maxlen, int type, const void *data,
+ int alen)
+{
+ int len = RTA_LENGTH(alen);
+ struct rtattr *rta;
+
+ if (NLMSG_ALIGN(n->nlmsg_len) + RTA_ALIGN(len) > maxlen) {
+ fprintf(stderr, "%s: Message exceeded bound of %d\n",
+ __func__, maxlen);
+ return -1;
+ }
+ rta = NLMSG_TAIL(n);
+ rta->rta_type = type;
+ rta->rta_len = len;
+ if (alen)
+ memcpy(RTA_DATA(rta), data, alen);
+ n->nlmsg_len = NLMSG_ALIGN(n->nlmsg_len) + RTA_ALIGN(len);
+ return 0;
+}
+
+int addraw_l(struct nlmsghdr *n, int maxlen, const void *data, int len)
+{
+ if (NLMSG_ALIGN(n->nlmsg_len) + NLMSG_ALIGN(len) > maxlen) {
+ fprintf(stderr, "%s: Message exceeded bound of %d\n",
+ __func__, maxlen);
+ return -1;
+ }
+
+ memcpy(NLMSG_TAIL(n), data, len);
+ memset((void *) NLMSG_TAIL(n) + len, 0, NLMSG_ALIGN(len) - len);
+ n->nlmsg_len = NLMSG_ALIGN(n->nlmsg_len) + NLMSG_ALIGN(len);
+ return 0;
+}
+
+struct rtattr *addattr_nest(struct nlmsghdr *n, int maxlen, int type)
+{
+ struct rtattr *nest = NLMSG_TAIL(n);
+
+ addattr_l(n, maxlen, type, NULL, 0);
+ return nest;
+}
+
+int addattr_nest_end(struct nlmsghdr *n, struct rtattr *nest)
+{
+ nest->rta_len = (void *)NLMSG_TAIL(n) - (void *)nest;
+ return n->nlmsg_len;
+}
diff --git a/tools/testing/selftests/bpf/netlink_helpers.h b/tools/testing/selftests/bpf/netlink_helpers.h
new file mode 100644
index 000000000000..68116818a47e
--- /dev/null
+++ b/tools/testing/selftests/bpf/netlink_helpers.h
@@ -0,0 +1,46 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+#ifndef NETLINK_HELPERS_H
+#define NETLINK_HELPERS_H
+
+#include <string.h>
+#include <linux/netlink.h>
+#include <linux/rtnetlink.h>
+
+struct rtnl_handle {
+ int fd;
+ struct sockaddr_nl local;
+ struct sockaddr_nl peer;
+ __u32 seq;
+ __u32 dump;
+ int proto;
+ FILE *dump_fp;
+#define RTNL_HANDLE_F_LISTEN_ALL_NSID 0x01
+#define RTNL_HANDLE_F_SUPPRESS_NLERR 0x02
+#define RTNL_HANDLE_F_STRICT_CHK 0x04
+ int flags;
+};
+
+#define NLMSG_TAIL(nmsg) \
+ ((struct rtattr *) (((void *) (nmsg)) + NLMSG_ALIGN((nmsg)->nlmsg_len)))
+
+typedef int (*nl_ext_ack_fn_t)(const char *errmsg, uint32_t off,
+ const struct nlmsghdr *inner_nlh);
+
+int rtnl_open(struct rtnl_handle *rth, unsigned int subscriptions)
+ __attribute__((warn_unused_result));
+void rtnl_close(struct rtnl_handle *rth);
+int rtnl_talk(struct rtnl_handle *rtnl, struct nlmsghdr *n,
+ struct nlmsghdr **answer)
+ __attribute__((warn_unused_result));
+
+int addattr(struct nlmsghdr *n, int maxlen, int type);
+int addattr8(struct nlmsghdr *n, int maxlen, int type, __u8 data);
+int addattr16(struct nlmsghdr *n, int maxlen, int type, __u16 data);
+int addattr32(struct nlmsghdr *n, int maxlen, int type, __u32 data);
+int addattr64(struct nlmsghdr *n, int maxlen, int type, __u64 data);
+int addattrstrz(struct nlmsghdr *n, int maxlen, int type, const char *data);
+int addattr_l(struct nlmsghdr *n, int maxlen, int type, const void *data, int alen);
+int addraw_l(struct nlmsghdr *n, int maxlen, const void *data, int len);
+struct rtattr *addattr_nest(struct nlmsghdr *n, int maxlen, int type);
+int addattr_nest_end(struct nlmsghdr *n, struct rtattr *nest);
+#endif /* NETLINK_HELPERS_H */
diff --git a/tools/testing/selftests/bpf/network_helpers.c b/tools/testing/selftests/bpf/network_helpers.c
index e36dd1a1780d..6db27a9088e9 100644
--- a/tools/testing/selftests/bpf/network_helpers.c
+++ b/tools/testing/selftests/bpf/network_helpers.c
@@ -1,24 +1,39 @@
// SPDX-License-Identifier: GPL-2.0-only
+#define _GNU_SOURCE
+
#include <errno.h>
#include <stdbool.h>
#include <stdio.h>
#include <string.h>
#include <unistd.h>
+#include <sched.h>
#include <arpa/inet.h>
-
-#include <sys/epoll.h>
+#include <sys/mount.h>
+#include <sys/stat.h>
+#include <sys/un.h>
#include <linux/err.h>
#include <linux/in.h>
#include <linux/in6.h>
+#include <linux/limits.h>
#include "bpf_util.h"
#include "network_helpers.h"
+#include "test_progs.h"
+
+#ifndef IPPROTO_MPTCP
+#define IPPROTO_MPTCP 262
+#endif
#define clean_errno() (errno == 0 ? "None" : strerror(errno))
-#define log_err(MSG, ...) fprintf(stderr, "(%s:%d: errno: %s) " MSG "\n", \
- __FILE__, __LINE__, clean_errno(), ##__VA_ARGS__)
+#define log_err(MSG, ...) ({ \
+ int __save = errno; \
+ fprintf(stderr, "(%s:%d: errno: %s) " MSG "\n", \
+ __FILE__, __LINE__, clean_errno(), \
+ ##__VA_ARGS__); \
+ errno = __save; \
+})
struct ipv4_packet pkt_v4 = {
.eth.h_proto = __bpf_constant_htons(ETH_P_IP),
@@ -37,131 +52,448 @@ struct ipv6_packet pkt_v6 = {
.tcp.doff = 5,
};
-int start_server_with_port(int family, int type, __u16 port)
+int settimeo(int fd, int timeout_ms)
{
- struct sockaddr_storage addr = {};
- socklen_t len;
- int fd;
+ struct timeval timeout = { .tv_sec = 3 };
- if (family == AF_INET) {
- struct sockaddr_in *sin = (void *)&addr;
+ if (timeout_ms > 0) {
+ timeout.tv_sec = timeout_ms / 1000;
+ timeout.tv_usec = (timeout_ms % 1000) * 1000;
+ }
- sin->sin_family = AF_INET;
- sin->sin_port = htons(port);
- len = sizeof(*sin);
- } else {
- struct sockaddr_in6 *sin6 = (void *)&addr;
+ if (setsockopt(fd, SOL_SOCKET, SO_RCVTIMEO, &timeout,
+ sizeof(timeout))) {
+ log_err("Failed to set SO_RCVTIMEO");
+ return -1;
+ }
- sin6->sin6_family = AF_INET6;
- sin6->sin6_port = htons(port);
- len = sizeof(*sin6);
+ if (setsockopt(fd, SOL_SOCKET, SO_SNDTIMEO, &timeout,
+ sizeof(timeout))) {
+ log_err("Failed to set SO_SNDTIMEO");
+ return -1;
}
- fd = socket(family, type | SOCK_NONBLOCK, 0);
+ return 0;
+}
+
+#define save_errno_close(fd) ({ int __save = errno; close(fd); errno = __save; })
+
+static int __start_server(int type, int protocol, const struct sockaddr *addr,
+ socklen_t addrlen, int timeout_ms, bool reuseport)
+{
+ int on = 1;
+ int fd;
+
+ fd = socket(addr->sa_family, type, protocol);
if (fd < 0) {
log_err("Failed to create server socket");
return -1;
}
- if (bind(fd, (const struct sockaddr *)&addr, len) < 0) {
+ if (settimeo(fd, timeout_ms))
+ goto error_close;
+
+ if (reuseport &&
+ setsockopt(fd, SOL_SOCKET, SO_REUSEPORT, &on, sizeof(on))) {
+ log_err("Failed to set SO_REUSEPORT");
+ goto error_close;
+ }
+
+ if (bind(fd, addr, addrlen) < 0) {
log_err("Failed to bind socket");
- close(fd);
- return -1;
+ goto error_close;
}
if (type == SOCK_STREAM) {
if (listen(fd, 1) < 0) {
log_err("Failed to listed on socket");
- close(fd);
- return -1;
+ goto error_close;
}
}
return fd;
+
+error_close:
+ save_errno_close(fd);
+ return -1;
+}
+
+static int start_server_proto(int family, int type, int protocol,
+ const char *addr_str, __u16 port, int timeout_ms)
+{
+ struct sockaddr_storage addr;
+ socklen_t addrlen;
+
+ if (make_sockaddr(family, addr_str, port, &addr, &addrlen))
+ return -1;
+
+ return __start_server(type, protocol, (struct sockaddr *)&addr,
+ addrlen, timeout_ms, false);
+}
+
+int start_server(int family, int type, const char *addr_str, __u16 port,
+ int timeout_ms)
+{
+ return start_server_proto(family, type, 0, addr_str, port, timeout_ms);
+}
+
+int start_mptcp_server(int family, const char *addr_str, __u16 port,
+ int timeout_ms)
+{
+ return start_server_proto(family, SOCK_STREAM, IPPROTO_MPTCP, addr_str,
+ port, timeout_ms);
}
-int start_server(int family, int type)
+int *start_reuseport_server(int family, int type, const char *addr_str,
+ __u16 port, int timeout_ms, unsigned int nr_listens)
{
- return start_server_with_port(family, type, 0);
+ struct sockaddr_storage addr;
+ unsigned int nr_fds = 0;
+ socklen_t addrlen;
+ int *fds;
+
+ if (!nr_listens)
+ return NULL;
+
+ if (make_sockaddr(family, addr_str, port, &addr, &addrlen))
+ return NULL;
+
+ fds = malloc(sizeof(*fds) * nr_listens);
+ if (!fds)
+ return NULL;
+
+ fds[0] = __start_server(type, 0, (struct sockaddr *)&addr, addrlen,
+ timeout_ms, true);
+ if (fds[0] == -1)
+ goto close_fds;
+ nr_fds = 1;
+
+ if (getsockname(fds[0], (struct sockaddr *)&addr, &addrlen))
+ goto close_fds;
+
+ for (; nr_fds < nr_listens; nr_fds++) {
+ fds[nr_fds] = __start_server(type, 0, (struct sockaddr *)&addr,
+ addrlen, timeout_ms, true);
+ if (fds[nr_fds] == -1)
+ goto close_fds;
+ }
+
+ return fds;
+
+close_fds:
+ free_fds(fds, nr_fds);
+ return NULL;
}
-static const struct timeval timeo_sec = { .tv_sec = 3 };
-static const size_t timeo_optlen = sizeof(timeo_sec);
+void free_fds(int *fds, unsigned int nr_close_fds)
+{
+ if (fds) {
+ while (nr_close_fds)
+ close(fds[--nr_close_fds]);
+ free(fds);
+ }
+}
-int connect_to_fd(int family, int type, int server_fd)
+int fastopen_connect(int server_fd, const char *data, unsigned int data_len,
+ int timeout_ms)
{
- int fd, save_errno;
+ struct sockaddr_storage addr;
+ socklen_t addrlen = sizeof(addr);
+ struct sockaddr_in *addr_in;
+ int fd, ret;
- fd = socket(family, type, 0);
+ if (getsockname(server_fd, (struct sockaddr *)&addr, &addrlen)) {
+ log_err("Failed to get server addr");
+ return -1;
+ }
+
+ addr_in = (struct sockaddr_in *)&addr;
+ fd = socket(addr_in->sin_family, SOCK_STREAM, 0);
if (fd < 0) {
log_err("Failed to create client socket");
return -1;
}
- if (connect_fd_to_fd(fd, server_fd) < 0 && errno != EINPROGRESS) {
- save_errno = errno;
- close(fd);
- errno = save_errno;
+ if (settimeo(fd, timeout_ms))
+ goto error_close;
+
+ ret = sendto(fd, data, data_len, MSG_FASTOPEN, (struct sockaddr *)&addr,
+ addrlen);
+ if (ret != data_len) {
+ log_err("sendto(data, %u) != %d\n", data_len, ret);
+ goto error_close;
+ }
+
+ return fd;
+
+error_close:
+ save_errno_close(fd);
+ return -1;
+}
+
+static int connect_fd_to_addr(int fd,
+ const struct sockaddr_storage *addr,
+ socklen_t addrlen, const bool must_fail)
+{
+ int ret;
+
+ errno = 0;
+ ret = connect(fd, (const struct sockaddr *)addr, addrlen);
+ if (must_fail) {
+ if (!ret) {
+ log_err("Unexpected success to connect to server");
+ return -1;
+ }
+ if (errno != EPERM) {
+ log_err("Unexpected error from connect to server");
+ return -1;
+ }
+ } else {
+ if (ret) {
+ log_err("Failed to connect to server");
+ return -1;
+ }
+ }
+
+ return 0;
+}
+
+int connect_to_addr(const struct sockaddr_storage *addr, socklen_t addrlen, int type)
+{
+ int fd;
+
+ fd = socket(addr->ss_family, type, 0);
+ if (fd < 0) {
+ log_err("Failed to create client socket");
return -1;
}
+ if (connect_fd_to_addr(fd, addr, addrlen, false))
+ goto error_close;
+
return fd;
+
+error_close:
+ save_errno_close(fd);
+ return -1;
}
-int connect_fd_to_fd(int client_fd, int server_fd)
+static const struct network_helper_opts default_opts;
+
+int connect_to_fd_opts(int server_fd, const struct network_helper_opts *opts)
{
struct sockaddr_storage addr;
- socklen_t len = sizeof(addr);
- int save_errno;
+ struct sockaddr_in *addr_in;
+ socklen_t addrlen, optlen;
+ int fd, type, protocol;
- if (setsockopt(client_fd, SOL_SOCKET, SO_RCVTIMEO, &timeo_sec,
- timeo_optlen)) {
- log_err("Failed to set SO_RCVTIMEO");
- return -1;
+ if (!opts)
+ opts = &default_opts;
+
+ optlen = sizeof(type);
+
+ if (opts->type) {
+ type = opts->type;
+ } else {
+ if (getsockopt(server_fd, SOL_SOCKET, SO_TYPE, &type, &optlen)) {
+ log_err("getsockopt(SOL_TYPE)");
+ return -1;
+ }
}
- if (getsockname(server_fd, (struct sockaddr *)&addr, &len)) {
+ if (opts->proto) {
+ protocol = opts->proto;
+ } else {
+ if (getsockopt(server_fd, SOL_SOCKET, SO_PROTOCOL, &protocol, &optlen)) {
+ log_err("getsockopt(SOL_PROTOCOL)");
+ return -1;
+ }
+ }
+
+ addrlen = sizeof(addr);
+ if (getsockname(server_fd, (struct sockaddr *)&addr, &addrlen)) {
log_err("Failed to get server addr");
return -1;
}
- if (connect(client_fd, (const struct sockaddr *)&addr, len) < 0) {
- if (errno != EINPROGRESS) {
- save_errno = errno;
- log_err("Failed to connect to server");
- errno = save_errno;
- }
+ addr_in = (struct sockaddr_in *)&addr;
+ fd = socket(addr_in->sin_family, type, protocol);
+ if (fd < 0) {
+ log_err("Failed to create client socket");
return -1;
}
- return 0;
+ if (settimeo(fd, opts->timeout_ms))
+ goto error_close;
+
+ if (opts->cc && opts->cc[0] &&
+ setsockopt(fd, SOL_TCP, TCP_CONGESTION, opts->cc,
+ strlen(opts->cc) + 1))
+ goto error_close;
+
+ if (!opts->noconnect)
+ if (connect_fd_to_addr(fd, &addr, addrlen, opts->must_fail))
+ goto error_close;
+
+ return fd;
+
+error_close:
+ save_errno_close(fd);
+ return -1;
}
-int connect_wait(int fd)
+int connect_to_fd(int server_fd, int timeout_ms)
{
- struct epoll_event ev = {}, events[2];
- int timeout_ms = 1000;
- int efd, nfd;
+ struct network_helper_opts opts = {
+ .timeout_ms = timeout_ms,
+ };
- efd = epoll_create1(EPOLL_CLOEXEC);
- if (efd < 0) {
- log_err("Failed to open epoll fd");
+ return connect_to_fd_opts(server_fd, &opts);
+}
+
+int connect_fd_to_fd(int client_fd, int server_fd, int timeout_ms)
+{
+ struct sockaddr_storage addr;
+ socklen_t len = sizeof(addr);
+
+ if (settimeo(client_fd, timeout_ms))
return -1;
- }
- ev.events = EPOLLRDHUP | EPOLLOUT;
- ev.data.fd = fd;
+ if (getsockname(server_fd, (struct sockaddr *)&addr, &len)) {
+ log_err("Failed to get server addr");
+ return -1;
+ }
- if (epoll_ctl(efd, EPOLL_CTL_ADD, fd, &ev) < 0) {
- log_err("Failed to register fd=%d on epoll fd=%d", fd, efd);
- close(efd);
+ if (connect_fd_to_addr(client_fd, &addr, len, false))
return -1;
+
+ return 0;
+}
+
+int make_sockaddr(int family, const char *addr_str, __u16 port,
+ struct sockaddr_storage *addr, socklen_t *len)
+{
+ if (family == AF_INET) {
+ struct sockaddr_in *sin = (void *)addr;
+
+ memset(addr, 0, sizeof(*sin));
+ sin->sin_family = AF_INET;
+ sin->sin_port = htons(port);
+ if (addr_str &&
+ inet_pton(AF_INET, addr_str, &sin->sin_addr) != 1) {
+ log_err("inet_pton(AF_INET, %s)", addr_str);
+ return -1;
+ }
+ if (len)
+ *len = sizeof(*sin);
+ return 0;
+ } else if (family == AF_INET6) {
+ struct sockaddr_in6 *sin6 = (void *)addr;
+
+ memset(addr, 0, sizeof(*sin6));
+ sin6->sin6_family = AF_INET6;
+ sin6->sin6_port = htons(port);
+ if (addr_str &&
+ inet_pton(AF_INET6, addr_str, &sin6->sin6_addr) != 1) {
+ log_err("inet_pton(AF_INET6, %s)", addr_str);
+ return -1;
+ }
+ if (len)
+ *len = sizeof(*sin6);
+ return 0;
+ } else if (family == AF_UNIX) {
+ /* Note that we always use abstract unix sockets to avoid having
+ * to clean up leftover files.
+ */
+ struct sockaddr_un *sun = (void *)addr;
+
+ memset(addr, 0, sizeof(*sun));
+ sun->sun_family = family;
+ sun->sun_path[0] = 0;
+ strcpy(sun->sun_path + 1, addr_str);
+ if (len)
+ *len = offsetof(struct sockaddr_un, sun_path) + 1 + strlen(addr_str);
+ return 0;
+ }
+ return -1;
+}
+
+char *ping_command(int family)
+{
+ if (family == AF_INET6) {
+ /* On some systems 'ping' doesn't support IPv6, so use ping6 if it is present. */
+ if (!system("which ping6 >/dev/null 2>&1"))
+ return "ping6";
+ else
+ return "ping -6";
}
+ return "ping";
+}
+
+struct nstoken {
+ int orig_netns_fd;
+};
+
+struct nstoken *open_netns(const char *name)
+{
+ int nsfd;
+ char nspath[PATH_MAX];
+ int err;
+ struct nstoken *token;
+
+ token = calloc(1, sizeof(struct nstoken));
+ if (!ASSERT_OK_PTR(token, "malloc token"))
+ return NULL;
+
+ token->orig_netns_fd = open("/proc/self/ns/net", O_RDONLY);
+ if (!ASSERT_GE(token->orig_netns_fd, 0, "open /proc/self/ns/net"))
+ goto fail;
+
+ snprintf(nspath, sizeof(nspath), "%s/%s", "/var/run/netns", name);
+ nsfd = open(nspath, O_RDONLY | O_CLOEXEC);
+ if (!ASSERT_GE(nsfd, 0, "open netns fd"))
+ goto fail;
+
+ err = setns(nsfd, CLONE_NEWNET);
+ close(nsfd);
+ if (!ASSERT_OK(err, "setns"))
+ goto fail;
+
+ return token;
+fail:
+ free(token);
+ return NULL;
+}
+
+void close_netns(struct nstoken *token)
+{
+ if (!token)
+ return;
- nfd = epoll_wait(efd, events, ARRAY_SIZE(events), timeout_ms);
- if (nfd < 0)
- log_err("Failed to wait for I/O event on epoll fd=%d", efd);
+ ASSERT_OK(setns(token->orig_netns_fd, CLONE_NEWNET), "setns");
+ close(token->orig_netns_fd);
+ free(token);
+}
+
+int get_socket_local_port(int sock_fd)
+{
+ struct sockaddr_storage addr;
+ socklen_t addrlen = sizeof(addr);
+ int err;
+
+ err = getsockname(sock_fd, (struct sockaddr *)&addr, &addrlen);
+ if (err < 0)
+ return err;
+
+ if (addr.ss_family == AF_INET) {
+ struct sockaddr_in *sin = (struct sockaddr_in *)&addr;
+
+ return sin->sin_port;
+ } else if (addr.ss_family == AF_INET6) {
+ struct sockaddr_in6 *sin = (struct sockaddr_in6 *)&addr;
+
+ return sin->sin6_port;
+ }
- close(efd);
- return nfd;
+ return -1;
}
diff --git a/tools/testing/selftests/bpf/network_helpers.h b/tools/testing/selftests/bpf/network_helpers.h
index 6a8009605670..94b9be24e39b 100644
--- a/tools/testing/selftests/bpf/network_helpers.h
+++ b/tools/testing/selftests/bpf/network_helpers.h
@@ -17,6 +17,15 @@ typedef __u16 __sum16;
#define VIP_NUM 5
#define MAGIC_BYTES 123
+struct network_helper_opts {
+ const char *cc;
+ int timeout_ms;
+ bool must_fail;
+ bool noconnect;
+ int type;
+ int proto;
+};
+
/* ipv4 test vector */
struct ipv4_packet {
struct ethhdr eth;
@@ -33,10 +42,76 @@ struct ipv6_packet {
} __packed;
extern struct ipv6_packet pkt_v6;
-int start_server(int family, int type);
-int start_server_with_port(int family, int type, __u16 port);
-int connect_to_fd(int family, int type, int server_fd);
-int connect_fd_to_fd(int client_fd, int server_fd);
-int connect_wait(int client_fd);
+int settimeo(int fd, int timeout_ms);
+int start_server(int family, int type, const char *addr, __u16 port,
+ int timeout_ms);
+int start_mptcp_server(int family, const char *addr, __u16 port,
+ int timeout_ms);
+int *start_reuseport_server(int family, int type, const char *addr_str,
+ __u16 port, int timeout_ms,
+ unsigned int nr_listens);
+void free_fds(int *fds, unsigned int nr_close_fds);
+int connect_to_addr(const struct sockaddr_storage *addr, socklen_t len, int type);
+int connect_to_fd(int server_fd, int timeout_ms);
+int connect_to_fd_opts(int server_fd, const struct network_helper_opts *opts);
+int connect_fd_to_fd(int client_fd, int server_fd, int timeout_ms);
+int fastopen_connect(int server_fd, const char *data, unsigned int data_len,
+ int timeout_ms);
+int make_sockaddr(int family, const char *addr_str, __u16 port,
+ struct sockaddr_storage *addr, socklen_t *len);
+char *ping_command(int family);
+int get_socket_local_port(int sock_fd);
+
+struct nstoken;
+/**
+ * open_netns() - Switch to specified network namespace by name.
+ *
+ * Returns token with which to restore the original namespace
+ * using close_netns().
+ */
+struct nstoken *open_netns(const char *name);
+void close_netns(struct nstoken *token);
+
+static __u16 csum_fold(__u32 csum)
+{
+ csum = (csum & 0xffff) + (csum >> 16);
+ csum = (csum & 0xffff) + (csum >> 16);
+
+ return (__u16)~csum;
+}
+
+static inline __sum16 csum_tcpudp_magic(__be32 saddr, __be32 daddr,
+ __u32 len, __u8 proto,
+ __wsum csum)
+{
+ __u64 s = csum;
+
+ s += (__u32)saddr;
+ s += (__u32)daddr;
+ s += htons(proto + len);
+ s = (s & 0xffffffff) + (s >> 32);
+ s = (s & 0xffffffff) + (s >> 32);
+
+ return csum_fold((__u32)s);
+}
+
+static inline __sum16 csum_ipv6_magic(const struct in6_addr *saddr,
+ const struct in6_addr *daddr,
+ __u32 len, __u8 proto,
+ __wsum csum)
+{
+ __u64 s = csum;
+ int i;
+
+ for (i = 0; i < 4; i++)
+ s += (__u32)saddr->s6_addr32[i];
+ for (i = 0; i < 4; i++)
+ s += (__u32)daddr->s6_addr32[i];
+ s += htons(proto + len);
+ s = (s & 0xffffffff) + (s >> 32);
+ s = (s & 0xffffffff) + (s >> 32);
+
+ return csum_fold((__u32)s);
+}
#endif
diff --git a/tools/testing/selftests/bpf/prog_tests/access_variable_array.c b/tools/testing/selftests/bpf/prog_tests/access_variable_array.c
new file mode 100644
index 000000000000..08131782437c
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/access_variable_array.c
@@ -0,0 +1,16 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2022 Bytedance */
+
+#include <test_progs.h>
+#include "test_access_variable_array.skel.h"
+
+void test_access_variable_array(void)
+{
+ struct test_access_variable_array *skel;
+
+ skel = test_access_variable_array__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "test_access_variable_array__open_and_load"))
+ return;
+
+ test_access_variable_array__destroy(skel);
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/align.c b/tools/testing/selftests/bpf/prog_tests/align.c
index c548aded6585..4ebd0da898f5 100644
--- a/tools/testing/selftests/bpf/prog_tests/align.c
+++ b/tools/testing/selftests/bpf/prog_tests/align.c
@@ -2,10 +2,11 @@
#include <test_progs.h>
#define MAX_INSNS 512
-#define MAX_MATCHES 16
+#define MAX_MATCHES 24
struct bpf_reg_match {
unsigned int line;
+ const char *reg;
const char *match;
};
@@ -39,13 +40,13 @@ static struct bpf_align_test tests[] = {
},
.prog_type = BPF_PROG_TYPE_SCHED_CLS,
.matches = {
- {1, "R1=ctx(id=0,off=0,imm=0)"},
- {1, "R10=fp0"},
- {1, "R3_w=inv2"},
- {2, "R3_w=inv4"},
- {3, "R3_w=inv8"},
- {4, "R3_w=inv16"},
- {5, "R3_w=inv32"},
+ {0, "R1", "ctx()"},
+ {0, "R10", "fp0"},
+ {0, "R3_w", "2"},
+ {1, "R3_w", "4"},
+ {2, "R3_w", "8"},
+ {3, "R3_w", "16"},
+ {4, "R3_w", "32"},
},
},
{
@@ -67,19 +68,19 @@ static struct bpf_align_test tests[] = {
},
.prog_type = BPF_PROG_TYPE_SCHED_CLS,
.matches = {
- {1, "R1=ctx(id=0,off=0,imm=0)"},
- {1, "R10=fp0"},
- {1, "R3_w=inv1"},
- {2, "R3_w=inv2"},
- {3, "R3_w=inv4"},
- {4, "R3_w=inv8"},
- {5, "R3_w=inv16"},
- {6, "R3_w=inv1"},
- {7, "R4_w=inv32"},
- {8, "R4_w=inv16"},
- {9, "R4_w=inv8"},
- {10, "R4_w=inv4"},
- {11, "R4_w=inv2"},
+ {0, "R1", "ctx()"},
+ {0, "R10", "fp0"},
+ {0, "R3_w", "1"},
+ {1, "R3_w", "2"},
+ {2, "R3_w", "4"},
+ {3, "R3_w", "8"},
+ {4, "R3_w", "16"},
+ {5, "R3_w", "1"},
+ {6, "R4_w", "32"},
+ {7, "R4_w", "16"},
+ {8, "R4_w", "8"},
+ {9, "R4_w", "4"},
+ {10, "R4_w", "2"},
},
},
{
@@ -96,14 +97,14 @@ static struct bpf_align_test tests[] = {
},
.prog_type = BPF_PROG_TYPE_SCHED_CLS,
.matches = {
- {1, "R1=ctx(id=0,off=0,imm=0)"},
- {1, "R10=fp0"},
- {1, "R3_w=inv4"},
- {2, "R3_w=inv8"},
- {3, "R3_w=inv10"},
- {4, "R4_w=inv8"},
- {5, "R4_w=inv12"},
- {6, "R4_w=inv14"},
+ {0, "R1", "ctx()"},
+ {0, "R10", "fp0"},
+ {0, "R3_w", "4"},
+ {1, "R3_w", "8"},
+ {2, "R3_w", "10"},
+ {3, "R4_w", "8"},
+ {4, "R4_w", "12"},
+ {5, "R4_w", "14"},
},
},
{
@@ -118,12 +119,12 @@ static struct bpf_align_test tests[] = {
},
.prog_type = BPF_PROG_TYPE_SCHED_CLS,
.matches = {
- {1, "R1=ctx(id=0,off=0,imm=0)"},
- {1, "R10=fp0"},
- {1, "R3_w=inv7"},
- {2, "R3_w=inv7"},
- {3, "R3_w=inv14"},
- {4, "R3_w=inv56"},
+ {0, "R1", "ctx()"},
+ {0, "R10", "fp0"},
+ {0, "R3_w", "7"},
+ {1, "R3_w", "7"},
+ {2, "R3_w", "14"},
+ {3, "R3_w", "56"},
},
},
@@ -161,19 +162,19 @@ static struct bpf_align_test tests[] = {
},
.prog_type = BPF_PROG_TYPE_SCHED_CLS,
.matches = {
- {7, "R0_w=pkt(id=0,off=8,r=8,imm=0)"},
- {7, "R3_w=inv(id=0,umax_value=255,var_off=(0x0; 0xff))"},
- {8, "R3_w=inv(id=0,umax_value=510,var_off=(0x0; 0x1fe))"},
- {9, "R3_w=inv(id=0,umax_value=1020,var_off=(0x0; 0x3fc))"},
- {10, "R3_w=inv(id=0,umax_value=2040,var_off=(0x0; 0x7f8))"},
- {11, "R3_w=inv(id=0,umax_value=4080,var_off=(0x0; 0xff0))"},
- {18, "R3=pkt_end(id=0,off=0,imm=0)"},
- {18, "R4_w=inv(id=0,umax_value=255,var_off=(0x0; 0xff))"},
- {19, "R4_w=inv(id=0,umax_value=8160,var_off=(0x0; 0x1fe0))"},
- {20, "R4_w=inv(id=0,umax_value=4080,var_off=(0x0; 0xff0))"},
- {21, "R4_w=inv(id=0,umax_value=2040,var_off=(0x0; 0x7f8))"},
- {22, "R4_w=inv(id=0,umax_value=1020,var_off=(0x0; 0x3fc))"},
- {23, "R4_w=inv(id=0,umax_value=510,var_off=(0x0; 0x1fe))"},
+ {6, "R0_w", "pkt(off=8,r=8)"},
+ {6, "R3_w", "var_off=(0x0; 0xff)"},
+ {7, "R3_w", "var_off=(0x0; 0x1fe)"},
+ {8, "R3_w", "var_off=(0x0; 0x3fc)"},
+ {9, "R3_w", "var_off=(0x0; 0x7f8)"},
+ {10, "R3_w", "var_off=(0x0; 0xff0)"},
+ {12, "R3_w", "pkt_end()"},
+ {17, "R4_w", "var_off=(0x0; 0xff)"},
+ {18, "R4_w", "var_off=(0x0; 0x1fe0)"},
+ {19, "R4_w", "var_off=(0x0; 0xff0)"},
+ {20, "R4_w", "var_off=(0x0; 0x7f8)"},
+ {21, "R4_w", "var_off=(0x0; 0x3fc)"},
+ {22, "R4_w", "var_off=(0x0; 0x1fe)"},
},
},
{
@@ -194,16 +195,16 @@ static struct bpf_align_test tests[] = {
},
.prog_type = BPF_PROG_TYPE_SCHED_CLS,
.matches = {
- {7, "R3_w=inv(id=0,umax_value=255,var_off=(0x0; 0xff))"},
- {8, "R4_w=inv(id=0,umax_value=255,var_off=(0x0; 0xff))"},
- {9, "R4_w=inv(id=0,umax_value=255,var_off=(0x0; 0xff))"},
- {10, "R4_w=inv(id=0,umax_value=255,var_off=(0x0; 0xff))"},
- {11, "R4_w=inv(id=0,umax_value=510,var_off=(0x0; 0x1fe))"},
- {12, "R4_w=inv(id=0,umax_value=255,var_off=(0x0; 0xff))"},
- {13, "R4_w=inv(id=0,umax_value=1020,var_off=(0x0; 0x3fc))"},
- {14, "R4_w=inv(id=0,umax_value=255,var_off=(0x0; 0xff))"},
- {15, "R4_w=inv(id=0,umax_value=2040,var_off=(0x0; 0x7f8))"},
- {16, "R4_w=inv(id=0,umax_value=4080,var_off=(0x0; 0xff0))"},
+ {6, "R3_w", "var_off=(0x0; 0xff)"},
+ {7, "R4_w", "var_off=(0x0; 0xff)"},
+ {8, "R4_w", "var_off=(0x0; 0xff)"},
+ {9, "R4_w", "var_off=(0x0; 0xff)"},
+ {10, "R4_w", "var_off=(0x0; 0x1fe)"},
+ {11, "R4_w", "var_off=(0x0; 0xff)"},
+ {12, "R4_w", "var_off=(0x0; 0x3fc)"},
+ {13, "R4_w", "var_off=(0x0; 0xff)"},
+ {14, "R4_w", "var_off=(0x0; 0x7f8)"},
+ {15, "R4_w", "var_off=(0x0; 0xff0)"},
},
},
{
@@ -234,14 +235,14 @@ static struct bpf_align_test tests[] = {
},
.prog_type = BPF_PROG_TYPE_SCHED_CLS,
.matches = {
- {4, "R5_w=pkt(id=0,off=0,r=0,imm=0)"},
- {5, "R5_w=pkt(id=0,off=14,r=0,imm=0)"},
- {6, "R4_w=pkt(id=0,off=14,r=0,imm=0)"},
- {10, "R2=pkt(id=0,off=0,r=18,imm=0)"},
- {10, "R5=pkt(id=0,off=14,r=18,imm=0)"},
- {10, "R4_w=inv(id=0,umax_value=255,var_off=(0x0; 0xff))"},
- {14, "R4_w=inv(id=0,umax_value=65535,var_off=(0x0; 0xffff))"},
- {15, "R4_w=inv(id=0,umax_value=65535,var_off=(0x0; 0xffff))"},
+ {2, "R5_w", "pkt(r=0)"},
+ {4, "R5_w", "pkt(off=14,r=0)"},
+ {5, "R4_w", "pkt(off=14,r=0)"},
+ {9, "R2", "pkt(r=18)"},
+ {10, "R5", "pkt(off=14,r=18)"},
+ {10, "R4_w", "var_off=(0x0; 0xff)"},
+ {13, "R4_w", "var_off=(0x0; 0xffff)"},
+ {14, "R4_w", "var_off=(0x0; 0xffff)"},
},
},
{
@@ -267,6 +268,7 @@ static struct bpf_align_test tests[] = {
*/
BPF_MOV64_REG(BPF_REG_5, BPF_REG_2),
BPF_ALU64_REG(BPF_ADD, BPF_REG_5, BPF_REG_6),
+ BPF_MOV64_REG(BPF_REG_4, BPF_REG_5),
BPF_ALU64_IMM(BPF_ADD, BPF_REG_5, 14),
BPF_MOV64_REG(BPF_REG_4, BPF_REG_5),
BPF_ALU64_IMM(BPF_ADD, BPF_REG_4, 4),
@@ -280,6 +282,7 @@ static struct bpf_align_test tests[] = {
BPF_MOV64_REG(BPF_REG_5, BPF_REG_2),
BPF_ALU64_IMM(BPF_ADD, BPF_REG_5, 14),
BPF_ALU64_REG(BPF_ADD, BPF_REG_5, BPF_REG_6),
+ BPF_MOV64_REG(BPF_REG_4, BPF_REG_5),
BPF_ALU64_IMM(BPF_ADD, BPF_REG_5, 4),
BPF_ALU64_REG(BPF_ADD, BPF_REG_5, BPF_REG_6),
BPF_MOV64_REG(BPF_REG_4, BPF_REG_5),
@@ -296,59 +299,67 @@ static struct bpf_align_test tests[] = {
/* Calculated offset in R6 has unknown value, but known
* alignment of 4.
*/
- {8, "R2_w=pkt(id=0,off=0,r=8,imm=0)"},
- {8, "R6_w=inv(id=0,umax_value=1020,var_off=(0x0; 0x3fc))"},
+ {6, "R2_w", "pkt(r=8)"},
+ {7, "R6_w", "var_off=(0x0; 0x3fc)"},
/* Offset is added to packet pointer R5, resulting in
* known fixed offset, and variable offset from R6.
*/
- {11, "R5_w=pkt(id=1,off=14,r=0,umax_value=1020,var_off=(0x0; 0x3fc))"},
+ {11, "R5_w", "pkt(id=1,off=14,"},
/* At the time the word size load is performed from R5,
* it's total offset is NET_IP_ALIGN + reg->off (0) +
* reg->aux_off (14) which is 16. Then the variable
* offset is considered using reg->aux_off_align which
* is 4 and meets the load's requirements.
*/
- {15, "R4=pkt(id=1,off=18,r=18,umax_value=1020,var_off=(0x0; 0x3fc))"},
- {15, "R5=pkt(id=1,off=14,r=18,umax_value=1020,var_off=(0x0; 0x3fc))"},
+ {15, "R4", "var_off=(0x0; 0x3fc)"},
+ {15, "R5", "var_off=(0x0; 0x3fc)"},
/* Variable offset is added to R5 packet pointer,
- * resulting in auxiliary alignment of 4.
+ * resulting in auxiliary alignment of 4. To avoid BPF
+ * verifier's precision backtracking logging
+ * interfering we also have a no-op R4 = R5
+ * instruction to validate R5 state. We also check
+ * that R4 is what it should be in such case.
*/
- {18, "R5_w=pkt(id=2,off=0,r=0,umax_value=1020,var_off=(0x0; 0x3fc))"},
+ {18, "R4_w", "var_off=(0x0; 0x3fc)"},
+ {18, "R5_w", "var_off=(0x0; 0x3fc)"},
/* Constant offset is added to R5, resulting in
* reg->off of 14.
*/
- {19, "R5_w=pkt(id=2,off=14,r=0,umax_value=1020,var_off=(0x0; 0x3fc))"},
+ {19, "R5_w", "pkt(id=2,off=14,"},
/* At the time the word size load is performed from R5,
* its total fixed offset is NET_IP_ALIGN + reg->off
* (14) which is 16. Then the variable offset is 4-byte
* aligned, so the total offset is 4-byte aligned and
* meets the load's requirements.
*/
- {23, "R4=pkt(id=2,off=18,r=18,umax_value=1020,var_off=(0x0; 0x3fc))"},
- {23, "R5=pkt(id=2,off=14,r=18,umax_value=1020,var_off=(0x0; 0x3fc))"},
+ {24, "R4", "var_off=(0x0; 0x3fc)"},
+ {24, "R5", "var_off=(0x0; 0x3fc)"},
/* Constant offset is added to R5 packet pointer,
* resulting in reg->off value of 14.
*/
- {26, "R5_w=pkt(id=0,off=14,r=8"},
+ {26, "R5_w", "pkt(off=14,r=8)"},
/* Variable offset is added to R5, resulting in a
- * variable offset of (4n).
+ * variable offset of (4n). See comment for insn #18
+ * for R4 = R5 trick.
*/
- {27, "R5_w=pkt(id=3,off=14,r=0,umax_value=1020,var_off=(0x0; 0x3fc))"},
+ {28, "R4_w", "var_off=(0x0; 0x3fc)"},
+ {28, "R5_w", "var_off=(0x0; 0x3fc)"},
/* Constant is added to R5 again, setting reg->off to 18. */
- {28, "R5_w=pkt(id=3,off=18,r=0,umax_value=1020,var_off=(0x0; 0x3fc))"},
+ {29, "R5_w", "pkt(id=3,off=18,"},
/* And once more we add a variable; resulting var_off
* is still (4n), fixed offset is not changed.
* Also, we create a new reg->id.
*/
- {29, "R5_w=pkt(id=4,off=18,r=0,umax_value=2040,var_off=(0x0; 0x7fc)"},
+ {31, "R4_w", "var_off=(0x0; 0x7fc)"},
+ {31, "R5_w", "var_off=(0x0; 0x7fc)"},
/* At the time the word size load is performed from R5,
* its total fixed offset is NET_IP_ALIGN + reg->off (18)
* which is 20. Then the variable offset is (4n), so
* the total offset is 4-byte aligned and meets the
* load's requirements.
*/
- {33, "R4=pkt(id=4,off=22,r=22,umax_value=2040,var_off=(0x0; 0x7fc)"},
- {33, "R5=pkt(id=4,off=18,r=22,umax_value=2040,var_off=(0x0; 0x7fc)"},
+ {35, "R4", "var_off=(0x0; 0x7fc)"},
+ {35, "R5", "var_off=(0x0; 0x7fc)"},
},
},
{
@@ -386,36 +397,36 @@ static struct bpf_align_test tests[] = {
/* Calculated offset in R6 has unknown value, but known
* alignment of 4.
*/
- {8, "R2_w=pkt(id=0,off=0,r=8,imm=0)"},
- {8, "R6_w=inv(id=0,umax_value=1020,var_off=(0x0; 0x3fc))"},
+ {6, "R2_w", "pkt(r=8)"},
+ {7, "R6_w", "var_off=(0x0; 0x3fc)"},
/* Adding 14 makes R6 be (4n+2) */
- {9, "R6_w=inv(id=0,umin_value=14,umax_value=1034,var_off=(0x2; 0x7fc))"},
+ {8, "R6_w", "var_off=(0x2; 0x7fc)"},
/* Packet pointer has (4n+2) offset */
- {11, "R5_w=pkt(id=1,off=0,r=0,umin_value=14,umax_value=1034,var_off=(0x2; 0x7fc)"},
- {13, "R4=pkt(id=1,off=4,r=0,umin_value=14,umax_value=1034,var_off=(0x2; 0x7fc)"},
+ {11, "R5_w", "var_off=(0x2; 0x7fc)"},
+ {12, "R4", "var_off=(0x2; 0x7fc)"},
/* At the time the word size load is performed from R5,
* its total fixed offset is NET_IP_ALIGN + reg->off (0)
* which is 2. Then the variable offset is (4n+2), so
* the total offset is 4-byte aligned and meets the
* load's requirements.
*/
- {15, "R5=pkt(id=1,off=0,r=4,umin_value=14,umax_value=1034,var_off=(0x2; 0x7fc)"},
+ {15, "R5", "var_off=(0x2; 0x7fc)"},
/* Newly read value in R6 was shifted left by 2, so has
* known alignment of 4.
*/
- {18, "R6_w=inv(id=0,umax_value=1020,var_off=(0x0; 0x3fc))"},
+ {17, "R6_w", "var_off=(0x0; 0x3fc)"},
/* Added (4n) to packet pointer's (4n+2) var_off, giving
* another (4n+2).
*/
- {19, "R5_w=pkt(id=2,off=0,r=0,umin_value=14,umax_value=2054,var_off=(0x2; 0xffc)"},
- {21, "R4=pkt(id=2,off=4,r=0,umin_value=14,umax_value=2054,var_off=(0x2; 0xffc)"},
+ {19, "R5_w", "var_off=(0x2; 0xffc)"},
+ {20, "R4", "var_off=(0x2; 0xffc)"},
/* At the time the word size load is performed from R5,
* its total fixed offset is NET_IP_ALIGN + reg->off (0)
* which is 2. Then the variable offset is (4n+2), so
* the total offset is 4-byte aligned and meets the
* load's requirements.
*/
- {23, "R5=pkt(id=2,off=0,r=4,umin_value=14,umax_value=2054,var_off=(0x2; 0xffc)"},
+ {23, "R5", "var_off=(0x2; 0xffc)"},
},
},
{
@@ -448,18 +459,18 @@ static struct bpf_align_test tests[] = {
.prog_type = BPF_PROG_TYPE_SCHED_CLS,
.result = REJECT,
.matches = {
- {4, "R5_w=pkt_end(id=0,off=0,imm=0)"},
+ {3, "R5_w", "pkt_end()"},
/* (ptr - ptr) << 2 == unknown, (4n) */
- {6, "R5_w=inv(id=0,smax_value=9223372036854775804,umax_value=18446744073709551612,var_off=(0x0; 0xfffffffffffffffc)"},
+ {5, "R5_w", "var_off=(0x0; 0xfffffffffffffffc)"},
/* (4n) + 14 == (4n+2). We blow our bounds, because
* the add could overflow.
*/
- {7, "R5_w=inv(id=0,smin_value=-9223372036854775806,smax_value=9223372036854775806,umin_value=2,umax_value=18446744073709551614,var_off=(0x2; 0xfffffffffffffffc)"},
+ {6, "R5_w", "var_off=(0x2; 0xfffffffffffffffc)"},
/* Checked s>=0 */
- {9, "R5=inv(id=0,umin_value=2,umax_value=9223372034707292158,var_off=(0x2; 0x7fffffff7ffffffc)"},
+ {9, "R5", "var_off=(0x2; 0x7ffffffffffffffc)"},
/* packet pointer + nonnegative (4n+2) */
- {11, "R6_w=pkt(id=1,off=0,r=0,umin_value=2,umax_value=9223372034707292158,var_off=(0x2; 0x7fffffff7ffffffc)"},
- {13, "R4_w=pkt(id=1,off=4,r=0,umin_value=2,umax_value=9223372034707292158,var_off=(0x2; 0x7fffffff7ffffffc)"},
+ {11, "R6_w", "var_off=(0x2; 0x7ffffffffffffffc)"},
+ {12, "R4_w", "var_off=(0x2; 0x7ffffffffffffffc)"},
/* NET_IP_ALIGN + (4n+2) == (4n), alignment is fine.
* We checked the bounds, but it might have been able
* to overflow if the packet pointer started in the
@@ -467,7 +478,7 @@ static struct bpf_align_test tests[] = {
* So we did not get a 'range' on R6, and the access
* attempt will fail.
*/
- {15, "R6_w=pkt(id=1,off=0,r=0,umin_value=2,umax_value=9223372034707292158,var_off=(0x2; 0x7fffffff7ffffffc)"},
+ {15, "R6_w", "var_off=(0x2; 0x7ffffffffffffffc)"},
}
},
{
@@ -502,24 +513,23 @@ static struct bpf_align_test tests[] = {
/* Calculated offset in R6 has unknown value, but known
* alignment of 4.
*/
- {7, "R2_w=pkt(id=0,off=0,r=8,imm=0)"},
- {9, "R6_w=inv(id=0,umax_value=1020,var_off=(0x0; 0x3fc))"},
+ {6, "R2_w", "pkt(r=8)"},
+ {8, "R6_w", "var_off=(0x0; 0x3fc)"},
/* Adding 14 makes R6 be (4n+2) */
- {10, "R6_w=inv(id=0,umin_value=14,umax_value=1034,var_off=(0x2; 0x7fc))"},
+ {9, "R6_w", "var_off=(0x2; 0x7fc)"},
/* New unknown value in R7 is (4n) */
- {11, "R7_w=inv(id=0,umax_value=1020,var_off=(0x0; 0x3fc))"},
+ {10, "R7_w", "var_off=(0x0; 0x3fc)"},
/* Subtracting it from R6 blows our unsigned bounds */
- {12, "R6=inv(id=0,smin_value=-1006,smax_value=1034,umin_value=2,umax_value=18446744073709551614,var_off=(0x2; 0xfffffffffffffffc)"},
+ {11, "R6", "var_off=(0x2; 0xfffffffffffffffc)"},
/* Checked s>= 0 */
- {14, "R6=inv(id=0,umin_value=2,umax_value=1034,var_off=(0x2; 0x7fc))"},
+ {14, "R6", "var_off=(0x2; 0x7fc)"},
/* At the time the word size load is performed from R5,
* its total fixed offset is NET_IP_ALIGN + reg->off (0)
* which is 2. Then the variable offset is (4n+2), so
* the total offset is 4-byte aligned and meets the
* load's requirements.
*/
- {20, "R5=pkt(id=1,off=0,r=4,umin_value=2,umax_value=1034,var_off=(0x2; 0x7fc)"},
-
+ {20, "R5", "var_off=(0x2; 0x7fc)"},
},
},
{
@@ -556,23 +566,23 @@ static struct bpf_align_test tests[] = {
/* Calculated offset in R6 has unknown value, but known
* alignment of 4.
*/
- {7, "R2_w=pkt(id=0,off=0,r=8,imm=0)"},
- {10, "R6_w=inv(id=0,umax_value=60,var_off=(0x0; 0x3c))"},
+ {6, "R2_w", "pkt(r=8)"},
+ {9, "R6_w", "var_off=(0x0; 0x3c)"},
/* Adding 14 makes R6 be (4n+2) */
- {11, "R6_w=inv(id=0,umin_value=14,umax_value=74,var_off=(0x2; 0x7c))"},
+ {10, "R6_w", "var_off=(0x2; 0x7c)"},
/* Subtracting from packet pointer overflows ubounds */
- {13, "R5_w=pkt(id=1,off=0,r=8,umin_value=18446744073709551542,umax_value=18446744073709551602,var_off=(0xffffffffffffff82; 0x7c)"},
+ {13, "R5_w", "var_off=(0xffffffffffffff82; 0x7c)"},
/* New unknown value in R7 is (4n), >= 76 */
- {15, "R7_w=inv(id=0,umin_value=76,umax_value=1096,var_off=(0x0; 0x7fc))"},
+ {14, "R7_w", "var_off=(0x0; 0x7fc)"},
/* Adding it to packet pointer gives nice bounds again */
- {16, "R5_w=pkt(id=2,off=0,r=0,umin_value=2,umax_value=1082,var_off=(0x2; 0xfffffffc)"},
+ {16, "R5_w", "var_off=(0x2; 0x7fc)"},
/* At the time the word size load is performed from R5,
* its total fixed offset is NET_IP_ALIGN + reg->off (0)
* which is 2. Then the variable offset is (4n+2), so
* the total offset is 4-byte aligned and meets the
* load's requirements.
*/
- {20, "R5=pkt(id=2,off=0,r=4,umin_value=2,umax_value=1082,var_off=(0x2; 0xfffffffc)"},
+ {20, "R5", "var_off=(0x2; 0x7fc)"},
},
},
};
@@ -594,6 +604,12 @@ static int do_test_single(struct bpf_align_test *test)
struct bpf_insn *prog = test->insns;
int prog_type = test->prog_type;
char bpf_vlog_copy[32768];
+ LIBBPF_OPTS(bpf_prog_load_opts, opts,
+ .prog_flags = BPF_F_STRICT_ALIGNMENT,
+ .log_buf = bpf_vlog,
+ .log_size = sizeof(bpf_vlog),
+ .log_level = 2,
+ );
const char *line_ptr;
int cur_line = -1;
int prog_len, i;
@@ -601,9 +617,8 @@ static int do_test_single(struct bpf_align_test *test)
int ret;
prog_len = probe_filter_length(prog);
- fd_prog = bpf_verify_program(prog_type ? : BPF_PROG_TYPE_SOCKET_FILTER,
- prog, prog_len, BPF_F_STRICT_ALIGNMENT,
- "GPL", 0, bpf_vlog, sizeof(bpf_vlog), 2);
+ fd_prog = bpf_prog_load(prog_type ? : BPF_PROG_TYPE_SOCKET_FILTER, NULL, "GPL",
+ prog, prog_len, &opts);
if (fd_prog < 0 && test->result != REJECT) {
printf("Failed to load program.\n");
printf("%s", bpf_vlog);
@@ -620,26 +635,48 @@ static int do_test_single(struct bpf_align_test *test)
line_ptr = strtok(bpf_vlog_copy, "\n");
for (i = 0; i < MAX_MATCHES; i++) {
struct bpf_reg_match m = test->matches[i];
+ const char *p;
+ int tmp;
if (!m.match)
break;
while (line_ptr) {
cur_line = -1;
sscanf(line_ptr, "%u: ", &cur_line);
+ if (cur_line == -1)
+ sscanf(line_ptr, "from %u to %u: ", &tmp, &cur_line);
if (cur_line == m.line)
break;
line_ptr = strtok(NULL, "\n");
}
if (!line_ptr) {
- printf("Failed to find line %u for match: %s\n",
- m.line, m.match);
+ printf("Failed to find line %u for match: %s=%s\n",
+ m.line, m.reg, m.match);
ret = 1;
printf("%s", bpf_vlog);
break;
}
- if (!strstr(line_ptr, m.match)) {
- printf("Failed to find match %u: %s\n",
- m.line, m.match);
+ /* Check the next line as well in case the previous line
+ * did not have a corresponding bpf insn. Example:
+ * func#0 @0
+ * 0: R1=ctx() R10=fp0
+ * 0: (b7) r3 = 2 ; R3_w=2
+ *
+ * Sometimes it's actually two lines below, e.g. when
+ * searching for "6: R3_w=scalar(umax=255,var_off=(0x0; 0xff))":
+ * from 4 to 6: R0_w=pkt(off=8,r=8) R1=ctx() R2_w=pkt(r=8) R3_w=pkt_end() R10=fp0
+ * 6: R0_w=pkt(off=8,r=8) R1=ctx() R2_w=pkt(r=8) R3_w=pkt_end() R10=fp0
+ * 6: (71) r3 = *(u8 *)(r2 +0) ; R2_w=pkt(r=8) R3_w=scalar(umax=255,var_off=(0x0; 0xff))
+ */
+ while (!(p = strstr(line_ptr, m.reg)) || !strstr(p, m.match)) {
+ cur_line = -1;
+ line_ptr = strtok(NULL, "\n");
+ sscanf(line_ptr ?: "", "%u: ", &cur_line);
+ if (!line_ptr || cur_line != m.line)
+ break;
+ }
+ if (cur_line != m.line || !line_ptr || !(p = strstr(line_ptr, m.reg)) || !strstr(p, m.match)) {
+ printf("Failed to find match %u: %s=%s\n", m.line, m.reg, m.match);
ret = 1;
printf("%s", bpf_vlog);
break;
@@ -661,6 +698,6 @@ void test_align(void)
if (!test__start_subtest(test->descr))
continue;
- CHECK_FAIL(do_test_single(test));
+ ASSERT_OK(do_test_single(test), test->descr);
}
}
diff --git a/tools/testing/selftests/bpf/prog_tests/arena_htab.c b/tools/testing/selftests/bpf/prog_tests/arena_htab.c
new file mode 100644
index 000000000000..0766702de846
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/arena_htab.c
@@ -0,0 +1,88 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2024 Meta Platforms, Inc. and affiliates. */
+#include <test_progs.h>
+#include <sys/mman.h>
+#include <network_helpers.h>
+
+#include "arena_htab_asm.skel.h"
+#include "arena_htab.skel.h"
+
+#define PAGE_SIZE 4096
+
+#include "bpf_arena_htab.h"
+
+static void test_arena_htab_common(struct htab *htab)
+{
+ int i;
+
+ printf("htab %p buckets %p n_buckets %d\n", htab, htab->buckets, htab->n_buckets);
+ ASSERT_OK_PTR(htab->buckets, "htab->buckets shouldn't be NULL");
+ for (i = 0; htab->buckets && i < 16; i += 4) {
+ /*
+ * Walk htab buckets and link lists since all pointers are correct,
+ * though they were written by bpf program.
+ */
+ int val = htab_lookup_elem(htab, i);
+
+ ASSERT_EQ(i, val, "key == value");
+ }
+}
+
+static void test_arena_htab_llvm(void)
+{
+ LIBBPF_OPTS(bpf_test_run_opts, opts);
+ struct arena_htab *skel;
+ struct htab *htab;
+ size_t arena_sz;
+ void *area;
+ int ret;
+
+ skel = arena_htab__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "arena_htab__open_and_load"))
+ return;
+
+ area = bpf_map__initial_value(skel->maps.arena, &arena_sz);
+ /* fault-in a page with pgoff == 0 as sanity check */
+ *(volatile int *)area = 0x55aa;
+
+ /* bpf prog will allocate more pages */
+ ret = bpf_prog_test_run_opts(bpf_program__fd(skel->progs.arena_htab_llvm), &opts);
+ ASSERT_OK(ret, "ret");
+ ASSERT_OK(opts.retval, "retval");
+ if (skel->bss->skip) {
+ printf("%s:SKIP:compiler doesn't support arena_cast\n", __func__);
+ test__skip();
+ goto out;
+ }
+ htab = skel->bss->htab_for_user;
+ test_arena_htab_common(htab);
+out:
+ arena_htab__destroy(skel);
+}
+
+static void test_arena_htab_asm(void)
+{
+ LIBBPF_OPTS(bpf_test_run_opts, opts);
+ struct arena_htab_asm *skel;
+ struct htab *htab;
+ int ret;
+
+ skel = arena_htab_asm__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "arena_htab_asm__open_and_load"))
+ return;
+
+ ret = bpf_prog_test_run_opts(bpf_program__fd(skel->progs.arena_htab_asm), &opts);
+ ASSERT_OK(ret, "ret");
+ ASSERT_OK(opts.retval, "retval");
+ htab = skel->bss->htab_for_user;
+ test_arena_htab_common(htab);
+ arena_htab_asm__destroy(skel);
+}
+
+void test_arena_htab(void)
+{
+ if (test__start_subtest("arena_htab_llvm"))
+ test_arena_htab_llvm();
+ if (test__start_subtest("arena_htab_asm"))
+ test_arena_htab_asm();
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/arena_list.c b/tools/testing/selftests/bpf/prog_tests/arena_list.c
new file mode 100644
index 000000000000..e61886debab1
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/arena_list.c
@@ -0,0 +1,68 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2024 Meta Platforms, Inc. and affiliates. */
+#include <test_progs.h>
+#include <sys/mman.h>
+#include <network_helpers.h>
+
+#define PAGE_SIZE 4096
+
+#include "bpf_arena_list.h"
+#include "arena_list.skel.h"
+
+struct elem {
+ struct arena_list_node node;
+ __u64 value;
+};
+
+static int list_sum(struct arena_list_head *head)
+{
+ struct elem __arena *n;
+ int sum = 0;
+
+ list_for_each_entry(n, head, node)
+ sum += n->value;
+ return sum;
+}
+
+static void test_arena_list_add_del(int cnt)
+{
+ LIBBPF_OPTS(bpf_test_run_opts, opts);
+ struct arena_list *skel;
+ int expected_sum = (u64)cnt * (cnt - 1) / 2;
+ int ret, sum;
+
+ skel = arena_list__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "arena_list__open_and_load"))
+ return;
+
+ skel->bss->cnt = cnt;
+ ret = bpf_prog_test_run_opts(bpf_program__fd(skel->progs.arena_list_add), &opts);
+ ASSERT_OK(ret, "ret_add");
+ ASSERT_OK(opts.retval, "retval");
+ if (skel->bss->skip) {
+ printf("%s:SKIP:compiler doesn't support arena_cast\n", __func__);
+ test__skip();
+ goto out;
+ }
+ sum = list_sum(skel->bss->list_head);
+ ASSERT_EQ(sum, expected_sum, "sum of elems");
+ ASSERT_EQ(skel->arena->arena_sum, expected_sum, "__arena sum of elems");
+ ASSERT_EQ(skel->arena->test_val, cnt + 1, "num of elems");
+
+ ret = bpf_prog_test_run_opts(bpf_program__fd(skel->progs.arena_list_del), &opts);
+ ASSERT_OK(ret, "ret_del");
+ sum = list_sum(skel->bss->list_head);
+ ASSERT_EQ(sum, 0, "sum of list elems after del");
+ ASSERT_EQ(skel->bss->list_sum, expected_sum, "sum of list elems computed by prog");
+ ASSERT_EQ(skel->arena->arena_sum, expected_sum, "__arena sum of elems");
+out:
+ arena_list__destroy(skel);
+}
+
+void test_arena_list(void)
+{
+ if (test__start_subtest("arena_list_1"))
+ test_arena_list_add_del(1);
+ if (test__start_subtest("arena_list_1000"))
+ test_arena_list_add_del(1000);
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/arg_parsing.c b/tools/testing/selftests/bpf/prog_tests/arg_parsing.c
new file mode 100644
index 000000000000..bb143de68875
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/arg_parsing.c
@@ -0,0 +1,175 @@
+// SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause)
+
+#include "test_progs.h"
+#include "testing_helpers.h"
+
+static void init_test_filter_set(struct test_filter_set *set)
+{
+ set->cnt = 0;
+ set->tests = NULL;
+}
+
+static void free_test_filter_set(struct test_filter_set *set)
+{
+ int i, j;
+
+ for (i = 0; i < set->cnt; i++) {
+ for (j = 0; j < set->tests[i].subtest_cnt; j++)
+ free((void *)set->tests[i].subtests[j]);
+ free(set->tests[i].subtests);
+ free(set->tests[i].name);
+ }
+
+ free(set->tests);
+ init_test_filter_set(set);
+}
+
+static void test_parse_test_list(void)
+{
+ struct test_filter_set set;
+
+ init_test_filter_set(&set);
+
+ ASSERT_OK(parse_test_list("arg_parsing", &set, true), "parsing");
+ if (!ASSERT_EQ(set.cnt, 1, "test filters count"))
+ goto error;
+ if (!ASSERT_OK_PTR(set.tests, "test filters initialized"))
+ goto error;
+ ASSERT_EQ(set.tests[0].subtest_cnt, 0, "subtest filters count");
+ ASSERT_OK(strcmp("arg_parsing", set.tests[0].name), "subtest name");
+ free_test_filter_set(&set);
+
+ ASSERT_OK(parse_test_list("arg_parsing,bpf_cookie", &set, true),
+ "parsing");
+ if (!ASSERT_EQ(set.cnt, 2, "count of test filters"))
+ goto error;
+ if (!ASSERT_OK_PTR(set.tests, "test filters initialized"))
+ goto error;
+ ASSERT_EQ(set.tests[0].subtest_cnt, 0, "subtest filters count");
+ ASSERT_EQ(set.tests[1].subtest_cnt, 0, "subtest filters count");
+ ASSERT_OK(strcmp("arg_parsing", set.tests[0].name), "test name");
+ ASSERT_OK(strcmp("bpf_cookie", set.tests[1].name), "test name");
+ free_test_filter_set(&set);
+
+ ASSERT_OK(parse_test_list("arg_parsing/arg_parsing,bpf_cookie",
+ &set,
+ true),
+ "parsing");
+ if (!ASSERT_EQ(set.cnt, 2, "count of test filters"))
+ goto error;
+ if (!ASSERT_OK_PTR(set.tests, "test filters initialized"))
+ goto error;
+ if (!ASSERT_EQ(set.tests[0].subtest_cnt, 1, "subtest filters count"))
+ goto error;
+ ASSERT_EQ(set.tests[1].subtest_cnt, 0, "subtest filters count");
+ ASSERT_OK(strcmp("arg_parsing", set.tests[0].name), "test name");
+ ASSERT_OK(strcmp("arg_parsing", set.tests[0].subtests[0]),
+ "subtest name");
+ ASSERT_OK(strcmp("bpf_cookie", set.tests[1].name), "test name");
+ free_test_filter_set(&set);
+
+ ASSERT_OK(parse_test_list("arg_parsing/arg_parsing", &set, true),
+ "parsing");
+ ASSERT_OK(parse_test_list("bpf_cookie", &set, true), "parsing");
+ ASSERT_OK(parse_test_list("send_signal", &set, true), "parsing");
+ if (!ASSERT_EQ(set.cnt, 3, "count of test filters"))
+ goto error;
+ if (!ASSERT_OK_PTR(set.tests, "test filters initialized"))
+ goto error;
+ if (!ASSERT_EQ(set.tests[0].subtest_cnt, 1, "subtest filters count"))
+ goto error;
+ ASSERT_EQ(set.tests[1].subtest_cnt, 0, "subtest filters count");
+ ASSERT_EQ(set.tests[2].subtest_cnt, 0, "subtest filters count");
+ ASSERT_OK(strcmp("arg_parsing", set.tests[0].name), "test name");
+ ASSERT_OK(strcmp("arg_parsing", set.tests[0].subtests[0]),
+ "subtest name");
+ ASSERT_OK(strcmp("bpf_cookie", set.tests[1].name), "test name");
+ ASSERT_OK(strcmp("send_signal", set.tests[2].name), "test name");
+ free_test_filter_set(&set);
+
+ ASSERT_OK(parse_test_list("bpf_cookie/trace", &set, false), "parsing");
+ if (!ASSERT_EQ(set.cnt, 1, "count of test filters"))
+ goto error;
+ if (!ASSERT_OK_PTR(set.tests, "test filters initialized"))
+ goto error;
+ if (!ASSERT_EQ(set.tests[0].subtest_cnt, 1, "subtest filters count"))
+ goto error;
+ ASSERT_OK(strcmp("*bpf_cookie*", set.tests[0].name), "test name");
+ ASSERT_OK(strcmp("*trace*", set.tests[0].subtests[0]), "subtest name");
+ free_test_filter_set(&set);
+
+ ASSERT_OK(parse_test_list("t/subtest1,t/subtest2", &set, true),
+ "parsing");
+ if (!ASSERT_EQ(set.cnt, 1, "count of test filters"))
+ goto error;
+ if (!ASSERT_OK_PTR(set.tests, "test filters initialized"))
+ goto error;
+ if (!ASSERT_EQ(set.tests[0].subtest_cnt, 2, "subtest filters count"))
+ goto error;
+ ASSERT_OK(strcmp("t", set.tests[0].name), "test name");
+ ASSERT_OK(strcmp("subtest1", set.tests[0].subtests[0]), "subtest name");
+ ASSERT_OK(strcmp("subtest2", set.tests[0].subtests[1]), "subtest name");
+error:
+ free_test_filter_set(&set);
+}
+
+static void test_parse_test_list_file(void)
+{
+ struct test_filter_set set;
+ char tmpfile[80];
+ FILE *fp;
+ int fd;
+
+ snprintf(tmpfile, sizeof(tmpfile), "/tmp/bpf_arg_parsing_test.XXXXXX");
+ fd = mkstemp(tmpfile);
+ if (!ASSERT_GE(fd, 0, "create tmp"))
+ return;
+
+ fp = fdopen(fd, "w");
+ if (!ASSERT_NEQ(fp, NULL, "fdopen tmp")) {
+ close(fd);
+ goto out_remove;
+ }
+
+ fprintf(fp, "# comment\n");
+ fprintf(fp, " test_with_spaces \n");
+ fprintf(fp, "testA/subtest # comment\n");
+ fprintf(fp, "testB#comment with no space\n");
+ fprintf(fp, "testB # duplicate\n");
+ fprintf(fp, "testA/subtest # subtest duplicate\n");
+ fprintf(fp, "testA/subtest2\n");
+ fprintf(fp, "testC_no_eof_newline");
+ fflush(fp);
+
+ if (!ASSERT_OK(ferror(fp), "prepare tmp"))
+ goto out_fclose;
+
+ init_test_filter_set(&set);
+
+ ASSERT_OK(parse_test_list_file(tmpfile, &set, true), "parse file");
+
+ ASSERT_EQ(set.cnt, 4, "test count");
+ ASSERT_OK(strcmp("test_with_spaces", set.tests[0].name), "test 0 name");
+ ASSERT_EQ(set.tests[0].subtest_cnt, 0, "test 0 subtest count");
+ ASSERT_OK(strcmp("testA", set.tests[1].name), "test 1 name");
+ ASSERT_EQ(set.tests[1].subtest_cnt, 2, "test 1 subtest count");
+ ASSERT_OK(strcmp("subtest", set.tests[1].subtests[0]), "test 1 subtest 0");
+ ASSERT_OK(strcmp("subtest2", set.tests[1].subtests[1]), "test 1 subtest 1");
+ ASSERT_OK(strcmp("testB", set.tests[2].name), "test 2 name");
+ ASSERT_OK(strcmp("testC_no_eof_newline", set.tests[3].name), "test 3 name");
+
+ free_test_filter_set(&set);
+
+out_fclose:
+ fclose(fp);
+out_remove:
+ remove(tmpfile);
+}
+
+void test_arg_parsing(void)
+{
+ if (test__start_subtest("test_parse_test_list"))
+ test_parse_test_list();
+ if (test__start_subtest("test_parse_test_list_file"))
+ test_parse_test_list_file();
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/assign_reuse.c b/tools/testing/selftests/bpf/prog_tests/assign_reuse.c
new file mode 100644
index 000000000000..989ee4d9785b
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/assign_reuse.c
@@ -0,0 +1,199 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2023 Isovalent */
+#include <uapi/linux/if_link.h>
+#include <test_progs.h>
+
+#include <netinet/tcp.h>
+#include <netinet/udp.h>
+
+#include "network_helpers.h"
+#include "test_assign_reuse.skel.h"
+
+#define NS_TEST "assign_reuse"
+#define LOOPBACK 1
+#define PORT 4443
+
+static int attach_reuseport(int sock_fd, int prog_fd)
+{
+ return setsockopt(sock_fd, SOL_SOCKET, SO_ATTACH_REUSEPORT_EBPF,
+ &prog_fd, sizeof(prog_fd));
+}
+
+static __u64 cookie(int fd)
+{
+ __u64 cookie = 0;
+ socklen_t cookie_len = sizeof(cookie);
+ int ret;
+
+ ret = getsockopt(fd, SOL_SOCKET, SO_COOKIE, &cookie, &cookie_len);
+ ASSERT_OK(ret, "cookie");
+ ASSERT_GT(cookie, 0, "cookie_invalid");
+
+ return cookie;
+}
+
+static int echo_test_udp(int fd_sv)
+{
+ struct sockaddr_storage addr = {};
+ socklen_t len = sizeof(addr);
+ char buff[1] = {};
+ int fd_cl = -1, ret;
+
+ fd_cl = connect_to_fd(fd_sv, 100);
+ ASSERT_GT(fd_cl, 0, "create_client");
+ ASSERT_EQ(getsockname(fd_cl, (void *)&addr, &len), 0, "getsockname");
+
+ ASSERT_EQ(send(fd_cl, buff, sizeof(buff), 0), 1, "send_client");
+
+ ret = recv(fd_sv, buff, sizeof(buff), 0);
+ if (ret < 0) {
+ close(fd_cl);
+ return errno;
+ }
+
+ ASSERT_EQ(ret, 1, "recv_server");
+ ASSERT_EQ(sendto(fd_sv, buff, sizeof(buff), 0, (void *)&addr, len), 1, "send_server");
+ ASSERT_EQ(recv(fd_cl, buff, sizeof(buff), 0), 1, "recv_client");
+ close(fd_cl);
+ return 0;
+}
+
+static int echo_test_tcp(int fd_sv)
+{
+ char buff[1] = {};
+ int fd_cl = -1, fd_sv_cl = -1;
+
+ fd_cl = connect_to_fd(fd_sv, 100);
+ if (fd_cl < 0)
+ return errno;
+
+ fd_sv_cl = accept(fd_sv, NULL, NULL);
+ ASSERT_GE(fd_sv_cl, 0, "accept_fd");
+
+ ASSERT_EQ(send(fd_cl, buff, sizeof(buff), 0), 1, "send_client");
+ ASSERT_EQ(recv(fd_sv_cl, buff, sizeof(buff), 0), 1, "recv_server");
+ ASSERT_EQ(send(fd_sv_cl, buff, sizeof(buff), 0), 1, "send_server");
+ ASSERT_EQ(recv(fd_cl, buff, sizeof(buff), 0), 1, "recv_client");
+ close(fd_sv_cl);
+ close(fd_cl);
+ return 0;
+}
+
+void run_assign_reuse(int family, int sotype, const char *ip, __u16 port)
+{
+ DECLARE_LIBBPF_OPTS(bpf_tc_hook, tc_hook,
+ .ifindex = LOOPBACK,
+ .attach_point = BPF_TC_INGRESS,
+ );
+ DECLARE_LIBBPF_OPTS(bpf_tc_opts, tc_opts,
+ .handle = 1,
+ .priority = 1,
+ );
+ bool hook_created = false, tc_attached = false;
+ int ret, fd_tc, fd_accept, fd_drop, fd_map;
+ int *fd_sv = NULL;
+ __u64 fd_val;
+ struct test_assign_reuse *skel;
+ const int zero = 0;
+
+ skel = test_assign_reuse__open();
+ if (!ASSERT_OK_PTR(skel, "skel_open"))
+ goto cleanup;
+
+ skel->rodata->dest_port = port;
+
+ ret = test_assign_reuse__load(skel);
+ if (!ASSERT_OK(ret, "skel_load"))
+ goto cleanup;
+
+ ASSERT_EQ(skel->bss->sk_cookie_seen, 0, "cookie_init");
+
+ fd_tc = bpf_program__fd(skel->progs.tc_main);
+ fd_accept = bpf_program__fd(skel->progs.reuse_accept);
+ fd_drop = bpf_program__fd(skel->progs.reuse_drop);
+ fd_map = bpf_map__fd(skel->maps.sk_map);
+
+ fd_sv = start_reuseport_server(family, sotype, ip, port, 100, 1);
+ if (!ASSERT_NEQ(fd_sv, NULL, "start_reuseport_server"))
+ goto cleanup;
+
+ ret = attach_reuseport(*fd_sv, fd_drop);
+ if (!ASSERT_OK(ret, "attach_reuseport"))
+ goto cleanup;
+
+ fd_val = *fd_sv;
+ ret = bpf_map_update_elem(fd_map, &zero, &fd_val, BPF_NOEXIST);
+ if (!ASSERT_OK(ret, "bpf_sk_map"))
+ goto cleanup;
+
+ ret = bpf_tc_hook_create(&tc_hook);
+ if (ret == 0)
+ hook_created = true;
+ ret = ret == -EEXIST ? 0 : ret;
+ if (!ASSERT_OK(ret, "bpf_tc_hook_create"))
+ goto cleanup;
+
+ tc_opts.prog_fd = fd_tc;
+ ret = bpf_tc_attach(&tc_hook, &tc_opts);
+ if (!ASSERT_OK(ret, "bpf_tc_attach"))
+ goto cleanup;
+ tc_attached = true;
+
+ if (sotype == SOCK_STREAM)
+ ASSERT_EQ(echo_test_tcp(*fd_sv), ECONNREFUSED, "drop_tcp");
+ else
+ ASSERT_EQ(echo_test_udp(*fd_sv), EAGAIN, "drop_udp");
+ ASSERT_EQ(skel->bss->reuseport_executed, 1, "program executed once");
+
+ skel->bss->sk_cookie_seen = 0;
+ skel->bss->reuseport_executed = 0;
+ ASSERT_OK(attach_reuseport(*fd_sv, fd_accept), "attach_reuseport(accept)");
+
+ if (sotype == SOCK_STREAM)
+ ASSERT_EQ(echo_test_tcp(*fd_sv), 0, "echo_tcp");
+ else
+ ASSERT_EQ(echo_test_udp(*fd_sv), 0, "echo_udp");
+
+ ASSERT_EQ(skel->bss->sk_cookie_seen, cookie(*fd_sv),
+ "cookie_mismatch");
+ ASSERT_EQ(skel->bss->reuseport_executed, 1, "program executed once");
+cleanup:
+ if (tc_attached) {
+ tc_opts.flags = tc_opts.prog_fd = tc_opts.prog_id = 0;
+ ret = bpf_tc_detach(&tc_hook, &tc_opts);
+ ASSERT_OK(ret, "bpf_tc_detach");
+ }
+ if (hook_created) {
+ tc_hook.attach_point = BPF_TC_INGRESS | BPF_TC_EGRESS;
+ bpf_tc_hook_destroy(&tc_hook);
+ }
+ test_assign_reuse__destroy(skel);
+ free_fds(fd_sv, 1);
+}
+
+void test_assign_reuse(void)
+{
+ struct nstoken *tok = NULL;
+
+ SYS(out, "ip netns add %s", NS_TEST);
+ SYS(cleanup, "ip -net %s link set dev lo up", NS_TEST);
+
+ tok = open_netns(NS_TEST);
+ if (!ASSERT_OK_PTR(tok, "netns token"))
+ return;
+
+ if (test__start_subtest("tcpv4"))
+ run_assign_reuse(AF_INET, SOCK_STREAM, "127.0.0.1", PORT);
+ if (test__start_subtest("tcpv6"))
+ run_assign_reuse(AF_INET6, SOCK_STREAM, "::1", PORT);
+ if (test__start_subtest("udpv4"))
+ run_assign_reuse(AF_INET, SOCK_DGRAM, "127.0.0.1", PORT);
+ if (test__start_subtest("udpv6"))
+ run_assign_reuse(AF_INET6, SOCK_DGRAM, "::1", PORT);
+
+cleanup:
+ close_netns(tok);
+ SYS_NOFAIL("ip netns delete %s", NS_TEST);
+out:
+ return;
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/async_stack_depth.c b/tools/testing/selftests/bpf/prog_tests/async_stack_depth.c
new file mode 100644
index 000000000000..118abc29b236
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/async_stack_depth.c
@@ -0,0 +1,9 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <test_progs.h>
+
+#include "async_stack_depth.skel.h"
+
+void test_async_stack_depth(void)
+{
+ RUN_TESTS(async_stack_depth);
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/atomic_bounds.c b/tools/testing/selftests/bpf/prog_tests/atomic_bounds.c
new file mode 100644
index 000000000000..69bd7853e8f1
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/atomic_bounds.c
@@ -0,0 +1,17 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include <test_progs.h>
+
+#include "atomic_bounds.skel.h"
+
+void test_atomic_bounds(void)
+{
+ struct atomic_bounds *skel;
+ __u32 duration = 0;
+
+ skel = atomic_bounds__open_and_load();
+ if (CHECK(!skel, "skel_load", "couldn't load program\n"))
+ return;
+
+ atomic_bounds__destroy(skel);
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/atomics.c b/tools/testing/selftests/bpf/prog_tests/atomics.c
new file mode 100644
index 000000000000..13e101f370a1
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/atomics.c
@@ -0,0 +1,198 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include <test_progs.h>
+
+#include "atomics.lskel.h"
+
+static void test_add(struct atomics_lskel *skel)
+{
+ int err, prog_fd;
+ LIBBPF_OPTS(bpf_test_run_opts, topts);
+
+ /* No need to attach it, just run it directly */
+ prog_fd = skel->progs.add.prog_fd;
+ err = bpf_prog_test_run_opts(prog_fd, &topts);
+ if (!ASSERT_OK(err, "test_run_opts err"))
+ return;
+ if (!ASSERT_OK(topts.retval, "test_run_opts retval"))
+ return;
+
+ ASSERT_EQ(skel->data->add64_value, 3, "add64_value");
+ ASSERT_EQ(skel->bss->add64_result, 1, "add64_result");
+
+ ASSERT_EQ(skel->data->add32_value, 3, "add32_value");
+ ASSERT_EQ(skel->bss->add32_result, 1, "add32_result");
+
+ ASSERT_EQ(skel->bss->add_stack_value_copy, 3, "add_stack_value");
+ ASSERT_EQ(skel->bss->add_stack_result, 1, "add_stack_result");
+
+ ASSERT_EQ(skel->data->add_noreturn_value, 3, "add_noreturn_value");
+}
+
+static void test_sub(struct atomics_lskel *skel)
+{
+ int err, prog_fd;
+ LIBBPF_OPTS(bpf_test_run_opts, topts);
+
+ /* No need to attach it, just run it directly */
+ prog_fd = skel->progs.sub.prog_fd;
+ err = bpf_prog_test_run_opts(prog_fd, &topts);
+ if (!ASSERT_OK(err, "test_run_opts err"))
+ return;
+ if (!ASSERT_OK(topts.retval, "test_run_opts retval"))
+ return;
+
+ ASSERT_EQ(skel->data->sub64_value, -1, "sub64_value");
+ ASSERT_EQ(skel->bss->sub64_result, 1, "sub64_result");
+
+ ASSERT_EQ(skel->data->sub32_value, -1, "sub32_value");
+ ASSERT_EQ(skel->bss->sub32_result, 1, "sub32_result");
+
+ ASSERT_EQ(skel->bss->sub_stack_value_copy, -1, "sub_stack_value");
+ ASSERT_EQ(skel->bss->sub_stack_result, 1, "sub_stack_result");
+
+ ASSERT_EQ(skel->data->sub_noreturn_value, -1, "sub_noreturn_value");
+}
+
+static void test_and(struct atomics_lskel *skel)
+{
+ int err, prog_fd;
+ LIBBPF_OPTS(bpf_test_run_opts, topts);
+
+ /* No need to attach it, just run it directly */
+ prog_fd = skel->progs.and.prog_fd;
+ err = bpf_prog_test_run_opts(prog_fd, &topts);
+ if (!ASSERT_OK(err, "test_run_opts err"))
+ return;
+ if (!ASSERT_OK(topts.retval, "test_run_opts retval"))
+ return;
+
+ ASSERT_EQ(skel->data->and64_value, 0x010ull << 32, "and64_value");
+ ASSERT_EQ(skel->bss->and64_result, 0x110ull << 32, "and64_result");
+
+ ASSERT_EQ(skel->data->and32_value, 0x010, "and32_value");
+ ASSERT_EQ(skel->bss->and32_result, 0x110, "and32_result");
+
+ ASSERT_EQ(skel->data->and_noreturn_value, 0x010ull << 32, "and_noreturn_value");
+}
+
+static void test_or(struct atomics_lskel *skel)
+{
+ int err, prog_fd;
+ LIBBPF_OPTS(bpf_test_run_opts, topts);
+
+ /* No need to attach it, just run it directly */
+ prog_fd = skel->progs.or.prog_fd;
+ err = bpf_prog_test_run_opts(prog_fd, &topts);
+ if (!ASSERT_OK(err, "test_run_opts err"))
+ return;
+ if (!ASSERT_OK(topts.retval, "test_run_opts retval"))
+ return;
+
+ ASSERT_EQ(skel->data->or64_value, 0x111ull << 32, "or64_value");
+ ASSERT_EQ(skel->bss->or64_result, 0x110ull << 32, "or64_result");
+
+ ASSERT_EQ(skel->data->or32_value, 0x111, "or32_value");
+ ASSERT_EQ(skel->bss->or32_result, 0x110, "or32_result");
+
+ ASSERT_EQ(skel->data->or_noreturn_value, 0x111ull << 32, "or_noreturn_value");
+}
+
+static void test_xor(struct atomics_lskel *skel)
+{
+ int err, prog_fd;
+ LIBBPF_OPTS(bpf_test_run_opts, topts);
+
+ /* No need to attach it, just run it directly */
+ prog_fd = skel->progs.xor.prog_fd;
+ err = bpf_prog_test_run_opts(prog_fd, &topts);
+ if (!ASSERT_OK(err, "test_run_opts err"))
+ return;
+ if (!ASSERT_OK(topts.retval, "test_run_opts retval"))
+ return;
+
+ ASSERT_EQ(skel->data->xor64_value, 0x101ull << 32, "xor64_value");
+ ASSERT_EQ(skel->bss->xor64_result, 0x110ull << 32, "xor64_result");
+
+ ASSERT_EQ(skel->data->xor32_value, 0x101, "xor32_value");
+ ASSERT_EQ(skel->bss->xor32_result, 0x110, "xor32_result");
+
+ ASSERT_EQ(skel->data->xor_noreturn_value, 0x101ull << 32, "xor_nxoreturn_value");
+}
+
+static void test_cmpxchg(struct atomics_lskel *skel)
+{
+ int err, prog_fd;
+ LIBBPF_OPTS(bpf_test_run_opts, topts);
+
+ /* No need to attach it, just run it directly */
+ prog_fd = skel->progs.cmpxchg.prog_fd;
+ err = bpf_prog_test_run_opts(prog_fd, &topts);
+ if (!ASSERT_OK(err, "test_run_opts err"))
+ return;
+ if (!ASSERT_OK(topts.retval, "test_run_opts retval"))
+ return;
+
+ ASSERT_EQ(skel->data->cmpxchg64_value, 2, "cmpxchg64_value");
+ ASSERT_EQ(skel->bss->cmpxchg64_result_fail, 1, "cmpxchg_result_fail");
+ ASSERT_EQ(skel->bss->cmpxchg64_result_succeed, 1, "cmpxchg_result_succeed");
+
+ ASSERT_EQ(skel->data->cmpxchg32_value, 2, "lcmpxchg32_value");
+ ASSERT_EQ(skel->bss->cmpxchg32_result_fail, 1, "cmpxchg_result_fail");
+ ASSERT_EQ(skel->bss->cmpxchg32_result_succeed, 1, "cmpxchg_result_succeed");
+}
+
+static void test_xchg(struct atomics_lskel *skel)
+{
+ int err, prog_fd;
+ LIBBPF_OPTS(bpf_test_run_opts, topts);
+
+ /* No need to attach it, just run it directly */
+ prog_fd = skel->progs.xchg.prog_fd;
+ err = bpf_prog_test_run_opts(prog_fd, &topts);
+ if (!ASSERT_OK(err, "test_run_opts err"))
+ return;
+ if (!ASSERT_OK(topts.retval, "test_run_opts retval"))
+ return;
+
+ ASSERT_EQ(skel->data->xchg64_value, 2, "xchg64_value");
+ ASSERT_EQ(skel->bss->xchg64_result, 1, "xchg64_result");
+
+ ASSERT_EQ(skel->data->xchg32_value, 2, "xchg32_value");
+ ASSERT_EQ(skel->bss->xchg32_result, 1, "xchg32_result");
+}
+
+void test_atomics(void)
+{
+ struct atomics_lskel *skel;
+
+ skel = atomics_lskel__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "atomics skeleton load"))
+ return;
+
+ if (skel->data->skip_tests) {
+ printf("%s:SKIP:no ENABLE_ATOMICS_TESTS (missing Clang BPF atomics support)",
+ __func__);
+ test__skip();
+ goto cleanup;
+ }
+ skel->bss->pid = getpid();
+
+ if (test__start_subtest("add"))
+ test_add(skel);
+ if (test__start_subtest("sub"))
+ test_sub(skel);
+ if (test__start_subtest("and"))
+ test_and(skel);
+ if (test__start_subtest("or"))
+ test_or(skel);
+ if (test__start_subtest("xor"))
+ test_xor(skel);
+ if (test__start_subtest("cmpxchg"))
+ test_cmpxchg(skel);
+ if (test__start_subtest("xchg"))
+ test_xchg(skel);
+
+cleanup:
+ atomics_lskel__destroy(skel);
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/attach_probe.c b/tools/testing/selftests/bpf/prog_tests/attach_probe.c
index a0ee87c8e1ea..7175af39134f 100644
--- a/tools/testing/selftests/bpf/prog_tests/attach_probe.c
+++ b/tools/testing/selftests/bpf/prog_tests/attach_probe.c
@@ -1,105 +1,327 @@
// SPDX-License-Identifier: GPL-2.0
#include <test_progs.h>
+#include "test_attach_kprobe_sleepable.skel.h"
+#include "test_attach_probe_manual.skel.h"
#include "test_attach_probe.skel.h"
-ssize_t get_base_addr() {
- size_t start, offset;
- char buf[256];
- FILE *f;
-
- f = fopen("/proc/self/maps", "r");
- if (!f)
- return -errno;
-
- while (fscanf(f, "%zx-%*x %s %zx %*[^\n]\n",
- &start, buf, &offset) == 3) {
- if (strcmp(buf, "r-xp") == 0) {
- fclose(f);
- return start - offset;
- }
- }
-
- fclose(f);
- return -EINVAL;
+/* this is how USDT semaphore is actually defined, except volatile modifier */
+volatile unsigned short uprobe_ref_ctr __attribute__((unused)) __attribute((section(".probes")));
+
+/* uprobe attach point */
+static noinline void trigger_func(void)
+{
+ asm volatile ("");
}
-void test_attach_probe(void)
+/* attach point for byname uprobe */
+static noinline void trigger_func2(void)
+{
+ asm volatile ("");
+}
+
+/* attach point for byname sleepable uprobe */
+static noinline void trigger_func3(void)
{
- int duration = 0;
+ asm volatile ("");
+}
+
+/* attach point for ref_ctr */
+static noinline void trigger_func4(void)
+{
+ asm volatile ("");
+}
+
+static char test_data[] = "test_data";
+
+/* manual attach kprobe/kretprobe/uprobe/uretprobe testings */
+static void test_attach_probe_manual(enum probe_attach_mode attach_mode)
+{
+ DECLARE_LIBBPF_OPTS(bpf_uprobe_opts, uprobe_opts);
+ DECLARE_LIBBPF_OPTS(bpf_kprobe_opts, kprobe_opts);
struct bpf_link *kprobe_link, *kretprobe_link;
struct bpf_link *uprobe_link, *uretprobe_link;
- struct test_attach_probe* skel;
- size_t uprobe_offset;
- ssize_t base_addr;
+ struct test_attach_probe_manual *skel;
+ ssize_t uprobe_offset;
- base_addr = get_base_addr();
- if (CHECK(base_addr < 0, "get_base_addr",
- "failed to find base addr: %zd", base_addr))
+ skel = test_attach_probe_manual__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "skel_kprobe_manual_open_and_load"))
return;
- uprobe_offset = (size_t)&get_base_addr - base_addr;
- skel = test_attach_probe__open_and_load();
- if (CHECK(!skel, "skel_open", "failed to open skeleton\n"))
- return;
- if (CHECK(!skel->bss, "check_bss", ".bss wasn't mmap()-ed\n"))
+ uprobe_offset = get_uprobe_offset(&trigger_func);
+ if (!ASSERT_GE(uprobe_offset, 0, "uprobe_offset"))
goto cleanup;
- kprobe_link = bpf_program__attach_kprobe(skel->progs.handle_kprobe,
- false /* retprobe */,
- SYS_NANOSLEEP_KPROBE_NAME);
- if (CHECK(IS_ERR(kprobe_link), "attach_kprobe",
- "err %ld\n", PTR_ERR(kprobe_link)))
+ /* manual-attach kprobe/kretprobe */
+ kprobe_opts.attach_mode = attach_mode;
+ kprobe_opts.retprobe = false;
+ kprobe_link = bpf_program__attach_kprobe_opts(skel->progs.handle_kprobe,
+ SYS_NANOSLEEP_KPROBE_NAME,
+ &kprobe_opts);
+ if (!ASSERT_OK_PTR(kprobe_link, "attach_kprobe"))
goto cleanup;
skel->links.handle_kprobe = kprobe_link;
- kretprobe_link = bpf_program__attach_kprobe(skel->progs.handle_kretprobe,
- true /* retprobe */,
- SYS_NANOSLEEP_KPROBE_NAME);
- if (CHECK(IS_ERR(kretprobe_link), "attach_kretprobe",
- "err %ld\n", PTR_ERR(kretprobe_link)))
+ kprobe_opts.retprobe = true;
+ kretprobe_link = bpf_program__attach_kprobe_opts(skel->progs.handle_kretprobe,
+ SYS_NANOSLEEP_KPROBE_NAME,
+ &kprobe_opts);
+ if (!ASSERT_OK_PTR(kretprobe_link, "attach_kretprobe"))
goto cleanup;
skel->links.handle_kretprobe = kretprobe_link;
- uprobe_link = bpf_program__attach_uprobe(skel->progs.handle_uprobe,
- false /* retprobe */,
- 0 /* self pid */,
- "/proc/self/exe",
- uprobe_offset);
- if (CHECK(IS_ERR(uprobe_link), "attach_uprobe",
- "err %ld\n", PTR_ERR(uprobe_link)))
+ /* manual-attach uprobe/uretprobe */
+ uprobe_opts.attach_mode = attach_mode;
+ uprobe_opts.ref_ctr_offset = 0;
+ uprobe_opts.retprobe = false;
+ uprobe_link = bpf_program__attach_uprobe_opts(skel->progs.handle_uprobe,
+ 0 /* self pid */,
+ "/proc/self/exe",
+ uprobe_offset,
+ &uprobe_opts);
+ if (!ASSERT_OK_PTR(uprobe_link, "attach_uprobe"))
goto cleanup;
skel->links.handle_uprobe = uprobe_link;
- uretprobe_link = bpf_program__attach_uprobe(skel->progs.handle_uretprobe,
- true /* retprobe */,
- -1 /* any pid */,
- "/proc/self/exe",
- uprobe_offset);
- if (CHECK(IS_ERR(uretprobe_link), "attach_uretprobe",
- "err %ld\n", PTR_ERR(uretprobe_link)))
+ uprobe_opts.retprobe = true;
+ uretprobe_link = bpf_program__attach_uprobe_opts(skel->progs.handle_uretprobe,
+ -1 /* any pid */,
+ "/proc/self/exe",
+ uprobe_offset, &uprobe_opts);
+ if (!ASSERT_OK_PTR(uretprobe_link, "attach_uretprobe"))
goto cleanup;
skel->links.handle_uretprobe = uretprobe_link;
+ /* attach uprobe by function name manually */
+ uprobe_opts.func_name = "trigger_func2";
+ uprobe_opts.retprobe = false;
+ uprobe_opts.ref_ctr_offset = 0;
+ skel->links.handle_uprobe_byname =
+ bpf_program__attach_uprobe_opts(skel->progs.handle_uprobe_byname,
+ 0 /* this pid */,
+ "/proc/self/exe",
+ 0, &uprobe_opts);
+ if (!ASSERT_OK_PTR(skel->links.handle_uprobe_byname, "attach_uprobe_byname"))
+ goto cleanup;
+
/* trigger & validate kprobe && kretprobe */
usleep(1);
- if (CHECK(skel->bss->kprobe_res != 1, "check_kprobe_res",
- "wrong kprobe res: %d\n", skel->bss->kprobe_res))
+ /* trigger & validate uprobe & uretprobe */
+ trigger_func();
+
+ /* trigger & validate uprobe attached by name */
+ trigger_func2();
+
+ ASSERT_EQ(skel->bss->kprobe_res, 1, "check_kprobe_res");
+ ASSERT_EQ(skel->bss->kretprobe_res, 2, "check_kretprobe_res");
+ ASSERT_EQ(skel->bss->uprobe_res, 3, "check_uprobe_res");
+ ASSERT_EQ(skel->bss->uretprobe_res, 4, "check_uretprobe_res");
+ ASSERT_EQ(skel->bss->uprobe_byname_res, 5, "check_uprobe_byname_res");
+
+cleanup:
+ test_attach_probe_manual__destroy(skel);
+}
+
+static void test_attach_probe_auto(struct test_attach_probe *skel)
+{
+ struct bpf_link *uprobe_err_link;
+
+ /* auto-attachable kprobe and kretprobe */
+ skel->links.handle_kprobe_auto = bpf_program__attach(skel->progs.handle_kprobe_auto);
+ ASSERT_OK_PTR(skel->links.handle_kprobe_auto, "attach_kprobe_auto");
+
+ skel->links.handle_kretprobe_auto = bpf_program__attach(skel->progs.handle_kretprobe_auto);
+ ASSERT_OK_PTR(skel->links.handle_kretprobe_auto, "attach_kretprobe_auto");
+
+ /* verify auto-attach fails for old-style uprobe definition */
+ uprobe_err_link = bpf_program__attach(skel->progs.handle_uprobe_byname);
+ if (!ASSERT_EQ(libbpf_get_error(uprobe_err_link), -EOPNOTSUPP,
+ "auto-attach should fail for old-style name"))
+ return;
+
+ /* verify auto-attach works */
+ skel->links.handle_uretprobe_byname =
+ bpf_program__attach(skel->progs.handle_uretprobe_byname);
+ if (!ASSERT_OK_PTR(skel->links.handle_uretprobe_byname, "attach_uretprobe_byname"))
+ return;
+
+ /* trigger & validate kprobe && kretprobe */
+ usleep(1);
+
+ /* trigger & validate uprobe attached by name */
+ trigger_func2();
+
+ ASSERT_EQ(skel->bss->kprobe2_res, 11, "check_kprobe_auto_res");
+ ASSERT_EQ(skel->bss->kretprobe2_res, 22, "check_kretprobe_auto_res");
+ ASSERT_EQ(skel->bss->uretprobe_byname_res, 6, "check_uretprobe_byname_res");
+}
+
+static void test_uprobe_lib(struct test_attach_probe *skel)
+{
+ DECLARE_LIBBPF_OPTS(bpf_uprobe_opts, uprobe_opts);
+ FILE *devnull;
+
+ /* test attach by name for a library function, using the library
+ * as the binary argument. libc.so.6 will be resolved via dlopen()/dlinfo().
+ */
+ uprobe_opts.func_name = "fopen";
+ uprobe_opts.retprobe = false;
+ skel->links.handle_uprobe_byname2 =
+ bpf_program__attach_uprobe_opts(skel->progs.handle_uprobe_byname2,
+ 0 /* this pid */,
+ "libc.so.6",
+ 0, &uprobe_opts);
+ if (!ASSERT_OK_PTR(skel->links.handle_uprobe_byname2, "attach_uprobe_byname2"))
+ return;
+
+ uprobe_opts.func_name = "fclose";
+ uprobe_opts.retprobe = true;
+ skel->links.handle_uretprobe_byname2 =
+ bpf_program__attach_uprobe_opts(skel->progs.handle_uretprobe_byname2,
+ -1 /* any pid */,
+ "libc.so.6",
+ 0, &uprobe_opts);
+ if (!ASSERT_OK_PTR(skel->links.handle_uretprobe_byname2, "attach_uretprobe_byname2"))
+ return;
+
+ /* trigger & validate shared library u[ret]probes attached by name */
+ devnull = fopen("/dev/null", "r");
+ fclose(devnull);
+
+ ASSERT_EQ(skel->bss->uprobe_byname2_res, 7, "check_uprobe_byname2_res");
+ ASSERT_EQ(skel->bss->uretprobe_byname2_res, 8, "check_uretprobe_byname2_res");
+}
+
+static void test_uprobe_ref_ctr(struct test_attach_probe *skel)
+{
+ DECLARE_LIBBPF_OPTS(bpf_uprobe_opts, uprobe_opts);
+ struct bpf_link *uprobe_link, *uretprobe_link;
+ ssize_t uprobe_offset, ref_ctr_offset;
+
+ uprobe_offset = get_uprobe_offset(&trigger_func4);
+ if (!ASSERT_GE(uprobe_offset, 0, "uprobe_offset_ref_ctr"))
+ return;
+
+ ref_ctr_offset = get_rel_offset((uintptr_t)&uprobe_ref_ctr);
+ if (!ASSERT_GE(ref_ctr_offset, 0, "ref_ctr_offset"))
+ return;
+
+ ASSERT_EQ(uprobe_ref_ctr, 0, "uprobe_ref_ctr_before");
+
+ uprobe_opts.retprobe = false;
+ uprobe_opts.ref_ctr_offset = ref_ctr_offset;
+ uprobe_link = bpf_program__attach_uprobe_opts(skel->progs.handle_uprobe_ref_ctr,
+ 0 /* self pid */,
+ "/proc/self/exe",
+ uprobe_offset,
+ &uprobe_opts);
+ if (!ASSERT_OK_PTR(uprobe_link, "attach_uprobe_ref_ctr"))
+ return;
+ skel->links.handle_uprobe_ref_ctr = uprobe_link;
+
+ ASSERT_GT(uprobe_ref_ctr, 0, "uprobe_ref_ctr_after");
+
+ /* if uprobe uses ref_ctr, uretprobe has to use ref_ctr as well */
+ uprobe_opts.retprobe = true;
+ uprobe_opts.ref_ctr_offset = ref_ctr_offset;
+ uretprobe_link = bpf_program__attach_uprobe_opts(skel->progs.handle_uretprobe_ref_ctr,
+ -1 /* any pid */,
+ "/proc/self/exe",
+ uprobe_offset, &uprobe_opts);
+ if (!ASSERT_OK_PTR(uretprobe_link, "attach_uretprobe_ref_ctr"))
+ return;
+ skel->links.handle_uretprobe_ref_ctr = uretprobe_link;
+}
+
+static void test_kprobe_sleepable(void)
+{
+ struct test_attach_kprobe_sleepable *skel;
+
+ skel = test_attach_kprobe_sleepable__open();
+ if (!ASSERT_OK_PTR(skel, "skel_kprobe_sleepable_open"))
+ return;
+
+ /* sleepable kprobe test case needs flags set before loading */
+ if (!ASSERT_OK(bpf_program__set_flags(skel->progs.handle_kprobe_sleepable,
+ BPF_F_SLEEPABLE), "kprobe_sleepable_flags"))
goto cleanup;
- if (CHECK(skel->bss->kretprobe_res != 2, "check_kretprobe_res",
- "wrong kretprobe res: %d\n", skel->bss->kretprobe_res))
+
+ if (!ASSERT_OK(test_attach_kprobe_sleepable__load(skel),
+ "skel_kprobe_sleepable_load"))
goto cleanup;
- /* trigger & validate uprobe & uretprobe */
- get_base_addr();
+ /* sleepable kprobes should not attach successfully */
+ skel->links.handle_kprobe_sleepable = bpf_program__attach(skel->progs.handle_kprobe_sleepable);
+ ASSERT_ERR_PTR(skel->links.handle_kprobe_sleepable, "attach_kprobe_sleepable");
+
+cleanup:
+ test_attach_kprobe_sleepable__destroy(skel);
+}
+
+static void test_uprobe_sleepable(struct test_attach_probe *skel)
+{
+ /* test sleepable uprobe and uretprobe variants */
+ skel->links.handle_uprobe_byname3_sleepable = bpf_program__attach(skel->progs.handle_uprobe_byname3_sleepable);
+ if (!ASSERT_OK_PTR(skel->links.handle_uprobe_byname3_sleepable, "attach_uprobe_byname3_sleepable"))
+ return;
+
+ skel->links.handle_uprobe_byname3 = bpf_program__attach(skel->progs.handle_uprobe_byname3);
+ if (!ASSERT_OK_PTR(skel->links.handle_uprobe_byname3, "attach_uprobe_byname3"))
+ return;
+
+ skel->links.handle_uretprobe_byname3_sleepable = bpf_program__attach(skel->progs.handle_uretprobe_byname3_sleepable);
+ if (!ASSERT_OK_PTR(skel->links.handle_uretprobe_byname3_sleepable, "attach_uretprobe_byname3_sleepable"))
+ return;
+
+ skel->links.handle_uretprobe_byname3 = bpf_program__attach(skel->progs.handle_uretprobe_byname3);
+ if (!ASSERT_OK_PTR(skel->links.handle_uretprobe_byname3, "attach_uretprobe_byname3"))
+ return;
- if (CHECK(skel->bss->uprobe_res != 3, "check_uprobe_res",
- "wrong uprobe res: %d\n", skel->bss->uprobe_res))
+ skel->bss->user_ptr = test_data;
+
+ /* trigger & validate sleepable uprobe attached by name */
+ trigger_func3();
+
+ ASSERT_EQ(skel->bss->uprobe_byname3_sleepable_res, 9, "check_uprobe_byname3_sleepable_res");
+ ASSERT_EQ(skel->bss->uprobe_byname3_res, 10, "check_uprobe_byname3_res");
+ ASSERT_EQ(skel->bss->uretprobe_byname3_sleepable_res, 11, "check_uretprobe_byname3_sleepable_res");
+ ASSERT_EQ(skel->bss->uretprobe_byname3_res, 12, "check_uretprobe_byname3_res");
+}
+
+void test_attach_probe(void)
+{
+ struct test_attach_probe *skel;
+
+ skel = test_attach_probe__open();
+ if (!ASSERT_OK_PTR(skel, "skel_open"))
+ return;
+
+ if (!ASSERT_OK(test_attach_probe__load(skel), "skel_load"))
goto cleanup;
- if (CHECK(skel->bss->uretprobe_res != 4, "check_uretprobe_res",
- "wrong uretprobe res: %d\n", skel->bss->uretprobe_res))
+ if (!ASSERT_OK_PTR(skel->bss, "check_bss"))
goto cleanup;
+ if (test__start_subtest("manual-default"))
+ test_attach_probe_manual(PROBE_ATTACH_MODE_DEFAULT);
+ if (test__start_subtest("manual-legacy"))
+ test_attach_probe_manual(PROBE_ATTACH_MODE_LEGACY);
+ if (test__start_subtest("manual-perf"))
+ test_attach_probe_manual(PROBE_ATTACH_MODE_PERF);
+ if (test__start_subtest("manual-link"))
+ test_attach_probe_manual(PROBE_ATTACH_MODE_LINK);
+
+ if (test__start_subtest("auto"))
+ test_attach_probe_auto(skel);
+ if (test__start_subtest("kprobe-sleepable"))
+ test_kprobe_sleepable();
+ if (test__start_subtest("uprobe-lib"))
+ test_uprobe_lib(skel);
+ if (test__start_subtest("uprobe-sleepable"))
+ test_uprobe_sleepable(skel);
+ if (test__start_subtest("uprobe-ref_ctr"))
+ test_uprobe_ref_ctr(skel);
+
cleanup:
test_attach_probe__destroy(skel);
+ ASSERT_EQ(uprobe_ref_ctr, 0, "uprobe_ref_ctr_cleanup");
}
diff --git a/tools/testing/selftests/bpf/prog_tests/autoattach.c b/tools/testing/selftests/bpf/prog_tests/autoattach.c
new file mode 100644
index 000000000000..dc5e01d279bd
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/autoattach.c
@@ -0,0 +1,30 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2022 Google */
+
+#include <test_progs.h>
+#include "test_autoattach.skel.h"
+
+void test_autoattach(void)
+{
+ struct test_autoattach *skel;
+
+ skel = test_autoattach__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "skel_open_and_load"))
+ goto cleanup;
+
+ /* disable auto-attach for prog2 */
+ bpf_program__set_autoattach(skel->progs.prog2, false);
+ ASSERT_TRUE(bpf_program__autoattach(skel->progs.prog1), "autoattach_prog1");
+ ASSERT_FALSE(bpf_program__autoattach(skel->progs.prog2), "autoattach_prog2");
+ if (!ASSERT_OK(test_autoattach__attach(skel), "skel_attach"))
+ goto cleanup;
+
+ usleep(1);
+
+ ASSERT_TRUE(skel->bss->prog1_called, "attached_prog1");
+ ASSERT_FALSE(skel->bss->prog2_called, "attached_prog2");
+
+cleanup:
+ test_autoattach__destroy(skel);
+}
+
diff --git a/tools/testing/selftests/bpf/prog_tests/autoload.c b/tools/testing/selftests/bpf/prog_tests/autoload.c
new file mode 100644
index 000000000000..3693f7d133eb
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/autoload.c
@@ -0,0 +1,41 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2020 Facebook */
+
+#include <test_progs.h>
+#include <time.h>
+#include "test_autoload.skel.h"
+
+void test_autoload(void)
+{
+ int duration = 0, err;
+ struct test_autoload* skel;
+
+ skel = test_autoload__open_and_load();
+ /* prog3 should be broken */
+ if (CHECK(skel, "skel_open_and_load", "unexpected success\n"))
+ goto cleanup;
+
+ skel = test_autoload__open();
+ if (CHECK(!skel, "skel_open", "failed to open skeleton\n"))
+ goto cleanup;
+
+ /* don't load prog3 */
+ bpf_program__set_autoload(skel->progs.prog3, false);
+
+ err = test_autoload__load(skel);
+ if (CHECK(err, "skel_load", "failed to load skeleton: %d\n", err))
+ goto cleanup;
+
+ err = test_autoload__attach(skel);
+ if (CHECK(err, "skel_attach", "skeleton attach failed: %d\n", err))
+ goto cleanup;
+
+ usleep(1);
+
+ CHECK(!skel->bss->prog1_called, "prog1", "not called\n");
+ CHECK(!skel->bss->prog2_called, "prog2", "not called\n");
+ CHECK(skel->bss->prog3_called, "prog3", "called?!\n");
+
+cleanup:
+ test_autoload__destroy(skel);
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/bad_struct_ops.c b/tools/testing/selftests/bpf/prog_tests/bad_struct_ops.c
new file mode 100644
index 000000000000..6a707213e46b
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/bad_struct_ops.c
@@ -0,0 +1,67 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include <test_progs.h>
+#include "bad_struct_ops.skel.h"
+#include "bad_struct_ops2.skel.h"
+
+static void invalid_prog_reuse(void)
+{
+ struct bad_struct_ops *skel;
+ char *log = NULL;
+ int err;
+
+ skel = bad_struct_ops__open();
+ if (!ASSERT_OK_PTR(skel, "bad_struct_ops__open"))
+ return;
+
+ if (start_libbpf_log_capture())
+ goto cleanup;
+
+ err = bad_struct_ops__load(skel);
+ log = stop_libbpf_log_capture();
+ ASSERT_ERR(err, "bad_struct_ops__load should fail");
+ ASSERT_HAS_SUBSTR(log,
+ "struct_ops init_kern testmod_2 func ptr test_1: invalid reuse of prog test_1",
+ "expected init_kern message");
+
+cleanup:
+ free(log);
+ bad_struct_ops__destroy(skel);
+}
+
+static void unused_program(void)
+{
+ struct bad_struct_ops2 *skel;
+ char *log = NULL;
+ int err;
+
+ skel = bad_struct_ops2__open();
+ if (!ASSERT_OK_PTR(skel, "bad_struct_ops2__open"))
+ return;
+
+ /* struct_ops programs not referenced from any maps are open
+ * with autoload set to true.
+ */
+ ASSERT_TRUE(bpf_program__autoload(skel->progs.foo), "foo autoload == true");
+
+ if (start_libbpf_log_capture())
+ goto cleanup;
+
+ err = bad_struct_ops2__load(skel);
+ ASSERT_ERR(err, "bad_struct_ops2__load should fail");
+ log = stop_libbpf_log_capture();
+ ASSERT_HAS_SUBSTR(log, "prog 'foo': failed to load",
+ "message about 'foo' failing to load");
+
+cleanup:
+ free(log);
+ bad_struct_ops2__destroy(skel);
+}
+
+void test_bad_struct_ops(void)
+{
+ if (test__start_subtest("invalid_prog_reuse"))
+ invalid_prog_reuse();
+ if (test__start_subtest("unused_program"))
+ unused_program();
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/bind_perm.c b/tools/testing/selftests/bpf/prog_tests/bind_perm.c
new file mode 100644
index 000000000000..f7cd129cb82b
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/bind_perm.c
@@ -0,0 +1,93 @@
+// SPDX-License-Identifier: GPL-2.0
+#define _GNU_SOURCE
+#include <sched.h>
+#include <stdlib.h>
+#include <sys/types.h>
+#include <sys/socket.h>
+
+#include "test_progs.h"
+#include "cap_helpers.h"
+#include "bind_perm.skel.h"
+
+static int create_netns(void)
+{
+ if (!ASSERT_OK(unshare(CLONE_NEWNET), "create netns"))
+ return -1;
+
+ return 0;
+}
+
+void try_bind(int family, int port, int expected_errno)
+{
+ struct sockaddr_storage addr = {};
+ struct sockaddr_in6 *sin6;
+ struct sockaddr_in *sin;
+ int fd = -1;
+
+ fd = socket(family, SOCK_STREAM, 0);
+ if (!ASSERT_GE(fd, 0, "socket"))
+ goto close_socket;
+
+ if (family == AF_INET) {
+ sin = (struct sockaddr_in *)&addr;
+ sin->sin_family = family;
+ sin->sin_port = htons(port);
+ } else {
+ sin6 = (struct sockaddr_in6 *)&addr;
+ sin6->sin6_family = family;
+ sin6->sin6_port = htons(port);
+ }
+
+ errno = 0;
+ bind(fd, (struct sockaddr *)&addr, sizeof(addr));
+ ASSERT_EQ(errno, expected_errno, "bind");
+
+close_socket:
+ if (fd >= 0)
+ close(fd);
+}
+
+void test_bind_perm(void)
+{
+ const __u64 net_bind_svc_cap = 1ULL << CAP_NET_BIND_SERVICE;
+ struct bind_perm *skel;
+ __u64 old_caps = 0;
+ int cgroup_fd;
+
+ if (create_netns())
+ return;
+
+ cgroup_fd = test__join_cgroup("/bind_perm");
+ if (!ASSERT_GE(cgroup_fd, 0, "test__join_cgroup"))
+ return;
+
+ skel = bind_perm__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "skel"))
+ goto close_cgroup_fd;
+
+ skel->links.bind_v4_prog = bpf_program__attach_cgroup(skel->progs.bind_v4_prog, cgroup_fd);
+ if (!ASSERT_OK_PTR(skel, "bind_v4_prog"))
+ goto close_skeleton;
+
+ skel->links.bind_v6_prog = bpf_program__attach_cgroup(skel->progs.bind_v6_prog, cgroup_fd);
+ if (!ASSERT_OK_PTR(skel, "bind_v6_prog"))
+ goto close_skeleton;
+
+ ASSERT_OK(cap_disable_effective(net_bind_svc_cap, &old_caps),
+ "cap_disable_effective");
+
+ try_bind(AF_INET, 110, EACCES);
+ try_bind(AF_INET6, 110, EACCES);
+
+ try_bind(AF_INET, 111, 0);
+ try_bind(AF_INET6, 111, 0);
+
+ if (old_caps & net_bind_svc_cap)
+ ASSERT_OK(cap_enable_effective(net_bind_svc_cap, NULL),
+ "cap_enable_effective");
+
+close_skeleton:
+ bind_perm__destroy(skel);
+close_cgroup_fd:
+ close(cgroup_fd);
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/bloom_filter_map.c b/tools/testing/selftests/bpf/prog_tests/bloom_filter_map.c
new file mode 100644
index 000000000000..053f4d6da77a
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/bloom_filter_map.c
@@ -0,0 +1,213 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2021 Facebook */
+
+#include <sys/syscall.h>
+#include <test_progs.h>
+#include "bloom_filter_map.skel.h"
+
+static void test_fail_cases(void)
+{
+ LIBBPF_OPTS(bpf_map_create_opts, opts);
+ __u32 value;
+ int fd, err;
+
+ /* Invalid key size */
+ fd = bpf_map_create(BPF_MAP_TYPE_BLOOM_FILTER, NULL, 4, sizeof(value), 100, NULL);
+ if (!ASSERT_LT(fd, 0, "bpf_map_create bloom filter invalid key size"))
+ close(fd);
+
+ /* Invalid value size */
+ fd = bpf_map_create(BPF_MAP_TYPE_BLOOM_FILTER, NULL, 0, 0, 100, NULL);
+ if (!ASSERT_LT(fd, 0, "bpf_map_create bloom filter invalid value size 0"))
+ close(fd);
+
+ /* Invalid max entries size */
+ fd = bpf_map_create(BPF_MAP_TYPE_BLOOM_FILTER, NULL, 0, sizeof(value), 0, NULL);
+ if (!ASSERT_LT(fd, 0, "bpf_map_create bloom filter invalid max entries size"))
+ close(fd);
+
+ /* Bloom filter maps do not support BPF_F_NO_PREALLOC */
+ opts.map_flags = BPF_F_NO_PREALLOC;
+ fd = bpf_map_create(BPF_MAP_TYPE_BLOOM_FILTER, NULL, 0, sizeof(value), 100, &opts);
+ if (!ASSERT_LT(fd, 0, "bpf_map_create bloom filter invalid flags"))
+ close(fd);
+
+ fd = bpf_map_create(BPF_MAP_TYPE_BLOOM_FILTER, NULL, 0, sizeof(value), 100, NULL);
+ if (!ASSERT_GE(fd, 0, "bpf_map_create bloom filter"))
+ return;
+
+ /* Test invalid flags */
+ err = bpf_map_update_elem(fd, NULL, &value, -1);
+ ASSERT_EQ(err, -EINVAL, "bpf_map_update_elem bloom filter invalid flags");
+
+ err = bpf_map_update_elem(fd, NULL, &value, BPF_EXIST);
+ ASSERT_EQ(err, -EINVAL, "bpf_map_update_elem bloom filter invalid flags");
+
+ err = bpf_map_update_elem(fd, NULL, &value, BPF_F_LOCK);
+ ASSERT_EQ(err, -EINVAL, "bpf_map_update_elem bloom filter invalid flags");
+
+ err = bpf_map_update_elem(fd, NULL, &value, BPF_NOEXIST);
+ ASSERT_EQ(err, -EINVAL, "bpf_map_update_elem bloom filter invalid flags");
+
+ err = bpf_map_update_elem(fd, NULL, &value, 10000);
+ ASSERT_EQ(err, -EINVAL, "bpf_map_update_elem bloom filter invalid flags");
+
+ close(fd);
+}
+
+static void test_success_cases(void)
+{
+ LIBBPF_OPTS(bpf_map_create_opts, opts);
+ char value[11];
+ int fd, err;
+
+ /* Create a map */
+ opts.map_flags = BPF_F_ZERO_SEED | BPF_F_NUMA_NODE;
+ fd = bpf_map_create(BPF_MAP_TYPE_BLOOM_FILTER, NULL, 0, sizeof(value), 100, &opts);
+ if (!ASSERT_GE(fd, 0, "bpf_map_create bloom filter success case"))
+ return;
+
+ /* Add a value to the bloom filter */
+ err = bpf_map_update_elem(fd, NULL, &value, 0);
+ if (!ASSERT_OK(err, "bpf_map_update_elem bloom filter success case"))
+ goto done;
+
+ /* Lookup a value in the bloom filter */
+ err = bpf_map_lookup_elem(fd, NULL, &value);
+ ASSERT_OK(err, "bpf_map_update_elem bloom filter success case");
+
+done:
+ close(fd);
+}
+
+static void check_bloom(struct bloom_filter_map *skel)
+{
+ struct bpf_link *link;
+
+ link = bpf_program__attach(skel->progs.check_bloom);
+ if (!ASSERT_OK_PTR(link, "link"))
+ return;
+
+ syscall(SYS_getpgid);
+
+ ASSERT_EQ(skel->bss->error, 0, "error");
+
+ bpf_link__destroy(link);
+}
+
+static void test_inner_map(struct bloom_filter_map *skel, const __u32 *rand_vals,
+ __u32 nr_rand_vals)
+{
+ int outer_map_fd, inner_map_fd, err, i, key = 0;
+ struct bpf_link *link;
+
+ /* Create a bloom filter map that will be used as the inner map */
+ inner_map_fd = bpf_map_create(BPF_MAP_TYPE_BLOOM_FILTER, NULL, 0, sizeof(*rand_vals),
+ nr_rand_vals, NULL);
+ if (!ASSERT_GE(inner_map_fd, 0, "bpf_map_create bloom filter inner map"))
+ return;
+
+ for (i = 0; i < nr_rand_vals; i++) {
+ err = bpf_map_update_elem(inner_map_fd, NULL, rand_vals + i, BPF_ANY);
+ if (!ASSERT_OK(err, "Add random value to inner_map_fd"))
+ goto done;
+ }
+
+ /* Add the bloom filter map to the outer map */
+ outer_map_fd = bpf_map__fd(skel->maps.outer_map);
+ err = bpf_map_update_elem(outer_map_fd, &key, &inner_map_fd, BPF_ANY);
+ if (!ASSERT_OK(err, "Add bloom filter map to outer map"))
+ goto done;
+
+ /* Attach the bloom_filter_inner_map prog */
+ link = bpf_program__attach(skel->progs.inner_map);
+ if (!ASSERT_OK_PTR(link, "link"))
+ goto delete_inner_map;
+
+ syscall(SYS_getpgid);
+
+ ASSERT_EQ(skel->bss->error, 0, "error");
+
+ bpf_link__destroy(link);
+
+delete_inner_map:
+ /* Ensure the inner bloom filter map can be deleted */
+ err = bpf_map_delete_elem(outer_map_fd, &key);
+ ASSERT_OK(err, "Delete inner bloom filter map");
+
+done:
+ close(inner_map_fd);
+}
+
+static int setup_progs(struct bloom_filter_map **out_skel, __u32 **out_rand_vals,
+ __u32 *out_nr_rand_vals)
+{
+ struct bloom_filter_map *skel;
+ int random_data_fd, bloom_fd;
+ __u32 *rand_vals = NULL;
+ __u32 map_size, val;
+ int err, i;
+
+ /* Set up a bloom filter map skeleton */
+ skel = bloom_filter_map__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "bloom_filter_map__open_and_load"))
+ return -EINVAL;
+
+ /* Set up rand_vals */
+ map_size = bpf_map__max_entries(skel->maps.map_random_data);
+ rand_vals = malloc(sizeof(*rand_vals) * map_size);
+ if (!rand_vals) {
+ err = -ENOMEM;
+ goto error;
+ }
+
+ /* Generate random values and populate both skeletons */
+ random_data_fd = bpf_map__fd(skel->maps.map_random_data);
+ bloom_fd = bpf_map__fd(skel->maps.map_bloom);
+ for (i = 0; i < map_size; i++) {
+ val = rand();
+
+ err = bpf_map_update_elem(random_data_fd, &i, &val, BPF_ANY);
+ if (!ASSERT_OK(err, "Add random value to map_random_data"))
+ goto error;
+
+ err = bpf_map_update_elem(bloom_fd, NULL, &val, BPF_ANY);
+ if (!ASSERT_OK(err, "Add random value to map_bloom"))
+ goto error;
+
+ rand_vals[i] = val;
+ }
+
+ *out_skel = skel;
+ *out_rand_vals = rand_vals;
+ *out_nr_rand_vals = map_size;
+
+ return 0;
+
+error:
+ bloom_filter_map__destroy(skel);
+ if (rand_vals)
+ free(rand_vals);
+ return err;
+}
+
+void test_bloom_filter_map(void)
+{
+ __u32 *rand_vals = NULL, nr_rand_vals = 0;
+ struct bloom_filter_map *skel = NULL;
+ int err;
+
+ test_fail_cases();
+ test_success_cases();
+
+ err = setup_progs(&skel, &rand_vals, &nr_rand_vals);
+ if (err)
+ return;
+
+ test_inner_map(skel, rand_vals, nr_rand_vals);
+ free(rand_vals);
+
+ check_bloom(skel);
+
+ bloom_filter_map__destroy(skel);
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/bpf_cookie.c b/tools/testing/selftests/bpf/prog_tests/bpf_cookie.c
new file mode 100644
index 000000000000..1454cebc262b
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/bpf_cookie.c
@@ -0,0 +1,606 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2021 Facebook */
+#define _GNU_SOURCE
+#include <pthread.h>
+#include <sched.h>
+#include <sys/syscall.h>
+#include <sys/mman.h>
+#include <unistd.h>
+#include <test_progs.h>
+#include <network_helpers.h>
+#include <bpf/btf.h>
+#include "test_bpf_cookie.skel.h"
+#include "kprobe_multi.skel.h"
+#include "uprobe_multi.skel.h"
+
+/* uprobe attach point */
+static noinline void trigger_func(void)
+{
+ asm volatile ("");
+}
+
+static void kprobe_subtest(struct test_bpf_cookie *skel)
+{
+ DECLARE_LIBBPF_OPTS(bpf_kprobe_opts, opts);
+ struct bpf_link *link1 = NULL, *link2 = NULL;
+ struct bpf_link *retlink1 = NULL, *retlink2 = NULL;
+
+ /* attach two kprobes */
+ opts.bpf_cookie = 0x1;
+ opts.retprobe = false;
+ link1 = bpf_program__attach_kprobe_opts(skel->progs.handle_kprobe,
+ SYS_NANOSLEEP_KPROBE_NAME, &opts);
+ if (!ASSERT_OK_PTR(link1, "link1"))
+ goto cleanup;
+
+ opts.bpf_cookie = 0x2;
+ opts.retprobe = false;
+ link2 = bpf_program__attach_kprobe_opts(skel->progs.handle_kprobe,
+ SYS_NANOSLEEP_KPROBE_NAME, &opts);
+ if (!ASSERT_OK_PTR(link2, "link2"))
+ goto cleanup;
+
+ /* attach two kretprobes */
+ opts.bpf_cookie = 0x10;
+ opts.retprobe = true;
+ retlink1 = bpf_program__attach_kprobe_opts(skel->progs.handle_kretprobe,
+ SYS_NANOSLEEP_KPROBE_NAME, &opts);
+ if (!ASSERT_OK_PTR(retlink1, "retlink1"))
+ goto cleanup;
+
+ opts.bpf_cookie = 0x20;
+ opts.retprobe = true;
+ retlink2 = bpf_program__attach_kprobe_opts(skel->progs.handle_kretprobe,
+ SYS_NANOSLEEP_KPROBE_NAME, &opts);
+ if (!ASSERT_OK_PTR(retlink2, "retlink2"))
+ goto cleanup;
+
+ /* trigger kprobe && kretprobe */
+ usleep(1);
+
+ ASSERT_EQ(skel->bss->kprobe_res, 0x1 | 0x2, "kprobe_res");
+ ASSERT_EQ(skel->bss->kretprobe_res, 0x10 | 0x20, "kretprobe_res");
+
+cleanup:
+ bpf_link__destroy(link1);
+ bpf_link__destroy(link2);
+ bpf_link__destroy(retlink1);
+ bpf_link__destroy(retlink2);
+}
+
+static void kprobe_multi_test_run(struct kprobe_multi *skel)
+{
+ LIBBPF_OPTS(bpf_test_run_opts, topts);
+ int err, prog_fd;
+
+ prog_fd = bpf_program__fd(skel->progs.trigger);
+ err = bpf_prog_test_run_opts(prog_fd, &topts);
+ ASSERT_OK(err, "test_run");
+ ASSERT_EQ(topts.retval, 0, "test_run");
+
+ ASSERT_EQ(skel->bss->kprobe_test1_result, 1, "kprobe_test1_result");
+ ASSERT_EQ(skel->bss->kprobe_test2_result, 1, "kprobe_test2_result");
+ ASSERT_EQ(skel->bss->kprobe_test3_result, 1, "kprobe_test3_result");
+ ASSERT_EQ(skel->bss->kprobe_test4_result, 1, "kprobe_test4_result");
+ ASSERT_EQ(skel->bss->kprobe_test5_result, 1, "kprobe_test5_result");
+ ASSERT_EQ(skel->bss->kprobe_test6_result, 1, "kprobe_test6_result");
+ ASSERT_EQ(skel->bss->kprobe_test7_result, 1, "kprobe_test7_result");
+ ASSERT_EQ(skel->bss->kprobe_test8_result, 1, "kprobe_test8_result");
+
+ ASSERT_EQ(skel->bss->kretprobe_test1_result, 1, "kretprobe_test1_result");
+ ASSERT_EQ(skel->bss->kretprobe_test2_result, 1, "kretprobe_test2_result");
+ ASSERT_EQ(skel->bss->kretprobe_test3_result, 1, "kretprobe_test3_result");
+ ASSERT_EQ(skel->bss->kretprobe_test4_result, 1, "kretprobe_test4_result");
+ ASSERT_EQ(skel->bss->kretprobe_test5_result, 1, "kretprobe_test5_result");
+ ASSERT_EQ(skel->bss->kretprobe_test6_result, 1, "kretprobe_test6_result");
+ ASSERT_EQ(skel->bss->kretprobe_test7_result, 1, "kretprobe_test7_result");
+ ASSERT_EQ(skel->bss->kretprobe_test8_result, 1, "kretprobe_test8_result");
+}
+
+static void kprobe_multi_link_api_subtest(void)
+{
+ int prog_fd, link1_fd = -1, link2_fd = -1;
+ struct kprobe_multi *skel = NULL;
+ LIBBPF_OPTS(bpf_link_create_opts, opts);
+ unsigned long long addrs[8];
+ __u64 cookies[8];
+
+ if (!ASSERT_OK(load_kallsyms(), "load_kallsyms"))
+ goto cleanup;
+
+ skel = kprobe_multi__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "fentry_raw_skel_load"))
+ goto cleanup;
+
+ skel->bss->pid = getpid();
+ skel->bss->test_cookie = true;
+
+#define GET_ADDR(__sym, __addr) ({ \
+ __addr = ksym_get_addr(__sym); \
+ if (!ASSERT_NEQ(__addr, 0, "ksym_get_addr " #__sym)) \
+ goto cleanup; \
+})
+
+ GET_ADDR("bpf_fentry_test1", addrs[0]);
+ GET_ADDR("bpf_fentry_test3", addrs[1]);
+ GET_ADDR("bpf_fentry_test4", addrs[2]);
+ GET_ADDR("bpf_fentry_test5", addrs[3]);
+ GET_ADDR("bpf_fentry_test6", addrs[4]);
+ GET_ADDR("bpf_fentry_test7", addrs[5]);
+ GET_ADDR("bpf_fentry_test2", addrs[6]);
+ GET_ADDR("bpf_fentry_test8", addrs[7]);
+
+#undef GET_ADDR
+
+ cookies[0] = 1; /* bpf_fentry_test1 */
+ cookies[1] = 2; /* bpf_fentry_test3 */
+ cookies[2] = 3; /* bpf_fentry_test4 */
+ cookies[3] = 4; /* bpf_fentry_test5 */
+ cookies[4] = 5; /* bpf_fentry_test6 */
+ cookies[5] = 6; /* bpf_fentry_test7 */
+ cookies[6] = 7; /* bpf_fentry_test2 */
+ cookies[7] = 8; /* bpf_fentry_test8 */
+
+ opts.kprobe_multi.addrs = (const unsigned long *) &addrs;
+ opts.kprobe_multi.cnt = ARRAY_SIZE(addrs);
+ opts.kprobe_multi.cookies = (const __u64 *) &cookies;
+ prog_fd = bpf_program__fd(skel->progs.test_kprobe);
+
+ link1_fd = bpf_link_create(prog_fd, 0, BPF_TRACE_KPROBE_MULTI, &opts);
+ if (!ASSERT_GE(link1_fd, 0, "link1_fd"))
+ goto cleanup;
+
+ cookies[0] = 8; /* bpf_fentry_test1 */
+ cookies[1] = 7; /* bpf_fentry_test3 */
+ cookies[2] = 6; /* bpf_fentry_test4 */
+ cookies[3] = 5; /* bpf_fentry_test5 */
+ cookies[4] = 4; /* bpf_fentry_test6 */
+ cookies[5] = 3; /* bpf_fentry_test7 */
+ cookies[6] = 2; /* bpf_fentry_test2 */
+ cookies[7] = 1; /* bpf_fentry_test8 */
+
+ opts.kprobe_multi.flags = BPF_F_KPROBE_MULTI_RETURN;
+ prog_fd = bpf_program__fd(skel->progs.test_kretprobe);
+
+ link2_fd = bpf_link_create(prog_fd, 0, BPF_TRACE_KPROBE_MULTI, &opts);
+ if (!ASSERT_GE(link2_fd, 0, "link2_fd"))
+ goto cleanup;
+
+ kprobe_multi_test_run(skel);
+
+cleanup:
+ close(link1_fd);
+ close(link2_fd);
+ kprobe_multi__destroy(skel);
+}
+
+static void kprobe_multi_attach_api_subtest(void)
+{
+ struct bpf_link *link1 = NULL, *link2 = NULL;
+ LIBBPF_OPTS(bpf_kprobe_multi_opts, opts);
+ LIBBPF_OPTS(bpf_test_run_opts, topts);
+ struct kprobe_multi *skel = NULL;
+ const char *syms[8] = {
+ "bpf_fentry_test1",
+ "bpf_fentry_test3",
+ "bpf_fentry_test4",
+ "bpf_fentry_test5",
+ "bpf_fentry_test6",
+ "bpf_fentry_test7",
+ "bpf_fentry_test2",
+ "bpf_fentry_test8",
+ };
+ __u64 cookies[8];
+
+ skel = kprobe_multi__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "fentry_raw_skel_load"))
+ goto cleanup;
+
+ skel->bss->pid = getpid();
+ skel->bss->test_cookie = true;
+
+ cookies[0] = 1; /* bpf_fentry_test1 */
+ cookies[1] = 2; /* bpf_fentry_test3 */
+ cookies[2] = 3; /* bpf_fentry_test4 */
+ cookies[3] = 4; /* bpf_fentry_test5 */
+ cookies[4] = 5; /* bpf_fentry_test6 */
+ cookies[5] = 6; /* bpf_fentry_test7 */
+ cookies[6] = 7; /* bpf_fentry_test2 */
+ cookies[7] = 8; /* bpf_fentry_test8 */
+
+ opts.syms = syms;
+ opts.cnt = ARRAY_SIZE(syms);
+ opts.cookies = cookies;
+
+ link1 = bpf_program__attach_kprobe_multi_opts(skel->progs.test_kprobe,
+ NULL, &opts);
+ if (!ASSERT_OK_PTR(link1, "bpf_program__attach_kprobe_multi_opts"))
+ goto cleanup;
+
+ cookies[0] = 8; /* bpf_fentry_test1 */
+ cookies[1] = 7; /* bpf_fentry_test3 */
+ cookies[2] = 6; /* bpf_fentry_test4 */
+ cookies[3] = 5; /* bpf_fentry_test5 */
+ cookies[4] = 4; /* bpf_fentry_test6 */
+ cookies[5] = 3; /* bpf_fentry_test7 */
+ cookies[6] = 2; /* bpf_fentry_test2 */
+ cookies[7] = 1; /* bpf_fentry_test8 */
+
+ opts.retprobe = true;
+
+ link2 = bpf_program__attach_kprobe_multi_opts(skel->progs.test_kretprobe,
+ NULL, &opts);
+ if (!ASSERT_OK_PTR(link2, "bpf_program__attach_kprobe_multi_opts"))
+ goto cleanup;
+
+ kprobe_multi_test_run(skel);
+
+cleanup:
+ bpf_link__destroy(link2);
+ bpf_link__destroy(link1);
+ kprobe_multi__destroy(skel);
+}
+
+/* defined in prog_tests/uprobe_multi_test.c */
+void uprobe_multi_func_1(void);
+void uprobe_multi_func_2(void);
+void uprobe_multi_func_3(void);
+
+static void uprobe_multi_test_run(struct uprobe_multi *skel)
+{
+ skel->bss->uprobe_multi_func_1_addr = (__u64) uprobe_multi_func_1;
+ skel->bss->uprobe_multi_func_2_addr = (__u64) uprobe_multi_func_2;
+ skel->bss->uprobe_multi_func_3_addr = (__u64) uprobe_multi_func_3;
+
+ skel->bss->pid = getpid();
+ skel->bss->test_cookie = true;
+
+ uprobe_multi_func_1();
+ uprobe_multi_func_2();
+ uprobe_multi_func_3();
+
+ ASSERT_EQ(skel->bss->uprobe_multi_func_1_result, 1, "uprobe_multi_func_1_result");
+ ASSERT_EQ(skel->bss->uprobe_multi_func_2_result, 1, "uprobe_multi_func_2_result");
+ ASSERT_EQ(skel->bss->uprobe_multi_func_3_result, 1, "uprobe_multi_func_3_result");
+
+ ASSERT_EQ(skel->bss->uretprobe_multi_func_1_result, 1, "uretprobe_multi_func_1_result");
+ ASSERT_EQ(skel->bss->uretprobe_multi_func_2_result, 1, "uretprobe_multi_func_2_result");
+ ASSERT_EQ(skel->bss->uretprobe_multi_func_3_result, 1, "uretprobe_multi_func_3_result");
+}
+
+static void uprobe_multi_attach_api_subtest(void)
+{
+ struct bpf_link *link1 = NULL, *link2 = NULL;
+ struct uprobe_multi *skel = NULL;
+ LIBBPF_OPTS(bpf_uprobe_multi_opts, opts);
+ const char *syms[3] = {
+ "uprobe_multi_func_1",
+ "uprobe_multi_func_2",
+ "uprobe_multi_func_3",
+ };
+ __u64 cookies[3];
+
+ cookies[0] = 3; /* uprobe_multi_func_1 */
+ cookies[1] = 1; /* uprobe_multi_func_2 */
+ cookies[2] = 2; /* uprobe_multi_func_3 */
+
+ opts.syms = syms;
+ opts.cnt = ARRAY_SIZE(syms);
+ opts.cookies = &cookies[0];
+
+ skel = uprobe_multi__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "uprobe_multi"))
+ goto cleanup;
+
+ link1 = bpf_program__attach_uprobe_multi(skel->progs.uprobe, -1,
+ "/proc/self/exe", NULL, &opts);
+ if (!ASSERT_OK_PTR(link1, "bpf_program__attach_uprobe_multi"))
+ goto cleanup;
+
+ cookies[0] = 2; /* uprobe_multi_func_1 */
+ cookies[1] = 3; /* uprobe_multi_func_2 */
+ cookies[2] = 1; /* uprobe_multi_func_3 */
+
+ opts.retprobe = true;
+ link2 = bpf_program__attach_uprobe_multi(skel->progs.uretprobe, -1,
+ "/proc/self/exe", NULL, &opts);
+ if (!ASSERT_OK_PTR(link2, "bpf_program__attach_uprobe_multi_retprobe"))
+ goto cleanup;
+
+ uprobe_multi_test_run(skel);
+
+cleanup:
+ bpf_link__destroy(link2);
+ bpf_link__destroy(link1);
+ uprobe_multi__destroy(skel);
+}
+
+static void uprobe_subtest(struct test_bpf_cookie *skel)
+{
+ DECLARE_LIBBPF_OPTS(bpf_uprobe_opts, opts);
+ struct bpf_link *link1 = NULL, *link2 = NULL;
+ struct bpf_link *retlink1 = NULL, *retlink2 = NULL;
+ ssize_t uprobe_offset;
+
+ uprobe_offset = get_uprobe_offset(&trigger_func);
+ if (!ASSERT_GE(uprobe_offset, 0, "uprobe_offset"))
+ goto cleanup;
+
+ /* attach two uprobes */
+ opts.bpf_cookie = 0x100;
+ opts.retprobe = false;
+ link1 = bpf_program__attach_uprobe_opts(skel->progs.handle_uprobe, 0 /* self pid */,
+ "/proc/self/exe", uprobe_offset, &opts);
+ if (!ASSERT_OK_PTR(link1, "link1"))
+ goto cleanup;
+
+ opts.bpf_cookie = 0x200;
+ opts.retprobe = false;
+ link2 = bpf_program__attach_uprobe_opts(skel->progs.handle_uprobe, -1 /* any pid */,
+ "/proc/self/exe", uprobe_offset, &opts);
+ if (!ASSERT_OK_PTR(link2, "link2"))
+ goto cleanup;
+
+ /* attach two uretprobes */
+ opts.bpf_cookie = 0x1000;
+ opts.retprobe = true;
+ retlink1 = bpf_program__attach_uprobe_opts(skel->progs.handle_uretprobe, -1 /* any pid */,
+ "/proc/self/exe", uprobe_offset, &opts);
+ if (!ASSERT_OK_PTR(retlink1, "retlink1"))
+ goto cleanup;
+
+ opts.bpf_cookie = 0x2000;
+ opts.retprobe = true;
+ retlink2 = bpf_program__attach_uprobe_opts(skel->progs.handle_uretprobe, 0 /* self pid */,
+ "/proc/self/exe", uprobe_offset, &opts);
+ if (!ASSERT_OK_PTR(retlink2, "retlink2"))
+ goto cleanup;
+
+ /* trigger uprobe && uretprobe */
+ trigger_func();
+
+ ASSERT_EQ(skel->bss->uprobe_res, 0x100 | 0x200, "uprobe_res");
+ ASSERT_EQ(skel->bss->uretprobe_res, 0x1000 | 0x2000, "uretprobe_res");
+
+cleanup:
+ bpf_link__destroy(link1);
+ bpf_link__destroy(link2);
+ bpf_link__destroy(retlink1);
+ bpf_link__destroy(retlink2);
+}
+
+static void tp_subtest(struct test_bpf_cookie *skel)
+{
+ DECLARE_LIBBPF_OPTS(bpf_tracepoint_opts, opts);
+ struct bpf_link *link1 = NULL, *link2 = NULL, *link3 = NULL;
+
+ /* attach first tp prog */
+ opts.bpf_cookie = 0x10000;
+ link1 = bpf_program__attach_tracepoint_opts(skel->progs.handle_tp1,
+ "syscalls", "sys_enter_nanosleep", &opts);
+ if (!ASSERT_OK_PTR(link1, "link1"))
+ goto cleanup;
+
+ /* attach second tp prog */
+ opts.bpf_cookie = 0x20000;
+ link2 = bpf_program__attach_tracepoint_opts(skel->progs.handle_tp2,
+ "syscalls", "sys_enter_nanosleep", &opts);
+ if (!ASSERT_OK_PTR(link2, "link2"))
+ goto cleanup;
+
+ /* trigger tracepoints */
+ usleep(1);
+
+ ASSERT_EQ(skel->bss->tp_res, 0x10000 | 0x20000, "tp_res1");
+
+ /* now we detach first prog and will attach third one, which causes
+ * two internal calls to bpf_prog_array_copy(), shuffling
+ * bpf_prog_array_items around. We test here that we don't lose track
+ * of associated bpf_cookies.
+ */
+ bpf_link__destroy(link1);
+ link1 = NULL;
+ kern_sync_rcu();
+ skel->bss->tp_res = 0;
+
+ /* attach third tp prog */
+ opts.bpf_cookie = 0x40000;
+ link3 = bpf_program__attach_tracepoint_opts(skel->progs.handle_tp3,
+ "syscalls", "sys_enter_nanosleep", &opts);
+ if (!ASSERT_OK_PTR(link3, "link3"))
+ goto cleanup;
+
+ /* trigger tracepoints */
+ usleep(1);
+
+ ASSERT_EQ(skel->bss->tp_res, 0x20000 | 0x40000, "tp_res2");
+
+cleanup:
+ bpf_link__destroy(link1);
+ bpf_link__destroy(link2);
+ bpf_link__destroy(link3);
+}
+
+static void burn_cpu(void)
+{
+ volatile int j = 0;
+ cpu_set_t cpu_set;
+ int i, err;
+
+ /* generate some branches on cpu 0 */
+ CPU_ZERO(&cpu_set);
+ CPU_SET(0, &cpu_set);
+ err = pthread_setaffinity_np(pthread_self(), sizeof(cpu_set), &cpu_set);
+ ASSERT_OK(err, "set_thread_affinity");
+
+ /* spin the loop for a while (random high number) */
+ for (i = 0; i < 1000000; ++i)
+ ++j;
+}
+
+static void pe_subtest(struct test_bpf_cookie *skel)
+{
+ DECLARE_LIBBPF_OPTS(bpf_perf_event_opts, opts);
+ struct bpf_link *link = NULL;
+ struct perf_event_attr attr;
+ int pfd = -1;
+
+ /* create perf event */
+ memset(&attr, 0, sizeof(attr));
+ attr.size = sizeof(attr);
+ attr.type = PERF_TYPE_SOFTWARE;
+ attr.config = PERF_COUNT_SW_CPU_CLOCK;
+ attr.freq = 1;
+ attr.sample_freq = 1000;
+ pfd = syscall(__NR_perf_event_open, &attr, -1, 0, -1, PERF_FLAG_FD_CLOEXEC);
+ if (!ASSERT_GE(pfd, 0, "perf_fd"))
+ goto cleanup;
+
+ opts.bpf_cookie = 0x100000;
+ link = bpf_program__attach_perf_event_opts(skel->progs.handle_pe, pfd, &opts);
+ if (!ASSERT_OK_PTR(link, "link1"))
+ goto cleanup;
+
+ burn_cpu(); /* trigger BPF prog */
+
+ ASSERT_EQ(skel->bss->pe_res, 0x100000, "pe_res1");
+
+ /* prevent bpf_link__destroy() closing pfd itself */
+ bpf_link__disconnect(link);
+ /* close BPF link's FD explicitly */
+ close(bpf_link__fd(link));
+ /* free up memory used by struct bpf_link */
+ bpf_link__destroy(link);
+ link = NULL;
+ kern_sync_rcu();
+ skel->bss->pe_res = 0;
+
+ opts.bpf_cookie = 0x200000;
+ link = bpf_program__attach_perf_event_opts(skel->progs.handle_pe, pfd, &opts);
+ if (!ASSERT_OK_PTR(link, "link2"))
+ goto cleanup;
+
+ burn_cpu(); /* trigger BPF prog */
+
+ ASSERT_EQ(skel->bss->pe_res, 0x200000, "pe_res2");
+
+cleanup:
+ close(pfd);
+ bpf_link__destroy(link);
+}
+
+static void tracing_subtest(struct test_bpf_cookie *skel)
+{
+ __u64 cookie;
+ int prog_fd;
+ int fentry_fd = -1, fexit_fd = -1, fmod_ret_fd = -1;
+ LIBBPF_OPTS(bpf_test_run_opts, opts);
+ LIBBPF_OPTS(bpf_link_create_opts, link_opts);
+
+ skel->bss->fentry_res = 0;
+ skel->bss->fexit_res = 0;
+
+ cookie = 0x10000000000000L;
+ prog_fd = bpf_program__fd(skel->progs.fentry_test1);
+ link_opts.tracing.cookie = cookie;
+ fentry_fd = bpf_link_create(prog_fd, 0, BPF_TRACE_FENTRY, &link_opts);
+ if (!ASSERT_GE(fentry_fd, 0, "fentry.link_create"))
+ goto cleanup;
+
+ cookie = 0x20000000000000L;
+ prog_fd = bpf_program__fd(skel->progs.fexit_test1);
+ link_opts.tracing.cookie = cookie;
+ fexit_fd = bpf_link_create(prog_fd, 0, BPF_TRACE_FEXIT, &link_opts);
+ if (!ASSERT_GE(fexit_fd, 0, "fexit.link_create"))
+ goto cleanup;
+
+ cookie = 0x30000000000000L;
+ prog_fd = bpf_program__fd(skel->progs.fmod_ret_test);
+ link_opts.tracing.cookie = cookie;
+ fmod_ret_fd = bpf_link_create(prog_fd, 0, BPF_MODIFY_RETURN, &link_opts);
+ if (!ASSERT_GE(fmod_ret_fd, 0, "fmod_ret.link_create"))
+ goto cleanup;
+
+ prog_fd = bpf_program__fd(skel->progs.fentry_test1);
+ bpf_prog_test_run_opts(prog_fd, &opts);
+
+ prog_fd = bpf_program__fd(skel->progs.fmod_ret_test);
+ bpf_prog_test_run_opts(prog_fd, &opts);
+
+ ASSERT_EQ(skel->bss->fentry_res, 0x10000000000000L, "fentry_res");
+ ASSERT_EQ(skel->bss->fexit_res, 0x20000000000000L, "fexit_res");
+ ASSERT_EQ(skel->bss->fmod_ret_res, 0x30000000000000L, "fmod_ret_res");
+
+cleanup:
+ if (fentry_fd >= 0)
+ close(fentry_fd);
+ if (fexit_fd >= 0)
+ close(fexit_fd);
+ if (fmod_ret_fd >= 0)
+ close(fmod_ret_fd);
+}
+
+int stack_mprotect(void);
+
+static void lsm_subtest(struct test_bpf_cookie *skel)
+{
+ __u64 cookie;
+ int prog_fd;
+ int lsm_fd = -1;
+ LIBBPF_OPTS(bpf_link_create_opts, link_opts);
+ int err;
+
+ skel->bss->lsm_res = 0;
+
+ cookie = 0x90000000000090L;
+ prog_fd = bpf_program__fd(skel->progs.test_int_hook);
+ link_opts.tracing.cookie = cookie;
+ lsm_fd = bpf_link_create(prog_fd, 0, BPF_LSM_MAC, &link_opts);
+ if (!ASSERT_GE(lsm_fd, 0, "lsm.link_create"))
+ goto cleanup;
+
+ err = stack_mprotect();
+ if (!ASSERT_EQ(err, -1, "stack_mprotect") ||
+ !ASSERT_EQ(errno, EPERM, "stack_mprotect"))
+ goto cleanup;
+
+ usleep(1);
+
+ ASSERT_EQ(skel->bss->lsm_res, 0x90000000000090L, "fentry_res");
+
+cleanup:
+ if (lsm_fd >= 0)
+ close(lsm_fd);
+}
+
+void test_bpf_cookie(void)
+{
+ struct test_bpf_cookie *skel;
+
+ skel = test_bpf_cookie__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "skel_open"))
+ return;
+
+ skel->bss->my_tid = syscall(SYS_gettid);
+
+ if (test__start_subtest("kprobe"))
+ kprobe_subtest(skel);
+ if (test__start_subtest("multi_kprobe_link_api"))
+ kprobe_multi_link_api_subtest();
+ if (test__start_subtest("multi_kprobe_attach_api"))
+ kprobe_multi_attach_api_subtest();
+ if (test__start_subtest("uprobe"))
+ uprobe_subtest(skel);
+ if (test__start_subtest("multi_uprobe_attach_api"))
+ uprobe_multi_attach_api_subtest();
+ if (test__start_subtest("tracepoint"))
+ tp_subtest(skel);
+ if (test__start_subtest("perf_event"))
+ pe_subtest(skel);
+ if (test__start_subtest("trampoline"))
+ tracing_subtest(skel);
+ if (test__start_subtest("lsm"))
+ lsm_subtest(skel);
+
+ test_bpf_cookie__destroy(skel);
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/bpf_iter.c b/tools/testing/selftests/bpf/prog_tests/bpf_iter.c
index 87c29dde1cf9..618af9dfae9b 100644
--- a/tools/testing/selftests/bpf/prog_tests/bpf_iter.c
+++ b/tools/testing/selftests/bpf/prog_tests/bpf_iter.c
@@ -1,48 +1,68 @@
// SPDX-License-Identifier: GPL-2.0
/* Copyright (c) 2020 Facebook */
#include <test_progs.h>
+#include <unistd.h>
+#include <sys/syscall.h>
+#include <task_local_storage_helpers.h>
#include "bpf_iter_ipv6_route.skel.h"
#include "bpf_iter_netlink.skel.h"
#include "bpf_iter_bpf_map.skel.h"
-#include "bpf_iter_task.skel.h"
+#include "bpf_iter_tasks.skel.h"
+#include "bpf_iter_task_stack.skel.h"
#include "bpf_iter_task_file.skel.h"
+#include "bpf_iter_task_vmas.skel.h"
+#include "bpf_iter_task_btf.skel.h"
+#include "bpf_iter_tcp4.skel.h"
+#include "bpf_iter_tcp6.skel.h"
+#include "bpf_iter_udp4.skel.h"
+#include "bpf_iter_udp6.skel.h"
+#include "bpf_iter_unix.skel.h"
+#include "bpf_iter_vma_offset.skel.h"
#include "bpf_iter_test_kern1.skel.h"
#include "bpf_iter_test_kern2.skel.h"
#include "bpf_iter_test_kern3.skel.h"
#include "bpf_iter_test_kern4.skel.h"
-
-static int duration;
+#include "bpf_iter_bpf_hash_map.skel.h"
+#include "bpf_iter_bpf_percpu_hash_map.skel.h"
+#include "bpf_iter_bpf_array_map.skel.h"
+#include "bpf_iter_bpf_percpu_array_map.skel.h"
+#include "bpf_iter_bpf_sk_storage_helpers.skel.h"
+#include "bpf_iter_bpf_sk_storage_map.skel.h"
+#include "bpf_iter_test_kern5.skel.h"
+#include "bpf_iter_test_kern6.skel.h"
+#include "bpf_iter_bpf_link.skel.h"
+#include "bpf_iter_ksym.skel.h"
+#include "bpf_iter_sockmap.skel.h"
static void test_btf_id_or_null(void)
{
struct bpf_iter_test_kern3 *skel;
skel = bpf_iter_test_kern3__open_and_load();
- if (CHECK(skel, "bpf_iter_test_kern3__open_and_load",
- "skeleton open_and_load unexpectedly succeeded\n")) {
+ if (!ASSERT_ERR_PTR(skel, "bpf_iter_test_kern3__open_and_load")) {
bpf_iter_test_kern3__destroy(skel);
return;
}
}
-static void do_dummy_read(struct bpf_program *prog)
+static void do_dummy_read_opts(struct bpf_program *prog, struct bpf_iter_attach_opts *opts)
{
struct bpf_link *link;
char buf[16] = {};
int iter_fd, len;
- link = bpf_program__attach_iter(prog, NULL);
- if (CHECK(IS_ERR(link), "attach_iter", "attach_iter failed\n"))
+ link = bpf_program__attach_iter(prog, opts);
+ if (!ASSERT_OK_PTR(link, "attach_iter"))
return;
iter_fd = bpf_iter_create(bpf_link__fd(link));
- if (CHECK(iter_fd < 0, "create_iter", "create_iter failed\n"))
+ if (!ASSERT_GE(iter_fd, 0, "create_iter"))
goto free_link;
/* not check contents, but ensure read() ends without error */
while ((len = read(iter_fd, buf, sizeof(buf))) > 0)
;
- CHECK(len < 0, "read", "read failed: %s\n", strerror(errno));
+ ASSERT_GE(len, 0, "read");
close(iter_fd);
@@ -50,13 +70,77 @@ free_link:
bpf_link__destroy(link);
}
+static void do_dummy_read(struct bpf_program *prog)
+{
+ do_dummy_read_opts(prog, NULL);
+}
+
+static void do_read_map_iter_fd(struct bpf_object_skeleton **skel, struct bpf_program *prog,
+ struct bpf_map *map)
+{
+ DECLARE_LIBBPF_OPTS(bpf_iter_attach_opts, opts);
+ union bpf_iter_link_info linfo;
+ struct bpf_link *link;
+ char buf[16] = {};
+ int iter_fd, len;
+
+ memset(&linfo, 0, sizeof(linfo));
+ linfo.map.map_fd = bpf_map__fd(map);
+ opts.link_info = &linfo;
+ opts.link_info_len = sizeof(linfo);
+ link = bpf_program__attach_iter(prog, &opts);
+ if (!ASSERT_OK_PTR(link, "attach_map_iter"))
+ return;
+
+ iter_fd = bpf_iter_create(bpf_link__fd(link));
+ if (!ASSERT_GE(iter_fd, 0, "create_map_iter")) {
+ bpf_link__destroy(link);
+ return;
+ }
+
+ /* Close link and map fd prematurely */
+ bpf_link__destroy(link);
+ bpf_object__destroy_skeleton(*skel);
+ *skel = NULL;
+
+ /* Try to let map free work to run first if map is freed */
+ usleep(100);
+ /* Memory used by both sock map and sock local storage map are
+ * freed after two synchronize_rcu() calls, so wait for it
+ */
+ kern_sync_rcu();
+ kern_sync_rcu();
+
+ /* Read after both map fd and link fd are closed */
+ while ((len = read(iter_fd, buf, sizeof(buf))) > 0)
+ ;
+ ASSERT_GE(len, 0, "read_iterator");
+
+ close(iter_fd);
+}
+
+static int read_fd_into_buffer(int fd, char *buf, int size)
+{
+ int bufleft = size;
+ int len;
+
+ do {
+ len = read(fd, buf, bufleft);
+ if (len > 0) {
+ buf += len;
+ bufleft -= len;
+ }
+ } while (len > 0);
+
+ return len < 0 ? len : size - bufleft;
+}
+
static void test_ipv6_route(void)
{
struct bpf_iter_ipv6_route *skel;
skel = bpf_iter_ipv6_route__open_and_load();
- if (CHECK(!skel, "bpf_iter_ipv6_route__open_and_load",
- "skeleton open_and_load failed\n"))
+ if (!ASSERT_OK_PTR(skel, "bpf_iter_ipv6_route__open_and_load"))
return;
do_dummy_read(skel->progs.dump_ipv6_route);
@@ -69,8 +153,7 @@ static void test_netlink(void)
struct bpf_iter_netlink *skel;
skel = bpf_iter_netlink__open_and_load();
- if (CHECK(!skel, "bpf_iter_netlink__open_and_load",
- "skeleton open_and_load failed\n"))
+ if (!ASSERT_OK_PTR(skel, "bpf_iter_netlink__open_and_load"))
return;
do_dummy_read(skel->progs.dump_netlink);
@@ -83,8 +166,7 @@ static void test_bpf_map(void)
struct bpf_iter_bpf_map *skel;
skel = bpf_iter_bpf_map__open_and_load();
- if (CHECK(!skel, "bpf_iter_bpf_map__open_and_load",
- "skeleton open_and_load failed\n"))
+ if (!ASSERT_OK_PTR(skel, "bpf_iter_bpf_map__open_and_load"))
return;
do_dummy_read(skel->progs.dump_bpf_map);
@@ -92,55 +174,366 @@ static void test_bpf_map(void)
bpf_iter_bpf_map__destroy(skel);
}
-static void test_task(void)
+static void check_bpf_link_info(const struct bpf_program *prog)
+{
+ LIBBPF_OPTS(bpf_iter_attach_opts, opts);
+ union bpf_iter_link_info linfo;
+ struct bpf_link_info info = {};
+ struct bpf_link *link;
+ __u32 info_len;
+ int err;
+
+ memset(&linfo, 0, sizeof(linfo));
+ linfo.task.tid = getpid();
+ opts.link_info = &linfo;
+ opts.link_info_len = sizeof(linfo);
+
+ link = bpf_program__attach_iter(prog, &opts);
+ if (!ASSERT_OK_PTR(link, "attach_iter"))
+ return;
+
+ info_len = sizeof(info);
+ err = bpf_link_get_info_by_fd(bpf_link__fd(link), &info, &info_len);
+ ASSERT_OK(err, "bpf_link_get_info_by_fd");
+ ASSERT_EQ(info.iter.task.tid, getpid(), "check_task_tid");
+
+ bpf_link__destroy(link);
+}
+
+static pthread_mutex_t do_nothing_mutex;
+
+static void *do_nothing_wait(void *arg)
+{
+ pthread_mutex_lock(&do_nothing_mutex);
+ pthread_mutex_unlock(&do_nothing_mutex);
+
+ pthread_exit(arg);
+}
+
+static void test_task_common_nocheck(struct bpf_iter_attach_opts *opts,
+ int *num_unknown, int *num_known)
+{
+ struct bpf_iter_tasks *skel;
+ pthread_t thread_id;
+ void *ret;
+
+ skel = bpf_iter_tasks__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "bpf_iter_tasks__open_and_load"))
+ return;
+
+ ASSERT_OK(pthread_mutex_lock(&do_nothing_mutex), "pthread_mutex_lock");
+
+ ASSERT_OK(pthread_create(&thread_id, NULL, &do_nothing_wait, NULL),
+ "pthread_create");
+
+ skel->bss->tid = getpid();
+
+ do_dummy_read_opts(skel->progs.dump_task, opts);
+
+ *num_unknown = skel->bss->num_unknown_tid;
+ *num_known = skel->bss->num_known_tid;
+
+ ASSERT_OK(pthread_mutex_unlock(&do_nothing_mutex), "pthread_mutex_unlock");
+ ASSERT_FALSE(pthread_join(thread_id, &ret) || ret != NULL,
+ "pthread_join");
+
+ bpf_iter_tasks__destroy(skel);
+}
+
+static void test_task_common(struct bpf_iter_attach_opts *opts, int num_unknown, int num_known)
+{
+ int num_unknown_tid, num_known_tid;
+
+ test_task_common_nocheck(opts, &num_unknown_tid, &num_known_tid);
+ ASSERT_EQ(num_unknown_tid, num_unknown, "check_num_unknown_tid");
+ ASSERT_EQ(num_known_tid, num_known, "check_num_known_tid");
+}
+
+static void test_task_tid(void)
+{
+ LIBBPF_OPTS(bpf_iter_attach_opts, opts);
+ union bpf_iter_link_info linfo;
+ int num_unknown_tid, num_known_tid;
+
+ memset(&linfo, 0, sizeof(linfo));
+ linfo.task.tid = getpid();
+ opts.link_info = &linfo;
+ opts.link_info_len = sizeof(linfo);
+ test_task_common(&opts, 0, 1);
+
+ linfo.task.tid = 0;
+ linfo.task.pid = getpid();
+ test_task_common(&opts, 1, 1);
+
+ test_task_common_nocheck(NULL, &num_unknown_tid, &num_known_tid);
+ ASSERT_GT(num_unknown_tid, 1, "check_num_unknown_tid");
+ ASSERT_EQ(num_known_tid, 1, "check_num_known_tid");
+}
+
+static void test_task_pid(void)
+{
+ LIBBPF_OPTS(bpf_iter_attach_opts, opts);
+ union bpf_iter_link_info linfo;
+
+ memset(&linfo, 0, sizeof(linfo));
+ linfo.task.pid = getpid();
+ opts.link_info = &linfo;
+ opts.link_info_len = sizeof(linfo);
+
+ test_task_common(&opts, 1, 1);
+}
+
+static void test_task_pidfd(void)
+{
+ LIBBPF_OPTS(bpf_iter_attach_opts, opts);
+ union bpf_iter_link_info linfo;
+ int pidfd;
+
+ pidfd = sys_pidfd_open(getpid(), 0);
+ if (!ASSERT_GT(pidfd, 0, "sys_pidfd_open"))
+ return;
+
+ memset(&linfo, 0, sizeof(linfo));
+ linfo.task.pid_fd = pidfd;
+ opts.link_info = &linfo;
+ opts.link_info_len = sizeof(linfo);
+
+ test_task_common(&opts, 1, 1);
+
+ close(pidfd);
+}
+
+static void test_task_sleepable(void)
+{
+ struct bpf_iter_tasks *skel;
+
+ skel = bpf_iter_tasks__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "bpf_iter_tasks__open_and_load"))
+ return;
+
+ do_dummy_read(skel->progs.dump_task_sleepable);
+
+ ASSERT_GT(skel->bss->num_expected_failure_copy_from_user_task, 0,
+ "num_expected_failure_copy_from_user_task");
+ ASSERT_GT(skel->bss->num_success_copy_from_user_task, 0,
+ "num_success_copy_from_user_task");
+
+ bpf_iter_tasks__destroy(skel);
+}
+
+static void test_task_stack(void)
{
- struct bpf_iter_task *skel;
+ struct bpf_iter_task_stack *skel;
- skel = bpf_iter_task__open_and_load();
- if (CHECK(!skel, "bpf_iter_task__open_and_load",
- "skeleton open_and_load failed\n"))
+ skel = bpf_iter_task_stack__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "bpf_iter_task_stack__open_and_load"))
return;
- do_dummy_read(skel->progs.dump_task);
+ do_dummy_read(skel->progs.dump_task_stack);
+ do_dummy_read(skel->progs.get_task_user_stacks);
+
+ ASSERT_EQ(skel->bss->num_user_stacks, 1, "num_user_stacks");
- bpf_iter_task__destroy(skel);
+ bpf_iter_task_stack__destroy(skel);
}
static void test_task_file(void)
{
+ LIBBPF_OPTS(bpf_iter_attach_opts, opts);
struct bpf_iter_task_file *skel;
+ union bpf_iter_link_info linfo;
+ pthread_t thread_id;
+ void *ret;
skel = bpf_iter_task_file__open_and_load();
- if (CHECK(!skel, "bpf_iter_task_file__open_and_load",
- "skeleton open_and_load failed\n"))
+ if (!ASSERT_OK_PTR(skel, "bpf_iter_task_file__open_and_load"))
return;
+ skel->bss->tgid = getpid();
+
+ ASSERT_OK(pthread_mutex_lock(&do_nothing_mutex), "pthread_mutex_lock");
+
+ ASSERT_OK(pthread_create(&thread_id, NULL, &do_nothing_wait, NULL),
+ "pthread_create");
+
+ memset(&linfo, 0, sizeof(linfo));
+ linfo.task.tid = getpid();
+ opts.link_info = &linfo;
+ opts.link_info_len = sizeof(linfo);
+
+ do_dummy_read_opts(skel->progs.dump_task_file, &opts);
+
+ ASSERT_EQ(skel->bss->count, 0, "check_count");
+ ASSERT_EQ(skel->bss->unique_tgid_count, 1, "check_unique_tgid_count");
+
+ skel->bss->last_tgid = 0;
+ skel->bss->count = 0;
+ skel->bss->unique_tgid_count = 0;
+
do_dummy_read(skel->progs.dump_task_file);
+ ASSERT_EQ(skel->bss->count, 0, "check_count");
+ ASSERT_GT(skel->bss->unique_tgid_count, 1, "check_unique_tgid_count");
+
+ check_bpf_link_info(skel->progs.dump_task_file);
+
+ ASSERT_OK(pthread_mutex_unlock(&do_nothing_mutex), "pthread_mutex_unlock");
+ ASSERT_OK(pthread_join(thread_id, &ret), "pthread_join");
+ ASSERT_NULL(ret, "pthread_join");
+
bpf_iter_task_file__destroy(skel);
}
+#define TASKBUFSZ 32768
+
+static char taskbuf[TASKBUFSZ];
+
+static int do_btf_read(struct bpf_iter_task_btf *skel)
+{
+ struct bpf_program *prog = skel->progs.dump_task_struct;
+ struct bpf_iter_task_btf__bss *bss = skel->bss;
+ int iter_fd = -1, err;
+ struct bpf_link *link;
+ char *buf = taskbuf;
+ int ret = 0;
+
+ link = bpf_program__attach_iter(prog, NULL);
+ if (!ASSERT_OK_PTR(link, "attach_iter"))
+ return ret;
+
+ iter_fd = bpf_iter_create(bpf_link__fd(link));
+ if (!ASSERT_GE(iter_fd, 0, "create_iter"))
+ goto free_link;
+
+ err = read_fd_into_buffer(iter_fd, buf, TASKBUFSZ);
+ if (bss->skip) {
+ printf("%s:SKIP:no __builtin_btf_type_id\n", __func__);
+ ret = 1;
+ test__skip();
+ goto free_link;
+ }
+
+ if (!ASSERT_GE(err, 0, "read"))
+ goto free_link;
+
+ ASSERT_HAS_SUBSTR(taskbuf, "(struct task_struct)",
+ "check for btf representation of task_struct in iter data");
+free_link:
+ if (iter_fd > 0)
+ close(iter_fd);
+ bpf_link__destroy(link);
+ return ret;
+}
+
+static void test_task_btf(void)
+{
+ struct bpf_iter_task_btf__bss *bss;
+ struct bpf_iter_task_btf *skel;
+ int ret;
+
+ skel = bpf_iter_task_btf__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "bpf_iter_task_btf__open_and_load"))
+ return;
+
+ bss = skel->bss;
+
+ ret = do_btf_read(skel);
+ if (ret)
+ goto cleanup;
+
+ if (!ASSERT_NEQ(bss->tasks, 0, "no task iteration, did BPF program run?"))
+ goto cleanup;
+
+ ASSERT_EQ(bss->seq_err, 0, "check for unexpected err");
+
+cleanup:
+ bpf_iter_task_btf__destroy(skel);
+}
+
+static void test_tcp4(void)
+{
+ struct bpf_iter_tcp4 *skel;
+
+ skel = bpf_iter_tcp4__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "bpf_iter_tcp4__open_and_load"))
+ return;
+
+ do_dummy_read(skel->progs.dump_tcp4);
+
+ bpf_iter_tcp4__destroy(skel);
+}
+
+static void test_tcp6(void)
+{
+ struct bpf_iter_tcp6 *skel;
+
+ skel = bpf_iter_tcp6__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "bpf_iter_tcp6__open_and_load"))
+ return;
+
+ do_dummy_read(skel->progs.dump_tcp6);
+
+ bpf_iter_tcp6__destroy(skel);
+}
+
+static void test_udp4(void)
+{
+ struct bpf_iter_udp4 *skel;
+
+ skel = bpf_iter_udp4__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "bpf_iter_udp4__open_and_load"))
+ return;
+
+ do_dummy_read(skel->progs.dump_udp4);
+
+ bpf_iter_udp4__destroy(skel);
+}
+
+static void test_udp6(void)
+{
+ struct bpf_iter_udp6 *skel;
+
+ skel = bpf_iter_udp6__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "bpf_iter_udp6__open_and_load"))
+ return;
+
+ do_dummy_read(skel->progs.dump_udp6);
+
+ bpf_iter_udp6__destroy(skel);
+}
+
+static void test_unix(void)
+{
+ struct bpf_iter_unix *skel;
+
+ skel = bpf_iter_unix__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "bpf_iter_unix__open_and_load"))
+ return;
+
+ do_dummy_read(skel->progs.dump_unix);
+
+ bpf_iter_unix__destroy(skel);
+}
+
/* The expected string is less than 16 bytes */
static int do_read_with_fd(int iter_fd, const char *expected,
bool read_one_char)
{
- int err = -1, len, read_buf_len, start;
+ int len, read_buf_len, start;
char buf[16] = {};
read_buf_len = read_one_char ? 1 : 16;
start = 0;
while ((len = read(iter_fd, buf + start, read_buf_len)) > 0) {
start += len;
- if (CHECK(start >= 16, "read", "read len %d\n", len))
+ if (!ASSERT_LT(start, 16, "read"))
return -1;
read_buf_len = read_one_char ? 1 : 16 - start;
}
- if (CHECK(len < 0, "read", "read failed: %s\n", strerror(errno)))
+ if (!ASSERT_GE(len, 0, "read"))
return -1;
- err = strcmp(buf, expected);
- if (CHECK(err, "read", "incorrect read result: buf %s, expected %s\n",
- buf, expected))
+ if (!ASSERT_STREQ(buf, expected, "read"))
return -1;
return 0;
@@ -153,19 +546,17 @@ static void test_anon_iter(bool read_one_char)
int iter_fd, err;
skel = bpf_iter_test_kern1__open_and_load();
- if (CHECK(!skel, "bpf_iter_test_kern1__open_and_load",
- "skeleton open_and_load failed\n"))
+ if (!ASSERT_OK_PTR(skel, "bpf_iter_test_kern1__open_and_load"))
return;
err = bpf_iter_test_kern1__attach(skel);
- if (CHECK(err, "bpf_iter_test_kern1__attach",
- "skeleton attach failed\n")) {
+ if (!ASSERT_OK(err, "bpf_iter_test_kern1__attach")) {
goto out;
}
link = skel->links.dump_task;
iter_fd = bpf_iter_create(bpf_link__fd(link));
- if (CHECK(iter_fd < 0, "create_iter", "create_iter failed\n"))
+ if (!ASSERT_GE(iter_fd, 0, "create_iter"))
goto out;
do_read_with_fd(iter_fd, "abcd", read_one_char);
@@ -180,8 +571,7 @@ static int do_read(const char *path, const char *expected)
int err, iter_fd;
iter_fd = open(path, O_RDONLY);
- if (CHECK(iter_fd < 0, "open", "open %s failed: %s\n",
- path, strerror(errno)))
+ if (!ASSERT_GE(iter_fd, 0, "open"))
return -1;
err = do_read_with_fd(iter_fd, expected, false);
@@ -198,19 +588,18 @@ static void test_file_iter(void)
int err;
skel1 = bpf_iter_test_kern1__open_and_load();
- if (CHECK(!skel1, "bpf_iter_test_kern1__open_and_load",
- "skeleton open_and_load failed\n"))
+ if (!ASSERT_OK_PTR(skel1, "bpf_iter_test_kern1__open_and_load"))
return;
link = bpf_program__attach_iter(skel1->progs.dump_task, NULL);
- if (CHECK(IS_ERR(link), "attach_iter", "attach_iter failed\n"))
+ if (!ASSERT_OK_PTR(link, "attach_iter"))
goto out;
/* unlink this path if it exists. */
unlink(path);
err = bpf_link__pin(link, path);
- if (CHECK(err, "pin_iter", "pin_iter to %s failed: %d\n", path, err))
+ if (!ASSERT_OK(err, "pin_iter"))
goto free_link;
err = do_read(path, "abcd");
@@ -222,12 +611,11 @@ static void test_file_iter(void)
* should change.
*/
skel2 = bpf_iter_test_kern2__open_and_load();
- if (CHECK(!skel2, "bpf_iter_test_kern2__open_and_load",
- "skeleton open_and_load failed\n"))
+ if (!ASSERT_OK_PTR(skel2, "bpf_iter_test_kern2__open_and_load"))
goto unlink_path;
err = bpf_link__update_program(link, skel2->progs.dump_task);
- if (CHECK(err, "update_prog", "update_prog failed\n"))
+ if (!ASSERT_OK(err, "update_prog"))
goto destroy_skel2;
do_read(path, "ABCD");
@@ -249,12 +637,11 @@ static void test_overflow(bool test_e2big_overflow, bool ret1)
struct bpf_map_info map_info = {};
struct bpf_iter_test_kern4 *skel;
struct bpf_link *link;
- __u32 page_size;
+ __u32 iter_size;
char *buf;
skel = bpf_iter_test_kern4__open();
- if (CHECK(!skel, "bpf_iter_test_kern4__open",
- "skeleton open failed\n"))
+ if (!ASSERT_OK_PTR(skel, "bpf_iter_test_kern4__open"))
return;
/* create two maps: bpf program will only do bpf_seq_write
@@ -262,62 +649,58 @@ static void test_overflow(bool test_e2big_overflow, bool ret1)
* fills seq_file buffer and then the other will trigger
* overflow and needs restart.
*/
- map1_fd = bpf_create_map(BPF_MAP_TYPE_ARRAY, 4, 8, 1, 0);
- if (CHECK(map1_fd < 0, "bpf_create_map",
- "map_creation failed: %s\n", strerror(errno)))
+ map1_fd = bpf_map_create(BPF_MAP_TYPE_ARRAY, NULL, 4, 8, 1, NULL);
+ if (!ASSERT_GE(map1_fd, 0, "bpf_map_create"))
goto out;
- map2_fd = bpf_create_map(BPF_MAP_TYPE_ARRAY, 4, 8, 1, 0);
- if (CHECK(map2_fd < 0, "bpf_create_map",
- "map_creation failed: %s\n", strerror(errno)))
+ map2_fd = bpf_map_create(BPF_MAP_TYPE_ARRAY, NULL, 4, 8, 1, NULL);
+ if (!ASSERT_GE(map2_fd, 0, "bpf_map_create"))
goto free_map1;
- /* bpf_seq_printf kernel buffer is one page, so one map
+ /* bpf_seq_printf kernel buffer is 8 pages, so one map
* bpf_seq_write will mostly fill it, and the other map
* will partially fill and then trigger overflow and need
* bpf_seq_read restart.
*/
- page_size = sysconf(_SC_PAGE_SIZE);
+ iter_size = sysconf(_SC_PAGE_SIZE) << 3;
if (test_e2big_overflow) {
- skel->rodata->print_len = (page_size + 8) / 8;
- expected_read_len = 2 * (page_size + 8);
+ skel->rodata->print_len = (iter_size + 8) / 8;
+ expected_read_len = 2 * (iter_size + 8);
} else if (!ret1) {
- skel->rodata->print_len = (page_size - 8) / 8;
- expected_read_len = 2 * (page_size - 8);
+ skel->rodata->print_len = (iter_size - 8) / 8;
+ expected_read_len = 2 * (iter_size - 8);
} else {
skel->rodata->print_len = 1;
expected_read_len = 2 * 8;
}
skel->rodata->ret1 = ret1;
- if (CHECK(bpf_iter_test_kern4__load(skel),
- "bpf_iter_test_kern4__load", "skeleton load failed\n"))
+ if (!ASSERT_OK(bpf_iter_test_kern4__load(skel),
+ "bpf_iter_test_kern4__load"))
goto free_map2;
/* setup filtering map_id in bpf program */
map_info_len = sizeof(map_info);
- err = bpf_obj_get_info_by_fd(map1_fd, &map_info, &map_info_len);
- if (CHECK(err, "get_map_info", "get map info failed: %s\n",
- strerror(errno)))
+ err = bpf_map_get_info_by_fd(map1_fd, &map_info, &map_info_len);
+ if (!ASSERT_OK(err, "get_map_info"))
goto free_map2;
skel->bss->map1_id = map_info.id;
- err = bpf_obj_get_info_by_fd(map2_fd, &map_info, &map_info_len);
- if (CHECK(err, "get_map_info", "get map info failed: %s\n",
- strerror(errno)))
+ err = bpf_map_get_info_by_fd(map2_fd, &map_info, &map_info_len);
+ if (!ASSERT_OK(err, "get_map_info"))
goto free_map2;
skel->bss->map2_id = map_info.id;
link = bpf_program__attach_iter(skel->progs.dump_bpf_map, NULL);
- if (CHECK(IS_ERR(link), "attach_iter", "attach_iter failed\n"))
+ if (!ASSERT_OK_PTR(link, "attach_iter"))
goto free_map2;
iter_fd = bpf_iter_create(bpf_link__fd(link));
- if (CHECK(iter_fd < 0, "create_iter", "create_iter failed\n"))
+ if (!ASSERT_GE(iter_fd, 0, "create_iter"))
goto free_link;
buf = malloc(expected_read_len);
- if (!buf)
+ if (!ASSERT_OK_PTR(buf, "malloc"))
goto close_iter;
/* do read */
@@ -326,16 +709,14 @@ static void test_overflow(bool test_e2big_overflow, bool ret1)
while ((len = read(iter_fd, buf, expected_read_len)) > 0)
total_read_len += len;
- CHECK(len != -1 || errno != E2BIG, "read",
- "expected ret -1, errno E2BIG, but get ret %d, error %s\n",
- len, strerror(errno));
+ ASSERT_EQ(len, -1, "read");
+ ASSERT_EQ(errno, E2BIG, "read");
goto free_buf;
} else if (!ret1) {
while ((len = read(iter_fd, buf, expected_read_len)) > 0)
total_read_len += len;
- if (CHECK(len < 0, "read", "read failed: %s\n",
- strerror(errno)))
+ if (!ASSERT_GE(len, 0, "read"))
goto free_buf;
} else {
do {
@@ -344,27 +725,20 @@ static void test_overflow(bool test_e2big_overflow, bool ret1)
total_read_len += len;
} while (len > 0 || len == -EAGAIN);
- if (CHECK(len < 0, "read", "read failed: %s\n",
- strerror(errno)))
+ if (!ASSERT_GE(len, 0, "read"))
goto free_buf;
}
- if (CHECK(total_read_len != expected_read_len, "read",
- "total len %u, expected len %u\n", total_read_len,
- expected_read_len))
+ if (!ASSERT_EQ(total_read_len, expected_read_len, "read"))
goto free_buf;
- if (CHECK(skel->bss->map1_accessed != 1, "map1_accessed",
- "expected 1 actual %d\n", skel->bss->map1_accessed))
+ if (!ASSERT_EQ(skel->bss->map1_accessed, 1, "map1_accessed"))
goto free_buf;
- if (CHECK(skel->bss->map2_accessed != 2, "map2_accessed",
- "expected 2 actual %d\n", skel->bss->map2_accessed))
+ if (!ASSERT_EQ(skel->bss->map2_accessed, 2, "map2_accessed"))
goto free_buf;
- CHECK(skel->bss->map2_seqnum1 != skel->bss->map2_seqnum2,
- "map2_seqnum", "two different seqnum %lld %lld\n",
- skel->bss->map2_seqnum1, skel->bss->map2_seqnum2);
+ ASSERT_EQ(skel->bss->map2_seqnum1, skel->bss->map2_seqnum2, "map2_seqnum");
free_buf:
free(buf);
@@ -380,8 +754,884 @@ out:
bpf_iter_test_kern4__destroy(skel);
}
+static void test_bpf_hash_map(void)
+{
+ __u32 expected_key_a = 0, expected_key_b = 0;
+ DECLARE_LIBBPF_OPTS(bpf_iter_attach_opts, opts);
+ struct bpf_iter_bpf_hash_map *skel;
+ int err, i, len, map_fd, iter_fd;
+ union bpf_iter_link_info linfo;
+ __u64 val, expected_val = 0;
+ struct bpf_link *link;
+ struct key_t {
+ int a;
+ int b;
+ int c;
+ } key;
+ char buf[64];
+
+ skel = bpf_iter_bpf_hash_map__open();
+ if (!ASSERT_OK_PTR(skel, "bpf_iter_bpf_hash_map__open"))
+ return;
+
+ skel->bss->in_test_mode = true;
+
+ err = bpf_iter_bpf_hash_map__load(skel);
+ if (!ASSERT_OK(err, "bpf_iter_bpf_hash_map__load"))
+ goto out;
+
+ /* iterator with hashmap2 and hashmap3 should fail */
+ memset(&linfo, 0, sizeof(linfo));
+ linfo.map.map_fd = bpf_map__fd(skel->maps.hashmap2);
+ opts.link_info = &linfo;
+ opts.link_info_len = sizeof(linfo);
+ link = bpf_program__attach_iter(skel->progs.dump_bpf_hash_map, &opts);
+ if (!ASSERT_ERR_PTR(link, "attach_iter"))
+ goto out;
+
+ linfo.map.map_fd = bpf_map__fd(skel->maps.hashmap3);
+ link = bpf_program__attach_iter(skel->progs.dump_bpf_hash_map, &opts);
+ if (!ASSERT_ERR_PTR(link, "attach_iter"))
+ goto out;
+
+ /* hashmap1 should be good, update map values here */
+ map_fd = bpf_map__fd(skel->maps.hashmap1);
+ for (i = 0; i < bpf_map__max_entries(skel->maps.hashmap1); i++) {
+ key.a = i + 1;
+ key.b = i + 2;
+ key.c = i + 3;
+ val = i + 4;
+ expected_key_a += key.a;
+ expected_key_b += key.b;
+ expected_val += val;
+
+ err = bpf_map_update_elem(map_fd, &key, &val, BPF_ANY);
+ if (!ASSERT_OK(err, "map_update"))
+ goto out;
+ }
+
+ /* Sleepable program is prohibited for hash map iterator */
+ linfo.map.map_fd = map_fd;
+ link = bpf_program__attach_iter(skel->progs.sleepable_dummy_dump, &opts);
+ if (!ASSERT_ERR_PTR(link, "attach_sleepable_prog_to_iter"))
+ goto out;
+
+ linfo.map.map_fd = map_fd;
+ link = bpf_program__attach_iter(skel->progs.dump_bpf_hash_map, &opts);
+ if (!ASSERT_OK_PTR(link, "attach_iter"))
+ goto out;
+
+ iter_fd = bpf_iter_create(bpf_link__fd(link));
+ if (!ASSERT_GE(iter_fd, 0, "create_iter"))
+ goto free_link;
+
+ /* do some tests */
+ while ((len = read(iter_fd, buf, sizeof(buf))) > 0)
+ ;
+ if (!ASSERT_GE(len, 0, "read"))
+ goto close_iter;
+
+ /* test results */
+ if (!ASSERT_EQ(skel->bss->key_sum_a, expected_key_a, "key_sum_a"))
+ goto close_iter;
+ if (!ASSERT_EQ(skel->bss->key_sum_b, expected_key_b, "key_sum_b"))
+ goto close_iter;
+ if (!ASSERT_EQ(skel->bss->val_sum, expected_val, "val_sum"))
+ goto close_iter;
+
+close_iter:
+ close(iter_fd);
+free_link:
+ bpf_link__destroy(link);
+out:
+ bpf_iter_bpf_hash_map__destroy(skel);
+}
+
+static void test_bpf_percpu_hash_map(void)
+{
+ __u32 expected_key_a = 0, expected_key_b = 0;
+ DECLARE_LIBBPF_OPTS(bpf_iter_attach_opts, opts);
+ struct bpf_iter_bpf_percpu_hash_map *skel;
+ int err, i, j, len, map_fd, iter_fd;
+ union bpf_iter_link_info linfo;
+ __u32 expected_val = 0;
+ struct bpf_link *link;
+ struct key_t {
+ int a;
+ int b;
+ int c;
+ } key;
+ char buf[64];
+ void *val;
+
+ skel = bpf_iter_bpf_percpu_hash_map__open();
+ if (!ASSERT_OK_PTR(skel, "bpf_iter_bpf_percpu_hash_map__open"))
+ return;
+
+ skel->rodata->num_cpus = bpf_num_possible_cpus();
+ val = malloc(8 * bpf_num_possible_cpus());
+ if (!ASSERT_OK_PTR(val, "malloc"))
+ goto out;
+
+ err = bpf_iter_bpf_percpu_hash_map__load(skel);
+ if (!ASSERT_OK_PTR(skel, "bpf_iter_bpf_percpu_hash_map__load"))
+ goto out;
+
+ /* update map values here */
+ map_fd = bpf_map__fd(skel->maps.hashmap1);
+ for (i = 0; i < bpf_map__max_entries(skel->maps.hashmap1); i++) {
+ key.a = i + 1;
+ key.b = i + 2;
+ key.c = i + 3;
+ expected_key_a += key.a;
+ expected_key_b += key.b;
+
+ for (j = 0; j < bpf_num_possible_cpus(); j++) {
+ *(__u32 *)(val + j * 8) = i + j;
+ expected_val += i + j;
+ }
+
+ err = bpf_map_update_elem(map_fd, &key, val, BPF_ANY);
+ if (!ASSERT_OK(err, "map_update"))
+ goto out;
+ }
+
+ memset(&linfo, 0, sizeof(linfo));
+ linfo.map.map_fd = map_fd;
+ opts.link_info = &linfo;
+ opts.link_info_len = sizeof(linfo);
+ link = bpf_program__attach_iter(skel->progs.dump_bpf_percpu_hash_map, &opts);
+ if (!ASSERT_OK_PTR(link, "attach_iter"))
+ goto out;
+
+ iter_fd = bpf_iter_create(bpf_link__fd(link));
+ if (!ASSERT_GE(iter_fd, 0, "create_iter"))
+ goto free_link;
+
+ /* do some tests */
+ while ((len = read(iter_fd, buf, sizeof(buf))) > 0)
+ ;
+ if (!ASSERT_GE(len, 0, "read"))
+ goto close_iter;
+
+ /* test results */
+ if (!ASSERT_EQ(skel->bss->key_sum_a, expected_key_a, "key_sum_a"))
+ goto close_iter;
+ if (!ASSERT_EQ(skel->bss->key_sum_b, expected_key_b, "key_sum_b"))
+ goto close_iter;
+ if (!ASSERT_EQ(skel->bss->val_sum, expected_val, "val_sum"))
+ goto close_iter;
+
+close_iter:
+ close(iter_fd);
+free_link:
+ bpf_link__destroy(link);
+out:
+ bpf_iter_bpf_percpu_hash_map__destroy(skel);
+ free(val);
+}
+
+static void test_bpf_array_map(void)
+{
+ __u64 val, expected_val = 0, res_first_val, first_val = 0;
+ DECLARE_LIBBPF_OPTS(bpf_iter_attach_opts, opts);
+ __u32 key, expected_key = 0, res_first_key;
+ int err, i, map_fd, hash_fd, iter_fd;
+ struct bpf_iter_bpf_array_map *skel;
+ union bpf_iter_link_info linfo;
+ struct bpf_link *link;
+ char buf[64] = {};
+ int len, start;
+
+ skel = bpf_iter_bpf_array_map__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "bpf_iter_bpf_array_map__open_and_load"))
+ return;
+
+ map_fd = bpf_map__fd(skel->maps.arraymap1);
+ for (i = 0; i < bpf_map__max_entries(skel->maps.arraymap1); i++) {
+ val = i + 4;
+ expected_key += i;
+ expected_val += val;
+
+ if (i == 0)
+ first_val = val;
+
+ err = bpf_map_update_elem(map_fd, &i, &val, BPF_ANY);
+ if (!ASSERT_OK(err, "map_update"))
+ goto out;
+ }
+
+ memset(&linfo, 0, sizeof(linfo));
+ linfo.map.map_fd = map_fd;
+ opts.link_info = &linfo;
+ opts.link_info_len = sizeof(linfo);
+ link = bpf_program__attach_iter(skel->progs.dump_bpf_array_map, &opts);
+ if (!ASSERT_OK_PTR(link, "attach_iter"))
+ goto out;
+
+ iter_fd = bpf_iter_create(bpf_link__fd(link));
+ if (!ASSERT_GE(iter_fd, 0, "create_iter"))
+ goto free_link;
+
+ /* do some tests */
+ start = 0;
+ while ((len = read(iter_fd, buf + start, sizeof(buf) - start)) > 0)
+ start += len;
+ if (!ASSERT_GE(len, 0, "read"))
+ goto close_iter;
+
+ /* test results */
+ res_first_key = *(__u32 *)buf;
+ res_first_val = *(__u64 *)(buf + sizeof(__u32));
+ if (!ASSERT_EQ(res_first_key, 0, "bpf_seq_write") ||
+ !ASSERT_EQ(res_first_val, first_val, "bpf_seq_write"))
+ goto close_iter;
+
+ if (!ASSERT_EQ(skel->bss->key_sum, expected_key, "key_sum"))
+ goto close_iter;
+ if (!ASSERT_EQ(skel->bss->val_sum, expected_val, "val_sum"))
+ goto close_iter;
+
+ hash_fd = bpf_map__fd(skel->maps.hashmap1);
+ for (i = 0; i < bpf_map__max_entries(skel->maps.arraymap1); i++) {
+ err = bpf_map_lookup_elem(map_fd, &i, &val);
+ if (!ASSERT_OK(err, "map_lookup arraymap1"))
+ goto close_iter;
+ if (!ASSERT_EQ(i, val, "invalid_val arraymap1"))
+ goto close_iter;
+
+ val = i + 4;
+ err = bpf_map_lookup_elem(hash_fd, &val, &key);
+ if (!ASSERT_OK(err, "map_lookup hashmap1"))
+ goto close_iter;
+ if (!ASSERT_EQ(key, val - 4, "invalid_val hashmap1"))
+ goto close_iter;
+ }
+
+close_iter:
+ close(iter_fd);
+free_link:
+ bpf_link__destroy(link);
+out:
+ bpf_iter_bpf_array_map__destroy(skel);
+}
+
+static void test_bpf_array_map_iter_fd(void)
+{
+ struct bpf_iter_bpf_array_map *skel;
+
+ skel = bpf_iter_bpf_array_map__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "bpf_iter_bpf_array_map__open_and_load"))
+ return;
+
+ do_read_map_iter_fd(&skel->skeleton, skel->progs.dump_bpf_array_map,
+ skel->maps.arraymap1);
+
+ bpf_iter_bpf_array_map__destroy(skel);
+}
+
+static void test_bpf_percpu_array_map(void)
+{
+ DECLARE_LIBBPF_OPTS(bpf_iter_attach_opts, opts);
+ struct bpf_iter_bpf_percpu_array_map *skel;
+ __u32 expected_key = 0, expected_val = 0;
+ union bpf_iter_link_info linfo;
+ int err, i, j, map_fd, iter_fd;
+ struct bpf_link *link;
+ char buf[64];
+ void *val;
+ int len;
+
+ skel = bpf_iter_bpf_percpu_array_map__open();
+ if (!ASSERT_OK_PTR(skel, "bpf_iter_bpf_percpu_array_map__open"))
+ return;
+
+ skel->rodata->num_cpus = bpf_num_possible_cpus();
+ val = malloc(8 * bpf_num_possible_cpus());
+ if (!ASSERT_OK_PTR(val, "malloc"))
+ goto out;
+
+ err = bpf_iter_bpf_percpu_array_map__load(skel);
+ if (!ASSERT_OK_PTR(skel, "bpf_iter_bpf_percpu_array_map__load"))
+ goto out;
+
+ /* update map values here */
+ map_fd = bpf_map__fd(skel->maps.arraymap1);
+ for (i = 0; i < bpf_map__max_entries(skel->maps.arraymap1); i++) {
+ expected_key += i;
+
+ for (j = 0; j < bpf_num_possible_cpus(); j++) {
+ *(__u32 *)(val + j * 8) = i + j;
+ expected_val += i + j;
+ }
+
+ err = bpf_map_update_elem(map_fd, &i, val, BPF_ANY);
+ if (!ASSERT_OK(err, "map_update"))
+ goto out;
+ }
+
+ memset(&linfo, 0, sizeof(linfo));
+ linfo.map.map_fd = map_fd;
+ opts.link_info = &linfo;
+ opts.link_info_len = sizeof(linfo);
+ link = bpf_program__attach_iter(skel->progs.dump_bpf_percpu_array_map, &opts);
+ if (!ASSERT_OK_PTR(link, "attach_iter"))
+ goto out;
+
+ iter_fd = bpf_iter_create(bpf_link__fd(link));
+ if (!ASSERT_GE(iter_fd, 0, "create_iter"))
+ goto free_link;
+
+ /* do some tests */
+ while ((len = read(iter_fd, buf, sizeof(buf))) > 0)
+ ;
+ if (!ASSERT_GE(len, 0, "read"))
+ goto close_iter;
+
+ /* test results */
+ if (!ASSERT_EQ(skel->bss->key_sum, expected_key, "key_sum"))
+ goto close_iter;
+ if (!ASSERT_EQ(skel->bss->val_sum, expected_val, "val_sum"))
+ goto close_iter;
+
+close_iter:
+ close(iter_fd);
+free_link:
+ bpf_link__destroy(link);
+out:
+ bpf_iter_bpf_percpu_array_map__destroy(skel);
+ free(val);
+}
+
+/* An iterator program deletes all local storage in a map. */
+static void test_bpf_sk_storage_delete(void)
+{
+ DECLARE_LIBBPF_OPTS(bpf_iter_attach_opts, opts);
+ struct bpf_iter_bpf_sk_storage_helpers *skel;
+ union bpf_iter_link_info linfo;
+ int err, len, map_fd, iter_fd;
+ struct bpf_link *link;
+ int sock_fd = -1;
+ __u32 val = 42;
+ char buf[64];
+
+ skel = bpf_iter_bpf_sk_storage_helpers__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "bpf_iter_bpf_sk_storage_helpers__open_and_load"))
+ return;
+
+ map_fd = bpf_map__fd(skel->maps.sk_stg_map);
+
+ sock_fd = socket(AF_INET6, SOCK_STREAM, 0);
+ if (!ASSERT_GE(sock_fd, 0, "socket"))
+ goto out;
+
+ err = bpf_map_update_elem(map_fd, &sock_fd, &val, BPF_NOEXIST);
+ if (!ASSERT_OK(err, "map_update"))
+ goto out;
+
+ memset(&linfo, 0, sizeof(linfo));
+ linfo.map.map_fd = map_fd;
+ opts.link_info = &linfo;
+ opts.link_info_len = sizeof(linfo);
+ link = bpf_program__attach_iter(skel->progs.delete_bpf_sk_storage_map,
+ &opts);
+ if (!ASSERT_OK_PTR(link, "attach_iter"))
+ goto out;
+
+ iter_fd = bpf_iter_create(bpf_link__fd(link));
+ if (!ASSERT_GE(iter_fd, 0, "create_iter"))
+ goto free_link;
+
+ /* do some tests */
+ while ((len = read(iter_fd, buf, sizeof(buf))) > 0)
+ ;
+ if (!ASSERT_GE(len, 0, "read"))
+ goto close_iter;
+
+ /* test results */
+ err = bpf_map_lookup_elem(map_fd, &sock_fd, &val);
+
+ /* Note: The following assertions serve to ensure
+ * the value was deleted. It does so by asserting
+ * that bpf_map_lookup_elem has failed. This might
+ * seem counterintuitive at first.
+ */
+ ASSERT_ERR(err, "bpf_map_lookup_elem");
+ ASSERT_EQ(errno, ENOENT, "bpf_map_lookup_elem");
+
+close_iter:
+ close(iter_fd);
+free_link:
+ bpf_link__destroy(link);
+out:
+ if (sock_fd >= 0)
+ close(sock_fd);
+ bpf_iter_bpf_sk_storage_helpers__destroy(skel);
+}
+
+/* This creates a socket and its local storage. It then runs a task_iter BPF
+ * program that replaces the existing socket local storage with the tgid of the
+ * only task owning a file descriptor to this socket, this process, prog_tests.
+ * It then runs a tcp socket iterator that negates the value in the existing
+ * socket local storage, the test verifies that the resulting value is -pid.
+ */
+static void test_bpf_sk_storage_get(void)
+{
+ struct bpf_iter_bpf_sk_storage_helpers *skel;
+ int err, map_fd, val = -1;
+ int sock_fd = -1;
+
+ skel = bpf_iter_bpf_sk_storage_helpers__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "bpf_iter_bpf_sk_storage_helpers__open_and_load"))
+ return;
+
+ sock_fd = socket(AF_INET6, SOCK_STREAM, 0);
+ if (!ASSERT_GE(sock_fd, 0, "socket"))
+ goto out;
+
+ err = listen(sock_fd, 1);
+ if (!ASSERT_OK(err, "listen"))
+ goto close_socket;
+
+ map_fd = bpf_map__fd(skel->maps.sk_stg_map);
+
+ err = bpf_map_update_elem(map_fd, &sock_fd, &val, BPF_NOEXIST);
+ if (!ASSERT_OK(err, "bpf_map_update_elem"))
+ goto close_socket;
+
+ do_dummy_read(skel->progs.fill_socket_owner);
+
+ err = bpf_map_lookup_elem(map_fd, &sock_fd, &val);
+ if (!ASSERT_OK(err, "bpf_map_lookup_elem") ||
+ !ASSERT_EQ(val, getpid(), "bpf_map_lookup_elem"))
+ goto close_socket;
+
+ do_dummy_read(skel->progs.negate_socket_local_storage);
+
+ err = bpf_map_lookup_elem(map_fd, &sock_fd, &val);
+ ASSERT_OK(err, "bpf_map_lookup_elem");
+ ASSERT_EQ(val, -getpid(), "bpf_map_lookup_elem");
+
+close_socket:
+ close(sock_fd);
+out:
+ bpf_iter_bpf_sk_storage_helpers__destroy(skel);
+}
+
+static void test_bpf_sk_stoarge_map_iter_fd(void)
+{
+ struct bpf_iter_bpf_sk_storage_map *skel;
+
+ skel = bpf_iter_bpf_sk_storage_map__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "bpf_iter_bpf_sk_storage_map__open_and_load"))
+ return;
+
+ do_read_map_iter_fd(&skel->skeleton, skel->progs.rw_bpf_sk_storage_map,
+ skel->maps.sk_stg_map);
+
+ bpf_iter_bpf_sk_storage_map__destroy(skel);
+}
+
+static void test_bpf_sk_storage_map(void)
+{
+ DECLARE_LIBBPF_OPTS(bpf_iter_attach_opts, opts);
+ int err, i, len, map_fd, iter_fd, num_sockets;
+ struct bpf_iter_bpf_sk_storage_map *skel;
+ union bpf_iter_link_info linfo;
+ int sock_fd[3] = {-1, -1, -1};
+ __u32 val, expected_val = 0;
+ struct bpf_link *link;
+ char buf[64];
+
+ skel = bpf_iter_bpf_sk_storage_map__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "bpf_iter_bpf_sk_storage_map__open_and_load"))
+ return;
+
+ map_fd = bpf_map__fd(skel->maps.sk_stg_map);
+ num_sockets = ARRAY_SIZE(sock_fd);
+ for (i = 0; i < num_sockets; i++) {
+ sock_fd[i] = socket(AF_INET6, SOCK_STREAM, 0);
+ if (!ASSERT_GE(sock_fd[i], 0, "socket"))
+ goto out;
+
+ val = i + 1;
+ expected_val += val;
+
+ err = bpf_map_update_elem(map_fd, &sock_fd[i], &val,
+ BPF_NOEXIST);
+ if (!ASSERT_OK(err, "map_update"))
+ goto out;
+ }
+
+ memset(&linfo, 0, sizeof(linfo));
+ linfo.map.map_fd = map_fd;
+ opts.link_info = &linfo;
+ opts.link_info_len = sizeof(linfo);
+ link = bpf_program__attach_iter(skel->progs.oob_write_bpf_sk_storage_map, &opts);
+ err = libbpf_get_error(link);
+ if (!ASSERT_EQ(err, -EACCES, "attach_oob_write_iter")) {
+ if (!err)
+ bpf_link__destroy(link);
+ goto out;
+ }
+
+ link = bpf_program__attach_iter(skel->progs.rw_bpf_sk_storage_map, &opts);
+ if (!ASSERT_OK_PTR(link, "attach_iter"))
+ goto out;
+
+ iter_fd = bpf_iter_create(bpf_link__fd(link));
+ if (!ASSERT_GE(iter_fd, 0, "create_iter"))
+ goto free_link;
+
+ skel->bss->to_add_val = time(NULL);
+ /* do some tests */
+ while ((len = read(iter_fd, buf, sizeof(buf))) > 0)
+ ;
+ if (!ASSERT_GE(len, 0, "read"))
+ goto close_iter;
+
+ /* test results */
+ if (!ASSERT_EQ(skel->bss->ipv6_sk_count, num_sockets, "ipv6_sk_count"))
+ goto close_iter;
+
+ if (!ASSERT_EQ(skel->bss->val_sum, expected_val, "val_sum"))
+ goto close_iter;
+
+ for (i = 0; i < num_sockets; i++) {
+ err = bpf_map_lookup_elem(map_fd, &sock_fd[i], &val);
+ if (!ASSERT_OK(err, "map_lookup") ||
+ !ASSERT_EQ(val, i + 1 + skel->bss->to_add_val, "check_map_value"))
+ break;
+ }
+
+close_iter:
+ close(iter_fd);
+free_link:
+ bpf_link__destroy(link);
+out:
+ for (i = 0; i < num_sockets; i++) {
+ if (sock_fd[i] >= 0)
+ close(sock_fd[i]);
+ }
+ bpf_iter_bpf_sk_storage_map__destroy(skel);
+}
+
+static void test_rdonly_buf_out_of_bound(void)
+{
+ DECLARE_LIBBPF_OPTS(bpf_iter_attach_opts, opts);
+ struct bpf_iter_test_kern5 *skel;
+ union bpf_iter_link_info linfo;
+ struct bpf_link *link;
+
+ skel = bpf_iter_test_kern5__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "bpf_iter_test_kern5__open_and_load"))
+ return;
+
+ memset(&linfo, 0, sizeof(linfo));
+ linfo.map.map_fd = bpf_map__fd(skel->maps.hashmap1);
+ opts.link_info = &linfo;
+ opts.link_info_len = sizeof(linfo);
+ link = bpf_program__attach_iter(skel->progs.dump_bpf_hash_map, &opts);
+ if (!ASSERT_ERR_PTR(link, "attach_iter"))
+ bpf_link__destroy(link);
+
+ bpf_iter_test_kern5__destroy(skel);
+}
+
+static void test_buf_neg_offset(void)
+{
+ struct bpf_iter_test_kern6 *skel;
+
+ skel = bpf_iter_test_kern6__open_and_load();
+ if (!ASSERT_ERR_PTR(skel, "bpf_iter_test_kern6__open_and_load"))
+ bpf_iter_test_kern6__destroy(skel);
+}
+
+static void test_link_iter(void)
+{
+ struct bpf_iter_bpf_link *skel;
+
+ skel = bpf_iter_bpf_link__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "bpf_iter_bpf_link__open_and_load"))
+ return;
+
+ do_dummy_read(skel->progs.dump_bpf_link);
+
+ bpf_iter_bpf_link__destroy(skel);
+}
+
+static void test_ksym_iter(void)
+{
+ struct bpf_iter_ksym *skel;
+
+ skel = bpf_iter_ksym__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "bpf_iter_ksym__open_and_load"))
+ return;
+
+ do_dummy_read(skel->progs.dump_ksym);
+
+ bpf_iter_ksym__destroy(skel);
+}
+
+#define CMP_BUFFER_SIZE 1024
+static char task_vma_output[CMP_BUFFER_SIZE];
+static char proc_maps_output[CMP_BUFFER_SIZE];
+
+/* remove \0 and \t from str, and only keep the first line */
+static void str_strip_first_line(char *str)
+{
+ char *dst = str, *src = str;
+
+ do {
+ if (*src == ' ' || *src == '\t')
+ src++;
+ else
+ *(dst++) = *(src++);
+
+ } while (*src != '\0' && *src != '\n');
+
+ *dst = '\0';
+}
+
+static void test_task_vma_common(struct bpf_iter_attach_opts *opts)
+{
+ int err, iter_fd = -1, proc_maps_fd = -1;
+ struct bpf_iter_task_vmas *skel;
+ int len, read_size = 4;
+ char maps_path[64];
+
+ skel = bpf_iter_task_vmas__open();
+ if (!ASSERT_OK_PTR(skel, "bpf_iter_task_vmas__open"))
+ return;
+
+ skel->bss->pid = getpid();
+ skel->bss->one_task = opts ? 1 : 0;
+
+ err = bpf_iter_task_vmas__load(skel);
+ if (!ASSERT_OK(err, "bpf_iter_task_vmas__load"))
+ goto out;
+
+ skel->links.proc_maps = bpf_program__attach_iter(
+ skel->progs.proc_maps, opts);
+
+ if (!ASSERT_OK_PTR(skel->links.proc_maps, "bpf_program__attach_iter")) {
+ skel->links.proc_maps = NULL;
+ goto out;
+ }
+
+ iter_fd = bpf_iter_create(bpf_link__fd(skel->links.proc_maps));
+ if (!ASSERT_GE(iter_fd, 0, "create_iter"))
+ goto out;
+
+ /* Read CMP_BUFFER_SIZE (1kB) from bpf_iter. Read in small chunks
+ * to trigger seq_file corner cases.
+ */
+ len = 0;
+ while (len < CMP_BUFFER_SIZE) {
+ err = read_fd_into_buffer(iter_fd, task_vma_output + len,
+ MIN(read_size, CMP_BUFFER_SIZE - len));
+ if (!err)
+ break;
+ if (!ASSERT_GE(err, 0, "read_iter_fd"))
+ goto out;
+ len += err;
+ }
+ if (opts)
+ ASSERT_EQ(skel->bss->one_task_error, 0, "unexpected task");
+
+ /* read CMP_BUFFER_SIZE (1kB) from /proc/pid/maps */
+ snprintf(maps_path, 64, "/proc/%u/maps", skel->bss->pid);
+ proc_maps_fd = open(maps_path, O_RDONLY);
+ if (!ASSERT_GE(proc_maps_fd, 0, "open_proc_maps"))
+ goto out;
+ err = read_fd_into_buffer(proc_maps_fd, proc_maps_output, CMP_BUFFER_SIZE);
+ if (!ASSERT_GE(err, 0, "read_prog_maps_fd"))
+ goto out;
+
+ /* strip and compare the first line of the two files */
+ str_strip_first_line(task_vma_output);
+ str_strip_first_line(proc_maps_output);
+
+ ASSERT_STREQ(task_vma_output, proc_maps_output, "compare_output");
+
+ check_bpf_link_info(skel->progs.proc_maps);
+
+out:
+ close(proc_maps_fd);
+ close(iter_fd);
+ bpf_iter_task_vmas__destroy(skel);
+}
+
+static void test_task_vma_dead_task(void)
+{
+ struct bpf_iter_task_vmas *skel;
+ int wstatus, child_pid = -1;
+ time_t start_tm, cur_tm;
+ int err, iter_fd = -1;
+ int wait_sec = 3;
+
+ skel = bpf_iter_task_vmas__open();
+ if (!ASSERT_OK_PTR(skel, "bpf_iter_task_vmas__open"))
+ return;
+
+ skel->bss->pid = getpid();
+
+ err = bpf_iter_task_vmas__load(skel);
+ if (!ASSERT_OK(err, "bpf_iter_task_vmas__load"))
+ goto out;
+
+ skel->links.proc_maps = bpf_program__attach_iter(
+ skel->progs.proc_maps, NULL);
+
+ if (!ASSERT_OK_PTR(skel->links.proc_maps, "bpf_program__attach_iter")) {
+ skel->links.proc_maps = NULL;
+ goto out;
+ }
+
+ start_tm = time(NULL);
+ cur_tm = start_tm;
+
+ child_pid = fork();
+ if (child_pid == 0) {
+ /* Fork short-lived processes in the background. */
+ while (cur_tm < start_tm + wait_sec) {
+ system("echo > /dev/null");
+ cur_tm = time(NULL);
+ }
+ exit(0);
+ }
+
+ if (!ASSERT_GE(child_pid, 0, "fork_child"))
+ goto out;
+
+ while (cur_tm < start_tm + wait_sec) {
+ iter_fd = bpf_iter_create(bpf_link__fd(skel->links.proc_maps));
+ if (!ASSERT_GE(iter_fd, 0, "create_iter"))
+ goto out;
+
+ /* Drain all data from iter_fd. */
+ while (cur_tm < start_tm + wait_sec) {
+ err = read_fd_into_buffer(iter_fd, task_vma_output, CMP_BUFFER_SIZE);
+ if (!ASSERT_GE(err, 0, "read_iter_fd"))
+ goto out;
+
+ cur_tm = time(NULL);
+
+ if (err == 0)
+ break;
+ }
+
+ close(iter_fd);
+ iter_fd = -1;
+ }
+
+ check_bpf_link_info(skel->progs.proc_maps);
+
+out:
+ waitpid(child_pid, &wstatus, 0);
+ close(iter_fd);
+ bpf_iter_task_vmas__destroy(skel);
+}
+
+void test_bpf_sockmap_map_iter_fd(void)
+{
+ struct bpf_iter_sockmap *skel;
+
+ skel = bpf_iter_sockmap__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "bpf_iter_sockmap__open_and_load"))
+ return;
+
+ do_read_map_iter_fd(&skel->skeleton, skel->progs.copy, skel->maps.sockmap);
+
+ bpf_iter_sockmap__destroy(skel);
+}
+
+static void test_task_vma(void)
+{
+ LIBBPF_OPTS(bpf_iter_attach_opts, opts);
+ union bpf_iter_link_info linfo;
+
+ memset(&linfo, 0, sizeof(linfo));
+ linfo.task.tid = getpid();
+ opts.link_info = &linfo;
+ opts.link_info_len = sizeof(linfo);
+
+ test_task_vma_common(&opts);
+ test_task_vma_common(NULL);
+}
+
+/* uprobe attach point */
+static noinline int trigger_func(int arg)
+{
+ asm volatile ("");
+ return arg + 1;
+}
+
+static void test_task_vma_offset_common(struct bpf_iter_attach_opts *opts, bool one_proc)
+{
+ struct bpf_iter_vma_offset *skel;
+ char buf[16] = {};
+ int iter_fd, len;
+ int pgsz, shift;
+
+ skel = bpf_iter_vma_offset__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "bpf_iter_vma_offset__open_and_load"))
+ return;
+
+ skel->bss->pid = getpid();
+ skel->bss->address = (uintptr_t)trigger_func;
+ for (pgsz = getpagesize(), shift = 0; pgsz > 1; pgsz >>= 1, shift++)
+ ;
+ skel->bss->page_shift = shift;
+
+ skel->links.get_vma_offset = bpf_program__attach_iter(skel->progs.get_vma_offset, opts);
+ if (!ASSERT_OK_PTR(skel->links.get_vma_offset, "attach_iter"))
+ goto exit;
+
+ iter_fd = bpf_iter_create(bpf_link__fd(skel->links.get_vma_offset));
+ if (!ASSERT_GT(iter_fd, 0, "create_iter"))
+ goto exit;
+
+ while ((len = read(iter_fd, buf, sizeof(buf))) > 0)
+ ;
+ buf[15] = 0;
+ ASSERT_EQ(strcmp(buf, "OK\n"), 0, "strcmp");
+
+ ASSERT_EQ(skel->bss->offset, get_uprobe_offset(trigger_func), "offset");
+ if (one_proc)
+ ASSERT_EQ(skel->bss->unique_tgid_cnt, 1, "unique_tgid_count");
+ else
+ ASSERT_GT(skel->bss->unique_tgid_cnt, 1, "unique_tgid_count");
+
+ close(iter_fd);
+
+exit:
+ bpf_iter_vma_offset__destroy(skel);
+}
+
+static void test_task_vma_offset(void)
+{
+ LIBBPF_OPTS(bpf_iter_attach_opts, opts);
+ union bpf_iter_link_info linfo;
+
+ memset(&linfo, 0, sizeof(linfo));
+ linfo.task.pid = getpid();
+ opts.link_info = &linfo;
+ opts.link_info_len = sizeof(linfo);
+
+ test_task_vma_offset_common(&opts, true);
+
+ linfo.task.pid = 0;
+ linfo.task.tid = getpid();
+ test_task_vma_offset_common(&opts, true);
+
+ test_task_vma_offset_common(NULL, false);
+}
+
void test_bpf_iter(void)
{
+ ASSERT_OK(pthread_mutex_init(&do_nothing_mutex, NULL), "pthread_mutex_init");
+
if (test__start_subtest("btf_id_or_null"))
test_btf_id_or_null();
if (test__start_subtest("ipv6_route"))
@@ -390,10 +1640,34 @@ void test_bpf_iter(void)
test_netlink();
if (test__start_subtest("bpf_map"))
test_bpf_map();
- if (test__start_subtest("task"))
- test_task();
+ if (test__start_subtest("task_tid"))
+ test_task_tid();
+ if (test__start_subtest("task_pid"))
+ test_task_pid();
+ if (test__start_subtest("task_pidfd"))
+ test_task_pidfd();
+ if (test__start_subtest("task_sleepable"))
+ test_task_sleepable();
+ if (test__start_subtest("task_stack"))
+ test_task_stack();
if (test__start_subtest("task_file"))
test_task_file();
+ if (test__start_subtest("task_vma"))
+ test_task_vma();
+ if (test__start_subtest("task_vma_dead_task"))
+ test_task_vma_dead_task();
+ if (test__start_subtest("task_btf"))
+ test_task_btf();
+ if (test__start_subtest("tcp4"))
+ test_tcp4();
+ if (test__start_subtest("tcp6"))
+ test_tcp6();
+ if (test__start_subtest("udp4"))
+ test_udp4();
+ if (test__start_subtest("udp6"))
+ test_udp6();
+ if (test__start_subtest("unix"))
+ test_unix();
if (test__start_subtest("anon"))
test_anon_iter(false);
if (test__start_subtest("anon-read-one-char"))
@@ -406,4 +1680,34 @@ void test_bpf_iter(void)
test_overflow(true, false);
if (test__start_subtest("prog-ret-1"))
test_overflow(false, true);
+ if (test__start_subtest("bpf_hash_map"))
+ test_bpf_hash_map();
+ if (test__start_subtest("bpf_percpu_hash_map"))
+ test_bpf_percpu_hash_map();
+ if (test__start_subtest("bpf_array_map"))
+ test_bpf_array_map();
+ if (test__start_subtest("bpf_array_map_iter_fd"))
+ test_bpf_array_map_iter_fd();
+ if (test__start_subtest("bpf_percpu_array_map"))
+ test_bpf_percpu_array_map();
+ if (test__start_subtest("bpf_sk_storage_map"))
+ test_bpf_sk_storage_map();
+ if (test__start_subtest("bpf_sk_storage_map_iter_fd"))
+ test_bpf_sk_stoarge_map_iter_fd();
+ if (test__start_subtest("bpf_sk_storage_delete"))
+ test_bpf_sk_storage_delete();
+ if (test__start_subtest("bpf_sk_storage_get"))
+ test_bpf_sk_storage_get();
+ if (test__start_subtest("rdonly-buf-out-of-bound"))
+ test_rdonly_buf_out_of_bound();
+ if (test__start_subtest("buf-neg-offset"))
+ test_buf_neg_offset();
+ if (test__start_subtest("link-iter"))
+ test_link_iter();
+ if (test__start_subtest("ksym"))
+ test_ksym_iter();
+ if (test__start_subtest("bpf_sockmap_map_iter_fd"))
+ test_bpf_sockmap_map_iter_fd();
+ if (test__start_subtest("vma_offset"))
+ test_task_vma_offset();
}
diff --git a/tools/testing/selftests/bpf/prog_tests/bpf_iter_setsockopt.c b/tools/testing/selftests/bpf/prog_tests/bpf_iter_setsockopt.c
new file mode 100644
index 000000000000..b52ff8ce34db
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/bpf_iter_setsockopt.c
@@ -0,0 +1,226 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2021 Facebook */
+#define _GNU_SOURCE
+#include <sched.h>
+#include <test_progs.h>
+#include "network_helpers.h"
+#include "bpf_dctcp.skel.h"
+#include "bpf_cubic.skel.h"
+#include "bpf_iter_setsockopt.skel.h"
+
+static int create_netns(void)
+{
+ if (!ASSERT_OK(unshare(CLONE_NEWNET), "create netns"))
+ return -1;
+
+ if (!ASSERT_OK(system("ip link set dev lo up"), "bring up lo"))
+ return -1;
+
+ return 0;
+}
+
+static unsigned int set_bpf_cubic(int *fds, unsigned int nr_fds)
+{
+ unsigned int i;
+
+ for (i = 0; i < nr_fds; i++) {
+ if (setsockopt(fds[i], SOL_TCP, TCP_CONGESTION, "bpf_cubic",
+ sizeof("bpf_cubic")))
+ return i;
+ }
+
+ return nr_fds;
+}
+
+static unsigned int check_bpf_dctcp(int *fds, unsigned int nr_fds)
+{
+ char tcp_cc[16];
+ socklen_t optlen = sizeof(tcp_cc);
+ unsigned int i;
+
+ for (i = 0; i < nr_fds; i++) {
+ if (getsockopt(fds[i], SOL_TCP, TCP_CONGESTION,
+ tcp_cc, &optlen) ||
+ strcmp(tcp_cc, "bpf_dctcp"))
+ return i;
+ }
+
+ return nr_fds;
+}
+
+static int *make_established(int listen_fd, unsigned int nr_est,
+ int **paccepted_fds)
+{
+ int *est_fds, *accepted_fds;
+ unsigned int i;
+
+ est_fds = malloc(sizeof(*est_fds) * nr_est);
+ if (!est_fds)
+ return NULL;
+
+ accepted_fds = malloc(sizeof(*accepted_fds) * nr_est);
+ if (!accepted_fds) {
+ free(est_fds);
+ return NULL;
+ }
+
+ for (i = 0; i < nr_est; i++) {
+ est_fds[i] = connect_to_fd(listen_fd, 0);
+ if (est_fds[i] == -1)
+ break;
+ if (set_bpf_cubic(&est_fds[i], 1) != 1) {
+ close(est_fds[i]);
+ break;
+ }
+
+ accepted_fds[i] = accept(listen_fd, NULL, 0);
+ if (accepted_fds[i] == -1) {
+ close(est_fds[i]);
+ break;
+ }
+ }
+
+ if (!ASSERT_EQ(i, nr_est, "create established fds")) {
+ free_fds(accepted_fds, i);
+ free_fds(est_fds, i);
+ return NULL;
+ }
+
+ *paccepted_fds = accepted_fds;
+ return est_fds;
+}
+
+static unsigned short get_local_port(int fd)
+{
+ struct sockaddr_in6 addr;
+ socklen_t addrlen = sizeof(addr);
+
+ if (!getsockname(fd, &addr, &addrlen))
+ return ntohs(addr.sin6_port);
+
+ return 0;
+}
+
+static void do_bpf_iter_setsockopt(struct bpf_iter_setsockopt *iter_skel,
+ bool random_retry)
+{
+ int *reuse_listen_fds = NULL, *accepted_fds = NULL, *est_fds = NULL;
+ unsigned int nr_reuse_listens = 256, nr_est = 256;
+ int err, iter_fd = -1, listen_fd = -1;
+ char buf;
+
+ /* Prepare non-reuseport listen_fd */
+ listen_fd = start_server(AF_INET6, SOCK_STREAM, "::1", 0, 0);
+ if (!ASSERT_GE(listen_fd, 0, "start_server"))
+ return;
+ if (!ASSERT_EQ(set_bpf_cubic(&listen_fd, 1), 1,
+ "set listen_fd to cubic"))
+ goto done;
+ iter_skel->bss->listen_hport = get_local_port(listen_fd);
+ if (!ASSERT_NEQ(iter_skel->bss->listen_hport, 0,
+ "get_local_port(listen_fd)"))
+ goto done;
+
+ /* Connect to non-reuseport listen_fd */
+ est_fds = make_established(listen_fd, nr_est, &accepted_fds);
+ if (!ASSERT_OK_PTR(est_fds, "create established"))
+ goto done;
+
+ /* Prepare reuseport listen fds */
+ reuse_listen_fds = start_reuseport_server(AF_INET6, SOCK_STREAM,
+ "::1", 0, 0,
+ nr_reuse_listens);
+ if (!ASSERT_OK_PTR(reuse_listen_fds, "start_reuseport_server"))
+ goto done;
+ if (!ASSERT_EQ(set_bpf_cubic(reuse_listen_fds, nr_reuse_listens),
+ nr_reuse_listens, "set reuse_listen_fds to cubic"))
+ goto done;
+ iter_skel->bss->reuse_listen_hport = get_local_port(reuse_listen_fds[0]);
+ if (!ASSERT_NEQ(iter_skel->bss->reuse_listen_hport, 0,
+ "get_local_port(reuse_listen_fds[0])"))
+ goto done;
+
+ /* Run bpf tcp iter to switch from bpf_cubic to bpf_dctcp */
+ iter_skel->bss->random_retry = random_retry;
+ iter_fd = bpf_iter_create(bpf_link__fd(iter_skel->links.change_tcp_cc));
+ if (!ASSERT_GE(iter_fd, 0, "create iter_fd"))
+ goto done;
+
+ while ((err = read(iter_fd, &buf, sizeof(buf))) == -1 &&
+ errno == EAGAIN)
+ ;
+ if (!ASSERT_OK(err, "read iter error"))
+ goto done;
+
+ /* Check reuseport listen fds for dctcp */
+ ASSERT_EQ(check_bpf_dctcp(reuse_listen_fds, nr_reuse_listens),
+ nr_reuse_listens,
+ "check reuse_listen_fds dctcp");
+
+ /* Check non reuseport listen fd for dctcp */
+ ASSERT_EQ(check_bpf_dctcp(&listen_fd, 1), 1,
+ "check listen_fd dctcp");
+
+ /* Check established fds for dctcp */
+ ASSERT_EQ(check_bpf_dctcp(est_fds, nr_est), nr_est,
+ "check est_fds dctcp");
+
+ /* Check accepted fds for dctcp */
+ ASSERT_EQ(check_bpf_dctcp(accepted_fds, nr_est), nr_est,
+ "check accepted_fds dctcp");
+
+done:
+ if (iter_fd != -1)
+ close(iter_fd);
+ if (listen_fd != -1)
+ close(listen_fd);
+ free_fds(reuse_listen_fds, nr_reuse_listens);
+ free_fds(accepted_fds, nr_est);
+ free_fds(est_fds, nr_est);
+}
+
+void serial_test_bpf_iter_setsockopt(void)
+{
+ struct bpf_iter_setsockopt *iter_skel = NULL;
+ struct bpf_cubic *cubic_skel = NULL;
+ struct bpf_dctcp *dctcp_skel = NULL;
+ struct bpf_link *cubic_link = NULL;
+ struct bpf_link *dctcp_link = NULL;
+
+ if (create_netns())
+ return;
+
+ /* Load iter_skel */
+ iter_skel = bpf_iter_setsockopt__open_and_load();
+ if (!ASSERT_OK_PTR(iter_skel, "iter_skel"))
+ return;
+ iter_skel->links.change_tcp_cc = bpf_program__attach_iter(iter_skel->progs.change_tcp_cc, NULL);
+ if (!ASSERT_OK_PTR(iter_skel->links.change_tcp_cc, "attach iter"))
+ goto done;
+
+ /* Load bpf_cubic */
+ cubic_skel = bpf_cubic__open_and_load();
+ if (!ASSERT_OK_PTR(cubic_skel, "cubic_skel"))
+ goto done;
+ cubic_link = bpf_map__attach_struct_ops(cubic_skel->maps.cubic);
+ if (!ASSERT_OK_PTR(cubic_link, "cubic_link"))
+ goto done;
+
+ /* Load bpf_dctcp */
+ dctcp_skel = bpf_dctcp__open_and_load();
+ if (!ASSERT_OK_PTR(dctcp_skel, "dctcp_skel"))
+ goto done;
+ dctcp_link = bpf_map__attach_struct_ops(dctcp_skel->maps.dctcp);
+ if (!ASSERT_OK_PTR(dctcp_link, "dctcp_link"))
+ goto done;
+
+ do_bpf_iter_setsockopt(iter_skel, true);
+ do_bpf_iter_setsockopt(iter_skel, false);
+
+done:
+ bpf_link__destroy(cubic_link);
+ bpf_link__destroy(dctcp_link);
+ bpf_cubic__destroy(cubic_skel);
+ bpf_dctcp__destroy(dctcp_skel);
+ bpf_iter_setsockopt__destroy(iter_skel);
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/bpf_iter_setsockopt_unix.c b/tools/testing/selftests/bpf/prog_tests/bpf_iter_setsockopt_unix.c
new file mode 100644
index 000000000000..ee725d4d98a5
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/bpf_iter_setsockopt_unix.c
@@ -0,0 +1,100 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright Amazon.com Inc. or its affiliates. */
+#include <sys/socket.h>
+#include <sys/un.h>
+#include <test_progs.h>
+#include "bpf_iter_setsockopt_unix.skel.h"
+
+#define NR_CASES 5
+
+static int create_unix_socket(struct bpf_iter_setsockopt_unix *skel)
+{
+ struct sockaddr_un addr = {
+ .sun_family = AF_UNIX,
+ .sun_path = "",
+ };
+ socklen_t len;
+ int fd, err;
+
+ fd = socket(AF_UNIX, SOCK_STREAM, 0);
+ if (!ASSERT_NEQ(fd, -1, "socket"))
+ return -1;
+
+ len = offsetof(struct sockaddr_un, sun_path);
+ err = bind(fd, (struct sockaddr *)&addr, len);
+ if (!ASSERT_OK(err, "bind"))
+ return -1;
+
+ len = sizeof(addr);
+ err = getsockname(fd, (struct sockaddr *)&addr, &len);
+ if (!ASSERT_OK(err, "getsockname"))
+ return -1;
+
+ memcpy(&skel->bss->sun_path, &addr.sun_path,
+ len - offsetof(struct sockaddr_un, sun_path));
+
+ return fd;
+}
+
+static void test_sndbuf(struct bpf_iter_setsockopt_unix *skel, int fd)
+{
+ socklen_t optlen;
+ int i, err;
+
+ for (i = 0; i < NR_CASES; i++) {
+ if (!ASSERT_NEQ(skel->data->sndbuf_getsockopt[i], -1,
+ "bpf_(get|set)sockopt"))
+ return;
+
+ err = setsockopt(fd, SOL_SOCKET, SO_SNDBUF,
+ &(skel->data->sndbuf_setsockopt[i]),
+ sizeof(skel->data->sndbuf_setsockopt[i]));
+ if (!ASSERT_OK(err, "setsockopt"))
+ return;
+
+ optlen = sizeof(skel->bss->sndbuf_getsockopt_expected[i]);
+ err = getsockopt(fd, SOL_SOCKET, SO_SNDBUF,
+ &(skel->bss->sndbuf_getsockopt_expected[i]),
+ &optlen);
+ if (!ASSERT_OK(err, "getsockopt"))
+ return;
+
+ if (!ASSERT_EQ(skel->data->sndbuf_getsockopt[i],
+ skel->bss->sndbuf_getsockopt_expected[i],
+ "bpf_(get|set)sockopt"))
+ return;
+ }
+}
+
+void test_bpf_iter_setsockopt_unix(void)
+{
+ struct bpf_iter_setsockopt_unix *skel;
+ int err, unix_fd, iter_fd;
+ char buf;
+
+ skel = bpf_iter_setsockopt_unix__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "open_and_load"))
+ return;
+
+ unix_fd = create_unix_socket(skel);
+ if (!ASSERT_NEQ(unix_fd, -1, "create_unix_server"))
+ goto destroy;
+
+ skel->links.change_sndbuf = bpf_program__attach_iter(skel->progs.change_sndbuf, NULL);
+ if (!ASSERT_OK_PTR(skel->links.change_sndbuf, "bpf_program__attach_iter"))
+ goto destroy;
+
+ iter_fd = bpf_iter_create(bpf_link__fd(skel->links.change_sndbuf));
+ if (!ASSERT_GE(iter_fd, 0, "bpf_iter_create"))
+ goto destroy;
+
+ while ((err = read(iter_fd, &buf, sizeof(buf))) == -1 &&
+ errno == EAGAIN)
+ ;
+ if (!ASSERT_OK(err, "read iter error"))
+ goto destroy;
+
+ test_sndbuf(skel, unix_fd);
+destroy:
+ bpf_iter_setsockopt_unix__destroy(skel);
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/bpf_loop.c b/tools/testing/selftests/bpf/prog_tests/bpf_loop.c
new file mode 100644
index 000000000000..4cd8a25afe68
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/bpf_loop.c
@@ -0,0 +1,207 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2021 Facebook */
+
+#include <test_progs.h>
+#include <network_helpers.h>
+#include "bpf_loop.skel.h"
+
+static void check_nr_loops(struct bpf_loop *skel)
+{
+ struct bpf_link *link;
+
+ link = bpf_program__attach(skel->progs.test_prog);
+ if (!ASSERT_OK_PTR(link, "link"))
+ return;
+
+ /* test 0 loops */
+ skel->bss->nr_loops = 0;
+
+ usleep(1);
+
+ ASSERT_EQ(skel->bss->nr_loops_returned, skel->bss->nr_loops,
+ "0 loops");
+
+ /* test 500 loops */
+ skel->bss->nr_loops = 500;
+
+ usleep(1);
+
+ ASSERT_EQ(skel->bss->nr_loops_returned, skel->bss->nr_loops,
+ "500 loops");
+ ASSERT_EQ(skel->bss->g_output, (500 * 499) / 2, "g_output");
+
+ /* test exceeding the max limit */
+ skel->bss->nr_loops = -1;
+
+ usleep(1);
+
+ ASSERT_EQ(skel->bss->err, -E2BIG, "over max limit");
+
+ bpf_link__destroy(link);
+}
+
+static void check_callback_fn_stop(struct bpf_loop *skel)
+{
+ struct bpf_link *link;
+
+ link = bpf_program__attach(skel->progs.test_prog);
+ if (!ASSERT_OK_PTR(link, "link"))
+ return;
+
+ /* testing that loop is stopped when callback_fn returns 1 */
+ skel->bss->nr_loops = 400;
+ skel->data->stop_index = 50;
+
+ usleep(1);
+
+ ASSERT_EQ(skel->bss->nr_loops_returned, skel->data->stop_index + 1,
+ "nr_loops_returned");
+ ASSERT_EQ(skel->bss->g_output, (50 * 49) / 2,
+ "g_output");
+
+ bpf_link__destroy(link);
+}
+
+static void check_null_callback_ctx(struct bpf_loop *skel)
+{
+ struct bpf_link *link;
+
+ /* check that user is able to pass in a null callback_ctx */
+ link = bpf_program__attach(skel->progs.prog_null_ctx);
+ if (!ASSERT_OK_PTR(link, "link"))
+ return;
+
+ skel->bss->nr_loops = 10;
+
+ usleep(1);
+
+ ASSERT_EQ(skel->bss->nr_loops_returned, skel->bss->nr_loops,
+ "nr_loops_returned");
+
+ bpf_link__destroy(link);
+}
+
+static void check_invalid_flags(struct bpf_loop *skel)
+{
+ struct bpf_link *link;
+
+ /* check that passing in non-zero flags returns -EINVAL */
+ link = bpf_program__attach(skel->progs.prog_invalid_flags);
+ if (!ASSERT_OK_PTR(link, "link"))
+ return;
+
+ usleep(1);
+
+ ASSERT_EQ(skel->bss->err, -EINVAL, "err");
+
+ bpf_link__destroy(link);
+}
+
+static void check_nested_calls(struct bpf_loop *skel)
+{
+ __u32 nr_loops = 100, nested_callback_nr_loops = 4;
+ struct bpf_link *link;
+
+ /* check that nested calls are supported */
+ link = bpf_program__attach(skel->progs.prog_nested_calls);
+ if (!ASSERT_OK_PTR(link, "link"))
+ return;
+
+ skel->bss->nr_loops = nr_loops;
+ skel->bss->nested_callback_nr_loops = nested_callback_nr_loops;
+
+ usleep(1);
+
+ ASSERT_EQ(skel->bss->nr_loops_returned, nr_loops * nested_callback_nr_loops
+ * nested_callback_nr_loops, "nr_loops_returned");
+ ASSERT_EQ(skel->bss->g_output, (4 * 3) / 2 * nested_callback_nr_loops
+ * nr_loops, "g_output");
+
+ bpf_link__destroy(link);
+}
+
+static void check_non_constant_callback(struct bpf_loop *skel)
+{
+ struct bpf_link *link =
+ bpf_program__attach(skel->progs.prog_non_constant_callback);
+
+ if (!ASSERT_OK_PTR(link, "link"))
+ return;
+
+ skel->bss->callback_selector = 0x0F;
+ usleep(1);
+ ASSERT_EQ(skel->bss->g_output, 0x0F, "g_output #1");
+
+ skel->bss->callback_selector = 0xF0;
+ usleep(1);
+ ASSERT_EQ(skel->bss->g_output, 0xF0, "g_output #2");
+
+ bpf_link__destroy(link);
+}
+
+static void check_stack(struct bpf_loop *skel)
+{
+ struct bpf_link *link = bpf_program__attach(skel->progs.stack_check);
+ const int max_key = 12;
+ int key;
+ int map_fd;
+
+ if (!ASSERT_OK_PTR(link, "link"))
+ return;
+
+ map_fd = bpf_map__fd(skel->maps.map1);
+
+ if (!ASSERT_GE(map_fd, 0, "bpf_map__fd"))
+ goto out;
+
+ for (key = 1; key <= max_key; ++key) {
+ int val = key;
+ int err = bpf_map_update_elem(map_fd, &key, &val, BPF_NOEXIST);
+
+ if (!ASSERT_OK(err, "bpf_map_update_elem"))
+ goto out;
+ }
+
+ usleep(1);
+
+ for (key = 1; key <= max_key; ++key) {
+ int val;
+ int err = bpf_map_lookup_elem(map_fd, &key, &val);
+
+ if (!ASSERT_OK(err, "bpf_map_lookup_elem"))
+ goto out;
+ if (!ASSERT_EQ(val, key + 1, "bad value in the map"))
+ goto out;
+ }
+
+out:
+ bpf_link__destroy(link);
+}
+
+void test_bpf_loop(void)
+{
+ struct bpf_loop *skel;
+
+ skel = bpf_loop__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "bpf_loop__open_and_load"))
+ return;
+
+ skel->bss->pid = getpid();
+
+ if (test__start_subtest("check_nr_loops"))
+ check_nr_loops(skel);
+ if (test__start_subtest("check_callback_fn_stop"))
+ check_callback_fn_stop(skel);
+ if (test__start_subtest("check_null_callback_ctx"))
+ check_null_callback_ctx(skel);
+ if (test__start_subtest("check_invalid_flags"))
+ check_invalid_flags(skel);
+ if (test__start_subtest("check_nested_calls"))
+ check_nested_calls(skel);
+ if (test__start_subtest("check_non_constant_callback"))
+ check_non_constant_callback(skel);
+ if (test__start_subtest("check_stack"))
+ check_stack(skel);
+
+ bpf_loop__destroy(skel);
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/bpf_mod_race.c b/tools/testing/selftests/bpf/prog_tests/bpf_mod_race.c
new file mode 100644
index 000000000000..fe2c502e5089
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/bpf_mod_race.c
@@ -0,0 +1,206 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <unistd.h>
+#include <pthread.h>
+#include <sys/mman.h>
+#include <stdatomic.h>
+#include <test_progs.h>
+#include <sys/syscall.h>
+#include <linux/module.h>
+#include <linux/userfaultfd.h>
+
+#include "ksym_race.skel.h"
+#include "bpf_mod_race.skel.h"
+#include "kfunc_call_race.skel.h"
+#include "testing_helpers.h"
+
+/* This test crafts a race between btf_try_get_module and do_init_module, and
+ * checks whether btf_try_get_module handles the invocation for a well-formed
+ * but uninitialized module correctly. Unless the module has completed its
+ * initcalls, the verifier should fail the program load and return ENXIO.
+ *
+ * userfaultfd is used to trigger a fault in an fmod_ret program, and make it
+ * sleep, then the BPF program is loaded and the return value from verifier is
+ * inspected. After this, the userfaultfd is closed so that the module loading
+ * thread makes forward progress, and fmod_ret injects an error so that the
+ * module load fails and it is freed.
+ *
+ * If the verifier succeeded in loading the supplied program, it will end up
+ * taking reference to freed module, and trigger a crash when the program fd
+ * is closed later. This is true for both kfuncs and ksyms. In both cases,
+ * the crash is triggered inside bpf_prog_free_deferred, when module reference
+ * is finally released.
+ */
+
+struct test_config {
+ const char *str_open;
+ void *(*bpf_open_and_load)();
+ void (*bpf_destroy)(void *);
+};
+
+enum bpf_test_state {
+ _TS_INVALID,
+ TS_MODULE_LOAD,
+ TS_MODULE_LOAD_FAIL,
+};
+
+static _Atomic enum bpf_test_state state = _TS_INVALID;
+
+static void *load_module_thread(void *p)
+{
+
+ if (!ASSERT_NEQ(load_bpf_testmod(false), 0, "load_module_thread must fail"))
+ atomic_store(&state, TS_MODULE_LOAD);
+ else
+ atomic_store(&state, TS_MODULE_LOAD_FAIL);
+ return p;
+}
+
+static int sys_userfaultfd(int flags)
+{
+ return syscall(__NR_userfaultfd, flags);
+}
+
+static int test_setup_uffd(void *fault_addr)
+{
+ struct uffdio_register uffd_register = {};
+ struct uffdio_api uffd_api = {};
+ int uffd;
+
+ uffd = sys_userfaultfd(O_CLOEXEC);
+ if (uffd < 0)
+ return -errno;
+
+ uffd_api.api = UFFD_API;
+ uffd_api.features = 0;
+ if (ioctl(uffd, UFFDIO_API, &uffd_api)) {
+ close(uffd);
+ return -1;
+ }
+
+ uffd_register.range.start = (unsigned long)fault_addr;
+ uffd_register.range.len = 4096;
+ uffd_register.mode = UFFDIO_REGISTER_MODE_MISSING;
+ if (ioctl(uffd, UFFDIO_REGISTER, &uffd_register)) {
+ close(uffd);
+ return -1;
+ }
+ return uffd;
+}
+
+static void test_bpf_mod_race_config(const struct test_config *config)
+{
+ void *fault_addr, *skel_fail;
+ struct bpf_mod_race *skel;
+ struct uffd_msg uffd_msg;
+ pthread_t load_mod_thrd;
+ _Atomic int *blockingp;
+ int uffd, ret;
+
+ fault_addr = mmap(0, 4096, PROT_READ, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
+ if (!ASSERT_NEQ(fault_addr, MAP_FAILED, "mmap for uffd registration"))
+ return;
+
+ if (!ASSERT_OK(unload_bpf_testmod(false), "unload bpf_testmod"))
+ goto end_mmap;
+
+ skel = bpf_mod_race__open();
+ if (!ASSERT_OK_PTR(skel, "bpf_mod_kfunc_race__open"))
+ goto end_module;
+
+ skel->rodata->bpf_mod_race_config.tgid = getpid();
+ skel->rodata->bpf_mod_race_config.inject_error = -4242;
+ skel->rodata->bpf_mod_race_config.fault_addr = fault_addr;
+ if (!ASSERT_OK(bpf_mod_race__load(skel), "bpf_mod___load"))
+ goto end_destroy;
+ blockingp = (_Atomic int *)&skel->bss->bpf_blocking;
+
+ if (!ASSERT_OK(bpf_mod_race__attach(skel), "bpf_mod_kfunc_race__attach"))
+ goto end_destroy;
+
+ uffd = test_setup_uffd(fault_addr);
+ if (!ASSERT_GE(uffd, 0, "userfaultfd open + register address"))
+ goto end_destroy;
+
+ if (!ASSERT_OK(pthread_create(&load_mod_thrd, NULL, load_module_thread, NULL),
+ "load module thread"))
+ goto end_uffd;
+
+ /* Now, we either fail loading module, or block in bpf prog, spin to find out */
+ while (!atomic_load(&state) && !atomic_load(blockingp))
+ ;
+ if (!ASSERT_EQ(state, _TS_INVALID, "module load should block"))
+ goto end_join;
+ if (!ASSERT_EQ(*blockingp, 1, "module load blocked")) {
+ pthread_kill(load_mod_thrd, SIGKILL);
+ goto end_uffd;
+ }
+
+ /* We might have set bpf_blocking to 1, but may have not blocked in
+ * bpf_copy_from_user. Read userfaultfd descriptor to verify that.
+ */
+ if (!ASSERT_EQ(read(uffd, &uffd_msg, sizeof(uffd_msg)), sizeof(uffd_msg),
+ "read uffd block event"))
+ goto end_join;
+ if (!ASSERT_EQ(uffd_msg.event, UFFD_EVENT_PAGEFAULT, "read uffd event is pagefault"))
+ goto end_join;
+
+ /* We know that load_mod_thrd is blocked in the fmod_ret program, the
+ * module state is still MODULE_STATE_COMING because mod->init hasn't
+ * returned. This is the time we try to load a program calling kfunc and
+ * check if we get ENXIO from verifier.
+ */
+ skel_fail = config->bpf_open_and_load();
+ ret = errno;
+ if (!ASSERT_EQ(skel_fail, NULL, config->str_open)) {
+ /* Close uffd to unblock load_mod_thrd */
+ close(uffd);
+ uffd = -1;
+ while (atomic_load(blockingp) != 2)
+ ;
+ ASSERT_OK(kern_sync_rcu(), "kern_sync_rcu");
+ config->bpf_destroy(skel_fail);
+ goto end_join;
+
+ }
+ ASSERT_EQ(ret, ENXIO, "verifier returns ENXIO");
+ ASSERT_EQ(skel->data->res_try_get_module, false, "btf_try_get_module == false");
+
+ close(uffd);
+ uffd = -1;
+end_join:
+ pthread_join(load_mod_thrd, NULL);
+ if (uffd < 0)
+ ASSERT_EQ(atomic_load(&state), TS_MODULE_LOAD_FAIL, "load_mod_thrd success");
+end_uffd:
+ if (uffd >= 0)
+ close(uffd);
+end_destroy:
+ bpf_mod_race__destroy(skel);
+ ASSERT_OK(kern_sync_rcu(), "kern_sync_rcu");
+end_module:
+ unload_bpf_testmod(false);
+ ASSERT_OK(load_bpf_testmod(false), "restore bpf_testmod");
+end_mmap:
+ munmap(fault_addr, 4096);
+ atomic_store(&state, _TS_INVALID);
+}
+
+static const struct test_config ksym_config = {
+ .str_open = "ksym_race__open_and_load",
+ .bpf_open_and_load = (void *)ksym_race__open_and_load,
+ .bpf_destroy = (void *)ksym_race__destroy,
+};
+
+static const struct test_config kfunc_config = {
+ .str_open = "kfunc_call_race__open_and_load",
+ .bpf_open_and_load = (void *)kfunc_call_race__open_and_load,
+ .bpf_destroy = (void *)kfunc_call_race__destroy,
+};
+
+void serial_test_bpf_mod_race(void)
+{
+ if (test__start_subtest("ksym (used_btfs UAF)"))
+ test_bpf_mod_race_config(&ksym_config);
+ if (test__start_subtest("kfunc (kfunc_btf_tab UAF)"))
+ test_bpf_mod_race_config(&kfunc_config);
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/bpf_nf.c b/tools/testing/selftests/bpf/prog_tests/bpf_nf.c
new file mode 100644
index 000000000000..b30ff6b3b81a
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/bpf_nf.c
@@ -0,0 +1,182 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <test_progs.h>
+#include <network_helpers.h>
+#include <linux/netfilter/nf_conntrack_common.h>
+#include "test_bpf_nf.skel.h"
+#include "test_bpf_nf_fail.skel.h"
+
+static char log_buf[1024 * 1024];
+
+struct {
+ const char *prog_name;
+ const char *err_msg;
+} test_bpf_nf_fail_tests[] = {
+ { "alloc_release", "kernel function bpf_ct_release args#0 expected pointer to STRUCT nf_conn but" },
+ { "insert_insert", "kernel function bpf_ct_insert_entry args#0 expected pointer to STRUCT nf_conn___init but" },
+ { "lookup_insert", "kernel function bpf_ct_insert_entry args#0 expected pointer to STRUCT nf_conn___init but" },
+ { "set_timeout_after_insert", "kernel function bpf_ct_set_timeout args#0 expected pointer to STRUCT nf_conn___init but" },
+ { "set_status_after_insert", "kernel function bpf_ct_set_status args#0 expected pointer to STRUCT nf_conn___init but" },
+ { "change_timeout_after_alloc", "kernel function bpf_ct_change_timeout args#0 expected pointer to STRUCT nf_conn but" },
+ { "change_status_after_alloc", "kernel function bpf_ct_change_status args#0 expected pointer to STRUCT nf_conn but" },
+ { "write_not_allowlisted_field", "no write support to nf_conn at off" },
+};
+
+enum {
+ TEST_XDP,
+ TEST_TC_BPF,
+};
+
+#define TIMEOUT_MS 3000
+#define IPS_STATUS_MASK (IPS_CONFIRMED | IPS_SEEN_REPLY | \
+ IPS_SRC_NAT_DONE | IPS_DST_NAT_DONE | \
+ IPS_SRC_NAT | IPS_DST_NAT)
+
+static int connect_to_server(int srv_fd)
+{
+ int fd = -1;
+
+ fd = socket(AF_INET, SOCK_STREAM, 0);
+ if (!ASSERT_GE(fd, 0, "socket"))
+ goto out;
+
+ if (!ASSERT_EQ(connect_fd_to_fd(fd, srv_fd, TIMEOUT_MS), 0, "connect_fd_to_fd")) {
+ close(fd);
+ fd = -1;
+ }
+out:
+ return fd;
+}
+
+static void test_bpf_nf_ct(int mode)
+{
+ const char *iptables = "iptables-legacy -t raw %s PREROUTING -j CONNMARK --set-mark 42/0";
+ int srv_fd = -1, client_fd = -1, srv_client_fd = -1;
+ struct sockaddr_in peer_addr = {};
+ struct test_bpf_nf *skel;
+ int prog_fd, err;
+ socklen_t len;
+ u16 srv_port;
+ char cmd[128];
+ LIBBPF_OPTS(bpf_test_run_opts, topts,
+ .data_in = &pkt_v4,
+ .data_size_in = sizeof(pkt_v4),
+ .repeat = 1,
+ );
+
+ skel = test_bpf_nf__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "test_bpf_nf__open_and_load"))
+ return;
+
+ /* Enable connection tracking */
+ snprintf(cmd, sizeof(cmd), iptables, "-A");
+ if (!ASSERT_OK(system(cmd), cmd))
+ goto end;
+
+ srv_port = (mode == TEST_XDP) ? 5005 : 5006;
+ srv_fd = start_server(AF_INET, SOCK_STREAM, "127.0.0.1", srv_port, TIMEOUT_MS);
+ if (!ASSERT_GE(srv_fd, 0, "start_server"))
+ goto end;
+
+ client_fd = connect_to_server(srv_fd);
+ if (!ASSERT_GE(client_fd, 0, "connect_to_server"))
+ goto end;
+
+ len = sizeof(peer_addr);
+ srv_client_fd = accept(srv_fd, (struct sockaddr *)&peer_addr, &len);
+ if (!ASSERT_GE(srv_client_fd, 0, "accept"))
+ goto end;
+ if (!ASSERT_EQ(len, sizeof(struct sockaddr_in), "sockaddr len"))
+ goto end;
+
+ skel->bss->saddr = peer_addr.sin_addr.s_addr;
+ skel->bss->sport = peer_addr.sin_port;
+ skel->bss->daddr = peer_addr.sin_addr.s_addr;
+ skel->bss->dport = htons(srv_port);
+
+ if (mode == TEST_XDP)
+ prog_fd = bpf_program__fd(skel->progs.nf_xdp_ct_test);
+ else
+ prog_fd = bpf_program__fd(skel->progs.nf_skb_ct_test);
+
+ err = bpf_prog_test_run_opts(prog_fd, &topts);
+ if (!ASSERT_OK(err, "bpf_prog_test_run"))
+ goto end;
+
+ ASSERT_EQ(skel->bss->test_einval_bpf_tuple, -EINVAL, "Test EINVAL for NULL bpf_tuple");
+ ASSERT_EQ(skel->bss->test_einval_reserved, -EINVAL, "Test EINVAL for reserved not set to 0");
+ ASSERT_EQ(skel->bss->test_einval_netns_id, -EINVAL, "Test EINVAL for netns_id < -1");
+ ASSERT_EQ(skel->bss->test_einval_len_opts, -EINVAL, "Test EINVAL for len__opts != NF_BPF_CT_OPTS_SZ");
+ ASSERT_EQ(skel->bss->test_eproto_l4proto, -EPROTO, "Test EPROTO for l4proto != TCP or UDP");
+ ASSERT_EQ(skel->bss->test_enonet_netns_id, -ENONET, "Test ENONET for bad but valid netns_id");
+ ASSERT_EQ(skel->bss->test_enoent_lookup, -ENOENT, "Test ENOENT for failed lookup");
+ ASSERT_EQ(skel->bss->test_eafnosupport, -EAFNOSUPPORT, "Test EAFNOSUPPORT for invalid len__tuple");
+ ASSERT_EQ(skel->data->test_alloc_entry, 0, "Test for alloc new entry");
+ ASSERT_EQ(skel->data->test_insert_entry, 0, "Test for insert new entry");
+ ASSERT_EQ(skel->data->test_succ_lookup, 0, "Test for successful lookup");
+ /* allow some tolerance for test_delta_timeout value to avoid races. */
+ ASSERT_GT(skel->bss->test_delta_timeout, 8, "Test for min ct timeout update");
+ ASSERT_LE(skel->bss->test_delta_timeout, 10, "Test for max ct timeout update");
+ ASSERT_EQ(skel->bss->test_insert_lookup_mark, 77, "Test for insert and lookup mark value");
+ ASSERT_EQ(skel->bss->test_status, IPS_STATUS_MASK, "Test for ct status update ");
+ ASSERT_EQ(skel->data->test_exist_lookup, 0, "Test existing connection lookup");
+ ASSERT_EQ(skel->bss->test_exist_lookup_mark, 43, "Test existing connection lookup ctmark");
+ ASSERT_EQ(skel->data->test_snat_addr, 0, "Test for source natting");
+ ASSERT_EQ(skel->data->test_dnat_addr, 0, "Test for destination natting");
+end:
+ if (client_fd != -1)
+ close(client_fd);
+ if (srv_client_fd != -1)
+ close(srv_client_fd);
+ if (srv_fd != -1)
+ close(srv_fd);
+
+ snprintf(cmd, sizeof(cmd), iptables, "-D");
+ system(cmd);
+ test_bpf_nf__destroy(skel);
+}
+
+static void test_bpf_nf_ct_fail(const char *prog_name, const char *err_msg)
+{
+ LIBBPF_OPTS(bpf_object_open_opts, opts, .kernel_log_buf = log_buf,
+ .kernel_log_size = sizeof(log_buf),
+ .kernel_log_level = 1);
+ struct test_bpf_nf_fail *skel;
+ struct bpf_program *prog;
+ int ret;
+
+ skel = test_bpf_nf_fail__open_opts(&opts);
+ if (!ASSERT_OK_PTR(skel, "test_bpf_nf_fail__open"))
+ return;
+
+ prog = bpf_object__find_program_by_name(skel->obj, prog_name);
+ if (!ASSERT_OK_PTR(prog, "bpf_object__find_program_by_name"))
+ goto end;
+
+ bpf_program__set_autoload(prog, true);
+
+ ret = test_bpf_nf_fail__load(skel);
+ if (!ASSERT_ERR(ret, "test_bpf_nf_fail__load must fail"))
+ goto end;
+
+ if (!ASSERT_OK_PTR(strstr(log_buf, err_msg), "expected error message")) {
+ fprintf(stderr, "Expected: %s\n", err_msg);
+ fprintf(stderr, "Verifier: %s\n", log_buf);
+ }
+
+end:
+ test_bpf_nf_fail__destroy(skel);
+}
+
+void test_bpf_nf(void)
+{
+ int i;
+ if (test__start_subtest("xdp-ct"))
+ test_bpf_nf_ct(TEST_XDP);
+ if (test__start_subtest("tc-bpf-ct"))
+ test_bpf_nf_ct(TEST_TC_BPF);
+ for (i = 0; i < ARRAY_SIZE(test_bpf_nf_fail_tests); i++) {
+ if (test__start_subtest(test_bpf_nf_fail_tests[i].prog_name))
+ test_bpf_nf_ct_fail(test_bpf_nf_fail_tests[i].prog_name,
+ test_bpf_nf_fail_tests[i].err_msg);
+ }
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/bpf_obj_id.c b/tools/testing/selftests/bpf/prog_tests/bpf_obj_id.c
index 7afa4160416f..f09d6ac2ef09 100644
--- a/tools/testing/selftests/bpf/prog_tests/bpf_obj_id.c
+++ b/tools/testing/selftests/bpf/prog_tests/bpf_obj_id.c
@@ -3,11 +3,11 @@
#define nr_iters 2
-void test_bpf_obj_id(void)
+void serial_test_bpf_obj_id(void)
{
const __u64 array_magic_value = 0xfaceb00c;
const __u32 array_key = 0;
- const char *file = "./test_obj_id.o";
+ const char *file = "./test_obj_id.bpf.o";
const char *expected_prog_name = "test_obj_id";
const char *expected_map_name = "test_map_id";
const __u64 nsec_per_sec = 1000000000;
@@ -25,7 +25,7 @@ void test_bpf_obj_id(void)
*/
__u32 map_ids[nr_iters + 1];
char jited_insns[128], xlated_insns[128], zeros[128], tp_name[128];
- __u32 i, next_id, info_len, nr_id_found, duration = 0;
+ __u32 i, next_id, info_len, nr_id_found;
struct timespec real_time_ts, boot_time_ts;
int err = 0;
__u64 array_value;
@@ -33,45 +33,46 @@ void test_bpf_obj_id(void)
time_t now, load_time;
err = bpf_prog_get_fd_by_id(0);
- CHECK(err >= 0 || errno != ENOENT,
- "get-fd-by-notexist-prog-id", "err %d errno %d\n", err, errno);
+ ASSERT_LT(err, 0, "bpf_prog_get_fd_by_id");
+ ASSERT_EQ(errno, ENOENT, "bpf_prog_get_fd_by_id");
err = bpf_map_get_fd_by_id(0);
- CHECK(err >= 0 || errno != ENOENT,
- "get-fd-by-notexist-map-id", "err %d errno %d\n", err, errno);
+ ASSERT_LT(err, 0, "bpf_map_get_fd_by_id");
+ ASSERT_EQ(errno, ENOENT, "bpf_map_get_fd_by_id");
err = bpf_link_get_fd_by_id(0);
- CHECK(err >= 0 || errno != ENOENT,
- "get-fd-by-notexist-link-id", "err %d errno %d\n", err, errno);
+ ASSERT_LT(err, 0, "bpf_map_get_fd_by_id");
+ ASSERT_EQ(errno, ENOENT, "bpf_map_get_fd_by_id");
- /* Check bpf_obj_get_info_by_fd() */
+ /* Check bpf_map_get_info_by_fd() */
bzero(zeros, sizeof(zeros));
for (i = 0; i < nr_iters; i++) {
now = time(NULL);
- err = bpf_prog_load(file, BPF_PROG_TYPE_RAW_TRACEPOINT,
+ err = bpf_prog_test_load(file, BPF_PROG_TYPE_RAW_TRACEPOINT,
&objs[i], &prog_fds[i]);
/* test_obj_id.o is a dumb prog. It should never fail
* to load.
*/
- if (CHECK_FAIL(err))
+ if (!ASSERT_OK(err, "bpf_prog_test_load"))
continue;
/* Insert a magic value to the map */
map_fds[i] = bpf_find_map(__func__, objs[i], "test_map_id");
- if (CHECK_FAIL(map_fds[i] < 0))
+ if (!ASSERT_GE(map_fds[i], 0, "bpf_find_map"))
goto done;
+
err = bpf_map_update_elem(map_fds[i], &array_key,
&array_magic_value, 0);
- if (CHECK_FAIL(err))
+ if (!ASSERT_OK(err, "bpf_map_update_elem"))
goto done;
- prog = bpf_object__find_program_by_title(objs[i],
- "raw_tp/sys_enter");
- if (CHECK_FAIL(!prog))
+ prog = bpf_object__find_program_by_name(objs[i], "test_obj_id");
+ if (!ASSERT_OK_PTR(prog, "bpf_object__find_program_by_name"))
goto done;
+
links[i] = bpf_program__attach(prog);
err = libbpf_get_error(links[i]);
- if (CHECK(err, "prog_attach", "prog #%d, err %d\n", i, err)) {
+ if (!ASSERT_OK(err, "bpf_program__attach")) {
links[i] = NULL;
goto done;
}
@@ -79,26 +80,16 @@ void test_bpf_obj_id(void)
/* Check getting map info */
info_len = sizeof(struct bpf_map_info) * 2;
bzero(&map_infos[i], info_len);
- err = bpf_obj_get_info_by_fd(map_fds[i], &map_infos[i],
+ err = bpf_map_get_info_by_fd(map_fds[i], &map_infos[i],
&info_len);
- if (CHECK(err ||
- map_infos[i].type != BPF_MAP_TYPE_ARRAY ||
- map_infos[i].key_size != sizeof(__u32) ||
- map_infos[i].value_size != sizeof(__u64) ||
- map_infos[i].max_entries != 1 ||
- map_infos[i].map_flags != 0 ||
- info_len != sizeof(struct bpf_map_info) ||
- strcmp((char *)map_infos[i].name, expected_map_name),
- "get-map-info(fd)",
- "err %d errno %d type %d(%d) info_len %u(%zu) key_size %u value_size %u max_entries %u map_flags %X name %s(%s)\n",
- err, errno,
- map_infos[i].type, BPF_MAP_TYPE_ARRAY,
- info_len, sizeof(struct bpf_map_info),
- map_infos[i].key_size,
- map_infos[i].value_size,
- map_infos[i].max_entries,
- map_infos[i].map_flags,
- map_infos[i].name, expected_map_name))
+ if (!ASSERT_OK(err, "bpf_map_get_info_by_fd") ||
+ !ASSERT_EQ(map_infos[i].type, BPF_MAP_TYPE_ARRAY, "map_type") ||
+ !ASSERT_EQ(map_infos[i].key_size, sizeof(__u32), "key_size") ||
+ !ASSERT_EQ(map_infos[i].value_size, sizeof(__u64), "value_size") ||
+ !ASSERT_EQ(map_infos[i].max_entries, 1, "max_entries") ||
+ !ASSERT_EQ(map_infos[i].map_flags, 0, "map_flags") ||
+ !ASSERT_EQ(info_len, sizeof(struct bpf_map_info), "map_info_len") ||
+ !ASSERT_STREQ((char *)map_infos[i].name, expected_map_name, "map_name"))
goto done;
/* Check getting prog info */
@@ -112,76 +103,49 @@ void test_bpf_obj_id(void)
prog_infos[i].xlated_prog_len = sizeof(xlated_insns);
prog_infos[i].map_ids = ptr_to_u64(map_ids + i);
prog_infos[i].nr_map_ids = 2;
+
err = clock_gettime(CLOCK_REALTIME, &real_time_ts);
- if (CHECK_FAIL(err))
+ if (!ASSERT_OK(err, "clock_gettime"))
goto done;
+
err = clock_gettime(CLOCK_BOOTTIME, &boot_time_ts);
- if (CHECK_FAIL(err))
+ if (!ASSERT_OK(err, "clock_gettime"))
goto done;
- err = bpf_obj_get_info_by_fd(prog_fds[i], &prog_infos[i],
- &info_len);
+
+ err = bpf_prog_get_info_by_fd(prog_fds[i], &prog_infos[i],
+ &info_len);
load_time = (real_time_ts.tv_sec - boot_time_ts.tv_sec)
+ (prog_infos[i].load_time / nsec_per_sec);
- if (CHECK(err ||
- prog_infos[i].type != BPF_PROG_TYPE_RAW_TRACEPOINT ||
- info_len != sizeof(struct bpf_prog_info) ||
- (env.jit_enabled && !prog_infos[i].jited_prog_len) ||
- (env.jit_enabled &&
- !memcmp(jited_insns, zeros, sizeof(zeros))) ||
- !prog_infos[i].xlated_prog_len ||
- !memcmp(xlated_insns, zeros, sizeof(zeros)) ||
- load_time < now - 60 || load_time > now + 60 ||
- prog_infos[i].created_by_uid != my_uid ||
- prog_infos[i].nr_map_ids != 1 ||
- *(int *)(long)prog_infos[i].map_ids != map_infos[i].id ||
- strcmp((char *)prog_infos[i].name, expected_prog_name),
- "get-prog-info(fd)",
- "err %d errno %d i %d type %d(%d) info_len %u(%zu) "
- "jit_enabled %d jited_prog_len %u xlated_prog_len %u "
- "jited_prog %d xlated_prog %d load_time %lu(%lu) "
- "uid %u(%u) nr_map_ids %u(%u) map_id %u(%u) "
- "name %s(%s)\n",
- err, errno, i,
- prog_infos[i].type, BPF_PROG_TYPE_SOCKET_FILTER,
- info_len, sizeof(struct bpf_prog_info),
- env.jit_enabled,
- prog_infos[i].jited_prog_len,
- prog_infos[i].xlated_prog_len,
- !!memcmp(jited_insns, zeros, sizeof(zeros)),
- !!memcmp(xlated_insns, zeros, sizeof(zeros)),
- load_time, now,
- prog_infos[i].created_by_uid, my_uid,
- prog_infos[i].nr_map_ids, 1,
- *(int *)(long)prog_infos[i].map_ids, map_infos[i].id,
- prog_infos[i].name, expected_prog_name))
+
+ if (!ASSERT_OK(err, "bpf_prog_get_info_by_fd") ||
+ !ASSERT_EQ(prog_infos[i].type, BPF_PROG_TYPE_RAW_TRACEPOINT, "prog_type") ||
+ !ASSERT_EQ(info_len, sizeof(struct bpf_prog_info), "prog_info_len") ||
+ !ASSERT_FALSE((env.jit_enabled && !prog_infos[i].jited_prog_len), "jited_prog_len") ||
+ !ASSERT_FALSE((env.jit_enabled && !memcmp(jited_insns, zeros, sizeof(zeros))),
+ "jited_insns") ||
+ !ASSERT_NEQ(prog_infos[i].xlated_prog_len, 0, "xlated_prog_len") ||
+ !ASSERT_NEQ(memcmp(xlated_insns, zeros, sizeof(zeros)), 0, "xlated_insns") ||
+ !ASSERT_GE(load_time, (now - 60), "load_time") ||
+ !ASSERT_LE(load_time, (now + 60), "load_time") ||
+ !ASSERT_EQ(prog_infos[i].created_by_uid, my_uid, "created_by_uid") ||
+ !ASSERT_EQ(prog_infos[i].nr_map_ids, 1, "nr_map_ids") ||
+ !ASSERT_EQ(*(int *)(long)prog_infos[i].map_ids, map_infos[i].id, "map_ids") ||
+ !ASSERT_STREQ((char *)prog_infos[i].name, expected_prog_name, "prog_name"))
goto done;
/* Check getting link info */
info_len = sizeof(struct bpf_link_info) * 2;
bzero(&link_infos[i], info_len);
- link_infos[i].raw_tracepoint.tp_name = (__u64)&tp_name;
+ link_infos[i].raw_tracepoint.tp_name = ptr_to_u64(&tp_name);
link_infos[i].raw_tracepoint.tp_name_len = sizeof(tp_name);
- err = bpf_obj_get_info_by_fd(bpf_link__fd(links[i]),
- &link_infos[i], &info_len);
- if (CHECK(err ||
- link_infos[i].type != BPF_LINK_TYPE_RAW_TRACEPOINT ||
- link_infos[i].prog_id != prog_infos[i].id ||
- link_infos[i].raw_tracepoint.tp_name != (__u64)&tp_name ||
- strcmp((char *)link_infos[i].raw_tracepoint.tp_name,
- "sys_enter") ||
- info_len != sizeof(struct bpf_link_info),
- "get-link-info(fd)",
- "err %d errno %d info_len %u(%zu) type %d(%d) id %d "
- "prog_id %d (%d) tp_name %s(%s)\n",
- err, errno,
- info_len, sizeof(struct bpf_link_info),
- link_infos[i].type, BPF_LINK_TYPE_RAW_TRACEPOINT,
- link_infos[i].id,
- link_infos[i].prog_id, prog_infos[i].id,
- (char *)link_infos[i].raw_tracepoint.tp_name,
- "sys_enter"))
+ err = bpf_link_get_info_by_fd(bpf_link__fd(links[i]),
+ &link_infos[i], &info_len);
+ if (!ASSERT_OK(err, "bpf_link_get_info_by_fd") ||
+ !ASSERT_EQ(link_infos[i].type, BPF_LINK_TYPE_RAW_TRACEPOINT, "link_type") ||
+ !ASSERT_EQ(link_infos[i].prog_id, prog_infos[i].id, "prog_id") ||
+ !ASSERT_EQ(link_infos[i].raw_tracepoint.tp_name, ptr_to_u64(&tp_name), "&tp_name") ||
+ !ASSERT_STREQ(u64_to_ptr(link_infos[i].raw_tracepoint.tp_name), "sys_enter", "tp_name"))
goto done;
-
}
/* Check bpf_prog_get_next_id() */
@@ -190,7 +154,7 @@ void test_bpf_obj_id(void)
while (!bpf_prog_get_next_id(next_id, &next_id)) {
struct bpf_prog_info prog_info = {};
__u32 saved_map_id;
- int prog_fd;
+ int prog_fd, cmp_res;
info_len = sizeof(prog_info);
@@ -198,9 +162,7 @@ void test_bpf_obj_id(void)
if (prog_fd < 0 && errno == ENOENT)
/* The bpf_prog is in the dead row */
continue;
- if (CHECK(prog_fd < 0, "get-prog-fd(next_id)",
- "prog_fd %d next_id %d errno %d\n",
- prog_fd, next_id, errno))
+ if (!ASSERT_GE(prog_fd, 0, "bpf_prog_get_fd_by_id"))
break;
for (i = 0; i < nr_iters; i++)
@@ -217,10 +179,9 @@ void test_bpf_obj_id(void)
* prog_info.map_ids = NULL
*/
prog_info.nr_map_ids = 1;
- err = bpf_obj_get_info_by_fd(prog_fd, &prog_info, &info_len);
- if (CHECK(!err || errno != EFAULT,
- "get-prog-fd-bad-nr-map-ids", "err %d errno %d(%d)",
- err, errno, EFAULT))
+ err = bpf_prog_get_info_by_fd(prog_fd, &prog_info, &info_len);
+ if (!ASSERT_ERR(err, "bpf_prog_get_info_by_fd") ||
+ !ASSERT_EQ(errno, EFAULT, "bpf_prog_get_info_by_fd"))
break;
bzero(&prog_info, sizeof(prog_info));
info_len = sizeof(prog_info);
@@ -228,30 +189,25 @@ void test_bpf_obj_id(void)
saved_map_id = *(int *)((long)prog_infos[i].map_ids);
prog_info.map_ids = prog_infos[i].map_ids;
prog_info.nr_map_ids = 2;
- err = bpf_obj_get_info_by_fd(prog_fd, &prog_info, &info_len);
+ err = bpf_prog_get_info_by_fd(prog_fd, &prog_info, &info_len);
prog_infos[i].jited_prog_insns = 0;
prog_infos[i].xlated_prog_insns = 0;
- CHECK(err || info_len != sizeof(struct bpf_prog_info) ||
- memcmp(&prog_info, &prog_infos[i], info_len) ||
- *(int *)(long)prog_info.map_ids != saved_map_id,
- "get-prog-info(next_id->fd)",
- "err %d errno %d info_len %u(%zu) memcmp %d map_id %u(%u)\n",
- err, errno, info_len, sizeof(struct bpf_prog_info),
- memcmp(&prog_info, &prog_infos[i], info_len),
- *(int *)(long)prog_info.map_ids, saved_map_id);
+ cmp_res = memcmp(&prog_info, &prog_infos[i], info_len);
+
+ ASSERT_OK(err, "bpf_prog_get_info_by_fd");
+ ASSERT_EQ(info_len, sizeof(struct bpf_prog_info), "prog_info_len");
+ ASSERT_OK(cmp_res, "memcmp");
+ ASSERT_EQ(*(int *)(long)prog_info.map_ids, saved_map_id, "map_id");
close(prog_fd);
}
- CHECK(nr_id_found != nr_iters,
- "check total prog id found by get_next_id",
- "nr_id_found %u(%u)\n",
- nr_id_found, nr_iters);
+ ASSERT_EQ(nr_id_found, nr_iters, "prog_nr_id_found");
/* Check bpf_map_get_next_id() */
nr_id_found = 0;
next_id = 0;
while (!bpf_map_get_next_id(next_id, &next_id)) {
struct bpf_map_info map_info = {};
- int map_fd;
+ int map_fd, cmp_res;
info_len = sizeof(map_info);
@@ -259,9 +215,7 @@ void test_bpf_obj_id(void)
if (map_fd < 0 && errno == ENOENT)
/* The bpf_map is in the dead row */
continue;
- if (CHECK(map_fd < 0, "get-map-fd(next_id)",
- "map_fd %d next_id %u errno %d\n",
- map_fd, next_id, errno))
+ if (!ASSERT_GE(map_fd, 0, "bpf_map_get_fd_by_id"))
break;
for (i = 0; i < nr_iters; i++)
@@ -274,25 +228,19 @@ void test_bpf_obj_id(void)
nr_id_found++;
err = bpf_map_lookup_elem(map_fd, &array_key, &array_value);
- if (CHECK_FAIL(err))
+ if (!ASSERT_OK(err, "bpf_map_lookup_elem"))
goto done;
- err = bpf_obj_get_info_by_fd(map_fd, &map_info, &info_len);
- CHECK(err || info_len != sizeof(struct bpf_map_info) ||
- memcmp(&map_info, &map_infos[i], info_len) ||
- array_value != array_magic_value,
- "check get-map-info(next_id->fd)",
- "err %d errno %d info_len %u(%zu) memcmp %d array_value %llu(%llu)\n",
- err, errno, info_len, sizeof(struct bpf_map_info),
- memcmp(&map_info, &map_infos[i], info_len),
- array_value, array_magic_value);
+ err = bpf_map_get_info_by_fd(map_fd, &map_info, &info_len);
+ cmp_res = memcmp(&map_info, &map_infos[i], info_len);
+ ASSERT_OK(err, "bpf_map_get_info_by_fd");
+ ASSERT_EQ(info_len, sizeof(struct bpf_map_info), "info_len");
+ ASSERT_OK(cmp_res, "memcmp");
+ ASSERT_EQ(array_value, array_magic_value, "array_value");
close(map_fd);
}
- CHECK(nr_id_found != nr_iters,
- "check total map id found by get_next_id",
- "nr_id_found %u(%u)\n",
- nr_id_found, nr_iters);
+ ASSERT_EQ(nr_id_found, nr_iters, "map_nr_id_found");
/* Check bpf_link_get_next_id() */
nr_id_found = 0;
@@ -308,9 +256,7 @@ void test_bpf_obj_id(void)
if (link_fd < 0 && errno == ENOENT)
/* The bpf_link is in the dead row */
continue;
- if (CHECK(link_fd < 0, "get-link-fd(next_id)",
- "link_fd %d next_id %u errno %d\n",
- link_fd, next_id, errno))
+ if (!ASSERT_GE(link_fd, 0, "bpf_link_get_fd_by_id"))
break;
for (i = 0; i < nr_iters; i++)
@@ -322,20 +268,16 @@ void test_bpf_obj_id(void)
nr_id_found++;
- err = bpf_obj_get_info_by_fd(link_fd, &link_info, &info_len);
+ err = bpf_link_get_info_by_fd(link_fd, &link_info, &info_len);
cmp_res = memcmp(&link_info, &link_infos[i],
offsetof(struct bpf_link_info, raw_tracepoint));
- CHECK(err || info_len != sizeof(link_info) || cmp_res,
- "check get-link-info(next_id->fd)",
- "err %d errno %d info_len %u(%zu) memcmp %d\n",
- err, errno, info_len, sizeof(struct bpf_link_info),
- cmp_res);
+ ASSERT_OK(err, "bpf_link_get_info_by_fd");
+ ASSERT_EQ(info_len, sizeof(link_info), "info_len");
+ ASSERT_OK(cmp_res, "memcmp");
close(link_fd);
}
- CHECK(nr_id_found != nr_iters,
- "check total link id found by get_next_id",
- "nr_id_found %u(%u)\n", nr_id_found, nr_iters);
+ ASSERT_EQ(nr_id_found, nr_iters, "link_nr_id_found");
done:
for (i = 0; i < nr_iters; i++) {
diff --git a/tools/testing/selftests/bpf/prog_tests/bpf_obj_pinning.c b/tools/testing/selftests/bpf/prog_tests/bpf_obj_pinning.c
new file mode 100644
index 000000000000..ee0458a5ce78
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/bpf_obj_pinning.c
@@ -0,0 +1,269 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2023 Meta Platforms, Inc. and affiliates. */
+#define _GNU_SOURCE
+#include <test_progs.h>
+#include <bpf/btf.h>
+#include <fcntl.h>
+#include <unistd.h>
+#include <linux/unistd.h>
+#include <linux/mount.h>
+#include <sys/syscall.h>
+#include "bpf/libbpf_internal.h"
+
+static inline int sys_fsopen(const char *fsname, unsigned flags)
+{
+ return syscall(__NR_fsopen, fsname, flags);
+}
+
+static inline int sys_fsconfig(int fs_fd, unsigned cmd, const char *key, const void *val, int aux)
+{
+ return syscall(__NR_fsconfig, fs_fd, cmd, key, val, aux);
+}
+
+static inline int sys_fsmount(int fs_fd, unsigned flags, unsigned ms_flags)
+{
+ return syscall(__NR_fsmount, fs_fd, flags, ms_flags);
+}
+
+__attribute__((unused))
+static inline int sys_move_mount(int from_dfd, const char *from_path,
+ int to_dfd, const char *to_path,
+ unsigned int ms_flags)
+{
+ return syscall(__NR_move_mount, from_dfd, from_path, to_dfd, to_path, ms_flags);
+}
+
+static void bpf_obj_pinning_detached(void)
+{
+ LIBBPF_OPTS(bpf_obj_pin_opts, pin_opts);
+ LIBBPF_OPTS(bpf_obj_get_opts, get_opts);
+ int fs_fd = -1, mnt_fd = -1;
+ int map_fd = -1, map_fd2 = -1;
+ int zero = 0, src_value, dst_value, err;
+ const char *map_name = "fsmount_map";
+
+ /* A bunch of below UAPI calls are constructed based on reading:
+ * https://brauner.io/2023/02/28/mounting-into-mount-namespaces.html
+ */
+
+ /* create VFS context */
+ fs_fd = sys_fsopen("bpf", 0);
+ if (!ASSERT_GE(fs_fd, 0, "fs_fd"))
+ goto cleanup;
+
+ /* instantiate FS object */
+ err = sys_fsconfig(fs_fd, FSCONFIG_CMD_CREATE, NULL, NULL, 0);
+ if (!ASSERT_OK(err, "fs_create"))
+ goto cleanup;
+
+ /* create O_PATH fd for detached mount */
+ mnt_fd = sys_fsmount(fs_fd, 0, 0);
+ if (!ASSERT_GE(mnt_fd, 0, "mnt_fd"))
+ goto cleanup;
+
+ /* If we wanted to expose detached mount in the file system, we'd do
+ * something like below. But the whole point is that we actually don't
+ * even have to expose BPF FS in the file system to be able to work
+ * (pin/get objects) with it.
+ *
+ * err = sys_move_mount(mnt_fd, "", -EBADF, mnt_path, MOVE_MOUNT_F_EMPTY_PATH);
+ * if (!ASSERT_OK(err, "move_mount"))
+ * goto cleanup;
+ */
+
+ /* create BPF map to pin */
+ map_fd = bpf_map_create(BPF_MAP_TYPE_ARRAY, map_name, 4, 4, 1, NULL);
+ if (!ASSERT_GE(map_fd, 0, "map_fd"))
+ goto cleanup;
+
+ /* pin BPF map into detached BPF FS through mnt_fd */
+ pin_opts.file_flags = BPF_F_PATH_FD;
+ pin_opts.path_fd = mnt_fd;
+ err = bpf_obj_pin_opts(map_fd, map_name, &pin_opts);
+ if (!ASSERT_OK(err, "map_pin"))
+ goto cleanup;
+
+ /* get BPF map from detached BPF FS through mnt_fd */
+ get_opts.file_flags = BPF_F_PATH_FD;
+ get_opts.path_fd = mnt_fd;
+ map_fd2 = bpf_obj_get_opts(map_name, &get_opts);
+ if (!ASSERT_GE(map_fd2, 0, "map_get"))
+ goto cleanup;
+
+ /* update map through one FD */
+ src_value = 0xcafebeef;
+ err = bpf_map_update_elem(map_fd, &zero, &src_value, 0);
+ ASSERT_OK(err, "map_update");
+
+ /* check values written/read through different FDs do match */
+ dst_value = 0;
+ err = bpf_map_lookup_elem(map_fd2, &zero, &dst_value);
+ ASSERT_OK(err, "map_lookup");
+ ASSERT_EQ(dst_value, src_value, "map_value_eq1");
+ ASSERT_EQ(dst_value, 0xcafebeef, "map_value_eq2");
+
+cleanup:
+ if (map_fd >= 0)
+ ASSERT_OK(close(map_fd), "close_map_fd");
+ if (map_fd2 >= 0)
+ ASSERT_OK(close(map_fd2), "close_map_fd2");
+ if (fs_fd >= 0)
+ ASSERT_OK(close(fs_fd), "close_fs_fd");
+ if (mnt_fd >= 0)
+ ASSERT_OK(close(mnt_fd), "close_mnt_fd");
+}
+
+enum path_kind
+{
+ PATH_STR_ABS,
+ PATH_STR_REL,
+ PATH_FD_REL,
+};
+
+static void validate_pin(int map_fd, const char *map_name, int src_value,
+ enum path_kind path_kind)
+{
+ LIBBPF_OPTS(bpf_obj_pin_opts, pin_opts);
+ char abs_path[PATH_MAX], old_cwd[PATH_MAX];
+ const char *pin_path = NULL;
+ int zero = 0, dst_value, map_fd2, err;
+
+ snprintf(abs_path, sizeof(abs_path), "/sys/fs/bpf/%s", map_name);
+ old_cwd[0] = '\0';
+
+ switch (path_kind) {
+ case PATH_STR_ABS:
+ /* absolute path */
+ pin_path = abs_path;
+ break;
+ case PATH_STR_REL:
+ /* cwd + relative path */
+ ASSERT_OK_PTR(getcwd(old_cwd, sizeof(old_cwd)), "getcwd");
+ ASSERT_OK(chdir("/sys/fs/bpf"), "chdir");
+ pin_path = map_name;
+ break;
+ case PATH_FD_REL:
+ /* dir fd + relative path */
+ pin_opts.file_flags = BPF_F_PATH_FD;
+ pin_opts.path_fd = open("/sys/fs/bpf", O_PATH);
+ ASSERT_GE(pin_opts.path_fd, 0, "path_fd");
+ pin_path = map_name;
+ break;
+ }
+
+ /* pin BPF map using specified path definition */
+ err = bpf_obj_pin_opts(map_fd, pin_path, &pin_opts);
+ ASSERT_OK(err, "obj_pin");
+
+ /* cleanup */
+ if (path_kind == PATH_FD_REL && pin_opts.path_fd >= 0)
+ close(pin_opts.path_fd);
+ if (old_cwd[0])
+ ASSERT_OK(chdir(old_cwd), "restore_cwd");
+
+ map_fd2 = bpf_obj_get(abs_path);
+ if (!ASSERT_GE(map_fd2, 0, "map_get"))
+ goto cleanup;
+
+ /* update map through one FD */
+ err = bpf_map_update_elem(map_fd, &zero, &src_value, 0);
+ ASSERT_OK(err, "map_update");
+
+ /* check values written/read through different FDs do match */
+ dst_value = 0;
+ err = bpf_map_lookup_elem(map_fd2, &zero, &dst_value);
+ ASSERT_OK(err, "map_lookup");
+ ASSERT_EQ(dst_value, src_value, "map_value_eq");
+cleanup:
+ if (map_fd2 >= 0)
+ ASSERT_OK(close(map_fd2), "close_map_fd2");
+ unlink(abs_path);
+}
+
+static void validate_get(int map_fd, const char *map_name, int src_value,
+ enum path_kind path_kind)
+{
+ LIBBPF_OPTS(bpf_obj_get_opts, get_opts);
+ char abs_path[PATH_MAX], old_cwd[PATH_MAX];
+ const char *pin_path = NULL;
+ int zero = 0, dst_value, map_fd2, err;
+
+ snprintf(abs_path, sizeof(abs_path), "/sys/fs/bpf/%s", map_name);
+ /* pin BPF map using specified path definition */
+ err = bpf_obj_pin(map_fd, abs_path);
+ if (!ASSERT_OK(err, "pin_map"))
+ return;
+
+ old_cwd[0] = '\0';
+
+ switch (path_kind) {
+ case PATH_STR_ABS:
+ /* absolute path */
+ pin_path = abs_path;
+ break;
+ case PATH_STR_REL:
+ /* cwd + relative path */
+ ASSERT_OK_PTR(getcwd(old_cwd, sizeof(old_cwd)), "getcwd");
+ ASSERT_OK(chdir("/sys/fs/bpf"), "chdir");
+ pin_path = map_name;
+ break;
+ case PATH_FD_REL:
+ /* dir fd + relative path */
+ get_opts.file_flags = BPF_F_PATH_FD;
+ get_opts.path_fd = open("/sys/fs/bpf", O_PATH);
+ ASSERT_GE(get_opts.path_fd, 0, "path_fd");
+ pin_path = map_name;
+ break;
+ }
+
+ map_fd2 = bpf_obj_get_opts(pin_path, &get_opts);
+ if (!ASSERT_GE(map_fd2, 0, "map_get"))
+ goto cleanup;
+
+ /* cleanup */
+ if (path_kind == PATH_FD_REL && get_opts.path_fd >= 0)
+ close(get_opts.path_fd);
+ if (old_cwd[0])
+ ASSERT_OK(chdir(old_cwd), "restore_cwd");
+
+ /* update map through one FD */
+ err = bpf_map_update_elem(map_fd, &zero, &src_value, 0);
+ ASSERT_OK(err, "map_update");
+
+ /* check values written/read through different FDs do match */
+ dst_value = 0;
+ err = bpf_map_lookup_elem(map_fd2, &zero, &dst_value);
+ ASSERT_OK(err, "map_lookup");
+ ASSERT_EQ(dst_value, src_value, "map_value_eq");
+cleanup:
+ if (map_fd2 >= 0)
+ ASSERT_OK(close(map_fd2), "close_map_fd2");
+ unlink(abs_path);
+}
+
+static void bpf_obj_pinning_mounted(enum path_kind path_kind)
+{
+ const char *map_name = "mounted_map";
+ int map_fd;
+
+ /* create BPF map to pin */
+ map_fd = bpf_map_create(BPF_MAP_TYPE_ARRAY, map_name, 4, 4, 1, NULL);
+ if (!ASSERT_GE(map_fd, 0, "map_fd"))
+ return;
+
+ validate_pin(map_fd, map_name, 100 + (int)path_kind, path_kind);
+ validate_get(map_fd, map_name, 200 + (int)path_kind, path_kind);
+ ASSERT_OK(close(map_fd), "close_map_fd");
+}
+
+void test_bpf_obj_pinning()
+{
+ if (test__start_subtest("detached"))
+ bpf_obj_pinning_detached();
+ if (test__start_subtest("mounted-str-abs"))
+ bpf_obj_pinning_mounted(PATH_STR_ABS);
+ if (test__start_subtest("mounted-str-rel"))
+ bpf_obj_pinning_mounted(PATH_STR_REL);
+ if (test__start_subtest("mounted-fd-rel"))
+ bpf_obj_pinning_mounted(PATH_FD_REL);
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/bpf_tcp_ca.c b/tools/testing/selftests/bpf/prog_tests/bpf_tcp_ca.c
index 9a8f47fc0b91..a88e6e07e4f5 100644
--- a/tools/testing/selftests/bpf/prog_tests/bpf_tcp_ca.c
+++ b/tools/testing/selftests/bpf/prog_tests/bpf_tcp_ca.c
@@ -2,44 +2,32 @@
/* Copyright (c) 2019 Facebook */
#include <linux/err.h>
+#include <netinet/tcp.h>
#include <test_progs.h>
+#include "network_helpers.h"
#include "bpf_dctcp.skel.h"
#include "bpf_cubic.skel.h"
+#include "bpf_tcp_nogpl.skel.h"
+#include "tcp_ca_update.skel.h"
+#include "bpf_dctcp_release.skel.h"
+#include "tcp_ca_write_sk_pacing.skel.h"
+#include "tcp_ca_incompl_cong_ops.skel.h"
+#include "tcp_ca_unsupp_cong_op.skel.h"
-#define min(a, b) ((a) < (b) ? (a) : (b))
+#ifndef ENOTSUPP
+#define ENOTSUPP 524
+#endif
static const unsigned int total_bytes = 10 * 1024 * 1024;
-static const struct timeval timeo_sec = { .tv_sec = 10 };
-static const size_t timeo_optlen = sizeof(timeo_sec);
static int expected_stg = 0xeB9F;
-static int stop, duration;
-
-static int settimeo(int fd)
-{
- int err;
-
- err = setsockopt(fd, SOL_SOCKET, SO_RCVTIMEO, &timeo_sec,
- timeo_optlen);
- if (CHECK(err == -1, "setsockopt(fd, SO_RCVTIMEO)", "errno:%d\n",
- errno))
- return -1;
-
- err = setsockopt(fd, SOL_SOCKET, SO_SNDTIMEO, &timeo_sec,
- timeo_optlen);
- if (CHECK(err == -1, "setsockopt(fd, SO_SNDTIMEO)", "errno:%d\n",
- errno))
- return -1;
-
- return 0;
-}
+static int stop;
static int settcpca(int fd, const char *tcp_ca)
{
int err;
err = setsockopt(fd, IPPROTO_TCP, TCP_CONGESTION, tcp_ca, strlen(tcp_ca));
- if (CHECK(err == -1, "setsockopt(fd, TCP_CONGESTION)", "errno:%d\n",
- errno))
+ if (!ASSERT_NEQ(err, -1, "setsockopt"))
return -1;
return 0;
@@ -59,14 +47,14 @@ static void *server(void *arg)
goto done;
}
- if (settimeo(fd)) {
+ if (settimeo(fd, 0)) {
err = -errno;
goto done;
}
while (bytes < total_bytes && !READ_ONCE(stop)) {
nr_sent = send(fd, &batch,
- min(total_bytes - bytes, sizeof(batch)), 0);
+ MIN(total_bytes - bytes, sizeof(batch)), 0);
if (nr_sent == -1 && errno == EINTR)
continue;
if (nr_sent == -1) {
@@ -76,11 +64,10 @@ static void *server(void *arg)
bytes += nr_sent;
}
- CHECK(bytes != total_bytes, "send", "%zd != %u nr_sent:%zd errno:%d\n",
- bytes, total_bytes, nr_sent, errno);
+ ASSERT_EQ(bytes, total_bytes, "send");
done:
- if (fd != -1)
+ if (fd >= 0)
close(fd);
if (err) {
WRITE_ONCE(stop, 1);
@@ -103,42 +90,44 @@ static void do_test(const char *tcp_ca, const struct bpf_map *sk_stg_map)
WRITE_ONCE(stop, 0);
lfd = socket(AF_INET6, SOCK_STREAM, 0);
- if (CHECK(lfd == -1, "socket", "errno:%d\n", errno))
+ if (!ASSERT_NEQ(lfd, -1, "socket"))
return;
+
fd = socket(AF_INET6, SOCK_STREAM, 0);
- if (CHECK(fd == -1, "socket", "errno:%d\n", errno)) {
+ if (!ASSERT_NEQ(fd, -1, "socket")) {
close(lfd);
return;
}
if (settcpca(lfd, tcp_ca) || settcpca(fd, tcp_ca) ||
- settimeo(lfd) || settimeo(fd))
+ settimeo(lfd, 0) || settimeo(fd, 0))
goto done;
/* bind, listen and start server thread to accept */
sa6.sin6_family = AF_INET6;
sa6.sin6_addr = in6addr_loopback;
err = bind(lfd, (struct sockaddr *)&sa6, addrlen);
- if (CHECK(err == -1, "bind", "errno:%d\n", errno))
+ if (!ASSERT_NEQ(err, -1, "bind"))
goto done;
+
err = getsockname(lfd, (struct sockaddr *)&sa6, &addrlen);
- if (CHECK(err == -1, "getsockname", "errno:%d\n", errno))
+ if (!ASSERT_NEQ(err, -1, "getsockname"))
goto done;
+
err = listen(lfd, 1);
- if (CHECK(err == -1, "listen", "errno:%d\n", errno))
+ if (!ASSERT_NEQ(err, -1, "listen"))
goto done;
if (sk_stg_map) {
err = bpf_map_update_elem(bpf_map__fd(sk_stg_map), &fd,
&expected_stg, BPF_NOEXIST);
- if (CHECK(err, "bpf_map_update_elem(sk_stg_map)",
- "err:%d errno:%d\n", err, errno))
+ if (!ASSERT_OK(err, "bpf_map_update_elem(sk_stg_map)"))
goto done;
}
/* connect to server */
err = connect(fd, (struct sockaddr *)&sa6, addrlen);
- if (CHECK(err == -1, "connect", "errno:%d\n", errno))
+ if (!ASSERT_NEQ(err, -1, "connect"))
goto done;
if (sk_stg_map) {
@@ -146,20 +135,19 @@ static void do_test(const char *tcp_ca, const struct bpf_map *sk_stg_map)
err = bpf_map_lookup_elem(bpf_map__fd(sk_stg_map), &fd,
&tmp_stg);
- if (CHECK(!err || errno != ENOENT,
- "bpf_map_lookup_elem(sk_stg_map)",
- "err:%d errno:%d\n", err, errno))
+ if (!ASSERT_ERR(err, "bpf_map_lookup_elem(sk_stg_map)") ||
+ !ASSERT_EQ(errno, ENOENT, "bpf_map_lookup_elem(sk_stg_map)"))
goto done;
}
err = pthread_create(&srv_thread, NULL, server, (void *)(long)lfd);
- if (CHECK(err != 0, "pthread_create", "err:%d errno:%d\n", err, errno))
+ if (!ASSERT_OK(err, "pthread_create"))
goto done;
/* recv total_bytes */
while (bytes < total_bytes && !READ_ONCE(stop)) {
nr_recv = recv(fd, &batch,
- min(total_bytes - bytes, sizeof(batch)), 0);
+ MIN(total_bytes - bytes, sizeof(batch)), 0);
if (nr_recv == -1 && errno == EINTR)
continue;
if (nr_recv == -1)
@@ -167,13 +155,12 @@ static void do_test(const char *tcp_ca, const struct bpf_map *sk_stg_map)
bytes += nr_recv;
}
- CHECK(bytes != total_bytes, "recv", "%zd != %u nr_recv:%zd errno:%d\n",
- bytes, total_bytes, nr_recv, errno);
+ ASSERT_EQ(bytes, total_bytes, "recv");
WRITE_ONCE(stop, 1);
pthread_join(srv_thread, &thread_ret);
- CHECK(IS_ERR(thread_ret), "pthread_join", "thread_ret:%ld",
- PTR_ERR(thread_ret));
+ ASSERT_OK(IS_ERR(thread_ret), "thread_ret");
+
done:
close(lfd);
close(fd);
@@ -185,18 +172,19 @@ static void test_cubic(void)
struct bpf_link *link;
cubic_skel = bpf_cubic__open_and_load();
- if (CHECK(!cubic_skel, "bpf_cubic__open_and_load", "failed\n"))
+ if (!ASSERT_OK_PTR(cubic_skel, "bpf_cubic__open_and_load"))
return;
link = bpf_map__attach_struct_ops(cubic_skel->maps.cubic);
- if (CHECK(IS_ERR(link), "bpf_map__attach_struct_ops", "err:%ld\n",
- PTR_ERR(link))) {
+ if (!ASSERT_OK_PTR(link, "bpf_map__attach_struct_ops")) {
bpf_cubic__destroy(cubic_skel);
return;
}
do_test("bpf_cubic", NULL);
+ ASSERT_EQ(cubic_skel->bss->bpf_cubic_acked_called, 1, "pkts_acked called");
+
bpf_link__destroy(link);
bpf_cubic__destroy(cubic_skel);
}
@@ -207,23 +195,338 @@ static void test_dctcp(void)
struct bpf_link *link;
dctcp_skel = bpf_dctcp__open_and_load();
- if (CHECK(!dctcp_skel, "bpf_dctcp__open_and_load", "failed\n"))
+ if (!ASSERT_OK_PTR(dctcp_skel, "bpf_dctcp__open_and_load"))
return;
link = bpf_map__attach_struct_ops(dctcp_skel->maps.dctcp);
- if (CHECK(IS_ERR(link), "bpf_map__attach_struct_ops", "err:%ld\n",
- PTR_ERR(link))) {
+ if (!ASSERT_OK_PTR(link, "bpf_map__attach_struct_ops")) {
bpf_dctcp__destroy(dctcp_skel);
return;
}
do_test("bpf_dctcp", dctcp_skel->maps.sk_stg_map);
- CHECK(dctcp_skel->bss->stg_result != expected_stg,
- "Unexpected stg_result", "stg_result (%x) != expected_stg (%x)\n",
- dctcp_skel->bss->stg_result, expected_stg);
+ ASSERT_EQ(dctcp_skel->bss->stg_result, expected_stg, "stg_result");
+
+ bpf_link__destroy(link);
+ bpf_dctcp__destroy(dctcp_skel);
+}
+
+static char *err_str;
+static bool found;
+
+static int libbpf_debug_print(enum libbpf_print_level level,
+ const char *format, va_list args)
+{
+ const char *prog_name, *log_buf;
+
+ if (level != LIBBPF_WARN ||
+ !strstr(format, "-- BEGIN PROG LOAD LOG --")) {
+ vprintf(format, args);
+ return 0;
+ }
+
+ prog_name = va_arg(args, char *);
+ log_buf = va_arg(args, char *);
+ if (!log_buf)
+ goto out;
+ if (err_str && strstr(log_buf, err_str) != NULL)
+ found = true;
+out:
+ printf(format, prog_name, log_buf);
+ return 0;
+}
+
+static void test_invalid_license(void)
+{
+ libbpf_print_fn_t old_print_fn;
+ struct bpf_tcp_nogpl *skel;
+
+ err_str = "struct ops programs must have a GPL compatible license";
+ found = false;
+ old_print_fn = libbpf_set_print(libbpf_debug_print);
+
+ skel = bpf_tcp_nogpl__open_and_load();
+ ASSERT_NULL(skel, "bpf_tcp_nogpl");
+ ASSERT_EQ(found, true, "expected_err_msg");
+
+ bpf_tcp_nogpl__destroy(skel);
+ libbpf_set_print(old_print_fn);
+}
+
+static void test_dctcp_fallback(void)
+{
+ int err, lfd = -1, cli_fd = -1, srv_fd = -1;
+ struct network_helper_opts opts = {
+ .cc = "cubic",
+ };
+ struct bpf_dctcp *dctcp_skel;
+ struct bpf_link *link = NULL;
+ char srv_cc[16];
+ socklen_t cc_len = sizeof(srv_cc);
+ dctcp_skel = bpf_dctcp__open();
+ if (!ASSERT_OK_PTR(dctcp_skel, "dctcp_skel"))
+ return;
+ strcpy(dctcp_skel->rodata->fallback, "cubic");
+ if (!ASSERT_OK(bpf_dctcp__load(dctcp_skel), "bpf_dctcp__load"))
+ goto done;
+
+ link = bpf_map__attach_struct_ops(dctcp_skel->maps.dctcp);
+ if (!ASSERT_OK_PTR(link, "dctcp link"))
+ goto done;
+
+ lfd = start_server(AF_INET6, SOCK_STREAM, "::1", 0, 0);
+ if (!ASSERT_GE(lfd, 0, "lfd") ||
+ !ASSERT_OK(settcpca(lfd, "bpf_dctcp"), "lfd=>bpf_dctcp"))
+ goto done;
+
+ cli_fd = connect_to_fd_opts(lfd, &opts);
+ if (!ASSERT_GE(cli_fd, 0, "cli_fd"))
+ goto done;
+
+ srv_fd = accept(lfd, NULL, 0);
+ if (!ASSERT_GE(srv_fd, 0, "srv_fd"))
+ goto done;
+ ASSERT_STREQ(dctcp_skel->bss->cc_res, "cubic", "cc_res");
+ ASSERT_EQ(dctcp_skel->bss->tcp_cdg_res, -ENOTSUPP, "tcp_cdg_res");
+ /* All setsockopt(TCP_CONGESTION) in the recurred
+ * bpf_dctcp->init() should fail with -EBUSY.
+ */
+ ASSERT_EQ(dctcp_skel->bss->ebusy_cnt, 3, "ebusy_cnt");
+
+ err = getsockopt(srv_fd, SOL_TCP, TCP_CONGESTION, srv_cc, &cc_len);
+ if (!ASSERT_OK(err, "getsockopt(srv_fd, TCP_CONGESTION)"))
+ goto done;
+ ASSERT_STREQ(srv_cc, "cubic", "srv_fd cc");
+
+done:
bpf_link__destroy(link);
bpf_dctcp__destroy(dctcp_skel);
+ if (lfd != -1)
+ close(lfd);
+ if (srv_fd != -1)
+ close(srv_fd);
+ if (cli_fd != -1)
+ close(cli_fd);
+}
+
+static void test_rel_setsockopt(void)
+{
+ struct bpf_dctcp_release *rel_skel;
+ libbpf_print_fn_t old_print_fn;
+
+ err_str = "unknown func bpf_setsockopt";
+ found = false;
+
+ old_print_fn = libbpf_set_print(libbpf_debug_print);
+ rel_skel = bpf_dctcp_release__open_and_load();
+ libbpf_set_print(old_print_fn);
+
+ ASSERT_ERR_PTR(rel_skel, "rel_skel");
+ ASSERT_TRUE(found, "expected_err_msg");
+
+ bpf_dctcp_release__destroy(rel_skel);
+}
+
+static void test_write_sk_pacing(void)
+{
+ struct tcp_ca_write_sk_pacing *skel;
+ struct bpf_link *link;
+
+ skel = tcp_ca_write_sk_pacing__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "open_and_load"))
+ return;
+
+ link = bpf_map__attach_struct_ops(skel->maps.write_sk_pacing);
+ ASSERT_OK_PTR(link, "attach_struct_ops");
+
+ bpf_link__destroy(link);
+ tcp_ca_write_sk_pacing__destroy(skel);
+}
+
+static void test_incompl_cong_ops(void)
+{
+ struct tcp_ca_incompl_cong_ops *skel;
+ struct bpf_link *link;
+
+ skel = tcp_ca_incompl_cong_ops__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "open_and_load"))
+ return;
+
+ /* That cong_avoid() and cong_control() are missing is only reported at
+ * this point:
+ */
+ link = bpf_map__attach_struct_ops(skel->maps.incompl_cong_ops);
+ ASSERT_ERR_PTR(link, "attach_struct_ops");
+
+ bpf_link__destroy(link);
+ tcp_ca_incompl_cong_ops__destroy(skel);
+}
+
+static void test_unsupp_cong_op(void)
+{
+ libbpf_print_fn_t old_print_fn;
+ struct tcp_ca_unsupp_cong_op *skel;
+
+ err_str = "attach to unsupported member get_info";
+ found = false;
+ old_print_fn = libbpf_set_print(libbpf_debug_print);
+
+ skel = tcp_ca_unsupp_cong_op__open_and_load();
+ ASSERT_NULL(skel, "open_and_load");
+ ASSERT_EQ(found, true, "expected_err_msg");
+
+ tcp_ca_unsupp_cong_op__destroy(skel);
+ libbpf_set_print(old_print_fn);
+}
+
+static void test_update_ca(void)
+{
+ struct tcp_ca_update *skel;
+ struct bpf_link *link;
+ int saved_ca1_cnt;
+ int err;
+
+ skel = tcp_ca_update__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "open"))
+ return;
+
+ link = bpf_map__attach_struct_ops(skel->maps.ca_update_1);
+ ASSERT_OK_PTR(link, "attach_struct_ops");
+
+ do_test("tcp_ca_update", NULL);
+ saved_ca1_cnt = skel->bss->ca1_cnt;
+ ASSERT_GT(saved_ca1_cnt, 0, "ca1_ca1_cnt");
+
+ err = bpf_link__update_map(link, skel->maps.ca_update_2);
+ ASSERT_OK(err, "update_map");
+
+ do_test("tcp_ca_update", NULL);
+ ASSERT_EQ(skel->bss->ca1_cnt, saved_ca1_cnt, "ca2_ca1_cnt");
+ ASSERT_GT(skel->bss->ca2_cnt, 0, "ca2_ca2_cnt");
+
+ bpf_link__destroy(link);
+ tcp_ca_update__destroy(skel);
+}
+
+static void test_update_wrong(void)
+{
+ struct tcp_ca_update *skel;
+ struct bpf_link *link;
+ int saved_ca1_cnt;
+ int err;
+
+ skel = tcp_ca_update__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "open"))
+ return;
+
+ link = bpf_map__attach_struct_ops(skel->maps.ca_update_1);
+ ASSERT_OK_PTR(link, "attach_struct_ops");
+
+ do_test("tcp_ca_update", NULL);
+ saved_ca1_cnt = skel->bss->ca1_cnt;
+ ASSERT_GT(saved_ca1_cnt, 0, "ca1_ca1_cnt");
+
+ err = bpf_link__update_map(link, skel->maps.ca_wrong);
+ ASSERT_ERR(err, "update_map");
+
+ do_test("tcp_ca_update", NULL);
+ ASSERT_GT(skel->bss->ca1_cnt, saved_ca1_cnt, "ca2_ca1_cnt");
+
+ bpf_link__destroy(link);
+ tcp_ca_update__destroy(skel);
+}
+
+static void test_mixed_links(void)
+{
+ struct tcp_ca_update *skel;
+ struct bpf_link *link, *link_nl;
+ int err;
+
+ skel = tcp_ca_update__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "open"))
+ return;
+
+ link_nl = bpf_map__attach_struct_ops(skel->maps.ca_no_link);
+ ASSERT_OK_PTR(link_nl, "attach_struct_ops_nl");
+
+ link = bpf_map__attach_struct_ops(skel->maps.ca_update_1);
+ ASSERT_OK_PTR(link, "attach_struct_ops");
+
+ do_test("tcp_ca_update", NULL);
+ ASSERT_GT(skel->bss->ca1_cnt, 0, "ca1_ca1_cnt");
+
+ err = bpf_link__update_map(link, skel->maps.ca_no_link);
+ ASSERT_ERR(err, "update_map");
+
+ bpf_link__destroy(link);
+ bpf_link__destroy(link_nl);
+ tcp_ca_update__destroy(skel);
+}
+
+static void test_multi_links(void)
+{
+ struct tcp_ca_update *skel;
+ struct bpf_link *link;
+
+ skel = tcp_ca_update__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "open"))
+ return;
+
+ link = bpf_map__attach_struct_ops(skel->maps.ca_update_1);
+ ASSERT_OK_PTR(link, "attach_struct_ops_1st");
+ bpf_link__destroy(link);
+
+ /* A map should be able to be used to create links multiple
+ * times.
+ */
+ link = bpf_map__attach_struct_ops(skel->maps.ca_update_1);
+ ASSERT_OK_PTR(link, "attach_struct_ops_2nd");
+ bpf_link__destroy(link);
+
+ tcp_ca_update__destroy(skel);
+}
+
+static void test_link_replace(void)
+{
+ DECLARE_LIBBPF_OPTS(bpf_link_update_opts, opts);
+ struct tcp_ca_update *skel;
+ struct bpf_link *link;
+ int err;
+
+ skel = tcp_ca_update__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "open"))
+ return;
+
+ link = bpf_map__attach_struct_ops(skel->maps.ca_update_1);
+ ASSERT_OK_PTR(link, "attach_struct_ops_1st");
+ bpf_link__destroy(link);
+
+ link = bpf_map__attach_struct_ops(skel->maps.ca_update_2);
+ ASSERT_OK_PTR(link, "attach_struct_ops_2nd");
+
+ /* BPF_F_REPLACE with a wrong old map Fd. It should fail!
+ *
+ * With BPF_F_REPLACE, the link should be updated only if the
+ * old map fd given here matches the map backing the link.
+ */
+ opts.old_map_fd = bpf_map__fd(skel->maps.ca_update_1);
+ opts.flags = BPF_F_REPLACE;
+ err = bpf_link_update(bpf_link__fd(link),
+ bpf_map__fd(skel->maps.ca_update_1),
+ &opts);
+ ASSERT_ERR(err, "bpf_link_update_fail");
+
+ /* BPF_F_REPLACE with a correct old map Fd. It should success! */
+ opts.old_map_fd = bpf_map__fd(skel->maps.ca_update_2);
+ err = bpf_link_update(bpf_link__fd(link),
+ bpf_map__fd(skel->maps.ca_update_1),
+ &opts);
+ ASSERT_OK(err, "bpf_link_update_success");
+
+ bpf_link__destroy(link);
+
+ tcp_ca_update__destroy(skel);
}
void test_bpf_tcp_ca(void)
@@ -232,4 +535,26 @@ void test_bpf_tcp_ca(void)
test_dctcp();
if (test__start_subtest("cubic"))
test_cubic();
+ if (test__start_subtest("invalid_license"))
+ test_invalid_license();
+ if (test__start_subtest("dctcp_fallback"))
+ test_dctcp_fallback();
+ if (test__start_subtest("rel_setsockopt"))
+ test_rel_setsockopt();
+ if (test__start_subtest("write_sk_pacing"))
+ test_write_sk_pacing();
+ if (test__start_subtest("incompl_cong_ops"))
+ test_incompl_cong_ops();
+ if (test__start_subtest("unsupp_cong_op"))
+ test_unsupp_cong_op();
+ if (test__start_subtest("update_ca"))
+ test_update_ca();
+ if (test__start_subtest("update_wrong"))
+ test_update_wrong();
+ if (test__start_subtest("mixed_links"))
+ test_mixed_links();
+ if (test__start_subtest("multi_links"))
+ test_multi_links();
+ if (test__start_subtest("link_replace"))
+ test_link_replace();
}
diff --git a/tools/testing/selftests/bpf/prog_tests/bpf_verif_scale.c b/tools/testing/selftests/bpf/prog_tests/bpf_verif_scale.c
index e9f2f12ba06b..4c6ada5b270b 100644
--- a/tools/testing/selftests/bpf/prog_tests/bpf_verif_scale.c
+++ b/tools/testing/selftests/bpf/prog_tests/bpf_verif_scale.c
@@ -19,16 +19,28 @@ extern int extra_prog_load_log_flags;
static int check_load(const char *file, enum bpf_prog_type type)
{
- struct bpf_prog_load_attr attr;
struct bpf_object *obj = NULL;
- int err, prog_fd;
-
- memset(&attr, 0, sizeof(struct bpf_prog_load_attr));
- attr.file = file;
- attr.prog_type = type;
- attr.log_level = 4 | extra_prog_load_log_flags;
- attr.prog_flags = BPF_F_TEST_RND_HI32;
- err = bpf_prog_load_xattr(&attr, &obj, &prog_fd);
+ struct bpf_program *prog;
+ int err;
+
+ obj = bpf_object__open_file(file, NULL);
+ err = libbpf_get_error(obj);
+ if (err)
+ return err;
+
+ prog = bpf_object__next_program(obj, NULL);
+ if (!prog) {
+ err = -ENOENT;
+ goto err_out;
+ }
+
+ bpf_program__set_type(prog, type);
+ bpf_program__set_flags(prog, testing_prog_flags());
+ bpf_program__set_log_level(prog, 4 | extra_prog_load_log_flags);
+
+ err = bpf_object__load(obj);
+
+err_out:
bpf_object__close(obj);
return err;
}
@@ -39,77 +51,189 @@ struct scale_test_def {
bool fails;
};
-void test_bpf_verif_scale(void)
-{
- struct scale_test_def tests[] = {
- { "loop3.o", BPF_PROG_TYPE_RAW_TRACEPOINT, true /* fails */ },
-
- { "test_verif_scale1.o", BPF_PROG_TYPE_SCHED_CLS },
- { "test_verif_scale2.o", BPF_PROG_TYPE_SCHED_CLS },
- { "test_verif_scale3.o", BPF_PROG_TYPE_SCHED_CLS },
-
- { "pyperf_global.o", BPF_PROG_TYPE_RAW_TRACEPOINT },
-
- /* full unroll by llvm */
- { "pyperf50.o", BPF_PROG_TYPE_RAW_TRACEPOINT },
- { "pyperf100.o", BPF_PROG_TYPE_RAW_TRACEPOINT },
- { "pyperf180.o", BPF_PROG_TYPE_RAW_TRACEPOINT },
-
- /* partial unroll. llvm will unroll loop ~150 times.
- * C loop count -> 600.
- * Asm loop count -> 4.
- * 16k insns in loop body.
- * Total of 5 such loops. Total program size ~82k insns.
- */
- { "pyperf600.o", BPF_PROG_TYPE_RAW_TRACEPOINT },
-
- /* no unroll at all.
- * C loop count -> 600.
- * ASM loop count -> 600.
- * ~110 insns in loop body.
- * Total of 5 such loops. Total program size ~1500 insns.
- */
- { "pyperf600_nounroll.o", BPF_PROG_TYPE_RAW_TRACEPOINT },
-
- { "loop1.o", BPF_PROG_TYPE_RAW_TRACEPOINT },
- { "loop2.o", BPF_PROG_TYPE_RAW_TRACEPOINT },
- { "loop4.o", BPF_PROG_TYPE_SCHED_CLS },
- { "loop5.o", BPF_PROG_TYPE_SCHED_CLS },
-
- /* partial unroll. 19k insn in a loop.
- * Total program size 20.8k insn.
- * ~350k processed_insns
- */
- { "strobemeta.o", BPF_PROG_TYPE_RAW_TRACEPOINT },
-
- /* no unroll, tiny loops */
- { "strobemeta_nounroll1.o", BPF_PROG_TYPE_RAW_TRACEPOINT },
- { "strobemeta_nounroll2.o", BPF_PROG_TYPE_RAW_TRACEPOINT },
-
- { "test_sysctl_loop1.o", BPF_PROG_TYPE_CGROUP_SYSCTL },
- { "test_sysctl_loop2.o", BPF_PROG_TYPE_CGROUP_SYSCTL },
-
- { "test_xdp_loop.o", BPF_PROG_TYPE_XDP },
- { "test_seg6_loop.o", BPF_PROG_TYPE_LWT_SEG6LOCAL },
- };
+static void scale_test(const char *file,
+ enum bpf_prog_type attach_type,
+ bool should_fail)
+{
libbpf_print_fn_t old_print_fn = NULL;
- int err, i;
+ int err;
if (env.verifier_stats) {
test__force_log();
old_print_fn = libbpf_set_print(libbpf_debug_print);
}
- for (i = 0; i < ARRAY_SIZE(tests); i++) {
- const struct scale_test_def *test = &tests[i];
-
- if (!test__start_subtest(test->file))
- continue;
-
- err = check_load(test->file, test->attach_type);
- CHECK_FAIL(err && !test->fails);
- }
+ err = check_load(file, attach_type);
+ if (should_fail)
+ ASSERT_ERR(err, "expect_error");
+ else
+ ASSERT_OK(err, "expect_success");
if (env.verifier_stats)
libbpf_set_print(old_print_fn);
}
+
+void test_verif_scale1()
+{
+ scale_test("test_verif_scale1.bpf.o", BPF_PROG_TYPE_SCHED_CLS, false);
+}
+
+void test_verif_scale2()
+{
+ scale_test("test_verif_scale2.bpf.o", BPF_PROG_TYPE_SCHED_CLS, false);
+}
+
+void test_verif_scale3()
+{
+ scale_test("test_verif_scale3.bpf.o", BPF_PROG_TYPE_SCHED_CLS, false);
+}
+
+void test_verif_scale_pyperf_global()
+{
+ scale_test("pyperf_global.bpf.o", BPF_PROG_TYPE_RAW_TRACEPOINT, false);
+}
+
+void test_verif_scale_pyperf_subprogs()
+{
+ scale_test("pyperf_subprogs.bpf.o", BPF_PROG_TYPE_RAW_TRACEPOINT, false);
+}
+
+void test_verif_scale_pyperf50()
+{
+ /* full unroll by llvm */
+ scale_test("pyperf50.bpf.o", BPF_PROG_TYPE_RAW_TRACEPOINT, false);
+}
+
+void test_verif_scale_pyperf100()
+{
+ /* full unroll by llvm */
+ scale_test("pyperf100.bpf.o", BPF_PROG_TYPE_RAW_TRACEPOINT, false);
+}
+
+void test_verif_scale_pyperf180()
+{
+ /* full unroll by llvm */
+ scale_test("pyperf180.bpf.o", BPF_PROG_TYPE_RAW_TRACEPOINT, false);
+}
+
+void test_verif_scale_pyperf600()
+{
+ /* partial unroll. llvm will unroll loop ~150 times.
+ * C loop count -> 600.
+ * Asm loop count -> 4.
+ * 16k insns in loop body.
+ * Total of 5 such loops. Total program size ~82k insns.
+ */
+ scale_test("pyperf600.bpf.o", BPF_PROG_TYPE_RAW_TRACEPOINT, false);
+}
+
+void test_verif_scale_pyperf600_bpf_loop(void)
+{
+ /* use the bpf_loop helper*/
+ scale_test("pyperf600_bpf_loop.bpf.o", BPF_PROG_TYPE_RAW_TRACEPOINT, false);
+}
+
+void test_verif_scale_pyperf600_nounroll()
+{
+ /* no unroll at all.
+ * C loop count -> 600.
+ * ASM loop count -> 600.
+ * ~110 insns in loop body.
+ * Total of 5 such loops. Total program size ~1500 insns.
+ */
+ scale_test("pyperf600_nounroll.bpf.o", BPF_PROG_TYPE_RAW_TRACEPOINT, false);
+}
+
+void test_verif_scale_pyperf600_iter()
+{
+ /* open-coded BPF iterator version */
+ scale_test("pyperf600_iter.bpf.o", BPF_PROG_TYPE_RAW_TRACEPOINT, false);
+}
+
+void test_verif_scale_loop1()
+{
+ scale_test("loop1.bpf.o", BPF_PROG_TYPE_RAW_TRACEPOINT, false);
+}
+
+void test_verif_scale_loop2()
+{
+ scale_test("loop2.bpf.o", BPF_PROG_TYPE_RAW_TRACEPOINT, false);
+}
+
+void test_verif_scale_loop3_fail()
+{
+ scale_test("loop3.bpf.o", BPF_PROG_TYPE_RAW_TRACEPOINT, true /* fails */);
+}
+
+void test_verif_scale_loop4()
+{
+ scale_test("loop4.bpf.o", BPF_PROG_TYPE_SCHED_CLS, false);
+}
+
+void test_verif_scale_loop5()
+{
+ scale_test("loop5.bpf.o", BPF_PROG_TYPE_SCHED_CLS, false);
+}
+
+void test_verif_scale_loop6()
+{
+ scale_test("loop6.bpf.o", BPF_PROG_TYPE_KPROBE, false);
+}
+
+void test_verif_scale_strobemeta()
+{
+ /* partial unroll. 19k insn in a loop.
+ * Total program size 20.8k insn.
+ * ~350k processed_insns
+ */
+ scale_test("strobemeta.bpf.o", BPF_PROG_TYPE_RAW_TRACEPOINT, false);
+}
+
+void test_verif_scale_strobemeta_bpf_loop(void)
+{
+ /* use the bpf_loop helper*/
+ scale_test("strobemeta_bpf_loop.bpf.o", BPF_PROG_TYPE_RAW_TRACEPOINT, false);
+}
+
+void test_verif_scale_strobemeta_nounroll1()
+{
+ /* no unroll, tiny loops */
+ scale_test("strobemeta_nounroll1.bpf.o", BPF_PROG_TYPE_RAW_TRACEPOINT, false);
+}
+
+void test_verif_scale_strobemeta_nounroll2()
+{
+ /* no unroll, tiny loops */
+ scale_test("strobemeta_nounroll2.bpf.o", BPF_PROG_TYPE_RAW_TRACEPOINT, false);
+}
+
+void test_verif_scale_strobemeta_subprogs()
+{
+ /* non-inlined subprogs */
+ scale_test("strobemeta_subprogs.bpf.o", BPF_PROG_TYPE_RAW_TRACEPOINT, false);
+}
+
+void test_verif_scale_sysctl_loop1()
+{
+ scale_test("test_sysctl_loop1.bpf.o", BPF_PROG_TYPE_CGROUP_SYSCTL, false);
+}
+
+void test_verif_scale_sysctl_loop2()
+{
+ scale_test("test_sysctl_loop2.bpf.o", BPF_PROG_TYPE_CGROUP_SYSCTL, false);
+}
+
+void test_verif_scale_xdp_loop()
+{
+ scale_test("test_xdp_loop.bpf.o", BPF_PROG_TYPE_XDP, false);
+}
+
+void test_verif_scale_seg6_loop()
+{
+ scale_test("test_seg6_loop.bpf.o", BPF_PROG_TYPE_LWT_SEG6LOCAL, false);
+}
+
+void test_verif_twfw()
+{
+ scale_test("twfw.bpf.o", BPF_PROG_TYPE_CGROUP_SKB, false);
+}
diff --git a/tools/testing/selftests/bpf/test_btf.c b/tools/testing/selftests/bpf/prog_tests/btf.c
index 305fae8f80a9..00965a6e83bb 100644
--- a/tools/testing/selftests/bpf/test_btf.c
+++ b/tools/testing/selftests/bpf/prog_tests/btf.c
@@ -8,7 +8,6 @@
#include <linux/filter.h>
#include <linux/unistd.h>
#include <bpf/bpf.h>
-#include <sys/resource.h>
#include <libelf.h>
#include <gelf.h>
#include <string.h>
@@ -22,68 +21,28 @@
#include <bpf/libbpf.h>
#include <bpf/btf.h>
-#include "bpf_rlimit.h"
#include "bpf_util.h"
-#include "test_btf.h"
+#include "../test_btf.h"
+#include "test_progs.h"
#define MAX_INSNS 512
#define MAX_SUBPROGS 16
-static uint32_t pass_cnt;
-static uint32_t error_cnt;
-static uint32_t skip_cnt;
+static int duration = 0;
+static bool always_log;
-#define CHECK(condition, format...) ({ \
- int __ret = !!(condition); \
- if (__ret) { \
- fprintf(stderr, "%s:%d:FAIL ", __func__, __LINE__); \
- fprintf(stderr, format); \
- } \
- __ret; \
-})
+#undef CHECK
+#define CHECK(condition, format...) _CHECK(condition, "check", duration, format)
-static int count_result(int err)
-{
- if (err)
- error_cnt++;
- else
- pass_cnt++;
-
- fprintf(stderr, "\n");
- return err;
-}
-
-static int __base_pr(enum libbpf_print_level level __attribute__((unused)),
- const char *format, va_list args)
-{
- return vfprintf(stderr, format, args);
-}
-
-#define BTF_END_RAW 0xdeadbeef
#define NAME_TBD 0xdeadb33f
-#define NAME_NTH(N) (0xffff0000 | N)
-#define IS_NAME_NTH(X) ((X & 0xffff0000) == 0xffff0000)
+#define NAME_NTH(N) (0xfffe0000 | N)
+#define IS_NAME_NTH(X) ((X & 0xffff0000) == 0xfffe0000)
#define GET_NAME_NTH_IDX(X) (X & 0x0000ffff)
#define MAX_NR_RAW_U32 1024
#define BTF_LOG_BUF_SIZE 65535
-static struct args {
- unsigned int raw_test_num;
- unsigned int file_test_num;
- unsigned int get_info_test_num;
- unsigned int info_raw_test_num;
- unsigned int dedup_test_num;
- bool raw_test;
- bool file_test;
- bool get_info_test;
- bool pprint_test;
- bool always_log;
- bool info_raw_test;
- bool dedup_test;
-} args;
-
static char btf_log_buf[BTF_LOG_BUF_SIZE];
static struct btf_header hdr_tmpl = {
@@ -920,6 +879,34 @@ static struct btf_raw_test raw_tests[] = {
.btf_load_err = true,
.err_str = "Invalid elem",
},
+{
+ .descr = "var after datasec, ptr followed by modifier",
+ .raw_types = {
+ /* .bss section */ /* [1] */
+ BTF_TYPE_ENC(NAME_TBD, BTF_INFO_ENC(BTF_KIND_DATASEC, 0, 2),
+ sizeof(void*)+4),
+ BTF_VAR_SECINFO_ENC(4, 0, sizeof(void*)),
+ BTF_VAR_SECINFO_ENC(6, sizeof(void*), 4),
+ /* int */ /* [2] */
+ BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4),
+ /* int* */ /* [3] */
+ BTF_TYPE_ENC(0, BTF_INFO_ENC(BTF_KIND_PTR, 0, 0), 2),
+ BTF_VAR_ENC(NAME_TBD, 3, 0), /* [4] */
+ /* const int */ /* [5] */
+ BTF_TYPE_ENC(0, BTF_INFO_ENC(BTF_KIND_CONST, 0, 0), 2),
+ BTF_VAR_ENC(NAME_TBD, 5, 0), /* [6] */
+ BTF_END_RAW,
+ },
+ .str_sec = "\0a\0b\0c\0",
+ .str_sec_size = sizeof("\0a\0b\0c\0"),
+ .map_type = BPF_MAP_TYPE_ARRAY,
+ .map_name = ".bss",
+ .key_size = sizeof(int),
+ .value_size = sizeof(void*)+4,
+ .key_type_id = 0,
+ .value_type_id = 1,
+ .max_entries = 1,
+},
/* Test member exceeds the size of struct.
*
* struct A {
@@ -952,7 +939,7 @@ static struct btf_raw_test raw_tests[] = {
.err_str = "Member exceeds struct_size",
},
-/* Test member exeeds the size of struct
+/* Test member exceeds the size of struct
*
* struct A {
* int m;
@@ -986,7 +973,7 @@ static struct btf_raw_test raw_tests[] = {
.err_str = "Member exceeds struct_size",
},
-/* Test member exeeds the size of struct
+/* Test member exceeds the size of struct
*
* struct A {
* int m;
@@ -1941,7 +1928,7 @@ static struct btf_raw_test raw_tests[] = {
.raw_types = {
/* int */ /* [1] */
BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4),
- BTF_TYPE_ENC(0, 0x10000000, 4),
+ BTF_TYPE_ENC(0, 0x20000000, 4),
BTF_END_RAW,
},
.str_sec = "",
@@ -2937,26 +2924,6 @@ static struct btf_raw_test raw_tests[] = {
},
{
- .descr = "invalid enum kind_flag",
- .raw_types = {
- BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [1] */
- BTF_TYPE_ENC(0, BTF_INFO_ENC(BTF_KIND_ENUM, 1, 1), 4), /* [2] */
- BTF_ENUM_ENC(NAME_TBD, 0),
- BTF_END_RAW,
- },
- BTF_STR_SEC("\0A"),
- .map_type = BPF_MAP_TYPE_ARRAY,
- .map_name = "enum_type_check_btf",
- .key_size = sizeof(int),
- .value_size = sizeof(int),
- .key_type_id = 1,
- .value_type_id = 1,
- .max_entries = 4,
- .btf_load_err = true,
- .err_str = "Invalid btf_info kind_flag",
-},
-
-{
.descr = "valid fwd kind_flag",
.raw_types = {
BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [1] */
@@ -3547,6 +3514,700 @@ static struct btf_raw_test raw_tests[] = {
.value_type_id = 3 /* arr_t */,
.max_entries = 4,
},
+/*
+ * elf .rodata section size 4 and btf .rodata section vlen 0.
+ */
+{
+ .descr = "datasec: vlen == 0",
+ .raw_types = {
+ /* int */
+ BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [1] */
+ /* .rodata section */
+ BTF_TYPE_ENC(NAME_NTH(1), BTF_INFO_ENC(BTF_KIND_DATASEC, 0, 0), 4),
+ /* [2] */
+ BTF_END_RAW,
+ },
+ BTF_STR_SEC("\0.rodata"),
+ .map_type = BPF_MAP_TYPE_ARRAY,
+ .key_size = sizeof(int),
+ .value_size = sizeof(int),
+ .key_type_id = 1,
+ .value_type_id = 1,
+ .max_entries = 1,
+},
+{
+ .descr = "datasec: name '?.foo bar:buz' is ok",
+ .raw_types = {
+ /* int */
+ BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [1] */
+ /* VAR x */ /* [2] */
+ BTF_TYPE_ENC(1, BTF_INFO_ENC(BTF_KIND_VAR, 0, 0), 1),
+ BTF_VAR_STATIC,
+ /* DATASEC ?.data */ /* [3] */
+ BTF_TYPE_ENC(3, BTF_INFO_ENC(BTF_KIND_DATASEC, 0, 1), 4),
+ BTF_VAR_SECINFO_ENC(2, 0, 4),
+ BTF_END_RAW,
+ },
+ BTF_STR_SEC("\0x\0?.foo bar:buz"),
+},
+{
+ .descr = "type name '?foo' is not ok",
+ .raw_types = {
+ /* union ?foo; */
+ BTF_TYPE_ENC(1, BTF_INFO_ENC(BTF_KIND_FWD, 1, 0), 0), /* [1] */
+ BTF_END_RAW,
+ },
+ BTF_STR_SEC("\0?foo"),
+ .err_str = "Invalid name",
+ .btf_load_err = true,
+},
+
+{
+ .descr = "float test #1, well-formed",
+ .raw_types = {
+ BTF_TYPE_INT_ENC(NAME_TBD, BTF_INT_SIGNED, 0, 32, 4),
+ /* [1] */
+ BTF_TYPE_FLOAT_ENC(NAME_TBD, 2), /* [2] */
+ BTF_TYPE_FLOAT_ENC(NAME_TBD, 4), /* [3] */
+ BTF_TYPE_FLOAT_ENC(NAME_TBD, 8), /* [4] */
+ BTF_TYPE_FLOAT_ENC(NAME_TBD, 12), /* [5] */
+ BTF_TYPE_FLOAT_ENC(NAME_TBD, 16), /* [6] */
+ BTF_STRUCT_ENC(NAME_TBD, 5, 48), /* [7] */
+ BTF_MEMBER_ENC(NAME_TBD, 2, 0),
+ BTF_MEMBER_ENC(NAME_TBD, 3, 32),
+ BTF_MEMBER_ENC(NAME_TBD, 4, 64),
+ BTF_MEMBER_ENC(NAME_TBD, 5, 128),
+ BTF_MEMBER_ENC(NAME_TBD, 6, 256),
+ BTF_END_RAW,
+ },
+ BTF_STR_SEC("\0int\0_Float16\0float\0double\0_Float80\0long_double"
+ "\0floats\0a\0b\0c\0d\0e"),
+ .map_type = BPF_MAP_TYPE_ARRAY,
+ .map_name = "float_type_check_btf",
+ .key_size = sizeof(int),
+ .value_size = 48,
+ .key_type_id = 1,
+ .value_type_id = 7,
+ .max_entries = 1,
+},
+{
+ .descr = "float test #2, invalid vlen",
+ .raw_types = {
+ BTF_TYPE_INT_ENC(NAME_TBD, BTF_INT_SIGNED, 0, 32, 4),
+ /* [1] */
+ BTF_TYPE_ENC(NAME_TBD, BTF_INFO_ENC(BTF_KIND_FLOAT, 0, 1), 4),
+ /* [2] */
+ BTF_END_RAW,
+ },
+ BTF_STR_SEC("\0int\0float"),
+ .map_type = BPF_MAP_TYPE_ARRAY,
+ .map_name = "float_type_check_btf",
+ .key_size = sizeof(int),
+ .value_size = 4,
+ .key_type_id = 1,
+ .value_type_id = 2,
+ .max_entries = 1,
+ .btf_load_err = true,
+ .err_str = "vlen != 0",
+},
+{
+ .descr = "float test #3, invalid kind_flag",
+ .raw_types = {
+ BTF_TYPE_INT_ENC(NAME_TBD, BTF_INT_SIGNED, 0, 32, 4),
+ /* [1] */
+ BTF_TYPE_ENC(NAME_TBD, BTF_INFO_ENC(BTF_KIND_FLOAT, 1, 0), 4),
+ /* [2] */
+ BTF_END_RAW,
+ },
+ BTF_STR_SEC("\0int\0float"),
+ .map_type = BPF_MAP_TYPE_ARRAY,
+ .map_name = "float_type_check_btf",
+ .key_size = sizeof(int),
+ .value_size = 4,
+ .key_type_id = 1,
+ .value_type_id = 2,
+ .max_entries = 1,
+ .btf_load_err = true,
+ .err_str = "Invalid btf_info kind_flag",
+},
+{
+ .descr = "float test #4, member does not fit",
+ .raw_types = {
+ BTF_TYPE_INT_ENC(NAME_TBD, BTF_INT_SIGNED, 0, 32, 4),
+ /* [1] */
+ BTF_TYPE_FLOAT_ENC(NAME_TBD, 4), /* [2] */
+ BTF_STRUCT_ENC(NAME_TBD, 1, 2), /* [3] */
+ BTF_MEMBER_ENC(NAME_TBD, 2, 0),
+ BTF_END_RAW,
+ },
+ BTF_STR_SEC("\0int\0float\0floats\0x"),
+ .map_type = BPF_MAP_TYPE_ARRAY,
+ .map_name = "float_type_check_btf",
+ .key_size = sizeof(int),
+ .value_size = 4,
+ .key_type_id = 1,
+ .value_type_id = 3,
+ .max_entries = 1,
+ .btf_load_err = true,
+ .err_str = "Member exceeds struct_size",
+},
+{
+ .descr = "float test #5, member is not properly aligned",
+ .raw_types = {
+ BTF_TYPE_INT_ENC(NAME_TBD, BTF_INT_SIGNED, 0, 32, 4),
+ /* [1] */
+ BTF_TYPE_FLOAT_ENC(NAME_TBD, 4), /* [2] */
+ BTF_STRUCT_ENC(NAME_TBD, 1, 8), /* [3] */
+ BTF_MEMBER_ENC(NAME_TBD, 2, 8),
+ BTF_END_RAW,
+ },
+ BTF_STR_SEC("\0int\0float\0floats\0x"),
+ .map_type = BPF_MAP_TYPE_ARRAY,
+ .map_name = "float_type_check_btf",
+ .key_size = sizeof(int),
+ .value_size = 4,
+ .key_type_id = 1,
+ .value_type_id = 3,
+ .max_entries = 1,
+ .btf_load_err = true,
+ .err_str = "Member is not properly aligned",
+},
+{
+ .descr = "float test #6, invalid size",
+ .raw_types = {
+ BTF_TYPE_INT_ENC(NAME_TBD, BTF_INT_SIGNED, 0, 32, 4),
+ /* [1] */
+ BTF_TYPE_FLOAT_ENC(NAME_TBD, 6), /* [2] */
+ BTF_END_RAW,
+ },
+ BTF_STR_SEC("\0int\0float"),
+ .map_type = BPF_MAP_TYPE_ARRAY,
+ .map_name = "float_type_check_btf",
+ .key_size = sizeof(int),
+ .value_size = 6,
+ .key_type_id = 1,
+ .value_type_id = 2,
+ .max_entries = 1,
+ .btf_load_err = true,
+ .err_str = "Invalid type_size",
+},
+
+{
+ .descr = "decl_tag test #1, struct/member, well-formed",
+ .raw_types = {
+ BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [1] */
+ BTF_STRUCT_ENC(0, 2, 8), /* [2] */
+ BTF_MEMBER_ENC(NAME_TBD, 1, 0),
+ BTF_MEMBER_ENC(NAME_TBD, 1, 32),
+ BTF_DECL_TAG_ENC(NAME_TBD, 2, -1),
+ BTF_DECL_TAG_ENC(NAME_TBD, 2, 0),
+ BTF_DECL_TAG_ENC(NAME_TBD, 2, 1),
+ BTF_END_RAW,
+ },
+ BTF_STR_SEC("\0m1\0m2\0tag1\0tag2\0tag3"),
+ .map_type = BPF_MAP_TYPE_ARRAY,
+ .map_name = "tag_type_check_btf",
+ .key_size = sizeof(int),
+ .value_size = 8,
+ .key_type_id = 1,
+ .value_type_id = 2,
+ .max_entries = 1,
+},
+{
+ .descr = "decl_tag test #2, union/member, well-formed",
+ .raw_types = {
+ BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [1] */
+ BTF_UNION_ENC(NAME_TBD, 2, 4), /* [2] */
+ BTF_MEMBER_ENC(NAME_TBD, 1, 0),
+ BTF_MEMBER_ENC(NAME_TBD, 1, 0),
+ BTF_DECL_TAG_ENC(NAME_TBD, 2, -1),
+ BTF_DECL_TAG_ENC(NAME_TBD, 2, 0),
+ BTF_DECL_TAG_ENC(NAME_TBD, 2, 1),
+ BTF_END_RAW,
+ },
+ BTF_STR_SEC("\0t\0m1\0m2\0tag1\0tag2\0tag3"),
+ .map_type = BPF_MAP_TYPE_ARRAY,
+ .map_name = "tag_type_check_btf",
+ .key_size = sizeof(int),
+ .value_size = 4,
+ .key_type_id = 1,
+ .value_type_id = 2,
+ .max_entries = 1,
+},
+{
+ .descr = "decl_tag test #3, variable, well-formed",
+ .raw_types = {
+ BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [1] */
+ BTF_VAR_ENC(NAME_TBD, 1, 0), /* [2] */
+ BTF_VAR_ENC(NAME_TBD, 1, 1), /* [3] */
+ BTF_DECL_TAG_ENC(NAME_TBD, 2, -1),
+ BTF_DECL_TAG_ENC(NAME_TBD, 3, -1),
+ BTF_END_RAW,
+ },
+ BTF_STR_SEC("\0local\0global\0tag1\0tag2"),
+ .map_type = BPF_MAP_TYPE_ARRAY,
+ .map_name = "tag_type_check_btf",
+ .key_size = sizeof(int),
+ .value_size = 4,
+ .key_type_id = 1,
+ .value_type_id = 1,
+ .max_entries = 1,
+},
+{
+ .descr = "decl_tag test #4, func/parameter, well-formed",
+ .raw_types = {
+ BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [1] */
+ BTF_FUNC_PROTO_ENC(0, 2), /* [2] */
+ BTF_FUNC_PROTO_ARG_ENC(NAME_TBD, 1),
+ BTF_FUNC_PROTO_ARG_ENC(NAME_TBD, 1),
+ BTF_FUNC_ENC(NAME_TBD, 2), /* [3] */
+ BTF_DECL_TAG_ENC(NAME_TBD, 3, -1),
+ BTF_DECL_TAG_ENC(NAME_TBD, 3, 0),
+ BTF_DECL_TAG_ENC(NAME_TBD, 3, 1),
+ BTF_END_RAW,
+ },
+ BTF_STR_SEC("\0arg1\0arg2\0f\0tag1\0tag2\0tag3"),
+ .map_type = BPF_MAP_TYPE_ARRAY,
+ .map_name = "tag_type_check_btf",
+ .key_size = sizeof(int),
+ .value_size = 4,
+ .key_type_id = 1,
+ .value_type_id = 1,
+ .max_entries = 1,
+},
+{
+ .descr = "decl_tag test #5, invalid value",
+ .raw_types = {
+ BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [1] */
+ BTF_VAR_ENC(NAME_TBD, 1, 0), /* [2] */
+ BTF_DECL_TAG_ENC(0, 2, -1),
+ BTF_END_RAW,
+ },
+ BTF_STR_SEC("\0local\0tag"),
+ .map_type = BPF_MAP_TYPE_ARRAY,
+ .map_name = "tag_type_check_btf",
+ .key_size = sizeof(int),
+ .value_size = 4,
+ .key_type_id = 1,
+ .value_type_id = 1,
+ .max_entries = 1,
+ .btf_load_err = true,
+ .err_str = "Invalid value",
+},
+{
+ .descr = "decl_tag test #6, invalid target type",
+ .raw_types = {
+ BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [1] */
+ BTF_DECL_TAG_ENC(NAME_TBD, 1, -1),
+ BTF_END_RAW,
+ },
+ BTF_STR_SEC("\0tag1"),
+ .map_type = BPF_MAP_TYPE_ARRAY,
+ .map_name = "tag_type_check_btf",
+ .key_size = sizeof(int),
+ .value_size = 4,
+ .key_type_id = 1,
+ .value_type_id = 1,
+ .max_entries = 1,
+ .btf_load_err = true,
+ .err_str = "Invalid type",
+},
+{
+ .descr = "decl_tag test #7, invalid vlen",
+ .raw_types = {
+ BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [1] */
+ BTF_VAR_ENC(NAME_TBD, 1, 0), /* [2] */
+ BTF_TYPE_ENC(NAME_TBD, BTF_INFO_ENC(BTF_KIND_DECL_TAG, 0, 1), 2), (0),
+ BTF_END_RAW,
+ },
+ BTF_STR_SEC("\0local\0tag1"),
+ .map_type = BPF_MAP_TYPE_ARRAY,
+ .map_name = "tag_type_check_btf",
+ .key_size = sizeof(int),
+ .value_size = 4,
+ .key_type_id = 1,
+ .value_type_id = 1,
+ .max_entries = 1,
+ .btf_load_err = true,
+ .err_str = "vlen != 0",
+},
+{
+ .descr = "decl_tag test #8, invalid kflag",
+ .raw_types = {
+ BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [1] */
+ BTF_VAR_ENC(NAME_TBD, 1, 0), /* [2] */
+ BTF_TYPE_ENC(NAME_TBD, BTF_INFO_ENC(BTF_KIND_DECL_TAG, 1, 0), 2), (-1),
+ BTF_END_RAW,
+ },
+ BTF_STR_SEC("\0local\0tag1"),
+ .map_type = BPF_MAP_TYPE_ARRAY,
+ .map_name = "tag_type_check_btf",
+ .key_size = sizeof(int),
+ .value_size = 4,
+ .key_type_id = 1,
+ .value_type_id = 1,
+ .max_entries = 1,
+ .btf_load_err = true,
+ .err_str = "Invalid btf_info kind_flag",
+},
+{
+ .descr = "decl_tag test #9, var, invalid component_idx",
+ .raw_types = {
+ BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [1] */
+ BTF_VAR_ENC(NAME_TBD, 1, 0), /* [2] */
+ BTF_DECL_TAG_ENC(NAME_TBD, 2, 0),
+ BTF_END_RAW,
+ },
+ BTF_STR_SEC("\0local\0tag"),
+ .map_type = BPF_MAP_TYPE_ARRAY,
+ .map_name = "tag_type_check_btf",
+ .key_size = sizeof(int),
+ .value_size = 4,
+ .key_type_id = 1,
+ .value_type_id = 1,
+ .max_entries = 1,
+ .btf_load_err = true,
+ .err_str = "Invalid component_idx",
+},
+{
+ .descr = "decl_tag test #10, struct member, invalid component_idx",
+ .raw_types = {
+ BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [1] */
+ BTF_STRUCT_ENC(0, 2, 8), /* [2] */
+ BTF_MEMBER_ENC(NAME_TBD, 1, 0),
+ BTF_MEMBER_ENC(NAME_TBD, 1, 32),
+ BTF_DECL_TAG_ENC(NAME_TBD, 2, 2),
+ BTF_END_RAW,
+ },
+ BTF_STR_SEC("\0m1\0m2\0tag"),
+ .map_type = BPF_MAP_TYPE_ARRAY,
+ .map_name = "tag_type_check_btf",
+ .key_size = sizeof(int),
+ .value_size = 8,
+ .key_type_id = 1,
+ .value_type_id = 2,
+ .max_entries = 1,
+ .btf_load_err = true,
+ .err_str = "Invalid component_idx",
+},
+{
+ .descr = "decl_tag test #11, func parameter, invalid component_idx",
+ .raw_types = {
+ BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [1] */
+ BTF_FUNC_PROTO_ENC(0, 2), /* [2] */
+ BTF_FUNC_PROTO_ARG_ENC(NAME_TBD, 1),
+ BTF_FUNC_PROTO_ARG_ENC(NAME_TBD, 1),
+ BTF_FUNC_ENC(NAME_TBD, 2), /* [3] */
+ BTF_DECL_TAG_ENC(NAME_TBD, 3, 2),
+ BTF_END_RAW,
+ },
+ BTF_STR_SEC("\0arg1\0arg2\0f\0tag"),
+ .map_type = BPF_MAP_TYPE_ARRAY,
+ .map_name = "tag_type_check_btf",
+ .key_size = sizeof(int),
+ .value_size = 4,
+ .key_type_id = 1,
+ .value_type_id = 1,
+ .max_entries = 1,
+ .btf_load_err = true,
+ .err_str = "Invalid component_idx",
+},
+{
+ .descr = "decl_tag test #12, < -1 component_idx",
+ .raw_types = {
+ BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [1] */
+ BTF_FUNC_PROTO_ENC(0, 2), /* [2] */
+ BTF_FUNC_PROTO_ARG_ENC(NAME_TBD, 1),
+ BTF_FUNC_PROTO_ARG_ENC(NAME_TBD, 1),
+ BTF_FUNC_ENC(NAME_TBD, 2), /* [3] */
+ BTF_DECL_TAG_ENC(NAME_TBD, 3, -2),
+ BTF_END_RAW,
+ },
+ BTF_STR_SEC("\0arg1\0arg2\0f\0tag"),
+ .map_type = BPF_MAP_TYPE_ARRAY,
+ .map_name = "tag_type_check_btf",
+ .key_size = sizeof(int),
+ .value_size = 4,
+ .key_type_id = 1,
+ .value_type_id = 1,
+ .max_entries = 1,
+ .btf_load_err = true,
+ .err_str = "Invalid component_idx",
+},
+{
+ .descr = "decl_tag test #13, typedef, well-formed",
+ .raw_types = {
+ BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [1] */
+ BTF_TYPEDEF_ENC(NAME_TBD, 1), /* [2] */
+ BTF_DECL_TAG_ENC(NAME_TBD, 2, -1),
+ BTF_END_RAW,
+ },
+ BTF_STR_SEC("\0t\0tag"),
+ .map_type = BPF_MAP_TYPE_ARRAY,
+ .map_name = "tag_type_check_btf",
+ .key_size = sizeof(int),
+ .value_size = 4,
+ .key_type_id = 1,
+ .value_type_id = 1,
+ .max_entries = 1,
+},
+{
+ .descr = "decl_tag test #14, typedef, invalid component_idx",
+ .raw_types = {
+ BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [1] */
+ BTF_TYPEDEF_ENC(NAME_TBD, 1), /* [2] */
+ BTF_DECL_TAG_ENC(NAME_TBD, 2, 0),
+ BTF_END_RAW,
+ },
+ BTF_STR_SEC("\0local\0tag"),
+ .map_type = BPF_MAP_TYPE_ARRAY,
+ .map_name = "tag_type_check_btf",
+ .key_size = sizeof(int),
+ .value_size = 4,
+ .key_type_id = 1,
+ .value_type_id = 1,
+ .max_entries = 1,
+ .btf_load_err = true,
+ .err_str = "Invalid component_idx",
+},
+{
+ .descr = "decl_tag test #15, func, invalid func proto",
+ .raw_types = {
+ BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [1] */
+ BTF_DECL_TAG_ENC(NAME_TBD, 3, 0), /* [2] */
+ BTF_FUNC_ENC(NAME_TBD, 8), /* [3] */
+ BTF_END_RAW,
+ },
+ BTF_STR_SEC("\0tag\0func"),
+ .map_type = BPF_MAP_TYPE_ARRAY,
+ .map_name = "tag_type_check_btf",
+ .key_size = sizeof(int),
+ .value_size = 4,
+ .key_type_id = 1,
+ .value_type_id = 1,
+ .max_entries = 1,
+ .btf_load_err = true,
+ .err_str = "Invalid type_id",
+},
+{
+ .descr = "decl_tag test #16, func proto, return type",
+ .raw_types = {
+ BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [1] */
+ BTF_VAR_ENC(NAME_TBD, 1, 0), /* [2] */
+ BTF_TYPE_ENC(NAME_TBD, BTF_INFO_ENC(BTF_KIND_DECL_TAG, 0, 0), 2), (-1), /* [3] */
+ BTF_FUNC_PROTO_ENC(3, 0), /* [4] */
+ BTF_END_RAW,
+ },
+ BTF_STR_SEC("\0local\0tag1"),
+ .btf_load_err = true,
+ .err_str = "Invalid return type",
+},
+{
+ .descr = "decl_tag test #17, func proto, argument",
+ .raw_types = {
+ BTF_TYPE_ENC(NAME_TBD, BTF_INFO_ENC(BTF_KIND_DECL_TAG, 0, 0), 4), (-1), /* [1] */
+ BTF_TYPE_ENC(0, BTF_INFO_ENC(BTF_KIND_PTR, 0, 0), 0), /* [2] */
+ BTF_FUNC_PROTO_ENC(0, 1), /* [3] */
+ BTF_FUNC_PROTO_ARG_ENC(NAME_TBD, 1),
+ BTF_VAR_ENC(NAME_TBD, 2, 0), /* [4] */
+ BTF_END_RAW,
+ },
+ BTF_STR_SEC("\0local\0tag1\0var"),
+ .btf_load_err = true,
+ .err_str = "Invalid arg#1",
+},
+{
+ .descr = "decl_tag test #18, decl_tag as the map key type",
+ .raw_types = {
+ BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [1] */
+ BTF_STRUCT_ENC(0, 2, 8), /* [2] */
+ BTF_MEMBER_ENC(NAME_TBD, 1, 0),
+ BTF_MEMBER_ENC(NAME_TBD, 1, 32),
+ BTF_DECL_TAG_ENC(NAME_TBD, 2, -1), /* [3] */
+ BTF_END_RAW,
+ },
+ BTF_STR_SEC("\0m1\0m2\0tag"),
+ .map_type = BPF_MAP_TYPE_HASH,
+ .map_name = "tag_type_check_btf",
+ .key_size = 8,
+ .value_size = 4,
+ .key_type_id = 3,
+ .value_type_id = 1,
+ .max_entries = 1,
+ .map_create_err = true,
+},
+{
+ .descr = "decl_tag test #19, decl_tag as the map value type",
+ .raw_types = {
+ BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [1] */
+ BTF_STRUCT_ENC(0, 2, 8), /* [2] */
+ BTF_MEMBER_ENC(NAME_TBD, 1, 0),
+ BTF_MEMBER_ENC(NAME_TBD, 1, 32),
+ BTF_DECL_TAG_ENC(NAME_TBD, 2, -1), /* [3] */
+ BTF_END_RAW,
+ },
+ BTF_STR_SEC("\0m1\0m2\0tag"),
+ .map_type = BPF_MAP_TYPE_HASH,
+ .map_name = "tag_type_check_btf",
+ .key_size = 4,
+ .value_size = 8,
+ .key_type_id = 1,
+ .value_type_id = 3,
+ .max_entries = 1,
+ .map_create_err = true,
+},
+{
+ .descr = "type_tag test #1",
+ .raw_types = {
+ BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [1] */
+ BTF_TYPE_TAG_ENC(NAME_TBD, 1), /* [2] */
+ BTF_PTR_ENC(2), /* [3] */
+ BTF_END_RAW,
+ },
+ BTF_STR_SEC("\0tag"),
+ .map_type = BPF_MAP_TYPE_ARRAY,
+ .map_name = "tag_type_check_btf",
+ .key_size = sizeof(int),
+ .value_size = 4,
+ .key_type_id = 1,
+ .value_type_id = 1,
+ .max_entries = 1,
+},
+{
+ .descr = "type_tag test #2, type tag order",
+ .raw_types = {
+ BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [1] */
+ BTF_CONST_ENC(3), /* [2] */
+ BTF_TYPE_TAG_ENC(NAME_TBD, 1), /* [3] */
+ BTF_END_RAW,
+ },
+ BTF_STR_SEC("\0tag"),
+ .map_type = BPF_MAP_TYPE_ARRAY,
+ .map_name = "tag_type_check_btf",
+ .key_size = sizeof(int),
+ .value_size = 4,
+ .key_type_id = 1,
+ .value_type_id = 1,
+ .max_entries = 1,
+ .btf_load_err = true,
+ .err_str = "Type tags don't precede modifiers",
+},
+{
+ .descr = "type_tag test #3, type tag order",
+ .raw_types = {
+ BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [1] */
+ BTF_TYPE_TAG_ENC(NAME_TBD, 3), /* [2] */
+ BTF_CONST_ENC(4), /* [3] */
+ BTF_TYPE_TAG_ENC(NAME_TBD, 1), /* [4] */
+ BTF_END_RAW,
+ },
+ BTF_STR_SEC("\0tag\0tag"),
+ .map_type = BPF_MAP_TYPE_ARRAY,
+ .map_name = "tag_type_check_btf",
+ .key_size = sizeof(int),
+ .value_size = 4,
+ .key_type_id = 1,
+ .value_type_id = 1,
+ .max_entries = 1,
+ .btf_load_err = true,
+ .err_str = "Type tags don't precede modifiers",
+},
+{
+ .descr = "type_tag test #4, type tag order",
+ .raw_types = {
+ BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [1] */
+ BTF_TYPEDEF_ENC(NAME_TBD, 3), /* [2] */
+ BTF_CONST_ENC(4), /* [3] */
+ BTF_TYPE_TAG_ENC(NAME_TBD, 1), /* [4] */
+ BTF_END_RAW,
+ },
+ BTF_STR_SEC("\0tag\0tag"),
+ .map_type = BPF_MAP_TYPE_ARRAY,
+ .map_name = "tag_type_check_btf",
+ .key_size = sizeof(int),
+ .value_size = 4,
+ .key_type_id = 1,
+ .value_type_id = 1,
+ .max_entries = 1,
+ .btf_load_err = true,
+ .err_str = "Type tags don't precede modifiers",
+},
+{
+ .descr = "type_tag test #5, type tag order",
+ .raw_types = {
+ BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [1] */
+ BTF_TYPE_TAG_ENC(NAME_TBD, 3), /* [2] */
+ BTF_CONST_ENC(1), /* [3] */
+ BTF_TYPE_TAG_ENC(NAME_TBD, 2), /* [4] */
+ BTF_END_RAW,
+ },
+ BTF_STR_SEC("\0tag\0tag"),
+ .map_type = BPF_MAP_TYPE_ARRAY,
+ .map_name = "tag_type_check_btf",
+ .key_size = sizeof(int),
+ .value_size = 4,
+ .key_type_id = 1,
+ .value_type_id = 1,
+ .max_entries = 1,
+},
+{
+ .descr = "type_tag test #6, type tag order",
+ .raw_types = {
+ BTF_PTR_ENC(2), /* [1] */
+ BTF_TYPE_TAG_ENC(NAME_TBD, 3), /* [2] */
+ BTF_CONST_ENC(4), /* [3] */
+ BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [4] */
+ BTF_PTR_ENC(6), /* [5] */
+ BTF_CONST_ENC(2), /* [6] */
+ BTF_END_RAW,
+ },
+ BTF_STR_SEC("\0tag"),
+ .map_type = BPF_MAP_TYPE_ARRAY,
+ .map_name = "tag_type_check_btf",
+ .key_size = sizeof(int),
+ .value_size = 4,
+ .key_type_id = 1,
+ .value_type_id = 1,
+ .max_entries = 1,
+ .btf_load_err = true,
+ .err_str = "Type tags don't precede modifiers",
+},
+{
+ .descr = "enum64 test #1, unsigned, size 8",
+ .raw_types = {
+ BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [1] */
+ BTF_TYPE_ENC(NAME_TBD, BTF_INFO_ENC(BTF_KIND_ENUM64, 0, 2), 8), /* [2] */
+ BTF_ENUM64_ENC(NAME_TBD, 0, 0),
+ BTF_ENUM64_ENC(NAME_TBD, 1, 1),
+ BTF_END_RAW,
+ },
+ BTF_STR_SEC("\0a\0b\0c"),
+ .map_type = BPF_MAP_TYPE_ARRAY,
+ .map_name = "tag_type_check_btf",
+ .key_size = sizeof(int),
+ .value_size = 8,
+ .key_type_id = 1,
+ .value_type_id = 2,
+ .max_entries = 1,
+},
+{
+ .descr = "enum64 test #2, signed, size 4",
+ .raw_types = {
+ BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [1] */
+ BTF_TYPE_ENC(NAME_TBD, BTF_INFO_ENC(BTF_KIND_ENUM64, 1, 2), 4), /* [2] */
+ BTF_ENUM64_ENC(NAME_TBD, -1, 0),
+ BTF_ENUM64_ENC(NAME_TBD, 1, 0),
+ BTF_END_RAW,
+ },
+ BTF_STR_SEC("\0a\0b\0c"),
+ .map_type = BPF_MAP_TYPE_ARRAY,
+ .map_name = "tag_type_check_btf",
+ .key_size = sizeof(int),
+ .value_size = 4,
+ .key_type_id = 1,
+ .value_type_id = 2,
+ .max_entries = 1,
+},
}; /* struct btf_raw_test raw_tests[] */
@@ -3654,35 +4315,56 @@ static void *btf_raw_create(const struct btf_header *hdr,
next_str_idx < strs_cnt ? strs_idx[next_str_idx] : NULL;
done:
+ free(strs_idx);
if (err) {
- if (raw_btf)
- free(raw_btf);
- if (strs_idx)
- free(strs_idx);
+ free(raw_btf);
return NULL;
}
return raw_btf;
}
-static int do_test_raw(unsigned int test_num)
+static int load_raw_btf(const void *raw_data, size_t raw_size)
+{
+ LIBBPF_OPTS(bpf_btf_load_opts, opts);
+ int btf_fd;
+
+ if (always_log) {
+ opts.log_buf = btf_log_buf,
+ opts.log_size = BTF_LOG_BUF_SIZE,
+ opts.log_level = 1;
+ }
+
+ btf_fd = bpf_btf_load(raw_data, raw_size, &opts);
+ if (btf_fd < 0 && !always_log) {
+ opts.log_buf = btf_log_buf,
+ opts.log_size = BTF_LOG_BUF_SIZE,
+ opts.log_level = 1;
+ btf_fd = bpf_btf_load(raw_data, raw_size, &opts);
+ }
+
+ return btf_fd;
+}
+
+static void do_test_raw(unsigned int test_num)
{
struct btf_raw_test *test = &raw_tests[test_num - 1];
- struct bpf_create_map_attr create_attr = {};
+ LIBBPF_OPTS(bpf_map_create_opts, opts);
int map_fd = -1, btf_fd = -1;
unsigned int raw_btf_size;
struct btf_header *hdr;
void *raw_btf;
int err;
- fprintf(stderr, "BTF raw test[%u] (%s): ", test_num, test->descr);
+ if (!test__start_subtest(test->descr))
+ return;
+
raw_btf = btf_raw_create(&hdr_tmpl,
test->raw_types,
test->str_sec,
test->str_sec_size,
&raw_btf_size, NULL);
-
if (!raw_btf)
- return -1;
+ return;
hdr = raw_btf;
@@ -3692,65 +4374,41 @@ static int do_test_raw(unsigned int test_num)
hdr->str_len = (int)hdr->str_len + test->str_len_delta;
*btf_log_buf = '\0';
- btf_fd = bpf_load_btf(raw_btf, raw_btf_size,
- btf_log_buf, BTF_LOG_BUF_SIZE,
- args.always_log);
+ btf_fd = load_raw_btf(raw_btf, raw_btf_size);
free(raw_btf);
- err = ((btf_fd == -1) != test->btf_load_err);
+ err = ((btf_fd < 0) != test->btf_load_err);
if (CHECK(err, "btf_fd:%d test->btf_load_err:%u",
btf_fd, test->btf_load_err) ||
CHECK(test->err_str && !strstr(btf_log_buf, test->err_str),
- "expected err_str:%s", test->err_str)) {
+ "expected err_str:%s\n", test->err_str)) {
err = -1;
goto done;
}
- if (err || btf_fd == -1)
+ if (err || btf_fd < 0)
goto done;
- create_attr.name = test->map_name;
- create_attr.map_type = test->map_type;
- create_attr.key_size = test->key_size;
- create_attr.value_size = test->value_size;
- create_attr.max_entries = test->max_entries;
- create_attr.btf_fd = btf_fd;
- create_attr.btf_key_type_id = test->key_type_id;
- create_attr.btf_value_type_id = test->value_type_id;
+ if (!test->map_type)
+ goto done;
- map_fd = bpf_create_map_xattr(&create_attr);
+ opts.btf_fd = btf_fd;
+ opts.btf_key_type_id = test->key_type_id;
+ opts.btf_value_type_id = test->value_type_id;
+ map_fd = bpf_map_create(test->map_type, test->map_name,
+ test->key_size, test->value_size, test->max_entries, &opts);
- err = ((map_fd == -1) != test->map_create_err);
+ err = ((map_fd < 0) != test->map_create_err);
CHECK(err, "map_fd:%d test->map_create_err:%u",
map_fd, test->map_create_err);
done:
- if (!err)
- fprintf(stderr, "OK");
-
- if (*btf_log_buf && (err || args.always_log))
+ if (*btf_log_buf && (err || always_log))
fprintf(stderr, "\n%s", btf_log_buf);
-
- if (btf_fd != -1)
+ if (btf_fd >= 0)
close(btf_fd);
- if (map_fd != -1)
+ if (map_fd >= 0)
close(map_fd);
-
- return err;
-}
-
-static int test_raw(void)
-{
- unsigned int i;
- int err = 0;
-
- if (args.raw_test_num)
- return count_result(do_test_raw(args.raw_test_num));
-
- for (i = 1; i <= ARRAY_SIZE(raw_tests); i++)
- err |= count_result(do_test_raw(i));
-
- return err;
}
struct btf_get_info_test {
@@ -3814,11 +4472,6 @@ const struct btf_get_info_test get_info_tests[] = {
},
};
-static inline __u64 ptr_to_u64(const void *ptr)
-{
- return (__u64)(unsigned long)ptr;
-}
-
static int test_big_btf_info(unsigned int test_num)
{
const struct btf_get_info_test *test = &get_info_tests[test_num - 1];
@@ -3849,10 +4502,8 @@ static int test_big_btf_info(unsigned int test_num)
goto done;
}
- btf_fd = bpf_load_btf(raw_btf, raw_btf_size,
- btf_log_buf, BTF_LOG_BUF_SIZE,
- args.always_log);
- if (CHECK(btf_fd == -1, "errno:%d", errno)) {
+ btf_fd = load_raw_btf(raw_btf, raw_btf_size);
+ if (CHECK(btf_fd < 0, "errno:%d", errno)) {
err = -1;
goto done;
}
@@ -3868,7 +4519,7 @@ static int test_big_btf_info(unsigned int test_num)
info->btf = ptr_to_u64(user_btf);
info->btf_size = raw_btf_size;
- err = bpf_obj_get_info_by_fd(btf_fd, info, &info_len);
+ err = bpf_btf_get_info_by_fd(btf_fd, info, &info_len);
if (CHECK(!err, "!err")) {
err = -1;
goto done;
@@ -3881,9 +4532,9 @@ static int test_big_btf_info(unsigned int test_num)
* to userspace.
*/
info_garbage.garbage = 0;
- err = bpf_obj_get_info_by_fd(btf_fd, info, &info_len);
+ err = bpf_btf_get_info_by_fd(btf_fd, info, &info_len);
if (CHECK(err || info_len != sizeof(*info),
- "err:%d errno:%d info_len:%u sizeof(*info):%lu",
+ "err:%d errno:%d info_len:%u sizeof(*info):%zu",
err, errno, info_len, sizeof(*info))) {
err = -1;
goto done;
@@ -3892,13 +4543,13 @@ static int test_big_btf_info(unsigned int test_num)
fprintf(stderr, "OK");
done:
- if (*btf_log_buf && (err || args.always_log))
+ if (*btf_log_buf && (err || always_log))
fprintf(stderr, "\n%s", btf_log_buf);
free(raw_btf);
free(user_btf);
- if (btf_fd != -1)
+ if (btf_fd >= 0)
close(btf_fd);
return err;
@@ -3907,7 +4558,7 @@ done:
static int test_btf_id(unsigned int test_num)
{
const struct btf_get_info_test *test = &get_info_tests[test_num - 1];
- struct bpf_create_map_attr create_attr = {};
+ LIBBPF_OPTS(bpf_map_create_opts, opts);
uint8_t *raw_btf = NULL, *user_btf[2] = {};
int btf_fd[2] = {-1, -1}, map_fd = -1;
struct bpf_map_info map_info = {};
@@ -3937,30 +4588,28 @@ static int test_btf_id(unsigned int test_num)
info[i].btf_size = raw_btf_size;
}
- btf_fd[0] = bpf_load_btf(raw_btf, raw_btf_size,
- btf_log_buf, BTF_LOG_BUF_SIZE,
- args.always_log);
- if (CHECK(btf_fd[0] == -1, "errno:%d", errno)) {
+ btf_fd[0] = load_raw_btf(raw_btf, raw_btf_size);
+ if (CHECK(btf_fd[0] < 0, "errno:%d", errno)) {
err = -1;
goto done;
}
/* Test BPF_OBJ_GET_INFO_BY_ID on btf_id */
info_len = sizeof(info[0]);
- err = bpf_obj_get_info_by_fd(btf_fd[0], &info[0], &info_len);
+ err = bpf_btf_get_info_by_fd(btf_fd[0], &info[0], &info_len);
if (CHECK(err, "errno:%d", errno)) {
err = -1;
goto done;
}
btf_fd[1] = bpf_btf_get_fd_by_id(info[0].id);
- if (CHECK(btf_fd[1] == -1, "errno:%d", errno)) {
+ if (CHECK(btf_fd[1] < 0, "errno:%d", errno)) {
err = -1;
goto done;
}
ret = 0;
- err = bpf_obj_get_info_by_fd(btf_fd[1], &info[1], &info_len);
+ err = bpf_btf_get_info_by_fd(btf_fd[1], &info[1], &info_len);
if (CHECK(err || info[0].id != info[1].id ||
info[0].btf_size != info[1].btf_size ||
(ret = memcmp(user_btf[0], user_btf[1], info[0].btf_size)),
@@ -3972,23 +4621,18 @@ static int test_btf_id(unsigned int test_num)
}
/* Test btf members in struct bpf_map_info */
- create_attr.name = "test_btf_id";
- create_attr.map_type = BPF_MAP_TYPE_ARRAY;
- create_attr.key_size = sizeof(int);
- create_attr.value_size = sizeof(unsigned int);
- create_attr.max_entries = 4;
- create_attr.btf_fd = btf_fd[0];
- create_attr.btf_key_type_id = 1;
- create_attr.btf_value_type_id = 2;
-
- map_fd = bpf_create_map_xattr(&create_attr);
- if (CHECK(map_fd == -1, "errno:%d", errno)) {
+ opts.btf_fd = btf_fd[0];
+ opts.btf_key_type_id = 1;
+ opts.btf_value_type_id = 2;
+ map_fd = bpf_map_create(BPF_MAP_TYPE_ARRAY, "test_btf_id",
+ sizeof(int), sizeof(int), 4, &opts);
+ if (CHECK(map_fd < 0, "errno:%d", errno)) {
err = -1;
goto done;
}
info_len = sizeof(map_info);
- err = bpf_obj_get_info_by_fd(map_fd, &map_info, &info_len);
+ err = bpf_map_get_info_by_fd(map_fd, &map_info, &info_len);
if (CHECK(err || map_info.btf_id != info[0].id ||
map_info.btf_key_type_id != 1 || map_info.btf_value_type_id != 2,
"err:%d errno:%d info.id:%u btf_id:%u btf_key_type_id:%u btf_value_type_id:%u",
@@ -4005,7 +4649,7 @@ static int test_btf_id(unsigned int test_num)
/* Test BTF ID is removed from the kernel */
btf_fd[0] = bpf_btf_get_fd_by_id(map_info.btf_id);
- if (CHECK(btf_fd[0] == -1, "errno:%d", errno)) {
+ if (CHECK(btf_fd[0] < 0, "errno:%d", errno)) {
err = -1;
goto done;
}
@@ -4015,31 +4659,26 @@ static int test_btf_id(unsigned int test_num)
/* The map holds the last ref to BTF and its btf_id */
close(map_fd);
map_fd = -1;
- btf_fd[0] = bpf_btf_get_fd_by_id(map_info.btf_id);
- if (CHECK(btf_fd[0] != -1, "BTF lingers")) {
- err = -1;
- goto done;
- }
fprintf(stderr, "OK");
done:
- if (*btf_log_buf && (err || args.always_log))
+ if (*btf_log_buf && (err || always_log))
fprintf(stderr, "\n%s", btf_log_buf);
free(raw_btf);
- if (map_fd != -1)
+ if (map_fd >= 0)
close(map_fd);
for (i = 0; i < 2; i++) {
free(user_btf[i]);
- if (btf_fd[i] != -1)
+ if (btf_fd[i] >= 0)
close(btf_fd[i]);
}
return err;
}
-static int do_test_get_info(unsigned int test_num)
+static void do_test_get_info(unsigned int test_num)
{
const struct btf_get_info_test *test = &get_info_tests[test_num - 1];
unsigned int raw_btf_size, user_btf_size, expected_nbytes;
@@ -4048,11 +4687,14 @@ static int do_test_get_info(unsigned int test_num)
int btf_fd = -1, err, ret;
uint32_t info_len;
- fprintf(stderr, "BTF GET_INFO test[%u] (%s): ",
- test_num, test->descr);
+ if (!test__start_subtest(test->descr))
+ return;
- if (test->special_test)
- return test->special_test(test_num);
+ if (test->special_test) {
+ err = test->special_test(test_num);
+ if (CHECK(err, "failed: %d\n", err))
+ return;
+ }
raw_btf = btf_raw_create(&hdr_tmpl,
test->raw_types,
@@ -4061,7 +4703,7 @@ static int do_test_get_info(unsigned int test_num)
&raw_btf_size, NULL);
if (!raw_btf)
- return -1;
+ return;
*btf_log_buf = '\0';
@@ -4071,10 +4713,8 @@ static int do_test_get_info(unsigned int test_num)
goto done;
}
- btf_fd = bpf_load_btf(raw_btf, raw_btf_size,
- btf_log_buf, BTF_LOG_BUF_SIZE,
- args.always_log);
- if (CHECK(btf_fd == -1, "errno:%d", errno)) {
+ btf_fd = load_raw_btf(raw_btf, raw_btf_size);
+ if (CHECK(btf_fd <= 0, "errno:%d", errno)) {
err = -1;
goto done;
}
@@ -4090,11 +4730,11 @@ static int do_test_get_info(unsigned int test_num)
info.btf_size = user_btf_size;
ret = 0;
- err = bpf_obj_get_info_by_fd(btf_fd, &info, &info_len);
+ err = bpf_btf_get_info_by_fd(btf_fd, &info, &info_len);
if (CHECK(err || !info.id || info_len != sizeof(info) ||
info.btf_size != raw_btf_size ||
(ret = memcmp(raw_btf, user_btf, expected_nbytes)),
- "err:%d errno:%d info.id:%u info_len:%u sizeof(info):%lu raw_btf_size:%u info.btf_size:%u expected_nbytes:%u memcmp:%d",
+ "err:%d errno:%d info.id:%u info_len:%u sizeof(info):%zu raw_btf_size:%u info.btf_size:%u expected_nbytes:%u memcmp:%d",
err, errno, info.id, info_len, sizeof(info),
raw_btf_size, info.btf_size, expected_nbytes, ret)) {
err = -1;
@@ -4114,30 +4754,14 @@ static int do_test_get_info(unsigned int test_num)
fprintf(stderr, "OK");
done:
- if (*btf_log_buf && (err || args.always_log))
+ if (*btf_log_buf && (err || always_log))
fprintf(stderr, "\n%s", btf_log_buf);
free(raw_btf);
free(user_btf);
- if (btf_fd != -1)
+ if (btf_fd >= 0)
close(btf_fd);
-
- return err;
-}
-
-static int test_get_info(void)
-{
- unsigned int i;
- int err = 0;
-
- if (args.get_info_test_num)
- return count_result(do_test_get_info(args.get_info_test_num));
-
- for (i = 1; i <= ARRAY_SIZE(get_info_tests); i++)
- err |= count_result(do_test_get_info(i));
-
- return err;
}
struct btf_file_test {
@@ -4146,12 +4770,11 @@ struct btf_file_test {
};
static struct btf_file_test file_tests[] = {
- { .file = "test_btf_haskv.o", },
- { .file = "test_btf_newkv.o", },
- { .file = "test_btf_nokv.o", .btf_kv_notfound = true, },
+ { .file = "test_btf_newkv.bpf.o", },
+ { .file = "test_btf_nokv.bpf.o", .btf_kv_notfound = true, },
};
-static int do_test_file(unsigned int test_num)
+static void do_test_file(unsigned int test_num)
{
const struct btf_file_test *test = &file_tests[test_num - 1];
const char *expected_fnames[] = {"_dummy_tracepoint",
@@ -4169,28 +4792,32 @@ static int do_test_file(unsigned int test_num)
struct bpf_map *map;
int i, err, prog_fd;
- fprintf(stderr, "BTF libbpf test[%u] (%s): ", test_num,
- test->file);
+ if (!test__start_subtest(test->file))
+ return;
btf = btf__parse_elf(test->file, &btf_ext);
- if (IS_ERR(btf)) {
- if (PTR_ERR(btf) == -ENOENT) {
- fprintf(stderr, "SKIP. No ELF %s found", BTF_ELF_SEC);
- skip_cnt++;
- return 0;
+ err = libbpf_get_error(btf);
+ if (err) {
+ if (err == -ENOENT) {
+ printf("%s:SKIP: No ELF %s found", __func__, BTF_ELF_SEC);
+ test__skip();
+ return;
}
- return PTR_ERR(btf);
+ return;
}
btf__free(btf);
has_btf_ext = btf_ext != NULL;
btf_ext__free(btf_ext);
+ /* temporary disable LIBBPF_STRICT_MAP_DEFINITIONS to test legacy maps */
+ libbpf_set_strict_mode(LIBBPF_STRICT_ALL & ~LIBBPF_STRICT_MAP_DEFINITIONS);
obj = bpf_object__open(test->file);
- if (CHECK(IS_ERR(obj), "obj: %ld", PTR_ERR(obj)))
- return PTR_ERR(obj);
+ err = libbpf_get_error(obj);
+ if (CHECK(err, "obj: %d", err))
+ return;
- prog = bpf_program__next(NULL, obj);
+ prog = bpf_object__next_program(obj, NULL);
if (CHECK(!prog, "Cannot find bpf_prog")) {
err = -1;
goto done;
@@ -4220,9 +4847,9 @@ static int do_test_file(unsigned int test_num)
/* get necessary program info */
info_len = sizeof(struct bpf_prog_info);
- err = bpf_obj_get_info_by_fd(prog_fd, &info, &info_len);
+ err = bpf_prog_get_info_by_fd(prog_fd, &info, &info_len);
- if (CHECK(err == -1, "invalid get info (1st) errno:%d", errno)) {
+ if (CHECK(err < 0, "invalid get info (1st) errno:%d", errno)) {
fprintf(stderr, "%s\n", btf_log_buf);
err = -1;
goto done;
@@ -4252,9 +4879,9 @@ static int do_test_file(unsigned int test_num)
info.func_info_rec_size = rec_size;
info.func_info = ptr_to_u64(func_info);
- err = bpf_obj_get_info_by_fd(prog_fd, &info, &info_len);
+ err = bpf_prog_get_info_by_fd(prog_fd, &info, &info_len);
- if (CHECK(err == -1, "invalid get info (2nd) errno:%d", errno)) {
+ if (CHECK(err < 0, "invalid get info (2nd) errno:%d", errno)) {
fprintf(stderr, "%s\n", btf_log_buf);
err = -1;
goto done;
@@ -4272,7 +4899,8 @@ static int do_test_file(unsigned int test_num)
goto done;
}
- err = btf__get_from_id(info.btf_id, &btf);
+ btf = btf__load_from_kernel_by_id(info.btf_id);
+ err = libbpf_get_error(btf);
if (CHECK(err, "cannot get btf from kernel, err: %d", err))
goto done;
@@ -4308,23 +4936,11 @@ skip:
fprintf(stderr, "OK");
done:
+ libbpf_set_strict_mode(LIBBPF_STRICT_ALL);
+
+ btf__free(btf);
free(func_info);
bpf_object__close(obj);
- return err;
-}
-
-static int test_file(void)
-{
- unsigned int i;
- int err = 0;
-
- if (args.file_test_num)
- return count_result(do_test_file(args.file_test_num));
-
- for (i = 1; i <= ARRAY_SIZE(file_tests); i++)
- err |= count_result(do_test_file(i));
-
- return err;
}
const char *pprint_enum_str[] = {
@@ -4428,7 +5044,7 @@ static struct btf_raw_test pprint_test_template[] = {
.value_size = sizeof(struct pprint_mapv),
.key_type_id = 3, /* unsigned int */
.value_type_id = 16, /* struct pprint_mapv */
- .max_entries = 128 * 1024,
+ .max_entries = 128,
},
{
@@ -4493,7 +5109,7 @@ static struct btf_raw_test pprint_test_template[] = {
.value_size = sizeof(struct pprint_mapv),
.key_type_id = 3, /* unsigned int */
.value_type_id = 16, /* struct pprint_mapv */
- .max_entries = 128 * 1024,
+ .max_entries = 128,
},
{
@@ -4564,7 +5180,7 @@ static struct btf_raw_test pprint_test_template[] = {
.value_size = sizeof(struct pprint_mapv),
.key_type_id = 3, /* unsigned int */
.value_type_id = 16, /* struct pprint_mapv */
- .max_entries = 128 * 1024,
+ .max_entries = 128,
},
#ifdef __SIZEOF_INT128__
@@ -4591,7 +5207,7 @@ static struct btf_raw_test pprint_test_template[] = {
.value_size = sizeof(struct pprint_mapv_int128),
.key_type_id = 1,
.value_type_id = 4,
- .max_entries = 128 * 1024,
+ .max_entries = 128,
.mapv_kind = PPRINT_MAPV_KIND_INT128,
},
#endif
@@ -4673,6 +5289,7 @@ static size_t get_pprint_mapv_size(enum pprint_mapv_kind_t mapv_kind)
#endif
assert(0);
+ return 0;
}
static void set_pprint_mapv(enum pprint_mapv_kind_t mapv_kind,
@@ -4730,7 +5347,7 @@ ssize_t get_pprint_expected_line(enum pprint_mapv_kind_t mapv_kind,
nexpected_line = snprintf(expected_line, line_size,
"%s%u: {%u,0,%d,0x%x,0x%x,0x%x,"
- "{%lu|[%u,%u,%u,%u,%u,%u,%u,%u]},%s,"
+ "{%llu|[%u,%u,%u,%u,%u,%u,%u,%u]},%s,"
"%u,0x%x,[[%d,%d],[%d,%d]]}\n",
percpu_map ? "\tcpu" : "",
percpu_map ? cpu : next_key,
@@ -4738,7 +5355,7 @@ ssize_t get_pprint_expected_line(enum pprint_mapv_kind_t mapv_kind,
v->unused_bits2a,
v->bits28,
v->unused_bits2b,
- v->ui64,
+ (__u64)v->ui64,
v->ui8a[0], v->ui8a[1],
v->ui8a[2], v->ui8a[3],
v->ui8a[4], v->ui8a[5],
@@ -4790,11 +5407,11 @@ static int check_line(const char *expected_line, int nexpected_line,
}
-static int do_test_pprint(int test_num)
+static void do_test_pprint(int test_num)
{
const struct btf_raw_test *test = &pprint_test_template[test_num];
enum pprint_mapv_kind_t mapv_kind = test->mapv_kind;
- struct bpf_create_map_attr create_attr = {};
+ LIBBPF_OPTS(bpf_map_create_opts, opts);
bool ordered_map, lossless_map, percpu_map;
int err, ret, num_cpus, rounded_value_size;
unsigned int key, nr_read_elems;
@@ -4809,36 +5426,31 @@ static int do_test_pprint(int test_num)
uint8_t *raw_btf;
ssize_t nread;
- fprintf(stderr, "%s(#%d)......", test->descr, test_num);
+ if (!test__start_subtest(test->descr))
+ return;
+
raw_btf = btf_raw_create(&hdr_tmpl, test->raw_types,
test->str_sec, test->str_sec_size,
&raw_btf_size, NULL);
if (!raw_btf)
- return -1;
+ return;
*btf_log_buf = '\0';
- btf_fd = bpf_load_btf(raw_btf, raw_btf_size,
- btf_log_buf, BTF_LOG_BUF_SIZE,
- args.always_log);
+ btf_fd = load_raw_btf(raw_btf, raw_btf_size);
free(raw_btf);
- if (CHECK(btf_fd == -1, "errno:%d", errno)) {
+ if (CHECK(btf_fd < 0, "errno:%d\n", errno)) {
err = -1;
goto done;
}
- create_attr.name = test->map_name;
- create_attr.map_type = test->map_type;
- create_attr.key_size = test->key_size;
- create_attr.value_size = test->value_size;
- create_attr.max_entries = test->max_entries;
- create_attr.btf_fd = btf_fd;
- create_attr.btf_key_type_id = test->key_type_id;
- create_attr.btf_value_type_id = test->value_type_id;
-
- map_fd = bpf_create_map_xattr(&create_attr);
- if (CHECK(map_fd == -1, "errno:%d", errno)) {
+ opts.btf_fd = btf_fd;
+ opts.btf_key_type_id = test->key_type_id;
+ opts.btf_value_type_id = test->value_type_id;
+ map_fd = bpf_map_create(test->map_type, test->map_name,
+ test->key_size, test->value_size, test->max_entries, &opts);
+ if (CHECK(map_fd < 0, "errno:%d", errno)) {
err = -1;
goto done;
}
@@ -4846,7 +5458,7 @@ static int do_test_pprint(int test_num)
ret = snprintf(pin_path, sizeof(pin_path), "%s/%s",
"/sys/fs/bpf", test->map_name);
- if (CHECK(ret == sizeof(pin_path), "pin_path %s/%s is too long",
+ if (CHECK(ret >= sizeof(pin_path), "pin_path %s/%s is too long",
"/sys/fs/bpf", test->map_name)) {
err = -1;
goto done;
@@ -4919,7 +5531,7 @@ static int do_test_pprint(int test_num)
err = check_line(expected_line, nexpected_line,
sizeof(expected_line), line);
- if (err == -1)
+ if (err < 0)
goto done;
}
@@ -4935,7 +5547,7 @@ static int do_test_pprint(int test_num)
cpu, cmapv);
err = check_line(expected_line, nexpected_line,
sizeof(expected_line), line);
- if (err == -1)
+ if (err < 0)
goto done;
cmapv = cmapv + rounded_value_size;
@@ -4971,24 +5583,21 @@ done:
free(mapv);
if (!err)
fprintf(stderr, "OK");
- if (*btf_log_buf && (err || args.always_log))
+ if (*btf_log_buf && (err || always_log))
fprintf(stderr, "\n%s", btf_log_buf);
- if (btf_fd != -1)
+ if (btf_fd >= 0)
close(btf_fd);
- if (map_fd != -1)
+ if (map_fd >= 0)
close(map_fd);
if (pin_file)
fclose(pin_file);
unlink(pin_path);
free(line);
-
- return err;
}
-static int test_pprint(void)
+static void test_pprint(void)
{
unsigned int i;
- int err = 0;
/* test various maps with the first test template */
for (i = 0; i < ARRAY_SIZE(pprint_tests_meta); i++) {
@@ -4999,7 +5608,7 @@ static int test_pprint(void)
pprint_test_template[0].lossless_map = pprint_tests_meta[i].lossless_map;
pprint_test_template[0].percpu_map = pprint_tests_meta[i].percpu_map;
- err |= count_result(do_test_pprint(0));
+ do_test_pprint(0);
}
/* test rest test templates with the first map */
@@ -5010,10 +5619,8 @@ static int test_pprint(void)
pprint_test_template[i].ordered_map = pprint_tests_meta[0].ordered_map;
pprint_test_template[i].lossless_map = pprint_tests_meta[0].lossless_map;
pprint_test_template[i].percpu_map = pprint_tests_meta[0].percpu_map;
- err |= count_result(do_test_pprint(i));
+ do_test_pprint(i);
}
-
- return err;
}
#define BPF_LINE_INFO_ENC(insn_off, file_off, line_off, line_num, line_col) \
@@ -5891,8 +6498,8 @@ static int test_get_finfo(const struct prog_info_raw_test *test,
/* get necessary lens */
info_len = sizeof(struct bpf_prog_info);
- err = bpf_obj_get_info_by_fd(prog_fd, &info, &info_len);
- if (CHECK(err == -1, "invalid get info (1st) errno:%d", errno)) {
+ err = bpf_prog_get_info_by_fd(prog_fd, &info, &info_len);
+ if (CHECK(err < 0, "invalid get info (1st) errno:%d", errno)) {
fprintf(stderr, "%s\n", btf_log_buf);
return -1;
}
@@ -5921,8 +6528,8 @@ static int test_get_finfo(const struct prog_info_raw_test *test,
info.nr_func_info = nr_func_info;
info.func_info_rec_size = rec_size;
info.func_info = ptr_to_u64(func_info);
- err = bpf_obj_get_info_by_fd(prog_fd, &info, &info_len);
- if (CHECK(err == -1, "invalid get info (2nd) errno:%d", errno)) {
+ err = bpf_prog_get_info_by_fd(prog_fd, &info, &info_len);
+ if (CHECK(err < 0, "invalid get info (2nd) errno:%d", errno)) {
fprintf(stderr, "%s\n", btf_log_buf);
err = -1;
goto done;
@@ -5985,8 +6592,8 @@ static int test_get_linfo(const struct prog_info_raw_test *test,
nr_jited_func_lens = nr_jited_ksyms;
info_len = sizeof(struct bpf_prog_info);
- err = bpf_obj_get_info_by_fd(prog_fd, &info, &info_len);
- if (CHECK(err == -1, "err:%d errno:%d", err, errno)) {
+ err = bpf_prog_get_info_by_fd(prog_fd, &info, &info_len);
+ if (CHECK(err < 0, "err:%d errno:%d", err, errno)) {
err = -1;
goto done;
}
@@ -6059,13 +6666,13 @@ static int test_get_linfo(const struct prog_info_raw_test *test,
info.jited_func_lens = ptr_to_u64(jited_func_lens);
}
- err = bpf_obj_get_info_by_fd(prog_fd, &info, &info_len);
+ err = bpf_prog_get_info_by_fd(prog_fd, &info, &info_len);
/*
* Only recheck the info.*line_info* fields.
* Other fields are not the concern of this test.
*/
- if (CHECK(err == -1 ||
+ if (CHECK(err < 0 ||
info.nr_line_info != cnt ||
(jited_cnt && !info.jited_line_info) ||
info.nr_jited_line_info != jited_cnt ||
@@ -6178,35 +6785,34 @@ done:
return err;
}
-static int do_test_info_raw(unsigned int test_num)
+static void do_test_info_raw(unsigned int test_num)
{
const struct prog_info_raw_test *test = &info_raw_tests[test_num - 1];
- unsigned int raw_btf_size, linfo_str_off, linfo_size;
+ unsigned int raw_btf_size, linfo_str_off, linfo_size = 0;
int btf_fd = -1, prog_fd = -1, err = 0;
void *raw_btf, *patched_linfo = NULL;
const char *ret_next_str;
union bpf_attr attr = {};
- fprintf(stderr, "BTF prog info raw test[%u] (%s): ", test_num, test->descr);
+ if (!test__start_subtest(test->descr))
+ return;
+
raw_btf = btf_raw_create(&hdr_tmpl, test->raw_types,
test->str_sec, test->str_sec_size,
&raw_btf_size, &ret_next_str);
-
if (!raw_btf)
- return -1;
+ return;
*btf_log_buf = '\0';
- btf_fd = bpf_load_btf(raw_btf, raw_btf_size,
- btf_log_buf, BTF_LOG_BUF_SIZE,
- args.always_log);
+ btf_fd = load_raw_btf(raw_btf, raw_btf_size);
free(raw_btf);
- if (CHECK(btf_fd == -1, "invalid btf_fd errno:%d", errno)) {
+ if (CHECK(btf_fd < 0, "invalid btf_fd errno:%d", errno)) {
err = -1;
goto done;
}
- if (*btf_log_buf && args.always_log)
+ if (*btf_log_buf && always_log)
fprintf(stderr, "\n%s", btf_log_buf);
*btf_log_buf = '\0';
@@ -6214,7 +6820,8 @@ static int do_test_info_raw(unsigned int test_num)
patched_linfo = patch_name_tbd(test->line_info,
test->str_sec, linfo_str_off,
test->str_sec_size, &linfo_size);
- if (IS_ERR(patched_linfo)) {
+ err = libbpf_get_error(patched_linfo);
+ if (err) {
fprintf(stderr, "error in creating raw bpf_line_info");
err = -1;
goto done;
@@ -6238,7 +6845,7 @@ static int do_test_info_raw(unsigned int test_num)
}
prog_fd = syscall(__NR_bpf, BPF_PROG_LOAD, &attr, sizeof(attr));
- err = ((prog_fd == -1) != test->expected_prog_load_failure);
+ err = ((prog_fd < 0) != test->expected_prog_load_failure);
if (CHECK(err, "prog_fd:%d expected_prog_load_failure:%u errno:%d",
prog_fd, test->expected_prog_load_failure, errno) ||
CHECK(test->err_str && !strstr(btf_log_buf, test->err_str),
@@ -6247,7 +6854,7 @@ static int do_test_info_raw(unsigned int test_num)
goto done;
}
- if (prog_fd == -1)
+ if (prog_fd < 0)
goto done;
err = test_get_finfo(test, prog_fd);
@@ -6261,35 +6868,16 @@ static int do_test_info_raw(unsigned int test_num)
goto done;
done:
- if (!err)
- fprintf(stderr, "OK");
-
- if (*btf_log_buf && (err || args.always_log))
+ if (*btf_log_buf && (err || always_log))
fprintf(stderr, "\n%s", btf_log_buf);
- if (btf_fd != -1)
+ if (btf_fd >= 0)
close(btf_fd);
- if (prog_fd != -1)
+ if (prog_fd >= 0)
close(prog_fd);
- if (!IS_ERR(patched_linfo))
+ if (!libbpf_get_error(patched_linfo))
free(patched_linfo);
-
- return err;
-}
-
-static int test_info_raw(void)
-{
- unsigned int i;
- int err = 0;
-
- if (args.info_raw_test_num)
- return count_result(do_test_info_raw(args.info_raw_test_num));
-
- for (i = 1; i <= ARRAY_SIZE(info_raw_tests); i++)
- err |= count_result(do_test_info_raw(i));
-
- return err;
}
struct btf_raw_data {
@@ -6305,7 +6893,7 @@ struct btf_dedup_test {
struct btf_dedup_opts opts;
};
-const struct btf_dedup_test dedup_tests[] = {
+static struct btf_dedup_test dedup_tests[] = {
{
.descr = "dedup: unused strings filtering",
@@ -6325,9 +6913,6 @@ const struct btf_dedup_test dedup_tests[] = {
},
BTF_STR_SEC("\0int\0long"),
},
- .opts = {
- .dont_resolve_fwds = false,
- },
},
{
.descr = "dedup: strings deduplication",
@@ -6350,9 +6935,6 @@ const struct btf_dedup_test dedup_tests[] = {
},
BTF_STR_SEC("\0int\0long int"),
},
- .opts = {
- .dont_resolve_fwds = false,
- },
},
{
.descr = "dedup: struct example #1",
@@ -6371,57 +6953,67 @@ const struct btf_dedup_test dedup_tests[] = {
/* int[16] */
BTF_TYPE_ARRAY_ENC(1, 1, 16), /* [2] */
/* struct s { */
- BTF_STRUCT_ENC(NAME_NTH(2), 4, 84), /* [3] */
+ BTF_STRUCT_ENC(NAME_NTH(2), 5, 88), /* [3] */
BTF_MEMBER_ENC(NAME_NTH(3), 4, 0), /* struct s *next; */
BTF_MEMBER_ENC(NAME_NTH(4), 5, 64), /* const int *a; */
BTF_MEMBER_ENC(NAME_NTH(5), 2, 128), /* int b[16]; */
BTF_MEMBER_ENC(NAME_NTH(6), 1, 640), /* int c; */
+ BTF_MEMBER_ENC(NAME_NTH(8), 15, 672), /* float d; */
/* ptr -> [3] struct s */
BTF_PTR_ENC(3), /* [4] */
/* ptr -> [6] const int */
BTF_PTR_ENC(6), /* [5] */
/* const -> [1] int */
BTF_CONST_ENC(1), /* [6] */
+ /* tag -> [3] struct s */
+ BTF_DECL_TAG_ENC(NAME_NTH(2), 3, -1), /* [7] */
+ /* tag -> [3] struct s, member 1 */
+ BTF_DECL_TAG_ENC(NAME_NTH(2), 3, 1), /* [8] */
/* full copy of the above */
- BTF_TYPE_INT_ENC(NAME_NTH(1), BTF_INT_SIGNED, 0, 32, 4), /* [7] */
- BTF_TYPE_ARRAY_ENC(7, 7, 16), /* [8] */
- BTF_STRUCT_ENC(NAME_NTH(2), 4, 84), /* [9] */
- BTF_MEMBER_ENC(NAME_NTH(3), 10, 0),
- BTF_MEMBER_ENC(NAME_NTH(4), 11, 64),
- BTF_MEMBER_ENC(NAME_NTH(5), 8, 128),
- BTF_MEMBER_ENC(NAME_NTH(6), 7, 640),
- BTF_PTR_ENC(9), /* [10] */
- BTF_PTR_ENC(12), /* [11] */
- BTF_CONST_ENC(7), /* [12] */
+ BTF_TYPE_INT_ENC(NAME_NTH(1), BTF_INT_SIGNED, 0, 32, 4), /* [9] */
+ BTF_TYPE_ARRAY_ENC(9, 9, 16), /* [10] */
+ BTF_STRUCT_ENC(NAME_NTH(2), 5, 88), /* [11] */
+ BTF_MEMBER_ENC(NAME_NTH(3), 12, 0),
+ BTF_MEMBER_ENC(NAME_NTH(4), 13, 64),
+ BTF_MEMBER_ENC(NAME_NTH(5), 10, 128),
+ BTF_MEMBER_ENC(NAME_NTH(6), 9, 640),
+ BTF_MEMBER_ENC(NAME_NTH(8), 15, 672),
+ BTF_PTR_ENC(11), /* [12] */
+ BTF_PTR_ENC(14), /* [13] */
+ BTF_CONST_ENC(9), /* [14] */
+ BTF_TYPE_FLOAT_ENC(NAME_NTH(7), 4), /* [15] */
+ BTF_DECL_TAG_ENC(NAME_NTH(2), 11, -1), /* [16] */
+ BTF_DECL_TAG_ENC(NAME_NTH(2), 11, 1), /* [17] */
BTF_END_RAW,
},
- BTF_STR_SEC("\0int\0s\0next\0a\0b\0c\0"),
+ BTF_STR_SEC("\0int\0s\0next\0a\0b\0c\0float\0d"),
},
.expect = {
.raw_types = {
/* int */
- BTF_TYPE_INT_ENC(NAME_NTH(4), BTF_INT_SIGNED, 0, 32, 4), /* [1] */
+ BTF_TYPE_INT_ENC(NAME_NTH(5), BTF_INT_SIGNED, 0, 32, 4), /* [1] */
/* int[16] */
BTF_TYPE_ARRAY_ENC(1, 1, 16), /* [2] */
/* struct s { */
- BTF_STRUCT_ENC(NAME_NTH(6), 4, 84), /* [3] */
- BTF_MEMBER_ENC(NAME_NTH(5), 4, 0), /* struct s *next; */
+ BTF_STRUCT_ENC(NAME_NTH(8), 5, 88), /* [3] */
+ BTF_MEMBER_ENC(NAME_NTH(7), 4, 0), /* struct s *next; */
BTF_MEMBER_ENC(NAME_NTH(1), 5, 64), /* const int *a; */
BTF_MEMBER_ENC(NAME_NTH(2), 2, 128), /* int b[16]; */
BTF_MEMBER_ENC(NAME_NTH(3), 1, 640), /* int c; */
+ BTF_MEMBER_ENC(NAME_NTH(4), 9, 672), /* float d; */
/* ptr -> [3] struct s */
BTF_PTR_ENC(3), /* [4] */
/* ptr -> [6] const int */
BTF_PTR_ENC(6), /* [5] */
/* const -> [1] int */
BTF_CONST_ENC(1), /* [6] */
+ BTF_DECL_TAG_ENC(NAME_NTH(2), 3, -1), /* [7] */
+ BTF_DECL_TAG_ENC(NAME_NTH(2), 3, 1), /* [8] */
+ BTF_TYPE_FLOAT_ENC(NAME_NTH(7), 4), /* [9] */
BTF_END_RAW,
},
- BTF_STR_SEC("\0a\0b\0c\0int\0next\0s"),
- },
- .opts = {
- .dont_resolve_fwds = false,
+ BTF_STR_SEC("\0a\0b\0c\0d\0int\0float\0next\0s"),
},
},
{
@@ -6465,8 +7057,7 @@ const struct btf_dedup_test dedup_tests[] = {
BTF_STR_SEC("\0s\0x"),
},
.opts = {
- .dont_resolve_fwds = false,
- .dedup_table_size = 1, /* force hash collisions */
+ .force_collisions = true, /* force hash collisions */
},
},
{
@@ -6512,8 +7103,7 @@ const struct btf_dedup_test dedup_tests[] = {
BTF_STR_SEC("\0s\0x"),
},
.opts = {
- .dont_resolve_fwds = false,
- .dedup_table_size = 1, /* force hash collisions */
+ .force_collisions = true, /* force hash collisions */
},
},
{
@@ -6537,11 +7127,19 @@ const struct btf_dedup_test dedup_tests[] = {
BTF_RESTRICT_ENC(8), /* [11] restrict */
BTF_FUNC_PROTO_ENC(1, 2), /* [12] func_proto */
BTF_FUNC_PROTO_ARG_ENC(NAME_TBD, 1),
- BTF_FUNC_PROTO_ARG_ENC(NAME_TBD, 8),
+ BTF_FUNC_PROTO_ARG_ENC(NAME_TBD, 18),
BTF_FUNC_ENC(NAME_TBD, 12), /* [13] func */
+ BTF_TYPE_FLOAT_ENC(NAME_TBD, 2), /* [14] float */
+ BTF_DECL_TAG_ENC(NAME_TBD, 13, -1), /* [15] decl_tag */
+ BTF_DECL_TAG_ENC(NAME_TBD, 13, 1), /* [16] decl_tag */
+ BTF_DECL_TAG_ENC(NAME_TBD, 7, -1), /* [17] decl_tag */
+ BTF_TYPE_TAG_ENC(NAME_TBD, 8), /* [18] type_tag */
+ BTF_TYPE_ENC(NAME_TBD, BTF_INFO_ENC(BTF_KIND_ENUM64, 0, 2), 8), /* [19] enum64 */
+ BTF_ENUM64_ENC(NAME_TBD, 0, 0),
+ BTF_ENUM64_ENC(NAME_TBD, 1, 1),
BTF_END_RAW,
},
- BTF_STR_SEC("\0A\0B\0C\0D\0E\0F\0G\0H\0I\0J\0K\0L\0M"),
+ BTF_STR_SEC("\0A\0B\0C\0D\0E\0F\0G\0H\0I\0J\0K\0L\0M\0N\0O\0P\0Q\0R\0S\0T\0U"),
},
.expect = {
.raw_types = {
@@ -6562,18 +7160,23 @@ const struct btf_dedup_test dedup_tests[] = {
BTF_RESTRICT_ENC(8), /* [11] restrict */
BTF_FUNC_PROTO_ENC(1, 2), /* [12] func_proto */
BTF_FUNC_PROTO_ARG_ENC(NAME_TBD, 1),
- BTF_FUNC_PROTO_ARG_ENC(NAME_TBD, 8),
+ BTF_FUNC_PROTO_ARG_ENC(NAME_TBD, 18),
BTF_FUNC_ENC(NAME_TBD, 12), /* [13] func */
+ BTF_TYPE_FLOAT_ENC(NAME_TBD, 2), /* [14] float */
+ BTF_DECL_TAG_ENC(NAME_TBD, 13, -1), /* [15] decl_tag */
+ BTF_DECL_TAG_ENC(NAME_TBD, 13, 1), /* [16] decl_tag */
+ BTF_DECL_TAG_ENC(NAME_TBD, 7, -1), /* [17] decl_tag */
+ BTF_TYPE_TAG_ENC(NAME_TBD, 8), /* [18] type_tag */
+ BTF_TYPE_ENC(NAME_TBD, BTF_INFO_ENC(BTF_KIND_ENUM64, 0, 2), 8), /* [19] enum64 */
+ BTF_ENUM64_ENC(NAME_TBD, 0, 0),
+ BTF_ENUM64_ENC(NAME_TBD, 1, 1),
BTF_END_RAW,
},
- BTF_STR_SEC("\0A\0B\0C\0D\0E\0F\0G\0H\0I\0J\0K\0L\0M"),
- },
- .opts = {
- .dont_resolve_fwds = false,
+ BTF_STR_SEC("\0A\0B\0C\0D\0E\0F\0G\0H\0I\0J\0K\0L\0M\0N\0O\0P\0Q\0R\0S\0T\0U"),
},
},
{
- .descr = "dedup: no int duplicates",
+ .descr = "dedup: no int/float duplicates",
.input = {
.raw_types = {
BTF_TYPE_INT_ENC(NAME_NTH(1), BTF_INT_SIGNED, 0, 32, 8),
@@ -6588,9 +7191,15 @@ const struct btf_dedup_test dedup_tests[] = {
BTF_TYPE_INT_ENC(NAME_NTH(1), BTF_INT_SIGNED, 0, 27, 8),
/* different byte size */
BTF_TYPE_INT_ENC(NAME_NTH(1), BTF_INT_SIGNED, 0, 32, 4),
+ /* all allowed sizes */
+ BTF_TYPE_FLOAT_ENC(NAME_NTH(3), 2),
+ BTF_TYPE_FLOAT_ENC(NAME_NTH(3), 4),
+ BTF_TYPE_FLOAT_ENC(NAME_NTH(3), 8),
+ BTF_TYPE_FLOAT_ENC(NAME_NTH(3), 12),
+ BTF_TYPE_FLOAT_ENC(NAME_NTH(3), 16),
BTF_END_RAW,
},
- BTF_STR_SEC("\0int\0some other int"),
+ BTF_STR_SEC("\0int\0some other int\0float"),
},
.expect = {
.raw_types = {
@@ -6606,12 +7215,15 @@ const struct btf_dedup_test dedup_tests[] = {
BTF_TYPE_INT_ENC(NAME_NTH(1), BTF_INT_SIGNED, 0, 27, 8),
/* different byte size */
BTF_TYPE_INT_ENC(NAME_NTH(1), BTF_INT_SIGNED, 0, 32, 4),
+ /* all allowed sizes */
+ BTF_TYPE_FLOAT_ENC(NAME_NTH(3), 2),
+ BTF_TYPE_FLOAT_ENC(NAME_NTH(3), 4),
+ BTF_TYPE_FLOAT_ENC(NAME_NTH(3), 8),
+ BTF_TYPE_FLOAT_ENC(NAME_NTH(3), 12),
+ BTF_TYPE_FLOAT_ENC(NAME_NTH(3), 16),
BTF_END_RAW,
},
- BTF_STR_SEC("\0int\0some other int"),
- },
- .opts = {
- .dont_resolve_fwds = false,
+ BTF_STR_SEC("\0int\0some other int\0float"),
},
},
{
@@ -6628,7 +7240,7 @@ const struct btf_dedup_test dedup_tests[] = {
BTF_ENUM_ENC(NAME_NTH(4), 456),
/* [4] fwd enum 'e2' after full enum */
BTF_TYPE_ENC(NAME_NTH(3), BTF_INFO_ENC(BTF_KIND_ENUM, 0, 0), 4),
- /* [5] incompatible fwd enum with different size */
+ /* [5] fwd enum with different size, size does not matter for fwd */
BTF_TYPE_ENC(NAME_NTH(1), BTF_INFO_ENC(BTF_KIND_ENUM, 0, 0), 1),
/* [6] incompatible full enum with different value */
BTF_TYPE_ENC(NAME_NTH(1), BTF_INFO_ENC(BTF_KIND_ENUM, 0, 1), 4),
@@ -6645,18 +7257,13 @@ const struct btf_dedup_test dedup_tests[] = {
/* [2] full enum 'e2' */
BTF_TYPE_ENC(NAME_NTH(3), BTF_INFO_ENC(BTF_KIND_ENUM, 0, 1), 4),
BTF_ENUM_ENC(NAME_NTH(4), 456),
- /* [3] incompatible fwd enum with different size */
- BTF_TYPE_ENC(NAME_NTH(1), BTF_INFO_ENC(BTF_KIND_ENUM, 0, 0), 1),
- /* [4] incompatible full enum with different value */
+ /* [3] incompatible full enum with different value */
BTF_TYPE_ENC(NAME_NTH(1), BTF_INFO_ENC(BTF_KIND_ENUM, 0, 1), 4),
BTF_ENUM_ENC(NAME_NTH(2), 321),
BTF_END_RAW,
},
BTF_STR_SEC("\0e1\0e1_val\0e2\0e2_val"),
},
- .opts = {
- .dont_resolve_fwds = false,
- },
},
{
.descr = "dedup: datasec and vars pass-through",
@@ -6699,11 +7306,673 @@ const struct btf_dedup_test dedup_tests[] = {
BTF_STR_SEC("\0.bss\0t"),
},
.opts = {
- .dont_resolve_fwds = false,
- .dedup_table_size = 1
+ .force_collisions = true
+ },
+},
+{
+ .descr = "dedup: func/func_arg/var tags",
+ .input = {
+ .raw_types = {
+ /* int */
+ BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [1] */
+ /* static int t */
+ BTF_VAR_ENC(NAME_NTH(1), 1, 0), /* [2] */
+ /* void f(int a1, int a2) */
+ BTF_FUNC_PROTO_ENC(0, 2), /* [3] */
+ BTF_FUNC_PROTO_ARG_ENC(NAME_NTH(2), 1),
+ BTF_FUNC_PROTO_ARG_ENC(NAME_NTH(3), 1),
+ BTF_FUNC_ENC(NAME_NTH(4), 3), /* [4] */
+ /* tag -> t */
+ BTF_DECL_TAG_ENC(NAME_NTH(5), 2, -1), /* [5] */
+ BTF_DECL_TAG_ENC(NAME_NTH(5), 2, -1), /* [6] */
+ /* tag -> func */
+ BTF_DECL_TAG_ENC(NAME_NTH(5), 4, -1), /* [7] */
+ BTF_DECL_TAG_ENC(NAME_NTH(5), 4, -1), /* [8] */
+ /* tag -> func arg a1 */
+ BTF_DECL_TAG_ENC(NAME_NTH(5), 4, 1), /* [9] */
+ BTF_DECL_TAG_ENC(NAME_NTH(5), 4, 1), /* [10] */
+ BTF_END_RAW,
+ },
+ BTF_STR_SEC("\0t\0a1\0a2\0f\0tag"),
+ },
+ .expect = {
+ .raw_types = {
+ BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [1] */
+ BTF_VAR_ENC(NAME_NTH(1), 1, 0), /* [2] */
+ BTF_FUNC_PROTO_ENC(0, 2), /* [3] */
+ BTF_FUNC_PROTO_ARG_ENC(NAME_NTH(2), 1),
+ BTF_FUNC_PROTO_ARG_ENC(NAME_NTH(3), 1),
+ BTF_FUNC_ENC(NAME_NTH(4), 3), /* [4] */
+ BTF_DECL_TAG_ENC(NAME_NTH(5), 2, -1), /* [5] */
+ BTF_DECL_TAG_ENC(NAME_NTH(5), 4, -1), /* [6] */
+ BTF_DECL_TAG_ENC(NAME_NTH(5), 4, 1), /* [7] */
+ BTF_END_RAW,
+ },
+ BTF_STR_SEC("\0t\0a1\0a2\0f\0tag"),
+ },
+},
+{
+ .descr = "dedup: func/func_param tags",
+ .input = {
+ .raw_types = {
+ /* int */
+ BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [1] */
+ /* void f(int a1, int a2) */
+ BTF_FUNC_PROTO_ENC(0, 2), /* [2] */
+ BTF_FUNC_PROTO_ARG_ENC(NAME_NTH(1), 1),
+ BTF_FUNC_PROTO_ARG_ENC(NAME_NTH(2), 1),
+ BTF_FUNC_ENC(NAME_NTH(3), 2), /* [3] */
+ /* void f(int a1, int a2) */
+ BTF_FUNC_PROTO_ENC(0, 2), /* [4] */
+ BTF_FUNC_PROTO_ARG_ENC(NAME_NTH(1), 1),
+ BTF_FUNC_PROTO_ARG_ENC(NAME_NTH(2), 1),
+ BTF_FUNC_ENC(NAME_NTH(3), 4), /* [5] */
+ /* tag -> f: tag1, tag2 */
+ BTF_DECL_TAG_ENC(NAME_NTH(4), 3, -1), /* [6] */
+ BTF_DECL_TAG_ENC(NAME_NTH(5), 3, -1), /* [7] */
+ /* tag -> f/a2: tag1, tag2 */
+ BTF_DECL_TAG_ENC(NAME_NTH(4), 3, 1), /* [8] */
+ BTF_DECL_TAG_ENC(NAME_NTH(5), 3, 1), /* [9] */
+ /* tag -> f: tag1, tag3 */
+ BTF_DECL_TAG_ENC(NAME_NTH(4), 5, -1), /* [10] */
+ BTF_DECL_TAG_ENC(NAME_NTH(6), 5, -1), /* [11] */
+ /* tag -> f/a2: tag1, tag3 */
+ BTF_DECL_TAG_ENC(NAME_NTH(4), 5, 1), /* [12] */
+ BTF_DECL_TAG_ENC(NAME_NTH(6), 5, 1), /* [13] */
+ BTF_END_RAW,
+ },
+ BTF_STR_SEC("\0a1\0a2\0f\0tag1\0tag2\0tag3"),
+ },
+ .expect = {
+ .raw_types = {
+ BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [1] */
+ BTF_FUNC_PROTO_ENC(0, 2), /* [2] */
+ BTF_FUNC_PROTO_ARG_ENC(NAME_NTH(1), 1),
+ BTF_FUNC_PROTO_ARG_ENC(NAME_NTH(2), 1),
+ BTF_FUNC_ENC(NAME_NTH(3), 2), /* [3] */
+ BTF_DECL_TAG_ENC(NAME_NTH(4), 3, -1), /* [4] */
+ BTF_DECL_TAG_ENC(NAME_NTH(5), 3, -1), /* [5] */
+ BTF_DECL_TAG_ENC(NAME_NTH(6), 3, -1), /* [6] */
+ BTF_DECL_TAG_ENC(NAME_NTH(4), 3, 1), /* [7] */
+ BTF_DECL_TAG_ENC(NAME_NTH(5), 3, 1), /* [8] */
+ BTF_DECL_TAG_ENC(NAME_NTH(6), 3, 1), /* [9] */
+ BTF_END_RAW,
+ },
+ BTF_STR_SEC("\0a1\0a2\0f\0tag1\0tag2\0tag3"),
+ },
+},
+{
+ .descr = "dedup: struct/struct_member tags",
+ .input = {
+ .raw_types = {
+ /* int */
+ BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [1] */
+ BTF_STRUCT_ENC(NAME_NTH(1), 2, 8), /* [2] */
+ BTF_MEMBER_ENC(NAME_NTH(2), 1, 0),
+ BTF_MEMBER_ENC(NAME_NTH(3), 1, 32),
+ BTF_STRUCT_ENC(NAME_NTH(1), 2, 8), /* [3] */
+ BTF_MEMBER_ENC(NAME_NTH(2), 1, 0),
+ BTF_MEMBER_ENC(NAME_NTH(3), 1, 32),
+ /* tag -> t: tag1, tag2 */
+ BTF_DECL_TAG_ENC(NAME_NTH(4), 2, -1), /* [4] */
+ BTF_DECL_TAG_ENC(NAME_NTH(5), 2, -1), /* [5] */
+ /* tag -> t/m2: tag1, tag2 */
+ BTF_DECL_TAG_ENC(NAME_NTH(4), 2, 1), /* [6] */
+ BTF_DECL_TAG_ENC(NAME_NTH(5), 2, 1), /* [7] */
+ /* tag -> t: tag1, tag3 */
+ BTF_DECL_TAG_ENC(NAME_NTH(4), 3, -1), /* [8] */
+ BTF_DECL_TAG_ENC(NAME_NTH(6), 3, -1), /* [9] */
+ /* tag -> t/m2: tag1, tag3 */
+ BTF_DECL_TAG_ENC(NAME_NTH(4), 3, 1), /* [10] */
+ BTF_DECL_TAG_ENC(NAME_NTH(6), 3, 1), /* [11] */
+ BTF_END_RAW,
+ },
+ BTF_STR_SEC("\0t\0m1\0m2\0tag1\0tag2\0tag3"),
+ },
+ .expect = {
+ .raw_types = {
+ BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [1] */
+ BTF_STRUCT_ENC(NAME_NTH(1), 2, 8), /* [2] */
+ BTF_MEMBER_ENC(NAME_NTH(2), 1, 0),
+ BTF_MEMBER_ENC(NAME_NTH(3), 1, 32),
+ BTF_DECL_TAG_ENC(NAME_NTH(4), 2, -1), /* [3] */
+ BTF_DECL_TAG_ENC(NAME_NTH(5), 2, -1), /* [4] */
+ BTF_DECL_TAG_ENC(NAME_NTH(6), 2, -1), /* [5] */
+ BTF_DECL_TAG_ENC(NAME_NTH(4), 2, 1), /* [6] */
+ BTF_DECL_TAG_ENC(NAME_NTH(5), 2, 1), /* [7] */
+ BTF_DECL_TAG_ENC(NAME_NTH(6), 2, 1), /* [8] */
+ BTF_END_RAW,
+ },
+ BTF_STR_SEC("\0t\0m1\0m2\0tag1\0tag2\0tag3"),
+ },
+},
+{
+ .descr = "dedup: typedef tags",
+ .input = {
+ .raw_types = {
+ /* int */
+ BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [1] */
+ BTF_TYPEDEF_ENC(NAME_NTH(1), 1), /* [2] */
+ BTF_TYPEDEF_ENC(NAME_NTH(1), 1), /* [3] */
+ /* tag -> t: tag1, tag2 */
+ BTF_DECL_TAG_ENC(NAME_NTH(2), 2, -1), /* [4] */
+ BTF_DECL_TAG_ENC(NAME_NTH(3), 2, -1), /* [5] */
+ /* tag -> t: tag1, tag3 */
+ BTF_DECL_TAG_ENC(NAME_NTH(2), 3, -1), /* [6] */
+ BTF_DECL_TAG_ENC(NAME_NTH(4), 3, -1), /* [7] */
+ BTF_END_RAW,
+ },
+ BTF_STR_SEC("\0t\0tag1\0tag2\0tag3"),
+ },
+ .expect = {
+ .raw_types = {
+ BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [1] */
+ BTF_TYPEDEF_ENC(NAME_NTH(1), 1), /* [2] */
+ BTF_DECL_TAG_ENC(NAME_NTH(2), 2, -1), /* [3] */
+ BTF_DECL_TAG_ENC(NAME_NTH(3), 2, -1), /* [4] */
+ BTF_DECL_TAG_ENC(NAME_NTH(4), 2, -1), /* [5] */
+ BTF_END_RAW,
+ },
+ BTF_STR_SEC("\0t\0tag1\0tag2\0tag3"),
+ },
+},
+{
+ .descr = "dedup: btf_type_tag #1",
+ .input = {
+ .raw_types = {
+ /* ptr -> tag2 -> tag1 -> int */
+ BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [1] */
+ BTF_TYPE_TAG_ENC(NAME_NTH(1), 1), /* [2] */
+ BTF_TYPE_TAG_ENC(NAME_NTH(2), 2), /* [3] */
+ BTF_PTR_ENC(3), /* [4] */
+ /* ptr -> tag2 -> tag1 -> int */
+ BTF_TYPE_TAG_ENC(NAME_NTH(1), 1), /* [5] */
+ BTF_TYPE_TAG_ENC(NAME_NTH(2), 5), /* [6] */
+ BTF_PTR_ENC(6), /* [7] */
+ /* ptr -> tag1 -> int */
+ BTF_TYPE_TAG_ENC(NAME_NTH(1), 1), /* [8] */
+ BTF_PTR_ENC(8), /* [9] */
+ BTF_END_RAW,
+ },
+ BTF_STR_SEC("\0tag1\0tag2"),
+ },
+ .expect = {
+ .raw_types = {
+ /* ptr -> tag2 -> tag1 -> int */
+ BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [1] */
+ BTF_TYPE_TAG_ENC(NAME_NTH(1), 1), /* [2] */
+ BTF_TYPE_TAG_ENC(NAME_NTH(2), 2), /* [3] */
+ BTF_PTR_ENC(3), /* [4] */
+ /* ptr -> tag1 -> int */
+ BTF_PTR_ENC(2), /* [5] */
+ BTF_END_RAW,
+ },
+ BTF_STR_SEC("\0tag1\0tag2"),
+ },
+},
+{
+ .descr = "dedup: btf_type_tag #2",
+ .input = {
+ .raw_types = {
+ /* ptr -> tag2 -> tag1 -> int */
+ BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [1] */
+ BTF_TYPE_TAG_ENC(NAME_NTH(1), 1), /* [2] */
+ BTF_TYPE_TAG_ENC(NAME_NTH(2), 2), /* [3] */
+ BTF_PTR_ENC(3), /* [4] */
+ /* ptr -> tag2 -> int */
+ BTF_TYPE_TAG_ENC(NAME_NTH(2), 1), /* [5] */
+ BTF_PTR_ENC(5), /* [6] */
+ BTF_END_RAW,
+ },
+ BTF_STR_SEC("\0tag1\0tag2"),
+ },
+ .expect = {
+ .raw_types = {
+ /* ptr -> tag2 -> tag1 -> int */
+ BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [1] */
+ BTF_TYPE_TAG_ENC(NAME_NTH(1), 1), /* [2] */
+ BTF_TYPE_TAG_ENC(NAME_NTH(2), 2), /* [3] */
+ BTF_PTR_ENC(3), /* [4] */
+ /* ptr -> tag2 -> int */
+ BTF_TYPE_TAG_ENC(NAME_NTH(2), 1), /* [5] */
+ BTF_PTR_ENC(5), /* [6] */
+ BTF_END_RAW,
+ },
+ BTF_STR_SEC("\0tag1\0tag2"),
+ },
+},
+{
+ .descr = "dedup: btf_type_tag #3",
+ .input = {
+ .raw_types = {
+ /* ptr -> tag2 -> tag1 -> int */
+ BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [1] */
+ BTF_TYPE_TAG_ENC(NAME_NTH(1), 1), /* [2] */
+ BTF_TYPE_TAG_ENC(NAME_NTH(2), 2), /* [3] */
+ BTF_PTR_ENC(3), /* [4] */
+ /* ptr -> tag1 -> tag2 -> int */
+ BTF_TYPE_TAG_ENC(NAME_NTH(2), 1), /* [5] */
+ BTF_TYPE_TAG_ENC(NAME_NTH(1), 5), /* [6] */
+ BTF_PTR_ENC(6), /* [7] */
+ BTF_END_RAW,
+ },
+ BTF_STR_SEC("\0tag1\0tag2"),
+ },
+ .expect = {
+ .raw_types = {
+ /* ptr -> tag2 -> tag1 -> int */
+ BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [1] */
+ BTF_TYPE_TAG_ENC(NAME_NTH(1), 1), /* [2] */
+ BTF_TYPE_TAG_ENC(NAME_NTH(2), 2), /* [3] */
+ BTF_PTR_ENC(3), /* [4] */
+ /* ptr -> tag1 -> tag2 -> int */
+ BTF_TYPE_TAG_ENC(NAME_NTH(2), 1), /* [5] */
+ BTF_TYPE_TAG_ENC(NAME_NTH(1), 5), /* [6] */
+ BTF_PTR_ENC(6), /* [7] */
+ BTF_END_RAW,
+ },
+ BTF_STR_SEC("\0tag1\0tag2"),
+ },
+},
+{
+ .descr = "dedup: btf_type_tag #4",
+ .input = {
+ .raw_types = {
+ /* ptr -> tag1 -> int */
+ BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [1] */
+ BTF_TYPE_TAG_ENC(NAME_NTH(1), 1), /* [2] */
+ BTF_PTR_ENC(2), /* [3] */
+ /* ptr -> tag1 -> long */
+ BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 64, 8), /* [4] */
+ BTF_TYPE_TAG_ENC(NAME_NTH(1), 4), /* [5] */
+ BTF_PTR_ENC(5), /* [6] */
+ BTF_END_RAW,
+ },
+ BTF_STR_SEC("\0tag1"),
+ },
+ .expect = {
+ .raw_types = {
+ /* ptr -> tag1 -> int */
+ BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [1] */
+ BTF_TYPE_TAG_ENC(NAME_NTH(1), 1), /* [2] */
+ BTF_PTR_ENC(2), /* [3] */
+ /* ptr -> tag1 -> long */
+ BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 64, 8), /* [4] */
+ BTF_TYPE_TAG_ENC(NAME_NTH(1), 4), /* [5] */
+ BTF_PTR_ENC(5), /* [6] */
+ BTF_END_RAW,
+ },
+ BTF_STR_SEC("\0tag1"),
+ },
+},
+{
+ .descr = "dedup: btf_type_tag #5, struct",
+ .input = {
+ .raw_types = {
+ BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [1] */
+ BTF_TYPE_TAG_ENC(NAME_NTH(1), 1), /* [2] */
+ BTF_TYPE_ENC(NAME_NTH(2), BTF_INFO_ENC(BTF_KIND_STRUCT, 1, 1), 4), /* [3] */
+ BTF_MEMBER_ENC(NAME_NTH(3), 2, BTF_MEMBER_OFFSET(0, 0)),
+ BTF_TYPE_TAG_ENC(NAME_NTH(1), 1), /* [4] */
+ BTF_TYPE_ENC(NAME_NTH(2), BTF_INFO_ENC(BTF_KIND_STRUCT, 1, 1), 4), /* [5] */
+ BTF_MEMBER_ENC(NAME_NTH(3), 4, BTF_MEMBER_OFFSET(0, 0)),
+ BTF_END_RAW,
+ },
+ BTF_STR_SEC("\0tag1\0t\0m"),
+ },
+ .expect = {
+ .raw_types = {
+ BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [1] */
+ BTF_TYPE_TAG_ENC(NAME_NTH(1), 1), /* [2] */
+ BTF_TYPE_ENC(NAME_NTH(2), BTF_INFO_ENC(BTF_KIND_STRUCT, 1, 1), 4), /* [3] */
+ BTF_MEMBER_ENC(NAME_NTH(3), 2, BTF_MEMBER_OFFSET(0, 0)),
+ BTF_END_RAW,
+ },
+ BTF_STR_SEC("\0tag1\0t\0m"),
+ },
+},
+{
+ .descr = "dedup: enum64, standalone",
+ .input = {
+ .raw_types = {
+ BTF_TYPE_ENC(NAME_NTH(1), BTF_INFO_ENC(BTF_KIND_ENUM64, 0, 1), 8),
+ BTF_ENUM64_ENC(NAME_NTH(2), 1, 123),
+ BTF_TYPE_ENC(NAME_NTH(1), BTF_INFO_ENC(BTF_KIND_ENUM64, 0, 1), 8),
+ BTF_ENUM64_ENC(NAME_NTH(2), 1, 123),
+ BTF_END_RAW,
+ },
+ BTF_STR_SEC("\0e1\0e1_val"),
+ },
+ .expect = {
+ .raw_types = {
+ BTF_TYPE_ENC(NAME_NTH(1), BTF_INFO_ENC(BTF_KIND_ENUM64, 0, 1), 8),
+ BTF_ENUM64_ENC(NAME_NTH(2), 1, 123),
+ BTF_END_RAW,
+ },
+ BTF_STR_SEC("\0e1\0e1_val"),
+ },
+},
+{
+ .descr = "dedup: enum64, fwd resolution",
+ .input = {
+ .raw_types = {
+ /* [1] fwd enum64 'e1' before full enum */
+ BTF_TYPE_ENC(NAME_NTH(1), BTF_INFO_ENC(BTF_KIND_ENUM64, 0, 0), 8),
+ /* [2] full enum64 'e1' after fwd */
+ BTF_TYPE_ENC(NAME_NTH(1), BTF_INFO_ENC(BTF_KIND_ENUM64, 0, 1), 8),
+ BTF_ENUM64_ENC(NAME_NTH(2), 1, 123),
+ /* [3] full enum64 'e2' before fwd */
+ BTF_TYPE_ENC(NAME_NTH(3), BTF_INFO_ENC(BTF_KIND_ENUM64, 0, 1), 8),
+ BTF_ENUM64_ENC(NAME_NTH(4), 0, 456),
+ /* [4] fwd enum64 'e2' after full enum */
+ BTF_TYPE_ENC(NAME_NTH(3), BTF_INFO_ENC(BTF_KIND_ENUM64, 0, 0), 8),
+ /* [5] incompatible full enum64 with different value */
+ BTF_TYPE_ENC(NAME_NTH(1), BTF_INFO_ENC(BTF_KIND_ENUM64, 0, 1), 8),
+ BTF_ENUM64_ENC(NAME_NTH(2), 0, 321),
+ BTF_END_RAW,
+ },
+ BTF_STR_SEC("\0e1\0e1_val\0e2\0e2_val"),
+ },
+ .expect = {
+ .raw_types = {
+ /* [1] full enum64 'e1' */
+ BTF_TYPE_ENC(NAME_NTH(1), BTF_INFO_ENC(BTF_KIND_ENUM64, 0, 1), 8),
+ BTF_ENUM64_ENC(NAME_NTH(2), 1, 123),
+ /* [2] full enum64 'e2' */
+ BTF_TYPE_ENC(NAME_NTH(3), BTF_INFO_ENC(BTF_KIND_ENUM64, 0, 1), 8),
+ BTF_ENUM64_ENC(NAME_NTH(4), 0, 456),
+ /* [3] incompatible full enum64 with different value */
+ BTF_TYPE_ENC(NAME_NTH(1), BTF_INFO_ENC(BTF_KIND_ENUM64, 0, 1), 8),
+ BTF_ENUM64_ENC(NAME_NTH(2), 0, 321),
+ BTF_END_RAW,
+ },
+ BTF_STR_SEC("\0e1\0e1_val\0e2\0e2_val"),
+ },
+},
+{
+ .descr = "dedup: enum and enum64, no dedup",
+ .input = {
+ .raw_types = {
+ /* [1] enum 'e1' */
+ BTF_TYPE_ENC(NAME_NTH(1), BTF_INFO_ENC(BTF_KIND_ENUM, 0, 1), 4),
+ BTF_ENUM_ENC(NAME_NTH(2), 1),
+ /* [2] enum64 'e1' */
+ BTF_TYPE_ENC(NAME_NTH(1), BTF_INFO_ENC(BTF_KIND_ENUM64, 0, 1), 4),
+ BTF_ENUM64_ENC(NAME_NTH(2), 1, 0),
+ BTF_END_RAW,
+ },
+ BTF_STR_SEC("\0e1\0e1_val"),
+ },
+ .expect = {
+ .raw_types = {
+ /* [1] enum 'e1' */
+ BTF_TYPE_ENC(NAME_NTH(1), BTF_INFO_ENC(BTF_KIND_ENUM, 0, 1), 4),
+ BTF_ENUM_ENC(NAME_NTH(2), 1),
+ /* [2] enum64 'e1' */
+ BTF_TYPE_ENC(NAME_NTH(1), BTF_INFO_ENC(BTF_KIND_ENUM64, 0, 1), 4),
+ BTF_ENUM64_ENC(NAME_NTH(2), 1, 0),
+ BTF_END_RAW,
+ },
+ BTF_STR_SEC("\0e1\0e1_val"),
+ },
+},
+{
+ .descr = "dedup: enum of different size: no dedup",
+ .input = {
+ .raw_types = {
+ /* [1] enum 'e1' */
+ BTF_TYPE_ENC(NAME_NTH(1), BTF_INFO_ENC(BTF_KIND_ENUM, 0, 1), 4),
+ BTF_ENUM_ENC(NAME_NTH(2), 1),
+ /* [2] enum 'e1' */
+ BTF_TYPE_ENC(NAME_NTH(1), BTF_INFO_ENC(BTF_KIND_ENUM, 0, 1), 2),
+ BTF_ENUM_ENC(NAME_NTH(2), 1),
+ BTF_END_RAW,
+ },
+ BTF_STR_SEC("\0e1\0e1_val"),
+ },
+ .expect = {
+ .raw_types = {
+ /* [1] enum 'e1' */
+ BTF_TYPE_ENC(NAME_NTH(1), BTF_INFO_ENC(BTF_KIND_ENUM, 0, 1), 4),
+ BTF_ENUM_ENC(NAME_NTH(2), 1),
+ /* [2] enum 'e1' */
+ BTF_TYPE_ENC(NAME_NTH(1), BTF_INFO_ENC(BTF_KIND_ENUM, 0, 1), 2),
+ BTF_ENUM_ENC(NAME_NTH(2), 1),
+ BTF_END_RAW,
+ },
+ BTF_STR_SEC("\0e1\0e1_val"),
+ },
+},
+{
+ .descr = "dedup: enum fwd to enum64",
+ .input = {
+ .raw_types = {
+ /* [1] enum64 'e1' */
+ BTF_TYPE_ENC(NAME_NTH(1), BTF_INFO_ENC(BTF_KIND_ENUM64, 0, 1), 8),
+ BTF_ENUM64_ENC(NAME_NTH(2), 1, 0),
+ /* [2] enum 'e1' fwd */
+ BTF_TYPE_ENC(NAME_NTH(1), BTF_INFO_ENC(BTF_KIND_ENUM, 0, 0), 4),
+ /* [3] typedef enum 'e1' td */
+ BTF_TYPE_ENC(NAME_NTH(3), BTF_INFO_ENC(BTF_KIND_TYPEDEF, 0, 0), 2),
+ BTF_END_RAW,
+ },
+ BTF_STR_SEC("\0e1\0e1_val\0td"),
+ },
+ .expect = {
+ .raw_types = {
+ /* [1] enum64 'e1' */
+ BTF_TYPE_ENC(NAME_NTH(1), BTF_INFO_ENC(BTF_KIND_ENUM64, 0, 1), 8),
+ BTF_ENUM64_ENC(NAME_NTH(2), 1, 0),
+ /* [2] typedef enum 'e1' td */
+ BTF_TYPE_ENC(NAME_NTH(3), BTF_INFO_ENC(BTF_KIND_TYPEDEF, 0, 0), 1),
+ BTF_END_RAW,
+ },
+ BTF_STR_SEC("\0e1\0e1_val\0td"),
+ },
+},
+{
+ .descr = "dedup: enum64 fwd to enum",
+ .input = {
+ .raw_types = {
+ /* [1] enum 'e1' */
+ BTF_TYPE_ENC(NAME_NTH(1), BTF_INFO_ENC(BTF_KIND_ENUM, 0, 1), 4),
+ BTF_ENUM_ENC(NAME_NTH(2), 1),
+ /* [2] enum64 'e1' fwd */
+ BTF_TYPE_ENC(NAME_NTH(1), BTF_INFO_ENC(BTF_KIND_ENUM64, 0, 0), 8),
+ /* [3] typedef enum 'e1' td */
+ BTF_TYPE_ENC(NAME_NTH(3), BTF_INFO_ENC(BTF_KIND_TYPEDEF, 0, 0), 2),
+ BTF_END_RAW,
+ },
+ BTF_STR_SEC("\0e1\0e1_val\0td"),
+ },
+ .expect = {
+ .raw_types = {
+ /* [1] enum 'e1' */
+ BTF_TYPE_ENC(NAME_NTH(1), BTF_INFO_ENC(BTF_KIND_ENUM, 0, 1), 4),
+ BTF_ENUM_ENC(NAME_NTH(2), 1),
+ /* [2] typedef enum 'e1' td */
+ BTF_TYPE_ENC(NAME_NTH(3), BTF_INFO_ENC(BTF_KIND_TYPEDEF, 0, 0), 1),
+ BTF_END_RAW,
+ },
+ BTF_STR_SEC("\0e1\0e1_val\0td"),
+ },
+},
+{
+ .descr = "dedup: standalone fwd declaration struct",
+ /*
+ * Verify that CU1:foo and CU2:foo would be unified and that
+ * typedef/ptr would be updated to point to CU1:foo.
+ *
+ * // CU 1:
+ * struct foo { int x; };
+ *
+ * // CU 2:
+ * struct foo;
+ * typedef struct foo *foo_ptr;
+ */
+ .input = {
+ .raw_types = {
+ /* CU 1 */
+ BTF_STRUCT_ENC(NAME_NTH(1), 1, 4), /* [1] */
+ BTF_MEMBER_ENC(NAME_NTH(2), 2, 0),
+ BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [2] */
+ /* CU 2 */
+ BTF_FWD_ENC(NAME_NTH(1), 0), /* [3] */
+ BTF_PTR_ENC(3), /* [4] */
+ BTF_TYPEDEF_ENC(NAME_NTH(3), 4), /* [5] */
+ BTF_END_RAW,
+ },
+ BTF_STR_SEC("\0foo\0x\0foo_ptr"),
+ },
+ .expect = {
+ .raw_types = {
+ BTF_STRUCT_ENC(NAME_NTH(1), 1, 4), /* [1] */
+ BTF_MEMBER_ENC(NAME_NTH(2), 2, 0),
+ BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [2] */
+ BTF_PTR_ENC(1), /* [3] */
+ BTF_TYPEDEF_ENC(NAME_NTH(3), 3), /* [4] */
+ BTF_END_RAW,
+ },
+ BTF_STR_SEC("\0foo\0x\0foo_ptr"),
+ },
+},
+{
+ .descr = "dedup: standalone fwd declaration union",
+ /*
+ * Verify that CU1:foo and CU2:foo would be unified and that
+ * typedef/ptr would be updated to point to CU1:foo.
+ * Same as "dedup: standalone fwd declaration struct" but for unions.
+ *
+ * // CU 1:
+ * union foo { int x; };
+ *
+ * // CU 2:
+ * union foo;
+ * typedef union foo *foo_ptr;
+ */
+ .input = {
+ .raw_types = {
+ /* CU 1 */
+ BTF_UNION_ENC(NAME_NTH(1), 1, 4), /* [1] */
+ BTF_MEMBER_ENC(NAME_NTH(2), 2, 0),
+ BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [2] */
+ /* CU 2 */
+ BTF_FWD_ENC(NAME_TBD, 1), /* [3] */
+ BTF_PTR_ENC(3), /* [4] */
+ BTF_TYPEDEF_ENC(NAME_NTH(3), 4), /* [5] */
+ BTF_END_RAW,
+ },
+ BTF_STR_SEC("\0foo\0x\0foo_ptr"),
+ },
+ .expect = {
+ .raw_types = {
+ BTF_UNION_ENC(NAME_NTH(1), 1, 4), /* [1] */
+ BTF_MEMBER_ENC(NAME_NTH(2), 2, 0),
+ BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [2] */
+ BTF_PTR_ENC(1), /* [3] */
+ BTF_TYPEDEF_ENC(NAME_NTH(3), 3), /* [4] */
+ BTF_END_RAW,
+ },
+ BTF_STR_SEC("\0foo\0x\0foo_ptr"),
+ },
+},
+{
+ .descr = "dedup: standalone fwd declaration wrong kind",
+ /*
+ * Negative test for btf_dedup_resolve_fwds:
+ * - CU1:foo is a struct, C2:foo is a union, thus CU2:foo is not deduped;
+ * - typedef/ptr should remain unchanged as well.
+ *
+ * // CU 1:
+ * struct foo { int x; };
+ *
+ * // CU 2:
+ * union foo;
+ * typedef union foo *foo_ptr;
+ */
+ .input = {
+ .raw_types = {
+ /* CU 1 */
+ BTF_STRUCT_ENC(NAME_NTH(1), 1, 4), /* [1] */
+ BTF_MEMBER_ENC(NAME_NTH(2), 2, 0),
+ BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [2] */
+ /* CU 2 */
+ BTF_FWD_ENC(NAME_NTH(3), 1), /* [3] */
+ BTF_PTR_ENC(3), /* [4] */
+ BTF_TYPEDEF_ENC(NAME_NTH(3), 4), /* [5] */
+ BTF_END_RAW,
+ },
+ BTF_STR_SEC("\0foo\0x\0foo_ptr"),
+ },
+ .expect = {
+ .raw_types = {
+ /* CU 1 */
+ BTF_STRUCT_ENC(NAME_NTH(1), 1, 4), /* [1] */
+ BTF_MEMBER_ENC(NAME_NTH(2), 2, 0),
+ BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [2] */
+ /* CU 2 */
+ BTF_FWD_ENC(NAME_NTH(3), 1), /* [3] */
+ BTF_PTR_ENC(3), /* [4] */
+ BTF_TYPEDEF_ENC(NAME_NTH(3), 4), /* [5] */
+ BTF_END_RAW,
+ },
+ BTF_STR_SEC("\0foo\0x\0foo_ptr"),
+ },
+},
+{
+ .descr = "dedup: standalone fwd declaration name conflict",
+ /*
+ * Negative test for btf_dedup_resolve_fwds:
+ * - two candidates for CU2:foo dedup, thus it is unchanged;
+ * - typedef/ptr should remain unchanged as well.
+ *
+ * // CU 1:
+ * struct foo { int x; };
+ *
+ * // CU 2:
+ * struct foo;
+ * typedef struct foo *foo_ptr;
+ *
+ * // CU 3:
+ * struct foo { int x; int y; };
+ */
+ .input = {
+ .raw_types = {
+ /* CU 1 */
+ BTF_STRUCT_ENC(NAME_NTH(1), 1, 4), /* [1] */
+ BTF_MEMBER_ENC(NAME_NTH(2), 2, 0),
+ BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [2] */
+ /* CU 2 */
+ BTF_FWD_ENC(NAME_NTH(1), 0), /* [3] */
+ BTF_PTR_ENC(3), /* [4] */
+ BTF_TYPEDEF_ENC(NAME_NTH(4), 4), /* [5] */
+ /* CU 3 */
+ BTF_STRUCT_ENC(NAME_NTH(1), 2, 8), /* [6] */
+ BTF_MEMBER_ENC(NAME_NTH(2), 2, 0),
+ BTF_MEMBER_ENC(NAME_NTH(3), 2, 0),
+ BTF_END_RAW,
+ },
+ BTF_STR_SEC("\0foo\0x\0y\0foo_ptr"),
+ },
+ .expect = {
+ .raw_types = {
+ /* CU 1 */
+ BTF_STRUCT_ENC(NAME_NTH(1), 1, 4), /* [1] */
+ BTF_MEMBER_ENC(NAME_NTH(2), 2, 0),
+ BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [2] */
+ /* CU 2 */
+ BTF_FWD_ENC(NAME_NTH(1), 0), /* [3] */
+ BTF_PTR_ENC(3), /* [4] */
+ BTF_TYPEDEF_ENC(NAME_NTH(4), 4), /* [5] */
+ /* CU 3 */
+ BTF_STRUCT_ENC(NAME_NTH(1), 2, 8), /* [6] */
+ BTF_MEMBER_ENC(NAME_NTH(2), 2, 0),
+ BTF_MEMBER_ENC(NAME_NTH(3), 2, 0),
+ BTF_END_RAW,
+ },
+ BTF_STR_SEC("\0foo\0x\0y\0foo_ptr"),
},
},
-
};
static int btf_type_size(const struct btf_type *t)
@@ -6720,11 +7989,15 @@ static int btf_type_size(const struct btf_type *t)
case BTF_KIND_PTR:
case BTF_KIND_TYPEDEF:
case BTF_KIND_FUNC:
+ case BTF_KIND_FLOAT:
+ case BTF_KIND_TYPE_TAG:
return base_size;
case BTF_KIND_INT:
return base_size + sizeof(__u32);
case BTF_KIND_ENUM:
return base_size + vlen * sizeof(struct btf_enum);
+ case BTF_KIND_ENUM64:
+ return base_size + vlen * sizeof(struct btf_enum64);
case BTF_KIND_ARRAY:
return base_size + sizeof(struct btf_array);
case BTF_KIND_STRUCT:
@@ -6736,6 +8009,8 @@ static int btf_type_size(const struct btf_type *t)
return base_size + sizeof(struct btf_var);
case BTF_KIND_DATASEC:
return base_size + vlen * sizeof(struct btf_var_secinfo);
+ case BTF_KIND_DECL_TAG:
+ return base_size + sizeof(struct btf_decl_tag);
default:
fprintf(stderr, "Unsupported BTF_KIND:%u\n", kind);
return -EINVAL;
@@ -6754,32 +8029,34 @@ static void dump_btf_strings(const char *strs, __u32 len)
}
}
-static int do_test_dedup(unsigned int test_num)
+static void do_test_dedup(unsigned int test_num)
{
- const struct btf_dedup_test *test = &dedup_tests[test_num - 1];
+ struct btf_dedup_test *test = &dedup_tests[test_num - 1];
__u32 test_nr_types, expect_nr_types, test_btf_size, expect_btf_size;
const struct btf_header *test_hdr, *expect_hdr;
struct btf *test_btf = NULL, *expect_btf = NULL;
const void *test_btf_data, *expect_btf_data;
const char *ret_test_next_str, *ret_expect_next_str;
const char *test_strs, *expect_strs;
- const char *test_str_cur, *test_str_end;
+ const char *test_str_cur;
const char *expect_str_cur, *expect_str_end;
unsigned int raw_btf_size;
void *raw_btf;
int err = 0, i;
- fprintf(stderr, "BTF dedup test[%u] (%s):", test_num, test->descr);
+ if (!test__start_subtest(test->descr))
+ return;
raw_btf = btf_raw_create(&hdr_tmpl, test->input.raw_types,
test->input.str_sec, test->input.str_sec_size,
&raw_btf_size, &ret_test_next_str);
if (!raw_btf)
- return -1;
+ return;
+
test_btf = btf__new((__u8 *)raw_btf, raw_btf_size);
+ err = libbpf_get_error(test_btf);
free(raw_btf);
- if (CHECK(IS_ERR(test_btf), "invalid test_btf errno:%ld",
- PTR_ERR(test_btf))) {
+ if (CHECK(err, "invalid test_btf errno:%d", err)) {
err = -1;
goto done;
}
@@ -6789,23 +8066,24 @@ static int do_test_dedup(unsigned int test_num)
test->expect.str_sec_size,
&raw_btf_size, &ret_expect_next_str);
if (!raw_btf)
- return -1;
+ return;
expect_btf = btf__new((__u8 *)raw_btf, raw_btf_size);
+ err = libbpf_get_error(expect_btf);
free(raw_btf);
- if (CHECK(IS_ERR(expect_btf), "invalid expect_btf errno:%ld",
- PTR_ERR(expect_btf))) {
+ if (CHECK(err, "invalid expect_btf errno:%d", err)) {
err = -1;
goto done;
}
- err = btf__dedup(test_btf, NULL, &test->opts);
+ test->opts.sz = sizeof(test->opts);
+ err = btf__dedup(test_btf, &test->opts);
if (CHECK(err, "btf_dedup failed errno:%d", err)) {
err = -1;
goto done;
}
- test_btf_data = btf__get_raw_data(test_btf, &test_btf_size);
- expect_btf_data = btf__get_raw_data(expect_btf, &expect_btf_size);
+ test_btf_data = btf__raw_data(test_btf, &test_btf_size);
+ expect_btf_data = btf__raw_data(expect_btf, &expect_btf_size);
if (CHECK(test_btf_size != expect_btf_size,
"test_btf_size:%u != expect_btf_size:%u",
test_btf_size, expect_btf_size)) {
@@ -6828,12 +8106,18 @@ static int do_test_dedup(unsigned int test_num)
goto done;
}
- test_str_cur = test_strs;
- test_str_end = test_strs + test_hdr->str_len;
expect_str_cur = expect_strs;
expect_str_end = expect_strs + expect_hdr->str_len;
- while (test_str_cur < test_str_end && expect_str_cur < expect_str_end) {
+ while (expect_str_cur < expect_str_end) {
size_t test_len, expect_len;
+ int off;
+
+ off = btf__find_str(test_btf, expect_str_cur);
+ if (CHECK(off < 0, "exp str '%s' not found: %d\n", expect_str_cur, off)) {
+ err = -1;
+ goto done;
+ }
+ test_str_cur = btf__str_by_offset(test_btf, off);
test_len = strlen(test_str_cur);
expect_len = strlen(expect_str_cur);
@@ -6850,18 +8134,11 @@ static int do_test_dedup(unsigned int test_num)
err = -1;
goto done;
}
- test_str_cur += test_len + 1;
expect_str_cur += expect_len + 1;
}
- if (CHECK(test_str_cur != test_str_end,
- "test_str_cur:%p != test_str_end:%p",
- test_str_cur, test_str_end)) {
- err = -1;
- goto done;
- }
- test_nr_types = btf__get_nr_types(test_btf);
- expect_nr_types = btf__get_nr_types(expect_btf);
+ test_nr_types = btf__type_cnt(test_btf);
+ expect_nr_types = btf__type_cnt(expect_btf);
if (CHECK(test_nr_types != expect_nr_types,
"test_nr_types:%u != expect_nr_types:%u",
test_nr_types, expect_nr_types)) {
@@ -6869,7 +8146,7 @@ static int do_test_dedup(unsigned int test_num)
goto done;
}
- for (i = 1; i <= test_nr_types; i++) {
+ for (i = 1; i < test_nr_types; i++) {
const struct btf_type *test_type, *expect_type;
int test_size, expect_size;
@@ -6884,184 +8161,46 @@ static int do_test_dedup(unsigned int test_num)
err = -1;
goto done;
}
- if (CHECK(memcmp((void *)test_type,
- (void *)expect_type,
- test_size),
- "type #%d: contents differ", i)) {
+ if (CHECK(btf_kind(test_type) != btf_kind(expect_type),
+ "type %d kind: exp %d != got %u\n",
+ i, btf_kind(expect_type), btf_kind(test_type))) {
+ err = -1;
+ goto done;
+ }
+ if (CHECK(test_type->info != expect_type->info,
+ "type %d info: exp %d != got %u\n",
+ i, expect_type->info, test_type->info)) {
+ err = -1;
+ goto done;
+ }
+ if (CHECK(test_type->size != expect_type->size,
+ "type %d size/type: exp %d != got %u\n",
+ i, expect_type->size, test_type->size)) {
err = -1;
goto done;
}
}
done:
- if (!err)
- fprintf(stderr, "OK");
- if (!IS_ERR(test_btf))
- btf__free(test_btf);
- if (!IS_ERR(expect_btf))
- btf__free(expect_btf);
-
- return err;
+ btf__free(test_btf);
+ btf__free(expect_btf);
}
-static int test_dedup(void)
+void test_btf(void)
{
- unsigned int i;
- int err = 0;
+ int i;
- if (args.dedup_test_num)
- return count_result(do_test_dedup(args.dedup_test_num));
+ always_log = env.verbosity > VERBOSE_NONE;
+ for (i = 1; i <= ARRAY_SIZE(raw_tests); i++)
+ do_test_raw(i);
+ for (i = 1; i <= ARRAY_SIZE(get_info_tests); i++)
+ do_test_get_info(i);
+ for (i = 1; i <= ARRAY_SIZE(file_tests); i++)
+ do_test_file(i);
+ for (i = 1; i <= ARRAY_SIZE(info_raw_tests); i++)
+ do_test_info_raw(i);
for (i = 1; i <= ARRAY_SIZE(dedup_tests); i++)
- err |= count_result(do_test_dedup(i));
-
- return err;
-}
-
-static void usage(const char *cmd)
-{
- fprintf(stderr, "Usage: %s [-l] [[-r btf_raw_test_num (1 - %zu)] |\n"
- "\t[-g btf_get_info_test_num (1 - %zu)] |\n"
- "\t[-f btf_file_test_num (1 - %zu)] |\n"
- "\t[-k btf_prog_info_raw_test_num (1 - %zu)] |\n"
- "\t[-p (pretty print test)] |\n"
- "\t[-d btf_dedup_test_num (1 - %zu)]]\n",
- cmd, ARRAY_SIZE(raw_tests), ARRAY_SIZE(get_info_tests),
- ARRAY_SIZE(file_tests), ARRAY_SIZE(info_raw_tests),
- ARRAY_SIZE(dedup_tests));
-}
-
-static int parse_args(int argc, char **argv)
-{
- const char *optstr = "hlpk:f:r:g:d:";
- int opt;
-
- while ((opt = getopt(argc, argv, optstr)) != -1) {
- switch (opt) {
- case 'l':
- args.always_log = true;
- break;
- case 'f':
- args.file_test_num = atoi(optarg);
- args.file_test = true;
- break;
- case 'r':
- args.raw_test_num = atoi(optarg);
- args.raw_test = true;
- break;
- case 'g':
- args.get_info_test_num = atoi(optarg);
- args.get_info_test = true;
- break;
- case 'p':
- args.pprint_test = true;
- break;
- case 'k':
- args.info_raw_test_num = atoi(optarg);
- args.info_raw_test = true;
- break;
- case 'd':
- args.dedup_test_num = atoi(optarg);
- args.dedup_test = true;
- break;
- case 'h':
- usage(argv[0]);
- exit(0);
- default:
- usage(argv[0]);
- return -1;
- }
- }
-
- if (args.raw_test_num &&
- (args.raw_test_num < 1 ||
- args.raw_test_num > ARRAY_SIZE(raw_tests))) {
- fprintf(stderr, "BTF raw test number must be [1 - %zu]\n",
- ARRAY_SIZE(raw_tests));
- return -1;
- }
-
- if (args.file_test_num &&
- (args.file_test_num < 1 ||
- args.file_test_num > ARRAY_SIZE(file_tests))) {
- fprintf(stderr, "BTF file test number must be [1 - %zu]\n",
- ARRAY_SIZE(file_tests));
- return -1;
- }
-
- if (args.get_info_test_num &&
- (args.get_info_test_num < 1 ||
- args.get_info_test_num > ARRAY_SIZE(get_info_tests))) {
- fprintf(stderr, "BTF get info test number must be [1 - %zu]\n",
- ARRAY_SIZE(get_info_tests));
- return -1;
- }
-
- if (args.info_raw_test_num &&
- (args.info_raw_test_num < 1 ||
- args.info_raw_test_num > ARRAY_SIZE(info_raw_tests))) {
- fprintf(stderr, "BTF prog info raw test number must be [1 - %zu]\n",
- ARRAY_SIZE(info_raw_tests));
- return -1;
- }
-
- if (args.dedup_test_num &&
- (args.dedup_test_num < 1 ||
- args.dedup_test_num > ARRAY_SIZE(dedup_tests))) {
- fprintf(stderr, "BTF dedup test number must be [1 - %zu]\n",
- ARRAY_SIZE(dedup_tests));
- return -1;
- }
-
- return 0;
-}
-
-static void print_summary(void)
-{
- fprintf(stderr, "PASS:%u SKIP:%u FAIL:%u\n",
- pass_cnt - skip_cnt, skip_cnt, error_cnt);
-}
-
-int main(int argc, char **argv)
-{
- int err = 0;
-
- err = parse_args(argc, argv);
- if (err)
- return err;
-
- if (args.always_log)
- libbpf_set_print(__base_pr);
-
- if (args.raw_test)
- err |= test_raw();
-
- if (args.get_info_test)
- err |= test_get_info();
-
- if (args.file_test)
- err |= test_file();
-
- if (args.pprint_test)
- err |= test_pprint();
-
- if (args.info_raw_test)
- err |= test_info_raw();
-
- if (args.dedup_test)
- err |= test_dedup();
-
- if (args.raw_test || args.get_info_test || args.file_test ||
- args.pprint_test || args.info_raw_test || args.dedup_test)
- goto done;
-
- err |= test_raw();
- err |= test_get_info();
- err |= test_file();
- err |= test_info_raw();
- err |= test_dedup();
-
-done:
- print_summary();
- return err;
+ do_test_dedup(i);
+ test_pprint();
}
diff --git a/tools/testing/selftests/bpf/prog_tests/btf_dedup_split.c b/tools/testing/selftests/bpf/prog_tests/btf_dedup_split.c
new file mode 100644
index 000000000000..d9024c7a892a
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/btf_dedup_split.c
@@ -0,0 +1,453 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2020 Facebook */
+#include <test_progs.h>
+#include <bpf/btf.h>
+#include "btf_helpers.h"
+
+static void test_split_simple() {
+ const struct btf_type *t;
+ struct btf *btf1, *btf2;
+ int str_off, err;
+
+ btf1 = btf__new_empty();
+ if (!ASSERT_OK_PTR(btf1, "empty_main_btf"))
+ return;
+
+ btf__set_pointer_size(btf1, 8); /* enforce 64-bit arch */
+
+ btf__add_int(btf1, "int", 4, BTF_INT_SIGNED); /* [1] int */
+ btf__add_ptr(btf1, 1); /* [2] ptr to int */
+ btf__add_struct(btf1, "s1", 4); /* [3] struct s1 { */
+ btf__add_field(btf1, "f1", 1, 0, 0); /* int f1; */
+ /* } */
+
+ VALIDATE_RAW_BTF(
+ btf1,
+ "[1] INT 'int' size=4 bits_offset=0 nr_bits=32 encoding=SIGNED",
+ "[2] PTR '(anon)' type_id=1",
+ "[3] STRUCT 's1' size=4 vlen=1\n"
+ "\t'f1' type_id=1 bits_offset=0");
+
+ ASSERT_STREQ(btf_type_c_dump(btf1), "\
+struct s1 {\n\
+ int f1;\n\
+};\n\n", "c_dump");
+
+ btf2 = btf__new_empty_split(btf1);
+ if (!ASSERT_OK_PTR(btf2, "empty_split_btf"))
+ goto cleanup;
+
+ /* pointer size should be "inherited" from main BTF */
+ ASSERT_EQ(btf__pointer_size(btf2), 8, "inherit_ptr_sz");
+
+ str_off = btf__find_str(btf2, "int");
+ ASSERT_NEQ(str_off, -ENOENT, "str_int_missing");
+
+ t = btf__type_by_id(btf2, 1);
+ if (!ASSERT_OK_PTR(t, "int_type"))
+ goto cleanup;
+ ASSERT_EQ(btf_is_int(t), true, "int_kind");
+ ASSERT_STREQ(btf__str_by_offset(btf2, t->name_off), "int", "int_name");
+
+ btf__add_struct(btf2, "s2", 16); /* [4] struct s2 { */
+ btf__add_field(btf2, "f1", 6, 0, 0); /* struct s1 f1; */
+ btf__add_field(btf2, "f2", 5, 32, 0); /* int f2; */
+ btf__add_field(btf2, "f3", 2, 64, 0); /* int *f3; */
+ /* } */
+
+ /* duplicated int */
+ btf__add_int(btf2, "int", 4, BTF_INT_SIGNED); /* [5] int */
+
+ /* duplicated struct s1 */
+ btf__add_struct(btf2, "s1", 4); /* [6] struct s1 { */
+ btf__add_field(btf2, "f1", 5, 0, 0); /* int f1; */
+ /* } */
+
+ VALIDATE_RAW_BTF(
+ btf2,
+ "[1] INT 'int' size=4 bits_offset=0 nr_bits=32 encoding=SIGNED",
+ "[2] PTR '(anon)' type_id=1",
+ "[3] STRUCT 's1' size=4 vlen=1\n"
+ "\t'f1' type_id=1 bits_offset=0",
+ "[4] STRUCT 's2' size=16 vlen=3\n"
+ "\t'f1' type_id=6 bits_offset=0\n"
+ "\t'f2' type_id=5 bits_offset=32\n"
+ "\t'f3' type_id=2 bits_offset=64",
+ "[5] INT 'int' size=4 bits_offset=0 nr_bits=32 encoding=SIGNED",
+ "[6] STRUCT 's1' size=4 vlen=1\n"
+ "\t'f1' type_id=5 bits_offset=0");
+
+ ASSERT_STREQ(btf_type_c_dump(btf2), "\
+struct s1 {\n\
+ int f1;\n\
+};\n\
+\n\
+struct s1___2 {\n\
+ int f1;\n\
+};\n\
+\n\
+struct s2 {\n\
+ struct s1___2 f1;\n\
+ int f2;\n\
+ int *f3;\n\
+};\n\n", "c_dump");
+
+ err = btf__dedup(btf2, NULL);
+ if (!ASSERT_OK(err, "btf_dedup"))
+ goto cleanup;
+
+ VALIDATE_RAW_BTF(
+ btf2,
+ "[1] INT 'int' size=4 bits_offset=0 nr_bits=32 encoding=SIGNED",
+ "[2] PTR '(anon)' type_id=1",
+ "[3] STRUCT 's1' size=4 vlen=1\n"
+ "\t'f1' type_id=1 bits_offset=0",
+ "[4] STRUCT 's2' size=16 vlen=3\n"
+ "\t'f1' type_id=3 bits_offset=0\n"
+ "\t'f2' type_id=1 bits_offset=32\n"
+ "\t'f3' type_id=2 bits_offset=64");
+
+ ASSERT_STREQ(btf_type_c_dump(btf2), "\
+struct s1 {\n\
+ int f1;\n\
+};\n\
+\n\
+struct s2 {\n\
+ struct s1 f1;\n\
+ int f2;\n\
+ int *f3;\n\
+};\n\n", "c_dump");
+
+cleanup:
+ btf__free(btf2);
+ btf__free(btf1);
+}
+
+static void test_split_fwd_resolve() {
+ struct btf *btf1, *btf2;
+ int err;
+
+ btf1 = btf__new_empty();
+ if (!ASSERT_OK_PTR(btf1, "empty_main_btf"))
+ return;
+
+ btf__set_pointer_size(btf1, 8); /* enforce 64-bit arch */
+
+ btf__add_int(btf1, "int", 4, BTF_INT_SIGNED); /* [1] int */
+ btf__add_ptr(btf1, 4); /* [2] ptr to struct s1 */
+ btf__add_ptr(btf1, 5); /* [3] ptr to struct s2 */
+ btf__add_struct(btf1, "s1", 16); /* [4] struct s1 { */
+ btf__add_field(btf1, "f1", 2, 0, 0); /* struct s1 *f1; */
+ btf__add_field(btf1, "f2", 3, 64, 0); /* struct s2 *f2; */
+ /* } */
+ btf__add_struct(btf1, "s2", 4); /* [5] struct s2 { */
+ btf__add_field(btf1, "f1", 1, 0, 0); /* int f1; */
+ /* } */
+ /* keep this not a part of type the graph to test btf_dedup_resolve_fwds */
+ btf__add_struct(btf1, "s3", 4); /* [6] struct s3 { */
+ btf__add_field(btf1, "f1", 1, 0, 0); /* int f1; */
+ /* } */
+
+ VALIDATE_RAW_BTF(
+ btf1,
+ "[1] INT 'int' size=4 bits_offset=0 nr_bits=32 encoding=SIGNED",
+ "[2] PTR '(anon)' type_id=4",
+ "[3] PTR '(anon)' type_id=5",
+ "[4] STRUCT 's1' size=16 vlen=2\n"
+ "\t'f1' type_id=2 bits_offset=0\n"
+ "\t'f2' type_id=3 bits_offset=64",
+ "[5] STRUCT 's2' size=4 vlen=1\n"
+ "\t'f1' type_id=1 bits_offset=0",
+ "[6] STRUCT 's3' size=4 vlen=1\n"
+ "\t'f1' type_id=1 bits_offset=0");
+
+ btf2 = btf__new_empty_split(btf1);
+ if (!ASSERT_OK_PTR(btf2, "empty_split_btf"))
+ goto cleanup;
+
+ btf__add_int(btf2, "int", 4, BTF_INT_SIGNED); /* [7] int */
+ btf__add_ptr(btf2, 11); /* [8] ptr to struct s1 */
+ btf__add_fwd(btf2, "s2", BTF_FWD_STRUCT); /* [9] fwd for struct s2 */
+ btf__add_ptr(btf2, 9); /* [10] ptr to fwd struct s2 */
+ btf__add_struct(btf2, "s1", 16); /* [11] struct s1 { */
+ btf__add_field(btf2, "f1", 8, 0, 0); /* struct s1 *f1; */
+ btf__add_field(btf2, "f2", 10, 64, 0); /* struct s2 *f2; */
+ /* } */
+ btf__add_fwd(btf2, "s3", BTF_FWD_STRUCT); /* [12] fwd for struct s3 */
+ btf__add_ptr(btf2, 12); /* [13] ptr to struct s1 */
+
+ VALIDATE_RAW_BTF(
+ btf2,
+ "[1] INT 'int' size=4 bits_offset=0 nr_bits=32 encoding=SIGNED",
+ "[2] PTR '(anon)' type_id=4",
+ "[3] PTR '(anon)' type_id=5",
+ "[4] STRUCT 's1' size=16 vlen=2\n"
+ "\t'f1' type_id=2 bits_offset=0\n"
+ "\t'f2' type_id=3 bits_offset=64",
+ "[5] STRUCT 's2' size=4 vlen=1\n"
+ "\t'f1' type_id=1 bits_offset=0",
+ "[6] STRUCT 's3' size=4 vlen=1\n"
+ "\t'f1' type_id=1 bits_offset=0",
+ "[7] INT 'int' size=4 bits_offset=0 nr_bits=32 encoding=SIGNED",
+ "[8] PTR '(anon)' type_id=11",
+ "[9] FWD 's2' fwd_kind=struct",
+ "[10] PTR '(anon)' type_id=9",
+ "[11] STRUCT 's1' size=16 vlen=2\n"
+ "\t'f1' type_id=8 bits_offset=0\n"
+ "\t'f2' type_id=10 bits_offset=64",
+ "[12] FWD 's3' fwd_kind=struct",
+ "[13] PTR '(anon)' type_id=12");
+
+ err = btf__dedup(btf2, NULL);
+ if (!ASSERT_OK(err, "btf_dedup"))
+ goto cleanup;
+
+ VALIDATE_RAW_BTF(
+ btf2,
+ "[1] INT 'int' size=4 bits_offset=0 nr_bits=32 encoding=SIGNED",
+ "[2] PTR '(anon)' type_id=4",
+ "[3] PTR '(anon)' type_id=5",
+ "[4] STRUCT 's1' size=16 vlen=2\n"
+ "\t'f1' type_id=2 bits_offset=0\n"
+ "\t'f2' type_id=3 bits_offset=64",
+ "[5] STRUCT 's2' size=4 vlen=1\n"
+ "\t'f1' type_id=1 bits_offset=0",
+ "[6] STRUCT 's3' size=4 vlen=1\n"
+ "\t'f1' type_id=1 bits_offset=0",
+ "[7] PTR '(anon)' type_id=6");
+
+cleanup:
+ btf__free(btf2);
+ btf__free(btf1);
+}
+
+static void test_split_struct_duped() {
+ struct btf *btf1, *btf2;
+ int err;
+
+ btf1 = btf__new_empty();
+ if (!ASSERT_OK_PTR(btf1, "empty_main_btf"))
+ return;
+
+ btf__set_pointer_size(btf1, 8); /* enforce 64-bit arch */
+
+ btf__add_int(btf1, "int", 4, BTF_INT_SIGNED); /* [1] int */
+ btf__add_ptr(btf1, 5); /* [2] ptr to struct s1 */
+ btf__add_fwd(btf1, "s2", BTF_FWD_STRUCT); /* [3] fwd for struct s2 */
+ btf__add_ptr(btf1, 3); /* [4] ptr to fwd struct s2 */
+ btf__add_struct(btf1, "s1", 16); /* [5] struct s1 { */
+ btf__add_field(btf1, "f1", 2, 0, 0); /* struct s1 *f1; */
+ btf__add_field(btf1, "f2", 4, 64, 0); /* struct s2 *f2; */
+ /* } */
+
+ VALIDATE_RAW_BTF(
+ btf1,
+ "[1] INT 'int' size=4 bits_offset=0 nr_bits=32 encoding=SIGNED",
+ "[2] PTR '(anon)' type_id=5",
+ "[3] FWD 's2' fwd_kind=struct",
+ "[4] PTR '(anon)' type_id=3",
+ "[5] STRUCT 's1' size=16 vlen=2\n"
+ "\t'f1' type_id=2 bits_offset=0\n"
+ "\t'f2' type_id=4 bits_offset=64");
+
+ btf2 = btf__new_empty_split(btf1);
+ if (!ASSERT_OK_PTR(btf2, "empty_split_btf"))
+ goto cleanup;
+
+ btf__add_int(btf2, "int", 4, BTF_INT_SIGNED); /* [6] int */
+ btf__add_ptr(btf2, 10); /* [7] ptr to struct s1 */
+ btf__add_fwd(btf2, "s2", BTF_FWD_STRUCT); /* [8] fwd for struct s2 */
+ btf__add_ptr(btf2, 11); /* [9] ptr to struct s2 */
+ btf__add_struct(btf2, "s1", 16); /* [10] struct s1 { */
+ btf__add_field(btf2, "f1", 7, 0, 0); /* struct s1 *f1; */
+ btf__add_field(btf2, "f2", 9, 64, 0); /* struct s2 *f2; */
+ /* } */
+ btf__add_struct(btf2, "s2", 40); /* [11] struct s2 { */
+ btf__add_field(btf2, "f1", 7, 0, 0); /* struct s1 *f1; */
+ btf__add_field(btf2, "f2", 9, 64, 0); /* struct s2 *f2; */
+ btf__add_field(btf2, "f3", 6, 128, 0); /* int f3; */
+ btf__add_field(btf2, "f4", 10, 192, 0); /* struct s1 f4; */
+ /* } */
+ btf__add_ptr(btf2, 8); /* [12] ptr to fwd struct s2 */
+ btf__add_struct(btf2, "s3", 8); /* [13] struct s3 { */
+ btf__add_field(btf2, "f1", 12, 0, 0); /* struct s2 *f1; (fwd) */
+ /* } */
+
+ VALIDATE_RAW_BTF(
+ btf2,
+ "[1] INT 'int' size=4 bits_offset=0 nr_bits=32 encoding=SIGNED",
+ "[2] PTR '(anon)' type_id=5",
+ "[3] FWD 's2' fwd_kind=struct",
+ "[4] PTR '(anon)' type_id=3",
+ "[5] STRUCT 's1' size=16 vlen=2\n"
+ "\t'f1' type_id=2 bits_offset=0\n"
+ "\t'f2' type_id=4 bits_offset=64",
+ "[6] INT 'int' size=4 bits_offset=0 nr_bits=32 encoding=SIGNED",
+ "[7] PTR '(anon)' type_id=10",
+ "[8] FWD 's2' fwd_kind=struct",
+ "[9] PTR '(anon)' type_id=11",
+ "[10] STRUCT 's1' size=16 vlen=2\n"
+ "\t'f1' type_id=7 bits_offset=0\n"
+ "\t'f2' type_id=9 bits_offset=64",
+ "[11] STRUCT 's2' size=40 vlen=4\n"
+ "\t'f1' type_id=7 bits_offset=0\n"
+ "\t'f2' type_id=9 bits_offset=64\n"
+ "\t'f3' type_id=6 bits_offset=128\n"
+ "\t'f4' type_id=10 bits_offset=192",
+ "[12] PTR '(anon)' type_id=8",
+ "[13] STRUCT 's3' size=8 vlen=1\n"
+ "\t'f1' type_id=12 bits_offset=0");
+
+ err = btf__dedup(btf2, NULL);
+ if (!ASSERT_OK(err, "btf_dedup"))
+ goto cleanup;
+
+ VALIDATE_RAW_BTF(
+ btf2,
+ "[1] INT 'int' size=4 bits_offset=0 nr_bits=32 encoding=SIGNED",
+ "[2] PTR '(anon)' type_id=5",
+ "[3] FWD 's2' fwd_kind=struct",
+ "[4] PTR '(anon)' type_id=3",
+ "[5] STRUCT 's1' size=16 vlen=2\n"
+ "\t'f1' type_id=2 bits_offset=0\n"
+ "\t'f2' type_id=4 bits_offset=64",
+ "[6] PTR '(anon)' type_id=8",
+ "[7] PTR '(anon)' type_id=9",
+ "[8] STRUCT 's1' size=16 vlen=2\n"
+ "\t'f1' type_id=6 bits_offset=0\n"
+ "\t'f2' type_id=7 bits_offset=64",
+ "[9] STRUCT 's2' size=40 vlen=4\n"
+ "\t'f1' type_id=6 bits_offset=0\n"
+ "\t'f2' type_id=7 bits_offset=64\n"
+ "\t'f3' type_id=1 bits_offset=128\n"
+ "\t'f4' type_id=8 bits_offset=192",
+ "[10] STRUCT 's3' size=8 vlen=1\n"
+ "\t'f1' type_id=7 bits_offset=0");
+
+cleanup:
+ btf__free(btf2);
+ btf__free(btf1);
+}
+
+static void btf_add_dup_struct_in_cu(struct btf *btf, int start_id)
+{
+#define ID(n) (start_id + n)
+ btf__set_pointer_size(btf, 8); /* enforce 64-bit arch */
+
+ btf__add_int(btf, "int", 4, BTF_INT_SIGNED); /* [1] int */
+
+ btf__add_struct(btf, "s", 8); /* [2] struct s { */
+ btf__add_field(btf, "a", ID(3), 0, 0); /* struct anon a; */
+ btf__add_field(btf, "b", ID(4), 0, 0); /* struct anon b; */
+ /* } */
+
+ btf__add_struct(btf, "(anon)", 8); /* [3] struct anon { */
+ btf__add_field(btf, "f1", ID(1), 0, 0); /* int f1; */
+ btf__add_field(btf, "f2", ID(1), 32, 0); /* int f2; */
+ /* } */
+
+ btf__add_struct(btf, "(anon)", 8); /* [4] struct anon { */
+ btf__add_field(btf, "f1", ID(1), 0, 0); /* int f1; */
+ btf__add_field(btf, "f2", ID(1), 32, 0); /* int f2; */
+ /* } */
+#undef ID
+}
+
+static void test_split_dup_struct_in_cu()
+{
+ struct btf *btf1, *btf2 = NULL;
+ int err;
+
+ /* generate the base data.. */
+ btf1 = btf__new_empty();
+ if (!ASSERT_OK_PTR(btf1, "empty_main_btf"))
+ return;
+
+ btf_add_dup_struct_in_cu(btf1, 0);
+
+ VALIDATE_RAW_BTF(
+ btf1,
+ "[1] INT 'int' size=4 bits_offset=0 nr_bits=32 encoding=SIGNED",
+ "[2] STRUCT 's' size=8 vlen=2\n"
+ "\t'a' type_id=3 bits_offset=0\n"
+ "\t'b' type_id=4 bits_offset=0",
+ "[3] STRUCT '(anon)' size=8 vlen=2\n"
+ "\t'f1' type_id=1 bits_offset=0\n"
+ "\t'f2' type_id=1 bits_offset=32",
+ "[4] STRUCT '(anon)' size=8 vlen=2\n"
+ "\t'f1' type_id=1 bits_offset=0\n"
+ "\t'f2' type_id=1 bits_offset=32");
+
+ /* ..dedup them... */
+ err = btf__dedup(btf1, NULL);
+ if (!ASSERT_OK(err, "btf_dedup"))
+ goto cleanup;
+
+ VALIDATE_RAW_BTF(
+ btf1,
+ "[1] INT 'int' size=4 bits_offset=0 nr_bits=32 encoding=SIGNED",
+ "[2] STRUCT 's' size=8 vlen=2\n"
+ "\t'a' type_id=3 bits_offset=0\n"
+ "\t'b' type_id=3 bits_offset=0",
+ "[3] STRUCT '(anon)' size=8 vlen=2\n"
+ "\t'f1' type_id=1 bits_offset=0\n"
+ "\t'f2' type_id=1 bits_offset=32");
+
+ /* and add the same data on top of it */
+ btf2 = btf__new_empty_split(btf1);
+ if (!ASSERT_OK_PTR(btf2, "empty_split_btf"))
+ goto cleanup;
+
+ btf_add_dup_struct_in_cu(btf2, 3);
+
+ VALIDATE_RAW_BTF(
+ btf2,
+ "[1] INT 'int' size=4 bits_offset=0 nr_bits=32 encoding=SIGNED",
+ "[2] STRUCT 's' size=8 vlen=2\n"
+ "\t'a' type_id=3 bits_offset=0\n"
+ "\t'b' type_id=3 bits_offset=0",
+ "[3] STRUCT '(anon)' size=8 vlen=2\n"
+ "\t'f1' type_id=1 bits_offset=0\n"
+ "\t'f2' type_id=1 bits_offset=32",
+ "[4] INT 'int' size=4 bits_offset=0 nr_bits=32 encoding=SIGNED",
+ "[5] STRUCT 's' size=8 vlen=2\n"
+ "\t'a' type_id=6 bits_offset=0\n"
+ "\t'b' type_id=7 bits_offset=0",
+ "[6] STRUCT '(anon)' size=8 vlen=2\n"
+ "\t'f1' type_id=4 bits_offset=0\n"
+ "\t'f2' type_id=4 bits_offset=32",
+ "[7] STRUCT '(anon)' size=8 vlen=2\n"
+ "\t'f1' type_id=4 bits_offset=0\n"
+ "\t'f2' type_id=4 bits_offset=32");
+
+ err = btf__dedup(btf2, NULL);
+ if (!ASSERT_OK(err, "btf_dedup"))
+ goto cleanup;
+
+ /* after dedup it should match the original data */
+ VALIDATE_RAW_BTF(
+ btf2,
+ "[1] INT 'int' size=4 bits_offset=0 nr_bits=32 encoding=SIGNED",
+ "[2] STRUCT 's' size=8 vlen=2\n"
+ "\t'a' type_id=3 bits_offset=0\n"
+ "\t'b' type_id=3 bits_offset=0",
+ "[3] STRUCT '(anon)' size=8 vlen=2\n"
+ "\t'f1' type_id=1 bits_offset=0\n"
+ "\t'f2' type_id=1 bits_offset=32");
+
+cleanup:
+ btf__free(btf2);
+ btf__free(btf1);
+}
+
+void test_btf_dedup_split()
+{
+ if (test__start_subtest("split_simple"))
+ test_split_simple();
+ if (test__start_subtest("split_struct_duped"))
+ test_split_struct_duped();
+ if (test__start_subtest("split_fwd_resolve"))
+ test_split_fwd_resolve();
+ if (test__start_subtest("split_dup_struct_in_cu"))
+ test_split_dup_struct_in_cu();
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/btf_dump.c b/tools/testing/selftests/bpf/prog_tests/btf_dump.c
index cb33a7ee4e04..e9ea38aa8248 100644
--- a/tools/testing/selftests/bpf/prog_tests/btf_dump.c
+++ b/tools/testing/selftests/bpf/prog_tests/btf_dump.c
@@ -12,29 +12,29 @@ void btf_dump_printf(void *ctx, const char *fmt, va_list args)
static struct btf_dump_test_case {
const char *name;
const char *file;
- struct btf_dump_opts opts;
+ bool known_ptr_sz;
} btf_dump_test_cases[] = {
- {"btf_dump: syntax", "btf_dump_test_case_syntax", {}},
- {"btf_dump: ordering", "btf_dump_test_case_ordering", {}},
- {"btf_dump: padding", "btf_dump_test_case_padding", {}},
- {"btf_dump: packing", "btf_dump_test_case_packing", {}},
- {"btf_dump: bitfields", "btf_dump_test_case_bitfields", {}},
- {"btf_dump: multidim", "btf_dump_test_case_multidim", {}},
- {"btf_dump: namespacing", "btf_dump_test_case_namespacing", {}},
+ {"btf_dump: syntax", "btf_dump_test_case_syntax", true},
+ {"btf_dump: ordering", "btf_dump_test_case_ordering", false},
+ {"btf_dump: padding", "btf_dump_test_case_padding", true},
+ {"btf_dump: packing", "btf_dump_test_case_packing", true},
+ {"btf_dump: bitfields", "btf_dump_test_case_bitfields", true},
+ {"btf_dump: multidim", "btf_dump_test_case_multidim", false},
+ {"btf_dump: namespacing", "btf_dump_test_case_namespacing", false},
};
-static int btf_dump_all_types(const struct btf *btf,
- const struct btf_dump_opts *opts)
+static int btf_dump_all_types(const struct btf *btf, void *ctx)
{
- size_t type_cnt = btf__get_nr_types(btf);
+ size_t type_cnt = btf__type_cnt(btf);
struct btf_dump *d;
int err = 0, id;
- d = btf_dump__new(btf, NULL, opts, btf_dump_printf);
- if (IS_ERR(d))
- return PTR_ERR(d);
+ d = btf_dump__new(btf, btf_dump_printf, ctx, NULL);
+ err = libbpf_get_error(d);
+ if (err)
+ return err;
- for (id = 1; id <= type_cnt; id++) {
+ for (id = 1; id < type_cnt; id++) {
err = btf_dump__dump_type(d, id);
if (err)
goto done;
@@ -52,19 +52,30 @@ static int test_btf_dump_case(int n, struct btf_dump_test_case *t)
int err = 0, fd = -1;
FILE *f = NULL;
- snprintf(test_file, sizeof(test_file), "%s.o", t->file);
+ snprintf(test_file, sizeof(test_file), "%s.bpf.o", t->file);
btf = btf__parse_elf(test_file, NULL);
- if (CHECK(IS_ERR(btf), "btf_parse_elf",
- "failed to load test BTF: %ld\n", PTR_ERR(btf))) {
+ if (!ASSERT_OK_PTR(btf, "btf_parse_elf")) {
err = -PTR_ERR(btf);
btf = NULL;
goto done;
}
+ /* tests with t->known_ptr_sz have no "long" or "unsigned long" type,
+ * so it's impossible to determine correct pointer size; but if they
+ * do, it should be 8 regardless of host architecture, becaues BPF
+ * target is always 64-bit
+ */
+ if (!t->known_ptr_sz) {
+ btf__set_pointer_size(btf, 8);
+ } else {
+ CHECK(btf__pointer_size(btf) != 8, "ptr_sz", "exp %d, got %zu\n",
+ 8, btf__pointer_size(btf));
+ }
+
snprintf(out_file, sizeof(out_file), "/tmp/%s.output.XXXXXX", t->file);
fd = mkstemp(out_file);
- if (CHECK(fd < 0, "create_tmp", "failed to create file: %d\n", fd)) {
+ if (!ASSERT_GE(fd, 0, "create_tmp")) {
err = fd;
goto done;
}
@@ -75,8 +86,7 @@ static int test_btf_dump_case(int n, struct btf_dump_test_case *t)
goto done;
}
- t->opts.ctx = f;
- err = btf_dump_all_types(btf, &t->opts);
+ err = btf_dump_all_types(btf, f);
fclose(f);
close(fd);
if (CHECK(err, "btf_dump", "failure during C dumping: %d\n", err)) {
@@ -116,7 +126,741 @@ done:
return err;
}
+static char *dump_buf;
+static size_t dump_buf_sz;
+static FILE *dump_buf_file;
+
+static void test_btf_dump_incremental(void)
+{
+ struct btf *btf = NULL;
+ struct btf_dump *d = NULL;
+ int id, err, i;
+
+ dump_buf_file = open_memstream(&dump_buf, &dump_buf_sz);
+ if (!ASSERT_OK_PTR(dump_buf_file, "dump_memstream"))
+ return;
+ btf = btf__new_empty();
+ if (!ASSERT_OK_PTR(btf, "new_empty"))
+ goto err_out;
+ d = btf_dump__new(btf, btf_dump_printf, dump_buf_file, NULL);
+ if (!ASSERT_OK(libbpf_get_error(d), "btf_dump__new"))
+ goto err_out;
+
+ /* First, generate BTF corresponding to the following C code:
+ *
+ * enum x;
+ *
+ * enum x { X = 1 };
+ *
+ * enum { Y = 1 };
+ *
+ * struct s;
+ *
+ * struct s { int x; };
+ *
+ */
+ id = btf__add_enum(btf, "x", 4);
+ ASSERT_EQ(id, 1, "enum_declaration_id");
+ id = btf__add_enum(btf, "x", 4);
+ ASSERT_EQ(id, 2, "named_enum_id");
+ err = btf__add_enum_value(btf, "X", 1);
+ ASSERT_OK(err, "named_enum_val_ok");
+
+ id = btf__add_enum(btf, NULL, 4);
+ ASSERT_EQ(id, 3, "anon_enum_id");
+ err = btf__add_enum_value(btf, "Y", 1);
+ ASSERT_OK(err, "anon_enum_val_ok");
+
+ id = btf__add_int(btf, "int", 4, BTF_INT_SIGNED);
+ ASSERT_EQ(id, 4, "int_id");
+
+ id = btf__add_fwd(btf, "s", BTF_FWD_STRUCT);
+ ASSERT_EQ(id, 5, "fwd_id");
+
+ id = btf__add_struct(btf, "s", 4);
+ ASSERT_EQ(id, 6, "struct_id");
+ err = btf__add_field(btf, "x", 4, 0, 0);
+ ASSERT_OK(err, "field_ok");
+
+ for (i = 1; i < btf__type_cnt(btf); i++) {
+ err = btf_dump__dump_type(d, i);
+ ASSERT_OK(err, "dump_type_ok");
+ }
+
+ fflush(dump_buf_file);
+ dump_buf[dump_buf_sz] = 0; /* some libc implementations don't do this */
+
+ ASSERT_STREQ(dump_buf,
+"enum x;\n"
+"\n"
+"enum x {\n"
+" X = 1,\n"
+"};\n"
+"\n"
+"enum {\n"
+" Y = 1,\n"
+"};\n"
+"\n"
+"struct s;\n"
+"\n"
+"struct s {\n"
+" int x;\n"
+"};\n\n", "c_dump1");
+
+ /* Now, after dumping original BTF, append another struct that embeds
+ * anonymous enum. It also has a name conflict with the first struct:
+ *
+ * struct s___2 {
+ * enum { VAL___2 = 1 } x;
+ * struct s s;
+ * };
+ *
+ * This will test that btf_dump'er maintains internal state properly.
+ * Note that VAL___2 enum value. It's because we've already emitted
+ * that enum as a global anonymous enum, so btf_dump will ensure that
+ * enum values don't conflict;
+ *
+ */
+ fseek(dump_buf_file, 0, SEEK_SET);
+
+ id = btf__add_struct(btf, "s", 4);
+ ASSERT_EQ(id, 7, "struct_id");
+ err = btf__add_field(btf, "x", 2, 0, 0);
+ ASSERT_OK(err, "field_ok");
+ err = btf__add_field(btf, "y", 3, 32, 0);
+ ASSERT_OK(err, "field_ok");
+ err = btf__add_field(btf, "s", 6, 64, 0);
+ ASSERT_OK(err, "field_ok");
+
+ for (i = 1; i < btf__type_cnt(btf); i++) {
+ err = btf_dump__dump_type(d, i);
+ ASSERT_OK(err, "dump_type_ok");
+ }
+
+ fflush(dump_buf_file);
+ dump_buf[dump_buf_sz] = 0; /* some libc implementations don't do this */
+ ASSERT_STREQ(dump_buf,
+"struct s___2 {\n"
+" enum x x;\n"
+" enum {\n"
+" Y___2 = 1,\n"
+" } y;\n"
+" struct s s;\n"
+"};\n\n" , "c_dump1");
+
+err_out:
+ fclose(dump_buf_file);
+ free(dump_buf);
+ btf_dump__free(d);
+ btf__free(btf);
+}
+
+#define STRSIZE 4096
+
+static void btf_dump_snprintf(void *ctx, const char *fmt, va_list args)
+{
+ char *s = ctx, new[STRSIZE];
+
+ vsnprintf(new, STRSIZE, fmt, args);
+ if (strlen(s) < STRSIZE)
+ strncat(s, new, STRSIZE - strlen(s) - 1);
+}
+
+static int btf_dump_data(struct btf *btf, struct btf_dump *d,
+ char *name, char *prefix, __u64 flags, void *ptr,
+ size_t ptr_sz, char *str, const char *expected_val)
+{
+ DECLARE_LIBBPF_OPTS(btf_dump_type_data_opts, opts);
+ size_t type_sz;
+ __s32 type_id;
+ int ret = 0;
+
+ if (flags & BTF_F_COMPACT)
+ opts.compact = true;
+ if (flags & BTF_F_NONAME)
+ opts.skip_names = true;
+ if (flags & BTF_F_ZERO)
+ opts.emit_zeroes = true;
+ if (prefix) {
+ ASSERT_STRNEQ(name, prefix, strlen(prefix),
+ "verify prefix match");
+ name += strlen(prefix) + 1;
+ }
+ type_id = btf__find_by_name(btf, name);
+ if (!ASSERT_GE(type_id, 0, "find type id"))
+ return -ENOENT;
+ type_sz = btf__resolve_size(btf, type_id);
+ str[0] = '\0';
+ ret = btf_dump__dump_type_data(d, type_id, ptr, ptr_sz, &opts);
+ if (type_sz <= ptr_sz) {
+ if (!ASSERT_EQ(ret, type_sz, "failed/unexpected type_sz"))
+ return -EINVAL;
+ } else {
+ if (!ASSERT_EQ(ret, -E2BIG, "failed to return -E2BIG"))
+ return -EINVAL;
+ }
+ if (!ASSERT_STREQ(str, expected_val, "ensure expected/actual match"))
+ return -EFAULT;
+ return 0;
+}
+
+#define TEST_BTF_DUMP_DATA(_b, _d, _prefix, _str, _type, _flags, \
+ _expected, ...) \
+ do { \
+ char __ptrtype[64] = #_type; \
+ char *_ptrtype = (char *)__ptrtype; \
+ _type _ptrdata = __VA_ARGS__; \
+ void *_ptr = &_ptrdata; \
+ \
+ (void) btf_dump_data(_b, _d, _ptrtype, _prefix, _flags, \
+ _ptr, sizeof(_type), _str, \
+ _expected); \
+ } while (0)
+
+/* Use where expected data string matches its stringified declaration */
+#define TEST_BTF_DUMP_DATA_C(_b, _d, _prefix, _str, _type, _flags, \
+ ...) \
+ TEST_BTF_DUMP_DATA(_b, _d, _prefix, _str, _type, _flags, \
+ "(" #_type ")" #__VA_ARGS__, __VA_ARGS__)
+
+/* overflow test; pass typesize < expected type size, ensure E2BIG returned */
+#define TEST_BTF_DUMP_DATA_OVER(_b, _d, _prefix, _str, _type, _type_sz, \
+ _expected, ...) \
+ do { \
+ char __ptrtype[64] = #_type; \
+ char *_ptrtype = (char *)__ptrtype; \
+ _type _ptrdata = __VA_ARGS__; \
+ void *_ptr = &_ptrdata; \
+ \
+ (void) btf_dump_data(_b, _d, _ptrtype, _prefix, 0, \
+ _ptr, _type_sz, _str, _expected); \
+ } while (0)
+
+#define TEST_BTF_DUMP_VAR(_b, _d, _prefix, _str, _var, _type, _flags, \
+ _expected, ...) \
+ do { \
+ _type _ptrdata = __VA_ARGS__; \
+ void *_ptr = &_ptrdata; \
+ \
+ (void) btf_dump_data(_b, _d, _var, _prefix, _flags, \
+ _ptr, sizeof(_type), _str, \
+ _expected); \
+ } while (0)
+
+static void test_btf_dump_int_data(struct btf *btf, struct btf_dump *d,
+ char *str)
+{
+#ifdef __SIZEOF_INT128__
+ unsigned __int128 i = 0xffffffffffffffff;
+
+ /* this dance is required because we cannot directly initialize
+ * a 128-bit value to anything larger than a 64-bit value.
+ */
+ i = (i << 64) | (i - 1);
+#endif
+ /* simple int */
+ TEST_BTF_DUMP_DATA_C(btf, d, NULL, str, int, BTF_F_COMPACT, 1234);
+ TEST_BTF_DUMP_DATA(btf, d, NULL, str, int, BTF_F_COMPACT | BTF_F_NONAME,
+ "1234", 1234);
+ TEST_BTF_DUMP_DATA(btf, d, NULL, str, int, 0, "(int)1234", 1234);
+
+ /* zero value should be printed at toplevel */
+ TEST_BTF_DUMP_DATA(btf, d, NULL, str, int, BTF_F_COMPACT, "(int)0", 0);
+ TEST_BTF_DUMP_DATA(btf, d, NULL, str, int, BTF_F_COMPACT | BTF_F_NONAME,
+ "0", 0);
+ TEST_BTF_DUMP_DATA(btf, d, NULL, str, int, BTF_F_COMPACT | BTF_F_ZERO,
+ "(int)0", 0);
+ TEST_BTF_DUMP_DATA(btf, d, NULL, str, int,
+ BTF_F_COMPACT | BTF_F_NONAME | BTF_F_ZERO,
+ "0", 0);
+ TEST_BTF_DUMP_DATA_C(btf, d, NULL, str, int, BTF_F_COMPACT, -4567);
+ TEST_BTF_DUMP_DATA(btf, d, NULL, str, int, BTF_F_COMPACT | BTF_F_NONAME,
+ "-4567", -4567);
+ TEST_BTF_DUMP_DATA(btf, d, NULL, str, int, 0, "(int)-4567", -4567);
+
+ TEST_BTF_DUMP_DATA_OVER(btf, d, NULL, str, int, sizeof(int)-1, "", 1);
+
+#ifdef __SIZEOF_INT128__
+ /* gcc encode unsigned __int128 type with name "__int128 unsigned" in dwarf,
+ * and clang encode it with name "unsigned __int128" in dwarf.
+ * Do an availability test for either variant before doing actual test.
+ */
+ if (btf__find_by_name(btf, "unsigned __int128") > 0) {
+ TEST_BTF_DUMP_DATA(btf, d, NULL, str, unsigned __int128, BTF_F_COMPACT,
+ "(unsigned __int128)0xffffffffffffffff",
+ 0xffffffffffffffff);
+ ASSERT_OK(btf_dump_data(btf, d, "unsigned __int128", NULL, 0, &i, 16, str,
+ "(unsigned __int128)0xfffffffffffffffffffffffffffffffe"),
+ "dump unsigned __int128");
+ } else if (btf__find_by_name(btf, "__int128 unsigned") > 0) {
+ TEST_BTF_DUMP_DATA(btf, d, NULL, str, __int128 unsigned, BTF_F_COMPACT,
+ "(__int128 unsigned)0xffffffffffffffff",
+ 0xffffffffffffffff);
+ ASSERT_OK(btf_dump_data(btf, d, "__int128 unsigned", NULL, 0, &i, 16, str,
+ "(__int128 unsigned)0xfffffffffffffffffffffffffffffffe"),
+ "dump unsigned __int128");
+ } else {
+ ASSERT_TRUE(false, "unsigned_int128_not_found");
+ }
+#endif
+}
+
+static void test_btf_dump_float_data(struct btf *btf, struct btf_dump *d,
+ char *str)
+{
+ float t1 = 1.234567;
+ float t2 = -1.234567;
+ float t3 = 0.0;
+ double t4 = 5.678912;
+ double t5 = -5.678912;
+ double t6 = 0.0;
+ long double t7 = 9.876543;
+ long double t8 = -9.876543;
+ long double t9 = 0.0;
+
+ /* since the kernel does not likely have any float types in its BTF, we
+ * will need to add some of various sizes.
+ */
+
+ ASSERT_GT(btf__add_float(btf, "test_float", 4), 0, "add float");
+ ASSERT_OK(btf_dump_data(btf, d, "test_float", NULL, 0, &t1, 4, str,
+ "(test_float)1.234567"), "dump float");
+ ASSERT_OK(btf_dump_data(btf, d, "test_float", NULL, 0, &t2, 4, str,
+ "(test_float)-1.234567"), "dump float");
+ ASSERT_OK(btf_dump_data(btf, d, "test_float", NULL, 0, &t3, 4, str,
+ "(test_float)0.000000"), "dump float");
+
+ ASSERT_GT(btf__add_float(btf, "test_double", 8), 0, "add_double");
+ ASSERT_OK(btf_dump_data(btf, d, "test_double", NULL, 0, &t4, 8, str,
+ "(test_double)5.678912"), "dump double");
+ ASSERT_OK(btf_dump_data(btf, d, "test_double", NULL, 0, &t5, 8, str,
+ "(test_double)-5.678912"), "dump double");
+ ASSERT_OK(btf_dump_data(btf, d, "test_double", NULL, 0, &t6, 8, str,
+ "(test_double)0.000000"), "dump double");
+
+ ASSERT_GT(btf__add_float(btf, "test_long_double", 16), 0, "add long double");
+ ASSERT_OK(btf_dump_data(btf, d, "test_long_double", NULL, 0, &t7, 16,
+ str, "(test_long_double)9.876543"),
+ "dump long_double");
+ ASSERT_OK(btf_dump_data(btf, d, "test_long_double", NULL, 0, &t8, 16,
+ str, "(test_long_double)-9.876543"),
+ "dump long_double");
+ ASSERT_OK(btf_dump_data(btf, d, "test_long_double", NULL, 0, &t9, 16,
+ str, "(test_long_double)0.000000"),
+ "dump long_double");
+}
+
+static void test_btf_dump_char_data(struct btf *btf, struct btf_dump *d,
+ char *str)
+{
+ /* simple char */
+ TEST_BTF_DUMP_DATA_C(btf, d, NULL, str, char, BTF_F_COMPACT, 100);
+ TEST_BTF_DUMP_DATA(btf, d, NULL, str, char, BTF_F_COMPACT | BTF_F_NONAME,
+ "100", 100);
+ TEST_BTF_DUMP_DATA(btf, d, NULL, str, char, 0, "(char)100", 100);
+ /* zero value should be printed at toplevel */
+ TEST_BTF_DUMP_DATA(btf, d, NULL, str, char, BTF_F_COMPACT,
+ "(char)0", 0);
+ TEST_BTF_DUMP_DATA(btf, d, NULL, str, char, BTF_F_COMPACT | BTF_F_NONAME,
+ "0", 0);
+ TEST_BTF_DUMP_DATA(btf, d, NULL, str, char, BTF_F_COMPACT | BTF_F_ZERO,
+ "(char)0", 0);
+ TEST_BTF_DUMP_DATA(btf, d, NULL, str, char, BTF_F_COMPACT | BTF_F_NONAME | BTF_F_ZERO,
+ "0", 0);
+ TEST_BTF_DUMP_DATA(btf, d, NULL, str, char, 0, "(char)0", 0);
+
+ TEST_BTF_DUMP_DATA_OVER(btf, d, NULL, str, char, sizeof(char)-1, "", 100);
+}
+
+static void test_btf_dump_typedef_data(struct btf *btf, struct btf_dump *d,
+ char *str)
+{
+ /* simple typedef */
+ TEST_BTF_DUMP_DATA_C(btf, d, NULL, str, uint64_t, BTF_F_COMPACT, 100);
+ TEST_BTF_DUMP_DATA(btf, d, NULL, str, u64, BTF_F_COMPACT | BTF_F_NONAME,
+ "1", 1);
+ TEST_BTF_DUMP_DATA(btf, d, NULL, str, u64, 0, "(u64)1", 1);
+ /* zero value should be printed at toplevel */
+ TEST_BTF_DUMP_DATA(btf, d, NULL, str, u64, BTF_F_COMPACT, "(u64)0", 0);
+ TEST_BTF_DUMP_DATA(btf, d, NULL, str, u64, BTF_F_COMPACT | BTF_F_NONAME,
+ "0", 0);
+ TEST_BTF_DUMP_DATA(btf, d, NULL, str, u64, BTF_F_COMPACT | BTF_F_ZERO,
+ "(u64)0", 0);
+ TEST_BTF_DUMP_DATA(btf, d, NULL, str, u64,
+ BTF_F_COMPACT | BTF_F_NONAME | BTF_F_ZERO,
+ "0", 0);
+ TEST_BTF_DUMP_DATA(btf, d, NULL, str, u64, 0, "(u64)0", 0);
+
+ /* typedef struct */
+ TEST_BTF_DUMP_DATA_C(btf, d, NULL, str, atomic_t, BTF_F_COMPACT,
+ {.counter = (int)1,});
+ TEST_BTF_DUMP_DATA(btf, d, NULL, str, atomic_t, BTF_F_COMPACT | BTF_F_NONAME,
+ "{1,}", { .counter = 1 });
+ TEST_BTF_DUMP_DATA(btf, d, NULL, str, atomic_t, 0,
+"(atomic_t){\n"
+" .counter = (int)1,\n"
+"}",
+ {.counter = 1,});
+ /* typedef with 0 value should be printed at toplevel */
+ TEST_BTF_DUMP_DATA(btf, d, NULL, str, atomic_t, BTF_F_COMPACT, "(atomic_t){}",
+ {.counter = 0,});
+ TEST_BTF_DUMP_DATA(btf, d, NULL, str, atomic_t, BTF_F_COMPACT | BTF_F_NONAME,
+ "{}", {.counter = 0,});
+ TEST_BTF_DUMP_DATA(btf, d, NULL, str, atomic_t, 0,
+"(atomic_t){\n"
+"}",
+ {.counter = 0,});
+ TEST_BTF_DUMP_DATA(btf, d, NULL, str, atomic_t, BTF_F_COMPACT | BTF_F_ZERO,
+ "(atomic_t){.counter = (int)0,}",
+ {.counter = 0,});
+ TEST_BTF_DUMP_DATA(btf, d, NULL, str, atomic_t,
+ BTF_F_COMPACT | BTF_F_NONAME | BTF_F_ZERO,
+ "{0,}", {.counter = 0,});
+ TEST_BTF_DUMP_DATA(btf, d, NULL, str, atomic_t, BTF_F_ZERO,
+"(atomic_t){\n"
+" .counter = (int)0,\n"
+"}",
+ { .counter = 0,});
+
+ /* overflow should show type but not value since it overflows */
+ TEST_BTF_DUMP_DATA_OVER(btf, d, NULL, str, atomic_t, sizeof(atomic_t)-1,
+ "(atomic_t){\n", { .counter = 1});
+}
+
+static void test_btf_dump_enum_data(struct btf *btf, struct btf_dump *d,
+ char *str)
+{
+ /* enum where enum value does (and does not) exist */
+ TEST_BTF_DUMP_DATA_C(btf, d, "enum", str, enum bpf_cmd, BTF_F_COMPACT,
+ BPF_MAP_CREATE);
+ TEST_BTF_DUMP_DATA(btf, d, "enum", str, enum bpf_cmd, BTF_F_COMPACT,
+ "(enum bpf_cmd)BPF_MAP_CREATE", 0);
+ TEST_BTF_DUMP_DATA(btf, d, "enum", str, enum bpf_cmd,
+ BTF_F_COMPACT | BTF_F_NONAME,
+ "BPF_MAP_CREATE",
+ BPF_MAP_CREATE);
+ TEST_BTF_DUMP_DATA(btf, d, "enum", str, enum bpf_cmd, 0,
+ "(enum bpf_cmd)BPF_MAP_CREATE",
+ BPF_MAP_CREATE);
+ TEST_BTF_DUMP_DATA(btf, d, "enum", str, enum bpf_cmd,
+ BTF_F_COMPACT | BTF_F_NONAME | BTF_F_ZERO,
+ "BPF_MAP_CREATE", 0);
+ TEST_BTF_DUMP_DATA(btf, d, "enum", str, enum bpf_cmd,
+ BTF_F_COMPACT | BTF_F_ZERO,
+ "(enum bpf_cmd)BPF_MAP_CREATE",
+ BPF_MAP_CREATE);
+ TEST_BTF_DUMP_DATA(btf, d, "enum", str, enum bpf_cmd,
+ BTF_F_COMPACT | BTF_F_NONAME | BTF_F_ZERO,
+ "BPF_MAP_CREATE", BPF_MAP_CREATE);
+ TEST_BTF_DUMP_DATA_C(btf, d, "enum", str, enum bpf_cmd, BTF_F_COMPACT, 2000);
+ TEST_BTF_DUMP_DATA(btf, d, "enum", str, enum bpf_cmd,
+ BTF_F_COMPACT | BTF_F_NONAME,
+ "2000", 2000);
+ TEST_BTF_DUMP_DATA(btf, d, "enum", str, enum bpf_cmd, 0,
+ "(enum bpf_cmd)2000", 2000);
+
+ TEST_BTF_DUMP_DATA_OVER(btf, d, "enum", str, enum bpf_cmd,
+ sizeof(enum bpf_cmd) - 1, "", BPF_MAP_CREATE);
+}
+
+static void test_btf_dump_struct_data(struct btf *btf, struct btf_dump *d,
+ char *str)
+{
+ DECLARE_LIBBPF_OPTS(btf_dump_type_data_opts, opts);
+ char zero_data[512] = { };
+ char type_data[512];
+ void *fops = type_data;
+ void *skb = type_data;
+ size_t type_sz;
+ __s32 type_id;
+ char *cmpstr;
+ int ret;
+
+ memset(type_data, 255, sizeof(type_data));
+
+ /* simple struct */
+ TEST_BTF_DUMP_DATA_C(btf, d, "struct", str, struct btf_enum, BTF_F_COMPACT,
+ {.name_off = (__u32)3,.val = (__s32)-1,});
+ TEST_BTF_DUMP_DATA(btf, d, "struct", str, struct btf_enum,
+ BTF_F_COMPACT | BTF_F_NONAME,
+ "{3,-1,}",
+ { .name_off = 3, .val = -1,});
+ TEST_BTF_DUMP_DATA(btf, d, "struct", str, struct btf_enum, 0,
+"(struct btf_enum){\n"
+" .name_off = (__u32)3,\n"
+" .val = (__s32)-1,\n"
+"}",
+ { .name_off = 3, .val = -1,});
+ TEST_BTF_DUMP_DATA(btf, d, "struct", str, struct btf_enum,
+ BTF_F_COMPACT | BTF_F_NONAME,
+ "{-1,}",
+ { .name_off = 0, .val = -1,});
+ TEST_BTF_DUMP_DATA(btf, d, "struct", str, struct btf_enum,
+ BTF_F_COMPACT | BTF_F_NONAME | BTF_F_ZERO,
+ "{0,-1,}",
+ { .name_off = 0, .val = -1,});
+ /* empty struct should be printed */
+ TEST_BTF_DUMP_DATA(btf, d, "struct", str, struct btf_enum, BTF_F_COMPACT,
+ "(struct btf_enum){}",
+ { .name_off = 0, .val = 0,});
+ TEST_BTF_DUMP_DATA(btf, d, "struct", str, struct btf_enum,
+ BTF_F_COMPACT | BTF_F_NONAME,
+ "{}",
+ { .name_off = 0, .val = 0,});
+ TEST_BTF_DUMP_DATA(btf, d, "struct", str, struct btf_enum, 0,
+"(struct btf_enum){\n"
+"}",
+ { .name_off = 0, .val = 0,});
+ TEST_BTF_DUMP_DATA(btf, d, "struct", str, struct btf_enum,
+ BTF_F_COMPACT | BTF_F_ZERO,
+ "(struct btf_enum){.name_off = (__u32)0,.val = (__s32)0,}",
+ { .name_off = 0, .val = 0,});
+ TEST_BTF_DUMP_DATA(btf, d, "struct", str, struct btf_enum,
+ BTF_F_ZERO,
+"(struct btf_enum){\n"
+" .name_off = (__u32)0,\n"
+" .val = (__s32)0,\n"
+"}",
+ { .name_off = 0, .val = 0,});
+
+ /* struct with pointers */
+ TEST_BTF_DUMP_DATA(btf, d, "struct", str, struct list_head, BTF_F_COMPACT,
+ "(struct list_head){.next = (struct list_head *)0x1,}",
+ { .next = (struct list_head *)1 });
+ TEST_BTF_DUMP_DATA(btf, d, "struct", str, struct list_head, 0,
+"(struct list_head){\n"
+" .next = (struct list_head *)0x1,\n"
+"}",
+ { .next = (struct list_head *)1 });
+ /* NULL pointer should not be displayed */
+ TEST_BTF_DUMP_DATA(btf, d, "struct", str, struct list_head, BTF_F_COMPACT,
+ "(struct list_head){}",
+ { .next = (struct list_head *)0 });
+ TEST_BTF_DUMP_DATA(btf, d, "struct", str, struct list_head, 0,
+"(struct list_head){\n"
+"}",
+ { .next = (struct list_head *)0 });
+
+ /* struct with function pointers */
+ type_id = btf__find_by_name(btf, "file_operations");
+ if (ASSERT_GT(type_id, 0, "find type id")) {
+ type_sz = btf__resolve_size(btf, type_id);
+ str[0] = '\0';
+
+ ret = btf_dump__dump_type_data(d, type_id, fops, type_sz, &opts);
+ ASSERT_EQ(ret, type_sz,
+ "unexpected return value dumping file_operations");
+ cmpstr =
+"(struct file_operations){\n"
+" .owner = (struct module *)0xffffffffffffffff,\n"
+" .llseek = (loff_t (*)(struct file *, loff_t, int))0xffffffffffffffff,";
+
+ ASSERT_STRNEQ(str, cmpstr, strlen(cmpstr), "file_operations");
+ }
+
+ /* struct with char array */
+ TEST_BTF_DUMP_DATA(btf, d, "struct", str, struct bpf_prog_info, BTF_F_COMPACT,
+ "(struct bpf_prog_info){.name = (char[16])['f','o','o',],}",
+ { .name = "foo",});
+ TEST_BTF_DUMP_DATA(btf, d, "struct", str, struct bpf_prog_info,
+ BTF_F_COMPACT | BTF_F_NONAME,
+ "{['f','o','o',],}",
+ {.name = "foo",});
+ TEST_BTF_DUMP_DATA(btf, d, "struct", str, struct bpf_prog_info, 0,
+"(struct bpf_prog_info){\n"
+" .name = (char[16])[\n"
+" 'f',\n"
+" 'o',\n"
+" 'o',\n"
+" ],\n"
+"}",
+ {.name = "foo",});
+ /* leading null char means do not display string */
+ TEST_BTF_DUMP_DATA(btf, d, "struct", str, struct bpf_prog_info, BTF_F_COMPACT,
+ "(struct bpf_prog_info){}",
+ {.name = {'\0', 'f', 'o', 'o'}});
+ /* handle non-printable characters */
+ TEST_BTF_DUMP_DATA(btf, d, "struct", str, struct bpf_prog_info, BTF_F_COMPACT,
+ "(struct bpf_prog_info){.name = (char[16])[1,2,3,],}",
+ { .name = {1, 2, 3, 0}});
+
+ /* struct with non-char array */
+ TEST_BTF_DUMP_DATA(btf, d, "struct", str, struct __sk_buff, BTF_F_COMPACT,
+ "(struct __sk_buff){.cb = (__u32[5])[1,2,3,4,5,],}",
+ { .cb = {1, 2, 3, 4, 5,},});
+ TEST_BTF_DUMP_DATA(btf, d, "struct", str, struct __sk_buff,
+ BTF_F_COMPACT | BTF_F_NONAME,
+ "{[1,2,3,4,5,],}",
+ { .cb = { 1, 2, 3, 4, 5},});
+ TEST_BTF_DUMP_DATA(btf, d, "struct", str, struct __sk_buff, 0,
+"(struct __sk_buff){\n"
+" .cb = (__u32[5])[\n"
+" 1,\n"
+" 2,\n"
+" 3,\n"
+" 4,\n"
+" 5,\n"
+" ],\n"
+"}",
+ { .cb = { 1, 2, 3, 4, 5},});
+ /* For non-char, arrays, show non-zero values only */
+ TEST_BTF_DUMP_DATA(btf, d, "struct", str, struct __sk_buff, BTF_F_COMPACT,
+ "(struct __sk_buff){.cb = (__u32[5])[0,0,1,0,0,],}",
+ { .cb = { 0, 0, 1, 0, 0},});
+ TEST_BTF_DUMP_DATA(btf, d, "struct", str, struct __sk_buff, 0,
+"(struct __sk_buff){\n"
+" .cb = (__u32[5])[\n"
+" 0,\n"
+" 0,\n"
+" 1,\n"
+" 0,\n"
+" 0,\n"
+" ],\n"
+"}",
+ { .cb = { 0, 0, 1, 0, 0},});
+
+ /* struct with bitfields */
+ TEST_BTF_DUMP_DATA_C(btf, d, "struct", str, struct bpf_insn, BTF_F_COMPACT,
+ {.code = (__u8)1,.dst_reg = (__u8)0x2,.src_reg = (__u8)0x3,.off = (__s16)4,.imm = (__s32)5,});
+ TEST_BTF_DUMP_DATA(btf, d, "struct", str, struct bpf_insn,
+ BTF_F_COMPACT | BTF_F_NONAME,
+ "{1,0x2,0x3,4,5,}",
+ { .code = 1, .dst_reg = 0x2, .src_reg = 0x3, .off = 4,
+ .imm = 5,});
+ TEST_BTF_DUMP_DATA(btf, d, "struct", str, struct bpf_insn, 0,
+"(struct bpf_insn){\n"
+" .code = (__u8)1,\n"
+" .dst_reg = (__u8)0x2,\n"
+" .src_reg = (__u8)0x3,\n"
+" .off = (__s16)4,\n"
+" .imm = (__s32)5,\n"
+"}",
+ {.code = 1, .dst_reg = 2, .src_reg = 3, .off = 4, .imm = 5});
+
+ /* zeroed bitfields should not be displayed */
+ TEST_BTF_DUMP_DATA(btf, d, "struct", str, struct bpf_insn, BTF_F_COMPACT,
+ "(struct bpf_insn){.dst_reg = (__u8)0x1,}",
+ { .code = 0, .dst_reg = 1});
+
+ /* struct with enum bitfield */
+ type_id = btf__find_by_name(btf, "fs_context");
+ if (ASSERT_GT(type_id, 0, "find fs_context")) {
+ type_sz = btf__resolve_size(btf, type_id);
+ str[0] = '\0';
+
+ opts.emit_zeroes = true;
+ ret = btf_dump__dump_type_data(d, type_id, zero_data, type_sz, &opts);
+ ASSERT_EQ(ret, type_sz,
+ "unexpected return value dumping fs_context");
+
+ ASSERT_NEQ(strstr(str, "FS_CONTEXT_FOR_MOUNT"), NULL,
+ "bitfield value not present");
+ }
+
+ /* struct with nested anon union */
+ TEST_BTF_DUMP_DATA(btf, d, "struct", str, struct bpf_sock_ops, BTF_F_COMPACT,
+ "(struct bpf_sock_ops){.op = (__u32)1,(union){.args = (__u32[4])[1,2,3,4,],.reply = (__u32)1,.replylong = (__u32[4])[1,2,3,4,],},}",
+ { .op = 1, .args = { 1, 2, 3, 4}});
+
+ /* union with nested struct */
+ TEST_BTF_DUMP_DATA(btf, d, "union", str, union bpf_iter_link_info, BTF_F_COMPACT,
+ "(union bpf_iter_link_info){.map = (struct){.map_fd = (__u32)1,},.cgroup = (struct){.order = (enum bpf_cgroup_iter_order)BPF_CGROUP_ITER_SELF_ONLY,.cgroup_fd = (__u32)1,},.task = (struct){.tid = (__u32)1,.pid = (__u32)1,},}",
+ { .cgroup = { .order = 1, .cgroup_fd = 1, }});
+
+ /* struct skb with nested structs/unions; because type output is so
+ * complex, we don't do a string comparison, just verify we return
+ * the type size as the amount of data displayed.
+ */
+ type_id = btf__find_by_name(btf, "sk_buff");
+ if (ASSERT_GT(type_id, 0, "find struct sk_buff")) {
+ type_sz = btf__resolve_size(btf, type_id);
+ str[0] = '\0';
+
+ ret = btf_dump__dump_type_data(d, type_id, skb, type_sz, &opts);
+ ASSERT_EQ(ret, type_sz,
+ "unexpected return value dumping sk_buff");
+ }
+
+ /* overflow bpf_sock_ops struct with final element nonzero/zero.
+ * Regardless of the value of the final field, we don't have all the
+ * data we need to display it, so we should trigger an overflow.
+ * In other words overflow checking should trump "is field zero?"
+ * checks because if we've overflowed, it shouldn't matter what the
+ * field is - we can't trust its value so shouldn't display it.
+ */
+ TEST_BTF_DUMP_DATA_OVER(btf, d, "struct", str, struct bpf_sock_ops,
+ sizeof(struct bpf_sock_ops) - 1,
+ "(struct bpf_sock_ops){\n\t.op = (__u32)1,\n",
+ { .op = 1, .skb_hwtstamp = 2});
+ TEST_BTF_DUMP_DATA_OVER(btf, d, "struct", str, struct bpf_sock_ops,
+ sizeof(struct bpf_sock_ops) - 1,
+ "(struct bpf_sock_ops){\n\t.op = (__u32)1,\n",
+ { .op = 1, .skb_hwtstamp = 0});
+}
+
+static void test_btf_dump_var_data(struct btf *btf, struct btf_dump *d,
+ char *str)
+{
+#if 0
+ TEST_BTF_DUMP_VAR(btf, d, NULL, str, "cpu_number", int, BTF_F_COMPACT,
+ "int cpu_number = (int)100", 100);
+#endif
+ TEST_BTF_DUMP_VAR(btf, d, NULL, str, "cpu_profile_flip", int, BTF_F_COMPACT,
+ "static int cpu_profile_flip = (int)2", 2);
+}
+
+static void test_btf_datasec(struct btf *btf, struct btf_dump *d, char *str,
+ const char *name, const char *expected_val,
+ void *data, size_t data_sz)
+{
+ DECLARE_LIBBPF_OPTS(btf_dump_type_data_opts, opts);
+ int ret = 0, cmp;
+ size_t secsize;
+ __s32 type_id;
+
+ opts.compact = true;
+
+ type_id = btf__find_by_name(btf, name);
+ if (!ASSERT_GT(type_id, 0, "find type id"))
+ return;
+
+ secsize = btf__resolve_size(btf, type_id);
+ ASSERT_EQ(secsize, 0, "verify section size");
+
+ str[0] = '\0';
+ ret = btf_dump__dump_type_data(d, type_id, data, data_sz, &opts);
+ ASSERT_EQ(ret, 0, "unexpected return value");
+
+ cmp = strcmp(str, expected_val);
+ ASSERT_EQ(cmp, 0, "ensure expected/actual match");
+}
+
+static void test_btf_dump_datasec_data(char *str)
+{
+ struct btf *btf;
+ char license[4] = "GPL";
+ struct btf_dump *d;
+
+ btf = btf__parse("xdping_kern.bpf.o", NULL);
+ if (!ASSERT_OK_PTR(btf, "xdping_kern.bpf.o BTF not found"))
+ return;
+
+ d = btf_dump__new(btf, btf_dump_snprintf, str, NULL);
+ if (!ASSERT_OK_PTR(d, "could not create BTF dump"))
+ goto out;
+
+ test_btf_datasec(btf, d, str, "license",
+ "SEC(\"license\") char[4] _license = (char[4])['G','P','L',];",
+ license, sizeof(license));
+out:
+ btf_dump__free(d);
+ btf__free(btf);
+}
+
void test_btf_dump() {
+ char str[STRSIZE];
+ struct btf_dump *d;
+ struct btf *btf;
int i;
for (i = 0; i < ARRAY_SIZE(btf_dump_test_cases); i++) {
@@ -127,4 +871,35 @@ void test_btf_dump() {
test_btf_dump_case(i, &btf_dump_test_cases[i]);
}
+ if (test__start_subtest("btf_dump: incremental"))
+ test_btf_dump_incremental();
+
+ btf = libbpf_find_kernel_btf();
+ if (!ASSERT_OK_PTR(btf, "no kernel BTF found"))
+ return;
+
+ d = btf_dump__new(btf, btf_dump_snprintf, str, NULL);
+ if (!ASSERT_OK_PTR(d, "could not create BTF dump"))
+ return;
+
+ /* Verify type display for various types. */
+ if (test__start_subtest("btf_dump: int_data"))
+ test_btf_dump_int_data(btf, d, str);
+ if (test__start_subtest("btf_dump: float_data"))
+ test_btf_dump_float_data(btf, d, str);
+ if (test__start_subtest("btf_dump: char_data"))
+ test_btf_dump_char_data(btf, d, str);
+ if (test__start_subtest("btf_dump: typedef_data"))
+ test_btf_dump_typedef_data(btf, d, str);
+ if (test__start_subtest("btf_dump: enum_data"))
+ test_btf_dump_enum_data(btf, d, str);
+ if (test__start_subtest("btf_dump: struct_data"))
+ test_btf_dump_struct_data(btf, d, str);
+ if (test__start_subtest("btf_dump: var_data"))
+ test_btf_dump_var_data(btf, d, str);
+ btf_dump__free(d);
+ btf__free(btf);
+
+ if (test__start_subtest("btf_dump: datasec_data"))
+ test_btf_dump_datasec_data(str);
}
diff --git a/tools/testing/selftests/bpf/prog_tests/btf_endian.c b/tools/testing/selftests/bpf/prog_tests/btf_endian.c
new file mode 100644
index 000000000000..5b9f84dbeb43
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/btf_endian.c
@@ -0,0 +1,99 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2020 Facebook */
+#define _GNU_SOURCE
+#include <string.h>
+#include <byteswap.h>
+#include <test_progs.h>
+#include <bpf/btf.h>
+
+void test_btf_endian() {
+#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
+ enum btf_endianness endian = BTF_LITTLE_ENDIAN;
+#elif __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
+ enum btf_endianness endian = BTF_BIG_ENDIAN;
+#else
+#error "Unrecognized __BYTE_ORDER__"
+#endif
+ enum btf_endianness swap_endian = 1 - endian;
+ struct btf *btf = NULL, *swap_btf = NULL;
+ const void *raw_data, *swap_raw_data;
+ const struct btf_type *t;
+ const struct btf_header *hdr;
+ __u32 raw_sz, swap_raw_sz;
+ int var_id;
+
+ /* Load BTF in native endianness */
+ btf = btf__parse_elf("btf_dump_test_case_syntax.bpf.o", NULL);
+ if (!ASSERT_OK_PTR(btf, "parse_native_btf"))
+ goto err_out;
+
+ ASSERT_EQ(btf__endianness(btf), endian, "endian");
+ btf__set_endianness(btf, swap_endian);
+ ASSERT_EQ(btf__endianness(btf), swap_endian, "endian");
+
+ /* Get raw BTF data in non-native endianness... */
+ raw_data = btf__raw_data(btf, &raw_sz);
+ if (!ASSERT_OK_PTR(raw_data, "raw_data_inverted"))
+ goto err_out;
+
+ /* ...and open it as a new BTF instance */
+ swap_btf = btf__new(raw_data, raw_sz);
+ if (!ASSERT_OK_PTR(swap_btf, "parse_swap_btf"))
+ goto err_out;
+
+ ASSERT_EQ(btf__endianness(swap_btf), swap_endian, "endian");
+ ASSERT_EQ(btf__type_cnt(swap_btf), btf__type_cnt(btf), "nr_types");
+
+ swap_raw_data = btf__raw_data(swap_btf, &swap_raw_sz);
+ if (!ASSERT_OK_PTR(swap_raw_data, "swap_raw_data"))
+ goto err_out;
+
+ /* both raw data should be identical (with non-native endianness) */
+ ASSERT_OK(memcmp(raw_data, swap_raw_data, raw_sz), "mem_identical");
+
+ /* make sure that at least BTF header data is really swapped */
+ hdr = swap_raw_data;
+ ASSERT_EQ(bswap_16(hdr->magic), BTF_MAGIC, "btf_magic_swapped");
+ ASSERT_EQ(raw_sz, swap_raw_sz, "raw_sizes");
+
+ /* swap it back to native endianness */
+ btf__set_endianness(swap_btf, endian);
+ swap_raw_data = btf__raw_data(swap_btf, &swap_raw_sz);
+ if (!ASSERT_OK_PTR(swap_raw_data, "swap_raw_data"))
+ goto err_out;
+
+ /* now header should have native BTF_MAGIC */
+ hdr = swap_raw_data;
+ ASSERT_EQ(hdr->magic, BTF_MAGIC, "btf_magic_native");
+ ASSERT_EQ(raw_sz, swap_raw_sz, "raw_sizes");
+
+ /* now modify original BTF */
+ var_id = btf__add_var(btf, "some_var", BTF_VAR_GLOBAL_ALLOCATED, 1);
+ ASSERT_GT(var_id, 0, "var_id");
+
+ btf__free(swap_btf);
+ swap_btf = NULL;
+
+ btf__set_endianness(btf, swap_endian);
+ raw_data = btf__raw_data(btf, &raw_sz);
+ if (!ASSERT_OK_PTR(raw_data, "raw_data_inverted"))
+ goto err_out;
+
+ /* and re-open swapped raw data again */
+ swap_btf = btf__new(raw_data, raw_sz);
+ if (!ASSERT_OK_PTR(swap_btf, "parse_swap_btf"))
+ goto err_out;
+
+ ASSERT_EQ(btf__endianness(swap_btf), swap_endian, "endian");
+ ASSERT_EQ(btf__type_cnt(swap_btf), btf__type_cnt(btf), "nr_types");
+
+ /* the type should appear as if it was stored in native endianness */
+ t = btf__type_by_id(swap_btf, var_id);
+ ASSERT_STREQ(btf__str_by_offset(swap_btf, t->name_off), "some_var", "var_name");
+ ASSERT_EQ(btf_var(t)->linkage, BTF_VAR_GLOBAL_ALLOCATED, "var_linkage");
+ ASSERT_EQ(t->type, 1, "var_type");
+
+err_out:
+ btf__free(btf);
+ btf__free(swap_btf);
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/btf_map_in_map.c b/tools/testing/selftests/bpf/prog_tests/btf_map_in_map.c
index f7ee8fa377ad..a8b53b8736f0 100644
--- a/tools/testing/selftests/bpf/prog_tests/btf_map_in_map.c
+++ b/tools/testing/selftests/bpf/prog_tests/btf_map_in_map.c
@@ -5,10 +5,27 @@
#include "test_btf_map_in_map.skel.h"
-void test_btf_map_in_map(void)
+static int duration;
+
+static __u32 bpf_map_id(struct bpf_map *map)
{
- int duration = 0, err, key = 0, val;
- struct test_btf_map_in_map* skel;
+ struct bpf_map_info info;
+ __u32 info_len = sizeof(info);
+ int err;
+
+ memset(&info, 0, info_len);
+ err = bpf_map_get_info_by_fd(bpf_map__fd(map), &info, &info_len);
+ if (err)
+ return 0;
+ return info.id;
+}
+
+static void test_lookup_update(void)
+{
+ int map1_fd, map2_fd, map3_fd, map4_fd, map5_fd, map1_id, map2_id;
+ int outer_arr_fd, outer_hash_fd, outer_arr_dyn_fd;
+ struct test_btf_map_in_map *skel;
+ int err, key = 0, val, i, fd;
skel = test_btf_map_in_map__open_and_load();
if (CHECK(!skel, "skel_open", "failed to open&load skeleton\n"))
@@ -18,32 +35,130 @@ void test_btf_map_in_map(void)
if (CHECK(err, "skel_attach", "skeleton attach failed: %d\n", err))
goto cleanup;
- /* inner1 = input, inner2 = input + 1 */
- val = bpf_map__fd(skel->maps.inner_map1);
- bpf_map_update_elem(bpf_map__fd(skel->maps.outer_arr), &key, &val, 0);
- val = bpf_map__fd(skel->maps.inner_map2);
- bpf_map_update_elem(bpf_map__fd(skel->maps.outer_hash), &key, &val, 0);
+ map1_fd = bpf_map__fd(skel->maps.inner_map1);
+ map2_fd = bpf_map__fd(skel->maps.inner_map2);
+ map3_fd = bpf_map__fd(skel->maps.inner_map3);
+ map4_fd = bpf_map__fd(skel->maps.inner_map4);
+ map5_fd = bpf_map__fd(skel->maps.inner_map5);
+ outer_arr_dyn_fd = bpf_map__fd(skel->maps.outer_arr_dyn);
+ outer_arr_fd = bpf_map__fd(skel->maps.outer_arr);
+ outer_hash_fd = bpf_map__fd(skel->maps.outer_hash);
+
+ /* inner1 = input, inner2 = input + 1, inner3 = input + 2 */
+ bpf_map_update_elem(outer_arr_fd, &key, &map1_fd, 0);
+ bpf_map_update_elem(outer_hash_fd, &key, &map2_fd, 0);
+ bpf_map_update_elem(outer_arr_dyn_fd, &key, &map3_fd, 0);
skel->bss->input = 1;
usleep(1);
-
- bpf_map_lookup_elem(bpf_map__fd(skel->maps.inner_map1), &key, &val);
+ bpf_map_lookup_elem(map1_fd, &key, &val);
CHECK(val != 1, "inner1", "got %d != exp %d\n", val, 1);
- bpf_map_lookup_elem(bpf_map__fd(skel->maps.inner_map2), &key, &val);
+ bpf_map_lookup_elem(map2_fd, &key, &val);
CHECK(val != 2, "inner2", "got %d != exp %d\n", val, 2);
+ bpf_map_lookup_elem(map3_fd, &key, &val);
+ CHECK(val != 3, "inner3", "got %d != exp %d\n", val, 3);
- /* inner1 = input + 1, inner2 = input */
- val = bpf_map__fd(skel->maps.inner_map2);
- bpf_map_update_elem(bpf_map__fd(skel->maps.outer_arr), &key, &val, 0);
- val = bpf_map__fd(skel->maps.inner_map1);
- bpf_map_update_elem(bpf_map__fd(skel->maps.outer_hash), &key, &val, 0);
+ /* inner2 = input, inner1 = input + 1, inner4 = input + 2 */
+ bpf_map_update_elem(outer_arr_fd, &key, &map2_fd, 0);
+ bpf_map_update_elem(outer_hash_fd, &key, &map1_fd, 0);
+ bpf_map_update_elem(outer_arr_dyn_fd, &key, &map4_fd, 0);
skel->bss->input = 3;
usleep(1);
-
- bpf_map_lookup_elem(bpf_map__fd(skel->maps.inner_map1), &key, &val);
+ bpf_map_lookup_elem(map1_fd, &key, &val);
CHECK(val != 4, "inner1", "got %d != exp %d\n", val, 4);
- bpf_map_lookup_elem(bpf_map__fd(skel->maps.inner_map2), &key, &val);
+ bpf_map_lookup_elem(map2_fd, &key, &val);
CHECK(val != 3, "inner2", "got %d != exp %d\n", val, 3);
+ bpf_map_lookup_elem(map4_fd, &key, &val);
+ CHECK(val != 5, "inner4", "got %d != exp %d\n", val, 5);
+
+ /* inner5 = input + 2 */
+ bpf_map_update_elem(outer_arr_dyn_fd, &key, &map5_fd, 0);
+ skel->bss->input = 5;
+ usleep(1);
+ bpf_map_lookup_elem(map5_fd, &key, &val);
+ CHECK(val != 7, "inner5", "got %d != exp %d\n", val, 7);
+
+ for (i = 0; i < 5; i++) {
+ val = i % 2 ? map1_fd : map2_fd;
+ err = bpf_map_update_elem(outer_hash_fd, &key, &val, 0);
+ if (CHECK_FAIL(err)) {
+ printf("failed to update hash_of_maps on iter #%d\n", i);
+ goto cleanup;
+ }
+ err = bpf_map_update_elem(outer_arr_fd, &key, &val, 0);
+ if (CHECK_FAIL(err)) {
+ printf("failed to update array_of_maps on iter #%d\n", i);
+ goto cleanup;
+ }
+ val = i % 2 ? map4_fd : map5_fd;
+ err = bpf_map_update_elem(outer_arr_dyn_fd, &key, &val, 0);
+ if (CHECK_FAIL(err)) {
+ printf("failed to update array_of_maps (dyn) on iter #%d\n", i);
+ goto cleanup;
+ }
+ }
+
+ map1_id = bpf_map_id(skel->maps.inner_map1);
+ map2_id = bpf_map_id(skel->maps.inner_map2);
+ CHECK(map1_id == 0, "map1_id", "failed to get ID 1\n");
+ CHECK(map2_id == 0, "map2_id", "failed to get ID 2\n");
+
+ test_btf_map_in_map__destroy(skel);
+ skel = NULL;
+
+ /* we need to either wait for or force synchronize_rcu(), before
+ * checking for "still exists" condition, otherwise map could still be
+ * resolvable by ID, causing false positives.
+ *
+ * Older kernels (5.8 and earlier) freed map only after two
+ * synchronize_rcu()s, so trigger two, to be entirely sure.
+ */
+ CHECK(kern_sync_rcu(), "sync_rcu", "failed\n");
+ CHECK(kern_sync_rcu(), "sync_rcu", "failed\n");
+
+ fd = bpf_map_get_fd_by_id(map1_id);
+ if (CHECK(fd >= 0, "map1_leak", "inner_map1 leaked!\n")) {
+ close(fd);
+ goto cleanup;
+ }
+ fd = bpf_map_get_fd_by_id(map2_id);
+ if (CHECK(fd >= 0, "map2_leak", "inner_map2 leaked!\n")) {
+ close(fd);
+ goto cleanup;
+ }
cleanup:
test_btf_map_in_map__destroy(skel);
}
+
+static void test_diff_size(void)
+{
+ struct test_btf_map_in_map *skel;
+ int err, inner_map_fd, zero = 0;
+
+ skel = test_btf_map_in_map__open_and_load();
+ if (CHECK(!skel, "skel_open", "failed to open&load skeleton\n"))
+ return;
+
+ inner_map_fd = bpf_map__fd(skel->maps.sockarr_sz2);
+ err = bpf_map_update_elem(bpf_map__fd(skel->maps.outer_sockarr), &zero,
+ &inner_map_fd, 0);
+ CHECK(err, "outer_sockarr inner map size check",
+ "cannot use a different size inner_map\n");
+
+ inner_map_fd = bpf_map__fd(skel->maps.inner_map_sz2);
+ err = bpf_map_update_elem(bpf_map__fd(skel->maps.outer_arr), &zero,
+ &inner_map_fd, 0);
+ CHECK(!err, "outer_arr inner map size check",
+ "incorrectly updated with a different size inner_map\n");
+
+ test_btf_map_in_map__destroy(skel);
+}
+
+void test_btf_map_in_map(void)
+{
+ if (test__start_subtest("lookup_update"))
+ test_lookup_update();
+
+ if (test__start_subtest("diff_size"))
+ test_diff_size();
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/btf_module.c b/tools/testing/selftests/bpf/prog_tests/btf_module.c
new file mode 100644
index 000000000000..2239d1fe0332
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/btf_module.c
@@ -0,0 +1,34 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* Copyright (c) 2021 Hengqi Chen */
+
+#include <test_progs.h>
+#include <bpf/btf.h>
+
+static const char *module_name = "bpf_testmod";
+static const char *symbol_name = "bpf_testmod_test_read";
+
+void test_btf_module()
+{
+ struct btf *vmlinux_btf, *module_btf;
+ __s32 type_id;
+
+ if (!env.has_testmod) {
+ test__skip();
+ return;
+ }
+
+ vmlinux_btf = btf__load_vmlinux_btf();
+ if (!ASSERT_OK_PTR(vmlinux_btf, "could not load vmlinux BTF"))
+ return;
+
+ module_btf = btf__load_module_btf(module_name, vmlinux_btf);
+ if (!ASSERT_OK_PTR(module_btf, "could not load module BTF"))
+ goto cleanup;
+
+ type_id = btf__find_by_name(module_btf, symbol_name);
+ ASSERT_GT(type_id, 0, "func not found");
+
+cleanup:
+ btf__free(module_btf);
+ btf__free(vmlinux_btf);
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/btf_skc_cls_ingress.c b/tools/testing/selftests/bpf/prog_tests/btf_skc_cls_ingress.c
new file mode 100644
index 000000000000..ef4d6a3ae423
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/btf_skc_cls_ingress.c
@@ -0,0 +1,221 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2020 Facebook */
+
+#define _GNU_SOURCE
+#include <netinet/in.h>
+#include <arpa/inet.h>
+#include <unistd.h>
+#include <stdlib.h>
+#include <string.h>
+#include <errno.h>
+#include <sched.h>
+#include <net/if.h>
+#include <linux/compiler.h>
+#include <bpf/libbpf.h>
+
+#include "network_helpers.h"
+#include "test_progs.h"
+#include "test_btf_skc_cls_ingress.skel.h"
+
+static struct test_btf_skc_cls_ingress *skel;
+static struct sockaddr_in6 srv_sa6;
+static __u32 duration;
+
+static int prepare_netns(void)
+{
+ LIBBPF_OPTS(bpf_tc_hook, qdisc_lo, .attach_point = BPF_TC_INGRESS);
+ LIBBPF_OPTS(bpf_tc_opts, tc_attach,
+ .prog_fd = bpf_program__fd(skel->progs.cls_ingress));
+
+ if (CHECK(unshare(CLONE_NEWNET), "create netns",
+ "unshare(CLONE_NEWNET): %s (%d)",
+ strerror(errno), errno))
+ return -1;
+
+ if (CHECK(system("ip link set dev lo up"),
+ "ip link set dev lo up", "failed\n"))
+ return -1;
+
+ qdisc_lo.ifindex = if_nametoindex("lo");
+ if (!ASSERT_OK(bpf_tc_hook_create(&qdisc_lo), "qdisc add dev lo clsact"))
+ return -1;
+
+ if (!ASSERT_OK(bpf_tc_attach(&qdisc_lo, &tc_attach),
+ "filter add dev lo ingress"))
+ return -1;
+
+ /* Ensure 20 bytes options (i.e. in total 40 bytes tcp header) for the
+ * bpf_tcp_gen_syncookie() helper.
+ */
+ if (write_sysctl("/proc/sys/net/ipv4/tcp_window_scaling", "1") ||
+ write_sysctl("/proc/sys/net/ipv4/tcp_timestamps", "1") ||
+ write_sysctl("/proc/sys/net/ipv4/tcp_sack", "1"))
+ return -1;
+
+ return 0;
+}
+
+static void reset_test(void)
+{
+ memset(&skel->bss->srv_sa6, 0, sizeof(skel->bss->srv_sa6));
+ skel->bss->listen_tp_sport = 0;
+ skel->bss->req_sk_sport = 0;
+ skel->bss->recv_cookie = 0;
+ skel->bss->gen_cookie = 0;
+ skel->bss->linum = 0;
+}
+
+static void print_err_line(void)
+{
+ if (skel->bss->linum)
+ printf("bpf prog error at line %u\n", skel->bss->linum);
+}
+
+static void test_conn(void)
+{
+ int listen_fd = -1, cli_fd = -1, srv_fd = -1, err;
+ socklen_t addrlen = sizeof(srv_sa6);
+ int srv_port;
+
+ if (write_sysctl("/proc/sys/net/ipv4/tcp_syncookies", "1"))
+ return;
+
+ listen_fd = start_server(AF_INET6, SOCK_STREAM, "::1", 0, 0);
+ if (CHECK_FAIL(listen_fd == -1))
+ return;
+
+ err = getsockname(listen_fd, (struct sockaddr *)&srv_sa6, &addrlen);
+ if (CHECK(err, "getsockname(listen_fd)", "err:%d errno:%d\n", err,
+ errno))
+ goto done;
+ memcpy(&skel->bss->srv_sa6, &srv_sa6, sizeof(srv_sa6));
+ srv_port = ntohs(srv_sa6.sin6_port);
+
+ cli_fd = connect_to_fd(listen_fd, 0);
+ if (CHECK_FAIL(cli_fd == -1))
+ goto done;
+
+ srv_fd = accept(listen_fd, NULL, NULL);
+ if (CHECK_FAIL(srv_fd == -1))
+ goto done;
+
+ if (CHECK(skel->bss->listen_tp_sport != srv_port ||
+ skel->bss->req_sk_sport != srv_port,
+ "Unexpected sk src port",
+ "listen_tp_sport:%u req_sk_sport:%u expected:%u\n",
+ skel->bss->listen_tp_sport, skel->bss->req_sk_sport,
+ srv_port))
+ goto done;
+
+ if (CHECK(skel->bss->gen_cookie || skel->bss->recv_cookie,
+ "Unexpected syncookie states",
+ "gen_cookie:%u recv_cookie:%u\n",
+ skel->bss->gen_cookie, skel->bss->recv_cookie))
+ goto done;
+
+ CHECK(skel->bss->linum, "bpf prog detected error", "at line %u\n",
+ skel->bss->linum);
+
+done:
+ if (listen_fd != -1)
+ close(listen_fd);
+ if (cli_fd != -1)
+ close(cli_fd);
+ if (srv_fd != -1)
+ close(srv_fd);
+}
+
+static void test_syncookie(void)
+{
+ int listen_fd = -1, cli_fd = -1, srv_fd = -1, err;
+ socklen_t addrlen = sizeof(srv_sa6);
+ int srv_port;
+
+ /* Enforce syncookie mode */
+ if (write_sysctl("/proc/sys/net/ipv4/tcp_syncookies", "2"))
+ return;
+
+ listen_fd = start_server(AF_INET6, SOCK_STREAM, "::1", 0, 0);
+ if (CHECK_FAIL(listen_fd == -1))
+ return;
+
+ err = getsockname(listen_fd, (struct sockaddr *)&srv_sa6, &addrlen);
+ if (CHECK(err, "getsockname(listen_fd)", "err:%d errno:%d\n", err,
+ errno))
+ goto done;
+ memcpy(&skel->bss->srv_sa6, &srv_sa6, sizeof(srv_sa6));
+ srv_port = ntohs(srv_sa6.sin6_port);
+
+ cli_fd = connect_to_fd(listen_fd, 0);
+ if (CHECK_FAIL(cli_fd == -1))
+ goto done;
+
+ srv_fd = accept(listen_fd, NULL, NULL);
+ if (CHECK_FAIL(srv_fd == -1))
+ goto done;
+
+ if (CHECK(skel->bss->listen_tp_sport != srv_port,
+ "Unexpected tp src port",
+ "listen_tp_sport:%u expected:%u\n",
+ skel->bss->listen_tp_sport, srv_port))
+ goto done;
+
+ if (CHECK(skel->bss->req_sk_sport,
+ "Unexpected req_sk src port",
+ "req_sk_sport:%u expected:0\n",
+ skel->bss->req_sk_sport))
+ goto done;
+
+ if (CHECK(!skel->bss->gen_cookie ||
+ skel->bss->gen_cookie != skel->bss->recv_cookie,
+ "Unexpected syncookie states",
+ "gen_cookie:%u recv_cookie:%u\n",
+ skel->bss->gen_cookie, skel->bss->recv_cookie))
+ goto done;
+
+ CHECK(skel->bss->linum, "bpf prog detected error", "at line %u\n",
+ skel->bss->linum);
+
+done:
+ if (listen_fd != -1)
+ close(listen_fd);
+ if (cli_fd != -1)
+ close(cli_fd);
+ if (srv_fd != -1)
+ close(srv_fd);
+}
+
+struct test {
+ const char *desc;
+ void (*run)(void);
+};
+
+#define DEF_TEST(name) { #name, test_##name }
+static struct test tests[] = {
+ DEF_TEST(conn),
+ DEF_TEST(syncookie),
+};
+
+void test_btf_skc_cls_ingress(void)
+{
+ int i;
+
+ skel = test_btf_skc_cls_ingress__open_and_load();
+ if (CHECK(!skel, "test_btf_skc_cls_ingress__open_and_load", "failed\n"))
+ return;
+
+ for (i = 0; i < ARRAY_SIZE(tests); i++) {
+ if (!test__start_subtest(tests[i].desc))
+ continue;
+
+ if (prepare_netns())
+ break;
+
+ tests[i].run();
+
+ print_err_line();
+ reset_test();
+ }
+
+ test_btf_skc_cls_ingress__destroy(skel);
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/btf_split.c b/tools/testing/selftests/bpf/prog_tests/btf_split.c
new file mode 100644
index 000000000000..eef1158676ed
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/btf_split.c
@@ -0,0 +1,97 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2020 Facebook */
+#include <test_progs.h>
+#include <bpf/btf.h>
+
+static char *dump_buf;
+static size_t dump_buf_sz;
+static FILE *dump_buf_file;
+
+static void btf_dump_printf(void *ctx, const char *fmt, va_list args)
+{
+ vfprintf(ctx, fmt, args);
+}
+
+void test_btf_split() {
+ struct btf_dump *d = NULL;
+ const struct btf_type *t;
+ struct btf *btf1, *btf2;
+ int str_off, i, err;
+
+ btf1 = btf__new_empty();
+ if (!ASSERT_OK_PTR(btf1, "empty_main_btf"))
+ return;
+
+ btf__set_pointer_size(btf1, 8); /* enforce 64-bit arch */
+
+ btf__add_int(btf1, "int", 4, BTF_INT_SIGNED); /* [1] int */
+ btf__add_ptr(btf1, 1); /* [2] ptr to int */
+
+ btf__add_struct(btf1, "s1", 4); /* [3] struct s1 { */
+ btf__add_field(btf1, "f1", 1, 0, 0); /* int f1; */
+ /* } */
+
+ btf2 = btf__new_empty_split(btf1);
+ if (!ASSERT_OK_PTR(btf2, "empty_split_btf"))
+ goto cleanup;
+
+ /* pointer size should be "inherited" from main BTF */
+ ASSERT_EQ(btf__pointer_size(btf2), 8, "inherit_ptr_sz");
+
+ str_off = btf__find_str(btf2, "int");
+ ASSERT_NEQ(str_off, -ENOENT, "str_int_missing");
+
+ t = btf__type_by_id(btf2, 1);
+ if (!ASSERT_OK_PTR(t, "int_type"))
+ goto cleanup;
+ ASSERT_EQ(btf_is_int(t), true, "int_kind");
+ ASSERT_STREQ(btf__str_by_offset(btf2, t->name_off), "int", "int_name");
+
+ btf__add_struct(btf2, "s2", 16); /* [4] struct s2 { */
+ btf__add_field(btf2, "f1", 3, 0, 0); /* struct s1 f1; */
+ btf__add_field(btf2, "f2", 1, 32, 0); /* int f2; */
+ btf__add_field(btf2, "f3", 2, 64, 0); /* int *f3; */
+ /* } */
+
+ t = btf__type_by_id(btf1, 4);
+ ASSERT_NULL(t, "split_type_in_main");
+
+ t = btf__type_by_id(btf2, 4);
+ if (!ASSERT_OK_PTR(t, "split_struct_type"))
+ goto cleanup;
+ ASSERT_EQ(btf_is_struct(t), true, "split_struct_kind");
+ ASSERT_EQ(btf_vlen(t), 3, "split_struct_vlen");
+ ASSERT_STREQ(btf__str_by_offset(btf2, t->name_off), "s2", "split_struct_name");
+
+ /* BTF-to-C dump of split BTF */
+ dump_buf_file = open_memstream(&dump_buf, &dump_buf_sz);
+ if (!ASSERT_OK_PTR(dump_buf_file, "dump_memstream"))
+ return;
+ d = btf_dump__new(btf2, btf_dump_printf, dump_buf_file, NULL);
+ if (!ASSERT_OK_PTR(d, "btf_dump__new"))
+ goto cleanup;
+ for (i = 1; i < btf__type_cnt(btf2); i++) {
+ err = btf_dump__dump_type(d, i);
+ ASSERT_OK(err, "dump_type_ok");
+ }
+ fflush(dump_buf_file);
+ dump_buf[dump_buf_sz] = 0; /* some libc implementations don't do this */
+ ASSERT_STREQ(dump_buf,
+"struct s1 {\n"
+" int f1;\n"
+"};\n"
+"\n"
+"struct s2 {\n"
+" struct s1 f1;\n"
+" int f2;\n"
+" int *f3;\n"
+"};\n\n", "c_dump");
+
+cleanup:
+ if (dump_buf_file)
+ fclose(dump_buf_file);
+ free(dump_buf);
+ btf_dump__free(d);
+ btf__free(btf1);
+ btf__free(btf2);
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/btf_tag.c b/tools/testing/selftests/bpf/prog_tests/btf_tag.c
new file mode 100644
index 000000000000..071430cd54de
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/btf_tag.c
@@ -0,0 +1,249 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2021 Facebook */
+#include <test_progs.h>
+#include <bpf/btf.h>
+#include "test_btf_decl_tag.skel.h"
+
+/* struct btf_type_tag_test is referenced in btf_type_tag.skel.h */
+struct btf_type_tag_test {
+ int **p;
+};
+#include "btf_type_tag.skel.h"
+#include "btf_type_tag_user.skel.h"
+#include "btf_type_tag_percpu.skel.h"
+
+static void test_btf_decl_tag(void)
+{
+ struct test_btf_decl_tag *skel;
+
+ skel = test_btf_decl_tag__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "btf_decl_tag"))
+ return;
+
+ if (skel->rodata->skip_tests) {
+ printf("%s:SKIP: btf_decl_tag attribute not supported", __func__);
+ test__skip();
+ }
+
+ test_btf_decl_tag__destroy(skel);
+}
+
+static void test_btf_type_tag(void)
+{
+ struct btf_type_tag *skel;
+
+ skel = btf_type_tag__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "btf_type_tag"))
+ return;
+
+ if (skel->rodata->skip_tests) {
+ printf("%s:SKIP: btf_type_tag attribute not supported", __func__);
+ test__skip();
+ }
+
+ btf_type_tag__destroy(skel);
+}
+
+/* loads vmlinux_btf as well as module_btf. If the caller passes NULL as
+ * module_btf, it will not load module btf.
+ *
+ * Returns 0 on success.
+ * Return -1 On error. In case of error, the loaded btf will be freed and the
+ * input parameters will be set to pointing to NULL.
+ */
+static int load_btfs(struct btf **vmlinux_btf, struct btf **module_btf,
+ bool needs_vmlinux_tag)
+{
+ const char *module_name = "bpf_testmod";
+ __s32 type_id;
+
+ if (!env.has_testmod) {
+ test__skip();
+ return -1;
+ }
+
+ *vmlinux_btf = btf__load_vmlinux_btf();
+ if (!ASSERT_OK_PTR(*vmlinux_btf, "could not load vmlinux BTF"))
+ return -1;
+
+ if (!needs_vmlinux_tag)
+ goto load_module_btf;
+
+ /* skip the test if the vmlinux does not have __user tags */
+ type_id = btf__find_by_name_kind(*vmlinux_btf, "user", BTF_KIND_TYPE_TAG);
+ if (type_id <= 0) {
+ printf("%s:SKIP: btf_type_tag attribute not in vmlinux btf", __func__);
+ test__skip();
+ goto free_vmlinux_btf;
+ }
+
+load_module_btf:
+ /* skip loading module_btf, if not requested by caller */
+ if (!module_btf)
+ return 0;
+
+ *module_btf = btf__load_module_btf(module_name, *vmlinux_btf);
+ if (!ASSERT_OK_PTR(*module_btf, "could not load module BTF"))
+ goto free_vmlinux_btf;
+
+ /* skip the test if the module does not have __user tags */
+ type_id = btf__find_by_name_kind(*module_btf, "user", BTF_KIND_TYPE_TAG);
+ if (type_id <= 0) {
+ printf("%s:SKIP: btf_type_tag attribute not in %s", __func__, module_name);
+ test__skip();
+ goto free_module_btf;
+ }
+
+ return 0;
+
+free_module_btf:
+ btf__free(*module_btf);
+free_vmlinux_btf:
+ btf__free(*vmlinux_btf);
+
+ *vmlinux_btf = NULL;
+ if (module_btf)
+ *module_btf = NULL;
+ return -1;
+}
+
+static void test_btf_type_tag_mod_user(bool load_test_user1)
+{
+ struct btf *vmlinux_btf = NULL, *module_btf = NULL;
+ struct btf_type_tag_user *skel;
+ int err;
+
+ if (load_btfs(&vmlinux_btf, &module_btf, /*needs_vmlinux_tag=*/false))
+ return;
+
+ skel = btf_type_tag_user__open();
+ if (!ASSERT_OK_PTR(skel, "btf_type_tag_user"))
+ goto cleanup;
+
+ bpf_program__set_autoload(skel->progs.test_sys_getsockname, false);
+ if (load_test_user1)
+ bpf_program__set_autoload(skel->progs.test_user2, false);
+ else
+ bpf_program__set_autoload(skel->progs.test_user1, false);
+
+ err = btf_type_tag_user__load(skel);
+ ASSERT_ERR(err, "btf_type_tag_user");
+
+ btf_type_tag_user__destroy(skel);
+
+cleanup:
+ btf__free(module_btf);
+ btf__free(vmlinux_btf);
+}
+
+static void test_btf_type_tag_vmlinux_user(void)
+{
+ struct btf_type_tag_user *skel;
+ struct btf *vmlinux_btf = NULL;
+ int err;
+
+ if (load_btfs(&vmlinux_btf, NULL, /*needs_vmlinux_tag=*/true))
+ return;
+
+ skel = btf_type_tag_user__open();
+ if (!ASSERT_OK_PTR(skel, "btf_type_tag_user"))
+ goto cleanup;
+
+ bpf_program__set_autoload(skel->progs.test_user2, false);
+ bpf_program__set_autoload(skel->progs.test_user1, false);
+
+ err = btf_type_tag_user__load(skel);
+ ASSERT_ERR(err, "btf_type_tag_user");
+
+ btf_type_tag_user__destroy(skel);
+
+cleanup:
+ btf__free(vmlinux_btf);
+}
+
+static void test_btf_type_tag_mod_percpu(bool load_test_percpu1)
+{
+ struct btf *vmlinux_btf, *module_btf;
+ struct btf_type_tag_percpu *skel;
+ int err;
+
+ if (load_btfs(&vmlinux_btf, &module_btf, /*needs_vmlinux_tag=*/false))
+ return;
+
+ skel = btf_type_tag_percpu__open();
+ if (!ASSERT_OK_PTR(skel, "btf_type_tag_percpu"))
+ goto cleanup;
+
+ bpf_program__set_autoload(skel->progs.test_percpu_load, false);
+ bpf_program__set_autoload(skel->progs.test_percpu_helper, false);
+ if (load_test_percpu1)
+ bpf_program__set_autoload(skel->progs.test_percpu2, false);
+ else
+ bpf_program__set_autoload(skel->progs.test_percpu1, false);
+
+ err = btf_type_tag_percpu__load(skel);
+ ASSERT_ERR(err, "btf_type_tag_percpu");
+
+ btf_type_tag_percpu__destroy(skel);
+
+cleanup:
+ btf__free(module_btf);
+ btf__free(vmlinux_btf);
+}
+
+static void test_btf_type_tag_vmlinux_percpu(bool load_test)
+{
+ struct btf_type_tag_percpu *skel;
+ struct btf *vmlinux_btf = NULL;
+ int err;
+
+ if (load_btfs(&vmlinux_btf, NULL, /*needs_vmlinux_tag=*/true))
+ return;
+
+ skel = btf_type_tag_percpu__open();
+ if (!ASSERT_OK_PTR(skel, "btf_type_tag_percpu"))
+ goto cleanup;
+
+ bpf_program__set_autoload(skel->progs.test_percpu2, false);
+ bpf_program__set_autoload(skel->progs.test_percpu1, false);
+ if (load_test) {
+ bpf_program__set_autoload(skel->progs.test_percpu_helper, false);
+
+ err = btf_type_tag_percpu__load(skel);
+ ASSERT_ERR(err, "btf_type_tag_percpu_load");
+ } else {
+ bpf_program__set_autoload(skel->progs.test_percpu_load, false);
+
+ err = btf_type_tag_percpu__load(skel);
+ ASSERT_OK(err, "btf_type_tag_percpu_helper");
+ }
+
+ btf_type_tag_percpu__destroy(skel);
+
+cleanup:
+ btf__free(vmlinux_btf);
+}
+
+void test_btf_tag(void)
+{
+ if (test__start_subtest("btf_decl_tag"))
+ test_btf_decl_tag();
+ if (test__start_subtest("btf_type_tag"))
+ test_btf_type_tag();
+
+ if (test__start_subtest("btf_type_tag_user_mod1"))
+ test_btf_type_tag_mod_user(true);
+ if (test__start_subtest("btf_type_tag_user_mod2"))
+ test_btf_type_tag_mod_user(false);
+ if (test__start_subtest("btf_type_tag_sys_user_vmlinux"))
+ test_btf_type_tag_vmlinux_user();
+
+ if (test__start_subtest("btf_type_tag_percpu_mod1"))
+ test_btf_type_tag_mod_percpu(true);
+ if (test__start_subtest("btf_type_tag_percpu_mod2"))
+ test_btf_type_tag_mod_percpu(false);
+ if (test__start_subtest("btf_type_tag_percpu_vmlinux_load"))
+ test_btf_type_tag_vmlinux_percpu(true);
+ if (test__start_subtest("btf_type_tag_percpu_vmlinux_helper"))
+ test_btf_type_tag_vmlinux_percpu(false);
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/btf_write.c b/tools/testing/selftests/bpf/prog_tests/btf_write.c
new file mode 100644
index 000000000000..6e36de1302fc
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/btf_write.c
@@ -0,0 +1,506 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2020 Facebook */
+#include <test_progs.h>
+#include <bpf/btf.h>
+#include "btf_helpers.h"
+
+static void gen_btf(struct btf *btf)
+{
+ const struct btf_var_secinfo *vi;
+ const struct btf_type *t;
+ const struct btf_member *m;
+ const struct btf_enum64 *v64;
+ const struct btf_enum *v;
+ const struct btf_param *p;
+ int id, err, str_off;
+
+ str_off = btf__find_str(btf, "int");
+ ASSERT_EQ(str_off, -ENOENT, "int_str_missing_off");
+
+ str_off = btf__add_str(btf, "int");
+ ASSERT_EQ(str_off, 1, "int_str_off");
+
+ str_off = btf__find_str(btf, "int");
+ ASSERT_EQ(str_off, 1, "int_str_found_off");
+
+ /* BTF_KIND_INT */
+ id = btf__add_int(btf, "int", 4, BTF_INT_SIGNED);
+ ASSERT_EQ(id, 1, "int_id");
+
+ t = btf__type_by_id(btf, 1);
+ /* should re-use previously added "int" string */
+ ASSERT_EQ(t->name_off, str_off, "int_name_off");
+ ASSERT_STREQ(btf__str_by_offset(btf, t->name_off), "int", "int_name");
+ ASSERT_EQ(btf_kind(t), BTF_KIND_INT, "int_kind");
+ ASSERT_EQ(t->size, 4, "int_sz");
+ ASSERT_EQ(btf_int_encoding(t), BTF_INT_SIGNED, "int_enc");
+ ASSERT_EQ(btf_int_bits(t), 32, "int_bits");
+ ASSERT_STREQ(btf_type_raw_dump(btf, 1),
+ "[1] INT 'int' size=4 bits_offset=0 nr_bits=32 encoding=SIGNED", "raw_dump");
+
+ /* invalid int size */
+ id = btf__add_int(btf, "bad sz int", 7, 0);
+ ASSERT_ERR(id, "int_bad_sz");
+ /* invalid encoding */
+ id = btf__add_int(btf, "bad enc int", 4, 123);
+ ASSERT_ERR(id, "int_bad_enc");
+ /* NULL name */
+ id = btf__add_int(btf, NULL, 4, 0);
+ ASSERT_ERR(id, "int_bad_null_name");
+ /* empty name */
+ id = btf__add_int(btf, "", 4, 0);
+ ASSERT_ERR(id, "int_bad_empty_name");
+
+ /* PTR/CONST/VOLATILE/RESTRICT */
+ id = btf__add_ptr(btf, 1);
+ ASSERT_EQ(id, 2, "ptr_id");
+ t = btf__type_by_id(btf, 2);
+ ASSERT_EQ(btf_kind(t), BTF_KIND_PTR, "ptr_kind");
+ ASSERT_EQ(t->type, 1, "ptr_type");
+ ASSERT_STREQ(btf_type_raw_dump(btf, 2),
+ "[2] PTR '(anon)' type_id=1", "raw_dump");
+
+ id = btf__add_const(btf, 5); /* points forward to restrict */
+ ASSERT_EQ(id, 3, "const_id");
+ t = btf__type_by_id(btf, 3);
+ ASSERT_EQ(btf_kind(t), BTF_KIND_CONST, "const_kind");
+ ASSERT_EQ(t->type, 5, "const_type");
+ ASSERT_STREQ(btf_type_raw_dump(btf, 3),
+ "[3] CONST '(anon)' type_id=5", "raw_dump");
+
+ id = btf__add_volatile(btf, 3);
+ ASSERT_EQ(id, 4, "volatile_id");
+ t = btf__type_by_id(btf, 4);
+ ASSERT_EQ(btf_kind(t), BTF_KIND_VOLATILE, "volatile_kind");
+ ASSERT_EQ(t->type, 3, "volatile_type");
+ ASSERT_STREQ(btf_type_raw_dump(btf, 4),
+ "[4] VOLATILE '(anon)' type_id=3", "raw_dump");
+
+ id = btf__add_restrict(btf, 4);
+ ASSERT_EQ(id, 5, "restrict_id");
+ t = btf__type_by_id(btf, 5);
+ ASSERT_EQ(btf_kind(t), BTF_KIND_RESTRICT, "restrict_kind");
+ ASSERT_EQ(t->type, 4, "restrict_type");
+ ASSERT_STREQ(btf_type_raw_dump(btf, 5),
+ "[5] RESTRICT '(anon)' type_id=4", "raw_dump");
+
+ /* ARRAY */
+ id = btf__add_array(btf, 1, 2, 10); /* int *[10] */
+ ASSERT_EQ(id, 6, "array_id");
+ t = btf__type_by_id(btf, 6);
+ ASSERT_EQ(btf_kind(t), BTF_KIND_ARRAY, "array_kind");
+ ASSERT_EQ(btf_array(t)->index_type, 1, "array_index_type");
+ ASSERT_EQ(btf_array(t)->type, 2, "array_elem_type");
+ ASSERT_EQ(btf_array(t)->nelems, 10, "array_nelems");
+ ASSERT_STREQ(btf_type_raw_dump(btf, 6),
+ "[6] ARRAY '(anon)' type_id=2 index_type_id=1 nr_elems=10", "raw_dump");
+
+ /* STRUCT */
+ err = btf__add_field(btf, "field", 1, 0, 0);
+ ASSERT_ERR(err, "no_struct_field");
+ id = btf__add_struct(btf, "s1", 8);
+ ASSERT_EQ(id, 7, "struct_id");
+ err = btf__add_field(btf, "f1", 1, 0, 0);
+ ASSERT_OK(err, "f1_res");
+ err = btf__add_field(btf, "f2", 1, 32, 16);
+ ASSERT_OK(err, "f2_res");
+
+ t = btf__type_by_id(btf, 7);
+ ASSERT_STREQ(btf__str_by_offset(btf, t->name_off), "s1", "struct_name");
+ ASSERT_EQ(btf_kind(t), BTF_KIND_STRUCT, "struct_kind");
+ ASSERT_EQ(btf_vlen(t), 2, "struct_vlen");
+ ASSERT_EQ(btf_kflag(t), true, "struct_kflag");
+ ASSERT_EQ(t->size, 8, "struct_sz");
+ m = btf_members(t) + 0;
+ ASSERT_STREQ(btf__str_by_offset(btf, m->name_off), "f1", "f1_name");
+ ASSERT_EQ(m->type, 1, "f1_type");
+ ASSERT_EQ(btf_member_bit_offset(t, 0), 0, "f1_bit_off");
+ ASSERT_EQ(btf_member_bitfield_size(t, 0), 0, "f1_bit_sz");
+ m = btf_members(t) + 1;
+ ASSERT_STREQ(btf__str_by_offset(btf, m->name_off), "f2", "f2_name");
+ ASSERT_EQ(m->type, 1, "f2_type");
+ ASSERT_EQ(btf_member_bit_offset(t, 1), 32, "f2_bit_off");
+ ASSERT_EQ(btf_member_bitfield_size(t, 1), 16, "f2_bit_sz");
+ ASSERT_STREQ(btf_type_raw_dump(btf, 7),
+ "[7] STRUCT 's1' size=8 vlen=2\n"
+ "\t'f1' type_id=1 bits_offset=0\n"
+ "\t'f2' type_id=1 bits_offset=32 bitfield_size=16", "raw_dump");
+
+ /* UNION */
+ id = btf__add_union(btf, "u1", 8);
+ ASSERT_EQ(id, 8, "union_id");
+
+ /* invalid, non-zero offset */
+ err = btf__add_field(btf, "field", 1, 1, 0);
+ ASSERT_ERR(err, "no_struct_field");
+
+ err = btf__add_field(btf, "f1", 1, 0, 16);
+ ASSERT_OK(err, "f1_res");
+
+ t = btf__type_by_id(btf, 8);
+ ASSERT_STREQ(btf__str_by_offset(btf, t->name_off), "u1", "union_name");
+ ASSERT_EQ(btf_kind(t), BTF_KIND_UNION, "union_kind");
+ ASSERT_EQ(btf_vlen(t), 1, "union_vlen");
+ ASSERT_EQ(btf_kflag(t), true, "union_kflag");
+ ASSERT_EQ(t->size, 8, "union_sz");
+ m = btf_members(t) + 0;
+ ASSERT_STREQ(btf__str_by_offset(btf, m->name_off), "f1", "f1_name");
+ ASSERT_EQ(m->type, 1, "f1_type");
+ ASSERT_EQ(btf_member_bit_offset(t, 0), 0, "f1_bit_off");
+ ASSERT_EQ(btf_member_bitfield_size(t, 0), 16, "f1_bit_sz");
+ ASSERT_STREQ(btf_type_raw_dump(btf, 8),
+ "[8] UNION 'u1' size=8 vlen=1\n"
+ "\t'f1' type_id=1 bits_offset=0 bitfield_size=16", "raw_dump");
+
+ /* ENUM */
+ id = btf__add_enum(btf, "e1", 4);
+ ASSERT_EQ(id, 9, "enum_id");
+ err = btf__add_enum_value(btf, "v1", 1);
+ ASSERT_OK(err, "v1_res");
+ err = btf__add_enum_value(btf, "v2", 2);
+ ASSERT_OK(err, "v2_res");
+
+ t = btf__type_by_id(btf, 9);
+ ASSERT_STREQ(btf__str_by_offset(btf, t->name_off), "e1", "enum_name");
+ ASSERT_EQ(btf_kind(t), BTF_KIND_ENUM, "enum_kind");
+ ASSERT_EQ(btf_vlen(t), 2, "enum_vlen");
+ ASSERT_EQ(t->size, 4, "enum_sz");
+ v = btf_enum(t) + 0;
+ ASSERT_STREQ(btf__str_by_offset(btf, v->name_off), "v1", "v1_name");
+ ASSERT_EQ(v->val, 1, "v1_val");
+ v = btf_enum(t) + 1;
+ ASSERT_STREQ(btf__str_by_offset(btf, v->name_off), "v2", "v2_name");
+ ASSERT_EQ(v->val, 2, "v2_val");
+ ASSERT_STREQ(btf_type_raw_dump(btf, 9),
+ "[9] ENUM 'e1' encoding=UNSIGNED size=4 vlen=2\n"
+ "\t'v1' val=1\n"
+ "\t'v2' val=2", "raw_dump");
+
+ /* FWDs */
+ id = btf__add_fwd(btf, "struct_fwd", BTF_FWD_STRUCT);
+ ASSERT_EQ(id, 10, "struct_fwd_id");
+ t = btf__type_by_id(btf, 10);
+ ASSERT_STREQ(btf__str_by_offset(btf, t->name_off), "struct_fwd", "fwd_name");
+ ASSERT_EQ(btf_kind(t), BTF_KIND_FWD, "fwd_kind");
+ ASSERT_EQ(btf_kflag(t), 0, "fwd_kflag");
+ ASSERT_STREQ(btf_type_raw_dump(btf, 10),
+ "[10] FWD 'struct_fwd' fwd_kind=struct", "raw_dump");
+
+ id = btf__add_fwd(btf, "union_fwd", BTF_FWD_UNION);
+ ASSERT_EQ(id, 11, "union_fwd_id");
+ t = btf__type_by_id(btf, 11);
+ ASSERT_STREQ(btf__str_by_offset(btf, t->name_off), "union_fwd", "fwd_name");
+ ASSERT_EQ(btf_kind(t), BTF_KIND_FWD, "fwd_kind");
+ ASSERT_EQ(btf_kflag(t), 1, "fwd_kflag");
+ ASSERT_STREQ(btf_type_raw_dump(btf, 11),
+ "[11] FWD 'union_fwd' fwd_kind=union", "raw_dump");
+
+ id = btf__add_fwd(btf, "enum_fwd", BTF_FWD_ENUM);
+ ASSERT_EQ(id, 12, "enum_fwd_id");
+ t = btf__type_by_id(btf, 12);
+ ASSERT_STREQ(btf__str_by_offset(btf, t->name_off), "enum_fwd", "fwd_name");
+ ASSERT_EQ(btf_kind(t), BTF_KIND_ENUM, "enum_fwd_kind");
+ ASSERT_EQ(btf_vlen(t), 0, "enum_fwd_kind");
+ ASSERT_EQ(t->size, 4, "enum_fwd_sz");
+ ASSERT_STREQ(btf_type_raw_dump(btf, 12),
+ "[12] ENUM 'enum_fwd' encoding=UNSIGNED size=4 vlen=0", "raw_dump");
+
+ /* TYPEDEF */
+ id = btf__add_typedef(btf, "typedef1", 1);
+ ASSERT_EQ(id, 13, "typedef_fwd_id");
+ t = btf__type_by_id(btf, 13);
+ ASSERT_STREQ(btf__str_by_offset(btf, t->name_off), "typedef1", "typedef_name");
+ ASSERT_EQ(btf_kind(t), BTF_KIND_TYPEDEF, "typedef_kind");
+ ASSERT_EQ(t->type, 1, "typedef_type");
+ ASSERT_STREQ(btf_type_raw_dump(btf, 13),
+ "[13] TYPEDEF 'typedef1' type_id=1", "raw_dump");
+
+ /* FUNC & FUNC_PROTO */
+ id = btf__add_func(btf, "func1", BTF_FUNC_GLOBAL, 15);
+ ASSERT_EQ(id, 14, "func_id");
+ t = btf__type_by_id(btf, 14);
+ ASSERT_STREQ(btf__str_by_offset(btf, t->name_off), "func1", "func_name");
+ ASSERT_EQ(t->type, 15, "func_type");
+ ASSERT_EQ(btf_kind(t), BTF_KIND_FUNC, "func_kind");
+ ASSERT_EQ(btf_vlen(t), BTF_FUNC_GLOBAL, "func_vlen");
+ ASSERT_STREQ(btf_type_raw_dump(btf, 14),
+ "[14] FUNC 'func1' type_id=15 linkage=global", "raw_dump");
+
+ id = btf__add_func_proto(btf, 1);
+ ASSERT_EQ(id, 15, "func_proto_id");
+ err = btf__add_func_param(btf, "p1", 1);
+ ASSERT_OK(err, "p1_res");
+ err = btf__add_func_param(btf, "p2", 2);
+ ASSERT_OK(err, "p2_res");
+
+ t = btf__type_by_id(btf, 15);
+ ASSERT_EQ(btf_kind(t), BTF_KIND_FUNC_PROTO, "func_proto_kind");
+ ASSERT_EQ(btf_vlen(t), 2, "func_proto_vlen");
+ ASSERT_EQ(t->type, 1, "func_proto_ret_type");
+ p = btf_params(t) + 0;
+ ASSERT_STREQ(btf__str_by_offset(btf, p->name_off), "p1", "p1_name");
+ ASSERT_EQ(p->type, 1, "p1_type");
+ p = btf_params(t) + 1;
+ ASSERT_STREQ(btf__str_by_offset(btf, p->name_off), "p2", "p2_name");
+ ASSERT_EQ(p->type, 2, "p2_type");
+ ASSERT_STREQ(btf_type_raw_dump(btf, 15),
+ "[15] FUNC_PROTO '(anon)' ret_type_id=1 vlen=2\n"
+ "\t'p1' type_id=1\n"
+ "\t'p2' type_id=2", "raw_dump");
+
+ /* VAR */
+ id = btf__add_var(btf, "var1", BTF_VAR_GLOBAL_ALLOCATED, 1);
+ ASSERT_EQ(id, 16, "var_id");
+ t = btf__type_by_id(btf, 16);
+ ASSERT_STREQ(btf__str_by_offset(btf, t->name_off), "var1", "var_name");
+ ASSERT_EQ(btf_kind(t), BTF_KIND_VAR, "var_kind");
+ ASSERT_EQ(t->type, 1, "var_type");
+ ASSERT_EQ(btf_var(t)->linkage, BTF_VAR_GLOBAL_ALLOCATED, "var_type");
+ ASSERT_STREQ(btf_type_raw_dump(btf, 16),
+ "[16] VAR 'var1' type_id=1, linkage=global-alloc", "raw_dump");
+
+ /* DATASECT */
+ id = btf__add_datasec(btf, "datasec1", 12);
+ ASSERT_EQ(id, 17, "datasec_id");
+ err = btf__add_datasec_var_info(btf, 1, 4, 8);
+ ASSERT_OK(err, "v1_res");
+
+ t = btf__type_by_id(btf, 17);
+ ASSERT_STREQ(btf__str_by_offset(btf, t->name_off), "datasec1", "datasec_name");
+ ASSERT_EQ(t->size, 12, "datasec_sz");
+ ASSERT_EQ(btf_kind(t), BTF_KIND_DATASEC, "datasec_kind");
+ ASSERT_EQ(btf_vlen(t), 1, "datasec_vlen");
+ vi = btf_var_secinfos(t) + 0;
+ ASSERT_EQ(vi->type, 1, "v1_type");
+ ASSERT_EQ(vi->offset, 4, "v1_off");
+ ASSERT_EQ(vi->size, 8, "v1_sz");
+ ASSERT_STREQ(btf_type_raw_dump(btf, 17),
+ "[17] DATASEC 'datasec1' size=12 vlen=1\n"
+ "\ttype_id=1 offset=4 size=8", "raw_dump");
+
+ /* DECL_TAG */
+ id = btf__add_decl_tag(btf, "tag1", 16, -1);
+ ASSERT_EQ(id, 18, "tag_id");
+ t = btf__type_by_id(btf, 18);
+ ASSERT_STREQ(btf__str_by_offset(btf, t->name_off), "tag1", "tag_value");
+ ASSERT_EQ(btf_kind(t), BTF_KIND_DECL_TAG, "tag_kind");
+ ASSERT_EQ(t->type, 16, "tag_type");
+ ASSERT_EQ(btf_decl_tag(t)->component_idx, -1, "tag_component_idx");
+ ASSERT_STREQ(btf_type_raw_dump(btf, 18),
+ "[18] DECL_TAG 'tag1' type_id=16 component_idx=-1", "raw_dump");
+
+ id = btf__add_decl_tag(btf, "tag2", 14, 1);
+ ASSERT_EQ(id, 19, "tag_id");
+ t = btf__type_by_id(btf, 19);
+ ASSERT_STREQ(btf__str_by_offset(btf, t->name_off), "tag2", "tag_value");
+ ASSERT_EQ(btf_kind(t), BTF_KIND_DECL_TAG, "tag_kind");
+ ASSERT_EQ(t->type, 14, "tag_type");
+ ASSERT_EQ(btf_decl_tag(t)->component_idx, 1, "tag_component_idx");
+ ASSERT_STREQ(btf_type_raw_dump(btf, 19),
+ "[19] DECL_TAG 'tag2' type_id=14 component_idx=1", "raw_dump");
+
+ /* TYPE_TAG */
+ id = btf__add_type_tag(btf, "tag1", 1);
+ ASSERT_EQ(id, 20, "tag_id");
+ t = btf__type_by_id(btf, 20);
+ ASSERT_STREQ(btf__str_by_offset(btf, t->name_off), "tag1", "tag_value");
+ ASSERT_EQ(btf_kind(t), BTF_KIND_TYPE_TAG, "tag_kind");
+ ASSERT_EQ(t->type, 1, "tag_type");
+ ASSERT_STREQ(btf_type_raw_dump(btf, 20),
+ "[20] TYPE_TAG 'tag1' type_id=1", "raw_dump");
+
+ /* ENUM64 */
+ id = btf__add_enum64(btf, "e1", 8, true);
+ ASSERT_EQ(id, 21, "enum64_id");
+ err = btf__add_enum64_value(btf, "v1", -1);
+ ASSERT_OK(err, "v1_res");
+ err = btf__add_enum64_value(btf, "v2", 0x123456789); /* 4886718345 */
+ ASSERT_OK(err, "v2_res");
+ t = btf__type_by_id(btf, 21);
+ ASSERT_STREQ(btf__str_by_offset(btf, t->name_off), "e1", "enum64_name");
+ ASSERT_EQ(btf_kind(t), BTF_KIND_ENUM64, "enum64_kind");
+ ASSERT_EQ(btf_vlen(t), 2, "enum64_vlen");
+ ASSERT_EQ(t->size, 8, "enum64_sz");
+ v64 = btf_enum64(t) + 0;
+ ASSERT_STREQ(btf__str_by_offset(btf, v64->name_off), "v1", "v1_name");
+ ASSERT_EQ(v64->val_hi32, 0xffffffff, "v1_val");
+ ASSERT_EQ(v64->val_lo32, 0xffffffff, "v1_val");
+ v64 = btf_enum64(t) + 1;
+ ASSERT_STREQ(btf__str_by_offset(btf, v64->name_off), "v2", "v2_name");
+ ASSERT_EQ(v64->val_hi32, 0x1, "v2_val");
+ ASSERT_EQ(v64->val_lo32, 0x23456789, "v2_val");
+ ASSERT_STREQ(btf_type_raw_dump(btf, 21),
+ "[21] ENUM64 'e1' encoding=SIGNED size=8 vlen=2\n"
+ "\t'v1' val=-1\n"
+ "\t'v2' val=4886718345", "raw_dump");
+
+ id = btf__add_enum64(btf, "e1", 8, false);
+ ASSERT_EQ(id, 22, "enum64_id");
+ err = btf__add_enum64_value(btf, "v1", 0xffffffffFFFFFFFF); /* 18446744073709551615 */
+ ASSERT_OK(err, "v1_res");
+ t = btf__type_by_id(btf, 22);
+ ASSERT_STREQ(btf__str_by_offset(btf, t->name_off), "e1", "enum64_name");
+ ASSERT_EQ(btf_kind(t), BTF_KIND_ENUM64, "enum64_kind");
+ ASSERT_EQ(btf_vlen(t), 1, "enum64_vlen");
+ ASSERT_EQ(t->size, 8, "enum64_sz");
+ v64 = btf_enum64(t) + 0;
+ ASSERT_STREQ(btf__str_by_offset(btf, v64->name_off), "v1", "v1_name");
+ ASSERT_EQ(v64->val_hi32, 0xffffffff, "v1_val");
+ ASSERT_EQ(v64->val_lo32, 0xffffffff, "v1_val");
+ ASSERT_STREQ(btf_type_raw_dump(btf, 22),
+ "[22] ENUM64 'e1' encoding=UNSIGNED size=8 vlen=1\n"
+ "\t'v1' val=18446744073709551615", "raw_dump");
+}
+
+static void test_btf_add()
+{
+ struct btf *btf;
+
+ btf = btf__new_empty();
+ if (!ASSERT_OK_PTR(btf, "new_empty"))
+ return;
+
+ gen_btf(btf);
+
+ VALIDATE_RAW_BTF(
+ btf,
+ "[1] INT 'int' size=4 bits_offset=0 nr_bits=32 encoding=SIGNED",
+ "[2] PTR '(anon)' type_id=1",
+ "[3] CONST '(anon)' type_id=5",
+ "[4] VOLATILE '(anon)' type_id=3",
+ "[5] RESTRICT '(anon)' type_id=4",
+ "[6] ARRAY '(anon)' type_id=2 index_type_id=1 nr_elems=10",
+ "[7] STRUCT 's1' size=8 vlen=2\n"
+ "\t'f1' type_id=1 bits_offset=0\n"
+ "\t'f2' type_id=1 bits_offset=32 bitfield_size=16",
+ "[8] UNION 'u1' size=8 vlen=1\n"
+ "\t'f1' type_id=1 bits_offset=0 bitfield_size=16",
+ "[9] ENUM 'e1' encoding=UNSIGNED size=4 vlen=2\n"
+ "\t'v1' val=1\n"
+ "\t'v2' val=2",
+ "[10] FWD 'struct_fwd' fwd_kind=struct",
+ "[11] FWD 'union_fwd' fwd_kind=union",
+ "[12] ENUM 'enum_fwd' encoding=UNSIGNED size=4 vlen=0",
+ "[13] TYPEDEF 'typedef1' type_id=1",
+ "[14] FUNC 'func1' type_id=15 linkage=global",
+ "[15] FUNC_PROTO '(anon)' ret_type_id=1 vlen=2\n"
+ "\t'p1' type_id=1\n"
+ "\t'p2' type_id=2",
+ "[16] VAR 'var1' type_id=1, linkage=global-alloc",
+ "[17] DATASEC 'datasec1' size=12 vlen=1\n"
+ "\ttype_id=1 offset=4 size=8",
+ "[18] DECL_TAG 'tag1' type_id=16 component_idx=-1",
+ "[19] DECL_TAG 'tag2' type_id=14 component_idx=1",
+ "[20] TYPE_TAG 'tag1' type_id=1",
+ "[21] ENUM64 'e1' encoding=SIGNED size=8 vlen=2\n"
+ "\t'v1' val=-1\n"
+ "\t'v2' val=4886718345",
+ "[22] ENUM64 'e1' encoding=UNSIGNED size=8 vlen=1\n"
+ "\t'v1' val=18446744073709551615");
+
+ btf__free(btf);
+}
+
+static void test_btf_add_btf()
+{
+ struct btf *btf1 = NULL, *btf2 = NULL;
+ int id;
+
+ btf1 = btf__new_empty();
+ if (!ASSERT_OK_PTR(btf1, "btf1"))
+ return;
+
+ btf2 = btf__new_empty();
+ if (!ASSERT_OK_PTR(btf2, "btf2"))
+ goto cleanup;
+
+ gen_btf(btf1);
+ gen_btf(btf2);
+
+ id = btf__add_btf(btf1, btf2);
+ if (!ASSERT_EQ(id, 23, "id"))
+ goto cleanup;
+
+ VALIDATE_RAW_BTF(
+ btf1,
+ "[1] INT 'int' size=4 bits_offset=0 nr_bits=32 encoding=SIGNED",
+ "[2] PTR '(anon)' type_id=1",
+ "[3] CONST '(anon)' type_id=5",
+ "[4] VOLATILE '(anon)' type_id=3",
+ "[5] RESTRICT '(anon)' type_id=4",
+ "[6] ARRAY '(anon)' type_id=2 index_type_id=1 nr_elems=10",
+ "[7] STRUCT 's1' size=8 vlen=2\n"
+ "\t'f1' type_id=1 bits_offset=0\n"
+ "\t'f2' type_id=1 bits_offset=32 bitfield_size=16",
+ "[8] UNION 'u1' size=8 vlen=1\n"
+ "\t'f1' type_id=1 bits_offset=0 bitfield_size=16",
+ "[9] ENUM 'e1' encoding=UNSIGNED size=4 vlen=2\n"
+ "\t'v1' val=1\n"
+ "\t'v2' val=2",
+ "[10] FWD 'struct_fwd' fwd_kind=struct",
+ "[11] FWD 'union_fwd' fwd_kind=union",
+ "[12] ENUM 'enum_fwd' encoding=UNSIGNED size=4 vlen=0",
+ "[13] TYPEDEF 'typedef1' type_id=1",
+ "[14] FUNC 'func1' type_id=15 linkage=global",
+ "[15] FUNC_PROTO '(anon)' ret_type_id=1 vlen=2\n"
+ "\t'p1' type_id=1\n"
+ "\t'p2' type_id=2",
+ "[16] VAR 'var1' type_id=1, linkage=global-alloc",
+ "[17] DATASEC 'datasec1' size=12 vlen=1\n"
+ "\ttype_id=1 offset=4 size=8",
+ "[18] DECL_TAG 'tag1' type_id=16 component_idx=-1",
+ "[19] DECL_TAG 'tag2' type_id=14 component_idx=1",
+ "[20] TYPE_TAG 'tag1' type_id=1",
+ "[21] ENUM64 'e1' encoding=SIGNED size=8 vlen=2\n"
+ "\t'v1' val=-1\n"
+ "\t'v2' val=4886718345",
+ "[22] ENUM64 'e1' encoding=UNSIGNED size=8 vlen=1\n"
+ "\t'v1' val=18446744073709551615",
+
+ /* types appended from the second BTF */
+ "[23] INT 'int' size=4 bits_offset=0 nr_bits=32 encoding=SIGNED",
+ "[24] PTR '(anon)' type_id=23",
+ "[25] CONST '(anon)' type_id=27",
+ "[26] VOLATILE '(anon)' type_id=25",
+ "[27] RESTRICT '(anon)' type_id=26",
+ "[28] ARRAY '(anon)' type_id=24 index_type_id=23 nr_elems=10",
+ "[29] STRUCT 's1' size=8 vlen=2\n"
+ "\t'f1' type_id=23 bits_offset=0\n"
+ "\t'f2' type_id=23 bits_offset=32 bitfield_size=16",
+ "[30] UNION 'u1' size=8 vlen=1\n"
+ "\t'f1' type_id=23 bits_offset=0 bitfield_size=16",
+ "[31] ENUM 'e1' encoding=UNSIGNED size=4 vlen=2\n"
+ "\t'v1' val=1\n"
+ "\t'v2' val=2",
+ "[32] FWD 'struct_fwd' fwd_kind=struct",
+ "[33] FWD 'union_fwd' fwd_kind=union",
+ "[34] ENUM 'enum_fwd' encoding=UNSIGNED size=4 vlen=0",
+ "[35] TYPEDEF 'typedef1' type_id=23",
+ "[36] FUNC 'func1' type_id=37 linkage=global",
+ "[37] FUNC_PROTO '(anon)' ret_type_id=23 vlen=2\n"
+ "\t'p1' type_id=23\n"
+ "\t'p2' type_id=24",
+ "[38] VAR 'var1' type_id=23, linkage=global-alloc",
+ "[39] DATASEC 'datasec1' size=12 vlen=1\n"
+ "\ttype_id=23 offset=4 size=8",
+ "[40] DECL_TAG 'tag1' type_id=38 component_idx=-1",
+ "[41] DECL_TAG 'tag2' type_id=36 component_idx=1",
+ "[42] TYPE_TAG 'tag1' type_id=23",
+ "[43] ENUM64 'e1' encoding=SIGNED size=8 vlen=2\n"
+ "\t'v1' val=-1\n"
+ "\t'v2' val=4886718345",
+ "[44] ENUM64 'e1' encoding=UNSIGNED size=8 vlen=1\n"
+ "\t'v1' val=18446744073709551615");
+
+cleanup:
+ btf__free(btf1);
+ btf__free(btf2);
+}
+
+void test_btf_write()
+{
+ if (test__start_subtest("btf_add"))
+ test_btf_add();
+ if (test__start_subtest("btf_add_btf"))
+ test_btf_add_btf();
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/cb_refs.c b/tools/testing/selftests/bpf/prog_tests/cb_refs.c
new file mode 100644
index 000000000000..3bff680de16c
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/cb_refs.c
@@ -0,0 +1,48 @@
+// SPDX-License-Identifier: GPL-2.0
+#include "bpf/libbpf.h"
+#include <test_progs.h>
+#include <network_helpers.h>
+
+#include "cb_refs.skel.h"
+
+static char log_buf[1024 * 1024];
+
+struct {
+ const char *prog_name;
+ const char *err_msg;
+} cb_refs_tests[] = {
+ { "underflow_prog", "reference has not been acquired before" },
+ { "leak_prog", "Unreleased reference" },
+ { "nested_cb", "Unreleased reference id=4 alloc_insn=2" }, /* alloc_insn=2{4,5} */
+ { "non_cb_transfer_ref", "Unreleased reference id=4 alloc_insn=1" }, /* alloc_insn=1{1,2} */
+};
+
+void test_cb_refs(void)
+{
+ LIBBPF_OPTS(bpf_object_open_opts, opts, .kernel_log_buf = log_buf,
+ .kernel_log_size = sizeof(log_buf),
+ .kernel_log_level = 1);
+ struct bpf_program *prog;
+ struct cb_refs *skel;
+ int i;
+
+ for (i = 0; i < ARRAY_SIZE(cb_refs_tests); i++) {
+ LIBBPF_OPTS(bpf_test_run_opts, run_opts,
+ .data_in = &pkt_v4,
+ .data_size_in = sizeof(pkt_v4),
+ .repeat = 1,
+ );
+ skel = cb_refs__open_opts(&opts);
+ if (!ASSERT_OK_PTR(skel, "cb_refs__open_and_load"))
+ return;
+ prog = bpf_object__find_program_by_name(skel->obj, cb_refs_tests[i].prog_name);
+ bpf_program__set_autoload(prog, true);
+ if (!ASSERT_ERR(cb_refs__load(skel), "cb_refs__load"))
+ bpf_prog_test_run_opts(bpf_program__fd(prog), &run_opts);
+ if (!ASSERT_OK_PTR(strstr(log_buf, cb_refs_tests[i].err_msg), "expected error message")) {
+ fprintf(stderr, "Expected: %s\n", cb_refs_tests[i].err_msg);
+ fprintf(stderr, "Verifier: %s\n", log_buf);
+ }
+ cb_refs__destroy(skel);
+ }
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/cg_storage_multi.c b/tools/testing/selftests/bpf/prog_tests/cg_storage_multi.c
new file mode 100644
index 000000000000..63ee892bc757
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/cg_storage_multi.c
@@ -0,0 +1,393 @@
+// SPDX-License-Identifier: GPL-2.0-only
+
+/*
+ * Copyright 2020 Google LLC.
+ */
+
+#include <test_progs.h>
+#include <cgroup_helpers.h>
+#include <network_helpers.h>
+
+#include "progs/cg_storage_multi.h"
+
+#include "cg_storage_multi_egress_only.skel.h"
+#include "cg_storage_multi_isolated.skel.h"
+#include "cg_storage_multi_shared.skel.h"
+
+#define PARENT_CGROUP "/cgroup_storage"
+#define CHILD_CGROUP "/cgroup_storage/child"
+
+static int duration;
+
+static bool assert_storage(struct bpf_map *map, const void *key,
+ struct cgroup_value *expected)
+{
+ struct cgroup_value value;
+ int map_fd;
+
+ map_fd = bpf_map__fd(map);
+
+ if (CHECK(bpf_map_lookup_elem(map_fd, key, &value) < 0,
+ "map-lookup", "errno %d", errno))
+ return true;
+ if (CHECK(memcmp(&value, expected, sizeof(struct cgroup_value)),
+ "assert-storage", "storages differ"))
+ return true;
+
+ return false;
+}
+
+static bool assert_storage_noexist(struct bpf_map *map, const void *key)
+{
+ struct cgroup_value value;
+ int map_fd;
+
+ map_fd = bpf_map__fd(map);
+
+ if (CHECK(bpf_map_lookup_elem(map_fd, key, &value) == 0,
+ "map-lookup", "succeeded, expected ENOENT"))
+ return true;
+ if (CHECK(errno != ENOENT,
+ "map-lookup", "errno %d, expected ENOENT", errno))
+ return true;
+
+ return false;
+}
+
+static bool connect_send(const char *cgroup_path)
+{
+ int server_fd = -1, client_fd = -1;
+ char message[] = "message";
+ bool res = true;
+
+ if (join_cgroup(cgroup_path))
+ goto out_clean;
+
+ server_fd = start_server(AF_INET, SOCK_DGRAM, NULL, 0, 0);
+ if (server_fd < 0)
+ goto out_clean;
+
+ client_fd = connect_to_fd(server_fd, 0);
+ if (client_fd < 0)
+ goto out_clean;
+
+ if (send(client_fd, &message, sizeof(message), 0) < 0)
+ goto out_clean;
+
+ if (read(server_fd, &message, sizeof(message)) < 0)
+ goto out_clean;
+
+ res = false;
+
+out_clean:
+ close(client_fd);
+ close(server_fd);
+ return res;
+}
+
+static void test_egress_only(int parent_cgroup_fd, int child_cgroup_fd)
+{
+ struct cg_storage_multi_egress_only *obj;
+ struct cgroup_value expected_cgroup_value;
+ struct bpf_cgroup_storage_key key;
+ struct bpf_link *parent_link = NULL, *child_link = NULL;
+ bool err;
+
+ key.attach_type = BPF_CGROUP_INET_EGRESS;
+
+ obj = cg_storage_multi_egress_only__open_and_load();
+ if (CHECK(!obj, "skel-load", "errno %d", errno))
+ return;
+
+ /* Attach to parent cgroup, trigger packet from child.
+ * Assert that there is only one run and in that run the storage is
+ * parent cgroup's storage.
+ * Also assert that child cgroup's storage does not exist
+ */
+ parent_link = bpf_program__attach_cgroup(obj->progs.egress,
+ parent_cgroup_fd);
+ if (!ASSERT_OK_PTR(parent_link, "parent-cg-attach"))
+ goto close_bpf_object;
+ err = connect_send(CHILD_CGROUP);
+ if (CHECK(err, "first-connect-send", "errno %d", errno))
+ goto close_bpf_object;
+ if (CHECK(obj->bss->invocations != 1,
+ "first-invoke", "invocations=%d", obj->bss->invocations))
+ goto close_bpf_object;
+ key.cgroup_inode_id = get_cgroup_id(PARENT_CGROUP);
+ expected_cgroup_value = (struct cgroup_value) { .egress_pkts = 1 };
+ if (assert_storage(obj->maps.cgroup_storage,
+ &key, &expected_cgroup_value))
+ goto close_bpf_object;
+ key.cgroup_inode_id = get_cgroup_id(CHILD_CGROUP);
+ if (assert_storage_noexist(obj->maps.cgroup_storage, &key))
+ goto close_bpf_object;
+
+ /* Attach to parent and child cgroup, trigger packet from child.
+ * Assert that there are two additional runs, one that run with parent
+ * cgroup's storage and one with child cgroup's storage.
+ */
+ child_link = bpf_program__attach_cgroup(obj->progs.egress,
+ child_cgroup_fd);
+ if (!ASSERT_OK_PTR(child_link, "child-cg-attach"))
+ goto close_bpf_object;
+ err = connect_send(CHILD_CGROUP);
+ if (CHECK(err, "second-connect-send", "errno %d", errno))
+ goto close_bpf_object;
+ if (CHECK(obj->bss->invocations != 3,
+ "second-invoke", "invocations=%d", obj->bss->invocations))
+ goto close_bpf_object;
+ key.cgroup_inode_id = get_cgroup_id(PARENT_CGROUP);
+ expected_cgroup_value = (struct cgroup_value) { .egress_pkts = 2 };
+ if (assert_storage(obj->maps.cgroup_storage,
+ &key, &expected_cgroup_value))
+ goto close_bpf_object;
+ key.cgroup_inode_id = get_cgroup_id(CHILD_CGROUP);
+ expected_cgroup_value = (struct cgroup_value) { .egress_pkts = 1 };
+ if (assert_storage(obj->maps.cgroup_storage,
+ &key, &expected_cgroup_value))
+ goto close_bpf_object;
+
+close_bpf_object:
+ bpf_link__destroy(parent_link);
+ bpf_link__destroy(child_link);
+
+ cg_storage_multi_egress_only__destroy(obj);
+}
+
+static void test_isolated(int parent_cgroup_fd, int child_cgroup_fd)
+{
+ struct cg_storage_multi_isolated *obj;
+ struct cgroup_value expected_cgroup_value;
+ struct bpf_cgroup_storage_key key;
+ struct bpf_link *parent_egress1_link = NULL, *parent_egress2_link = NULL;
+ struct bpf_link *child_egress1_link = NULL, *child_egress2_link = NULL;
+ struct bpf_link *parent_ingress_link = NULL, *child_ingress_link = NULL;
+ bool err;
+
+ obj = cg_storage_multi_isolated__open_and_load();
+ if (CHECK(!obj, "skel-load", "errno %d", errno))
+ return;
+
+ /* Attach to parent cgroup, trigger packet from child.
+ * Assert that there is three runs, two with parent cgroup egress and
+ * one with parent cgroup ingress, stored in separate parent storages.
+ * Also assert that child cgroup's storages does not exist
+ */
+ parent_egress1_link = bpf_program__attach_cgroup(obj->progs.egress1,
+ parent_cgroup_fd);
+ if (!ASSERT_OK_PTR(parent_egress1_link, "parent-egress1-cg-attach"))
+ goto close_bpf_object;
+ parent_egress2_link = bpf_program__attach_cgroup(obj->progs.egress2,
+ parent_cgroup_fd);
+ if (!ASSERT_OK_PTR(parent_egress2_link, "parent-egress2-cg-attach"))
+ goto close_bpf_object;
+ parent_ingress_link = bpf_program__attach_cgroup(obj->progs.ingress,
+ parent_cgroup_fd);
+ if (!ASSERT_OK_PTR(parent_ingress_link, "parent-ingress-cg-attach"))
+ goto close_bpf_object;
+ err = connect_send(CHILD_CGROUP);
+ if (CHECK(err, "first-connect-send", "errno %d", errno))
+ goto close_bpf_object;
+ if (CHECK(obj->bss->invocations != 3,
+ "first-invoke", "invocations=%d", obj->bss->invocations))
+ goto close_bpf_object;
+ key.cgroup_inode_id = get_cgroup_id(PARENT_CGROUP);
+ key.attach_type = BPF_CGROUP_INET_EGRESS;
+ expected_cgroup_value = (struct cgroup_value) { .egress_pkts = 2 };
+ if (assert_storage(obj->maps.cgroup_storage,
+ &key, &expected_cgroup_value))
+ goto close_bpf_object;
+ key.attach_type = BPF_CGROUP_INET_INGRESS;
+ expected_cgroup_value = (struct cgroup_value) { .ingress_pkts = 1 };
+ if (assert_storage(obj->maps.cgroup_storage,
+ &key, &expected_cgroup_value))
+ goto close_bpf_object;
+ key.cgroup_inode_id = get_cgroup_id(CHILD_CGROUP);
+ key.attach_type = BPF_CGROUP_INET_EGRESS;
+ if (assert_storage_noexist(obj->maps.cgroup_storage, &key))
+ goto close_bpf_object;
+ key.attach_type = BPF_CGROUP_INET_INGRESS;
+ if (assert_storage_noexist(obj->maps.cgroup_storage, &key))
+ goto close_bpf_object;
+
+ /* Attach to parent and child cgroup, trigger packet from child.
+ * Assert that there is six additional runs, parent cgroup egresses and
+ * ingress, child cgroup egresses and ingress.
+ * Assert that egree and ingress storages are separate.
+ */
+ child_egress1_link = bpf_program__attach_cgroup(obj->progs.egress1,
+ child_cgroup_fd);
+ if (!ASSERT_OK_PTR(child_egress1_link, "child-egress1-cg-attach"))
+ goto close_bpf_object;
+ child_egress2_link = bpf_program__attach_cgroup(obj->progs.egress2,
+ child_cgroup_fd);
+ if (!ASSERT_OK_PTR(child_egress2_link, "child-egress2-cg-attach"))
+ goto close_bpf_object;
+ child_ingress_link = bpf_program__attach_cgroup(obj->progs.ingress,
+ child_cgroup_fd);
+ if (!ASSERT_OK_PTR(child_ingress_link, "child-ingress-cg-attach"))
+ goto close_bpf_object;
+ err = connect_send(CHILD_CGROUP);
+ if (CHECK(err, "second-connect-send", "errno %d", errno))
+ goto close_bpf_object;
+ if (CHECK(obj->bss->invocations != 9,
+ "second-invoke", "invocations=%d", obj->bss->invocations))
+ goto close_bpf_object;
+ key.cgroup_inode_id = get_cgroup_id(PARENT_CGROUP);
+ key.attach_type = BPF_CGROUP_INET_EGRESS;
+ expected_cgroup_value = (struct cgroup_value) { .egress_pkts = 4 };
+ if (assert_storage(obj->maps.cgroup_storage,
+ &key, &expected_cgroup_value))
+ goto close_bpf_object;
+ key.attach_type = BPF_CGROUP_INET_INGRESS;
+ expected_cgroup_value = (struct cgroup_value) { .ingress_pkts = 2 };
+ if (assert_storage(obj->maps.cgroup_storage,
+ &key, &expected_cgroup_value))
+ goto close_bpf_object;
+ key.cgroup_inode_id = get_cgroup_id(CHILD_CGROUP);
+ key.attach_type = BPF_CGROUP_INET_EGRESS;
+ expected_cgroup_value = (struct cgroup_value) { .egress_pkts = 2 };
+ if (assert_storage(obj->maps.cgroup_storage,
+ &key, &expected_cgroup_value))
+ goto close_bpf_object;
+ key.attach_type = BPF_CGROUP_INET_INGRESS;
+ expected_cgroup_value = (struct cgroup_value) { .ingress_pkts = 1 };
+ if (assert_storage(obj->maps.cgroup_storage,
+ &key, &expected_cgroup_value))
+ goto close_bpf_object;
+
+close_bpf_object:
+ bpf_link__destroy(parent_egress1_link);
+ bpf_link__destroy(parent_egress2_link);
+ bpf_link__destroy(parent_ingress_link);
+ bpf_link__destroy(child_egress1_link);
+ bpf_link__destroy(child_egress2_link);
+ bpf_link__destroy(child_ingress_link);
+
+ cg_storage_multi_isolated__destroy(obj);
+}
+
+static void test_shared(int parent_cgroup_fd, int child_cgroup_fd)
+{
+ struct cg_storage_multi_shared *obj;
+ struct cgroup_value expected_cgroup_value;
+ __u64 key;
+ struct bpf_link *parent_egress1_link = NULL, *parent_egress2_link = NULL;
+ struct bpf_link *child_egress1_link = NULL, *child_egress2_link = NULL;
+ struct bpf_link *parent_ingress_link = NULL, *child_ingress_link = NULL;
+ bool err;
+
+ obj = cg_storage_multi_shared__open_and_load();
+ if (CHECK(!obj, "skel-load", "errno %d", errno))
+ return;
+
+ /* Attach to parent cgroup, trigger packet from child.
+ * Assert that there is three runs, two with parent cgroup egress and
+ * one with parent cgroup ingress.
+ * Also assert that child cgroup's storage does not exist
+ */
+ parent_egress1_link = bpf_program__attach_cgroup(obj->progs.egress1,
+ parent_cgroup_fd);
+ if (!ASSERT_OK_PTR(parent_egress1_link, "parent-egress1-cg-attach"))
+ goto close_bpf_object;
+ parent_egress2_link = bpf_program__attach_cgroup(obj->progs.egress2,
+ parent_cgroup_fd);
+ if (!ASSERT_OK_PTR(parent_egress2_link, "parent-egress2-cg-attach"))
+ goto close_bpf_object;
+ parent_ingress_link = bpf_program__attach_cgroup(obj->progs.ingress,
+ parent_cgroup_fd);
+ if (!ASSERT_OK_PTR(parent_ingress_link, "parent-ingress-cg-attach"))
+ goto close_bpf_object;
+ err = connect_send(CHILD_CGROUP);
+ if (CHECK(err, "first-connect-send", "errno %d", errno))
+ goto close_bpf_object;
+ if (CHECK(obj->bss->invocations != 3,
+ "first-invoke", "invocations=%d", obj->bss->invocations))
+ goto close_bpf_object;
+ key = get_cgroup_id(PARENT_CGROUP);
+ expected_cgroup_value = (struct cgroup_value) {
+ .egress_pkts = 2,
+ .ingress_pkts = 1,
+ };
+ if (assert_storage(obj->maps.cgroup_storage,
+ &key, &expected_cgroup_value))
+ goto close_bpf_object;
+ key = get_cgroup_id(CHILD_CGROUP);
+ if (assert_storage_noexist(obj->maps.cgroup_storage, &key))
+ goto close_bpf_object;
+
+ /* Attach to parent and child cgroup, trigger packet from child.
+ * Assert that there is six additional runs, parent cgroup egresses and
+ * ingress, child cgroup egresses and ingress.
+ */
+ child_egress1_link = bpf_program__attach_cgroup(obj->progs.egress1,
+ child_cgroup_fd);
+ if (!ASSERT_OK_PTR(child_egress1_link, "child-egress1-cg-attach"))
+ goto close_bpf_object;
+ child_egress2_link = bpf_program__attach_cgroup(obj->progs.egress2,
+ child_cgroup_fd);
+ if (!ASSERT_OK_PTR(child_egress2_link, "child-egress2-cg-attach"))
+ goto close_bpf_object;
+ child_ingress_link = bpf_program__attach_cgroup(obj->progs.ingress,
+ child_cgroup_fd);
+ if (!ASSERT_OK_PTR(child_ingress_link, "child-ingress-cg-attach"))
+ goto close_bpf_object;
+ err = connect_send(CHILD_CGROUP);
+ if (CHECK(err, "second-connect-send", "errno %d", errno))
+ goto close_bpf_object;
+ if (CHECK(obj->bss->invocations != 9,
+ "second-invoke", "invocations=%d", obj->bss->invocations))
+ goto close_bpf_object;
+ key = get_cgroup_id(PARENT_CGROUP);
+ expected_cgroup_value = (struct cgroup_value) {
+ .egress_pkts = 4,
+ .ingress_pkts = 2,
+ };
+ if (assert_storage(obj->maps.cgroup_storage,
+ &key, &expected_cgroup_value))
+ goto close_bpf_object;
+ key = get_cgroup_id(CHILD_CGROUP);
+ expected_cgroup_value = (struct cgroup_value) {
+ .egress_pkts = 2,
+ .ingress_pkts = 1,
+ };
+ if (assert_storage(obj->maps.cgroup_storage,
+ &key, &expected_cgroup_value))
+ goto close_bpf_object;
+
+close_bpf_object:
+ bpf_link__destroy(parent_egress1_link);
+ bpf_link__destroy(parent_egress2_link);
+ bpf_link__destroy(parent_ingress_link);
+ bpf_link__destroy(child_egress1_link);
+ bpf_link__destroy(child_egress2_link);
+ bpf_link__destroy(child_ingress_link);
+
+ cg_storage_multi_shared__destroy(obj);
+}
+
+void serial_test_cg_storage_multi(void)
+{
+ int parent_cgroup_fd = -1, child_cgroup_fd = -1;
+
+ parent_cgroup_fd = test__join_cgroup(PARENT_CGROUP);
+ if (CHECK(parent_cgroup_fd < 0, "cg-create-parent", "errno %d", errno))
+ goto close_cgroup_fd;
+ child_cgroup_fd = create_and_get_cgroup(CHILD_CGROUP);
+ if (CHECK(child_cgroup_fd < 0, "cg-create-child", "errno %d", errno))
+ goto close_cgroup_fd;
+
+ if (test__start_subtest("egress_only"))
+ test_egress_only(parent_cgroup_fd, child_cgroup_fd);
+
+ if (test__start_subtest("isolated"))
+ test_isolated(parent_cgroup_fd, child_cgroup_fd);
+
+ if (test__start_subtest("shared"))
+ test_shared(parent_cgroup_fd, child_cgroup_fd);
+
+close_cgroup_fd:
+ close(child_cgroup_fd);
+ close(parent_cgroup_fd);
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/cgroup1_hierarchy.c b/tools/testing/selftests/bpf/prog_tests/cgroup1_hierarchy.c
new file mode 100644
index 000000000000..74d6d7546f40
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/cgroup1_hierarchy.c
@@ -0,0 +1,158 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (C) 2023 Yafang Shao <laoar.shao@gmail.com> */
+
+#include <sys/types.h>
+#include <unistd.h>
+#include <test_progs.h>
+#include "cgroup_helpers.h"
+#include "test_cgroup1_hierarchy.skel.h"
+
+static void bpf_cgroup1(struct test_cgroup1_hierarchy *skel)
+{
+ struct bpf_link *lsm_link, *fentry_link;
+ int err;
+
+ /* Attach LSM prog first */
+ lsm_link = bpf_program__attach_lsm(skel->progs.lsm_run);
+ if (!ASSERT_OK_PTR(lsm_link, "lsm_attach"))
+ return;
+
+ /* LSM prog will be triggered when attaching fentry */
+ fentry_link = bpf_program__attach_trace(skel->progs.fentry_run);
+ ASSERT_NULL(fentry_link, "fentry_attach_fail");
+
+ err = bpf_link__destroy(lsm_link);
+ ASSERT_OK(err, "destroy_lsm");
+}
+
+static void bpf_cgroup1_sleepable(struct test_cgroup1_hierarchy *skel)
+{
+ struct bpf_link *lsm_link, *fentry_link;
+ int err;
+
+ /* Attach LSM prog first */
+ lsm_link = bpf_program__attach_lsm(skel->progs.lsm_s_run);
+ if (!ASSERT_OK_PTR(lsm_link, "lsm_attach"))
+ return;
+
+ /* LSM prog will be triggered when attaching fentry */
+ fentry_link = bpf_program__attach_trace(skel->progs.fentry_run);
+ ASSERT_NULL(fentry_link, "fentry_attach_fail");
+
+ err = bpf_link__destroy(lsm_link);
+ ASSERT_OK(err, "destroy_lsm");
+}
+
+static void bpf_cgroup1_invalid_id(struct test_cgroup1_hierarchy *skel)
+{
+ struct bpf_link *lsm_link, *fentry_link;
+ int err;
+
+ /* Attach LSM prog first */
+ lsm_link = bpf_program__attach_lsm(skel->progs.lsm_run);
+ if (!ASSERT_OK_PTR(lsm_link, "lsm_attach"))
+ return;
+
+ /* LSM prog will be triggered when attaching fentry */
+ fentry_link = bpf_program__attach_trace(skel->progs.fentry_run);
+ if (!ASSERT_OK_PTR(fentry_link, "fentry_attach_success"))
+ goto cleanup;
+
+ err = bpf_link__destroy(fentry_link);
+ ASSERT_OK(err, "destroy_lsm");
+
+cleanup:
+ err = bpf_link__destroy(lsm_link);
+ ASSERT_OK(err, "destroy_fentry");
+}
+
+void test_cgroup1_hierarchy(void)
+{
+ struct test_cgroup1_hierarchy *skel;
+ __u64 current_cgid;
+ int hid, err;
+
+ skel = test_cgroup1_hierarchy__open();
+ if (!ASSERT_OK_PTR(skel, "open"))
+ return;
+
+ skel->bss->target_pid = getpid();
+
+ err = bpf_program__set_attach_target(skel->progs.fentry_run, 0, "bpf_fentry_test1");
+ if (!ASSERT_OK(err, "fentry_set_target"))
+ goto destroy;
+
+ err = test_cgroup1_hierarchy__load(skel);
+ if (!ASSERT_OK(err, "load"))
+ goto destroy;
+
+ /* Setup cgroup1 hierarchy */
+ err = setup_classid_environment();
+ if (!ASSERT_OK(err, "setup_classid_environment"))
+ goto destroy;
+
+ err = join_classid();
+ if (!ASSERT_OK(err, "join_cgroup1"))
+ goto cleanup;
+
+ current_cgid = get_classid_cgroup_id();
+ if (!ASSERT_GE(current_cgid, 0, "cgroup1 id"))
+ goto cleanup;
+
+ hid = get_cgroup1_hierarchy_id("net_cls");
+ if (!ASSERT_GE(hid, 0, "cgroup1 id"))
+ goto cleanup;
+ skel->bss->target_hid = hid;
+
+ if (test__start_subtest("test_cgroup1_hierarchy")) {
+ skel->bss->target_ancestor_cgid = current_cgid;
+ bpf_cgroup1(skel);
+ }
+
+ if (test__start_subtest("test_root_cgid")) {
+ skel->bss->target_ancestor_cgid = 1;
+ skel->bss->target_ancestor_level = 0;
+ bpf_cgroup1(skel);
+ }
+
+ if (test__start_subtest("test_invalid_level")) {
+ skel->bss->target_ancestor_cgid = 1;
+ skel->bss->target_ancestor_level = 1;
+ bpf_cgroup1_invalid_id(skel);
+ }
+
+ if (test__start_subtest("test_invalid_cgid")) {
+ skel->bss->target_ancestor_cgid = 0;
+ bpf_cgroup1_invalid_id(skel);
+ }
+
+ if (test__start_subtest("test_invalid_hid")) {
+ skel->bss->target_ancestor_cgid = 1;
+ skel->bss->target_ancestor_level = 0;
+ skel->bss->target_hid = -1;
+ bpf_cgroup1_invalid_id(skel);
+ }
+
+ if (test__start_subtest("test_invalid_cgrp_name")) {
+ skel->bss->target_hid = get_cgroup1_hierarchy_id("net_cl");
+ skel->bss->target_ancestor_cgid = current_cgid;
+ bpf_cgroup1_invalid_id(skel);
+ }
+
+ if (test__start_subtest("test_invalid_cgrp_name2")) {
+ skel->bss->target_hid = get_cgroup1_hierarchy_id("net_cls,");
+ skel->bss->target_ancestor_cgid = current_cgid;
+ bpf_cgroup1_invalid_id(skel);
+ }
+
+ if (test__start_subtest("test_sleepable_prog")) {
+ skel->bss->target_hid = hid;
+ skel->bss->target_ancestor_cgid = current_cgid;
+ bpf_cgroup1_sleepable(skel);
+ }
+
+cleanup:
+ cleanup_classid_environment();
+destroy:
+ test_cgroup1_hierarchy__destroy(skel);
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/cgroup_attach_autodetach.c b/tools/testing/selftests/bpf/prog_tests/cgroup_attach_autodetach.c
index 70e94e783070..9367bd2f0ae1 100644
--- a/tools/testing/selftests/bpf/prog_tests/cgroup_attach_autodetach.c
+++ b/tools/testing/selftests/bpf/prog_tests/cgroup_attach_autodetach.c
@@ -14,14 +14,14 @@ static int prog_load(void)
BPF_MOV64_IMM(BPF_REG_0, 1), /* r0 = 1 */
BPF_EXIT_INSN(),
};
- size_t insns_cnt = sizeof(prog) / sizeof(struct bpf_insn);
+ size_t insns_cnt = ARRAY_SIZE(prog);
- return bpf_load_program(BPF_PROG_TYPE_CGROUP_SKB,
+ return bpf_test_load_program(BPF_PROG_TYPE_CGROUP_SKB,
prog, insns_cnt, "GPL", 0,
bpf_log_buf, BPF_LOG_BUF_SIZE);
}
-void test_cgroup_attach_autodetach(void)
+void serial_test_cgroup_attach_autodetach(void)
{
__u32 duration = 0, prog_cnt = 4, attach_flags;
int allow_prog[2] = {-1};
diff --git a/tools/testing/selftests/bpf/prog_tests/cgroup_attach_multi.c b/tools/testing/selftests/bpf/prog_tests/cgroup_attach_multi.c
index b549fcfacc0b..db0b7bac78d1 100644
--- a/tools/testing/selftests/bpf/prog_tests/cgroup_attach_multi.c
+++ b/tools/testing/selftests/bpf/prog_tests/cgroup_attach_multi.c
@@ -15,22 +15,22 @@ static int prog_load_cnt(int verdict, int val)
int cgroup_storage_fd, percpu_cgroup_storage_fd;
if (map_fd < 0)
- map_fd = bpf_create_map(BPF_MAP_TYPE_ARRAY, 4, 8, 1, 0);
+ map_fd = bpf_map_create(BPF_MAP_TYPE_ARRAY, NULL, 4, 8, 1, NULL);
if (map_fd < 0) {
printf("failed to create map '%s'\n", strerror(errno));
return -1;
}
- cgroup_storage_fd = bpf_create_map(BPF_MAP_TYPE_CGROUP_STORAGE,
- sizeof(struct bpf_cgroup_storage_key), 8, 0, 0);
+ cgroup_storage_fd = bpf_map_create(BPF_MAP_TYPE_CGROUP_STORAGE, NULL,
+ sizeof(struct bpf_cgroup_storage_key), 8, 0, NULL);
if (cgroup_storage_fd < 0) {
printf("failed to create map '%s'\n", strerror(errno));
return -1;
}
- percpu_cgroup_storage_fd = bpf_create_map(
- BPF_MAP_TYPE_PERCPU_CGROUP_STORAGE,
- sizeof(struct bpf_cgroup_storage_key), 8, 0, 0);
+ percpu_cgroup_storage_fd = bpf_map_create(
+ BPF_MAP_TYPE_PERCPU_CGROUP_STORAGE, NULL,
+ sizeof(struct bpf_cgroup_storage_key), 8, 0, NULL);
if (percpu_cgroup_storage_fd < 0) {
printf("failed to create map '%s'\n", strerror(errno));
return -1;
@@ -45,13 +45,13 @@ static int prog_load_cnt(int verdict, int val)
BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 2),
BPF_MOV64_IMM(BPF_REG_1, val), /* r1 = 1 */
- BPF_RAW_INSN(BPF_STX | BPF_XADD | BPF_DW, BPF_REG_0, BPF_REG_1, 0, 0), /* xadd r0 += r1 */
+ BPF_ATOMIC_OP(BPF_DW, BPF_ADD, BPF_REG_0, BPF_REG_1, 0),
BPF_LD_MAP_FD(BPF_REG_1, cgroup_storage_fd),
BPF_MOV64_IMM(BPF_REG_2, 0),
BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_get_local_storage),
BPF_MOV64_IMM(BPF_REG_1, val),
- BPF_RAW_INSN(BPF_STX | BPF_XADD | BPF_W, BPF_REG_0, BPF_REG_1, 0, 0),
+ BPF_ATOMIC_OP(BPF_W, BPF_ADD, BPF_REG_0, BPF_REG_1, 0),
BPF_LD_MAP_FD(BPF_REG_1, percpu_cgroup_storage_fd),
BPF_MOV64_IMM(BPF_REG_2, 0),
@@ -63,10 +63,10 @@ static int prog_load_cnt(int verdict, int val)
BPF_MOV64_IMM(BPF_REG_0, verdict), /* r0 = verdict */
BPF_EXIT_INSN(),
};
- size_t insns_cnt = sizeof(prog) / sizeof(struct bpf_insn);
+ size_t insns_cnt = ARRAY_SIZE(prog);
int ret;
- ret = bpf_load_program(BPF_PROG_TYPE_CGROUP_SKB,
+ ret = bpf_test_load_program(BPF_PROG_TYPE_CGROUP_SKB,
prog, insns_cnt, "GPL", 0,
bpf_log_buf, BPF_LOG_BUF_SIZE);
@@ -74,7 +74,7 @@ static int prog_load_cnt(int verdict, int val)
return ret;
}
-void test_cgroup_attach_multi(void)
+void serial_test_cgroup_attach_multi(void)
{
__u32 prog_ids[4], prog_cnt = 0, attach_flags, saved_prog_id;
int cg1 = 0, cg2 = 0, cg3 = 0, cg4 = 0, cg5 = 0, key = 0;
@@ -167,7 +167,7 @@ void test_cgroup_attach_multi(void)
prog_cnt = 2;
CHECK_FAIL(bpf_prog_query(cg5, BPF_CGROUP_INET_EGRESS,
BPF_F_QUERY_EFFECTIVE, &attach_flags,
- prog_ids, &prog_cnt) != -1);
+ prog_ids, &prog_cnt) >= 0);
CHECK_FAIL(errno != ENOSPC);
CHECK_FAIL(prog_cnt != 4);
/* check that prog_ids are returned even when buffer is too small */
@@ -194,14 +194,14 @@ void test_cgroup_attach_multi(void)
attach_opts.flags = BPF_F_ALLOW_OVERRIDE | BPF_F_REPLACE;
attach_opts.replace_prog_fd = allow_prog[0];
- if (CHECK(!bpf_prog_attach_xattr(allow_prog[6], cg1,
+ if (CHECK(!bpf_prog_attach_opts(allow_prog[6], cg1,
BPF_CGROUP_INET_EGRESS, &attach_opts),
"fail_prog_replace_override", "unexpected success\n"))
goto err;
CHECK_FAIL(errno != EINVAL);
attach_opts.flags = BPF_F_REPLACE;
- if (CHECK(!bpf_prog_attach_xattr(allow_prog[6], cg1,
+ if (CHECK(!bpf_prog_attach_opts(allow_prog[6], cg1,
BPF_CGROUP_INET_EGRESS, &attach_opts),
"fail_prog_replace_no_multi", "unexpected success\n"))
goto err;
@@ -209,7 +209,7 @@ void test_cgroup_attach_multi(void)
attach_opts.flags = BPF_F_ALLOW_MULTI | BPF_F_REPLACE;
attach_opts.replace_prog_fd = -1;
- if (CHECK(!bpf_prog_attach_xattr(allow_prog[6], cg1,
+ if (CHECK(!bpf_prog_attach_opts(allow_prog[6], cg1,
BPF_CGROUP_INET_EGRESS, &attach_opts),
"fail_prog_replace_bad_fd", "unexpected success\n"))
goto err;
@@ -217,7 +217,7 @@ void test_cgroup_attach_multi(void)
/* replacing a program that is not attached to cgroup should fail */
attach_opts.replace_prog_fd = allow_prog[3];
- if (CHECK(!bpf_prog_attach_xattr(allow_prog[6], cg1,
+ if (CHECK(!bpf_prog_attach_opts(allow_prog[6], cg1,
BPF_CGROUP_INET_EGRESS, &attach_opts),
"fail_prog_replace_no_ent", "unexpected success\n"))
goto err;
@@ -225,14 +225,14 @@ void test_cgroup_attach_multi(void)
/* replace 1st from the top program */
attach_opts.replace_prog_fd = allow_prog[0];
- if (CHECK(bpf_prog_attach_xattr(allow_prog[6], cg1,
+ if (CHECK(bpf_prog_attach_opts(allow_prog[6], cg1,
BPF_CGROUP_INET_EGRESS, &attach_opts),
"prog_replace", "errno=%d\n", errno))
goto err;
/* replace program with itself */
attach_opts.replace_prog_fd = allow_prog[6];
- if (CHECK(bpf_prog_attach_xattr(allow_prog[6], cg1,
+ if (CHECK(bpf_prog_attach_opts(allow_prog[6], cg1,
BPF_CGROUP_INET_EGRESS, &attach_opts),
"prog_replace", "errno=%d\n", errno))
goto err;
diff --git a/tools/testing/selftests/bpf/prog_tests/cgroup_attach_override.c b/tools/testing/selftests/bpf/prog_tests/cgroup_attach_override.c
index 9e96f8d87fea..9421a5b7f4e1 100644
--- a/tools/testing/selftests/bpf/prog_tests/cgroup_attach_override.c
+++ b/tools/testing/selftests/bpf/prog_tests/cgroup_attach_override.c
@@ -16,14 +16,14 @@ static int prog_load(int verdict)
BPF_MOV64_IMM(BPF_REG_0, verdict), /* r0 = verdict */
BPF_EXIT_INSN(),
};
- size_t insns_cnt = sizeof(prog) / sizeof(struct bpf_insn);
+ size_t insns_cnt = ARRAY_SIZE(prog);
- return bpf_load_program(BPF_PROG_TYPE_CGROUP_SKB,
+ return bpf_test_load_program(BPF_PROG_TYPE_CGROUP_SKB,
prog, insns_cnt, "GPL", 0,
bpf_log_buf, BPF_LOG_BUF_SIZE);
}
-void test_cgroup_attach_override(void)
+void serial_test_cgroup_attach_override(void)
{
int drop_prog = -1, allow_prog = -1, foo = -1, bar = -1;
__u32 duration = 0;
diff --git a/tools/testing/selftests/bpf/prog_tests/cgroup_getset_retval.c b/tools/testing/selftests/bpf/prog_tests/cgroup_getset_retval.c
new file mode 100644
index 000000000000..2bb5773d6f99
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/cgroup_getset_retval.c
@@ -0,0 +1,549 @@
+// SPDX-License-Identifier: GPL-2.0-only
+
+/*
+ * Copyright 2021 Google LLC.
+ */
+
+#include <test_progs.h>
+#include <cgroup_helpers.h>
+#include <network_helpers.h>
+
+#include "cgroup_getset_retval_setsockopt.skel.h"
+#include "cgroup_getset_retval_getsockopt.skel.h"
+#include "cgroup_getset_retval_hooks.skel.h"
+
+#define SOL_CUSTOM 0xdeadbeef
+
+static int zero;
+
+static void test_setsockopt_set(int cgroup_fd, int sock_fd)
+{
+ struct cgroup_getset_retval_setsockopt *obj;
+ struct bpf_link *link_set_eunatch = NULL;
+
+ obj = cgroup_getset_retval_setsockopt__open_and_load();
+ if (!ASSERT_OK_PTR(obj, "skel-load"))
+ return;
+
+ obj->bss->page_size = sysconf(_SC_PAGESIZE);
+
+ /* Attach setsockopt that sets EUNATCH, assert that
+ * we actually get that error when we run setsockopt()
+ */
+ link_set_eunatch = bpf_program__attach_cgroup(obj->progs.set_eunatch,
+ cgroup_fd);
+ if (!ASSERT_OK_PTR(link_set_eunatch, "cg-attach-set_eunatch"))
+ goto close_bpf_object;
+
+ if (!ASSERT_ERR(setsockopt(sock_fd, SOL_SOCKET, SO_REUSEADDR,
+ &zero, sizeof(int)), "setsockopt"))
+ goto close_bpf_object;
+ if (!ASSERT_EQ(errno, EUNATCH, "setsockopt-errno"))
+ goto close_bpf_object;
+
+ if (!ASSERT_EQ(obj->bss->invocations, 1, "invocations"))
+ goto close_bpf_object;
+ if (!ASSERT_FALSE(obj->bss->assertion_error, "assertion_error"))
+ goto close_bpf_object;
+
+close_bpf_object:
+ bpf_link__destroy(link_set_eunatch);
+
+ cgroup_getset_retval_setsockopt__destroy(obj);
+}
+
+static void test_setsockopt_set_and_get(int cgroup_fd, int sock_fd)
+{
+ struct cgroup_getset_retval_setsockopt *obj;
+ struct bpf_link *link_set_eunatch = NULL, *link_get_retval = NULL;
+
+ obj = cgroup_getset_retval_setsockopt__open_and_load();
+ if (!ASSERT_OK_PTR(obj, "skel-load"))
+ return;
+
+ obj->bss->page_size = sysconf(_SC_PAGESIZE);
+
+ /* Attach setsockopt that sets EUNATCH, and one that gets the
+ * previously set errno. Assert that we get the same errno back.
+ */
+ link_set_eunatch = bpf_program__attach_cgroup(obj->progs.set_eunatch,
+ cgroup_fd);
+ if (!ASSERT_OK_PTR(link_set_eunatch, "cg-attach-set_eunatch"))
+ goto close_bpf_object;
+ link_get_retval = bpf_program__attach_cgroup(obj->progs.get_retval,
+ cgroup_fd);
+ if (!ASSERT_OK_PTR(link_get_retval, "cg-attach-get_retval"))
+ goto close_bpf_object;
+
+ if (!ASSERT_ERR(setsockopt(sock_fd, SOL_SOCKET, SO_REUSEADDR,
+ &zero, sizeof(int)), "setsockopt"))
+ goto close_bpf_object;
+ if (!ASSERT_EQ(errno, EUNATCH, "setsockopt-errno"))
+ goto close_bpf_object;
+
+ if (!ASSERT_EQ(obj->bss->invocations, 2, "invocations"))
+ goto close_bpf_object;
+ if (!ASSERT_FALSE(obj->bss->assertion_error, "assertion_error"))
+ goto close_bpf_object;
+ if (!ASSERT_EQ(obj->bss->retval_value, -EUNATCH, "retval_value"))
+ goto close_bpf_object;
+
+close_bpf_object:
+ bpf_link__destroy(link_set_eunatch);
+ bpf_link__destroy(link_get_retval);
+
+ cgroup_getset_retval_setsockopt__destroy(obj);
+}
+
+static void test_setsockopt_default_zero(int cgroup_fd, int sock_fd)
+{
+ struct cgroup_getset_retval_setsockopt *obj;
+ struct bpf_link *link_get_retval = NULL;
+
+ obj = cgroup_getset_retval_setsockopt__open_and_load();
+ if (!ASSERT_OK_PTR(obj, "skel-load"))
+ return;
+
+ obj->bss->page_size = sysconf(_SC_PAGESIZE);
+
+ /* Attach setsockopt that gets the previously set errno.
+ * Assert that, without anything setting one, we get 0.
+ */
+ link_get_retval = bpf_program__attach_cgroup(obj->progs.get_retval,
+ cgroup_fd);
+ if (!ASSERT_OK_PTR(link_get_retval, "cg-attach-get_retval"))
+ goto close_bpf_object;
+
+ if (!ASSERT_OK(setsockopt(sock_fd, SOL_SOCKET, SO_REUSEADDR,
+ &zero, sizeof(int)), "setsockopt"))
+ goto close_bpf_object;
+
+ if (!ASSERT_EQ(obj->bss->invocations, 1, "invocations"))
+ goto close_bpf_object;
+ if (!ASSERT_FALSE(obj->bss->assertion_error, "assertion_error"))
+ goto close_bpf_object;
+ if (!ASSERT_EQ(obj->bss->retval_value, 0, "retval_value"))
+ goto close_bpf_object;
+
+close_bpf_object:
+ bpf_link__destroy(link_get_retval);
+
+ cgroup_getset_retval_setsockopt__destroy(obj);
+}
+
+static void test_setsockopt_default_zero_and_set(int cgroup_fd, int sock_fd)
+{
+ struct cgroup_getset_retval_setsockopt *obj;
+ struct bpf_link *link_get_retval = NULL, *link_set_eunatch = NULL;
+
+ obj = cgroup_getset_retval_setsockopt__open_and_load();
+ if (!ASSERT_OK_PTR(obj, "skel-load"))
+ return;
+
+ obj->bss->page_size = sysconf(_SC_PAGESIZE);
+
+ /* Attach setsockopt that gets the previously set errno, and then
+ * one that sets the errno to EUNATCH. Assert that the get does not
+ * see EUNATCH set later, and does not prevent EUNATCH from being set.
+ */
+ link_get_retval = bpf_program__attach_cgroup(obj->progs.get_retval,
+ cgroup_fd);
+ if (!ASSERT_OK_PTR(link_get_retval, "cg-attach-get_retval"))
+ goto close_bpf_object;
+ link_set_eunatch = bpf_program__attach_cgroup(obj->progs.set_eunatch,
+ cgroup_fd);
+ if (!ASSERT_OK_PTR(link_set_eunatch, "cg-attach-set_eunatch"))
+ goto close_bpf_object;
+
+ if (!ASSERT_ERR(setsockopt(sock_fd, SOL_SOCKET, SO_REUSEADDR,
+ &zero, sizeof(int)), "setsockopt"))
+ goto close_bpf_object;
+ if (!ASSERT_EQ(errno, EUNATCH, "setsockopt-errno"))
+ goto close_bpf_object;
+
+ if (!ASSERT_EQ(obj->bss->invocations, 2, "invocations"))
+ goto close_bpf_object;
+ if (!ASSERT_FALSE(obj->bss->assertion_error, "assertion_error"))
+ goto close_bpf_object;
+ if (!ASSERT_EQ(obj->bss->retval_value, 0, "retval_value"))
+ goto close_bpf_object;
+
+close_bpf_object:
+ bpf_link__destroy(link_get_retval);
+ bpf_link__destroy(link_set_eunatch);
+
+ cgroup_getset_retval_setsockopt__destroy(obj);
+}
+
+static void test_setsockopt_override(int cgroup_fd, int sock_fd)
+{
+ struct cgroup_getset_retval_setsockopt *obj;
+ struct bpf_link *link_set_eunatch = NULL, *link_set_eisconn = NULL;
+ struct bpf_link *link_get_retval = NULL;
+
+ obj = cgroup_getset_retval_setsockopt__open_and_load();
+ if (!ASSERT_OK_PTR(obj, "skel-load"))
+ return;
+
+ obj->bss->page_size = sysconf(_SC_PAGESIZE);
+
+ /* Attach setsockopt that sets EUNATCH, then one that sets EISCONN,
+ * and then one that gets the exported errno. Assert both the syscall
+ * and the helper sees the last set errno.
+ */
+ link_set_eunatch = bpf_program__attach_cgroup(obj->progs.set_eunatch,
+ cgroup_fd);
+ if (!ASSERT_OK_PTR(link_set_eunatch, "cg-attach-set_eunatch"))
+ goto close_bpf_object;
+ link_set_eisconn = bpf_program__attach_cgroup(obj->progs.set_eisconn,
+ cgroup_fd);
+ if (!ASSERT_OK_PTR(link_set_eisconn, "cg-attach-set_eisconn"))
+ goto close_bpf_object;
+ link_get_retval = bpf_program__attach_cgroup(obj->progs.get_retval,
+ cgroup_fd);
+ if (!ASSERT_OK_PTR(link_get_retval, "cg-attach-get_retval"))
+ goto close_bpf_object;
+
+ if (!ASSERT_ERR(setsockopt(sock_fd, SOL_SOCKET, SO_REUSEADDR,
+ &zero, sizeof(int)), "setsockopt"))
+ goto close_bpf_object;
+ if (!ASSERT_EQ(errno, EISCONN, "setsockopt-errno"))
+ goto close_bpf_object;
+
+ if (!ASSERT_EQ(obj->bss->invocations, 3, "invocations"))
+ goto close_bpf_object;
+ if (!ASSERT_FALSE(obj->bss->assertion_error, "assertion_error"))
+ goto close_bpf_object;
+ if (!ASSERT_EQ(obj->bss->retval_value, -EISCONN, "retval_value"))
+ goto close_bpf_object;
+
+close_bpf_object:
+ bpf_link__destroy(link_set_eunatch);
+ bpf_link__destroy(link_set_eisconn);
+ bpf_link__destroy(link_get_retval);
+
+ cgroup_getset_retval_setsockopt__destroy(obj);
+}
+
+static void test_setsockopt_legacy_eperm(int cgroup_fd, int sock_fd)
+{
+ struct cgroup_getset_retval_setsockopt *obj;
+ struct bpf_link *link_legacy_eperm = NULL, *link_get_retval = NULL;
+
+ obj = cgroup_getset_retval_setsockopt__open_and_load();
+ if (!ASSERT_OK_PTR(obj, "skel-load"))
+ return;
+
+ obj->bss->page_size = sysconf(_SC_PAGESIZE);
+
+ /* Attach setsockopt that return a reject without setting errno
+ * (legacy reject), and one that gets the errno. Assert that for
+ * backward compatibility the syscall result in EPERM, and this
+ * is also visible to the helper.
+ */
+ link_legacy_eperm = bpf_program__attach_cgroup(obj->progs.legacy_eperm,
+ cgroup_fd);
+ if (!ASSERT_OK_PTR(link_legacy_eperm, "cg-attach-legacy_eperm"))
+ goto close_bpf_object;
+ link_get_retval = bpf_program__attach_cgroup(obj->progs.get_retval,
+ cgroup_fd);
+ if (!ASSERT_OK_PTR(link_get_retval, "cg-attach-get_retval"))
+ goto close_bpf_object;
+
+ if (!ASSERT_ERR(setsockopt(sock_fd, SOL_SOCKET, SO_REUSEADDR,
+ &zero, sizeof(int)), "setsockopt"))
+ goto close_bpf_object;
+ if (!ASSERT_EQ(errno, EPERM, "setsockopt-errno"))
+ goto close_bpf_object;
+
+ if (!ASSERT_EQ(obj->bss->invocations, 2, "invocations"))
+ goto close_bpf_object;
+ if (!ASSERT_FALSE(obj->bss->assertion_error, "assertion_error"))
+ goto close_bpf_object;
+ if (!ASSERT_EQ(obj->bss->retval_value, -EPERM, "retval_value"))
+ goto close_bpf_object;
+
+close_bpf_object:
+ bpf_link__destroy(link_legacy_eperm);
+ bpf_link__destroy(link_get_retval);
+
+ cgroup_getset_retval_setsockopt__destroy(obj);
+}
+
+static void test_setsockopt_legacy_no_override(int cgroup_fd, int sock_fd)
+{
+ struct cgroup_getset_retval_setsockopt *obj;
+ struct bpf_link *link_set_eunatch = NULL, *link_legacy_eperm = NULL;
+ struct bpf_link *link_get_retval = NULL;
+
+ obj = cgroup_getset_retval_setsockopt__open_and_load();
+ if (!ASSERT_OK_PTR(obj, "skel-load"))
+ return;
+
+ obj->bss->page_size = sysconf(_SC_PAGESIZE);
+
+ /* Attach setsockopt that sets EUNATCH, then one that return a reject
+ * without setting errno, and then one that gets the exported errno.
+ * Assert both the syscall and the helper's errno are unaffected by
+ * the second prog (i.e. legacy rejects does not override the errno
+ * to EPERM).
+ */
+ link_set_eunatch = bpf_program__attach_cgroup(obj->progs.set_eunatch,
+ cgroup_fd);
+ if (!ASSERT_OK_PTR(link_set_eunatch, "cg-attach-set_eunatch"))
+ goto close_bpf_object;
+ link_legacy_eperm = bpf_program__attach_cgroup(obj->progs.legacy_eperm,
+ cgroup_fd);
+ if (!ASSERT_OK_PTR(link_legacy_eperm, "cg-attach-legacy_eperm"))
+ goto close_bpf_object;
+ link_get_retval = bpf_program__attach_cgroup(obj->progs.get_retval,
+ cgroup_fd);
+ if (!ASSERT_OK_PTR(link_get_retval, "cg-attach-get_retval"))
+ goto close_bpf_object;
+
+ if (!ASSERT_ERR(setsockopt(sock_fd, SOL_SOCKET, SO_REUSEADDR,
+ &zero, sizeof(int)), "setsockopt"))
+ goto close_bpf_object;
+ if (!ASSERT_EQ(errno, EUNATCH, "setsockopt-errno"))
+ goto close_bpf_object;
+
+ if (!ASSERT_EQ(obj->bss->invocations, 3, "invocations"))
+ goto close_bpf_object;
+ if (!ASSERT_FALSE(obj->bss->assertion_error, "assertion_error"))
+ goto close_bpf_object;
+ if (!ASSERT_EQ(obj->bss->retval_value, -EUNATCH, "retval_value"))
+ goto close_bpf_object;
+
+close_bpf_object:
+ bpf_link__destroy(link_set_eunatch);
+ bpf_link__destroy(link_legacy_eperm);
+ bpf_link__destroy(link_get_retval);
+
+ cgroup_getset_retval_setsockopt__destroy(obj);
+}
+
+static void test_getsockopt_get(int cgroup_fd, int sock_fd)
+{
+ struct cgroup_getset_retval_getsockopt *obj;
+ struct bpf_link *link_get_retval = NULL;
+ int buf;
+ socklen_t optlen = sizeof(buf);
+
+ obj = cgroup_getset_retval_getsockopt__open_and_load();
+ if (!ASSERT_OK_PTR(obj, "skel-load"))
+ return;
+
+ obj->bss->page_size = sysconf(_SC_PAGESIZE);
+
+ /* Attach getsockopt that gets previously set errno. Assert that the
+ * error from kernel is in both ctx_retval_value and retval_value.
+ */
+ link_get_retval = bpf_program__attach_cgroup(obj->progs.get_retval,
+ cgroup_fd);
+ if (!ASSERT_OK_PTR(link_get_retval, "cg-attach-get_retval"))
+ goto close_bpf_object;
+
+ if (!ASSERT_ERR(getsockopt(sock_fd, SOL_CUSTOM, 0,
+ &buf, &optlen), "getsockopt"))
+ goto close_bpf_object;
+ if (!ASSERT_EQ(errno, EOPNOTSUPP, "getsockopt-errno"))
+ goto close_bpf_object;
+
+ if (!ASSERT_EQ(obj->bss->invocations, 1, "invocations"))
+ goto close_bpf_object;
+ if (!ASSERT_FALSE(obj->bss->assertion_error, "assertion_error"))
+ goto close_bpf_object;
+ if (!ASSERT_EQ(obj->bss->retval_value, -EOPNOTSUPP, "retval_value"))
+ goto close_bpf_object;
+ if (!ASSERT_EQ(obj->bss->ctx_retval_value, -EOPNOTSUPP, "ctx_retval_value"))
+ goto close_bpf_object;
+
+close_bpf_object:
+ bpf_link__destroy(link_get_retval);
+
+ cgroup_getset_retval_getsockopt__destroy(obj);
+}
+
+static void test_getsockopt_override(int cgroup_fd, int sock_fd)
+{
+ struct cgroup_getset_retval_getsockopt *obj;
+ struct bpf_link *link_set_eisconn = NULL;
+ int buf;
+ socklen_t optlen = sizeof(buf);
+
+ obj = cgroup_getset_retval_getsockopt__open_and_load();
+ if (!ASSERT_OK_PTR(obj, "skel-load"))
+ return;
+
+ obj->bss->page_size = sysconf(_SC_PAGESIZE);
+
+ /* Attach getsockopt that sets retval to -EISCONN. Assert that this
+ * overrides the value from kernel.
+ */
+ link_set_eisconn = bpf_program__attach_cgroup(obj->progs.set_eisconn,
+ cgroup_fd);
+ if (!ASSERT_OK_PTR(link_set_eisconn, "cg-attach-set_eisconn"))
+ goto close_bpf_object;
+
+ if (!ASSERT_ERR(getsockopt(sock_fd, SOL_CUSTOM, 0,
+ &buf, &optlen), "getsockopt"))
+ goto close_bpf_object;
+ if (!ASSERT_EQ(errno, EISCONN, "getsockopt-errno"))
+ goto close_bpf_object;
+
+ if (!ASSERT_EQ(obj->bss->invocations, 1, "invocations"))
+ goto close_bpf_object;
+ if (!ASSERT_FALSE(obj->bss->assertion_error, "assertion_error"))
+ goto close_bpf_object;
+
+close_bpf_object:
+ bpf_link__destroy(link_set_eisconn);
+
+ cgroup_getset_retval_getsockopt__destroy(obj);
+}
+
+static void test_getsockopt_retval_sync(int cgroup_fd, int sock_fd)
+{
+ struct cgroup_getset_retval_getsockopt *obj;
+ struct bpf_link *link_set_eisconn = NULL, *link_clear_retval = NULL;
+ struct bpf_link *link_get_retval = NULL;
+ int buf;
+ socklen_t optlen = sizeof(buf);
+
+ obj = cgroup_getset_retval_getsockopt__open_and_load();
+ if (!ASSERT_OK_PTR(obj, "skel-load"))
+ return;
+
+ obj->bss->page_size = sysconf(_SC_PAGESIZE);
+
+ /* Attach getsockopt that sets retval to -EISCONN, and one that clears
+ * ctx retval. Assert that the clearing ctx retval is synced to helper
+ * and clears any errors both from kernel and BPF..
+ */
+ link_set_eisconn = bpf_program__attach_cgroup(obj->progs.set_eisconn,
+ cgroup_fd);
+ if (!ASSERT_OK_PTR(link_set_eisconn, "cg-attach-set_eisconn"))
+ goto close_bpf_object;
+ link_clear_retval = bpf_program__attach_cgroup(obj->progs.clear_retval,
+ cgroup_fd);
+ if (!ASSERT_OK_PTR(link_clear_retval, "cg-attach-clear_retval"))
+ goto close_bpf_object;
+ link_get_retval = bpf_program__attach_cgroup(obj->progs.get_retval,
+ cgroup_fd);
+ if (!ASSERT_OK_PTR(link_get_retval, "cg-attach-get_retval"))
+ goto close_bpf_object;
+
+ if (!ASSERT_OK(getsockopt(sock_fd, SOL_CUSTOM, 0,
+ &buf, &optlen), "getsockopt"))
+ goto close_bpf_object;
+
+ if (!ASSERT_EQ(obj->bss->invocations, 3, "invocations"))
+ goto close_bpf_object;
+ if (!ASSERT_FALSE(obj->bss->assertion_error, "assertion_error"))
+ goto close_bpf_object;
+ if (!ASSERT_EQ(obj->bss->retval_value, 0, "retval_value"))
+ goto close_bpf_object;
+ if (!ASSERT_EQ(obj->bss->ctx_retval_value, 0, "ctx_retval_value"))
+ goto close_bpf_object;
+
+close_bpf_object:
+ bpf_link__destroy(link_set_eisconn);
+ bpf_link__destroy(link_clear_retval);
+ bpf_link__destroy(link_get_retval);
+
+ cgroup_getset_retval_getsockopt__destroy(obj);
+}
+
+struct exposed_hook {
+ const char *name;
+ int expected_err;
+} exposed_hooks[] = {
+
+#define BPF_RETVAL_HOOK(NAME, SECTION, CTX, EXPECTED_ERR) \
+ { \
+ .name = #NAME, \
+ .expected_err = EXPECTED_ERR, \
+ },
+
+#include "cgroup_getset_retval_hooks.h"
+
+#undef BPF_RETVAL_HOOK
+};
+
+static void test_exposed_hooks(int cgroup_fd, int sock_fd)
+{
+ struct cgroup_getset_retval_hooks *skel;
+ struct bpf_program *prog;
+ int err;
+ int i;
+
+ for (i = 0; i < ARRAY_SIZE(exposed_hooks); i++) {
+ skel = cgroup_getset_retval_hooks__open();
+ if (!ASSERT_OK_PTR(skel, "cgroup_getset_retval_hooks__open"))
+ continue;
+
+ prog = bpf_object__find_program_by_name(skel->obj, exposed_hooks[i].name);
+ if (!ASSERT_NEQ(prog, NULL, "bpf_object__find_program_by_name"))
+ goto close_skel;
+
+ err = bpf_program__set_autoload(prog, true);
+ if (!ASSERT_OK(err, "bpf_program__set_autoload"))
+ goto close_skel;
+
+ err = cgroup_getset_retval_hooks__load(skel);
+ ASSERT_EQ(err, exposed_hooks[i].expected_err, "expected_err");
+
+close_skel:
+ cgroup_getset_retval_hooks__destroy(skel);
+ }
+}
+
+void test_cgroup_getset_retval(void)
+{
+ int cgroup_fd = -1;
+ int sock_fd = -1;
+
+ cgroup_fd = test__join_cgroup("/cgroup_getset_retval");
+ if (!ASSERT_GE(cgroup_fd, 0, "cg-create"))
+ goto close_fd;
+
+ sock_fd = start_server(AF_INET, SOCK_DGRAM, NULL, 0, 0);
+ if (!ASSERT_GE(sock_fd, 0, "start-server"))
+ goto close_fd;
+
+ if (test__start_subtest("setsockopt-set"))
+ test_setsockopt_set(cgroup_fd, sock_fd);
+
+ if (test__start_subtest("setsockopt-set_and_get"))
+ test_setsockopt_set_and_get(cgroup_fd, sock_fd);
+
+ if (test__start_subtest("setsockopt-default_zero"))
+ test_setsockopt_default_zero(cgroup_fd, sock_fd);
+
+ if (test__start_subtest("setsockopt-default_zero_and_set"))
+ test_setsockopt_default_zero_and_set(cgroup_fd, sock_fd);
+
+ if (test__start_subtest("setsockopt-override"))
+ test_setsockopt_override(cgroup_fd, sock_fd);
+
+ if (test__start_subtest("setsockopt-legacy_eperm"))
+ test_setsockopt_legacy_eperm(cgroup_fd, sock_fd);
+
+ if (test__start_subtest("setsockopt-legacy_no_override"))
+ test_setsockopt_legacy_no_override(cgroup_fd, sock_fd);
+
+ if (test__start_subtest("getsockopt-get"))
+ test_getsockopt_get(cgroup_fd, sock_fd);
+
+ if (test__start_subtest("getsockopt-override"))
+ test_getsockopt_override(cgroup_fd, sock_fd);
+
+ if (test__start_subtest("getsockopt-retval_sync"))
+ test_getsockopt_retval_sync(cgroup_fd, sock_fd);
+
+ if (test__start_subtest("exposed_hooks"))
+ test_exposed_hooks(cgroup_fd, sock_fd);
+
+close_fd:
+ close(cgroup_fd);
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/cgroup_hierarchical_stats.c b/tools/testing/selftests/bpf/prog_tests/cgroup_hierarchical_stats.c
new file mode 100644
index 000000000000..3bd27d2ea668
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/cgroup_hierarchical_stats.c
@@ -0,0 +1,339 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * This test makes sure BPF stats collection using rstat works correctly.
+ * The test uses 3 BPF progs:
+ * (a) counter: This BPF prog is invoked every time we attach a process to a
+ * cgroup and locklessly increments a percpu counter.
+ * The program then calls cgroup_rstat_updated() to inform rstat
+ * of an update on the (cpu, cgroup) pair.
+ *
+ * (b) flusher: This BPF prog is invoked when an rstat flush is ongoing, it
+ * aggregates all percpu counters to a total counter, and also
+ * propagates the changes to the ancestor cgroups.
+ *
+ * (c) dumper: This BPF prog is a cgroup_iter. It is used to output the total
+ * counter of a cgroup through reading a file in userspace.
+ *
+ * The test sets up a cgroup hierarchy, and the above programs. It spawns a few
+ * processes in the leaf cgroups and makes sure all the counters are aggregated
+ * correctly.
+ *
+ * Copyright 2022 Google LLC.
+ */
+#include <asm-generic/errno.h>
+#include <errno.h>
+#include <sys/types.h>
+#include <sys/mount.h>
+#include <sys/stat.h>
+#include <unistd.h>
+
+#include <test_progs.h>
+#include <bpf/libbpf.h>
+#include <bpf/bpf.h>
+
+#include "cgroup_helpers.h"
+#include "cgroup_hierarchical_stats.skel.h"
+
+#define PAGE_SIZE 4096
+#define MB(x) (x << 20)
+
+#define PROCESSES_PER_CGROUP 3
+
+#define BPFFS_ROOT "/sys/fs/bpf/"
+#define BPFFS_ATTACH_COUNTERS BPFFS_ROOT "attach_counters/"
+
+#define CG_ROOT_NAME "root"
+#define CG_ROOT_ID 1
+
+#define CGROUP_PATH(p, n) {.path = p"/"n, .name = n}
+
+static struct {
+ const char *path, *name;
+ unsigned long long id;
+ int fd;
+} cgroups[] = {
+ CGROUP_PATH("/", "test"),
+ CGROUP_PATH("/test", "child1"),
+ CGROUP_PATH("/test", "child2"),
+ CGROUP_PATH("/test/child1", "child1_1"),
+ CGROUP_PATH("/test/child1", "child1_2"),
+ CGROUP_PATH("/test/child2", "child2_1"),
+ CGROUP_PATH("/test/child2", "child2_2"),
+};
+
+#define N_CGROUPS ARRAY_SIZE(cgroups)
+#define N_NON_LEAF_CGROUPS 3
+
+static int root_cgroup_fd;
+static bool mounted_bpffs;
+
+/* reads file at 'path' to 'buf', returns 0 on success. */
+static int read_from_file(const char *path, char *buf, size_t size)
+{
+ int fd, len;
+
+ fd = open(path, O_RDONLY);
+ if (fd < 0)
+ return fd;
+
+ len = read(fd, buf, size);
+ close(fd);
+ if (len < 0)
+ return len;
+
+ buf[len] = 0;
+ return 0;
+}
+
+/* mounts bpffs and mkdir for reading stats, returns 0 on success. */
+static int setup_bpffs(void)
+{
+ int err;
+
+ /* Mount bpffs */
+ err = mount("bpf", BPFFS_ROOT, "bpf", 0, NULL);
+ mounted_bpffs = !err;
+ if (ASSERT_FALSE(err && errno != EBUSY, "mount"))
+ return err;
+
+ /* Create a directory to contain stat files in bpffs */
+ err = mkdir(BPFFS_ATTACH_COUNTERS, 0755);
+ if (!ASSERT_OK(err, "mkdir"))
+ return err;
+
+ return 0;
+}
+
+static void cleanup_bpffs(void)
+{
+ /* Remove created directory in bpffs */
+ ASSERT_OK(rmdir(BPFFS_ATTACH_COUNTERS), "rmdir "BPFFS_ATTACH_COUNTERS);
+
+ /* Unmount bpffs, if it wasn't already mounted when we started */
+ if (mounted_bpffs)
+ return;
+
+ ASSERT_OK(umount(BPFFS_ROOT), "unmount bpffs");
+}
+
+/* sets up cgroups, returns 0 on success. */
+static int setup_cgroups(void)
+{
+ int i, fd, err;
+
+ err = setup_cgroup_environment();
+ if (!ASSERT_OK(err, "setup_cgroup_environment"))
+ return err;
+
+ root_cgroup_fd = get_root_cgroup();
+ if (!ASSERT_GE(root_cgroup_fd, 0, "get_root_cgroup"))
+ return root_cgroup_fd;
+
+ for (i = 0; i < N_CGROUPS; i++) {
+ fd = create_and_get_cgroup(cgroups[i].path);
+ if (!ASSERT_GE(fd, 0, "create_and_get_cgroup"))
+ return fd;
+
+ cgroups[i].fd = fd;
+ cgroups[i].id = get_cgroup_id(cgroups[i].path);
+ }
+ return 0;
+}
+
+static void cleanup_cgroups(void)
+{
+ close(root_cgroup_fd);
+ for (int i = 0; i < N_CGROUPS; i++)
+ close(cgroups[i].fd);
+ cleanup_cgroup_environment();
+}
+
+/* Sets up cgroup hiearchary, returns 0 on success. */
+static int setup_hierarchy(void)
+{
+ return setup_bpffs() || setup_cgroups();
+}
+
+static void destroy_hierarchy(void)
+{
+ cleanup_cgroups();
+ cleanup_bpffs();
+}
+
+static int attach_processes(void)
+{
+ int i, j, status;
+
+ /* In every leaf cgroup, attach 3 processes */
+ for (i = N_NON_LEAF_CGROUPS; i < N_CGROUPS; i++) {
+ for (j = 0; j < PROCESSES_PER_CGROUP; j++) {
+ pid_t pid;
+
+ /* Create child and attach to cgroup */
+ pid = fork();
+ if (pid == 0) {
+ if (join_parent_cgroup(cgroups[i].path))
+ exit(EACCES);
+ exit(0);
+ }
+
+ /* Cleanup child */
+ waitpid(pid, &status, 0);
+ if (!ASSERT_TRUE(WIFEXITED(status), "child process exited"))
+ return 1;
+ if (!ASSERT_EQ(WEXITSTATUS(status), 0,
+ "child process exit code"))
+ return 1;
+ }
+ }
+ return 0;
+}
+
+static unsigned long long
+get_attach_counter(unsigned long long cgroup_id, const char *file_name)
+{
+ unsigned long long attach_counter = 0, id = 0;
+ static char buf[128], path[128];
+
+ /* For every cgroup, read the file generated by cgroup_iter */
+ snprintf(path, 128, "%s%s", BPFFS_ATTACH_COUNTERS, file_name);
+ if (!ASSERT_OK(read_from_file(path, buf, 128), "read cgroup_iter"))
+ return 0;
+
+ /* Check the output file formatting */
+ ASSERT_EQ(sscanf(buf, "cg_id: %llu, attach_counter: %llu\n",
+ &id, &attach_counter), 2, "output format");
+
+ /* Check that the cgroup_id is displayed correctly */
+ ASSERT_EQ(id, cgroup_id, "cgroup_id");
+ /* Check that the counter is non-zero */
+ ASSERT_GT(attach_counter, 0, "attach counter non-zero");
+ return attach_counter;
+}
+
+static void check_attach_counters(void)
+{
+ unsigned long long attach_counters[N_CGROUPS], root_attach_counter;
+ int i;
+
+ for (i = 0; i < N_CGROUPS; i++)
+ attach_counters[i] = get_attach_counter(cgroups[i].id,
+ cgroups[i].name);
+
+ /* Read stats for root too */
+ root_attach_counter = get_attach_counter(CG_ROOT_ID, CG_ROOT_NAME);
+
+ /* Check that all leafs cgroups have an attach counter of 3 */
+ for (i = N_NON_LEAF_CGROUPS; i < N_CGROUPS; i++)
+ ASSERT_EQ(attach_counters[i], PROCESSES_PER_CGROUP,
+ "leaf cgroup attach counter");
+
+ /* Check that child1 == child1_1 + child1_2 */
+ ASSERT_EQ(attach_counters[1], attach_counters[3] + attach_counters[4],
+ "child1_counter");
+ /* Check that child2 == child2_1 + child2_2 */
+ ASSERT_EQ(attach_counters[2], attach_counters[5] + attach_counters[6],
+ "child2_counter");
+ /* Check that test == child1 + child2 */
+ ASSERT_EQ(attach_counters[0], attach_counters[1] + attach_counters[2],
+ "test_counter");
+ /* Check that root >= test */
+ ASSERT_GE(root_attach_counter, attach_counters[1], "root_counter");
+}
+
+/* Creates iter link and pins in bpffs, returns 0 on success, -errno on failure.
+ */
+static int setup_cgroup_iter(struct cgroup_hierarchical_stats *obj,
+ int cgroup_fd, const char *file_name)
+{
+ DECLARE_LIBBPF_OPTS(bpf_iter_attach_opts, opts);
+ union bpf_iter_link_info linfo = {};
+ struct bpf_link *link;
+ static char path[128];
+ int err;
+
+ /*
+ * Create an iter link, parameterized by cgroup_fd. We only want to
+ * traverse one cgroup, so set the traversal order to "self".
+ */
+ linfo.cgroup.cgroup_fd = cgroup_fd;
+ linfo.cgroup.order = BPF_CGROUP_ITER_SELF_ONLY;
+ opts.link_info = &linfo;
+ opts.link_info_len = sizeof(linfo);
+ link = bpf_program__attach_iter(obj->progs.dumper, &opts);
+ if (!ASSERT_OK_PTR(link, "attach_iter"))
+ return -EFAULT;
+
+ /* Pin the link to a bpffs file */
+ snprintf(path, 128, "%s%s", BPFFS_ATTACH_COUNTERS, file_name);
+ err = bpf_link__pin(link, path);
+ ASSERT_OK(err, "pin cgroup_iter");
+
+ /* Remove the link, leaving only the ref held by the pinned file */
+ bpf_link__destroy(link);
+ return err;
+}
+
+/* Sets up programs for collecting stats, returns 0 on success. */
+static int setup_progs(struct cgroup_hierarchical_stats **skel)
+{
+ int i, err;
+
+ *skel = cgroup_hierarchical_stats__open_and_load();
+ if (!ASSERT_OK_PTR(*skel, "open_and_load"))
+ return 1;
+
+ /* Attach cgroup_iter program that will dump the stats to cgroups */
+ for (i = 0; i < N_CGROUPS; i++) {
+ err = setup_cgroup_iter(*skel, cgroups[i].fd, cgroups[i].name);
+ if (!ASSERT_OK(err, "setup_cgroup_iter"))
+ return err;
+ }
+
+ /* Also dump stats for root */
+ err = setup_cgroup_iter(*skel, root_cgroup_fd, CG_ROOT_NAME);
+ if (!ASSERT_OK(err, "setup_cgroup_iter"))
+ return err;
+
+ bpf_program__set_autoattach((*skel)->progs.dumper, false);
+ err = cgroup_hierarchical_stats__attach(*skel);
+ if (!ASSERT_OK(err, "attach"))
+ return err;
+
+ return 0;
+}
+
+static void destroy_progs(struct cgroup_hierarchical_stats *skel)
+{
+ static char path[128];
+ int i;
+
+ for (i = 0; i < N_CGROUPS; i++) {
+ /* Delete files in bpffs that cgroup_iters are pinned in */
+ snprintf(path, 128, "%s%s", BPFFS_ATTACH_COUNTERS,
+ cgroups[i].name);
+ ASSERT_OK(remove(path), "remove cgroup_iter pin");
+ }
+
+ /* Delete root file in bpffs */
+ snprintf(path, 128, "%s%s", BPFFS_ATTACH_COUNTERS, CG_ROOT_NAME);
+ ASSERT_OK(remove(path), "remove cgroup_iter root pin");
+ cgroup_hierarchical_stats__destroy(skel);
+}
+
+void test_cgroup_hierarchical_stats(void)
+{
+ struct cgroup_hierarchical_stats *skel = NULL;
+
+ if (setup_hierarchy())
+ goto hierarchy_cleanup;
+ if (setup_progs(&skel))
+ goto cleanup;
+ if (attach_processes())
+ goto cleanup;
+ check_attach_counters();
+cleanup:
+ destroy_progs(skel);
+hierarchy_cleanup:
+ destroy_hierarchy();
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/cgroup_iter.c b/tools/testing/selftests/bpf/prog_tests/cgroup_iter.c
new file mode 100644
index 000000000000..574d9a0cdc8e
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/cgroup_iter.c
@@ -0,0 +1,333 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2022 Google */
+
+#include <test_progs.h>
+#include <bpf/libbpf.h>
+#include <bpf/btf.h>
+#include "iters_css_task.skel.h"
+#include "cgroup_iter.skel.h"
+#include "cgroup_helpers.h"
+
+#define ROOT 0
+#define PARENT 1
+#define CHILD1 2
+#define CHILD2 3
+#define NUM_CGROUPS 4
+
+#define PROLOGUE "prologue\n"
+#define EPILOGUE "epilogue\n"
+
+static const char *cg_path[] = {
+ "/", "/parent", "/parent/child1", "/parent/child2"
+};
+
+static int cg_fd[] = {-1, -1, -1, -1};
+static unsigned long long cg_id[] = {0, 0, 0, 0};
+static char expected_output[64];
+
+static int setup_cgroups(void)
+{
+ int fd, i = 0;
+
+ for (i = 0; i < NUM_CGROUPS; i++) {
+ fd = create_and_get_cgroup(cg_path[i]);
+ if (fd < 0)
+ return fd;
+
+ cg_fd[i] = fd;
+ cg_id[i] = get_cgroup_id(cg_path[i]);
+ }
+ return 0;
+}
+
+static void cleanup_cgroups(void)
+{
+ int i;
+
+ for (i = 0; i < NUM_CGROUPS; i++)
+ close(cg_fd[i]);
+}
+
+static void read_from_cgroup_iter(struct bpf_program *prog, int cgroup_fd,
+ int order, const char *testname)
+{
+ DECLARE_LIBBPF_OPTS(bpf_iter_attach_opts, opts);
+ union bpf_iter_link_info linfo;
+ struct bpf_link *link;
+ int len, iter_fd;
+ static char buf[128];
+ size_t left;
+ char *p;
+
+ memset(&linfo, 0, sizeof(linfo));
+ linfo.cgroup.cgroup_fd = cgroup_fd;
+ linfo.cgroup.order = order;
+ opts.link_info = &linfo;
+ opts.link_info_len = sizeof(linfo);
+
+ link = bpf_program__attach_iter(prog, &opts);
+ if (!ASSERT_OK_PTR(link, "attach_iter"))
+ return;
+
+ iter_fd = bpf_iter_create(bpf_link__fd(link));
+ if (iter_fd < 0)
+ goto free_link;
+
+ memset(buf, 0, sizeof(buf));
+ left = ARRAY_SIZE(buf);
+ p = buf;
+ while ((len = read(iter_fd, p, left)) > 0) {
+ p += len;
+ left -= len;
+ }
+
+ ASSERT_STREQ(buf, expected_output, testname);
+
+ /* read() after iter finishes should be ok. */
+ if (len == 0)
+ ASSERT_OK(read(iter_fd, buf, sizeof(buf)), "second_read");
+
+ close(iter_fd);
+free_link:
+ bpf_link__destroy(link);
+}
+
+/* Invalid cgroup. */
+static void test_invalid_cgroup(struct cgroup_iter *skel)
+{
+ DECLARE_LIBBPF_OPTS(bpf_iter_attach_opts, opts);
+ union bpf_iter_link_info linfo;
+ struct bpf_link *link;
+
+ memset(&linfo, 0, sizeof(linfo));
+ linfo.cgroup.cgroup_fd = (__u32)-1;
+ opts.link_info = &linfo;
+ opts.link_info_len = sizeof(linfo);
+
+ link = bpf_program__attach_iter(skel->progs.cgroup_id_printer, &opts);
+ ASSERT_ERR_PTR(link, "attach_iter");
+ bpf_link__destroy(link);
+}
+
+/* Specifying both cgroup_fd and cgroup_id is invalid. */
+static void test_invalid_cgroup_spec(struct cgroup_iter *skel)
+{
+ DECLARE_LIBBPF_OPTS(bpf_iter_attach_opts, opts);
+ union bpf_iter_link_info linfo;
+ struct bpf_link *link;
+
+ memset(&linfo, 0, sizeof(linfo));
+ linfo.cgroup.cgroup_fd = (__u32)cg_fd[PARENT];
+ linfo.cgroup.cgroup_id = (__u64)cg_id[PARENT];
+ opts.link_info = &linfo;
+ opts.link_info_len = sizeof(linfo);
+
+ link = bpf_program__attach_iter(skel->progs.cgroup_id_printer, &opts);
+ ASSERT_ERR_PTR(link, "attach_iter");
+ bpf_link__destroy(link);
+}
+
+/* Preorder walk prints parent and child in order. */
+static void test_walk_preorder(struct cgroup_iter *skel)
+{
+ snprintf(expected_output, sizeof(expected_output),
+ PROLOGUE "%8llu\n%8llu\n%8llu\n" EPILOGUE,
+ cg_id[PARENT], cg_id[CHILD1], cg_id[CHILD2]);
+
+ read_from_cgroup_iter(skel->progs.cgroup_id_printer, cg_fd[PARENT],
+ BPF_CGROUP_ITER_DESCENDANTS_PRE, "preorder");
+}
+
+/* Postorder walk prints child and parent in order. */
+static void test_walk_postorder(struct cgroup_iter *skel)
+{
+ snprintf(expected_output, sizeof(expected_output),
+ PROLOGUE "%8llu\n%8llu\n%8llu\n" EPILOGUE,
+ cg_id[CHILD1], cg_id[CHILD2], cg_id[PARENT]);
+
+ read_from_cgroup_iter(skel->progs.cgroup_id_printer, cg_fd[PARENT],
+ BPF_CGROUP_ITER_DESCENDANTS_POST, "postorder");
+}
+
+/* Walking parents prints parent and then root. */
+static void test_walk_ancestors_up(struct cgroup_iter *skel)
+{
+ /* terminate the walk when ROOT is met. */
+ skel->bss->terminal_cgroup = cg_id[ROOT];
+
+ snprintf(expected_output, sizeof(expected_output),
+ PROLOGUE "%8llu\n%8llu\n" EPILOGUE,
+ cg_id[PARENT], cg_id[ROOT]);
+
+ read_from_cgroup_iter(skel->progs.cgroup_id_printer, cg_fd[PARENT],
+ BPF_CGROUP_ITER_ANCESTORS_UP, "ancestors_up");
+
+ skel->bss->terminal_cgroup = 0;
+}
+
+/* Early termination prints parent only. */
+static void test_early_termination(struct cgroup_iter *skel)
+{
+ /* terminate the walk after the first element is processed. */
+ skel->bss->terminate_early = 1;
+
+ snprintf(expected_output, sizeof(expected_output),
+ PROLOGUE "%8llu\n" EPILOGUE, cg_id[PARENT]);
+
+ read_from_cgroup_iter(skel->progs.cgroup_id_printer, cg_fd[PARENT],
+ BPF_CGROUP_ITER_DESCENDANTS_PRE, "early_termination");
+
+ skel->bss->terminate_early = 0;
+}
+
+/* Waling self prints self only. */
+static void test_walk_self_only(struct cgroup_iter *skel)
+{
+ snprintf(expected_output, sizeof(expected_output),
+ PROLOGUE "%8llu\n" EPILOGUE, cg_id[PARENT]);
+
+ read_from_cgroup_iter(skel->progs.cgroup_id_printer, cg_fd[PARENT],
+ BPF_CGROUP_ITER_SELF_ONLY, "self_only");
+}
+
+static void test_walk_dead_self_only(struct cgroup_iter *skel)
+{
+ DECLARE_LIBBPF_OPTS(bpf_iter_attach_opts, opts);
+ char expected_output[128], buf[128];
+ const char *cgrp_name = "/dead";
+ union bpf_iter_link_info linfo;
+ int len, cgrp_fd, iter_fd;
+ struct bpf_link *link;
+ size_t left;
+ char *p;
+
+ cgrp_fd = create_and_get_cgroup(cgrp_name);
+ if (!ASSERT_GE(cgrp_fd, 0, "create cgrp"))
+ return;
+
+ /* The cgroup will be dead during read() iteration, so it only has
+ * epilogue in the output
+ */
+ snprintf(expected_output, sizeof(expected_output), EPILOGUE);
+
+ memset(&linfo, 0, sizeof(linfo));
+ linfo.cgroup.cgroup_fd = cgrp_fd;
+ linfo.cgroup.order = BPF_CGROUP_ITER_SELF_ONLY;
+ opts.link_info = &linfo;
+ opts.link_info_len = sizeof(linfo);
+
+ link = bpf_program__attach_iter(skel->progs.cgroup_id_printer, &opts);
+ if (!ASSERT_OK_PTR(link, "attach_iter"))
+ goto close_cgrp;
+
+ iter_fd = bpf_iter_create(bpf_link__fd(link));
+ if (!ASSERT_GE(iter_fd, 0, "iter_create"))
+ goto free_link;
+
+ /* Close link fd and cgroup fd */
+ bpf_link__destroy(link);
+ close(cgrp_fd);
+
+ /* Remove cgroup to mark it as dead */
+ remove_cgroup(cgrp_name);
+
+ /* Two kern_sync_rcu() and usleep() pairs are used to wait for the
+ * releases of cgroup css, and the last kern_sync_rcu() and usleep()
+ * pair is used to wait for the free of cgroup itself.
+ */
+ kern_sync_rcu();
+ usleep(8000);
+ kern_sync_rcu();
+ usleep(8000);
+ kern_sync_rcu();
+ usleep(1000);
+
+ memset(buf, 0, sizeof(buf));
+ left = ARRAY_SIZE(buf);
+ p = buf;
+ while ((len = read(iter_fd, p, left)) > 0) {
+ p += len;
+ left -= len;
+ }
+
+ ASSERT_STREQ(buf, expected_output, "dead cgroup output");
+
+ /* read() after iter finishes should be ok. */
+ if (len == 0)
+ ASSERT_OK(read(iter_fd, buf, sizeof(buf)), "second_read");
+
+ close(iter_fd);
+ return;
+free_link:
+ bpf_link__destroy(link);
+close_cgrp:
+ close(cgrp_fd);
+}
+
+static void test_walk_self_only_css_task(void)
+{
+ struct iters_css_task *skel;
+ int err;
+
+ skel = iters_css_task__open();
+ if (!ASSERT_OK_PTR(skel, "skel_open"))
+ return;
+
+ bpf_program__set_autoload(skel->progs.cgroup_id_printer, true);
+
+ err = iters_css_task__load(skel);
+ if (!ASSERT_OK(err, "skel_load"))
+ goto cleanup;
+
+ err = join_cgroup(cg_path[CHILD2]);
+ if (!ASSERT_OK(err, "join_cgroup"))
+ goto cleanup;
+
+ skel->bss->target_pid = getpid();
+ snprintf(expected_output, sizeof(expected_output),
+ PROLOGUE "%8llu\n" EPILOGUE, cg_id[CHILD2]);
+ read_from_cgroup_iter(skel->progs.cgroup_id_printer, cg_fd[CHILD2],
+ BPF_CGROUP_ITER_SELF_ONLY, "test_walk_self_only_css_task");
+ ASSERT_EQ(skel->bss->css_task_cnt, 1, "css_task_cnt");
+cleanup:
+ iters_css_task__destroy(skel);
+}
+
+void test_cgroup_iter(void)
+{
+ struct cgroup_iter *skel = NULL;
+
+ if (setup_cgroup_environment())
+ return;
+
+ if (setup_cgroups())
+ goto out;
+
+ skel = cgroup_iter__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "cgroup_iter__open_and_load"))
+ goto out;
+
+ if (test__start_subtest("cgroup_iter__invalid_cgroup"))
+ test_invalid_cgroup(skel);
+ if (test__start_subtest("cgroup_iter__invalid_cgroup_spec"))
+ test_invalid_cgroup_spec(skel);
+ if (test__start_subtest("cgroup_iter__preorder"))
+ test_walk_preorder(skel);
+ if (test__start_subtest("cgroup_iter__postorder"))
+ test_walk_postorder(skel);
+ if (test__start_subtest("cgroup_iter__ancestors_up_walk"))
+ test_walk_ancestors_up(skel);
+ if (test__start_subtest("cgroup_iter__early_termination"))
+ test_early_termination(skel);
+ if (test__start_subtest("cgroup_iter__self_only"))
+ test_walk_self_only(skel);
+ if (test__start_subtest("cgroup_iter__dead_self_only"))
+ test_walk_dead_self_only(skel);
+ if (test__start_subtest("cgroup_iter__self_only_css_task"))
+ test_walk_self_only_css_task();
+
+out:
+ cgroup_iter__destroy(skel);
+ cleanup_cgroups();
+ cleanup_cgroup_environment();
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/cgroup_link.c b/tools/testing/selftests/bpf/prog_tests/cgroup_link.c
index 6e04f8d1d15b..15093a69510e 100644
--- a/tools/testing/selftests/bpf/prog_tests/cgroup_link.c
+++ b/tools/testing/selftests/bpf/prog_tests/cgroup_link.c
@@ -2,6 +2,7 @@
#include <test_progs.h>
#include "cgroup_helpers.h"
+#include "testing_helpers.h"
#include "test_cgroup_link.skel.h"
static __u32 duration = 0;
@@ -23,7 +24,7 @@ int ping_and_check(int exp_calls, int exp_alt_calls)
return 0;
}
-void test_cgroup_link(void)
+void serial_test_cgroup_link(void)
{
struct {
const char *path;
@@ -37,7 +38,8 @@ void test_cgroup_link(void)
int last_cg = ARRAY_SIZE(cgs) - 1, cg_nr = ARRAY_SIZE(cgs);
DECLARE_LIBBPF_OPTS(bpf_link_update_opts, link_upd_opts);
struct bpf_link *links[ARRAY_SIZE(cgs)] = {}, *tmp_link;
- __u32 prog_ids[ARRAY_SIZE(cgs)], prog_cnt = 0, attach_flags;
+ __u32 prog_ids[ARRAY_SIZE(cgs)], prog_cnt = 0, attach_flags, prog_id;
+ struct bpf_link_info info;
int i = 0, err, prog_fd;
bool detach_legacy = false;
@@ -52,7 +54,7 @@ void test_cgroup_link(void)
for (i = 0; i < cg_nr; i++) {
cgs[i].fd = create_and_get_cgroup(cgs[i].path);
- if (CHECK(cgs[i].fd < 0, "cg_create", "fail: %d\n", cgs[i].fd))
+ if (!ASSERT_GE(cgs[i].fd, 0, "cg_create"))
goto cleanup;
}
@@ -63,17 +65,15 @@ void test_cgroup_link(void)
for (i = 0; i < cg_nr; i++) {
links[i] = bpf_program__attach_cgroup(skel->progs.egress,
cgs[i].fd);
- if (CHECK(IS_ERR(links[i]), "cg_attach", "i: %d, err: %ld\n",
- i, PTR_ERR(links[i])))
+ if (!ASSERT_OK_PTR(links[i], "cg_attach"))
goto cleanup;
}
ping_and_check(cg_nr, 0);
- /* query the number of effective progs and attach flags in root cg */
+ /* query the number of attached progs and attach flags in root cg */
err = bpf_prog_query(cgs[0].fd, BPF_CGROUP_INET_EGRESS,
- BPF_F_QUERY_EFFECTIVE, &attach_flags, NULL,
- &prog_cnt);
+ 0, &attach_flags, NULL, &prog_cnt);
CHECK_FAIL(err);
CHECK_FAIL(attach_flags != BPF_F_ALLOW_MULTI);
if (CHECK(prog_cnt != 1, "effect_cnt", "exp %d, got %d\n", 1, prog_cnt))
@@ -84,17 +84,15 @@ void test_cgroup_link(void)
BPF_F_QUERY_EFFECTIVE, NULL, NULL,
&prog_cnt);
CHECK_FAIL(err);
- CHECK_FAIL(attach_flags != BPF_F_ALLOW_MULTI);
if (CHECK(prog_cnt != cg_nr, "effect_cnt", "exp %d, got %d\n",
cg_nr, prog_cnt))
goto cleanup;
/* query the effective prog IDs in last cg */
err = bpf_prog_query(cgs[last_cg].fd, BPF_CGROUP_INET_EGRESS,
- BPF_F_QUERY_EFFECTIVE, &attach_flags,
- prog_ids, &prog_cnt);
+ BPF_F_QUERY_EFFECTIVE, NULL, prog_ids,
+ &prog_cnt);
CHECK_FAIL(err);
- CHECK_FAIL(attach_flags != BPF_F_ALLOW_MULTI);
if (CHECK(prog_cnt != cg_nr, "effect_cnt", "exp %d, got %d\n",
cg_nr, prog_cnt))
goto cleanup;
@@ -119,8 +117,7 @@ void test_cgroup_link(void)
links[last_cg] = bpf_program__attach_cgroup(skel->progs.egress,
cgs[last_cg].fd);
- if (CHECK(IS_ERR(links[last_cg]), "cg_attach", "err: %ld\n",
- PTR_ERR(links[last_cg])))
+ if (!ASSERT_OK_PTR(links[last_cg], "cg_attach"))
goto cleanup;
ping_and_check(cg_nr + 1, 0);
@@ -145,7 +142,7 @@ void test_cgroup_link(void)
/* attempt to mix in with multi-attach bpf_link */
tmp_link = bpf_program__attach_cgroup(skel->progs.egress,
cgs[last_cg].fd);
- if (CHECK(!IS_ERR(tmp_link), "cg_attach_fail", "unexpected success!\n")) {
+ if (!ASSERT_ERR_PTR(tmp_link, "cg_attach_fail")) {
bpf_link__destroy(tmp_link);
goto cleanup;
}
@@ -163,8 +160,7 @@ void test_cgroup_link(void)
/* attach back link-based one */
links[last_cg] = bpf_program__attach_cgroup(skel->progs.egress,
cgs[last_cg].fd);
- if (CHECK(IS_ERR(links[last_cg]), "cg_attach", "err: %ld\n",
- PTR_ERR(links[last_cg])))
+ if (!ASSERT_OK_PTR(links[last_cg], "cg_attach"))
goto cleanup;
ping_and_check(cg_nr, 0);
@@ -219,6 +215,22 @@ void test_cgroup_link(void)
/* BPF programs should still get called */
ping_and_check(0, cg_nr);
+ prog_id = link_info_prog_id(links[0], &info);
+ CHECK(prog_id == 0, "link_info", "failed\n");
+ CHECK(info.cgroup.cgroup_id == 0, "cgroup_id", "unexpected %llu\n", info.cgroup.cgroup_id);
+
+ err = bpf_link__detach(links[0]);
+ if (CHECK(err, "link_detach", "failed %d\n", err))
+ goto cleanup;
+
+ /* cgroup_id should be zero in link_info */
+ prog_id = link_info_prog_id(links[0], &info);
+ CHECK(prog_id == 0, "link_info", "failed\n");
+ CHECK(info.cgroup.cgroup_id != 0, "cgroup_id", "unexpected %llu\n", info.cgroup.cgroup_id);
+
+ /* First BPF program shouldn't be called anymore */
+ ping_and_check(0, cg_nr - 1);
+
/* leave cgroup and remove them, don't detach programs */
cleanup_cgroup_environment();
@@ -231,8 +243,7 @@ cleanup:
BPF_CGROUP_INET_EGRESS);
for (i = 0; i < cg_nr; i++) {
- if (!IS_ERR(links[i]))
- bpf_link__destroy(links[i]);
+ bpf_link__destroy(links[i]);
}
test_cgroup_link__destroy(skel);
diff --git a/tools/testing/selftests/bpf/prog_tests/cgroup_skb_sk_lookup.c b/tools/testing/selftests/bpf/prog_tests/cgroup_skb_sk_lookup.c
index 059047af7df3..b9dc4ec655b5 100644
--- a/tools/testing/selftests/bpf/prog_tests/cgroup_skb_sk_lookup.c
+++ b/tools/testing/selftests/bpf/prog_tests/cgroup_skb_sk_lookup.c
@@ -13,7 +13,7 @@ static void run_lookup_test(__u16 *g_serv_port, int out_sk)
socklen_t addr_len = sizeof(addr);
__u32 duration = 0;
- serv_sk = start_server(AF_INET6, SOCK_STREAM);
+ serv_sk = start_server(AF_INET6, SOCK_STREAM, NULL, 0, 0);
if (CHECK(serv_sk < 0, "start_server", "failed to start server\n"))
return;
@@ -24,17 +24,13 @@ static void run_lookup_test(__u16 *g_serv_port, int out_sk)
*g_serv_port = addr.sin6_port;
/* Client outside of test cgroup should fail to connect by timeout. */
- err = connect_fd_to_fd(out_sk, serv_sk);
+ err = connect_fd_to_fd(out_sk, serv_sk, 1000);
if (CHECK(!err || errno != EINPROGRESS, "connect_fd_to_fd",
"unexpected result err %d errno %d\n", err, errno))
goto cleanup;
- err = connect_wait(out_sk);
- if (CHECK(err, "connect_wait", "unexpected result %d\n", err))
- goto cleanup;
-
/* Client inside test cgroup should connect just fine. */
- in_sk = connect_to_fd(AF_INET6, SOCK_STREAM, serv_sk);
+ in_sk = connect_to_fd(serv_sk, 0);
if (CHECK(in_sk < 0, "connect_to_fd", "errno %d\n", errno))
goto cleanup;
@@ -64,7 +60,7 @@ static void run_cgroup_bpf_test(const char *cg_path, int out_sk)
goto cleanup;
link = bpf_program__attach_cgroup(skel->progs.ingress_lookup, cgfd);
- if (CHECK(IS_ERR(link), "cgroup_attach", "err: %ld\n", PTR_ERR(link)))
+ if (!ASSERT_OK_PTR(link, "cgroup_attach"))
goto cleanup;
run_lookup_test(&skel->bss->g_serv_port, out_sk);
@@ -85,7 +81,7 @@ void test_cgroup_skb_sk_lookup(void)
* differs from that of testing cgroup. Moving selftests process to
* testing cgroup won't change cgroup id of an already created socket.
*/
- out_sk = socket(AF_INET6, SOCK_STREAM | SOCK_NONBLOCK, 0);
+ out_sk = socket(AF_INET6, SOCK_STREAM, 0);
if (CHECK_FAIL(out_sk < 0))
return;
diff --git a/tools/testing/selftests/bpf/prog_tests/cgroup_tcp_skb.c b/tools/testing/selftests/bpf/prog_tests/cgroup_tcp_skb.c
new file mode 100644
index 000000000000..a1542faf7873
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/cgroup_tcp_skb.c
@@ -0,0 +1,344 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2023 Facebook */
+#include <test_progs.h>
+#include <linux/in6.h>
+#include <sys/socket.h>
+#include <sched.h>
+#include <unistd.h>
+#include "cgroup_helpers.h"
+#include "testing_helpers.h"
+#include "cgroup_tcp_skb.skel.h"
+#include "cgroup_tcp_skb.h"
+#include "network_helpers.h"
+
+#define CGROUP_TCP_SKB_PATH "/test_cgroup_tcp_skb"
+
+static int install_filters(int cgroup_fd,
+ struct bpf_link **egress_link,
+ struct bpf_link **ingress_link,
+ struct bpf_program *egress_prog,
+ struct bpf_program *ingress_prog,
+ struct cgroup_tcp_skb *skel)
+{
+ /* Prepare filters */
+ skel->bss->g_sock_state = 0;
+ skel->bss->g_unexpected = 0;
+ *egress_link =
+ bpf_program__attach_cgroup(egress_prog,
+ cgroup_fd);
+ if (!ASSERT_OK_PTR(egress_link, "egress_link"))
+ return -1;
+ *ingress_link =
+ bpf_program__attach_cgroup(ingress_prog,
+ cgroup_fd);
+ if (!ASSERT_OK_PTR(ingress_link, "ingress_link"))
+ return -1;
+
+ return 0;
+}
+
+static void uninstall_filters(struct bpf_link **egress_link,
+ struct bpf_link **ingress_link)
+{
+ bpf_link__destroy(*egress_link);
+ *egress_link = NULL;
+ bpf_link__destroy(*ingress_link);
+ *ingress_link = NULL;
+}
+
+static int create_client_sock_v6(void)
+{
+ int fd;
+
+ fd = socket(AF_INET6, SOCK_STREAM, 0);
+ if (fd < 0) {
+ perror("socket");
+ return -1;
+ }
+
+ return fd;
+}
+
+/* Connect to the server in a cgroup from the outside of the cgroup. */
+static int talk_to_cgroup(int *client_fd, int *listen_fd, int *service_fd,
+ struct cgroup_tcp_skb *skel)
+{
+ int err, cp;
+ char buf[5];
+ int port;
+
+ /* Create client & server socket */
+ err = join_root_cgroup();
+ if (!ASSERT_OK(err, "join_root_cgroup"))
+ return -1;
+ *client_fd = create_client_sock_v6();
+ if (!ASSERT_GE(*client_fd, 0, "client_fd"))
+ return -1;
+ err = join_cgroup(CGROUP_TCP_SKB_PATH);
+ if (!ASSERT_OK(err, "join_cgroup"))
+ return -1;
+ *listen_fd = start_server(AF_INET6, SOCK_STREAM, NULL, 0, 0);
+ if (!ASSERT_GE(*listen_fd, 0, "listen_fd"))
+ return -1;
+ port = get_socket_local_port(*listen_fd);
+ if (!ASSERT_GE(port, 0, "get_socket_local_port"))
+ return -1;
+ skel->bss->g_sock_port = ntohs(port);
+
+ /* Connect client to server */
+ err = connect_fd_to_fd(*client_fd, *listen_fd, 0);
+ if (!ASSERT_OK(err, "connect_fd_to_fd"))
+ return -1;
+ *service_fd = accept(*listen_fd, NULL, NULL);
+ if (!ASSERT_GE(*service_fd, 0, "service_fd"))
+ return -1;
+ err = join_root_cgroup();
+ if (!ASSERT_OK(err, "join_root_cgroup"))
+ return -1;
+ cp = write(*client_fd, "hello", 5);
+ if (!ASSERT_EQ(cp, 5, "write"))
+ return -1;
+ cp = read(*service_fd, buf, 5);
+ if (!ASSERT_EQ(cp, 5, "read"))
+ return -1;
+
+ return 0;
+}
+
+/* Connect to the server out of a cgroup from inside the cgroup. */
+static int talk_to_outside(int *client_fd, int *listen_fd, int *service_fd,
+ struct cgroup_tcp_skb *skel)
+
+{
+ int err, cp;
+ char buf[5];
+ int port;
+
+ /* Create client & server socket */
+ err = join_root_cgroup();
+ if (!ASSERT_OK(err, "join_root_cgroup"))
+ return -1;
+ *listen_fd = start_server(AF_INET6, SOCK_STREAM, NULL, 0, 0);
+ if (!ASSERT_GE(*listen_fd, 0, "listen_fd"))
+ return -1;
+ err = join_cgroup(CGROUP_TCP_SKB_PATH);
+ if (!ASSERT_OK(err, "join_cgroup"))
+ return -1;
+ *client_fd = create_client_sock_v6();
+ if (!ASSERT_GE(*client_fd, 0, "client_fd"))
+ return -1;
+ err = join_root_cgroup();
+ if (!ASSERT_OK(err, "join_root_cgroup"))
+ return -1;
+ port = get_socket_local_port(*listen_fd);
+ if (!ASSERT_GE(port, 0, "get_socket_local_port"))
+ return -1;
+ skel->bss->g_sock_port = ntohs(port);
+
+ /* Connect client to server */
+ err = connect_fd_to_fd(*client_fd, *listen_fd, 0);
+ if (!ASSERT_OK(err, "connect_fd_to_fd"))
+ return -1;
+ *service_fd = accept(*listen_fd, NULL, NULL);
+ if (!ASSERT_GE(*service_fd, 0, "service_fd"))
+ return -1;
+ cp = write(*client_fd, "hello", 5);
+ if (!ASSERT_EQ(cp, 5, "write"))
+ return -1;
+ cp = read(*service_fd, buf, 5);
+ if (!ASSERT_EQ(cp, 5, "read"))
+ return -1;
+
+ return 0;
+}
+
+static int close_connection(int *closing_fd, int *peer_fd, int *listen_fd,
+ struct cgroup_tcp_skb *skel)
+{
+ __u32 saved_packet_count = 0;
+ int err;
+ int i;
+
+ /* Wait for ACKs to be sent */
+ saved_packet_count = skel->bss->g_packet_count;
+ usleep(100000); /* 0.1s */
+ for (i = 0;
+ skel->bss->g_packet_count != saved_packet_count && i < 10;
+ i++) {
+ saved_packet_count = skel->bss->g_packet_count;
+ usleep(100000); /* 0.1s */
+ }
+ if (!ASSERT_EQ(skel->bss->g_packet_count, saved_packet_count,
+ "packet_count"))
+ return -1;
+
+ skel->bss->g_packet_count = 0;
+ saved_packet_count = 0;
+
+ /* Half shutdown to make sure the closing socket having a chance to
+ * receive a FIN from the peer.
+ */
+ err = shutdown(*closing_fd, SHUT_WR);
+ if (!ASSERT_OK(err, "shutdown closing_fd"))
+ return -1;
+
+ /* Wait for FIN and the ACK of the FIN to be observed */
+ for (i = 0;
+ skel->bss->g_packet_count < saved_packet_count + 2 && i < 10;
+ i++)
+ usleep(100000); /* 0.1s */
+ if (!ASSERT_GE(skel->bss->g_packet_count, saved_packet_count + 2,
+ "packet_count"))
+ return -1;
+
+ saved_packet_count = skel->bss->g_packet_count;
+
+ /* Fully shutdown the connection */
+ err = close(*peer_fd);
+ if (!ASSERT_OK(err, "close peer_fd"))
+ return -1;
+ *peer_fd = -1;
+
+ /* Wait for FIN and the ACK of the FIN to be observed */
+ for (i = 0;
+ skel->bss->g_packet_count < saved_packet_count + 2 && i < 10;
+ i++)
+ usleep(100000); /* 0.1s */
+ if (!ASSERT_GE(skel->bss->g_packet_count, saved_packet_count + 2,
+ "packet_count"))
+ return -1;
+
+ err = close(*closing_fd);
+ if (!ASSERT_OK(err, "close closing_fd"))
+ return -1;
+ *closing_fd = -1;
+
+ close(*listen_fd);
+ *listen_fd = -1;
+
+ return 0;
+}
+
+/* This test case includes four scenarios:
+ * 1. Connect to the server from outside the cgroup and close the connection
+ * from outside the cgroup.
+ * 2. Connect to the server from outside the cgroup and close the connection
+ * from inside the cgroup.
+ * 3. Connect to the server from inside the cgroup and close the connection
+ * from outside the cgroup.
+ * 4. Connect to the server from inside the cgroup and close the connection
+ * from inside the cgroup.
+ *
+ * The test case is to verify that cgroup_skb/{egress,ingress} filters
+ * receive expected packets including SYN, SYN/ACK, ACK, FIN, and FIN/ACK.
+ */
+void test_cgroup_tcp_skb(void)
+{
+ struct bpf_link *ingress_link = NULL;
+ struct bpf_link *egress_link = NULL;
+ int client_fd = -1, listen_fd = -1;
+ struct cgroup_tcp_skb *skel;
+ int service_fd = -1;
+ int cgroup_fd = -1;
+ int err;
+
+ skel = cgroup_tcp_skb__open_and_load();
+ if (!ASSERT_OK(!skel, "skel_open_load"))
+ return;
+
+ err = setup_cgroup_environment();
+ if (!ASSERT_OK(err, "setup_cgroup_environment"))
+ goto cleanup;
+
+ cgroup_fd = create_and_get_cgroup(CGROUP_TCP_SKB_PATH);
+ if (!ASSERT_GE(cgroup_fd, 0, "cgroup_fd"))
+ goto cleanup;
+
+ /* Scenario 1 */
+ err = install_filters(cgroup_fd, &egress_link, &ingress_link,
+ skel->progs.server_egress,
+ skel->progs.server_ingress,
+ skel);
+ if (!ASSERT_OK(err, "install_filters"))
+ goto cleanup;
+
+ err = talk_to_cgroup(&client_fd, &listen_fd, &service_fd, skel);
+ if (!ASSERT_OK(err, "talk_to_cgroup"))
+ goto cleanup;
+
+ err = close_connection(&client_fd, &service_fd, &listen_fd, skel);
+ if (!ASSERT_OK(err, "close_connection"))
+ goto cleanup;
+
+ ASSERT_EQ(skel->bss->g_unexpected, 0, "g_unexpected");
+ ASSERT_EQ(skel->bss->g_sock_state, CLOSED, "g_sock_state");
+
+ uninstall_filters(&egress_link, &ingress_link);
+
+ /* Scenario 2 */
+ err = install_filters(cgroup_fd, &egress_link, &ingress_link,
+ skel->progs.server_egress_srv,
+ skel->progs.server_ingress_srv,
+ skel);
+
+ err = talk_to_cgroup(&client_fd, &listen_fd, &service_fd, skel);
+ if (!ASSERT_OK(err, "talk_to_cgroup"))
+ goto cleanup;
+
+ err = close_connection(&service_fd, &client_fd, &listen_fd, skel);
+ if (!ASSERT_OK(err, "close_connection"))
+ goto cleanup;
+
+ ASSERT_EQ(skel->bss->g_unexpected, 0, "g_unexpected");
+ ASSERT_EQ(skel->bss->g_sock_state, TIME_WAIT, "g_sock_state");
+
+ uninstall_filters(&egress_link, &ingress_link);
+
+ /* Scenario 3 */
+ err = install_filters(cgroup_fd, &egress_link, &ingress_link,
+ skel->progs.client_egress_srv,
+ skel->progs.client_ingress_srv,
+ skel);
+
+ err = talk_to_outside(&client_fd, &listen_fd, &service_fd, skel);
+ if (!ASSERT_OK(err, "talk_to_outside"))
+ goto cleanup;
+
+ err = close_connection(&service_fd, &client_fd, &listen_fd, skel);
+ if (!ASSERT_OK(err, "close_connection"))
+ goto cleanup;
+
+ ASSERT_EQ(skel->bss->g_unexpected, 0, "g_unexpected");
+ ASSERT_EQ(skel->bss->g_sock_state, CLOSED, "g_sock_state");
+
+ uninstall_filters(&egress_link, &ingress_link);
+
+ /* Scenario 4 */
+ err = install_filters(cgroup_fd, &egress_link, &ingress_link,
+ skel->progs.client_egress,
+ skel->progs.client_ingress,
+ skel);
+
+ err = talk_to_outside(&client_fd, &listen_fd, &service_fd, skel);
+ if (!ASSERT_OK(err, "talk_to_outside"))
+ goto cleanup;
+
+ err = close_connection(&client_fd, &service_fd, &listen_fd, skel);
+ if (!ASSERT_OK(err, "close_connection"))
+ goto cleanup;
+
+ ASSERT_EQ(skel->bss->g_unexpected, 0, "g_unexpected");
+ ASSERT_EQ(skel->bss->g_sock_state, TIME_WAIT, "g_sock_state");
+
+ uninstall_filters(&egress_link, &ingress_link);
+
+cleanup:
+ close(client_fd);
+ close(listen_fd);
+ close(service_fd);
+ close(cgroup_fd);
+ bpf_link__destroy(egress_link);
+ bpf_link__destroy(ingress_link);
+ cleanup_cgroup_environment();
+ cgroup_tcp_skb__destroy(skel);
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/cgroup_v1v2.c b/tools/testing/selftests/bpf/prog_tests/cgroup_v1v2.c
new file mode 100644
index 000000000000..addf720428f7
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/cgroup_v1v2.c
@@ -0,0 +1,79 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include <test_progs.h>
+
+#include "connect4_dropper.skel.h"
+
+#include "cgroup_helpers.h"
+#include "network_helpers.h"
+
+static int run_test(int cgroup_fd, int server_fd, bool classid)
+{
+ struct network_helper_opts opts = {
+ .must_fail = true,
+ };
+ struct connect4_dropper *skel;
+ int fd, err = 0;
+
+ skel = connect4_dropper__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "skel_open"))
+ return -1;
+
+ skel->links.connect_v4_dropper =
+ bpf_program__attach_cgroup(skel->progs.connect_v4_dropper,
+ cgroup_fd);
+ if (!ASSERT_OK_PTR(skel->links.connect_v4_dropper, "prog_attach")) {
+ err = -1;
+ goto out;
+ }
+
+ if (classid && !ASSERT_OK(join_classid(), "join_classid")) {
+ err = -1;
+ goto out;
+ }
+
+ fd = connect_to_fd_opts(server_fd, &opts);
+ if (fd < 0)
+ err = -1;
+ else
+ close(fd);
+out:
+ connect4_dropper__destroy(skel);
+ return err;
+}
+
+void test_cgroup_v1v2(void)
+{
+ struct network_helper_opts opts = {};
+ int server_fd, client_fd, cgroup_fd;
+ static const int port = 60120;
+
+ /* Step 1: Check base connectivity works without any BPF. */
+ server_fd = start_server(AF_INET, SOCK_STREAM, NULL, port, 0);
+ if (!ASSERT_GE(server_fd, 0, "server_fd"))
+ return;
+ client_fd = connect_to_fd_opts(server_fd, &opts);
+ if (!ASSERT_GE(client_fd, 0, "client_fd")) {
+ close(server_fd);
+ return;
+ }
+ close(client_fd);
+ close(server_fd);
+
+ /* Step 2: Check BPF policy prog attached to cgroups drops connectivity. */
+ cgroup_fd = test__join_cgroup("/connect_dropper");
+ if (!ASSERT_GE(cgroup_fd, 0, "cgroup_fd"))
+ return;
+ server_fd = start_server(AF_INET, SOCK_STREAM, NULL, port, 0);
+ if (!ASSERT_GE(server_fd, 0, "server_fd")) {
+ close(cgroup_fd);
+ return;
+ }
+ ASSERT_OK(run_test(cgroup_fd, server_fd, false), "cgroup-v2-only");
+ setup_classid_environment();
+ set_classid();
+ ASSERT_OK(run_test(cgroup_fd, server_fd, true), "cgroup-v1v2");
+ cleanup_classid_environment();
+ close(server_fd);
+ close(cgroup_fd);
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/cgrp_kfunc.c b/tools/testing/selftests/bpf/prog_tests/cgrp_kfunc.c
new file mode 100644
index 000000000000..adda85f97058
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/cgrp_kfunc.c
@@ -0,0 +1,109 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2022 Meta Platforms, Inc. and affiliates. */
+
+#define _GNU_SOURCE
+#include <cgroup_helpers.h>
+#include <test_progs.h>
+
+#include "cgrp_kfunc_failure.skel.h"
+#include "cgrp_kfunc_success.skel.h"
+
+static struct cgrp_kfunc_success *open_load_cgrp_kfunc_skel(void)
+{
+ struct cgrp_kfunc_success *skel;
+ int err;
+
+ skel = cgrp_kfunc_success__open();
+ if (!ASSERT_OK_PTR(skel, "skel_open"))
+ return NULL;
+
+ skel->bss->pid = getpid();
+
+ err = cgrp_kfunc_success__load(skel);
+ if (!ASSERT_OK(err, "skel_load"))
+ goto cleanup;
+
+ return skel;
+
+cleanup:
+ cgrp_kfunc_success__destroy(skel);
+ return NULL;
+}
+
+static int mkdir_rm_test_dir(void)
+{
+ int fd;
+ const char *cgrp_path = "cgrp_kfunc";
+
+ fd = create_and_get_cgroup(cgrp_path);
+ if (!ASSERT_GT(fd, 0, "mkdir_cgrp_fd"))
+ return -1;
+
+ close(fd);
+ remove_cgroup(cgrp_path);
+
+ return 0;
+}
+
+static void run_success_test(const char *prog_name)
+{
+ struct cgrp_kfunc_success *skel;
+ struct bpf_program *prog;
+ struct bpf_link *link = NULL;
+
+ skel = open_load_cgrp_kfunc_skel();
+ if (!ASSERT_OK_PTR(skel, "open_load_skel"))
+ return;
+
+ if (!ASSERT_OK(skel->bss->err, "pre_mkdir_err"))
+ goto cleanup;
+
+ prog = bpf_object__find_program_by_name(skel->obj, prog_name);
+ if (!ASSERT_OK_PTR(prog, "bpf_object__find_program_by_name"))
+ goto cleanup;
+
+ link = bpf_program__attach(prog);
+ if (!ASSERT_OK_PTR(link, "attached_link"))
+ goto cleanup;
+
+ ASSERT_EQ(skel->bss->invocations, 0, "pre_rmdir_count");
+ if (!ASSERT_OK(mkdir_rm_test_dir(), "cgrp_mkdir"))
+ goto cleanup;
+
+ ASSERT_EQ(skel->bss->invocations, 1, "post_rmdir_count");
+ ASSERT_OK(skel->bss->err, "post_rmdir_err");
+
+cleanup:
+ bpf_link__destroy(link);
+ cgrp_kfunc_success__destroy(skel);
+}
+
+static const char * const success_tests[] = {
+ "test_cgrp_acquire_release_argument",
+ "test_cgrp_acquire_leave_in_map",
+ "test_cgrp_xchg_release",
+ "test_cgrp_get_release",
+ "test_cgrp_get_ancestors",
+ "test_cgrp_from_id",
+};
+
+void test_cgrp_kfunc(void)
+{
+ int i, err;
+
+ err = setup_cgroup_environment();
+ if (!ASSERT_OK(err, "cgrp_env_setup"))
+ goto cleanup;
+
+ for (i = 0; i < ARRAY_SIZE(success_tests); i++) {
+ if (!test__start_subtest(success_tests[i]))
+ continue;
+
+ run_success_test(success_tests[i]);
+ }
+
+ RUN_TESTS(cgrp_kfunc_failure);
+
+cleanup:
+ cleanup_cgroup_environment();
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/cgrp_local_storage.c b/tools/testing/selftests/bpf/prog_tests/cgrp_local_storage.c
new file mode 100644
index 000000000000..747761572098
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/cgrp_local_storage.c
@@ -0,0 +1,361 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2022 Meta Platforms, Inc. and affiliates.*/
+
+#define _GNU_SOURCE
+#include <unistd.h>
+#include <sys/syscall.h>
+#include <sys/types.h>
+#include <test_progs.h>
+#include "cgrp_ls_tp_btf.skel.h"
+#include "cgrp_ls_recursion.skel.h"
+#include "cgrp_ls_attach_cgroup.skel.h"
+#include "cgrp_ls_negative.skel.h"
+#include "cgrp_ls_sleepable.skel.h"
+#include "network_helpers.h"
+#include "cgroup_helpers.h"
+
+struct socket_cookie {
+ __u64 cookie_key;
+ __u64 cookie_value;
+};
+
+static bool is_cgroup1;
+static int target_hid;
+
+#define CGROUP_MODE_SET(skel) \
+{ \
+ skel->bss->is_cgroup1 = is_cgroup1; \
+ skel->bss->target_hid = target_hid; \
+}
+
+static void cgroup_mode_value_init(bool cgroup, int hid)
+{
+ is_cgroup1 = cgroup;
+ target_hid = hid;
+}
+
+static void test_tp_btf(int cgroup_fd)
+{
+ struct cgrp_ls_tp_btf *skel;
+ long val1 = 1, val2 = 0;
+ int err;
+
+ skel = cgrp_ls_tp_btf__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "skel_open_and_load"))
+ return;
+
+ CGROUP_MODE_SET(skel);
+
+ /* populate a value in map_b */
+ err = bpf_map_update_elem(bpf_map__fd(skel->maps.map_b), &cgroup_fd, &val1, BPF_ANY);
+ if (!ASSERT_OK(err, "map_update_elem"))
+ goto out;
+
+ /* check value */
+ err = bpf_map_lookup_elem(bpf_map__fd(skel->maps.map_b), &cgroup_fd, &val2);
+ if (!ASSERT_OK(err, "map_lookup_elem"))
+ goto out;
+ if (!ASSERT_EQ(val2, 1, "map_lookup_elem, invalid val"))
+ goto out;
+
+ /* delete value */
+ err = bpf_map_delete_elem(bpf_map__fd(skel->maps.map_b), &cgroup_fd);
+ if (!ASSERT_OK(err, "map_delete_elem"))
+ goto out;
+
+ skel->bss->target_pid = syscall(SYS_gettid);
+
+ err = cgrp_ls_tp_btf__attach(skel);
+ if (!ASSERT_OK(err, "skel_attach"))
+ goto out;
+
+ syscall(SYS_gettid);
+ syscall(SYS_gettid);
+
+ skel->bss->target_pid = 0;
+
+ /* 3x syscalls: 1x attach and 2x gettid */
+ ASSERT_EQ(skel->bss->enter_cnt, 3, "enter_cnt");
+ ASSERT_EQ(skel->bss->exit_cnt, 3, "exit_cnt");
+ ASSERT_EQ(skel->bss->mismatch_cnt, 0, "mismatch_cnt");
+out:
+ cgrp_ls_tp_btf__destroy(skel);
+}
+
+static void test_attach_cgroup(int cgroup_fd)
+{
+ int server_fd = 0, client_fd = 0, err = 0;
+ socklen_t addr_len = sizeof(struct sockaddr_in6);
+ struct cgrp_ls_attach_cgroup *skel;
+ __u32 cookie_expected_value;
+ struct sockaddr_in6 addr;
+ struct socket_cookie val;
+
+ skel = cgrp_ls_attach_cgroup__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "skel_open"))
+ return;
+
+ skel->links.set_cookie = bpf_program__attach_cgroup(
+ skel->progs.set_cookie, cgroup_fd);
+ if (!ASSERT_OK_PTR(skel->links.set_cookie, "prog_attach"))
+ goto out;
+
+ skel->links.update_cookie_sockops = bpf_program__attach_cgroup(
+ skel->progs.update_cookie_sockops, cgroup_fd);
+ if (!ASSERT_OK_PTR(skel->links.update_cookie_sockops, "prog_attach"))
+ goto out;
+
+ skel->links.update_cookie_tracing = bpf_program__attach(
+ skel->progs.update_cookie_tracing);
+ if (!ASSERT_OK_PTR(skel->links.update_cookie_tracing, "prog_attach"))
+ goto out;
+
+ server_fd = start_server(AF_INET6, SOCK_STREAM, "::1", 0, 0);
+ if (!ASSERT_GE(server_fd, 0, "start_server"))
+ goto out;
+
+ client_fd = connect_to_fd(server_fd, 0);
+ if (!ASSERT_GE(client_fd, 0, "connect_to_fd"))
+ goto close_server_fd;
+
+ err = bpf_map_lookup_elem(bpf_map__fd(skel->maps.socket_cookies),
+ &cgroup_fd, &val);
+ if (!ASSERT_OK(err, "map_lookup(socket_cookies)"))
+ goto close_client_fd;
+
+ err = getsockname(client_fd, (struct sockaddr *)&addr, &addr_len);
+ if (!ASSERT_OK(err, "getsockname"))
+ goto close_client_fd;
+
+ cookie_expected_value = (ntohs(addr.sin6_port) << 8) | 0xFF;
+ ASSERT_EQ(val.cookie_value, cookie_expected_value, "cookie_value");
+
+close_client_fd:
+ close(client_fd);
+close_server_fd:
+ close(server_fd);
+out:
+ cgrp_ls_attach_cgroup__destroy(skel);
+}
+
+static void test_recursion(int cgroup_fd)
+{
+ struct cgrp_ls_recursion *skel;
+ int err;
+
+ skel = cgrp_ls_recursion__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "skel_open_and_load"))
+ return;
+
+ CGROUP_MODE_SET(skel);
+
+ err = cgrp_ls_recursion__attach(skel);
+ if (!ASSERT_OK(err, "skel_attach"))
+ goto out;
+
+ /* trigger sys_enter, make sure it does not cause deadlock */
+ syscall(SYS_gettid);
+
+out:
+ cgrp_ls_recursion__destroy(skel);
+}
+
+static void test_negative(void)
+{
+ struct cgrp_ls_negative *skel;
+
+ skel = cgrp_ls_negative__open_and_load();
+ if (!ASSERT_ERR_PTR(skel, "skel_open_and_load")) {
+ cgrp_ls_negative__destroy(skel);
+ return;
+ }
+}
+
+static void test_cgroup_iter_sleepable(int cgroup_fd, __u64 cgroup_id)
+{
+ DECLARE_LIBBPF_OPTS(bpf_iter_attach_opts, opts);
+ union bpf_iter_link_info linfo;
+ struct cgrp_ls_sleepable *skel;
+ struct bpf_link *link;
+ int err, iter_fd;
+ char buf[16];
+
+ skel = cgrp_ls_sleepable__open();
+ if (!ASSERT_OK_PTR(skel, "skel_open"))
+ return;
+
+ CGROUP_MODE_SET(skel);
+
+ bpf_program__set_autoload(skel->progs.cgroup_iter, true);
+ err = cgrp_ls_sleepable__load(skel);
+ if (!ASSERT_OK(err, "skel_load"))
+ goto out;
+
+ memset(&linfo, 0, sizeof(linfo));
+ linfo.cgroup.cgroup_fd = cgroup_fd;
+ linfo.cgroup.order = BPF_CGROUP_ITER_SELF_ONLY;
+ opts.link_info = &linfo;
+ opts.link_info_len = sizeof(linfo);
+ link = bpf_program__attach_iter(skel->progs.cgroup_iter, &opts);
+ if (!ASSERT_OK_PTR(link, "attach_iter"))
+ goto out;
+
+ iter_fd = bpf_iter_create(bpf_link__fd(link));
+ if (!ASSERT_GE(iter_fd, 0, "iter_create"))
+ goto out;
+
+ /* trigger the program run */
+ (void)read(iter_fd, buf, sizeof(buf));
+
+ ASSERT_EQ(skel->bss->cgroup_id, cgroup_id, "cgroup_id");
+
+ close(iter_fd);
+out:
+ cgrp_ls_sleepable__destroy(skel);
+}
+
+static void test_yes_rcu_lock(__u64 cgroup_id)
+{
+ struct cgrp_ls_sleepable *skel;
+ int err;
+
+ skel = cgrp_ls_sleepable__open();
+ if (!ASSERT_OK_PTR(skel, "skel_open"))
+ return;
+
+ CGROUP_MODE_SET(skel);
+ skel->bss->target_pid = syscall(SYS_gettid);
+
+ bpf_program__set_autoload(skel->progs.yes_rcu_lock, true);
+ err = cgrp_ls_sleepable__load(skel);
+ if (!ASSERT_OK(err, "skel_load"))
+ goto out;
+
+ err = cgrp_ls_sleepable__attach(skel);
+ if (!ASSERT_OK(err, "skel_attach"))
+ goto out;
+
+ syscall(SYS_getpgid);
+
+ ASSERT_EQ(skel->bss->cgroup_id, cgroup_id, "cgroup_id");
+out:
+ cgrp_ls_sleepable__destroy(skel);
+}
+
+static void test_no_rcu_lock(void)
+{
+ struct cgrp_ls_sleepable *skel;
+ int err;
+
+ skel = cgrp_ls_sleepable__open();
+ if (!ASSERT_OK_PTR(skel, "skel_open"))
+ return;
+
+ CGROUP_MODE_SET(skel);
+
+ bpf_program__set_autoload(skel->progs.no_rcu_lock, true);
+ err = cgrp_ls_sleepable__load(skel);
+ ASSERT_ERR(err, "skel_load");
+
+ cgrp_ls_sleepable__destroy(skel);
+}
+
+static void test_cgrp1_no_rcu_lock(void)
+{
+ struct cgrp_ls_sleepable *skel;
+ int err;
+
+ skel = cgrp_ls_sleepable__open();
+ if (!ASSERT_OK_PTR(skel, "skel_open"))
+ return;
+
+ CGROUP_MODE_SET(skel);
+
+ bpf_program__set_autoload(skel->progs.cgrp1_no_rcu_lock, true);
+ err = cgrp_ls_sleepable__load(skel);
+ ASSERT_OK(err, "skel_load");
+
+ cgrp_ls_sleepable__destroy(skel);
+}
+
+static void cgrp2_local_storage(void)
+{
+ __u64 cgroup_id;
+ int cgroup_fd;
+
+ cgroup_fd = test__join_cgroup("/cgrp_local_storage");
+ if (!ASSERT_GE(cgroup_fd, 0, "join_cgroup /cgrp_local_storage"))
+ return;
+
+ cgroup_mode_value_init(0, -1);
+
+ cgroup_id = get_cgroup_id("/cgrp_local_storage");
+ if (test__start_subtest("tp_btf"))
+ test_tp_btf(cgroup_fd);
+ if (test__start_subtest("attach_cgroup"))
+ test_attach_cgroup(cgroup_fd);
+ if (test__start_subtest("recursion"))
+ test_recursion(cgroup_fd);
+ if (test__start_subtest("negative"))
+ test_negative();
+ if (test__start_subtest("cgroup_iter_sleepable"))
+ test_cgroup_iter_sleepable(cgroup_fd, cgroup_id);
+ if (test__start_subtest("yes_rcu_lock"))
+ test_yes_rcu_lock(cgroup_id);
+ if (test__start_subtest("no_rcu_lock"))
+ test_no_rcu_lock();
+
+ close(cgroup_fd);
+}
+
+static void cgrp1_local_storage(void)
+{
+ int cgrp1_fd, cgrp1_hid, cgrp1_id, err;
+
+ /* Setup cgroup1 hierarchy */
+ err = setup_classid_environment();
+ if (!ASSERT_OK(err, "setup_classid_environment"))
+ return;
+
+ err = join_classid();
+ if (!ASSERT_OK(err, "join_cgroup1"))
+ goto cleanup;
+
+ cgrp1_fd = open_classid();
+ if (!ASSERT_GE(cgrp1_fd, 0, "cgroup1 fd"))
+ goto cleanup;
+
+ cgrp1_id = get_classid_cgroup_id();
+ if (!ASSERT_GE(cgrp1_id, 0, "cgroup1 id"))
+ goto close_fd;
+
+ cgrp1_hid = get_cgroup1_hierarchy_id("net_cls");
+ if (!ASSERT_GE(cgrp1_hid, 0, "cgroup1 hid"))
+ goto close_fd;
+
+ cgroup_mode_value_init(1, cgrp1_hid);
+
+ if (test__start_subtest("cgrp1_tp_btf"))
+ test_tp_btf(cgrp1_fd);
+ if (test__start_subtest("cgrp1_recursion"))
+ test_recursion(cgrp1_fd);
+ if (test__start_subtest("cgrp1_negative"))
+ test_negative();
+ if (test__start_subtest("cgrp1_iter_sleepable"))
+ test_cgroup_iter_sleepable(cgrp1_fd, cgrp1_id);
+ if (test__start_subtest("cgrp1_yes_rcu_lock"))
+ test_yes_rcu_lock(cgrp1_id);
+ if (test__start_subtest("cgrp1_no_rcu_lock"))
+ test_cgrp1_no_rcu_lock();
+
+close_fd:
+ close(cgrp1_fd);
+cleanup:
+ cleanup_classid_environment();
+}
+
+void test_cgrp_local_storage(void)
+{
+ cgrp2_local_storage();
+ cgrp1_local_storage();
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/check_mtu.c b/tools/testing/selftests/bpf/prog_tests/check_mtu.c
new file mode 100644
index 000000000000..2a9a30650350
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/check_mtu.c
@@ -0,0 +1,206 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2020 Jesper Dangaard Brouer */
+
+#include <linux/if_link.h> /* before test_progs.h, avoid bpf_util.h redefines */
+#include <test_progs.h>
+#include "test_check_mtu.skel.h"
+#include "network_helpers.h"
+
+#include <stdlib.h>
+#include <inttypes.h>
+
+#define IFINDEX_LO 1
+
+static __u32 duration; /* Hint: needed for CHECK macro */
+
+static int read_mtu_device_lo(void)
+{
+ const char *filename = "/sys/class/net/lo/mtu";
+ char buf[11] = {};
+ int value, n, fd;
+
+ fd = open(filename, 0, O_RDONLY);
+ if (fd == -1)
+ return -1;
+
+ n = read(fd, buf, sizeof(buf));
+ close(fd);
+
+ if (n == -1)
+ return -2;
+
+ value = strtoimax(buf, NULL, 10);
+ if (errno == ERANGE)
+ return -3;
+
+ return value;
+}
+
+static void test_check_mtu_xdp_attach(void)
+{
+ struct bpf_link_info link_info;
+ __u32 link_info_len = sizeof(link_info);
+ struct test_check_mtu *skel;
+ struct bpf_program *prog;
+ struct bpf_link *link;
+ int err = 0;
+ int fd;
+
+ skel = test_check_mtu__open_and_load();
+ if (CHECK(!skel, "open and load skel", "failed"))
+ return; /* Exit if e.g. helper unknown to kernel */
+
+ prog = skel->progs.xdp_use_helper_basic;
+
+ link = bpf_program__attach_xdp(prog, IFINDEX_LO);
+ if (!ASSERT_OK_PTR(link, "link_attach"))
+ goto out;
+ skel->links.xdp_use_helper_basic = link;
+
+ memset(&link_info, 0, sizeof(link_info));
+ fd = bpf_link__fd(link);
+ err = bpf_link_get_info_by_fd(fd, &link_info, &link_info_len);
+ if (CHECK(err, "link_info", "failed: %d\n", err))
+ goto out;
+
+ CHECK(link_info.type != BPF_LINK_TYPE_XDP, "link_type",
+ "got %u != exp %u\n", link_info.type, BPF_LINK_TYPE_XDP);
+ CHECK(link_info.xdp.ifindex != IFINDEX_LO, "link_ifindex",
+ "got %u != exp %u\n", link_info.xdp.ifindex, IFINDEX_LO);
+
+ err = bpf_link__detach(link);
+ CHECK(err, "link_detach", "failed %d\n", err);
+
+out:
+ test_check_mtu__destroy(skel);
+}
+
+static void test_check_mtu_run_xdp(struct test_check_mtu *skel,
+ struct bpf_program *prog,
+ __u32 mtu_expect)
+{
+ int retval_expect = XDP_PASS;
+ __u32 mtu_result = 0;
+ char buf[256] = {};
+ int err, prog_fd = bpf_program__fd(prog);
+ LIBBPF_OPTS(bpf_test_run_opts, topts,
+ .repeat = 1,
+ .data_in = &pkt_v4,
+ .data_size_in = sizeof(pkt_v4),
+ .data_out = buf,
+ .data_size_out = sizeof(buf),
+ );
+
+ err = bpf_prog_test_run_opts(prog_fd, &topts);
+ ASSERT_OK(err, "test_run");
+ ASSERT_EQ(topts.retval, retval_expect, "retval");
+
+ /* Extract MTU that BPF-prog got */
+ mtu_result = skel->bss->global_bpf_mtu_xdp;
+ ASSERT_EQ(mtu_result, mtu_expect, "MTU-compare-user");
+}
+
+
+static void test_check_mtu_xdp(__u32 mtu, __u32 ifindex)
+{
+ struct test_check_mtu *skel;
+ int err;
+
+ skel = test_check_mtu__open();
+ if (CHECK(!skel, "skel_open", "failed"))
+ return;
+
+ /* Update "constants" in BPF-prog *BEFORE* libbpf load */
+ skel->rodata->GLOBAL_USER_MTU = mtu;
+ skel->rodata->GLOBAL_USER_IFINDEX = ifindex;
+
+ err = test_check_mtu__load(skel);
+ if (CHECK(err, "skel_load", "failed: %d\n", err))
+ goto cleanup;
+
+ test_check_mtu_run_xdp(skel, skel->progs.xdp_use_helper, mtu);
+ test_check_mtu_run_xdp(skel, skel->progs.xdp_exceed_mtu, mtu);
+ test_check_mtu_run_xdp(skel, skel->progs.xdp_minus_delta, mtu);
+ test_check_mtu_run_xdp(skel, skel->progs.xdp_input_len, mtu);
+ test_check_mtu_run_xdp(skel, skel->progs.xdp_input_len_exceed, mtu);
+
+cleanup:
+ test_check_mtu__destroy(skel);
+}
+
+static void test_check_mtu_run_tc(struct test_check_mtu *skel,
+ struct bpf_program *prog,
+ __u32 mtu_expect)
+{
+ int retval_expect = BPF_OK;
+ __u32 mtu_result = 0;
+ char buf[256] = {};
+ int err, prog_fd = bpf_program__fd(prog);
+ LIBBPF_OPTS(bpf_test_run_opts, topts,
+ .data_in = &pkt_v4,
+ .data_size_in = sizeof(pkt_v4),
+ .data_out = buf,
+ .data_size_out = sizeof(buf),
+ .repeat = 1,
+ );
+
+ err = bpf_prog_test_run_opts(prog_fd, &topts);
+ ASSERT_OK(err, "test_run");
+ ASSERT_EQ(topts.retval, retval_expect, "retval");
+
+ /* Extract MTU that BPF-prog got */
+ mtu_result = skel->bss->global_bpf_mtu_tc;
+ ASSERT_EQ(mtu_result, mtu_expect, "MTU-compare-user");
+}
+
+
+static void test_check_mtu_tc(__u32 mtu, __u32 ifindex)
+{
+ struct test_check_mtu *skel;
+ int err;
+
+ skel = test_check_mtu__open();
+ if (CHECK(!skel, "skel_open", "failed"))
+ return;
+
+ /* Update "constants" in BPF-prog *BEFORE* libbpf load */
+ skel->rodata->GLOBAL_USER_MTU = mtu;
+ skel->rodata->GLOBAL_USER_IFINDEX = ifindex;
+
+ err = test_check_mtu__load(skel);
+ if (CHECK(err, "skel_load", "failed: %d\n", err))
+ goto cleanup;
+
+ test_check_mtu_run_tc(skel, skel->progs.tc_use_helper, mtu);
+ test_check_mtu_run_tc(skel, skel->progs.tc_exceed_mtu, mtu);
+ test_check_mtu_run_tc(skel, skel->progs.tc_exceed_mtu_da, mtu);
+ test_check_mtu_run_tc(skel, skel->progs.tc_minus_delta, mtu);
+ test_check_mtu_run_tc(skel, skel->progs.tc_input_len, mtu);
+ test_check_mtu_run_tc(skel, skel->progs.tc_input_len_exceed, mtu);
+cleanup:
+ test_check_mtu__destroy(skel);
+}
+
+void serial_test_check_mtu(void)
+{
+ int mtu_lo;
+
+ if (test__start_subtest("bpf_check_mtu XDP-attach"))
+ test_check_mtu_xdp_attach();
+
+ mtu_lo = read_mtu_device_lo();
+ if (CHECK(mtu_lo < 0, "reading MTU value", "failed (err:%d)", mtu_lo))
+ return;
+
+ if (test__start_subtest("bpf_check_mtu XDP-run"))
+ test_check_mtu_xdp(mtu_lo, 0);
+
+ if (test__start_subtest("bpf_check_mtu XDP-run ifindex-lookup"))
+ test_check_mtu_xdp(mtu_lo, IFINDEX_LO);
+
+ if (test__start_subtest("bpf_check_mtu TC-run"))
+ test_check_mtu_tc(mtu_lo, 0);
+
+ if (test__start_subtest("bpf_check_mtu TC-run ifindex-lookup"))
+ test_check_mtu_tc(mtu_lo, IFINDEX_LO);
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/cls_redirect.c b/tools/testing/selftests/bpf/prog_tests/cls_redirect.c
index f259085cca6a..2a55f717fc07 100644
--- a/tools/testing/selftests/bpf/prog_tests/cls_redirect.c
+++ b/tools/testing/selftests/bpf/prog_tests/cls_redirect.c
@@ -7,15 +7,20 @@
#include <string.h>
#include <linux/pkt_cls.h>
+#include <netinet/tcp.h>
#include <test_progs.h>
#include "progs/test_cls_redirect.h"
#include "test_cls_redirect.skel.h"
+#include "test_cls_redirect_dynptr.skel.h"
+#include "test_cls_redirect_subprogs.skel.h"
#define ENCAP_IP INADDR_LOOPBACK
#define ENCAP_PORT (1234)
+static int duration = 0;
+
struct addr_port {
in_port_t port;
union {
@@ -157,7 +162,7 @@ static socklen_t prepare_addr(struct sockaddr_storage *addr, int family)
}
}
-static bool was_decapsulated(struct bpf_prog_test_run_attr *tattr)
+static bool was_decapsulated(struct bpf_test_run_opts *tattr)
{
return tattr->data_size_out < tattr->data_size_in;
}
@@ -361,30 +366,18 @@ static void close_fds(int *fds, int n)
close(fds[i]);
}
-void test_cls_redirect(void)
+static void test_cls_redirect_common(struct bpf_program *prog)
{
- struct test_cls_redirect *skel = NULL;
- struct bpf_prog_test_run_attr tattr = {};
+ LIBBPF_OPTS(bpf_test_run_opts, tattr);
int families[] = { AF_INET, AF_INET6 };
struct sockaddr_storage ss;
struct sockaddr *addr;
socklen_t slen;
- int i, j, err;
-
+ int i, j, err, prog_fd;
int servers[__NR_KIND][ARRAY_SIZE(families)] = {};
int conns[__NR_KIND][ARRAY_SIZE(families)] = {};
struct tuple tuples[__NR_KIND][ARRAY_SIZE(families)];
- skel = test_cls_redirect__open();
- if (CHECK_FAIL(!skel))
- return;
-
- skel->rodata->ENCAPSULATION_IP = htonl(ENCAP_IP);
- skel->rodata->ENCAPSULATION_PORT = htons(ENCAP_PORT);
-
- if (CHECK_FAIL(test_cls_redirect__load(skel)))
- goto cleanup;
-
addr = (struct sockaddr *)&ss;
for (i = 0; i < ARRAY_SIZE(families); i++) {
slen = prepare_addr(&ss, families[i]);
@@ -402,7 +395,7 @@ void test_cls_redirect(void)
goto cleanup;
}
- tattr.prog_fd = bpf_program__fd(skel->progs.cls_redirect);
+ prog_fd = bpf_program__fd(prog);
for (i = 0; i < ARRAY_SIZE(tests); i++) {
struct test_cfg *test = &tests[i];
@@ -423,7 +416,7 @@ void test_cls_redirect(void)
if (CHECK_FAIL(!tattr.data_size_in))
continue;
- err = bpf_prog_test_run_xattr(&tattr);
+ err = bpf_prog_test_run_opts(prog_fd, &tattr);
if (CHECK_FAIL(err))
continue;
@@ -450,7 +443,82 @@ void test_cls_redirect(void)
}
cleanup:
- test_cls_redirect__destroy(skel);
close_fds((int *)servers, sizeof(servers) / sizeof(servers[0][0]));
close_fds((int *)conns, sizeof(conns) / sizeof(conns[0][0]));
}
+
+static void test_cls_redirect_dynptr(void)
+{
+ struct test_cls_redirect_dynptr *skel;
+ int err;
+
+ skel = test_cls_redirect_dynptr__open();
+ if (!ASSERT_OK_PTR(skel, "skel_open"))
+ return;
+
+ skel->rodata->ENCAPSULATION_IP = htonl(ENCAP_IP);
+ skel->rodata->ENCAPSULATION_PORT = htons(ENCAP_PORT);
+
+ err = test_cls_redirect_dynptr__load(skel);
+ if (!ASSERT_OK(err, "skel_load"))
+ goto cleanup;
+
+ test_cls_redirect_common(skel->progs.cls_redirect);
+
+cleanup:
+ test_cls_redirect_dynptr__destroy(skel);
+}
+
+static void test_cls_redirect_inlined(void)
+{
+ struct test_cls_redirect *skel;
+ int err;
+
+ skel = test_cls_redirect__open();
+ if (CHECK(!skel, "skel_open", "failed\n"))
+ return;
+
+ skel->rodata->ENCAPSULATION_IP = htonl(ENCAP_IP);
+ skel->rodata->ENCAPSULATION_PORT = htons(ENCAP_PORT);
+
+ err = test_cls_redirect__load(skel);
+ if (CHECK(err, "skel_load", "failed: %d\n", err))
+ goto cleanup;
+
+ test_cls_redirect_common(skel->progs.cls_redirect);
+
+cleanup:
+ test_cls_redirect__destroy(skel);
+}
+
+static void test_cls_redirect_subprogs(void)
+{
+ struct test_cls_redirect_subprogs *skel;
+ int err;
+
+ skel = test_cls_redirect_subprogs__open();
+ if (CHECK(!skel, "skel_open", "failed\n"))
+ return;
+
+ skel->rodata->ENCAPSULATION_IP = htonl(ENCAP_IP);
+ skel->rodata->ENCAPSULATION_PORT = htons(ENCAP_PORT);
+
+ err = test_cls_redirect_subprogs__load(skel);
+ if (CHECK(err, "skel_load", "failed: %d\n", err))
+ goto cleanup;
+
+ test_cls_redirect_common(skel->progs.cls_redirect);
+
+cleanup:
+ test_cls_redirect_subprogs__destroy(skel);
+}
+
+void test_cls_redirect(void)
+{
+ if (test__start_subtest("cls_redirect_inlined"))
+ test_cls_redirect_inlined();
+ if (test__start_subtest("cls_redirect_subprogs"))
+ test_cls_redirect_subprogs();
+ if (test__start_subtest("cls_redirect_dynptr"))
+ test_cls_redirect_dynptr();
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/connect_force_port.c b/tools/testing/selftests/bpf/prog_tests/connect_force_port.c
index 17bbf76812ca..24d553109f8d 100644
--- a/tools/testing/selftests/bpf/prog_tests/connect_force_port.c
+++ b/tools/testing/selftests/bpf/prog_tests/connect_force_port.c
@@ -51,24 +51,25 @@ static int run_test(int cgroup_fd, int server_fd, int family, int type)
bool v4 = family == AF_INET;
__u16 expected_local_port = v4 ? 22222 : 22223;
__u16 expected_peer_port = 60000;
- struct bpf_prog_load_attr attr = {
- .file = v4 ? "./connect_force_port4.o" :
- "./connect_force_port6.o",
- };
struct bpf_program *prog;
struct bpf_object *obj;
- int xlate_fd, fd, err;
+ const char *obj_file = v4 ? "connect_force_port4.bpf.o" : "connect_force_port6.bpf.o";
+ int fd, err;
__u32 duration = 0;
- err = bpf_prog_load_xattr(&attr, &obj, &xlate_fd);
- if (err) {
- log_err("Failed to load BPF object");
+ obj = bpf_object__open_file(obj_file, NULL);
+ if (!ASSERT_OK_PTR(obj, "bpf_obj_open"))
return -1;
+
+ err = bpf_object__load(obj);
+ if (!ASSERT_OK(err, "bpf_obj_load")) {
+ err = -EIO;
+ goto close_bpf_object;
}
- prog = bpf_object__find_program_by_title(obj, v4 ?
- "cgroup/connect4" :
- "cgroup/connect6");
+ prog = bpf_object__find_program_by_name(obj, v4 ?
+ "connect4" :
+ "connect6");
if (CHECK(!prog, "find_prog", "connect prog not found\n")) {
err = -EIO;
goto close_bpf_object;
@@ -82,9 +83,9 @@ static int run_test(int cgroup_fd, int server_fd, int family, int type)
goto close_bpf_object;
}
- prog = bpf_object__find_program_by_title(obj, v4 ?
- "cgroup/getpeername4" :
- "cgroup/getpeername6");
+ prog = bpf_object__find_program_by_name(obj, v4 ?
+ "getpeername4" :
+ "getpeername6");
if (CHECK(!prog, "find_prog", "getpeername prog not found\n")) {
err = -EIO;
goto close_bpf_object;
@@ -98,9 +99,9 @@ static int run_test(int cgroup_fd, int server_fd, int family, int type)
goto close_bpf_object;
}
- prog = bpf_object__find_program_by_title(obj, v4 ?
- "cgroup/getsockname4" :
- "cgroup/getsockname6");
+ prog = bpf_object__find_program_by_name(obj, v4 ?
+ "getsockname4" :
+ "getsockname6");
if (CHECK(!prog, "find_prog", "getsockname prog not found\n")) {
err = -EIO;
goto close_bpf_object;
@@ -114,7 +115,7 @@ static int run_test(int cgroup_fd, int server_fd, int family, int type)
goto close_bpf_object;
}
- fd = connect_to_fd(family, type, server_fd);
+ fd = connect_to_fd(server_fd, 0);
if (fd < 0) {
err = -1;
goto close_bpf_object;
@@ -137,25 +138,25 @@ void test_connect_force_port(void)
if (CHECK_FAIL(cgroup_fd < 0))
return;
- server_fd = start_server_with_port(AF_INET, SOCK_STREAM, 60123);
+ server_fd = start_server(AF_INET, SOCK_STREAM, NULL, 60123, 0);
if (CHECK_FAIL(server_fd < 0))
goto close_cgroup_fd;
CHECK_FAIL(run_test(cgroup_fd, server_fd, AF_INET, SOCK_STREAM));
close(server_fd);
- server_fd = start_server_with_port(AF_INET6, SOCK_STREAM, 60124);
+ server_fd = start_server(AF_INET6, SOCK_STREAM, NULL, 60124, 0);
if (CHECK_FAIL(server_fd < 0))
goto close_cgroup_fd;
CHECK_FAIL(run_test(cgroup_fd, server_fd, AF_INET6, SOCK_STREAM));
close(server_fd);
- server_fd = start_server_with_port(AF_INET, SOCK_DGRAM, 60123);
+ server_fd = start_server(AF_INET, SOCK_DGRAM, NULL, 60123, 0);
if (CHECK_FAIL(server_fd < 0))
goto close_cgroup_fd;
CHECK_FAIL(run_test(cgroup_fd, server_fd, AF_INET, SOCK_DGRAM));
close(server_fd);
- server_fd = start_server_with_port(AF_INET6, SOCK_DGRAM, 60124);
+ server_fd = start_server(AF_INET6, SOCK_DGRAM, NULL, 60124, 0);
if (CHECK_FAIL(server_fd < 0))
goto close_cgroup_fd;
CHECK_FAIL(run_test(cgroup_fd, server_fd, AF_INET6, SOCK_DGRAM));
diff --git a/tools/testing/selftests/bpf/prog_tests/connect_ping.c b/tools/testing/selftests/bpf/prog_tests/connect_ping.c
new file mode 100644
index 000000000000..40fe571f2fe7
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/connect_ping.c
@@ -0,0 +1,178 @@
+// SPDX-License-Identifier: GPL-2.0-only
+
+/*
+ * Copyright 2022 Google LLC.
+ */
+
+#define _GNU_SOURCE
+#include <sys/mount.h>
+
+#include "test_progs.h"
+#include "cgroup_helpers.h"
+#include "network_helpers.h"
+
+#include "connect_ping.skel.h"
+
+/* 2001:db8::1 */
+#define BINDADDR_V6 { { { 0x20,0x01,0x0d,0xb8,0,0,0,0,0,0,0,0,0,0,0,1 } } }
+static const struct in6_addr bindaddr_v6 = BINDADDR_V6;
+
+static void subtest(int cgroup_fd, struct connect_ping *skel,
+ int family, int do_bind)
+{
+ struct sockaddr_in sa4 = {
+ .sin_family = AF_INET,
+ .sin_addr.s_addr = htonl(INADDR_LOOPBACK),
+ };
+ struct sockaddr_in6 sa6 = {
+ .sin6_family = AF_INET6,
+ .sin6_addr = IN6ADDR_LOOPBACK_INIT,
+ };
+ struct sockaddr *sa = NULL;
+ socklen_t sa_len;
+ int protocol = -1;
+ int sock_fd;
+
+ switch (family) {
+ case AF_INET:
+ sa = (struct sockaddr *)&sa4;
+ sa_len = sizeof(sa4);
+ protocol = IPPROTO_ICMP;
+ break;
+ case AF_INET6:
+ sa = (struct sockaddr *)&sa6;
+ sa_len = sizeof(sa6);
+ protocol = IPPROTO_ICMPV6;
+ break;
+ }
+
+ memset(skel->bss, 0, sizeof(*skel->bss));
+ skel->bss->do_bind = do_bind;
+
+ sock_fd = socket(family, SOCK_DGRAM, protocol);
+ if (!ASSERT_GE(sock_fd, 0, "sock-create"))
+ return;
+
+ if (!ASSERT_OK(connect(sock_fd, sa, sa_len), "connect"))
+ goto close_sock;
+
+ if (!ASSERT_EQ(skel->bss->invocations_v4, family == AF_INET ? 1 : 0,
+ "invocations_v4"))
+ goto close_sock;
+ if (!ASSERT_EQ(skel->bss->invocations_v6, family == AF_INET6 ? 1 : 0,
+ "invocations_v6"))
+ goto close_sock;
+ if (!ASSERT_EQ(skel->bss->has_error, 0, "has_error"))
+ goto close_sock;
+
+ if (!ASSERT_OK(getsockname(sock_fd, sa, &sa_len),
+ "getsockname"))
+ goto close_sock;
+
+ switch (family) {
+ case AF_INET:
+ if (!ASSERT_EQ(sa4.sin_family, family, "sin_family"))
+ goto close_sock;
+ if (!ASSERT_EQ(sa4.sin_addr.s_addr,
+ htonl(do_bind ? 0x01010101 : INADDR_LOOPBACK),
+ "sin_addr"))
+ goto close_sock;
+ break;
+ case AF_INET6:
+ if (!ASSERT_EQ(sa6.sin6_family, AF_INET6, "sin6_family"))
+ goto close_sock;
+ if (!ASSERT_EQ(memcmp(&sa6.sin6_addr,
+ do_bind ? &bindaddr_v6 : &in6addr_loopback,
+ sizeof(sa6.sin6_addr)),
+ 0, "sin6_addr"))
+ goto close_sock;
+ break;
+ }
+
+close_sock:
+ close(sock_fd);
+}
+
+void test_connect_ping(void)
+{
+ struct connect_ping *skel;
+ int cgroup_fd;
+
+ if (!ASSERT_OK(unshare(CLONE_NEWNET | CLONE_NEWNS), "unshare"))
+ return;
+
+ /* overmount sysfs, and making original sysfs private so overmount
+ * does not propagate to other mntns.
+ */
+ if (!ASSERT_OK(mount("none", "/sys", NULL, MS_PRIVATE, NULL),
+ "remount-private-sys"))
+ return;
+ if (!ASSERT_OK(mount("sysfs", "/sys", "sysfs", 0, NULL),
+ "mount-sys"))
+ return;
+ if (!ASSERT_OK(mount("bpffs", "/sys/fs/bpf", "bpf", 0, NULL),
+ "mount-bpf"))
+ goto clean_mount;
+
+ if (!ASSERT_OK(system("ip link set dev lo up"), "lo-up"))
+ goto clean_mount;
+ if (!ASSERT_OK(system("ip addr add 1.1.1.1 dev lo"), "lo-addr-v4"))
+ goto clean_mount;
+ if (!ASSERT_OK(system("ip -6 addr add 2001:db8::1 dev lo"), "lo-addr-v6"))
+ goto clean_mount;
+ if (write_sysctl("/proc/sys/net/ipv4/ping_group_range", "0 0"))
+ goto clean_mount;
+
+ cgroup_fd = test__join_cgroup("/connect_ping");
+ if (!ASSERT_GE(cgroup_fd, 0, "cg-create"))
+ goto clean_mount;
+
+ skel = connect_ping__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "skel-load"))
+ goto close_cgroup;
+ skel->links.connect_v4_prog =
+ bpf_program__attach_cgroup(skel->progs.connect_v4_prog, cgroup_fd);
+ if (!ASSERT_OK_PTR(skel->links.connect_v4_prog, "cg-attach-v4"))
+ goto skel_destroy;
+ skel->links.connect_v6_prog =
+ bpf_program__attach_cgroup(skel->progs.connect_v6_prog, cgroup_fd);
+ if (!ASSERT_OK_PTR(skel->links.connect_v6_prog, "cg-attach-v6"))
+ goto skel_destroy;
+
+ /* Connect a v4 ping socket to localhost, assert that only v4 is called,
+ * and called exactly once, and that the socket's bound address is
+ * original loopback address.
+ */
+ if (test__start_subtest("ipv4"))
+ subtest(cgroup_fd, skel, AF_INET, 0);
+
+ /* Connect a v4 ping socket to localhost, assert that only v4 is called,
+ * and called exactly once, and that the socket's bound address is
+ * address we explicitly bound.
+ */
+ if (test__start_subtest("ipv4-bind"))
+ subtest(cgroup_fd, skel, AF_INET, 1);
+
+ /* Connect a v6 ping socket to localhost, assert that only v6 is called,
+ * and called exactly once, and that the socket's bound address is
+ * original loopback address.
+ */
+ if (test__start_subtest("ipv6"))
+ subtest(cgroup_fd, skel, AF_INET6, 0);
+
+ /* Connect a v6 ping socket to localhost, assert that only v6 is called,
+ * and called exactly once, and that the socket's bound address is
+ * address we explicitly bound.
+ */
+ if (test__start_subtest("ipv6-bind"))
+ subtest(cgroup_fd, skel, AF_INET6, 1);
+
+skel_destroy:
+ connect_ping__destroy(skel);
+
+close_cgroup:
+ close(cgroup_fd);
+
+clean_mount:
+ umount2("/sys", MNT_DETACH);
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/core_autosize.c b/tools/testing/selftests/bpf/prog_tests/core_autosize.c
new file mode 100644
index 000000000000..f2ce4fd1cdae
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/core_autosize.c
@@ -0,0 +1,223 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2020 Facebook */
+
+#include <test_progs.h>
+#include <bpf/btf.h>
+
+/* real layout and sizes according to test's (32-bit) BTF
+ * needs to be defined before skeleton is included */
+struct test_struct___real {
+ unsigned int ptr; /* can't use `void *`, it is always 8 byte in BPF target */
+ unsigned int val2;
+ unsigned long long val1;
+ unsigned short val3;
+ unsigned char val4;
+ unsigned char _pad;
+};
+
+#include "test_core_autosize.skel.h"
+
+static int duration = 0;
+
+static struct {
+ unsigned long long ptr_samesized;
+ unsigned long long val1_samesized;
+ unsigned long long val2_samesized;
+ unsigned long long val3_samesized;
+ unsigned long long val4_samesized;
+ struct test_struct___real output_samesized;
+
+ unsigned long long ptr_downsized;
+ unsigned long long val1_downsized;
+ unsigned long long val2_downsized;
+ unsigned long long val3_downsized;
+ unsigned long long val4_downsized;
+ struct test_struct___real output_downsized;
+
+ unsigned long long ptr_probed;
+ unsigned long long val1_probed;
+ unsigned long long val2_probed;
+ unsigned long long val3_probed;
+ unsigned long long val4_probed;
+
+ unsigned long long ptr_signed;
+ unsigned long long val1_signed;
+ unsigned long long val2_signed;
+ unsigned long long val3_signed;
+ unsigned long long val4_signed;
+ struct test_struct___real output_signed;
+} out;
+
+void test_core_autosize(void)
+{
+ char btf_file[] = "/tmp/core_autosize.btf.XXXXXX";
+ int err, fd = -1, zero = 0;
+ int char_id, short_id, int_id, long_long_id, void_ptr_id, id;
+ DECLARE_LIBBPF_OPTS(bpf_object_open_opts, open_opts);
+ struct test_core_autosize* skel = NULL;
+ struct bpf_program *prog;
+ struct bpf_map *bss_map;
+ struct btf *btf = NULL;
+ size_t written;
+ const void *raw_data;
+ __u32 raw_sz;
+ FILE *f = NULL;
+
+ btf = btf__new_empty();
+ if (!ASSERT_OK_PTR(btf, "empty_btf"))
+ return;
+ /* Emit the following struct with 32-bit pointer size:
+ *
+ * struct test_struct {
+ * void *ptr;
+ * unsigned long val2;
+ * unsigned long long val1;
+ * unsigned short val3;
+ * unsigned char val4;
+ * char: 8;
+ * };
+ *
+ * This struct is going to be used as the "kernel BTF" for this test.
+ * It's equivalent memory-layout-wise to test_struct__real above.
+ */
+
+ /* force 32-bit pointer size */
+ btf__set_pointer_size(btf, 4);
+
+ char_id = btf__add_int(btf, "unsigned char", 1, 0);
+ ASSERT_EQ(char_id, 1, "char_id");
+ short_id = btf__add_int(btf, "unsigned short", 2, 0);
+ ASSERT_EQ(short_id, 2, "short_id");
+ /* "long unsigned int" of 4 byte size tells BTF that sizeof(void *) == 4 */
+ int_id = btf__add_int(btf, "long unsigned int", 4, 0);
+ ASSERT_EQ(int_id, 3, "int_id");
+ long_long_id = btf__add_int(btf, "unsigned long long", 8, 0);
+ ASSERT_EQ(long_long_id, 4, "long_long_id");
+ void_ptr_id = btf__add_ptr(btf, 0);
+ ASSERT_EQ(void_ptr_id, 5, "void_ptr_id");
+
+ id = btf__add_struct(btf, "test_struct", 20 /* bytes */);
+ ASSERT_EQ(id, 6, "struct_id");
+ err = btf__add_field(btf, "ptr", void_ptr_id, 0, 0);
+ err = err ?: btf__add_field(btf, "val2", int_id, 32, 0);
+ err = err ?: btf__add_field(btf, "val1", long_long_id, 64, 0);
+ err = err ?: btf__add_field(btf, "val3", short_id, 128, 0);
+ err = err ?: btf__add_field(btf, "val4", char_id, 144, 0);
+ ASSERT_OK(err, "struct_fields");
+
+ fd = mkstemp(btf_file);
+ if (CHECK(fd < 0, "btf_tmp", "failed to create file: %d\n", fd))
+ goto cleanup;
+ f = fdopen(fd, "w");
+ if (!ASSERT_OK_PTR(f, "btf_fdopen"))
+ goto cleanup;
+
+ raw_data = btf__raw_data(btf, &raw_sz);
+ if (!ASSERT_OK_PTR(raw_data, "raw_data"))
+ goto cleanup;
+ written = fwrite(raw_data, 1, raw_sz, f);
+ if (CHECK(written != raw_sz, "btf_write", "written: %zu, errno: %d\n", written, errno))
+ goto cleanup;
+ fflush(f);
+ fclose(f);
+ f = NULL;
+ close(fd);
+ fd = -1;
+
+ /* open and load BPF program with custom BTF as the kernel BTF */
+ open_opts.btf_custom_path = btf_file;
+ skel = test_core_autosize__open_opts(&open_opts);
+ if (!ASSERT_OK_PTR(skel, "skel_open"))
+ goto cleanup;
+
+ /* disable handle_signed() for now */
+ prog = bpf_object__find_program_by_name(skel->obj, "handle_signed");
+ if (!ASSERT_OK_PTR(prog, "prog_find"))
+ goto cleanup;
+ bpf_program__set_autoload(prog, false);
+
+ err = bpf_object__load(skel->obj);
+ if (!ASSERT_OK(err, "prog_load"))
+ goto cleanup;
+
+ prog = bpf_object__find_program_by_name(skel->obj, "handle_samesize");
+ if (!ASSERT_OK_PTR(prog, "prog_find"))
+ goto cleanup;
+ skel->links.handle_samesize = bpf_program__attach(prog);
+ if (!ASSERT_OK_PTR(skel->links.handle_samesize, "prog_attach"))
+ goto cleanup;
+
+ prog = bpf_object__find_program_by_name(skel->obj, "handle_downsize");
+ if (!ASSERT_OK_PTR(prog, "prog_find"))
+ goto cleanup;
+ skel->links.handle_downsize = bpf_program__attach(prog);
+ if (!ASSERT_OK_PTR(skel->links.handle_downsize, "prog_attach"))
+ goto cleanup;
+
+ prog = bpf_object__find_program_by_name(skel->obj, "handle_probed");
+ if (!ASSERT_OK_PTR(prog, "prog_find"))
+ goto cleanup;
+ skel->links.handle_probed = bpf_program__attach(prog);
+ if (!ASSERT_OK_PTR(skel->links.handle_probed, "prog_attach"))
+ goto cleanup;
+
+ usleep(1);
+
+ bss_map = bpf_object__find_map_by_name(skel->obj, ".bss");
+ if (!ASSERT_OK_PTR(bss_map, "bss_map_find"))
+ goto cleanup;
+
+ err = bpf_map__lookup_elem(bss_map, &zero, sizeof(zero), &out, sizeof(out), 0);
+ if (!ASSERT_OK(err, "bss_lookup"))
+ goto cleanup;
+
+ ASSERT_EQ(out.ptr_samesized, 0x01020304, "ptr_samesized");
+ ASSERT_EQ(out.val1_samesized, 0x1020304050607080, "val1_samesized");
+ ASSERT_EQ(out.val2_samesized, 0x0a0b0c0d, "val2_samesized");
+ ASSERT_EQ(out.val3_samesized, 0xfeed, "val3_samesized");
+ ASSERT_EQ(out.val4_samesized, 0xb9, "val4_samesized");
+ ASSERT_EQ(out.output_samesized.ptr, 0x01020304, "ptr_samesized");
+ ASSERT_EQ(out.output_samesized.val1, 0x1020304050607080, "val1_samesized");
+ ASSERT_EQ(out.output_samesized.val2, 0x0a0b0c0d, "val2_samesized");
+ ASSERT_EQ(out.output_samesized.val3, 0xfeed, "val3_samesized");
+ ASSERT_EQ(out.output_samesized.val4, 0xb9, "val4_samesized");
+
+ ASSERT_EQ(out.ptr_downsized, 0x01020304, "ptr_downsized");
+ ASSERT_EQ(out.val1_downsized, 0x1020304050607080, "val1_downsized");
+ ASSERT_EQ(out.val2_downsized, 0x0a0b0c0d, "val2_downsized");
+ ASSERT_EQ(out.val3_downsized, 0xfeed, "val3_downsized");
+ ASSERT_EQ(out.val4_downsized, 0xb9, "val4_downsized");
+ ASSERT_EQ(out.output_downsized.ptr, 0x01020304, "ptr_downsized");
+ ASSERT_EQ(out.output_downsized.val1, 0x1020304050607080, "val1_downsized");
+ ASSERT_EQ(out.output_downsized.val2, 0x0a0b0c0d, "val2_downsized");
+ ASSERT_EQ(out.output_downsized.val3, 0xfeed, "val3_downsized");
+ ASSERT_EQ(out.output_downsized.val4, 0xb9, "val4_downsized");
+
+ ASSERT_EQ(out.ptr_probed, 0x01020304, "ptr_probed");
+ ASSERT_EQ(out.val1_probed, 0x1020304050607080, "val1_probed");
+ ASSERT_EQ(out.val2_probed, 0x0a0b0c0d, "val2_probed");
+ ASSERT_EQ(out.val3_probed, 0xfeed, "val3_probed");
+ ASSERT_EQ(out.val4_probed, 0xb9, "val4_probed");
+
+ test_core_autosize__destroy(skel);
+ skel = NULL;
+
+ /* now re-load with handle_signed() enabled, it should fail loading */
+ open_opts.btf_custom_path = btf_file;
+ skel = test_core_autosize__open_opts(&open_opts);
+ if (!ASSERT_OK_PTR(skel, "skel_open"))
+ goto cleanup;
+
+ err = test_core_autosize__load(skel);
+ if (!ASSERT_ERR(err, "skel_load"))
+ goto cleanup;
+
+cleanup:
+ if (f)
+ fclose(f);
+ if (fd >= 0)
+ close(fd);
+ remove(btf_file);
+ btf__free(btf);
+ test_core_autosize__destroy(skel);
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/core_extern.c b/tools/testing/selftests/bpf/prog_tests/core_extern.c
index b093787e9448..63a51e9f3630 100644
--- a/tools/testing/selftests/bpf/prog_tests/core_extern.c
+++ b/tools/testing/selftests/bpf/prog_tests/core_extern.c
@@ -39,6 +39,7 @@ static struct test_case {
"CONFIG_STR=\"abracad\"\n"
"CONFIG_MISSING=0",
.data = {
+ .unkn_virt_val = 0,
.bpf_syscall = false,
.tristate_val = TRI_MODULE,
.bool_val = true,
@@ -121,7 +122,7 @@ static struct test_case {
void test_core_extern(void)
{
const uint32_t kern_ver = get_kernel_version();
- int err, duration = 0, i, j;
+ int err, i, j;
struct test_core_extern *skel = NULL;
uint64_t *got, *exp;
int n = sizeof(*skel->data) / sizeof(uint64_t);
@@ -136,19 +137,17 @@ void test_core_extern(void)
continue;
skel = test_core_extern__open_opts(&opts);
- if (CHECK(!skel, "skel_open", "skeleton open failed\n"))
+ if (!ASSERT_OK_PTR(skel, "skel_open"))
goto cleanup;
err = test_core_extern__load(skel);
if (t->fails) {
- CHECK(!err, "skel_load",
- "shouldn't succeed open/load of skeleton\n");
+ ASSERT_ERR(err, "skel_load_should_fail");
goto cleanup;
- } else if (CHECK(err, "skel_load",
- "failed to open/load skeleton\n")) {
+ } else if (!ASSERT_OK(err, "skel_load")) {
goto cleanup;
}
err = test_core_extern__attach(skel);
- if (CHECK(err, "attach_raw_tp", "failed attach: %d\n", err))
+ if (!ASSERT_OK(err, "attach_raw_tp"))
goto cleanup;
usleep(1);
@@ -158,9 +157,7 @@ void test_core_extern(void)
got = (uint64_t *)skel->data;
exp = (uint64_t *)&t->data;
for (j = 0; j < n; j++) {
- CHECK(got[j] != exp[j], "check_res",
- "result #%d: expected %lx, but got %lx\n",
- j, exp[j], got[j]);
+ ASSERT_EQ(got[j], exp[j], "result");
}
cleanup:
test_core_extern__destroy(skel);
diff --git a/tools/testing/selftests/bpf/prog_tests/core_kern.c b/tools/testing/selftests/bpf/prog_tests/core_kern.c
new file mode 100644
index 000000000000..6a5a1c019a5d
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/core_kern.c
@@ -0,0 +1,28 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2021 Facebook */
+
+#include "test_progs.h"
+#include "core_kern.lskel.h"
+
+void test_core_kern_lskel(void)
+{
+ struct core_kern_lskel *skel;
+ int link_fd;
+
+ skel = core_kern_lskel__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "open_and_load"))
+ return;
+
+ link_fd = core_kern_lskel__core_relo_proto__attach(skel);
+ if (!ASSERT_GT(link_fd, 0, "attach(core_relo_proto)"))
+ goto cleanup;
+
+ /* trigger tracepoints */
+ usleep(1);
+ ASSERT_TRUE(skel->bss->proto_out[0], "bpf_core_type_exists");
+ ASSERT_FALSE(skel->bss->proto_out[1], "!bpf_core_type_exists");
+ ASSERT_TRUE(skel->bss->proto_out[2], "bpf_core_type_exists. nested");
+
+cleanup:
+ core_kern_lskel__destroy(skel);
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/core_kern_overflow.c b/tools/testing/selftests/bpf/prog_tests/core_kern_overflow.c
new file mode 100644
index 000000000000..04cc145bc26a
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/core_kern_overflow.c
@@ -0,0 +1,13 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include "test_progs.h"
+#include "core_kern_overflow.lskel.h"
+
+void test_core_kern_overflow_lskel(void)
+{
+ struct core_kern_overflow_lskel *skel;
+
+ skel = core_kern_overflow_lskel__open_and_load();
+ if (!ASSERT_NULL(skel, "open_and_load"))
+ core_kern_overflow_lskel__destroy(skel);
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/core_read_macros.c b/tools/testing/selftests/bpf/prog_tests/core_read_macros.c
new file mode 100644
index 000000000000..96f5cf3c6fa2
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/core_read_macros.c
@@ -0,0 +1,64 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2020 Facebook */
+
+#include <test_progs.h>
+
+struct callback_head {
+ struct callback_head *next;
+ void (*func)(struct callback_head *head);
+};
+
+/* ___shuffled flavor is just an illusion for BPF code, it doesn't really
+ * exist and user-space needs to provide data in the memory layout that
+ * matches callback_head. We just defined ___shuffled flavor to make it easier
+ * to work with the skeleton
+ */
+struct callback_head___shuffled {
+ struct callback_head___shuffled *next;
+ void (*func)(struct callback_head *head);
+};
+
+#include "test_core_read_macros.skel.h"
+
+void test_core_read_macros(void)
+{
+ int duration = 0, err;
+ struct test_core_read_macros* skel;
+ struct test_core_read_macros__bss *bss;
+ struct callback_head u_probe_in;
+ struct callback_head___shuffled u_core_in;
+
+ skel = test_core_read_macros__open_and_load();
+ if (CHECK(!skel, "skel_open", "failed to open skeleton\n"))
+ return;
+ bss = skel->bss;
+ bss->my_pid = getpid();
+
+ /* next pointers have to be set from the kernel side */
+ bss->k_probe_in.func = (void *)(long)0x1234;
+ bss->k_core_in.func = (void *)(long)0xabcd;
+
+ u_probe_in.next = &u_probe_in;
+ u_probe_in.func = (void *)(long)0x5678;
+ bss->u_probe_in = &u_probe_in;
+
+ u_core_in.next = &u_core_in;
+ u_core_in.func = (void *)(long)0xdbca;
+ bss->u_core_in = &u_core_in;
+
+ err = test_core_read_macros__attach(skel);
+ if (CHECK(err, "skel_attach", "skeleton attach failed: %d\n", err))
+ goto cleanup;
+
+ /* trigger tracepoint */
+ usleep(1);
+
+ ASSERT_EQ(bss->k_probe_out, 0x1234, "k_probe_out");
+ ASSERT_EQ(bss->k_core_out, 0xabcd, "k_core_out");
+
+ ASSERT_EQ(bss->u_probe_out, 0x5678, "u_probe_out");
+ ASSERT_EQ(bss->u_core_out, 0xdbca, "u_core_out");
+
+cleanup:
+ test_core_read_macros__destroy(skel);
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/core_reloc.c b/tools/testing/selftests/bpf/prog_tests/core_reloc.c
index 084ed26a7d78..47f42e680105 100644
--- a/tools/testing/selftests/bpf/prog_tests/core_reloc.c
+++ b/tools/testing/selftests/bpf/prog_tests/core_reloc.c
@@ -1,11 +1,40 @@
// SPDX-License-Identifier: GPL-2.0
#include <test_progs.h>
#include "progs/core_reloc_types.h"
+#include "bpf_testmod/bpf_testmod.h"
+#include <linux/limits.h>
#include <sys/mman.h>
#include <sys/syscall.h>
+#include <bpf/btf.h>
+
+static int duration = 0;
#define STRUCT_TO_CHAR_PTR(struct_name) (const char *)&(struct struct_name)
+#define MODULES_CASE(name, pg_name, tp_name) { \
+ .case_name = name, \
+ .bpf_obj_file = "test_core_reloc_module.bpf.o", \
+ .btf_src_file = NULL, /* find in kernel module BTFs */ \
+ .input = "", \
+ .input_len = 0, \
+ .output = STRUCT_TO_CHAR_PTR(core_reloc_module_output) { \
+ .read_ctx_sz = sizeof(struct bpf_testmod_test_read_ctx),\
+ .read_ctx_exists = true, \
+ .buf_exists = true, \
+ .len_exists = true, \
+ .off_exists = true, \
+ .len = 123, \
+ .off = 0, \
+ .comm = "test_progs", \
+ .comm_len = sizeof("test_progs"), \
+ }, \
+ .output_len = sizeof(struct core_reloc_module_output), \
+ .prog_name = pg_name, \
+ .raw_tp_name = tp_name, \
+ .trigger = __trigger_module_test_read, \
+ .needs_testmod = true, \
+}
+
#define FLAVORS_DATA(struct_name) STRUCT_TO_CHAR_PTR(struct_name) { \
.a = 42, \
.b = 0xc001, \
@@ -14,8 +43,10 @@
#define FLAVORS_CASE_COMMON(name) \
.case_name = #name, \
- .bpf_obj_file = "test_core_reloc_flavors.o", \
- .btf_src_file = "btf__core_reloc_" #name ".o" \
+ .bpf_obj_file = "test_core_reloc_flavors.bpf.o", \
+ .btf_src_file = "btf__core_reloc_" #name ".bpf.o", \
+ .raw_tp_name = "sys_enter", \
+ .prog_name = "test_core_flavors" \
#define FLAVORS_CASE(name) { \
FLAVORS_CASE_COMMON(name), \
@@ -37,8 +68,10 @@
#define NESTING_CASE_COMMON(name) \
.case_name = #name, \
- .bpf_obj_file = "test_core_reloc_nesting.o", \
- .btf_src_file = "btf__core_reloc_" #name ".o"
+ .bpf_obj_file = "test_core_reloc_nesting.bpf.o", \
+ .btf_src_file = "btf__core_reloc_" #name ".bpf.o", \
+ .raw_tp_name = "sys_enter", \
+ .prog_name = "test_core_nesting" \
#define NESTING_CASE(name) { \
NESTING_CASE_COMMON(name), \
@@ -51,6 +84,7 @@
#define NESTING_ERR_CASE(name) { \
NESTING_CASE_COMMON(name), \
.fails = true, \
+ .run_btfgen_fails = true, \
}
#define ARRAYS_DATA(struct_name) STRUCT_TO_CHAR_PTR(struct_name) { \
@@ -62,8 +96,10 @@
#define ARRAYS_CASE_COMMON(name) \
.case_name = #name, \
- .bpf_obj_file = "test_core_reloc_arrays.o", \
- .btf_src_file = "btf__core_reloc_" #name ".o"
+ .bpf_obj_file = "test_core_reloc_arrays.bpf.o", \
+ .btf_src_file = "btf__core_reloc_" #name ".bpf.o", \
+ .raw_tp_name = "sys_enter", \
+ .prog_name = "test_core_arrays" \
#define ARRAYS_CASE(name) { \
ARRAYS_CASE_COMMON(name), \
@@ -94,8 +130,10 @@
#define PRIMITIVES_CASE_COMMON(name) \
.case_name = #name, \
- .bpf_obj_file = "test_core_reloc_primitives.o", \
- .btf_src_file = "btf__core_reloc_" #name ".o"
+ .bpf_obj_file = "test_core_reloc_primitives.bpf.o", \
+ .btf_src_file = "btf__core_reloc_" #name ".bpf.o", \
+ .raw_tp_name = "sys_enter", \
+ .prog_name = "test_core_primitives" \
#define PRIMITIVES_CASE(name) { \
PRIMITIVES_CASE_COMMON(name), \
@@ -112,8 +150,8 @@
#define MODS_CASE(name) { \
.case_name = #name, \
- .bpf_obj_file = "test_core_reloc_mods.o", \
- .btf_src_file = "btf__core_reloc_" #name ".o", \
+ .bpf_obj_file = "test_core_reloc_mods.bpf.o", \
+ .btf_src_file = "btf__core_reloc_" #name ".bpf.o", \
.input = STRUCT_TO_CHAR_PTR(core_reloc_##name) { \
.a = 1, \
.b = 2, \
@@ -130,12 +168,14 @@
.e = 5, .f = 6, .g = 7, .h = 8, \
}, \
.output_len = sizeof(struct core_reloc_mods_output), \
+ .raw_tp_name = "sys_enter", \
+ .prog_name = "test_core_mods", \
}
#define PTR_AS_ARR_CASE(name) { \
.case_name = #name, \
- .bpf_obj_file = "test_core_reloc_ptr_as_arr.o", \
- .btf_src_file = "btf__core_reloc_" #name ".o", \
+ .bpf_obj_file = "test_core_reloc_ptr_as_arr.bpf.o", \
+ .btf_src_file = "btf__core_reloc_" #name ".bpf.o", \
.input = (const char *)&(struct core_reloc_##name []){ \
{ .a = 1 }, \
{ .a = 2 }, \
@@ -146,6 +186,8 @@
.a = 3, \
}, \
.output_len = sizeof(struct core_reloc_ptr_as_arr), \
+ .raw_tp_name = "sys_enter", \
+ .prog_name = "test_core_ptr_as_arr", \
}
#define INTS_DATA(struct_name) STRUCT_TO_CHAR_PTR(struct_name) { \
@@ -161,8 +203,10 @@
#define INTS_CASE_COMMON(name) \
.case_name = #name, \
- .bpf_obj_file = "test_core_reloc_ints.o", \
- .btf_src_file = "btf__core_reloc_" #name ".o"
+ .bpf_obj_file = "test_core_reloc_ints.bpf.o", \
+ .btf_src_file = "btf__core_reloc_" #name ".bpf.o", \
+ .raw_tp_name = "sys_enter", \
+ .prog_name = "test_core_ints"
#define INTS_CASE(name) { \
INTS_CASE_COMMON(name), \
@@ -177,68 +221,80 @@
.fails = true, \
}
-#define EXISTENCE_CASE_COMMON(name) \
+#define FIELD_EXISTS_CASE_COMMON(name) \
.case_name = #name, \
- .bpf_obj_file = "test_core_reloc_existence.o", \
- .btf_src_file = "btf__core_reloc_" #name ".o", \
- .relaxed_core_relocs = true
-
-#define EXISTENCE_ERR_CASE(name) { \
- EXISTENCE_CASE_COMMON(name), \
- .fails = true, \
-}
+ .bpf_obj_file = "test_core_reloc_existence.bpf.o", \
+ .btf_src_file = "btf__core_reloc_" #name ".bpf.o", \
+ .raw_tp_name = "sys_enter", \
+ .prog_name = "test_core_existence"
#define BITFIELDS_CASE_COMMON(objfile, test_name_prefix, name) \
.case_name = test_name_prefix#name, \
.bpf_obj_file = objfile, \
- .btf_src_file = "btf__core_reloc_" #name ".o"
+ .btf_src_file = "btf__core_reloc_" #name ".bpf.o"
#define BITFIELDS_CASE(name, ...) { \
- BITFIELDS_CASE_COMMON("test_core_reloc_bitfields_probed.o", \
- "direct:", name), \
+ BITFIELDS_CASE_COMMON("test_core_reloc_bitfields_probed.bpf.o", \
+ "probed:", name), \
.input = STRUCT_TO_CHAR_PTR(core_reloc_##name) __VA_ARGS__, \
.input_len = sizeof(struct core_reloc_##name), \
.output = STRUCT_TO_CHAR_PTR(core_reloc_bitfields_output) \
__VA_ARGS__, \
.output_len = sizeof(struct core_reloc_bitfields_output), \
+ .raw_tp_name = "sys_enter", \
+ .prog_name = "test_core_bitfields", \
}, { \
- BITFIELDS_CASE_COMMON("test_core_reloc_bitfields_direct.o", \
- "probed:", name), \
+ BITFIELDS_CASE_COMMON("test_core_reloc_bitfields_direct.bpf.o", \
+ "direct:", name), \
.input = STRUCT_TO_CHAR_PTR(core_reloc_##name) __VA_ARGS__, \
.input_len = sizeof(struct core_reloc_##name), \
.output = STRUCT_TO_CHAR_PTR(core_reloc_bitfields_output) \
__VA_ARGS__, \
.output_len = sizeof(struct core_reloc_bitfields_output), \
- .direct_raw_tp = true, \
+ .prog_name = "test_core_bitfields_direct", \
}
#define BITFIELDS_ERR_CASE(name) { \
- BITFIELDS_CASE_COMMON("test_core_reloc_bitfields_probed.o", \
+ BITFIELDS_CASE_COMMON("test_core_reloc_bitfields_probed.bpf.o", \
"probed:", name), \
.fails = true, \
+ .run_btfgen_fails = true, \
+ .raw_tp_name = "sys_enter", \
+ .prog_name = "test_core_bitfields", \
}, { \
- BITFIELDS_CASE_COMMON("test_core_reloc_bitfields_direct.o", \
+ BITFIELDS_CASE_COMMON("test_core_reloc_bitfields_direct.bpf.o", \
"direct:", name), \
- .direct_raw_tp = true, \
.fails = true, \
+ .run_btfgen_fails = true, \
+ .prog_name = "test_core_bitfields_direct", \
}
#define SIZE_CASE_COMMON(name) \
.case_name = #name, \
- .bpf_obj_file = "test_core_reloc_size.o", \
- .btf_src_file = "btf__core_reloc_" #name ".o", \
- .relaxed_core_relocs = true
+ .bpf_obj_file = "test_core_reloc_size.bpf.o", \
+ .btf_src_file = "btf__core_reloc_" #name ".bpf.o", \
+ .raw_tp_name = "sys_enter", \
+ .prog_name = "test_core_size"
#define SIZE_OUTPUT_DATA(type) \
STRUCT_TO_CHAR_PTR(core_reloc_size_output) { \
.int_sz = sizeof(((type *)0)->int_field), \
+ .int_off = offsetof(type, int_field), \
.struct_sz = sizeof(((type *)0)->struct_field), \
+ .struct_off = offsetof(type, struct_field), \
.union_sz = sizeof(((type *)0)->union_field), \
+ .union_off = offsetof(type, union_field), \
.arr_sz = sizeof(((type *)0)->arr_field), \
- .arr_elem_sz = sizeof(((type *)0)->arr_field[0]), \
- .ptr_sz = sizeof(((type *)0)->ptr_field), \
- .enum_sz = sizeof(((type *)0)->enum_field), \
+ .arr_off = offsetof(type, arr_field), \
+ .arr_elem_sz = sizeof(((type *)0)->arr_field[1]), \
+ .arr_elem_off = offsetof(type, arr_field[1]), \
+ .ptr_sz = 8, /* always 8-byte pointer for BPF */ \
+ .ptr_off = offsetof(type, ptr_field), \
+ .enum_sz = sizeof(((type *)0)->enum_field), \
+ .enum_off = offsetof(type, enum_field), \
+ .float_sz = sizeof(((type *)0)->float_field), \
+ .float_off = offsetof(type, float_field), \
}
#define SIZE_CASE(name) { \
@@ -251,8 +307,90 @@
#define SIZE_ERR_CASE(name) { \
SIZE_CASE_COMMON(name), \
.fails = true, \
+ .run_btfgen_fails = true, \
+}
+
+#define TYPE_BASED_CASE_COMMON(name) \
+ .case_name = #name, \
+ .bpf_obj_file = "test_core_reloc_type_based.bpf.o", \
+ .btf_src_file = "btf__core_reloc_" #name ".bpf.o", \
+ .raw_tp_name = "sys_enter", \
+ .prog_name = "test_core_type_based"
+
+#define TYPE_BASED_CASE(name, ...) { \
+ TYPE_BASED_CASE_COMMON(name), \
+ .output = STRUCT_TO_CHAR_PTR(core_reloc_type_based_output) \
+ __VA_ARGS__, \
+ .output_len = sizeof(struct core_reloc_type_based_output), \
+}
+
+#define TYPE_BASED_ERR_CASE(name) { \
+ TYPE_BASED_CASE_COMMON(name), \
+ .fails = true, \
}
+#define TYPE_ID_CASE_COMMON(name) \
+ .case_name = #name, \
+ .bpf_obj_file = "test_core_reloc_type_id.bpf.o", \
+ .btf_src_file = "btf__core_reloc_" #name ".bpf.o", \
+ .raw_tp_name = "sys_enter", \
+ .prog_name = "test_core_type_id"
+
+#define TYPE_ID_CASE(name, setup_fn) { \
+ TYPE_ID_CASE_COMMON(name), \
+ .output = STRUCT_TO_CHAR_PTR(core_reloc_type_id_output) {}, \
+ .output_len = sizeof(struct core_reloc_type_id_output), \
+ .setup = setup_fn, \
+}
+
+#define TYPE_ID_ERR_CASE(name) { \
+ TYPE_ID_CASE_COMMON(name), \
+ .fails = true, \
+}
+
+#define ENUMVAL_CASE_COMMON(name) \
+ .case_name = #name, \
+ .bpf_obj_file = "test_core_reloc_enumval.bpf.o", \
+ .btf_src_file = "btf__core_reloc_" #name ".bpf.o", \
+ .raw_tp_name = "sys_enter", \
+ .prog_name = "test_core_enumval"
+
+#define ENUMVAL_CASE(name, ...) { \
+ ENUMVAL_CASE_COMMON(name), \
+ .output = STRUCT_TO_CHAR_PTR(core_reloc_enumval_output) \
+ __VA_ARGS__, \
+ .output_len = sizeof(struct core_reloc_enumval_output), \
+}
+
+#define ENUMVAL_ERR_CASE(name) { \
+ ENUMVAL_CASE_COMMON(name), \
+ .fails = true, \
+}
+
+#define ENUM64VAL_CASE_COMMON(name) \
+ .case_name = #name, \
+ .bpf_obj_file = "test_core_reloc_enum64val.bpf.o", \
+ .btf_src_file = "btf__core_reloc_" #name ".bpf.o", \
+ .raw_tp_name = "sys_enter", \
+ .prog_name = "test_core_enum64val"
+
+#define ENUM64VAL_CASE(name, ...) { \
+ ENUM64VAL_CASE_COMMON(name), \
+ .output = STRUCT_TO_CHAR_PTR(core_reloc_enum64val_output) \
+ __VA_ARGS__, \
+ .output_len = sizeof(struct core_reloc_enum64val_output), \
+}
+
+#define ENUM64VAL_ERR_CASE(name) { \
+ ENUM64VAL_CASE_COMMON(name), \
+ .fails = true, \
+}
+
+struct core_reloc_test_case;
+
+typedef int (*setup_test_fn)(struct core_reloc_test_case *test);
+typedef int (*trigger_test_fn)(const struct core_reloc_test_case *test);
+
struct core_reloc_test_case {
const char *case_name;
const char *bpf_obj_file;
@@ -262,15 +400,154 @@ struct core_reloc_test_case {
const char *output;
int output_len;
bool fails;
+ bool run_btfgen_fails;
+ bool needs_testmod;
bool relaxed_core_relocs;
- bool direct_raw_tp;
+ const char *prog_name;
+ const char *raw_tp_name;
+ setup_test_fn setup;
+ trigger_test_fn trigger;
};
-static struct core_reloc_test_case test_cases[] = {
+static int find_btf_type(const struct btf *btf, const char *name, __u32 kind)
+{
+ int id;
+
+ id = btf__find_by_name_kind(btf, name, kind);
+ if (CHECK(id <= 0, "find_type_id", "failed to find '%s', kind %d: %d\n", name, kind, id))
+ return -1;
+
+ return id;
+}
+
+static int setup_type_id_case_local(struct core_reloc_test_case *test)
+{
+ struct core_reloc_type_id_output *exp = (void *)test->output;
+ struct btf *local_btf = btf__parse(test->bpf_obj_file, NULL);
+ struct btf *targ_btf = btf__parse(test->btf_src_file, NULL);
+ const struct btf_type *t;
+ const char *name;
+ int i;
+
+ if (!ASSERT_OK_PTR(local_btf, "local_btf") || !ASSERT_OK_PTR(targ_btf, "targ_btf")) {
+ btf__free(local_btf);
+ btf__free(targ_btf);
+ return -EINVAL;
+ }
+
+ exp->local_anon_struct = -1;
+ exp->local_anon_union = -1;
+ exp->local_anon_enum = -1;
+ exp->local_anon_func_proto_ptr = -1;
+ exp->local_anon_void_ptr = -1;
+ exp->local_anon_arr = -1;
+
+ for (i = 1; i < btf__type_cnt(local_btf); i++)
+ {
+ t = btf__type_by_id(local_btf, i);
+ /* we are interested only in anonymous types */
+ if (t->name_off)
+ continue;
+
+ if (btf_is_struct(t) && btf_vlen(t) &&
+ (name = btf__name_by_offset(local_btf, btf_members(t)[0].name_off)) &&
+ strcmp(name, "marker_field") == 0) {
+ exp->local_anon_struct = i;
+ } else if (btf_is_union(t) && btf_vlen(t) &&
+ (name = btf__name_by_offset(local_btf, btf_members(t)[0].name_off)) &&
+ strcmp(name, "marker_field") == 0) {
+ exp->local_anon_union = i;
+ } else if (btf_is_enum(t) && btf_vlen(t) &&
+ (name = btf__name_by_offset(local_btf, btf_enum(t)[0].name_off)) &&
+ strcmp(name, "MARKER_ENUM_VAL") == 0) {
+ exp->local_anon_enum = i;
+ } else if (btf_is_ptr(t) && (t = btf__type_by_id(local_btf, t->type))) {
+ if (btf_is_func_proto(t) && (t = btf__type_by_id(local_btf, t->type)) &&
+ btf_is_int(t) && (name = btf__name_by_offset(local_btf, t->name_off)) &&
+ strcmp(name, "_Bool") == 0) {
+ /* ptr -> func_proto -> _Bool */
+ exp->local_anon_func_proto_ptr = i;
+ } else if (btf_is_void(t)) {
+ /* ptr -> void */
+ exp->local_anon_void_ptr = i;
+ }
+ } else if (btf_is_array(t) && (t = btf__type_by_id(local_btf, btf_array(t)->type)) &&
+ btf_is_int(t) && (name = btf__name_by_offset(local_btf, t->name_off)) &&
+ strcmp(name, "_Bool") == 0) {
+ /* _Bool[] */
+ exp->local_anon_arr = i;
+ }
+ }
+
+ exp->local_struct = find_btf_type(local_btf, "a_struct", BTF_KIND_STRUCT);
+ exp->local_union = find_btf_type(local_btf, "a_union", BTF_KIND_UNION);
+ exp->local_enum = find_btf_type(local_btf, "an_enum", BTF_KIND_ENUM);
+ exp->local_int = find_btf_type(local_btf, "int", BTF_KIND_INT);
+ exp->local_struct_typedef = find_btf_type(local_btf, "named_struct_typedef", BTF_KIND_TYPEDEF);
+ exp->local_func_proto_typedef = find_btf_type(local_btf, "func_proto_typedef", BTF_KIND_TYPEDEF);
+ exp->local_arr_typedef = find_btf_type(local_btf, "arr_typedef", BTF_KIND_TYPEDEF);
+
+ btf__free(local_btf);
+ btf__free(targ_btf);
+ return 0;
+}
+
+static int setup_type_id_case_success(struct core_reloc_test_case *test) {
+ struct core_reloc_type_id_output *exp = (void *)test->output;
+ struct btf *targ_btf;
+ int err;
+
+ err = setup_type_id_case_local(test);
+ if (err)
+ return err;
+
+ targ_btf = btf__parse(test->btf_src_file, NULL);
+
+ exp->targ_struct = find_btf_type(targ_btf, "a_struct", BTF_KIND_STRUCT);
+ exp->targ_union = find_btf_type(targ_btf, "a_union", BTF_KIND_UNION);
+ exp->targ_enum = find_btf_type(targ_btf, "an_enum", BTF_KIND_ENUM);
+ exp->targ_int = find_btf_type(targ_btf, "int", BTF_KIND_INT);
+ exp->targ_struct_typedef = find_btf_type(targ_btf, "named_struct_typedef", BTF_KIND_TYPEDEF);
+ exp->targ_func_proto_typedef = find_btf_type(targ_btf, "func_proto_typedef", BTF_KIND_TYPEDEF);
+ exp->targ_arr_typedef = find_btf_type(targ_btf, "arr_typedef", BTF_KIND_TYPEDEF);
+
+ btf__free(targ_btf);
+ return 0;
+}
+
+static int setup_type_id_case_failure(struct core_reloc_test_case *test)
+{
+ struct core_reloc_type_id_output *exp = (void *)test->output;
+ int err;
+
+ err = setup_type_id_case_local(test);
+ if (err)
+ return err;
+
+ exp->targ_struct = 0;
+ exp->targ_union = 0;
+ exp->targ_enum = 0;
+ exp->targ_int = 0;
+ exp->targ_struct_typedef = 0;
+ exp->targ_func_proto_typedef = 0;
+ exp->targ_arr_typedef = 0;
+
+ return 0;
+}
+
+static int __trigger_module_test_read(const struct core_reloc_test_case *test)
+{
+ struct core_reloc_module_output *exp = (void *)test->output;
+
+ trigger_module_test_read(exp->len);
+ return 0;
+}
+
+static const struct core_reloc_test_case test_cases[] = {
/* validate we can find kernel image and use its BTF for relocs */
{
.case_name = "kernel",
- .bpf_obj_file = "test_core_reloc_kernel.o",
+ .bpf_obj_file = "test_core_reloc_kernel.bpf.o",
.btf_src_file = NULL, /* load from /lib/modules/$(uname -r) */
.input = "",
.input_len = 0,
@@ -278,10 +555,17 @@ static struct core_reloc_test_case test_cases[] = {
.valid = { 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, },
.comm = "test_progs",
.comm_len = sizeof("test_progs"),
+ .local_task_struct_matches = true,
},
.output_len = sizeof(struct core_reloc_kernel_output),
+ .raw_tp_name = "sys_enter",
+ .prog_name = "test_core_kernel",
},
+ /* validate we can find kernel module BTF types for relocs/attach */
+ MODULES_CASE("module_probed", "test_core_module_probed", "bpf_testmod_test_read"),
+ MODULES_CASE("module_direct", "test_core_module_direct", NULL),
+
/* validate BPF program can use multiple flavors to match against
* single target BTF type
*/
@@ -315,8 +599,7 @@ static struct core_reloc_test_case test_cases[] = {
ARRAYS_ERR_CASE(arrays___err_too_small),
ARRAYS_ERR_CASE(arrays___err_too_shallow),
ARRAYS_ERR_CASE(arrays___err_non_array),
- ARRAYS_ERR_CASE(arrays___err_wrong_val_type1),
- ARRAYS_ERR_CASE(arrays___err_wrong_val_type2),
+ ARRAYS_ERR_CASE(arrays___err_wrong_val_type),
ARRAYS_ERR_CASE(arrays___err_bad_zero_sz_arr),
/* enum/ptr/int handling scenarios */
@@ -346,8 +629,8 @@ static struct core_reloc_test_case test_cases[] = {
/* validate edge cases of capturing relocations */
{
.case_name = "misc",
- .bpf_obj_file = "test_core_reloc_misc.o",
- .btf_src_file = "btf__core_reloc_misc.o",
+ .bpf_obj_file = "test_core_reloc_misc.bpf.o",
+ .btf_src_file = "btf__core_reloc_misc.bpf.o",
.input = (const char *)&(struct core_reloc_misc_extensible[]){
{ .a = 1 },
{ .a = 2 }, /* not read */
@@ -360,11 +643,13 @@ static struct core_reloc_test_case test_cases[] = {
.c = 0, /* BUG in clang, should be 3 */
},
.output_len = sizeof(struct core_reloc_misc_output),
+ .raw_tp_name = "sys_enter",
+ .prog_name = "test_core_misc",
},
/* validate field existence checks */
{
- EXISTENCE_CASE_COMMON(existence),
+ FIELD_EXISTS_CASE_COMMON(existence),
.input = STRUCT_TO_CHAR_PTR(core_reloc_existence) {
.a = 1,
.b = 2,
@@ -388,7 +673,7 @@ static struct core_reloc_test_case test_cases[] = {
.output_len = sizeof(struct core_reloc_existence_output),
},
{
- EXISTENCE_CASE_COMMON(existence___minimal),
+ FIELD_EXISTS_CASE_COMMON(existence___minimal),
.input = STRUCT_TO_CHAR_PTR(core_reloc_existence___minimal) {
.a = 42,
},
@@ -407,13 +692,25 @@ static struct core_reloc_test_case test_cases[] = {
},
.output_len = sizeof(struct core_reloc_existence_output),
},
-
- EXISTENCE_ERR_CASE(existence__err_int_sz),
- EXISTENCE_ERR_CASE(existence__err_int_type),
- EXISTENCE_ERR_CASE(existence__err_int_kind),
- EXISTENCE_ERR_CASE(existence__err_arr_kind),
- EXISTENCE_ERR_CASE(existence__err_arr_value_type),
- EXISTENCE_ERR_CASE(existence__err_struct_type),
+ {
+ FIELD_EXISTS_CASE_COMMON(existence___wrong_field_defs),
+ .input = STRUCT_TO_CHAR_PTR(core_reloc_existence___wrong_field_defs) {
+ },
+ .input_len = sizeof(struct core_reloc_existence___wrong_field_defs),
+ .output = STRUCT_TO_CHAR_PTR(core_reloc_existence_output) {
+ .a_exists = 0,
+ .b_exists = 0,
+ .c_exists = 0,
+ .arr_exists = 0,
+ .s_exists = 0,
+ .a_value = 0xff000001u,
+ .b_value = 0xff000002u,
+ .c_value = 0xff000003u,
+ .arr_value = 0xff000004u,
+ .s_value = 0xff000005u,
+ },
+ .output_len = sizeof(struct core_reloc_existence_output),
+ },
/* bitfield relocation checks */
BITFIELDS_CASE(bitfields, {
@@ -432,31 +729,248 @@ static struct core_reloc_test_case test_cases[] = {
.sb4 = -1,
.sb20 = -0x17654321,
.u32 = 0xBEEF,
- .s32 = -0x3FEDCBA987654321,
+ .s32 = -0x3FEDCBA987654321LL,
}),
BITFIELDS_CASE(bitfields___bitfield_vs_int, {
- .ub1 = 0xFEDCBA9876543210,
+ .ub1 = 0xFEDCBA9876543210LL,
.ub2 = 0xA6,
- .ub7 = -0x7EDCBA987654321,
- .sb4 = -0x6123456789ABCDE,
- .sb20 = 0xD00D,
+ .ub7 = -0x7EDCBA987654321LL,
+ .sb4 = -0x6123456789ABCDELL,
+ .sb20 = 0xD00DLL,
.u32 = -0x76543,
- .s32 = 0x0ADEADBEEFBADB0B,
+ .s32 = 0x0ADEADBEEFBADB0BLL,
}),
BITFIELDS_CASE(bitfields___just_big_enough, {
- .ub1 = 0xF,
- .ub2 = 0x0812345678FEDCBA,
+ .ub1 = 0xFLL,
+ .ub2 = 0x0812345678FEDCBALL,
}),
BITFIELDS_ERR_CASE(bitfields___err_too_big_bitfield),
- /* size relocation checks */
+ /* field size and offset relocation checks */
SIZE_CASE(size),
SIZE_CASE(size___diff_sz),
+ SIZE_CASE(size___diff_offs),
+ SIZE_ERR_CASE(size___err_ambiguous),
+
+ /* validate type existence, match, and size relocations */
+ TYPE_BASED_CASE(type_based, {
+ .struct_exists = 1,
+ .complex_struct_exists = 1,
+ .union_exists = 1,
+ .enum_exists = 1,
+ .typedef_named_struct_exists = 1,
+ .typedef_anon_struct_exists = 1,
+ .typedef_struct_ptr_exists = 1,
+ .typedef_int_exists = 1,
+ .typedef_enum_exists = 1,
+ .typedef_void_ptr_exists = 1,
+ .typedef_restrict_ptr_exists = 1,
+ .typedef_func_proto_exists = 1,
+ .typedef_arr_exists = 1,
+
+ .struct_matches = 1,
+ .complex_struct_matches = 1,
+ .union_matches = 1,
+ .enum_matches = 1,
+ .typedef_named_struct_matches = 1,
+ .typedef_anon_struct_matches = 1,
+ .typedef_struct_ptr_matches = 1,
+ .typedef_int_matches = 1,
+ .typedef_enum_matches = 1,
+ .typedef_void_ptr_matches = 1,
+ .typedef_restrict_ptr_matches = 1,
+ .typedef_func_proto_matches = 1,
+ .typedef_arr_matches = 1,
+
+ .struct_sz = sizeof(struct a_struct),
+ .union_sz = sizeof(union a_union),
+ .enum_sz = sizeof(enum an_enum),
+ .typedef_named_struct_sz = sizeof(named_struct_typedef),
+ .typedef_anon_struct_sz = sizeof(anon_struct_typedef),
+ .typedef_struct_ptr_sz = sizeof(struct_ptr_typedef),
+ .typedef_int_sz = sizeof(int_typedef),
+ .typedef_enum_sz = sizeof(enum_typedef),
+ .typedef_void_ptr_sz = sizeof(void_ptr_typedef),
+ .typedef_func_proto_sz = sizeof(func_proto_typedef),
+ .typedef_arr_sz = sizeof(arr_typedef),
+ }),
+ TYPE_BASED_CASE(type_based___all_missing, {
+ /* all zeros */
+ }),
+ TYPE_BASED_CASE(type_based___diff, {
+ .struct_exists = 1,
+ .complex_struct_exists = 1,
+ .union_exists = 1,
+ .enum_exists = 1,
+ .typedef_named_struct_exists = 1,
+ .typedef_anon_struct_exists = 1,
+ .typedef_struct_ptr_exists = 1,
+ .typedef_int_exists = 1,
+ .typedef_enum_exists = 1,
+ .typedef_void_ptr_exists = 1,
+ .typedef_func_proto_exists = 1,
+ .typedef_arr_exists = 1,
+
+ .struct_matches = 1,
+ .complex_struct_matches = 1,
+ .union_matches = 1,
+ .enum_matches = 1,
+ .typedef_named_struct_matches = 1,
+ .typedef_anon_struct_matches = 1,
+ .typedef_struct_ptr_matches = 1,
+ .typedef_int_matches = 0,
+ .typedef_enum_matches = 1,
+ .typedef_void_ptr_matches = 1,
+ .typedef_func_proto_matches = 0,
+ .typedef_arr_matches = 0,
+
+ .struct_sz = sizeof(struct a_struct___diff),
+ .union_sz = sizeof(union a_union___diff),
+ .enum_sz = sizeof(enum an_enum___diff),
+ .typedef_named_struct_sz = sizeof(named_struct_typedef___diff),
+ .typedef_anon_struct_sz = sizeof(anon_struct_typedef___diff),
+ .typedef_struct_ptr_sz = sizeof(struct_ptr_typedef___diff),
+ .typedef_int_sz = sizeof(int_typedef___diff),
+ .typedef_enum_sz = sizeof(enum_typedef___diff),
+ .typedef_void_ptr_sz = sizeof(void_ptr_typedef___diff),
+ .typedef_func_proto_sz = sizeof(func_proto_typedef___diff),
+ .typedef_arr_sz = sizeof(arr_typedef___diff),
+ }),
+ TYPE_BASED_CASE(type_based___diff_sz, {
+ .struct_exists = 1,
+ .union_exists = 1,
+ .enum_exists = 1,
+ .typedef_named_struct_exists = 1,
+ .typedef_anon_struct_exists = 1,
+ .typedef_struct_ptr_exists = 1,
+ .typedef_int_exists = 1,
+ .typedef_enum_exists = 1,
+ .typedef_void_ptr_exists = 1,
+ .typedef_func_proto_exists = 1,
+ .typedef_arr_exists = 1,
+
+ .struct_matches = 0,
+ .union_matches = 0,
+ .enum_matches = 0,
+ .typedef_named_struct_matches = 0,
+ .typedef_anon_struct_matches = 0,
+ .typedef_struct_ptr_matches = 1,
+ .typedef_int_matches = 0,
+ .typedef_enum_matches = 0,
+ .typedef_void_ptr_matches = 1,
+ .typedef_func_proto_matches = 0,
+ .typedef_arr_matches = 0,
+
+ .struct_sz = sizeof(struct a_struct___diff_sz),
+ .union_sz = sizeof(union a_union___diff_sz),
+ .enum_sz = sizeof(enum an_enum___diff_sz),
+ .typedef_named_struct_sz = sizeof(named_struct_typedef___diff_sz),
+ .typedef_anon_struct_sz = sizeof(anon_struct_typedef___diff_sz),
+ .typedef_struct_ptr_sz = sizeof(struct_ptr_typedef___diff_sz),
+ .typedef_int_sz = sizeof(int_typedef___diff_sz),
+ .typedef_enum_sz = sizeof(enum_typedef___diff_sz),
+ .typedef_void_ptr_sz = sizeof(void_ptr_typedef___diff_sz),
+ .typedef_func_proto_sz = sizeof(func_proto_typedef___diff_sz),
+ .typedef_arr_sz = sizeof(arr_typedef___diff_sz),
+ }),
+ TYPE_BASED_CASE(type_based___incompat, {
+ .enum_exists = 1,
+ .enum_matches = 1,
+ .enum_sz = sizeof(enum an_enum),
+ }),
+ TYPE_BASED_CASE(type_based___fn_wrong_args, {
+ .struct_exists = 1,
+ .struct_matches = 1,
+ .struct_sz = sizeof(struct a_struct),
+ }),
+
+ /* BTF_TYPE_ID_LOCAL/BTF_TYPE_ID_TARGET tests */
+ TYPE_ID_CASE(type_id, setup_type_id_case_success),
+ TYPE_ID_CASE(type_id___missing_targets, setup_type_id_case_failure),
+
+ /* Enumerator value existence and value relocations */
+ ENUMVAL_CASE(enumval, {
+ .named_val1_exists = true,
+ .named_val2_exists = true,
+ .named_val3_exists = true,
+ .anon_val1_exists = true,
+ .anon_val2_exists = true,
+ .anon_val3_exists = true,
+ .named_val1 = 1,
+ .named_val2 = 2,
+ .anon_val1 = 0x10,
+ .anon_val2 = 0x20,
+ }),
+ ENUMVAL_CASE(enumval___diff, {
+ .named_val1_exists = true,
+ .named_val2_exists = true,
+ .named_val3_exists = true,
+ .anon_val1_exists = true,
+ .anon_val2_exists = true,
+ .anon_val3_exists = true,
+ .named_val1 = 101,
+ .named_val2 = 202,
+ .anon_val1 = 0x11,
+ .anon_val2 = 0x22,
+ }),
+ ENUMVAL_CASE(enumval___val3_missing, {
+ .named_val1_exists = true,
+ .named_val2_exists = true,
+ .named_val3_exists = false,
+ .anon_val1_exists = true,
+ .anon_val2_exists = true,
+ .anon_val3_exists = false,
+ .named_val1 = 111,
+ .named_val2 = 222,
+ .anon_val1 = 0x111,
+ .anon_val2 = 0x222,
+ }),
+ ENUMVAL_ERR_CASE(enumval___err_missing),
+
+ /* 64bit enumerator value existence and value relocations */
+ ENUM64VAL_CASE(enum64val, {
+ .unsigned_val1_exists = true,
+ .unsigned_val2_exists = true,
+ .unsigned_val3_exists = true,
+ .signed_val1_exists = true,
+ .signed_val2_exists = true,
+ .signed_val3_exists = true,
+ .unsigned_val1 = 0x1ffffffffULL,
+ .unsigned_val2 = 0x2,
+ .signed_val1 = 0x1ffffffffLL,
+ .signed_val2 = -2,
+ }),
+ ENUM64VAL_CASE(enum64val___diff, {
+ .unsigned_val1_exists = true,
+ .unsigned_val2_exists = true,
+ .unsigned_val3_exists = true,
+ .signed_val1_exists = true,
+ .signed_val2_exists = true,
+ .signed_val3_exists = true,
+ .unsigned_val1 = 0x101ffffffffULL,
+ .unsigned_val2 = 0x202ffffffffULL,
+ .signed_val1 = -101,
+ .signed_val2 = -202,
+ }),
+ ENUM64VAL_CASE(enum64val___val3_missing, {
+ .unsigned_val1_exists = true,
+ .unsigned_val2_exists = true,
+ .unsigned_val3_exists = false,
+ .signed_val1_exists = true,
+ .signed_val2_exists = true,
+ .signed_val3_exists = false,
+ .unsigned_val1 = 0x111ffffffffULL,
+ .unsigned_val2 = 0x222,
+ .signed_val1 = 0x111ffffffffLL,
+ .signed_val2 = -222,
+ }),
+ ENUM64VAL_ERR_CASE(enum64val___err_missing),
};
struct data {
char in[256];
char out[256];
+ bool skip;
uint64_t my_pid_tgid;
};
@@ -466,13 +980,27 @@ static size_t roundup_page(size_t sz)
return (sz + page_size - 1) / page_size * page_size;
}
-void test_core_reloc(void)
+static int run_btfgen(const char *src_btf, const char *dst_btf, const char *objpath)
+{
+ char command[4096];
+ int n;
+
+ n = snprintf(command, sizeof(command),
+ "./bpftool gen min_core_btf %s %s %s",
+ src_btf, dst_btf, objpath);
+ if (n < 0 || n >= sizeof(command))
+ return -1;
+
+ return system(command);
+}
+
+static void run_core_reloc_tests(bool use_btfgen)
{
const size_t mmap_sz = roundup_page(sizeof(struct data));
- struct bpf_object_load_attr load_attr = {};
- struct core_reloc_test_case *test_case;
+ DECLARE_LIBBPF_OPTS(bpf_object_open_opts, open_opts);
+ struct core_reloc_test_case *test_case, test_case_copy;
const char *tp_name, *probe_name;
- int err, duration = 0, i, equal;
+ int err, i, equal, fd;
struct bpf_link *link = NULL;
struct bpf_map *data_map;
struct bpf_program *prog;
@@ -484,49 +1012,71 @@ void test_core_reloc(void)
my_pid_tgid = getpid() | ((uint64_t)syscall(SYS_gettid) << 32);
for (i = 0; i < ARRAY_SIZE(test_cases); i++) {
- test_case = &test_cases[i];
+ char btf_file[] = "/tmp/core_reloc.btf.XXXXXX";
+
+ test_case_copy = test_cases[i];
+ test_case = &test_case_copy;
+
if (!test__start_subtest(test_case->case_name))
continue;
- DECLARE_LIBBPF_OPTS(bpf_object_open_opts, opts,
- .relaxed_core_relocs = test_case->relaxed_core_relocs,
- );
-
- obj = bpf_object__open_file(test_case->bpf_obj_file, &opts);
- if (CHECK(IS_ERR(obj), "obj_open", "failed to open '%s': %ld\n",
- test_case->bpf_obj_file, PTR_ERR(obj)))
+ if (test_case->needs_testmod && !env.has_testmod) {
+ test__skip();
continue;
+ }
- /* for typed raw tracepoints, NULL should be specified */
- if (test_case->direct_raw_tp) {
- probe_name = "tp_btf/sys_enter";
- tp_name = NULL;
- } else {
- probe_name = "raw_tracepoint/sys_enter";
- tp_name = "sys_enter";
+ /* generate a "minimal" BTF file and use it as source */
+ if (use_btfgen) {
+
+ if (!test_case->btf_src_file || test_case->run_btfgen_fails) {
+ test__skip();
+ continue;
+ }
+
+ fd = mkstemp(btf_file);
+ if (!ASSERT_GE(fd, 0, "btf_tmp"))
+ continue;
+ close(fd); /* we only need the path */
+ err = run_btfgen(test_case->btf_src_file, btf_file,
+ test_case->bpf_obj_file);
+ if (!ASSERT_OK(err, "run_btfgen"))
+ continue;
+
+ test_case->btf_src_file = btf_file;
+ }
+
+ if (test_case->setup) {
+ err = test_case->setup(test_case);
+ if (CHECK(err, "test_setup", "test #%d setup failed: %d\n", i, err))
+ continue;
+ }
+
+ if (test_case->btf_src_file) {
+ err = access(test_case->btf_src_file, R_OK);
+ if (!ASSERT_OK(err, "btf_src_file"))
+ continue;
}
- prog = bpf_object__find_program_by_title(obj, probe_name);
+ open_opts.btf_custom_path = test_case->btf_src_file;
+ obj = bpf_object__open_file(test_case->bpf_obj_file, &open_opts);
+ if (!ASSERT_OK_PTR(obj, "obj_open"))
+ goto cleanup;
+
+ probe_name = test_case->prog_name;
+ tp_name = test_case->raw_tp_name; /* NULL for tp_btf */
+ prog = bpf_object__find_program_by_name(obj, probe_name);
if (CHECK(!prog, "find_probe",
"prog '%s' not found\n", probe_name))
goto cleanup;
- load_attr.obj = obj;
- load_attr.log_level = 0;
- load_attr.target_btf_path = test_case->btf_src_file;
- err = bpf_object__load_xattr(&load_attr);
- if (test_case->fails) {
- CHECK(!err, "obj_load_fail",
- "should fail to load prog '%s'\n", probe_name);
+ err = bpf_object__load(obj);
+ if (err) {
+ if (!test_case->fails)
+ ASSERT_OK(err, "obj_load");
goto cleanup;
- } else {
- if (CHECK(err, "obj_load",
- "failed to load prog '%s': %d\n",
- probe_name, err))
- goto cleanup;
}
- data_map = bpf_object__find_map_by_name(obj, "test_cor.bss");
+ data_map = bpf_object__find_map_by_name(obj, ".bss");
if (CHECK(!data_map, "find_data_map", "data map not found\n"))
goto cleanup;
@@ -540,16 +1090,29 @@ void test_core_reloc(void)
data = mmap_data;
memset(mmap_data, 0, sizeof(*data));
- memcpy(data->in, test_case->input, test_case->input_len);
+ if (test_case->input_len)
+ memcpy(data->in, test_case->input, test_case->input_len);
data->my_pid_tgid = my_pid_tgid;
link = bpf_program__attach_raw_tracepoint(prog, tp_name);
- if (CHECK(IS_ERR(link), "attach_raw_tp", "err %ld\n",
- PTR_ERR(link)))
+ if (!ASSERT_OK_PTR(link, "attach_raw_tp"))
goto cleanup;
/* trigger test run */
- usleep(1);
+ if (test_case->trigger) {
+ if (!ASSERT_OK(test_case->trigger(test_case), "test_trigger"))
+ goto cleanup;
+ } else {
+ usleep(1);
+ }
+
+ if (data->skip) {
+ test__skip();
+ goto cleanup;
+ }
+
+ if (!ASSERT_FALSE(test_case->fails, "obj_load_should_fail"))
+ goto cleanup;
equal = memcmp(data->out, test_case->output,
test_case->output_len) == 0;
@@ -573,10 +1136,20 @@ cleanup:
CHECK_FAIL(munmap(mmap_data, mmap_sz));
mmap_data = NULL;
}
- if (!IS_ERR_OR_NULL(link)) {
- bpf_link__destroy(link);
- link = NULL;
- }
+ if (use_btfgen)
+ remove(test_case->btf_src_file);
+ bpf_link__destroy(link);
+ link = NULL;
bpf_object__close(obj);
}
}
+
+void test_core_reloc(void)
+{
+ run_core_reloc_tests(false);
+}
+
+void test_core_reloc_btfgen(void)
+{
+ run_core_reloc_tests(true);
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/core_retro.c b/tools/testing/selftests/bpf/prog_tests/core_retro.c
new file mode 100644
index 000000000000..4a2c256c8db6
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/core_retro.c
@@ -0,0 +1,38 @@
+// SPDX-License-Identifier: GPL-2.0
+// Copyright (c) 2020 Facebook
+#define _GNU_SOURCE
+#include <test_progs.h>
+#include "test_core_retro.skel.h"
+
+void test_core_retro(void)
+{
+ int err, zero = 0, res, my_pid = getpid();
+ struct test_core_retro *skel;
+
+ /* load program */
+ skel = test_core_retro__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "skel_load"))
+ goto out_close;
+
+ err = bpf_map__update_elem(skel->maps.exp_tgid_map, &zero, sizeof(zero),
+ &my_pid, sizeof(my_pid), 0);
+ if (!ASSERT_OK(err, "map_update"))
+ goto out_close;
+
+ /* attach probe */
+ err = test_core_retro__attach(skel);
+ if (!ASSERT_OK(err, "attach_kprobe"))
+ goto out_close;
+
+ /* trigger */
+ usleep(1);
+
+ err = bpf_map__lookup_elem(skel->maps.results, &zero, sizeof(zero), &res, sizeof(res), 0);
+ if (!ASSERT_OK(err, "map_lookup"))
+ goto out_close;
+
+ ASSERT_EQ(res, my_pid, "pid_check");
+
+out_close:
+ test_core_retro__destroy(skel);
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/cpumask.c b/tools/testing/selftests/bpf/prog_tests/cpumask.c
new file mode 100644
index 000000000000..ecf89df78109
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/cpumask.c
@@ -0,0 +1,77 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2023 Meta Platforms, Inc. and affiliates. */
+
+#include <test_progs.h>
+#include "cpumask_failure.skel.h"
+#include "cpumask_success.skel.h"
+
+static const char * const cpumask_success_testcases[] = {
+ "test_alloc_free_cpumask",
+ "test_set_clear_cpu",
+ "test_setall_clear_cpu",
+ "test_first_firstzero_cpu",
+ "test_firstand_nocpu",
+ "test_test_and_set_clear",
+ "test_and_or_xor",
+ "test_intersects_subset",
+ "test_copy_any_anyand",
+ "test_insert_leave",
+ "test_insert_remove_release",
+ "test_global_mask_rcu",
+ "test_cpumask_weight",
+};
+
+static void verify_success(const char *prog_name)
+{
+ struct cpumask_success *skel;
+ struct bpf_program *prog;
+ struct bpf_link *link = NULL;
+ pid_t child_pid;
+ int status, err;
+
+ skel = cpumask_success__open();
+ if (!ASSERT_OK_PTR(skel, "cpumask_success__open"))
+ return;
+
+ skel->bss->pid = getpid();
+ skel->bss->nr_cpus = libbpf_num_possible_cpus();
+
+ err = cpumask_success__load(skel);
+ if (!ASSERT_OK(err, "cpumask_success__load"))
+ goto cleanup;
+
+ prog = bpf_object__find_program_by_name(skel->obj, prog_name);
+ if (!ASSERT_OK_PTR(prog, "bpf_object__find_program_by_name"))
+ goto cleanup;
+
+ link = bpf_program__attach(prog);
+ if (!ASSERT_OK_PTR(link, "bpf_program__attach"))
+ goto cleanup;
+
+ child_pid = fork();
+ if (!ASSERT_GT(child_pid, -1, "child_pid"))
+ goto cleanup;
+ if (child_pid == 0)
+ _exit(0);
+ waitpid(child_pid, &status, 0);
+ ASSERT_OK(skel->bss->err, "post_wait_err");
+
+cleanup:
+ bpf_link__destroy(link);
+ cpumask_success__destroy(skel);
+}
+
+void test_cpumask(void)
+{
+ int i;
+
+ for (i = 0; i < ARRAY_SIZE(cpumask_success_testcases); i++) {
+ if (!test__start_subtest(cpumask_success_testcases[i]))
+ continue;
+
+ verify_success(cpumask_success_testcases[i]);
+ }
+
+ RUN_TESTS(cpumask_success);
+ RUN_TESTS(cpumask_failure);
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/ctx_rewrite.c b/tools/testing/selftests/bpf/prog_tests/ctx_rewrite.c
new file mode 100644
index 000000000000..3b7c57fe55a5
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/ctx_rewrite.c
@@ -0,0 +1,873 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include <limits.h>
+#include <stdio.h>
+#include <string.h>
+#include <ctype.h>
+#include <regex.h>
+#include <test_progs.h>
+
+#include "bpf/btf.h"
+#include "bpf_util.h"
+#include "linux/filter.h"
+#include "disasm.h"
+
+#define MAX_PROG_TEXT_SZ (32 * 1024)
+
+/* The code in this file serves the sole purpose of executing test cases
+ * specified in the test_cases array. Each test case specifies a program
+ * type, context field offset, and disassembly patterns that correspond
+ * to read and write instructions generated by
+ * verifier.c:convert_ctx_access() for accessing that field.
+ *
+ * For each test case, up to three programs are created:
+ * - One that uses BPF_LDX_MEM to read the context field.
+ * - One that uses BPF_STX_MEM to write to the context field.
+ * - One that uses BPF_ST_MEM to write to the context field.
+ *
+ * The disassembly of each program is then compared with the pattern
+ * specified in the test case.
+ */
+struct test_case {
+ char *name;
+ enum bpf_prog_type prog_type;
+ enum bpf_attach_type expected_attach_type;
+ int field_offset;
+ int field_sz;
+ /* Program generated for BPF_ST_MEM uses value 42 by default,
+ * this field allows to specify custom value.
+ */
+ struct {
+ bool use;
+ int value;
+ } st_value;
+ /* Pattern for BPF_LDX_MEM(field_sz, dst, ctx, field_offset) */
+ char *read;
+ /* Pattern for BPF_STX_MEM(field_sz, ctx, src, field_offset) and
+ * BPF_ST_MEM (field_sz, ctx, src, field_offset)
+ */
+ char *write;
+ /* Pattern for BPF_ST_MEM(field_sz, ctx, src, field_offset),
+ * takes priority over `write`.
+ */
+ char *write_st;
+ /* Pattern for BPF_STX_MEM (field_sz, ctx, src, field_offset),
+ * takes priority over `write`.
+ */
+ char *write_stx;
+};
+
+#define N(_prog_type, type, field, name_extra...) \
+ .name = #_prog_type "." #field name_extra, \
+ .prog_type = BPF_PROG_TYPE_##_prog_type, \
+ .field_offset = offsetof(type, field), \
+ .field_sz = sizeof(typeof(((type *)NULL)->field))
+
+static struct test_case test_cases[] = {
+/* Sign extension on s390 changes the pattern */
+#if defined(__x86_64__) || defined(__aarch64__)
+ {
+ N(SCHED_CLS, struct __sk_buff, tstamp),
+ .read = "r11 = *(u8 *)($ctx + sk_buff::__mono_tc_offset);"
+ "w11 &= 3;"
+ "if w11 != 0x3 goto pc+2;"
+ "$dst = 0;"
+ "goto pc+1;"
+ "$dst = *(u64 *)($ctx + sk_buff::tstamp);",
+ .write = "r11 = *(u8 *)($ctx + sk_buff::__mono_tc_offset);"
+ "if w11 & 0x2 goto pc+1;"
+ "goto pc+2;"
+ "w11 &= -2;"
+ "*(u8 *)($ctx + sk_buff::__mono_tc_offset) = r11;"
+ "*(u64 *)($ctx + sk_buff::tstamp) = $src;",
+ },
+#endif
+ {
+ N(SCHED_CLS, struct __sk_buff, priority),
+ .read = "$dst = *(u32 *)($ctx + sk_buff::priority);",
+ .write = "*(u32 *)($ctx + sk_buff::priority) = $src;",
+ },
+ {
+ N(SCHED_CLS, struct __sk_buff, mark),
+ .read = "$dst = *(u32 *)($ctx + sk_buff::mark);",
+ .write = "*(u32 *)($ctx + sk_buff::mark) = $src;",
+ },
+ {
+ N(SCHED_CLS, struct __sk_buff, cb[0]),
+ .read = "$dst = *(u32 *)($ctx + $(sk_buff::cb + qdisc_skb_cb::data));",
+ .write = "*(u32 *)($ctx + $(sk_buff::cb + qdisc_skb_cb::data)) = $src;",
+ },
+ {
+ N(SCHED_CLS, struct __sk_buff, tc_classid),
+ .read = "$dst = *(u16 *)($ctx + $(sk_buff::cb + qdisc_skb_cb::tc_classid));",
+ .write = "*(u16 *)($ctx + $(sk_buff::cb + qdisc_skb_cb::tc_classid)) = $src;",
+ },
+ {
+ N(SCHED_CLS, struct __sk_buff, tc_index),
+ .read = "$dst = *(u16 *)($ctx + sk_buff::tc_index);",
+ .write = "*(u16 *)($ctx + sk_buff::tc_index) = $src;",
+ },
+ {
+ N(SCHED_CLS, struct __sk_buff, queue_mapping),
+ .read = "$dst = *(u16 *)($ctx + sk_buff::queue_mapping);",
+ .write_stx = "if $src >= 0xffff goto pc+1;"
+ "*(u16 *)($ctx + sk_buff::queue_mapping) = $src;",
+ .write_st = "*(u16 *)($ctx + sk_buff::queue_mapping) = $src;",
+ },
+ {
+ /* This is a corner case in filter.c:bpf_convert_ctx_access() */
+ N(SCHED_CLS, struct __sk_buff, queue_mapping, ".ushrt_max"),
+ .st_value = { true, USHRT_MAX },
+ .write_st = "goto pc+0;",
+ },
+ {
+ N(CGROUP_SOCK, struct bpf_sock, bound_dev_if),
+ .read = "$dst = *(u32 *)($ctx + sock_common::skc_bound_dev_if);",
+ .write = "*(u32 *)($ctx + sock_common::skc_bound_dev_if) = $src;",
+ },
+ {
+ N(CGROUP_SOCK, struct bpf_sock, mark),
+ .read = "$dst = *(u32 *)($ctx + sock::sk_mark);",
+ .write = "*(u32 *)($ctx + sock::sk_mark) = $src;",
+ },
+ {
+ N(CGROUP_SOCK, struct bpf_sock, priority),
+ .read = "$dst = *(u32 *)($ctx + sock::sk_priority);",
+ .write = "*(u32 *)($ctx + sock::sk_priority) = $src;",
+ },
+ {
+ N(SOCK_OPS, struct bpf_sock_ops, replylong[0]),
+ .read = "$dst = *(u32 *)($ctx + bpf_sock_ops_kern::replylong);",
+ .write = "*(u32 *)($ctx + bpf_sock_ops_kern::replylong) = $src;",
+ },
+ {
+ N(CGROUP_SYSCTL, struct bpf_sysctl, file_pos),
+#if __BYTE_ORDER == __LITTLE_ENDIAN
+ .read = "$dst = *(u64 *)($ctx + bpf_sysctl_kern::ppos);"
+ "$dst = *(u32 *)($dst +0);",
+ .write = "*(u64 *)($ctx + bpf_sysctl_kern::tmp_reg) = r9;"
+ "r9 = *(u64 *)($ctx + bpf_sysctl_kern::ppos);"
+ "*(u32 *)(r9 +0) = $src;"
+ "r9 = *(u64 *)($ctx + bpf_sysctl_kern::tmp_reg);",
+#else
+ .read = "$dst = *(u64 *)($ctx + bpf_sysctl_kern::ppos);"
+ "$dst = *(u32 *)($dst +4);",
+ .write = "*(u64 *)($ctx + bpf_sysctl_kern::tmp_reg) = r9;"
+ "r9 = *(u64 *)($ctx + bpf_sysctl_kern::ppos);"
+ "*(u32 *)(r9 +4) = $src;"
+ "r9 = *(u64 *)($ctx + bpf_sysctl_kern::tmp_reg);",
+#endif
+ },
+ {
+ N(CGROUP_SOCKOPT, struct bpf_sockopt, sk),
+ .read = "$dst = *(u64 *)($ctx + bpf_sockopt_kern::sk);",
+ .expected_attach_type = BPF_CGROUP_GETSOCKOPT,
+ },
+ {
+ N(CGROUP_SOCKOPT, struct bpf_sockopt, level),
+ .read = "$dst = *(u32 *)($ctx + bpf_sockopt_kern::level);",
+ .write = "*(u32 *)($ctx + bpf_sockopt_kern::level) = $src;",
+ .expected_attach_type = BPF_CGROUP_SETSOCKOPT,
+ },
+ {
+ N(CGROUP_SOCKOPT, struct bpf_sockopt, optname),
+ .read = "$dst = *(u32 *)($ctx + bpf_sockopt_kern::optname);",
+ .write = "*(u32 *)($ctx + bpf_sockopt_kern::optname) = $src;",
+ .expected_attach_type = BPF_CGROUP_SETSOCKOPT,
+ },
+ {
+ N(CGROUP_SOCKOPT, struct bpf_sockopt, optlen),
+ .read = "$dst = *(u32 *)($ctx + bpf_sockopt_kern::optlen);",
+ .write = "*(u32 *)($ctx + bpf_sockopt_kern::optlen) = $src;",
+ .expected_attach_type = BPF_CGROUP_SETSOCKOPT,
+ },
+ {
+ N(CGROUP_SOCKOPT, struct bpf_sockopt, retval),
+ .read = "$dst = *(u64 *)($ctx + bpf_sockopt_kern::current_task);"
+ "$dst = *(u64 *)($dst + task_struct::bpf_ctx);"
+ "$dst = *(u32 *)($dst + bpf_cg_run_ctx::retval);",
+ .write = "*(u64 *)($ctx + bpf_sockopt_kern::tmp_reg) = r9;"
+ "r9 = *(u64 *)($ctx + bpf_sockopt_kern::current_task);"
+ "r9 = *(u64 *)(r9 + task_struct::bpf_ctx);"
+ "*(u32 *)(r9 + bpf_cg_run_ctx::retval) = $src;"
+ "r9 = *(u64 *)($ctx + bpf_sockopt_kern::tmp_reg);",
+ .expected_attach_type = BPF_CGROUP_GETSOCKOPT,
+ },
+ {
+ N(CGROUP_SOCKOPT, struct bpf_sockopt, optval),
+ .read = "$dst = *(u64 *)($ctx + bpf_sockopt_kern::optval);",
+ .expected_attach_type = BPF_CGROUP_GETSOCKOPT,
+ },
+ {
+ N(CGROUP_SOCKOPT, struct bpf_sockopt, optval_end),
+ .read = "$dst = *(u64 *)($ctx + bpf_sockopt_kern::optval_end);",
+ .expected_attach_type = BPF_CGROUP_GETSOCKOPT,
+ },
+};
+
+#undef N
+
+static regex_t *ident_regex;
+static regex_t *field_regex;
+
+static char *skip_space(char *str)
+{
+ while (*str && isspace(*str))
+ ++str;
+ return str;
+}
+
+static char *skip_space_and_semi(char *str)
+{
+ while (*str && (isspace(*str) || *str == ';'))
+ ++str;
+ return str;
+}
+
+static char *match_str(char *str, char *prefix)
+{
+ while (*str && *prefix && *str == *prefix) {
+ ++str;
+ ++prefix;
+ }
+ if (*prefix)
+ return NULL;
+ return str;
+}
+
+static char *match_number(char *str, int num)
+{
+ char *next;
+ int snum = strtol(str, &next, 10);
+
+ if (next - str == 0 || num != snum)
+ return NULL;
+
+ return next;
+}
+
+static int find_field_offset_aux(struct btf *btf, int btf_id, char *field_name, int off)
+{
+ const struct btf_type *type = btf__type_by_id(btf, btf_id);
+ const struct btf_member *m;
+ __u16 mnum;
+ int i;
+
+ if (!type) {
+ PRINT_FAIL("Can't find btf_type for id %d\n", btf_id);
+ return -1;
+ }
+
+ if (!btf_is_struct(type) && !btf_is_union(type)) {
+ PRINT_FAIL("BTF id %d is not struct or union\n", btf_id);
+ return -1;
+ }
+
+ m = btf_members(type);
+ mnum = btf_vlen(type);
+
+ for (i = 0; i < mnum; ++i, ++m) {
+ const char *mname = btf__name_by_offset(btf, m->name_off);
+
+ if (strcmp(mname, "") == 0) {
+ int msize = find_field_offset_aux(btf, m->type, field_name,
+ off + m->offset);
+ if (msize >= 0)
+ return msize;
+ }
+
+ if (strcmp(mname, field_name))
+ continue;
+
+ return (off + m->offset) / 8;
+ }
+
+ return -1;
+}
+
+static int find_field_offset(struct btf *btf, char *pattern, regmatch_t *matches)
+{
+ int type_sz = matches[1].rm_eo - matches[1].rm_so;
+ int field_sz = matches[2].rm_eo - matches[2].rm_so;
+ char *type = pattern + matches[1].rm_so;
+ char *field = pattern + matches[2].rm_so;
+ char field_str[128] = {};
+ char type_str[128] = {};
+ int btf_id, field_offset;
+
+ if (type_sz >= sizeof(type_str)) {
+ PRINT_FAIL("Malformed pattern: type ident is too long: %d\n", type_sz);
+ return -1;
+ }
+
+ if (field_sz >= sizeof(field_str)) {
+ PRINT_FAIL("Malformed pattern: field ident is too long: %d\n", field_sz);
+ return -1;
+ }
+
+ strncpy(type_str, type, type_sz);
+ strncpy(field_str, field, field_sz);
+ btf_id = btf__find_by_name(btf, type_str);
+ if (btf_id < 0) {
+ PRINT_FAIL("No BTF info for type %s\n", type_str);
+ return -1;
+ }
+
+ field_offset = find_field_offset_aux(btf, btf_id, field_str, 0);
+ if (field_offset < 0) {
+ PRINT_FAIL("No BTF info for field %s::%s\n", type_str, field_str);
+ return -1;
+ }
+
+ return field_offset;
+}
+
+static regex_t *compile_regex(char *pat)
+{
+ regex_t *re;
+ int err;
+
+ re = malloc(sizeof(regex_t));
+ if (!re) {
+ PRINT_FAIL("Can't alloc regex\n");
+ return NULL;
+ }
+
+ err = regcomp(re, pat, REG_EXTENDED);
+ if (err) {
+ char errbuf[512];
+
+ regerror(err, re, errbuf, sizeof(errbuf));
+ PRINT_FAIL("Can't compile regex: %s\n", errbuf);
+ free(re);
+ return NULL;
+ }
+
+ return re;
+}
+
+static void free_regex(regex_t *re)
+{
+ if (!re)
+ return;
+
+ regfree(re);
+ free(re);
+}
+
+static u32 max_line_len(char *str)
+{
+ u32 max_line = 0;
+ char *next = str;
+
+ while (next) {
+ next = strchr(str, '\n');
+ if (next) {
+ max_line = max_t(u32, max_line, (next - str));
+ str = next + 1;
+ } else {
+ max_line = max_t(u32, max_line, strlen(str));
+ }
+ }
+
+ return min(max_line, 60u);
+}
+
+/* Print strings `pattern_origin` and `text_origin` side by side,
+ * assume `pattern_pos` and `text_pos` designate location within
+ * corresponding origin string where match diverges.
+ * The output should look like:
+ *
+ * Can't match disassembly(left) with pattern(right):
+ * r2 = *(u64 *)(r1 +0) ; $dst = *(u64 *)($ctx + bpf_sockopt_kern::sk1)
+ * ^ ^
+ * r0 = 0 ;
+ * exit ;
+ */
+static void print_match_error(FILE *out,
+ char *pattern_origin, char *text_origin,
+ char *pattern_pos, char *text_pos)
+{
+ char *pattern = pattern_origin;
+ char *text = text_origin;
+ int middle = max_line_len(text) + 2;
+
+ fprintf(out, "Can't match disassembly(left) with pattern(right):\n");
+ while (*pattern || *text) {
+ int column = 0;
+ int mark1 = -1;
+ int mark2 = -1;
+
+ /* Print one line from text */
+ while (*text && *text != '\n') {
+ if (text == text_pos)
+ mark1 = column;
+ fputc(*text, out);
+ ++text;
+ ++column;
+ }
+ if (text == text_pos)
+ mark1 = column;
+
+ /* Pad to the middle */
+ while (column < middle) {
+ fputc(' ', out);
+ ++column;
+ }
+ fputs("; ", out);
+ column += 3;
+
+ /* Print one line from pattern, pattern lines are terminated by ';' */
+ while (*pattern && *pattern != ';') {
+ if (pattern == pattern_pos)
+ mark2 = column;
+ fputc(*pattern, out);
+ ++pattern;
+ ++column;
+ }
+ if (pattern == pattern_pos)
+ mark2 = column;
+
+ fputc('\n', out);
+ if (*pattern)
+ ++pattern;
+ if (*text)
+ ++text;
+
+ /* If pattern and text diverge at this line, print an
+ * additional line with '^' marks, highlighting
+ * positions where match fails.
+ */
+ if (mark1 > 0 || mark2 > 0) {
+ for (column = 0; column <= max(mark1, mark2); ++column) {
+ if (column == mark1 || column == mark2)
+ fputc('^', out);
+ else
+ fputc(' ', out);
+ }
+ fputc('\n', out);
+ }
+ }
+}
+
+/* Test if `text` matches `pattern`. Pattern consists of the following elements:
+ *
+ * - Field offset references:
+ *
+ * <type>::<field>
+ *
+ * When such reference is encountered BTF is used to compute numerical
+ * value for the offset of <field> in <type>. The `text` is expected to
+ * contain matching numerical value.
+ *
+ * - Field groups:
+ *
+ * $(<type>::<field> [+ <type>::<field>]*)
+ *
+ * Allows to specify an offset that is a sum of multiple field offsets.
+ * The `text` is expected to contain matching numerical value.
+ *
+ * - Variable references, e.g. `$src`, `$dst`, `$ctx`.
+ * These are substitutions specified in `reg_map` array.
+ * If a substring of pattern is equal to `reg_map[i][0]` the `text` is
+ * expected to contain `reg_map[i][1]` in the matching position.
+ *
+ * - Whitespace is ignored, ';' counts as whitespace for `pattern`.
+ *
+ * - Any other characters, `pattern` and `text` should match one-to-one.
+ *
+ * Example of a pattern:
+ *
+ * __________ fields group ________________
+ * ' '
+ * *(u16 *)($ctx + $(sk_buff::cb + qdisc_skb_cb::tc_classid)) = $src;
+ * ^^^^ '______________________'
+ * variable reference field offset reference
+ */
+static bool match_pattern(struct btf *btf, char *pattern, char *text, char *reg_map[][2])
+{
+ char *pattern_origin = pattern;
+ char *text_origin = text;
+ regmatch_t matches[3];
+
+_continue:
+ while (*pattern) {
+ if (!*text)
+ goto err;
+
+ /* Skip whitespace */
+ if (isspace(*pattern) || *pattern == ';') {
+ if (!isspace(*text) && text != text_origin && isalnum(text[-1]))
+ goto err;
+ pattern = skip_space_and_semi(pattern);
+ text = skip_space(text);
+ continue;
+ }
+
+ /* Check for variable references */
+ for (int i = 0; reg_map[i][0]; ++i) {
+ char *pattern_next, *text_next;
+
+ pattern_next = match_str(pattern, reg_map[i][0]);
+ if (!pattern_next)
+ continue;
+
+ text_next = match_str(text, reg_map[i][1]);
+ if (!text_next)
+ goto err;
+
+ pattern = pattern_next;
+ text = text_next;
+ goto _continue;
+ }
+
+ /* Match field group:
+ * $(sk_buff::cb + qdisc_skb_cb::tc_classid)
+ */
+ if (strncmp(pattern, "$(", 2) == 0) {
+ char *group_start = pattern, *text_next;
+ int acc_offset = 0;
+
+ pattern += 2;
+
+ for (;;) {
+ int field_offset;
+
+ pattern = skip_space(pattern);
+ if (!*pattern) {
+ PRINT_FAIL("Unexpected end of pattern\n");
+ goto err;
+ }
+
+ if (*pattern == ')') {
+ ++pattern;
+ break;
+ }
+
+ if (*pattern == '+') {
+ ++pattern;
+ continue;
+ }
+
+ printf("pattern: %s\n", pattern);
+ if (regexec(field_regex, pattern, 3, matches, 0) != 0) {
+ PRINT_FAIL("Field reference expected\n");
+ goto err;
+ }
+
+ field_offset = find_field_offset(btf, pattern, matches);
+ if (field_offset < 0)
+ goto err;
+
+ pattern += matches[0].rm_eo;
+ acc_offset += field_offset;
+ }
+
+ text_next = match_number(text, acc_offset);
+ if (!text_next) {
+ PRINT_FAIL("No match for group offset %.*s (%d)\n",
+ (int)(pattern - group_start),
+ group_start,
+ acc_offset);
+ goto err;
+ }
+ text = text_next;
+ }
+
+ /* Match field reference:
+ * sk_buff::cb
+ */
+ if (regexec(field_regex, pattern, 3, matches, 0) == 0) {
+ int field_offset;
+ char *text_next;
+
+ field_offset = find_field_offset(btf, pattern, matches);
+ if (field_offset < 0)
+ goto err;
+
+ text_next = match_number(text, field_offset);
+ if (!text_next) {
+ PRINT_FAIL("No match for field offset %.*s (%d)\n",
+ (int)matches[0].rm_eo, pattern, field_offset);
+ goto err;
+ }
+
+ pattern += matches[0].rm_eo;
+ text = text_next;
+ continue;
+ }
+
+ /* If pattern points to identifier not followed by '::'
+ * skip the identifier to avoid n^2 application of the
+ * field reference rule.
+ */
+ if (regexec(ident_regex, pattern, 1, matches, 0) == 0) {
+ if (strncmp(pattern, text, matches[0].rm_eo) != 0)
+ goto err;
+
+ pattern += matches[0].rm_eo;
+ text += matches[0].rm_eo;
+ continue;
+ }
+
+ /* Match literally */
+ if (*pattern != *text)
+ goto err;
+
+ ++pattern;
+ ++text;
+ }
+
+ return true;
+
+err:
+ test__fail();
+ print_match_error(stdout, pattern_origin, text_origin, pattern, text);
+ return false;
+}
+
+static void print_insn(void *private_data, const char *fmt, ...)
+{
+ va_list args;
+
+ va_start(args, fmt);
+ vfprintf((FILE *)private_data, fmt, args);
+ va_end(args);
+}
+
+/* Disassemble instructions to a stream */
+static void print_xlated(FILE *out, struct bpf_insn *insn, __u32 len)
+{
+ const struct bpf_insn_cbs cbs = {
+ .cb_print = print_insn,
+ .cb_call = NULL,
+ .cb_imm = NULL,
+ .private_data = out,
+ };
+ bool double_insn = false;
+ int i;
+
+ for (i = 0; i < len; i++) {
+ if (double_insn) {
+ double_insn = false;
+ continue;
+ }
+
+ double_insn = insn[i].code == (BPF_LD | BPF_IMM | BPF_DW);
+ print_bpf_insn(&cbs, insn + i, true);
+ }
+}
+
+/* We share code with kernel BPF disassembler, it adds '(FF) ' prefix
+ * for each instruction (FF stands for instruction `code` byte).
+ * This function removes the prefix inplace for each line in `str`.
+ */
+static void remove_insn_prefix(char *str, int size)
+{
+ const int prefix_size = 5;
+
+ int write_pos = 0, read_pos = prefix_size;
+ int len = strlen(str);
+ char c;
+
+ size = min(size, len);
+
+ while (read_pos < size) {
+ c = str[read_pos++];
+ if (c == 0)
+ break;
+ str[write_pos++] = c;
+ if (c == '\n')
+ read_pos += prefix_size;
+ }
+ str[write_pos] = 0;
+}
+
+struct prog_info {
+ char *prog_kind;
+ enum bpf_prog_type prog_type;
+ enum bpf_attach_type expected_attach_type;
+ struct bpf_insn *prog;
+ u32 prog_len;
+};
+
+static void match_program(struct btf *btf,
+ struct prog_info *pinfo,
+ char *pattern,
+ char *reg_map[][2],
+ bool skip_first_insn)
+{
+ struct bpf_insn *buf = NULL;
+ int err = 0, prog_fd = 0;
+ FILE *prog_out = NULL;
+ char *text = NULL;
+ __u32 cnt = 0;
+
+ text = calloc(MAX_PROG_TEXT_SZ, 1);
+ if (!text) {
+ PRINT_FAIL("Can't allocate %d bytes\n", MAX_PROG_TEXT_SZ);
+ goto out;
+ }
+
+ // TODO: log level
+ LIBBPF_OPTS(bpf_prog_load_opts, opts);
+ opts.log_buf = text;
+ opts.log_size = MAX_PROG_TEXT_SZ;
+ opts.log_level = 1 | 2 | 4;
+ opts.expected_attach_type = pinfo->expected_attach_type;
+
+ prog_fd = bpf_prog_load(pinfo->prog_type, NULL, "GPL",
+ pinfo->prog, pinfo->prog_len, &opts);
+ if (prog_fd < 0) {
+ PRINT_FAIL("Can't load program, errno %d (%s), verifier log:\n%s\n",
+ errno, strerror(errno), text);
+ goto out;
+ }
+
+ memset(text, 0, MAX_PROG_TEXT_SZ);
+
+ err = get_xlated_program(prog_fd, &buf, &cnt);
+ if (err) {
+ PRINT_FAIL("Can't load back BPF program\n");
+ goto out;
+ }
+
+ prog_out = fmemopen(text, MAX_PROG_TEXT_SZ - 1, "w");
+ if (!prog_out) {
+ PRINT_FAIL("Can't open memory stream\n");
+ goto out;
+ }
+ if (skip_first_insn)
+ print_xlated(prog_out, buf + 1, cnt - 1);
+ else
+ print_xlated(prog_out, buf, cnt);
+ fclose(prog_out);
+ remove_insn_prefix(text, MAX_PROG_TEXT_SZ);
+
+ ASSERT_TRUE(match_pattern(btf, pattern, text, reg_map),
+ pinfo->prog_kind);
+
+out:
+ if (prog_fd)
+ close(prog_fd);
+ free(buf);
+ free(text);
+}
+
+static void run_one_testcase(struct btf *btf, struct test_case *test)
+{
+ struct prog_info pinfo = {};
+ int bpf_sz;
+
+ if (!test__start_subtest(test->name))
+ return;
+
+ switch (test->field_sz) {
+ case 8:
+ bpf_sz = BPF_DW;
+ break;
+ case 4:
+ bpf_sz = BPF_W;
+ break;
+ case 2:
+ bpf_sz = BPF_H;
+ break;
+ case 1:
+ bpf_sz = BPF_B;
+ break;
+ default:
+ PRINT_FAIL("Unexpected field size: %d, want 8,4,2 or 1\n", test->field_sz);
+ return;
+ }
+
+ pinfo.prog_type = test->prog_type;
+ pinfo.expected_attach_type = test->expected_attach_type;
+
+ if (test->read) {
+ struct bpf_insn ldx_prog[] = {
+ BPF_LDX_MEM(bpf_sz, BPF_REG_2, BPF_REG_1, test->field_offset),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ };
+ char *reg_map[][2] = {
+ { "$ctx", "r1" },
+ { "$dst", "r2" },
+ {}
+ };
+
+ pinfo.prog_kind = "LDX";
+ pinfo.prog = ldx_prog;
+ pinfo.prog_len = ARRAY_SIZE(ldx_prog);
+ match_program(btf, &pinfo, test->read, reg_map, false);
+ }
+
+ if (test->write || test->write_st || test->write_stx) {
+ struct bpf_insn stx_prog[] = {
+ BPF_MOV64_IMM(BPF_REG_2, 0),
+ BPF_STX_MEM(bpf_sz, BPF_REG_1, BPF_REG_2, test->field_offset),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ };
+ char *stx_reg_map[][2] = {
+ { "$ctx", "r1" },
+ { "$src", "r2" },
+ {}
+ };
+ struct bpf_insn st_prog[] = {
+ BPF_ST_MEM(bpf_sz, BPF_REG_1, test->field_offset,
+ test->st_value.use ? test->st_value.value : 42),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ };
+ char *st_reg_map[][2] = {
+ { "$ctx", "r1" },
+ { "$src", "42" },
+ {}
+ };
+
+ if (test->write || test->write_stx) {
+ char *pattern = test->write_stx ? test->write_stx : test->write;
+
+ pinfo.prog_kind = "STX";
+ pinfo.prog = stx_prog;
+ pinfo.prog_len = ARRAY_SIZE(stx_prog);
+ match_program(btf, &pinfo, pattern, stx_reg_map, true);
+ }
+
+ if (test->write || test->write_st) {
+ char *pattern = test->write_st ? test->write_st : test->write;
+
+ pinfo.prog_kind = "ST";
+ pinfo.prog = st_prog;
+ pinfo.prog_len = ARRAY_SIZE(st_prog);
+ match_program(btf, &pinfo, pattern, st_reg_map, false);
+ }
+ }
+
+ test__end_subtest();
+}
+
+void test_ctx_rewrite(void)
+{
+ struct btf *btf;
+ int i;
+
+ field_regex = compile_regex("^([[:alpha:]_][[:alnum:]_]+)::([[:alpha:]_][[:alnum:]_]+)");
+ ident_regex = compile_regex("^[[:alpha:]_][[:alnum:]_]+");
+ if (!field_regex || !ident_regex)
+ return;
+
+ btf = btf__load_vmlinux_btf();
+ if (!btf) {
+ PRINT_FAIL("Can't load vmlinux BTF, errno %d (%s)\n", errno, strerror(errno));
+ goto out;
+ }
+
+ for (i = 0; i < ARRAY_SIZE(test_cases); ++i)
+ run_one_testcase(btf, &test_cases[i]);
+
+out:
+ btf__free(btf);
+ free_regex(field_regex);
+ free_regex(ident_regex);
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/custom_sec_handlers.c b/tools/testing/selftests/bpf/prog_tests/custom_sec_handlers.c
new file mode 100644
index 000000000000..b2dfc5954aea
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/custom_sec_handlers.c
@@ -0,0 +1,176 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2022 Facebook */
+
+#include <test_progs.h>
+#include "test_custom_sec_handlers.skel.h"
+
+#define COOKIE_ABC1 1
+#define COOKIE_ABC2 2
+#define COOKIE_CUSTOM 3
+#define COOKIE_FALLBACK 4
+#define COOKIE_KPROBE 5
+
+static int custom_setup_prog(struct bpf_program *prog, long cookie)
+{
+ if (cookie == COOKIE_ABC1)
+ bpf_program__set_autoload(prog, false);
+
+ return 0;
+}
+
+static int custom_prepare_load_prog(struct bpf_program *prog,
+ struct bpf_prog_load_opts *opts, long cookie)
+{
+ if (cookie == COOKIE_FALLBACK)
+ opts->prog_flags |= BPF_F_SLEEPABLE;
+ else if (cookie == COOKIE_ABC1)
+ ASSERT_FALSE(true, "unexpected preload for abc");
+
+ return 0;
+}
+
+static int custom_attach_prog(const struct bpf_program *prog, long cookie,
+ struct bpf_link **link)
+{
+ switch (cookie) {
+ case COOKIE_ABC2:
+ *link = bpf_program__attach_raw_tracepoint(prog, "sys_enter");
+ return libbpf_get_error(*link);
+ case COOKIE_CUSTOM:
+ *link = bpf_program__attach_tracepoint(prog, "syscalls", "sys_enter_nanosleep");
+ return libbpf_get_error(*link);
+ case COOKIE_KPROBE:
+ case COOKIE_FALLBACK:
+ /* no auto-attach for SEC("xyz") and SEC("kprobe") */
+ *link = NULL;
+ return 0;
+ default:
+ ASSERT_FALSE(true, "unexpected cookie");
+ return -EINVAL;
+ }
+}
+
+static int abc1_id;
+static int abc2_id;
+static int custom_id;
+static int fallback_id;
+static int kprobe_id;
+
+__attribute__((constructor))
+static void register_sec_handlers(void)
+{
+ LIBBPF_OPTS(libbpf_prog_handler_opts, abc1_opts,
+ .cookie = COOKIE_ABC1,
+ .prog_setup_fn = custom_setup_prog,
+ .prog_prepare_load_fn = custom_prepare_load_prog,
+ .prog_attach_fn = NULL,
+ );
+ LIBBPF_OPTS(libbpf_prog_handler_opts, abc2_opts,
+ .cookie = COOKIE_ABC2,
+ .prog_setup_fn = custom_setup_prog,
+ .prog_prepare_load_fn = custom_prepare_load_prog,
+ .prog_attach_fn = custom_attach_prog,
+ );
+ LIBBPF_OPTS(libbpf_prog_handler_opts, custom_opts,
+ .cookie = COOKIE_CUSTOM,
+ .prog_setup_fn = NULL,
+ .prog_prepare_load_fn = NULL,
+ .prog_attach_fn = custom_attach_prog,
+ );
+
+ abc1_id = libbpf_register_prog_handler("abc", BPF_PROG_TYPE_RAW_TRACEPOINT, 0, &abc1_opts);
+ abc2_id = libbpf_register_prog_handler("abc/", BPF_PROG_TYPE_RAW_TRACEPOINT, 0, &abc2_opts);
+ custom_id = libbpf_register_prog_handler("custom+", BPF_PROG_TYPE_TRACEPOINT, 0, &custom_opts);
+}
+
+__attribute__((destructor))
+static void unregister_sec_handlers(void)
+{
+ libbpf_unregister_prog_handler(abc1_id);
+ libbpf_unregister_prog_handler(abc2_id);
+ libbpf_unregister_prog_handler(custom_id);
+}
+
+void test_custom_sec_handlers(void)
+{
+ LIBBPF_OPTS(libbpf_prog_handler_opts, opts,
+ .prog_setup_fn = custom_setup_prog,
+ .prog_prepare_load_fn = custom_prepare_load_prog,
+ .prog_attach_fn = custom_attach_prog,
+ );
+ struct test_custom_sec_handlers* skel;
+ int err;
+
+ ASSERT_GT(abc1_id, 0, "abc1_id");
+ ASSERT_GT(abc2_id, 0, "abc2_id");
+ ASSERT_GT(custom_id, 0, "custom_id");
+
+ /* override libbpf's handle of SEC("kprobe/...") but also allow pure
+ * SEC("kprobe") due to "kprobe+" specifier. Register it as
+ * TRACEPOINT, just for fun.
+ */
+ opts.cookie = COOKIE_KPROBE;
+ kprobe_id = libbpf_register_prog_handler("kprobe+", BPF_PROG_TYPE_TRACEPOINT, 0, &opts);
+ /* fallback treats everything as BPF_PROG_TYPE_SYSCALL program to test
+ * setting custom BPF_F_SLEEPABLE bit in preload handler
+ */
+ opts.cookie = COOKIE_FALLBACK;
+ fallback_id = libbpf_register_prog_handler(NULL, BPF_PROG_TYPE_SYSCALL, 0, &opts);
+
+ if (!ASSERT_GT(fallback_id, 0, "fallback_id") /* || !ASSERT_GT(kprobe_id, 0, "kprobe_id")*/) {
+ if (fallback_id > 0)
+ libbpf_unregister_prog_handler(fallback_id);
+ if (kprobe_id > 0)
+ libbpf_unregister_prog_handler(kprobe_id);
+ return;
+ }
+
+ /* open skeleton and validate assumptions */
+ skel = test_custom_sec_handlers__open();
+ if (!ASSERT_OK_PTR(skel, "skel_open"))
+ goto cleanup;
+
+ ASSERT_EQ(bpf_program__type(skel->progs.abc1), BPF_PROG_TYPE_RAW_TRACEPOINT, "abc1_type");
+ ASSERT_FALSE(bpf_program__autoload(skel->progs.abc1), "abc1_autoload");
+
+ ASSERT_EQ(bpf_program__type(skel->progs.abc2), BPF_PROG_TYPE_RAW_TRACEPOINT, "abc2_type");
+ ASSERT_EQ(bpf_program__type(skel->progs.custom1), BPF_PROG_TYPE_TRACEPOINT, "custom1_type");
+ ASSERT_EQ(bpf_program__type(skel->progs.custom2), BPF_PROG_TYPE_TRACEPOINT, "custom2_type");
+ ASSERT_EQ(bpf_program__type(skel->progs.kprobe1), BPF_PROG_TYPE_TRACEPOINT, "kprobe1_type");
+ ASSERT_EQ(bpf_program__type(skel->progs.xyz), BPF_PROG_TYPE_SYSCALL, "xyz_type");
+
+ skel->rodata->my_pid = getpid();
+
+ /* now attempt to load everything */
+ err = test_custom_sec_handlers__load(skel);
+ if (!ASSERT_OK(err, "skel_load"))
+ goto cleanup;
+
+ /* now try to auto-attach everything */
+ err = test_custom_sec_handlers__attach(skel);
+ if (!ASSERT_OK(err, "skel_attach"))
+ goto cleanup;
+
+ skel->links.xyz = bpf_program__attach(skel->progs.kprobe1);
+ ASSERT_EQ(errno, EOPNOTSUPP, "xyz_attach_err");
+ ASSERT_ERR_PTR(skel->links.xyz, "xyz_attach");
+
+ /* trigger programs */
+ usleep(1);
+
+ /* SEC("abc") is set to not auto-loaded */
+ ASSERT_FALSE(skel->bss->abc1_called, "abc1_called");
+ ASSERT_TRUE(skel->bss->abc2_called, "abc2_called");
+ ASSERT_TRUE(skel->bss->custom1_called, "custom1_called");
+ ASSERT_TRUE(skel->bss->custom2_called, "custom2_called");
+ /* SEC("kprobe") shouldn't be auto-attached */
+ ASSERT_FALSE(skel->bss->kprobe1_called, "kprobe1_called");
+ /* SEC("xyz") shouldn't be auto-attached */
+ ASSERT_FALSE(skel->bss->xyz_called, "xyz_called");
+
+cleanup:
+ test_custom_sec_handlers__destroy(skel);
+
+ ASSERT_OK(libbpf_unregister_prog_handler(fallback_id), "unregister_fallback");
+ ASSERT_OK(libbpf_unregister_prog_handler(kprobe_id), "unregister_kprobe");
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/d_path.c b/tools/testing/selftests/bpf/prog_tests/d_path.c
new file mode 100644
index 000000000000..ccc768592e66
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/d_path.c
@@ -0,0 +1,208 @@
+// SPDX-License-Identifier: GPL-2.0
+#define _GNU_SOURCE
+#include <test_progs.h>
+#include <sys/stat.h>
+#include <linux/sched.h>
+#include <sys/syscall.h>
+
+#define MAX_PATH_LEN 128
+#define MAX_FILES 7
+
+#include "test_d_path.skel.h"
+#include "test_d_path_check_rdonly_mem.skel.h"
+#include "test_d_path_check_types.skel.h"
+
+/* sys_close_range is not around for long time, so let's
+ * make sure we can call it on systems with older glibc
+ */
+#ifndef __NR_close_range
+#ifdef __alpha__
+#define __NR_close_range 546
+#else
+#define __NR_close_range 436
+#endif
+#endif
+
+static int duration;
+
+static struct {
+ __u32 cnt;
+ char paths[MAX_FILES][MAX_PATH_LEN];
+} src;
+
+static int set_pathname(int fd, pid_t pid)
+{
+ char buf[MAX_PATH_LEN];
+
+ snprintf(buf, MAX_PATH_LEN, "/proc/%d/fd/%d", pid, fd);
+ return readlink(buf, src.paths[src.cnt++], MAX_PATH_LEN);
+}
+
+static int trigger_fstat_events(pid_t pid)
+{
+ int sockfd = -1, procfd = -1, devfd = -1;
+ int localfd = -1, indicatorfd = -1;
+ int pipefd[2] = { -1, -1 };
+ struct stat fileStat;
+ int ret = -1;
+
+ /* unmountable pseudo-filesystems */
+ if (CHECK(pipe(pipefd) < 0, "trigger", "pipe failed\n"))
+ return ret;
+ /* unmountable pseudo-filesystems */
+ sockfd = socket(AF_INET, SOCK_STREAM, 0);
+ if (CHECK(sockfd < 0, "trigger", "socket failed\n"))
+ goto out_close;
+ /* mountable pseudo-filesystems */
+ procfd = open("/proc/self/comm", O_RDONLY);
+ if (CHECK(procfd < 0, "trigger", "open /proc/self/comm failed\n"))
+ goto out_close;
+ devfd = open("/dev/urandom", O_RDONLY);
+ if (CHECK(devfd < 0, "trigger", "open /dev/urandom failed\n"))
+ goto out_close;
+ localfd = open("/tmp/d_path_loadgen.txt", O_CREAT | O_RDONLY, 0644);
+ if (CHECK(localfd < 0, "trigger", "open /tmp/d_path_loadgen.txt failed\n"))
+ goto out_close;
+ /* bpf_d_path will return path with (deleted) */
+ remove("/tmp/d_path_loadgen.txt");
+ indicatorfd = open("/tmp/", O_PATH);
+ if (CHECK(indicatorfd < 0, "trigger", "open /tmp/ failed\n"))
+ goto out_close;
+
+ ret = set_pathname(pipefd[0], pid);
+ if (CHECK(ret < 0, "trigger", "set_pathname failed for pipe[0]\n"))
+ goto out_close;
+ ret = set_pathname(pipefd[1], pid);
+ if (CHECK(ret < 0, "trigger", "set_pathname failed for pipe[1]\n"))
+ goto out_close;
+ ret = set_pathname(sockfd, pid);
+ if (CHECK(ret < 0, "trigger", "set_pathname failed for socket\n"))
+ goto out_close;
+ ret = set_pathname(procfd, pid);
+ if (CHECK(ret < 0, "trigger", "set_pathname failed for proc\n"))
+ goto out_close;
+ ret = set_pathname(devfd, pid);
+ if (CHECK(ret < 0, "trigger", "set_pathname failed for dev\n"))
+ goto out_close;
+ ret = set_pathname(localfd, pid);
+ if (CHECK(ret < 0, "trigger", "set_pathname failed for file\n"))
+ goto out_close;
+ ret = set_pathname(indicatorfd, pid);
+ if (CHECK(ret < 0, "trigger", "set_pathname failed for dir\n"))
+ goto out_close;
+
+ /* triggers vfs_getattr */
+ fstat(pipefd[0], &fileStat);
+ fstat(pipefd[1], &fileStat);
+ fstat(sockfd, &fileStat);
+ fstat(procfd, &fileStat);
+ fstat(devfd, &fileStat);
+ fstat(localfd, &fileStat);
+ fstat(indicatorfd, &fileStat);
+
+out_close:
+ /* sys_close no longer triggers filp_close, but we can
+ * call sys_close_range instead which still does
+ */
+#define close(fd) syscall(__NR_close_range, fd, fd, 0)
+
+ close(pipefd[0]);
+ close(pipefd[1]);
+ close(sockfd);
+ close(procfd);
+ close(devfd);
+ close(localfd);
+ close(indicatorfd);
+
+#undef close
+ return ret;
+}
+
+static void test_d_path_basic(void)
+{
+ struct test_d_path__bss *bss;
+ struct test_d_path *skel;
+ int err;
+
+ skel = test_d_path__open_and_load();
+ if (CHECK(!skel, "setup", "d_path skeleton failed\n"))
+ goto cleanup;
+
+ err = test_d_path__attach(skel);
+ if (CHECK(err, "setup", "attach failed: %d\n", err))
+ goto cleanup;
+
+ bss = skel->bss;
+ bss->my_pid = getpid();
+
+ err = trigger_fstat_events(bss->my_pid);
+ if (err < 0)
+ goto cleanup;
+
+ if (CHECK(!bss->called_stat,
+ "stat",
+ "trampoline for security_inode_getattr was not called\n"))
+ goto cleanup;
+
+ if (CHECK(!bss->called_close,
+ "close",
+ "trampoline for filp_close was not called\n"))
+ goto cleanup;
+
+ for (int i = 0; i < MAX_FILES; i++) {
+ CHECK(strncmp(src.paths[i], bss->paths_stat[i], MAX_PATH_LEN),
+ "check",
+ "failed to get stat path[%d]: %s vs %s\n",
+ i, src.paths[i], bss->paths_stat[i]);
+ CHECK(strncmp(src.paths[i], bss->paths_close[i], MAX_PATH_LEN),
+ "check",
+ "failed to get close path[%d]: %s vs %s\n",
+ i, src.paths[i], bss->paths_close[i]);
+ /* The d_path helper returns size plus NUL char, hence + 1 */
+ CHECK(bss->rets_stat[i] != strlen(bss->paths_stat[i]) + 1,
+ "check",
+ "failed to match stat return [%d]: %d vs %zd [%s]\n",
+ i, bss->rets_stat[i], strlen(bss->paths_stat[i]) + 1,
+ bss->paths_stat[i]);
+ CHECK(bss->rets_close[i] != strlen(bss->paths_stat[i]) + 1,
+ "check",
+ "failed to match stat return [%d]: %d vs %zd [%s]\n",
+ i, bss->rets_close[i], strlen(bss->paths_close[i]) + 1,
+ bss->paths_stat[i]);
+ }
+
+cleanup:
+ test_d_path__destroy(skel);
+}
+
+static void test_d_path_check_rdonly_mem(void)
+{
+ struct test_d_path_check_rdonly_mem *skel;
+
+ skel = test_d_path_check_rdonly_mem__open_and_load();
+ ASSERT_ERR_PTR(skel, "unexpected_load_overwriting_rdonly_mem");
+
+ test_d_path_check_rdonly_mem__destroy(skel);
+}
+
+static void test_d_path_check_types(void)
+{
+ struct test_d_path_check_types *skel;
+
+ skel = test_d_path_check_types__open_and_load();
+ ASSERT_ERR_PTR(skel, "unexpected_load_passing_wrong_type");
+
+ test_d_path_check_types__destroy(skel);
+}
+
+void test_d_path(void)
+{
+ if (test__start_subtest("basic"))
+ test_d_path_basic();
+
+ if (test__start_subtest("check_rdonly_mem"))
+ test_d_path_check_rdonly_mem();
+
+ if (test__start_subtest("check_alloc_mem"))
+ test_d_path_check_types();
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/decap_sanity.c b/tools/testing/selftests/bpf/prog_tests/decap_sanity.c
new file mode 100644
index 000000000000..dcb9e5070cc3
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/decap_sanity.c
@@ -0,0 +1,77 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2022 Meta Platforms, Inc. and affiliates. */
+
+#include <sys/types.h>
+#include <sys/socket.h>
+#include <net/if.h>
+#include <linux/in6.h>
+
+#include "test_progs.h"
+#include "network_helpers.h"
+#include "decap_sanity.skel.h"
+
+#define NS_TEST "decap_sanity_ns"
+#define IPV6_IFACE_ADDR "face::1"
+#define UDP_TEST_PORT 7777
+
+void test_decap_sanity(void)
+{
+ LIBBPF_OPTS(bpf_tc_hook, qdisc_hook, .attach_point = BPF_TC_EGRESS);
+ LIBBPF_OPTS(bpf_tc_opts, tc_attach);
+ struct nstoken *nstoken = NULL;
+ struct decap_sanity *skel;
+ struct sockaddr_in6 addr;
+ socklen_t addrlen;
+ char buf[128] = {};
+ int sockfd, err;
+
+ skel = decap_sanity__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "skel open_and_load"))
+ return;
+
+ SYS(fail, "ip netns add %s", NS_TEST);
+ SYS(fail, "ip -net %s -6 addr add %s/128 dev lo nodad", NS_TEST, IPV6_IFACE_ADDR);
+ SYS(fail, "ip -net %s link set dev lo up", NS_TEST);
+
+ nstoken = open_netns(NS_TEST);
+ if (!ASSERT_OK_PTR(nstoken, "open_netns"))
+ goto fail;
+
+ qdisc_hook.ifindex = if_nametoindex("lo");
+ if (!ASSERT_GT(qdisc_hook.ifindex, 0, "if_nametoindex lo"))
+ goto fail;
+
+ err = bpf_tc_hook_create(&qdisc_hook);
+ if (!ASSERT_OK(err, "create qdisc hook"))
+ goto fail;
+
+ tc_attach.prog_fd = bpf_program__fd(skel->progs.decap_sanity);
+ err = bpf_tc_attach(&qdisc_hook, &tc_attach);
+ if (!ASSERT_OK(err, "attach filter"))
+ goto fail;
+
+ addrlen = sizeof(addr);
+ err = make_sockaddr(AF_INET6, IPV6_IFACE_ADDR, UDP_TEST_PORT,
+ (void *)&addr, &addrlen);
+ if (!ASSERT_OK(err, "make_sockaddr"))
+ goto fail;
+ sockfd = socket(AF_INET6, SOCK_DGRAM, 0);
+ if (!ASSERT_NEQ(sockfd, -1, "socket"))
+ goto fail;
+ err = sendto(sockfd, buf, sizeof(buf), 0, (void *)&addr, addrlen);
+ close(sockfd);
+ if (!ASSERT_EQ(err, sizeof(buf), "send"))
+ goto fail;
+
+ ASSERT_TRUE(skel->bss->init_csum_partial, "init_csum_partial");
+ ASSERT_TRUE(skel->bss->final_csum_none, "final_csum_none");
+ ASSERT_FALSE(skel->bss->broken_csum_start, "broken_csum_start");
+
+fail:
+ if (nstoken) {
+ bpf_tc_hook_destroy(&qdisc_hook);
+ close_netns(nstoken);
+ }
+ SYS_NOFAIL("ip netns del " NS_TEST);
+ decap_sanity__destroy(skel);
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/deny_namespace.c b/tools/testing/selftests/bpf/prog_tests/deny_namespace.c
new file mode 100644
index 000000000000..1bc6241b755b
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/deny_namespace.c
@@ -0,0 +1,102 @@
+// SPDX-License-Identifier: GPL-2.0
+#define _GNU_SOURCE
+#include <test_progs.h>
+#include "test_deny_namespace.skel.h"
+#include <sched.h>
+#include "cap_helpers.h"
+#include <stdio.h>
+
+static int wait_for_pid(pid_t pid)
+{
+ int status, ret;
+
+again:
+ ret = waitpid(pid, &status, 0);
+ if (ret == -1) {
+ if (errno == EINTR)
+ goto again;
+
+ return -1;
+ }
+
+ if (!WIFEXITED(status))
+ return -1;
+
+ return WEXITSTATUS(status);
+}
+
+/* negative return value -> some internal error
+ * positive return value -> userns creation failed
+ * 0 -> userns creation succeeded
+ */
+static int create_user_ns(void)
+{
+ pid_t pid;
+
+ pid = fork();
+ if (pid < 0)
+ return -1;
+
+ if (pid == 0) {
+ if (unshare(CLONE_NEWUSER))
+ _exit(EXIT_FAILURE);
+ _exit(EXIT_SUCCESS);
+ }
+
+ return wait_for_pid(pid);
+}
+
+static void test_userns_create_bpf(void)
+{
+ __u32 cap_mask = 1ULL << CAP_SYS_ADMIN;
+ __u64 old_caps = 0;
+
+ cap_enable_effective(cap_mask, &old_caps);
+
+ ASSERT_OK(create_user_ns(), "priv new user ns");
+
+ cap_disable_effective(cap_mask, &old_caps);
+
+ ASSERT_EQ(create_user_ns(), EPERM, "unpriv new user ns");
+
+ if (cap_mask & old_caps)
+ cap_enable_effective(cap_mask, NULL);
+}
+
+static void test_unpriv_userns_create_no_bpf(void)
+{
+ __u32 cap_mask = 1ULL << CAP_SYS_ADMIN;
+ __u64 old_caps = 0;
+
+ cap_disable_effective(cap_mask, &old_caps);
+
+ ASSERT_OK(create_user_ns(), "no-bpf unpriv new user ns");
+
+ if (cap_mask & old_caps)
+ cap_enable_effective(cap_mask, NULL);
+}
+
+void test_deny_namespace(void)
+{
+ struct test_deny_namespace *skel = NULL;
+ int err;
+
+ if (test__start_subtest("unpriv_userns_create_no_bpf"))
+ test_unpriv_userns_create_no_bpf();
+
+ skel = test_deny_namespace__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "skel load"))
+ goto close_prog;
+
+ err = test_deny_namespace__attach(skel);
+ if (!ASSERT_OK(err, "attach"))
+ goto close_prog;
+
+ if (test__start_subtest("userns_create_bpf"))
+ test_userns_create_bpf();
+
+ test_deny_namespace__detach(skel);
+
+close_prog:
+ test_deny_namespace__destroy(skel);
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/dummy_st_ops.c b/tools/testing/selftests/bpf/prog_tests/dummy_st_ops.c
new file mode 100644
index 000000000000..f43fcb13d2c4
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/dummy_st_ops.c
@@ -0,0 +1,161 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (C) 2021. Huawei Technologies Co., Ltd */
+#include <test_progs.h>
+#include "dummy_st_ops_success.skel.h"
+#include "dummy_st_ops_fail.skel.h"
+#include "trace_dummy_st_ops.skel.h"
+
+/* Need to keep consistent with definition in include/linux/bpf.h */
+struct bpf_dummy_ops_state {
+ int val;
+};
+
+static void test_dummy_st_ops_attach(void)
+{
+ struct dummy_st_ops_success *skel;
+ struct bpf_link *link;
+
+ skel = dummy_st_ops_success__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "dummy_st_ops_load"))
+ return;
+
+ link = bpf_map__attach_struct_ops(skel->maps.dummy_1);
+ ASSERT_EQ(libbpf_get_error(link), -EOPNOTSUPP, "dummy_st_ops_attach");
+
+ dummy_st_ops_success__destroy(skel);
+}
+
+static void test_dummy_init_ret_value(void)
+{
+ __u64 args[1] = {0};
+ LIBBPF_OPTS(bpf_test_run_opts, attr,
+ .ctx_in = args,
+ .ctx_size_in = sizeof(args),
+ );
+ struct dummy_st_ops_success *skel;
+ int fd, err;
+
+ skel = dummy_st_ops_success__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "dummy_st_ops_load"))
+ return;
+
+ fd = bpf_program__fd(skel->progs.test_1);
+ err = bpf_prog_test_run_opts(fd, &attr);
+ ASSERT_OK(err, "test_run");
+ ASSERT_EQ(attr.retval, 0xf2f3f4f5, "test_ret");
+
+ dummy_st_ops_success__destroy(skel);
+}
+
+static void test_dummy_init_ptr_arg(void)
+{
+ int exp_retval = 0xbeef;
+ struct bpf_dummy_ops_state in_state = {
+ .val = exp_retval,
+ };
+ __u64 args[1] = {(unsigned long)&in_state};
+ LIBBPF_OPTS(bpf_test_run_opts, attr,
+ .ctx_in = args,
+ .ctx_size_in = sizeof(args),
+ );
+ struct trace_dummy_st_ops *trace_skel;
+ struct dummy_st_ops_success *skel;
+ int fd, err;
+
+ skel = dummy_st_ops_success__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "dummy_st_ops_load"))
+ return;
+
+ fd = bpf_program__fd(skel->progs.test_1);
+
+ trace_skel = trace_dummy_st_ops__open();
+ if (!ASSERT_OK_PTR(trace_skel, "trace_dummy_st_ops__open"))
+ goto done;
+
+ err = bpf_program__set_attach_target(trace_skel->progs.fentry_test_1,
+ fd, "test_1");
+ if (!ASSERT_OK(err, "set_attach_target(fentry_test_1)"))
+ goto done;
+
+ err = trace_dummy_st_ops__load(trace_skel);
+ if (!ASSERT_OK(err, "load(trace_skel)"))
+ goto done;
+
+ err = trace_dummy_st_ops__attach(trace_skel);
+ if (!ASSERT_OK(err, "attach(trace_skel)"))
+ goto done;
+
+ err = bpf_prog_test_run_opts(fd, &attr);
+ ASSERT_OK(err, "test_run");
+ ASSERT_EQ(in_state.val, 0x5a, "test_ptr_ret");
+ ASSERT_EQ(attr.retval, exp_retval, "test_ret");
+ ASSERT_EQ(trace_skel->bss->val, exp_retval, "fentry_val");
+
+done:
+ dummy_st_ops_success__destroy(skel);
+ trace_dummy_st_ops__destroy(trace_skel);
+}
+
+static void test_dummy_multiple_args(void)
+{
+ __u64 args[5] = {0, -100, 0x8a5f, 'c', 0x1234567887654321ULL};
+ LIBBPF_OPTS(bpf_test_run_opts, attr,
+ .ctx_in = args,
+ .ctx_size_in = sizeof(args),
+ );
+ struct dummy_st_ops_success *skel;
+ int fd, err;
+ size_t i;
+ char name[8];
+
+ skel = dummy_st_ops_success__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "dummy_st_ops_load"))
+ return;
+
+ fd = bpf_program__fd(skel->progs.test_2);
+ err = bpf_prog_test_run_opts(fd, &attr);
+ ASSERT_OK(err, "test_run");
+ for (i = 0; i < ARRAY_SIZE(args); i++) {
+ snprintf(name, sizeof(name), "arg %zu", i);
+ ASSERT_EQ(skel->bss->test_2_args[i], args[i], name);
+ }
+
+ dummy_st_ops_success__destroy(skel);
+}
+
+static void test_dummy_sleepable(void)
+{
+ __u64 args[1] = {0};
+ LIBBPF_OPTS(bpf_test_run_opts, attr,
+ .ctx_in = args,
+ .ctx_size_in = sizeof(args),
+ );
+ struct dummy_st_ops_success *skel;
+ int fd, err;
+
+ skel = dummy_st_ops_success__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "dummy_st_ops_load"))
+ return;
+
+ fd = bpf_program__fd(skel->progs.test_sleepable);
+ err = bpf_prog_test_run_opts(fd, &attr);
+ ASSERT_OK(err, "test_run");
+
+ dummy_st_ops_success__destroy(skel);
+}
+
+void test_dummy_st_ops(void)
+{
+ if (test__start_subtest("dummy_st_ops_attach"))
+ test_dummy_st_ops_attach();
+ if (test__start_subtest("dummy_init_ret_value"))
+ test_dummy_init_ret_value();
+ if (test__start_subtest("dummy_init_ptr_arg"))
+ test_dummy_init_ptr_arg();
+ if (test__start_subtest("dummy_multiple_args"))
+ test_dummy_multiple_args();
+ if (test__start_subtest("dummy_sleepable"))
+ test_dummy_sleepable();
+
+ RUN_TESTS(dummy_st_ops_fail);
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/dynptr.c b/tools/testing/selftests/bpf/prog_tests/dynptr.c
new file mode 100644
index 000000000000..7cfac53c0d58
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/dynptr.c
@@ -0,0 +1,110 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2022 Facebook */
+
+#include <test_progs.h>
+#include <network_helpers.h>
+#include "dynptr_fail.skel.h"
+#include "dynptr_success.skel.h"
+
+enum test_setup_type {
+ SETUP_SYSCALL_SLEEP,
+ SETUP_SKB_PROG,
+};
+
+static struct {
+ const char *prog_name;
+ enum test_setup_type type;
+} success_tests[] = {
+ {"test_read_write", SETUP_SYSCALL_SLEEP},
+ {"test_dynptr_data", SETUP_SYSCALL_SLEEP},
+ {"test_ringbuf", SETUP_SYSCALL_SLEEP},
+ {"test_skb_readonly", SETUP_SKB_PROG},
+ {"test_dynptr_skb_data", SETUP_SKB_PROG},
+ {"test_adjust", SETUP_SYSCALL_SLEEP},
+ {"test_adjust_err", SETUP_SYSCALL_SLEEP},
+ {"test_zero_size_dynptr", SETUP_SYSCALL_SLEEP},
+ {"test_dynptr_is_null", SETUP_SYSCALL_SLEEP},
+ {"test_dynptr_is_rdonly", SETUP_SKB_PROG},
+ {"test_dynptr_clone", SETUP_SKB_PROG},
+ {"test_dynptr_skb_no_buff", SETUP_SKB_PROG},
+ {"test_dynptr_skb_strcmp", SETUP_SKB_PROG},
+};
+
+static void verify_success(const char *prog_name, enum test_setup_type setup_type)
+{
+ struct dynptr_success *skel;
+ struct bpf_program *prog;
+ struct bpf_link *link;
+ int err;
+
+ skel = dynptr_success__open();
+ if (!ASSERT_OK_PTR(skel, "dynptr_success__open"))
+ return;
+
+ skel->bss->pid = getpid();
+
+ prog = bpf_object__find_program_by_name(skel->obj, prog_name);
+ if (!ASSERT_OK_PTR(prog, "bpf_object__find_program_by_name"))
+ goto cleanup;
+
+ bpf_program__set_autoload(prog, true);
+
+ err = dynptr_success__load(skel);
+ if (!ASSERT_OK(err, "dynptr_success__load"))
+ goto cleanup;
+
+ switch (setup_type) {
+ case SETUP_SYSCALL_SLEEP:
+ link = bpf_program__attach(prog);
+ if (!ASSERT_OK_PTR(link, "bpf_program__attach"))
+ goto cleanup;
+
+ usleep(1);
+
+ bpf_link__destroy(link);
+ break;
+ case SETUP_SKB_PROG:
+ {
+ int prog_fd;
+ char buf[64];
+
+ LIBBPF_OPTS(bpf_test_run_opts, topts,
+ .data_in = &pkt_v4,
+ .data_size_in = sizeof(pkt_v4),
+ .data_out = buf,
+ .data_size_out = sizeof(buf),
+ .repeat = 1,
+ );
+
+ prog_fd = bpf_program__fd(prog);
+ if (!ASSERT_GE(prog_fd, 0, "prog_fd"))
+ goto cleanup;
+
+ err = bpf_prog_test_run_opts(prog_fd, &topts);
+
+ if (!ASSERT_OK(err, "test_run"))
+ goto cleanup;
+
+ break;
+ }
+ }
+
+ ASSERT_EQ(skel->bss->err, 0, "err");
+
+cleanup:
+ dynptr_success__destroy(skel);
+}
+
+void test_dynptr(void)
+{
+ int i;
+
+ for (i = 0; i < ARRAY_SIZE(success_tests); i++) {
+ if (!test__start_subtest(success_tests[i].prog_name))
+ continue;
+
+ verify_success(success_tests[i].prog_name, success_tests[i].type);
+ }
+
+ RUN_TESTS(dynptr_fail);
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/empty_skb.c b/tools/testing/selftests/bpf/prog_tests/empty_skb.c
new file mode 100644
index 000000000000..261228eb68e8
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/empty_skb.c
@@ -0,0 +1,149 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <test_progs.h>
+#include <network_helpers.h>
+#include <net/if.h>
+#include "empty_skb.skel.h"
+
+void test_empty_skb(void)
+{
+ LIBBPF_OPTS(bpf_test_run_opts, tattr);
+ struct empty_skb *bpf_obj = NULL;
+ struct nstoken *tok = NULL;
+ struct bpf_program *prog;
+ char eth_hlen_pp[15];
+ char eth_hlen[14];
+ int veth_ifindex;
+ int ipip_ifindex;
+ int err;
+ int i;
+
+ struct {
+ const char *msg;
+ const void *data_in;
+ __u32 data_size_in;
+ int *ifindex;
+ int err;
+ int ret;
+ int lwt_egress_ret; /* expected retval at lwt/egress */
+ bool success_on_tc;
+ } tests[] = {
+ /* Empty packets are always rejected. */
+
+ {
+ /* BPF_PROG_RUN ETH_HLEN size check */
+ .msg = "veth empty ingress packet",
+ .data_in = NULL,
+ .data_size_in = 0,
+ .ifindex = &veth_ifindex,
+ .err = -EINVAL,
+ },
+ {
+ /* BPF_PROG_RUN ETH_HLEN size check */
+ .msg = "ipip empty ingress packet",
+ .data_in = NULL,
+ .data_size_in = 0,
+ .ifindex = &ipip_ifindex,
+ .err = -EINVAL,
+ },
+
+ /* ETH_HLEN-sized packets:
+ * - can not be redirected at LWT_XMIT
+ * - can be redirected at TC to non-tunneling dest
+ */
+
+ {
+ /* __bpf_redirect_common */
+ .msg = "veth ETH_HLEN packet ingress",
+ .data_in = eth_hlen,
+ .data_size_in = sizeof(eth_hlen),
+ .ifindex = &veth_ifindex,
+ .ret = -ERANGE,
+ .lwt_egress_ret = -ERANGE,
+ .success_on_tc = true,
+ },
+ {
+ /* __bpf_redirect_no_mac
+ *
+ * lwt: skb->len=0 <= skb_network_offset=0
+ * tc: skb->len=14 <= skb_network_offset=14
+ */
+ .msg = "ipip ETH_HLEN packet ingress",
+ .data_in = eth_hlen,
+ .data_size_in = sizeof(eth_hlen),
+ .ifindex = &ipip_ifindex,
+ .ret = -ERANGE,
+ .lwt_egress_ret = -ERANGE,
+ },
+
+ /* ETH_HLEN+1-sized packet should be redirected. */
+
+ {
+ .msg = "veth ETH_HLEN+1 packet ingress",
+ .data_in = eth_hlen_pp,
+ .data_size_in = sizeof(eth_hlen_pp),
+ .ifindex = &veth_ifindex,
+ .lwt_egress_ret = 1, /* veth_xmit NET_XMIT_DROP */
+ },
+ {
+ .msg = "ipip ETH_HLEN+1 packet ingress",
+ .data_in = eth_hlen_pp,
+ .data_size_in = sizeof(eth_hlen_pp),
+ .ifindex = &ipip_ifindex,
+ },
+ };
+
+ SYS(out, "ip netns add empty_skb");
+ tok = open_netns("empty_skb");
+ SYS(out, "ip link add veth0 type veth peer veth1");
+ SYS(out, "ip link set dev veth0 up");
+ SYS(out, "ip link set dev veth1 up");
+ SYS(out, "ip addr add 10.0.0.1/8 dev veth0");
+ SYS(out, "ip addr add 10.0.0.2/8 dev veth1");
+ veth_ifindex = if_nametoindex("veth0");
+
+ SYS(out, "ip link add ipip0 type ipip local 10.0.0.1 remote 10.0.0.2");
+ SYS(out, "ip link set ipip0 up");
+ SYS(out, "ip addr add 192.168.1.1/16 dev ipip0");
+ ipip_ifindex = if_nametoindex("ipip0");
+
+ bpf_obj = empty_skb__open_and_load();
+ if (!ASSERT_OK_PTR(bpf_obj, "open skeleton"))
+ goto out;
+
+ for (i = 0; i < ARRAY_SIZE(tests); i++) {
+ bpf_object__for_each_program(prog, bpf_obj->obj) {
+ bool at_egress = strstr(bpf_program__name(prog), "egress") != NULL;
+ bool at_tc = !strncmp(bpf_program__section_name(prog), "tc", 2);
+ int expected_ret;
+ char buf[128];
+
+ expected_ret = at_egress && !at_tc ? tests[i].lwt_egress_ret : tests[i].ret;
+
+ tattr.data_in = tests[i].data_in;
+ tattr.data_size_in = tests[i].data_size_in;
+
+ tattr.data_size_out = 0;
+ bpf_obj->bss->ifindex = *tests[i].ifindex;
+ bpf_obj->bss->ret = 0;
+ err = bpf_prog_test_run_opts(bpf_program__fd(prog), &tattr);
+ sprintf(buf, "err: %s [%s]", tests[i].msg, bpf_program__name(prog));
+
+ if (at_tc && tests[i].success_on_tc)
+ ASSERT_GE(err, 0, buf);
+ else
+ ASSERT_EQ(err, tests[i].err, buf);
+ sprintf(buf, "ret: %s [%s]", tests[i].msg, bpf_program__name(prog));
+ if (at_tc && tests[i].success_on_tc)
+ ASSERT_GE(bpf_obj->bss->ret, 0, buf);
+ else
+ ASSERT_EQ(bpf_obj->bss->ret, expected_ret, buf);
+ }
+ }
+
+out:
+ if (bpf_obj)
+ empty_skb__destroy(bpf_obj);
+ if (tok)
+ close_netns(tok);
+ SYS_NOFAIL("ip netns del empty_skb");
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/enable_stats.c b/tools/testing/selftests/bpf/prog_tests/enable_stats.c
index 2cb2085917e7..75f85d0fe74a 100644
--- a/tools/testing/selftests/bpf/prog_tests/enable_stats.c
+++ b/tools/testing/selftests/bpf/prog_tests/enable_stats.c
@@ -28,7 +28,7 @@ void test_enable_stats(void)
prog_fd = bpf_program__fd(skel->progs.test_enable_stats);
memset(&info, 0, info_len);
- err = bpf_obj_get_info_by_fd(prog_fd, &info, &info_len);
+ err = bpf_prog_get_info_by_fd(prog_fd, &info, &info_len);
if (CHECK(err, "get_prog_info",
"failed to get bpf_prog_info for fd %d\n", prog_fd))
goto cleanup;
diff --git a/tools/testing/selftests/bpf/prog_tests/endian.c b/tools/testing/selftests/bpf/prog_tests/endian.c
new file mode 100644
index 000000000000..1a11612ace6c
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/endian.c
@@ -0,0 +1,53 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2020 Facebook */
+
+#include <test_progs.h>
+#include "test_endian.skel.h"
+
+static int duration;
+
+#define IN16 0x1234
+#define IN32 0x12345678U
+#define IN64 0x123456789abcdef0ULL
+
+#define OUT16 0x3412
+#define OUT32 0x78563412U
+#define OUT64 0xf0debc9a78563412ULL
+
+void test_endian(void)
+{
+ struct test_endian* skel;
+ struct test_endian__bss *bss;
+ int err;
+
+ skel = test_endian__open_and_load();
+ if (CHECK(!skel, "skel_open", "failed to open skeleton\n"))
+ return;
+ bss = skel->bss;
+
+ bss->in16 = IN16;
+ bss->in32 = IN32;
+ bss->in64 = IN64;
+
+ err = test_endian__attach(skel);
+ if (CHECK(err, "skel_attach", "skeleton attach failed: %d\n", err))
+ goto cleanup;
+
+ usleep(1);
+
+ CHECK(bss->out16 != OUT16, "out16", "got 0x%llx != exp 0x%llx\n",
+ (__u64)bss->out16, (__u64)OUT16);
+ CHECK(bss->out32 != OUT32, "out32", "got 0x%llx != exp 0x%llx\n",
+ (__u64)bss->out32, (__u64)OUT32);
+ CHECK(bss->out64 != OUT64, "out16", "got 0x%llx != exp 0x%llx\n",
+ (__u64)bss->out64, (__u64)OUT64);
+
+ CHECK(bss->const16 != OUT16, "const16", "got 0x%llx != exp 0x%llx\n",
+ (__u64)bss->const16, (__u64)OUT16);
+ CHECK(bss->const32 != OUT32, "const32", "got 0x%llx != exp 0x%llx\n",
+ (__u64)bss->const32, (__u64)OUT32);
+ CHECK(bss->const64 != OUT64, "const64", "got 0x%llx != exp 0x%llx\n",
+ (__u64)bss->const64, (__u64)OUT64);
+cleanup:
+ test_endian__destroy(skel);
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/exceptions.c b/tools/testing/selftests/bpf/prog_tests/exceptions.c
new file mode 100644
index 000000000000..516f4a13013c
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/exceptions.c
@@ -0,0 +1,409 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <test_progs.h>
+#include <network_helpers.h>
+
+#include "exceptions.skel.h"
+#include "exceptions_ext.skel.h"
+#include "exceptions_fail.skel.h"
+#include "exceptions_assert.skel.h"
+
+static char log_buf[1024 * 1024];
+
+static void test_exceptions_failure(void)
+{
+ RUN_TESTS(exceptions_fail);
+}
+
+static void test_exceptions_success(void)
+{
+ LIBBPF_OPTS(bpf_test_run_opts, ropts,
+ .data_in = &pkt_v4,
+ .data_size_in = sizeof(pkt_v4),
+ .repeat = 1,
+ );
+ struct exceptions_ext *eskel = NULL;
+ struct exceptions *skel;
+ int ret;
+
+ skel = exceptions__open();
+ if (!ASSERT_OK_PTR(skel, "exceptions__open"))
+ return;
+
+ ret = exceptions__load(skel);
+ if (!ASSERT_OK(ret, "exceptions__load"))
+ goto done;
+
+ if (!ASSERT_OK(bpf_map_update_elem(bpf_map__fd(skel->maps.jmp_table), &(int){0},
+ &(int){bpf_program__fd(skel->progs.exception_tail_call_target)}, BPF_ANY),
+ "bpf_map_update_elem jmp_table"))
+ goto done;
+
+#define RUN_SUCCESS(_prog, return_val) \
+ if (!test__start_subtest(#_prog)) goto _prog##_##return_val; \
+ ret = bpf_prog_test_run_opts(bpf_program__fd(skel->progs._prog), &ropts); \
+ ASSERT_OK(ret, #_prog " prog run ret"); \
+ ASSERT_EQ(ropts.retval, return_val, #_prog " prog run retval"); \
+ _prog##_##return_val:
+
+ RUN_SUCCESS(exception_throw_always_1, 64);
+ RUN_SUCCESS(exception_throw_always_2, 32);
+ RUN_SUCCESS(exception_throw_unwind_1, 16);
+ RUN_SUCCESS(exception_throw_unwind_2, 32);
+ RUN_SUCCESS(exception_throw_default, 0);
+ RUN_SUCCESS(exception_throw_default_value, 5);
+ RUN_SUCCESS(exception_tail_call, 24);
+ RUN_SUCCESS(exception_ext, 0);
+ RUN_SUCCESS(exception_ext_mod_cb_runtime, 35);
+ RUN_SUCCESS(exception_throw_subprog, 1);
+ RUN_SUCCESS(exception_assert_nz_gfunc, 1);
+ RUN_SUCCESS(exception_assert_zero_gfunc, 1);
+ RUN_SUCCESS(exception_assert_neg_gfunc, 1);
+ RUN_SUCCESS(exception_assert_pos_gfunc, 1);
+ RUN_SUCCESS(exception_assert_negeq_gfunc, 1);
+ RUN_SUCCESS(exception_assert_poseq_gfunc, 1);
+ RUN_SUCCESS(exception_assert_nz_gfunc_with, 1);
+ RUN_SUCCESS(exception_assert_zero_gfunc_with, 1);
+ RUN_SUCCESS(exception_assert_neg_gfunc_with, 1);
+ RUN_SUCCESS(exception_assert_pos_gfunc_with, 1);
+ RUN_SUCCESS(exception_assert_negeq_gfunc_with, 1);
+ RUN_SUCCESS(exception_assert_poseq_gfunc_with, 1);
+ RUN_SUCCESS(exception_bad_assert_nz_gfunc, 0);
+ RUN_SUCCESS(exception_bad_assert_zero_gfunc, 0);
+ RUN_SUCCESS(exception_bad_assert_neg_gfunc, 0);
+ RUN_SUCCESS(exception_bad_assert_pos_gfunc, 0);
+ RUN_SUCCESS(exception_bad_assert_negeq_gfunc, 0);
+ RUN_SUCCESS(exception_bad_assert_poseq_gfunc, 0);
+ RUN_SUCCESS(exception_bad_assert_nz_gfunc_with, 100);
+ RUN_SUCCESS(exception_bad_assert_zero_gfunc_with, 105);
+ RUN_SUCCESS(exception_bad_assert_neg_gfunc_with, 200);
+ RUN_SUCCESS(exception_bad_assert_pos_gfunc_with, 0);
+ RUN_SUCCESS(exception_bad_assert_negeq_gfunc_with, 101);
+ RUN_SUCCESS(exception_bad_assert_poseq_gfunc_with, 99);
+ RUN_SUCCESS(exception_assert_range, 1);
+ RUN_SUCCESS(exception_assert_range_with, 1);
+ RUN_SUCCESS(exception_bad_assert_range, 0);
+ RUN_SUCCESS(exception_bad_assert_range_with, 10);
+
+#define RUN_EXT(load_ret, attach_err, expr, msg, after_link) \
+ { \
+ LIBBPF_OPTS(bpf_object_open_opts, o, .kernel_log_buf = log_buf, \
+ .kernel_log_size = sizeof(log_buf), \
+ .kernel_log_level = 2); \
+ exceptions_ext__destroy(eskel); \
+ eskel = exceptions_ext__open_opts(&o); \
+ struct bpf_program *prog = NULL; \
+ struct bpf_link *link = NULL; \
+ if (!ASSERT_OK_PTR(eskel, "exceptions_ext__open")) \
+ goto done; \
+ (expr); \
+ ASSERT_OK_PTR(bpf_program__name(prog), bpf_program__name(prog)); \
+ if (!ASSERT_EQ(exceptions_ext__load(eskel), load_ret, \
+ "exceptions_ext__load")) { \
+ printf("%s\n", log_buf); \
+ goto done; \
+ } \
+ if (load_ret != 0) { \
+ if (!ASSERT_OK_PTR(strstr(log_buf, msg), "strstr")) { \
+ printf("%s\n", log_buf); \
+ goto done; \
+ } \
+ } \
+ if (!load_ret && attach_err) { \
+ if (!ASSERT_ERR_PTR(link = bpf_program__attach(prog), "attach err")) \
+ goto done; \
+ } else if (!load_ret) { \
+ if (!ASSERT_OK_PTR(link = bpf_program__attach(prog), "attach ok")) \
+ goto done; \
+ (void)(after_link); \
+ bpf_link__destroy(link); \
+ } \
+ }
+
+ if (test__start_subtest("non-throwing fentry -> exception_cb"))
+ RUN_EXT(-EINVAL, true, ({
+ prog = eskel->progs.pfentry;
+ bpf_program__set_autoload(prog, true);
+ if (!ASSERT_OK(bpf_program__set_attach_target(prog,
+ bpf_program__fd(skel->progs.exception_ext_mod_cb_runtime),
+ "exception_cb_mod"), "set_attach_target"))
+ goto done;
+ }), "FENTRY/FEXIT programs cannot attach to exception callback", 0);
+
+ if (test__start_subtest("throwing fentry -> exception_cb"))
+ RUN_EXT(-EINVAL, true, ({
+ prog = eskel->progs.throwing_fentry;
+ bpf_program__set_autoload(prog, true);
+ if (!ASSERT_OK(bpf_program__set_attach_target(prog,
+ bpf_program__fd(skel->progs.exception_ext_mod_cb_runtime),
+ "exception_cb_mod"), "set_attach_target"))
+ goto done;
+ }), "FENTRY/FEXIT programs cannot attach to exception callback", 0);
+
+ if (test__start_subtest("non-throwing fexit -> exception_cb"))
+ RUN_EXT(-EINVAL, true, ({
+ prog = eskel->progs.pfexit;
+ bpf_program__set_autoload(prog, true);
+ if (!ASSERT_OK(bpf_program__set_attach_target(prog,
+ bpf_program__fd(skel->progs.exception_ext_mod_cb_runtime),
+ "exception_cb_mod"), "set_attach_target"))
+ goto done;
+ }), "FENTRY/FEXIT programs cannot attach to exception callback", 0);
+
+ if (test__start_subtest("throwing fexit -> exception_cb"))
+ RUN_EXT(-EINVAL, true, ({
+ prog = eskel->progs.throwing_fexit;
+ bpf_program__set_autoload(prog, true);
+ if (!ASSERT_OK(bpf_program__set_attach_target(prog,
+ bpf_program__fd(skel->progs.exception_ext_mod_cb_runtime),
+ "exception_cb_mod"), "set_attach_target"))
+ goto done;
+ }), "FENTRY/FEXIT programs cannot attach to exception callback", 0);
+
+ if (test__start_subtest("throwing extension (with custom cb) -> exception_cb"))
+ RUN_EXT(-EINVAL, true, ({
+ prog = eskel->progs.throwing_exception_cb_extension;
+ bpf_program__set_autoload(prog, true);
+ if (!ASSERT_OK(bpf_program__set_attach_target(prog,
+ bpf_program__fd(skel->progs.exception_ext_mod_cb_runtime),
+ "exception_cb_mod"), "set_attach_target"))
+ goto done;
+ }), "Extension programs cannot attach to exception callback", 0);
+
+ if (test__start_subtest("throwing extension -> global func in exception_cb"))
+ RUN_EXT(0, false, ({
+ prog = eskel->progs.throwing_exception_cb_extension;
+ bpf_program__set_autoload(prog, true);
+ if (!ASSERT_OK(bpf_program__set_attach_target(prog,
+ bpf_program__fd(skel->progs.exception_ext_mod_cb_runtime),
+ "exception_cb_mod_global"), "set_attach_target"))
+ goto done;
+ }), "", ({ RUN_SUCCESS(exception_ext_mod_cb_runtime, 131); }));
+
+ if (test__start_subtest("throwing extension (with custom cb) -> global func in exception_cb"))
+ RUN_EXT(0, false, ({
+ prog = eskel->progs.throwing_extension;
+ bpf_program__set_autoload(prog, true);
+ if (!ASSERT_OK(bpf_program__set_attach_target(prog,
+ bpf_program__fd(skel->progs.exception_ext),
+ "exception_ext_global"), "set_attach_target"))
+ goto done;
+ }), "", ({ RUN_SUCCESS(exception_ext, 128); }));
+
+ if (test__start_subtest("non-throwing fentry -> non-throwing subprog"))
+ /* non-throwing fentry -> non-throwing subprog : OK */
+ RUN_EXT(0, false, ({
+ prog = eskel->progs.pfentry;
+ bpf_program__set_autoload(prog, true);
+ if (!ASSERT_OK(bpf_program__set_attach_target(prog,
+ bpf_program__fd(skel->progs.exception_throw_subprog),
+ "subprog"), "set_attach_target"))
+ goto done;
+ }), "", 0);
+
+ if (test__start_subtest("throwing fentry -> non-throwing subprog"))
+ /* throwing fentry -> non-throwing subprog : OK */
+ RUN_EXT(0, false, ({
+ prog = eskel->progs.throwing_fentry;
+ bpf_program__set_autoload(prog, true);
+ if (!ASSERT_OK(bpf_program__set_attach_target(prog,
+ bpf_program__fd(skel->progs.exception_throw_subprog),
+ "subprog"), "set_attach_target"))
+ goto done;
+ }), "", 0);
+
+ if (test__start_subtest("non-throwing fentry -> throwing subprog"))
+ /* non-throwing fentry -> throwing subprog : OK */
+ RUN_EXT(0, false, ({
+ prog = eskel->progs.pfentry;
+ bpf_program__set_autoload(prog, true);
+ if (!ASSERT_OK(bpf_program__set_attach_target(prog,
+ bpf_program__fd(skel->progs.exception_throw_subprog),
+ "throwing_subprog"), "set_attach_target"))
+ goto done;
+ }), "", 0);
+
+ if (test__start_subtest("throwing fentry -> throwing subprog"))
+ /* throwing fentry -> throwing subprog : OK */
+ RUN_EXT(0, false, ({
+ prog = eskel->progs.throwing_fentry;
+ bpf_program__set_autoload(prog, true);
+ if (!ASSERT_OK(bpf_program__set_attach_target(prog,
+ bpf_program__fd(skel->progs.exception_throw_subprog),
+ "throwing_subprog"), "set_attach_target"))
+ goto done;
+ }), "", 0);
+
+ if (test__start_subtest("non-throwing fexit -> non-throwing subprog"))
+ /* non-throwing fexit -> non-throwing subprog : OK */
+ RUN_EXT(0, false, ({
+ prog = eskel->progs.pfexit;
+ bpf_program__set_autoload(prog, true);
+ if (!ASSERT_OK(bpf_program__set_attach_target(prog,
+ bpf_program__fd(skel->progs.exception_throw_subprog),
+ "subprog"), "set_attach_target"))
+ goto done;
+ }), "", 0);
+
+ if (test__start_subtest("throwing fexit -> non-throwing subprog"))
+ /* throwing fexit -> non-throwing subprog : OK */
+ RUN_EXT(0, false, ({
+ prog = eskel->progs.throwing_fexit;
+ bpf_program__set_autoload(prog, true);
+ if (!ASSERT_OK(bpf_program__set_attach_target(prog,
+ bpf_program__fd(skel->progs.exception_throw_subprog),
+ "subprog"), "set_attach_target"))
+ goto done;
+ }), "", 0);
+
+ if (test__start_subtest("non-throwing fexit -> throwing subprog"))
+ /* non-throwing fexit -> throwing subprog : OK */
+ RUN_EXT(0, false, ({
+ prog = eskel->progs.pfexit;
+ bpf_program__set_autoload(prog, true);
+ if (!ASSERT_OK(bpf_program__set_attach_target(prog,
+ bpf_program__fd(skel->progs.exception_throw_subprog),
+ "throwing_subprog"), "set_attach_target"))
+ goto done;
+ }), "", 0);
+
+ if (test__start_subtest("throwing fexit -> throwing subprog"))
+ /* throwing fexit -> throwing subprog : OK */
+ RUN_EXT(0, false, ({
+ prog = eskel->progs.throwing_fexit;
+ bpf_program__set_autoload(prog, true);
+ if (!ASSERT_OK(bpf_program__set_attach_target(prog,
+ bpf_program__fd(skel->progs.exception_throw_subprog),
+ "throwing_subprog"), "set_attach_target"))
+ goto done;
+ }), "", 0);
+
+ /* fmod_ret not allowed for subprog - Check so we remember to handle its
+ * throwing specification compatibility with target when supported.
+ */
+ if (test__start_subtest("non-throwing fmod_ret -> non-throwing subprog"))
+ RUN_EXT(-EINVAL, true, ({
+ prog = eskel->progs.pfmod_ret;
+ bpf_program__set_autoload(prog, true);
+ if (!ASSERT_OK(bpf_program__set_attach_target(prog,
+ bpf_program__fd(skel->progs.exception_throw_subprog),
+ "subprog"), "set_attach_target"))
+ goto done;
+ }), "can't modify return codes of BPF program", 0);
+
+ /* fmod_ret not allowed for subprog - Check so we remember to handle its
+ * throwing specification compatibility with target when supported.
+ */
+ if (test__start_subtest("non-throwing fmod_ret -> non-throwing global subprog"))
+ RUN_EXT(-EINVAL, true, ({
+ prog = eskel->progs.pfmod_ret;
+ bpf_program__set_autoload(prog, true);
+ if (!ASSERT_OK(bpf_program__set_attach_target(prog,
+ bpf_program__fd(skel->progs.exception_throw_subprog),
+ "global_subprog"), "set_attach_target"))
+ goto done;
+ }), "can't modify return codes of BPF program", 0);
+
+ if (test__start_subtest("non-throwing extension -> non-throwing subprog"))
+ /* non-throwing extension -> non-throwing subprog : BAD (!global) */
+ RUN_EXT(-EINVAL, true, ({
+ prog = eskel->progs.extension;
+ bpf_program__set_autoload(prog, true);
+ if (!ASSERT_OK(bpf_program__set_attach_target(prog,
+ bpf_program__fd(skel->progs.exception_throw_subprog),
+ "subprog"), "set_attach_target"))
+ goto done;
+ }), "subprog() is not a global function", 0);
+
+ if (test__start_subtest("non-throwing extension -> throwing subprog"))
+ /* non-throwing extension -> throwing subprog : BAD (!global) */
+ RUN_EXT(-EINVAL, true, ({
+ prog = eskel->progs.extension;
+ bpf_program__set_autoload(prog, true);
+ if (!ASSERT_OK(bpf_program__set_attach_target(prog,
+ bpf_program__fd(skel->progs.exception_throw_subprog),
+ "throwing_subprog"), "set_attach_target"))
+ goto done;
+ }), "throwing_subprog() is not a global function", 0);
+
+ if (test__start_subtest("non-throwing extension -> non-throwing subprog"))
+ /* non-throwing extension -> non-throwing global subprog : OK */
+ RUN_EXT(0, false, ({
+ prog = eskel->progs.extension;
+ bpf_program__set_autoload(prog, true);
+ if (!ASSERT_OK(bpf_program__set_attach_target(prog,
+ bpf_program__fd(skel->progs.exception_throw_subprog),
+ "global_subprog"), "set_attach_target"))
+ goto done;
+ }), "", 0);
+
+ if (test__start_subtest("non-throwing extension -> throwing global subprog"))
+ /* non-throwing extension -> throwing global subprog : OK */
+ RUN_EXT(0, false, ({
+ prog = eskel->progs.extension;
+ bpf_program__set_autoload(prog, true);
+ if (!ASSERT_OK(bpf_program__set_attach_target(prog,
+ bpf_program__fd(skel->progs.exception_throw_subprog),
+ "throwing_global_subprog"), "set_attach_target"))
+ goto done;
+ }), "", 0);
+
+ if (test__start_subtest("throwing extension -> throwing global subprog"))
+ /* throwing extension -> throwing global subprog : OK */
+ RUN_EXT(0, false, ({
+ prog = eskel->progs.throwing_extension;
+ bpf_program__set_autoload(prog, true);
+ if (!ASSERT_OK(bpf_program__set_attach_target(prog,
+ bpf_program__fd(skel->progs.exception_throw_subprog),
+ "throwing_global_subprog"), "set_attach_target"))
+ goto done;
+ }), "", 0);
+
+ if (test__start_subtest("throwing extension -> non-throwing global subprog"))
+ /* throwing extension -> non-throwing global subprog : OK */
+ RUN_EXT(0, false, ({
+ prog = eskel->progs.throwing_extension;
+ bpf_program__set_autoload(prog, true);
+ if (!ASSERT_OK(bpf_program__set_attach_target(prog,
+ bpf_program__fd(skel->progs.exception_throw_subprog),
+ "global_subprog"), "set_attach_target"))
+ goto done;
+ }), "", 0);
+
+ if (test__start_subtest("non-throwing extension -> main subprog"))
+ /* non-throwing extension -> main subprog : OK */
+ RUN_EXT(0, false, ({
+ prog = eskel->progs.extension;
+ bpf_program__set_autoload(prog, true);
+ if (!ASSERT_OK(bpf_program__set_attach_target(prog,
+ bpf_program__fd(skel->progs.exception_throw_subprog),
+ "exception_throw_subprog"), "set_attach_target"))
+ goto done;
+ }), "", 0);
+
+ if (test__start_subtest("throwing extension -> main subprog"))
+ /* throwing extension -> main subprog : OK */
+ RUN_EXT(0, false, ({
+ prog = eskel->progs.throwing_extension;
+ bpf_program__set_autoload(prog, true);
+ if (!ASSERT_OK(bpf_program__set_attach_target(prog,
+ bpf_program__fd(skel->progs.exception_throw_subprog),
+ "exception_throw_subprog"), "set_attach_target"))
+ goto done;
+ }), "", 0);
+
+done:
+ exceptions_ext__destroy(eskel);
+ exceptions__destroy(skel);
+}
+
+static void test_exceptions_assertions(void)
+{
+ RUN_TESTS(exceptions_assert);
+}
+
+void test_exceptions(void)
+{
+ test_exceptions_success();
+ test_exceptions_failure();
+ test_exceptions_assertions();
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/exhandler.c b/tools/testing/selftests/bpf/prog_tests/exhandler.c
new file mode 100644
index 000000000000..118bb182ee20
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/exhandler.c
@@ -0,0 +1,43 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2021, Oracle and/or its affiliates. */
+
+#include <test_progs.h>
+
+/* Test that verifies exception handling is working. fork()
+ * triggers task_newtask tracepoint; that new task will have a
+ * NULL pointer task_works, and the associated task->task_works->func
+ * should not be NULL if task_works itself is non-NULL.
+ *
+ * So to verify exception handling we want to see a NULL task_works
+ * and task_works->func; if we see this we can conclude that the
+ * exception handler ran when we attempted to dereference task->task_works
+ * and zeroed the destination register.
+ */
+#include "exhandler_kern.skel.h"
+
+void test_exhandler(void)
+{
+ int err = 0, duration = 0, status;
+ struct exhandler_kern *skel;
+ pid_t cpid;
+
+ skel = exhandler_kern__open_and_load();
+ if (CHECK(!skel, "skel_load", "skeleton failed: %d\n", err))
+ goto cleanup;
+
+ skel->bss->test_pid = getpid();
+
+ err = exhandler_kern__attach(skel);
+ if (!ASSERT_OK(err, "attach"))
+ goto cleanup;
+ cpid = fork();
+ if (!ASSERT_GT(cpid, -1, "fork failed"))
+ goto cleanup;
+ if (cpid == 0)
+ _exit(0);
+ waitpid(cpid, &status, 0);
+
+ ASSERT_NEQ(skel->bss->exception_triggered, 0, "verify exceptions occurred");
+cleanup:
+ exhandler_kern__destroy(skel);
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/fentry_fexit.c b/tools/testing/selftests/bpf/prog_tests/fentry_fexit.c
index 109d0345a2be..130f5b82d2e6 100644
--- a/tools/testing/selftests/bpf/prog_tests/fentry_fexit.c
+++ b/tools/testing/selftests/bpf/prog_tests/fentry_fexit.c
@@ -1,49 +1,45 @@
// SPDX-License-Identifier: GPL-2.0
/* Copyright (c) 2019 Facebook */
#include <test_progs.h>
-#include "fentry_test.skel.h"
-#include "fexit_test.skel.h"
+#include "fentry_test.lskel.h"
+#include "fexit_test.lskel.h"
void test_fentry_fexit(void)
{
- struct fentry_test *fentry_skel = NULL;
- struct fexit_test *fexit_skel = NULL;
+ struct fentry_test_lskel *fentry_skel = NULL;
+ struct fexit_test_lskel *fexit_skel = NULL;
__u64 *fentry_res, *fexit_res;
- __u32 duration = 0, retval;
int err, prog_fd, i;
+ LIBBPF_OPTS(bpf_test_run_opts, topts);
- fentry_skel = fentry_test__open_and_load();
- if (CHECK(!fentry_skel, "fentry_skel_load", "fentry skeleton failed\n"))
+ fentry_skel = fentry_test_lskel__open_and_load();
+ if (!ASSERT_OK_PTR(fentry_skel, "fentry_skel_load"))
goto close_prog;
- fexit_skel = fexit_test__open_and_load();
- if (CHECK(!fexit_skel, "fexit_skel_load", "fexit skeleton failed\n"))
+ fexit_skel = fexit_test_lskel__open_and_load();
+ if (!ASSERT_OK_PTR(fexit_skel, "fexit_skel_load"))
goto close_prog;
- err = fentry_test__attach(fentry_skel);
- if (CHECK(err, "fentry_attach", "fentry attach failed: %d\n", err))
+ err = fentry_test_lskel__attach(fentry_skel);
+ if (!ASSERT_OK(err, "fentry_attach"))
goto close_prog;
- err = fexit_test__attach(fexit_skel);
- if (CHECK(err, "fexit_attach", "fexit attach failed: %d\n", err))
+ err = fexit_test_lskel__attach(fexit_skel);
+ if (!ASSERT_OK(err, "fexit_attach"))
goto close_prog;
- prog_fd = bpf_program__fd(fexit_skel->progs.test1);
- err = bpf_prog_test_run(prog_fd, 1, NULL, 0,
- NULL, NULL, &retval, &duration);
- CHECK(err || retval, "ipv6",
- "err %d errno %d retval %d duration %d\n",
- err, errno, retval, duration);
+ prog_fd = fexit_skel->progs.test1.prog_fd;
+ err = bpf_prog_test_run_opts(prog_fd, &topts);
+ ASSERT_OK(err, "ipv6 test_run");
+ ASSERT_OK(topts.retval, "ipv6 test retval");
fentry_res = (__u64 *)fentry_skel->bss;
fexit_res = (__u64 *)fexit_skel->bss;
printf("%lld\n", fentry_skel->bss->test1_result);
for (i = 0; i < 8; i++) {
- CHECK(fentry_res[i] != 1, "result",
- "fentry_test%d failed err %lld\n", i + 1, fentry_res[i]);
- CHECK(fexit_res[i] != 1, "result",
- "fexit_test%d failed err %lld\n", i + 1, fexit_res[i]);
+ ASSERT_EQ(fentry_res[i], 1, "fentry result");
+ ASSERT_EQ(fexit_res[i], 1, "fexit result");
}
close_prog:
- fentry_test__destroy(fentry_skel);
- fexit_test__destroy(fexit_skel);
+ fentry_test_lskel__destroy(fentry_skel);
+ fexit_test_lskel__destroy(fexit_skel);
}
diff --git a/tools/testing/selftests/bpf/prog_tests/fentry_test.c b/tools/testing/selftests/bpf/prog_tests/fentry_test.c
index 04ebbf1cb390..aee1bc77a17f 100644
--- a/tools/testing/selftests/bpf/prog_tests/fentry_test.c
+++ b/tools/testing/selftests/bpf/prog_tests/fentry_test.c
@@ -1,37 +1,93 @@
// SPDX-License-Identifier: GPL-2.0
/* Copyright (c) 2019 Facebook */
#include <test_progs.h>
-#include "fentry_test.skel.h"
+#include "fentry_test.lskel.h"
+#include "fentry_many_args.skel.h"
-void test_fentry_test(void)
+static int fentry_test_common(struct fentry_test_lskel *fentry_skel)
{
- struct fentry_test *fentry_skel = NULL;
int err, prog_fd, i;
- __u32 duration = 0, retval;
+ int link_fd;
__u64 *result;
+ LIBBPF_OPTS(bpf_test_run_opts, topts);
- fentry_skel = fentry_test__open_and_load();
- if (CHECK(!fentry_skel, "fentry_skel_load", "fentry skeleton failed\n"))
- goto cleanup;
+ err = fentry_test_lskel__attach(fentry_skel);
+ if (!ASSERT_OK(err, "fentry_attach"))
+ return err;
- err = fentry_test__attach(fentry_skel);
- if (CHECK(err, "fentry_attach", "fentry attach failed: %d\n", err))
- goto cleanup;
+ /* Check that already linked program can't be attached again. */
+ link_fd = fentry_test_lskel__test1__attach(fentry_skel);
+ if (!ASSERT_LT(link_fd, 0, "fentry_attach_link"))
+ return -1;
- prog_fd = bpf_program__fd(fentry_skel->progs.test1);
- err = bpf_prog_test_run(prog_fd, 1, NULL, 0,
- NULL, NULL, &retval, &duration);
- CHECK(err || retval, "test_run",
- "err %d errno %d retval %d duration %d\n",
- err, errno, retval, duration);
+ prog_fd = fentry_skel->progs.test1.prog_fd;
+ err = bpf_prog_test_run_opts(prog_fd, &topts);
+ ASSERT_OK(err, "test_run");
+ ASSERT_EQ(topts.retval, 0, "test_run");
result = (__u64 *)fentry_skel->bss;
- for (i = 0; i < 6; i++) {
- if (CHECK(result[i] != 1, "result",
- "fentry_test%d failed err %lld\n", i + 1, result[i]))
- goto cleanup;
+ for (i = 0; i < sizeof(*fentry_skel->bss) / sizeof(__u64); i++) {
+ if (!ASSERT_EQ(result[i], 1, "fentry_result"))
+ return -1;
}
+ fentry_test_lskel__detach(fentry_skel);
+
+ /* zero results for re-attach test */
+ memset(fentry_skel->bss, 0, sizeof(*fentry_skel->bss));
+ return 0;
+}
+
+static void fentry_test(void)
+{
+ struct fentry_test_lskel *fentry_skel = NULL;
+ int err;
+
+ fentry_skel = fentry_test_lskel__open_and_load();
+ if (!ASSERT_OK_PTR(fentry_skel, "fentry_skel_load"))
+ goto cleanup;
+
+ err = fentry_test_common(fentry_skel);
+ if (!ASSERT_OK(err, "fentry_first_attach"))
+ goto cleanup;
+
+ err = fentry_test_common(fentry_skel);
+ ASSERT_OK(err, "fentry_second_attach");
+
+cleanup:
+ fentry_test_lskel__destroy(fentry_skel);
+}
+
+static void fentry_many_args(void)
+{
+ struct fentry_many_args *fentry_skel = NULL;
+ int err;
+
+ fentry_skel = fentry_many_args__open_and_load();
+ if (!ASSERT_OK_PTR(fentry_skel, "fentry_many_args_skel_load"))
+ goto cleanup;
+
+ err = fentry_many_args__attach(fentry_skel);
+ if (!ASSERT_OK(err, "fentry_many_args_attach"))
+ goto cleanup;
+
+ ASSERT_OK(trigger_module_test_read(1), "trigger_read");
+
+ ASSERT_EQ(fentry_skel->bss->test1_result, 1,
+ "fentry_many_args_result1");
+ ASSERT_EQ(fentry_skel->bss->test2_result, 1,
+ "fentry_many_args_result2");
+ ASSERT_EQ(fentry_skel->bss->test3_result, 1,
+ "fentry_many_args_result3");
+
cleanup:
- fentry_test__destroy(fentry_skel);
+ fentry_many_args__destroy(fentry_skel);
+}
+
+void test_fentry_test(void)
+{
+ if (test__start_subtest("fentry"))
+ fentry_test();
+ if (test__start_subtest("fentry_many_args"))
+ fentry_many_args();
}
diff --git a/tools/testing/selftests/bpf/prog_tests/fexit_bpf2bpf.c b/tools/testing/selftests/bpf/prog_tests/fexit_bpf2bpf.c
index a895bfed55db..f29fc789c14b 100644
--- a/tools/testing/selftests/bpf/prog_tests/fexit_bpf2bpf.c
+++ b/tools/testing/selftests/bpf/prog_tests/fexit_bpf2bpf.c
@@ -2,90 +2,173 @@
/* Copyright (c) 2019 Facebook */
#include <test_progs.h>
#include <network_helpers.h>
+#include <bpf/btf.h>
+#include "bind4_prog.skel.h"
+#include "freplace_progmap.skel.h"
+#include "xdp_dummy.skel.h"
+
+typedef int (*test_cb)(struct bpf_object *obj);
+
+static int check_data_map(struct bpf_object *obj, int prog_cnt, bool reset)
+{
+ struct bpf_map *data_map = NULL, *map;
+ __u64 *result = NULL;
+ const int zero = 0;
+ __u32 duration = 0;
+ int ret = -1, i;
+
+ result = malloc((prog_cnt + 32 /* spare */) * sizeof(__u64));
+ if (CHECK(!result, "alloc_memory", "failed to alloc memory"))
+ return -ENOMEM;
+
+ bpf_object__for_each_map(map, obj)
+ if (bpf_map__is_internal(map)) {
+ data_map = map;
+ break;
+ }
+ if (CHECK(!data_map, "find_data_map", "data map not found\n"))
+ goto out;
+
+ ret = bpf_map_lookup_elem(bpf_map__fd(data_map), &zero, result);
+ if (CHECK(ret, "get_result",
+ "failed to get output data: %d\n", ret))
+ goto out;
+
+ for (i = 0; i < prog_cnt; i++) {
+ if (CHECK(result[i] != 1, "result",
+ "fexit_bpf2bpf result[%d] failed err %llu\n",
+ i, result[i]))
+ goto out;
+ result[i] = 0;
+ }
+ if (reset) {
+ ret = bpf_map_update_elem(bpf_map__fd(data_map), &zero, result, 0);
+ if (CHECK(ret, "reset_result", "failed to reset result\n"))
+ goto out;
+ }
+
+ ret = 0;
+out:
+ free(result);
+ return ret;
+}
static void test_fexit_bpf2bpf_common(const char *obj_file,
const char *target_obj_file,
int prog_cnt,
const char **prog_name,
- bool run_prog)
+ bool run_prog,
+ test_cb cb)
{
- struct bpf_object *obj = NULL, *pkt_obj;
- int err, pkt_fd, i;
+ struct bpf_object *obj = NULL, *tgt_obj;
+ __u32 tgt_prog_id, info_len;
+ struct bpf_prog_info prog_info = {};
+ struct bpf_program **prog = NULL, *p;
struct bpf_link **link = NULL;
- struct bpf_program **prog = NULL;
- __u32 duration = 0, retval;
- struct bpf_map *data_map;
- const int zero = 0;
- u64 *result = NULL;
+ int err, tgt_fd, i;
+ struct btf *btf;
+ LIBBPF_OPTS(bpf_test_run_opts, topts,
+ .data_in = &pkt_v6,
+ .data_size_in = sizeof(pkt_v6),
+ .repeat = 1,
+ );
- err = bpf_prog_load(target_obj_file, BPF_PROG_TYPE_UNSPEC,
- &pkt_obj, &pkt_fd);
- if (CHECK(err, "tgt_prog_load", "file %s err %d errno %d\n",
- target_obj_file, err, errno))
+ err = bpf_prog_test_load(target_obj_file, BPF_PROG_TYPE_UNSPEC,
+ &tgt_obj, &tgt_fd);
+ if (!ASSERT_OK(err, "tgt_prog_load"))
return;
- DECLARE_LIBBPF_OPTS(bpf_object_open_opts, opts,
- .attach_prog_fd = pkt_fd,
- );
+
+ info_len = sizeof(prog_info);
+ err = bpf_prog_get_info_by_fd(tgt_fd, &prog_info, &info_len);
+ if (!ASSERT_OK(err, "tgt_fd_get_info"))
+ goto close_prog;
+
+ tgt_prog_id = prog_info.id;
+ btf = bpf_object__btf(tgt_obj);
link = calloc(sizeof(struct bpf_link *), prog_cnt);
+ if (!ASSERT_OK_PTR(link, "link_ptr"))
+ goto close_prog;
+
prog = calloc(sizeof(struct bpf_program *), prog_cnt);
- result = malloc((prog_cnt + 32 /* spare */) * sizeof(u64));
- if (CHECK(!link || !prog || !result, "alloc_memory",
- "failed to alloc memory"))
+ if (!ASSERT_OK_PTR(prog, "prog_ptr"))
goto close_prog;
- obj = bpf_object__open_file(obj_file, &opts);
- if (CHECK(IS_ERR_OR_NULL(obj), "obj_open",
- "failed to open %s: %ld\n", obj_file,
- PTR_ERR(obj)))
+ obj = bpf_object__open_file(obj_file, NULL);
+ if (!ASSERT_OK_PTR(obj, "obj_open"))
goto close_prog;
+ bpf_object__for_each_program(p, obj) {
+ err = bpf_program__set_attach_target(p, tgt_fd, NULL);
+ ASSERT_OK(err, "set_attach_target");
+ }
+
err = bpf_object__load(obj);
- if (CHECK(err, "obj_load", "err %d\n", err))
+ if (!ASSERT_OK(err, "obj_load"))
goto close_prog;
for (i = 0; i < prog_cnt; i++) {
- prog[i] = bpf_object__find_program_by_title(obj, prog_name[i]);
- if (CHECK(!prog[i], "find_prog", "prog %s not found\n", prog_name[i]))
+ struct bpf_link_info link_info;
+ struct bpf_program *pos;
+ const char *pos_sec_name;
+ char *tgt_name;
+ __s32 btf_id;
+
+ tgt_name = strstr(prog_name[i], "/");
+ if (!ASSERT_OK_PTR(tgt_name, "tgt_name"))
goto close_prog;
+ btf_id = btf__find_by_name_kind(btf, tgt_name + 1, BTF_KIND_FUNC);
+
+ prog[i] = NULL;
+ bpf_object__for_each_program(pos, obj) {
+ pos_sec_name = bpf_program__section_name(pos);
+ if (pos_sec_name && !strcmp(pos_sec_name, prog_name[i])) {
+ prog[i] = pos;
+ break;
+ }
+ }
+ if (!ASSERT_OK_PTR(prog[i], prog_name[i]))
+ goto close_prog;
+
link[i] = bpf_program__attach_trace(prog[i]);
- if (CHECK(IS_ERR(link[i]), "attach_trace", "failed to link\n"))
+ if (!ASSERT_OK_PTR(link[i], "attach_trace"))
goto close_prog;
+
+ info_len = sizeof(link_info);
+ memset(&link_info, 0, sizeof(link_info));
+ err = bpf_link_get_info_by_fd(bpf_link__fd(link[i]),
+ &link_info, &info_len);
+ ASSERT_OK(err, "link_fd_get_info");
+ ASSERT_EQ(link_info.tracing.attach_type,
+ bpf_program__expected_attach_type(prog[i]),
+ "link_attach_type");
+ ASSERT_EQ(link_info.tracing.target_obj_id, tgt_prog_id, "link_tgt_obj_id");
+ ASSERT_EQ(link_info.tracing.target_btf_id, btf_id, "link_tgt_btf_id");
}
- if (!run_prog)
- goto close_prog;
+ if (cb) {
+ err = cb(obj);
+ if (err)
+ goto close_prog;
+ }
- data_map = bpf_object__find_map_by_name(obj, "fexit_bp.bss");
- if (CHECK(!data_map, "find_data_map", "data map not found\n"))
+ if (!run_prog)
goto close_prog;
- err = bpf_prog_test_run(pkt_fd, 1, &pkt_v6, sizeof(pkt_v6),
- NULL, NULL, &retval, &duration);
- CHECK(err || retval, "ipv6",
- "err %d errno %d retval %d duration %d\n",
- err, errno, retval, duration);
+ err = bpf_prog_test_run_opts(tgt_fd, &topts);
+ ASSERT_OK(err, "prog_run");
+ ASSERT_EQ(topts.retval, 0, "prog_run_ret");
- err = bpf_map_lookup_elem(bpf_map__fd(data_map), &zero, result);
- if (CHECK(err, "get_result",
- "failed to get output data: %d\n", err))
+ if (check_data_map(obj, prog_cnt, false))
goto close_prog;
- for (i = 0; i < prog_cnt; i++)
- if (CHECK(result[i] != 1, "result", "fexit_bpf2bpf failed err %ld\n",
- result[i]))
- goto close_prog;
-
close_prog:
for (i = 0; i < prog_cnt; i++)
- if (!IS_ERR_OR_NULL(link[i]))
- bpf_link__destroy(link[i]);
- if (!IS_ERR_OR_NULL(obj))
- bpf_object__close(obj);
- bpf_object__close(pkt_obj);
+ bpf_link__destroy(link[i]);
+ bpf_object__close(obj);
+ bpf_object__close(tgt_obj);
free(link);
free(prog);
- free(result);
}
static void test_target_no_callees(void)
@@ -93,10 +176,10 @@ static void test_target_no_callees(void)
const char *prog_name[] = {
"fexit/test_pkt_md_access",
};
- test_fexit_bpf2bpf_common("./fexit_bpf2bpf_simple.o",
- "./test_pkt_md_access.o",
+ test_fexit_bpf2bpf_common("./fexit_bpf2bpf_simple.bpf.o",
+ "./test_pkt_md_access.bpf.o",
ARRAY_SIZE(prog_name),
- prog_name, true);
+ prog_name, true, NULL);
}
static void test_target_yes_callees(void)
@@ -107,10 +190,10 @@ static void test_target_yes_callees(void)
"fexit/test_pkt_access_subprog2",
"fexit/test_pkt_access_subprog3",
};
- test_fexit_bpf2bpf_common("./fexit_bpf2bpf.o",
- "./test_pkt_access.o",
+ test_fexit_bpf2bpf_common("./fexit_bpf2bpf.bpf.o",
+ "./test_pkt_access.bpf.o",
ARRAY_SIZE(prog_name),
- prog_name, true);
+ prog_name, true, NULL);
}
static void test_func_replace(void)
@@ -123,11 +206,12 @@ static void test_func_replace(void)
"freplace/get_skb_len",
"freplace/get_skb_ifindex",
"freplace/get_constant",
+ "freplace/test_pkt_write_access_subprog",
};
- test_fexit_bpf2bpf_common("./fexit_bpf2bpf.o",
- "./test_pkt_access.o",
+ test_fexit_bpf2bpf_common("./fexit_bpf2bpf.bpf.o",
+ "./test_pkt_access.bpf.o",
ARRAY_SIZE(prog_name),
- prog_name, true);
+ prog_name, true, NULL);
}
static void test_func_replace_verify(void)
@@ -135,16 +219,382 @@ static void test_func_replace_verify(void)
const char *prog_name[] = {
"freplace/do_bind",
};
- test_fexit_bpf2bpf_common("./freplace_connect4.o",
- "./connect4_prog.o",
+ test_fexit_bpf2bpf_common("./freplace_connect4.bpf.o",
+ "./connect4_prog.bpf.o",
+ ARRAY_SIZE(prog_name),
+ prog_name, false, NULL);
+}
+
+static int test_second_attach(struct bpf_object *obj)
+{
+ const char *prog_name = "security_new_get_constant";
+ const char *tgt_name = "get_constant";
+ const char *tgt_obj_file = "./test_pkt_access.bpf.o";
+ struct bpf_program *prog = NULL;
+ struct bpf_object *tgt_obj;
+ struct bpf_link *link;
+ int err = 0, tgt_fd;
+ LIBBPF_OPTS(bpf_test_run_opts, topts,
+ .data_in = &pkt_v6,
+ .data_size_in = sizeof(pkt_v6),
+ .repeat = 1,
+ );
+
+ prog = bpf_object__find_program_by_name(obj, prog_name);
+ if (!ASSERT_OK_PTR(prog, "find_prog"))
+ return -ENOENT;
+
+ err = bpf_prog_test_load(tgt_obj_file, BPF_PROG_TYPE_UNSPEC,
+ &tgt_obj, &tgt_fd);
+ if (!ASSERT_OK(err, "second_prog_load"))
+ return err;
+
+ link = bpf_program__attach_freplace(prog, tgt_fd, tgt_name);
+ if (!ASSERT_OK_PTR(link, "second_link"))
+ goto out;
+
+ err = bpf_prog_test_run_opts(tgt_fd, &topts);
+ if (!ASSERT_OK(err, "ipv6 test_run"))
+ goto out;
+ if (!ASSERT_OK(topts.retval, "ipv6 retval"))
+ goto out;
+
+ err = check_data_map(obj, 1, true);
+ if (err)
+ goto out;
+
+out:
+ bpf_link__destroy(link);
+ bpf_object__close(tgt_obj);
+ return err;
+}
+
+static void test_func_replace_multi(void)
+{
+ const char *prog_name[] = {
+ "freplace/get_constant",
+ };
+ test_fexit_bpf2bpf_common("./freplace_get_constant.bpf.o",
+ "./test_pkt_access.bpf.o",
+ ARRAY_SIZE(prog_name),
+ prog_name, true, test_second_attach);
+}
+
+static void test_fmod_ret_freplace(void)
+{
+ struct bpf_object *freplace_obj = NULL, *pkt_obj, *fmod_obj = NULL;
+ const char *freplace_name = "./freplace_get_constant.bpf.o";
+ const char *fmod_ret_name = "./fmod_ret_freplace.bpf.o";
+ DECLARE_LIBBPF_OPTS(bpf_object_open_opts, opts);
+ const char *tgt_name = "./test_pkt_access.bpf.o";
+ struct bpf_link *freplace_link = NULL;
+ struct bpf_program *prog;
+ __u32 duration = 0;
+ int err, pkt_fd, attach_prog_fd;
+
+ err = bpf_prog_test_load(tgt_name, BPF_PROG_TYPE_UNSPEC,
+ &pkt_obj, &pkt_fd);
+ /* the target prog should load fine */
+ if (CHECK(err, "tgt_prog_load", "file %s err %d errno %d\n",
+ tgt_name, err, errno))
+ return;
+
+ freplace_obj = bpf_object__open_file(freplace_name, NULL);
+ if (!ASSERT_OK_PTR(freplace_obj, "freplace_obj_open"))
+ goto out;
+
+ prog = bpf_object__next_program(freplace_obj, NULL);
+ err = bpf_program__set_attach_target(prog, pkt_fd, NULL);
+ ASSERT_OK(err, "freplace__set_attach_target");
+
+ err = bpf_object__load(freplace_obj);
+ if (CHECK(err, "freplace_obj_load", "err %d\n", err))
+ goto out;
+
+ freplace_link = bpf_program__attach_trace(prog);
+ if (!ASSERT_OK_PTR(freplace_link, "freplace_attach_trace"))
+ goto out;
+
+ fmod_obj = bpf_object__open_file(fmod_ret_name, NULL);
+ if (!ASSERT_OK_PTR(fmod_obj, "fmod_obj_open"))
+ goto out;
+
+ attach_prog_fd = bpf_program__fd(prog);
+ prog = bpf_object__next_program(fmod_obj, NULL);
+ err = bpf_program__set_attach_target(prog, attach_prog_fd, NULL);
+ ASSERT_OK(err, "fmod_ret_set_attach_target");
+
+ err = bpf_object__load(fmod_obj);
+ if (CHECK(!err, "fmod_obj_load", "loading fmod_ret should fail\n"))
+ goto out;
+
+out:
+ bpf_link__destroy(freplace_link);
+ bpf_object__close(freplace_obj);
+ bpf_object__close(fmod_obj);
+ bpf_object__close(pkt_obj);
+}
+
+
+static void test_func_sockmap_update(void)
+{
+ const char *prog_name[] = {
+ "freplace/cls_redirect",
+ };
+ test_fexit_bpf2bpf_common("./freplace_cls_redirect.bpf.o",
+ "./test_cls_redirect.bpf.o",
+ ARRAY_SIZE(prog_name),
+ prog_name, false, NULL);
+}
+
+static void test_obj_load_failure_common(const char *obj_file,
+ const char *target_obj_file,
+ const char *exp_msg)
+{
+ /*
+ * standalone test that asserts failure to load freplace prog
+ * because of invalid return code.
+ */
+ struct bpf_object *obj = NULL, *pkt_obj;
+ struct bpf_program *prog;
+ char log_buf[64 * 1024];
+ int err, pkt_fd;
+ __u32 duration = 0;
+
+ err = bpf_prog_test_load(target_obj_file, BPF_PROG_TYPE_UNSPEC,
+ &pkt_obj, &pkt_fd);
+ /* the target prog should load fine */
+ if (CHECK(err, "tgt_prog_load", "file %s err %d errno %d\n",
+ target_obj_file, err, errno))
+ return;
+
+ obj = bpf_object__open_file(obj_file, NULL);
+ if (!ASSERT_OK_PTR(obj, "obj_open"))
+ goto close_prog;
+
+ prog = bpf_object__next_program(obj, NULL);
+ err = bpf_program__set_attach_target(prog, pkt_fd, NULL);
+ ASSERT_OK(err, "set_attach_target");
+
+ log_buf[0] = '\0';
+ if (exp_msg)
+ bpf_program__set_log_buf(prog, log_buf, sizeof(log_buf));
+ if (env.verbosity > VERBOSE_NONE)
+ bpf_program__set_log_level(prog, 2);
+
+ /* It should fail to load the program */
+ err = bpf_object__load(obj);
+ if (env.verbosity > VERBOSE_NONE && exp_msg) /* we overtook log */
+ printf("VERIFIER LOG:\n================\n%s\n================\n", log_buf);
+ if (CHECK(!err, "bpf_obj_load should fail", "err %d\n", err))
+ goto close_prog;
+
+ if (exp_msg)
+ ASSERT_HAS_SUBSTR(log_buf, exp_msg, "fail_msg");
+close_prog:
+ bpf_object__close(obj);
+ bpf_object__close(pkt_obj);
+}
+
+static void test_func_replace_return_code(void)
+{
+ /* test invalid return code in the replaced program */
+ test_obj_load_failure_common("./freplace_connect_v4_prog.bpf.o",
+ "./connect4_prog.bpf.o", NULL);
+}
+
+static void test_func_map_prog_compatibility(void)
+{
+ /* test with spin lock map value in the replaced program */
+ test_obj_load_failure_common("./freplace_attach_probe.bpf.o",
+ "./test_attach_probe.bpf.o", NULL);
+}
+
+static void test_func_replace_unreliable(void)
+{
+ /* freplace'ing unreliable main prog should fail with error
+ * "Cannot replace static functions"
+ */
+ test_obj_load_failure_common("freplace_unreliable_prog.bpf.o",
+ "./verifier_btf_unreliable_prog.bpf.o",
+ "Cannot replace static functions");
+}
+
+static void test_func_replace_global_func(void)
+{
+ const char *prog_name[] = {
+ "freplace/test_pkt_access",
+ };
+
+ test_fexit_bpf2bpf_common("./freplace_global_func.bpf.o",
+ "./test_pkt_access.bpf.o",
ARRAY_SIZE(prog_name),
- prog_name, false);
+ prog_name, false, NULL);
+}
+
+static int find_prog_btf_id(const char *name, __u32 attach_prog_fd)
+{
+ struct bpf_prog_info info = {};
+ __u32 info_len = sizeof(info);
+ struct btf *btf;
+ int ret;
+
+ ret = bpf_prog_get_info_by_fd(attach_prog_fd, &info, &info_len);
+ if (ret)
+ return ret;
+
+ if (!info.btf_id)
+ return -EINVAL;
+
+ btf = btf__load_from_kernel_by_id(info.btf_id);
+ ret = libbpf_get_error(btf);
+ if (ret)
+ return ret;
+
+ ret = btf__find_by_name_kind(btf, name, BTF_KIND_FUNC);
+ btf__free(btf);
+ return ret;
+}
+
+static int load_fentry(int attach_prog_fd, int attach_btf_id)
+{
+ LIBBPF_OPTS(bpf_prog_load_opts, opts,
+ .expected_attach_type = BPF_TRACE_FENTRY,
+ .attach_prog_fd = attach_prog_fd,
+ .attach_btf_id = attach_btf_id,
+ );
+ struct bpf_insn insns[] = {
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ };
+
+ return bpf_prog_load(BPF_PROG_TYPE_TRACING,
+ "bind4_fentry",
+ "GPL",
+ insns,
+ ARRAY_SIZE(insns),
+ &opts);
+}
+
+static void test_fentry_to_cgroup_bpf(void)
+{
+ struct bind4_prog *skel = NULL;
+ struct bpf_prog_info info = {};
+ __u32 info_len = sizeof(info);
+ int cgroup_fd = -1;
+ int fentry_fd = -1;
+ int btf_id;
+
+ cgroup_fd = test__join_cgroup("/fentry_to_cgroup_bpf");
+ if (!ASSERT_GE(cgroup_fd, 0, "cgroup_fd"))
+ return;
+
+ skel = bind4_prog__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "skel"))
+ goto cleanup;
+
+ skel->links.bind_v4_prog = bpf_program__attach_cgroup(skel->progs.bind_v4_prog, cgroup_fd);
+ if (!ASSERT_OK_PTR(skel->links.bind_v4_prog, "bpf_program__attach_cgroup"))
+ goto cleanup;
+
+ btf_id = find_prog_btf_id("bind_v4_prog", bpf_program__fd(skel->progs.bind_v4_prog));
+ if (!ASSERT_GE(btf_id, 0, "find_prog_btf_id"))
+ goto cleanup;
+
+ fentry_fd = load_fentry(bpf_program__fd(skel->progs.bind_v4_prog), btf_id);
+ if (!ASSERT_GE(fentry_fd, 0, "load_fentry"))
+ goto cleanup;
+
+ /* Make sure bpf_prog_get_info_by_fd works correctly when attaching
+ * to another BPF program.
+ */
+
+ ASSERT_OK(bpf_prog_get_info_by_fd(fentry_fd, &info, &info_len),
+ "bpf_prog_get_info_by_fd");
+
+ ASSERT_EQ(info.btf_id, 0, "info.btf_id");
+ ASSERT_EQ(info.attach_btf_id, btf_id, "info.attach_btf_id");
+ ASSERT_GT(info.attach_btf_obj_id, 0, "info.attach_btf_obj_id");
+
+cleanup:
+ if (cgroup_fd >= 0)
+ close(cgroup_fd);
+ if (fentry_fd >= 0)
+ close(fentry_fd);
+ bind4_prog__destroy(skel);
+}
+
+static void test_func_replace_progmap(void)
+{
+ struct bpf_cpumap_val value = { .qsize = 1 };
+ struct freplace_progmap *skel = NULL;
+ struct xdp_dummy *tgt_skel = NULL;
+ __u32 key = 0;
+ int err;
+
+ skel = freplace_progmap__open();
+ if (!ASSERT_OK_PTR(skel, "prog_open"))
+ return;
+
+ tgt_skel = xdp_dummy__open_and_load();
+ if (!ASSERT_OK_PTR(tgt_skel, "tgt_prog_load"))
+ goto out;
+
+ err = bpf_program__set_attach_target(skel->progs.xdp_cpumap_prog,
+ bpf_program__fd(tgt_skel->progs.xdp_dummy_prog),
+ "xdp_dummy_prog");
+ if (!ASSERT_OK(err, "set_attach_target"))
+ goto out;
+
+ err = freplace_progmap__load(skel);
+ if (!ASSERT_OK(err, "obj_load"))
+ goto out;
+
+ /* Prior to fixing the kernel, loading the PROG_TYPE_EXT 'redirect'
+ * program above will cause the map owner type of 'cpumap' to be set to
+ * PROG_TYPE_EXT. This in turn will cause the bpf_map_update_elem()
+ * below to fail, because the program we are inserting into the map is
+ * of PROG_TYPE_XDP. After fixing the kernel, the initial ownership will
+ * be correctly resolved to the *target* of the PROG_TYPE_EXT program
+ * (i.e., PROG_TYPE_XDP) and the map update will succeed.
+ */
+ value.bpf_prog.fd = bpf_program__fd(skel->progs.xdp_drop_prog);
+ err = bpf_map_update_elem(bpf_map__fd(skel->maps.cpu_map),
+ &key, &value, 0);
+ ASSERT_OK(err, "map_update");
+
+out:
+ xdp_dummy__destroy(tgt_skel);
+ freplace_progmap__destroy(skel);
}
-void test_fexit_bpf2bpf(void)
+/* NOTE: affect other tests, must run in serial mode */
+void serial_test_fexit_bpf2bpf(void)
{
- test_target_no_callees();
- test_target_yes_callees();
- test_func_replace();
- test_func_replace_verify();
+ if (test__start_subtest("target_no_callees"))
+ test_target_no_callees();
+ if (test__start_subtest("target_yes_callees"))
+ test_target_yes_callees();
+ if (test__start_subtest("func_replace"))
+ test_func_replace();
+ if (test__start_subtest("func_replace_verify"))
+ test_func_replace_verify();
+ if (test__start_subtest("func_sockmap_update"))
+ test_func_sockmap_update();
+ if (test__start_subtest("func_replace_return_code"))
+ test_func_replace_return_code();
+ if (test__start_subtest("func_map_prog_compatibility"))
+ test_func_map_prog_compatibility();
+ if (test__start_subtest("func_replace_unreliable"))
+ test_func_replace_unreliable();
+ if (test__start_subtest("func_replace_multi"))
+ test_func_replace_multi();
+ if (test__start_subtest("fmod_ret_freplace"))
+ test_fmod_ret_freplace();
+ if (test__start_subtest("func_replace_global_func"))
+ test_func_replace_global_func();
+ if (test__start_subtest("fentry_to_cgroup_bpf"))
+ test_fentry_to_cgroup_bpf();
+ if (test__start_subtest("func_replace_progmap"))
+ test_func_replace_progmap();
}
diff --git a/tools/testing/selftests/bpf/prog_tests/fexit_sleep.c b/tools/testing/selftests/bpf/prog_tests/fexit_sleep.c
new file mode 100644
index 000000000000..f949647dbbc2
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/fexit_sleep.c
@@ -0,0 +1,82 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2021 Facebook */
+#define _GNU_SOURCE
+#include <sched.h>
+#include <test_progs.h>
+#include <time.h>
+#include <sys/mman.h>
+#include <sys/syscall.h>
+#include "fexit_sleep.lskel.h"
+
+static int do_sleep(void *skel)
+{
+ struct fexit_sleep_lskel *fexit_skel = skel;
+ struct timespec ts1 = { .tv_nsec = 1 };
+ struct timespec ts2 = { .tv_sec = 10 };
+
+ fexit_skel->bss->pid = getpid();
+ (void)syscall(__NR_nanosleep, &ts1, NULL);
+ (void)syscall(__NR_nanosleep, &ts2, NULL);
+ return 0;
+}
+
+#define STACK_SIZE (1024 * 1024)
+static char child_stack[STACK_SIZE];
+
+void test_fexit_sleep(void)
+{
+ struct fexit_sleep_lskel *fexit_skel = NULL;
+ int wstatus, duration = 0;
+ pid_t cpid;
+ int err, fexit_cnt;
+
+ fexit_skel = fexit_sleep_lskel__open_and_load();
+ if (CHECK(!fexit_skel, "fexit_skel_load", "fexit skeleton failed\n"))
+ goto cleanup;
+
+ err = fexit_sleep_lskel__attach(fexit_skel);
+ if (CHECK(err, "fexit_attach", "fexit attach failed: %d\n", err))
+ goto cleanup;
+
+ cpid = clone(do_sleep, child_stack + STACK_SIZE, CLONE_FILES | SIGCHLD, fexit_skel);
+ if (CHECK(cpid == -1, "clone", "%s\n", strerror(errno)))
+ goto cleanup;
+
+ /* wait until first sys_nanosleep ends and second sys_nanosleep starts */
+ while (READ_ONCE(fexit_skel->bss->fentry_cnt) != 2);
+ fexit_cnt = READ_ONCE(fexit_skel->bss->fexit_cnt);
+ if (CHECK(fexit_cnt != 1, "fexit_cnt", "%d", fexit_cnt))
+ goto cleanup;
+
+ /* close progs and detach them. That will trigger two nop5->jmp5 rewrites
+ * in the trampolines to skip nanosleep_fexit prog.
+ * The nanosleep_fentry prog will get detached first.
+ * The nanosleep_fexit prog will get detached second.
+ * Detaching will trigger freeing of both progs JITed images.
+ * There will be two dying bpf_tramp_image-s, but only the initial
+ * bpf_tramp_image (with both _fentry and _fexit progs will be stuck
+ * waiting for percpu_ref_kill to confirm). The other one
+ * will be freed quickly.
+ */
+ close(fexit_skel->progs.nanosleep_fentry.prog_fd);
+ close(fexit_skel->progs.nanosleep_fexit.prog_fd);
+ fexit_sleep_lskel__detach(fexit_skel);
+
+ /* kill the thread to unwind sys_nanosleep stack through the trampoline */
+ kill(cpid, 9);
+
+ if (CHECK(waitpid(cpid, &wstatus, 0) == -1, "waitpid", "%s\n", strerror(errno)))
+ goto cleanup;
+ if (CHECK(WEXITSTATUS(wstatus) != 0, "exitstatus", "failed"))
+ goto cleanup;
+
+ /* The bypassed nanosleep_fexit prog shouldn't have executed.
+ * Unlike progs the maps were not freed and directly accessible.
+ */
+ fexit_cnt = READ_ONCE(fexit_skel->bss->fexit_cnt);
+ if (CHECK(fexit_cnt != 1, "fexit_cnt", "%d", fexit_cnt))
+ goto cleanup;
+
+cleanup:
+ fexit_sleep_lskel__destroy(fexit_skel);
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/fexit_stress.c b/tools/testing/selftests/bpf/prog_tests/fexit_stress.c
index 3b9dbf7433f0..596536def43d 100644
--- a/tools/testing/selftests/bpf/prog_tests/fexit_stress.c
+++ b/tools/testing/selftests/bpf/prog_tests/fexit_stress.c
@@ -2,75 +2,58 @@
/* Copyright (c) 2019 Facebook */
#include <test_progs.h>
-/* x86-64 fits 55 JITed and 43 interpreted progs into half page */
-#define CNT 40
-
-void test_fexit_stress(void)
+void serial_test_fexit_stress(void)
{
- char test_skb[128] = {};
- int fexit_fd[CNT] = {};
- int link_fd[CNT] = {};
- __u32 duration = 0;
- char error[4096];
- __u32 prog_ret;
- int err, i, filter_fd;
+ int bpf_max_tramp_links, err, i;
+ int *fd, *fexit_fd, *link_fd;
+
+ bpf_max_tramp_links = get_bpf_max_tramp_links();
+ if (!ASSERT_GE(bpf_max_tramp_links, 1, "bpf_max_tramp_links"))
+ return;
+ fd = calloc(bpf_max_tramp_links * 2, sizeof(*fd));
+ if (!ASSERT_OK_PTR(fd, "fd"))
+ return;
+ fexit_fd = fd;
+ link_fd = fd + bpf_max_tramp_links;
const struct bpf_insn trace_program[] = {
BPF_MOV64_IMM(BPF_REG_0, 0),
BPF_EXIT_INSN(),
};
- struct bpf_load_program_attr load_attr = {
- .prog_type = BPF_PROG_TYPE_TRACING,
- .license = "GPL",
- .insns = trace_program,
- .insns_cnt = sizeof(trace_program) / sizeof(struct bpf_insn),
+ LIBBPF_OPTS(bpf_prog_load_opts, trace_opts,
.expected_attach_type = BPF_TRACE_FEXIT,
- };
+ );
- const struct bpf_insn skb_program[] = {
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- };
-
- struct bpf_load_program_attr skb_load_attr = {
- .prog_type = BPF_PROG_TYPE_SOCKET_FILTER,
- .license = "GPL",
- .insns = skb_program,
- .insns_cnt = sizeof(skb_program) / sizeof(struct bpf_insn),
- };
+ LIBBPF_OPTS(bpf_test_run_opts, topts);
err = libbpf_find_vmlinux_btf_id("bpf_fentry_test1",
- load_attr.expected_attach_type);
- if (CHECK(err <= 0, "find_vmlinux_btf_id", "failed: %d\n", err))
+ trace_opts.expected_attach_type);
+ if (!ASSERT_GT(err, 0, "find_vmlinux_btf_id"))
goto out;
- load_attr.attach_btf_id = err;
-
- for (i = 0; i < CNT; i++) {
- fexit_fd[i] = bpf_load_program_xattr(&load_attr, error, sizeof(error));
- if (CHECK(fexit_fd[i] < 0, "fexit loaded",
- "failed: %d errno %d\n", fexit_fd[i], errno))
+ trace_opts.attach_btf_id = err;
+
+ for (i = 0; i < bpf_max_tramp_links; i++) {
+ fexit_fd[i] = bpf_prog_load(BPF_PROG_TYPE_TRACING, NULL, "GPL",
+ trace_program,
+ sizeof(trace_program) / sizeof(struct bpf_insn),
+ &trace_opts);
+ if (!ASSERT_GE(fexit_fd[i], 0, "fexit load"))
goto out;
- link_fd[i] = bpf_raw_tracepoint_open(NULL, fexit_fd[i]);
- if (CHECK(link_fd[i] < 0, "fexit attach failed",
- "prog %d failed: %d err %d\n", i, link_fd[i], errno))
+ link_fd[i] = bpf_link_create(fexit_fd[i], 0, BPF_TRACE_FEXIT, NULL);
+ if (!ASSERT_GE(link_fd[i], 0, "fexit attach"))
goto out;
}
- filter_fd = bpf_load_program_xattr(&skb_load_attr, error, sizeof(error));
- if (CHECK(filter_fd < 0, "test_program_loaded", "failed: %d errno %d\n",
- filter_fd, errno))
- goto out;
+ err = bpf_prog_test_run_opts(fexit_fd[0], &topts);
+ ASSERT_OK(err, "bpf_prog_test_run_opts");
- err = bpf_prog_test_run(filter_fd, 1, test_skb, sizeof(test_skb), 0,
- 0, &prog_ret, 0);
- close(filter_fd);
- CHECK_FAIL(err);
out:
- for (i = 0; i < CNT; i++) {
+ for (i = 0; i < bpf_max_tramp_links; i++) {
if (link_fd[i])
close(link_fd[i]);
if (fexit_fd[i])
close(fexit_fd[i]);
}
+ free(fd);
}
diff --git a/tools/testing/selftests/bpf/prog_tests/fexit_test.c b/tools/testing/selftests/bpf/prog_tests/fexit_test.c
index 78d7a2765c27..1c13007e37dd 100644
--- a/tools/testing/selftests/bpf/prog_tests/fexit_test.c
+++ b/tools/testing/selftests/bpf/prog_tests/fexit_test.c
@@ -1,37 +1,93 @@
// SPDX-License-Identifier: GPL-2.0
/* Copyright (c) 2019 Facebook */
#include <test_progs.h>
-#include "fexit_test.skel.h"
+#include "fexit_test.lskel.h"
+#include "fexit_many_args.skel.h"
-void test_fexit_test(void)
+static int fexit_test_common(struct fexit_test_lskel *fexit_skel)
{
- struct fexit_test *fexit_skel = NULL;
int err, prog_fd, i;
- __u32 duration = 0, retval;
+ int link_fd;
__u64 *result;
+ LIBBPF_OPTS(bpf_test_run_opts, topts);
- fexit_skel = fexit_test__open_and_load();
- if (CHECK(!fexit_skel, "fexit_skel_load", "fexit skeleton failed\n"))
- goto cleanup;
+ err = fexit_test_lskel__attach(fexit_skel);
+ if (!ASSERT_OK(err, "fexit_attach"))
+ return err;
- err = fexit_test__attach(fexit_skel);
- if (CHECK(err, "fexit_attach", "fexit attach failed: %d\n", err))
- goto cleanup;
+ /* Check that already linked program can't be attached again. */
+ link_fd = fexit_test_lskel__test1__attach(fexit_skel);
+ if (!ASSERT_LT(link_fd, 0, "fexit_attach_link"))
+ return -1;
- prog_fd = bpf_program__fd(fexit_skel->progs.test1);
- err = bpf_prog_test_run(prog_fd, 1, NULL, 0,
- NULL, NULL, &retval, &duration);
- CHECK(err || retval, "test_run",
- "err %d errno %d retval %d duration %d\n",
- err, errno, retval, duration);
+ prog_fd = fexit_skel->progs.test1.prog_fd;
+ err = bpf_prog_test_run_opts(prog_fd, &topts);
+ ASSERT_OK(err, "test_run");
+ ASSERT_EQ(topts.retval, 0, "test_run");
result = (__u64 *)fexit_skel->bss;
- for (i = 0; i < 6; i++) {
- if (CHECK(result[i] != 1, "result",
- "fexit_test%d failed err %lld\n", i + 1, result[i]))
- goto cleanup;
+ for (i = 0; i < sizeof(*fexit_skel->bss) / sizeof(__u64); i++) {
+ if (!ASSERT_EQ(result[i], 1, "fexit_result"))
+ return -1;
}
+ fexit_test_lskel__detach(fexit_skel);
+
+ /* zero results for re-attach test */
+ memset(fexit_skel->bss, 0, sizeof(*fexit_skel->bss));
+ return 0;
+}
+
+static void fexit_test(void)
+{
+ struct fexit_test_lskel *fexit_skel = NULL;
+ int err;
+
+ fexit_skel = fexit_test_lskel__open_and_load();
+ if (!ASSERT_OK_PTR(fexit_skel, "fexit_skel_load"))
+ goto cleanup;
+
+ err = fexit_test_common(fexit_skel);
+ if (!ASSERT_OK(err, "fexit_first_attach"))
+ goto cleanup;
+
+ err = fexit_test_common(fexit_skel);
+ ASSERT_OK(err, "fexit_second_attach");
+
+cleanup:
+ fexit_test_lskel__destroy(fexit_skel);
+}
+
+static void fexit_many_args(void)
+{
+ struct fexit_many_args *fexit_skel = NULL;
+ int err;
+
+ fexit_skel = fexit_many_args__open_and_load();
+ if (!ASSERT_OK_PTR(fexit_skel, "fexit_many_args_skel_load"))
+ goto cleanup;
+
+ err = fexit_many_args__attach(fexit_skel);
+ if (!ASSERT_OK(err, "fexit_many_args_attach"))
+ goto cleanup;
+
+ ASSERT_OK(trigger_module_test_read(1), "trigger_read");
+
+ ASSERT_EQ(fexit_skel->bss->test1_result, 1,
+ "fexit_many_args_result1");
+ ASSERT_EQ(fexit_skel->bss->test2_result, 1,
+ "fexit_many_args_result2");
+ ASSERT_EQ(fexit_skel->bss->test3_result, 1,
+ "fexit_many_args_result3");
+
cleanup:
- fexit_test__destroy(fexit_skel);
+ fexit_many_args__destroy(fexit_skel);
+}
+
+void test_fexit_test(void)
+{
+ if (test__start_subtest("fexit"))
+ fexit_test();
+ if (test__start_subtest("fexit_many_args"))
+ fexit_many_args();
}
diff --git a/tools/testing/selftests/bpf/prog_tests/fib_lookup.c b/tools/testing/selftests/bpf/prog_tests/fib_lookup.c
new file mode 100644
index 000000000000..3379df2d4cf2
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/fib_lookup.c
@@ -0,0 +1,303 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2023 Meta Platforms, Inc. and affiliates. */
+
+#include <linux/rtnetlink.h>
+#include <sys/types.h>
+#include <net/if.h>
+
+#include "test_progs.h"
+#include "network_helpers.h"
+#include "fib_lookup.skel.h"
+
+#define NS_TEST "fib_lookup_ns"
+#define IPV6_IFACE_ADDR "face::face"
+#define IPV6_IFACE_ADDR_SEC "cafe::cafe"
+#define IPV6_ADDR_DST "face::3"
+#define IPV6_NUD_FAILED_ADDR "face::1"
+#define IPV6_NUD_STALE_ADDR "face::2"
+#define IPV4_IFACE_ADDR "10.0.0.254"
+#define IPV4_IFACE_ADDR_SEC "10.1.0.254"
+#define IPV4_ADDR_DST "10.2.0.254"
+#define IPV4_NUD_FAILED_ADDR "10.0.0.1"
+#define IPV4_NUD_STALE_ADDR "10.0.0.2"
+#define IPV4_TBID_ADDR "172.0.0.254"
+#define IPV4_TBID_NET "172.0.0.0"
+#define IPV4_TBID_DST "172.0.0.2"
+#define IPV6_TBID_ADDR "fd00::FFFF"
+#define IPV6_TBID_NET "fd00::"
+#define IPV6_TBID_DST "fd00::2"
+#define DMAC "11:11:11:11:11:11"
+#define DMAC_INIT { 0x11, 0x11, 0x11, 0x11, 0x11, 0x11, }
+#define DMAC2 "01:01:01:01:01:01"
+#define DMAC_INIT2 { 0x01, 0x01, 0x01, 0x01, 0x01, 0x01, }
+
+struct fib_lookup_test {
+ const char *desc;
+ const char *daddr;
+ int expected_ret;
+ const char *expected_src;
+ int lookup_flags;
+ __u32 tbid;
+ __u8 dmac[6];
+};
+
+static const struct fib_lookup_test tests[] = {
+ { .desc = "IPv6 failed neigh",
+ .daddr = IPV6_NUD_FAILED_ADDR, .expected_ret = BPF_FIB_LKUP_RET_NO_NEIGH, },
+ { .desc = "IPv6 stale neigh",
+ .daddr = IPV6_NUD_STALE_ADDR, .expected_ret = BPF_FIB_LKUP_RET_SUCCESS,
+ .dmac = DMAC_INIT, },
+ { .desc = "IPv6 skip neigh",
+ .daddr = IPV6_NUD_FAILED_ADDR, .expected_ret = BPF_FIB_LKUP_RET_SUCCESS,
+ .lookup_flags = BPF_FIB_LOOKUP_SKIP_NEIGH, },
+ { .desc = "IPv4 failed neigh",
+ .daddr = IPV4_NUD_FAILED_ADDR, .expected_ret = BPF_FIB_LKUP_RET_NO_NEIGH, },
+ { .desc = "IPv4 stale neigh",
+ .daddr = IPV4_NUD_STALE_ADDR, .expected_ret = BPF_FIB_LKUP_RET_SUCCESS,
+ .dmac = DMAC_INIT, },
+ { .desc = "IPv4 skip neigh",
+ .daddr = IPV4_NUD_FAILED_ADDR, .expected_ret = BPF_FIB_LKUP_RET_SUCCESS,
+ .lookup_flags = BPF_FIB_LOOKUP_SKIP_NEIGH, },
+ { .desc = "IPv4 TBID lookup failure",
+ .daddr = IPV4_TBID_DST, .expected_ret = BPF_FIB_LKUP_RET_NOT_FWDED,
+ .lookup_flags = BPF_FIB_LOOKUP_DIRECT | BPF_FIB_LOOKUP_TBID,
+ .tbid = RT_TABLE_MAIN, },
+ { .desc = "IPv4 TBID lookup success",
+ .daddr = IPV4_TBID_DST, .expected_ret = BPF_FIB_LKUP_RET_SUCCESS,
+ .lookup_flags = BPF_FIB_LOOKUP_DIRECT | BPF_FIB_LOOKUP_TBID, .tbid = 100,
+ .dmac = DMAC_INIT2, },
+ { .desc = "IPv6 TBID lookup failure",
+ .daddr = IPV6_TBID_DST, .expected_ret = BPF_FIB_LKUP_RET_NOT_FWDED,
+ .lookup_flags = BPF_FIB_LOOKUP_DIRECT | BPF_FIB_LOOKUP_TBID,
+ .tbid = RT_TABLE_MAIN, },
+ { .desc = "IPv6 TBID lookup success",
+ .daddr = IPV6_TBID_DST, .expected_ret = BPF_FIB_LKUP_RET_SUCCESS,
+ .lookup_flags = BPF_FIB_LOOKUP_DIRECT | BPF_FIB_LOOKUP_TBID, .tbid = 100,
+ .dmac = DMAC_INIT2, },
+ { .desc = "IPv4 set src addr from netdev",
+ .daddr = IPV4_NUD_FAILED_ADDR, .expected_ret = BPF_FIB_LKUP_RET_SUCCESS,
+ .expected_src = IPV4_IFACE_ADDR,
+ .lookup_flags = BPF_FIB_LOOKUP_SRC | BPF_FIB_LOOKUP_SKIP_NEIGH, },
+ { .desc = "IPv6 set src addr from netdev",
+ .daddr = IPV6_NUD_FAILED_ADDR, .expected_ret = BPF_FIB_LKUP_RET_SUCCESS,
+ .expected_src = IPV6_IFACE_ADDR,
+ .lookup_flags = BPF_FIB_LOOKUP_SRC | BPF_FIB_LOOKUP_SKIP_NEIGH, },
+ { .desc = "IPv4 set prefsrc addr from route",
+ .daddr = IPV4_ADDR_DST, .expected_ret = BPF_FIB_LKUP_RET_SUCCESS,
+ .expected_src = IPV4_IFACE_ADDR_SEC,
+ .lookup_flags = BPF_FIB_LOOKUP_SRC | BPF_FIB_LOOKUP_SKIP_NEIGH, },
+ { .desc = "IPv6 set prefsrc addr route",
+ .daddr = IPV6_ADDR_DST, .expected_ret = BPF_FIB_LKUP_RET_SUCCESS,
+ .expected_src = IPV6_IFACE_ADDR_SEC,
+ .lookup_flags = BPF_FIB_LOOKUP_SRC | BPF_FIB_LOOKUP_SKIP_NEIGH, },
+};
+
+static int ifindex;
+
+static int setup_netns(void)
+{
+ int err;
+
+ SYS(fail, "ip link add veth1 type veth peer name veth2");
+ SYS(fail, "ip link set dev veth1 up");
+ SYS(fail, "ip link set dev veth2 up");
+
+ err = write_sysctl("/proc/sys/net/ipv4/neigh/veth1/gc_stale_time", "900");
+ if (!ASSERT_OK(err, "write_sysctl(net.ipv4.neigh.veth1.gc_stale_time)"))
+ goto fail;
+
+ err = write_sysctl("/proc/sys/net/ipv6/neigh/veth1/gc_stale_time", "900");
+ if (!ASSERT_OK(err, "write_sysctl(net.ipv6.neigh.veth1.gc_stale_time)"))
+ goto fail;
+
+ SYS(fail, "ip addr add %s/64 dev veth1 nodad", IPV6_IFACE_ADDR);
+ SYS(fail, "ip neigh add %s dev veth1 nud failed", IPV6_NUD_FAILED_ADDR);
+ SYS(fail, "ip neigh add %s dev veth1 lladdr %s nud stale", IPV6_NUD_STALE_ADDR, DMAC);
+
+ SYS(fail, "ip addr add %s/24 dev veth1", IPV4_IFACE_ADDR);
+ SYS(fail, "ip neigh add %s dev veth1 nud failed", IPV4_NUD_FAILED_ADDR);
+ SYS(fail, "ip neigh add %s dev veth1 lladdr %s nud stale", IPV4_NUD_STALE_ADDR, DMAC);
+
+ /* Setup for prefsrc IP addr selection */
+ SYS(fail, "ip addr add %s/24 dev veth1", IPV4_IFACE_ADDR_SEC);
+ SYS(fail, "ip route add %s/32 dev veth1 src %s", IPV4_ADDR_DST, IPV4_IFACE_ADDR_SEC);
+
+ SYS(fail, "ip addr add %s/64 dev veth1 nodad", IPV6_IFACE_ADDR_SEC);
+ SYS(fail, "ip route add %s/128 dev veth1 src %s", IPV6_ADDR_DST, IPV6_IFACE_ADDR_SEC);
+
+ /* Setup for tbid lookup tests */
+ SYS(fail, "ip addr add %s/24 dev veth2", IPV4_TBID_ADDR);
+ SYS(fail, "ip route del %s/24 dev veth2", IPV4_TBID_NET);
+ SYS(fail, "ip route add table 100 %s/24 dev veth2", IPV4_TBID_NET);
+ SYS(fail, "ip neigh add %s dev veth2 lladdr %s nud stale", IPV4_TBID_DST, DMAC2);
+
+ SYS(fail, "ip addr add %s/64 dev veth2", IPV6_TBID_ADDR);
+ SYS(fail, "ip -6 route del %s/64 dev veth2", IPV6_TBID_NET);
+ SYS(fail, "ip -6 route add table 100 %s/64 dev veth2", IPV6_TBID_NET);
+ SYS(fail, "ip neigh add %s dev veth2 lladdr %s nud stale", IPV6_TBID_DST, DMAC2);
+
+ err = write_sysctl("/proc/sys/net/ipv4/conf/veth1/forwarding", "1");
+ if (!ASSERT_OK(err, "write_sysctl(net.ipv4.conf.veth1.forwarding)"))
+ goto fail;
+
+ err = write_sysctl("/proc/sys/net/ipv6/conf/veth1/forwarding", "1");
+ if (!ASSERT_OK(err, "write_sysctl(net.ipv6.conf.veth1.forwarding)"))
+ goto fail;
+
+ return 0;
+fail:
+ return -1;
+}
+
+static int set_lookup_params(struct bpf_fib_lookup *params, const struct fib_lookup_test *test)
+{
+ int ret;
+
+ memset(params, 0, sizeof(*params));
+
+ params->l4_protocol = IPPROTO_TCP;
+ params->ifindex = ifindex;
+ params->tbid = test->tbid;
+
+ if (inet_pton(AF_INET6, test->daddr, params->ipv6_dst) == 1) {
+ params->family = AF_INET6;
+ if (!(test->lookup_flags & BPF_FIB_LOOKUP_SRC)) {
+ ret = inet_pton(AF_INET6, IPV6_IFACE_ADDR, params->ipv6_src);
+ if (!ASSERT_EQ(ret, 1, "inet_pton(IPV6_IFACE_ADDR)"))
+ return -1;
+ }
+
+ return 0;
+ }
+
+ ret = inet_pton(AF_INET, test->daddr, &params->ipv4_dst);
+ if (!ASSERT_EQ(ret, 1, "convert IP[46] address"))
+ return -1;
+ params->family = AF_INET;
+
+ if (!(test->lookup_flags & BPF_FIB_LOOKUP_SRC)) {
+ ret = inet_pton(AF_INET, IPV4_IFACE_ADDR, &params->ipv4_src);
+ if (!ASSERT_EQ(ret, 1, "inet_pton(IPV4_IFACE_ADDR)"))
+ return -1;
+ }
+
+ return 0;
+}
+
+static void mac_str(char *b, const __u8 *mac)
+{
+ sprintf(b, "%02X:%02X:%02X:%02X:%02X:%02X",
+ mac[0], mac[1], mac[2], mac[3], mac[4], mac[5]);
+}
+
+static void assert_src_ip(struct bpf_fib_lookup *fib_params, const char *expected_src)
+{
+ int ret;
+ __u32 src6[4];
+ __be32 src4;
+
+ switch (fib_params->family) {
+ case AF_INET6:
+ ret = inet_pton(AF_INET6, expected_src, src6);
+ ASSERT_EQ(ret, 1, "inet_pton(expected_src)");
+
+ ret = memcmp(src6, fib_params->ipv6_src, sizeof(fib_params->ipv6_src));
+ if (!ASSERT_EQ(ret, 0, "fib_lookup ipv6 src")) {
+ char str_src6[64];
+
+ inet_ntop(AF_INET6, fib_params->ipv6_src, str_src6,
+ sizeof(str_src6));
+ printf("ipv6 expected %s actual %s ", expected_src,
+ str_src6);
+ }
+
+ break;
+ case AF_INET:
+ ret = inet_pton(AF_INET, expected_src, &src4);
+ ASSERT_EQ(ret, 1, "inet_pton(expected_src)");
+
+ ASSERT_EQ(fib_params->ipv4_src, src4, "fib_lookup ipv4 src");
+
+ break;
+ default:
+ PRINT_FAIL("invalid addr family: %d", fib_params->family);
+ }
+}
+
+void test_fib_lookup(void)
+{
+ struct bpf_fib_lookup *fib_params;
+ struct nstoken *nstoken = NULL;
+ struct __sk_buff skb = { };
+ struct fib_lookup *skel;
+ int prog_fd, err, ret, i;
+
+ /* The test does not use the skb->data, so
+ * use pkt_v6 for both v6 and v4 test.
+ */
+ LIBBPF_OPTS(bpf_test_run_opts, run_opts,
+ .data_in = &pkt_v6,
+ .data_size_in = sizeof(pkt_v6),
+ .ctx_in = &skb,
+ .ctx_size_in = sizeof(skb),
+ );
+
+ skel = fib_lookup__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "skel open_and_load"))
+ return;
+ prog_fd = bpf_program__fd(skel->progs.fib_lookup);
+
+ SYS(fail, "ip netns add %s", NS_TEST);
+
+ nstoken = open_netns(NS_TEST);
+ if (!ASSERT_OK_PTR(nstoken, "open_netns"))
+ goto fail;
+
+ if (setup_netns())
+ goto fail;
+
+ ifindex = if_nametoindex("veth1");
+ skb.ifindex = ifindex;
+ fib_params = &skel->bss->fib_params;
+
+ for (i = 0; i < ARRAY_SIZE(tests); i++) {
+ printf("Testing %s ", tests[i].desc);
+
+ if (set_lookup_params(fib_params, &tests[i]))
+ continue;
+ skel->bss->fib_lookup_ret = -1;
+ skel->bss->lookup_flags = tests[i].lookup_flags;
+
+ err = bpf_prog_test_run_opts(prog_fd, &run_opts);
+ if (!ASSERT_OK(err, "bpf_prog_test_run_opts"))
+ continue;
+
+ ASSERT_EQ(skel->bss->fib_lookup_ret, tests[i].expected_ret,
+ "fib_lookup_ret");
+
+ if (tests[i].expected_src)
+ assert_src_ip(fib_params, tests[i].expected_src);
+
+ ret = memcmp(tests[i].dmac, fib_params->dmac, sizeof(tests[i].dmac));
+ if (!ASSERT_EQ(ret, 0, "dmac not match")) {
+ char expected[18], actual[18];
+
+ mac_str(expected, tests[i].dmac);
+ mac_str(actual, fib_params->dmac);
+ printf("dmac expected %s actual %s ", expected, actual);
+ }
+
+ // ensure tbid is zero'd out after fib lookup.
+ if (tests[i].lookup_flags & BPF_FIB_LOOKUP_DIRECT) {
+ if (!ASSERT_EQ(skel->bss->fib_params.tbid, 0,
+ "expected fib_params.tbid to be zero"))
+ goto fail;
+ }
+ }
+
+fail:
+ if (nstoken)
+ close_netns(nstoken);
+ SYS_NOFAIL("ip netns del " NS_TEST);
+ fib_lookup__destroy(skel);
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/fill_link_info.c b/tools/testing/selftests/bpf/prog_tests/fill_link_info.c
new file mode 100644
index 000000000000..f3932941bbaa
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/fill_link_info.c
@@ -0,0 +1,622 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (C) 2023 Yafang Shao <laoar.shao@gmail.com> */
+
+#include <string.h>
+#include <linux/bpf.h>
+#include <linux/limits.h>
+#include <test_progs.h>
+#include "trace_helpers.h"
+#include "test_fill_link_info.skel.h"
+#include "bpf/libbpf_internal.h"
+
+#define TP_CAT "sched"
+#define TP_NAME "sched_switch"
+
+static const char *kmulti_syms[] = {
+ "bpf_fentry_test2",
+ "bpf_fentry_test1",
+ "bpf_fentry_test3",
+};
+#define KMULTI_CNT ARRAY_SIZE(kmulti_syms)
+static __u64 kmulti_addrs[KMULTI_CNT];
+static __u64 kmulti_cookies[] = { 3, 1, 2 };
+
+#define KPROBE_FUNC "bpf_fentry_test1"
+static __u64 kprobe_addr;
+
+#define UPROBE_FILE "/proc/self/exe"
+static ssize_t uprobe_offset;
+/* uprobe attach point */
+static noinline void uprobe_func(void)
+{
+ asm volatile ("");
+}
+
+#define PERF_EVENT_COOKIE 0xdeadbeef
+
+static int verify_perf_link_info(int fd, enum bpf_perf_event_type type, long addr,
+ ssize_t offset, ssize_t entry_offset)
+{
+ struct bpf_link_info info;
+ __u32 len = sizeof(info);
+ char buf[PATH_MAX];
+ int err;
+
+ memset(&info, 0, sizeof(info));
+ buf[0] = '\0';
+
+again:
+ err = bpf_link_get_info_by_fd(fd, &info, &len);
+ if (!ASSERT_OK(err, "get_link_info"))
+ return -1;
+
+ if (!ASSERT_EQ(info.type, BPF_LINK_TYPE_PERF_EVENT, "link_type"))
+ return -1;
+ if (!ASSERT_EQ(info.perf_event.type, type, "perf_type_match"))
+ return -1;
+
+ switch (info.perf_event.type) {
+ case BPF_PERF_EVENT_KPROBE:
+ case BPF_PERF_EVENT_KRETPROBE:
+ ASSERT_EQ(info.perf_event.kprobe.offset, offset, "kprobe_offset");
+
+ /* In case kernel.kptr_restrict is not permitted or MAX_SYMS is reached */
+ if (addr)
+ ASSERT_EQ(info.perf_event.kprobe.addr, addr + entry_offset,
+ "kprobe_addr");
+
+ ASSERT_EQ(info.perf_event.kprobe.cookie, PERF_EVENT_COOKIE, "kprobe_cookie");
+
+ if (!info.perf_event.kprobe.func_name) {
+ ASSERT_EQ(info.perf_event.kprobe.name_len, 0, "name_len");
+ info.perf_event.kprobe.func_name = ptr_to_u64(&buf);
+ info.perf_event.kprobe.name_len = sizeof(buf);
+ goto again;
+ }
+
+ err = strncmp(u64_to_ptr(info.perf_event.kprobe.func_name), KPROBE_FUNC,
+ strlen(KPROBE_FUNC));
+ ASSERT_EQ(err, 0, "cmp_kprobe_func_name");
+ break;
+ case BPF_PERF_EVENT_TRACEPOINT:
+ if (!info.perf_event.tracepoint.tp_name) {
+ ASSERT_EQ(info.perf_event.tracepoint.name_len, 0, "name_len");
+ info.perf_event.tracepoint.tp_name = ptr_to_u64(&buf);
+ info.perf_event.tracepoint.name_len = sizeof(buf);
+ goto again;
+ }
+
+ ASSERT_EQ(info.perf_event.tracepoint.cookie, PERF_EVENT_COOKIE, "tracepoint_cookie");
+
+ err = strncmp(u64_to_ptr(info.perf_event.tracepoint.tp_name), TP_NAME,
+ strlen(TP_NAME));
+ ASSERT_EQ(err, 0, "cmp_tp_name");
+ break;
+ case BPF_PERF_EVENT_UPROBE:
+ case BPF_PERF_EVENT_URETPROBE:
+ ASSERT_EQ(info.perf_event.uprobe.offset, offset, "uprobe_offset");
+
+ if (!info.perf_event.uprobe.file_name) {
+ ASSERT_EQ(info.perf_event.uprobe.name_len, 0, "name_len");
+ info.perf_event.uprobe.file_name = ptr_to_u64(&buf);
+ info.perf_event.uprobe.name_len = sizeof(buf);
+ goto again;
+ }
+
+ ASSERT_EQ(info.perf_event.uprobe.cookie, PERF_EVENT_COOKIE, "uprobe_cookie");
+
+ err = strncmp(u64_to_ptr(info.perf_event.uprobe.file_name), UPROBE_FILE,
+ strlen(UPROBE_FILE));
+ ASSERT_EQ(err, 0, "cmp_file_name");
+ break;
+ case BPF_PERF_EVENT_EVENT:
+ ASSERT_EQ(info.perf_event.event.type, PERF_TYPE_SOFTWARE, "event_type");
+ ASSERT_EQ(info.perf_event.event.config, PERF_COUNT_SW_PAGE_FAULTS, "event_config");
+ ASSERT_EQ(info.perf_event.event.cookie, PERF_EVENT_COOKIE, "event_cookie");
+ break;
+ default:
+ err = -1;
+ break;
+ }
+ return err;
+}
+
+static void kprobe_fill_invalid_user_buffer(int fd)
+{
+ struct bpf_link_info info;
+ __u32 len = sizeof(info);
+ int err;
+
+ memset(&info, 0, sizeof(info));
+
+ info.perf_event.kprobe.func_name = 0x1; /* invalid address */
+ err = bpf_link_get_info_by_fd(fd, &info, &len);
+ ASSERT_EQ(err, -EINVAL, "invalid_buff_and_len");
+
+ info.perf_event.kprobe.name_len = 64;
+ err = bpf_link_get_info_by_fd(fd, &info, &len);
+ ASSERT_EQ(err, -EFAULT, "invalid_buff");
+
+ info.perf_event.kprobe.func_name = 0;
+ err = bpf_link_get_info_by_fd(fd, &info, &len);
+ ASSERT_EQ(err, -EINVAL, "invalid_len");
+
+ ASSERT_EQ(info.perf_event.kprobe.addr, 0, "func_addr");
+ ASSERT_EQ(info.perf_event.kprobe.offset, 0, "func_offset");
+ ASSERT_EQ(info.perf_event.type, 0, "type");
+}
+
+static void test_kprobe_fill_link_info(struct test_fill_link_info *skel,
+ enum bpf_perf_event_type type,
+ bool invalid)
+{
+ DECLARE_LIBBPF_OPTS(bpf_kprobe_opts, opts,
+ .attach_mode = PROBE_ATTACH_MODE_LINK,
+ .retprobe = type == BPF_PERF_EVENT_KRETPROBE,
+ .bpf_cookie = PERF_EVENT_COOKIE,
+ );
+ ssize_t entry_offset = 0;
+ struct bpf_link *link;
+ int link_fd, err;
+
+ link = bpf_program__attach_kprobe_opts(skel->progs.kprobe_run, KPROBE_FUNC, &opts);
+ if (!ASSERT_OK_PTR(link, "attach_kprobe"))
+ return;
+
+ link_fd = bpf_link__fd(link);
+ if (!invalid) {
+ /* See also arch_adjust_kprobe_addr(). */
+ if (skel->kconfig->CONFIG_X86_KERNEL_IBT)
+ entry_offset = 4;
+ err = verify_perf_link_info(link_fd, type, kprobe_addr, 0, entry_offset);
+ ASSERT_OK(err, "verify_perf_link_info");
+ } else {
+ kprobe_fill_invalid_user_buffer(link_fd);
+ }
+ bpf_link__destroy(link);
+}
+
+static void test_tp_fill_link_info(struct test_fill_link_info *skel)
+{
+ DECLARE_LIBBPF_OPTS(bpf_tracepoint_opts, opts,
+ .bpf_cookie = PERF_EVENT_COOKIE,
+ );
+ struct bpf_link *link;
+ int link_fd, err;
+
+ link = bpf_program__attach_tracepoint_opts(skel->progs.tp_run, TP_CAT, TP_NAME, &opts);
+ if (!ASSERT_OK_PTR(link, "attach_tp"))
+ return;
+
+ link_fd = bpf_link__fd(link);
+ err = verify_perf_link_info(link_fd, BPF_PERF_EVENT_TRACEPOINT, 0, 0, 0);
+ ASSERT_OK(err, "verify_perf_link_info");
+ bpf_link__destroy(link);
+}
+
+static void test_event_fill_link_info(struct test_fill_link_info *skel)
+{
+ DECLARE_LIBBPF_OPTS(bpf_perf_event_opts, opts,
+ .bpf_cookie = PERF_EVENT_COOKIE,
+ );
+ struct bpf_link *link;
+ int link_fd, err, pfd;
+ struct perf_event_attr attr = {
+ .type = PERF_TYPE_SOFTWARE,
+ .config = PERF_COUNT_SW_PAGE_FAULTS,
+ .freq = 1,
+ .sample_freq = 1,
+ .size = sizeof(struct perf_event_attr),
+ };
+
+ pfd = syscall(__NR_perf_event_open, &attr, -1 /* pid */, 0 /* cpu 0 */,
+ -1 /* group id */, 0 /* flags */);
+ if (!ASSERT_GE(pfd, 0, "perf_event_open"))
+ return;
+
+ link = bpf_program__attach_perf_event_opts(skel->progs.event_run, pfd, &opts);
+ if (!ASSERT_OK_PTR(link, "attach_event"))
+ goto error;
+
+ link_fd = bpf_link__fd(link);
+ err = verify_perf_link_info(link_fd, BPF_PERF_EVENT_EVENT, 0, 0, 0);
+ ASSERT_OK(err, "verify_perf_link_info");
+ bpf_link__destroy(link);
+
+error:
+ close(pfd);
+}
+
+static void test_uprobe_fill_link_info(struct test_fill_link_info *skel,
+ enum bpf_perf_event_type type)
+{
+ DECLARE_LIBBPF_OPTS(bpf_uprobe_opts, opts,
+ .retprobe = type == BPF_PERF_EVENT_URETPROBE,
+ .bpf_cookie = PERF_EVENT_COOKIE,
+ );
+ struct bpf_link *link;
+ int link_fd, err;
+
+ link = bpf_program__attach_uprobe_opts(skel->progs.uprobe_run,
+ 0, /* self pid */
+ UPROBE_FILE, uprobe_offset,
+ &opts);
+ if (!ASSERT_OK_PTR(link, "attach_uprobe"))
+ return;
+
+ link_fd = bpf_link__fd(link);
+ err = verify_perf_link_info(link_fd, type, 0, uprobe_offset, 0);
+ ASSERT_OK(err, "verify_perf_link_info");
+ bpf_link__destroy(link);
+}
+
+static int verify_kmulti_link_info(int fd, bool retprobe, bool has_cookies)
+{
+ __u64 addrs[KMULTI_CNT], cookies[KMULTI_CNT];
+ struct bpf_link_info info;
+ __u32 len = sizeof(info);
+ int flags, i, err;
+
+ memset(&info, 0, sizeof(info));
+
+again:
+ err = bpf_link_get_info_by_fd(fd, &info, &len);
+ if (!ASSERT_OK(err, "get_link_info"))
+ return -1;
+
+ if (!ASSERT_EQ(info.type, BPF_LINK_TYPE_KPROBE_MULTI, "kmulti_type"))
+ return -1;
+
+ ASSERT_EQ(info.kprobe_multi.count, KMULTI_CNT, "func_cnt");
+ flags = info.kprobe_multi.flags & BPF_F_KPROBE_MULTI_RETURN;
+ if (!retprobe)
+ ASSERT_EQ(flags, 0, "kmulti_flags");
+ else
+ ASSERT_NEQ(flags, 0, "kretmulti_flags");
+
+ if (!info.kprobe_multi.addrs) {
+ info.kprobe_multi.addrs = ptr_to_u64(addrs);
+ info.kprobe_multi.cookies = ptr_to_u64(cookies);
+ goto again;
+ }
+ for (i = 0; i < KMULTI_CNT; i++) {
+ ASSERT_EQ(addrs[i], kmulti_addrs[i], "kmulti_addrs");
+ ASSERT_EQ(cookies[i], has_cookies ? kmulti_cookies[i] : 0,
+ "kmulti_cookies_value");
+ }
+ return 0;
+}
+
+static void verify_kmulti_invalid_user_buffer(int fd)
+{
+ __u64 addrs[KMULTI_CNT], cookies[KMULTI_CNT];
+ struct bpf_link_info info;
+ __u32 len = sizeof(info);
+ int err, i;
+
+ memset(&info, 0, sizeof(info));
+
+ info.kprobe_multi.count = KMULTI_CNT;
+ err = bpf_link_get_info_by_fd(fd, &info, &len);
+ ASSERT_EQ(err, -EINVAL, "no_addr");
+
+ info.kprobe_multi.addrs = ptr_to_u64(addrs);
+ info.kprobe_multi.count = 0;
+ err = bpf_link_get_info_by_fd(fd, &info, &len);
+ ASSERT_EQ(err, -EINVAL, "no_cnt");
+
+ for (i = 0; i < KMULTI_CNT; i++)
+ addrs[i] = 0;
+ info.kprobe_multi.count = KMULTI_CNT - 1;
+ err = bpf_link_get_info_by_fd(fd, &info, &len);
+ ASSERT_EQ(err, -ENOSPC, "smaller_cnt");
+ for (i = 0; i < KMULTI_CNT - 1; i++)
+ ASSERT_EQ(addrs[i], kmulti_addrs[i], "kmulti_addrs");
+ ASSERT_EQ(addrs[i], 0, "kmulti_addrs");
+
+ for (i = 0; i < KMULTI_CNT; i++)
+ addrs[i] = 0;
+ info.kprobe_multi.count = KMULTI_CNT + 1;
+ err = bpf_link_get_info_by_fd(fd, &info, &len);
+ ASSERT_EQ(err, 0, "bigger_cnt");
+ for (i = 0; i < KMULTI_CNT; i++)
+ ASSERT_EQ(addrs[i], kmulti_addrs[i], "kmulti_addrs");
+
+ info.kprobe_multi.count = KMULTI_CNT;
+ info.kprobe_multi.addrs = 0x1; /* invalid addr */
+ err = bpf_link_get_info_by_fd(fd, &info, &len);
+ ASSERT_EQ(err, -EFAULT, "invalid_buff_addrs");
+
+ info.kprobe_multi.count = KMULTI_CNT;
+ info.kprobe_multi.addrs = ptr_to_u64(addrs);
+ info.kprobe_multi.cookies = 0x1; /* invalid addr */
+ err = bpf_link_get_info_by_fd(fd, &info, &len);
+ ASSERT_EQ(err, -EFAULT, "invalid_buff_cookies");
+
+ /* cookies && !count */
+ info.kprobe_multi.count = 0;
+ info.kprobe_multi.addrs = ptr_to_u64(NULL);
+ info.kprobe_multi.cookies = ptr_to_u64(cookies);
+ err = bpf_link_get_info_by_fd(fd, &info, &len);
+ ASSERT_EQ(err, -EINVAL, "invalid_cookies_count");
+}
+
+static int symbols_cmp_r(const void *a, const void *b)
+{
+ const char **str_a = (const char **) a;
+ const char **str_b = (const char **) b;
+
+ return strcmp(*str_a, *str_b);
+}
+
+static void test_kprobe_multi_fill_link_info(struct test_fill_link_info *skel,
+ bool retprobe, bool cookies,
+ bool invalid)
+{
+ LIBBPF_OPTS(bpf_kprobe_multi_opts, opts);
+ struct bpf_link *link;
+ int link_fd, err;
+
+ opts.syms = kmulti_syms;
+ opts.cookies = cookies ? kmulti_cookies : NULL;
+ opts.cnt = KMULTI_CNT;
+ opts.retprobe = retprobe;
+ link = bpf_program__attach_kprobe_multi_opts(skel->progs.kmulti_run, NULL, &opts);
+ if (!ASSERT_OK_PTR(link, "attach_kprobe_multi"))
+ return;
+
+ link_fd = bpf_link__fd(link);
+ if (!invalid) {
+ err = verify_kmulti_link_info(link_fd, retprobe, cookies);
+ ASSERT_OK(err, "verify_kmulti_link_info");
+ } else {
+ verify_kmulti_invalid_user_buffer(link_fd);
+ }
+ bpf_link__destroy(link);
+}
+
+#define SEC(name) __attribute__((section(name), used))
+
+static short uprobe_link_info_sema_1 SEC(".probes");
+static short uprobe_link_info_sema_2 SEC(".probes");
+static short uprobe_link_info_sema_3 SEC(".probes");
+
+noinline void uprobe_link_info_func_1(void)
+{
+ asm volatile ("");
+ uprobe_link_info_sema_1++;
+}
+
+noinline void uprobe_link_info_func_2(void)
+{
+ asm volatile ("");
+ uprobe_link_info_sema_2++;
+}
+
+noinline void uprobe_link_info_func_3(void)
+{
+ asm volatile ("");
+ uprobe_link_info_sema_3++;
+}
+
+static int
+verify_umulti_link_info(int fd, bool retprobe, __u64 *offsets,
+ __u64 *cookies, __u64 *ref_ctr_offsets)
+{
+ char path[PATH_MAX], path_buf[PATH_MAX];
+ struct bpf_link_info info;
+ __u32 len = sizeof(info);
+ __u64 ref_ctr_offsets_buf[3];
+ __u64 offsets_buf[3];
+ __u64 cookies_buf[3];
+ int i, err, bit;
+ __u32 count = 0;
+
+ memset(path, 0, sizeof(path));
+ err = readlink("/proc/self/exe", path, sizeof(path));
+ if (!ASSERT_NEQ(err, -1, "readlink"))
+ return -1;
+
+ for (bit = 0; bit < 8; bit++) {
+ memset(&info, 0, sizeof(info));
+ info.uprobe_multi.path = ptr_to_u64(path_buf);
+ info.uprobe_multi.path_size = sizeof(path_buf);
+ info.uprobe_multi.count = count;
+
+ if (bit & 0x1)
+ info.uprobe_multi.offsets = ptr_to_u64(offsets_buf);
+ if (bit & 0x2)
+ info.uprobe_multi.cookies = ptr_to_u64(cookies_buf);
+ if (bit & 0x4)
+ info.uprobe_multi.ref_ctr_offsets = ptr_to_u64(ref_ctr_offsets_buf);
+
+ err = bpf_link_get_info_by_fd(fd, &info, &len);
+ if (!ASSERT_OK(err, "bpf_link_get_info_by_fd"))
+ return -1;
+
+ if (!ASSERT_EQ(info.type, BPF_LINK_TYPE_UPROBE_MULTI, "info.type"))
+ return -1;
+
+ ASSERT_EQ(info.uprobe_multi.pid, getpid(), "info.uprobe_multi.pid");
+ ASSERT_EQ(info.uprobe_multi.count, 3, "info.uprobe_multi.count");
+ ASSERT_EQ(info.uprobe_multi.flags & BPF_F_KPROBE_MULTI_RETURN,
+ retprobe, "info.uprobe_multi.flags.retprobe");
+ ASSERT_EQ(info.uprobe_multi.path_size, strlen(path) + 1, "info.uprobe_multi.path_size");
+ ASSERT_STREQ(path_buf, path, "info.uprobe_multi.path");
+
+ for (i = 0; i < info.uprobe_multi.count; i++) {
+ if (info.uprobe_multi.offsets)
+ ASSERT_EQ(offsets_buf[i], offsets[i], "info.uprobe_multi.offsets");
+ if (info.uprobe_multi.cookies)
+ ASSERT_EQ(cookies_buf[i], cookies[i], "info.uprobe_multi.cookies");
+ if (info.uprobe_multi.ref_ctr_offsets) {
+ ASSERT_EQ(ref_ctr_offsets_buf[i], ref_ctr_offsets[i],
+ "info.uprobe_multi.ref_ctr_offsets");
+ }
+ }
+ count = count ?: info.uprobe_multi.count;
+ }
+
+ return 0;
+}
+
+static void verify_umulti_invalid_user_buffer(int fd)
+{
+ struct bpf_link_info info;
+ __u32 len = sizeof(info);
+ __u64 buf[3];
+ int err;
+
+ /* upath_size defined, not path */
+ memset(&info, 0, sizeof(info));
+ info.uprobe_multi.path_size = 3;
+ err = bpf_link_get_info_by_fd(fd, &info, &len);
+ ASSERT_EQ(err, -EINVAL, "failed_upath_size");
+
+ /* path defined, but small */
+ memset(&info, 0, sizeof(info));
+ info.uprobe_multi.path = ptr_to_u64(buf);
+ info.uprobe_multi.path_size = 3;
+ err = bpf_link_get_info_by_fd(fd, &info, &len);
+ ASSERT_LT(err, 0, "failed_upath_small");
+
+ /* path has wrong pointer */
+ memset(&info, 0, sizeof(info));
+ info.uprobe_multi.path_size = PATH_MAX;
+ info.uprobe_multi.path = 123;
+ err = bpf_link_get_info_by_fd(fd, &info, &len);
+ ASSERT_EQ(err, -EFAULT, "failed_bad_path_ptr");
+
+ /* count zero, with offsets */
+ memset(&info, 0, sizeof(info));
+ info.uprobe_multi.offsets = ptr_to_u64(buf);
+ err = bpf_link_get_info_by_fd(fd, &info, &len);
+ ASSERT_EQ(err, -EINVAL, "failed_count");
+
+ /* offsets not big enough */
+ memset(&info, 0, sizeof(info));
+ info.uprobe_multi.offsets = ptr_to_u64(buf);
+ info.uprobe_multi.count = 2;
+ err = bpf_link_get_info_by_fd(fd, &info, &len);
+ ASSERT_EQ(err, -ENOSPC, "failed_small_count");
+
+ /* offsets has wrong pointer */
+ memset(&info, 0, sizeof(info));
+ info.uprobe_multi.offsets = 123;
+ info.uprobe_multi.count = 3;
+ err = bpf_link_get_info_by_fd(fd, &info, &len);
+ ASSERT_EQ(err, -EFAULT, "failed_wrong_offsets");
+}
+
+static void test_uprobe_multi_fill_link_info(struct test_fill_link_info *skel,
+ bool retprobe, bool invalid)
+{
+ LIBBPF_OPTS(bpf_uprobe_multi_opts, opts,
+ .retprobe = retprobe,
+ );
+ const char *syms[3] = {
+ "uprobe_link_info_func_1",
+ "uprobe_link_info_func_2",
+ "uprobe_link_info_func_3",
+ };
+ __u64 cookies[3] = {
+ 0xdead,
+ 0xbeef,
+ 0xcafe,
+ };
+ const char *sema[3] = {
+ "uprobe_link_info_sema_1",
+ "uprobe_link_info_sema_2",
+ "uprobe_link_info_sema_3",
+ };
+ __u64 *offsets = NULL, *ref_ctr_offsets;
+ struct bpf_link *link;
+ int link_fd, err;
+
+ err = elf_resolve_syms_offsets("/proc/self/exe", 3, sema,
+ (unsigned long **) &ref_ctr_offsets, STT_OBJECT);
+ if (!ASSERT_OK(err, "elf_resolve_syms_offsets_object"))
+ return;
+
+ err = elf_resolve_syms_offsets("/proc/self/exe", 3, syms,
+ (unsigned long **) &offsets, STT_FUNC);
+ if (!ASSERT_OK(err, "elf_resolve_syms_offsets_func"))
+ goto out;
+
+ opts.syms = syms;
+ opts.cookies = &cookies[0];
+ opts.ref_ctr_offsets = (unsigned long *) &ref_ctr_offsets[0];
+ opts.cnt = ARRAY_SIZE(syms);
+
+ link = bpf_program__attach_uprobe_multi(skel->progs.umulti_run, 0,
+ "/proc/self/exe", NULL, &opts);
+ if (!ASSERT_OK_PTR(link, "bpf_program__attach_uprobe_multi"))
+ goto out;
+
+ link_fd = bpf_link__fd(link);
+ if (invalid)
+ verify_umulti_invalid_user_buffer(link_fd);
+ else
+ verify_umulti_link_info(link_fd, retprobe, offsets, cookies, ref_ctr_offsets);
+
+ bpf_link__destroy(link);
+out:
+ free(ref_ctr_offsets);
+ free(offsets);
+}
+
+void test_fill_link_info(void)
+{
+ struct test_fill_link_info *skel;
+ int i;
+
+ skel = test_fill_link_info__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "skel_open"))
+ return;
+
+ /* load kallsyms to compare the addr */
+ if (!ASSERT_OK(load_kallsyms(), "load_kallsyms"))
+ goto cleanup;
+
+ kprobe_addr = ksym_get_addr(KPROBE_FUNC);
+ if (test__start_subtest("kprobe_link_info"))
+ test_kprobe_fill_link_info(skel, BPF_PERF_EVENT_KPROBE, false);
+ if (test__start_subtest("kretprobe_link_info"))
+ test_kprobe_fill_link_info(skel, BPF_PERF_EVENT_KRETPROBE, false);
+ if (test__start_subtest("kprobe_invalid_ubuff"))
+ test_kprobe_fill_link_info(skel, BPF_PERF_EVENT_KPROBE, true);
+ if (test__start_subtest("tracepoint_link_info"))
+ test_tp_fill_link_info(skel);
+ if (test__start_subtest("event_link_info"))
+ test_event_fill_link_info(skel);
+
+ uprobe_offset = get_uprobe_offset(&uprobe_func);
+ if (test__start_subtest("uprobe_link_info"))
+ test_uprobe_fill_link_info(skel, BPF_PERF_EVENT_UPROBE);
+ if (test__start_subtest("uretprobe_link_info"))
+ test_uprobe_fill_link_info(skel, BPF_PERF_EVENT_URETPROBE);
+
+ qsort(kmulti_syms, KMULTI_CNT, sizeof(kmulti_syms[0]), symbols_cmp_r);
+ for (i = 0; i < KMULTI_CNT; i++)
+ kmulti_addrs[i] = ksym_get_addr(kmulti_syms[i]);
+ if (test__start_subtest("kprobe_multi_link_info")) {
+ test_kprobe_multi_fill_link_info(skel, false, false, false);
+ test_kprobe_multi_fill_link_info(skel, false, true, false);
+ }
+ if (test__start_subtest("kretprobe_multi_link_info")) {
+ test_kprobe_multi_fill_link_info(skel, true, false, false);
+ test_kprobe_multi_fill_link_info(skel, true, true, false);
+ }
+ if (test__start_subtest("kprobe_multi_invalid_ubuff"))
+ test_kprobe_multi_fill_link_info(skel, true, true, true);
+
+ if (test__start_subtest("uprobe_multi_link_info"))
+ test_uprobe_multi_fill_link_info(skel, false, false);
+ if (test__start_subtest("uretprobe_multi_link_info"))
+ test_uprobe_multi_fill_link_info(skel, true, false);
+ if (test__start_subtest("uprobe_multi_invalid"))
+ test_uprobe_multi_fill_link_info(skel, false, true);
+
+cleanup:
+ test_fill_link_info__destroy(skel);
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/find_vma.c b/tools/testing/selftests/bpf/prog_tests/find_vma.c
new file mode 100644
index 000000000000..5165b38f0e59
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/find_vma.c
@@ -0,0 +1,127 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2021 Facebook */
+#include <test_progs.h>
+#include <sys/types.h>
+#include <unistd.h>
+#include "find_vma.skel.h"
+#include "find_vma_fail1.skel.h"
+#include "find_vma_fail2.skel.h"
+
+static void test_and_reset_skel(struct find_vma *skel, int expected_find_zero_ret, bool need_test)
+{
+ if (need_test) {
+ ASSERT_EQ(skel->bss->found_vm_exec, 1, "found_vm_exec");
+ ASSERT_EQ(skel->data->find_addr_ret, 0, "find_addr_ret");
+ ASSERT_EQ(skel->data->find_zero_ret, expected_find_zero_ret, "find_zero_ret");
+ ASSERT_OK_PTR(strstr(skel->bss->d_iname, "test_progs"), "find_test_progs");
+ }
+
+ skel->bss->found_vm_exec = 0;
+ skel->data->find_addr_ret = -1;
+ skel->data->find_zero_ret = -1;
+ skel->bss->d_iname[0] = 0;
+}
+
+static int open_pe(void)
+{
+ struct perf_event_attr attr = {0};
+ int pfd;
+
+ /* create perf event */
+ attr.size = sizeof(attr);
+ attr.type = PERF_TYPE_HARDWARE;
+ attr.config = PERF_COUNT_HW_CPU_CYCLES;
+ attr.freq = 1;
+ attr.sample_freq = 1000;
+ pfd = syscall(__NR_perf_event_open, &attr, 0, -1, -1, PERF_FLAG_FD_CLOEXEC);
+
+ return pfd >= 0 ? pfd : -errno;
+}
+
+static bool find_vma_pe_condition(struct find_vma *skel)
+{
+ return skel->bss->found_vm_exec == 0 ||
+ skel->data->find_addr_ret != 0 ||
+ skel->data->find_zero_ret == -1 ||
+ strcmp(skel->bss->d_iname, "test_progs") != 0;
+}
+
+static void test_find_vma_pe(struct find_vma *skel)
+{
+ struct bpf_link *link = NULL;
+ volatile int j = 0;
+ int pfd, i;
+ const int one_bn = 1000000000;
+
+ pfd = open_pe();
+ if (pfd < 0) {
+ if (pfd == -ENOENT || pfd == -EOPNOTSUPP) {
+ printf("%s:SKIP:no PERF_COUNT_HW_CPU_CYCLES\n", __func__);
+ test__skip();
+ goto cleanup;
+ }
+ if (!ASSERT_GE(pfd, 0, "perf_event_open"))
+ goto cleanup;
+ }
+
+ link = bpf_program__attach_perf_event(skel->progs.handle_pe, pfd);
+ if (!ASSERT_OK_PTR(link, "attach_perf_event"))
+ goto cleanup;
+
+ for (i = 0; i < one_bn && find_vma_pe_condition(skel); ++i)
+ ++j;
+
+ test_and_reset_skel(skel, -EBUSY /* in nmi, irq_work is busy */, i == one_bn);
+cleanup:
+ bpf_link__destroy(link);
+ close(pfd);
+}
+
+static void test_find_vma_kprobe(struct find_vma *skel)
+{
+ int err;
+
+ err = find_vma__attach(skel);
+ if (!ASSERT_OK(err, "get_branch_snapshot__attach"))
+ return;
+
+ getpgid(skel->bss->target_pid);
+ test_and_reset_skel(skel, -ENOENT /* could not find vma for ptr 0 */, true);
+}
+
+static void test_illegal_write_vma(void)
+{
+ struct find_vma_fail1 *skel;
+
+ skel = find_vma_fail1__open_and_load();
+ if (!ASSERT_ERR_PTR(skel, "find_vma_fail1__open_and_load"))
+ find_vma_fail1__destroy(skel);
+}
+
+static void test_illegal_write_task(void)
+{
+ struct find_vma_fail2 *skel;
+
+ skel = find_vma_fail2__open_and_load();
+ if (!ASSERT_ERR_PTR(skel, "find_vma_fail2__open_and_load"))
+ find_vma_fail2__destroy(skel);
+}
+
+void serial_test_find_vma(void)
+{
+ struct find_vma *skel;
+
+ skel = find_vma__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "find_vma__open_and_load"))
+ return;
+
+ skel->bss->target_pid = getpid();
+ skel->bss->addr = (__u64)(uintptr_t)test_find_vma_pe;
+
+ test_find_vma_pe(skel);
+ test_find_vma_kprobe(skel);
+
+ find_vma__destroy(skel);
+ test_illegal_write_vma();
+ test_illegal_write_task();
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/flow_dissector.c b/tools/testing/selftests/bpf/prog_tests/flow_dissector.c
index f11f187990e9..c4773173a4e4 100644
--- a/tools/testing/selftests/bpf/prog_tests/flow_dissector.c
+++ b/tools/testing/selftests/bpf/prog_tests/flow_dissector.c
@@ -8,13 +8,16 @@
#include "bpf_flow.skel.h"
+#define FLOW_CONTINUE_SADDR 0x7f00007f /* 127.0.0.127 */
+
#ifndef IP_MF
#define IP_MF 0x2000
#endif
#define CHECK_FLOW_KEYS(desc, got, expected) \
- CHECK_ATTR(memcmp(&got, &expected, sizeof(got)) != 0, \
+ _CHECK(memcmp(&got, &expected, sizeof(got)) != 0, \
desc, \
+ topts.duration, \
"nhoff=%u/%u " \
"thoff=%u/%u " \
"addr_proto=0x%x/0x%x " \
@@ -99,6 +102,7 @@ struct test {
} pkt;
struct bpf_flow_keys keys;
__u32 flags;
+ __u32 retval;
};
#define VLAN_HLEN 4
@@ -125,6 +129,7 @@ struct test tests[] = {
.sport = 80,
.dport = 8080,
},
+ .retval = BPF_OK,
},
{
.name = "ipv6",
@@ -145,6 +150,7 @@ struct test tests[] = {
.sport = 80,
.dport = 8080,
},
+ .retval = BPF_OK,
},
{
.name = "802.1q-ipv4",
@@ -167,6 +173,7 @@ struct test tests[] = {
.sport = 80,
.dport = 8080,
},
+ .retval = BPF_OK,
},
{
.name = "802.1ad-ipv6",
@@ -190,6 +197,7 @@ struct test tests[] = {
.sport = 80,
.dport = 8080,
},
+ .retval = BPF_OK,
},
{
.name = "ipv4-frag",
@@ -216,6 +224,7 @@ struct test tests[] = {
.dport = 8080,
},
.flags = BPF_FLOW_DISSECTOR_F_PARSE_1ST_FRAG,
+ .retval = BPF_OK,
},
{
.name = "ipv4-no-frag",
@@ -238,6 +247,7 @@ struct test tests[] = {
.is_frag = true,
.is_first_frag = true,
},
+ .retval = BPF_OK,
},
{
.name = "ipv6-frag",
@@ -264,6 +274,7 @@ struct test tests[] = {
.dport = 8080,
},
.flags = BPF_FLOW_DISSECTOR_F_PARSE_1ST_FRAG,
+ .retval = BPF_OK,
},
{
.name = "ipv6-no-frag",
@@ -286,6 +297,7 @@ struct test tests[] = {
.is_frag = true,
.is_first_frag = true,
},
+ .retval = BPF_OK,
},
{
.name = "ipv6-flow-label",
@@ -308,6 +320,7 @@ struct test tests[] = {
.dport = 8080,
.flow_label = __bpf_constant_htonl(0xbeeef),
},
+ .retval = BPF_OK,
},
{
.name = "ipv6-no-flow-label",
@@ -330,6 +343,31 @@ struct test tests[] = {
.flow_label = __bpf_constant_htonl(0xbeeef),
},
.flags = BPF_FLOW_DISSECTOR_F_STOP_AT_FLOW_LABEL,
+ .retval = BPF_OK,
+ },
+ {
+ .name = "ipv6-empty-flow-label",
+ .pkt.ipv6 = {
+ .eth.h_proto = __bpf_constant_htons(ETH_P_IPV6),
+ .iph.nexthdr = IPPROTO_TCP,
+ .iph.payload_len = __bpf_constant_htons(MAGIC_BYTES),
+ .iph.flow_lbl = { 0x00, 0x00, 0x00 },
+ .tcp.doff = 5,
+ .tcp.source = 80,
+ .tcp.dest = 8080,
+ },
+ .keys = {
+ .flags = BPF_FLOW_DISSECTOR_F_STOP_AT_FLOW_LABEL,
+ .nhoff = ETH_HLEN,
+ .thoff = ETH_HLEN + sizeof(struct ipv6hdr),
+ .addr_proto = ETH_P_IPV6,
+ .ip_proto = IPPROTO_TCP,
+ .n_proto = __bpf_constant_htons(ETH_P_IPV6),
+ .sport = 80,
+ .dport = 8080,
+ },
+ .flags = BPF_FLOW_DISSECTOR_F_STOP_AT_FLOW_LABEL,
+ .retval = BPF_OK,
},
{
.name = "ipip-encap",
@@ -358,6 +396,7 @@ struct test tests[] = {
.sport = 80,
.dport = 8080,
},
+ .retval = BPF_OK,
},
{
.name = "ipip-no-encap",
@@ -385,6 +424,26 @@ struct test tests[] = {
.is_encap = true,
},
.flags = BPF_FLOW_DISSECTOR_F_STOP_AT_ENCAP,
+ .retval = BPF_OK,
+ },
+ {
+ .name = "ipip-encap-dissector-continue",
+ .pkt.ipip = {
+ .eth.h_proto = __bpf_constant_htons(ETH_P_IP),
+ .iph.ihl = 5,
+ .iph.protocol = IPPROTO_IPIP,
+ .iph.tot_len = __bpf_constant_htons(MAGIC_BYTES),
+ .iph.saddr = __bpf_constant_htonl(FLOW_CONTINUE_SADDR),
+ .iph_inner.ihl = 5,
+ .iph_inner.protocol = IPPROTO_TCP,
+ .iph_inner.tot_len =
+ __bpf_constant_htons(MAGIC_BYTES) -
+ sizeof(struct iphdr),
+ .tcp.doff = 5,
+ .tcp.source = 99,
+ .tcp.dest = 9090,
+ },
+ .retval = BPF_FLOW_DISSECTOR_CONTINUE,
},
};
@@ -457,10 +516,10 @@ static int init_prog_array(struct bpf_object *obj, struct bpf_map *prog_array)
if (map_fd < 0)
return -1;
- for (i = 0; i < bpf_map__def(prog_array)->max_entries; i++) {
- snprintf(prog_name, sizeof(prog_name), "flow_dissector/%i", i);
+ for (i = 0; i < bpf_map__max_entries(prog_array); i++) {
+ snprintf(prog_name, sizeof(prog_name), "flow_dissector_%d", i);
- prog = bpf_object__find_program_by_title(obj, prog_name);
+ prog = bpf_object__find_program_by_name(obj, prog_name);
if (!prog)
return -1;
@@ -487,7 +546,7 @@ static void run_tests_skb_less(int tap_fd, struct bpf_map *keys)
/* Keep in sync with 'flags' from eth_get_headlen. */
__u32 eth_get_headlen_flags =
BPF_FLOW_DISSECTOR_F_PARSE_1ST_FRAG;
- struct bpf_prog_test_run_attr tattr = {};
+ LIBBPF_OPTS(bpf_test_run_opts, topts);
struct bpf_flow_keys flow_keys = {};
__u32 key = (__u32)(tests[i].keys.sport) << 16 |
tests[i].keys.dport;
@@ -502,14 +561,17 @@ static void run_tests_skb_less(int tap_fd, struct bpf_map *keys)
err = tx_tap(tap_fd, &tests[i].pkt, sizeof(tests[i].pkt));
CHECK(err < 0, "tx_tap", "err %d errno %d\n", err, errno);
+ /* check the stored flow_keys only if BPF_OK expected */
+ if (tests[i].retval != BPF_OK)
+ continue;
+
err = bpf_map_lookup_elem(keys_fd, &key, &flow_keys);
- CHECK_ATTR(err, tests[i].name, "bpf_map_lookup_elem %d\n", err);
+ ASSERT_OK(err, "bpf_map_lookup_elem");
- CHECK_ATTR(err, tests[i].name, "skb-less err %d\n", err);
CHECK_FLOW_KEYS(tests[i].name, flow_keys, tests[i].keys);
err = bpf_map_delete_elem(keys_fd, &key);
- CHECK_ATTR(err, tests[i].name, "bpf_map_delete_elem %d\n", err);
+ ASSERT_OK(err, "bpf_map_delete_elem");
}
}
@@ -541,7 +603,7 @@ static void test_skb_less_link_create(struct bpf_flow *skel, int tap_fd)
return;
link = bpf_program__attach_netns(skel->progs._dissect, net_fd);
- if (CHECK(IS_ERR(link), "attach_netns", "err %ld\n", PTR_ERR(link)))
+ if (!ASSERT_OK_PTR(link, "attach_netns"))
goto out_close;
run_tests_skb_less(tap_fd, skel->maps.last_dissection);
@@ -573,27 +635,28 @@ void test_flow_dissector(void)
for (i = 0; i < ARRAY_SIZE(tests); i++) {
struct bpf_flow_keys flow_keys;
- struct bpf_prog_test_run_attr tattr = {
- .prog_fd = prog_fd,
+ LIBBPF_OPTS(bpf_test_run_opts, topts,
.data_in = &tests[i].pkt,
.data_size_in = sizeof(tests[i].pkt),
.data_out = &flow_keys,
- };
+ );
static struct bpf_flow_keys ctx = {};
if (tests[i].flags) {
- tattr.ctx_in = &ctx;
- tattr.ctx_size_in = sizeof(ctx);
+ topts.ctx_in = &ctx;
+ topts.ctx_size_in = sizeof(ctx);
ctx.flags = tests[i].flags;
}
- err = bpf_prog_test_run_xattr(&tattr);
- CHECK_ATTR(tattr.data_size_out != sizeof(flow_keys) ||
- err || tattr.retval != 1,
- tests[i].name,
- "err %d errno %d retval %d duration %d size %u/%lu\n",
- err, errno, tattr.retval, tattr.duration,
- tattr.data_size_out, sizeof(flow_keys));
+ err = bpf_prog_test_run_opts(prog_fd, &topts);
+ ASSERT_OK(err, "test_run");
+ ASSERT_EQ(topts.retval, tests[i].retval, "test_run retval");
+
+ /* check the resulting flow_keys only if BPF_OK returned */
+ if (topts.retval != BPF_OK)
+ continue;
+ ASSERT_EQ(topts.data_size_out, sizeof(flow_keys),
+ "test_run data_size_out");
CHECK_FLOW_KEYS(tests[i].name, flow_keys, tests[i].keys);
}
diff --git a/tools/testing/selftests/bpf/prog_tests/flow_dissector_load_bytes.c b/tools/testing/selftests/bpf/prog_tests/flow_dissector_load_bytes.c
index 0e8a4d2f023d..c7a47b57ac91 100644
--- a/tools/testing/selftests/bpf/prog_tests/flow_dissector_load_bytes.c
+++ b/tools/testing/selftests/bpf/prog_tests/flow_dissector_load_bytes.c
@@ -2,10 +2,9 @@
#include <test_progs.h>
#include <network_helpers.h>
-void test_flow_dissector_load_bytes(void)
+void serial_test_flow_dissector_load_bytes(void)
{
struct bpf_flow_keys flow_keys;
- __u32 duration = 0, retval, size;
struct bpf_insn prog[] = {
// BPF_REG_1 - 1st argument: context
// BPF_REG_2 - 2nd argument: offset, start at first byte
@@ -27,22 +26,25 @@ void test_flow_dissector_load_bytes(void)
BPF_EXIT_INSN(),
};
int fd, err;
+ LIBBPF_OPTS(bpf_test_run_opts, topts,
+ .data_in = &pkt_v4,
+ .data_size_in = sizeof(pkt_v4),
+ .data_out = &flow_keys,
+ .data_size_out = sizeof(flow_keys),
+ .repeat = 1,
+ );
/* make sure bpf_skb_load_bytes is not allowed from skb-less context
*/
- fd = bpf_load_program(BPF_PROG_TYPE_FLOW_DISSECTOR, prog,
+ fd = bpf_test_load_program(BPF_PROG_TYPE_FLOW_DISSECTOR, prog,
ARRAY_SIZE(prog), "GPL", 0, NULL, 0);
- CHECK(fd < 0,
- "flow_dissector-bpf_skb_load_bytes-load",
- "fd %d errno %d\n",
- fd, errno);
+ ASSERT_GE(fd, 0, "bpf_test_load_program good fd");
- err = bpf_prog_test_run(fd, 1, &pkt_v4, sizeof(pkt_v4),
- &flow_keys, &size, &retval, &duration);
- CHECK(size != sizeof(flow_keys) || err || retval != 1,
- "flow_dissector-bpf_skb_load_bytes",
- "err %d errno %d retval %d duration %d size %u/%zu\n",
- err, errno, retval, duration, size, sizeof(flow_keys));
+ err = bpf_prog_test_run_opts(fd, &topts);
+ ASSERT_OK(err, "test_run");
+ ASSERT_EQ(topts.data_size_out, sizeof(flow_keys),
+ "test_run data_size_out");
+ ASSERT_EQ(topts.retval, BPF_OK, "test_run retval");
if (fd >= -1)
close(fd);
diff --git a/tools/testing/selftests/bpf/prog_tests/flow_dissector_reattach.c b/tools/testing/selftests/bpf/prog_tests/flow_dissector_reattach.c
index 172c586b6996..9333f7346d15 100644
--- a/tools/testing/selftests/bpf/prog_tests/flow_dissector_reattach.c
+++ b/tools/testing/selftests/bpf/prog_tests/flow_dissector_reattach.c
@@ -47,9 +47,9 @@ static int load_prog(enum bpf_prog_type type)
};
int fd;
- fd = bpf_load_program(type, prog, ARRAY_SIZE(prog), "GPL", 0, NULL, 0);
+ fd = bpf_test_load_program(type, prog, ARRAY_SIZE(prog), "GPL", 0, NULL, 0);
if (CHECK_FAIL(fd < 0))
- perror("bpf_load_program");
+ perror("bpf_test_load_program");
return fd;
}
@@ -60,9 +60,9 @@ static __u32 query_prog_id(int prog)
__u32 info_len = sizeof(info);
int err;
- err = bpf_obj_get_info_by_fd(prog, &info, &info_len);
+ err = bpf_prog_get_info_by_fd(prog, &info, &info_len);
if (CHECK_FAIL(err || info_len != sizeof(info))) {
- perror("bpf_obj_get_info_by_fd");
+ perror("bpf_prog_get_info_by_fd");
return 0;
}
@@ -134,9 +134,9 @@ static void test_link_create_link_create(int netns, int prog1, int prog2)
/* Expect failure creating link when another link exists */
errno = 0;
link2 = bpf_link_create(prog2, netns, BPF_FLOW_DISSECTOR, &opts);
- if (CHECK_FAIL(link2 != -1 || errno != E2BIG))
+ if (CHECK_FAIL(link2 >= 0 || errno != E2BIG))
perror("bpf_prog_attach(prog2) expected E2BIG");
- if (link2 != -1)
+ if (link2 >= 0)
close(link2);
CHECK_FAIL(query_attached_prog_id(netns) != query_prog_id(prog1));
@@ -159,9 +159,9 @@ static void test_prog_attach_link_create(int netns, int prog1, int prog2)
/* Expect failure creating link when prog attached */
errno = 0;
link = bpf_link_create(prog2, netns, BPF_FLOW_DISSECTOR, &opts);
- if (CHECK_FAIL(link != -1 || errno != EEXIST))
+ if (CHECK_FAIL(link >= 0 || errno != EEXIST))
perror("bpf_link_create(prog2) expected EEXIST");
- if (link != -1)
+ if (link >= 0)
close(link);
CHECK_FAIL(query_attached_prog_id(netns) != query_prog_id(prog1));
@@ -497,7 +497,7 @@ static void test_link_get_info(int netns, int prog1, int prog2)
}
info_len = sizeof(info);
- err = bpf_obj_get_info_by_fd(link, &info, &info_len);
+ err = bpf_link_get_info_by_fd(link, &info, &info_len);
if (CHECK_FAIL(err)) {
perror("bpf_obj_get_info");
goto out_unlink;
@@ -521,7 +521,7 @@ static void test_link_get_info(int netns, int prog1, int prog2)
link_id = info.id;
info_len = sizeof(info);
- err = bpf_obj_get_info_by_fd(link, &info, &info_len);
+ err = bpf_link_get_info_by_fd(link, &info, &info_len);
if (CHECK_FAIL(err)) {
perror("bpf_obj_get_info");
goto out_unlink;
@@ -546,7 +546,7 @@ static void test_link_get_info(int netns, int prog1, int prog2)
netns = -1;
info_len = sizeof(info);
- err = bpf_obj_get_info_by_fd(link, &info, &info_len);
+ err = bpf_link_get_info_by_fd(link, &info, &info_len);
if (CHECK_FAIL(err)) {
perror("bpf_obj_get_info");
goto out_unlink;
@@ -623,12 +623,12 @@ static void run_tests(int netns)
}
out_close:
for (i = 0; i < ARRAY_SIZE(progs); i++) {
- if (progs[i] != -1)
+ if (progs[i] >= 0)
CHECK_FAIL(close(progs[i]));
}
}
-void test_flow_dissector_reattach(void)
+void serial_test_flow_dissector_reattach(void)
{
int err, new_net, saved_net;
diff --git a/tools/testing/selftests/bpf/prog_tests/for_each.c b/tools/testing/selftests/bpf/prog_tests/for_each.c
new file mode 100644
index 000000000000..8963f8a549f2
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/for_each.c
@@ -0,0 +1,154 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2021 Facebook */
+#include <test_progs.h>
+#include <network_helpers.h>
+#include "for_each_hash_map_elem.skel.h"
+#include "for_each_array_map_elem.skel.h"
+#include "for_each_map_elem_write_key.skel.h"
+
+static unsigned int duration;
+
+static void test_hash_map(void)
+{
+ int i, err, max_entries;
+ struct for_each_hash_map_elem *skel;
+ __u64 *percpu_valbuf = NULL;
+ size_t percpu_val_sz;
+ __u32 key, num_cpus;
+ __u64 val;
+ LIBBPF_OPTS(bpf_test_run_opts, topts,
+ .data_in = &pkt_v4,
+ .data_size_in = sizeof(pkt_v4),
+ .repeat = 1,
+ );
+
+ skel = for_each_hash_map_elem__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "for_each_hash_map_elem__open_and_load"))
+ return;
+
+ max_entries = bpf_map__max_entries(skel->maps.hashmap);
+ for (i = 0; i < max_entries; i++) {
+ key = i;
+ val = i + 1;
+ err = bpf_map__update_elem(skel->maps.hashmap, &key, sizeof(key),
+ &val, sizeof(val), BPF_ANY);
+ if (!ASSERT_OK(err, "map_update"))
+ goto out;
+ }
+
+ num_cpus = bpf_num_possible_cpus();
+ percpu_val_sz = sizeof(__u64) * num_cpus;
+ percpu_valbuf = malloc(percpu_val_sz);
+ if (!ASSERT_OK_PTR(percpu_valbuf, "percpu_valbuf"))
+ goto out;
+
+ key = 1;
+ for (i = 0; i < num_cpus; i++)
+ percpu_valbuf[i] = i + 1;
+ err = bpf_map__update_elem(skel->maps.percpu_map, &key, sizeof(key),
+ percpu_valbuf, percpu_val_sz, BPF_ANY);
+ if (!ASSERT_OK(err, "percpu_map_update"))
+ goto out;
+
+ err = bpf_prog_test_run_opts(bpf_program__fd(skel->progs.test_pkt_access), &topts);
+ duration = topts.duration;
+ if (CHECK(err || topts.retval, "ipv4", "err %d errno %d retval %d\n",
+ err, errno, topts.retval))
+ goto out;
+
+ ASSERT_EQ(skel->bss->hashmap_output, 4, "hashmap_output");
+ ASSERT_EQ(skel->bss->hashmap_elems, max_entries, "hashmap_elems");
+
+ key = 1;
+ err = bpf_map__lookup_elem(skel->maps.hashmap, &key, sizeof(key), &val, sizeof(val), 0);
+ ASSERT_ERR(err, "hashmap_lookup");
+
+ ASSERT_EQ(skel->bss->percpu_called, 1, "percpu_called");
+ ASSERT_LT(skel->bss->cpu, num_cpus, "num_cpus");
+ ASSERT_EQ(skel->bss->percpu_map_elems, 1, "percpu_map_elems");
+ ASSERT_EQ(skel->bss->percpu_key, 1, "percpu_key");
+ ASSERT_EQ(skel->bss->percpu_val, skel->bss->cpu + 1, "percpu_val");
+ ASSERT_EQ(skel->bss->percpu_output, 100, "percpu_output");
+out:
+ free(percpu_valbuf);
+ for_each_hash_map_elem__destroy(skel);
+}
+
+static void test_array_map(void)
+{
+ __u32 key, num_cpus, max_entries;
+ int i, err;
+ struct for_each_array_map_elem *skel;
+ __u64 *percpu_valbuf = NULL;
+ size_t percpu_val_sz;
+ __u64 val, expected_total;
+ LIBBPF_OPTS(bpf_test_run_opts, topts,
+ .data_in = &pkt_v4,
+ .data_size_in = sizeof(pkt_v4),
+ .repeat = 1,
+ );
+
+ skel = for_each_array_map_elem__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "for_each_array_map_elem__open_and_load"))
+ return;
+
+ expected_total = 0;
+ max_entries = bpf_map__max_entries(skel->maps.arraymap);
+ for (i = 0; i < max_entries; i++) {
+ key = i;
+ val = i + 1;
+ /* skip the last iteration for expected total */
+ if (i != max_entries - 1)
+ expected_total += val;
+ err = bpf_map__update_elem(skel->maps.arraymap, &key, sizeof(key),
+ &val, sizeof(val), BPF_ANY);
+ if (!ASSERT_OK(err, "map_update"))
+ goto out;
+ }
+
+ num_cpus = bpf_num_possible_cpus();
+ percpu_val_sz = sizeof(__u64) * num_cpus;
+ percpu_valbuf = malloc(percpu_val_sz);
+ if (!ASSERT_OK_PTR(percpu_valbuf, "percpu_valbuf"))
+ goto out;
+
+ key = 0;
+ for (i = 0; i < num_cpus; i++)
+ percpu_valbuf[i] = i + 1;
+ err = bpf_map__update_elem(skel->maps.percpu_map, &key, sizeof(key),
+ percpu_valbuf, percpu_val_sz, BPF_ANY);
+ if (!ASSERT_OK(err, "percpu_map_update"))
+ goto out;
+
+ err = bpf_prog_test_run_opts(bpf_program__fd(skel->progs.test_pkt_access), &topts);
+ duration = topts.duration;
+ if (CHECK(err || topts.retval, "ipv4", "err %d errno %d retval %d\n",
+ err, errno, topts.retval))
+ goto out;
+
+ ASSERT_EQ(skel->bss->arraymap_output, expected_total, "array_output");
+ ASSERT_EQ(skel->bss->cpu + 1, skel->bss->percpu_val, "percpu_val");
+
+out:
+ free(percpu_valbuf);
+ for_each_array_map_elem__destroy(skel);
+}
+
+static void test_write_map_key(void)
+{
+ struct for_each_map_elem_write_key *skel;
+
+ skel = for_each_map_elem_write_key__open_and_load();
+ if (!ASSERT_ERR_PTR(skel, "for_each_map_elem_write_key__open_and_load"))
+ for_each_map_elem_write_key__destroy(skel);
+}
+
+void test_for_each(void)
+{
+ if (test__start_subtest("hash_map"))
+ test_hash_map();
+ if (test__start_subtest("array_map"))
+ test_array_map();
+ if (test__start_subtest("write_map_key"))
+ test_write_map_key();
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/fs_kfuncs.c b/tools/testing/selftests/bpf/prog_tests/fs_kfuncs.c
new file mode 100644
index 000000000000..37056ba73847
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/fs_kfuncs.c
@@ -0,0 +1,142 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2023 Meta Platforms, Inc. and affiliates. */
+
+#include <stdlib.h>
+#include <sys/types.h>
+#include <sys/xattr.h>
+#include <linux/fsverity.h>
+#include <unistd.h>
+#include <test_progs.h>
+#include "test_get_xattr.skel.h"
+#include "test_fsverity.skel.h"
+
+static const char testfile[] = "/tmp/test_progs_fs_kfuncs";
+
+static void test_xattr(void)
+{
+ struct test_get_xattr *skel = NULL;
+ int fd = -1, err;
+
+ fd = open(testfile, O_CREAT | O_RDONLY, 0644);
+ if (!ASSERT_GE(fd, 0, "create_file"))
+ return;
+
+ close(fd);
+ fd = -1;
+
+ err = setxattr(testfile, "user.kfuncs", "hello", sizeof("hello"), 0);
+ if (err && errno == EOPNOTSUPP) {
+ printf("%s:SKIP:local fs doesn't support xattr (%d)\n"
+ "To run this test, make sure /tmp filesystem supports xattr.\n",
+ __func__, errno);
+ test__skip();
+ goto out;
+ }
+
+ if (!ASSERT_OK(err, "setxattr"))
+ goto out;
+
+ skel = test_get_xattr__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "test_get_xattr__open_and_load"))
+ goto out;
+
+ skel->bss->monitored_pid = getpid();
+ err = test_get_xattr__attach(skel);
+
+ if (!ASSERT_OK(err, "test_get_xattr__attach"))
+ goto out;
+
+ fd = open(testfile, O_RDONLY, 0644);
+ if (!ASSERT_GE(fd, 0, "open_file"))
+ goto out;
+
+ ASSERT_EQ(skel->bss->found_xattr, 1, "found_xattr");
+
+out:
+ close(fd);
+ test_get_xattr__destroy(skel);
+ remove(testfile);
+}
+
+#ifndef SHA256_DIGEST_SIZE
+#define SHA256_DIGEST_SIZE 32
+#endif
+
+static void test_fsverity(void)
+{
+ struct fsverity_enable_arg arg = {0};
+ struct test_fsverity *skel = NULL;
+ struct fsverity_digest *d;
+ int fd, err;
+ char buffer[4096];
+
+ fd = open(testfile, O_CREAT | O_RDWR, 0644);
+ if (!ASSERT_GE(fd, 0, "create_file"))
+ return;
+
+ /* Write random buffer, so the file is not empty */
+ err = write(fd, buffer, 4096);
+ if (!ASSERT_EQ(err, 4096, "write_file"))
+ goto out;
+ close(fd);
+
+ /* Reopen read-only, otherwise FS_IOC_ENABLE_VERITY will fail */
+ fd = open(testfile, O_RDONLY, 0644);
+ if (!ASSERT_GE(fd, 0, "open_file1"))
+ return;
+
+ /* Enable fsverity for the file.
+ * If the file system doesn't support verity, this will fail. Skip
+ * the test in such case.
+ */
+ arg.version = 1;
+ arg.hash_algorithm = FS_VERITY_HASH_ALG_SHA256;
+ arg.block_size = 4096;
+ err = ioctl(fd, FS_IOC_ENABLE_VERITY, &arg);
+ if (err) {
+ printf("%s:SKIP:local fs doesn't support fsverity (%d)\n"
+ "To run this test, try enable CONFIG_FS_VERITY and enable FSVerity for the filesystem.\n",
+ __func__, errno);
+ test__skip();
+ goto out;
+ }
+
+ skel = test_fsverity__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "test_fsverity__open_and_load"))
+ goto out;
+
+ /* Get fsverity_digest from ioctl */
+ d = (struct fsverity_digest *)skel->bss->expected_digest;
+ d->digest_algorithm = FS_VERITY_HASH_ALG_SHA256;
+ d->digest_size = SHA256_DIGEST_SIZE;
+ err = ioctl(fd, FS_IOC_MEASURE_VERITY, skel->bss->expected_digest);
+ if (!ASSERT_OK(err, "ioctl_FS_IOC_MEASURE_VERITY"))
+ goto out;
+
+ skel->bss->monitored_pid = getpid();
+ err = test_fsverity__attach(skel);
+ if (!ASSERT_OK(err, "test_fsverity__attach"))
+ goto out;
+
+ /* Reopen the file to trigger the program */
+ close(fd);
+ fd = open(testfile, O_RDONLY);
+ if (!ASSERT_GE(fd, 0, "open_file2"))
+ goto out;
+
+ ASSERT_EQ(skel->bss->got_fsverity, 1, "got_fsverity");
+ ASSERT_EQ(skel->bss->digest_matches, 1, "digest_matches");
+out:
+ close(fd);
+ test_fsverity__destroy(skel);
+ remove(testfile);
+}
+
+void test_fs_kfuncs(void)
+{
+ if (test__start_subtest("xattr"))
+ test_xattr();
+
+ if (test__start_subtest("fsverity"))
+ test_fsverity();
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/get_branch_snapshot.c b/tools/testing/selftests/bpf/prog_tests/get_branch_snapshot.c
new file mode 100644
index 000000000000..0394a1156d99
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/get_branch_snapshot.c
@@ -0,0 +1,130 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2021 Facebook */
+#include <test_progs.h>
+#include "get_branch_snapshot.skel.h"
+
+static int *pfd_array;
+static int cpu_cnt;
+
+static bool is_hypervisor(void)
+{
+ char *line = NULL;
+ bool ret = false;
+ size_t len;
+ FILE *fp;
+
+ fp = fopen("/proc/cpuinfo", "r");
+ if (!fp)
+ return false;
+
+ while (getline(&line, &len, fp) != -1) {
+ if (!strncmp(line, "flags", 5)) {
+ if (strstr(line, "hypervisor") != NULL)
+ ret = true;
+ break;
+ }
+ }
+
+ free(line);
+ fclose(fp);
+ return ret;
+}
+
+static int create_perf_events(void)
+{
+ struct perf_event_attr attr = {0};
+ int cpu;
+
+ /* create perf event */
+ attr.size = sizeof(attr);
+ attr.type = PERF_TYPE_HARDWARE;
+ attr.config = PERF_COUNT_HW_CPU_CYCLES;
+ attr.sample_type = PERF_SAMPLE_BRANCH_STACK;
+ attr.branch_sample_type = PERF_SAMPLE_BRANCH_KERNEL |
+ PERF_SAMPLE_BRANCH_USER | PERF_SAMPLE_BRANCH_ANY;
+
+ cpu_cnt = libbpf_num_possible_cpus();
+ pfd_array = malloc(sizeof(int) * cpu_cnt);
+ if (!pfd_array) {
+ cpu_cnt = 0;
+ return 1;
+ }
+
+ for (cpu = 0; cpu < cpu_cnt; cpu++) {
+ pfd_array[cpu] = syscall(__NR_perf_event_open, &attr,
+ -1, cpu, -1, PERF_FLAG_FD_CLOEXEC);
+ if (pfd_array[cpu] < 0)
+ break;
+ }
+
+ return cpu == 0;
+}
+
+static void close_perf_events(void)
+{
+ int cpu, fd;
+
+ for (cpu = 0; cpu < cpu_cnt; cpu++) {
+ fd = pfd_array[cpu];
+ if (fd < 0)
+ break;
+ close(fd);
+ }
+ free(pfd_array);
+}
+
+void serial_test_get_branch_snapshot(void)
+{
+ struct get_branch_snapshot *skel = NULL;
+ int err;
+
+ /* Skip the test before we fix LBR snapshot for hypervisor. */
+ if (is_hypervisor()) {
+ test__skip();
+ return;
+ }
+
+ if (create_perf_events()) {
+ test__skip(); /* system doesn't support LBR */
+ goto cleanup;
+ }
+
+ skel = get_branch_snapshot__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "get_branch_snapshot__open_and_load"))
+ goto cleanup;
+
+ err = kallsyms_find("bpf_testmod_loop_test", &skel->bss->address_low);
+ if (!ASSERT_OK(err, "kallsyms_find"))
+ goto cleanup;
+
+ /* Just a guess for the end of this function, as module functions
+ * in /proc/kallsyms could come in any order.
+ */
+ skel->bss->address_high = skel->bss->address_low + 128;
+
+ err = get_branch_snapshot__attach(skel);
+ if (!ASSERT_OK(err, "get_branch_snapshot__attach"))
+ goto cleanup;
+
+ trigger_module_test_read(100);
+
+ if (skel->bss->total_entries < 16) {
+ /* too few entries for the hit/waste test */
+ test__skip();
+ goto cleanup;
+ }
+
+ ASSERT_GT(skel->bss->test1_hits, 6, "find_looptest_in_lbr");
+
+ /* Given we stop LBR in software, we will waste a few entries.
+ * But we should try to waste as few as possible entries. We are at
+ * about 7 on x86_64 systems.
+ * Add a check for < 10 so that we get heads-up when something
+ * changes and wastes too many entries.
+ */
+ ASSERT_LT(skel->bss->wasted_entries, 10, "check_wasted_entries");
+
+cleanup:
+ get_branch_snapshot__destroy(skel);
+ close_perf_events();
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/get_func_args_test.c b/tools/testing/selftests/bpf/prog_tests/get_func_args_test.c
new file mode 100644
index 000000000000..64a9c95d4acf
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/get_func_args_test.c
@@ -0,0 +1,44 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <test_progs.h>
+#include "get_func_args_test.skel.h"
+
+void test_get_func_args_test(void)
+{
+ struct get_func_args_test *skel = NULL;
+ int err, prog_fd;
+ LIBBPF_OPTS(bpf_test_run_opts, topts);
+
+ skel = get_func_args_test__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "get_func_args_test__open_and_load"))
+ return;
+
+ err = get_func_args_test__attach(skel);
+ if (!ASSERT_OK(err, "get_func_args_test__attach"))
+ goto cleanup;
+
+ /* This runs bpf_fentry_test* functions and triggers
+ * fentry/fexit programs.
+ */
+ prog_fd = bpf_program__fd(skel->progs.test1);
+ err = bpf_prog_test_run_opts(prog_fd, &topts);
+ ASSERT_OK(err, "test_run");
+ ASSERT_EQ(topts.retval, 0, "test_run");
+
+ /* This runs bpf_modify_return_test function and triggers
+ * fmod_ret_test and fexit_test programs.
+ */
+ prog_fd = bpf_program__fd(skel->progs.fmod_ret_test);
+ err = bpf_prog_test_run_opts(prog_fd, &topts);
+ ASSERT_OK(err, "test_run");
+
+ ASSERT_EQ(topts.retval >> 16, 1, "test_run");
+ ASSERT_EQ(topts.retval & 0xffff, 1234 + 29, "test_run");
+
+ ASSERT_EQ(skel->bss->test1_result, 1, "test1_result");
+ ASSERT_EQ(skel->bss->test2_result, 1, "test2_result");
+ ASSERT_EQ(skel->bss->test3_result, 1, "test3_result");
+ ASSERT_EQ(skel->bss->test4_result, 1, "test4_result");
+
+cleanup:
+ get_func_args_test__destroy(skel);
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/get_func_ip_test.c b/tools/testing/selftests/bpf/prog_tests/get_func_ip_test.c
new file mode 100644
index 000000000000..c40242dfa8fb
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/get_func_ip_test.c
@@ -0,0 +1,139 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <test_progs.h>
+#include "get_func_ip_test.skel.h"
+#include "get_func_ip_uprobe_test.skel.h"
+
+static noinline void uprobe_trigger(void)
+{
+}
+
+static void test_function_entry(void)
+{
+ struct get_func_ip_test *skel = NULL;
+ int err, prog_fd;
+ LIBBPF_OPTS(bpf_test_run_opts, topts);
+
+ skel = get_func_ip_test__open();
+ if (!ASSERT_OK_PTR(skel, "get_func_ip_test__open"))
+ return;
+
+ err = get_func_ip_test__load(skel);
+ if (!ASSERT_OK(err, "get_func_ip_test__load"))
+ goto cleanup;
+
+ err = get_func_ip_test__attach(skel);
+ if (!ASSERT_OK(err, "get_func_ip_test__attach"))
+ goto cleanup;
+
+ skel->bss->uprobe_trigger = (unsigned long) uprobe_trigger;
+
+ prog_fd = bpf_program__fd(skel->progs.test1);
+ err = bpf_prog_test_run_opts(prog_fd, &topts);
+ ASSERT_OK(err, "test_run");
+ ASSERT_EQ(topts.retval, 0, "test_run");
+
+ prog_fd = bpf_program__fd(skel->progs.test5);
+ err = bpf_prog_test_run_opts(prog_fd, &topts);
+
+ ASSERT_OK(err, "test_run");
+
+ uprobe_trigger();
+
+ ASSERT_EQ(skel->bss->test1_result, 1, "test1_result");
+ ASSERT_EQ(skel->bss->test2_result, 1, "test2_result");
+ ASSERT_EQ(skel->bss->test3_result, 1, "test3_result");
+ ASSERT_EQ(skel->bss->test4_result, 1, "test4_result");
+ ASSERT_EQ(skel->bss->test5_result, 1, "test5_result");
+ ASSERT_EQ(skel->bss->test7_result, 1, "test7_result");
+ ASSERT_EQ(skel->bss->test8_result, 1, "test8_result");
+
+cleanup:
+ get_func_ip_test__destroy(skel);
+}
+
+#ifdef __x86_64__
+extern void uprobe_trigger_body(void);
+asm(
+".globl uprobe_trigger_body\n"
+".type uprobe_trigger_body, @function\n"
+"uprobe_trigger_body:\n"
+" nop\n"
+" ret\n"
+);
+
+static void test_function_body_kprobe(void)
+{
+ struct get_func_ip_test *skel = NULL;
+ LIBBPF_OPTS(bpf_test_run_opts, topts);
+ LIBBPF_OPTS(bpf_kprobe_opts, kopts);
+ struct bpf_link *link6 = NULL;
+ int err, prog_fd;
+
+ skel = get_func_ip_test__open();
+ if (!ASSERT_OK_PTR(skel, "get_func_ip_test__open"))
+ return;
+
+ /* test6 is x86_64 specific and is disabled by default,
+ * enable it for body test.
+ */
+ bpf_program__set_autoload(skel->progs.test6, true);
+
+ err = get_func_ip_test__load(skel);
+ if (!ASSERT_OK(err, "get_func_ip_test__load"))
+ goto cleanup;
+
+ kopts.offset = skel->kconfig->CONFIG_X86_KERNEL_IBT ? 9 : 5;
+
+ link6 = bpf_program__attach_kprobe_opts(skel->progs.test6, "bpf_fentry_test6", &kopts);
+ if (!ASSERT_OK_PTR(link6, "link6"))
+ goto cleanup;
+
+ prog_fd = bpf_program__fd(skel->progs.test1);
+ err = bpf_prog_test_run_opts(prog_fd, &topts);
+ ASSERT_OK(err, "test_run");
+ ASSERT_EQ(topts.retval, 0, "test_run");
+
+ ASSERT_EQ(skel->bss->test6_result, 1, "test6_result");
+
+cleanup:
+ bpf_link__destroy(link6);
+ get_func_ip_test__destroy(skel);
+}
+
+static void test_function_body_uprobe(void)
+{
+ struct get_func_ip_uprobe_test *skel = NULL;
+ int err;
+
+ skel = get_func_ip_uprobe_test__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "get_func_ip_uprobe_test__open_and_load"))
+ return;
+
+ err = get_func_ip_uprobe_test__attach(skel);
+ if (!ASSERT_OK(err, "get_func_ip_test__attach"))
+ goto cleanup;
+
+ skel->bss->uprobe_trigger_body = (unsigned long) uprobe_trigger_body;
+
+ uprobe_trigger_body();
+
+ ASSERT_EQ(skel->bss->test1_result, 1, "test1_result");
+
+cleanup:
+ get_func_ip_uprobe_test__destroy(skel);
+}
+
+static void test_function_body(void)
+{
+ test_function_body_kprobe();
+ test_function_body_uprobe();
+}
+#else
+#define test_function_body()
+#endif
+
+void test_get_func_ip_test(void)
+{
+ test_function_entry();
+ test_function_body();
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/get_stack_raw_tp.c b/tools/testing/selftests/bpf/prog_tests/get_stack_raw_tp.c
index 925722217edf..858e0575f502 100644
--- a/tools/testing/selftests/bpf/prog_tests/get_stack_raw_tp.c
+++ b/tools/testing/selftests/bpf/prog_tests/get_stack_raw_tp.c
@@ -24,12 +24,15 @@ static void get_stack_print_output(void *ctx, int cpu, void *data, __u32 size)
{
bool good_kern_stack = false, good_user_stack = false;
const char *nonjit_func = "___bpf_prog_run";
- struct get_stack_trace_t *e = data;
+ /* perfbuf-submitted data is 4-byte aligned, but we need 8-byte
+ * alignment, so copy data into a local variable, for simplicity
+ */
+ struct get_stack_trace_t e;
int i, num_stack;
- static __u64 cnt;
struct ksym *ks;
- cnt++;
+ memset(&e, 0, sizeof(e));
+ memcpy(&e, data, size <= sizeof(e) ? size : sizeof(e));
if (size < sizeof(struct get_stack_trace_t)) {
__u64 *raw_data = data;
@@ -57,19 +60,19 @@ static void get_stack_print_output(void *ctx, int cpu, void *data, __u32 size)
good_user_stack = true;
}
} else {
- num_stack = e->kern_stack_size / sizeof(__u64);
+ num_stack = e.kern_stack_size / sizeof(__u64);
if (env.jit_enabled) {
good_kern_stack = num_stack > 0;
} else {
for (i = 0; i < num_stack; i++) {
- ks = ksym_search(e->kern_stack[i]);
+ ks = ksym_search(e.kern_stack[i]);
if (ks && (strcmp(ks->name, nonjit_func) == 0)) {
good_kern_stack = true;
break;
}
}
}
- if (e->user_stack_size > 0 && e->user_stack_buildid_size > 0)
+ if (e.user_stack_size > 0 && e.user_stack_buildid_size > 0)
good_user_stack = true;
}
@@ -81,11 +84,10 @@ static void get_stack_print_output(void *ctx, int cpu, void *data, __u32 size)
void test_get_stack_raw_tp(void)
{
- const char *file = "./test_get_stack_rawtp.o";
- const char *file_err = "./test_get_stack_rawtp_err.o";
- const char *prog_name = "raw_tracepoint/sys_enter";
+ const char *file = "./test_get_stack_rawtp.bpf.o";
+ const char *file_err = "./test_get_stack_rawtp_err.bpf.o";
+ const char *prog_name = "bpf_prog1";
int i, err, prog_fd, exp_cnt = MAX_CNT_RAWTP;
- struct perf_buffer_opts pb_opts = {};
struct perf_buffer *pb = NULL;
struct bpf_link *link = NULL;
struct timespec tv = {0, 10};
@@ -94,15 +96,15 @@ void test_get_stack_raw_tp(void)
struct bpf_map *map;
cpu_set_t cpu_set;
- err = bpf_prog_load(file_err, BPF_PROG_TYPE_RAW_TRACEPOINT, &obj, &prog_fd);
+ err = bpf_prog_test_load(file_err, BPF_PROG_TYPE_RAW_TRACEPOINT, &obj, &prog_fd);
if (CHECK(err >= 0, "prog_load raw tp", "err %d errno %d\n", err, errno))
return;
- err = bpf_prog_load(file, BPF_PROG_TYPE_RAW_TRACEPOINT, &obj, &prog_fd);
+ err = bpf_prog_test_load(file, BPF_PROG_TYPE_RAW_TRACEPOINT, &obj, &prog_fd);
if (CHECK(err, "prog_load raw tp", "err %d errno %d\n", err, errno))
return;
- prog = bpf_object__find_program_by_title(obj, prog_name);
+ prog = bpf_object__find_program_by_name(obj, prog_name);
if (CHECK(!prog, "find_probe", "prog '%s' not found\n", prog_name))
goto close_prog;
@@ -121,12 +123,12 @@ void test_get_stack_raw_tp(void)
goto close_prog;
link = bpf_program__attach_raw_tracepoint(prog, "sys_enter");
- if (CHECK(IS_ERR(link), "attach_raw_tp", "err %ld\n", PTR_ERR(link)))
+ if (!ASSERT_OK_PTR(link, "attach_raw_tp"))
goto close_prog;
- pb_opts.sample_cb = get_stack_print_output;
- pb = perf_buffer__new(bpf_map__fd(map), 8, &pb_opts);
- if (CHECK(IS_ERR(pb), "perf_buf__new", "err %ld\n", PTR_ERR(pb)))
+ pb = perf_buffer__new(bpf_map__fd(map), 8, get_stack_print_output,
+ NULL, NULL, NULL);
+ if (!ASSERT_OK_PTR(pb, "perf_buf__new"))
goto close_prog;
/* trigger some syscall action */
@@ -141,9 +143,7 @@ void test_get_stack_raw_tp(void)
}
close_prog:
- if (!IS_ERR_OR_NULL(link))
- bpf_link__destroy(link);
- if (!IS_ERR_OR_NULL(pb))
- perf_buffer__free(pb);
+ bpf_link__destroy(link);
+ perf_buffer__free(pb);
bpf_object__close(obj);
}
diff --git a/tools/testing/selftests/bpf/prog_tests/get_stackid_cannot_attach.c b/tools/testing/selftests/bpf/prog_tests/get_stackid_cannot_attach.c
new file mode 100644
index 000000000000..2715c68301f5
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/get_stackid_cannot_attach.c
@@ -0,0 +1,89 @@
+// SPDX-License-Identifier: GPL-2.0
+// Copyright (c) 2020 Facebook
+#include <test_progs.h>
+#include "test_stacktrace_build_id.skel.h"
+
+void test_get_stackid_cannot_attach(void)
+{
+ struct perf_event_attr attr = {
+ /* .type = PERF_TYPE_SOFTWARE, */
+ .type = PERF_TYPE_HARDWARE,
+ .config = PERF_COUNT_HW_CPU_CYCLES,
+ .precise_ip = 1,
+ .sample_type = PERF_SAMPLE_IP | PERF_SAMPLE_BRANCH_STACK,
+ .branch_sample_type = PERF_SAMPLE_BRANCH_USER |
+ PERF_SAMPLE_BRANCH_NO_FLAGS |
+ PERF_SAMPLE_BRANCH_NO_CYCLES |
+ PERF_SAMPLE_BRANCH_CALL_STACK,
+ .sample_period = 5000,
+ .size = sizeof(struct perf_event_attr),
+ };
+ struct test_stacktrace_build_id *skel;
+ __u32 duration = 0;
+ int pmu_fd, err;
+
+ skel = test_stacktrace_build_id__open();
+ if (CHECK(!skel, "skel_open", "skeleton open failed\n"))
+ return;
+
+ /* override program type */
+ bpf_program__set_type(skel->progs.oncpu, BPF_PROG_TYPE_PERF_EVENT);
+
+ err = test_stacktrace_build_id__load(skel);
+ if (CHECK(err, "skel_load", "skeleton load failed: %d\n", err))
+ goto cleanup;
+
+ pmu_fd = syscall(__NR_perf_event_open, &attr, -1 /* pid */,
+ 0 /* cpu 0 */, -1 /* group id */,
+ 0 /* flags */);
+ if (pmu_fd < 0 && (errno == ENOENT || errno == EOPNOTSUPP)) {
+ printf("%s:SKIP:cannot open PERF_COUNT_HW_CPU_CYCLES with precise_ip > 0\n",
+ __func__);
+ test__skip();
+ goto cleanup;
+ }
+ if (CHECK(pmu_fd < 0, "perf_event_open", "err %d errno %d\n",
+ pmu_fd, errno))
+ goto cleanup;
+
+ skel->links.oncpu = bpf_program__attach_perf_event(skel->progs.oncpu,
+ pmu_fd);
+ ASSERT_ERR_PTR(skel->links.oncpu, "attach_perf_event_no_callchain");
+ close(pmu_fd);
+
+ /* add PERF_SAMPLE_CALLCHAIN, attach should succeed */
+ attr.sample_type |= PERF_SAMPLE_CALLCHAIN;
+
+ pmu_fd = syscall(__NR_perf_event_open, &attr, -1 /* pid */,
+ 0 /* cpu 0 */, -1 /* group id */,
+ 0 /* flags */);
+
+ if (CHECK(pmu_fd < 0, "perf_event_open", "err %d errno %d\n",
+ pmu_fd, errno))
+ goto cleanup;
+
+ skel->links.oncpu = bpf_program__attach_perf_event(skel->progs.oncpu,
+ pmu_fd);
+ ASSERT_OK_PTR(skel->links.oncpu, "attach_perf_event_callchain");
+ bpf_link__destroy(skel->links.oncpu);
+ close(pmu_fd);
+
+ /* add exclude_callchain_kernel, attach should fail */
+ attr.exclude_callchain_kernel = 1;
+
+ pmu_fd = syscall(__NR_perf_event_open, &attr, -1 /* pid */,
+ 0 /* cpu 0 */, -1 /* group id */,
+ 0 /* flags */);
+
+ if (CHECK(pmu_fd < 0, "perf_event_open", "err %d errno %d\n",
+ pmu_fd, errno))
+ goto cleanup;
+
+ skel->links.oncpu = bpf_program__attach_perf_event(skel->progs.oncpu,
+ pmu_fd);
+ ASSERT_ERR_PTR(skel->links.oncpu, "attach_perf_event_exclude_callchain_kernel");
+ close(pmu_fd);
+
+cleanup:
+ test_stacktrace_build_id__destroy(skel);
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/global_data.c b/tools/testing/selftests/bpf/prog_tests/global_data.c
index e3cb62b0a110..fadfb64e2a71 100644
--- a/tools/testing/selftests/bpf/prog_tests/global_data.c
+++ b/tools/testing/selftests/bpf/prog_tests/global_data.c
@@ -5,7 +5,7 @@
static void test_global_data_number(struct bpf_object *obj, __u32 duration)
{
int i, err, map_fd;
- uint64_t num;
+ __u64 num;
map_fd = bpf_find_map(__func__, obj, "result_number");
if (CHECK_FAIL(map_fd < 0))
@@ -14,7 +14,7 @@ static void test_global_data_number(struct bpf_object *obj, __u32 duration)
struct {
char *name;
uint32_t key;
- uint64_t num;
+ __u64 num;
} tests[] = {
{ "relocate .bss reference", 0, 0 },
{ "relocate .data reference", 1, 42 },
@@ -29,10 +29,10 @@ static void test_global_data_number(struct bpf_object *obj, __u32 duration)
{ "relocate .rodata reference", 10, ~0 },
};
- for (i = 0; i < sizeof(tests) / sizeof(tests[0]); i++) {
+ for (i = 0; i < ARRAY_SIZE(tests); i++) {
err = bpf_map_lookup_elem(map_fd, &tests[i].key, &num);
CHECK(err || num != tests[i].num, tests[i].name,
- "err %d result %lx expected %lx\n",
+ "err %d result %llx expected %llx\n",
err, num, tests[i].num);
}
}
@@ -58,7 +58,7 @@ static void test_global_data_string(struct bpf_object *obj, __u32 duration)
{ "relocate .bss reference", 4, "\0\0hello" },
};
- for (i = 0; i < sizeof(tests) / sizeof(tests[0]); i++) {
+ for (i = 0; i < ARRAY_SIZE(tests); i++) {
err = bpf_map_lookup_elem(map_fd, &tests[i].key, str);
CHECK(err || memcmp(str, tests[i].str, sizeof(str)),
tests[i].name, "err %d result \'%s\' expected \'%s\'\n",
@@ -92,7 +92,7 @@ static void test_global_data_struct(struct bpf_object *obj, __u32 duration)
{ "relocate .data reference", 3, { 41, 0xeeeeefef, 0x2111111111111111ULL, } },
};
- for (i = 0; i < sizeof(tests) / sizeof(tests[0]); i++) {
+ for (i = 0; i < ARRAY_SIZE(tests); i++) {
err = bpf_map_lookup_elem(map_fd, &tests[i].key, &val);
CHECK(err || memcmp(&val, &tests[i].val, sizeof(val)),
tests[i].name, "err %d result { %u, %u, %llu } expected { %u, %u, %llu }\n",
@@ -103,18 +103,25 @@ static void test_global_data_struct(struct bpf_object *obj, __u32 duration)
static void test_global_data_rdonly(struct bpf_object *obj, __u32 duration)
{
int err = -ENOMEM, map_fd, zero = 0;
- struct bpf_map *map;
+ struct bpf_map *map, *map2;
__u8 *buff;
map = bpf_object__find_map_by_name(obj, "test_glo.rodata");
- if (CHECK_FAIL(!map || !bpf_map__is_internal(map)))
+ if (!ASSERT_OK_PTR(map, "map"))
+ return;
+ if (!ASSERT_TRUE(bpf_map__is_internal(map), "is_internal"))
+ return;
+
+ /* ensure we can lookup internal maps by their ELF names */
+ map2 = bpf_object__find_map_by_name(obj, ".rodata");
+ if (!ASSERT_EQ(map, map2, "same_maps"))
return;
map_fd = bpf_map__fd(map);
if (CHECK_FAIL(map_fd < 0))
return;
- buff = malloc(bpf_map__def(map)->value_size);
+ buff = malloc(bpf_map__value_size(map));
if (buff)
err = bpf_map_update_elem(map_fd, &zero, buff, 0);
free(buff);
@@ -124,25 +131,27 @@ static void test_global_data_rdonly(struct bpf_object *obj, __u32 duration)
void test_global_data(void)
{
- const char *file = "./test_global_data.o";
- __u32 duration = 0, retval;
+ const char *file = "./test_global_data.bpf.o";
struct bpf_object *obj;
int err, prog_fd;
-
- err = bpf_prog_load(file, BPF_PROG_TYPE_SCHED_CLS, &obj, &prog_fd);
- if (CHECK(err, "load program", "error %d loading %s\n", err, file))
+ LIBBPF_OPTS(bpf_test_run_opts, topts,
+ .data_in = &pkt_v4,
+ .data_size_in = sizeof(pkt_v4),
+ .repeat = 1,
+ );
+
+ err = bpf_prog_test_load(file, BPF_PROG_TYPE_SCHED_CLS, &obj, &prog_fd);
+ if (!ASSERT_OK(err, "load program"))
return;
- err = bpf_prog_test_run(prog_fd, 1, &pkt_v4, sizeof(pkt_v4),
- NULL, NULL, &retval, &duration);
- CHECK(err || retval, "pass global data run",
- "err %d errno %d retval %d duration %d\n",
- err, errno, retval, duration);
+ err = bpf_prog_test_run_opts(prog_fd, &topts);
+ ASSERT_OK(err, "pass global data run err");
+ ASSERT_OK(topts.retval, "pass global data run retval");
- test_global_data_number(obj, duration);
- test_global_data_string(obj, duration);
- test_global_data_struct(obj, duration);
- test_global_data_rdonly(obj, duration);
+ test_global_data_number(obj, topts.duration);
+ test_global_data_string(obj, topts.duration);
+ test_global_data_struct(obj, topts.duration);
+ test_global_data_rdonly(obj, topts.duration);
bpf_object__close(obj);
}
diff --git a/tools/testing/selftests/bpf/prog_tests/global_data_init.c b/tools/testing/selftests/bpf/prog_tests/global_data_init.c
index 3bdaa5a40744..8466332d7406 100644
--- a/tools/testing/selftests/bpf/prog_tests/global_data_init.c
+++ b/tools/testing/selftests/bpf/prog_tests/global_data_init.c
@@ -3,7 +3,7 @@
void test_global_data_init(void)
{
- const char *file = "./test_global_data.o";
+ const char *file = "./test_global_data.bpf.o";
int err = -ENOMEM, map_fd, zero = 0;
__u8 *buff = NULL, *newval = NULL;
struct bpf_object *obj;
@@ -12,14 +12,15 @@ void test_global_data_init(void)
size_t sz;
obj = bpf_object__open_file(file, NULL);
- if (CHECK_FAIL(!obj))
+ err = libbpf_get_error(obj);
+ if (CHECK_FAIL(err))
return;
- map = bpf_object__find_map_by_name(obj, "test_glo.rodata");
+ map = bpf_object__find_map_by_name(obj, ".rodata");
if (CHECK_FAIL(!map || !bpf_map__is_internal(map)))
goto out;
- sz = bpf_map__def(map)->value_size;
+ sz = bpf_map__value_size(map);
newval = malloc(sz);
if (CHECK_FAIL(!newval))
goto out;
diff --git a/tools/testing/selftests/bpf/prog_tests/global_func_args.c b/tools/testing/selftests/bpf/prog_tests/global_func_args.c
new file mode 100644
index 000000000000..d997099f62d0
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/global_func_args.c
@@ -0,0 +1,62 @@
+// SPDX-License-Identifier: GPL-2.0
+#include "test_progs.h"
+#include "network_helpers.h"
+
+static __u32 duration;
+
+static void test_global_func_args0(struct bpf_object *obj)
+{
+ int err, i, map_fd, actual_value;
+ const char *map_name = "values";
+
+ map_fd = bpf_find_map(__func__, obj, map_name);
+ if (CHECK(map_fd < 0, "bpf_find_map", "cannot find BPF map %s: %s\n",
+ map_name, strerror(errno)))
+ return;
+
+ struct {
+ const char *descr;
+ int expected_value;
+ } tests[] = {
+ {"passing NULL pointer", 0},
+ {"returning value", 1},
+ {"reading local variable", 100 },
+ {"writing local variable", 101 },
+ {"reading global variable", 42 },
+ {"writing global variable", 43 },
+ {"writing to pointer-to-pointer", 1 },
+ };
+
+ for (i = 0; i < ARRAY_SIZE(tests); ++i) {
+ const int expected_value = tests[i].expected_value;
+
+ err = bpf_map_lookup_elem(map_fd, &i, &actual_value);
+
+ CHECK(err || actual_value != expected_value, tests[i].descr,
+ "err %d result %d expected %d\n", err, actual_value, expected_value);
+ }
+}
+
+void test_global_func_args(void)
+{
+ const char *file = "./test_global_func_args.bpf.o";
+ struct bpf_object *obj;
+ int err, prog_fd;
+ LIBBPF_OPTS(bpf_test_run_opts, topts,
+ .data_in = &pkt_v4,
+ .data_size_in = sizeof(pkt_v4),
+ .repeat = 1,
+ );
+
+ err = bpf_prog_test_load(file, BPF_PROG_TYPE_CGROUP_SKB, &obj, &prog_fd);
+ if (CHECK(err, "load program", "error %d loading %s\n", err, file))
+ return;
+
+ err = bpf_prog_test_run_opts(prog_fd, &topts);
+ ASSERT_OK(err, "test_run");
+ ASSERT_OK(topts.retval, "test_run retval");
+
+ test_global_func_args0(obj);
+
+ bpf_object__close(obj);
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/global_func_dead_code.c b/tools/testing/selftests/bpf/prog_tests/global_func_dead_code.c
new file mode 100644
index 000000000000..65309894b27a
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/global_func_dead_code.c
@@ -0,0 +1,60 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2023 Meta Platforms, Inc. and affiliates. */
+
+#include <test_progs.h>
+#include "verifier_global_subprogs.skel.h"
+#include "freplace_dead_global_func.skel.h"
+
+void test_global_func_dead_code(void)
+{
+ struct verifier_global_subprogs *tgt_skel = NULL;
+ struct freplace_dead_global_func *skel = NULL;
+ char log_buf[4096];
+ int err, tgt_fd;
+
+ /* first, try to load target with good global subprog */
+ tgt_skel = verifier_global_subprogs__open();
+ if (!ASSERT_OK_PTR(tgt_skel, "tgt_skel_good_open"))
+ return;
+
+ bpf_program__set_autoload(tgt_skel->progs.chained_global_func_calls_success, true);
+
+ err = verifier_global_subprogs__load(tgt_skel);
+ if (!ASSERT_OK(err, "tgt_skel_good_load"))
+ goto out;
+
+ tgt_fd = bpf_program__fd(tgt_skel->progs.chained_global_func_calls_success);
+
+ /* Attach to good non-eliminated subprog */
+ skel = freplace_dead_global_func__open();
+ if (!ASSERT_OK_PTR(skel, "skel_good_open"))
+ goto out;
+
+ err = bpf_program__set_attach_target(skel->progs.freplace_prog, tgt_fd, "global_good");
+ ASSERT_OK(err, "attach_target_good");
+
+ err = freplace_dead_global_func__load(skel);
+ if (!ASSERT_OK(err, "skel_good_load"))
+ goto out;
+
+ freplace_dead_global_func__destroy(skel);
+
+ /* Try attaching to dead code-eliminated subprog */
+ skel = freplace_dead_global_func__open();
+ if (!ASSERT_OK_PTR(skel, "skel_dead_open"))
+ goto out;
+
+ bpf_program__set_log_buf(skel->progs.freplace_prog, log_buf, sizeof(log_buf));
+ err = bpf_program__set_attach_target(skel->progs.freplace_prog, tgt_fd, "global_dead");
+ ASSERT_OK(err, "attach_target_dead");
+
+ err = freplace_dead_global_func__load(skel);
+ if (!ASSERT_ERR(err, "skel_dead_load"))
+ goto out;
+
+ ASSERT_HAS_SUBSTR(log_buf, "Subprog global_dead doesn't exist", "dead_subprog_missing_msg");
+
+out:
+ verifier_global_subprogs__destroy(tgt_skel);
+ freplace_dead_global_func__destroy(skel);
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/global_map_resize.c b/tools/testing/selftests/bpf/prog_tests/global_map_resize.c
new file mode 100644
index 000000000000..56b5baef35c8
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/global_map_resize.c
@@ -0,0 +1,235 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2023 Meta Platforms, Inc. and affiliates. */
+#include <errno.h>
+#include <sys/syscall.h>
+#include <unistd.h>
+#include "test_global_map_resize.skel.h"
+#include "test_progs.h"
+
+static void run_prog_bss_array_sum(void)
+{
+ (void)syscall(__NR_getpid);
+}
+
+static void run_prog_data_array_sum(void)
+{
+ (void)syscall(__NR_getuid);
+}
+
+static void global_map_resize_bss_subtest(void)
+{
+ int err;
+ struct test_global_map_resize *skel;
+ struct bpf_map *map;
+ const __u32 desired_sz = sizeof(skel->bss->sum) + sysconf(_SC_PAGE_SIZE) * 2;
+ size_t array_len, actual_sz, new_sz;
+
+ skel = test_global_map_resize__open();
+ if (!ASSERT_OK_PTR(skel, "test_global_map_resize__open"))
+ goto teardown;
+
+ /* set some initial value before resizing.
+ * it is expected this non-zero value will be preserved
+ * while resizing.
+ */
+ skel->bss->array[0] = 1;
+
+ /* resize map value and verify the new size */
+ map = skel->maps.bss;
+ err = bpf_map__set_value_size(map, desired_sz);
+ if (!ASSERT_OK(err, "bpf_map__set_value_size"))
+ goto teardown;
+ if (!ASSERT_EQ(bpf_map__value_size(map), desired_sz, "resize"))
+ goto teardown;
+
+ new_sz = sizeof(skel->data_percpu_arr->percpu_arr[0]) * libbpf_num_possible_cpus();
+ err = bpf_map__set_value_size(skel->maps.data_percpu_arr, new_sz);
+ ASSERT_OK(err, "percpu_arr_resize");
+
+ /* set the expected number of elements based on the resized array */
+ array_len = (desired_sz - sizeof(skel->bss->sum)) / sizeof(skel->bss->array[0]);
+ if (!ASSERT_GT(array_len, 1, "array_len"))
+ goto teardown;
+
+ skel->bss = bpf_map__initial_value(skel->maps.bss, &actual_sz);
+ if (!ASSERT_OK_PTR(skel->bss, "bpf_map__initial_value (ptr)"))
+ goto teardown;
+ if (!ASSERT_EQ(actual_sz, desired_sz, "bpf_map__initial_value (size)"))
+ goto teardown;
+
+ /* fill the newly resized array with ones,
+ * skipping the first element which was previously set
+ */
+ for (int i = 1; i < array_len; i++)
+ skel->bss->array[i] = 1;
+
+ /* set global const values before loading */
+ skel->rodata->pid = getpid();
+ skel->rodata->bss_array_len = array_len;
+ skel->rodata->data_array_len = 1;
+
+ err = test_global_map_resize__load(skel);
+ if (!ASSERT_OK(err, "test_global_map_resize__load"))
+ goto teardown;
+ err = test_global_map_resize__attach(skel);
+ if (!ASSERT_OK(err, "test_global_map_resize__attach"))
+ goto teardown;
+
+ /* run the bpf program which will sum the contents of the array.
+ * since the array was filled with ones,verify the sum equals array_len
+ */
+ run_prog_bss_array_sum();
+ if (!ASSERT_EQ(skel->bss->sum, array_len, "sum"))
+ goto teardown;
+
+teardown:
+ test_global_map_resize__destroy(skel);
+}
+
+static void global_map_resize_data_subtest(void)
+{
+ struct test_global_map_resize *skel;
+ struct bpf_map *map;
+ const __u32 desired_sz = sysconf(_SC_PAGE_SIZE) * 2;
+ size_t array_len, actual_sz, new_sz;
+ int err;
+
+ skel = test_global_map_resize__open();
+ if (!ASSERT_OK_PTR(skel, "test_global_map_resize__open"))
+ goto teardown;
+
+ /* set some initial value before resizing.
+ * it is expected this non-zero value will be preserved
+ * while resizing.
+ */
+ skel->data_custom->my_array[0] = 1;
+
+ /* resize map value and verify the new size */
+ map = skel->maps.data_custom;
+ err = bpf_map__set_value_size(map, desired_sz);
+ if (!ASSERT_OK(err, "bpf_map__set_value_size"))
+ goto teardown;
+ if (!ASSERT_EQ(bpf_map__value_size(map), desired_sz, "resize"))
+ goto teardown;
+
+ new_sz = sizeof(skel->data_percpu_arr->percpu_arr[0]) * libbpf_num_possible_cpus();
+ err = bpf_map__set_value_size(skel->maps.data_percpu_arr, new_sz);
+ ASSERT_OK(err, "percpu_arr_resize");
+
+ /* set the expected number of elements based on the resized array */
+ array_len = (desired_sz - sizeof(skel->bss->sum)) / sizeof(skel->data_custom->my_array[0]);
+ if (!ASSERT_GT(array_len, 1, "array_len"))
+ goto teardown;
+
+ skel->data_custom = bpf_map__initial_value(skel->maps.data_custom, &actual_sz);
+ if (!ASSERT_OK_PTR(skel->data_custom, "bpf_map__initial_value (ptr)"))
+ goto teardown;
+ if (!ASSERT_EQ(actual_sz, desired_sz, "bpf_map__initial_value (size)"))
+ goto teardown;
+
+ /* fill the newly resized array with ones,
+ * skipping the first element which was previously set
+ */
+ for (int i = 1; i < array_len; i++)
+ skel->data_custom->my_array[i] = 1;
+
+ /* set global const values before loading */
+ skel->rodata->pid = getpid();
+ skel->rodata->bss_array_len = 1;
+ skel->rodata->data_array_len = array_len;
+
+ err = test_global_map_resize__load(skel);
+ if (!ASSERT_OK(err, "test_global_map_resize__load"))
+ goto teardown;
+ err = test_global_map_resize__attach(skel);
+ if (!ASSERT_OK(err, "test_global_map_resize__attach"))
+ goto teardown;
+
+ /* run the bpf program which will sum the contents of the array.
+ * since the array was filled with ones,verify the sum equals array_len
+ */
+ run_prog_data_array_sum();
+ if (!ASSERT_EQ(skel->bss->sum, array_len, "sum"))
+ goto teardown;
+
+teardown:
+ test_global_map_resize__destroy(skel);
+}
+
+static void global_map_resize_invalid_subtest(void)
+{
+ int err;
+ struct test_global_map_resize *skel;
+ struct bpf_map *map;
+ __u32 element_sz, desired_sz;
+
+ skel = test_global_map_resize__open();
+ if (!ASSERT_OK_PTR(skel, "test_global_map_resize__open"))
+ return;
+
+ /* attempt to resize a global datasec map to size
+ * which does NOT align with array
+ */
+ map = skel->maps.data_custom;
+ if (!ASSERT_NEQ(bpf_map__btf_value_type_id(map), 0, ".data.custom initial btf"))
+ goto teardown;
+ /* set desired size a fraction of element size beyond an aligned size */
+ element_sz = sizeof(skel->data_custom->my_array[0]);
+ desired_sz = element_sz + element_sz / 2;
+ /* confirm desired size does NOT align with array */
+ if (!ASSERT_NEQ(desired_sz % element_sz, 0, "my_array alignment"))
+ goto teardown;
+ err = bpf_map__set_value_size(map, desired_sz);
+ /* confirm resize is OK but BTF info is cleared */
+ if (!ASSERT_OK(err, ".data.custom bpf_map__set_value_size") ||
+ !ASSERT_EQ(bpf_map__btf_key_type_id(map), 0, ".data.custom clear btf key") ||
+ !ASSERT_EQ(bpf_map__btf_value_type_id(map), 0, ".data.custom clear btf val"))
+ goto teardown;
+
+ /* attempt to resize a global datasec map whose only var is NOT an array */
+ map = skel->maps.data_non_array;
+ if (!ASSERT_NEQ(bpf_map__btf_value_type_id(map), 0, ".data.non_array initial btf"))
+ goto teardown;
+ /* set desired size to arbitrary value */
+ desired_sz = 1024;
+ err = bpf_map__set_value_size(map, desired_sz);
+ /* confirm resize is OK but BTF info is cleared */
+ if (!ASSERT_OK(err, ".data.non_array bpf_map__set_value_size") ||
+ !ASSERT_EQ(bpf_map__btf_key_type_id(map), 0, ".data.non_array clear btf key") ||
+ !ASSERT_EQ(bpf_map__btf_value_type_id(map), 0, ".data.non_array clear btf val"))
+ goto teardown;
+
+ /* attempt to resize a global datasec map
+ * whose last var is NOT an array
+ */
+ map = skel->maps.data_array_not_last;
+ if (!ASSERT_NEQ(bpf_map__btf_value_type_id(map), 0, ".data.array_not_last initial btf"))
+ goto teardown;
+ /* set desired size to a multiple of element size */
+ element_sz = sizeof(skel->data_array_not_last->my_array_first[0]);
+ desired_sz = element_sz * 8;
+ /* confirm desired size aligns with array */
+ if (!ASSERT_EQ(desired_sz % element_sz, 0, "my_array_first alignment"))
+ goto teardown;
+ err = bpf_map__set_value_size(map, desired_sz);
+ /* confirm resize is OK but BTF info is cleared */
+ if (!ASSERT_OK(err, ".data.array_not_last bpf_map__set_value_size") ||
+ !ASSERT_EQ(bpf_map__btf_key_type_id(map), 0, ".data.array_not_last clear btf key") ||
+ !ASSERT_EQ(bpf_map__btf_value_type_id(map), 0, ".data.array_not_last clear btf val"))
+ goto teardown;
+
+teardown:
+ test_global_map_resize__destroy(skel);
+}
+
+void test_global_map_resize(void)
+{
+ if (test__start_subtest("global_map_resize_bss"))
+ global_map_resize_bss_subtest();
+
+ if (test__start_subtest("global_map_resize_data"))
+ global_map_resize_data_subtest();
+
+ if (test__start_subtest("global_map_resize_invalid"))
+ global_map_resize_invalid_subtest();
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/hash_large_key.c b/tools/testing/selftests/bpf/prog_tests/hash_large_key.c
new file mode 100644
index 000000000000..34684c0fc76d
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/hash_large_key.c
@@ -0,0 +1,43 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include <test_progs.h>
+#include "test_hash_large_key.skel.h"
+
+void test_hash_large_key(void)
+{
+ int err, value = 21, duration = 0, hash_map_fd;
+ struct test_hash_large_key *skel;
+
+ struct bigelement {
+ int a;
+ char b[4096];
+ long long c;
+ } key;
+ bzero(&key, sizeof(key));
+
+ skel = test_hash_large_key__open_and_load();
+ if (CHECK(!skel, "skel_open_and_load", "skeleton open/load failed\n"))
+ return;
+
+ hash_map_fd = bpf_map__fd(skel->maps.hash_map);
+ if (CHECK(hash_map_fd < 0, "bpf_map__fd", "failed\n"))
+ goto cleanup;
+
+ err = test_hash_large_key__attach(skel);
+ if (CHECK(err, "attach_raw_tp", "err %d\n", err))
+ goto cleanup;
+
+ err = bpf_map_update_elem(hash_map_fd, &key, &value, BPF_ANY);
+ if (CHECK(err, "bpf_map_update_elem", "errno=%d\n", errno))
+ goto cleanup;
+
+ key.c = 1;
+ err = bpf_map_lookup_elem(hash_map_fd, &key, &value);
+ if (CHECK(err, "bpf_map_lookup_elem", "errno=%d\n", errno))
+ goto cleanup;
+
+ CHECK_FAIL(value != 42);
+
+cleanup:
+ test_hash_large_key__destroy(skel);
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/hashmap.c b/tools/testing/selftests/bpf/prog_tests/hashmap.c
index 428d488830c6..d358a223fd2d 100644
--- a/tools/testing/selftests/bpf/prog_tests/hashmap.c
+++ b/tools/testing/selftests/bpf/prog_tests/hashmap.c
@@ -7,17 +7,18 @@
*/
#include "test_progs.h"
#include "bpf/hashmap.h"
+#include <stddef.h>
static int duration = 0;
-static size_t hash_fn(const void *k, void *ctx)
+static size_t hash_fn(long k, void *ctx)
{
- return (long)k;
+ return k;
}
-static bool equal_fn(const void *a, const void *b, void *ctx)
+static bool equal_fn(long a, long b, void *ctx)
{
- return (long)a == (long)b;
+ return a == b;
}
static inline size_t next_pow_2(size_t n)
@@ -48,13 +49,12 @@ static void test_hashmap_generic(void)
struct hashmap *map;
map = hashmap__new(hash_fn, equal_fn, NULL);
- if (CHECK(IS_ERR(map), "hashmap__new",
- "failed to create map: %ld\n", PTR_ERR(map)))
+ if (!ASSERT_OK_PTR(map, "hashmap__new"))
return;
for (i = 0; i < ELEM_CNT; i++) {
- const void *oldk, *k = (const void *)(long)i;
- void *oldv, *v = (void *)(long)(1024 + i);
+ long oldk, k = i;
+ long oldv, v = 1024 + i;
err = hashmap__update(map, k, v, &oldk, &oldv);
if (CHECK(err != -ENOENT, "hashmap__update",
@@ -65,20 +65,18 @@ static void test_hashmap_generic(void)
err = hashmap__add(map, k, v);
} else {
err = hashmap__set(map, k, v, &oldk, &oldv);
- if (CHECK(oldk != NULL || oldv != NULL, "check_kv",
- "unexpected k/v: %p=%p\n", oldk, oldv))
+ if (CHECK(oldk != 0 || oldv != 0, "check_kv",
+ "unexpected k/v: %ld=%ld\n", oldk, oldv))
goto cleanup;
}
- if (CHECK(err, "elem_add", "failed to add k/v %ld = %ld: %d\n",
- (long)k, (long)v, err))
+ if (CHECK(err, "elem_add", "failed to add k/v %ld = %ld: %d\n", k, v, err))
goto cleanup;
if (CHECK(!hashmap__find(map, k, &oldv), "elem_find",
- "failed to find key %ld\n", (long)k))
+ "failed to find key %ld\n", k))
goto cleanup;
- if (CHECK(oldv != v, "elem_val",
- "found value is wrong: %ld\n", (long)oldv))
+ if (CHECK(oldv != v, "elem_val", "found value is wrong: %ld\n", oldv))
goto cleanup;
}
@@ -92,8 +90,8 @@ static void test_hashmap_generic(void)
found_msk = 0;
hashmap__for_each_entry(map, entry, bkt) {
- long k = (long)entry->key;
- long v = (long)entry->value;
+ long k = entry->key;
+ long v = entry->value;
found_msk |= 1ULL << k;
if (CHECK(v - k != 1024, "check_kv",
@@ -105,8 +103,8 @@ static void test_hashmap_generic(void)
goto cleanup;
for (i = 0; i < ELEM_CNT; i++) {
- const void *oldk, *k = (const void *)(long)i;
- void *oldv, *v = (void *)(long)(256 + i);
+ long oldk, k = i;
+ long oldv, v = 256 + i;
err = hashmap__add(map, k, v);
if (CHECK(err != -EEXIST, "hashmap__add",
@@ -120,13 +118,13 @@ static void test_hashmap_generic(void)
if (CHECK(err, "elem_upd",
"failed to update k/v %ld = %ld: %d\n",
- (long)k, (long)v, err))
+ k, v, err))
goto cleanup;
if (CHECK(!hashmap__find(map, k, &oldv), "elem_find",
- "failed to find key %ld\n", (long)k))
+ "failed to find key %ld\n", k))
goto cleanup;
if (CHECK(oldv != v, "elem_val",
- "found value is wrong: %ld\n", (long)oldv))
+ "found value is wrong: %ld\n", oldv))
goto cleanup;
}
@@ -140,8 +138,8 @@ static void test_hashmap_generic(void)
found_msk = 0;
hashmap__for_each_entry_safe(map, entry, tmp, bkt) {
- long k = (long)entry->key;
- long v = (long)entry->value;
+ long k = entry->key;
+ long v = entry->value;
found_msk |= 1ULL << k;
if (CHECK(v - k != 256, "elem_check",
@@ -153,7 +151,7 @@ static void test_hashmap_generic(void)
goto cleanup;
found_cnt = 0;
- hashmap__for_each_key_entry(map, entry, (void *)0) {
+ hashmap__for_each_key_entry(map, entry, 0) {
found_cnt++;
}
if (CHECK(!found_cnt, "found_cnt",
@@ -162,27 +160,25 @@ static void test_hashmap_generic(void)
found_msk = 0;
found_cnt = 0;
- hashmap__for_each_key_entry_safe(map, entry, tmp, (void *)0) {
- const void *oldk, *k;
- void *oldv, *v;
+ hashmap__for_each_key_entry_safe(map, entry, tmp, 0) {
+ long oldk, k;
+ long oldv, v;
k = entry->key;
v = entry->value;
found_cnt++;
- found_msk |= 1ULL << (long)k;
+ found_msk |= 1ULL << k;
if (CHECK(!hashmap__delete(map, k, &oldk, &oldv), "elem_del",
- "failed to delete k/v %ld = %ld\n",
- (long)k, (long)v))
+ "failed to delete k/v %ld = %ld\n", k, v))
goto cleanup;
if (CHECK(oldk != k || oldv != v, "check_old",
"invalid deleted k/v: expected %ld = %ld, got %ld = %ld\n",
- (long)k, (long)v, (long)oldk, (long)oldv))
+ k, v, oldk, oldv))
goto cleanup;
if (CHECK(hashmap__delete(map, k, &oldk, &oldv), "elem_del",
- "unexpectedly deleted k/v %ld = %ld\n",
- (long)oldk, (long)oldv))
+ "unexpectedly deleted k/v %ld = %ld\n", oldk, oldv))
goto cleanup;
}
@@ -199,26 +195,24 @@ static void test_hashmap_generic(void)
goto cleanup;
hashmap__for_each_entry_safe(map, entry, tmp, bkt) {
- const void *oldk, *k;
- void *oldv, *v;
+ long oldk, k;
+ long oldv, v;
k = entry->key;
v = entry->value;
found_cnt++;
- found_msk |= 1ULL << (long)k;
+ found_msk |= 1ULL << k;
if (CHECK(!hashmap__delete(map, k, &oldk, &oldv), "elem_del",
- "failed to delete k/v %ld = %ld\n",
- (long)k, (long)v))
+ "failed to delete k/v %ld = %ld\n", k, v))
goto cleanup;
if (CHECK(oldk != k || oldv != v, "elem_check",
"invalid old k/v: expect %ld = %ld, got %ld = %ld\n",
- (long)k, (long)v, (long)oldk, (long)oldv))
+ k, v, oldk, oldv))
goto cleanup;
if (CHECK(hashmap__delete(map, k, &oldk, &oldv), "elem_del",
- "unexpectedly deleted k/v %ld = %ld\n",
- (long)k, (long)v))
+ "unexpectedly deleted k/v %ld = %ld\n", k, v))
goto cleanup;
}
@@ -236,7 +230,7 @@ static void test_hashmap_generic(void)
hashmap__for_each_entry(map, entry, bkt) {
CHECK(false, "elem_exists",
"unexpected map entries left: %ld = %ld\n",
- (long)entry->key, (long)entry->value);
+ entry->key, entry->value);
goto cleanup;
}
@@ -244,22 +238,107 @@ static void test_hashmap_generic(void)
hashmap__for_each_entry(map, entry, bkt) {
CHECK(false, "elem_exists",
"unexpected map entries left: %ld = %ld\n",
- (long)entry->key, (long)entry->value);
+ entry->key, entry->value);
+ goto cleanup;
+ }
+
+cleanup:
+ hashmap__free(map);
+}
+
+static size_t str_hash_fn(long a, void *ctx)
+{
+ return str_hash((char *)a);
+}
+
+static bool str_equal_fn(long a, long b, void *ctx)
+{
+ return strcmp((char *)a, (char *)b) == 0;
+}
+
+/* Verify that hashmap interface works with pointer keys and values */
+static void test_hashmap_ptr_iface(void)
+{
+ const char *key, *value, *old_key, *old_value;
+ struct hashmap_entry *cur;
+ struct hashmap *map;
+ int err, i, bkt;
+
+ map = hashmap__new(str_hash_fn, str_equal_fn, NULL);
+ if (CHECK(!map, "hashmap__new", "can't allocate hashmap\n"))
goto cleanup;
+
+#define CHECK_STR(fn, var, expected) \
+ CHECK(strcmp(var, (expected)), (fn), \
+ "wrong value of " #var ": '%s' instead of '%s'\n", var, (expected))
+
+ err = hashmap__insert(map, "a", "apricot", HASHMAP_ADD, NULL, NULL);
+ if (CHECK(err, "hashmap__insert", "unexpected error: %d\n", err))
+ goto cleanup;
+
+ err = hashmap__insert(map, "a", "apple", HASHMAP_SET, &old_key, &old_value);
+ if (CHECK(err, "hashmap__insert", "unexpected error: %d\n", err))
+ goto cleanup;
+ CHECK_STR("hashmap__update", old_key, "a");
+ CHECK_STR("hashmap__update", old_value, "apricot");
+
+ err = hashmap__add(map, "b", "banana");
+ if (CHECK(err, "hashmap__add", "unexpected error: %d\n", err))
+ goto cleanup;
+
+ err = hashmap__set(map, "b", "breadfruit", &old_key, &old_value);
+ if (CHECK(err, "hashmap__set", "unexpected error: %d\n", err))
+ goto cleanup;
+ CHECK_STR("hashmap__set", old_key, "b");
+ CHECK_STR("hashmap__set", old_value, "banana");
+
+ err = hashmap__update(map, "b", "blueberry", &old_key, &old_value);
+ if (CHECK(err, "hashmap__update", "unexpected error: %d\n", err))
+ goto cleanup;
+ CHECK_STR("hashmap__update", old_key, "b");
+ CHECK_STR("hashmap__update", old_value, "breadfruit");
+
+ err = hashmap__append(map, "c", "cherry");
+ if (CHECK(err, "hashmap__append", "unexpected error: %d\n", err))
+ goto cleanup;
+
+ if (CHECK(!hashmap__delete(map, "c", &old_key, &old_value),
+ "hashmap__delete", "expected to have entry for 'c'\n"))
+ goto cleanup;
+ CHECK_STR("hashmap__delete", old_key, "c");
+ CHECK_STR("hashmap__delete", old_value, "cherry");
+
+ CHECK(!hashmap__find(map, "b", &value), "hashmap__find", "can't find value for 'b'\n");
+ CHECK_STR("hashmap__find", value, "blueberry");
+
+ if (CHECK(!hashmap__delete(map, "b", NULL, NULL),
+ "hashmap__delete", "expected to have entry for 'b'\n"))
+ goto cleanup;
+
+ i = 0;
+ hashmap__for_each_entry(map, cur, bkt) {
+ if (CHECK(i != 0, "hashmap__for_each_entry", "too many entries"))
+ goto cleanup;
+ key = cur->pkey;
+ value = cur->pvalue;
+ CHECK_STR("entry", key, "a");
+ CHECK_STR("entry", value, "apple");
+ i++;
}
+#undef CHECK_STR
cleanup:
hashmap__free(map);
}
-static size_t collision_hash_fn(const void *k, void *ctx)
+static size_t collision_hash_fn(long k, void *ctx)
{
return 0;
}
static void test_hashmap_multimap(void)
{
- void *k1 = (void *)0, *k2 = (void *)1;
+ long k1 = 0, k2 = 1;
struct hashmap_entry *entry;
struct hashmap *map;
long found_msk;
@@ -267,31 +346,30 @@ static void test_hashmap_multimap(void)
/* force collisions */
map = hashmap__new(collision_hash_fn, equal_fn, NULL);
- if (CHECK(IS_ERR(map), "hashmap__new",
- "failed to create map: %ld\n", PTR_ERR(map)))
+ if (!ASSERT_OK_PTR(map, "hashmap__new"))
return;
/* set up multimap:
* [0] -> 1, 2, 4;
* [1] -> 8, 16, 32;
*/
- err = hashmap__append(map, k1, (void *)1);
+ err = hashmap__append(map, k1, 1);
if (CHECK(err, "elem_add", "failed to add k/v: %d\n", err))
goto cleanup;
- err = hashmap__append(map, k1, (void *)2);
+ err = hashmap__append(map, k1, 2);
if (CHECK(err, "elem_add", "failed to add k/v: %d\n", err))
goto cleanup;
- err = hashmap__append(map, k1, (void *)4);
+ err = hashmap__append(map, k1, 4);
if (CHECK(err, "elem_add", "failed to add k/v: %d\n", err))
goto cleanup;
- err = hashmap__append(map, k2, (void *)8);
+ err = hashmap__append(map, k2, 8);
if (CHECK(err, "elem_add", "failed to add k/v: %d\n", err))
goto cleanup;
- err = hashmap__append(map, k2, (void *)16);
+ err = hashmap__append(map, k2, 16);
if (CHECK(err, "elem_add", "failed to add k/v: %d\n", err))
goto cleanup;
- err = hashmap__append(map, k2, (void *)32);
+ err = hashmap__append(map, k2, 32);
if (CHECK(err, "elem_add", "failed to add k/v: %d\n", err))
goto cleanup;
@@ -302,7 +380,7 @@ static void test_hashmap_multimap(void)
/* verify global iteration still works and sees all values */
found_msk = 0;
hashmap__for_each_entry(map, entry, bkt) {
- found_msk |= (long)entry->value;
+ found_msk |= entry->value;
}
if (CHECK(found_msk != (1 << 6) - 1, "found_msk",
"not all keys iterated: %lx\n", found_msk))
@@ -311,7 +389,7 @@ static void test_hashmap_multimap(void)
/* iterate values for key 1 */
found_msk = 0;
hashmap__for_each_key_entry(map, entry, k1) {
- found_msk |= (long)entry->value;
+ found_msk |= entry->value;
}
if (CHECK(found_msk != (1 | 2 | 4), "found_msk",
"invalid k1 values: %lx\n", found_msk))
@@ -320,7 +398,7 @@ static void test_hashmap_multimap(void)
/* iterate values for key 2 */
found_msk = 0;
hashmap__for_each_key_entry(map, entry, k2) {
- found_msk |= (long)entry->value;
+ found_msk |= entry->value;
}
if (CHECK(found_msk != (8 | 16 | 32), "found_msk",
"invalid k2 values: %lx\n", found_msk))
@@ -335,12 +413,11 @@ static void test_hashmap_empty()
struct hashmap_entry *entry;
int bkt;
struct hashmap *map;
- void *k = (void *)0;
+ long k = 0;
/* force collisions */
map = hashmap__new(hash_fn, equal_fn, NULL);
- if (CHECK(IS_ERR(map), "hashmap__new",
- "failed to create map: %ld\n", PTR_ERR(map)))
+ if (!ASSERT_OK_PTR(map, "hashmap__new"))
goto cleanup;
if (CHECK(hashmap__size(map) != 0, "hashmap__size",
@@ -377,4 +454,6 @@ void test_hashmap()
test_hashmap_multimap();
if (test__start_subtest("empty"))
test_hashmap_empty();
+ if (test__start_subtest("ptr_iface"))
+ test_hashmap_ptr_iface();
}
diff --git a/tools/testing/selftests/bpf/prog_tests/helper_restricted.c b/tools/testing/selftests/bpf/prog_tests/helper_restricted.c
new file mode 100644
index 000000000000..0354f9b82c65
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/helper_restricted.c
@@ -0,0 +1,31 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include <test_progs.h>
+#include "test_helper_restricted.skel.h"
+
+void test_helper_restricted(void)
+{
+ int prog_i = 0, prog_cnt;
+
+ do {
+ struct test_helper_restricted *test;
+ int err;
+
+ test = test_helper_restricted__open();
+ if (!ASSERT_OK_PTR(test, "open"))
+ return;
+
+ prog_cnt = test->skeleton->prog_cnt;
+
+ for (int j = 0; j < prog_cnt; ++j) {
+ struct bpf_program *prog = *test->skeleton->progs[j].prog;
+
+ bpf_program__set_autoload(prog, true);
+ }
+
+ err = test_helper_restricted__load(test);
+ ASSERT_ERR(err, "load_should_fail");
+
+ test_helper_restricted__destroy(test);
+ } while (++prog_i < prog_cnt);
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/htab_reuse.c b/tools/testing/selftests/bpf/prog_tests/htab_reuse.c
new file mode 100644
index 000000000000..a742dd994d60
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/htab_reuse.c
@@ -0,0 +1,101 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (C) 2023. Huawei Technologies Co., Ltd */
+#define _GNU_SOURCE
+#include <sched.h>
+#include <stdbool.h>
+#include <test_progs.h>
+#include "htab_reuse.skel.h"
+
+struct htab_op_ctx {
+ int fd;
+ int loop;
+ bool stop;
+};
+
+struct htab_val {
+ unsigned int lock;
+ unsigned int data;
+};
+
+static void *htab_lookup_fn(void *arg)
+{
+ struct htab_op_ctx *ctx = arg;
+ int i = 0;
+
+ while (i++ < ctx->loop && !ctx->stop) {
+ struct htab_val value;
+ unsigned int key;
+
+ /* Use BPF_F_LOCK to use spin-lock in map value. */
+ key = 7;
+ bpf_map_lookup_elem_flags(ctx->fd, &key, &value, BPF_F_LOCK);
+ }
+
+ return NULL;
+}
+
+static void *htab_update_fn(void *arg)
+{
+ struct htab_op_ctx *ctx = arg;
+ int i = 0;
+
+ while (i++ < ctx->loop && !ctx->stop) {
+ struct htab_val value;
+ unsigned int key;
+
+ key = 7;
+ value.lock = 0;
+ value.data = key;
+ bpf_map_update_elem(ctx->fd, &key, &value, BPF_F_LOCK);
+ bpf_map_delete_elem(ctx->fd, &key);
+
+ key = 24;
+ value.lock = 0;
+ value.data = key;
+ bpf_map_update_elem(ctx->fd, &key, &value, BPF_F_LOCK);
+ bpf_map_delete_elem(ctx->fd, &key);
+ }
+
+ return NULL;
+}
+
+void test_htab_reuse(void)
+{
+ unsigned int i, wr_nr = 1, rd_nr = 4;
+ pthread_t tids[wr_nr + rd_nr];
+ struct htab_reuse *skel;
+ struct htab_op_ctx ctx;
+ int err;
+
+ skel = htab_reuse__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "htab_reuse__open_and_load"))
+ return;
+
+ ctx.fd = bpf_map__fd(skel->maps.htab);
+ ctx.loop = 500;
+ ctx.stop = false;
+
+ memset(tids, 0, sizeof(tids));
+ for (i = 0; i < wr_nr; i++) {
+ err = pthread_create(&tids[i], NULL, htab_update_fn, &ctx);
+ if (!ASSERT_OK(err, "pthread_create")) {
+ ctx.stop = true;
+ goto reap;
+ }
+ }
+ for (i = 0; i < rd_nr; i++) {
+ err = pthread_create(&tids[i + wr_nr], NULL, htab_lookup_fn, &ctx);
+ if (!ASSERT_OK(err, "pthread_create")) {
+ ctx.stop = true;
+ goto reap;
+ }
+ }
+
+reap:
+ for (i = 0; i < wr_nr + rd_nr; i++) {
+ if (!tids[i])
+ continue;
+ pthread_join(tids[i], NULL);
+ }
+ htab_reuse__destroy(skel);
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/htab_update.c b/tools/testing/selftests/bpf/prog_tests/htab_update.c
new file mode 100644
index 000000000000..2bc85f4814f4
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/htab_update.c
@@ -0,0 +1,126 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (C) 2022. Huawei Technologies Co., Ltd */
+#define _GNU_SOURCE
+#include <sched.h>
+#include <stdbool.h>
+#include <test_progs.h>
+#include "htab_update.skel.h"
+
+struct htab_update_ctx {
+ int fd;
+ int loop;
+ bool stop;
+};
+
+static void test_reenter_update(void)
+{
+ struct htab_update *skel;
+ unsigned int key, value;
+ int err;
+
+ skel = htab_update__open();
+ if (!ASSERT_OK_PTR(skel, "htab_update__open"))
+ return;
+
+ /* lookup_elem_raw() may be inlined and find_kernel_btf_id() will return -ESRCH */
+ bpf_program__set_autoload(skel->progs.lookup_elem_raw, true);
+ err = htab_update__load(skel);
+ if (!ASSERT_TRUE(!err || err == -ESRCH, "htab_update__load") || err)
+ goto out;
+
+ skel->bss->pid = getpid();
+ err = htab_update__attach(skel);
+ if (!ASSERT_OK(err, "htab_update__attach"))
+ goto out;
+
+ /* Will trigger the reentrancy of bpf_map_update_elem() */
+ key = 0;
+ value = 0;
+ err = bpf_map_update_elem(bpf_map__fd(skel->maps.htab), &key, &value, 0);
+ if (!ASSERT_OK(err, "add element"))
+ goto out;
+
+ ASSERT_EQ(skel->bss->update_err, -EBUSY, "no reentrancy");
+out:
+ htab_update__destroy(skel);
+}
+
+static void *htab_update_thread(void *arg)
+{
+ struct htab_update_ctx *ctx = arg;
+ cpu_set_t cpus;
+ int i;
+
+ /* Pinned on CPU 0 */
+ CPU_ZERO(&cpus);
+ CPU_SET(0, &cpus);
+ pthread_setaffinity_np(pthread_self(), sizeof(cpus), &cpus);
+
+ i = 0;
+ while (i++ < ctx->loop && !ctx->stop) {
+ unsigned int key = 0, value = 0;
+ int err;
+
+ err = bpf_map_update_elem(ctx->fd, &key, &value, 0);
+ if (err) {
+ ctx->stop = true;
+ return (void *)(long)err;
+ }
+ }
+
+ return NULL;
+}
+
+static void test_concurrent_update(void)
+{
+ struct htab_update_ctx ctx;
+ struct htab_update *skel;
+ unsigned int i, nr;
+ pthread_t *tids;
+ int err;
+
+ skel = htab_update__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "htab_update__open_and_load"))
+ return;
+
+ ctx.fd = bpf_map__fd(skel->maps.htab);
+ ctx.loop = 1000;
+ ctx.stop = false;
+
+ nr = 4;
+ tids = calloc(nr, sizeof(*tids));
+ if (!ASSERT_NEQ(tids, NULL, "no mem"))
+ goto out;
+
+ for (i = 0; i < nr; i++) {
+ err = pthread_create(&tids[i], NULL, htab_update_thread, &ctx);
+ if (!ASSERT_OK(err, "pthread_create")) {
+ unsigned int j;
+
+ ctx.stop = true;
+ for (j = 0; j < i; j++)
+ pthread_join(tids[j], NULL);
+ goto out;
+ }
+ }
+
+ for (i = 0; i < nr; i++) {
+ void *thread_err = NULL;
+
+ pthread_join(tids[i], &thread_err);
+ ASSERT_EQ(thread_err, NULL, "update error");
+ }
+
+out:
+ if (tids)
+ free(tids);
+ htab_update__destroy(skel);
+}
+
+void test_htab_update(void)
+{
+ if (test__start_subtest("reenter_update"))
+ test_reenter_update();
+ if (test__start_subtest("concurrent_update"))
+ test_concurrent_update();
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/inner_array_lookup.c b/tools/testing/selftests/bpf/prog_tests/inner_array_lookup.c
new file mode 100644
index 000000000000..9ab4cd195108
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/inner_array_lookup.c
@@ -0,0 +1,31 @@
+// SPDX-License-Identifier: GPL-2.0-only
+
+#include <test_progs.h>
+
+#include "inner_array_lookup.skel.h"
+
+void test_inner_array_lookup(void)
+{
+ int map1_fd, err;
+ int key = 3;
+ int val = 1;
+ struct inner_array_lookup *skel;
+
+ skel = inner_array_lookup__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "open_load_skeleton"))
+ return;
+
+ err = inner_array_lookup__attach(skel);
+ if (!ASSERT_OK(err, "skeleton_attach"))
+ goto cleanup;
+
+ map1_fd = bpf_map__fd(skel->maps.inner_map1);
+ bpf_map_update_elem(map1_fd, &key, &val, 0);
+
+ /* Probe should have set the element at index 3 to 2 */
+ bpf_map_lookup_elem(map1_fd, &key, &val);
+ ASSERT_EQ(val, 2, "value_is_2");
+
+cleanup:
+ inner_array_lookup__destroy(skel);
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/ip_check_defrag.c b/tools/testing/selftests/bpf/prog_tests/ip_check_defrag.c
new file mode 100644
index 000000000000..8dd2af9081f4
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/ip_check_defrag.c
@@ -0,0 +1,283 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <test_progs.h>
+#include <net/if.h>
+#include <linux/netfilter.h>
+#include <network_helpers.h>
+#include "ip_check_defrag.skel.h"
+#include "ip_check_defrag_frags.h"
+
+/*
+ * This selftest spins up a client and an echo server, each in their own
+ * network namespace. The client will send a fragmented message to the server.
+ * The prog attached to the server will shoot down any fragments. Thus, if
+ * the server is able to correctly echo back the message to the client, we will
+ * have verified that netfilter is reassembling packets for us.
+ *
+ * Topology:
+ * =========
+ * NS0 | NS1
+ * |
+ * client | server
+ * ---------- | ----------
+ * | veth0 | --------- | veth1 |
+ * ---------- peer ----------
+ * |
+ * | with bpf
+ */
+
+#define NS0 "defrag_ns0"
+#define NS1 "defrag_ns1"
+#define VETH0 "veth0"
+#define VETH1 "veth1"
+#define VETH0_ADDR "172.16.1.100"
+#define VETH0_ADDR6 "fc00::100"
+/* The following constants must stay in sync with `generate_udp_fragments.py` */
+#define VETH1_ADDR "172.16.1.200"
+#define VETH1_ADDR6 "fc00::200"
+#define CLIENT_PORT 48878
+#define SERVER_PORT 48879
+#define MAGIC_MESSAGE "THIS IS THE ORIGINAL MESSAGE, PLEASE REASSEMBLE ME"
+
+static int setup_topology(bool ipv6)
+{
+ bool up;
+ int i;
+
+ SYS(fail, "ip netns add " NS0);
+ SYS(fail, "ip netns add " NS1);
+ SYS(fail, "ip link add " VETH0 " netns " NS0 " type veth peer name " VETH1 " netns " NS1);
+ if (ipv6) {
+ SYS(fail, "ip -6 -net " NS0 " addr add " VETH0_ADDR6 "/64 dev " VETH0 " nodad");
+ SYS(fail, "ip -6 -net " NS1 " addr add " VETH1_ADDR6 "/64 dev " VETH1 " nodad");
+ } else {
+ SYS(fail, "ip -net " NS0 " addr add " VETH0_ADDR "/24 dev " VETH0);
+ SYS(fail, "ip -net " NS1 " addr add " VETH1_ADDR "/24 dev " VETH1);
+ }
+ SYS(fail, "ip -net " NS0 " link set dev " VETH0 " up");
+ SYS(fail, "ip -net " NS1 " link set dev " VETH1 " up");
+
+ /* Wait for up to 5s for links to come up */
+ for (i = 0; i < 5; ++i) {
+ if (ipv6)
+ up = !SYS_NOFAIL("ip netns exec " NS0 " ping -6 -c 1 -W 1 " VETH1_ADDR6);
+ else
+ up = !SYS_NOFAIL("ip netns exec " NS0 " ping -c 1 -W 1 " VETH1_ADDR);
+
+ if (up)
+ break;
+ }
+
+ return 0;
+fail:
+ return -1;
+}
+
+static void cleanup_topology(void)
+{
+ SYS_NOFAIL("test -f /var/run/netns/" NS0 " && ip netns delete " NS0);
+ SYS_NOFAIL("test -f /var/run/netns/" NS1 " && ip netns delete " NS1);
+}
+
+static int attach(struct ip_check_defrag *skel, bool ipv6)
+{
+ LIBBPF_OPTS(bpf_netfilter_opts, opts,
+ .pf = ipv6 ? NFPROTO_IPV6 : NFPROTO_IPV4,
+ .priority = 42,
+ .flags = BPF_F_NETFILTER_IP_DEFRAG);
+ struct nstoken *nstoken;
+ int err = -1;
+
+ nstoken = open_netns(NS1);
+
+ skel->links.defrag = bpf_program__attach_netfilter(skel->progs.defrag, &opts);
+ if (!ASSERT_OK_PTR(skel->links.defrag, "program attach"))
+ goto out;
+
+ err = 0;
+out:
+ close_netns(nstoken);
+ return err;
+}
+
+static int send_frags(int client)
+{
+ struct sockaddr_storage saddr;
+ struct sockaddr *saddr_p;
+ socklen_t saddr_len;
+ int err;
+
+ saddr_p = (struct sockaddr *)&saddr;
+ err = make_sockaddr(AF_INET, VETH1_ADDR, SERVER_PORT, &saddr, &saddr_len);
+ if (!ASSERT_OK(err, "make_sockaddr"))
+ return -1;
+
+ err = sendto(client, frag_0, sizeof(frag_0), 0, saddr_p, saddr_len);
+ if (!ASSERT_GE(err, 0, "sendto frag_0"))
+ return -1;
+
+ err = sendto(client, frag_1, sizeof(frag_1), 0, saddr_p, saddr_len);
+ if (!ASSERT_GE(err, 0, "sendto frag_1"))
+ return -1;
+
+ err = sendto(client, frag_2, sizeof(frag_2), 0, saddr_p, saddr_len);
+ if (!ASSERT_GE(err, 0, "sendto frag_2"))
+ return -1;
+
+ return 0;
+}
+
+static int send_frags6(int client)
+{
+ struct sockaddr_storage saddr;
+ struct sockaddr *saddr_p;
+ socklen_t saddr_len;
+ int err;
+
+ saddr_p = (struct sockaddr *)&saddr;
+ /* Port needs to be set to 0 for raw ipv6 socket for some reason */
+ err = make_sockaddr(AF_INET6, VETH1_ADDR6, 0, &saddr, &saddr_len);
+ if (!ASSERT_OK(err, "make_sockaddr"))
+ return -1;
+
+ err = sendto(client, frag6_0, sizeof(frag6_0), 0, saddr_p, saddr_len);
+ if (!ASSERT_GE(err, 0, "sendto frag6_0"))
+ return -1;
+
+ err = sendto(client, frag6_1, sizeof(frag6_1), 0, saddr_p, saddr_len);
+ if (!ASSERT_GE(err, 0, "sendto frag6_1"))
+ return -1;
+
+ err = sendto(client, frag6_2, sizeof(frag6_2), 0, saddr_p, saddr_len);
+ if (!ASSERT_GE(err, 0, "sendto frag6_2"))
+ return -1;
+
+ return 0;
+}
+
+void test_bpf_ip_check_defrag_ok(bool ipv6)
+{
+ struct network_helper_opts rx_opts = {
+ .timeout_ms = 1000,
+ .noconnect = true,
+ };
+ struct network_helper_opts tx_ops = {
+ .timeout_ms = 1000,
+ .type = SOCK_RAW,
+ .proto = IPPROTO_RAW,
+ .noconnect = true,
+ };
+ struct sockaddr_storage caddr;
+ struct ip_check_defrag *skel;
+ struct nstoken *nstoken;
+ int client_tx_fd = -1;
+ int client_rx_fd = -1;
+ socklen_t caddr_len;
+ int srv_fd = -1;
+ char buf[1024];
+ int len, err;
+
+ skel = ip_check_defrag__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "skel_open"))
+ return;
+
+ if (!ASSERT_OK(setup_topology(ipv6), "setup_topology"))
+ goto out;
+
+ if (!ASSERT_OK(attach(skel, ipv6), "attach"))
+ goto out;
+
+ /* Start server in ns1 */
+ nstoken = open_netns(NS1);
+ if (!ASSERT_OK_PTR(nstoken, "setns ns1"))
+ goto out;
+ srv_fd = start_server(ipv6 ? AF_INET6 : AF_INET, SOCK_DGRAM, NULL, SERVER_PORT, 0);
+ close_netns(nstoken);
+ if (!ASSERT_GE(srv_fd, 0, "start_server"))
+ goto out;
+
+ /* Open tx raw socket in ns0 */
+ nstoken = open_netns(NS0);
+ if (!ASSERT_OK_PTR(nstoken, "setns ns0"))
+ goto out;
+ client_tx_fd = connect_to_fd_opts(srv_fd, &tx_ops);
+ close_netns(nstoken);
+ if (!ASSERT_GE(client_tx_fd, 0, "connect_to_fd_opts"))
+ goto out;
+
+ /* Open rx socket in ns0 */
+ nstoken = open_netns(NS0);
+ if (!ASSERT_OK_PTR(nstoken, "setns ns0"))
+ goto out;
+ client_rx_fd = connect_to_fd_opts(srv_fd, &rx_opts);
+ close_netns(nstoken);
+ if (!ASSERT_GE(client_rx_fd, 0, "connect_to_fd_opts"))
+ goto out;
+
+ /* Bind rx socket to a premeditated port */
+ memset(&caddr, 0, sizeof(caddr));
+ nstoken = open_netns(NS0);
+ if (!ASSERT_OK_PTR(nstoken, "setns ns0"))
+ goto out;
+ if (ipv6) {
+ struct sockaddr_in6 *c = (struct sockaddr_in6 *)&caddr;
+
+ c->sin6_family = AF_INET6;
+ inet_pton(AF_INET6, VETH0_ADDR6, &c->sin6_addr);
+ c->sin6_port = htons(CLIENT_PORT);
+ err = bind(client_rx_fd, (struct sockaddr *)c, sizeof(*c));
+ } else {
+ struct sockaddr_in *c = (struct sockaddr_in *)&caddr;
+
+ c->sin_family = AF_INET;
+ inet_pton(AF_INET, VETH0_ADDR, &c->sin_addr);
+ c->sin_port = htons(CLIENT_PORT);
+ err = bind(client_rx_fd, (struct sockaddr *)c, sizeof(*c));
+ }
+ close_netns(nstoken);
+ if (!ASSERT_OK(err, "bind"))
+ goto out;
+
+ /* Send message in fragments */
+ if (ipv6) {
+ if (!ASSERT_OK(send_frags6(client_tx_fd), "send_frags6"))
+ goto out;
+ } else {
+ if (!ASSERT_OK(send_frags(client_tx_fd), "send_frags"))
+ goto out;
+ }
+
+ if (!ASSERT_EQ(skel->bss->shootdowns, 0, "shootdowns"))
+ goto out;
+
+ /* Receive reassembled msg on server and echo back to client */
+ caddr_len = sizeof(caddr);
+ len = recvfrom(srv_fd, buf, sizeof(buf), 0, (struct sockaddr *)&caddr, &caddr_len);
+ if (!ASSERT_GE(len, 0, "server recvfrom"))
+ goto out;
+ len = sendto(srv_fd, buf, len, 0, (struct sockaddr *)&caddr, caddr_len);
+ if (!ASSERT_GE(len, 0, "server sendto"))
+ goto out;
+
+ /* Expect reassembed message to be echoed back */
+ len = recvfrom(client_rx_fd, buf, sizeof(buf), 0, NULL, NULL);
+ if (!ASSERT_EQ(len, sizeof(MAGIC_MESSAGE) - 1, "client short read"))
+ goto out;
+
+out:
+ if (client_rx_fd != -1)
+ close(client_rx_fd);
+ if (client_tx_fd != -1)
+ close(client_tx_fd);
+ if (srv_fd != -1)
+ close(srv_fd);
+ cleanup_topology();
+ ip_check_defrag__destroy(skel);
+}
+
+void test_bpf_ip_check_defrag(void)
+{
+ if (test__start_subtest("v4"))
+ test_bpf_ip_check_defrag_ok(false);
+ if (test__start_subtest("v6"))
+ test_bpf_ip_check_defrag_ok(true);
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/iters.c b/tools/testing/selftests/bpf/prog_tests/iters.c
new file mode 100644
index 000000000000..3c440370c1f0
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/iters.c
@@ -0,0 +1,316 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2023 Meta Platforms, Inc. and affiliates. */
+
+#include <sys/syscall.h>
+#include <sys/mman.h>
+#include <sys/wait.h>
+#include <unistd.h>
+#include <malloc.h>
+#include <stdlib.h>
+#include <test_progs.h>
+#include "cgroup_helpers.h"
+
+#include "iters.skel.h"
+#include "iters_state_safety.skel.h"
+#include "iters_looping.skel.h"
+#include "iters_num.skel.h"
+#include "iters_testmod_seq.skel.h"
+#include "iters_task_vma.skel.h"
+#include "iters_task.skel.h"
+#include "iters_css_task.skel.h"
+#include "iters_css.skel.h"
+#include "iters_task_failure.skel.h"
+
+static void subtest_num_iters(void)
+{
+ struct iters_num *skel;
+ int err;
+
+ skel = iters_num__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "skel_open_and_load"))
+ return;
+
+ err = iters_num__attach(skel);
+ if (!ASSERT_OK(err, "skel_attach"))
+ goto cleanup;
+
+ usleep(1);
+ iters_num__detach(skel);
+
+#define VALIDATE_CASE(case_name) \
+ ASSERT_EQ(skel->bss->res_##case_name, \
+ skel->rodata->exp_##case_name, \
+ #case_name)
+
+ VALIDATE_CASE(empty_zero);
+ VALIDATE_CASE(empty_int_min);
+ VALIDATE_CASE(empty_int_max);
+ VALIDATE_CASE(empty_minus_one);
+
+ VALIDATE_CASE(simple_sum);
+ VALIDATE_CASE(neg_sum);
+ VALIDATE_CASE(very_neg_sum);
+ VALIDATE_CASE(neg_pos_sum);
+
+ VALIDATE_CASE(invalid_range);
+ VALIDATE_CASE(max_range);
+ VALIDATE_CASE(e2big_range);
+
+ VALIDATE_CASE(succ_elem_cnt);
+ VALIDATE_CASE(overfetched_elem_cnt);
+ VALIDATE_CASE(fail_elem_cnt);
+
+#undef VALIDATE_CASE
+
+cleanup:
+ iters_num__destroy(skel);
+}
+
+static void subtest_testmod_seq_iters(void)
+{
+ struct iters_testmod_seq *skel;
+ int err;
+
+ if (!env.has_testmod) {
+ test__skip();
+ return;
+ }
+
+ skel = iters_testmod_seq__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "skel_open_and_load"))
+ return;
+
+ err = iters_testmod_seq__attach(skel);
+ if (!ASSERT_OK(err, "skel_attach"))
+ goto cleanup;
+
+ usleep(1);
+ iters_testmod_seq__detach(skel);
+
+#define VALIDATE_CASE(case_name) \
+ ASSERT_EQ(skel->bss->res_##case_name, \
+ skel->rodata->exp_##case_name, \
+ #case_name)
+
+ VALIDATE_CASE(empty);
+ VALIDATE_CASE(full);
+ VALIDATE_CASE(truncated);
+
+#undef VALIDATE_CASE
+
+cleanup:
+ iters_testmod_seq__destroy(skel);
+}
+
+static void subtest_task_vma_iters(void)
+{
+ unsigned long start, end, bpf_iter_start, bpf_iter_end;
+ struct iters_task_vma *skel;
+ char rest_of_line[1000];
+ unsigned int seen;
+ FILE *f = NULL;
+ int err;
+
+ skel = iters_task_vma__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "skel_open_and_load"))
+ return;
+
+ skel->bss->target_pid = getpid();
+
+ err = iters_task_vma__attach(skel);
+ if (!ASSERT_OK(err, "skel_attach"))
+ goto cleanup;
+
+ getpgid(skel->bss->target_pid);
+ iters_task_vma__detach(skel);
+
+ if (!ASSERT_GT(skel->bss->vmas_seen, 0, "vmas_seen_gt_zero"))
+ goto cleanup;
+
+ f = fopen("/proc/self/maps", "r");
+ if (!ASSERT_OK_PTR(f, "proc_maps_fopen"))
+ goto cleanup;
+
+ seen = 0;
+ while (fscanf(f, "%lx-%lx %[^\n]\n", &start, &end, rest_of_line) == 3) {
+ /* [vsyscall] vma isn't _really_ part of task->mm vmas.
+ * /proc/PID/maps returns it when out of vmas - see get_gate_vma
+ * calls in fs/proc/task_mmu.c
+ */
+ if (strstr(rest_of_line, "[vsyscall]"))
+ continue;
+
+ bpf_iter_start = skel->bss->vm_ranges[seen].vm_start;
+ bpf_iter_end = skel->bss->vm_ranges[seen].vm_end;
+
+ ASSERT_EQ(bpf_iter_start, start, "vma->vm_start match");
+ ASSERT_EQ(bpf_iter_end, end, "vma->vm_end match");
+ seen++;
+ }
+
+ if (!ASSERT_EQ(skel->bss->vmas_seen, seen, "vmas_seen_eq"))
+ goto cleanup;
+
+cleanup:
+ if (f)
+ fclose(f);
+ iters_task_vma__destroy(skel);
+}
+
+static pthread_mutex_t do_nothing_mutex;
+
+static void *do_nothing_wait(void *arg)
+{
+ pthread_mutex_lock(&do_nothing_mutex);
+ pthread_mutex_unlock(&do_nothing_mutex);
+
+ pthread_exit(arg);
+}
+
+#define thread_num 2
+
+static void subtest_task_iters(void)
+{
+ struct iters_task *skel = NULL;
+ pthread_t thread_ids[thread_num];
+ void *ret;
+ int err;
+
+ skel = iters_task__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "open_and_load"))
+ goto cleanup;
+ skel->bss->target_pid = getpid();
+ err = iters_task__attach(skel);
+ if (!ASSERT_OK(err, "iters_task__attach"))
+ goto cleanup;
+ pthread_mutex_lock(&do_nothing_mutex);
+ for (int i = 0; i < thread_num; i++)
+ ASSERT_OK(pthread_create(&thread_ids[i], NULL, &do_nothing_wait, NULL),
+ "pthread_create");
+
+ syscall(SYS_getpgid);
+ iters_task__detach(skel);
+ ASSERT_EQ(skel->bss->procs_cnt, 1, "procs_cnt");
+ ASSERT_EQ(skel->bss->threads_cnt, thread_num + 1, "threads_cnt");
+ ASSERT_EQ(skel->bss->proc_threads_cnt, thread_num + 1, "proc_threads_cnt");
+ ASSERT_EQ(skel->bss->invalid_cnt, 0, "invalid_cnt");
+ pthread_mutex_unlock(&do_nothing_mutex);
+ for (int i = 0; i < thread_num; i++)
+ ASSERT_OK(pthread_join(thread_ids[i], &ret), "pthread_join");
+cleanup:
+ iters_task__destroy(skel);
+}
+
+extern int stack_mprotect(void);
+
+static void subtest_css_task_iters(void)
+{
+ struct iters_css_task *skel = NULL;
+ int err, cg_fd, cg_id;
+ const char *cgrp_path = "/cg1";
+
+ err = setup_cgroup_environment();
+ if (!ASSERT_OK(err, "setup_cgroup_environment"))
+ goto cleanup;
+ cg_fd = create_and_get_cgroup(cgrp_path);
+ if (!ASSERT_GE(cg_fd, 0, "create_and_get_cgroup"))
+ goto cleanup;
+ cg_id = get_cgroup_id(cgrp_path);
+ err = join_cgroup(cgrp_path);
+ if (!ASSERT_OK(err, "join_cgroup"))
+ goto cleanup;
+
+ skel = iters_css_task__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "open_and_load"))
+ goto cleanup;
+
+ skel->bss->target_pid = getpid();
+ skel->bss->cg_id = cg_id;
+ err = iters_css_task__attach(skel);
+ if (!ASSERT_OK(err, "iters_task__attach"))
+ goto cleanup;
+ err = stack_mprotect();
+ if (!ASSERT_EQ(err, -1, "stack_mprotect") ||
+ !ASSERT_EQ(errno, EPERM, "stack_mprotect"))
+ goto cleanup;
+ iters_css_task__detach(skel);
+ ASSERT_EQ(skel->bss->css_task_cnt, 1, "css_task_cnt");
+
+cleanup:
+ cleanup_cgroup_environment();
+ iters_css_task__destroy(skel);
+}
+
+static void subtest_css_iters(void)
+{
+ struct iters_css *skel = NULL;
+ struct {
+ const char *path;
+ int fd;
+ } cgs[] = {
+ { "/cg1" },
+ { "/cg1/cg2" },
+ { "/cg1/cg2/cg3" },
+ { "/cg1/cg2/cg3/cg4" },
+ };
+ int err, cg_nr = ARRAY_SIZE(cgs);
+ int i;
+
+ err = setup_cgroup_environment();
+ if (!ASSERT_OK(err, "setup_cgroup_environment"))
+ goto cleanup;
+ for (i = 0; i < cg_nr; i++) {
+ cgs[i].fd = create_and_get_cgroup(cgs[i].path);
+ if (!ASSERT_GE(cgs[i].fd, 0, "create_and_get_cgroup"))
+ goto cleanup;
+ }
+
+ skel = iters_css__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "open_and_load"))
+ goto cleanup;
+
+ skel->bss->target_pid = getpid();
+ skel->bss->root_cg_id = get_cgroup_id(cgs[0].path);
+ skel->bss->leaf_cg_id = get_cgroup_id(cgs[cg_nr - 1].path);
+ err = iters_css__attach(skel);
+
+ if (!ASSERT_OK(err, "iters_task__attach"))
+ goto cleanup;
+
+ syscall(SYS_getpgid);
+ ASSERT_EQ(skel->bss->pre_order_cnt, cg_nr, "pre_order_cnt");
+ ASSERT_EQ(skel->bss->first_cg_id, get_cgroup_id(cgs[0].path), "first_cg_id");
+
+ ASSERT_EQ(skel->bss->post_order_cnt, cg_nr, "post_order_cnt");
+ ASSERT_EQ(skel->bss->last_cg_id, get_cgroup_id(cgs[0].path), "last_cg_id");
+ ASSERT_EQ(skel->bss->tree_high, cg_nr - 1, "tree_high");
+ iters_css__detach(skel);
+cleanup:
+ cleanup_cgroup_environment();
+ iters_css__destroy(skel);
+}
+
+void test_iters(void)
+{
+ RUN_TESTS(iters_state_safety);
+ RUN_TESTS(iters_looping);
+ RUN_TESTS(iters);
+ RUN_TESTS(iters_css_task);
+
+ if (env.has_testmod)
+ RUN_TESTS(iters_testmod_seq);
+
+ if (test__start_subtest("num"))
+ subtest_num_iters();
+ if (test__start_subtest("testmod_seq"))
+ subtest_testmod_seq_iters();
+ if (test__start_subtest("task_vma"))
+ subtest_task_vma_iters();
+ if (test__start_subtest("task"))
+ subtest_task_iters();
+ if (test__start_subtest("css_task"))
+ subtest_css_task_iters();
+ if (test__start_subtest("css"))
+ subtest_css_iters();
+ RUN_TESTS(iters_task_failure);
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/jeq_infer_not_null.c b/tools/testing/selftests/bpf/prog_tests/jeq_infer_not_null.c
new file mode 100644
index 000000000000..3add34df5767
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/jeq_infer_not_null.c
@@ -0,0 +1,9 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include <test_progs.h>
+#include "jeq_infer_not_null_fail.skel.h"
+
+void test_jeq_infer_not_null(void)
+{
+ RUN_TESTS(jeq_infer_not_null_fail);
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/jit_probe_mem.c b/tools/testing/selftests/bpf/prog_tests/jit_probe_mem.c
new file mode 100644
index 000000000000..5639428607e6
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/jit_probe_mem.c
@@ -0,0 +1,28 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2022 Meta Platforms, Inc. and affiliates. */
+#include <test_progs.h>
+#include <network_helpers.h>
+
+#include "jit_probe_mem.skel.h"
+
+void test_jit_probe_mem(void)
+{
+ LIBBPF_OPTS(bpf_test_run_opts, opts,
+ .data_in = &pkt_v4,
+ .data_size_in = sizeof(pkt_v4),
+ .repeat = 1,
+ );
+ struct jit_probe_mem *skel;
+ int ret;
+
+ skel = jit_probe_mem__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "jit_probe_mem__open_and_load"))
+ return;
+
+ ret = bpf_prog_test_run_opts(bpf_program__fd(skel->progs.test_jit_probe_mem), &opts);
+ ASSERT_OK(ret, "jit_probe_mem ret");
+ ASSERT_OK(opts.retval, "jit_probe_mem opts.retval");
+ ASSERT_EQ(skel->data->total_sum, 192, "jit_probe_mem total_sum");
+
+ jit_probe_mem__destroy(skel);
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/kfree_skb.c b/tools/testing/selftests/bpf/prog_tests/kfree_skb.c
index 42c3a3103c26..c07991544a78 100644
--- a/tools/testing/selftests/bpf/prog_tests/kfree_skb.c
+++ b/tools/testing/selftests/bpf/prog_tests/kfree_skb.c
@@ -1,6 +1,7 @@
// SPDX-License-Identifier: GPL-2.0
#include <test_progs.h>
#include <network_helpers.h>
+#include "kfree_skb.skel.h"
struct meta {
int ifindex;
@@ -35,7 +36,7 @@ static void on_sample(void *ctx, int cpu, void *data, __u32 size)
"cb32_0 %x != %x\n",
meta->cb32_0, cb.cb32[0]))
return;
- if (CHECK(pkt_v6->eth.h_proto != 0xdd86, "check_eth",
+ if (CHECK(pkt_v6->eth.h_proto != htons(ETH_P_IPV6), "check_eth",
"h_proto %x\n", pkt_v6->eth.h_proto))
return;
if (CHECK(pkt_v6->iph.nexthdr != 6, "check_ip",
@@ -48,83 +49,60 @@ static void on_sample(void *ctx, int cpu, void *data, __u32 size)
*(bool *)ctx = true;
}
-void test_kfree_skb(void)
+/* TODO: fix kernel panic caused by this test in parallel mode */
+void serial_test_kfree_skb(void)
{
struct __sk_buff skb = {};
- struct bpf_prog_test_run_attr tattr = {
+ LIBBPF_OPTS(bpf_test_run_opts, topts,
.data_in = &pkt_v6,
.data_size_in = sizeof(pkt_v6),
.ctx_in = &skb,
.ctx_size_in = sizeof(skb),
- };
- struct bpf_prog_load_attr attr = {
- .file = "./kfree_skb.o",
- };
-
- struct bpf_link *link = NULL, *link_fentry = NULL, *link_fexit = NULL;
- struct bpf_map *perf_buf_map, *global_data;
- struct bpf_program *prog, *fentry, *fexit;
- struct bpf_object *obj, *obj2 = NULL;
- struct perf_buffer_opts pb_opts = {};
+ );
+ struct kfree_skb *skel = NULL;
+ struct bpf_link *link;
+ struct bpf_object *obj;
struct perf_buffer *pb = NULL;
- int err, kfree_skb_fd;
+ int err, prog_fd;
bool passed = false;
__u32 duration = 0;
const int zero = 0;
bool test_ok[2];
- err = bpf_prog_load("./test_pkt_access.o", BPF_PROG_TYPE_SCHED_CLS,
- &obj, &tattr.prog_fd);
+ err = bpf_prog_test_load("./test_pkt_access.bpf.o", BPF_PROG_TYPE_SCHED_CLS,
+ &obj, &prog_fd);
if (CHECK(err, "prog_load sched cls", "err %d errno %d\n", err, errno))
return;
- err = bpf_prog_load_xattr(&attr, &obj2, &kfree_skb_fd);
- if (CHECK(err, "prog_load raw tp", "err %d errno %d\n", err, errno))
+ skel = kfree_skb__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "kfree_skb_skel"))
goto close_prog;
- prog = bpf_object__find_program_by_title(obj2, "tp_btf/kfree_skb");
- if (CHECK(!prog, "find_prog", "prog kfree_skb not found\n"))
- goto close_prog;
- fentry = bpf_object__find_program_by_title(obj2, "fentry/eth_type_trans");
- if (CHECK(!fentry, "find_prog", "prog eth_type_trans not found\n"))
- goto close_prog;
- fexit = bpf_object__find_program_by_title(obj2, "fexit/eth_type_trans");
- if (CHECK(!fexit, "find_prog", "prog eth_type_trans not found\n"))
- goto close_prog;
-
- global_data = bpf_object__find_map_by_name(obj2, "kfree_sk.bss");
- if (CHECK(!global_data, "find global data", "not found\n"))
+ link = bpf_program__attach_raw_tracepoint(skel->progs.trace_kfree_skb, NULL);
+ if (!ASSERT_OK_PTR(link, "attach_raw_tp"))
goto close_prog;
+ skel->links.trace_kfree_skb = link;
- link = bpf_program__attach_raw_tracepoint(prog, NULL);
- if (CHECK(IS_ERR(link), "attach_raw_tp", "err %ld\n", PTR_ERR(link)))
- goto close_prog;
- link_fentry = bpf_program__attach_trace(fentry);
- if (CHECK(IS_ERR(link_fentry), "attach fentry", "err %ld\n",
- PTR_ERR(link_fentry)))
- goto close_prog;
- link_fexit = bpf_program__attach_trace(fexit);
- if (CHECK(IS_ERR(link_fexit), "attach fexit", "err %ld\n",
- PTR_ERR(link_fexit)))
+ link = bpf_program__attach_trace(skel->progs.fentry_eth_type_trans);
+ if (!ASSERT_OK_PTR(link, "attach fentry"))
goto close_prog;
+ skel->links.fentry_eth_type_trans = link;
- perf_buf_map = bpf_object__find_map_by_name(obj2, "perf_buf_map");
- if (CHECK(!perf_buf_map, "find_perf_buf_map", "not found\n"))
+ link = bpf_program__attach_trace(skel->progs.fexit_eth_type_trans);
+ if (!ASSERT_OK_PTR(link, "attach fexit"))
goto close_prog;
+ skel->links.fexit_eth_type_trans = link;
/* set up perf buffer */
- pb_opts.sample_cb = on_sample;
- pb_opts.ctx = &passed;
- pb = perf_buffer__new(bpf_map__fd(perf_buf_map), 1, &pb_opts);
- if (CHECK(IS_ERR(pb), "perf_buf__new", "err %ld\n", PTR_ERR(pb)))
+ pb = perf_buffer__new(bpf_map__fd(skel->maps.perf_buf_map), 1,
+ on_sample, NULL, &passed, NULL);
+ if (!ASSERT_OK_PTR(pb, "perf_buf__new"))
goto close_prog;
memcpy(skb.cb, &cb, sizeof(cb));
- err = bpf_prog_test_run_xattr(&tattr);
- duration = tattr.duration;
- CHECK(err || tattr.retval, "ipv6",
- "err %d errno %d retval %d duration %d\n",
- err, errno, tattr.retval, duration);
+ err = bpf_prog_test_run_opts(prog_fd, &topts);
+ ASSERT_OK(err, "ipv6 test_run");
+ ASSERT_OK(topts.retval, "ipv6 test_run retval");
/* read perf buffer */
err = perf_buffer__poll(pb, 100);
@@ -134,9 +112,9 @@ void test_kfree_skb(void)
/* make sure kfree_skb program was triggered
* and it sent expected skb into ring buffer
*/
- CHECK_FAIL(!passed);
+ ASSERT_TRUE(passed, "passed");
- err = bpf_map_lookup_elem(bpf_map__fd(global_data), &zero, test_ok);
+ err = bpf_map_lookup_elem(bpf_map__fd(skel->maps.bss), &zero, test_ok);
if (CHECK(err, "get_result",
"failed to get output data: %d\n", err))
goto close_prog;
@@ -144,12 +122,6 @@ void test_kfree_skb(void)
CHECK_FAIL(!test_ok[0] || !test_ok[1]);
close_prog:
perf_buffer__free(pb);
- if (!IS_ERR_OR_NULL(link))
- bpf_link__destroy(link);
- if (!IS_ERR_OR_NULL(link_fentry))
- bpf_link__destroy(link_fentry);
- if (!IS_ERR_OR_NULL(link_fexit))
- bpf_link__destroy(link_fexit);
bpf_object__close(obj);
- bpf_object__close(obj2);
+ kfree_skb__destroy(skel);
}
diff --git a/tools/testing/selftests/bpf/prog_tests/kfunc_call.c b/tools/testing/selftests/bpf/prog_tests/kfunc_call.c
new file mode 100644
index 000000000000..2eb71559713c
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/kfunc_call.c
@@ -0,0 +1,322 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2021 Facebook */
+#include <test_progs.h>
+#include <network_helpers.h>
+#include "kfunc_call_fail.skel.h"
+#include "kfunc_call_test.skel.h"
+#include "kfunc_call_test.lskel.h"
+#include "kfunc_call_test_subprog.skel.h"
+#include "kfunc_call_test_subprog.lskel.h"
+#include "kfunc_call_destructive.skel.h"
+
+#include "cap_helpers.h"
+
+static size_t log_buf_sz = 1048576; /* 1 MB */
+static char obj_log_buf[1048576];
+
+enum kfunc_test_type {
+ tc_test = 0,
+ syscall_test,
+ syscall_null_ctx_test,
+};
+
+struct kfunc_test_params {
+ const char *prog_name;
+ unsigned long lskel_prog_desc_offset;
+ int retval;
+ enum kfunc_test_type test_type;
+ const char *expected_err_msg;
+};
+
+#define __BPF_TEST_SUCCESS(name, __retval, type) \
+ { \
+ .prog_name = #name, \
+ .lskel_prog_desc_offset = offsetof(struct kfunc_call_test_lskel, progs.name), \
+ .retval = __retval, \
+ .test_type = type, \
+ .expected_err_msg = NULL, \
+ }
+
+#define __BPF_TEST_FAIL(name, __retval, type, error_msg) \
+ { \
+ .prog_name = #name, \
+ .lskel_prog_desc_offset = 0 /* unused when test is failing */, \
+ .retval = __retval, \
+ .test_type = type, \
+ .expected_err_msg = error_msg, \
+ }
+
+#define TC_TEST(name, retval) __BPF_TEST_SUCCESS(name, retval, tc_test)
+#define SYSCALL_TEST(name, retval) __BPF_TEST_SUCCESS(name, retval, syscall_test)
+#define SYSCALL_NULL_CTX_TEST(name, retval) __BPF_TEST_SUCCESS(name, retval, syscall_null_ctx_test)
+
+#define TC_FAIL(name, retval, error_msg) __BPF_TEST_FAIL(name, retval, tc_test, error_msg)
+#define SYSCALL_NULL_CTX_FAIL(name, retval, error_msg) \
+ __BPF_TEST_FAIL(name, retval, syscall_null_ctx_test, error_msg)
+
+static struct kfunc_test_params kfunc_tests[] = {
+ /* failure cases:
+ * if retval is 0 -> the program will fail to load and the error message is an error
+ * if retval is not 0 -> the program can be loaded but running it will gives the
+ * provided return value. The error message is thus the one
+ * from a successful load
+ */
+ SYSCALL_NULL_CTX_FAIL(kfunc_syscall_test_fail, -EINVAL, "processed 4 insns"),
+ SYSCALL_NULL_CTX_FAIL(kfunc_syscall_test_null_fail, -EINVAL, "processed 4 insns"),
+ TC_FAIL(kfunc_call_test_get_mem_fail_rdonly, 0, "R0 cannot write into rdonly_mem"),
+ TC_FAIL(kfunc_call_test_get_mem_fail_use_after_free, 0, "invalid mem access 'scalar'"),
+ TC_FAIL(kfunc_call_test_get_mem_fail_oob, 0, "min value is outside of the allowed memory range"),
+ TC_FAIL(kfunc_call_test_get_mem_fail_not_const, 0, "is not a const"),
+ TC_FAIL(kfunc_call_test_mem_acquire_fail, 0, "acquire kernel function does not return PTR_TO_BTF_ID"),
+
+ /* success cases */
+ TC_TEST(kfunc_call_test1, 12),
+ TC_TEST(kfunc_call_test2, 3),
+ TC_TEST(kfunc_call_test4, -1234),
+ TC_TEST(kfunc_call_test_ref_btf_id, 0),
+ TC_TEST(kfunc_call_test_get_mem, 42),
+ SYSCALL_TEST(kfunc_syscall_test, 0),
+ SYSCALL_NULL_CTX_TEST(kfunc_syscall_test_null, 0),
+ TC_TEST(kfunc_call_test_static_unused_arg, 0),
+};
+
+struct syscall_test_args {
+ __u8 data[16];
+ size_t size;
+};
+
+static void verify_success(struct kfunc_test_params *param)
+{
+ struct kfunc_call_test_lskel *lskel = NULL;
+ LIBBPF_OPTS(bpf_test_run_opts, topts);
+ struct bpf_prog_desc *lskel_prog;
+ struct kfunc_call_test *skel;
+ struct bpf_program *prog;
+ int prog_fd, err;
+ struct syscall_test_args args = {
+ .size = 10,
+ };
+
+ switch (param->test_type) {
+ case syscall_test:
+ topts.ctx_in = &args;
+ topts.ctx_size_in = sizeof(args);
+ /* fallthrough */
+ case syscall_null_ctx_test:
+ break;
+ case tc_test:
+ topts.data_in = &pkt_v4;
+ topts.data_size_in = sizeof(pkt_v4);
+ topts.repeat = 1;
+ break;
+ }
+
+ /* first test with normal libbpf */
+ skel = kfunc_call_test__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "skel"))
+ return;
+
+ prog = bpf_object__find_program_by_name(skel->obj, param->prog_name);
+ if (!ASSERT_OK_PTR(prog, "bpf_object__find_program_by_name"))
+ goto cleanup;
+
+ prog_fd = bpf_program__fd(prog);
+ err = bpf_prog_test_run_opts(prog_fd, &topts);
+ if (!ASSERT_OK(err, param->prog_name))
+ goto cleanup;
+
+ if (!ASSERT_EQ(topts.retval, param->retval, "retval"))
+ goto cleanup;
+
+ /* second test with light skeletons */
+ lskel = kfunc_call_test_lskel__open_and_load();
+ if (!ASSERT_OK_PTR(lskel, "lskel"))
+ goto cleanup;
+
+ lskel_prog = (struct bpf_prog_desc *)((char *)lskel + param->lskel_prog_desc_offset);
+
+ prog_fd = lskel_prog->prog_fd;
+ err = bpf_prog_test_run_opts(prog_fd, &topts);
+ if (!ASSERT_OK(err, param->prog_name))
+ goto cleanup;
+
+ ASSERT_EQ(topts.retval, param->retval, "retval");
+
+cleanup:
+ kfunc_call_test__destroy(skel);
+ if (lskel)
+ kfunc_call_test_lskel__destroy(lskel);
+}
+
+static void verify_fail(struct kfunc_test_params *param)
+{
+ LIBBPF_OPTS(bpf_object_open_opts, opts);
+ LIBBPF_OPTS(bpf_test_run_opts, topts);
+ struct bpf_program *prog;
+ struct kfunc_call_fail *skel;
+ int prog_fd, err;
+ struct syscall_test_args args = {
+ .size = 10,
+ };
+
+ opts.kernel_log_buf = obj_log_buf;
+ opts.kernel_log_size = log_buf_sz;
+ opts.kernel_log_level = 1;
+
+ switch (param->test_type) {
+ case syscall_test:
+ topts.ctx_in = &args;
+ topts.ctx_size_in = sizeof(args);
+ /* fallthrough */
+ case syscall_null_ctx_test:
+ break;
+ case tc_test:
+ topts.data_in = &pkt_v4;
+ topts.data_size_in = sizeof(pkt_v4);
+ topts.repeat = 1;
+ break;
+ }
+
+ skel = kfunc_call_fail__open_opts(&opts);
+ if (!ASSERT_OK_PTR(skel, "kfunc_call_fail__open_opts"))
+ goto cleanup;
+
+ prog = bpf_object__find_program_by_name(skel->obj, param->prog_name);
+ if (!ASSERT_OK_PTR(prog, "bpf_object__find_program_by_name"))
+ goto cleanup;
+
+ bpf_program__set_autoload(prog, true);
+
+ err = kfunc_call_fail__load(skel);
+ if (!param->retval) {
+ /* the verifier is supposed to complain and refuses to load */
+ if (!ASSERT_ERR(err, "unexpected load success"))
+ goto out_err;
+
+ } else {
+ /* the program is loaded but must dynamically fail */
+ if (!ASSERT_OK(err, "unexpected load error"))
+ goto out_err;
+
+ prog_fd = bpf_program__fd(prog);
+ err = bpf_prog_test_run_opts(prog_fd, &topts);
+ if (!ASSERT_EQ(err, param->retval, param->prog_name))
+ goto out_err;
+ }
+
+out_err:
+ if (!ASSERT_OK_PTR(strstr(obj_log_buf, param->expected_err_msg), "expected_err_msg")) {
+ fprintf(stderr, "Expected err_msg: %s\n", param->expected_err_msg);
+ fprintf(stderr, "Verifier output: %s\n", obj_log_buf);
+ }
+
+cleanup:
+ kfunc_call_fail__destroy(skel);
+}
+
+static void test_main(void)
+{
+ int i;
+
+ for (i = 0; i < ARRAY_SIZE(kfunc_tests); i++) {
+ if (!test__start_subtest(kfunc_tests[i].prog_name))
+ continue;
+
+ if (!kfunc_tests[i].expected_err_msg)
+ verify_success(&kfunc_tests[i]);
+ else
+ verify_fail(&kfunc_tests[i]);
+ }
+}
+
+static void test_subprog(void)
+{
+ struct kfunc_call_test_subprog *skel;
+ int prog_fd, err;
+ LIBBPF_OPTS(bpf_test_run_opts, topts,
+ .data_in = &pkt_v4,
+ .data_size_in = sizeof(pkt_v4),
+ .repeat = 1,
+ );
+
+ skel = kfunc_call_test_subprog__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "skel"))
+ return;
+
+ prog_fd = bpf_program__fd(skel->progs.kfunc_call_test1);
+ err = bpf_prog_test_run_opts(prog_fd, &topts);
+ ASSERT_OK(err, "bpf_prog_test_run(test1)");
+ ASSERT_EQ(topts.retval, 10, "test1-retval");
+ ASSERT_NEQ(skel->data->active_res, -1, "active_res");
+ ASSERT_EQ(skel->data->sk_state_res, BPF_TCP_CLOSE, "sk_state_res");
+
+ kfunc_call_test_subprog__destroy(skel);
+}
+
+static void test_subprog_lskel(void)
+{
+ struct kfunc_call_test_subprog_lskel *skel;
+ int prog_fd, err;
+ LIBBPF_OPTS(bpf_test_run_opts, topts,
+ .data_in = &pkt_v4,
+ .data_size_in = sizeof(pkt_v4),
+ .repeat = 1,
+ );
+
+ skel = kfunc_call_test_subprog_lskel__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "skel"))
+ return;
+
+ prog_fd = skel->progs.kfunc_call_test1.prog_fd;
+ err = bpf_prog_test_run_opts(prog_fd, &topts);
+ ASSERT_OK(err, "bpf_prog_test_run(test1)");
+ ASSERT_EQ(topts.retval, 10, "test1-retval");
+ ASSERT_NEQ(skel->data->active_res, -1, "active_res");
+ ASSERT_EQ(skel->data->sk_state_res, BPF_TCP_CLOSE, "sk_state_res");
+
+ kfunc_call_test_subprog_lskel__destroy(skel);
+}
+
+static int test_destructive_open_and_load(void)
+{
+ struct kfunc_call_destructive *skel;
+ int err;
+
+ skel = kfunc_call_destructive__open();
+ if (!ASSERT_OK_PTR(skel, "prog_open"))
+ return -1;
+
+ err = kfunc_call_destructive__load(skel);
+
+ kfunc_call_destructive__destroy(skel);
+
+ return err;
+}
+
+static void test_destructive(void)
+{
+ __u64 save_caps = 0;
+
+ ASSERT_OK(test_destructive_open_and_load(), "successful_load");
+
+ if (!ASSERT_OK(cap_disable_effective(1ULL << CAP_SYS_BOOT, &save_caps), "drop_caps"))
+ return;
+
+ ASSERT_EQ(test_destructive_open_and_load(), -13, "no_caps_failure");
+
+ cap_enable_effective(save_caps, NULL);
+}
+
+void test_kfunc_call(void)
+{
+ test_main();
+
+ if (test__start_subtest("subprog"))
+ test_subprog();
+
+ if (test__start_subtest("subprog_lskel"))
+ test_subprog_lskel();
+
+ if (test__start_subtest("destructive"))
+ test_destructive();
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/kfunc_dynptr_param.c b/tools/testing/selftests/bpf/prog_tests/kfunc_dynptr_param.c
new file mode 100644
index 000000000000..8cd298b78e44
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/kfunc_dynptr_param.c
@@ -0,0 +1,119 @@
+// SPDX-License-Identifier: GPL-2.0
+
+/*
+ * Copyright (c) 2022 Facebook
+ * Copyright (C) 2022 Huawei Technologies Duesseldorf GmbH
+ *
+ * Author: Roberto Sassu <roberto.sassu@huawei.com>
+ */
+
+#include <test_progs.h>
+#include "test_kfunc_dynptr_param.skel.h"
+
+static struct {
+ const char *prog_name;
+ int expected_runtime_err;
+} kfunc_dynptr_tests[] = {
+ {"dynptr_data_null", -EBADMSG},
+};
+
+static bool kfunc_not_supported;
+
+static int libbpf_print_cb(enum libbpf_print_level level, const char *fmt,
+ va_list args)
+{
+ if (strcmp(fmt, "libbpf: extern (func ksym) '%s': not found in kernel or module BTFs\n"))
+ return 0;
+
+ if (strcmp(va_arg(args, char *), "bpf_verify_pkcs7_signature"))
+ return 0;
+
+ kfunc_not_supported = true;
+ return 0;
+}
+
+static bool has_pkcs7_kfunc_support(void)
+{
+ struct test_kfunc_dynptr_param *skel;
+ libbpf_print_fn_t old_print_cb;
+ int err;
+
+ skel = test_kfunc_dynptr_param__open();
+ if (!ASSERT_OK_PTR(skel, "test_kfunc_dynptr_param__open"))
+ return false;
+
+ kfunc_not_supported = false;
+
+ old_print_cb = libbpf_set_print(libbpf_print_cb);
+ err = test_kfunc_dynptr_param__load(skel);
+ libbpf_set_print(old_print_cb);
+
+ if (err < 0 && kfunc_not_supported) {
+ fprintf(stderr,
+ "%s:SKIP:bpf_verify_pkcs7_signature() kfunc not supported\n",
+ __func__);
+ test_kfunc_dynptr_param__destroy(skel);
+ return false;
+ }
+
+ test_kfunc_dynptr_param__destroy(skel);
+
+ return true;
+}
+
+static void verify_success(const char *prog_name, int expected_runtime_err)
+{
+ struct test_kfunc_dynptr_param *skel;
+ struct bpf_program *prog;
+ struct bpf_link *link;
+ __u32 next_id;
+ int err;
+
+ skel = test_kfunc_dynptr_param__open();
+ if (!ASSERT_OK_PTR(skel, "test_kfunc_dynptr_param__open"))
+ return;
+
+ skel->bss->pid = getpid();
+
+ err = test_kfunc_dynptr_param__load(skel);
+
+ if (!ASSERT_OK(err, "test_kfunc_dynptr_param__load"))
+ goto cleanup;
+
+ prog = bpf_object__find_program_by_name(skel->obj, prog_name);
+ if (!ASSERT_OK_PTR(prog, "bpf_object__find_program_by_name"))
+ goto cleanup;
+
+ link = bpf_program__attach(prog);
+ if (!ASSERT_OK_PTR(link, "bpf_program__attach"))
+ goto cleanup;
+
+ err = bpf_prog_get_next_id(0, &next_id);
+
+ bpf_link__destroy(link);
+
+ if (!ASSERT_OK(err, "bpf_prog_get_next_id"))
+ goto cleanup;
+
+ ASSERT_EQ(skel->bss->err, expected_runtime_err, "err");
+
+cleanup:
+ test_kfunc_dynptr_param__destroy(skel);
+}
+
+void test_kfunc_dynptr_param(void)
+{
+ int i;
+
+ if (!has_pkcs7_kfunc_support())
+ return;
+
+ for (i = 0; i < ARRAY_SIZE(kfunc_dynptr_tests); i++) {
+ if (!test__start_subtest(kfunc_dynptr_tests[i].prog_name))
+ continue;
+
+ verify_success(kfunc_dynptr_tests[i].prog_name,
+ kfunc_dynptr_tests[i].expected_runtime_err);
+ }
+ RUN_TESTS(test_kfunc_dynptr_param);
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/kprobe_multi_test.c b/tools/testing/selftests/bpf/prog_tests/kprobe_multi_test.c
new file mode 100644
index 000000000000..05000810e28e
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/kprobe_multi_test.c
@@ -0,0 +1,541 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <test_progs.h>
+#include "kprobe_multi.skel.h"
+#include "trace_helpers.h"
+#include "kprobe_multi_empty.skel.h"
+#include "kprobe_multi_override.skel.h"
+#include "bpf/libbpf_internal.h"
+#include "bpf/hashmap.h"
+
+static void kprobe_multi_test_run(struct kprobe_multi *skel, bool test_return)
+{
+ LIBBPF_OPTS(bpf_test_run_opts, topts);
+ int err, prog_fd;
+
+ prog_fd = bpf_program__fd(skel->progs.trigger);
+ err = bpf_prog_test_run_opts(prog_fd, &topts);
+ ASSERT_OK(err, "test_run");
+ ASSERT_EQ(topts.retval, 0, "test_run");
+
+ ASSERT_EQ(skel->bss->kprobe_test1_result, 1, "kprobe_test1_result");
+ ASSERT_EQ(skel->bss->kprobe_test2_result, 1, "kprobe_test2_result");
+ ASSERT_EQ(skel->bss->kprobe_test3_result, 1, "kprobe_test3_result");
+ ASSERT_EQ(skel->bss->kprobe_test4_result, 1, "kprobe_test4_result");
+ ASSERT_EQ(skel->bss->kprobe_test5_result, 1, "kprobe_test5_result");
+ ASSERT_EQ(skel->bss->kprobe_test6_result, 1, "kprobe_test6_result");
+ ASSERT_EQ(skel->bss->kprobe_test7_result, 1, "kprobe_test7_result");
+ ASSERT_EQ(skel->bss->kprobe_test8_result, 1, "kprobe_test8_result");
+
+ if (test_return) {
+ ASSERT_EQ(skel->bss->kretprobe_test1_result, 1, "kretprobe_test1_result");
+ ASSERT_EQ(skel->bss->kretprobe_test2_result, 1, "kretprobe_test2_result");
+ ASSERT_EQ(skel->bss->kretprobe_test3_result, 1, "kretprobe_test3_result");
+ ASSERT_EQ(skel->bss->kretprobe_test4_result, 1, "kretprobe_test4_result");
+ ASSERT_EQ(skel->bss->kretprobe_test5_result, 1, "kretprobe_test5_result");
+ ASSERT_EQ(skel->bss->kretprobe_test6_result, 1, "kretprobe_test6_result");
+ ASSERT_EQ(skel->bss->kretprobe_test7_result, 1, "kretprobe_test7_result");
+ ASSERT_EQ(skel->bss->kretprobe_test8_result, 1, "kretprobe_test8_result");
+ }
+}
+
+static void test_skel_api(void)
+{
+ struct kprobe_multi *skel = NULL;
+ int err;
+
+ skel = kprobe_multi__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "kprobe_multi__open_and_load"))
+ goto cleanup;
+
+ skel->bss->pid = getpid();
+ err = kprobe_multi__attach(skel);
+ if (!ASSERT_OK(err, "kprobe_multi__attach"))
+ goto cleanup;
+
+ kprobe_multi_test_run(skel, true);
+
+cleanup:
+ kprobe_multi__destroy(skel);
+}
+
+static void test_link_api(struct bpf_link_create_opts *opts)
+{
+ int prog_fd, link1_fd = -1, link2_fd = -1;
+ struct kprobe_multi *skel = NULL;
+
+ skel = kprobe_multi__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "fentry_raw_skel_load"))
+ goto cleanup;
+
+ skel->bss->pid = getpid();
+ prog_fd = bpf_program__fd(skel->progs.test_kprobe);
+ link1_fd = bpf_link_create(prog_fd, 0, BPF_TRACE_KPROBE_MULTI, opts);
+ if (!ASSERT_GE(link1_fd, 0, "link_fd"))
+ goto cleanup;
+
+ opts->kprobe_multi.flags = BPF_F_KPROBE_MULTI_RETURN;
+ prog_fd = bpf_program__fd(skel->progs.test_kretprobe);
+ link2_fd = bpf_link_create(prog_fd, 0, BPF_TRACE_KPROBE_MULTI, opts);
+ if (!ASSERT_GE(link2_fd, 0, "link_fd"))
+ goto cleanup;
+
+ kprobe_multi_test_run(skel, true);
+
+cleanup:
+ if (link1_fd != -1)
+ close(link1_fd);
+ if (link2_fd != -1)
+ close(link2_fd);
+ kprobe_multi__destroy(skel);
+}
+
+#define GET_ADDR(__sym, __addr) ({ \
+ __addr = ksym_get_addr(__sym); \
+ if (!ASSERT_NEQ(__addr, 0, "kallsyms load failed for " #__sym)) \
+ return; \
+})
+
+static void test_link_api_addrs(void)
+{
+ LIBBPF_OPTS(bpf_link_create_opts, opts);
+ unsigned long long addrs[8];
+
+ GET_ADDR("bpf_fentry_test1", addrs[0]);
+ GET_ADDR("bpf_fentry_test2", addrs[1]);
+ GET_ADDR("bpf_fentry_test3", addrs[2]);
+ GET_ADDR("bpf_fentry_test4", addrs[3]);
+ GET_ADDR("bpf_fentry_test5", addrs[4]);
+ GET_ADDR("bpf_fentry_test6", addrs[5]);
+ GET_ADDR("bpf_fentry_test7", addrs[6]);
+ GET_ADDR("bpf_fentry_test8", addrs[7]);
+
+ opts.kprobe_multi.addrs = (const unsigned long*) addrs;
+ opts.kprobe_multi.cnt = ARRAY_SIZE(addrs);
+ test_link_api(&opts);
+}
+
+static void test_link_api_syms(void)
+{
+ LIBBPF_OPTS(bpf_link_create_opts, opts);
+ const char *syms[8] = {
+ "bpf_fentry_test1",
+ "bpf_fentry_test2",
+ "bpf_fentry_test3",
+ "bpf_fentry_test4",
+ "bpf_fentry_test5",
+ "bpf_fentry_test6",
+ "bpf_fentry_test7",
+ "bpf_fentry_test8",
+ };
+
+ opts.kprobe_multi.syms = syms;
+ opts.kprobe_multi.cnt = ARRAY_SIZE(syms);
+ test_link_api(&opts);
+}
+
+static void
+test_attach_api(const char *pattern, struct bpf_kprobe_multi_opts *opts)
+{
+ struct bpf_link *link1 = NULL, *link2 = NULL;
+ struct kprobe_multi *skel = NULL;
+
+ skel = kprobe_multi__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "fentry_raw_skel_load"))
+ goto cleanup;
+
+ skel->bss->pid = getpid();
+ link1 = bpf_program__attach_kprobe_multi_opts(skel->progs.test_kprobe_manual,
+ pattern, opts);
+ if (!ASSERT_OK_PTR(link1, "bpf_program__attach_kprobe_multi_opts"))
+ goto cleanup;
+
+ if (opts) {
+ opts->retprobe = true;
+ link2 = bpf_program__attach_kprobe_multi_opts(skel->progs.test_kretprobe_manual,
+ pattern, opts);
+ if (!ASSERT_OK_PTR(link2, "bpf_program__attach_kprobe_multi_opts"))
+ goto cleanup;
+ }
+
+ kprobe_multi_test_run(skel, !!opts);
+
+cleanup:
+ bpf_link__destroy(link2);
+ bpf_link__destroy(link1);
+ kprobe_multi__destroy(skel);
+}
+
+static void test_attach_api_pattern(void)
+{
+ LIBBPF_OPTS(bpf_kprobe_multi_opts, opts);
+
+ test_attach_api("bpf_fentry_test*", &opts);
+ test_attach_api("bpf_fentry_test?", NULL);
+}
+
+static void test_attach_api_addrs(void)
+{
+ LIBBPF_OPTS(bpf_kprobe_multi_opts, opts);
+ unsigned long long addrs[8];
+
+ GET_ADDR("bpf_fentry_test1", addrs[0]);
+ GET_ADDR("bpf_fentry_test2", addrs[1]);
+ GET_ADDR("bpf_fentry_test3", addrs[2]);
+ GET_ADDR("bpf_fentry_test4", addrs[3]);
+ GET_ADDR("bpf_fentry_test5", addrs[4]);
+ GET_ADDR("bpf_fentry_test6", addrs[5]);
+ GET_ADDR("bpf_fentry_test7", addrs[6]);
+ GET_ADDR("bpf_fentry_test8", addrs[7]);
+
+ opts.addrs = (const unsigned long *) addrs;
+ opts.cnt = ARRAY_SIZE(addrs);
+ test_attach_api(NULL, &opts);
+}
+
+static void test_attach_api_syms(void)
+{
+ LIBBPF_OPTS(bpf_kprobe_multi_opts, opts);
+ const char *syms[8] = {
+ "bpf_fentry_test1",
+ "bpf_fentry_test2",
+ "bpf_fentry_test3",
+ "bpf_fentry_test4",
+ "bpf_fentry_test5",
+ "bpf_fentry_test6",
+ "bpf_fentry_test7",
+ "bpf_fentry_test8",
+ };
+
+ opts.syms = syms;
+ opts.cnt = ARRAY_SIZE(syms);
+ test_attach_api(NULL, &opts);
+}
+
+static void test_attach_api_fails(void)
+{
+ LIBBPF_OPTS(bpf_kprobe_multi_opts, opts);
+ struct kprobe_multi *skel = NULL;
+ struct bpf_link *link = NULL;
+ unsigned long long addrs[2];
+ const char *syms[2] = {
+ "bpf_fentry_test1",
+ "bpf_fentry_test2",
+ };
+ __u64 cookies[2];
+ int saved_error;
+
+ addrs[0] = ksym_get_addr("bpf_fentry_test1");
+ addrs[1] = ksym_get_addr("bpf_fentry_test2");
+
+ if (!ASSERT_FALSE(!addrs[0] || !addrs[1], "ksym_get_addr"))
+ goto cleanup;
+
+ skel = kprobe_multi__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "fentry_raw_skel_load"))
+ goto cleanup;
+
+ skel->bss->pid = getpid();
+
+ /* fail_1 - pattern and opts NULL */
+ link = bpf_program__attach_kprobe_multi_opts(skel->progs.test_kprobe_manual,
+ NULL, NULL);
+ saved_error = -errno;
+ if (!ASSERT_ERR_PTR(link, "fail_1"))
+ goto cleanup;
+
+ if (!ASSERT_EQ(saved_error, -EINVAL, "fail_1_error"))
+ goto cleanup;
+
+ /* fail_2 - both addrs and syms set */
+ opts.addrs = (const unsigned long *) addrs;
+ opts.syms = syms;
+ opts.cnt = ARRAY_SIZE(syms);
+ opts.cookies = NULL;
+
+ link = bpf_program__attach_kprobe_multi_opts(skel->progs.test_kprobe_manual,
+ NULL, &opts);
+ saved_error = -errno;
+ if (!ASSERT_ERR_PTR(link, "fail_2"))
+ goto cleanup;
+
+ if (!ASSERT_EQ(saved_error, -EINVAL, "fail_2_error"))
+ goto cleanup;
+
+ /* fail_3 - pattern and addrs set */
+ opts.addrs = (const unsigned long *) addrs;
+ opts.syms = NULL;
+ opts.cnt = ARRAY_SIZE(syms);
+ opts.cookies = NULL;
+
+ link = bpf_program__attach_kprobe_multi_opts(skel->progs.test_kprobe_manual,
+ "ksys_*", &opts);
+ saved_error = -errno;
+ if (!ASSERT_ERR_PTR(link, "fail_3"))
+ goto cleanup;
+
+ if (!ASSERT_EQ(saved_error, -EINVAL, "fail_3_error"))
+ goto cleanup;
+
+ /* fail_4 - pattern and cnt set */
+ opts.addrs = NULL;
+ opts.syms = NULL;
+ opts.cnt = ARRAY_SIZE(syms);
+ opts.cookies = NULL;
+
+ link = bpf_program__attach_kprobe_multi_opts(skel->progs.test_kprobe_manual,
+ "ksys_*", &opts);
+ saved_error = -errno;
+ if (!ASSERT_ERR_PTR(link, "fail_4"))
+ goto cleanup;
+
+ if (!ASSERT_EQ(saved_error, -EINVAL, "fail_4_error"))
+ goto cleanup;
+
+ /* fail_5 - pattern and cookies */
+ opts.addrs = NULL;
+ opts.syms = NULL;
+ opts.cnt = 0;
+ opts.cookies = cookies;
+
+ link = bpf_program__attach_kprobe_multi_opts(skel->progs.test_kprobe_manual,
+ "ksys_*", &opts);
+ saved_error = -errno;
+ if (!ASSERT_ERR_PTR(link, "fail_5"))
+ goto cleanup;
+
+ if (!ASSERT_EQ(saved_error, -EINVAL, "fail_5_error"))
+ goto cleanup;
+
+ /* fail_6 - abnormal cnt */
+ opts.addrs = (const unsigned long *) addrs;
+ opts.syms = NULL;
+ opts.cnt = INT_MAX;
+ opts.cookies = NULL;
+
+ link = bpf_program__attach_kprobe_multi_opts(skel->progs.test_kprobe_manual,
+ NULL, &opts);
+ saved_error = -errno;
+ if (!ASSERT_ERR_PTR(link, "fail_6"))
+ goto cleanup;
+
+ if (!ASSERT_EQ(saved_error, -E2BIG, "fail_6_error"))
+ goto cleanup;
+
+cleanup:
+ bpf_link__destroy(link);
+ kprobe_multi__destroy(skel);
+}
+
+static size_t symbol_hash(long key, void *ctx __maybe_unused)
+{
+ return str_hash((const char *) key);
+}
+
+static bool symbol_equal(long key1, long key2, void *ctx __maybe_unused)
+{
+ return strcmp((const char *) key1, (const char *) key2) == 0;
+}
+
+static int get_syms(char ***symsp, size_t *cntp, bool kernel)
+{
+ size_t cap = 0, cnt = 0, i;
+ char *name = NULL, **syms = NULL;
+ struct hashmap *map;
+ char buf[256];
+ FILE *f;
+ int err = 0;
+
+ /*
+ * The available_filter_functions contains many duplicates,
+ * but other than that all symbols are usable in kprobe multi
+ * interface.
+ * Filtering out duplicates by using hashmap__add, which won't
+ * add existing entry.
+ */
+
+ if (access("/sys/kernel/tracing/trace", F_OK) == 0)
+ f = fopen("/sys/kernel/tracing/available_filter_functions", "r");
+ else
+ f = fopen("/sys/kernel/debug/tracing/available_filter_functions", "r");
+
+ if (!f)
+ return -EINVAL;
+
+ map = hashmap__new(symbol_hash, symbol_equal, NULL);
+ if (IS_ERR(map)) {
+ err = libbpf_get_error(map);
+ goto error;
+ }
+
+ while (fgets(buf, sizeof(buf), f)) {
+ if (kernel && strchr(buf, '['))
+ continue;
+ if (!kernel && !strchr(buf, '['))
+ continue;
+
+ free(name);
+ if (sscanf(buf, "%ms$*[^\n]\n", &name) != 1)
+ continue;
+ /*
+ * We attach to almost all kernel functions and some of them
+ * will cause 'suspicious RCU usage' when fprobe is attached
+ * to them. Filter out the current culprits - arch_cpu_idle
+ * default_idle and rcu_* functions.
+ */
+ if (!strcmp(name, "arch_cpu_idle"))
+ continue;
+ if (!strcmp(name, "default_idle"))
+ continue;
+ if (!strncmp(name, "rcu_", 4))
+ continue;
+ if (!strcmp(name, "bpf_dispatcher_xdp_func"))
+ continue;
+ if (!strncmp(name, "__ftrace_invalid_address__",
+ sizeof("__ftrace_invalid_address__") - 1))
+ continue;
+
+ err = hashmap__add(map, name, 0);
+ if (err == -EEXIST) {
+ err = 0;
+ continue;
+ }
+ if (err)
+ goto error;
+
+ err = libbpf_ensure_mem((void **) &syms, &cap,
+ sizeof(*syms), cnt + 1);
+ if (err)
+ goto error;
+
+ syms[cnt++] = name;
+ name = NULL;
+ }
+
+ *symsp = syms;
+ *cntp = cnt;
+
+error:
+ free(name);
+ fclose(f);
+ hashmap__free(map);
+ if (err) {
+ for (i = 0; i < cnt; i++)
+ free(syms[i]);
+ free(syms);
+ }
+ return err;
+}
+
+static void test_kprobe_multi_bench_attach(bool kernel)
+{
+ LIBBPF_OPTS(bpf_kprobe_multi_opts, opts);
+ struct kprobe_multi_empty *skel = NULL;
+ long attach_start_ns, attach_end_ns;
+ long detach_start_ns, detach_end_ns;
+ double attach_delta, detach_delta;
+ struct bpf_link *link = NULL;
+ char **syms = NULL;
+ size_t cnt = 0, i;
+
+ if (!ASSERT_OK(get_syms(&syms, &cnt, kernel), "get_syms"))
+ return;
+
+ skel = kprobe_multi_empty__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "kprobe_multi_empty__open_and_load"))
+ goto cleanup;
+
+ opts.syms = (const char **) syms;
+ opts.cnt = cnt;
+
+ attach_start_ns = get_time_ns();
+ link = bpf_program__attach_kprobe_multi_opts(skel->progs.test_kprobe_empty,
+ NULL, &opts);
+ attach_end_ns = get_time_ns();
+
+ if (!ASSERT_OK_PTR(link, "bpf_program__attach_kprobe_multi_opts"))
+ goto cleanup;
+
+ detach_start_ns = get_time_ns();
+ bpf_link__destroy(link);
+ detach_end_ns = get_time_ns();
+
+ attach_delta = (attach_end_ns - attach_start_ns) / 1000000000.0;
+ detach_delta = (detach_end_ns - detach_start_ns) / 1000000000.0;
+
+ printf("%s: found %lu functions\n", __func__, cnt);
+ printf("%s: attached in %7.3lfs\n", __func__, attach_delta);
+ printf("%s: detached in %7.3lfs\n", __func__, detach_delta);
+
+cleanup:
+ kprobe_multi_empty__destroy(skel);
+ if (syms) {
+ for (i = 0; i < cnt; i++)
+ free(syms[i]);
+ free(syms);
+ }
+}
+
+static void test_attach_override(void)
+{
+ struct kprobe_multi_override *skel = NULL;
+ struct bpf_link *link = NULL;
+
+ skel = kprobe_multi_override__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "kprobe_multi_empty__open_and_load"))
+ goto cleanup;
+
+ /* The test_override calls bpf_override_return so it should fail
+ * to attach to bpf_fentry_test1 function, which is not on error
+ * injection list.
+ */
+ link = bpf_program__attach_kprobe_multi_opts(skel->progs.test_override,
+ "bpf_fentry_test1", NULL);
+ if (!ASSERT_ERR_PTR(link, "override_attached_bpf_fentry_test1")) {
+ bpf_link__destroy(link);
+ goto cleanup;
+ }
+
+ /* The should_fail_bio function is on error injection list,
+ * attach should succeed.
+ */
+ link = bpf_program__attach_kprobe_multi_opts(skel->progs.test_override,
+ "should_fail_bio", NULL);
+ if (!ASSERT_OK_PTR(link, "override_attached_should_fail_bio"))
+ goto cleanup;
+
+ bpf_link__destroy(link);
+
+cleanup:
+ kprobe_multi_override__destroy(skel);
+}
+
+void serial_test_kprobe_multi_bench_attach(void)
+{
+ if (test__start_subtest("kernel"))
+ test_kprobe_multi_bench_attach(true);
+ if (test__start_subtest("modules"))
+ test_kprobe_multi_bench_attach(false);
+}
+
+void test_kprobe_multi_test(void)
+{
+ if (!ASSERT_OK(load_kallsyms(), "load_kallsyms"))
+ return;
+
+ if (test__start_subtest("skel_api"))
+ test_skel_api();
+ if (test__start_subtest("link_api_addrs"))
+ test_link_api_syms();
+ if (test__start_subtest("link_api_syms"))
+ test_link_api_addrs();
+ if (test__start_subtest("attach_api_pattern"))
+ test_attach_api_pattern();
+ if (test__start_subtest("attach_api_addrs"))
+ test_attach_api_addrs();
+ if (test__start_subtest("attach_api_syms"))
+ test_attach_api_syms();
+ if (test__start_subtest("attach_api_fails"))
+ test_attach_api_fails();
+ if (test__start_subtest("attach_override"))
+ test_attach_override();
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/kprobe_multi_testmod_test.c b/tools/testing/selftests/bpf/prog_tests/kprobe_multi_testmod_test.c
new file mode 100644
index 000000000000..9d03528f05db
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/kprobe_multi_testmod_test.c
@@ -0,0 +1,95 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <test_progs.h>
+#include "kprobe_multi.skel.h"
+#include "trace_helpers.h"
+#include "bpf/libbpf_internal.h"
+
+static struct ksyms *ksyms;
+
+static void kprobe_multi_testmod_check(struct kprobe_multi *skel)
+{
+ ASSERT_EQ(skel->bss->kprobe_testmod_test1_result, 1, "kprobe_test1_result");
+ ASSERT_EQ(skel->bss->kprobe_testmod_test2_result, 1, "kprobe_test2_result");
+ ASSERT_EQ(skel->bss->kprobe_testmod_test3_result, 1, "kprobe_test3_result");
+
+ ASSERT_EQ(skel->bss->kretprobe_testmod_test1_result, 1, "kretprobe_test1_result");
+ ASSERT_EQ(skel->bss->kretprobe_testmod_test2_result, 1, "kretprobe_test2_result");
+ ASSERT_EQ(skel->bss->kretprobe_testmod_test3_result, 1, "kretprobe_test3_result");
+}
+
+static void test_testmod_attach_api(struct bpf_kprobe_multi_opts *opts)
+{
+ struct kprobe_multi *skel = NULL;
+
+ skel = kprobe_multi__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "fentry_raw_skel_load"))
+ return;
+
+ skel->bss->pid = getpid();
+
+ skel->links.test_kprobe_testmod = bpf_program__attach_kprobe_multi_opts(
+ skel->progs.test_kprobe_testmod,
+ NULL, opts);
+ if (!skel->links.test_kprobe_testmod)
+ goto cleanup;
+
+ opts->retprobe = true;
+ skel->links.test_kretprobe_testmod = bpf_program__attach_kprobe_multi_opts(
+ skel->progs.test_kretprobe_testmod,
+ NULL, opts);
+ if (!skel->links.test_kretprobe_testmod)
+ goto cleanup;
+
+ ASSERT_OK(trigger_module_test_read(1), "trigger_read");
+ kprobe_multi_testmod_check(skel);
+
+cleanup:
+ kprobe_multi__destroy(skel);
+}
+
+static void test_testmod_attach_api_addrs(void)
+{
+ LIBBPF_OPTS(bpf_kprobe_multi_opts, opts);
+ unsigned long long addrs[3];
+
+ addrs[0] = ksym_get_addr_local(ksyms, "bpf_testmod_fentry_test1");
+ ASSERT_NEQ(addrs[0], 0, "ksym_get_addr_local");
+ addrs[1] = ksym_get_addr_local(ksyms, "bpf_testmod_fentry_test2");
+ ASSERT_NEQ(addrs[1], 0, "ksym_get_addr_local");
+ addrs[2] = ksym_get_addr_local(ksyms, "bpf_testmod_fentry_test3");
+ ASSERT_NEQ(addrs[2], 0, "ksym_get_addr_local");
+
+ opts.addrs = (const unsigned long *) addrs;
+ opts.cnt = ARRAY_SIZE(addrs);
+
+ test_testmod_attach_api(&opts);
+}
+
+static void test_testmod_attach_api_syms(void)
+{
+ LIBBPF_OPTS(bpf_kprobe_multi_opts, opts);
+ const char *syms[3] = {
+ "bpf_testmod_fentry_test1",
+ "bpf_testmod_fentry_test2",
+ "bpf_testmod_fentry_test3",
+ };
+
+ opts.syms = syms;
+ opts.cnt = ARRAY_SIZE(syms);
+ test_testmod_attach_api(&opts);
+}
+
+void serial_test_kprobe_multi_testmod_test(void)
+{
+ ksyms = load_kallsyms_local();
+ if (!ASSERT_OK_PTR(ksyms, "load_kallsyms_local"))
+ return;
+
+ if (test__start_subtest("testmod_attach_api_syms"))
+ test_testmod_attach_api_syms();
+
+ if (test__start_subtest("testmod_attach_api_addrs"))
+ test_testmod_attach_api_addrs();
+
+ free_kallsyms_local(ksyms);
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/kptr_xchg_inline.c b/tools/testing/selftests/bpf/prog_tests/kptr_xchg_inline.c
new file mode 100644
index 000000000000..7def158da9eb
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/kptr_xchg_inline.c
@@ -0,0 +1,52 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (C) 2023. Huawei Technologies Co., Ltd */
+#include <test_progs.h>
+
+#include "linux/filter.h"
+#include "kptr_xchg_inline.skel.h"
+
+void test_kptr_xchg_inline(void)
+{
+ struct kptr_xchg_inline *skel;
+ struct bpf_insn *insn = NULL;
+ struct bpf_insn exp;
+ unsigned int cnt;
+ int err;
+
+#if !(defined(__x86_64__) || defined(__aarch64__) || \
+ (defined(__riscv) && __riscv_xlen == 64))
+ test__skip();
+ return;
+#endif
+
+ skel = kptr_xchg_inline__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "open_load"))
+ return;
+
+ err = get_xlated_program(bpf_program__fd(skel->progs.kptr_xchg_inline), &insn, &cnt);
+ if (!ASSERT_OK(err, "prog insn"))
+ goto out;
+
+ /* The original instructions are:
+ * r1 = map[id:xxx][0]+0
+ * r2 = 0
+ * call bpf_kptr_xchg#yyy
+ *
+ * call bpf_kptr_xchg#yyy will be inlined as:
+ * r0 = r2
+ * r0 = atomic64_xchg((u64 *)(r1 +0), r0)
+ */
+ if (!ASSERT_GT(cnt, 5, "insn cnt"))
+ goto out;
+
+ exp = BPF_MOV64_REG(BPF_REG_0, BPF_REG_2);
+ if (!ASSERT_OK(memcmp(&insn[3], &exp, sizeof(exp)), "mov"))
+ goto out;
+
+ exp = BPF_ATOMIC_OP(BPF_DW, BPF_XCHG, BPF_REG_1, BPF_REG_0, 0);
+ if (!ASSERT_OK(memcmp(&insn[4], &exp, sizeof(exp)), "xchg"))
+ goto out;
+out:
+ free(insn);
+ kptr_xchg_inline__destroy(skel);
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/ksyms.c b/tools/testing/selftests/bpf/prog_tests/ksyms.c
new file mode 100644
index 000000000000..b295969b263b
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/ksyms.c
@@ -0,0 +1,61 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2019 Facebook */
+
+#include <test_progs.h>
+#include "test_ksyms.skel.h"
+#include <sys/stat.h>
+
+static int duration;
+
+void test_ksyms(void)
+{
+ const char *btf_path = "/sys/kernel/btf/vmlinux";
+ struct test_ksyms *skel;
+ struct test_ksyms__data *data;
+ __u64 link_fops_addr, per_cpu_start_addr;
+ struct stat st;
+ __u64 btf_size;
+ int err;
+
+ err = kallsyms_find("bpf_link_fops", &link_fops_addr);
+ if (CHECK(err == -EINVAL, "kallsyms_fopen", "failed to open: %d\n", errno))
+ return;
+ if (CHECK(err == -ENOENT, "ksym_find", "symbol 'bpf_link_fops' not found\n"))
+ return;
+
+ err = kallsyms_find("__per_cpu_start", &per_cpu_start_addr);
+ if (CHECK(err == -EINVAL, "kallsyms_fopen", "failed to open: %d\n", errno))
+ return;
+ if (CHECK(err == -ENOENT, "ksym_find", "symbol 'per_cpu_start' not found\n"))
+ return;
+
+ if (CHECK(stat(btf_path, &st), "stat_btf", "err %d\n", errno))
+ return;
+ btf_size = st.st_size;
+
+ skel = test_ksyms__open_and_load();
+ if (CHECK(!skel, "skel_open", "failed to open and load skeleton\n"))
+ return;
+
+ err = test_ksyms__attach(skel);
+ if (CHECK(err, "skel_attach", "skeleton attach failed: %d\n", err))
+ goto cleanup;
+
+ /* trigger tracepoint */
+ usleep(1);
+
+ data = skel->data;
+ CHECK(data->out__bpf_link_fops != link_fops_addr, "bpf_link_fops",
+ "got 0x%llx, exp 0x%llx\n",
+ data->out__bpf_link_fops, link_fops_addr);
+ CHECK(data->out__bpf_link_fops1 != 0, "bpf_link_fops1",
+ "got %llu, exp %llu\n", data->out__bpf_link_fops1, (__u64)0);
+ CHECK(data->out__btf_size != btf_size, "btf_size",
+ "got %llu, exp %llu\n", data->out__btf_size, btf_size);
+ CHECK(data->out__per_cpu_start != per_cpu_start_addr, "__per_cpu_start",
+ "got %llu, exp %llu\n", data->out__per_cpu_start,
+ per_cpu_start_addr);
+
+cleanup:
+ test_ksyms__destroy(skel);
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/ksyms_btf.c b/tools/testing/selftests/bpf/prog_tests/ksyms_btf.c
new file mode 100644
index 000000000000..1d7a2f1e0731
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/ksyms_btf.c
@@ -0,0 +1,191 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2020 Google */
+
+#include <test_progs.h>
+#include <bpf/libbpf.h>
+#include <bpf/btf.h>
+#include "test_ksyms_btf.skel.h"
+#include "test_ksyms_btf_null_check.skel.h"
+#include "test_ksyms_weak.skel.h"
+#include "test_ksyms_weak.lskel.h"
+#include "test_ksyms_btf_write_check.skel.h"
+
+static int duration;
+
+static void test_basic(void)
+{
+ __u64 runqueues_addr, bpf_prog_active_addr;
+ __u32 this_rq_cpu;
+ int this_bpf_prog_active;
+ struct test_ksyms_btf *skel = NULL;
+ struct test_ksyms_btf__data *data;
+ int err;
+
+ err = kallsyms_find("runqueues", &runqueues_addr);
+ if (CHECK(err == -EINVAL, "kallsyms_fopen", "failed to open: %d\n", errno))
+ return;
+ if (CHECK(err == -ENOENT, "ksym_find", "symbol 'runqueues' not found\n"))
+ return;
+
+ err = kallsyms_find("bpf_prog_active", &bpf_prog_active_addr);
+ if (CHECK(err == -EINVAL, "kallsyms_fopen", "failed to open: %d\n", errno))
+ return;
+ if (CHECK(err == -ENOENT, "ksym_find", "symbol 'bpf_prog_active' not found\n"))
+ return;
+
+ skel = test_ksyms_btf__open_and_load();
+ if (CHECK(!skel, "skel_open", "failed to open and load skeleton\n"))
+ goto cleanup;
+
+ err = test_ksyms_btf__attach(skel);
+ if (CHECK(err, "skel_attach", "skeleton attach failed: %d\n", err))
+ goto cleanup;
+
+ /* trigger tracepoint */
+ usleep(1);
+
+ data = skel->data;
+ CHECK(data->out__runqueues_addr != runqueues_addr, "runqueues_addr",
+ "got %llu, exp %llu\n",
+ (unsigned long long)data->out__runqueues_addr,
+ (unsigned long long)runqueues_addr);
+ CHECK(data->out__bpf_prog_active_addr != bpf_prog_active_addr, "bpf_prog_active_addr",
+ "got %llu, exp %llu\n",
+ (unsigned long long)data->out__bpf_prog_active_addr,
+ (unsigned long long)bpf_prog_active_addr);
+
+ CHECK(data->out__rq_cpu == -1, "rq_cpu",
+ "got %u, exp != -1\n", data->out__rq_cpu);
+ CHECK(data->out__bpf_prog_active < 0, "bpf_prog_active",
+ "got %d, exp >= 0\n", data->out__bpf_prog_active);
+ CHECK(data->out__cpu_0_rq_cpu != 0, "cpu_rq(0)->cpu",
+ "got %u, exp 0\n", data->out__cpu_0_rq_cpu);
+
+ this_rq_cpu = data->out__this_rq_cpu;
+ CHECK(this_rq_cpu != data->out__rq_cpu, "this_rq_cpu",
+ "got %u, exp %u\n", this_rq_cpu, data->out__rq_cpu);
+
+ this_bpf_prog_active = data->out__this_bpf_prog_active;
+ CHECK(this_bpf_prog_active != data->out__bpf_prog_active, "this_bpf_prog_active",
+ "got %d, exp %d\n", this_bpf_prog_active,
+ data->out__bpf_prog_active);
+
+cleanup:
+ test_ksyms_btf__destroy(skel);
+}
+
+static void test_null_check(void)
+{
+ struct test_ksyms_btf_null_check *skel;
+
+ skel = test_ksyms_btf_null_check__open_and_load();
+ CHECK(skel, "skel_open", "unexpected load of a prog missing null check\n");
+
+ test_ksyms_btf_null_check__destroy(skel);
+}
+
+static void test_weak_syms(void)
+{
+ struct test_ksyms_weak *skel;
+ struct test_ksyms_weak__data *data;
+ int err;
+
+ skel = test_ksyms_weak__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "test_ksyms_weak__open_and_load"))
+ return;
+
+ err = test_ksyms_weak__attach(skel);
+ if (!ASSERT_OK(err, "test_ksyms_weak__attach"))
+ goto cleanup;
+
+ /* trigger tracepoint */
+ usleep(1);
+
+ data = skel->data;
+ ASSERT_EQ(data->out__existing_typed, 0, "existing typed ksym");
+ ASSERT_NEQ(data->out__existing_typeless, -1, "existing typeless ksym");
+ ASSERT_EQ(data->out__non_existent_typeless, 0, "nonexistent typeless ksym");
+ ASSERT_EQ(data->out__non_existent_typed, 0, "nonexistent typed ksym");
+
+cleanup:
+ test_ksyms_weak__destroy(skel);
+}
+
+static void test_weak_syms_lskel(void)
+{
+ struct test_ksyms_weak_lskel *skel;
+ struct test_ksyms_weak_lskel__data *data;
+ int err;
+
+ skel = test_ksyms_weak_lskel__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "test_ksyms_weak_lskel__open_and_load"))
+ return;
+
+ err = test_ksyms_weak_lskel__attach(skel);
+ if (!ASSERT_OK(err, "test_ksyms_weak_lskel__attach"))
+ goto cleanup;
+
+ /* trigger tracepoint */
+ usleep(1);
+
+ data = skel->data;
+ ASSERT_EQ(data->out__existing_typed, 0, "existing typed ksym");
+ ASSERT_NEQ(data->out__existing_typeless, -1, "existing typeless ksym");
+ ASSERT_EQ(data->out__non_existent_typeless, 0, "nonexistent typeless ksym");
+ ASSERT_EQ(data->out__non_existent_typed, 0, "nonexistent typed ksym");
+
+cleanup:
+ test_ksyms_weak_lskel__destroy(skel);
+}
+
+static void test_write_check(bool test_handler1)
+{
+ struct test_ksyms_btf_write_check *skel;
+
+ skel = test_ksyms_btf_write_check__open();
+ if (!ASSERT_OK_PTR(skel, "test_ksyms_btf_write_check__open"))
+ return;
+ bpf_program__set_autoload(test_handler1 ? skel->progs.handler2 : skel->progs.handler1, false);
+ ASSERT_ERR(test_ksyms_btf_write_check__load(skel),
+ "unexpected load of a prog writing to ksym memory\n");
+
+ test_ksyms_btf_write_check__destroy(skel);
+}
+
+void test_ksyms_btf(void)
+{
+ int percpu_datasec;
+ struct btf *btf;
+
+ btf = libbpf_find_kernel_btf();
+ if (!ASSERT_OK_PTR(btf, "btf_exists"))
+ return;
+
+ percpu_datasec = btf__find_by_name_kind(btf, ".data..percpu",
+ BTF_KIND_DATASEC);
+ btf__free(btf);
+ if (percpu_datasec < 0) {
+ printf("%s:SKIP:no PERCPU DATASEC in kernel btf\n",
+ __func__);
+ test__skip();
+ return;
+ }
+
+ if (test__start_subtest("basic"))
+ test_basic();
+
+ if (test__start_subtest("null_check"))
+ test_null_check();
+
+ if (test__start_subtest("weak_ksyms"))
+ test_weak_syms();
+
+ if (test__start_subtest("weak_ksyms_lskel"))
+ test_weak_syms_lskel();
+
+ if (test__start_subtest("write_check1"))
+ test_write_check(true);
+
+ if (test__start_subtest("write_check2"))
+ test_write_check(false);
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/ksyms_module.c b/tools/testing/selftests/bpf/prog_tests/ksyms_module.c
new file mode 100644
index 000000000000..a1ebac70ec29
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/ksyms_module.c
@@ -0,0 +1,69 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2021 Facebook */
+
+#include <test_progs.h>
+#include <network_helpers.h>
+#include "test_ksyms_module.lskel.h"
+#include "test_ksyms_module.skel.h"
+
+static void test_ksyms_module_lskel(void)
+{
+ struct test_ksyms_module_lskel *skel;
+ int err;
+ LIBBPF_OPTS(bpf_test_run_opts, topts,
+ .data_in = &pkt_v4,
+ .data_size_in = sizeof(pkt_v4),
+ .repeat = 1,
+ );
+
+ if (!env.has_testmod) {
+ test__skip();
+ return;
+ }
+
+ skel = test_ksyms_module_lskel__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "test_ksyms_module_lskel__open_and_load"))
+ return;
+ err = bpf_prog_test_run_opts(skel->progs.load.prog_fd, &topts);
+ if (!ASSERT_OK(err, "bpf_prog_test_run"))
+ goto cleanup;
+ ASSERT_EQ(topts.retval, 0, "retval");
+ ASSERT_EQ(skel->bss->out_bpf_testmod_ksym, 42, "bpf_testmod_ksym");
+cleanup:
+ test_ksyms_module_lskel__destroy(skel);
+}
+
+static void test_ksyms_module_libbpf(void)
+{
+ struct test_ksyms_module *skel;
+ int err;
+ LIBBPF_OPTS(bpf_test_run_opts, topts,
+ .data_in = &pkt_v4,
+ .data_size_in = sizeof(pkt_v4),
+ .repeat = 1,
+ );
+
+ if (!env.has_testmod) {
+ test__skip();
+ return;
+ }
+
+ skel = test_ksyms_module__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "test_ksyms_module__open"))
+ return;
+ err = bpf_prog_test_run_opts(bpf_program__fd(skel->progs.load), &topts);
+ if (!ASSERT_OK(err, "bpf_prog_test_run"))
+ goto cleanup;
+ ASSERT_EQ(topts.retval, 0, "retval");
+ ASSERT_EQ(skel->bss->out_bpf_testmod_ksym, 42, "bpf_testmod_ksym");
+cleanup:
+ test_ksyms_module__destroy(skel);
+}
+
+void test_ksyms_module(void)
+{
+ if (test__start_subtest("lskel"))
+ test_ksyms_module_lskel();
+ if (test__start_subtest("libbpf"))
+ test_ksyms_module_libbpf();
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/l4lb_all.c b/tools/testing/selftests/bpf/prog_tests/l4lb_all.c
index c2d373e294bb..1eab286b14fe 100644
--- a/tools/testing/selftests/bpf/prog_tests/l4lb_all.c
+++ b/tools/testing/selftests/bpf/prog_tests/l4lb_all.c
@@ -23,14 +23,18 @@ static void test_l4lb(const char *file)
__u8 flags;
} real_def = {.dst = MAGIC_VAL};
__u32 ch_key = 11, real_num = 3;
- __u32 duration, retval, size;
int err, i, prog_fd, map_fd;
__u64 bytes = 0, pkts = 0;
struct bpf_object *obj;
char buf[128];
u32 *magic = (u32 *)buf;
+ LIBBPF_OPTS(bpf_test_run_opts, topts,
+ .data_out = buf,
+ .data_size_out = sizeof(buf),
+ .repeat = NUM_ITER,
+ );
- err = bpf_prog_load(file, BPF_PROG_TYPE_SCHED_CLS, &obj, &prog_fd);
+ err = bpf_prog_test_load(file, BPF_PROG_TYPE_SCHED_CLS, &obj, &prog_fd);
if (CHECK_FAIL(err))
return;
@@ -49,19 +53,24 @@ static void test_l4lb(const char *file)
goto out;
bpf_map_update_elem(map_fd, &real_num, &real_def, 0);
- err = bpf_prog_test_run(prog_fd, NUM_ITER, &pkt_v4, sizeof(pkt_v4),
- buf, &size, &retval, &duration);
- CHECK(err || retval != 7/*TC_ACT_REDIRECT*/ || size != 54 ||
- *magic != MAGIC_VAL, "ipv4",
- "err %d errno %d retval %d size %d magic %x\n",
- err, errno, retval, size, *magic);
+ topts.data_in = &pkt_v4;
+ topts.data_size_in = sizeof(pkt_v4);
- err = bpf_prog_test_run(prog_fd, NUM_ITER, &pkt_v6, sizeof(pkt_v6),
- buf, &size, &retval, &duration);
- CHECK(err || retval != 7/*TC_ACT_REDIRECT*/ || size != 74 ||
- *magic != MAGIC_VAL, "ipv6",
- "err %d errno %d retval %d size %d magic %x\n",
- err, errno, retval, size, *magic);
+ err = bpf_prog_test_run_opts(prog_fd, &topts);
+ ASSERT_OK(err, "test_run");
+ ASSERT_EQ(topts.retval, 7 /*TC_ACT_REDIRECT*/, "ipv4 test_run retval");
+ ASSERT_EQ(topts.data_size_out, 54, "ipv4 test_run data_size_out");
+ ASSERT_EQ(*magic, MAGIC_VAL, "ipv4 magic");
+
+ topts.data_in = &pkt_v6;
+ topts.data_size_in = sizeof(pkt_v6);
+ topts.data_size_out = sizeof(buf); /* reset out size */
+
+ err = bpf_prog_test_run_opts(prog_fd, &topts);
+ ASSERT_OK(err, "test_run");
+ ASSERT_EQ(topts.retval, 7 /*TC_ACT_REDIRECT*/, "ipv6 test_run retval");
+ ASSERT_EQ(topts.data_size_out, 74, "ipv6 test_run data_size_out");
+ ASSERT_EQ(*magic, MAGIC_VAL, "ipv6 magic");
map_fd = bpf_find_map(__func__, obj, "stats");
if (map_fd < 0)
@@ -80,9 +89,10 @@ out:
void test_l4lb_all(void)
{
- const char *file1 = "./test_l4lb.o";
- const char *file2 = "./test_l4lb_noinline.o";
-
- test_l4lb(file1);
- test_l4lb(file2);
+ if (test__start_subtest("l4lb_inline"))
+ test_l4lb("test_l4lb.bpf.o");
+ if (test__start_subtest("l4lb_noinline"))
+ test_l4lb("test_l4lb_noinline.bpf.o");
+ if (test__start_subtest("l4lb_noinline_dynptr"))
+ test_l4lb("test_l4lb_noinline_dynptr.bpf.o");
}
diff --git a/tools/testing/selftests/bpf/prog_tests/legacy_printk.c b/tools/testing/selftests/bpf/prog_tests/legacy_printk.c
new file mode 100644
index 000000000000..ec6e45f2a644
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/legacy_printk.c
@@ -0,0 +1,65 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2021 Facebook */
+#include <test_progs.h>
+#include "test_legacy_printk.skel.h"
+
+static int execute_one_variant(bool legacy)
+{
+ struct test_legacy_printk *skel;
+ int err, zero = 0, my_pid = getpid(), res, map_fd;
+
+ skel = test_legacy_printk__open();
+ if (!ASSERT_OK_PTR(skel, "skel_open"))
+ return -errno;
+
+ bpf_program__set_autoload(skel->progs.handle_legacy, legacy);
+ bpf_program__set_autoload(skel->progs.handle_modern, !legacy);
+
+ err = test_legacy_printk__load(skel);
+ /* no ASSERT_OK, we expect one of two variants can fail here */
+ if (err)
+ goto err_out;
+
+ if (legacy) {
+ map_fd = bpf_map__fd(skel->maps.my_pid_map);
+ err = bpf_map_update_elem(map_fd, &zero, &my_pid, BPF_ANY);
+ if (!ASSERT_OK(err, "my_pid_map_update"))
+ goto err_out;
+ err = bpf_map_lookup_elem(map_fd, &zero, &res);
+ } else {
+ skel->bss->my_pid_var = my_pid;
+ }
+
+ err = test_legacy_printk__attach(skel);
+ if (!ASSERT_OK(err, "skel_attach"))
+ goto err_out;
+
+ usleep(1); /* trigger */
+
+ if (legacy) {
+ map_fd = bpf_map__fd(skel->maps.res_map);
+ err = bpf_map_lookup_elem(map_fd, &zero, &res);
+ if (!ASSERT_OK(err, "res_map_lookup"))
+ goto err_out;
+ } else {
+ res = skel->bss->res_var;
+ }
+
+ if (!ASSERT_GT(res, 0, "res")) {
+ err = -EINVAL;
+ goto err_out;
+ }
+
+err_out:
+ test_legacy_printk__destroy(skel);
+ return err;
+}
+
+void test_legacy_printk(void)
+{
+ /* legacy variant should work everywhere */
+ ASSERT_OK(execute_one_variant(true /* legacy */), "legacy_case");
+
+ /* execute modern variant, can fail the load on old kernels */
+ execute_one_variant(false);
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/libbpf_get_fd_by_id_opts.c b/tools/testing/selftests/bpf/prog_tests/libbpf_get_fd_by_id_opts.c
new file mode 100644
index 000000000000..a3f238f51d05
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/libbpf_get_fd_by_id_opts.c
@@ -0,0 +1,87 @@
+// SPDX-License-Identifier: GPL-2.0
+
+/*
+ * Copyright (C) 2022 Huawei Technologies Duesseldorf GmbH
+ *
+ * Author: Roberto Sassu <roberto.sassu@huawei.com>
+ */
+
+#include <test_progs.h>
+
+#include "test_libbpf_get_fd_by_id_opts.skel.h"
+
+void test_libbpf_get_fd_by_id_opts(void)
+{
+ struct test_libbpf_get_fd_by_id_opts *skel;
+ struct bpf_map_info info_m = {};
+ __u32 len = sizeof(info_m), value;
+ int ret, zero = 0, fd = -1;
+ LIBBPF_OPTS(bpf_get_fd_by_id_opts, fd_opts_rdonly,
+ .open_flags = BPF_F_RDONLY,
+ );
+
+ skel = test_libbpf_get_fd_by_id_opts__open_and_load();
+ if (!ASSERT_OK_PTR(skel,
+ "test_libbpf_get_fd_by_id_opts__open_and_load"))
+ return;
+
+ ret = test_libbpf_get_fd_by_id_opts__attach(skel);
+ if (!ASSERT_OK(ret, "test_libbpf_get_fd_by_id_opts__attach"))
+ goto close_prog;
+
+ ret = bpf_map_get_info_by_fd(bpf_map__fd(skel->maps.data_input),
+ &info_m, &len);
+ if (!ASSERT_OK(ret, "bpf_map_get_info_by_fd"))
+ goto close_prog;
+
+ fd = bpf_map_get_fd_by_id(info_m.id);
+ if (!ASSERT_LT(fd, 0, "bpf_map_get_fd_by_id"))
+ goto close_prog;
+
+ fd = bpf_map_get_fd_by_id_opts(info_m.id, NULL);
+ if (!ASSERT_LT(fd, 0, "bpf_map_get_fd_by_id_opts"))
+ goto close_prog;
+
+ fd = bpf_map_get_fd_by_id_opts(info_m.id, &fd_opts_rdonly);
+ if (!ASSERT_GE(fd, 0, "bpf_map_get_fd_by_id_opts"))
+ goto close_prog;
+
+ /* Map lookup should work with read-only fd. */
+ ret = bpf_map_lookup_elem(fd, &zero, &value);
+ if (!ASSERT_OK(ret, "bpf_map_lookup_elem"))
+ goto close_prog;
+
+ if (!ASSERT_EQ(value, 0, "map value mismatch"))
+ goto close_prog;
+
+ /* Map update should not work with read-only fd. */
+ ret = bpf_map_update_elem(fd, &zero, &len, BPF_ANY);
+ if (!ASSERT_LT(ret, 0, "bpf_map_update_elem"))
+ goto close_prog;
+
+ /* Map update should work with read-write fd. */
+ ret = bpf_map_update_elem(bpf_map__fd(skel->maps.data_input), &zero,
+ &len, BPF_ANY);
+ if (!ASSERT_OK(ret, "bpf_map_update_elem"))
+ goto close_prog;
+
+ /* Prog get fd with opts set should not work (no kernel support). */
+ ret = bpf_prog_get_fd_by_id_opts(0, &fd_opts_rdonly);
+ if (!ASSERT_EQ(ret, -EINVAL, "bpf_prog_get_fd_by_id_opts"))
+ goto close_prog;
+
+ /* Link get fd with opts set should not work (no kernel support). */
+ ret = bpf_link_get_fd_by_id_opts(0, &fd_opts_rdonly);
+ if (!ASSERT_EQ(ret, -EINVAL, "bpf_link_get_fd_by_id_opts"))
+ goto close_prog;
+
+ /* BTF get fd with opts set should not work (no kernel support). */
+ ret = bpf_btf_get_fd_by_id_opts(0, &fd_opts_rdonly);
+ ASSERT_EQ(ret, -EINVAL, "bpf_btf_get_fd_by_id_opts");
+
+close_prog:
+ if (fd >= 0)
+ close(fd);
+
+ test_libbpf_get_fd_by_id_opts__destroy(skel);
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/libbpf_probes.c b/tools/testing/selftests/bpf/prog_tests/libbpf_probes.c
new file mode 100644
index 000000000000..4ed46ed58a7b
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/libbpf_probes.c
@@ -0,0 +1,128 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* Copyright (c) 2021 Facebook */
+
+#include <test_progs.h>
+#include <bpf/btf.h>
+
+void test_libbpf_probe_prog_types(void)
+{
+ struct btf *btf;
+ const struct btf_type *t;
+ const struct btf_enum *e;
+ int i, n, id;
+
+ btf = btf__parse("/sys/kernel/btf/vmlinux", NULL);
+ if (!ASSERT_OK_PTR(btf, "btf_parse"))
+ return;
+
+ /* find enum bpf_prog_type and enumerate each value */
+ id = btf__find_by_name_kind(btf, "bpf_prog_type", BTF_KIND_ENUM);
+ if (!ASSERT_GT(id, 0, "bpf_prog_type_id"))
+ goto cleanup;
+ t = btf__type_by_id(btf, id);
+ if (!ASSERT_OK_PTR(t, "bpf_prog_type_enum"))
+ goto cleanup;
+
+ for (e = btf_enum(t), i = 0, n = btf_vlen(t); i < n; e++, i++) {
+ const char *prog_type_name = btf__str_by_offset(btf, e->name_off);
+ enum bpf_prog_type prog_type = (enum bpf_prog_type)e->val;
+ int res;
+
+ if (prog_type == BPF_PROG_TYPE_UNSPEC)
+ continue;
+ if (strcmp(prog_type_name, "__MAX_BPF_PROG_TYPE") == 0)
+ continue;
+
+ if (!test__start_subtest(prog_type_name))
+ continue;
+
+ res = libbpf_probe_bpf_prog_type(prog_type, NULL);
+ ASSERT_EQ(res, 1, prog_type_name);
+ }
+
+cleanup:
+ btf__free(btf);
+}
+
+void test_libbpf_probe_map_types(void)
+{
+ struct btf *btf;
+ const struct btf_type *t;
+ const struct btf_enum *e;
+ int i, n, id;
+
+ btf = btf__parse("/sys/kernel/btf/vmlinux", NULL);
+ if (!ASSERT_OK_PTR(btf, "btf_parse"))
+ return;
+
+ /* find enum bpf_map_type and enumerate each value */
+ id = btf__find_by_name_kind(btf, "bpf_map_type", BTF_KIND_ENUM);
+ if (!ASSERT_GT(id, 0, "bpf_map_type_id"))
+ goto cleanup;
+ t = btf__type_by_id(btf, id);
+ if (!ASSERT_OK_PTR(t, "bpf_map_type_enum"))
+ goto cleanup;
+
+ for (e = btf_enum(t), i = 0, n = btf_vlen(t); i < n; e++, i++) {
+ const char *map_type_name = btf__str_by_offset(btf, e->name_off);
+ enum bpf_map_type map_type = (enum bpf_map_type)e->val;
+ int res;
+
+ if (map_type == BPF_MAP_TYPE_UNSPEC)
+ continue;
+ if (strcmp(map_type_name, "__MAX_BPF_MAP_TYPE") == 0)
+ continue;
+
+ if (!test__start_subtest(map_type_name))
+ continue;
+
+ res = libbpf_probe_bpf_map_type(map_type, NULL);
+ ASSERT_EQ(res, 1, map_type_name);
+ }
+
+cleanup:
+ btf__free(btf);
+}
+
+void test_libbpf_probe_helpers(void)
+{
+#define CASE(prog, helper, supp) { \
+ .prog_type_name = "BPF_PROG_TYPE_" # prog, \
+ .helper_name = "bpf_" # helper, \
+ .prog_type = BPF_PROG_TYPE_ ## prog, \
+ .helper_id = BPF_FUNC_ ## helper, \
+ .supported = supp, \
+}
+ const struct case_def {
+ const char *prog_type_name;
+ const char *helper_name;
+ enum bpf_prog_type prog_type;
+ enum bpf_func_id helper_id;
+ bool supported;
+ } cases[] = {
+ CASE(KPROBE, unspec, false),
+ CASE(KPROBE, map_lookup_elem, true),
+ CASE(KPROBE, loop, true),
+
+ CASE(KPROBE, ktime_get_coarse_ns, false),
+ CASE(SOCKET_FILTER, ktime_get_coarse_ns, true),
+
+ CASE(KPROBE, sys_bpf, false),
+ CASE(SYSCALL, sys_bpf, true),
+ };
+ size_t case_cnt = ARRAY_SIZE(cases), i;
+ char buf[128];
+
+ for (i = 0; i < case_cnt; i++) {
+ const struct case_def *d = &cases[i];
+ int res;
+
+ snprintf(buf, sizeof(buf), "%s+%s", d->prog_type_name, d->helper_name);
+
+ if (!test__start_subtest(buf))
+ continue;
+
+ res = libbpf_probe_bpf_helper(d->prog_type, d->helper_id, NULL);
+ ASSERT_EQ(res, d->supported, buf);
+ }
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/libbpf_str.c b/tools/testing/selftests/bpf/prog_tests/libbpf_str.c
new file mode 100644
index 000000000000..62ea855ec4d0
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/libbpf_str.c
@@ -0,0 +1,225 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2022 Meta Platforms, Inc. and affiliates. */
+
+#include <ctype.h>
+#include <test_progs.h>
+#include <bpf/btf.h>
+
+/*
+ * Utility function uppercasing an entire string.
+ */
+static void uppercase(char *s)
+{
+ for (; *s != '\0'; s++)
+ *s = toupper(*s);
+}
+
+/*
+ * Test case to check that all bpf_attach_type variants are covered by
+ * libbpf_bpf_attach_type_str.
+ */
+static void test_libbpf_bpf_attach_type_str(void)
+{
+ struct btf *btf;
+ const struct btf_type *t;
+ const struct btf_enum *e;
+ int i, n, id;
+
+ btf = btf__parse("/sys/kernel/btf/vmlinux", NULL);
+ if (!ASSERT_OK_PTR(btf, "btf_parse"))
+ return;
+
+ /* find enum bpf_attach_type and enumerate each value */
+ id = btf__find_by_name_kind(btf, "bpf_attach_type", BTF_KIND_ENUM);
+ if (!ASSERT_GT(id, 0, "bpf_attach_type_id"))
+ goto cleanup;
+ t = btf__type_by_id(btf, id);
+ e = btf_enum(t);
+ n = btf_vlen(t);
+ for (i = 0; i < n; e++, i++) {
+ enum bpf_attach_type attach_type = (enum bpf_attach_type)e->val;
+ const char *attach_type_name;
+ const char *attach_type_str;
+ char buf[256];
+
+ if (attach_type == __MAX_BPF_ATTACH_TYPE)
+ continue;
+
+ attach_type_name = btf__str_by_offset(btf, e->name_off);
+ attach_type_str = libbpf_bpf_attach_type_str(attach_type);
+ ASSERT_OK_PTR(attach_type_str, attach_type_name);
+
+ snprintf(buf, sizeof(buf), "BPF_%s", attach_type_str);
+ uppercase(buf);
+
+ ASSERT_STREQ(buf, attach_type_name, "exp_str_value");
+ }
+
+cleanup:
+ btf__free(btf);
+}
+
+/*
+ * Test case to check that all bpf_link_type variants are covered by
+ * libbpf_bpf_link_type_str.
+ */
+static void test_libbpf_bpf_link_type_str(void)
+{
+ struct btf *btf;
+ const struct btf_type *t;
+ const struct btf_enum *e;
+ int i, n, id;
+
+ btf = btf__parse("/sys/kernel/btf/vmlinux", NULL);
+ if (!ASSERT_OK_PTR(btf, "btf_parse"))
+ return;
+
+ /* find enum bpf_link_type and enumerate each value */
+ id = btf__find_by_name_kind(btf, "bpf_link_type", BTF_KIND_ENUM);
+ if (!ASSERT_GT(id, 0, "bpf_link_type_id"))
+ goto cleanup;
+ t = btf__type_by_id(btf, id);
+ e = btf_enum(t);
+ n = btf_vlen(t);
+ for (i = 0; i < n; e++, i++) {
+ enum bpf_link_type link_type = (enum bpf_link_type)e->val;
+ const char *link_type_name;
+ const char *link_type_str;
+ char buf[256];
+
+ if (link_type == __MAX_BPF_LINK_TYPE)
+ continue;
+
+ link_type_name = btf__str_by_offset(btf, e->name_off);
+ link_type_str = libbpf_bpf_link_type_str(link_type);
+ ASSERT_OK_PTR(link_type_str, link_type_name);
+
+ snprintf(buf, sizeof(buf), "BPF_LINK_TYPE_%s", link_type_str);
+ uppercase(buf);
+
+ ASSERT_STREQ(buf, link_type_name, "exp_str_value");
+ }
+
+cleanup:
+ btf__free(btf);
+}
+
+/*
+ * Test case to check that all bpf_map_type variants are covered by
+ * libbpf_bpf_map_type_str.
+ */
+static void test_libbpf_bpf_map_type_str(void)
+{
+ struct btf *btf;
+ const struct btf_type *t;
+ const struct btf_enum *e;
+ int i, n, id;
+
+ btf = btf__parse("/sys/kernel/btf/vmlinux", NULL);
+ if (!ASSERT_OK_PTR(btf, "btf_parse"))
+ return;
+
+ /* find enum bpf_map_type and enumerate each value */
+ id = btf__find_by_name_kind(btf, "bpf_map_type", BTF_KIND_ENUM);
+ if (!ASSERT_GT(id, 0, "bpf_map_type_id"))
+ goto cleanup;
+ t = btf__type_by_id(btf, id);
+ e = btf_enum(t);
+ n = btf_vlen(t);
+ for (i = 0; i < n; e++, i++) {
+ enum bpf_map_type map_type = (enum bpf_map_type)e->val;
+ const char *map_type_name;
+ const char *map_type_str;
+ char buf[256];
+
+ if (map_type == __MAX_BPF_MAP_TYPE)
+ continue;
+
+ map_type_name = btf__str_by_offset(btf, e->name_off);
+ map_type_str = libbpf_bpf_map_type_str(map_type);
+ ASSERT_OK_PTR(map_type_str, map_type_name);
+
+ snprintf(buf, sizeof(buf), "BPF_MAP_TYPE_%s", map_type_str);
+ uppercase(buf);
+
+ /* Special case for map_type_name BPF_MAP_TYPE_CGROUP_STORAGE_DEPRECATED
+ * where it and BPF_MAP_TYPE_CGROUP_STORAGE have the same enum value
+ * (map_type). For this enum value, libbpf_bpf_map_type_str() picks
+ * BPF_MAP_TYPE_CGROUP_STORAGE. The same for
+ * BPF_MAP_TYPE_PERCPU_CGROUP_STORAGE_DEPRECATED and
+ * BPF_MAP_TYPE_PERCPU_CGROUP_STORAGE.
+ */
+ if (strcmp(map_type_name, "BPF_MAP_TYPE_CGROUP_STORAGE_DEPRECATED") == 0)
+ continue;
+ if (strcmp(map_type_name, "BPF_MAP_TYPE_PERCPU_CGROUP_STORAGE_DEPRECATED") == 0)
+ continue;
+
+ ASSERT_STREQ(buf, map_type_name, "exp_str_value");
+ }
+
+cleanup:
+ btf__free(btf);
+}
+
+/*
+ * Test case to check that all bpf_prog_type variants are covered by
+ * libbpf_bpf_prog_type_str.
+ */
+static void test_libbpf_bpf_prog_type_str(void)
+{
+ struct btf *btf;
+ const struct btf_type *t;
+ const struct btf_enum *e;
+ int i, n, id;
+
+ btf = btf__parse("/sys/kernel/btf/vmlinux", NULL);
+ if (!ASSERT_OK_PTR(btf, "btf_parse"))
+ return;
+
+ /* find enum bpf_prog_type and enumerate each value */
+ id = btf__find_by_name_kind(btf, "bpf_prog_type", BTF_KIND_ENUM);
+ if (!ASSERT_GT(id, 0, "bpf_prog_type_id"))
+ goto cleanup;
+ t = btf__type_by_id(btf, id);
+ e = btf_enum(t);
+ n = btf_vlen(t);
+ for (i = 0; i < n; e++, i++) {
+ enum bpf_prog_type prog_type = (enum bpf_prog_type)e->val;
+ const char *prog_type_name;
+ const char *prog_type_str;
+ char buf[256];
+
+ if (prog_type == __MAX_BPF_PROG_TYPE)
+ continue;
+
+ prog_type_name = btf__str_by_offset(btf, e->name_off);
+ prog_type_str = libbpf_bpf_prog_type_str(prog_type);
+ ASSERT_OK_PTR(prog_type_str, prog_type_name);
+
+ snprintf(buf, sizeof(buf), "BPF_PROG_TYPE_%s", prog_type_str);
+ uppercase(buf);
+
+ ASSERT_STREQ(buf, prog_type_name, "exp_str_value");
+ }
+
+cleanup:
+ btf__free(btf);
+}
+
+/*
+ * Run all libbpf str conversion tests.
+ */
+void test_libbpf_str(void)
+{
+ if (test__start_subtest("bpf_attach_type_str"))
+ test_libbpf_bpf_attach_type_str();
+
+ if (test__start_subtest("bpf_link_type_str"))
+ test_libbpf_bpf_link_type_str();
+
+ if (test__start_subtest("bpf_map_type_str"))
+ test_libbpf_bpf_map_type_str();
+
+ if (test__start_subtest("bpf_prog_type_str"))
+ test_libbpf_bpf_prog_type_str();
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/link_pinning.c b/tools/testing/selftests/bpf/prog_tests/link_pinning.c
index a743288cf384..6fc97c45f71e 100644
--- a/tools/testing/selftests/bpf/prog_tests/link_pinning.c
+++ b/tools/testing/selftests/bpf/prog_tests/link_pinning.c
@@ -17,7 +17,7 @@ void test_link_pinning_subtest(struct bpf_program *prog,
int err, i;
link = bpf_program__attach(prog);
- if (CHECK(IS_ERR(link), "link_attach", "err: %ld\n", PTR_ERR(link)))
+ if (!ASSERT_OK_PTR(link, "link_attach"))
goto cleanup;
bss->in = 1;
@@ -51,7 +51,7 @@ void test_link_pinning_subtest(struct bpf_program *prog,
/* re-open link from BPFFS */
link = bpf_link__open(link_pin_path);
- if (CHECK(IS_ERR(link), "link_open", "err: %ld\n", PTR_ERR(link)))
+ if (!ASSERT_OK_PTR(link, "link_open"))
goto cleanup;
CHECK(strcmp(link_pin_path, bpf_link__pin_path(link)), "pin_path2",
@@ -84,8 +84,7 @@ void test_link_pinning_subtest(struct bpf_program *prog,
CHECK(i == 10000, "link_attached", "got to iteration #%d\n", i);
cleanup:
- if (!IS_ERR(link))
- bpf_link__destroy(link);
+ bpf_link__destroy(link);
}
void test_link_pinning(void)
diff --git a/tools/testing/selftests/bpf/prog_tests/linked_funcs.c b/tools/testing/selftests/bpf/prog_tests/linked_funcs.c
new file mode 100644
index 000000000000..cad664546912
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/linked_funcs.c
@@ -0,0 +1,48 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2021 Facebook */
+
+#include <test_progs.h>
+#include <sys/syscall.h>
+#include "linked_funcs.skel.h"
+
+void test_linked_funcs(void)
+{
+ int err;
+ struct linked_funcs *skel;
+
+ skel = linked_funcs__open();
+ if (!ASSERT_OK_PTR(skel, "skel_open"))
+ return;
+
+ /* handler1 and handler2 are marked as SEC("?raw_tp/sys_enter") and
+ * are set to not autoload by default
+ */
+ bpf_program__set_autoload(skel->progs.handler1, true);
+ bpf_program__set_autoload(skel->progs.handler2, true);
+
+ skel->rodata->my_tid = syscall(SYS_gettid);
+ skel->bss->syscall_id = SYS_getpgid;
+
+ err = linked_funcs__load(skel);
+ if (!ASSERT_OK(err, "skel_load"))
+ goto cleanup;
+
+ err = linked_funcs__attach(skel);
+ if (!ASSERT_OK(err, "skel_attach"))
+ goto cleanup;
+
+ /* trigger */
+ syscall(SYS_getpgid);
+
+ ASSERT_EQ(skel->bss->output_val1, 2000 + 2000, "output_val1");
+ ASSERT_EQ(skel->bss->output_ctx1, SYS_getpgid, "output_ctx1");
+ ASSERT_EQ(skel->bss->output_weak1, 42, "output_weak1");
+
+ ASSERT_EQ(skel->bss->output_val2, 2 * 1000 + 2 * (2 * 1000), "output_val2");
+ ASSERT_EQ(skel->bss->output_ctx2, SYS_getpgid, "output_ctx2");
+ /* output_weak2 should never be updated */
+ ASSERT_EQ(skel->bss->output_weak2, 0, "output_weak2");
+
+cleanup:
+ linked_funcs__destroy(skel);
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/linked_list.c b/tools/testing/selftests/bpf/prog_tests/linked_list.c
new file mode 100644
index 000000000000..2fb89de63bd2
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/linked_list.c
@@ -0,0 +1,795 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <bpf/btf.h>
+#include <test_btf.h>
+#include <linux/btf.h>
+#include <test_progs.h>
+#include <network_helpers.h>
+
+#include "linked_list.skel.h"
+#include "linked_list_fail.skel.h"
+
+static char log_buf[1024 * 1024];
+
+static struct {
+ const char *prog_name;
+ const char *err_msg;
+} linked_list_fail_tests[] = {
+#define TEST(test, off) \
+ { #test "_missing_lock_push_front", \
+ "bpf_spin_lock at off=" #off " must be held for bpf_list_head" }, \
+ { #test "_missing_lock_push_back", \
+ "bpf_spin_lock at off=" #off " must be held for bpf_list_head" }, \
+ { #test "_missing_lock_pop_front", \
+ "bpf_spin_lock at off=" #off " must be held for bpf_list_head" }, \
+ { #test "_missing_lock_pop_back", \
+ "bpf_spin_lock at off=" #off " must be held for bpf_list_head" },
+ TEST(kptr, 40)
+ TEST(global, 16)
+ TEST(map, 0)
+ TEST(inner_map, 0)
+#undef TEST
+#define TEST(test, op) \
+ { #test "_kptr_incorrect_lock_" #op, \
+ "held lock and object are not in the same allocation\n" \
+ "bpf_spin_lock at off=40 must be held for bpf_list_head" }, \
+ { #test "_global_incorrect_lock_" #op, \
+ "held lock and object are not in the same allocation\n" \
+ "bpf_spin_lock at off=16 must be held for bpf_list_head" }, \
+ { #test "_map_incorrect_lock_" #op, \
+ "held lock and object are not in the same allocation\n" \
+ "bpf_spin_lock at off=0 must be held for bpf_list_head" }, \
+ { #test "_inner_map_incorrect_lock_" #op, \
+ "held lock and object are not in the same allocation\n" \
+ "bpf_spin_lock at off=0 must be held for bpf_list_head" },
+ TEST(kptr, push_front)
+ TEST(kptr, push_back)
+ TEST(kptr, pop_front)
+ TEST(kptr, pop_back)
+ TEST(global, push_front)
+ TEST(global, push_back)
+ TEST(global, pop_front)
+ TEST(global, pop_back)
+ TEST(map, push_front)
+ TEST(map, push_back)
+ TEST(map, pop_front)
+ TEST(map, pop_back)
+ TEST(inner_map, push_front)
+ TEST(inner_map, push_back)
+ TEST(inner_map, pop_front)
+ TEST(inner_map, pop_back)
+#undef TEST
+ { "map_compat_kprobe", "tracing progs cannot use bpf_{list_head,rb_root} yet" },
+ { "map_compat_kretprobe", "tracing progs cannot use bpf_{list_head,rb_root} yet" },
+ { "map_compat_tp", "tracing progs cannot use bpf_{list_head,rb_root} yet" },
+ { "map_compat_perf", "tracing progs cannot use bpf_{list_head,rb_root} yet" },
+ { "map_compat_raw_tp", "tracing progs cannot use bpf_{list_head,rb_root} yet" },
+ { "map_compat_raw_tp_w", "tracing progs cannot use bpf_{list_head,rb_root} yet" },
+ { "obj_type_id_oor", "local type ID argument must be in range [0, U32_MAX]" },
+ { "obj_new_no_composite", "bpf_obj_new/bpf_percpu_obj_new type ID argument must be of a struct" },
+ { "obj_new_no_struct", "bpf_obj_new/bpf_percpu_obj_new type ID argument must be of a struct" },
+ { "obj_drop_non_zero_off", "R1 must have zero offset when passed to release func" },
+ { "new_null_ret", "R0 invalid mem access 'ptr_or_null_'" },
+ { "obj_new_acq", "Unreleased reference id=" },
+ { "use_after_drop", "invalid mem access 'scalar'" },
+ { "ptr_walk_scalar", "type=scalar expected=percpu_ptr_" },
+ { "direct_read_lock", "direct access to bpf_spin_lock is disallowed" },
+ { "direct_write_lock", "direct access to bpf_spin_lock is disallowed" },
+ { "direct_read_head", "direct access to bpf_list_head is disallowed" },
+ { "direct_write_head", "direct access to bpf_list_head is disallowed" },
+ { "direct_read_node", "direct access to bpf_list_node is disallowed" },
+ { "direct_write_node", "direct access to bpf_list_node is disallowed" },
+ { "use_after_unlock_push_front", "invalid mem access 'scalar'" },
+ { "use_after_unlock_push_back", "invalid mem access 'scalar'" },
+ { "double_push_front", "arg#1 expected pointer to allocated object" },
+ { "double_push_back", "arg#1 expected pointer to allocated object" },
+ { "no_node_value_type", "bpf_list_node not found at offset=0" },
+ { "incorrect_value_type",
+ "operation on bpf_list_head expects arg#1 bpf_list_node at offset=48 in struct foo, "
+ "but arg is at offset=0 in struct bar" },
+ { "incorrect_node_var_off", "variable ptr_ access var_off=(0x0; 0xffffffff) disallowed" },
+ { "incorrect_node_off1", "bpf_list_node not found at offset=49" },
+ { "incorrect_node_off2", "arg#1 offset=0, but expected bpf_list_node at offset=48 in struct foo" },
+ { "no_head_type", "bpf_list_head not found at offset=0" },
+ { "incorrect_head_var_off1", "R1 doesn't have constant offset" },
+ { "incorrect_head_var_off2", "variable ptr_ access var_off=(0x0; 0xffffffff) disallowed" },
+ { "incorrect_head_off1", "bpf_list_head not found at offset=25" },
+ { "incorrect_head_off2", "bpf_list_head not found at offset=1" },
+ { "pop_front_off", "off 48 doesn't point to 'struct bpf_spin_lock' that is at 40" },
+ { "pop_back_off", "off 48 doesn't point to 'struct bpf_spin_lock' that is at 40" },
+};
+
+static void test_linked_list_fail_prog(const char *prog_name, const char *err_msg)
+{
+ LIBBPF_OPTS(bpf_object_open_opts, opts, .kernel_log_buf = log_buf,
+ .kernel_log_size = sizeof(log_buf),
+ .kernel_log_level = 1);
+ struct linked_list_fail *skel;
+ struct bpf_program *prog;
+ int ret;
+
+ skel = linked_list_fail__open_opts(&opts);
+ if (!ASSERT_OK_PTR(skel, "linked_list_fail__open_opts"))
+ return;
+
+ prog = bpf_object__find_program_by_name(skel->obj, prog_name);
+ if (!ASSERT_OK_PTR(prog, "bpf_object__find_program_by_name"))
+ goto end;
+
+ bpf_program__set_autoload(prog, true);
+
+ ret = linked_list_fail__load(skel);
+ if (!ASSERT_ERR(ret, "linked_list_fail__load must fail"))
+ goto end;
+
+ if (!ASSERT_OK_PTR(strstr(log_buf, err_msg), "expected error message")) {
+ fprintf(stderr, "Expected: %s\n", err_msg);
+ fprintf(stderr, "Verifier: %s\n", log_buf);
+ }
+
+end:
+ linked_list_fail__destroy(skel);
+}
+
+static void clear_fields(struct bpf_map *map)
+{
+ char buf[24];
+ int key = 0;
+
+ memset(buf, 0xff, sizeof(buf));
+ ASSERT_OK(bpf_map__update_elem(map, &key, sizeof(key), buf, sizeof(buf), 0), "check_and_free_fields");
+}
+
+enum {
+ TEST_ALL,
+ PUSH_POP,
+ PUSH_POP_MULT,
+ LIST_IN_LIST,
+};
+
+static void test_linked_list_success(int mode, bool leave_in_map)
+{
+ LIBBPF_OPTS(bpf_test_run_opts, opts,
+ .data_in = &pkt_v4,
+ .data_size_in = sizeof(pkt_v4),
+ .repeat = 1,
+ );
+ struct linked_list *skel;
+ int ret;
+
+ skel = linked_list__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "linked_list__open_and_load"))
+ return;
+
+ if (mode == LIST_IN_LIST)
+ goto lil;
+ if (mode == PUSH_POP_MULT)
+ goto ppm;
+
+ ret = bpf_prog_test_run_opts(bpf_program__fd(skel->progs.map_list_push_pop), &opts);
+ ASSERT_OK(ret, "map_list_push_pop");
+ ASSERT_OK(opts.retval, "map_list_push_pop retval");
+ if (!leave_in_map)
+ clear_fields(skel->maps.array_map);
+
+ ret = bpf_prog_test_run_opts(bpf_program__fd(skel->progs.inner_map_list_push_pop), &opts);
+ ASSERT_OK(ret, "inner_map_list_push_pop");
+ ASSERT_OK(opts.retval, "inner_map_list_push_pop retval");
+ if (!leave_in_map)
+ clear_fields(skel->maps.inner_map);
+
+ ret = bpf_prog_test_run_opts(bpf_program__fd(skel->progs.global_list_push_pop), &opts);
+ ASSERT_OK(ret, "global_list_push_pop");
+ ASSERT_OK(opts.retval, "global_list_push_pop retval");
+ if (!leave_in_map)
+ clear_fields(skel->maps.bss_A);
+
+ if (mode == PUSH_POP)
+ goto end;
+
+ppm:
+ ret = bpf_prog_test_run_opts(bpf_program__fd(skel->progs.map_list_push_pop_multiple), &opts);
+ ASSERT_OK(ret, "map_list_push_pop_multiple");
+ ASSERT_OK(opts.retval, "map_list_push_pop_multiple retval");
+ if (!leave_in_map)
+ clear_fields(skel->maps.array_map);
+
+ ret = bpf_prog_test_run_opts(bpf_program__fd(skel->progs.inner_map_list_push_pop_multiple), &opts);
+ ASSERT_OK(ret, "inner_map_list_push_pop_multiple");
+ ASSERT_OK(opts.retval, "inner_map_list_push_pop_multiple retval");
+ if (!leave_in_map)
+ clear_fields(skel->maps.inner_map);
+
+ ret = bpf_prog_test_run_opts(bpf_program__fd(skel->progs.global_list_push_pop_multiple), &opts);
+ ASSERT_OK(ret, "global_list_push_pop_multiple");
+ ASSERT_OK(opts.retval, "global_list_push_pop_multiple retval");
+ if (!leave_in_map)
+ clear_fields(skel->maps.bss_A);
+
+ if (mode == PUSH_POP_MULT)
+ goto end;
+
+lil:
+ ret = bpf_prog_test_run_opts(bpf_program__fd(skel->progs.map_list_in_list), &opts);
+ ASSERT_OK(ret, "map_list_in_list");
+ ASSERT_OK(opts.retval, "map_list_in_list retval");
+ if (!leave_in_map)
+ clear_fields(skel->maps.array_map);
+
+ ret = bpf_prog_test_run_opts(bpf_program__fd(skel->progs.inner_map_list_in_list), &opts);
+ ASSERT_OK(ret, "inner_map_list_in_list");
+ ASSERT_OK(opts.retval, "inner_map_list_in_list retval");
+ if (!leave_in_map)
+ clear_fields(skel->maps.inner_map);
+
+ ret = bpf_prog_test_run_opts(bpf_program__fd(skel->progs.global_list_in_list), &opts);
+ ASSERT_OK(ret, "global_list_in_list");
+ ASSERT_OK(opts.retval, "global_list_in_list retval");
+ if (!leave_in_map)
+ clear_fields(skel->maps.bss_A);
+end:
+ linked_list__destroy(skel);
+}
+
+#define SPIN_LOCK 2
+#define LIST_HEAD 3
+#define LIST_NODE 4
+
+static struct btf *init_btf(void)
+{
+ int id, lid, hid, nid;
+ struct btf *btf;
+
+ btf = btf__new_empty();
+ if (!ASSERT_OK_PTR(btf, "btf__new_empty"))
+ return NULL;
+ id = btf__add_int(btf, "int", 4, BTF_INT_SIGNED);
+ if (!ASSERT_EQ(id, 1, "btf__add_int"))
+ goto end;
+ lid = btf__add_struct(btf, "bpf_spin_lock", 4);
+ if (!ASSERT_EQ(lid, SPIN_LOCK, "btf__add_struct bpf_spin_lock"))
+ goto end;
+ hid = btf__add_struct(btf, "bpf_list_head", 16);
+ if (!ASSERT_EQ(hid, LIST_HEAD, "btf__add_struct bpf_list_head"))
+ goto end;
+ nid = btf__add_struct(btf, "bpf_list_node", 24);
+ if (!ASSERT_EQ(nid, LIST_NODE, "btf__add_struct bpf_list_node"))
+ goto end;
+ return btf;
+end:
+ btf__free(btf);
+ return NULL;
+}
+
+static void list_and_rb_node_same_struct(bool refcount_field)
+{
+ int bpf_rb_node_btf_id, bpf_refcount_btf_id = 0, foo_btf_id;
+ struct btf *btf;
+ int id, err;
+
+ btf = init_btf();
+ if (!ASSERT_OK_PTR(btf, "init_btf"))
+ return;
+
+ bpf_rb_node_btf_id = btf__add_struct(btf, "bpf_rb_node", 32);
+ if (!ASSERT_GT(bpf_rb_node_btf_id, 0, "btf__add_struct bpf_rb_node"))
+ return;
+
+ if (refcount_field) {
+ bpf_refcount_btf_id = btf__add_struct(btf, "bpf_refcount", 4);
+ if (!ASSERT_GT(bpf_refcount_btf_id, 0, "btf__add_struct bpf_refcount"))
+ return;
+ }
+
+ id = btf__add_struct(btf, "bar", refcount_field ? 60 : 56);
+ if (!ASSERT_GT(id, 0, "btf__add_struct bar"))
+ return;
+ err = btf__add_field(btf, "a", LIST_NODE, 0, 0);
+ if (!ASSERT_OK(err, "btf__add_field bar::a"))
+ return;
+ err = btf__add_field(btf, "c", bpf_rb_node_btf_id, 192, 0);
+ if (!ASSERT_OK(err, "btf__add_field bar::c"))
+ return;
+ if (refcount_field) {
+ err = btf__add_field(btf, "ref", bpf_refcount_btf_id, 448, 0);
+ if (!ASSERT_OK(err, "btf__add_field bar::ref"))
+ return;
+ }
+
+ foo_btf_id = btf__add_struct(btf, "foo", 20);
+ if (!ASSERT_GT(foo_btf_id, 0, "btf__add_struct foo"))
+ return;
+ err = btf__add_field(btf, "a", LIST_HEAD, 0, 0);
+ if (!ASSERT_OK(err, "btf__add_field foo::a"))
+ return;
+ err = btf__add_field(btf, "b", SPIN_LOCK, 128, 0);
+ if (!ASSERT_OK(err, "btf__add_field foo::b"))
+ return;
+ id = btf__add_decl_tag(btf, "contains:bar:a", foo_btf_id, 0);
+ if (!ASSERT_GT(id, 0, "btf__add_decl_tag contains:bar:a"))
+ return;
+
+ err = btf__load_into_kernel(btf);
+ ASSERT_EQ(err, refcount_field ? 0 : -EINVAL, "check btf");
+ btf__free(btf);
+}
+
+static void test_btf(void)
+{
+ struct btf *btf = NULL;
+ int id, err;
+
+ while (test__start_subtest("btf: too many locks")) {
+ btf = init_btf();
+ if (!ASSERT_OK_PTR(btf, "init_btf"))
+ break;
+ id = btf__add_struct(btf, "foo", 24);
+ if (!ASSERT_EQ(id, 5, "btf__add_struct foo"))
+ break;
+ err = btf__add_field(btf, "a", SPIN_LOCK, 0, 0);
+ if (!ASSERT_OK(err, "btf__add_struct foo::a"))
+ break;
+ err = btf__add_field(btf, "b", SPIN_LOCK, 32, 0);
+ if (!ASSERT_OK(err, "btf__add_struct foo::a"))
+ break;
+ err = btf__add_field(btf, "c", LIST_HEAD, 64, 0);
+ if (!ASSERT_OK(err, "btf__add_struct foo::a"))
+ break;
+
+ err = btf__load_into_kernel(btf);
+ ASSERT_EQ(err, -E2BIG, "check btf");
+ btf__free(btf);
+ break;
+ }
+
+ while (test__start_subtest("btf: missing lock")) {
+ btf = init_btf();
+ if (!ASSERT_OK_PTR(btf, "init_btf"))
+ break;
+ id = btf__add_struct(btf, "foo", 16);
+ if (!ASSERT_EQ(id, 5, "btf__add_struct foo"))
+ break;
+ err = btf__add_field(btf, "a", LIST_HEAD, 0, 0);
+ if (!ASSERT_OK(err, "btf__add_struct foo::a"))
+ break;
+ id = btf__add_decl_tag(btf, "contains:baz:a", 5, 0);
+ if (!ASSERT_EQ(id, 6, "btf__add_decl_tag contains:baz:a"))
+ break;
+ id = btf__add_struct(btf, "baz", 16);
+ if (!ASSERT_EQ(id, 7, "btf__add_struct baz"))
+ break;
+ err = btf__add_field(btf, "a", LIST_NODE, 0, 0);
+ if (!ASSERT_OK(err, "btf__add_field baz::a"))
+ break;
+
+ err = btf__load_into_kernel(btf);
+ ASSERT_EQ(err, -EINVAL, "check btf");
+ btf__free(btf);
+ break;
+ }
+
+ while (test__start_subtest("btf: bad offset")) {
+ btf = init_btf();
+ if (!ASSERT_OK_PTR(btf, "init_btf"))
+ break;
+ id = btf__add_struct(btf, "foo", 36);
+ if (!ASSERT_EQ(id, 5, "btf__add_struct foo"))
+ break;
+ err = btf__add_field(btf, "a", LIST_HEAD, 0, 0);
+ if (!ASSERT_OK(err, "btf__add_field foo::a"))
+ break;
+ err = btf__add_field(btf, "b", LIST_NODE, 0, 0);
+ if (!ASSERT_OK(err, "btf__add_field foo::b"))
+ break;
+ err = btf__add_field(btf, "c", SPIN_LOCK, 0, 0);
+ if (!ASSERT_OK(err, "btf__add_field foo::c"))
+ break;
+ id = btf__add_decl_tag(btf, "contains:foo:b", 5, 0);
+ if (!ASSERT_EQ(id, 6, "btf__add_decl_tag contains:foo:b"))
+ break;
+
+ err = btf__load_into_kernel(btf);
+ ASSERT_EQ(err, -EEXIST, "check btf");
+ btf__free(btf);
+ break;
+ }
+
+ while (test__start_subtest("btf: missing contains:")) {
+ btf = init_btf();
+ if (!ASSERT_OK_PTR(btf, "init_btf"))
+ break;
+ id = btf__add_struct(btf, "foo", 24);
+ if (!ASSERT_EQ(id, 5, "btf__add_struct foo"))
+ break;
+ err = btf__add_field(btf, "a", SPIN_LOCK, 0, 0);
+ if (!ASSERT_OK(err, "btf__add_field foo::a"))
+ break;
+ err = btf__add_field(btf, "b", LIST_HEAD, 64, 0);
+ if (!ASSERT_OK(err, "btf__add_field foo::b"))
+ break;
+
+ err = btf__load_into_kernel(btf);
+ ASSERT_EQ(err, -EINVAL, "check btf");
+ btf__free(btf);
+ break;
+ }
+
+ while (test__start_subtest("btf: missing struct")) {
+ btf = init_btf();
+ if (!ASSERT_OK_PTR(btf, "init_btf"))
+ break;
+ id = btf__add_struct(btf, "foo", 24);
+ if (!ASSERT_EQ(id, 5, "btf__add_struct foo"))
+ break;
+ err = btf__add_field(btf, "a", SPIN_LOCK, 0, 0);
+ if (!ASSERT_OK(err, "btf__add_field foo::a"))
+ break;
+ err = btf__add_field(btf, "b", LIST_HEAD, 64, 0);
+ if (!ASSERT_OK(err, "btf__add_field foo::b"))
+ break;
+ id = btf__add_decl_tag(btf, "contains:bar:bar", 5, 1);
+ if (!ASSERT_EQ(id, 6, "btf__add_decl_tag contains:bar:bar"))
+ break;
+
+ err = btf__load_into_kernel(btf);
+ ASSERT_EQ(err, -ENOENT, "check btf");
+ btf__free(btf);
+ break;
+ }
+
+ while (test__start_subtest("btf: missing node")) {
+ btf = init_btf();
+ if (!ASSERT_OK_PTR(btf, "init_btf"))
+ break;
+ id = btf__add_struct(btf, "foo", 24);
+ if (!ASSERT_EQ(id, 5, "btf__add_struct foo"))
+ break;
+ err = btf__add_field(btf, "a", SPIN_LOCK, 0, 0);
+ if (!ASSERT_OK(err, "btf__add_field foo::a"))
+ break;
+ err = btf__add_field(btf, "b", LIST_HEAD, 64, 0);
+ if (!ASSERT_OK(err, "btf__add_field foo::b"))
+ break;
+ id = btf__add_decl_tag(btf, "contains:foo:c", 5, 1);
+ if (!ASSERT_EQ(id, 6, "btf__add_decl_tag contains:foo:c"))
+ break;
+
+ err = btf__load_into_kernel(btf);
+ btf__free(btf);
+ ASSERT_EQ(err, -ENOENT, "check btf");
+ break;
+ }
+
+ while (test__start_subtest("btf: node incorrect type")) {
+ btf = init_btf();
+ if (!ASSERT_OK_PTR(btf, "init_btf"))
+ break;
+ id = btf__add_struct(btf, "foo", 20);
+ if (!ASSERT_EQ(id, 5, "btf__add_struct foo"))
+ break;
+ err = btf__add_field(btf, "a", LIST_HEAD, 0, 0);
+ if (!ASSERT_OK(err, "btf__add_field foo::a"))
+ break;
+ err = btf__add_field(btf, "b", SPIN_LOCK, 128, 0);
+ if (!ASSERT_OK(err, "btf__add_field foo::b"))
+ break;
+ id = btf__add_decl_tag(btf, "contains:bar:a", 5, 0);
+ if (!ASSERT_EQ(id, 6, "btf__add_decl_tag contains:bar:a"))
+ break;
+ id = btf__add_struct(btf, "bar", 4);
+ if (!ASSERT_EQ(id, 7, "btf__add_struct bar"))
+ break;
+ err = btf__add_field(btf, "a", SPIN_LOCK, 0, 0);
+ if (!ASSERT_OK(err, "btf__add_field bar::a"))
+ break;
+
+ err = btf__load_into_kernel(btf);
+ ASSERT_EQ(err, -EINVAL, "check btf");
+ btf__free(btf);
+ break;
+ }
+
+ while (test__start_subtest("btf: multiple bpf_list_node with name b")) {
+ btf = init_btf();
+ if (!ASSERT_OK_PTR(btf, "init_btf"))
+ break;
+ id = btf__add_struct(btf, "foo", 52);
+ if (!ASSERT_EQ(id, 5, "btf__add_struct foo"))
+ break;
+ err = btf__add_field(btf, "a", LIST_HEAD, 0, 0);
+ if (!ASSERT_OK(err, "btf__add_field foo::a"))
+ break;
+ err = btf__add_field(btf, "b", LIST_NODE, 128, 0);
+ if (!ASSERT_OK(err, "btf__add_field foo::b"))
+ break;
+ err = btf__add_field(btf, "b", LIST_NODE, 256, 0);
+ if (!ASSERT_OK(err, "btf__add_field foo::c"))
+ break;
+ err = btf__add_field(btf, "d", SPIN_LOCK, 384, 0);
+ if (!ASSERT_OK(err, "btf__add_field foo::d"))
+ break;
+ id = btf__add_decl_tag(btf, "contains:foo:b", 5, 0);
+ if (!ASSERT_EQ(id, 6, "btf__add_decl_tag contains:foo:b"))
+ break;
+
+ err = btf__load_into_kernel(btf);
+ ASSERT_EQ(err, -EINVAL, "check btf");
+ btf__free(btf);
+ break;
+ }
+
+ while (test__start_subtest("btf: owning | owned AA cycle")) {
+ btf = init_btf();
+ if (!ASSERT_OK_PTR(btf, "init_btf"))
+ break;
+ id = btf__add_struct(btf, "foo", 44);
+ if (!ASSERT_EQ(id, 5, "btf__add_struct foo"))
+ break;
+ err = btf__add_field(btf, "a", LIST_HEAD, 0, 0);
+ if (!ASSERT_OK(err, "btf__add_field foo::a"))
+ break;
+ err = btf__add_field(btf, "b", LIST_NODE, 128, 0);
+ if (!ASSERT_OK(err, "btf__add_field foo::b"))
+ break;
+ err = btf__add_field(btf, "c", SPIN_LOCK, 320, 0);
+ if (!ASSERT_OK(err, "btf__add_field foo::c"))
+ break;
+ id = btf__add_decl_tag(btf, "contains:foo:b", 5, 0);
+ if (!ASSERT_EQ(id, 6, "btf__add_decl_tag contains:foo:b"))
+ break;
+
+ err = btf__load_into_kernel(btf);
+ ASSERT_EQ(err, -ELOOP, "check btf");
+ btf__free(btf);
+ break;
+ }
+
+ while (test__start_subtest("btf: owning | owned ABA cycle")) {
+ btf = init_btf();
+ if (!ASSERT_OK_PTR(btf, "init_btf"))
+ break;
+ id = btf__add_struct(btf, "foo", 44);
+ if (!ASSERT_EQ(id, 5, "btf__add_struct foo"))
+ break;
+ err = btf__add_field(btf, "a", LIST_HEAD, 0, 0);
+ if (!ASSERT_OK(err, "btf__add_field foo::a"))
+ break;
+ err = btf__add_field(btf, "b", LIST_NODE, 128, 0);
+ if (!ASSERT_OK(err, "btf__add_field foo::b"))
+ break;
+ err = btf__add_field(btf, "c", SPIN_LOCK, 320, 0);
+ if (!ASSERT_OK(err, "btf__add_field foo::c"))
+ break;
+ id = btf__add_decl_tag(btf, "contains:bar:b", 5, 0);
+ if (!ASSERT_EQ(id, 6, "btf__add_decl_tag contains:bar:b"))
+ break;
+ id = btf__add_struct(btf, "bar", 44);
+ if (!ASSERT_EQ(id, 7, "btf__add_struct bar"))
+ break;
+ err = btf__add_field(btf, "a", LIST_HEAD, 0, 0);
+ if (!ASSERT_OK(err, "btf__add_field bar::a"))
+ break;
+ err = btf__add_field(btf, "b", LIST_NODE, 128, 0);
+ if (!ASSERT_OK(err, "btf__add_field bar::b"))
+ break;
+ err = btf__add_field(btf, "c", SPIN_LOCK, 320, 0);
+ if (!ASSERT_OK(err, "btf__add_field bar::c"))
+ break;
+ id = btf__add_decl_tag(btf, "contains:foo:b", 7, 0);
+ if (!ASSERT_EQ(id, 8, "btf__add_decl_tag contains:foo:b"))
+ break;
+
+ err = btf__load_into_kernel(btf);
+ ASSERT_EQ(err, -ELOOP, "check btf");
+ btf__free(btf);
+ break;
+ }
+
+ while (test__start_subtest("btf: owning -> owned")) {
+ btf = init_btf();
+ if (!ASSERT_OK_PTR(btf, "init_btf"))
+ break;
+ id = btf__add_struct(btf, "foo", 28);
+ if (!ASSERT_EQ(id, 5, "btf__add_struct foo"))
+ break;
+ err = btf__add_field(btf, "a", LIST_HEAD, 0, 0);
+ if (!ASSERT_OK(err, "btf__add_field foo::a"))
+ break;
+ err = btf__add_field(btf, "b", SPIN_LOCK, 192, 0);
+ if (!ASSERT_OK(err, "btf__add_field foo::b"))
+ break;
+ id = btf__add_decl_tag(btf, "contains:bar:a", 5, 0);
+ if (!ASSERT_EQ(id, 6, "btf__add_decl_tag contains:bar:a"))
+ break;
+ id = btf__add_struct(btf, "bar", 24);
+ if (!ASSERT_EQ(id, 7, "btf__add_struct bar"))
+ break;
+ err = btf__add_field(btf, "a", LIST_NODE, 0, 0);
+ if (!ASSERT_OK(err, "btf__add_field bar::a"))
+ break;
+
+ err = btf__load_into_kernel(btf);
+ ASSERT_EQ(err, 0, "check btf");
+ btf__free(btf);
+ break;
+ }
+
+ while (test__start_subtest("btf: owning -> owning | owned -> owned")) {
+ btf = init_btf();
+ if (!ASSERT_OK_PTR(btf, "init_btf"))
+ break;
+ id = btf__add_struct(btf, "foo", 28);
+ if (!ASSERT_EQ(id, 5, "btf__add_struct foo"))
+ break;
+ err = btf__add_field(btf, "a", LIST_HEAD, 0, 0);
+ if (!ASSERT_OK(err, "btf__add_field foo::a"))
+ break;
+ err = btf__add_field(btf, "b", SPIN_LOCK, 192, 0);
+ if (!ASSERT_OK(err, "btf__add_field foo::b"))
+ break;
+ id = btf__add_decl_tag(btf, "contains:bar:b", 5, 0);
+ if (!ASSERT_EQ(id, 6, "btf__add_decl_tag contains:bar:b"))
+ break;
+ id = btf__add_struct(btf, "bar", 44);
+ if (!ASSERT_EQ(id, 7, "btf__add_struct bar"))
+ break;
+ err = btf__add_field(btf, "a", LIST_HEAD, 0, 0);
+ if (!ASSERT_OK(err, "btf__add_field bar::a"))
+ break;
+ err = btf__add_field(btf, "b", LIST_NODE, 128, 0);
+ if (!ASSERT_OK(err, "btf__add_field bar::b"))
+ break;
+ err = btf__add_field(btf, "c", SPIN_LOCK, 320, 0);
+ if (!ASSERT_OK(err, "btf__add_field bar::c"))
+ break;
+ id = btf__add_decl_tag(btf, "contains:baz:a", 7, 0);
+ if (!ASSERT_EQ(id, 8, "btf__add_decl_tag contains:baz:a"))
+ break;
+ id = btf__add_struct(btf, "baz", 24);
+ if (!ASSERT_EQ(id, 9, "btf__add_struct baz"))
+ break;
+ err = btf__add_field(btf, "a", LIST_NODE, 0, 0);
+ if (!ASSERT_OK(err, "btf__add_field baz:a"))
+ break;
+
+ err = btf__load_into_kernel(btf);
+ ASSERT_EQ(err, 0, "check btf");
+ btf__free(btf);
+ break;
+ }
+
+ while (test__start_subtest("btf: owning | owned -> owning | owned -> owned")) {
+ btf = init_btf();
+ if (!ASSERT_OK_PTR(btf, "init_btf"))
+ break;
+ id = btf__add_struct(btf, "foo", 44);
+ if (!ASSERT_EQ(id, 5, "btf__add_struct foo"))
+ break;
+ err = btf__add_field(btf, "a", LIST_HEAD, 0, 0);
+ if (!ASSERT_OK(err, "btf__add_field foo::a"))
+ break;
+ err = btf__add_field(btf, "b", LIST_NODE, 128, 0);
+ if (!ASSERT_OK(err, "btf__add_field foo::b"))
+ break;
+ err = btf__add_field(btf, "c", SPIN_LOCK, 320, 0);
+ if (!ASSERT_OK(err, "btf__add_field foo::c"))
+ break;
+ id = btf__add_decl_tag(btf, "contains:bar:b", 5, 0);
+ if (!ASSERT_EQ(id, 6, "btf__add_decl_tag contains:bar:b"))
+ break;
+ id = btf__add_struct(btf, "bar", 44);
+ if (!ASSERT_EQ(id, 7, "btf__add_struct bar"))
+ break;
+ err = btf__add_field(btf, "a", LIST_HEAD, 0, 0);
+ if (!ASSERT_OK(err, "btf__add_field bar:a"))
+ break;
+ err = btf__add_field(btf, "b", LIST_NODE, 128, 0);
+ if (!ASSERT_OK(err, "btf__add_field bar:b"))
+ break;
+ err = btf__add_field(btf, "c", SPIN_LOCK, 320, 0);
+ if (!ASSERT_OK(err, "btf__add_field bar:c"))
+ break;
+ id = btf__add_decl_tag(btf, "contains:baz:a", 7, 0);
+ if (!ASSERT_EQ(id, 8, "btf__add_decl_tag contains:baz:a"))
+ break;
+ id = btf__add_struct(btf, "baz", 24);
+ if (!ASSERT_EQ(id, 9, "btf__add_struct baz"))
+ break;
+ err = btf__add_field(btf, "a", LIST_NODE, 0, 0);
+ if (!ASSERT_OK(err, "btf__add_field baz:a"))
+ break;
+
+ err = btf__load_into_kernel(btf);
+ ASSERT_EQ(err, -ELOOP, "check btf");
+ btf__free(btf);
+ break;
+ }
+
+ while (test__start_subtest("btf: owning -> owning | owned -> owning | owned -> owned")) {
+ btf = init_btf();
+ if (!ASSERT_OK_PTR(btf, "init_btf"))
+ break;
+ id = btf__add_struct(btf, "foo", 20);
+ if (!ASSERT_EQ(id, 5, "btf__add_struct foo"))
+ break;
+ err = btf__add_field(btf, "a", LIST_HEAD, 0, 0);
+ if (!ASSERT_OK(err, "btf__add_field foo::a"))
+ break;
+ err = btf__add_field(btf, "b", SPIN_LOCK, 128, 0);
+ if (!ASSERT_OK(err, "btf__add_field foo::b"))
+ break;
+ id = btf__add_decl_tag(btf, "contains:bar:b", 5, 0);
+ if (!ASSERT_EQ(id, 6, "btf__add_decl_tag contains:bar:b"))
+ break;
+ id = btf__add_struct(btf, "bar", 44);
+ if (!ASSERT_EQ(id, 7, "btf__add_struct bar"))
+ break;
+ err = btf__add_field(btf, "a", LIST_HEAD, 0, 0);
+ if (!ASSERT_OK(err, "btf__add_field bar::a"))
+ break;
+ err = btf__add_field(btf, "b", LIST_NODE, 128, 0);
+ if (!ASSERT_OK(err, "btf__add_field bar::b"))
+ break;
+ err = btf__add_field(btf, "c", SPIN_LOCK, 320, 0);
+ if (!ASSERT_OK(err, "btf__add_field bar::c"))
+ break;
+ id = btf__add_decl_tag(btf, "contains:baz:b", 7, 0);
+ if (!ASSERT_EQ(id, 8, "btf__add_decl_tag"))
+ break;
+ id = btf__add_struct(btf, "baz", 44);
+ if (!ASSERT_EQ(id, 9, "btf__add_struct baz"))
+ break;
+ err = btf__add_field(btf, "a", LIST_HEAD, 0, 0);
+ if (!ASSERT_OK(err, "btf__add_field bar::a"))
+ break;
+ err = btf__add_field(btf, "b", LIST_NODE, 128, 0);
+ if (!ASSERT_OK(err, "btf__add_field bar::b"))
+ break;
+ err = btf__add_field(btf, "c", SPIN_LOCK, 320, 0);
+ if (!ASSERT_OK(err, "btf__add_field bar::c"))
+ break;
+ id = btf__add_decl_tag(btf, "contains:bam:a", 9, 0);
+ if (!ASSERT_EQ(id, 10, "btf__add_decl_tag contains:bam:a"))
+ break;
+ id = btf__add_struct(btf, "bam", 24);
+ if (!ASSERT_EQ(id, 11, "btf__add_struct bam"))
+ break;
+ err = btf__add_field(btf, "a", LIST_NODE, 0, 0);
+ if (!ASSERT_OK(err, "btf__add_field bam::a"))
+ break;
+
+ err = btf__load_into_kernel(btf);
+ ASSERT_EQ(err, -ELOOP, "check btf");
+ btf__free(btf);
+ break;
+ }
+
+ while (test__start_subtest("btf: list_node and rb_node in same struct")) {
+ list_and_rb_node_same_struct(true);
+ break;
+ }
+
+ while (test__start_subtest("btf: list_node and rb_node in same struct, no bpf_refcount")) {
+ list_and_rb_node_same_struct(false);
+ break;
+ }
+}
+
+void test_linked_list(void)
+{
+ int i;
+
+ for (i = 0; i < ARRAY_SIZE(linked_list_fail_tests); i++) {
+ if (!test__start_subtest(linked_list_fail_tests[i].prog_name))
+ continue;
+ test_linked_list_fail_prog(linked_list_fail_tests[i].prog_name,
+ linked_list_fail_tests[i].err_msg);
+ }
+ test_btf();
+ test_linked_list_success(PUSH_POP, false);
+ test_linked_list_success(PUSH_POP, true);
+ test_linked_list_success(PUSH_POP_MULT, false);
+ test_linked_list_success(PUSH_POP_MULT, true);
+ test_linked_list_success(LIST_IN_LIST, false);
+ test_linked_list_success(LIST_IN_LIST, true);
+ test_linked_list_success(TEST_ALL, false);
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/linked_maps.c b/tools/testing/selftests/bpf/prog_tests/linked_maps.c
new file mode 100644
index 000000000000..85dcaaaf2775
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/linked_maps.c
@@ -0,0 +1,30 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2021 Facebook */
+
+#include <test_progs.h>
+#include <sys/syscall.h>
+#include "linked_maps.skel.h"
+
+void test_linked_maps(void)
+{
+ int err;
+ struct linked_maps *skel;
+
+ skel = linked_maps__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "skel_open"))
+ return;
+
+ err = linked_maps__attach(skel);
+ if (!ASSERT_OK(err, "skel_attach"))
+ goto cleanup;
+
+ /* trigger */
+ syscall(SYS_getpgid);
+
+ ASSERT_EQ(skel->bss->output_first1, 2000, "output_first1");
+ ASSERT_EQ(skel->bss->output_second1, 2, "output_second1");
+ ASSERT_EQ(skel->bss->output_weak1, 2, "output_weak1");
+
+cleanup:
+ linked_maps__destroy(skel);
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/linked_vars.c b/tools/testing/selftests/bpf/prog_tests/linked_vars.c
new file mode 100644
index 000000000000..267166abe4c1
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/linked_vars.c
@@ -0,0 +1,43 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2021 Facebook */
+
+#include <test_progs.h>
+#include <sys/syscall.h>
+#include "linked_vars.skel.h"
+
+void test_linked_vars(void)
+{
+ int err;
+ struct linked_vars *skel;
+
+ skel = linked_vars__open();
+ if (!ASSERT_OK_PTR(skel, "skel_open"))
+ return;
+
+ skel->bss->input_bss1 = 1000;
+ skel->bss->input_bss2 = 2000;
+ skel->bss->input_bss_weak = 3000;
+
+ err = linked_vars__load(skel);
+ if (!ASSERT_OK(err, "skel_load"))
+ goto cleanup;
+
+ err = linked_vars__attach(skel);
+ if (!ASSERT_OK(err, "skel_attach"))
+ goto cleanup;
+
+ /* trigger */
+ syscall(SYS_getpgid);
+
+ ASSERT_EQ(skel->bss->output_bss1, 1000 + 2000 + 3000, "output_bss1");
+ ASSERT_EQ(skel->bss->output_bss2, 1000 + 2000 + 3000, "output_bss2");
+ /* 10 comes from "winner" input_data_weak in first obj file */
+ ASSERT_EQ(skel->bss->output_data1, 1 + 2 + 10, "output_bss1");
+ ASSERT_EQ(skel->bss->output_data2, 1 + 2 + 10, "output_bss2");
+ /* 100 comes from "winner" input_rodata_weak in first obj file */
+ ASSERT_EQ(skel->bss->output_rodata1, 11 + 22 + 100, "output_weak1");
+ ASSERT_EQ(skel->bss->output_rodata2, 11 + 22 + 100, "output_weak2");
+
+cleanup:
+ linked_vars__destroy(skel);
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/load_bytes_relative.c b/tools/testing/selftests/bpf/prog_tests/load_bytes_relative.c
index c1168e4a9036..581c0eb0a0a1 100644
--- a/tools/testing/selftests/bpf/prog_tests/load_bytes_relative.c
+++ b/tools/testing/selftests/bpf/prog_tests/load_bytes_relative.c
@@ -23,12 +23,12 @@ void test_load_bytes_relative(void)
if (CHECK_FAIL(cgroup_fd < 0))
return;
- server_fd = start_server(AF_INET, SOCK_STREAM);
+ server_fd = start_server(AF_INET, SOCK_STREAM, NULL, 0, 0);
if (CHECK_FAIL(server_fd < 0))
goto close_cgroup_fd;
- err = bpf_prog_load("./load_bytes_relative.o", BPF_PROG_TYPE_CGROUP_SKB,
- &obj, &prog_fd);
+ err = bpf_prog_test_load("./load_bytes_relative.bpf.o", BPF_PROG_TYPE_CGROUP_SKB,
+ &obj, &prog_fd);
if (CHECK_FAIL(err))
goto close_server_fd;
@@ -49,7 +49,7 @@ void test_load_bytes_relative(void)
if (CHECK_FAIL(err))
goto close_bpf_object;
- client_fd = connect_to_fd(AF_INET, SOCK_STREAM, server_fd);
+ client_fd = connect_to_fd(server_fd, 0);
if (CHECK_FAIL(client_fd < 0))
goto close_bpf_object;
close(client_fd);
diff --git a/tools/testing/selftests/bpf/prog_tests/local_kptr_stash.c b/tools/testing/selftests/bpf/prog_tests/local_kptr_stash.c
new file mode 100644
index 000000000000..827e713f6cf1
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/local_kptr_stash.c
@@ -0,0 +1,147 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2023 Meta Platforms, Inc. and affiliates. */
+
+#include <test_progs.h>
+#include <network_helpers.h>
+
+#include "local_kptr_stash.skel.h"
+#include "local_kptr_stash_fail.skel.h"
+static void test_local_kptr_stash_simple(void)
+{
+ LIBBPF_OPTS(bpf_test_run_opts, opts,
+ .data_in = &pkt_v4,
+ .data_size_in = sizeof(pkt_v4),
+ .repeat = 1,
+ );
+ struct local_kptr_stash *skel;
+ int ret;
+
+ skel = local_kptr_stash__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "local_kptr_stash__open_and_load"))
+ return;
+
+ ret = bpf_prog_test_run_opts(bpf_program__fd(skel->progs.stash_rb_nodes), &opts);
+ ASSERT_OK(ret, "local_kptr_stash_add_nodes run");
+ ASSERT_OK(opts.retval, "local_kptr_stash_add_nodes retval");
+
+ local_kptr_stash__destroy(skel);
+}
+
+static void test_local_kptr_stash_plain(void)
+{
+ LIBBPF_OPTS(bpf_test_run_opts, opts,
+ .data_in = &pkt_v4,
+ .data_size_in = sizeof(pkt_v4),
+ .repeat = 1,
+ );
+ struct local_kptr_stash *skel;
+ int ret;
+
+ skel = local_kptr_stash__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "local_kptr_stash__open_and_load"))
+ return;
+
+ ret = bpf_prog_test_run_opts(bpf_program__fd(skel->progs.stash_plain), &opts);
+ ASSERT_OK(ret, "local_kptr_stash_add_plain run");
+ ASSERT_OK(opts.retval, "local_kptr_stash_add_plain retval");
+
+ local_kptr_stash__destroy(skel);
+}
+
+static void test_local_kptr_stash_local_with_root(void)
+{
+ LIBBPF_OPTS(bpf_test_run_opts, opts,
+ .data_in = &pkt_v4,
+ .data_size_in = sizeof(pkt_v4),
+ .repeat = 1,
+ );
+ struct local_kptr_stash *skel;
+ int ret;
+
+ skel = local_kptr_stash__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "local_kptr_stash__open_and_load"))
+ return;
+
+ ret = bpf_prog_test_run_opts(bpf_program__fd(skel->progs.stash_local_with_root), &opts);
+ ASSERT_OK(ret, "local_kptr_stash_add_local_with_root run");
+ ASSERT_OK(opts.retval, "local_kptr_stash_add_local_with_root retval");
+
+ local_kptr_stash__destroy(skel);
+}
+
+static void test_local_kptr_stash_unstash(void)
+{
+ LIBBPF_OPTS(bpf_test_run_opts, opts,
+ .data_in = &pkt_v4,
+ .data_size_in = sizeof(pkt_v4),
+ .repeat = 1,
+ );
+ struct local_kptr_stash *skel;
+ int ret;
+
+ skel = local_kptr_stash__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "local_kptr_stash__open_and_load"))
+ return;
+
+ ret = bpf_prog_test_run_opts(bpf_program__fd(skel->progs.stash_rb_nodes), &opts);
+ ASSERT_OK(ret, "local_kptr_stash_add_nodes run");
+ ASSERT_OK(opts.retval, "local_kptr_stash_add_nodes retval");
+
+ ret = bpf_prog_test_run_opts(bpf_program__fd(skel->progs.unstash_rb_node), &opts);
+ ASSERT_OK(ret, "local_kptr_stash_add_nodes run");
+ ASSERT_EQ(opts.retval, 42, "local_kptr_stash_add_nodes retval");
+
+ local_kptr_stash__destroy(skel);
+}
+
+static void test_refcount_acquire_without_unstash(void)
+{
+ LIBBPF_OPTS(bpf_test_run_opts, opts,
+ .data_in = &pkt_v4,
+ .data_size_in = sizeof(pkt_v4),
+ .repeat = 1,
+ );
+ struct local_kptr_stash *skel;
+ int ret;
+
+ skel = local_kptr_stash__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "local_kptr_stash__open_and_load"))
+ return;
+
+ ret = bpf_prog_test_run_opts(bpf_program__fd(skel->progs.refcount_acquire_without_unstash),
+ &opts);
+ ASSERT_OK(ret, "refcount_acquire_without_unstash run");
+ ASSERT_EQ(opts.retval, 2, "refcount_acquire_without_unstash retval");
+
+ ret = bpf_prog_test_run_opts(bpf_program__fd(skel->progs.stash_refcounted_node), &opts);
+ ASSERT_OK(ret, "stash_refcounted_node run");
+ ASSERT_OK(opts.retval, "stash_refcounted_node retval");
+
+ ret = bpf_prog_test_run_opts(bpf_program__fd(skel->progs.refcount_acquire_without_unstash),
+ &opts);
+ ASSERT_OK(ret, "refcount_acquire_without_unstash (2) run");
+ ASSERT_EQ(opts.retval, 42, "refcount_acquire_without_unstash (2) retval");
+
+ local_kptr_stash__destroy(skel);
+}
+
+static void test_local_kptr_stash_fail(void)
+{
+ RUN_TESTS(local_kptr_stash_fail);
+}
+
+void test_local_kptr_stash(void)
+{
+ if (test__start_subtest("local_kptr_stash_simple"))
+ test_local_kptr_stash_simple();
+ if (test__start_subtest("local_kptr_stash_plain"))
+ test_local_kptr_stash_plain();
+ if (test__start_subtest("local_kptr_stash_local_with_root"))
+ test_local_kptr_stash_local_with_root();
+ if (test__start_subtest("local_kptr_stash_unstash"))
+ test_local_kptr_stash_unstash();
+ if (test__start_subtest("refcount_acquire_without_unstash"))
+ test_refcount_acquire_without_unstash();
+ if (test__start_subtest("local_kptr_stash_fail"))
+ test_local_kptr_stash_fail();
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/log_buf.c b/tools/testing/selftests/bpf/prog_tests/log_buf.c
new file mode 100644
index 000000000000..0f7ea4d7d9f6
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/log_buf.c
@@ -0,0 +1,276 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2021 Facebook */
+
+#include <test_progs.h>
+#include <bpf/btf.h>
+
+#include "test_log_buf.skel.h"
+
+static size_t libbpf_log_pos;
+static char libbpf_log_buf[1024 * 1024];
+static bool libbpf_log_error;
+
+static int libbpf_print_cb(enum libbpf_print_level level, const char *fmt, va_list args)
+{
+ int emitted_cnt;
+ size_t left_cnt;
+
+ left_cnt = sizeof(libbpf_log_buf) - libbpf_log_pos;
+ emitted_cnt = vsnprintf(libbpf_log_buf + libbpf_log_pos, left_cnt, fmt, args);
+
+ if (emitted_cnt < 0 || emitted_cnt + 1 > left_cnt) {
+ libbpf_log_error = true;
+ return 0;
+ }
+
+ libbpf_log_pos += emitted_cnt;
+ return 0;
+}
+
+static void obj_load_log_buf(void)
+{
+ libbpf_print_fn_t old_print_cb = libbpf_set_print(libbpf_print_cb);
+ LIBBPF_OPTS(bpf_object_open_opts, opts);
+ const size_t log_buf_sz = 1024 * 1024;
+ struct test_log_buf* skel;
+ char *obj_log_buf, *good_log_buf, *bad_log_buf;
+ int err;
+
+ obj_log_buf = malloc(3 * log_buf_sz);
+ if (!ASSERT_OK_PTR(obj_log_buf, "obj_log_buf"))
+ return;
+
+ good_log_buf = obj_log_buf + log_buf_sz;
+ bad_log_buf = obj_log_buf + 2 * log_buf_sz;
+ obj_log_buf[0] = good_log_buf[0] = bad_log_buf[0] = '\0';
+
+ opts.kernel_log_buf = obj_log_buf;
+ opts.kernel_log_size = log_buf_sz;
+ opts.kernel_log_level = 4; /* for BTF this will turn into 1 */
+
+ /* In the first round every prog has its own log_buf, so libbpf logs
+ * don't have program failure logs
+ */
+ skel = test_log_buf__open_opts(&opts);
+ if (!ASSERT_OK_PTR(skel, "skel_open"))
+ goto cleanup;
+
+ /* set very verbose level for good_prog so we always get detailed logs */
+ bpf_program__set_log_buf(skel->progs.good_prog, good_log_buf, log_buf_sz);
+ bpf_program__set_log_level(skel->progs.good_prog, 2);
+
+ bpf_program__set_log_buf(skel->progs.bad_prog, bad_log_buf, log_buf_sz);
+ /* log_level 0 with custom log_buf means that verbose logs are not
+ * requested if program load is successful, but libbpf should retry
+ * with log_level 1 on error and put program's verbose load log into
+ * custom log_buf
+ */
+ bpf_program__set_log_level(skel->progs.bad_prog, 0);
+
+ err = test_log_buf__load(skel);
+ if (!ASSERT_ERR(err, "unexpected_load_success"))
+ goto cleanup;
+
+ ASSERT_FALSE(libbpf_log_error, "libbpf_log_error");
+
+ /* there should be no prog loading log because we specified per-prog log buf */
+ ASSERT_NULL(strstr(libbpf_log_buf, "-- BEGIN PROG LOAD LOG --"), "unexp_libbpf_log");
+ ASSERT_OK_PTR(strstr(libbpf_log_buf, "prog 'bad_prog': BPF program load failed"),
+ "libbpf_log_not_empty");
+ ASSERT_OK_PTR(strstr(obj_log_buf, "DATASEC license"), "obj_log_not_empty");
+ ASSERT_OK_PTR(strstr(good_log_buf, "0: R1=ctx() R10=fp0"),
+ "good_log_verbose");
+ ASSERT_OK_PTR(strstr(bad_log_buf, "invalid access to map value, value_size=16 off=16000 size=4"),
+ "bad_log_not_empty");
+
+ if (env.verbosity > VERBOSE_NONE) {
+ printf("LIBBPF LOG: \n=================\n%s=================\n", libbpf_log_buf);
+ printf("OBJ LOG: \n=================\n%s=================\n", obj_log_buf);
+ printf("GOOD_PROG LOG:\n=================\n%s=================\n", good_log_buf);
+ printf("BAD_PROG LOG:\n=================\n%s=================\n", bad_log_buf);
+ }
+
+ /* reset everything */
+ test_log_buf__destroy(skel);
+ obj_log_buf[0] = good_log_buf[0] = bad_log_buf[0] = '\0';
+ libbpf_log_buf[0] = '\0';
+ libbpf_log_pos = 0;
+ libbpf_log_error = false;
+
+ /* In the second round we let bad_prog's failure be logged through print callback */
+ opts.kernel_log_buf = NULL; /* let everything through into print callback */
+ opts.kernel_log_size = 0;
+ opts.kernel_log_level = 1;
+
+ skel = test_log_buf__open_opts(&opts);
+ if (!ASSERT_OK_PTR(skel, "skel_open"))
+ goto cleanup;
+
+ /* set normal verbose level for good_prog to check log_level is taken into account */
+ bpf_program__set_log_buf(skel->progs.good_prog, good_log_buf, log_buf_sz);
+ bpf_program__set_log_level(skel->progs.good_prog, 1);
+
+ err = test_log_buf__load(skel);
+ if (!ASSERT_ERR(err, "unexpected_load_success"))
+ goto cleanup;
+
+ ASSERT_FALSE(libbpf_log_error, "libbpf_log_error");
+
+ /* this time prog loading error should be logged through print callback */
+ ASSERT_OK_PTR(strstr(libbpf_log_buf, "libbpf: prog 'bad_prog': -- BEGIN PROG LOAD LOG --"),
+ "libbpf_log_correct");
+ ASSERT_STREQ(obj_log_buf, "", "obj_log__empty");
+ ASSERT_STREQ(good_log_buf, "processed 4 insns (limit 1000000) max_states_per_insn 0 total_states 0 peak_states 0 mark_read 0\n",
+ "good_log_ok");
+ ASSERT_STREQ(bad_log_buf, "", "bad_log_empty");
+
+ if (env.verbosity > VERBOSE_NONE) {
+ printf("LIBBPF LOG: \n=================\n%s=================\n", libbpf_log_buf);
+ printf("OBJ LOG: \n=================\n%s=================\n", obj_log_buf);
+ printf("GOOD_PROG LOG:\n=================\n%s=================\n", good_log_buf);
+ printf("BAD_PROG LOG:\n=================\n%s=================\n", bad_log_buf);
+ }
+
+cleanup:
+ free(obj_log_buf);
+ test_log_buf__destroy(skel);
+ libbpf_set_print(old_print_cb);
+}
+
+static void bpf_prog_load_log_buf(void)
+{
+ const struct bpf_insn good_prog_insns[] = {
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ };
+ const size_t good_prog_insn_cnt = sizeof(good_prog_insns) / sizeof(struct bpf_insn);
+ const struct bpf_insn bad_prog_insns[] = {
+ BPF_EXIT_INSN(),
+ };
+ size_t bad_prog_insn_cnt = sizeof(bad_prog_insns) / sizeof(struct bpf_insn);
+ LIBBPF_OPTS(bpf_prog_load_opts, opts);
+ const size_t log_buf_sz = 1024 * 1024;
+ char *log_buf;
+ int fd = -1;
+
+ log_buf = malloc(log_buf_sz);
+ if (!ASSERT_OK_PTR(log_buf, "log_buf_alloc"))
+ return;
+ opts.log_buf = log_buf;
+ opts.log_size = log_buf_sz;
+
+ /* with log_level == 0 log_buf shoud stay empty for good prog */
+ log_buf[0] = '\0';
+ opts.log_level = 0;
+ fd = bpf_prog_load(BPF_PROG_TYPE_SOCKET_FILTER, "good_prog", "GPL",
+ good_prog_insns, good_prog_insn_cnt, &opts);
+ ASSERT_STREQ(log_buf, "", "good_log_0");
+ ASSERT_GE(fd, 0, "good_fd1");
+ if (fd >= 0)
+ close(fd);
+ fd = -1;
+
+ /* log_level == 2 should always fill log_buf, even for good prog */
+ log_buf[0] = '\0';
+ opts.log_level = 2;
+ fd = bpf_prog_load(BPF_PROG_TYPE_SOCKET_FILTER, "good_prog", "GPL",
+ good_prog_insns, good_prog_insn_cnt, &opts);
+ ASSERT_OK_PTR(strstr(log_buf, "0: R1=ctx() R10=fp0"), "good_log_2");
+ ASSERT_GE(fd, 0, "good_fd2");
+ if (fd >= 0)
+ close(fd);
+ fd = -1;
+
+ /* log_level == 0 should fill log_buf for bad prog */
+ log_buf[0] = '\0';
+ opts.log_level = 0;
+ fd = bpf_prog_load(BPF_PROG_TYPE_SOCKET_FILTER, "bad_prog", "GPL",
+ bad_prog_insns, bad_prog_insn_cnt, &opts);
+ ASSERT_OK_PTR(strstr(log_buf, "R0 !read_ok"), "bad_log_0");
+ ASSERT_LT(fd, 0, "bad_fd");
+ if (fd >= 0)
+ close(fd);
+ fd = -1;
+
+ free(log_buf);
+}
+
+static void bpf_btf_load_log_buf(void)
+{
+ LIBBPF_OPTS(bpf_btf_load_opts, opts);
+ const size_t log_buf_sz = 1024 * 1024;
+ const void *raw_btf_data;
+ __u32 raw_btf_size;
+ struct btf *btf;
+ char *log_buf = NULL;
+ int fd = -1;
+
+ btf = btf__new_empty();
+ if (!ASSERT_OK_PTR(btf, "empty_btf"))
+ return;
+
+ ASSERT_GT(btf__add_int(btf, "int", 4, 0), 0, "int_type");
+
+ raw_btf_data = btf__raw_data(btf, &raw_btf_size);
+ if (!ASSERT_OK_PTR(raw_btf_data, "raw_btf_data_good"))
+ goto cleanup;
+
+ log_buf = malloc(log_buf_sz);
+ if (!ASSERT_OK_PTR(log_buf, "log_buf_alloc"))
+ goto cleanup;
+ opts.log_buf = log_buf;
+ opts.log_size = log_buf_sz;
+
+ /* with log_level == 0 log_buf shoud stay empty for good BTF */
+ log_buf[0] = '\0';
+ opts.log_level = 0;
+ fd = bpf_btf_load(raw_btf_data, raw_btf_size, &opts);
+ ASSERT_STREQ(log_buf, "", "good_log_0");
+ ASSERT_GE(fd, 0, "good_fd1");
+ if (fd >= 0)
+ close(fd);
+ fd = -1;
+
+ /* log_level == 2 should always fill log_buf, even for good BTF */
+ log_buf[0] = '\0';
+ opts.log_level = 2;
+ fd = bpf_btf_load(raw_btf_data, raw_btf_size, &opts);
+ printf("LOG_BUF: %s\n", log_buf);
+ ASSERT_OK_PTR(strstr(log_buf, "magic: 0xeb9f"), "good_log_2");
+ ASSERT_GE(fd, 0, "good_fd2");
+ if (fd >= 0)
+ close(fd);
+ fd = -1;
+
+ /* make BTF bad, add pointer pointing to non-existing type */
+ ASSERT_GT(btf__add_ptr(btf, 100), 0, "bad_ptr_type");
+
+ raw_btf_data = btf__raw_data(btf, &raw_btf_size);
+ if (!ASSERT_OK_PTR(raw_btf_data, "raw_btf_data_bad"))
+ goto cleanup;
+
+ /* log_level == 0 should fill log_buf for bad BTF */
+ log_buf[0] = '\0';
+ opts.log_level = 0;
+ fd = bpf_btf_load(raw_btf_data, raw_btf_size, &opts);
+ printf("LOG_BUF: %s\n", log_buf);
+ ASSERT_OK_PTR(strstr(log_buf, "[2] PTR (anon) type_id=100 Invalid type_id"), "bad_log_0");
+ ASSERT_LT(fd, 0, "bad_fd");
+ if (fd >= 0)
+ close(fd);
+ fd = -1;
+
+cleanup:
+ free(log_buf);
+ btf__free(btf);
+}
+
+void test_log_buf(void)
+{
+ if (test__start_subtest("obj_load_log_buf"))
+ obj_load_log_buf();
+ if (test__start_subtest("bpf_prog_load_log_buf"))
+ bpf_prog_load_log_buf();
+ if (test__start_subtest("bpf_btf_load_log_buf"))
+ bpf_btf_load_log_buf();
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/log_fixup.c b/tools/testing/selftests/bpf/prog_tests/log_fixup.c
new file mode 100644
index 000000000000..90a98e23be61
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/log_fixup.c
@@ -0,0 +1,181 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2022 Meta Platforms, Inc. and affiliates. */
+#include <test_progs.h>
+#include <bpf/btf.h>
+
+#include "test_log_fixup.skel.h"
+
+enum trunc_type {
+ TRUNC_NONE,
+ TRUNC_PARTIAL,
+ TRUNC_FULL,
+};
+
+static void bad_core_relo(size_t log_buf_size, enum trunc_type trunc_type)
+{
+ char log_buf[8 * 1024];
+ struct test_log_fixup* skel;
+ int err;
+
+ skel = test_log_fixup__open();
+ if (!ASSERT_OK_PTR(skel, "skel_open"))
+ return;
+
+ bpf_program__set_autoload(skel->progs.bad_relo, true);
+ memset(log_buf, 0, sizeof(log_buf));
+ bpf_program__set_log_buf(skel->progs.bad_relo, log_buf, log_buf_size ?: sizeof(log_buf));
+ bpf_program__set_log_level(skel->progs.bad_relo, 1 | 8); /* BPF_LOG_FIXED to force truncation */
+
+ err = test_log_fixup__load(skel);
+ if (!ASSERT_ERR(err, "load_fail"))
+ goto cleanup;
+
+ ASSERT_HAS_SUBSTR(log_buf,
+ "0: <invalid CO-RE relocation>\n"
+ "failed to resolve CO-RE relocation <byte_sz> ",
+ "log_buf_part1");
+
+ switch (trunc_type) {
+ case TRUNC_NONE:
+ ASSERT_HAS_SUBSTR(log_buf,
+ "struct task_struct___bad.fake_field (0:1 @ offset 4)\n",
+ "log_buf_part2");
+ ASSERT_HAS_SUBSTR(log_buf,
+ "max_states_per_insn 0 total_states 0 peak_states 0 mark_read 0\n",
+ "log_buf_end");
+ break;
+ case TRUNC_PARTIAL:
+ /* we should get full libbpf message patch */
+ ASSERT_HAS_SUBSTR(log_buf,
+ "struct task_struct___bad.fake_field (0:1 @ offset 4)\n",
+ "log_buf_part2");
+ /* we shouldn't get full end of BPF verifier log */
+ ASSERT_NULL(strstr(log_buf, "max_states_per_insn 0 total_states 0 peak_states 0 mark_read 0\n"),
+ "log_buf_end");
+ break;
+ case TRUNC_FULL:
+ /* we shouldn't get second part of libbpf message patch */
+ ASSERT_NULL(strstr(log_buf, "struct task_struct___bad.fake_field (0:1 @ offset 4)\n"),
+ "log_buf_part2");
+ /* we shouldn't get full end of BPF verifier log */
+ ASSERT_NULL(strstr(log_buf, "max_states_per_insn 0 total_states 0 peak_states 0 mark_read 0\n"),
+ "log_buf_end");
+ break;
+ }
+
+ if (env.verbosity > VERBOSE_NONE)
+ printf("LOG: \n=================\n%s=================\n", log_buf);
+cleanup:
+ test_log_fixup__destroy(skel);
+}
+
+static void bad_core_relo_subprog(void)
+{
+ char log_buf[8 * 1024];
+ struct test_log_fixup* skel;
+ int err;
+
+ skel = test_log_fixup__open();
+ if (!ASSERT_OK_PTR(skel, "skel_open"))
+ return;
+
+ bpf_program__set_autoload(skel->progs.bad_relo_subprog, true);
+ bpf_program__set_log_buf(skel->progs.bad_relo_subprog, log_buf, sizeof(log_buf));
+
+ err = test_log_fixup__load(skel);
+ if (!ASSERT_ERR(err, "load_fail"))
+ goto cleanup;
+
+ ASSERT_HAS_SUBSTR(log_buf,
+ ": <invalid CO-RE relocation>\n"
+ "failed to resolve CO-RE relocation <byte_off> ",
+ "log_buf");
+ ASSERT_HAS_SUBSTR(log_buf,
+ "struct task_struct___bad.fake_field_subprog (0:2 @ offset 8)\n",
+ "log_buf");
+
+ if (env.verbosity > VERBOSE_NONE)
+ printf("LOG: \n=================\n%s=================\n", log_buf);
+
+cleanup:
+ test_log_fixup__destroy(skel);
+}
+
+static void missing_map(void)
+{
+ char log_buf[8 * 1024];
+ struct test_log_fixup* skel;
+ int err;
+
+ skel = test_log_fixup__open();
+ if (!ASSERT_OK_PTR(skel, "skel_open"))
+ return;
+
+ bpf_map__set_autocreate(skel->maps.missing_map, false);
+
+ bpf_program__set_autoload(skel->progs.use_missing_map, true);
+ bpf_program__set_log_buf(skel->progs.use_missing_map, log_buf, sizeof(log_buf));
+
+ err = test_log_fixup__load(skel);
+ if (!ASSERT_ERR(err, "load_fail"))
+ goto cleanup;
+
+ ASSERT_TRUE(bpf_map__autocreate(skel->maps.existing_map), "existing_map_autocreate");
+ ASSERT_FALSE(bpf_map__autocreate(skel->maps.missing_map), "missing_map_autocreate");
+
+ ASSERT_HAS_SUBSTR(log_buf,
+ ": <invalid BPF map reference>\n"
+ "BPF map 'missing_map' is referenced but wasn't created\n",
+ "log_buf");
+
+ if (env.verbosity > VERBOSE_NONE)
+ printf("LOG: \n=================\n%s=================\n", log_buf);
+
+cleanup:
+ test_log_fixup__destroy(skel);
+}
+
+static void missing_kfunc(void)
+{
+ char log_buf[8 * 1024];
+ struct test_log_fixup* skel;
+ int err;
+
+ skel = test_log_fixup__open();
+ if (!ASSERT_OK_PTR(skel, "skel_open"))
+ return;
+
+ bpf_program__set_autoload(skel->progs.use_missing_kfunc, true);
+ bpf_program__set_log_buf(skel->progs.use_missing_kfunc, log_buf, sizeof(log_buf));
+
+ err = test_log_fixup__load(skel);
+ if (!ASSERT_ERR(err, "load_fail"))
+ goto cleanup;
+
+ ASSERT_HAS_SUBSTR(log_buf,
+ "0: <invalid kfunc call>\n"
+ "kfunc 'bpf_nonexistent_kfunc' is referenced but wasn't resolved\n",
+ "log_buf");
+
+ if (env.verbosity > VERBOSE_NONE)
+ printf("LOG: \n=================\n%s=================\n", log_buf);
+
+cleanup:
+ test_log_fixup__destroy(skel);
+}
+
+void test_log_fixup(void)
+{
+ if (test__start_subtest("bad_core_relo_trunc_none"))
+ bad_core_relo(0, TRUNC_NONE /* full buf */);
+ if (test__start_subtest("bad_core_relo_trunc_partial"))
+ bad_core_relo(300, TRUNC_PARTIAL /* truncate original log a bit */);
+ if (test__start_subtest("bad_core_relo_trunc_full"))
+ bad_core_relo(240, TRUNC_FULL /* truncate also libbpf's message patch */);
+ if (test__start_subtest("bad_core_relo_subprog"))
+ bad_core_relo_subprog();
+ if (test__start_subtest("missing_map"))
+ missing_map();
+ if (test__start_subtest("missing_kfunc"))
+ missing_kfunc();
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/lookup_and_delete.c b/tools/testing/selftests/bpf/prog_tests/lookup_and_delete.c
new file mode 100644
index 000000000000..a767bb4a271c
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/lookup_and_delete.c
@@ -0,0 +1,291 @@
+// SPDX-License-Identifier: GPL-2.0-only
+
+#include <test_progs.h>
+#include "test_lookup_and_delete.skel.h"
+
+#define START_VALUE 1234
+#define NEW_VALUE 4321
+#define MAX_ENTRIES 2
+
+static int duration;
+static int nr_cpus;
+
+static int fill_values(int map_fd)
+{
+ __u64 key, value = START_VALUE;
+ int err;
+
+ for (key = 1; key < MAX_ENTRIES + 1; key++) {
+ err = bpf_map_update_elem(map_fd, &key, &value, BPF_NOEXIST);
+ if (!ASSERT_OK(err, "bpf_map_update_elem"))
+ return -1;
+ }
+
+ return 0;
+}
+
+static int fill_values_percpu(int map_fd)
+{
+ __u64 key, value[nr_cpus];
+ int i, err;
+
+ for (i = 0; i < nr_cpus; i++)
+ value[i] = START_VALUE;
+
+ for (key = 1; key < MAX_ENTRIES + 1; key++) {
+ err = bpf_map_update_elem(map_fd, &key, value, BPF_NOEXIST);
+ if (!ASSERT_OK(err, "bpf_map_update_elem"))
+ return -1;
+ }
+
+ return 0;
+}
+
+static struct test_lookup_and_delete *setup_prog(enum bpf_map_type map_type,
+ int *map_fd)
+{
+ struct test_lookup_and_delete *skel;
+ int err;
+
+ skel = test_lookup_and_delete__open();
+ if (!ASSERT_OK_PTR(skel, "test_lookup_and_delete__open"))
+ return NULL;
+
+ err = bpf_map__set_type(skel->maps.hash_map, map_type);
+ if (!ASSERT_OK(err, "bpf_map__set_type"))
+ goto cleanup;
+
+ err = bpf_map__set_max_entries(skel->maps.hash_map, MAX_ENTRIES);
+ if (!ASSERT_OK(err, "bpf_map__set_max_entries"))
+ goto cleanup;
+
+ err = test_lookup_and_delete__load(skel);
+ if (!ASSERT_OK(err, "test_lookup_and_delete__load"))
+ goto cleanup;
+
+ *map_fd = bpf_map__fd(skel->maps.hash_map);
+ if (!ASSERT_GE(*map_fd, 0, "bpf_map__fd"))
+ goto cleanup;
+
+ return skel;
+
+cleanup:
+ test_lookup_and_delete__destroy(skel);
+ return NULL;
+}
+
+/* Triggers BPF program that updates map with given key and value */
+static int trigger_tp(struct test_lookup_and_delete *skel, __u64 key,
+ __u64 value)
+{
+ int err;
+
+ skel->bss->set_pid = getpid();
+ skel->bss->set_key = key;
+ skel->bss->set_value = value;
+
+ err = test_lookup_and_delete__attach(skel);
+ if (!ASSERT_OK(err, "test_lookup_and_delete__attach"))
+ return -1;
+
+ syscall(__NR_getpgid);
+
+ test_lookup_and_delete__detach(skel);
+
+ return 0;
+}
+
+static void test_lookup_and_delete_hash(void)
+{
+ struct test_lookup_and_delete *skel;
+ __u64 key, value;
+ int map_fd, err;
+
+ /* Setup program and fill the map. */
+ skel = setup_prog(BPF_MAP_TYPE_HASH, &map_fd);
+ if (!ASSERT_OK_PTR(skel, "setup_prog"))
+ return;
+
+ err = fill_values(map_fd);
+ if (!ASSERT_OK(err, "fill_values"))
+ goto cleanup;
+
+ /* Lookup and delete element. */
+ key = 1;
+ err = bpf_map__lookup_and_delete_elem(skel->maps.hash_map,
+ &key, sizeof(key), &value, sizeof(value), 0);
+ if (!ASSERT_OK(err, "bpf_map_lookup_and_delete_elem"))
+ goto cleanup;
+
+ /* Fetched value should match the initially set value. */
+ if (CHECK(value != START_VALUE, "bpf_map_lookup_and_delete_elem",
+ "unexpected value=%lld\n", value))
+ goto cleanup;
+
+ /* Check that the entry is non existent. */
+ err = bpf_map_lookup_elem(map_fd, &key, &value);
+ if (!ASSERT_ERR(err, "bpf_map_lookup_elem"))
+ goto cleanup;
+
+cleanup:
+ test_lookup_and_delete__destroy(skel);
+}
+
+static void test_lookup_and_delete_percpu_hash(void)
+{
+ struct test_lookup_and_delete *skel;
+ __u64 key, val, value[nr_cpus];
+ int map_fd, err, i;
+
+ /* Setup program and fill the map. */
+ skel = setup_prog(BPF_MAP_TYPE_PERCPU_HASH, &map_fd);
+ if (!ASSERT_OK_PTR(skel, "setup_prog"))
+ return;
+
+ err = fill_values_percpu(map_fd);
+ if (!ASSERT_OK(err, "fill_values_percpu"))
+ goto cleanup;
+
+ /* Lookup and delete element. */
+ key = 1;
+ err = bpf_map__lookup_and_delete_elem(skel->maps.hash_map,
+ &key, sizeof(key), value, sizeof(value), 0);
+ if (!ASSERT_OK(err, "bpf_map_lookup_and_delete_elem"))
+ goto cleanup;
+
+ for (i = 0; i < nr_cpus; i++) {
+ val = value[i];
+
+ /* Fetched value should match the initially set value. */
+ if (CHECK(val != START_VALUE, "map value",
+ "unexpected for cpu %d: %lld\n", i, val))
+ goto cleanup;
+ }
+
+ /* Check that the entry is non existent. */
+ err = bpf_map_lookup_elem(map_fd, &key, value);
+ if (!ASSERT_ERR(err, "bpf_map_lookup_elem"))
+ goto cleanup;
+
+cleanup:
+ test_lookup_and_delete__destroy(skel);
+}
+
+static void test_lookup_and_delete_lru_hash(void)
+{
+ struct test_lookup_and_delete *skel;
+ __u64 key, value;
+ int map_fd, err;
+
+ /* Setup program and fill the LRU map. */
+ skel = setup_prog(BPF_MAP_TYPE_LRU_HASH, &map_fd);
+ if (!ASSERT_OK_PTR(skel, "setup_prog"))
+ return;
+
+ err = fill_values(map_fd);
+ if (!ASSERT_OK(err, "fill_values"))
+ goto cleanup;
+
+ /* Insert new element at key=3, should reuse LRU element. */
+ key = 3;
+ err = trigger_tp(skel, key, NEW_VALUE);
+ if (!ASSERT_OK(err, "trigger_tp"))
+ goto cleanup;
+
+ /* Lookup and delete element 3. */
+ err = bpf_map__lookup_and_delete_elem(skel->maps.hash_map,
+ &key, sizeof(key), &value, sizeof(value), 0);
+ if (!ASSERT_OK(err, "bpf_map_lookup_and_delete_elem"))
+ goto cleanup;
+
+ /* Value should match the new value. */
+ if (CHECK(value != NEW_VALUE, "bpf_map_lookup_and_delete_elem",
+ "unexpected value=%lld\n", value))
+ goto cleanup;
+
+ /* Check that entries 3 and 1 are non existent. */
+ err = bpf_map_lookup_elem(map_fd, &key, &value);
+ if (!ASSERT_ERR(err, "bpf_map_lookup_elem"))
+ goto cleanup;
+
+ key = 1;
+ err = bpf_map_lookup_elem(map_fd, &key, &value);
+ if (!ASSERT_ERR(err, "bpf_map_lookup_elem"))
+ goto cleanup;
+
+cleanup:
+ test_lookup_and_delete__destroy(skel);
+}
+
+static void test_lookup_and_delete_lru_percpu_hash(void)
+{
+ struct test_lookup_and_delete *skel;
+ __u64 key, val, value[nr_cpus];
+ int map_fd, err, i, cpucnt = 0;
+
+ /* Setup program and fill the LRU map. */
+ skel = setup_prog(BPF_MAP_TYPE_LRU_PERCPU_HASH, &map_fd);
+ if (!ASSERT_OK_PTR(skel, "setup_prog"))
+ return;
+
+ err = fill_values_percpu(map_fd);
+ if (!ASSERT_OK(err, "fill_values_percpu"))
+ goto cleanup;
+
+ /* Insert new element at key=3, should reuse LRU element 1. */
+ key = 3;
+ err = trigger_tp(skel, key, NEW_VALUE);
+ if (!ASSERT_OK(err, "trigger_tp"))
+ goto cleanup;
+
+ /* Clean value. */
+ for (i = 0; i < nr_cpus; i++)
+ value[i] = 0;
+
+ /* Lookup and delete element 3. */
+ err = bpf_map__lookup_and_delete_elem(skel->maps.hash_map,
+ &key, sizeof(key), value, sizeof(value), 0);
+ if (!ASSERT_OK(err, "bpf_map_lookup_and_delete_elem"))
+ goto cleanup;
+
+ /* Check if only one CPU has set the value. */
+ for (i = 0; i < nr_cpus; i++) {
+ val = value[i];
+ if (val) {
+ if (CHECK(val != NEW_VALUE, "map value",
+ "unexpected for cpu %d: %lld\n", i, val))
+ goto cleanup;
+ cpucnt++;
+ }
+ }
+ if (CHECK(cpucnt != 1, "map value", "set for %d CPUs instead of 1!\n",
+ cpucnt))
+ goto cleanup;
+
+ /* Check that entries 3 and 1 are non existent. */
+ err = bpf_map_lookup_elem(map_fd, &key, &value);
+ if (!ASSERT_ERR(err, "bpf_map_lookup_elem"))
+ goto cleanup;
+
+ key = 1;
+ err = bpf_map_lookup_elem(map_fd, &key, &value);
+ if (!ASSERT_ERR(err, "bpf_map_lookup_elem"))
+ goto cleanup;
+
+cleanup:
+ test_lookup_and_delete__destroy(skel);
+}
+
+void test_lookup_and_delete(void)
+{
+ nr_cpus = bpf_num_possible_cpus();
+
+ if (test__start_subtest("lookup_and_delete"))
+ test_lookup_and_delete_hash();
+ if (test__start_subtest("lookup_and_delete_percpu"))
+ test_lookup_and_delete_percpu_hash();
+ if (test__start_subtest("lookup_and_delete_lru"))
+ test_lookup_and_delete_lru_hash();
+ if (test__start_subtest("lookup_and_delete_lru_percpu"))
+ test_lookup_and_delete_lru_percpu_hash();
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/lookup_key.c b/tools/testing/selftests/bpf/prog_tests/lookup_key.c
new file mode 100644
index 000000000000..68025e88f352
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/lookup_key.c
@@ -0,0 +1,112 @@
+// SPDX-License-Identifier: GPL-2.0
+
+/*
+ * Copyright (C) 2022 Huawei Technologies Duesseldorf GmbH
+ *
+ * Author: Roberto Sassu <roberto.sassu@huawei.com>
+ */
+
+#include <linux/keyctl.h>
+#include <test_progs.h>
+
+#include "test_lookup_key.skel.h"
+
+#define KEY_LOOKUP_CREATE 0x01
+#define KEY_LOOKUP_PARTIAL 0x02
+
+static bool kfunc_not_supported;
+
+static int libbpf_print_cb(enum libbpf_print_level level, const char *fmt,
+ va_list args)
+{
+ char *func;
+
+ if (strcmp(fmt, "libbpf: extern (func ksym) '%s': not found in kernel or module BTFs\n"))
+ return 0;
+
+ func = va_arg(args, char *);
+
+ if (strcmp(func, "bpf_lookup_user_key") && strcmp(func, "bpf_key_put") &&
+ strcmp(func, "bpf_lookup_system_key"))
+ return 0;
+
+ kfunc_not_supported = true;
+ return 0;
+}
+
+void test_lookup_key(void)
+{
+ libbpf_print_fn_t old_print_cb;
+ struct test_lookup_key *skel;
+ __u32 next_id;
+ int ret;
+
+ skel = test_lookup_key__open();
+ if (!ASSERT_OK_PTR(skel, "test_lookup_key__open"))
+ return;
+
+ old_print_cb = libbpf_set_print(libbpf_print_cb);
+ ret = test_lookup_key__load(skel);
+ libbpf_set_print(old_print_cb);
+
+ if (ret < 0 && kfunc_not_supported) {
+ printf("%s:SKIP:bpf_lookup_*_key(), bpf_key_put() kfuncs not supported\n",
+ __func__);
+ test__skip();
+ goto close_prog;
+ }
+
+ if (!ASSERT_OK(ret, "test_lookup_key__load"))
+ goto close_prog;
+
+ ret = test_lookup_key__attach(skel);
+ if (!ASSERT_OK(ret, "test_lookup_key__attach"))
+ goto close_prog;
+
+ skel->bss->monitored_pid = getpid();
+ skel->bss->key_serial = KEY_SPEC_THREAD_KEYRING;
+
+ /* The thread-specific keyring does not exist, this test fails. */
+ skel->bss->flags = 0;
+
+ ret = bpf_prog_get_next_id(0, &next_id);
+ if (!ASSERT_LT(ret, 0, "bpf_prog_get_next_id"))
+ goto close_prog;
+
+ /* Force creation of the thread-specific keyring, this test succeeds. */
+ skel->bss->flags = KEY_LOOKUP_CREATE;
+
+ ret = bpf_prog_get_next_id(0, &next_id);
+ if (!ASSERT_OK(ret, "bpf_prog_get_next_id"))
+ goto close_prog;
+
+ /* Pass both lookup flags for parameter validation. */
+ skel->bss->flags = KEY_LOOKUP_CREATE | KEY_LOOKUP_PARTIAL;
+
+ ret = bpf_prog_get_next_id(0, &next_id);
+ if (!ASSERT_OK(ret, "bpf_prog_get_next_id"))
+ goto close_prog;
+
+ /* Pass invalid flags. */
+ skel->bss->flags = UINT64_MAX;
+
+ ret = bpf_prog_get_next_id(0, &next_id);
+ if (!ASSERT_LT(ret, 0, "bpf_prog_get_next_id"))
+ goto close_prog;
+
+ skel->bss->key_serial = 0;
+ skel->bss->key_id = 1;
+
+ ret = bpf_prog_get_next_id(0, &next_id);
+ if (!ASSERT_OK(ret, "bpf_prog_get_next_id"))
+ goto close_prog;
+
+ skel->bss->key_id = UINT32_MAX;
+
+ ret = bpf_prog_get_next_id(0, &next_id);
+ ASSERT_LT(ret, 0, "bpf_prog_get_next_id");
+
+close_prog:
+ skel->bss->monitored_pid = 0;
+ test_lookup_key__destroy(skel);
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/lru_bug.c b/tools/testing/selftests/bpf/prog_tests/lru_bug.c
new file mode 100644
index 000000000000..3c7822390827
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/lru_bug.c
@@ -0,0 +1,21 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <test_progs.h>
+
+#include "lru_bug.skel.h"
+
+void test_lru_bug(void)
+{
+ struct lru_bug *skel;
+ int ret;
+
+ skel = lru_bug__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "lru_bug__open_and_load"))
+ return;
+ ret = lru_bug__attach(skel);
+ if (!ASSERT_OK(ret, "lru_bug__attach"))
+ goto end;
+ usleep(1);
+ ASSERT_OK(skel->data->result, "prealloc_lru_pop doesn't call check_and_init_map_value");
+end:
+ lru_bug__destroy(skel);
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/lsm_cgroup.c b/tools/testing/selftests/bpf/prog_tests/lsm_cgroup.c
new file mode 100644
index 000000000000..130a3b21e467
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/lsm_cgroup.c
@@ -0,0 +1,323 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include <sys/types.h>
+#include <sys/socket.h>
+#include <test_progs.h>
+#include <bpf/btf.h>
+
+#include "lsm_cgroup.skel.h"
+#include "lsm_cgroup_nonvoid.skel.h"
+#include "cgroup_helpers.h"
+#include "network_helpers.h"
+
+#ifndef ENOTSUPP
+#define ENOTSUPP 524
+#endif
+
+static struct btf *btf;
+
+static __u32 query_prog_cnt(int cgroup_fd, const char *attach_func)
+{
+ LIBBPF_OPTS(bpf_prog_query_opts, p);
+ int cnt = 0;
+ int i;
+
+ ASSERT_OK(bpf_prog_query_opts(cgroup_fd, BPF_LSM_CGROUP, &p), "prog_query");
+
+ if (!attach_func)
+ return p.prog_cnt;
+
+ /* When attach_func is provided, count the number of progs that
+ * attach to the given symbol.
+ */
+
+ if (!btf)
+ btf = btf__load_vmlinux_btf();
+ if (!ASSERT_OK(libbpf_get_error(btf), "btf_vmlinux"))
+ return -1;
+
+ p.prog_ids = malloc(sizeof(u32) * p.prog_cnt);
+ p.prog_attach_flags = malloc(sizeof(u32) * p.prog_cnt);
+ ASSERT_OK(bpf_prog_query_opts(cgroup_fd, BPF_LSM_CGROUP, &p), "prog_query");
+
+ for (i = 0; i < p.prog_cnt; i++) {
+ struct bpf_prog_info info = {};
+ __u32 info_len = sizeof(info);
+ int fd;
+
+ fd = bpf_prog_get_fd_by_id(p.prog_ids[i]);
+ ASSERT_GE(fd, 0, "prog_get_fd_by_id");
+ ASSERT_OK(bpf_prog_get_info_by_fd(fd, &info, &info_len),
+ "prog_info_by_fd");
+ close(fd);
+
+ if (info.attach_btf_id ==
+ btf__find_by_name_kind(btf, attach_func, BTF_KIND_FUNC))
+ cnt++;
+ }
+
+ free(p.prog_ids);
+ free(p.prog_attach_flags);
+
+ return cnt;
+}
+
+static void test_lsm_cgroup_functional(void)
+{
+ DECLARE_LIBBPF_OPTS(bpf_prog_attach_opts, attach_opts);
+ DECLARE_LIBBPF_OPTS(bpf_link_update_opts, update_opts);
+ int cgroup_fd = -1, cgroup_fd2 = -1, cgroup_fd3 = -1;
+ int listen_fd, client_fd, accepted_fd;
+ struct lsm_cgroup *skel = NULL;
+ int post_create_prog_fd2 = -1;
+ int post_create_prog_fd = -1;
+ int bind_link_fd2 = -1;
+ int bind_prog_fd2 = -1;
+ int alloc_prog_fd = -1;
+ int bind_prog_fd = -1;
+ int bind_link_fd = -1;
+ int clone_prog_fd = -1;
+ int err, fd, prio;
+ socklen_t socklen;
+
+ cgroup_fd3 = test__join_cgroup("/sock_policy_empty");
+ if (!ASSERT_GE(cgroup_fd3, 0, "create empty cgroup"))
+ goto close_cgroup;
+
+ cgroup_fd2 = test__join_cgroup("/sock_policy_reuse");
+ if (!ASSERT_GE(cgroup_fd2, 0, "create cgroup for reuse"))
+ goto close_cgroup;
+
+ cgroup_fd = test__join_cgroup("/sock_policy");
+ if (!ASSERT_GE(cgroup_fd, 0, "join_cgroup"))
+ goto close_cgroup;
+
+ skel = lsm_cgroup__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "open_and_load"))
+ goto close_cgroup;
+
+ post_create_prog_fd = bpf_program__fd(skel->progs.socket_post_create);
+ post_create_prog_fd2 = bpf_program__fd(skel->progs.socket_post_create2);
+ bind_prog_fd = bpf_program__fd(skel->progs.socket_bind);
+ bind_prog_fd2 = bpf_program__fd(skel->progs.socket_bind2);
+ alloc_prog_fd = bpf_program__fd(skel->progs.socket_alloc);
+ clone_prog_fd = bpf_program__fd(skel->progs.socket_clone);
+
+ ASSERT_EQ(query_prog_cnt(cgroup_fd, "bpf_lsm_sk_alloc_security"), 0, "prog count");
+ ASSERT_EQ(query_prog_cnt(cgroup_fd, NULL), 0, "total prog count");
+ err = bpf_prog_attach(alloc_prog_fd, cgroup_fd, BPF_LSM_CGROUP, 0);
+ if (err == -ENOTSUPP) {
+ test__skip();
+ goto close_cgroup;
+ }
+ if (!ASSERT_OK(err, "attach alloc_prog_fd"))
+ goto detach_cgroup;
+ ASSERT_EQ(query_prog_cnt(cgroup_fd, "bpf_lsm_sk_alloc_security"), 1, "prog count");
+ ASSERT_EQ(query_prog_cnt(cgroup_fd, NULL), 1, "total prog count");
+
+ ASSERT_EQ(query_prog_cnt(cgroup_fd, "bpf_lsm_inet_csk_clone"), 0, "prog count");
+ err = bpf_prog_attach(clone_prog_fd, cgroup_fd, BPF_LSM_CGROUP, 0);
+ if (!ASSERT_OK(err, "attach clone_prog_fd"))
+ goto detach_cgroup;
+ ASSERT_EQ(query_prog_cnt(cgroup_fd, "bpf_lsm_inet_csk_clone"), 1, "prog count");
+ ASSERT_EQ(query_prog_cnt(cgroup_fd, NULL), 2, "total prog count");
+
+ /* Make sure replacing works. */
+
+ ASSERT_EQ(query_prog_cnt(cgroup_fd, "bpf_lsm_socket_post_create"), 0, "prog count");
+ err = bpf_prog_attach(post_create_prog_fd, cgroup_fd,
+ BPF_LSM_CGROUP, 0);
+ if (!ASSERT_OK(err, "attach post_create_prog_fd"))
+ goto detach_cgroup;
+ ASSERT_EQ(query_prog_cnt(cgroup_fd, "bpf_lsm_socket_post_create"), 1, "prog count");
+ ASSERT_EQ(query_prog_cnt(cgroup_fd, NULL), 3, "total prog count");
+
+ attach_opts.replace_prog_fd = post_create_prog_fd;
+ err = bpf_prog_attach_opts(post_create_prog_fd2, cgroup_fd,
+ BPF_LSM_CGROUP, &attach_opts);
+ if (!ASSERT_OK(err, "prog replace post_create_prog_fd"))
+ goto detach_cgroup;
+ ASSERT_EQ(query_prog_cnt(cgroup_fd, "bpf_lsm_socket_post_create"), 1, "prog count");
+ ASSERT_EQ(query_prog_cnt(cgroup_fd, NULL), 3, "total prog count");
+
+ /* Try the same attach/replace via link API. */
+
+ ASSERT_EQ(query_prog_cnt(cgroup_fd, "bpf_lsm_socket_bind"), 0, "prog count");
+ bind_link_fd = bpf_link_create(bind_prog_fd, cgroup_fd,
+ BPF_LSM_CGROUP, NULL);
+ if (!ASSERT_GE(bind_link_fd, 0, "link create bind_prog_fd"))
+ goto detach_cgroup;
+ ASSERT_EQ(query_prog_cnt(cgroup_fd, "bpf_lsm_socket_bind"), 1, "prog count");
+ ASSERT_EQ(query_prog_cnt(cgroup_fd, NULL), 4, "total prog count");
+
+ update_opts.old_prog_fd = bind_prog_fd;
+ update_opts.flags = BPF_F_REPLACE;
+
+ err = bpf_link_update(bind_link_fd, bind_prog_fd2, &update_opts);
+ if (!ASSERT_OK(err, "link update bind_prog_fd"))
+ goto detach_cgroup;
+ ASSERT_EQ(query_prog_cnt(cgroup_fd, "bpf_lsm_socket_bind"), 1, "prog count");
+ ASSERT_EQ(query_prog_cnt(cgroup_fd, NULL), 4, "total prog count");
+
+ /* Attach another instance of bind program to another cgroup.
+ * This should trigger the reuse of the trampoline shim (two
+ * programs attaching to the same btf_id).
+ */
+
+ ASSERT_EQ(query_prog_cnt(cgroup_fd, "bpf_lsm_socket_bind"), 1, "prog count");
+ ASSERT_EQ(query_prog_cnt(cgroup_fd2, "bpf_lsm_socket_bind"), 0, "prog count");
+ bind_link_fd2 = bpf_link_create(bind_prog_fd2, cgroup_fd2,
+ BPF_LSM_CGROUP, NULL);
+ if (!ASSERT_GE(bind_link_fd2, 0, "link create bind_prog_fd2"))
+ goto detach_cgroup;
+ ASSERT_EQ(query_prog_cnt(cgroup_fd2, "bpf_lsm_socket_bind"), 1, "prog count");
+ ASSERT_EQ(query_prog_cnt(cgroup_fd, NULL), 4, "total prog count");
+ ASSERT_EQ(query_prog_cnt(cgroup_fd2, NULL), 1, "total prog count");
+
+ fd = socket(AF_UNIX, SOCK_STREAM, 0);
+ if (!(skel->kconfig->CONFIG_SECURITY_APPARMOR
+ || skel->kconfig->CONFIG_SECURITY_SELINUX
+ || skel->kconfig->CONFIG_SECURITY_SMACK))
+ /* AF_UNIX is prohibited. */
+ ASSERT_LT(fd, 0, "socket(AF_UNIX)");
+ close(fd);
+
+ /* AF_INET6 gets default policy (sk_priority). */
+
+ fd = socket(AF_INET6, SOCK_STREAM, 0);
+ if (!ASSERT_GE(fd, 0, "socket(SOCK_STREAM)"))
+ goto detach_cgroup;
+
+ prio = 0;
+ socklen = sizeof(prio);
+ ASSERT_GE(getsockopt(fd, SOL_SOCKET, SO_PRIORITY, &prio, &socklen), 0,
+ "getsockopt");
+ ASSERT_EQ(prio, 123, "sk_priority");
+
+ close(fd);
+
+ /* TX-only AF_PACKET is allowed. */
+
+ ASSERT_LT(socket(AF_PACKET, SOCK_RAW, htons(ETH_P_ALL)), 0,
+ "socket(AF_PACKET, ..., ETH_P_ALL)");
+
+ fd = socket(AF_PACKET, SOCK_RAW, 0);
+ ASSERT_GE(fd, 0, "socket(AF_PACKET, ..., 0)");
+
+ /* TX-only AF_PACKET can not be rebound. */
+
+ struct sockaddr_ll sa = {
+ .sll_family = AF_PACKET,
+ .sll_protocol = htons(ETH_P_ALL),
+ };
+ ASSERT_LT(bind(fd, (struct sockaddr *)&sa, sizeof(sa)), 0,
+ "bind(ETH_P_ALL)");
+
+ close(fd);
+
+ /* Trigger passive open. */
+
+ listen_fd = start_server(AF_INET6, SOCK_STREAM, "::1", 0, 0);
+ ASSERT_GE(listen_fd, 0, "start_server");
+ client_fd = connect_to_fd(listen_fd, 0);
+ ASSERT_GE(client_fd, 0, "connect_to_fd");
+ accepted_fd = accept(listen_fd, NULL, NULL);
+ ASSERT_GE(accepted_fd, 0, "accept");
+
+ prio = 0;
+ socklen = sizeof(prio);
+ ASSERT_GE(getsockopt(accepted_fd, SOL_SOCKET, SO_PRIORITY, &prio, &socklen), 0,
+ "getsockopt");
+ ASSERT_EQ(prio, 234, "sk_priority");
+
+ /* These are replaced and never called. */
+ ASSERT_EQ(skel->bss->called_socket_post_create, 0, "called_create");
+ ASSERT_EQ(skel->bss->called_socket_bind, 0, "called_bind");
+
+ /* AF_INET6+SOCK_STREAM
+ * AF_PACKET+SOCK_RAW
+ * AF_UNIX+SOCK_RAW if already have non-bpf lsms installed
+ * listen_fd
+ * client_fd
+ * accepted_fd
+ */
+ if (skel->kconfig->CONFIG_SECURITY_APPARMOR
+ || skel->kconfig->CONFIG_SECURITY_SELINUX
+ || skel->kconfig->CONFIG_SECURITY_SMACK)
+ /* AF_UNIX+SOCK_RAW if already have non-bpf lsms installed */
+ ASSERT_EQ(skel->bss->called_socket_post_create2, 6, "called_create2");
+ else
+ ASSERT_EQ(skel->bss->called_socket_post_create2, 5, "called_create2");
+
+ /* start_server
+ * bind(ETH_P_ALL)
+ */
+ ASSERT_EQ(skel->bss->called_socket_bind2, 2, "called_bind2");
+ /* Single accept(). */
+ ASSERT_EQ(skel->bss->called_socket_clone, 1, "called_clone");
+
+ /* AF_UNIX+SOCK_STREAM (failed)
+ * AF_INET6+SOCK_STREAM
+ * AF_PACKET+SOCK_RAW (failed)
+ * AF_PACKET+SOCK_RAW
+ * listen_fd
+ * client_fd
+ * accepted_fd
+ */
+ ASSERT_EQ(skel->bss->called_socket_alloc, 7, "called_alloc");
+
+ close(listen_fd);
+ close(client_fd);
+ close(accepted_fd);
+
+ /* Make sure other cgroup doesn't trigger the programs. */
+
+ if (!ASSERT_OK(join_cgroup("/sock_policy_empty"), "join root cgroup"))
+ goto detach_cgroup;
+
+ fd = socket(AF_INET6, SOCK_STREAM, 0);
+ if (!ASSERT_GE(fd, 0, "socket(SOCK_STREAM)"))
+ goto detach_cgroup;
+
+ prio = 0;
+ socklen = sizeof(prio);
+ ASSERT_GE(getsockopt(fd, SOL_SOCKET, SO_PRIORITY, &prio, &socklen), 0,
+ "getsockopt");
+ ASSERT_EQ(prio, 0, "sk_priority");
+
+ close(fd);
+
+detach_cgroup:
+ ASSERT_GE(bpf_prog_detach2(post_create_prog_fd2, cgroup_fd,
+ BPF_LSM_CGROUP), 0, "detach_create");
+ close(bind_link_fd);
+ /* Don't close bind_link_fd2, exercise cgroup release cleanup. */
+ ASSERT_GE(bpf_prog_detach2(alloc_prog_fd, cgroup_fd,
+ BPF_LSM_CGROUP), 0, "detach_alloc");
+ ASSERT_GE(bpf_prog_detach2(clone_prog_fd, cgroup_fd,
+ BPF_LSM_CGROUP), 0, "detach_clone");
+
+close_cgroup:
+ close(cgroup_fd);
+ close(cgroup_fd2);
+ close(cgroup_fd3);
+ lsm_cgroup__destroy(skel);
+}
+
+static void test_lsm_cgroup_nonvoid(void)
+{
+ struct lsm_cgroup_nonvoid *skel = NULL;
+
+ skel = lsm_cgroup_nonvoid__open_and_load();
+ ASSERT_NULL(skel, "open succeeds");
+ lsm_cgroup_nonvoid__destroy(skel);
+}
+
+void test_lsm_cgroup(void)
+{
+ if (test__start_subtest("functional"))
+ test_lsm_cgroup_functional();
+ if (test__start_subtest("nonvoid"))
+ test_lsm_cgroup_nonvoid();
+ btf__free(btf);
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/lwt_helpers.h b/tools/testing/selftests/bpf/prog_tests/lwt_helpers.h
new file mode 100644
index 000000000000..fb1eb8c67361
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/lwt_helpers.h
@@ -0,0 +1,138 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+
+#ifndef __LWT_HELPERS_H
+#define __LWT_HELPERS_H
+
+#include <time.h>
+#include <net/if.h>
+#include <linux/if_tun.h>
+#include <linux/icmp.h>
+
+#include "test_progs.h"
+
+#define log_err(MSG, ...) \
+ fprintf(stderr, "(%s:%d: errno: %s) " MSG "\n", \
+ __FILE__, __LINE__, strerror(errno), ##__VA_ARGS__)
+
+#define RUN_TEST(name) \
+ ({ \
+ if (test__start_subtest(#name)) \
+ if (ASSERT_OK(netns_create(), "netns_create")) { \
+ struct nstoken *token = open_netns(NETNS); \
+ if (ASSERT_OK_PTR(token, "setns")) { \
+ test_ ## name(); \
+ close_netns(token); \
+ } \
+ netns_delete(); \
+ } \
+ })
+
+static inline int netns_create(void)
+{
+ return system("ip netns add " NETNS);
+}
+
+static inline int netns_delete(void)
+{
+ return system("ip netns del " NETNS ">/dev/null 2>&1");
+}
+
+static int open_tuntap(const char *dev_name, bool need_mac)
+{
+ int err = 0;
+ struct ifreq ifr;
+ int fd = open("/dev/net/tun", O_RDWR);
+
+ if (!ASSERT_GT(fd, 0, "open(/dev/net/tun)"))
+ return -1;
+
+ ifr.ifr_flags = IFF_NO_PI | (need_mac ? IFF_TAP : IFF_TUN);
+ strncpy(ifr.ifr_name, dev_name, IFNAMSIZ - 1);
+ ifr.ifr_name[IFNAMSIZ - 1] = '\0';
+
+ err = ioctl(fd, TUNSETIFF, &ifr);
+ if (!ASSERT_OK(err, "ioctl(TUNSETIFF)")) {
+ close(fd);
+ return -1;
+ }
+
+ err = fcntl(fd, F_SETFL, O_NONBLOCK);
+ if (!ASSERT_OK(err, "fcntl(O_NONBLOCK)")) {
+ close(fd);
+ return -1;
+ }
+
+ return fd;
+}
+
+#define ICMP_PAYLOAD_SIZE 100
+
+/* Match an ICMP packet with payload len ICMP_PAYLOAD_SIZE */
+static int __expect_icmp_ipv4(char *buf, ssize_t len)
+{
+ struct iphdr *ip = (struct iphdr *)buf;
+ struct icmphdr *icmp = (struct icmphdr *)(ip + 1);
+ ssize_t min_header_len = sizeof(*ip) + sizeof(*icmp);
+
+ if (len < min_header_len)
+ return -1;
+
+ if (ip->protocol != IPPROTO_ICMP)
+ return -1;
+
+ if (icmp->type != ICMP_ECHO)
+ return -1;
+
+ return len == ICMP_PAYLOAD_SIZE + min_header_len;
+}
+
+typedef int (*filter_t) (char *, ssize_t);
+
+/* wait_for_packet - wait for a packet that matches the filter
+ *
+ * @fd: tun fd/packet socket to read packet
+ * @filter: filter function, returning 1 if matches
+ * @timeout: timeout to wait for the packet
+ *
+ * Returns 1 if a matching packet is read, 0 if timeout expired, -1 on error.
+ */
+static int wait_for_packet(int fd, filter_t filter, struct timeval *timeout)
+{
+ char buf[4096];
+ int max_retry = 5; /* in case we read some spurious packets */
+ fd_set fds;
+
+ FD_ZERO(&fds);
+ while (max_retry--) {
+ /* Linux modifies timeout arg... So make a copy */
+ struct timeval copied_timeout = *timeout;
+ ssize_t ret = -1;
+
+ FD_SET(fd, &fds);
+
+ ret = select(1 + fd, &fds, NULL, NULL, &copied_timeout);
+ if (ret <= 0) {
+ if (errno == EINTR)
+ continue;
+ else if (errno == EAGAIN || ret == 0)
+ return 0;
+
+ log_err("select failed");
+ return -1;
+ }
+
+ ret = read(fd, buf, sizeof(buf));
+
+ if (ret <= 0) {
+ log_err("read(dev): %ld", ret);
+ return -1;
+ }
+
+ if (filter && filter(buf, ret) > 0)
+ return 1;
+ }
+
+ return 0;
+}
+
+#endif /* __LWT_HELPERS_H */
diff --git a/tools/testing/selftests/bpf/prog_tests/lwt_redirect.c b/tools/testing/selftests/bpf/prog_tests/lwt_redirect.c
new file mode 100644
index 000000000000..835a1d756c16
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/lwt_redirect.c
@@ -0,0 +1,332 @@
+// SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause
+
+/*
+ * Test suite of lwt_xmit BPF programs that redirect packets
+ * The file tests focus not only if these programs work as expected normally,
+ * but also if they can handle abnormal situations gracefully.
+ *
+ * WARNING
+ * -------
+ * This test suite may crash the kernel, thus should be run in a VM.
+ *
+ * Setup:
+ * ---------
+ * All tests are performed in a single netns. Two lwt encap routes are setup for
+ * each subtest:
+ *
+ * ip route add 10.0.0.0/24 encap bpf xmit <obj> sec "<ingress_sec>" dev link_err
+ * ip route add 20.0.0.0/24 encap bpf xmit <obj> sec "<egress_sec>" dev link_err
+ *
+ * Here <obj> is statically defined to test_lwt_redirect.bpf.o, and each section
+ * of this object holds a program entry to test. The BPF object is built from
+ * progs/test_lwt_redirect.c. We didn't use generated BPF skeleton since the
+ * attachment for lwt programs are not supported by libbpf yet.
+ *
+ * For testing, ping commands are run in the test netns:
+ *
+ * ping 10.0.0.<ifindex> -c 1 -w 1 -s 100
+ * ping 20.0.0.<ifindex> -c 1 -w 1 -s 100
+ *
+ * Scenarios:
+ * --------------------------------
+ * 1. Redirect to a running tap/tun device
+ * 2. Redirect to a down tap/tun device
+ * 3. Redirect to a vlan device with lower layer down
+ *
+ * Case 1, ping packets should be received by packet socket on target device
+ * when redirected to ingress, and by tun/tap fd when redirected to egress.
+ *
+ * Case 2,3 are considered successful as long as they do not crash the kernel
+ * as a regression.
+ *
+ * Case 1,2 use tap device to test redirect to device that requires MAC
+ * header, and tun device to test the case with no MAC header added.
+ */
+#include <sys/socket.h>
+#include <net/if.h>
+#include <linux/if_ether.h>
+#include <linux/if_packet.h>
+#include <linux/if_tun.h>
+#include <linux/icmp.h>
+#include <arpa/inet.h>
+#include <unistd.h>
+#include <errno.h>
+#include <stdbool.h>
+#include <stdlib.h>
+
+#define NETNS "ns_lwt_redirect"
+#include "lwt_helpers.h"
+#include "test_progs.h"
+#include "network_helpers.h"
+
+#define BPF_OBJECT "test_lwt_redirect.bpf.o"
+#define INGRESS_SEC(need_mac) ((need_mac) ? "redir_ingress" : "redir_ingress_nomac")
+#define EGRESS_SEC(need_mac) ((need_mac) ? "redir_egress" : "redir_egress_nomac")
+#define LOCAL_SRC "10.0.0.1"
+#define CIDR_TO_INGRESS "10.0.0.0/24"
+#define CIDR_TO_EGRESS "20.0.0.0/24"
+
+/* ping to redirect toward given dev, with last byte of dest IP being the target
+ * device index.
+ *
+ * Note: ping command inside BPF-CI is busybox version, so it does not have certain
+ * function, such like -m option to set packet mark.
+ */
+static void ping_dev(const char *dev, bool is_ingress)
+{
+ int link_index = if_nametoindex(dev);
+ char ip[256];
+
+ if (!ASSERT_GE(link_index, 0, "if_nametoindex"))
+ return;
+
+ if (is_ingress)
+ snprintf(ip, sizeof(ip), "10.0.0.%d", link_index);
+ else
+ snprintf(ip, sizeof(ip), "20.0.0.%d", link_index);
+
+ /* We won't get a reply. Don't fail here */
+ SYS_NOFAIL("ping %s -c1 -W1 -s %d",
+ ip, ICMP_PAYLOAD_SIZE);
+}
+
+static int new_packet_sock(const char *ifname)
+{
+ int err = 0;
+ int ignore_outgoing = 1;
+ int ifindex = -1;
+ int s = -1;
+
+ s = socket(AF_PACKET, SOCK_RAW, 0);
+ if (!ASSERT_GE(s, 0, "socket(AF_PACKET)"))
+ return -1;
+
+ ifindex = if_nametoindex(ifname);
+ if (!ASSERT_GE(ifindex, 0, "if_nametoindex")) {
+ close(s);
+ return -1;
+ }
+
+ struct sockaddr_ll addr = {
+ .sll_family = AF_PACKET,
+ .sll_protocol = htons(ETH_P_IP),
+ .sll_ifindex = ifindex,
+ };
+
+ err = bind(s, (struct sockaddr *)&addr, sizeof(addr));
+ if (!ASSERT_OK(err, "bind(AF_PACKET)")) {
+ close(s);
+ return -1;
+ }
+
+ /* Use packet socket to capture only the ingress, so we can distinguish
+ * the case where a regression that actually redirects the packet to
+ * the egress.
+ */
+ err = setsockopt(s, SOL_PACKET, PACKET_IGNORE_OUTGOING,
+ &ignore_outgoing, sizeof(ignore_outgoing));
+ if (!ASSERT_OK(err, "setsockopt(PACKET_IGNORE_OUTGOING)")) {
+ close(s);
+ return -1;
+ }
+
+ err = fcntl(s, F_SETFL, O_NONBLOCK);
+ if (!ASSERT_OK(err, "fcntl(O_NONBLOCK)")) {
+ close(s);
+ return -1;
+ }
+
+ return s;
+}
+
+static int expect_icmp(char *buf, ssize_t len)
+{
+ struct ethhdr *eth = (struct ethhdr *)buf;
+
+ if (len < (ssize_t)sizeof(*eth))
+ return -1;
+
+ if (eth->h_proto == htons(ETH_P_IP))
+ return __expect_icmp_ipv4((char *)(eth + 1), len - sizeof(*eth));
+
+ return -1;
+}
+
+static int expect_icmp_nomac(char *buf, ssize_t len)
+{
+ return __expect_icmp_ipv4(buf, len);
+}
+
+static void send_and_capture_test_packets(const char *test_name, int tap_fd,
+ const char *target_dev, bool need_mac)
+{
+ int psock = -1;
+ struct timeval timeo = {
+ .tv_sec = 0,
+ .tv_usec = 250000,
+ };
+ int ret = -1;
+
+ filter_t filter = need_mac ? expect_icmp : expect_icmp_nomac;
+
+ ping_dev(target_dev, false);
+
+ ret = wait_for_packet(tap_fd, filter, &timeo);
+ if (!ASSERT_EQ(ret, 1, "wait_for_epacket")) {
+ log_err("%s egress test fails", test_name);
+ goto out;
+ }
+
+ psock = new_packet_sock(target_dev);
+ ping_dev(target_dev, true);
+
+ ret = wait_for_packet(psock, filter, &timeo);
+ if (!ASSERT_EQ(ret, 1, "wait_for_ipacket")) {
+ log_err("%s ingress test fails", test_name);
+ goto out;
+ }
+
+out:
+ if (psock >= 0)
+ close(psock);
+}
+
+static int setup_redirect_target(const char *target_dev, bool need_mac)
+{
+ int target_index = -1;
+ int tap_fd = -1;
+
+ tap_fd = open_tuntap(target_dev, need_mac);
+ if (!ASSERT_GE(tap_fd, 0, "open_tuntap"))
+ goto fail;
+
+ target_index = if_nametoindex(target_dev);
+ if (!ASSERT_GE(target_index, 0, "if_nametoindex"))
+ goto fail;
+
+ SYS(fail, "sysctl -w net.ipv6.conf.all.disable_ipv6=1");
+ SYS(fail, "ip link add link_err type dummy");
+ SYS(fail, "ip link set lo up");
+ SYS(fail, "ip addr add dev lo " LOCAL_SRC "/32");
+ SYS(fail, "ip link set link_err up");
+ SYS(fail, "ip link set %s up", target_dev);
+
+ SYS(fail, "ip route add %s dev link_err encap bpf xmit obj %s sec %s",
+ CIDR_TO_INGRESS, BPF_OBJECT, INGRESS_SEC(need_mac));
+
+ SYS(fail, "ip route add %s dev link_err encap bpf xmit obj %s sec %s",
+ CIDR_TO_EGRESS, BPF_OBJECT, EGRESS_SEC(need_mac));
+
+ return tap_fd;
+
+fail:
+ if (tap_fd >= 0)
+ close(tap_fd);
+ return -1;
+}
+
+static void test_lwt_redirect_normal(void)
+{
+ const char *target_dev = "tap0";
+ int tap_fd = -1;
+ bool need_mac = true;
+
+ tap_fd = setup_redirect_target(target_dev, need_mac);
+ if (!ASSERT_GE(tap_fd, 0, "setup_redirect_target"))
+ return;
+
+ send_and_capture_test_packets(__func__, tap_fd, target_dev, need_mac);
+ close(tap_fd);
+}
+
+static void test_lwt_redirect_normal_nomac(void)
+{
+ const char *target_dev = "tun0";
+ int tap_fd = -1;
+ bool need_mac = false;
+
+ tap_fd = setup_redirect_target(target_dev, need_mac);
+ if (!ASSERT_GE(tap_fd, 0, "setup_redirect_target"))
+ return;
+
+ send_and_capture_test_packets(__func__, tap_fd, target_dev, need_mac);
+ close(tap_fd);
+}
+
+/* This test aims to prevent regression of future. As long as the kernel does
+ * not panic, it is considered as success.
+ */
+static void __test_lwt_redirect_dev_down(bool need_mac)
+{
+ const char *target_dev = "tap0";
+ int tap_fd = -1;
+
+ tap_fd = setup_redirect_target(target_dev, need_mac);
+ if (!ASSERT_GE(tap_fd, 0, "setup_redirect_target"))
+ return;
+
+ SYS(out, "ip link set %s down", target_dev);
+ ping_dev(target_dev, true);
+ ping_dev(target_dev, false);
+
+out:
+ close(tap_fd);
+}
+
+static void test_lwt_redirect_dev_down(void)
+{
+ __test_lwt_redirect_dev_down(true);
+}
+
+static void test_lwt_redirect_dev_down_nomac(void)
+{
+ __test_lwt_redirect_dev_down(false);
+}
+
+/* This test aims to prevent regression of future. As long as the kernel does
+ * not panic, it is considered as success.
+ */
+static void test_lwt_redirect_dev_carrier_down(void)
+{
+ const char *lower_dev = "tap0";
+ const char *vlan_dev = "vlan100";
+ int tap_fd = -1;
+
+ tap_fd = setup_redirect_target(lower_dev, true);
+ if (!ASSERT_GE(tap_fd, 0, "setup_redirect_target"))
+ return;
+
+ SYS(out, "ip link add vlan100 link %s type vlan id 100", lower_dev);
+ SYS(out, "ip link set %s up", vlan_dev);
+ SYS(out, "ip link set %s down", lower_dev);
+ ping_dev(vlan_dev, true);
+ ping_dev(vlan_dev, false);
+
+out:
+ close(tap_fd);
+}
+
+static void *test_lwt_redirect_run(void *arg)
+{
+ netns_delete();
+ RUN_TEST(lwt_redirect_normal);
+ RUN_TEST(lwt_redirect_normal_nomac);
+ RUN_TEST(lwt_redirect_dev_down);
+ RUN_TEST(lwt_redirect_dev_down_nomac);
+ RUN_TEST(lwt_redirect_dev_carrier_down);
+ return NULL;
+}
+
+void test_lwt_redirect(void)
+{
+ pthread_t test_thread;
+ int err;
+
+ /* Run the tests in their own thread to isolate the namespace changes
+ * so they do not affect the environment of other tests.
+ * (specifically needed because of unshare(CLONE_NEWNS) in open_netns())
+ */
+ err = pthread_create(&test_thread, NULL, &test_lwt_redirect_run, NULL);
+ if (ASSERT_OK(err, "pthread_create"))
+ ASSERT_OK(pthread_join(test_thread, NULL), "pthread_join");
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/lwt_reroute.c b/tools/testing/selftests/bpf/prog_tests/lwt_reroute.c
new file mode 100644
index 000000000000..03825d2b45a8
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/lwt_reroute.c
@@ -0,0 +1,263 @@
+// SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause
+
+/*
+ * Test suite of lwt BPF programs that reroutes packets
+ * The file tests focus not only if these programs work as expected normally,
+ * but also if they can handle abnormal situations gracefully. This test
+ * suite currently only covers lwt_xmit hook. lwt_in tests have not been
+ * implemented.
+ *
+ * WARNING
+ * -------
+ * This test suite can crash the kernel, thus should be run in a VM.
+ *
+ * Setup:
+ * ---------
+ * all tests are performed in a single netns. A lwt encap route is setup for
+ * each subtest:
+ *
+ * ip route add 10.0.0.0/24 encap bpf xmit <obj> sec "<section_N>" dev link_err
+ *
+ * Here <obj> is statically defined to test_lwt_reroute.bpf.o, and it contains
+ * a single test program entry. This program sets packet mark by last byte of
+ * the IPv4 daddr. For example, a packet going to 1.2.3.4 will receive a skb
+ * mark 4. A packet will only be marked once, and IP x.x.x.0 will be skipped
+ * to avoid route loop. We didn't use generated BPF skeleton since the
+ * attachment for lwt programs are not supported by libbpf yet.
+ *
+ * The test program will bring up a tun device, and sets up the following
+ * routes:
+ *
+ * ip rule add pref 100 from all fwmark <tun_index> lookup 100
+ * ip route add table 100 default dev tun0
+ *
+ * For normal testing, a ping command is running in the test netns:
+ *
+ * ping 10.0.0.<tun_index> -c 1 -w 1 -s 100
+ *
+ * For abnormal testing, fq is used as the qdisc of the tun device. Then a UDP
+ * socket will try to overflow the fq queue and trigger qdisc drop error.
+ *
+ * Scenarios:
+ * --------------------------------
+ * 1. Reroute to a running tun device
+ * 2. Reroute to a device where qdisc drop
+ *
+ * For case 1, ping packets should be received by the tun device.
+ *
+ * For case 2, force UDP packets to overflow fq limit. As long as kernel
+ * is not crashed, it is considered successful.
+ */
+#define NETNS "ns_lwt_reroute"
+#include "lwt_helpers.h"
+#include "network_helpers.h"
+#include <linux/net_tstamp.h>
+
+#define BPF_OBJECT "test_lwt_reroute.bpf.o"
+#define LOCAL_SRC "10.0.0.1"
+#define TEST_CIDR "10.0.0.0/24"
+#define XMIT_HOOK "xmit"
+#define XMIT_SECTION "lwt_xmit"
+#define NSEC_PER_SEC 1000000000ULL
+
+/* send a ping to be rerouted to the target device */
+static void ping_once(const char *ip)
+{
+ /* We won't get a reply. Don't fail here */
+ SYS_NOFAIL("ping %s -c1 -W1 -s %d",
+ ip, ICMP_PAYLOAD_SIZE);
+}
+
+/* Send snd_target UDP packets to overflow the fq queue and trigger qdisc drop
+ * error. This is done via TX tstamp to force buffering delayed packets.
+ */
+static int overflow_fq(int snd_target, const char *target_ip)
+{
+ struct sockaddr_in addr = {
+ .sin_family = AF_INET,
+ .sin_port = htons(1234),
+ };
+
+ char data_buf[8]; /* only #pkts matter, so use a random small buffer */
+ char control_buf[CMSG_SPACE(sizeof(uint64_t))];
+ struct iovec iov = {
+ .iov_base = data_buf,
+ .iov_len = sizeof(data_buf),
+ };
+ int err = -1;
+ int s = -1;
+ struct sock_txtime txtime_on = {
+ .clockid = CLOCK_MONOTONIC,
+ .flags = 0,
+ };
+ struct msghdr msg = {
+ .msg_name = &addr,
+ .msg_namelen = sizeof(addr),
+ .msg_control = control_buf,
+ .msg_controllen = sizeof(control_buf),
+ .msg_iovlen = 1,
+ .msg_iov = &iov,
+ };
+ struct cmsghdr *cmsg = CMSG_FIRSTHDR(&msg);
+
+ memset(data_buf, 0, sizeof(data_buf));
+
+ s = socket(AF_INET, SOCK_DGRAM, 0);
+ if (!ASSERT_GE(s, 0, "socket"))
+ goto out;
+
+ err = setsockopt(s, SOL_SOCKET, SO_TXTIME, &txtime_on, sizeof(txtime_on));
+ if (!ASSERT_OK(err, "setsockopt(SO_TXTIME)"))
+ goto out;
+
+ err = inet_pton(AF_INET, target_ip, &addr.sin_addr);
+ if (!ASSERT_EQ(err, 1, "inet_pton"))
+ goto out;
+
+ while (snd_target > 0) {
+ struct timespec now;
+
+ memset(control_buf, 0, sizeof(control_buf));
+ cmsg->cmsg_type = SCM_TXTIME;
+ cmsg->cmsg_level = SOL_SOCKET;
+ cmsg->cmsg_len = CMSG_LEN(sizeof(uint64_t));
+
+ err = clock_gettime(CLOCK_MONOTONIC, &now);
+ if (!ASSERT_OK(err, "clock_gettime(CLOCK_MONOTONIC)")) {
+ err = -1;
+ goto out;
+ }
+
+ *(uint64_t *)CMSG_DATA(cmsg) = (now.tv_nsec + 1) * NSEC_PER_SEC +
+ now.tv_nsec;
+
+ /* we will intentionally send more than fq limit, so ignore
+ * the error here.
+ */
+ sendmsg(s, &msg, MSG_NOSIGNAL);
+ snd_target--;
+ }
+
+ /* no kernel crash so far is considered success */
+ err = 0;
+
+out:
+ if (s >= 0)
+ close(s);
+
+ return err;
+}
+
+static int setup(const char *tun_dev)
+{
+ int target_index = -1;
+ int tap_fd = -1;
+
+ tap_fd = open_tuntap(tun_dev, false);
+ if (!ASSERT_GE(tap_fd, 0, "open_tun"))
+ return -1;
+
+ target_index = if_nametoindex(tun_dev);
+ if (!ASSERT_GE(target_index, 0, "if_nametoindex"))
+ return -1;
+
+ SYS(fail, "ip link add link_err type dummy");
+ SYS(fail, "ip link set lo up");
+ SYS(fail, "ip addr add dev lo " LOCAL_SRC "/32");
+ SYS(fail, "ip link set link_err up");
+ SYS(fail, "ip link set %s up", tun_dev);
+
+ SYS(fail, "ip route add %s dev link_err encap bpf xmit obj %s sec lwt_xmit",
+ TEST_CIDR, BPF_OBJECT);
+
+ SYS(fail, "ip rule add pref 100 from all fwmark %d lookup 100",
+ target_index);
+ SYS(fail, "ip route add t 100 default dev %s", tun_dev);
+
+ return tap_fd;
+
+fail:
+ if (tap_fd >= 0)
+ close(tap_fd);
+ return -1;
+}
+
+static void test_lwt_reroute_normal_xmit(void)
+{
+ const char *tun_dev = "tun0";
+ int tun_fd = -1;
+ int ifindex = -1;
+ char ip[256];
+ struct timeval timeo = {
+ .tv_sec = 0,
+ .tv_usec = 250000,
+ };
+
+ tun_fd = setup(tun_dev);
+ if (!ASSERT_GE(tun_fd, 0, "setup_reroute"))
+ return;
+
+ ifindex = if_nametoindex(tun_dev);
+ if (!ASSERT_GE(ifindex, 0, "if_nametoindex"))
+ return;
+
+ snprintf(ip, 256, "10.0.0.%d", ifindex);
+
+ /* ping packets should be received by the tun device */
+ ping_once(ip);
+
+ if (!ASSERT_EQ(wait_for_packet(tun_fd, __expect_icmp_ipv4, &timeo), 1,
+ "wait_for_packet"))
+ log_err("%s xmit", __func__);
+}
+
+/*
+ * Test the failure case when the skb is dropped at the qdisc. This is a
+ * regression prevention at the xmit hook only.
+ */
+static void test_lwt_reroute_qdisc_dropped(void)
+{
+ const char *tun_dev = "tun0";
+ int tun_fd = -1;
+ int ifindex = -1;
+ char ip[256];
+
+ tun_fd = setup(tun_dev);
+ if (!ASSERT_GE(tun_fd, 0, "setup_reroute"))
+ goto fail;
+
+ SYS(fail, "tc qdisc replace dev %s root fq limit 5 flow_limit 5", tun_dev);
+
+ ifindex = if_nametoindex(tun_dev);
+ if (!ASSERT_GE(ifindex, 0, "if_nametoindex"))
+ return;
+
+ snprintf(ip, 256, "10.0.0.%d", ifindex);
+ ASSERT_EQ(overflow_fq(10, ip), 0, "overflow_fq");
+
+fail:
+ if (tun_fd >= 0)
+ close(tun_fd);
+}
+
+static void *test_lwt_reroute_run(void *arg)
+{
+ netns_delete();
+ RUN_TEST(lwt_reroute_normal_xmit);
+ RUN_TEST(lwt_reroute_qdisc_dropped);
+ return NULL;
+}
+
+void test_lwt_reroute(void)
+{
+ pthread_t test_thread;
+ int err;
+
+ /* Run the tests in their own thread to isolate the namespace changes
+ * so they do not affect the environment of other tests.
+ * (specifically needed because of unshare(CLONE_NEWNS) in open_netns())
+ */
+ err = pthread_create(&test_thread, NULL, &test_lwt_reroute_run, NULL);
+ if (ASSERT_OK(err, "pthread_create"))
+ ASSERT_OK(pthread_join(test_thread, NULL), "pthread_join");
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/map_btf.c b/tools/testing/selftests/bpf/prog_tests/map_btf.c
new file mode 100644
index 000000000000..2c4ef6037573
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/map_btf.c
@@ -0,0 +1,98 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (C) 2023. Huawei Technologies Co., Ltd */
+#include <test_progs.h>
+
+#include "normal_map_btf.skel.h"
+#include "map_in_map_btf.skel.h"
+
+static void do_test_normal_map_btf(void)
+{
+ struct normal_map_btf *skel;
+ int i, err, new_fd = -1;
+ int map_fd_arr[64];
+
+ skel = normal_map_btf__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "open_load"))
+ return;
+
+ err = normal_map_btf__attach(skel);
+ if (!ASSERT_OK(err, "attach"))
+ goto out;
+
+ skel->bss->pid = getpid();
+ usleep(1);
+ ASSERT_TRUE(skel->bss->done, "done");
+
+ /* Use percpu_array to slow bpf_map_free_deferred() down.
+ * The memory allocation may fail, so doesn't check the returned fd.
+ */
+ for (i = 0; i < ARRAY_SIZE(map_fd_arr); i++)
+ map_fd_arr[i] = bpf_map_create(BPF_MAP_TYPE_PERCPU_ARRAY, NULL, 4, 4, 256, NULL);
+
+ /* Close array fd later */
+ new_fd = dup(bpf_map__fd(skel->maps.array));
+out:
+ normal_map_btf__destroy(skel);
+ if (new_fd < 0)
+ return;
+ /* Use kern_sync_rcu() to wait for the start of the free of the bpf
+ * program and use an assumed delay to wait for the release of the map
+ * btf which is held by other maps (e.g, bss). After that, array map
+ * holds the last reference of map btf.
+ */
+ kern_sync_rcu();
+ usleep(4000);
+ /* Spawn multiple kworkers to delay the invocation of
+ * bpf_map_free_deferred() for array map.
+ */
+ for (i = 0; i < ARRAY_SIZE(map_fd_arr); i++) {
+ if (map_fd_arr[i] < 0)
+ continue;
+ close(map_fd_arr[i]);
+ }
+ close(new_fd);
+}
+
+static void do_test_map_in_map_btf(void)
+{
+ int err, zero = 0, new_fd = -1;
+ struct map_in_map_btf *skel;
+
+ skel = map_in_map_btf__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "open_load"))
+ return;
+
+ err = map_in_map_btf__attach(skel);
+ if (!ASSERT_OK(err, "attach"))
+ goto out;
+
+ skel->bss->pid = getpid();
+ usleep(1);
+ ASSERT_TRUE(skel->bss->done, "done");
+
+ /* Close inner_array fd later */
+ new_fd = dup(bpf_map__fd(skel->maps.inner_array));
+ /* Defer the free of inner_array */
+ err = bpf_map__delete_elem(skel->maps.outer_array, &zero, sizeof(zero), 0);
+ ASSERT_OK(err, "delete inner map");
+out:
+ map_in_map_btf__destroy(skel);
+ if (new_fd < 0)
+ return;
+ /* Use kern_sync_rcu() to wait for the start of the free of the bpf
+ * program and use an assumed delay to wait for the free of the outer
+ * map and the release of map btf. After that, inner map holds the last
+ * reference of map btf.
+ */
+ kern_sync_rcu();
+ usleep(10000);
+ close(new_fd);
+}
+
+void test_map_btf(void)
+{
+ if (test__start_subtest("array_btf"))
+ do_test_normal_map_btf();
+ if (test__start_subtest("inner_array_btf"))
+ do_test_map_in_map_btf();
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/map_in_map.c b/tools/testing/selftests/bpf/prog_tests/map_in_map.c
new file mode 100644
index 000000000000..d2a10eb4e5b5
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/map_in_map.c
@@ -0,0 +1,141 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (C) 2023. Huawei Technologies Co., Ltd */
+#define _GNU_SOURCE
+#include <unistd.h>
+#include <sys/syscall.h>
+#include <test_progs.h>
+#include <bpf/btf.h>
+#include "access_map_in_map.skel.h"
+
+struct thread_ctx {
+ pthread_barrier_t barrier;
+ int outer_map_fd;
+ int start, abort;
+ int loop, err;
+};
+
+static int wait_for_start_or_abort(struct thread_ctx *ctx)
+{
+ while (!ctx->start && !ctx->abort)
+ usleep(1);
+ return ctx->abort ? -1 : 0;
+}
+
+static void *update_map_fn(void *data)
+{
+ struct thread_ctx *ctx = data;
+ int loop = ctx->loop, err = 0;
+
+ if (wait_for_start_or_abort(ctx) < 0)
+ return NULL;
+ pthread_barrier_wait(&ctx->barrier);
+
+ while (loop-- > 0) {
+ int fd, zero = 0;
+
+ fd = bpf_map_create(BPF_MAP_TYPE_ARRAY, NULL, 4, 4, 1, NULL);
+ if (fd < 0) {
+ err |= 1;
+ pthread_barrier_wait(&ctx->barrier);
+ continue;
+ }
+
+ /* Remove the old inner map */
+ if (bpf_map_update_elem(ctx->outer_map_fd, &zero, &fd, 0) < 0)
+ err |= 2;
+ close(fd);
+ pthread_barrier_wait(&ctx->barrier);
+ }
+
+ ctx->err = err;
+
+ return NULL;
+}
+
+static void *access_map_fn(void *data)
+{
+ struct thread_ctx *ctx = data;
+ int loop = ctx->loop;
+
+ if (wait_for_start_or_abort(ctx) < 0)
+ return NULL;
+ pthread_barrier_wait(&ctx->barrier);
+
+ while (loop-- > 0) {
+ /* Access the old inner map */
+ syscall(SYS_getpgid);
+ pthread_barrier_wait(&ctx->barrier);
+ }
+
+ return NULL;
+}
+
+static void test_map_in_map_access(const char *prog_name, const char *map_name)
+{
+ struct access_map_in_map *skel;
+ struct bpf_map *outer_map;
+ struct bpf_program *prog;
+ struct thread_ctx ctx;
+ pthread_t tid[2];
+ int err;
+
+ skel = access_map_in_map__open();
+ if (!ASSERT_OK_PTR(skel, "access_map_in_map open"))
+ return;
+
+ prog = bpf_object__find_program_by_name(skel->obj, prog_name);
+ if (!ASSERT_OK_PTR(prog, "find program"))
+ goto out;
+ bpf_program__set_autoload(prog, true);
+
+ outer_map = bpf_object__find_map_by_name(skel->obj, map_name);
+ if (!ASSERT_OK_PTR(outer_map, "find map"))
+ goto out;
+
+ err = access_map_in_map__load(skel);
+ if (!ASSERT_OK(err, "access_map_in_map load"))
+ goto out;
+
+ err = access_map_in_map__attach(skel);
+ if (!ASSERT_OK(err, "access_map_in_map attach"))
+ goto out;
+
+ skel->bss->tgid = getpid();
+
+ memset(&ctx, 0, sizeof(ctx));
+ pthread_barrier_init(&ctx.barrier, NULL, 2);
+ ctx.outer_map_fd = bpf_map__fd(outer_map);
+ ctx.loop = 4;
+
+ err = pthread_create(&tid[0], NULL, update_map_fn, &ctx);
+ if (!ASSERT_OK(err, "close_thread"))
+ goto out;
+
+ err = pthread_create(&tid[1], NULL, access_map_fn, &ctx);
+ if (!ASSERT_OK(err, "read_thread")) {
+ ctx.abort = 1;
+ pthread_join(tid[0], NULL);
+ goto out;
+ }
+
+ ctx.start = 1;
+ pthread_join(tid[0], NULL);
+ pthread_join(tid[1], NULL);
+
+ ASSERT_OK(ctx.err, "err");
+out:
+ access_map_in_map__destroy(skel);
+}
+
+void test_map_in_map(void)
+{
+ if (test__start_subtest("acc_map_in_array"))
+ test_map_in_map_access("access_map_in_array", "outer_array_map");
+ if (test__start_subtest("sleepable_acc_map_in_array"))
+ test_map_in_map_access("sleepable_access_map_in_array", "outer_array_map");
+ if (test__start_subtest("acc_map_in_htab"))
+ test_map_in_map_access("access_map_in_htab", "outer_htab_map");
+ if (test__start_subtest("sleepable_acc_map_in_htab"))
+ test_map_in_map_access("sleepable_access_map_in_htab", "outer_htab_map");
+}
+
diff --git a/tools/testing/selftests/bpf/prog_tests/map_init.c b/tools/testing/selftests/bpf/prog_tests/map_init.c
new file mode 100644
index 000000000000..14a31109dd0e
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/map_init.c
@@ -0,0 +1,214 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/* Copyright (c) 2020 Tessares SA <http://www.tessares.net> */
+
+#include <test_progs.h>
+#include "test_map_init.skel.h"
+
+#define TEST_VALUE 0x1234
+#define FILL_VALUE 0xdeadbeef
+
+static int nr_cpus;
+static int duration;
+
+typedef unsigned long long map_key_t;
+typedef unsigned long long map_value_t;
+typedef struct {
+ map_value_t v; /* padding */
+} __bpf_percpu_val_align pcpu_map_value_t;
+
+
+static int map_populate(int map_fd, int num)
+{
+ pcpu_map_value_t value[nr_cpus];
+ int i, err;
+ map_key_t key;
+
+ for (i = 0; i < nr_cpus; i++)
+ bpf_percpu(value, i) = FILL_VALUE;
+
+ for (key = 1; key <= num; key++) {
+ err = bpf_map_update_elem(map_fd, &key, value, BPF_NOEXIST);
+ if (!ASSERT_OK(err, "bpf_map_update_elem"))
+ return -1;
+ }
+
+ return 0;
+}
+
+static struct test_map_init *setup(enum bpf_map_type map_type, int map_sz,
+ int *map_fd, int populate)
+{
+ struct test_map_init *skel;
+ int err;
+
+ skel = test_map_init__open();
+ if (!ASSERT_OK_PTR(skel, "skel_open"))
+ return NULL;
+
+ err = bpf_map__set_type(skel->maps.hashmap1, map_type);
+ if (!ASSERT_OK(err, "bpf_map__set_type"))
+ goto error;
+
+ err = bpf_map__set_max_entries(skel->maps.hashmap1, map_sz);
+ if (!ASSERT_OK(err, "bpf_map__set_max_entries"))
+ goto error;
+
+ err = test_map_init__load(skel);
+ if (!ASSERT_OK(err, "skel_load"))
+ goto error;
+
+ *map_fd = bpf_map__fd(skel->maps.hashmap1);
+ if (CHECK(*map_fd < 0, "bpf_map__fd", "failed\n"))
+ goto error;
+
+ err = map_populate(*map_fd, populate);
+ if (!ASSERT_OK(err, "map_populate"))
+ goto error_map;
+
+ return skel;
+
+error_map:
+ close(*map_fd);
+error:
+ test_map_init__destroy(skel);
+ return NULL;
+}
+
+/* executes bpf program that updates map with key, value */
+static int prog_run_insert_elem(struct test_map_init *skel, map_key_t key,
+ map_value_t value)
+{
+ struct test_map_init__bss *bss;
+
+ bss = skel->bss;
+
+ bss->inKey = key;
+ bss->inValue = value;
+ bss->inPid = getpid();
+
+ if (!ASSERT_OK(test_map_init__attach(skel), "skel_attach"))
+ return -1;
+
+ /* Let tracepoint trigger */
+ syscall(__NR_getpgid);
+
+ test_map_init__detach(skel);
+
+ return 0;
+}
+
+static int check_values_one_cpu(pcpu_map_value_t *value, map_value_t expected)
+{
+ int i, nzCnt = 0;
+ map_value_t val;
+
+ for (i = 0; i < nr_cpus; i++) {
+ val = bpf_percpu(value, i);
+ if (val) {
+ if (CHECK(val != expected, "map value",
+ "unexpected for cpu %d: 0x%llx\n", i, val))
+ return -1;
+ nzCnt++;
+ }
+ }
+
+ if (CHECK(nzCnt != 1, "map value", "set for %d CPUs instead of 1!\n",
+ nzCnt))
+ return -1;
+
+ return 0;
+}
+
+/* Add key=1 elem with values set for all CPUs
+ * Delete elem key=1
+ * Run bpf prog that inserts new key=1 elem with value=0x1234
+ * (bpf prog can only set value for current CPU)
+ * Lookup Key=1 and check value is as expected for all CPUs:
+ * value set by bpf prog for one CPU, 0 for all others
+ */
+static void test_pcpu_map_init(void)
+{
+ pcpu_map_value_t value[nr_cpus];
+ struct test_map_init *skel;
+ int map_fd, err;
+ map_key_t key;
+
+ /* max 1 elem in map so insertion is forced to reuse freed entry */
+ skel = setup(BPF_MAP_TYPE_PERCPU_HASH, 1, &map_fd, 1);
+ if (!ASSERT_OK_PTR(skel, "prog_setup"))
+ return;
+
+ /* delete element so the entry can be re-used*/
+ key = 1;
+ err = bpf_map_delete_elem(map_fd, &key);
+ if (!ASSERT_OK(err, "bpf_map_delete_elem"))
+ goto cleanup;
+
+ /* run bpf prog that inserts new elem, re-using the slot just freed */
+ err = prog_run_insert_elem(skel, key, TEST_VALUE);
+ if (!ASSERT_OK(err, "prog_run_insert_elem"))
+ goto cleanup;
+
+ /* check that key=1 was re-created by bpf prog */
+ err = bpf_map_lookup_elem(map_fd, &key, value);
+ if (!ASSERT_OK(err, "bpf_map_lookup_elem"))
+ goto cleanup;
+
+ /* and has expected values */
+ check_values_one_cpu(value, TEST_VALUE);
+
+cleanup:
+ test_map_init__destroy(skel);
+}
+
+/* Add key=1 and key=2 elems with values set for all CPUs
+ * Run bpf prog that inserts new key=3 elem
+ * (only for current cpu; other cpus should have initial value = 0)
+ * Lookup Key=1 and check value is as expected for all CPUs
+ */
+static void test_pcpu_lru_map_init(void)
+{
+ pcpu_map_value_t value[nr_cpus];
+ struct test_map_init *skel;
+ int map_fd, err;
+ map_key_t key;
+
+ /* Set up LRU map with 2 elements, values filled for all CPUs.
+ * With these 2 elements, the LRU map is full
+ */
+ skel = setup(BPF_MAP_TYPE_LRU_PERCPU_HASH, 2, &map_fd, 2);
+ if (!ASSERT_OK_PTR(skel, "prog_setup"))
+ return;
+
+ /* run bpf prog that inserts new key=3 element, re-using LRU slot */
+ key = 3;
+ err = prog_run_insert_elem(skel, key, TEST_VALUE);
+ if (!ASSERT_OK(err, "prog_run_insert_elem"))
+ goto cleanup;
+
+ /* check that key=3 replaced one of earlier elements */
+ err = bpf_map_lookup_elem(map_fd, &key, value);
+ if (!ASSERT_OK(err, "bpf_map_lookup_elem"))
+ goto cleanup;
+
+ /* and has expected values */
+ check_values_one_cpu(value, TEST_VALUE);
+
+cleanup:
+ test_map_init__destroy(skel);
+}
+
+void test_map_init(void)
+{
+ nr_cpus = bpf_num_possible_cpus();
+ if (nr_cpus <= 1) {
+ printf("%s:SKIP: >1 cpu needed for this test\n", __func__);
+ test__skip();
+ return;
+ }
+
+ if (test__start_subtest("pcpu_map_init"))
+ test_pcpu_map_init();
+ if (test__start_subtest("pcpu_lru_map_init"))
+ test_pcpu_lru_map_init();
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/map_kptr.c b/tools/testing/selftests/bpf/prog_tests/map_kptr.c
new file mode 100644
index 000000000000..8743df599567
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/map_kptr.c
@@ -0,0 +1,163 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <test_progs.h>
+#include <network_helpers.h>
+
+#include "map_kptr.skel.h"
+#include "map_kptr_fail.skel.h"
+#include "rcu_tasks_trace_gp.skel.h"
+
+static void test_map_kptr_success(bool test_run)
+{
+ LIBBPF_OPTS(bpf_test_run_opts, lopts);
+ LIBBPF_OPTS(bpf_test_run_opts, opts,
+ .data_in = &pkt_v4,
+ .data_size_in = sizeof(pkt_v4),
+ .repeat = 1,
+ );
+ int key = 0, ret, cpu;
+ struct map_kptr *skel;
+ char buf[16], *pbuf;
+
+ skel = map_kptr__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "map_kptr__open_and_load"))
+ return;
+
+ ret = bpf_prog_test_run_opts(bpf_program__fd(skel->progs.test_map_kptr_ref1), &opts);
+ ASSERT_OK(ret, "test_map_kptr_ref1 refcount");
+ ASSERT_OK(opts.retval, "test_map_kptr_ref1 retval");
+ ret = bpf_prog_test_run_opts(bpf_program__fd(skel->progs.test_map_kptr_ref2), &opts);
+ ASSERT_OK(ret, "test_map_kptr_ref2 refcount");
+ ASSERT_OK(opts.retval, "test_map_kptr_ref2 retval");
+
+ ret = bpf_prog_test_run_opts(bpf_program__fd(skel->progs.test_ls_map_kptr_ref1), &lopts);
+ ASSERT_OK(ret, "test_ls_map_kptr_ref1 refcount");
+ ASSERT_OK(lopts.retval, "test_ls_map_kptr_ref1 retval");
+
+ ret = bpf_prog_test_run_opts(bpf_program__fd(skel->progs.test_ls_map_kptr_ref2), &lopts);
+ ASSERT_OK(ret, "test_ls_map_kptr_ref2 refcount");
+ ASSERT_OK(lopts.retval, "test_ls_map_kptr_ref2 retval");
+
+ if (test_run)
+ goto exit;
+
+ cpu = libbpf_num_possible_cpus();
+ if (!ASSERT_GT(cpu, 0, "libbpf_num_possible_cpus"))
+ goto exit;
+
+ pbuf = calloc(cpu, sizeof(buf));
+ if (!ASSERT_OK_PTR(pbuf, "calloc(pbuf)"))
+ goto exit;
+
+ ret = bpf_map__update_elem(skel->maps.array_map,
+ &key, sizeof(key), buf, sizeof(buf), 0);
+ ASSERT_OK(ret, "array_map update");
+ skel->data->ref--;
+ ret = bpf_prog_test_run_opts(bpf_program__fd(skel->progs.test_map_kptr_ref3), &opts);
+ ASSERT_OK(ret, "test_map_kptr_ref3 refcount");
+ ASSERT_OK(opts.retval, "test_map_kptr_ref3 retval");
+
+ ret = bpf_map__update_elem(skel->maps.pcpu_array_map,
+ &key, sizeof(key), pbuf, cpu * sizeof(buf), 0);
+ ASSERT_OK(ret, "pcpu_array_map update");
+ skel->data->ref--;
+ ret = bpf_prog_test_run_opts(bpf_program__fd(skel->progs.test_map_kptr_ref3), &opts);
+ ASSERT_OK(ret, "test_map_kptr_ref3 refcount");
+ ASSERT_OK(opts.retval, "test_map_kptr_ref3 retval");
+
+ ret = bpf_map__delete_elem(skel->maps.hash_map, &key, sizeof(key), 0);
+ ASSERT_OK(ret, "hash_map delete");
+ skel->data->ref--;
+ ret = bpf_prog_test_run_opts(bpf_program__fd(skel->progs.test_map_kptr_ref3), &opts);
+ ASSERT_OK(ret, "test_map_kptr_ref3 refcount");
+ ASSERT_OK(opts.retval, "test_map_kptr_ref3 retval");
+
+ ret = bpf_map__delete_elem(skel->maps.pcpu_hash_map, &key, sizeof(key), 0);
+ ASSERT_OK(ret, "pcpu_hash_map delete");
+ skel->data->ref--;
+ ret = bpf_prog_test_run_opts(bpf_program__fd(skel->progs.test_map_kptr_ref3), &opts);
+ ASSERT_OK(ret, "test_map_kptr_ref3 refcount");
+ ASSERT_OK(opts.retval, "test_map_kptr_ref3 retval");
+
+ ret = bpf_map__delete_elem(skel->maps.hash_malloc_map, &key, sizeof(key), 0);
+ ASSERT_OK(ret, "hash_malloc_map delete");
+ skel->data->ref--;
+ ret = bpf_prog_test_run_opts(bpf_program__fd(skel->progs.test_map_kptr_ref3), &opts);
+ ASSERT_OK(ret, "test_map_kptr_ref3 refcount");
+ ASSERT_OK(opts.retval, "test_map_kptr_ref3 retval");
+
+ ret = bpf_map__delete_elem(skel->maps.pcpu_hash_malloc_map, &key, sizeof(key), 0);
+ ASSERT_OK(ret, "pcpu_hash_malloc_map delete");
+ skel->data->ref--;
+ ret = bpf_prog_test_run_opts(bpf_program__fd(skel->progs.test_map_kptr_ref3), &opts);
+ ASSERT_OK(ret, "test_map_kptr_ref3 refcount");
+ ASSERT_OK(opts.retval, "test_map_kptr_ref3 retval");
+
+ ret = bpf_map__delete_elem(skel->maps.lru_hash_map, &key, sizeof(key), 0);
+ ASSERT_OK(ret, "lru_hash_map delete");
+ skel->data->ref--;
+ ret = bpf_prog_test_run_opts(bpf_program__fd(skel->progs.test_map_kptr_ref3), &opts);
+ ASSERT_OK(ret, "test_map_kptr_ref3 refcount");
+ ASSERT_OK(opts.retval, "test_map_kptr_ref3 retval");
+
+ ret = bpf_map__delete_elem(skel->maps.lru_pcpu_hash_map, &key, sizeof(key), 0);
+ ASSERT_OK(ret, "lru_pcpu_hash_map delete");
+ skel->data->ref--;
+ ret = bpf_prog_test_run_opts(bpf_program__fd(skel->progs.test_map_kptr_ref3), &opts);
+ ASSERT_OK(ret, "test_map_kptr_ref3 refcount");
+ ASSERT_OK(opts.retval, "test_map_kptr_ref3 retval");
+
+ ret = bpf_prog_test_run_opts(bpf_program__fd(skel->progs.test_ls_map_kptr_ref_del), &lopts);
+ ASSERT_OK(ret, "test_ls_map_kptr_ref_del delete");
+ skel->data->ref--;
+ ASSERT_OK(lopts.retval, "test_ls_map_kptr_ref_del retval");
+
+ free(pbuf);
+exit:
+ map_kptr__destroy(skel);
+}
+
+static int kern_sync_rcu_tasks_trace(struct rcu_tasks_trace_gp *rcu)
+{
+ long gp_seq = READ_ONCE(rcu->bss->gp_seq);
+ LIBBPF_OPTS(bpf_test_run_opts, opts);
+
+ if (!ASSERT_OK(bpf_prog_test_run_opts(bpf_program__fd(rcu->progs.do_call_rcu_tasks_trace),
+ &opts), "do_call_rcu_tasks_trace"))
+ return -EFAULT;
+ if (!ASSERT_OK(opts.retval, "opts.retval == 0"))
+ return -EFAULT;
+ while (gp_seq == READ_ONCE(rcu->bss->gp_seq))
+ sched_yield();
+ return 0;
+}
+
+void serial_test_map_kptr(void)
+{
+ struct rcu_tasks_trace_gp *skel;
+
+ RUN_TESTS(map_kptr_fail);
+
+ skel = rcu_tasks_trace_gp__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "rcu_tasks_trace_gp__open_and_load"))
+ return;
+ if (!ASSERT_OK(rcu_tasks_trace_gp__attach(skel), "rcu_tasks_trace_gp__attach"))
+ goto end;
+
+ if (test__start_subtest("success-map")) {
+ test_map_kptr_success(true);
+
+ ASSERT_OK(kern_sync_rcu_tasks_trace(skel), "sync rcu_tasks_trace");
+ ASSERT_OK(kern_sync_rcu(), "sync rcu");
+ /* Observe refcount dropping to 1 on bpf_map_free_deferred */
+ test_map_kptr_success(false);
+
+ ASSERT_OK(kern_sync_rcu_tasks_trace(skel), "sync rcu_tasks_trace");
+ ASSERT_OK(kern_sync_rcu(), "sync rcu");
+ /* Observe refcount dropping to 1 on synchronous delete elem */
+ test_map_kptr_success(true);
+ }
+
+end:
+ rcu_tasks_trace_gp__destroy(skel);
+ return;
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/map_lock.c b/tools/testing/selftests/bpf/prog_tests/map_lock.c
index ce17b1ed8709..1d6726f01dd2 100644
--- a/tools/testing/selftests/bpf/prog_tests/map_lock.c
+++ b/tools/testing/selftests/bpf/prog_tests/map_lock.c
@@ -4,14 +4,17 @@
static void *spin_lock_thread(void *arg)
{
- __u32 duration, retval;
int err, prog_fd = *(u32 *) arg;
+ LIBBPF_OPTS(bpf_test_run_opts, topts,
+ .data_in = &pkt_v4,
+ .data_size_in = sizeof(pkt_v4),
+ .repeat = 10000,
+ );
+
+ err = bpf_prog_test_run_opts(prog_fd, &topts);
+ ASSERT_OK(err, "test_run_opts err");
+ ASSERT_OK(topts.retval, "test_run_opts retval");
- err = bpf_prog_test_run(prog_fd, 10000, &pkt_v4, sizeof(pkt_v4),
- NULL, NULL, &retval, &duration);
- CHECK(err || retval, "",
- "err %d errno %d retval %d duration %d\n",
- err, errno, retval, duration);
pthread_exit(arg);
}
@@ -46,16 +49,16 @@ out:
void test_map_lock(void)
{
- const char *file = "./test_map_lock.o";
+ const char *file = "./test_map_lock.bpf.o";
int prog_fd, map_fd[2], vars[17] = {};
pthread_t thread_id[6];
struct bpf_object *obj = NULL;
int err = 0, key = 0, i;
void *ret;
- err = bpf_prog_load(file, BPF_PROG_TYPE_CGROUP_SKB, &obj, &prog_fd);
+ err = bpf_prog_test_load(file, BPF_PROG_TYPE_CGROUP_SKB, &obj, &prog_fd);
if (CHECK_FAIL(err)) {
- printf("test_map_lock:bpf_prog_load errno %d\n", errno);
+ printf("test_map_lock:bpf_prog_test_load errno %d\n", errno);
goto close_prog;
}
map_fd[0] = bpf_find_map(__func__, obj, "hash_map");
diff --git a/tools/testing/selftests/bpf/prog_tests/map_lookup_percpu_elem.c b/tools/testing/selftests/bpf/prog_tests/map_lookup_percpu_elem.c
new file mode 100644
index 000000000000..bfb1bf3fd427
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/map_lookup_percpu_elem.c
@@ -0,0 +1,58 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2022 Bytedance */
+
+#include <test_progs.h>
+#include "test_map_lookup_percpu_elem.skel.h"
+
+void test_map_lookup_percpu_elem(void)
+{
+ struct test_map_lookup_percpu_elem *skel;
+ __u64 key = 0, sum;
+ int ret, i, nr_cpus = libbpf_num_possible_cpus();
+ __u64 *buf;
+
+ buf = malloc(nr_cpus*sizeof(__u64));
+ if (!ASSERT_OK_PTR(buf, "malloc"))
+ return;
+
+ for (i = 0; i < nr_cpus; i++)
+ buf[i] = i;
+ sum = (nr_cpus - 1) * nr_cpus / 2;
+
+ skel = test_map_lookup_percpu_elem__open();
+ if (!ASSERT_OK_PTR(skel, "test_map_lookup_percpu_elem__open"))
+ goto exit;
+
+ skel->rodata->my_pid = getpid();
+ skel->rodata->nr_cpus = nr_cpus;
+
+ ret = test_map_lookup_percpu_elem__load(skel);
+ if (!ASSERT_OK(ret, "test_map_lookup_percpu_elem__load"))
+ goto cleanup;
+
+ ret = test_map_lookup_percpu_elem__attach(skel);
+ if (!ASSERT_OK(ret, "test_map_lookup_percpu_elem__attach"))
+ goto cleanup;
+
+ ret = bpf_map_update_elem(bpf_map__fd(skel->maps.percpu_array_map), &key, buf, 0);
+ ASSERT_OK(ret, "percpu_array_map update");
+
+ ret = bpf_map_update_elem(bpf_map__fd(skel->maps.percpu_hash_map), &key, buf, 0);
+ ASSERT_OK(ret, "percpu_hash_map update");
+
+ ret = bpf_map_update_elem(bpf_map__fd(skel->maps.percpu_lru_hash_map), &key, buf, 0);
+ ASSERT_OK(ret, "percpu_lru_hash_map update");
+
+ syscall(__NR_getuid);
+
+ test_map_lookup_percpu_elem__detach(skel);
+
+ ASSERT_EQ(skel->bss->percpu_array_elem_sum, sum, "percpu_array lookup percpu elem");
+ ASSERT_EQ(skel->bss->percpu_hash_elem_sum, sum, "percpu_hash lookup percpu elem");
+ ASSERT_EQ(skel->bss->percpu_lru_hash_elem_sum, sum, "percpu_lru_hash lookup percpu elem");
+
+cleanup:
+ test_map_lookup_percpu_elem__destroy(skel);
+exit:
+ free(buf);
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/map_ops.c b/tools/testing/selftests/bpf/prog_tests/map_ops.c
new file mode 100644
index 000000000000..be5e42a413b4
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/map_ops.c
@@ -0,0 +1,162 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2023 Meta Platforms, Inc. and affiliates. */
+
+#include <errno.h>
+#include <sys/syscall.h>
+#include <unistd.h>
+
+#include "test_map_ops.skel.h"
+#include "test_progs.h"
+
+static void map_update(void)
+{
+ (void)syscall(__NR_getpid);
+}
+
+static void map_delete(void)
+{
+ (void)syscall(__NR_getppid);
+}
+
+static void map_push(void)
+{
+ (void)syscall(__NR_getuid);
+}
+
+static void map_pop(void)
+{
+ (void)syscall(__NR_geteuid);
+}
+
+static void map_peek(void)
+{
+ (void)syscall(__NR_getgid);
+}
+
+static void map_for_each_pass(void)
+{
+ (void)syscall(__NR_gettid);
+}
+
+static void map_for_each_fail(void)
+{
+ (void)syscall(__NR_getpgid);
+}
+
+static int setup(struct test_map_ops **skel)
+{
+ int err = 0;
+
+ if (!skel)
+ return -1;
+
+ *skel = test_map_ops__open();
+ if (!ASSERT_OK_PTR(*skel, "test_map_ops__open"))
+ return -1;
+
+ (*skel)->rodata->pid = getpid();
+
+ err = test_map_ops__load(*skel);
+ if (!ASSERT_OK(err, "test_map_ops__load"))
+ return err;
+
+ err = test_map_ops__attach(*skel);
+ if (!ASSERT_OK(err, "test_map_ops__attach"))
+ return err;
+
+ return err;
+}
+
+static void teardown(struct test_map_ops **skel)
+{
+ if (skel && *skel)
+ test_map_ops__destroy(*skel);
+}
+
+static void map_ops_update_delete_subtest(void)
+{
+ struct test_map_ops *skel;
+
+ if (setup(&skel))
+ goto teardown;
+
+ map_update();
+ ASSERT_OK(skel->bss->err, "map_update_initial");
+
+ map_update();
+ ASSERT_LT(skel->bss->err, 0, "map_update_existing");
+ ASSERT_EQ(skel->bss->err, -EEXIST, "map_update_existing");
+
+ map_delete();
+ ASSERT_OK(skel->bss->err, "map_delete_existing");
+
+ map_delete();
+ ASSERT_LT(skel->bss->err, 0, "map_delete_non_existing");
+ ASSERT_EQ(skel->bss->err, -ENOENT, "map_delete_non_existing");
+
+teardown:
+ teardown(&skel);
+}
+
+static void map_ops_push_peek_pop_subtest(void)
+{
+ struct test_map_ops *skel;
+
+ if (setup(&skel))
+ goto teardown;
+
+ map_push();
+ ASSERT_OK(skel->bss->err, "map_push_initial");
+
+ map_push();
+ ASSERT_LT(skel->bss->err, 0, "map_push_when_full");
+ ASSERT_EQ(skel->bss->err, -E2BIG, "map_push_when_full");
+
+ map_peek();
+ ASSERT_OK(skel->bss->err, "map_peek");
+
+ map_pop();
+ ASSERT_OK(skel->bss->err, "map_pop");
+
+ map_peek();
+ ASSERT_LT(skel->bss->err, 0, "map_peek_when_empty");
+ ASSERT_EQ(skel->bss->err, -ENOENT, "map_peek_when_empty");
+
+ map_pop();
+ ASSERT_LT(skel->bss->err, 0, "map_pop_when_empty");
+ ASSERT_EQ(skel->bss->err, -ENOENT, "map_pop_when_empty");
+
+teardown:
+ teardown(&skel);
+}
+
+static void map_ops_for_each_subtest(void)
+{
+ struct test_map_ops *skel;
+
+ if (setup(&skel))
+ goto teardown;
+
+ map_for_each_pass();
+ /* expect to iterate over 1 element */
+ ASSERT_EQ(skel->bss->err, 1, "map_for_each_no_flags");
+
+ map_for_each_fail();
+ ASSERT_LT(skel->bss->err, 0, "map_for_each_with_flags");
+ ASSERT_EQ(skel->bss->err, -EINVAL, "map_for_each_with_flags");
+
+teardown:
+ teardown(&skel);
+}
+
+void test_map_ops(void)
+{
+ if (test__start_subtest("map_ops_update_delete"))
+ map_ops_update_delete_subtest();
+
+ if (test__start_subtest("map_ops_push_peek_pop"))
+ map_ops_push_peek_pop_subtest();
+
+ if (test__start_subtest("map_ops_for_each"))
+ map_ops_for_each_subtest();
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/map_ptr.c b/tools/testing/selftests/bpf/prog_tests/map_ptr.c
new file mode 100644
index 000000000000..43e502acf050
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/map_ptr.c
@@ -0,0 +1,45 @@
+// SPDX-License-Identifier: GPL-2.0
+// Copyright (c) 2020 Facebook
+
+#include <test_progs.h>
+#include <network_helpers.h>
+
+#include "map_ptr_kern.lskel.h"
+
+void test_map_ptr(void)
+{
+ struct map_ptr_kern_lskel *skel;
+ char buf[128];
+ int err;
+ int page_size = getpagesize();
+ LIBBPF_OPTS(bpf_test_run_opts, topts,
+ .data_in = &pkt_v4,
+ .data_size_in = sizeof(pkt_v4),
+ .data_out = buf,
+ .data_size_out = sizeof(buf),
+ .repeat = 1,
+ );
+
+ skel = map_ptr_kern_lskel__open();
+ if (!ASSERT_OK_PTR(skel, "skel_open"))
+ return;
+
+ skel->maps.m_ringbuf.max_entries = page_size;
+
+ err = map_ptr_kern_lskel__load(skel);
+ if (!ASSERT_OK(err, "skel_load"))
+ goto cleanup;
+
+ skel->bss->page_size = page_size;
+
+ err = bpf_prog_test_run_opts(skel->progs.cg_skb.prog_fd, &topts);
+
+ if (!ASSERT_OK(err, "test_run"))
+ goto cleanup;
+
+ if (!ASSERT_NEQ(topts.retval, 0, "test_run retval"))
+ goto cleanup;
+
+cleanup:
+ map_ptr_kern_lskel__destroy(skel);
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/metadata.c b/tools/testing/selftests/bpf/prog_tests/metadata.c
new file mode 100644
index 000000000000..8b67dfc10f5c
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/metadata.c
@@ -0,0 +1,141 @@
+// SPDX-License-Identifier: GPL-2.0-only
+
+/*
+ * Copyright 2020 Google LLC.
+ */
+
+#include <test_progs.h>
+#include <cgroup_helpers.h>
+#include <network_helpers.h>
+
+#include "metadata_unused.skel.h"
+#include "metadata_used.skel.h"
+
+static int duration;
+
+static int prog_holds_map(int prog_fd, int map_fd)
+{
+ struct bpf_prog_info prog_info = {};
+ struct bpf_map_info map_info = {};
+ __u32 prog_info_len;
+ __u32 map_info_len;
+ __u32 *map_ids;
+ int nr_maps;
+ int ret;
+ int i;
+
+ map_info_len = sizeof(map_info);
+ ret = bpf_map_get_info_by_fd(map_fd, &map_info, &map_info_len);
+ if (ret)
+ return -errno;
+
+ prog_info_len = sizeof(prog_info);
+ ret = bpf_prog_get_info_by_fd(prog_fd, &prog_info, &prog_info_len);
+ if (ret)
+ return -errno;
+
+ map_ids = calloc(prog_info.nr_map_ids, sizeof(__u32));
+ if (!map_ids)
+ return -ENOMEM;
+
+ nr_maps = prog_info.nr_map_ids;
+ memset(&prog_info, 0, sizeof(prog_info));
+ prog_info.nr_map_ids = nr_maps;
+ prog_info.map_ids = ptr_to_u64(map_ids);
+ prog_info_len = sizeof(prog_info);
+
+ ret = bpf_prog_get_info_by_fd(prog_fd, &prog_info, &prog_info_len);
+ if (ret) {
+ ret = -errno;
+ goto free_map_ids;
+ }
+
+ ret = -ENOENT;
+ for (i = 0; i < prog_info.nr_map_ids; i++) {
+ if (map_ids[i] == map_info.id) {
+ ret = 0;
+ break;
+ }
+ }
+
+free_map_ids:
+ free(map_ids);
+ return ret;
+}
+
+static void test_metadata_unused(void)
+{
+ struct metadata_unused *obj;
+ int err;
+
+ obj = metadata_unused__open_and_load();
+ if (CHECK(!obj, "skel-load", "errno %d", errno))
+ return;
+
+ err = prog_holds_map(bpf_program__fd(obj->progs.prog),
+ bpf_map__fd(obj->maps.rodata));
+ if (CHECK(err, "prog-holds-rodata", "errno: %d", err))
+ return;
+
+ /* Assert that we can access the metadata in skel and the values are
+ * what we expect.
+ */
+ if (CHECK(strncmp(obj->rodata->bpf_metadata_a, "foo",
+ sizeof(obj->rodata->bpf_metadata_a)),
+ "bpf_metadata_a", "expected \"foo\", value differ"))
+ goto close_bpf_object;
+ if (CHECK(obj->rodata->bpf_metadata_b != 1, "bpf_metadata_b",
+ "expected 1, got %d", obj->rodata->bpf_metadata_b))
+ goto close_bpf_object;
+
+ /* Assert that binding metadata map to prog again succeeds. */
+ err = bpf_prog_bind_map(bpf_program__fd(obj->progs.prog),
+ bpf_map__fd(obj->maps.rodata), NULL);
+ CHECK(err, "rebind_map", "errno %d, expected 0", errno);
+
+close_bpf_object:
+ metadata_unused__destroy(obj);
+}
+
+static void test_metadata_used(void)
+{
+ struct metadata_used *obj;
+ int err;
+
+ obj = metadata_used__open_and_load();
+ if (CHECK(!obj, "skel-load", "errno %d", errno))
+ return;
+
+ err = prog_holds_map(bpf_program__fd(obj->progs.prog),
+ bpf_map__fd(obj->maps.rodata));
+ if (CHECK(err, "prog-holds-rodata", "errno: %d", err))
+ return;
+
+ /* Assert that we can access the metadata in skel and the values are
+ * what we expect.
+ */
+ if (CHECK(strncmp(obj->rodata->bpf_metadata_a, "bar",
+ sizeof(obj->rodata->bpf_metadata_a)),
+ "metadata_a", "expected \"bar\", value differ"))
+ goto close_bpf_object;
+ if (CHECK(obj->rodata->bpf_metadata_b != 2, "metadata_b",
+ "expected 2, got %d", obj->rodata->bpf_metadata_b))
+ goto close_bpf_object;
+
+ /* Assert that binding metadata map to prog again succeeds. */
+ err = bpf_prog_bind_map(bpf_program__fd(obj->progs.prog),
+ bpf_map__fd(obj->maps.rodata), NULL);
+ CHECK(err, "rebind_map", "errno %d, expected 0", errno);
+
+close_bpf_object:
+ metadata_used__destroy(obj);
+}
+
+void test_metadata(void)
+{
+ if (test__start_subtest("unused"))
+ test_metadata_unused();
+
+ if (test__start_subtest("used"))
+ test_metadata_used();
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/migrate_reuseport.c b/tools/testing/selftests/bpf/prog_tests/migrate_reuseport.c
new file mode 100644
index 000000000000..653b0a20fab9
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/migrate_reuseport.c
@@ -0,0 +1,559 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Check if we can migrate child sockets.
+ *
+ * 1. call listen() for 4 server sockets.
+ * 2. call connect() for 25 client sockets.
+ * 3. call listen() for 1 server socket. (migration target)
+ * 4. update a map to migrate all child sockets
+ * to the last server socket (migrate_map[cookie] = 4)
+ * 5. call shutdown() for first 4 server sockets
+ * and migrate the requests in the accept queue
+ * to the last server socket.
+ * 6. call listen() for the second server socket.
+ * 7. call shutdown() for the last server
+ * and migrate the requests in the accept queue
+ * to the second server socket.
+ * 8. call listen() for the last server.
+ * 9. call shutdown() for the second server
+ * and migrate the requests in the accept queue
+ * to the last server socket.
+ * 10. call accept() for the last server socket.
+ *
+ * Author: Kuniyuki Iwashima <kuniyu@amazon.co.jp>
+ */
+
+#include <bpf/bpf.h>
+#include <bpf/libbpf.h>
+
+#include "test_progs.h"
+#include "test_migrate_reuseport.skel.h"
+#include "network_helpers.h"
+
+#ifndef TCP_FASTOPEN_CONNECT
+#define TCP_FASTOPEN_CONNECT 30
+#endif
+
+#define IFINDEX_LO 1
+
+#define NR_SERVERS 5
+#define NR_CLIENTS (NR_SERVERS * 5)
+#define MIGRATED_TO (NR_SERVERS - 1)
+
+/* fastopenq->max_qlen and sk->sk_max_ack_backlog */
+#define QLEN (NR_CLIENTS * 5)
+
+#define MSG "Hello World\0"
+#define MSGLEN 12
+
+static struct migrate_reuseport_test_case {
+ const char *name;
+ __s64 servers[NR_SERVERS];
+ __s64 clients[NR_CLIENTS];
+ struct sockaddr_storage addr;
+ socklen_t addrlen;
+ int family;
+ int state;
+ bool drop_ack;
+ bool expire_synack_timer;
+ bool fastopen;
+ struct bpf_link *link;
+} test_cases[] = {
+ {
+ .name = "IPv4 TCP_ESTABLISHED inet_csk_listen_stop",
+ .family = AF_INET,
+ .state = BPF_TCP_ESTABLISHED,
+ .drop_ack = false,
+ .expire_synack_timer = false,
+ .fastopen = false,
+ },
+ {
+ .name = "IPv4 TCP_SYN_RECV inet_csk_listen_stop",
+ .family = AF_INET,
+ .state = BPF_TCP_SYN_RECV,
+ .drop_ack = true,
+ .expire_synack_timer = false,
+ .fastopen = true,
+ },
+ {
+ .name = "IPv4 TCP_NEW_SYN_RECV reqsk_timer_handler",
+ .family = AF_INET,
+ .state = BPF_TCP_NEW_SYN_RECV,
+ .drop_ack = true,
+ .expire_synack_timer = true,
+ .fastopen = false,
+ },
+ {
+ .name = "IPv4 TCP_NEW_SYN_RECV inet_csk_complete_hashdance",
+ .family = AF_INET,
+ .state = BPF_TCP_NEW_SYN_RECV,
+ .drop_ack = true,
+ .expire_synack_timer = false,
+ .fastopen = false,
+ },
+ {
+ .name = "IPv6 TCP_ESTABLISHED inet_csk_listen_stop",
+ .family = AF_INET6,
+ .state = BPF_TCP_ESTABLISHED,
+ .drop_ack = false,
+ .expire_synack_timer = false,
+ .fastopen = false,
+ },
+ {
+ .name = "IPv6 TCP_SYN_RECV inet_csk_listen_stop",
+ .family = AF_INET6,
+ .state = BPF_TCP_SYN_RECV,
+ .drop_ack = true,
+ .expire_synack_timer = false,
+ .fastopen = true,
+ },
+ {
+ .name = "IPv6 TCP_NEW_SYN_RECV reqsk_timer_handler",
+ .family = AF_INET6,
+ .state = BPF_TCP_NEW_SYN_RECV,
+ .drop_ack = true,
+ .expire_synack_timer = true,
+ .fastopen = false,
+ },
+ {
+ .name = "IPv6 TCP_NEW_SYN_RECV inet_csk_complete_hashdance",
+ .family = AF_INET6,
+ .state = BPF_TCP_NEW_SYN_RECV,
+ .drop_ack = true,
+ .expire_synack_timer = false,
+ .fastopen = false,
+ }
+};
+
+static void init_fds(__s64 fds[], int len)
+{
+ int i;
+
+ for (i = 0; i < len; i++)
+ fds[i] = -1;
+}
+
+static void close_fds(__s64 fds[], int len)
+{
+ int i;
+
+ for (i = 0; i < len; i++) {
+ if (fds[i] != -1) {
+ close(fds[i]);
+ fds[i] = -1;
+ }
+ }
+}
+
+static int setup_fastopen(char *buf, int size, int *saved_len, bool restore)
+{
+ int err = 0, fd, len;
+
+ fd = open("/proc/sys/net/ipv4/tcp_fastopen", O_RDWR);
+ if (!ASSERT_NEQ(fd, -1, "open"))
+ return -1;
+
+ if (restore) {
+ len = write(fd, buf, *saved_len);
+ if (!ASSERT_EQ(len, *saved_len, "write - restore"))
+ err = -1;
+ } else {
+ *saved_len = read(fd, buf, size);
+ if (!ASSERT_GE(*saved_len, 1, "read")) {
+ err = -1;
+ goto close;
+ }
+
+ err = lseek(fd, 0, SEEK_SET);
+ if (!ASSERT_OK(err, "lseek"))
+ goto close;
+
+ /* (TFO_CLIENT_ENABLE | TFO_SERVER_ENABLE |
+ * TFO_CLIENT_NO_COOKIE | TFO_SERVER_COOKIE_NOT_REQD)
+ */
+ len = write(fd, "519", 3);
+ if (!ASSERT_EQ(len, 3, "write - setup"))
+ err = -1;
+ }
+
+close:
+ close(fd);
+
+ return err;
+}
+
+static int drop_ack(struct migrate_reuseport_test_case *test_case,
+ struct test_migrate_reuseport *skel)
+{
+ if (test_case->family == AF_INET)
+ skel->bss->server_port = ((struct sockaddr_in *)
+ &test_case->addr)->sin_port;
+ else
+ skel->bss->server_port = ((struct sockaddr_in6 *)
+ &test_case->addr)->sin6_port;
+
+ test_case->link = bpf_program__attach_xdp(skel->progs.drop_ack,
+ IFINDEX_LO);
+ if (!ASSERT_OK_PTR(test_case->link, "bpf_program__attach_xdp"))
+ return -1;
+
+ return 0;
+}
+
+static int pass_ack(struct migrate_reuseport_test_case *test_case)
+{
+ int err;
+
+ err = bpf_link__destroy(test_case->link);
+ if (!ASSERT_OK(err, "bpf_link__destroy"))
+ return -1;
+
+ test_case->link = NULL;
+
+ return 0;
+}
+
+static int start_servers(struct migrate_reuseport_test_case *test_case,
+ struct test_migrate_reuseport *skel)
+{
+ int i, err, prog_fd, reuseport = 1, qlen = QLEN;
+
+ prog_fd = bpf_program__fd(skel->progs.migrate_reuseport);
+
+ make_sockaddr(test_case->family,
+ test_case->family == AF_INET ? "127.0.0.1" : "::1", 0,
+ &test_case->addr, &test_case->addrlen);
+
+ for (i = 0; i < NR_SERVERS; i++) {
+ test_case->servers[i] = socket(test_case->family, SOCK_STREAM,
+ IPPROTO_TCP);
+ if (!ASSERT_NEQ(test_case->servers[i], -1, "socket"))
+ return -1;
+
+ err = setsockopt(test_case->servers[i], SOL_SOCKET,
+ SO_REUSEPORT, &reuseport, sizeof(reuseport));
+ if (!ASSERT_OK(err, "setsockopt - SO_REUSEPORT"))
+ return -1;
+
+ err = bind(test_case->servers[i],
+ (struct sockaddr *)&test_case->addr,
+ test_case->addrlen);
+ if (!ASSERT_OK(err, "bind"))
+ return -1;
+
+ if (i == 0) {
+ err = setsockopt(test_case->servers[i], SOL_SOCKET,
+ SO_ATTACH_REUSEPORT_EBPF,
+ &prog_fd, sizeof(prog_fd));
+ if (!ASSERT_OK(err,
+ "setsockopt - SO_ATTACH_REUSEPORT_EBPF"))
+ return -1;
+
+ err = getsockname(test_case->servers[i],
+ (struct sockaddr *)&test_case->addr,
+ &test_case->addrlen);
+ if (!ASSERT_OK(err, "getsockname"))
+ return -1;
+ }
+
+ if (test_case->fastopen) {
+ err = setsockopt(test_case->servers[i],
+ SOL_TCP, TCP_FASTOPEN,
+ &qlen, sizeof(qlen));
+ if (!ASSERT_OK(err, "setsockopt - TCP_FASTOPEN"))
+ return -1;
+ }
+
+ /* All requests will be tied to the first four listeners */
+ if (i != MIGRATED_TO) {
+ err = listen(test_case->servers[i], qlen);
+ if (!ASSERT_OK(err, "listen"))
+ return -1;
+ }
+ }
+
+ return 0;
+}
+
+static int start_clients(struct migrate_reuseport_test_case *test_case)
+{
+ char buf[MSGLEN] = MSG;
+ int i, err;
+
+ for (i = 0; i < NR_CLIENTS; i++) {
+ test_case->clients[i] = socket(test_case->family, SOCK_STREAM,
+ IPPROTO_TCP);
+ if (!ASSERT_NEQ(test_case->clients[i], -1, "socket"))
+ return -1;
+
+ /* The attached XDP program drops only the final ACK, so
+ * clients will transition to TCP_ESTABLISHED immediately.
+ */
+ err = settimeo(test_case->clients[i], 100);
+ if (!ASSERT_OK(err, "settimeo"))
+ return -1;
+
+ if (test_case->fastopen) {
+ int fastopen = 1;
+
+ err = setsockopt(test_case->clients[i], IPPROTO_TCP,
+ TCP_FASTOPEN_CONNECT, &fastopen,
+ sizeof(fastopen));
+ if (!ASSERT_OK(err,
+ "setsockopt - TCP_FASTOPEN_CONNECT"))
+ return -1;
+ }
+
+ err = connect(test_case->clients[i],
+ (struct sockaddr *)&test_case->addr,
+ test_case->addrlen);
+ if (!ASSERT_OK(err, "connect"))
+ return -1;
+
+ err = write(test_case->clients[i], buf, MSGLEN);
+ if (!ASSERT_EQ(err, MSGLEN, "write"))
+ return -1;
+ }
+
+ return 0;
+}
+
+static int update_maps(struct migrate_reuseport_test_case *test_case,
+ struct test_migrate_reuseport *skel)
+{
+ int i, err, migrated_to = MIGRATED_TO;
+ int reuseport_map_fd, migrate_map_fd;
+ __u64 value;
+
+ reuseport_map_fd = bpf_map__fd(skel->maps.reuseport_map);
+ migrate_map_fd = bpf_map__fd(skel->maps.migrate_map);
+
+ for (i = 0; i < NR_SERVERS; i++) {
+ value = (__u64)test_case->servers[i];
+ err = bpf_map_update_elem(reuseport_map_fd, &i, &value,
+ BPF_NOEXIST);
+ if (!ASSERT_OK(err, "bpf_map_update_elem - reuseport_map"))
+ return -1;
+
+ err = bpf_map_lookup_elem(reuseport_map_fd, &i, &value);
+ if (!ASSERT_OK(err, "bpf_map_lookup_elem - reuseport_map"))
+ return -1;
+
+ err = bpf_map_update_elem(migrate_map_fd, &value, &migrated_to,
+ BPF_NOEXIST);
+ if (!ASSERT_OK(err, "bpf_map_update_elem - migrate_map"))
+ return -1;
+ }
+
+ return 0;
+}
+
+static int migrate_dance(struct migrate_reuseport_test_case *test_case)
+{
+ int i, err;
+
+ /* Migrate TCP_ESTABLISHED and TCP_SYN_RECV requests
+ * to the last listener based on eBPF.
+ */
+ for (i = 0; i < MIGRATED_TO; i++) {
+ err = shutdown(test_case->servers[i], SHUT_RDWR);
+ if (!ASSERT_OK(err, "shutdown"))
+ return -1;
+ }
+
+ /* No dance for TCP_NEW_SYN_RECV to migrate based on eBPF */
+ if (test_case->state == BPF_TCP_NEW_SYN_RECV)
+ return 0;
+
+ /* Note that we use the second listener instead of the
+ * first one here.
+ *
+ * The fist listener is bind()ed with port 0 and,
+ * SOCK_BINDPORT_LOCK is not set to sk_userlocks, so
+ * calling listen() again will bind() the first listener
+ * on a new ephemeral port and detach it from the existing
+ * reuseport group. (See: __inet_bind(), tcp_set_state())
+ *
+ * OTOH, the second one is bind()ed with a specific port,
+ * and SOCK_BINDPORT_LOCK is set. Thus, re-listen() will
+ * resurrect the listener on the existing reuseport group.
+ */
+ err = listen(test_case->servers[1], QLEN);
+ if (!ASSERT_OK(err, "listen"))
+ return -1;
+
+ /* Migrate from the last listener to the second one.
+ *
+ * All listeners were detached out of the reuseport_map,
+ * so migration will be done by kernel random pick from here.
+ */
+ err = shutdown(test_case->servers[MIGRATED_TO], SHUT_RDWR);
+ if (!ASSERT_OK(err, "shutdown"))
+ return -1;
+
+ /* Back to the existing reuseport group */
+ err = listen(test_case->servers[MIGRATED_TO], QLEN);
+ if (!ASSERT_OK(err, "listen"))
+ return -1;
+
+ /* Migrate back to the last one from the second one */
+ err = shutdown(test_case->servers[1], SHUT_RDWR);
+ if (!ASSERT_OK(err, "shutdown"))
+ return -1;
+
+ return 0;
+}
+
+static void count_requests(struct migrate_reuseport_test_case *test_case,
+ struct test_migrate_reuseport *skel)
+{
+ struct sockaddr_storage addr;
+ socklen_t len = sizeof(addr);
+ int err, cnt = 0, client;
+ char buf[MSGLEN];
+
+ err = settimeo(test_case->servers[MIGRATED_TO], 4000);
+ if (!ASSERT_OK(err, "settimeo"))
+ goto out;
+
+ for (; cnt < NR_CLIENTS; cnt++) {
+ client = accept(test_case->servers[MIGRATED_TO],
+ (struct sockaddr *)&addr, &len);
+ if (!ASSERT_NEQ(client, -1, "accept"))
+ goto out;
+
+ memset(buf, 0, MSGLEN);
+ read(client, &buf, MSGLEN);
+ close(client);
+
+ if (!ASSERT_STREQ(buf, MSG, "read"))
+ goto out;
+ }
+
+out:
+ ASSERT_EQ(cnt, NR_CLIENTS, "count in userspace");
+
+ switch (test_case->state) {
+ case BPF_TCP_ESTABLISHED:
+ cnt = skel->bss->migrated_at_close;
+ break;
+ case BPF_TCP_SYN_RECV:
+ cnt = skel->bss->migrated_at_close_fastopen;
+ break;
+ case BPF_TCP_NEW_SYN_RECV:
+ if (test_case->expire_synack_timer)
+ cnt = skel->bss->migrated_at_send_synack;
+ else
+ cnt = skel->bss->migrated_at_recv_ack;
+ break;
+ default:
+ cnt = 0;
+ }
+
+ ASSERT_EQ(cnt, NR_CLIENTS, "count in BPF prog");
+}
+
+static void run_test(struct migrate_reuseport_test_case *test_case,
+ struct test_migrate_reuseport *skel)
+{
+ int err, saved_len;
+ char buf[16];
+
+ skel->bss->migrated_at_close = 0;
+ skel->bss->migrated_at_close_fastopen = 0;
+ skel->bss->migrated_at_send_synack = 0;
+ skel->bss->migrated_at_recv_ack = 0;
+
+ init_fds(test_case->servers, NR_SERVERS);
+ init_fds(test_case->clients, NR_CLIENTS);
+
+ if (test_case->fastopen) {
+ memset(buf, 0, sizeof(buf));
+
+ err = setup_fastopen(buf, sizeof(buf), &saved_len, false);
+ if (!ASSERT_OK(err, "setup_fastopen - setup"))
+ return;
+ }
+
+ err = start_servers(test_case, skel);
+ if (!ASSERT_OK(err, "start_servers"))
+ goto close_servers;
+
+ if (test_case->drop_ack) {
+ /* Drop the final ACK of the 3-way handshake and stick the
+ * in-flight requests on TCP_SYN_RECV or TCP_NEW_SYN_RECV.
+ */
+ err = drop_ack(test_case, skel);
+ if (!ASSERT_OK(err, "drop_ack"))
+ goto close_servers;
+ }
+
+ /* Tie requests to the first four listeners */
+ err = start_clients(test_case);
+ if (!ASSERT_OK(err, "start_clients"))
+ goto close_clients;
+
+ err = listen(test_case->servers[MIGRATED_TO], QLEN);
+ if (!ASSERT_OK(err, "listen"))
+ goto close_clients;
+
+ err = update_maps(test_case, skel);
+ if (!ASSERT_OK(err, "fill_maps"))
+ goto close_clients;
+
+ /* Migrate the requests in the accept queue only.
+ * TCP_NEW_SYN_RECV requests are not migrated at this point.
+ */
+ err = migrate_dance(test_case);
+ if (!ASSERT_OK(err, "migrate_dance"))
+ goto close_clients;
+
+ if (test_case->expire_synack_timer) {
+ /* Wait for SYN+ACK timers to expire so that
+ * reqsk_timer_handler() migrates TCP_NEW_SYN_RECV requests.
+ */
+ sleep(1);
+ }
+
+ if (test_case->link) {
+ /* Resume 3WHS and migrate TCP_NEW_SYN_RECV requests */
+ err = pass_ack(test_case);
+ if (!ASSERT_OK(err, "pass_ack"))
+ goto close_clients;
+ }
+
+ count_requests(test_case, skel);
+
+close_clients:
+ close_fds(test_case->clients, NR_CLIENTS);
+
+ if (test_case->link) {
+ err = pass_ack(test_case);
+ ASSERT_OK(err, "pass_ack - clean up");
+ }
+
+close_servers:
+ close_fds(test_case->servers, NR_SERVERS);
+
+ if (test_case->fastopen) {
+ err = setup_fastopen(buf, sizeof(buf), &saved_len, true);
+ ASSERT_OK(err, "setup_fastopen - restore");
+ }
+}
+
+void serial_test_migrate_reuseport(void)
+{
+ struct test_migrate_reuseport *skel;
+ int i;
+
+ skel = test_migrate_reuseport__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "open_and_load"))
+ return;
+
+ for (i = 0; i < ARRAY_SIZE(test_cases); i++) {
+ test__start_subtest(test_cases[i].name);
+ run_test(&test_cases[i], skel);
+ }
+
+ test_migrate_reuseport__destroy(skel);
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/missed.c b/tools/testing/selftests/bpf/prog_tests/missed.c
new file mode 100644
index 000000000000..70d90c43537c
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/missed.c
@@ -0,0 +1,138 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <test_progs.h>
+#include "missed_kprobe.skel.h"
+#include "missed_kprobe_recursion.skel.h"
+#include "missed_tp_recursion.skel.h"
+
+/*
+ * Putting kprobe on bpf_fentry_test1 that calls bpf_kfunc_common_test
+ * kfunc, which has also kprobe on. The latter won't get triggered due
+ * to kprobe recursion check and kprobe missed counter is incremented.
+ */
+static void test_missed_perf_kprobe(void)
+{
+ LIBBPF_OPTS(bpf_test_run_opts, topts);
+ struct bpf_link_info info = {};
+ struct missed_kprobe *skel;
+ __u32 len = sizeof(info);
+ int err, prog_fd;
+
+ skel = missed_kprobe__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "missed_kprobe__open_and_load"))
+ goto cleanup;
+
+ err = missed_kprobe__attach(skel);
+ if (!ASSERT_OK(err, "missed_kprobe__attach"))
+ goto cleanup;
+
+ prog_fd = bpf_program__fd(skel->progs.trigger);
+ err = bpf_prog_test_run_opts(prog_fd, &topts);
+ ASSERT_OK(err, "test_run");
+ ASSERT_EQ(topts.retval, 0, "test_run");
+
+ err = bpf_link_get_info_by_fd(bpf_link__fd(skel->links.test2), &info, &len);
+ if (!ASSERT_OK(err, "bpf_link_get_info_by_fd"))
+ goto cleanup;
+
+ ASSERT_EQ(info.type, BPF_LINK_TYPE_PERF_EVENT, "info.type");
+ ASSERT_EQ(info.perf_event.type, BPF_PERF_EVENT_KPROBE, "info.perf_event.type");
+ ASSERT_EQ(info.perf_event.kprobe.missed, 1, "info.perf_event.kprobe.missed");
+
+cleanup:
+ missed_kprobe__destroy(skel);
+}
+
+static __u64 get_missed_count(int fd)
+{
+ struct bpf_prog_info info = {};
+ __u32 len = sizeof(info);
+ int err;
+
+ err = bpf_prog_get_info_by_fd(fd, &info, &len);
+ if (!ASSERT_OK(err, "bpf_prog_get_info_by_fd"))
+ return (__u64) -1;
+ return info.recursion_misses;
+}
+
+/*
+ * Putting kprobe.multi on bpf_fentry_test1 that calls bpf_kfunc_common_test
+ * kfunc which has 3 perf event kprobes and 1 kprobe.multi attached.
+ *
+ * Because fprobe (kprobe.multi attach layear) does not have strict recursion
+ * check the kprobe's bpf_prog_active check is hit for test2-5.
+ */
+static void test_missed_kprobe_recursion(void)
+{
+ LIBBPF_OPTS(bpf_test_run_opts, topts);
+ struct missed_kprobe_recursion *skel;
+ int err, prog_fd;
+
+ skel = missed_kprobe_recursion__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "missed_kprobe_recursion__open_and_load"))
+ goto cleanup;
+
+ err = missed_kprobe_recursion__attach(skel);
+ if (!ASSERT_OK(err, "missed_kprobe_recursion__attach"))
+ goto cleanup;
+
+ prog_fd = bpf_program__fd(skel->progs.trigger);
+ err = bpf_prog_test_run_opts(prog_fd, &topts);
+ ASSERT_OK(err, "test_run");
+ ASSERT_EQ(topts.retval, 0, "test_run");
+
+ ASSERT_EQ(get_missed_count(bpf_program__fd(skel->progs.test1)), 0, "test1_recursion_misses");
+ ASSERT_GE(get_missed_count(bpf_program__fd(skel->progs.test2)), 1, "test2_recursion_misses");
+ ASSERT_GE(get_missed_count(bpf_program__fd(skel->progs.test3)), 1, "test3_recursion_misses");
+ ASSERT_GE(get_missed_count(bpf_program__fd(skel->progs.test4)), 1, "test4_recursion_misses");
+ ASSERT_GE(get_missed_count(bpf_program__fd(skel->progs.test5)), 1, "test5_recursion_misses");
+
+cleanup:
+ missed_kprobe_recursion__destroy(skel);
+}
+
+/*
+ * Putting kprobe on bpf_fentry_test1 that calls bpf_printk and invokes
+ * bpf_trace_printk tracepoint. The bpf_trace_printk tracepoint has test[234]
+ * programs attached to it.
+ *
+ * Because kprobe execution goes through bpf_prog_active check, programs
+ * attached to the tracepoint will fail the recursion check and increment
+ * the recursion_misses stats.
+ */
+static void test_missed_tp_recursion(void)
+{
+ LIBBPF_OPTS(bpf_test_run_opts, topts);
+ struct missed_tp_recursion *skel;
+ int err, prog_fd;
+
+ skel = missed_tp_recursion__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "missed_tp_recursion__open_and_load"))
+ goto cleanup;
+
+ err = missed_tp_recursion__attach(skel);
+ if (!ASSERT_OK(err, "missed_tp_recursion__attach"))
+ goto cleanup;
+
+ prog_fd = bpf_program__fd(skel->progs.trigger);
+ err = bpf_prog_test_run_opts(prog_fd, &topts);
+ ASSERT_OK(err, "test_run");
+ ASSERT_EQ(topts.retval, 0, "test_run");
+
+ ASSERT_EQ(get_missed_count(bpf_program__fd(skel->progs.test1)), 0, "test1_recursion_misses");
+ ASSERT_EQ(get_missed_count(bpf_program__fd(skel->progs.test2)), 1, "test2_recursion_misses");
+ ASSERT_EQ(get_missed_count(bpf_program__fd(skel->progs.test3)), 1, "test3_recursion_misses");
+ ASSERT_EQ(get_missed_count(bpf_program__fd(skel->progs.test4)), 1, "test4_recursion_misses");
+
+cleanup:
+ missed_tp_recursion__destroy(skel);
+}
+
+void test_missed(void)
+{
+ if (test__start_subtest("perf_kprobe"))
+ test_missed_perf_kprobe();
+ if (test__start_subtest("kprobe_recursion"))
+ test_missed_kprobe_recursion();
+ if (test__start_subtest("tp_recursion"))
+ test_missed_tp_recursion();
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/mmap.c b/tools/testing/selftests/bpf/prog_tests/mmap.c
index 43d0b5578f46..a271d5a0f7ab 100644
--- a/tools/testing/selftests/bpf/prog_tests/mmap.c
+++ b/tools/testing/selftests/bpf/prog_tests/mmap.c
@@ -21,7 +21,7 @@ void test_mmap(void)
const long page_size = sysconf(_SC_PAGE_SIZE);
int err, duration = 0, i, data_map_fd, data_map_id, tmp_fd, rdmap_fd;
struct bpf_map *data_map, *bss_map;
- void *bss_mmaped = NULL, *map_mmaped = NULL, *tmp1, *tmp2;
+ void *bss_mmaped = NULL, *map_mmaped = NULL, *tmp0, *tmp1, *tmp2;
struct test_mmap__bss *bss_data;
struct bpf_map_info map_info;
__u32 map_info_sz = sizeof(map_info);
@@ -29,28 +29,42 @@ void test_mmap(void)
struct test_mmap *skel;
__u64 val = 0;
- skel = test_mmap__open_and_load();
- if (CHECK(!skel, "skel_open_and_load", "skeleton open/load failed\n"))
+ skel = test_mmap__open();
+ if (CHECK(!skel, "skel_open", "skeleton open failed\n"))
return;
+ err = bpf_map__set_max_entries(skel->maps.rdonly_map, page_size);
+ if (CHECK(err != 0, "bpf_map__set_max_entries", "bpf_map__set_max_entries failed\n"))
+ goto cleanup;
+
+ /* at least 4 pages of data */
+ err = bpf_map__set_max_entries(skel->maps.data_map,
+ 4 * (page_size / sizeof(u64)));
+ if (CHECK(err != 0, "bpf_map__set_max_entries", "bpf_map__set_max_entries failed\n"))
+ goto cleanup;
+
+ err = test_mmap__load(skel);
+ if (CHECK(err != 0, "skel_load", "skeleton load failed\n"))
+ goto cleanup;
+
bss_map = skel->maps.bss;
data_map = skel->maps.data_map;
data_map_fd = bpf_map__fd(data_map);
rdmap_fd = bpf_map__fd(skel->maps.rdonly_map);
- tmp1 = mmap(NULL, 4096, PROT_READ | PROT_WRITE, MAP_SHARED, rdmap_fd, 0);
+ tmp1 = mmap(NULL, page_size, PROT_READ | PROT_WRITE, MAP_SHARED, rdmap_fd, 0);
if (CHECK(tmp1 != MAP_FAILED, "rdonly_write_mmap", "unexpected success\n")) {
- munmap(tmp1, 4096);
+ munmap(tmp1, page_size);
goto cleanup;
}
/* now double-check if it's mmap()'able at all */
- tmp1 = mmap(NULL, 4096, PROT_READ, MAP_SHARED, rdmap_fd, 0);
+ tmp1 = mmap(NULL, page_size, PROT_READ, MAP_SHARED, rdmap_fd, 0);
if (CHECK(tmp1 == MAP_FAILED, "rdonly_read_mmap", "failed: %d\n", errno))
goto cleanup;
/* get map's ID */
memset(&map_info, 0, map_info_sz);
- err = bpf_obj_get_info_by_fd(data_map_fd, &map_info, &map_info_sz);
+ err = bpf_map_get_info_by_fd(data_map_fd, &map_info, &map_info_sz);
if (CHECK(err, "map_get_info", "failed %d\n", errno))
goto cleanup;
data_map_id = map_info.id;
@@ -183,16 +197,23 @@ void test_mmap(void)
/* check some more advanced mmap() manipulations */
+ tmp0 = mmap(NULL, 4 * page_size, PROT_READ, MAP_SHARED | MAP_ANONYMOUS,
+ -1, 0);
+ if (CHECK(tmp0 == MAP_FAILED, "adv_mmap0", "errno %d\n", errno))
+ goto cleanup;
+
/* map all but last page: pages 1-3 mapped */
- tmp1 = mmap(NULL, 3 * page_size, PROT_READ, MAP_SHARED,
+ tmp1 = mmap(tmp0, 3 * page_size, PROT_READ, MAP_SHARED | MAP_FIXED,
data_map_fd, 0);
- if (CHECK(tmp1 == MAP_FAILED, "adv_mmap1", "errno %d\n", errno))
+ if (CHECK(tmp0 != tmp1, "adv_mmap1", "tmp0: %p, tmp1: %p\n", tmp0, tmp1)) {
+ munmap(tmp0, 4 * page_size);
goto cleanup;
+ }
/* unmap second page: pages 1, 3 mapped */
err = munmap(tmp1 + page_size, page_size);
if (CHECK(err, "adv_mmap2", "errno %d\n", errno)) {
- munmap(tmp1, map_sz);
+ munmap(tmp1, 4 * page_size);
goto cleanup;
}
@@ -201,7 +222,7 @@ void test_mmap(void)
MAP_SHARED | MAP_FIXED, data_map_fd, 0);
if (CHECK(tmp2 == MAP_FAILED, "adv_mmap3", "errno %d\n", errno)) {
munmap(tmp1, page_size);
- munmap(tmp1 + 2*page_size, page_size);
+ munmap(tmp1 + 2*page_size, 2 * page_size);
goto cleanup;
}
CHECK(tmp1 + page_size != tmp2, "adv_mmap4",
@@ -211,7 +232,7 @@ void test_mmap(void)
tmp2 = mmap(tmp1, 4 * page_size, PROT_READ, MAP_SHARED | MAP_FIXED,
data_map_fd, 0);
if (CHECK(tmp2 == MAP_FAILED, "adv_mmap5", "errno %d\n", errno)) {
- munmap(tmp1, 3 * page_size); /* unmap page 1 */
+ munmap(tmp1, 4 * page_size); /* unmap page 1 */
goto cleanup;
}
CHECK(tmp1 != tmp2, "adv_mmap6", "tmp1: %p, tmp2: %p\n", tmp1, tmp2);
diff --git a/tools/testing/selftests/bpf/prog_tests/modify_return.c b/tools/testing/selftests/bpf/prog_tests/modify_return.c
index 97fec70c600b..a70c99c2f8c8 100644
--- a/tools/testing/selftests/bpf/prog_tests/modify_return.c
+++ b/tools/testing/selftests/bpf/prog_tests/modify_return.c
@@ -15,51 +15,47 @@ static void run_test(__u32 input_retval, __u16 want_side_effect, __s16 want_ret)
{
struct modify_return *skel = NULL;
int err, prog_fd;
- __u32 duration = 0, retval;
__u16 side_effect;
__s16 ret;
+ LIBBPF_OPTS(bpf_test_run_opts, topts);
skel = modify_return__open_and_load();
- if (CHECK(!skel, "skel_load", "modify_return skeleton failed\n"))
+ if (!ASSERT_OK_PTR(skel, "skel_load"))
goto cleanup;
err = modify_return__attach(skel);
- if (CHECK(err, "modify_return", "attach failed: %d\n", err))
+ if (!ASSERT_OK(err, "modify_return__attach failed"))
goto cleanup;
skel->bss->input_retval = input_retval;
prog_fd = bpf_program__fd(skel->progs.fmod_ret_test);
- err = bpf_prog_test_run(prog_fd, 1, NULL, 0, NULL, 0,
- &retval, &duration);
+ err = bpf_prog_test_run_opts(prog_fd, &topts);
+ ASSERT_OK(err, "test_run");
- CHECK(err, "test_run", "err %d errno %d\n", err, errno);
+ side_effect = UPPER(topts.retval);
+ ret = LOWER(topts.retval);
- side_effect = UPPER(retval);
- ret = LOWER(retval);
+ ASSERT_EQ(ret, want_ret, "test_run ret");
+ ASSERT_EQ(side_effect, want_side_effect, "modify_return side_effect");
+ ASSERT_EQ(skel->bss->fentry_result, 1, "modify_return fentry_result");
+ ASSERT_EQ(skel->bss->fexit_result, 1, "modify_return fexit_result");
+ ASSERT_EQ(skel->bss->fmod_ret_result, 1, "modify_return fmod_ret_result");
- CHECK(ret != want_ret, "test_run",
- "unexpected ret: %d, expected: %d\n", ret, want_ret);
- CHECK(side_effect != want_side_effect, "modify_return",
- "unexpected side_effect: %d\n", side_effect);
-
- CHECK(skel->bss->fentry_result != 1, "modify_return",
- "fentry failed\n");
- CHECK(skel->bss->fexit_result != 1, "modify_return",
- "fexit failed\n");
- CHECK(skel->bss->fmod_ret_result != 1, "modify_return",
- "fmod_ret failed\n");
+ ASSERT_EQ(skel->bss->fentry_result2, 1, "modify_return fentry_result2");
+ ASSERT_EQ(skel->bss->fexit_result2, 1, "modify_return fexit_result2");
+ ASSERT_EQ(skel->bss->fmod_ret_result2, 1, "modify_return fmod_ret_result2");
cleanup:
modify_return__destroy(skel);
}
-void test_modify_return(void)
+/* TODO: conflict with get_func_ip_test */
+void serial_test_modify_return(void)
{
run_test(0 /* input_retval */,
- 1 /* want_side_effect */,
- 4 /* want_ret */);
+ 2 /* want_side_effect */,
+ 33 /* want_ret */);
run_test(-EINVAL /* input_retval */,
0 /* want_side_effect */,
- -EINVAL /* want_ret */);
+ -EINVAL * 2 /* want_ret */);
}
-
diff --git a/tools/testing/selftests/bpf/prog_tests/module_attach.c b/tools/testing/selftests/bpf/prog_tests/module_attach.c
new file mode 100644
index 000000000000..f53d658ed080
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/module_attach.c
@@ -0,0 +1,111 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2020 Facebook */
+
+#include <test_progs.h>
+#include <stdbool.h>
+#include "test_module_attach.skel.h"
+#include "testing_helpers.h"
+
+static int duration;
+
+static int trigger_module_test_writable(int *val)
+{
+ int fd, err;
+ char buf[65];
+ ssize_t rd;
+
+ fd = open(BPF_TESTMOD_TEST_FILE, O_RDONLY);
+ err = -errno;
+ if (!ASSERT_GE(fd, 0, "testmode_file_open"))
+ return err;
+
+ rd = read(fd, buf, sizeof(buf) - 1);
+ err = -errno;
+ if (!ASSERT_GT(rd, 0, "testmod_file_rd_val")) {
+ close(fd);
+ return err;
+ }
+
+ buf[rd] = '\0';
+ *val = strtol(buf, NULL, 0);
+ close(fd);
+
+ return 0;
+}
+
+void test_module_attach(void)
+{
+ const int READ_SZ = 456;
+ const int WRITE_SZ = 457;
+ struct test_module_attach* skel;
+ struct test_module_attach__bss *bss;
+ struct bpf_link *link;
+ int err;
+ int writable_val = 0;
+
+ skel = test_module_attach__open();
+ if (CHECK(!skel, "skel_open", "failed to open skeleton\n"))
+ return;
+
+ err = bpf_program__set_attach_target(skel->progs.handle_fentry_manual,
+ 0, "bpf_testmod_test_read");
+ ASSERT_OK(err, "set_attach_target");
+
+ err = test_module_attach__load(skel);
+ if (CHECK(err, "skel_load", "failed to load skeleton\n"))
+ return;
+
+ bss = skel->bss;
+
+ err = test_module_attach__attach(skel);
+ if (CHECK(err, "skel_attach", "skeleton attach failed: %d\n", err))
+ goto cleanup;
+
+ /* trigger tracepoint */
+ ASSERT_OK(trigger_module_test_read(READ_SZ), "trigger_read");
+ ASSERT_OK(trigger_module_test_write(WRITE_SZ), "trigger_write");
+
+ ASSERT_EQ(bss->raw_tp_read_sz, READ_SZ, "raw_tp");
+ ASSERT_EQ(bss->raw_tp_bare_write_sz, WRITE_SZ, "raw_tp_bare");
+ ASSERT_EQ(bss->tp_btf_read_sz, READ_SZ, "tp_btf");
+ ASSERT_EQ(bss->fentry_read_sz, READ_SZ, "fentry");
+ ASSERT_EQ(bss->fentry_manual_read_sz, READ_SZ, "fentry_manual");
+ ASSERT_EQ(bss->fexit_read_sz, READ_SZ, "fexit");
+ ASSERT_EQ(bss->fexit_ret, -EIO, "fexit_tet");
+ ASSERT_EQ(bss->fmod_ret_read_sz, READ_SZ, "fmod_ret");
+
+ bss->raw_tp_writable_bare_early_ret = true;
+ bss->raw_tp_writable_bare_out_val = 0xf1f2f3f4;
+ ASSERT_OK(trigger_module_test_writable(&writable_val),
+ "trigger_writable");
+ ASSERT_EQ(bss->raw_tp_writable_bare_in_val, 1024, "writable_test_in");
+ ASSERT_EQ(bss->raw_tp_writable_bare_out_val, writable_val,
+ "writable_test_out");
+
+ test_module_attach__detach(skel);
+
+ /* attach fentry/fexit and make sure it get's module reference */
+ link = bpf_program__attach(skel->progs.handle_fentry);
+ if (!ASSERT_OK_PTR(link, "attach_fentry"))
+ goto cleanup;
+
+ ASSERT_ERR(unload_bpf_testmod(false), "unload_bpf_testmod");
+ bpf_link__destroy(link);
+
+ link = bpf_program__attach(skel->progs.handle_fexit);
+ if (!ASSERT_OK_PTR(link, "attach_fexit"))
+ goto cleanup;
+
+ ASSERT_ERR(unload_bpf_testmod(false), "unload_bpf_testmod");
+ bpf_link__destroy(link);
+
+ link = bpf_program__attach(skel->progs.kprobe_multi);
+ if (!ASSERT_OK_PTR(link, "attach_kprobe_multi"))
+ goto cleanup;
+
+ ASSERT_ERR(unload_bpf_testmod(false), "unload_bpf_testmod");
+ bpf_link__destroy(link);
+
+cleanup:
+ test_module_attach__destroy(skel);
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/module_fentry_shadow.c b/tools/testing/selftests/bpf/prog_tests/module_fentry_shadow.c
new file mode 100644
index 000000000000..aa9f67eb1c95
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/module_fentry_shadow.c
@@ -0,0 +1,133 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2022 Red Hat */
+#include <test_progs.h>
+#include <bpf/btf.h>
+#include "bpf/libbpf_internal.h"
+#include "cgroup_helpers.h"
+
+static const char *module_name = "bpf_testmod";
+static const char *symbol_name = "bpf_fentry_shadow_test";
+
+static int get_bpf_testmod_btf_fd(void)
+{
+ struct bpf_btf_info info;
+ char name[64];
+ __u32 id = 0, len;
+ int err, fd;
+
+ while (true) {
+ err = bpf_btf_get_next_id(id, &id);
+ if (err) {
+ log_err("failed to iterate BTF objects");
+ return err;
+ }
+
+ fd = bpf_btf_get_fd_by_id(id);
+ if (fd < 0) {
+ if (errno == ENOENT)
+ continue; /* expected race: BTF was unloaded */
+ err = -errno;
+ log_err("failed to get FD for BTF object #%d", id);
+ return err;
+ }
+
+ len = sizeof(info);
+ memset(&info, 0, sizeof(info));
+ info.name = ptr_to_u64(name);
+ info.name_len = sizeof(name);
+
+ err = bpf_obj_get_info_by_fd(fd, &info, &len);
+ if (err) {
+ err = -errno;
+ log_err("failed to get info for BTF object #%d", id);
+ close(fd);
+ return err;
+ }
+
+ if (strcmp(name, module_name) == 0)
+ return fd;
+
+ close(fd);
+ }
+ return -ENOENT;
+}
+
+void test_module_fentry_shadow(void)
+{
+ struct btf *vmlinux_btf = NULL, *mod_btf = NULL;
+ int err, i;
+ int btf_fd[2] = {};
+ int prog_fd[2] = {};
+ int link_fd[2] = {};
+ __s32 btf_id[2] = {};
+
+ if (!env.has_testmod) {
+ test__skip();
+ return;
+ }
+
+ LIBBPF_OPTS(bpf_prog_load_opts, load_opts,
+ .expected_attach_type = BPF_TRACE_FENTRY,
+ );
+
+ const struct bpf_insn trace_program[] = {
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ };
+
+ vmlinux_btf = btf__load_vmlinux_btf();
+ if (!ASSERT_OK_PTR(vmlinux_btf, "load_vmlinux_btf"))
+ return;
+
+ btf_fd[1] = get_bpf_testmod_btf_fd();
+ if (!ASSERT_GE(btf_fd[1], 0, "get_bpf_testmod_btf_fd"))
+ goto out;
+
+ mod_btf = btf_get_from_fd(btf_fd[1], vmlinux_btf);
+ if (!ASSERT_OK_PTR(mod_btf, "btf_get_from_fd"))
+ goto out;
+
+ btf_id[0] = btf__find_by_name_kind(vmlinux_btf, symbol_name, BTF_KIND_FUNC);
+ if (!ASSERT_GT(btf_id[0], 0, "btf_find_by_name"))
+ goto out;
+
+ btf_id[1] = btf__find_by_name_kind(mod_btf, symbol_name, BTF_KIND_FUNC);
+ if (!ASSERT_GT(btf_id[1], 0, "btf_find_by_name"))
+ goto out;
+
+ for (i = 0; i < 2; i++) {
+ load_opts.attach_btf_id = btf_id[i];
+ load_opts.attach_btf_obj_fd = btf_fd[i];
+ prog_fd[i] = bpf_prog_load(BPF_PROG_TYPE_TRACING, NULL, "GPL",
+ trace_program,
+ sizeof(trace_program) / sizeof(struct bpf_insn),
+ &load_opts);
+ if (!ASSERT_GE(prog_fd[i], 0, "bpf_prog_load"))
+ goto out;
+
+ /* If the verifier incorrectly resolves addresses of the
+ * shadowed functions and uses the same address for both the
+ * vmlinux and the bpf_testmod functions, this will fail on
+ * attempting to create two trampolines for the same address,
+ * which is forbidden.
+ */
+ link_fd[i] = bpf_link_create(prog_fd[i], 0, BPF_TRACE_FENTRY, NULL);
+ if (!ASSERT_GE(link_fd[i], 0, "bpf_link_create"))
+ goto out;
+ }
+
+ err = bpf_prog_test_run_opts(prog_fd[0], NULL);
+ ASSERT_OK(err, "running test");
+
+out:
+ btf__free(vmlinux_btf);
+ btf__free(mod_btf);
+ for (i = 0; i < 2; i++) {
+ if (btf_fd[i])
+ close(btf_fd[i]);
+ if (prog_fd[i] > 0)
+ close(prog_fd[i]);
+ if (link_fd[i] > 0)
+ close(link_fd[i]);
+ }
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/mptcp.c b/tools/testing/selftests/bpf/prog_tests/mptcp.c
new file mode 100644
index 000000000000..8f8d792307c1
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/mptcp.c
@@ -0,0 +1,331 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2020, Tessares SA. */
+/* Copyright (c) 2022, SUSE. */
+
+#include <linux/const.h>
+#include <netinet/in.h>
+#include <test_progs.h>
+#include "cgroup_helpers.h"
+#include "network_helpers.h"
+#include "mptcp_sock.skel.h"
+#include "mptcpify.skel.h"
+
+#define NS_TEST "mptcp_ns"
+
+#ifndef IPPROTO_MPTCP
+#define IPPROTO_MPTCP 262
+#endif
+
+#ifndef SOL_MPTCP
+#define SOL_MPTCP 284
+#endif
+#ifndef MPTCP_INFO
+#define MPTCP_INFO 1
+#endif
+#ifndef MPTCP_INFO_FLAG_FALLBACK
+#define MPTCP_INFO_FLAG_FALLBACK _BITUL(0)
+#endif
+#ifndef MPTCP_INFO_FLAG_REMOTE_KEY_RECEIVED
+#define MPTCP_INFO_FLAG_REMOTE_KEY_RECEIVED _BITUL(1)
+#endif
+
+#ifndef TCP_CA_NAME_MAX
+#define TCP_CA_NAME_MAX 16
+#endif
+
+struct __mptcp_info {
+ __u8 mptcpi_subflows;
+ __u8 mptcpi_add_addr_signal;
+ __u8 mptcpi_add_addr_accepted;
+ __u8 mptcpi_subflows_max;
+ __u8 mptcpi_add_addr_signal_max;
+ __u8 mptcpi_add_addr_accepted_max;
+ __u32 mptcpi_flags;
+ __u32 mptcpi_token;
+ __u64 mptcpi_write_seq;
+ __u64 mptcpi_snd_una;
+ __u64 mptcpi_rcv_nxt;
+ __u8 mptcpi_local_addr_used;
+ __u8 mptcpi_local_addr_max;
+ __u8 mptcpi_csum_enabled;
+ __u32 mptcpi_retransmits;
+ __u64 mptcpi_bytes_retrans;
+ __u64 mptcpi_bytes_sent;
+ __u64 mptcpi_bytes_received;
+ __u64 mptcpi_bytes_acked;
+};
+
+struct mptcp_storage {
+ __u32 invoked;
+ __u32 is_mptcp;
+ struct sock *sk;
+ __u32 token;
+ struct sock *first;
+ char ca_name[TCP_CA_NAME_MAX];
+};
+
+static struct nstoken *create_netns(void)
+{
+ SYS(fail, "ip netns add %s", NS_TEST);
+ SYS(fail, "ip -net %s link set dev lo up", NS_TEST);
+
+ return open_netns(NS_TEST);
+fail:
+ return NULL;
+}
+
+static void cleanup_netns(struct nstoken *nstoken)
+{
+ if (nstoken)
+ close_netns(nstoken);
+
+ SYS_NOFAIL("ip netns del %s", NS_TEST);
+}
+
+static int verify_tsk(int map_fd, int client_fd)
+{
+ int err, cfd = client_fd;
+ struct mptcp_storage val;
+
+ err = bpf_map_lookup_elem(map_fd, &cfd, &val);
+ if (!ASSERT_OK(err, "bpf_map_lookup_elem"))
+ return err;
+
+ if (!ASSERT_EQ(val.invoked, 1, "unexpected invoked count"))
+ err++;
+
+ if (!ASSERT_EQ(val.is_mptcp, 0, "unexpected is_mptcp"))
+ err++;
+
+ return err;
+}
+
+static void get_msk_ca_name(char ca_name[])
+{
+ size_t len;
+ int fd;
+
+ fd = open("/proc/sys/net/ipv4/tcp_congestion_control", O_RDONLY);
+ if (!ASSERT_GE(fd, 0, "failed to open tcp_congestion_control"))
+ return;
+
+ len = read(fd, ca_name, TCP_CA_NAME_MAX);
+ if (!ASSERT_GT(len, 0, "failed to read ca_name"))
+ goto err;
+
+ if (len > 0 && ca_name[len - 1] == '\n')
+ ca_name[len - 1] = '\0';
+
+err:
+ close(fd);
+}
+
+static int verify_msk(int map_fd, int client_fd, __u32 token)
+{
+ char ca_name[TCP_CA_NAME_MAX];
+ int err, cfd = client_fd;
+ struct mptcp_storage val;
+
+ if (!ASSERT_GT(token, 0, "invalid token"))
+ return -1;
+
+ get_msk_ca_name(ca_name);
+
+ err = bpf_map_lookup_elem(map_fd, &cfd, &val);
+ if (!ASSERT_OK(err, "bpf_map_lookup_elem"))
+ return err;
+
+ if (!ASSERT_EQ(val.invoked, 1, "unexpected invoked count"))
+ err++;
+
+ if (!ASSERT_EQ(val.is_mptcp, 1, "unexpected is_mptcp"))
+ err++;
+
+ if (!ASSERT_EQ(val.token, token, "unexpected token"))
+ err++;
+
+ if (!ASSERT_EQ(val.first, val.sk, "unexpected first"))
+ err++;
+
+ if (!ASSERT_STRNEQ(val.ca_name, ca_name, TCP_CA_NAME_MAX, "unexpected ca_name"))
+ err++;
+
+ return err;
+}
+
+static int run_test(int cgroup_fd, int server_fd, bool is_mptcp)
+{
+ int client_fd, prog_fd, map_fd, err;
+ struct mptcp_sock *sock_skel;
+
+ sock_skel = mptcp_sock__open_and_load();
+ if (!ASSERT_OK_PTR(sock_skel, "skel_open_load"))
+ return libbpf_get_error(sock_skel);
+
+ err = mptcp_sock__attach(sock_skel);
+ if (!ASSERT_OK(err, "skel_attach"))
+ goto out;
+
+ prog_fd = bpf_program__fd(sock_skel->progs._sockops);
+ map_fd = bpf_map__fd(sock_skel->maps.socket_storage_map);
+ err = bpf_prog_attach(prog_fd, cgroup_fd, BPF_CGROUP_SOCK_OPS, 0);
+ if (!ASSERT_OK(err, "bpf_prog_attach"))
+ goto out;
+
+ client_fd = connect_to_fd(server_fd, 0);
+ if (!ASSERT_GE(client_fd, 0, "connect to fd")) {
+ err = -EIO;
+ goto out;
+ }
+
+ err += is_mptcp ? verify_msk(map_fd, client_fd, sock_skel->bss->token) :
+ verify_tsk(map_fd, client_fd);
+
+ close(client_fd);
+
+out:
+ mptcp_sock__destroy(sock_skel);
+ return err;
+}
+
+static void test_base(void)
+{
+ struct nstoken *nstoken = NULL;
+ int server_fd, cgroup_fd;
+
+ cgroup_fd = test__join_cgroup("/mptcp");
+ if (!ASSERT_GE(cgroup_fd, 0, "test__join_cgroup"))
+ return;
+
+ nstoken = create_netns();
+ if (!ASSERT_OK_PTR(nstoken, "create_netns"))
+ goto fail;
+
+ /* without MPTCP */
+ server_fd = start_server(AF_INET, SOCK_STREAM, NULL, 0, 0);
+ if (!ASSERT_GE(server_fd, 0, "start_server"))
+ goto with_mptcp;
+
+ ASSERT_OK(run_test(cgroup_fd, server_fd, false), "run_test tcp");
+
+ close(server_fd);
+
+with_mptcp:
+ /* with MPTCP */
+ server_fd = start_mptcp_server(AF_INET, NULL, 0, 0);
+ if (!ASSERT_GE(server_fd, 0, "start_mptcp_server"))
+ goto fail;
+
+ ASSERT_OK(run_test(cgroup_fd, server_fd, true), "run_test mptcp");
+
+ close(server_fd);
+
+fail:
+ cleanup_netns(nstoken);
+ close(cgroup_fd);
+}
+
+static void send_byte(int fd)
+{
+ char b = 0x55;
+
+ ASSERT_EQ(write(fd, &b, sizeof(b)), 1, "send single byte");
+}
+
+static int verify_mptcpify(int server_fd, int client_fd)
+{
+ struct __mptcp_info info;
+ socklen_t optlen;
+ int protocol;
+ int err = 0;
+
+ optlen = sizeof(protocol);
+ if (!ASSERT_OK(getsockopt(server_fd, SOL_SOCKET, SO_PROTOCOL, &protocol, &optlen),
+ "getsockopt(SOL_PROTOCOL)"))
+ return -1;
+
+ if (!ASSERT_EQ(protocol, IPPROTO_MPTCP, "protocol isn't MPTCP"))
+ err++;
+
+ optlen = sizeof(info);
+ if (!ASSERT_OK(getsockopt(client_fd, SOL_MPTCP, MPTCP_INFO, &info, &optlen),
+ "getsockopt(MPTCP_INFO)"))
+ return -1;
+
+ if (!ASSERT_GE(info.mptcpi_flags, 0, "unexpected mptcpi_flags"))
+ err++;
+ if (!ASSERT_FALSE(info.mptcpi_flags & MPTCP_INFO_FLAG_FALLBACK,
+ "MPTCP fallback"))
+ err++;
+ if (!ASSERT_TRUE(info.mptcpi_flags & MPTCP_INFO_FLAG_REMOTE_KEY_RECEIVED,
+ "no remote key received"))
+ err++;
+
+ return err;
+}
+
+static int run_mptcpify(int cgroup_fd)
+{
+ int server_fd, client_fd, err = 0;
+ struct mptcpify *mptcpify_skel;
+
+ mptcpify_skel = mptcpify__open_and_load();
+ if (!ASSERT_OK_PTR(mptcpify_skel, "skel_open_load"))
+ return libbpf_get_error(mptcpify_skel);
+
+ err = mptcpify__attach(mptcpify_skel);
+ if (!ASSERT_OK(err, "skel_attach"))
+ goto out;
+
+ /* without MPTCP */
+ server_fd = start_server(AF_INET, SOCK_STREAM, NULL, 0, 0);
+ if (!ASSERT_GE(server_fd, 0, "start_server")) {
+ err = -EIO;
+ goto out;
+ }
+
+ client_fd = connect_to_fd(server_fd, 0);
+ if (!ASSERT_GE(client_fd, 0, "connect to fd")) {
+ err = -EIO;
+ goto close_server;
+ }
+
+ send_byte(client_fd);
+
+ err = verify_mptcpify(server_fd, client_fd);
+
+ close(client_fd);
+close_server:
+ close(server_fd);
+out:
+ mptcpify__destroy(mptcpify_skel);
+ return err;
+}
+
+static void test_mptcpify(void)
+{
+ struct nstoken *nstoken = NULL;
+ int cgroup_fd;
+
+ cgroup_fd = test__join_cgroup("/mptcpify");
+ if (!ASSERT_GE(cgroup_fd, 0, "test__join_cgroup"))
+ return;
+
+ nstoken = create_netns();
+ if (!ASSERT_OK_PTR(nstoken, "create_netns"))
+ goto fail;
+
+ ASSERT_OK(run_mptcpify(cgroup_fd), "run_mptcpify");
+
+fail:
+ cleanup_netns(nstoken);
+ close(cgroup_fd);
+}
+
+void test_mptcp(void)
+{
+ if (test__start_subtest("base"))
+ test_base();
+ if (test__start_subtest("mptcpify"))
+ test_mptcpify();
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/nested_trust.c b/tools/testing/selftests/bpf/prog_tests/nested_trust.c
new file mode 100644
index 000000000000..39886f58924e
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/nested_trust.c
@@ -0,0 +1,12 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2023 Meta Platforms, Inc. and affiliates. */
+
+#include <test_progs.h>
+#include "nested_trust_failure.skel.h"
+#include "nested_trust_success.skel.h"
+
+void test_nested_trust(void)
+{
+ RUN_TESTS(nested_trust_success);
+ RUN_TESTS(nested_trust_failure);
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/netcnt.c b/tools/testing/selftests/bpf/prog_tests/netcnt.c
new file mode 100644
index 000000000000..c3333edd029f
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/netcnt.c
@@ -0,0 +1,82 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include <sys/sysinfo.h>
+#include <test_progs.h>
+#include "network_helpers.h"
+#include "netcnt_prog.skel.h"
+#include "netcnt_common.h"
+
+#define CG_NAME "/netcnt"
+
+void serial_test_netcnt(void)
+{
+ union percpu_net_cnt *percpu_netcnt = NULL;
+ struct bpf_cgroup_storage_key key;
+ int map_fd, percpu_map_fd;
+ struct netcnt_prog *skel;
+ unsigned long packets;
+ union net_cnt netcnt;
+ unsigned long bytes;
+ int cpu, nproc;
+ int cg_fd = -1;
+ char cmd[128];
+
+ skel = netcnt_prog__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "netcnt_prog__open_and_load"))
+ return;
+
+ nproc = bpf_num_possible_cpus();
+ percpu_netcnt = malloc(sizeof(*percpu_netcnt) * nproc);
+ if (!ASSERT_OK_PTR(percpu_netcnt, "malloc(percpu_netcnt)"))
+ goto err;
+
+ cg_fd = test__join_cgroup(CG_NAME);
+ if (!ASSERT_GE(cg_fd, 0, "test__join_cgroup"))
+ goto err;
+
+ skel->links.bpf_nextcnt = bpf_program__attach_cgroup(skel->progs.bpf_nextcnt, cg_fd);
+ if (!ASSERT_OK_PTR(skel->links.bpf_nextcnt,
+ "attach_cgroup(bpf_nextcnt)"))
+ goto err;
+
+ snprintf(cmd, sizeof(cmd), "%s ::1 -A -c 10000 -q > /dev/null", ping_command(AF_INET6));
+ ASSERT_OK(system(cmd), cmd);
+
+ map_fd = bpf_map__fd(skel->maps.netcnt);
+ if (!ASSERT_OK(bpf_map_get_next_key(map_fd, NULL, &key), "bpf_map_get_next_key"))
+ goto err;
+
+ if (!ASSERT_OK(bpf_map_lookup_elem(map_fd, &key, &netcnt), "bpf_map_lookup_elem(netcnt)"))
+ goto err;
+
+ percpu_map_fd = bpf_map__fd(skel->maps.percpu_netcnt);
+ if (!ASSERT_OK(bpf_map_lookup_elem(percpu_map_fd, &key, &percpu_netcnt[0]),
+ "bpf_map_lookup_elem(percpu_netcnt)"))
+ goto err;
+
+ /* Some packets can be still in per-cpu cache, but not more than
+ * MAX_PERCPU_PACKETS.
+ */
+ packets = netcnt.packets;
+ bytes = netcnt.bytes;
+ for (cpu = 0; cpu < nproc; cpu++) {
+ ASSERT_LE(percpu_netcnt[cpu].packets, MAX_PERCPU_PACKETS, "MAX_PERCPU_PACKETS");
+
+ packets += percpu_netcnt[cpu].packets;
+ bytes += percpu_netcnt[cpu].bytes;
+ }
+
+ /* No packets should be lost */
+ ASSERT_GE(packets, 10000, "packets");
+
+ /* Let's check that bytes counter matches the number of packets
+ * multiplied by the size of ipv6 ICMP packet.
+ */
+ ASSERT_GE(bytes, packets * 104, "bytes");
+
+err:
+ if (cg_fd != -1)
+ close(cg_fd);
+ free(percpu_netcnt);
+ netcnt_prog__destroy(skel);
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/netfilter_link_attach.c b/tools/testing/selftests/bpf/prog_tests/netfilter_link_attach.c
new file mode 100644
index 000000000000..4297a2a4cb11
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/netfilter_link_attach.c
@@ -0,0 +1,86 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+
+#include <netinet/in.h>
+#include <linux/netfilter.h>
+
+#include "test_progs.h"
+#include "test_netfilter_link_attach.skel.h"
+
+struct nf_link_test {
+ __u32 pf;
+ __u32 hooknum;
+ __s32 priority;
+ __u32 flags;
+
+ bool expect_success;
+ const char * const name;
+};
+
+static const struct nf_link_test nf_hook_link_tests[] = {
+ { .name = "allzero", },
+ { .pf = NFPROTO_NUMPROTO, .name = "invalid-pf", },
+ { .pf = NFPROTO_IPV4, .hooknum = 42, .name = "invalid-hooknum", },
+ { .pf = NFPROTO_IPV4, .priority = INT_MIN, .name = "invalid-priority-min", },
+ { .pf = NFPROTO_IPV4, .priority = INT_MAX, .name = "invalid-priority-max", },
+ { .pf = NFPROTO_IPV4, .flags = UINT_MAX, .name = "invalid-flags", },
+
+ { .pf = NFPROTO_INET, .priority = 1, .name = "invalid-inet-not-supported", },
+
+ { .pf = NFPROTO_IPV4, .priority = -10000, .expect_success = true, .name = "attach ipv4", },
+ { .pf = NFPROTO_IPV6, .priority = 10001, .expect_success = true, .name = "attach ipv6", },
+};
+
+void test_netfilter_link_attach(void)
+{
+ struct test_netfilter_link_attach *skel;
+ struct bpf_program *prog;
+ LIBBPF_OPTS(bpf_netfilter_opts, opts);
+ int i;
+
+ skel = test_netfilter_link_attach__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "test_netfilter_link_attach__open_and_load"))
+ goto out;
+
+ prog = skel->progs.nf_link_attach_test;
+ if (!ASSERT_OK_PTR(prog, "attach program"))
+ goto out;
+
+ for (i = 0; i < ARRAY_SIZE(nf_hook_link_tests); i++) {
+ struct bpf_link *link;
+
+ if (!test__start_subtest(nf_hook_link_tests[i].name))
+ continue;
+
+#define X(opts, m, i) opts.m = nf_hook_link_tests[(i)].m
+ X(opts, pf, i);
+ X(opts, hooknum, i);
+ X(opts, priority, i);
+ X(opts, flags, i);
+#undef X
+ link = bpf_program__attach_netfilter(prog, &opts);
+ if (nf_hook_link_tests[i].expect_success) {
+ struct bpf_link *link2;
+
+ if (!ASSERT_OK_PTR(link, "program attach successful"))
+ continue;
+
+ link2 = bpf_program__attach_netfilter(prog, &opts);
+ ASSERT_ERR_PTR(link2, "attach program with same pf/hook/priority");
+
+ if (!ASSERT_OK(bpf_link__destroy(link), "link destroy"))
+ break;
+
+ link2 = bpf_program__attach_netfilter(prog, &opts);
+ if (!ASSERT_OK_PTR(link2, "program reattach successful"))
+ continue;
+ if (!ASSERT_OK(bpf_link__destroy(link2), "link destroy"))
+ break;
+ } else {
+ ASSERT_ERR_PTR(link, "program load failure");
+ }
+ }
+
+out:
+ test_netfilter_link_attach__destroy(skel);
+}
+
diff --git a/tools/testing/selftests/bpf/prog_tests/netns_cookie.c b/tools/testing/selftests/bpf/prog_tests/netns_cookie.c
new file mode 100644
index 000000000000..71d8f3ba7d6b
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/netns_cookie.c
@@ -0,0 +1,80 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include <test_progs.h>
+#include "netns_cookie_prog.skel.h"
+#include "network_helpers.h"
+
+#ifndef SO_NETNS_COOKIE
+#define SO_NETNS_COOKIE 71
+#endif
+
+static int duration;
+
+void test_netns_cookie(void)
+{
+ int server_fd = -1, client_fd = -1, cgroup_fd = -1;
+ int err, val, ret, map, verdict;
+ struct netns_cookie_prog *skel;
+ uint64_t cookie_expected_value;
+ socklen_t vallen = sizeof(cookie_expected_value);
+ static const char send_msg[] = "message";
+
+ skel = netns_cookie_prog__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "skel_open"))
+ return;
+
+ cgroup_fd = test__join_cgroup("/netns_cookie");
+ if (CHECK(cgroup_fd < 0, "join_cgroup", "cgroup creation failed\n"))
+ goto done;
+
+ skel->links.get_netns_cookie_sockops = bpf_program__attach_cgroup(
+ skel->progs.get_netns_cookie_sockops, cgroup_fd);
+ if (!ASSERT_OK_PTR(skel->links.get_netns_cookie_sockops, "prog_attach"))
+ goto done;
+
+ verdict = bpf_program__fd(skel->progs.get_netns_cookie_sk_msg);
+ map = bpf_map__fd(skel->maps.sock_map);
+ err = bpf_prog_attach(verdict, map, BPF_SK_MSG_VERDICT, 0);
+ if (!ASSERT_OK(err, "prog_attach"))
+ goto done;
+
+ server_fd = start_server(AF_INET6, SOCK_STREAM, "::1", 0, 0);
+ if (CHECK(server_fd < 0, "start_server", "errno %d\n", errno))
+ goto done;
+
+ client_fd = connect_to_fd(server_fd, 0);
+ if (CHECK(client_fd < 0, "connect_to_fd", "errno %d\n", errno))
+ goto done;
+
+ ret = send(client_fd, send_msg, sizeof(send_msg), 0);
+ if (CHECK(ret != sizeof(send_msg), "send(msg)", "ret:%d\n", ret))
+ goto done;
+
+ err = bpf_map_lookup_elem(bpf_map__fd(skel->maps.sockops_netns_cookies),
+ &client_fd, &val);
+ if (!ASSERT_OK(err, "map_lookup(sockops_netns_cookies)"))
+ goto done;
+
+ err = getsockopt(client_fd, SOL_SOCKET, SO_NETNS_COOKIE,
+ &cookie_expected_value, &vallen);
+ if (!ASSERT_OK(err, "getsockopt"))
+ goto done;
+
+ ASSERT_EQ(val, cookie_expected_value, "cookie_value");
+
+ err = bpf_map_lookup_elem(bpf_map__fd(skel->maps.sk_msg_netns_cookies),
+ &client_fd, &val);
+ if (!ASSERT_OK(err, "map_lookup(sk_msg_netns_cookies)"))
+ goto done;
+
+ ASSERT_EQ(val, cookie_expected_value, "cookie_value");
+
+done:
+ if (server_fd != -1)
+ close(server_fd);
+ if (client_fd != -1)
+ close(client_fd);
+ if (cgroup_fd != -1)
+ close(cgroup_fd);
+ netns_cookie_prog__destroy(skel);
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/ns_current_pid_tgid.c b/tools/testing/selftests/bpf/prog_tests/ns_current_pid_tgid.c
index e74dc501b27f..24d493482ffc 100644
--- a/tools/testing/selftests/bpf/prog_tests/ns_current_pid_tgid.c
+++ b/tools/testing/selftests/bpf/prog_tests/ns_current_pid_tgid.c
@@ -1,85 +1,88 @@
// SPDX-License-Identifier: GPL-2.0
/* Copyright (c) 2020 Carlos Neira cneirabustos@gmail.com */
+
+#define _GNU_SOURCE
#include <test_progs.h>
+#include "test_ns_current_pid_tgid.skel.h"
#include <sys/stat.h>
#include <sys/types.h>
#include <unistd.h>
#include <sys/syscall.h>
+#include <sched.h>
+#include <sys/wait.h>
+#include <sys/mount.h>
+#include <sys/fcntl.h>
-struct bss {
- __u64 dev;
- __u64 ino;
- __u64 pid_tgid;
- __u64 user_pid_tgid;
-};
+#define STACK_SIZE (1024 * 1024)
+static char child_stack[STACK_SIZE];
-void test_ns_current_pid_tgid(void)
+static int test_current_pid_tgid(void *args)
{
- const char *probe_name = "raw_tracepoint/sys_enter";
- const char *file = "test_ns_current_pid_tgid.o";
- int err, key = 0, duration = 0;
- struct bpf_link *link = NULL;
- struct bpf_program *prog;
- struct bpf_map *bss_map;
- struct bpf_object *obj;
- struct bss bss;
+ struct test_ns_current_pid_tgid__bss *bss;
+ struct test_ns_current_pid_tgid *skel;
+ int err = -1, duration = 0;
+ pid_t tgid, pid;
struct stat st;
- __u64 id;
-
- obj = bpf_object__open_file(file, NULL);
- if (CHECK(IS_ERR(obj), "obj_open", "err %ld\n", PTR_ERR(obj)))
- return;
- err = bpf_object__load(obj);
- if (CHECK(err, "obj_load", "err %d errno %d\n", err, errno))
+ skel = test_ns_current_pid_tgid__open_and_load();
+ if (CHECK(!skel, "skel_open_load", "failed to load skeleton\n"))
goto cleanup;
- bss_map = bpf_object__find_map_by_name(obj, "test_ns_.bss");
- if (CHECK(!bss_map, "find_bss_map", "failed\n"))
+ pid = syscall(SYS_gettid);
+ tgid = getpid();
+
+ err = stat("/proc/self/ns/pid", &st);
+ if (CHECK(err, "stat", "failed /proc/self/ns/pid: %d\n", err))
goto cleanup;
- prog = bpf_object__find_program_by_title(obj, probe_name);
- if (CHECK(!prog, "find_prog", "prog '%s' not found\n",
- probe_name))
+ bss = skel->bss;
+ bss->dev = st.st_dev;
+ bss->ino = st.st_ino;
+ bss->user_pid = 0;
+ bss->user_tgid = 0;
+
+ err = test_ns_current_pid_tgid__attach(skel);
+ if (CHECK(err, "skel_attach", "skeleton attach failed: %d\n", err))
goto cleanup;
- memset(&bss, 0, sizeof(bss));
- pid_t tid = syscall(SYS_gettid);
- pid_t pid = getpid();
+ /* trigger tracepoint */
+ usleep(1);
+ ASSERT_EQ(bss->user_pid, pid, "pid");
+ ASSERT_EQ(bss->user_tgid, tgid, "tgid");
+ err = 0;
- id = (__u64) tid << 32 | pid;
- bss.user_pid_tgid = id;
+cleanup:
+ test_ns_current_pid_tgid__destroy(skel);
- if (CHECK_FAIL(stat("/proc/self/ns/pid", &st))) {
- perror("Failed to stat /proc/self/ns/pid");
- goto cleanup;
- }
+ return err;
+}
- bss.dev = st.st_dev;
- bss.ino = st.st_ino;
+static void test_ns_current_pid_tgid_new_ns(void)
+{
+ int wstatus, duration = 0;
+ pid_t cpid;
- err = bpf_map_update_elem(bpf_map__fd(bss_map), &key, &bss, 0);
- if (CHECK(err, "setting_bss", "failed to set bss : %d\n", err))
- goto cleanup;
+ /* Create a process in a new namespace, this process
+ * will be the init process of this new namespace hence will be pid 1.
+ */
+ cpid = clone(test_current_pid_tgid, child_stack + STACK_SIZE,
+ CLONE_NEWPID | SIGCHLD, NULL);
- link = bpf_program__attach_raw_tracepoint(prog, "sys_enter");
- if (CHECK(IS_ERR(link), "attach_raw_tp", "err %ld\n",
- PTR_ERR(link))) {
- link = NULL;
- goto cleanup;
- }
+ if (CHECK(cpid == -1, "clone", "%s\n", strerror(errno)))
+ return;
- /* trigger some syscalls */
- usleep(1);
+ if (CHECK(waitpid(cpid, &wstatus, 0) == -1, "waitpid", "%s\n", strerror(errno)))
+ return;
- err = bpf_map_lookup_elem(bpf_map__fd(bss_map), &key, &bss);
- if (CHECK(err, "set_bss", "failed to get bss : %d\n", err))
- goto cleanup;
+ if (CHECK(WEXITSTATUS(wstatus) != 0, "newns_pidtgid", "failed"))
+ return;
+}
- if (CHECK(id != bss.pid_tgid, "Compare user pid/tgid vs. bpf pid/tgid",
- "User pid/tgid %llu BPF pid/tgid %llu\n", id, bss.pid_tgid))
- goto cleanup;
-cleanup:
- bpf_link__destroy(link);
- bpf_object__close(obj);
+/* TODO: use a different tracepoint */
+void serial_test_ns_current_pid_tgid(void)
+{
+ if (test__start_subtest("ns_current_pid_tgid_root_ns"))
+ test_current_pid_tgid(NULL);
+ if (test__start_subtest("ns_current_pid_tgid_new_ns"))
+ test_ns_current_pid_tgid_new_ns();
}
diff --git a/tools/testing/selftests/bpf/prog_tests/obj_name.c b/tools/testing/selftests/bpf/prog_tests/obj_name.c
index e178416bddad..7093edca6e08 100644
--- a/tools/testing/selftests/bpf/prog_tests/obj_name.c
+++ b/tools/testing/selftests/bpf/prog_tests/obj_name.c
@@ -20,7 +20,7 @@ void test_obj_name(void)
__u32 duration = 0;
int i;
- for (i = 0; i < sizeof(tests) / sizeof(tests[0]); i++) {
+ for (i = 0; i < ARRAY_SIZE(tests); i++) {
size_t name_len = strlen(tests[i].name) + 1;
union bpf_attr attr;
size_t ncopy;
@@ -38,13 +38,13 @@ void test_obj_name(void)
fd = syscall(__NR_bpf, BPF_PROG_LOAD, &attr, sizeof(attr));
CHECK((tests[i].success && fd < 0) ||
- (!tests[i].success && fd != -1) ||
+ (!tests[i].success && fd >= 0) ||
(!tests[i].success && errno != tests[i].expected_errno),
"check-bpf-prog-name",
"fd %d(%d) errno %d(%d)\n",
fd, tests[i].success, errno, tests[i].expected_errno);
- if (fd != -1)
+ if (fd >= 0)
close(fd);
/* test different attr.map_name during BPF_MAP_CREATE */
@@ -59,13 +59,13 @@ void test_obj_name(void)
memcpy(attr.map_name, tests[i].name, ncopy);
fd = syscall(__NR_bpf, BPF_MAP_CREATE, &attr, sizeof(attr));
CHECK((tests[i].success && fd < 0) ||
- (!tests[i].success && fd != -1) ||
+ (!tests[i].success && fd >= 0) ||
(!tests[i].success && errno != tests[i].expected_errno),
"check-bpf-map-name",
"fd %d(%d) errno %d(%d)\n",
fd, tests[i].success, errno, tests[i].expected_errno);
- if (fd != -1)
+ if (fd >= 0)
close(fd);
}
}
diff --git a/tools/testing/selftests/bpf/prog_tests/parse_tcp_hdr_opt.c b/tools/testing/selftests/bpf/prog_tests/parse_tcp_hdr_opt.c
new file mode 100644
index 000000000000..daa952711d8f
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/parse_tcp_hdr_opt.c
@@ -0,0 +1,93 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include <test_progs.h>
+#include <network_helpers.h>
+#include "test_parse_tcp_hdr_opt.skel.h"
+#include "test_parse_tcp_hdr_opt_dynptr.skel.h"
+#include "test_tcp_hdr_options.h"
+
+struct test_pkt {
+ struct ipv6_packet pk6_v6;
+ u8 options[16];
+} __packed;
+
+struct test_pkt pkt = {
+ .pk6_v6.eth.h_proto = __bpf_constant_htons(ETH_P_IPV6),
+ .pk6_v6.iph.nexthdr = IPPROTO_TCP,
+ .pk6_v6.iph.payload_len = __bpf_constant_htons(MAGIC_BYTES),
+ .pk6_v6.tcp.urg_ptr = 123,
+ .pk6_v6.tcp.doff = 9, /* 16 bytes of options */
+
+ .options = {
+ TCPOPT_MSS, 4, 0x05, 0xB4, TCPOPT_NOP, TCPOPT_NOP,
+ 0, 6, 0xBB, 0xBB, 0xBB, 0xBB, TCPOPT_EOL
+ },
+};
+
+static void test_parse_opt(void)
+{
+ struct test_parse_tcp_hdr_opt *skel;
+ struct bpf_program *prog;
+ char buf[128];
+ int err;
+
+ LIBBPF_OPTS(bpf_test_run_opts, topts,
+ .data_in = &pkt,
+ .data_size_in = sizeof(pkt),
+ .data_out = buf,
+ .data_size_out = sizeof(buf),
+ .repeat = 3,
+ );
+
+ skel = test_parse_tcp_hdr_opt__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "skel_open_and_load"))
+ return;
+
+ pkt.options[6] = skel->rodata->tcp_hdr_opt_kind_tpr;
+ prog = skel->progs.xdp_ingress_v6;
+
+ err = bpf_prog_test_run_opts(bpf_program__fd(prog), &topts);
+ ASSERT_OK(err, "ipv6 test_run");
+ ASSERT_EQ(topts.retval, XDP_PASS, "ipv6 test_run retval");
+ ASSERT_EQ(skel->bss->server_id, 0xBBBBBBBB, "server id");
+
+ test_parse_tcp_hdr_opt__destroy(skel);
+}
+
+static void test_parse_opt_dynptr(void)
+{
+ struct test_parse_tcp_hdr_opt_dynptr *skel;
+ struct bpf_program *prog;
+ char buf[128];
+ int err;
+
+ LIBBPF_OPTS(bpf_test_run_opts, topts,
+ .data_in = &pkt,
+ .data_size_in = sizeof(pkt),
+ .data_out = buf,
+ .data_size_out = sizeof(buf),
+ .repeat = 3,
+ );
+
+ skel = test_parse_tcp_hdr_opt_dynptr__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "skel_open_and_load"))
+ return;
+
+ pkt.options[6] = skel->rodata->tcp_hdr_opt_kind_tpr;
+ prog = skel->progs.xdp_ingress_v6;
+
+ err = bpf_prog_test_run_opts(bpf_program__fd(prog), &topts);
+ ASSERT_OK(err, "ipv6 test_run");
+ ASSERT_EQ(topts.retval, XDP_PASS, "ipv6 test_run retval");
+ ASSERT_EQ(skel->bss->server_id, 0xBBBBBBBB, "server id");
+
+ test_parse_tcp_hdr_opt_dynptr__destroy(skel);
+}
+
+void test_parse_tcp_hdr_opt(void)
+{
+ if (test__start_subtest("parse_tcp_hdr_opt"))
+ test_parse_opt();
+ if (test__start_subtest("parse_tcp_hdr_opt_dynptr"))
+ test_parse_opt_dynptr();
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/pe_preserve_elems.c b/tools/testing/selftests/bpf/prog_tests/pe_preserve_elems.c
new file mode 100644
index 000000000000..673d38395253
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/pe_preserve_elems.c
@@ -0,0 +1,66 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/* Copyright (c) 2019 Facebook */
+#include <test_progs.h>
+#include <linux/bpf.h>
+#include "test_pe_preserve_elems.skel.h"
+
+static int duration;
+
+static void test_one_map(struct bpf_map *map, struct bpf_program *prog,
+ bool has_share_pe)
+{
+ int err, key = 0, pfd = -1, mfd = bpf_map__fd(map);
+ DECLARE_LIBBPF_OPTS(bpf_test_run_opts, opts);
+ struct perf_event_attr attr = {
+ .size = sizeof(struct perf_event_attr),
+ .type = PERF_TYPE_SOFTWARE,
+ .config = PERF_COUNT_SW_CPU_CLOCK,
+ };
+
+ pfd = syscall(__NR_perf_event_open, &attr, 0 /* pid */,
+ -1 /* cpu 0 */, -1 /* group id */, 0 /* flags */);
+ if (CHECK(pfd < 0, "perf_event_open", "failed\n"))
+ return;
+
+ err = bpf_map_update_elem(mfd, &key, &pfd, BPF_ANY);
+ close(pfd);
+ if (CHECK(err < 0, "bpf_map_update_elem", "failed\n"))
+ return;
+
+ err = bpf_prog_test_run_opts(bpf_program__fd(prog), &opts);
+ if (CHECK(err < 0, "bpf_prog_test_run_opts", "failed\n"))
+ return;
+ if (CHECK(opts.retval != 0, "bpf_perf_event_read_value",
+ "failed with %d\n", opts.retval))
+ return;
+
+ /* closing mfd, prog still holds a reference on map */
+ close(mfd);
+
+ err = bpf_prog_test_run_opts(bpf_program__fd(prog), &opts);
+ if (CHECK(err < 0, "bpf_prog_test_run_opts", "failed\n"))
+ return;
+
+ if (has_share_pe) {
+ CHECK(opts.retval != 0, "bpf_perf_event_read_value",
+ "failed with %d\n", opts.retval);
+ } else {
+ CHECK(opts.retval != -ENOENT, "bpf_perf_event_read_value",
+ "should have failed with %d, but got %d\n", -ENOENT,
+ opts.retval);
+ }
+}
+
+void test_pe_preserve_elems(void)
+{
+ struct test_pe_preserve_elems *skel;
+
+ skel = test_pe_preserve_elems__open_and_load();
+ if (CHECK(!skel, "skel_open", "failed to open skeleton\n"))
+ return;
+
+ test_one_map(skel->maps.array_1, skel->progs.read_array_1, false);
+ test_one_map(skel->maps.array_2, skel->progs.read_array_2, true);
+
+ test_pe_preserve_elems__destroy(skel);
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/percpu_alloc.c b/tools/testing/selftests/bpf/prog_tests/percpu_alloc.c
new file mode 100644
index 000000000000..343da65864d6
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/percpu_alloc.c
@@ -0,0 +1,128 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <test_progs.h>
+#include "percpu_alloc_array.skel.h"
+#include "percpu_alloc_cgrp_local_storage.skel.h"
+#include "percpu_alloc_fail.skel.h"
+
+static void test_array(void)
+{
+ struct percpu_alloc_array *skel;
+ int err, prog_fd;
+ LIBBPF_OPTS(bpf_test_run_opts, topts);
+
+ skel = percpu_alloc_array__open();
+ if (!ASSERT_OK_PTR(skel, "percpu_alloc_array__open"))
+ return;
+
+ bpf_program__set_autoload(skel->progs.test_array_map_1, true);
+ bpf_program__set_autoload(skel->progs.test_array_map_2, true);
+ bpf_program__set_autoload(skel->progs.test_array_map_3, true);
+ bpf_program__set_autoload(skel->progs.test_array_map_4, true);
+
+ skel->bss->my_pid = getpid();
+ skel->rodata->nr_cpus = libbpf_num_possible_cpus();
+
+ err = percpu_alloc_array__load(skel);
+ if (!ASSERT_OK(err, "percpu_alloc_array__load"))
+ goto out;
+
+ err = percpu_alloc_array__attach(skel);
+ if (!ASSERT_OK(err, "percpu_alloc_array__attach"))
+ goto out;
+
+ prog_fd = bpf_program__fd(skel->progs.test_array_map_1);
+ err = bpf_prog_test_run_opts(prog_fd, &topts);
+ ASSERT_OK(err, "test_run array_map 1-4");
+ ASSERT_EQ(topts.retval, 0, "test_run array_map 1-4");
+ ASSERT_EQ(skel->bss->cpu0_field_d, 2, "cpu0_field_d");
+ ASSERT_EQ(skel->bss->sum_field_c, 1, "sum_field_c");
+out:
+ percpu_alloc_array__destroy(skel);
+}
+
+static void test_array_sleepable(void)
+{
+ struct percpu_alloc_array *skel;
+ int err, prog_fd;
+ LIBBPF_OPTS(bpf_test_run_opts, topts);
+
+ skel = percpu_alloc_array__open();
+ if (!ASSERT_OK_PTR(skel, "percpu_alloc__open"))
+ return;
+
+ bpf_program__set_autoload(skel->progs.test_array_map_10, true);
+
+ skel->bss->my_pid = getpid();
+ skel->rodata->nr_cpus = libbpf_num_possible_cpus();
+
+ err = percpu_alloc_array__load(skel);
+ if (!ASSERT_OK(err, "percpu_alloc_array__load"))
+ goto out;
+
+ err = percpu_alloc_array__attach(skel);
+ if (!ASSERT_OK(err, "percpu_alloc_array__attach"))
+ goto out;
+
+ prog_fd = bpf_program__fd(skel->progs.test_array_map_10);
+ err = bpf_prog_test_run_opts(prog_fd, &topts);
+ ASSERT_OK(err, "test_run array_map_10");
+ ASSERT_EQ(topts.retval, 0, "test_run array_map_10");
+ ASSERT_EQ(skel->bss->cpu0_field_d, 2, "cpu0_field_d");
+ ASSERT_EQ(skel->bss->sum_field_c, 1, "sum_field_c");
+out:
+ percpu_alloc_array__destroy(skel);
+}
+
+static void test_cgrp_local_storage(void)
+{
+ struct percpu_alloc_cgrp_local_storage *skel;
+ int err, cgroup_fd, prog_fd;
+ LIBBPF_OPTS(bpf_test_run_opts, topts);
+
+ cgroup_fd = test__join_cgroup("/percpu_alloc");
+ if (!ASSERT_GE(cgroup_fd, 0, "join_cgroup /percpu_alloc"))
+ return;
+
+ skel = percpu_alloc_cgrp_local_storage__open();
+ if (!ASSERT_OK_PTR(skel, "percpu_alloc_cgrp_local_storage__open"))
+ goto close_fd;
+
+ skel->bss->my_pid = getpid();
+ skel->rodata->nr_cpus = libbpf_num_possible_cpus();
+
+ err = percpu_alloc_cgrp_local_storage__load(skel);
+ if (!ASSERT_OK(err, "percpu_alloc_cgrp_local_storage__load"))
+ goto destroy_skel;
+
+ err = percpu_alloc_cgrp_local_storage__attach(skel);
+ if (!ASSERT_OK(err, "percpu_alloc_cgrp_local_storage__attach"))
+ goto destroy_skel;
+
+ prog_fd = bpf_program__fd(skel->progs.test_cgrp_local_storage_1);
+ err = bpf_prog_test_run_opts(prog_fd, &topts);
+ ASSERT_OK(err, "test_run cgrp_local_storage 1-3");
+ ASSERT_EQ(topts.retval, 0, "test_run cgrp_local_storage 1-3");
+ ASSERT_EQ(skel->bss->cpu0_field_d, 2, "cpu0_field_d");
+ ASSERT_EQ(skel->bss->sum_field_c, 1, "sum_field_c");
+
+destroy_skel:
+ percpu_alloc_cgrp_local_storage__destroy(skel);
+close_fd:
+ close(cgroup_fd);
+}
+
+static void test_failure(void) {
+ RUN_TESTS(percpu_alloc_fail);
+}
+
+void test_percpu_alloc(void)
+{
+ if (test__start_subtest("array"))
+ test_array();
+ if (test__start_subtest("array_sleepable"))
+ test_array_sleepable();
+ if (test__start_subtest("cgrp_local_storage"))
+ test_cgrp_local_storage();
+ if (test__start_subtest("failure_tests"))
+ test_failure();
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/perf_branches.c b/tools/testing/selftests/bpf/prog_tests/perf_branches.c
index e35c444902a7..bc24f83339d6 100644
--- a/tools/testing/selftests/bpf/prog_tests/perf_branches.c
+++ b/tools/testing/selftests/bpf/prog_tests/perf_branches.c
@@ -74,7 +74,7 @@ static void test_perf_branches_common(int perf_fd,
/* attach perf_event */
link = bpf_program__attach_perf_event(skel->progs.perf_branches, perf_fd);
- if (CHECK(IS_ERR(link), "attach_perf_event", "err %ld\n", PTR_ERR(link)))
+ if (!ASSERT_OK_PTR(link, "attach_perf_event"))
goto out_destroy_skel;
/* generate some branches on cpu 0 */
@@ -110,7 +110,7 @@ static void test_perf_branches_hw(void)
attr.type = PERF_TYPE_HARDWARE;
attr.config = PERF_COUNT_HW_CPU_CYCLES;
attr.freq = 1;
- attr.sample_freq = 4000;
+ attr.sample_freq = 1000;
attr.sample_type = PERF_SAMPLE_BRANCH_STACK;
attr.branch_sample_type = PERF_SAMPLE_BRANCH_USER | PERF_SAMPLE_BRANCH_ANY;
pfd = syscall(__NR_perf_event_open, &attr, -1, 0, -1, PERF_FLAG_FD_CLOEXEC);
@@ -119,7 +119,7 @@ static void test_perf_branches_hw(void)
* Some setups don't support branch records (virtual machines, !x86),
* so skip test in this case.
*/
- if (pfd == -1) {
+ if (pfd < 0) {
if (errno == ENOENT || errno == EOPNOTSUPP) {
printf("%s:SKIP:no PERF_SAMPLE_BRANCH_STACK\n",
__func__);
@@ -151,7 +151,7 @@ static void test_perf_branches_no_hw(void)
attr.type = PERF_TYPE_SOFTWARE;
attr.config = PERF_COUNT_SW_CPU_CLOCK;
attr.freq = 1;
- attr.sample_freq = 4000;
+ attr.sample_freq = 1000;
pfd = syscall(__NR_perf_event_open, &attr, -1, 0, -1, PERF_FLAG_FD_CLOEXEC);
if (CHECK(pfd < 0, "perf_event_open", "err %d\n", pfd))
return;
diff --git a/tools/testing/selftests/bpf/prog_tests/perf_buffer.c b/tools/testing/selftests/bpf/prog_tests/perf_buffer.c
index a122ce3b360e..5fc2b3a0711e 100644
--- a/tools/testing/selftests/bpf/prog_tests/perf_buffer.c
+++ b/tools/testing/selftests/bpf/prog_tests/perf_buffer.c
@@ -4,8 +4,11 @@
#include <sched.h>
#include <sys/socket.h>
#include <test_progs.h>
+#include "test_perf_buffer.skel.h"
#include "bpf/libbpf_internal.h"
+static int duration;
+
/* AddressSanitizer sometimes crashes due to data dereference below, due to
* this being mmap()'ed memory. Disable instrumentation with
* no_sanitize_address attribute
@@ -23,18 +26,31 @@ static void on_sample(void *ctx, int cpu, void *data, __u32 size)
CPU_SET(cpu, cpu_seen);
}
-void test_perf_buffer(void)
+int trigger_on_cpu(int cpu)
+{
+ cpu_set_t cpu_set;
+ int err;
+
+ CPU_ZERO(&cpu_set);
+ CPU_SET(cpu, &cpu_set);
+
+ err = pthread_setaffinity_np(pthread_self(), sizeof(cpu_set), &cpu_set);
+ if (err && CHECK(err, "set_affinity", "cpu #%d, err %d\n", cpu, err))
+ return err;
+
+ usleep(1);
+
+ return 0;
+}
+
+void serial_test_perf_buffer(void)
{
- int err, prog_fd, on_len, nr_on_cpus = 0, nr_cpus, i, duration = 0;
- const char *prog_name = "kprobe/sys_nanosleep";
- const char *file = "./test_perf_buffer.o";
- struct perf_buffer_opts pb_opts = {};
- struct bpf_map *perf_buf_map;
- cpu_set_t cpu_set, cpu_seen;
- struct bpf_program *prog;
- struct bpf_object *obj;
+ int err, on_len, nr_on_cpus = 0, nr_cpus, i, j;
+ int zero = 0, my_pid = getpid();
+ struct test_perf_buffer *skel;
+ cpu_set_t cpu_seen;
struct perf_buffer *pb;
- struct bpf_link *link;
+ int last_fd = -1, fd;
bool *online;
nr_cpus = libbpf_num_possible_cpus();
@@ -51,33 +67,27 @@ void test_perf_buffer(void)
nr_on_cpus++;
/* load program */
- err = bpf_prog_load(file, BPF_PROG_TYPE_KPROBE, &obj, &prog_fd);
- if (CHECK(err, "obj_load", "err %d errno %d\n", err, errno)) {
- obj = NULL;
+ skel = test_perf_buffer__open_and_load();
+ if (CHECK(!skel, "skel_load", "skeleton open/load failed\n"))
goto out_close;
- }
- prog = bpf_object__find_program_by_title(obj, prog_name);
- if (CHECK(!prog, "find_probe", "prog '%s' not found\n", prog_name))
+ err = bpf_map_update_elem(bpf_map__fd(skel->maps.my_pid_map), &zero, &my_pid, 0);
+ if (!ASSERT_OK(err, "my_pid_update"))
goto out_close;
- /* load map */
- perf_buf_map = bpf_object__find_map_by_name(obj, "perf_buf_map");
- if (CHECK(!perf_buf_map, "find_perf_buf_map", "not found\n"))
+ /* attach probe */
+ err = test_perf_buffer__attach(skel);
+ if (CHECK(err, "attach_kprobe", "err %d\n", err))
goto out_close;
- /* attach kprobe */
- link = bpf_program__attach_kprobe(prog, false /* retprobe */,
- SYS_NANOSLEEP_KPROBE_NAME);
- if (CHECK(IS_ERR(link), "attach_kprobe", "err %ld\n", PTR_ERR(link)))
+ /* set up perf buffer */
+ pb = perf_buffer__new(bpf_map__fd(skel->maps.perf_buf_map), 1,
+ on_sample, NULL, &cpu_seen, NULL);
+ if (!ASSERT_OK_PTR(pb, "perf_buf__new"))
goto out_close;
- /* set up perf buffer */
- pb_opts.sample_cb = on_sample;
- pb_opts.ctx = &cpu_seen;
- pb = perf_buffer__new(bpf_map__fd(perf_buf_map), 1, &pb_opts);
- if (CHECK(IS_ERR(pb), "perf_buf__new", "err %ld\n", PTR_ERR(pb)))
- goto out_detach;
+ CHECK(perf_buffer__epoll_fd(pb) < 0, "epoll_fd",
+ "bad fd: %d\n", perf_buffer__epoll_fd(pb));
/* trigger kprobe on every CPU */
CPU_ZERO(&cpu_seen);
@@ -87,16 +97,8 @@ void test_perf_buffer(void)
continue;
}
- CPU_ZERO(&cpu_set);
- CPU_SET(i, &cpu_set);
-
- err = pthread_setaffinity_np(pthread_self(), sizeof(cpu_set),
- &cpu_set);
- if (err && CHECK(err, "set_affinity", "cpu #%d, err %d\n",
- i, err))
- goto out_detach;
-
- usleep(1);
+ if (trigger_on_cpu(i))
+ goto out_close;
}
/* read perf buffer */
@@ -108,11 +110,38 @@ void test_perf_buffer(void)
"expect %d, seen %d\n", nr_on_cpus, CPU_COUNT(&cpu_seen)))
goto out_free_pb;
+ if (CHECK(perf_buffer__buffer_cnt(pb) != nr_on_cpus, "buf_cnt",
+ "got %zu, expected %d\n", perf_buffer__buffer_cnt(pb), nr_on_cpus))
+ goto out_close;
+
+ for (i = 0, j = 0; i < nr_cpus; i++) {
+ if (i >= on_len || !online[i])
+ continue;
+
+ fd = perf_buffer__buffer_fd(pb, j);
+ CHECK(fd < 0 || last_fd == fd, "fd_check", "last fd %d == fd %d\n", last_fd, fd);
+ last_fd = fd;
+
+ err = perf_buffer__consume_buffer(pb, j);
+ if (CHECK(err, "drain_buf", "cpu %d, err %d\n", i, err))
+ goto out_close;
+
+ CPU_CLR(i, &cpu_seen);
+ if (trigger_on_cpu(i))
+ goto out_close;
+
+ err = perf_buffer__consume_buffer(pb, j);
+ if (CHECK(err, "consume_buf", "cpu %d, err %d\n", j, err))
+ goto out_close;
+
+ if (CHECK(!CPU_ISSET(i, &cpu_seen), "cpu_seen", "cpu %d not seen\n", i))
+ goto out_close;
+ j++;
+ }
+
out_free_pb:
perf_buffer__free(pb);
-out_detach:
- bpf_link__destroy(link);
out_close:
- bpf_object__close(obj);
+ test_perf_buffer__destroy(skel);
free(online);
}
diff --git a/tools/testing/selftests/bpf/prog_tests/perf_event_stackmap.c b/tools/testing/selftests/bpf/prog_tests/perf_event_stackmap.c
new file mode 100644
index 000000000000..f4aad35afae1
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/perf_event_stackmap.c
@@ -0,0 +1,116 @@
+// SPDX-License-Identifier: GPL-2.0
+// Copyright (c) 2020 Facebook
+#define _GNU_SOURCE
+#include <pthread.h>
+#include <sched.h>
+#include <test_progs.h>
+#include "perf_event_stackmap.skel.h"
+
+#ifndef noinline
+#define noinline __attribute__((noinline))
+#endif
+
+noinline int func_1(void)
+{
+ static int val = 1;
+
+ val += 1;
+
+ usleep(100);
+ return val;
+}
+
+noinline int func_2(void)
+{
+ return func_1();
+}
+
+noinline int func_3(void)
+{
+ return func_2();
+}
+
+noinline int func_4(void)
+{
+ return func_3();
+}
+
+noinline int func_5(void)
+{
+ return func_4();
+}
+
+noinline int func_6(void)
+{
+ int i, val = 1;
+
+ for (i = 0; i < 100; i++)
+ val += func_5();
+
+ return val;
+}
+
+void test_perf_event_stackmap(void)
+{
+ struct perf_event_attr attr = {
+ /* .type = PERF_TYPE_SOFTWARE, */
+ .type = PERF_TYPE_HARDWARE,
+ .config = PERF_COUNT_HW_CPU_CYCLES,
+ .precise_ip = 2,
+ .sample_type = PERF_SAMPLE_IP | PERF_SAMPLE_BRANCH_STACK |
+ PERF_SAMPLE_CALLCHAIN,
+ .branch_sample_type = PERF_SAMPLE_BRANCH_USER |
+ PERF_SAMPLE_BRANCH_NO_FLAGS |
+ PERF_SAMPLE_BRANCH_NO_CYCLES |
+ PERF_SAMPLE_BRANCH_CALL_STACK,
+ .freq = 1,
+ .sample_freq = read_perf_max_sample_freq(),
+ .size = sizeof(struct perf_event_attr),
+ };
+ struct perf_event_stackmap *skel;
+ __u32 duration = 0;
+ cpu_set_t cpu_set;
+ int pmu_fd, err;
+
+ skel = perf_event_stackmap__open();
+
+ if (CHECK(!skel, "skel_open", "skeleton open failed\n"))
+ return;
+
+ err = perf_event_stackmap__load(skel);
+ if (CHECK(err, "skel_load", "skeleton load failed: %d\n", err))
+ goto cleanup;
+
+ CPU_ZERO(&cpu_set);
+ CPU_SET(0, &cpu_set);
+ err = pthread_setaffinity_np(pthread_self(), sizeof(cpu_set), &cpu_set);
+ if (CHECK(err, "set_affinity", "err %d, errno %d\n", err, errno))
+ goto cleanup;
+
+ pmu_fd = syscall(__NR_perf_event_open, &attr, -1 /* pid */,
+ 0 /* cpu 0 */, -1 /* group id */,
+ 0 /* flags */);
+ if (pmu_fd < 0) {
+ printf("%s:SKIP:cpu doesn't support the event\n", __func__);
+ test__skip();
+ goto cleanup;
+ }
+
+ skel->links.oncpu = bpf_program__attach_perf_event(skel->progs.oncpu,
+ pmu_fd);
+ if (!ASSERT_OK_PTR(skel->links.oncpu, "attach_perf_event")) {
+ close(pmu_fd);
+ goto cleanup;
+ }
+
+ /* create kernel and user stack traces for testing */
+ func_6();
+
+ CHECK(skel->data->stackid_kernel != 2, "get_stackid_kernel", "failed\n");
+ CHECK(skel->data->stackid_user != 2, "get_stackid_user", "failed\n");
+ CHECK(skel->data->stack_kernel != 2, "get_stack_kernel", "failed\n");
+ CHECK(skel->data->stack_user != 2, "get_stack_user", "failed\n");
+
+cleanup:
+ perf_event_stackmap__destroy(skel);
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/perf_link.c b/tools/testing/selftests/bpf/prog_tests/perf_link.c
new file mode 100644
index 000000000000..3a25f1c743a1
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/perf_link.c
@@ -0,0 +1,90 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2021 Facebook */
+#define _GNU_SOURCE
+#include <pthread.h>
+#include <sched.h>
+#include <test_progs.h>
+#include "test_perf_link.skel.h"
+
+static void burn_cpu(void)
+{
+ volatile int j = 0;
+ cpu_set_t cpu_set;
+ int i, err;
+
+ /* generate some branches on cpu 0 */
+ CPU_ZERO(&cpu_set);
+ CPU_SET(0, &cpu_set);
+ err = pthread_setaffinity_np(pthread_self(), sizeof(cpu_set), &cpu_set);
+ ASSERT_OK(err, "set_thread_affinity");
+
+ /* spin the loop for a while (random high number) */
+ for (i = 0; i < 1000000; ++i)
+ ++j;
+}
+
+/* TODO: often fails in concurrent mode */
+void serial_test_perf_link(void)
+{
+ struct test_perf_link *skel = NULL;
+ struct perf_event_attr attr;
+ int pfd = -1, link_fd = -1, err;
+ int run_cnt_before, run_cnt_after;
+ struct bpf_link_info info;
+ __u32 info_len = sizeof(info);
+
+ /* create perf event */
+ memset(&attr, 0, sizeof(attr));
+ attr.size = sizeof(attr);
+ attr.type = PERF_TYPE_SOFTWARE;
+ attr.config = PERF_COUNT_SW_CPU_CLOCK;
+ attr.freq = 1;
+ attr.sample_freq = 1000;
+ pfd = syscall(__NR_perf_event_open, &attr, -1, 0, -1, PERF_FLAG_FD_CLOEXEC);
+ if (!ASSERT_GE(pfd, 0, "perf_fd"))
+ goto cleanup;
+
+ skel = test_perf_link__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "skel_load"))
+ goto cleanup;
+
+ link_fd = bpf_link_create(bpf_program__fd(skel->progs.handler), pfd,
+ BPF_PERF_EVENT, NULL);
+ if (!ASSERT_GE(link_fd, 0, "link_fd"))
+ goto cleanup;
+
+ memset(&info, 0, sizeof(info));
+ err = bpf_link_get_info_by_fd(link_fd, &info, &info_len);
+ if (!ASSERT_OK(err, "link_get_info"))
+ goto cleanup;
+
+ ASSERT_EQ(info.type, BPF_LINK_TYPE_PERF_EVENT, "link_type");
+ ASSERT_GT(info.id, 0, "link_id");
+ ASSERT_GT(info.prog_id, 0, "link_prog_id");
+
+ /* ensure we get at least one perf_event prog execution */
+ burn_cpu();
+ ASSERT_GT(skel->bss->run_cnt, 0, "run_cnt");
+
+ /* perf_event is still active, but we close link and BPF program
+ * shouldn't be executed anymore
+ */
+ close(link_fd);
+ link_fd = -1;
+
+ /* make sure there are no stragglers */
+ kern_sync_rcu();
+
+ run_cnt_before = skel->bss->run_cnt;
+ burn_cpu();
+ run_cnt_after = skel->bss->run_cnt;
+
+ ASSERT_EQ(run_cnt_before, run_cnt_after, "run_cnt_before_after");
+
+cleanup:
+ if (link_fd >= 0)
+ close(link_fd);
+ if (pfd >= 0)
+ close(pfd);
+ test_perf_link__destroy(skel);
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/pinning.c b/tools/testing/selftests/bpf/prog_tests/pinning.c
index 041952524c55..c799a3c5ad1f 100644
--- a/tools/testing/selftests/bpf/prog_tests/pinning.c
+++ b/tools/testing/selftests/bpf/prog_tests/pinning.c
@@ -18,7 +18,7 @@ __u32 get_map_id(struct bpf_object *obj, const char *name)
if (CHECK(!map, "find map", "NULL map"))
return 0;
- err = bpf_obj_get_info_by_fd(bpf_map__fd(map),
+ err = bpf_map_get_info_by_fd(bpf_map__fd(map),
&map_info, &map_info_len);
CHECK(err, "get map info", "err %d errno %d", err, errno);
return map_info.id;
@@ -26,18 +26,18 @@ __u32 get_map_id(struct bpf_object *obj, const char *name)
void test_pinning(void)
{
- const char *file_invalid = "./test_pinning_invalid.o";
+ const char *file_invalid = "./test_pinning_invalid.bpf.o";
const char *custpinpath = "/sys/fs/bpf/custom/pinmap";
const char *nopinpath = "/sys/fs/bpf/nopinmap";
const char *nopinpath2 = "/sys/fs/bpf/nopinmap2";
const char *custpath = "/sys/fs/bpf/custom";
const char *pinpath = "/sys/fs/bpf/pinmap";
- const char *file = "./test_pinning.o";
+ const char *file = "./test_pinning.bpf.o";
__u32 map_id, map_id2, duration = 0;
struct stat statbuf = {};
struct bpf_object *obj;
struct bpf_map *map;
- int err;
+ int err, map_fd;
DECLARE_LIBBPF_OPTS(bpf_object_open_opts, opts,
.pin_root_path = custpath,
);
@@ -125,6 +125,10 @@ void test_pinning(void)
if (CHECK(err, "pin maps", "err %d errno %d\n", err, errno))
goto out;
+ /* get pinning path */
+ if (!ASSERT_STREQ(bpf_map__pin_path(map), pinpath, "get pin path"))
+ goto out;
+
/* set pinning path of other map and re-pin all */
map = bpf_object__find_map_by_name(obj, "nopinmap");
if (CHECK(!map, "find map", "NULL map"))
@@ -134,6 +138,11 @@ void test_pinning(void)
if (CHECK(err, "set pin path", "err %d errno %d\n", err, errno))
goto out;
+ /* get pinning path after set */
+ if (!ASSERT_STREQ(bpf_map__pin_path(map), custpinpath,
+ "get pin path after set"))
+ goto out;
+
/* should only pin the one unpinned map */
err = bpf_object__pin_maps(obj, NULL);
if (CHECK(err, "pin maps", "err %d errno %d\n", err, errno))
@@ -213,6 +222,53 @@ void test_pinning(void)
if (CHECK(err, "stat custpinpath", "err %d errno %d\n", err, errno))
goto out;
+ /* remove the custom pin path to re-test it with reuse fd below */
+ err = unlink(custpinpath);
+ if (CHECK(err, "unlink custpinpath", "err %d errno %d\n", err, errno))
+ goto out;
+
+ err = rmdir(custpath);
+ if (CHECK(err, "rmdir custpindir", "err %d errno %d\n", err, errno))
+ goto out;
+
+ bpf_object__close(obj);
+
+ /* test pinning at custom path with reuse fd */
+ obj = bpf_object__open_file(file, NULL);
+ err = libbpf_get_error(obj);
+ if (CHECK(err, "default open", "err %d errno %d\n", err, errno)) {
+ obj = NULL;
+ goto out;
+ }
+
+ map_fd = bpf_map_create(BPF_MAP_TYPE_ARRAY, NULL, sizeof(__u32),
+ sizeof(__u64), 1, NULL);
+ if (CHECK(map_fd < 0, "create pinmap manually", "fd %d\n", map_fd))
+ goto out;
+
+ map = bpf_object__find_map_by_name(obj, "pinmap");
+ if (CHECK(!map, "find map", "NULL map"))
+ goto close_map_fd;
+
+ err = bpf_map__reuse_fd(map, map_fd);
+ if (CHECK(err, "reuse pinmap fd", "err %d errno %d\n", err, errno))
+ goto close_map_fd;
+
+ err = bpf_map__set_pin_path(map, custpinpath);
+ if (CHECK(err, "set pin path", "err %d errno %d\n", err, errno))
+ goto close_map_fd;
+
+ err = bpf_object__load(obj);
+ if (CHECK(err, "custom load", "err %d errno %d\n", err, errno))
+ goto close_map_fd;
+
+ /* check that pinmap was pinned at the custom path */
+ err = stat(custpinpath, &statbuf);
+ if (CHECK(err, "stat custpinpath", "err %d errno %d\n", err, errno))
+ goto close_map_fd;
+
+close_map_fd:
+ close(map_fd);
out:
unlink(pinpath);
unlink(nopinpath);
diff --git a/tools/testing/selftests/bpf/prog_tests/pkt_access.c b/tools/testing/selftests/bpf/prog_tests/pkt_access.c
index 44b514fabccd..682e4ff45b01 100644
--- a/tools/testing/selftests/bpf/prog_tests/pkt_access.c
+++ b/tools/testing/selftests/bpf/prog_tests/pkt_access.c
@@ -4,25 +4,29 @@
void test_pkt_access(void)
{
- const char *file = "./test_pkt_access.o";
+ const char *file = "./test_pkt_access.bpf.o";
struct bpf_object *obj;
- __u32 duration, retval;
int err, prog_fd;
+ LIBBPF_OPTS(bpf_test_run_opts, topts,
+ .data_in = &pkt_v4,
+ .data_size_in = sizeof(pkt_v4),
+ .repeat = 100000,
+ );
- err = bpf_prog_load(file, BPF_PROG_TYPE_SCHED_CLS, &obj, &prog_fd);
+ err = bpf_prog_test_load(file, BPF_PROG_TYPE_SCHED_CLS, &obj, &prog_fd);
if (CHECK_FAIL(err))
return;
- err = bpf_prog_test_run(prog_fd, 100000, &pkt_v4, sizeof(pkt_v4),
- NULL, NULL, &retval, &duration);
- CHECK(err || retval, "ipv4",
- "err %d errno %d retval %d duration %d\n",
- err, errno, retval, duration);
+ err = bpf_prog_test_run_opts(prog_fd, &topts);
+ ASSERT_OK(err, "ipv4 test_run_opts err");
+ ASSERT_OK(topts.retval, "ipv4 test_run_opts retval");
+
+ topts.data_in = &pkt_v6;
+ topts.data_size_in = sizeof(pkt_v6);
+ topts.data_size_out = 0; /* reset from last call */
+ err = bpf_prog_test_run_opts(prog_fd, &topts);
+ ASSERT_OK(err, "ipv6 test_run_opts err");
+ ASSERT_OK(topts.retval, "ipv6 test_run_opts retval");
- err = bpf_prog_test_run(prog_fd, 100000, &pkt_v6, sizeof(pkt_v6),
- NULL, NULL, &retval, &duration);
- CHECK(err || retval, "ipv6",
- "err %d errno %d retval %d duration %d\n",
- err, errno, retval, duration);
bpf_object__close(obj);
}
diff --git a/tools/testing/selftests/bpf/prog_tests/pkt_md_access.c b/tools/testing/selftests/bpf/prog_tests/pkt_md_access.c
index 939015cd6dba..0d85e0642811 100644
--- a/tools/testing/selftests/bpf/prog_tests/pkt_md_access.c
+++ b/tools/testing/selftests/bpf/prog_tests/pkt_md_access.c
@@ -4,20 +4,22 @@
void test_pkt_md_access(void)
{
- const char *file = "./test_pkt_md_access.o";
+ const char *file = "./test_pkt_md_access.bpf.o";
struct bpf_object *obj;
- __u32 duration, retval;
int err, prog_fd;
+ LIBBPF_OPTS(bpf_test_run_opts, topts,
+ .data_in = &pkt_v4,
+ .data_size_in = sizeof(pkt_v4),
+ .repeat = 10,
+ );
- err = bpf_prog_load(file, BPF_PROG_TYPE_SCHED_CLS, &obj, &prog_fd);
+ err = bpf_prog_test_load(file, BPF_PROG_TYPE_SCHED_CLS, &obj, &prog_fd);
if (CHECK_FAIL(err))
return;
- err = bpf_prog_test_run(prog_fd, 10, &pkt_v4, sizeof(pkt_v4),
- NULL, NULL, &retval, &duration);
- CHECK(err || retval, "",
- "err %d errno %d retval %d duration %d\n",
- err, errno, retval, duration);
+ err = bpf_prog_test_run_opts(prog_fd, &topts);
+ ASSERT_OK(err, "test_run_opts err");
+ ASSERT_OK(topts.retval, "test_run_opts retval");
bpf_object__close(obj);
}
diff --git a/tools/testing/selftests/bpf/prog_tests/preempted_bpf_ma_op.c b/tools/testing/selftests/bpf/prog_tests/preempted_bpf_ma_op.c
new file mode 100644
index 000000000000..3a2ec3923fca
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/preempted_bpf_ma_op.c
@@ -0,0 +1,89 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (C) 2023. Huawei Technologies Co., Ltd */
+#define _GNU_SOURCE
+#include <sched.h>
+#include <pthread.h>
+#include <stdbool.h>
+#include <test_progs.h>
+
+#include "preempted_bpf_ma_op.skel.h"
+
+#define ALLOC_THREAD_NR 4
+#define ALLOC_LOOP_NR 512
+
+struct alloc_ctx {
+ /* output */
+ int run_err;
+ /* input */
+ int fd;
+ bool *nomem_err;
+};
+
+static void *run_alloc_prog(void *data)
+{
+ struct alloc_ctx *ctx = data;
+ cpu_set_t cpu_set;
+ int i;
+
+ CPU_ZERO(&cpu_set);
+ CPU_SET(0, &cpu_set);
+ pthread_setaffinity_np(pthread_self(), sizeof(cpu_set), &cpu_set);
+
+ for (i = 0; i < ALLOC_LOOP_NR && !*ctx->nomem_err; i++) {
+ LIBBPF_OPTS(bpf_test_run_opts, topts);
+ int err;
+
+ err = bpf_prog_test_run_opts(ctx->fd, &topts);
+ ctx->run_err |= err | topts.retval;
+ }
+
+ return NULL;
+}
+
+void test_preempted_bpf_ma_op(void)
+{
+ struct alloc_ctx ctx[ALLOC_THREAD_NR];
+ struct preempted_bpf_ma_op *skel;
+ pthread_t tid[ALLOC_THREAD_NR];
+ int i, err;
+
+ skel = preempted_bpf_ma_op__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "open_and_load"))
+ return;
+
+ err = preempted_bpf_ma_op__attach(skel);
+ if (!ASSERT_OK(err, "attach"))
+ goto out;
+
+ for (i = 0; i < ARRAY_SIZE(ctx); i++) {
+ struct bpf_program *prog;
+ char name[8];
+
+ snprintf(name, sizeof(name), "test%d", i);
+ prog = bpf_object__find_program_by_name(skel->obj, name);
+ if (!ASSERT_OK_PTR(prog, "no test prog"))
+ goto out;
+
+ ctx[i].run_err = 0;
+ ctx[i].fd = bpf_program__fd(prog);
+ ctx[i].nomem_err = &skel->bss->nomem_err;
+ }
+
+ memset(tid, 0, sizeof(tid));
+ for (i = 0; i < ARRAY_SIZE(tid); i++) {
+ err = pthread_create(&tid[i], NULL, run_alloc_prog, &ctx[i]);
+ if (!ASSERT_OK(err, "pthread_create"))
+ break;
+ }
+
+ for (i = 0; i < ARRAY_SIZE(tid); i++) {
+ if (!tid[i])
+ break;
+ pthread_join(tid[i], NULL);
+ ASSERT_EQ(ctx[i].run_err, 0, "run prog err");
+ }
+
+ ASSERT_FALSE(skel->bss->nomem_err, "ENOMEM");
+out:
+ preempted_bpf_ma_op__destroy(skel);
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/probe_read_user_str.c b/tools/testing/selftests/bpf/prog_tests/probe_read_user_str.c
new file mode 100644
index 000000000000..e419298132b5
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/probe_read_user_str.c
@@ -0,0 +1,71 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <test_progs.h>
+#include "test_probe_read_user_str.skel.h"
+
+static const char str1[] = "mestring";
+static const char str2[] = "mestringalittlebigger";
+static const char str3[] = "mestringblubblubblubblubblub";
+
+static int test_one_str(struct test_probe_read_user_str *skel, const char *str,
+ size_t len)
+{
+ int err, duration = 0;
+ char buf[256];
+
+ /* Ensure bytes after string are ones */
+ memset(buf, 1, sizeof(buf));
+ memcpy(buf, str, len);
+
+ /* Give prog our userspace pointer */
+ skel->bss->user_ptr = buf;
+
+ /* Trigger tracepoint */
+ usleep(1);
+
+ /* Did helper fail? */
+ if (CHECK(skel->bss->ret < 0, "prog_ret", "prog returned: %ld\n",
+ skel->bss->ret))
+ return 1;
+
+ /* Check that string was copied correctly */
+ err = memcmp(skel->bss->buf, str, len);
+ if (CHECK(err, "memcmp", "prog copied wrong string"))
+ return 1;
+
+ /* Now check that no extra trailing bytes were copied */
+ memset(buf, 0, sizeof(buf));
+ err = memcmp(skel->bss->buf + len, buf, sizeof(buf) - len);
+ if (CHECK(err, "memcmp", "trailing bytes were not stripped"))
+ return 1;
+
+ return 0;
+}
+
+void test_probe_read_user_str(void)
+{
+ struct test_probe_read_user_str *skel;
+ int err, duration = 0;
+
+ skel = test_probe_read_user_str__open_and_load();
+ if (CHECK(!skel, "test_probe_read_user_str__open_and_load",
+ "skeleton open and load failed\n"))
+ return;
+
+ /* Give pid to bpf prog so it doesn't read from anyone else */
+ skel->bss->pid = getpid();
+
+ err = test_probe_read_user_str__attach(skel);
+ if (CHECK(err, "test_probe_read_user_str__attach",
+ "skeleton attach failed: %d\n", err))
+ goto out;
+
+ if (test_one_str(skel, str1, sizeof(str1)))
+ goto out;
+ if (test_one_str(skel, str2, sizeof(str2)))
+ goto out;
+ if (test_one_str(skel, str3, sizeof(str3)))
+ goto out;
+
+out:
+ test_probe_read_user_str__destroy(skel);
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/probe_user.c b/tools/testing/selftests/bpf/prog_tests/probe_user.c
index 7aecfd9e87d1..8721671321de 100644
--- a/tools/testing/selftests/bpf/prog_tests/probe_user.c
+++ b/tools/testing/selftests/bpf/prog_tests/probe_user.c
@@ -1,27 +1,38 @@
// SPDX-License-Identifier: GPL-2.0
#include <test_progs.h>
-void test_probe_user(void)
+/* TODO: corrupts other tests uses connect() */
+void serial_test_probe_user(void)
{
- const char *prog_name = "kprobe/__sys_connect";
- const char *obj_file = "./test_probe_user.o";
+ static const char *const prog_names[] = {
+ "handle_sys_connect",
+#if defined(__s390x__)
+ "handle_sys_socketcall",
+#endif
+ };
+ enum { prog_count = ARRAY_SIZE(prog_names) };
+ const char *obj_file = "./test_probe_user.bpf.o";
DECLARE_LIBBPF_OPTS(bpf_object_open_opts, opts, );
int err, results_map_fd, sock_fd, duration = 0;
struct sockaddr curr, orig, tmp;
struct sockaddr_in *in = (struct sockaddr_in *)&curr;
- struct bpf_link *kprobe_link = NULL;
- struct bpf_program *kprobe_prog;
+ struct bpf_link *kprobe_links[prog_count] = {};
+ struct bpf_program *kprobe_progs[prog_count];
struct bpf_object *obj;
static const int zero = 0;
+ size_t i;
obj = bpf_object__open_file(obj_file, &opts);
- if (CHECK(IS_ERR(obj), "obj_open_file", "err %ld\n", PTR_ERR(obj)))
+ if (!ASSERT_OK_PTR(obj, "obj_open_file"))
return;
- kprobe_prog = bpf_object__find_program_by_title(obj, prog_name);
- if (CHECK(!kprobe_prog, "find_probe",
- "prog '%s' not found\n", prog_name))
- goto cleanup;
+ for (i = 0; i < prog_count; i++) {
+ kprobe_progs[i] =
+ bpf_object__find_program_by_name(obj, prog_names[i]);
+ if (CHECK(!kprobe_progs[i], "find_probe",
+ "prog '%s' not found\n", prog_names[i]))
+ goto cleanup;
+ }
err = bpf_object__load(obj);
if (CHECK(err, "obj_load", "err %d\n", err))
@@ -32,11 +43,10 @@ void test_probe_user(void)
"err %d\n", results_map_fd))
goto cleanup;
- kprobe_link = bpf_program__attach(kprobe_prog);
- if (CHECK(IS_ERR(kprobe_link), "attach_kprobe",
- "err %ld\n", PTR_ERR(kprobe_link))) {
- kprobe_link = NULL;
- goto cleanup;
+ for (i = 0; i < prog_count; i++) {
+ kprobe_links[i] = bpf_program__attach(kprobe_progs[i]);
+ if (!ASSERT_OK_PTR(kprobe_links[i], "attach_kprobe"))
+ goto cleanup;
}
memset(&curr, 0, sizeof(curr));
@@ -71,6 +81,7 @@ void test_probe_user(void)
inet_ntoa(in->sin_addr), ntohs(in->sin_port)))
goto cleanup;
cleanup:
- bpf_link__destroy(kprobe_link);
+ for (i = 0; i < prog_count; i++)
+ bpf_link__destroy(kprobe_links[i]);
bpf_object__close(obj);
}
diff --git a/tools/testing/selftests/bpf/prog_tests/prog_array_init.c b/tools/testing/selftests/bpf/prog_tests/prog_array_init.c
new file mode 100644
index 000000000000..fc4657619739
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/prog_array_init.c
@@ -0,0 +1,32 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* Copyright (c) 2021 Hengqi Chen */
+
+#include <test_progs.h>
+#include "test_prog_array_init.skel.h"
+
+void test_prog_array_init(void)
+{
+ struct test_prog_array_init *skel;
+ int err;
+
+ skel = test_prog_array_init__open();
+ if (!ASSERT_OK_PTR(skel, "could not open BPF object"))
+ return;
+
+ skel->rodata->my_pid = getpid();
+
+ err = test_prog_array_init__load(skel);
+ if (!ASSERT_OK(err, "could not load BPF object"))
+ goto cleanup;
+
+ skel->links.entry = bpf_program__attach_raw_tracepoint(skel->progs.entry, "sys_enter");
+ if (!ASSERT_OK_PTR(skel->links.entry, "could not attach BPF program"))
+ goto cleanup;
+
+ usleep(1);
+
+ ASSERT_EQ(skel->bss->value, 42, "unexpected value");
+
+cleanup:
+ test_prog_array_init__destroy(skel);
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/prog_run_opts.c b/tools/testing/selftests/bpf/prog_tests/prog_run_opts.c
new file mode 100644
index 000000000000..01f1d1b6715a
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/prog_run_opts.c
@@ -0,0 +1,77 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <test_progs.h>
+#include <network_helpers.h>
+
+#include "test_pkt_access.skel.h"
+
+static const __u32 duration;
+
+static void check_run_cnt(int prog_fd, __u64 run_cnt)
+{
+ struct bpf_prog_info info = {};
+ __u32 info_len = sizeof(info);
+ int err;
+
+ err = bpf_prog_get_info_by_fd(prog_fd, &info, &info_len);
+ if (CHECK(err, "get_prog_info", "failed to get bpf_prog_info for fd %d\n", prog_fd))
+ return;
+
+ CHECK(run_cnt != info.run_cnt, "run_cnt",
+ "incorrect number of repetitions, want %llu have %llu\n", run_cnt, info.run_cnt);
+}
+
+void test_prog_run_opts(void)
+{
+ struct test_pkt_access *skel;
+ int err, stats_fd = -1, prog_fd;
+ char buf[10] = {};
+ __u64 run_cnt = 0;
+
+ LIBBPF_OPTS(bpf_test_run_opts, topts,
+ .repeat = 1,
+ .data_in = &pkt_v4,
+ .data_size_in = sizeof(pkt_v4),
+ .data_out = buf,
+ .data_size_out = 5,
+ );
+
+ stats_fd = bpf_enable_stats(BPF_STATS_RUN_TIME);
+ if (!ASSERT_GE(stats_fd, 0, "enable_stats good fd"))
+ return;
+
+ skel = test_pkt_access__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "open_and_load"))
+ goto cleanup;
+
+ prog_fd = bpf_program__fd(skel->progs.test_pkt_access);
+
+ err = bpf_prog_test_run_opts(prog_fd, &topts);
+ ASSERT_EQ(errno, ENOSPC, "test_run errno");
+ ASSERT_ERR(err, "test_run");
+ ASSERT_OK(topts.retval, "test_run retval");
+
+ ASSERT_EQ(topts.data_size_out, sizeof(pkt_v4), "test_run data_size_out");
+ ASSERT_EQ(buf[5], 0, "overflow, BPF_PROG_TEST_RUN ignored size hint");
+
+ run_cnt += topts.repeat;
+ check_run_cnt(prog_fd, run_cnt);
+
+ topts.data_out = NULL;
+ topts.data_size_out = 0;
+ topts.repeat = 2;
+ errno = 0;
+
+ err = bpf_prog_test_run_opts(prog_fd, &topts);
+ ASSERT_OK(errno, "run_no_output errno");
+ ASSERT_OK(err, "run_no_output err");
+ ASSERT_OK(topts.retval, "run_no_output retval");
+
+ run_cnt += topts.repeat;
+ check_run_cnt(prog_fd, run_cnt);
+
+cleanup:
+ if (skel)
+ test_pkt_access__destroy(skel);
+ if (stats_fd >= 0)
+ close(stats_fd);
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/prog_run_xattr.c b/tools/testing/selftests/bpf/prog_tests/prog_run_xattr.c
deleted file mode 100644
index dde2b7ae7bc9..000000000000
--- a/tools/testing/selftests/bpf/prog_tests/prog_run_xattr.c
+++ /dev/null
@@ -1,50 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-#include <test_progs.h>
-#include <network_helpers.h>
-
-void test_prog_run_xattr(void)
-{
- const char *file = "./test_pkt_access.o";
- struct bpf_object *obj;
- char buf[10];
- int err;
- struct bpf_prog_test_run_attr tattr = {
- .repeat = 1,
- .data_in = &pkt_v4,
- .data_size_in = sizeof(pkt_v4),
- .data_out = buf,
- .data_size_out = 5,
- };
-
- err = bpf_prog_load(file, BPF_PROG_TYPE_SCHED_CLS, &obj,
- &tattr.prog_fd);
- if (CHECK_ATTR(err, "load", "err %d errno %d\n", err, errno))
- return;
-
- memset(buf, 0, sizeof(buf));
-
- err = bpf_prog_test_run_xattr(&tattr);
- CHECK_ATTR(err != -1 || errno != ENOSPC || tattr.retval, "run",
- "err %d errno %d retval %d\n", err, errno, tattr.retval);
-
- CHECK_ATTR(tattr.data_size_out != sizeof(pkt_v4), "data_size_out",
- "incorrect output size, want %lu have %u\n",
- sizeof(pkt_v4), tattr.data_size_out);
-
- CHECK_ATTR(buf[5] != 0, "overflow",
- "BPF_PROG_TEST_RUN ignored size hint\n");
-
- tattr.data_out = NULL;
- tattr.data_size_out = 0;
- errno = 0;
-
- err = bpf_prog_test_run_xattr(&tattr);
- CHECK_ATTR(err || errno || tattr.retval, "run_no_output",
- "err %d errno %d retval %d\n", err, errno, tattr.retval);
-
- tattr.data_size_out = 1;
- err = bpf_prog_test_run_xattr(&tattr);
- CHECK_ATTR(err != -EINVAL, "run_wrong_size_out", "err %d\n", err);
-
- bpf_object__close(obj);
-}
diff --git a/tools/testing/selftests/bpf/prog_tests/prog_tests_framework.c b/tools/testing/selftests/bpf/prog_tests/prog_tests_framework.c
new file mode 100644
index 000000000000..14f2796076e0
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/prog_tests_framework.c
@@ -0,0 +1,56 @@
+// SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause)
+
+#include "test_progs.h"
+#include "testing_helpers.h"
+
+static void clear_test_state(struct test_state *state)
+{
+ state->error_cnt = 0;
+ state->sub_succ_cnt = 0;
+ state->skip_cnt = 0;
+}
+
+void test_prog_tests_framework(void)
+{
+ struct test_state *state = env.test_state;
+
+ /* in all the ASSERT calls below we need to return on the first
+ * error due to the fact that we are cleaning the test state after
+ * each dummy subtest
+ */
+
+ /* test we properly count skipped tests with subtests */
+ if (test__start_subtest("test_good_subtest"))
+ test__end_subtest();
+ if (!ASSERT_EQ(state->skip_cnt, 0, "skip_cnt_check"))
+ return;
+ if (!ASSERT_EQ(state->error_cnt, 0, "error_cnt_check"))
+ return;
+ if (!ASSERT_EQ(state->subtest_num, 1, "subtest_num_check"))
+ return;
+ clear_test_state(state);
+
+ if (test__start_subtest("test_skip_subtest")) {
+ test__skip();
+ test__end_subtest();
+ }
+ if (test__start_subtest("test_skip_subtest")) {
+ test__skip();
+ test__end_subtest();
+ }
+ if (!ASSERT_EQ(state->skip_cnt, 2, "skip_cnt_check"))
+ return;
+ if (!ASSERT_EQ(state->subtest_num, 3, "subtest_num_check"))
+ return;
+ clear_test_state(state);
+
+ if (test__start_subtest("test_fail_subtest")) {
+ test__fail();
+ test__end_subtest();
+ }
+ if (!ASSERT_EQ(state->error_cnt, 1, "error_cnt_check"))
+ return;
+ if (!ASSERT_EQ(state->subtest_num, 4, "subtest_num_check"))
+ return;
+ clear_test_state(state);
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/ptr_untrusted.c b/tools/testing/selftests/bpf/prog_tests/ptr_untrusted.c
new file mode 100644
index 000000000000..8d077d150c56
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/ptr_untrusted.c
@@ -0,0 +1,36 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (C) 2023 Yafang Shao <laoar.shao@gmail.com> */
+
+#include <string.h>
+#include <linux/bpf.h>
+#include <test_progs.h>
+#include "test_ptr_untrusted.skel.h"
+
+#define TP_NAME "sched_switch"
+
+void serial_test_ptr_untrusted(void)
+{
+ struct test_ptr_untrusted *skel;
+ int err;
+
+ skel = test_ptr_untrusted__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "skel_open"))
+ goto cleanup;
+
+ /* First, attach lsm prog */
+ skel->links.lsm_run = bpf_program__attach_lsm(skel->progs.lsm_run);
+ if (!ASSERT_OK_PTR(skel->links.lsm_run, "lsm_attach"))
+ goto cleanup;
+
+ /* Second, attach raw_tp prog. The lsm prog will be triggered. */
+ skel->links.raw_tp_run = bpf_program__attach_raw_tracepoint(skel->progs.raw_tp_run,
+ TP_NAME);
+ if (!ASSERT_OK_PTR(skel->links.raw_tp_run, "raw_tp_attach"))
+ goto cleanup;
+
+ err = strncmp(skel->bss->tp_name, TP_NAME, strlen(TP_NAME));
+ ASSERT_EQ(err, 0, "cmp_tp_name");
+
+cleanup:
+ test_ptr_untrusted__destroy(skel);
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/queue_stack_map.c b/tools/testing/selftests/bpf/prog_tests/queue_stack_map.c
index f47e7b1cb32c..a043af9cd6d9 100644
--- a/tools/testing/selftests/bpf/prog_tests/queue_stack_map.c
+++ b/tools/testing/selftests/bpf/prog_tests/queue_stack_map.c
@@ -10,24 +10,31 @@ enum {
static void test_queue_stack_map_by_type(int type)
{
const int MAP_SIZE = 32;
- __u32 vals[MAP_SIZE], duration, retval, size, val;
+ __u32 vals[MAP_SIZE], val;
int i, err, prog_fd, map_in_fd, map_out_fd;
char file[32], buf[128];
struct bpf_object *obj;
- struct iphdr *iph = (void *)buf + sizeof(struct ethhdr);
+ struct iphdr iph = {};
+ LIBBPF_OPTS(bpf_test_run_opts, topts,
+ .data_in = &pkt_v4,
+ .data_size_in = sizeof(pkt_v4),
+ .data_out = buf,
+ .data_size_out = sizeof(buf),
+ .repeat = 1,
+ );
/* Fill test values to be used */
for (i = 0; i < MAP_SIZE; i++)
vals[i] = rand();
if (type == QUEUE)
- strncpy(file, "./test_queue_map.o", sizeof(file));
+ strncpy(file, "./test_queue_map.bpf.o", sizeof(file));
else if (type == STACK)
- strncpy(file, "./test_stack_map.o", sizeof(file));
+ strncpy(file, "./test_stack_map.bpf.o", sizeof(file));
else
return;
- err = bpf_prog_load(file, BPF_PROG_TYPE_SCHED_CLS, &obj, &prog_fd);
+ err = bpf_prog_test_load(file, BPF_PROG_TYPE_SCHED_CLS, &obj, &prog_fd);
if (CHECK_FAIL(err))
return;
@@ -58,36 +65,37 @@ static void test_queue_stack_map_by_type(int type)
pkt_v4.iph.saddr = vals[MAP_SIZE - 1 - i] * 5;
}
- err = bpf_prog_test_run(prog_fd, 1, &pkt_v4, sizeof(pkt_v4),
- buf, &size, &retval, &duration);
- if (err || retval || size != sizeof(pkt_v4) ||
- iph->daddr != val)
+ topts.data_size_out = sizeof(buf);
+ err = bpf_prog_test_run_opts(prog_fd, &topts);
+ if (err || topts.retval ||
+ topts.data_size_out != sizeof(pkt_v4))
+ break;
+ memcpy(&iph, buf + sizeof(struct ethhdr), sizeof(iph));
+ if (iph.daddr != val)
break;
}
- CHECK(err || retval || size != sizeof(pkt_v4) || iph->daddr != val,
- "bpf_map_pop_elem",
- "err %d errno %d retval %d size %d iph->daddr %u\n",
- err, errno, retval, size, iph->daddr);
+ ASSERT_OK(err, "bpf_map_pop_elem");
+ ASSERT_OK(topts.retval, "bpf_map_pop_elem test retval");
+ ASSERT_EQ(topts.data_size_out, sizeof(pkt_v4),
+ "bpf_map_pop_elem data_size_out");
+ ASSERT_EQ(iph.daddr, val, "bpf_map_pop_elem iph.daddr");
/* Queue is empty, program should return TC_ACT_SHOT */
- err = bpf_prog_test_run(prog_fd, 1, &pkt_v4, sizeof(pkt_v4),
- buf, &size, &retval, &duration);
- CHECK(err || retval != 2 /* TC_ACT_SHOT */|| size != sizeof(pkt_v4),
- "check-queue-stack-map-empty",
- "err %d errno %d retval %d size %d\n",
- err, errno, retval, size);
+ topts.data_size_out = sizeof(buf);
+ err = bpf_prog_test_run_opts(prog_fd, &topts);
+ ASSERT_OK(err, "check-queue-stack-map-empty");
+ ASSERT_EQ(topts.retval, 2 /* TC_ACT_SHOT */,
+ "check-queue-stack-map-empty test retval");
+ ASSERT_EQ(topts.data_size_out, sizeof(pkt_v4),
+ "check-queue-stack-map-empty data_size_out");
/* Check that the program pushed elements correctly */
for (i = 0; i < MAP_SIZE; i++) {
err = bpf_map_lookup_and_delete_elem(map_out_fd, NULL, &val);
- if (err || val != vals[i] * 5)
- break;
+ ASSERT_OK(err, "bpf_map_lookup_and_delete_elem");
+ ASSERT_EQ(val, vals[i] * 5, "bpf_map_push_elem val");
}
-
- CHECK(i != MAP_SIZE && (err || val != vals[i] * 5),
- "bpf_map_push_elem", "err %d value %u\n", err, val);
-
out:
pkt_v4.iph.saddr = 0;
bpf_object__close(obj);
diff --git a/tools/testing/selftests/bpf/prog_tests/raw_tp_test_run.c b/tools/testing/selftests/bpf/prog_tests/raw_tp_test_run.c
new file mode 100644
index 000000000000..fe5b8fae2c36
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/raw_tp_test_run.c
@@ -0,0 +1,86 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/* Copyright (c) 2019 Facebook */
+#include <test_progs.h>
+#include <linux/bpf.h>
+#include "bpf/libbpf_internal.h"
+#include "test_raw_tp_test_run.skel.h"
+
+void test_raw_tp_test_run(void)
+{
+ int comm_fd = -1, err, nr_online, i, prog_fd;
+ __u64 args[2] = {0x1234ULL, 0x5678ULL};
+ int expected_retval = 0x1234 + 0x5678;
+ struct test_raw_tp_test_run *skel;
+ char buf[] = "new_name";
+ bool *online = NULL;
+ LIBBPF_OPTS(bpf_test_run_opts, opts,
+ .ctx_in = args,
+ .ctx_size_in = sizeof(args),
+ .flags = BPF_F_TEST_RUN_ON_CPU,
+ );
+
+ err = parse_cpu_mask_file("/sys/devices/system/cpu/online", &online,
+ &nr_online);
+ if (!ASSERT_OK(err, "parse_cpu_mask_file"))
+ return;
+
+ skel = test_raw_tp_test_run__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "skel_open"))
+ goto cleanup;
+
+ err = test_raw_tp_test_run__attach(skel);
+ if (!ASSERT_OK(err, "skel_attach"))
+ goto cleanup;
+
+ comm_fd = open("/proc/self/comm", O_WRONLY|O_TRUNC);
+ if (!ASSERT_GE(comm_fd, 0, "open /proc/self/comm"))
+ goto cleanup;
+
+ err = write(comm_fd, buf, sizeof(buf));
+ ASSERT_GE(err, 0, "task rename");
+
+ ASSERT_NEQ(skel->bss->count, 0, "check_count");
+ ASSERT_EQ(skel->data->on_cpu, 0xffffffff, "check_on_cpu");
+
+ prog_fd = bpf_program__fd(skel->progs.rename);
+ opts.ctx_in = args;
+ opts.ctx_size_in = sizeof(__u64);
+
+ err = bpf_prog_test_run_opts(prog_fd, &opts);
+ ASSERT_NEQ(err, 0, "test_run should fail for too small ctx");
+
+ opts.ctx_size_in = sizeof(args);
+ err = bpf_prog_test_run_opts(prog_fd, &opts);
+ ASSERT_OK(err, "test_run");
+ ASSERT_EQ(opts.retval, expected_retval, "check_retval");
+
+ for (i = 0; i < nr_online; i++) {
+ if (!online[i])
+ continue;
+
+ opts.cpu = i;
+ opts.retval = 0;
+ err = bpf_prog_test_run_opts(prog_fd, &opts);
+ ASSERT_OK(err, "test_run_opts");
+ ASSERT_EQ(skel->data->on_cpu, i, "check_on_cpu");
+ ASSERT_EQ(opts.retval, expected_retval, "check_retval");
+ }
+
+ /* invalid cpu ID should fail with ENXIO */
+ opts.cpu = 0xffffffff;
+ err = bpf_prog_test_run_opts(prog_fd, &opts);
+ ASSERT_EQ(errno, ENXIO, "test_run_opts should fail with ENXIO");
+ ASSERT_ERR(err, "test_run_opts_fail");
+
+ /* non-zero cpu w/o BPF_F_TEST_RUN_ON_CPU should fail with EINVAL */
+ opts.cpu = 1;
+ opts.flags = 0;
+ err = bpf_prog_test_run_opts(prog_fd, &opts);
+ ASSERT_EQ(errno, EINVAL, "test_run_opts should fail with EINVAL");
+ ASSERT_ERR(err, "test_run_opts_fail");
+
+cleanup:
+ close(comm_fd);
+ test_raw_tp_test_run__destroy(skel);
+ free(online);
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/raw_tp_writable_reject_nbd_invalid.c b/tools/testing/selftests/bpf/prog_tests/raw_tp_writable_reject_nbd_invalid.c
index 9807336a3016..e2f1445b0e10 100644
--- a/tools/testing/selftests/bpf/prog_tests/raw_tp_writable_reject_nbd_invalid.c
+++ b/tools/testing/selftests/bpf/prog_tests/raw_tp_writable_reject_nbd_invalid.c
@@ -18,15 +18,15 @@ void test_raw_tp_writable_reject_nbd_invalid(void)
BPF_EXIT_INSN(),
};
- struct bpf_load_program_attr load_attr = {
- .prog_type = BPF_PROG_TYPE_RAW_TRACEPOINT_WRITABLE,
- .license = "GPL v2",
- .insns = program,
- .insns_cnt = sizeof(program) / sizeof(struct bpf_insn),
+ LIBBPF_OPTS(bpf_prog_load_opts, opts,
.log_level = 2,
- };
+ .log_buf = error,
+ .log_size = sizeof(error),
+ );
- bpf_fd = bpf_load_program_xattr(&load_attr, error, sizeof(error));
+ bpf_fd = bpf_prog_load(BPF_PROG_TYPE_RAW_TRACEPOINT_WRITABLE, NULL, "GPL v2",
+ program, sizeof(program) / sizeof(struct bpf_insn),
+ &opts);
if (CHECK(bpf_fd < 0, "bpf_raw_tracepoint_writable load",
"failed: %d errno %d\n", bpf_fd, errno))
return;
diff --git a/tools/testing/selftests/bpf/prog_tests/raw_tp_writable_test_run.c b/tools/testing/selftests/bpf/prog_tests/raw_tp_writable_test_run.c
index 5c45424cac5f..f4aa7dab4766 100644
--- a/tools/testing/selftests/bpf/prog_tests/raw_tp_writable_test_run.c
+++ b/tools/testing/selftests/bpf/prog_tests/raw_tp_writable_test_run.c
@@ -3,7 +3,8 @@
#include <test_progs.h>
#include <linux/nbd.h>
-void test_raw_tp_writable_test_run(void)
+/* NOTE: conflict with other tests. */
+void serial_test_raw_tp_writable_test_run(void)
{
__u32 duration = 0;
char error[4096];
@@ -16,15 +17,15 @@ void test_raw_tp_writable_test_run(void)
BPF_EXIT_INSN(),
};
- struct bpf_load_program_attr load_attr = {
- .prog_type = BPF_PROG_TYPE_RAW_TRACEPOINT_WRITABLE,
- .license = "GPL v2",
- .insns = trace_program,
- .insns_cnt = sizeof(trace_program) / sizeof(struct bpf_insn),
+ LIBBPF_OPTS(bpf_prog_load_opts, trace_opts,
.log_level = 2,
- };
+ .log_buf = error,
+ .log_size = sizeof(error),
+ );
- int bpf_fd = bpf_load_program_xattr(&load_attr, error, sizeof(error));
+ int bpf_fd = bpf_prog_load(BPF_PROG_TYPE_RAW_TRACEPOINT_WRITABLE, NULL, "GPL v2",
+ trace_program, sizeof(trace_program) / sizeof(struct bpf_insn),
+ &trace_opts);
if (CHECK(bpf_fd < 0, "bpf_raw_tracepoint_writable loaded",
"failed: %d errno %d\n", bpf_fd, errno))
return;
@@ -34,15 +35,14 @@ void test_raw_tp_writable_test_run(void)
BPF_EXIT_INSN(),
};
- struct bpf_load_program_attr skb_load_attr = {
- .prog_type = BPF_PROG_TYPE_SOCKET_FILTER,
- .license = "GPL v2",
- .insns = skb_program,
- .insns_cnt = sizeof(skb_program) / sizeof(struct bpf_insn),
- };
+ LIBBPF_OPTS(bpf_prog_load_opts, skb_opts,
+ .log_buf = error,
+ .log_size = sizeof(error),
+ );
- int filter_fd =
- bpf_load_program_xattr(&skb_load_attr, error, sizeof(error));
+ int filter_fd = bpf_prog_load(BPF_PROG_TYPE_SOCKET_FILTER, NULL, "GPL v2",
+ skb_program, sizeof(skb_program) / sizeof(struct bpf_insn),
+ &skb_opts);
if (CHECK(filter_fd < 0, "test_program_loaded", "failed: %d errno %d\n",
filter_fd, errno))
goto out_bpffd;
@@ -56,21 +56,23 @@ void test_raw_tp_writable_test_run(void)
0,
};
- __u32 prog_ret;
- int err = bpf_prog_test_run(filter_fd, 1, test_skb, sizeof(test_skb), 0,
- 0, &prog_ret, 0);
+ LIBBPF_OPTS(bpf_test_run_opts, topts,
+ .data_in = test_skb,
+ .data_size_in = sizeof(test_skb),
+ .repeat = 1,
+ );
+ int err = bpf_prog_test_run_opts(filter_fd, &topts);
CHECK(err != 42, "test_run",
"tracepoint did not modify return value\n");
- CHECK(prog_ret != 0, "test_run_ret",
+ CHECK(topts.retval != 0, "test_run_ret",
"socket_filter did not return 0\n");
close(tp_fd);
- err = bpf_prog_test_run(filter_fd, 1, test_skb, sizeof(test_skb), 0, 0,
- &prog_ret, 0);
+ err = bpf_prog_test_run_opts(filter_fd, &topts);
CHECK(err != 0, "test_run_notrace",
"test_run failed with %d errno %d\n", err, errno);
- CHECK(prog_ret != 0, "test_run_ret_notrace",
+ CHECK(topts.retval != 0, "test_run_ret_notrace",
"socket_filter did not return 0\n");
out_filterfd:
diff --git a/tools/testing/selftests/bpf/prog_tests/rbtree.c b/tools/testing/selftests/bpf/prog_tests/rbtree.c
new file mode 100644
index 000000000000..e9300c96607d
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/rbtree.c
@@ -0,0 +1,142 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2022 Meta Platforms, Inc. and affiliates. */
+
+#include <test_progs.h>
+#include <network_helpers.h>
+
+#include "rbtree.skel.h"
+#include "rbtree_fail.skel.h"
+#include "rbtree_btf_fail__wrong_node_type.skel.h"
+#include "rbtree_btf_fail__add_wrong_type.skel.h"
+
+static void test_rbtree_add_nodes(void)
+{
+ LIBBPF_OPTS(bpf_test_run_opts, opts,
+ .data_in = &pkt_v4,
+ .data_size_in = sizeof(pkt_v4),
+ .repeat = 1,
+ );
+ struct rbtree *skel;
+ int ret;
+
+ skel = rbtree__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "rbtree__open_and_load"))
+ return;
+
+ ret = bpf_prog_test_run_opts(bpf_program__fd(skel->progs.rbtree_add_nodes), &opts);
+ ASSERT_OK(ret, "rbtree_add_nodes run");
+ ASSERT_OK(opts.retval, "rbtree_add_nodes retval");
+ ASSERT_EQ(skel->data->less_callback_ran, 1, "rbtree_add_nodes less_callback_ran");
+
+ rbtree__destroy(skel);
+}
+
+static void test_rbtree_add_and_remove(void)
+{
+ LIBBPF_OPTS(bpf_test_run_opts, opts,
+ .data_in = &pkt_v4,
+ .data_size_in = sizeof(pkt_v4),
+ .repeat = 1,
+ );
+ struct rbtree *skel;
+ int ret;
+
+ skel = rbtree__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "rbtree__open_and_load"))
+ return;
+
+ ret = bpf_prog_test_run_opts(bpf_program__fd(skel->progs.rbtree_add_and_remove), &opts);
+ ASSERT_OK(ret, "rbtree_add_and_remove");
+ ASSERT_OK(opts.retval, "rbtree_add_and_remove retval");
+ ASSERT_EQ(skel->data->removed_key, 5, "rbtree_add_and_remove first removed key");
+
+ rbtree__destroy(skel);
+}
+
+static void test_rbtree_first_and_remove(void)
+{
+ LIBBPF_OPTS(bpf_test_run_opts, opts,
+ .data_in = &pkt_v4,
+ .data_size_in = sizeof(pkt_v4),
+ .repeat = 1,
+ );
+ struct rbtree *skel;
+ int ret;
+
+ skel = rbtree__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "rbtree__open_and_load"))
+ return;
+
+ ret = bpf_prog_test_run_opts(bpf_program__fd(skel->progs.rbtree_first_and_remove), &opts);
+ ASSERT_OK(ret, "rbtree_first_and_remove");
+ ASSERT_OK(opts.retval, "rbtree_first_and_remove retval");
+ ASSERT_EQ(skel->data->first_data[0], 2, "rbtree_first_and_remove first rbtree_first()");
+ ASSERT_EQ(skel->data->removed_key, 1, "rbtree_first_and_remove first removed key");
+ ASSERT_EQ(skel->data->first_data[1], 4, "rbtree_first_and_remove second rbtree_first()");
+
+ rbtree__destroy(skel);
+}
+
+static void test_rbtree_api_release_aliasing(void)
+{
+ LIBBPF_OPTS(bpf_test_run_opts, opts,
+ .data_in = &pkt_v4,
+ .data_size_in = sizeof(pkt_v4),
+ .repeat = 1,
+ );
+ struct rbtree *skel;
+ int ret;
+
+ skel = rbtree__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "rbtree__open_and_load"))
+ return;
+
+ ret = bpf_prog_test_run_opts(bpf_program__fd(skel->progs.rbtree_api_release_aliasing), &opts);
+ ASSERT_OK(ret, "rbtree_api_release_aliasing");
+ ASSERT_OK(opts.retval, "rbtree_api_release_aliasing retval");
+ ASSERT_EQ(skel->data->first_data[0], 42, "rbtree_api_release_aliasing first rbtree_remove()");
+ ASSERT_EQ(skel->data->first_data[1], -1, "rbtree_api_release_aliasing second rbtree_remove()");
+
+ rbtree__destroy(skel);
+}
+
+void test_rbtree_success(void)
+{
+ if (test__start_subtest("rbtree_add_nodes"))
+ test_rbtree_add_nodes();
+ if (test__start_subtest("rbtree_add_and_remove"))
+ test_rbtree_add_and_remove();
+ if (test__start_subtest("rbtree_first_and_remove"))
+ test_rbtree_first_and_remove();
+ if (test__start_subtest("rbtree_api_release_aliasing"))
+ test_rbtree_api_release_aliasing();
+}
+
+#define BTF_FAIL_TEST(suffix) \
+void test_rbtree_btf_fail__##suffix(void) \
+{ \
+ struct rbtree_btf_fail__##suffix *skel; \
+ \
+ skel = rbtree_btf_fail__##suffix##__open_and_load(); \
+ if (!ASSERT_ERR_PTR(skel, \
+ "rbtree_btf_fail__" #suffix "__open_and_load unexpected success")) \
+ rbtree_btf_fail__##suffix##__destroy(skel); \
+}
+
+#define RUN_BTF_FAIL_TEST(suffix) \
+ if (test__start_subtest("rbtree_btf_fail__" #suffix)) \
+ test_rbtree_btf_fail__##suffix();
+
+BTF_FAIL_TEST(wrong_node_type);
+BTF_FAIL_TEST(add_wrong_type);
+
+void test_rbtree_btf_fail(void)
+{
+ RUN_BTF_FAIL_TEST(wrong_node_type);
+ RUN_BTF_FAIL_TEST(add_wrong_type);
+}
+
+void test_rbtree_fail(void)
+{
+ RUN_TESTS(rbtree_fail);
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/rcu_read_lock.c b/tools/testing/selftests/bpf/prog_tests/rcu_read_lock.c
new file mode 100644
index 000000000000..a1f7e7378a64
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/rcu_read_lock.c
@@ -0,0 +1,154 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2022 Meta Platforms, Inc. and affiliates.*/
+
+#define _GNU_SOURCE
+#include <unistd.h>
+#include <sys/syscall.h>
+#include <sys/types.h>
+#include <test_progs.h>
+#include <bpf/btf.h>
+#include "rcu_read_lock.skel.h"
+#include "cgroup_helpers.h"
+
+static unsigned long long cgroup_id;
+
+static void test_success(void)
+{
+ struct rcu_read_lock *skel;
+ int err;
+
+ skel = rcu_read_lock__open();
+ if (!ASSERT_OK_PTR(skel, "skel_open"))
+ return;
+
+ skel->bss->target_pid = syscall(SYS_gettid);
+
+ bpf_program__set_autoload(skel->progs.get_cgroup_id, true);
+ bpf_program__set_autoload(skel->progs.task_succ, true);
+ bpf_program__set_autoload(skel->progs.two_regions, true);
+ bpf_program__set_autoload(skel->progs.non_sleepable_1, true);
+ bpf_program__set_autoload(skel->progs.non_sleepable_2, true);
+ bpf_program__set_autoload(skel->progs.task_trusted_non_rcuptr, true);
+ bpf_program__set_autoload(skel->progs.rcu_read_lock_subprog, true);
+ bpf_program__set_autoload(skel->progs.rcu_read_lock_global_subprog, true);
+ bpf_program__set_autoload(skel->progs.rcu_read_lock_subprog_lock, true);
+ bpf_program__set_autoload(skel->progs.rcu_read_lock_subprog_unlock, true);
+ err = rcu_read_lock__load(skel);
+ if (!ASSERT_OK(err, "skel_load"))
+ goto out;
+
+ err = rcu_read_lock__attach(skel);
+ if (!ASSERT_OK(err, "skel_attach"))
+ goto out;
+
+ syscall(SYS_getpgid);
+
+ ASSERT_EQ(skel->bss->task_storage_val, 2, "task_storage_val");
+ ASSERT_EQ(skel->bss->cgroup_id, cgroup_id, "cgroup_id");
+out:
+ rcu_read_lock__destroy(skel);
+}
+
+static void test_rcuptr_acquire(void)
+{
+ struct rcu_read_lock *skel;
+ int err;
+
+ skel = rcu_read_lock__open();
+ if (!ASSERT_OK_PTR(skel, "skel_open"))
+ return;
+
+ skel->bss->target_pid = syscall(SYS_gettid);
+
+ bpf_program__set_autoload(skel->progs.task_acquire, true);
+ err = rcu_read_lock__load(skel);
+ if (!ASSERT_OK(err, "skel_load"))
+ goto out;
+
+ err = rcu_read_lock__attach(skel);
+ ASSERT_OK(err, "skel_attach");
+out:
+ rcu_read_lock__destroy(skel);
+}
+
+static const char * const inproper_region_tests[] = {
+ "miss_lock",
+ "no_lock",
+ "miss_unlock",
+ "non_sleepable_rcu_mismatch",
+ "inproper_sleepable_helper",
+ "inproper_sleepable_kfunc",
+ "nested_rcu_region",
+ "rcu_read_lock_global_subprog_lock",
+ "rcu_read_lock_global_subprog_unlock",
+};
+
+static void test_inproper_region(void)
+{
+ struct rcu_read_lock *skel;
+ struct bpf_program *prog;
+ int i, err;
+
+ for (i = 0; i < ARRAY_SIZE(inproper_region_tests); i++) {
+ skel = rcu_read_lock__open();
+ if (!ASSERT_OK_PTR(skel, "skel_open"))
+ return;
+
+ prog = bpf_object__find_program_by_name(skel->obj, inproper_region_tests[i]);
+ if (!ASSERT_OK_PTR(prog, "bpf_object__find_program_by_name"))
+ goto out;
+ bpf_program__set_autoload(prog, true);
+ err = rcu_read_lock__load(skel);
+ ASSERT_ERR(err, "skel_load");
+out:
+ rcu_read_lock__destroy(skel);
+ }
+}
+
+static const char * const rcuptr_misuse_tests[] = {
+ "task_untrusted_rcuptr",
+ "cross_rcu_region",
+};
+
+static void test_rcuptr_misuse(void)
+{
+ struct rcu_read_lock *skel;
+ struct bpf_program *prog;
+ int i, err;
+
+ for (i = 0; i < ARRAY_SIZE(rcuptr_misuse_tests); i++) {
+ skel = rcu_read_lock__open();
+ if (!ASSERT_OK_PTR(skel, "skel_open"))
+ return;
+
+ prog = bpf_object__find_program_by_name(skel->obj, rcuptr_misuse_tests[i]);
+ if (!ASSERT_OK_PTR(prog, "bpf_object__find_program_by_name"))
+ goto out;
+ bpf_program__set_autoload(prog, true);
+ err = rcu_read_lock__load(skel);
+ ASSERT_ERR(err, "skel_load");
+out:
+ rcu_read_lock__destroy(skel);
+ }
+}
+
+void test_rcu_read_lock(void)
+{
+ int cgroup_fd;
+
+ cgroup_fd = test__join_cgroup("/rcu_read_lock");
+ if (!ASSERT_GE(cgroup_fd, 0, "join_cgroup /rcu_read_lock"))
+ goto out;
+
+ cgroup_id = get_cgroup_id("/rcu_read_lock");
+ if (test__start_subtest("success"))
+ test_success();
+ if (test__start_subtest("rcuptr_acquire"))
+ test_rcuptr_acquire();
+ if (test__start_subtest("negative_tests_inproper_region"))
+ test_inproper_region();
+ if (test__start_subtest("negative_tests_rcuptr_misuse"))
+ test_rcuptr_misuse();
+ close(cgroup_fd);
+out:;
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/rdonly_maps.c b/tools/testing/selftests/bpf/prog_tests/rdonly_maps.c
index 563e12120e77..19e2f2526dbd 100644
--- a/tools/testing/selftests/bpf/prog_tests/rdonly_maps.c
+++ b/tools/testing/selftests/bpf/prog_tests/rdonly_maps.c
@@ -16,7 +16,7 @@ struct rdonly_map_subtest {
void test_rdonly_maps(void)
{
- const char *file = "test_rdonly_maps.o";
+ const char *file = "test_rdonly_maps.bpf.o";
struct rdonly_map_subtest subtests[] = {
{ "skip loop", "skip_loop", 0, 0 },
{ "part loop", "part_loop", 3, 2 + 3 + 4 },
@@ -30,14 +30,14 @@ void test_rdonly_maps(void)
struct bss bss;
obj = bpf_object__open_file(file, NULL);
- if (CHECK(IS_ERR(obj), "obj_open", "err %ld\n", PTR_ERR(obj)))
+ if (!ASSERT_OK_PTR(obj, "obj_open"))
return;
err = bpf_object__load(obj);
if (CHECK(err, "obj_load", "err %d errno %d\n", err, errno))
goto cleanup;
- bss_map = bpf_object__find_map_by_name(obj, "test_rdo.bss");
+ bss_map = bpf_object__find_map_by_name(obj, ".bss");
if (CHECK(!bss_map, "find_bss_map", "failed\n"))
goto cleanup;
@@ -58,11 +58,8 @@ void test_rdonly_maps(void)
goto cleanup;
link = bpf_program__attach_raw_tracepoint(prog, "sys_enter");
- if (CHECK(IS_ERR(link), "attach_prog", "prog '%s', err %ld\n",
- t->prog_name, PTR_ERR(link))) {
- link = NULL;
+ if (!ASSERT_OK_PTR(link, "attach_prog"))
goto cleanup;
- }
/* trigger probe */
usleep(1);
diff --git a/tools/testing/selftests/bpf/prog_tests/read_vsyscall.c b/tools/testing/selftests/bpf/prog_tests/read_vsyscall.c
new file mode 100644
index 000000000000..3405923fe4e6
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/read_vsyscall.c
@@ -0,0 +1,57 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (C) 2024. Huawei Technologies Co., Ltd */
+#include "test_progs.h"
+#include "read_vsyscall.skel.h"
+
+#if defined(__x86_64__)
+/* For VSYSCALL_ADDR */
+#include <asm/vsyscall.h>
+#else
+/* To prevent build failure on non-x86 arch */
+#define VSYSCALL_ADDR 0UL
+#endif
+
+struct read_ret_desc {
+ const char *name;
+ int ret;
+} all_read[] = {
+ { .name = "probe_read_kernel", .ret = -ERANGE },
+ { .name = "probe_read_kernel_str", .ret = -ERANGE },
+ { .name = "probe_read", .ret = -ERANGE },
+ { .name = "probe_read_str", .ret = -ERANGE },
+ { .name = "probe_read_user", .ret = -EFAULT },
+ { .name = "probe_read_user_str", .ret = -EFAULT },
+ { .name = "copy_from_user", .ret = -EFAULT },
+ { .name = "copy_from_user_task", .ret = -EFAULT },
+};
+
+void test_read_vsyscall(void)
+{
+ struct read_vsyscall *skel;
+ unsigned int i;
+ int err;
+
+#if !defined(__x86_64__)
+ test__skip();
+ return;
+#endif
+ skel = read_vsyscall__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "read_vsyscall open_load"))
+ return;
+
+ skel->bss->target_pid = getpid();
+ err = read_vsyscall__attach(skel);
+ if (!ASSERT_EQ(err, 0, "read_vsyscall attach"))
+ goto out;
+
+ /* userspace may don't have vsyscall page due to LEGACY_VSYSCALL_NONE,
+ * but it doesn't affect the returned error codes.
+ */
+ skel->bss->user_ptr = (void *)VSYSCALL_ADDR;
+ usleep(1);
+
+ for (i = 0; i < ARRAY_SIZE(all_read); i++)
+ ASSERT_EQ(skel->bss->read_ret[i], all_read[i].ret, all_read[i].name);
+out:
+ read_vsyscall__destroy(skel);
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/recursion.c b/tools/testing/selftests/bpf/prog_tests/recursion.c
new file mode 100644
index 000000000000..23552d3e3365
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/recursion.c
@@ -0,0 +1,41 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2021 Facebook */
+#include <test_progs.h>
+#include "recursion.skel.h"
+
+void test_recursion(void)
+{
+ struct bpf_prog_info prog_info = {};
+ __u32 prog_info_len = sizeof(prog_info);
+ struct recursion *skel;
+ int key = 0;
+ int err;
+
+ skel = recursion__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "skel_open_and_load"))
+ return;
+
+ err = recursion__attach(skel);
+ if (!ASSERT_OK(err, "skel_attach"))
+ goto out;
+
+ ASSERT_EQ(skel->bss->pass1, 0, "pass1 == 0");
+ bpf_map_delete_elem(bpf_map__fd(skel->maps.hash1), &key);
+ ASSERT_EQ(skel->bss->pass1, 1, "pass1 == 1");
+ bpf_map_delete_elem(bpf_map__fd(skel->maps.hash1), &key);
+ ASSERT_EQ(skel->bss->pass1, 2, "pass1 == 2");
+
+ ASSERT_EQ(skel->bss->pass2, 0, "pass2 == 0");
+ bpf_map_delete_elem(bpf_map__fd(skel->maps.hash2), &key);
+ ASSERT_EQ(skel->bss->pass2, 1, "pass2 == 1");
+ bpf_map_delete_elem(bpf_map__fd(skel->maps.hash2), &key);
+ ASSERT_EQ(skel->bss->pass2, 2, "pass2 == 2");
+
+ err = bpf_prog_get_info_by_fd(bpf_program__fd(skel->progs.on_delete),
+ &prog_info, &prog_info_len);
+ if (!ASSERT_OK(err, "get_prog_info"))
+ goto out;
+ ASSERT_EQ(prog_info.recursion_misses, 2, "recursion_misses");
+out:
+ recursion__destroy(skel);
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/recursive_attach.c b/tools/testing/selftests/bpf/prog_tests/recursive_attach.c
new file mode 100644
index 000000000000..8100509e561b
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/recursive_attach.c
@@ -0,0 +1,151 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2023 Red Hat, Inc. */
+#include <test_progs.h>
+#include "fentry_recursive.skel.h"
+#include "fentry_recursive_target.skel.h"
+#include <bpf/btf.h>
+#include "bpf/libbpf_internal.h"
+
+/* Test recursive attachment of tracing progs with more than one nesting level
+ * is not possible. Create a chain of attachment, verify that the last prog
+ * will fail. Depending on the arguments, following cases are tested:
+ *
+ * - Recursive loading of tracing progs, without attaching (attach = false,
+ * detach = false). The chain looks like this:
+ * load target
+ * load fentry1 -> target
+ * load fentry2 -> fentry1 (fail)
+ *
+ * - Recursive attach of tracing progs (attach = true, detach = false). The
+ * chain looks like this:
+ * load target
+ * load fentry1 -> target
+ * attach fentry1 -> target
+ * load fentry2 -> fentry1 (fail)
+ *
+ * - Recursive attach and detach of tracing progs (attach = true, detach =
+ * true). This validates that attach_tracing_prog flag will be set throughout
+ * the whole lifecycle of an fentry prog, independently from whether it's
+ * detached. The chain looks like this:
+ * load target
+ * load fentry1 -> target
+ * attach fentry1 -> target
+ * detach fentry1
+ * load fentry2 -> fentry1 (fail)
+ */
+static void test_recursive_fentry_chain(bool attach, bool detach)
+{
+ struct fentry_recursive_target *target_skel = NULL;
+ struct fentry_recursive *tracing_chain[2] = {};
+ struct bpf_program *prog;
+ int prev_fd, err;
+
+ target_skel = fentry_recursive_target__open_and_load();
+ if (!ASSERT_OK_PTR(target_skel, "fentry_recursive_target__open_and_load"))
+ return;
+
+ /* Create an attachment chain with two fentry progs */
+ for (int i = 0; i < 2; i++) {
+ tracing_chain[i] = fentry_recursive__open();
+ if (!ASSERT_OK_PTR(tracing_chain[i], "fentry_recursive__open"))
+ goto close_prog;
+
+ /* The first prog in the chain is going to be attached to the target
+ * fentry program, the second one to the previous in the chain.
+ */
+ prog = tracing_chain[i]->progs.recursive_attach;
+ if (i == 0) {
+ prev_fd = bpf_program__fd(target_skel->progs.test1);
+ err = bpf_program__set_attach_target(prog, prev_fd, "test1");
+ } else {
+ prev_fd = bpf_program__fd(tracing_chain[i-1]->progs.recursive_attach);
+ err = bpf_program__set_attach_target(prog, prev_fd, "recursive_attach");
+ }
+
+ if (!ASSERT_OK(err, "bpf_program__set_attach_target"))
+ goto close_prog;
+
+ err = fentry_recursive__load(tracing_chain[i]);
+ /* The first attach should succeed, the second fail */
+ if (i == 0) {
+ if (!ASSERT_OK(err, "fentry_recursive__load"))
+ goto close_prog;
+
+ if (attach) {
+ err = fentry_recursive__attach(tracing_chain[i]);
+ if (!ASSERT_OK(err, "fentry_recursive__attach"))
+ goto close_prog;
+ }
+
+ if (detach) {
+ /* Flag attach_tracing_prog should still be set, preventing
+ * attachment of the following prog.
+ */
+ fentry_recursive__detach(tracing_chain[i]);
+ }
+ } else {
+ if (!ASSERT_ERR(err, "fentry_recursive__load"))
+ goto close_prog;
+ }
+ }
+
+close_prog:
+ fentry_recursive_target__destroy(target_skel);
+ for (int i = 0; i < 2; i++) {
+ fentry_recursive__destroy(tracing_chain[i]);
+ }
+}
+
+void test_recursive_fentry(void)
+{
+ if (test__start_subtest("attach"))
+ test_recursive_fentry_chain(true, false);
+ if (test__start_subtest("load"))
+ test_recursive_fentry_chain(false, false);
+ if (test__start_subtest("detach"))
+ test_recursive_fentry_chain(true, true);
+}
+
+/* Test that a tracing prog reattachment (when we land in
+ * "prog->aux->dst_trampoline and tgt_prog is NULL" branch in
+ * bpf_tracing_prog_attach) does not lead to a crash due to missing attach_btf
+ */
+void test_fentry_attach_btf_presence(void)
+{
+ struct fentry_recursive_target *target_skel = NULL;
+ struct fentry_recursive *tracing_skel = NULL;
+ struct bpf_program *prog;
+ int err, link_fd, tgt_prog_fd;
+
+ target_skel = fentry_recursive_target__open_and_load();
+ if (!ASSERT_OK_PTR(target_skel, "fentry_recursive_target__open_and_load"))
+ goto close_prog;
+
+ tracing_skel = fentry_recursive__open();
+ if (!ASSERT_OK_PTR(tracing_skel, "fentry_recursive__open"))
+ goto close_prog;
+
+ prog = tracing_skel->progs.recursive_attach;
+ tgt_prog_fd = bpf_program__fd(target_skel->progs.fentry_target);
+ err = bpf_program__set_attach_target(prog, tgt_prog_fd, "fentry_target");
+ if (!ASSERT_OK(err, "bpf_program__set_attach_target"))
+ goto close_prog;
+
+ err = fentry_recursive__load(tracing_skel);
+ if (!ASSERT_OK(err, "fentry_recursive__load"))
+ goto close_prog;
+
+ tgt_prog_fd = bpf_program__fd(tracing_skel->progs.recursive_attach);
+ link_fd = bpf_link_create(tgt_prog_fd, 0, BPF_TRACE_FENTRY, NULL);
+ if (!ASSERT_GE(link_fd, 0, "link_fd"))
+ goto close_prog;
+
+ fentry_recursive__detach(tracing_skel);
+
+ err = fentry_recursive__attach(tracing_skel);
+ ASSERT_ERR(err, "fentry_recursive__attach");
+
+close_prog:
+ fentry_recursive_target__destroy(target_skel);
+ fentry_recursive__destroy(tracing_skel);
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/refcounted_kptr.c b/tools/testing/selftests/bpf/prog_tests/refcounted_kptr.c
new file mode 100644
index 000000000000..d6bd5e16e637
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/refcounted_kptr.c
@@ -0,0 +1,46 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2023 Meta Platforms, Inc. and affiliates. */
+
+#include <test_progs.h>
+#include <network_helpers.h>
+
+#include "refcounted_kptr.skel.h"
+#include "refcounted_kptr_fail.skel.h"
+
+void test_refcounted_kptr(void)
+{
+ RUN_TESTS(refcounted_kptr);
+}
+
+void test_refcounted_kptr_fail(void)
+{
+ RUN_TESTS(refcounted_kptr_fail);
+}
+
+void test_refcounted_kptr_wrong_owner(void)
+{
+ LIBBPF_OPTS(bpf_test_run_opts, opts,
+ .data_in = &pkt_v4,
+ .data_size_in = sizeof(pkt_v4),
+ .repeat = 1,
+ );
+ struct refcounted_kptr *skel;
+ int ret;
+
+ skel = refcounted_kptr__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "refcounted_kptr__open_and_load"))
+ return;
+
+ ret = bpf_prog_test_run_opts(bpf_program__fd(skel->progs.rbtree_wrong_owner_remove_fail_a1), &opts);
+ ASSERT_OK(ret, "rbtree_wrong_owner_remove_fail_a1");
+ ASSERT_OK(opts.retval, "rbtree_wrong_owner_remove_fail_a1 retval");
+
+ ret = bpf_prog_test_run_opts(bpf_program__fd(skel->progs.rbtree_wrong_owner_remove_fail_b), &opts);
+ ASSERT_OK(ret, "rbtree_wrong_owner_remove_fail_b");
+ ASSERT_OK(opts.retval, "rbtree_wrong_owner_remove_fail_b retval");
+
+ ret = bpf_prog_test_run_opts(bpf_program__fd(skel->progs.rbtree_wrong_owner_remove_fail_a2), &opts);
+ ASSERT_OK(ret, "rbtree_wrong_owner_remove_fail_a2");
+ ASSERT_OK(opts.retval, "rbtree_wrong_owner_remove_fail_a2 retval");
+ refcounted_kptr__destroy(skel);
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/reference_tracking.c b/tools/testing/selftests/bpf/prog_tests/reference_tracking.c
index fc0d7f4f02cf..d863205bbe95 100644
--- a/tools/testing/selftests/bpf/prog_tests/reference_tracking.c
+++ b/tools/testing/selftests/bpf/prog_tests/reference_tracking.c
@@ -3,50 +3,61 @@
void test_reference_tracking(void)
{
- const char *file = "test_sk_lookup_kern.o";
+ const char *file = "test_sk_lookup_kern.bpf.o";
const char *obj_name = "ref_track";
DECLARE_LIBBPF_OPTS(bpf_object_open_opts, open_opts,
.object_name = obj_name,
.relaxed_maps = true,
);
- struct bpf_object *obj;
+ struct bpf_object *obj_iter, *obj = NULL;
struct bpf_program *prog;
__u32 duration = 0;
int err = 0;
- obj = bpf_object__open_file(file, &open_opts);
- if (CHECK_FAIL(IS_ERR(obj)))
+ obj_iter = bpf_object__open_file(file, &open_opts);
+ if (!ASSERT_OK_PTR(obj_iter, "obj_iter_open_file"))
return;
- if (CHECK(strcmp(bpf_object__name(obj), obj_name), "obj_name",
+ if (CHECK(strcmp(bpf_object__name(obj_iter), obj_name), "obj_name",
"wrong obj name '%s', expected '%s'\n",
- bpf_object__name(obj), obj_name))
+ bpf_object__name(obj_iter), obj_name))
goto cleanup;
- bpf_object__for_each_program(prog, obj) {
- const char *title;
+ bpf_object__for_each_program(prog, obj_iter) {
+ struct bpf_program *p;
+ const char *name;
- /* Ignore .text sections */
- title = bpf_program__title(prog, false);
- if (strstr(title, ".text") != NULL)
+ name = bpf_program__name(prog);
+ if (!test__start_subtest(name))
continue;
- if (!test__start_subtest(title))
- continue;
+ obj = bpf_object__open_file(file, &open_opts);
+ if (!ASSERT_OK_PTR(obj, "obj_open_file"))
+ goto cleanup;
+
+ /* all programs are not loaded by default, so just set
+ * autoload to true for the single prog under test
+ */
+ p = bpf_object__find_program_by_name(obj, name);
+ bpf_program__set_autoload(p, true);
- /* Expect verifier failure if test name has 'fail' */
- if (strstr(title, "fail") != NULL) {
+ /* Expect verifier failure if test name has 'err' */
+ if (strncmp(name, "err_", sizeof("err_") - 1) == 0) {
libbpf_print_fn_t old_print_fn;
old_print_fn = libbpf_set_print(NULL);
- err = !bpf_program__load(prog, "GPL", 0);
+ err = !bpf_object__load(obj);
libbpf_set_print(old_print_fn);
} else {
- err = bpf_program__load(prog, "GPL", 0);
+ err = bpf_object__load(obj);
}
- CHECK(err, title, "\n");
+ ASSERT_OK(err, name);
+
+ bpf_object__close(obj);
+ obj = NULL;
}
cleanup:
bpf_object__close(obj);
+ bpf_object__close(obj_iter);
}
diff --git a/tools/testing/selftests/bpf/prog_tests/reg_bounds.c b/tools/testing/selftests/bpf/prog_tests/reg_bounds.c
new file mode 100644
index 000000000000..eb74363f9f70
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/reg_bounds.c
@@ -0,0 +1,2131 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2023 Meta Platforms, Inc. and affiliates. */
+
+#define _GNU_SOURCE
+#include <limits.h>
+#include <test_progs.h>
+#include <linux/filter.h>
+#include <linux/bpf.h>
+
+/* =================================
+ * SHORT AND CONSISTENT NUMBER TYPES
+ * =================================
+ */
+#define U64_MAX ((u64)UINT64_MAX)
+#define U32_MAX ((u32)UINT_MAX)
+#define U16_MAX ((u32)UINT_MAX)
+#define S64_MIN ((s64)INT64_MIN)
+#define S64_MAX ((s64)INT64_MAX)
+#define S32_MIN ((s32)INT_MIN)
+#define S32_MAX ((s32)INT_MAX)
+#define S16_MIN ((s16)0x80000000)
+#define S16_MAX ((s16)0x7fffffff)
+
+typedef unsigned long long ___u64;
+typedef unsigned int ___u32;
+typedef long long ___s64;
+typedef int ___s32;
+
+/* avoid conflicts with already defined types in kernel headers */
+#define u64 ___u64
+#define u32 ___u32
+#define s64 ___s64
+#define s32 ___s32
+
+/* ==================================
+ * STRING BUF ABSTRACTION AND HELPERS
+ * ==================================
+ */
+struct strbuf {
+ size_t buf_sz;
+ int pos;
+ char buf[0];
+};
+
+#define DEFINE_STRBUF(name, N) \
+ struct { struct strbuf buf; char data[(N)]; } ___##name; \
+ struct strbuf *name = (___##name.buf.buf_sz = (N), ___##name.buf.pos = 0, &___##name.buf)
+
+__printf(2, 3)
+static inline void snappendf(struct strbuf *s, const char *fmt, ...)
+{
+ va_list args;
+
+ va_start(args, fmt);
+ s->pos += vsnprintf(s->buf + s->pos,
+ s->pos < s->buf_sz ? s->buf_sz - s->pos : 0,
+ fmt, args);
+ va_end(args);
+}
+
+/* ==================================
+ * GENERIC NUMBER TYPE AND OPERATIONS
+ * ==================================
+ */
+enum num_t { U64, first_t = U64, U32, S64, S32, last_t = S32 };
+
+static __always_inline u64 min_t(enum num_t t, u64 x, u64 y)
+{
+ switch (t) {
+ case U64: return (u64)x < (u64)y ? (u64)x : (u64)y;
+ case U32: return (u32)x < (u32)y ? (u32)x : (u32)y;
+ case S64: return (s64)x < (s64)y ? (s64)x : (s64)y;
+ case S32: return (s32)x < (s32)y ? (s32)x : (s32)y;
+ default: printf("min_t!\n"); exit(1);
+ }
+}
+
+static __always_inline u64 max_t(enum num_t t, u64 x, u64 y)
+{
+ switch (t) {
+ case U64: return (u64)x > (u64)y ? (u64)x : (u64)y;
+ case U32: return (u32)x > (u32)y ? (u32)x : (u32)y;
+ case S64: return (s64)x > (s64)y ? (s64)x : (s64)y;
+ case S32: return (s32)x > (s32)y ? (u32)(s32)x : (u32)(s32)y;
+ default: printf("max_t!\n"); exit(1);
+ }
+}
+
+static __always_inline u64 cast_t(enum num_t t, u64 x)
+{
+ switch (t) {
+ case U64: return (u64)x;
+ case U32: return (u32)x;
+ case S64: return (s64)x;
+ case S32: return (u32)(s32)x;
+ default: printf("cast_t!\n"); exit(1);
+ }
+}
+
+static const char *t_str(enum num_t t)
+{
+ switch (t) {
+ case U64: return "u64";
+ case U32: return "u32";
+ case S64: return "s64";
+ case S32: return "s32";
+ default: printf("t_str!\n"); exit(1);
+ }
+}
+
+static enum num_t t_is_32(enum num_t t)
+{
+ switch (t) {
+ case U64: return false;
+ case U32: return true;
+ case S64: return false;
+ case S32: return true;
+ default: printf("t_is_32!\n"); exit(1);
+ }
+}
+
+static enum num_t t_signed(enum num_t t)
+{
+ switch (t) {
+ case U64: return S64;
+ case U32: return S32;
+ case S64: return S64;
+ case S32: return S32;
+ default: printf("t_signed!\n"); exit(1);
+ }
+}
+
+static enum num_t t_unsigned(enum num_t t)
+{
+ switch (t) {
+ case U64: return U64;
+ case U32: return U32;
+ case S64: return U64;
+ case S32: return U32;
+ default: printf("t_unsigned!\n"); exit(1);
+ }
+}
+
+#define UNUM_MAX_DECIMAL U16_MAX
+#define SNUM_MAX_DECIMAL S16_MAX
+#define SNUM_MIN_DECIMAL S16_MIN
+
+static bool num_is_small(enum num_t t, u64 x)
+{
+ switch (t) {
+ case U64: return (u64)x <= UNUM_MAX_DECIMAL;
+ case U32: return (u32)x <= UNUM_MAX_DECIMAL;
+ case S64: return (s64)x >= SNUM_MIN_DECIMAL && (s64)x <= SNUM_MAX_DECIMAL;
+ case S32: return (s32)x >= SNUM_MIN_DECIMAL && (s32)x <= SNUM_MAX_DECIMAL;
+ default: printf("num_is_small!\n"); exit(1);
+ }
+}
+
+static void snprintf_num(enum num_t t, struct strbuf *sb, u64 x)
+{
+ bool is_small = num_is_small(t, x);
+
+ if (is_small) {
+ switch (t) {
+ case U64: return snappendf(sb, "%llu", (u64)x);
+ case U32: return snappendf(sb, "%u", (u32)x);
+ case S64: return snappendf(sb, "%lld", (s64)x);
+ case S32: return snappendf(sb, "%d", (s32)x);
+ default: printf("snprintf_num!\n"); exit(1);
+ }
+ } else {
+ switch (t) {
+ case U64:
+ if (x == U64_MAX)
+ return snappendf(sb, "U64_MAX");
+ else if (x >= U64_MAX - 256)
+ return snappendf(sb, "U64_MAX-%llu", U64_MAX - x);
+ else
+ return snappendf(sb, "%#llx", (u64)x);
+ case U32:
+ if ((u32)x == U32_MAX)
+ return snappendf(sb, "U32_MAX");
+ else if ((u32)x >= U32_MAX - 256)
+ return snappendf(sb, "U32_MAX-%u", U32_MAX - (u32)x);
+ else
+ return snappendf(sb, "%#x", (u32)x);
+ case S64:
+ if ((s64)x == S64_MAX)
+ return snappendf(sb, "S64_MAX");
+ else if ((s64)x >= S64_MAX - 256)
+ return snappendf(sb, "S64_MAX-%lld", S64_MAX - (s64)x);
+ else if ((s64)x == S64_MIN)
+ return snappendf(sb, "S64_MIN");
+ else if ((s64)x <= S64_MIN + 256)
+ return snappendf(sb, "S64_MIN+%lld", (s64)x - S64_MIN);
+ else
+ return snappendf(sb, "%#llx", (s64)x);
+ case S32:
+ if ((s32)x == S32_MAX)
+ return snappendf(sb, "S32_MAX");
+ else if ((s32)x >= S32_MAX - 256)
+ return snappendf(sb, "S32_MAX-%d", S32_MAX - (s32)x);
+ else if ((s32)x == S32_MIN)
+ return snappendf(sb, "S32_MIN");
+ else if ((s32)x <= S32_MIN + 256)
+ return snappendf(sb, "S32_MIN+%d", (s32)x - S32_MIN);
+ else
+ return snappendf(sb, "%#x", (s32)x);
+ default: printf("snprintf_num!\n"); exit(1);
+ }
+ }
+}
+
+/* ===================================
+ * GENERIC RANGE STRUCT AND OPERATIONS
+ * ===================================
+ */
+struct range {
+ u64 a, b;
+};
+
+static void snprintf_range(enum num_t t, struct strbuf *sb, struct range x)
+{
+ if (x.a == x.b)
+ return snprintf_num(t, sb, x.a);
+
+ snappendf(sb, "[");
+ snprintf_num(t, sb, x.a);
+ snappendf(sb, "; ");
+ snprintf_num(t, sb, x.b);
+ snappendf(sb, "]");
+}
+
+static void print_range(enum num_t t, struct range x, const char *sfx)
+{
+ DEFINE_STRBUF(sb, 128);
+
+ snprintf_range(t, sb, x);
+ printf("%s%s", sb->buf, sfx);
+}
+
+static const struct range unkn[] = {
+ [U64] = { 0, U64_MAX },
+ [U32] = { 0, U32_MAX },
+ [S64] = { (u64)S64_MIN, (u64)S64_MAX },
+ [S32] = { (u64)(u32)S32_MIN, (u64)(u32)S32_MAX },
+};
+
+static struct range unkn_subreg(enum num_t t)
+{
+ switch (t) {
+ case U64: return unkn[U32];
+ case U32: return unkn[U32];
+ case S64: return unkn[U32];
+ case S32: return unkn[S32];
+ default: printf("unkn_subreg!\n"); exit(1);
+ }
+}
+
+static struct range range(enum num_t t, u64 a, u64 b)
+{
+ switch (t) {
+ case U64: return (struct range){ (u64)a, (u64)b };
+ case U32: return (struct range){ (u32)a, (u32)b };
+ case S64: return (struct range){ (s64)a, (s64)b };
+ case S32: return (struct range){ (u32)(s32)a, (u32)(s32)b };
+ default: printf("range!\n"); exit(1);
+ }
+}
+
+static __always_inline u32 sign64(u64 x) { return (x >> 63) & 1; }
+static __always_inline u32 sign32(u64 x) { return ((u32)x >> 31) & 1; }
+static __always_inline u32 upper32(u64 x) { return (u32)(x >> 32); }
+static __always_inline u64 swap_low32(u64 x, u32 y) { return (x & 0xffffffff00000000ULL) | y; }
+
+static bool range_eq(struct range x, struct range y)
+{
+ return x.a == y.a && x.b == y.b;
+}
+
+static struct range range_cast_to_s32(struct range x)
+{
+ u64 a = x.a, b = x.b;
+
+ /* if upper 32 bits are constant, lower 32 bits should form a proper
+ * s32 range to be correct
+ */
+ if (upper32(a) == upper32(b) && (s32)a <= (s32)b)
+ return range(S32, a, b);
+
+ /* Special case where upper bits form a small sequence of two
+ * sequential numbers (in 32-bit unsigned space, so 0xffffffff to
+ * 0x00000000 is also valid), while lower bits form a proper s32 range
+ * going from negative numbers to positive numbers.
+ *
+ * E.g.: [0xfffffff0ffffff00; 0xfffffff100000010]. Iterating
+ * over full 64-bit numbers range will form a proper [-16, 16]
+ * ([0xffffff00; 0x00000010]) range in its lower 32 bits.
+ */
+ if (upper32(a) + 1 == upper32(b) && (s32)a < 0 && (s32)b >= 0)
+ return range(S32, a, b);
+
+ /* otherwise we can't derive much meaningful information */
+ return unkn[S32];
+}
+
+static struct range range_cast_u64(enum num_t to_t, struct range x)
+{
+ u64 a = (u64)x.a, b = (u64)x.b;
+
+ switch (to_t) {
+ case U64:
+ return x;
+ case U32:
+ if (upper32(a) != upper32(b))
+ return unkn[U32];
+ return range(U32, a, b);
+ case S64:
+ if (sign64(a) != sign64(b))
+ return unkn[S64];
+ return range(S64, a, b);
+ case S32:
+ return range_cast_to_s32(x);
+ default: printf("range_cast_u64!\n"); exit(1);
+ }
+}
+
+static struct range range_cast_s64(enum num_t to_t, struct range x)
+{
+ s64 a = (s64)x.a, b = (s64)x.b;
+
+ switch (to_t) {
+ case U64:
+ /* equivalent to (s64)a <= (s64)b check */
+ if (sign64(a) != sign64(b))
+ return unkn[U64];
+ return range(U64, a, b);
+ case U32:
+ if (upper32(a) != upper32(b) || sign32(a) != sign32(b))
+ return unkn[U32];
+ return range(U32, a, b);
+ case S64:
+ return x;
+ case S32:
+ return range_cast_to_s32(x);
+ default: printf("range_cast_s64!\n"); exit(1);
+ }
+}
+
+static struct range range_cast_u32(enum num_t to_t, struct range x)
+{
+ u32 a = (u32)x.a, b = (u32)x.b;
+
+ switch (to_t) {
+ case U64:
+ case S64:
+ /* u32 is always a valid zero-extended u64/s64 */
+ return range(to_t, a, b);
+ case U32:
+ return x;
+ case S32:
+ return range_cast_to_s32(range(U32, a, b));
+ default: printf("range_cast_u32!\n"); exit(1);
+ }
+}
+
+static struct range range_cast_s32(enum num_t to_t, struct range x)
+{
+ s32 a = (s32)x.a, b = (s32)x.b;
+
+ switch (to_t) {
+ case U64:
+ case U32:
+ case S64:
+ if (sign32(a) != sign32(b))
+ return unkn[to_t];
+ return range(to_t, a, b);
+ case S32:
+ return x;
+ default: printf("range_cast_s32!\n"); exit(1);
+ }
+}
+
+/* Reinterpret range in *from_t* domain as a range in *to_t* domain preserving
+ * all possible information. Worst case, it will be unknown range within
+ * *to_t* domain, if nothing more specific can be guaranteed during the
+ * conversion
+ */
+static struct range range_cast(enum num_t from_t, enum num_t to_t, struct range from)
+{
+ switch (from_t) {
+ case U64: return range_cast_u64(to_t, from);
+ case U32: return range_cast_u32(to_t, from);
+ case S64: return range_cast_s64(to_t, from);
+ case S32: return range_cast_s32(to_t, from);
+ default: printf("range_cast!\n"); exit(1);
+ }
+}
+
+static bool is_valid_num(enum num_t t, u64 x)
+{
+ switch (t) {
+ case U64: return true;
+ case U32: return upper32(x) == 0;
+ case S64: return true;
+ case S32: return upper32(x) == 0;
+ default: printf("is_valid_num!\n"); exit(1);
+ }
+}
+
+static bool is_valid_range(enum num_t t, struct range x)
+{
+ if (!is_valid_num(t, x.a) || !is_valid_num(t, x.b))
+ return false;
+
+ switch (t) {
+ case U64: return (u64)x.a <= (u64)x.b;
+ case U32: return (u32)x.a <= (u32)x.b;
+ case S64: return (s64)x.a <= (s64)x.b;
+ case S32: return (s32)x.a <= (s32)x.b;
+ default: printf("is_valid_range!\n"); exit(1);
+ }
+}
+
+static struct range range_improve(enum num_t t, struct range old, struct range new)
+{
+ return range(t, max_t(t, old.a, new.a), min_t(t, old.b, new.b));
+}
+
+static struct range range_refine(enum num_t x_t, struct range x, enum num_t y_t, struct range y)
+{
+ struct range y_cast;
+
+ y_cast = range_cast(y_t, x_t, y);
+
+ /* the case when new range knowledge, *y*, is a 32-bit subregister
+ * range, while previous range knowledge, *x*, is a full register
+ * 64-bit range, needs special treatment to take into account upper 32
+ * bits of full register range
+ */
+ if (t_is_32(y_t) && !t_is_32(x_t)) {
+ struct range x_swap;
+
+ /* some combinations of upper 32 bits and sign bit can lead to
+ * invalid ranges, in such cases it's easier to detect them
+ * after cast/swap than try to enumerate all the conditions
+ * under which transformation and knowledge transfer is valid
+ */
+ x_swap = range(x_t, swap_low32(x.a, y_cast.a), swap_low32(x.b, y_cast.b));
+ if (!is_valid_range(x_t, x_swap))
+ return x;
+ return range_improve(x_t, x, x_swap);
+ }
+
+ /* otherwise, plain range cast and intersection works */
+ return range_improve(x_t, x, y_cast);
+}
+
+/* =======================
+ * GENERIC CONDITIONAL OPS
+ * =======================
+ */
+enum op { OP_LT, OP_LE, OP_GT, OP_GE, OP_EQ, OP_NE, first_op = OP_LT, last_op = OP_NE };
+
+static enum op complement_op(enum op op)
+{
+ switch (op) {
+ case OP_LT: return OP_GE;
+ case OP_LE: return OP_GT;
+ case OP_GT: return OP_LE;
+ case OP_GE: return OP_LT;
+ case OP_EQ: return OP_NE;
+ case OP_NE: return OP_EQ;
+ default: printf("complement_op!\n"); exit(1);
+ }
+}
+
+static const char *op_str(enum op op)
+{
+ switch (op) {
+ case OP_LT: return "<";
+ case OP_LE: return "<=";
+ case OP_GT: return ">";
+ case OP_GE: return ">=";
+ case OP_EQ: return "==";
+ case OP_NE: return "!=";
+ default: printf("op_str!\n"); exit(1);
+ }
+}
+
+/* Can register with range [x.a, x.b] *EVER* satisfy
+ * OP (<, <=, >, >=, ==, !=) relation to
+ * a regsiter with range [y.a, y.b]
+ * _in *num_t* domain_
+ */
+static bool range_canbe_op(enum num_t t, struct range x, struct range y, enum op op)
+{
+#define range_canbe(T) do { \
+ switch (op) { \
+ case OP_LT: return (T)x.a < (T)y.b; \
+ case OP_LE: return (T)x.a <= (T)y.b; \
+ case OP_GT: return (T)x.b > (T)y.a; \
+ case OP_GE: return (T)x.b >= (T)y.a; \
+ case OP_EQ: return (T)max_t(t, x.a, y.a) <= (T)min_t(t, x.b, y.b); \
+ case OP_NE: return !((T)x.a == (T)x.b && (T)y.a == (T)y.b && (T)x.a == (T)y.a); \
+ default: printf("range_canbe op %d\n", op); exit(1); \
+ } \
+} while (0)
+
+ switch (t) {
+ case U64: { range_canbe(u64); }
+ case U32: { range_canbe(u32); }
+ case S64: { range_canbe(s64); }
+ case S32: { range_canbe(s32); }
+ default: printf("range_canbe!\n"); exit(1);
+ }
+#undef range_canbe
+}
+
+/* Does register with range [x.a, x.b] *ALWAYS* satisfy
+ * OP (<, <=, >, >=, ==, !=) relation to
+ * a regsiter with range [y.a, y.b]
+ * _in *num_t* domain_
+ */
+static bool range_always_op(enum num_t t, struct range x, struct range y, enum op op)
+{
+ /* always op <=> ! canbe complement(op) */
+ return !range_canbe_op(t, x, y, complement_op(op));
+}
+
+/* Does register with range [x.a, x.b] *NEVER* satisfy
+ * OP (<, <=, >, >=, ==, !=) relation to
+ * a regsiter with range [y.a, y.b]
+ * _in *num_t* domain_
+ */
+static bool range_never_op(enum num_t t, struct range x, struct range y, enum op op)
+{
+ return !range_canbe_op(t, x, y, op);
+}
+
+/* similar to verifier's is_branch_taken():
+ * 1 - always taken;
+ * 0 - never taken,
+ * -1 - unsure.
+ */
+static int range_branch_taken_op(enum num_t t, struct range x, struct range y, enum op op)
+{
+ if (range_always_op(t, x, y, op))
+ return 1;
+ if (range_never_op(t, x, y, op))
+ return 0;
+ return -1;
+}
+
+/* What would be the new estimates for register x and y ranges assuming truthful
+ * OP comparison between them. I.e., (x OP y == true) => x <- newx, y <- newy.
+ *
+ * We assume "interesting" cases where ranges overlap. Cases where it's
+ * obvious that (x OP y) is either always true or false should be filtered with
+ * range_never and range_always checks.
+ */
+static void range_cond(enum num_t t, struct range x, struct range y,
+ enum op op, struct range *newx, struct range *newy)
+{
+ if (!range_canbe_op(t, x, y, op)) {
+ /* nothing to adjust, can't happen, return original values */
+ *newx = x;
+ *newy = y;
+ return;
+ }
+ switch (op) {
+ case OP_LT:
+ *newx = range(t, x.a, min_t(t, x.b, y.b - 1));
+ *newy = range(t, max_t(t, x.a + 1, y.a), y.b);
+ break;
+ case OP_LE:
+ *newx = range(t, x.a, min_t(t, x.b, y.b));
+ *newy = range(t, max_t(t, x.a, y.a), y.b);
+ break;
+ case OP_GT:
+ *newx = range(t, max_t(t, x.a, y.a + 1), x.b);
+ *newy = range(t, y.a, min_t(t, x.b - 1, y.b));
+ break;
+ case OP_GE:
+ *newx = range(t, max_t(t, x.a, y.a), x.b);
+ *newy = range(t, y.a, min_t(t, x.b, y.b));
+ break;
+ case OP_EQ:
+ *newx = range(t, max_t(t, x.a, y.a), min_t(t, x.b, y.b));
+ *newy = range(t, max_t(t, x.a, y.a), min_t(t, x.b, y.b));
+ break;
+ case OP_NE:
+ /* below logic is supported by the verifier now */
+ if (x.a == x.b && x.a == y.a) {
+ /* X is a constant matching left side of Y */
+ *newx = range(t, x.a, x.b);
+ *newy = range(t, y.a + 1, y.b);
+ } else if (x.a == x.b && x.b == y.b) {
+ /* X is a constant matching rigth side of Y */
+ *newx = range(t, x.a, x.b);
+ *newy = range(t, y.a, y.b - 1);
+ } else if (y.a == y.b && x.a == y.a) {
+ /* Y is a constant matching left side of X */
+ *newx = range(t, x.a + 1, x.b);
+ *newy = range(t, y.a, y.b);
+ } else if (y.a == y.b && x.b == y.b) {
+ /* Y is a constant matching rigth side of X */
+ *newx = range(t, x.a, x.b - 1);
+ *newy = range(t, y.a, y.b);
+ } else {
+ /* generic case, can't derive more information */
+ *newx = range(t, x.a, x.b);
+ *newy = range(t, y.a, y.b);
+ }
+
+ break;
+ default:
+ break;
+ }
+}
+
+/* =======================
+ * REGISTER STATE HANDLING
+ * =======================
+ */
+struct reg_state {
+ struct range r[4]; /* indexed by enum num_t: U64, U32, S64, S32 */
+ bool valid;
+};
+
+static void print_reg_state(struct reg_state *r, const char *sfx)
+{
+ DEFINE_STRBUF(sb, 512);
+ enum num_t t;
+ int cnt = 0;
+
+ if (!r->valid) {
+ printf("<not found>%s", sfx);
+ return;
+ }
+
+ snappendf(sb, "scalar(");
+ for (t = first_t; t <= last_t; t++) {
+ snappendf(sb, "%s%s=", cnt++ ? "," : "", t_str(t));
+ snprintf_range(t, sb, r->r[t]);
+ }
+ snappendf(sb, ")");
+
+ printf("%s%s", sb->buf, sfx);
+}
+
+static void print_refinement(enum num_t s_t, struct range src,
+ enum num_t d_t, struct range old, struct range new,
+ const char *ctx)
+{
+ printf("REFINING (%s) (%s)SRC=", ctx, t_str(s_t));
+ print_range(s_t, src, "");
+ printf(" (%s)DST_OLD=", t_str(d_t));
+ print_range(d_t, old, "");
+ printf(" (%s)DST_NEW=", t_str(d_t));
+ print_range(d_t, new, "\n");
+}
+
+static void reg_state_refine(struct reg_state *r, enum num_t t, struct range x, const char *ctx)
+{
+ enum num_t d_t, s_t;
+ struct range old;
+ bool keep_going = false;
+
+again:
+ /* try to derive new knowledge from just learned range x of type t */
+ for (d_t = first_t; d_t <= last_t; d_t++) {
+ old = r->r[d_t];
+ r->r[d_t] = range_refine(d_t, r->r[d_t], t, x);
+ if (!range_eq(r->r[d_t], old)) {
+ keep_going = true;
+ if (env.verbosity >= VERBOSE_VERY)
+ print_refinement(t, x, d_t, old, r->r[d_t], ctx);
+ }
+ }
+
+ /* now see if we can derive anything new from updated reg_state's ranges */
+ for (s_t = first_t; s_t <= last_t; s_t++) {
+ for (d_t = first_t; d_t <= last_t; d_t++) {
+ old = r->r[d_t];
+ r->r[d_t] = range_refine(d_t, r->r[d_t], s_t, r->r[s_t]);
+ if (!range_eq(r->r[d_t], old)) {
+ keep_going = true;
+ if (env.verbosity >= VERBOSE_VERY)
+ print_refinement(s_t, r->r[s_t], d_t, old, r->r[d_t], ctx);
+ }
+ }
+ }
+
+ /* keep refining until we converge */
+ if (keep_going) {
+ keep_going = false;
+ goto again;
+ }
+}
+
+static void reg_state_set_const(struct reg_state *rs, enum num_t t, u64 val)
+{
+ enum num_t tt;
+
+ rs->valid = true;
+ for (tt = first_t; tt <= last_t; tt++)
+ rs->r[tt] = tt == t ? range(t, val, val) : unkn[tt];
+
+ reg_state_refine(rs, t, rs->r[t], "CONST");
+}
+
+static void reg_state_cond(enum num_t t, struct reg_state *x, struct reg_state *y, enum op op,
+ struct reg_state *newx, struct reg_state *newy, const char *ctx)
+{
+ char buf[32];
+ enum num_t ts[2];
+ struct reg_state xx = *x, yy = *y;
+ int i, t_cnt;
+ struct range z1, z2;
+
+ if (op == OP_EQ || op == OP_NE) {
+ /* OP_EQ and OP_NE are sign-agnostic, so we need to process
+ * both signed and unsigned domains at the same time
+ */
+ ts[0] = t_unsigned(t);
+ ts[1] = t_signed(t);
+ t_cnt = 2;
+ } else {
+ ts[0] = t;
+ t_cnt = 1;
+ }
+
+ for (i = 0; i < t_cnt; i++) {
+ t = ts[i];
+ z1 = x->r[t];
+ z2 = y->r[t];
+
+ range_cond(t, z1, z2, op, &z1, &z2);
+
+ if (newx) {
+ snprintf(buf, sizeof(buf), "%s R1", ctx);
+ reg_state_refine(&xx, t, z1, buf);
+ }
+ if (newy) {
+ snprintf(buf, sizeof(buf), "%s R2", ctx);
+ reg_state_refine(&yy, t, z2, buf);
+ }
+ }
+
+ if (newx)
+ *newx = xx;
+ if (newy)
+ *newy = yy;
+}
+
+static int reg_state_branch_taken_op(enum num_t t, struct reg_state *x, struct reg_state *y,
+ enum op op)
+{
+ if (op == OP_EQ || op == OP_NE) {
+ /* OP_EQ and OP_NE are sign-agnostic */
+ enum num_t tu = t_unsigned(t);
+ enum num_t ts = t_signed(t);
+ int br_u, br_s, br;
+
+ br_u = range_branch_taken_op(tu, x->r[tu], y->r[tu], op);
+ br_s = range_branch_taken_op(ts, x->r[ts], y->r[ts], op);
+
+ if (br_u >= 0 && br_s >= 0 && br_u != br_s)
+ ASSERT_FALSE(true, "branch taken inconsistency!\n");
+
+ /* if 64-bit ranges are indecisive, use 32-bit subranges to
+ * eliminate always/never taken branches, if possible
+ */
+ if (br_u == -1 && (t == U64 || t == S64)) {
+ br = range_branch_taken_op(U32, x->r[U32], y->r[U32], op);
+ /* we can only reject for OP_EQ, never take branch
+ * based on lower 32 bits
+ */
+ if (op == OP_EQ && br == 0)
+ return 0;
+ /* for OP_NEQ we can be conclusive only if lower 32 bits
+ * differ and thus inequality branch is always taken
+ */
+ if (op == OP_NE && br == 1)
+ return 1;
+
+ br = range_branch_taken_op(S32, x->r[S32], y->r[S32], op);
+ if (op == OP_EQ && br == 0)
+ return 0;
+ if (op == OP_NE && br == 1)
+ return 1;
+ }
+
+ return br_u >= 0 ? br_u : br_s;
+ }
+ return range_branch_taken_op(t, x->r[t], y->r[t], op);
+}
+
+/* =====================================
+ * BPF PROGS GENERATION AND VERIFICATION
+ * =====================================
+ */
+struct case_spec {
+ /* whether to init full register (r1) or sub-register (w1) */
+ bool init_subregs;
+ /* whether to establish initial value range on full register (r1) or
+ * sub-register (w1)
+ */
+ bool setup_subregs;
+ /* whether to establish initial value range using signed or unsigned
+ * comparisons (i.e., initialize umin/umax or smin/smax directly)
+ */
+ bool setup_signed;
+ /* whether to perform comparison on full registers or sub-registers */
+ bool compare_subregs;
+ /* whether to perform comparison using signed or unsigned operations */
+ bool compare_signed;
+};
+
+/* Generate test BPF program based on provided test ranges, operation, and
+ * specifications about register bitness and signedness.
+ */
+static int load_range_cmp_prog(struct range x, struct range y, enum op op,
+ int branch_taken, struct case_spec spec,
+ char *log_buf, size_t log_sz,
+ int *false_pos, int *true_pos)
+{
+#define emit(insn) ({ \
+ struct bpf_insn __insns[] = { insn }; \
+ int __i; \
+ for (__i = 0; __i < ARRAY_SIZE(__insns); __i++) \
+ insns[cur_pos + __i] = __insns[__i]; \
+ cur_pos += __i; \
+})
+#define JMP_TO(target) (target - cur_pos - 1)
+ int cur_pos = 0, exit_pos, fd, op_code;
+ struct bpf_insn insns[64];
+ LIBBPF_OPTS(bpf_prog_load_opts, opts,
+ .log_level = 2,
+ .log_buf = log_buf,
+ .log_size = log_sz,
+ .prog_flags = testing_prog_flags(),
+ );
+
+ /* ; skip exit block below
+ * goto +2;
+ */
+ emit(BPF_JMP_A(2));
+ exit_pos = cur_pos;
+ /* ; exit block for all the preparatory conditionals
+ * out:
+ * r0 = 0;
+ * exit;
+ */
+ emit(BPF_MOV64_IMM(BPF_REG_0, 0));
+ emit(BPF_EXIT_INSN());
+ /*
+ * ; assign r6/w6 and r7/w7 unpredictable u64/u32 value
+ * call bpf_get_current_pid_tgid;
+ * r6 = r0; | w6 = w0;
+ * call bpf_get_current_pid_tgid;
+ * r7 = r0; | w7 = w0;
+ */
+ emit(BPF_EMIT_CALL(BPF_FUNC_get_current_pid_tgid));
+ if (spec.init_subregs)
+ emit(BPF_MOV32_REG(BPF_REG_6, BPF_REG_0));
+ else
+ emit(BPF_MOV64_REG(BPF_REG_6, BPF_REG_0));
+ emit(BPF_EMIT_CALL(BPF_FUNC_get_current_pid_tgid));
+ if (spec.init_subregs)
+ emit(BPF_MOV32_REG(BPF_REG_7, BPF_REG_0));
+ else
+ emit(BPF_MOV64_REG(BPF_REG_7, BPF_REG_0));
+ /* ; setup initial r6/w6 possible value range ([x.a, x.b])
+ * r1 = %[x.a] ll; | w1 = %[x.a];
+ * r2 = %[x.b] ll; | w2 = %[x.b];
+ * if r6 < r1 goto out; | if w6 < w1 goto out;
+ * if r6 > r2 goto out; | if w6 > w2 goto out;
+ */
+ if (spec.setup_subregs) {
+ emit(BPF_MOV32_IMM(BPF_REG_1, (s32)x.a));
+ emit(BPF_MOV32_IMM(BPF_REG_2, (s32)x.b));
+ emit(BPF_JMP32_REG(spec.setup_signed ? BPF_JSLT : BPF_JLT,
+ BPF_REG_6, BPF_REG_1, JMP_TO(exit_pos)));
+ emit(BPF_JMP32_REG(spec.setup_signed ? BPF_JSGT : BPF_JGT,
+ BPF_REG_6, BPF_REG_2, JMP_TO(exit_pos)));
+ } else {
+ emit(BPF_LD_IMM64(BPF_REG_1, x.a));
+ emit(BPF_LD_IMM64(BPF_REG_2, x.b));
+ emit(BPF_JMP_REG(spec.setup_signed ? BPF_JSLT : BPF_JLT,
+ BPF_REG_6, BPF_REG_1, JMP_TO(exit_pos)));
+ emit(BPF_JMP_REG(spec.setup_signed ? BPF_JSGT : BPF_JGT,
+ BPF_REG_6, BPF_REG_2, JMP_TO(exit_pos)));
+ }
+ /* ; setup initial r7/w7 possible value range ([y.a, y.b])
+ * r1 = %[y.a] ll; | w1 = %[y.a];
+ * r2 = %[y.b] ll; | w2 = %[y.b];
+ * if r7 < r1 goto out; | if w7 < w1 goto out;
+ * if r7 > r2 goto out; | if w7 > w2 goto out;
+ */
+ if (spec.setup_subregs) {
+ emit(BPF_MOV32_IMM(BPF_REG_1, (s32)y.a));
+ emit(BPF_MOV32_IMM(BPF_REG_2, (s32)y.b));
+ emit(BPF_JMP32_REG(spec.setup_signed ? BPF_JSLT : BPF_JLT,
+ BPF_REG_7, BPF_REG_1, JMP_TO(exit_pos)));
+ emit(BPF_JMP32_REG(spec.setup_signed ? BPF_JSGT : BPF_JGT,
+ BPF_REG_7, BPF_REG_2, JMP_TO(exit_pos)));
+ } else {
+ emit(BPF_LD_IMM64(BPF_REG_1, y.a));
+ emit(BPF_LD_IMM64(BPF_REG_2, y.b));
+ emit(BPF_JMP_REG(spec.setup_signed ? BPF_JSLT : BPF_JLT,
+ BPF_REG_7, BPF_REG_1, JMP_TO(exit_pos)));
+ emit(BPF_JMP_REG(spec.setup_signed ? BPF_JSGT : BPF_JGT,
+ BPF_REG_7, BPF_REG_2, JMP_TO(exit_pos)));
+ }
+ /* ; range test instruction
+ * if r6 <op> r7 goto +3; | if w6 <op> w7 goto +3;
+ */
+ switch (op) {
+ case OP_LT: op_code = spec.compare_signed ? BPF_JSLT : BPF_JLT; break;
+ case OP_LE: op_code = spec.compare_signed ? BPF_JSLE : BPF_JLE; break;
+ case OP_GT: op_code = spec.compare_signed ? BPF_JSGT : BPF_JGT; break;
+ case OP_GE: op_code = spec.compare_signed ? BPF_JSGE : BPF_JGE; break;
+ case OP_EQ: op_code = BPF_JEQ; break;
+ case OP_NE: op_code = BPF_JNE; break;
+ default:
+ printf("unrecognized op %d\n", op);
+ return -ENOTSUP;
+ }
+ /* ; BEFORE conditional, r0/w0 = {r6/w6,r7/w7} is to extract verifier state reliably
+ * ; this is used for debugging, as verifier doesn't always print
+ * ; registers states as of condition jump instruction (e.g., when
+ * ; precision marking happens)
+ * r0 = r6; | w0 = w6;
+ * r0 = r7; | w0 = w7;
+ */
+ if (spec.compare_subregs) {
+ emit(BPF_MOV32_REG(BPF_REG_0, BPF_REG_6));
+ emit(BPF_MOV32_REG(BPF_REG_0, BPF_REG_7));
+ } else {
+ emit(BPF_MOV64_REG(BPF_REG_0, BPF_REG_6));
+ emit(BPF_MOV64_REG(BPF_REG_0, BPF_REG_7));
+ }
+ if (spec.compare_subregs)
+ emit(BPF_JMP32_REG(op_code, BPF_REG_6, BPF_REG_7, 3));
+ else
+ emit(BPF_JMP_REG(op_code, BPF_REG_6, BPF_REG_7, 3));
+ /* ; FALSE branch, r0/w0 = {r6/w6,r7/w7} is to extract verifier state reliably
+ * r0 = r6; | w0 = w6;
+ * r0 = r7; | w0 = w7;
+ * exit;
+ */
+ *false_pos = cur_pos;
+ if (spec.compare_subregs) {
+ emit(BPF_MOV32_REG(BPF_REG_0, BPF_REG_6));
+ emit(BPF_MOV32_REG(BPF_REG_0, BPF_REG_7));
+ } else {
+ emit(BPF_MOV64_REG(BPF_REG_0, BPF_REG_6));
+ emit(BPF_MOV64_REG(BPF_REG_0, BPF_REG_7));
+ }
+ if (branch_taken == 1) /* false branch is never taken */
+ emit(BPF_EMIT_CALL(0xDEAD)); /* poison this branch */
+ else
+ emit(BPF_EXIT_INSN());
+ /* ; TRUE branch, r0/w0 = {r6/w6,r7/w7} is to extract verifier state reliably
+ * r0 = r6; | w0 = w6;
+ * r0 = r7; | w0 = w7;
+ * exit;
+ */
+ *true_pos = cur_pos;
+ if (spec.compare_subregs) {
+ emit(BPF_MOV32_REG(BPF_REG_0, BPF_REG_6));
+ emit(BPF_MOV32_REG(BPF_REG_0, BPF_REG_7));
+ } else {
+ emit(BPF_MOV64_REG(BPF_REG_0, BPF_REG_6));
+ emit(BPF_MOV64_REG(BPF_REG_0, BPF_REG_7));
+ }
+ if (branch_taken == 0) /* true branch is never taken */
+ emit(BPF_EMIT_CALL(0xDEAD)); /* poison this branch */
+ emit(BPF_EXIT_INSN()); /* last instruction has to be exit */
+
+ fd = bpf_prog_load(BPF_PROG_TYPE_RAW_TRACEPOINT, "reg_bounds_test",
+ "GPL", insns, cur_pos, &opts);
+ if (fd < 0)
+ return fd;
+
+ close(fd);
+ return 0;
+#undef emit
+#undef JMP_TO
+}
+
+#define str_has_pfx(str, pfx) (strncmp(str, pfx, strlen(pfx)) == 0)
+
+/* Parse register state from verifier log.
+ * `s` should point to the start of "Rx = ..." substring in the verifier log.
+ */
+static int parse_reg_state(const char *s, struct reg_state *reg)
+{
+ /* There are two generic forms for SCALAR register:
+ * - known constant: R6_rwD=P%lld
+ * - range: R6_rwD=scalar(id=1,...), where "..." is a comma-separated
+ * list of optional range specifiers:
+ * - umin=%llu, if missing, assumed 0;
+ * - umax=%llu, if missing, assumed U64_MAX;
+ * - smin=%lld, if missing, assumed S64_MIN;
+ * - smax=%lld, if missing, assummed S64_MAX;
+ * - umin32=%d, if missing, assumed 0;
+ * - umax32=%d, if missing, assumed U32_MAX;
+ * - smin32=%d, if missing, assumed S32_MIN;
+ * - smax32=%d, if missing, assummed S32_MAX;
+ * - var_off=(%#llx; %#llx), tnum part, we don't care about it.
+ *
+ * If some of the values are equal, they will be grouped (but min/max
+ * are not mixed together, and similarly negative values are not
+ * grouped with non-negative ones). E.g.:
+ *
+ * R6_w=Pscalar(smin=smin32=0, smax=umax=umax32=1000)
+ *
+ * _rwD part is optional (and any of the letters can be missing).
+ * P (precision mark) is optional as well.
+ *
+ * Anything inside scalar() is optional, including id, of course.
+ */
+ struct {
+ const char *pfx;
+ u64 *dst, def;
+ bool is_32, is_set;
+ } *f, fields[8] = {
+ {"smin=", &reg->r[S64].a, S64_MIN},
+ {"smax=", &reg->r[S64].b, S64_MAX},
+ {"umin=", &reg->r[U64].a, 0},
+ {"umax=", &reg->r[U64].b, U64_MAX},
+ {"smin32=", &reg->r[S32].a, (u32)S32_MIN, true},
+ {"smax32=", &reg->r[S32].b, (u32)S32_MAX, true},
+ {"umin32=", &reg->r[U32].a, 0, true},
+ {"umax32=", &reg->r[U32].b, U32_MAX, true},
+ };
+ const char *p;
+ int i;
+
+ p = strchr(s, '=');
+ if (!p)
+ return -EINVAL;
+ p++;
+ if (*p == 'P')
+ p++;
+
+ if (!str_has_pfx(p, "scalar(")) {
+ long long sval;
+ enum num_t t;
+
+ if (p[0] == '0' && p[1] == 'x') {
+ if (sscanf(p, "%llx", &sval) != 1)
+ return -EINVAL;
+ } else {
+ if (sscanf(p, "%lld", &sval) != 1)
+ return -EINVAL;
+ }
+
+ reg->valid = true;
+ for (t = first_t; t <= last_t; t++) {
+ reg->r[t] = range(t, sval, sval);
+ }
+ return 0;
+ }
+
+ p += sizeof("scalar");
+ while (p) {
+ int midxs[ARRAY_SIZE(fields)], mcnt = 0;
+ u64 val;
+
+ for (i = 0; i < ARRAY_SIZE(fields); i++) {
+ f = &fields[i];
+ if (!str_has_pfx(p, f->pfx))
+ continue;
+ midxs[mcnt++] = i;
+ p += strlen(f->pfx);
+ }
+
+ if (mcnt) {
+ /* populate all matched fields */
+ if (p[0] == '0' && p[1] == 'x') {
+ if (sscanf(p, "%llx", &val) != 1)
+ return -EINVAL;
+ } else {
+ if (sscanf(p, "%lld", &val) != 1)
+ return -EINVAL;
+ }
+
+ for (i = 0; i < mcnt; i++) {
+ f = &fields[midxs[i]];
+ f->is_set = true;
+ *f->dst = f->is_32 ? (u64)(u32)val : val;
+ }
+ } else if (str_has_pfx(p, "var_off")) {
+ /* skip "var_off=(0x0; 0x3f)" part completely */
+ p = strchr(p, ')');
+ if (!p)
+ return -EINVAL;
+ p++;
+ }
+
+ p = strpbrk(p, ",)");
+ if (*p == ')')
+ break;
+ if (p)
+ p++;
+ }
+
+ reg->valid = true;
+
+ for (i = 0; i < ARRAY_SIZE(fields); i++) {
+ f = &fields[i];
+ if (!f->is_set)
+ *f->dst = f->def;
+ }
+
+ return 0;
+}
+
+
+/* Parse all register states (TRUE/FALSE branches and DST/SRC registers)
+ * out of the verifier log for a corresponding test case BPF program.
+ */
+static int parse_range_cmp_log(const char *log_buf, struct case_spec spec,
+ int false_pos, int true_pos,
+ struct reg_state *false1_reg, struct reg_state *false2_reg,
+ struct reg_state *true1_reg, struct reg_state *true2_reg)
+{
+ struct {
+ int insn_idx;
+ int reg_idx;
+ const char *reg_upper;
+ struct reg_state *state;
+ } specs[] = {
+ {false_pos, 6, "R6=", false1_reg},
+ {false_pos + 1, 7, "R7=", false2_reg},
+ {true_pos, 6, "R6=", true1_reg},
+ {true_pos + 1, 7, "R7=", true2_reg},
+ };
+ char buf[32];
+ const char *p = log_buf, *q;
+ int i, err;
+
+ for (i = 0; i < 4; i++) {
+ sprintf(buf, "%d: (%s) %s = %s%d", specs[i].insn_idx,
+ spec.compare_subregs ? "bc" : "bf",
+ spec.compare_subregs ? "w0" : "r0",
+ spec.compare_subregs ? "w" : "r", specs[i].reg_idx);
+
+ q = strstr(p, buf);
+ if (!q) {
+ *specs[i].state = (struct reg_state){.valid = false};
+ continue;
+ }
+ p = strstr(q, specs[i].reg_upper);
+ if (!p)
+ return -EINVAL;
+ err = parse_reg_state(p, specs[i].state);
+ if (err)
+ return -EINVAL;
+ }
+ return 0;
+}
+
+/* Validate ranges match, and print details if they don't */
+static bool assert_range_eq(enum num_t t, struct range x, struct range y,
+ const char *ctx1, const char *ctx2)
+{
+ DEFINE_STRBUF(sb, 512);
+
+ if (range_eq(x, y))
+ return true;
+
+ snappendf(sb, "MISMATCH %s.%s: ", ctx1, ctx2);
+ snprintf_range(t, sb, x);
+ snappendf(sb, " != ");
+ snprintf_range(t, sb, y);
+
+ printf("%s\n", sb->buf);
+
+ return false;
+}
+
+/* Validate that register states match, and print details if they don't */
+static bool assert_reg_state_eq(struct reg_state *r, struct reg_state *e, const char *ctx)
+{
+ bool ok = true;
+ enum num_t t;
+
+ if (r->valid != e->valid) {
+ printf("MISMATCH %s: actual %s != expected %s\n", ctx,
+ r->valid ? "<valid>" : "<invalid>",
+ e->valid ? "<valid>" : "<invalid>");
+ return false;
+ }
+
+ if (!r->valid)
+ return true;
+
+ for (t = first_t; t <= last_t; t++) {
+ if (!assert_range_eq(t, r->r[t], e->r[t], ctx, t_str(t)))
+ ok = false;
+ }
+
+ return ok;
+}
+
+/* Printf verifier log, filtering out irrelevant noise */
+static void print_verifier_log(const char *buf)
+{
+ const char *p;
+
+ while (buf[0]) {
+ p = strchrnul(buf, '\n');
+
+ /* filter out irrelevant precision backtracking logs */
+ if (str_has_pfx(buf, "mark_precise: "))
+ goto skip_line;
+
+ printf("%.*s\n", (int)(p - buf), buf);
+
+skip_line:
+ buf = *p == '\0' ? p : p + 1;
+ }
+}
+
+/* Simulate provided test case purely with our own range-based logic.
+ * This is done to set up expectations for verifier's branch_taken logic and
+ * verifier's register states in the verifier log.
+ */
+static void sim_case(enum num_t init_t, enum num_t cond_t,
+ struct range x, struct range y, enum op op,
+ struct reg_state *fr1, struct reg_state *fr2,
+ struct reg_state *tr1, struct reg_state *tr2,
+ int *branch_taken)
+{
+ const u64 A = x.a;
+ const u64 B = x.b;
+ const u64 C = y.a;
+ const u64 D = y.b;
+ struct reg_state rc;
+ enum op rev_op = complement_op(op);
+ enum num_t t;
+
+ fr1->valid = fr2->valid = true;
+ tr1->valid = tr2->valid = true;
+ for (t = first_t; t <= last_t; t++) {
+ /* if we are initializing using 32-bit subregisters,
+ * full registers get upper 32 bits zeroed automatically
+ */
+ struct range z = t_is_32(init_t) ? unkn_subreg(t) : unkn[t];
+
+ fr1->r[t] = fr2->r[t] = tr1->r[t] = tr2->r[t] = z;
+ }
+
+ /* step 1: r1 >= A, r2 >= C */
+ reg_state_set_const(&rc, init_t, A);
+ reg_state_cond(init_t, fr1, &rc, OP_GE, fr1, NULL, "r1>=A");
+ reg_state_set_const(&rc, init_t, C);
+ reg_state_cond(init_t, fr2, &rc, OP_GE, fr2, NULL, "r2>=C");
+ *tr1 = *fr1;
+ *tr2 = *fr2;
+ if (env.verbosity >= VERBOSE_VERY) {
+ printf("STEP1 (%s) R1: ", t_str(init_t)); print_reg_state(fr1, "\n");
+ printf("STEP1 (%s) R2: ", t_str(init_t)); print_reg_state(fr2, "\n");
+ }
+
+ /* step 2: r1 <= B, r2 <= D */
+ reg_state_set_const(&rc, init_t, B);
+ reg_state_cond(init_t, fr1, &rc, OP_LE, fr1, NULL, "r1<=B");
+ reg_state_set_const(&rc, init_t, D);
+ reg_state_cond(init_t, fr2, &rc, OP_LE, fr2, NULL, "r2<=D");
+ *tr1 = *fr1;
+ *tr2 = *fr2;
+ if (env.verbosity >= VERBOSE_VERY) {
+ printf("STEP2 (%s) R1: ", t_str(init_t)); print_reg_state(fr1, "\n");
+ printf("STEP2 (%s) R2: ", t_str(init_t)); print_reg_state(fr2, "\n");
+ }
+
+ /* step 3: r1 <op> r2 */
+ *branch_taken = reg_state_branch_taken_op(cond_t, fr1, fr2, op);
+ fr1->valid = fr2->valid = false;
+ tr1->valid = tr2->valid = false;
+ if (*branch_taken != 1) { /* FALSE is possible */
+ fr1->valid = fr2->valid = true;
+ reg_state_cond(cond_t, fr1, fr2, rev_op, fr1, fr2, "FALSE");
+ }
+ if (*branch_taken != 0) { /* TRUE is possible */
+ tr1->valid = tr2->valid = true;
+ reg_state_cond(cond_t, tr1, tr2, op, tr1, tr2, "TRUE");
+ }
+ if (env.verbosity >= VERBOSE_VERY) {
+ printf("STEP3 (%s) FALSE R1:", t_str(cond_t)); print_reg_state(fr1, "\n");
+ printf("STEP3 (%s) FALSE R2:", t_str(cond_t)); print_reg_state(fr2, "\n");
+ printf("STEP3 (%s) TRUE R1:", t_str(cond_t)); print_reg_state(tr1, "\n");
+ printf("STEP3 (%s) TRUE R2:", t_str(cond_t)); print_reg_state(tr2, "\n");
+ }
+}
+
+/* ===============================
+ * HIGH-LEVEL TEST CASE VALIDATION
+ * ===============================
+ */
+static u32 upper_seeds[] = {
+ 0,
+ 1,
+ U32_MAX,
+ U32_MAX - 1,
+ S32_MAX,
+ (u32)S32_MIN,
+};
+
+static u32 lower_seeds[] = {
+ 0,
+ 1,
+ 2, (u32)-2,
+ 255, (u32)-255,
+ UINT_MAX,
+ UINT_MAX - 1,
+ INT_MAX,
+ (u32)INT_MIN,
+};
+
+struct ctx {
+ int val_cnt, subval_cnt, range_cnt, subrange_cnt;
+ u64 uvals[ARRAY_SIZE(upper_seeds) * ARRAY_SIZE(lower_seeds)];
+ s64 svals[ARRAY_SIZE(upper_seeds) * ARRAY_SIZE(lower_seeds)];
+ u32 usubvals[ARRAY_SIZE(lower_seeds)];
+ s32 ssubvals[ARRAY_SIZE(lower_seeds)];
+ struct range *uranges, *sranges;
+ struct range *usubranges, *ssubranges;
+ int max_failure_cnt, cur_failure_cnt;
+ int total_case_cnt, case_cnt;
+ int rand_case_cnt;
+ unsigned rand_seed;
+ __u64 start_ns;
+ char progress_ctx[64];
+};
+
+static void cleanup_ctx(struct ctx *ctx)
+{
+ free(ctx->uranges);
+ free(ctx->sranges);
+ free(ctx->usubranges);
+ free(ctx->ssubranges);
+}
+
+struct subtest_case {
+ enum num_t init_t;
+ enum num_t cond_t;
+ struct range x;
+ struct range y;
+ enum op op;
+};
+
+static void subtest_case_str(struct strbuf *sb, struct subtest_case *t, bool use_op)
+{
+ snappendf(sb, "(%s)", t_str(t->init_t));
+ snprintf_range(t->init_t, sb, t->x);
+ snappendf(sb, " (%s)%s ", t_str(t->cond_t), use_op ? op_str(t->op) : "<op>");
+ snprintf_range(t->init_t, sb, t->y);
+}
+
+/* Generate and validate test case based on specific combination of setup
+ * register ranges (including their expected num_t domain), and conditional
+ * operation to perform (including num_t domain in which it has to be
+ * performed)
+ */
+static int verify_case_op(enum num_t init_t, enum num_t cond_t,
+ struct range x, struct range y, enum op op)
+{
+ char log_buf[256 * 1024];
+ size_t log_sz = sizeof(log_buf);
+ int err, false_pos = 0, true_pos = 0, branch_taken;
+ struct reg_state fr1, fr2, tr1, tr2;
+ struct reg_state fe1, fe2, te1, te2;
+ bool failed = false;
+ struct case_spec spec = {
+ .init_subregs = (init_t == U32 || init_t == S32),
+ .setup_subregs = (init_t == U32 || init_t == S32),
+ .setup_signed = (init_t == S64 || init_t == S32),
+ .compare_subregs = (cond_t == U32 || cond_t == S32),
+ .compare_signed = (cond_t == S64 || cond_t == S32),
+ };
+
+ log_buf[0] = '\0';
+
+ sim_case(init_t, cond_t, x, y, op, &fe1, &fe2, &te1, &te2, &branch_taken);
+
+ err = load_range_cmp_prog(x, y, op, branch_taken, spec,
+ log_buf, log_sz, &false_pos, &true_pos);
+ if (err) {
+ ASSERT_OK(err, "load_range_cmp_prog");
+ failed = true;
+ }
+
+ err = parse_range_cmp_log(log_buf, spec, false_pos, true_pos,
+ &fr1, &fr2, &tr1, &tr2);
+ if (err) {
+ ASSERT_OK(err, "parse_range_cmp_log");
+ failed = true;
+ }
+
+ if (!assert_reg_state_eq(&fr1, &fe1, "false_reg1") ||
+ !assert_reg_state_eq(&fr2, &fe2, "false_reg2") ||
+ !assert_reg_state_eq(&tr1, &te1, "true_reg1") ||
+ !assert_reg_state_eq(&tr2, &te2, "true_reg2")) {
+ failed = true;
+ }
+
+ if (failed || env.verbosity >= VERBOSE_NORMAL) {
+ if (failed || env.verbosity >= VERBOSE_VERY) {
+ printf("VERIFIER LOG:\n========================\n");
+ print_verifier_log(log_buf);
+ printf("=====================\n");
+ }
+ printf("ACTUAL FALSE1: "); print_reg_state(&fr1, "\n");
+ printf("EXPECTED FALSE1: "); print_reg_state(&fe1, "\n");
+ printf("ACTUAL FALSE2: "); print_reg_state(&fr2, "\n");
+ printf("EXPECTED FALSE2: "); print_reg_state(&fe2, "\n");
+ printf("ACTUAL TRUE1: "); print_reg_state(&tr1, "\n");
+ printf("EXPECTED TRUE1: "); print_reg_state(&te1, "\n");
+ printf("ACTUAL TRUE2: "); print_reg_state(&tr2, "\n");
+ printf("EXPECTED TRUE2: "); print_reg_state(&te2, "\n");
+
+ return failed ? -EINVAL : 0;
+ }
+
+ return 0;
+}
+
+/* Given setup ranges and number types, go over all supported operations,
+ * generating individual subtest for each allowed combination
+ */
+static int verify_case_opt(struct ctx *ctx, enum num_t init_t, enum num_t cond_t,
+ struct range x, struct range y, bool is_subtest)
+{
+ DEFINE_STRBUF(sb, 256);
+ int err;
+ struct subtest_case sub = {
+ .init_t = init_t,
+ .cond_t = cond_t,
+ .x = x,
+ .y = y,
+ };
+
+ sb->pos = 0; /* reset position in strbuf */
+ subtest_case_str(sb, &sub, false /* ignore op */);
+ if (is_subtest && !test__start_subtest(sb->buf))
+ return 0;
+
+ for (sub.op = first_op; sub.op <= last_op; sub.op++) {
+ sb->pos = 0; /* reset position in strbuf */
+ subtest_case_str(sb, &sub, true /* print op */);
+
+ if (env.verbosity >= VERBOSE_NORMAL) /* this speeds up debugging */
+ printf("TEST CASE: %s\n", sb->buf);
+
+ err = verify_case_op(init_t, cond_t, x, y, sub.op);
+ if (err || env.verbosity >= VERBOSE_NORMAL)
+ ASSERT_OK(err, sb->buf);
+ if (err) {
+ ctx->cur_failure_cnt++;
+ if (ctx->cur_failure_cnt > ctx->max_failure_cnt)
+ return err;
+ return 0; /* keep testing other cases */
+ }
+ ctx->case_cnt++;
+ if ((ctx->case_cnt % 10000) == 0) {
+ double progress = (ctx->case_cnt + 0.0) / ctx->total_case_cnt;
+ u64 elapsed_ns = get_time_ns() - ctx->start_ns;
+ double remain_ns = elapsed_ns / progress * (1 - progress);
+
+ fprintf(env.stderr, "PROGRESS (%s): %d/%d (%.2lf%%), "
+ "elapsed %llu mins (%.2lf hrs), "
+ "ETA %.0lf mins (%.2lf hrs)\n",
+ ctx->progress_ctx,
+ ctx->case_cnt, ctx->total_case_cnt, 100.0 * progress,
+ elapsed_ns / 1000000000 / 60,
+ elapsed_ns / 1000000000.0 / 3600,
+ remain_ns / 1000000000.0 / 60,
+ remain_ns / 1000000000.0 / 3600);
+ }
+ }
+
+ return 0;
+}
+
+static int verify_case(struct ctx *ctx, enum num_t init_t, enum num_t cond_t,
+ struct range x, struct range y)
+{
+ return verify_case_opt(ctx, init_t, cond_t, x, y, true /* is_subtest */);
+}
+
+/* ================================
+ * GENERATED CASES FROM SEED VALUES
+ * ================================
+ */
+static int u64_cmp(const void *p1, const void *p2)
+{
+ u64 x1 = *(const u64 *)p1, x2 = *(const u64 *)p2;
+
+ return x1 != x2 ? (x1 < x2 ? -1 : 1) : 0;
+}
+
+static int u32_cmp(const void *p1, const void *p2)
+{
+ u32 x1 = *(const u32 *)p1, x2 = *(const u32 *)p2;
+
+ return x1 != x2 ? (x1 < x2 ? -1 : 1) : 0;
+}
+
+static int s64_cmp(const void *p1, const void *p2)
+{
+ s64 x1 = *(const s64 *)p1, x2 = *(const s64 *)p2;
+
+ return x1 != x2 ? (x1 < x2 ? -1 : 1) : 0;
+}
+
+static int s32_cmp(const void *p1, const void *p2)
+{
+ s32 x1 = *(const s32 *)p1, x2 = *(const s32 *)p2;
+
+ return x1 != x2 ? (x1 < x2 ? -1 : 1) : 0;
+}
+
+/* Generate valid unique constants from seeds, both signed and unsigned */
+static void gen_vals(struct ctx *ctx)
+{
+ int i, j, cnt = 0;
+
+ for (i = 0; i < ARRAY_SIZE(upper_seeds); i++) {
+ for (j = 0; j < ARRAY_SIZE(lower_seeds); j++) {
+ ctx->uvals[cnt++] = (((u64)upper_seeds[i]) << 32) | lower_seeds[j];
+ }
+ }
+
+ /* sort and compact uvals (i.e., it's `sort | uniq`) */
+ qsort(ctx->uvals, cnt, sizeof(*ctx->uvals), u64_cmp);
+ for (i = 1, j = 0; i < cnt; i++) {
+ if (ctx->uvals[j] == ctx->uvals[i])
+ continue;
+ j++;
+ ctx->uvals[j] = ctx->uvals[i];
+ }
+ ctx->val_cnt = j + 1;
+
+ /* we have exactly the same number of s64 values, they are just in
+ * a different order than u64s, so just sort them differently
+ */
+ for (i = 0; i < ctx->val_cnt; i++)
+ ctx->svals[i] = ctx->uvals[i];
+ qsort(ctx->svals, ctx->val_cnt, sizeof(*ctx->svals), s64_cmp);
+
+ if (env.verbosity >= VERBOSE_SUPER) {
+ DEFINE_STRBUF(sb1, 256);
+ DEFINE_STRBUF(sb2, 256);
+
+ for (i = 0; i < ctx->val_cnt; i++) {
+ sb1->pos = sb2->pos = 0;
+ snprintf_num(U64, sb1, ctx->uvals[i]);
+ snprintf_num(S64, sb2, ctx->svals[i]);
+ printf("SEED #%d: u64=%-20s s64=%-20s\n", i, sb1->buf, sb2->buf);
+ }
+ }
+
+ /* 32-bit values are generated separately */
+ cnt = 0;
+ for (i = 0; i < ARRAY_SIZE(lower_seeds); i++) {
+ ctx->usubvals[cnt++] = lower_seeds[i];
+ }
+
+ /* sort and compact usubvals (i.e., it's `sort | uniq`) */
+ qsort(ctx->usubvals, cnt, sizeof(*ctx->usubvals), u32_cmp);
+ for (i = 1, j = 0; i < cnt; i++) {
+ if (ctx->usubvals[j] == ctx->usubvals[i])
+ continue;
+ j++;
+ ctx->usubvals[j] = ctx->usubvals[i];
+ }
+ ctx->subval_cnt = j + 1;
+
+ for (i = 0; i < ctx->subval_cnt; i++)
+ ctx->ssubvals[i] = ctx->usubvals[i];
+ qsort(ctx->ssubvals, ctx->subval_cnt, sizeof(*ctx->ssubvals), s32_cmp);
+
+ if (env.verbosity >= VERBOSE_SUPER) {
+ DEFINE_STRBUF(sb1, 256);
+ DEFINE_STRBUF(sb2, 256);
+
+ for (i = 0; i < ctx->subval_cnt; i++) {
+ sb1->pos = sb2->pos = 0;
+ snprintf_num(U32, sb1, ctx->usubvals[i]);
+ snprintf_num(S32, sb2, ctx->ssubvals[i]);
+ printf("SUBSEED #%d: u32=%-10s s32=%-10s\n", i, sb1->buf, sb2->buf);
+ }
+ }
+}
+
+/* Generate valid ranges from upper/lower seeds */
+static int gen_ranges(struct ctx *ctx)
+{
+ int i, j, cnt = 0;
+
+ for (i = 0; i < ctx->val_cnt; i++) {
+ for (j = i; j < ctx->val_cnt; j++) {
+ if (env.verbosity >= VERBOSE_SUPER) {
+ DEFINE_STRBUF(sb1, 256);
+ DEFINE_STRBUF(sb2, 256);
+
+ sb1->pos = sb2->pos = 0;
+ snprintf_range(U64, sb1, range(U64, ctx->uvals[i], ctx->uvals[j]));
+ snprintf_range(S64, sb2, range(S64, ctx->svals[i], ctx->svals[j]));
+ printf("RANGE #%d: u64=%-40s s64=%-40s\n", cnt, sb1->buf, sb2->buf);
+ }
+ cnt++;
+ }
+ }
+ ctx->range_cnt = cnt;
+
+ ctx->uranges = calloc(ctx->range_cnt, sizeof(*ctx->uranges));
+ if (!ASSERT_OK_PTR(ctx->uranges, "uranges_calloc"))
+ return -EINVAL;
+ ctx->sranges = calloc(ctx->range_cnt, sizeof(*ctx->sranges));
+ if (!ASSERT_OK_PTR(ctx->sranges, "sranges_calloc"))
+ return -EINVAL;
+
+ cnt = 0;
+ for (i = 0; i < ctx->val_cnt; i++) {
+ for (j = i; j < ctx->val_cnt; j++) {
+ ctx->uranges[cnt] = range(U64, ctx->uvals[i], ctx->uvals[j]);
+ ctx->sranges[cnt] = range(S64, ctx->svals[i], ctx->svals[j]);
+ cnt++;
+ }
+ }
+
+ cnt = 0;
+ for (i = 0; i < ctx->subval_cnt; i++) {
+ for (j = i; j < ctx->subval_cnt; j++) {
+ if (env.verbosity >= VERBOSE_SUPER) {
+ DEFINE_STRBUF(sb1, 256);
+ DEFINE_STRBUF(sb2, 256);
+
+ sb1->pos = sb2->pos = 0;
+ snprintf_range(U32, sb1, range(U32, ctx->usubvals[i], ctx->usubvals[j]));
+ snprintf_range(S32, sb2, range(S32, ctx->ssubvals[i], ctx->ssubvals[j]));
+ printf("SUBRANGE #%d: u32=%-20s s32=%-20s\n", cnt, sb1->buf, sb2->buf);
+ }
+ cnt++;
+ }
+ }
+ ctx->subrange_cnt = cnt;
+
+ ctx->usubranges = calloc(ctx->subrange_cnt, sizeof(*ctx->usubranges));
+ if (!ASSERT_OK_PTR(ctx->usubranges, "usubranges_calloc"))
+ return -EINVAL;
+ ctx->ssubranges = calloc(ctx->subrange_cnt, sizeof(*ctx->ssubranges));
+ if (!ASSERT_OK_PTR(ctx->ssubranges, "ssubranges_calloc"))
+ return -EINVAL;
+
+ cnt = 0;
+ for (i = 0; i < ctx->subval_cnt; i++) {
+ for (j = i; j < ctx->subval_cnt; j++) {
+ ctx->usubranges[cnt] = range(U32, ctx->usubvals[i], ctx->usubvals[j]);
+ ctx->ssubranges[cnt] = range(S32, ctx->ssubvals[i], ctx->ssubvals[j]);
+ cnt++;
+ }
+ }
+
+ return 0;
+}
+
+static int parse_env_vars(struct ctx *ctx)
+{
+ const char *s;
+
+ if ((s = getenv("REG_BOUNDS_MAX_FAILURE_CNT"))) {
+ errno = 0;
+ ctx->max_failure_cnt = strtol(s, NULL, 10);
+ if (errno || ctx->max_failure_cnt < 0) {
+ ASSERT_OK(-errno, "REG_BOUNDS_MAX_FAILURE_CNT");
+ return -EINVAL;
+ }
+ }
+
+ if ((s = getenv("REG_BOUNDS_RAND_CASE_CNT"))) {
+ errno = 0;
+ ctx->rand_case_cnt = strtol(s, NULL, 10);
+ if (errno || ctx->rand_case_cnt < 0) {
+ ASSERT_OK(-errno, "REG_BOUNDS_RAND_CASE_CNT");
+ return -EINVAL;
+ }
+ }
+
+ if ((s = getenv("REG_BOUNDS_RAND_SEED"))) {
+ errno = 0;
+ ctx->rand_seed = strtoul(s, NULL, 10);
+ if (errno) {
+ ASSERT_OK(-errno, "REG_BOUNDS_RAND_SEED");
+ return -EINVAL;
+ }
+ }
+
+ return 0;
+}
+
+static int prepare_gen_tests(struct ctx *ctx)
+{
+ const char *s;
+ int err;
+
+ if (!(s = getenv("SLOW_TESTS")) || strcmp(s, "1") != 0) {
+ test__skip();
+ return -ENOTSUP;
+ }
+
+ err = parse_env_vars(ctx);
+ if (err)
+ return err;
+
+ gen_vals(ctx);
+ err = gen_ranges(ctx);
+ if (err) {
+ ASSERT_OK(err, "gen_ranges");
+ return err;
+ }
+
+ return 0;
+}
+
+/* Go over generated constants and ranges and validate various supported
+ * combinations of them
+ */
+static void validate_gen_range_vs_const_64(enum num_t init_t, enum num_t cond_t)
+{
+ struct ctx ctx;
+ struct range rconst;
+ const struct range *ranges;
+ const u64 *vals;
+ int i, j;
+
+ memset(&ctx, 0, sizeof(ctx));
+
+ if (prepare_gen_tests(&ctx))
+ goto cleanup;
+
+ ranges = init_t == U64 ? ctx.uranges : ctx.sranges;
+ vals = init_t == U64 ? ctx.uvals : (const u64 *)ctx.svals;
+
+ ctx.total_case_cnt = (last_op - first_op + 1) * (2 * ctx.range_cnt * ctx.val_cnt);
+ ctx.start_ns = get_time_ns();
+ snprintf(ctx.progress_ctx, sizeof(ctx.progress_ctx),
+ "RANGE x CONST, %s -> %s",
+ t_str(init_t), t_str(cond_t));
+
+ for (i = 0; i < ctx.val_cnt; i++) {
+ for (j = 0; j < ctx.range_cnt; j++) {
+ rconst = range(init_t, vals[i], vals[i]);
+
+ /* (u64|s64)(<range> x <const>) */
+ if (verify_case(&ctx, init_t, cond_t, ranges[j], rconst))
+ goto cleanup;
+ /* (u64|s64)(<const> x <range>) */
+ if (verify_case(&ctx, init_t, cond_t, rconst, ranges[j]))
+ goto cleanup;
+ }
+ }
+
+cleanup:
+ cleanup_ctx(&ctx);
+}
+
+static void validate_gen_range_vs_const_32(enum num_t init_t, enum num_t cond_t)
+{
+ struct ctx ctx;
+ struct range rconst;
+ const struct range *ranges;
+ const u32 *vals;
+ int i, j;
+
+ memset(&ctx, 0, sizeof(ctx));
+
+ if (prepare_gen_tests(&ctx))
+ goto cleanup;
+
+ ranges = init_t == U32 ? ctx.usubranges : ctx.ssubranges;
+ vals = init_t == U32 ? ctx.usubvals : (const u32 *)ctx.ssubvals;
+
+ ctx.total_case_cnt = (last_op - first_op + 1) * (2 * ctx.subrange_cnt * ctx.subval_cnt);
+ ctx.start_ns = get_time_ns();
+ snprintf(ctx.progress_ctx, sizeof(ctx.progress_ctx),
+ "RANGE x CONST, %s -> %s",
+ t_str(init_t), t_str(cond_t));
+
+ for (i = 0; i < ctx.subval_cnt; i++) {
+ for (j = 0; j < ctx.subrange_cnt; j++) {
+ rconst = range(init_t, vals[i], vals[i]);
+
+ /* (u32|s32)(<range> x <const>) */
+ if (verify_case(&ctx, init_t, cond_t, ranges[j], rconst))
+ goto cleanup;
+ /* (u32|s32)(<const> x <range>) */
+ if (verify_case(&ctx, init_t, cond_t, rconst, ranges[j]))
+ goto cleanup;
+ }
+ }
+
+cleanup:
+ cleanup_ctx(&ctx);
+}
+
+static void validate_gen_range_vs_range(enum num_t init_t, enum num_t cond_t)
+{
+ struct ctx ctx;
+ const struct range *ranges;
+ int i, j, rcnt;
+
+ memset(&ctx, 0, sizeof(ctx));
+
+ if (prepare_gen_tests(&ctx))
+ goto cleanup;
+
+ switch (init_t)
+ {
+ case U64:
+ ranges = ctx.uranges;
+ rcnt = ctx.range_cnt;
+ break;
+ case U32:
+ ranges = ctx.usubranges;
+ rcnt = ctx.subrange_cnt;
+ break;
+ case S64:
+ ranges = ctx.sranges;
+ rcnt = ctx.range_cnt;
+ break;
+ case S32:
+ ranges = ctx.ssubranges;
+ rcnt = ctx.subrange_cnt;
+ break;
+ default:
+ printf("validate_gen_range_vs_range!\n");
+ exit(1);
+ }
+
+ ctx.total_case_cnt = (last_op - first_op + 1) * (2 * rcnt * (rcnt + 1) / 2);
+ ctx.start_ns = get_time_ns();
+ snprintf(ctx.progress_ctx, sizeof(ctx.progress_ctx),
+ "RANGE x RANGE, %s -> %s",
+ t_str(init_t), t_str(cond_t));
+
+ for (i = 0; i < rcnt; i++) {
+ for (j = i; j < rcnt; j++) {
+ /* (<range> x <range>) */
+ if (verify_case(&ctx, init_t, cond_t, ranges[i], ranges[j]))
+ goto cleanup;
+ if (verify_case(&ctx, init_t, cond_t, ranges[j], ranges[i]))
+ goto cleanup;
+ }
+ }
+
+cleanup:
+ cleanup_ctx(&ctx);
+}
+
+/* Go over thousands of test cases generated from initial seed values.
+ * Given this take a long time, guard this begind SLOW_TESTS=1 envvar. If
+ * envvar is not set, this test is skipped during test_progs testing.
+ *
+ * We split this up into smaller subsets based on initialization and
+ * conditiona numeric domains to get an easy parallelization with test_progs'
+ * -j argument.
+ */
+
+/* RANGE x CONST, U64 initial range */
+void test_reg_bounds_gen_consts_u64_u64(void) { validate_gen_range_vs_const_64(U64, U64); }
+void test_reg_bounds_gen_consts_u64_s64(void) { validate_gen_range_vs_const_64(U64, S64); }
+void test_reg_bounds_gen_consts_u64_u32(void) { validate_gen_range_vs_const_64(U64, U32); }
+void test_reg_bounds_gen_consts_u64_s32(void) { validate_gen_range_vs_const_64(U64, S32); }
+/* RANGE x CONST, S64 initial range */
+void test_reg_bounds_gen_consts_s64_u64(void) { validate_gen_range_vs_const_64(S64, U64); }
+void test_reg_bounds_gen_consts_s64_s64(void) { validate_gen_range_vs_const_64(S64, S64); }
+void test_reg_bounds_gen_consts_s64_u32(void) { validate_gen_range_vs_const_64(S64, U32); }
+void test_reg_bounds_gen_consts_s64_s32(void) { validate_gen_range_vs_const_64(S64, S32); }
+/* RANGE x CONST, U32 initial range */
+void test_reg_bounds_gen_consts_u32_u64(void) { validate_gen_range_vs_const_32(U32, U64); }
+void test_reg_bounds_gen_consts_u32_s64(void) { validate_gen_range_vs_const_32(U32, S64); }
+void test_reg_bounds_gen_consts_u32_u32(void) { validate_gen_range_vs_const_32(U32, U32); }
+void test_reg_bounds_gen_consts_u32_s32(void) { validate_gen_range_vs_const_32(U32, S32); }
+/* RANGE x CONST, S32 initial range */
+void test_reg_bounds_gen_consts_s32_u64(void) { validate_gen_range_vs_const_32(S32, U64); }
+void test_reg_bounds_gen_consts_s32_s64(void) { validate_gen_range_vs_const_32(S32, S64); }
+void test_reg_bounds_gen_consts_s32_u32(void) { validate_gen_range_vs_const_32(S32, U32); }
+void test_reg_bounds_gen_consts_s32_s32(void) { validate_gen_range_vs_const_32(S32, S32); }
+
+/* RANGE x RANGE, U64 initial range */
+void test_reg_bounds_gen_ranges_u64_u64(void) { validate_gen_range_vs_range(U64, U64); }
+void test_reg_bounds_gen_ranges_u64_s64(void) { validate_gen_range_vs_range(U64, S64); }
+void test_reg_bounds_gen_ranges_u64_u32(void) { validate_gen_range_vs_range(U64, U32); }
+void test_reg_bounds_gen_ranges_u64_s32(void) { validate_gen_range_vs_range(U64, S32); }
+/* RANGE x RANGE, S64 initial range */
+void test_reg_bounds_gen_ranges_s64_u64(void) { validate_gen_range_vs_range(S64, U64); }
+void test_reg_bounds_gen_ranges_s64_s64(void) { validate_gen_range_vs_range(S64, S64); }
+void test_reg_bounds_gen_ranges_s64_u32(void) { validate_gen_range_vs_range(S64, U32); }
+void test_reg_bounds_gen_ranges_s64_s32(void) { validate_gen_range_vs_range(S64, S32); }
+/* RANGE x RANGE, U32 initial range */
+void test_reg_bounds_gen_ranges_u32_u64(void) { validate_gen_range_vs_range(U32, U64); }
+void test_reg_bounds_gen_ranges_u32_s64(void) { validate_gen_range_vs_range(U32, S64); }
+void test_reg_bounds_gen_ranges_u32_u32(void) { validate_gen_range_vs_range(U32, U32); }
+void test_reg_bounds_gen_ranges_u32_s32(void) { validate_gen_range_vs_range(U32, S32); }
+/* RANGE x RANGE, S32 initial range */
+void test_reg_bounds_gen_ranges_s32_u64(void) { validate_gen_range_vs_range(S32, U64); }
+void test_reg_bounds_gen_ranges_s32_s64(void) { validate_gen_range_vs_range(S32, S64); }
+void test_reg_bounds_gen_ranges_s32_u32(void) { validate_gen_range_vs_range(S32, U32); }
+void test_reg_bounds_gen_ranges_s32_s32(void) { validate_gen_range_vs_range(S32, S32); }
+
+#define DEFAULT_RAND_CASE_CNT 100
+
+#define RAND_21BIT_MASK ((1 << 22) - 1)
+
+static u64 rand_u64()
+{
+ /* RAND_MAX is guaranteed to be at least 1<<15, but in practice it
+ * seems to be 1<<31, so we need to call it thrice to get full u64;
+ * we'll use rougly equal split: 22 + 21 + 21 bits
+ */
+ return ((u64)random() << 42) |
+ (((u64)random() & RAND_21BIT_MASK) << 21) |
+ (random() & RAND_21BIT_MASK);
+}
+
+static u64 rand_const(enum num_t t)
+{
+ return cast_t(t, rand_u64());
+}
+
+static struct range rand_range(enum num_t t)
+{
+ u64 x = rand_const(t), y = rand_const(t);
+
+ return range(t, min_t(t, x, y), max_t(t, x, y));
+}
+
+static void validate_rand_ranges(enum num_t init_t, enum num_t cond_t, bool const_range)
+{
+ struct ctx ctx;
+ struct range range1, range2;
+ int err, i;
+ u64 t;
+
+ memset(&ctx, 0, sizeof(ctx));
+
+ err = parse_env_vars(&ctx);
+ if (err) {
+ ASSERT_OK(err, "parse_env_vars");
+ return;
+ }
+
+ if (ctx.rand_case_cnt == 0)
+ ctx.rand_case_cnt = DEFAULT_RAND_CASE_CNT;
+ if (ctx.rand_seed == 0)
+ ctx.rand_seed = (unsigned)get_time_ns();
+
+ srandom(ctx.rand_seed);
+
+ ctx.total_case_cnt = (last_op - first_op + 1) * (2 * ctx.rand_case_cnt);
+ ctx.start_ns = get_time_ns();
+ snprintf(ctx.progress_ctx, sizeof(ctx.progress_ctx),
+ "[RANDOM SEED %u] RANGE x %s, %s -> %s",
+ ctx.rand_seed, const_range ? "CONST" : "RANGE",
+ t_str(init_t), t_str(cond_t));
+
+ for (i = 0; i < ctx.rand_case_cnt; i++) {
+ range1 = rand_range(init_t);
+ if (const_range) {
+ t = rand_const(init_t);
+ range2 = range(init_t, t, t);
+ } else {
+ range2 = rand_range(init_t);
+ }
+
+ /* <range1> x <range2> */
+ if (verify_case_opt(&ctx, init_t, cond_t, range1, range2, false /* !is_subtest */))
+ goto cleanup;
+ /* <range2> x <range1> */
+ if (verify_case_opt(&ctx, init_t, cond_t, range2, range1, false /* !is_subtest */))
+ goto cleanup;
+ }
+
+cleanup:
+ /* make sure we report random seed for reproducing */
+ ASSERT_TRUE(true, ctx.progress_ctx);
+ cleanup_ctx(&ctx);
+}
+
+/* [RANDOM] RANGE x CONST, U64 initial range */
+void test_reg_bounds_rand_consts_u64_u64(void) { validate_rand_ranges(U64, U64, true /* const */); }
+void test_reg_bounds_rand_consts_u64_s64(void) { validate_rand_ranges(U64, S64, true /* const */); }
+void test_reg_bounds_rand_consts_u64_u32(void) { validate_rand_ranges(U64, U32, true /* const */); }
+void test_reg_bounds_rand_consts_u64_s32(void) { validate_rand_ranges(U64, S32, true /* const */); }
+/* [RANDOM] RANGE x CONST, S64 initial range */
+void test_reg_bounds_rand_consts_s64_u64(void) { validate_rand_ranges(S64, U64, true /* const */); }
+void test_reg_bounds_rand_consts_s64_s64(void) { validate_rand_ranges(S64, S64, true /* const */); }
+void test_reg_bounds_rand_consts_s64_u32(void) { validate_rand_ranges(S64, U32, true /* const */); }
+void test_reg_bounds_rand_consts_s64_s32(void) { validate_rand_ranges(S64, S32, true /* const */); }
+/* [RANDOM] RANGE x CONST, U32 initial range */
+void test_reg_bounds_rand_consts_u32_u64(void) { validate_rand_ranges(U32, U64, true /* const */); }
+void test_reg_bounds_rand_consts_u32_s64(void) { validate_rand_ranges(U32, S64, true /* const */); }
+void test_reg_bounds_rand_consts_u32_u32(void) { validate_rand_ranges(U32, U32, true /* const */); }
+void test_reg_bounds_rand_consts_u32_s32(void) { validate_rand_ranges(U32, S32, true /* const */); }
+/* [RANDOM] RANGE x CONST, S32 initial range */
+void test_reg_bounds_rand_consts_s32_u64(void) { validate_rand_ranges(S32, U64, true /* const */); }
+void test_reg_bounds_rand_consts_s32_s64(void) { validate_rand_ranges(S32, S64, true /* const */); }
+void test_reg_bounds_rand_consts_s32_u32(void) { validate_rand_ranges(S32, U32, true /* const */); }
+void test_reg_bounds_rand_consts_s32_s32(void) { validate_rand_ranges(S32, S32, true /* const */); }
+
+/* [RANDOM] RANGE x RANGE, U64 initial range */
+void test_reg_bounds_rand_ranges_u64_u64(void) { validate_rand_ranges(U64, U64, false /* range */); }
+void test_reg_bounds_rand_ranges_u64_s64(void) { validate_rand_ranges(U64, S64, false /* range */); }
+void test_reg_bounds_rand_ranges_u64_u32(void) { validate_rand_ranges(U64, U32, false /* range */); }
+void test_reg_bounds_rand_ranges_u64_s32(void) { validate_rand_ranges(U64, S32, false /* range */); }
+/* [RANDOM] RANGE x RANGE, S64 initial range */
+void test_reg_bounds_rand_ranges_s64_u64(void) { validate_rand_ranges(S64, U64, false /* range */); }
+void test_reg_bounds_rand_ranges_s64_s64(void) { validate_rand_ranges(S64, S64, false /* range */); }
+void test_reg_bounds_rand_ranges_s64_u32(void) { validate_rand_ranges(S64, U32, false /* range */); }
+void test_reg_bounds_rand_ranges_s64_s32(void) { validate_rand_ranges(S64, S32, false /* range */); }
+/* [RANDOM] RANGE x RANGE, U32 initial range */
+void test_reg_bounds_rand_ranges_u32_u64(void) { validate_rand_ranges(U32, U64, false /* range */); }
+void test_reg_bounds_rand_ranges_u32_s64(void) { validate_rand_ranges(U32, S64, false /* range */); }
+void test_reg_bounds_rand_ranges_u32_u32(void) { validate_rand_ranges(U32, U32, false /* range */); }
+void test_reg_bounds_rand_ranges_u32_s32(void) { validate_rand_ranges(U32, S32, false /* range */); }
+/* [RANDOM] RANGE x RANGE, S32 initial range */
+void test_reg_bounds_rand_ranges_s32_u64(void) { validate_rand_ranges(S32, U64, false /* range */); }
+void test_reg_bounds_rand_ranges_s32_s64(void) { validate_rand_ranges(S32, S64, false /* range */); }
+void test_reg_bounds_rand_ranges_s32_u32(void) { validate_rand_ranges(S32, U32, false /* range */); }
+void test_reg_bounds_rand_ranges_s32_s32(void) { validate_rand_ranges(S32, S32, false /* range */); }
+
+/* A set of hard-coded "interesting" cases to validate as part of normal
+ * test_progs test runs
+ */
+static struct subtest_case crafted_cases[] = {
+ {U64, U64, {0, 0xffffffff}, {0, 0}},
+ {U64, U64, {0, 0x80000000}, {0, 0}},
+ {U64, U64, {0x100000000ULL, 0x100000100ULL}, {0, 0}},
+ {U64, U64, {0x100000000ULL, 0x180000000ULL}, {0, 0}},
+ {U64, U64, {0x100000000ULL, 0x1ffffff00ULL}, {0, 0}},
+ {U64, U64, {0x100000000ULL, 0x1ffffff01ULL}, {0, 0}},
+ {U64, U64, {0x100000000ULL, 0x1fffffffeULL}, {0, 0}},
+ {U64, U64, {0x100000001ULL, 0x1000000ffULL}, {0, 0}},
+
+ /* single point overlap, interesting BPF_EQ and BPF_NE interactions */
+ {U64, U64, {0, 1}, {1, 0x80000000}},
+ {U64, S64, {0, 1}, {1, 0x80000000}},
+ {U64, U32, {0, 1}, {1, 0x80000000}},
+ {U64, S32, {0, 1}, {1, 0x80000000}},
+
+ {U64, S64, {0, 0xffffffff00000000ULL}, {0, 0}},
+ {U64, S64, {0x7fffffffffffffffULL, 0xffffffff00000000ULL}, {0, 0}},
+ {U64, S64, {0x7fffffff00000001ULL, 0xffffffff00000000ULL}, {0, 0}},
+ {U64, S64, {0, 0xffffffffULL}, {1, 1}},
+ {U64, S64, {0, 0xffffffffULL}, {0x7fffffff, 0x7fffffff}},
+
+ {U64, U32, {0, 0x100000000}, {0, 0}},
+ {U64, U32, {0xfffffffe, 0x100000000}, {0x80000000, 0x80000000}},
+
+ {U64, S32, {0, 0xffffffff00000000ULL}, {0, 0}},
+ /* these are tricky cases where lower 32 bits allow to tighten 64
+ * bit boundaries based on tightened lower 32 bit boundaries
+ */
+ {U64, S32, {0, 0x0ffffffffULL}, {0, 0}},
+ {U64, S32, {0, 0x100000000ULL}, {0, 0}},
+ {U64, S32, {0, 0x100000001ULL}, {0, 0}},
+ {U64, S32, {0, 0x180000000ULL}, {0, 0}},
+ {U64, S32, {0, 0x17fffffffULL}, {0, 0}},
+ {U64, S32, {0, 0x180000001ULL}, {0, 0}},
+
+ /* verifier knows about [-1, 0] range for s32 for this case already */
+ {S64, S64, {0xffffffffffffffffULL, 0}, {0xffffffff00000000ULL, 0xffffffff00000000ULL}},
+ /* but didn't know about these cases initially */
+ {U64, U64, {0xffffffff, 0x100000000ULL}, {0, 0}}, /* s32: [-1, 0] */
+ {U64, U64, {0xffffffff, 0x100000001ULL}, {0, 0}}, /* s32: [-1, 1] */
+
+ /* longer convergence case: learning from u64 -> s64 -> u64 -> u32,
+ * arriving at u32: [1, U32_MAX] (instead of more pessimistic [0, U32_MAX])
+ */
+ {S64, U64, {0xffffffff00000001ULL, 0}, {0xffffffff00000000ULL, 0xffffffff00000000ULL}},
+
+ {U32, U32, {1, U32_MAX}, {0, 0}},
+
+ {U32, S32, {0, U32_MAX}, {U32_MAX, U32_MAX}},
+
+ {S32, U64, {(u32)S32_MIN, (u32)S32_MIN}, {(u32)(s32)-255, 0}},
+ {S32, S64, {(u32)S32_MIN, (u32)(s32)-255}, {(u32)(s32)-2, 0}},
+ {S32, S64, {0, 1}, {(u32)S32_MIN, (u32)S32_MIN}},
+ {S32, U32, {(u32)S32_MIN, (u32)S32_MIN}, {(u32)S32_MIN, (u32)S32_MIN}},
+
+ /* edge overlap testings for BPF_NE */
+ {U64, U64, {0, U64_MAX}, {U64_MAX, U64_MAX}},
+ {U64, U64, {0, U64_MAX}, {0, 0}},
+ {S64, U64, {S64_MIN, 0}, {S64_MIN, S64_MIN}},
+ {S64, U64, {S64_MIN, 0}, {0, 0}},
+ {S64, U64, {S64_MIN, S64_MAX}, {S64_MAX, S64_MAX}},
+ {U32, U32, {0, U32_MAX}, {0, 0}},
+ {U32, U32, {0, U32_MAX}, {U32_MAX, U32_MAX}},
+ {S32, U32, {(u32)S32_MIN, 0}, {0, 0}},
+ {S32, U32, {(u32)S32_MIN, 0}, {(u32)S32_MIN, (u32)S32_MIN}},
+ {S32, U32, {(u32)S32_MIN, S32_MAX}, {S32_MAX, S32_MAX}},
+};
+
+/* Go over crafted hard-coded cases. This is fast, so we do it as part of
+ * normal test_progs run.
+ */
+void test_reg_bounds_crafted(void)
+{
+ struct ctx ctx;
+ int i;
+
+ memset(&ctx, 0, sizeof(ctx));
+
+ for (i = 0; i < ARRAY_SIZE(crafted_cases); i++) {
+ struct subtest_case *c = &crafted_cases[i];
+
+ verify_case(&ctx, c->init_t, c->cond_t, c->x, c->y);
+ verify_case(&ctx, c->init_t, c->cond_t, c->y, c->x);
+ }
+
+ cleanup_ctx(&ctx);
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/resolve_btfids.c b/tools/testing/selftests/bpf/prog_tests/resolve_btfids.c
new file mode 100644
index 000000000000..f81d08d429a2
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/resolve_btfids.c
@@ -0,0 +1,167 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include <linux/err.h>
+#include <string.h>
+#include <bpf/btf.h>
+#include <bpf/libbpf.h>
+#include <linux/btf.h>
+#include <linux/kernel.h>
+#define CONFIG_DEBUG_INFO_BTF
+#include <linux/btf_ids.h>
+#include "test_progs.h"
+
+static int duration;
+
+struct symbol {
+ const char *name;
+ int type;
+ int id;
+};
+
+struct symbol test_symbols[] = {
+ { "unused", BTF_KIND_UNKN, 0 },
+ { "S", BTF_KIND_TYPEDEF, -1 },
+ { "T", BTF_KIND_TYPEDEF, -1 },
+ { "U", BTF_KIND_TYPEDEF, -1 },
+ { "S", BTF_KIND_STRUCT, -1 },
+ { "U", BTF_KIND_UNION, -1 },
+ { "func", BTF_KIND_FUNC, -1 },
+};
+
+/* Align the .BTF_ids section to 4 bytes */
+asm (
+".pushsection " BTF_IDS_SECTION " ,\"a\"; \n"
+".balign 4, 0; \n"
+".popsection; \n");
+
+BTF_ID_LIST(test_list_local)
+BTF_ID_UNUSED
+BTF_ID(typedef, S)
+BTF_ID(typedef, T)
+BTF_ID(typedef, U)
+BTF_ID(struct, S)
+BTF_ID(union, U)
+BTF_ID(func, func)
+
+extern __u32 test_list_global[];
+BTF_ID_LIST_GLOBAL(test_list_global, 1)
+BTF_ID_UNUSED
+BTF_ID(typedef, S)
+BTF_ID(typedef, T)
+BTF_ID(typedef, U)
+BTF_ID(struct, S)
+BTF_ID(union, U)
+BTF_ID(func, func)
+
+BTF_SET_START(test_set)
+BTF_ID(typedef, S)
+BTF_ID(typedef, T)
+BTF_ID(typedef, U)
+BTF_ID(struct, S)
+BTF_ID(union, U)
+BTF_ID(func, func)
+BTF_SET_END(test_set)
+
+static int
+__resolve_symbol(struct btf *btf, int type_id)
+{
+ const struct btf_type *type;
+ const char *str;
+ unsigned int i;
+
+ type = btf__type_by_id(btf, type_id);
+ if (!type) {
+ PRINT_FAIL("Failed to get type for ID %d\n", type_id);
+ return -1;
+ }
+
+ for (i = 0; i < ARRAY_SIZE(test_symbols); i++) {
+ if (test_symbols[i].id >= 0)
+ continue;
+
+ if (BTF_INFO_KIND(type->info) != test_symbols[i].type)
+ continue;
+
+ str = btf__name_by_offset(btf, type->name_off);
+ if (!str) {
+ PRINT_FAIL("Failed to get name for BTF ID %d\n", type_id);
+ return -1;
+ }
+
+ if (!strcmp(str, test_symbols[i].name))
+ test_symbols[i].id = type_id;
+ }
+
+ return 0;
+}
+
+static int resolve_symbols(void)
+{
+ struct btf *btf;
+ int type_id;
+ __u32 nr;
+
+ btf = btf__parse_elf("btf_data.bpf.o", NULL);
+ if (CHECK(libbpf_get_error(btf), "resolve",
+ "Failed to load BTF from btf_data.o\n"))
+ return -1;
+
+ nr = btf__type_cnt(btf);
+
+ for (type_id = 1; type_id < nr; type_id++) {
+ if (__resolve_symbol(btf, type_id))
+ break;
+ }
+
+ btf__free(btf);
+ return 0;
+}
+
+void test_resolve_btfids(void)
+{
+ __u32 *test_list, *test_lists[] = { test_list_local, test_list_global };
+ unsigned int i, j;
+ int ret = 0;
+
+ if (resolve_symbols())
+ return;
+
+ /* Check BTF_ID_LIST(test_list_local) and
+ * BTF_ID_LIST_GLOBAL(test_list_global) IDs
+ */
+ for (j = 0; j < ARRAY_SIZE(test_lists); j++) {
+ test_list = test_lists[j];
+ for (i = 0; i < ARRAY_SIZE(test_symbols); i++) {
+ ret = CHECK(test_list[i] != test_symbols[i].id,
+ "id_check",
+ "wrong ID for %s (%d != %d)\n",
+ test_symbols[i].name,
+ test_list[i], test_symbols[i].id);
+ if (ret)
+ return;
+ }
+ }
+
+ /* Check BTF_SET_START(test_set) IDs */
+ for (i = 0; i < test_set.cnt; i++) {
+ bool found = false;
+
+ for (j = 0; j < ARRAY_SIZE(test_symbols); j++) {
+ if (test_symbols[j].id != test_set.ids[i])
+ continue;
+ found = true;
+ break;
+ }
+
+ ret = CHECK(!found, "id_check",
+ "ID %d not found in test_symbols\n",
+ test_set.ids[i]);
+ if (ret)
+ break;
+
+ if (i > 0) {
+ if (!ASSERT_LE(test_set.ids[i - 1], test_set.ids[i], "sort_check"))
+ return;
+ }
+ }
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/ringbuf.c b/tools/testing/selftests/bpf/prog_tests/ringbuf.c
index c1650548433c..48c5695b7abf 100644
--- a/tools/testing/selftests/bpf/prog_tests/ringbuf.c
+++ b/tools/testing/selftests/bpf/prog_tests/ringbuf.c
@@ -12,7 +12,8 @@
#include <sys/sysinfo.h>
#include <linux/perf_event.h>
#include <linux/ring_buffer.h>
-#include "test_ringbuf.skel.h"
+#include "test_ringbuf.lskel.h"
+#include "test_ringbuf_map_key.lskel.h"
#define EDONE 7777
@@ -58,7 +59,8 @@ static int process_sample(void *ctx, void *data, size_t len)
}
}
-static struct test_ringbuf *skel;
+static struct test_ringbuf_map_key_lskel *skel_map_key;
+static struct test_ringbuf_lskel *skel;
static struct ring_buffer *ringbuf;
static void trigger_samples()
@@ -81,38 +83,102 @@ static void *poll_thread(void *input)
return (void *)(long)ring_buffer__poll(ringbuf, timeout);
}
-void test_ringbuf(void)
+static void ringbuf_subtest(void)
{
const size_t rec_sz = BPF_RINGBUF_HDR_SZ + sizeof(struct sample);
pthread_t thread;
long bg_ret = -1;
- int err, cnt;
-
- skel = test_ringbuf__open_and_load();
- if (CHECK(!skel, "skel_open_load", "skeleton open&load failed\n"))
+ int err, cnt, rb_fd;
+ int page_size = getpagesize();
+ void *mmap_ptr, *tmp_ptr;
+ struct ring *ring;
+ int map_fd;
+ unsigned long avail_data, ring_size, cons_pos, prod_pos;
+
+ skel = test_ringbuf_lskel__open();
+ if (CHECK(!skel, "skel_open", "skeleton open failed\n"))
return;
+ skel->maps.ringbuf.max_entries = page_size;
+
+ err = test_ringbuf_lskel__load(skel);
+ if (CHECK(err != 0, "skel_load", "skeleton load failed\n"))
+ goto cleanup;
+
+ rb_fd = skel->maps.ringbuf.map_fd;
+ /* good read/write cons_pos */
+ mmap_ptr = mmap(NULL, page_size, PROT_READ | PROT_WRITE, MAP_SHARED, rb_fd, 0);
+ ASSERT_OK_PTR(mmap_ptr, "rw_cons_pos");
+ tmp_ptr = mremap(mmap_ptr, page_size, 2 * page_size, MREMAP_MAYMOVE);
+ if (!ASSERT_ERR_PTR(tmp_ptr, "rw_extend"))
+ goto cleanup;
+ ASSERT_ERR(mprotect(mmap_ptr, page_size, PROT_EXEC), "exec_cons_pos_protect");
+ ASSERT_OK(munmap(mmap_ptr, page_size), "unmap_rw");
+
+ /* bad writeable prod_pos */
+ mmap_ptr = mmap(NULL, page_size, PROT_WRITE, MAP_SHARED, rb_fd, page_size);
+ err = -errno;
+ ASSERT_ERR_PTR(mmap_ptr, "wr_prod_pos");
+ ASSERT_EQ(err, -EPERM, "wr_prod_pos_err");
+
+ /* bad writeable data pages */
+ mmap_ptr = mmap(NULL, page_size, PROT_WRITE, MAP_SHARED, rb_fd, 2 * page_size);
+ err = -errno;
+ ASSERT_ERR_PTR(mmap_ptr, "wr_data_page_one");
+ ASSERT_EQ(err, -EPERM, "wr_data_page_one_err");
+ mmap_ptr = mmap(NULL, page_size, PROT_WRITE, MAP_SHARED, rb_fd, 3 * page_size);
+ ASSERT_ERR_PTR(mmap_ptr, "wr_data_page_two");
+ mmap_ptr = mmap(NULL, 2 * page_size, PROT_WRITE, MAP_SHARED, rb_fd, 2 * page_size);
+ ASSERT_ERR_PTR(mmap_ptr, "wr_data_page_all");
+
+ /* good read-only pages */
+ mmap_ptr = mmap(NULL, 4 * page_size, PROT_READ, MAP_SHARED, rb_fd, 0);
+ if (!ASSERT_OK_PTR(mmap_ptr, "ro_prod_pos"))
+ goto cleanup;
+
+ ASSERT_ERR(mprotect(mmap_ptr, 4 * page_size, PROT_WRITE), "write_protect");
+ ASSERT_ERR(mprotect(mmap_ptr, 4 * page_size, PROT_EXEC), "exec_protect");
+ ASSERT_ERR_PTR(mremap(mmap_ptr, 0, 4 * page_size, MREMAP_MAYMOVE), "ro_remap");
+ ASSERT_OK(munmap(mmap_ptr, 4 * page_size), "unmap_ro");
+
+ /* good read-only pages with initial offset */
+ mmap_ptr = mmap(NULL, page_size, PROT_READ, MAP_SHARED, rb_fd, page_size);
+ if (!ASSERT_OK_PTR(mmap_ptr, "ro_prod_pos"))
+ goto cleanup;
+
+ ASSERT_ERR(mprotect(mmap_ptr, page_size, PROT_WRITE), "write_protect");
+ ASSERT_ERR(mprotect(mmap_ptr, page_size, PROT_EXEC), "exec_protect");
+ ASSERT_ERR_PTR(mremap(mmap_ptr, 0, 3 * page_size, MREMAP_MAYMOVE), "ro_remap");
+ ASSERT_OK(munmap(mmap_ptr, page_size), "unmap_ro");
+
/* only trigger BPF program for current process */
skel->bss->pid = getpid();
- ringbuf = ring_buffer__new(bpf_map__fd(skel->maps.ringbuf),
+ ringbuf = ring_buffer__new(skel->maps.ringbuf.map_fd,
process_sample, NULL, NULL);
if (CHECK(!ringbuf, "ringbuf_create", "failed to create ringbuf\n"))
goto cleanup;
- err = test_ringbuf__attach(skel);
+ err = test_ringbuf_lskel__attach(skel);
if (CHECK(err, "skel_attach", "skeleton attachment failed: %d\n", err))
goto cleanup;
trigger_samples();
+ ring = ring_buffer__ring(ringbuf, 0);
+ if (!ASSERT_OK_PTR(ring, "ring_buffer__ring_idx_0"))
+ goto cleanup;
+
+ map_fd = ring__map_fd(ring);
+ ASSERT_EQ(map_fd, skel->maps.ringbuf.map_fd, "ring_map_fd");
+
/* 2 submitted + 1 discarded records */
CHECK(skel->bss->avail_data != 3 * rec_sz,
"err_avail_size", "exp %ld, got %ld\n",
3L * rec_sz, skel->bss->avail_data);
- CHECK(skel->bss->ring_size != 4096,
+ CHECK(skel->bss->ring_size != page_size,
"err_ring_size", "exp %ld, got %ld\n",
- 4096L, skel->bss->ring_size);
+ (long)page_size, skel->bss->ring_size);
CHECK(skel->bss->cons_pos != 0,
"err_cons_pos", "exp %ld, got %ld\n",
0L, skel->bss->cons_pos);
@@ -120,6 +186,18 @@ void test_ringbuf(void)
"err_prod_pos", "exp %ld, got %ld\n",
3L * rec_sz, skel->bss->prod_pos);
+ /* verify getting this data directly via the ring object yields the same
+ * results
+ */
+ avail_data = ring__avail_data_size(ring);
+ ASSERT_EQ(avail_data, 3 * rec_sz, "ring_avail_size");
+ ring_size = ring__size(ring);
+ ASSERT_EQ(ring_size, page_size, "ring_ring_size");
+ cons_pos = ring__consumer_pos(ring);
+ ASSERT_EQ(cons_pos, 0, "ring_cons_pos");
+ prod_pos = ring__producer_pos(ring);
+ ASSERT_EQ(prod_pos, 3 * rec_sz, "ring_prod_pos");
+
/* poll for samples */
err = ring_buffer__poll(ringbuf, -1);
@@ -217,9 +295,19 @@ void test_ringbuf(void)
if (CHECK(err, "join_bg", "err %d\n", err))
goto cleanup;
- if (CHECK(bg_ret != 1, "bg_ret", "epoll_wait result: %ld", bg_ret))
+ if (CHECK(bg_ret <= 0, "bg_ret", "epoll_wait result: %ld", bg_ret))
goto cleanup;
+ /* due to timing variations, there could still be non-notified
+ * samples, so consume them here to collect all the samples
+ */
+ err = ring_buffer__consume(ringbuf);
+ CHECK(err < 0, "rb_consume", "failed: %d\b", err);
+
+ /* also consume using ring__consume to make sure it works the same */
+ err = ring__consume(ring);
+ ASSERT_GE(err, 0, "ring_consume");
+
/* 3 rounds, 2 samples each */
cnt = atomic_xchg(&sample_cnt, 0);
CHECK(cnt != 6, "cnt", "exp %d samples, got %d\n", 6, cnt);
@@ -232,8 +320,70 @@ void test_ringbuf(void)
CHECK(skel->bss->discarded != 1, "err_discarded", "exp %ld, got %ld\n",
1L, skel->bss->discarded);
- test_ringbuf__detach(skel);
+ test_ringbuf_lskel__detach(skel);
cleanup:
ring_buffer__free(ringbuf);
- test_ringbuf__destroy(skel);
+ test_ringbuf_lskel__destroy(skel);
+}
+
+static int process_map_key_sample(void *ctx, void *data, size_t len)
+{
+ struct sample *s;
+ int err, val;
+
+ s = data;
+ switch (s->seq) {
+ case 1:
+ ASSERT_EQ(s->value, 42, "sample_value");
+ err = bpf_map_lookup_elem(skel_map_key->maps.hash_map.map_fd,
+ s, &val);
+ ASSERT_OK(err, "hash_map bpf_map_lookup_elem");
+ ASSERT_EQ(val, 1, "hash_map val");
+ return -EDONE;
+ default:
+ return 0;
+ }
+}
+
+static void ringbuf_map_key_subtest(void)
+{
+ int err;
+
+ skel_map_key = test_ringbuf_map_key_lskel__open();
+ if (!ASSERT_OK_PTR(skel_map_key, "test_ringbuf_map_key_lskel__open"))
+ return;
+
+ skel_map_key->maps.ringbuf.max_entries = getpagesize();
+ skel_map_key->bss->pid = getpid();
+
+ err = test_ringbuf_map_key_lskel__load(skel_map_key);
+ if (!ASSERT_OK(err, "test_ringbuf_map_key_lskel__load"))
+ goto cleanup;
+
+ ringbuf = ring_buffer__new(skel_map_key->maps.ringbuf.map_fd,
+ process_map_key_sample, NULL, NULL);
+ if (!ASSERT_OK_PTR(ringbuf, "ring_buffer__new"))
+ goto cleanup;
+
+ err = test_ringbuf_map_key_lskel__attach(skel_map_key);
+ if (!ASSERT_OK(err, "test_ringbuf_map_key_lskel__attach"))
+ goto cleanup_ringbuf;
+
+ syscall(__NR_getpgid);
+ ASSERT_EQ(skel_map_key->bss->seq, 1, "skel_map_key->bss->seq");
+ err = ring_buffer__poll(ringbuf, -1);
+ ASSERT_EQ(err, -EDONE, "ring_buffer__poll");
+
+cleanup_ringbuf:
+ ring_buffer__free(ringbuf);
+cleanup:
+ test_ringbuf_map_key_lskel__destroy(skel_map_key);
+}
+
+void test_ringbuf(void)
+{
+ if (test__start_subtest("ringbuf"))
+ ringbuf_subtest();
+ if (test__start_subtest("ringbuf_map_key"))
+ ringbuf_map_key_subtest();
}
diff --git a/tools/testing/selftests/bpf/prog_tests/ringbuf_multi.c b/tools/testing/selftests/bpf/prog_tests/ringbuf_multi.c
index 78e450609803..58522195081b 100644
--- a/tools/testing/selftests/bpf/prog_tests/ringbuf_multi.c
+++ b/tools/testing/selftests/bpf/prog_tests/ringbuf_multi.c
@@ -41,13 +41,43 @@ static int process_sample(void *ctx, void *data, size_t len)
void test_ringbuf_multi(void)
{
struct test_ringbuf_multi *skel;
- struct ring_buffer *ringbuf;
+ struct ring_buffer *ringbuf = NULL;
+ struct ring *ring_old;
+ struct ring *ring;
int err;
+ int page_size = getpagesize();
+ int proto_fd = -1;
- skel = test_ringbuf_multi__open_and_load();
- if (CHECK(!skel, "skel_open_load", "skeleton open&load failed\n"))
+ skel = test_ringbuf_multi__open();
+ if (CHECK(!skel, "skel_open", "skeleton open failed\n"))
return;
+ /* validate ringbuf size adjustment logic */
+ ASSERT_EQ(bpf_map__max_entries(skel->maps.ringbuf1), page_size, "rb1_size_before");
+ ASSERT_OK(bpf_map__set_max_entries(skel->maps.ringbuf1, page_size + 1), "rb1_resize");
+ ASSERT_EQ(bpf_map__max_entries(skel->maps.ringbuf1), 2 * page_size, "rb1_size_after");
+ ASSERT_OK(bpf_map__set_max_entries(skel->maps.ringbuf1, page_size), "rb1_reset");
+ ASSERT_EQ(bpf_map__max_entries(skel->maps.ringbuf1), page_size, "rb1_size_final");
+
+ proto_fd = bpf_map_create(BPF_MAP_TYPE_RINGBUF, NULL, 0, 0, page_size, NULL);
+ if (CHECK(proto_fd < 0, "bpf_map_create", "bpf_map_create failed\n"))
+ goto cleanup;
+
+ err = bpf_map__set_inner_map_fd(skel->maps.ringbuf_hash, proto_fd);
+ if (CHECK(err != 0, "bpf_map__set_inner_map_fd", "bpf_map__set_inner_map_fd failed\n"))
+ goto cleanup;
+
+ err = test_ringbuf_multi__load(skel);
+ if (CHECK(err != 0, "skel_load", "skeleton load failed\n"))
+ goto cleanup;
+
+ close(proto_fd);
+ proto_fd = -1;
+
+ /* make sure we can't resize ringbuf after object load */
+ if (!ASSERT_ERR(bpf_map__set_max_entries(skel->maps.ringbuf1, 3 * page_size), "rb1_resize_after_load"))
+ goto cleanup;
+
/* only trigger BPF program for current process */
skel->bss->pid = getpid();
@@ -56,11 +86,24 @@ void test_ringbuf_multi(void)
if (CHECK(!ringbuf, "ringbuf_create", "failed to create ringbuf\n"))
goto cleanup;
+ /* verify ring_buffer__ring returns expected results */
+ ring = ring_buffer__ring(ringbuf, 0);
+ if (!ASSERT_OK_PTR(ring, "ring_buffer__ring_idx_0"))
+ goto cleanup;
+ ring_old = ring;
+ ring = ring_buffer__ring(ringbuf, 1);
+ ASSERT_ERR_PTR(ring, "ring_buffer__ring_idx_1");
+
err = ring_buffer__add(ringbuf, bpf_map__fd(skel->maps.ringbuf2),
process_sample, (void *)(long)2);
if (CHECK(err, "ringbuf_add", "failed to add another ring\n"))
goto cleanup;
+ /* verify adding a new ring didn't invalidate our older pointer */
+ ring = ring_buffer__ring(ringbuf, 0);
+ if (!ASSERT_EQ(ring, ring_old, "ring_buffer__ring_again"))
+ goto cleanup;
+
err = test_ringbuf_multi__attach(skel);
if (CHECK(err, "skel_attach", "skeleton attachment failed: %d\n", err))
goto cleanup;
@@ -81,7 +124,7 @@ void test_ringbuf_multi(void)
/* poll for samples, should get 2 ringbufs back */
err = ring_buffer__poll(ringbuf, -1);
- if (CHECK(err != 4, "poll_res", "expected 4 records, got %d\n", err))
+ if (CHECK(err != 2, "poll_res", "expected 2 records, got %d\n", err))
goto cleanup;
/* expect extra polling to return nothing */
@@ -97,6 +140,8 @@ void test_ringbuf_multi(void)
2L, skel->bss->total);
cleanup:
+ if (proto_fd >= 0)
+ close(proto_fd);
ring_buffer__free(ringbuf);
test_ringbuf_multi__destroy(skel);
}
diff --git a/tools/testing/selftests/bpf/prog_tests/section_names.c b/tools/testing/selftests/bpf/prog_tests/section_names.c
index 713167449c98..c3d78846f31a 100644
--- a/tools/testing/selftests/bpf/prog_tests/section_names.c
+++ b/tools/testing/selftests/bpf/prog_tests/section_names.c
@@ -35,7 +35,7 @@ static struct sec_name_test tests[] = {
{-EINVAL, 0},
},
{"raw_tp/", {0, BPF_PROG_TYPE_RAW_TRACEPOINT, 0}, {-EINVAL, 0} },
- {"xdp", {0, BPF_PROG_TYPE_XDP, 0}, {-EINVAL, 0} },
+ {"xdp", {0, BPF_PROG_TYPE_XDP, BPF_XDP}, {0, BPF_XDP} },
{"perf_event", {0, BPF_PROG_TYPE_PERF_EVENT, 0}, {-EINVAL, 0} },
{"lwt_in", {0, BPF_PROG_TYPE_LWT_IN, 0}, {-EINVAL, 0} },
{"lwt_out", {0, BPF_PROG_TYPE_LWT_OUT, 0}, {-EINVAL, 0} },
@@ -124,6 +124,11 @@ static struct sec_name_test tests[] = {
{0, BPF_CGROUP_INET6_CONNECT},
},
{
+ "cgroup/connect_unix",
+ {0, BPF_PROG_TYPE_CGROUP_SOCK_ADDR, BPF_CGROUP_UNIX_CONNECT},
+ {0, BPF_CGROUP_UNIX_CONNECT},
+ },
+ {
"cgroup/sendmsg4",
{0, BPF_PROG_TYPE_CGROUP_SOCK_ADDR, BPF_CGROUP_UDP4_SENDMSG},
{0, BPF_CGROUP_UDP4_SENDMSG},
@@ -134,6 +139,11 @@ static struct sec_name_test tests[] = {
{0, BPF_CGROUP_UDP6_SENDMSG},
},
{
+ "cgroup/sendmsg_unix",
+ {0, BPF_PROG_TYPE_CGROUP_SOCK_ADDR, BPF_CGROUP_UNIX_SENDMSG},
+ {0, BPF_CGROUP_UNIX_SENDMSG},
+ },
+ {
"cgroup/recvmsg4",
{0, BPF_PROG_TYPE_CGROUP_SOCK_ADDR, BPF_CGROUP_UDP4_RECVMSG},
{0, BPF_CGROUP_UDP4_RECVMSG},
@@ -144,6 +154,11 @@ static struct sec_name_test tests[] = {
{0, BPF_CGROUP_UDP6_RECVMSG},
},
{
+ "cgroup/recvmsg_unix",
+ {0, BPF_PROG_TYPE_CGROUP_SOCK_ADDR, BPF_CGROUP_UNIX_RECVMSG},
+ {0, BPF_CGROUP_UNIX_RECVMSG},
+ },
+ {
"cgroup/sysctl",
{0, BPF_PROG_TYPE_CGROUP_SYSCTL, BPF_CGROUP_SYSCTL},
{0, BPF_CGROUP_SYSCTL},
@@ -158,6 +173,36 @@ static struct sec_name_test tests[] = {
{0, BPF_PROG_TYPE_CGROUP_SOCKOPT, BPF_CGROUP_SETSOCKOPT},
{0, BPF_CGROUP_SETSOCKOPT},
},
+ {
+ "cgroup/getpeername4",
+ {0, BPF_PROG_TYPE_CGROUP_SOCK_ADDR, BPF_CGROUP_INET4_GETPEERNAME},
+ {0, BPF_CGROUP_INET4_GETPEERNAME},
+ },
+ {
+ "cgroup/getpeername6",
+ {0, BPF_PROG_TYPE_CGROUP_SOCK_ADDR, BPF_CGROUP_INET6_GETPEERNAME},
+ {0, BPF_CGROUP_INET6_GETPEERNAME},
+ },
+ {
+ "cgroup/getpeername_unix",
+ {0, BPF_PROG_TYPE_CGROUP_SOCK_ADDR, BPF_CGROUP_UNIX_GETPEERNAME},
+ {0, BPF_CGROUP_UNIX_GETPEERNAME},
+ },
+ {
+ "cgroup/getsockname4",
+ {0, BPF_PROG_TYPE_CGROUP_SOCK_ADDR, BPF_CGROUP_INET4_GETSOCKNAME},
+ {0, BPF_CGROUP_INET4_GETSOCKNAME},
+ },
+ {
+ "cgroup/getsockname6",
+ {0, BPF_PROG_TYPE_CGROUP_SOCK_ADDR, BPF_CGROUP_INET6_GETSOCKNAME},
+ {0, BPF_CGROUP_INET6_GETSOCKNAME},
+ },
+ {
+ "cgroup/getsockname_unix",
+ {0, BPF_PROG_TYPE_CGROUP_SOCK_ADDR, BPF_CGROUP_UNIX_GETSOCKNAME},
+ {0, BPF_CGROUP_UNIX_GETSOCKNAME},
+ },
};
static void test_prog_type_by_name(const struct sec_name_test *test)
diff --git a/tools/testing/selftests/bpf/prog_tests/select_reuseport.c b/tools/testing/selftests/bpf/prog_tests/select_reuseport.c
index 821b4146b7b6..64c5f5eb2994 100644
--- a/tools/testing/selftests/bpf/prog_tests/select_reuseport.c
+++ b/tools/testing/selftests/bpf/prog_tests/select_reuseport.c
@@ -18,7 +18,6 @@
#include <netinet/in.h>
#include <bpf/bpf.h>
#include <bpf/libbpf.h>
-#include "bpf_rlimit.h"
#include "bpf_util.h"
#include "test_progs.h"
@@ -66,30 +65,21 @@ static union sa46 {
static int create_maps(enum bpf_map_type inner_type)
{
- struct bpf_create_map_attr attr = {};
+ LIBBPF_OPTS(bpf_map_create_opts, opts);
inner_map_type = inner_type;
/* Creating reuseport_array */
- attr.name = "reuseport_array";
- attr.map_type = inner_type;
- attr.key_size = sizeof(__u32);
- attr.value_size = sizeof(__u32);
- attr.max_entries = REUSEPORT_ARRAY_SIZE;
-
- reuseport_array = bpf_create_map_xattr(&attr);
- RET_ERR(reuseport_array == -1, "creating reuseport_array",
+ reuseport_array = bpf_map_create(inner_type, "reuseport_array",
+ sizeof(__u32), sizeof(__u32), REUSEPORT_ARRAY_SIZE, NULL);
+ RET_ERR(reuseport_array < 0, "creating reuseport_array",
"reuseport_array:%d errno:%d\n", reuseport_array, errno);
/* Creating outer_map */
- attr.name = "outer_map";
- attr.map_type = BPF_MAP_TYPE_ARRAY_OF_MAPS;
- attr.key_size = sizeof(__u32);
- attr.value_size = sizeof(__u32);
- attr.max_entries = 1;
- attr.inner_map_fd = reuseport_array;
- outer_map = bpf_create_map_xattr(&attr);
- RET_ERR(outer_map == -1, "creating outer_map",
+ opts.inner_map_fd = reuseport_array;
+ outer_map = bpf_map_create(BPF_MAP_TYPE_ARRAY_OF_MAPS, "outer_map",
+ sizeof(__u32), sizeof(__u32), 1, &opts);
+ RET_ERR(outer_map < 0, "creating outer_map",
"outer_map:%d errno:%d\n", outer_map, errno);
return 0;
@@ -101,9 +91,10 @@ static int prepare_bpf_obj(void)
struct bpf_map *map;
int err;
- obj = bpf_object__open("test_select_reuseport_kern.o");
- RET_ERR(IS_ERR_OR_NULL(obj), "open test_select_reuseport_kern.o",
- "obj:%p PTR_ERR(obj):%ld\n", obj, PTR_ERR(obj));
+ obj = bpf_object__open("test_select_reuseport_kern.bpf.o");
+ err = libbpf_get_error(obj);
+ RET_ERR(err, "open test_select_reuseport_kern.bpf.o",
+ "obj:%p PTR_ERR(obj):%d\n", obj, err);
map = bpf_object__find_map_by_name(obj, "outer_map");
RET_ERR(!map, "find outer_map", "!map\n");
@@ -113,34 +104,34 @@ static int prepare_bpf_obj(void)
err = bpf_object__load(obj);
RET_ERR(err, "load bpf_object", "err:%d\n", err);
- prog = bpf_program__next(NULL, obj);
+ prog = bpf_object__next_program(obj, NULL);
RET_ERR(!prog, "get first bpf_program", "!prog\n");
select_by_skb_data_prog = bpf_program__fd(prog);
- RET_ERR(select_by_skb_data_prog == -1, "get prog fd",
+ RET_ERR(select_by_skb_data_prog < 0, "get prog fd",
"select_by_skb_data_prog:%d\n", select_by_skb_data_prog);
map = bpf_object__find_map_by_name(obj, "result_map");
RET_ERR(!map, "find result_map", "!map\n");
result_map = bpf_map__fd(map);
- RET_ERR(result_map == -1, "get result_map fd",
+ RET_ERR(result_map < 0, "get result_map fd",
"result_map:%d\n", result_map);
map = bpf_object__find_map_by_name(obj, "tmp_index_ovr_map");
RET_ERR(!map, "find tmp_index_ovr_map\n", "!map");
tmp_index_ovr_map = bpf_map__fd(map);
- RET_ERR(tmp_index_ovr_map == -1, "get tmp_index_ovr_map fd",
+ RET_ERR(tmp_index_ovr_map < 0, "get tmp_index_ovr_map fd",
"tmp_index_ovr_map:%d\n", tmp_index_ovr_map);
map = bpf_object__find_map_by_name(obj, "linum_map");
RET_ERR(!map, "find linum_map", "!map\n");
linum_map = bpf_map__fd(map);
- RET_ERR(linum_map == -1, "get linum_map fd",
+ RET_ERR(linum_map < 0, "get linum_map fd",
"linum_map:%d\n", linum_map);
map = bpf_object__find_map_by_name(obj, "data_check_map");
RET_ERR(!map, "find data_check_map", "!map\n");
data_check_map = bpf_map__fd(map);
- RET_ERR(data_check_map == -1, "get data_check_map fd",
+ RET_ERR(data_check_map < 0, "get data_check_map fd",
"data_check_map:%d\n", data_check_map);
return 0;
@@ -237,7 +228,7 @@ static long get_linum(void)
int err;
err = bpf_map_lookup_elem(linum_map, &index_zero, &linum);
- RET_ERR(err == -1, "lookup_elem(linum_map)", "err:%d errno:%d\n",
+ RET_ERR(err < 0, "lookup_elem(linum_map)", "err:%d errno:%d\n",
err, errno);
return linum;
@@ -254,11 +245,11 @@ static void check_data(int type, sa_family_t family, const struct cmd *cmd,
addrlen = sizeof(cli_sa);
err = getsockname(cli_fd, (struct sockaddr *)&cli_sa,
&addrlen);
- RET_IF(err == -1, "getsockname(cli_fd)", "err:%d errno:%d\n",
+ RET_IF(err < 0, "getsockname(cli_fd)", "err:%d errno:%d\n",
err, errno);
err = bpf_map_lookup_elem(data_check_map, &index_zero, &result);
- RET_IF(err == -1, "lookup_elem(data_check_map)", "err:%d errno:%d\n",
+ RET_IF(err < 0, "lookup_elem(data_check_map)", "err:%d errno:%d\n",
err, errno);
if (type == SOCK_STREAM) {
@@ -347,7 +338,7 @@ static void check_results(void)
for (i = 0; i < NR_RESULTS; i++) {
err = bpf_map_lookup_elem(result_map, &i, &results[i]);
- RET_IF(err == -1, "lookup_elem(result_map)",
+ RET_IF(err < 0, "lookup_elem(result_map)",
"i:%u err:%d errno:%d\n", i, err, errno);
}
@@ -524,12 +515,12 @@ static void test_syncookie(int type, sa_family_t family)
*/
err = bpf_map_update_elem(tmp_index_ovr_map, &index_zero,
&tmp_index, BPF_ANY);
- RET_IF(err == -1, "update_elem(tmp_index_ovr_map, 0, 1)",
+ RET_IF(err < 0, "update_elem(tmp_index_ovr_map, 0, 1)",
"err:%d errno:%d\n", err, errno);
do_test(type, family, &cmd, PASS);
err = bpf_map_lookup_elem(tmp_index_ovr_map, &index_zero,
&tmp_index);
- RET_IF(err == -1 || tmp_index != -1,
+ RET_IF(err < 0 || tmp_index >= 0,
"lookup_elem(tmp_index_ovr_map)",
"err:%d errno:%d tmp_index:%d\n",
err, errno, tmp_index);
@@ -569,7 +560,7 @@ static void test_detach_bpf(int type, sa_family_t family)
for (i = 0; i < NR_RESULTS; i++) {
err = bpf_map_lookup_elem(result_map, &i, &tmp);
- RET_IF(err == -1, "lookup_elem(result_map)",
+ RET_IF(err < 0, "lookup_elem(result_map)",
"i:%u err:%d errno:%d\n", i, err, errno);
nr_run_before += tmp;
}
@@ -584,7 +575,7 @@ static void test_detach_bpf(int type, sa_family_t family)
for (i = 0; i < NR_RESULTS; i++) {
err = bpf_map_lookup_elem(result_map, &i, &tmp);
- RET_IF(err == -1, "lookup_elem(result_map)",
+ RET_IF(err < 0, "lookup_elem(result_map)",
"i:%u err:%d errno:%d\n", i, err, errno);
nr_run_after += tmp;
}
@@ -632,24 +623,24 @@ static void prepare_sk_fds(int type, sa_family_t family, bool inany)
SO_ATTACH_REUSEPORT_EBPF,
&select_by_skb_data_prog,
sizeof(select_by_skb_data_prog));
- RET_IF(err == -1, "setsockopt(SO_ATTACH_REUEPORT_EBPF)",
+ RET_IF(err < 0, "setsockopt(SO_ATTACH_REUEPORT_EBPF)",
"err:%d errno:%d\n", err, errno);
}
err = bind(sk_fds[i], (struct sockaddr *)&srv_sa, addrlen);
- RET_IF(err == -1, "bind()", "sk_fds[%d] err:%d errno:%d\n",
+ RET_IF(err < 0, "bind()", "sk_fds[%d] err:%d errno:%d\n",
i, err, errno);
if (type == SOCK_STREAM) {
err = listen(sk_fds[i], 10);
- RET_IF(err == -1, "listen()",
+ RET_IF(err < 0, "listen()",
"sk_fds[%d] err:%d errno:%d\n",
i, err, errno);
}
err = bpf_map_update_elem(reuseport_array, &i, &sk_fds[i],
BPF_NOEXIST);
- RET_IF(err == -1, "update_elem(reuseport_array)",
+ RET_IF(err < 0, "update_elem(reuseport_array)",
"sk_fds[%d] err:%d errno:%d\n", i, err, errno);
if (i == first) {
@@ -682,7 +673,7 @@ static void setup_per_test(int type, sa_family_t family, bool inany,
prepare_sk_fds(type, family, inany);
err = bpf_map_update_elem(tmp_index_ovr_map, &index_zero, &ovr,
BPF_ANY);
- RET_IF(err == -1, "update_elem(tmp_index_ovr_map, 0, -1)",
+ RET_IF(err < 0, "update_elem(tmp_index_ovr_map, 0, -1)",
"err:%d errno:%d\n", err, errno);
/* Install reuseport_array to outer_map? */
@@ -691,7 +682,7 @@ static void setup_per_test(int type, sa_family_t family, bool inany,
err = bpf_map_update_elem(outer_map, &index_zero, &reuseport_array,
BPF_ANY);
- RET_IF(err == -1, "update_elem(outer_map, 0, reuseport_array)",
+ RET_IF(err < 0, "update_elem(outer_map, 0, reuseport_array)",
"err:%d errno:%d\n", err, errno);
}
@@ -720,18 +711,18 @@ static void cleanup_per_test(bool no_inner_map)
return;
err = bpf_map_delete_elem(outer_map, &index_zero);
- RET_IF(err == -1, "delete_elem(outer_map)",
+ RET_IF(err < 0, "delete_elem(outer_map)",
"err:%d errno:%d\n", err, errno);
}
static void cleanup(void)
{
- if (outer_map != -1) {
+ if (outer_map >= 0) {
close(outer_map);
outer_map = -1;
}
- if (reuseport_array != -1) {
+ if (reuseport_array >= 0) {
close(reuseport_array);
reuseport_array = -1;
}
@@ -857,7 +848,7 @@ out:
cleanup();
}
-void test_select_reuseport(void)
+void serial_test_select_reuseport(void)
{
saved_tcp_fo = read_int_sysctl(TCP_FO_SYSCTL);
if (saved_tcp_fo < 0)
diff --git a/tools/testing/selftests/bpf/prog_tests/send_signal.c b/tools/testing/selftests/bpf/prog_tests/send_signal.c
index 504abb7bfb95..b15b343ebb6b 100644
--- a/tools/testing/selftests/bpf/prog_tests/send_signal.c
+++ b/tools/testing/selftests/bpf/prog_tests/send_signal.c
@@ -1,38 +1,36 @@
// SPDX-License-Identifier: GPL-2.0
#include <test_progs.h>
+#include <sys/time.h>
+#include <sys/resource.h>
#include "test_send_signal_kern.skel.h"
-static volatile int sigusr1_received = 0;
+static int sigusr1_received;
static void sigusr1_handler(int signum)
{
- sigusr1_received++;
+ sigusr1_received = 1;
}
static void test_send_signal_common(struct perf_event_attr *attr,
- bool signal_thread,
- const char *test_name)
+ bool signal_thread)
{
struct test_send_signal_kern *skel;
int pipe_c2p[2], pipe_p2c[2];
int err = -1, pmu_fd = -1;
- __u32 duration = 0;
char buf[256];
pid_t pid;
- if (CHECK(pipe(pipe_c2p), test_name,
- "pipe pipe_c2p error: %s\n", strerror(errno)))
+ if (!ASSERT_OK(pipe(pipe_c2p), "pipe_c2p"))
return;
- if (CHECK(pipe(pipe_p2c), test_name,
- "pipe pipe_p2c error: %s\n", strerror(errno))) {
+ if (!ASSERT_OK(pipe(pipe_p2c), "pipe_p2c")) {
close(pipe_c2p[0]);
close(pipe_c2p[1]);
return;
}
pid = fork();
- if (CHECK(pid < 0, test_name, "fork error: %s\n", strerror(errno))) {
+ if (!ASSERT_GE(pid, 0, "fork")) {
close(pipe_c2p[0]);
close(pipe_c2p[1]);
close(pipe_p2c[0]);
@@ -41,28 +39,47 @@ static void test_send_signal_common(struct perf_event_attr *attr,
}
if (pid == 0) {
+ int old_prio;
+ volatile int j = 0;
+
/* install signal handler and notify parent */
- signal(SIGUSR1, sigusr1_handler);
+ ASSERT_NEQ(signal(SIGUSR1, sigusr1_handler), SIG_ERR, "signal");
close(pipe_c2p[0]); /* close read */
close(pipe_p2c[1]); /* close write */
+ /* boost with a high priority so we got a higher chance
+ * that if an interrupt happens, the underlying task
+ * is this process.
+ */
+ errno = 0;
+ old_prio = getpriority(PRIO_PROCESS, 0);
+ ASSERT_OK(errno, "getpriority");
+ ASSERT_OK(setpriority(PRIO_PROCESS, 0, -20), "setpriority");
+
/* notify parent signal handler is installed */
- write(pipe_c2p[1], buf, 1);
+ ASSERT_EQ(write(pipe_c2p[1], buf, 1), 1, "pipe_write");
/* make sure parent enabled bpf program to send_signal */
- read(pipe_p2c[0], buf, 1);
+ ASSERT_EQ(read(pipe_p2c[0], buf, 1), 1, "pipe_read");
/* wait a little for signal handler */
- sleep(1);
+ for (int i = 0; i < 1000000000 && !sigusr1_received; i++) {
+ j /= i + j + 1;
+ if (!attr)
+ /* trigger the nanosleep tracepoint program. */
+ usleep(1);
+ }
- if (sigusr1_received)
- write(pipe_c2p[1], "2", 1);
- else
- write(pipe_c2p[1], "0", 1);
+ buf[0] = sigusr1_received ? '2' : '0';
+ ASSERT_EQ(sigusr1_received, 1, "sigusr1_received");
+ ASSERT_EQ(write(pipe_c2p[1], buf, 1), 1, "pipe_write");
/* wait for parent notification and exit */
- read(pipe_p2c[0], buf, 1);
+ ASSERT_EQ(read(pipe_p2c[0], buf, 1), 1, "pipe_read");
+
+ /* restore the old priority */
+ ASSERT_OK(setpriority(PRIO_PROCESS, 0, old_prio), "setpriority");
close(pipe_c2p[1]);
close(pipe_p2c[0]);
@@ -73,55 +90,53 @@ static void test_send_signal_common(struct perf_event_attr *attr,
close(pipe_p2c[0]); /* close read */
skel = test_send_signal_kern__open_and_load();
- if (CHECK(!skel, "skel_open_and_load", "skeleton open_and_load failed\n"))
+ if (!ASSERT_OK_PTR(skel, "skel_open_and_load"))
goto skel_open_load_failure;
if (!attr) {
err = test_send_signal_kern__attach(skel);
- if (CHECK(err, "skel_attach", "skeleton attach failed\n")) {
+ if (!ASSERT_OK(err, "skel_attach")) {
err = -1;
goto destroy_skel;
}
} else {
- pmu_fd = syscall(__NR_perf_event_open, attr, pid, -1,
+ pmu_fd = syscall(__NR_perf_event_open, attr, pid, -1 /* cpu */,
-1 /* group id */, 0 /* flags */);
- if (CHECK(pmu_fd < 0, test_name, "perf_event_open error: %s\n",
- strerror(errno))) {
+ if (!ASSERT_GE(pmu_fd, 0, "perf_event_open")) {
err = -1;
goto destroy_skel;
}
skel->links.send_signal_perf =
bpf_program__attach_perf_event(skel->progs.send_signal_perf, pmu_fd);
- if (CHECK(IS_ERR(skel->links.send_signal_perf), "attach_perf_event",
- "err %ld\n", PTR_ERR(skel->links.send_signal_perf)))
+ if (!ASSERT_OK_PTR(skel->links.send_signal_perf, "attach_perf_event"))
goto disable_pmu;
}
/* wait until child signal handler installed */
- read(pipe_c2p[0], buf, 1);
+ ASSERT_EQ(read(pipe_c2p[0], buf, 1), 1, "pipe_read");
/* trigger the bpf send_signal */
- skel->bss->pid = pid;
- skel->bss->sig = SIGUSR1;
skel->bss->signal_thread = signal_thread;
+ skel->bss->sig = SIGUSR1;
+ skel->bss->pid = pid;
/* notify child that bpf program can send_signal now */
- write(pipe_p2c[1], buf, 1);
+ ASSERT_EQ(write(pipe_p2c[1], buf, 1), 1, "pipe_write");
/* wait for result */
err = read(pipe_c2p[0], buf, 1);
- if (CHECK(err < 0, test_name, "reading pipe error: %s\n", strerror(errno)))
+ if (!ASSERT_GE(err, 0, "reading pipe"))
goto disable_pmu;
- if (CHECK(err == 0, test_name, "reading pipe error: size 0\n")) {
+ if (!ASSERT_GT(err, 0, "reading pipe error: size 0")) {
err = -1;
goto disable_pmu;
}
- CHECK(buf[0] != '2', test_name, "incorrect result\n");
+ ASSERT_EQ(buf[0], '2', "incorrect result");
/* notify child safe to exit */
- write(pipe_p2c[1], buf, 1);
+ ASSERT_EQ(write(pipe_p2c[1], buf, 1), 1, "pipe_write");
disable_pmu:
close(pmu_fd);
@@ -135,7 +150,7 @@ skel_open_load_failure:
static void test_send_signal_tracepoint(bool signal_thread)
{
- test_send_signal_common(NULL, signal_thread, "tracepoint");
+ test_send_signal_common(NULL, signal_thread);
}
static void test_send_signal_perf(bool signal_thread)
@@ -146,7 +161,7 @@ static void test_send_signal_perf(bool signal_thread)
.config = PERF_COUNT_SW_CPU_CLOCK,
};
- test_send_signal_common(&attr, signal_thread, "perf_sw_event");
+ test_send_signal_common(&attr, signal_thread);
}
static void test_send_signal_nmi(bool signal_thread)
@@ -175,7 +190,7 @@ static void test_send_signal_nmi(bool signal_thread)
close(pmu_fd);
}
- test_send_signal_common(&attr, signal_thread, "perf_hw_event");
+ test_send_signal_common(&attr, signal_thread);
}
void test_send_signal(void)
diff --git a/tools/testing/selftests/bpf/prog_tests/send_signal_sched_switch.c b/tools/testing/selftests/bpf/prog_tests/send_signal_sched_switch.c
index 189a34a7addb..15dacfcfaa6d 100644
--- a/tools/testing/selftests/bpf/prog_tests/send_signal_sched_switch.c
+++ b/tools/testing/selftests/bpf/prog_tests/send_signal_sched_switch.c
@@ -25,7 +25,8 @@ static void *worker(void *p)
return NULL;
}
-void test_send_signal_sched_switch(void)
+/* NOTE: cause events loss */
+void serial_test_send_signal_sched_switch(void)
{
struct test_send_signal_kern *skel;
pthread_t threads[THREAD_COUNT];
diff --git a/tools/testing/selftests/bpf/prog_tests/setget_sockopt.c b/tools/testing/selftests/bpf/prog_tests/setget_sockopt.c
new file mode 100644
index 000000000000..7d4a9b3d3722
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/setget_sockopt.c
@@ -0,0 +1,198 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) Meta Platforms, Inc. and affiliates. */
+
+#define _GNU_SOURCE
+#include <sched.h>
+#include <linux/socket.h>
+#include <linux/tls.h>
+#include <net/if.h>
+
+#include "test_progs.h"
+#include "cgroup_helpers.h"
+#include "network_helpers.h"
+
+#include "setget_sockopt.skel.h"
+
+#define CG_NAME "/setget-sockopt-test"
+
+static const char addr4_str[] = "127.0.0.1";
+static const char addr6_str[] = "::1";
+static struct setget_sockopt *skel;
+static int cg_fd;
+
+static int create_netns(void)
+{
+ if (!ASSERT_OK(unshare(CLONE_NEWNET), "create netns"))
+ return -1;
+
+ if (!ASSERT_OK(system("ip link set dev lo up"), "set lo up"))
+ return -1;
+
+ if (!ASSERT_OK(system("ip link add dev binddevtest1 type veth peer name binddevtest2"),
+ "add veth"))
+ return -1;
+
+ if (!ASSERT_OK(system("ip link set dev binddevtest1 up"),
+ "bring veth up"))
+ return -1;
+
+ return 0;
+}
+
+static void test_tcp(int family)
+{
+ struct setget_sockopt__bss *bss = skel->bss;
+ int sfd, cfd;
+
+ memset(bss, 0, sizeof(*bss));
+
+ sfd = start_server(family, SOCK_STREAM,
+ family == AF_INET6 ? addr6_str : addr4_str, 0, 0);
+ if (!ASSERT_GE(sfd, 0, "start_server"))
+ return;
+
+ cfd = connect_to_fd(sfd, 0);
+ if (!ASSERT_GE(cfd, 0, "connect_to_fd_server")) {
+ close(sfd);
+ return;
+ }
+ close(sfd);
+ close(cfd);
+
+ ASSERT_EQ(bss->nr_listen, 1, "nr_listen");
+ ASSERT_EQ(bss->nr_connect, 1, "nr_connect");
+ ASSERT_EQ(bss->nr_active, 1, "nr_active");
+ ASSERT_EQ(bss->nr_passive, 1, "nr_passive");
+ ASSERT_EQ(bss->nr_socket_post_create, 2, "nr_socket_post_create");
+ ASSERT_EQ(bss->nr_binddev, 2, "nr_bind");
+}
+
+static void test_udp(int family)
+{
+ struct setget_sockopt__bss *bss = skel->bss;
+ int sfd;
+
+ memset(bss, 0, sizeof(*bss));
+
+ sfd = start_server(family, SOCK_DGRAM,
+ family == AF_INET6 ? addr6_str : addr4_str, 0, 0);
+ if (!ASSERT_GE(sfd, 0, "start_server"))
+ return;
+ close(sfd);
+
+ ASSERT_GE(bss->nr_socket_post_create, 1, "nr_socket_post_create");
+ ASSERT_EQ(bss->nr_binddev, 1, "nr_bind");
+}
+
+static void test_ktls(int family)
+{
+ struct tls12_crypto_info_aes_gcm_128 aes128;
+ struct setget_sockopt__bss *bss = skel->bss;
+ int cfd = -1, sfd = -1, fd = -1, ret;
+ char buf;
+
+ memset(bss, 0, sizeof(*bss));
+
+ sfd = start_server(family, SOCK_STREAM,
+ family == AF_INET6 ? addr6_str : addr4_str, 0, 0);
+ if (!ASSERT_GE(sfd, 0, "start_server"))
+ return;
+ fd = connect_to_fd(sfd, 0);
+ if (!ASSERT_GE(fd, 0, "connect_to_fd"))
+ goto err_out;
+
+ cfd = accept(sfd, NULL, 0);
+ if (!ASSERT_GE(cfd, 0, "accept"))
+ goto err_out;
+
+ close(sfd);
+ sfd = -1;
+
+ /* Setup KTLS */
+ ret = setsockopt(fd, IPPROTO_TCP, TCP_ULP, "tls", sizeof("tls"));
+ if (!ASSERT_OK(ret, "setsockopt"))
+ goto err_out;
+ ret = setsockopt(cfd, IPPROTO_TCP, TCP_ULP, "tls", sizeof("tls"));
+ if (!ASSERT_OK(ret, "setsockopt"))
+ goto err_out;
+
+ memset(&aes128, 0, sizeof(aes128));
+ aes128.info.version = TLS_1_2_VERSION;
+ aes128.info.cipher_type = TLS_CIPHER_AES_GCM_128;
+
+ ret = setsockopt(fd, SOL_TLS, TLS_TX, &aes128, sizeof(aes128));
+ if (!ASSERT_OK(ret, "setsockopt"))
+ goto err_out;
+
+ ret = setsockopt(cfd, SOL_TLS, TLS_RX, &aes128, sizeof(aes128));
+ if (!ASSERT_OK(ret, "setsockopt"))
+ goto err_out;
+
+ /* KTLS is enabled */
+
+ close(fd);
+ /* At this point, the cfd socket is at the CLOSE_WAIT state
+ * and still run TLS protocol. The test for
+ * BPF_TCP_CLOSE_WAIT should be run at this point.
+ */
+ ret = read(cfd, &buf, sizeof(buf));
+ ASSERT_EQ(ret, 0, "read");
+ close(cfd);
+
+ ASSERT_EQ(bss->nr_listen, 1, "nr_listen");
+ ASSERT_EQ(bss->nr_connect, 1, "nr_connect");
+ ASSERT_EQ(bss->nr_active, 1, "nr_active");
+ ASSERT_EQ(bss->nr_passive, 1, "nr_passive");
+ ASSERT_EQ(bss->nr_socket_post_create, 2, "nr_socket_post_create");
+ ASSERT_EQ(bss->nr_binddev, 2, "nr_bind");
+ ASSERT_EQ(bss->nr_fin_wait1, 1, "nr_fin_wait1");
+ return;
+
+err_out:
+ close(fd);
+ close(cfd);
+ close(sfd);
+}
+
+void test_setget_sockopt(void)
+{
+ cg_fd = test__join_cgroup(CG_NAME);
+ if (cg_fd < 0)
+ return;
+
+ if (create_netns())
+ goto done;
+
+ skel = setget_sockopt__open();
+ if (!ASSERT_OK_PTR(skel, "open skel"))
+ goto done;
+
+ strcpy(skel->rodata->veth, "binddevtest1");
+ skel->rodata->veth_ifindex = if_nametoindex("binddevtest1");
+ if (!ASSERT_GT(skel->rodata->veth_ifindex, 0, "if_nametoindex"))
+ goto done;
+
+ if (!ASSERT_OK(setget_sockopt__load(skel), "load skel"))
+ goto done;
+
+ skel->links.skops_sockopt =
+ bpf_program__attach_cgroup(skel->progs.skops_sockopt, cg_fd);
+ if (!ASSERT_OK_PTR(skel->links.skops_sockopt, "attach cgroup"))
+ goto done;
+
+ skel->links.socket_post_create =
+ bpf_program__attach_cgroup(skel->progs.socket_post_create, cg_fd);
+ if (!ASSERT_OK_PTR(skel->links.socket_post_create, "attach_cgroup"))
+ goto done;
+
+ test_tcp(AF_INET6);
+ test_tcp(AF_INET);
+ test_udp(AF_INET6);
+ test_udp(AF_INET);
+ test_ktls(AF_INET6);
+ test_ktls(AF_INET);
+
+done:
+ setget_sockopt__destroy(skel);
+ close(cg_fd);
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/signal_pending.c b/tools/testing/selftests/bpf/prog_tests/signal_pending.c
index dfcbddcbe4d3..70b49da5ca0a 100644
--- a/tools/testing/selftests/bpf/prog_tests/signal_pending.c
+++ b/tools/testing/selftests/bpf/prog_tests/signal_pending.c
@@ -13,36 +13,37 @@ static void test_signal_pending_by_type(enum bpf_prog_type prog_type)
struct itimerval timeo = {
.it_value.tv_usec = 100000, /* 100ms */
};
- __u32 duration = 0, retval;
int prog_fd;
int err;
int i;
+ LIBBPF_OPTS(bpf_test_run_opts, topts,
+ .data_in = &pkt_v4,
+ .data_size_in = sizeof(pkt_v4),
+ .repeat = 0xffffffff,
+ );
for (i = 0; i < ARRAY_SIZE(prog); i++)
prog[i] = BPF_ALU64_IMM(BPF_MOV, BPF_REG_0, 0);
prog[ARRAY_SIZE(prog) - 1] = BPF_EXIT_INSN();
- prog_fd = bpf_load_program(prog_type, prog, ARRAY_SIZE(prog),
+ prog_fd = bpf_test_load_program(prog_type, prog, ARRAY_SIZE(prog),
"GPL", 0, NULL, 0);
- CHECK(prog_fd < 0, "test-run", "errno %d\n", errno);
+ ASSERT_GE(prog_fd, 0, "test-run load");
err = sigaction(SIGALRM, &sigalrm_action, NULL);
- CHECK(err, "test-run-signal-sigaction", "errno %d\n", errno);
+ ASSERT_OK(err, "test-run-signal-sigaction");
err = setitimer(ITIMER_REAL, &timeo, NULL);
- CHECK(err, "test-run-signal-timer", "errno %d\n", errno);
+ ASSERT_OK(err, "test-run-signal-timer");
- err = bpf_prog_test_run(prog_fd, 0xffffffff, &pkt_v4, sizeof(pkt_v4),
- NULL, NULL, &retval, &duration);
- CHECK(duration > 500000000, /* 500ms */
- "test-run-signal-duration",
- "duration %dns > 500ms\n",
- duration);
+ err = bpf_prog_test_run_opts(prog_fd, &topts);
+ ASSERT_LE(topts.duration, 500000000 /* 500ms */,
+ "test-run-signal-duration");
signal(SIGALRM, SIG_DFL);
}
-void test_signal_pending(enum bpf_prog_type prog_type)
+void test_signal_pending(void)
{
test_signal_pending_by_type(BPF_PROG_TYPE_SOCKET_FILTER);
test_signal_pending_by_type(BPF_PROG_TYPE_FLOW_DISSECTOR);
diff --git a/tools/testing/selftests/bpf/prog_tests/sk_assign.c b/tools/testing/selftests/bpf/prog_tests/sk_assign.c
index 47fa04adc147..1374b626a985 100644
--- a/tools/testing/selftests/bpf/prog_tests/sk_assign.c
+++ b/tools/testing/selftests/bpf/prog_tests/sk_assign.c
@@ -29,7 +29,23 @@ static int stop, duration;
static bool
configure_stack(void)
{
+ char tc_version[128];
char tc_cmd[BUFSIZ];
+ char *prog;
+ FILE *tc;
+
+ /* Check whether tc is built with libbpf. */
+ tc = popen("tc -V", "r");
+ if (CHECK_FAIL(!tc))
+ return false;
+ if (CHECK_FAIL(!fgets(tc_version, sizeof(tc_version), tc)))
+ return false;
+ if (strstr(tc_version, ", libbpf "))
+ prog = "test_sk_assign_libbpf.bpf.o";
+ else
+ prog = "test_sk_assign.bpf.o";
+ if (CHECK_FAIL(pclose(tc)))
+ return false;
/* Move to a new networking namespace */
if (CHECK_FAIL(unshare(CLONE_NEWNET)))
@@ -46,10 +62,10 @@ configure_stack(void)
/* Load qdisc, BPF program */
if (CHECK_FAIL(system("tc qdisc add dev lo clsact")))
return false;
- sprintf(tc_cmd, "%s %s %s %s", "tc filter add dev lo ingress bpf",
- "direct-action object-file ./test_sk_assign.o",
- "section classifier/sk_assign_test",
- (env.verbosity < VERBOSE_VERY) ? " 2>/dev/null" : "");
+ sprintf(tc_cmd, "%s %s %s %s %s", "tc filter add dev lo ingress bpf",
+ "direct-action object-file", prog,
+ "section tc",
+ (env.verbosity < VERBOSE_VERY) ? " 2>/dev/null" : "verbose");
if (CHECK(system(tc_cmd), "BPF load failed;",
"run with -vv for more info\n"))
return false;
@@ -129,15 +145,12 @@ get_port(int fd)
static ssize_t
rcv_msg(int srv_client, int type)
{
- struct sockaddr_storage ss;
char buf[BUFSIZ];
- socklen_t slen;
if (type == SOCK_STREAM)
return read(srv_client, &buf, sizeof(buf));
else
- return recvfrom(srv_client, &buf, sizeof(buf), 0,
- (struct sockaddr *)&ss, &slen);
+ return recvfrom(srv_client, &buf, sizeof(buf), 0, NULL, NULL);
}
static int
@@ -265,9 +278,10 @@ void test_sk_assign(void)
TEST("ipv6 udp port redir", AF_INET6, SOCK_DGRAM, false),
TEST("ipv6 udp addr redir", AF_INET6, SOCK_DGRAM, true),
};
- int server = -1;
+ __s64 server = -1;
int server_map;
int self_net;
+ int i;
self_net = open(NS_SELF, O_RDONLY);
if (CHECK_FAIL(self_net < 0)) {
@@ -286,7 +300,7 @@ void test_sk_assign(void)
goto cleanup;
}
- for (int i = 0; i < ARRAY_SIZE(tests) && !READ_ONCE(stop); i++) {
+ for (i = 0; i < ARRAY_SIZE(tests) && !READ_ONCE(stop); i++) {
struct test_sk_cfg *test = &tests[i];
const struct sockaddr *addr;
const int zero = 0;
diff --git a/tools/testing/selftests/bpf/prog_tests/sk_lookup.c b/tools/testing/selftests/bpf/prog_tests/sk_lookup.c
new file mode 100644
index 000000000000..597d0467a926
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/sk_lookup.c
@@ -0,0 +1,1413 @@
+// SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause
+// Copyright (c) 2020 Cloudflare
+/*
+ * Test BPF attach point for INET socket lookup (BPF_SK_LOOKUP).
+ *
+ * Tests exercise:
+ * - attaching/detaching/querying programs to BPF_SK_LOOKUP hook,
+ * - redirecting socket lookup to a socket selected by BPF program,
+ * - failing a socket lookup on BPF program's request,
+ * - error scenarios for selecting a socket from BPF program,
+ * - accessing BPF program context,
+ * - attaching and running multiple BPF programs.
+ *
+ * Tests run in a dedicated network namespace.
+ */
+
+#define _GNU_SOURCE
+#include <arpa/inet.h>
+#include <assert.h>
+#include <errno.h>
+#include <error.h>
+#include <fcntl.h>
+#include <sched.h>
+#include <stdio.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <unistd.h>
+
+#include <bpf/libbpf.h>
+#include <bpf/bpf.h>
+
+#include "test_progs.h"
+#include "bpf_util.h"
+#include "cgroup_helpers.h"
+#include "network_helpers.h"
+#include "testing_helpers.h"
+#include "test_sk_lookup.skel.h"
+
+/* External (address, port) pairs the client sends packets to. */
+#define EXT_IP4 "127.0.0.1"
+#define EXT_IP6 "fd00::1"
+#define EXT_PORT 7007
+
+/* Internal (address, port) pairs the server listens/receives at. */
+#define INT_IP4 "127.0.0.2"
+#define INT_IP4_V6 "::ffff:127.0.0.2"
+#define INT_IP6 "fd00::2"
+#define INT_PORT 8008
+
+#define IO_TIMEOUT_SEC 3
+
+enum server {
+ SERVER_A = 0,
+ SERVER_B = 1,
+ MAX_SERVERS,
+};
+
+enum {
+ PROG1 = 0,
+ PROG2,
+};
+
+struct inet_addr {
+ const char *ip;
+ unsigned short port;
+};
+
+struct test {
+ const char *desc;
+ struct bpf_program *lookup_prog;
+ struct bpf_program *reuseport_prog;
+ struct bpf_map *sock_map;
+ int sotype;
+ struct inet_addr connect_to;
+ struct inet_addr listen_at;
+ enum server accept_on;
+ bool reuseport_has_conns; /* Add a connected socket to reuseport group */
+};
+
+static __u32 duration; /* for CHECK macro */
+
+static bool is_ipv6(const char *ip)
+{
+ return !!strchr(ip, ':');
+}
+
+static int attach_reuseport(int sock_fd, struct bpf_program *reuseport_prog)
+{
+ int err, prog_fd;
+
+ prog_fd = bpf_program__fd(reuseport_prog);
+ if (prog_fd < 0) {
+ errno = -prog_fd;
+ return -1;
+ }
+
+ err = setsockopt(sock_fd, SOL_SOCKET, SO_ATTACH_REUSEPORT_EBPF,
+ &prog_fd, sizeof(prog_fd));
+ if (err)
+ return -1;
+
+ return 0;
+}
+
+static socklen_t inetaddr_len(const struct sockaddr_storage *addr)
+{
+ return (addr->ss_family == AF_INET ? sizeof(struct sockaddr_in) :
+ addr->ss_family == AF_INET6 ? sizeof(struct sockaddr_in6) : 0);
+}
+
+static int make_socket(int sotype, const char *ip, int port,
+ struct sockaddr_storage *addr)
+{
+ struct timeval timeo = { .tv_sec = IO_TIMEOUT_SEC };
+ int err, family, fd;
+
+ family = is_ipv6(ip) ? AF_INET6 : AF_INET;
+ err = make_sockaddr(family, ip, port, addr, NULL);
+ if (CHECK(err, "make_address", "failed\n"))
+ return -1;
+
+ fd = socket(addr->ss_family, sotype, 0);
+ if (CHECK(fd < 0, "socket", "failed\n")) {
+ log_err("failed to make socket");
+ return -1;
+ }
+
+ err = setsockopt(fd, SOL_SOCKET, SO_SNDTIMEO, &timeo, sizeof(timeo));
+ if (CHECK(err, "setsockopt(SO_SNDTIMEO)", "failed\n")) {
+ log_err("failed to set SNDTIMEO");
+ close(fd);
+ return -1;
+ }
+
+ err = setsockopt(fd, SOL_SOCKET, SO_RCVTIMEO, &timeo, sizeof(timeo));
+ if (CHECK(err, "setsockopt(SO_RCVTIMEO)", "failed\n")) {
+ log_err("failed to set RCVTIMEO");
+ close(fd);
+ return -1;
+ }
+
+ return fd;
+}
+
+static int make_server(int sotype, const char *ip, int port,
+ struct bpf_program *reuseport_prog)
+{
+ struct sockaddr_storage addr = {0};
+ const int one = 1;
+ int err, fd = -1;
+
+ fd = make_socket(sotype, ip, port, &addr);
+ if (fd < 0)
+ return -1;
+
+ /* Enabled for UDPv6 sockets for IPv4-mapped IPv6 to work. */
+ if (sotype == SOCK_DGRAM) {
+ err = setsockopt(fd, SOL_IP, IP_RECVORIGDSTADDR, &one,
+ sizeof(one));
+ if (CHECK(err, "setsockopt(IP_RECVORIGDSTADDR)", "failed\n")) {
+ log_err("failed to enable IP_RECVORIGDSTADDR");
+ goto fail;
+ }
+ }
+
+ if (sotype == SOCK_DGRAM && addr.ss_family == AF_INET6) {
+ err = setsockopt(fd, SOL_IPV6, IPV6_RECVORIGDSTADDR, &one,
+ sizeof(one));
+ if (CHECK(err, "setsockopt(IPV6_RECVORIGDSTADDR)", "failed\n")) {
+ log_err("failed to enable IPV6_RECVORIGDSTADDR");
+ goto fail;
+ }
+ }
+
+ if (sotype == SOCK_STREAM) {
+ err = setsockopt(fd, SOL_SOCKET, SO_REUSEADDR, &one,
+ sizeof(one));
+ if (CHECK(err, "setsockopt(SO_REUSEADDR)", "failed\n")) {
+ log_err("failed to enable SO_REUSEADDR");
+ goto fail;
+ }
+ }
+
+ if (reuseport_prog) {
+ err = setsockopt(fd, SOL_SOCKET, SO_REUSEPORT, &one,
+ sizeof(one));
+ if (CHECK(err, "setsockopt(SO_REUSEPORT)", "failed\n")) {
+ log_err("failed to enable SO_REUSEPORT");
+ goto fail;
+ }
+ }
+
+ err = bind(fd, (void *)&addr, inetaddr_len(&addr));
+ if (CHECK(err, "bind", "failed\n")) {
+ log_err("failed to bind listen socket");
+ goto fail;
+ }
+
+ if (sotype == SOCK_STREAM) {
+ err = listen(fd, SOMAXCONN);
+ if (CHECK(err, "make_server", "listen")) {
+ log_err("failed to listen on port %d", port);
+ goto fail;
+ }
+ }
+
+ /* Late attach reuseport prog so we can have one init path */
+ if (reuseport_prog) {
+ err = attach_reuseport(fd, reuseport_prog);
+ if (CHECK(err, "attach_reuseport", "failed\n")) {
+ log_err("failed to attach reuseport prog");
+ goto fail;
+ }
+ }
+
+ return fd;
+fail:
+ close(fd);
+ return -1;
+}
+
+static int make_client(int sotype, const char *ip, int port)
+{
+ struct sockaddr_storage addr = {0};
+ int err, fd;
+
+ fd = make_socket(sotype, ip, port, &addr);
+ if (fd < 0)
+ return -1;
+
+ err = connect(fd, (void *)&addr, inetaddr_len(&addr));
+ if (CHECK(err, "make_client", "connect")) {
+ log_err("failed to connect client socket");
+ goto fail;
+ }
+
+ return fd;
+fail:
+ close(fd);
+ return -1;
+}
+
+static __u64 socket_cookie(int fd)
+{
+ __u64 cookie;
+ socklen_t cookie_len = sizeof(cookie);
+
+ if (CHECK(getsockopt(fd, SOL_SOCKET, SO_COOKIE, &cookie, &cookie_len) < 0,
+ "getsockopt(SO_COOKIE)", "%s\n", strerror(errno)))
+ return 0;
+ return cookie;
+}
+
+static int fill_sk_lookup_ctx(struct bpf_sk_lookup *ctx, const char *local_ip, __u16 local_port,
+ const char *remote_ip, __u16 remote_port)
+{
+ void *local, *remote;
+ int err;
+
+ memset(ctx, 0, sizeof(*ctx));
+ ctx->local_port = local_port;
+ ctx->remote_port = htons(remote_port);
+
+ if (is_ipv6(local_ip)) {
+ ctx->family = AF_INET6;
+ local = &ctx->local_ip6[0];
+ remote = &ctx->remote_ip6[0];
+ } else {
+ ctx->family = AF_INET;
+ local = &ctx->local_ip4;
+ remote = &ctx->remote_ip4;
+ }
+
+ err = inet_pton(ctx->family, local_ip, local);
+ if (CHECK(err != 1, "inet_pton", "local_ip failed\n"))
+ return 1;
+
+ err = inet_pton(ctx->family, remote_ip, remote);
+ if (CHECK(err != 1, "inet_pton", "remote_ip failed\n"))
+ return 1;
+
+ return 0;
+}
+
+static int send_byte(int fd)
+{
+ ssize_t n;
+
+ errno = 0;
+ n = send(fd, "a", 1, 0);
+ if (CHECK(n <= 0, "send_byte", "send")) {
+ log_err("failed/partial send");
+ return -1;
+ }
+ return 0;
+}
+
+static int recv_byte(int fd)
+{
+ char buf[1];
+ ssize_t n;
+
+ n = recv(fd, buf, sizeof(buf), 0);
+ if (CHECK(n <= 0, "recv_byte", "recv")) {
+ log_err("failed/partial recv");
+ return -1;
+ }
+ return 0;
+}
+
+static int tcp_recv_send(int server_fd)
+{
+ char buf[1];
+ int ret, fd;
+ ssize_t n;
+
+ fd = accept(server_fd, NULL, NULL);
+ if (CHECK(fd < 0, "accept", "failed\n")) {
+ log_err("failed to accept");
+ return -1;
+ }
+
+ n = recv(fd, buf, sizeof(buf), 0);
+ if (CHECK(n <= 0, "recv", "failed\n")) {
+ log_err("failed/partial recv");
+ ret = -1;
+ goto close;
+ }
+
+ n = send(fd, buf, n, 0);
+ if (CHECK(n <= 0, "send", "failed\n")) {
+ log_err("failed/partial send");
+ ret = -1;
+ goto close;
+ }
+
+ ret = 0;
+close:
+ close(fd);
+ return ret;
+}
+
+static void v4_to_v6(struct sockaddr_storage *ss)
+{
+ struct sockaddr_in6 *v6 = (struct sockaddr_in6 *)ss;
+ struct sockaddr_in v4 = *(struct sockaddr_in *)ss;
+
+ v6->sin6_family = AF_INET6;
+ v6->sin6_port = v4.sin_port;
+ v6->sin6_addr.s6_addr[10] = 0xff;
+ v6->sin6_addr.s6_addr[11] = 0xff;
+ memcpy(&v6->sin6_addr.s6_addr[12], &v4.sin_addr.s_addr, 4);
+ memset(&v6->sin6_addr.s6_addr[0], 0, 10);
+}
+
+static int udp_recv_send(int server_fd)
+{
+ char cmsg_buf[CMSG_SPACE(sizeof(struct sockaddr_storage))];
+ struct sockaddr_storage _src_addr = { 0 };
+ struct sockaddr_storage *src_addr = &_src_addr;
+ struct sockaddr_storage *dst_addr = NULL;
+ struct msghdr msg = { 0 };
+ struct iovec iov = { 0 };
+ struct cmsghdr *cm;
+ char buf[1];
+ int ret, fd;
+ ssize_t n;
+
+ iov.iov_base = buf;
+ iov.iov_len = sizeof(buf);
+
+ msg.msg_name = src_addr;
+ msg.msg_namelen = sizeof(*src_addr);
+ msg.msg_iov = &iov;
+ msg.msg_iovlen = 1;
+ msg.msg_control = cmsg_buf;
+ msg.msg_controllen = sizeof(cmsg_buf);
+
+ errno = 0;
+ n = recvmsg(server_fd, &msg, 0);
+ if (CHECK(n <= 0, "recvmsg", "failed\n")) {
+ log_err("failed to receive");
+ return -1;
+ }
+ if (CHECK(msg.msg_flags & MSG_CTRUNC, "recvmsg", "truncated cmsg\n"))
+ return -1;
+
+ for (cm = CMSG_FIRSTHDR(&msg); cm; cm = CMSG_NXTHDR(&msg, cm)) {
+ if ((cm->cmsg_level == SOL_IP &&
+ cm->cmsg_type == IP_ORIGDSTADDR) ||
+ (cm->cmsg_level == SOL_IPV6 &&
+ cm->cmsg_type == IPV6_ORIGDSTADDR)) {
+ dst_addr = (struct sockaddr_storage *)CMSG_DATA(cm);
+ break;
+ }
+ log_err("warning: ignored cmsg at level %d type %d",
+ cm->cmsg_level, cm->cmsg_type);
+ }
+ if (CHECK(!dst_addr, "recvmsg", "missing ORIGDSTADDR\n"))
+ return -1;
+
+ /* Server socket bound to IPv4-mapped IPv6 address */
+ if (src_addr->ss_family == AF_INET6 &&
+ dst_addr->ss_family == AF_INET) {
+ v4_to_v6(dst_addr);
+ }
+
+ /* Reply from original destination address. */
+ fd = socket(dst_addr->ss_family, SOCK_DGRAM, 0);
+ if (CHECK(fd < 0, "socket", "failed\n")) {
+ log_err("failed to create tx socket");
+ return -1;
+ }
+
+ ret = bind(fd, (struct sockaddr *)dst_addr, sizeof(*dst_addr));
+ if (CHECK(ret, "bind", "failed\n")) {
+ log_err("failed to bind tx socket");
+ goto out;
+ }
+
+ msg.msg_control = NULL;
+ msg.msg_controllen = 0;
+ n = sendmsg(fd, &msg, 0);
+ if (CHECK(n <= 0, "sendmsg", "failed\n")) {
+ log_err("failed to send echo reply");
+ ret = -1;
+ goto out;
+ }
+
+ ret = 0;
+out:
+ close(fd);
+ return ret;
+}
+
+static int tcp_echo_test(int client_fd, int server_fd)
+{
+ int err;
+
+ err = send_byte(client_fd);
+ if (err)
+ return -1;
+ err = tcp_recv_send(server_fd);
+ if (err)
+ return -1;
+ err = recv_byte(client_fd);
+ if (err)
+ return -1;
+
+ return 0;
+}
+
+static int udp_echo_test(int client_fd, int server_fd)
+{
+ int err;
+
+ err = send_byte(client_fd);
+ if (err)
+ return -1;
+ err = udp_recv_send(server_fd);
+ if (err)
+ return -1;
+ err = recv_byte(client_fd);
+ if (err)
+ return -1;
+
+ return 0;
+}
+
+static struct bpf_link *attach_lookup_prog(struct bpf_program *prog)
+{
+ struct bpf_link *link;
+ int net_fd;
+
+ net_fd = open("/proc/self/ns/net", O_RDONLY);
+ if (CHECK(net_fd < 0, "open", "failed\n")) {
+ log_err("failed to open /proc/self/ns/net");
+ return NULL;
+ }
+
+ link = bpf_program__attach_netns(prog, net_fd);
+ if (!ASSERT_OK_PTR(link, "bpf_program__attach_netns")) {
+ errno = -PTR_ERR(link);
+ log_err("failed to attach program '%s' to netns",
+ bpf_program__name(prog));
+ link = NULL;
+ }
+
+ close(net_fd);
+ return link;
+}
+
+static int update_lookup_map(struct bpf_map *map, int index, int sock_fd)
+{
+ int err, map_fd;
+ uint64_t value;
+
+ map_fd = bpf_map__fd(map);
+ if (CHECK(map_fd < 0, "bpf_map__fd", "failed\n")) {
+ errno = -map_fd;
+ log_err("failed to get map FD");
+ return -1;
+ }
+
+ value = (uint64_t)sock_fd;
+ err = bpf_map_update_elem(map_fd, &index, &value, BPF_NOEXIST);
+ if (CHECK(err, "bpf_map_update_elem", "failed\n")) {
+ log_err("failed to update redir_map @ %d", index);
+ return -1;
+ }
+
+ return 0;
+}
+
+static void query_lookup_prog(struct test_sk_lookup *skel)
+{
+ struct bpf_link *link[3] = {};
+ struct bpf_link_info info;
+ __u32 attach_flags = 0;
+ __u32 prog_ids[3] = {};
+ __u32 prog_cnt = 3;
+ __u32 prog_id;
+ int net_fd;
+ int err;
+
+ net_fd = open("/proc/self/ns/net", O_RDONLY);
+ if (CHECK(net_fd < 0, "open", "failed\n")) {
+ log_err("failed to open /proc/self/ns/net");
+ return;
+ }
+
+ link[0] = attach_lookup_prog(skel->progs.lookup_pass);
+ if (!link[0])
+ goto close;
+ link[1] = attach_lookup_prog(skel->progs.lookup_pass);
+ if (!link[1])
+ goto detach;
+ link[2] = attach_lookup_prog(skel->progs.lookup_drop);
+ if (!link[2])
+ goto detach;
+
+ err = bpf_prog_query(net_fd, BPF_SK_LOOKUP, 0 /* query flags */,
+ &attach_flags, prog_ids, &prog_cnt);
+ if (CHECK(err, "bpf_prog_query", "failed\n")) {
+ log_err("failed to query lookup prog");
+ goto detach;
+ }
+
+ errno = 0;
+ if (CHECK(attach_flags != 0, "bpf_prog_query",
+ "wrong attach_flags on query: %u", attach_flags))
+ goto detach;
+ if (CHECK(prog_cnt != 3, "bpf_prog_query",
+ "wrong program count on query: %u", prog_cnt))
+ goto detach;
+ prog_id = link_info_prog_id(link[0], &info);
+ CHECK(prog_ids[0] != prog_id, "bpf_prog_query",
+ "invalid program #0 id on query: %u != %u\n",
+ prog_ids[0], prog_id);
+ CHECK(info.netns.netns_ino == 0, "netns_ino",
+ "unexpected netns_ino: %u\n", info.netns.netns_ino);
+ prog_id = link_info_prog_id(link[1], &info);
+ CHECK(prog_ids[1] != prog_id, "bpf_prog_query",
+ "invalid program #1 id on query: %u != %u\n",
+ prog_ids[1], prog_id);
+ CHECK(info.netns.netns_ino == 0, "netns_ino",
+ "unexpected netns_ino: %u\n", info.netns.netns_ino);
+ prog_id = link_info_prog_id(link[2], &info);
+ CHECK(prog_ids[2] != prog_id, "bpf_prog_query",
+ "invalid program #2 id on query: %u != %u\n",
+ prog_ids[2], prog_id);
+ CHECK(info.netns.netns_ino == 0, "netns_ino",
+ "unexpected netns_ino: %u\n", info.netns.netns_ino);
+
+ err = bpf_link__detach(link[0]);
+ if (CHECK(err, "link_detach", "failed %d\n", err))
+ goto detach;
+
+ /* prog id is still there, but netns_ino is zeroed out */
+ prog_id = link_info_prog_id(link[0], &info);
+ CHECK(prog_ids[0] != prog_id, "bpf_prog_query",
+ "invalid program #0 id on query: %u != %u\n",
+ prog_ids[0], prog_id);
+ CHECK(info.netns.netns_ino != 0, "netns_ino",
+ "unexpected netns_ino: %u\n", info.netns.netns_ino);
+
+detach:
+ if (link[2])
+ bpf_link__destroy(link[2]);
+ if (link[1])
+ bpf_link__destroy(link[1]);
+ if (link[0])
+ bpf_link__destroy(link[0]);
+close:
+ close(net_fd);
+}
+
+static void run_lookup_prog(const struct test *t)
+{
+ int server_fds[] = { [0 ... MAX_SERVERS - 1] = -1 };
+ int client_fd, reuse_conn_fd = -1;
+ struct bpf_link *lookup_link;
+ int i, err;
+
+ lookup_link = attach_lookup_prog(t->lookup_prog);
+ if (!lookup_link)
+ return;
+
+ for (i = 0; i < ARRAY_SIZE(server_fds); i++) {
+ server_fds[i] = make_server(t->sotype, t->listen_at.ip,
+ t->listen_at.port,
+ t->reuseport_prog);
+ if (server_fds[i] < 0)
+ goto close;
+
+ err = update_lookup_map(t->sock_map, i, server_fds[i]);
+ if (err)
+ goto close;
+
+ /* want just one server for non-reuseport test */
+ if (!t->reuseport_prog)
+ break;
+ }
+
+ /* Regular UDP socket lookup with reuseport behaves
+ * differently when reuseport group contains connected
+ * sockets. Check that adding a connected UDP socket to the
+ * reuseport group does not affect how reuseport works with
+ * BPF socket lookup.
+ */
+ if (t->reuseport_has_conns) {
+ struct sockaddr_storage addr = {};
+ socklen_t len = sizeof(addr);
+
+ /* Add an extra socket to reuseport group */
+ reuse_conn_fd = make_server(t->sotype, t->listen_at.ip,
+ t->listen_at.port,
+ t->reuseport_prog);
+ if (reuse_conn_fd < 0)
+ goto close;
+
+ /* Connect the extra socket to itself */
+ err = getsockname(reuse_conn_fd, (void *)&addr, &len);
+ if (CHECK(err, "getsockname", "errno %d\n", errno))
+ goto close;
+ err = connect(reuse_conn_fd, (void *)&addr, len);
+ if (CHECK(err, "connect", "errno %d\n", errno))
+ goto close;
+ }
+
+ client_fd = make_client(t->sotype, t->connect_to.ip, t->connect_to.port);
+ if (client_fd < 0)
+ goto close;
+
+ if (t->sotype == SOCK_STREAM)
+ tcp_echo_test(client_fd, server_fds[t->accept_on]);
+ else
+ udp_echo_test(client_fd, server_fds[t->accept_on]);
+
+ close(client_fd);
+close:
+ if (reuse_conn_fd != -1)
+ close(reuse_conn_fd);
+ for (i = 0; i < ARRAY_SIZE(server_fds); i++) {
+ if (server_fds[i] != -1)
+ close(server_fds[i]);
+ }
+ bpf_link__destroy(lookup_link);
+}
+
+static void test_redirect_lookup(struct test_sk_lookup *skel)
+{
+ const struct test tests[] = {
+ {
+ .desc = "TCP IPv4 redir port",
+ .lookup_prog = skel->progs.redir_port,
+ .sock_map = skel->maps.redir_map,
+ .sotype = SOCK_STREAM,
+ .connect_to = { EXT_IP4, EXT_PORT },
+ .listen_at = { EXT_IP4, INT_PORT },
+ },
+ {
+ .desc = "TCP IPv4 redir addr",
+ .lookup_prog = skel->progs.redir_ip4,
+ .sock_map = skel->maps.redir_map,
+ .sotype = SOCK_STREAM,
+ .connect_to = { EXT_IP4, EXT_PORT },
+ .listen_at = { INT_IP4, EXT_PORT },
+ },
+ {
+ .desc = "TCP IPv4 redir with reuseport",
+ .lookup_prog = skel->progs.select_sock_a,
+ .reuseport_prog = skel->progs.select_sock_b,
+ .sock_map = skel->maps.redir_map,
+ .sotype = SOCK_STREAM,
+ .connect_to = { EXT_IP4, EXT_PORT },
+ .listen_at = { INT_IP4, INT_PORT },
+ .accept_on = SERVER_B,
+ },
+ {
+ .desc = "TCP IPv4 redir skip reuseport",
+ .lookup_prog = skel->progs.select_sock_a_no_reuseport,
+ .reuseport_prog = skel->progs.select_sock_b,
+ .sock_map = skel->maps.redir_map,
+ .sotype = SOCK_STREAM,
+ .connect_to = { EXT_IP4, EXT_PORT },
+ .listen_at = { INT_IP4, INT_PORT },
+ .accept_on = SERVER_A,
+ },
+ {
+ .desc = "TCP IPv6 redir port",
+ .lookup_prog = skel->progs.redir_port,
+ .sock_map = skel->maps.redir_map,
+ .sotype = SOCK_STREAM,
+ .connect_to = { EXT_IP6, EXT_PORT },
+ .listen_at = { EXT_IP6, INT_PORT },
+ },
+ {
+ .desc = "TCP IPv6 redir addr",
+ .lookup_prog = skel->progs.redir_ip6,
+ .sock_map = skel->maps.redir_map,
+ .sotype = SOCK_STREAM,
+ .connect_to = { EXT_IP6, EXT_PORT },
+ .listen_at = { INT_IP6, EXT_PORT },
+ },
+ {
+ .desc = "TCP IPv4->IPv6 redir port",
+ .lookup_prog = skel->progs.redir_port,
+ .sock_map = skel->maps.redir_map,
+ .sotype = SOCK_STREAM,
+ .connect_to = { EXT_IP4, EXT_PORT },
+ .listen_at = { INT_IP4_V6, INT_PORT },
+ },
+ {
+ .desc = "TCP IPv6 redir with reuseport",
+ .lookup_prog = skel->progs.select_sock_a,
+ .reuseport_prog = skel->progs.select_sock_b,
+ .sock_map = skel->maps.redir_map,
+ .sotype = SOCK_STREAM,
+ .connect_to = { EXT_IP6, EXT_PORT },
+ .listen_at = { INT_IP6, INT_PORT },
+ .accept_on = SERVER_B,
+ },
+ {
+ .desc = "TCP IPv6 redir skip reuseport",
+ .lookup_prog = skel->progs.select_sock_a_no_reuseport,
+ .reuseport_prog = skel->progs.select_sock_b,
+ .sock_map = skel->maps.redir_map,
+ .sotype = SOCK_STREAM,
+ .connect_to = { EXT_IP6, EXT_PORT },
+ .listen_at = { INT_IP6, INT_PORT },
+ .accept_on = SERVER_A,
+ },
+ {
+ .desc = "UDP IPv4 redir port",
+ .lookup_prog = skel->progs.redir_port,
+ .sock_map = skel->maps.redir_map,
+ .sotype = SOCK_DGRAM,
+ .connect_to = { EXT_IP4, EXT_PORT },
+ .listen_at = { EXT_IP4, INT_PORT },
+ },
+ {
+ .desc = "UDP IPv4 redir addr",
+ .lookup_prog = skel->progs.redir_ip4,
+ .sock_map = skel->maps.redir_map,
+ .sotype = SOCK_DGRAM,
+ .connect_to = { EXT_IP4, EXT_PORT },
+ .listen_at = { INT_IP4, EXT_PORT },
+ },
+ {
+ .desc = "UDP IPv4 redir with reuseport",
+ .lookup_prog = skel->progs.select_sock_a,
+ .reuseport_prog = skel->progs.select_sock_b,
+ .sock_map = skel->maps.redir_map,
+ .sotype = SOCK_DGRAM,
+ .connect_to = { EXT_IP4, EXT_PORT },
+ .listen_at = { INT_IP4, INT_PORT },
+ .accept_on = SERVER_B,
+ },
+ {
+ .desc = "UDP IPv4 redir and reuseport with conns",
+ .lookup_prog = skel->progs.select_sock_a,
+ .reuseport_prog = skel->progs.select_sock_b,
+ .sock_map = skel->maps.redir_map,
+ .sotype = SOCK_DGRAM,
+ .connect_to = { EXT_IP4, EXT_PORT },
+ .listen_at = { INT_IP4, INT_PORT },
+ .accept_on = SERVER_B,
+ .reuseport_has_conns = true,
+ },
+ {
+ .desc = "UDP IPv4 redir skip reuseport",
+ .lookup_prog = skel->progs.select_sock_a_no_reuseport,
+ .reuseport_prog = skel->progs.select_sock_b,
+ .sock_map = skel->maps.redir_map,
+ .sotype = SOCK_DGRAM,
+ .connect_to = { EXT_IP4, EXT_PORT },
+ .listen_at = { INT_IP4, INT_PORT },
+ .accept_on = SERVER_A,
+ },
+ {
+ .desc = "UDP IPv6 redir port",
+ .lookup_prog = skel->progs.redir_port,
+ .sock_map = skel->maps.redir_map,
+ .sotype = SOCK_DGRAM,
+ .connect_to = { EXT_IP6, EXT_PORT },
+ .listen_at = { EXT_IP6, INT_PORT },
+ },
+ {
+ .desc = "UDP IPv6 redir addr",
+ .lookup_prog = skel->progs.redir_ip6,
+ .sock_map = skel->maps.redir_map,
+ .sotype = SOCK_DGRAM,
+ .connect_to = { EXT_IP6, EXT_PORT },
+ .listen_at = { INT_IP6, EXT_PORT },
+ },
+ {
+ .desc = "UDP IPv4->IPv6 redir port",
+ .lookup_prog = skel->progs.redir_port,
+ .sock_map = skel->maps.redir_map,
+ .sotype = SOCK_DGRAM,
+ .listen_at = { INT_IP4_V6, INT_PORT },
+ .connect_to = { EXT_IP4, EXT_PORT },
+ },
+ {
+ .desc = "UDP IPv6 redir and reuseport",
+ .lookup_prog = skel->progs.select_sock_a,
+ .reuseport_prog = skel->progs.select_sock_b,
+ .sock_map = skel->maps.redir_map,
+ .sotype = SOCK_DGRAM,
+ .connect_to = { EXT_IP6, EXT_PORT },
+ .listen_at = { INT_IP6, INT_PORT },
+ .accept_on = SERVER_B,
+ },
+ {
+ .desc = "UDP IPv6 redir and reuseport with conns",
+ .lookup_prog = skel->progs.select_sock_a,
+ .reuseport_prog = skel->progs.select_sock_b,
+ .sock_map = skel->maps.redir_map,
+ .sotype = SOCK_DGRAM,
+ .connect_to = { EXT_IP6, EXT_PORT },
+ .listen_at = { INT_IP6, INT_PORT },
+ .accept_on = SERVER_B,
+ .reuseport_has_conns = true,
+ },
+ {
+ .desc = "UDP IPv6 redir skip reuseport",
+ .lookup_prog = skel->progs.select_sock_a_no_reuseport,
+ .reuseport_prog = skel->progs.select_sock_b,
+ .sock_map = skel->maps.redir_map,
+ .sotype = SOCK_DGRAM,
+ .connect_to = { EXT_IP6, EXT_PORT },
+ .listen_at = { INT_IP6, INT_PORT },
+ .accept_on = SERVER_A,
+ },
+ };
+ const struct test *t;
+
+ for (t = tests; t < tests + ARRAY_SIZE(tests); t++) {
+ if (test__start_subtest(t->desc))
+ run_lookup_prog(t);
+ }
+}
+
+static void drop_on_lookup(const struct test *t)
+{
+ struct sockaddr_storage dst = {};
+ int client_fd, server_fd, err;
+ struct bpf_link *lookup_link;
+ ssize_t n;
+
+ lookup_link = attach_lookup_prog(t->lookup_prog);
+ if (!lookup_link)
+ return;
+
+ server_fd = make_server(t->sotype, t->listen_at.ip, t->listen_at.port,
+ t->reuseport_prog);
+ if (server_fd < 0)
+ goto detach;
+
+ client_fd = make_socket(t->sotype, t->connect_to.ip,
+ t->connect_to.port, &dst);
+ if (client_fd < 0)
+ goto close_srv;
+
+ err = connect(client_fd, (void *)&dst, inetaddr_len(&dst));
+ if (t->sotype == SOCK_DGRAM) {
+ err = send_byte(client_fd);
+ if (err)
+ goto close_all;
+
+ /* Read out asynchronous error */
+ n = recv(client_fd, NULL, 0, 0);
+ err = n == -1;
+ }
+ if (CHECK(!err || errno != ECONNREFUSED, "connect",
+ "unexpected success or error\n"))
+ log_err("expected ECONNREFUSED on connect");
+
+close_all:
+ close(client_fd);
+close_srv:
+ close(server_fd);
+detach:
+ bpf_link__destroy(lookup_link);
+}
+
+static void test_drop_on_lookup(struct test_sk_lookup *skel)
+{
+ const struct test tests[] = {
+ {
+ .desc = "TCP IPv4 drop on lookup",
+ .lookup_prog = skel->progs.lookup_drop,
+ .sotype = SOCK_STREAM,
+ .connect_to = { EXT_IP4, EXT_PORT },
+ .listen_at = { EXT_IP4, EXT_PORT },
+ },
+ {
+ .desc = "TCP IPv6 drop on lookup",
+ .lookup_prog = skel->progs.lookup_drop,
+ .sotype = SOCK_STREAM,
+ .connect_to = { EXT_IP6, EXT_PORT },
+ .listen_at = { EXT_IP6, EXT_PORT },
+ },
+ {
+ .desc = "UDP IPv4 drop on lookup",
+ .lookup_prog = skel->progs.lookup_drop,
+ .sotype = SOCK_DGRAM,
+ .connect_to = { EXT_IP4, EXT_PORT },
+ .listen_at = { EXT_IP4, EXT_PORT },
+ },
+ {
+ .desc = "UDP IPv6 drop on lookup",
+ .lookup_prog = skel->progs.lookup_drop,
+ .sotype = SOCK_DGRAM,
+ .connect_to = { EXT_IP6, EXT_PORT },
+ .listen_at = { EXT_IP6, INT_PORT },
+ },
+ /* The program will drop on success, meaning that the ifindex
+ * was 1.
+ */
+ {
+ .desc = "TCP IPv4 drop on valid ifindex",
+ .lookup_prog = skel->progs.check_ifindex,
+ .sotype = SOCK_STREAM,
+ .connect_to = { EXT_IP4, EXT_PORT },
+ .listen_at = { EXT_IP4, EXT_PORT },
+ },
+ {
+ .desc = "TCP IPv6 drop on valid ifindex",
+ .lookup_prog = skel->progs.check_ifindex,
+ .sotype = SOCK_STREAM,
+ .connect_to = { EXT_IP6, EXT_PORT },
+ .listen_at = { EXT_IP6, EXT_PORT },
+ },
+ {
+ .desc = "UDP IPv4 drop on valid ifindex",
+ .lookup_prog = skel->progs.check_ifindex,
+ .sotype = SOCK_DGRAM,
+ .connect_to = { EXT_IP4, EXT_PORT },
+ .listen_at = { EXT_IP4, EXT_PORT },
+ },
+ {
+ .desc = "UDP IPv6 drop on valid ifindex",
+ .lookup_prog = skel->progs.check_ifindex,
+ .sotype = SOCK_DGRAM,
+ .connect_to = { EXT_IP6, EXT_PORT },
+ .listen_at = { EXT_IP6, EXT_PORT },
+ },
+ };
+ const struct test *t;
+
+ for (t = tests; t < tests + ARRAY_SIZE(tests); t++) {
+ if (test__start_subtest(t->desc))
+ drop_on_lookup(t);
+ }
+}
+
+static void drop_on_reuseport(const struct test *t)
+{
+ struct sockaddr_storage dst = { 0 };
+ int client, server1, server2, err;
+ struct bpf_link *lookup_link;
+ ssize_t n;
+
+ lookup_link = attach_lookup_prog(t->lookup_prog);
+ if (!lookup_link)
+ return;
+
+ server1 = make_server(t->sotype, t->listen_at.ip, t->listen_at.port,
+ t->reuseport_prog);
+ if (server1 < 0)
+ goto detach;
+
+ err = update_lookup_map(t->sock_map, SERVER_A, server1);
+ if (err)
+ goto detach;
+
+ /* second server on destination address we should never reach */
+ server2 = make_server(t->sotype, t->connect_to.ip, t->connect_to.port,
+ NULL /* reuseport prog */);
+ if (server2 < 0)
+ goto close_srv1;
+
+ client = make_socket(t->sotype, t->connect_to.ip,
+ t->connect_to.port, &dst);
+ if (client < 0)
+ goto close_srv2;
+
+ err = connect(client, (void *)&dst, inetaddr_len(&dst));
+ if (t->sotype == SOCK_DGRAM) {
+ err = send_byte(client);
+ if (err)
+ goto close_all;
+
+ /* Read out asynchronous error */
+ n = recv(client, NULL, 0, 0);
+ err = n == -1;
+ }
+ if (CHECK(!err || errno != ECONNREFUSED, "connect",
+ "unexpected success or error\n"))
+ log_err("expected ECONNREFUSED on connect");
+
+close_all:
+ close(client);
+close_srv2:
+ close(server2);
+close_srv1:
+ close(server1);
+detach:
+ bpf_link__destroy(lookup_link);
+}
+
+static void test_drop_on_reuseport(struct test_sk_lookup *skel)
+{
+ const struct test tests[] = {
+ {
+ .desc = "TCP IPv4 drop on reuseport",
+ .lookup_prog = skel->progs.select_sock_a,
+ .reuseport_prog = skel->progs.reuseport_drop,
+ .sock_map = skel->maps.redir_map,
+ .sotype = SOCK_STREAM,
+ .connect_to = { EXT_IP4, EXT_PORT },
+ .listen_at = { INT_IP4, INT_PORT },
+ },
+ {
+ .desc = "TCP IPv6 drop on reuseport",
+ .lookup_prog = skel->progs.select_sock_a,
+ .reuseport_prog = skel->progs.reuseport_drop,
+ .sock_map = skel->maps.redir_map,
+ .sotype = SOCK_STREAM,
+ .connect_to = { EXT_IP6, EXT_PORT },
+ .listen_at = { INT_IP6, INT_PORT },
+ },
+ {
+ .desc = "UDP IPv4 drop on reuseport",
+ .lookup_prog = skel->progs.select_sock_a,
+ .reuseport_prog = skel->progs.reuseport_drop,
+ .sock_map = skel->maps.redir_map,
+ .sotype = SOCK_DGRAM,
+ .connect_to = { EXT_IP4, EXT_PORT },
+ .listen_at = { INT_IP4, INT_PORT },
+ },
+ {
+ .desc = "TCP IPv6 drop on reuseport",
+ .lookup_prog = skel->progs.select_sock_a,
+ .reuseport_prog = skel->progs.reuseport_drop,
+ .sock_map = skel->maps.redir_map,
+ .sotype = SOCK_STREAM,
+ .connect_to = { EXT_IP6, EXT_PORT },
+ .listen_at = { INT_IP6, INT_PORT },
+ },
+ };
+ const struct test *t;
+
+ for (t = tests; t < tests + ARRAY_SIZE(tests); t++) {
+ if (test__start_subtest(t->desc))
+ drop_on_reuseport(t);
+ }
+}
+
+static void run_sk_assign(struct test_sk_lookup *skel,
+ struct bpf_program *lookup_prog,
+ const char *remote_ip, const char *local_ip)
+{
+ int server_fds[] = { [0 ... MAX_SERVERS - 1] = -1 };
+ struct bpf_sk_lookup ctx;
+ __u64 server_cookie;
+ int i, err;
+
+ DECLARE_LIBBPF_OPTS(bpf_test_run_opts, opts,
+ .ctx_in = &ctx,
+ .ctx_size_in = sizeof(ctx),
+ .ctx_out = &ctx,
+ .ctx_size_out = sizeof(ctx),
+ );
+
+ if (fill_sk_lookup_ctx(&ctx, local_ip, EXT_PORT, remote_ip, INT_PORT))
+ return;
+
+ ctx.protocol = IPPROTO_TCP;
+
+ for (i = 0; i < ARRAY_SIZE(server_fds); i++) {
+ server_fds[i] = make_server(SOCK_STREAM, local_ip, 0, NULL);
+ if (server_fds[i] < 0)
+ goto close_servers;
+
+ err = update_lookup_map(skel->maps.redir_map, i,
+ server_fds[i]);
+ if (err)
+ goto close_servers;
+ }
+
+ server_cookie = socket_cookie(server_fds[SERVER_B]);
+ if (!server_cookie)
+ return;
+
+ err = bpf_prog_test_run_opts(bpf_program__fd(lookup_prog), &opts);
+ if (CHECK(err, "test_run", "failed with error %d\n", errno))
+ goto close_servers;
+
+ if (CHECK(ctx.cookie == 0, "ctx.cookie", "no socket selected\n"))
+ goto close_servers;
+
+ CHECK(ctx.cookie != server_cookie, "ctx.cookie",
+ "selected sk %llu instead of %llu\n", ctx.cookie, server_cookie);
+
+close_servers:
+ for (i = 0; i < ARRAY_SIZE(server_fds); i++) {
+ if (server_fds[i] != -1)
+ close(server_fds[i]);
+ }
+}
+
+static void run_sk_assign_v4(struct test_sk_lookup *skel,
+ struct bpf_program *lookup_prog)
+{
+ run_sk_assign(skel, lookup_prog, INT_IP4, EXT_IP4);
+}
+
+static void run_sk_assign_v6(struct test_sk_lookup *skel,
+ struct bpf_program *lookup_prog)
+{
+ run_sk_assign(skel, lookup_prog, INT_IP6, EXT_IP6);
+}
+
+static void run_sk_assign_connected(struct test_sk_lookup *skel,
+ int sotype)
+{
+ int err, client_fd, connected_fd, server_fd;
+ struct bpf_link *lookup_link;
+
+ server_fd = make_server(sotype, EXT_IP4, EXT_PORT, NULL);
+ if (server_fd < 0)
+ return;
+
+ connected_fd = make_client(sotype, EXT_IP4, EXT_PORT);
+ if (connected_fd < 0)
+ goto out_close_server;
+
+ /* Put a connected socket in redirect map */
+ err = update_lookup_map(skel->maps.redir_map, SERVER_A, connected_fd);
+ if (err)
+ goto out_close_connected;
+
+ lookup_link = attach_lookup_prog(skel->progs.sk_assign_esocknosupport);
+ if (!lookup_link)
+ goto out_close_connected;
+
+ /* Try to redirect TCP SYN / UDP packet to a connected socket */
+ client_fd = make_client(sotype, EXT_IP4, EXT_PORT);
+ if (client_fd < 0)
+ goto out_unlink_prog;
+ if (sotype == SOCK_DGRAM) {
+ send_byte(client_fd);
+ recv_byte(server_fd);
+ }
+
+ close(client_fd);
+out_unlink_prog:
+ bpf_link__destroy(lookup_link);
+out_close_connected:
+ close(connected_fd);
+out_close_server:
+ close(server_fd);
+}
+
+static void test_sk_assign_helper(struct test_sk_lookup *skel)
+{
+ if (test__start_subtest("sk_assign returns EEXIST"))
+ run_sk_assign_v4(skel, skel->progs.sk_assign_eexist);
+ if (test__start_subtest("sk_assign honors F_REPLACE"))
+ run_sk_assign_v4(skel, skel->progs.sk_assign_replace_flag);
+ if (test__start_subtest("sk_assign accepts NULL socket"))
+ run_sk_assign_v4(skel, skel->progs.sk_assign_null);
+ if (test__start_subtest("access ctx->sk"))
+ run_sk_assign_v4(skel, skel->progs.access_ctx_sk);
+ if (test__start_subtest("narrow access to ctx v4"))
+ run_sk_assign_v4(skel, skel->progs.ctx_narrow_access);
+ if (test__start_subtest("narrow access to ctx v6"))
+ run_sk_assign_v6(skel, skel->progs.ctx_narrow_access);
+ if (test__start_subtest("sk_assign rejects TCP established"))
+ run_sk_assign_connected(skel, SOCK_STREAM);
+ if (test__start_subtest("sk_assign rejects UDP connected"))
+ run_sk_assign_connected(skel, SOCK_DGRAM);
+}
+
+struct test_multi_prog {
+ const char *desc;
+ struct bpf_program *prog1;
+ struct bpf_program *prog2;
+ struct bpf_map *redir_map;
+ struct bpf_map *run_map;
+ int expect_errno;
+ struct inet_addr listen_at;
+};
+
+static void run_multi_prog_lookup(const struct test_multi_prog *t)
+{
+ struct sockaddr_storage dst = {};
+ int map_fd, server_fd, client_fd;
+ struct bpf_link *link1, *link2;
+ int prog_idx, done, err;
+
+ map_fd = bpf_map__fd(t->run_map);
+
+ done = 0;
+ prog_idx = PROG1;
+ err = bpf_map_update_elem(map_fd, &prog_idx, &done, BPF_ANY);
+ if (CHECK(err, "bpf_map_update_elem", "failed\n"))
+ return;
+ prog_idx = PROG2;
+ err = bpf_map_update_elem(map_fd, &prog_idx, &done, BPF_ANY);
+ if (CHECK(err, "bpf_map_update_elem", "failed\n"))
+ return;
+
+ link1 = attach_lookup_prog(t->prog1);
+ if (!link1)
+ return;
+ link2 = attach_lookup_prog(t->prog2);
+ if (!link2)
+ goto out_unlink1;
+
+ server_fd = make_server(SOCK_STREAM, t->listen_at.ip,
+ t->listen_at.port, NULL);
+ if (server_fd < 0)
+ goto out_unlink2;
+
+ err = update_lookup_map(t->redir_map, SERVER_A, server_fd);
+ if (err)
+ goto out_close_server;
+
+ client_fd = make_socket(SOCK_STREAM, EXT_IP4, EXT_PORT, &dst);
+ if (client_fd < 0)
+ goto out_close_server;
+
+ err = connect(client_fd, (void *)&dst, inetaddr_len(&dst));
+ if (CHECK(err && !t->expect_errno, "connect",
+ "unexpected error %d\n", errno))
+ goto out_close_client;
+ if (CHECK(err && t->expect_errno && errno != t->expect_errno,
+ "connect", "unexpected error %d\n", errno))
+ goto out_close_client;
+
+ done = 0;
+ prog_idx = PROG1;
+ err = bpf_map_lookup_elem(map_fd, &prog_idx, &done);
+ CHECK(err, "bpf_map_lookup_elem", "failed\n");
+ CHECK(!done, "bpf_map_lookup_elem", "PROG1 !done\n");
+
+ done = 0;
+ prog_idx = PROG2;
+ err = bpf_map_lookup_elem(map_fd, &prog_idx, &done);
+ CHECK(err, "bpf_map_lookup_elem", "failed\n");
+ CHECK(!done, "bpf_map_lookup_elem", "PROG2 !done\n");
+
+out_close_client:
+ close(client_fd);
+out_close_server:
+ close(server_fd);
+out_unlink2:
+ bpf_link__destroy(link2);
+out_unlink1:
+ bpf_link__destroy(link1);
+}
+
+static void test_multi_prog_lookup(struct test_sk_lookup *skel)
+{
+ struct test_multi_prog tests[] = {
+ {
+ .desc = "multi prog - pass, pass",
+ .prog1 = skel->progs.multi_prog_pass1,
+ .prog2 = skel->progs.multi_prog_pass2,
+ .listen_at = { EXT_IP4, EXT_PORT },
+ },
+ {
+ .desc = "multi prog - drop, drop",
+ .prog1 = skel->progs.multi_prog_drop1,
+ .prog2 = skel->progs.multi_prog_drop2,
+ .listen_at = { EXT_IP4, EXT_PORT },
+ .expect_errno = ECONNREFUSED,
+ },
+ {
+ .desc = "multi prog - pass, drop",
+ .prog1 = skel->progs.multi_prog_pass1,
+ .prog2 = skel->progs.multi_prog_drop2,
+ .listen_at = { EXT_IP4, EXT_PORT },
+ .expect_errno = ECONNREFUSED,
+ },
+ {
+ .desc = "multi prog - drop, pass",
+ .prog1 = skel->progs.multi_prog_drop1,
+ .prog2 = skel->progs.multi_prog_pass2,
+ .listen_at = { EXT_IP4, EXT_PORT },
+ .expect_errno = ECONNREFUSED,
+ },
+ {
+ .desc = "multi prog - pass, redir",
+ .prog1 = skel->progs.multi_prog_pass1,
+ .prog2 = skel->progs.multi_prog_redir2,
+ .listen_at = { INT_IP4, INT_PORT },
+ },
+ {
+ .desc = "multi prog - redir, pass",
+ .prog1 = skel->progs.multi_prog_redir1,
+ .prog2 = skel->progs.multi_prog_pass2,
+ .listen_at = { INT_IP4, INT_PORT },
+ },
+ {
+ .desc = "multi prog - drop, redir",
+ .prog1 = skel->progs.multi_prog_drop1,
+ .prog2 = skel->progs.multi_prog_redir2,
+ .listen_at = { INT_IP4, INT_PORT },
+ },
+ {
+ .desc = "multi prog - redir, drop",
+ .prog1 = skel->progs.multi_prog_redir1,
+ .prog2 = skel->progs.multi_prog_drop2,
+ .listen_at = { INT_IP4, INT_PORT },
+ },
+ {
+ .desc = "multi prog - redir, redir",
+ .prog1 = skel->progs.multi_prog_redir1,
+ .prog2 = skel->progs.multi_prog_redir2,
+ .listen_at = { INT_IP4, INT_PORT },
+ },
+ };
+ struct test_multi_prog *t;
+
+ for (t = tests; t < tests + ARRAY_SIZE(tests); t++) {
+ t->redir_map = skel->maps.redir_map;
+ t->run_map = skel->maps.run_map;
+ if (test__start_subtest(t->desc))
+ run_multi_prog_lookup(t);
+ }
+}
+
+static void run_tests(struct test_sk_lookup *skel)
+{
+ if (test__start_subtest("query lookup prog"))
+ query_lookup_prog(skel);
+ test_redirect_lookup(skel);
+ test_drop_on_lookup(skel);
+ test_drop_on_reuseport(skel);
+ test_sk_assign_helper(skel);
+ test_multi_prog_lookup(skel);
+}
+
+static int switch_netns(void)
+{
+ static const char * const setup_script[] = {
+ "ip -6 addr add dev lo " EXT_IP6 "/128",
+ "ip -6 addr add dev lo " INT_IP6 "/128",
+ "ip link set dev lo up",
+ NULL,
+ };
+ const char * const *cmd;
+ int err;
+
+ err = unshare(CLONE_NEWNET);
+ if (CHECK(err, "unshare", "failed\n")) {
+ log_err("unshare(CLONE_NEWNET)");
+ return -1;
+ }
+
+ for (cmd = setup_script; *cmd; cmd++) {
+ err = system(*cmd);
+ if (CHECK(err, "system", "failed\n")) {
+ log_err("system(%s)", *cmd);
+ return -1;
+ }
+ }
+
+ return 0;
+}
+
+void test_sk_lookup(void)
+{
+ struct test_sk_lookup *skel;
+ int err;
+
+ err = switch_netns();
+ if (err)
+ return;
+
+ skel = test_sk_lookup__open_and_load();
+ if (CHECK(!skel, "skel open_and_load", "failed\n"))
+ return;
+
+ run_tests(skel);
+
+ test_sk_lookup__destroy(skel);
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/sk_storage_omem_uncharge.c b/tools/testing/selftests/bpf/prog_tests/sk_storage_omem_uncharge.c
new file mode 100644
index 000000000000..f35852d245e3
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/sk_storage_omem_uncharge.c
@@ -0,0 +1,56 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2023 Facebook */
+#include <test_progs.h>
+#include <bpf/libbpf.h>
+#include <sys/types.h>
+#include <sys/socket.h>
+#include "sk_storage_omem_uncharge.skel.h"
+
+void test_sk_storage_omem_uncharge(void)
+{
+ struct sk_storage_omem_uncharge *skel;
+ int sk_fd = -1, map_fd, err, value;
+ socklen_t optlen;
+
+ skel = sk_storage_omem_uncharge__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "skel open_and_load"))
+ return;
+ map_fd = bpf_map__fd(skel->maps.sk_storage);
+
+ /* A standalone socket not binding to addr:port,
+ * so nentns is not needed.
+ */
+ sk_fd = socket(AF_INET6, SOCK_STREAM, 0);
+ if (!ASSERT_GE(sk_fd, 0, "socket"))
+ goto done;
+
+ optlen = sizeof(skel->bss->cookie);
+ err = getsockopt(sk_fd, SOL_SOCKET, SO_COOKIE, &skel->bss->cookie, &optlen);
+ if (!ASSERT_OK(err, "getsockopt(SO_COOKIE)"))
+ goto done;
+
+ value = 0;
+ err = bpf_map_update_elem(map_fd, &sk_fd, &value, 0);
+ if (!ASSERT_OK(err, "bpf_map_update_elem(value=0)"))
+ goto done;
+
+ value = 0xdeadbeef;
+ err = bpf_map_update_elem(map_fd, &sk_fd, &value, 0);
+ if (!ASSERT_OK(err, "bpf_map_update_elem(value=0xdeadbeef)"))
+ goto done;
+
+ err = sk_storage_omem_uncharge__attach(skel);
+ if (!ASSERT_OK(err, "attach"))
+ goto done;
+
+ close(sk_fd);
+ sk_fd = -1;
+
+ ASSERT_EQ(skel->bss->cookie_found, 2, "cookie_found");
+ ASSERT_EQ(skel->bss->omem, 0, "omem");
+
+done:
+ sk_storage_omem_uncharge__destroy(skel);
+ if (sk_fd != -1)
+ close(sk_fd);
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/sk_storage_tracing.c b/tools/testing/selftests/bpf/prog_tests/sk_storage_tracing.c
new file mode 100644
index 000000000000..547ae53cde74
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/sk_storage_tracing.c
@@ -0,0 +1,135 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2020 Facebook */
+
+#include <sys/types.h>
+#include <bpf/bpf.h>
+#include <bpf/libbpf.h>
+#include "test_progs.h"
+#include "network_helpers.h"
+#include "test_sk_storage_trace_itself.skel.h"
+#include "test_sk_storage_tracing.skel.h"
+
+#define LO_ADDR6 "::1"
+#define TEST_COMM "test_progs"
+
+struct sk_stg {
+ __u32 pid;
+ __u32 last_notclose_state;
+ char comm[16];
+};
+
+static struct test_sk_storage_tracing *skel;
+static __u32 duration;
+static pid_t my_pid;
+
+static int check_sk_stg(int sk_fd, __u32 expected_state)
+{
+ struct sk_stg sk_stg;
+ int err;
+
+ err = bpf_map_lookup_elem(bpf_map__fd(skel->maps.sk_stg_map), &sk_fd,
+ &sk_stg);
+ if (!ASSERT_OK(err, "map_lookup(sk_stg_map)"))
+ return -1;
+
+ if (!ASSERT_EQ(sk_stg.last_notclose_state, expected_state,
+ "last_notclose_state"))
+ return -1;
+
+ if (!ASSERT_EQ(sk_stg.pid, my_pid, "pid"))
+ return -1;
+
+ if (!ASSERT_STREQ(sk_stg.comm, skel->bss->task_comm, "task_comm"))
+ return -1;
+
+ return 0;
+}
+
+static void do_test(void)
+{
+ int listen_fd = -1, passive_fd = -1, active_fd = -1, value = 1, err;
+ char abyte;
+
+ listen_fd = start_server(AF_INET6, SOCK_STREAM, LO_ADDR6, 0, 0);
+ if (CHECK(listen_fd == -1, "start_server",
+ "listen_fd:%d errno:%d\n", listen_fd, errno))
+ return;
+
+ active_fd = connect_to_fd(listen_fd, 0);
+ if (CHECK(active_fd == -1, "connect_to_fd", "active_fd:%d errno:%d\n",
+ active_fd, errno))
+ goto out;
+
+ err = bpf_map_update_elem(bpf_map__fd(skel->maps.del_sk_stg_map),
+ &active_fd, &value, 0);
+ if (!ASSERT_OK(err, "map_update(del_sk_stg_map)"))
+ goto out;
+
+ passive_fd = accept(listen_fd, NULL, 0);
+ if (CHECK(passive_fd == -1, "accept", "passive_fd:%d errno:%d\n",
+ passive_fd, errno))
+ goto out;
+
+ shutdown(active_fd, SHUT_WR);
+ err = read(passive_fd, &abyte, 1);
+ if (!ASSERT_OK(err, "read(passive_fd)"))
+ goto out;
+
+ shutdown(passive_fd, SHUT_WR);
+ err = read(active_fd, &abyte, 1);
+ if (!ASSERT_OK(err, "read(active_fd)"))
+ goto out;
+
+ err = bpf_map_lookup_elem(bpf_map__fd(skel->maps.del_sk_stg_map),
+ &active_fd, &value);
+ if (!ASSERT_ERR(err, "map_lookup(del_sk_stg_map)"))
+ goto out;
+
+ err = check_sk_stg(listen_fd, BPF_TCP_LISTEN);
+ if (!ASSERT_OK(err, "listen_fd sk_stg"))
+ goto out;
+
+ err = check_sk_stg(active_fd, BPF_TCP_FIN_WAIT2);
+ if (!ASSERT_OK(err, "active_fd sk_stg"))
+ goto out;
+
+ err = check_sk_stg(passive_fd, BPF_TCP_LAST_ACK);
+ ASSERT_OK(err, "passive_fd sk_stg");
+
+out:
+ if (active_fd != -1)
+ close(active_fd);
+ if (passive_fd != -1)
+ close(passive_fd);
+ if (listen_fd != -1)
+ close(listen_fd);
+}
+
+void serial_test_sk_storage_tracing(void)
+{
+ struct test_sk_storage_trace_itself *skel_itself;
+ int err;
+
+ my_pid = getpid();
+
+ skel_itself = test_sk_storage_trace_itself__open_and_load();
+
+ if (!ASSERT_NULL(skel_itself, "test_sk_storage_trace_itself")) {
+ test_sk_storage_trace_itself__destroy(skel_itself);
+ return;
+ }
+
+ skel = test_sk_storage_tracing__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "test_sk_storage_tracing"))
+ return;
+
+ err = test_sk_storage_tracing__attach(skel);
+ if (!ASSERT_OK(err, "test_sk_storage_tracing__attach")) {
+ test_sk_storage_tracing__destroy(skel);
+ return;
+ }
+
+ do_test();
+
+ test_sk_storage_tracing__destroy(skel);
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/skb_ctx.c b/tools/testing/selftests/bpf/prog_tests/skb_ctx.c
index 7021b92af313..33f950e2dae3 100644
--- a/tools/testing/selftests/bpf/prog_tests/skb_ctx.c
+++ b/tools/testing/selftests/bpf/prog_tests/skb_ctx.c
@@ -11,93 +11,81 @@ void test_skb_ctx(void)
.cb[3] = 4,
.cb[4] = 5,
.priority = 6,
+ .ingress_ifindex = 11,
+ .ifindex = 1,
.tstamp = 7,
.wire_len = 100,
.gso_segs = 8,
.mark = 9,
.gso_size = 10,
+ .hwtstamp = 11,
};
- struct bpf_prog_test_run_attr tattr = {
+ LIBBPF_OPTS(bpf_test_run_opts, tattr,
.data_in = &pkt_v4,
.data_size_in = sizeof(pkt_v4),
.ctx_in = &skb,
.ctx_size_in = sizeof(skb),
.ctx_out = &skb,
.ctx_size_out = sizeof(skb),
- };
+ );
struct bpf_object *obj;
- int err;
- int i;
+ int err, prog_fd, i;
- err = bpf_prog_load("./test_skb_ctx.o", BPF_PROG_TYPE_SCHED_CLS, &obj,
- &tattr.prog_fd);
- if (CHECK_ATTR(err, "load", "err %d errno %d\n", err, errno))
+ err = bpf_prog_test_load("./test_skb_ctx.bpf.o", BPF_PROG_TYPE_SCHED_CLS,
+ &obj, &prog_fd);
+ if (!ASSERT_OK(err, "load"))
return;
/* ctx_in != NULL, ctx_size_in == 0 */
tattr.ctx_size_in = 0;
- err = bpf_prog_test_run_xattr(&tattr);
- CHECK_ATTR(err == 0, "ctx_size_in", "err %d errno %d\n", err, errno);
+ err = bpf_prog_test_run_opts(prog_fd, &tattr);
+ ASSERT_NEQ(err, 0, "ctx_size_in");
tattr.ctx_size_in = sizeof(skb);
/* ctx_out != NULL, ctx_size_out == 0 */
tattr.ctx_size_out = 0;
- err = bpf_prog_test_run_xattr(&tattr);
- CHECK_ATTR(err == 0, "ctx_size_out", "err %d errno %d\n", err, errno);
+ err = bpf_prog_test_run_opts(prog_fd, &tattr);
+ ASSERT_NEQ(err, 0, "ctx_size_out");
tattr.ctx_size_out = sizeof(skb);
/* non-zero [len, tc_index] fields should be rejected*/
skb.len = 1;
- err = bpf_prog_test_run_xattr(&tattr);
- CHECK_ATTR(err == 0, "len", "err %d errno %d\n", err, errno);
+ err = bpf_prog_test_run_opts(prog_fd, &tattr);
+ ASSERT_NEQ(err, 0, "len");
skb.len = 0;
skb.tc_index = 1;
- err = bpf_prog_test_run_xattr(&tattr);
- CHECK_ATTR(err == 0, "tc_index", "err %d errno %d\n", err, errno);
+ err = bpf_prog_test_run_opts(prog_fd, &tattr);
+ ASSERT_NEQ(err, 0, "tc_index");
skb.tc_index = 0;
/* non-zero [hash, sk] fields should be rejected */
skb.hash = 1;
- err = bpf_prog_test_run_xattr(&tattr);
- CHECK_ATTR(err == 0, "hash", "err %d errno %d\n", err, errno);
+ err = bpf_prog_test_run_opts(prog_fd, &tattr);
+ ASSERT_NEQ(err, 0, "hash");
skb.hash = 0;
skb.sk = (struct bpf_sock *)1;
- err = bpf_prog_test_run_xattr(&tattr);
- CHECK_ATTR(err == 0, "sk", "err %d errno %d\n", err, errno);
+ err = bpf_prog_test_run_opts(prog_fd, &tattr);
+ ASSERT_NEQ(err, 0, "sk");
skb.sk = 0;
- err = bpf_prog_test_run_xattr(&tattr);
- CHECK_ATTR(err != 0 || tattr.retval,
- "run",
- "err %d errno %d retval %d\n",
- err, errno, tattr.retval);
-
- CHECK_ATTR(tattr.ctx_size_out != sizeof(skb),
- "ctx_size_out",
- "incorrect output size, want %lu have %u\n",
- sizeof(skb), tattr.ctx_size_out);
+ err = bpf_prog_test_run_opts(prog_fd, &tattr);
+ ASSERT_OK(err, "test_run");
+ ASSERT_OK(tattr.retval, "test_run retval");
+ ASSERT_EQ(tattr.ctx_size_out, sizeof(skb), "ctx_size_out");
for (i = 0; i < 5; i++)
- CHECK_ATTR(skb.cb[i] != i + 2,
- "ctx_out_cb",
- "skb->cb[i] == %d, expected %d\n",
- skb.cb[i], i + 2);
- CHECK_ATTR(skb.priority != 7,
- "ctx_out_priority",
- "skb->priority == %d, expected %d\n",
- skb.priority, 7);
- CHECK_ATTR(skb.tstamp != 8,
- "ctx_out_tstamp",
- "skb->tstamp == %lld, expected %d\n",
- skb.tstamp, 8);
- CHECK_ATTR(skb.mark != 10,
- "ctx_out_mark",
- "skb->mark == %u, expected %d\n",
- skb.mark, 10);
+ ASSERT_EQ(skb.cb[i], i + 2, "ctx_out_cb");
+ ASSERT_EQ(skb.priority, 7, "ctx_out_priority");
+ ASSERT_EQ(skb.ifindex, 1, "ctx_out_ifindex");
+ ASSERT_EQ(skb.ingress_ifindex, 11, "ctx_out_ingress_ifindex");
+ ASSERT_EQ(skb.tstamp, 8, "ctx_out_tstamp");
+ ASSERT_EQ(skb.mark, 10, "ctx_out_mark");
+
+ bpf_object__close(obj);
}
diff --git a/tools/testing/selftests/bpf/prog_tests/skb_helpers.c b/tools/testing/selftests/bpf/prog_tests/skb_helpers.c
index f302ad84a298..f7ee25f290f7 100644
--- a/tools/testing/selftests/bpf/prog_tests/skb_helpers.c
+++ b/tools/testing/selftests/bpf/prog_tests/skb_helpers.c
@@ -9,22 +9,22 @@ void test_skb_helpers(void)
.gso_segs = 8,
.gso_size = 10,
};
- struct bpf_prog_test_run_attr tattr = {
+ LIBBPF_OPTS(bpf_test_run_opts, topts,
.data_in = &pkt_v4,
.data_size_in = sizeof(pkt_v4),
.ctx_in = &skb,
.ctx_size_in = sizeof(skb),
.ctx_out = &skb,
.ctx_size_out = sizeof(skb),
- };
+ );
struct bpf_object *obj;
- int err;
+ int err, prog_fd;
- err = bpf_prog_load("./test_skb_helpers.o", BPF_PROG_TYPE_SCHED_CLS, &obj,
- &tattr.prog_fd);
- if (CHECK_ATTR(err, "load", "err %d errno %d\n", err, errno))
+ err = bpf_prog_test_load("./test_skb_helpers.bpf.o",
+ BPF_PROG_TYPE_SCHED_CLS, &obj, &prog_fd);
+ if (!ASSERT_OK(err, "load"))
return;
- err = bpf_prog_test_run_xattr(&tattr);
- CHECK_ATTR(err, "len", "err %d errno %d\n", err, errno);
+ err = bpf_prog_test_run_opts(prog_fd, &topts);
+ ASSERT_OK(err, "test_run");
bpf_object__close(obj);
}
diff --git a/tools/testing/selftests/bpf/prog_tests/skb_load_bytes.c b/tools/testing/selftests/bpf/prog_tests/skb_load_bytes.c
new file mode 100644
index 000000000000..d7f83c0a40a5
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/skb_load_bytes.c
@@ -0,0 +1,45 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <test_progs.h>
+#include <network_helpers.h>
+#include "skb_load_bytes.skel.h"
+
+void test_skb_load_bytes(void)
+{
+ struct skb_load_bytes *skel;
+ int err, prog_fd, test_result;
+ struct __sk_buff skb = { 0 };
+
+ LIBBPF_OPTS(bpf_test_run_opts, tattr,
+ .data_in = &pkt_v4,
+ .data_size_in = sizeof(pkt_v4),
+ .ctx_in = &skb,
+ .ctx_size_in = sizeof(skb),
+ );
+
+ skel = skb_load_bytes__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "skel_open_and_load"))
+ return;
+
+ prog_fd = bpf_program__fd(skel->progs.skb_process);
+ if (!ASSERT_GE(prog_fd, 0, "prog_fd"))
+ goto out;
+
+ skel->bss->load_offset = (uint32_t)(-1);
+ err = bpf_prog_test_run_opts(prog_fd, &tattr);
+ if (!ASSERT_OK(err, "bpf_prog_test_run_opts"))
+ goto out;
+ test_result = skel->bss->test_result;
+ if (!ASSERT_EQ(test_result, -EFAULT, "offset -1"))
+ goto out;
+
+ skel->bss->load_offset = (uint32_t)10;
+ err = bpf_prog_test_run_opts(prog_fd, &tattr);
+ if (!ASSERT_OK(err, "bpf_prog_test_run_opts"))
+ goto out;
+ test_result = skel->bss->test_result;
+ if (!ASSERT_EQ(test_result, 0, "offset 10"))
+ goto out;
+
+out:
+ skb_load_bytes__destroy(skel);
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/skc_to_unix_sock.c b/tools/testing/selftests/bpf/prog_tests/skc_to_unix_sock.c
new file mode 100644
index 000000000000..3eefdfed1db9
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/skc_to_unix_sock.c
@@ -0,0 +1,54 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* Copyright (c) 2021 Hengqi Chen */
+
+#include <test_progs.h>
+#include <sys/un.h>
+#include "test_skc_to_unix_sock.skel.h"
+
+static const char *sock_path = "@skc_to_unix_sock";
+
+void test_skc_to_unix_sock(void)
+{
+ struct test_skc_to_unix_sock *skel;
+ struct sockaddr_un sockaddr;
+ int err, sockfd = 0;
+
+ skel = test_skc_to_unix_sock__open();
+ if (!ASSERT_OK_PTR(skel, "could not open BPF object"))
+ return;
+
+ skel->rodata->my_pid = getpid();
+
+ err = test_skc_to_unix_sock__load(skel);
+ if (!ASSERT_OK(err, "could not load BPF object"))
+ goto cleanup;
+
+ err = test_skc_to_unix_sock__attach(skel);
+ if (!ASSERT_OK(err, "could not attach BPF object"))
+ goto cleanup;
+
+ /* trigger unix_listen */
+ sockfd = socket(AF_UNIX, SOCK_STREAM, 0);
+ if (!ASSERT_GT(sockfd, 0, "socket failed"))
+ goto cleanup;
+
+ memset(&sockaddr, 0, sizeof(sockaddr));
+ sockaddr.sun_family = AF_UNIX;
+ strncpy(sockaddr.sun_path, sock_path, strlen(sock_path));
+ sockaddr.sun_path[0] = '\0';
+
+ err = bind(sockfd, (struct sockaddr *)&sockaddr, sizeof(sockaddr));
+ if (!ASSERT_OK(err, "bind failed"))
+ goto cleanup;
+
+ err = listen(sockfd, 1);
+ if (!ASSERT_OK(err, "listen failed"))
+ goto cleanup;
+
+ ASSERT_EQ(strcmp(skel->bss->path, sock_path), 0, "bpf_skc_to_unix_sock failed");
+
+cleanup:
+ if (sockfd)
+ close(sockfd);
+ test_skc_to_unix_sock__destroy(skel);
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/skeleton.c b/tools/testing/selftests/bpf/prog_tests/skeleton.c
index fa153cf67b1b..bc6817aee9aa 100644
--- a/tools/testing/selftests/bpf/prog_tests/skeleton.c
+++ b/tools/testing/selftests/bpf/prog_tests/skeleton.c
@@ -2,6 +2,7 @@
/* Copyright (c) 2019 Facebook */
#include <test_progs.h>
+#include <sys/mman.h>
struct s {
int a;
@@ -16,8 +17,14 @@ void test_skeleton(void)
struct test_skeleton* skel;
struct test_skeleton__bss *bss;
struct test_skeleton__data *data;
+ struct test_skeleton__data_dyn *data_dyn;
struct test_skeleton__rodata *rodata;
+ struct test_skeleton__rodata_dyn *rodata_dyn;
struct test_skeleton__kconfig *kcfg;
+ const void *elf_bytes;
+ size_t elf_bytes_sz = 0;
+ void *m;
+ int i, fd;
skel = test_skeleton__open();
if (CHECK(!skel, "skel_open", "failed to open skeleton\n"))
@@ -28,7 +35,12 @@ void test_skeleton(void)
bss = skel->bss;
data = skel->data;
+ data_dyn = skel->data_dyn;
rodata = skel->rodata;
+ rodata_dyn = skel->rodata_dyn;
+
+ ASSERT_STREQ(bpf_map__name(skel->maps.rodata_dyn), ".rodata.dyn", "rodata_dyn_name");
+ ASSERT_STREQ(bpf_map__name(skel->maps.data_dyn), ".data.dyn", "data_dyn_name");
/* validate values are pre-initialized correctly */
CHECK(data->in1 != -1, "in1", "got %d != exp %d\n", data->in1, -1);
@@ -41,15 +53,25 @@ void test_skeleton(void)
CHECK(bss->in4 != 0, "in4", "got %lld != exp %lld\n", bss->in4, 0LL);
CHECK(bss->out4 != 0, "out4", "got %lld != exp %lld\n", bss->out4, 0LL);
- CHECK(rodata->in6 != 0, "in6", "got %d != exp %d\n", rodata->in6, 0);
+ CHECK(rodata->in.in6 != 0, "in6", "got %d != exp %d\n", rodata->in.in6, 0);
CHECK(bss->out6 != 0, "out6", "got %d != exp %d\n", bss->out6, 0);
+ ASSERT_EQ(rodata_dyn->in_dynarr_sz, 0, "in_dynarr_sz");
+ for (i = 0; i < 4; i++)
+ ASSERT_EQ(rodata_dyn->in_dynarr[i], -(i + 1), "in_dynarr");
+ for (i = 0; i < 4; i++)
+ ASSERT_EQ(data_dyn->out_dynarr[i], i + 1, "out_dynarr");
+
/* validate we can pre-setup global variables, even in .bss */
data->in1 = 10;
data->in2 = 11;
bss->in3 = 12;
bss->in4 = 13;
- rodata->in6 = 14;
+ rodata->in.in6 = 14;
+
+ rodata_dyn->in_dynarr_sz = 4;
+ for (i = 0; i < 4; i++)
+ rodata_dyn->in_dynarr[i] = i + 10;
err = test_skeleton__load(skel);
if (CHECK(err, "skel_load", "failed to load skeleton: %d\n", err))
@@ -60,7 +82,11 @@ void test_skeleton(void)
CHECK(data->in2 != 11, "in2", "got %lld != exp %lld\n", data->in2, 11LL);
CHECK(bss->in3 != 12, "in3", "got %d != exp %d\n", bss->in3, 12);
CHECK(bss->in4 != 13, "in4", "got %lld != exp %lld\n", bss->in4, 13LL);
- CHECK(rodata->in6 != 14, "in6", "got %d != exp %d\n", rodata->in6, 14);
+ CHECK(rodata->in.in6 != 14, "in6", "got %d != exp %d\n", rodata->in.in6, 14);
+
+ ASSERT_EQ(rodata_dyn->in_dynarr_sz, 4, "in_dynarr_sz");
+ for (i = 0; i < 4; i++)
+ ASSERT_EQ(rodata_dyn->in_dynarr[i], i + 10, "in_dynarr");
/* now set new values and attach to get them into outX variables */
data->in1 = 1;
@@ -71,6 +97,8 @@ void test_skeleton(void)
bss->in5.b = 6;
kcfg = skel->kconfig;
+ skel->data_read_mostly->read_mostly_var = 123;
+
err = test_skeleton__attach(skel);
if (CHECK(err, "skel_attach", "skeleton attach failed: %d\n", err))
goto cleanup;
@@ -82,10 +110,8 @@ void test_skeleton(void)
CHECK(data->out2 != 2, "res2", "got %lld != exp %d\n", data->out2, 2);
CHECK(bss->out3 != 3, "res3", "got %d != exp %d\n", (int)bss->out3, 3);
CHECK(bss->out4 != 4, "res4", "got %lld != exp %d\n", bss->out4, 4);
- CHECK(bss->handler_out5.a != 5, "res5", "got %d != exp %d\n",
- bss->handler_out5.a, 5);
- CHECK(bss->handler_out5.b != 6, "res6", "got %lld != exp %d\n",
- bss->handler_out5.b, 6);
+ CHECK(bss->out5.a != 5, "res5", "got %d != exp %d\n", bss->out5.a, 5);
+ CHECK(bss->out5.b != 6, "res6", "got %lld != exp %d\n", bss->out5.b, 6);
CHECK(bss->out6 != 14, "res7", "got %d != exp %d\n", bss->out6, 14);
CHECK(bss->bpf_syscall != kcfg->CONFIG_BPF_SYSCALL, "ext1",
@@ -93,6 +119,24 @@ void test_skeleton(void)
CHECK(bss->kern_ver != kcfg->LINUX_KERNEL_VERSION, "ext2",
"got %d != exp %d\n", bss->kern_ver, kcfg->LINUX_KERNEL_VERSION);
+ for (i = 0; i < 4; i++)
+ ASSERT_EQ(data_dyn->out_dynarr[i], i + 10, "out_dynarr");
+
+ ASSERT_EQ(skel->bss->out_mostly_var, 123, "out_mostly_var");
+
+ ASSERT_EQ(bss->huge_arr[ARRAY_SIZE(bss->huge_arr) - 1], 123, "huge_arr");
+
+ fd = bpf_map__fd(skel->maps.data_non_mmapable);
+ m = mmap(NULL, getpagesize(), PROT_READ, MAP_SHARED, fd, 0);
+ if (!ASSERT_EQ(m, MAP_FAILED, "unexpected_mmap_success"))
+ munmap(m, getpagesize());
+
+ ASSERT_EQ(bpf_map__map_flags(skel->maps.data_non_mmapable), 0, "non_mmap_flags");
+
+ elf_bytes = test_skeleton__elf_bytes(&elf_bytes_sz);
+ ASSERT_OK_PTR(elf_bytes, "elf_bytes");
+ ASSERT_GE(elf_bytes_sz, 0, "elf_bytes_sz");
+
cleanup:
test_skeleton__destroy(skel);
}
diff --git a/tools/testing/selftests/bpf/prog_tests/snprintf.c b/tools/testing/selftests/bpf/prog_tests/snprintf.c
new file mode 100644
index 000000000000..4be6fdb78c6a
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/snprintf.c
@@ -0,0 +1,127 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2021 Google LLC. */
+
+#include <test_progs.h>
+#include "test_snprintf.skel.h"
+#include "test_snprintf_single.skel.h"
+
+#define EXP_NUM_OUT "-8 9 96 -424242 1337 DABBAD00"
+#define EXP_NUM_RET sizeof(EXP_NUM_OUT)
+
+#define EXP_IP_OUT "127.000.000.001 0000:0000:0000:0000:0000:0000:0000:0001"
+#define EXP_IP_RET sizeof(EXP_IP_OUT)
+
+/* The third specifier, %pB, depends on compiler inlining so don't check it */
+#define EXP_SYM_OUT "schedule schedule+0x0/"
+#define MIN_SYM_RET sizeof(EXP_SYM_OUT)
+
+/* The third specifier, %p, is a hashed pointer which changes on every reboot */
+#define EXP_ADDR_OUT "0000000000000000 ffff00000add4e55 "
+#define EXP_ADDR_RET sizeof(EXP_ADDR_OUT "unknownhashedptr")
+
+#define EXP_STR_OUT "str1 a b c d e longstr"
+#define EXP_STR_RET sizeof(EXP_STR_OUT)
+
+#define EXP_OVER_OUT "%over"
+#define EXP_OVER_RET 10
+
+#define EXP_PAD_OUT " 4 000"
+#define EXP_PAD_RET 900007
+
+#define EXP_NO_ARG_OUT "simple case"
+#define EXP_NO_ARG_RET 12
+
+#define EXP_NO_BUF_RET 29
+
+static void test_snprintf_positive(void)
+{
+ char exp_addr_out[] = EXP_ADDR_OUT;
+ char exp_sym_out[] = EXP_SYM_OUT;
+ struct test_snprintf *skel;
+
+ skel = test_snprintf__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "skel_open"))
+ return;
+
+ skel->bss->pid = getpid();
+
+ if (!ASSERT_OK(test_snprintf__attach(skel), "skel_attach"))
+ goto cleanup;
+
+ /* trigger tracepoint */
+ usleep(1);
+
+ ASSERT_STREQ(skel->bss->num_out, EXP_NUM_OUT, "num_out");
+ ASSERT_EQ(skel->bss->num_ret, EXP_NUM_RET, "num_ret");
+
+ ASSERT_STREQ(skel->bss->ip_out, EXP_IP_OUT, "ip_out");
+ ASSERT_EQ(skel->bss->ip_ret, EXP_IP_RET, "ip_ret");
+
+ ASSERT_OK(memcmp(skel->bss->sym_out, exp_sym_out,
+ sizeof(exp_sym_out) - 1), "sym_out");
+ ASSERT_LT(MIN_SYM_RET, skel->bss->sym_ret, "sym_ret");
+
+ ASSERT_OK(memcmp(skel->bss->addr_out, exp_addr_out,
+ sizeof(exp_addr_out) - 1), "addr_out");
+ ASSERT_EQ(skel->bss->addr_ret, EXP_ADDR_RET, "addr_ret");
+
+ ASSERT_STREQ(skel->bss->str_out, EXP_STR_OUT, "str_out");
+ ASSERT_EQ(skel->bss->str_ret, EXP_STR_RET, "str_ret");
+
+ ASSERT_STREQ(skel->bss->over_out, EXP_OVER_OUT, "over_out");
+ ASSERT_EQ(skel->bss->over_ret, EXP_OVER_RET, "over_ret");
+
+ ASSERT_STREQ(skel->bss->pad_out, EXP_PAD_OUT, "pad_out");
+ ASSERT_EQ(skel->bss->pad_ret, EXP_PAD_RET, "pad_ret");
+
+ ASSERT_STREQ(skel->bss->noarg_out, EXP_NO_ARG_OUT, "no_arg_out");
+ ASSERT_EQ(skel->bss->noarg_ret, EXP_NO_ARG_RET, "no_arg_ret");
+
+ ASSERT_EQ(skel->bss->nobuf_ret, EXP_NO_BUF_RET, "no_buf_ret");
+
+cleanup:
+ test_snprintf__destroy(skel);
+}
+
+/* Loads an eBPF object calling bpf_snprintf with up to 10 characters of fmt */
+static int load_single_snprintf(char *fmt)
+{
+ struct test_snprintf_single *skel;
+ int ret;
+
+ skel = test_snprintf_single__open();
+ if (!skel)
+ return -EINVAL;
+
+ memcpy(skel->rodata->fmt, fmt, MIN(strlen(fmt) + 1, 10));
+
+ ret = test_snprintf_single__load(skel);
+ test_snprintf_single__destroy(skel);
+
+ return ret;
+}
+
+static void test_snprintf_negative(void)
+{
+ ASSERT_OK(load_single_snprintf("valid %d"), "valid usage");
+
+ ASSERT_ERR(load_single_snprintf("0123456789"), "no terminating zero");
+ ASSERT_ERR(load_single_snprintf("%d %d"), "too many specifiers");
+ ASSERT_ERR(load_single_snprintf("%pi5"), "invalid specifier 1");
+ ASSERT_ERR(load_single_snprintf("%a"), "invalid specifier 2");
+ ASSERT_ERR(load_single_snprintf("%"), "invalid specifier 3");
+ ASSERT_ERR(load_single_snprintf("%12345678"), "invalid specifier 4");
+ ASSERT_ERR(load_single_snprintf("%--------"), "invalid specifier 5");
+ ASSERT_ERR(load_single_snprintf("%lc"), "invalid specifier 6");
+ ASSERT_ERR(load_single_snprintf("%llc"), "invalid specifier 7");
+ ASSERT_ERR(load_single_snprintf("\x80"), "non ascii character");
+ ASSERT_ERR(load_single_snprintf("\x1"), "non printable character");
+}
+
+void test_snprintf(void)
+{
+ if (test__start_subtest("snprintf_positive"))
+ test_snprintf_positive();
+ if (test__start_subtest("snprintf_negative"))
+ test_snprintf_negative();
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/snprintf_btf.c b/tools/testing/selftests/bpf/prog_tests/snprintf_btf.c
new file mode 100644
index 000000000000..dd41b826be30
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/snprintf_btf.c
@@ -0,0 +1,60 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <test_progs.h>
+#include <linux/btf.h>
+#include "netif_receive_skb.skel.h"
+
+/* Demonstrate that bpf_snprintf_btf succeeds and that various data types
+ * are formatted correctly.
+ */
+void serial_test_snprintf_btf(void)
+{
+ struct netif_receive_skb *skel;
+ struct netif_receive_skb__bss *bss;
+ int err, duration = 0;
+
+ skel = netif_receive_skb__open();
+ if (CHECK(!skel, "skel_open", "failed to open skeleton\n"))
+ return;
+
+ err = netif_receive_skb__load(skel);
+ if (CHECK(err, "skel_load", "failed to load skeleton: %d\n", err))
+ goto cleanup;
+
+ bss = skel->bss;
+
+ err = netif_receive_skb__attach(skel);
+ if (CHECK(err, "skel_attach", "skeleton attach failed: %d\n", err))
+ goto cleanup;
+
+ /* generate receive event */
+ err = system("ping -c 1 127.0.0.1 > /dev/null");
+ if (CHECK(err, "system", "ping failed: %d\n", err))
+ goto cleanup;
+
+ if (bss->skip) {
+ printf("%s:SKIP:no __builtin_btf_type_id\n", __func__);
+ test__skip();
+ goto cleanup;
+ }
+
+ /*
+ * Make sure netif_receive_skb program was triggered
+ * and it set expected return values from bpf_trace_printk()s
+ * and all tests ran.
+ */
+ if (!ASSERT_GT(bss->ret, 0, "bpf_snprintf_ret"))
+ goto cleanup;
+
+ if (CHECK(bss->ran_subtests == 0, "check if subtests ran",
+ "no subtests ran, did BPF program run?"))
+ goto cleanup;
+
+ if (CHECK(bss->num_subtests != bss->ran_subtests,
+ "check all subtests ran",
+ "only ran %d of %d tests\n", bss->num_subtests,
+ bss->ran_subtests))
+ goto cleanup;
+
+cleanup:
+ netif_receive_skb__destroy(skel);
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/sock_addr.c b/tools/testing/selftests/bpf/prog_tests/sock_addr.c
new file mode 100644
index 000000000000..5fd617718991
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/sock_addr.c
@@ -0,0 +1,612 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <sys/un.h>
+
+#include "test_progs.h"
+
+#include "connect_unix_prog.skel.h"
+#include "sendmsg_unix_prog.skel.h"
+#include "recvmsg_unix_prog.skel.h"
+#include "getsockname_unix_prog.skel.h"
+#include "getpeername_unix_prog.skel.h"
+#include "network_helpers.h"
+
+#define SERVUN_ADDRESS "bpf_cgroup_unix_test"
+#define SERVUN_REWRITE_ADDRESS "bpf_cgroup_unix_test_rewrite"
+#define SRCUN_ADDRESS "bpf_cgroup_unix_test_src"
+
+enum sock_addr_test_type {
+ SOCK_ADDR_TEST_BIND,
+ SOCK_ADDR_TEST_CONNECT,
+ SOCK_ADDR_TEST_SENDMSG,
+ SOCK_ADDR_TEST_RECVMSG,
+ SOCK_ADDR_TEST_GETSOCKNAME,
+ SOCK_ADDR_TEST_GETPEERNAME,
+};
+
+typedef void *(*load_fn)(int cgroup_fd);
+typedef void (*destroy_fn)(void *skel);
+
+struct sock_addr_test {
+ enum sock_addr_test_type type;
+ const char *name;
+ /* BPF prog properties */
+ load_fn loadfn;
+ destroy_fn destroyfn;
+ /* Socket properties */
+ int socket_family;
+ int socket_type;
+ /* IP:port pairs for BPF prog to override */
+ const char *requested_addr;
+ unsigned short requested_port;
+ const char *expected_addr;
+ unsigned short expected_port;
+ const char *expected_src_addr;
+};
+
+static void *connect_unix_prog_load(int cgroup_fd)
+{
+ struct connect_unix_prog *skel;
+
+ skel = connect_unix_prog__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "skel_open"))
+ goto cleanup;
+
+ skel->links.connect_unix_prog = bpf_program__attach_cgroup(
+ skel->progs.connect_unix_prog, cgroup_fd);
+ if (!ASSERT_OK_PTR(skel->links.connect_unix_prog, "prog_attach"))
+ goto cleanup;
+
+ return skel;
+cleanup:
+ connect_unix_prog__destroy(skel);
+ return NULL;
+}
+
+static void connect_unix_prog_destroy(void *skel)
+{
+ connect_unix_prog__destroy(skel);
+}
+
+static void *sendmsg_unix_prog_load(int cgroup_fd)
+{
+ struct sendmsg_unix_prog *skel;
+
+ skel = sendmsg_unix_prog__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "skel_open"))
+ goto cleanup;
+
+ skel->links.sendmsg_unix_prog = bpf_program__attach_cgroup(
+ skel->progs.sendmsg_unix_prog, cgroup_fd);
+ if (!ASSERT_OK_PTR(skel->links.sendmsg_unix_prog, "prog_attach"))
+ goto cleanup;
+
+ return skel;
+cleanup:
+ sendmsg_unix_prog__destroy(skel);
+ return NULL;
+}
+
+static void sendmsg_unix_prog_destroy(void *skel)
+{
+ sendmsg_unix_prog__destroy(skel);
+}
+
+static void *recvmsg_unix_prog_load(int cgroup_fd)
+{
+ struct recvmsg_unix_prog *skel;
+
+ skel = recvmsg_unix_prog__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "skel_open"))
+ goto cleanup;
+
+ skel->links.recvmsg_unix_prog = bpf_program__attach_cgroup(
+ skel->progs.recvmsg_unix_prog, cgroup_fd);
+ if (!ASSERT_OK_PTR(skel->links.recvmsg_unix_prog, "prog_attach"))
+ goto cleanup;
+
+ return skel;
+cleanup:
+ recvmsg_unix_prog__destroy(skel);
+ return NULL;
+}
+
+static void recvmsg_unix_prog_destroy(void *skel)
+{
+ recvmsg_unix_prog__destroy(skel);
+}
+
+static void *getsockname_unix_prog_load(int cgroup_fd)
+{
+ struct getsockname_unix_prog *skel;
+
+ skel = getsockname_unix_prog__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "skel_open"))
+ goto cleanup;
+
+ skel->links.getsockname_unix_prog = bpf_program__attach_cgroup(
+ skel->progs.getsockname_unix_prog, cgroup_fd);
+ if (!ASSERT_OK_PTR(skel->links.getsockname_unix_prog, "prog_attach"))
+ goto cleanup;
+
+ return skel;
+cleanup:
+ getsockname_unix_prog__destroy(skel);
+ return NULL;
+}
+
+static void getsockname_unix_prog_destroy(void *skel)
+{
+ getsockname_unix_prog__destroy(skel);
+}
+
+static void *getpeername_unix_prog_load(int cgroup_fd)
+{
+ struct getpeername_unix_prog *skel;
+
+ skel = getpeername_unix_prog__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "skel_open"))
+ goto cleanup;
+
+ skel->links.getpeername_unix_prog = bpf_program__attach_cgroup(
+ skel->progs.getpeername_unix_prog, cgroup_fd);
+ if (!ASSERT_OK_PTR(skel->links.getpeername_unix_prog, "prog_attach"))
+ goto cleanup;
+
+ return skel;
+cleanup:
+ getpeername_unix_prog__destroy(skel);
+ return NULL;
+}
+
+static void getpeername_unix_prog_destroy(void *skel)
+{
+ getpeername_unix_prog__destroy(skel);
+}
+
+static struct sock_addr_test tests[] = {
+ {
+ SOCK_ADDR_TEST_CONNECT,
+ "connect_unix",
+ connect_unix_prog_load,
+ connect_unix_prog_destroy,
+ AF_UNIX,
+ SOCK_STREAM,
+ SERVUN_ADDRESS,
+ 0,
+ SERVUN_REWRITE_ADDRESS,
+ 0,
+ NULL,
+ },
+ {
+ SOCK_ADDR_TEST_SENDMSG,
+ "sendmsg_unix",
+ sendmsg_unix_prog_load,
+ sendmsg_unix_prog_destroy,
+ AF_UNIX,
+ SOCK_DGRAM,
+ SERVUN_ADDRESS,
+ 0,
+ SERVUN_REWRITE_ADDRESS,
+ 0,
+ NULL,
+ },
+ {
+ SOCK_ADDR_TEST_RECVMSG,
+ "recvmsg_unix-dgram",
+ recvmsg_unix_prog_load,
+ recvmsg_unix_prog_destroy,
+ AF_UNIX,
+ SOCK_DGRAM,
+ SERVUN_REWRITE_ADDRESS,
+ 0,
+ SERVUN_REWRITE_ADDRESS,
+ 0,
+ SERVUN_ADDRESS,
+ },
+ {
+ SOCK_ADDR_TEST_RECVMSG,
+ "recvmsg_unix-stream",
+ recvmsg_unix_prog_load,
+ recvmsg_unix_prog_destroy,
+ AF_UNIX,
+ SOCK_STREAM,
+ SERVUN_REWRITE_ADDRESS,
+ 0,
+ SERVUN_REWRITE_ADDRESS,
+ 0,
+ SERVUN_ADDRESS,
+ },
+ {
+ SOCK_ADDR_TEST_GETSOCKNAME,
+ "getsockname_unix",
+ getsockname_unix_prog_load,
+ getsockname_unix_prog_destroy,
+ AF_UNIX,
+ SOCK_STREAM,
+ SERVUN_ADDRESS,
+ 0,
+ SERVUN_REWRITE_ADDRESS,
+ 0,
+ NULL,
+ },
+ {
+ SOCK_ADDR_TEST_GETPEERNAME,
+ "getpeername_unix",
+ getpeername_unix_prog_load,
+ getpeername_unix_prog_destroy,
+ AF_UNIX,
+ SOCK_STREAM,
+ SERVUN_ADDRESS,
+ 0,
+ SERVUN_REWRITE_ADDRESS,
+ 0,
+ NULL,
+ },
+};
+
+typedef int (*info_fn)(int, struct sockaddr *, socklen_t *);
+
+static int cmp_addr(const struct sockaddr_storage *addr1, socklen_t addr1_len,
+ const struct sockaddr_storage *addr2, socklen_t addr2_len,
+ bool cmp_port)
+{
+ const struct sockaddr_in *four1, *four2;
+ const struct sockaddr_in6 *six1, *six2;
+ const struct sockaddr_un *un1, *un2;
+
+ if (addr1->ss_family != addr2->ss_family)
+ return -1;
+
+ if (addr1_len != addr2_len)
+ return -1;
+
+ if (addr1->ss_family == AF_INET) {
+ four1 = (const struct sockaddr_in *)addr1;
+ four2 = (const struct sockaddr_in *)addr2;
+ return !((four1->sin_port == four2->sin_port || !cmp_port) &&
+ four1->sin_addr.s_addr == four2->sin_addr.s_addr);
+ } else if (addr1->ss_family == AF_INET6) {
+ six1 = (const struct sockaddr_in6 *)addr1;
+ six2 = (const struct sockaddr_in6 *)addr2;
+ return !((six1->sin6_port == six2->sin6_port || !cmp_port) &&
+ !memcmp(&six1->sin6_addr, &six2->sin6_addr,
+ sizeof(struct in6_addr)));
+ } else if (addr1->ss_family == AF_UNIX) {
+ un1 = (const struct sockaddr_un *)addr1;
+ un2 = (const struct sockaddr_un *)addr2;
+ return memcmp(un1, un2, addr1_len);
+ }
+
+ return -1;
+}
+
+static int cmp_sock_addr(info_fn fn, int sock1,
+ const struct sockaddr_storage *addr2,
+ socklen_t addr2_len, bool cmp_port)
+{
+ struct sockaddr_storage addr1;
+ socklen_t len1 = sizeof(addr1);
+
+ memset(&addr1, 0, len1);
+ if (fn(sock1, (struct sockaddr *)&addr1, (socklen_t *)&len1) != 0)
+ return -1;
+
+ return cmp_addr(&addr1, len1, addr2, addr2_len, cmp_port);
+}
+
+static int cmp_local_addr(int sock1, const struct sockaddr_storage *addr2,
+ socklen_t addr2_len, bool cmp_port)
+{
+ return cmp_sock_addr(getsockname, sock1, addr2, addr2_len, cmp_port);
+}
+
+static int cmp_peer_addr(int sock1, const struct sockaddr_storage *addr2,
+ socklen_t addr2_len, bool cmp_port)
+{
+ return cmp_sock_addr(getpeername, sock1, addr2, addr2_len, cmp_port);
+}
+
+static void test_bind(struct sock_addr_test *test)
+{
+ struct sockaddr_storage expected_addr;
+ socklen_t expected_addr_len = sizeof(struct sockaddr_storage);
+ int serv = -1, client = -1, err;
+
+ serv = start_server(test->socket_family, test->socket_type,
+ test->requested_addr, test->requested_port, 0);
+ if (!ASSERT_GE(serv, 0, "start_server"))
+ goto cleanup;
+
+ err = make_sockaddr(test->socket_family,
+ test->expected_addr, test->expected_port,
+ &expected_addr, &expected_addr_len);
+ if (!ASSERT_EQ(err, 0, "make_sockaddr"))
+ goto cleanup;
+
+ err = cmp_local_addr(serv, &expected_addr, expected_addr_len, true);
+ if (!ASSERT_EQ(err, 0, "cmp_local_addr"))
+ goto cleanup;
+
+ /* Try to connect to server just in case */
+ client = connect_to_addr(&expected_addr, expected_addr_len, test->socket_type);
+ if (!ASSERT_GE(client, 0, "connect_to_addr"))
+ goto cleanup;
+
+cleanup:
+ if (client != -1)
+ close(client);
+ if (serv != -1)
+ close(serv);
+}
+
+static void test_connect(struct sock_addr_test *test)
+{
+ struct sockaddr_storage addr, expected_addr, expected_src_addr;
+ socklen_t addr_len = sizeof(struct sockaddr_storage),
+ expected_addr_len = sizeof(struct sockaddr_storage),
+ expected_src_addr_len = sizeof(struct sockaddr_storage);
+ int serv = -1, client = -1, err;
+
+ serv = start_server(test->socket_family, test->socket_type,
+ test->expected_addr, test->expected_port, 0);
+ if (!ASSERT_GE(serv, 0, "start_server"))
+ goto cleanup;
+
+ err = make_sockaddr(test->socket_family, test->requested_addr, test->requested_port,
+ &addr, &addr_len);
+ if (!ASSERT_EQ(err, 0, "make_sockaddr"))
+ goto cleanup;
+
+ client = connect_to_addr(&addr, addr_len, test->socket_type);
+ if (!ASSERT_GE(client, 0, "connect_to_addr"))
+ goto cleanup;
+
+ err = make_sockaddr(test->socket_family, test->expected_addr, test->expected_port,
+ &expected_addr, &expected_addr_len);
+ if (!ASSERT_EQ(err, 0, "make_sockaddr"))
+ goto cleanup;
+
+ if (test->expected_src_addr) {
+ err = make_sockaddr(test->socket_family, test->expected_src_addr, 0,
+ &expected_src_addr, &expected_src_addr_len);
+ if (!ASSERT_EQ(err, 0, "make_sockaddr"))
+ goto cleanup;
+ }
+
+ err = cmp_peer_addr(client, &expected_addr, expected_addr_len, true);
+ if (!ASSERT_EQ(err, 0, "cmp_peer_addr"))
+ goto cleanup;
+
+ if (test->expected_src_addr) {
+ err = cmp_local_addr(client, &expected_src_addr, expected_src_addr_len, false);
+ if (!ASSERT_EQ(err, 0, "cmp_local_addr"))
+ goto cleanup;
+ }
+cleanup:
+ if (client != -1)
+ close(client);
+ if (serv != -1)
+ close(serv);
+}
+
+static void test_xmsg(struct sock_addr_test *test)
+{
+ struct sockaddr_storage addr, src_addr;
+ socklen_t addr_len = sizeof(struct sockaddr_storage),
+ src_addr_len = sizeof(struct sockaddr_storage);
+ struct msghdr hdr;
+ struct iovec iov;
+ char data = 'a';
+ int serv = -1, client = -1, err;
+
+ /* Unlike the other tests, here we test that we can rewrite the src addr
+ * with a recvmsg() hook.
+ */
+
+ serv = start_server(test->socket_family, test->socket_type,
+ test->expected_addr, test->expected_port, 0);
+ if (!ASSERT_GE(serv, 0, "start_server"))
+ goto cleanup;
+
+ client = socket(test->socket_family, test->socket_type, 0);
+ if (!ASSERT_GE(client, 0, "socket"))
+ goto cleanup;
+
+ /* AF_UNIX sockets have to be bound to something to trigger the recvmsg bpf program. */
+ if (test->socket_family == AF_UNIX) {
+ err = make_sockaddr(AF_UNIX, SRCUN_ADDRESS, 0, &src_addr, &src_addr_len);
+ if (!ASSERT_EQ(err, 0, "make_sockaddr"))
+ goto cleanup;
+
+ err = bind(client, (const struct sockaddr *) &src_addr, src_addr_len);
+ if (!ASSERT_OK(err, "bind"))
+ goto cleanup;
+ }
+
+ err = make_sockaddr(test->socket_family, test->requested_addr, test->requested_port,
+ &addr, &addr_len);
+ if (!ASSERT_EQ(err, 0, "make_sockaddr"))
+ goto cleanup;
+
+ if (test->socket_type == SOCK_DGRAM) {
+ memset(&iov, 0, sizeof(iov));
+ iov.iov_base = &data;
+ iov.iov_len = sizeof(data);
+
+ memset(&hdr, 0, sizeof(hdr));
+ hdr.msg_name = (void *)&addr;
+ hdr.msg_namelen = addr_len;
+ hdr.msg_iov = &iov;
+ hdr.msg_iovlen = 1;
+
+ err = sendmsg(client, &hdr, 0);
+ if (!ASSERT_EQ(err, sizeof(data), "sendmsg"))
+ goto cleanup;
+ } else {
+ /* Testing with connection-oriented sockets is only valid for
+ * recvmsg() tests.
+ */
+ if (!ASSERT_EQ(test->type, SOCK_ADDR_TEST_RECVMSG, "recvmsg"))
+ goto cleanup;
+
+ err = connect(client, (const struct sockaddr *)&addr, addr_len);
+ if (!ASSERT_OK(err, "connect"))
+ goto cleanup;
+
+ err = send(client, &data, sizeof(data), 0);
+ if (!ASSERT_EQ(err, sizeof(data), "send"))
+ goto cleanup;
+
+ err = listen(serv, 0);
+ if (!ASSERT_OK(err, "listen"))
+ goto cleanup;
+
+ err = accept(serv, NULL, NULL);
+ if (!ASSERT_GE(err, 0, "accept"))
+ goto cleanup;
+
+ close(serv);
+ serv = err;
+ }
+
+ addr_len = src_addr_len = sizeof(struct sockaddr_storage);
+
+ err = recvfrom(serv, &data, sizeof(data), 0, (struct sockaddr *) &src_addr, &src_addr_len);
+ if (!ASSERT_EQ(err, sizeof(data), "recvfrom"))
+ goto cleanup;
+
+ ASSERT_EQ(data, 'a', "data mismatch");
+
+ if (test->expected_src_addr) {
+ err = make_sockaddr(test->socket_family, test->expected_src_addr, 0,
+ &addr, &addr_len);
+ if (!ASSERT_EQ(err, 0, "make_sockaddr"))
+ goto cleanup;
+
+ err = cmp_addr(&src_addr, src_addr_len, &addr, addr_len, false);
+ if (!ASSERT_EQ(err, 0, "cmp_addr"))
+ goto cleanup;
+ }
+
+cleanup:
+ if (client != -1)
+ close(client);
+ if (serv != -1)
+ close(serv);
+}
+
+static void test_getsockname(struct sock_addr_test *test)
+{
+ struct sockaddr_storage expected_addr;
+ socklen_t expected_addr_len = sizeof(struct sockaddr_storage);
+ int serv = -1, err;
+
+ serv = start_server(test->socket_family, test->socket_type,
+ test->requested_addr, test->requested_port, 0);
+ if (!ASSERT_GE(serv, 0, "start_server"))
+ goto cleanup;
+
+ err = make_sockaddr(test->socket_family,
+ test->expected_addr, test->expected_port,
+ &expected_addr, &expected_addr_len);
+ if (!ASSERT_EQ(err, 0, "make_sockaddr"))
+ goto cleanup;
+
+ err = cmp_local_addr(serv, &expected_addr, expected_addr_len, true);
+ if (!ASSERT_EQ(err, 0, "cmp_local_addr"))
+ goto cleanup;
+
+cleanup:
+ if (serv != -1)
+ close(serv);
+}
+
+static void test_getpeername(struct sock_addr_test *test)
+{
+ struct sockaddr_storage addr, expected_addr;
+ socklen_t addr_len = sizeof(struct sockaddr_storage),
+ expected_addr_len = sizeof(struct sockaddr_storage);
+ int serv = -1, client = -1, err;
+
+ serv = start_server(test->socket_family, test->socket_type,
+ test->requested_addr, test->requested_port, 0);
+ if (!ASSERT_GE(serv, 0, "start_server"))
+ goto cleanup;
+
+ err = make_sockaddr(test->socket_family, test->requested_addr, test->requested_port,
+ &addr, &addr_len);
+ if (!ASSERT_EQ(err, 0, "make_sockaddr"))
+ goto cleanup;
+
+ client = connect_to_addr(&addr, addr_len, test->socket_type);
+ if (!ASSERT_GE(client, 0, "connect_to_addr"))
+ goto cleanup;
+
+ err = make_sockaddr(test->socket_family, test->expected_addr, test->expected_port,
+ &expected_addr, &expected_addr_len);
+ if (!ASSERT_EQ(err, 0, "make_sockaddr"))
+ goto cleanup;
+
+ err = cmp_peer_addr(client, &expected_addr, expected_addr_len, true);
+ if (!ASSERT_EQ(err, 0, "cmp_peer_addr"))
+ goto cleanup;
+
+cleanup:
+ if (client != -1)
+ close(client);
+ if (serv != -1)
+ close(serv);
+}
+
+void test_sock_addr(void)
+{
+ int cgroup_fd = -1;
+ void *skel;
+
+ cgroup_fd = test__join_cgroup("/sock_addr");
+ if (!ASSERT_GE(cgroup_fd, 0, "join_cgroup"))
+ goto cleanup;
+
+ for (size_t i = 0; i < ARRAY_SIZE(tests); ++i) {
+ struct sock_addr_test *test = &tests[i];
+
+ if (!test__start_subtest(test->name))
+ continue;
+
+ skel = test->loadfn(cgroup_fd);
+ if (!skel)
+ continue;
+
+ switch (test->type) {
+ /* Not exercised yet but we leave this code here for when the
+ * INET and INET6 sockaddr tests are migrated to this file in
+ * the future.
+ */
+ case SOCK_ADDR_TEST_BIND:
+ test_bind(test);
+ break;
+ case SOCK_ADDR_TEST_CONNECT:
+ test_connect(test);
+ break;
+ case SOCK_ADDR_TEST_SENDMSG:
+ case SOCK_ADDR_TEST_RECVMSG:
+ test_xmsg(test);
+ break;
+ case SOCK_ADDR_TEST_GETSOCKNAME:
+ test_getsockname(test);
+ break;
+ case SOCK_ADDR_TEST_GETPEERNAME:
+ test_getpeername(test);
+ break;
+ default:
+ ASSERT_TRUE(false, "Unknown sock addr test type");
+ break;
+ }
+
+ test->destroyfn(skel);
+ }
+
+cleanup:
+ if (cgroup_fd >= 0)
+ close(cgroup_fd);
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/sock_destroy.c b/tools/testing/selftests/bpf/prog_tests/sock_destroy.c
new file mode 100644
index 000000000000..9c11938fe597
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/sock_destroy.c
@@ -0,0 +1,221 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <test_progs.h>
+#include <bpf/bpf_endian.h>
+
+#include "sock_destroy_prog.skel.h"
+#include "sock_destroy_prog_fail.skel.h"
+#include "network_helpers.h"
+
+#define TEST_NS "sock_destroy_netns"
+
+static void start_iter_sockets(struct bpf_program *prog)
+{
+ struct bpf_link *link;
+ char buf[50] = {};
+ int iter_fd, len;
+
+ link = bpf_program__attach_iter(prog, NULL);
+ if (!ASSERT_OK_PTR(link, "attach_iter"))
+ return;
+
+ iter_fd = bpf_iter_create(bpf_link__fd(link));
+ if (!ASSERT_GE(iter_fd, 0, "create_iter"))
+ goto free_link;
+
+ while ((len = read(iter_fd, buf, sizeof(buf))) > 0)
+ ;
+ ASSERT_GE(len, 0, "read");
+
+ close(iter_fd);
+
+free_link:
+ bpf_link__destroy(link);
+}
+
+static void test_tcp_client(struct sock_destroy_prog *skel)
+{
+ int serv = -1, clien = -1, accept_serv = -1, n;
+
+ serv = start_server(AF_INET6, SOCK_STREAM, NULL, 0, 0);
+ if (!ASSERT_GE(serv, 0, "start_server"))
+ goto cleanup;
+
+ clien = connect_to_fd(serv, 0);
+ if (!ASSERT_GE(clien, 0, "connect_to_fd"))
+ goto cleanup;
+
+ accept_serv = accept(serv, NULL, NULL);
+ if (!ASSERT_GE(accept_serv, 0, "serv accept"))
+ goto cleanup;
+
+ n = send(clien, "t", 1, 0);
+ if (!ASSERT_EQ(n, 1, "client send"))
+ goto cleanup;
+
+ /* Run iterator program that destroys connected client sockets. */
+ start_iter_sockets(skel->progs.iter_tcp6_client);
+
+ n = send(clien, "t", 1, 0);
+ if (!ASSERT_LT(n, 0, "client_send on destroyed socket"))
+ goto cleanup;
+ ASSERT_EQ(errno, ECONNABORTED, "error code on destroyed socket");
+
+cleanup:
+ if (clien != -1)
+ close(clien);
+ if (accept_serv != -1)
+ close(accept_serv);
+ if (serv != -1)
+ close(serv);
+}
+
+static void test_tcp_server(struct sock_destroy_prog *skel)
+{
+ int serv = -1, clien = -1, accept_serv = -1, n, serv_port;
+
+ serv = start_server(AF_INET6, SOCK_STREAM, NULL, 0, 0);
+ if (!ASSERT_GE(serv, 0, "start_server"))
+ goto cleanup;
+ serv_port = get_socket_local_port(serv);
+ if (!ASSERT_GE(serv_port, 0, "get_sock_local_port"))
+ goto cleanup;
+ skel->bss->serv_port = (__be16) serv_port;
+
+ clien = connect_to_fd(serv, 0);
+ if (!ASSERT_GE(clien, 0, "connect_to_fd"))
+ goto cleanup;
+
+ accept_serv = accept(serv, NULL, NULL);
+ if (!ASSERT_GE(accept_serv, 0, "serv accept"))
+ goto cleanup;
+
+ n = send(clien, "t", 1, 0);
+ if (!ASSERT_EQ(n, 1, "client send"))
+ goto cleanup;
+
+ /* Run iterator program that destroys server sockets. */
+ start_iter_sockets(skel->progs.iter_tcp6_server);
+
+ n = send(clien, "t", 1, 0);
+ if (!ASSERT_LT(n, 0, "client_send on destroyed socket"))
+ goto cleanup;
+ ASSERT_EQ(errno, ECONNRESET, "error code on destroyed socket");
+
+cleanup:
+ if (clien != -1)
+ close(clien);
+ if (accept_serv != -1)
+ close(accept_serv);
+ if (serv != -1)
+ close(serv);
+}
+
+static void test_udp_client(struct sock_destroy_prog *skel)
+{
+ int serv = -1, clien = -1, n = 0;
+
+ serv = start_server(AF_INET6, SOCK_DGRAM, NULL, 0, 0);
+ if (!ASSERT_GE(serv, 0, "start_server"))
+ goto cleanup;
+
+ clien = connect_to_fd(serv, 0);
+ if (!ASSERT_GE(clien, 0, "connect_to_fd"))
+ goto cleanup;
+
+ n = send(clien, "t", 1, 0);
+ if (!ASSERT_EQ(n, 1, "client send"))
+ goto cleanup;
+
+ /* Run iterator program that destroys sockets. */
+ start_iter_sockets(skel->progs.iter_udp6_client);
+
+ n = send(clien, "t", 1, 0);
+ if (!ASSERT_LT(n, 0, "client_send on destroyed socket"))
+ goto cleanup;
+ /* UDP sockets have an overriding error code after they are disconnected,
+ * so we don't check for ECONNABORTED error code.
+ */
+
+cleanup:
+ if (clien != -1)
+ close(clien);
+ if (serv != -1)
+ close(serv);
+}
+
+static void test_udp_server(struct sock_destroy_prog *skel)
+{
+ int *listen_fds = NULL, n, i, serv_port;
+ unsigned int num_listens = 5;
+ char buf[1];
+
+ /* Start reuseport servers. */
+ listen_fds = start_reuseport_server(AF_INET6, SOCK_DGRAM,
+ "::1", 0, 0, num_listens);
+ if (!ASSERT_OK_PTR(listen_fds, "start_reuseport_server"))
+ goto cleanup;
+ serv_port = get_socket_local_port(listen_fds[0]);
+ if (!ASSERT_GE(serv_port, 0, "get_sock_local_port"))
+ goto cleanup;
+ skel->bss->serv_port = (__be16) serv_port;
+
+ /* Run iterator program that destroys server sockets. */
+ start_iter_sockets(skel->progs.iter_udp6_server);
+
+ for (i = 0; i < num_listens; ++i) {
+ n = read(listen_fds[i], buf, sizeof(buf));
+ if (!ASSERT_EQ(n, -1, "read") ||
+ !ASSERT_EQ(errno, ECONNABORTED, "error code on destroyed socket"))
+ break;
+ }
+ ASSERT_EQ(i, num_listens, "server socket");
+
+cleanup:
+ free_fds(listen_fds, num_listens);
+}
+
+void test_sock_destroy(void)
+{
+ struct sock_destroy_prog *skel;
+ struct nstoken *nstoken = NULL;
+ int cgroup_fd;
+
+ skel = sock_destroy_prog__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "skel_open"))
+ return;
+
+ cgroup_fd = test__join_cgroup("/sock_destroy");
+ if (!ASSERT_GE(cgroup_fd, 0, "join_cgroup"))
+ goto cleanup;
+
+ skel->links.sock_connect = bpf_program__attach_cgroup(
+ skel->progs.sock_connect, cgroup_fd);
+ if (!ASSERT_OK_PTR(skel->links.sock_connect, "prog_attach"))
+ goto cleanup;
+
+ SYS(cleanup, "ip netns add %s", TEST_NS);
+ SYS(cleanup, "ip -net %s link set dev lo up", TEST_NS);
+
+ nstoken = open_netns(TEST_NS);
+ if (!ASSERT_OK_PTR(nstoken, "open_netns"))
+ goto cleanup;
+
+ if (test__start_subtest("tcp_client"))
+ test_tcp_client(skel);
+ if (test__start_subtest("tcp_server"))
+ test_tcp_server(skel);
+ if (test__start_subtest("udp_client"))
+ test_udp_client(skel);
+ if (test__start_subtest("udp_server"))
+ test_udp_server(skel);
+
+ RUN_TESTS(sock_destroy_prog_fail);
+
+cleanup:
+ if (nstoken)
+ close_netns(nstoken);
+ SYS_NOFAIL("ip netns del " TEST_NS);
+ if (cgroup_fd >= 0)
+ close(cgroup_fd);
+ sock_destroy_prog__destroy(skel);
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/sock_fields.c b/tools/testing/selftests/bpf/prog_tests/sock_fields.c
new file mode 100644
index 000000000000..7d23166c77af
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/sock_fields.c
@@ -0,0 +1,402 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2019 Facebook */
+
+#define _GNU_SOURCE
+#include <netinet/in.h>
+#include <arpa/inet.h>
+#include <unistd.h>
+#include <sched.h>
+#include <stdlib.h>
+#include <string.h>
+#include <errno.h>
+
+#include <bpf/bpf.h>
+#include <bpf/libbpf.h>
+#include <linux/compiler.h>
+
+#include "network_helpers.h"
+#include "cgroup_helpers.h"
+#include "test_progs.h"
+#include "test_sock_fields.skel.h"
+
+enum bpf_linum_array_idx {
+ EGRESS_LINUM_IDX,
+ INGRESS_LINUM_IDX,
+ READ_SK_DST_PORT_LINUM_IDX,
+ __NR_BPF_LINUM_ARRAY_IDX,
+};
+
+struct bpf_spinlock_cnt {
+ struct bpf_spin_lock lock;
+ __u32 cnt;
+};
+
+#define PARENT_CGROUP "/test-bpf-sock-fields"
+#define CHILD_CGROUP "/test-bpf-sock-fields/child"
+#define DATA "Hello BPF!"
+#define DATA_LEN sizeof(DATA)
+
+static struct sockaddr_in6 srv_sa6, cli_sa6;
+static int sk_pkt_out_cnt10_fd;
+static struct test_sock_fields *skel;
+static int sk_pkt_out_cnt_fd;
+static __u64 parent_cg_id;
+static __u64 child_cg_id;
+static int linum_map_fd;
+static __u32 duration;
+
+static bool create_netns(void)
+{
+ if (!ASSERT_OK(unshare(CLONE_NEWNET), "create netns"))
+ return false;
+
+ if (!ASSERT_OK(system("ip link set dev lo up"), "bring up lo"))
+ return false;
+
+ return true;
+}
+
+static void print_sk(const struct bpf_sock *sk, const char *prefix)
+{
+ char src_ip4[24], dst_ip4[24];
+ char src_ip6[64], dst_ip6[64];
+
+ inet_ntop(AF_INET, &sk->src_ip4, src_ip4, sizeof(src_ip4));
+ inet_ntop(AF_INET6, &sk->src_ip6, src_ip6, sizeof(src_ip6));
+ inet_ntop(AF_INET, &sk->dst_ip4, dst_ip4, sizeof(dst_ip4));
+ inet_ntop(AF_INET6, &sk->dst_ip6, dst_ip6, sizeof(dst_ip6));
+
+ printf("%s: state:%u bound_dev_if:%u family:%u type:%u protocol:%u mark:%u priority:%u "
+ "src_ip4:%x(%s) src_ip6:%x:%x:%x:%x(%s) src_port:%u "
+ "dst_ip4:%x(%s) dst_ip6:%x:%x:%x:%x(%s) dst_port:%u\n",
+ prefix,
+ sk->state, sk->bound_dev_if, sk->family, sk->type, sk->protocol,
+ sk->mark, sk->priority,
+ sk->src_ip4, src_ip4,
+ sk->src_ip6[0], sk->src_ip6[1], sk->src_ip6[2], sk->src_ip6[3],
+ src_ip6, sk->src_port,
+ sk->dst_ip4, dst_ip4,
+ sk->dst_ip6[0], sk->dst_ip6[1], sk->dst_ip6[2], sk->dst_ip6[3],
+ dst_ip6, ntohs(sk->dst_port));
+}
+
+static void print_tp(const struct bpf_tcp_sock *tp, const char *prefix)
+{
+ printf("%s: snd_cwnd:%u srtt_us:%u rtt_min:%u snd_ssthresh:%u rcv_nxt:%u "
+ "snd_nxt:%u snd:una:%u mss_cache:%u ecn_flags:%u "
+ "rate_delivered:%u rate_interval_us:%u packets_out:%u "
+ "retrans_out:%u total_retrans:%u segs_in:%u data_segs_in:%u "
+ "segs_out:%u data_segs_out:%u lost_out:%u sacked_out:%u "
+ "bytes_received:%llu bytes_acked:%llu\n",
+ prefix,
+ tp->snd_cwnd, tp->srtt_us, tp->rtt_min, tp->snd_ssthresh,
+ tp->rcv_nxt, tp->snd_nxt, tp->snd_una, tp->mss_cache,
+ tp->ecn_flags, tp->rate_delivered, tp->rate_interval_us,
+ tp->packets_out, tp->retrans_out, tp->total_retrans,
+ tp->segs_in, tp->data_segs_in, tp->segs_out,
+ tp->data_segs_out, tp->lost_out, tp->sacked_out,
+ tp->bytes_received, tp->bytes_acked);
+}
+
+static void check_result(void)
+{
+ struct bpf_tcp_sock srv_tp, cli_tp, listen_tp;
+ struct bpf_sock srv_sk, cli_sk, listen_sk;
+ __u32 idx, ingress_linum, egress_linum, linum;
+ int err;
+
+ idx = EGRESS_LINUM_IDX;
+ err = bpf_map_lookup_elem(linum_map_fd, &idx, &egress_linum);
+ CHECK(err < 0, "bpf_map_lookup_elem(linum_map_fd)",
+ "err:%d errno:%d\n", err, errno);
+
+ idx = INGRESS_LINUM_IDX;
+ err = bpf_map_lookup_elem(linum_map_fd, &idx, &ingress_linum);
+ CHECK(err < 0, "bpf_map_lookup_elem(linum_map_fd)",
+ "err:%d errno:%d\n", err, errno);
+
+ idx = READ_SK_DST_PORT_LINUM_IDX;
+ err = bpf_map_lookup_elem(linum_map_fd, &idx, &linum);
+ ASSERT_OK(err, "bpf_map_lookup_elem(linum_map_fd, READ_SK_DST_PORT_IDX)");
+ ASSERT_EQ(linum, 0, "failure in read_sk_dst_port on line");
+
+ memcpy(&srv_sk, &skel->bss->srv_sk, sizeof(srv_sk));
+ memcpy(&srv_tp, &skel->bss->srv_tp, sizeof(srv_tp));
+ memcpy(&cli_sk, &skel->bss->cli_sk, sizeof(cli_sk));
+ memcpy(&cli_tp, &skel->bss->cli_tp, sizeof(cli_tp));
+ memcpy(&listen_sk, &skel->bss->listen_sk, sizeof(listen_sk));
+ memcpy(&listen_tp, &skel->bss->listen_tp, sizeof(listen_tp));
+
+ print_sk(&listen_sk, "listen_sk");
+ print_sk(&srv_sk, "srv_sk");
+ print_sk(&cli_sk, "cli_sk");
+ print_tp(&listen_tp, "listen_tp");
+ print_tp(&srv_tp, "srv_tp");
+ print_tp(&cli_tp, "cli_tp");
+
+ CHECK(listen_sk.state != 10 ||
+ listen_sk.family != AF_INET6 ||
+ listen_sk.protocol != IPPROTO_TCP ||
+ memcmp(listen_sk.src_ip6, &in6addr_loopback,
+ sizeof(listen_sk.src_ip6)) ||
+ listen_sk.dst_ip6[0] || listen_sk.dst_ip6[1] ||
+ listen_sk.dst_ip6[2] || listen_sk.dst_ip6[3] ||
+ listen_sk.src_port != ntohs(srv_sa6.sin6_port) ||
+ listen_sk.dst_port,
+ "listen_sk",
+ "Unexpected. Check listen_sk output. ingress_linum:%u\n",
+ ingress_linum);
+
+ CHECK(srv_sk.state == 10 ||
+ !srv_sk.state ||
+ srv_sk.family != AF_INET6 ||
+ srv_sk.protocol != IPPROTO_TCP ||
+ memcmp(srv_sk.src_ip6, &in6addr_loopback,
+ sizeof(srv_sk.src_ip6)) ||
+ memcmp(srv_sk.dst_ip6, &in6addr_loopback,
+ sizeof(srv_sk.dst_ip6)) ||
+ srv_sk.src_port != ntohs(srv_sa6.sin6_port) ||
+ srv_sk.dst_port != cli_sa6.sin6_port,
+ "srv_sk", "Unexpected. Check srv_sk output. egress_linum:%u\n",
+ egress_linum);
+
+ CHECK(!skel->bss->lsndtime, "srv_tp", "Unexpected lsndtime:0\n");
+
+ CHECK(cli_sk.state == 10 ||
+ !cli_sk.state ||
+ cli_sk.family != AF_INET6 ||
+ cli_sk.protocol != IPPROTO_TCP ||
+ memcmp(cli_sk.src_ip6, &in6addr_loopback,
+ sizeof(cli_sk.src_ip6)) ||
+ memcmp(cli_sk.dst_ip6, &in6addr_loopback,
+ sizeof(cli_sk.dst_ip6)) ||
+ cli_sk.src_port != ntohs(cli_sa6.sin6_port) ||
+ cli_sk.dst_port != srv_sa6.sin6_port,
+ "cli_sk", "Unexpected. Check cli_sk output. egress_linum:%u\n",
+ egress_linum);
+
+ CHECK(listen_tp.data_segs_out ||
+ listen_tp.data_segs_in ||
+ listen_tp.total_retrans ||
+ listen_tp.bytes_acked,
+ "listen_tp",
+ "Unexpected. Check listen_tp output. ingress_linum:%u\n",
+ ingress_linum);
+
+ CHECK(srv_tp.data_segs_out != 2 ||
+ srv_tp.data_segs_in ||
+ srv_tp.snd_cwnd != 10 ||
+ srv_tp.total_retrans ||
+ srv_tp.bytes_acked < 2 * DATA_LEN,
+ "srv_tp", "Unexpected. Check srv_tp output. egress_linum:%u\n",
+ egress_linum);
+
+ CHECK(cli_tp.data_segs_out ||
+ cli_tp.data_segs_in != 2 ||
+ cli_tp.snd_cwnd != 10 ||
+ cli_tp.total_retrans ||
+ cli_tp.bytes_received < 2 * DATA_LEN,
+ "cli_tp", "Unexpected. Check cli_tp output. egress_linum:%u\n",
+ egress_linum);
+
+ CHECK(skel->bss->parent_cg_id != parent_cg_id,
+ "parent_cg_id", "%zu != %zu\n",
+ (size_t)skel->bss->parent_cg_id, (size_t)parent_cg_id);
+
+ CHECK(skel->bss->child_cg_id != child_cg_id,
+ "child_cg_id", "%zu != %zu\n",
+ (size_t)skel->bss->child_cg_id, (size_t)child_cg_id);
+}
+
+static void check_sk_pkt_out_cnt(int accept_fd, int cli_fd)
+{
+ struct bpf_spinlock_cnt pkt_out_cnt = {}, pkt_out_cnt10 = {};
+ int err;
+
+ pkt_out_cnt.cnt = ~0;
+ pkt_out_cnt10.cnt = ~0;
+ err = bpf_map_lookup_elem(sk_pkt_out_cnt_fd, &accept_fd, &pkt_out_cnt);
+ if (!err)
+ err = bpf_map_lookup_elem(sk_pkt_out_cnt10_fd, &accept_fd,
+ &pkt_out_cnt10);
+
+ /* The bpf prog only counts for fullsock and
+ * passive connection did not become fullsock until 3WHS
+ * had been finished, so the bpf prog only counted two data
+ * packet out.
+ */
+ CHECK(err || pkt_out_cnt.cnt < 0xeB9F + 2 ||
+ pkt_out_cnt10.cnt < 0xeB9F + 20,
+ "bpf_map_lookup_elem(sk_pkt_out_cnt, &accept_fd)",
+ "err:%d errno:%d pkt_out_cnt:%u pkt_out_cnt10:%u\n",
+ err, errno, pkt_out_cnt.cnt, pkt_out_cnt10.cnt);
+
+ pkt_out_cnt.cnt = ~0;
+ pkt_out_cnt10.cnt = ~0;
+ err = bpf_map_lookup_elem(sk_pkt_out_cnt_fd, &cli_fd, &pkt_out_cnt);
+ if (!err)
+ err = bpf_map_lookup_elem(sk_pkt_out_cnt10_fd, &cli_fd,
+ &pkt_out_cnt10);
+ /* Active connection is fullsock from the beginning.
+ * 1 SYN and 1 ACK during 3WHS
+ * 2 Acks on data packet.
+ *
+ * The bpf_prog initialized it to 0xeB9F.
+ */
+ CHECK(err || pkt_out_cnt.cnt < 0xeB9F + 4 ||
+ pkt_out_cnt10.cnt < 0xeB9F + 40,
+ "bpf_map_lookup_elem(sk_pkt_out_cnt, &cli_fd)",
+ "err:%d errno:%d pkt_out_cnt:%u pkt_out_cnt10:%u\n",
+ err, errno, pkt_out_cnt.cnt, pkt_out_cnt10.cnt);
+}
+
+static int init_sk_storage(int sk_fd, __u32 pkt_out_cnt)
+{
+ struct bpf_spinlock_cnt scnt = {};
+ int err;
+
+ scnt.cnt = pkt_out_cnt;
+ err = bpf_map_update_elem(sk_pkt_out_cnt_fd, &sk_fd, &scnt,
+ BPF_NOEXIST);
+ if (CHECK(err, "bpf_map_update_elem(sk_pkt_out_cnt_fd)",
+ "err:%d errno:%d\n", err, errno))
+ return err;
+
+ err = bpf_map_update_elem(sk_pkt_out_cnt10_fd, &sk_fd, &scnt,
+ BPF_NOEXIST);
+ if (CHECK(err, "bpf_map_update_elem(sk_pkt_out_cnt10_fd)",
+ "err:%d errno:%d\n", err, errno))
+ return err;
+
+ return 0;
+}
+
+static void test(void)
+{
+ int listen_fd = -1, cli_fd = -1, accept_fd = -1, err, i;
+ socklen_t addrlen = sizeof(struct sockaddr_in6);
+ char buf[DATA_LEN];
+
+ /* Prepare listen_fd */
+ listen_fd = start_server(AF_INET6, SOCK_STREAM, "::1", 0xcafe, 0);
+ /* start_server() has logged the error details */
+ if (CHECK_FAIL(listen_fd == -1))
+ goto done;
+
+ err = getsockname(listen_fd, (struct sockaddr *)&srv_sa6, &addrlen);
+ if (CHECK(err, "getsockname(listen_fd)", "err:%d errno:%d\n", err,
+ errno))
+ goto done;
+ memcpy(&skel->bss->srv_sa6, &srv_sa6, sizeof(srv_sa6));
+
+ cli_fd = connect_to_fd(listen_fd, 0);
+ if (CHECK_FAIL(cli_fd == -1))
+ goto done;
+
+ err = getsockname(cli_fd, (struct sockaddr *)&cli_sa6, &addrlen);
+ if (CHECK(err, "getsockname(cli_fd)", "err:%d errno:%d\n",
+ err, errno))
+ goto done;
+
+ accept_fd = accept(listen_fd, NULL, NULL);
+ if (CHECK(accept_fd == -1, "accept(listen_fd)",
+ "accept_fd:%d errno:%d\n",
+ accept_fd, errno))
+ goto done;
+
+ if (init_sk_storage(accept_fd, 0xeB9F))
+ goto done;
+
+ for (i = 0; i < 2; i++) {
+ /* Send some data from accept_fd to cli_fd.
+ * MSG_EOR to stop kernel from coalescing two pkts.
+ */
+ err = send(accept_fd, DATA, DATA_LEN, MSG_EOR);
+ if (CHECK(err != DATA_LEN, "send(accept_fd)",
+ "err:%d errno:%d\n", err, errno))
+ goto done;
+
+ err = recv(cli_fd, buf, DATA_LEN, 0);
+ if (CHECK(err != DATA_LEN, "recv(cli_fd)", "err:%d errno:%d\n",
+ err, errno))
+ goto done;
+ }
+
+ shutdown(cli_fd, SHUT_WR);
+ err = recv(accept_fd, buf, 1, 0);
+ if (CHECK(err, "recv(accept_fd) for fin", "err:%d errno:%d\n",
+ err, errno))
+ goto done;
+ shutdown(accept_fd, SHUT_WR);
+ err = recv(cli_fd, buf, 1, 0);
+ if (CHECK(err, "recv(cli_fd) for fin", "err:%d errno:%d\n",
+ err, errno))
+ goto done;
+ check_sk_pkt_out_cnt(accept_fd, cli_fd);
+ check_result();
+
+done:
+ if (accept_fd != -1)
+ close(accept_fd);
+ if (cli_fd != -1)
+ close(cli_fd);
+ if (listen_fd != -1)
+ close(listen_fd);
+}
+
+void serial_test_sock_fields(void)
+{
+ int parent_cg_fd = -1, child_cg_fd = -1;
+ struct bpf_link *link;
+
+ /* Use a dedicated netns to have a fixed listen port */
+ if (!create_netns())
+ return;
+
+ /* Create a cgroup, get fd, and join it */
+ parent_cg_fd = test__join_cgroup(PARENT_CGROUP);
+ if (CHECK_FAIL(parent_cg_fd < 0))
+ return;
+ parent_cg_id = get_cgroup_id(PARENT_CGROUP);
+ if (CHECK_FAIL(!parent_cg_id))
+ goto done;
+
+ child_cg_fd = test__join_cgroup(CHILD_CGROUP);
+ if (CHECK_FAIL(child_cg_fd < 0))
+ goto done;
+ child_cg_id = get_cgroup_id(CHILD_CGROUP);
+ if (CHECK_FAIL(!child_cg_id))
+ goto done;
+
+ skel = test_sock_fields__open_and_load();
+ if (CHECK(!skel, "test_sock_fields__open_and_load", "failed\n"))
+ goto done;
+
+ link = bpf_program__attach_cgroup(skel->progs.egress_read_sock_fields, child_cg_fd);
+ if (!ASSERT_OK_PTR(link, "attach_cgroup(egress_read_sock_fields)"))
+ goto done;
+ skel->links.egress_read_sock_fields = link;
+
+ link = bpf_program__attach_cgroup(skel->progs.ingress_read_sock_fields, child_cg_fd);
+ if (!ASSERT_OK_PTR(link, "attach_cgroup(ingress_read_sock_fields)"))
+ goto done;
+ skel->links.ingress_read_sock_fields = link;
+
+ link = bpf_program__attach_cgroup(skel->progs.read_sk_dst_port, child_cg_fd);
+ if (!ASSERT_OK_PTR(link, "attach_cgroup(read_sk_dst_port"))
+ goto done;
+ skel->links.read_sk_dst_port = link;
+
+ linum_map_fd = bpf_map__fd(skel->maps.linum_map);
+ sk_pkt_out_cnt_fd = bpf_map__fd(skel->maps.sk_pkt_out_cnt);
+ sk_pkt_out_cnt10_fd = bpf_map__fd(skel->maps.sk_pkt_out_cnt10);
+
+ test();
+
+done:
+ test_sock_fields__destroy(skel);
+ if (child_cg_fd >= 0)
+ close(child_cg_fd);
+ if (parent_cg_fd >= 0)
+ close(parent_cg_fd);
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/sock_iter_batch.c b/tools/testing/selftests/bpf/prog_tests/sock_iter_batch.c
new file mode 100644
index 000000000000..d56e18b25528
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/sock_iter_batch.c
@@ -0,0 +1,135 @@
+// SPDX-License-Identifier: GPL-2.0
+// Copyright (c) 2024 Meta
+
+#include <test_progs.h>
+#include "network_helpers.h"
+#include "sock_iter_batch.skel.h"
+
+#define TEST_NS "sock_iter_batch_netns"
+
+static const int nr_soreuse = 4;
+
+static void do_test(int sock_type, bool onebyone)
+{
+ int err, i, nread, to_read, total_read, iter_fd = -1;
+ int first_idx, second_idx, indices[nr_soreuse];
+ struct bpf_link *link = NULL;
+ struct sock_iter_batch *skel;
+ int *fds[2] = {};
+
+ skel = sock_iter_batch__open();
+ if (!ASSERT_OK_PTR(skel, "sock_iter_batch__open"))
+ return;
+
+ /* Prepare 2 buckets of sockets in the kernel hashtable */
+ for (i = 0; i < ARRAY_SIZE(fds); i++) {
+ int local_port;
+
+ fds[i] = start_reuseport_server(AF_INET6, sock_type, "::1", 0, 0,
+ nr_soreuse);
+ if (!ASSERT_OK_PTR(fds[i], "start_reuseport_server"))
+ goto done;
+ local_port = get_socket_local_port(*fds[i]);
+ if (!ASSERT_GE(local_port, 0, "get_socket_local_port"))
+ goto done;
+ skel->rodata->ports[i] = ntohs(local_port);
+ }
+
+ err = sock_iter_batch__load(skel);
+ if (!ASSERT_OK(err, "sock_iter_batch__load"))
+ goto done;
+
+ link = bpf_program__attach_iter(sock_type == SOCK_STREAM ?
+ skel->progs.iter_tcp_soreuse :
+ skel->progs.iter_udp_soreuse,
+ NULL);
+ if (!ASSERT_OK_PTR(link, "bpf_program__attach_iter"))
+ goto done;
+
+ iter_fd = bpf_iter_create(bpf_link__fd(link));
+ if (!ASSERT_GE(iter_fd, 0, "bpf_iter_create"))
+ goto done;
+
+ /* Test reading a bucket (either from fds[0] or fds[1]).
+ * Only read "nr_soreuse - 1" number of sockets
+ * from a bucket and leave one socket out from
+ * that bucket on purpose.
+ */
+ to_read = (nr_soreuse - 1) * sizeof(*indices);
+ total_read = 0;
+ first_idx = -1;
+ do {
+ nread = read(iter_fd, indices, onebyone ? sizeof(*indices) : to_read);
+ if (nread <= 0 || nread % sizeof(*indices))
+ break;
+ total_read += nread;
+
+ if (first_idx == -1)
+ first_idx = indices[0];
+ for (i = 0; i < nread / sizeof(*indices); i++)
+ ASSERT_EQ(indices[i], first_idx, "first_idx");
+ } while (total_read < to_read);
+ ASSERT_EQ(nread, onebyone ? sizeof(*indices) : to_read, "nread");
+ ASSERT_EQ(total_read, to_read, "total_read");
+
+ free_fds(fds[first_idx], nr_soreuse);
+ fds[first_idx] = NULL;
+
+ /* Read the "whole" second bucket */
+ to_read = nr_soreuse * sizeof(*indices);
+ total_read = 0;
+ second_idx = !first_idx;
+ do {
+ nread = read(iter_fd, indices, onebyone ? sizeof(*indices) : to_read);
+ if (nread <= 0 || nread % sizeof(*indices))
+ break;
+ total_read += nread;
+
+ for (i = 0; i < nread / sizeof(*indices); i++)
+ ASSERT_EQ(indices[i], second_idx, "second_idx");
+ } while (total_read <= to_read);
+ ASSERT_EQ(nread, 0, "nread");
+ /* Both so_reuseport ports should be in different buckets, so
+ * total_read must equal to the expected to_read.
+ *
+ * For a very unlikely case, both ports collide at the same bucket,
+ * the bucket offset (i.e. 3) will be skipped and it cannot
+ * expect the to_read number of bytes.
+ */
+ if (skel->bss->bucket[0] != skel->bss->bucket[1])
+ ASSERT_EQ(total_read, to_read, "total_read");
+
+done:
+ for (i = 0; i < ARRAY_SIZE(fds); i++)
+ free_fds(fds[i], nr_soreuse);
+ if (iter_fd < 0)
+ close(iter_fd);
+ bpf_link__destroy(link);
+ sock_iter_batch__destroy(skel);
+}
+
+void test_sock_iter_batch(void)
+{
+ struct nstoken *nstoken = NULL;
+
+ SYS_NOFAIL("ip netns del " TEST_NS);
+ SYS(done, "ip netns add %s", TEST_NS);
+ SYS(done, "ip -net %s link set dev lo up", TEST_NS);
+
+ nstoken = open_netns(TEST_NS);
+ if (!ASSERT_OK_PTR(nstoken, "open_netns"))
+ goto done;
+
+ if (test__start_subtest("tcp")) {
+ do_test(SOCK_STREAM, true);
+ do_test(SOCK_STREAM, false);
+ }
+ if (test__start_subtest("udp")) {
+ do_test(SOCK_DGRAM, true);
+ do_test(SOCK_DGRAM, false);
+ }
+ close_netns(nstoken);
+
+done:
+ SYS_NOFAIL("ip netns del " TEST_NS);
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/socket_cookie.c b/tools/testing/selftests/bpf/prog_tests/socket_cookie.c
new file mode 100644
index 000000000000..232db28dde18
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/socket_cookie.c
@@ -0,0 +1,76 @@
+// SPDX-License-Identifier: GPL-2.0
+// Copyright (c) 2020 Google LLC.
+// Copyright (c) 2018 Facebook
+
+#include <test_progs.h>
+#include "socket_cookie_prog.skel.h"
+#include "network_helpers.h"
+
+static int duration;
+
+struct socket_cookie {
+ __u64 cookie_key;
+ __u32 cookie_value;
+};
+
+void test_socket_cookie(void)
+{
+ int server_fd = 0, client_fd = 0, cgroup_fd = 0, err = 0;
+ socklen_t addr_len = sizeof(struct sockaddr_in6);
+ struct socket_cookie_prog *skel;
+ __u32 cookie_expected_value;
+ struct sockaddr_in6 addr;
+ struct socket_cookie val;
+
+ skel = socket_cookie_prog__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "skel_open"))
+ return;
+
+ cgroup_fd = test__join_cgroup("/socket_cookie");
+ if (CHECK(cgroup_fd < 0, "join_cgroup", "cgroup creation failed\n"))
+ goto out;
+
+ skel->links.set_cookie = bpf_program__attach_cgroup(
+ skel->progs.set_cookie, cgroup_fd);
+ if (!ASSERT_OK_PTR(skel->links.set_cookie, "prog_attach"))
+ goto close_cgroup_fd;
+
+ skel->links.update_cookie_sockops = bpf_program__attach_cgroup(
+ skel->progs.update_cookie_sockops, cgroup_fd);
+ if (!ASSERT_OK_PTR(skel->links.update_cookie_sockops, "prog_attach"))
+ goto close_cgroup_fd;
+
+ skel->links.update_cookie_tracing = bpf_program__attach(
+ skel->progs.update_cookie_tracing);
+ if (!ASSERT_OK_PTR(skel->links.update_cookie_tracing, "prog_attach"))
+ goto close_cgroup_fd;
+
+ server_fd = start_server(AF_INET6, SOCK_STREAM, "::1", 0, 0);
+ if (CHECK(server_fd < 0, "start_server", "errno %d\n", errno))
+ goto close_cgroup_fd;
+
+ client_fd = connect_to_fd(server_fd, 0);
+ if (CHECK(client_fd < 0, "connect_to_fd", "errno %d\n", errno))
+ goto close_server_fd;
+
+ err = bpf_map_lookup_elem(bpf_map__fd(skel->maps.socket_cookies),
+ &client_fd, &val);
+ if (!ASSERT_OK(err, "map_lookup(socket_cookies)"))
+ goto close_client_fd;
+
+ err = getsockname(client_fd, (struct sockaddr *)&addr, &addr_len);
+ if (!ASSERT_OK(err, "getsockname"))
+ goto close_client_fd;
+
+ cookie_expected_value = (ntohs(addr.sin6_port) << 8) | 0xFF;
+ ASSERT_EQ(val.cookie_value, cookie_expected_value, "cookie_value");
+
+close_client_fd:
+ close(client_fd);
+close_server_fd:
+ close(server_fd);
+close_cgroup_fd:
+ close(cgroup_fd);
+out:
+ socket_cookie_prog__destroy(skel);
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/sockmap_basic.c b/tools/testing/selftests/bpf/prog_tests/sockmap_basic.c
index 96e7b7f84c65..77e26ecffa9d 100644
--- a/tools/testing/selftests/bpf/prog_tests/sockmap_basic.c
+++ b/tools/testing/selftests/bpf/prog_tests/sockmap_basic.c
@@ -1,9 +1,20 @@
// SPDX-License-Identifier: GPL-2.0
// Copyright (c) 2020 Cloudflare
#include <error.h>
+#include <netinet/tcp.h>
+#include <sys/epoll.h>
#include "test_progs.h"
#include "test_skmsg_load_helpers.skel.h"
+#include "test_sockmap_update.skel.h"
+#include "test_sockmap_invalid_update.skel.h"
+#include "test_sockmap_skb_verdict_attach.skel.h"
+#include "test_sockmap_progs_query.skel.h"
+#include "test_sockmap_pass_prog.skel.h"
+#include "test_sockmap_drop_prog.skel.h"
+#include "bpf_iter_sockmap.skel.h"
+
+#include "sockmap_helpers.h"
#define TCP_REPAIR 19 /* TCP sock is under repair right now */
@@ -21,21 +32,21 @@ static int connected_socket_v4(void)
int s, repair, err;
s = socket(AF_INET, SOCK_STREAM, 0);
- if (CHECK_FAIL(s == -1))
+ if (!ASSERT_GE(s, 0, "socket"))
goto error;
repair = TCP_REPAIR_ON;
err = setsockopt(s, SOL_TCP, TCP_REPAIR, &repair, sizeof(repair));
- if (CHECK_FAIL(err))
+ if (!ASSERT_OK(err, "setsockopt(TCP_REPAIR)"))
goto error;
err = connect(s, (struct sockaddr *)&addr, len);
- if (CHECK_FAIL(err))
+ if (!ASSERT_OK(err, "connect"))
goto error;
repair = TCP_REPAIR_OFF_NO_WP;
err = setsockopt(s, SOL_TCP, TCP_REPAIR, &repair, sizeof(repair));
- if (CHECK_FAIL(err))
+ if (!ASSERT_OK(err, "setsockopt(TCP_REPAIR)"))
goto error;
return s;
@@ -45,6 +56,35 @@ error:
return -1;
}
+static void compare_cookies(struct bpf_map *src, struct bpf_map *dst)
+{
+ __u32 i, max_entries = bpf_map__max_entries(src);
+ int err, src_fd, dst_fd;
+
+ src_fd = bpf_map__fd(src);
+ dst_fd = bpf_map__fd(dst);
+
+ for (i = 0; i < max_entries; i++) {
+ __u64 src_cookie, dst_cookie;
+
+ err = bpf_map_lookup_elem(src_fd, &i, &src_cookie);
+ if (err && errno == ENOENT) {
+ err = bpf_map_lookup_elem(dst_fd, &i, &dst_cookie);
+ ASSERT_ERR(err, "map_lookup_elem(dst)");
+ ASSERT_EQ(errno, ENOENT, "map_lookup_elem(dst)");
+ continue;
+ }
+ if (!ASSERT_OK(err, "lookup_elem(src)"))
+ continue;
+
+ err = bpf_map_lookup_elem(dst_fd, &i, &dst_cookie);
+ if (!ASSERT_OK(err, "lookup_elem(dst)"))
+ continue;
+
+ ASSERT_EQ(dst_cookie, src_cookie, "cookie mismatch");
+ }
+}
+
/* Create a map, populate it with one socket, and free the map. */
static void test_sockmap_create_update_free(enum bpf_map_type map_type)
{
@@ -52,20 +92,16 @@ static void test_sockmap_create_update_free(enum bpf_map_type map_type)
int s, map, err;
s = connected_socket_v4();
- if (CHECK_FAIL(s == -1))
+ if (!ASSERT_GE(s, 0, "connected_socket_v4"))
return;
- map = bpf_create_map(map_type, sizeof(int), sizeof(int), 1, 0);
- if (CHECK_FAIL(map == -1)) {
- perror("bpf_create_map");
+ map = bpf_map_create(map_type, NULL, sizeof(int), sizeof(int), 1, NULL);
+ if (!ASSERT_GE(map, 0, "bpf_map_create"))
goto out;
- }
err = bpf_map_update_elem(map, &zero, &s, BPF_NOEXIST);
- if (CHECK_FAIL(err)) {
- perror("bpf_map_update");
+ if (!ASSERT_OK(err, "bpf_map_update"))
goto out;
- }
out:
close(map);
@@ -78,29 +114,654 @@ static void test_skmsg_helpers(enum bpf_map_type map_type)
int err, map, verdict;
skel = test_skmsg_load_helpers__open_and_load();
- if (CHECK_FAIL(!skel)) {
- perror("test_skmsg_load_helpers__open_and_load");
+ if (!ASSERT_OK_PTR(skel, "test_skmsg_load_helpers__open_and_load"))
return;
- }
verdict = bpf_program__fd(skel->progs.prog_msg_verdict);
map = bpf_map__fd(skel->maps.sock_map);
err = bpf_prog_attach(verdict, map, BPF_SK_MSG_VERDICT, 0);
- if (CHECK_FAIL(err)) {
- perror("bpf_prog_attach");
+ if (!ASSERT_OK(err, "bpf_prog_attach"))
goto out;
- }
err = bpf_prog_detach2(verdict, map, BPF_SK_MSG_VERDICT);
- if (CHECK_FAIL(err)) {
- perror("bpf_prog_detach2");
+ if (!ASSERT_OK(err, "bpf_prog_detach2"))
goto out;
- }
out:
test_skmsg_load_helpers__destroy(skel);
}
+static void test_sockmap_update(enum bpf_map_type map_type)
+{
+ int err, prog, src;
+ struct test_sockmap_update *skel;
+ struct bpf_map *dst_map;
+ const __u32 zero = 0;
+ char dummy[14] = {0};
+ LIBBPF_OPTS(bpf_test_run_opts, topts,
+ .data_in = dummy,
+ .data_size_in = sizeof(dummy),
+ .repeat = 1,
+ );
+ __s64 sk;
+
+ sk = connected_socket_v4();
+ if (!ASSERT_NEQ(sk, -1, "connected_socket_v4"))
+ return;
+
+ skel = test_sockmap_update__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "open_and_load"))
+ goto close_sk;
+
+ prog = bpf_program__fd(skel->progs.copy_sock_map);
+ src = bpf_map__fd(skel->maps.src);
+ if (map_type == BPF_MAP_TYPE_SOCKMAP)
+ dst_map = skel->maps.dst_sock_map;
+ else
+ dst_map = skel->maps.dst_sock_hash;
+
+ err = bpf_map_update_elem(src, &zero, &sk, BPF_NOEXIST);
+ if (!ASSERT_OK(err, "update_elem(src)"))
+ goto out;
+
+ err = bpf_prog_test_run_opts(prog, &topts);
+ if (!ASSERT_OK(err, "test_run"))
+ goto out;
+ if (!ASSERT_NEQ(topts.retval, 0, "test_run retval"))
+ goto out;
+
+ compare_cookies(skel->maps.src, dst_map);
+
+out:
+ test_sockmap_update__destroy(skel);
+close_sk:
+ close(sk);
+}
+
+static void test_sockmap_invalid_update(void)
+{
+ struct test_sockmap_invalid_update *skel;
+
+ skel = test_sockmap_invalid_update__open_and_load();
+ if (!ASSERT_NULL(skel, "open_and_load"))
+ test_sockmap_invalid_update__destroy(skel);
+}
+
+static void test_sockmap_copy(enum bpf_map_type map_type)
+{
+ DECLARE_LIBBPF_OPTS(bpf_iter_attach_opts, opts);
+ int err, len, src_fd, iter_fd;
+ union bpf_iter_link_info linfo = {};
+ __u32 i, num_sockets, num_elems;
+ struct bpf_iter_sockmap *skel;
+ __s64 *sock_fd = NULL;
+ struct bpf_link *link;
+ struct bpf_map *src;
+ char buf[64];
+
+ skel = bpf_iter_sockmap__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "bpf_iter_sockmap__open_and_load"))
+ return;
+
+ if (map_type == BPF_MAP_TYPE_SOCKMAP) {
+ src = skel->maps.sockmap;
+ num_elems = bpf_map__max_entries(src);
+ num_sockets = num_elems - 1;
+ } else {
+ src = skel->maps.sockhash;
+ num_elems = bpf_map__max_entries(src) - 1;
+ num_sockets = num_elems;
+ }
+
+ sock_fd = calloc(num_sockets, sizeof(*sock_fd));
+ if (!ASSERT_OK_PTR(sock_fd, "calloc(sock_fd)"))
+ goto out;
+
+ for (i = 0; i < num_sockets; i++)
+ sock_fd[i] = -1;
+
+ src_fd = bpf_map__fd(src);
+
+ for (i = 0; i < num_sockets; i++) {
+ sock_fd[i] = connected_socket_v4();
+ if (!ASSERT_NEQ(sock_fd[i], -1, "connected_socket_v4"))
+ goto out;
+
+ err = bpf_map_update_elem(src_fd, &i, &sock_fd[i], BPF_NOEXIST);
+ if (!ASSERT_OK(err, "map_update"))
+ goto out;
+ }
+
+ linfo.map.map_fd = src_fd;
+ opts.link_info = &linfo;
+ opts.link_info_len = sizeof(linfo);
+ link = bpf_program__attach_iter(skel->progs.copy, &opts);
+ if (!ASSERT_OK_PTR(link, "attach_iter"))
+ goto out;
+
+ iter_fd = bpf_iter_create(bpf_link__fd(link));
+ if (!ASSERT_GE(iter_fd, 0, "create_iter"))
+ goto free_link;
+
+ /* do some tests */
+ while ((len = read(iter_fd, buf, sizeof(buf))) > 0)
+ ;
+ if (!ASSERT_GE(len, 0, "read"))
+ goto close_iter;
+
+ /* test results */
+ if (!ASSERT_EQ(skel->bss->elems, num_elems, "elems"))
+ goto close_iter;
+
+ if (!ASSERT_EQ(skel->bss->socks, num_sockets, "socks"))
+ goto close_iter;
+
+ compare_cookies(src, skel->maps.dst);
+
+close_iter:
+ close(iter_fd);
+free_link:
+ bpf_link__destroy(link);
+out:
+ for (i = 0; sock_fd && i < num_sockets; i++)
+ if (sock_fd[i] >= 0)
+ close(sock_fd[i]);
+ if (sock_fd)
+ free(sock_fd);
+ bpf_iter_sockmap__destroy(skel);
+}
+
+static void test_sockmap_skb_verdict_attach(enum bpf_attach_type first,
+ enum bpf_attach_type second)
+{
+ struct test_sockmap_skb_verdict_attach *skel;
+ int err, map, verdict;
+
+ skel = test_sockmap_skb_verdict_attach__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "open_and_load"))
+ return;
+
+ verdict = bpf_program__fd(skel->progs.prog_skb_verdict);
+ map = bpf_map__fd(skel->maps.sock_map);
+
+ err = bpf_prog_attach(verdict, map, first, 0);
+ if (!ASSERT_OK(err, "bpf_prog_attach"))
+ goto out;
+
+ err = bpf_prog_attach(verdict, map, second, 0);
+ ASSERT_EQ(err, -EBUSY, "prog_attach_fail");
+
+ err = bpf_prog_detach2(verdict, map, first);
+ if (!ASSERT_OK(err, "bpf_prog_detach2"))
+ goto out;
+out:
+ test_sockmap_skb_verdict_attach__destroy(skel);
+}
+
+static __u32 query_prog_id(int prog_fd)
+{
+ struct bpf_prog_info info = {};
+ __u32 info_len = sizeof(info);
+ int err;
+
+ err = bpf_prog_get_info_by_fd(prog_fd, &info, &info_len);
+ if (!ASSERT_OK(err, "bpf_prog_get_info_by_fd") ||
+ !ASSERT_EQ(info_len, sizeof(info), "bpf_prog_get_info_by_fd"))
+ return 0;
+
+ return info.id;
+}
+
+static void test_sockmap_progs_query(enum bpf_attach_type attach_type)
+{
+ struct test_sockmap_progs_query *skel;
+ int err, map_fd, verdict_fd;
+ __u32 attach_flags = 0;
+ __u32 prog_ids[3] = {};
+ __u32 prog_cnt = 3;
+
+ skel = test_sockmap_progs_query__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "test_sockmap_progs_query__open_and_load"))
+ return;
+
+ map_fd = bpf_map__fd(skel->maps.sock_map);
+
+ if (attach_type == BPF_SK_MSG_VERDICT)
+ verdict_fd = bpf_program__fd(skel->progs.prog_skmsg_verdict);
+ else
+ verdict_fd = bpf_program__fd(skel->progs.prog_skb_verdict);
+
+ err = bpf_prog_query(map_fd, attach_type, 0 /* query flags */,
+ &attach_flags, prog_ids, &prog_cnt);
+ ASSERT_OK(err, "bpf_prog_query failed");
+ ASSERT_EQ(attach_flags, 0, "wrong attach_flags on query");
+ ASSERT_EQ(prog_cnt, 0, "wrong program count on query");
+
+ err = bpf_prog_attach(verdict_fd, map_fd, attach_type, 0);
+ if (!ASSERT_OK(err, "bpf_prog_attach failed"))
+ goto out;
+
+ prog_cnt = 1;
+ err = bpf_prog_query(map_fd, attach_type, 0 /* query flags */,
+ &attach_flags, prog_ids, &prog_cnt);
+ ASSERT_OK(err, "bpf_prog_query failed");
+ ASSERT_EQ(attach_flags, 0, "wrong attach_flags on query");
+ ASSERT_EQ(prog_cnt, 1, "wrong program count on query");
+ ASSERT_EQ(prog_ids[0], query_prog_id(verdict_fd),
+ "wrong prog_ids on query");
+
+ bpf_prog_detach2(verdict_fd, map_fd, attach_type);
+out:
+ test_sockmap_progs_query__destroy(skel);
+}
+
+#define MAX_EVENTS 10
+static void test_sockmap_skb_verdict_shutdown(void)
+{
+ struct epoll_event ev, events[MAX_EVENTS];
+ int n, err, map, verdict, s, c1 = -1, p1 = -1;
+ struct test_sockmap_pass_prog *skel;
+ int epollfd;
+ int zero = 0;
+ char b;
+
+ skel = test_sockmap_pass_prog__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "open_and_load"))
+ return;
+
+ verdict = bpf_program__fd(skel->progs.prog_skb_verdict);
+ map = bpf_map__fd(skel->maps.sock_map_rx);
+
+ err = bpf_prog_attach(verdict, map, BPF_SK_SKB_STREAM_VERDICT, 0);
+ if (!ASSERT_OK(err, "bpf_prog_attach"))
+ goto out;
+
+ s = socket_loopback(AF_INET, SOCK_STREAM);
+ if (s < 0)
+ goto out;
+ err = create_pair(s, AF_INET, SOCK_STREAM, &c1, &p1);
+ if (err < 0)
+ goto out;
+
+ err = bpf_map_update_elem(map, &zero, &c1, BPF_NOEXIST);
+ if (err < 0)
+ goto out_close;
+
+ shutdown(p1, SHUT_WR);
+
+ ev.events = EPOLLIN;
+ ev.data.fd = c1;
+
+ epollfd = epoll_create1(0);
+ if (!ASSERT_GT(epollfd, -1, "epoll_create(0)"))
+ goto out_close;
+ err = epoll_ctl(epollfd, EPOLL_CTL_ADD, c1, &ev);
+ if (!ASSERT_OK(err, "epoll_ctl(EPOLL_CTL_ADD)"))
+ goto out_close;
+ err = epoll_wait(epollfd, events, MAX_EVENTS, -1);
+ if (!ASSERT_EQ(err, 1, "epoll_wait(fd)"))
+ goto out_close;
+
+ n = recv(c1, &b, 1, SOCK_NONBLOCK);
+ ASSERT_EQ(n, 0, "recv_timeout(fin)");
+out_close:
+ close(c1);
+ close(p1);
+out:
+ test_sockmap_pass_prog__destroy(skel);
+}
+
+static void test_sockmap_skb_verdict_fionread(bool pass_prog)
+{
+ int expected, zero = 0, sent, recvd, avail;
+ int err, map, verdict, s, c0 = -1, c1 = -1, p0 = -1, p1 = -1;
+ struct test_sockmap_pass_prog *pass = NULL;
+ struct test_sockmap_drop_prog *drop = NULL;
+ char buf[256] = "0123456789";
+
+ if (pass_prog) {
+ pass = test_sockmap_pass_prog__open_and_load();
+ if (!ASSERT_OK_PTR(pass, "open_and_load"))
+ return;
+ verdict = bpf_program__fd(pass->progs.prog_skb_verdict);
+ map = bpf_map__fd(pass->maps.sock_map_rx);
+ expected = sizeof(buf);
+ } else {
+ drop = test_sockmap_drop_prog__open_and_load();
+ if (!ASSERT_OK_PTR(drop, "open_and_load"))
+ return;
+ verdict = bpf_program__fd(drop->progs.prog_skb_verdict);
+ map = bpf_map__fd(drop->maps.sock_map_rx);
+ /* On drop data is consumed immediately and copied_seq inc'd */
+ expected = 0;
+ }
+
+
+ err = bpf_prog_attach(verdict, map, BPF_SK_SKB_STREAM_VERDICT, 0);
+ if (!ASSERT_OK(err, "bpf_prog_attach"))
+ goto out;
+
+ s = socket_loopback(AF_INET, SOCK_STREAM);
+ if (!ASSERT_GT(s, -1, "socket_loopback(s)"))
+ goto out;
+ err = create_socket_pairs(s, AF_INET, SOCK_STREAM, &c0, &c1, &p0, &p1);
+ if (!ASSERT_OK(err, "create_socket_pairs(s)"))
+ goto out;
+
+ err = bpf_map_update_elem(map, &zero, &c1, BPF_NOEXIST);
+ if (!ASSERT_OK(err, "bpf_map_update_elem(c1)"))
+ goto out_close;
+
+ sent = xsend(p1, &buf, sizeof(buf), 0);
+ ASSERT_EQ(sent, sizeof(buf), "xsend(p0)");
+ err = ioctl(c1, FIONREAD, &avail);
+ ASSERT_OK(err, "ioctl(FIONREAD) error");
+ ASSERT_EQ(avail, expected, "ioctl(FIONREAD)");
+ /* On DROP test there will be no data to read */
+ if (pass_prog) {
+ recvd = recv_timeout(c1, &buf, sizeof(buf), SOCK_NONBLOCK, IO_TIMEOUT_SEC);
+ ASSERT_EQ(recvd, sizeof(buf), "recv_timeout(c0)");
+ }
+
+out_close:
+ close(c0);
+ close(p0);
+ close(c1);
+ close(p1);
+out:
+ if (pass_prog)
+ test_sockmap_pass_prog__destroy(pass);
+ else
+ test_sockmap_drop_prog__destroy(drop);
+}
+
+static void test_sockmap_skb_verdict_peek(void)
+{
+ int err, map, verdict, s, c1, p1, zero = 0, sent, recvd, avail;
+ struct test_sockmap_pass_prog *pass;
+ char snd[256] = "0123456789";
+ char rcv[256] = "0";
+
+ pass = test_sockmap_pass_prog__open_and_load();
+ if (!ASSERT_OK_PTR(pass, "open_and_load"))
+ return;
+ verdict = bpf_program__fd(pass->progs.prog_skb_verdict);
+ map = bpf_map__fd(pass->maps.sock_map_rx);
+
+ err = bpf_prog_attach(verdict, map, BPF_SK_SKB_STREAM_VERDICT, 0);
+ if (!ASSERT_OK(err, "bpf_prog_attach"))
+ goto out;
+
+ s = socket_loopback(AF_INET, SOCK_STREAM);
+ if (!ASSERT_GT(s, -1, "socket_loopback(s)"))
+ goto out;
+
+ err = create_pair(s, AF_INET, SOCK_STREAM, &c1, &p1);
+ if (!ASSERT_OK(err, "create_pairs(s)"))
+ goto out;
+
+ err = bpf_map_update_elem(map, &zero, &c1, BPF_NOEXIST);
+ if (!ASSERT_OK(err, "bpf_map_update_elem(c1)"))
+ goto out_close;
+
+ sent = xsend(p1, snd, sizeof(snd), 0);
+ ASSERT_EQ(sent, sizeof(snd), "xsend(p1)");
+ recvd = recv(c1, rcv, sizeof(rcv), MSG_PEEK);
+ ASSERT_EQ(recvd, sizeof(rcv), "recv(c1)");
+ err = ioctl(c1, FIONREAD, &avail);
+ ASSERT_OK(err, "ioctl(FIONREAD) error");
+ ASSERT_EQ(avail, sizeof(snd), "after peek ioctl(FIONREAD)");
+ recvd = recv(c1, rcv, sizeof(rcv), 0);
+ ASSERT_EQ(recvd, sizeof(rcv), "recv(p0)");
+ err = ioctl(c1, FIONREAD, &avail);
+ ASSERT_OK(err, "ioctl(FIONREAD) error");
+ ASSERT_EQ(avail, 0, "after read ioctl(FIONREAD)");
+
+out_close:
+ close(c1);
+ close(p1);
+out:
+ test_sockmap_pass_prog__destroy(pass);
+}
+
+static void test_sockmap_unconnected_unix(void)
+{
+ int err, map, stream = 0, dgram = 0, zero = 0;
+ struct test_sockmap_pass_prog *skel;
+
+ skel = test_sockmap_pass_prog__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "open_and_load"))
+ return;
+
+ map = bpf_map__fd(skel->maps.sock_map_rx);
+
+ stream = xsocket(AF_UNIX, SOCK_STREAM, 0);
+ if (stream < 0)
+ return;
+
+ dgram = xsocket(AF_UNIX, SOCK_DGRAM, 0);
+ if (dgram < 0) {
+ close(stream);
+ return;
+ }
+
+ err = bpf_map_update_elem(map, &zero, &stream, BPF_ANY);
+ ASSERT_ERR(err, "bpf_map_update_elem(stream)");
+
+ err = bpf_map_update_elem(map, &zero, &dgram, BPF_ANY);
+ ASSERT_OK(err, "bpf_map_update_elem(dgram)");
+
+ close(stream);
+ close(dgram);
+}
+
+static void test_sockmap_many_socket(void)
+{
+ struct test_sockmap_pass_prog *skel;
+ int stream[2], dgram, udp, tcp;
+ int i, err, map, entry = 0;
+
+ skel = test_sockmap_pass_prog__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "open_and_load"))
+ return;
+
+ map = bpf_map__fd(skel->maps.sock_map_rx);
+
+ dgram = xsocket(AF_UNIX, SOCK_DGRAM, 0);
+ if (dgram < 0) {
+ test_sockmap_pass_prog__destroy(skel);
+ return;
+ }
+
+ tcp = connected_socket_v4();
+ if (!ASSERT_GE(tcp, 0, "connected_socket_v4")) {
+ close(dgram);
+ test_sockmap_pass_prog__destroy(skel);
+ return;
+ }
+
+ udp = xsocket(AF_INET, SOCK_DGRAM | SOCK_NONBLOCK, 0);
+ if (udp < 0) {
+ close(dgram);
+ close(tcp);
+ test_sockmap_pass_prog__destroy(skel);
+ return;
+ }
+
+ err = socketpair(AF_UNIX, SOCK_STREAM, 0, stream);
+ ASSERT_OK(err, "socketpair(af_unix, sock_stream)");
+ if (err)
+ goto out;
+
+ for (i = 0; i < 2; i++, entry++) {
+ err = bpf_map_update_elem(map, &entry, &stream[0], BPF_ANY);
+ ASSERT_OK(err, "bpf_map_update_elem(stream)");
+ }
+ for (i = 0; i < 2; i++, entry++) {
+ err = bpf_map_update_elem(map, &entry, &dgram, BPF_ANY);
+ ASSERT_OK(err, "bpf_map_update_elem(dgram)");
+ }
+ for (i = 0; i < 2; i++, entry++) {
+ err = bpf_map_update_elem(map, &entry, &udp, BPF_ANY);
+ ASSERT_OK(err, "bpf_map_update_elem(udp)");
+ }
+ for (i = 0; i < 2; i++, entry++) {
+ err = bpf_map_update_elem(map, &entry, &tcp, BPF_ANY);
+ ASSERT_OK(err, "bpf_map_update_elem(tcp)");
+ }
+ for (entry--; entry >= 0; entry--) {
+ err = bpf_map_delete_elem(map, &entry);
+ ASSERT_OK(err, "bpf_map_delete_elem(entry)");
+ }
+
+ close(stream[0]);
+ close(stream[1]);
+out:
+ close(dgram);
+ close(tcp);
+ close(udp);
+ test_sockmap_pass_prog__destroy(skel);
+}
+
+static void test_sockmap_many_maps(void)
+{
+ struct test_sockmap_pass_prog *skel;
+ int stream[2], dgram, udp, tcp;
+ int i, err, map[2], entry = 0;
+
+ skel = test_sockmap_pass_prog__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "open_and_load"))
+ return;
+
+ map[0] = bpf_map__fd(skel->maps.sock_map_rx);
+ map[1] = bpf_map__fd(skel->maps.sock_map_tx);
+
+ dgram = xsocket(AF_UNIX, SOCK_DGRAM, 0);
+ if (dgram < 0) {
+ test_sockmap_pass_prog__destroy(skel);
+ return;
+ }
+
+ tcp = connected_socket_v4();
+ if (!ASSERT_GE(tcp, 0, "connected_socket_v4")) {
+ close(dgram);
+ test_sockmap_pass_prog__destroy(skel);
+ return;
+ }
+
+ udp = xsocket(AF_INET, SOCK_DGRAM | SOCK_NONBLOCK, 0);
+ if (udp < 0) {
+ close(dgram);
+ close(tcp);
+ test_sockmap_pass_prog__destroy(skel);
+ return;
+ }
+
+ err = socketpair(AF_UNIX, SOCK_STREAM, 0, stream);
+ ASSERT_OK(err, "socketpair(af_unix, sock_stream)");
+ if (err)
+ goto out;
+
+ for (i = 0; i < 2; i++, entry++) {
+ err = bpf_map_update_elem(map[i], &entry, &stream[0], BPF_ANY);
+ ASSERT_OK(err, "bpf_map_update_elem(stream)");
+ }
+ for (i = 0; i < 2; i++, entry++) {
+ err = bpf_map_update_elem(map[i], &entry, &dgram, BPF_ANY);
+ ASSERT_OK(err, "bpf_map_update_elem(dgram)");
+ }
+ for (i = 0; i < 2; i++, entry++) {
+ err = bpf_map_update_elem(map[i], &entry, &udp, BPF_ANY);
+ ASSERT_OK(err, "bpf_map_update_elem(udp)");
+ }
+ for (i = 0; i < 2; i++, entry++) {
+ err = bpf_map_update_elem(map[i], &entry, &tcp, BPF_ANY);
+ ASSERT_OK(err, "bpf_map_update_elem(tcp)");
+ }
+ for (entry--; entry >= 0; entry--) {
+ err = bpf_map_delete_elem(map[1], &entry);
+ entry--;
+ ASSERT_OK(err, "bpf_map_delete_elem(entry)");
+ err = bpf_map_delete_elem(map[0], &entry);
+ ASSERT_OK(err, "bpf_map_delete_elem(entry)");
+ }
+
+ close(stream[0]);
+ close(stream[1]);
+out:
+ close(dgram);
+ close(tcp);
+ close(udp);
+ test_sockmap_pass_prog__destroy(skel);
+}
+
+static void test_sockmap_same_sock(void)
+{
+ struct test_sockmap_pass_prog *skel;
+ int stream[2], dgram, udp, tcp;
+ int i, err, map, zero = 0;
+
+ skel = test_sockmap_pass_prog__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "open_and_load"))
+ return;
+
+ map = bpf_map__fd(skel->maps.sock_map_rx);
+
+ dgram = xsocket(AF_UNIX, SOCK_DGRAM, 0);
+ if (dgram < 0) {
+ test_sockmap_pass_prog__destroy(skel);
+ return;
+ }
+
+ tcp = connected_socket_v4();
+ if (!ASSERT_GE(tcp, 0, "connected_socket_v4")) {
+ close(dgram);
+ test_sockmap_pass_prog__destroy(skel);
+ return;
+ }
+
+ udp = xsocket(AF_INET, SOCK_DGRAM | SOCK_NONBLOCK, 0);
+ if (udp < 0) {
+ close(dgram);
+ close(tcp);
+ test_sockmap_pass_prog__destroy(skel);
+ return;
+ }
+
+ err = socketpair(AF_UNIX, SOCK_STREAM, 0, stream);
+ ASSERT_OK(err, "socketpair(af_unix, sock_stream)");
+ if (err)
+ goto out;
+
+ for (i = 0; i < 2; i++) {
+ err = bpf_map_update_elem(map, &zero, &stream[0], BPF_ANY);
+ ASSERT_OK(err, "bpf_map_update_elem(stream)");
+ }
+ for (i = 0; i < 2; i++) {
+ err = bpf_map_update_elem(map, &zero, &dgram, BPF_ANY);
+ ASSERT_OK(err, "bpf_map_update_elem(dgram)");
+ }
+ for (i = 0; i < 2; i++) {
+ err = bpf_map_update_elem(map, &zero, &udp, BPF_ANY);
+ ASSERT_OK(err, "bpf_map_update_elem(udp)");
+ }
+ for (i = 0; i < 2; i++) {
+ err = bpf_map_update_elem(map, &zero, &tcp, BPF_ANY);
+ ASSERT_OK(err, "bpf_map_update_elem(tcp)");
+ }
+
+ err = bpf_map_delete_elem(map, &zero);
+ ASSERT_OK(err, "bpf_map_delete_elem(entry)");
+
+ close(stream[0]);
+ close(stream[1]);
+out:
+ close(dgram);
+ close(tcp);
+ close(udp);
+ test_sockmap_pass_prog__destroy(skel);
+}
+
void test_sockmap_basic(void)
{
if (test__start_subtest("sockmap create_update_free"))
@@ -111,4 +772,44 @@ void test_sockmap_basic(void)
test_skmsg_helpers(BPF_MAP_TYPE_SOCKMAP);
if (test__start_subtest("sockhash sk_msg load helpers"))
test_skmsg_helpers(BPF_MAP_TYPE_SOCKHASH);
+ if (test__start_subtest("sockmap update"))
+ test_sockmap_update(BPF_MAP_TYPE_SOCKMAP);
+ if (test__start_subtest("sockhash update"))
+ test_sockmap_update(BPF_MAP_TYPE_SOCKHASH);
+ if (test__start_subtest("sockmap update in unsafe context"))
+ test_sockmap_invalid_update();
+ if (test__start_subtest("sockmap copy"))
+ test_sockmap_copy(BPF_MAP_TYPE_SOCKMAP);
+ if (test__start_subtest("sockhash copy"))
+ test_sockmap_copy(BPF_MAP_TYPE_SOCKHASH);
+ if (test__start_subtest("sockmap skb_verdict attach")) {
+ test_sockmap_skb_verdict_attach(BPF_SK_SKB_VERDICT,
+ BPF_SK_SKB_STREAM_VERDICT);
+ test_sockmap_skb_verdict_attach(BPF_SK_SKB_STREAM_VERDICT,
+ BPF_SK_SKB_VERDICT);
+ }
+ if (test__start_subtest("sockmap msg_verdict progs query"))
+ test_sockmap_progs_query(BPF_SK_MSG_VERDICT);
+ if (test__start_subtest("sockmap stream_parser progs query"))
+ test_sockmap_progs_query(BPF_SK_SKB_STREAM_PARSER);
+ if (test__start_subtest("sockmap stream_verdict progs query"))
+ test_sockmap_progs_query(BPF_SK_SKB_STREAM_VERDICT);
+ if (test__start_subtest("sockmap skb_verdict progs query"))
+ test_sockmap_progs_query(BPF_SK_SKB_VERDICT);
+ if (test__start_subtest("sockmap skb_verdict shutdown"))
+ test_sockmap_skb_verdict_shutdown();
+ if (test__start_subtest("sockmap skb_verdict fionread"))
+ test_sockmap_skb_verdict_fionread(true);
+ if (test__start_subtest("sockmap skb_verdict fionread on drop"))
+ test_sockmap_skb_verdict_fionread(false);
+ if (test__start_subtest("sockmap skb_verdict msg_f_peek"))
+ test_sockmap_skb_verdict_peek();
+ if (test__start_subtest("sockmap unconnected af_unix"))
+ test_sockmap_unconnected_unix();
+ if (test__start_subtest("sockmap one socket to many map entries"))
+ test_sockmap_many_socket();
+ if (test__start_subtest("sockmap one socket to many maps"))
+ test_sockmap_many_maps();
+ if (test__start_subtest("sockmap same socket replace"))
+ test_sockmap_same_sock();
}
diff --git a/tools/testing/selftests/bpf/prog_tests/sockmap_helpers.h b/tools/testing/selftests/bpf/prog_tests/sockmap_helpers.h
new file mode 100644
index 000000000000..e880f97bc44d
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/sockmap_helpers.h
@@ -0,0 +1,416 @@
+#ifndef __SOCKMAP_HELPERS__
+#define __SOCKMAP_HELPERS__
+
+#include <linux/vm_sockets.h>
+
+#define IO_TIMEOUT_SEC 30
+#define MAX_STRERR_LEN 256
+#define MAX_TEST_NAME 80
+
+/* workaround for older vm_sockets.h */
+#ifndef VMADDR_CID_LOCAL
+#define VMADDR_CID_LOCAL 1
+#endif
+
+#define __always_unused __attribute__((__unused__))
+
+#define _FAIL(errnum, fmt...) \
+ ({ \
+ error_at_line(0, (errnum), __func__, __LINE__, fmt); \
+ CHECK_FAIL(true); \
+ })
+#define FAIL(fmt...) _FAIL(0, fmt)
+#define FAIL_ERRNO(fmt...) _FAIL(errno, fmt)
+#define FAIL_LIBBPF(err, msg) \
+ ({ \
+ char __buf[MAX_STRERR_LEN]; \
+ libbpf_strerror((err), __buf, sizeof(__buf)); \
+ FAIL("%s: %s", (msg), __buf); \
+ })
+
+/* Wrappers that fail the test on error and report it. */
+
+#define xaccept_nonblock(fd, addr, len) \
+ ({ \
+ int __ret = \
+ accept_timeout((fd), (addr), (len), IO_TIMEOUT_SEC); \
+ if (__ret == -1) \
+ FAIL_ERRNO("accept"); \
+ __ret; \
+ })
+
+#define xbind(fd, addr, len) \
+ ({ \
+ int __ret = bind((fd), (addr), (len)); \
+ if (__ret == -1) \
+ FAIL_ERRNO("bind"); \
+ __ret; \
+ })
+
+#define xclose(fd) \
+ ({ \
+ int __ret = close((fd)); \
+ if (__ret == -1) \
+ FAIL_ERRNO("close"); \
+ __ret; \
+ })
+
+#define xconnect(fd, addr, len) \
+ ({ \
+ int __ret = connect((fd), (addr), (len)); \
+ if (__ret == -1) \
+ FAIL_ERRNO("connect"); \
+ __ret; \
+ })
+
+#define xgetsockname(fd, addr, len) \
+ ({ \
+ int __ret = getsockname((fd), (addr), (len)); \
+ if (__ret == -1) \
+ FAIL_ERRNO("getsockname"); \
+ __ret; \
+ })
+
+#define xgetsockopt(fd, level, name, val, len) \
+ ({ \
+ int __ret = getsockopt((fd), (level), (name), (val), (len)); \
+ if (__ret == -1) \
+ FAIL_ERRNO("getsockopt(" #name ")"); \
+ __ret; \
+ })
+
+#define xlisten(fd, backlog) \
+ ({ \
+ int __ret = listen((fd), (backlog)); \
+ if (__ret == -1) \
+ FAIL_ERRNO("listen"); \
+ __ret; \
+ })
+
+#define xsetsockopt(fd, level, name, val, len) \
+ ({ \
+ int __ret = setsockopt((fd), (level), (name), (val), (len)); \
+ if (__ret == -1) \
+ FAIL_ERRNO("setsockopt(" #name ")"); \
+ __ret; \
+ })
+
+#define xsend(fd, buf, len, flags) \
+ ({ \
+ ssize_t __ret = send((fd), (buf), (len), (flags)); \
+ if (__ret == -1) \
+ FAIL_ERRNO("send"); \
+ __ret; \
+ })
+
+#define xrecv_nonblock(fd, buf, len, flags) \
+ ({ \
+ ssize_t __ret = recv_timeout((fd), (buf), (len), (flags), \
+ IO_TIMEOUT_SEC); \
+ if (__ret == -1) \
+ FAIL_ERRNO("recv"); \
+ __ret; \
+ })
+
+#define xsocket(family, sotype, flags) \
+ ({ \
+ int __ret = socket(family, sotype, flags); \
+ if (__ret == -1) \
+ FAIL_ERRNO("socket"); \
+ __ret; \
+ })
+
+#define xbpf_map_delete_elem(fd, key) \
+ ({ \
+ int __ret = bpf_map_delete_elem((fd), (key)); \
+ if (__ret < 0) \
+ FAIL_ERRNO("map_delete"); \
+ __ret; \
+ })
+
+#define xbpf_map_lookup_elem(fd, key, val) \
+ ({ \
+ int __ret = bpf_map_lookup_elem((fd), (key), (val)); \
+ if (__ret < 0) \
+ FAIL_ERRNO("map_lookup"); \
+ __ret; \
+ })
+
+#define xbpf_map_update_elem(fd, key, val, flags) \
+ ({ \
+ int __ret = bpf_map_update_elem((fd), (key), (val), (flags)); \
+ if (__ret < 0) \
+ FAIL_ERRNO("map_update"); \
+ __ret; \
+ })
+
+#define xbpf_prog_attach(prog, target, type, flags) \
+ ({ \
+ int __ret = \
+ bpf_prog_attach((prog), (target), (type), (flags)); \
+ if (__ret < 0) \
+ FAIL_ERRNO("prog_attach(" #type ")"); \
+ __ret; \
+ })
+
+#define xbpf_prog_detach2(prog, target, type) \
+ ({ \
+ int __ret = bpf_prog_detach2((prog), (target), (type)); \
+ if (__ret < 0) \
+ FAIL_ERRNO("prog_detach2(" #type ")"); \
+ __ret; \
+ })
+
+#define xpthread_create(thread, attr, func, arg) \
+ ({ \
+ int __ret = pthread_create((thread), (attr), (func), (arg)); \
+ errno = __ret; \
+ if (__ret) \
+ FAIL_ERRNO("pthread_create"); \
+ __ret; \
+ })
+
+#define xpthread_join(thread, retval) \
+ ({ \
+ int __ret = pthread_join((thread), (retval)); \
+ errno = __ret; \
+ if (__ret) \
+ FAIL_ERRNO("pthread_join"); \
+ __ret; \
+ })
+
+static inline int poll_connect(int fd, unsigned int timeout_sec)
+{
+ struct timeval timeout = { .tv_sec = timeout_sec };
+ fd_set wfds;
+ int r, eval;
+ socklen_t esize = sizeof(eval);
+
+ FD_ZERO(&wfds);
+ FD_SET(fd, &wfds);
+
+ r = select(fd + 1, NULL, &wfds, NULL, &timeout);
+ if (r == 0)
+ errno = ETIME;
+ if (r != 1)
+ return -1;
+
+ if (getsockopt(fd, SOL_SOCKET, SO_ERROR, &eval, &esize) < 0)
+ return -1;
+ if (eval != 0) {
+ errno = eval;
+ return -1;
+ }
+
+ return 0;
+}
+
+static inline int poll_read(int fd, unsigned int timeout_sec)
+{
+ struct timeval timeout = { .tv_sec = timeout_sec };
+ fd_set rfds;
+ int r;
+
+ FD_ZERO(&rfds);
+ FD_SET(fd, &rfds);
+
+ r = select(fd + 1, &rfds, NULL, NULL, &timeout);
+ if (r == 0)
+ errno = ETIME;
+
+ return r == 1 ? 0 : -1;
+}
+
+static inline int accept_timeout(int fd, struct sockaddr *addr, socklen_t *len,
+ unsigned int timeout_sec)
+{
+ if (poll_read(fd, timeout_sec))
+ return -1;
+
+ return accept(fd, addr, len);
+}
+
+static inline int recv_timeout(int fd, void *buf, size_t len, int flags,
+ unsigned int timeout_sec)
+{
+ if (poll_read(fd, timeout_sec))
+ return -1;
+
+ return recv(fd, buf, len, flags);
+}
+
+static inline void init_addr_loopback4(struct sockaddr_storage *ss,
+ socklen_t *len)
+{
+ struct sockaddr_in *addr4 = memset(ss, 0, sizeof(*ss));
+
+ addr4->sin_family = AF_INET;
+ addr4->sin_port = 0;
+ addr4->sin_addr.s_addr = htonl(INADDR_LOOPBACK);
+ *len = sizeof(*addr4);
+}
+
+static inline void init_addr_loopback6(struct sockaddr_storage *ss,
+ socklen_t *len)
+{
+ struct sockaddr_in6 *addr6 = memset(ss, 0, sizeof(*ss));
+
+ addr6->sin6_family = AF_INET6;
+ addr6->sin6_port = 0;
+ addr6->sin6_addr = in6addr_loopback;
+ *len = sizeof(*addr6);
+}
+
+static inline void init_addr_loopback_vsock(struct sockaddr_storage *ss,
+ socklen_t *len)
+{
+ struct sockaddr_vm *addr = memset(ss, 0, sizeof(*ss));
+
+ addr->svm_family = AF_VSOCK;
+ addr->svm_port = VMADDR_PORT_ANY;
+ addr->svm_cid = VMADDR_CID_LOCAL;
+ *len = sizeof(*addr);
+}
+
+static inline void init_addr_loopback(int family, struct sockaddr_storage *ss,
+ socklen_t *len)
+{
+ switch (family) {
+ case AF_INET:
+ init_addr_loopback4(ss, len);
+ return;
+ case AF_INET6:
+ init_addr_loopback6(ss, len);
+ return;
+ case AF_VSOCK:
+ init_addr_loopback_vsock(ss, len);
+ return;
+ default:
+ FAIL("unsupported address family %d", family);
+ }
+}
+
+static inline struct sockaddr *sockaddr(struct sockaddr_storage *ss)
+{
+ return (struct sockaddr *)ss;
+}
+
+static inline int add_to_sockmap(int sock_mapfd, int fd1, int fd2)
+{
+ u64 value;
+ u32 key;
+ int err;
+
+ key = 0;
+ value = fd1;
+ err = xbpf_map_update_elem(sock_mapfd, &key, &value, BPF_NOEXIST);
+ if (err)
+ return err;
+
+ key = 1;
+ value = fd2;
+ return xbpf_map_update_elem(sock_mapfd, &key, &value, BPF_NOEXIST);
+}
+
+static inline int create_pair(int s, int family, int sotype, int *c, int *p)
+{
+ struct sockaddr_storage addr;
+ socklen_t len;
+ int err = 0;
+
+ len = sizeof(addr);
+ err = xgetsockname(s, sockaddr(&addr), &len);
+ if (err)
+ return err;
+
+ *c = xsocket(family, sotype, 0);
+ if (*c < 0)
+ return errno;
+ err = xconnect(*c, sockaddr(&addr), len);
+ if (err) {
+ err = errno;
+ goto close_cli0;
+ }
+
+ *p = xaccept_nonblock(s, NULL, NULL);
+ if (*p < 0) {
+ err = errno;
+ goto close_cli0;
+ }
+ return err;
+close_cli0:
+ close(*c);
+ return err;
+}
+
+static inline int create_socket_pairs(int s, int family, int sotype,
+ int *c0, int *c1, int *p0, int *p1)
+{
+ int err;
+
+ err = create_pair(s, family, sotype, c0, p0);
+ if (err)
+ return err;
+
+ err = create_pair(s, family, sotype, c1, p1);
+ if (err) {
+ close(*c0);
+ close(*p0);
+ }
+ return err;
+}
+
+static inline int enable_reuseport(int s, int progfd)
+{
+ int err, one = 1;
+
+ err = xsetsockopt(s, SOL_SOCKET, SO_REUSEPORT, &one, sizeof(one));
+ if (err)
+ return -1;
+ err = xsetsockopt(s, SOL_SOCKET, SO_ATTACH_REUSEPORT_EBPF, &progfd,
+ sizeof(progfd));
+ if (err)
+ return -1;
+
+ return 0;
+}
+
+static inline int socket_loopback_reuseport(int family, int sotype, int progfd)
+{
+ struct sockaddr_storage addr;
+ socklen_t len = 0;
+ int err, s;
+
+ init_addr_loopback(family, &addr, &len);
+
+ s = xsocket(family, sotype, 0);
+ if (s == -1)
+ return -1;
+
+ if (progfd >= 0)
+ enable_reuseport(s, progfd);
+
+ err = xbind(s, sockaddr(&addr), len);
+ if (err)
+ goto close;
+
+ if (sotype & SOCK_DGRAM)
+ return s;
+
+ err = xlisten(s, SOMAXCONN);
+ if (err)
+ goto close;
+
+ return s;
+close:
+ xclose(s);
+ return -1;
+}
+
+static inline int socket_loopback(int family, int sotype)
+{
+ return socket_loopback_reuseport(family, sotype, -1);
+}
+
+
+#endif // __SOCKMAP_HELPERS__
diff --git a/tools/testing/selftests/bpf/prog_tests/sockmap_ktls.c b/tools/testing/selftests/bpf/prog_tests/sockmap_ktls.c
index 06b86addc181..2d0796314862 100644
--- a/tools/testing/selftests/bpf/prog_tests/sockmap_ktls.c
+++ b/tools/testing/selftests/bpf/prog_tests/sockmap_ktls.c
@@ -4,6 +4,7 @@
* Tests for sockmap/sockhash holding kTLS sockets.
*/
+#include <netinet/tcp.h>
#include "test_progs.h"
#define MAX_TEST_NAME 80
@@ -14,16 +15,12 @@ static int tcp_server(int family)
int err, s;
s = socket(family, SOCK_STREAM, 0);
- if (CHECK_FAIL(s == -1)) {
- perror("socket");
+ if (!ASSERT_GE(s, 0, "socket"))
return -1;
- }
err = listen(s, SOMAXCONN);
- if (CHECK_FAIL(err)) {
- perror("listen");
+ if (!ASSERT_OK(err, "listen"))
return -1;
- }
return s;
}
@@ -47,44 +44,31 @@ static void test_sockmap_ktls_disconnect_after_delete(int family, int map)
return;
err = getsockname(srv, (struct sockaddr *)&addr, &len);
- if (CHECK_FAIL(err)) {
- perror("getsockopt");
+ if (!ASSERT_OK(err, "getsockopt"))
goto close_srv;
- }
cli = socket(family, SOCK_STREAM, 0);
- if (CHECK_FAIL(cli == -1)) {
- perror("socket");
+ if (!ASSERT_GE(cli, 0, "socket"))
goto close_srv;
- }
err = connect(cli, (struct sockaddr *)&addr, len);
- if (CHECK_FAIL(err)) {
- perror("connect");
+ if (!ASSERT_OK(err, "connect"))
goto close_cli;
- }
err = bpf_map_update_elem(map, &zero, &cli, 0);
- if (CHECK_FAIL(err)) {
- perror("bpf_map_update_elem");
+ if (!ASSERT_OK(err, "bpf_map_update_elem"))
goto close_cli;
- }
err = setsockopt(cli, IPPROTO_TCP, TCP_ULP, "tls", strlen("tls"));
- if (CHECK_FAIL(err)) {
- perror("setsockopt(TCP_ULP)");
+ if (!ASSERT_OK(err, "setsockopt(TCP_ULP)"))
goto close_cli;
- }
err = bpf_map_delete_elem(map, &zero);
- if (CHECK_FAIL(err)) {
- perror("bpf_map_delete_elem");
+ if (!ASSERT_OK(err, "bpf_map_delete_elem"))
goto close_cli;
- }
err = disconnect(cli);
- if (CHECK_FAIL(err))
- perror("disconnect");
+ ASSERT_OK(err, "disconnect");
close_cli:
close(cli);
@@ -92,25 +76,88 @@ close_srv:
close(srv);
}
-static void run_tests(int family, enum bpf_map_type map_type)
+static void test_sockmap_ktls_update_fails_when_sock_has_ulp(int family, int map)
{
- char test_name[MAX_TEST_NAME];
- int map;
-
- map = bpf_create_map(map_type, sizeof(int), sizeof(int), 1, 0);
- if (CHECK_FAIL(map == -1)) {
- perror("bpf_map_create");
+ struct sockaddr_storage addr = {};
+ socklen_t len = sizeof(addr);
+ struct sockaddr_in6 *v6;
+ struct sockaddr_in *v4;
+ int err, s, zero = 0;
+
+ switch (family) {
+ case AF_INET:
+ v4 = (struct sockaddr_in *)&addr;
+ v4->sin_family = AF_INET;
+ break;
+ case AF_INET6:
+ v6 = (struct sockaddr_in6 *)&addr;
+ v6->sin6_family = AF_INET6;
+ break;
+ default:
+ PRINT_FAIL("unsupported socket family %d", family);
return;
}
+ s = socket(family, SOCK_STREAM, 0);
+ if (!ASSERT_GE(s, 0, "socket"))
+ return;
+
+ err = bind(s, (struct sockaddr *)&addr, len);
+ if (!ASSERT_OK(err, "bind"))
+ goto close;
+
+ err = getsockname(s, (struct sockaddr *)&addr, &len);
+ if (!ASSERT_OK(err, "getsockname"))
+ goto close;
+
+ err = connect(s, (struct sockaddr *)&addr, len);
+ if (!ASSERT_OK(err, "connect"))
+ goto close;
+
+ /* save sk->sk_prot and set it to tls_prots */
+ err = setsockopt(s, IPPROTO_TCP, TCP_ULP, "tls", strlen("tls"));
+ if (!ASSERT_OK(err, "setsockopt(TCP_ULP)"))
+ goto close;
+
+ /* sockmap update should not affect saved sk_prot */
+ err = bpf_map_update_elem(map, &zero, &s, BPF_ANY);
+ if (!ASSERT_ERR(err, "sockmap update elem"))
+ goto close;
+
+ /* call sk->sk_prot->setsockopt to dispatch to saved sk_prot */
+ err = setsockopt(s, IPPROTO_TCP, TCP_NODELAY, &zero, sizeof(zero));
+ ASSERT_OK(err, "setsockopt(TCP_NODELAY)");
+
+close:
+ close(s);
+}
+
+static const char *fmt_test_name(const char *subtest_name, int family,
+ enum bpf_map_type map_type)
+{
+ const char *map_type_str = BPF_MAP_TYPE_SOCKMAP ? "SOCKMAP" : "SOCKHASH";
+ const char *family_str = AF_INET ? "IPv4" : "IPv6";
+ static char test_name[MAX_TEST_NAME];
+
snprintf(test_name, MAX_TEST_NAME,
- "sockmap_ktls disconnect_after_delete %s %s",
- family == AF_INET ? "IPv4" : "IPv6",
- map_type == BPF_MAP_TYPE_SOCKMAP ? "SOCKMAP" : "SOCKHASH");
- if (!test__start_subtest(test_name))
+ "sockmap_ktls %s %s %s",
+ subtest_name, family_str, map_type_str);
+
+ return test_name;
+}
+
+static void run_tests(int family, enum bpf_map_type map_type)
+{
+ int map;
+
+ map = bpf_map_create(map_type, NULL, sizeof(int), sizeof(int), 1, NULL);
+ if (!ASSERT_GE(map, 0, "bpf_map_create"))
return;
- test_sockmap_ktls_disconnect_after_delete(family, map);
+ if (test__start_subtest(fmt_test_name("disconnect_after_delete", family, map_type)))
+ test_sockmap_ktls_disconnect_after_delete(family, map);
+ if (test__start_subtest(fmt_test_name("update_fails_when_sock_has_ulp", family, map_type)))
+ test_sockmap_ktls_update_fails_when_sock_has_ulp(family, map);
close(map);
}
diff --git a/tools/testing/selftests/bpf/prog_tests/sockmap_listen.c b/tools/testing/selftests/bpf/prog_tests/sockmap_listen.c
index d7d65a700799..a92807bfcd13 100644
--- a/tools/testing/selftests/bpf/prog_tests/sockmap_listen.c
+++ b/tools/testing/selftests/bpf/prog_tests/sockmap_listen.c
@@ -18,6 +18,7 @@
#include <string.h>
#include <sys/select.h>
#include <unistd.h>
+#include <linux/vm_sockets.h>
#include <bpf/bpf.h>
#include <bpf/libbpf.h>
@@ -26,302 +27,10 @@
#include "test_progs.h"
#include "test_sockmap_listen.skel.h"
-#define IO_TIMEOUT_SEC 30
-#define MAX_STRERR_LEN 256
-#define MAX_TEST_NAME 80
-
-#define _FAIL(errnum, fmt...) \
- ({ \
- error_at_line(0, (errnum), __func__, __LINE__, fmt); \
- CHECK_FAIL(true); \
- })
-#define FAIL(fmt...) _FAIL(0, fmt)
-#define FAIL_ERRNO(fmt...) _FAIL(errno, fmt)
-#define FAIL_LIBBPF(err, msg) \
- ({ \
- char __buf[MAX_STRERR_LEN]; \
- libbpf_strerror((err), __buf, sizeof(__buf)); \
- FAIL("%s: %s", (msg), __buf); \
- })
-
-/* Wrappers that fail the test on error and report it. */
-
-#define xaccept_nonblock(fd, addr, len) \
- ({ \
- int __ret = \
- accept_timeout((fd), (addr), (len), IO_TIMEOUT_SEC); \
- if (__ret == -1) \
- FAIL_ERRNO("accept"); \
- __ret; \
- })
-
-#define xbind(fd, addr, len) \
- ({ \
- int __ret = bind((fd), (addr), (len)); \
- if (__ret == -1) \
- FAIL_ERRNO("bind"); \
- __ret; \
- })
-
-#define xclose(fd) \
- ({ \
- int __ret = close((fd)); \
- if (__ret == -1) \
- FAIL_ERRNO("close"); \
- __ret; \
- })
-
-#define xconnect(fd, addr, len) \
- ({ \
- int __ret = connect((fd), (addr), (len)); \
- if (__ret == -1) \
- FAIL_ERRNO("connect"); \
- __ret; \
- })
-
-#define xgetsockname(fd, addr, len) \
- ({ \
- int __ret = getsockname((fd), (addr), (len)); \
- if (__ret == -1) \
- FAIL_ERRNO("getsockname"); \
- __ret; \
- })
-
-#define xgetsockopt(fd, level, name, val, len) \
- ({ \
- int __ret = getsockopt((fd), (level), (name), (val), (len)); \
- if (__ret == -1) \
- FAIL_ERRNO("getsockopt(" #name ")"); \
- __ret; \
- })
-
-#define xlisten(fd, backlog) \
- ({ \
- int __ret = listen((fd), (backlog)); \
- if (__ret == -1) \
- FAIL_ERRNO("listen"); \
- __ret; \
- })
-
-#define xsetsockopt(fd, level, name, val, len) \
- ({ \
- int __ret = setsockopt((fd), (level), (name), (val), (len)); \
- if (__ret == -1) \
- FAIL_ERRNO("setsockopt(" #name ")"); \
- __ret; \
- })
-
-#define xsend(fd, buf, len, flags) \
- ({ \
- ssize_t __ret = send((fd), (buf), (len), (flags)); \
- if (__ret == -1) \
- FAIL_ERRNO("send"); \
- __ret; \
- })
-
-#define xrecv_nonblock(fd, buf, len, flags) \
- ({ \
- ssize_t __ret = recv_timeout((fd), (buf), (len), (flags), \
- IO_TIMEOUT_SEC); \
- if (__ret == -1) \
- FAIL_ERRNO("recv"); \
- __ret; \
- })
-
-#define xsocket(family, sotype, flags) \
- ({ \
- int __ret = socket(family, sotype, flags); \
- if (__ret == -1) \
- FAIL_ERRNO("socket"); \
- __ret; \
- })
-
-#define xbpf_map_delete_elem(fd, key) \
- ({ \
- int __ret = bpf_map_delete_elem((fd), (key)); \
- if (__ret == -1) \
- FAIL_ERRNO("map_delete"); \
- __ret; \
- })
-
-#define xbpf_map_lookup_elem(fd, key, val) \
- ({ \
- int __ret = bpf_map_lookup_elem((fd), (key), (val)); \
- if (__ret == -1) \
- FAIL_ERRNO("map_lookup"); \
- __ret; \
- })
-
-#define xbpf_map_update_elem(fd, key, val, flags) \
- ({ \
- int __ret = bpf_map_update_elem((fd), (key), (val), (flags)); \
- if (__ret == -1) \
- FAIL_ERRNO("map_update"); \
- __ret; \
- })
-
-#define xbpf_prog_attach(prog, target, type, flags) \
- ({ \
- int __ret = \
- bpf_prog_attach((prog), (target), (type), (flags)); \
- if (__ret == -1) \
- FAIL_ERRNO("prog_attach(" #type ")"); \
- __ret; \
- })
-
-#define xbpf_prog_detach2(prog, target, type) \
- ({ \
- int __ret = bpf_prog_detach2((prog), (target), (type)); \
- if (__ret == -1) \
- FAIL_ERRNO("prog_detach2(" #type ")"); \
- __ret; \
- })
-
-#define xpthread_create(thread, attr, func, arg) \
- ({ \
- int __ret = pthread_create((thread), (attr), (func), (arg)); \
- errno = __ret; \
- if (__ret) \
- FAIL_ERRNO("pthread_create"); \
- __ret; \
- })
-
-#define xpthread_join(thread, retval) \
- ({ \
- int __ret = pthread_join((thread), (retval)); \
- errno = __ret; \
- if (__ret) \
- FAIL_ERRNO("pthread_join"); \
- __ret; \
- })
-
-static int poll_read(int fd, unsigned int timeout_sec)
-{
- struct timeval timeout = { .tv_sec = timeout_sec };
- fd_set rfds;
- int r;
-
- FD_ZERO(&rfds);
- FD_SET(fd, &rfds);
-
- r = select(fd + 1, &rfds, NULL, NULL, &timeout);
- if (r == 0)
- errno = ETIME;
-
- return r == 1 ? 0 : -1;
-}
-
-static int accept_timeout(int fd, struct sockaddr *addr, socklen_t *len,
- unsigned int timeout_sec)
-{
- if (poll_read(fd, timeout_sec))
- return -1;
-
- return accept(fd, addr, len);
-}
-
-static int recv_timeout(int fd, void *buf, size_t len, int flags,
- unsigned int timeout_sec)
-{
- if (poll_read(fd, timeout_sec))
- return -1;
-
- return recv(fd, buf, len, flags);
-}
-
-static void init_addr_loopback4(struct sockaddr_storage *ss, socklen_t *len)
-{
- struct sockaddr_in *addr4 = memset(ss, 0, sizeof(*ss));
-
- addr4->sin_family = AF_INET;
- addr4->sin_port = 0;
- addr4->sin_addr.s_addr = htonl(INADDR_LOOPBACK);
- *len = sizeof(*addr4);
-}
-
-static void init_addr_loopback6(struct sockaddr_storage *ss, socklen_t *len)
-{
- struct sockaddr_in6 *addr6 = memset(ss, 0, sizeof(*ss));
-
- addr6->sin6_family = AF_INET6;
- addr6->sin6_port = 0;
- addr6->sin6_addr = in6addr_loopback;
- *len = sizeof(*addr6);
-}
-
-static void init_addr_loopback(int family, struct sockaddr_storage *ss,
- socklen_t *len)
-{
- switch (family) {
- case AF_INET:
- init_addr_loopback4(ss, len);
- return;
- case AF_INET6:
- init_addr_loopback6(ss, len);
- return;
- default:
- FAIL("unsupported address family %d", family);
- }
-}
-
-static inline struct sockaddr *sockaddr(struct sockaddr_storage *ss)
-{
- return (struct sockaddr *)ss;
-}
-
-static int enable_reuseport(int s, int progfd)
-{
- int err, one = 1;
-
- err = xsetsockopt(s, SOL_SOCKET, SO_REUSEPORT, &one, sizeof(one));
- if (err)
- return -1;
- err = xsetsockopt(s, SOL_SOCKET, SO_ATTACH_REUSEPORT_EBPF, &progfd,
- sizeof(progfd));
- if (err)
- return -1;
-
- return 0;
-}
-
-static int socket_loopback_reuseport(int family, int sotype, int progfd)
-{
- struct sockaddr_storage addr;
- socklen_t len;
- int err, s;
-
- init_addr_loopback(family, &addr, &len);
-
- s = xsocket(family, sotype, 0);
- if (s == -1)
- return -1;
-
- if (progfd >= 0)
- enable_reuseport(s, progfd);
-
- err = xbind(s, sockaddr(&addr), len);
- if (err)
- goto close;
-
- if (sotype & SOCK_DGRAM)
- return s;
-
- err = xlisten(s, SOMAXCONN);
- if (err)
- goto close;
-
- return s;
-close:
- xclose(s);
- return -1;
-}
-
-static int socket_loopback(int family, int sotype)
-{
- return socket_loopback_reuseport(family, sotype, -1);
-}
+#include "sockmap_helpers.h"
-static void test_insert_invalid(int family, int sotype, int mapfd)
+static void test_insert_invalid(struct test_sockmap_listen *skel __always_unused,
+ int family, int sotype, int mapfd)
{
u32 key = 0;
u64 value;
@@ -338,7 +47,8 @@ static void test_insert_invalid(int family, int sotype, int mapfd)
FAIL_ERRNO("map_update: expected EBADF");
}
-static void test_insert_opened(int family, int sotype, int mapfd)
+static void test_insert_opened(struct test_sockmap_listen *skel __always_unused,
+ int family, int sotype, int mapfd)
{
u32 key = 0;
u64 value;
@@ -351,16 +61,19 @@ static void test_insert_opened(int family, int sotype, int mapfd)
errno = 0;
value = s;
err = bpf_map_update_elem(mapfd, &key, &value, BPF_NOEXIST);
- if (!err || errno != EOPNOTSUPP)
- FAIL_ERRNO("map_update: expected EOPNOTSUPP");
-
+ if (sotype == SOCK_STREAM) {
+ if (!err || errno != EOPNOTSUPP)
+ FAIL_ERRNO("map_update: expected EOPNOTSUPP");
+ } else if (err)
+ FAIL_ERRNO("map_update: expected success");
xclose(s);
}
-static void test_insert_bound(int family, int sotype, int mapfd)
+static void test_insert_bound(struct test_sockmap_listen *skel __always_unused,
+ int family, int sotype, int mapfd)
{
struct sockaddr_storage addr;
- socklen_t len;
+ socklen_t len = 0;
u32 key = 0;
u64 value;
int err, s;
@@ -384,7 +97,8 @@ close:
xclose(s);
}
-static void test_insert(int family, int sotype, int mapfd)
+static void test_insert(struct test_sockmap_listen *skel __always_unused,
+ int family, int sotype, int mapfd)
{
u64 value;
u32 key;
@@ -400,7 +114,8 @@ static void test_insert(int family, int sotype, int mapfd)
xclose(s);
}
-static void test_delete_after_insert(int family, int sotype, int mapfd)
+static void test_delete_after_insert(struct test_sockmap_listen *skel __always_unused,
+ int family, int sotype, int mapfd)
{
u64 value;
u32 key;
@@ -417,7 +132,8 @@ static void test_delete_after_insert(int family, int sotype, int mapfd)
xclose(s);
}
-static void test_delete_after_close(int family, int sotype, int mapfd)
+static void test_delete_after_close(struct test_sockmap_listen *skel __always_unused,
+ int family, int sotype, int mapfd)
{
int err, s;
u64 value;
@@ -440,7 +156,8 @@ static void test_delete_after_close(int family, int sotype, int mapfd)
FAIL_ERRNO("map_delete: expected EINVAL/EINVAL");
}
-static void test_lookup_after_insert(int family, int sotype, int mapfd)
+static void test_lookup_after_insert(struct test_sockmap_listen *skel __always_unused,
+ int family, int sotype, int mapfd)
{
u64 cookie, value;
socklen_t len;
@@ -468,7 +185,8 @@ static void test_lookup_after_insert(int family, int sotype, int mapfd)
xclose(s);
}
-static void test_lookup_after_delete(int family, int sotype, int mapfd)
+static void test_lookup_after_delete(struct test_sockmap_listen *skel __always_unused,
+ int family, int sotype, int mapfd)
{
int err, s;
u64 value;
@@ -491,7 +209,8 @@ static void test_lookup_after_delete(int family, int sotype, int mapfd)
xclose(s);
}
-static void test_lookup_32_bit_value(int family, int sotype, int mapfd)
+static void test_lookup_32_bit_value(struct test_sockmap_listen *skel __always_unused,
+ int family, int sotype, int mapfd)
{
u32 key, value32;
int err, s;
@@ -500,8 +219,8 @@ static void test_lookup_32_bit_value(int family, int sotype, int mapfd)
if (s < 0)
return;
- mapfd = bpf_create_map(BPF_MAP_TYPE_SOCKMAP, sizeof(key),
- sizeof(value32), 1, 0);
+ mapfd = bpf_map_create(BPF_MAP_TYPE_SOCKMAP, NULL, sizeof(key),
+ sizeof(value32), 1, NULL);
if (mapfd < 0) {
FAIL_ERRNO("map_create");
goto close;
@@ -521,7 +240,8 @@ close:
xclose(s);
}
-static void test_update_existing(int family, int sotype, int mapfd)
+static void test_update_existing(struct test_sockmap_listen *skel __always_unused,
+ int family, int sotype, int mapfd)
{
int s1, s2;
u64 value;
@@ -549,7 +269,7 @@ close_s1:
/* Exercise the code path where we destroy child sockets that never
* got accept()'ed, aka orphans, when parent socket gets closed.
*/
-static void test_destroy_orphan_child(int family, int sotype, int mapfd)
+static void do_destroy_orphan_child(int family, int sotype, int mapfd)
{
struct sockaddr_storage addr;
socklen_t len;
@@ -580,10 +300,38 @@ close_srv:
xclose(s);
}
+static void test_destroy_orphan_child(struct test_sockmap_listen *skel,
+ int family, int sotype, int mapfd)
+{
+ int msg_verdict = bpf_program__fd(skel->progs.prog_msg_verdict);
+ int skb_verdict = bpf_program__fd(skel->progs.prog_skb_verdict);
+ const struct test {
+ int progfd;
+ enum bpf_attach_type atype;
+ } tests[] = {
+ { -1, -1 },
+ { msg_verdict, BPF_SK_MSG_VERDICT },
+ { skb_verdict, BPF_SK_SKB_VERDICT },
+ };
+ const struct test *t;
+
+ for (t = tests; t < tests + ARRAY_SIZE(tests); t++) {
+ if (t->progfd != -1 &&
+ xbpf_prog_attach(t->progfd, mapfd, t->atype, 0) != 0)
+ return;
+
+ do_destroy_orphan_child(family, sotype, mapfd);
+
+ if (t->progfd != -1)
+ xbpf_prog_detach2(t->progfd, mapfd, t->atype);
+ }
+}
+
/* Perform a passive open after removing listening socket from SOCKMAP
* to ensure that callbacks get restored properly.
*/
-static void test_clone_after_delete(int family, int sotype, int mapfd)
+static void test_clone_after_delete(struct test_sockmap_listen *skel __always_unused,
+ int family, int sotype, int mapfd)
{
struct sockaddr_storage addr;
socklen_t len;
@@ -619,7 +367,8 @@ close_srv:
* SOCKMAP, but got accept()'ed only after the parent has been removed
* from SOCKMAP, gets cloned without parent psock state or callbacks.
*/
-static void test_accept_after_delete(int family, int sotype, int mapfd)
+static void test_accept_after_delete(struct test_sockmap_listen *skel __always_unused,
+ int family, int sotype, int mapfd)
{
struct sockaddr_storage addr;
const u32 zero = 0;
@@ -673,7 +422,8 @@ close_srv:
/* Check that child socket that got created and accepted while parent
* was in a SOCKMAP is cloned without parent psock state or callbacks.
*/
-static void test_accept_before_delete(int family, int sotype, int mapfd)
+static void test_accept_before_delete(struct test_sockmap_listen *skel __always_unused,
+ int family, int sotype, int mapfd)
{
struct sockaddr_storage addr;
const u32 zero = 0, one = 1;
@@ -782,7 +532,8 @@ done:
return NULL;
}
-static void test_syn_recv_insert_delete(int family, int sotype, int mapfd)
+static void test_syn_recv_insert_delete(struct test_sockmap_listen *skel __always_unused,
+ int family, int sotype, int mapfd)
{
struct connect_accept_ctx ctx = { 0 };
struct sockaddr_storage addr;
@@ -845,7 +596,8 @@ static void *listen_thread(void *arg)
return NULL;
}
-static void test_race_insert_listen(int family, int socktype, int mapfd)
+static void test_race_insert_listen(struct test_sockmap_listen *skel __always_unused,
+ int family, int socktype, int mapfd)
{
struct connect_accept_ctx ctx = { 0 };
const u32 zero = 0;
@@ -923,12 +675,9 @@ static void redir_to_connected(int family, int sotype, int sock_mapfd,
int verd_mapfd, enum redir_mode mode)
{
const char *log_prefix = redir_mode_str(mode);
- struct sockaddr_storage addr;
int s, c0, c1, p0, p1;
unsigned int pass;
- socklen_t len;
int err, n;
- u64 value;
u32 key;
char b;
@@ -938,44 +687,13 @@ static void redir_to_connected(int family, int sotype, int sock_mapfd,
if (s < 0)
return;
- len = sizeof(addr);
- err = xgetsockname(s, sockaddr(&addr), &len);
+ err = create_socket_pairs(s, family, sotype, &c0, &c1, &p0, &p1);
if (err)
goto close_srv;
- c0 = xsocket(family, sotype, 0);
- if (c0 < 0)
- goto close_srv;
- err = xconnect(c0, sockaddr(&addr), len);
+ err = add_to_sockmap(sock_mapfd, p0, p1);
if (err)
- goto close_cli0;
-
- p0 = xaccept_nonblock(s, NULL, NULL);
- if (p0 < 0)
- goto close_cli0;
-
- c1 = xsocket(family, sotype, 0);
- if (c1 < 0)
- goto close_peer0;
- err = xconnect(c1, sockaddr(&addr), len);
- if (err)
- goto close_cli1;
-
- p1 = xaccept_nonblock(s, NULL, NULL);
- if (p1 < 0)
- goto close_cli1;
-
- key = 0;
- value = p0;
- err = xbpf_map_update_elem(sock_mapfd, &key, &value, BPF_NOEXIST);
- if (err)
- goto close_peer1;
-
- key = 1;
- value = p1;
- err = xbpf_map_update_elem(sock_mapfd, &key, &value, BPF_NOEXIST);
- if (err)
- goto close_peer1;
+ goto close;
n = write(mode == REDIR_INGRESS ? c1 : p1, "a", 1);
if (n < 0)
@@ -983,28 +701,24 @@ static void redir_to_connected(int family, int sotype, int sock_mapfd,
if (n == 0)
FAIL("%s: incomplete write", log_prefix);
if (n < 1)
- goto close_peer1;
+ goto close;
key = SK_PASS;
err = xbpf_map_lookup_elem(verd_mapfd, &key, &pass);
if (err)
- goto close_peer1;
+ goto close;
if (pass != 1)
FAIL("%s: want pass count 1, have %d", log_prefix, pass);
-
- n = read(c0, &b, 1);
+ n = recv_timeout(c0, &b, 1, 0, IO_TIMEOUT_SEC);
if (n < 0)
- FAIL_ERRNO("%s: read", log_prefix);
+ FAIL_ERRNO("%s: recv_timeout", log_prefix);
if (n == 0)
- FAIL("%s: incomplete read", log_prefix);
+ FAIL("%s: incomplete recv", log_prefix);
-close_peer1:
+close:
xclose(p1);
-close_cli1:
xclose(c1);
-close_peer0:
xclose(p0);
-close_cli0:
xclose(c0);
close_srv:
xclose(s);
@@ -1014,8 +728,8 @@ static void test_skb_redir_to_connected(struct test_sockmap_listen *skel,
struct bpf_map *inner_map, int family,
int sotype)
{
- int verdict = bpf_program__fd(skel->progs.prog_skb_verdict);
- int parser = bpf_program__fd(skel->progs.prog_skb_parser);
+ int verdict = bpf_program__fd(skel->progs.prog_stream_verdict);
+ int parser = bpf_program__fd(skel->progs.prog_stream_parser);
int verdict_map = bpf_map__fd(skel->maps.verdict_map);
int sock_map = bpf_map__fd(inner_map);
int err;
@@ -1061,7 +775,6 @@ static void redir_to_listening(int family, int sotype, int sock_mapfd,
int s, c, p, err, n;
unsigned int drop;
socklen_t len;
- u64 value;
u32 key;
zero_verdict_count(verd_mapfd);
@@ -1086,15 +799,7 @@ static void redir_to_listening(int family, int sotype, int sock_mapfd,
if (p < 0)
goto close_cli;
- key = 0;
- value = s;
- err = xbpf_map_update_elem(sock_mapfd, &key, &value, BPF_NOEXIST);
- if (err)
- goto close_peer;
-
- key = 1;
- value = p;
- err = xbpf_map_update_elem(sock_mapfd, &key, &value, BPF_NOEXIST);
+ err = add_to_sockmap(sock_mapfd, s, p);
if (err)
goto close_peer;
@@ -1125,8 +830,8 @@ static void test_skb_redir_to_listening(struct test_sockmap_listen *skel,
struct bpf_map *inner_map, int family,
int sotype)
{
- int verdict = bpf_program__fd(skel->progs.prog_skb_verdict);
- int parser = bpf_program__fd(skel->progs.prog_skb_parser);
+ int verdict = bpf_program__fd(skel->progs.prog_stream_verdict);
+ int parser = bpf_program__fd(skel->progs.prog_stream_parser);
int verdict_map = bpf_map__fd(skel->maps.verdict_map);
int sock_map = bpf_map__fd(inner_map);
int err;
@@ -1164,6 +869,77 @@ static void test_msg_redir_to_listening(struct test_sockmap_listen *skel,
xbpf_prog_detach2(verdict, sock_map, BPF_SK_MSG_VERDICT);
}
+static void redir_partial(int family, int sotype, int sock_map, int parser_map)
+{
+ int s, c0 = -1, c1 = -1, p0 = -1, p1 = -1;
+ int err, n, key, value;
+ char buf[] = "abc";
+
+ key = 0;
+ value = sizeof(buf) - 1;
+ err = xbpf_map_update_elem(parser_map, &key, &value, 0);
+ if (err)
+ return;
+
+ s = socket_loopback(family, sotype | SOCK_NONBLOCK);
+ if (s < 0)
+ goto clean_parser_map;
+
+ err = create_socket_pairs(s, family, sotype, &c0, &c1, &p0, &p1);
+ if (err)
+ goto close_srv;
+
+ err = add_to_sockmap(sock_map, p0, p1);
+ if (err)
+ goto close;
+
+ n = xsend(c1, buf, sizeof(buf), 0);
+ if (n < sizeof(buf))
+ FAIL("incomplete write");
+
+ n = xrecv_nonblock(c0, buf, sizeof(buf), 0);
+ if (n != sizeof(buf) - 1)
+ FAIL("expect %zu, received %d", sizeof(buf) - 1, n);
+
+close:
+ xclose(c0);
+ xclose(p0);
+ xclose(c1);
+ xclose(p1);
+close_srv:
+ xclose(s);
+
+clean_parser_map:
+ key = 0;
+ value = 0;
+ xbpf_map_update_elem(parser_map, &key, &value, 0);
+}
+
+static void test_skb_redir_partial(struct test_sockmap_listen *skel,
+ struct bpf_map *inner_map, int family,
+ int sotype)
+{
+ int verdict = bpf_program__fd(skel->progs.prog_stream_verdict);
+ int parser = bpf_program__fd(skel->progs.prog_stream_parser);
+ int parser_map = bpf_map__fd(skel->maps.parser_map);
+ int sock_map = bpf_map__fd(inner_map);
+ int err;
+
+ err = xbpf_prog_attach(parser, sock_map, BPF_SK_SKB_STREAM_PARSER, 0);
+ if (err)
+ return;
+
+ err = xbpf_prog_attach(verdict, sock_map, BPF_SK_SKB_STREAM_VERDICT, 0);
+ if (err)
+ goto detach;
+
+ redir_partial(family, sotype, sock_map, parser_map);
+
+ xbpf_prog_detach2(verdict, sock_map, BPF_SK_SKB_STREAM_VERDICT);
+detach:
+ xbpf_prog_detach2(parser, sock_map, BPF_SK_SKB_STREAM_PARSER);
+}
+
static void test_reuseport_select_listening(int family, int sotype,
int sock_map, int verd_map,
int reuseport_prog)
@@ -1346,7 +1122,6 @@ static void test_reuseport_mixed_groups(int family, int sotype, int sock_map,
int s1, s2, c, err;
unsigned int drop;
socklen_t len;
- u64 value;
u32 key;
zero_verdict_count(verd_map);
@@ -1360,16 +1135,10 @@ static void test_reuseport_mixed_groups(int family, int sotype, int sock_map,
if (s2 < 0)
goto close_srv1;
- key = 0;
- value = s1;
- err = xbpf_map_update_elem(sock_map, &key, &value, BPF_NOEXIST);
+ err = add_to_sockmap(sock_map, s1, s2);
if (err)
goto close_srv2;
- key = 1;
- value = s2;
- err = xbpf_map_update_elem(sock_map, &key, &value, BPF_NOEXIST);
-
/* Connect to s2, reuseport BPF selects s1 via sock_map[0] */
len = sizeof(addr);
err = xgetsockname(s2, sockaddr(&addr), &len);
@@ -1420,14 +1189,12 @@ close_srv1:
static void test_ops_cleanup(const struct bpf_map *map)
{
- const struct bpf_map_def *def;
int err, mapfd;
u32 key;
- def = bpf_map__def(map);
mapfd = bpf_map__fd(map);
- for (key = 0; key < def->max_entries; key++) {
+ for (key = 0; key < bpf_map__max_entries(map); key++) {
err = bpf_map_delete_elem(mapfd, &key);
if (err && errno != EINVAL && errno != ENOENT)
FAIL_ERRNO("map_delete: expected EINVAL/ENOENT");
@@ -1441,6 +1208,10 @@ static const char *family_str(sa_family_t family)
return "IPv4";
case AF_INET6:
return "IPv6";
+ case AF_UNIX:
+ return "Unix";
+ case AF_VSOCK:
+ return "VSOCK";
default:
return "unknown";
}
@@ -1448,13 +1219,13 @@ static const char *family_str(sa_family_t family)
static const char *map_type_str(const struct bpf_map *map)
{
- const struct bpf_map_def *def;
+ int type;
- def = bpf_map__def(map);
- if (IS_ERR(def))
+ if (!map)
return "invalid";
+ type = bpf_map__type(map);
- switch (def->type) {
+ switch (type) {
case BPF_MAP_TYPE_SOCKMAP:
return "sockmap";
case BPF_MAP_TYPE_SOCKHASH:
@@ -1480,7 +1251,8 @@ static void test_ops(struct test_sockmap_listen *skel, struct bpf_map *map,
int family, int sotype)
{
const struct op_test {
- void (*fn)(int family, int sotype, int mapfd);
+ void (*fn)(struct test_sockmap_listen *skel,
+ int family, int sotype, int mapfd);
const char *name;
int sotype;
} tests[] = {
@@ -1527,7 +1299,7 @@ static void test_ops(struct test_sockmap_listen *skel, struct bpf_map *map,
if (!test__start_subtest(s))
continue;
- t->fn(family, sotype, map_fd);
+ t->fn(skel, family, sotype, map_fd);
test_ops_cleanup(map);
}
}
@@ -1542,6 +1314,7 @@ static void test_redir(struct test_sockmap_listen *skel, struct bpf_map *map,
} tests[] = {
TEST(test_skb_redir_to_connected),
TEST(test_skb_redir_to_listening),
+ TEST(test_skb_redir_partial),
TEST(test_msg_redir_to_connected),
TEST(test_msg_redir_to_listening),
};
@@ -1563,6 +1336,259 @@ static void test_redir(struct test_sockmap_listen *skel, struct bpf_map *map,
}
}
+static void pairs_redir_to_connected(int cli0, int peer0, int cli1, int peer1,
+ int sock_mapfd, int nop_mapfd,
+ int verd_mapfd, enum redir_mode mode)
+{
+ const char *log_prefix = redir_mode_str(mode);
+ unsigned int pass;
+ int err, n;
+ u32 key;
+ char b;
+
+ zero_verdict_count(verd_mapfd);
+
+ err = add_to_sockmap(sock_mapfd, peer0, peer1);
+ if (err)
+ return;
+
+ if (nop_mapfd >= 0) {
+ err = add_to_sockmap(nop_mapfd, cli0, cli1);
+ if (err)
+ return;
+ }
+
+ n = write(cli1, "a", 1);
+ if (n < 0)
+ FAIL_ERRNO("%s: write", log_prefix);
+ if (n == 0)
+ FAIL("%s: incomplete write", log_prefix);
+ if (n < 1)
+ return;
+
+ key = SK_PASS;
+ err = xbpf_map_lookup_elem(verd_mapfd, &key, &pass);
+ if (err)
+ return;
+ if (pass != 1)
+ FAIL("%s: want pass count 1, have %d", log_prefix, pass);
+
+ n = recv_timeout(mode == REDIR_INGRESS ? peer0 : cli0, &b, 1, 0, IO_TIMEOUT_SEC);
+ if (n < 0)
+ FAIL_ERRNO("%s: recv_timeout", log_prefix);
+ if (n == 0)
+ FAIL("%s: incomplete recv", log_prefix);
+}
+
+static void unix_redir_to_connected(int sotype, int sock_mapfd,
+ int verd_mapfd, enum redir_mode mode)
+{
+ int c0, c1, p0, p1;
+ int sfd[2];
+
+ if (socketpair(AF_UNIX, sotype | SOCK_NONBLOCK, 0, sfd))
+ return;
+ c0 = sfd[0], p0 = sfd[1];
+
+ if (socketpair(AF_UNIX, sotype | SOCK_NONBLOCK, 0, sfd))
+ goto close0;
+ c1 = sfd[0], p1 = sfd[1];
+
+ pairs_redir_to_connected(c0, p0, c1, p1, sock_mapfd, -1, verd_mapfd, mode);
+
+ xclose(c1);
+ xclose(p1);
+close0:
+ xclose(c0);
+ xclose(p0);
+}
+
+static void unix_skb_redir_to_connected(struct test_sockmap_listen *skel,
+ struct bpf_map *inner_map, int sotype)
+{
+ int verdict = bpf_program__fd(skel->progs.prog_skb_verdict);
+ int verdict_map = bpf_map__fd(skel->maps.verdict_map);
+ int sock_map = bpf_map__fd(inner_map);
+ int err;
+
+ err = xbpf_prog_attach(verdict, sock_map, BPF_SK_SKB_VERDICT, 0);
+ if (err)
+ return;
+
+ skel->bss->test_ingress = false;
+ unix_redir_to_connected(sotype, sock_map, verdict_map, REDIR_EGRESS);
+ skel->bss->test_ingress = true;
+ unix_redir_to_connected(sotype, sock_map, verdict_map, REDIR_INGRESS);
+
+ xbpf_prog_detach2(verdict, sock_map, BPF_SK_SKB_VERDICT);
+}
+
+static void test_unix_redir(struct test_sockmap_listen *skel, struct bpf_map *map,
+ int sotype)
+{
+ const char *family_name, *map_name;
+ char s[MAX_TEST_NAME];
+
+ family_name = family_str(AF_UNIX);
+ map_name = map_type_str(map);
+ snprintf(s, sizeof(s), "%s %s %s", map_name, family_name, __func__);
+ if (!test__start_subtest(s))
+ return;
+ unix_skb_redir_to_connected(skel, map, sotype);
+}
+
+/* Returns two connected loopback vsock sockets */
+static int vsock_socketpair_connectible(int sotype, int *v0, int *v1)
+{
+ struct sockaddr_storage addr;
+ socklen_t len = sizeof(addr);
+ int s, p, c;
+
+ s = socket_loopback(AF_VSOCK, sotype);
+ if (s < 0)
+ return -1;
+
+ c = xsocket(AF_VSOCK, sotype | SOCK_NONBLOCK, 0);
+ if (c == -1)
+ goto close_srv;
+
+ if (getsockname(s, sockaddr(&addr), &len) < 0)
+ goto close_cli;
+
+ if (connect(c, sockaddr(&addr), len) < 0 && errno != EINPROGRESS) {
+ FAIL_ERRNO("connect");
+ goto close_cli;
+ }
+
+ len = sizeof(addr);
+ p = accept_timeout(s, sockaddr(&addr), &len, IO_TIMEOUT_SEC);
+ if (p < 0)
+ goto close_cli;
+
+ if (poll_connect(c, IO_TIMEOUT_SEC) < 0) {
+ FAIL_ERRNO("poll_connect");
+ goto close_acc;
+ }
+
+ *v0 = p;
+ *v1 = c;
+
+ return 0;
+
+close_acc:
+ close(p);
+close_cli:
+ close(c);
+close_srv:
+ close(s);
+
+ return -1;
+}
+
+static void vsock_unix_redir_connectible(int sock_mapfd, int verd_mapfd,
+ enum redir_mode mode, int sotype)
+{
+ const char *log_prefix = redir_mode_str(mode);
+ char a = 'a', b = 'b';
+ int u0, u1, v0, v1;
+ int sfd[2];
+ unsigned int pass;
+ int err, n;
+ u32 key;
+
+ zero_verdict_count(verd_mapfd);
+
+ if (socketpair(AF_UNIX, SOCK_STREAM | SOCK_NONBLOCK, 0, sfd))
+ return;
+
+ u0 = sfd[0];
+ u1 = sfd[1];
+
+ err = vsock_socketpair_connectible(sotype, &v0, &v1);
+ if (err) {
+ FAIL("vsock_socketpair_connectible() failed");
+ goto close_uds;
+ }
+
+ err = add_to_sockmap(sock_mapfd, u0, v0);
+ if (err) {
+ FAIL("add_to_sockmap failed");
+ goto close_vsock;
+ }
+
+ n = write(v1, &a, sizeof(a));
+ if (n < 0)
+ FAIL_ERRNO("%s: write", log_prefix);
+ if (n == 0)
+ FAIL("%s: incomplete write", log_prefix);
+ if (n < 1)
+ goto out;
+
+ n = xrecv_nonblock(mode == REDIR_INGRESS ? u0 : u1, &b, sizeof(b), 0);
+ if (n < 0)
+ FAIL("%s: recv() err, errno=%d", log_prefix, errno);
+ if (n == 0)
+ FAIL("%s: incomplete recv", log_prefix);
+ if (b != a)
+ FAIL("%s: vsock socket map failed, %c != %c", log_prefix, a, b);
+
+ key = SK_PASS;
+ err = xbpf_map_lookup_elem(verd_mapfd, &key, &pass);
+ if (err)
+ goto out;
+ if (pass != 1)
+ FAIL("%s: want pass count 1, have %d", log_prefix, pass);
+out:
+ key = 0;
+ bpf_map_delete_elem(sock_mapfd, &key);
+ key = 1;
+ bpf_map_delete_elem(sock_mapfd, &key);
+
+close_vsock:
+ close(v0);
+ close(v1);
+
+close_uds:
+ close(u0);
+ close(u1);
+}
+
+static void vsock_unix_skb_redir_connectible(struct test_sockmap_listen *skel,
+ struct bpf_map *inner_map,
+ int sotype)
+{
+ int verdict = bpf_program__fd(skel->progs.prog_skb_verdict);
+ int verdict_map = bpf_map__fd(skel->maps.verdict_map);
+ int sock_map = bpf_map__fd(inner_map);
+ int err;
+
+ err = xbpf_prog_attach(verdict, sock_map, BPF_SK_SKB_VERDICT, 0);
+ if (err)
+ return;
+
+ skel->bss->test_ingress = false;
+ vsock_unix_redir_connectible(sock_map, verdict_map, REDIR_EGRESS, sotype);
+ skel->bss->test_ingress = true;
+ vsock_unix_redir_connectible(sock_map, verdict_map, REDIR_INGRESS, sotype);
+
+ xbpf_prog_detach2(verdict, sock_map, BPF_SK_SKB_VERDICT);
+}
+
+static void test_vsock_redir(struct test_sockmap_listen *skel, struct bpf_map *map)
+{
+ const char *family_name, *map_name;
+ char s[MAX_TEST_NAME];
+
+ family_name = family_str(AF_VSOCK);
+ map_name = map_type_str(map);
+ snprintf(s, sizeof(s), "%s %s %s", map_name, family_name, __func__);
+ if (!test__start_subtest(s))
+ return;
+
+ vsock_unix_skb_redir_connectible(skel, map, SOCK_STREAM);
+ vsock_unix_skb_redir_connectible(skel, map, SOCK_SEQPACKET);
+}
+
static void test_reuseport(struct test_sockmap_listen *skel,
struct bpf_map *map, int family, int sotype)
{
@@ -1603,6 +1629,242 @@ static void test_reuseport(struct test_sockmap_listen *skel,
}
}
+static int inet_socketpair(int family, int type, int *s, int *c)
+{
+ struct sockaddr_storage addr;
+ socklen_t len;
+ int p0, c0;
+ int err;
+
+ p0 = socket_loopback(family, type | SOCK_NONBLOCK);
+ if (p0 < 0)
+ return p0;
+
+ len = sizeof(addr);
+ err = xgetsockname(p0, sockaddr(&addr), &len);
+ if (err)
+ goto close_peer0;
+
+ c0 = xsocket(family, type | SOCK_NONBLOCK, 0);
+ if (c0 < 0) {
+ err = c0;
+ goto close_peer0;
+ }
+ err = xconnect(c0, sockaddr(&addr), len);
+ if (err)
+ goto close_cli0;
+ err = xgetsockname(c0, sockaddr(&addr), &len);
+ if (err)
+ goto close_cli0;
+ err = xconnect(p0, sockaddr(&addr), len);
+ if (err)
+ goto close_cli0;
+
+ *s = p0;
+ *c = c0;
+ return 0;
+
+close_cli0:
+ xclose(c0);
+close_peer0:
+ xclose(p0);
+ return err;
+}
+
+static void udp_redir_to_connected(int family, int sock_mapfd, int verd_mapfd,
+ enum redir_mode mode)
+{
+ int c0, c1, p0, p1;
+ int err;
+
+ err = inet_socketpair(family, SOCK_DGRAM, &p0, &c0);
+ if (err)
+ return;
+ err = inet_socketpair(family, SOCK_DGRAM, &p1, &c1);
+ if (err)
+ goto close_cli0;
+
+ pairs_redir_to_connected(c0, p0, c1, p1, sock_mapfd, -1, verd_mapfd, mode);
+
+ xclose(c1);
+ xclose(p1);
+close_cli0:
+ xclose(c0);
+ xclose(p0);
+}
+
+static void udp_skb_redir_to_connected(struct test_sockmap_listen *skel,
+ struct bpf_map *inner_map, int family)
+{
+ int verdict = bpf_program__fd(skel->progs.prog_skb_verdict);
+ int verdict_map = bpf_map__fd(skel->maps.verdict_map);
+ int sock_map = bpf_map__fd(inner_map);
+ int err;
+
+ err = xbpf_prog_attach(verdict, sock_map, BPF_SK_SKB_VERDICT, 0);
+ if (err)
+ return;
+
+ skel->bss->test_ingress = false;
+ udp_redir_to_connected(family, sock_map, verdict_map, REDIR_EGRESS);
+ skel->bss->test_ingress = true;
+ udp_redir_to_connected(family, sock_map, verdict_map, REDIR_INGRESS);
+
+ xbpf_prog_detach2(verdict, sock_map, BPF_SK_SKB_VERDICT);
+}
+
+static void test_udp_redir(struct test_sockmap_listen *skel, struct bpf_map *map,
+ int family)
+{
+ const char *family_name, *map_name;
+ char s[MAX_TEST_NAME];
+
+ family_name = family_str(family);
+ map_name = map_type_str(map);
+ snprintf(s, sizeof(s), "%s %s %s", map_name, family_name, __func__);
+ if (!test__start_subtest(s))
+ return;
+ udp_skb_redir_to_connected(skel, map, family);
+}
+
+static void inet_unix_redir_to_connected(int family, int type, int sock_mapfd,
+ int verd_mapfd, enum redir_mode mode)
+{
+ int c0, c1, p0, p1;
+ int sfd[2];
+ int err;
+
+ if (socketpair(AF_UNIX, SOCK_DGRAM | SOCK_NONBLOCK, 0, sfd))
+ return;
+ c0 = sfd[0], p0 = sfd[1];
+
+ err = inet_socketpair(family, SOCK_DGRAM, &p1, &c1);
+ if (err)
+ goto close;
+
+ pairs_redir_to_connected(c0, p0, c1, p1, sock_mapfd, -1, verd_mapfd, mode);
+
+ xclose(c1);
+ xclose(p1);
+close:
+ xclose(c0);
+ xclose(p0);
+}
+
+static void inet_unix_skb_redir_to_connected(struct test_sockmap_listen *skel,
+ struct bpf_map *inner_map, int family)
+{
+ int verdict = bpf_program__fd(skel->progs.prog_skb_verdict);
+ int verdict_map = bpf_map__fd(skel->maps.verdict_map);
+ int sock_map = bpf_map__fd(inner_map);
+ int err;
+
+ err = xbpf_prog_attach(verdict, sock_map, BPF_SK_SKB_VERDICT, 0);
+ if (err)
+ return;
+
+ skel->bss->test_ingress = false;
+ inet_unix_redir_to_connected(family, SOCK_DGRAM, sock_map, verdict_map,
+ REDIR_EGRESS);
+ inet_unix_redir_to_connected(family, SOCK_STREAM, sock_map, verdict_map,
+ REDIR_EGRESS);
+ skel->bss->test_ingress = true;
+ inet_unix_redir_to_connected(family, SOCK_DGRAM, sock_map, verdict_map,
+ REDIR_INGRESS);
+ inet_unix_redir_to_connected(family, SOCK_STREAM, sock_map, verdict_map,
+ REDIR_INGRESS);
+
+ xbpf_prog_detach2(verdict, sock_map, BPF_SK_SKB_VERDICT);
+}
+
+static void unix_inet_redir_to_connected(int family, int type,
+ int sock_mapfd, int nop_mapfd,
+ int verd_mapfd,
+ enum redir_mode mode)
+{
+ int c0, c1, p0, p1;
+ int sfd[2];
+ int err;
+
+ err = inet_socketpair(family, SOCK_DGRAM, &p0, &c0);
+ if (err)
+ return;
+
+ if (socketpair(AF_UNIX, SOCK_DGRAM | SOCK_NONBLOCK, 0, sfd))
+ goto close_cli0;
+ c1 = sfd[0], p1 = sfd[1];
+
+ pairs_redir_to_connected(c0, p0, c1, p1,
+ sock_mapfd, nop_mapfd, verd_mapfd, mode);
+
+ xclose(c1);
+ xclose(p1);
+close_cli0:
+ xclose(c0);
+ xclose(p0);
+
+}
+
+static void unix_inet_skb_redir_to_connected(struct test_sockmap_listen *skel,
+ struct bpf_map *inner_map, int family)
+{
+ int verdict = bpf_program__fd(skel->progs.prog_skb_verdict);
+ int nop_map = bpf_map__fd(skel->maps.nop_map);
+ int verdict_map = bpf_map__fd(skel->maps.verdict_map);
+ int sock_map = bpf_map__fd(inner_map);
+ int err;
+
+ err = xbpf_prog_attach(verdict, sock_map, BPF_SK_SKB_VERDICT, 0);
+ if (err)
+ return;
+
+ skel->bss->test_ingress = false;
+ unix_inet_redir_to_connected(family, SOCK_DGRAM,
+ sock_map, -1, verdict_map,
+ REDIR_EGRESS);
+ unix_inet_redir_to_connected(family, SOCK_DGRAM,
+ sock_map, -1, verdict_map,
+ REDIR_EGRESS);
+
+ unix_inet_redir_to_connected(family, SOCK_DGRAM,
+ sock_map, nop_map, verdict_map,
+ REDIR_EGRESS);
+ unix_inet_redir_to_connected(family, SOCK_STREAM,
+ sock_map, nop_map, verdict_map,
+ REDIR_EGRESS);
+ skel->bss->test_ingress = true;
+ unix_inet_redir_to_connected(family, SOCK_DGRAM,
+ sock_map, -1, verdict_map,
+ REDIR_INGRESS);
+ unix_inet_redir_to_connected(family, SOCK_STREAM,
+ sock_map, -1, verdict_map,
+ REDIR_INGRESS);
+
+ unix_inet_redir_to_connected(family, SOCK_DGRAM,
+ sock_map, nop_map, verdict_map,
+ REDIR_INGRESS);
+ unix_inet_redir_to_connected(family, SOCK_STREAM,
+ sock_map, nop_map, verdict_map,
+ REDIR_INGRESS);
+
+ xbpf_prog_detach2(verdict, sock_map, BPF_SK_SKB_VERDICT);
+}
+
+static void test_udp_unix_redir(struct test_sockmap_listen *skel, struct bpf_map *map,
+ int family)
+{
+ const char *family_name, *map_name;
+ char s[MAX_TEST_NAME];
+
+ family_name = family_str(family);
+ map_name = map_type_str(map);
+ snprintf(s, sizeof(s), "%s %s %s", map_name, family_name, __func__);
+ if (!test__start_subtest(s))
+ return;
+ inet_unix_skb_redir_to_connected(skel, map, family);
+ unix_inet_skb_redir_to_connected(skel, map, family);
+}
+
static void run_tests(struct test_sockmap_listen *skel, struct bpf_map *map,
int family)
{
@@ -1611,9 +1873,11 @@ static void run_tests(struct test_sockmap_listen *skel, struct bpf_map *map,
test_redir(skel, map, family, SOCK_STREAM);
test_reuseport(skel, map, family, SOCK_STREAM);
test_reuseport(skel, map, family, SOCK_DGRAM);
+ test_udp_redir(skel, map, family);
+ test_udp_unix_redir(skel, map, family);
}
-void test_sockmap_listen(void)
+void serial_test_sockmap_listen(void)
{
struct test_sockmap_listen *skel;
@@ -1626,10 +1890,16 @@ void test_sockmap_listen(void)
skel->bss->test_sockmap = true;
run_tests(skel, skel->maps.sock_map, AF_INET);
run_tests(skel, skel->maps.sock_map, AF_INET6);
+ test_unix_redir(skel, skel->maps.sock_map, SOCK_DGRAM);
+ test_unix_redir(skel, skel->maps.sock_map, SOCK_STREAM);
+ test_vsock_redir(skel, skel->maps.sock_map);
skel->bss->test_sockmap = false;
run_tests(skel, skel->maps.sock_hash, AF_INET);
run_tests(skel, skel->maps.sock_hash, AF_INET6);
+ test_unix_redir(skel, skel->maps.sock_hash, SOCK_DGRAM);
+ test_unix_redir(skel, skel->maps.sock_hash, SOCK_STREAM);
+ test_vsock_redir(skel, skel->maps.sock_hash);
test_sockmap_listen__destroy(skel);
}
diff --git a/tools/testing/selftests/bpf/prog_tests/sockopt.c b/tools/testing/selftests/bpf/prog_tests/sockopt.c
index 3e8517a8395a..5a4491d4edfe 100644
--- a/tools/testing/selftests/bpf/prog_tests/sockopt.c
+++ b/tools/testing/selftests/bpf/prog_tests/sockopt.c
@@ -1,14 +1,20 @@
// SPDX-License-Identifier: GPL-2.0
#include <test_progs.h>
+#include <io_uring/mini_liburing.h>
#include "cgroup_helpers.h"
static char bpf_log_buf[4096];
static bool verbose;
+#ifndef PAGE_SIZE
+#define PAGE_SIZE 4096
+#endif
+
enum sockopt_test_error {
OK = 0,
DENY_LOAD,
DENY_ATTACH,
+ EOPNOTSUPP_GETSOCKOPT,
EPERM_GETSOCKOPT,
EFAULT_GETSOCKOPT,
EPERM_SETSOCKOPT,
@@ -33,6 +39,7 @@ static struct sockopt_test {
socklen_t get_optlen_ret;
enum sockopt_test_error error;
+ bool io_uring_support;
} tests[] = {
/* ==================== getsockopt ==================== */
@@ -246,7 +253,9 @@ static struct sockopt_test {
.attach_type = BPF_CGROUP_GETSOCKOPT,
.expected_attach_type = BPF_CGROUP_GETSOCKOPT,
+ .get_level = SOL_SOCKET,
.get_optlen = 64,
+ .io_uring_support = true,
},
{
.descr = "getsockopt: deny bigger ctx->optlen",
@@ -271,12 +280,34 @@ static struct sockopt_test {
.get_optlen = 64,
.error = EFAULT_GETSOCKOPT,
+ .io_uring_support = true,
},
{
- .descr = "getsockopt: deny arbitrary ctx->retval",
+ .descr = "getsockopt: ignore >PAGE_SIZE optlen",
.insns = {
- /* ctx->retval = 123 */
- BPF_MOV64_IMM(BPF_REG_0, 123),
+ /* write 0xFF to the first optval byte */
+
+ /* r6 = ctx->optval */
+ BPF_LDX_MEM(BPF_DW, BPF_REG_6, BPF_REG_1,
+ offsetof(struct bpf_sockopt, optval)),
+ /* r2 = ctx->optval */
+ BPF_MOV64_REG(BPF_REG_2, BPF_REG_6),
+ /* r6 = ctx->optval + 1 */
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_6, 1),
+
+ /* r7 = ctx->optval_end */
+ BPF_LDX_MEM(BPF_DW, BPF_REG_7, BPF_REG_1,
+ offsetof(struct bpf_sockopt, optval_end)),
+
+ /* if (ctx->optval + 1 <= ctx->optval_end) { */
+ BPF_JMP_REG(BPF_JGT, BPF_REG_6, BPF_REG_7, 1),
+ /* ctx->optval[0] = 0xF0 */
+ BPF_ST_MEM(BPF_B, BPF_REG_2, 0, 0xFF),
+ /* } */
+
+ /* retval changes are ignored */
+ /* ctx->retval = 5 */
+ BPF_MOV64_IMM(BPF_REG_0, 5),
BPF_STX_MEM(BPF_W, BPF_REG_1, BPF_REG_0,
offsetof(struct bpf_sockopt, retval)),
@@ -287,9 +318,12 @@ static struct sockopt_test {
.attach_type = BPF_CGROUP_GETSOCKOPT,
.expected_attach_type = BPF_CGROUP_GETSOCKOPT,
- .get_optlen = 64,
-
- .error = EFAULT_GETSOCKOPT,
+ .get_level = 1234,
+ .get_optname = 5678,
+ .get_optval = {}, /* the changes are ignored */
+ .get_optlen = PAGE_SIZE + 1,
+ .error = EOPNOTSUPP_GETSOCKOPT,
+ .io_uring_support = true,
},
{
.descr = "getsockopt: support smaller ctx->optlen",
@@ -309,8 +343,10 @@ static struct sockopt_test {
.attach_type = BPF_CGROUP_GETSOCKOPT,
.expected_attach_type = BPF_CGROUP_GETSOCKOPT,
+ .get_level = SOL_SOCKET,
.get_optlen = 64,
.get_optlen_ret = 32,
+ .io_uring_support = true,
},
{
.descr = "getsockopt: deny writing to ctx->optval",
@@ -490,6 +526,7 @@ static struct sockopt_test {
.set_level = 123,
.set_optlen = 1,
+ .io_uring_support = true,
},
{
.descr = "setsockopt: allow changing ctx->level",
@@ -544,6 +581,7 @@ static struct sockopt_test {
.set_optname = 123,
.set_optlen = 1,
+ .io_uring_support = true,
},
{
.descr = "setsockopt: allow changing ctx->optname",
@@ -596,6 +634,7 @@ static struct sockopt_test {
.expected_attach_type = BPF_CGROUP_SETSOCKOPT,
.set_optlen = 64,
+ .io_uring_support = true,
},
{
.descr = "setsockopt: ctx->optlen == -1 is ok",
@@ -612,6 +651,7 @@ static struct sockopt_test {
.expected_attach_type = BPF_CGROUP_SETSOCKOPT,
.set_optlen = 64,
+ .io_uring_support = true,
},
{
.descr = "setsockopt: deny ctx->optlen < 0 (except -1)",
@@ -630,6 +670,7 @@ static struct sockopt_test {
.set_optlen = 4,
.error = EFAULT_SETSOCKOPT,
+ .io_uring_support = true,
},
{
.descr = "setsockopt: deny ctx->optlen > input optlen",
@@ -647,6 +688,46 @@ static struct sockopt_test {
.set_optlen = 64,
.error = EFAULT_SETSOCKOPT,
+ .io_uring_support = true,
+ },
+ {
+ .descr = "setsockopt: ignore >PAGE_SIZE optlen",
+ .insns = {
+ /* write 0xFF to the first optval byte */
+
+ /* r6 = ctx->optval */
+ BPF_LDX_MEM(BPF_DW, BPF_REG_6, BPF_REG_1,
+ offsetof(struct bpf_sockopt, optval)),
+ /* r2 = ctx->optval */
+ BPF_MOV64_REG(BPF_REG_2, BPF_REG_6),
+ /* r6 = ctx->optval + 1 */
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_6, 1),
+
+ /* r7 = ctx->optval_end */
+ BPF_LDX_MEM(BPF_DW, BPF_REG_7, BPF_REG_1,
+ offsetof(struct bpf_sockopt, optval_end)),
+
+ /* if (ctx->optval + 1 <= ctx->optval_end) { */
+ BPF_JMP_REG(BPF_JGT, BPF_REG_6, BPF_REG_7, 1),
+ /* ctx->optval[0] = 0xF0 */
+ BPF_ST_MEM(BPF_B, BPF_REG_2, 0, 0xF0),
+ /* } */
+
+ BPF_MOV64_IMM(BPF_REG_0, 1),
+ BPF_EXIT_INSN(),
+ },
+ .attach_type = BPF_CGROUP_SETSOCKOPT,
+ .expected_attach_type = BPF_CGROUP_SETSOCKOPT,
+
+ .set_level = SOL_IP,
+ .set_optname = IP_TOS,
+ .set_optval = {},
+ .set_optlen = PAGE_SIZE + 1,
+
+ .get_level = SOL_IP,
+ .get_optname = IP_TOS,
+ .get_optval = {}, /* the changes are ignored */
+ .get_optlen = 4,
},
{
.descr = "setsockopt: allow changing ctx->optlen within bounds",
@@ -852,29 +933,110 @@ static struct sockopt_test {
static int load_prog(const struct bpf_insn *insns,
enum bpf_attach_type expected_attach_type)
{
- struct bpf_load_program_attr attr = {
- .prog_type = BPF_PROG_TYPE_CGROUP_SOCKOPT,
+ LIBBPF_OPTS(bpf_prog_load_opts, opts,
.expected_attach_type = expected_attach_type,
- .insns = insns,
- .license = "GPL",
.log_level = 2,
- };
- int fd;
+ .log_buf = bpf_log_buf,
+ .log_size = sizeof(bpf_log_buf),
+ );
+ int fd, insns_cnt = 0;
for (;
- insns[attr.insns_cnt].code != (BPF_JMP | BPF_EXIT);
- attr.insns_cnt++) {
+ insns[insns_cnt].code != (BPF_JMP | BPF_EXIT);
+ insns_cnt++) {
}
- attr.insns_cnt++;
+ insns_cnt++;
- fd = bpf_load_program_xattr(&attr, bpf_log_buf, sizeof(bpf_log_buf));
+ fd = bpf_prog_load(BPF_PROG_TYPE_CGROUP_SOCKOPT, NULL, "GPL", insns, insns_cnt, &opts);
if (verbose && fd < 0)
fprintf(stderr, "%s\n", bpf_log_buf);
return fd;
}
-static int run_test(int cgroup_fd, struct sockopt_test *test)
+/* Core function that handles io_uring ring initialization,
+ * sending SQE with sockopt command and waiting for the CQE.
+ */
+static int uring_sockopt(int op, int fd, int level, int optname,
+ const void *optval, socklen_t optlen)
+{
+ struct io_uring_cqe *cqe;
+ struct io_uring_sqe *sqe;
+ struct io_uring ring;
+ int err;
+
+ err = io_uring_queue_init(1, &ring, 0);
+ if (!ASSERT_OK(err, "io_uring initialization"))
+ return err;
+
+ sqe = io_uring_get_sqe(&ring);
+ if (!ASSERT_NEQ(sqe, NULL, "Get an SQE")) {
+ err = -1;
+ goto fail;
+ }
+
+ io_uring_prep_cmd(sqe, op, fd, level, optname, optval, optlen);
+
+ err = io_uring_submit(&ring);
+ if (!ASSERT_EQ(err, 1, "Submit SQE"))
+ goto fail;
+
+ err = io_uring_wait_cqe(&ring, &cqe);
+ if (!ASSERT_OK(err, "Wait for CQE"))
+ goto fail;
+
+ err = cqe->res;
+
+fail:
+ io_uring_queue_exit(&ring);
+
+ return err;
+}
+
+static int uring_setsockopt(int fd, int level, int optname, const void *optval,
+ socklen_t optlen)
+{
+ return uring_sockopt(SOCKET_URING_OP_SETSOCKOPT, fd, level, optname,
+ optval, optlen);
+}
+
+static int uring_getsockopt(int fd, int level, int optname, void *optval,
+ socklen_t *optlen)
+{
+ int ret = uring_sockopt(SOCKET_URING_OP_GETSOCKOPT, fd, level, optname,
+ optval, *optlen);
+ if (ret < 0)
+ return ret;
+
+ /* Populate optlen back to be compatible with systemcall interface,
+ * and simplify the test.
+ */
+ *optlen = ret;
+
+ return 0;
+}
+
+/* Execute the setsocktopt operation */
+static int call_setsockopt(bool use_io_uring, int fd, int level, int optname,
+ const void *optval, socklen_t optlen)
+{
+ if (use_io_uring)
+ return uring_setsockopt(fd, level, optname, optval, optlen);
+
+ return setsockopt(fd, level, optname, optval, optlen);
+}
+
+/* Execute the getsocktopt operation */
+static int call_getsockopt(bool use_io_uring, int fd, int level, int optname,
+ void *optval, socklen_t *optlen)
+{
+ if (use_io_uring)
+ return uring_getsockopt(fd, level, optname, optval, optlen);
+
+ return getsockopt(fd, level, optname, optval, optlen);
+}
+
+static int run_test(int cgroup_fd, struct sockopt_test *test, bool use_io_uring)
{
int sock_fd, err, prog_fd;
void *optval = NULL;
@@ -907,8 +1069,16 @@ static int run_test(int cgroup_fd, struct sockopt_test *test)
}
if (test->set_optlen) {
- err = setsockopt(sock_fd, test->set_level, test->set_optname,
- test->set_optval, test->set_optlen);
+ if (test->set_optlen >= PAGE_SIZE) {
+ int num_pages = test->set_optlen / PAGE_SIZE;
+ int remainder = test->set_optlen % PAGE_SIZE;
+
+ test->set_optlen = num_pages * sysconf(_SC_PAGESIZE) + remainder;
+ }
+
+ err = call_setsockopt(use_io_uring, sock_fd, test->set_level,
+ test->set_optname, test->set_optval,
+ test->set_optlen);
if (err) {
if (errno == EPERM && test->error == EPERM_SETSOCKOPT)
goto close_sock_fd;
@@ -922,14 +1092,24 @@ static int run_test(int cgroup_fd, struct sockopt_test *test)
}
if (test->get_optlen) {
+ if (test->get_optlen >= PAGE_SIZE) {
+ int num_pages = test->get_optlen / PAGE_SIZE;
+ int remainder = test->get_optlen % PAGE_SIZE;
+
+ test->get_optlen = num_pages * sysconf(_SC_PAGESIZE) + remainder;
+ }
+
optval = malloc(test->get_optlen);
+ memset(optval, 0, test->get_optlen);
socklen_t optlen = test->get_optlen;
socklen_t expected_get_optlen = test->get_optlen_ret ?:
test->get_optlen;
- err = getsockopt(sock_fd, test->get_level, test->get_optname,
- optval, &optlen);
+ err = call_getsockopt(use_io_uring, sock_fd, test->get_level,
+ test->get_optname, optval, &optlen);
if (err) {
+ if (errno == EOPNOTSUPP && test->error == EOPNOTSUPP_GETSOCKOPT)
+ goto free_optval;
if (errno == EPERM && test->error == EPERM_GETSOCKOPT)
goto free_optval;
if (errno == EFAULT && test->error == EFAULT_GETSOCKOPT)
@@ -973,12 +1153,18 @@ void test_sockopt(void)
int cgroup_fd, i;
cgroup_fd = test__join_cgroup("/sockopt");
- if (CHECK_FAIL(cgroup_fd < 0))
+ if (!ASSERT_GE(cgroup_fd, 0, "join_cgroup"))
return;
for (i = 0; i < ARRAY_SIZE(tests); i++) {
- test__start_subtest(tests[i].descr);
- CHECK_FAIL(run_test(cgroup_fd, &tests[i]));
+ if (!test__start_subtest(tests[i].descr))
+ continue;
+
+ ASSERT_OK(run_test(cgroup_fd, &tests[i], false),
+ tests[i].descr);
+ if (tests[i].io_uring_support)
+ ASSERT_OK(run_test(cgroup_fd, &tests[i], true),
+ tests[i].descr);
}
close(cgroup_fd);
diff --git a/tools/testing/selftests/bpf/prog_tests/sockopt_inherit.c b/tools/testing/selftests/bpf/prog_tests/sockopt_inherit.c
index 8547ecbdc61f..917f486db826 100644
--- a/tools/testing/selftests/bpf/prog_tests/sockopt_inherit.c
+++ b/tools/testing/selftests/bpf/prog_tests/sockopt_inherit.c
@@ -2,6 +2,8 @@
#include <test_progs.h>
#include "cgroup_helpers.h"
+#include "sockopt_inherit.skel.h"
+
#define SOL_CUSTOM 0xdeadbeef
#define CUSTOM_INHERIT1 0
#define CUSTOM_INHERIT2 1
@@ -76,20 +78,16 @@ static void *server_thread(void *arg)
pthread_cond_signal(&server_started);
pthread_mutex_unlock(&server_started_mtx);
- if (CHECK_FAIL(err < 0)) {
- perror("Failed to listed on socket");
+ if (!ASSERT_GE(err, 0, "listed on socket"))
return NULL;
- }
err += verify_sockopt(fd, CUSTOM_INHERIT1, "listen", 1);
err += verify_sockopt(fd, CUSTOM_INHERIT2, "listen", 1);
err += verify_sockopt(fd, CUSTOM_LISTENER, "listen", 1);
client_fd = accept(fd, (struct sockaddr *)&addr, &len);
- if (CHECK_FAIL(client_fd < 0)) {
- perror("Failed to accept client");
+ if (!ASSERT_GE(client_fd, 0, "accept client"))
return NULL;
- }
err += verify_sockopt(client_fd, CUSTOM_INHERIT1, "accept", 1);
err += verify_sockopt(client_fd, CUSTOM_INHERIT2, "accept", 1);
@@ -136,90 +134,67 @@ static int start_server(void)
return fd;
}
-static int prog_attach(struct bpf_object *obj, int cgroup_fd, const char *title)
-{
- enum bpf_attach_type attach_type;
- enum bpf_prog_type prog_type;
- struct bpf_program *prog;
- int err;
-
- err = libbpf_prog_type_by_name(title, &prog_type, &attach_type);
- if (err) {
- log_err("Failed to deduct types for %s BPF program", title);
- return -1;
- }
-
- prog = bpf_object__find_program_by_title(obj, title);
- if (!prog) {
- log_err("Failed to find %s BPF program", title);
- return -1;
- }
-
- err = bpf_prog_attach(bpf_program__fd(prog), cgroup_fd,
- attach_type, 0);
- if (err) {
- log_err("Failed to attach %s BPF program", title);
- return -1;
- }
-
- return 0;
-}
-
static void run_test(int cgroup_fd)
{
- struct bpf_prog_load_attr attr = {
- .file = "./sockopt_inherit.o",
- };
+ struct bpf_link *link_getsockopt = NULL;
+ struct bpf_link *link_setsockopt = NULL;
int server_fd = -1, client_fd;
- struct bpf_object *obj;
+ struct sockopt_inherit *obj;
void *server_err;
pthread_t tid;
- int ignored;
int err;
- err = bpf_prog_load_xattr(&attr, &obj, &ignored);
- if (CHECK_FAIL(err))
+ obj = sockopt_inherit__open_and_load();
+ if (!ASSERT_OK_PTR(obj, "skel-load"))
return;
- err = prog_attach(obj, cgroup_fd, "cgroup/getsockopt");
- if (CHECK_FAIL(err))
+ obj->bss->page_size = sysconf(_SC_PAGESIZE);
+
+ link_getsockopt = bpf_program__attach_cgroup(obj->progs._getsockopt,
+ cgroup_fd);
+ if (!ASSERT_OK_PTR(link_getsockopt, "cg-attach-getsockopt"))
goto close_bpf_object;
- err = prog_attach(obj, cgroup_fd, "cgroup/setsockopt");
- if (CHECK_FAIL(err))
+ link_setsockopt = bpf_program__attach_cgroup(obj->progs._setsockopt,
+ cgroup_fd);
+ if (!ASSERT_OK_PTR(link_setsockopt, "cg-attach-setsockopt"))
goto close_bpf_object;
server_fd = start_server();
- if (CHECK_FAIL(server_fd < 0))
+ if (!ASSERT_GE(server_fd, 0, "start_server"))
goto close_bpf_object;
- if (CHECK_FAIL(pthread_create(&tid, NULL, server_thread,
- (void *)&server_fd)))
- goto close_server_fd;
-
pthread_mutex_lock(&server_started_mtx);
+ if (!ASSERT_OK(pthread_create(&tid, NULL, server_thread,
+ (void *)&server_fd), "pthread_create")) {
+ pthread_mutex_unlock(&server_started_mtx);
+ goto close_server_fd;
+ }
pthread_cond_wait(&server_started, &server_started_mtx);
pthread_mutex_unlock(&server_started_mtx);
client_fd = connect_to_server(server_fd);
- if (CHECK_FAIL(client_fd < 0))
+ if (!ASSERT_GE(client_fd, 0, "connect_to_server"))
goto close_server_fd;
- CHECK_FAIL(verify_sockopt(client_fd, CUSTOM_INHERIT1, "connect", 0));
- CHECK_FAIL(verify_sockopt(client_fd, CUSTOM_INHERIT2, "connect", 0));
- CHECK_FAIL(verify_sockopt(client_fd, CUSTOM_LISTENER, "connect", 0));
+ ASSERT_OK(verify_sockopt(client_fd, CUSTOM_INHERIT1, "connect", 0), "verify_sockopt1");
+ ASSERT_OK(verify_sockopt(client_fd, CUSTOM_INHERIT2, "connect", 0), "verify_sockopt2");
+ ASSERT_OK(verify_sockopt(client_fd, CUSTOM_LISTENER, "connect", 0), "verify_sockopt ener");
pthread_join(tid, &server_err);
err = (int)(long)server_err;
- CHECK_FAIL(err);
+ ASSERT_OK(err, "pthread_join retval");
close(client_fd);
close_server_fd:
close(server_fd);
close_bpf_object:
- bpf_object__close(obj);
+ bpf_link__destroy(link_getsockopt);
+ bpf_link__destroy(link_setsockopt);
+
+ sockopt_inherit__destroy(obj);
}
void test_sockopt_inherit(void)
@@ -227,7 +202,7 @@ void test_sockopt_inherit(void)
int cgroup_fd;
cgroup_fd = test__join_cgroup("/sockopt_inherit");
- if (CHECK_FAIL(cgroup_fd < 0))
+ if (!ASSERT_GE(cgroup_fd, 0, "join_cgroup"))
return;
run_test(cgroup_fd);
diff --git a/tools/testing/selftests/bpf/prog_tests/sockopt_multi.c b/tools/testing/selftests/bpf/prog_tests/sockopt_multi.c
index 29188d6f5c8d..759bbb6f8c5f 100644
--- a/tools/testing/selftests/bpf/prog_tests/sockopt_multi.c
+++ b/tools/testing/selftests/bpf/prog_tests/sockopt_multi.c
@@ -2,61 +2,13 @@
#include <test_progs.h>
#include "cgroup_helpers.h"
-static int prog_attach(struct bpf_object *obj, int cgroup_fd, const char *title)
-{
- enum bpf_attach_type attach_type;
- enum bpf_prog_type prog_type;
- struct bpf_program *prog;
- int err;
-
- err = libbpf_prog_type_by_name(title, &prog_type, &attach_type);
- if (err) {
- log_err("Failed to deduct types for %s BPF program", title);
- return -1;
- }
-
- prog = bpf_object__find_program_by_title(obj, title);
- if (!prog) {
- log_err("Failed to find %s BPF program", title);
- return -1;
- }
-
- err = bpf_prog_attach(bpf_program__fd(prog), cgroup_fd,
- attach_type, BPF_F_ALLOW_MULTI);
- if (err) {
- log_err("Failed to attach %s BPF program", title);
- return -1;
- }
-
- return 0;
-}
-
-static int prog_detach(struct bpf_object *obj, int cgroup_fd, const char *title)
-{
- enum bpf_attach_type attach_type;
- enum bpf_prog_type prog_type;
- struct bpf_program *prog;
- int err;
-
- err = libbpf_prog_type_by_name(title, &prog_type, &attach_type);
- if (err)
- return -1;
-
- prog = bpf_object__find_program_by_title(obj, title);
- if (!prog)
- return -1;
-
- err = bpf_prog_detach2(bpf_program__fd(prog), cgroup_fd,
- attach_type);
- if (err)
- return -1;
-
- return 0;
-}
+#include "sockopt_multi.skel.h"
-static int run_getsockopt_test(struct bpf_object *obj, int cg_parent,
+static int run_getsockopt_test(struct sockopt_multi *obj, int cg_parent,
int cg_child, int sock_fd)
{
+ struct bpf_link *link_parent = NULL;
+ struct bpf_link *link_child = NULL;
socklen_t optlen;
__u8 buf;
int err;
@@ -89,8 +41,9 @@ static int run_getsockopt_test(struct bpf_object *obj, int cg_parent,
* - child: 0x80 -> 0x90
*/
- err = prog_attach(obj, cg_child, "cgroup/getsockopt/child");
- if (err)
+ link_child = bpf_program__attach_cgroup(obj->progs._getsockopt_child,
+ cg_child);
+ if (!ASSERT_OK_PTR(link_child, "cg-attach-getsockopt_child"))
goto detach;
buf = 0x00;
@@ -113,8 +66,9 @@ static int run_getsockopt_test(struct bpf_object *obj, int cg_parent,
* - parent: 0x90 -> 0xA0
*/
- err = prog_attach(obj, cg_parent, "cgroup/getsockopt/parent");
- if (err)
+ link_parent = bpf_program__attach_cgroup(obj->progs._getsockopt_parent,
+ cg_parent);
+ if (!ASSERT_OK_PTR(link_parent, "cg-attach-getsockopt_parent"))
goto detach;
buf = 0x00;
@@ -138,7 +92,8 @@ static int run_getsockopt_test(struct bpf_object *obj, int cg_parent,
*/
buf = 0x40;
- if (setsockopt(sock_fd, SOL_IP, IP_TOS, &buf, 1) < 0) {
+ err = setsockopt(sock_fd, SOL_IP, IP_TOS, &buf, 1);
+ if (err < 0) {
log_err("Failed to call setsockopt(IP_TOS)");
goto detach;
}
@@ -156,11 +111,8 @@ static int run_getsockopt_test(struct bpf_object *obj, int cg_parent,
* - parent: unexpected 0x40, EPERM
*/
- err = prog_detach(obj, cg_child, "cgroup/getsockopt/child");
- if (err) {
- log_err("Failed to detach child program");
- goto detach;
- }
+ bpf_link__destroy(link_child);
+ link_child = NULL;
buf = 0x00;
optlen = 1;
@@ -197,15 +149,17 @@ static int run_getsockopt_test(struct bpf_object *obj, int cg_parent,
}
detach:
- prog_detach(obj, cg_child, "cgroup/getsockopt/child");
- prog_detach(obj, cg_parent, "cgroup/getsockopt/parent");
+ bpf_link__destroy(link_child);
+ bpf_link__destroy(link_parent);
return err;
}
-static int run_setsockopt_test(struct bpf_object *obj, int cg_parent,
+static int run_setsockopt_test(struct sockopt_multi *obj, int cg_parent,
int cg_child, int sock_fd)
{
+ struct bpf_link *link_parent = NULL;
+ struct bpf_link *link_child = NULL;
socklen_t optlen;
__u8 buf;
int err;
@@ -235,8 +189,9 @@ static int run_setsockopt_test(struct bpf_object *obj, int cg_parent,
/* Attach child program and make sure it adds 0x10. */
- err = prog_attach(obj, cg_child, "cgroup/setsockopt");
- if (err)
+ link_child = bpf_program__attach_cgroup(obj->progs._setsockopt,
+ cg_child);
+ if (!ASSERT_OK_PTR(link_child, "cg-attach-setsockopt_child"))
goto detach;
buf = 0x80;
@@ -262,8 +217,9 @@ static int run_setsockopt_test(struct bpf_object *obj, int cg_parent,
/* Attach parent program and make sure it adds another 0x10. */
- err = prog_attach(obj, cg_parent, "cgroup/setsockopt");
- if (err)
+ link_parent = bpf_program__attach_cgroup(obj->progs._setsockopt,
+ cg_parent);
+ if (!ASSERT_OK_PTR(link_parent, "cg-attach-setsockopt_parent"))
goto detach;
buf = 0x80;
@@ -288,45 +244,42 @@ static int run_setsockopt_test(struct bpf_object *obj, int cg_parent,
}
detach:
- prog_detach(obj, cg_child, "cgroup/setsockopt");
- prog_detach(obj, cg_parent, "cgroup/setsockopt");
+ bpf_link__destroy(link_child);
+ bpf_link__destroy(link_parent);
return err;
}
void test_sockopt_multi(void)
{
- struct bpf_prog_load_attr attr = {
- .file = "./sockopt_multi.o",
- };
int cg_parent = -1, cg_child = -1;
- struct bpf_object *obj = NULL;
+ struct sockopt_multi *obj = NULL;
int sock_fd = -1;
- int err = -1;
- int ignored;
cg_parent = test__join_cgroup("/parent");
- if (CHECK_FAIL(cg_parent < 0))
+ if (!ASSERT_GE(cg_parent, 0, "join_cgroup /parent"))
goto out;
cg_child = test__join_cgroup("/parent/child");
- if (CHECK_FAIL(cg_child < 0))
+ if (!ASSERT_GE(cg_child, 0, "join_cgroup /parent/child"))
goto out;
- err = bpf_prog_load_xattr(&attr, &obj, &ignored);
- if (CHECK_FAIL(err))
+ obj = sockopt_multi__open_and_load();
+ if (!ASSERT_OK_PTR(obj, "skel-load"))
goto out;
+ obj->bss->page_size = sysconf(_SC_PAGESIZE);
+
sock_fd = socket(AF_INET, SOCK_STREAM, 0);
- if (CHECK_FAIL(sock_fd < 0))
+ if (!ASSERT_GE(sock_fd, 0, "socket"))
goto out;
- CHECK_FAIL(run_getsockopt_test(obj, cg_parent, cg_child, sock_fd));
- CHECK_FAIL(run_setsockopt_test(obj, cg_parent, cg_child, sock_fd));
+ ASSERT_OK(run_getsockopt_test(obj, cg_parent, cg_child, sock_fd), "getsockopt_test");
+ ASSERT_OK(run_setsockopt_test(obj, cg_parent, cg_child, sock_fd), "setsockopt_test");
out:
close(sock_fd);
- bpf_object__close(obj);
+ sockopt_multi__destroy(obj);
close(cg_child);
close(cg_parent);
}
diff --git a/tools/testing/selftests/bpf/prog_tests/sockopt_qos_to_cc.c b/tools/testing/selftests/bpf/prog_tests/sockopt_qos_to_cc.c
new file mode 100644
index 000000000000..6b2d300e9fd4
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/sockopt_qos_to_cc.c
@@ -0,0 +1,72 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2021 Facebook */
+#include <test_progs.h>
+#include <netinet/tcp.h>
+#include "sockopt_qos_to_cc.skel.h"
+
+static void run_setsockopt_test(int cg_fd, int sock_fd)
+{
+ socklen_t optlen;
+ char cc[16]; /* TCP_CA_NAME_MAX */
+ int buf;
+ int err = -1;
+
+ buf = 0x2D;
+ err = setsockopt(sock_fd, SOL_IPV6, IPV6_TCLASS, &buf, sizeof(buf));
+ if (!ASSERT_OK(err, "setsockopt(sock_fd, IPV6_TCLASS)"))
+ return;
+
+ /* Verify the setsockopt cc change */
+ optlen = sizeof(cc);
+ err = getsockopt(sock_fd, SOL_TCP, TCP_CONGESTION, cc, &optlen);
+ if (!ASSERT_OK(err, "getsockopt(sock_fd, TCP_CONGESTION)"))
+ return;
+
+ if (!ASSERT_STREQ(cc, "reno", "getsockopt(sock_fd, TCP_CONGESTION)"))
+ return;
+}
+
+void test_sockopt_qos_to_cc(void)
+{
+ struct sockopt_qos_to_cc *skel;
+ char cc_cubic[16] = "cubic"; /* TCP_CA_NAME_MAX */
+ int cg_fd = -1;
+ int sock_fd = -1;
+ int err;
+
+ cg_fd = test__join_cgroup("/sockopt_qos_to_cc");
+ if (!ASSERT_GE(cg_fd, 0, "cg-join(sockopt_qos_to_cc)"))
+ return;
+
+ skel = sockopt_qos_to_cc__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "skel"))
+ goto done;
+
+ skel->bss->page_size = sysconf(_SC_PAGESIZE);
+
+ sock_fd = socket(AF_INET6, SOCK_STREAM, 0);
+ if (!ASSERT_GE(sock_fd, 0, "v6 socket open"))
+ goto done;
+
+ err = setsockopt(sock_fd, SOL_TCP, TCP_CONGESTION, &cc_cubic,
+ sizeof(cc_cubic));
+ if (!ASSERT_OK(err, "setsockopt(sock_fd, TCP_CONGESTION)"))
+ goto done;
+
+ skel->links.sockopt_qos_to_cc =
+ bpf_program__attach_cgroup(skel->progs.sockopt_qos_to_cc,
+ cg_fd);
+ if (!ASSERT_OK_PTR(skel->links.sockopt_qos_to_cc,
+ "prog_attach(sockopt_qos_to_cc)"))
+ goto done;
+
+ run_setsockopt_test(cg_fd, sock_fd);
+
+done:
+ if (sock_fd != -1)
+ close(sock_fd);
+ if (cg_fd != -1)
+ close(cg_fd);
+ /* destroy can take null and error pointer */
+ sockopt_qos_to_cc__destroy(skel);
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/sockopt_sk.c b/tools/testing/selftests/bpf/prog_tests/sockopt_sk.c
index 5f54c6aec7f0..05d0e07da394 100644
--- a/tools/testing/selftests/bpf/prog_tests/sockopt_sk.c
+++ b/tools/testing/selftests/bpf/prog_tests/sockopt_sk.c
@@ -2,6 +2,14 @@
#include <test_progs.h>
#include "cgroup_helpers.h"
+#include <linux/tcp.h>
+#include <linux/netlink.h>
+#include "sockopt_sk.skel.h"
+
+#ifndef SOL_TCP
+#define SOL_TCP IPPROTO_TCP
+#endif
+
#define SOL_CUSTOM 0xdeadbeef
static int getsetsockopt(void)
@@ -11,6 +19,7 @@ static int getsetsockopt(void)
char u8[4];
__u32 u32;
char cc[16]; /* TCP_CA_NAME_MAX */
+ struct tcp_zerocopy_receive zc;
} buf = {};
socklen_t optlen;
char *big_buf = NULL;
@@ -45,9 +54,9 @@ static int getsetsockopt(void)
goto err;
}
- if (*(int *)big_buf != 0x08) {
+ if (*big_buf != 0x08) {
log_err("Unexpected getsockopt(IP_TOS) optval 0x%x != 0x08",
- *(int *)big_buf);
+ (int)*big_buf);
goto err;
}
@@ -154,69 +163,87 @@ static int getsetsockopt(void)
goto err;
}
- free(big_buf);
- close(fd);
- return 0;
-err:
- free(big_buf);
- close(fd);
- return -1;
-}
+ /* TCP_ZEROCOPY_RECEIVE triggers */
+ memset(&buf, 0, sizeof(buf));
+ optlen = sizeof(buf.zc);
+ err = getsockopt(fd, SOL_TCP, TCP_ZEROCOPY_RECEIVE, &buf, &optlen);
+ if (err) {
+ log_err("Unexpected getsockopt(TCP_ZEROCOPY_RECEIVE) err=%d errno=%d",
+ err, errno);
+ goto err;
+ }
-static int prog_attach(struct bpf_object *obj, int cgroup_fd, const char *title)
-{
- enum bpf_attach_type attach_type;
- enum bpf_prog_type prog_type;
- struct bpf_program *prog;
- int err;
+ memset(&buf, 0, sizeof(buf));
+ buf.zc.address = 12345; /* Not page aligned. Rejected by tcp_zerocopy_receive() */
+ optlen = sizeof(buf.zc);
+ errno = 0;
+ err = getsockopt(fd, SOL_TCP, TCP_ZEROCOPY_RECEIVE, &buf, &optlen);
+ if (errno != EINVAL) {
+ log_err("Unexpected getsockopt(TCP_ZEROCOPY_RECEIVE) err=%d errno=%d",
+ err, errno);
+ goto err;
+ }
- err = libbpf_prog_type_by_name(title, &prog_type, &attach_type);
- if (err) {
- log_err("Failed to deduct types for %s BPF program", title);
+ /* optval=NULL case is handled correctly */
+
+ close(fd);
+ fd = socket(AF_NETLINK, SOCK_RAW, 0);
+ if (fd < 0) {
+ log_err("Failed to create AF_NETLINK socket");
return -1;
}
- prog = bpf_object__find_program_by_title(obj, title);
- if (!prog) {
- log_err("Failed to find %s BPF program", title);
- return -1;
+ buf.u32 = 1;
+ optlen = sizeof(__u32);
+ err = setsockopt(fd, SOL_NETLINK, NETLINK_ADD_MEMBERSHIP, &buf, optlen);
+ if (err) {
+ log_err("Unexpected getsockopt(NETLINK_ADD_MEMBERSHIP) err=%d errno=%d",
+ err, errno);
+ goto err;
}
- err = bpf_prog_attach(bpf_program__fd(prog), cgroup_fd,
- attach_type, 0);
+ optlen = 0;
+ err = getsockopt(fd, SOL_NETLINK, NETLINK_LIST_MEMBERSHIPS, NULL, &optlen);
if (err) {
- log_err("Failed to attach %s BPF program", title);
- return -1;
+ log_err("Unexpected getsockopt(NETLINK_LIST_MEMBERSHIPS) err=%d errno=%d",
+ err, errno);
+ goto err;
}
+ ASSERT_EQ(optlen, 8, "Unexpected NETLINK_LIST_MEMBERSHIPS value");
+ free(big_buf);
+ close(fd);
return 0;
+err:
+ free(big_buf);
+ close(fd);
+ return -1;
}
static void run_test(int cgroup_fd)
{
- struct bpf_prog_load_attr attr = {
- .file = "./sockopt_sk.o",
- };
- struct bpf_object *obj;
- int ignored;
- int err;
-
- err = bpf_prog_load_xattr(&attr, &obj, &ignored);
- if (CHECK_FAIL(err))
- return;
+ struct sockopt_sk *skel;
+
+ skel = sockopt_sk__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "skel_load"))
+ goto cleanup;
+
+ skel->bss->page_size = getpagesize();
- err = prog_attach(obj, cgroup_fd, "cgroup/getsockopt");
- if (CHECK_FAIL(err))
- goto close_bpf_object;
+ skel->links._setsockopt =
+ bpf_program__attach_cgroup(skel->progs._setsockopt, cgroup_fd);
+ if (!ASSERT_OK_PTR(skel->links._setsockopt, "setsockopt_link"))
+ goto cleanup;
- err = prog_attach(obj, cgroup_fd, "cgroup/setsockopt");
- if (CHECK_FAIL(err))
- goto close_bpf_object;
+ skel->links._getsockopt =
+ bpf_program__attach_cgroup(skel->progs._getsockopt, cgroup_fd);
+ if (!ASSERT_OK_PTR(skel->links._getsockopt, "getsockopt_link"))
+ goto cleanup;
- CHECK_FAIL(getsetsockopt());
+ ASSERT_OK(getsetsockopt(), "getsetsockopt");
-close_bpf_object:
- bpf_object__close(obj);
+cleanup:
+ sockopt_sk__destroy(skel);
}
void test_sockopt_sk(void)
@@ -224,7 +251,7 @@ void test_sockopt_sk(void)
int cgroup_fd;
cgroup_fd = test__join_cgroup("/sockopt_sk");
- if (CHECK_FAIL(cgroup_fd < 0))
+ if (!ASSERT_GE(cgroup_fd, 0, "join_cgroup /sockopt_sk"))
return;
run_test(cgroup_fd);
diff --git a/tools/testing/selftests/bpf/prog_tests/spin_lock.c b/tools/testing/selftests/bpf/prog_tests/spin_lock.c
new file mode 100644
index 000000000000..2b0068742ef9
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/spin_lock.c
@@ -0,0 +1,171 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <regex.h>
+#include <test_progs.h>
+#include <network_helpers.h>
+
+#include "test_spin_lock.skel.h"
+#include "test_spin_lock_fail.skel.h"
+
+static char log_buf[1024 * 1024];
+
+static struct {
+ const char *prog_name;
+ const char *err_msg;
+} spin_lock_fail_tests[] = {
+ { "lock_id_kptr_preserve",
+ "5: (bf) r1 = r0 ; R0_w=ptr_foo(id=2,ref_obj_id=2) "
+ "R1_w=ptr_foo(id=2,ref_obj_id=2) refs=2\n6: (85) call bpf_this_cpu_ptr#154\n"
+ "R1 type=ptr_ expected=percpu_ptr_" },
+ { "lock_id_global_zero",
+ "; R1_w=map_value(map=.data.A,ks=4,vs=4)\n2: (85) call bpf_this_cpu_ptr#154\n"
+ "R1 type=map_value expected=percpu_ptr_" },
+ { "lock_id_mapval_preserve",
+ "[0-9]\\+: (bf) r1 = r0 ;"
+ " R0_w=map_value(id=1,map=array_map,ks=4,vs=8)"
+ " R1_w=map_value(id=1,map=array_map,ks=4,vs=8)\n"
+ "[0-9]\\+: (85) call bpf_this_cpu_ptr#154\n"
+ "R1 type=map_value expected=percpu_ptr_" },
+ { "lock_id_innermapval_preserve",
+ "[0-9]\\+: (bf) r1 = r0 ;"
+ " R0=map_value(id=2,ks=4,vs=8)"
+ " R1_w=map_value(id=2,ks=4,vs=8)\n"
+ "[0-9]\\+: (85) call bpf_this_cpu_ptr#154\n"
+ "R1 type=map_value expected=percpu_ptr_" },
+ { "lock_id_mismatch_kptr_kptr", "bpf_spin_unlock of different lock" },
+ { "lock_id_mismatch_kptr_global", "bpf_spin_unlock of different lock" },
+ { "lock_id_mismatch_kptr_mapval", "bpf_spin_unlock of different lock" },
+ { "lock_id_mismatch_kptr_innermapval", "bpf_spin_unlock of different lock" },
+ { "lock_id_mismatch_global_global", "bpf_spin_unlock of different lock" },
+ { "lock_id_mismatch_global_kptr", "bpf_spin_unlock of different lock" },
+ { "lock_id_mismatch_global_mapval", "bpf_spin_unlock of different lock" },
+ { "lock_id_mismatch_global_innermapval", "bpf_spin_unlock of different lock" },
+ { "lock_id_mismatch_mapval_mapval", "bpf_spin_unlock of different lock" },
+ { "lock_id_mismatch_mapval_kptr", "bpf_spin_unlock of different lock" },
+ { "lock_id_mismatch_mapval_global", "bpf_spin_unlock of different lock" },
+ { "lock_id_mismatch_mapval_innermapval", "bpf_spin_unlock of different lock" },
+ { "lock_id_mismatch_innermapval_innermapval1", "bpf_spin_unlock of different lock" },
+ { "lock_id_mismatch_innermapval_innermapval2", "bpf_spin_unlock of different lock" },
+ { "lock_id_mismatch_innermapval_kptr", "bpf_spin_unlock of different lock" },
+ { "lock_id_mismatch_innermapval_global", "bpf_spin_unlock of different lock" },
+ { "lock_id_mismatch_innermapval_mapval", "bpf_spin_unlock of different lock" },
+ { "lock_global_subprog_call1", "global function calls are not allowed while holding a lock" },
+ { "lock_global_subprog_call2", "global function calls are not allowed while holding a lock" },
+};
+
+static int match_regex(const char *pattern, const char *string)
+{
+ int err, rc;
+ regex_t re;
+
+ err = regcomp(&re, pattern, REG_NOSUB);
+ if (err) {
+ char errbuf[512];
+
+ regerror(err, &re, errbuf, sizeof(errbuf));
+ PRINT_FAIL("Can't compile regex: %s\n", errbuf);
+ return -1;
+ }
+ rc = regexec(&re, string, 0, NULL, 0);
+ regfree(&re);
+ return rc == 0 ? 1 : 0;
+}
+
+static void test_spin_lock_fail_prog(const char *prog_name, const char *err_msg)
+{
+ LIBBPF_OPTS(bpf_object_open_opts, opts, .kernel_log_buf = log_buf,
+ .kernel_log_size = sizeof(log_buf),
+ .kernel_log_level = 1);
+ struct test_spin_lock_fail *skel;
+ struct bpf_program *prog;
+ int ret;
+
+ skel = test_spin_lock_fail__open_opts(&opts);
+ if (!ASSERT_OK_PTR(skel, "test_spin_lock_fail__open_opts"))
+ return;
+
+ prog = bpf_object__find_program_by_name(skel->obj, prog_name);
+ if (!ASSERT_OK_PTR(prog, "bpf_object__find_program_by_name"))
+ goto end;
+
+ bpf_program__set_autoload(prog, true);
+
+ ret = test_spin_lock_fail__load(skel);
+ if (!ASSERT_ERR(ret, "test_spin_lock_fail__load must fail"))
+ goto end;
+
+ /* Skip check if JIT does not support kfuncs */
+ if (strstr(log_buf, "JIT does not support calling kernel function")) {
+ test__skip();
+ goto end;
+ }
+
+ ret = match_regex(err_msg, log_buf);
+ if (!ASSERT_GE(ret, 0, "match_regex"))
+ goto end;
+
+ if (!ASSERT_TRUE(ret, "no match for expected error message")) {
+ fprintf(stderr, "Expected: %s\n", err_msg);
+ fprintf(stderr, "Verifier: %s\n", log_buf);
+ }
+
+end:
+ test_spin_lock_fail__destroy(skel);
+}
+
+static void *spin_lock_thread(void *arg)
+{
+ int err, prog_fd = *(u32 *) arg;
+ LIBBPF_OPTS(bpf_test_run_opts, topts,
+ .data_in = &pkt_v4,
+ .data_size_in = sizeof(pkt_v4),
+ .repeat = 10000,
+ );
+
+ err = bpf_prog_test_run_opts(prog_fd, &topts);
+ ASSERT_OK(err, "test_run");
+ ASSERT_OK(topts.retval, "test_run retval");
+ pthread_exit(arg);
+}
+
+void test_spin_lock_success(void)
+{
+ struct test_spin_lock *skel;
+ pthread_t thread_id[4];
+ int prog_fd, i;
+ void *ret;
+
+ skel = test_spin_lock__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "test_spin_lock__open_and_load"))
+ return;
+ prog_fd = bpf_program__fd(skel->progs.bpf_spin_lock_test);
+ for (i = 0; i < 4; i++) {
+ int err;
+
+ err = pthread_create(&thread_id[i], NULL, &spin_lock_thread, &prog_fd);
+ if (!ASSERT_OK(err, "pthread_create"))
+ goto end;
+ }
+
+ for (i = 0; i < 4; i++) {
+ if (!ASSERT_OK(pthread_join(thread_id[i], &ret), "pthread_join"))
+ goto end;
+ if (!ASSERT_EQ(ret, &prog_fd, "ret == prog_fd"))
+ goto end;
+ }
+end:
+ test_spin_lock__destroy(skel);
+}
+
+void test_spin_lock(void)
+{
+ int i;
+
+ test_spin_lock_success();
+
+ for (i = 0; i < ARRAY_SIZE(spin_lock_fail_tests); i++) {
+ if (!test__start_subtest(spin_lock_fail_tests[i].prog_name))
+ continue;
+ test_spin_lock_fail_prog(spin_lock_fail_tests[i].prog_name,
+ spin_lock_fail_tests[i].err_msg);
+ }
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/spinlock.c b/tools/testing/selftests/bpf/prog_tests/spinlock.c
deleted file mode 100644
index 7577a77a4c4c..000000000000
--- a/tools/testing/selftests/bpf/prog_tests/spinlock.c
+++ /dev/null
@@ -1,43 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-#include <test_progs.h>
-#include <network_helpers.h>
-
-static void *spin_lock_thread(void *arg)
-{
- __u32 duration, retval;
- int err, prog_fd = *(u32 *) arg;
-
- err = bpf_prog_test_run(prog_fd, 10000, &pkt_v4, sizeof(pkt_v4),
- NULL, NULL, &retval, &duration);
- CHECK(err || retval, "",
- "err %d errno %d retval %d duration %d\n",
- err, errno, retval, duration);
- pthread_exit(arg);
-}
-
-void test_spinlock(void)
-{
- const char *file = "./test_spin_lock.o";
- pthread_t thread_id[4];
- struct bpf_object *obj = NULL;
- int prog_fd;
- int err = 0, i;
- void *ret;
-
- err = bpf_prog_load(file, BPF_PROG_TYPE_CGROUP_SKB, &obj, &prog_fd);
- if (CHECK_FAIL(err)) {
- printf("test_spin_lock:bpf_prog_load errno %d\n", errno);
- goto close_prog;
- }
- for (i = 0; i < 4; i++)
- if (CHECK_FAIL(pthread_create(&thread_id[i], NULL,
- &spin_lock_thread, &prog_fd)))
- goto close_prog;
-
- for (i = 0; i < 4; i++)
- if (CHECK_FAIL(pthread_join(thread_id[i], &ret) ||
- ret != (void *)&prog_fd))
- goto close_prog;
-close_prog:
- bpf_object__close(obj);
-}
diff --git a/tools/testing/selftests/bpf/prog_tests/stack_var_off.c b/tools/testing/selftests/bpf/prog_tests/stack_var_off.c
new file mode 100644
index 000000000000..2ce9deefa59c
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/stack_var_off.c
@@ -0,0 +1,35 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <test_progs.h>
+#include "test_stack_var_off.skel.h"
+
+/* Test read and writes to the stack performed with offsets that are not
+ * statically known.
+ */
+void test_stack_var_off(void)
+{
+ int duration = 0;
+ struct test_stack_var_off *skel;
+
+ skel = test_stack_var_off__open_and_load();
+ if (CHECK(!skel, "skel_open", "failed to open skeleton\n"))
+ return;
+
+ /* Give pid to bpf prog so it doesn't trigger for anyone else. */
+ skel->bss->test_pid = getpid();
+ /* Initialize the probe's input. */
+ skel->bss->input[0] = 2;
+ skel->bss->input[1] = 42; /* This will be returned in probe_res. */
+
+ if (!ASSERT_OK(test_stack_var_off__attach(skel), "skel_attach"))
+ goto cleanup;
+
+ /* Trigger probe. */
+ usleep(1);
+
+ if (CHECK(skel->bss->probe_res != 42, "check_probe_res",
+ "wrong probe res: %d\n", skel->bss->probe_res))
+ goto cleanup;
+
+cleanup:
+ test_stack_var_off__destroy(skel);
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/stacktrace_build_id.c b/tools/testing/selftests/bpf/prog_tests/stacktrace_build_id.c
index e8399ae50e77..b7ba5cd47d96 100644
--- a/tools/testing/selftests/bpf/prog_tests/stacktrace_build_id.c
+++ b/tools/testing/selftests/bpf/prog_tests/stacktrace_build_id.c
@@ -7,13 +7,12 @@ void test_stacktrace_build_id(void)
int control_map_fd, stackid_hmap_fd, stackmap_fd, stack_amap_fd;
struct test_stacktrace_build_id *skel;
- int err, stack_trace_len;
- __u32 key, previous_key, val, duration = 0;
- char buf[256];
- int i, j;
+ int err, stack_trace_len, build_id_size;
+ __u32 key, prev_key, val, duration = 0;
+ char buf[BPF_BUILD_ID_SIZE];
struct bpf_stack_build_id id_offs[PERF_MAX_STACK_DEPTH];
int build_id_matches = 0;
- int retry = 1;
+ int i, retry = 1;
retry:
skel = test_stacktrace_build_id__open_and_load();
@@ -52,20 +51,19 @@ retry:
"err %d errno %d\n", err, errno))
goto cleanup;
- err = extract_build_id(buf, 256);
+ build_id_size = read_build_id("urandom_read", buf, sizeof(buf));
+ err = build_id_size < 0 ? build_id_size : 0;
- if (CHECK(err, "get build_id with readelf",
+ if (CHECK(err, "read_build_id",
"err %d errno %d\n", err, errno))
goto cleanup;
- err = bpf_map_get_next_key(stackmap_fd, NULL, &key);
+ err = bpf_map__get_next_key(skel->maps.stackmap, NULL, &key, sizeof(key));
if (CHECK(err, "get_next_key from stackmap",
"err %d, errno %d\n", err, errno))
goto cleanup;
do {
- char build_id[64];
-
err = bpf_map_lookup_elem(stackmap_fd, &key, id_offs);
if (CHECK(err, "lookup_elem from stackmap",
"err %d, errno %d\n", err, errno))
@@ -73,14 +71,11 @@ retry:
for (i = 0; i < PERF_MAX_STACK_DEPTH; ++i)
if (id_offs[i].status == BPF_STACK_BUILD_ID_VALID &&
id_offs[i].offset != 0) {
- for (j = 0; j < 20; ++j)
- sprintf(build_id + 2 * j, "%02x",
- id_offs[i].build_id[j] & 0xff);
- if (strstr(buf, build_id) != NULL)
+ if (memcmp(buf, id_offs[i].build_id, build_id_size) == 0)
build_id_matches = 1;
}
- previous_key = key;
- } while (bpf_map_get_next_key(stackmap_fd, &previous_key, &key) == 0);
+ prev_key = key;
+ } while (bpf_map__get_next_key(skel->maps.stackmap, &prev_key, &key, sizeof(key)) == 0);
/* stack_map_get_build_id_offset() is racy and sometimes can return
* BPF_STACK_BUILD_ID_IP instead of BPF_STACK_BUILD_ID_VALID;
diff --git a/tools/testing/selftests/bpf/prog_tests/stacktrace_build_id_nmi.c b/tools/testing/selftests/bpf/prog_tests/stacktrace_build_id_nmi.c
index f002e3090d92..5db9eec24b5b 100644
--- a/tools/testing/selftests/bpf/prog_tests/stacktrace_build_id_nmi.c
+++ b/tools/testing/selftests/bpf/prog_tests/stacktrace_build_id_nmi.c
@@ -2,19 +2,6 @@
#include <test_progs.h>
#include "test_stacktrace_build_id.skel.h"
-static __u64 read_perf_max_sample_freq(void)
-{
- __u64 sample_freq = 5000; /* fallback to 5000 on error */
- FILE *f;
-
- f = fopen("/proc/sys/kernel/perf_event_max_sample_rate", "r");
- if (f == NULL)
- return sample_freq;
- fscanf(f, "%llu", &sample_freq);
- fclose(f);
- return sample_freq;
-}
-
void test_stacktrace_build_id_nmi(void)
{
int control_map_fd, stackid_hmap_fd, stackmap_fd;
@@ -25,12 +12,11 @@ void test_stacktrace_build_id_nmi(void)
.type = PERF_TYPE_HARDWARE,
.config = PERF_COUNT_HW_CPU_CYCLES,
};
- __u32 key, previous_key, val, duration = 0;
- char buf[256];
- int i, j;
+ __u32 key, prev_key, val, duration = 0;
+ char buf[BPF_BUILD_ID_SIZE];
struct bpf_stack_build_id id_offs[PERF_MAX_STACK_DEPTH];
- int build_id_matches = 0;
- int retry = 1;
+ int build_id_matches = 0, build_id_size;
+ int i, retry = 1;
attr.sample_freq = read_perf_max_sample_freq();
@@ -40,7 +26,7 @@ retry:
return;
/* override program type */
- bpf_program__set_perf_event(skel->progs.oncpu);
+ bpf_program__set_type(skel->progs.oncpu, BPF_PROG_TYPE_PERF_EVENT);
err = test_stacktrace_build_id__load(skel);
if (CHECK(err, "skel_load", "skeleton load failed: %d\n", err))
@@ -60,8 +46,7 @@ retry:
skel->links.oncpu = bpf_program__attach_perf_event(skel->progs.oncpu,
pmu_fd);
- if (CHECK(IS_ERR(skel->links.oncpu), "attach_perf_event",
- "err %ld\n", PTR_ERR(skel->links.oncpu))) {
+ if (!ASSERT_OK_PTR(skel->links.oncpu, "attach_perf_event")) {
close(pmu_fd);
goto cleanup;
}
@@ -93,35 +78,32 @@ retry:
"err %d errno %d\n", err, errno))
goto cleanup;
- err = extract_build_id(buf, 256);
+ build_id_size = read_build_id("urandom_read", buf, sizeof(buf));
+ err = build_id_size < 0 ? build_id_size : 0;
if (CHECK(err, "get build_id with readelf",
"err %d errno %d\n", err, errno))
goto cleanup;
- err = bpf_map_get_next_key(stackmap_fd, NULL, &key);
+ err = bpf_map__get_next_key(skel->maps.stackmap, NULL, &key, sizeof(key));
if (CHECK(err, "get_next_key from stackmap",
"err %d, errno %d\n", err, errno))
goto cleanup;
do {
- char build_id[64];
-
- err = bpf_map_lookup_elem(stackmap_fd, &key, id_offs);
+ err = bpf_map__lookup_elem(skel->maps.stackmap, &key, sizeof(key),
+ id_offs, sizeof(id_offs), 0);
if (CHECK(err, "lookup_elem from stackmap",
"err %d, errno %d\n", err, errno))
goto cleanup;
for (i = 0; i < PERF_MAX_STACK_DEPTH; ++i)
if (id_offs[i].status == BPF_STACK_BUILD_ID_VALID &&
id_offs[i].offset != 0) {
- for (j = 0; j < 20; ++j)
- sprintf(build_id + 2 * j, "%02x",
- id_offs[i].build_id[j] & 0xff);
- if (strstr(buf, build_id) != NULL)
+ if (memcmp(buf, id_offs[i].build_id, build_id_size) == 0)
build_id_matches = 1;
}
- previous_key = key;
- } while (bpf_map_get_next_key(stackmap_fd, &previous_key, &key) == 0);
+ prev_key = key;
+ } while (bpf_map__get_next_key(skel->maps.stackmap, &prev_key, &key, sizeof(key)) == 0);
/* stack_map_get_build_id_offset() is racy and sometimes can return
* BPF_STACK_BUILD_ID_IP instead of BPF_STACK_BUILD_ID_VALID;
diff --git a/tools/testing/selftests/bpf/prog_tests/stacktrace_map.c b/tools/testing/selftests/bpf/prog_tests/stacktrace_map.c
index 37269d23df93..df59e4ae2951 100644
--- a/tools/testing/selftests/bpf/prog_tests/stacktrace_map.c
+++ b/tools/testing/selftests/bpf/prog_tests/stacktrace_map.c
@@ -4,24 +4,24 @@
void test_stacktrace_map(void)
{
int control_map_fd, stackid_hmap_fd, stackmap_fd, stack_amap_fd;
- const char *prog_name = "tracepoint/sched/sched_switch";
+ const char *prog_name = "oncpu";
int err, prog_fd, stack_trace_len;
- const char *file = "./test_stacktrace_map.o";
+ const char *file = "./test_stacktrace_map.bpf.o";
__u32 key, val, duration = 0;
struct bpf_program *prog;
struct bpf_object *obj;
struct bpf_link *link;
- err = bpf_prog_load(file, BPF_PROG_TYPE_TRACEPOINT, &obj, &prog_fd);
+ err = bpf_prog_test_load(file, BPF_PROG_TYPE_TRACEPOINT, &obj, &prog_fd);
if (CHECK(err, "prog_load", "err %d errno %d\n", err, errno))
return;
- prog = bpf_object__find_program_by_title(obj, prog_name);
+ prog = bpf_object__find_program_by_name(obj, prog_name);
if (CHECK(!prog, "find_prog", "prog '%s' not found\n", prog_name))
goto close_prog;
link = bpf_program__attach_tracepoint(prog, "sched", "sched_switch");
- if (CHECK(IS_ERR(link), "attach_tp", "err %ld\n", PTR_ERR(link)))
+ if (!ASSERT_OK_PTR(link, "attach_tp"))
goto close_prog;
/* find map fds */
diff --git a/tools/testing/selftests/bpf/prog_tests/stacktrace_map_raw_tp.c b/tools/testing/selftests/bpf/prog_tests/stacktrace_map_raw_tp.c
index 404a5498e1a3..c6ef06f55cdb 100644
--- a/tools/testing/selftests/bpf/prog_tests/stacktrace_map_raw_tp.c
+++ b/tools/testing/selftests/bpf/prog_tests/stacktrace_map_raw_tp.c
@@ -3,25 +3,25 @@
void test_stacktrace_map_raw_tp(void)
{
- const char *prog_name = "tracepoint/sched/sched_switch";
+ const char *prog_name = "oncpu";
int control_map_fd, stackid_hmap_fd, stackmap_fd;
- const char *file = "./test_stacktrace_map.o";
+ const char *file = "./test_stacktrace_map.bpf.o";
__u32 key, val, duration = 0;
int err, prog_fd;
struct bpf_program *prog;
struct bpf_object *obj;
struct bpf_link *link = NULL;
- err = bpf_prog_load(file, BPF_PROG_TYPE_RAW_TRACEPOINT, &obj, &prog_fd);
+ err = bpf_prog_test_load(file, BPF_PROG_TYPE_RAW_TRACEPOINT, &obj, &prog_fd);
if (CHECK(err, "prog_load raw tp", "err %d errno %d\n", err, errno))
return;
- prog = bpf_object__find_program_by_title(obj, prog_name);
+ prog = bpf_object__find_program_by_name(obj, prog_name);
if (CHECK(!prog, "find_prog", "prog '%s' not found\n", prog_name))
goto close_prog;
link = bpf_program__attach_raw_tracepoint(prog, "sched_switch");
- if (CHECK(IS_ERR(link), "attach_raw_tp", "err %ld\n", PTR_ERR(link)))
+ if (!ASSERT_OK_PTR(link, "attach_raw_tp"))
goto close_prog;
/* find map fds */
@@ -59,7 +59,6 @@ void test_stacktrace_map_raw_tp(void)
goto close_prog;
close_prog:
- if (!IS_ERR_OR_NULL(link))
- bpf_link__destroy(link);
+ bpf_link__destroy(link);
bpf_object__close(obj);
}
diff --git a/tools/testing/selftests/bpf/prog_tests/stacktrace_map_skip.c b/tools/testing/selftests/bpf/prog_tests/stacktrace_map_skip.c
new file mode 100644
index 000000000000..1932b1e0685c
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/stacktrace_map_skip.c
@@ -0,0 +1,63 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <test_progs.h>
+#include "stacktrace_map_skip.skel.h"
+
+#define TEST_STACK_DEPTH 2
+
+void test_stacktrace_map_skip(void)
+{
+ struct stacktrace_map_skip *skel;
+ int stackid_hmap_fd, stackmap_fd, stack_amap_fd;
+ int err, stack_trace_len;
+
+ skel = stacktrace_map_skip__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "skel_open_and_load"))
+ return;
+
+ /* find map fds */
+ stackid_hmap_fd = bpf_map__fd(skel->maps.stackid_hmap);
+ if (!ASSERT_GE(stackid_hmap_fd, 0, "stackid_hmap fd"))
+ goto out;
+
+ stackmap_fd = bpf_map__fd(skel->maps.stackmap);
+ if (!ASSERT_GE(stackmap_fd, 0, "stackmap fd"))
+ goto out;
+
+ stack_amap_fd = bpf_map__fd(skel->maps.stack_amap);
+ if (!ASSERT_GE(stack_amap_fd, 0, "stack_amap fd"))
+ goto out;
+
+ skel->bss->pid = getpid();
+
+ err = stacktrace_map_skip__attach(skel);
+ if (!ASSERT_OK(err, "skel_attach"))
+ goto out;
+
+ /* give some time for bpf program run */
+ sleep(1);
+
+ /* disable stack trace collection */
+ skel->bss->control = 1;
+
+ /* for every element in stackid_hmap, we can find a corresponding one
+ * in stackmap, and vise versa.
+ */
+ err = compare_map_keys(stackid_hmap_fd, stackmap_fd);
+ if (!ASSERT_OK(err, "compare_map_keys stackid_hmap vs. stackmap"))
+ goto out;
+
+ err = compare_map_keys(stackmap_fd, stackid_hmap_fd);
+ if (!ASSERT_OK(err, "compare_map_keys stackmap vs. stackid_hmap"))
+ goto out;
+
+ stack_trace_len = TEST_STACK_DEPTH * sizeof(__u64);
+ err = compare_stack_ips(stackmap_fd, stack_amap_fd, stack_trace_len);
+ if (!ASSERT_OK(err, "compare_stack_ips stackmap vs. stack_amap"))
+ goto out;
+
+ if (!ASSERT_EQ(skel->bss->failed, 0, "skip_failed"))
+ goto out;
+
+out:
+ stacktrace_map_skip__destroy(skel);
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/static_linked.c b/tools/testing/selftests/bpf/prog_tests/static_linked.c
new file mode 100644
index 000000000000..5c4e3014e063
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/static_linked.c
@@ -0,0 +1,35 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2019 Facebook */
+
+#include <test_progs.h>
+#include "test_static_linked.skel.h"
+
+void test_static_linked(void)
+{
+ int err;
+ struct test_static_linked* skel;
+
+ skel = test_static_linked__open();
+ if (!ASSERT_OK_PTR(skel, "skel_open"))
+ return;
+
+ skel->rodata->rovar1 = 1;
+ skel->rodata->rovar2 = 4;
+
+ err = test_static_linked__load(skel);
+ if (!ASSERT_OK(err, "skel_load"))
+ goto cleanup;
+
+ err = test_static_linked__attach(skel);
+ if (!ASSERT_OK(err, "skel_attach"))
+ goto cleanup;
+
+ /* trigger */
+ usleep(1);
+
+ ASSERT_EQ(skel->data->var1, 1 * 2 + 2 + 3, "var1");
+ ASSERT_EQ(skel->data->var2, 4 * 3 + 5 + 6, "var2");
+
+cleanup:
+ test_static_linked__destroy(skel);
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/struct_ops_autocreate.c b/tools/testing/selftests/bpf/prog_tests/struct_ops_autocreate.c
new file mode 100644
index 000000000000..a5cc593c1e1d
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/struct_ops_autocreate.c
@@ -0,0 +1,159 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include <test_progs.h>
+#include "struct_ops_autocreate.skel.h"
+#include "struct_ops_autocreate2.skel.h"
+
+static void cant_load_full_object(void)
+{
+ struct struct_ops_autocreate *skel;
+ char *log = NULL;
+ int err;
+
+ skel = struct_ops_autocreate__open();
+ if (!ASSERT_OK_PTR(skel, "struct_ops_autocreate__open"))
+ return;
+
+ if (start_libbpf_log_capture())
+ goto cleanup;
+ /* The testmod_2 map BTF type (struct bpf_testmod_ops___v2) doesn't
+ * match the BTF of the actual struct bpf_testmod_ops defined in the
+ * kernel, so we should fail to load it if we don't disable autocreate
+ * for that map.
+ */
+ err = struct_ops_autocreate__load(skel);
+ log = stop_libbpf_log_capture();
+ if (!ASSERT_ERR(err, "struct_ops_autocreate__load"))
+ goto cleanup;
+
+ ASSERT_HAS_SUBSTR(log, "libbpf: struct_ops init_kern", "init_kern message");
+ ASSERT_EQ(err, -ENOTSUP, "errno should be ENOTSUP");
+
+cleanup:
+ free(log);
+ struct_ops_autocreate__destroy(skel);
+}
+
+static int check_test_1_link(struct struct_ops_autocreate *skel, struct bpf_map *map)
+{
+ struct bpf_link *link;
+ int err;
+
+ link = bpf_map__attach_struct_ops(skel->maps.testmod_1);
+ if (!ASSERT_OK_PTR(link, "bpf_map__attach_struct_ops"))
+ return -1;
+
+ /* test_1() would be called from bpf_dummy_reg2() in bpf_testmod.c */
+ err = ASSERT_EQ(skel->bss->test_1_result, 42, "test_1_result");
+ bpf_link__destroy(link);
+ return err;
+}
+
+static void can_load_partial_object(void)
+{
+ struct struct_ops_autocreate *skel;
+ int err;
+
+ skel = struct_ops_autocreate__open();
+ if (!ASSERT_OK_PTR(skel, "struct_ops_autocreate__open_opts"))
+ return;
+
+ err = bpf_map__set_autocreate(skel->maps.testmod_2, false);
+ if (!ASSERT_OK(err, "bpf_map__set_autocreate"))
+ goto cleanup;
+
+ ASSERT_TRUE(bpf_program__autoload(skel->progs.test_1), "test_1 default autoload");
+ ASSERT_TRUE(bpf_program__autoload(skel->progs.test_2), "test_2 default autoload");
+
+ err = struct_ops_autocreate__load(skel);
+ if (ASSERT_OK(err, "struct_ops_autocreate__load"))
+ goto cleanup;
+
+ ASSERT_TRUE(bpf_program__autoload(skel->progs.test_1), "test_1 actual autoload");
+ ASSERT_FALSE(bpf_program__autoload(skel->progs.test_2), "test_2 actual autoload");
+
+ check_test_1_link(skel, skel->maps.testmod_1);
+
+cleanup:
+ struct_ops_autocreate__destroy(skel);
+}
+
+static void optional_maps(void)
+{
+ struct struct_ops_autocreate *skel;
+ int err;
+
+ skel = struct_ops_autocreate__open();
+ if (!ASSERT_OK_PTR(skel, "struct_ops_autocreate__open"))
+ return;
+
+ ASSERT_TRUE(bpf_map__autocreate(skel->maps.testmod_1), "testmod_1 autocreate");
+ ASSERT_TRUE(bpf_map__autocreate(skel->maps.testmod_2), "testmod_2 autocreate");
+ ASSERT_FALSE(bpf_map__autocreate(skel->maps.optional_map), "optional_map autocreate");
+ ASSERT_FALSE(bpf_map__autocreate(skel->maps.optional_map2), "optional_map2 autocreate");
+
+ err = bpf_map__set_autocreate(skel->maps.testmod_1, false);
+ err |= bpf_map__set_autocreate(skel->maps.testmod_2, false);
+ err |= bpf_map__set_autocreate(skel->maps.optional_map2, true);
+ if (!ASSERT_OK(err, "bpf_map__set_autocreate"))
+ goto cleanup;
+
+ err = struct_ops_autocreate__load(skel);
+ if (ASSERT_OK(err, "struct_ops_autocreate__load"))
+ goto cleanup;
+
+ check_test_1_link(skel, skel->maps.optional_map2);
+
+cleanup:
+ struct_ops_autocreate__destroy(skel);
+}
+
+/* Swap test_mod1->test_1 program from 'bar' to 'foo' using shadow vars.
+ * test_mod1 load should enable autoload for 'foo'.
+ */
+static void autoload_and_shadow_vars(void)
+{
+ struct struct_ops_autocreate2 *skel = NULL;
+ struct bpf_link *link = NULL;
+ int err;
+
+ skel = struct_ops_autocreate2__open();
+ if (!ASSERT_OK_PTR(skel, "struct_ops_autocreate__open_opts"))
+ return;
+
+ ASSERT_FALSE(bpf_program__autoload(skel->progs.foo), "foo default autoload");
+ ASSERT_FALSE(bpf_program__autoload(skel->progs.bar), "bar default autoload");
+
+ /* loading map testmod_1 would switch foo's autoload to true */
+ skel->struct_ops.testmod_1->test_1 = skel->progs.foo;
+
+ err = struct_ops_autocreate2__load(skel);
+ if (ASSERT_OK(err, "struct_ops_autocreate__load"))
+ goto cleanup;
+
+ ASSERT_TRUE(bpf_program__autoload(skel->progs.foo), "foo actual autoload");
+ ASSERT_FALSE(bpf_program__autoload(skel->progs.bar), "bar actual autoload");
+
+ link = bpf_map__attach_struct_ops(skel->maps.testmod_1);
+ if (!ASSERT_OK_PTR(link, "bpf_map__attach_struct_ops"))
+ goto cleanup;
+
+ /* test_1() would be called from bpf_dummy_reg2() in bpf_testmod.c */
+ err = ASSERT_EQ(skel->bss->test_1_result, 42, "test_1_result");
+
+cleanup:
+ bpf_link__destroy(link);
+ struct_ops_autocreate2__destroy(skel);
+}
+
+void test_struct_ops_autocreate(void)
+{
+ if (test__start_subtest("cant_load_full_object"))
+ cant_load_full_object();
+ if (test__start_subtest("can_load_partial_object"))
+ can_load_partial_object();
+ if (test__start_subtest("autoload_and_shadow_vars"))
+ autoload_and_shadow_vars();
+ if (test__start_subtest("optional_maps"))
+ optional_maps();
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/subprogs.c b/tools/testing/selftests/bpf/prog_tests/subprogs.c
new file mode 100644
index 000000000000..903f35a9e62e
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/subprogs.c
@@ -0,0 +1,96 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2020 Facebook */
+#include <test_progs.h>
+#include "test_subprogs.skel.h"
+#include "test_subprogs_unused.skel.h"
+
+struct toggler_ctx {
+ int fd;
+ bool stop;
+};
+
+static void *toggle_jit_harden(void *arg)
+{
+ struct toggler_ctx *ctx = arg;
+ char two = '2';
+ char zero = '0';
+
+ while (!ctx->stop) {
+ lseek(ctx->fd, SEEK_SET, 0);
+ write(ctx->fd, &two, sizeof(two));
+ lseek(ctx->fd, SEEK_SET, 0);
+ write(ctx->fd, &zero, sizeof(zero));
+ }
+
+ return NULL;
+}
+
+static void test_subprogs_with_jit_harden_toggling(void)
+{
+ struct toggler_ctx ctx;
+ pthread_t toggler;
+ int err;
+ unsigned int i, loop = 10;
+
+ ctx.fd = open("/proc/sys/net/core/bpf_jit_harden", O_RDWR);
+ if (!ASSERT_GE(ctx.fd, 0, "open bpf_jit_harden"))
+ return;
+
+ ctx.stop = false;
+ err = pthread_create(&toggler, NULL, toggle_jit_harden, &ctx);
+ if (!ASSERT_OK(err, "new toggler"))
+ goto out;
+
+ /* Make toggler thread to run */
+ usleep(1);
+
+ for (i = 0; i < loop; i++) {
+ struct test_subprogs *skel = test_subprogs__open_and_load();
+
+ if (!ASSERT_OK_PTR(skel, "skel open"))
+ break;
+ test_subprogs__destroy(skel);
+ }
+
+ ctx.stop = true;
+ pthread_join(toggler, NULL);
+out:
+ close(ctx.fd);
+}
+
+static void test_subprogs_alone(void)
+{
+ struct test_subprogs *skel;
+ struct test_subprogs_unused *skel2;
+ int err;
+
+ skel = test_subprogs__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "skel_open"))
+ return;
+
+ err = test_subprogs__attach(skel);
+ if (!ASSERT_OK(err, "skel attach"))
+ goto cleanup;
+
+ usleep(1);
+
+ ASSERT_EQ(skel->bss->res1, 12, "res1");
+ ASSERT_EQ(skel->bss->res2, 17, "res2");
+ ASSERT_EQ(skel->bss->res3, 19, "res3");
+ ASSERT_EQ(skel->bss->res4, 36, "res4");
+
+ skel2 = test_subprogs_unused__open_and_load();
+ ASSERT_OK_PTR(skel2, "unused_progs_skel");
+ test_subprogs_unused__destroy(skel2);
+
+cleanup:
+ test_subprogs__destroy(skel);
+}
+
+void test_subprogs(void)
+{
+ if (test__start_subtest("subprogs_alone"))
+ test_subprogs_alone();
+ if (test__start_subtest("subprogs_and_jit_harden"))
+ test_subprogs_with_jit_harden_toggling();
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/subprogs_extable.c b/tools/testing/selftests/bpf/prog_tests/subprogs_extable.c
new file mode 100644
index 000000000000..3afd9f775f68
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/subprogs_extable.c
@@ -0,0 +1,29 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include <test_progs.h>
+#include "test_subprogs_extable.skel.h"
+
+void test_subprogs_extable(void)
+{
+ const int read_sz = 456;
+ struct test_subprogs_extable *skel;
+ int err;
+
+ skel = test_subprogs_extable__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "skel_open_and_load"))
+ return;
+
+ err = test_subprogs_extable__attach(skel);
+ if (!ASSERT_OK(err, "skel_attach"))
+ goto cleanup;
+
+ /* trigger tracepoint */
+ ASSERT_OK(trigger_module_test_read(read_sz), "trigger_read");
+
+ ASSERT_NEQ(skel->bss->triggered, 0, "verify at least one program ran");
+
+ test_subprogs_extable__detach(skel);
+
+cleanup:
+ test_subprogs_extable__destroy(skel);
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/subskeleton.c b/tools/testing/selftests/bpf/prog_tests/subskeleton.c
new file mode 100644
index 000000000000..9c31b7004f9c
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/subskeleton.c
@@ -0,0 +1,78 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) Meta Platforms, Inc. and affiliates. */
+
+#include <test_progs.h>
+#include "test_subskeleton.skel.h"
+#include "test_subskeleton_lib.subskel.h"
+
+static void subskeleton_lib_setup(struct bpf_object *obj)
+{
+ struct test_subskeleton_lib *lib = test_subskeleton_lib__open(obj);
+
+ if (!ASSERT_OK_PTR(lib, "open subskeleton"))
+ return;
+
+ *lib->rodata.var1 = 1;
+ *lib->data.var2 = 2;
+ lib->bss.var3->var3_1 = 3;
+ lib->bss.var3->var3_2 = 4;
+
+ test_subskeleton_lib__destroy(lib);
+}
+
+static int subskeleton_lib_subresult(struct bpf_object *obj)
+{
+ struct test_subskeleton_lib *lib = test_subskeleton_lib__open(obj);
+ int result;
+
+ if (!ASSERT_OK_PTR(lib, "open subskeleton"))
+ return -EINVAL;
+
+ result = *lib->bss.libout1;
+ ASSERT_EQ(result, 1 + 2 + 3 + 4 + 5 + 6, "lib subresult");
+
+ ASSERT_OK_PTR(lib->progs.lib_perf_handler, "lib_perf_handler");
+ ASSERT_STREQ(bpf_program__name(lib->progs.lib_perf_handler),
+ "lib_perf_handler", "program name");
+
+ ASSERT_OK_PTR(lib->maps.map1, "map1");
+ ASSERT_STREQ(bpf_map__name(lib->maps.map1), "map1", "map name");
+
+ ASSERT_EQ(*lib->data.var5, 5, "__weak var5");
+ ASSERT_EQ(*lib->data.var6, 6, "extern var6");
+ ASSERT_TRUE(*lib->kconfig.CONFIG_BPF_SYSCALL, "CONFIG_BPF_SYSCALL");
+
+ test_subskeleton_lib__destroy(lib);
+ return result;
+}
+
+void test_subskeleton(void)
+{
+ int err, result;
+ struct test_subskeleton *skel;
+
+ skel = test_subskeleton__open();
+ if (!ASSERT_OK_PTR(skel, "skel_open"))
+ return;
+
+ skel->rodata->rovar1 = 10;
+ skel->rodata->var1 = 1;
+ subskeleton_lib_setup(skel->obj);
+
+ err = test_subskeleton__load(skel);
+ if (!ASSERT_OK(err, "skel_load"))
+ goto cleanup;
+
+ err = test_subskeleton__attach(skel);
+ if (!ASSERT_OK(err, "skel_attach"))
+ goto cleanup;
+
+ /* trigger tracepoint */
+ usleep(1);
+
+ result = subskeleton_lib_subresult(skel->obj) * 10;
+ ASSERT_EQ(skel->bss->out1, result, "unexpected calculation");
+
+cleanup:
+ test_subskeleton__destroy(skel);
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/syscall.c b/tools/testing/selftests/bpf/prog_tests/syscall.c
new file mode 100644
index 000000000000..0be8301c0ffd
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/syscall.c
@@ -0,0 +1,81 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2021 Facebook */
+#include <test_progs.h>
+#include "syscall.skel.h"
+
+struct args {
+ __u64 log_buf;
+ __u32 log_size;
+ int max_entries;
+ int map_fd;
+ int prog_fd;
+ int btf_fd;
+};
+
+static void test_syscall_load_prog(void)
+{
+ static char verifier_log[8192];
+ struct args ctx = {
+ .max_entries = 1024,
+ .log_buf = (uintptr_t) verifier_log,
+ .log_size = sizeof(verifier_log),
+ };
+ LIBBPF_OPTS(bpf_test_run_opts, tattr,
+ .ctx_in = &ctx,
+ .ctx_size_in = sizeof(ctx),
+ );
+ struct syscall *skel = NULL;
+ __u64 key = 12, value = 0;
+ int err, prog_fd;
+
+ skel = syscall__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "skel_load"))
+ goto cleanup;
+
+ prog_fd = bpf_program__fd(skel->progs.load_prog);
+ err = bpf_prog_test_run_opts(prog_fd, &tattr);
+ ASSERT_EQ(err, 0, "err");
+ ASSERT_EQ(tattr.retval, 1, "retval");
+ ASSERT_GT(ctx.map_fd, 0, "ctx.map_fd");
+ ASSERT_GT(ctx.prog_fd, 0, "ctx.prog_fd");
+ ASSERT_OK(memcmp(verifier_log, "processed", sizeof("processed") - 1),
+ "verifier_log");
+
+ err = bpf_map_lookup_elem(ctx.map_fd, &key, &value);
+ ASSERT_EQ(err, 0, "map_lookup");
+ ASSERT_EQ(value, 34, "map lookup value");
+cleanup:
+ syscall__destroy(skel);
+ if (ctx.prog_fd > 0)
+ close(ctx.prog_fd);
+ if (ctx.map_fd > 0)
+ close(ctx.map_fd);
+ if (ctx.btf_fd > 0)
+ close(ctx.btf_fd);
+}
+
+static void test_syscall_update_outer_map(void)
+{
+ LIBBPF_OPTS(bpf_test_run_opts, opts);
+ struct syscall *skel;
+ int err, prog_fd;
+
+ skel = syscall__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "skel_load"))
+ goto cleanup;
+
+ prog_fd = bpf_program__fd(skel->progs.update_outer_map);
+ err = bpf_prog_test_run_opts(prog_fd, &opts);
+ ASSERT_EQ(err, 0, "err");
+ ASSERT_EQ(opts.retval, 1, "retval");
+cleanup:
+ syscall__destroy(skel);
+}
+
+void test_syscall(void)
+{
+ if (test__start_subtest("load_prog"))
+ test_syscall_load_prog();
+ if (test__start_subtest("update_outer_map"))
+ test_syscall_update_outer_map();
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/tailcalls.c b/tools/testing/selftests/bpf/prog_tests/tailcalls.c
index bb8fe646dd9f..59993fc9c0d7 100644
--- a/tools/testing/selftests/bpf/prog_tests/tailcalls.c
+++ b/tools/testing/selftests/bpf/prog_tests/tailcalls.c
@@ -1,5 +1,9 @@
// SPDX-License-Identifier: GPL-2.0
+#include <unistd.h>
#include <test_progs.h>
+#include <network_helpers.h>
+#include "tailcall_poke.skel.h"
+
/* test_tailcall_1 checks basic functionality by patching multiple locations
* in a single program for a single tail call slot with nop->jmp, jmp->nop
@@ -11,16 +15,20 @@ static void test_tailcall_1(void)
struct bpf_map *prog_array;
struct bpf_program *prog;
struct bpf_object *obj;
- __u32 retval, duration;
char prog_name[32];
char buff[128] = {};
-
- err = bpf_prog_load("tailcall1.o", BPF_PROG_TYPE_SCHED_CLS, &obj,
- &prog_fd);
+ LIBBPF_OPTS(bpf_test_run_opts, topts,
+ .data_in = buff,
+ .data_size_in = sizeof(buff),
+ .repeat = 1,
+ );
+
+ err = bpf_prog_test_load("tailcall1.bpf.o", BPF_PROG_TYPE_SCHED_CLS, &obj,
+ &prog_fd);
if (CHECK_FAIL(err))
return;
- prog = bpf_object__find_program_by_title(obj, "classifier");
+ prog = bpf_object__find_program_by_name(obj, "entry");
if (CHECK_FAIL(!prog))
goto out;
@@ -36,10 +44,10 @@ static void test_tailcall_1(void)
if (CHECK_FAIL(map_fd < 0))
goto out;
- for (i = 0; i < bpf_map__def(prog_array)->max_entries; i++) {
- snprintf(prog_name, sizeof(prog_name), "classifier/%i", i);
+ for (i = 0; i < bpf_map__max_entries(prog_array); i++) {
+ snprintf(prog_name, sizeof(prog_name), "classifier_%d", i);
- prog = bpf_object__find_program_by_title(obj, prog_name);
+ prog = bpf_object__find_program_by_name(obj, prog_name);
if (CHECK_FAIL(!prog))
goto out;
@@ -52,26 +60,24 @@ static void test_tailcall_1(void)
goto out;
}
- for (i = 0; i < bpf_map__def(prog_array)->max_entries; i++) {
- err = bpf_prog_test_run(main_fd, 1, buff, sizeof(buff), 0,
- &duration, &retval, NULL);
- CHECK(err || retval != i, "tailcall",
- "err %d errno %d retval %d\n", err, errno, retval);
+ for (i = 0; i < bpf_map__max_entries(prog_array); i++) {
+ err = bpf_prog_test_run_opts(main_fd, &topts);
+ ASSERT_OK(err, "tailcall");
+ ASSERT_EQ(topts.retval, i, "tailcall retval");
err = bpf_map_delete_elem(map_fd, &i);
if (CHECK_FAIL(err))
goto out;
}
- err = bpf_prog_test_run(main_fd, 1, buff, sizeof(buff), 0,
- &duration, &retval, NULL);
- CHECK(err || retval != 3, "tailcall", "err %d errno %d retval %d\n",
- err, errno, retval);
+ err = bpf_prog_test_run_opts(main_fd, &topts);
+ ASSERT_OK(err, "tailcall");
+ ASSERT_EQ(topts.retval, 3, "tailcall retval");
- for (i = 0; i < bpf_map__def(prog_array)->max_entries; i++) {
- snprintf(prog_name, sizeof(prog_name), "classifier/%i", i);
+ for (i = 0; i < bpf_map__max_entries(prog_array); i++) {
+ snprintf(prog_name, sizeof(prog_name), "classifier_%d", i);
- prog = bpf_object__find_program_by_title(obj, prog_name);
+ prog = bpf_object__find_program_by_name(obj, prog_name);
if (CHECK_FAIL(!prog))
goto out;
@@ -84,16 +90,15 @@ static void test_tailcall_1(void)
goto out;
}
- err = bpf_prog_test_run(main_fd, 1, buff, sizeof(buff), 0,
- &duration, &retval, NULL);
- CHECK(err || retval != 0, "tailcall", "err %d errno %d retval %d\n",
- err, errno, retval);
+ err = bpf_prog_test_run_opts(main_fd, &topts);
+ ASSERT_OK(err, "tailcall");
+ ASSERT_OK(topts.retval, "tailcall retval");
- for (i = 0; i < bpf_map__def(prog_array)->max_entries; i++) {
- j = bpf_map__def(prog_array)->max_entries - 1 - i;
- snprintf(prog_name, sizeof(prog_name), "classifier/%i", j);
+ for (i = 0; i < bpf_map__max_entries(prog_array); i++) {
+ j = bpf_map__max_entries(prog_array) - 1 - i;
+ snprintf(prog_name, sizeof(prog_name), "classifier_%d", j);
- prog = bpf_object__find_program_by_title(obj, prog_name);
+ prog = bpf_object__find_program_by_name(obj, prog_name);
if (CHECK_FAIL(!prog))
goto out;
@@ -106,33 +111,30 @@ static void test_tailcall_1(void)
goto out;
}
- for (i = 0; i < bpf_map__def(prog_array)->max_entries; i++) {
- j = bpf_map__def(prog_array)->max_entries - 1 - i;
+ for (i = 0; i < bpf_map__max_entries(prog_array); i++) {
+ j = bpf_map__max_entries(prog_array) - 1 - i;
- err = bpf_prog_test_run(main_fd, 1, buff, sizeof(buff), 0,
- &duration, &retval, NULL);
- CHECK(err || retval != j, "tailcall",
- "err %d errno %d retval %d\n", err, errno, retval);
+ err = bpf_prog_test_run_opts(main_fd, &topts);
+ ASSERT_OK(err, "tailcall");
+ ASSERT_EQ(topts.retval, j, "tailcall retval");
err = bpf_map_delete_elem(map_fd, &i);
if (CHECK_FAIL(err))
goto out;
}
- err = bpf_prog_test_run(main_fd, 1, buff, sizeof(buff), 0,
- &duration, &retval, NULL);
- CHECK(err || retval != 3, "tailcall", "err %d errno %d retval %d\n",
- err, errno, retval);
+ err = bpf_prog_test_run_opts(main_fd, &topts);
+ ASSERT_OK(err, "tailcall");
+ ASSERT_EQ(topts.retval, 3, "tailcall retval");
- for (i = 0; i < bpf_map__def(prog_array)->max_entries; i++) {
+ for (i = 0; i < bpf_map__max_entries(prog_array); i++) {
err = bpf_map_delete_elem(map_fd, &i);
if (CHECK_FAIL(err >= 0 || errno != ENOENT))
goto out;
- err = bpf_prog_test_run(main_fd, 1, buff, sizeof(buff), 0,
- &duration, &retval, NULL);
- CHECK(err || retval != 3, "tailcall",
- "err %d errno %d retval %d\n", err, errno, retval);
+ err = bpf_prog_test_run_opts(main_fd, &topts);
+ ASSERT_OK(err, "tailcall");
+ ASSERT_EQ(topts.retval, 3, "tailcall retval");
}
out:
@@ -149,16 +151,20 @@ static void test_tailcall_2(void)
struct bpf_map *prog_array;
struct bpf_program *prog;
struct bpf_object *obj;
- __u32 retval, duration;
char prog_name[32];
char buff[128] = {};
-
- err = bpf_prog_load("tailcall2.o", BPF_PROG_TYPE_SCHED_CLS, &obj,
- &prog_fd);
+ LIBBPF_OPTS(bpf_test_run_opts, topts,
+ .data_in = buff,
+ .data_size_in = sizeof(buff),
+ .repeat = 1,
+ );
+
+ err = bpf_prog_test_load("tailcall2.bpf.o", BPF_PROG_TYPE_SCHED_CLS, &obj,
+ &prog_fd);
if (CHECK_FAIL(err))
return;
- prog = bpf_object__find_program_by_title(obj, "classifier");
+ prog = bpf_object__find_program_by_name(obj, "entry");
if (CHECK_FAIL(!prog))
goto out;
@@ -174,10 +180,10 @@ static void test_tailcall_2(void)
if (CHECK_FAIL(map_fd < 0))
goto out;
- for (i = 0; i < bpf_map__def(prog_array)->max_entries; i++) {
- snprintf(prog_name, sizeof(prog_name), "classifier/%i", i);
+ for (i = 0; i < bpf_map__max_entries(prog_array); i++) {
+ snprintf(prog_name, sizeof(prog_name), "classifier_%d", i);
- prog = bpf_object__find_program_by_title(obj, prog_name);
+ prog = bpf_object__find_program_by_name(obj, prog_name);
if (CHECK_FAIL(!prog))
goto out;
@@ -190,52 +196,52 @@ static void test_tailcall_2(void)
goto out;
}
- err = bpf_prog_test_run(main_fd, 1, buff, sizeof(buff), 0,
- &duration, &retval, NULL);
- CHECK(err || retval != 2, "tailcall", "err %d errno %d retval %d\n",
- err, errno, retval);
+ err = bpf_prog_test_run_opts(main_fd, &topts);
+ ASSERT_OK(err, "tailcall");
+ ASSERT_EQ(topts.retval, 2, "tailcall retval");
i = 2;
err = bpf_map_delete_elem(map_fd, &i);
if (CHECK_FAIL(err))
goto out;
- err = bpf_prog_test_run(main_fd, 1, buff, sizeof(buff), 0,
- &duration, &retval, NULL);
- CHECK(err || retval != 1, "tailcall", "err %d errno %d retval %d\n",
- err, errno, retval);
+ err = bpf_prog_test_run_opts(main_fd, &topts);
+ ASSERT_OK(err, "tailcall");
+ ASSERT_EQ(topts.retval, 1, "tailcall retval");
i = 0;
err = bpf_map_delete_elem(map_fd, &i);
if (CHECK_FAIL(err))
goto out;
- err = bpf_prog_test_run(main_fd, 1, buff, sizeof(buff), 0,
- &duration, &retval, NULL);
- CHECK(err || retval != 3, "tailcall", "err %d errno %d retval %d\n",
- err, errno, retval);
+ err = bpf_prog_test_run_opts(main_fd, &topts);
+ ASSERT_OK(err, "tailcall");
+ ASSERT_EQ(topts.retval, 3, "tailcall retval");
out:
bpf_object__close(obj);
}
-/* test_tailcall_3 checks that the count value of the tail call limit
- * enforcement matches with expectations.
- */
-static void test_tailcall_3(void)
+static void test_tailcall_count(const char *which, bool test_fentry,
+ bool test_fexit)
{
+ struct bpf_object *obj = NULL, *fentry_obj = NULL, *fexit_obj = NULL;
+ struct bpf_link *fentry_link = NULL, *fexit_link = NULL;
int err, map_fd, prog_fd, main_fd, data_fd, i, val;
struct bpf_map *prog_array, *data_map;
struct bpf_program *prog;
- struct bpf_object *obj;
- __u32 retval, duration;
char buff[128] = {};
+ LIBBPF_OPTS(bpf_test_run_opts, topts,
+ .data_in = buff,
+ .data_size_in = sizeof(buff),
+ .repeat = 1,
+ );
- err = bpf_prog_load("tailcall3.o", BPF_PROG_TYPE_SCHED_CLS, &obj,
+ err = bpf_prog_test_load(which, BPF_PROG_TYPE_SCHED_CLS, &obj,
&prog_fd);
if (CHECK_FAIL(err))
return;
- prog = bpf_object__find_program_by_title(obj, "classifier");
+ prog = bpf_object__find_program_by_name(obj, "entry");
if (CHECK_FAIL(!prog))
goto out;
@@ -251,7 +257,7 @@ static void test_tailcall_3(void)
if (CHECK_FAIL(map_fd < 0))
goto out;
- prog = bpf_object__find_program_by_title(obj, "classifier/0");
+ prog = bpf_object__find_program_by_name(obj, "classifier_0");
if (CHECK_FAIL(!prog))
goto out;
@@ -264,37 +270,137 @@ static void test_tailcall_3(void)
if (CHECK_FAIL(err))
goto out;
- err = bpf_prog_test_run(main_fd, 1, buff, sizeof(buff), 0,
- &duration, &retval, NULL);
- CHECK(err || retval != 1, "tailcall", "err %d errno %d retval %d\n",
- err, errno, retval);
+ if (test_fentry) {
+ fentry_obj = bpf_object__open_file("tailcall_bpf2bpf_fentry.bpf.o",
+ NULL);
+ if (!ASSERT_OK_PTR(fentry_obj, "open fentry_obj file"))
+ goto out;
+
+ prog = bpf_object__find_program_by_name(fentry_obj, "fentry");
+ if (!ASSERT_OK_PTR(prog, "find fentry prog"))
+ goto out;
+
+ err = bpf_program__set_attach_target(prog, prog_fd,
+ "subprog_tail");
+ if (!ASSERT_OK(err, "set_attach_target subprog_tail"))
+ goto out;
+
+ err = bpf_object__load(fentry_obj);
+ if (!ASSERT_OK(err, "load fentry_obj"))
+ goto out;
+
+ fentry_link = bpf_program__attach_trace(prog);
+ if (!ASSERT_OK_PTR(fentry_link, "attach_trace"))
+ goto out;
+ }
+
+ if (test_fexit) {
+ fexit_obj = bpf_object__open_file("tailcall_bpf2bpf_fexit.bpf.o",
+ NULL);
+ if (!ASSERT_OK_PTR(fexit_obj, "open fexit_obj file"))
+ goto out;
+
+ prog = bpf_object__find_program_by_name(fexit_obj, "fexit");
+ if (!ASSERT_OK_PTR(prog, "find fexit prog"))
+ goto out;
+
+ err = bpf_program__set_attach_target(prog, prog_fd,
+ "subprog_tail");
+ if (!ASSERT_OK(err, "set_attach_target subprog_tail"))
+ goto out;
+
+ err = bpf_object__load(fexit_obj);
+ if (!ASSERT_OK(err, "load fexit_obj"))
+ goto out;
+
+ fexit_link = bpf_program__attach_trace(prog);
+ if (!ASSERT_OK_PTR(fexit_link, "attach_trace"))
+ goto out;
+ }
+
+ err = bpf_prog_test_run_opts(main_fd, &topts);
+ ASSERT_OK(err, "tailcall");
+ ASSERT_EQ(topts.retval, 1, "tailcall retval");
data_map = bpf_object__find_map_by_name(obj, "tailcall.bss");
if (CHECK_FAIL(!data_map || !bpf_map__is_internal(data_map)))
- return;
+ goto out;
data_fd = bpf_map__fd(data_map);
- if (CHECK_FAIL(map_fd < 0))
- return;
+ if (CHECK_FAIL(data_fd < 0))
+ goto out;
i = 0;
err = bpf_map_lookup_elem(data_fd, &i, &val);
- CHECK(err || val != 33, "tailcall count", "err %d errno %d count %d\n",
- err, errno, val);
+ ASSERT_OK(err, "tailcall count");
+ ASSERT_EQ(val, 33, "tailcall count");
+
+ if (test_fentry) {
+ data_map = bpf_object__find_map_by_name(fentry_obj, ".bss");
+ if (!ASSERT_FALSE(!data_map || !bpf_map__is_internal(data_map),
+ "find tailcall_bpf2bpf_fentry.bss map"))
+ goto out;
+
+ data_fd = bpf_map__fd(data_map);
+ if (!ASSERT_FALSE(data_fd < 0,
+ "find tailcall_bpf2bpf_fentry.bss map fd"))
+ goto out;
+
+ i = 0;
+ err = bpf_map_lookup_elem(data_fd, &i, &val);
+ ASSERT_OK(err, "fentry count");
+ ASSERT_EQ(val, 33, "fentry count");
+ }
+
+ if (test_fexit) {
+ data_map = bpf_object__find_map_by_name(fexit_obj, ".bss");
+ if (!ASSERT_FALSE(!data_map || !bpf_map__is_internal(data_map),
+ "find tailcall_bpf2bpf_fexit.bss map"))
+ goto out;
+
+ data_fd = bpf_map__fd(data_map);
+ if (!ASSERT_FALSE(data_fd < 0,
+ "find tailcall_bpf2bpf_fexit.bss map fd"))
+ goto out;
+
+ i = 0;
+ err = bpf_map_lookup_elem(data_fd, &i, &val);
+ ASSERT_OK(err, "fexit count");
+ ASSERT_EQ(val, 33, "fexit count");
+ }
i = 0;
err = bpf_map_delete_elem(map_fd, &i);
if (CHECK_FAIL(err))
goto out;
- err = bpf_prog_test_run(main_fd, 1, buff, sizeof(buff), 0,
- &duration, &retval, NULL);
- CHECK(err || retval != 0, "tailcall", "err %d errno %d retval %d\n",
- err, errno, retval);
+ err = bpf_prog_test_run_opts(main_fd, &topts);
+ ASSERT_OK(err, "tailcall");
+ ASSERT_OK(topts.retval, "tailcall retval");
out:
+ bpf_link__destroy(fentry_link);
+ bpf_link__destroy(fexit_link);
+ bpf_object__close(fentry_obj);
+ bpf_object__close(fexit_obj);
bpf_object__close(obj);
}
+/* test_tailcall_3 checks that the count value of the tail call limit
+ * enforcement matches with expectations. JIT uses direct jump.
+ */
+static void test_tailcall_3(void)
+{
+ test_tailcall_count("tailcall3.bpf.o", false, false);
+}
+
+/* test_tailcall_6 checks that the count value of the tail call limit
+ * enforcement matches with expectations. JIT uses indirect jump.
+ */
+static void test_tailcall_6(void)
+{
+ test_tailcall_count("tailcall6.bpf.o", false, false);
+}
+
/* test_tailcall_4 checks that the kernel properly selects indirect jump
* for the case where the key is not known. Latter is passed via global
* data to select different targets we can compare return value of.
@@ -305,17 +411,21 @@ static void test_tailcall_4(void)
struct bpf_map *prog_array, *data_map;
struct bpf_program *prog;
struct bpf_object *obj;
- __u32 retval, duration;
static const int zero = 0;
char buff[128] = {};
char prog_name[32];
-
- err = bpf_prog_load("tailcall4.o", BPF_PROG_TYPE_SCHED_CLS, &obj,
- &prog_fd);
+ LIBBPF_OPTS(bpf_test_run_opts, topts,
+ .data_in = buff,
+ .data_size_in = sizeof(buff),
+ .repeat = 1,
+ );
+
+ err = bpf_prog_test_load("tailcall4.bpf.o", BPF_PROG_TYPE_SCHED_CLS, &obj,
+ &prog_fd);
if (CHECK_FAIL(err))
return;
- prog = bpf_object__find_program_by_title(obj, "classifier");
+ prog = bpf_object__find_program_by_name(obj, "entry");
if (CHECK_FAIL(!prog))
goto out;
@@ -333,16 +443,16 @@ static void test_tailcall_4(void)
data_map = bpf_object__find_map_by_name(obj, "tailcall.bss");
if (CHECK_FAIL(!data_map || !bpf_map__is_internal(data_map)))
- return;
+ goto out;
data_fd = bpf_map__fd(data_map);
- if (CHECK_FAIL(map_fd < 0))
- return;
+ if (CHECK_FAIL(data_fd < 0))
+ goto out;
- for (i = 0; i < bpf_map__def(prog_array)->max_entries; i++) {
- snprintf(prog_name, sizeof(prog_name), "classifier/%i", i);
+ for (i = 0; i < bpf_map__max_entries(prog_array); i++) {
+ snprintf(prog_name, sizeof(prog_name), "classifier_%d", i);
- prog = bpf_object__find_program_by_title(obj, prog_name);
+ prog = bpf_object__find_program_by_name(obj, prog_name);
if (CHECK_FAIL(!prog))
goto out;
@@ -355,18 +465,17 @@ static void test_tailcall_4(void)
goto out;
}
- for (i = 0; i < bpf_map__def(prog_array)->max_entries; i++) {
+ for (i = 0; i < bpf_map__max_entries(prog_array); i++) {
err = bpf_map_update_elem(data_fd, &zero, &i, BPF_ANY);
if (CHECK_FAIL(err))
goto out;
- err = bpf_prog_test_run(main_fd, 1, buff, sizeof(buff), 0,
- &duration, &retval, NULL);
- CHECK(err || retval != i, "tailcall",
- "err %d errno %d retval %d\n", err, errno, retval);
+ err = bpf_prog_test_run_opts(main_fd, &topts);
+ ASSERT_OK(err, "tailcall");
+ ASSERT_EQ(topts.retval, i, "tailcall retval");
}
- for (i = 0; i < bpf_map__def(prog_array)->max_entries; i++) {
+ for (i = 0; i < bpf_map__max_entries(prog_array); i++) {
err = bpf_map_update_elem(data_fd, &zero, &i, BPF_ANY);
if (CHECK_FAIL(err))
goto out;
@@ -375,10 +484,9 @@ static void test_tailcall_4(void)
if (CHECK_FAIL(err))
goto out;
- err = bpf_prog_test_run(main_fd, 1, buff, sizeof(buff), 0,
- &duration, &retval, NULL);
- CHECK(err || retval != 3, "tailcall",
- "err %d errno %d retval %d\n", err, errno, retval);
+ err = bpf_prog_test_run_opts(main_fd, &topts);
+ ASSERT_OK(err, "tailcall");
+ ASSERT_EQ(topts.retval, 3, "tailcall retval");
}
out:
bpf_object__close(obj);
@@ -393,17 +501,21 @@ static void test_tailcall_5(void)
struct bpf_map *prog_array, *data_map;
struct bpf_program *prog;
struct bpf_object *obj;
- __u32 retval, duration;
static const int zero = 0;
char buff[128] = {};
char prog_name[32];
-
- err = bpf_prog_load("tailcall5.o", BPF_PROG_TYPE_SCHED_CLS, &obj,
- &prog_fd);
+ LIBBPF_OPTS(bpf_test_run_opts, topts,
+ .data_in = buff,
+ .data_size_in = sizeof(buff),
+ .repeat = 1,
+ );
+
+ err = bpf_prog_test_load("tailcall5.bpf.o", BPF_PROG_TYPE_SCHED_CLS, &obj,
+ &prog_fd);
if (CHECK_FAIL(err))
return;
- prog = bpf_object__find_program_by_title(obj, "classifier");
+ prog = bpf_object__find_program_by_name(obj, "entry");
if (CHECK_FAIL(!prog))
goto out;
@@ -421,16 +533,16 @@ static void test_tailcall_5(void)
data_map = bpf_object__find_map_by_name(obj, "tailcall.bss");
if (CHECK_FAIL(!data_map || !bpf_map__is_internal(data_map)))
- return;
+ goto out;
data_fd = bpf_map__fd(data_map);
- if (CHECK_FAIL(map_fd < 0))
- return;
+ if (CHECK_FAIL(data_fd < 0))
+ goto out;
- for (i = 0; i < bpf_map__def(prog_array)->max_entries; i++) {
- snprintf(prog_name, sizeof(prog_name), "classifier/%i", i);
+ for (i = 0; i < bpf_map__max_entries(prog_array); i++) {
+ snprintf(prog_name, sizeof(prog_name), "classifier_%d", i);
- prog = bpf_object__find_program_by_title(obj, prog_name);
+ prog = bpf_object__find_program_by_name(obj, prog_name);
if (CHECK_FAIL(!prog))
goto out;
@@ -443,18 +555,17 @@ static void test_tailcall_5(void)
goto out;
}
- for (i = 0; i < bpf_map__def(prog_array)->max_entries; i++) {
+ for (i = 0; i < bpf_map__max_entries(prog_array); i++) {
err = bpf_map_update_elem(data_fd, &zero, &key[i], BPF_ANY);
if (CHECK_FAIL(err))
goto out;
- err = bpf_prog_test_run(main_fd, 1, buff, sizeof(buff), 0,
- &duration, &retval, NULL);
- CHECK(err || retval != i, "tailcall",
- "err %d errno %d retval %d\n", err, errno, retval);
+ err = bpf_prog_test_run_opts(main_fd, &topts);
+ ASSERT_OK(err, "tailcall");
+ ASSERT_EQ(topts.retval, i, "tailcall retval");
}
- for (i = 0; i < bpf_map__def(prog_array)->max_entries; i++) {
+ for (i = 0; i < bpf_map__max_entries(prog_array); i++) {
err = bpf_map_update_elem(data_fd, &zero, &key[i], BPF_ANY);
if (CHECK_FAIL(err))
goto out;
@@ -463,15 +574,619 @@ static void test_tailcall_5(void)
if (CHECK_FAIL(err))
goto out;
- err = bpf_prog_test_run(main_fd, 1, buff, sizeof(buff), 0,
- &duration, &retval, NULL);
- CHECK(err || retval != 3, "tailcall",
- "err %d errno %d retval %d\n", err, errno, retval);
+ err = bpf_prog_test_run_opts(main_fd, &topts);
+ ASSERT_OK(err, "tailcall");
+ ASSERT_EQ(topts.retval, 3, "tailcall retval");
+ }
+out:
+ bpf_object__close(obj);
+}
+
+/* test_tailcall_bpf2bpf_1 purpose is to make sure that tailcalls are working
+ * correctly in correlation with BPF subprograms
+ */
+static void test_tailcall_bpf2bpf_1(void)
+{
+ int err, map_fd, prog_fd, main_fd, i;
+ struct bpf_map *prog_array;
+ struct bpf_program *prog;
+ struct bpf_object *obj;
+ char prog_name[32];
+ LIBBPF_OPTS(bpf_test_run_opts, topts,
+ .data_in = &pkt_v4,
+ .data_size_in = sizeof(pkt_v4),
+ .repeat = 1,
+ );
+
+ err = bpf_prog_test_load("tailcall_bpf2bpf1.bpf.o", BPF_PROG_TYPE_SCHED_CLS,
+ &obj, &prog_fd);
+ if (CHECK_FAIL(err))
+ return;
+
+ prog = bpf_object__find_program_by_name(obj, "entry");
+ if (CHECK_FAIL(!prog))
+ goto out;
+
+ main_fd = bpf_program__fd(prog);
+ if (CHECK_FAIL(main_fd < 0))
+ goto out;
+
+ prog_array = bpf_object__find_map_by_name(obj, "jmp_table");
+ if (CHECK_FAIL(!prog_array))
+ goto out;
+
+ map_fd = bpf_map__fd(prog_array);
+ if (CHECK_FAIL(map_fd < 0))
+ goto out;
+
+ /* nop -> jmp */
+ for (i = 0; i < bpf_map__max_entries(prog_array); i++) {
+ snprintf(prog_name, sizeof(prog_name), "classifier_%d", i);
+
+ prog = bpf_object__find_program_by_name(obj, prog_name);
+ if (CHECK_FAIL(!prog))
+ goto out;
+
+ prog_fd = bpf_program__fd(prog);
+ if (CHECK_FAIL(prog_fd < 0))
+ goto out;
+
+ err = bpf_map_update_elem(map_fd, &i, &prog_fd, BPF_ANY);
+ if (CHECK_FAIL(err))
+ goto out;
}
+
+ err = bpf_prog_test_run_opts(main_fd, &topts);
+ ASSERT_OK(err, "tailcall");
+ ASSERT_EQ(topts.retval, 1, "tailcall retval");
+
+ /* jmp -> nop, call subprog that will do tailcall */
+ i = 1;
+ err = bpf_map_delete_elem(map_fd, &i);
+ if (CHECK_FAIL(err))
+ goto out;
+
+ err = bpf_prog_test_run_opts(main_fd, &topts);
+ ASSERT_OK(err, "tailcall");
+ ASSERT_OK(topts.retval, "tailcall retval");
+
+ /* make sure that subprog can access ctx and entry prog that
+ * called this subprog can properly return
+ */
+ i = 0;
+ err = bpf_map_delete_elem(map_fd, &i);
+ if (CHECK_FAIL(err))
+ goto out;
+
+ err = bpf_prog_test_run_opts(main_fd, &topts);
+ ASSERT_OK(err, "tailcall");
+ ASSERT_EQ(topts.retval, sizeof(pkt_v4) * 2, "tailcall retval");
+out:
+ bpf_object__close(obj);
+}
+
+/* test_tailcall_bpf2bpf_2 checks that the count value of the tail call limit
+ * enforcement matches with expectations when tailcall is preceded with
+ * bpf2bpf call.
+ */
+static void test_tailcall_bpf2bpf_2(void)
+{
+ int err, map_fd, prog_fd, main_fd, data_fd, i, val;
+ struct bpf_map *prog_array, *data_map;
+ struct bpf_program *prog;
+ struct bpf_object *obj;
+ char buff[128] = {};
+ LIBBPF_OPTS(bpf_test_run_opts, topts,
+ .data_in = buff,
+ .data_size_in = sizeof(buff),
+ .repeat = 1,
+ );
+
+ err = bpf_prog_test_load("tailcall_bpf2bpf2.bpf.o", BPF_PROG_TYPE_SCHED_CLS,
+ &obj, &prog_fd);
+ if (CHECK_FAIL(err))
+ return;
+
+ prog = bpf_object__find_program_by_name(obj, "entry");
+ if (CHECK_FAIL(!prog))
+ goto out;
+
+ main_fd = bpf_program__fd(prog);
+ if (CHECK_FAIL(main_fd < 0))
+ goto out;
+
+ prog_array = bpf_object__find_map_by_name(obj, "jmp_table");
+ if (CHECK_FAIL(!prog_array))
+ goto out;
+
+ map_fd = bpf_map__fd(prog_array);
+ if (CHECK_FAIL(map_fd < 0))
+ goto out;
+
+ prog = bpf_object__find_program_by_name(obj, "classifier_0");
+ if (CHECK_FAIL(!prog))
+ goto out;
+
+ prog_fd = bpf_program__fd(prog);
+ if (CHECK_FAIL(prog_fd < 0))
+ goto out;
+
+ i = 0;
+ err = bpf_map_update_elem(map_fd, &i, &prog_fd, BPF_ANY);
+ if (CHECK_FAIL(err))
+ goto out;
+
+ err = bpf_prog_test_run_opts(main_fd, &topts);
+ ASSERT_OK(err, "tailcall");
+ ASSERT_EQ(topts.retval, 1, "tailcall retval");
+
+ data_map = bpf_object__find_map_by_name(obj, "tailcall.bss");
+ if (CHECK_FAIL(!data_map || !bpf_map__is_internal(data_map)))
+ goto out;
+
+ data_fd = bpf_map__fd(data_map);
+ if (CHECK_FAIL(data_fd < 0))
+ goto out;
+
+ i = 0;
+ err = bpf_map_lookup_elem(data_fd, &i, &val);
+ ASSERT_OK(err, "tailcall count");
+ ASSERT_EQ(val, 33, "tailcall count");
+
+ i = 0;
+ err = bpf_map_delete_elem(map_fd, &i);
+ if (CHECK_FAIL(err))
+ goto out;
+
+ err = bpf_prog_test_run_opts(main_fd, &topts);
+ ASSERT_OK(err, "tailcall");
+ ASSERT_OK(topts.retval, "tailcall retval");
out:
bpf_object__close(obj);
}
+/* test_tailcall_bpf2bpf_3 checks that non-trivial amount of stack (up to
+ * 256 bytes) can be used within bpf subprograms that have the tailcalls
+ * in them
+ */
+static void test_tailcall_bpf2bpf_3(void)
+{
+ int err, map_fd, prog_fd, main_fd, i;
+ struct bpf_map *prog_array;
+ struct bpf_program *prog;
+ struct bpf_object *obj;
+ char prog_name[32];
+ LIBBPF_OPTS(bpf_test_run_opts, topts,
+ .data_in = &pkt_v4,
+ .data_size_in = sizeof(pkt_v4),
+ .repeat = 1,
+ );
+
+ err = bpf_prog_test_load("tailcall_bpf2bpf3.bpf.o", BPF_PROG_TYPE_SCHED_CLS,
+ &obj, &prog_fd);
+ if (CHECK_FAIL(err))
+ return;
+
+ prog = bpf_object__find_program_by_name(obj, "entry");
+ if (CHECK_FAIL(!prog))
+ goto out;
+
+ main_fd = bpf_program__fd(prog);
+ if (CHECK_FAIL(main_fd < 0))
+ goto out;
+
+ prog_array = bpf_object__find_map_by_name(obj, "jmp_table");
+ if (CHECK_FAIL(!prog_array))
+ goto out;
+
+ map_fd = bpf_map__fd(prog_array);
+ if (CHECK_FAIL(map_fd < 0))
+ goto out;
+
+ for (i = 0; i < bpf_map__max_entries(prog_array); i++) {
+ snprintf(prog_name, sizeof(prog_name), "classifier_%d", i);
+
+ prog = bpf_object__find_program_by_name(obj, prog_name);
+ if (CHECK_FAIL(!prog))
+ goto out;
+
+ prog_fd = bpf_program__fd(prog);
+ if (CHECK_FAIL(prog_fd < 0))
+ goto out;
+
+ err = bpf_map_update_elem(map_fd, &i, &prog_fd, BPF_ANY);
+ if (CHECK_FAIL(err))
+ goto out;
+ }
+
+ err = bpf_prog_test_run_opts(main_fd, &topts);
+ ASSERT_OK(err, "tailcall");
+ ASSERT_EQ(topts.retval, sizeof(pkt_v4) * 3, "tailcall retval");
+
+ i = 1;
+ err = bpf_map_delete_elem(map_fd, &i);
+ if (CHECK_FAIL(err))
+ goto out;
+
+ err = bpf_prog_test_run_opts(main_fd, &topts);
+ ASSERT_OK(err, "tailcall");
+ ASSERT_EQ(topts.retval, sizeof(pkt_v4), "tailcall retval");
+
+ i = 0;
+ err = bpf_map_delete_elem(map_fd, &i);
+ if (CHECK_FAIL(err))
+ goto out;
+
+ err = bpf_prog_test_run_opts(main_fd, &topts);
+ ASSERT_OK(err, "tailcall");
+ ASSERT_EQ(topts.retval, sizeof(pkt_v4) * 2, "tailcall retval");
+out:
+ bpf_object__close(obj);
+}
+
+#include "tailcall_bpf2bpf4.skel.h"
+
+/* test_tailcall_bpf2bpf_4 checks that tailcall counter is correctly preserved
+ * across tailcalls combined with bpf2bpf calls. for making sure that tailcall
+ * counter behaves correctly, bpf program will go through following flow:
+ *
+ * entry -> entry_subprog -> tailcall0 -> bpf_func0 -> subprog0 ->
+ * -> tailcall1 -> bpf_func1 -> subprog1 -> tailcall2 -> bpf_func2 ->
+ * subprog2 [here bump global counter] --------^
+ *
+ * We go through first two tailcalls and start counting from the subprog2 where
+ * the loop begins. At the end of the test make sure that the global counter is
+ * equal to 31, because tailcall counter includes the first two tailcalls
+ * whereas global counter is incremented only on loop presented on flow above.
+ *
+ * The noise parameter is used to insert bpf_map_update calls into the logic
+ * to force verifier to patch instructions. This allows us to ensure jump
+ * logic remains correct with instruction movement.
+ */
+static void test_tailcall_bpf2bpf_4(bool noise)
+{
+ int err, map_fd, prog_fd, main_fd, data_fd, i;
+ struct tailcall_bpf2bpf4__bss val;
+ struct bpf_map *prog_array, *data_map;
+ struct bpf_program *prog;
+ struct bpf_object *obj;
+ char prog_name[32];
+ LIBBPF_OPTS(bpf_test_run_opts, topts,
+ .data_in = &pkt_v4,
+ .data_size_in = sizeof(pkt_v4),
+ .repeat = 1,
+ );
+
+ err = bpf_prog_test_load("tailcall_bpf2bpf4.bpf.o", BPF_PROG_TYPE_SCHED_CLS,
+ &obj, &prog_fd);
+ if (CHECK_FAIL(err))
+ return;
+
+ prog = bpf_object__find_program_by_name(obj, "entry");
+ if (CHECK_FAIL(!prog))
+ goto out;
+
+ main_fd = bpf_program__fd(prog);
+ if (CHECK_FAIL(main_fd < 0))
+ goto out;
+
+ prog_array = bpf_object__find_map_by_name(obj, "jmp_table");
+ if (CHECK_FAIL(!prog_array))
+ goto out;
+
+ map_fd = bpf_map__fd(prog_array);
+ if (CHECK_FAIL(map_fd < 0))
+ goto out;
+
+ for (i = 0; i < bpf_map__max_entries(prog_array); i++) {
+ snprintf(prog_name, sizeof(prog_name), "classifier_%d", i);
+
+ prog = bpf_object__find_program_by_name(obj, prog_name);
+ if (CHECK_FAIL(!prog))
+ goto out;
+
+ prog_fd = bpf_program__fd(prog);
+ if (CHECK_FAIL(prog_fd < 0))
+ goto out;
+
+ err = bpf_map_update_elem(map_fd, &i, &prog_fd, BPF_ANY);
+ if (CHECK_FAIL(err))
+ goto out;
+ }
+
+ data_map = bpf_object__find_map_by_name(obj, "tailcall.bss");
+ if (CHECK_FAIL(!data_map || !bpf_map__is_internal(data_map)))
+ goto out;
+
+ data_fd = bpf_map__fd(data_map);
+ if (CHECK_FAIL(data_fd < 0))
+ goto out;
+
+ i = 0;
+ val.noise = noise;
+ val.count = 0;
+ err = bpf_map_update_elem(data_fd, &i, &val, BPF_ANY);
+ if (CHECK_FAIL(err))
+ goto out;
+
+ err = bpf_prog_test_run_opts(main_fd, &topts);
+ ASSERT_OK(err, "tailcall");
+ ASSERT_EQ(topts.retval, sizeof(pkt_v4) * 3, "tailcall retval");
+
+ i = 0;
+ err = bpf_map_lookup_elem(data_fd, &i, &val);
+ ASSERT_OK(err, "tailcall count");
+ ASSERT_EQ(val.count, 31, "tailcall count");
+
+out:
+ bpf_object__close(obj);
+}
+
+#include "tailcall_bpf2bpf6.skel.h"
+
+/* Tail call counting works even when there is data on stack which is
+ * not aligned to 8 bytes.
+ */
+static void test_tailcall_bpf2bpf_6(void)
+{
+ struct tailcall_bpf2bpf6 *obj;
+ int err, map_fd, prog_fd, main_fd, data_fd, i, val;
+ LIBBPF_OPTS(bpf_test_run_opts, topts,
+ .data_in = &pkt_v4,
+ .data_size_in = sizeof(pkt_v4),
+ .repeat = 1,
+ );
+
+ obj = tailcall_bpf2bpf6__open_and_load();
+ if (!ASSERT_OK_PTR(obj, "open and load"))
+ return;
+
+ main_fd = bpf_program__fd(obj->progs.entry);
+ if (!ASSERT_GE(main_fd, 0, "entry prog fd"))
+ goto out;
+
+ map_fd = bpf_map__fd(obj->maps.jmp_table);
+ if (!ASSERT_GE(map_fd, 0, "jmp_table map fd"))
+ goto out;
+
+ prog_fd = bpf_program__fd(obj->progs.classifier_0);
+ if (!ASSERT_GE(prog_fd, 0, "classifier_0 prog fd"))
+ goto out;
+
+ i = 0;
+ err = bpf_map_update_elem(map_fd, &i, &prog_fd, BPF_ANY);
+ if (!ASSERT_OK(err, "jmp_table map update"))
+ goto out;
+
+ err = bpf_prog_test_run_opts(main_fd, &topts);
+ ASSERT_OK(err, "entry prog test run");
+ ASSERT_EQ(topts.retval, 0, "tailcall retval");
+
+ data_fd = bpf_map__fd(obj->maps.bss);
+ if (!ASSERT_GE(data_fd, 0, "bss map fd"))
+ goto out;
+
+ i = 0;
+ err = bpf_map_lookup_elem(data_fd, &i, &val);
+ ASSERT_OK(err, "bss map lookup");
+ ASSERT_EQ(val, 1, "done flag is set");
+
+out:
+ tailcall_bpf2bpf6__destroy(obj);
+}
+
+/* test_tailcall_bpf2bpf_fentry checks that the count value of the tail call
+ * limit enforcement matches with expectations when tailcall is preceded with
+ * bpf2bpf call, and the bpf2bpf call is traced by fentry.
+ */
+static void test_tailcall_bpf2bpf_fentry(void)
+{
+ test_tailcall_count("tailcall_bpf2bpf2.bpf.o", true, false);
+}
+
+/* test_tailcall_bpf2bpf_fexit checks that the count value of the tail call
+ * limit enforcement matches with expectations when tailcall is preceded with
+ * bpf2bpf call, and the bpf2bpf call is traced by fexit.
+ */
+static void test_tailcall_bpf2bpf_fexit(void)
+{
+ test_tailcall_count("tailcall_bpf2bpf2.bpf.o", false, true);
+}
+
+/* test_tailcall_bpf2bpf_fentry_fexit checks that the count value of the tail
+ * call limit enforcement matches with expectations when tailcall is preceded
+ * with bpf2bpf call, and the bpf2bpf call is traced by both fentry and fexit.
+ */
+static void test_tailcall_bpf2bpf_fentry_fexit(void)
+{
+ test_tailcall_count("tailcall_bpf2bpf2.bpf.o", true, true);
+}
+
+/* test_tailcall_bpf2bpf_fentry_entry checks that the count value of the tail
+ * call limit enforcement matches with expectations when tailcall is preceded
+ * with bpf2bpf call, and the bpf2bpf caller is traced by fentry.
+ */
+static void test_tailcall_bpf2bpf_fentry_entry(void)
+{
+ struct bpf_object *tgt_obj = NULL, *fentry_obj = NULL;
+ int err, map_fd, prog_fd, data_fd, i, val;
+ struct bpf_map *prog_array, *data_map;
+ struct bpf_link *fentry_link = NULL;
+ struct bpf_program *prog;
+ char buff[128] = {};
+
+ LIBBPF_OPTS(bpf_test_run_opts, topts,
+ .data_in = buff,
+ .data_size_in = sizeof(buff),
+ .repeat = 1,
+ );
+
+ err = bpf_prog_test_load("tailcall_bpf2bpf2.bpf.o",
+ BPF_PROG_TYPE_SCHED_CLS,
+ &tgt_obj, &prog_fd);
+ if (!ASSERT_OK(err, "load tgt_obj"))
+ return;
+
+ prog_array = bpf_object__find_map_by_name(tgt_obj, "jmp_table");
+ if (!ASSERT_OK_PTR(prog_array, "find jmp_table map"))
+ goto out;
+
+ map_fd = bpf_map__fd(prog_array);
+ if (!ASSERT_FALSE(map_fd < 0, "find jmp_table map fd"))
+ goto out;
+
+ prog = bpf_object__find_program_by_name(tgt_obj, "classifier_0");
+ if (!ASSERT_OK_PTR(prog, "find classifier_0 prog"))
+ goto out;
+
+ prog_fd = bpf_program__fd(prog);
+ if (!ASSERT_FALSE(prog_fd < 0, "find classifier_0 prog fd"))
+ goto out;
+
+ i = 0;
+ err = bpf_map_update_elem(map_fd, &i, &prog_fd, BPF_ANY);
+ if (!ASSERT_OK(err, "update jmp_table"))
+ goto out;
+
+ fentry_obj = bpf_object__open_file("tailcall_bpf2bpf_fentry.bpf.o",
+ NULL);
+ if (!ASSERT_OK_PTR(fentry_obj, "open fentry_obj file"))
+ goto out;
+
+ prog = bpf_object__find_program_by_name(fentry_obj, "fentry");
+ if (!ASSERT_OK_PTR(prog, "find fentry prog"))
+ goto out;
+
+ err = bpf_program__set_attach_target(prog, prog_fd, "classifier_0");
+ if (!ASSERT_OK(err, "set_attach_target classifier_0"))
+ goto out;
+
+ err = bpf_object__load(fentry_obj);
+ if (!ASSERT_OK(err, "load fentry_obj"))
+ goto out;
+
+ fentry_link = bpf_program__attach_trace(prog);
+ if (!ASSERT_OK_PTR(fentry_link, "attach_trace"))
+ goto out;
+
+ err = bpf_prog_test_run_opts(prog_fd, &topts);
+ ASSERT_OK(err, "tailcall");
+ ASSERT_EQ(topts.retval, 1, "tailcall retval");
+
+ data_map = bpf_object__find_map_by_name(tgt_obj, "tailcall.bss");
+ if (!ASSERT_FALSE(!data_map || !bpf_map__is_internal(data_map),
+ "find tailcall.bss map"))
+ goto out;
+
+ data_fd = bpf_map__fd(data_map);
+ if (!ASSERT_FALSE(data_fd < 0, "find tailcall.bss map fd"))
+ goto out;
+
+ i = 0;
+ err = bpf_map_lookup_elem(data_fd, &i, &val);
+ ASSERT_OK(err, "tailcall count");
+ ASSERT_EQ(val, 34, "tailcall count");
+
+ data_map = bpf_object__find_map_by_name(fentry_obj, ".bss");
+ if (!ASSERT_FALSE(!data_map || !bpf_map__is_internal(data_map),
+ "find tailcall_bpf2bpf_fentry.bss map"))
+ goto out;
+
+ data_fd = bpf_map__fd(data_map);
+ if (!ASSERT_FALSE(data_fd < 0,
+ "find tailcall_bpf2bpf_fentry.bss map fd"))
+ goto out;
+
+ i = 0;
+ err = bpf_map_lookup_elem(data_fd, &i, &val);
+ ASSERT_OK(err, "fentry count");
+ ASSERT_EQ(val, 1, "fentry count");
+
+out:
+ bpf_link__destroy(fentry_link);
+ bpf_object__close(fentry_obj);
+ bpf_object__close(tgt_obj);
+}
+
+#define JMP_TABLE "/sys/fs/bpf/jmp_table"
+
+static int poke_thread_exit;
+
+static void *poke_update(void *arg)
+{
+ __u32 zero = 0, prog1_fd, prog2_fd, map_fd;
+ struct tailcall_poke *call = arg;
+
+ map_fd = bpf_map__fd(call->maps.jmp_table);
+ prog1_fd = bpf_program__fd(call->progs.call1);
+ prog2_fd = bpf_program__fd(call->progs.call2);
+
+ while (!poke_thread_exit) {
+ bpf_map_update_elem(map_fd, &zero, &prog1_fd, BPF_ANY);
+ bpf_map_update_elem(map_fd, &zero, &prog2_fd, BPF_ANY);
+ }
+
+ return NULL;
+}
+
+/*
+ * We are trying to hit prog array update during another program load
+ * that shares the same prog array map.
+ *
+ * For that we share the jmp_table map between two skeleton instances
+ * by pinning the jmp_table to same path. Then first skeleton instance
+ * periodically updates jmp_table in 'poke update' thread while we load
+ * the second skeleton instance in the main thread.
+ */
+static void test_tailcall_poke(void)
+{
+ struct tailcall_poke *call, *test;
+ int err, cnt = 10;
+ pthread_t thread;
+
+ unlink(JMP_TABLE);
+
+ call = tailcall_poke__open_and_load();
+ if (!ASSERT_OK_PTR(call, "tailcall_poke__open"))
+ return;
+
+ err = bpf_map__pin(call->maps.jmp_table, JMP_TABLE);
+ if (!ASSERT_OK(err, "bpf_map__pin"))
+ goto out;
+
+ err = pthread_create(&thread, NULL, poke_update, call);
+ if (!ASSERT_OK(err, "new toggler"))
+ goto out;
+
+ while (cnt--) {
+ test = tailcall_poke__open();
+ if (!ASSERT_OK_PTR(test, "tailcall_poke__open"))
+ break;
+
+ err = bpf_map__set_pin_path(test->maps.jmp_table, JMP_TABLE);
+ if (!ASSERT_OK(err, "bpf_map__pin")) {
+ tailcall_poke__destroy(test);
+ break;
+ }
+
+ bpf_program__set_autoload(test->progs.test, true);
+ bpf_program__set_autoload(test->progs.call1, false);
+ bpf_program__set_autoload(test->progs.call2, false);
+
+ err = tailcall_poke__load(test);
+ tailcall_poke__destroy(test);
+ if (!ASSERT_OK(err, "tailcall_poke__load"))
+ break;
+ }
+
+ poke_thread_exit = 1;
+ ASSERT_OK(pthread_join(thread, NULL), "pthread_join");
+
+out:
+ bpf_map__unpin(call->maps.jmp_table, JMP_TABLE);
+ tailcall_poke__destroy(call);
+}
+
void test_tailcalls(void)
{
if (test__start_subtest("tailcall_1"))
@@ -484,4 +1199,28 @@ void test_tailcalls(void)
test_tailcall_4();
if (test__start_subtest("tailcall_5"))
test_tailcall_5();
+ if (test__start_subtest("tailcall_6"))
+ test_tailcall_6();
+ if (test__start_subtest("tailcall_bpf2bpf_1"))
+ test_tailcall_bpf2bpf_1();
+ if (test__start_subtest("tailcall_bpf2bpf_2"))
+ test_tailcall_bpf2bpf_2();
+ if (test__start_subtest("tailcall_bpf2bpf_3"))
+ test_tailcall_bpf2bpf_3();
+ if (test__start_subtest("tailcall_bpf2bpf_4"))
+ test_tailcall_bpf2bpf_4(false);
+ if (test__start_subtest("tailcall_bpf2bpf_5"))
+ test_tailcall_bpf2bpf_4(true);
+ if (test__start_subtest("tailcall_bpf2bpf_6"))
+ test_tailcall_bpf2bpf_6();
+ if (test__start_subtest("tailcall_bpf2bpf_fentry"))
+ test_tailcall_bpf2bpf_fentry();
+ if (test__start_subtest("tailcall_bpf2bpf_fexit"))
+ test_tailcall_bpf2bpf_fexit();
+ if (test__start_subtest("tailcall_bpf2bpf_fentry_fexit"))
+ test_tailcall_bpf2bpf_fentry_fexit();
+ if (test__start_subtest("tailcall_bpf2bpf_fentry_entry"))
+ test_tailcall_bpf2bpf_fentry_entry();
+ if (test__start_subtest("tailcall_poke"))
+ test_tailcall_poke();
}
diff --git a/tools/testing/selftests/bpf/prog_tests/task_fd_query_rawtp.c b/tools/testing/selftests/bpf/prog_tests/task_fd_query_rawtp.c
index 1bdc1d86a50c..3d34bab01e48 100644
--- a/tools/testing/selftests/bpf/prog_tests/task_fd_query_rawtp.c
+++ b/tools/testing/selftests/bpf/prog_tests/task_fd_query_rawtp.c
@@ -3,7 +3,7 @@
void test_task_fd_query_rawtp(void)
{
- const char *file = "./test_get_stack_rawtp.o";
+ const char *file = "./test_get_stack_rawtp.bpf.o";
__u64 probe_offset, probe_addr;
__u32 len, prog_id, fd_type;
struct bpf_object *obj;
@@ -11,7 +11,7 @@ void test_task_fd_query_rawtp(void)
__u32 duration = 0;
char buf[256];
- err = bpf_prog_load(file, BPF_PROG_TYPE_RAW_TRACEPOINT, &obj, &prog_fd);
+ err = bpf_prog_test_load(file, BPF_PROG_TYPE_RAW_TRACEPOINT, &obj, &prog_fd);
if (CHECK(err, "prog_load raw tp", "err %d errno %d\n", err, errno))
return;
diff --git a/tools/testing/selftests/bpf/prog_tests/task_fd_query_tp.c b/tools/testing/selftests/bpf/prog_tests/task_fd_query_tp.c
index 3f131b8fe328..c91eda624657 100644
--- a/tools/testing/selftests/bpf/prog_tests/task_fd_query_tp.c
+++ b/tools/testing/selftests/bpf/prog_tests/task_fd_query_tp.c
@@ -4,7 +4,7 @@
static void test_task_fd_query_tp_core(const char *probe_name,
const char *tp_name)
{
- const char *file = "./test_tracepoint.o";
+ const char *file = "./test_tracepoint.bpf.o";
int err, bytes, efd, prog_fd, pmu_fd;
struct perf_event_attr attr = {};
__u64 probe_offset, probe_addr;
@@ -13,12 +13,17 @@ static void test_task_fd_query_tp_core(const char *probe_name,
__u32 duration = 0;
char buf[256];
- err = bpf_prog_load(file, BPF_PROG_TYPE_TRACEPOINT, &obj, &prog_fd);
- if (CHECK(err, "bpf_prog_load", "err %d errno %d\n", err, errno))
+ err = bpf_prog_test_load(file, BPF_PROG_TYPE_TRACEPOINT, &obj, &prog_fd);
+ if (CHECK(err, "bpf_prog_test_load", "err %d errno %d\n", err, errno))
goto close_prog;
- snprintf(buf, sizeof(buf),
- "/sys/kernel/debug/tracing/events/%s/id", probe_name);
+ if (access("/sys/kernel/tracing/trace", F_OK) == 0) {
+ snprintf(buf, sizeof(buf),
+ "/sys/kernel/tracing/events/%s/id", probe_name);
+ } else {
+ snprintf(buf, sizeof(buf),
+ "/sys/kernel/debug/tracing/events/%s/id", probe_name);
+ }
efd = open(buf, O_RDONLY, 0);
if (CHECK(efd < 0, "open", "err %d errno %d\n", efd, errno))
goto close_prog;
diff --git a/tools/testing/selftests/bpf/prog_tests/task_kfunc.c b/tools/testing/selftests/bpf/prog_tests/task_kfunc.c
new file mode 100644
index 000000000000..d4579f735398
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/task_kfunc.c
@@ -0,0 +1,98 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2022 Meta Platforms, Inc. and affiliates. */
+
+#define _GNU_SOURCE
+#include <sys/wait.h>
+#include <test_progs.h>
+#include <unistd.h>
+
+#include "task_kfunc_failure.skel.h"
+#include "task_kfunc_success.skel.h"
+
+static struct task_kfunc_success *open_load_task_kfunc_skel(void)
+{
+ struct task_kfunc_success *skel;
+ int err;
+
+ skel = task_kfunc_success__open();
+ if (!ASSERT_OK_PTR(skel, "skel_open"))
+ return NULL;
+
+ skel->bss->pid = getpid();
+
+ err = task_kfunc_success__load(skel);
+ if (!ASSERT_OK(err, "skel_load"))
+ goto cleanup;
+
+ return skel;
+
+cleanup:
+ task_kfunc_success__destroy(skel);
+ return NULL;
+}
+
+static void run_success_test(const char *prog_name)
+{
+ struct task_kfunc_success *skel;
+ int status;
+ pid_t child_pid;
+ struct bpf_program *prog;
+ struct bpf_link *link = NULL;
+
+ skel = open_load_task_kfunc_skel();
+ if (!ASSERT_OK_PTR(skel, "open_load_skel"))
+ return;
+
+ if (!ASSERT_OK(skel->bss->err, "pre_spawn_err"))
+ goto cleanup;
+
+ prog = bpf_object__find_program_by_name(skel->obj, prog_name);
+ if (!ASSERT_OK_PTR(prog, "bpf_object__find_program_by_name"))
+ goto cleanup;
+
+ link = bpf_program__attach(prog);
+ if (!ASSERT_OK_PTR(link, "attached_link"))
+ goto cleanup;
+
+ child_pid = fork();
+ if (!ASSERT_GT(child_pid, -1, "child_pid"))
+ goto cleanup;
+ if (child_pid == 0)
+ _exit(0);
+ waitpid(child_pid, &status, 0);
+
+ ASSERT_OK(skel->bss->err, "post_wait_err");
+
+cleanup:
+ bpf_link__destroy(link);
+ task_kfunc_success__destroy(skel);
+}
+
+static const char * const success_tests[] = {
+ "test_task_acquire_release_argument",
+ "test_task_acquire_release_current",
+ "test_task_acquire_leave_in_map",
+ "test_task_xchg_release",
+ "test_task_map_acquire_release",
+ "test_task_current_acquire_release",
+ "test_task_from_pid_arg",
+ "test_task_from_pid_current",
+ "test_task_from_pid_invalid",
+ "task_kfunc_acquire_trusted_walked",
+ "test_task_kfunc_flavor_relo",
+ "test_task_kfunc_flavor_relo_not_found",
+};
+
+void test_task_kfunc(void)
+{
+ int i;
+
+ for (i = 0; i < ARRAY_SIZE(success_tests); i++) {
+ if (!test__start_subtest(success_tests[i]))
+ continue;
+
+ run_success_test(success_tests[i]);
+ }
+
+ RUN_TESTS(task_kfunc_failure);
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/task_local_storage.c b/tools/testing/selftests/bpf/prog_tests/task_local_storage.c
new file mode 100644
index 000000000000..c33c05161a9e
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/task_local_storage.c
@@ -0,0 +1,240 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2021 Facebook */
+
+#define _GNU_SOURCE /* See feature_test_macros(7) */
+#include <unistd.h>
+#include <sched.h>
+#include <pthread.h>
+#include <sys/syscall.h> /* For SYS_xxx definitions */
+#include <sys/types.h>
+#include <test_progs.h>
+#include "task_local_storage_helpers.h"
+#include "task_local_storage.skel.h"
+#include "task_local_storage_exit_creds.skel.h"
+#include "task_ls_recursion.skel.h"
+#include "task_storage_nodeadlock.skel.h"
+
+static void test_sys_enter_exit(void)
+{
+ struct task_local_storage *skel;
+ int err;
+
+ skel = task_local_storage__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "skel_open_and_load"))
+ return;
+
+ skel->bss->target_pid = syscall(SYS_gettid);
+
+ err = task_local_storage__attach(skel);
+ if (!ASSERT_OK(err, "skel_attach"))
+ goto out;
+
+ syscall(SYS_gettid);
+ syscall(SYS_gettid);
+
+ /* 3x syscalls: 1x attach and 2x gettid */
+ ASSERT_EQ(skel->bss->enter_cnt, 3, "enter_cnt");
+ ASSERT_EQ(skel->bss->exit_cnt, 3, "exit_cnt");
+ ASSERT_EQ(skel->bss->mismatch_cnt, 0, "mismatch_cnt");
+out:
+ task_local_storage__destroy(skel);
+}
+
+static void test_exit_creds(void)
+{
+ struct task_local_storage_exit_creds *skel;
+ int err, run_count, sync_rcu_calls = 0;
+ const int MAX_SYNC_RCU_CALLS = 1000;
+
+ skel = task_local_storage_exit_creds__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "skel_open_and_load"))
+ return;
+
+ err = task_local_storage_exit_creds__attach(skel);
+ if (!ASSERT_OK(err, "skel_attach"))
+ goto out;
+
+ /* trigger at least one exit_creds() */
+ if (CHECK_FAIL(system("ls > /dev/null")))
+ goto out;
+
+ /* kern_sync_rcu is not enough on its own as the read section we want
+ * to wait for may start after we enter synchronize_rcu, so our call
+ * won't wait for the section to finish. Loop on the run counter
+ * as well to ensure the program has run.
+ */
+ do {
+ kern_sync_rcu();
+ run_count = __atomic_load_n(&skel->bss->run_count, __ATOMIC_SEQ_CST);
+ } while (run_count == 0 && ++sync_rcu_calls < MAX_SYNC_RCU_CALLS);
+
+ ASSERT_NEQ(sync_rcu_calls, MAX_SYNC_RCU_CALLS,
+ "sync_rcu count too high");
+ ASSERT_NEQ(run_count, 0, "run_count");
+ ASSERT_EQ(skel->bss->valid_ptr_count, 0, "valid_ptr_count");
+ ASSERT_NEQ(skel->bss->null_ptr_count, 0, "null_ptr_count");
+out:
+ task_local_storage_exit_creds__destroy(skel);
+}
+
+static void test_recursion(void)
+{
+ int err, map_fd, prog_fd, task_fd;
+ struct task_ls_recursion *skel;
+ struct bpf_prog_info info;
+ __u32 info_len = sizeof(info);
+ long value;
+
+ task_fd = sys_pidfd_open(getpid(), 0);
+ if (!ASSERT_NEQ(task_fd, -1, "sys_pidfd_open"))
+ return;
+
+ skel = task_ls_recursion__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "skel_open_and_load"))
+ goto out;
+
+ err = task_ls_recursion__attach(skel);
+ if (!ASSERT_OK(err, "skel_attach"))
+ goto out;
+
+ /* trigger sys_enter, make sure it does not cause deadlock */
+ skel->bss->test_pid = getpid();
+ syscall(SYS_gettid);
+ skel->bss->test_pid = 0;
+ task_ls_recursion__detach(skel);
+
+ /* Refer to the comment in BPF_PROG(on_update) for
+ * the explanation on the value 201 and 100.
+ */
+ map_fd = bpf_map__fd(skel->maps.map_a);
+ err = bpf_map_lookup_elem(map_fd, &task_fd, &value);
+ ASSERT_OK(err, "lookup map_a");
+ ASSERT_EQ(value, 201, "map_a value");
+ ASSERT_EQ(skel->bss->nr_del_errs, 1, "bpf_task_storage_delete busy");
+
+ map_fd = bpf_map__fd(skel->maps.map_b);
+ err = bpf_map_lookup_elem(map_fd, &task_fd, &value);
+ ASSERT_OK(err, "lookup map_b");
+ ASSERT_EQ(value, 100, "map_b value");
+
+ prog_fd = bpf_program__fd(skel->progs.on_update);
+ memset(&info, 0, sizeof(info));
+ err = bpf_prog_get_info_by_fd(prog_fd, &info, &info_len);
+ ASSERT_OK(err, "get prog info");
+ ASSERT_EQ(info.recursion_misses, 0, "on_update prog recursion");
+
+ prog_fd = bpf_program__fd(skel->progs.on_enter);
+ memset(&info, 0, sizeof(info));
+ err = bpf_prog_get_info_by_fd(prog_fd, &info, &info_len);
+ ASSERT_OK(err, "get prog info");
+ ASSERT_EQ(info.recursion_misses, 0, "on_enter prog recursion");
+
+out:
+ close(task_fd);
+ task_ls_recursion__destroy(skel);
+}
+
+static bool stop;
+
+static void waitall(const pthread_t *tids, int nr)
+{
+ int i;
+
+ stop = true;
+ for (i = 0; i < nr; i++)
+ pthread_join(tids[i], NULL);
+}
+
+static void *sock_create_loop(void *arg)
+{
+ struct task_storage_nodeadlock *skel = arg;
+ int fd;
+
+ while (!stop) {
+ fd = socket(AF_INET, SOCK_STREAM, 0);
+ close(fd);
+ if (skel->bss->nr_get_errs || skel->bss->nr_del_errs)
+ stop = true;
+ }
+
+ return NULL;
+}
+
+static void test_nodeadlock(void)
+{
+ struct task_storage_nodeadlock *skel;
+ struct bpf_prog_info info = {};
+ __u32 info_len = sizeof(info);
+ const int nr_threads = 32;
+ pthread_t tids[nr_threads];
+ int i, prog_fd, err;
+ cpu_set_t old, new;
+
+ /* Pin all threads to one cpu to increase the chance of preemption
+ * in a sleepable bpf prog.
+ */
+ CPU_ZERO(&new);
+ CPU_SET(0, &new);
+ err = sched_getaffinity(getpid(), sizeof(old), &old);
+ if (!ASSERT_OK(err, "getaffinity"))
+ return;
+ err = sched_setaffinity(getpid(), sizeof(new), &new);
+ if (!ASSERT_OK(err, "setaffinity"))
+ return;
+
+ skel = task_storage_nodeadlock__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "open_and_load"))
+ goto done;
+
+ /* Unnecessary recursion and deadlock detection are reproducible
+ * in the preemptible kernel.
+ */
+ if (!skel->kconfig->CONFIG_PREEMPT) {
+ test__skip();
+ goto done;
+ }
+
+ err = task_storage_nodeadlock__attach(skel);
+ ASSERT_OK(err, "attach prog");
+
+ for (i = 0; i < nr_threads; i++) {
+ err = pthread_create(&tids[i], NULL, sock_create_loop, skel);
+ if (err) {
+ /* Only assert once here to avoid excessive
+ * PASS printing during test failure.
+ */
+ ASSERT_OK(err, "pthread_create");
+ waitall(tids, i);
+ goto done;
+ }
+ }
+
+ /* With 32 threads, 1s is enough to reproduce the issue */
+ sleep(1);
+ waitall(tids, nr_threads);
+
+ info_len = sizeof(info);
+ prog_fd = bpf_program__fd(skel->progs.socket_post_create);
+ err = bpf_prog_get_info_by_fd(prog_fd, &info, &info_len);
+ ASSERT_OK(err, "get prog info");
+ ASSERT_EQ(info.recursion_misses, 0, "prog recursion");
+
+ ASSERT_EQ(skel->bss->nr_get_errs, 0, "bpf_task_storage_get busy");
+ ASSERT_EQ(skel->bss->nr_del_errs, 0, "bpf_task_storage_delete busy");
+
+done:
+ task_storage_nodeadlock__destroy(skel);
+ sched_setaffinity(getpid(), sizeof(old), &old);
+}
+
+void test_task_local_storage(void)
+{
+ if (test__start_subtest("sys_enter_exit"))
+ test_sys_enter_exit();
+ if (test__start_subtest("exit_creds"))
+ test_exit_creds();
+ if (test__start_subtest("recursion"))
+ test_recursion();
+ if (test__start_subtest("nodeadlock"))
+ test_nodeadlock();
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/task_pt_regs.c b/tools/testing/selftests/bpf/prog_tests/task_pt_regs.c
new file mode 100644
index 000000000000..f000734a3d1f
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/task_pt_regs.c
@@ -0,0 +1,50 @@
+// SPDX-License-Identifier: GPL-2.0
+#define _GNU_SOURCE
+#include <test_progs.h>
+#include "test_task_pt_regs.skel.h"
+
+/* uprobe attach point */
+static noinline void trigger_func(void)
+{
+ asm volatile ("");
+}
+
+void test_task_pt_regs(void)
+{
+ struct test_task_pt_regs *skel;
+ struct bpf_link *uprobe_link;
+ ssize_t uprobe_offset;
+ bool match;
+
+ uprobe_offset = get_uprobe_offset(&trigger_func);
+ if (!ASSERT_GE(uprobe_offset, 0, "uprobe_offset"))
+ return;
+
+ skel = test_task_pt_regs__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "skel_open"))
+ return;
+ if (!ASSERT_OK_PTR(skel->bss, "check_bss"))
+ goto cleanup;
+
+ uprobe_link = bpf_program__attach_uprobe(skel->progs.handle_uprobe,
+ false /* retprobe */,
+ 0 /* self pid */,
+ "/proc/self/exe",
+ uprobe_offset);
+ if (!ASSERT_OK_PTR(uprobe_link, "attach_uprobe"))
+ goto cleanup;
+ skel->links.handle_uprobe = uprobe_link;
+
+ /* trigger & validate uprobe */
+ trigger_func();
+
+ if (!ASSERT_EQ(skel->bss->uprobe_res, 1, "check_uprobe_res"))
+ goto cleanup;
+
+ match = !memcmp(&skel->bss->current_regs, &skel->bss->ctx_regs,
+ sizeof(skel->bss->current_regs));
+ ASSERT_TRUE(match, "check_regs_match");
+
+cleanup:
+ test_task_pt_regs__destroy(skel);
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/task_under_cgroup.c b/tools/testing/selftests/bpf/prog_tests/task_under_cgroup.c
new file mode 100644
index 000000000000..626d76fe43a2
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/task_under_cgroup.c
@@ -0,0 +1,60 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2023 Bytedance */
+
+#include <sys/syscall.h>
+#include <test_progs.h>
+#include <cgroup_helpers.h>
+#include "test_task_under_cgroup.skel.h"
+
+#define FOO "/foo"
+
+void test_task_under_cgroup(void)
+{
+ struct test_task_under_cgroup *skel;
+ int ret, foo;
+ pid_t pid;
+
+ foo = test__join_cgroup(FOO);
+ if (!ASSERT_OK(foo < 0, "cgroup_join_foo"))
+ return;
+
+ skel = test_task_under_cgroup__open();
+ if (!ASSERT_OK_PTR(skel, "test_task_under_cgroup__open"))
+ goto cleanup;
+
+ skel->rodata->local_pid = getpid();
+ skel->bss->remote_pid = getpid();
+ skel->rodata->cgid = get_cgroup_id(FOO);
+
+ ret = test_task_under_cgroup__load(skel);
+ if (!ASSERT_OK(ret, "test_task_under_cgroup__load"))
+ goto cleanup;
+
+ /* First, attach the LSM program, and then it will be triggered when the
+ * TP_BTF program is attached.
+ */
+ skel->links.lsm_run = bpf_program__attach_lsm(skel->progs.lsm_run);
+ if (!ASSERT_OK_PTR(skel->links.lsm_run, "attach_lsm"))
+ goto cleanup;
+
+ skel->links.tp_btf_run = bpf_program__attach_trace(skel->progs.tp_btf_run);
+ if (!ASSERT_OK_PTR(skel->links.tp_btf_run, "attach_tp_btf"))
+ goto cleanup;
+
+ pid = fork();
+ if (pid == 0)
+ exit(0);
+
+ ret = (pid == -1);
+ if (ASSERT_OK(ret, "fork process"))
+ wait(NULL);
+
+ test_task_under_cgroup__detach(skel);
+
+ ASSERT_NEQ(skel->bss->remote_pid, skel->rodata->local_pid,
+ "test task_under_cgroup");
+
+cleanup:
+ test_task_under_cgroup__destroy(skel);
+ close(foo);
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/tc_bpf.c b/tools/testing/selftests/bpf/prog_tests/tc_bpf.c
new file mode 100644
index 000000000000..48b55539331e
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/tc_bpf.c
@@ -0,0 +1,429 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include <test_progs.h>
+#include <linux/pkt_cls.h>
+
+#include "cap_helpers.h"
+#include "test_tc_bpf.skel.h"
+
+#define LO_IFINDEX 1
+
+#define TEST_DECLARE_OPTS(__fd) \
+ DECLARE_LIBBPF_OPTS(bpf_tc_opts, opts_h, .handle = 1); \
+ DECLARE_LIBBPF_OPTS(bpf_tc_opts, opts_p, .priority = 1); \
+ DECLARE_LIBBPF_OPTS(bpf_tc_opts, opts_f, .prog_fd = __fd); \
+ DECLARE_LIBBPF_OPTS(bpf_tc_opts, opts_hp, .handle = 1, .priority = 1); \
+ DECLARE_LIBBPF_OPTS(bpf_tc_opts, opts_hf, .handle = 1, .prog_fd = __fd); \
+ DECLARE_LIBBPF_OPTS(bpf_tc_opts, opts_pf, .priority = 1, .prog_fd = __fd); \
+ DECLARE_LIBBPF_OPTS(bpf_tc_opts, opts_hpf, .handle = 1, .priority = 1, .prog_fd = __fd); \
+ DECLARE_LIBBPF_OPTS(bpf_tc_opts, opts_hpi, .handle = 1, .priority = 1, .prog_id = 42); \
+ DECLARE_LIBBPF_OPTS(bpf_tc_opts, opts_hpr, .handle = 1, .priority = 1, \
+ .flags = BPF_TC_F_REPLACE); \
+ DECLARE_LIBBPF_OPTS(bpf_tc_opts, opts_hpfi, .handle = 1, .priority = 1, .prog_fd = __fd, \
+ .prog_id = 42); \
+ DECLARE_LIBBPF_OPTS(bpf_tc_opts, opts_prio_max, .handle = 1, .priority = UINT16_MAX + 1);
+
+static int test_tc_bpf_basic(const struct bpf_tc_hook *hook, int fd)
+{
+ DECLARE_LIBBPF_OPTS(bpf_tc_opts, opts, .handle = 1, .priority = 1, .prog_fd = fd);
+ struct bpf_prog_info info = {};
+ __u32 info_len = sizeof(info);
+ int ret;
+
+ ret = bpf_prog_get_info_by_fd(fd, &info, &info_len);
+ if (!ASSERT_OK(ret, "bpf_prog_get_info_by_fd"))
+ return ret;
+
+ ret = bpf_tc_attach(hook, &opts);
+ if (!ASSERT_OK(ret, "bpf_tc_attach"))
+ return ret;
+
+ if (!ASSERT_EQ(opts.handle, 1, "handle set") ||
+ !ASSERT_EQ(opts.priority, 1, "priority set") ||
+ !ASSERT_EQ(opts.prog_id, info.id, "prog_id set"))
+ goto end;
+
+ opts.prog_id = 0;
+ opts.flags = BPF_TC_F_REPLACE;
+ ret = bpf_tc_attach(hook, &opts);
+ if (!ASSERT_OK(ret, "bpf_tc_attach replace mode"))
+ goto end;
+
+ opts.flags = opts.prog_fd = opts.prog_id = 0;
+ ret = bpf_tc_query(hook, &opts);
+ if (!ASSERT_OK(ret, "bpf_tc_query"))
+ goto end;
+
+ if (!ASSERT_EQ(opts.handle, 1, "handle set") ||
+ !ASSERT_EQ(opts.priority, 1, "priority set") ||
+ !ASSERT_EQ(opts.prog_id, info.id, "prog_id set"))
+ goto end;
+
+end:
+ opts.flags = opts.prog_fd = opts.prog_id = 0;
+ ret = bpf_tc_detach(hook, &opts);
+ ASSERT_OK(ret, "bpf_tc_detach");
+ return ret;
+}
+
+static int test_tc_bpf_api(struct bpf_tc_hook *hook, int fd)
+{
+ DECLARE_LIBBPF_OPTS(bpf_tc_opts, attach_opts, .handle = 1, .priority = 1, .prog_fd = fd);
+ DECLARE_LIBBPF_OPTS(bpf_tc_hook, inv_hook, .attach_point = BPF_TC_INGRESS);
+ DECLARE_LIBBPF_OPTS(bpf_tc_opts, opts, .handle = 1, .priority = 1);
+ int ret;
+
+ ret = bpf_tc_hook_create(NULL);
+ if (!ASSERT_EQ(ret, -EINVAL, "bpf_tc_hook_create invalid hook = NULL"))
+ return -EINVAL;
+
+ /* hook ifindex = 0 */
+ ret = bpf_tc_hook_create(&inv_hook);
+ if (!ASSERT_EQ(ret, -EINVAL, "bpf_tc_hook_create invalid hook ifindex == 0"))
+ return -EINVAL;
+
+ ret = bpf_tc_hook_destroy(&inv_hook);
+ if (!ASSERT_EQ(ret, -EINVAL, "bpf_tc_hook_destroy invalid hook ifindex == 0"))
+ return -EINVAL;
+
+ ret = bpf_tc_attach(&inv_hook, &attach_opts);
+ if (!ASSERT_EQ(ret, -EINVAL, "bpf_tc_attach invalid hook ifindex == 0"))
+ return -EINVAL;
+ attach_opts.prog_id = 0;
+
+ ret = bpf_tc_detach(&inv_hook, &opts);
+ if (!ASSERT_EQ(ret, -EINVAL, "bpf_tc_detach invalid hook ifindex == 0"))
+ return -EINVAL;
+
+ ret = bpf_tc_query(&inv_hook, &opts);
+ if (!ASSERT_EQ(ret, -EINVAL, "bpf_tc_query invalid hook ifindex == 0"))
+ return -EINVAL;
+
+ /* hook ifindex < 0 */
+ inv_hook.ifindex = -1;
+
+ ret = bpf_tc_hook_create(&inv_hook);
+ if (!ASSERT_EQ(ret, -EINVAL, "bpf_tc_hook_create invalid hook ifindex < 0"))
+ return -EINVAL;
+
+ ret = bpf_tc_hook_destroy(&inv_hook);
+ if (!ASSERT_EQ(ret, -EINVAL, "bpf_tc_hook_destroy invalid hook ifindex < 0"))
+ return -EINVAL;
+
+ ret = bpf_tc_attach(&inv_hook, &attach_opts);
+ if (!ASSERT_EQ(ret, -EINVAL, "bpf_tc_attach invalid hook ifindex < 0"))
+ return -EINVAL;
+ attach_opts.prog_id = 0;
+
+ ret = bpf_tc_detach(&inv_hook, &opts);
+ if (!ASSERT_EQ(ret, -EINVAL, "bpf_tc_detach invalid hook ifindex < 0"))
+ return -EINVAL;
+
+ ret = bpf_tc_query(&inv_hook, &opts);
+ if (!ASSERT_EQ(ret, -EINVAL, "bpf_tc_query invalid hook ifindex < 0"))
+ return -EINVAL;
+
+ inv_hook.ifindex = LO_IFINDEX;
+
+ /* hook.attach_point invalid */
+ inv_hook.attach_point = 0xabcd;
+ ret = bpf_tc_hook_create(&inv_hook);
+ if (!ASSERT_EQ(ret, -EINVAL, "bpf_tc_hook_create invalid hook.attach_point"))
+ return -EINVAL;
+
+ ret = bpf_tc_hook_destroy(&inv_hook);
+ if (!ASSERT_EQ(ret, -EINVAL, "bpf_tc_hook_destroy invalid hook.attach_point"))
+ return -EINVAL;
+
+ ret = bpf_tc_attach(&inv_hook, &attach_opts);
+ if (!ASSERT_EQ(ret, -EINVAL, "bpf_tc_attach invalid hook.attach_point"))
+ return -EINVAL;
+
+ ret = bpf_tc_detach(&inv_hook, &opts);
+ if (!ASSERT_EQ(ret, -EINVAL, "bpf_tc_detach invalid hook.attach_point"))
+ return -EINVAL;
+
+ ret = bpf_tc_query(&inv_hook, &opts);
+ if (!ASSERT_EQ(ret, -EINVAL, "bpf_tc_query invalid hook.attach_point"))
+ return -EINVAL;
+
+ inv_hook.attach_point = BPF_TC_INGRESS;
+
+ /* hook.attach_point valid, but parent invalid */
+ inv_hook.parent = TC_H_MAKE(1UL << 16, 10);
+ ret = bpf_tc_hook_create(&inv_hook);
+ if (!ASSERT_EQ(ret, -EINVAL, "bpf_tc_hook_create invalid hook parent"))
+ return -EINVAL;
+
+ ret = bpf_tc_hook_destroy(&inv_hook);
+ if (!ASSERT_EQ(ret, -EINVAL, "bpf_tc_hook_destroy invalid hook parent"))
+ return -EINVAL;
+
+ ret = bpf_tc_attach(&inv_hook, &attach_opts);
+ if (!ASSERT_EQ(ret, -EINVAL, "bpf_tc_attach invalid hook parent"))
+ return -EINVAL;
+
+ ret = bpf_tc_detach(&inv_hook, &opts);
+ if (!ASSERT_EQ(ret, -EINVAL, "bpf_tc_detach invalid hook parent"))
+ return -EINVAL;
+
+ ret = bpf_tc_query(&inv_hook, &opts);
+ if (!ASSERT_EQ(ret, -EINVAL, "bpf_tc_query invalid hook parent"))
+ return -EINVAL;
+
+ inv_hook.attach_point = BPF_TC_CUSTOM;
+ inv_hook.parent = 0;
+ /* These return EOPNOTSUPP instead of EINVAL as parent is checked after
+ * attach_point of the hook.
+ */
+ ret = bpf_tc_hook_create(&inv_hook);
+ if (!ASSERT_EQ(ret, -EOPNOTSUPP, "bpf_tc_hook_create invalid hook parent"))
+ return -EINVAL;
+
+ ret = bpf_tc_hook_destroy(&inv_hook);
+ if (!ASSERT_EQ(ret, -EOPNOTSUPP, "bpf_tc_hook_destroy invalid hook parent"))
+ return -EINVAL;
+
+ ret = bpf_tc_attach(&inv_hook, &attach_opts);
+ if (!ASSERT_EQ(ret, -EINVAL, "bpf_tc_attach invalid hook parent"))
+ return -EINVAL;
+
+ ret = bpf_tc_detach(&inv_hook, &opts);
+ if (!ASSERT_EQ(ret, -EINVAL, "bpf_tc_detach invalid hook parent"))
+ return -EINVAL;
+
+ ret = bpf_tc_query(&inv_hook, &opts);
+ if (!ASSERT_EQ(ret, -EINVAL, "bpf_tc_query invalid hook parent"))
+ return -EINVAL;
+
+ inv_hook.attach_point = BPF_TC_INGRESS;
+
+ /* detach */
+ {
+ TEST_DECLARE_OPTS(fd);
+
+ ret = bpf_tc_detach(NULL, &opts_hp);
+ if (!ASSERT_EQ(ret, -EINVAL, "bpf_tc_detach invalid hook = NULL"))
+ return -EINVAL;
+
+ ret = bpf_tc_detach(hook, NULL);
+ if (!ASSERT_EQ(ret, -EINVAL, "bpf_tc_detach invalid opts = NULL"))
+ return -EINVAL;
+
+ ret = bpf_tc_detach(hook, &opts_hpr);
+ if (!ASSERT_EQ(ret, -EINVAL, "bpf_tc_detach invalid flags set"))
+ return -EINVAL;
+
+ ret = bpf_tc_detach(hook, &opts_hpf);
+ if (!ASSERT_EQ(ret, -EINVAL, "bpf_tc_detach invalid prog_fd set"))
+ return -EINVAL;
+
+ ret = bpf_tc_detach(hook, &opts_hpi);
+ if (!ASSERT_EQ(ret, -EINVAL, "bpf_tc_detach invalid prog_id set"))
+ return -EINVAL;
+
+ ret = bpf_tc_detach(hook, &opts_p);
+ if (!ASSERT_EQ(ret, -EINVAL, "bpf_tc_detach invalid handle unset"))
+ return -EINVAL;
+
+ ret = bpf_tc_detach(hook, &opts_h);
+ if (!ASSERT_EQ(ret, -EINVAL, "bpf_tc_detach invalid priority unset"))
+ return -EINVAL;
+
+ ret = bpf_tc_detach(hook, &opts_prio_max);
+ if (!ASSERT_EQ(ret, -EINVAL, "bpf_tc_detach invalid priority > UINT16_MAX"))
+ return -EINVAL;
+ }
+
+ /* query */
+ {
+ TEST_DECLARE_OPTS(fd);
+
+ ret = bpf_tc_query(NULL, &opts);
+ if (!ASSERT_EQ(ret, -EINVAL, "bpf_tc_query invalid hook = NULL"))
+ return -EINVAL;
+
+ ret = bpf_tc_query(hook, NULL);
+ if (!ASSERT_EQ(ret, -EINVAL, "bpf_tc_query invalid opts = NULL"))
+ return -EINVAL;
+
+ ret = bpf_tc_query(hook, &opts_hpr);
+ if (!ASSERT_EQ(ret, -EINVAL, "bpf_tc_query invalid flags set"))
+ return -EINVAL;
+
+ ret = bpf_tc_query(hook, &opts_hpf);
+ if (!ASSERT_EQ(ret, -EINVAL, "bpf_tc_query invalid prog_fd set"))
+ return -EINVAL;
+
+ ret = bpf_tc_query(hook, &opts_hpi);
+ if (!ASSERT_EQ(ret, -EINVAL, "bpf_tc_query invalid prog_id set"))
+ return -EINVAL;
+
+ ret = bpf_tc_query(hook, &opts_p);
+ if (!ASSERT_EQ(ret, -EINVAL, "bpf_tc_query invalid handle unset"))
+ return -EINVAL;
+
+ ret = bpf_tc_query(hook, &opts_h);
+ if (!ASSERT_EQ(ret, -EINVAL, "bpf_tc_query invalid priority unset"))
+ return -EINVAL;
+
+ ret = bpf_tc_query(hook, &opts_prio_max);
+ if (!ASSERT_EQ(ret, -EINVAL, "bpf_tc_query invalid priority > UINT16_MAX"))
+ return -EINVAL;
+
+ /* when chain is not present, kernel returns -EINVAL */
+ ret = bpf_tc_query(hook, &opts_hp);
+ if (!ASSERT_EQ(ret, -EINVAL, "bpf_tc_query valid handle, priority set"))
+ return -EINVAL;
+ }
+
+ /* attach */
+ {
+ TEST_DECLARE_OPTS(fd);
+
+ ret = bpf_tc_attach(NULL, &opts_hp);
+ if (!ASSERT_EQ(ret, -EINVAL, "bpf_tc_attach invalid hook = NULL"))
+ return -EINVAL;
+
+ ret = bpf_tc_attach(hook, NULL);
+ if (!ASSERT_EQ(ret, -EINVAL, "bpf_tc_attach invalid opts = NULL"))
+ return -EINVAL;
+
+ opts_hp.flags = 42;
+ ret = bpf_tc_attach(hook, &opts_hp);
+ if (!ASSERT_EQ(ret, -EINVAL, "bpf_tc_attach invalid flags"))
+ return -EINVAL;
+
+ ret = bpf_tc_attach(hook, NULL);
+ if (!ASSERT_EQ(ret, -EINVAL, "bpf_tc_attach invalid prog_fd unset"))
+ return -EINVAL;
+
+ ret = bpf_tc_attach(hook, &opts_hpi);
+ if (!ASSERT_EQ(ret, -EINVAL, "bpf_tc_attach invalid prog_id set"))
+ return -EINVAL;
+
+ ret = bpf_tc_attach(hook, &opts_pf);
+ if (!ASSERT_OK(ret, "bpf_tc_attach valid handle unset"))
+ return -EINVAL;
+ opts_pf.prog_fd = opts_pf.prog_id = 0;
+ ASSERT_OK(bpf_tc_detach(hook, &opts_pf), "bpf_tc_detach");
+
+ ret = bpf_tc_attach(hook, &opts_hf);
+ if (!ASSERT_OK(ret, "bpf_tc_attach valid priority unset"))
+ return -EINVAL;
+ opts_hf.prog_fd = opts_hf.prog_id = 0;
+ ASSERT_OK(bpf_tc_detach(hook, &opts_hf), "bpf_tc_detach");
+
+ ret = bpf_tc_attach(hook, &opts_prio_max);
+ if (!ASSERT_EQ(ret, -EINVAL, "bpf_tc_attach invalid priority > UINT16_MAX"))
+ return -EINVAL;
+
+ ret = bpf_tc_attach(hook, &opts_f);
+ if (!ASSERT_OK(ret, "bpf_tc_attach valid both handle and priority unset"))
+ return -EINVAL;
+ opts_f.prog_fd = opts_f.prog_id = 0;
+ ASSERT_OK(bpf_tc_detach(hook, &opts_f), "bpf_tc_detach");
+ }
+
+ return 0;
+}
+
+void tc_bpf_root(void)
+{
+ DECLARE_LIBBPF_OPTS(bpf_tc_hook, hook, .ifindex = LO_IFINDEX,
+ .attach_point = BPF_TC_INGRESS);
+ struct test_tc_bpf *skel = NULL;
+ bool hook_created = false;
+ int cls_fd, ret;
+
+ skel = test_tc_bpf__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "test_tc_bpf__open_and_load"))
+ return;
+
+ cls_fd = bpf_program__fd(skel->progs.cls);
+
+ ret = bpf_tc_hook_create(&hook);
+ if (ret == 0)
+ hook_created = true;
+
+ ret = ret == -EEXIST ? 0 : ret;
+ if (!ASSERT_OK(ret, "bpf_tc_hook_create(BPF_TC_INGRESS)"))
+ goto end;
+
+ hook.attach_point = BPF_TC_CUSTOM;
+ hook.parent = TC_H_MAKE(TC_H_CLSACT, TC_H_MIN_INGRESS);
+ ret = bpf_tc_hook_create(&hook);
+ if (!ASSERT_EQ(ret, -EOPNOTSUPP, "bpf_tc_hook_create invalid hook.attach_point"))
+ goto end;
+
+ ret = test_tc_bpf_basic(&hook, cls_fd);
+ if (!ASSERT_OK(ret, "test_tc_internal ingress"))
+ goto end;
+
+ ret = bpf_tc_hook_destroy(&hook);
+ if (!ASSERT_EQ(ret, -EOPNOTSUPP, "bpf_tc_hook_destroy invalid hook.attach_point"))
+ goto end;
+
+ hook.attach_point = BPF_TC_INGRESS;
+ hook.parent = 0;
+ bpf_tc_hook_destroy(&hook);
+
+ ret = test_tc_bpf_basic(&hook, cls_fd);
+ if (!ASSERT_OK(ret, "test_tc_internal ingress"))
+ goto end;
+
+ bpf_tc_hook_destroy(&hook);
+
+ hook.attach_point = BPF_TC_EGRESS;
+ ret = test_tc_bpf_basic(&hook, cls_fd);
+ if (!ASSERT_OK(ret, "test_tc_internal egress"))
+ goto end;
+
+ bpf_tc_hook_destroy(&hook);
+
+ ret = test_tc_bpf_api(&hook, cls_fd);
+ if (!ASSERT_OK(ret, "test_tc_bpf_api"))
+ goto end;
+
+ bpf_tc_hook_destroy(&hook);
+
+end:
+ if (hook_created) {
+ hook.attach_point = BPF_TC_INGRESS | BPF_TC_EGRESS;
+ bpf_tc_hook_destroy(&hook);
+ }
+ test_tc_bpf__destroy(skel);
+}
+
+void tc_bpf_non_root(void)
+{
+ struct test_tc_bpf *skel = NULL;
+ __u64 caps = 0;
+ int ret;
+
+ /* In case CAP_BPF and CAP_PERFMON is not set */
+ ret = cap_enable_effective(1ULL << CAP_BPF | 1ULL << CAP_NET_ADMIN, &caps);
+ if (!ASSERT_OK(ret, "set_cap_bpf_cap_net_admin"))
+ return;
+ ret = cap_disable_effective(1ULL << CAP_SYS_ADMIN | 1ULL << CAP_PERFMON, NULL);
+ if (!ASSERT_OK(ret, "disable_cap_sys_admin"))
+ goto restore_cap;
+
+ skel = test_tc_bpf__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "test_tc_bpf__open_and_load"))
+ goto restore_cap;
+
+ test_tc_bpf__destroy(skel);
+
+restore_cap:
+ if (caps)
+ cap_enable_effective(caps, NULL);
+}
+
+void test_tc_bpf(void)
+{
+ if (test__start_subtest("tc_bpf_root"))
+ tc_bpf_root();
+ if (test__start_subtest("tc_bpf_non_root"))
+ tc_bpf_non_root();
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/tc_helpers.h b/tools/testing/selftests/bpf/prog_tests/tc_helpers.h
new file mode 100644
index 000000000000..924d0e25320c
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/tc_helpers.h
@@ -0,0 +1,78 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* Copyright (c) 2023 Isovalent */
+#ifndef TC_HELPERS
+#define TC_HELPERS
+#include <test_progs.h>
+
+#ifndef loopback
+# define loopback 1
+#endif
+
+static inline __u32 id_from_prog_fd(int fd)
+{
+ struct bpf_prog_info prog_info = {};
+ __u32 prog_info_len = sizeof(prog_info);
+ int err;
+
+ err = bpf_obj_get_info_by_fd(fd, &prog_info, &prog_info_len);
+ if (!ASSERT_OK(err, "id_from_prog_fd"))
+ return 0;
+
+ ASSERT_NEQ(prog_info.id, 0, "prog_info.id");
+ return prog_info.id;
+}
+
+static inline __u32 id_from_link_fd(int fd)
+{
+ struct bpf_link_info link_info = {};
+ __u32 link_info_len = sizeof(link_info);
+ int err;
+
+ err = bpf_link_get_info_by_fd(fd, &link_info, &link_info_len);
+ if (!ASSERT_OK(err, "id_from_link_fd"))
+ return 0;
+
+ ASSERT_NEQ(link_info.id, 0, "link_info.id");
+ return link_info.id;
+}
+
+static inline __u32 ifindex_from_link_fd(int fd)
+{
+ struct bpf_link_info link_info = {};
+ __u32 link_info_len = sizeof(link_info);
+ int err;
+
+ err = bpf_link_get_info_by_fd(fd, &link_info, &link_info_len);
+ if (!ASSERT_OK(err, "id_from_link_fd"))
+ return 0;
+
+ return link_info.tcx.ifindex;
+}
+
+static inline void __assert_mprog_count(int target, int expected, int ifindex)
+{
+ __u32 count = 0, attach_flags = 0;
+ int err;
+
+ err = bpf_prog_query(ifindex, target, 0, &attach_flags,
+ NULL, &count);
+ ASSERT_EQ(count, expected, "count");
+ ASSERT_EQ(err, 0, "prog_query");
+}
+
+static inline void assert_mprog_count(int target, int expected)
+{
+ __assert_mprog_count(target, expected, loopback);
+}
+
+static inline void assert_mprog_count_ifindex(int ifindex, int target, int expected)
+{
+ __assert_mprog_count(target, expected, ifindex);
+}
+
+static inline void tc_skel_reset_all_seen(struct test_tc_link *skel)
+{
+ memset(skel->bss, 0, sizeof(*skel->bss));
+}
+
+#endif /* TC_HELPERS */
diff --git a/tools/testing/selftests/bpf/prog_tests/tc_links.c b/tools/testing/selftests/bpf/prog_tests/tc_links.c
new file mode 100644
index 000000000000..bc9841144685
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/tc_links.c
@@ -0,0 +1,1901 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2023 Isovalent */
+#include <uapi/linux/if_link.h>
+#include <uapi/linux/pkt_sched.h>
+#include <net/if.h>
+#include <test_progs.h>
+
+#define loopback 1
+#define ping_cmd "ping -q -c1 -w1 127.0.0.1 > /dev/null"
+
+#include "test_tc_link.skel.h"
+#include "tc_helpers.h"
+
+void serial_test_tc_links_basic(void)
+{
+ LIBBPF_OPTS(bpf_prog_query_opts, optq);
+ LIBBPF_OPTS(bpf_tcx_opts, optl);
+ __u32 prog_ids[2], link_ids[2];
+ __u32 pid1, pid2, lid1, lid2;
+ struct test_tc_link *skel;
+ struct bpf_link *link;
+ int err;
+
+ skel = test_tc_link__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "skel_load"))
+ goto cleanup;
+
+ pid1 = id_from_prog_fd(bpf_program__fd(skel->progs.tc1));
+ pid2 = id_from_prog_fd(bpf_program__fd(skel->progs.tc2));
+
+ ASSERT_NEQ(pid1, pid2, "prog_ids_1_2");
+
+ assert_mprog_count(BPF_TCX_INGRESS, 0);
+ assert_mprog_count(BPF_TCX_EGRESS, 0);
+
+ ASSERT_EQ(skel->bss->seen_tc1, false, "seen_tc1");
+ ASSERT_EQ(skel->bss->seen_tc2, false, "seen_tc2");
+
+ link = bpf_program__attach_tcx(skel->progs.tc1, loopback, &optl);
+ if (!ASSERT_OK_PTR(link, "link_attach"))
+ goto cleanup;
+
+ skel->links.tc1 = link;
+
+ lid1 = id_from_link_fd(bpf_link__fd(skel->links.tc1));
+
+ assert_mprog_count(BPF_TCX_INGRESS, 1);
+ assert_mprog_count(BPF_TCX_EGRESS, 0);
+
+ optq.prog_ids = prog_ids;
+ optq.link_ids = link_ids;
+
+ memset(prog_ids, 0, sizeof(prog_ids));
+ memset(link_ids, 0, sizeof(link_ids));
+ optq.count = ARRAY_SIZE(prog_ids);
+
+ err = bpf_prog_query_opts(loopback, BPF_TCX_INGRESS, &optq);
+ if (!ASSERT_OK(err, "prog_query"))
+ goto cleanup;
+
+ ASSERT_EQ(optq.count, 1, "count");
+ ASSERT_EQ(optq.revision, 2, "revision");
+ ASSERT_EQ(optq.prog_ids[0], pid1, "prog_ids[0]");
+ ASSERT_EQ(optq.link_ids[0], lid1, "link_ids[0]");
+ ASSERT_EQ(optq.prog_ids[1], 0, "prog_ids[1]");
+ ASSERT_EQ(optq.link_ids[1], 0, "link_ids[1]");
+
+ tc_skel_reset_all_seen(skel);
+ ASSERT_OK(system(ping_cmd), ping_cmd);
+
+ ASSERT_EQ(skel->bss->seen_tc1, true, "seen_tc1");
+ ASSERT_EQ(skel->bss->seen_tc2, false, "seen_tc2");
+
+ link = bpf_program__attach_tcx(skel->progs.tc2, loopback, &optl);
+ if (!ASSERT_OK_PTR(link, "link_attach"))
+ goto cleanup;
+
+ skel->links.tc2 = link;
+
+ lid2 = id_from_link_fd(bpf_link__fd(skel->links.tc2));
+ ASSERT_NEQ(lid1, lid2, "link_ids_1_2");
+
+ assert_mprog_count(BPF_TCX_INGRESS, 1);
+ assert_mprog_count(BPF_TCX_EGRESS, 1);
+
+ memset(prog_ids, 0, sizeof(prog_ids));
+ memset(link_ids, 0, sizeof(link_ids));
+ optq.count = ARRAY_SIZE(prog_ids);
+
+ err = bpf_prog_query_opts(loopback, BPF_TCX_EGRESS, &optq);
+ if (!ASSERT_OK(err, "prog_query"))
+ goto cleanup;
+
+ ASSERT_EQ(optq.count, 1, "count");
+ ASSERT_EQ(optq.revision, 2, "revision");
+ ASSERT_EQ(optq.prog_ids[0], pid2, "prog_ids[0]");
+ ASSERT_EQ(optq.link_ids[0], lid2, "link_ids[0]");
+ ASSERT_EQ(optq.prog_ids[1], 0, "prog_ids[1]");
+ ASSERT_EQ(optq.link_ids[1], 0, "link_ids[1]");
+
+ tc_skel_reset_all_seen(skel);
+ ASSERT_OK(system(ping_cmd), ping_cmd);
+
+ ASSERT_EQ(skel->bss->seen_tc1, true, "seen_tc1");
+ ASSERT_EQ(skel->bss->seen_tc2, true, "seen_tc2");
+cleanup:
+ test_tc_link__destroy(skel);
+
+ assert_mprog_count(BPF_TCX_INGRESS, 0);
+ assert_mprog_count(BPF_TCX_EGRESS, 0);
+}
+
+static void test_tc_links_before_target(int target)
+{
+ LIBBPF_OPTS(bpf_prog_query_opts, optq);
+ LIBBPF_OPTS(bpf_tcx_opts, optl);
+ __u32 prog_ids[5], link_ids[5];
+ __u32 pid1, pid2, pid3, pid4;
+ __u32 lid1, lid2, lid3, lid4;
+ struct test_tc_link *skel;
+ struct bpf_link *link;
+ int err;
+
+ skel = test_tc_link__open();
+ if (!ASSERT_OK_PTR(skel, "skel_open"))
+ goto cleanup;
+
+ ASSERT_EQ(bpf_program__set_expected_attach_type(skel->progs.tc1, target),
+ 0, "tc1_attach_type");
+ ASSERT_EQ(bpf_program__set_expected_attach_type(skel->progs.tc2, target),
+ 0, "tc2_attach_type");
+ ASSERT_EQ(bpf_program__set_expected_attach_type(skel->progs.tc3, target),
+ 0, "tc3_attach_type");
+ ASSERT_EQ(bpf_program__set_expected_attach_type(skel->progs.tc4, target),
+ 0, "tc4_attach_type");
+
+ err = test_tc_link__load(skel);
+ if (!ASSERT_OK(err, "skel_load"))
+ goto cleanup;
+
+ pid1 = id_from_prog_fd(bpf_program__fd(skel->progs.tc1));
+ pid2 = id_from_prog_fd(bpf_program__fd(skel->progs.tc2));
+ pid3 = id_from_prog_fd(bpf_program__fd(skel->progs.tc3));
+ pid4 = id_from_prog_fd(bpf_program__fd(skel->progs.tc4));
+
+ ASSERT_NEQ(pid1, pid2, "prog_ids_1_2");
+ ASSERT_NEQ(pid3, pid4, "prog_ids_3_4");
+ ASSERT_NEQ(pid2, pid3, "prog_ids_2_3");
+
+ assert_mprog_count(target, 0);
+
+ link = bpf_program__attach_tcx(skel->progs.tc1, loopback, &optl);
+ if (!ASSERT_OK_PTR(link, "link_attach"))
+ goto cleanup;
+
+ skel->links.tc1 = link;
+
+ lid1 = id_from_link_fd(bpf_link__fd(skel->links.tc1));
+
+ assert_mprog_count(target, 1);
+
+ link = bpf_program__attach_tcx(skel->progs.tc2, loopback, &optl);
+ if (!ASSERT_OK_PTR(link, "link_attach"))
+ goto cleanup;
+
+ skel->links.tc2 = link;
+
+ lid2 = id_from_link_fd(bpf_link__fd(skel->links.tc2));
+
+ assert_mprog_count(target, 2);
+
+ optq.prog_ids = prog_ids;
+ optq.link_ids = link_ids;
+
+ memset(prog_ids, 0, sizeof(prog_ids));
+ memset(link_ids, 0, sizeof(link_ids));
+ optq.count = ARRAY_SIZE(prog_ids);
+
+ err = bpf_prog_query_opts(loopback, target, &optq);
+ if (!ASSERT_OK(err, "prog_query"))
+ goto cleanup;
+
+ ASSERT_EQ(optq.count, 2, "count");
+ ASSERT_EQ(optq.revision, 3, "revision");
+ ASSERT_EQ(optq.prog_ids[0], pid1, "prog_ids[0]");
+ ASSERT_EQ(optq.link_ids[0], lid1, "link_ids[0]");
+ ASSERT_EQ(optq.prog_ids[1], pid2, "prog_ids[1]");
+ ASSERT_EQ(optq.link_ids[1], lid2, "link_ids[1]");
+ ASSERT_EQ(optq.prog_ids[2], 0, "prog_ids[2]");
+ ASSERT_EQ(optq.link_ids[2], 0, "link_ids[2]");
+
+ tc_skel_reset_all_seen(skel);
+ ASSERT_OK(system(ping_cmd), ping_cmd);
+
+ ASSERT_EQ(skel->bss->seen_tc1, true, "seen_tc1");
+ ASSERT_EQ(skel->bss->seen_tc2, true, "seen_tc2");
+ ASSERT_EQ(skel->bss->seen_tc3, false, "seen_tc3");
+ ASSERT_EQ(skel->bss->seen_tc4, false, "seen_tc4");
+
+ LIBBPF_OPTS_RESET(optl,
+ .flags = BPF_F_BEFORE,
+ .relative_fd = bpf_program__fd(skel->progs.tc2),
+ );
+
+ link = bpf_program__attach_tcx(skel->progs.tc3, loopback, &optl);
+ if (!ASSERT_OK_PTR(link, "link_attach"))
+ goto cleanup;
+
+ skel->links.tc3 = link;
+
+ lid3 = id_from_link_fd(bpf_link__fd(skel->links.tc3));
+
+ LIBBPF_OPTS_RESET(optl,
+ .flags = BPF_F_BEFORE | BPF_F_LINK,
+ .relative_id = lid1,
+ );
+
+ link = bpf_program__attach_tcx(skel->progs.tc4, loopback, &optl);
+ if (!ASSERT_OK_PTR(link, "link_attach"))
+ goto cleanup;
+
+ skel->links.tc4 = link;
+
+ lid4 = id_from_link_fd(bpf_link__fd(skel->links.tc4));
+
+ assert_mprog_count(target, 4);
+
+ memset(prog_ids, 0, sizeof(prog_ids));
+ memset(link_ids, 0, sizeof(link_ids));
+ optq.count = ARRAY_SIZE(prog_ids);
+
+ err = bpf_prog_query_opts(loopback, target, &optq);
+ if (!ASSERT_OK(err, "prog_query"))
+ goto cleanup;
+
+ ASSERT_EQ(optq.count, 4, "count");
+ ASSERT_EQ(optq.revision, 5, "revision");
+ ASSERT_EQ(optq.prog_ids[0], pid4, "prog_ids[0]");
+ ASSERT_EQ(optq.link_ids[0], lid4, "link_ids[0]");
+ ASSERT_EQ(optq.prog_ids[1], pid1, "prog_ids[1]");
+ ASSERT_EQ(optq.link_ids[1], lid1, "link_ids[1]");
+ ASSERT_EQ(optq.prog_ids[2], pid3, "prog_ids[2]");
+ ASSERT_EQ(optq.link_ids[2], lid3, "link_ids[2]");
+ ASSERT_EQ(optq.prog_ids[3], pid2, "prog_ids[3]");
+ ASSERT_EQ(optq.link_ids[3], lid2, "link_ids[3]");
+ ASSERT_EQ(optq.prog_ids[4], 0, "prog_ids[4]");
+ ASSERT_EQ(optq.link_ids[4], 0, "link_ids[4]");
+
+ tc_skel_reset_all_seen(skel);
+ ASSERT_OK(system(ping_cmd), ping_cmd);
+
+ ASSERT_EQ(skel->bss->seen_tc1, true, "seen_tc1");
+ ASSERT_EQ(skel->bss->seen_tc2, true, "seen_tc2");
+ ASSERT_EQ(skel->bss->seen_tc3, true, "seen_tc3");
+ ASSERT_EQ(skel->bss->seen_tc4, true, "seen_tc4");
+cleanup:
+ test_tc_link__destroy(skel);
+ assert_mprog_count(target, 0);
+}
+
+void serial_test_tc_links_before(void)
+{
+ test_tc_links_before_target(BPF_TCX_INGRESS);
+ test_tc_links_before_target(BPF_TCX_EGRESS);
+}
+
+static void test_tc_links_after_target(int target)
+{
+ LIBBPF_OPTS(bpf_prog_query_opts, optq);
+ LIBBPF_OPTS(bpf_tcx_opts, optl);
+ __u32 prog_ids[5], link_ids[5];
+ __u32 pid1, pid2, pid3, pid4;
+ __u32 lid1, lid2, lid3, lid4;
+ struct test_tc_link *skel;
+ struct bpf_link *link;
+ int err;
+
+ skel = test_tc_link__open();
+ if (!ASSERT_OK_PTR(skel, "skel_open"))
+ goto cleanup;
+
+ ASSERT_EQ(bpf_program__set_expected_attach_type(skel->progs.tc1, target),
+ 0, "tc1_attach_type");
+ ASSERT_EQ(bpf_program__set_expected_attach_type(skel->progs.tc2, target),
+ 0, "tc2_attach_type");
+ ASSERT_EQ(bpf_program__set_expected_attach_type(skel->progs.tc3, target),
+ 0, "tc3_attach_type");
+ ASSERT_EQ(bpf_program__set_expected_attach_type(skel->progs.tc4, target),
+ 0, "tc4_attach_type");
+
+ err = test_tc_link__load(skel);
+ if (!ASSERT_OK(err, "skel_load"))
+ goto cleanup;
+
+ pid1 = id_from_prog_fd(bpf_program__fd(skel->progs.tc1));
+ pid2 = id_from_prog_fd(bpf_program__fd(skel->progs.tc2));
+ pid3 = id_from_prog_fd(bpf_program__fd(skel->progs.tc3));
+ pid4 = id_from_prog_fd(bpf_program__fd(skel->progs.tc4));
+
+ ASSERT_NEQ(pid1, pid2, "prog_ids_1_2");
+ ASSERT_NEQ(pid3, pid4, "prog_ids_3_4");
+ ASSERT_NEQ(pid2, pid3, "prog_ids_2_3");
+
+ assert_mprog_count(target, 0);
+
+ link = bpf_program__attach_tcx(skel->progs.tc1, loopback, &optl);
+ if (!ASSERT_OK_PTR(link, "link_attach"))
+ goto cleanup;
+
+ skel->links.tc1 = link;
+
+ lid1 = id_from_link_fd(bpf_link__fd(skel->links.tc1));
+
+ assert_mprog_count(target, 1);
+
+ link = bpf_program__attach_tcx(skel->progs.tc2, loopback, &optl);
+ if (!ASSERT_OK_PTR(link, "link_attach"))
+ goto cleanup;
+
+ skel->links.tc2 = link;
+
+ lid2 = id_from_link_fd(bpf_link__fd(skel->links.tc2));
+
+ assert_mprog_count(target, 2);
+
+ optq.prog_ids = prog_ids;
+ optq.link_ids = link_ids;
+
+ memset(prog_ids, 0, sizeof(prog_ids));
+ memset(link_ids, 0, sizeof(link_ids));
+ optq.count = ARRAY_SIZE(prog_ids);
+
+ err = bpf_prog_query_opts(loopback, target, &optq);
+ if (!ASSERT_OK(err, "prog_query"))
+ goto cleanup;
+
+ ASSERT_EQ(optq.count, 2, "count");
+ ASSERT_EQ(optq.revision, 3, "revision");
+ ASSERT_EQ(optq.prog_ids[0], pid1, "prog_ids[0]");
+ ASSERT_EQ(optq.link_ids[0], lid1, "link_ids[0]");
+ ASSERT_EQ(optq.prog_ids[1], pid2, "prog_ids[1]");
+ ASSERT_EQ(optq.link_ids[1], lid2, "link_ids[1]");
+ ASSERT_EQ(optq.prog_ids[2], 0, "prog_ids[2]");
+ ASSERT_EQ(optq.link_ids[2], 0, "link_ids[2]");
+
+ tc_skel_reset_all_seen(skel);
+ ASSERT_OK(system(ping_cmd), ping_cmd);
+
+ ASSERT_EQ(skel->bss->seen_tc1, true, "seen_tc1");
+ ASSERT_EQ(skel->bss->seen_tc2, true, "seen_tc2");
+ ASSERT_EQ(skel->bss->seen_tc3, false, "seen_tc3");
+ ASSERT_EQ(skel->bss->seen_tc4, false, "seen_tc4");
+
+ LIBBPF_OPTS_RESET(optl,
+ .flags = BPF_F_AFTER,
+ .relative_fd = bpf_program__fd(skel->progs.tc1),
+ );
+
+ link = bpf_program__attach_tcx(skel->progs.tc3, loopback, &optl);
+ if (!ASSERT_OK_PTR(link, "link_attach"))
+ goto cleanup;
+
+ skel->links.tc3 = link;
+
+ lid3 = id_from_link_fd(bpf_link__fd(skel->links.tc3));
+
+ LIBBPF_OPTS_RESET(optl,
+ .flags = BPF_F_AFTER | BPF_F_LINK,
+ .relative_fd = bpf_link__fd(skel->links.tc2),
+ );
+
+ link = bpf_program__attach_tcx(skel->progs.tc4, loopback, &optl);
+ if (!ASSERT_OK_PTR(link, "link_attach"))
+ goto cleanup;
+
+ skel->links.tc4 = link;
+
+ lid4 = id_from_link_fd(bpf_link__fd(skel->links.tc4));
+
+ assert_mprog_count(target, 4);
+
+ memset(prog_ids, 0, sizeof(prog_ids));
+ memset(link_ids, 0, sizeof(link_ids));
+ optq.count = ARRAY_SIZE(prog_ids);
+
+ err = bpf_prog_query_opts(loopback, target, &optq);
+ if (!ASSERT_OK(err, "prog_query"))
+ goto cleanup;
+
+ ASSERT_EQ(optq.count, 4, "count");
+ ASSERT_EQ(optq.revision, 5, "revision");
+ ASSERT_EQ(optq.prog_ids[0], pid1, "prog_ids[0]");
+ ASSERT_EQ(optq.link_ids[0], lid1, "link_ids[0]");
+ ASSERT_EQ(optq.prog_ids[1], pid3, "prog_ids[1]");
+ ASSERT_EQ(optq.link_ids[1], lid3, "link_ids[1]");
+ ASSERT_EQ(optq.prog_ids[2], pid2, "prog_ids[2]");
+ ASSERT_EQ(optq.link_ids[2], lid2, "link_ids[2]");
+ ASSERT_EQ(optq.prog_ids[3], pid4, "prog_ids[3]");
+ ASSERT_EQ(optq.link_ids[3], lid4, "link_ids[3]");
+ ASSERT_EQ(optq.prog_ids[4], 0, "prog_ids[4]");
+ ASSERT_EQ(optq.link_ids[4], 0, "link_ids[4]");
+
+ tc_skel_reset_all_seen(skel);
+ ASSERT_OK(system(ping_cmd), ping_cmd);
+
+ ASSERT_EQ(skel->bss->seen_tc1, true, "seen_tc1");
+ ASSERT_EQ(skel->bss->seen_tc2, true, "seen_tc2");
+ ASSERT_EQ(skel->bss->seen_tc3, true, "seen_tc3");
+ ASSERT_EQ(skel->bss->seen_tc4, true, "seen_tc4");
+cleanup:
+ test_tc_link__destroy(skel);
+ assert_mprog_count(target, 0);
+}
+
+void serial_test_tc_links_after(void)
+{
+ test_tc_links_after_target(BPF_TCX_INGRESS);
+ test_tc_links_after_target(BPF_TCX_EGRESS);
+}
+
+static void test_tc_links_revision_target(int target)
+{
+ LIBBPF_OPTS(bpf_prog_query_opts, optq);
+ LIBBPF_OPTS(bpf_tcx_opts, optl);
+ __u32 prog_ids[3], link_ids[3];
+ __u32 pid1, pid2, lid1, lid2;
+ struct test_tc_link *skel;
+ struct bpf_link *link;
+ int err;
+
+ skel = test_tc_link__open();
+ if (!ASSERT_OK_PTR(skel, "skel_open"))
+ goto cleanup;
+
+ ASSERT_EQ(bpf_program__set_expected_attach_type(skel->progs.tc1, target),
+ 0, "tc1_attach_type");
+ ASSERT_EQ(bpf_program__set_expected_attach_type(skel->progs.tc2, target),
+ 0, "tc2_attach_type");
+
+ err = test_tc_link__load(skel);
+ if (!ASSERT_OK(err, "skel_load"))
+ goto cleanup;
+
+ pid1 = id_from_prog_fd(bpf_program__fd(skel->progs.tc1));
+ pid2 = id_from_prog_fd(bpf_program__fd(skel->progs.tc2));
+
+ ASSERT_NEQ(pid1, pid2, "prog_ids_1_2");
+
+ assert_mprog_count(target, 0);
+
+ optl.expected_revision = 1;
+
+ link = bpf_program__attach_tcx(skel->progs.tc1, loopback, &optl);
+ if (!ASSERT_OK_PTR(link, "link_attach"))
+ goto cleanup;
+
+ skel->links.tc1 = link;
+
+ lid1 = id_from_link_fd(bpf_link__fd(skel->links.tc1));
+
+ assert_mprog_count(target, 1);
+
+ optl.expected_revision = 1;
+
+ link = bpf_program__attach_tcx(skel->progs.tc2, loopback, &optl);
+ if (!ASSERT_ERR_PTR(link, "link_attach_should_fail")) {
+ bpf_link__destroy(link);
+ goto cleanup;
+ }
+
+ assert_mprog_count(target, 1);
+
+ optl.expected_revision = 2;
+
+ link = bpf_program__attach_tcx(skel->progs.tc2, loopback, &optl);
+ if (!ASSERT_OK_PTR(link, "link_attach"))
+ goto cleanup;
+
+ skel->links.tc2 = link;
+
+ lid2 = id_from_link_fd(bpf_link__fd(skel->links.tc2));
+
+ assert_mprog_count(target, 2);
+
+ optq.prog_ids = prog_ids;
+ optq.link_ids = link_ids;
+
+ memset(prog_ids, 0, sizeof(prog_ids));
+ memset(link_ids, 0, sizeof(link_ids));
+ optq.count = ARRAY_SIZE(prog_ids);
+
+ err = bpf_prog_query_opts(loopback, target, &optq);
+ if (!ASSERT_OK(err, "prog_query"))
+ goto cleanup;
+
+ ASSERT_EQ(optq.count, 2, "count");
+ ASSERT_EQ(optq.revision, 3, "revision");
+ ASSERT_EQ(optq.prog_ids[0], pid1, "prog_ids[0]");
+ ASSERT_EQ(optq.link_ids[0], lid1, "link_ids[0]");
+ ASSERT_EQ(optq.prog_ids[1], pid2, "prog_ids[1]");
+ ASSERT_EQ(optq.link_ids[1], lid2, "link_ids[1]");
+ ASSERT_EQ(optq.prog_ids[2], 0, "prog_ids[2]");
+ ASSERT_EQ(optq.link_ids[2], 0, "prog_ids[2]");
+
+ tc_skel_reset_all_seen(skel);
+ ASSERT_OK(system(ping_cmd), ping_cmd);
+
+ ASSERT_EQ(skel->bss->seen_tc1, true, "seen_tc1");
+ ASSERT_EQ(skel->bss->seen_tc2, true, "seen_tc2");
+cleanup:
+ test_tc_link__destroy(skel);
+ assert_mprog_count(target, 0);
+}
+
+void serial_test_tc_links_revision(void)
+{
+ test_tc_links_revision_target(BPF_TCX_INGRESS);
+ test_tc_links_revision_target(BPF_TCX_EGRESS);
+}
+
+static void test_tc_chain_classic(int target, bool chain_tc_old)
+{
+ LIBBPF_OPTS(bpf_tc_opts, tc_opts, .handle = 1, .priority = 1);
+ LIBBPF_OPTS(bpf_tc_hook, tc_hook, .ifindex = loopback);
+ bool hook_created = false, tc_attached = false;
+ LIBBPF_OPTS(bpf_tcx_opts, optl);
+ __u32 pid1, pid2, pid3;
+ struct test_tc_link *skel;
+ struct bpf_link *link;
+ int err;
+
+ skel = test_tc_link__open();
+ if (!ASSERT_OK_PTR(skel, "skel_open"))
+ goto cleanup;
+
+ ASSERT_EQ(bpf_program__set_expected_attach_type(skel->progs.tc1, target),
+ 0, "tc1_attach_type");
+ ASSERT_EQ(bpf_program__set_expected_attach_type(skel->progs.tc2, target),
+ 0, "tc2_attach_type");
+
+ err = test_tc_link__load(skel);
+ if (!ASSERT_OK(err, "skel_load"))
+ goto cleanup;
+
+ pid1 = id_from_prog_fd(bpf_program__fd(skel->progs.tc1));
+ pid2 = id_from_prog_fd(bpf_program__fd(skel->progs.tc2));
+ pid3 = id_from_prog_fd(bpf_program__fd(skel->progs.tc3));
+
+ ASSERT_NEQ(pid1, pid2, "prog_ids_1_2");
+ ASSERT_NEQ(pid2, pid3, "prog_ids_2_3");
+
+ assert_mprog_count(target, 0);
+
+ if (chain_tc_old) {
+ tc_hook.attach_point = target == BPF_TCX_INGRESS ?
+ BPF_TC_INGRESS : BPF_TC_EGRESS;
+ err = bpf_tc_hook_create(&tc_hook);
+ if (err == 0)
+ hook_created = true;
+ err = err == -EEXIST ? 0 : err;
+ if (!ASSERT_OK(err, "bpf_tc_hook_create"))
+ goto cleanup;
+
+ tc_opts.prog_fd = bpf_program__fd(skel->progs.tc3);
+ err = bpf_tc_attach(&tc_hook, &tc_opts);
+ if (!ASSERT_OK(err, "bpf_tc_attach"))
+ goto cleanup;
+ tc_attached = true;
+ }
+
+ link = bpf_program__attach_tcx(skel->progs.tc1, loopback, &optl);
+ if (!ASSERT_OK_PTR(link, "link_attach"))
+ goto cleanup;
+
+ skel->links.tc1 = link;
+
+ link = bpf_program__attach_tcx(skel->progs.tc2, loopback, &optl);
+ if (!ASSERT_OK_PTR(link, "link_attach"))
+ goto cleanup;
+
+ skel->links.tc2 = link;
+
+ assert_mprog_count(target, 2);
+
+ tc_skel_reset_all_seen(skel);
+ ASSERT_OK(system(ping_cmd), ping_cmd);
+
+ ASSERT_EQ(skel->bss->seen_tc1, true, "seen_tc1");
+ ASSERT_EQ(skel->bss->seen_tc2, true, "seen_tc2");
+ ASSERT_EQ(skel->bss->seen_tc3, chain_tc_old, "seen_tc3");
+
+ err = bpf_link__detach(skel->links.tc2);
+ if (!ASSERT_OK(err, "prog_detach"))
+ goto cleanup;
+
+ assert_mprog_count(target, 1);
+
+ tc_skel_reset_all_seen(skel);
+ ASSERT_OK(system(ping_cmd), ping_cmd);
+
+ ASSERT_EQ(skel->bss->seen_tc1, true, "seen_tc1");
+ ASSERT_EQ(skel->bss->seen_tc2, false, "seen_tc2");
+ ASSERT_EQ(skel->bss->seen_tc3, chain_tc_old, "seen_tc3");
+cleanup:
+ if (tc_attached) {
+ tc_opts.flags = tc_opts.prog_fd = tc_opts.prog_id = 0;
+ err = bpf_tc_detach(&tc_hook, &tc_opts);
+ ASSERT_OK(err, "bpf_tc_detach");
+ }
+ if (hook_created) {
+ tc_hook.attach_point = BPF_TC_INGRESS | BPF_TC_EGRESS;
+ bpf_tc_hook_destroy(&tc_hook);
+ }
+ assert_mprog_count(target, 1);
+ test_tc_link__destroy(skel);
+ assert_mprog_count(target, 0);
+}
+
+void serial_test_tc_links_chain_classic(void)
+{
+ test_tc_chain_classic(BPF_TCX_INGRESS, false);
+ test_tc_chain_classic(BPF_TCX_EGRESS, false);
+ test_tc_chain_classic(BPF_TCX_INGRESS, true);
+ test_tc_chain_classic(BPF_TCX_EGRESS, true);
+}
+
+static void test_tc_links_replace_target(int target)
+{
+ LIBBPF_OPTS(bpf_prog_query_opts, optq);
+ LIBBPF_OPTS(bpf_tcx_opts, optl);
+ __u32 pid1, pid2, pid3, lid1, lid2;
+ __u32 prog_ids[4], link_ids[4];
+ struct test_tc_link *skel;
+ struct bpf_link *link;
+ int err;
+
+ skel = test_tc_link__open();
+ if (!ASSERT_OK_PTR(skel, "skel_open"))
+ goto cleanup;
+
+ ASSERT_EQ(bpf_program__set_expected_attach_type(skel->progs.tc1, target),
+ 0, "tc1_attach_type");
+ ASSERT_EQ(bpf_program__set_expected_attach_type(skel->progs.tc2, target),
+ 0, "tc2_attach_type");
+ ASSERT_EQ(bpf_program__set_expected_attach_type(skel->progs.tc3, target),
+ 0, "tc3_attach_type");
+
+ err = test_tc_link__load(skel);
+ if (!ASSERT_OK(err, "skel_load"))
+ goto cleanup;
+
+ pid1 = id_from_prog_fd(bpf_program__fd(skel->progs.tc1));
+ pid2 = id_from_prog_fd(bpf_program__fd(skel->progs.tc2));
+ pid3 = id_from_prog_fd(bpf_program__fd(skel->progs.tc3));
+
+ ASSERT_NEQ(pid1, pid2, "prog_ids_1_2");
+ ASSERT_NEQ(pid2, pid3, "prog_ids_2_3");
+
+ assert_mprog_count(target, 0);
+
+ optl.expected_revision = 1;
+
+ link = bpf_program__attach_tcx(skel->progs.tc1, loopback, &optl);
+ if (!ASSERT_OK_PTR(link, "link_attach"))
+ goto cleanup;
+
+ skel->links.tc1 = link;
+
+ lid1 = id_from_link_fd(bpf_link__fd(skel->links.tc1));
+
+ assert_mprog_count(target, 1);
+
+ LIBBPF_OPTS_RESET(optl,
+ .flags = BPF_F_BEFORE,
+ .relative_id = pid1,
+ .expected_revision = 2,
+ );
+
+ link = bpf_program__attach_tcx(skel->progs.tc2, loopback, &optl);
+ if (!ASSERT_OK_PTR(link, "link_attach"))
+ goto cleanup;
+
+ skel->links.tc2 = link;
+
+ lid2 = id_from_link_fd(bpf_link__fd(skel->links.tc2));
+
+ assert_mprog_count(target, 2);
+
+ optq.prog_ids = prog_ids;
+ optq.link_ids = link_ids;
+
+ memset(prog_ids, 0, sizeof(prog_ids));
+ memset(link_ids, 0, sizeof(link_ids));
+ optq.count = ARRAY_SIZE(prog_ids);
+
+ err = bpf_prog_query_opts(loopback, target, &optq);
+ if (!ASSERT_OK(err, "prog_query"))
+ goto cleanup;
+
+ ASSERT_EQ(optq.count, 2, "count");
+ ASSERT_EQ(optq.revision, 3, "revision");
+ ASSERT_EQ(optq.prog_ids[0], pid2, "prog_ids[0]");
+ ASSERT_EQ(optq.link_ids[0], lid2, "link_ids[0]");
+ ASSERT_EQ(optq.prog_ids[1], pid1, "prog_ids[1]");
+ ASSERT_EQ(optq.link_ids[1], lid1, "link_ids[1]");
+ ASSERT_EQ(optq.prog_ids[2], 0, "prog_ids[2]");
+ ASSERT_EQ(optq.link_ids[2], 0, "link_ids[2]");
+
+ tc_skel_reset_all_seen(skel);
+ ASSERT_OK(system(ping_cmd), ping_cmd);
+
+ ASSERT_EQ(skel->bss->seen_tc1, true, "seen_tc1");
+ ASSERT_EQ(skel->bss->seen_tc2, true, "seen_tc2");
+ ASSERT_EQ(skel->bss->seen_tc3, false, "seen_tc3");
+
+ LIBBPF_OPTS_RESET(optl,
+ .flags = BPF_F_REPLACE,
+ .relative_fd = bpf_program__fd(skel->progs.tc2),
+ .expected_revision = 3,
+ );
+
+ link = bpf_program__attach_tcx(skel->progs.tc3, loopback, &optl);
+ if (!ASSERT_ERR_PTR(link, "link_attach_should_fail")) {
+ bpf_link__destroy(link);
+ goto cleanup;
+ }
+
+ assert_mprog_count(target, 2);
+
+ LIBBPF_OPTS_RESET(optl,
+ .flags = BPF_F_REPLACE | BPF_F_LINK,
+ .relative_fd = bpf_link__fd(skel->links.tc2),
+ .expected_revision = 3,
+ );
+
+ link = bpf_program__attach_tcx(skel->progs.tc3, loopback, &optl);
+ if (!ASSERT_ERR_PTR(link, "link_attach_should_fail")) {
+ bpf_link__destroy(link);
+ goto cleanup;
+ }
+
+ assert_mprog_count(target, 2);
+
+ LIBBPF_OPTS_RESET(optl,
+ .flags = BPF_F_REPLACE | BPF_F_LINK | BPF_F_AFTER,
+ .relative_id = lid2,
+ );
+
+ link = bpf_program__attach_tcx(skel->progs.tc3, loopback, &optl);
+ if (!ASSERT_ERR_PTR(link, "link_attach_should_fail")) {
+ bpf_link__destroy(link);
+ goto cleanup;
+ }
+
+ assert_mprog_count(target, 2);
+
+ err = bpf_link__update_program(skel->links.tc2, skel->progs.tc3);
+ if (!ASSERT_OK(err, "link_update"))
+ goto cleanup;
+
+ assert_mprog_count(target, 2);
+
+ memset(prog_ids, 0, sizeof(prog_ids));
+ memset(link_ids, 0, sizeof(link_ids));
+ optq.count = ARRAY_SIZE(prog_ids);
+
+ err = bpf_prog_query_opts(loopback, target, &optq);
+ if (!ASSERT_OK(err, "prog_query"))
+ goto cleanup;
+
+ ASSERT_EQ(optq.count, 2, "count");
+ ASSERT_EQ(optq.revision, 4, "revision");
+ ASSERT_EQ(optq.prog_ids[0], pid3, "prog_ids[0]");
+ ASSERT_EQ(optq.link_ids[0], lid2, "link_ids[0]");
+ ASSERT_EQ(optq.prog_ids[1], pid1, "prog_ids[1]");
+ ASSERT_EQ(optq.link_ids[1], lid1, "link_ids[1]");
+ ASSERT_EQ(optq.prog_ids[2], 0, "prog_ids[2]");
+ ASSERT_EQ(optq.link_ids[2], 0, "link_ids[2]");
+
+ tc_skel_reset_all_seen(skel);
+ ASSERT_OK(system(ping_cmd), ping_cmd);
+
+ ASSERT_EQ(skel->bss->seen_tc1, true, "seen_tc1");
+ ASSERT_EQ(skel->bss->seen_tc2, false, "seen_tc2");
+ ASSERT_EQ(skel->bss->seen_tc3, true, "seen_tc3");
+
+ err = bpf_link__detach(skel->links.tc2);
+ if (!ASSERT_OK(err, "link_detach"))
+ goto cleanup;
+
+ assert_mprog_count(target, 1);
+
+ memset(prog_ids, 0, sizeof(prog_ids));
+ memset(link_ids, 0, sizeof(link_ids));
+ optq.count = ARRAY_SIZE(prog_ids);
+
+ err = bpf_prog_query_opts(loopback, target, &optq);
+ if (!ASSERT_OK(err, "prog_query"))
+ goto cleanup;
+
+ ASSERT_EQ(optq.count, 1, "count");
+ ASSERT_EQ(optq.revision, 5, "revision");
+ ASSERT_EQ(optq.prog_ids[0], pid1, "prog_ids[0]");
+ ASSERT_EQ(optq.link_ids[0], lid1, "link_ids[0]");
+ ASSERT_EQ(optq.prog_ids[1], 0, "prog_ids[1]");
+ ASSERT_EQ(optq.link_ids[1], 0, "link_ids[1]");
+
+ tc_skel_reset_all_seen(skel);
+ ASSERT_OK(system(ping_cmd), ping_cmd);
+
+ ASSERT_EQ(skel->bss->seen_tc1, true, "seen_tc1");
+ ASSERT_EQ(skel->bss->seen_tc2, false, "seen_tc2");
+ ASSERT_EQ(skel->bss->seen_tc3, false, "seen_tc3");
+
+ err = bpf_link__update_program(skel->links.tc1, skel->progs.tc1);
+ if (!ASSERT_OK(err, "link_update_self"))
+ goto cleanup;
+
+ assert_mprog_count(target, 1);
+
+ memset(prog_ids, 0, sizeof(prog_ids));
+ memset(link_ids, 0, sizeof(link_ids));
+ optq.count = ARRAY_SIZE(prog_ids);
+
+ err = bpf_prog_query_opts(loopback, target, &optq);
+ if (!ASSERT_OK(err, "prog_query"))
+ goto cleanup;
+
+ ASSERT_EQ(optq.count, 1, "count");
+ ASSERT_EQ(optq.revision, 5, "revision");
+ ASSERT_EQ(optq.prog_ids[0], pid1, "prog_ids[0]");
+ ASSERT_EQ(optq.link_ids[0], lid1, "link_ids[0]");
+ ASSERT_EQ(optq.prog_ids[1], 0, "prog_ids[1]");
+ ASSERT_EQ(optq.link_ids[1], 0, "link_ids[1]");
+
+ tc_skel_reset_all_seen(skel);
+ ASSERT_OK(system(ping_cmd), ping_cmd);
+
+ ASSERT_EQ(skel->bss->seen_tc1, true, "seen_tc1");
+ ASSERT_EQ(skel->bss->seen_tc2, false, "seen_tc2");
+ ASSERT_EQ(skel->bss->seen_tc3, false, "seen_tc3");
+cleanup:
+ test_tc_link__destroy(skel);
+ assert_mprog_count(target, 0);
+}
+
+void serial_test_tc_links_replace(void)
+{
+ test_tc_links_replace_target(BPF_TCX_INGRESS);
+ test_tc_links_replace_target(BPF_TCX_EGRESS);
+}
+
+static void test_tc_links_invalid_target(int target)
+{
+ LIBBPF_OPTS(bpf_prog_query_opts, optq);
+ LIBBPF_OPTS(bpf_tcx_opts, optl);
+ __u32 pid1, pid2, lid1;
+ struct test_tc_link *skel;
+ struct bpf_link *link;
+ int err;
+
+ skel = test_tc_link__open();
+ if (!ASSERT_OK_PTR(skel, "skel_open"))
+ goto cleanup;
+
+ ASSERT_EQ(bpf_program__set_expected_attach_type(skel->progs.tc1, target),
+ 0, "tc1_attach_type");
+ ASSERT_EQ(bpf_program__set_expected_attach_type(skel->progs.tc2, target),
+ 0, "tc2_attach_type");
+
+ err = test_tc_link__load(skel);
+ if (!ASSERT_OK(err, "skel_load"))
+ goto cleanup;
+
+ pid1 = id_from_prog_fd(bpf_program__fd(skel->progs.tc1));
+ pid2 = id_from_prog_fd(bpf_program__fd(skel->progs.tc2));
+
+ ASSERT_NEQ(pid1, pid2, "prog_ids_1_2");
+
+ assert_mprog_count(target, 0);
+
+ optl.flags = BPF_F_BEFORE | BPF_F_AFTER;
+
+ link = bpf_program__attach_tcx(skel->progs.tc1, loopback, &optl);
+ if (!ASSERT_ERR_PTR(link, "link_attach_should_fail")) {
+ bpf_link__destroy(link);
+ goto cleanup;
+ }
+
+ assert_mprog_count(target, 0);
+
+ LIBBPF_OPTS_RESET(optl,
+ .flags = BPF_F_BEFORE | BPF_F_ID,
+ );
+
+ link = bpf_program__attach_tcx(skel->progs.tc1, loopback, &optl);
+ if (!ASSERT_ERR_PTR(link, "link_attach_should_fail")) {
+ bpf_link__destroy(link);
+ goto cleanup;
+ }
+
+ assert_mprog_count(target, 0);
+
+ LIBBPF_OPTS_RESET(optl,
+ .flags = BPF_F_AFTER | BPF_F_ID,
+ );
+
+ link = bpf_program__attach_tcx(skel->progs.tc1, loopback, &optl);
+ if (!ASSERT_ERR_PTR(link, "link_attach_should_fail")) {
+ bpf_link__destroy(link);
+ goto cleanup;
+ }
+
+ assert_mprog_count(target, 0);
+
+ LIBBPF_OPTS_RESET(optl,
+ .flags = BPF_F_ID,
+ );
+
+ link = bpf_program__attach_tcx(skel->progs.tc1, loopback, &optl);
+ if (!ASSERT_ERR_PTR(link, "link_attach_should_fail")) {
+ bpf_link__destroy(link);
+ goto cleanup;
+ }
+
+ assert_mprog_count(target, 0);
+
+ LIBBPF_OPTS_RESET(optl,
+ .flags = BPF_F_LINK,
+ .relative_fd = bpf_program__fd(skel->progs.tc2),
+ );
+
+ link = bpf_program__attach_tcx(skel->progs.tc1, loopback, &optl);
+ if (!ASSERT_ERR_PTR(link, "link_attach_should_fail")) {
+ bpf_link__destroy(link);
+ goto cleanup;
+ }
+
+ assert_mprog_count(target, 0);
+
+ LIBBPF_OPTS_RESET(optl,
+ .flags = BPF_F_LINK,
+ );
+
+ link = bpf_program__attach_tcx(skel->progs.tc1, loopback, &optl);
+ if (!ASSERT_ERR_PTR(link, "link_attach_should_fail")) {
+ bpf_link__destroy(link);
+ goto cleanup;
+ }
+
+ assert_mprog_count(target, 0);
+
+ LIBBPF_OPTS_RESET(optl,
+ .relative_fd = bpf_program__fd(skel->progs.tc2),
+ );
+
+ link = bpf_program__attach_tcx(skel->progs.tc1, loopback, &optl);
+ if (!ASSERT_ERR_PTR(link, "link_attach_should_fail")) {
+ bpf_link__destroy(link);
+ goto cleanup;
+ }
+
+ assert_mprog_count(target, 0);
+
+ LIBBPF_OPTS_RESET(optl,
+ .flags = BPF_F_BEFORE | BPF_F_AFTER,
+ .relative_fd = bpf_program__fd(skel->progs.tc2),
+ );
+
+ link = bpf_program__attach_tcx(skel->progs.tc1, loopback, &optl);
+ if (!ASSERT_ERR_PTR(link, "link_attach_should_fail")) {
+ bpf_link__destroy(link);
+ goto cleanup;
+ }
+
+ assert_mprog_count(target, 0);
+
+ LIBBPF_OPTS_RESET(optl,
+ .flags = BPF_F_BEFORE,
+ .relative_fd = bpf_program__fd(skel->progs.tc1),
+ );
+
+ link = bpf_program__attach_tcx(skel->progs.tc1, loopback, &optl);
+ if (!ASSERT_ERR_PTR(link, "link_attach_should_fail")) {
+ bpf_link__destroy(link);
+ goto cleanup;
+ }
+
+ assert_mprog_count(target, 0);
+
+ LIBBPF_OPTS_RESET(optl,
+ .flags = BPF_F_ID,
+ .relative_id = pid2,
+ );
+
+ link = bpf_program__attach_tcx(skel->progs.tc1, loopback, &optl);
+ if (!ASSERT_ERR_PTR(link, "link_attach_should_fail")) {
+ bpf_link__destroy(link);
+ goto cleanup;
+ }
+
+ assert_mprog_count(target, 0);
+
+ LIBBPF_OPTS_RESET(optl,
+ .flags = BPF_F_ID,
+ .relative_id = 42,
+ );
+
+ link = bpf_program__attach_tcx(skel->progs.tc1, loopback, &optl);
+ if (!ASSERT_ERR_PTR(link, "link_attach_should_fail")) {
+ bpf_link__destroy(link);
+ goto cleanup;
+ }
+
+ assert_mprog_count(target, 0);
+
+ LIBBPF_OPTS_RESET(optl,
+ .flags = BPF_F_BEFORE,
+ .relative_fd = bpf_program__fd(skel->progs.tc1),
+ );
+
+ link = bpf_program__attach_tcx(skel->progs.tc1, loopback, &optl);
+ if (!ASSERT_ERR_PTR(link, "link_attach_should_fail")) {
+ bpf_link__destroy(link);
+ goto cleanup;
+ }
+
+ assert_mprog_count(target, 0);
+
+ LIBBPF_OPTS_RESET(optl,
+ .flags = BPF_F_BEFORE | BPF_F_LINK,
+ .relative_fd = bpf_program__fd(skel->progs.tc1),
+ );
+
+ link = bpf_program__attach_tcx(skel->progs.tc1, loopback, &optl);
+ if (!ASSERT_ERR_PTR(link, "link_attach_should_fail")) {
+ bpf_link__destroy(link);
+ goto cleanup;
+ }
+
+ assert_mprog_count(target, 0);
+
+ LIBBPF_OPTS_RESET(optl,
+ .flags = BPF_F_AFTER,
+ .relative_fd = bpf_program__fd(skel->progs.tc1),
+ );
+
+ link = bpf_program__attach_tcx(skel->progs.tc1, loopback, &optl);
+ if (!ASSERT_ERR_PTR(link, "link_attach_should_fail")) {
+ bpf_link__destroy(link);
+ goto cleanup;
+ }
+
+ assert_mprog_count(target, 0);
+
+ LIBBPF_OPTS_RESET(optl);
+
+ link = bpf_program__attach_tcx(skel->progs.tc1, 0, &optl);
+ if (!ASSERT_ERR_PTR(link, "link_attach_should_fail")) {
+ bpf_link__destroy(link);
+ goto cleanup;
+ }
+
+ assert_mprog_count(target, 0);
+
+ LIBBPF_OPTS_RESET(optl,
+ .flags = BPF_F_AFTER | BPF_F_LINK,
+ .relative_fd = bpf_program__fd(skel->progs.tc1),
+ );
+
+ link = bpf_program__attach_tcx(skel->progs.tc1, loopback, &optl);
+ if (!ASSERT_ERR_PTR(link, "link_attach_should_fail")) {
+ bpf_link__destroy(link);
+ goto cleanup;
+ }
+
+ assert_mprog_count(target, 0);
+
+ LIBBPF_OPTS_RESET(optl);
+
+ link = bpf_program__attach_tcx(skel->progs.tc1, loopback, &optl);
+ if (!ASSERT_OK_PTR(link, "link_attach"))
+ goto cleanup;
+
+ skel->links.tc1 = link;
+
+ lid1 = id_from_link_fd(bpf_link__fd(skel->links.tc1));
+
+ assert_mprog_count(target, 1);
+
+ LIBBPF_OPTS_RESET(optl,
+ .flags = BPF_F_AFTER | BPF_F_LINK,
+ .relative_fd = bpf_program__fd(skel->progs.tc1),
+ );
+
+ link = bpf_program__attach_tcx(skel->progs.tc2, loopback, &optl);
+ if (!ASSERT_ERR_PTR(link, "link_attach_should_fail")) {
+ bpf_link__destroy(link);
+ goto cleanup;
+ }
+
+ assert_mprog_count(target, 1);
+
+ LIBBPF_OPTS_RESET(optl,
+ .flags = BPF_F_BEFORE | BPF_F_LINK | BPF_F_ID,
+ .relative_id = ~0,
+ );
+
+ link = bpf_program__attach_tcx(skel->progs.tc2, loopback, &optl);
+ if (!ASSERT_ERR_PTR(link, "link_attach_should_fail")) {
+ bpf_link__destroy(link);
+ goto cleanup;
+ }
+
+ assert_mprog_count(target, 1);
+
+ LIBBPF_OPTS_RESET(optl,
+ .flags = BPF_F_BEFORE | BPF_F_LINK | BPF_F_ID,
+ .relative_id = lid1,
+ );
+
+ link = bpf_program__attach_tcx(skel->progs.tc1, loopback, &optl);
+ if (!ASSERT_ERR_PTR(link, "link_attach_should_fail")) {
+ bpf_link__destroy(link);
+ goto cleanup;
+ }
+
+ assert_mprog_count(target, 1);
+
+ LIBBPF_OPTS_RESET(optl,
+ .flags = BPF_F_BEFORE | BPF_F_ID,
+ .relative_id = pid1,
+ );
+
+ link = bpf_program__attach_tcx(skel->progs.tc1, loopback, &optl);
+ if (!ASSERT_ERR_PTR(link, "link_attach_should_fail")) {
+ bpf_link__destroy(link);
+ goto cleanup;
+ }
+ assert_mprog_count(target, 1);
+
+ LIBBPF_OPTS_RESET(optl,
+ .flags = BPF_F_BEFORE | BPF_F_LINK | BPF_F_ID,
+ .relative_id = lid1,
+ );
+
+ link = bpf_program__attach_tcx(skel->progs.tc2, loopback, &optl);
+ if (!ASSERT_OK_PTR(link, "link_attach"))
+ goto cleanup;
+
+ skel->links.tc2 = link;
+
+ assert_mprog_count(target, 2);
+cleanup:
+ test_tc_link__destroy(skel);
+ assert_mprog_count(target, 0);
+}
+
+void serial_test_tc_links_invalid(void)
+{
+ test_tc_links_invalid_target(BPF_TCX_INGRESS);
+ test_tc_links_invalid_target(BPF_TCX_EGRESS);
+}
+
+static void test_tc_links_prepend_target(int target)
+{
+ LIBBPF_OPTS(bpf_prog_query_opts, optq);
+ LIBBPF_OPTS(bpf_tcx_opts, optl);
+ __u32 prog_ids[5], link_ids[5];
+ __u32 pid1, pid2, pid3, pid4;
+ __u32 lid1, lid2, lid3, lid4;
+ struct test_tc_link *skel;
+ struct bpf_link *link;
+ int err;
+
+ skel = test_tc_link__open();
+ if (!ASSERT_OK_PTR(skel, "skel_open"))
+ goto cleanup;
+
+ ASSERT_EQ(bpf_program__set_expected_attach_type(skel->progs.tc1, target),
+ 0, "tc1_attach_type");
+ ASSERT_EQ(bpf_program__set_expected_attach_type(skel->progs.tc2, target),
+ 0, "tc2_attach_type");
+ ASSERT_EQ(bpf_program__set_expected_attach_type(skel->progs.tc3, target),
+ 0, "tc3_attach_type");
+ ASSERT_EQ(bpf_program__set_expected_attach_type(skel->progs.tc4, target),
+ 0, "tc4_attach_type");
+
+ err = test_tc_link__load(skel);
+ if (!ASSERT_OK(err, "skel_load"))
+ goto cleanup;
+
+ pid1 = id_from_prog_fd(bpf_program__fd(skel->progs.tc1));
+ pid2 = id_from_prog_fd(bpf_program__fd(skel->progs.tc2));
+ pid3 = id_from_prog_fd(bpf_program__fd(skel->progs.tc3));
+ pid4 = id_from_prog_fd(bpf_program__fd(skel->progs.tc4));
+
+ ASSERT_NEQ(pid1, pid2, "prog_ids_1_2");
+ ASSERT_NEQ(pid3, pid4, "prog_ids_3_4");
+ ASSERT_NEQ(pid2, pid3, "prog_ids_2_3");
+
+ assert_mprog_count(target, 0);
+
+ link = bpf_program__attach_tcx(skel->progs.tc1, loopback, &optl);
+ if (!ASSERT_OK_PTR(link, "link_attach"))
+ goto cleanup;
+
+ skel->links.tc1 = link;
+
+ lid1 = id_from_link_fd(bpf_link__fd(skel->links.tc1));
+
+ assert_mprog_count(target, 1);
+
+ LIBBPF_OPTS_RESET(optl,
+ .flags = BPF_F_BEFORE,
+ );
+
+ link = bpf_program__attach_tcx(skel->progs.tc2, loopback, &optl);
+ if (!ASSERT_OK_PTR(link, "link_attach"))
+ goto cleanup;
+
+ skel->links.tc2 = link;
+
+ lid2 = id_from_link_fd(bpf_link__fd(skel->links.tc2));
+
+ assert_mprog_count(target, 2);
+
+ optq.prog_ids = prog_ids;
+ optq.link_ids = link_ids;
+
+ memset(prog_ids, 0, sizeof(prog_ids));
+ memset(link_ids, 0, sizeof(link_ids));
+ optq.count = ARRAY_SIZE(prog_ids);
+
+ err = bpf_prog_query_opts(loopback, target, &optq);
+ if (!ASSERT_OK(err, "prog_query"))
+ goto cleanup;
+
+ ASSERT_EQ(optq.count, 2, "count");
+ ASSERT_EQ(optq.revision, 3, "revision");
+ ASSERT_EQ(optq.prog_ids[0], pid2, "prog_ids[0]");
+ ASSERT_EQ(optq.link_ids[0], lid2, "link_ids[0]");
+ ASSERT_EQ(optq.prog_ids[1], pid1, "prog_ids[1]");
+ ASSERT_EQ(optq.link_ids[1], lid1, "link_ids[1]");
+ ASSERT_EQ(optq.prog_ids[2], 0, "prog_ids[2]");
+ ASSERT_EQ(optq.link_ids[2], 0, "link_ids[2]");
+
+ tc_skel_reset_all_seen(skel);
+ ASSERT_OK(system(ping_cmd), ping_cmd);
+
+ ASSERT_EQ(skel->bss->seen_tc1, true, "seen_tc1");
+ ASSERT_EQ(skel->bss->seen_tc2, true, "seen_tc2");
+ ASSERT_EQ(skel->bss->seen_tc3, false, "seen_tc3");
+ ASSERT_EQ(skel->bss->seen_tc4, false, "seen_tc4");
+
+ LIBBPF_OPTS_RESET(optl,
+ .flags = BPF_F_BEFORE,
+ );
+
+ link = bpf_program__attach_tcx(skel->progs.tc3, loopback, &optl);
+ if (!ASSERT_OK_PTR(link, "link_attach"))
+ goto cleanup;
+
+ skel->links.tc3 = link;
+
+ lid3 = id_from_link_fd(bpf_link__fd(skel->links.tc3));
+
+ LIBBPF_OPTS_RESET(optl,
+ .flags = BPF_F_BEFORE,
+ );
+
+ link = bpf_program__attach_tcx(skel->progs.tc4, loopback, &optl);
+ if (!ASSERT_OK_PTR(link, "link_attach"))
+ goto cleanup;
+
+ skel->links.tc4 = link;
+
+ lid4 = id_from_link_fd(bpf_link__fd(skel->links.tc4));
+
+ assert_mprog_count(target, 4);
+
+ memset(prog_ids, 0, sizeof(prog_ids));
+ memset(link_ids, 0, sizeof(link_ids));
+ optq.count = ARRAY_SIZE(prog_ids);
+
+ err = bpf_prog_query_opts(loopback, target, &optq);
+ if (!ASSERT_OK(err, "prog_query"))
+ goto cleanup;
+
+ ASSERT_EQ(optq.count, 4, "count");
+ ASSERT_EQ(optq.revision, 5, "revision");
+ ASSERT_EQ(optq.prog_ids[0], pid4, "prog_ids[0]");
+ ASSERT_EQ(optq.link_ids[0], lid4, "link_ids[0]");
+ ASSERT_EQ(optq.prog_ids[1], pid3, "prog_ids[1]");
+ ASSERT_EQ(optq.link_ids[1], lid3, "link_ids[1]");
+ ASSERT_EQ(optq.prog_ids[2], pid2, "prog_ids[2]");
+ ASSERT_EQ(optq.link_ids[2], lid2, "link_ids[2]");
+ ASSERT_EQ(optq.prog_ids[3], pid1, "prog_ids[3]");
+ ASSERT_EQ(optq.link_ids[3], lid1, "link_ids[3]");
+ ASSERT_EQ(optq.prog_ids[4], 0, "prog_ids[4]");
+ ASSERT_EQ(optq.link_ids[4], 0, "link_ids[4]");
+
+ tc_skel_reset_all_seen(skel);
+ ASSERT_OK(system(ping_cmd), ping_cmd);
+
+ ASSERT_EQ(skel->bss->seen_tc1, true, "seen_tc1");
+ ASSERT_EQ(skel->bss->seen_tc2, true, "seen_tc2");
+ ASSERT_EQ(skel->bss->seen_tc3, true, "seen_tc3");
+ ASSERT_EQ(skel->bss->seen_tc4, true, "seen_tc4");
+cleanup:
+ test_tc_link__destroy(skel);
+ assert_mprog_count(target, 0);
+}
+
+void serial_test_tc_links_prepend(void)
+{
+ test_tc_links_prepend_target(BPF_TCX_INGRESS);
+ test_tc_links_prepend_target(BPF_TCX_EGRESS);
+}
+
+static void test_tc_links_append_target(int target)
+{
+ LIBBPF_OPTS(bpf_prog_query_opts, optq);
+ LIBBPF_OPTS(bpf_tcx_opts, optl);
+ __u32 prog_ids[5], link_ids[5];
+ __u32 pid1, pid2, pid3, pid4;
+ __u32 lid1, lid2, lid3, lid4;
+ struct test_tc_link *skel;
+ struct bpf_link *link;
+ int err;
+
+ skel = test_tc_link__open();
+ if (!ASSERT_OK_PTR(skel, "skel_open"))
+ goto cleanup;
+
+ ASSERT_EQ(bpf_program__set_expected_attach_type(skel->progs.tc1, target),
+ 0, "tc1_attach_type");
+ ASSERT_EQ(bpf_program__set_expected_attach_type(skel->progs.tc2, target),
+ 0, "tc2_attach_type");
+ ASSERT_EQ(bpf_program__set_expected_attach_type(skel->progs.tc3, target),
+ 0, "tc3_attach_type");
+ ASSERT_EQ(bpf_program__set_expected_attach_type(skel->progs.tc4, target),
+ 0, "tc4_attach_type");
+
+ err = test_tc_link__load(skel);
+ if (!ASSERT_OK(err, "skel_load"))
+ goto cleanup;
+
+ pid1 = id_from_prog_fd(bpf_program__fd(skel->progs.tc1));
+ pid2 = id_from_prog_fd(bpf_program__fd(skel->progs.tc2));
+ pid3 = id_from_prog_fd(bpf_program__fd(skel->progs.tc3));
+ pid4 = id_from_prog_fd(bpf_program__fd(skel->progs.tc4));
+
+ ASSERT_NEQ(pid1, pid2, "prog_ids_1_2");
+ ASSERT_NEQ(pid3, pid4, "prog_ids_3_4");
+ ASSERT_NEQ(pid2, pid3, "prog_ids_2_3");
+
+ assert_mprog_count(target, 0);
+
+ link = bpf_program__attach_tcx(skel->progs.tc1, loopback, &optl);
+ if (!ASSERT_OK_PTR(link, "link_attach"))
+ goto cleanup;
+
+ skel->links.tc1 = link;
+
+ lid1 = id_from_link_fd(bpf_link__fd(skel->links.tc1));
+
+ assert_mprog_count(target, 1);
+
+ LIBBPF_OPTS_RESET(optl,
+ .flags = BPF_F_AFTER,
+ );
+
+ link = bpf_program__attach_tcx(skel->progs.tc2, loopback, &optl);
+ if (!ASSERT_OK_PTR(link, "link_attach"))
+ goto cleanup;
+
+ skel->links.tc2 = link;
+
+ lid2 = id_from_link_fd(bpf_link__fd(skel->links.tc2));
+
+ assert_mprog_count(target, 2);
+
+ optq.prog_ids = prog_ids;
+ optq.link_ids = link_ids;
+
+ memset(prog_ids, 0, sizeof(prog_ids));
+ memset(link_ids, 0, sizeof(link_ids));
+ optq.count = ARRAY_SIZE(prog_ids);
+
+ err = bpf_prog_query_opts(loopback, target, &optq);
+ if (!ASSERT_OK(err, "prog_query"))
+ goto cleanup;
+
+ ASSERT_EQ(optq.count, 2, "count");
+ ASSERT_EQ(optq.revision, 3, "revision");
+ ASSERT_EQ(optq.prog_ids[0], pid1, "prog_ids[0]");
+ ASSERT_EQ(optq.link_ids[0], lid1, "link_ids[0]");
+ ASSERT_EQ(optq.prog_ids[1], pid2, "prog_ids[1]");
+ ASSERT_EQ(optq.link_ids[1], lid2, "link_ids[1]");
+ ASSERT_EQ(optq.prog_ids[2], 0, "prog_ids[2]");
+ ASSERT_EQ(optq.link_ids[2], 0, "link_ids[2]");
+
+ tc_skel_reset_all_seen(skel);
+ ASSERT_OK(system(ping_cmd), ping_cmd);
+
+ ASSERT_EQ(skel->bss->seen_tc1, true, "seen_tc1");
+ ASSERT_EQ(skel->bss->seen_tc2, true, "seen_tc2");
+ ASSERT_EQ(skel->bss->seen_tc3, false, "seen_tc3");
+ ASSERT_EQ(skel->bss->seen_tc4, false, "seen_tc4");
+
+ LIBBPF_OPTS_RESET(optl,
+ .flags = BPF_F_AFTER,
+ );
+
+ link = bpf_program__attach_tcx(skel->progs.tc3, loopback, &optl);
+ if (!ASSERT_OK_PTR(link, "link_attach"))
+ goto cleanup;
+
+ skel->links.tc3 = link;
+
+ lid3 = id_from_link_fd(bpf_link__fd(skel->links.tc3));
+
+ LIBBPF_OPTS_RESET(optl,
+ .flags = BPF_F_AFTER,
+ );
+
+ link = bpf_program__attach_tcx(skel->progs.tc4, loopback, &optl);
+ if (!ASSERT_OK_PTR(link, "link_attach"))
+ goto cleanup;
+
+ skel->links.tc4 = link;
+
+ lid4 = id_from_link_fd(bpf_link__fd(skel->links.tc4));
+
+ assert_mprog_count(target, 4);
+
+ memset(prog_ids, 0, sizeof(prog_ids));
+ memset(link_ids, 0, sizeof(link_ids));
+ optq.count = ARRAY_SIZE(prog_ids);
+
+ err = bpf_prog_query_opts(loopback, target, &optq);
+ if (!ASSERT_OK(err, "prog_query"))
+ goto cleanup;
+
+ ASSERT_EQ(optq.count, 4, "count");
+ ASSERT_EQ(optq.revision, 5, "revision");
+ ASSERT_EQ(optq.prog_ids[0], pid1, "prog_ids[0]");
+ ASSERT_EQ(optq.link_ids[0], lid1, "link_ids[0]");
+ ASSERT_EQ(optq.prog_ids[1], pid2, "prog_ids[1]");
+ ASSERT_EQ(optq.link_ids[1], lid2, "link_ids[1]");
+ ASSERT_EQ(optq.prog_ids[2], pid3, "prog_ids[2]");
+ ASSERT_EQ(optq.link_ids[2], lid3, "link_ids[2]");
+ ASSERT_EQ(optq.prog_ids[3], pid4, "prog_ids[3]");
+ ASSERT_EQ(optq.link_ids[3], lid4, "link_ids[3]");
+ ASSERT_EQ(optq.prog_ids[4], 0, "prog_ids[4]");
+ ASSERT_EQ(optq.link_ids[4], 0, "link_ids[4]");
+
+ tc_skel_reset_all_seen(skel);
+ ASSERT_OK(system(ping_cmd), ping_cmd);
+
+ ASSERT_EQ(skel->bss->seen_tc1, true, "seen_tc1");
+ ASSERT_EQ(skel->bss->seen_tc2, true, "seen_tc2");
+ ASSERT_EQ(skel->bss->seen_tc3, true, "seen_tc3");
+ ASSERT_EQ(skel->bss->seen_tc4, true, "seen_tc4");
+cleanup:
+ test_tc_link__destroy(skel);
+ assert_mprog_count(target, 0);
+}
+
+void serial_test_tc_links_append(void)
+{
+ test_tc_links_append_target(BPF_TCX_INGRESS);
+ test_tc_links_append_target(BPF_TCX_EGRESS);
+}
+
+static void test_tc_links_dev_cleanup_target(int target)
+{
+ LIBBPF_OPTS(bpf_tcx_opts, optl);
+ LIBBPF_OPTS(bpf_prog_query_opts, optq);
+ __u32 pid1, pid2, pid3, pid4;
+ struct test_tc_link *skel;
+ struct bpf_link *link;
+ int err, ifindex;
+
+ ASSERT_OK(system("ip link add dev tcx_opts1 type veth peer name tcx_opts2"), "add veth");
+ ifindex = if_nametoindex("tcx_opts1");
+ ASSERT_NEQ(ifindex, 0, "non_zero_ifindex");
+
+ skel = test_tc_link__open();
+ if (!ASSERT_OK_PTR(skel, "skel_open"))
+ goto cleanup;
+
+ ASSERT_EQ(bpf_program__set_expected_attach_type(skel->progs.tc1, target),
+ 0, "tc1_attach_type");
+ ASSERT_EQ(bpf_program__set_expected_attach_type(skel->progs.tc2, target),
+ 0, "tc2_attach_type");
+ ASSERT_EQ(bpf_program__set_expected_attach_type(skel->progs.tc3, target),
+ 0, "tc3_attach_type");
+ ASSERT_EQ(bpf_program__set_expected_attach_type(skel->progs.tc4, target),
+ 0, "tc4_attach_type");
+
+ err = test_tc_link__load(skel);
+ if (!ASSERT_OK(err, "skel_load"))
+ goto cleanup;
+
+ pid1 = id_from_prog_fd(bpf_program__fd(skel->progs.tc1));
+ pid2 = id_from_prog_fd(bpf_program__fd(skel->progs.tc2));
+ pid3 = id_from_prog_fd(bpf_program__fd(skel->progs.tc3));
+ pid4 = id_from_prog_fd(bpf_program__fd(skel->progs.tc4));
+
+ ASSERT_NEQ(pid1, pid2, "prog_ids_1_2");
+ ASSERT_NEQ(pid3, pid4, "prog_ids_3_4");
+ ASSERT_NEQ(pid2, pid3, "prog_ids_2_3");
+
+ assert_mprog_count(target, 0);
+
+ link = bpf_program__attach_tcx(skel->progs.tc1, ifindex, &optl);
+ if (!ASSERT_OK_PTR(link, "link_attach"))
+ goto cleanup;
+
+ skel->links.tc1 = link;
+
+ assert_mprog_count_ifindex(ifindex, target, 1);
+
+ link = bpf_program__attach_tcx(skel->progs.tc2, ifindex, &optl);
+ if (!ASSERT_OK_PTR(link, "link_attach"))
+ goto cleanup;
+
+ skel->links.tc2 = link;
+
+ assert_mprog_count_ifindex(ifindex, target, 2);
+
+ link = bpf_program__attach_tcx(skel->progs.tc3, ifindex, &optl);
+ if (!ASSERT_OK_PTR(link, "link_attach"))
+ goto cleanup;
+
+ skel->links.tc3 = link;
+
+ assert_mprog_count_ifindex(ifindex, target, 3);
+
+ link = bpf_program__attach_tcx(skel->progs.tc4, ifindex, &optl);
+ if (!ASSERT_OK_PTR(link, "link_attach"))
+ goto cleanup;
+
+ skel->links.tc4 = link;
+
+ assert_mprog_count_ifindex(ifindex, target, 4);
+
+ ASSERT_OK(system("ip link del dev tcx_opts1"), "del veth");
+ ASSERT_EQ(if_nametoindex("tcx_opts1"), 0, "dev1_removed");
+ ASSERT_EQ(if_nametoindex("tcx_opts2"), 0, "dev2_removed");
+
+ ASSERT_EQ(ifindex_from_link_fd(bpf_link__fd(skel->links.tc1)), 0, "tc1_ifindex");
+ ASSERT_EQ(ifindex_from_link_fd(bpf_link__fd(skel->links.tc2)), 0, "tc2_ifindex");
+ ASSERT_EQ(ifindex_from_link_fd(bpf_link__fd(skel->links.tc3)), 0, "tc3_ifindex");
+ ASSERT_EQ(ifindex_from_link_fd(bpf_link__fd(skel->links.tc4)), 0, "tc4_ifindex");
+
+ test_tc_link__destroy(skel);
+ return;
+cleanup:
+ test_tc_link__destroy(skel);
+
+ ASSERT_OK(system("ip link del dev tcx_opts1"), "del veth");
+ ASSERT_EQ(if_nametoindex("tcx_opts1"), 0, "dev1_removed");
+ ASSERT_EQ(if_nametoindex("tcx_opts2"), 0, "dev2_removed");
+}
+
+void serial_test_tc_links_dev_cleanup(void)
+{
+ test_tc_links_dev_cleanup_target(BPF_TCX_INGRESS);
+ test_tc_links_dev_cleanup_target(BPF_TCX_EGRESS);
+}
+
+static void test_tc_chain_mixed(int target)
+{
+ LIBBPF_OPTS(bpf_tc_opts, tc_opts, .handle = 1, .priority = 1);
+ LIBBPF_OPTS(bpf_tc_hook, tc_hook, .ifindex = loopback);
+ LIBBPF_OPTS(bpf_tcx_opts, optl);
+ struct test_tc_link *skel;
+ struct bpf_link *link;
+ __u32 pid1, pid2, pid3;
+ int err;
+
+ skel = test_tc_link__open();
+ if (!ASSERT_OK_PTR(skel, "skel_open"))
+ goto cleanup;
+
+ ASSERT_EQ(bpf_program__set_expected_attach_type(skel->progs.tc4, target),
+ 0, "tc4_attach_type");
+ ASSERT_EQ(bpf_program__set_expected_attach_type(skel->progs.tc5, target),
+ 0, "tc5_attach_type");
+ ASSERT_EQ(bpf_program__set_expected_attach_type(skel->progs.tc6, target),
+ 0, "tc6_attach_type");
+
+ err = test_tc_link__load(skel);
+ if (!ASSERT_OK(err, "skel_load"))
+ goto cleanup;
+
+ pid1 = id_from_prog_fd(bpf_program__fd(skel->progs.tc4));
+ pid2 = id_from_prog_fd(bpf_program__fd(skel->progs.tc5));
+ pid3 = id_from_prog_fd(bpf_program__fd(skel->progs.tc6));
+
+ ASSERT_NEQ(pid1, pid2, "prog_ids_1_2");
+ ASSERT_NEQ(pid2, pid3, "prog_ids_2_3");
+
+ assert_mprog_count(target, 0);
+
+ tc_hook.attach_point = target == BPF_TCX_INGRESS ?
+ BPF_TC_INGRESS : BPF_TC_EGRESS;
+ err = bpf_tc_hook_create(&tc_hook);
+ err = err == -EEXIST ? 0 : err;
+ if (!ASSERT_OK(err, "bpf_tc_hook_create"))
+ goto cleanup;
+
+ tc_opts.prog_fd = bpf_program__fd(skel->progs.tc5);
+ err = bpf_tc_attach(&tc_hook, &tc_opts);
+ if (!ASSERT_OK(err, "bpf_tc_attach"))
+ goto cleanup;
+
+ link = bpf_program__attach_tcx(skel->progs.tc6, loopback, &optl);
+ if (!ASSERT_OK_PTR(link, "link_attach"))
+ goto cleanup;
+
+ skel->links.tc6 = link;
+
+ assert_mprog_count(target, 1);
+
+ tc_skel_reset_all_seen(skel);
+ ASSERT_OK(system(ping_cmd), ping_cmd);
+
+ ASSERT_EQ(skel->bss->seen_tc4, false, "seen_tc4");
+ ASSERT_EQ(skel->bss->seen_tc5, false, "seen_tc5");
+ ASSERT_EQ(skel->bss->seen_tc6, true, "seen_tc6");
+
+ err = bpf_link__update_program(skel->links.tc6, skel->progs.tc4);
+ if (!ASSERT_OK(err, "link_update"))
+ goto cleanup;
+
+ assert_mprog_count(target, 1);
+
+ tc_skel_reset_all_seen(skel);
+ ASSERT_OK(system(ping_cmd), ping_cmd);
+
+ ASSERT_EQ(skel->bss->seen_tc4, true, "seen_tc4");
+ ASSERT_EQ(skel->bss->seen_tc5, true, "seen_tc5");
+ ASSERT_EQ(skel->bss->seen_tc6, false, "seen_tc6");
+
+ err = bpf_link__detach(skel->links.tc6);
+ if (!ASSERT_OK(err, "prog_detach"))
+ goto cleanup;
+
+ assert_mprog_count(target, 0);
+
+ tc_skel_reset_all_seen(skel);
+ ASSERT_OK(system(ping_cmd), ping_cmd);
+
+ ASSERT_EQ(skel->bss->seen_tc4, false, "seen_tc4");
+ ASSERT_EQ(skel->bss->seen_tc5, true, "seen_tc5");
+ ASSERT_EQ(skel->bss->seen_tc6, false, "seen_tc6");
+
+cleanup:
+ tc_opts.flags = tc_opts.prog_fd = tc_opts.prog_id = 0;
+ err = bpf_tc_detach(&tc_hook, &tc_opts);
+ ASSERT_OK(err, "bpf_tc_detach");
+
+ tc_hook.attach_point = BPF_TC_INGRESS | BPF_TC_EGRESS;
+ bpf_tc_hook_destroy(&tc_hook);
+
+ test_tc_link__destroy(skel);
+}
+
+void serial_test_tc_links_chain_mixed(void)
+{
+ test_tc_chain_mixed(BPF_TCX_INGRESS);
+ test_tc_chain_mixed(BPF_TCX_EGRESS);
+}
+
+static void test_tc_links_ingress(int target, bool chain_tc_old,
+ bool tcx_teardown_first)
+{
+ LIBBPF_OPTS(bpf_tc_opts, tc_opts,
+ .handle = 1,
+ .priority = 1,
+ );
+ LIBBPF_OPTS(bpf_tc_hook, tc_hook,
+ .ifindex = loopback,
+ .attach_point = BPF_TC_CUSTOM,
+ .parent = TC_H_INGRESS,
+ );
+ bool hook_created = false, tc_attached = false;
+ LIBBPF_OPTS(bpf_tcx_opts, optl);
+ __u32 pid1, pid2, pid3;
+ struct test_tc_link *skel;
+ struct bpf_link *link;
+ int err;
+
+ skel = test_tc_link__open();
+ if (!ASSERT_OK_PTR(skel, "skel_open"))
+ goto cleanup;
+
+ ASSERT_EQ(bpf_program__set_expected_attach_type(skel->progs.tc1, target),
+ 0, "tc1_attach_type");
+ ASSERT_EQ(bpf_program__set_expected_attach_type(skel->progs.tc2, target),
+ 0, "tc2_attach_type");
+
+ err = test_tc_link__load(skel);
+ if (!ASSERT_OK(err, "skel_load"))
+ goto cleanup;
+
+ pid1 = id_from_prog_fd(bpf_program__fd(skel->progs.tc1));
+ pid2 = id_from_prog_fd(bpf_program__fd(skel->progs.tc2));
+ pid3 = id_from_prog_fd(bpf_program__fd(skel->progs.tc3));
+
+ ASSERT_NEQ(pid1, pid2, "prog_ids_1_2");
+ ASSERT_NEQ(pid2, pid3, "prog_ids_2_3");
+
+ assert_mprog_count(target, 0);
+
+ if (chain_tc_old) {
+ ASSERT_OK(system("tc qdisc add dev lo ingress"), "add_ingress");
+ hook_created = true;
+
+ tc_opts.prog_fd = bpf_program__fd(skel->progs.tc3);
+ err = bpf_tc_attach(&tc_hook, &tc_opts);
+ if (!ASSERT_OK(err, "bpf_tc_attach"))
+ goto cleanup;
+ tc_attached = true;
+ }
+
+ link = bpf_program__attach_tcx(skel->progs.tc1, loopback, &optl);
+ if (!ASSERT_OK_PTR(link, "link_attach"))
+ goto cleanup;
+
+ skel->links.tc1 = link;
+
+ link = bpf_program__attach_tcx(skel->progs.tc2, loopback, &optl);
+ if (!ASSERT_OK_PTR(link, "link_attach"))
+ goto cleanup;
+
+ skel->links.tc2 = link;
+
+ assert_mprog_count(target, 2);
+
+ tc_skel_reset_all_seen(skel);
+ ASSERT_OK(system(ping_cmd), ping_cmd);
+
+ ASSERT_EQ(skel->bss->seen_tc1, true, "seen_tc1");
+ ASSERT_EQ(skel->bss->seen_tc2, true, "seen_tc2");
+ ASSERT_EQ(skel->bss->seen_tc3, chain_tc_old, "seen_tc3");
+
+ err = bpf_link__detach(skel->links.tc2);
+ if (!ASSERT_OK(err, "prog_detach"))
+ goto cleanup;
+
+ assert_mprog_count(target, 1);
+
+ tc_skel_reset_all_seen(skel);
+ ASSERT_OK(system(ping_cmd), ping_cmd);
+
+ ASSERT_EQ(skel->bss->seen_tc1, true, "seen_tc1");
+ ASSERT_EQ(skel->bss->seen_tc2, false, "seen_tc2");
+ ASSERT_EQ(skel->bss->seen_tc3, chain_tc_old, "seen_tc3");
+cleanup:
+ if (tc_attached) {
+ tc_opts.flags = tc_opts.prog_fd = tc_opts.prog_id = 0;
+ err = bpf_tc_detach(&tc_hook, &tc_opts);
+ ASSERT_OK(err, "bpf_tc_detach");
+ }
+ ASSERT_OK(system(ping_cmd), ping_cmd);
+ assert_mprog_count(target, 1);
+ if (hook_created && tcx_teardown_first)
+ ASSERT_OK(system("tc qdisc del dev lo ingress"), "del_ingress");
+ ASSERT_OK(system(ping_cmd), ping_cmd);
+ test_tc_link__destroy(skel);
+ ASSERT_OK(system(ping_cmd), ping_cmd);
+ if (hook_created && !tcx_teardown_first)
+ ASSERT_OK(system("tc qdisc del dev lo ingress"), "del_ingress");
+ ASSERT_OK(system(ping_cmd), ping_cmd);
+ assert_mprog_count(target, 0);
+}
+
+void serial_test_tc_links_ingress(void)
+{
+ test_tc_links_ingress(BPF_TCX_INGRESS, true, true);
+ test_tc_links_ingress(BPF_TCX_INGRESS, true, false);
+ test_tc_links_ingress(BPF_TCX_INGRESS, false, false);
+}
+
+static void test_tc_links_dev_mixed(int target)
+{
+ LIBBPF_OPTS(bpf_tc_opts, tc_opts, .handle = 1, .priority = 1);
+ LIBBPF_OPTS(bpf_tc_hook, tc_hook);
+ LIBBPF_OPTS(bpf_tcx_opts, optl);
+ __u32 pid1, pid2, pid3, pid4;
+ struct test_tc_link *skel;
+ struct bpf_link *link;
+ int err, ifindex;
+
+ ASSERT_OK(system("ip link add dev tcx_opts1 type veth peer name tcx_opts2"), "add veth");
+ ifindex = if_nametoindex("tcx_opts1");
+ ASSERT_NEQ(ifindex, 0, "non_zero_ifindex");
+
+ skel = test_tc_link__open();
+ if (!ASSERT_OK_PTR(skel, "skel_open"))
+ goto cleanup;
+
+ ASSERT_EQ(bpf_program__set_expected_attach_type(skel->progs.tc1, target),
+ 0, "tc1_attach_type");
+ ASSERT_EQ(bpf_program__set_expected_attach_type(skel->progs.tc2, target),
+ 0, "tc2_attach_type");
+ ASSERT_EQ(bpf_program__set_expected_attach_type(skel->progs.tc3, target),
+ 0, "tc3_attach_type");
+ ASSERT_EQ(bpf_program__set_expected_attach_type(skel->progs.tc4, target),
+ 0, "tc4_attach_type");
+
+ err = test_tc_link__load(skel);
+ if (!ASSERT_OK(err, "skel_load"))
+ goto cleanup;
+
+ pid1 = id_from_prog_fd(bpf_program__fd(skel->progs.tc1));
+ pid2 = id_from_prog_fd(bpf_program__fd(skel->progs.tc2));
+ pid3 = id_from_prog_fd(bpf_program__fd(skel->progs.tc3));
+ pid4 = id_from_prog_fd(bpf_program__fd(skel->progs.tc4));
+
+ ASSERT_NEQ(pid1, pid2, "prog_ids_1_2");
+ ASSERT_NEQ(pid3, pid4, "prog_ids_3_4");
+ ASSERT_NEQ(pid2, pid3, "prog_ids_2_3");
+
+ assert_mprog_count(target, 0);
+
+ link = bpf_program__attach_tcx(skel->progs.tc1, ifindex, &optl);
+ if (!ASSERT_OK_PTR(link, "link_attach"))
+ goto cleanup;
+
+ skel->links.tc1 = link;
+
+ assert_mprog_count_ifindex(ifindex, target, 1);
+
+ link = bpf_program__attach_tcx(skel->progs.tc2, ifindex, &optl);
+ if (!ASSERT_OK_PTR(link, "link_attach"))
+ goto cleanup;
+
+ skel->links.tc2 = link;
+
+ assert_mprog_count_ifindex(ifindex, target, 2);
+
+ link = bpf_program__attach_tcx(skel->progs.tc3, ifindex, &optl);
+ if (!ASSERT_OK_PTR(link, "link_attach"))
+ goto cleanup;
+
+ skel->links.tc3 = link;
+
+ assert_mprog_count_ifindex(ifindex, target, 3);
+
+ link = bpf_program__attach_tcx(skel->progs.tc4, ifindex, &optl);
+ if (!ASSERT_OK_PTR(link, "link_attach"))
+ goto cleanup;
+
+ skel->links.tc4 = link;
+
+ assert_mprog_count_ifindex(ifindex, target, 4);
+
+ tc_hook.ifindex = ifindex;
+ tc_hook.attach_point = target == BPF_TCX_INGRESS ?
+ BPF_TC_INGRESS : BPF_TC_EGRESS;
+
+ err = bpf_tc_hook_create(&tc_hook);
+ err = err == -EEXIST ? 0 : err;
+ if (!ASSERT_OK(err, "bpf_tc_hook_create"))
+ goto cleanup;
+
+ tc_opts.prog_fd = bpf_program__fd(skel->progs.tc5);
+ err = bpf_tc_attach(&tc_hook, &tc_opts);
+ if (!ASSERT_OK(err, "bpf_tc_attach"))
+ goto cleanup;
+
+ ASSERT_OK(system("ip link del dev tcx_opts1"), "del veth");
+ ASSERT_EQ(if_nametoindex("tcx_opts1"), 0, "dev1_removed");
+ ASSERT_EQ(if_nametoindex("tcx_opts2"), 0, "dev2_removed");
+
+ ASSERT_EQ(ifindex_from_link_fd(bpf_link__fd(skel->links.tc1)), 0, "tc1_ifindex");
+ ASSERT_EQ(ifindex_from_link_fd(bpf_link__fd(skel->links.tc2)), 0, "tc2_ifindex");
+ ASSERT_EQ(ifindex_from_link_fd(bpf_link__fd(skel->links.tc3)), 0, "tc3_ifindex");
+ ASSERT_EQ(ifindex_from_link_fd(bpf_link__fd(skel->links.tc4)), 0, "tc4_ifindex");
+
+ test_tc_link__destroy(skel);
+ return;
+cleanup:
+ test_tc_link__destroy(skel);
+
+ ASSERT_OK(system("ip link del dev tcx_opts1"), "del veth");
+ ASSERT_EQ(if_nametoindex("tcx_opts1"), 0, "dev1_removed");
+ ASSERT_EQ(if_nametoindex("tcx_opts2"), 0, "dev2_removed");
+}
+
+void serial_test_tc_links_dev_mixed(void)
+{
+ test_tc_links_dev_mixed(BPF_TCX_INGRESS);
+ test_tc_links_dev_mixed(BPF_TCX_EGRESS);
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/tc_netkit.c b/tools/testing/selftests/bpf/prog_tests/tc_netkit.c
new file mode 100644
index 000000000000..15ee7b2fc410
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/tc_netkit.c
@@ -0,0 +1,687 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2023 Isovalent */
+#include <uapi/linux/if_link.h>
+#include <net/if.h>
+#include <test_progs.h>
+
+#define netkit_peer "nk0"
+#define netkit_name "nk1"
+
+#define ping_addr_neigh 0x0a000002 /* 10.0.0.2 */
+#define ping_addr_noneigh 0x0a000003 /* 10.0.0.3 */
+
+#include "test_tc_link.skel.h"
+#include "netlink_helpers.h"
+#include "tc_helpers.h"
+
+#define ICMP_ECHO 8
+
+struct icmphdr {
+ __u8 type;
+ __u8 code;
+ __sum16 checksum;
+ struct {
+ __be16 id;
+ __be16 sequence;
+ } echo;
+};
+
+struct iplink_req {
+ struct nlmsghdr n;
+ struct ifinfomsg i;
+ char buf[1024];
+};
+
+static int create_netkit(int mode, int policy, int peer_policy, int *ifindex,
+ bool same_netns)
+{
+ struct rtnl_handle rth = { .fd = -1 };
+ struct iplink_req req = {};
+ struct rtattr *linkinfo, *data;
+ const char *type = "netkit";
+ int err;
+
+ err = rtnl_open(&rth, 0);
+ if (!ASSERT_OK(err, "open_rtnetlink"))
+ return err;
+
+ memset(&req, 0, sizeof(req));
+ req.n.nlmsg_len = NLMSG_LENGTH(sizeof(struct ifinfomsg));
+ req.n.nlmsg_flags = NLM_F_REQUEST | NLM_F_CREATE | NLM_F_EXCL;
+ req.n.nlmsg_type = RTM_NEWLINK;
+ req.i.ifi_family = AF_UNSPEC;
+
+ addattr_l(&req.n, sizeof(req), IFLA_IFNAME, netkit_name,
+ strlen(netkit_name));
+ linkinfo = addattr_nest(&req.n, sizeof(req), IFLA_LINKINFO);
+ addattr_l(&req.n, sizeof(req), IFLA_INFO_KIND, type, strlen(type));
+ data = addattr_nest(&req.n, sizeof(req), IFLA_INFO_DATA);
+ addattr32(&req.n, sizeof(req), IFLA_NETKIT_POLICY, policy);
+ addattr32(&req.n, sizeof(req), IFLA_NETKIT_PEER_POLICY, peer_policy);
+ addattr32(&req.n, sizeof(req), IFLA_NETKIT_MODE, mode);
+ addattr_nest_end(&req.n, data);
+ addattr_nest_end(&req.n, linkinfo);
+
+ err = rtnl_talk(&rth, &req.n, NULL);
+ ASSERT_OK(err, "talk_rtnetlink");
+ rtnl_close(&rth);
+ *ifindex = if_nametoindex(netkit_name);
+
+ ASSERT_GT(*ifindex, 0, "retrieve_ifindex");
+ ASSERT_OK(system("ip netns add foo"), "create netns");
+ ASSERT_OK(system("ip link set dev " netkit_name " up"),
+ "up primary");
+ ASSERT_OK(system("ip addr add dev " netkit_name " 10.0.0.1/24"),
+ "addr primary");
+ if (same_netns) {
+ ASSERT_OK(system("ip link set dev " netkit_peer " up"),
+ "up peer");
+ ASSERT_OK(system("ip addr add dev " netkit_peer " 10.0.0.2/24"),
+ "addr peer");
+ } else {
+ ASSERT_OK(system("ip link set " netkit_peer " netns foo"),
+ "move peer");
+ ASSERT_OK(system("ip netns exec foo ip link set dev "
+ netkit_peer " up"), "up peer");
+ ASSERT_OK(system("ip netns exec foo ip addr add dev "
+ netkit_peer " 10.0.0.2/24"), "addr peer");
+ }
+ return err;
+}
+
+static void destroy_netkit(void)
+{
+ ASSERT_OK(system("ip link del dev " netkit_name), "del primary");
+ ASSERT_OK(system("ip netns del foo"), "delete netns");
+ ASSERT_EQ(if_nametoindex(netkit_name), 0, netkit_name "_ifindex");
+}
+
+static int __send_icmp(__u32 dest)
+{
+ struct sockaddr_in addr;
+ struct icmphdr icmp;
+ int sock, ret;
+
+ ret = write_sysctl("/proc/sys/net/ipv4/ping_group_range", "0 0");
+ if (!ASSERT_OK(ret, "write_sysctl(net.ipv4.ping_group_range)"))
+ return ret;
+
+ sock = socket(AF_INET, SOCK_DGRAM, IPPROTO_ICMP);
+ if (!ASSERT_GE(sock, 0, "icmp_socket"))
+ return -errno;
+
+ ret = setsockopt(sock, SOL_SOCKET, SO_BINDTODEVICE,
+ netkit_name, strlen(netkit_name) + 1);
+ if (!ASSERT_OK(ret, "setsockopt(SO_BINDTODEVICE)"))
+ goto out;
+
+ memset(&addr, 0, sizeof(addr));
+ addr.sin_family = AF_INET;
+ addr.sin_addr.s_addr = htonl(dest);
+
+ memset(&icmp, 0, sizeof(icmp));
+ icmp.type = ICMP_ECHO;
+ icmp.echo.id = 1234;
+ icmp.echo.sequence = 1;
+
+ ret = sendto(sock, &icmp, sizeof(icmp), 0,
+ (struct sockaddr *)&addr, sizeof(addr));
+ if (!ASSERT_GE(ret, 0, "icmp_sendto"))
+ ret = -errno;
+ else
+ ret = 0;
+out:
+ close(sock);
+ return ret;
+}
+
+static int send_icmp(void)
+{
+ return __send_icmp(ping_addr_neigh);
+}
+
+void serial_test_tc_netkit_basic(void)
+{
+ LIBBPF_OPTS(bpf_prog_query_opts, optq);
+ LIBBPF_OPTS(bpf_netkit_opts, optl);
+ __u32 prog_ids[2], link_ids[2];
+ __u32 pid1, pid2, lid1, lid2;
+ struct test_tc_link *skel;
+ struct bpf_link *link;
+ int err, ifindex;
+
+ err = create_netkit(NETKIT_L2, NETKIT_PASS, NETKIT_PASS,
+ &ifindex, false);
+ if (err)
+ return;
+
+ skel = test_tc_link__open();
+ if (!ASSERT_OK_PTR(skel, "skel_open"))
+ goto cleanup;
+
+ ASSERT_EQ(bpf_program__set_expected_attach_type(skel->progs.tc1,
+ BPF_NETKIT_PRIMARY), 0, "tc1_attach_type");
+ ASSERT_EQ(bpf_program__set_expected_attach_type(skel->progs.tc2,
+ BPF_NETKIT_PEER), 0, "tc2_attach_type");
+
+ err = test_tc_link__load(skel);
+ if (!ASSERT_OK(err, "skel_load"))
+ goto cleanup;
+
+ pid1 = id_from_prog_fd(bpf_program__fd(skel->progs.tc1));
+ pid2 = id_from_prog_fd(bpf_program__fd(skel->progs.tc2));
+
+ ASSERT_NEQ(pid1, pid2, "prog_ids_1_2");
+
+ assert_mprog_count_ifindex(ifindex, BPF_NETKIT_PRIMARY, 0);
+ assert_mprog_count_ifindex(ifindex, BPF_NETKIT_PEER, 0);
+
+ ASSERT_EQ(skel->bss->seen_tc1, false, "seen_tc1");
+ ASSERT_EQ(skel->bss->seen_tc2, false, "seen_tc2");
+
+ link = bpf_program__attach_netkit(skel->progs.tc1, ifindex, &optl);
+ if (!ASSERT_OK_PTR(link, "link_attach"))
+ goto cleanup;
+
+ skel->links.tc1 = link;
+
+ lid1 = id_from_link_fd(bpf_link__fd(skel->links.tc1));
+
+ assert_mprog_count_ifindex(ifindex, BPF_NETKIT_PRIMARY, 1);
+ assert_mprog_count_ifindex(ifindex, BPF_NETKIT_PEER, 0);
+
+ optq.prog_ids = prog_ids;
+ optq.link_ids = link_ids;
+
+ memset(prog_ids, 0, sizeof(prog_ids));
+ memset(link_ids, 0, sizeof(link_ids));
+ optq.count = ARRAY_SIZE(prog_ids);
+
+ err = bpf_prog_query_opts(ifindex, BPF_NETKIT_PRIMARY, &optq);
+ if (!ASSERT_OK(err, "prog_query"))
+ goto cleanup;
+
+ ASSERT_EQ(optq.count, 1, "count");
+ ASSERT_EQ(optq.revision, 2, "revision");
+ ASSERT_EQ(optq.prog_ids[0], pid1, "prog_ids[0]");
+ ASSERT_EQ(optq.link_ids[0], lid1, "link_ids[0]");
+ ASSERT_EQ(optq.prog_ids[1], 0, "prog_ids[1]");
+ ASSERT_EQ(optq.link_ids[1], 0, "link_ids[1]");
+
+ tc_skel_reset_all_seen(skel);
+ ASSERT_EQ(send_icmp(), 0, "icmp_pkt");
+
+ ASSERT_EQ(skel->bss->seen_tc1, true, "seen_tc1");
+ ASSERT_EQ(skel->bss->seen_tc2, false, "seen_tc2");
+
+ link = bpf_program__attach_netkit(skel->progs.tc2, ifindex, &optl);
+ if (!ASSERT_OK_PTR(link, "link_attach"))
+ goto cleanup;
+
+ skel->links.tc2 = link;
+
+ lid2 = id_from_link_fd(bpf_link__fd(skel->links.tc2));
+ ASSERT_NEQ(lid1, lid2, "link_ids_1_2");
+
+ assert_mprog_count_ifindex(ifindex, BPF_NETKIT_PRIMARY, 1);
+ assert_mprog_count_ifindex(ifindex, BPF_NETKIT_PEER, 1);
+
+ memset(prog_ids, 0, sizeof(prog_ids));
+ memset(link_ids, 0, sizeof(link_ids));
+ optq.count = ARRAY_SIZE(prog_ids);
+
+ err = bpf_prog_query_opts(ifindex, BPF_NETKIT_PEER, &optq);
+ if (!ASSERT_OK(err, "prog_query"))
+ goto cleanup;
+
+ ASSERT_EQ(optq.count, 1, "count");
+ ASSERT_EQ(optq.revision, 2, "revision");
+ ASSERT_EQ(optq.prog_ids[0], pid2, "prog_ids[0]");
+ ASSERT_EQ(optq.link_ids[0], lid2, "link_ids[0]");
+ ASSERT_EQ(optq.prog_ids[1], 0, "prog_ids[1]");
+ ASSERT_EQ(optq.link_ids[1], 0, "link_ids[1]");
+
+ tc_skel_reset_all_seen(skel);
+ ASSERT_EQ(send_icmp(), 0, "icmp_pkt");
+
+ ASSERT_EQ(skel->bss->seen_tc1, true, "seen_tc1");
+ ASSERT_EQ(skel->bss->seen_tc2, true, "seen_tc2");
+cleanup:
+ test_tc_link__destroy(skel);
+
+ assert_mprog_count_ifindex(ifindex, BPF_NETKIT_PRIMARY, 0);
+ assert_mprog_count_ifindex(ifindex, BPF_NETKIT_PEER, 0);
+ destroy_netkit();
+}
+
+static void serial_test_tc_netkit_multi_links_target(int mode, int target)
+{
+ LIBBPF_OPTS(bpf_prog_query_opts, optq);
+ LIBBPF_OPTS(bpf_netkit_opts, optl);
+ __u32 prog_ids[3], link_ids[3];
+ __u32 pid1, pid2, lid1, lid2;
+ struct test_tc_link *skel;
+ struct bpf_link *link;
+ int err, ifindex;
+
+ err = create_netkit(mode, NETKIT_PASS, NETKIT_PASS,
+ &ifindex, false);
+ if (err)
+ return;
+
+ skel = test_tc_link__open();
+ if (!ASSERT_OK_PTR(skel, "skel_open"))
+ goto cleanup;
+
+ ASSERT_EQ(bpf_program__set_expected_attach_type(skel->progs.tc1,
+ target), 0, "tc1_attach_type");
+ ASSERT_EQ(bpf_program__set_expected_attach_type(skel->progs.tc2,
+ target), 0, "tc2_attach_type");
+
+ err = test_tc_link__load(skel);
+ if (!ASSERT_OK(err, "skel_load"))
+ goto cleanup;
+
+ pid1 = id_from_prog_fd(bpf_program__fd(skel->progs.tc1));
+ pid2 = id_from_prog_fd(bpf_program__fd(skel->progs.tc2));
+
+ ASSERT_NEQ(pid1, pid2, "prog_ids_1_2");
+
+ assert_mprog_count_ifindex(ifindex, target, 0);
+
+ ASSERT_EQ(skel->bss->seen_tc1, false, "seen_tc1");
+ ASSERT_EQ(skel->bss->seen_eth, false, "seen_eth");
+ ASSERT_EQ(skel->bss->seen_tc2, false, "seen_tc2");
+
+ link = bpf_program__attach_netkit(skel->progs.tc1, ifindex, &optl);
+ if (!ASSERT_OK_PTR(link, "link_attach"))
+ goto cleanup;
+
+ skel->links.tc1 = link;
+
+ lid1 = id_from_link_fd(bpf_link__fd(skel->links.tc1));
+
+ assert_mprog_count_ifindex(ifindex, target, 1);
+
+ optq.prog_ids = prog_ids;
+ optq.link_ids = link_ids;
+
+ memset(prog_ids, 0, sizeof(prog_ids));
+ memset(link_ids, 0, sizeof(link_ids));
+ optq.count = ARRAY_SIZE(prog_ids);
+
+ err = bpf_prog_query_opts(ifindex, target, &optq);
+ if (!ASSERT_OK(err, "prog_query"))
+ goto cleanup;
+
+ ASSERT_EQ(optq.count, 1, "count");
+ ASSERT_EQ(optq.revision, 2, "revision");
+ ASSERT_EQ(optq.prog_ids[0], pid1, "prog_ids[0]");
+ ASSERT_EQ(optq.link_ids[0], lid1, "link_ids[0]");
+ ASSERT_EQ(optq.prog_ids[1], 0, "prog_ids[1]");
+ ASSERT_EQ(optq.link_ids[1], 0, "link_ids[1]");
+
+ tc_skel_reset_all_seen(skel);
+ ASSERT_EQ(send_icmp(), 0, "icmp_pkt");
+
+ ASSERT_EQ(skel->bss->seen_tc1, true, "seen_tc1");
+ ASSERT_EQ(skel->bss->seen_eth, true, "seen_eth");
+ ASSERT_EQ(skel->bss->seen_tc2, false, "seen_tc2");
+
+ LIBBPF_OPTS_RESET(optl,
+ .flags = BPF_F_BEFORE,
+ .relative_fd = bpf_program__fd(skel->progs.tc1),
+ );
+
+ link = bpf_program__attach_netkit(skel->progs.tc2, ifindex, &optl);
+ if (!ASSERT_OK_PTR(link, "link_attach"))
+ goto cleanup;
+
+ skel->links.tc2 = link;
+
+ lid2 = id_from_link_fd(bpf_link__fd(skel->links.tc2));
+ ASSERT_NEQ(lid1, lid2, "link_ids_1_2");
+
+ assert_mprog_count_ifindex(ifindex, target, 2);
+
+ memset(prog_ids, 0, sizeof(prog_ids));
+ memset(link_ids, 0, sizeof(link_ids));
+ optq.count = ARRAY_SIZE(prog_ids);
+
+ err = bpf_prog_query_opts(ifindex, target, &optq);
+ if (!ASSERT_OK(err, "prog_query"))
+ goto cleanup;
+
+ ASSERT_EQ(optq.count, 2, "count");
+ ASSERT_EQ(optq.revision, 3, "revision");
+ ASSERT_EQ(optq.prog_ids[0], pid2, "prog_ids[0]");
+ ASSERT_EQ(optq.link_ids[0], lid2, "link_ids[0]");
+ ASSERT_EQ(optq.prog_ids[1], pid1, "prog_ids[1]");
+ ASSERT_EQ(optq.link_ids[1], lid1, "link_ids[1]");
+ ASSERT_EQ(optq.prog_ids[2], 0, "prog_ids[2]");
+ ASSERT_EQ(optq.link_ids[2], 0, "link_ids[2]");
+
+ tc_skel_reset_all_seen(skel);
+ ASSERT_EQ(send_icmp(), 0, "icmp_pkt");
+
+ ASSERT_EQ(skel->bss->seen_tc1, true, "seen_tc1");
+ ASSERT_EQ(skel->bss->seen_eth, true, "seen_eth");
+ ASSERT_EQ(skel->bss->seen_tc2, true, "seen_tc2");
+cleanup:
+ test_tc_link__destroy(skel);
+
+ assert_mprog_count_ifindex(ifindex, target, 0);
+ destroy_netkit();
+}
+
+void serial_test_tc_netkit_multi_links(void)
+{
+ serial_test_tc_netkit_multi_links_target(NETKIT_L2, BPF_NETKIT_PRIMARY);
+ serial_test_tc_netkit_multi_links_target(NETKIT_L3, BPF_NETKIT_PRIMARY);
+ serial_test_tc_netkit_multi_links_target(NETKIT_L2, BPF_NETKIT_PEER);
+ serial_test_tc_netkit_multi_links_target(NETKIT_L3, BPF_NETKIT_PEER);
+}
+
+static void serial_test_tc_netkit_multi_opts_target(int mode, int target)
+{
+ LIBBPF_OPTS(bpf_prog_attach_opts, opta);
+ LIBBPF_OPTS(bpf_prog_detach_opts, optd);
+ LIBBPF_OPTS(bpf_prog_query_opts, optq);
+ __u32 pid1, pid2, fd1, fd2;
+ __u32 prog_ids[3];
+ struct test_tc_link *skel;
+ int err, ifindex;
+
+ err = create_netkit(mode, NETKIT_PASS, NETKIT_PASS,
+ &ifindex, false);
+ if (err)
+ return;
+
+ skel = test_tc_link__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "skel_load"))
+ goto cleanup;
+
+ fd1 = bpf_program__fd(skel->progs.tc1);
+ fd2 = bpf_program__fd(skel->progs.tc2);
+
+ pid1 = id_from_prog_fd(fd1);
+ pid2 = id_from_prog_fd(fd2);
+
+ ASSERT_NEQ(pid1, pid2, "prog_ids_1_2");
+
+ assert_mprog_count_ifindex(ifindex, target, 0);
+
+ ASSERT_EQ(skel->bss->seen_tc1, false, "seen_tc1");
+ ASSERT_EQ(skel->bss->seen_eth, false, "seen_eth");
+ ASSERT_EQ(skel->bss->seen_tc2, false, "seen_tc2");
+
+ err = bpf_prog_attach_opts(fd1, ifindex, target, &opta);
+ if (!ASSERT_EQ(err, 0, "prog_attach"))
+ goto cleanup;
+
+ assert_mprog_count_ifindex(ifindex, target, 1);
+
+ optq.prog_ids = prog_ids;
+
+ memset(prog_ids, 0, sizeof(prog_ids));
+ optq.count = ARRAY_SIZE(prog_ids);
+
+ err = bpf_prog_query_opts(ifindex, target, &optq);
+ if (!ASSERT_OK(err, "prog_query"))
+ goto cleanup_fd1;
+
+ ASSERT_EQ(optq.count, 1, "count");
+ ASSERT_EQ(optq.revision, 2, "revision");
+ ASSERT_EQ(optq.prog_ids[0], pid1, "prog_ids[0]");
+ ASSERT_EQ(optq.prog_ids[1], 0, "prog_ids[1]");
+
+ tc_skel_reset_all_seen(skel);
+ ASSERT_EQ(send_icmp(), 0, "icmp_pkt");
+
+ ASSERT_EQ(skel->bss->seen_tc1, true, "seen_tc1");
+ ASSERT_EQ(skel->bss->seen_eth, true, "seen_eth");
+ ASSERT_EQ(skel->bss->seen_tc2, false, "seen_tc2");
+
+ LIBBPF_OPTS_RESET(opta,
+ .flags = BPF_F_BEFORE,
+ .relative_fd = fd1,
+ );
+
+ err = bpf_prog_attach_opts(fd2, ifindex, target, &opta);
+ if (!ASSERT_EQ(err, 0, "prog_attach"))
+ goto cleanup_fd1;
+
+ assert_mprog_count_ifindex(ifindex, target, 2);
+
+ memset(prog_ids, 0, sizeof(prog_ids));
+ optq.count = ARRAY_SIZE(prog_ids);
+
+ err = bpf_prog_query_opts(ifindex, target, &optq);
+ if (!ASSERT_OK(err, "prog_query"))
+ goto cleanup_fd2;
+
+ ASSERT_EQ(optq.count, 2, "count");
+ ASSERT_EQ(optq.revision, 3, "revision");
+ ASSERT_EQ(optq.prog_ids[0], pid2, "prog_ids[0]");
+ ASSERT_EQ(optq.prog_ids[1], pid1, "prog_ids[1]");
+ ASSERT_EQ(optq.prog_ids[2], 0, "prog_ids[2]");
+
+ tc_skel_reset_all_seen(skel);
+ ASSERT_EQ(send_icmp(), 0, "icmp_pkt");
+
+ ASSERT_EQ(skel->bss->seen_tc1, true, "seen_tc1");
+ ASSERT_EQ(skel->bss->seen_eth, true, "seen_eth");
+ ASSERT_EQ(skel->bss->seen_tc2, true, "seen_tc2");
+
+cleanup_fd2:
+ err = bpf_prog_detach_opts(fd2, ifindex, target, &optd);
+ ASSERT_OK(err, "prog_detach");
+ assert_mprog_count_ifindex(ifindex, target, 1);
+cleanup_fd1:
+ err = bpf_prog_detach_opts(fd1, ifindex, target, &optd);
+ ASSERT_OK(err, "prog_detach");
+ assert_mprog_count_ifindex(ifindex, target, 0);
+cleanup:
+ test_tc_link__destroy(skel);
+
+ assert_mprog_count_ifindex(ifindex, target, 0);
+ destroy_netkit();
+}
+
+void serial_test_tc_netkit_multi_opts(void)
+{
+ serial_test_tc_netkit_multi_opts_target(NETKIT_L2, BPF_NETKIT_PRIMARY);
+ serial_test_tc_netkit_multi_opts_target(NETKIT_L3, BPF_NETKIT_PRIMARY);
+ serial_test_tc_netkit_multi_opts_target(NETKIT_L2, BPF_NETKIT_PEER);
+ serial_test_tc_netkit_multi_opts_target(NETKIT_L3, BPF_NETKIT_PEER);
+}
+
+void serial_test_tc_netkit_device(void)
+{
+ LIBBPF_OPTS(bpf_prog_query_opts, optq);
+ LIBBPF_OPTS(bpf_netkit_opts, optl);
+ __u32 prog_ids[2], link_ids[2];
+ __u32 pid1, pid2, lid1;
+ struct test_tc_link *skel;
+ struct bpf_link *link;
+ int err, ifindex, ifindex2;
+
+ err = create_netkit(NETKIT_L3, NETKIT_PASS, NETKIT_PASS,
+ &ifindex, true);
+ if (err)
+ return;
+
+ ifindex2 = if_nametoindex(netkit_peer);
+ ASSERT_NEQ(ifindex, ifindex2, "ifindex_1_2");
+
+ skel = test_tc_link__open();
+ if (!ASSERT_OK_PTR(skel, "skel_open"))
+ goto cleanup;
+
+ ASSERT_EQ(bpf_program__set_expected_attach_type(skel->progs.tc1,
+ BPF_NETKIT_PRIMARY), 0, "tc1_attach_type");
+ ASSERT_EQ(bpf_program__set_expected_attach_type(skel->progs.tc2,
+ BPF_NETKIT_PEER), 0, "tc2_attach_type");
+ ASSERT_EQ(bpf_program__set_expected_attach_type(skel->progs.tc3,
+ BPF_NETKIT_PRIMARY), 0, "tc3_attach_type");
+
+ err = test_tc_link__load(skel);
+ if (!ASSERT_OK(err, "skel_load"))
+ goto cleanup;
+
+ pid1 = id_from_prog_fd(bpf_program__fd(skel->progs.tc1));
+ pid2 = id_from_prog_fd(bpf_program__fd(skel->progs.tc2));
+
+ ASSERT_NEQ(pid1, pid2, "prog_ids_1_2");
+
+ assert_mprog_count_ifindex(ifindex, BPF_NETKIT_PRIMARY, 0);
+ assert_mprog_count_ifindex(ifindex, BPF_NETKIT_PEER, 0);
+
+ ASSERT_EQ(skel->bss->seen_tc1, false, "seen_tc1");
+ ASSERT_EQ(skel->bss->seen_tc2, false, "seen_tc2");
+
+ link = bpf_program__attach_netkit(skel->progs.tc1, ifindex, &optl);
+ if (!ASSERT_OK_PTR(link, "link_attach"))
+ goto cleanup;
+
+ skel->links.tc1 = link;
+
+ lid1 = id_from_link_fd(bpf_link__fd(skel->links.tc1));
+
+ assert_mprog_count_ifindex(ifindex, BPF_NETKIT_PRIMARY, 1);
+ assert_mprog_count_ifindex(ifindex, BPF_NETKIT_PEER, 0);
+
+ optq.prog_ids = prog_ids;
+ optq.link_ids = link_ids;
+
+ memset(prog_ids, 0, sizeof(prog_ids));
+ memset(link_ids, 0, sizeof(link_ids));
+ optq.count = ARRAY_SIZE(prog_ids);
+
+ err = bpf_prog_query_opts(ifindex, BPF_NETKIT_PRIMARY, &optq);
+ if (!ASSERT_OK(err, "prog_query"))
+ goto cleanup;
+
+ ASSERT_EQ(optq.count, 1, "count");
+ ASSERT_EQ(optq.revision, 2, "revision");
+ ASSERT_EQ(optq.prog_ids[0], pid1, "prog_ids[0]");
+ ASSERT_EQ(optq.link_ids[0], lid1, "link_ids[0]");
+ ASSERT_EQ(optq.prog_ids[1], 0, "prog_ids[1]");
+ ASSERT_EQ(optq.link_ids[1], 0, "link_ids[1]");
+
+ tc_skel_reset_all_seen(skel);
+ ASSERT_EQ(send_icmp(), 0, "icmp_pkt");
+
+ ASSERT_EQ(skel->bss->seen_tc1, true, "seen_tc1");
+ ASSERT_EQ(skel->bss->seen_tc2, false, "seen_tc2");
+
+ memset(prog_ids, 0, sizeof(prog_ids));
+ memset(link_ids, 0, sizeof(link_ids));
+ optq.count = ARRAY_SIZE(prog_ids);
+
+ err = bpf_prog_query_opts(ifindex2, BPF_NETKIT_PRIMARY, &optq);
+ ASSERT_EQ(err, -EACCES, "prog_query_should_fail");
+
+ err = bpf_prog_query_opts(ifindex2, BPF_NETKIT_PEER, &optq);
+ ASSERT_EQ(err, -EACCES, "prog_query_should_fail");
+
+ link = bpf_program__attach_netkit(skel->progs.tc2, ifindex2, &optl);
+ if (!ASSERT_ERR_PTR(link, "link_attach_should_fail")) {
+ bpf_link__destroy(link);
+ goto cleanup;
+ }
+
+ link = bpf_program__attach_netkit(skel->progs.tc3, ifindex2, &optl);
+ if (!ASSERT_ERR_PTR(link, "link_attach_should_fail")) {
+ bpf_link__destroy(link);
+ goto cleanup;
+ }
+
+ assert_mprog_count_ifindex(ifindex, BPF_NETKIT_PRIMARY, 1);
+ assert_mprog_count_ifindex(ifindex, BPF_NETKIT_PEER, 0);
+cleanup:
+ test_tc_link__destroy(skel);
+
+ assert_mprog_count_ifindex(ifindex, BPF_NETKIT_PRIMARY, 0);
+ assert_mprog_count_ifindex(ifindex, BPF_NETKIT_PEER, 0);
+ destroy_netkit();
+}
+
+static void serial_test_tc_netkit_neigh_links_target(int mode, int target)
+{
+ LIBBPF_OPTS(bpf_prog_query_opts, optq);
+ LIBBPF_OPTS(bpf_netkit_opts, optl);
+ __u32 prog_ids[2], link_ids[2];
+ __u32 pid1, lid1;
+ struct test_tc_link *skel;
+ struct bpf_link *link;
+ int err, ifindex;
+
+ err = create_netkit(mode, NETKIT_PASS, NETKIT_PASS,
+ &ifindex, false);
+ if (err)
+ return;
+
+ skel = test_tc_link__open();
+ if (!ASSERT_OK_PTR(skel, "skel_open"))
+ goto cleanup;
+
+ ASSERT_EQ(bpf_program__set_expected_attach_type(skel->progs.tc1,
+ BPF_NETKIT_PRIMARY), 0, "tc1_attach_type");
+
+ err = test_tc_link__load(skel);
+ if (!ASSERT_OK(err, "skel_load"))
+ goto cleanup;
+
+ pid1 = id_from_prog_fd(bpf_program__fd(skel->progs.tc1));
+
+ assert_mprog_count_ifindex(ifindex, target, 0);
+
+ ASSERT_EQ(skel->bss->seen_tc1, false, "seen_tc1");
+ ASSERT_EQ(skel->bss->seen_eth, false, "seen_eth");
+
+ link = bpf_program__attach_netkit(skel->progs.tc1, ifindex, &optl);
+ if (!ASSERT_OK_PTR(link, "link_attach"))
+ goto cleanup;
+
+ skel->links.tc1 = link;
+
+ lid1 = id_from_link_fd(bpf_link__fd(skel->links.tc1));
+
+ assert_mprog_count_ifindex(ifindex, target, 1);
+
+ optq.prog_ids = prog_ids;
+ optq.link_ids = link_ids;
+
+ memset(prog_ids, 0, sizeof(prog_ids));
+ memset(link_ids, 0, sizeof(link_ids));
+ optq.count = ARRAY_SIZE(prog_ids);
+
+ err = bpf_prog_query_opts(ifindex, target, &optq);
+ if (!ASSERT_OK(err, "prog_query"))
+ goto cleanup;
+
+ ASSERT_EQ(optq.count, 1, "count");
+ ASSERT_EQ(optq.revision, 2, "revision");
+ ASSERT_EQ(optq.prog_ids[0], pid1, "prog_ids[0]");
+ ASSERT_EQ(optq.link_ids[0], lid1, "link_ids[0]");
+ ASSERT_EQ(optq.prog_ids[1], 0, "prog_ids[1]");
+ ASSERT_EQ(optq.link_ids[1], 0, "link_ids[1]");
+
+ tc_skel_reset_all_seen(skel);
+ ASSERT_EQ(__send_icmp(ping_addr_noneigh), 0, "icmp_pkt");
+
+ ASSERT_EQ(skel->bss->seen_tc1, true /* L2: ARP */, "seen_tc1");
+ ASSERT_EQ(skel->bss->seen_eth, mode == NETKIT_L3, "seen_eth");
+cleanup:
+ test_tc_link__destroy(skel);
+
+ assert_mprog_count_ifindex(ifindex, target, 0);
+ destroy_netkit();
+}
+
+void serial_test_tc_netkit_neigh_links(void)
+{
+ serial_test_tc_netkit_neigh_links_target(NETKIT_L2, BPF_NETKIT_PRIMARY);
+ serial_test_tc_netkit_neigh_links_target(NETKIT_L3, BPF_NETKIT_PRIMARY);
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/tc_opts.c b/tools/testing/selftests/bpf/prog_tests/tc_opts.c
new file mode 100644
index 000000000000..196abf223465
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/tc_opts.c
@@ -0,0 +1,2814 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2023 Isovalent */
+#include <uapi/linux/if_link.h>
+#include <net/if.h>
+#include <test_progs.h>
+
+#define loopback 1
+#define ping_cmd "ping -q -c1 -w1 127.0.0.1 > /dev/null"
+
+#include "test_tc_link.skel.h"
+#include "tc_helpers.h"
+
+void serial_test_tc_opts_basic(void)
+{
+ LIBBPF_OPTS(bpf_prog_attach_opts, opta);
+ LIBBPF_OPTS(bpf_prog_detach_opts, optd);
+ LIBBPF_OPTS(bpf_prog_query_opts, optq);
+ __u32 fd1, fd2, id1, id2;
+ struct test_tc_link *skel;
+ __u32 prog_ids[2];
+ int err;
+
+ skel = test_tc_link__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "skel_load"))
+ goto cleanup;
+
+ fd1 = bpf_program__fd(skel->progs.tc1);
+ fd2 = bpf_program__fd(skel->progs.tc2);
+
+ id1 = id_from_prog_fd(fd1);
+ id2 = id_from_prog_fd(fd2);
+
+ ASSERT_NEQ(id1, id2, "prog_ids_1_2");
+
+ assert_mprog_count(BPF_TCX_INGRESS, 0);
+ assert_mprog_count(BPF_TCX_EGRESS, 0);
+
+ ASSERT_EQ(skel->bss->seen_tc1, false, "seen_tc1");
+ ASSERT_EQ(skel->bss->seen_tc2, false, "seen_tc2");
+
+ err = bpf_prog_attach_opts(fd1, loopback, BPF_TCX_INGRESS, &opta);
+ if (!ASSERT_EQ(err, 0, "prog_attach"))
+ goto cleanup;
+
+ assert_mprog_count(BPF_TCX_INGRESS, 1);
+ assert_mprog_count(BPF_TCX_EGRESS, 0);
+
+ optq.prog_ids = prog_ids;
+
+ memset(prog_ids, 0, sizeof(prog_ids));
+ optq.count = ARRAY_SIZE(prog_ids);
+
+ err = bpf_prog_query_opts(loopback, BPF_TCX_INGRESS, &optq);
+ if (!ASSERT_OK(err, "prog_query"))
+ goto cleanup_in;
+
+ ASSERT_EQ(optq.count, 1, "count");
+ ASSERT_EQ(optq.revision, 2, "revision");
+ ASSERT_EQ(optq.prog_ids[0], id1, "prog_ids[0]");
+ ASSERT_EQ(optq.prog_ids[1], 0, "prog_ids[1]");
+
+ tc_skel_reset_all_seen(skel);
+ ASSERT_OK(system(ping_cmd), ping_cmd);
+
+ ASSERT_EQ(skel->bss->seen_tc1, true, "seen_tc1");
+ ASSERT_EQ(skel->bss->seen_tc2, false, "seen_tc2");
+
+ err = bpf_prog_attach_opts(fd2, loopback, BPF_TCX_EGRESS, &opta);
+ if (!ASSERT_EQ(err, 0, "prog_attach"))
+ goto cleanup_in;
+
+ assert_mprog_count(BPF_TCX_INGRESS, 1);
+ assert_mprog_count(BPF_TCX_EGRESS, 1);
+
+ memset(prog_ids, 0, sizeof(prog_ids));
+ optq.count = ARRAY_SIZE(prog_ids);
+
+ err = bpf_prog_query_opts(loopback, BPF_TCX_EGRESS, &optq);
+ if (!ASSERT_OK(err, "prog_query"))
+ goto cleanup_eg;
+
+ ASSERT_EQ(optq.count, 1, "count");
+ ASSERT_EQ(optq.revision, 2, "revision");
+ ASSERT_EQ(optq.prog_ids[0], id2, "prog_ids[0]");
+ ASSERT_EQ(optq.prog_ids[1], 0, "prog_ids[1]");
+
+ tc_skel_reset_all_seen(skel);
+ ASSERT_OK(system(ping_cmd), ping_cmd);
+
+ ASSERT_EQ(skel->bss->seen_tc1, true, "seen_tc1");
+ ASSERT_EQ(skel->bss->seen_tc2, true, "seen_tc2");
+
+cleanup_eg:
+ err = bpf_prog_detach_opts(fd2, loopback, BPF_TCX_EGRESS, &optd);
+ ASSERT_OK(err, "prog_detach_eg");
+
+ assert_mprog_count(BPF_TCX_INGRESS, 1);
+ assert_mprog_count(BPF_TCX_EGRESS, 0);
+
+cleanup_in:
+ err = bpf_prog_detach_opts(fd1, loopback, BPF_TCX_INGRESS, &optd);
+ ASSERT_OK(err, "prog_detach_in");
+
+ assert_mprog_count(BPF_TCX_INGRESS, 0);
+ assert_mprog_count(BPF_TCX_EGRESS, 0);
+
+cleanup:
+ test_tc_link__destroy(skel);
+}
+
+static void test_tc_opts_before_target(int target)
+{
+ LIBBPF_OPTS(bpf_prog_attach_opts, opta);
+ LIBBPF_OPTS(bpf_prog_detach_opts, optd);
+ LIBBPF_OPTS(bpf_prog_query_opts, optq);
+ __u32 fd1, fd2, fd3, fd4, id1, id2, id3, id4;
+ struct test_tc_link *skel;
+ __u32 prog_ids[5];
+ int err;
+
+ skel = test_tc_link__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "skel_load"))
+ goto cleanup;
+
+ fd1 = bpf_program__fd(skel->progs.tc1);
+ fd2 = bpf_program__fd(skel->progs.tc2);
+ fd3 = bpf_program__fd(skel->progs.tc3);
+ fd4 = bpf_program__fd(skel->progs.tc4);
+
+ id1 = id_from_prog_fd(fd1);
+ id2 = id_from_prog_fd(fd2);
+ id3 = id_from_prog_fd(fd3);
+ id4 = id_from_prog_fd(fd4);
+
+ ASSERT_NEQ(id1, id2, "prog_ids_1_2");
+ ASSERT_NEQ(id3, id4, "prog_ids_3_4");
+ ASSERT_NEQ(id2, id3, "prog_ids_2_3");
+
+ assert_mprog_count(target, 0);
+
+ err = bpf_prog_attach_opts(fd1, loopback, target, &opta);
+ if (!ASSERT_EQ(err, 0, "prog_attach"))
+ goto cleanup;
+
+ assert_mprog_count(target, 1);
+
+ err = bpf_prog_attach_opts(fd2, loopback, target, &opta);
+ if (!ASSERT_EQ(err, 0, "prog_attach"))
+ goto cleanup_target;
+
+ assert_mprog_count(target, 2);
+
+ optq.prog_ids = prog_ids;
+
+ memset(prog_ids, 0, sizeof(prog_ids));
+ optq.count = ARRAY_SIZE(prog_ids);
+
+ err = bpf_prog_query_opts(loopback, target, &optq);
+ if (!ASSERT_OK(err, "prog_query"))
+ goto cleanup_target2;
+
+ ASSERT_EQ(optq.count, 2, "count");
+ ASSERT_EQ(optq.revision, 3, "revision");
+ ASSERT_EQ(optq.prog_ids[0], id1, "prog_ids[0]");
+ ASSERT_EQ(optq.prog_ids[1], id2, "prog_ids[1]");
+ ASSERT_EQ(optq.prog_ids[2], 0, "prog_ids[2]");
+
+ tc_skel_reset_all_seen(skel);
+ ASSERT_OK(system(ping_cmd), ping_cmd);
+
+ ASSERT_EQ(skel->bss->seen_tc1, true, "seen_tc1");
+ ASSERT_EQ(skel->bss->seen_tc2, true, "seen_tc2");
+ ASSERT_EQ(skel->bss->seen_tc3, false, "seen_tc3");
+ ASSERT_EQ(skel->bss->seen_tc4, false, "seen_tc4");
+
+ LIBBPF_OPTS_RESET(opta,
+ .flags = BPF_F_BEFORE,
+ .relative_fd = fd2,
+ );
+
+ err = bpf_prog_attach_opts(fd3, loopback, target, &opta);
+ if (!ASSERT_EQ(err, 0, "prog_attach"))
+ goto cleanup_target2;
+
+ memset(prog_ids, 0, sizeof(prog_ids));
+ optq.count = ARRAY_SIZE(prog_ids);
+
+ err = bpf_prog_query_opts(loopback, target, &optq);
+ if (!ASSERT_OK(err, "prog_query"))
+ goto cleanup_target3;
+
+ ASSERT_EQ(optq.count, 3, "count");
+ ASSERT_EQ(optq.revision, 4, "revision");
+ ASSERT_EQ(optq.prog_ids[0], id1, "prog_ids[0]");
+ ASSERT_EQ(optq.prog_ids[1], id3, "prog_ids[1]");
+ ASSERT_EQ(optq.prog_ids[2], id2, "prog_ids[2]");
+ ASSERT_EQ(optq.prog_ids[3], 0, "prog_ids[3]");
+
+ LIBBPF_OPTS_RESET(opta,
+ .flags = BPF_F_BEFORE,
+ .relative_id = id1,
+ );
+
+ err = bpf_prog_attach_opts(fd4, loopback, target, &opta);
+ if (!ASSERT_EQ(err, 0, "prog_attach"))
+ goto cleanup_target3;
+
+ assert_mprog_count(target, 4);
+
+ memset(prog_ids, 0, sizeof(prog_ids));
+ optq.count = ARRAY_SIZE(prog_ids);
+
+ err = bpf_prog_query_opts(loopback, target, &optq);
+ if (!ASSERT_OK(err, "prog_query"))
+ goto cleanup_target4;
+
+ ASSERT_EQ(optq.count, 4, "count");
+ ASSERT_EQ(optq.revision, 5, "revision");
+ ASSERT_EQ(optq.prog_ids[0], id4, "prog_ids[0]");
+ ASSERT_EQ(optq.prog_ids[1], id1, "prog_ids[1]");
+ ASSERT_EQ(optq.prog_ids[2], id3, "prog_ids[2]");
+ ASSERT_EQ(optq.prog_ids[3], id2, "prog_ids[3]");
+ ASSERT_EQ(optq.prog_ids[4], 0, "prog_ids[4]");
+
+ tc_skel_reset_all_seen(skel);
+ ASSERT_OK(system(ping_cmd), ping_cmd);
+
+ ASSERT_EQ(skel->bss->seen_tc1, true, "seen_tc1");
+ ASSERT_EQ(skel->bss->seen_tc2, true, "seen_tc2");
+ ASSERT_EQ(skel->bss->seen_tc3, true, "seen_tc3");
+ ASSERT_EQ(skel->bss->seen_tc4, true, "seen_tc4");
+
+cleanup_target4:
+ err = bpf_prog_detach_opts(fd4, loopback, target, &optd);
+ ASSERT_OK(err, "prog_detach");
+ assert_mprog_count(target, 3);
+
+cleanup_target3:
+ err = bpf_prog_detach_opts(fd3, loopback, target, &optd);
+ ASSERT_OK(err, "prog_detach");
+ assert_mprog_count(target, 2);
+
+cleanup_target2:
+ err = bpf_prog_detach_opts(fd2, loopback, target, &optd);
+ ASSERT_OK(err, "prog_detach");
+ assert_mprog_count(target, 1);
+
+cleanup_target:
+ err = bpf_prog_detach_opts(fd1, loopback, target, &optd);
+ ASSERT_OK(err, "prog_detach");
+ assert_mprog_count(target, 0);
+
+cleanup:
+ test_tc_link__destroy(skel);
+}
+
+void serial_test_tc_opts_before(void)
+{
+ test_tc_opts_before_target(BPF_TCX_INGRESS);
+ test_tc_opts_before_target(BPF_TCX_EGRESS);
+}
+
+static void test_tc_opts_after_target(int target)
+{
+ LIBBPF_OPTS(bpf_prog_attach_opts, opta);
+ LIBBPF_OPTS(bpf_prog_detach_opts, optd);
+ LIBBPF_OPTS(bpf_prog_query_opts, optq);
+ __u32 fd1, fd2, fd3, fd4, id1, id2, id3, id4;
+ struct test_tc_link *skel;
+ __u32 prog_ids[5];
+ int err;
+
+ skel = test_tc_link__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "skel_load"))
+ goto cleanup;
+
+ fd1 = bpf_program__fd(skel->progs.tc1);
+ fd2 = bpf_program__fd(skel->progs.tc2);
+ fd3 = bpf_program__fd(skel->progs.tc3);
+ fd4 = bpf_program__fd(skel->progs.tc4);
+
+ id1 = id_from_prog_fd(fd1);
+ id2 = id_from_prog_fd(fd2);
+ id3 = id_from_prog_fd(fd3);
+ id4 = id_from_prog_fd(fd4);
+
+ ASSERT_NEQ(id1, id2, "prog_ids_1_2");
+ ASSERT_NEQ(id3, id4, "prog_ids_3_4");
+ ASSERT_NEQ(id2, id3, "prog_ids_2_3");
+
+ assert_mprog_count(target, 0);
+
+ err = bpf_prog_attach_opts(fd1, loopback, target, &opta);
+ if (!ASSERT_EQ(err, 0, "prog_attach"))
+ goto cleanup;
+
+ assert_mprog_count(target, 1);
+
+ err = bpf_prog_attach_opts(fd2, loopback, target, &opta);
+ if (!ASSERT_EQ(err, 0, "prog_attach"))
+ goto cleanup_target;
+
+ assert_mprog_count(target, 2);
+
+ optq.prog_ids = prog_ids;
+
+ memset(prog_ids, 0, sizeof(prog_ids));
+ optq.count = ARRAY_SIZE(prog_ids);
+
+ err = bpf_prog_query_opts(loopback, target, &optq);
+ if (!ASSERT_OK(err, "prog_query"))
+ goto cleanup_target2;
+
+ ASSERT_EQ(optq.count, 2, "count");
+ ASSERT_EQ(optq.revision, 3, "revision");
+ ASSERT_EQ(optq.prog_ids[0], id1, "prog_ids[0]");
+ ASSERT_EQ(optq.prog_ids[1], id2, "prog_ids[1]");
+ ASSERT_EQ(optq.prog_ids[2], 0, "prog_ids[2]");
+
+ tc_skel_reset_all_seen(skel);
+ ASSERT_OK(system(ping_cmd), ping_cmd);
+
+ ASSERT_EQ(skel->bss->seen_tc1, true, "seen_tc1");
+ ASSERT_EQ(skel->bss->seen_tc2, true, "seen_tc2");
+ ASSERT_EQ(skel->bss->seen_tc3, false, "seen_tc3");
+ ASSERT_EQ(skel->bss->seen_tc4, false, "seen_tc4");
+
+ LIBBPF_OPTS_RESET(opta,
+ .flags = BPF_F_AFTER,
+ .relative_fd = fd1,
+ );
+
+ err = bpf_prog_attach_opts(fd3, loopback, target, &opta);
+ if (!ASSERT_EQ(err, 0, "prog_attach"))
+ goto cleanup_target2;
+
+ memset(prog_ids, 0, sizeof(prog_ids));
+ optq.count = ARRAY_SIZE(prog_ids);
+
+ err = bpf_prog_query_opts(loopback, target, &optq);
+ if (!ASSERT_OK(err, "prog_query"))
+ goto cleanup_target3;
+
+ ASSERT_EQ(optq.count, 3, "count");
+ ASSERT_EQ(optq.revision, 4, "revision");
+ ASSERT_EQ(optq.prog_ids[0], id1, "prog_ids[0]");
+ ASSERT_EQ(optq.prog_ids[1], id3, "prog_ids[1]");
+ ASSERT_EQ(optq.prog_ids[2], id2, "prog_ids[2]");
+ ASSERT_EQ(optq.prog_ids[3], 0, "prog_ids[3]");
+
+ LIBBPF_OPTS_RESET(opta,
+ .flags = BPF_F_AFTER,
+ .relative_id = id2,
+ );
+
+ err = bpf_prog_attach_opts(fd4, loopback, target, &opta);
+ if (!ASSERT_EQ(err, 0, "prog_attach"))
+ goto cleanup_target3;
+
+ assert_mprog_count(target, 4);
+
+ memset(prog_ids, 0, sizeof(prog_ids));
+ optq.count = ARRAY_SIZE(prog_ids);
+
+ err = bpf_prog_query_opts(loopback, target, &optq);
+ if (!ASSERT_OK(err, "prog_query"))
+ goto cleanup_target4;
+
+ ASSERT_EQ(optq.count, 4, "count");
+ ASSERT_EQ(optq.revision, 5, "revision");
+ ASSERT_EQ(optq.prog_ids[0], id1, "prog_ids[0]");
+ ASSERT_EQ(optq.prog_ids[1], id3, "prog_ids[1]");
+ ASSERT_EQ(optq.prog_ids[2], id2, "prog_ids[2]");
+ ASSERT_EQ(optq.prog_ids[3], id4, "prog_ids[3]");
+ ASSERT_EQ(optq.prog_ids[4], 0, "prog_ids[4]");
+
+ tc_skel_reset_all_seen(skel);
+ ASSERT_OK(system(ping_cmd), ping_cmd);
+
+ ASSERT_EQ(skel->bss->seen_tc1, true, "seen_tc1");
+ ASSERT_EQ(skel->bss->seen_tc2, true, "seen_tc2");
+ ASSERT_EQ(skel->bss->seen_tc3, true, "seen_tc3");
+ ASSERT_EQ(skel->bss->seen_tc4, true, "seen_tc4");
+
+cleanup_target4:
+ err = bpf_prog_detach_opts(fd4, loopback, target, &optd);
+ ASSERT_OK(err, "prog_detach");
+ assert_mprog_count(target, 3);
+
+ memset(prog_ids, 0, sizeof(prog_ids));
+ optq.count = ARRAY_SIZE(prog_ids);
+
+ err = bpf_prog_query_opts(loopback, target, &optq);
+ if (!ASSERT_OK(err, "prog_query"))
+ goto cleanup_target3;
+
+ ASSERT_EQ(optq.count, 3, "count");
+ ASSERT_EQ(optq.revision, 6, "revision");
+ ASSERT_EQ(optq.prog_ids[0], id1, "prog_ids[0]");
+ ASSERT_EQ(optq.prog_ids[1], id3, "prog_ids[1]");
+ ASSERT_EQ(optq.prog_ids[2], id2, "prog_ids[2]");
+ ASSERT_EQ(optq.prog_ids[3], 0, "prog_ids[3]");
+
+cleanup_target3:
+ err = bpf_prog_detach_opts(fd3, loopback, target, &optd);
+ ASSERT_OK(err, "prog_detach");
+ assert_mprog_count(target, 2);
+
+ memset(prog_ids, 0, sizeof(prog_ids));
+ optq.count = ARRAY_SIZE(prog_ids);
+
+ err = bpf_prog_query_opts(loopback, target, &optq);
+ if (!ASSERT_OK(err, "prog_query"))
+ goto cleanup_target2;
+
+ ASSERT_EQ(optq.count, 2, "count");
+ ASSERT_EQ(optq.revision, 7, "revision");
+ ASSERT_EQ(optq.prog_ids[0], id1, "prog_ids[0]");
+ ASSERT_EQ(optq.prog_ids[1], id2, "prog_ids[1]");
+ ASSERT_EQ(optq.prog_ids[2], 0, "prog_ids[2]");
+
+cleanup_target2:
+ err = bpf_prog_detach_opts(fd2, loopback, target, &optd);
+ ASSERT_OK(err, "prog_detach");
+ assert_mprog_count(target, 1);
+
+ memset(prog_ids, 0, sizeof(prog_ids));
+ optq.count = ARRAY_SIZE(prog_ids);
+
+ err = bpf_prog_query_opts(loopback, target, &optq);
+ if (!ASSERT_OK(err, "prog_query"))
+ goto cleanup_target;
+
+ ASSERT_EQ(optq.count, 1, "count");
+ ASSERT_EQ(optq.revision, 8, "revision");
+ ASSERT_EQ(optq.prog_ids[0], id1, "prog_ids[0]");
+ ASSERT_EQ(optq.prog_ids[1], 0, "prog_ids[1]");
+
+cleanup_target:
+ err = bpf_prog_detach_opts(fd1, loopback, target, &optd);
+ ASSERT_OK(err, "prog_detach");
+ assert_mprog_count(target, 0);
+
+cleanup:
+ test_tc_link__destroy(skel);
+}
+
+void serial_test_tc_opts_after(void)
+{
+ test_tc_opts_after_target(BPF_TCX_INGRESS);
+ test_tc_opts_after_target(BPF_TCX_EGRESS);
+}
+
+static void test_tc_opts_revision_target(int target)
+{
+ LIBBPF_OPTS(bpf_prog_attach_opts, opta);
+ LIBBPF_OPTS(bpf_prog_detach_opts, optd);
+ LIBBPF_OPTS(bpf_prog_query_opts, optq);
+ __u32 fd1, fd2, id1, id2;
+ struct test_tc_link *skel;
+ __u32 prog_ids[3];
+ int err;
+
+ skel = test_tc_link__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "skel_load"))
+ goto cleanup;
+
+ fd1 = bpf_program__fd(skel->progs.tc1);
+ fd2 = bpf_program__fd(skel->progs.tc2);
+
+ id1 = id_from_prog_fd(fd1);
+ id2 = id_from_prog_fd(fd2);
+
+ ASSERT_NEQ(id1, id2, "prog_ids_1_2");
+
+ assert_mprog_count(target, 0);
+
+ LIBBPF_OPTS_RESET(opta,
+ .expected_revision = 1,
+ );
+
+ err = bpf_prog_attach_opts(fd1, loopback, target, &opta);
+ if (!ASSERT_EQ(err, 0, "prog_attach"))
+ goto cleanup;
+
+ assert_mprog_count(target, 1);
+
+ LIBBPF_OPTS_RESET(opta,
+ .expected_revision = 1,
+ );
+
+ err = bpf_prog_attach_opts(fd2, loopback, target, &opta);
+ if (!ASSERT_EQ(err, -ESTALE, "prog_attach"))
+ goto cleanup_target;
+
+ assert_mprog_count(target, 1);
+
+ LIBBPF_OPTS_RESET(opta,
+ .expected_revision = 2,
+ );
+
+ err = bpf_prog_attach_opts(fd2, loopback, target, &opta);
+ if (!ASSERT_EQ(err, 0, "prog_attach"))
+ goto cleanup_target;
+
+ assert_mprog_count(target, 2);
+
+ optq.prog_ids = prog_ids;
+
+ memset(prog_ids, 0, sizeof(prog_ids));
+ optq.count = ARRAY_SIZE(prog_ids);
+
+ err = bpf_prog_query_opts(loopback, target, &optq);
+ if (!ASSERT_OK(err, "prog_query"))
+ goto cleanup_target2;
+
+ ASSERT_EQ(optq.count, 2, "count");
+ ASSERT_EQ(optq.revision, 3, "revision");
+ ASSERT_EQ(optq.prog_ids[0], id1, "prog_ids[0]");
+ ASSERT_EQ(optq.prog_ids[1], id2, "prog_ids[1]");
+ ASSERT_EQ(optq.prog_ids[2], 0, "prog_ids[2]");
+
+ tc_skel_reset_all_seen(skel);
+ ASSERT_OK(system(ping_cmd), ping_cmd);
+
+ ASSERT_EQ(skel->bss->seen_tc1, true, "seen_tc1");
+ ASSERT_EQ(skel->bss->seen_tc2, true, "seen_tc2");
+
+ LIBBPF_OPTS_RESET(optd,
+ .expected_revision = 2,
+ );
+
+ err = bpf_prog_detach_opts(fd2, loopback, target, &optd);
+ ASSERT_EQ(err, -ESTALE, "prog_detach");
+ assert_mprog_count(target, 2);
+
+cleanup_target2:
+ LIBBPF_OPTS_RESET(optd,
+ .expected_revision = 3,
+ );
+
+ err = bpf_prog_detach_opts(fd2, loopback, target, &optd);
+ ASSERT_OK(err, "prog_detach");
+ assert_mprog_count(target, 1);
+
+cleanup_target:
+ LIBBPF_OPTS_RESET(optd);
+
+ err = bpf_prog_detach_opts(fd1, loopback, target, &optd);
+ ASSERT_OK(err, "prog_detach");
+ assert_mprog_count(target, 0);
+
+cleanup:
+ test_tc_link__destroy(skel);
+}
+
+void serial_test_tc_opts_revision(void)
+{
+ test_tc_opts_revision_target(BPF_TCX_INGRESS);
+ test_tc_opts_revision_target(BPF_TCX_EGRESS);
+}
+
+static void test_tc_chain_classic(int target, bool chain_tc_old)
+{
+ LIBBPF_OPTS(bpf_tc_opts, tc_opts, .handle = 1, .priority = 1);
+ LIBBPF_OPTS(bpf_tc_hook, tc_hook, .ifindex = loopback);
+ LIBBPF_OPTS(bpf_prog_attach_opts, opta);
+ LIBBPF_OPTS(bpf_prog_detach_opts, optd);
+ bool hook_created = false, tc_attached = false;
+ __u32 fd1, fd2, fd3, id1, id2, id3;
+ struct test_tc_link *skel;
+ int err;
+
+ skel = test_tc_link__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "skel_load"))
+ goto cleanup;
+
+ fd1 = bpf_program__fd(skel->progs.tc1);
+ fd2 = bpf_program__fd(skel->progs.tc2);
+ fd3 = bpf_program__fd(skel->progs.tc3);
+
+ id1 = id_from_prog_fd(fd1);
+ id2 = id_from_prog_fd(fd2);
+ id3 = id_from_prog_fd(fd3);
+
+ ASSERT_NEQ(id1, id2, "prog_ids_1_2");
+ ASSERT_NEQ(id2, id3, "prog_ids_2_3");
+
+ assert_mprog_count(target, 0);
+
+ if (chain_tc_old) {
+ tc_hook.attach_point = target == BPF_TCX_INGRESS ?
+ BPF_TC_INGRESS : BPF_TC_EGRESS;
+ err = bpf_tc_hook_create(&tc_hook);
+ if (err == 0)
+ hook_created = true;
+ err = err == -EEXIST ? 0 : err;
+ if (!ASSERT_OK(err, "bpf_tc_hook_create"))
+ goto cleanup;
+
+ tc_opts.prog_fd = fd3;
+ err = bpf_tc_attach(&tc_hook, &tc_opts);
+ if (!ASSERT_OK(err, "bpf_tc_attach"))
+ goto cleanup;
+ tc_attached = true;
+ }
+
+ err = bpf_prog_attach_opts(fd1, loopback, target, &opta);
+ if (!ASSERT_EQ(err, 0, "prog_attach"))
+ goto cleanup;
+
+ err = bpf_prog_attach_opts(fd2, loopback, target, &opta);
+ if (!ASSERT_EQ(err, 0, "prog_attach"))
+ goto cleanup_detach;
+
+ assert_mprog_count(target, 2);
+
+ tc_skel_reset_all_seen(skel);
+ ASSERT_OK(system(ping_cmd), ping_cmd);
+
+ ASSERT_EQ(skel->bss->seen_tc1, true, "seen_tc1");
+ ASSERT_EQ(skel->bss->seen_tc2, true, "seen_tc2");
+ ASSERT_EQ(skel->bss->seen_tc3, chain_tc_old, "seen_tc3");
+
+ err = bpf_prog_detach_opts(fd2, loopback, target, &optd);
+ if (!ASSERT_OK(err, "prog_detach"))
+ goto cleanup_detach;
+
+ assert_mprog_count(target, 1);
+
+ tc_skel_reset_all_seen(skel);
+ ASSERT_OK(system(ping_cmd), ping_cmd);
+
+ ASSERT_EQ(skel->bss->seen_tc1, true, "seen_tc1");
+ ASSERT_EQ(skel->bss->seen_tc2, false, "seen_tc2");
+ ASSERT_EQ(skel->bss->seen_tc3, chain_tc_old, "seen_tc3");
+
+cleanup_detach:
+ err = bpf_prog_detach_opts(fd1, loopback, target, &optd);
+ if (!ASSERT_OK(err, "prog_detach"))
+ goto cleanup;
+
+ assert_mprog_count(target, 0);
+cleanup:
+ if (tc_attached) {
+ tc_opts.flags = tc_opts.prog_fd = tc_opts.prog_id = 0;
+ err = bpf_tc_detach(&tc_hook, &tc_opts);
+ ASSERT_OK(err, "bpf_tc_detach");
+ }
+ if (hook_created) {
+ tc_hook.attach_point = BPF_TC_INGRESS | BPF_TC_EGRESS;
+ bpf_tc_hook_destroy(&tc_hook);
+ }
+ test_tc_link__destroy(skel);
+ assert_mprog_count(target, 0);
+}
+
+void serial_test_tc_opts_chain_classic(void)
+{
+ test_tc_chain_classic(BPF_TCX_INGRESS, false);
+ test_tc_chain_classic(BPF_TCX_EGRESS, false);
+ test_tc_chain_classic(BPF_TCX_INGRESS, true);
+ test_tc_chain_classic(BPF_TCX_EGRESS, true);
+}
+
+static void test_tc_opts_replace_target(int target)
+{
+ LIBBPF_OPTS(bpf_prog_attach_opts, opta);
+ LIBBPF_OPTS(bpf_prog_detach_opts, optd);
+ LIBBPF_OPTS(bpf_prog_query_opts, optq);
+ __u32 fd1, fd2, fd3, id1, id2, id3, detach_fd;
+ __u32 prog_ids[4], prog_flags[4];
+ struct test_tc_link *skel;
+ int err;
+
+ skel = test_tc_link__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "skel_load"))
+ goto cleanup;
+
+ fd1 = bpf_program__fd(skel->progs.tc1);
+ fd2 = bpf_program__fd(skel->progs.tc2);
+ fd3 = bpf_program__fd(skel->progs.tc3);
+
+ id1 = id_from_prog_fd(fd1);
+ id2 = id_from_prog_fd(fd2);
+ id3 = id_from_prog_fd(fd3);
+
+ ASSERT_NEQ(id1, id2, "prog_ids_1_2");
+ ASSERT_NEQ(id2, id3, "prog_ids_2_3");
+
+ assert_mprog_count(target, 0);
+
+ LIBBPF_OPTS_RESET(opta,
+ .expected_revision = 1,
+ );
+
+ err = bpf_prog_attach_opts(fd1, loopback, target, &opta);
+ if (!ASSERT_EQ(err, 0, "prog_attach"))
+ goto cleanup;
+
+ assert_mprog_count(target, 1);
+
+ LIBBPF_OPTS_RESET(opta,
+ .flags = BPF_F_BEFORE,
+ .relative_id = id1,
+ .expected_revision = 2,
+ );
+
+ err = bpf_prog_attach_opts(fd2, loopback, target, &opta);
+ if (!ASSERT_EQ(err, 0, "prog_attach"))
+ goto cleanup_target;
+
+ detach_fd = fd2;
+
+ assert_mprog_count(target, 2);
+
+ optq.prog_attach_flags = prog_flags;
+ optq.prog_ids = prog_ids;
+
+ memset(prog_flags, 0, sizeof(prog_flags));
+ memset(prog_ids, 0, sizeof(prog_ids));
+ optq.count = ARRAY_SIZE(prog_ids);
+
+ err = bpf_prog_query_opts(loopback, target, &optq);
+ if (!ASSERT_OK(err, "prog_query"))
+ goto cleanup_target2;
+
+ ASSERT_EQ(optq.count, 2, "count");
+ ASSERT_EQ(optq.revision, 3, "revision");
+ ASSERT_EQ(optq.prog_ids[0], id2, "prog_ids[0]");
+ ASSERT_EQ(optq.prog_ids[1], id1, "prog_ids[1]");
+ ASSERT_EQ(optq.prog_ids[2], 0, "prog_ids[2]");
+
+ ASSERT_EQ(optq.prog_attach_flags[0], 0, "prog_flags[0]");
+ ASSERT_EQ(optq.prog_attach_flags[1], 0, "prog_flags[1]");
+ ASSERT_EQ(optq.prog_attach_flags[2], 0, "prog_flags[2]");
+
+ tc_skel_reset_all_seen(skel);
+ ASSERT_OK(system(ping_cmd), ping_cmd);
+
+ ASSERT_EQ(skel->bss->seen_tc1, true, "seen_tc1");
+ ASSERT_EQ(skel->bss->seen_tc2, true, "seen_tc2");
+ ASSERT_EQ(skel->bss->seen_tc3, false, "seen_tc3");
+
+ LIBBPF_OPTS_RESET(opta,
+ .flags = BPF_F_REPLACE,
+ .replace_prog_fd = fd2,
+ .expected_revision = 3,
+ );
+
+ err = bpf_prog_attach_opts(fd3, loopback, target, &opta);
+ if (!ASSERT_EQ(err, 0, "prog_attach"))
+ goto cleanup_target2;
+
+ detach_fd = fd3;
+
+ assert_mprog_count(target, 2);
+
+ memset(prog_ids, 0, sizeof(prog_ids));
+ optq.count = ARRAY_SIZE(prog_ids);
+
+ err = bpf_prog_query_opts(loopback, target, &optq);
+ if (!ASSERT_OK(err, "prog_query"))
+ goto cleanup_target2;
+
+ ASSERT_EQ(optq.count, 2, "count");
+ ASSERT_EQ(optq.revision, 4, "revision");
+ ASSERT_EQ(optq.prog_ids[0], id3, "prog_ids[0]");
+ ASSERT_EQ(optq.prog_ids[1], id1, "prog_ids[1]");
+ ASSERT_EQ(optq.prog_ids[2], 0, "prog_ids[2]");
+
+ tc_skel_reset_all_seen(skel);
+ ASSERT_OK(system(ping_cmd), ping_cmd);
+
+ ASSERT_EQ(skel->bss->seen_tc1, true, "seen_tc1");
+ ASSERT_EQ(skel->bss->seen_tc2, false, "seen_tc2");
+ ASSERT_EQ(skel->bss->seen_tc3, true, "seen_tc3");
+
+ LIBBPF_OPTS_RESET(opta,
+ .flags = BPF_F_REPLACE | BPF_F_BEFORE,
+ .replace_prog_fd = fd3,
+ .relative_fd = fd1,
+ .expected_revision = 4,
+ );
+
+ err = bpf_prog_attach_opts(fd2, loopback, target, &opta);
+ if (!ASSERT_EQ(err, 0, "prog_attach"))
+ goto cleanup_target2;
+
+ detach_fd = fd2;
+
+ assert_mprog_count(target, 2);
+
+ memset(prog_ids, 0, sizeof(prog_ids));
+ optq.count = ARRAY_SIZE(prog_ids);
+
+ err = bpf_prog_query_opts(loopback, target, &optq);
+ if (!ASSERT_OK(err, "prog_query"))
+ goto cleanup_target2;
+
+ ASSERT_EQ(optq.count, 2, "count");
+ ASSERT_EQ(optq.revision, 5, "revision");
+ ASSERT_EQ(optq.prog_ids[0], id2, "prog_ids[0]");
+ ASSERT_EQ(optq.prog_ids[1], id1, "prog_ids[1]");
+ ASSERT_EQ(optq.prog_ids[2], 0, "prog_ids[2]");
+
+ tc_skel_reset_all_seen(skel);
+ ASSERT_OK(system(ping_cmd), ping_cmd);
+
+ ASSERT_EQ(skel->bss->seen_tc1, true, "seen_tc1");
+ ASSERT_EQ(skel->bss->seen_tc2, true, "seen_tc2");
+ ASSERT_EQ(skel->bss->seen_tc3, false, "seen_tc3");
+
+ LIBBPF_OPTS_RESET(opta,
+ .flags = BPF_F_REPLACE,
+ .replace_prog_fd = fd2,
+ );
+
+ err = bpf_prog_attach_opts(fd2, loopback, target, &opta);
+ ASSERT_EQ(err, -EEXIST, "prog_attach");
+ assert_mprog_count(target, 2);
+
+ LIBBPF_OPTS_RESET(opta,
+ .flags = BPF_F_REPLACE | BPF_F_AFTER,
+ .replace_prog_fd = fd2,
+ .relative_fd = fd1,
+ .expected_revision = 5,
+ );
+
+ err = bpf_prog_attach_opts(fd3, loopback, target, &opta);
+ ASSERT_EQ(err, -ERANGE, "prog_attach");
+ assert_mprog_count(target, 2);
+
+ LIBBPF_OPTS_RESET(opta,
+ .flags = BPF_F_BEFORE | BPF_F_AFTER | BPF_F_REPLACE,
+ .replace_prog_fd = fd2,
+ .relative_fd = fd1,
+ .expected_revision = 5,
+ );
+
+ err = bpf_prog_attach_opts(fd3, loopback, target, &opta);
+ ASSERT_EQ(err, -ERANGE, "prog_attach");
+ assert_mprog_count(target, 2);
+
+ LIBBPF_OPTS_RESET(optd,
+ .flags = BPF_F_BEFORE,
+ .relative_id = id1,
+ .expected_revision = 5,
+ );
+
+cleanup_target2:
+ err = bpf_prog_detach_opts(detach_fd, loopback, target, &optd);
+ ASSERT_OK(err, "prog_detach");
+ assert_mprog_count(target, 1);
+
+cleanup_target:
+ LIBBPF_OPTS_RESET(optd);
+
+ err = bpf_prog_detach_opts(fd1, loopback, target, &optd);
+ ASSERT_OK(err, "prog_detach");
+ assert_mprog_count(target, 0);
+
+cleanup:
+ test_tc_link__destroy(skel);
+}
+
+void serial_test_tc_opts_replace(void)
+{
+ test_tc_opts_replace_target(BPF_TCX_INGRESS);
+ test_tc_opts_replace_target(BPF_TCX_EGRESS);
+}
+
+static void test_tc_opts_invalid_target(int target)
+{
+ LIBBPF_OPTS(bpf_prog_attach_opts, opta);
+ LIBBPF_OPTS(bpf_prog_detach_opts, optd);
+ __u32 fd1, fd2, id1, id2;
+ struct test_tc_link *skel;
+ int err;
+
+ skel = test_tc_link__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "skel_load"))
+ goto cleanup;
+
+ fd1 = bpf_program__fd(skel->progs.tc1);
+ fd2 = bpf_program__fd(skel->progs.tc2);
+
+ id1 = id_from_prog_fd(fd1);
+ id2 = id_from_prog_fd(fd2);
+
+ ASSERT_NEQ(id1, id2, "prog_ids_1_2");
+
+ assert_mprog_count(target, 0);
+
+ LIBBPF_OPTS_RESET(opta,
+ .flags = BPF_F_BEFORE | BPF_F_AFTER,
+ );
+
+ err = bpf_prog_attach_opts(fd1, loopback, target, &opta);
+ ASSERT_EQ(err, -ERANGE, "prog_attach");
+ assert_mprog_count(target, 0);
+
+ LIBBPF_OPTS_RESET(opta,
+ .flags = BPF_F_BEFORE | BPF_F_ID,
+ );
+
+ err = bpf_prog_attach_opts(fd1, loopback, target, &opta);
+ ASSERT_EQ(err, -ENOENT, "prog_attach");
+ assert_mprog_count(target, 0);
+
+ LIBBPF_OPTS_RESET(opta,
+ .flags = BPF_F_AFTER | BPF_F_ID,
+ );
+
+ err = bpf_prog_attach_opts(fd1, loopback, target, &opta);
+ ASSERT_EQ(err, -ENOENT, "prog_attach");
+ assert_mprog_count(target, 0);
+
+ LIBBPF_OPTS_RESET(opta,
+ .relative_fd = fd2,
+ );
+
+ err = bpf_prog_attach_opts(fd1, loopback, target, &opta);
+ ASSERT_EQ(err, -EINVAL, "prog_attach");
+ assert_mprog_count(target, 0);
+
+ LIBBPF_OPTS_RESET(opta,
+ .flags = BPF_F_BEFORE | BPF_F_AFTER,
+ .relative_fd = fd2,
+ );
+
+ err = bpf_prog_attach_opts(fd1, loopback, target, &opta);
+ ASSERT_EQ(err, -ENOENT, "prog_attach");
+ assert_mprog_count(target, 0);
+
+ LIBBPF_OPTS_RESET(opta,
+ .flags = BPF_F_ID,
+ .relative_id = id2,
+ );
+
+ err = bpf_prog_attach_opts(fd1, loopback, target, &opta);
+ ASSERT_EQ(err, -EINVAL, "prog_attach");
+ assert_mprog_count(target, 0);
+
+ LIBBPF_OPTS_RESET(opta,
+ .flags = BPF_F_BEFORE,
+ .relative_fd = fd1,
+ );
+
+ err = bpf_prog_attach_opts(fd1, loopback, target, &opta);
+ ASSERT_EQ(err, -ENOENT, "prog_attach");
+ assert_mprog_count(target, 0);
+
+ LIBBPF_OPTS_RESET(opta,
+ .flags = BPF_F_AFTER,
+ .relative_fd = fd1,
+ );
+
+ err = bpf_prog_attach_opts(fd1, loopback, target, &opta);
+ ASSERT_EQ(err, -ENOENT, "prog_attach");
+ assert_mprog_count(target, 0);
+
+ LIBBPF_OPTS_RESET(opta);
+
+ err = bpf_prog_attach_opts(fd1, loopback, target, &opta);
+ if (!ASSERT_EQ(err, 0, "prog_attach"))
+ goto cleanup;
+
+ assert_mprog_count(target, 1);
+
+ LIBBPF_OPTS_RESET(opta);
+
+ err = bpf_prog_attach_opts(fd1, loopback, target, &opta);
+ ASSERT_EQ(err, -EEXIST, "prog_attach");
+ assert_mprog_count(target, 1);
+
+ LIBBPF_OPTS_RESET(opta,
+ .flags = BPF_F_BEFORE,
+ .relative_fd = fd1,
+ );
+
+ err = bpf_prog_attach_opts(fd1, loopback, target, &opta);
+ ASSERT_EQ(err, -EEXIST, "prog_attach");
+ assert_mprog_count(target, 1);
+
+ LIBBPF_OPTS_RESET(opta,
+ .flags = BPF_F_AFTER,
+ .relative_fd = fd1,
+ );
+
+ err = bpf_prog_attach_opts(fd1, loopback, target, &opta);
+ ASSERT_EQ(err, -EEXIST, "prog_attach");
+ assert_mprog_count(target, 1);
+
+ LIBBPF_OPTS_RESET(opta,
+ .flags = BPF_F_REPLACE,
+ .relative_fd = fd1,
+ );
+
+ err = bpf_prog_attach_opts(fd1, loopback, target, &opta);
+ ASSERT_EQ(err, -EINVAL, "prog_attach_x1");
+ assert_mprog_count(target, 1);
+
+ LIBBPF_OPTS_RESET(opta,
+ .flags = BPF_F_REPLACE,
+ .replace_prog_fd = fd1,
+ );
+
+ err = bpf_prog_attach_opts(fd1, loopback, target, &opta);
+ ASSERT_EQ(err, -EEXIST, "prog_attach");
+ assert_mprog_count(target, 1);
+
+ err = bpf_prog_detach_opts(fd1, loopback, target, &optd);
+ ASSERT_OK(err, "prog_detach");
+ assert_mprog_count(target, 0);
+cleanup:
+ test_tc_link__destroy(skel);
+}
+
+void serial_test_tc_opts_invalid(void)
+{
+ test_tc_opts_invalid_target(BPF_TCX_INGRESS);
+ test_tc_opts_invalid_target(BPF_TCX_EGRESS);
+}
+
+static void test_tc_opts_prepend_target(int target)
+{
+ LIBBPF_OPTS(bpf_prog_attach_opts, opta);
+ LIBBPF_OPTS(bpf_prog_detach_opts, optd);
+ LIBBPF_OPTS(bpf_prog_query_opts, optq);
+ __u32 fd1, fd2, fd3, fd4, id1, id2, id3, id4;
+ struct test_tc_link *skel;
+ __u32 prog_ids[5];
+ int err;
+
+ skel = test_tc_link__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "skel_load"))
+ goto cleanup;
+
+ fd1 = bpf_program__fd(skel->progs.tc1);
+ fd2 = bpf_program__fd(skel->progs.tc2);
+ fd3 = bpf_program__fd(skel->progs.tc3);
+ fd4 = bpf_program__fd(skel->progs.tc4);
+
+ id1 = id_from_prog_fd(fd1);
+ id2 = id_from_prog_fd(fd2);
+ id3 = id_from_prog_fd(fd3);
+ id4 = id_from_prog_fd(fd4);
+
+ ASSERT_NEQ(id1, id2, "prog_ids_1_2");
+ ASSERT_NEQ(id3, id4, "prog_ids_3_4");
+ ASSERT_NEQ(id2, id3, "prog_ids_2_3");
+
+ assert_mprog_count(target, 0);
+
+ err = bpf_prog_attach_opts(fd1, loopback, target, &opta);
+ if (!ASSERT_EQ(err, 0, "prog_attach"))
+ goto cleanup;
+
+ assert_mprog_count(target, 1);
+
+ LIBBPF_OPTS_RESET(opta,
+ .flags = BPF_F_BEFORE,
+ );
+
+ err = bpf_prog_attach_opts(fd2, loopback, target, &opta);
+ if (!ASSERT_EQ(err, 0, "prog_attach"))
+ goto cleanup_target;
+
+ assert_mprog_count(target, 2);
+
+ optq.prog_ids = prog_ids;
+
+ memset(prog_ids, 0, sizeof(prog_ids));
+ optq.count = ARRAY_SIZE(prog_ids);
+
+ err = bpf_prog_query_opts(loopback, target, &optq);
+ if (!ASSERT_OK(err, "prog_query"))
+ goto cleanup_target2;
+
+ ASSERT_EQ(optq.count, 2, "count");
+ ASSERT_EQ(optq.revision, 3, "revision");
+ ASSERT_EQ(optq.prog_ids[0], id2, "prog_ids[0]");
+ ASSERT_EQ(optq.prog_ids[1], id1, "prog_ids[1]");
+ ASSERT_EQ(optq.prog_ids[2], 0, "prog_ids[2]");
+
+ tc_skel_reset_all_seen(skel);
+ ASSERT_OK(system(ping_cmd), ping_cmd);
+
+ ASSERT_EQ(skel->bss->seen_tc1, true, "seen_tc1");
+ ASSERT_EQ(skel->bss->seen_tc2, true, "seen_tc2");
+ ASSERT_EQ(skel->bss->seen_tc3, false, "seen_tc3");
+ ASSERT_EQ(skel->bss->seen_tc4, false, "seen_tc4");
+
+ LIBBPF_OPTS_RESET(opta,
+ .flags = BPF_F_BEFORE,
+ );
+
+ err = bpf_prog_attach_opts(fd3, loopback, target, &opta);
+ if (!ASSERT_EQ(err, 0, "prog_attach"))
+ goto cleanup_target2;
+
+ LIBBPF_OPTS_RESET(opta,
+ .flags = BPF_F_BEFORE,
+ );
+
+ err = bpf_prog_attach_opts(fd4, loopback, target, &opta);
+ if (!ASSERT_EQ(err, 0, "prog_attach"))
+ goto cleanup_target3;
+
+ assert_mprog_count(target, 4);
+
+ memset(prog_ids, 0, sizeof(prog_ids));
+ optq.count = ARRAY_SIZE(prog_ids);
+
+ err = bpf_prog_query_opts(loopback, target, &optq);
+ if (!ASSERT_OK(err, "prog_query"))
+ goto cleanup_target4;
+
+ ASSERT_EQ(optq.count, 4, "count");
+ ASSERT_EQ(optq.revision, 5, "revision");
+ ASSERT_EQ(optq.prog_ids[0], id4, "prog_ids[0]");
+ ASSERT_EQ(optq.prog_ids[1], id3, "prog_ids[1]");
+ ASSERT_EQ(optq.prog_ids[2], id2, "prog_ids[2]");
+ ASSERT_EQ(optq.prog_ids[3], id1, "prog_ids[3]");
+ ASSERT_EQ(optq.prog_ids[4], 0, "prog_ids[4]");
+
+ tc_skel_reset_all_seen(skel);
+ ASSERT_OK(system(ping_cmd), ping_cmd);
+
+ ASSERT_EQ(skel->bss->seen_tc1, true, "seen_tc1");
+ ASSERT_EQ(skel->bss->seen_tc2, true, "seen_tc2");
+ ASSERT_EQ(skel->bss->seen_tc3, true, "seen_tc3");
+ ASSERT_EQ(skel->bss->seen_tc4, true, "seen_tc4");
+
+cleanup_target4:
+ err = bpf_prog_detach_opts(fd4, loopback, target, &optd);
+ ASSERT_OK(err, "prog_detach");
+ assert_mprog_count(target, 3);
+
+cleanup_target3:
+ err = bpf_prog_detach_opts(fd3, loopback, target, &optd);
+ ASSERT_OK(err, "prog_detach");
+ assert_mprog_count(target, 2);
+
+cleanup_target2:
+ err = bpf_prog_detach_opts(fd2, loopback, target, &optd);
+ ASSERT_OK(err, "prog_detach");
+ assert_mprog_count(target, 1);
+
+cleanup_target:
+ err = bpf_prog_detach_opts(fd1, loopback, target, &optd);
+ ASSERT_OK(err, "prog_detach");
+ assert_mprog_count(target, 0);
+
+cleanup:
+ test_tc_link__destroy(skel);
+}
+
+void serial_test_tc_opts_prepend(void)
+{
+ test_tc_opts_prepend_target(BPF_TCX_INGRESS);
+ test_tc_opts_prepend_target(BPF_TCX_EGRESS);
+}
+
+static void test_tc_opts_append_target(int target)
+{
+ LIBBPF_OPTS(bpf_prog_attach_opts, opta);
+ LIBBPF_OPTS(bpf_prog_detach_opts, optd);
+ LIBBPF_OPTS(bpf_prog_query_opts, optq);
+ __u32 fd1, fd2, fd3, fd4, id1, id2, id3, id4;
+ struct test_tc_link *skel;
+ __u32 prog_ids[5];
+ int err;
+
+ skel = test_tc_link__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "skel_load"))
+ goto cleanup;
+
+ fd1 = bpf_program__fd(skel->progs.tc1);
+ fd2 = bpf_program__fd(skel->progs.tc2);
+ fd3 = bpf_program__fd(skel->progs.tc3);
+ fd4 = bpf_program__fd(skel->progs.tc4);
+
+ id1 = id_from_prog_fd(fd1);
+ id2 = id_from_prog_fd(fd2);
+ id3 = id_from_prog_fd(fd3);
+ id4 = id_from_prog_fd(fd4);
+
+ ASSERT_NEQ(id1, id2, "prog_ids_1_2");
+ ASSERT_NEQ(id3, id4, "prog_ids_3_4");
+ ASSERT_NEQ(id2, id3, "prog_ids_2_3");
+
+ assert_mprog_count(target, 0);
+
+ err = bpf_prog_attach_opts(fd1, loopback, target, &opta);
+ if (!ASSERT_EQ(err, 0, "prog_attach"))
+ goto cleanup;
+
+ assert_mprog_count(target, 1);
+
+ LIBBPF_OPTS_RESET(opta,
+ .flags = BPF_F_AFTER,
+ );
+
+ err = bpf_prog_attach_opts(fd2, loopback, target, &opta);
+ if (!ASSERT_EQ(err, 0, "prog_attach"))
+ goto cleanup_target;
+
+ assert_mprog_count(target, 2);
+
+ optq.prog_ids = prog_ids;
+
+ memset(prog_ids, 0, sizeof(prog_ids));
+ optq.count = ARRAY_SIZE(prog_ids);
+
+ err = bpf_prog_query_opts(loopback, target, &optq);
+ if (!ASSERT_OK(err, "prog_query"))
+ goto cleanup_target2;
+
+ ASSERT_EQ(optq.count, 2, "count");
+ ASSERT_EQ(optq.revision, 3, "revision");
+ ASSERT_EQ(optq.prog_ids[0], id1, "prog_ids[0]");
+ ASSERT_EQ(optq.prog_ids[1], id2, "prog_ids[1]");
+ ASSERT_EQ(optq.prog_ids[2], 0, "prog_ids[2]");
+
+ tc_skel_reset_all_seen(skel);
+ ASSERT_OK(system(ping_cmd), ping_cmd);
+
+ ASSERT_EQ(skel->bss->seen_tc1, true, "seen_tc1");
+ ASSERT_EQ(skel->bss->seen_tc2, true, "seen_tc2");
+ ASSERT_EQ(skel->bss->seen_tc3, false, "seen_tc3");
+ ASSERT_EQ(skel->bss->seen_tc4, false, "seen_tc4");
+
+ LIBBPF_OPTS_RESET(opta,
+ .flags = BPF_F_AFTER,
+ );
+
+ err = bpf_prog_attach_opts(fd3, loopback, target, &opta);
+ if (!ASSERT_EQ(err, 0, "prog_attach"))
+ goto cleanup_target2;
+
+ LIBBPF_OPTS_RESET(opta,
+ .flags = BPF_F_AFTER,
+ );
+
+ err = bpf_prog_attach_opts(fd4, loopback, target, &opta);
+ if (!ASSERT_EQ(err, 0, "prog_attach"))
+ goto cleanup_target3;
+
+ assert_mprog_count(target, 4);
+
+ memset(prog_ids, 0, sizeof(prog_ids));
+ optq.count = ARRAY_SIZE(prog_ids);
+
+ err = bpf_prog_query_opts(loopback, target, &optq);
+ if (!ASSERT_OK(err, "prog_query"))
+ goto cleanup_target4;
+
+ ASSERT_EQ(optq.count, 4, "count");
+ ASSERT_EQ(optq.revision, 5, "revision");
+ ASSERT_EQ(optq.prog_ids[0], id1, "prog_ids[0]");
+ ASSERT_EQ(optq.prog_ids[1], id2, "prog_ids[1]");
+ ASSERT_EQ(optq.prog_ids[2], id3, "prog_ids[2]");
+ ASSERT_EQ(optq.prog_ids[3], id4, "prog_ids[3]");
+ ASSERT_EQ(optq.prog_ids[4], 0, "prog_ids[4]");
+
+ tc_skel_reset_all_seen(skel);
+ ASSERT_OK(system(ping_cmd), ping_cmd);
+
+ ASSERT_EQ(skel->bss->seen_tc1, true, "seen_tc1");
+ ASSERT_EQ(skel->bss->seen_tc2, true, "seen_tc2");
+ ASSERT_EQ(skel->bss->seen_tc3, true, "seen_tc3");
+ ASSERT_EQ(skel->bss->seen_tc4, true, "seen_tc4");
+
+cleanup_target4:
+ err = bpf_prog_detach_opts(fd4, loopback, target, &optd);
+ ASSERT_OK(err, "prog_detach");
+ assert_mprog_count(target, 3);
+
+cleanup_target3:
+ err = bpf_prog_detach_opts(fd3, loopback, target, &optd);
+ ASSERT_OK(err, "prog_detach");
+ assert_mprog_count(target, 2);
+
+cleanup_target2:
+ err = bpf_prog_detach_opts(fd2, loopback, target, &optd);
+ ASSERT_OK(err, "prog_detach");
+ assert_mprog_count(target, 1);
+
+cleanup_target:
+ err = bpf_prog_detach_opts(fd1, loopback, target, &optd);
+ ASSERT_OK(err, "prog_detach");
+ assert_mprog_count(target, 0);
+
+cleanup:
+ test_tc_link__destroy(skel);
+}
+
+void serial_test_tc_opts_append(void)
+{
+ test_tc_opts_append_target(BPF_TCX_INGRESS);
+ test_tc_opts_append_target(BPF_TCX_EGRESS);
+}
+
+static void test_tc_opts_dev_cleanup_target(int target)
+{
+ LIBBPF_OPTS(bpf_prog_attach_opts, opta);
+ LIBBPF_OPTS(bpf_prog_detach_opts, optd);
+ LIBBPF_OPTS(bpf_prog_query_opts, optq);
+ __u32 fd1, fd2, fd3, fd4, id1, id2, id3, id4;
+ struct test_tc_link *skel;
+ int err, ifindex;
+
+ ASSERT_OK(system("ip link add dev tcx_opts1 type veth peer name tcx_opts2"), "add veth");
+ ifindex = if_nametoindex("tcx_opts1");
+ ASSERT_NEQ(ifindex, 0, "non_zero_ifindex");
+
+ skel = test_tc_link__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "skel_load"))
+ goto cleanup;
+
+ fd1 = bpf_program__fd(skel->progs.tc1);
+ fd2 = bpf_program__fd(skel->progs.tc2);
+ fd3 = bpf_program__fd(skel->progs.tc3);
+ fd4 = bpf_program__fd(skel->progs.tc4);
+
+ id1 = id_from_prog_fd(fd1);
+ id2 = id_from_prog_fd(fd2);
+ id3 = id_from_prog_fd(fd3);
+ id4 = id_from_prog_fd(fd4);
+
+ ASSERT_NEQ(id1, id2, "prog_ids_1_2");
+ ASSERT_NEQ(id3, id4, "prog_ids_3_4");
+ ASSERT_NEQ(id2, id3, "prog_ids_2_3");
+
+ assert_mprog_count_ifindex(ifindex, target, 0);
+
+ err = bpf_prog_attach_opts(fd1, ifindex, target, &opta);
+ if (!ASSERT_EQ(err, 0, "prog_attach"))
+ goto cleanup;
+
+ assert_mprog_count_ifindex(ifindex, target, 1);
+
+ err = bpf_prog_attach_opts(fd2, ifindex, target, &opta);
+ if (!ASSERT_EQ(err, 0, "prog_attach"))
+ goto cleanup1;
+
+ assert_mprog_count_ifindex(ifindex, target, 2);
+
+ err = bpf_prog_attach_opts(fd3, ifindex, target, &opta);
+ if (!ASSERT_EQ(err, 0, "prog_attach"))
+ goto cleanup2;
+
+ assert_mprog_count_ifindex(ifindex, target, 3);
+
+ err = bpf_prog_attach_opts(fd4, ifindex, target, &opta);
+ if (!ASSERT_EQ(err, 0, "prog_attach"))
+ goto cleanup3;
+
+ assert_mprog_count_ifindex(ifindex, target, 4);
+
+ ASSERT_OK(system("ip link del dev tcx_opts1"), "del veth");
+ ASSERT_EQ(if_nametoindex("tcx_opts1"), 0, "dev1_removed");
+ ASSERT_EQ(if_nametoindex("tcx_opts2"), 0, "dev2_removed");
+ return;
+cleanup3:
+ err = bpf_prog_detach_opts(fd3, loopback, target, &optd);
+ ASSERT_OK(err, "prog_detach");
+
+ assert_mprog_count_ifindex(ifindex, target, 2);
+cleanup2:
+ err = bpf_prog_detach_opts(fd2, loopback, target, &optd);
+ ASSERT_OK(err, "prog_detach");
+
+ assert_mprog_count_ifindex(ifindex, target, 1);
+cleanup1:
+ err = bpf_prog_detach_opts(fd1, loopback, target, &optd);
+ ASSERT_OK(err, "prog_detach");
+
+ assert_mprog_count_ifindex(ifindex, target, 0);
+cleanup:
+ test_tc_link__destroy(skel);
+
+ ASSERT_OK(system("ip link del dev tcx_opts1"), "del veth");
+ ASSERT_EQ(if_nametoindex("tcx_opts1"), 0, "dev1_removed");
+ ASSERT_EQ(if_nametoindex("tcx_opts2"), 0, "dev2_removed");
+}
+
+void serial_test_tc_opts_dev_cleanup(void)
+{
+ test_tc_opts_dev_cleanup_target(BPF_TCX_INGRESS);
+ test_tc_opts_dev_cleanup_target(BPF_TCX_EGRESS);
+}
+
+static void test_tc_opts_mixed_target(int target)
+{
+ LIBBPF_OPTS(bpf_prog_attach_opts, opta);
+ LIBBPF_OPTS(bpf_prog_detach_opts, optd);
+ LIBBPF_OPTS(bpf_prog_query_opts, optq);
+ LIBBPF_OPTS(bpf_tcx_opts, optl);
+ __u32 pid1, pid2, pid3, pid4, lid2, lid4;
+ __u32 prog_flags[4], link_flags[4];
+ __u32 prog_ids[4], link_ids[4];
+ struct test_tc_link *skel;
+ struct bpf_link *link;
+ int err, detach_fd;
+
+ skel = test_tc_link__open();
+ if (!ASSERT_OK_PTR(skel, "skel_open"))
+ goto cleanup;
+
+ ASSERT_EQ(bpf_program__set_expected_attach_type(skel->progs.tc1, target),
+ 0, "tc1_attach_type");
+ ASSERT_EQ(bpf_program__set_expected_attach_type(skel->progs.tc2, target),
+ 0, "tc2_attach_type");
+ ASSERT_EQ(bpf_program__set_expected_attach_type(skel->progs.tc3, target),
+ 0, "tc3_attach_type");
+ ASSERT_EQ(bpf_program__set_expected_attach_type(skel->progs.tc4, target),
+ 0, "tc4_attach_type");
+
+ err = test_tc_link__load(skel);
+ if (!ASSERT_OK(err, "skel_load"))
+ goto cleanup;
+
+ pid1 = id_from_prog_fd(bpf_program__fd(skel->progs.tc1));
+ pid2 = id_from_prog_fd(bpf_program__fd(skel->progs.tc2));
+ pid3 = id_from_prog_fd(bpf_program__fd(skel->progs.tc3));
+ pid4 = id_from_prog_fd(bpf_program__fd(skel->progs.tc4));
+
+ ASSERT_NEQ(pid1, pid2, "prog_ids_1_2");
+ ASSERT_NEQ(pid3, pid4, "prog_ids_3_4");
+ ASSERT_NEQ(pid2, pid3, "prog_ids_2_3");
+
+ assert_mprog_count(target, 0);
+
+ err = bpf_prog_attach_opts(bpf_program__fd(skel->progs.tc1),
+ loopback, target, &opta);
+ if (!ASSERT_EQ(err, 0, "prog_attach"))
+ goto cleanup;
+
+ detach_fd = bpf_program__fd(skel->progs.tc1);
+
+ assert_mprog_count(target, 1);
+
+ link = bpf_program__attach_tcx(skel->progs.tc2, loopback, &optl);
+ if (!ASSERT_OK_PTR(link, "link_attach"))
+ goto cleanup1;
+ skel->links.tc2 = link;
+
+ lid2 = id_from_link_fd(bpf_link__fd(skel->links.tc2));
+
+ assert_mprog_count(target, 2);
+
+ LIBBPF_OPTS_RESET(opta,
+ .flags = BPF_F_REPLACE,
+ .replace_prog_fd = bpf_program__fd(skel->progs.tc1),
+ );
+
+ err = bpf_prog_attach_opts(bpf_program__fd(skel->progs.tc2),
+ loopback, target, &opta);
+ ASSERT_EQ(err, -EEXIST, "prog_attach");
+
+ assert_mprog_count(target, 2);
+
+ LIBBPF_OPTS_RESET(opta,
+ .flags = BPF_F_REPLACE,
+ .replace_prog_fd = bpf_program__fd(skel->progs.tc2),
+ );
+
+ err = bpf_prog_attach_opts(bpf_program__fd(skel->progs.tc1),
+ loopback, target, &opta);
+ ASSERT_EQ(err, -EEXIST, "prog_attach");
+
+ assert_mprog_count(target, 2);
+
+ LIBBPF_OPTS_RESET(opta,
+ .flags = BPF_F_REPLACE,
+ .replace_prog_fd = bpf_program__fd(skel->progs.tc2),
+ );
+
+ err = bpf_prog_attach_opts(bpf_program__fd(skel->progs.tc3),
+ loopback, target, &opta);
+ ASSERT_EQ(err, -EBUSY, "prog_attach");
+
+ assert_mprog_count(target, 2);
+
+ LIBBPF_OPTS_RESET(opta,
+ .flags = BPF_F_REPLACE,
+ .replace_prog_fd = bpf_program__fd(skel->progs.tc1),
+ );
+
+ err = bpf_prog_attach_opts(bpf_program__fd(skel->progs.tc3),
+ loopback, target, &opta);
+ if (!ASSERT_EQ(err, 0, "prog_attach"))
+ goto cleanup1;
+
+ detach_fd = bpf_program__fd(skel->progs.tc3);
+
+ assert_mprog_count(target, 2);
+
+ link = bpf_program__attach_tcx(skel->progs.tc4, loopback, &optl);
+ if (!ASSERT_OK_PTR(link, "link_attach"))
+ goto cleanup1;
+ skel->links.tc4 = link;
+
+ lid4 = id_from_link_fd(bpf_link__fd(skel->links.tc4));
+
+ assert_mprog_count(target, 3);
+
+ LIBBPF_OPTS_RESET(opta,
+ .flags = BPF_F_REPLACE,
+ .replace_prog_fd = bpf_program__fd(skel->progs.tc4),
+ );
+
+ err = bpf_prog_attach_opts(bpf_program__fd(skel->progs.tc2),
+ loopback, target, &opta);
+ ASSERT_EQ(err, -EEXIST, "prog_attach");
+
+ optq.prog_ids = prog_ids;
+ optq.prog_attach_flags = prog_flags;
+ optq.link_ids = link_ids;
+ optq.link_attach_flags = link_flags;
+
+ memset(prog_ids, 0, sizeof(prog_ids));
+ memset(prog_flags, 0, sizeof(prog_flags));
+ memset(link_ids, 0, sizeof(link_ids));
+ memset(link_flags, 0, sizeof(link_flags));
+ optq.count = ARRAY_SIZE(prog_ids);
+
+ err = bpf_prog_query_opts(loopback, target, &optq);
+ if (!ASSERT_OK(err, "prog_query"))
+ goto cleanup1;
+
+ ASSERT_EQ(optq.count, 3, "count");
+ ASSERT_EQ(optq.revision, 5, "revision");
+ ASSERT_EQ(optq.prog_ids[0], pid3, "prog_ids[0]");
+ ASSERT_EQ(optq.prog_attach_flags[0], 0, "prog_flags[0]");
+ ASSERT_EQ(optq.link_ids[0], 0, "link_ids[0]");
+ ASSERT_EQ(optq.link_attach_flags[0], 0, "link_flags[0]");
+ ASSERT_EQ(optq.prog_ids[1], pid2, "prog_ids[1]");
+ ASSERT_EQ(optq.prog_attach_flags[1], 0, "prog_flags[1]");
+ ASSERT_EQ(optq.link_ids[1], lid2, "link_ids[1]");
+ ASSERT_EQ(optq.link_attach_flags[1], 0, "link_flags[1]");
+ ASSERT_EQ(optq.prog_ids[2], pid4, "prog_ids[2]");
+ ASSERT_EQ(optq.prog_attach_flags[2], 0, "prog_flags[2]");
+ ASSERT_EQ(optq.link_ids[2], lid4, "link_ids[2]");
+ ASSERT_EQ(optq.link_attach_flags[2], 0, "link_flags[2]");
+ ASSERT_EQ(optq.prog_ids[3], 0, "prog_ids[3]");
+ ASSERT_EQ(optq.prog_attach_flags[3], 0, "prog_flags[3]");
+ ASSERT_EQ(optq.link_ids[3], 0, "link_ids[3]");
+ ASSERT_EQ(optq.link_attach_flags[3], 0, "link_flags[3]");
+
+ ASSERT_OK(system(ping_cmd), ping_cmd);
+
+cleanup1:
+ err = bpf_prog_detach_opts(detach_fd, loopback, target, &optd);
+ ASSERT_OK(err, "prog_detach");
+ assert_mprog_count(target, 2);
+
+cleanup:
+ test_tc_link__destroy(skel);
+ assert_mprog_count(target, 0);
+}
+
+void serial_test_tc_opts_mixed(void)
+{
+ test_tc_opts_mixed_target(BPF_TCX_INGRESS);
+ test_tc_opts_mixed_target(BPF_TCX_EGRESS);
+}
+
+static void test_tc_opts_demixed_target(int target)
+{
+ LIBBPF_OPTS(bpf_prog_attach_opts, opta);
+ LIBBPF_OPTS(bpf_prog_detach_opts, optd);
+ LIBBPF_OPTS(bpf_tcx_opts, optl);
+ struct test_tc_link *skel;
+ struct bpf_link *link;
+ __u32 pid1, pid2;
+ int err;
+
+ skel = test_tc_link__open();
+ if (!ASSERT_OK_PTR(skel, "skel_open"))
+ goto cleanup;
+
+ ASSERT_EQ(bpf_program__set_expected_attach_type(skel->progs.tc1, target),
+ 0, "tc1_attach_type");
+ ASSERT_EQ(bpf_program__set_expected_attach_type(skel->progs.tc2, target),
+ 0, "tc2_attach_type");
+
+ err = test_tc_link__load(skel);
+ if (!ASSERT_OK(err, "skel_load"))
+ goto cleanup;
+
+ pid1 = id_from_prog_fd(bpf_program__fd(skel->progs.tc1));
+ pid2 = id_from_prog_fd(bpf_program__fd(skel->progs.tc2));
+ ASSERT_NEQ(pid1, pid2, "prog_ids_1_2");
+
+ assert_mprog_count(target, 0);
+
+ err = bpf_prog_attach_opts(bpf_program__fd(skel->progs.tc1),
+ loopback, target, &opta);
+ if (!ASSERT_EQ(err, 0, "prog_attach"))
+ goto cleanup;
+
+ assert_mprog_count(target, 1);
+
+ link = bpf_program__attach_tcx(skel->progs.tc2, loopback, &optl);
+ if (!ASSERT_OK_PTR(link, "link_attach"))
+ goto cleanup1;
+ skel->links.tc2 = link;
+
+ assert_mprog_count(target, 2);
+
+ LIBBPF_OPTS_RESET(optd,
+ .flags = BPF_F_AFTER,
+ );
+
+ err = bpf_prog_detach_opts(0, loopback, target, &optd);
+ ASSERT_EQ(err, -EBUSY, "prog_detach");
+
+ assert_mprog_count(target, 2);
+
+ LIBBPF_OPTS_RESET(optd,
+ .flags = BPF_F_BEFORE,
+ );
+
+ err = bpf_prog_detach_opts(0, loopback, target, &optd);
+ ASSERT_OK(err, "prog_detach");
+
+ assert_mprog_count(target, 1);
+ goto cleanup;
+
+cleanup1:
+ err = bpf_prog_detach_opts(bpf_program__fd(skel->progs.tc1),
+ loopback, target, &optd);
+ ASSERT_OK(err, "prog_detach");
+ assert_mprog_count(target, 2);
+
+cleanup:
+ test_tc_link__destroy(skel);
+ assert_mprog_count(target, 0);
+}
+
+void serial_test_tc_opts_demixed(void)
+{
+ test_tc_opts_demixed_target(BPF_TCX_INGRESS);
+ test_tc_opts_demixed_target(BPF_TCX_EGRESS);
+}
+
+static void test_tc_opts_detach_target(int target)
+{
+ LIBBPF_OPTS(bpf_prog_attach_opts, opta);
+ LIBBPF_OPTS(bpf_prog_detach_opts, optd);
+ LIBBPF_OPTS(bpf_prog_query_opts, optq);
+ __u32 fd1, fd2, fd3, fd4, id1, id2, id3, id4;
+ struct test_tc_link *skel;
+ __u32 prog_ids[5];
+ int err;
+
+ skel = test_tc_link__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "skel_load"))
+ goto cleanup;
+
+ fd1 = bpf_program__fd(skel->progs.tc1);
+ fd2 = bpf_program__fd(skel->progs.tc2);
+ fd3 = bpf_program__fd(skel->progs.tc3);
+ fd4 = bpf_program__fd(skel->progs.tc4);
+
+ id1 = id_from_prog_fd(fd1);
+ id2 = id_from_prog_fd(fd2);
+ id3 = id_from_prog_fd(fd3);
+ id4 = id_from_prog_fd(fd4);
+
+ ASSERT_NEQ(id1, id2, "prog_ids_1_2");
+ ASSERT_NEQ(id3, id4, "prog_ids_3_4");
+ ASSERT_NEQ(id2, id3, "prog_ids_2_3");
+
+ assert_mprog_count(target, 0);
+
+ err = bpf_prog_attach_opts(fd1, loopback, target, &opta);
+ if (!ASSERT_EQ(err, 0, "prog_attach"))
+ goto cleanup;
+
+ assert_mprog_count(target, 1);
+
+ err = bpf_prog_attach_opts(fd2, loopback, target, &opta);
+ if (!ASSERT_EQ(err, 0, "prog_attach"))
+ goto cleanup1;
+
+ assert_mprog_count(target, 2);
+
+ err = bpf_prog_attach_opts(fd3, loopback, target, &opta);
+ if (!ASSERT_EQ(err, 0, "prog_attach"))
+ goto cleanup2;
+
+ assert_mprog_count(target, 3);
+
+ err = bpf_prog_attach_opts(fd4, loopback, target, &opta);
+ if (!ASSERT_EQ(err, 0, "prog_attach"))
+ goto cleanup3;
+
+ assert_mprog_count(target, 4);
+
+ optq.prog_ids = prog_ids;
+
+ memset(prog_ids, 0, sizeof(prog_ids));
+ optq.count = ARRAY_SIZE(prog_ids);
+
+ err = bpf_prog_query_opts(loopback, target, &optq);
+ if (!ASSERT_OK(err, "prog_query"))
+ goto cleanup4;
+
+ ASSERT_EQ(optq.count, 4, "count");
+ ASSERT_EQ(optq.revision, 5, "revision");
+ ASSERT_EQ(optq.prog_ids[0], id1, "prog_ids[0]");
+ ASSERT_EQ(optq.prog_ids[1], id2, "prog_ids[1]");
+ ASSERT_EQ(optq.prog_ids[2], id3, "prog_ids[2]");
+ ASSERT_EQ(optq.prog_ids[3], id4, "prog_ids[3]");
+ ASSERT_EQ(optq.prog_ids[4], 0, "prog_ids[4]");
+
+ LIBBPF_OPTS_RESET(optd,
+ .flags = BPF_F_BEFORE,
+ );
+
+ err = bpf_prog_detach_opts(0, loopback, target, &optd);
+ ASSERT_OK(err, "prog_detach");
+
+ assert_mprog_count(target, 3);
+
+ memset(prog_ids, 0, sizeof(prog_ids));
+ optq.count = ARRAY_SIZE(prog_ids);
+
+ err = bpf_prog_query_opts(loopback, target, &optq);
+ if (!ASSERT_OK(err, "prog_query"))
+ goto cleanup4;
+
+ ASSERT_EQ(optq.count, 3, "count");
+ ASSERT_EQ(optq.revision, 6, "revision");
+ ASSERT_EQ(optq.prog_ids[0], id2, "prog_ids[0]");
+ ASSERT_EQ(optq.prog_ids[1], id3, "prog_ids[1]");
+ ASSERT_EQ(optq.prog_ids[2], id4, "prog_ids[2]");
+ ASSERT_EQ(optq.prog_ids[3], 0, "prog_ids[3]");
+
+ LIBBPF_OPTS_RESET(optd,
+ .flags = BPF_F_AFTER,
+ );
+
+ err = bpf_prog_detach_opts(0, loopback, target, &optd);
+ ASSERT_OK(err, "prog_detach");
+
+ assert_mprog_count(target, 2);
+
+ memset(prog_ids, 0, sizeof(prog_ids));
+ optq.count = ARRAY_SIZE(prog_ids);
+
+ err = bpf_prog_query_opts(loopback, target, &optq);
+ if (!ASSERT_OK(err, "prog_query"))
+ goto cleanup4;
+
+ ASSERT_EQ(optq.count, 2, "count");
+ ASSERT_EQ(optq.revision, 7, "revision");
+ ASSERT_EQ(optq.prog_ids[0], id2, "prog_ids[0]");
+ ASSERT_EQ(optq.prog_ids[1], id3, "prog_ids[1]");
+ ASSERT_EQ(optq.prog_ids[2], 0, "prog_ids[2]");
+
+ LIBBPF_OPTS_RESET(optd);
+
+ err = bpf_prog_detach_opts(fd3, loopback, target, &optd);
+ ASSERT_OK(err, "prog_detach");
+ assert_mprog_count(target, 1);
+
+ err = bpf_prog_detach_opts(fd2, loopback, target, &optd);
+ ASSERT_OK(err, "prog_detach");
+ assert_mprog_count(target, 0);
+
+ LIBBPF_OPTS_RESET(optd,
+ .flags = BPF_F_BEFORE,
+ );
+
+ err = bpf_prog_detach_opts(0, loopback, target, &optd);
+ ASSERT_EQ(err, -ENOENT, "prog_detach");
+
+ LIBBPF_OPTS_RESET(optd,
+ .flags = BPF_F_AFTER,
+ );
+
+ err = bpf_prog_detach_opts(0, loopback, target, &optd);
+ ASSERT_EQ(err, -ENOENT, "prog_detach");
+ goto cleanup;
+
+cleanup4:
+ err = bpf_prog_detach_opts(fd4, loopback, target, &optd);
+ ASSERT_OK(err, "prog_detach");
+ assert_mprog_count(target, 3);
+
+cleanup3:
+ err = bpf_prog_detach_opts(fd3, loopback, target, &optd);
+ ASSERT_OK(err, "prog_detach");
+ assert_mprog_count(target, 2);
+
+cleanup2:
+ err = bpf_prog_detach_opts(fd2, loopback, target, &optd);
+ ASSERT_OK(err, "prog_detach");
+ assert_mprog_count(target, 1);
+
+cleanup1:
+ err = bpf_prog_detach_opts(fd1, loopback, target, &optd);
+ ASSERT_OK(err, "prog_detach");
+ assert_mprog_count(target, 0);
+
+cleanup:
+ test_tc_link__destroy(skel);
+}
+
+void serial_test_tc_opts_detach(void)
+{
+ test_tc_opts_detach_target(BPF_TCX_INGRESS);
+ test_tc_opts_detach_target(BPF_TCX_EGRESS);
+}
+
+static void test_tc_opts_detach_before_target(int target)
+{
+ LIBBPF_OPTS(bpf_prog_attach_opts, opta);
+ LIBBPF_OPTS(bpf_prog_detach_opts, optd);
+ LIBBPF_OPTS(bpf_prog_query_opts, optq);
+ __u32 fd1, fd2, fd3, fd4, id1, id2, id3, id4;
+ struct test_tc_link *skel;
+ __u32 prog_ids[5];
+ int err;
+
+ skel = test_tc_link__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "skel_load"))
+ goto cleanup;
+
+ fd1 = bpf_program__fd(skel->progs.tc1);
+ fd2 = bpf_program__fd(skel->progs.tc2);
+ fd3 = bpf_program__fd(skel->progs.tc3);
+ fd4 = bpf_program__fd(skel->progs.tc4);
+
+ id1 = id_from_prog_fd(fd1);
+ id2 = id_from_prog_fd(fd2);
+ id3 = id_from_prog_fd(fd3);
+ id4 = id_from_prog_fd(fd4);
+
+ ASSERT_NEQ(id1, id2, "prog_ids_1_2");
+ ASSERT_NEQ(id3, id4, "prog_ids_3_4");
+ ASSERT_NEQ(id2, id3, "prog_ids_2_3");
+
+ assert_mprog_count(target, 0);
+
+ err = bpf_prog_attach_opts(fd1, loopback, target, &opta);
+ if (!ASSERT_EQ(err, 0, "prog_attach"))
+ goto cleanup;
+
+ assert_mprog_count(target, 1);
+
+ err = bpf_prog_attach_opts(fd2, loopback, target, &opta);
+ if (!ASSERT_EQ(err, 0, "prog_attach"))
+ goto cleanup1;
+
+ assert_mprog_count(target, 2);
+
+ err = bpf_prog_attach_opts(fd3, loopback, target, &opta);
+ if (!ASSERT_EQ(err, 0, "prog_attach"))
+ goto cleanup2;
+
+ assert_mprog_count(target, 3);
+
+ err = bpf_prog_attach_opts(fd4, loopback, target, &opta);
+ if (!ASSERT_EQ(err, 0, "prog_attach"))
+ goto cleanup3;
+
+ assert_mprog_count(target, 4);
+
+ optq.prog_ids = prog_ids;
+
+ memset(prog_ids, 0, sizeof(prog_ids));
+ optq.count = ARRAY_SIZE(prog_ids);
+
+ err = bpf_prog_query_opts(loopback, target, &optq);
+ if (!ASSERT_OK(err, "prog_query"))
+ goto cleanup4;
+
+ ASSERT_EQ(optq.count, 4, "count");
+ ASSERT_EQ(optq.revision, 5, "revision");
+ ASSERT_EQ(optq.prog_ids[0], id1, "prog_ids[0]");
+ ASSERT_EQ(optq.prog_ids[1], id2, "prog_ids[1]");
+ ASSERT_EQ(optq.prog_ids[2], id3, "prog_ids[2]");
+ ASSERT_EQ(optq.prog_ids[3], id4, "prog_ids[3]");
+ ASSERT_EQ(optq.prog_ids[4], 0, "prog_ids[4]");
+
+ LIBBPF_OPTS_RESET(optd,
+ .flags = BPF_F_BEFORE,
+ .relative_fd = fd2,
+ );
+
+ err = bpf_prog_detach_opts(fd1, loopback, target, &optd);
+ ASSERT_OK(err, "prog_detach");
+
+ assert_mprog_count(target, 3);
+
+ memset(prog_ids, 0, sizeof(prog_ids));
+ optq.count = ARRAY_SIZE(prog_ids);
+
+ err = bpf_prog_query_opts(loopback, target, &optq);
+ if (!ASSERT_OK(err, "prog_query"))
+ goto cleanup4;
+
+ ASSERT_EQ(optq.count, 3, "count");
+ ASSERT_EQ(optq.revision, 6, "revision");
+ ASSERT_EQ(optq.prog_ids[0], id2, "prog_ids[0]");
+ ASSERT_EQ(optq.prog_ids[1], id3, "prog_ids[1]");
+ ASSERT_EQ(optq.prog_ids[2], id4, "prog_ids[2]");
+ ASSERT_EQ(optq.prog_ids[3], 0, "prog_ids[3]");
+
+ LIBBPF_OPTS_RESET(optd,
+ .flags = BPF_F_BEFORE,
+ .relative_fd = fd2,
+ );
+
+ err = bpf_prog_detach_opts(fd1, loopback, target, &optd);
+ ASSERT_EQ(err, -ENOENT, "prog_detach");
+ assert_mprog_count(target, 3);
+
+ LIBBPF_OPTS_RESET(optd,
+ .flags = BPF_F_BEFORE,
+ .relative_fd = fd4,
+ );
+
+ err = bpf_prog_detach_opts(fd2, loopback, target, &optd);
+ ASSERT_EQ(err, -ERANGE, "prog_detach");
+ assert_mprog_count(target, 3);
+
+ LIBBPF_OPTS_RESET(optd,
+ .flags = BPF_F_BEFORE,
+ .relative_fd = fd1,
+ );
+
+ err = bpf_prog_detach_opts(fd2, loopback, target, &optd);
+ ASSERT_EQ(err, -ENOENT, "prog_detach");
+ assert_mprog_count(target, 3);
+
+ LIBBPF_OPTS_RESET(optd,
+ .flags = BPF_F_BEFORE,
+ .relative_fd = fd3,
+ );
+
+ err = bpf_prog_detach_opts(fd2, loopback, target, &optd);
+ ASSERT_OK(err, "prog_detach");
+
+ assert_mprog_count(target, 2);
+
+ memset(prog_ids, 0, sizeof(prog_ids));
+ optq.count = ARRAY_SIZE(prog_ids);
+
+ err = bpf_prog_query_opts(loopback, target, &optq);
+ if (!ASSERT_OK(err, "prog_query"))
+ goto cleanup4;
+
+ ASSERT_EQ(optq.count, 2, "count");
+ ASSERT_EQ(optq.revision, 7, "revision");
+ ASSERT_EQ(optq.prog_ids[0], id3, "prog_ids[0]");
+ ASSERT_EQ(optq.prog_ids[1], id4, "prog_ids[1]");
+ ASSERT_EQ(optq.prog_ids[2], 0, "prog_ids[2]");
+
+ LIBBPF_OPTS_RESET(optd,
+ .flags = BPF_F_BEFORE,
+ .relative_fd = fd4,
+ );
+
+ err = bpf_prog_detach_opts(0, loopback, target, &optd);
+ ASSERT_OK(err, "prog_detach");
+
+ assert_mprog_count(target, 1);
+
+ memset(prog_ids, 0, sizeof(prog_ids));
+ optq.count = ARRAY_SIZE(prog_ids);
+
+ err = bpf_prog_query_opts(loopback, target, &optq);
+ if (!ASSERT_OK(err, "prog_query"))
+ goto cleanup4;
+
+ ASSERT_EQ(optq.count, 1, "count");
+ ASSERT_EQ(optq.revision, 8, "revision");
+ ASSERT_EQ(optq.prog_ids[0], id4, "prog_ids[0]");
+ ASSERT_EQ(optq.prog_ids[1], 0, "prog_ids[1]");
+
+ LIBBPF_OPTS_RESET(optd,
+ .flags = BPF_F_BEFORE,
+ );
+
+ err = bpf_prog_detach_opts(0, loopback, target, &optd);
+ ASSERT_OK(err, "prog_detach");
+
+ assert_mprog_count(target, 0);
+ goto cleanup;
+
+cleanup4:
+ err = bpf_prog_detach_opts(fd4, loopback, target, &optd);
+ ASSERT_OK(err, "prog_detach");
+ assert_mprog_count(target, 3);
+
+cleanup3:
+ err = bpf_prog_detach_opts(fd3, loopback, target, &optd);
+ ASSERT_OK(err, "prog_detach");
+ assert_mprog_count(target, 2);
+
+cleanup2:
+ err = bpf_prog_detach_opts(fd2, loopback, target, &optd);
+ ASSERT_OK(err, "prog_detach");
+ assert_mprog_count(target, 1);
+
+cleanup1:
+ err = bpf_prog_detach_opts(fd1, loopback, target, &optd);
+ ASSERT_OK(err, "prog_detach");
+ assert_mprog_count(target, 0);
+
+cleanup:
+ test_tc_link__destroy(skel);
+}
+
+void serial_test_tc_opts_detach_before(void)
+{
+ test_tc_opts_detach_before_target(BPF_TCX_INGRESS);
+ test_tc_opts_detach_before_target(BPF_TCX_EGRESS);
+}
+
+static void test_tc_opts_detach_after_target(int target)
+{
+ LIBBPF_OPTS(bpf_prog_attach_opts, opta);
+ LIBBPF_OPTS(bpf_prog_detach_opts, optd);
+ LIBBPF_OPTS(bpf_prog_query_opts, optq);
+ __u32 fd1, fd2, fd3, fd4, id1, id2, id3, id4;
+ struct test_tc_link *skel;
+ __u32 prog_ids[5];
+ int err;
+
+ skel = test_tc_link__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "skel_load"))
+ goto cleanup;
+
+ fd1 = bpf_program__fd(skel->progs.tc1);
+ fd2 = bpf_program__fd(skel->progs.tc2);
+ fd3 = bpf_program__fd(skel->progs.tc3);
+ fd4 = bpf_program__fd(skel->progs.tc4);
+
+ id1 = id_from_prog_fd(fd1);
+ id2 = id_from_prog_fd(fd2);
+ id3 = id_from_prog_fd(fd3);
+ id4 = id_from_prog_fd(fd4);
+
+ ASSERT_NEQ(id1, id2, "prog_ids_1_2");
+ ASSERT_NEQ(id3, id4, "prog_ids_3_4");
+ ASSERT_NEQ(id2, id3, "prog_ids_2_3");
+
+ assert_mprog_count(target, 0);
+
+ err = bpf_prog_attach_opts(fd1, loopback, target, &opta);
+ if (!ASSERT_EQ(err, 0, "prog_attach"))
+ goto cleanup;
+
+ assert_mprog_count(target, 1);
+
+ err = bpf_prog_attach_opts(fd2, loopback, target, &opta);
+ if (!ASSERT_EQ(err, 0, "prog_attach"))
+ goto cleanup1;
+
+ assert_mprog_count(target, 2);
+
+ err = bpf_prog_attach_opts(fd3, loopback, target, &opta);
+ if (!ASSERT_EQ(err, 0, "prog_attach"))
+ goto cleanup2;
+
+ assert_mprog_count(target, 3);
+
+ err = bpf_prog_attach_opts(fd4, loopback, target, &opta);
+ if (!ASSERT_EQ(err, 0, "prog_attach"))
+ goto cleanup3;
+
+ assert_mprog_count(target, 4);
+
+ optq.prog_ids = prog_ids;
+
+ memset(prog_ids, 0, sizeof(prog_ids));
+ optq.count = ARRAY_SIZE(prog_ids);
+
+ err = bpf_prog_query_opts(loopback, target, &optq);
+ if (!ASSERT_OK(err, "prog_query"))
+ goto cleanup4;
+
+ ASSERT_EQ(optq.count, 4, "count");
+ ASSERT_EQ(optq.revision, 5, "revision");
+ ASSERT_EQ(optq.prog_ids[0], id1, "prog_ids[0]");
+ ASSERT_EQ(optq.prog_ids[1], id2, "prog_ids[1]");
+ ASSERT_EQ(optq.prog_ids[2], id3, "prog_ids[2]");
+ ASSERT_EQ(optq.prog_ids[3], id4, "prog_ids[3]");
+ ASSERT_EQ(optq.prog_ids[4], 0, "prog_ids[4]");
+
+ LIBBPF_OPTS_RESET(optd,
+ .flags = BPF_F_AFTER,
+ .relative_fd = fd1,
+ );
+
+ err = bpf_prog_detach_opts(fd2, loopback, target, &optd);
+ ASSERT_OK(err, "prog_detach");
+
+ assert_mprog_count(target, 3);
+
+ memset(prog_ids, 0, sizeof(prog_ids));
+ optq.count = ARRAY_SIZE(prog_ids);
+
+ err = bpf_prog_query_opts(loopback, target, &optq);
+ if (!ASSERT_OK(err, "prog_query"))
+ goto cleanup4;
+
+ ASSERT_EQ(optq.count, 3, "count");
+ ASSERT_EQ(optq.revision, 6, "revision");
+ ASSERT_EQ(optq.prog_ids[0], id1, "prog_ids[0]");
+ ASSERT_EQ(optq.prog_ids[1], id3, "prog_ids[1]");
+ ASSERT_EQ(optq.prog_ids[2], id4, "prog_ids[2]");
+ ASSERT_EQ(optq.prog_ids[3], 0, "prog_ids[3]");
+
+ LIBBPF_OPTS_RESET(optd,
+ .flags = BPF_F_AFTER,
+ .relative_fd = fd1,
+ );
+
+ err = bpf_prog_detach_opts(fd2, loopback, target, &optd);
+ ASSERT_EQ(err, -ENOENT, "prog_detach");
+ assert_mprog_count(target, 3);
+
+ LIBBPF_OPTS_RESET(optd,
+ .flags = BPF_F_AFTER,
+ .relative_fd = fd4,
+ );
+
+ err = bpf_prog_detach_opts(fd1, loopback, target, &optd);
+ ASSERT_EQ(err, -ERANGE, "prog_detach");
+ assert_mprog_count(target, 3);
+
+ LIBBPF_OPTS_RESET(optd,
+ .flags = BPF_F_AFTER,
+ .relative_fd = fd3,
+ );
+
+ err = bpf_prog_detach_opts(fd1, loopback, target, &optd);
+ ASSERT_EQ(err, -ERANGE, "prog_detach");
+ assert_mprog_count(target, 3);
+
+ LIBBPF_OPTS_RESET(optd,
+ .flags = BPF_F_AFTER,
+ .relative_fd = fd1,
+ );
+
+ err = bpf_prog_detach_opts(fd1, loopback, target, &optd);
+ ASSERT_EQ(err, -ERANGE, "prog_detach");
+ assert_mprog_count(target, 3);
+
+ LIBBPF_OPTS_RESET(optd,
+ .flags = BPF_F_AFTER,
+ .relative_fd = fd1,
+ );
+
+ err = bpf_prog_detach_opts(fd3, loopback, target, &optd);
+ ASSERT_OK(err, "prog_detach");
+
+ assert_mprog_count(target, 2);
+
+ memset(prog_ids, 0, sizeof(prog_ids));
+ optq.count = ARRAY_SIZE(prog_ids);
+
+ err = bpf_prog_query_opts(loopback, target, &optq);
+ if (!ASSERT_OK(err, "prog_query"))
+ goto cleanup4;
+
+ ASSERT_EQ(optq.count, 2, "count");
+ ASSERT_EQ(optq.revision, 7, "revision");
+ ASSERT_EQ(optq.prog_ids[0], id1, "prog_ids[0]");
+ ASSERT_EQ(optq.prog_ids[1], id4, "prog_ids[1]");
+ ASSERT_EQ(optq.prog_ids[2], 0, "prog_ids[2]");
+
+ LIBBPF_OPTS_RESET(optd,
+ .flags = BPF_F_AFTER,
+ .relative_fd = fd1,
+ );
+
+ err = bpf_prog_detach_opts(0, loopback, target, &optd);
+ ASSERT_OK(err, "prog_detach");
+
+ assert_mprog_count(target, 1);
+
+ memset(prog_ids, 0, sizeof(prog_ids));
+ optq.count = ARRAY_SIZE(prog_ids);
+
+ err = bpf_prog_query_opts(loopback, target, &optq);
+ if (!ASSERT_OK(err, "prog_query"))
+ goto cleanup4;
+
+ ASSERT_EQ(optq.count, 1, "count");
+ ASSERT_EQ(optq.revision, 8, "revision");
+ ASSERT_EQ(optq.prog_ids[0], id1, "prog_ids[0]");
+ ASSERT_EQ(optq.prog_ids[1], 0, "prog_ids[1]");
+
+ LIBBPF_OPTS_RESET(optd,
+ .flags = BPF_F_AFTER,
+ );
+
+ err = bpf_prog_detach_opts(0, loopback, target, &optd);
+ ASSERT_OK(err, "prog_detach");
+
+ assert_mprog_count(target, 0);
+ goto cleanup;
+
+cleanup4:
+ err = bpf_prog_detach_opts(fd4, loopback, target, &optd);
+ ASSERT_OK(err, "prog_detach");
+ assert_mprog_count(target, 3);
+
+cleanup3:
+ err = bpf_prog_detach_opts(fd3, loopback, target, &optd);
+ ASSERT_OK(err, "prog_detach");
+ assert_mprog_count(target, 2);
+
+cleanup2:
+ err = bpf_prog_detach_opts(fd2, loopback, target, &optd);
+ ASSERT_OK(err, "prog_detach");
+ assert_mprog_count(target, 1);
+
+cleanup1:
+ err = bpf_prog_detach_opts(fd1, loopback, target, &optd);
+ ASSERT_OK(err, "prog_detach");
+ assert_mprog_count(target, 0);
+
+cleanup:
+ test_tc_link__destroy(skel);
+}
+
+void serial_test_tc_opts_detach_after(void)
+{
+ test_tc_opts_detach_after_target(BPF_TCX_INGRESS);
+ test_tc_opts_detach_after_target(BPF_TCX_EGRESS);
+}
+
+static void test_tc_opts_delete_empty(int target, bool chain_tc_old)
+{
+ LIBBPF_OPTS(bpf_tc_hook, tc_hook, .ifindex = loopback);
+ LIBBPF_OPTS(bpf_prog_detach_opts, optd);
+ int err;
+
+ assert_mprog_count(target, 0);
+ if (chain_tc_old) {
+ tc_hook.attach_point = target == BPF_TCX_INGRESS ?
+ BPF_TC_INGRESS : BPF_TC_EGRESS;
+ err = bpf_tc_hook_create(&tc_hook);
+ ASSERT_OK(err, "bpf_tc_hook_create");
+ assert_mprog_count(target, 0);
+ }
+ err = bpf_prog_detach_opts(0, loopback, target, &optd);
+ ASSERT_EQ(err, -ENOENT, "prog_detach");
+ if (chain_tc_old) {
+ tc_hook.attach_point = BPF_TC_INGRESS | BPF_TC_EGRESS;
+ bpf_tc_hook_destroy(&tc_hook);
+ }
+ assert_mprog_count(target, 0);
+}
+
+void serial_test_tc_opts_delete_empty(void)
+{
+ test_tc_opts_delete_empty(BPF_TCX_INGRESS, false);
+ test_tc_opts_delete_empty(BPF_TCX_EGRESS, false);
+ test_tc_opts_delete_empty(BPF_TCX_INGRESS, true);
+ test_tc_opts_delete_empty(BPF_TCX_EGRESS, true);
+}
+
+static void test_tc_chain_mixed(int target)
+{
+ LIBBPF_OPTS(bpf_tc_opts, tc_opts, .handle = 1, .priority = 1);
+ LIBBPF_OPTS(bpf_tc_hook, tc_hook, .ifindex = loopback);
+ LIBBPF_OPTS(bpf_prog_attach_opts, opta);
+ LIBBPF_OPTS(bpf_prog_detach_opts, optd);
+ __u32 fd1, fd2, fd3, id1, id2, id3;
+ struct test_tc_link *skel;
+ int err, detach_fd;
+
+ skel = test_tc_link__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "skel_load"))
+ goto cleanup;
+
+ fd1 = bpf_program__fd(skel->progs.tc4);
+ fd2 = bpf_program__fd(skel->progs.tc5);
+ fd3 = bpf_program__fd(skel->progs.tc6);
+
+ id1 = id_from_prog_fd(fd1);
+ id2 = id_from_prog_fd(fd2);
+ id3 = id_from_prog_fd(fd3);
+
+ ASSERT_NEQ(id1, id2, "prog_ids_1_2");
+ ASSERT_NEQ(id2, id3, "prog_ids_2_3");
+
+ assert_mprog_count(target, 0);
+
+ tc_hook.attach_point = target == BPF_TCX_INGRESS ?
+ BPF_TC_INGRESS : BPF_TC_EGRESS;
+ err = bpf_tc_hook_create(&tc_hook);
+ err = err == -EEXIST ? 0 : err;
+ if (!ASSERT_OK(err, "bpf_tc_hook_create"))
+ goto cleanup;
+
+ tc_opts.prog_fd = fd2;
+ err = bpf_tc_attach(&tc_hook, &tc_opts);
+ if (!ASSERT_OK(err, "bpf_tc_attach"))
+ goto cleanup_hook;
+
+ err = bpf_prog_attach_opts(fd3, loopback, target, &opta);
+ if (!ASSERT_EQ(err, 0, "prog_attach"))
+ goto cleanup_filter;
+
+ detach_fd = fd3;
+
+ assert_mprog_count(target, 1);
+
+ tc_skel_reset_all_seen(skel);
+ ASSERT_OK(system(ping_cmd), ping_cmd);
+
+ ASSERT_EQ(skel->bss->seen_tc4, false, "seen_tc4");
+ ASSERT_EQ(skel->bss->seen_tc5, false, "seen_tc5");
+ ASSERT_EQ(skel->bss->seen_tc6, true, "seen_tc6");
+
+ LIBBPF_OPTS_RESET(opta,
+ .flags = BPF_F_REPLACE,
+ .replace_prog_fd = fd3,
+ );
+
+ err = bpf_prog_attach_opts(fd1, loopback, target, &opta);
+ if (!ASSERT_EQ(err, 0, "prog_attach"))
+ goto cleanup_opts;
+
+ detach_fd = fd1;
+
+ assert_mprog_count(target, 1);
+
+ tc_skel_reset_all_seen(skel);
+ ASSERT_OK(system(ping_cmd), ping_cmd);
+
+ ASSERT_EQ(skel->bss->seen_tc4, true, "seen_tc4");
+ ASSERT_EQ(skel->bss->seen_tc5, true, "seen_tc5");
+ ASSERT_EQ(skel->bss->seen_tc6, false, "seen_tc6");
+
+cleanup_opts:
+ err = bpf_prog_detach_opts(detach_fd, loopback, target, &optd);
+ ASSERT_OK(err, "prog_detach");
+ assert_mprog_count(target, 0);
+
+ tc_skel_reset_all_seen(skel);
+ ASSERT_OK(system(ping_cmd), ping_cmd);
+
+ ASSERT_EQ(skel->bss->seen_tc4, false, "seen_tc4");
+ ASSERT_EQ(skel->bss->seen_tc5, true, "seen_tc5");
+ ASSERT_EQ(skel->bss->seen_tc6, false, "seen_tc6");
+
+cleanup_filter:
+ tc_opts.flags = tc_opts.prog_fd = tc_opts.prog_id = 0;
+ err = bpf_tc_detach(&tc_hook, &tc_opts);
+ ASSERT_OK(err, "bpf_tc_detach");
+
+cleanup_hook:
+ tc_hook.attach_point = BPF_TC_INGRESS | BPF_TC_EGRESS;
+ bpf_tc_hook_destroy(&tc_hook);
+
+cleanup:
+ test_tc_link__destroy(skel);
+}
+
+void serial_test_tc_opts_chain_mixed(void)
+{
+ test_tc_chain_mixed(BPF_TCX_INGRESS);
+ test_tc_chain_mixed(BPF_TCX_EGRESS);
+}
+
+static int generate_dummy_prog(void)
+{
+ const struct bpf_insn prog_insns[] = {
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ };
+ const size_t prog_insn_cnt = sizeof(prog_insns) / sizeof(struct bpf_insn);
+ LIBBPF_OPTS(bpf_prog_load_opts, opts);
+ const size_t log_buf_sz = 256;
+ char log_buf[log_buf_sz];
+ int fd = -1;
+
+ opts.log_buf = log_buf;
+ opts.log_size = log_buf_sz;
+
+ log_buf[0] = '\0';
+ opts.log_level = 0;
+ fd = bpf_prog_load(BPF_PROG_TYPE_SCHED_CLS, "tcx_prog", "GPL",
+ prog_insns, prog_insn_cnt, &opts);
+ ASSERT_STREQ(log_buf, "", "log_0");
+ ASSERT_GE(fd, 0, "prog_fd");
+ return fd;
+}
+
+static void test_tc_opts_max_target(int target, int flags, bool relative)
+{
+ int err, ifindex, i, prog_fd, last_fd = -1;
+ LIBBPF_OPTS(bpf_prog_attach_opts, opta);
+ const int max_progs = 63;
+
+ ASSERT_OK(system("ip link add dev tcx_opts1 type veth peer name tcx_opts2"), "add veth");
+ ifindex = if_nametoindex("tcx_opts1");
+ ASSERT_NEQ(ifindex, 0, "non_zero_ifindex");
+
+ assert_mprog_count_ifindex(ifindex, target, 0);
+
+ for (i = 0; i < max_progs; i++) {
+ prog_fd = generate_dummy_prog();
+ if (!ASSERT_GE(prog_fd, 0, "dummy_prog"))
+ goto cleanup;
+ err = bpf_prog_attach_opts(prog_fd, ifindex, target, &opta);
+ if (!ASSERT_EQ(err, 0, "prog_attach"))
+ goto cleanup;
+ assert_mprog_count_ifindex(ifindex, target, i + 1);
+ if (i == max_progs - 1 && relative)
+ last_fd = prog_fd;
+ else
+ close(prog_fd);
+ }
+
+ prog_fd = generate_dummy_prog();
+ if (!ASSERT_GE(prog_fd, 0, "dummy_prog"))
+ goto cleanup;
+ opta.flags = flags;
+ if (last_fd > 0)
+ opta.relative_fd = last_fd;
+ err = bpf_prog_attach_opts(prog_fd, ifindex, target, &opta);
+ ASSERT_EQ(err, -ERANGE, "prog_64_attach");
+ assert_mprog_count_ifindex(ifindex, target, max_progs);
+ close(prog_fd);
+cleanup:
+ if (last_fd > 0)
+ close(last_fd);
+ ASSERT_OK(system("ip link del dev tcx_opts1"), "del veth");
+ ASSERT_EQ(if_nametoindex("tcx_opts1"), 0, "dev1_removed");
+ ASSERT_EQ(if_nametoindex("tcx_opts2"), 0, "dev2_removed");
+}
+
+void serial_test_tc_opts_max(void)
+{
+ test_tc_opts_max_target(BPF_TCX_INGRESS, 0, false);
+ test_tc_opts_max_target(BPF_TCX_EGRESS, 0, false);
+
+ test_tc_opts_max_target(BPF_TCX_INGRESS, BPF_F_BEFORE, false);
+ test_tc_opts_max_target(BPF_TCX_EGRESS, BPF_F_BEFORE, true);
+
+ test_tc_opts_max_target(BPF_TCX_INGRESS, BPF_F_AFTER, true);
+ test_tc_opts_max_target(BPF_TCX_EGRESS, BPF_F_AFTER, false);
+}
+
+static void test_tc_opts_query_target(int target)
+{
+ const size_t attr_size = offsetofend(union bpf_attr, query);
+ LIBBPF_OPTS(bpf_prog_attach_opts, opta);
+ LIBBPF_OPTS(bpf_prog_detach_opts, optd);
+ LIBBPF_OPTS(bpf_prog_query_opts, optq);
+ __u32 fd1, fd2, fd3, fd4, id1, id2, id3, id4;
+ struct test_tc_link *skel;
+ union bpf_attr attr;
+ __u32 prog_ids[10];
+ int err;
+
+ skel = test_tc_link__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "skel_load"))
+ goto cleanup;
+
+ fd1 = bpf_program__fd(skel->progs.tc1);
+ fd2 = bpf_program__fd(skel->progs.tc2);
+ fd3 = bpf_program__fd(skel->progs.tc3);
+ fd4 = bpf_program__fd(skel->progs.tc4);
+
+ id1 = id_from_prog_fd(fd1);
+ id2 = id_from_prog_fd(fd2);
+ id3 = id_from_prog_fd(fd3);
+ id4 = id_from_prog_fd(fd4);
+
+ assert_mprog_count(target, 0);
+
+ LIBBPF_OPTS_RESET(opta,
+ .expected_revision = 1,
+ );
+
+ err = bpf_prog_attach_opts(fd1, loopback, target, &opta);
+ if (!ASSERT_EQ(err, 0, "prog_attach"))
+ goto cleanup;
+
+ assert_mprog_count(target, 1);
+
+ LIBBPF_OPTS_RESET(opta,
+ .expected_revision = 2,
+ );
+
+ err = bpf_prog_attach_opts(fd2, loopback, target, &opta);
+ if (!ASSERT_EQ(err, 0, "prog_attach"))
+ goto cleanup1;
+
+ assert_mprog_count(target, 2);
+
+ LIBBPF_OPTS_RESET(opta,
+ .expected_revision = 3,
+ );
+
+ err = bpf_prog_attach_opts(fd3, loopback, target, &opta);
+ if (!ASSERT_EQ(err, 0, "prog_attach"))
+ goto cleanup2;
+
+ assert_mprog_count(target, 3);
+
+ LIBBPF_OPTS_RESET(opta,
+ .expected_revision = 4,
+ );
+
+ err = bpf_prog_attach_opts(fd4, loopback, target, &opta);
+ if (!ASSERT_EQ(err, 0, "prog_attach"))
+ goto cleanup3;
+
+ assert_mprog_count(target, 4);
+
+ /* Test 1: Double query via libbpf API */
+ err = bpf_prog_query_opts(loopback, target, &optq);
+ if (!ASSERT_OK(err, "prog_query"))
+ goto cleanup4;
+
+ ASSERT_EQ(optq.count, 4, "count");
+ ASSERT_EQ(optq.revision, 5, "revision");
+ ASSERT_EQ(optq.prog_ids, NULL, "prog_ids");
+ ASSERT_EQ(optq.link_ids, NULL, "link_ids");
+
+ memset(prog_ids, 0, sizeof(prog_ids));
+ optq.prog_ids = prog_ids;
+
+ err = bpf_prog_query_opts(loopback, target, &optq);
+ if (!ASSERT_OK(err, "prog_query"))
+ goto cleanup4;
+
+ ASSERT_EQ(optq.count, 4, "count");
+ ASSERT_EQ(optq.revision, 5, "revision");
+ ASSERT_EQ(optq.prog_ids[0], id1, "prog_ids[0]");
+ ASSERT_EQ(optq.prog_ids[1], id2, "prog_ids[1]");
+ ASSERT_EQ(optq.prog_ids[2], id3, "prog_ids[2]");
+ ASSERT_EQ(optq.prog_ids[3], id4, "prog_ids[3]");
+ ASSERT_EQ(optq.prog_ids[4], 0, "prog_ids[4]");
+ ASSERT_EQ(optq.link_ids, NULL, "link_ids");
+
+ /* Test 2: Double query via bpf_attr & bpf(2) directly */
+ memset(&attr, 0, attr_size);
+ attr.query.target_ifindex = loopback;
+ attr.query.attach_type = target;
+
+ err = syscall(__NR_bpf, BPF_PROG_QUERY, &attr, attr_size);
+ if (!ASSERT_OK(err, "prog_query"))
+ goto cleanup4;
+
+ ASSERT_EQ(attr.query.count, 4, "count");
+ ASSERT_EQ(attr.query.revision, 5, "revision");
+ ASSERT_EQ(attr.query.query_flags, 0, "query_flags");
+ ASSERT_EQ(attr.query.attach_flags, 0, "attach_flags");
+ ASSERT_EQ(attr.query.target_ifindex, loopback, "target_ifindex");
+ ASSERT_EQ(attr.query.attach_type, target, "attach_type");
+ ASSERT_EQ(attr.query.prog_ids, 0, "prog_ids");
+ ASSERT_EQ(attr.query.prog_attach_flags, 0, "prog_attach_flags");
+ ASSERT_EQ(attr.query.link_ids, 0, "link_ids");
+ ASSERT_EQ(attr.query.link_attach_flags, 0, "link_attach_flags");
+
+ memset(prog_ids, 0, sizeof(prog_ids));
+ attr.query.prog_ids = ptr_to_u64(prog_ids);
+
+ err = syscall(__NR_bpf, BPF_PROG_QUERY, &attr, attr_size);
+ if (!ASSERT_OK(err, "prog_query"))
+ goto cleanup4;
+
+ ASSERT_EQ(attr.query.count, 4, "count");
+ ASSERT_EQ(attr.query.revision, 5, "revision");
+ ASSERT_EQ(attr.query.query_flags, 0, "query_flags");
+ ASSERT_EQ(attr.query.attach_flags, 0, "attach_flags");
+ ASSERT_EQ(attr.query.target_ifindex, loopback, "target_ifindex");
+ ASSERT_EQ(attr.query.attach_type, target, "attach_type");
+ ASSERT_EQ(attr.query.prog_ids, ptr_to_u64(prog_ids), "prog_ids");
+ ASSERT_EQ(prog_ids[0], id1, "prog_ids[0]");
+ ASSERT_EQ(prog_ids[1], id2, "prog_ids[1]");
+ ASSERT_EQ(prog_ids[2], id3, "prog_ids[2]");
+ ASSERT_EQ(prog_ids[3], id4, "prog_ids[3]");
+ ASSERT_EQ(prog_ids[4], 0, "prog_ids[4]");
+ ASSERT_EQ(attr.query.prog_attach_flags, 0, "prog_attach_flags");
+ ASSERT_EQ(attr.query.link_ids, 0, "link_ids");
+ ASSERT_EQ(attr.query.link_attach_flags, 0, "link_attach_flags");
+
+ /* Test 3: Query with smaller prog_ids array */
+ memset(&attr, 0, attr_size);
+ attr.query.target_ifindex = loopback;
+ attr.query.attach_type = target;
+
+ memset(prog_ids, 0, sizeof(prog_ids));
+ attr.query.prog_ids = ptr_to_u64(prog_ids);
+ attr.query.count = 2;
+
+ err = syscall(__NR_bpf, BPF_PROG_QUERY, &attr, attr_size);
+ ASSERT_EQ(err, -1, "prog_query_should_fail");
+ ASSERT_EQ(errno, ENOSPC, "prog_query_should_fail");
+
+ ASSERT_EQ(attr.query.count, 4, "count");
+ ASSERT_EQ(attr.query.revision, 5, "revision");
+ ASSERT_EQ(attr.query.query_flags, 0, "query_flags");
+ ASSERT_EQ(attr.query.attach_flags, 0, "attach_flags");
+ ASSERT_EQ(attr.query.target_ifindex, loopback, "target_ifindex");
+ ASSERT_EQ(attr.query.attach_type, target, "attach_type");
+ ASSERT_EQ(attr.query.prog_ids, ptr_to_u64(prog_ids), "prog_ids");
+ ASSERT_EQ(prog_ids[0], id1, "prog_ids[0]");
+ ASSERT_EQ(prog_ids[1], id2, "prog_ids[1]");
+ ASSERT_EQ(prog_ids[2], 0, "prog_ids[2]");
+ ASSERT_EQ(prog_ids[3], 0, "prog_ids[3]");
+ ASSERT_EQ(prog_ids[4], 0, "prog_ids[4]");
+ ASSERT_EQ(attr.query.prog_attach_flags, 0, "prog_attach_flags");
+ ASSERT_EQ(attr.query.link_ids, 0, "link_ids");
+ ASSERT_EQ(attr.query.link_attach_flags, 0, "link_attach_flags");
+
+ /* Test 4: Query with larger prog_ids array */
+ memset(&attr, 0, attr_size);
+ attr.query.target_ifindex = loopback;
+ attr.query.attach_type = target;
+
+ memset(prog_ids, 0, sizeof(prog_ids));
+ attr.query.prog_ids = ptr_to_u64(prog_ids);
+ attr.query.count = 10;
+
+ err = syscall(__NR_bpf, BPF_PROG_QUERY, &attr, attr_size);
+ if (!ASSERT_OK(err, "prog_query"))
+ goto cleanup4;
+
+ ASSERT_EQ(attr.query.count, 4, "count");
+ ASSERT_EQ(attr.query.revision, 5, "revision");
+ ASSERT_EQ(attr.query.query_flags, 0, "query_flags");
+ ASSERT_EQ(attr.query.attach_flags, 0, "attach_flags");
+ ASSERT_EQ(attr.query.target_ifindex, loopback, "target_ifindex");
+ ASSERT_EQ(attr.query.attach_type, target, "attach_type");
+ ASSERT_EQ(attr.query.prog_ids, ptr_to_u64(prog_ids), "prog_ids");
+ ASSERT_EQ(prog_ids[0], id1, "prog_ids[0]");
+ ASSERT_EQ(prog_ids[1], id2, "prog_ids[1]");
+ ASSERT_EQ(prog_ids[2], id3, "prog_ids[2]");
+ ASSERT_EQ(prog_ids[3], id4, "prog_ids[3]");
+ ASSERT_EQ(prog_ids[4], 0, "prog_ids[4]");
+ ASSERT_EQ(attr.query.prog_attach_flags, 0, "prog_attach_flags");
+ ASSERT_EQ(attr.query.link_ids, 0, "link_ids");
+ ASSERT_EQ(attr.query.link_attach_flags, 0, "link_attach_flags");
+
+ /* Test 5: Query with NULL prog_ids array but with count > 0 */
+ memset(&attr, 0, attr_size);
+ attr.query.target_ifindex = loopback;
+ attr.query.attach_type = target;
+
+ memset(prog_ids, 0, sizeof(prog_ids));
+ attr.query.count = sizeof(prog_ids);
+
+ err = syscall(__NR_bpf, BPF_PROG_QUERY, &attr, attr_size);
+ if (!ASSERT_OK(err, "prog_query"))
+ goto cleanup4;
+
+ ASSERT_EQ(attr.query.count, 4, "count");
+ ASSERT_EQ(attr.query.revision, 5, "revision");
+ ASSERT_EQ(attr.query.query_flags, 0, "query_flags");
+ ASSERT_EQ(attr.query.attach_flags, 0, "attach_flags");
+ ASSERT_EQ(attr.query.target_ifindex, loopback, "target_ifindex");
+ ASSERT_EQ(attr.query.attach_type, target, "attach_type");
+ ASSERT_EQ(prog_ids[0], 0, "prog_ids[0]");
+ ASSERT_EQ(prog_ids[1], 0, "prog_ids[1]");
+ ASSERT_EQ(prog_ids[2], 0, "prog_ids[2]");
+ ASSERT_EQ(prog_ids[3], 0, "prog_ids[3]");
+ ASSERT_EQ(prog_ids[4], 0, "prog_ids[4]");
+ ASSERT_EQ(attr.query.prog_ids, 0, "prog_ids");
+ ASSERT_EQ(attr.query.prog_attach_flags, 0, "prog_attach_flags");
+ ASSERT_EQ(attr.query.link_ids, 0, "link_ids");
+ ASSERT_EQ(attr.query.link_attach_flags, 0, "link_attach_flags");
+
+ /* Test 6: Query with non-NULL prog_ids array but with count == 0 */
+ memset(&attr, 0, attr_size);
+ attr.query.target_ifindex = loopback;
+ attr.query.attach_type = target;
+
+ memset(prog_ids, 0, sizeof(prog_ids));
+ attr.query.prog_ids = ptr_to_u64(prog_ids);
+
+ err = syscall(__NR_bpf, BPF_PROG_QUERY, &attr, attr_size);
+ if (!ASSERT_OK(err, "prog_query"))
+ goto cleanup4;
+
+ ASSERT_EQ(attr.query.count, 4, "count");
+ ASSERT_EQ(attr.query.revision, 5, "revision");
+ ASSERT_EQ(attr.query.query_flags, 0, "query_flags");
+ ASSERT_EQ(attr.query.attach_flags, 0, "attach_flags");
+ ASSERT_EQ(attr.query.target_ifindex, loopback, "target_ifindex");
+ ASSERT_EQ(attr.query.attach_type, target, "attach_type");
+ ASSERT_EQ(prog_ids[0], 0, "prog_ids[0]");
+ ASSERT_EQ(prog_ids[1], 0, "prog_ids[1]");
+ ASSERT_EQ(prog_ids[2], 0, "prog_ids[2]");
+ ASSERT_EQ(prog_ids[3], 0, "prog_ids[3]");
+ ASSERT_EQ(prog_ids[4], 0, "prog_ids[4]");
+ ASSERT_EQ(attr.query.prog_ids, ptr_to_u64(prog_ids), "prog_ids");
+ ASSERT_EQ(attr.query.prog_attach_flags, 0, "prog_attach_flags");
+ ASSERT_EQ(attr.query.link_ids, 0, "link_ids");
+ ASSERT_EQ(attr.query.link_attach_flags, 0, "link_attach_flags");
+
+ /* Test 7: Query with invalid flags */
+ attr.query.attach_flags = 0;
+ attr.query.query_flags = 1;
+
+ err = syscall(__NR_bpf, BPF_PROG_QUERY, &attr, attr_size);
+ ASSERT_EQ(err, -1, "prog_query_should_fail");
+ ASSERT_EQ(errno, EINVAL, "prog_query_should_fail");
+
+ attr.query.attach_flags = 1;
+ attr.query.query_flags = 0;
+
+ err = syscall(__NR_bpf, BPF_PROG_QUERY, &attr, attr_size);
+ ASSERT_EQ(err, -1, "prog_query_should_fail");
+ ASSERT_EQ(errno, EINVAL, "prog_query_should_fail");
+
+cleanup4:
+ err = bpf_prog_detach_opts(fd4, loopback, target, &optd);
+ ASSERT_OK(err, "prog_detach");
+ assert_mprog_count(target, 3);
+
+cleanup3:
+ err = bpf_prog_detach_opts(fd3, loopback, target, &optd);
+ ASSERT_OK(err, "prog_detach");
+ assert_mprog_count(target, 2);
+
+cleanup2:
+ err = bpf_prog_detach_opts(fd2, loopback, target, &optd);
+ ASSERT_OK(err, "prog_detach");
+ assert_mprog_count(target, 1);
+
+cleanup1:
+ err = bpf_prog_detach_opts(fd1, loopback, target, &optd);
+ ASSERT_OK(err, "prog_detach");
+ assert_mprog_count(target, 0);
+
+cleanup:
+ test_tc_link__destroy(skel);
+}
+
+void serial_test_tc_opts_query(void)
+{
+ test_tc_opts_query_target(BPF_TCX_INGRESS);
+ test_tc_opts_query_target(BPF_TCX_EGRESS);
+}
+
+static void test_tc_opts_query_attach_target(int target)
+{
+ LIBBPF_OPTS(bpf_prog_attach_opts, opta);
+ LIBBPF_OPTS(bpf_prog_detach_opts, optd);
+ LIBBPF_OPTS(bpf_prog_query_opts, optq);
+ struct test_tc_link *skel;
+ __u32 prog_ids[2];
+ __u32 fd1, id1;
+ int err;
+
+ skel = test_tc_link__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "skel_load"))
+ goto cleanup;
+
+ fd1 = bpf_program__fd(skel->progs.tc1);
+ id1 = id_from_prog_fd(fd1);
+
+ err = bpf_prog_query_opts(loopback, target, &optq);
+ if (!ASSERT_OK(err, "prog_query"))
+ goto cleanup;
+
+ ASSERT_EQ(optq.count, 0, "count");
+ ASSERT_EQ(optq.revision, 1, "revision");
+
+ LIBBPF_OPTS_RESET(opta,
+ .expected_revision = optq.revision,
+ );
+
+ err = bpf_prog_attach_opts(fd1, loopback, target, &opta);
+ if (!ASSERT_EQ(err, 0, "prog_attach"))
+ goto cleanup;
+
+ memset(prog_ids, 0, sizeof(prog_ids));
+ optq.prog_ids = prog_ids;
+ optq.count = ARRAY_SIZE(prog_ids);
+
+ err = bpf_prog_query_opts(loopback, target, &optq);
+ if (!ASSERT_OK(err, "prog_query"))
+ goto cleanup1;
+
+ ASSERT_EQ(optq.count, 1, "count");
+ ASSERT_EQ(optq.revision, 2, "revision");
+ ASSERT_EQ(optq.prog_ids[0], id1, "prog_ids[0]");
+ ASSERT_EQ(optq.prog_ids[1], 0, "prog_ids[1]");
+
+cleanup1:
+ err = bpf_prog_detach_opts(fd1, loopback, target, &optd);
+ ASSERT_OK(err, "prog_detach");
+ assert_mprog_count(target, 0);
+cleanup:
+ test_tc_link__destroy(skel);
+}
+
+void serial_test_tc_opts_query_attach(void)
+{
+ test_tc_opts_query_attach_target(BPF_TCX_INGRESS);
+ test_tc_opts_query_attach_target(BPF_TCX_EGRESS);
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/tc_redirect.c b/tools/testing/selftests/bpf/prog_tests/tc_redirect.c
new file mode 100644
index 000000000000..dbe06aeaa2b2
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/tc_redirect.c
@@ -0,0 +1,1290 @@
+// SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause
+
+/*
+ * This test sets up 3 netns (src <-> fwd <-> dst). There is no direct veth link
+ * between src and dst. The netns fwd has veth links to each src and dst. The
+ * client is in src and server in dst. The test installs a TC BPF program to each
+ * host facing veth in fwd which calls into i) bpf_redirect_neigh() to perform the
+ * neigh addr population and redirect or ii) bpf_redirect_peer() for namespace
+ * switch from ingress side; it also installs a checker prog on the egress side
+ * to drop unexpected traffic.
+ */
+
+#include <arpa/inet.h>
+#include <linux/if_tun.h>
+#include <linux/limits.h>
+#include <linux/sysctl.h>
+#include <linux/time_types.h>
+#include <linux/net_tstamp.h>
+#include <net/if.h>
+#include <stdbool.h>
+#include <stdio.h>
+#include <sys/stat.h>
+#include <unistd.h>
+
+#include "test_progs.h"
+#include "network_helpers.h"
+#include "netlink_helpers.h"
+#include "test_tc_neigh_fib.skel.h"
+#include "test_tc_neigh.skel.h"
+#include "test_tc_peer.skel.h"
+#include "test_tc_dtime.skel.h"
+
+#ifndef TCP_TX_DELAY
+#define TCP_TX_DELAY 37
+#endif
+
+#define NS_SRC "ns_src"
+#define NS_FWD "ns_fwd"
+#define NS_DST "ns_dst"
+
+#define IP4_SRC "172.16.1.100"
+#define IP4_DST "172.16.2.100"
+#define IP4_TUN_SRC "172.17.1.100"
+#define IP4_TUN_FWD "172.17.1.200"
+#define IP4_PORT 9004
+
+#define IP6_SRC "0::1:dead:beef:cafe"
+#define IP6_DST "0::2:dead:beef:cafe"
+#define IP6_TUN_SRC "1::1:dead:beef:cafe"
+#define IP6_TUN_FWD "1::2:dead:beef:cafe"
+#define IP6_PORT 9006
+
+#define IP4_SLL "169.254.0.1"
+#define IP4_DLL "169.254.0.2"
+#define IP4_NET "169.254.0.0"
+
+#define MAC_DST_FWD "00:11:22:33:44:55"
+#define MAC_DST "00:22:33:44:55:66"
+
+#define IFADDR_STR_LEN 18
+#define PING_ARGS "-i 0.2 -c 3 -w 10 -q"
+
+#define TIMEOUT_MILLIS 10000
+#define NSEC_PER_SEC 1000000000ULL
+
+#define log_err(MSG, ...) \
+ fprintf(stderr, "(%s:%d: errno: %s) " MSG "\n", \
+ __FILE__, __LINE__, strerror(errno), ##__VA_ARGS__)
+
+static const char * const namespaces[] = {NS_SRC, NS_FWD, NS_DST, NULL};
+
+static int write_file(const char *path, const char *newval)
+{
+ FILE *f;
+
+ f = fopen(path, "r+");
+ if (!f)
+ return -1;
+ if (fwrite(newval, strlen(newval), 1, f) != 1) {
+ log_err("writing to %s failed", path);
+ fclose(f);
+ return -1;
+ }
+ fclose(f);
+ return 0;
+}
+
+static int netns_setup_namespaces(const char *verb)
+{
+ const char * const *ns = namespaces;
+ char cmd[128];
+
+ while (*ns) {
+ snprintf(cmd, sizeof(cmd), "ip netns %s %s", verb, *ns);
+ if (!ASSERT_OK(system(cmd), cmd))
+ return -1;
+ ns++;
+ }
+ return 0;
+}
+
+static void netns_setup_namespaces_nofail(const char *verb)
+{
+ const char * const *ns = namespaces;
+ char cmd[128];
+
+ while (*ns) {
+ snprintf(cmd, sizeof(cmd), "ip netns %s %s > /dev/null 2>&1", verb, *ns);
+ system(cmd);
+ ns++;
+ }
+}
+
+enum dev_mode {
+ MODE_VETH,
+ MODE_NETKIT,
+};
+
+struct netns_setup_result {
+ enum dev_mode dev_mode;
+ int ifindex_src;
+ int ifindex_src_fwd;
+ int ifindex_dst;
+ int ifindex_dst_fwd;
+};
+
+static int get_ifaddr(const char *name, char *ifaddr)
+{
+ char path[PATH_MAX];
+ FILE *f;
+ int ret;
+
+ snprintf(path, PATH_MAX, "/sys/class/net/%s/address", name);
+ f = fopen(path, "r");
+ if (!ASSERT_OK_PTR(f, path))
+ return -1;
+
+ ret = fread(ifaddr, 1, IFADDR_STR_LEN, f);
+ if (!ASSERT_EQ(ret, IFADDR_STR_LEN, "fread ifaddr")) {
+ fclose(f);
+ return -1;
+ }
+ fclose(f);
+ return 0;
+}
+
+static int create_netkit(int mode, char *prim, char *peer)
+{
+ struct rtattr *linkinfo, *data, *peer_info;
+ struct rtnl_handle rth = { .fd = -1 };
+ const char *type = "netkit";
+ struct {
+ struct nlmsghdr n;
+ struct ifinfomsg i;
+ char buf[1024];
+ } req = {};
+ int err;
+
+ err = rtnl_open(&rth, 0);
+ if (!ASSERT_OK(err, "open_rtnetlink"))
+ return err;
+
+ memset(&req, 0, sizeof(req));
+ req.n.nlmsg_len = NLMSG_LENGTH(sizeof(struct ifinfomsg));
+ req.n.nlmsg_flags = NLM_F_REQUEST | NLM_F_CREATE | NLM_F_EXCL;
+ req.n.nlmsg_type = RTM_NEWLINK;
+ req.i.ifi_family = AF_UNSPEC;
+
+ addattr_l(&req.n, sizeof(req), IFLA_IFNAME, prim, strlen(prim));
+ linkinfo = addattr_nest(&req.n, sizeof(req), IFLA_LINKINFO);
+ addattr_l(&req.n, sizeof(req), IFLA_INFO_KIND, type, strlen(type));
+ data = addattr_nest(&req.n, sizeof(req), IFLA_INFO_DATA);
+ addattr32(&req.n, sizeof(req), IFLA_NETKIT_MODE, mode);
+ peer_info = addattr_nest(&req.n, sizeof(req), IFLA_NETKIT_PEER_INFO);
+ req.n.nlmsg_len += sizeof(struct ifinfomsg);
+ addattr_l(&req.n, sizeof(req), IFLA_IFNAME, peer, strlen(peer));
+ addattr_nest_end(&req.n, peer_info);
+ addattr_nest_end(&req.n, data);
+ addattr_nest_end(&req.n, linkinfo);
+
+ err = rtnl_talk(&rth, &req.n, NULL);
+ ASSERT_OK(err, "talk_rtnetlink");
+ rtnl_close(&rth);
+ return err;
+}
+
+static int netns_setup_links_and_routes(struct netns_setup_result *result)
+{
+ struct nstoken *nstoken = NULL;
+ char src_fwd_addr[IFADDR_STR_LEN+1] = {};
+ char src_addr[IFADDR_STR_LEN + 1] = {};
+ int err;
+
+ if (result->dev_mode == MODE_VETH) {
+ SYS(fail, "ip link add src type veth peer name src_fwd");
+ SYS(fail, "ip link add dst type veth peer name dst_fwd");
+
+ SYS(fail, "ip link set dst_fwd address " MAC_DST_FWD);
+ SYS(fail, "ip link set dst address " MAC_DST);
+ } else if (result->dev_mode == MODE_NETKIT) {
+ err = create_netkit(NETKIT_L3, "src", "src_fwd");
+ if (!ASSERT_OK(err, "create_ifindex_src"))
+ goto fail;
+ err = create_netkit(NETKIT_L3, "dst", "dst_fwd");
+ if (!ASSERT_OK(err, "create_ifindex_dst"))
+ goto fail;
+ }
+
+ if (get_ifaddr("src_fwd", src_fwd_addr))
+ goto fail;
+
+ if (get_ifaddr("src", src_addr))
+ goto fail;
+
+ result->ifindex_src = if_nametoindex("src");
+ if (!ASSERT_GT(result->ifindex_src, 0, "ifindex_src"))
+ goto fail;
+
+ result->ifindex_src_fwd = if_nametoindex("src_fwd");
+ if (!ASSERT_GT(result->ifindex_src_fwd, 0, "ifindex_src_fwd"))
+ goto fail;
+
+ result->ifindex_dst = if_nametoindex("dst");
+ if (!ASSERT_GT(result->ifindex_dst, 0, "ifindex_dst"))
+ goto fail;
+
+ result->ifindex_dst_fwd = if_nametoindex("dst_fwd");
+ if (!ASSERT_GT(result->ifindex_dst_fwd, 0, "ifindex_dst_fwd"))
+ goto fail;
+
+ SYS(fail, "ip link set src netns " NS_SRC);
+ SYS(fail, "ip link set src_fwd netns " NS_FWD);
+ SYS(fail, "ip link set dst_fwd netns " NS_FWD);
+ SYS(fail, "ip link set dst netns " NS_DST);
+
+ /** setup in 'src' namespace */
+ nstoken = open_netns(NS_SRC);
+ if (!ASSERT_OK_PTR(nstoken, "setns src"))
+ goto fail;
+
+ SYS(fail, "ip addr add " IP4_SRC "/32 dev src");
+ SYS(fail, "ip addr add " IP6_SRC "/128 dev src nodad");
+ SYS(fail, "ip link set dev src up");
+
+ SYS(fail, "ip route add " IP4_DST "/32 dev src scope global");
+ SYS(fail, "ip route add " IP4_NET "/16 dev src scope global");
+ SYS(fail, "ip route add " IP6_DST "/128 dev src scope global");
+
+ if (result->dev_mode == MODE_VETH) {
+ SYS(fail, "ip neigh add " IP4_DST " dev src lladdr %s",
+ src_fwd_addr);
+ SYS(fail, "ip neigh add " IP6_DST " dev src lladdr %s",
+ src_fwd_addr);
+ }
+
+ close_netns(nstoken);
+
+ /** setup in 'fwd' namespace */
+ nstoken = open_netns(NS_FWD);
+ if (!ASSERT_OK_PTR(nstoken, "setns fwd"))
+ goto fail;
+
+ /* The fwd netns automatically gets a v6 LL address / routes, but also
+ * needs v4 one in order to start ARP probing. IP4_NET route is added
+ * to the endpoints so that the ARP processing will reply.
+ */
+ SYS(fail, "ip addr add " IP4_SLL "/32 dev src_fwd");
+ SYS(fail, "ip addr add " IP4_DLL "/32 dev dst_fwd");
+ SYS(fail, "ip link set dev src_fwd up");
+ SYS(fail, "ip link set dev dst_fwd up");
+
+ SYS(fail, "ip route add " IP4_SRC "/32 dev src_fwd scope global");
+ SYS(fail, "ip route add " IP6_SRC "/128 dev src_fwd scope global");
+ SYS(fail, "ip route add " IP4_DST "/32 dev dst_fwd scope global");
+ SYS(fail, "ip route add " IP6_DST "/128 dev dst_fwd scope global");
+
+ if (result->dev_mode == MODE_VETH) {
+ SYS(fail, "ip neigh add " IP4_SRC " dev src_fwd lladdr %s", src_addr);
+ SYS(fail, "ip neigh add " IP6_SRC " dev src_fwd lladdr %s", src_addr);
+ SYS(fail, "ip neigh add " IP4_DST " dev dst_fwd lladdr %s", MAC_DST);
+ SYS(fail, "ip neigh add " IP6_DST " dev dst_fwd lladdr %s", MAC_DST);
+ }
+
+ close_netns(nstoken);
+
+ /** setup in 'dst' namespace */
+ nstoken = open_netns(NS_DST);
+ if (!ASSERT_OK_PTR(nstoken, "setns dst"))
+ goto fail;
+
+ SYS(fail, "ip addr add " IP4_DST "/32 dev dst");
+ SYS(fail, "ip addr add " IP6_DST "/128 dev dst nodad");
+ SYS(fail, "ip link set dev dst up");
+ SYS(fail, "ip link set dev lo up");
+
+ SYS(fail, "ip route add " IP4_SRC "/32 dev dst scope global");
+ SYS(fail, "ip route add " IP4_NET "/16 dev dst scope global");
+ SYS(fail, "ip route add " IP6_SRC "/128 dev dst scope global");
+
+ if (result->dev_mode == MODE_VETH) {
+ SYS(fail, "ip neigh add " IP4_SRC " dev dst lladdr " MAC_DST_FWD);
+ SYS(fail, "ip neigh add " IP6_SRC " dev dst lladdr " MAC_DST_FWD);
+ }
+
+ close_netns(nstoken);
+
+ return 0;
+fail:
+ if (nstoken)
+ close_netns(nstoken);
+ return -1;
+}
+
+static int qdisc_clsact_create(struct bpf_tc_hook *qdisc_hook, int ifindex)
+{
+ char err_str[128], ifname[16];
+ int err;
+
+ qdisc_hook->ifindex = ifindex;
+ qdisc_hook->attach_point = BPF_TC_INGRESS | BPF_TC_EGRESS;
+ err = bpf_tc_hook_create(qdisc_hook);
+ snprintf(err_str, sizeof(err_str),
+ "qdisc add dev %s clsact",
+ if_indextoname(qdisc_hook->ifindex, ifname) ? : "<unknown_iface>");
+ err_str[sizeof(err_str) - 1] = 0;
+ ASSERT_OK(err, err_str);
+
+ return err;
+}
+
+static int xgress_filter_add(struct bpf_tc_hook *qdisc_hook,
+ enum bpf_tc_attach_point xgress,
+ const struct bpf_program *prog, int priority)
+{
+ LIBBPF_OPTS(bpf_tc_opts, tc_attach);
+ char err_str[128], ifname[16];
+ int err;
+
+ qdisc_hook->attach_point = xgress;
+ tc_attach.prog_fd = bpf_program__fd(prog);
+ tc_attach.priority = priority;
+ err = bpf_tc_attach(qdisc_hook, &tc_attach);
+ snprintf(err_str, sizeof(err_str),
+ "filter add dev %s %s prio %d bpf da %s",
+ if_indextoname(qdisc_hook->ifindex, ifname) ? : "<unknown_iface>",
+ xgress == BPF_TC_INGRESS ? "ingress" : "egress",
+ priority, bpf_program__name(prog));
+ err_str[sizeof(err_str) - 1] = 0;
+ ASSERT_OK(err, err_str);
+
+ return err;
+}
+
+#define QDISC_CLSACT_CREATE(qdisc_hook, ifindex) ({ \
+ if ((err = qdisc_clsact_create(qdisc_hook, ifindex))) \
+ goto fail; \
+})
+
+#define XGRESS_FILTER_ADD(qdisc_hook, xgress, prog, priority) ({ \
+ if ((err = xgress_filter_add(qdisc_hook, xgress, prog, priority))) \
+ goto fail; \
+})
+
+static int netns_load_bpf(const struct bpf_program *src_prog,
+ const struct bpf_program *dst_prog,
+ const struct bpf_program *chk_prog,
+ const struct netns_setup_result *setup_result)
+{
+ LIBBPF_OPTS(bpf_tc_hook, qdisc_src_fwd);
+ LIBBPF_OPTS(bpf_tc_hook, qdisc_dst_fwd);
+ int err;
+
+ /* tc qdisc add dev src_fwd clsact */
+ QDISC_CLSACT_CREATE(&qdisc_src_fwd, setup_result->ifindex_src_fwd);
+ /* tc filter add dev src_fwd ingress bpf da src_prog */
+ XGRESS_FILTER_ADD(&qdisc_src_fwd, BPF_TC_INGRESS, src_prog, 0);
+ /* tc filter add dev src_fwd egress bpf da chk_prog */
+ XGRESS_FILTER_ADD(&qdisc_src_fwd, BPF_TC_EGRESS, chk_prog, 0);
+
+ /* tc qdisc add dev dst_fwd clsact */
+ QDISC_CLSACT_CREATE(&qdisc_dst_fwd, setup_result->ifindex_dst_fwd);
+ /* tc filter add dev dst_fwd ingress bpf da dst_prog */
+ XGRESS_FILTER_ADD(&qdisc_dst_fwd, BPF_TC_INGRESS, dst_prog, 0);
+ /* tc filter add dev dst_fwd egress bpf da chk_prog */
+ XGRESS_FILTER_ADD(&qdisc_dst_fwd, BPF_TC_EGRESS, chk_prog, 0);
+
+ return 0;
+fail:
+ return -1;
+}
+
+static void test_tcp(int family, const char *addr, __u16 port)
+{
+ int listen_fd = -1, accept_fd = -1, client_fd = -1;
+ char buf[] = "testing testing";
+ int n;
+ struct nstoken *nstoken;
+
+ nstoken = open_netns(NS_DST);
+ if (!ASSERT_OK_PTR(nstoken, "setns dst"))
+ return;
+
+ listen_fd = start_server(family, SOCK_STREAM, addr, port, 0);
+ if (!ASSERT_GE(listen_fd, 0, "listen"))
+ goto done;
+
+ close_netns(nstoken);
+ nstoken = open_netns(NS_SRC);
+ if (!ASSERT_OK_PTR(nstoken, "setns src"))
+ goto done;
+
+ client_fd = connect_to_fd(listen_fd, TIMEOUT_MILLIS);
+ if (!ASSERT_GE(client_fd, 0, "connect_to_fd"))
+ goto done;
+
+ accept_fd = accept(listen_fd, NULL, NULL);
+ if (!ASSERT_GE(accept_fd, 0, "accept"))
+ goto done;
+
+ if (!ASSERT_OK(settimeo(accept_fd, TIMEOUT_MILLIS), "settimeo"))
+ goto done;
+
+ n = write(client_fd, buf, sizeof(buf));
+ if (!ASSERT_EQ(n, sizeof(buf), "send to server"))
+ goto done;
+
+ n = read(accept_fd, buf, sizeof(buf));
+ ASSERT_EQ(n, sizeof(buf), "recv from server");
+
+done:
+ if (nstoken)
+ close_netns(nstoken);
+ if (listen_fd >= 0)
+ close(listen_fd);
+ if (accept_fd >= 0)
+ close(accept_fd);
+ if (client_fd >= 0)
+ close(client_fd);
+}
+
+static int test_ping(int family, const char *addr)
+{
+ SYS(fail, "ip netns exec " NS_SRC " %s " PING_ARGS " %s > /dev/null", ping_command(family), addr);
+ return 0;
+fail:
+ return -1;
+}
+
+static void test_connectivity(void)
+{
+ test_tcp(AF_INET, IP4_DST, IP4_PORT);
+ test_ping(AF_INET, IP4_DST);
+ test_tcp(AF_INET6, IP6_DST, IP6_PORT);
+ test_ping(AF_INET6, IP6_DST);
+}
+
+static int set_forwarding(bool enable)
+{
+ int err;
+
+ err = write_file("/proc/sys/net/ipv4/ip_forward", enable ? "1" : "0");
+ if (!ASSERT_OK(err, "set ipv4.ip_forward=0"))
+ return err;
+
+ err = write_file("/proc/sys/net/ipv6/conf/all/forwarding", enable ? "1" : "0");
+ if (!ASSERT_OK(err, "set ipv6.forwarding=0"))
+ return err;
+
+ return 0;
+}
+
+static int __rcv_tstamp(int fd, const char *expected, size_t s, __u64 *tstamp)
+{
+ struct __kernel_timespec pkt_ts = {};
+ char ctl[CMSG_SPACE(sizeof(pkt_ts))];
+ struct timespec now_ts;
+ struct msghdr msg = {};
+ __u64 now_ns, pkt_ns;
+ struct cmsghdr *cmsg;
+ struct iovec iov;
+ char data[32];
+ int ret;
+
+ iov.iov_base = data;
+ iov.iov_len = sizeof(data);
+ msg.msg_iov = &iov;
+ msg.msg_iovlen = 1;
+ msg.msg_control = &ctl;
+ msg.msg_controllen = sizeof(ctl);
+
+ ret = recvmsg(fd, &msg, 0);
+ if (!ASSERT_EQ(ret, s, "recvmsg"))
+ return -1;
+ ASSERT_STRNEQ(data, expected, s, "expected rcv data");
+
+ cmsg = CMSG_FIRSTHDR(&msg);
+ if (cmsg && cmsg->cmsg_level == SOL_SOCKET &&
+ cmsg->cmsg_type == SO_TIMESTAMPNS_NEW)
+ memcpy(&pkt_ts, CMSG_DATA(cmsg), sizeof(pkt_ts));
+
+ pkt_ns = pkt_ts.tv_sec * NSEC_PER_SEC + pkt_ts.tv_nsec;
+ if (tstamp) {
+ /* caller will check the tstamp itself */
+ *tstamp = pkt_ns;
+ return 0;
+ }
+
+ ASSERT_NEQ(pkt_ns, 0, "pkt rcv tstamp");
+
+ ret = clock_gettime(CLOCK_REALTIME, &now_ts);
+ ASSERT_OK(ret, "clock_gettime");
+ now_ns = now_ts.tv_sec * NSEC_PER_SEC + now_ts.tv_nsec;
+
+ if (ASSERT_GE(now_ns, pkt_ns, "check rcv tstamp"))
+ ASSERT_LT(now_ns - pkt_ns, 5 * NSEC_PER_SEC,
+ "check rcv tstamp");
+ return 0;
+}
+
+static void rcv_tstamp(int fd, const char *expected, size_t s)
+{
+ __rcv_tstamp(fd, expected, s, NULL);
+}
+
+static int wait_netstamp_needed_key(void)
+{
+ int opt = 1, srv_fd = -1, cli_fd = -1, nretries = 0, err, n;
+ char buf[] = "testing testing";
+ struct nstoken *nstoken;
+ __u64 tstamp = 0;
+
+ nstoken = open_netns(NS_DST);
+ if (!nstoken)
+ return -1;
+
+ srv_fd = start_server(AF_INET6, SOCK_DGRAM, "::1", 0, 0);
+ if (!ASSERT_GE(srv_fd, 0, "start_server"))
+ goto done;
+
+ err = setsockopt(srv_fd, SOL_SOCKET, SO_TIMESTAMPNS_NEW,
+ &opt, sizeof(opt));
+ if (!ASSERT_OK(err, "setsockopt(SO_TIMESTAMPNS_NEW)"))
+ goto done;
+
+ cli_fd = connect_to_fd(srv_fd, TIMEOUT_MILLIS);
+ if (!ASSERT_GE(cli_fd, 0, "connect_to_fd"))
+ goto done;
+
+again:
+ n = write(cli_fd, buf, sizeof(buf));
+ if (!ASSERT_EQ(n, sizeof(buf), "send to server"))
+ goto done;
+ err = __rcv_tstamp(srv_fd, buf, sizeof(buf), &tstamp);
+ if (!ASSERT_OK(err, "__rcv_tstamp"))
+ goto done;
+ if (!tstamp && nretries++ < 5) {
+ sleep(1);
+ printf("netstamp_needed_key retry#%d\n", nretries);
+ goto again;
+ }
+
+done:
+ if (!tstamp && srv_fd != -1) {
+ close(srv_fd);
+ srv_fd = -1;
+ }
+ if (cli_fd != -1)
+ close(cli_fd);
+ close_netns(nstoken);
+ return srv_fd;
+}
+
+static void snd_tstamp(int fd, char *b, size_t s)
+{
+ struct sock_txtime opt = { .clockid = CLOCK_TAI };
+ char ctl[CMSG_SPACE(sizeof(__u64))];
+ struct timespec now_ts;
+ struct msghdr msg = {};
+ struct cmsghdr *cmsg;
+ struct iovec iov;
+ __u64 now_ns;
+ int ret;
+
+ ret = clock_gettime(CLOCK_TAI, &now_ts);
+ ASSERT_OK(ret, "clock_get_time(CLOCK_TAI)");
+ now_ns = now_ts.tv_sec * NSEC_PER_SEC + now_ts.tv_nsec;
+
+ iov.iov_base = b;
+ iov.iov_len = s;
+ msg.msg_iov = &iov;
+ msg.msg_iovlen = 1;
+ msg.msg_control = &ctl;
+ msg.msg_controllen = sizeof(ctl);
+
+ cmsg = CMSG_FIRSTHDR(&msg);
+ cmsg->cmsg_level = SOL_SOCKET;
+ cmsg->cmsg_type = SCM_TXTIME;
+ cmsg->cmsg_len = CMSG_LEN(sizeof(now_ns));
+ *(__u64 *)CMSG_DATA(cmsg) = now_ns;
+
+ ret = setsockopt(fd, SOL_SOCKET, SO_TXTIME, &opt, sizeof(opt));
+ ASSERT_OK(ret, "setsockopt(SO_TXTIME)");
+
+ ret = sendmsg(fd, &msg, 0);
+ ASSERT_EQ(ret, s, "sendmsg");
+}
+
+static void test_inet_dtime(int family, int type, const char *addr, __u16 port)
+{
+ int opt = 1, accept_fd = -1, client_fd = -1, listen_fd, err;
+ char buf[] = "testing testing";
+ struct nstoken *nstoken;
+
+ nstoken = open_netns(NS_DST);
+ if (!ASSERT_OK_PTR(nstoken, "setns dst"))
+ return;
+ listen_fd = start_server(family, type, addr, port, 0);
+ close_netns(nstoken);
+
+ if (!ASSERT_GE(listen_fd, 0, "listen"))
+ return;
+
+ /* Ensure the kernel puts the (rcv) timestamp for all skb */
+ err = setsockopt(listen_fd, SOL_SOCKET, SO_TIMESTAMPNS_NEW,
+ &opt, sizeof(opt));
+ if (!ASSERT_OK(err, "setsockopt(SO_TIMESTAMPNS_NEW)"))
+ goto done;
+
+ if (type == SOCK_STREAM) {
+ /* Ensure the kernel set EDT when sending out rst/ack
+ * from the kernel's ctl_sk.
+ */
+ err = setsockopt(listen_fd, SOL_TCP, TCP_TX_DELAY, &opt,
+ sizeof(opt));
+ if (!ASSERT_OK(err, "setsockopt(TCP_TX_DELAY)"))
+ goto done;
+ }
+
+ nstoken = open_netns(NS_SRC);
+ if (!ASSERT_OK_PTR(nstoken, "setns src"))
+ goto done;
+ client_fd = connect_to_fd(listen_fd, TIMEOUT_MILLIS);
+ close_netns(nstoken);
+
+ if (!ASSERT_GE(client_fd, 0, "connect_to_fd"))
+ goto done;
+
+ if (type == SOCK_STREAM) {
+ int n;
+
+ accept_fd = accept(listen_fd, NULL, NULL);
+ if (!ASSERT_GE(accept_fd, 0, "accept"))
+ goto done;
+
+ n = write(client_fd, buf, sizeof(buf));
+ if (!ASSERT_EQ(n, sizeof(buf), "send to server"))
+ goto done;
+ rcv_tstamp(accept_fd, buf, sizeof(buf));
+ } else {
+ snd_tstamp(client_fd, buf, sizeof(buf));
+ rcv_tstamp(listen_fd, buf, sizeof(buf));
+ }
+
+done:
+ close(listen_fd);
+ if (accept_fd != -1)
+ close(accept_fd);
+ if (client_fd != -1)
+ close(client_fd);
+}
+
+static int netns_load_dtime_bpf(struct test_tc_dtime *skel,
+ const struct netns_setup_result *setup_result)
+{
+ LIBBPF_OPTS(bpf_tc_hook, qdisc_src_fwd);
+ LIBBPF_OPTS(bpf_tc_hook, qdisc_dst_fwd);
+ LIBBPF_OPTS(bpf_tc_hook, qdisc_src);
+ LIBBPF_OPTS(bpf_tc_hook, qdisc_dst);
+ struct nstoken *nstoken;
+ int err;
+
+ /* setup ns_src tc progs */
+ nstoken = open_netns(NS_SRC);
+ if (!ASSERT_OK_PTR(nstoken, "setns " NS_SRC))
+ return -1;
+ /* tc qdisc add dev src clsact */
+ QDISC_CLSACT_CREATE(&qdisc_src, setup_result->ifindex_src);
+ /* tc filter add dev src ingress bpf da ingress_host */
+ XGRESS_FILTER_ADD(&qdisc_src, BPF_TC_INGRESS, skel->progs.ingress_host, 0);
+ /* tc filter add dev src egress bpf da egress_host */
+ XGRESS_FILTER_ADD(&qdisc_src, BPF_TC_EGRESS, skel->progs.egress_host, 0);
+ close_netns(nstoken);
+
+ /* setup ns_dst tc progs */
+ nstoken = open_netns(NS_DST);
+ if (!ASSERT_OK_PTR(nstoken, "setns " NS_DST))
+ return -1;
+ /* tc qdisc add dev dst clsact */
+ QDISC_CLSACT_CREATE(&qdisc_dst, setup_result->ifindex_dst);
+ /* tc filter add dev dst ingress bpf da ingress_host */
+ XGRESS_FILTER_ADD(&qdisc_dst, BPF_TC_INGRESS, skel->progs.ingress_host, 0);
+ /* tc filter add dev dst egress bpf da egress_host */
+ XGRESS_FILTER_ADD(&qdisc_dst, BPF_TC_EGRESS, skel->progs.egress_host, 0);
+ close_netns(nstoken);
+
+ /* setup ns_fwd tc progs */
+ nstoken = open_netns(NS_FWD);
+ if (!ASSERT_OK_PTR(nstoken, "setns " NS_FWD))
+ return -1;
+ /* tc qdisc add dev dst_fwd clsact */
+ QDISC_CLSACT_CREATE(&qdisc_dst_fwd, setup_result->ifindex_dst_fwd);
+ /* tc filter add dev dst_fwd ingress prio 100 bpf da ingress_fwdns_prio100 */
+ XGRESS_FILTER_ADD(&qdisc_dst_fwd, BPF_TC_INGRESS,
+ skel->progs.ingress_fwdns_prio100, 100);
+ /* tc filter add dev dst_fwd ingress prio 101 bpf da ingress_fwdns_prio101 */
+ XGRESS_FILTER_ADD(&qdisc_dst_fwd, BPF_TC_INGRESS,
+ skel->progs.ingress_fwdns_prio101, 101);
+ /* tc filter add dev dst_fwd egress prio 100 bpf da egress_fwdns_prio100 */
+ XGRESS_FILTER_ADD(&qdisc_dst_fwd, BPF_TC_EGRESS,
+ skel->progs.egress_fwdns_prio100, 100);
+ /* tc filter add dev dst_fwd egress prio 101 bpf da egress_fwdns_prio101 */
+ XGRESS_FILTER_ADD(&qdisc_dst_fwd, BPF_TC_EGRESS,
+ skel->progs.egress_fwdns_prio101, 101);
+
+ /* tc qdisc add dev src_fwd clsact */
+ QDISC_CLSACT_CREATE(&qdisc_src_fwd, setup_result->ifindex_src_fwd);
+ /* tc filter add dev src_fwd ingress prio 100 bpf da ingress_fwdns_prio100 */
+ XGRESS_FILTER_ADD(&qdisc_src_fwd, BPF_TC_INGRESS,
+ skel->progs.ingress_fwdns_prio100, 100);
+ /* tc filter add dev src_fwd ingress prio 101 bpf da ingress_fwdns_prio101 */
+ XGRESS_FILTER_ADD(&qdisc_src_fwd, BPF_TC_INGRESS,
+ skel->progs.ingress_fwdns_prio101, 101);
+ /* tc filter add dev src_fwd egress prio 100 bpf da egress_fwdns_prio100 */
+ XGRESS_FILTER_ADD(&qdisc_src_fwd, BPF_TC_EGRESS,
+ skel->progs.egress_fwdns_prio100, 100);
+ /* tc filter add dev src_fwd egress prio 101 bpf da egress_fwdns_prio101 */
+ XGRESS_FILTER_ADD(&qdisc_src_fwd, BPF_TC_EGRESS,
+ skel->progs.egress_fwdns_prio101, 101);
+ close_netns(nstoken);
+ return 0;
+
+fail:
+ close_netns(nstoken);
+ return err;
+}
+
+enum {
+ INGRESS_FWDNS_P100,
+ INGRESS_FWDNS_P101,
+ EGRESS_FWDNS_P100,
+ EGRESS_FWDNS_P101,
+ INGRESS_ENDHOST,
+ EGRESS_ENDHOST,
+ SET_DTIME,
+ __MAX_CNT,
+};
+
+const char *cnt_names[] = {
+ "ingress_fwdns_p100",
+ "ingress_fwdns_p101",
+ "egress_fwdns_p100",
+ "egress_fwdns_p101",
+ "ingress_endhost",
+ "egress_endhost",
+ "set_dtime",
+};
+
+enum {
+ TCP_IP6_CLEAR_DTIME,
+ TCP_IP4,
+ TCP_IP6,
+ UDP_IP4,
+ UDP_IP6,
+ TCP_IP4_RT_FWD,
+ TCP_IP6_RT_FWD,
+ UDP_IP4_RT_FWD,
+ UDP_IP6_RT_FWD,
+ UKN_TEST,
+ __NR_TESTS,
+};
+
+const char *test_names[] = {
+ "tcp ip6 clear dtime",
+ "tcp ip4",
+ "tcp ip6",
+ "udp ip4",
+ "udp ip6",
+ "tcp ip4 rt fwd",
+ "tcp ip6 rt fwd",
+ "udp ip4 rt fwd",
+ "udp ip6 rt fwd",
+};
+
+static const char *dtime_cnt_str(int test, int cnt)
+{
+ static char name[64];
+
+ snprintf(name, sizeof(name), "%s %s", test_names[test], cnt_names[cnt]);
+
+ return name;
+}
+
+static const char *dtime_err_str(int test, int cnt)
+{
+ static char name[64];
+
+ snprintf(name, sizeof(name), "%s %s errs", test_names[test],
+ cnt_names[cnt]);
+
+ return name;
+}
+
+static void test_tcp_clear_dtime(struct test_tc_dtime *skel)
+{
+ int i, t = TCP_IP6_CLEAR_DTIME;
+ __u32 *dtimes = skel->bss->dtimes[t];
+ __u32 *errs = skel->bss->errs[t];
+
+ skel->bss->test = t;
+ test_inet_dtime(AF_INET6, SOCK_STREAM, IP6_DST, 50000 + t);
+
+ ASSERT_EQ(dtimes[INGRESS_FWDNS_P100], 0,
+ dtime_cnt_str(t, INGRESS_FWDNS_P100));
+ ASSERT_EQ(dtimes[INGRESS_FWDNS_P101], 0,
+ dtime_cnt_str(t, INGRESS_FWDNS_P101));
+ ASSERT_GT(dtimes[EGRESS_FWDNS_P100], 0,
+ dtime_cnt_str(t, EGRESS_FWDNS_P100));
+ ASSERT_EQ(dtimes[EGRESS_FWDNS_P101], 0,
+ dtime_cnt_str(t, EGRESS_FWDNS_P101));
+ ASSERT_GT(dtimes[EGRESS_ENDHOST], 0,
+ dtime_cnt_str(t, EGRESS_ENDHOST));
+ ASSERT_GT(dtimes[INGRESS_ENDHOST], 0,
+ dtime_cnt_str(t, INGRESS_ENDHOST));
+
+ for (i = INGRESS_FWDNS_P100; i < __MAX_CNT; i++)
+ ASSERT_EQ(errs[i], 0, dtime_err_str(t, i));
+}
+
+static void test_tcp_dtime(struct test_tc_dtime *skel, int family, bool bpf_fwd)
+{
+ __u32 *dtimes, *errs;
+ const char *addr;
+ int i, t;
+
+ if (family == AF_INET) {
+ t = bpf_fwd ? TCP_IP4 : TCP_IP4_RT_FWD;
+ addr = IP4_DST;
+ } else {
+ t = bpf_fwd ? TCP_IP6 : TCP_IP6_RT_FWD;
+ addr = IP6_DST;
+ }
+
+ dtimes = skel->bss->dtimes[t];
+ errs = skel->bss->errs[t];
+
+ skel->bss->test = t;
+ test_inet_dtime(family, SOCK_STREAM, addr, 50000 + t);
+
+ /* fwdns_prio100 prog does not read delivery_time_type, so
+ * kernel puts the (rcv) timetamp in __sk_buff->tstamp
+ */
+ ASSERT_EQ(dtimes[INGRESS_FWDNS_P100], 0,
+ dtime_cnt_str(t, INGRESS_FWDNS_P100));
+ for (i = INGRESS_FWDNS_P101; i < SET_DTIME; i++)
+ ASSERT_GT(dtimes[i], 0, dtime_cnt_str(t, i));
+
+ for (i = INGRESS_FWDNS_P100; i < __MAX_CNT; i++)
+ ASSERT_EQ(errs[i], 0, dtime_err_str(t, i));
+}
+
+static void test_udp_dtime(struct test_tc_dtime *skel, int family, bool bpf_fwd)
+{
+ __u32 *dtimes, *errs;
+ const char *addr;
+ int i, t;
+
+ if (family == AF_INET) {
+ t = bpf_fwd ? UDP_IP4 : UDP_IP4_RT_FWD;
+ addr = IP4_DST;
+ } else {
+ t = bpf_fwd ? UDP_IP6 : UDP_IP6_RT_FWD;
+ addr = IP6_DST;
+ }
+
+ dtimes = skel->bss->dtimes[t];
+ errs = skel->bss->errs[t];
+
+ skel->bss->test = t;
+ test_inet_dtime(family, SOCK_DGRAM, addr, 50000 + t);
+
+ ASSERT_EQ(dtimes[INGRESS_FWDNS_P100], 0,
+ dtime_cnt_str(t, INGRESS_FWDNS_P100));
+ /* non mono delivery time is not forwarded */
+ ASSERT_EQ(dtimes[INGRESS_FWDNS_P101], 0,
+ dtime_cnt_str(t, INGRESS_FWDNS_P101));
+ for (i = EGRESS_FWDNS_P100; i < SET_DTIME; i++)
+ ASSERT_GT(dtimes[i], 0, dtime_cnt_str(t, i));
+
+ for (i = INGRESS_FWDNS_P100; i < __MAX_CNT; i++)
+ ASSERT_EQ(errs[i], 0, dtime_err_str(t, i));
+}
+
+static void test_tc_redirect_dtime(struct netns_setup_result *setup_result)
+{
+ struct test_tc_dtime *skel;
+ struct nstoken *nstoken;
+ int hold_tstamp_fd, err;
+
+ /* Hold a sk with the SOCK_TIMESTAMP set to ensure there
+ * is no delay in the kernel net_enable_timestamp().
+ * This ensures the following tests must have
+ * non zero rcv tstamp in the recvmsg().
+ */
+ hold_tstamp_fd = wait_netstamp_needed_key();
+ if (!ASSERT_GE(hold_tstamp_fd, 0, "wait_netstamp_needed_key"))
+ return;
+
+ skel = test_tc_dtime__open();
+ if (!ASSERT_OK_PTR(skel, "test_tc_dtime__open"))
+ goto done;
+
+ skel->rodata->IFINDEX_SRC = setup_result->ifindex_src_fwd;
+ skel->rodata->IFINDEX_DST = setup_result->ifindex_dst_fwd;
+
+ err = test_tc_dtime__load(skel);
+ if (!ASSERT_OK(err, "test_tc_dtime__load"))
+ goto done;
+
+ if (netns_load_dtime_bpf(skel, setup_result))
+ goto done;
+
+ nstoken = open_netns(NS_FWD);
+ if (!ASSERT_OK_PTR(nstoken, "setns fwd"))
+ goto done;
+ err = set_forwarding(false);
+ close_netns(nstoken);
+ if (!ASSERT_OK(err, "disable forwarding"))
+ goto done;
+
+ test_tcp_clear_dtime(skel);
+
+ test_tcp_dtime(skel, AF_INET, true);
+ test_tcp_dtime(skel, AF_INET6, true);
+ test_udp_dtime(skel, AF_INET, true);
+ test_udp_dtime(skel, AF_INET6, true);
+
+ /* Test the kernel ip[6]_forward path instead
+ * of bpf_redirect_neigh().
+ */
+ nstoken = open_netns(NS_FWD);
+ if (!ASSERT_OK_PTR(nstoken, "setns fwd"))
+ goto done;
+ err = set_forwarding(true);
+ close_netns(nstoken);
+ if (!ASSERT_OK(err, "enable forwarding"))
+ goto done;
+
+ test_tcp_dtime(skel, AF_INET, false);
+ test_tcp_dtime(skel, AF_INET6, false);
+ test_udp_dtime(skel, AF_INET, false);
+ test_udp_dtime(skel, AF_INET6, false);
+
+done:
+ test_tc_dtime__destroy(skel);
+ close(hold_tstamp_fd);
+}
+
+static void test_tc_redirect_neigh_fib(struct netns_setup_result *setup_result)
+{
+ struct nstoken *nstoken = NULL;
+ struct test_tc_neigh_fib *skel = NULL;
+
+ nstoken = open_netns(NS_FWD);
+ if (!ASSERT_OK_PTR(nstoken, "setns fwd"))
+ return;
+
+ skel = test_tc_neigh_fib__open();
+ if (!ASSERT_OK_PTR(skel, "test_tc_neigh_fib__open"))
+ goto done;
+
+ if (!ASSERT_OK(test_tc_neigh_fib__load(skel), "test_tc_neigh_fib__load"))
+ goto done;
+
+ if (netns_load_bpf(skel->progs.tc_src, skel->progs.tc_dst,
+ skel->progs.tc_chk, setup_result))
+ goto done;
+
+ /* bpf_fib_lookup() checks if forwarding is enabled */
+ if (!ASSERT_OK(set_forwarding(true), "enable forwarding"))
+ goto done;
+
+ test_connectivity();
+
+done:
+ if (skel)
+ test_tc_neigh_fib__destroy(skel);
+ close_netns(nstoken);
+}
+
+static void test_tc_redirect_neigh(struct netns_setup_result *setup_result)
+{
+ struct nstoken *nstoken = NULL;
+ struct test_tc_neigh *skel = NULL;
+ int err;
+
+ nstoken = open_netns(NS_FWD);
+ if (!ASSERT_OK_PTR(nstoken, "setns fwd"))
+ return;
+
+ skel = test_tc_neigh__open();
+ if (!ASSERT_OK_PTR(skel, "test_tc_neigh__open"))
+ goto done;
+
+ skel->rodata->IFINDEX_SRC = setup_result->ifindex_src_fwd;
+ skel->rodata->IFINDEX_DST = setup_result->ifindex_dst_fwd;
+
+ err = test_tc_neigh__load(skel);
+ if (!ASSERT_OK(err, "test_tc_neigh__load"))
+ goto done;
+
+ if (netns_load_bpf(skel->progs.tc_src, skel->progs.tc_dst,
+ skel->progs.tc_chk, setup_result))
+ goto done;
+
+ if (!ASSERT_OK(set_forwarding(false), "disable forwarding"))
+ goto done;
+
+ test_connectivity();
+
+done:
+ if (skel)
+ test_tc_neigh__destroy(skel);
+ close_netns(nstoken);
+}
+
+static void test_tc_redirect_peer(struct netns_setup_result *setup_result)
+{
+ struct nstoken *nstoken;
+ struct test_tc_peer *skel;
+ int err;
+
+ nstoken = open_netns(NS_FWD);
+ if (!ASSERT_OK_PTR(nstoken, "setns fwd"))
+ return;
+
+ skel = test_tc_peer__open();
+ if (!ASSERT_OK_PTR(skel, "test_tc_peer__open"))
+ goto done;
+
+ skel->rodata->IFINDEX_SRC = setup_result->ifindex_src_fwd;
+ skel->rodata->IFINDEX_DST = setup_result->ifindex_dst_fwd;
+
+ err = test_tc_peer__load(skel);
+ if (!ASSERT_OK(err, "test_tc_peer__load"))
+ goto done;
+
+ if (netns_load_bpf(skel->progs.tc_src, skel->progs.tc_dst,
+ skel->progs.tc_chk, setup_result))
+ goto done;
+
+ if (!ASSERT_OK(set_forwarding(false), "disable forwarding"))
+ goto done;
+
+ test_connectivity();
+
+done:
+ if (skel)
+ test_tc_peer__destroy(skel);
+ close_netns(nstoken);
+}
+
+static int tun_open(char *name)
+{
+ struct ifreq ifr;
+ int fd, err;
+
+ fd = open("/dev/net/tun", O_RDWR);
+ if (!ASSERT_GE(fd, 0, "open /dev/net/tun"))
+ return -1;
+
+ memset(&ifr, 0, sizeof(ifr));
+
+ ifr.ifr_flags = IFF_TUN | IFF_NO_PI;
+ if (*name)
+ strncpy(ifr.ifr_name, name, IFNAMSIZ);
+
+ err = ioctl(fd, TUNSETIFF, &ifr);
+ if (!ASSERT_OK(err, "ioctl TUNSETIFF"))
+ goto fail;
+
+ SYS(fail, "ip link set dev %s up", name);
+
+ return fd;
+fail:
+ close(fd);
+ return -1;
+}
+
+enum {
+ SRC_TO_TARGET = 0,
+ TARGET_TO_SRC = 1,
+};
+
+static int tun_relay_loop(int src_fd, int target_fd)
+{
+ fd_set rfds, wfds;
+
+ FD_ZERO(&rfds);
+ FD_ZERO(&wfds);
+
+ for (;;) {
+ char buf[1500];
+ int direction, nread, nwrite;
+
+ FD_SET(src_fd, &rfds);
+ FD_SET(target_fd, &rfds);
+
+ if (select(1 + MAX(src_fd, target_fd), &rfds, NULL, NULL, NULL) < 0) {
+ log_err("select failed");
+ return 1;
+ }
+
+ direction = FD_ISSET(src_fd, &rfds) ? SRC_TO_TARGET : TARGET_TO_SRC;
+
+ nread = read(direction == SRC_TO_TARGET ? src_fd : target_fd, buf, sizeof(buf));
+ if (nread < 0) {
+ log_err("read failed");
+ return 1;
+ }
+
+ nwrite = write(direction == SRC_TO_TARGET ? target_fd : src_fd, buf, nread);
+ if (nwrite != nread) {
+ log_err("write failed");
+ return 1;
+ }
+ }
+}
+
+static void test_tc_redirect_peer_l3(struct netns_setup_result *setup_result)
+{
+ LIBBPF_OPTS(bpf_tc_hook, qdisc_tun_fwd);
+ LIBBPF_OPTS(bpf_tc_hook, qdisc_dst_fwd);
+ struct test_tc_peer *skel = NULL;
+ struct nstoken *nstoken = NULL;
+ int err;
+ int tunnel_pid = -1;
+ int src_fd, target_fd = -1;
+ int ifindex;
+
+ /* Start a L3 TUN/TAP tunnel between the src and dst namespaces.
+ * This test is using TUN/TAP instead of e.g. IPIP or GRE tunnel as those
+ * expose the L2 headers encapsulating the IP packet to BPF and hence
+ * don't have skb in suitable state for this test. Alternative to TUN/TAP
+ * would be e.g. Wireguard which would appear as a pure L3 device to BPF,
+ * but that requires much more complicated setup.
+ */
+ nstoken = open_netns(NS_SRC);
+ if (!ASSERT_OK_PTR(nstoken, "setns " NS_SRC))
+ return;
+
+ src_fd = tun_open("tun_src");
+ if (!ASSERT_GE(src_fd, 0, "tun_open tun_src"))
+ goto fail;
+
+ close_netns(nstoken);
+
+ nstoken = open_netns(NS_FWD);
+ if (!ASSERT_OK_PTR(nstoken, "setns " NS_FWD))
+ goto fail;
+
+ target_fd = tun_open("tun_fwd");
+ if (!ASSERT_GE(target_fd, 0, "tun_open tun_fwd"))
+ goto fail;
+
+ tunnel_pid = fork();
+ if (!ASSERT_GE(tunnel_pid, 0, "fork tun_relay_loop"))
+ goto fail;
+
+ if (tunnel_pid == 0)
+ exit(tun_relay_loop(src_fd, target_fd));
+
+ skel = test_tc_peer__open();
+ if (!ASSERT_OK_PTR(skel, "test_tc_peer__open"))
+ goto fail;
+
+ ifindex = if_nametoindex("tun_fwd");
+ if (!ASSERT_GT(ifindex, 0, "if_indextoname tun_fwd"))
+ goto fail;
+
+ skel->rodata->IFINDEX_SRC = ifindex;
+ skel->rodata->IFINDEX_DST = setup_result->ifindex_dst_fwd;
+
+ err = test_tc_peer__load(skel);
+ if (!ASSERT_OK(err, "test_tc_peer__load"))
+ goto fail;
+
+ /* Load "tc_src_l3" to the tun_fwd interface to redirect packets
+ * towards dst, and "tc_dst" to redirect packets
+ * and "tc_chk" on dst_fwd to drop non-redirected packets.
+ */
+ /* tc qdisc add dev tun_fwd clsact */
+ QDISC_CLSACT_CREATE(&qdisc_tun_fwd, ifindex);
+ /* tc filter add dev tun_fwd ingress bpf da tc_src_l3 */
+ XGRESS_FILTER_ADD(&qdisc_tun_fwd, BPF_TC_INGRESS, skel->progs.tc_src_l3, 0);
+
+ /* tc qdisc add dev dst_fwd clsact */
+ QDISC_CLSACT_CREATE(&qdisc_dst_fwd, setup_result->ifindex_dst_fwd);
+ /* tc filter add dev dst_fwd ingress bpf da tc_dst_l3 */
+ XGRESS_FILTER_ADD(&qdisc_dst_fwd, BPF_TC_INGRESS, skel->progs.tc_dst_l3, 0);
+ /* tc filter add dev dst_fwd egress bpf da tc_chk */
+ XGRESS_FILTER_ADD(&qdisc_dst_fwd, BPF_TC_EGRESS, skel->progs.tc_chk, 0);
+
+ /* Setup route and neigh tables */
+ SYS(fail, "ip -netns " NS_SRC " addr add dev tun_src " IP4_TUN_SRC "/24");
+ SYS(fail, "ip -netns " NS_FWD " addr add dev tun_fwd " IP4_TUN_FWD "/24");
+
+ SYS(fail, "ip -netns " NS_SRC " addr add dev tun_src " IP6_TUN_SRC "/64 nodad");
+ SYS(fail, "ip -netns " NS_FWD " addr add dev tun_fwd " IP6_TUN_FWD "/64 nodad");
+
+ SYS(fail, "ip -netns " NS_SRC " route del " IP4_DST "/32 dev src scope global");
+ SYS(fail, "ip -netns " NS_SRC " route add " IP4_DST "/32 via " IP4_TUN_FWD
+ " dev tun_src scope global");
+ SYS(fail, "ip -netns " NS_DST " route add " IP4_TUN_SRC "/32 dev dst scope global");
+ SYS(fail, "ip -netns " NS_SRC " route del " IP6_DST "/128 dev src scope global");
+ SYS(fail, "ip -netns " NS_SRC " route add " IP6_DST "/128 via " IP6_TUN_FWD
+ " dev tun_src scope global");
+ SYS(fail, "ip -netns " NS_DST " route add " IP6_TUN_SRC "/128 dev dst scope global");
+
+ SYS(fail, "ip -netns " NS_DST " neigh add " IP4_TUN_SRC " dev dst lladdr " MAC_DST_FWD);
+ SYS(fail, "ip -netns " NS_DST " neigh add " IP6_TUN_SRC " dev dst lladdr " MAC_DST_FWD);
+
+ if (!ASSERT_OK(set_forwarding(false), "disable forwarding"))
+ goto fail;
+
+ test_connectivity();
+
+fail:
+ if (tunnel_pid > 0) {
+ kill(tunnel_pid, SIGTERM);
+ waitpid(tunnel_pid, NULL, 0);
+ }
+ if (src_fd >= 0)
+ close(src_fd);
+ if (target_fd >= 0)
+ close(target_fd);
+ if (skel)
+ test_tc_peer__destroy(skel);
+ if (nstoken)
+ close_netns(nstoken);
+}
+
+#define RUN_TEST(name, mode) \
+ ({ \
+ struct netns_setup_result setup_result = { .dev_mode = mode, }; \
+ if (test__start_subtest(#name)) \
+ if (ASSERT_OK(netns_setup_namespaces("add"), "setup namespaces")) { \
+ if (ASSERT_OK(netns_setup_links_and_routes(&setup_result), \
+ "setup links and routes")) \
+ test_ ## name(&setup_result); \
+ netns_setup_namespaces("delete"); \
+ } \
+ })
+
+static void *test_tc_redirect_run_tests(void *arg)
+{
+ netns_setup_namespaces_nofail("delete");
+
+ RUN_TEST(tc_redirect_peer, MODE_VETH);
+ RUN_TEST(tc_redirect_peer, MODE_NETKIT);
+ RUN_TEST(tc_redirect_peer_l3, MODE_VETH);
+ RUN_TEST(tc_redirect_peer_l3, MODE_NETKIT);
+ RUN_TEST(tc_redirect_neigh, MODE_VETH);
+ RUN_TEST(tc_redirect_neigh_fib, MODE_VETH);
+ RUN_TEST(tc_redirect_dtime, MODE_VETH);
+ return NULL;
+}
+
+void test_tc_redirect(void)
+{
+ pthread_t test_thread;
+ int err;
+
+ /* Run the tests in their own thread to isolate the namespace changes
+ * so they do not affect the environment of other tests.
+ * (specifically needed because of unshare(CLONE_NEWNS) in open_netns())
+ */
+ err = pthread_create(&test_thread, NULL, &test_tc_redirect_run_tests, NULL);
+ if (ASSERT_OK(err, "pthread_create"))
+ ASSERT_OK(pthread_join(test_thread, NULL), "pthread_join");
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/tcp_custom_syncookie.c b/tools/testing/selftests/bpf/prog_tests/tcp_custom_syncookie.c
new file mode 100644
index 000000000000..eaf441dc7e79
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/tcp_custom_syncookie.c
@@ -0,0 +1,150 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright Amazon.com Inc. or its affiliates. */
+
+#define _GNU_SOURCE
+#include <sched.h>
+#include <stdlib.h>
+#include <net/if.h>
+
+#include "test_progs.h"
+#include "cgroup_helpers.h"
+#include "network_helpers.h"
+#include "test_tcp_custom_syncookie.skel.h"
+
+static struct test_tcp_custom_syncookie_case {
+ int family, type;
+ char addr[16];
+ char name[10];
+} test_cases[] = {
+ {
+ .name = "IPv4 TCP",
+ .family = AF_INET,
+ .type = SOCK_STREAM,
+ .addr = "127.0.0.1",
+ },
+ {
+ .name = "IPv6 TCP",
+ .family = AF_INET6,
+ .type = SOCK_STREAM,
+ .addr = "::1",
+ },
+};
+
+static int setup_netns(void)
+{
+ if (!ASSERT_OK(unshare(CLONE_NEWNET), "create netns"))
+ return -1;
+
+ if (!ASSERT_OK(system("ip link set dev lo up"), "ip"))
+ goto err;
+
+ if (!ASSERT_OK(write_sysctl("/proc/sys/net/ipv4/tcp_ecn", "1"),
+ "write_sysctl"))
+ goto err;
+
+ return 0;
+err:
+ return -1;
+}
+
+static int setup_tc(struct test_tcp_custom_syncookie *skel)
+{
+ LIBBPF_OPTS(bpf_tc_hook, qdisc_lo, .attach_point = BPF_TC_INGRESS);
+ LIBBPF_OPTS(bpf_tc_opts, tc_attach,
+ .prog_fd = bpf_program__fd(skel->progs.tcp_custom_syncookie));
+
+ qdisc_lo.ifindex = if_nametoindex("lo");
+ if (!ASSERT_OK(bpf_tc_hook_create(&qdisc_lo), "qdisc add dev lo clsact"))
+ goto err;
+
+ if (!ASSERT_OK(bpf_tc_attach(&qdisc_lo, &tc_attach),
+ "filter add dev lo ingress"))
+ goto err;
+
+ return 0;
+err:
+ return -1;
+}
+
+#define msg "Hello World"
+#define msglen 11
+
+static void transfer_message(int sender, int receiver)
+{
+ char buf[msglen];
+ int ret;
+
+ ret = send(sender, msg, msglen, 0);
+ if (!ASSERT_EQ(ret, msglen, "send"))
+ return;
+
+ memset(buf, 0, sizeof(buf));
+
+ ret = recv(receiver, buf, msglen, 0);
+ if (!ASSERT_EQ(ret, msglen, "recv"))
+ return;
+
+ ret = strncmp(buf, msg, msglen);
+ if (!ASSERT_EQ(ret, 0, "strncmp"))
+ return;
+}
+
+static void create_connection(struct test_tcp_custom_syncookie_case *test_case)
+{
+ int server, client, child;
+
+ server = start_server(test_case->family, test_case->type, test_case->addr, 0, 0);
+ if (!ASSERT_NEQ(server, -1, "start_server"))
+ return;
+
+ client = connect_to_fd(server, 0);
+ if (!ASSERT_NEQ(client, -1, "connect_to_fd"))
+ goto close_server;
+
+ child = accept(server, NULL, 0);
+ if (!ASSERT_NEQ(child, -1, "accept"))
+ goto close_client;
+
+ transfer_message(client, child);
+ transfer_message(child, client);
+
+ close(child);
+close_client:
+ close(client);
+close_server:
+ close(server);
+}
+
+void test_tcp_custom_syncookie(void)
+{
+ struct test_tcp_custom_syncookie *skel;
+ int i;
+
+ if (setup_netns())
+ return;
+
+ skel = test_tcp_custom_syncookie__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "open_and_load"))
+ return;
+
+ if (setup_tc(skel))
+ goto destroy_skel;
+
+ for (i = 0; i < ARRAY_SIZE(test_cases); i++) {
+ if (!test__start_subtest(test_cases[i].name))
+ continue;
+
+ skel->bss->handled_syn = false;
+ skel->bss->handled_ack = false;
+
+ create_connection(&test_cases[i]);
+
+ ASSERT_EQ(skel->bss->handled_syn, true, "SYN is not handled at tc.");
+ ASSERT_EQ(skel->bss->handled_ack, true, "ACK is not handled at tc");
+ }
+
+destroy_skel:
+ system("tc qdisc del dev lo clsact");
+
+ test_tcp_custom_syncookie__destroy(skel);
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/tcp_estats.c b/tools/testing/selftests/bpf/prog_tests/tcp_estats.c
index 594307dffd13..e070bca2b764 100644
--- a/tools/testing/selftests/bpf/prog_tests/tcp_estats.c
+++ b/tools/testing/selftests/bpf/prog_tests/tcp_estats.c
@@ -3,14 +3,12 @@
void test_tcp_estats(void)
{
- const char *file = "./test_tcp_estats.o";
+ const char *file = "./test_tcp_estats.bpf.o";
int err, prog_fd;
struct bpf_object *obj;
- __u32 duration = 0;
- err = bpf_prog_load(file, BPF_PROG_TYPE_TRACEPOINT, &obj, &prog_fd);
- CHECK(err, "", "err %d errno %d\n", err, errno);
- if (err)
+ err = bpf_prog_test_load(file, BPF_PROG_TYPE_TRACEPOINT, &obj, &prog_fd);
+ if (!ASSERT_OK(err, ""))
return;
bpf_object__close(obj);
diff --git a/tools/testing/selftests/bpf/prog_tests/tcp_hdr_options.c b/tools/testing/selftests/bpf/prog_tests/tcp_hdr_options.c
new file mode 100644
index 000000000000..56685fc03c7e
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/tcp_hdr_options.c
@@ -0,0 +1,563 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2020 Facebook */
+
+#define _GNU_SOURCE
+#include <sched.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <sys/socket.h>
+#include <linux/compiler.h>
+
+#include "test_progs.h"
+#include "cgroup_helpers.h"
+#include "network_helpers.h"
+#include "test_tcp_hdr_options.h"
+#include "test_tcp_hdr_options.skel.h"
+#include "test_misc_tcp_hdr_options.skel.h"
+
+#define LO_ADDR6 "::1"
+#define CG_NAME "/tcpbpf-hdr-opt-test"
+
+static struct bpf_test_option exp_passive_estab_in;
+static struct bpf_test_option exp_active_estab_in;
+static struct bpf_test_option exp_passive_fin_in;
+static struct bpf_test_option exp_active_fin_in;
+static struct hdr_stg exp_passive_hdr_stg;
+static struct hdr_stg exp_active_hdr_stg = { .active = true, };
+
+static struct test_misc_tcp_hdr_options *misc_skel;
+static struct test_tcp_hdr_options *skel;
+static int lport_linum_map_fd;
+static int hdr_stg_map_fd;
+static __u32 duration;
+static int cg_fd;
+
+struct sk_fds {
+ int srv_fd;
+ int passive_fd;
+ int active_fd;
+ int passive_lport;
+ int active_lport;
+};
+
+static int create_netns(void)
+{
+ if (!ASSERT_OK(unshare(CLONE_NEWNET), "create netns"))
+ return -1;
+
+ if (!ASSERT_OK(system("ip link set dev lo up"), "run ip cmd"))
+ return -1;
+
+ return 0;
+}
+
+static void print_hdr_stg(const struct hdr_stg *hdr_stg, const char *prefix)
+{
+ fprintf(stderr, "%s{active:%u, resend_syn:%u, syncookie:%u, fastopen:%u}\n",
+ prefix ? : "", hdr_stg->active, hdr_stg->resend_syn,
+ hdr_stg->syncookie, hdr_stg->fastopen);
+}
+
+static void print_option(const struct bpf_test_option *opt, const char *prefix)
+{
+ fprintf(stderr, "%s{flags:0x%x, max_delack_ms:%u, rand:0x%x}\n",
+ prefix ? : "", opt->flags, opt->max_delack_ms, opt->rand);
+}
+
+static void sk_fds_close(struct sk_fds *sk_fds)
+{
+ close(sk_fds->srv_fd);
+ close(sk_fds->passive_fd);
+ close(sk_fds->active_fd);
+}
+
+static int sk_fds_shutdown(struct sk_fds *sk_fds)
+{
+ int ret, abyte;
+
+ shutdown(sk_fds->active_fd, SHUT_WR);
+ ret = read(sk_fds->passive_fd, &abyte, sizeof(abyte));
+ if (!ASSERT_EQ(ret, 0, "read-after-shutdown(passive_fd):"))
+ return -1;
+
+ shutdown(sk_fds->passive_fd, SHUT_WR);
+ ret = read(sk_fds->active_fd, &abyte, sizeof(abyte));
+ if (!ASSERT_EQ(ret, 0, "read-after-shutdown(active_fd):"))
+ return -1;
+
+ return 0;
+}
+
+static int sk_fds_connect(struct sk_fds *sk_fds, bool fast_open)
+{
+ const char fast[] = "FAST!!!";
+ struct sockaddr_in6 addr6;
+ socklen_t len;
+
+ sk_fds->srv_fd = start_server(AF_INET6, SOCK_STREAM, LO_ADDR6, 0, 0);
+ if (!ASSERT_NEQ(sk_fds->srv_fd, -1, "start_server"))
+ goto error;
+
+ if (fast_open)
+ sk_fds->active_fd = fastopen_connect(sk_fds->srv_fd, fast,
+ sizeof(fast), 0);
+ else
+ sk_fds->active_fd = connect_to_fd(sk_fds->srv_fd, 0);
+
+ if (!ASSERT_NEQ(sk_fds->active_fd, -1, "")) {
+ close(sk_fds->srv_fd);
+ goto error;
+ }
+
+ len = sizeof(addr6);
+ if (!ASSERT_OK(getsockname(sk_fds->srv_fd, (struct sockaddr *)&addr6,
+ &len), "getsockname(srv_fd)"))
+ goto error_close;
+ sk_fds->passive_lport = ntohs(addr6.sin6_port);
+
+ len = sizeof(addr6);
+ if (!ASSERT_OK(getsockname(sk_fds->active_fd, (struct sockaddr *)&addr6,
+ &len), "getsockname(active_fd)"))
+ goto error_close;
+ sk_fds->active_lport = ntohs(addr6.sin6_port);
+
+ sk_fds->passive_fd = accept(sk_fds->srv_fd, NULL, 0);
+ if (!ASSERT_NEQ(sk_fds->passive_fd, -1, "accept(srv_fd)"))
+ goto error_close;
+
+ if (fast_open) {
+ char bytes_in[sizeof(fast)];
+ int ret;
+
+ ret = read(sk_fds->passive_fd, bytes_in, sizeof(bytes_in));
+ if (!ASSERT_EQ(ret, sizeof(fast), "read fastopen syn data")) {
+ close(sk_fds->passive_fd);
+ goto error_close;
+ }
+ }
+
+ return 0;
+
+error_close:
+ close(sk_fds->active_fd);
+ close(sk_fds->srv_fd);
+
+error:
+ memset(sk_fds, -1, sizeof(*sk_fds));
+ return -1;
+}
+
+static int check_hdr_opt(const struct bpf_test_option *exp,
+ const struct bpf_test_option *act,
+ const char *hdr_desc)
+{
+ if (!ASSERT_EQ(memcmp(exp, act, sizeof(*exp)), 0, hdr_desc)) {
+ print_option(exp, "expected: ");
+ print_option(act, " actual: ");
+ return -1;
+ }
+
+ return 0;
+}
+
+static int check_hdr_stg(const struct hdr_stg *exp, int fd,
+ const char *stg_desc)
+{
+ struct hdr_stg act;
+
+ if (!ASSERT_OK(bpf_map_lookup_elem(hdr_stg_map_fd, &fd, &act),
+ "map_lookup(hdr_stg_map_fd)"))
+ return -1;
+
+ if (!ASSERT_EQ(memcmp(exp, &act, sizeof(*exp)), 0, stg_desc)) {
+ print_hdr_stg(exp, "expected: ");
+ print_hdr_stg(&act, " actual: ");
+ return -1;
+ }
+
+ return 0;
+}
+
+static int check_error_linum(const struct sk_fds *sk_fds)
+{
+ unsigned int nr_errors = 0;
+ struct linum_err linum_err;
+ int lport;
+
+ lport = sk_fds->passive_lport;
+ if (!bpf_map_lookup_elem(lport_linum_map_fd, &lport, &linum_err)) {
+ fprintf(stderr,
+ "bpf prog error out at lport:passive(%d), linum:%u err:%d\n",
+ lport, linum_err.linum, linum_err.err);
+ nr_errors++;
+ }
+
+ lport = sk_fds->active_lport;
+ if (!bpf_map_lookup_elem(lport_linum_map_fd, &lport, &linum_err)) {
+ fprintf(stderr,
+ "bpf prog error out at lport:active(%d), linum:%u err:%d\n",
+ lport, linum_err.linum, linum_err.err);
+ nr_errors++;
+ }
+
+ return nr_errors;
+}
+
+static void check_hdr_and_close_fds(struct sk_fds *sk_fds)
+{
+ const __u32 expected_inherit_cb_flags =
+ BPF_SOCK_OPS_PARSE_UNKNOWN_HDR_OPT_CB_FLAG |
+ BPF_SOCK_OPS_WRITE_HDR_OPT_CB_FLAG |
+ BPF_SOCK_OPS_STATE_CB_FLAG;
+
+ if (sk_fds_shutdown(sk_fds))
+ goto check_linum;
+
+ if (!ASSERT_EQ(expected_inherit_cb_flags, skel->bss->inherit_cb_flags,
+ "inherit_cb_flags"))
+ goto check_linum;
+
+ if (check_hdr_stg(&exp_passive_hdr_stg, sk_fds->passive_fd,
+ "passive_hdr_stg"))
+ goto check_linum;
+
+ if (check_hdr_stg(&exp_active_hdr_stg, sk_fds->active_fd,
+ "active_hdr_stg"))
+ goto check_linum;
+
+ if (check_hdr_opt(&exp_passive_estab_in, &skel->bss->passive_estab_in,
+ "passive_estab_in"))
+ goto check_linum;
+
+ if (check_hdr_opt(&exp_active_estab_in, &skel->bss->active_estab_in,
+ "active_estab_in"))
+ goto check_linum;
+
+ if (check_hdr_opt(&exp_passive_fin_in, &skel->bss->passive_fin_in,
+ "passive_fin_in"))
+ goto check_linum;
+
+ check_hdr_opt(&exp_active_fin_in, &skel->bss->active_fin_in,
+ "active_fin_in");
+
+check_linum:
+ ASSERT_FALSE(check_error_linum(sk_fds), "check_error_linum");
+ sk_fds_close(sk_fds);
+}
+
+static void prepare_out(void)
+{
+ skel->bss->active_syn_out = exp_passive_estab_in;
+ skel->bss->passive_synack_out = exp_active_estab_in;
+
+ skel->bss->active_fin_out = exp_passive_fin_in;
+ skel->bss->passive_fin_out = exp_active_fin_in;
+}
+
+static void reset_test(void)
+{
+ size_t optsize = sizeof(struct bpf_test_option);
+ int lport, err;
+
+ memset(&skel->bss->passive_synack_out, 0, optsize);
+ memset(&skel->bss->passive_fin_out, 0, optsize);
+
+ memset(&skel->bss->passive_estab_in, 0, optsize);
+ memset(&skel->bss->passive_fin_in, 0, optsize);
+
+ memset(&skel->bss->active_syn_out, 0, optsize);
+ memset(&skel->bss->active_fin_out, 0, optsize);
+
+ memset(&skel->bss->active_estab_in, 0, optsize);
+ memset(&skel->bss->active_fin_in, 0, optsize);
+
+ skel->bss->inherit_cb_flags = 0;
+
+ skel->data->test_kind = TCPOPT_EXP;
+ skel->data->test_magic = 0xeB9F;
+
+ memset(&exp_passive_estab_in, 0, optsize);
+ memset(&exp_active_estab_in, 0, optsize);
+ memset(&exp_passive_fin_in, 0, optsize);
+ memset(&exp_active_fin_in, 0, optsize);
+
+ memset(&exp_passive_hdr_stg, 0, sizeof(exp_passive_hdr_stg));
+ memset(&exp_active_hdr_stg, 0, sizeof(exp_active_hdr_stg));
+ exp_active_hdr_stg.active = true;
+
+ err = bpf_map_get_next_key(lport_linum_map_fd, NULL, &lport);
+ while (!err) {
+ bpf_map_delete_elem(lport_linum_map_fd, &lport);
+ err = bpf_map_get_next_key(lport_linum_map_fd, &lport, &lport);
+ }
+}
+
+static void fastopen_estab(void)
+{
+ struct bpf_link *link;
+ struct sk_fds sk_fds;
+
+ hdr_stg_map_fd = bpf_map__fd(skel->maps.hdr_stg_map);
+ lport_linum_map_fd = bpf_map__fd(skel->maps.lport_linum_map);
+
+ exp_passive_estab_in.flags = OPTION_F_RAND | OPTION_F_MAX_DELACK_MS;
+ exp_passive_estab_in.rand = 0xfa;
+ exp_passive_estab_in.max_delack_ms = 11;
+
+ exp_active_estab_in.flags = OPTION_F_RAND | OPTION_F_MAX_DELACK_MS;
+ exp_active_estab_in.rand = 0xce;
+ exp_active_estab_in.max_delack_ms = 22;
+
+ exp_passive_hdr_stg.fastopen = true;
+
+ prepare_out();
+
+ /* Allow fastopen without fastopen cookie */
+ if (write_sysctl("/proc/sys/net/ipv4/tcp_fastopen", "1543"))
+ return;
+
+ link = bpf_program__attach_cgroup(skel->progs.estab, cg_fd);
+ if (!ASSERT_OK_PTR(link, "attach_cgroup(estab)"))
+ return;
+
+ if (sk_fds_connect(&sk_fds, true)) {
+ bpf_link__destroy(link);
+ return;
+ }
+
+ check_hdr_and_close_fds(&sk_fds);
+ bpf_link__destroy(link);
+}
+
+static void syncookie_estab(void)
+{
+ struct bpf_link *link;
+ struct sk_fds sk_fds;
+
+ hdr_stg_map_fd = bpf_map__fd(skel->maps.hdr_stg_map);
+ lport_linum_map_fd = bpf_map__fd(skel->maps.lport_linum_map);
+
+ exp_passive_estab_in.flags = OPTION_F_RAND | OPTION_F_MAX_DELACK_MS;
+ exp_passive_estab_in.rand = 0xfa;
+ exp_passive_estab_in.max_delack_ms = 11;
+
+ exp_active_estab_in.flags = OPTION_F_RAND | OPTION_F_MAX_DELACK_MS |
+ OPTION_F_RESEND;
+ exp_active_estab_in.rand = 0xce;
+ exp_active_estab_in.max_delack_ms = 22;
+
+ exp_passive_hdr_stg.syncookie = true;
+ exp_active_hdr_stg.resend_syn = true;
+
+ prepare_out();
+
+ /* Clear the RESEND to ensure the bpf prog can learn
+ * want_cookie and set the RESEND by itself.
+ */
+ skel->bss->passive_synack_out.flags &= ~OPTION_F_RESEND;
+
+ /* Enforce syncookie mode */
+ if (write_sysctl("/proc/sys/net/ipv4/tcp_syncookies", "2"))
+ return;
+
+ link = bpf_program__attach_cgroup(skel->progs.estab, cg_fd);
+ if (!ASSERT_OK_PTR(link, "attach_cgroup(estab)"))
+ return;
+
+ if (sk_fds_connect(&sk_fds, false)) {
+ bpf_link__destroy(link);
+ return;
+ }
+
+ check_hdr_and_close_fds(&sk_fds);
+ bpf_link__destroy(link);
+}
+
+static void fin(void)
+{
+ struct bpf_link *link;
+ struct sk_fds sk_fds;
+
+ hdr_stg_map_fd = bpf_map__fd(skel->maps.hdr_stg_map);
+ lport_linum_map_fd = bpf_map__fd(skel->maps.lport_linum_map);
+
+ exp_passive_fin_in.flags = OPTION_F_RAND;
+ exp_passive_fin_in.rand = 0xfa;
+
+ exp_active_fin_in.flags = OPTION_F_RAND;
+ exp_active_fin_in.rand = 0xce;
+
+ prepare_out();
+
+ if (write_sysctl("/proc/sys/net/ipv4/tcp_syncookies", "1"))
+ return;
+
+ link = bpf_program__attach_cgroup(skel->progs.estab, cg_fd);
+ if (!ASSERT_OK_PTR(link, "attach_cgroup(estab)"))
+ return;
+
+ if (sk_fds_connect(&sk_fds, false)) {
+ bpf_link__destroy(link);
+ return;
+ }
+
+ check_hdr_and_close_fds(&sk_fds);
+ bpf_link__destroy(link);
+}
+
+static void __simple_estab(bool exprm)
+{
+ struct bpf_link *link;
+ struct sk_fds sk_fds;
+
+ hdr_stg_map_fd = bpf_map__fd(skel->maps.hdr_stg_map);
+ lport_linum_map_fd = bpf_map__fd(skel->maps.lport_linum_map);
+
+ exp_passive_estab_in.flags = OPTION_F_RAND | OPTION_F_MAX_DELACK_MS;
+ exp_passive_estab_in.rand = 0xfa;
+ exp_passive_estab_in.max_delack_ms = 11;
+
+ exp_active_estab_in.flags = OPTION_F_RAND | OPTION_F_MAX_DELACK_MS;
+ exp_active_estab_in.rand = 0xce;
+ exp_active_estab_in.max_delack_ms = 22;
+
+ prepare_out();
+
+ if (!exprm) {
+ skel->data->test_kind = 0xB9;
+ skel->data->test_magic = 0;
+ }
+
+ if (write_sysctl("/proc/sys/net/ipv4/tcp_syncookies", "1"))
+ return;
+
+ link = bpf_program__attach_cgroup(skel->progs.estab, cg_fd);
+ if (!ASSERT_OK_PTR(link, "attach_cgroup(estab)"))
+ return;
+
+ if (sk_fds_connect(&sk_fds, false)) {
+ bpf_link__destroy(link);
+ return;
+ }
+
+ check_hdr_and_close_fds(&sk_fds);
+ bpf_link__destroy(link);
+}
+
+static void no_exprm_estab(void)
+{
+ __simple_estab(false);
+}
+
+static void simple_estab(void)
+{
+ __simple_estab(true);
+}
+
+static void misc(void)
+{
+ const char send_msg[] = "MISC!!!";
+ char recv_msg[sizeof(send_msg)];
+ const unsigned int nr_data = 2;
+ struct bpf_link *link;
+ struct sk_fds sk_fds;
+ int i, ret;
+
+ lport_linum_map_fd = bpf_map__fd(misc_skel->maps.lport_linum_map);
+
+ if (write_sysctl("/proc/sys/net/ipv4/tcp_syncookies", "1"))
+ return;
+
+ link = bpf_program__attach_cgroup(misc_skel->progs.misc_estab, cg_fd);
+ if (!ASSERT_OK_PTR(link, "attach_cgroup(misc_estab)"))
+ return;
+
+ if (sk_fds_connect(&sk_fds, false)) {
+ bpf_link__destroy(link);
+ return;
+ }
+
+ for (i = 0; i < nr_data; i++) {
+ /* MSG_EOR to ensure skb will not be combined */
+ ret = send(sk_fds.active_fd, send_msg, sizeof(send_msg),
+ MSG_EOR);
+ if (!ASSERT_EQ(ret, sizeof(send_msg), "send(msg)"))
+ goto check_linum;
+
+ ret = read(sk_fds.passive_fd, recv_msg, sizeof(recv_msg));
+ if (!ASSERT_EQ(ret, sizeof(send_msg), "read(msg)"))
+ goto check_linum;
+ }
+
+ if (sk_fds_shutdown(&sk_fds))
+ goto check_linum;
+
+ ASSERT_EQ(misc_skel->bss->nr_syn, 1, "unexpected nr_syn");
+
+ ASSERT_EQ(misc_skel->bss->nr_data, nr_data, "unexpected nr_data");
+
+ /* The last ACK may have been delayed, so it is either 1 or 2. */
+ CHECK(misc_skel->bss->nr_pure_ack != 1 &&
+ misc_skel->bss->nr_pure_ack != 2,
+ "unexpected nr_pure_ack",
+ "expected (1 or 2) != actual (%u)\n",
+ misc_skel->bss->nr_pure_ack);
+
+ ASSERT_EQ(misc_skel->bss->nr_fin, 1, "unexpected nr_fin");
+
+ ASSERT_EQ(misc_skel->bss->nr_hwtstamp, 0, "nr_hwtstamp");
+
+check_linum:
+ ASSERT_FALSE(check_error_linum(&sk_fds), "check_error_linum");
+ sk_fds_close(&sk_fds);
+ bpf_link__destroy(link);
+}
+
+struct test {
+ const char *desc;
+ void (*run)(void);
+};
+
+#define DEF_TEST(name) { #name, name }
+static struct test tests[] = {
+ DEF_TEST(simple_estab),
+ DEF_TEST(no_exprm_estab),
+ DEF_TEST(syncookie_estab),
+ DEF_TEST(fastopen_estab),
+ DEF_TEST(fin),
+ DEF_TEST(misc),
+};
+
+void test_tcp_hdr_options(void)
+{
+ int i;
+
+ skel = test_tcp_hdr_options__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "open and load skel"))
+ return;
+
+ misc_skel = test_misc_tcp_hdr_options__open_and_load();
+ if (!ASSERT_OK_PTR(misc_skel, "open and load misc test skel"))
+ goto skel_destroy;
+
+ cg_fd = test__join_cgroup(CG_NAME);
+ if (!ASSERT_GE(cg_fd, 0, "join_cgroup"))
+ goto skel_destroy;
+
+ for (i = 0; i < ARRAY_SIZE(tests); i++) {
+ if (!test__start_subtest(tests[i].desc))
+ continue;
+
+ if (create_netns())
+ break;
+
+ tests[i].run();
+
+ reset_test();
+ }
+
+ close(cg_fd);
+skel_destroy:
+ test_misc_tcp_hdr_options__destroy(misc_skel);
+ test_tcp_hdr_options__destroy(skel);
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/tcp_rtt.c b/tools/testing/selftests/bpf/prog_tests/tcp_rtt.c
index 9013a0c01eed..8fe84da1b9b4 100644
--- a/tools/testing/selftests/bpf/prog_tests/tcp_rtt.c
+++ b/tools/testing/selftests/bpf/prog_tests/tcp_rtt.c
@@ -2,6 +2,7 @@
#include <test_progs.h>
#include "cgroup_helpers.h"
#include "network_helpers.h"
+#include "tcp_rtt.skel.h"
struct tcp_rtt_storage {
__u32 invoked;
@@ -15,8 +16,7 @@ static void send_byte(int fd)
{
char b = 0x55;
- if (CHECK_FAIL(write(fd, &b, sizeof(b)) != 1))
- perror("Failed to send single byte");
+ ASSERT_EQ(write(fd, &b, sizeof(b)), 1, "send single byte");
}
static int wait_for_ack(int fd, int retries)
@@ -50,10 +50,8 @@ static int verify_sk(int map_fd, int client_fd, const char *msg, __u32 invoked,
int err = 0;
struct tcp_rtt_storage val;
- if (CHECK_FAIL(bpf_map_lookup_elem(map_fd, &client_fd, &val) < 0)) {
- perror("Failed to read socket storage");
+ if (!ASSERT_GE(bpf_map_lookup_elem(map_fd, &client_fd, &val), 0, "read socket storage"))
return -1;
- }
if (val.invoked != invoked) {
log_err("%s: unexpected bpf_tcp_sock.invoked %d != %d",
@@ -91,26 +89,18 @@ static int verify_sk(int map_fd, int client_fd, const char *msg, __u32 invoked,
static int run_test(int cgroup_fd, int server_fd)
{
- struct bpf_prog_load_attr attr = {
- .prog_type = BPF_PROG_TYPE_SOCK_OPS,
- .file = "./tcp_rtt.o",
- .expected_attach_type = BPF_CGROUP_SOCK_OPS,
- };
- struct bpf_object *obj;
- struct bpf_map *map;
+ struct tcp_rtt *skel;
int client_fd;
int prog_fd;
int map_fd;
int err;
- err = bpf_prog_load_xattr(&attr, &obj, &prog_fd);
- if (err) {
- log_err("Failed to load BPF object");
+ skel = tcp_rtt__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "skel_open_load"))
return -1;
- }
- map = bpf_map__next(NULL, obj);
- map_fd = bpf_map__fd(map);
+ map_fd = bpf_map__fd(skel->maps.socket_storage_map);
+ prog_fd = bpf_program__fd(skel->progs._sockops);
err = bpf_prog_attach(prog_fd, cgroup_fd, BPF_CGROUP_SOCK_OPS, 0);
if (err) {
@@ -118,7 +108,7 @@ static int run_test(int cgroup_fd, int server_fd)
goto close_bpf_object;
}
- client_fd = connect_to_fd(AF_INET, SOCK_STREAM, server_fd);
+ client_fd = connect_to_fd(server_fd, 0);
if (client_fd < 0) {
err = -1;
goto close_bpf_object;
@@ -149,7 +139,7 @@ close_client_fd:
close(client_fd);
close_bpf_object:
- bpf_object__close(obj);
+ tcp_rtt__destroy(skel);
return err;
}
@@ -158,14 +148,14 @@ void test_tcp_rtt(void)
int server_fd, cgroup_fd;
cgroup_fd = test__join_cgroup("/tcp_rtt");
- if (CHECK_FAIL(cgroup_fd < 0))
+ if (!ASSERT_GE(cgroup_fd, 0, "join_cgroup /tcp_rtt"))
return;
- server_fd = start_server(AF_INET, SOCK_STREAM);
- if (CHECK_FAIL(server_fd < 0))
+ server_fd = start_server(AF_INET, SOCK_STREAM, NULL, 0, 0);
+ if (!ASSERT_GE(server_fd, 0, "start_server"))
goto close_cgroup_fd;
- CHECK_FAIL(run_test(cgroup_fd, server_fd));
+ ASSERT_OK(run_test(cgroup_fd, server_fd), "run_test");
close(server_fd);
diff --git a/tools/testing/selftests/bpf/prog_tests/tcpbpf_user.c b/tools/testing/selftests/bpf/prog_tests/tcpbpf_user.c
new file mode 100644
index 000000000000..7e8fe1bad03f
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/tcpbpf_user.c
@@ -0,0 +1,137 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <test_progs.h>
+#include <network_helpers.h>
+
+#include "test_tcpbpf.h"
+#include "test_tcpbpf_kern.skel.h"
+
+#define LO_ADDR6 "::1"
+#define CG_NAME "/tcpbpf-user-test"
+
+static void verify_result(struct tcpbpf_globals *result)
+{
+ __u32 expected_events = ((1 << BPF_SOCK_OPS_TIMEOUT_INIT) |
+ (1 << BPF_SOCK_OPS_RWND_INIT) |
+ (1 << BPF_SOCK_OPS_TCP_CONNECT_CB) |
+ (1 << BPF_SOCK_OPS_ACTIVE_ESTABLISHED_CB) |
+ (1 << BPF_SOCK_OPS_PASSIVE_ESTABLISHED_CB) |
+ (1 << BPF_SOCK_OPS_NEEDS_ECN) |
+ (1 << BPF_SOCK_OPS_STATE_CB) |
+ (1 << BPF_SOCK_OPS_TCP_LISTEN_CB));
+
+ /* check global map */
+ ASSERT_EQ(expected_events, result->event_map, "event_map");
+
+ ASSERT_EQ(result->bytes_received, 501, "bytes_received");
+ ASSERT_EQ(result->bytes_acked, 1002, "bytes_acked");
+ ASSERT_EQ(result->data_segs_in, 1, "data_segs_in");
+ ASSERT_EQ(result->data_segs_out, 1, "data_segs_out");
+ ASSERT_EQ(result->bad_cb_test_rv, 0x80, "bad_cb_test_rv");
+ ASSERT_EQ(result->good_cb_test_rv, 0, "good_cb_test_rv");
+ ASSERT_EQ(result->num_listen, 1, "num_listen");
+
+ /* 3 comes from one listening socket + both ends of the connection */
+ ASSERT_EQ(result->num_close_events, 3, "num_close_events");
+
+ /* check setsockopt for SAVE_SYN */
+ ASSERT_EQ(result->tcp_save_syn, 0, "tcp_save_syn");
+
+ /* check getsockopt for SAVED_SYN */
+ ASSERT_EQ(result->tcp_saved_syn, 1, "tcp_saved_syn");
+
+ /* check getsockopt for window_clamp */
+ ASSERT_EQ(result->window_clamp_client, 9216, "window_clamp_client");
+ ASSERT_EQ(result->window_clamp_server, 9216, "window_clamp_server");
+}
+
+static void run_test(struct tcpbpf_globals *result)
+{
+ int listen_fd = -1, cli_fd = -1, accept_fd = -1;
+ char buf[1000];
+ int err = -1;
+ int i, rv;
+
+ listen_fd = start_server(AF_INET6, SOCK_STREAM, LO_ADDR6, 0, 0);
+ if (!ASSERT_NEQ(listen_fd, -1, "start_server"))
+ goto done;
+
+ cli_fd = connect_to_fd(listen_fd, 0);
+ if (!ASSERT_NEQ(cli_fd, -1, "connect_to_fd(listen_fd)"))
+ goto done;
+
+ accept_fd = accept(listen_fd, NULL, NULL);
+ if (!ASSERT_NEQ(accept_fd, -1, "accept(listen_fd)"))
+ goto done;
+
+ /* Send 1000B of '+'s from cli_fd -> accept_fd */
+ for (i = 0; i < 1000; i++)
+ buf[i] = '+';
+
+ rv = send(cli_fd, buf, 1000, 0);
+ if (!ASSERT_EQ(rv, 1000, "send(cli_fd)"))
+ goto done;
+
+ rv = recv(accept_fd, buf, 1000, 0);
+ if (!ASSERT_EQ(rv, 1000, "recv(accept_fd)"))
+ goto done;
+
+ /* Send 500B of '.'s from accept_fd ->cli_fd */
+ for (i = 0; i < 500; i++)
+ buf[i] = '.';
+
+ rv = send(accept_fd, buf, 500, 0);
+ if (!ASSERT_EQ(rv, 500, "send(accept_fd)"))
+ goto done;
+
+ rv = recv(cli_fd, buf, 500, 0);
+ if (!ASSERT_EQ(rv, 500, "recv(cli_fd)"))
+ goto done;
+
+ /*
+ * shutdown accept first to guarantee correct ordering for
+ * bytes_received and bytes_acked when we go to verify the results.
+ */
+ shutdown(accept_fd, SHUT_WR);
+ err = recv(cli_fd, buf, 1, 0);
+ if (!ASSERT_OK(err, "recv(cli_fd) for fin"))
+ goto done;
+
+ shutdown(cli_fd, SHUT_WR);
+ err = recv(accept_fd, buf, 1, 0);
+ ASSERT_OK(err, "recv(accept_fd) for fin");
+done:
+ if (accept_fd != -1)
+ close(accept_fd);
+ if (cli_fd != -1)
+ close(cli_fd);
+ if (listen_fd != -1)
+ close(listen_fd);
+
+ if (!err)
+ verify_result(result);
+}
+
+void test_tcpbpf_user(void)
+{
+ struct test_tcpbpf_kern *skel;
+ int cg_fd = -1;
+
+ skel = test_tcpbpf_kern__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "open and load skel"))
+ return;
+
+ cg_fd = test__join_cgroup(CG_NAME);
+ if (!ASSERT_GE(cg_fd, 0, "test__join_cgroup(" CG_NAME ")"))
+ goto err;
+
+ skel->links.bpf_testcb = bpf_program__attach_cgroup(skel->progs.bpf_testcb, cg_fd);
+ if (!ASSERT_OK_PTR(skel->links.bpf_testcb, "attach_cgroup(bpf_testcb)"))
+ goto err;
+
+ run_test(&skel->bss->global);
+
+err:
+ if (cg_fd != -1)
+ close(cg_fd);
+ test_tcpbpf_kern__destroy(skel);
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/test_bpf_ma.c b/tools/testing/selftests/bpf/prog_tests/test_bpf_ma.c
new file mode 100644
index 000000000000..ccae0b31ac6c
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/test_bpf_ma.c
@@ -0,0 +1,74 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (C) 2023. Huawei Technologies Co., Ltd */
+#define _GNU_SOURCE
+#include <sched.h>
+#include <pthread.h>
+#include <stdbool.h>
+#include <bpf/btf.h>
+#include <test_progs.h>
+
+#include "test_bpf_ma.skel.h"
+
+static void do_bpf_ma_test(const char *name)
+{
+ struct test_bpf_ma *skel;
+ struct bpf_program *prog;
+ struct btf *btf;
+ int i, err, id;
+ char tname[32];
+
+ skel = test_bpf_ma__open();
+ if (!ASSERT_OK_PTR(skel, "open"))
+ return;
+
+ btf = bpf_object__btf(skel->obj);
+ if (!ASSERT_OK_PTR(btf, "btf"))
+ goto out;
+
+ for (i = 0; i < ARRAY_SIZE(skel->rodata->data_sizes); i++) {
+ snprintf(tname, sizeof(tname), "bin_data_%u", skel->rodata->data_sizes[i]);
+ id = btf__find_by_name_kind(btf, tname, BTF_KIND_STRUCT);
+ if (!ASSERT_GT(id, 0, tname))
+ goto out;
+ skel->rodata->data_btf_ids[i] = id;
+ }
+
+ for (i = 0; i < ARRAY_SIZE(skel->rodata->percpu_data_sizes); i++) {
+ snprintf(tname, sizeof(tname), "percpu_bin_data_%u", skel->rodata->percpu_data_sizes[i]);
+ id = btf__find_by_name_kind(btf, tname, BTF_KIND_STRUCT);
+ if (!ASSERT_GT(id, 0, tname))
+ goto out;
+ skel->rodata->percpu_data_btf_ids[i] = id;
+ }
+
+ prog = bpf_object__find_program_by_name(skel->obj, name);
+ if (!ASSERT_OK_PTR(prog, "invalid prog name"))
+ goto out;
+ bpf_program__set_autoload(prog, true);
+
+ err = test_bpf_ma__load(skel);
+ if (!ASSERT_OK(err, "load"))
+ goto out;
+
+ err = test_bpf_ma__attach(skel);
+ if (!ASSERT_OK(err, "attach"))
+ goto out;
+
+ skel->bss->pid = getpid();
+ usleep(1);
+ ASSERT_OK(skel->bss->err, "test error");
+out:
+ test_bpf_ma__destroy(skel);
+}
+
+void test_test_bpf_ma(void)
+{
+ if (test__start_subtest("batch_alloc_free"))
+ do_bpf_ma_test("test_batch_alloc_free");
+ if (test__start_subtest("free_through_map_free"))
+ do_bpf_ma_test("test_free_through_map_free");
+ if (test__start_subtest("batch_percpu_alloc_free"))
+ do_bpf_ma_test("test_batch_percpu_alloc_free");
+ if (test__start_subtest("percpu_free_through_map_free"))
+ do_bpf_ma_test("test_percpu_free_through_map_free");
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/test_bpf_syscall_macro.c b/tools/testing/selftests/bpf/prog_tests/test_bpf_syscall_macro.c
new file mode 100644
index 000000000000..2900c5e9a016
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/test_bpf_syscall_macro.c
@@ -0,0 +1,90 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright 2022 Sony Group Corporation */
+#define _GNU_SOURCE
+#include <fcntl.h>
+#include <sys/prctl.h>
+#include <test_progs.h>
+#include "bpf_syscall_macro.skel.h"
+
+void test_bpf_syscall_macro(void)
+{
+ struct bpf_syscall_macro *skel = NULL;
+ int err;
+ int exp_arg1 = 1001;
+ unsigned long exp_arg2 = 12;
+ unsigned long exp_arg3 = 13;
+ unsigned long exp_arg4 = 14;
+ unsigned long exp_arg5 = 15;
+ loff_t off_in, off_out;
+ ssize_t r;
+
+ /* check whether it can open program */
+ skel = bpf_syscall_macro__open();
+ if (!ASSERT_OK_PTR(skel, "bpf_syscall_macro__open"))
+ return;
+
+ skel->rodata->filter_pid = getpid();
+
+ /* check whether it can load program */
+ err = bpf_syscall_macro__load(skel);
+ if (!ASSERT_OK(err, "bpf_syscall_macro__load"))
+ goto cleanup;
+
+ /* check whether it can attach kprobe */
+ err = bpf_syscall_macro__attach(skel);
+ if (!ASSERT_OK(err, "bpf_syscall_macro__attach"))
+ goto cleanup;
+
+ /* check whether args of syscall are copied correctly */
+ prctl(exp_arg1, exp_arg2, exp_arg3, exp_arg4, exp_arg5);
+
+#if defined(__aarch64__) || defined(__s390__)
+ ASSERT_NEQ(skel->bss->arg1, exp_arg1, "syscall_arg1");
+#else
+ ASSERT_EQ(skel->bss->arg1, exp_arg1, "syscall_arg1");
+#endif
+ ASSERT_EQ(skel->bss->arg2, exp_arg2, "syscall_arg2");
+ ASSERT_EQ(skel->bss->arg3, exp_arg3, "syscall_arg3");
+ /* it cannot copy arg4 when uses PT_REGS_PARM4 on x86_64 */
+#ifdef __x86_64__
+ ASSERT_NEQ(skel->bss->arg4_cx, exp_arg4, "syscall_arg4_from_cx");
+#else
+ ASSERT_EQ(skel->bss->arg4_cx, exp_arg4, "syscall_arg4_from_cx");
+#endif
+ ASSERT_EQ(skel->bss->arg4, exp_arg4, "syscall_arg4");
+ ASSERT_EQ(skel->bss->arg5, exp_arg5, "syscall_arg5");
+
+ /* check whether args of syscall are copied correctly for CORE variants */
+ ASSERT_EQ(skel->bss->arg1_core, exp_arg1, "syscall_arg1_core_variant");
+ ASSERT_EQ(skel->bss->arg2_core, exp_arg2, "syscall_arg2_core_variant");
+ ASSERT_EQ(skel->bss->arg3_core, exp_arg3, "syscall_arg3_core_variant");
+ /* it cannot copy arg4 when uses PT_REGS_PARM4_CORE on x86_64 */
+#ifdef __x86_64__
+ ASSERT_NEQ(skel->bss->arg4_core_cx, exp_arg4, "syscall_arg4_from_cx_core_variant");
+#else
+ ASSERT_EQ(skel->bss->arg4_core_cx, exp_arg4, "syscall_arg4_from_cx_core_variant");
+#endif
+ ASSERT_EQ(skel->bss->arg4_core, exp_arg4, "syscall_arg4_core_variant");
+ ASSERT_EQ(skel->bss->arg5_core, exp_arg5, "syscall_arg5_core_variant");
+
+ ASSERT_EQ(skel->bss->option_syscall, exp_arg1, "BPF_KPROBE_SYSCALL_option");
+ ASSERT_EQ(skel->bss->arg2_syscall, exp_arg2, "BPF_KPROBE_SYSCALL_arg2");
+ ASSERT_EQ(skel->bss->arg3_syscall, exp_arg3, "BPF_KPROBE_SYSCALL_arg3");
+ ASSERT_EQ(skel->bss->arg4_syscall, exp_arg4, "BPF_KPROBE_SYSCALL_arg4");
+ ASSERT_EQ(skel->bss->arg5_syscall, exp_arg5, "BPF_KPROBE_SYSCALL_arg5");
+
+ r = splice(-42, &off_in, 42, &off_out, 0x12340000, SPLICE_F_NONBLOCK);
+ err = -errno;
+ ASSERT_EQ(r, -1, "splice_res");
+ ASSERT_EQ(err, -EBADF, "splice_err");
+
+ ASSERT_EQ(skel->bss->splice_fd_in, -42, "splice_arg1");
+ ASSERT_EQ(skel->bss->splice_off_in, (__u64)&off_in, "splice_arg2");
+ ASSERT_EQ(skel->bss->splice_fd_out, 42, "splice_arg3");
+ ASSERT_EQ(skel->bss->splice_off_out, (__u64)&off_out, "splice_arg4");
+ ASSERT_EQ(skel->bss->splice_len, 0x12340000, "splice_arg5");
+ ASSERT_EQ(skel->bss->splice_flags, SPLICE_F_NONBLOCK, "splice_arg6");
+
+cleanup:
+ bpf_syscall_macro__destroy(skel);
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/test_bpffs.c b/tools/testing/selftests/bpf/prog_tests/test_bpffs.c
new file mode 100644
index 000000000000..ea933fd151c3
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/test_bpffs.c
@@ -0,0 +1,164 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2020 Facebook */
+#define _GNU_SOURCE
+#include <stdio.h>
+#include <sched.h>
+#include <sys/mount.h>
+#include <sys/stat.h>
+#include <sys/types.h>
+#include <test_progs.h>
+
+/* TDIR must be in a location we can create a directory in. */
+#define TDIR "/tmp/test_bpffs_testdir"
+
+static int read_iter(char *file)
+{
+ /* 1024 should be enough to get contiguous 4 "iter" letters at some point */
+ char buf[1024];
+ int fd, len;
+
+ fd = open(file, 0);
+ if (fd < 0)
+ return -1;
+ while ((len = read(fd, buf, sizeof(buf))) > 0) {
+ buf[sizeof(buf) - 1] = '\0';
+ if (strstr(buf, "iter")) {
+ close(fd);
+ return 0;
+ }
+ }
+ close(fd);
+ return -1;
+}
+
+static int fn(void)
+{
+ struct stat a, b, c;
+ int err, map;
+
+ err = unshare(CLONE_NEWNS);
+ if (!ASSERT_OK(err, "unshare"))
+ goto out;
+
+ err = mount("", "/", "", MS_REC | MS_PRIVATE, NULL);
+ if (!ASSERT_OK(err, "mount /"))
+ goto out;
+
+ err = mkdir(TDIR, 0777);
+ /* If the directory already exists we can carry on. It may be left over
+ * from a previous run.
+ */
+ if ((err && errno != EEXIST) && !ASSERT_OK(err, "mkdir " TDIR))
+ goto out;
+
+ err = mount("none", TDIR, "tmpfs", 0, NULL);
+ if (!ASSERT_OK(err, "mount tmpfs"))
+ goto out;
+
+ err = mkdir(TDIR "/fs1", 0777);
+ if (!ASSERT_OK(err, "mkdir " TDIR "/fs1"))
+ goto out;
+ err = mkdir(TDIR "/fs2", 0777);
+ if (!ASSERT_OK(err, "mkdir " TDIR "/fs2"))
+ goto out;
+
+ err = mount("bpf", TDIR "/fs1", "bpf", 0, NULL);
+ if (!ASSERT_OK(err, "mount bpffs " TDIR "/fs1"))
+ goto out;
+ err = mount("bpf", TDIR "/fs2", "bpf", 0, NULL);
+ if (!ASSERT_OK(err, "mount bpffs " TDIR "/fs2"))
+ goto out;
+
+ err = read_iter(TDIR "/fs1/maps.debug");
+ if (!ASSERT_OK(err, "reading " TDIR "/fs1/maps.debug"))
+ goto out;
+ err = read_iter(TDIR "/fs2/progs.debug");
+ if (!ASSERT_OK(err, "reading " TDIR "/fs2/progs.debug"))
+ goto out;
+
+ err = mkdir(TDIR "/fs1/a", 0777);
+ if (!ASSERT_OK(err, "creating " TDIR "/fs1/a"))
+ goto out;
+ err = mkdir(TDIR "/fs1/a/1", 0777);
+ if (!ASSERT_OK(err, "creating " TDIR "/fs1/a/1"))
+ goto out;
+ err = mkdir(TDIR "/fs1/b", 0777);
+ if (!ASSERT_OK(err, "creating " TDIR "/fs1/b"))
+ goto out;
+
+ map = bpf_map_create(BPF_MAP_TYPE_ARRAY, NULL, 4, 4, 1, NULL);
+ if (!ASSERT_GT(map, 0, "create_map(ARRAY)"))
+ goto out;
+ err = bpf_obj_pin(map, TDIR "/fs1/c");
+ if (!ASSERT_OK(err, "pin map"))
+ goto out;
+ close(map);
+
+ /* Check that RENAME_EXCHANGE works for directories. */
+ err = stat(TDIR "/fs1/a", &a);
+ if (!ASSERT_OK(err, "stat(" TDIR "/fs1/a)"))
+ goto out;
+ err = renameat2(0, TDIR "/fs1/a", 0, TDIR "/fs1/b", RENAME_EXCHANGE);
+ if (!ASSERT_OK(err, "renameat2(/fs1/a, /fs1/b, RENAME_EXCHANGE)"))
+ goto out;
+ err = stat(TDIR "/fs1/b", &b);
+ if (!ASSERT_OK(err, "stat(" TDIR "/fs1/b)"))
+ goto out;
+ if (!ASSERT_EQ(a.st_ino, b.st_ino, "b should have a's inode"))
+ goto out;
+ err = access(TDIR "/fs1/b/1", F_OK);
+ if (!ASSERT_OK(err, "access(" TDIR "/fs1/b/1)"))
+ goto out;
+
+ /* Check that RENAME_EXCHANGE works for mixed file types. */
+ err = stat(TDIR "/fs1/c", &c);
+ if (!ASSERT_OK(err, "stat(" TDIR "/fs1/map)"))
+ goto out;
+ err = renameat2(0, TDIR "/fs1/c", 0, TDIR "/fs1/b", RENAME_EXCHANGE);
+ if (!ASSERT_OK(err, "renameat2(/fs1/c, /fs1/b, RENAME_EXCHANGE)"))
+ goto out;
+ err = stat(TDIR "/fs1/b", &b);
+ if (!ASSERT_OK(err, "stat(" TDIR "/fs1/b)"))
+ goto out;
+ if (!ASSERT_EQ(c.st_ino, b.st_ino, "b should have c's inode"))
+ goto out;
+ err = access(TDIR "/fs1/c/1", F_OK);
+ if (!ASSERT_OK(err, "access(" TDIR "/fs1/c/1)"))
+ goto out;
+
+ /* Check that RENAME_NOREPLACE works. */
+ err = renameat2(0, TDIR "/fs1/b", 0, TDIR "/fs1/a", RENAME_NOREPLACE);
+ if (!ASSERT_ERR(err, "renameat2(RENAME_NOREPLACE)")) {
+ err = -EINVAL;
+ goto out;
+ }
+ err = access(TDIR "/fs1/b", F_OK);
+ if (!ASSERT_OK(err, "access(" TDIR "/fs1/b)"))
+ goto out;
+
+out:
+ umount(TDIR "/fs1");
+ umount(TDIR "/fs2");
+ rmdir(TDIR "/fs1");
+ rmdir(TDIR "/fs2");
+ umount(TDIR);
+ rmdir(TDIR);
+ exit(err);
+}
+
+void test_test_bpffs(void)
+{
+ int err, duration = 0, status = 0;
+ pid_t pid;
+
+ pid = fork();
+ if (CHECK(pid == -1, "clone", "clone failed %d", errno))
+ return;
+ if (pid == 0)
+ fn();
+ err = waitpid(pid, &status, 0);
+ if (CHECK(err == -1 && errno != ECHILD, "waitpid", "failed %d", errno))
+ return;
+ if (CHECK(WEXITSTATUS(status), "bpffs test ", "failed %d", WEXITSTATUS(status)))
+ return;
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/test_bprm_opts.c b/tools/testing/selftests/bpf/prog_tests/test_bprm_opts.c
new file mode 100644
index 000000000000..a0054019e677
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/test_bprm_opts.c
@@ -0,0 +1,108 @@
+// SPDX-License-Identifier: GPL-2.0
+
+/*
+ * Copyright (C) 2020 Google LLC.
+ */
+
+#include <test_progs.h>
+#include <linux/limits.h>
+
+#include "bprm_opts.skel.h"
+#include "network_helpers.h"
+#include "task_local_storage_helpers.h"
+
+static const char * const bash_envp[] = { "TMPDIR=shouldnotbeset", NULL };
+
+static int update_storage(int map_fd, int secureexec)
+{
+ int task_fd, ret = 0;
+
+ task_fd = sys_pidfd_open(getpid(), 0);
+ if (task_fd < 0)
+ return errno;
+
+ ret = bpf_map_update_elem(map_fd, &task_fd, &secureexec, BPF_NOEXIST);
+ if (ret)
+ ret = errno;
+
+ close(task_fd);
+ return ret;
+}
+
+static int run_set_secureexec(int map_fd, int secureexec)
+{
+ int child_pid, child_status, ret, null_fd;
+
+ child_pid = fork();
+ if (child_pid == 0) {
+ null_fd = open("/dev/null", O_WRONLY);
+ if (null_fd == -1)
+ exit(errno);
+ dup2(null_fd, STDOUT_FILENO);
+ dup2(null_fd, STDERR_FILENO);
+ close(null_fd);
+
+ /* Ensure that all executions from hereon are
+ * secure by setting a local storage which is read by
+ * the bprm_creds_for_exec hook and sets bprm->secureexec.
+ */
+ ret = update_storage(map_fd, secureexec);
+ if (ret)
+ exit(ret);
+
+ /* If the binary is executed with securexec=1, the dynamic
+ * loader ingores and unsets certain variables like LD_PRELOAD,
+ * TMPDIR etc. TMPDIR is used here to simplify the example, as
+ * LD_PRELOAD requires a real .so file.
+ *
+ * If the value of TMPDIR is set, the bash command returns 10
+ * and if the value is unset, it returns 20.
+ */
+ execle("/bin/bash", "bash", "-c",
+ "[[ -z \"${TMPDIR}\" ]] || exit 10 && exit 20", NULL,
+ bash_envp);
+ exit(errno);
+ } else if (child_pid > 0) {
+ waitpid(child_pid, &child_status, 0);
+ ret = WEXITSTATUS(child_status);
+
+ /* If a secureexec occurred, the exit status should be 20 */
+ if (secureexec && ret == 20)
+ return 0;
+
+ /* If normal execution happened, the exit code should be 10 */
+ if (!secureexec && ret == 10)
+ return 0;
+ }
+
+ return -EINVAL;
+}
+
+void test_test_bprm_opts(void)
+{
+ int err, duration = 0;
+ struct bprm_opts *skel = NULL;
+
+ skel = bprm_opts__open_and_load();
+ if (CHECK(!skel, "skel_load", "skeleton failed\n"))
+ goto close_prog;
+
+ err = bprm_opts__attach(skel);
+ if (CHECK(err, "attach", "attach failed: %d\n", err))
+ goto close_prog;
+
+ /* Run the test with the secureexec bit unset */
+ err = run_set_secureexec(bpf_map__fd(skel->maps.secure_exec_task_map),
+ 0 /* secureexec */);
+ if (CHECK(err, "run_set_secureexec:0", "err = %d\n", err))
+ goto close_prog;
+
+ /* Run the test with the secureexec bit set */
+ err = run_set_secureexec(bpf_map__fd(skel->maps.secure_exec_task_map),
+ 1 /* secureexec */);
+ if (CHECK(err, "run_set_secureexec:1", "err = %d\n", err))
+ goto close_prog;
+
+close_prog:
+ bprm_opts__destroy(skel);
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/test_global_funcs.c b/tools/testing/selftests/bpf/prog_tests/test_global_funcs.c
index 25b068591e9a..e905cbaf6b3d 100644
--- a/tools/testing/selftests/bpf/prog_tests/test_global_funcs.c
+++ b/tools/testing/selftests/bpf/prog_tests/test_global_funcs.c
@@ -1,82 +1,162 @@
// SPDX-License-Identifier: GPL-2.0
/* Copyright (c) 2020 Facebook */
#include <test_progs.h>
+#include "test_global_func1.skel.h"
+#include "test_global_func2.skel.h"
+#include "test_global_func3.skel.h"
+#include "test_global_func4.skel.h"
+#include "test_global_func5.skel.h"
+#include "test_global_func6.skel.h"
+#include "test_global_func7.skel.h"
+#include "test_global_func8.skel.h"
+#include "test_global_func9.skel.h"
+#include "test_global_func10.skel.h"
+#include "test_global_func11.skel.h"
+#include "test_global_func12.skel.h"
+#include "test_global_func13.skel.h"
+#include "test_global_func14.skel.h"
+#include "test_global_func15.skel.h"
+#include "test_global_func16.skel.h"
+#include "test_global_func17.skel.h"
+#include "test_global_func_ctx_args.skel.h"
-const char *err_str;
-bool found;
+#include "bpf/libbpf_internal.h"
+#include "btf_helpers.h"
-static int libbpf_debug_print(enum libbpf_print_level level,
- const char *format, va_list args)
+static void check_ctx_arg_type(const struct btf *btf, const struct btf_param *p)
{
- char *log_buf;
+ const struct btf_type *t;
+ const char *s;
- if (level != LIBBPF_WARN ||
- strcmp(format, "libbpf: \n%s\n")) {
- vprintf(format, args);
- return 0;
+ t = btf__type_by_id(btf, p->type);
+ if (!ASSERT_EQ(btf_kind(t), BTF_KIND_PTR, "ptr_t"))
+ return;
+
+ s = btf_type_raw_dump(btf, t->type);
+ if (!ASSERT_HAS_SUBSTR(s, "STRUCT 'bpf_perf_event_data' size=0 vlen=0",
+ "ctx_struct_t"))
+ return;
+}
+
+static void subtest_ctx_arg_rewrite(void)
+{
+ struct test_global_func_ctx_args *skel = NULL;
+ struct bpf_prog_info info;
+ char func_info_buf[1024] __attribute__((aligned(8)));
+ struct bpf_func_info_min *rec;
+ struct btf *btf = NULL;
+ __u32 info_len = sizeof(info);
+ int err, fd, i;
+ struct btf *kern_btf = NULL;
+
+ kern_btf = btf__load_vmlinux_btf();
+ if (!ASSERT_OK_PTR(kern_btf, "kern_btf_load"))
+ return;
+
+ /* simple detection of kernel native arg:ctx tag support */
+ if (btf__find_by_name_kind(kern_btf, "bpf_subprog_arg_info", BTF_KIND_STRUCT) > 0) {
+ test__skip();
+ btf__free(kern_btf);
+ return;
}
+ btf__free(kern_btf);
+
+ skel = test_global_func_ctx_args__open();
+ if (!ASSERT_OK_PTR(skel, "skel_open"))
+ return;
- log_buf = va_arg(args, char *);
- if (!log_buf)
+ bpf_program__set_autoload(skel->progs.arg_tag_ctx_perf, true);
+
+ err = test_global_func_ctx_args__load(skel);
+ if (!ASSERT_OK(err, "skel_load"))
goto out;
- if (strstr(log_buf, err_str) == 0)
- found = true;
-out:
- printf(format, log_buf);
- return 0;
-}
-extern int extra_prog_load_log_flags;
+ memset(&info, 0, sizeof(info));
+ info.func_info = ptr_to_u64(&func_info_buf);
+ info.nr_func_info = 3;
+ info.func_info_rec_size = sizeof(struct bpf_func_info_min);
-static int check_load(const char *file)
-{
- struct bpf_prog_load_attr attr;
- struct bpf_object *obj = NULL;
- int err, prog_fd;
-
- memset(&attr, 0, sizeof(struct bpf_prog_load_attr));
- attr.file = file;
- attr.prog_type = BPF_PROG_TYPE_UNSPEC;
- attr.log_level = extra_prog_load_log_flags;
- attr.prog_flags = BPF_F_TEST_RND_HI32;
- found = false;
- err = bpf_prog_load_xattr(&attr, &obj, &prog_fd);
- bpf_object__close(obj);
- return err;
-}
+ fd = bpf_program__fd(skel->progs.arg_tag_ctx_perf);
+ err = bpf_prog_get_info_by_fd(fd, &info, &info_len);
+ if (!ASSERT_OK(err, "prog_info"))
+ goto out;
+
+ if (!ASSERT_EQ(info.nr_func_info, 3, "nr_func_info"))
+ goto out;
+
+ btf = btf__load_from_kernel_by_id(info.btf_id);
+ if (!ASSERT_OK_PTR(btf, "obj_kern_btf"))
+ goto out;
+
+ rec = (struct bpf_func_info_min *)func_info_buf;
+ for (i = 0; i < info.nr_func_info; i++, rec = (void *)rec + info.func_info_rec_size) {
+ const struct btf_type *fn_t, *proto_t;
+ const char *name;
+
+ if (rec->insn_off == 0)
+ continue; /* main prog, skip */
+
+ fn_t = btf__type_by_id(btf, rec->type_id);
+ if (!ASSERT_OK_PTR(fn_t, "fn_type"))
+ goto out;
+ if (!ASSERT_EQ(btf_kind(fn_t), BTF_KIND_FUNC, "fn_type_kind"))
+ goto out;
+ proto_t = btf__type_by_id(btf, fn_t->type);
+ if (!ASSERT_OK_PTR(proto_t, "proto_type"))
+ goto out;
+
+ name = btf__name_by_offset(btf, fn_t->name_off);
+ if (strcmp(name, "subprog_ctx_tag") == 0) {
+ /* int subprog_ctx_tag(void *ctx __arg_ctx) */
+ if (!ASSERT_EQ(btf_vlen(proto_t), 1, "arg_cnt"))
+ goto out;
-struct test_def {
- const char *file;
- const char *err_str;
-};
+ /* arg 0 is PTR -> STRUCT bpf_perf_event_data */
+ check_ctx_arg_type(btf, &btf_params(proto_t)[0]);
+ } else if (strcmp(name, "subprog_multi_ctx_tags") == 0) {
+ /* int subprog_multi_ctx_tags(void *ctx1 __arg_ctx,
+ * struct my_struct *mem,
+ * void *ctx2 __arg_ctx)
+ */
+ if (!ASSERT_EQ(btf_vlen(proto_t), 3, "arg_cnt"))
+ goto out;
+
+ /* arg 0 is PTR -> STRUCT bpf_perf_event_data */
+ check_ctx_arg_type(btf, &btf_params(proto_t)[0]);
+ /* arg 2 is PTR -> STRUCT bpf_perf_event_data */
+ check_ctx_arg_type(btf, &btf_params(proto_t)[2]);
+ } else {
+ ASSERT_FAIL("unexpected subprog %s", name);
+ goto out;
+ }
+ }
+
+out:
+ btf__free(btf);
+ test_global_func_ctx_args__destroy(skel);
+}
void test_test_global_funcs(void)
{
- struct test_def tests[] = {
- { "test_global_func1.o", "combined stack size of 4 calls is 544" },
- { "test_global_func2.o" },
- { "test_global_func3.o" , "the call stack of 8 frames" },
- { "test_global_func4.o" },
- { "test_global_func5.o" , "expected pointer to ctx, but got PTR" },
- { "test_global_func6.o" , "modified ctx ptr R2" },
- { "test_global_func7.o" , "foo() doesn't return scalar" },
- };
- libbpf_print_fn_t old_print_fn = NULL;
- int err, i, duration = 0;
-
- old_print_fn = libbpf_set_print(libbpf_debug_print);
-
- for (i = 0; i < ARRAY_SIZE(tests); i++) {
- const struct test_def *test = &tests[i];
-
- if (!test__start_subtest(test->file))
- continue;
-
- err_str = test->err_str;
- err = check_load(test->file);
- CHECK_FAIL(!!err ^ !!err_str);
- if (err_str)
- CHECK(found, "", "expected string '%s'", err_str);
- }
- libbpf_set_print(old_print_fn);
+ RUN_TESTS(test_global_func1);
+ RUN_TESTS(test_global_func2);
+ RUN_TESTS(test_global_func3);
+ RUN_TESTS(test_global_func4);
+ RUN_TESTS(test_global_func5);
+ RUN_TESTS(test_global_func6);
+ RUN_TESTS(test_global_func7);
+ RUN_TESTS(test_global_func8);
+ RUN_TESTS(test_global_func9);
+ RUN_TESTS(test_global_func10);
+ RUN_TESTS(test_global_func11);
+ RUN_TESTS(test_global_func12);
+ RUN_TESTS(test_global_func13);
+ RUN_TESTS(test_global_func14);
+ RUN_TESTS(test_global_func15);
+ RUN_TESTS(test_global_func16);
+ RUN_TESTS(test_global_func17);
+ RUN_TESTS(test_global_func_ctx_args);
+
+ if (test__start_subtest("ctx_arg_rewrite"))
+ subtest_ctx_arg_rewrite();
}
diff --git a/tools/testing/selftests/bpf/prog_tests/test_ima.c b/tools/testing/selftests/bpf/prog_tests/test_ima.c
new file mode 100644
index 000000000000..810b14981c2e
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/test_ima.c
@@ -0,0 +1,240 @@
+// SPDX-License-Identifier: GPL-2.0
+
+/*
+ * Copyright (C) 2020 Google LLC.
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <unistd.h>
+#include <sys/wait.h>
+#include <test_progs.h>
+#include <linux/ring_buffer.h>
+
+#include "ima.skel.h"
+
+#define MAX_SAMPLES 4
+
+static int _run_measured_process(const char *measured_dir, u32 *monitored_pid,
+ const char *cmd)
+{
+ int child_pid, child_status;
+
+ child_pid = fork();
+ if (child_pid == 0) {
+ *monitored_pid = getpid();
+ execlp("./ima_setup.sh", "./ima_setup.sh", cmd, measured_dir,
+ NULL);
+ exit(errno);
+
+ } else if (child_pid > 0) {
+ waitpid(child_pid, &child_status, 0);
+ return WEXITSTATUS(child_status);
+ }
+
+ return -EINVAL;
+}
+
+static int run_measured_process(const char *measured_dir, u32 *monitored_pid)
+{
+ return _run_measured_process(measured_dir, monitored_pid, "run");
+}
+
+static u64 ima_hash_from_bpf[MAX_SAMPLES];
+static int ima_hash_from_bpf_idx;
+
+static int process_sample(void *ctx, void *data, size_t len)
+{
+ if (ima_hash_from_bpf_idx >= MAX_SAMPLES)
+ return -ENOSPC;
+
+ ima_hash_from_bpf[ima_hash_from_bpf_idx++] = *((u64 *)data);
+ return 0;
+}
+
+static void test_init(struct ima__bss *bss)
+{
+ ima_hash_from_bpf_idx = 0;
+
+ bss->use_ima_file_hash = false;
+ bss->enable_bprm_creds_for_exec = false;
+ bss->enable_kernel_read_file = false;
+ bss->test_deny = false;
+}
+
+void test_test_ima(void)
+{
+ char measured_dir_template[] = "/tmp/ima_measuredXXXXXX";
+ struct ring_buffer *ringbuf = NULL;
+ const char *measured_dir;
+ u64 bin_true_sample;
+ char cmd[256];
+
+ int err, duration = 0, fresh_digest_idx = 0;
+ struct ima *skel = NULL;
+
+ skel = ima__open_and_load();
+ if (CHECK(!skel, "skel_load", "skeleton failed\n"))
+ goto close_prog;
+
+ ringbuf = ring_buffer__new(bpf_map__fd(skel->maps.ringbuf),
+ process_sample, NULL, NULL);
+ if (!ASSERT_OK_PTR(ringbuf, "ringbuf"))
+ goto close_prog;
+
+ err = ima__attach(skel);
+ if (CHECK(err, "attach", "attach failed: %d\n", err))
+ goto close_prog;
+
+ measured_dir = mkdtemp(measured_dir_template);
+ if (CHECK(measured_dir == NULL, "mkdtemp", "err %d\n", errno))
+ goto close_prog;
+
+ snprintf(cmd, sizeof(cmd), "./ima_setup.sh setup %s", measured_dir);
+ err = system(cmd);
+ if (CHECK(err, "failed to run command", "%s, errno = %d\n", cmd, errno))
+ goto close_clean;
+
+ /*
+ * Test #1
+ * - Goal: obtain a sample with the bpf_ima_inode_hash() helper
+ * - Expected result: 1 sample (/bin/true)
+ */
+ test_init(skel->bss);
+ err = run_measured_process(measured_dir, &skel->bss->monitored_pid);
+ if (CHECK(err, "run_measured_process #1", "err = %d\n", err))
+ goto close_clean;
+
+ err = ring_buffer__consume(ringbuf);
+ ASSERT_EQ(err, 1, "num_samples_or_err");
+ ASSERT_NEQ(ima_hash_from_bpf[0], 0, "ima_hash");
+
+ /*
+ * Test #2
+ * - Goal: obtain samples with the bpf_ima_file_hash() helper
+ * - Expected result: 2 samples (./ima_setup.sh, /bin/true)
+ */
+ test_init(skel->bss);
+ skel->bss->use_ima_file_hash = true;
+ err = run_measured_process(measured_dir, &skel->bss->monitored_pid);
+ if (CHECK(err, "run_measured_process #2", "err = %d\n", err))
+ goto close_clean;
+
+ err = ring_buffer__consume(ringbuf);
+ ASSERT_EQ(err, 2, "num_samples_or_err");
+ ASSERT_NEQ(ima_hash_from_bpf[0], 0, "ima_hash");
+ ASSERT_NEQ(ima_hash_from_bpf[1], 0, "ima_hash");
+ bin_true_sample = ima_hash_from_bpf[1];
+
+ /*
+ * Test #3
+ * - Goal: confirm that bpf_ima_inode_hash() returns a non-fresh digest
+ * - Expected result:
+ * 1 sample (/bin/true: fresh) if commit 62622dab0a28 applied
+ * 2 samples (/bin/true: non-fresh, fresh) if commit 62622dab0a28 is
+ * not applied
+ *
+ * If commit 62622dab0a28 ("ima: return IMA digest value only when
+ * IMA_COLLECTED flag is set") is applied, bpf_ima_inode_hash() refuses
+ * to give a non-fresh digest, hence the correct result is 1 instead of
+ * 2.
+ */
+ test_init(skel->bss);
+
+ err = _run_measured_process(measured_dir, &skel->bss->monitored_pid,
+ "modify-bin");
+ if (CHECK(err, "modify-bin #3", "err = %d\n", err))
+ goto close_clean;
+
+ skel->bss->enable_bprm_creds_for_exec = true;
+ err = run_measured_process(measured_dir, &skel->bss->monitored_pid);
+ if (CHECK(err, "run_measured_process #3", "err = %d\n", err))
+ goto close_clean;
+
+ err = ring_buffer__consume(ringbuf);
+ ASSERT_GE(err, 1, "num_samples_or_err");
+ if (err == 2) {
+ ASSERT_NEQ(ima_hash_from_bpf[0], 0, "ima_hash");
+ ASSERT_EQ(ima_hash_from_bpf[0], bin_true_sample,
+ "sample_equal_or_err");
+ fresh_digest_idx = 1;
+ }
+
+ ASSERT_NEQ(ima_hash_from_bpf[fresh_digest_idx], 0, "ima_hash");
+ /* IMA refreshed the digest. */
+ ASSERT_NEQ(ima_hash_from_bpf[fresh_digest_idx], bin_true_sample,
+ "sample_equal_or_err");
+
+ /*
+ * Test #4
+ * - Goal: verify that bpf_ima_file_hash() returns a fresh digest
+ * - Expected result: 4 samples (./ima_setup.sh: fresh, fresh;
+ * /bin/true: fresh, fresh)
+ */
+ test_init(skel->bss);
+ skel->bss->use_ima_file_hash = true;
+ skel->bss->enable_bprm_creds_for_exec = true;
+ err = run_measured_process(measured_dir, &skel->bss->monitored_pid);
+ if (CHECK(err, "run_measured_process #4", "err = %d\n", err))
+ goto close_clean;
+
+ err = ring_buffer__consume(ringbuf);
+ ASSERT_EQ(err, 4, "num_samples_or_err");
+ ASSERT_NEQ(ima_hash_from_bpf[0], 0, "ima_hash");
+ ASSERT_NEQ(ima_hash_from_bpf[1], 0, "ima_hash");
+ ASSERT_NEQ(ima_hash_from_bpf[2], 0, "ima_hash");
+ ASSERT_NEQ(ima_hash_from_bpf[3], 0, "ima_hash");
+ ASSERT_NEQ(ima_hash_from_bpf[2], bin_true_sample,
+ "sample_different_or_err");
+ ASSERT_EQ(ima_hash_from_bpf[3], ima_hash_from_bpf[2],
+ "sample_equal_or_err");
+
+ skel->bss->use_ima_file_hash = false;
+ skel->bss->enable_bprm_creds_for_exec = false;
+ err = _run_measured_process(measured_dir, &skel->bss->monitored_pid,
+ "restore-bin");
+ if (CHECK(err, "restore-bin #3", "err = %d\n", err))
+ goto close_clean;
+
+ /*
+ * Test #5
+ * - Goal: obtain a sample from the kernel_read_file hook
+ * - Expected result: 2 samples (./ima_setup.sh, policy_test)
+ */
+ test_init(skel->bss);
+ skel->bss->use_ima_file_hash = true;
+ skel->bss->enable_kernel_read_file = true;
+ err = _run_measured_process(measured_dir, &skel->bss->monitored_pid,
+ "load-policy");
+ if (CHECK(err, "run_measured_process #5", "err = %d\n", err))
+ goto close_clean;
+
+ err = ring_buffer__consume(ringbuf);
+ ASSERT_EQ(err, 2, "num_samples_or_err");
+ ASSERT_NEQ(ima_hash_from_bpf[0], 0, "ima_hash");
+ ASSERT_NEQ(ima_hash_from_bpf[1], 0, "ima_hash");
+
+ /*
+ * Test #6
+ * - Goal: ensure that the kernel_read_file hook denies an operation
+ * - Expected result: 0 samples
+ */
+ test_init(skel->bss);
+ skel->bss->enable_kernel_read_file = true;
+ skel->bss->test_deny = true;
+ err = _run_measured_process(measured_dir, &skel->bss->monitored_pid,
+ "load-policy");
+ if (CHECK(!err, "run_measured_process #6", "err = %d\n", err))
+ goto close_clean;
+
+ err = ring_buffer__consume(ringbuf);
+ ASSERT_EQ(err, 0, "num_samples_or_err");
+
+close_clean:
+ snprintf(cmd, sizeof(cmd), "./ima_setup.sh cleanup %s", measured_dir);
+ err = system(cmd);
+ CHECK(err, "failed to run command", "%s, errno = %d\n", cmd, errno);
+close_prog:
+ ring_buffer__free(ringbuf);
+ ima__destroy(skel);
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/test_ldsx_insn.c b/tools/testing/selftests/bpf/prog_tests/test_ldsx_insn.c
new file mode 100644
index 000000000000..375677c19146
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/test_ldsx_insn.c
@@ -0,0 +1,139 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2023 Meta Platforms, Inc. and affiliates.*/
+
+#include <test_progs.h>
+#include <network_helpers.h>
+#include "test_ldsx_insn.skel.h"
+
+static void test_map_val_and_probed_memory(void)
+{
+ struct test_ldsx_insn *skel;
+ int err;
+
+ skel = test_ldsx_insn__open();
+ if (!ASSERT_OK_PTR(skel, "test_ldsx_insn__open"))
+ return;
+
+ if (skel->rodata->skip) {
+ test__skip();
+ goto out;
+ }
+
+ bpf_program__set_autoload(skel->progs.rdonly_map_prog, true);
+ bpf_program__set_autoload(skel->progs.map_val_prog, true);
+ bpf_program__set_autoload(skel->progs.test_ptr_struct_arg, true);
+
+ err = test_ldsx_insn__load(skel);
+ if (!ASSERT_OK(err, "test_ldsx_insn__load"))
+ goto out;
+
+ err = test_ldsx_insn__attach(skel);
+ if (!ASSERT_OK(err, "test_ldsx_insn__attach"))
+ goto out;
+
+ ASSERT_OK(trigger_module_test_read(256), "trigger_read");
+
+ ASSERT_EQ(skel->bss->done1, 1, "done1");
+ ASSERT_EQ(skel->bss->ret1, 1, "ret1");
+ ASSERT_EQ(skel->bss->done2, 1, "done2");
+ ASSERT_EQ(skel->bss->ret2, 1, "ret2");
+ ASSERT_EQ(skel->bss->int_member, -1, "int_member");
+
+out:
+ test_ldsx_insn__destroy(skel);
+}
+
+static void test_ctx_member_sign_ext(void)
+{
+ struct test_ldsx_insn *skel;
+ int err, fd, cgroup_fd;
+ char buf[16] = {0};
+ socklen_t optlen;
+
+ cgroup_fd = test__join_cgroup("/ldsx_test");
+ if (!ASSERT_GE(cgroup_fd, 0, "join_cgroup /ldsx_test"))
+ return;
+
+ skel = test_ldsx_insn__open();
+ if (!ASSERT_OK_PTR(skel, "test_ldsx_insn__open"))
+ goto close_cgroup_fd;
+
+ if (skel->rodata->skip) {
+ test__skip();
+ goto destroy_skel;
+ }
+
+ bpf_program__set_autoload(skel->progs._getsockopt, true);
+
+ err = test_ldsx_insn__load(skel);
+ if (!ASSERT_OK(err, "test_ldsx_insn__load"))
+ goto destroy_skel;
+
+ skel->links._getsockopt =
+ bpf_program__attach_cgroup(skel->progs._getsockopt, cgroup_fd);
+ if (!ASSERT_OK_PTR(skel->links._getsockopt, "getsockopt_link"))
+ goto destroy_skel;
+
+ fd = socket(AF_INET, SOCK_STREAM, 0);
+ if (!ASSERT_GE(fd, 0, "socket"))
+ goto destroy_skel;
+
+ optlen = sizeof(buf);
+ (void)getsockopt(fd, SOL_IP, IP_TTL, buf, &optlen);
+
+ ASSERT_EQ(skel->bss->set_optlen, -1, "optlen");
+ ASSERT_EQ(skel->bss->set_retval, -1, "retval");
+
+ close(fd);
+destroy_skel:
+ test_ldsx_insn__destroy(skel);
+close_cgroup_fd:
+ close(cgroup_fd);
+}
+
+static void test_ctx_member_narrow_sign_ext(void)
+{
+ struct test_ldsx_insn *skel;
+ struct __sk_buff skb = {};
+ LIBBPF_OPTS(bpf_test_run_opts, topts,
+ .data_in = &pkt_v4,
+ .data_size_in = sizeof(pkt_v4),
+ .ctx_in = &skb,
+ .ctx_size_in = sizeof(skb),
+ );
+ int err, prog_fd;
+
+ skel = test_ldsx_insn__open();
+ if (!ASSERT_OK_PTR(skel, "test_ldsx_insn__open"))
+ return;
+
+ if (skel->rodata->skip) {
+ test__skip();
+ goto out;
+ }
+
+ bpf_program__set_autoload(skel->progs._tc, true);
+
+ err = test_ldsx_insn__load(skel);
+ if (!ASSERT_OK(err, "test_ldsx_insn__load"))
+ goto out;
+
+ prog_fd = bpf_program__fd(skel->progs._tc);
+ err = bpf_prog_test_run_opts(prog_fd, &topts);
+ ASSERT_OK(err, "test_run");
+
+ ASSERT_EQ(skel->bss->set_mark, -2, "set_mark");
+
+out:
+ test_ldsx_insn__destroy(skel);
+}
+
+void test_ldsx_insn(void)
+{
+ if (test__start_subtest("map_val and probed_memory"))
+ test_map_val_and_probed_memory();
+ if (test__start_subtest("ctx_member_sign_ext"))
+ test_ctx_member_sign_ext();
+ if (test__start_subtest("ctx_member_narrow_sign_ext"))
+ test_ctx_member_narrow_sign_ext();
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/test_local_storage.c b/tools/testing/selftests/bpf/prog_tests/test_local_storage.c
new file mode 100644
index 000000000000..bcf2e1905ed7
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/test_local_storage.c
@@ -0,0 +1,172 @@
+// SPDX-License-Identifier: GPL-2.0
+
+/*
+ * Copyright (C) 2020 Google LLC.
+ */
+
+#include <asm-generic/errno-base.h>
+#include <sys/stat.h>
+#include <test_progs.h>
+#include <linux/limits.h>
+
+#include "local_storage.skel.h"
+#include "network_helpers.h"
+#include "task_local_storage_helpers.h"
+
+#define TEST_STORAGE_VALUE 0xbeefdead
+
+struct storage {
+ void *inode;
+ unsigned int value;
+};
+
+/* Fork and exec the provided rm binary and return the exit code of the
+ * forked process and its pid.
+ */
+static int run_self_unlink(struct local_storage *skel, const char *rm_path)
+{
+ int child_pid, child_status, ret;
+ int null_fd;
+
+ child_pid = fork();
+ if (child_pid == 0) {
+ null_fd = open("/dev/null", O_WRONLY);
+ dup2(null_fd, STDOUT_FILENO);
+ dup2(null_fd, STDERR_FILENO);
+ close(null_fd);
+
+ skel->bss->monitored_pid = getpid();
+ /* Use the copied /usr/bin/rm to delete itself
+ * /tmp/copy_of_rm /tmp/copy_of_rm.
+ */
+ ret = execlp(rm_path, rm_path, rm_path, NULL);
+ if (ret)
+ exit(errno);
+ } else if (child_pid > 0) {
+ waitpid(child_pid, &child_status, 0);
+ ASSERT_EQ(skel->data->task_storage_result, 0, "task_storage_result");
+ return WEXITSTATUS(child_status);
+ }
+
+ return -EINVAL;
+}
+
+static bool check_syscall_operations(int map_fd, int obj_fd)
+{
+ struct storage val = { .value = TEST_STORAGE_VALUE },
+ lookup_val = { .value = 0 };
+ int err;
+
+ /* Looking up an existing element should fail initially */
+ err = bpf_map_lookup_elem_flags(map_fd, &obj_fd, &lookup_val, 0);
+ if (!ASSERT_EQ(err, -ENOENT, "bpf_map_lookup_elem"))
+ return false;
+
+ /* Create a new element */
+ err = bpf_map_update_elem(map_fd, &obj_fd, &val, BPF_NOEXIST);
+ if (!ASSERT_OK(err, "bpf_map_update_elem"))
+ return false;
+
+ /* Lookup the newly created element */
+ err = bpf_map_lookup_elem_flags(map_fd, &obj_fd, &lookup_val, 0);
+ if (!ASSERT_OK(err, "bpf_map_lookup_elem"))
+ return false;
+
+ /* Check the value of the newly created element */
+ if (!ASSERT_EQ(lookup_val.value, val.value, "bpf_map_lookup_elem"))
+ return false;
+
+ err = bpf_map_delete_elem(map_fd, &obj_fd);
+ if (!ASSERT_OK(err, "bpf_map_delete_elem()"))
+ return false;
+
+ /* The lookup should fail, now that the element has been deleted */
+ err = bpf_map_lookup_elem_flags(map_fd, &obj_fd, &lookup_val, 0);
+ if (!ASSERT_EQ(err, -ENOENT, "bpf_map_lookup_elem"))
+ return false;
+
+ return true;
+}
+
+void test_test_local_storage(void)
+{
+ char tmp_dir_path[] = "/tmp/local_storageXXXXXX";
+ int err, serv_sk = -1, task_fd = -1, rm_fd = -1;
+ struct local_storage *skel = NULL;
+ char tmp_exec_path[64];
+ char cmd[256];
+
+ skel = local_storage__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "skel_load"))
+ goto close_prog;
+
+ err = local_storage__attach(skel);
+ if (!ASSERT_OK(err, "attach"))
+ goto close_prog;
+
+ task_fd = sys_pidfd_open(getpid(), 0);
+ if (!ASSERT_GE(task_fd, 0, "pidfd_open"))
+ goto close_prog;
+
+ if (!check_syscall_operations(bpf_map__fd(skel->maps.task_storage_map),
+ task_fd))
+ goto close_prog;
+
+ if (!ASSERT_OK_PTR(mkdtemp(tmp_dir_path), "mkdtemp"))
+ goto close_prog;
+
+ snprintf(tmp_exec_path, sizeof(tmp_exec_path), "%s/copy_of_rm",
+ tmp_dir_path);
+ snprintf(cmd, sizeof(cmd), "cp /bin/rm %s", tmp_exec_path);
+ if (!ASSERT_OK(system(cmd), "system(cp)"))
+ goto close_prog_rmdir;
+
+ rm_fd = open(tmp_exec_path, O_RDONLY);
+ if (!ASSERT_GE(rm_fd, 0, "open(tmp_exec_path)"))
+ goto close_prog_rmdir;
+
+ if (!check_syscall_operations(bpf_map__fd(skel->maps.inode_storage_map),
+ rm_fd))
+ goto close_prog_rmdir;
+
+ /* Sets skel->bss->monitored_pid to the pid of the forked child
+ * forks a child process that executes tmp_exec_path and tries to
+ * unlink its executable. This operation should be denied by the loaded
+ * LSM program.
+ */
+ err = run_self_unlink(skel, tmp_exec_path);
+ if (!ASSERT_EQ(err, EPERM, "run_self_unlink"))
+ goto close_prog_rmdir;
+
+ /* Set the process being monitored to be the current process */
+ skel->bss->monitored_pid = getpid();
+
+ /* Move copy_of_rm to a new location so that it triggers the
+ * inode_rename LSM hook with a new_dentry that has a NULL inode ptr.
+ */
+ snprintf(cmd, sizeof(cmd), "mv %s/copy_of_rm %s/check_null_ptr",
+ tmp_dir_path, tmp_dir_path);
+ if (!ASSERT_OK(system(cmd), "system(mv)"))
+ goto close_prog_rmdir;
+
+ ASSERT_EQ(skel->data->inode_storage_result, 0, "inode_storage_result");
+
+ serv_sk = start_server(AF_INET6, SOCK_STREAM, NULL, 0, 0);
+ if (!ASSERT_GE(serv_sk, 0, "start_server"))
+ goto close_prog_rmdir;
+
+ ASSERT_EQ(skel->data->sk_storage_result, 0, "sk_storage_result");
+
+ if (!check_syscall_operations(bpf_map__fd(skel->maps.sk_storage_map),
+ serv_sk))
+ goto close_prog_rmdir;
+
+close_prog_rmdir:
+ snprintf(cmd, sizeof(cmd), "rm -rf %s", tmp_dir_path);
+ system(cmd);
+close_prog:
+ close(serv_sk);
+ close(rm_fd);
+ close(task_fd);
+ local_storage__destroy(skel);
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/test_lsm.c b/tools/testing/selftests/bpf/prog_tests/test_lsm.c
index b17eb2045c1d..16175d579bc7 100644
--- a/tools/testing/selftests/bpf/prog_tests/test_lsm.c
+++ b/tools/testing/selftests/bpf/prog_tests/test_lsm.c
@@ -51,35 +51,65 @@ int exec_cmd(int *monitored_pid)
return -EINVAL;
}
-void test_test_lsm(void)
+static int test_lsm(struct lsm *skel)
{
- struct lsm *skel = NULL;
- int err, duration = 0;
-
- skel = lsm__open_and_load();
- if (CHECK(!skel, "skel_load", "lsm skeleton failed\n"))
- goto close_prog;
+ struct bpf_link *link;
+ int buf = 1234;
+ int err;
err = lsm__attach(skel);
- if (CHECK(err, "attach", "lsm attach failed: %d\n", err))
- goto close_prog;
+ if (!ASSERT_OK(err, "attach"))
+ return err;
+
+ /* Check that already linked program can't be attached again. */
+ link = bpf_program__attach(skel->progs.test_int_hook);
+ if (!ASSERT_ERR_PTR(link, "attach_link"))
+ return -1;
err = exec_cmd(&skel->bss->monitored_pid);
- if (CHECK(err < 0, "exec_cmd", "err %d errno %d\n", err, errno))
- goto close_prog;
+ if (!ASSERT_OK(err, "exec_cmd"))
+ return err;
- CHECK(skel->bss->bprm_count != 1, "bprm_count", "bprm_count = %d\n",
- skel->bss->bprm_count);
+ ASSERT_EQ(skel->bss->bprm_count, 1, "bprm_count");
skel->bss->monitored_pid = getpid();
err = stack_mprotect();
- if (CHECK(errno != EPERM, "stack_mprotect", "want err=EPERM, got %d\n",
- errno))
+ if (!ASSERT_EQ(err, -1, "stack_mprotect") ||
+ !ASSERT_EQ(errno, EPERM, "stack_mprotect"))
+ return err;
+
+ ASSERT_EQ(skel->bss->mprotect_count, 1, "mprotect_count");
+
+ syscall(__NR_setdomainname, &buf, -2L);
+ syscall(__NR_setdomainname, 0, -3L);
+ syscall(__NR_setdomainname, ~0L, -4L);
+
+ ASSERT_EQ(skel->bss->copy_test, 3, "copy_test");
+
+ lsm__detach(skel);
+
+ skel->bss->copy_test = 0;
+ skel->bss->bprm_count = 0;
+ skel->bss->mprotect_count = 0;
+ return 0;
+}
+
+void test_test_lsm(void)
+{
+ struct lsm *skel = NULL;
+ int err;
+
+ skel = lsm__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "lsm_skel_load"))
+ goto close_prog;
+
+ err = test_lsm(skel);
+ if (!ASSERT_OK(err, "test_lsm_first_attach"))
goto close_prog;
- CHECK(skel->bss->mprotect_count != 1, "mprotect_count",
- "mprotect_count = %d\n", skel->bss->mprotect_count);
+ err = test_lsm(skel);
+ ASSERT_OK(err, "test_lsm_second_attach");
close_prog:
lsm__destroy(skel);
diff --git a/tools/testing/selftests/bpf/prog_tests/test_overhead.c b/tools/testing/selftests/bpf/prog_tests/test_overhead.c
index 2702df2b2343..f27013e38d03 100644
--- a/tools/testing/selftests/bpf/prog_tests/test_overhead.c
+++ b/tools/testing/selftests/bpf/prog_tests/test_overhead.c
@@ -56,15 +56,14 @@ static void setaffinity(void)
void test_test_overhead(void)
{
- const char *kprobe_name = "kprobe/__set_task_comm";
- const char *kretprobe_name = "kretprobe/__set_task_comm";
- const char *raw_tp_name = "raw_tp/task_rename";
- const char *fentry_name = "fentry/__set_task_comm";
- const char *fexit_name = "fexit/__set_task_comm";
- const char *fmodret_name = "fmod_ret/__set_task_comm";
+ const char *kprobe_name = "prog1";
+ const char *kretprobe_name = "prog2";
+ const char *raw_tp_name = "prog3";
+ const char *fentry_name = "prog4";
+ const char *fexit_name = "prog5";
const char *kprobe_func = "__set_task_comm";
struct bpf_program *kprobe_prog, *kretprobe_prog, *raw_tp_prog;
- struct bpf_program *fentry_prog, *fexit_prog, *fmodret_prog;
+ struct bpf_program *fentry_prog, *fexit_prog;
struct bpf_object *obj;
struct bpf_link *link;
int err, duration = 0;
@@ -73,35 +72,30 @@ void test_test_overhead(void)
if (CHECK_FAIL(prctl(PR_GET_NAME, comm, 0L, 0L, 0L)))
return;
- obj = bpf_object__open_file("./test_overhead.o", NULL);
- if (CHECK(IS_ERR(obj), "obj_open_file", "err %ld\n", PTR_ERR(obj)))
+ obj = bpf_object__open_file("./test_overhead.bpf.o", NULL);
+ if (!ASSERT_OK_PTR(obj, "obj_open_file"))
return;
- kprobe_prog = bpf_object__find_program_by_title(obj, kprobe_name);
+ kprobe_prog = bpf_object__find_program_by_name(obj, kprobe_name);
if (CHECK(!kprobe_prog, "find_probe",
"prog '%s' not found\n", kprobe_name))
goto cleanup;
- kretprobe_prog = bpf_object__find_program_by_title(obj, kretprobe_name);
+ kretprobe_prog = bpf_object__find_program_by_name(obj, kretprobe_name);
if (CHECK(!kretprobe_prog, "find_probe",
"prog '%s' not found\n", kretprobe_name))
goto cleanup;
- raw_tp_prog = bpf_object__find_program_by_title(obj, raw_tp_name);
+ raw_tp_prog = bpf_object__find_program_by_name(obj, raw_tp_name);
if (CHECK(!raw_tp_prog, "find_probe",
"prog '%s' not found\n", raw_tp_name))
goto cleanup;
- fentry_prog = bpf_object__find_program_by_title(obj, fentry_name);
+ fentry_prog = bpf_object__find_program_by_name(obj, fentry_name);
if (CHECK(!fentry_prog, "find_probe",
"prog '%s' not found\n", fentry_name))
goto cleanup;
- fexit_prog = bpf_object__find_program_by_title(obj, fexit_name);
+ fexit_prog = bpf_object__find_program_by_name(obj, fexit_name);
if (CHECK(!fexit_prog, "find_probe",
"prog '%s' not found\n", fexit_name))
goto cleanup;
- fmodret_prog = bpf_object__find_program_by_title(obj, fmodret_name);
- if (CHECK(!fmodret_prog, "find_probe",
- "prog '%s' not found\n", fmodret_name))
- goto cleanup;
-
err = bpf_object__load(obj);
if (CHECK(err, "obj_load", "err %d\n", err))
goto cleanup;
@@ -114,7 +108,7 @@ void test_test_overhead(void)
/* attach kprobe */
link = bpf_program__attach_kprobe(kprobe_prog, false /* retprobe */,
kprobe_func);
- if (CHECK(IS_ERR(link), "attach_kprobe", "err %ld\n", PTR_ERR(link)))
+ if (!ASSERT_OK_PTR(link, "attach_kprobe"))
goto cleanup;
test_run("kprobe");
bpf_link__destroy(link);
@@ -122,38 +116,32 @@ void test_test_overhead(void)
/* attach kretprobe */
link = bpf_program__attach_kprobe(kretprobe_prog, true /* retprobe */,
kprobe_func);
- if (CHECK(IS_ERR(link), "attach kretprobe", "err %ld\n", PTR_ERR(link)))
+ if (!ASSERT_OK_PTR(link, "attach_kretprobe"))
goto cleanup;
test_run("kretprobe");
bpf_link__destroy(link);
/* attach raw_tp */
link = bpf_program__attach_raw_tracepoint(raw_tp_prog, "task_rename");
- if (CHECK(IS_ERR(link), "attach fentry", "err %ld\n", PTR_ERR(link)))
+ if (!ASSERT_OK_PTR(link, "attach_raw_tp"))
goto cleanup;
test_run("raw_tp");
bpf_link__destroy(link);
/* attach fentry */
link = bpf_program__attach_trace(fentry_prog);
- if (CHECK(IS_ERR(link), "attach fentry", "err %ld\n", PTR_ERR(link)))
+ if (!ASSERT_OK_PTR(link, "attach_fentry"))
goto cleanup;
test_run("fentry");
bpf_link__destroy(link);
/* attach fexit */
link = bpf_program__attach_trace(fexit_prog);
- if (CHECK(IS_ERR(link), "attach fexit", "err %ld\n", PTR_ERR(link)))
+ if (!ASSERT_OK_PTR(link, "attach_fexit"))
goto cleanup;
test_run("fexit");
bpf_link__destroy(link);
- /* attach fmod_ret */
- link = bpf_program__attach_trace(fmodret_prog);
- if (CHECK(IS_ERR(link), "attach fmod_ret", "err %ld\n", PTR_ERR(link)))
- goto cleanup;
- test_run("fmod_ret");
- bpf_link__destroy(link);
cleanup:
prctl(PR_SET_NAME, comm, 0L, 0L, 0L);
bpf_object__close(obj);
diff --git a/tools/testing/selftests/bpf/prog_tests/test_profiler.c b/tools/testing/selftests/bpf/prog_tests/test_profiler.c
new file mode 100644
index 000000000000..de24e8f0e738
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/test_profiler.c
@@ -0,0 +1,72 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2020 Facebook */
+#include <test_progs.h>
+#include "progs/profiler.h"
+#include "profiler1.skel.h"
+#include "profiler2.skel.h"
+#include "profiler3.skel.h"
+
+static int sanity_run(struct bpf_program *prog)
+{
+ LIBBPF_OPTS(bpf_test_run_opts, test_attr);
+ __u64 args[] = {1, 2, 3};
+ int err, prog_fd;
+
+ prog_fd = bpf_program__fd(prog);
+ test_attr.ctx_in = args;
+ test_attr.ctx_size_in = sizeof(args);
+ err = bpf_prog_test_run_opts(prog_fd, &test_attr);
+ if (!ASSERT_OK(err, "test_run"))
+ return -1;
+
+ if (!ASSERT_OK(test_attr.retval, "test_run retval"))
+ return -1;
+
+ return 0;
+}
+
+void test_test_profiler(void)
+{
+ struct profiler1 *profiler1_skel = NULL;
+ struct profiler2 *profiler2_skel = NULL;
+ struct profiler3 *profiler3_skel = NULL;
+ __u32 duration = 0;
+ int err;
+
+ profiler1_skel = profiler1__open_and_load();
+ if (CHECK(!profiler1_skel, "profiler1_skel_load", "profiler1 skeleton failed\n"))
+ goto cleanup;
+
+ err = profiler1__attach(profiler1_skel);
+ if (CHECK(err, "profiler1_attach", "profiler1 attach failed: %d\n", err))
+ goto cleanup;
+
+ if (sanity_run(profiler1_skel->progs.raw_tracepoint__sched_process_exec))
+ goto cleanup;
+
+ profiler2_skel = profiler2__open_and_load();
+ if (CHECK(!profiler2_skel, "profiler2_skel_load", "profiler2 skeleton failed\n"))
+ goto cleanup;
+
+ err = profiler2__attach(profiler2_skel);
+ if (CHECK(err, "profiler2_attach", "profiler2 attach failed: %d\n", err))
+ goto cleanup;
+
+ if (sanity_run(profiler2_skel->progs.raw_tracepoint__sched_process_exec))
+ goto cleanup;
+
+ profiler3_skel = profiler3__open_and_load();
+ if (CHECK(!profiler3_skel, "profiler3_skel_load", "profiler3 skeleton failed\n"))
+ goto cleanup;
+
+ err = profiler3__attach(profiler3_skel);
+ if (CHECK(err, "profiler3_attach", "profiler3 attach failed: %d\n", err))
+ goto cleanup;
+
+ if (sanity_run(profiler3_skel->progs.raw_tracepoint__sched_process_exec))
+ goto cleanup;
+cleanup:
+ profiler1__destroy(profiler1_skel);
+ profiler2__destroy(profiler2_skel);
+ profiler3__destroy(profiler3_skel);
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/test_skb_pkt_end.c b/tools/testing/selftests/bpf/prog_tests/test_skb_pkt_end.c
new file mode 100644
index 000000000000..ae93411fd582
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/test_skb_pkt_end.c
@@ -0,0 +1,44 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2020 Facebook */
+#include <test_progs.h>
+#include <network_helpers.h>
+#include "skb_pkt_end.skel.h"
+
+static int sanity_run(struct bpf_program *prog)
+{
+ int err, prog_fd;
+ LIBBPF_OPTS(bpf_test_run_opts, topts,
+ .data_in = &pkt_v4,
+ .data_size_in = sizeof(pkt_v4),
+ .repeat = 1,
+ );
+
+ prog_fd = bpf_program__fd(prog);
+ err = bpf_prog_test_run_opts(prog_fd, &topts);
+ if (!ASSERT_OK(err, "test_run"))
+ return -1;
+ if (!ASSERT_EQ(topts.retval, 123, "test_run retval"))
+ return -1;
+ return 0;
+}
+
+void test_test_skb_pkt_end(void)
+{
+ struct skb_pkt_end *skb_pkt_end_skel = NULL;
+ __u32 duration = 0;
+ int err;
+
+ skb_pkt_end_skel = skb_pkt_end__open_and_load();
+ if (CHECK(!skb_pkt_end_skel, "skb_pkt_end_skel_load", "skb_pkt_end skeleton failed\n"))
+ goto cleanup;
+
+ err = skb_pkt_end__attach(skb_pkt_end_skel);
+ if (CHECK(err, "skb_pkt_end_attach", "skb_pkt_end attach failed: %d\n", err))
+ goto cleanup;
+
+ if (sanity_run(skb_pkt_end_skel->progs.main_prog))
+ goto cleanup;
+
+cleanup:
+ skb_pkt_end__destroy(skb_pkt_end_skel);
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/test_strncmp.c b/tools/testing/selftests/bpf/prog_tests/test_strncmp.c
new file mode 100644
index 000000000000..7ddd6615b7e7
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/test_strncmp.c
@@ -0,0 +1,148 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (C) 2021. Huawei Technologies Co., Ltd */
+#include <test_progs.h>
+#include "strncmp_test.skel.h"
+
+static int trigger_strncmp(const struct strncmp_test *skel)
+{
+ int cmp;
+
+ usleep(1);
+
+ cmp = skel->bss->cmp_ret;
+ if (cmp > 0)
+ return 1;
+ if (cmp < 0)
+ return -1;
+ return 0;
+}
+
+/*
+ * Compare str and target after making str[i] != target[i].
+ * When exp is -1, make str[i] < target[i] and delta = -1.
+ */
+static void strncmp_full_str_cmp(struct strncmp_test *skel, const char *name,
+ int exp)
+{
+ size_t nr = sizeof(skel->bss->str);
+ char *str = skel->bss->str;
+ int delta = exp;
+ int got;
+ size_t i;
+
+ memcpy(str, skel->rodata->target, nr);
+ for (i = 0; i < nr - 1; i++) {
+ str[i] += delta;
+
+ got = trigger_strncmp(skel);
+ ASSERT_EQ(got, exp, name);
+
+ str[i] -= delta;
+ }
+}
+
+static void test_strncmp_ret(void)
+{
+ struct strncmp_test *skel;
+ int err, got;
+
+ skel = strncmp_test__open();
+ if (!ASSERT_OK_PTR(skel, "strncmp_test open"))
+ return;
+
+ bpf_program__set_autoload(skel->progs.do_strncmp, true);
+
+ err = strncmp_test__load(skel);
+ if (!ASSERT_EQ(err, 0, "strncmp_test load"))
+ goto out;
+
+ err = strncmp_test__attach(skel);
+ if (!ASSERT_EQ(err, 0, "strncmp_test attach"))
+ goto out;
+
+ skel->bss->target_pid = getpid();
+
+ /* Empty str */
+ skel->bss->str[0] = '\0';
+ got = trigger_strncmp(skel);
+ ASSERT_EQ(got, -1, "strncmp: empty str");
+
+ /* Same string */
+ memcpy(skel->bss->str, skel->rodata->target, sizeof(skel->bss->str));
+ got = trigger_strncmp(skel);
+ ASSERT_EQ(got, 0, "strncmp: same str");
+
+ /* Not-null-termainted string */
+ memcpy(skel->bss->str, skel->rodata->target, sizeof(skel->bss->str));
+ skel->bss->str[sizeof(skel->bss->str) - 1] = 'A';
+ got = trigger_strncmp(skel);
+ ASSERT_EQ(got, 1, "strncmp: not-null-term str");
+
+ strncmp_full_str_cmp(skel, "strncmp: less than", -1);
+ strncmp_full_str_cmp(skel, "strncmp: greater than", 1);
+out:
+ strncmp_test__destroy(skel);
+}
+
+static void test_strncmp_bad_not_const_str_size(void)
+{
+ struct strncmp_test *skel;
+ int err;
+
+ skel = strncmp_test__open();
+ if (!ASSERT_OK_PTR(skel, "strncmp_test open"))
+ return;
+
+ bpf_program__set_autoload(skel->progs.strncmp_bad_not_const_str_size, true);
+
+ err = strncmp_test__load(skel);
+ ASSERT_ERR(err, "strncmp_test load bad_not_const_str_size");
+
+ strncmp_test__destroy(skel);
+}
+
+static void test_strncmp_bad_writable_target(void)
+{
+ struct strncmp_test *skel;
+ int err;
+
+ skel = strncmp_test__open();
+ if (!ASSERT_OK_PTR(skel, "strncmp_test open"))
+ return;
+
+ bpf_program__set_autoload(skel->progs.strncmp_bad_writable_target, true);
+
+ err = strncmp_test__load(skel);
+ ASSERT_ERR(err, "strncmp_test load bad_writable_target");
+
+ strncmp_test__destroy(skel);
+}
+
+static void test_strncmp_bad_not_null_term_target(void)
+{
+ struct strncmp_test *skel;
+ int err;
+
+ skel = strncmp_test__open();
+ if (!ASSERT_OK_PTR(skel, "strncmp_test open"))
+ return;
+
+ bpf_program__set_autoload(skel->progs.strncmp_bad_not_null_term_target, true);
+
+ err = strncmp_test__load(skel);
+ ASSERT_ERR(err, "strncmp_test load bad_not_null_term_target");
+
+ strncmp_test__destroy(skel);
+}
+
+void test_test_strncmp(void)
+{
+ if (test__start_subtest("strncmp_ret"))
+ test_strncmp_ret();
+ if (test__start_subtest("strncmp_bad_not_const_str_size"))
+ test_strncmp_bad_not_const_str_size();
+ if (test__start_subtest("strncmp_bad_writable_target"))
+ test_strncmp_bad_writable_target();
+ if (test__start_subtest("strncmp_bad_not_null_term_target"))
+ test_strncmp_bad_not_null_term_target();
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/test_struct_ops_maybe_null.c b/tools/testing/selftests/bpf/prog_tests/test_struct_ops_maybe_null.c
new file mode 100644
index 000000000000..01dc2613c8a5
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/test_struct_ops_maybe_null.c
@@ -0,0 +1,46 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2024 Meta Platforms, Inc. and affiliates. */
+#include <test_progs.h>
+
+#include "struct_ops_maybe_null.skel.h"
+#include "struct_ops_maybe_null_fail.skel.h"
+
+/* Test that the verifier accepts a program that access a nullable pointer
+ * with a proper check.
+ */
+static void maybe_null(void)
+{
+ struct struct_ops_maybe_null *skel;
+
+ skel = struct_ops_maybe_null__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "struct_ops_module_open_and_load"))
+ return;
+
+ struct_ops_maybe_null__destroy(skel);
+}
+
+/* Test that the verifier rejects a program that access a nullable pointer
+ * without a check beforehand.
+ */
+static void maybe_null_fail(void)
+{
+ struct struct_ops_maybe_null_fail *skel;
+
+ skel = struct_ops_maybe_null_fail__open_and_load();
+ if (ASSERT_ERR_PTR(skel, "struct_ops_module_fail__open_and_load"))
+ return;
+
+ struct_ops_maybe_null_fail__destroy(skel);
+}
+
+void test_struct_ops_maybe_null(void)
+{
+ /* The verifier verifies the programs at load time, so testing both
+ * programs in the same compile-unit is complicated. We run them in
+ * separate objects to simplify the testing.
+ */
+ if (test__start_subtest("maybe_null"))
+ maybe_null();
+ if (test__start_subtest("maybe_null_fail"))
+ maybe_null_fail();
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/test_struct_ops_module.c b/tools/testing/selftests/bpf/prog_tests/test_struct_ops_module.c
new file mode 100644
index 000000000000..ee5372c7f2c7
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/test_struct_ops_module.c
@@ -0,0 +1,101 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2024 Meta Platforms, Inc. and affiliates. */
+#include <test_progs.h>
+#include <time.h>
+
+#include "struct_ops_module.skel.h"
+
+static void check_map_info(struct bpf_map_info *info)
+{
+ struct bpf_btf_info btf_info;
+ char btf_name[256];
+ u32 btf_info_len = sizeof(btf_info);
+ int err, fd;
+
+ fd = bpf_btf_get_fd_by_id(info->btf_vmlinux_id);
+ if (!ASSERT_GE(fd, 0, "get_value_type_btf_obj_fd"))
+ return;
+
+ memset(&btf_info, 0, sizeof(btf_info));
+ btf_info.name = ptr_to_u64(btf_name);
+ btf_info.name_len = sizeof(btf_name);
+ err = bpf_btf_get_info_by_fd(fd, &btf_info, &btf_info_len);
+ if (!ASSERT_OK(err, "get_value_type_btf_obj_info"))
+ goto cleanup;
+
+ if (!ASSERT_EQ(strcmp(btf_name, "bpf_testmod"), 0, "get_value_type_btf_obj_name"))
+ goto cleanup;
+
+cleanup:
+ close(fd);
+}
+
+static int attach_ops_and_check(struct struct_ops_module *skel,
+ struct bpf_map *map,
+ int expected_test_2_result)
+{
+ struct bpf_link *link;
+
+ link = bpf_map__attach_struct_ops(map);
+ ASSERT_OK_PTR(link, "attach_test_mod_1");
+ if (!link)
+ return -1;
+
+ /* test_{1,2}() would be called from bpf_dummy_reg() in bpf_testmod.c */
+ ASSERT_EQ(skel->bss->test_1_result, 0xdeadbeef, "test_1_result");
+ ASSERT_EQ(skel->bss->test_2_result, expected_test_2_result, "test_2_result");
+
+ bpf_link__destroy(link);
+ return 0;
+}
+
+static void test_struct_ops_load(void)
+{
+ struct struct_ops_module *skel;
+ struct bpf_map_info info = {};
+ int err;
+ u32 len;
+
+ skel = struct_ops_module__open();
+ if (!ASSERT_OK_PTR(skel, "struct_ops_module_open"))
+ return;
+
+ skel->struct_ops.testmod_1->data = 13;
+ skel->struct_ops.testmod_1->test_2 = skel->progs.test_3;
+ /* Since test_2() is not being used, it should be disabled from
+ * auto-loading, or it will fail to load.
+ */
+ bpf_program__set_autoload(skel->progs.test_2, false);
+
+ err = struct_ops_module__load(skel);
+ if (!ASSERT_OK(err, "struct_ops_module_load"))
+ goto cleanup;
+
+ len = sizeof(info);
+ err = bpf_map_get_info_by_fd(bpf_map__fd(skel->maps.testmod_1), &info,
+ &len);
+ if (!ASSERT_OK(err, "bpf_map_get_info_by_fd"))
+ goto cleanup;
+
+ check_map_info(&info);
+ /* test_3() will be called from bpf_dummy_reg() in bpf_testmod.c
+ *
+ * In bpf_testmod.c it will pass 4 and 13 (the value of data) to
+ * .test_2. So, the value of test_2_result should be 20 (4 + 13 +
+ * 3).
+ */
+ if (!attach_ops_and_check(skel, skel->maps.testmod_1, 20))
+ goto cleanup;
+ if (!attach_ops_and_check(skel, skel->maps.testmod_2, 12))
+ goto cleanup;
+
+cleanup:
+ struct_ops_module__destroy(skel);
+}
+
+void serial_test_struct_ops_module(void)
+{
+ if (test__start_subtest("test_struct_ops_load"))
+ test_struct_ops_load();
+}
+
diff --git a/tools/testing/selftests/bpf/prog_tests/test_struct_ops_multi_pages.c b/tools/testing/selftests/bpf/prog_tests/test_struct_ops_multi_pages.c
new file mode 100644
index 000000000000..645d32b5160c
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/test_struct_ops_multi_pages.c
@@ -0,0 +1,30 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2024 Meta Platforms, Inc. and affiliates. */
+#include <test_progs.h>
+
+#include "struct_ops_multi_pages.skel.h"
+
+static void do_struct_ops_multi_pages(void)
+{
+ struct struct_ops_multi_pages *skel;
+ struct bpf_link *link;
+
+ /* The size of all trampolines of skel->maps.multi_pages should be
+ * over 1 page (at least for x86).
+ */
+ skel = struct_ops_multi_pages__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "struct_ops_multi_pages_open_and_load"))
+ return;
+
+ link = bpf_map__attach_struct_ops(skel->maps.multi_pages);
+ ASSERT_OK_PTR(link, "attach_multi_pages");
+
+ bpf_link__destroy(link);
+ struct_ops_multi_pages__destroy(skel);
+}
+
+void test_struct_ops_multi_pages(void)
+{
+ if (test__start_subtest("multi_pages"))
+ do_struct_ops_multi_pages();
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/test_struct_ops_no_cfi.c b/tools/testing/selftests/bpf/prog_tests/test_struct_ops_no_cfi.c
new file mode 100644
index 000000000000..106ea447965a
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/test_struct_ops_no_cfi.c
@@ -0,0 +1,35 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2024 Meta Platforms, Inc. and affiliates. */
+#include <test_progs.h>
+#include <testing_helpers.h>
+
+static void load_bpf_test_no_cfi(void)
+{
+ int fd;
+ int err;
+
+ fd = open("bpf_test_no_cfi.ko", O_RDONLY);
+ if (!ASSERT_GE(fd, 0, "open"))
+ return;
+
+ /* The module will try to register a struct_ops type without
+ * cfi_stubs and with cfi_stubs.
+ *
+ * The one without cfi_stub should fail. The module will be loaded
+ * successfully only if the result of the registration is as
+ * expected, or it fails.
+ */
+ err = finit_module(fd, "", 0);
+ close(fd);
+ if (!ASSERT_OK(err, "finit_module"))
+ return;
+
+ err = delete_module("bpf_test_no_cfi", 0);
+ ASSERT_OK(err, "delete_module");
+}
+
+void test_struct_ops_no_cfi(void)
+{
+ if (test__start_subtest("load_bpf_test_no_cfi"))
+ load_bpf_test_no_cfi();
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/test_tunnel.c b/tools/testing/selftests/bpf/prog_tests/test_tunnel.c
new file mode 100644
index 000000000000..5f1fb0a2ea56
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/test_tunnel.c
@@ -0,0 +1,722 @@
+// SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause
+
+/*
+ * End-to-end eBPF tunnel test suite
+ * The file tests BPF network tunnel implementation.
+ *
+ * Topology:
+ * ---------
+ * root namespace | at_ns0 namespace
+ * |
+ * ----------- | -----------
+ * | tnl dev | | | tnl dev | (overlay network)
+ * ----------- | -----------
+ * metadata-mode | metadata-mode
+ * with bpf | with bpf
+ * |
+ * ---------- | ----------
+ * | veth1 | --------- | veth0 | (underlay network)
+ * ---------- peer ----------
+ *
+ *
+ * Device Configuration
+ * --------------------
+ * root namespace with metadata-mode tunnel + BPF
+ * Device names and addresses:
+ * veth1 IP 1: 172.16.1.200, IPv6: 00::22 (underlay)
+ * IP 2: 172.16.1.20, IPv6: 00::bb (underlay)
+ * tunnel dev <type>11, ex: gre11, IPv4: 10.1.1.200, IPv6: 1::22 (overlay)
+ *
+ * Namespace at_ns0 with native tunnel
+ * Device names and addresses:
+ * veth0 IPv4: 172.16.1.100, IPv6: 00::11 (underlay)
+ * tunnel dev <type>00, ex: gre00, IPv4: 10.1.1.100, IPv6: 1::11 (overlay)
+ *
+ *
+ * End-to-end ping packet flow
+ * ---------------------------
+ * Most of the tests start by namespace creation, device configuration,
+ * then ping the underlay and overlay network. When doing 'ping 10.1.1.100'
+ * from root namespace, the following operations happen:
+ * 1) Route lookup shows 10.1.1.100/24 belongs to tnl dev, fwd to tnl dev.
+ * 2) Tnl device's egress BPF program is triggered and set the tunnel metadata,
+ * with local_ip=172.16.1.200, remote_ip=172.16.1.100. BPF program choose
+ * the primary or secondary ip of veth1 as the local ip of tunnel. The
+ * choice is made based on the value of bpf map local_ip_map.
+ * 3) Outer tunnel header is prepended and route the packet to veth1's egress.
+ * 4) veth0's ingress queue receive the tunneled packet at namespace at_ns0.
+ * 5) Tunnel protocol handler, ex: vxlan_rcv, decap the packet.
+ * 6) Forward the packet to the overlay tnl dev.
+ */
+
+#include <arpa/inet.h>
+#include <linux/if_link.h>
+#include <linux/if_tun.h>
+#include <linux/limits.h>
+#include <linux/sysctl.h>
+#include <linux/time_types.h>
+#include <linux/net_tstamp.h>
+#include <net/if.h>
+#include <stdbool.h>
+#include <stdio.h>
+#include <sys/stat.h>
+#include <unistd.h>
+
+#include "test_progs.h"
+#include "network_helpers.h"
+#include "test_tunnel_kern.skel.h"
+
+#define IP4_ADDR_VETH0 "172.16.1.100"
+#define IP4_ADDR1_VETH1 "172.16.1.200"
+#define IP4_ADDR2_VETH1 "172.16.1.20"
+#define IP4_ADDR_TUNL_DEV0 "10.1.1.100"
+#define IP4_ADDR_TUNL_DEV1 "10.1.1.200"
+
+#define IP6_ADDR_VETH0 "::11"
+#define IP6_ADDR1_VETH1 "::22"
+#define IP6_ADDR2_VETH1 "::bb"
+
+#define IP4_ADDR1_HEX_VETH1 0xac1001c8
+#define IP4_ADDR2_HEX_VETH1 0xac100114
+#define IP6_ADDR1_HEX_VETH1 0x22
+#define IP6_ADDR2_HEX_VETH1 0xbb
+
+#define MAC_TUNL_DEV0 "52:54:00:d9:01:00"
+#define MAC_TUNL_DEV1 "52:54:00:d9:02:00"
+#define MAC_VETH1 "52:54:00:d9:03:00"
+
+#define VXLAN_TUNL_DEV0 "vxlan00"
+#define VXLAN_TUNL_DEV1 "vxlan11"
+#define IP6VXLAN_TUNL_DEV0 "ip6vxlan00"
+#define IP6VXLAN_TUNL_DEV1 "ip6vxlan11"
+
+#define IPIP_TUNL_DEV0 "ipip00"
+#define IPIP_TUNL_DEV1 "ipip11"
+
+#define XFRM_AUTH "0x1111111111111111111111111111111111111111"
+#define XFRM_ENC "0x22222222222222222222222222222222"
+#define XFRM_SPI_IN_TO_OUT 0x1
+#define XFRM_SPI_OUT_TO_IN 0x2
+
+#define PING_ARGS "-i 0.01 -c 3 -w 10 -q"
+
+static int config_device(void)
+{
+ SYS(fail, "ip netns add at_ns0");
+ SYS(fail, "ip link add veth0 address " MAC_VETH1 " type veth peer name veth1");
+ SYS(fail, "ip link set veth0 netns at_ns0");
+ SYS(fail, "ip addr add " IP4_ADDR1_VETH1 "/24 dev veth1");
+ SYS(fail, "ip link set dev veth1 up mtu 1500");
+ SYS(fail, "ip netns exec at_ns0 ip addr add " IP4_ADDR_VETH0 "/24 dev veth0");
+ SYS(fail, "ip netns exec at_ns0 ip link set dev veth0 up mtu 1500");
+
+ return 0;
+fail:
+ return -1;
+}
+
+static void cleanup(void)
+{
+ SYS_NOFAIL("test -f /var/run/netns/at_ns0 && ip netns delete at_ns0");
+ SYS_NOFAIL("ip link del veth1");
+ SYS_NOFAIL("ip link del %s", VXLAN_TUNL_DEV1);
+ SYS_NOFAIL("ip link del %s", IP6VXLAN_TUNL_DEV1);
+}
+
+static int add_vxlan_tunnel(void)
+{
+ /* at_ns0 namespace */
+ SYS(fail, "ip netns exec at_ns0 ip link add dev %s type vxlan external gbp dstport 4789",
+ VXLAN_TUNL_DEV0);
+ SYS(fail, "ip netns exec at_ns0 ip link set dev %s address %s up",
+ VXLAN_TUNL_DEV0, MAC_TUNL_DEV0);
+ SYS(fail, "ip netns exec at_ns0 ip addr add dev %s %s/24",
+ VXLAN_TUNL_DEV0, IP4_ADDR_TUNL_DEV0);
+ SYS(fail, "ip netns exec at_ns0 ip neigh add %s lladdr %s dev %s",
+ IP4_ADDR_TUNL_DEV1, MAC_TUNL_DEV1, VXLAN_TUNL_DEV0);
+ SYS(fail, "ip netns exec at_ns0 ip neigh add %s lladdr %s dev veth0",
+ IP4_ADDR2_VETH1, MAC_VETH1);
+
+ /* root namespace */
+ SYS(fail, "ip link add dev %s type vxlan external gbp dstport 4789",
+ VXLAN_TUNL_DEV1);
+ SYS(fail, "ip link set dev %s address %s up", VXLAN_TUNL_DEV1, MAC_TUNL_DEV1);
+ SYS(fail, "ip addr add dev %s %s/24", VXLAN_TUNL_DEV1, IP4_ADDR_TUNL_DEV1);
+ SYS(fail, "ip neigh add %s lladdr %s dev %s",
+ IP4_ADDR_TUNL_DEV0, MAC_TUNL_DEV0, VXLAN_TUNL_DEV1);
+
+ return 0;
+fail:
+ return -1;
+}
+
+static void delete_vxlan_tunnel(void)
+{
+ SYS_NOFAIL("ip netns exec at_ns0 ip link delete dev %s",
+ VXLAN_TUNL_DEV0);
+ SYS_NOFAIL("ip link delete dev %s", VXLAN_TUNL_DEV1);
+}
+
+static int add_ip6vxlan_tunnel(void)
+{
+ SYS(fail, "ip netns exec at_ns0 ip -6 addr add %s/96 dev veth0",
+ IP6_ADDR_VETH0);
+ SYS(fail, "ip netns exec at_ns0 ip link set dev veth0 up");
+ SYS(fail, "ip -6 addr add %s/96 dev veth1", IP6_ADDR1_VETH1);
+ SYS(fail, "ip -6 addr add %s/96 dev veth1", IP6_ADDR2_VETH1);
+ SYS(fail, "ip link set dev veth1 up");
+
+ /* at_ns0 namespace */
+ SYS(fail, "ip netns exec at_ns0 ip link add dev %s type vxlan external dstport 4789",
+ IP6VXLAN_TUNL_DEV0);
+ SYS(fail, "ip netns exec at_ns0 ip addr add dev %s %s/24",
+ IP6VXLAN_TUNL_DEV0, IP4_ADDR_TUNL_DEV0);
+ SYS(fail, "ip netns exec at_ns0 ip link set dev %s address %s up",
+ IP6VXLAN_TUNL_DEV0, MAC_TUNL_DEV0);
+
+ /* root namespace */
+ SYS(fail, "ip link add dev %s type vxlan external dstport 4789",
+ IP6VXLAN_TUNL_DEV1);
+ SYS(fail, "ip addr add dev %s %s/24", IP6VXLAN_TUNL_DEV1, IP4_ADDR_TUNL_DEV1);
+ SYS(fail, "ip link set dev %s address %s up",
+ IP6VXLAN_TUNL_DEV1, MAC_TUNL_DEV1);
+
+ return 0;
+fail:
+ return -1;
+}
+
+static void delete_ip6vxlan_tunnel(void)
+{
+ SYS_NOFAIL("ip netns exec at_ns0 ip -6 addr delete %s/96 dev veth0",
+ IP6_ADDR_VETH0);
+ SYS_NOFAIL("ip -6 addr delete %s/96 dev veth1", IP6_ADDR1_VETH1);
+ SYS_NOFAIL("ip -6 addr delete %s/96 dev veth1", IP6_ADDR2_VETH1);
+ SYS_NOFAIL("ip netns exec at_ns0 ip link delete dev %s",
+ IP6VXLAN_TUNL_DEV0);
+ SYS_NOFAIL("ip link delete dev %s", IP6VXLAN_TUNL_DEV1);
+}
+
+enum ipip_encap {
+ NONE = 0,
+ FOU = 1,
+ GUE = 2,
+};
+
+static int set_ipip_encap(const char *ipproto, const char *type)
+{
+ SYS(fail, "ip -n at_ns0 fou add port 5555 %s", ipproto);
+ SYS(fail, "ip -n at_ns0 link set dev %s type ipip encap %s",
+ IPIP_TUNL_DEV0, type);
+ SYS(fail, "ip -n at_ns0 link set dev %s type ipip encap-dport 5555",
+ IPIP_TUNL_DEV0);
+
+ return 0;
+fail:
+ return -1;
+}
+
+static int add_ipip_tunnel(enum ipip_encap encap)
+{
+ int err;
+ const char *ipproto, *type;
+
+ switch (encap) {
+ case FOU:
+ ipproto = "ipproto 4";
+ type = "fou";
+ break;
+ case GUE:
+ ipproto = "gue";
+ type = ipproto;
+ break;
+ default:
+ ipproto = NULL;
+ type = ipproto;
+ }
+
+ /* at_ns0 namespace */
+ SYS(fail, "ip -n at_ns0 link add dev %s type ipip local %s remote %s",
+ IPIP_TUNL_DEV0, IP4_ADDR_VETH0, IP4_ADDR1_VETH1);
+
+ if (type && ipproto) {
+ err = set_ipip_encap(ipproto, type);
+ if (!ASSERT_OK(err, "set_ipip_encap"))
+ goto fail;
+ }
+
+ SYS(fail, "ip -n at_ns0 link set dev %s up", IPIP_TUNL_DEV0);
+ SYS(fail, "ip -n at_ns0 addr add dev %s %s/24",
+ IPIP_TUNL_DEV0, IP4_ADDR_TUNL_DEV0);
+
+ /* root namespace */
+ if (type && ipproto)
+ SYS(fail, "ip fou add port 5555 %s", ipproto);
+ SYS(fail, "ip link add dev %s type ipip external", IPIP_TUNL_DEV1);
+ SYS(fail, "ip link set dev %s up", IPIP_TUNL_DEV1);
+ SYS(fail, "ip addr add dev %s %s/24", IPIP_TUNL_DEV1,
+ IP4_ADDR_TUNL_DEV1);
+
+ return 0;
+fail:
+ return -1;
+}
+
+static void delete_ipip_tunnel(void)
+{
+ SYS_NOFAIL("ip -n at_ns0 link delete dev %s", IPIP_TUNL_DEV0);
+ SYS_NOFAIL("ip -n at_ns0 fou del port 5555");
+ SYS_NOFAIL("ip link delete dev %s", IPIP_TUNL_DEV1);
+ SYS_NOFAIL("ip fou del port 5555");
+}
+
+static int add_xfrm_tunnel(void)
+{
+ /* at_ns0 namespace
+ * at_ns0 -> root
+ */
+ SYS(fail,
+ "ip netns exec at_ns0 "
+ "ip xfrm state add src %s dst %s proto esp "
+ "spi %d reqid 1 mode tunnel replay-window 42 "
+ "auth-trunc 'hmac(sha1)' %s 96 enc 'cbc(aes)' %s",
+ IP4_ADDR_VETH0, IP4_ADDR1_VETH1, XFRM_SPI_IN_TO_OUT, XFRM_AUTH, XFRM_ENC);
+ SYS(fail,
+ "ip netns exec at_ns0 "
+ "ip xfrm policy add src %s/32 dst %s/32 dir out "
+ "tmpl src %s dst %s proto esp reqid 1 "
+ "mode tunnel",
+ IP4_ADDR_TUNL_DEV0, IP4_ADDR_TUNL_DEV1, IP4_ADDR_VETH0, IP4_ADDR1_VETH1);
+
+ /* root -> at_ns0 */
+ SYS(fail,
+ "ip netns exec at_ns0 "
+ "ip xfrm state add src %s dst %s proto esp "
+ "spi %d reqid 2 mode tunnel "
+ "auth-trunc 'hmac(sha1)' %s 96 enc 'cbc(aes)' %s",
+ IP4_ADDR1_VETH1, IP4_ADDR_VETH0, XFRM_SPI_OUT_TO_IN, XFRM_AUTH, XFRM_ENC);
+ SYS(fail,
+ "ip netns exec at_ns0 "
+ "ip xfrm policy add src %s/32 dst %s/32 dir in "
+ "tmpl src %s dst %s proto esp reqid 2 "
+ "mode tunnel",
+ IP4_ADDR_TUNL_DEV1, IP4_ADDR_TUNL_DEV0, IP4_ADDR1_VETH1, IP4_ADDR_VETH0);
+
+ /* address & route */
+ SYS(fail, "ip netns exec at_ns0 ip addr add dev veth0 %s/32",
+ IP4_ADDR_TUNL_DEV0);
+ SYS(fail, "ip netns exec at_ns0 ip route add %s dev veth0 via %s src %s",
+ IP4_ADDR_TUNL_DEV1, IP4_ADDR1_VETH1, IP4_ADDR_TUNL_DEV0);
+
+ /* root namespace
+ * at_ns0 -> root
+ */
+ SYS(fail,
+ "ip xfrm state add src %s dst %s proto esp "
+ "spi %d reqid 1 mode tunnel replay-window 42 "
+ "auth-trunc 'hmac(sha1)' %s 96 enc 'cbc(aes)' %s",
+ IP4_ADDR_VETH0, IP4_ADDR1_VETH1, XFRM_SPI_IN_TO_OUT, XFRM_AUTH, XFRM_ENC);
+ SYS(fail,
+ "ip xfrm policy add src %s/32 dst %s/32 dir in "
+ "tmpl src %s dst %s proto esp reqid 1 "
+ "mode tunnel",
+ IP4_ADDR_TUNL_DEV0, IP4_ADDR_TUNL_DEV1, IP4_ADDR_VETH0, IP4_ADDR1_VETH1);
+
+ /* root -> at_ns0 */
+ SYS(fail,
+ "ip xfrm state add src %s dst %s proto esp "
+ "spi %d reqid 2 mode tunnel "
+ "auth-trunc 'hmac(sha1)' %s 96 enc 'cbc(aes)' %s",
+ IP4_ADDR1_VETH1, IP4_ADDR_VETH0, XFRM_SPI_OUT_TO_IN, XFRM_AUTH, XFRM_ENC);
+ SYS(fail,
+ "ip xfrm policy add src %s/32 dst %s/32 dir out "
+ "tmpl src %s dst %s proto esp reqid 2 "
+ "mode tunnel",
+ IP4_ADDR_TUNL_DEV1, IP4_ADDR_TUNL_DEV0, IP4_ADDR1_VETH1, IP4_ADDR_VETH0);
+
+ /* address & route */
+ SYS(fail, "ip addr add dev veth1 %s/32", IP4_ADDR_TUNL_DEV1);
+ SYS(fail, "ip route add %s dev veth1 via %s src %s",
+ IP4_ADDR_TUNL_DEV0, IP4_ADDR_VETH0, IP4_ADDR_TUNL_DEV1);
+
+ return 0;
+fail:
+ return -1;
+}
+
+static void delete_xfrm_tunnel(void)
+{
+ SYS_NOFAIL("ip xfrm policy delete dir out src %s/32 dst %s/32",
+ IP4_ADDR_TUNL_DEV1, IP4_ADDR_TUNL_DEV0);
+ SYS_NOFAIL("ip xfrm policy delete dir in src %s/32 dst %s/32",
+ IP4_ADDR_TUNL_DEV0, IP4_ADDR_TUNL_DEV1);
+ SYS_NOFAIL("ip xfrm state delete src %s dst %s proto esp spi %d",
+ IP4_ADDR_VETH0, IP4_ADDR1_VETH1, XFRM_SPI_IN_TO_OUT);
+ SYS_NOFAIL("ip xfrm state delete src %s dst %s proto esp spi %d",
+ IP4_ADDR1_VETH1, IP4_ADDR_VETH0, XFRM_SPI_OUT_TO_IN);
+}
+
+static int test_ping(int family, const char *addr)
+{
+ SYS(fail, "%s %s %s > /dev/null", ping_command(family), PING_ARGS, addr);
+ return 0;
+fail:
+ return -1;
+}
+
+static int attach_tc_prog(struct bpf_tc_hook *hook, int igr_fd, int egr_fd)
+{
+ DECLARE_LIBBPF_OPTS(bpf_tc_opts, opts1, .handle = 1,
+ .priority = 1, .prog_fd = igr_fd);
+ DECLARE_LIBBPF_OPTS(bpf_tc_opts, opts2, .handle = 1,
+ .priority = 1, .prog_fd = egr_fd);
+ int ret;
+
+ ret = bpf_tc_hook_create(hook);
+ if (!ASSERT_OK(ret, "create tc hook"))
+ return ret;
+
+ if (igr_fd >= 0) {
+ hook->attach_point = BPF_TC_INGRESS;
+ ret = bpf_tc_attach(hook, &opts1);
+ if (!ASSERT_OK(ret, "bpf_tc_attach")) {
+ bpf_tc_hook_destroy(hook);
+ return ret;
+ }
+ }
+
+ if (egr_fd >= 0) {
+ hook->attach_point = BPF_TC_EGRESS;
+ ret = bpf_tc_attach(hook, &opts2);
+ if (!ASSERT_OK(ret, "bpf_tc_attach")) {
+ bpf_tc_hook_destroy(hook);
+ return ret;
+ }
+ }
+
+ return 0;
+}
+
+static void test_vxlan_tunnel(void)
+{
+ struct test_tunnel_kern *skel = NULL;
+ struct nstoken *nstoken;
+ int local_ip_map_fd = -1;
+ int set_src_prog_fd, get_src_prog_fd;
+ int set_dst_prog_fd;
+ int key = 0, ifindex = -1;
+ uint local_ip;
+ int err;
+ DECLARE_LIBBPF_OPTS(bpf_tc_hook, tc_hook,
+ .attach_point = BPF_TC_INGRESS);
+
+ /* add vxlan tunnel */
+ err = add_vxlan_tunnel();
+ if (!ASSERT_OK(err, "add vxlan tunnel"))
+ goto done;
+
+ /* load and attach bpf prog to tunnel dev tc hook point */
+ skel = test_tunnel_kern__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "test_tunnel_kern__open_and_load"))
+ goto done;
+ ifindex = if_nametoindex(VXLAN_TUNL_DEV1);
+ if (!ASSERT_NEQ(ifindex, 0, "vxlan11 ifindex"))
+ goto done;
+ tc_hook.ifindex = ifindex;
+ get_src_prog_fd = bpf_program__fd(skel->progs.vxlan_get_tunnel_src);
+ set_src_prog_fd = bpf_program__fd(skel->progs.vxlan_set_tunnel_src);
+ if (!ASSERT_GE(get_src_prog_fd, 0, "bpf_program__fd"))
+ goto done;
+ if (!ASSERT_GE(set_src_prog_fd, 0, "bpf_program__fd"))
+ goto done;
+ if (attach_tc_prog(&tc_hook, get_src_prog_fd, set_src_prog_fd))
+ goto done;
+
+ /* load and attach bpf prog to veth dev tc hook point */
+ ifindex = if_nametoindex("veth1");
+ if (!ASSERT_NEQ(ifindex, 0, "veth1 ifindex"))
+ goto done;
+ tc_hook.ifindex = ifindex;
+ set_dst_prog_fd = bpf_program__fd(skel->progs.veth_set_outer_dst);
+ if (!ASSERT_GE(set_dst_prog_fd, 0, "bpf_program__fd"))
+ goto done;
+ if (attach_tc_prog(&tc_hook, set_dst_prog_fd, -1))
+ goto done;
+
+ /* load and attach prog set_md to tunnel dev tc hook point at_ns0 */
+ nstoken = open_netns("at_ns0");
+ if (!ASSERT_OK_PTR(nstoken, "setns src"))
+ goto done;
+ ifindex = if_nametoindex(VXLAN_TUNL_DEV0);
+ if (!ASSERT_NEQ(ifindex, 0, "vxlan00 ifindex"))
+ goto done;
+ tc_hook.ifindex = ifindex;
+ set_dst_prog_fd = bpf_program__fd(skel->progs.vxlan_set_tunnel_dst);
+ if (!ASSERT_GE(set_dst_prog_fd, 0, "bpf_program__fd"))
+ goto done;
+ if (attach_tc_prog(&tc_hook, -1, set_dst_prog_fd))
+ goto done;
+ close_netns(nstoken);
+
+ /* use veth1 ip 2 as tunnel source ip */
+ local_ip_map_fd = bpf_map__fd(skel->maps.local_ip_map);
+ if (!ASSERT_GE(local_ip_map_fd, 0, "bpf_map__fd"))
+ goto done;
+ local_ip = IP4_ADDR2_HEX_VETH1;
+ err = bpf_map_update_elem(local_ip_map_fd, &key, &local_ip, BPF_ANY);
+ if (!ASSERT_OK(err, "update bpf local_ip_map"))
+ goto done;
+
+ /* ping test */
+ err = test_ping(AF_INET, IP4_ADDR_TUNL_DEV0);
+ if (!ASSERT_OK(err, "test_ping"))
+ goto done;
+
+done:
+ /* delete vxlan tunnel */
+ delete_vxlan_tunnel();
+ if (local_ip_map_fd >= 0)
+ close(local_ip_map_fd);
+ if (skel)
+ test_tunnel_kern__destroy(skel);
+}
+
+static void test_ip6vxlan_tunnel(void)
+{
+ struct test_tunnel_kern *skel = NULL;
+ struct nstoken *nstoken;
+ int local_ip_map_fd = -1;
+ int set_src_prog_fd, get_src_prog_fd;
+ int set_dst_prog_fd;
+ int key = 0, ifindex = -1;
+ uint local_ip;
+ int err;
+ DECLARE_LIBBPF_OPTS(bpf_tc_hook, tc_hook,
+ .attach_point = BPF_TC_INGRESS);
+
+ /* add vxlan tunnel */
+ err = add_ip6vxlan_tunnel();
+ if (!ASSERT_OK(err, "add_ip6vxlan_tunnel"))
+ goto done;
+
+ /* load and attach bpf prog to tunnel dev tc hook point */
+ skel = test_tunnel_kern__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "test_tunnel_kern__open_and_load"))
+ goto done;
+ ifindex = if_nametoindex(IP6VXLAN_TUNL_DEV1);
+ if (!ASSERT_NEQ(ifindex, 0, "ip6vxlan11 ifindex"))
+ goto done;
+ tc_hook.ifindex = ifindex;
+ get_src_prog_fd = bpf_program__fd(skel->progs.ip6vxlan_get_tunnel_src);
+ set_src_prog_fd = bpf_program__fd(skel->progs.ip6vxlan_set_tunnel_src);
+ if (!ASSERT_GE(set_src_prog_fd, 0, "bpf_program__fd"))
+ goto done;
+ if (!ASSERT_GE(get_src_prog_fd, 0, "bpf_program__fd"))
+ goto done;
+ if (attach_tc_prog(&tc_hook, get_src_prog_fd, set_src_prog_fd))
+ goto done;
+
+ /* load and attach prog set_md to tunnel dev tc hook point at_ns0 */
+ nstoken = open_netns("at_ns0");
+ if (!ASSERT_OK_PTR(nstoken, "setns src"))
+ goto done;
+ ifindex = if_nametoindex(IP6VXLAN_TUNL_DEV0);
+ if (!ASSERT_NEQ(ifindex, 0, "ip6vxlan00 ifindex"))
+ goto done;
+ tc_hook.ifindex = ifindex;
+ set_dst_prog_fd = bpf_program__fd(skel->progs.ip6vxlan_set_tunnel_dst);
+ if (!ASSERT_GE(set_dst_prog_fd, 0, "bpf_program__fd"))
+ goto done;
+ if (attach_tc_prog(&tc_hook, -1, set_dst_prog_fd))
+ goto done;
+ close_netns(nstoken);
+
+ /* use veth1 ip 2 as tunnel source ip */
+ local_ip_map_fd = bpf_map__fd(skel->maps.local_ip_map);
+ if (!ASSERT_GE(local_ip_map_fd, 0, "get local_ip_map fd"))
+ goto done;
+ local_ip = IP6_ADDR2_HEX_VETH1;
+ err = bpf_map_update_elem(local_ip_map_fd, &key, &local_ip, BPF_ANY);
+ if (!ASSERT_OK(err, "update bpf local_ip_map"))
+ goto done;
+
+ /* ping test */
+ err = test_ping(AF_INET, IP4_ADDR_TUNL_DEV0);
+ if (!ASSERT_OK(err, "test_ping"))
+ goto done;
+
+done:
+ /* delete ipv6 vxlan tunnel */
+ delete_ip6vxlan_tunnel();
+ if (local_ip_map_fd >= 0)
+ close(local_ip_map_fd);
+ if (skel)
+ test_tunnel_kern__destroy(skel);
+}
+
+static void test_ipip_tunnel(enum ipip_encap encap)
+{
+ struct test_tunnel_kern *skel = NULL;
+ struct nstoken *nstoken;
+ int set_src_prog_fd, get_src_prog_fd;
+ int ifindex = -1;
+ int err;
+ DECLARE_LIBBPF_OPTS(bpf_tc_hook, tc_hook,
+ .attach_point = BPF_TC_INGRESS);
+
+ /* add ipip tunnel */
+ err = add_ipip_tunnel(encap);
+ if (!ASSERT_OK(err, "add_ipip_tunnel"))
+ goto done;
+
+ /* load and attach bpf prog to tunnel dev tc hook point */
+ skel = test_tunnel_kern__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "test_tunnel_kern__open_and_load"))
+ goto done;
+ ifindex = if_nametoindex(IPIP_TUNL_DEV1);
+ if (!ASSERT_NEQ(ifindex, 0, "ipip11 ifindex"))
+ goto done;
+ tc_hook.ifindex = ifindex;
+
+ switch (encap) {
+ case FOU:
+ get_src_prog_fd = bpf_program__fd(
+ skel->progs.ipip_encap_get_tunnel);
+ set_src_prog_fd = bpf_program__fd(
+ skel->progs.ipip_fou_set_tunnel);
+ break;
+ case GUE:
+ get_src_prog_fd = bpf_program__fd(
+ skel->progs.ipip_encap_get_tunnel);
+ set_src_prog_fd = bpf_program__fd(
+ skel->progs.ipip_gue_set_tunnel);
+ break;
+ default:
+ get_src_prog_fd = bpf_program__fd(
+ skel->progs.ipip_get_tunnel);
+ set_src_prog_fd = bpf_program__fd(
+ skel->progs.ipip_set_tunnel);
+ }
+
+ if (!ASSERT_GE(set_src_prog_fd, 0, "bpf_program__fd"))
+ goto done;
+ if (!ASSERT_GE(get_src_prog_fd, 0, "bpf_program__fd"))
+ goto done;
+ if (attach_tc_prog(&tc_hook, get_src_prog_fd, set_src_prog_fd))
+ goto done;
+
+ /* ping from root namespace test */
+ err = test_ping(AF_INET, IP4_ADDR_TUNL_DEV0);
+ if (!ASSERT_OK(err, "test_ping"))
+ goto done;
+
+ /* ping from at_ns0 namespace test */
+ nstoken = open_netns("at_ns0");
+ err = test_ping(AF_INET, IP4_ADDR_TUNL_DEV1);
+ if (!ASSERT_OK(err, "test_ping"))
+ goto done;
+ close_netns(nstoken);
+
+done:
+ /* delete ipip tunnel */
+ delete_ipip_tunnel();
+ if (skel)
+ test_tunnel_kern__destroy(skel);
+}
+
+static void test_xfrm_tunnel(void)
+{
+ DECLARE_LIBBPF_OPTS(bpf_tc_hook, tc_hook,
+ .attach_point = BPF_TC_INGRESS);
+ LIBBPF_OPTS(bpf_xdp_attach_opts, opts);
+ struct test_tunnel_kern *skel = NULL;
+ struct nstoken *nstoken;
+ int xdp_prog_fd;
+ int tc_prog_fd;
+ int ifindex;
+ int err;
+
+ err = add_xfrm_tunnel();
+ if (!ASSERT_OK(err, "add_xfrm_tunnel"))
+ return;
+
+ skel = test_tunnel_kern__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "test_tunnel_kern__open_and_load"))
+ goto done;
+
+ ifindex = if_nametoindex("veth1");
+ if (!ASSERT_NEQ(ifindex, 0, "veth1 ifindex"))
+ goto done;
+
+ /* attach tc prog to tunnel dev */
+ tc_hook.ifindex = ifindex;
+ tc_prog_fd = bpf_program__fd(skel->progs.xfrm_get_state);
+ if (!ASSERT_GE(tc_prog_fd, 0, "bpf_program__fd"))
+ goto done;
+ if (attach_tc_prog(&tc_hook, tc_prog_fd, -1))
+ goto done;
+
+ /* attach xdp prog to tunnel dev */
+ xdp_prog_fd = bpf_program__fd(skel->progs.xfrm_get_state_xdp);
+ if (!ASSERT_GE(xdp_prog_fd, 0, "bpf_program__fd"))
+ goto done;
+ err = bpf_xdp_attach(ifindex, xdp_prog_fd, XDP_FLAGS_REPLACE, &opts);
+ if (!ASSERT_OK(err, "bpf_xdp_attach"))
+ goto done;
+
+ /* ping from at_ns0 namespace test */
+ nstoken = open_netns("at_ns0");
+ err = test_ping(AF_INET, IP4_ADDR_TUNL_DEV1);
+ close_netns(nstoken);
+ if (!ASSERT_OK(err, "test_ping"))
+ goto done;
+
+ if (!ASSERT_EQ(skel->bss->xfrm_reqid, 1, "req_id"))
+ goto done;
+ if (!ASSERT_EQ(skel->bss->xfrm_spi, XFRM_SPI_IN_TO_OUT, "spi"))
+ goto done;
+ if (!ASSERT_EQ(skel->bss->xfrm_remote_ip, 0xac100164, "remote_ip"))
+ goto done;
+ if (!ASSERT_EQ(skel->bss->xfrm_replay_window, 42, "replay_window"))
+ goto done;
+
+done:
+ delete_xfrm_tunnel();
+ if (skel)
+ test_tunnel_kern__destroy(skel);
+}
+
+#define RUN_TEST(name, ...) \
+ ({ \
+ if (test__start_subtest(#name)) { \
+ config_device(); \
+ test_ ## name(__VA_ARGS__); \
+ cleanup(); \
+ } \
+ })
+
+static void *test_tunnel_run_tests(void *arg)
+{
+ RUN_TEST(vxlan_tunnel);
+ RUN_TEST(ip6vxlan_tunnel);
+ RUN_TEST(ipip_tunnel, NONE);
+ RUN_TEST(ipip_tunnel, FOU);
+ RUN_TEST(ipip_tunnel, GUE);
+ RUN_TEST(xfrm_tunnel);
+
+ return NULL;
+}
+
+void test_tunnel(void)
+{
+ pthread_t test_thread;
+ int err;
+
+ /* Run the tests in their own thread to isolate the namespace changes
+ * so they do not affect the environment of other tests.
+ * (specifically needed because of unshare(CLONE_NEWNS) in open_netns())
+ */
+ err = pthread_create(&test_thread, NULL, &test_tunnel_run_tests, NULL);
+ if (ASSERT_OK(err, "pthread_create"))
+ ASSERT_OK(pthread_join(test_thread, NULL), "pthread_join");
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/time_tai.c b/tools/testing/selftests/bpf/prog_tests/time_tai.c
new file mode 100644
index 000000000000..f45af1b0ef2c
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/time_tai.c
@@ -0,0 +1,74 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (C) 2022 Linutronix GmbH */
+
+#include <test_progs.h>
+#include <network_helpers.h>
+
+#include "test_time_tai.skel.h"
+
+#include <time.h>
+#include <stdint.h>
+
+#define TAI_THRESHOLD 1000000000ULL /* 1s */
+#define NSEC_PER_SEC 1000000000ULL
+
+static __u64 ts_to_ns(const struct timespec *ts)
+{
+ return ts->tv_sec * NSEC_PER_SEC + ts->tv_nsec;
+}
+
+void test_time_tai(void)
+{
+ struct __sk_buff skb = {
+ .cb[0] = 0,
+ .cb[1] = 0,
+ .tstamp = 0,
+ };
+ LIBBPF_OPTS(bpf_test_run_opts, topts,
+ .data_in = &pkt_v4,
+ .data_size_in = sizeof(pkt_v4),
+ .ctx_in = &skb,
+ .ctx_size_in = sizeof(skb),
+ .ctx_out = &skb,
+ .ctx_size_out = sizeof(skb),
+ );
+ struct test_time_tai *skel;
+ struct timespec now_tai;
+ __u64 ts1, ts2, now;
+ int ret, prog_fd;
+
+ /* Open and load */
+ skel = test_time_tai__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "tai_open"))
+ return;
+
+ /* Run test program */
+ prog_fd = bpf_program__fd(skel->progs.time_tai);
+ ret = bpf_prog_test_run_opts(prog_fd, &topts);
+ ASSERT_OK(ret, "test_run");
+
+ /* Retrieve generated TAI timestamps */
+ ts1 = skb.tstamp;
+ ts2 = skb.cb[0] | ((__u64)skb.cb[1] << 32);
+
+ /* TAI != 0 */
+ ASSERT_NEQ(ts1, 0, "tai_ts1");
+ ASSERT_NEQ(ts2, 0, "tai_ts2");
+
+ /* TAI is moving forward only */
+ ASSERT_GE(ts2, ts1, "tai_forward");
+
+ /* Check for future */
+ ret = clock_gettime(CLOCK_TAI, &now_tai);
+ ASSERT_EQ(ret, 0, "tai_gettime");
+ now = ts_to_ns(&now_tai);
+
+ ASSERT_TRUE(now > ts1, "tai_future_ts1");
+ ASSERT_TRUE(now > ts2, "tai_future_ts2");
+
+ /* Check for reasonable range */
+ ASSERT_TRUE(now - ts1 < TAI_THRESHOLD, "tai_range_ts1");
+ ASSERT_TRUE(now - ts2 < TAI_THRESHOLD, "tai_range_ts2");
+
+ test_time_tai__destroy(skel);
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/timer.c b/tools/testing/selftests/bpf/prog_tests/timer.c
new file mode 100644
index 000000000000..d66687f1ee6a
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/timer.c
@@ -0,0 +1,97 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2021 Facebook */
+#include <test_progs.h>
+#include "timer.skel.h"
+#include "timer_failure.skel.h"
+
+#define NUM_THR 8
+
+static void *spin_lock_thread(void *arg)
+{
+ int i, err, prog_fd = *(int *)arg;
+ LIBBPF_OPTS(bpf_test_run_opts, topts);
+
+ for (i = 0; i < 10000; i++) {
+ err = bpf_prog_test_run_opts(prog_fd, &topts);
+ if (!ASSERT_OK(err, "test_run_opts err") ||
+ !ASSERT_OK(topts.retval, "test_run_opts retval"))
+ break;
+ }
+
+ pthread_exit(arg);
+}
+
+static int timer(struct timer *timer_skel)
+{
+ int i, err, prog_fd;
+ LIBBPF_OPTS(bpf_test_run_opts, topts);
+ pthread_t thread_id[NUM_THR];
+ void *ret;
+
+ err = timer__attach(timer_skel);
+ if (!ASSERT_OK(err, "timer_attach"))
+ return err;
+
+ ASSERT_EQ(timer_skel->data->callback_check, 52, "callback_check1");
+ ASSERT_EQ(timer_skel->data->callback2_check, 52, "callback2_check1");
+ ASSERT_EQ(timer_skel->bss->pinned_callback_check, 0, "pinned_callback_check1");
+
+ prog_fd = bpf_program__fd(timer_skel->progs.test1);
+ err = bpf_prog_test_run_opts(prog_fd, &topts);
+ ASSERT_OK(err, "test_run");
+ ASSERT_EQ(topts.retval, 0, "test_run");
+ timer__detach(timer_skel);
+
+ usleep(50); /* 10 usecs should be enough, but give it extra */
+ /* check that timer_cb1() was executed 10+10 times */
+ ASSERT_EQ(timer_skel->data->callback_check, 42, "callback_check2");
+ ASSERT_EQ(timer_skel->data->callback2_check, 42, "callback2_check2");
+
+ /* check that timer_cb2() was executed twice */
+ ASSERT_EQ(timer_skel->bss->bss_data, 10, "bss_data");
+
+ /* check that timer_cb3() was executed twice */
+ ASSERT_EQ(timer_skel->bss->abs_data, 12, "abs_data");
+
+ /* check that timer_cb_pinned() was executed twice */
+ ASSERT_EQ(timer_skel->bss->pinned_callback_check, 2, "pinned_callback_check");
+
+ /* check that there were no errors in timer execution */
+ ASSERT_EQ(timer_skel->bss->err, 0, "err");
+
+ /* check that code paths completed */
+ ASSERT_EQ(timer_skel->bss->ok, 1 | 2 | 4, "ok");
+
+ prog_fd = bpf_program__fd(timer_skel->progs.race);
+ for (i = 0; i < NUM_THR; i++) {
+ err = pthread_create(&thread_id[i], NULL,
+ &spin_lock_thread, &prog_fd);
+ if (!ASSERT_OK(err, "pthread_create"))
+ break;
+ }
+
+ while (i) {
+ err = pthread_join(thread_id[--i], &ret);
+ if (ASSERT_OK(err, "pthread_join"))
+ ASSERT_EQ(ret, (void *)&prog_fd, "pthread_join");
+ }
+
+ return 0;
+}
+
+/* TODO: use pid filtering */
+void serial_test_timer(void)
+{
+ struct timer *timer_skel = NULL;
+ int err;
+
+ timer_skel = timer__open_and_load();
+ if (!ASSERT_OK_PTR(timer_skel, "timer_skel_load"))
+ return;
+
+ err = timer(timer_skel);
+ ASSERT_OK(err, "timer");
+ timer__destroy(timer_skel);
+
+ RUN_TESTS(timer_failure);
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/timer_crash.c b/tools/testing/selftests/bpf/prog_tests/timer_crash.c
new file mode 100644
index 000000000000..f74b82305da8
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/timer_crash.c
@@ -0,0 +1,32 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <test_progs.h>
+#include "timer_crash.skel.h"
+
+enum {
+ MODE_ARRAY,
+ MODE_HASH,
+};
+
+static void test_timer_crash_mode(int mode)
+{
+ struct timer_crash *skel;
+
+ skel = timer_crash__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "timer_crash__open_and_load"))
+ return;
+ skel->bss->pid = getpid();
+ skel->bss->crash_map = mode;
+ if (!ASSERT_OK(timer_crash__attach(skel), "timer_crash__attach"))
+ goto end;
+ usleep(1);
+end:
+ timer_crash__destroy(skel);
+}
+
+void test_timer_crash(void)
+{
+ if (test__start_subtest("array"))
+ test_timer_crash_mode(MODE_ARRAY);
+ if (test__start_subtest("hash"))
+ test_timer_crash_mode(MODE_HASH);
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/timer_mim.c b/tools/testing/selftests/bpf/prog_tests/timer_mim.c
new file mode 100644
index 000000000000..9ff7843909e7
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/timer_mim.c
@@ -0,0 +1,76 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2021 Facebook */
+#include <test_progs.h>
+#include "timer_mim.skel.h"
+#include "timer_mim_reject.skel.h"
+
+static int timer_mim(struct timer_mim *timer_skel)
+{
+ __u64 cnt1, cnt2;
+ int err, prog_fd, key1 = 1;
+ LIBBPF_OPTS(bpf_test_run_opts, topts);
+
+ err = timer_mim__attach(timer_skel);
+ if (!ASSERT_OK(err, "timer_attach"))
+ return err;
+
+ prog_fd = bpf_program__fd(timer_skel->progs.test1);
+ err = bpf_prog_test_run_opts(prog_fd, &topts);
+ ASSERT_OK(err, "test_run");
+ ASSERT_EQ(topts.retval, 0, "test_run");
+ timer_mim__detach(timer_skel);
+
+ /* check that timer_cb[12] are incrementing 'cnt' */
+ cnt1 = READ_ONCE(timer_skel->bss->cnt);
+ for (int i = 0; i < 100; i++) {
+ cnt2 = READ_ONCE(timer_skel->bss->cnt);
+ if (cnt2 != cnt1)
+ break;
+ usleep(200); /* 100 times more than interval */
+ }
+ ASSERT_GT(cnt2, cnt1, "cnt");
+
+ ASSERT_EQ(timer_skel->bss->err, 0, "err");
+ /* check that code paths completed */
+ ASSERT_EQ(timer_skel->bss->ok, 1 | 2, "ok");
+
+ close(bpf_map__fd(timer_skel->maps.inner_htab));
+ err = bpf_map__delete_elem(timer_skel->maps.outer_arr, &key1, sizeof(key1), 0);
+ ASSERT_EQ(err, 0, "delete inner map");
+
+ /* check that timer_cb[12] are no longer running */
+ cnt1 = READ_ONCE(timer_skel->bss->cnt);
+ for (int i = 0; i < 100; i++) {
+ usleep(200); /* 100 times more than interval */
+ cnt2 = READ_ONCE(timer_skel->bss->cnt);
+ if (cnt2 == cnt1)
+ break;
+ }
+ ASSERT_EQ(cnt2, cnt1, "cnt");
+
+ return 0;
+}
+
+void serial_test_timer_mim(void)
+{
+ struct timer_mim_reject *timer_reject_skel = NULL;
+ libbpf_print_fn_t old_print_fn = NULL;
+ struct timer_mim *timer_skel = NULL;
+ int err;
+
+ old_print_fn = libbpf_set_print(NULL);
+ timer_reject_skel = timer_mim_reject__open_and_load();
+ libbpf_set_print(old_print_fn);
+ if (!ASSERT_ERR_PTR(timer_reject_skel, "timer_reject_skel_load"))
+ goto cleanup;
+
+ timer_skel = timer_mim__open_and_load();
+ if (!ASSERT_OK_PTR(timer_skel, "timer_skel_load"))
+ goto cleanup;
+
+ err = timer_mim(timer_skel);
+ ASSERT_OK(err, "timer_mim");
+cleanup:
+ timer_mim__destroy(timer_skel);
+ timer_mim_reject__destroy(timer_reject_skel);
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/token.c b/tools/testing/selftests/bpf/prog_tests/token.c
new file mode 100644
index 000000000000..fc4a175d8d76
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/token.c
@@ -0,0 +1,1052 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2023 Meta Platforms, Inc. and affiliates. */
+#define _GNU_SOURCE
+#include <test_progs.h>
+#include <bpf/btf.h>
+#include "cap_helpers.h"
+#include <fcntl.h>
+#include <sched.h>
+#include <signal.h>
+#include <unistd.h>
+#include <linux/filter.h>
+#include <linux/unistd.h>
+#include <linux/mount.h>
+#include <sys/socket.h>
+#include <sys/stat.h>
+#include <sys/syscall.h>
+#include <sys/un.h>
+#include "priv_map.skel.h"
+#include "priv_prog.skel.h"
+#include "dummy_st_ops_success.skel.h"
+#include "token_lsm.skel.h"
+
+static inline int sys_mount(const char *dev_name, const char *dir_name,
+ const char *type, unsigned long flags,
+ const void *data)
+{
+ return syscall(__NR_mount, dev_name, dir_name, type, flags, data);
+}
+
+static inline int sys_fsopen(const char *fsname, unsigned flags)
+{
+ return syscall(__NR_fsopen, fsname, flags);
+}
+
+static inline int sys_fspick(int dfd, const char *path, unsigned flags)
+{
+ return syscall(__NR_fspick, dfd, path, flags);
+}
+
+static inline int sys_fsconfig(int fs_fd, unsigned cmd, const char *key, const void *val, int aux)
+{
+ return syscall(__NR_fsconfig, fs_fd, cmd, key, val, aux);
+}
+
+static inline int sys_fsmount(int fs_fd, unsigned flags, unsigned ms_flags)
+{
+ return syscall(__NR_fsmount, fs_fd, flags, ms_flags);
+}
+
+static inline int sys_move_mount(int from_dfd, const char *from_path,
+ int to_dfd, const char *to_path,
+ unsigned flags)
+{
+ return syscall(__NR_move_mount, from_dfd, from_path, to_dfd, to_path, flags);
+}
+
+static int drop_priv_caps(__u64 *old_caps)
+{
+ return cap_disable_effective((1ULL << CAP_BPF) |
+ (1ULL << CAP_PERFMON) |
+ (1ULL << CAP_NET_ADMIN) |
+ (1ULL << CAP_SYS_ADMIN), old_caps);
+}
+
+static int restore_priv_caps(__u64 old_caps)
+{
+ return cap_enable_effective(old_caps, NULL);
+}
+
+static int set_delegate_mask(int fs_fd, const char *key, __u64 mask, const char *mask_str)
+{
+ char buf[32];
+ int err;
+
+ if (!mask_str) {
+ if (mask == ~0ULL) {
+ mask_str = "any";
+ } else {
+ snprintf(buf, sizeof(buf), "0x%llx", (unsigned long long)mask);
+ mask_str = buf;
+ }
+ }
+
+ err = sys_fsconfig(fs_fd, FSCONFIG_SET_STRING, key,
+ mask_str, 0);
+ if (err < 0)
+ err = -errno;
+ return err;
+}
+
+#define zclose(fd) do { if (fd >= 0) close(fd); fd = -1; } while (0)
+
+struct bpffs_opts {
+ __u64 cmds;
+ __u64 maps;
+ __u64 progs;
+ __u64 attachs;
+ const char *cmds_str;
+ const char *maps_str;
+ const char *progs_str;
+ const char *attachs_str;
+};
+
+static int create_bpffs_fd(void)
+{
+ int fs_fd;
+
+ /* create VFS context */
+ fs_fd = sys_fsopen("bpf", 0);
+ ASSERT_GE(fs_fd, 0, "fs_fd");
+
+ return fs_fd;
+}
+
+static int materialize_bpffs_fd(int fs_fd, struct bpffs_opts *opts)
+{
+ int mnt_fd, err;
+
+ /* set up token delegation mount options */
+ err = set_delegate_mask(fs_fd, "delegate_cmds", opts->cmds, opts->cmds_str);
+ if (!ASSERT_OK(err, "fs_cfg_cmds"))
+ return err;
+ err = set_delegate_mask(fs_fd, "delegate_maps", opts->maps, opts->maps_str);
+ if (!ASSERT_OK(err, "fs_cfg_maps"))
+ return err;
+ err = set_delegate_mask(fs_fd, "delegate_progs", opts->progs, opts->progs_str);
+ if (!ASSERT_OK(err, "fs_cfg_progs"))
+ return err;
+ err = set_delegate_mask(fs_fd, "delegate_attachs", opts->attachs, opts->attachs_str);
+ if (!ASSERT_OK(err, "fs_cfg_attachs"))
+ return err;
+
+ /* instantiate FS object */
+ err = sys_fsconfig(fs_fd, FSCONFIG_CMD_CREATE, NULL, NULL, 0);
+ if (err < 0)
+ return -errno;
+
+ /* create O_PATH fd for detached mount */
+ mnt_fd = sys_fsmount(fs_fd, 0, 0);
+ if (err < 0)
+ return -errno;
+
+ return mnt_fd;
+}
+
+/* send FD over Unix domain (AF_UNIX) socket */
+static int sendfd(int sockfd, int fd)
+{
+ struct msghdr msg = {};
+ struct cmsghdr *cmsg;
+ int fds[1] = { fd }, err;
+ char iobuf[1];
+ struct iovec io = {
+ .iov_base = iobuf,
+ .iov_len = sizeof(iobuf),
+ };
+ union {
+ char buf[CMSG_SPACE(sizeof(fds))];
+ struct cmsghdr align;
+ } u;
+
+ msg.msg_iov = &io;
+ msg.msg_iovlen = 1;
+ msg.msg_control = u.buf;
+ msg.msg_controllen = sizeof(u.buf);
+ cmsg = CMSG_FIRSTHDR(&msg);
+ cmsg->cmsg_level = SOL_SOCKET;
+ cmsg->cmsg_type = SCM_RIGHTS;
+ cmsg->cmsg_len = CMSG_LEN(sizeof(fds));
+ memcpy(CMSG_DATA(cmsg), fds, sizeof(fds));
+
+ err = sendmsg(sockfd, &msg, 0);
+ if (err < 0)
+ err = -errno;
+ if (!ASSERT_EQ(err, 1, "sendmsg"))
+ return -EINVAL;
+
+ return 0;
+}
+
+/* receive FD over Unix domain (AF_UNIX) socket */
+static int recvfd(int sockfd, int *fd)
+{
+ struct msghdr msg = {};
+ struct cmsghdr *cmsg;
+ int fds[1], err;
+ char iobuf[1];
+ struct iovec io = {
+ .iov_base = iobuf,
+ .iov_len = sizeof(iobuf),
+ };
+ union {
+ char buf[CMSG_SPACE(sizeof(fds))];
+ struct cmsghdr align;
+ } u;
+
+ msg.msg_iov = &io;
+ msg.msg_iovlen = 1;
+ msg.msg_control = u.buf;
+ msg.msg_controllen = sizeof(u.buf);
+
+ err = recvmsg(sockfd, &msg, 0);
+ if (err < 0)
+ err = -errno;
+ if (!ASSERT_EQ(err, 1, "recvmsg"))
+ return -EINVAL;
+
+ cmsg = CMSG_FIRSTHDR(&msg);
+ if (!ASSERT_OK_PTR(cmsg, "cmsg_null") ||
+ !ASSERT_EQ(cmsg->cmsg_len, CMSG_LEN(sizeof(fds)), "cmsg_len") ||
+ !ASSERT_EQ(cmsg->cmsg_level, SOL_SOCKET, "cmsg_level") ||
+ !ASSERT_EQ(cmsg->cmsg_type, SCM_RIGHTS, "cmsg_type"))
+ return -EINVAL;
+
+ memcpy(fds, CMSG_DATA(cmsg), sizeof(fds));
+ *fd = fds[0];
+
+ return 0;
+}
+
+static ssize_t write_nointr(int fd, const void *buf, size_t count)
+{
+ ssize_t ret;
+
+ do {
+ ret = write(fd, buf, count);
+ } while (ret < 0 && errno == EINTR);
+
+ return ret;
+}
+
+static int write_file(const char *path, const void *buf, size_t count)
+{
+ int fd;
+ ssize_t ret;
+
+ fd = open(path, O_WRONLY | O_CLOEXEC | O_NOCTTY | O_NOFOLLOW);
+ if (fd < 0)
+ return -1;
+
+ ret = write_nointr(fd, buf, count);
+ close(fd);
+ if (ret < 0 || (size_t)ret != count)
+ return -1;
+
+ return 0;
+}
+
+static int create_and_enter_userns(void)
+{
+ uid_t uid;
+ gid_t gid;
+ char map[100];
+
+ uid = getuid();
+ gid = getgid();
+
+ if (unshare(CLONE_NEWUSER))
+ return -1;
+
+ if (write_file("/proc/self/setgroups", "deny", sizeof("deny") - 1) &&
+ errno != ENOENT)
+ return -1;
+
+ snprintf(map, sizeof(map), "0 %d 1", uid);
+ if (write_file("/proc/self/uid_map", map, strlen(map)))
+ return -1;
+
+
+ snprintf(map, sizeof(map), "0 %d 1", gid);
+ if (write_file("/proc/self/gid_map", map, strlen(map)))
+ return -1;
+
+ if (setgid(0))
+ return -1;
+
+ if (setuid(0))
+ return -1;
+
+ return 0;
+}
+
+typedef int (*child_callback_fn)(int bpffs_fd, struct token_lsm *lsm_skel);
+
+static void child(int sock_fd, struct bpffs_opts *opts, child_callback_fn callback)
+{
+ int mnt_fd = -1, fs_fd = -1, err = 0, bpffs_fd = -1, token_fd = -1;
+ struct token_lsm *lsm_skel = NULL;
+
+ /* load and attach LSM "policy" before we go into unpriv userns */
+ lsm_skel = token_lsm__open_and_load();
+ if (!ASSERT_OK_PTR(lsm_skel, "lsm_skel_load")) {
+ err = -EINVAL;
+ goto cleanup;
+ }
+ lsm_skel->bss->my_pid = getpid();
+ err = token_lsm__attach(lsm_skel);
+ if (!ASSERT_OK(err, "lsm_skel_attach"))
+ goto cleanup;
+
+ /* setup userns with root mappings */
+ err = create_and_enter_userns();
+ if (!ASSERT_OK(err, "create_and_enter_userns"))
+ goto cleanup;
+
+ /* setup mountns to allow creating BPF FS (fsopen("bpf")) from unpriv process */
+ err = unshare(CLONE_NEWNS);
+ if (!ASSERT_OK(err, "create_mountns"))
+ goto cleanup;
+
+ err = sys_mount(NULL, "/", NULL, MS_REC | MS_PRIVATE, 0);
+ if (!ASSERT_OK(err, "remount_root"))
+ goto cleanup;
+
+ fs_fd = create_bpffs_fd();
+ if (!ASSERT_GE(fs_fd, 0, "create_bpffs_fd")) {
+ err = -EINVAL;
+ goto cleanup;
+ }
+
+ /* ensure unprivileged child cannot set delegation options */
+ err = set_delegate_mask(fs_fd, "delegate_cmds", 0x1, NULL);
+ ASSERT_EQ(err, -EPERM, "delegate_cmd_eperm");
+ err = set_delegate_mask(fs_fd, "delegate_maps", 0x1, NULL);
+ ASSERT_EQ(err, -EPERM, "delegate_maps_eperm");
+ err = set_delegate_mask(fs_fd, "delegate_progs", 0x1, NULL);
+ ASSERT_EQ(err, -EPERM, "delegate_progs_eperm");
+ err = set_delegate_mask(fs_fd, "delegate_attachs", 0x1, NULL);
+ ASSERT_EQ(err, -EPERM, "delegate_attachs_eperm");
+
+ /* pass BPF FS context object to parent */
+ err = sendfd(sock_fd, fs_fd);
+ if (!ASSERT_OK(err, "send_fs_fd"))
+ goto cleanup;
+ zclose(fs_fd);
+
+ /* avoid mucking around with mount namespaces and mounting at
+ * well-known path, just get detach-mounted BPF FS fd back from parent
+ */
+ err = recvfd(sock_fd, &mnt_fd);
+ if (!ASSERT_OK(err, "recv_mnt_fd"))
+ goto cleanup;
+
+ /* try to fspick() BPF FS and try to add some delegation options */
+ fs_fd = sys_fspick(mnt_fd, "", FSPICK_EMPTY_PATH);
+ if (!ASSERT_GE(fs_fd, 0, "bpffs_fspick")) {
+ err = -EINVAL;
+ goto cleanup;
+ }
+
+ /* ensure unprivileged child cannot reconfigure to set delegation options */
+ err = set_delegate_mask(fs_fd, "delegate_cmds", 0, "any");
+ if (!ASSERT_EQ(err, -EPERM, "delegate_cmd_eperm_reconfig")) {
+ err = -EINVAL;
+ goto cleanup;
+ }
+ err = set_delegate_mask(fs_fd, "delegate_maps", 0, "any");
+ if (!ASSERT_EQ(err, -EPERM, "delegate_maps_eperm_reconfig")) {
+ err = -EINVAL;
+ goto cleanup;
+ }
+ err = set_delegate_mask(fs_fd, "delegate_progs", 0, "any");
+ if (!ASSERT_EQ(err, -EPERM, "delegate_progs_eperm_reconfig")) {
+ err = -EINVAL;
+ goto cleanup;
+ }
+ err = set_delegate_mask(fs_fd, "delegate_attachs", 0, "any");
+ if (!ASSERT_EQ(err, -EPERM, "delegate_attachs_eperm_reconfig")) {
+ err = -EINVAL;
+ goto cleanup;
+ }
+ zclose(fs_fd);
+
+ bpffs_fd = openat(mnt_fd, ".", 0, O_RDWR);
+ if (!ASSERT_GE(bpffs_fd, 0, "bpffs_open")) {
+ err = -EINVAL;
+ goto cleanup;
+ }
+
+ /* create BPF token FD and pass it to parent for some extra checks */
+ token_fd = bpf_token_create(bpffs_fd, NULL);
+ if (!ASSERT_GT(token_fd, 0, "child_token_create")) {
+ err = -EINVAL;
+ goto cleanup;
+ }
+ err = sendfd(sock_fd, token_fd);
+ if (!ASSERT_OK(err, "send_token_fd"))
+ goto cleanup;
+ zclose(token_fd);
+
+ /* do custom test logic with customly set up BPF FS instance */
+ err = callback(bpffs_fd, lsm_skel);
+ if (!ASSERT_OK(err, "test_callback"))
+ goto cleanup;
+
+ err = 0;
+cleanup:
+ zclose(sock_fd);
+ zclose(mnt_fd);
+ zclose(fs_fd);
+ zclose(bpffs_fd);
+ zclose(token_fd);
+
+ lsm_skel->bss->my_pid = 0;
+ token_lsm__destroy(lsm_skel);
+
+ exit(-err);
+}
+
+static int wait_for_pid(pid_t pid)
+{
+ int status, ret;
+
+again:
+ ret = waitpid(pid, &status, 0);
+ if (ret == -1) {
+ if (errno == EINTR)
+ goto again;
+
+ return -1;
+ }
+
+ if (!WIFEXITED(status))
+ return -1;
+
+ return WEXITSTATUS(status);
+}
+
+static void parent(int child_pid, struct bpffs_opts *bpffs_opts, int sock_fd)
+{
+ int fs_fd = -1, mnt_fd = -1, token_fd = -1, err;
+
+ err = recvfd(sock_fd, &fs_fd);
+ if (!ASSERT_OK(err, "recv_bpffs_fd"))
+ goto cleanup;
+
+ mnt_fd = materialize_bpffs_fd(fs_fd, bpffs_opts);
+ if (!ASSERT_GE(mnt_fd, 0, "materialize_bpffs_fd")) {
+ err = -EINVAL;
+ goto cleanup;
+ }
+ zclose(fs_fd);
+
+ /* pass BPF FS context object to parent */
+ err = sendfd(sock_fd, mnt_fd);
+ if (!ASSERT_OK(err, "send_mnt_fd"))
+ goto cleanup;
+ zclose(mnt_fd);
+
+ /* receive BPF token FD back from child for some extra tests */
+ err = recvfd(sock_fd, &token_fd);
+ if (!ASSERT_OK(err, "recv_token_fd"))
+ goto cleanup;
+
+ err = wait_for_pid(child_pid);
+ ASSERT_OK(err, "waitpid_child");
+
+cleanup:
+ zclose(sock_fd);
+ zclose(fs_fd);
+ zclose(mnt_fd);
+ zclose(token_fd);
+
+ if (child_pid > 0)
+ (void)kill(child_pid, SIGKILL);
+}
+
+static void subtest_userns(struct bpffs_opts *bpffs_opts,
+ child_callback_fn child_cb)
+{
+ int sock_fds[2] = { -1, -1 };
+ int child_pid = 0, err;
+
+ err = socketpair(AF_UNIX, SOCK_STREAM, 0, sock_fds);
+ if (!ASSERT_OK(err, "socketpair"))
+ goto cleanup;
+
+ child_pid = fork();
+ if (!ASSERT_GE(child_pid, 0, "fork"))
+ goto cleanup;
+
+ if (child_pid == 0) {
+ zclose(sock_fds[0]);
+ return child(sock_fds[1], bpffs_opts, child_cb);
+
+ } else {
+ zclose(sock_fds[1]);
+ return parent(child_pid, bpffs_opts, sock_fds[0]);
+ }
+
+cleanup:
+ zclose(sock_fds[0]);
+ zclose(sock_fds[1]);
+ if (child_pid > 0)
+ (void)kill(child_pid, SIGKILL);
+}
+
+static int userns_map_create(int mnt_fd, struct token_lsm *lsm_skel)
+{
+ LIBBPF_OPTS(bpf_map_create_opts, map_opts);
+ int err, token_fd = -1, map_fd = -1;
+ __u64 old_caps = 0;
+
+ /* create BPF token from BPF FS mount */
+ token_fd = bpf_token_create(mnt_fd, NULL);
+ if (!ASSERT_GT(token_fd, 0, "token_create")) {
+ err = -EINVAL;
+ goto cleanup;
+ }
+
+ /* while inside non-init userns, we need both a BPF token *and*
+ * CAP_BPF inside current userns to create privileged map; let's test
+ * that neither BPF token alone nor namespaced CAP_BPF is sufficient
+ */
+ err = drop_priv_caps(&old_caps);
+ if (!ASSERT_OK(err, "drop_caps"))
+ goto cleanup;
+
+ /* no token, no CAP_BPF -> fail */
+ map_opts.map_flags = 0;
+ map_opts.token_fd = 0;
+ map_fd = bpf_map_create(BPF_MAP_TYPE_STACK, "wo_token_wo_bpf", 0, 8, 1, &map_opts);
+ if (!ASSERT_LT(map_fd, 0, "stack_map_wo_token_wo_cap_bpf_should_fail")) {
+ err = -EINVAL;
+ goto cleanup;
+ }
+
+ /* token without CAP_BPF -> fail */
+ map_opts.map_flags = BPF_F_TOKEN_FD;
+ map_opts.token_fd = token_fd;
+ map_fd = bpf_map_create(BPF_MAP_TYPE_STACK, "w_token_wo_bpf", 0, 8, 1, &map_opts);
+ if (!ASSERT_LT(map_fd, 0, "stack_map_w_token_wo_cap_bpf_should_fail")) {
+ err = -EINVAL;
+ goto cleanup;
+ }
+
+ /* get back effective local CAP_BPF (and CAP_SYS_ADMIN) */
+ err = restore_priv_caps(old_caps);
+ if (!ASSERT_OK(err, "restore_caps"))
+ goto cleanup;
+
+ /* CAP_BPF without token -> fail */
+ map_opts.map_flags = 0;
+ map_opts.token_fd = 0;
+ map_fd = bpf_map_create(BPF_MAP_TYPE_STACK, "wo_token_w_bpf", 0, 8, 1, &map_opts);
+ if (!ASSERT_LT(map_fd, 0, "stack_map_wo_token_w_cap_bpf_should_fail")) {
+ err = -EINVAL;
+ goto cleanup;
+ }
+
+ /* finally, namespaced CAP_BPF + token -> success */
+ map_opts.map_flags = BPF_F_TOKEN_FD;
+ map_opts.token_fd = token_fd;
+ map_fd = bpf_map_create(BPF_MAP_TYPE_STACK, "w_token_w_bpf", 0, 8, 1, &map_opts);
+ if (!ASSERT_GT(map_fd, 0, "stack_map_w_token_w_cap_bpf")) {
+ err = -EINVAL;
+ goto cleanup;
+ }
+
+cleanup:
+ zclose(token_fd);
+ zclose(map_fd);
+ return err;
+}
+
+static int userns_btf_load(int mnt_fd, struct token_lsm *lsm_skel)
+{
+ LIBBPF_OPTS(bpf_btf_load_opts, btf_opts);
+ int err, token_fd = -1, btf_fd = -1;
+ const void *raw_btf_data;
+ struct btf *btf = NULL;
+ __u32 raw_btf_size;
+ __u64 old_caps = 0;
+
+ /* create BPF token from BPF FS mount */
+ token_fd = bpf_token_create(mnt_fd, NULL);
+ if (!ASSERT_GT(token_fd, 0, "token_create")) {
+ err = -EINVAL;
+ goto cleanup;
+ }
+
+ /* while inside non-init userns, we need both a BPF token *and*
+ * CAP_BPF inside current userns to create privileged map; let's test
+ * that neither BPF token alone nor namespaced CAP_BPF is sufficient
+ */
+ err = drop_priv_caps(&old_caps);
+ if (!ASSERT_OK(err, "drop_caps"))
+ goto cleanup;
+
+ /* setup a trivial BTF data to load to the kernel */
+ btf = btf__new_empty();
+ if (!ASSERT_OK_PTR(btf, "empty_btf"))
+ goto cleanup;
+
+ ASSERT_GT(btf__add_int(btf, "int", 4, 0), 0, "int_type");
+
+ raw_btf_data = btf__raw_data(btf, &raw_btf_size);
+ if (!ASSERT_OK_PTR(raw_btf_data, "raw_btf_data"))
+ goto cleanup;
+
+ /* no token + no CAP_BPF -> failure */
+ btf_opts.btf_flags = 0;
+ btf_opts.token_fd = 0;
+ btf_fd = bpf_btf_load(raw_btf_data, raw_btf_size, &btf_opts);
+ if (!ASSERT_LT(btf_fd, 0, "no_token_no_cap_should_fail"))
+ goto cleanup;
+
+ /* token + no CAP_BPF -> failure */
+ btf_opts.btf_flags = BPF_F_TOKEN_FD;
+ btf_opts.token_fd = token_fd;
+ btf_fd = bpf_btf_load(raw_btf_data, raw_btf_size, &btf_opts);
+ if (!ASSERT_LT(btf_fd, 0, "token_no_cap_should_fail"))
+ goto cleanup;
+
+ /* get back effective local CAP_BPF (and CAP_SYS_ADMIN) */
+ err = restore_priv_caps(old_caps);
+ if (!ASSERT_OK(err, "restore_caps"))
+ goto cleanup;
+
+ /* token + CAP_BPF -> success */
+ btf_opts.btf_flags = BPF_F_TOKEN_FD;
+ btf_opts.token_fd = token_fd;
+ btf_fd = bpf_btf_load(raw_btf_data, raw_btf_size, &btf_opts);
+ if (!ASSERT_GT(btf_fd, 0, "token_and_cap_success"))
+ goto cleanup;
+
+ err = 0;
+cleanup:
+ btf__free(btf);
+ zclose(btf_fd);
+ zclose(token_fd);
+ return err;
+}
+
+static int userns_prog_load(int mnt_fd, struct token_lsm *lsm_skel)
+{
+ LIBBPF_OPTS(bpf_prog_load_opts, prog_opts);
+ int err, token_fd = -1, prog_fd = -1;
+ struct bpf_insn insns[] = {
+ /* bpf_jiffies64() requires CAP_BPF */
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_jiffies64),
+ /* bpf_get_current_task() requires CAP_PERFMON */
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_get_current_task),
+ /* r0 = 0; exit; */
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ };
+ size_t insn_cnt = ARRAY_SIZE(insns);
+ __u64 old_caps = 0;
+
+ /* create BPF token from BPF FS mount */
+ token_fd = bpf_token_create(mnt_fd, NULL);
+ if (!ASSERT_GT(token_fd, 0, "token_create")) {
+ err = -EINVAL;
+ goto cleanup;
+ }
+
+ /* validate we can successfully load BPF program with token; this
+ * being XDP program (CAP_NET_ADMIN) using bpf_jiffies64() (CAP_BPF)
+ * and bpf_get_current_task() (CAP_PERFMON) helpers validates we have
+ * BPF token wired properly in a bunch of places in the kernel
+ */
+ prog_opts.prog_flags = BPF_F_TOKEN_FD;
+ prog_opts.token_fd = token_fd;
+ prog_opts.expected_attach_type = BPF_XDP;
+ prog_fd = bpf_prog_load(BPF_PROG_TYPE_XDP, "token_prog", "GPL",
+ insns, insn_cnt, &prog_opts);
+ if (!ASSERT_GT(prog_fd, 0, "prog_fd")) {
+ err = -EPERM;
+ goto cleanup;
+ }
+
+ /* no token + caps -> failure */
+ prog_opts.prog_flags = 0;
+ prog_opts.token_fd = 0;
+ prog_fd = bpf_prog_load(BPF_PROG_TYPE_XDP, "token_prog", "GPL",
+ insns, insn_cnt, &prog_opts);
+ if (!ASSERT_EQ(prog_fd, -EPERM, "prog_fd_eperm")) {
+ err = -EPERM;
+ goto cleanup;
+ }
+
+ err = drop_priv_caps(&old_caps);
+ if (!ASSERT_OK(err, "drop_caps"))
+ goto cleanup;
+
+ /* no caps + token -> failure */
+ prog_opts.prog_flags = BPF_F_TOKEN_FD;
+ prog_opts.token_fd = token_fd;
+ prog_fd = bpf_prog_load(BPF_PROG_TYPE_XDP, "token_prog", "GPL",
+ insns, insn_cnt, &prog_opts);
+ if (!ASSERT_EQ(prog_fd, -EPERM, "prog_fd_eperm")) {
+ err = -EPERM;
+ goto cleanup;
+ }
+
+ /* no caps + no token -> definitely a failure */
+ prog_opts.prog_flags = 0;
+ prog_opts.token_fd = 0;
+ prog_fd = bpf_prog_load(BPF_PROG_TYPE_XDP, "token_prog", "GPL",
+ insns, insn_cnt, &prog_opts);
+ if (!ASSERT_EQ(prog_fd, -EPERM, "prog_fd_eperm")) {
+ err = -EPERM;
+ goto cleanup;
+ }
+
+ err = 0;
+cleanup:
+ zclose(prog_fd);
+ zclose(token_fd);
+ return err;
+}
+
+static int userns_obj_priv_map(int mnt_fd, struct token_lsm *lsm_skel)
+{
+ LIBBPF_OPTS(bpf_object_open_opts, opts);
+ char buf[256];
+ struct priv_map *skel;
+ int err;
+
+ skel = priv_map__open_and_load();
+ if (!ASSERT_ERR_PTR(skel, "obj_tokenless_load")) {
+ priv_map__destroy(skel);
+ return -EINVAL;
+ }
+
+ /* use bpf_token_path to provide BPF FS path */
+ snprintf(buf, sizeof(buf), "/proc/self/fd/%d", mnt_fd);
+ opts.bpf_token_path = buf;
+ skel = priv_map__open_opts(&opts);
+ if (!ASSERT_OK_PTR(skel, "obj_token_path_open"))
+ return -EINVAL;
+
+ err = priv_map__load(skel);
+ priv_map__destroy(skel);
+ if (!ASSERT_OK(err, "obj_token_path_load"))
+ return -EINVAL;
+
+ return 0;
+}
+
+static int userns_obj_priv_prog(int mnt_fd, struct token_lsm *lsm_skel)
+{
+ LIBBPF_OPTS(bpf_object_open_opts, opts);
+ char buf[256];
+ struct priv_prog *skel;
+ int err;
+
+ skel = priv_prog__open_and_load();
+ if (!ASSERT_ERR_PTR(skel, "obj_tokenless_load")) {
+ priv_prog__destroy(skel);
+ return -EINVAL;
+ }
+
+ /* use bpf_token_path to provide BPF FS path */
+ snprintf(buf, sizeof(buf), "/proc/self/fd/%d", mnt_fd);
+ opts.bpf_token_path = buf;
+ skel = priv_prog__open_opts(&opts);
+ if (!ASSERT_OK_PTR(skel, "obj_token_path_open"))
+ return -EINVAL;
+ err = priv_prog__load(skel);
+ priv_prog__destroy(skel);
+ if (!ASSERT_OK(err, "obj_token_path_load"))
+ return -EINVAL;
+
+ /* provide BPF token, but reject bpf_token_capable() with LSM */
+ lsm_skel->bss->reject_capable = true;
+ lsm_skel->bss->reject_cmd = false;
+ skel = priv_prog__open_opts(&opts);
+ if (!ASSERT_OK_PTR(skel, "obj_token_lsm_reject_cap_open"))
+ return -EINVAL;
+ err = priv_prog__load(skel);
+ priv_prog__destroy(skel);
+ if (!ASSERT_ERR(err, "obj_token_lsm_reject_cap_load"))
+ return -EINVAL;
+
+ /* provide BPF token, but reject bpf_token_cmd() with LSM */
+ lsm_skel->bss->reject_capable = false;
+ lsm_skel->bss->reject_cmd = true;
+ skel = priv_prog__open_opts(&opts);
+ if (!ASSERT_OK_PTR(skel, "obj_token_lsm_reject_cmd_open"))
+ return -EINVAL;
+ err = priv_prog__load(skel);
+ priv_prog__destroy(skel);
+ if (!ASSERT_ERR(err, "obj_token_lsm_reject_cmd_load"))
+ return -EINVAL;
+
+ return 0;
+}
+
+/* this test is called with BPF FS that doesn't delegate BPF_BTF_LOAD command,
+ * which should cause struct_ops application to fail, as BTF won't be uploaded
+ * into the kernel, even if STRUCT_OPS programs themselves are allowed
+ */
+static int validate_struct_ops_load(int mnt_fd, bool expect_success)
+{
+ LIBBPF_OPTS(bpf_object_open_opts, opts);
+ char buf[256];
+ struct dummy_st_ops_success *skel;
+ int err;
+
+ snprintf(buf, sizeof(buf), "/proc/self/fd/%d", mnt_fd);
+ opts.bpf_token_path = buf;
+ skel = dummy_st_ops_success__open_opts(&opts);
+ if (!ASSERT_OK_PTR(skel, "obj_token_path_open"))
+ return -EINVAL;
+
+ err = dummy_st_ops_success__load(skel);
+ dummy_st_ops_success__destroy(skel);
+ if (expect_success) {
+ if (!ASSERT_OK(err, "obj_token_path_load"))
+ return -EINVAL;
+ } else /* expect failure */ {
+ if (!ASSERT_ERR(err, "obj_token_path_load"))
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
+static int userns_obj_priv_btf_fail(int mnt_fd, struct token_lsm *lsm_skel)
+{
+ return validate_struct_ops_load(mnt_fd, false /* should fail */);
+}
+
+static int userns_obj_priv_btf_success(int mnt_fd, struct token_lsm *lsm_skel)
+{
+ return validate_struct_ops_load(mnt_fd, true /* should succeed */);
+}
+
+#define TOKEN_ENVVAR "LIBBPF_BPF_TOKEN_PATH"
+#define TOKEN_BPFFS_CUSTOM "/bpf-token-fs"
+
+static int userns_obj_priv_implicit_token(int mnt_fd, struct token_lsm *lsm_skel)
+{
+ LIBBPF_OPTS(bpf_object_open_opts, opts);
+ struct dummy_st_ops_success *skel;
+ int err;
+
+ /* before we mount BPF FS with token delegation, struct_ops skeleton
+ * should fail to load
+ */
+ skel = dummy_st_ops_success__open_and_load();
+ if (!ASSERT_ERR_PTR(skel, "obj_tokenless_load")) {
+ dummy_st_ops_success__destroy(skel);
+ return -EINVAL;
+ }
+
+ /* mount custom BPF FS over /sys/fs/bpf so that libbpf can create BPF
+ * token automatically and implicitly
+ */
+ err = sys_move_mount(mnt_fd, "", AT_FDCWD, "/sys/fs/bpf", MOVE_MOUNT_F_EMPTY_PATH);
+ if (!ASSERT_OK(err, "move_mount_bpffs"))
+ return -EINVAL;
+
+ /* disable implicit BPF token creation by setting
+ * LIBBPF_BPF_TOKEN_PATH envvar to empty value, load should fail
+ */
+ err = setenv(TOKEN_ENVVAR, "", 1 /*overwrite*/);
+ if (!ASSERT_OK(err, "setenv_token_path"))
+ return -EINVAL;
+ skel = dummy_st_ops_success__open_and_load();
+ if (!ASSERT_ERR_PTR(skel, "obj_token_envvar_disabled_load")) {
+ unsetenv(TOKEN_ENVVAR);
+ dummy_st_ops_success__destroy(skel);
+ return -EINVAL;
+ }
+ unsetenv(TOKEN_ENVVAR);
+
+ /* now the same struct_ops skeleton should succeed thanks to libppf
+ * creating BPF token from /sys/fs/bpf mount point
+ */
+ skel = dummy_st_ops_success__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "obj_implicit_token_load"))
+ return -EINVAL;
+
+ dummy_st_ops_success__destroy(skel);
+
+ /* now disable implicit token through empty bpf_token_path, should fail */
+ opts.bpf_token_path = "";
+ skel = dummy_st_ops_success__open_opts(&opts);
+ if (!ASSERT_OK_PTR(skel, "obj_empty_token_path_open"))
+ return -EINVAL;
+
+ err = dummy_st_ops_success__load(skel);
+ dummy_st_ops_success__destroy(skel);
+ if (!ASSERT_ERR(err, "obj_empty_token_path_load"))
+ return -EINVAL;
+
+ return 0;
+}
+
+static int userns_obj_priv_implicit_token_envvar(int mnt_fd, struct token_lsm *lsm_skel)
+{
+ LIBBPF_OPTS(bpf_object_open_opts, opts);
+ struct dummy_st_ops_success *skel;
+ int err;
+
+ /* before we mount BPF FS with token delegation, struct_ops skeleton
+ * should fail to load
+ */
+ skel = dummy_st_ops_success__open_and_load();
+ if (!ASSERT_ERR_PTR(skel, "obj_tokenless_load")) {
+ dummy_st_ops_success__destroy(skel);
+ return -EINVAL;
+ }
+
+ /* mount custom BPF FS over custom location, so libbpf can't create
+ * BPF token implicitly, unless pointed to it through
+ * LIBBPF_BPF_TOKEN_PATH envvar
+ */
+ rmdir(TOKEN_BPFFS_CUSTOM);
+ if (!ASSERT_OK(mkdir(TOKEN_BPFFS_CUSTOM, 0777), "mkdir_bpffs_custom"))
+ goto err_out;
+ err = sys_move_mount(mnt_fd, "", AT_FDCWD, TOKEN_BPFFS_CUSTOM, MOVE_MOUNT_F_EMPTY_PATH);
+ if (!ASSERT_OK(err, "move_mount_bpffs"))
+ goto err_out;
+
+ /* even though we have BPF FS with delegation, it's not at default
+ * /sys/fs/bpf location, so we still fail to load until envvar is set up
+ */
+ skel = dummy_st_ops_success__open_and_load();
+ if (!ASSERT_ERR_PTR(skel, "obj_tokenless_load2")) {
+ dummy_st_ops_success__destroy(skel);
+ goto err_out;
+ }
+
+ err = setenv(TOKEN_ENVVAR, TOKEN_BPFFS_CUSTOM, 1 /*overwrite*/);
+ if (!ASSERT_OK(err, "setenv_token_path"))
+ goto err_out;
+
+ /* now the same struct_ops skeleton should succeed thanks to libppf
+ * creating BPF token from custom mount point
+ */
+ skel = dummy_st_ops_success__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "obj_implicit_token_load"))
+ goto err_out;
+
+ dummy_st_ops_success__destroy(skel);
+
+ /* now disable implicit token through empty bpf_token_path, envvar
+ * will be ignored, should fail
+ */
+ opts.bpf_token_path = "";
+ skel = dummy_st_ops_success__open_opts(&opts);
+ if (!ASSERT_OK_PTR(skel, "obj_empty_token_path_open"))
+ goto err_out;
+
+ err = dummy_st_ops_success__load(skel);
+ dummy_st_ops_success__destroy(skel);
+ if (!ASSERT_ERR(err, "obj_empty_token_path_load"))
+ goto err_out;
+
+ rmdir(TOKEN_BPFFS_CUSTOM);
+ unsetenv(TOKEN_ENVVAR);
+ return 0;
+err_out:
+ rmdir(TOKEN_BPFFS_CUSTOM);
+ unsetenv(TOKEN_ENVVAR);
+ return -EINVAL;
+}
+
+#define bit(n) (1ULL << (n))
+
+void test_token(void)
+{
+ if (test__start_subtest("map_token")) {
+ struct bpffs_opts opts = {
+ .cmds_str = "map_create",
+ .maps_str = "stack",
+ };
+
+ subtest_userns(&opts, userns_map_create);
+ }
+ if (test__start_subtest("btf_token")) {
+ struct bpffs_opts opts = {
+ .cmds = 1ULL << BPF_BTF_LOAD,
+ };
+
+ subtest_userns(&opts, userns_btf_load);
+ }
+ if (test__start_subtest("prog_token")) {
+ struct bpffs_opts opts = {
+ .cmds_str = "PROG_LOAD",
+ .progs_str = "XDP",
+ .attachs_str = "xdp",
+ };
+
+ subtest_userns(&opts, userns_prog_load);
+ }
+ if (test__start_subtest("obj_priv_map")) {
+ struct bpffs_opts opts = {
+ .cmds = bit(BPF_MAP_CREATE),
+ .maps = bit(BPF_MAP_TYPE_QUEUE),
+ };
+
+ subtest_userns(&opts, userns_obj_priv_map);
+ }
+ if (test__start_subtest("obj_priv_prog")) {
+ struct bpffs_opts opts = {
+ .cmds = bit(BPF_PROG_LOAD),
+ .progs = bit(BPF_PROG_TYPE_KPROBE),
+ .attachs = ~0ULL,
+ };
+
+ subtest_userns(&opts, userns_obj_priv_prog);
+ }
+ if (test__start_subtest("obj_priv_btf_fail")) {
+ struct bpffs_opts opts = {
+ /* disallow BTF loading */
+ .cmds = bit(BPF_MAP_CREATE) | bit(BPF_PROG_LOAD),
+ .maps = bit(BPF_MAP_TYPE_STRUCT_OPS),
+ .progs = bit(BPF_PROG_TYPE_STRUCT_OPS),
+ .attachs = ~0ULL,
+ };
+
+ subtest_userns(&opts, userns_obj_priv_btf_fail);
+ }
+ if (test__start_subtest("obj_priv_btf_success")) {
+ struct bpffs_opts opts = {
+ /* allow BTF loading */
+ .cmds = bit(BPF_BTF_LOAD) | bit(BPF_MAP_CREATE) | bit(BPF_PROG_LOAD),
+ .maps = bit(BPF_MAP_TYPE_STRUCT_OPS),
+ .progs = bit(BPF_PROG_TYPE_STRUCT_OPS),
+ .attachs = ~0ULL,
+ };
+
+ subtest_userns(&opts, userns_obj_priv_btf_success);
+ }
+ if (test__start_subtest("obj_priv_implicit_token")) {
+ struct bpffs_opts opts = {
+ /* allow BTF loading */
+ .cmds = bit(BPF_BTF_LOAD) | bit(BPF_MAP_CREATE) | bit(BPF_PROG_LOAD),
+ .maps = bit(BPF_MAP_TYPE_STRUCT_OPS),
+ .progs = bit(BPF_PROG_TYPE_STRUCT_OPS),
+ .attachs = ~0ULL,
+ };
+
+ subtest_userns(&opts, userns_obj_priv_implicit_token);
+ }
+ if (test__start_subtest("obj_priv_implicit_token_envvar")) {
+ struct bpffs_opts opts = {
+ /* allow BTF loading */
+ .cmds = bit(BPF_BTF_LOAD) | bit(BPF_MAP_CREATE) | bit(BPF_PROG_LOAD),
+ .maps = bit(BPF_MAP_TYPE_STRUCT_OPS),
+ .progs = bit(BPF_PROG_TYPE_STRUCT_OPS),
+ .attachs = ~0ULL,
+ };
+
+ subtest_userns(&opts, userns_obj_priv_implicit_token_envvar);
+ }
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/tp_attach_query.c b/tools/testing/selftests/bpf/prog_tests/tp_attach_query.c
index fb095e5cd9af..655d69f0ff0b 100644
--- a/tools/testing/selftests/bpf/prog_tests/tp_attach_query.c
+++ b/tools/testing/selftests/bpf/prog_tests/tp_attach_query.c
@@ -1,12 +1,12 @@
// SPDX-License-Identifier: GPL-2.0
#include <test_progs.h>
-void test_tp_attach_query(void)
+void serial_test_tp_attach_query(void)
{
const int num_progs = 3;
int i, j, bytes, efd, err, prog_fd[num_progs], pmu_fd[num_progs];
__u32 duration = 0, info_len, saved_prog_ids[num_progs];
- const char *file = "./test_tracepoint.o";
+ const char *file = "./test_tracepoint.bpf.o";
struct perf_event_query_bpf *query;
struct perf_event_attr attr = {};
struct bpf_object *obj[num_progs];
@@ -16,8 +16,13 @@ void test_tp_attach_query(void)
for (i = 0; i < num_progs; i++)
obj[i] = NULL;
- snprintf(buf, sizeof(buf),
- "/sys/kernel/debug/tracing/events/sched/sched_switch/id");
+ if (access("/sys/kernel/tracing/trace", F_OK) == 0) {
+ snprintf(buf, sizeof(buf),
+ "/sys/kernel/tracing/events/sched/sched_switch/id");
+ } else {
+ snprintf(buf, sizeof(buf),
+ "/sys/kernel/debug/tracing/events/sched/sched_switch/id");
+ }
efd = open(buf, O_RDONLY, 0);
if (CHECK(efd < 0, "open", "err %d errno %d\n", efd, errno))
return;
@@ -35,7 +40,7 @@ void test_tp_attach_query(void)
query = malloc(sizeof(*query) + sizeof(__u32) * num_progs);
for (i = 0; i < num_progs; i++) {
- err = bpf_prog_load(file, BPF_PROG_TYPE_TRACEPOINT, &obj[i],
+ err = bpf_prog_test_load(file, BPF_PROG_TYPE_TRACEPOINT, &obj[i],
&prog_fd[i]);
if (CHECK(err, "prog_load", "err %d errno %d\n", err, errno))
goto cleanup1;
@@ -45,8 +50,9 @@ void test_tp_attach_query(void)
prog_info.xlated_prog_len = 0;
prog_info.nr_map_ids = 0;
info_len = sizeof(prog_info);
- err = bpf_obj_get_info_by_fd(prog_fd[i], &prog_info, &info_len);
- if (CHECK(err, "bpf_obj_get_info_by_fd", "err %d errno %d\n",
+ err = bpf_prog_get_info_by_fd(prog_fd[i], &prog_info,
+ &info_len);
+ if (CHECK(err, "bpf_prog_get_info_by_fd", "err %d errno %d\n",
err, errno))
goto cleanup1;
saved_prog_ids[i] = prog_info.id;
diff --git a/tools/testing/selftests/bpf/prog_tests/trace_ext.c b/tools/testing/selftests/bpf/prog_tests/trace_ext.c
new file mode 100644
index 000000000000..aabdff7bea3e
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/trace_ext.c
@@ -0,0 +1,115 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#define _GNU_SOURCE
+#include <test_progs.h>
+#include <network_helpers.h>
+#include <sys/stat.h>
+#include <linux/sched.h>
+#include <sys/syscall.h>
+
+#include "test_pkt_md_access.skel.h"
+#include "test_trace_ext.skel.h"
+#include "test_trace_ext_tracing.skel.h"
+
+static __u32 duration;
+
+void test_trace_ext(void)
+{
+ struct test_pkt_md_access *skel_pkt = NULL;
+ struct test_trace_ext_tracing *skel_trace = NULL;
+ struct test_trace_ext_tracing__bss *bss_trace;
+ struct test_trace_ext *skel_ext = NULL;
+ struct test_trace_ext__bss *bss_ext;
+ int err, pkt_fd, ext_fd;
+ struct bpf_program *prog;
+ char buf[100];
+ __u64 len;
+ LIBBPF_OPTS(bpf_test_run_opts, topts,
+ .data_in = &pkt_v4,
+ .data_size_in = sizeof(pkt_v4),
+ .repeat = 1,
+ );
+
+ /* open/load/attach test_pkt_md_access */
+ skel_pkt = test_pkt_md_access__open_and_load();
+ if (CHECK(!skel_pkt, "setup", "classifier/test_pkt_md_access open failed\n"))
+ goto cleanup;
+
+ err = test_pkt_md_access__attach(skel_pkt);
+ if (CHECK(err, "setup", "classifier/test_pkt_md_access attach failed: %d\n", err))
+ goto cleanup;
+
+ prog = skel_pkt->progs.test_pkt_md_access;
+ pkt_fd = bpf_program__fd(prog);
+
+ /* open extension */
+ skel_ext = test_trace_ext__open();
+ if (CHECK(!skel_ext, "setup", "freplace/test_pkt_md_access open failed\n"))
+ goto cleanup;
+
+ /* set extension's attach target - test_pkt_md_access */
+ prog = skel_ext->progs.test_pkt_md_access_new;
+ bpf_program__set_attach_target(prog, pkt_fd, "test_pkt_md_access");
+
+ /* load/attach extension */
+ err = test_trace_ext__load(skel_ext);
+ if (CHECK(err, "setup", "freplace/test_pkt_md_access load failed\n")) {
+ libbpf_strerror(err, buf, sizeof(buf));
+ fprintf(stderr, "%s\n", buf);
+ goto cleanup;
+ }
+
+ err = test_trace_ext__attach(skel_ext);
+ if (CHECK(err, "setup", "freplace/test_pkt_md_access attach failed: %d\n", err))
+ goto cleanup;
+
+ prog = skel_ext->progs.test_pkt_md_access_new;
+ ext_fd = bpf_program__fd(prog);
+
+ /* open tracing */
+ skel_trace = test_trace_ext_tracing__open();
+ if (CHECK(!skel_trace, "setup", "tracing/test_pkt_md_access_new open failed\n"))
+ goto cleanup;
+
+ /* set tracing's attach target - fentry */
+ prog = skel_trace->progs.fentry;
+ bpf_program__set_attach_target(prog, ext_fd, "test_pkt_md_access_new");
+
+ /* set tracing's attach target - fexit */
+ prog = skel_trace->progs.fexit;
+ bpf_program__set_attach_target(prog, ext_fd, "test_pkt_md_access_new");
+
+ /* load/attach tracing */
+ err = test_trace_ext_tracing__load(skel_trace);
+ if (!ASSERT_OK(err, "tracing/test_pkt_md_access_new load")) {
+ libbpf_strerror(err, buf, sizeof(buf));
+ fprintf(stderr, "%s\n", buf);
+ goto cleanup;
+ }
+
+ err = test_trace_ext_tracing__attach(skel_trace);
+ if (!ASSERT_OK(err, "tracing/test_pkt_md_access_new attach"))
+ goto cleanup;
+
+ /* trigger the test */
+ err = bpf_prog_test_run_opts(pkt_fd, &topts);
+ ASSERT_OK(err, "test_run_opts err");
+ ASSERT_OK(topts.retval, "test_run_opts retval");
+
+ bss_ext = skel_ext->bss;
+ bss_trace = skel_trace->bss;
+
+ len = bss_ext->ext_called;
+
+ ASSERT_NEQ(bss_ext->ext_called, 0,
+ "failed to trigger freplace/test_pkt_md_access");
+ ASSERT_EQ(bss_trace->fentry_called, len,
+ "failed to trigger fentry/test_pkt_md_access_new");
+ ASSERT_EQ(bss_trace->fexit_called, len,
+ "failed to trigger fexit/test_pkt_md_access_new");
+
+cleanup:
+ test_trace_ext_tracing__destroy(skel_trace);
+ test_trace_ext__destroy(skel_ext);
+ test_pkt_md_access__destroy(skel_pkt);
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/trace_printk.c b/tools/testing/selftests/bpf/prog_tests/trace_printk.c
new file mode 100644
index 000000000000..7b9124d506a5
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/trace_printk.c
@@ -0,0 +1,76 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2020, Oracle and/or its affiliates. */
+
+#include <test_progs.h>
+
+#include "trace_printk.lskel.h"
+
+#define TRACEFS_PIPE "/sys/kernel/tracing/trace_pipe"
+#define DEBUGFS_PIPE "/sys/kernel/debug/tracing/trace_pipe"
+#define SEARCHMSG "testing,testing"
+
+void serial_test_trace_printk(void)
+{
+ struct trace_printk_lskel__bss *bss;
+ int err = 0, iter = 0, found = 0;
+ struct trace_printk_lskel *skel;
+ char *buf = NULL;
+ FILE *fp = NULL;
+ size_t buflen;
+
+ skel = trace_printk_lskel__open();
+ if (!ASSERT_OK_PTR(skel, "trace_printk__open"))
+ return;
+
+ ASSERT_EQ(skel->rodata->fmt[0], 'T', "skel->rodata->fmt[0]");
+ skel->rodata->fmt[0] = 't';
+
+ err = trace_printk_lskel__load(skel);
+ if (!ASSERT_OK(err, "trace_printk__load"))
+ goto cleanup;
+
+ bss = skel->bss;
+
+ err = trace_printk_lskel__attach(skel);
+ if (!ASSERT_OK(err, "trace_printk__attach"))
+ goto cleanup;
+
+ if (access(TRACEFS_PIPE, F_OK) == 0)
+ fp = fopen(TRACEFS_PIPE, "r");
+ else
+ fp = fopen(DEBUGFS_PIPE, "r");
+ if (!ASSERT_OK_PTR(fp, "fopen(TRACE_PIPE)"))
+ goto cleanup;
+
+ /* We do not want to wait forever if this test fails... */
+ fcntl(fileno(fp), F_SETFL, O_NONBLOCK);
+
+ /* wait for tracepoint to trigger */
+ usleep(1);
+ trace_printk_lskel__detach(skel);
+
+ if (!ASSERT_GT(bss->trace_printk_ran, 0, "bss->trace_printk_ran"))
+ goto cleanup;
+
+ if (!ASSERT_GT(bss->trace_printk_ret, 0, "bss->trace_printk_ret"))
+ goto cleanup;
+
+ /* verify our search string is in the trace buffer */
+ while (getline(&buf, &buflen, fp) >= 0 || errno == EAGAIN) {
+ if (strstr(buf, SEARCHMSG) != NULL)
+ found++;
+ if (found == bss->trace_printk_ran)
+ break;
+ if (++iter > 1000)
+ break;
+ }
+
+ if (!ASSERT_EQ(found, bss->trace_printk_ran, "found"))
+ goto cleanup;
+
+cleanup:
+ trace_printk_lskel__destroy(skel);
+ free(buf);
+ if (fp)
+ fclose(fp);
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/trace_vprintk.c b/tools/testing/selftests/bpf/prog_tests/trace_vprintk.c
new file mode 100644
index 000000000000..44ea2fd88f4c
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/trace_vprintk.c
@@ -0,0 +1,72 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2021 Facebook */
+
+#include <test_progs.h>
+
+#include "trace_vprintk.lskel.h"
+
+#define TRACEFS_PIPE "/sys/kernel/tracing/trace_pipe"
+#define DEBUGFS_PIPE "/sys/kernel/debug/tracing/trace_pipe"
+#define SEARCHMSG "1,2,3,4,5,6,7,8,9,10"
+
+void serial_test_trace_vprintk(void)
+{
+ struct trace_vprintk_lskel__bss *bss;
+ int err = 0, iter = 0, found = 0;
+ struct trace_vprintk_lskel *skel;
+ char *buf = NULL;
+ FILE *fp = NULL;
+ size_t buflen;
+
+ skel = trace_vprintk_lskel__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "trace_vprintk__open_and_load"))
+ goto cleanup;
+
+ bss = skel->bss;
+
+ err = trace_vprintk_lskel__attach(skel);
+ if (!ASSERT_OK(err, "trace_vprintk__attach"))
+ goto cleanup;
+
+ if (access(TRACEFS_PIPE, F_OK) == 0)
+ fp = fopen(TRACEFS_PIPE, "r");
+ else
+ fp = fopen(DEBUGFS_PIPE, "r");
+ if (!ASSERT_OK_PTR(fp, "fopen(TRACE_PIPE)"))
+ goto cleanup;
+
+ /* We do not want to wait forever if this test fails... */
+ fcntl(fileno(fp), F_SETFL, O_NONBLOCK);
+
+ /* wait for tracepoint to trigger */
+ usleep(1);
+ trace_vprintk_lskel__detach(skel);
+
+ if (!ASSERT_GT(bss->trace_vprintk_ran, 0, "bss->trace_vprintk_ran"))
+ goto cleanup;
+
+ if (!ASSERT_GT(bss->trace_vprintk_ret, 0, "bss->trace_vprintk_ret"))
+ goto cleanup;
+
+ /* verify our search string is in the trace buffer */
+ while (getline(&buf, &buflen, fp) >= 0 || errno == EAGAIN) {
+ if (strstr(buf, SEARCHMSG) != NULL)
+ found++;
+ if (found == bss->trace_vprintk_ran)
+ break;
+ if (++iter > 1000)
+ break;
+ }
+
+ if (!ASSERT_EQ(found, bss->trace_vprintk_ran, "found"))
+ goto cleanup;
+
+ if (!ASSERT_LT(bss->null_data_vprintk_ret, 0, "bss->null_data_vprintk_ret"))
+ goto cleanup;
+
+cleanup:
+ trace_vprintk_lskel__destroy(skel);
+ free(buf);
+ if (fp)
+ fclose(fp);
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/tracing_failure.c b/tools/testing/selftests/bpf/prog_tests/tracing_failure.c
new file mode 100644
index 000000000000..a222df765bc3
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/tracing_failure.c
@@ -0,0 +1,37 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2024 Meta Platforms, Inc. and affiliates. */
+#include <test_progs.h>
+#include "tracing_failure.skel.h"
+
+static void test_bpf_spin_lock(bool is_spin_lock)
+{
+ struct tracing_failure *skel;
+ int err;
+
+ skel = tracing_failure__open();
+ if (!ASSERT_OK_PTR(skel, "tracing_failure__open"))
+ return;
+
+ if (is_spin_lock)
+ bpf_program__set_autoload(skel->progs.test_spin_lock, true);
+ else
+ bpf_program__set_autoload(skel->progs.test_spin_unlock, true);
+
+ err = tracing_failure__load(skel);
+ if (!ASSERT_OK(err, "tracing_failure__load"))
+ goto out;
+
+ err = tracing_failure__attach(skel);
+ ASSERT_ERR(err, "tracing_failure__attach");
+
+out:
+ tracing_failure__destroy(skel);
+}
+
+void test_tracing_failure(void)
+{
+ if (test__start_subtest("bpf_spin_lock"))
+ test_bpf_spin_lock(true);
+ if (test__start_subtest("bpf_spin_unlock"))
+ test_bpf_spin_lock(false);
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/tracing_struct.c b/tools/testing/selftests/bpf/prog_tests/tracing_struct.c
new file mode 100644
index 000000000000..fe0fb0c9849a
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/tracing_struct.c
@@ -0,0 +1,85 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2022 Meta Platforms, Inc. and affiliates. */
+
+#include <test_progs.h>
+#include "tracing_struct.skel.h"
+
+static void test_fentry(void)
+{
+ struct tracing_struct *skel;
+ int err;
+
+ skel = tracing_struct__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "tracing_struct__open_and_load"))
+ return;
+
+ err = tracing_struct__attach(skel);
+ if (!ASSERT_OK(err, "tracing_struct__attach"))
+ goto destroy_skel;
+
+ ASSERT_OK(trigger_module_test_read(256), "trigger_read");
+
+ ASSERT_EQ(skel->bss->t1_a_a, 2, "t1:a.a");
+ ASSERT_EQ(skel->bss->t1_a_b, 3, "t1:a.b");
+ ASSERT_EQ(skel->bss->t1_b, 1, "t1:b");
+ ASSERT_EQ(skel->bss->t1_c, 4, "t1:c");
+
+ ASSERT_EQ(skel->bss->t1_nregs, 4, "t1 nregs");
+ ASSERT_EQ(skel->bss->t1_reg0, 2, "t1 reg0");
+ ASSERT_EQ(skel->bss->t1_reg1, 3, "t1 reg1");
+ ASSERT_EQ(skel->bss->t1_reg2, 1, "t1 reg2");
+ ASSERT_EQ(skel->bss->t1_reg3, 4, "t1 reg3");
+ ASSERT_EQ(skel->bss->t1_ret, 10, "t1 ret");
+
+ ASSERT_EQ(skel->bss->t2_a, 1, "t2:a");
+ ASSERT_EQ(skel->bss->t2_b_a, 2, "t2:b.a");
+ ASSERT_EQ(skel->bss->t2_b_b, 3, "t2:b.b");
+ ASSERT_EQ(skel->bss->t2_c, 4, "t2:c");
+ ASSERT_EQ(skel->bss->t2_ret, 10, "t2 ret");
+
+ ASSERT_EQ(skel->bss->t3_a, 1, "t3:a");
+ ASSERT_EQ(skel->bss->t3_b, 4, "t3:b");
+ ASSERT_EQ(skel->bss->t3_c_a, 2, "t3:c.a");
+ ASSERT_EQ(skel->bss->t3_c_b, 3, "t3:c.b");
+ ASSERT_EQ(skel->bss->t3_ret, 10, "t3 ret");
+
+ ASSERT_EQ(skel->bss->t4_a_a, 10, "t4:a.a");
+ ASSERT_EQ(skel->bss->t4_b, 1, "t4:b");
+ ASSERT_EQ(skel->bss->t4_c, 2, "t4:c");
+ ASSERT_EQ(skel->bss->t4_d, 3, "t4:d");
+ ASSERT_EQ(skel->bss->t4_e_a, 2, "t4:e.a");
+ ASSERT_EQ(skel->bss->t4_e_b, 3, "t4:e.b");
+ ASSERT_EQ(skel->bss->t4_ret, 21, "t4 ret");
+
+ ASSERT_EQ(skel->bss->t5_ret, 1, "t5 ret");
+
+ ASSERT_EQ(skel->bss->t6, 1, "t6 ret");
+
+ ASSERT_EQ(skel->bss->t7_a, 16, "t7:a");
+ ASSERT_EQ(skel->bss->t7_b, 17, "t7:b");
+ ASSERT_EQ(skel->bss->t7_c, 18, "t7:c");
+ ASSERT_EQ(skel->bss->t7_d, 19, "t7:d");
+ ASSERT_EQ(skel->bss->t7_e, 20, "t7:e");
+ ASSERT_EQ(skel->bss->t7_f_a, 21, "t7:f.a");
+ ASSERT_EQ(skel->bss->t7_f_b, 22, "t7:f.b");
+ ASSERT_EQ(skel->bss->t7_ret, 133, "t7 ret");
+
+ ASSERT_EQ(skel->bss->t8_a, 16, "t8:a");
+ ASSERT_EQ(skel->bss->t8_b, 17, "t8:b");
+ ASSERT_EQ(skel->bss->t8_c, 18, "t8:c");
+ ASSERT_EQ(skel->bss->t8_d, 19, "t8:d");
+ ASSERT_EQ(skel->bss->t8_e, 20, "t8:e");
+ ASSERT_EQ(skel->bss->t8_f_a, 21, "t8:f.a");
+ ASSERT_EQ(skel->bss->t8_f_b, 22, "t8:f.b");
+ ASSERT_EQ(skel->bss->t8_g, 23, "t8:g");
+ ASSERT_EQ(skel->bss->t8_ret, 156, "t8 ret");
+
+ tracing_struct__detach(skel);
+destroy_skel:
+ tracing_struct__destroy(skel);
+}
+
+void test_tracing_struct(void)
+{
+ test_fentry();
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/trampoline_count.c b/tools/testing/selftests/bpf/prog_tests/trampoline_count.c
index 781c8d11604b..6cd7349d4a2b 100644
--- a/tools/testing/selftests/bpf/prog_tests/trampoline_count.c
+++ b/tools/testing/selftests/bpf/prog_tests/trampoline_count.c
@@ -1,123 +1,100 @@
// SPDX-License-Identifier: GPL-2.0-only
#define _GNU_SOURCE
-#include <sched.h>
-#include <sys/prctl.h>
#include <test_progs.h>
-#define MAX_TRAMP_PROGS 40
-
struct inst {
struct bpf_object *obj;
- struct bpf_link *link_fentry;
- struct bpf_link *link_fexit;
+ struct bpf_link *link;
};
-static int test_task_rename(void)
-{
- int fd, duration = 0, err;
- char buf[] = "test_overhead";
-
- fd = open("/proc/self/comm", O_WRONLY|O_TRUNC);
- if (CHECK(fd < 0, "open /proc", "err %d", errno))
- return -1;
- err = write(fd, buf, sizeof(buf));
- if (err < 0) {
- CHECK(err < 0, "task rename", "err %d", errno);
- close(fd);
- return -1;
- }
- close(fd);
- return 0;
-}
-
-static struct bpf_link *load(struct bpf_object *obj, const char *name)
+static struct bpf_program *load_prog(char *file, char *name, struct inst *inst)
{
+ struct bpf_object *obj;
struct bpf_program *prog;
- int duration = 0;
+ int err;
+
+ obj = bpf_object__open_file(file, NULL);
+ if (!ASSERT_OK_PTR(obj, "obj_open_file"))
+ return NULL;
- prog = bpf_object__find_program_by_title(obj, name);
- if (CHECK(!prog, "find_probe", "prog '%s' not found\n", name))
- return ERR_PTR(-EINVAL);
- return bpf_program__attach_trace(prog);
+ inst->obj = obj;
+
+ err = bpf_object__load(obj);
+ if (!ASSERT_OK(err, "obj_load"))
+ return NULL;
+
+ prog = bpf_object__find_program_by_name(obj, name);
+ if (!ASSERT_OK_PTR(prog, "obj_find_prog"))
+ return NULL;
+
+ return prog;
}
-void test_trampoline_count(void)
+/* TODO: use different target function to run in concurrent mode */
+void serial_test_trampoline_count(void)
{
- const char *fentry_name = "fentry/__set_task_comm";
- const char *fexit_name = "fexit/__set_task_comm";
- const char *object = "test_trampoline_count.o";
- struct inst inst[MAX_TRAMP_PROGS] = {};
- int err, i = 0, duration = 0;
- struct bpf_object *obj;
+ char *file = "test_trampoline_count.bpf.o";
+ char *const progs[] = { "fentry_test", "fmod_ret_test", "fexit_test" };
+ int bpf_max_tramp_links, err, i, prog_fd;
+ struct bpf_program *prog;
struct bpf_link *link;
- char comm[16] = {};
-
- /* attach 'allowed' 40 trampoline programs */
- for (i = 0; i < MAX_TRAMP_PROGS; i++) {
- obj = bpf_object__open_file(object, NULL);
- if (CHECK(IS_ERR(obj), "obj_open_file", "err %ld\n", PTR_ERR(obj))) {
- obj = NULL;
+ struct inst *inst;
+ LIBBPF_OPTS(bpf_test_run_opts, opts);
+
+ bpf_max_tramp_links = get_bpf_max_tramp_links();
+ if (!ASSERT_GE(bpf_max_tramp_links, 1, "bpf_max_tramp_links"))
+ return;
+ inst = calloc(bpf_max_tramp_links + 1, sizeof(*inst));
+ if (!ASSERT_OK_PTR(inst, "inst"))
+ return;
+
+ /* attach 'allowed' trampoline programs */
+ for (i = 0; i < bpf_max_tramp_links; i++) {
+ prog = load_prog(file, progs[i % ARRAY_SIZE(progs)], &inst[i]);
+ if (!prog)
goto cleanup;
- }
- err = bpf_object__load(obj);
- if (CHECK(err, "obj_load", "err %d\n", err))
+ link = bpf_program__attach(prog);
+ if (!ASSERT_OK_PTR(link, "attach_prog"))
goto cleanup;
- inst[i].obj = obj;
- obj = NULL;
-
- if (rand() % 2) {
- link = load(inst[i].obj, fentry_name);
- if (CHECK(IS_ERR(link), "attach prog", "err %ld\n", PTR_ERR(link))) {
- link = NULL;
- goto cleanup;
- }
- inst[i].link_fentry = link;
- } else {
- link = load(inst[i].obj, fexit_name);
- if (CHECK(IS_ERR(link), "attach prog", "err %ld\n", PTR_ERR(link))) {
- link = NULL;
- goto cleanup;
- }
- inst[i].link_fexit = link;
- }
+
+ inst[i].link = link;
}
/* and try 1 extra.. */
- obj = bpf_object__open_file(object, NULL);
- if (CHECK(IS_ERR(obj), "obj_open_file", "err %ld\n", PTR_ERR(obj))) {
- obj = NULL;
+ prog = load_prog(file, "fmod_ret_test", &inst[i]);
+ if (!prog)
goto cleanup;
- }
-
- err = bpf_object__load(obj);
- if (CHECK(err, "obj_load", "err %d\n", err))
- goto cleanup_extra;
/* ..that needs to fail */
- link = load(obj, fentry_name);
- if (CHECK(!IS_ERR(link), "cannot attach over the limit", "err %ld\n", PTR_ERR(link))) {
- bpf_link__destroy(link);
- goto cleanup_extra;
+ link = bpf_program__attach(prog);
+ if (!ASSERT_ERR_PTR(link, "attach_prog")) {
+ inst[i].link = link;
+ goto cleanup;
}
/* with E2BIG error */
- CHECK(PTR_ERR(link) != -E2BIG, "proper error check", "err %ld\n", PTR_ERR(link));
+ if (!ASSERT_EQ(libbpf_get_error(link), -E2BIG, "E2BIG"))
+ goto cleanup;
+ if (!ASSERT_EQ(link, NULL, "ptr_is_null"))
+ goto cleanup;
+
+ /* and finally execute the probe */
+ prog_fd = bpf_program__fd(prog);
+ if (!ASSERT_GE(prog_fd, 0, "bpf_program__fd"))
+ goto cleanup;
+
+ err = bpf_prog_test_run_opts(prog_fd, &opts);
+ if (!ASSERT_OK(err, "bpf_prog_test_run_opts"))
+ goto cleanup;
- /* and finaly execute the probe */
- if (CHECK_FAIL(prctl(PR_GET_NAME, comm, 0L, 0L, 0L)))
- goto cleanup_extra;
- CHECK_FAIL(test_task_rename());
- CHECK_FAIL(prctl(PR_SET_NAME, comm, 0L, 0L, 0L));
+ ASSERT_EQ(opts.retval & 0xffff, 33, "bpf_modify_return_test.result");
+ ASSERT_EQ(opts.retval >> 16, 2, "bpf_modify_return_test.side_effect");
-cleanup_extra:
- bpf_object__close(obj);
cleanup:
- if (i >= MAX_TRAMP_PROGS)
- i = MAX_TRAMP_PROGS - 1;
for (; i >= 0; i--) {
- bpf_link__destroy(inst[i].link_fentry);
- bpf_link__destroy(inst[i].link_fexit);
+ bpf_link__destroy(inst[i].link);
bpf_object__close(inst[i].obj);
}
+ free(inst);
}
diff --git a/tools/testing/selftests/bpf/prog_tests/type_cast.c b/tools/testing/selftests/bpf/prog_tests/type_cast.c
new file mode 100644
index 000000000000..9317d5fa2635
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/type_cast.c
@@ -0,0 +1,114 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2022 Meta Platforms, Inc. and affiliates. */
+#include <test_progs.h>
+#include <network_helpers.h>
+#include "type_cast.skel.h"
+
+static void test_xdp(void)
+{
+ struct type_cast *skel;
+ int err, prog_fd;
+ char buf[128];
+
+ LIBBPF_OPTS(bpf_test_run_opts, topts,
+ .data_in = &pkt_v4,
+ .data_size_in = sizeof(pkt_v4),
+ .data_out = buf,
+ .data_size_out = sizeof(buf),
+ .repeat = 1,
+ );
+
+ skel = type_cast__open();
+ if (!ASSERT_OK_PTR(skel, "skel_open"))
+ return;
+
+ bpf_program__set_autoload(skel->progs.md_xdp, true);
+ err = type_cast__load(skel);
+ if (!ASSERT_OK(err, "skel_load"))
+ goto out;
+
+ prog_fd = bpf_program__fd(skel->progs.md_xdp);
+ err = bpf_prog_test_run_opts(prog_fd, &topts);
+ ASSERT_OK(err, "test_run");
+ ASSERT_EQ(topts.retval, XDP_PASS, "xdp test_run retval");
+
+ ASSERT_EQ(skel->bss->ifindex, 1, "xdp_md ifindex");
+ ASSERT_EQ(skel->bss->ifindex, skel->bss->ingress_ifindex, "xdp_md ingress_ifindex");
+ ASSERT_STREQ(skel->bss->name, "lo", "xdp_md name");
+ ASSERT_NEQ(skel->bss->inum, 0, "xdp_md inum");
+
+out:
+ type_cast__destroy(skel);
+}
+
+static void test_tc(void)
+{
+ struct type_cast *skel;
+ int err, prog_fd;
+
+ LIBBPF_OPTS(bpf_test_run_opts, topts,
+ .data_in = &pkt_v4,
+ .data_size_in = sizeof(pkt_v4),
+ .repeat = 1,
+ );
+
+ skel = type_cast__open();
+ if (!ASSERT_OK_PTR(skel, "skel_open"))
+ return;
+
+ bpf_program__set_autoload(skel->progs.md_skb, true);
+ err = type_cast__load(skel);
+ if (!ASSERT_OK(err, "skel_load"))
+ goto out;
+
+ prog_fd = bpf_program__fd(skel->progs.md_skb);
+ err = bpf_prog_test_run_opts(prog_fd, &topts);
+ ASSERT_OK(err, "test_run");
+ ASSERT_EQ(topts.retval, 0, "tc test_run retval");
+
+ ASSERT_EQ(skel->bss->meta_len, 0, "skb meta_len");
+ ASSERT_EQ(skel->bss->frag0_len, 0, "skb frag0_len");
+ ASSERT_NEQ(skel->bss->kskb_len, 0, "skb len");
+ ASSERT_NEQ(skel->bss->kskb2_len, 0, "skb2 len");
+ ASSERT_EQ(skel->bss->kskb_len, skel->bss->kskb2_len, "skb len compare");
+
+out:
+ type_cast__destroy(skel);
+}
+
+static const char * const negative_tests[] = {
+ "untrusted_ptr",
+ "kctx_u64",
+};
+
+static void test_negative(void)
+{
+ struct bpf_program *prog;
+ struct type_cast *skel;
+ int i, err;
+
+ for (i = 0; i < ARRAY_SIZE(negative_tests); i++) {
+ skel = type_cast__open();
+ if (!ASSERT_OK_PTR(skel, "skel_open"))
+ return;
+
+ prog = bpf_object__find_program_by_name(skel->obj, negative_tests[i]);
+ if (!ASSERT_OK_PTR(prog, "bpf_object__find_program_by_name"))
+ goto out;
+ bpf_program__set_autoload(prog, true);
+ err = type_cast__load(skel);
+ ASSERT_ERR(err, "skel_load");
+out:
+ type_cast__destroy(skel);
+ }
+}
+
+void test_type_cast(void)
+{
+ if (test__start_subtest("xdp"))
+ test_xdp();
+ if (test__start_subtest("tc"))
+ test_tc();
+ if (test__start_subtest("negative"))
+ test_negative();
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/udp_limit.c b/tools/testing/selftests/bpf/prog_tests/udp_limit.c
new file mode 100644
index 000000000000..2643d896ddae
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/udp_limit.c
@@ -0,0 +1,70 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <test_progs.h>
+#include "udp_limit.skel.h"
+
+#include <sys/types.h>
+#include <sys/socket.h>
+
+void test_udp_limit(void)
+{
+ struct udp_limit *skel;
+ int fd1 = -1, fd2 = -1;
+ int cgroup_fd;
+
+ cgroup_fd = test__join_cgroup("/udp_limit");
+ if (!ASSERT_GE(cgroup_fd, 0, "cg-join"))
+ return;
+
+ skel = udp_limit__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "skel-load"))
+ goto close_cgroup_fd;
+
+ skel->links.sock = bpf_program__attach_cgroup(skel->progs.sock, cgroup_fd);
+ if (!ASSERT_OK_PTR(skel->links.sock, "cg_attach_sock"))
+ goto close_skeleton;
+ skel->links.sock_release = bpf_program__attach_cgroup(skel->progs.sock_release, cgroup_fd);
+ if (!ASSERT_OK_PTR(skel->links.sock_release, "cg_attach_sock_release"))
+ goto close_skeleton;
+
+ /* BPF program enforces a single UDP socket per cgroup,
+ * verify that.
+ */
+ fd1 = socket(AF_INET, SOCK_DGRAM, 0);
+ if (!ASSERT_GE(fd1, 0, "socket(fd1)"))
+ goto close_skeleton;
+
+ fd2 = socket(AF_INET, SOCK_DGRAM, 0);
+ if (!ASSERT_LT(fd2, 0, "socket(fd2)"))
+ goto close_skeleton;
+
+ /* We can reopen again after close. */
+ close(fd1);
+ fd1 = -1;
+
+ fd1 = socket(AF_INET, SOCK_DGRAM, 0);
+ if (!ASSERT_GE(fd1, 0, "socket(fd1-again)"))
+ goto close_skeleton;
+
+ /* Make sure the program was invoked the expected
+ * number of times:
+ * - open fd1 - BPF_CGROUP_INET_SOCK_CREATE
+ * - attempt to openfd2 - BPF_CGROUP_INET_SOCK_CREATE
+ * - close fd1 - BPF_CGROUP_INET_SOCK_RELEASE
+ * - open fd1 again - BPF_CGROUP_INET_SOCK_CREATE
+ */
+ if (!ASSERT_EQ(skel->bss->invocations, 4, "bss-invocations"))
+ goto close_skeleton;
+
+ /* We should still have a single socket in use */
+ if (!ASSERT_EQ(skel->bss->in_use, 1, "bss-in_use"))
+ goto close_skeleton;
+
+close_skeleton:
+ if (fd1 >= 0)
+ close(fd1);
+ if (fd2 >= 0)
+ close(fd2);
+ udp_limit__destroy(skel);
+close_cgroup_fd:
+ close(cgroup_fd);
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/uninit_stack.c b/tools/testing/selftests/bpf/prog_tests/uninit_stack.c
new file mode 100644
index 000000000000..e64c71948491
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/uninit_stack.c
@@ -0,0 +1,9 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include <test_progs.h>
+#include "uninit_stack.skel.h"
+
+void test_uninit_stack(void)
+{
+ RUN_TESTS(uninit_stack);
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/unpriv_bpf_disabled.c b/tools/testing/selftests/bpf/prog_tests/unpriv_bpf_disabled.c
new file mode 100644
index 000000000000..0adf8d9475cb
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/unpriv_bpf_disabled.c
@@ -0,0 +1,316 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2022, Oracle and/or its affiliates. */
+
+#include <test_progs.h>
+#include <bpf/btf.h>
+
+#include "test_unpriv_bpf_disabled.skel.h"
+
+#include "cap_helpers.h"
+
+/* Using CAP_LAST_CAP is risky here, since it can get pulled in from
+ * an old /usr/include/linux/capability.h and be < CAP_BPF; as a result
+ * CAP_BPF would not be included in ALL_CAPS. Instead use CAP_BPF as
+ * we know its value is correct since it is explicitly defined in
+ * cap_helpers.h.
+ */
+#define ALL_CAPS ((2ULL << CAP_BPF) - 1)
+
+#define PINPATH "/sys/fs/bpf/unpriv_bpf_disabled_"
+#define NUM_MAPS 7
+
+static __u32 got_perfbuf_val;
+static __u32 got_ringbuf_val;
+
+static int process_ringbuf(void *ctx, void *data, size_t len)
+{
+ if (ASSERT_EQ(len, sizeof(__u32), "ringbuf_size_valid"))
+ got_ringbuf_val = *(__u32 *)data;
+ return 0;
+}
+
+static void process_perfbuf(void *ctx, int cpu, void *data, __u32 len)
+{
+ if (ASSERT_EQ(len, sizeof(__u32), "perfbuf_size_valid"))
+ got_perfbuf_val = *(__u32 *)data;
+}
+
+static int sysctl_set(const char *sysctl_path, char *old_val, const char *new_val)
+{
+ int ret = 0;
+ FILE *fp;
+
+ fp = fopen(sysctl_path, "r+");
+ if (!fp)
+ return -errno;
+ if (old_val && fscanf(fp, "%s", old_val) <= 0) {
+ ret = -ENOENT;
+ } else if (!old_val || strcmp(old_val, new_val) != 0) {
+ fseek(fp, 0, SEEK_SET);
+ if (fprintf(fp, "%s", new_val) < 0)
+ ret = -errno;
+ }
+ fclose(fp);
+
+ return ret;
+}
+
+static void test_unpriv_bpf_disabled_positive(struct test_unpriv_bpf_disabled *skel,
+ __u32 prog_id, int prog_fd, int perf_fd,
+ char **map_paths, int *map_fds)
+{
+ struct perf_buffer *perfbuf = NULL;
+ struct ring_buffer *ringbuf = NULL;
+ int i, nr_cpus, link_fd = -1;
+
+ nr_cpus = bpf_num_possible_cpus();
+
+ skel->bss->perfbuf_val = 1;
+ skel->bss->ringbuf_val = 2;
+
+ /* Positive tests for unprivileged BPF disabled. Verify we can
+ * - retrieve and interact with pinned maps;
+ * - set up and interact with perf buffer;
+ * - set up and interact with ring buffer;
+ * - create a link
+ */
+ perfbuf = perf_buffer__new(bpf_map__fd(skel->maps.perfbuf), 8, process_perfbuf, NULL, NULL,
+ NULL);
+ if (!ASSERT_OK_PTR(perfbuf, "perf_buffer__new"))
+ goto cleanup;
+
+ ringbuf = ring_buffer__new(bpf_map__fd(skel->maps.ringbuf), process_ringbuf, NULL, NULL);
+ if (!ASSERT_OK_PTR(ringbuf, "ring_buffer__new"))
+ goto cleanup;
+
+ /* trigger & validate perf event, ringbuf output */
+ usleep(1);
+
+ ASSERT_GT(perf_buffer__poll(perfbuf, 100), -1, "perf_buffer__poll");
+ ASSERT_EQ(got_perfbuf_val, skel->bss->perfbuf_val, "check_perfbuf_val");
+ ASSERT_EQ(ring_buffer__consume(ringbuf), 1, "ring_buffer__consume");
+ ASSERT_EQ(got_ringbuf_val, skel->bss->ringbuf_val, "check_ringbuf_val");
+
+ for (i = 0; i < NUM_MAPS; i++) {
+ map_fds[i] = bpf_obj_get(map_paths[i]);
+ if (!ASSERT_GT(map_fds[i], -1, "obj_get"))
+ goto cleanup;
+ }
+
+ for (i = 0; i < NUM_MAPS; i++) {
+ bool prog_array = strstr(map_paths[i], "prog_array") != NULL;
+ bool array = strstr(map_paths[i], "array") != NULL;
+ bool buf = strstr(map_paths[i], "buf") != NULL;
+ __u32 key = 0, vals[nr_cpus], lookup_vals[nr_cpus];
+ __u32 expected_val = 1;
+ int j;
+
+ /* skip ringbuf, perfbuf */
+ if (buf)
+ continue;
+
+ for (j = 0; j < nr_cpus; j++)
+ vals[j] = expected_val;
+
+ if (prog_array) {
+ /* need valid prog array value */
+ vals[0] = prog_fd;
+ /* prog array lookup returns prog id, not fd */
+ expected_val = prog_id;
+ }
+ ASSERT_OK(bpf_map_update_elem(map_fds[i], &key, vals, 0), "map_update_elem");
+ ASSERT_OK(bpf_map_lookup_elem(map_fds[i], &key, &lookup_vals), "map_lookup_elem");
+ ASSERT_EQ(lookup_vals[0], expected_val, "map_lookup_elem_values");
+ if (!array)
+ ASSERT_OK(bpf_map_delete_elem(map_fds[i], &key), "map_delete_elem");
+ }
+
+ link_fd = bpf_link_create(bpf_program__fd(skel->progs.handle_perf_event), perf_fd,
+ BPF_PERF_EVENT, NULL);
+ ASSERT_GT(link_fd, 0, "link_create");
+
+cleanup:
+ if (link_fd)
+ close(link_fd);
+ if (perfbuf)
+ perf_buffer__free(perfbuf);
+ if (ringbuf)
+ ring_buffer__free(ringbuf);
+}
+
+static void test_unpriv_bpf_disabled_negative(struct test_unpriv_bpf_disabled *skel,
+ __u32 prog_id, int prog_fd, int perf_fd,
+ char **map_paths, int *map_fds)
+{
+ const struct bpf_insn prog_insns[] = {
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ };
+ const size_t prog_insn_cnt = sizeof(prog_insns) / sizeof(struct bpf_insn);
+ LIBBPF_OPTS(bpf_prog_load_opts, load_opts);
+ struct bpf_map_info map_info = {};
+ __u32 map_info_len = sizeof(map_info);
+ struct bpf_link_info link_info = {};
+ __u32 link_info_len = sizeof(link_info);
+ struct btf *btf = NULL;
+ __u32 attach_flags = 0;
+ __u32 prog_ids[3] = {};
+ __u32 prog_cnt = 3;
+ __u32 next;
+ int i;
+
+ /* Negative tests for unprivileged BPF disabled. Verify we cannot
+ * - load BPF programs;
+ * - create BPF maps;
+ * - get a prog/map/link fd by id;
+ * - get next prog/map/link id
+ * - query prog
+ * - BTF load
+ */
+ ASSERT_EQ(bpf_prog_load(BPF_PROG_TYPE_SOCKET_FILTER, "simple_prog", "GPL",
+ prog_insns, prog_insn_cnt, &load_opts),
+ -EPERM, "prog_load_fails");
+
+ /* some map types require particular correct parameters which could be
+ * sanity-checked before enforcing -EPERM, so only validate that
+ * the simple ARRAY and HASH maps are failing with -EPERM
+ */
+ for (i = BPF_MAP_TYPE_HASH; i <= BPF_MAP_TYPE_ARRAY; i++)
+ ASSERT_EQ(bpf_map_create(i, NULL, sizeof(int), sizeof(int), 1, NULL),
+ -EPERM, "map_create_fails");
+
+ ASSERT_EQ(bpf_prog_get_fd_by_id(prog_id), -EPERM, "prog_get_fd_by_id_fails");
+ ASSERT_EQ(bpf_prog_get_next_id(prog_id, &next), -EPERM, "prog_get_next_id_fails");
+ ASSERT_EQ(bpf_prog_get_next_id(0, &next), -EPERM, "prog_get_next_id_fails");
+
+ if (ASSERT_OK(bpf_map_get_info_by_fd(map_fds[0], &map_info, &map_info_len),
+ "obj_get_info_by_fd")) {
+ ASSERT_EQ(bpf_map_get_fd_by_id(map_info.id), -EPERM, "map_get_fd_by_id_fails");
+ ASSERT_EQ(bpf_map_get_next_id(map_info.id, &next), -EPERM,
+ "map_get_next_id_fails");
+ }
+ ASSERT_EQ(bpf_map_get_next_id(0, &next), -EPERM, "map_get_next_id_fails");
+
+ if (ASSERT_OK(bpf_link_get_info_by_fd(bpf_link__fd(skel->links.sys_nanosleep_enter),
+ &link_info, &link_info_len),
+ "obj_get_info_by_fd")) {
+ ASSERT_EQ(bpf_link_get_fd_by_id(link_info.id), -EPERM, "link_get_fd_by_id_fails");
+ ASSERT_EQ(bpf_link_get_next_id(link_info.id, &next), -EPERM,
+ "link_get_next_id_fails");
+ }
+ ASSERT_EQ(bpf_link_get_next_id(0, &next), -EPERM, "link_get_next_id_fails");
+
+ ASSERT_EQ(bpf_prog_query(prog_fd, BPF_TRACE_FENTRY, 0, &attach_flags, prog_ids,
+ &prog_cnt), -EPERM, "prog_query_fails");
+
+ btf = btf__new_empty();
+ if (ASSERT_OK_PTR(btf, "empty_btf") &&
+ ASSERT_GT(btf__add_int(btf, "int", 4, 0), 0, "unpriv_int_type")) {
+ const void *raw_btf_data;
+ __u32 raw_btf_size;
+
+ raw_btf_data = btf__raw_data(btf, &raw_btf_size);
+ if (ASSERT_OK_PTR(raw_btf_data, "raw_btf_data_good"))
+ ASSERT_EQ(bpf_btf_load(raw_btf_data, raw_btf_size, NULL), -EPERM,
+ "bpf_btf_load_fails");
+ }
+ btf__free(btf);
+}
+
+void test_unpriv_bpf_disabled(void)
+{
+ char *map_paths[NUM_MAPS] = { PINPATH "array",
+ PINPATH "percpu_array",
+ PINPATH "hash",
+ PINPATH "percpu_hash",
+ PINPATH "perfbuf",
+ PINPATH "ringbuf",
+ PINPATH "prog_array" };
+ int map_fds[NUM_MAPS];
+ struct test_unpriv_bpf_disabled *skel;
+ char unprivileged_bpf_disabled_orig[32] = {};
+ char perf_event_paranoid_orig[32] = {};
+ struct bpf_prog_info prog_info = {};
+ __u32 prog_info_len = sizeof(prog_info);
+ struct perf_event_attr attr = {};
+ int prog_fd, perf_fd = -1, i, ret;
+ __u64 save_caps = 0;
+ __u32 prog_id;
+
+ skel = test_unpriv_bpf_disabled__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "skel_open"))
+ return;
+
+ skel->bss->test_pid = getpid();
+
+ map_fds[0] = bpf_map__fd(skel->maps.array);
+ map_fds[1] = bpf_map__fd(skel->maps.percpu_array);
+ map_fds[2] = bpf_map__fd(skel->maps.hash);
+ map_fds[3] = bpf_map__fd(skel->maps.percpu_hash);
+ map_fds[4] = bpf_map__fd(skel->maps.perfbuf);
+ map_fds[5] = bpf_map__fd(skel->maps.ringbuf);
+ map_fds[6] = bpf_map__fd(skel->maps.prog_array);
+
+ for (i = 0; i < NUM_MAPS; i++)
+ ASSERT_OK(bpf_obj_pin(map_fds[i], map_paths[i]), "pin map_fd");
+
+ /* allow user without caps to use perf events */
+ if (!ASSERT_OK(sysctl_set("/proc/sys/kernel/perf_event_paranoid", perf_event_paranoid_orig,
+ "-1"),
+ "set_perf_event_paranoid"))
+ goto cleanup;
+ /* ensure unprivileged bpf disabled is set */
+ ret = sysctl_set("/proc/sys/kernel/unprivileged_bpf_disabled",
+ unprivileged_bpf_disabled_orig, "2");
+ if (ret == -EPERM) {
+ /* if unprivileged_bpf_disabled=1, we get -EPERM back; that's okay. */
+ if (!ASSERT_OK(strcmp(unprivileged_bpf_disabled_orig, "1"),
+ "unprivileged_bpf_disabled_on"))
+ goto cleanup;
+ } else {
+ if (!ASSERT_OK(ret, "set unprivileged_bpf_disabled"))
+ goto cleanup;
+ }
+
+ prog_fd = bpf_program__fd(skel->progs.sys_nanosleep_enter);
+ ASSERT_OK(bpf_prog_get_info_by_fd(prog_fd, &prog_info, &prog_info_len),
+ "obj_get_info_by_fd");
+ prog_id = prog_info.id;
+ ASSERT_GT(prog_id, 0, "valid_prog_id");
+
+ attr.size = sizeof(attr);
+ attr.type = PERF_TYPE_SOFTWARE;
+ attr.config = PERF_COUNT_SW_CPU_CLOCK;
+ attr.freq = 1;
+ attr.sample_freq = 1000;
+ perf_fd = syscall(__NR_perf_event_open, &attr, -1, 0, -1, PERF_FLAG_FD_CLOEXEC);
+ if (!ASSERT_GE(perf_fd, 0, "perf_fd"))
+ goto cleanup;
+
+ if (!ASSERT_OK(test_unpriv_bpf_disabled__attach(skel), "skel_attach"))
+ goto cleanup;
+
+ if (!ASSERT_OK(cap_disable_effective(ALL_CAPS, &save_caps), "disable caps"))
+ goto cleanup;
+
+ if (test__start_subtest("unpriv_bpf_disabled_positive"))
+ test_unpriv_bpf_disabled_positive(skel, prog_id, prog_fd, perf_fd, map_paths,
+ map_fds);
+
+ if (test__start_subtest("unpriv_bpf_disabled_negative"))
+ test_unpriv_bpf_disabled_negative(skel, prog_id, prog_fd, perf_fd, map_paths,
+ map_fds);
+
+cleanup:
+ close(perf_fd);
+ if (save_caps)
+ cap_enable_effective(save_caps, NULL);
+ if (strlen(perf_event_paranoid_orig) > 0)
+ sysctl_set("/proc/sys/kernel/perf_event_paranoid", NULL, perf_event_paranoid_orig);
+ if (strlen(unprivileged_bpf_disabled_orig) > 0)
+ sysctl_set("/proc/sys/kernel/unprivileged_bpf_disabled", NULL,
+ unprivileged_bpf_disabled_orig);
+ for (i = 0; i < NUM_MAPS; i++)
+ unlink(map_paths[i]);
+ test_unpriv_bpf_disabled__destroy(skel);
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/uprobe.c b/tools/testing/selftests/bpf/prog_tests/uprobe.c
new file mode 100644
index 000000000000..cf3e0e7a64fa
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/uprobe.c
@@ -0,0 +1,95 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2023 Hengqi Chen */
+
+#include <test_progs.h>
+#include "test_uprobe.skel.h"
+
+static FILE *urand_spawn(int *pid)
+{
+ FILE *f;
+
+ /* urandom_read's stdout is wired into f */
+ f = popen("./urandom_read 1 report-pid", "r");
+ if (!f)
+ return NULL;
+
+ if (fscanf(f, "%d", pid) != 1) {
+ pclose(f);
+ errno = EINVAL;
+ return NULL;
+ }
+
+ return f;
+}
+
+static int urand_trigger(FILE **urand_pipe)
+{
+ int exit_code;
+
+ /* pclose() waits for child process to exit and returns their exit code */
+ exit_code = pclose(*urand_pipe);
+ *urand_pipe = NULL;
+
+ return exit_code;
+}
+
+void test_uprobe(void)
+{
+ LIBBPF_OPTS(bpf_uprobe_opts, uprobe_opts);
+ struct test_uprobe *skel;
+ FILE *urand_pipe = NULL;
+ int urand_pid = 0, err;
+
+ skel = test_uprobe__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "skel_open"))
+ return;
+
+ urand_pipe = urand_spawn(&urand_pid);
+ if (!ASSERT_OK_PTR(urand_pipe, "urand_spawn"))
+ goto cleanup;
+
+ skel->bss->my_pid = urand_pid;
+
+ /* Manual attach uprobe to urandlib_api
+ * There are two `urandlib_api` symbols in .dynsym section:
+ * - urandlib_api@LIBURANDOM_READ_1.0.0
+ * - urandlib_api@@LIBURANDOM_READ_2.0.0
+ * Both are global bind and would cause a conflict if user
+ * specify the symbol name without a version suffix
+ */
+ uprobe_opts.func_name = "urandlib_api";
+ skel->links.test4 = bpf_program__attach_uprobe_opts(skel->progs.test4,
+ urand_pid,
+ "./liburandom_read.so",
+ 0 /* offset */,
+ &uprobe_opts);
+ if (!ASSERT_ERR_PTR(skel->links.test4, "urandlib_api_attach_conflict"))
+ goto cleanup;
+
+ uprobe_opts.func_name = "urandlib_api@LIBURANDOM_READ_1.0.0";
+ skel->links.test4 = bpf_program__attach_uprobe_opts(skel->progs.test4,
+ urand_pid,
+ "./liburandom_read.so",
+ 0 /* offset */,
+ &uprobe_opts);
+ if (!ASSERT_OK_PTR(skel->links.test4, "urandlib_api_attach_ok"))
+ goto cleanup;
+
+ /* Auto attach 3 u[ret]probes to urandlib_api_sameoffset */
+ err = test_uprobe__attach(skel);
+ if (!ASSERT_OK(err, "skel_attach"))
+ goto cleanup;
+
+ /* trigger urandom_read */
+ ASSERT_OK(urand_trigger(&urand_pipe), "urand_exit_code");
+
+ ASSERT_EQ(skel->bss->test1_result, 1, "urandlib_api_sameoffset");
+ ASSERT_EQ(skel->bss->test2_result, 1, "urandlib_api_sameoffset@v1");
+ ASSERT_EQ(skel->bss->test3_result, 3, "urandlib_api_sameoffset@@v2");
+ ASSERT_EQ(skel->bss->test4_result, 1, "urandlib_api");
+
+cleanup:
+ if (urand_pipe)
+ pclose(urand_pipe);
+ test_uprobe__destroy(skel);
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/uprobe_autoattach.c b/tools/testing/selftests/bpf/prog_tests/uprobe_autoattach.c
new file mode 100644
index 000000000000..d5b3377aa33c
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/uprobe_autoattach.c
@@ -0,0 +1,74 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2022, Oracle and/or its affiliates. */
+
+#include <test_progs.h>
+#include "test_uprobe_autoattach.skel.h"
+
+/* uprobe attach point */
+static noinline int autoattach_trigger_func(int arg1, int arg2, int arg3,
+ int arg4, int arg5, int arg6,
+ int arg7, int arg8)
+{
+ asm volatile ("");
+ return arg1 + arg2 + arg3 + arg4 + arg5 + arg6 + arg7 + arg8 + 1;
+}
+
+void test_uprobe_autoattach(void)
+{
+ const char *devnull_str = "/dev/null";
+ struct test_uprobe_autoattach *skel;
+ int trigger_ret;
+ FILE *devnull;
+
+ skel = test_uprobe_autoattach__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "skel_open"))
+ return;
+
+ if (!ASSERT_OK(test_uprobe_autoattach__attach(skel), "skel_attach"))
+ goto cleanup;
+
+ skel->bss->test_pid = getpid();
+
+ /* trigger & validate uprobe & uretprobe */
+ trigger_ret = autoattach_trigger_func(1, 2, 3, 4, 5, 6, 7, 8);
+
+ skel->bss->test_pid = getpid();
+
+ /* trigger & validate shared library u[ret]probes attached by name */
+ devnull = fopen(devnull_str, "r");
+
+ ASSERT_EQ(skel->bss->uprobe_byname_parm1, 1, "check_uprobe_byname_parm1");
+ ASSERT_EQ(skel->bss->uprobe_byname_ran, 1, "check_uprobe_byname_ran");
+ ASSERT_EQ(skel->bss->uretprobe_byname_rc, trigger_ret, "check_uretprobe_byname_rc");
+ ASSERT_EQ(skel->bss->uretprobe_byname_ret, trigger_ret, "check_uretprobe_byname_ret");
+ ASSERT_EQ(skel->bss->uretprobe_byname_ran, 2, "check_uretprobe_byname_ran");
+ ASSERT_EQ(skel->bss->uprobe_byname2_parm1, (__u64)(long)devnull_str,
+ "check_uprobe_byname2_parm1");
+ ASSERT_EQ(skel->bss->uprobe_byname2_ran, 3, "check_uprobe_byname2_ran");
+ ASSERT_EQ(skel->bss->uretprobe_byname2_rc, (__u64)(long)devnull,
+ "check_uretprobe_byname2_rc");
+ ASSERT_EQ(skel->bss->uretprobe_byname2_ran, 4, "check_uretprobe_byname2_ran");
+
+ ASSERT_EQ(skel->bss->a[0], 1, "arg1");
+ ASSERT_EQ(skel->bss->a[1], 2, "arg2");
+ ASSERT_EQ(skel->bss->a[2], 3, "arg3");
+#if FUNC_REG_ARG_CNT > 3
+ ASSERT_EQ(skel->bss->a[3], 4, "arg4");
+#endif
+#if FUNC_REG_ARG_CNT > 4
+ ASSERT_EQ(skel->bss->a[4], 5, "arg5");
+#endif
+#if FUNC_REG_ARG_CNT > 5
+ ASSERT_EQ(skel->bss->a[5], 6, "arg6");
+#endif
+#if FUNC_REG_ARG_CNT > 6
+ ASSERT_EQ(skel->bss->a[6], 7, "arg7");
+#endif
+#if FUNC_REG_ARG_CNT > 7
+ ASSERT_EQ(skel->bss->a[7], 8, "arg8");
+#endif
+
+ fclose(devnull);
+cleanup:
+ test_uprobe_autoattach__destroy(skel);
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/uprobe_multi_test.c b/tools/testing/selftests/bpf/prog_tests/uprobe_multi_test.c
new file mode 100644
index 000000000000..8269cdee33ae
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/uprobe_multi_test.c
@@ -0,0 +1,588 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include <unistd.h>
+#include <test_progs.h>
+#include "uprobe_multi.skel.h"
+#include "uprobe_multi_bench.skel.h"
+#include "uprobe_multi_usdt.skel.h"
+#include "bpf/libbpf_internal.h"
+#include "testing_helpers.h"
+
+static char test_data[] = "test_data";
+
+noinline void uprobe_multi_func_1(void)
+{
+ asm volatile ("");
+}
+
+noinline void uprobe_multi_func_2(void)
+{
+ asm volatile ("");
+}
+
+noinline void uprobe_multi_func_3(void)
+{
+ asm volatile ("");
+}
+
+struct child {
+ int go[2];
+ int pid;
+};
+
+static void release_child(struct child *child)
+{
+ int child_status;
+
+ if (!child)
+ return;
+ close(child->go[1]);
+ close(child->go[0]);
+ if (child->pid > 0)
+ waitpid(child->pid, &child_status, 0);
+}
+
+static void kick_child(struct child *child)
+{
+ char c = 1;
+
+ if (child) {
+ write(child->go[1], &c, 1);
+ release_child(child);
+ }
+ fflush(NULL);
+}
+
+static struct child *spawn_child(void)
+{
+ static struct child child;
+ int err;
+ int c;
+
+ /* pipe to notify child to execute the trigger functions */
+ if (pipe(child.go))
+ return NULL;
+
+ child.pid = fork();
+ if (child.pid < 0) {
+ release_child(&child);
+ errno = EINVAL;
+ return NULL;
+ }
+
+ /* child */
+ if (child.pid == 0) {
+ close(child.go[1]);
+
+ /* wait for parent's kick */
+ err = read(child.go[0], &c, 1);
+ if (err != 1)
+ exit(err);
+
+ uprobe_multi_func_1();
+ uprobe_multi_func_2();
+ uprobe_multi_func_3();
+
+ exit(errno);
+ }
+
+ return &child;
+}
+
+static void uprobe_multi_test_run(struct uprobe_multi *skel, struct child *child)
+{
+ skel->bss->uprobe_multi_func_1_addr = (__u64) uprobe_multi_func_1;
+ skel->bss->uprobe_multi_func_2_addr = (__u64) uprobe_multi_func_2;
+ skel->bss->uprobe_multi_func_3_addr = (__u64) uprobe_multi_func_3;
+
+ skel->bss->user_ptr = test_data;
+
+ /*
+ * Disable pid check in bpf program if we are pid filter test,
+ * because the probe should be executed only by child->pid
+ * passed at the probe attach.
+ */
+ skel->bss->pid = child ? 0 : getpid();
+
+ if (child)
+ kick_child(child);
+
+ /* trigger all probes */
+ uprobe_multi_func_1();
+ uprobe_multi_func_2();
+ uprobe_multi_func_3();
+
+ /*
+ * There are 2 entry and 2 exit probe called for each uprobe_multi_func_[123]
+ * function and each slepable probe (6) increments uprobe_multi_sleep_result.
+ */
+ ASSERT_EQ(skel->bss->uprobe_multi_func_1_result, 2, "uprobe_multi_func_1_result");
+ ASSERT_EQ(skel->bss->uprobe_multi_func_2_result, 2, "uprobe_multi_func_2_result");
+ ASSERT_EQ(skel->bss->uprobe_multi_func_3_result, 2, "uprobe_multi_func_3_result");
+
+ ASSERT_EQ(skel->bss->uretprobe_multi_func_1_result, 2, "uretprobe_multi_func_1_result");
+ ASSERT_EQ(skel->bss->uretprobe_multi_func_2_result, 2, "uretprobe_multi_func_2_result");
+ ASSERT_EQ(skel->bss->uretprobe_multi_func_3_result, 2, "uretprobe_multi_func_3_result");
+
+ ASSERT_EQ(skel->bss->uprobe_multi_sleep_result, 6, "uprobe_multi_sleep_result");
+
+ if (child)
+ ASSERT_EQ(skel->bss->child_pid, child->pid, "uprobe_multi_child_pid");
+}
+
+static void test_skel_api(void)
+{
+ struct uprobe_multi *skel = NULL;
+ int err;
+
+ skel = uprobe_multi__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "uprobe_multi__open_and_load"))
+ goto cleanup;
+
+ err = uprobe_multi__attach(skel);
+ if (!ASSERT_OK(err, "uprobe_multi__attach"))
+ goto cleanup;
+
+ uprobe_multi_test_run(skel, NULL);
+
+cleanup:
+ uprobe_multi__destroy(skel);
+}
+
+static void
+__test_attach_api(const char *binary, const char *pattern, struct bpf_uprobe_multi_opts *opts,
+ struct child *child)
+{
+ pid_t pid = child ? child->pid : -1;
+ struct uprobe_multi *skel = NULL;
+
+ skel = uprobe_multi__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "uprobe_multi__open_and_load"))
+ goto cleanup;
+
+ opts->retprobe = false;
+ skel->links.uprobe = bpf_program__attach_uprobe_multi(skel->progs.uprobe, pid,
+ binary, pattern, opts);
+ if (!ASSERT_OK_PTR(skel->links.uprobe, "bpf_program__attach_uprobe_multi"))
+ goto cleanup;
+
+ opts->retprobe = true;
+ skel->links.uretprobe = bpf_program__attach_uprobe_multi(skel->progs.uretprobe, pid,
+ binary, pattern, opts);
+ if (!ASSERT_OK_PTR(skel->links.uretprobe, "bpf_program__attach_uprobe_multi"))
+ goto cleanup;
+
+ opts->retprobe = false;
+ skel->links.uprobe_sleep = bpf_program__attach_uprobe_multi(skel->progs.uprobe_sleep, pid,
+ binary, pattern, opts);
+ if (!ASSERT_OK_PTR(skel->links.uprobe_sleep, "bpf_program__attach_uprobe_multi"))
+ goto cleanup;
+
+ opts->retprobe = true;
+ skel->links.uretprobe_sleep = bpf_program__attach_uprobe_multi(skel->progs.uretprobe_sleep,
+ pid, binary, pattern, opts);
+ if (!ASSERT_OK_PTR(skel->links.uretprobe_sleep, "bpf_program__attach_uprobe_multi"))
+ goto cleanup;
+
+ opts->retprobe = false;
+ skel->links.uprobe_extra = bpf_program__attach_uprobe_multi(skel->progs.uprobe_extra, -1,
+ binary, pattern, opts);
+ if (!ASSERT_OK_PTR(skel->links.uprobe_extra, "bpf_program__attach_uprobe_multi"))
+ goto cleanup;
+
+ uprobe_multi_test_run(skel, child);
+
+cleanup:
+ uprobe_multi__destroy(skel);
+}
+
+static void
+test_attach_api(const char *binary, const char *pattern, struct bpf_uprobe_multi_opts *opts)
+{
+ struct child *child;
+
+ /* no pid filter */
+ __test_attach_api(binary, pattern, opts, NULL);
+
+ /* pid filter */
+ child = spawn_child();
+ if (!ASSERT_OK_PTR(child, "spawn_child"))
+ return;
+
+ __test_attach_api(binary, pattern, opts, child);
+}
+
+static void test_attach_api_pattern(void)
+{
+ LIBBPF_OPTS(bpf_uprobe_multi_opts, opts);
+
+ test_attach_api("/proc/self/exe", "uprobe_multi_func_*", &opts);
+ test_attach_api("/proc/self/exe", "uprobe_multi_func_?", &opts);
+}
+
+static void test_attach_api_syms(void)
+{
+ LIBBPF_OPTS(bpf_uprobe_multi_opts, opts);
+ const char *syms[3] = {
+ "uprobe_multi_func_1",
+ "uprobe_multi_func_2",
+ "uprobe_multi_func_3",
+ };
+
+ opts.syms = syms;
+ opts.cnt = ARRAY_SIZE(syms);
+ test_attach_api("/proc/self/exe", NULL, &opts);
+}
+
+static void test_attach_api_fails(void)
+{
+ LIBBPF_OPTS(bpf_link_create_opts, opts);
+ const char *path = "/proc/self/exe";
+ struct uprobe_multi *skel = NULL;
+ int prog_fd, link_fd = -1;
+ unsigned long offset = 0;
+
+ skel = uprobe_multi__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "uprobe_multi__open_and_load"))
+ goto cleanup;
+
+ prog_fd = bpf_program__fd(skel->progs.uprobe_extra);
+
+ /* abnormal cnt */
+ opts.uprobe_multi.path = path;
+ opts.uprobe_multi.offsets = &offset;
+ opts.uprobe_multi.cnt = INT_MAX;
+ link_fd = bpf_link_create(prog_fd, 0, BPF_TRACE_UPROBE_MULTI, &opts);
+ if (!ASSERT_ERR(link_fd, "link_fd"))
+ goto cleanup;
+ if (!ASSERT_EQ(link_fd, -E2BIG, "big cnt"))
+ goto cleanup;
+
+ /* cnt is 0 */
+ LIBBPF_OPTS_RESET(opts,
+ .uprobe_multi.path = path,
+ .uprobe_multi.offsets = (unsigned long *) &offset,
+ );
+
+ link_fd = bpf_link_create(prog_fd, 0, BPF_TRACE_UPROBE_MULTI, &opts);
+ if (!ASSERT_ERR(link_fd, "link_fd"))
+ goto cleanup;
+ if (!ASSERT_EQ(link_fd, -EINVAL, "cnt_is_zero"))
+ goto cleanup;
+
+ /* negative offset */
+ offset = -1;
+ opts.uprobe_multi.path = path;
+ opts.uprobe_multi.offsets = (unsigned long *) &offset;
+ opts.uprobe_multi.cnt = 1;
+
+ link_fd = bpf_link_create(prog_fd, 0, BPF_TRACE_UPROBE_MULTI, &opts);
+ if (!ASSERT_ERR(link_fd, "link_fd"))
+ goto cleanup;
+ if (!ASSERT_EQ(link_fd, -EINVAL, "offset_is_negative"))
+ goto cleanup;
+
+ /* offsets is NULL */
+ LIBBPF_OPTS_RESET(opts,
+ .uprobe_multi.path = path,
+ .uprobe_multi.cnt = 1,
+ );
+
+ link_fd = bpf_link_create(prog_fd, 0, BPF_TRACE_UPROBE_MULTI, &opts);
+ if (!ASSERT_ERR(link_fd, "link_fd"))
+ goto cleanup;
+ if (!ASSERT_EQ(link_fd, -EINVAL, "offsets_is_null"))
+ goto cleanup;
+
+ /* wrong offsets pointer */
+ LIBBPF_OPTS_RESET(opts,
+ .uprobe_multi.path = path,
+ .uprobe_multi.offsets = (unsigned long *) 1,
+ .uprobe_multi.cnt = 1,
+ );
+
+ link_fd = bpf_link_create(prog_fd, 0, BPF_TRACE_UPROBE_MULTI, &opts);
+ if (!ASSERT_ERR(link_fd, "link_fd"))
+ goto cleanup;
+ if (!ASSERT_EQ(link_fd, -EFAULT, "offsets_is_wrong"))
+ goto cleanup;
+
+ /* path is NULL */
+ offset = 1;
+ LIBBPF_OPTS_RESET(opts,
+ .uprobe_multi.offsets = (unsigned long *) &offset,
+ .uprobe_multi.cnt = 1,
+ );
+
+ link_fd = bpf_link_create(prog_fd, 0, BPF_TRACE_UPROBE_MULTI, &opts);
+ if (!ASSERT_ERR(link_fd, "link_fd"))
+ goto cleanup;
+ if (!ASSERT_EQ(link_fd, -EINVAL, "path_is_null"))
+ goto cleanup;
+
+ /* wrong path pointer */
+ LIBBPF_OPTS_RESET(opts,
+ .uprobe_multi.path = (const char *) 1,
+ .uprobe_multi.offsets = (unsigned long *) &offset,
+ .uprobe_multi.cnt = 1,
+ );
+
+ link_fd = bpf_link_create(prog_fd, 0, BPF_TRACE_UPROBE_MULTI, &opts);
+ if (!ASSERT_ERR(link_fd, "link_fd"))
+ goto cleanup;
+ if (!ASSERT_EQ(link_fd, -EFAULT, "path_is_wrong"))
+ goto cleanup;
+
+ /* wrong path type */
+ LIBBPF_OPTS_RESET(opts,
+ .uprobe_multi.path = "/",
+ .uprobe_multi.offsets = (unsigned long *) &offset,
+ .uprobe_multi.cnt = 1,
+ );
+
+ link_fd = bpf_link_create(prog_fd, 0, BPF_TRACE_UPROBE_MULTI, &opts);
+ if (!ASSERT_ERR(link_fd, "link_fd"))
+ goto cleanup;
+ if (!ASSERT_EQ(link_fd, -EBADF, "path_is_wrong_type"))
+ goto cleanup;
+
+ /* wrong cookies pointer */
+ LIBBPF_OPTS_RESET(opts,
+ .uprobe_multi.path = path,
+ .uprobe_multi.offsets = (unsigned long *) &offset,
+ .uprobe_multi.cookies = (__u64 *) 1ULL,
+ .uprobe_multi.cnt = 1,
+ );
+
+ link_fd = bpf_link_create(prog_fd, 0, BPF_TRACE_UPROBE_MULTI, &opts);
+ if (!ASSERT_ERR(link_fd, "link_fd"))
+ goto cleanup;
+ if (!ASSERT_EQ(link_fd, -EFAULT, "cookies_is_wrong"))
+ goto cleanup;
+
+ /* wrong ref_ctr_offsets pointer */
+ LIBBPF_OPTS_RESET(opts,
+ .uprobe_multi.path = path,
+ .uprobe_multi.offsets = (unsigned long *) &offset,
+ .uprobe_multi.cookies = (__u64 *) &offset,
+ .uprobe_multi.ref_ctr_offsets = (unsigned long *) 1,
+ .uprobe_multi.cnt = 1,
+ );
+
+ link_fd = bpf_link_create(prog_fd, 0, BPF_TRACE_UPROBE_MULTI, &opts);
+ if (!ASSERT_ERR(link_fd, "link_fd"))
+ goto cleanup;
+ if (!ASSERT_EQ(link_fd, -EFAULT, "ref_ctr_offsets_is_wrong"))
+ goto cleanup;
+
+ /* wrong flags */
+ LIBBPF_OPTS_RESET(opts,
+ .uprobe_multi.flags = 1 << 31,
+ );
+
+ link_fd = bpf_link_create(prog_fd, 0, BPF_TRACE_UPROBE_MULTI, &opts);
+ if (!ASSERT_ERR(link_fd, "link_fd"))
+ goto cleanup;
+ if (!ASSERT_EQ(link_fd, -EINVAL, "wrong_flags"))
+ goto cleanup;
+
+ /* wrong pid */
+ LIBBPF_OPTS_RESET(opts,
+ .uprobe_multi.path = path,
+ .uprobe_multi.offsets = (unsigned long *) &offset,
+ .uprobe_multi.cnt = 1,
+ .uprobe_multi.pid = -2,
+ );
+
+ link_fd = bpf_link_create(prog_fd, 0, BPF_TRACE_UPROBE_MULTI, &opts);
+ if (!ASSERT_ERR(link_fd, "link_fd"))
+ goto cleanup;
+ ASSERT_EQ(link_fd, -ESRCH, "pid_is_wrong");
+
+cleanup:
+ if (link_fd >= 0)
+ close(link_fd);
+ uprobe_multi__destroy(skel);
+}
+
+static void __test_link_api(struct child *child)
+{
+ int prog_fd, link1_fd = -1, link2_fd = -1, link3_fd = -1, link4_fd = -1;
+ LIBBPF_OPTS(bpf_link_create_opts, opts);
+ const char *path = "/proc/self/exe";
+ struct uprobe_multi *skel = NULL;
+ unsigned long *offsets = NULL;
+ const char *syms[3] = {
+ "uprobe_multi_func_1",
+ "uprobe_multi_func_2",
+ "uprobe_multi_func_3",
+ };
+ int link_extra_fd = -1;
+ int err;
+
+ err = elf_resolve_syms_offsets(path, 3, syms, (unsigned long **) &offsets, STT_FUNC);
+ if (!ASSERT_OK(err, "elf_resolve_syms_offsets"))
+ return;
+
+ opts.uprobe_multi.path = path;
+ opts.uprobe_multi.offsets = offsets;
+ opts.uprobe_multi.cnt = ARRAY_SIZE(syms);
+ opts.uprobe_multi.pid = child ? child->pid : 0;
+
+ skel = uprobe_multi__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "uprobe_multi__open_and_load"))
+ goto cleanup;
+
+ opts.kprobe_multi.flags = 0;
+ prog_fd = bpf_program__fd(skel->progs.uprobe);
+ link1_fd = bpf_link_create(prog_fd, 0, BPF_TRACE_UPROBE_MULTI, &opts);
+ if (!ASSERT_GE(link1_fd, 0, "link1_fd"))
+ goto cleanup;
+
+ opts.kprobe_multi.flags = BPF_F_UPROBE_MULTI_RETURN;
+ prog_fd = bpf_program__fd(skel->progs.uretprobe);
+ link2_fd = bpf_link_create(prog_fd, 0, BPF_TRACE_UPROBE_MULTI, &opts);
+ if (!ASSERT_GE(link2_fd, 0, "link2_fd"))
+ goto cleanup;
+
+ opts.kprobe_multi.flags = 0;
+ prog_fd = bpf_program__fd(skel->progs.uprobe_sleep);
+ link3_fd = bpf_link_create(prog_fd, 0, BPF_TRACE_UPROBE_MULTI, &opts);
+ if (!ASSERT_GE(link3_fd, 0, "link3_fd"))
+ goto cleanup;
+
+ opts.kprobe_multi.flags = BPF_F_UPROBE_MULTI_RETURN;
+ prog_fd = bpf_program__fd(skel->progs.uretprobe_sleep);
+ link4_fd = bpf_link_create(prog_fd, 0, BPF_TRACE_UPROBE_MULTI, &opts);
+ if (!ASSERT_GE(link4_fd, 0, "link4_fd"))
+ goto cleanup;
+
+ opts.kprobe_multi.flags = 0;
+ opts.uprobe_multi.pid = 0;
+ prog_fd = bpf_program__fd(skel->progs.uprobe_extra);
+ link_extra_fd = bpf_link_create(prog_fd, 0, BPF_TRACE_UPROBE_MULTI, &opts);
+ if (!ASSERT_GE(link_extra_fd, 0, "link_extra_fd"))
+ goto cleanup;
+
+ uprobe_multi_test_run(skel, child);
+
+cleanup:
+ if (link1_fd >= 0)
+ close(link1_fd);
+ if (link2_fd >= 0)
+ close(link2_fd);
+ if (link3_fd >= 0)
+ close(link3_fd);
+ if (link4_fd >= 0)
+ close(link4_fd);
+ if (link_extra_fd >= 0)
+ close(link_extra_fd);
+
+ uprobe_multi__destroy(skel);
+ free(offsets);
+}
+
+static void test_link_api(void)
+{
+ struct child *child;
+
+ /* no pid filter */
+ __test_link_api(NULL);
+
+ /* pid filter */
+ child = spawn_child();
+ if (!ASSERT_OK_PTR(child, "spawn_child"))
+ return;
+
+ __test_link_api(child);
+}
+
+static void test_bench_attach_uprobe(void)
+{
+ long attach_start_ns = 0, attach_end_ns = 0;
+ struct uprobe_multi_bench *skel = NULL;
+ long detach_start_ns, detach_end_ns;
+ double attach_delta, detach_delta;
+ int err;
+
+ skel = uprobe_multi_bench__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "uprobe_multi_bench__open_and_load"))
+ goto cleanup;
+
+ attach_start_ns = get_time_ns();
+
+ err = uprobe_multi_bench__attach(skel);
+ if (!ASSERT_OK(err, "uprobe_multi_bench__attach"))
+ goto cleanup;
+
+ attach_end_ns = get_time_ns();
+
+ system("./uprobe_multi bench");
+
+ ASSERT_EQ(skel->bss->count, 50000, "uprobes_count");
+
+cleanup:
+ detach_start_ns = get_time_ns();
+ uprobe_multi_bench__destroy(skel);
+ detach_end_ns = get_time_ns();
+
+ attach_delta = (attach_end_ns - attach_start_ns) / 1000000000.0;
+ detach_delta = (detach_end_ns - detach_start_ns) / 1000000000.0;
+
+ printf("%s: attached in %7.3lfs\n", __func__, attach_delta);
+ printf("%s: detached in %7.3lfs\n", __func__, detach_delta);
+}
+
+static void test_bench_attach_usdt(void)
+{
+ long attach_start_ns = 0, attach_end_ns = 0;
+ struct uprobe_multi_usdt *skel = NULL;
+ long detach_start_ns, detach_end_ns;
+ double attach_delta, detach_delta;
+
+ skel = uprobe_multi_usdt__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "uprobe_multi__open"))
+ goto cleanup;
+
+ attach_start_ns = get_time_ns();
+
+ skel->links.usdt0 = bpf_program__attach_usdt(skel->progs.usdt0, -1, "./uprobe_multi",
+ "test", "usdt", NULL);
+ if (!ASSERT_OK_PTR(skel->links.usdt0, "bpf_program__attach_usdt"))
+ goto cleanup;
+
+ attach_end_ns = get_time_ns();
+
+ system("./uprobe_multi usdt");
+
+ ASSERT_EQ(skel->bss->count, 50000, "usdt_count");
+
+cleanup:
+ detach_start_ns = get_time_ns();
+ uprobe_multi_usdt__destroy(skel);
+ detach_end_ns = get_time_ns();
+
+ attach_delta = (attach_end_ns - attach_start_ns) / 1000000000.0;
+ detach_delta = (detach_end_ns - detach_start_ns) / 1000000000.0;
+
+ printf("%s: attached in %7.3lfs\n", __func__, attach_delta);
+ printf("%s: detached in %7.3lfs\n", __func__, detach_delta);
+}
+
+void test_uprobe_multi_test(void)
+{
+ if (test__start_subtest("skel_api"))
+ test_skel_api();
+ if (test__start_subtest("attach_api_pattern"))
+ test_attach_api_pattern();
+ if (test__start_subtest("attach_api_syms"))
+ test_attach_api_syms();
+ if (test__start_subtest("link_api"))
+ test_link_api();
+ if (test__start_subtest("bench_uprobe"))
+ test_bench_attach_uprobe();
+ if (test__start_subtest("bench_usdt"))
+ test_bench_attach_usdt();
+ if (test__start_subtest("attach_api_fails"))
+ test_attach_api_fails();
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/usdt.c b/tools/testing/selftests/bpf/prog_tests/usdt.c
new file mode 100644
index 000000000000..56ed1eb9b527
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/usdt.c
@@ -0,0 +1,420 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2022 Meta Platforms, Inc. and affiliates. */
+#include <test_progs.h>
+
+#define _SDT_HAS_SEMAPHORES 1
+#include "../sdt.h"
+
+#include "test_usdt.skel.h"
+#include "test_urandom_usdt.skel.h"
+
+int lets_test_this(int);
+
+static volatile int idx = 2;
+static volatile __u64 bla = 0xFEDCBA9876543210ULL;
+static volatile short nums[] = {-1, -2, -3, -4};
+
+static volatile struct {
+ int x;
+ signed char y;
+} t1 = { 1, -127 };
+
+#define SEC(name) __attribute__((section(name), used))
+
+unsigned short test_usdt0_semaphore SEC(".probes");
+unsigned short test_usdt3_semaphore SEC(".probes");
+unsigned short test_usdt12_semaphore SEC(".probes");
+
+static void __always_inline trigger_func(int x) {
+ long y = 42;
+
+ if (test_usdt0_semaphore)
+ STAP_PROBE(test, usdt0);
+ if (test_usdt3_semaphore)
+ STAP_PROBE3(test, usdt3, x, y, &bla);
+ if (test_usdt12_semaphore) {
+ STAP_PROBE12(test, usdt12,
+ x, x + 1, y, x + y, 5,
+ y / 7, bla, &bla, -9, nums[x],
+ nums[idx], t1.y);
+ }
+}
+
+static void subtest_basic_usdt(void)
+{
+ LIBBPF_OPTS(bpf_usdt_opts, opts);
+ struct test_usdt *skel;
+ struct test_usdt__bss *bss;
+ int err;
+
+ skel = test_usdt__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "skel_open"))
+ return;
+
+ bss = skel->bss;
+ bss->my_pid = getpid();
+
+ err = test_usdt__attach(skel);
+ if (!ASSERT_OK(err, "skel_attach"))
+ goto cleanup;
+
+ /* usdt0 won't be auto-attached */
+ opts.usdt_cookie = 0xcafedeadbeeffeed;
+ skel->links.usdt0 = bpf_program__attach_usdt(skel->progs.usdt0,
+ 0 /*self*/, "/proc/self/exe",
+ "test", "usdt0", &opts);
+ if (!ASSERT_OK_PTR(skel->links.usdt0, "usdt0_link"))
+ goto cleanup;
+
+ trigger_func(1);
+
+ ASSERT_EQ(bss->usdt0_called, 1, "usdt0_called");
+ ASSERT_EQ(bss->usdt3_called, 1, "usdt3_called");
+ ASSERT_EQ(bss->usdt12_called, 1, "usdt12_called");
+
+ ASSERT_EQ(bss->usdt0_cookie, 0xcafedeadbeeffeed, "usdt0_cookie");
+ ASSERT_EQ(bss->usdt0_arg_cnt, 0, "usdt0_arg_cnt");
+ ASSERT_EQ(bss->usdt0_arg_ret, -ENOENT, "usdt0_arg_ret");
+
+ /* auto-attached usdt3 gets default zero cookie value */
+ ASSERT_EQ(bss->usdt3_cookie, 0, "usdt3_cookie");
+ ASSERT_EQ(bss->usdt3_arg_cnt, 3, "usdt3_arg_cnt");
+
+ ASSERT_EQ(bss->usdt3_arg_rets[0], 0, "usdt3_arg1_ret");
+ ASSERT_EQ(bss->usdt3_arg_rets[1], 0, "usdt3_arg2_ret");
+ ASSERT_EQ(bss->usdt3_arg_rets[2], 0, "usdt3_arg3_ret");
+ ASSERT_EQ(bss->usdt3_args[0], 1, "usdt3_arg1");
+ ASSERT_EQ(bss->usdt3_args[1], 42, "usdt3_arg2");
+ ASSERT_EQ(bss->usdt3_args[2], (uintptr_t)&bla, "usdt3_arg3");
+
+ /* auto-attached usdt12 gets default zero cookie value */
+ ASSERT_EQ(bss->usdt12_cookie, 0, "usdt12_cookie");
+ ASSERT_EQ(bss->usdt12_arg_cnt, 12, "usdt12_arg_cnt");
+
+ ASSERT_EQ(bss->usdt12_args[0], 1, "usdt12_arg1");
+ ASSERT_EQ(bss->usdt12_args[1], 1 + 1, "usdt12_arg2");
+ ASSERT_EQ(bss->usdt12_args[2], 42, "usdt12_arg3");
+ ASSERT_EQ(bss->usdt12_args[3], 42 + 1, "usdt12_arg4");
+ ASSERT_EQ(bss->usdt12_args[4], 5, "usdt12_arg5");
+ ASSERT_EQ(bss->usdt12_args[5], 42 / 7, "usdt12_arg6");
+ ASSERT_EQ(bss->usdt12_args[6], bla, "usdt12_arg7");
+ ASSERT_EQ(bss->usdt12_args[7], (uintptr_t)&bla, "usdt12_arg8");
+ ASSERT_EQ(bss->usdt12_args[8], -9, "usdt12_arg9");
+ ASSERT_EQ(bss->usdt12_args[9], nums[1], "usdt12_arg10");
+ ASSERT_EQ(bss->usdt12_args[10], nums[idx], "usdt12_arg11");
+ ASSERT_EQ(bss->usdt12_args[11], t1.y, "usdt12_arg12");
+
+ /* trigger_func() is marked __always_inline, so USDT invocations will be
+ * inlined in two different places, meaning that each USDT will have
+ * at least 2 different places to be attached to. This verifies that
+ * bpf_program__attach_usdt() handles this properly and attaches to
+ * all possible places of USDT invocation.
+ */
+ trigger_func(2);
+
+ ASSERT_EQ(bss->usdt0_called, 2, "usdt0_called");
+ ASSERT_EQ(bss->usdt3_called, 2, "usdt3_called");
+ ASSERT_EQ(bss->usdt12_called, 2, "usdt12_called");
+
+ /* only check values that depend on trigger_func()'s input value */
+ ASSERT_EQ(bss->usdt3_args[0], 2, "usdt3_arg1");
+
+ ASSERT_EQ(bss->usdt12_args[0], 2, "usdt12_arg1");
+ ASSERT_EQ(bss->usdt12_args[1], 2 + 1, "usdt12_arg2");
+ ASSERT_EQ(bss->usdt12_args[3], 42 + 2, "usdt12_arg4");
+ ASSERT_EQ(bss->usdt12_args[9], nums[2], "usdt12_arg10");
+
+ /* detach and re-attach usdt3 */
+ bpf_link__destroy(skel->links.usdt3);
+
+ opts.usdt_cookie = 0xBADC00C51E;
+ skel->links.usdt3 = bpf_program__attach_usdt(skel->progs.usdt3, -1 /* any pid */,
+ "/proc/self/exe", "test", "usdt3", &opts);
+ if (!ASSERT_OK_PTR(skel->links.usdt3, "usdt3_reattach"))
+ goto cleanup;
+
+ trigger_func(3);
+
+ ASSERT_EQ(bss->usdt3_called, 3, "usdt3_called");
+ /* this time usdt3 has custom cookie */
+ ASSERT_EQ(bss->usdt3_cookie, 0xBADC00C51E, "usdt3_cookie");
+ ASSERT_EQ(bss->usdt3_arg_cnt, 3, "usdt3_arg_cnt");
+
+ ASSERT_EQ(bss->usdt3_arg_rets[0], 0, "usdt3_arg1_ret");
+ ASSERT_EQ(bss->usdt3_arg_rets[1], 0, "usdt3_arg2_ret");
+ ASSERT_EQ(bss->usdt3_arg_rets[2], 0, "usdt3_arg3_ret");
+ ASSERT_EQ(bss->usdt3_args[0], 3, "usdt3_arg1");
+ ASSERT_EQ(bss->usdt3_args[1], 42, "usdt3_arg2");
+ ASSERT_EQ(bss->usdt3_args[2], (uintptr_t)&bla, "usdt3_arg3");
+
+cleanup:
+ test_usdt__destroy(skel);
+}
+
+unsigned short test_usdt_100_semaphore SEC(".probes");
+unsigned short test_usdt_300_semaphore SEC(".probes");
+unsigned short test_usdt_400_semaphore SEC(".probes");
+
+#define R10(F, X) F(X+0); F(X+1);F(X+2); F(X+3); F(X+4); \
+ F(X+5); F(X+6); F(X+7); F(X+8); F(X+9);
+#define R100(F, X) R10(F,X+ 0);R10(F,X+10);R10(F,X+20);R10(F,X+30);R10(F,X+40); \
+ R10(F,X+50);R10(F,X+60);R10(F,X+70);R10(F,X+80);R10(F,X+90);
+
+/* carefully control that we get exactly 100 inlines by preventing inlining */
+static void __always_inline f100(int x)
+{
+ STAP_PROBE1(test, usdt_100, x);
+}
+
+__weak void trigger_100_usdts(void)
+{
+ R100(f100, 0);
+}
+
+/* we shouldn't be able to attach to test:usdt2_300 USDT as we don't have as
+ * many slots for specs. It's important that each STAP_PROBE2() invocation
+ * (after untolling) gets different arg spec due to compiler inlining i as
+ * a constant
+ */
+static void __always_inline f300(int x)
+{
+ STAP_PROBE1(test, usdt_300, x);
+}
+
+__weak void trigger_300_usdts(void)
+{
+ R100(f300, 0);
+ R100(f300, 100);
+ R100(f300, 200);
+}
+
+static void __always_inline f400(int x __attribute__((unused)))
+{
+ STAP_PROBE1(test, usdt_400, 400);
+}
+
+/* this time we have 400 different USDT call sites, but they have uniform
+ * argument location, so libbpf's spec string deduplication logic should keep
+ * spec count use very small and so we should be able to attach to all 400
+ * call sites
+ */
+__weak void trigger_400_usdts(void)
+{
+ R100(f400, 0);
+ R100(f400, 100);
+ R100(f400, 200);
+ R100(f400, 300);
+}
+
+static void subtest_multispec_usdt(void)
+{
+ LIBBPF_OPTS(bpf_usdt_opts, opts);
+ struct test_usdt *skel;
+ struct test_usdt__bss *bss;
+ int err, i;
+
+ skel = test_usdt__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "skel_open"))
+ return;
+
+ bss = skel->bss;
+ bss->my_pid = getpid();
+
+ err = test_usdt__attach(skel);
+ if (!ASSERT_OK(err, "skel_attach"))
+ goto cleanup;
+
+ /* usdt_100 is auto-attached and there are 100 inlined call sites,
+ * let's validate that all of them are properly attached to and
+ * handled from BPF side
+ */
+ trigger_100_usdts();
+
+ ASSERT_EQ(bss->usdt_100_called, 100, "usdt_100_called");
+ ASSERT_EQ(bss->usdt_100_sum, 99 * 100 / 2, "usdt_100_sum");
+
+ /* Stress test free spec ID tracking. By default libbpf allows up to
+ * 256 specs to be used, so if we don't return free spec IDs back
+ * after few detachments and re-attachments we should run out of
+ * available spec IDs.
+ */
+ for (i = 0; i < 2; i++) {
+ bpf_link__destroy(skel->links.usdt_100);
+
+ skel->links.usdt_100 = bpf_program__attach_usdt(skel->progs.usdt_100, -1,
+ "/proc/self/exe",
+ "test", "usdt_100", NULL);
+ if (!ASSERT_OK_PTR(skel->links.usdt_100, "usdt_100_reattach"))
+ goto cleanup;
+
+ bss->usdt_100_sum = 0;
+ trigger_100_usdts();
+
+ ASSERT_EQ(bss->usdt_100_called, (i + 1) * 100 + 100, "usdt_100_called");
+ ASSERT_EQ(bss->usdt_100_sum, 99 * 100 / 2, "usdt_100_sum");
+ }
+
+ /* Now let's step it up and try to attach USDT that requires more than
+ * 256 attach points with different specs for each.
+ * Note that we need trigger_300_usdts() only to actually have 300
+ * USDT call sites, we are not going to actually trace them.
+ */
+ trigger_300_usdts();
+
+ /* we'll reuse usdt_100 BPF program for usdt_300 test */
+ bpf_link__destroy(skel->links.usdt_100);
+ skel->links.usdt_100 = bpf_program__attach_usdt(skel->progs.usdt_100, -1, "/proc/self/exe",
+ "test", "usdt_300", NULL);
+ err = -errno;
+ if (!ASSERT_ERR_PTR(skel->links.usdt_100, "usdt_300_bad_attach"))
+ goto cleanup;
+ ASSERT_EQ(err, -E2BIG, "usdt_300_attach_err");
+
+ /* let's check that there are no "dangling" BPF programs attached due
+ * to partial success of the above test:usdt_300 attachment
+ */
+ bss->usdt_100_called = 0;
+ bss->usdt_100_sum = 0;
+
+ f300(777); /* this is 301st instance of usdt_300 */
+
+ ASSERT_EQ(bss->usdt_100_called, 0, "usdt_301_called");
+ ASSERT_EQ(bss->usdt_100_sum, 0, "usdt_301_sum");
+
+ /* This time we have USDT with 400 inlined invocations, but arg specs
+ * should be the same across all sites, so libbpf will only need to
+ * use one spec and thus we'll be able to attach 400 uprobes
+ * successfully.
+ *
+ * Again, we are reusing usdt_100 BPF program.
+ */
+ skel->links.usdt_100 = bpf_program__attach_usdt(skel->progs.usdt_100, -1,
+ "/proc/self/exe",
+ "test", "usdt_400", NULL);
+ if (!ASSERT_OK_PTR(skel->links.usdt_100, "usdt_400_attach"))
+ goto cleanup;
+
+ trigger_400_usdts();
+
+ ASSERT_EQ(bss->usdt_100_called, 400, "usdt_400_called");
+ ASSERT_EQ(bss->usdt_100_sum, 400 * 400, "usdt_400_sum");
+
+cleanup:
+ test_usdt__destroy(skel);
+}
+
+static FILE *urand_spawn(int *pid)
+{
+ FILE *f;
+
+ /* urandom_read's stdout is wired into f */
+ f = popen("./urandom_read 1 report-pid", "r");
+ if (!f)
+ return NULL;
+
+ if (fscanf(f, "%d", pid) != 1) {
+ pclose(f);
+ errno = EINVAL;
+ return NULL;
+ }
+
+ return f;
+}
+
+static int urand_trigger(FILE **urand_pipe)
+{
+ int exit_code;
+
+ /* pclose() waits for child process to exit and returns their exit code */
+ exit_code = pclose(*urand_pipe);
+ *urand_pipe = NULL;
+
+ return exit_code;
+}
+
+static void subtest_urandom_usdt(bool auto_attach)
+{
+ struct test_urandom_usdt *skel;
+ struct test_urandom_usdt__bss *bss;
+ struct bpf_link *l;
+ FILE *urand_pipe = NULL;
+ int err, urand_pid = 0;
+
+ skel = test_urandom_usdt__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "skel_open"))
+ return;
+
+ urand_pipe = urand_spawn(&urand_pid);
+ if (!ASSERT_OK_PTR(urand_pipe, "urand_spawn"))
+ goto cleanup;
+
+ bss = skel->bss;
+ bss->urand_pid = urand_pid;
+
+ if (auto_attach) {
+ err = test_urandom_usdt__attach(skel);
+ if (!ASSERT_OK(err, "skel_auto_attach"))
+ goto cleanup;
+ } else {
+ l = bpf_program__attach_usdt(skel->progs.urand_read_without_sema,
+ urand_pid, "./urandom_read",
+ "urand", "read_without_sema", NULL);
+ if (!ASSERT_OK_PTR(l, "urand_without_sema_attach"))
+ goto cleanup;
+ skel->links.urand_read_without_sema = l;
+
+ l = bpf_program__attach_usdt(skel->progs.urand_read_with_sema,
+ urand_pid, "./urandom_read",
+ "urand", "read_with_sema", NULL);
+ if (!ASSERT_OK_PTR(l, "urand_with_sema_attach"))
+ goto cleanup;
+ skel->links.urand_read_with_sema = l;
+
+ l = bpf_program__attach_usdt(skel->progs.urandlib_read_without_sema,
+ urand_pid, "./liburandom_read.so",
+ "urandlib", "read_without_sema", NULL);
+ if (!ASSERT_OK_PTR(l, "urandlib_without_sema_attach"))
+ goto cleanup;
+ skel->links.urandlib_read_without_sema = l;
+
+ l = bpf_program__attach_usdt(skel->progs.urandlib_read_with_sema,
+ urand_pid, "./liburandom_read.so",
+ "urandlib", "read_with_sema", NULL);
+ if (!ASSERT_OK_PTR(l, "urandlib_with_sema_attach"))
+ goto cleanup;
+ skel->links.urandlib_read_with_sema = l;
+
+ }
+
+ /* trigger urandom_read USDTs */
+ ASSERT_OK(urand_trigger(&urand_pipe), "urand_exit_code");
+
+ ASSERT_EQ(bss->urand_read_without_sema_call_cnt, 1, "urand_wo_sema_cnt");
+ ASSERT_EQ(bss->urand_read_without_sema_buf_sz_sum, 256, "urand_wo_sema_sum");
+
+ ASSERT_EQ(bss->urand_read_with_sema_call_cnt, 1, "urand_w_sema_cnt");
+ ASSERT_EQ(bss->urand_read_with_sema_buf_sz_sum, 256, "urand_w_sema_sum");
+
+ ASSERT_EQ(bss->urandlib_read_without_sema_call_cnt, 1, "urandlib_wo_sema_cnt");
+ ASSERT_EQ(bss->urandlib_read_without_sema_buf_sz_sum, 256, "urandlib_wo_sema_sum");
+
+ ASSERT_EQ(bss->urandlib_read_with_sema_call_cnt, 1, "urandlib_w_sema_cnt");
+ ASSERT_EQ(bss->urandlib_read_with_sema_buf_sz_sum, 256, "urandlib_w_sema_sum");
+
+cleanup:
+ if (urand_pipe)
+ pclose(urand_pipe);
+ test_urandom_usdt__destroy(skel);
+}
+
+void test_usdt(void)
+{
+ if (test__start_subtest("basic"))
+ subtest_basic_usdt();
+ if (test__start_subtest("multispec"))
+ subtest_multispec_usdt();
+ if (test__start_subtest("urand_auto_attach"))
+ subtest_urandom_usdt(true /* auto_attach */);
+ if (test__start_subtest("urand_pid_attach"))
+ subtest_urandom_usdt(false /* auto_attach */);
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/user_ringbuf.c b/tools/testing/selftests/bpf/prog_tests/user_ringbuf.c
new file mode 100644
index 000000000000..e51721df14fc
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/user_ringbuf.c
@@ -0,0 +1,696 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2022 Meta Platforms, Inc. and affiliates. */
+
+#define _GNU_SOURCE
+#include <linux/compiler.h>
+#include <linux/ring_buffer.h>
+#include <pthread.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <sys/mman.h>
+#include <sys/syscall.h>
+#include <sys/sysinfo.h>
+#include <test_progs.h>
+#include <uapi/linux/bpf.h>
+#include <unistd.h>
+
+#include "user_ringbuf_fail.skel.h"
+#include "user_ringbuf_success.skel.h"
+
+#include "../progs/test_user_ringbuf.h"
+
+static const long c_sample_size = sizeof(struct sample) + BPF_RINGBUF_HDR_SZ;
+static const long c_ringbuf_size = 1 << 12; /* 1 small page */
+static const long c_max_entries = c_ringbuf_size / c_sample_size;
+
+static void drain_current_samples(void)
+{
+ syscall(__NR_getpgid);
+}
+
+static int write_samples(struct user_ring_buffer *ringbuf, uint32_t num_samples)
+{
+ int i, err = 0;
+
+ /* Write some number of samples to the ring buffer. */
+ for (i = 0; i < num_samples; i++) {
+ struct sample *entry;
+ int read;
+
+ entry = user_ring_buffer__reserve(ringbuf, sizeof(*entry));
+ if (!entry) {
+ err = -errno;
+ goto done;
+ }
+
+ entry->pid = getpid();
+ entry->seq = i;
+ entry->value = i * i;
+
+ read = snprintf(entry->comm, sizeof(entry->comm), "%u", i);
+ if (read <= 0) {
+ /* Assert on the error path to avoid spamming logs with
+ * mostly success messages.
+ */
+ ASSERT_GT(read, 0, "snprintf_comm");
+ err = read;
+ user_ring_buffer__discard(ringbuf, entry);
+ goto done;
+ }
+
+ user_ring_buffer__submit(ringbuf, entry);
+ }
+
+done:
+ drain_current_samples();
+
+ return err;
+}
+
+static struct user_ringbuf_success *open_load_ringbuf_skel(void)
+{
+ struct user_ringbuf_success *skel;
+ int err;
+
+ skel = user_ringbuf_success__open();
+ if (!ASSERT_OK_PTR(skel, "skel_open"))
+ return NULL;
+
+ err = bpf_map__set_max_entries(skel->maps.user_ringbuf, c_ringbuf_size);
+ if (!ASSERT_OK(err, "set_max_entries"))
+ goto cleanup;
+
+ err = bpf_map__set_max_entries(skel->maps.kernel_ringbuf, c_ringbuf_size);
+ if (!ASSERT_OK(err, "set_max_entries"))
+ goto cleanup;
+
+ err = user_ringbuf_success__load(skel);
+ if (!ASSERT_OK(err, "skel_load"))
+ goto cleanup;
+
+ return skel;
+
+cleanup:
+ user_ringbuf_success__destroy(skel);
+ return NULL;
+}
+
+static void test_user_ringbuf_mappings(void)
+{
+ int err, rb_fd;
+ int page_size = getpagesize();
+ void *mmap_ptr;
+ struct user_ringbuf_success *skel;
+
+ skel = open_load_ringbuf_skel();
+ if (!skel)
+ return;
+
+ rb_fd = bpf_map__fd(skel->maps.user_ringbuf);
+ /* cons_pos can be mapped R/O, can't add +X with mprotect. */
+ mmap_ptr = mmap(NULL, page_size, PROT_READ, MAP_SHARED, rb_fd, 0);
+ ASSERT_OK_PTR(mmap_ptr, "ro_cons_pos");
+ ASSERT_ERR(mprotect(mmap_ptr, page_size, PROT_WRITE), "write_cons_pos_protect");
+ ASSERT_ERR(mprotect(mmap_ptr, page_size, PROT_EXEC), "exec_cons_pos_protect");
+ ASSERT_ERR_PTR(mremap(mmap_ptr, 0, 4 * page_size, MREMAP_MAYMOVE), "wr_prod_pos");
+ err = -errno;
+ ASSERT_ERR(err, "wr_prod_pos_err");
+ ASSERT_OK(munmap(mmap_ptr, page_size), "unmap_ro_cons");
+
+ /* prod_pos can be mapped RW, can't add +X with mprotect. */
+ mmap_ptr = mmap(NULL, page_size, PROT_READ | PROT_WRITE, MAP_SHARED,
+ rb_fd, page_size);
+ ASSERT_OK_PTR(mmap_ptr, "rw_prod_pos");
+ ASSERT_ERR(mprotect(mmap_ptr, page_size, PROT_EXEC), "exec_prod_pos_protect");
+ err = -errno;
+ ASSERT_ERR(err, "wr_prod_pos_err");
+ ASSERT_OK(munmap(mmap_ptr, page_size), "unmap_rw_prod");
+
+ /* data pages can be mapped RW, can't add +X with mprotect. */
+ mmap_ptr = mmap(NULL, page_size, PROT_WRITE, MAP_SHARED, rb_fd,
+ 2 * page_size);
+ ASSERT_OK_PTR(mmap_ptr, "rw_data");
+ ASSERT_ERR(mprotect(mmap_ptr, page_size, PROT_EXEC), "exec_data_protect");
+ err = -errno;
+ ASSERT_ERR(err, "exec_data_err");
+ ASSERT_OK(munmap(mmap_ptr, page_size), "unmap_rw_data");
+
+ user_ringbuf_success__destroy(skel);
+}
+
+static int load_skel_create_ringbufs(struct user_ringbuf_success **skel_out,
+ struct ring_buffer **kern_ringbuf_out,
+ ring_buffer_sample_fn callback,
+ struct user_ring_buffer **user_ringbuf_out)
+{
+ struct user_ringbuf_success *skel;
+ struct ring_buffer *kern_ringbuf = NULL;
+ struct user_ring_buffer *user_ringbuf = NULL;
+ int err = -ENOMEM, rb_fd;
+
+ skel = open_load_ringbuf_skel();
+ if (!skel)
+ return err;
+
+ /* only trigger BPF program for current process */
+ skel->bss->pid = getpid();
+
+ if (kern_ringbuf_out) {
+ rb_fd = bpf_map__fd(skel->maps.kernel_ringbuf);
+ kern_ringbuf = ring_buffer__new(rb_fd, callback, skel, NULL);
+ if (!ASSERT_OK_PTR(kern_ringbuf, "kern_ringbuf_create"))
+ goto cleanup;
+
+ *kern_ringbuf_out = kern_ringbuf;
+ }
+
+ if (user_ringbuf_out) {
+ rb_fd = bpf_map__fd(skel->maps.user_ringbuf);
+ user_ringbuf = user_ring_buffer__new(rb_fd, NULL);
+ if (!ASSERT_OK_PTR(user_ringbuf, "user_ringbuf_create"))
+ goto cleanup;
+
+ *user_ringbuf_out = user_ringbuf;
+ ASSERT_EQ(skel->bss->read, 0, "no_reads_after_load");
+ }
+
+ err = user_ringbuf_success__attach(skel);
+ if (!ASSERT_OK(err, "skel_attach"))
+ goto cleanup;
+
+ *skel_out = skel;
+ return 0;
+
+cleanup:
+ if (kern_ringbuf_out)
+ *kern_ringbuf_out = NULL;
+ if (user_ringbuf_out)
+ *user_ringbuf_out = NULL;
+ ring_buffer__free(kern_ringbuf);
+ user_ring_buffer__free(user_ringbuf);
+ user_ringbuf_success__destroy(skel);
+ return err;
+}
+
+static int load_skel_create_user_ringbuf(struct user_ringbuf_success **skel_out,
+ struct user_ring_buffer **ringbuf_out)
+{
+ return load_skel_create_ringbufs(skel_out, NULL, NULL, ringbuf_out);
+}
+
+static void manually_write_test_invalid_sample(struct user_ringbuf_success *skel,
+ __u32 size, __u64 producer_pos, int err)
+{
+ void *data_ptr;
+ __u64 *producer_pos_ptr;
+ int rb_fd, page_size = getpagesize();
+
+ rb_fd = bpf_map__fd(skel->maps.user_ringbuf);
+
+ ASSERT_EQ(skel->bss->read, 0, "num_samples_before_bad_sample");
+
+ /* Map the producer_pos as RW. */
+ producer_pos_ptr = mmap(NULL, page_size, PROT_READ | PROT_WRITE,
+ MAP_SHARED, rb_fd, page_size);
+ ASSERT_OK_PTR(producer_pos_ptr, "producer_pos_ptr");
+
+ /* Map the data pages as RW. */
+ data_ptr = mmap(NULL, page_size, PROT_WRITE, MAP_SHARED, rb_fd, 2 * page_size);
+ ASSERT_OK_PTR(data_ptr, "rw_data");
+
+ memset(data_ptr, 0, BPF_RINGBUF_HDR_SZ);
+ *(__u32 *)data_ptr = size;
+
+ /* Synchronizes with smp_load_acquire() in __bpf_user_ringbuf_peek() in the kernel. */
+ smp_store_release(producer_pos_ptr, producer_pos + BPF_RINGBUF_HDR_SZ);
+
+ drain_current_samples();
+ ASSERT_EQ(skel->bss->read, 0, "num_samples_after_bad_sample");
+ ASSERT_EQ(skel->bss->err, err, "err_after_bad_sample");
+
+ ASSERT_OK(munmap(producer_pos_ptr, page_size), "unmap_producer_pos");
+ ASSERT_OK(munmap(data_ptr, page_size), "unmap_data_ptr");
+}
+
+static void test_user_ringbuf_post_misaligned(void)
+{
+ struct user_ringbuf_success *skel;
+ struct user_ring_buffer *ringbuf;
+ int err;
+ __u32 size = (1 << 5) + 7;
+
+ err = load_skel_create_user_ringbuf(&skel, &ringbuf);
+ if (!ASSERT_OK(err, "misaligned_skel"))
+ return;
+
+ manually_write_test_invalid_sample(skel, size, size, -EINVAL);
+ user_ring_buffer__free(ringbuf);
+ user_ringbuf_success__destroy(skel);
+}
+
+static void test_user_ringbuf_post_producer_wrong_offset(void)
+{
+ struct user_ringbuf_success *skel;
+ struct user_ring_buffer *ringbuf;
+ int err;
+ __u32 size = (1 << 5);
+
+ err = load_skel_create_user_ringbuf(&skel, &ringbuf);
+ if (!ASSERT_OK(err, "wrong_offset_skel"))
+ return;
+
+ manually_write_test_invalid_sample(skel, size, size - 8, -EINVAL);
+ user_ring_buffer__free(ringbuf);
+ user_ringbuf_success__destroy(skel);
+}
+
+static void test_user_ringbuf_post_larger_than_ringbuf_sz(void)
+{
+ struct user_ringbuf_success *skel;
+ struct user_ring_buffer *ringbuf;
+ int err;
+ __u32 size = c_ringbuf_size;
+
+ err = load_skel_create_user_ringbuf(&skel, &ringbuf);
+ if (!ASSERT_OK(err, "huge_sample_skel"))
+ return;
+
+ manually_write_test_invalid_sample(skel, size, size, -E2BIG);
+ user_ring_buffer__free(ringbuf);
+ user_ringbuf_success__destroy(skel);
+}
+
+static void test_user_ringbuf_basic(void)
+{
+ struct user_ringbuf_success *skel;
+ struct user_ring_buffer *ringbuf;
+ int err;
+
+ err = load_skel_create_user_ringbuf(&skel, &ringbuf);
+ if (!ASSERT_OK(err, "ringbuf_basic_skel"))
+ return;
+
+ ASSERT_EQ(skel->bss->read, 0, "num_samples_read_before");
+
+ err = write_samples(ringbuf, 2);
+ if (!ASSERT_OK(err, "write_samples"))
+ goto cleanup;
+
+ ASSERT_EQ(skel->bss->read, 2, "num_samples_read_after");
+
+cleanup:
+ user_ring_buffer__free(ringbuf);
+ user_ringbuf_success__destroy(skel);
+}
+
+static void test_user_ringbuf_sample_full_ring_buffer(void)
+{
+ struct user_ringbuf_success *skel;
+ struct user_ring_buffer *ringbuf;
+ int err;
+ void *sample;
+
+ err = load_skel_create_user_ringbuf(&skel, &ringbuf);
+ if (!ASSERT_OK(err, "ringbuf_full_sample_skel"))
+ return;
+
+ sample = user_ring_buffer__reserve(ringbuf, c_ringbuf_size - BPF_RINGBUF_HDR_SZ);
+ if (!ASSERT_OK_PTR(sample, "full_sample"))
+ goto cleanup;
+
+ user_ring_buffer__submit(ringbuf, sample);
+ ASSERT_EQ(skel->bss->read, 0, "num_samples_read_before");
+ drain_current_samples();
+ ASSERT_EQ(skel->bss->read, 1, "num_samples_read_after");
+
+cleanup:
+ user_ring_buffer__free(ringbuf);
+ user_ringbuf_success__destroy(skel);
+}
+
+static void test_user_ringbuf_post_alignment_autoadjust(void)
+{
+ struct user_ringbuf_success *skel;
+ struct user_ring_buffer *ringbuf;
+ struct sample *sample;
+ int err;
+
+ err = load_skel_create_user_ringbuf(&skel, &ringbuf);
+ if (!ASSERT_OK(err, "ringbuf_align_autoadjust_skel"))
+ return;
+
+ /* libbpf should automatically round any sample up to an 8-byte alignment. */
+ sample = user_ring_buffer__reserve(ringbuf, sizeof(*sample) + 1);
+ ASSERT_OK_PTR(sample, "reserve_autoaligned");
+ user_ring_buffer__submit(ringbuf, sample);
+
+ ASSERT_EQ(skel->bss->read, 0, "num_samples_read_before");
+ drain_current_samples();
+ ASSERT_EQ(skel->bss->read, 1, "num_samples_read_after");
+
+ user_ring_buffer__free(ringbuf);
+ user_ringbuf_success__destroy(skel);
+}
+
+static void test_user_ringbuf_overfill(void)
+{
+ struct user_ringbuf_success *skel;
+ struct user_ring_buffer *ringbuf;
+ int err;
+
+ err = load_skel_create_user_ringbuf(&skel, &ringbuf);
+ if (err)
+ return;
+
+ err = write_samples(ringbuf, c_max_entries * 5);
+ ASSERT_ERR(err, "write_samples");
+ ASSERT_EQ(skel->bss->read, c_max_entries, "max_entries");
+
+ user_ring_buffer__free(ringbuf);
+ user_ringbuf_success__destroy(skel);
+}
+
+static void test_user_ringbuf_discards_properly_ignored(void)
+{
+ struct user_ringbuf_success *skel;
+ struct user_ring_buffer *ringbuf;
+ int err, num_discarded = 0;
+ __u64 *token;
+
+ err = load_skel_create_user_ringbuf(&skel, &ringbuf);
+ if (err)
+ return;
+
+ ASSERT_EQ(skel->bss->read, 0, "num_samples_read_before");
+
+ while (1) {
+ /* Write samples until the buffer is full. */
+ token = user_ring_buffer__reserve(ringbuf, sizeof(*token));
+ if (!token)
+ break;
+
+ user_ring_buffer__discard(ringbuf, token);
+ num_discarded++;
+ }
+
+ if (!ASSERT_GE(num_discarded, 0, "num_discarded"))
+ goto cleanup;
+
+ /* Should not read any samples, as they are all discarded. */
+ ASSERT_EQ(skel->bss->read, 0, "num_pre_kick");
+ drain_current_samples();
+ ASSERT_EQ(skel->bss->read, 0, "num_post_kick");
+
+ /* Now that the ring buffer has been drained, we should be able to
+ * reserve another token.
+ */
+ token = user_ring_buffer__reserve(ringbuf, sizeof(*token));
+
+ if (!ASSERT_OK_PTR(token, "new_token"))
+ goto cleanup;
+
+ user_ring_buffer__discard(ringbuf, token);
+cleanup:
+ user_ring_buffer__free(ringbuf);
+ user_ringbuf_success__destroy(skel);
+}
+
+static void test_user_ringbuf_loop(void)
+{
+ struct user_ringbuf_success *skel;
+ struct user_ring_buffer *ringbuf;
+ uint32_t total_samples = 8192;
+ uint32_t remaining_samples = total_samples;
+ int err;
+
+ BUILD_BUG_ON(total_samples <= c_max_entries);
+ err = load_skel_create_user_ringbuf(&skel, &ringbuf);
+ if (err)
+ return;
+
+ do {
+ uint32_t curr_samples;
+
+ curr_samples = remaining_samples > c_max_entries
+ ? c_max_entries : remaining_samples;
+ err = write_samples(ringbuf, curr_samples);
+ if (err != 0) {
+ /* Assert inside of if statement to avoid flooding logs
+ * on the success path.
+ */
+ ASSERT_OK(err, "write_samples");
+ goto cleanup;
+ }
+
+ remaining_samples -= curr_samples;
+ ASSERT_EQ(skel->bss->read, total_samples - remaining_samples,
+ "current_batched_entries");
+ } while (remaining_samples > 0);
+ ASSERT_EQ(skel->bss->read, total_samples, "total_batched_entries");
+
+cleanup:
+ user_ring_buffer__free(ringbuf);
+ user_ringbuf_success__destroy(skel);
+}
+
+static int send_test_message(struct user_ring_buffer *ringbuf,
+ enum test_msg_op op, s64 operand_64,
+ s32 operand_32)
+{
+ struct test_msg *msg;
+
+ msg = user_ring_buffer__reserve(ringbuf, sizeof(*msg));
+ if (!msg) {
+ /* Assert on the error path to avoid spamming logs with mostly
+ * success messages.
+ */
+ ASSERT_OK_PTR(msg, "reserve_msg");
+ return -ENOMEM;
+ }
+
+ msg->msg_op = op;
+
+ switch (op) {
+ case TEST_MSG_OP_INC64:
+ case TEST_MSG_OP_MUL64:
+ msg->operand_64 = operand_64;
+ break;
+ case TEST_MSG_OP_INC32:
+ case TEST_MSG_OP_MUL32:
+ msg->operand_32 = operand_32;
+ break;
+ default:
+ PRINT_FAIL("Invalid operand %d\n", op);
+ user_ring_buffer__discard(ringbuf, msg);
+ return -EINVAL;
+ }
+
+ user_ring_buffer__submit(ringbuf, msg);
+
+ return 0;
+}
+
+static void kick_kernel_read_messages(void)
+{
+ syscall(__NR_prctl);
+}
+
+static int handle_kernel_msg(void *ctx, void *data, size_t len)
+{
+ struct user_ringbuf_success *skel = ctx;
+ struct test_msg *msg = data;
+
+ switch (msg->msg_op) {
+ case TEST_MSG_OP_INC64:
+ skel->bss->user_mutated += msg->operand_64;
+ return 0;
+ case TEST_MSG_OP_INC32:
+ skel->bss->user_mutated += msg->operand_32;
+ return 0;
+ case TEST_MSG_OP_MUL64:
+ skel->bss->user_mutated *= msg->operand_64;
+ return 0;
+ case TEST_MSG_OP_MUL32:
+ skel->bss->user_mutated *= msg->operand_32;
+ return 0;
+ default:
+ fprintf(stderr, "Invalid operand %d\n", msg->msg_op);
+ return -EINVAL;
+ }
+}
+
+static void drain_kernel_messages_buffer(struct ring_buffer *kern_ringbuf,
+ struct user_ringbuf_success *skel)
+{
+ int cnt;
+
+ cnt = ring_buffer__consume(kern_ringbuf);
+ ASSERT_EQ(cnt, 8, "consume_kern_ringbuf");
+ ASSERT_OK(skel->bss->err, "consume_kern_ringbuf_err");
+}
+
+static void test_user_ringbuf_msg_protocol(void)
+{
+ struct user_ringbuf_success *skel;
+ struct user_ring_buffer *user_ringbuf;
+ struct ring_buffer *kern_ringbuf;
+ int err, i;
+ __u64 expected_kern = 0;
+
+ err = load_skel_create_ringbufs(&skel, &kern_ringbuf, handle_kernel_msg, &user_ringbuf);
+ if (!ASSERT_OK(err, "create_ringbufs"))
+ return;
+
+ for (i = 0; i < 64; i++) {
+ enum test_msg_op op = i % TEST_MSG_OP_NUM_OPS;
+ __u64 operand_64 = TEST_OP_64;
+ __u32 operand_32 = TEST_OP_32;
+
+ err = send_test_message(user_ringbuf, op, operand_64, operand_32);
+ if (err) {
+ /* Only assert on a failure to avoid spamming success logs. */
+ ASSERT_OK(err, "send_test_message");
+ goto cleanup;
+ }
+
+ switch (op) {
+ case TEST_MSG_OP_INC64:
+ expected_kern += operand_64;
+ break;
+ case TEST_MSG_OP_INC32:
+ expected_kern += operand_32;
+ break;
+ case TEST_MSG_OP_MUL64:
+ expected_kern *= operand_64;
+ break;
+ case TEST_MSG_OP_MUL32:
+ expected_kern *= operand_32;
+ break;
+ default:
+ PRINT_FAIL("Unexpected op %d\n", op);
+ goto cleanup;
+ }
+
+ if (i % 8 == 0) {
+ kick_kernel_read_messages();
+ ASSERT_EQ(skel->bss->kern_mutated, expected_kern, "expected_kern");
+ ASSERT_EQ(skel->bss->err, 0, "bpf_prog_err");
+ drain_kernel_messages_buffer(kern_ringbuf, skel);
+ }
+ }
+
+cleanup:
+ ring_buffer__free(kern_ringbuf);
+ user_ring_buffer__free(user_ringbuf);
+ user_ringbuf_success__destroy(skel);
+}
+
+static void *kick_kernel_cb(void *arg)
+{
+ /* Kick the kernel, causing it to drain the ring buffer and then wake
+ * up the test thread waiting on epoll.
+ */
+ syscall(__NR_prlimit64);
+
+ return NULL;
+}
+
+static int spawn_kick_thread_for_poll(void)
+{
+ pthread_t thread;
+
+ return pthread_create(&thread, NULL, kick_kernel_cb, NULL);
+}
+
+static void test_user_ringbuf_blocking_reserve(void)
+{
+ struct user_ringbuf_success *skel;
+ struct user_ring_buffer *ringbuf;
+ int err, num_written = 0;
+ __u64 *token;
+
+ err = load_skel_create_user_ringbuf(&skel, &ringbuf);
+ if (err)
+ return;
+
+ ASSERT_EQ(skel->bss->read, 0, "num_samples_read_before");
+
+ while (1) {
+ /* Write samples until the buffer is full. */
+ token = user_ring_buffer__reserve(ringbuf, sizeof(*token));
+ if (!token)
+ break;
+
+ *token = 0xdeadbeef;
+
+ user_ring_buffer__submit(ringbuf, token);
+ num_written++;
+ }
+
+ if (!ASSERT_GE(num_written, 0, "num_written"))
+ goto cleanup;
+
+ /* Should not have read any samples until the kernel is kicked. */
+ ASSERT_EQ(skel->bss->read, 0, "num_pre_kick");
+
+ /* We correctly time out after 1 second, without a sample. */
+ token = user_ring_buffer__reserve_blocking(ringbuf, sizeof(*token), 1000);
+ if (!ASSERT_EQ(token, NULL, "pre_kick_timeout_token"))
+ goto cleanup;
+
+ err = spawn_kick_thread_for_poll();
+ if (!ASSERT_EQ(err, 0, "deferred_kick_thread\n"))
+ goto cleanup;
+
+ /* After spawning another thread that asychronously kicks the kernel to
+ * drain the messages, we're able to block and successfully get a
+ * sample once we receive an event notification.
+ */
+ token = user_ring_buffer__reserve_blocking(ringbuf, sizeof(*token), 10000);
+
+ if (!ASSERT_OK_PTR(token, "block_token"))
+ goto cleanup;
+
+ ASSERT_GT(skel->bss->read, 0, "num_post_kill");
+ ASSERT_LE(skel->bss->read, num_written, "num_post_kill");
+ ASSERT_EQ(skel->bss->err, 0, "err_post_poll");
+ user_ring_buffer__discard(ringbuf, token);
+
+cleanup:
+ user_ring_buffer__free(ringbuf);
+ user_ringbuf_success__destroy(skel);
+}
+
+#define SUCCESS_TEST(_func) { _func, #_func }
+
+static struct {
+ void (*test_callback)(void);
+ const char *test_name;
+} success_tests[] = {
+ SUCCESS_TEST(test_user_ringbuf_mappings),
+ SUCCESS_TEST(test_user_ringbuf_post_misaligned),
+ SUCCESS_TEST(test_user_ringbuf_post_producer_wrong_offset),
+ SUCCESS_TEST(test_user_ringbuf_post_larger_than_ringbuf_sz),
+ SUCCESS_TEST(test_user_ringbuf_basic),
+ SUCCESS_TEST(test_user_ringbuf_sample_full_ring_buffer),
+ SUCCESS_TEST(test_user_ringbuf_post_alignment_autoadjust),
+ SUCCESS_TEST(test_user_ringbuf_overfill),
+ SUCCESS_TEST(test_user_ringbuf_discards_properly_ignored),
+ SUCCESS_TEST(test_user_ringbuf_loop),
+ SUCCESS_TEST(test_user_ringbuf_msg_protocol),
+ SUCCESS_TEST(test_user_ringbuf_blocking_reserve),
+};
+
+void test_user_ringbuf(void)
+{
+ int i;
+
+ for (i = 0; i < ARRAY_SIZE(success_tests); i++) {
+ if (!test__start_subtest(success_tests[i].test_name))
+ continue;
+
+ success_tests[i].test_callback();
+ }
+
+ RUN_TESTS(user_ringbuf_fail);
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/varlen.c b/tools/testing/selftests/bpf/prog_tests/varlen.c
new file mode 100644
index 000000000000..4d7056f8f177
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/varlen.c
@@ -0,0 +1,75 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2020 Facebook */
+
+#include <test_progs.h>
+#include <time.h>
+#include "test_varlen.skel.h"
+
+#define CHECK_VAL(got, exp) \
+ CHECK((got) != (exp), "check", "got %ld != exp %ld\n", \
+ (long)(got), (long)(exp))
+
+void test_varlen(void)
+{
+ int duration = 0, err;
+ struct test_varlen* skel;
+ struct test_varlen__bss *bss;
+ struct test_varlen__data *data;
+ const char str1[] = "Hello, ";
+ const char str2[] = "World!";
+ const char exp_str[] = "Hello, \0World!\0";
+ const int size1 = sizeof(str1);
+ const int size2 = sizeof(str2);
+
+ skel = test_varlen__open_and_load();
+ if (CHECK(!skel, "skel_open", "failed to open skeleton\n"))
+ return;
+ bss = skel->bss;
+ data = skel->data;
+
+ err = test_varlen__attach(skel);
+ if (CHECK(err, "skel_attach", "skeleton attach failed: %d\n", err))
+ goto cleanup;
+
+ bss->test_pid = getpid();
+
+ /* trigger everything */
+ memcpy(bss->buf_in1, str1, size1);
+ memcpy(bss->buf_in2, str2, size2);
+ bss->capture = true;
+ usleep(1);
+ bss->capture = false;
+
+ CHECK_VAL(bss->payload1_len1, size1);
+ CHECK_VAL(bss->payload1_len2, size2);
+ CHECK_VAL(bss->total1, size1 + size2);
+ CHECK(memcmp(bss->payload1, exp_str, size1 + size2), "content_check",
+ "doesn't match!\n");
+
+ CHECK_VAL(data->payload2_len1, size1);
+ CHECK_VAL(data->payload2_len2, size2);
+ CHECK_VAL(data->total2, size1 + size2);
+ CHECK(memcmp(data->payload2, exp_str, size1 + size2), "content_check",
+ "doesn't match!\n");
+
+ CHECK_VAL(data->payload3_len1, size1);
+ CHECK_VAL(data->payload3_len2, size2);
+ CHECK_VAL(data->total3, size1 + size2);
+ CHECK(memcmp(data->payload3, exp_str, size1 + size2), "content_check",
+ "doesn't match!\n");
+
+ CHECK_VAL(data->payload4_len1, size1);
+ CHECK_VAL(data->payload4_len2, size2);
+ CHECK_VAL(data->total4, size1 + size2);
+ CHECK(memcmp(data->payload4, exp_str, size1 + size2), "content_check",
+ "doesn't match!\n");
+
+ CHECK_VAL(bss->ret_bad_read, -EFAULT);
+ CHECK_VAL(data->payload_bad[0], 0x42);
+ CHECK_VAL(data->payload_bad[1], 0x42);
+ CHECK_VAL(data->payload_bad[2], 0);
+ CHECK_VAL(data->payload_bad[3], 0x42);
+ CHECK_VAL(data->payload_bad[4], 0x42);
+cleanup:
+ test_varlen__destroy(skel);
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/verif_stats.c b/tools/testing/selftests/bpf/prog_tests/verif_stats.c
new file mode 100644
index 000000000000..af4b95f57ac1
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/verif_stats.c
@@ -0,0 +1,29 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2021 Facebook */
+
+#include <test_progs.h>
+
+#include "trace_vprintk.lskel.h"
+
+void test_verif_stats(void)
+{
+ __u32 len = sizeof(struct bpf_prog_info);
+ struct trace_vprintk_lskel *skel;
+ struct bpf_prog_info info = {};
+ int err;
+
+ skel = trace_vprintk_lskel__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "trace_vprintk__open_and_load"))
+ goto cleanup;
+
+ err = bpf_prog_get_info_by_fd(skel->progs.sys_enter.prog_fd,
+ &info, &len);
+ if (!ASSERT_OK(err, "bpf_prog_get_info_by_fd"))
+ goto cleanup;
+
+ if (!ASSERT_GT(info.verified_insns, 0, "verified_insns"))
+ goto cleanup;
+
+cleanup:
+ trace_vprintk_lskel__destroy(skel);
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/verifier.c b/tools/testing/selftests/bpf/prog_tests/verifier.c
new file mode 100644
index 000000000000..985273832f89
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/verifier.c
@@ -0,0 +1,246 @@
+// SPDX-License-Identifier: GPL-2.0-only
+
+#include <test_progs.h>
+
+#include "cap_helpers.h"
+#include "verifier_and.skel.h"
+#include "verifier_arena.skel.h"
+#include "verifier_array_access.skel.h"
+#include "verifier_basic_stack.skel.h"
+#include "verifier_bitfield_write.skel.h"
+#include "verifier_bounds.skel.h"
+#include "verifier_bounds_deduction.skel.h"
+#include "verifier_bounds_deduction_non_const.skel.h"
+#include "verifier_bounds_mix_sign_unsign.skel.h"
+#include "verifier_bpf_get_stack.skel.h"
+#include "verifier_bswap.skel.h"
+#include "verifier_btf_ctx_access.skel.h"
+#include "verifier_btf_unreliable_prog.skel.h"
+#include "verifier_cfg.skel.h"
+#include "verifier_cgroup_inv_retcode.skel.h"
+#include "verifier_cgroup_skb.skel.h"
+#include "verifier_cgroup_storage.skel.h"
+#include "verifier_const_or.skel.h"
+#include "verifier_ctx.skel.h"
+#include "verifier_ctx_sk_msg.skel.h"
+#include "verifier_d_path.skel.h"
+#include "verifier_direct_packet_access.skel.h"
+#include "verifier_direct_stack_access_wraparound.skel.h"
+#include "verifier_div0.skel.h"
+#include "verifier_div_overflow.skel.h"
+#include "verifier_global_subprogs.skel.h"
+#include "verifier_global_ptr_args.skel.h"
+#include "verifier_gotol.skel.h"
+#include "verifier_helper_access_var_len.skel.h"
+#include "verifier_helper_packet_access.skel.h"
+#include "verifier_helper_restricted.skel.h"
+#include "verifier_helper_value_access.skel.h"
+#include "verifier_int_ptr.skel.h"
+#include "verifier_iterating_callbacks.skel.h"
+#include "verifier_jeq_infer_not_null.skel.h"
+#include "verifier_ld_ind.skel.h"
+#include "verifier_ldsx.skel.h"
+#include "verifier_leak_ptr.skel.h"
+#include "verifier_loops1.skel.h"
+#include "verifier_lwt.skel.h"
+#include "verifier_map_in_map.skel.h"
+#include "verifier_map_ptr.skel.h"
+#include "verifier_map_ptr_mixing.skel.h"
+#include "verifier_map_ret_val.skel.h"
+#include "verifier_masking.skel.h"
+#include "verifier_meta_access.skel.h"
+#include "verifier_movsx.skel.h"
+#include "verifier_netfilter_ctx.skel.h"
+#include "verifier_netfilter_retcode.skel.h"
+#include "verifier_precision.skel.h"
+#include "verifier_prevent_map_lookup.skel.h"
+#include "verifier_raw_stack.skel.h"
+#include "verifier_raw_tp_writable.skel.h"
+#include "verifier_reg_equal.skel.h"
+#include "verifier_ref_tracking.skel.h"
+#include "verifier_regalloc.skel.h"
+#include "verifier_ringbuf.skel.h"
+#include "verifier_runtime_jit.skel.h"
+#include "verifier_scalar_ids.skel.h"
+#include "verifier_sdiv.skel.h"
+#include "verifier_search_pruning.skel.h"
+#include "verifier_sock.skel.h"
+#include "verifier_spill_fill.skel.h"
+#include "verifier_spin_lock.skel.h"
+#include "verifier_stack_ptr.skel.h"
+#include "verifier_subprog_precision.skel.h"
+#include "verifier_subreg.skel.h"
+#include "verifier_typedef.skel.h"
+#include "verifier_uninit.skel.h"
+#include "verifier_unpriv.skel.h"
+#include "verifier_unpriv_perf.skel.h"
+#include "verifier_value_adj_spill.skel.h"
+#include "verifier_value.skel.h"
+#include "verifier_value_illegal_alu.skel.h"
+#include "verifier_value_or_null.skel.h"
+#include "verifier_value_ptr_arith.skel.h"
+#include "verifier_var_off.skel.h"
+#include "verifier_xadd.skel.h"
+#include "verifier_xdp.skel.h"
+#include "verifier_xdp_direct_packet_access.skel.h"
+
+#define MAX_ENTRIES 11
+
+struct test_val {
+ unsigned int index;
+ int foo[MAX_ENTRIES];
+};
+
+__maybe_unused
+static void run_tests_aux(const char *skel_name,
+ skel_elf_bytes_fn elf_bytes_factory,
+ pre_execution_cb pre_execution_cb)
+{
+ struct test_loader tester = {};
+ __u64 old_caps;
+ int err;
+
+ /* test_verifier tests are executed w/o CAP_SYS_ADMIN, do the same here */
+ err = cap_disable_effective(1ULL << CAP_SYS_ADMIN, &old_caps);
+ if (err) {
+ PRINT_FAIL("failed to drop CAP_SYS_ADMIN: %i, %s\n", err, strerror(err));
+ return;
+ }
+
+ test_loader__set_pre_execution_cb(&tester, pre_execution_cb);
+ test_loader__run_subtests(&tester, skel_name, elf_bytes_factory);
+ test_loader_fini(&tester);
+
+ err = cap_enable_effective(old_caps, NULL);
+ if (err)
+ PRINT_FAIL("failed to restore CAP_SYS_ADMIN: %i, %s\n", err, strerror(err));
+}
+
+#define RUN(skel) run_tests_aux(#skel, skel##__elf_bytes, NULL)
+
+void test_verifier_and(void) { RUN(verifier_and); }
+void test_verifier_arena(void) { RUN(verifier_arena); }
+void test_verifier_basic_stack(void) { RUN(verifier_basic_stack); }
+void test_verifier_bitfield_write(void) { RUN(verifier_bitfield_write); }
+void test_verifier_bounds(void) { RUN(verifier_bounds); }
+void test_verifier_bounds_deduction(void) { RUN(verifier_bounds_deduction); }
+void test_verifier_bounds_deduction_non_const(void) { RUN(verifier_bounds_deduction_non_const); }
+void test_verifier_bounds_mix_sign_unsign(void) { RUN(verifier_bounds_mix_sign_unsign); }
+void test_verifier_bpf_get_stack(void) { RUN(verifier_bpf_get_stack); }
+void test_verifier_bswap(void) { RUN(verifier_bswap); }
+void test_verifier_btf_ctx_access(void) { RUN(verifier_btf_ctx_access); }
+void test_verifier_btf_unreliable_prog(void) { RUN(verifier_btf_unreliable_prog); }
+void test_verifier_cfg(void) { RUN(verifier_cfg); }
+void test_verifier_cgroup_inv_retcode(void) { RUN(verifier_cgroup_inv_retcode); }
+void test_verifier_cgroup_skb(void) { RUN(verifier_cgroup_skb); }
+void test_verifier_cgroup_storage(void) { RUN(verifier_cgroup_storage); }
+void test_verifier_const_or(void) { RUN(verifier_const_or); }
+void test_verifier_ctx(void) { RUN(verifier_ctx); }
+void test_verifier_ctx_sk_msg(void) { RUN(verifier_ctx_sk_msg); }
+void test_verifier_d_path(void) { RUN(verifier_d_path); }
+void test_verifier_direct_packet_access(void) { RUN(verifier_direct_packet_access); }
+void test_verifier_direct_stack_access_wraparound(void) { RUN(verifier_direct_stack_access_wraparound); }
+void test_verifier_div0(void) { RUN(verifier_div0); }
+void test_verifier_div_overflow(void) { RUN(verifier_div_overflow); }
+void test_verifier_global_subprogs(void) { RUN(verifier_global_subprogs); }
+void test_verifier_global_ptr_args(void) { RUN(verifier_global_ptr_args); }
+void test_verifier_gotol(void) { RUN(verifier_gotol); }
+void test_verifier_helper_access_var_len(void) { RUN(verifier_helper_access_var_len); }
+void test_verifier_helper_packet_access(void) { RUN(verifier_helper_packet_access); }
+void test_verifier_helper_restricted(void) { RUN(verifier_helper_restricted); }
+void test_verifier_helper_value_access(void) { RUN(verifier_helper_value_access); }
+void test_verifier_int_ptr(void) { RUN(verifier_int_ptr); }
+void test_verifier_iterating_callbacks(void) { RUN(verifier_iterating_callbacks); }
+void test_verifier_jeq_infer_not_null(void) { RUN(verifier_jeq_infer_not_null); }
+void test_verifier_ld_ind(void) { RUN(verifier_ld_ind); }
+void test_verifier_ldsx(void) { RUN(verifier_ldsx); }
+void test_verifier_leak_ptr(void) { RUN(verifier_leak_ptr); }
+void test_verifier_loops1(void) { RUN(verifier_loops1); }
+void test_verifier_lwt(void) { RUN(verifier_lwt); }
+void test_verifier_map_in_map(void) { RUN(verifier_map_in_map); }
+void test_verifier_map_ptr(void) { RUN(verifier_map_ptr); }
+void test_verifier_map_ptr_mixing(void) { RUN(verifier_map_ptr_mixing); }
+void test_verifier_map_ret_val(void) { RUN(verifier_map_ret_val); }
+void test_verifier_masking(void) { RUN(verifier_masking); }
+void test_verifier_meta_access(void) { RUN(verifier_meta_access); }
+void test_verifier_movsx(void) { RUN(verifier_movsx); }
+void test_verifier_netfilter_ctx(void) { RUN(verifier_netfilter_ctx); }
+void test_verifier_netfilter_retcode(void) { RUN(verifier_netfilter_retcode); }
+void test_verifier_precision(void) { RUN(verifier_precision); }
+void test_verifier_prevent_map_lookup(void) { RUN(verifier_prevent_map_lookup); }
+void test_verifier_raw_stack(void) { RUN(verifier_raw_stack); }
+void test_verifier_raw_tp_writable(void) { RUN(verifier_raw_tp_writable); }
+void test_verifier_reg_equal(void) { RUN(verifier_reg_equal); }
+void test_verifier_ref_tracking(void) { RUN(verifier_ref_tracking); }
+void test_verifier_regalloc(void) { RUN(verifier_regalloc); }
+void test_verifier_ringbuf(void) { RUN(verifier_ringbuf); }
+void test_verifier_runtime_jit(void) { RUN(verifier_runtime_jit); }
+void test_verifier_scalar_ids(void) { RUN(verifier_scalar_ids); }
+void test_verifier_sdiv(void) { RUN(verifier_sdiv); }
+void test_verifier_search_pruning(void) { RUN(verifier_search_pruning); }
+void test_verifier_sock(void) { RUN(verifier_sock); }
+void test_verifier_spill_fill(void) { RUN(verifier_spill_fill); }
+void test_verifier_spin_lock(void) { RUN(verifier_spin_lock); }
+void test_verifier_stack_ptr(void) { RUN(verifier_stack_ptr); }
+void test_verifier_subprog_precision(void) { RUN(verifier_subprog_precision); }
+void test_verifier_subreg(void) { RUN(verifier_subreg); }
+void test_verifier_typedef(void) { RUN(verifier_typedef); }
+void test_verifier_uninit(void) { RUN(verifier_uninit); }
+void test_verifier_unpriv(void) { RUN(verifier_unpriv); }
+void test_verifier_unpriv_perf(void) { RUN(verifier_unpriv_perf); }
+void test_verifier_value_adj_spill(void) { RUN(verifier_value_adj_spill); }
+void test_verifier_value(void) { RUN(verifier_value); }
+void test_verifier_value_illegal_alu(void) { RUN(verifier_value_illegal_alu); }
+void test_verifier_value_or_null(void) { RUN(verifier_value_or_null); }
+void test_verifier_var_off(void) { RUN(verifier_var_off); }
+void test_verifier_xadd(void) { RUN(verifier_xadd); }
+void test_verifier_xdp(void) { RUN(verifier_xdp); }
+void test_verifier_xdp_direct_packet_access(void) { RUN(verifier_xdp_direct_packet_access); }
+
+static int init_test_val_map(struct bpf_object *obj, char *map_name)
+{
+ struct test_val value = {
+ .index = (6 + 1) * sizeof(int),
+ .foo[6] = 0xabcdef12,
+ };
+ struct bpf_map *map;
+ int err, key = 0;
+
+ map = bpf_object__find_map_by_name(obj, map_name);
+ if (!map) {
+ PRINT_FAIL("Can't find map '%s'\n", map_name);
+ return -EINVAL;
+ }
+
+ err = bpf_map_update_elem(bpf_map__fd(map), &key, &value, 0);
+ if (err) {
+ PRINT_FAIL("Error while updating map '%s': %d\n", map_name, err);
+ return err;
+ }
+
+ return 0;
+}
+
+static int init_array_access_maps(struct bpf_object *obj)
+{
+ return init_test_val_map(obj, "map_array_ro");
+}
+
+void test_verifier_array_access(void)
+{
+ run_tests_aux("verifier_array_access",
+ verifier_array_access__elf_bytes,
+ init_array_access_maps);
+}
+
+static int init_value_ptr_arith_maps(struct bpf_object *obj)
+{
+ return init_test_val_map(obj, "map_array_48b");
+}
+
+void test_verifier_value_ptr_arith(void)
+{
+ run_tests_aux("verifier_value_ptr_arith",
+ verifier_value_ptr_arith__elf_bytes,
+ init_value_ptr_arith_maps);
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/verifier_log.c b/tools/testing/selftests/bpf/prog_tests/verifier_log.c
new file mode 100644
index 000000000000..8337c6bc5b95
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/verifier_log.c
@@ -0,0 +1,450 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2023 Meta Platforms, Inc. and affiliates. */
+
+#include <test_progs.h>
+#include <bpf/btf.h>
+
+#include "test_log_buf.skel.h"
+
+
+static bool check_prog_load(int prog_fd, bool expect_err, const char *tag)
+{
+ if (expect_err) {
+ if (!ASSERT_LT(prog_fd, 0, tag)) {
+ close(prog_fd);
+ return false;
+ }
+ } else /* !expect_err */ {
+ if (!ASSERT_GT(prog_fd, 0, tag))
+ return false;
+ }
+ if (prog_fd >= 0)
+ close(prog_fd);
+ return true;
+}
+
+static struct {
+ /* strategically placed before others to avoid accidental modification by kernel */
+ char filler[1024];
+ char buf[1024];
+ /* strategically placed after buf[] to catch more accidental corruptions */
+ char reference[1024];
+} logs;
+static const struct bpf_insn *insns;
+static size_t insn_cnt;
+
+static int load_prog(struct bpf_prog_load_opts *opts, bool expect_load_error)
+{
+ int prog_fd;
+
+ prog_fd = bpf_prog_load(BPF_PROG_TYPE_RAW_TRACEPOINT, "log_prog",
+ "GPL", insns, insn_cnt, opts);
+ check_prog_load(prog_fd, expect_load_error, "prog_load");
+
+ return prog_fd;
+}
+
+static void verif_log_subtest(const char *name, bool expect_load_error, int log_level)
+{
+ LIBBPF_OPTS(bpf_prog_load_opts, opts);
+ char *exp_log, prog_name[16], op_name[32];
+ struct test_log_buf *skel;
+ struct bpf_program *prog;
+ size_t fixed_log_sz;
+ __u32 log_true_sz_fixed, log_true_sz_rolling;
+ int i, mode, err, prog_fd, res;
+
+ skel = test_log_buf__open();
+ if (!ASSERT_OK_PTR(skel, "skel_open"))
+ return;
+
+ bpf_object__for_each_program(prog, skel->obj) {
+ if (strcmp(bpf_program__name(prog), name) == 0)
+ bpf_program__set_autoload(prog, true);
+ else
+ bpf_program__set_autoload(prog, false);
+ }
+
+ err = test_log_buf__load(skel);
+ if (!expect_load_error && !ASSERT_OK(err, "unexpected_load_failure"))
+ goto cleanup;
+ if (expect_load_error && !ASSERT_ERR(err, "unexpected_load_success"))
+ goto cleanup;
+
+ insns = bpf_program__insns(skel->progs.good_prog);
+ insn_cnt = bpf_program__insn_cnt(skel->progs.good_prog);
+
+ opts.log_buf = logs.reference;
+ opts.log_size = sizeof(logs.reference);
+ opts.log_level = log_level | 8 /* BPF_LOG_FIXED */;
+ load_prog(&opts, expect_load_error);
+
+ fixed_log_sz = strlen(logs.reference) + 1;
+ if (!ASSERT_GT(fixed_log_sz, 50, "fixed_log_sz"))
+ goto cleanup;
+ memset(logs.reference + fixed_log_sz, 0, sizeof(logs.reference) - fixed_log_sz);
+
+ /* validate BPF_LOG_FIXED works as verifier log used to work, that is:
+ * we get -ENOSPC and beginning of the full verifier log. This only
+ * works for log_level 2 and log_level 1 + failed program. For log
+ * level 2 we don't reset log at all. For log_level 1 + failed program
+ * we don't get to verification stats output. With log level 1
+ * for successful program final result will be just verifier stats.
+ * But if provided too short log buf, kernel will NULL-out log->ubuf
+ * and will stop emitting further log. This means we'll never see
+ * predictable verifier stats.
+ * Long story short, we do the following -ENOSPC test only for
+ * predictable combinations.
+ */
+ if (log_level >= 2 || expect_load_error) {
+ opts.log_buf = logs.buf;
+ opts.log_level = log_level | 8; /* fixed-length log */
+ opts.log_size = 25;
+
+ prog_fd = bpf_prog_load(BPF_PROG_TYPE_RAW_TRACEPOINT, "log_fixed25",
+ "GPL", insns, insn_cnt, &opts);
+ if (!ASSERT_EQ(prog_fd, -ENOSPC, "unexpected_log_fixed_prog_load_result")) {
+ if (prog_fd >= 0)
+ close(prog_fd);
+ goto cleanup;
+ }
+ if (!ASSERT_EQ(strlen(logs.buf), 24, "log_fixed_25"))
+ goto cleanup;
+ if (!ASSERT_STRNEQ(logs.buf, logs.reference, 24, "log_fixed_contents_25"))
+ goto cleanup;
+ }
+
+ /* validate rolling verifier log logic: try all variations of log buf
+ * length to force various truncation scenarios
+ */
+ opts.log_buf = logs.buf;
+
+ /* rotating mode, then fixed mode */
+ for (mode = 1; mode >= 0; mode--) {
+ /* prefill logs.buf with 'A's to detect any write beyond allowed length */
+ memset(logs.filler, 'A', sizeof(logs.filler));
+ logs.filler[sizeof(logs.filler) - 1] = '\0';
+ memset(logs.buf, 'A', sizeof(logs.buf));
+ logs.buf[sizeof(logs.buf) - 1] = '\0';
+
+ for (i = 1; i < fixed_log_sz; i++) {
+ opts.log_size = i;
+ opts.log_level = log_level | (mode ? 0 : 8 /* BPF_LOG_FIXED */);
+
+ snprintf(prog_name, sizeof(prog_name),
+ "log_%s_%d", mode ? "roll" : "fixed", i);
+ prog_fd = bpf_prog_load(BPF_PROG_TYPE_RAW_TRACEPOINT, prog_name,
+ "GPL", insns, insn_cnt, &opts);
+
+ snprintf(op_name, sizeof(op_name),
+ "log_%s_prog_load_%d", mode ? "roll" : "fixed", i);
+ if (!ASSERT_EQ(prog_fd, -ENOSPC, op_name)) {
+ if (prog_fd >= 0)
+ close(prog_fd);
+ goto cleanup;
+ }
+
+ snprintf(op_name, sizeof(op_name),
+ "log_%s_strlen_%d", mode ? "roll" : "fixed", i);
+ ASSERT_EQ(strlen(logs.buf), i - 1, op_name);
+
+ if (mode)
+ exp_log = logs.reference + fixed_log_sz - i;
+ else
+ exp_log = logs.reference;
+
+ snprintf(op_name, sizeof(op_name),
+ "log_%s_contents_%d", mode ? "roll" : "fixed", i);
+ if (!ASSERT_STRNEQ(logs.buf, exp_log, i - 1, op_name)) {
+ printf("CMP:%d\nS1:'%s'\nS2:'%s'\n",
+ strncmp(logs.buf, exp_log, i - 1),
+ logs.buf, exp_log);
+ goto cleanup;
+ }
+
+ /* check that unused portions of logs.buf is not overwritten */
+ snprintf(op_name, sizeof(op_name),
+ "log_%s_unused_%d", mode ? "roll" : "fixed", i);
+ if (!ASSERT_STREQ(logs.buf + i, logs.filler + i, op_name)) {
+ printf("CMP:%d\nS1:'%s'\nS2:'%s'\n",
+ strcmp(logs.buf + i, logs.filler + i),
+ logs.buf + i, logs.filler + i);
+ goto cleanup;
+ }
+ }
+ }
+
+ /* (FIXED) get actual log size */
+ opts.log_buf = logs.buf;
+ opts.log_level = log_level | 8; /* BPF_LOG_FIXED */
+ opts.log_size = sizeof(logs.buf);
+ opts.log_true_size = 0;
+ res = load_prog(&opts, expect_load_error);
+ ASSERT_NEQ(res, -ENOSPC, "prog_load_res_fixed");
+
+ log_true_sz_fixed = opts.log_true_size;
+ ASSERT_GT(log_true_sz_fixed, 0, "log_true_sz_fixed");
+
+ /* (FIXED, NULL) get actual log size */
+ opts.log_buf = NULL;
+ opts.log_level = log_level | 8; /* BPF_LOG_FIXED */
+ opts.log_size = 0;
+ opts.log_true_size = 0;
+ res = load_prog(&opts, expect_load_error);
+ ASSERT_NEQ(res, -ENOSPC, "prog_load_res_fixed_null");
+ ASSERT_EQ(opts.log_true_size, log_true_sz_fixed, "log_sz_fixed_null_eq");
+
+ /* (ROLLING) get actual log size */
+ opts.log_buf = logs.buf;
+ opts.log_level = log_level;
+ opts.log_size = sizeof(logs.buf);
+ opts.log_true_size = 0;
+ res = load_prog(&opts, expect_load_error);
+ ASSERT_NEQ(res, -ENOSPC, "prog_load_res_rolling");
+
+ log_true_sz_rolling = opts.log_true_size;
+ ASSERT_EQ(log_true_sz_rolling, log_true_sz_fixed, "log_true_sz_eq");
+
+ /* (ROLLING, NULL) get actual log size */
+ opts.log_buf = NULL;
+ opts.log_level = log_level;
+ opts.log_size = 0;
+ opts.log_true_size = 0;
+ res = load_prog(&opts, expect_load_error);
+ ASSERT_NEQ(res, -ENOSPC, "prog_load_res_rolling_null");
+ ASSERT_EQ(opts.log_true_size, log_true_sz_rolling, "log_true_sz_null_eq");
+
+ /* (FIXED) expect -ENOSPC for one byte short log */
+ opts.log_buf = logs.buf;
+ opts.log_level = log_level | 8; /* BPF_LOG_FIXED */
+ opts.log_size = log_true_sz_fixed - 1;
+ opts.log_true_size = 0;
+ res = load_prog(&opts, true /* should fail */);
+ ASSERT_EQ(res, -ENOSPC, "prog_load_res_too_short_fixed");
+
+ /* (FIXED) expect *not* -ENOSPC with exact log_true_size buffer */
+ opts.log_buf = logs.buf;
+ opts.log_level = log_level | 8; /* BPF_LOG_FIXED */
+ opts.log_size = log_true_sz_fixed;
+ opts.log_true_size = 0;
+ res = load_prog(&opts, expect_load_error);
+ ASSERT_NEQ(res, -ENOSPC, "prog_load_res_just_right_fixed");
+
+ /* (ROLLING) expect -ENOSPC for one byte short log */
+ opts.log_buf = logs.buf;
+ opts.log_level = log_level;
+ opts.log_size = log_true_sz_rolling - 1;
+ res = load_prog(&opts, true /* should fail */);
+ ASSERT_EQ(res, -ENOSPC, "prog_load_res_too_short_rolling");
+
+ /* (ROLLING) expect *not* -ENOSPC with exact log_true_size buffer */
+ opts.log_buf = logs.buf;
+ opts.log_level = log_level;
+ opts.log_size = log_true_sz_rolling;
+ opts.log_true_size = 0;
+ res = load_prog(&opts, expect_load_error);
+ ASSERT_NEQ(res, -ENOSPC, "prog_load_res_just_right_rolling");
+
+cleanup:
+ test_log_buf__destroy(skel);
+}
+
+static const void *btf_data;
+static u32 btf_data_sz;
+
+static int load_btf(struct bpf_btf_load_opts *opts, bool expect_err)
+{
+ int fd;
+
+ fd = bpf_btf_load(btf_data, btf_data_sz, opts);
+ if (fd >= 0)
+ close(fd);
+ if (expect_err)
+ ASSERT_LT(fd, 0, "btf_load_failure");
+ else /* !expect_err */
+ ASSERT_GT(fd, 0, "btf_load_success");
+ return fd;
+}
+
+static void verif_btf_log_subtest(bool bad_btf)
+{
+ LIBBPF_OPTS(bpf_btf_load_opts, opts);
+ struct btf *btf;
+ struct btf_type *t;
+ char *exp_log, op_name[32];
+ size_t fixed_log_sz;
+ __u32 log_true_sz_fixed, log_true_sz_rolling;
+ int i, res;
+
+ /* prepare simple BTF contents */
+ btf = btf__new_empty();
+ if (!ASSERT_OK_PTR(btf, "btf_new_empty"))
+ return;
+ res = btf__add_int(btf, "whatever", 4, 0);
+ if (!ASSERT_GT(res, 0, "btf_add_int_id"))
+ goto cleanup;
+ if (bad_btf) {
+ /* btf__add_int() doesn't allow bad value of size, so we'll just
+ * force-cast btf_type pointer and manually override size to invalid
+ * 3 if we need to simulate failure
+ */
+ t = (void *)btf__type_by_id(btf, res);
+ if (!ASSERT_OK_PTR(t, "int_btf_type"))
+ goto cleanup;
+ t->size = 3;
+ }
+
+ btf_data = btf__raw_data(btf, &btf_data_sz);
+ if (!ASSERT_OK_PTR(btf_data, "btf_data"))
+ goto cleanup;
+
+ load_btf(&opts, bad_btf);
+
+ opts.log_buf = logs.reference;
+ opts.log_size = sizeof(logs.reference);
+ opts.log_level = 1 | 8 /* BPF_LOG_FIXED */;
+ load_btf(&opts, bad_btf);
+
+ fixed_log_sz = strlen(logs.reference) + 1;
+ if (!ASSERT_GT(fixed_log_sz, 50, "fixed_log_sz"))
+ goto cleanup;
+ memset(logs.reference + fixed_log_sz, 0, sizeof(logs.reference) - fixed_log_sz);
+
+ /* validate BPF_LOG_FIXED truncation works as verifier log used to work */
+ opts.log_buf = logs.buf;
+ opts.log_level = 1 | 8; /* fixed-length log */
+ opts.log_size = 25;
+ res = load_btf(&opts, true);
+ ASSERT_EQ(res, -ENOSPC, "half_log_fd");
+ ASSERT_EQ(strlen(logs.buf), 24, "log_fixed_25");
+ ASSERT_STRNEQ(logs.buf, logs.reference, 24, op_name);
+
+ /* validate rolling verifier log logic: try all variations of log buf
+ * length to force various truncation scenarios
+ */
+ opts.log_buf = logs.buf;
+ opts.log_level = 1; /* rolling log */
+
+ /* prefill logs.buf with 'A's to detect any write beyond allowed length */
+ memset(logs.filler, 'A', sizeof(logs.filler));
+ logs.filler[sizeof(logs.filler) - 1] = '\0';
+ memset(logs.buf, 'A', sizeof(logs.buf));
+ logs.buf[sizeof(logs.buf) - 1] = '\0';
+
+ for (i = 1; i < fixed_log_sz; i++) {
+ opts.log_size = i;
+
+ snprintf(op_name, sizeof(op_name), "log_roll_btf_load_%d", i);
+ res = load_btf(&opts, true);
+ if (!ASSERT_EQ(res, -ENOSPC, op_name))
+ goto cleanup;
+
+ exp_log = logs.reference + fixed_log_sz - i;
+ snprintf(op_name, sizeof(op_name), "log_roll_contents_%d", i);
+ if (!ASSERT_STREQ(logs.buf, exp_log, op_name)) {
+ printf("CMP:%d\nS1:'%s'\nS2:'%s'\n",
+ strcmp(logs.buf, exp_log),
+ logs.buf, exp_log);
+ goto cleanup;
+ }
+
+ /* check that unused portions of logs.buf are not overwritten */
+ snprintf(op_name, sizeof(op_name), "log_roll_unused_tail_%d", i);
+ if (!ASSERT_STREQ(logs.buf + i, logs.filler + i, op_name)) {
+ printf("CMP:%d\nS1:'%s'\nS2:'%s'\n",
+ strcmp(logs.buf + i, logs.filler + i),
+ logs.buf + i, logs.filler + i);
+ goto cleanup;
+ }
+ }
+
+ /* (FIXED) get actual log size */
+ opts.log_buf = logs.buf;
+ opts.log_level = 1 | 8; /* BPF_LOG_FIXED */
+ opts.log_size = sizeof(logs.buf);
+ opts.log_true_size = 0;
+ res = load_btf(&opts, bad_btf);
+ ASSERT_NEQ(res, -ENOSPC, "btf_load_res_fixed");
+
+ log_true_sz_fixed = opts.log_true_size;
+ ASSERT_GT(log_true_sz_fixed, 0, "log_true_sz_fixed");
+
+ /* (FIXED, NULL) get actual log size */
+ opts.log_buf = NULL;
+ opts.log_level = 1 | 8; /* BPF_LOG_FIXED */
+ opts.log_size = 0;
+ opts.log_true_size = 0;
+ res = load_btf(&opts, bad_btf);
+ ASSERT_NEQ(res, -ENOSPC, "btf_load_res_fixed_null");
+ ASSERT_EQ(opts.log_true_size, log_true_sz_fixed, "log_sz_fixed_null_eq");
+
+ /* (ROLLING) get actual log size */
+ opts.log_buf = logs.buf;
+ opts.log_level = 1;
+ opts.log_size = sizeof(logs.buf);
+ opts.log_true_size = 0;
+ res = load_btf(&opts, bad_btf);
+ ASSERT_NEQ(res, -ENOSPC, "btf_load_res_rolling");
+
+ log_true_sz_rolling = opts.log_true_size;
+ ASSERT_EQ(log_true_sz_rolling, log_true_sz_fixed, "log_true_sz_eq");
+
+ /* (ROLLING, NULL) get actual log size */
+ opts.log_buf = NULL;
+ opts.log_level = 1;
+ opts.log_size = 0;
+ opts.log_true_size = 0;
+ res = load_btf(&opts, bad_btf);
+ ASSERT_NEQ(res, -ENOSPC, "btf_load_res_rolling_null");
+ ASSERT_EQ(opts.log_true_size, log_true_sz_rolling, "log_true_sz_null_eq");
+
+ /* (FIXED) expect -ENOSPC for one byte short log */
+ opts.log_buf = logs.buf;
+ opts.log_level = 1 | 8; /* BPF_LOG_FIXED */
+ opts.log_size = log_true_sz_fixed - 1;
+ opts.log_true_size = 0;
+ res = load_btf(&opts, true);
+ ASSERT_EQ(res, -ENOSPC, "btf_load_res_too_short_fixed");
+
+ /* (FIXED) expect *not* -ENOSPC with exact log_true_size buffer */
+ opts.log_buf = logs.buf;
+ opts.log_level = 1 | 8; /* BPF_LOG_FIXED */
+ opts.log_size = log_true_sz_fixed;
+ opts.log_true_size = 0;
+ res = load_btf(&opts, bad_btf);
+ ASSERT_NEQ(res, -ENOSPC, "btf_load_res_just_right_fixed");
+
+ /* (ROLLING) expect -ENOSPC for one byte short log */
+ opts.log_buf = logs.buf;
+ opts.log_level = 1;
+ opts.log_size = log_true_sz_rolling - 1;
+ res = load_btf(&opts, true);
+ ASSERT_EQ(res, -ENOSPC, "btf_load_res_too_short_rolling");
+
+ /* (ROLLING) expect *not* -ENOSPC with exact log_true_size buffer */
+ opts.log_buf = logs.buf;
+ opts.log_level = 1;
+ opts.log_size = log_true_sz_rolling;
+ opts.log_true_size = 0;
+ res = load_btf(&opts, bad_btf);
+ ASSERT_NEQ(res, -ENOSPC, "btf_load_res_just_right_rolling");
+
+cleanup:
+ btf__free(btf);
+}
+
+void test_verifier_log(void)
+{
+ if (test__start_subtest("good_prog-level1"))
+ verif_log_subtest("good_prog", false, 1);
+ if (test__start_subtest("good_prog-level2"))
+ verif_log_subtest("good_prog", false, 2);
+ if (test__start_subtest("bad_prog-level1"))
+ verif_log_subtest("bad_prog", true, 1);
+ if (test__start_subtest("bad_prog-level2"))
+ verif_log_subtest("bad_prog", true, 2);
+ if (test__start_subtest("bad_btf"))
+ verif_btf_log_subtest(true /* bad btf */);
+ if (test__start_subtest("good_btf"))
+ verif_btf_log_subtest(false /* !bad btf */);
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/verify_pkcs7_sig.c b/tools/testing/selftests/bpf/prog_tests/verify_pkcs7_sig.c
new file mode 100644
index 000000000000..ab0f02faa80c
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/verify_pkcs7_sig.c
@@ -0,0 +1,565 @@
+// SPDX-License-Identifier: GPL-2.0
+
+/*
+ * Copyright (C) 2022 Huawei Technologies Duesseldorf GmbH
+ *
+ * Author: Roberto Sassu <roberto.sassu@huawei.com>
+ */
+
+#include <stdio.h>
+#include <errno.h>
+#include <stdlib.h>
+#include <unistd.h>
+#include <endian.h>
+#include <limits.h>
+#include <sys/stat.h>
+#include <sys/wait.h>
+#include <sys/mman.h>
+#include <linux/keyctl.h>
+#include <sys/xattr.h>
+#include <linux/fsverity.h>
+#include <test_progs.h>
+
+#include "test_verify_pkcs7_sig.skel.h"
+#include "test_sig_in_xattr.skel.h"
+
+#define MAX_DATA_SIZE (1024 * 1024)
+#define MAX_SIG_SIZE 1024
+
+#define VERIFY_USE_SECONDARY_KEYRING (1UL)
+#define VERIFY_USE_PLATFORM_KEYRING (2UL)
+
+#ifndef SHA256_DIGEST_SIZE
+#define SHA256_DIGEST_SIZE 32
+#endif
+
+/* In stripped ARM and x86-64 modules, ~ is surprisingly rare. */
+#define MODULE_SIG_STRING "~Module signature appended~\n"
+
+/*
+ * Module signature information block.
+ *
+ * The constituents of the signature section are, in order:
+ *
+ * - Signer's name
+ * - Key identifier
+ * - Signature data
+ * - Information block
+ */
+struct module_signature {
+ __u8 algo; /* Public-key crypto algorithm [0] */
+ __u8 hash; /* Digest algorithm [0] */
+ __u8 id_type; /* Key identifier type [PKEY_ID_PKCS7] */
+ __u8 signer_len; /* Length of signer's name [0] */
+ __u8 key_id_len; /* Length of key identifier [0] */
+ __u8 __pad[3];
+ __be32 sig_len; /* Length of signature data */
+};
+
+struct data {
+ __u8 data[MAX_DATA_SIZE];
+ __u32 data_len;
+ __u8 sig[MAX_SIG_SIZE];
+ __u32 sig_len;
+};
+
+static bool kfunc_not_supported;
+
+static int libbpf_print_cb(enum libbpf_print_level level, const char *fmt,
+ va_list args)
+{
+ if (level == LIBBPF_WARN)
+ vprintf(fmt, args);
+
+ if (strcmp(fmt, "libbpf: extern (func ksym) '%s': not found in kernel or module BTFs\n"))
+ return 0;
+
+ if (strcmp(va_arg(args, char *), "bpf_verify_pkcs7_signature"))
+ return 0;
+
+ kfunc_not_supported = true;
+ return 0;
+}
+
+static int _run_setup_process(const char *setup_dir, const char *cmd)
+{
+ int child_pid, child_status;
+
+ child_pid = fork();
+ if (child_pid == 0) {
+ execlp("./verify_sig_setup.sh", "./verify_sig_setup.sh", cmd,
+ setup_dir, NULL);
+ exit(errno);
+
+ } else if (child_pid > 0) {
+ waitpid(child_pid, &child_status, 0);
+ return WEXITSTATUS(child_status);
+ }
+
+ return -EINVAL;
+}
+
+static int populate_data_item_str(const char *tmp_dir, struct data *data_item)
+{
+ struct stat st;
+ char data_template[] = "/tmp/dataXXXXXX";
+ char path[PATH_MAX];
+ int ret, fd, child_status, child_pid;
+
+ data_item->data_len = 4;
+ memcpy(data_item->data, "test", data_item->data_len);
+
+ fd = mkstemp(data_template);
+ if (fd == -1)
+ return -errno;
+
+ ret = write(fd, data_item->data, data_item->data_len);
+
+ close(fd);
+
+ if (ret != data_item->data_len) {
+ ret = -EIO;
+ goto out;
+ }
+
+ child_pid = fork();
+
+ if (child_pid == -1) {
+ ret = -errno;
+ goto out;
+ }
+
+ if (child_pid == 0) {
+ snprintf(path, sizeof(path), "%s/signing_key.pem", tmp_dir);
+
+ return execlp("./sign-file", "./sign-file", "-d", "sha256",
+ path, path, data_template, NULL);
+ }
+
+ waitpid(child_pid, &child_status, 0);
+
+ ret = WEXITSTATUS(child_status);
+ if (ret)
+ goto out;
+
+ snprintf(path, sizeof(path), "%s.p7s", data_template);
+
+ ret = stat(path, &st);
+ if (ret == -1) {
+ ret = -errno;
+ goto out;
+ }
+
+ if (st.st_size > sizeof(data_item->sig)) {
+ ret = -EINVAL;
+ goto out_sig;
+ }
+
+ data_item->sig_len = st.st_size;
+
+ fd = open(path, O_RDONLY);
+ if (fd == -1) {
+ ret = -errno;
+ goto out_sig;
+ }
+
+ ret = read(fd, data_item->sig, data_item->sig_len);
+
+ close(fd);
+
+ if (ret != data_item->sig_len) {
+ ret = -EIO;
+ goto out_sig;
+ }
+
+ ret = 0;
+out_sig:
+ unlink(path);
+out:
+ unlink(data_template);
+ return ret;
+}
+
+static int populate_data_item_mod(struct data *data_item)
+{
+ char mod_path[PATH_MAX], *mod_path_ptr;
+ struct stat st;
+ void *mod;
+ FILE *fp;
+ struct module_signature ms;
+ int ret, fd, modlen, marker_len, sig_len;
+
+ data_item->data_len = 0;
+
+ if (stat("/lib/modules", &st) == -1)
+ return 0;
+
+ /* Requires CONFIG_TCP_CONG_BIC=m. */
+ fp = popen("find /lib/modules/$(uname -r) -name tcp_bic.ko", "r");
+ if (!fp)
+ return 0;
+
+ mod_path_ptr = fgets(mod_path, sizeof(mod_path), fp);
+ pclose(fp);
+
+ if (!mod_path_ptr)
+ return 0;
+
+ mod_path_ptr = strchr(mod_path, '\n');
+ if (!mod_path_ptr)
+ return 0;
+
+ *mod_path_ptr = '\0';
+
+ if (stat(mod_path, &st) == -1)
+ return 0;
+
+ modlen = st.st_size;
+ marker_len = sizeof(MODULE_SIG_STRING) - 1;
+
+ fd = open(mod_path, O_RDONLY);
+ if (fd == -1)
+ return -errno;
+
+ mod = mmap(NULL, st.st_size, PROT_READ, MAP_PRIVATE, fd, 0);
+
+ close(fd);
+
+ if (mod == MAP_FAILED)
+ return -errno;
+
+ if (strncmp(mod + modlen - marker_len, MODULE_SIG_STRING, marker_len)) {
+ ret = -EINVAL;
+ goto out;
+ }
+
+ modlen -= marker_len;
+
+ memcpy(&ms, mod + (modlen - sizeof(ms)), sizeof(ms));
+
+ sig_len = __be32_to_cpu(ms.sig_len);
+ modlen -= sig_len + sizeof(ms);
+
+ if (modlen > sizeof(data_item->data)) {
+ ret = -E2BIG;
+ goto out;
+ }
+
+ memcpy(data_item->data, mod, modlen);
+ data_item->data_len = modlen;
+
+ if (sig_len > sizeof(data_item->sig)) {
+ ret = -E2BIG;
+ goto out;
+ }
+
+ memcpy(data_item->sig, mod + modlen, sig_len);
+ data_item->sig_len = sig_len;
+ ret = 0;
+out:
+ munmap(mod, st.st_size);
+ return ret;
+}
+
+static void test_verify_pkcs7_sig_from_map(void)
+{
+ libbpf_print_fn_t old_print_cb;
+ char tmp_dir_template[] = "/tmp/verify_sigXXXXXX";
+ char *tmp_dir;
+ struct test_verify_pkcs7_sig *skel = NULL;
+ struct bpf_map *map;
+ struct data data;
+ int ret, zero = 0;
+
+ /* Trigger creation of session keyring. */
+ syscall(__NR_request_key, "keyring", "_uid.0", NULL,
+ KEY_SPEC_SESSION_KEYRING);
+
+ tmp_dir = mkdtemp(tmp_dir_template);
+ if (!ASSERT_OK_PTR(tmp_dir, "mkdtemp"))
+ return;
+
+ ret = _run_setup_process(tmp_dir, "setup");
+ if (!ASSERT_OK(ret, "_run_setup_process"))
+ goto close_prog;
+
+ skel = test_verify_pkcs7_sig__open();
+ if (!ASSERT_OK_PTR(skel, "test_verify_pkcs7_sig__open"))
+ goto close_prog;
+
+ old_print_cb = libbpf_set_print(libbpf_print_cb);
+ ret = test_verify_pkcs7_sig__load(skel);
+ libbpf_set_print(old_print_cb);
+
+ if (ret < 0 && kfunc_not_supported) {
+ printf(
+ "%s:SKIP:bpf_verify_pkcs7_signature() kfunc not supported\n",
+ __func__);
+ test__skip();
+ goto close_prog;
+ }
+
+ if (!ASSERT_OK(ret, "test_verify_pkcs7_sig__load"))
+ goto close_prog;
+
+ ret = test_verify_pkcs7_sig__attach(skel);
+ if (!ASSERT_OK(ret, "test_verify_pkcs7_sig__attach"))
+ goto close_prog;
+
+ map = bpf_object__find_map_by_name(skel->obj, "data_input");
+ if (!ASSERT_OK_PTR(map, "data_input not found"))
+ goto close_prog;
+
+ skel->bss->monitored_pid = getpid();
+
+ /* Test without data and signature. */
+ skel->bss->user_keyring_serial = KEY_SPEC_SESSION_KEYRING;
+
+ ret = bpf_map_update_elem(bpf_map__fd(map), &zero, &data, BPF_ANY);
+ if (!ASSERT_LT(ret, 0, "bpf_map_update_elem data_input"))
+ goto close_prog;
+
+ /* Test successful signature verification with session keyring. */
+ ret = populate_data_item_str(tmp_dir, &data);
+ if (!ASSERT_OK(ret, "populate_data_item_str"))
+ goto close_prog;
+
+ ret = bpf_map_update_elem(bpf_map__fd(map), &zero, &data, BPF_ANY);
+ if (!ASSERT_OK(ret, "bpf_map_update_elem data_input"))
+ goto close_prog;
+
+ /* Test successful signature verification with testing keyring. */
+ skel->bss->user_keyring_serial = syscall(__NR_request_key, "keyring",
+ "ebpf_testing_keyring", NULL,
+ KEY_SPEC_SESSION_KEYRING);
+
+ ret = bpf_map_update_elem(bpf_map__fd(map), &zero, &data, BPF_ANY);
+ if (!ASSERT_OK(ret, "bpf_map_update_elem data_input"))
+ goto close_prog;
+
+ /*
+ * Ensure key_task_permission() is called and rejects the keyring
+ * (no Search permission).
+ */
+ syscall(__NR_keyctl, KEYCTL_SETPERM, skel->bss->user_keyring_serial,
+ 0x37373737);
+
+ ret = bpf_map_update_elem(bpf_map__fd(map), &zero, &data, BPF_ANY);
+ if (!ASSERT_LT(ret, 0, "bpf_map_update_elem data_input"))
+ goto close_prog;
+
+ syscall(__NR_keyctl, KEYCTL_SETPERM, skel->bss->user_keyring_serial,
+ 0x3f3f3f3f);
+
+ /*
+ * Ensure key_validate() is called and rejects the keyring (key expired)
+ */
+ syscall(__NR_keyctl, KEYCTL_SET_TIMEOUT,
+ skel->bss->user_keyring_serial, 1);
+ sleep(1);
+
+ ret = bpf_map_update_elem(bpf_map__fd(map), &zero, &data, BPF_ANY);
+ if (!ASSERT_LT(ret, 0, "bpf_map_update_elem data_input"))
+ goto close_prog;
+
+ skel->bss->user_keyring_serial = KEY_SPEC_SESSION_KEYRING;
+
+ /* Test with corrupted data (signature verification should fail). */
+ data.data[0] = 'a';
+ ret = bpf_map_update_elem(bpf_map__fd(map), &zero, &data, BPF_ANY);
+ if (!ASSERT_LT(ret, 0, "bpf_map_update_elem data_input"))
+ goto close_prog;
+
+ ret = populate_data_item_mod(&data);
+ if (!ASSERT_OK(ret, "populate_data_item_mod"))
+ goto close_prog;
+
+ /* Test signature verification with system keyrings. */
+ if (data.data_len) {
+ skel->bss->user_keyring_serial = 0;
+ skel->bss->system_keyring_id = 0;
+
+ ret = bpf_map_update_elem(bpf_map__fd(map), &zero, &data,
+ BPF_ANY);
+ if (!ASSERT_OK(ret, "bpf_map_update_elem data_input"))
+ goto close_prog;
+
+ skel->bss->system_keyring_id = VERIFY_USE_SECONDARY_KEYRING;
+
+ ret = bpf_map_update_elem(bpf_map__fd(map), &zero, &data,
+ BPF_ANY);
+ if (!ASSERT_OK(ret, "bpf_map_update_elem data_input"))
+ goto close_prog;
+
+ skel->bss->system_keyring_id = VERIFY_USE_PLATFORM_KEYRING;
+
+ ret = bpf_map_update_elem(bpf_map__fd(map), &zero, &data,
+ BPF_ANY);
+ ASSERT_LT(ret, 0, "bpf_map_update_elem data_input");
+ }
+
+close_prog:
+ _run_setup_process(tmp_dir, "cleanup");
+
+ if (!skel)
+ return;
+
+ skel->bss->monitored_pid = 0;
+ test_verify_pkcs7_sig__destroy(skel);
+}
+
+static int get_signature_size(const char *sig_path)
+{
+ struct stat st;
+
+ if (stat(sig_path, &st) == -1)
+ return -1;
+
+ return st.st_size;
+}
+
+static int add_signature_to_xattr(const char *data_path, const char *sig_path)
+{
+ char sig[MAX_SIG_SIZE] = {0};
+ int fd, size, ret;
+
+ if (sig_path) {
+ fd = open(sig_path, O_RDONLY);
+ if (fd < 0)
+ return -1;
+
+ size = read(fd, sig, MAX_SIG_SIZE);
+ close(fd);
+ if (size <= 0)
+ return -1;
+ } else {
+ /* no sig_path, just write 32 bytes of zeros */
+ size = 32;
+ }
+ ret = setxattr(data_path, "user.sig", sig, size, 0);
+ if (!ASSERT_OK(ret, "setxattr"))
+ return -1;
+
+ return 0;
+}
+
+static int test_open_file(struct test_sig_in_xattr *skel, char *data_path,
+ pid_t pid, bool should_success, char *name)
+{
+ int ret;
+
+ skel->bss->monitored_pid = pid;
+ ret = open(data_path, O_RDONLY);
+ close(ret);
+ skel->bss->monitored_pid = 0;
+
+ if (should_success) {
+ if (!ASSERT_GE(ret, 0, name))
+ return -1;
+ } else {
+ if (!ASSERT_LT(ret, 0, name))
+ return -1;
+ }
+ return 0;
+}
+
+static void test_pkcs7_sig_fsverity(void)
+{
+ char data_path[PATH_MAX];
+ char sig_path[PATH_MAX];
+ char tmp_dir_template[] = "/tmp/verify_sigXXXXXX";
+ char *tmp_dir;
+ struct test_sig_in_xattr *skel = NULL;
+ pid_t pid;
+ int ret;
+
+ tmp_dir = mkdtemp(tmp_dir_template);
+ if (!ASSERT_OK_PTR(tmp_dir, "mkdtemp"))
+ return;
+
+ snprintf(data_path, PATH_MAX, "%s/data-file", tmp_dir);
+ snprintf(sig_path, PATH_MAX, "%s/sig-file", tmp_dir);
+
+ ret = _run_setup_process(tmp_dir, "setup");
+ if (!ASSERT_OK(ret, "_run_setup_process"))
+ goto out;
+
+ ret = _run_setup_process(tmp_dir, "fsverity-create-sign");
+
+ if (ret) {
+ printf("%s: SKIP: fsverity [sign|enable] doesn't work.\n"
+ "To run this test, try enable CONFIG_FS_VERITY and enable FSVerity for the filesystem.\n",
+ __func__);
+ test__skip();
+ goto out;
+ }
+
+ skel = test_sig_in_xattr__open();
+ if (!ASSERT_OK_PTR(skel, "test_sig_in_xattr__open"))
+ goto out;
+ ret = get_signature_size(sig_path);
+ if (!ASSERT_GT(ret, 0, "get_signature_size"))
+ goto out;
+ skel->bss->sig_size = ret;
+ skel->bss->user_keyring_serial = syscall(__NR_request_key, "keyring",
+ "ebpf_testing_keyring", NULL,
+ KEY_SPEC_SESSION_KEYRING);
+ memcpy(skel->bss->digest, "FSVerity", 8);
+
+ ret = test_sig_in_xattr__load(skel);
+ if (!ASSERT_OK(ret, "test_sig_in_xattr__load"))
+ goto out;
+
+ ret = test_sig_in_xattr__attach(skel);
+ if (!ASSERT_OK(ret, "test_sig_in_xattr__attach"))
+ goto out;
+
+ pid = getpid();
+
+ /* Case 1: fsverity is not enabled, open should succeed */
+ if (test_open_file(skel, data_path, pid, true, "open_1"))
+ goto out;
+
+ /* Case 2: fsverity is enabled, xattr is missing, open should
+ * fail
+ */
+ ret = _run_setup_process(tmp_dir, "fsverity-enable");
+ if (!ASSERT_OK(ret, "fsverity-enable"))
+ goto out;
+ if (test_open_file(skel, data_path, pid, false, "open_2"))
+ goto out;
+
+ /* Case 3: fsverity is enabled, xattr has valid signature, open
+ * should succeed
+ */
+ ret = add_signature_to_xattr(data_path, sig_path);
+ if (!ASSERT_OK(ret, "add_signature_to_xattr_1"))
+ goto out;
+
+ if (test_open_file(skel, data_path, pid, true, "open_3"))
+ goto out;
+
+ /* Case 4: fsverity is enabled, xattr has invalid signature, open
+ * should fail
+ */
+ ret = add_signature_to_xattr(data_path, NULL);
+ if (!ASSERT_OK(ret, "add_signature_to_xattr_2"))
+ goto out;
+ test_open_file(skel, data_path, pid, false, "open_4");
+
+out:
+ _run_setup_process(tmp_dir, "cleanup");
+ if (!skel)
+ return;
+
+ skel->bss->monitored_pid = 0;
+ test_sig_in_xattr__destroy(skel);
+}
+
+void test_verify_pkcs7_sig(void)
+{
+ if (test__start_subtest("pkcs7_sig_from_map"))
+ test_verify_pkcs7_sig_from_map();
+ if (test__start_subtest("pkcs7_sig_fsverity"))
+ test_pkcs7_sig_fsverity();
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/vmlinux.c b/tools/testing/selftests/bpf/prog_tests/vmlinux.c
index 72310cfc6474..6fb2217d940b 100644
--- a/tools/testing/selftests/bpf/prog_tests/vmlinux.c
+++ b/tools/testing/selftests/bpf/prog_tests/vmlinux.c
@@ -16,27 +16,27 @@ static void nsleep()
void test_vmlinux(void)
{
- int duration = 0, err;
+ int err;
struct test_vmlinux* skel;
struct test_vmlinux__bss *bss;
skel = test_vmlinux__open_and_load();
- if (CHECK(!skel, "skel_open", "failed to open skeleton\n"))
+ if (!ASSERT_OK_PTR(skel, "test_vmlinux__open_and_load"))
return;
bss = skel->bss;
err = test_vmlinux__attach(skel);
- if (CHECK(err, "skel_attach", "skeleton attach failed: %d\n", err))
+ if (!ASSERT_OK(err, "test_vmlinux__attach"))
goto cleanup;
/* trigger everything */
nsleep();
- CHECK(!bss->tp_called, "tp", "not called\n");
- CHECK(!bss->raw_tp_called, "raw_tp", "not called\n");
- CHECK(!bss->tp_btf_called, "tp_btf", "not called\n");
- CHECK(!bss->kprobe_called, "kprobe", "not called\n");
- CHECK(!bss->fentry_called, "fentry", "not called\n");
+ ASSERT_TRUE(bss->tp_called, "tp");
+ ASSERT_TRUE(bss->raw_tp_called, "raw_tp");
+ ASSERT_TRUE(bss->tp_btf_called, "tp_btf");
+ ASSERT_TRUE(bss->kprobe_called, "kprobe");
+ ASSERT_TRUE(bss->fentry_called, "fentry");
cleanup:
test_vmlinux__destroy(skel);
diff --git a/tools/testing/selftests/bpf/prog_tests/vrf_socket_lookup.c b/tools/testing/selftests/bpf/prog_tests/vrf_socket_lookup.c
new file mode 100644
index 000000000000..2a5e207edad6
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/vrf_socket_lookup.c
@@ -0,0 +1,312 @@
+// SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause
+
+/*
+ * Topology:
+ * ---------
+ * NS0 namespace | NS1 namespace
+ * |
+ * +--------------+ | +--------------+
+ * | veth01 |----------| veth10 |
+ * | 172.16.1.100 | | | 172.16.1.200 |
+ * | bpf | | +--------------+
+ * +--------------+ |
+ * server(UDP/TCP) |
+ * +-------------------+ |
+ * | vrf1 | |
+ * | +--------------+ | | +--------------+
+ * | | veth02 |----------| veth20 |
+ * | | 172.16.2.100 | | | | 172.16.2.200 |
+ * | | bpf | | | +--------------+
+ * | +--------------+ | |
+ * | server(UDP/TCP) | |
+ * +-------------------+ |
+ *
+ * Test flow
+ * -----------
+ * The tests verifies that socket lookup via TC is VRF aware:
+ * 1) Creates two veth pairs between NS0 and NS1:
+ * a) veth01 <-> veth10 outside the VRF
+ * b) veth02 <-> veth20 in the VRF
+ * 2) Attaches to veth01 and veth02 a program that calls:
+ * a) bpf_skc_lookup_tcp() with TCP and tcp_skc is true
+ * b) bpf_sk_lookup_tcp() with TCP and tcp_skc is false
+ * c) bpf_sk_lookup_udp() with UDP
+ * The program stores the lookup result in bss->lookup_status.
+ * 3) Creates a socket TCP/UDP server in/outside the VRF.
+ * 4) The test expects lookup_status to be:
+ * a) 0 from device in VRF to server outside VRF
+ * b) 0 from device outside VRF to server in VRF
+ * c) 1 from device in VRF to server in VRF
+ * d) 1 from device outside VRF to server outside VRF
+ */
+
+#include <net/if.h>
+
+#include "test_progs.h"
+#include "network_helpers.h"
+#include "vrf_socket_lookup.skel.h"
+
+#define NS0 "vrf_socket_lookup_0"
+#define NS1 "vrf_socket_lookup_1"
+
+#define IP4_ADDR_VETH01 "172.16.1.100"
+#define IP4_ADDR_VETH10 "172.16.1.200"
+#define IP4_ADDR_VETH02 "172.16.2.100"
+#define IP4_ADDR_VETH20 "172.16.2.200"
+
+#define NON_VRF_PORT 5000
+#define IN_VRF_PORT 5001
+
+#define TIMEOUT_MS 3000
+
+static int make_socket(int sotype, const char *ip, int port,
+ struct sockaddr_storage *addr)
+{
+ int err, fd;
+
+ err = make_sockaddr(AF_INET, ip, port, addr, NULL);
+ if (!ASSERT_OK(err, "make_address"))
+ return -1;
+
+ fd = socket(AF_INET, sotype, 0);
+ if (!ASSERT_GE(fd, 0, "socket"))
+ return -1;
+
+ if (!ASSERT_OK(settimeo(fd, TIMEOUT_MS), "settimeo"))
+ goto fail;
+
+ return fd;
+fail:
+ close(fd);
+ return -1;
+}
+
+static int make_server(int sotype, const char *ip, int port, const char *ifname)
+{
+ int err, fd = -1;
+
+ fd = start_server(AF_INET, sotype, ip, port, TIMEOUT_MS);
+ if (!ASSERT_GE(fd, 0, "start_server"))
+ return -1;
+
+ if (ifname) {
+ err = setsockopt(fd, SOL_SOCKET, SO_BINDTODEVICE,
+ ifname, strlen(ifname) + 1);
+ if (!ASSERT_OK(err, "setsockopt(SO_BINDTODEVICE)"))
+ goto fail;
+ }
+
+ return fd;
+fail:
+ close(fd);
+ return -1;
+}
+
+static int attach_progs(char *ifname, int tc_prog_fd, int xdp_prog_fd)
+{
+ LIBBPF_OPTS(bpf_tc_hook, hook, .attach_point = BPF_TC_INGRESS);
+ LIBBPF_OPTS(bpf_tc_opts, opts, .handle = 1, .priority = 1,
+ .prog_fd = tc_prog_fd);
+ int ret, ifindex;
+
+ ifindex = if_nametoindex(ifname);
+ if (!ASSERT_NEQ(ifindex, 0, "if_nametoindex"))
+ return -1;
+ hook.ifindex = ifindex;
+
+ ret = bpf_tc_hook_create(&hook);
+ if (!ASSERT_OK(ret, "bpf_tc_hook_create"))
+ return ret;
+
+ ret = bpf_tc_attach(&hook, &opts);
+ if (!ASSERT_OK(ret, "bpf_tc_attach")) {
+ bpf_tc_hook_destroy(&hook);
+ return ret;
+ }
+ ret = bpf_xdp_attach(ifindex, xdp_prog_fd, 0, NULL);
+ if (!ASSERT_OK(ret, "bpf_xdp_attach")) {
+ bpf_tc_hook_destroy(&hook);
+ return ret;
+ }
+
+ return 0;
+}
+
+static void cleanup(void)
+{
+ SYS_NOFAIL("test -f /var/run/netns/" NS0 " && ip netns delete "
+ NS0);
+ SYS_NOFAIL("test -f /var/run/netns/" NS1 " && ip netns delete "
+ NS1);
+}
+
+static int setup(struct vrf_socket_lookup *skel)
+{
+ int tc_prog_fd, xdp_prog_fd, ret = 0;
+ struct nstoken *nstoken = NULL;
+
+ SYS(fail, "ip netns add " NS0);
+ SYS(fail, "ip netns add " NS1);
+
+ /* NS0 <-> NS1 [veth01 <-> veth10] */
+ SYS(fail, "ip link add veth01 netns " NS0 " type veth peer name veth10"
+ " netns " NS1);
+ SYS(fail, "ip -net " NS0 " addr add " IP4_ADDR_VETH01 "/24 dev veth01");
+ SYS(fail, "ip -net " NS0 " link set dev veth01 up");
+ SYS(fail, "ip -net " NS1 " addr add " IP4_ADDR_VETH10 "/24 dev veth10");
+ SYS(fail, "ip -net " NS1 " link set dev veth10 up");
+
+ /* NS0 <-> NS1 [veth02 <-> veth20] */
+ SYS(fail, "ip link add veth02 netns " NS0 " type veth peer name veth20"
+ " netns " NS1);
+ SYS(fail, "ip -net " NS0 " addr add " IP4_ADDR_VETH02 "/24 dev veth02");
+ SYS(fail, "ip -net " NS0 " link set dev veth02 up");
+ SYS(fail, "ip -net " NS1 " addr add " IP4_ADDR_VETH20 "/24 dev veth20");
+ SYS(fail, "ip -net " NS1 " link set dev veth20 up");
+
+ /* veth02 -> vrf1 */
+ SYS(fail, "ip -net " NS0 " link add vrf1 type vrf table 11");
+ SYS(fail, "ip -net " NS0 " route add vrf vrf1 unreachable default"
+ " metric 4278198272");
+ SYS(fail, "ip -net " NS0 " link set vrf1 alias vrf");
+ SYS(fail, "ip -net " NS0 " link set vrf1 up");
+ SYS(fail, "ip -net " NS0 " link set veth02 master vrf1");
+
+ /* Attach TC and XDP progs to veth devices in NS0 */
+ nstoken = open_netns(NS0);
+ if (!ASSERT_OK_PTR(nstoken, "setns " NS0))
+ goto fail;
+ tc_prog_fd = bpf_program__fd(skel->progs.tc_socket_lookup);
+ if (!ASSERT_GE(tc_prog_fd, 0, "bpf_program__tc_fd"))
+ goto fail;
+ xdp_prog_fd = bpf_program__fd(skel->progs.xdp_socket_lookup);
+ if (!ASSERT_GE(xdp_prog_fd, 0, "bpf_program__xdp_fd"))
+ goto fail;
+
+ if (attach_progs("veth01", tc_prog_fd, xdp_prog_fd))
+ goto fail;
+
+ if (attach_progs("veth02", tc_prog_fd, xdp_prog_fd))
+ goto fail;
+
+ goto close;
+fail:
+ ret = -1;
+close:
+ if (nstoken)
+ close_netns(nstoken);
+ return ret;
+}
+
+static int test_lookup(struct vrf_socket_lookup *skel, int sotype,
+ const char *ip, int port, bool test_xdp, bool tcp_skc,
+ int lookup_status_exp)
+{
+ static const char msg[] = "Hello Server";
+ struct sockaddr_storage addr = {};
+ int fd, ret = 0;
+
+ fd = make_socket(sotype, ip, port, &addr);
+ if (fd < 0)
+ return -1;
+
+ skel->bss->test_xdp = test_xdp;
+ skel->bss->tcp_skc = tcp_skc;
+ skel->bss->lookup_status = -1;
+
+ if (sotype == SOCK_STREAM)
+ connect(fd, (void *)&addr, sizeof(struct sockaddr_in));
+ else
+ sendto(fd, msg, sizeof(msg), 0, (void *)&addr,
+ sizeof(struct sockaddr_in));
+
+ if (!ASSERT_EQ(skel->bss->lookup_status, lookup_status_exp,
+ "lookup_status"))
+ goto fail;
+
+ goto close;
+
+fail:
+ ret = -1;
+close:
+ close(fd);
+ return ret;
+}
+
+static void _test_vrf_socket_lookup(struct vrf_socket_lookup *skel, int sotype,
+ bool test_xdp, bool tcp_skc)
+{
+ int in_vrf_server = -1, non_vrf_server = -1;
+ struct nstoken *nstoken = NULL;
+
+ nstoken = open_netns(NS0);
+ if (!ASSERT_OK_PTR(nstoken, "setns " NS0))
+ goto done;
+
+ /* Open sockets in and outside VRF */
+ non_vrf_server = make_server(sotype, "0.0.0.0", NON_VRF_PORT, NULL);
+ if (!ASSERT_GE(non_vrf_server, 0, "make_server__outside_vrf_fd"))
+ goto done;
+
+ in_vrf_server = make_server(sotype, "0.0.0.0", IN_VRF_PORT, "veth02");
+ if (!ASSERT_GE(in_vrf_server, 0, "make_server__in_vrf_fd"))
+ goto done;
+
+ /* Perform test from NS1 */
+ close_netns(nstoken);
+ nstoken = open_netns(NS1);
+ if (!ASSERT_OK_PTR(nstoken, "setns " NS1))
+ goto done;
+
+ if (!ASSERT_OK(test_lookup(skel, sotype, IP4_ADDR_VETH02, NON_VRF_PORT,
+ test_xdp, tcp_skc, 0), "in_to_out"))
+ goto done;
+ if (!ASSERT_OK(test_lookup(skel, sotype, IP4_ADDR_VETH02, IN_VRF_PORT,
+ test_xdp, tcp_skc, 1), "in_to_in"))
+ goto done;
+ if (!ASSERT_OK(test_lookup(skel, sotype, IP4_ADDR_VETH01, NON_VRF_PORT,
+ test_xdp, tcp_skc, 1), "out_to_out"))
+ goto done;
+ if (!ASSERT_OK(test_lookup(skel, sotype, IP4_ADDR_VETH01, IN_VRF_PORT,
+ test_xdp, tcp_skc, 0), "out_to_in"))
+ goto done;
+
+done:
+ if (non_vrf_server >= 0)
+ close(non_vrf_server);
+ if (in_vrf_server >= 0)
+ close(in_vrf_server);
+ if (nstoken)
+ close_netns(nstoken);
+}
+
+void test_vrf_socket_lookup(void)
+{
+ struct vrf_socket_lookup *skel;
+
+ cleanup();
+
+ skel = vrf_socket_lookup__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "vrf_socket_lookup__open_and_load"))
+ return;
+
+ if (!ASSERT_OK(setup(skel), "setup"))
+ goto done;
+
+ if (test__start_subtest("tc_socket_lookup_tcp"))
+ _test_vrf_socket_lookup(skel, SOCK_STREAM, false, false);
+ if (test__start_subtest("tc_socket_lookup_tcp_skc"))
+ _test_vrf_socket_lookup(skel, SOCK_STREAM, false, false);
+ if (test__start_subtest("tc_socket_lookup_udp"))
+ _test_vrf_socket_lookup(skel, SOCK_STREAM, false, false);
+ if (test__start_subtest("xdp_socket_lookup_tcp"))
+ _test_vrf_socket_lookup(skel, SOCK_STREAM, true, false);
+ if (test__start_subtest("xdp_socket_lookup_tcp_skc"))
+ _test_vrf_socket_lookup(skel, SOCK_STREAM, true, false);
+ if (test__start_subtest("xdp_socket_lookup_udp"))
+ _test_vrf_socket_lookup(skel, SOCK_STREAM, true, false);
+
+done:
+ vrf_socket_lookup__destroy(skel);
+ cleanup();
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/xdp.c b/tools/testing/selftests/bpf/prog_tests/xdp.c
index 48921ff74850..947863a1d536 100644
--- a/tools/testing/selftests/bpf/prog_tests/xdp.c
+++ b/tools/testing/selftests/bpf/prog_tests/xdp.c
@@ -8,15 +8,21 @@ void test_xdp(void)
struct vip key6 = {.protocol = 6, .family = AF_INET6};
struct iptnl_info value4 = {.family = AF_INET};
struct iptnl_info value6 = {.family = AF_INET6};
- const char *file = "./test_xdp.o";
+ const char *file = "./test_xdp.bpf.o";
struct bpf_object *obj;
char buf[128];
- struct ipv6hdr *iph6 = (void *)buf + sizeof(struct ethhdr);
- struct iphdr *iph = (void *)buf + sizeof(struct ethhdr);
- __u32 duration, retval, size;
+ struct ipv6hdr iph6;
+ struct iphdr iph;
int err, prog_fd, map_fd;
+ LIBBPF_OPTS(bpf_test_run_opts, topts,
+ .data_in = &pkt_v4,
+ .data_size_in = sizeof(pkt_v4),
+ .data_out = buf,
+ .data_size_out = sizeof(buf),
+ .repeat = 1,
+ );
- err = bpf_prog_load(file, BPF_PROG_TYPE_XDP, &obj, &prog_fd);
+ err = bpf_prog_test_load(file, BPF_PROG_TYPE_XDP, &obj, &prog_fd);
if (CHECK_FAIL(err))
return;
@@ -26,20 +32,23 @@ void test_xdp(void)
bpf_map_update_elem(map_fd, &key4, &value4, 0);
bpf_map_update_elem(map_fd, &key6, &value6, 0);
- err = bpf_prog_test_run(prog_fd, 1, &pkt_v4, sizeof(pkt_v4),
- buf, &size, &retval, &duration);
+ err = bpf_prog_test_run_opts(prog_fd, &topts);
+ memcpy(&iph, buf + sizeof(struct ethhdr), sizeof(iph));
+ ASSERT_OK(err, "test_run");
+ ASSERT_EQ(topts.retval, XDP_TX, "ipv4 test_run retval");
+ ASSERT_EQ(topts.data_size_out, 74, "ipv4 test_run data_size_out");
+ ASSERT_EQ(iph.protocol, IPPROTO_IPIP, "ipv4 test_run iph.protocol");
- CHECK(err || retval != XDP_TX || size != 74 ||
- iph->protocol != IPPROTO_IPIP, "ipv4",
- "err %d errno %d retval %d size %d\n",
- err, errno, retval, size);
+ topts.data_in = &pkt_v6;
+ topts.data_size_in = sizeof(pkt_v6);
+ topts.data_size_out = sizeof(buf);
- err = bpf_prog_test_run(prog_fd, 1, &pkt_v6, sizeof(pkt_v6),
- buf, &size, &retval, &duration);
- CHECK(err || retval != XDP_TX || size != 114 ||
- iph6->nexthdr != IPPROTO_IPV6, "ipv6",
- "err %d errno %d retval %d size %d\n",
- err, errno, retval, size);
+ err = bpf_prog_test_run_opts(prog_fd, &topts);
+ memcpy(&iph6, buf + sizeof(struct ethhdr), sizeof(iph6));
+ ASSERT_OK(err, "test_run");
+ ASSERT_EQ(topts.retval, XDP_TX, "ipv6 test_run retval");
+ ASSERT_EQ(topts.data_size_out, 114, "ipv6 test_run data_size_out");
+ ASSERT_EQ(iph6.nexthdr, IPPROTO_IPV6, "ipv6 test_run iph6.nexthdr");
out:
bpf_object__close(obj);
}
diff --git a/tools/testing/selftests/bpf/prog_tests/xdp_adjust_frags.c b/tools/testing/selftests/bpf/prog_tests/xdp_adjust_frags.c
new file mode 100644
index 000000000000..fce203640f8c
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/xdp_adjust_frags.c
@@ -0,0 +1,146 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <test_progs.h>
+#include <network_helpers.h>
+
+static void test_xdp_update_frags(void)
+{
+ const char *file = "./test_xdp_update_frags.bpf.o";
+ int err, prog_fd, max_skb_frags, buf_size, num;
+ struct bpf_program *prog;
+ struct bpf_object *obj;
+ __u32 *offset;
+ __u8 *buf;
+ FILE *f;
+ LIBBPF_OPTS(bpf_test_run_opts, topts);
+
+ obj = bpf_object__open(file);
+ if (libbpf_get_error(obj))
+ return;
+
+ prog = bpf_object__next_program(obj, NULL);
+ if (bpf_object__load(obj))
+ return;
+
+ prog_fd = bpf_program__fd(prog);
+
+ buf = malloc(128);
+ if (!ASSERT_OK_PTR(buf, "alloc buf 128b"))
+ goto out;
+
+ memset(buf, 0, 128);
+ offset = (__u32 *)buf;
+ *offset = 16;
+ buf[*offset] = 0xaa; /* marker at offset 16 (head) */
+ buf[*offset + 15] = 0xaa; /* marker at offset 31 (head) */
+
+ topts.data_in = buf;
+ topts.data_out = buf;
+ topts.data_size_in = 128;
+ topts.data_size_out = 128;
+
+ err = bpf_prog_test_run_opts(prog_fd, &topts);
+
+ /* test_xdp_update_frags: buf[16,31]: 0xaa -> 0xbb */
+ ASSERT_OK(err, "xdp_update_frag");
+ ASSERT_EQ(topts.retval, XDP_PASS, "xdp_update_frag retval");
+ ASSERT_EQ(buf[16], 0xbb, "xdp_update_frag buf[16]");
+ ASSERT_EQ(buf[31], 0xbb, "xdp_update_frag buf[31]");
+
+ free(buf);
+
+ buf = malloc(9000);
+ if (!ASSERT_OK_PTR(buf, "alloc buf 9Kb"))
+ goto out;
+
+ memset(buf, 0, 9000);
+ offset = (__u32 *)buf;
+ *offset = 5000;
+ buf[*offset] = 0xaa; /* marker at offset 5000 (frag0) */
+ buf[*offset + 15] = 0xaa; /* marker at offset 5015 (frag0) */
+
+ topts.data_in = buf;
+ topts.data_out = buf;
+ topts.data_size_in = 9000;
+ topts.data_size_out = 9000;
+
+ err = bpf_prog_test_run_opts(prog_fd, &topts);
+
+ /* test_xdp_update_frags: buf[5000,5015]: 0xaa -> 0xbb */
+ ASSERT_OK(err, "xdp_update_frag");
+ ASSERT_EQ(topts.retval, XDP_PASS, "xdp_update_frag retval");
+ ASSERT_EQ(buf[5000], 0xbb, "xdp_update_frag buf[5000]");
+ ASSERT_EQ(buf[5015], 0xbb, "xdp_update_frag buf[5015]");
+
+ memset(buf, 0, 9000);
+ offset = (__u32 *)buf;
+ *offset = 3510;
+ buf[*offset] = 0xaa; /* marker at offset 3510 (head) */
+ buf[*offset + 15] = 0xaa; /* marker at offset 3525 (frag0) */
+
+ err = bpf_prog_test_run_opts(prog_fd, &topts);
+
+ /* test_xdp_update_frags: buf[3510,3525]: 0xaa -> 0xbb */
+ ASSERT_OK(err, "xdp_update_frag");
+ ASSERT_EQ(topts.retval, XDP_PASS, "xdp_update_frag retval");
+ ASSERT_EQ(buf[3510], 0xbb, "xdp_update_frag buf[3510]");
+ ASSERT_EQ(buf[3525], 0xbb, "xdp_update_frag buf[3525]");
+
+ memset(buf, 0, 9000);
+ offset = (__u32 *)buf;
+ *offset = 7606;
+ buf[*offset] = 0xaa; /* marker at offset 7606 (frag0) */
+ buf[*offset + 15] = 0xaa; /* marker at offset 7621 (frag1) */
+
+ err = bpf_prog_test_run_opts(prog_fd, &topts);
+
+ /* test_xdp_update_frags: buf[7606,7621]: 0xaa -> 0xbb */
+ ASSERT_OK(err, "xdp_update_frag");
+ ASSERT_EQ(topts.retval, XDP_PASS, "xdp_update_frag retval");
+ ASSERT_EQ(buf[7606], 0xbb, "xdp_update_frag buf[7606]");
+ ASSERT_EQ(buf[7621], 0xbb, "xdp_update_frag buf[7621]");
+
+ free(buf);
+
+ /* test_xdp_update_frags: unsupported buffer size */
+ f = fopen("/proc/sys/net/core/max_skb_frags", "r");
+ if (!ASSERT_OK_PTR(f, "max_skb_frag file pointer"))
+ goto out;
+
+ num = fscanf(f, "%d", &max_skb_frags);
+ fclose(f);
+
+ if (!ASSERT_EQ(num, 1, "max_skb_frags read failed"))
+ goto out;
+
+ /* xdp_buff linear area size is always set to 4096 in the
+ * bpf_prog_test_run_xdp routine.
+ */
+ buf_size = 4096 + (max_skb_frags + 1) * sysconf(_SC_PAGE_SIZE);
+ buf = malloc(buf_size);
+ if (!ASSERT_OK_PTR(buf, "alloc buf"))
+ goto out;
+
+ memset(buf, 0, buf_size);
+ offset = (__u32 *)buf;
+ *offset = 16;
+ buf[*offset] = 0xaa;
+ buf[*offset + 15] = 0xaa;
+
+ topts.data_in = buf;
+ topts.data_out = buf;
+ topts.data_size_in = buf_size;
+ topts.data_size_out = buf_size;
+
+ err = bpf_prog_test_run_opts(prog_fd, &topts);
+ ASSERT_EQ(err, -ENOMEM,
+ "unsupported buf size, possible non-default /proc/sys/net/core/max_skb_flags?");
+ free(buf);
+out:
+ bpf_object__close(obj);
+}
+
+void test_xdp_adjust_frags(void)
+{
+ if (test__start_subtest("xdp_adjust_frags"))
+ test_xdp_update_frags();
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/xdp_adjust_tail.c b/tools/testing/selftests/bpf/prog_tests/xdp_adjust_tail.c
index d5c98f2cb12f..f09505f8b038 100644
--- a/tools/testing/selftests/bpf/prog_tests/xdp_adjust_tail.c
+++ b/tools/testing/selftests/bpf/prog_tests/xdp_adjust_tail.c
@@ -2,81 +2,100 @@
#include <test_progs.h>
#include <network_helpers.h>
-void test_xdp_adjust_tail_shrink(void)
+static void test_xdp_adjust_tail_shrink(void)
{
- const char *file = "./test_xdp_adjust_tail_shrink.o";
- __u32 duration, retval, size, expect_sz;
+ const char *file = "./test_xdp_adjust_tail_shrink.bpf.o";
+ __u32 expect_sz;
struct bpf_object *obj;
int err, prog_fd;
char buf[128];
+ LIBBPF_OPTS(bpf_test_run_opts, topts,
+ .data_in = &pkt_v4,
+ .data_size_in = sizeof(pkt_v4),
+ .data_out = buf,
+ .data_size_out = sizeof(buf),
+ .repeat = 1,
+ );
- err = bpf_prog_load(file, BPF_PROG_TYPE_XDP, &obj, &prog_fd);
- if (CHECK_FAIL(err))
+ err = bpf_prog_test_load(file, BPF_PROG_TYPE_XDP, &obj, &prog_fd);
+ if (!ASSERT_OK(err, "test_xdp_adjust_tail_shrink"))
return;
- err = bpf_prog_test_run(prog_fd, 1, &pkt_v4, sizeof(pkt_v4),
- buf, &size, &retval, &duration);
-
- CHECK(err || retval != XDP_DROP,
- "ipv4", "err %d errno %d retval %d size %d\n",
- err, errno, retval, size);
+ err = bpf_prog_test_run_opts(prog_fd, &topts);
+ ASSERT_OK(err, "ipv4");
+ ASSERT_EQ(topts.retval, XDP_DROP, "ipv4 retval");
expect_sz = sizeof(pkt_v6) - 20; /* Test shrink with 20 bytes */
- err = bpf_prog_test_run(prog_fd, 1, &pkt_v6, sizeof(pkt_v6),
- buf, &size, &retval, &duration);
- CHECK(err || retval != XDP_TX || size != expect_sz,
- "ipv6", "err %d errno %d retval %d size %d expect-size %d\n",
- err, errno, retval, size, expect_sz);
+ topts.data_in = &pkt_v6;
+ topts.data_size_in = sizeof(pkt_v6);
+ topts.data_size_out = sizeof(buf);
+ err = bpf_prog_test_run_opts(prog_fd, &topts);
+ ASSERT_OK(err, "ipv6");
+ ASSERT_EQ(topts.retval, XDP_TX, "ipv6 retval");
+ ASSERT_EQ(topts.data_size_out, expect_sz, "ipv6 size");
+
bpf_object__close(obj);
}
-void test_xdp_adjust_tail_grow(void)
+static void test_xdp_adjust_tail_grow(void)
{
- const char *file = "./test_xdp_adjust_tail_grow.o";
+ const char *file = "./test_xdp_adjust_tail_grow.bpf.o";
struct bpf_object *obj;
char buf[4096]; /* avoid segfault: large buf to hold grow results */
- __u32 duration, retval, size, expect_sz;
+ __u32 expect_sz;
int err, prog_fd;
+ LIBBPF_OPTS(bpf_test_run_opts, topts,
+ .data_in = &pkt_v4,
+ .data_size_in = sizeof(pkt_v4),
+ .data_out = buf,
+ .data_size_out = sizeof(buf),
+ .repeat = 1,
+ );
- err = bpf_prog_load(file, BPF_PROG_TYPE_XDP, &obj, &prog_fd);
- if (CHECK_FAIL(err))
+ err = bpf_prog_test_load(file, BPF_PROG_TYPE_XDP, &obj, &prog_fd);
+ if (!ASSERT_OK(err, "test_xdp_adjust_tail_grow"))
return;
- err = bpf_prog_test_run(prog_fd, 1, &pkt_v4, sizeof(pkt_v4),
- buf, &size, &retval, &duration);
- CHECK(err || retval != XDP_DROP,
- "ipv4", "err %d errno %d retval %d size %d\n",
- err, errno, retval, size);
+ err = bpf_prog_test_run_opts(prog_fd, &topts);
+ ASSERT_OK(err, "ipv4");
+ ASSERT_EQ(topts.retval, XDP_DROP, "ipv4 retval");
expect_sz = sizeof(pkt_v6) + 40; /* Test grow with 40 bytes */
- err = bpf_prog_test_run(prog_fd, 1, &pkt_v6, sizeof(pkt_v6) /* 74 */,
- buf, &size, &retval, &duration);
- CHECK(err || retval != XDP_TX || size != expect_sz,
- "ipv6", "err %d errno %d retval %d size %d expect-size %d\n",
- err, errno, retval, size, expect_sz);
+ topts.data_in = &pkt_v6;
+ topts.data_size_in = sizeof(pkt_v6);
+ topts.data_size_out = sizeof(buf);
+ err = bpf_prog_test_run_opts(prog_fd, &topts);
+ ASSERT_OK(err, "ipv6");
+ ASSERT_EQ(topts.retval, XDP_TX, "ipv6 retval");
+ ASSERT_EQ(topts.data_size_out, expect_sz, "ipv6 size");
bpf_object__close(obj);
}
-void test_xdp_adjust_tail_grow2(void)
+static void test_xdp_adjust_tail_grow2(void)
{
- const char *file = "./test_xdp_adjust_tail_grow.o";
+ const char *file = "./test_xdp_adjust_tail_grow.bpf.o";
char buf[4096]; /* avoid segfault: large buf to hold grow results */
- int tailroom = 320; /* SKB_DATA_ALIGN(sizeof(struct skb_shared_info))*/;
struct bpf_object *obj;
int err, cnt, i;
- int max_grow;
+ int max_grow, prog_fd;
+ /* SKB_DATA_ALIGN(sizeof(struct skb_shared_info)) */
+#if defined(__s390x__)
+ int tailroom = 512;
+#else
+ int tailroom = 320;
+#endif
- struct bpf_prog_test_run_attr tattr = {
+ LIBBPF_OPTS(bpf_test_run_opts, tattr,
.repeat = 1,
.data_in = &buf,
.data_out = &buf,
.data_size_in = 0, /* Per test */
.data_size_out = 0, /* Per test */
- };
+ );
- err = bpf_prog_load(file, BPF_PROG_TYPE_XDP, &obj, &tattr.prog_fd);
- if (CHECK_ATTR(err, "load", "err %d errno %d\n", err, errno))
+ err = bpf_prog_test_load(file, BPF_PROG_TYPE_XDP, &obj, &prog_fd);
+ if (!ASSERT_OK(err, "test_xdp_adjust_tail_grow"))
return;
/* Test case-64 */
@@ -84,49 +103,171 @@ void test_xdp_adjust_tail_grow2(void)
tattr.data_size_in = 64; /* Determine test case via pkt size */
tattr.data_size_out = 128; /* Limit copy_size */
/* Kernel side alloc packet memory area that is zero init */
- err = bpf_prog_test_run_xattr(&tattr);
+ err = bpf_prog_test_run_opts(prog_fd, &tattr);
- CHECK_ATTR(errno != ENOSPC /* Due limit copy_size in bpf_test_finish */
- || tattr.retval != XDP_TX
- || tattr.data_size_out != 192, /* Expected grow size */
- "case-64",
- "err %d errno %d retval %d size %d\n",
- err, errno, tattr.retval, tattr.data_size_out);
+ ASSERT_EQ(errno, ENOSPC, "case-64 errno"); /* Due limit copy_size in bpf_test_finish */
+ ASSERT_EQ(tattr.retval, XDP_TX, "case-64 retval");
+ ASSERT_EQ(tattr.data_size_out, 192, "case-64 data_size_out"); /* Expected grow size */
/* Extra checks for data contents */
- CHECK_ATTR(tattr.data_size_out != 192
- || buf[0] != 1 || buf[63] != 1 /* 0-63 memset to 1 */
- || buf[64] != 0 || buf[127] != 0 /* 64-127 memset to 0 */
- || buf[128] != 1 || buf[191] != 1, /*128-191 memset to 1 */
- "case-64-data",
- "err %d errno %d retval %d size %d\n",
- err, errno, tattr.retval, tattr.data_size_out);
+ ASSERT_EQ(buf[0], 1, "case-64-data buf[0]"); /* 0-63 memset to 1 */
+ ASSERT_EQ(buf[63], 1, "case-64-data buf[63]");
+ ASSERT_EQ(buf[64], 0, "case-64-data buf[64]"); /* 64-127 memset to 0 */
+ ASSERT_EQ(buf[127], 0, "case-64-data buf[127]");
+ ASSERT_EQ(buf[128], 1, "case-64-data buf[128]"); /* 128-191 memset to 1 */
+ ASSERT_EQ(buf[191], 1, "case-64-data buf[191]");
/* Test case-128 */
memset(buf, 2, sizeof(buf));
tattr.data_size_in = 128; /* Determine test case via pkt size */
tattr.data_size_out = sizeof(buf); /* Copy everything */
- err = bpf_prog_test_run_xattr(&tattr);
+ err = bpf_prog_test_run_opts(prog_fd, &tattr);
max_grow = 4096 - XDP_PACKET_HEADROOM - tailroom; /* 3520 */
- CHECK_ATTR(err
- || tattr.retval != XDP_TX
- || tattr.data_size_out != max_grow,/* Expect max grow size */
- "case-128",
- "err %d errno %d retval %d size %d expect-size %d\n",
- err, errno, tattr.retval, tattr.data_size_out, max_grow);
+ ASSERT_OK(err, "case-128");
+ ASSERT_EQ(tattr.retval, XDP_TX, "case-128 retval");
+ ASSERT_EQ(tattr.data_size_out, max_grow, "case-128 data_size_out"); /* Expect max grow */
/* Extra checks for data content: Count grow size, will contain zeros */
for (i = 0, cnt = 0; i < sizeof(buf); i++) {
if (buf[i] == 0)
cnt++;
}
- CHECK_ATTR((cnt != (max_grow - tattr.data_size_in)) /* Grow increase */
- || tattr.data_size_out != max_grow, /* Total grow size */
- "case-128-data",
- "err %d errno %d retval %d size %d grow-size %d\n",
- err, errno, tattr.retval, tattr.data_size_out, cnt);
+ ASSERT_EQ(cnt, max_grow - tattr.data_size_in, "case-128-data cnt"); /* Grow increase */
+ ASSERT_EQ(tattr.data_size_out, max_grow, "case-128-data data_size_out"); /* Total grow */
+
+ bpf_object__close(obj);
+}
+
+static void test_xdp_adjust_frags_tail_shrink(void)
+{
+ const char *file = "./test_xdp_adjust_tail_shrink.bpf.o";
+ __u32 exp_size;
+ struct bpf_program *prog;
+ struct bpf_object *obj;
+ int err, prog_fd;
+ __u8 *buf;
+ LIBBPF_OPTS(bpf_test_run_opts, topts);
+
+ /* For the individual test cases, the first byte in the packet
+ * indicates which test will be run.
+ */
+ obj = bpf_object__open(file);
+ if (libbpf_get_error(obj))
+ return;
+
+ prog = bpf_object__next_program(obj, NULL);
+ if (bpf_object__load(obj))
+ return;
+
+ prog_fd = bpf_program__fd(prog);
+
+ buf = malloc(9000);
+ if (!ASSERT_OK_PTR(buf, "alloc buf 9Kb"))
+ goto out;
+
+ memset(buf, 0, 9000);
+
+ /* Test case removing 10 bytes from last frag, NOT freeing it */
+ exp_size = 8990; /* 9000 - 10 */
+ topts.data_in = buf;
+ topts.data_out = buf;
+ topts.data_size_in = 9000;
+ topts.data_size_out = 9000;
+ err = bpf_prog_test_run_opts(prog_fd, &topts);
+
+ ASSERT_OK(err, "9Kb-10b");
+ ASSERT_EQ(topts.retval, XDP_TX, "9Kb-10b retval");
+ ASSERT_EQ(topts.data_size_out, exp_size, "9Kb-10b size");
+
+ /* Test case removing one of two pages, assuming 4K pages */
+ buf[0] = 1;
+ exp_size = 4900; /* 9000 - 4100 */
+
+ topts.data_size_out = 9000; /* reset from previous invocation */
+ err = bpf_prog_test_run_opts(prog_fd, &topts);
+
+ ASSERT_OK(err, "9Kb-4Kb");
+ ASSERT_EQ(topts.retval, XDP_TX, "9Kb-4Kb retval");
+ ASSERT_EQ(topts.data_size_out, exp_size, "9Kb-4Kb size");
+
+ /* Test case removing two pages resulting in a linear xdp_buff */
+ buf[0] = 2;
+ exp_size = 800; /* 9000 - 8200 */
+ topts.data_size_out = 9000; /* reset from previous invocation */
+ err = bpf_prog_test_run_opts(prog_fd, &topts);
+
+ ASSERT_OK(err, "9Kb-9Kb");
+ ASSERT_EQ(topts.retval, XDP_TX, "9Kb-9Kb retval");
+ ASSERT_EQ(topts.data_size_out, exp_size, "9Kb-9Kb size");
+
+ free(buf);
+out:
+ bpf_object__close(obj);
+}
+
+static void test_xdp_adjust_frags_tail_grow(void)
+{
+ const char *file = "./test_xdp_adjust_tail_grow.bpf.o";
+ __u32 exp_size;
+ struct bpf_program *prog;
+ struct bpf_object *obj;
+ int err, i, prog_fd;
+ __u8 *buf;
+ LIBBPF_OPTS(bpf_test_run_opts, topts);
+
+ obj = bpf_object__open(file);
+ if (libbpf_get_error(obj))
+ return;
+
+ prog = bpf_object__next_program(obj, NULL);
+ if (bpf_object__load(obj))
+ return;
+
+ prog_fd = bpf_program__fd(prog);
+
+ buf = malloc(16384);
+ if (!ASSERT_OK_PTR(buf, "alloc buf 16Kb"))
+ goto out;
+
+ /* Test case add 10 bytes to last frag */
+ memset(buf, 1, 16384);
+ exp_size = 9000 + 10;
+
+ topts.data_in = buf;
+ topts.data_out = buf;
+ topts.data_size_in = 9000;
+ topts.data_size_out = 16384;
+ err = bpf_prog_test_run_opts(prog_fd, &topts);
+
+ ASSERT_OK(err, "9Kb+10b");
+ ASSERT_EQ(topts.retval, XDP_TX, "9Kb+10b retval");
+ ASSERT_EQ(topts.data_size_out, exp_size, "9Kb+10b size");
+
+ for (i = 0; i < 9000; i++)
+ ASSERT_EQ(buf[i], 1, "9Kb+10b-old");
+
+ for (i = 9000; i < 9010; i++)
+ ASSERT_EQ(buf[i], 0, "9Kb+10b-new");
+
+ for (i = 9010; i < 16384; i++)
+ ASSERT_EQ(buf[i], 1, "9Kb+10b-untouched");
+
+ /* Test a too large grow */
+ memset(buf, 1, 16384);
+ exp_size = 9001;
+
+ topts.data_in = topts.data_out = buf;
+ topts.data_size_in = 9001;
+ topts.data_size_out = 16384;
+ err = bpf_prog_test_run_opts(prog_fd, &topts);
+
+ ASSERT_OK(err, "9Kb+10b");
+ ASSERT_EQ(topts.retval, XDP_DROP, "9Kb+10b retval");
+ ASSERT_EQ(topts.data_size_out, exp_size, "9Kb+10b size");
+ free(buf);
+out:
bpf_object__close(obj);
}
@@ -138,4 +279,8 @@ void test_xdp_adjust_tail(void)
test_xdp_adjust_tail_grow();
if (test__start_subtest("xdp_adjust_tail_grow2"))
test_xdp_adjust_tail_grow2();
+ if (test__start_subtest("xdp_adjust_frags_tail_shrink"))
+ test_xdp_adjust_frags_tail_shrink();
+ if (test__start_subtest("xdp_adjust_frags_tail_grow"))
+ test_xdp_adjust_frags_tail_grow();
}
diff --git a/tools/testing/selftests/bpf/prog_tests/xdp_attach.c b/tools/testing/selftests/bpf/prog_tests/xdp_attach.c
index 15ef3531483e..e6bcb6051402 100644
--- a/tools/testing/selftests/bpf/prog_tests/xdp_attach.c
+++ b/tools/testing/selftests/bpf/prog_tests/xdp_attach.c
@@ -1,86 +1,83 @@
// SPDX-License-Identifier: GPL-2.0
#include <test_progs.h>
+#include "test_xdp_attach_fail.skel.h"
#define IFINDEX_LO 1
#define XDP_FLAGS_REPLACE (1U << 4)
-void test_xdp_attach(void)
+static void test_xdp_attach(const char *file)
{
__u32 duration = 0, id1, id2, id0 = 0, len;
struct bpf_object *obj1, *obj2, *obj3;
- const char *file = "./test_xdp.o";
struct bpf_prog_info info = {};
int err, fd1, fd2, fd3;
- DECLARE_LIBBPF_OPTS(bpf_xdp_set_link_opts, opts,
- .old_fd = -1);
+ LIBBPF_OPTS(bpf_xdp_attach_opts, opts);
len = sizeof(info);
- err = bpf_prog_load(file, BPF_PROG_TYPE_XDP, &obj1, &fd1);
+ err = bpf_prog_test_load(file, BPF_PROG_TYPE_XDP, &obj1, &fd1);
if (CHECK_FAIL(err))
return;
- err = bpf_obj_get_info_by_fd(fd1, &info, &len);
+ err = bpf_prog_get_info_by_fd(fd1, &info, &len);
if (CHECK_FAIL(err))
goto out_1;
id1 = info.id;
- err = bpf_prog_load(file, BPF_PROG_TYPE_XDP, &obj2, &fd2);
+ err = bpf_prog_test_load(file, BPF_PROG_TYPE_XDP, &obj2, &fd2);
if (CHECK_FAIL(err))
goto out_1;
memset(&info, 0, sizeof(info));
- err = bpf_obj_get_info_by_fd(fd2, &info, &len);
+ err = bpf_prog_get_info_by_fd(fd2, &info, &len);
if (CHECK_FAIL(err))
goto out_2;
id2 = info.id;
- err = bpf_prog_load(file, BPF_PROG_TYPE_XDP, &obj3, &fd3);
+ err = bpf_prog_test_load(file, BPF_PROG_TYPE_XDP, &obj3, &fd3);
if (CHECK_FAIL(err))
goto out_2;
- err = bpf_set_link_xdp_fd_opts(IFINDEX_LO, fd1, XDP_FLAGS_REPLACE,
- &opts);
+ err = bpf_xdp_attach(IFINDEX_LO, fd1, XDP_FLAGS_REPLACE, &opts);
if (CHECK(err, "load_ok", "initial load failed"))
goto out_close;
- err = bpf_get_link_xdp_id(IFINDEX_LO, &id0, 0);
+ err = bpf_xdp_query_id(IFINDEX_LO, 0, &id0);
if (CHECK(err || id0 != id1, "id1_check",
"loaded prog id %u != id1 %u, err %d", id0, id1, err))
goto out_close;
- err = bpf_set_link_xdp_fd_opts(IFINDEX_LO, fd2, XDP_FLAGS_REPLACE,
- &opts);
+ err = bpf_xdp_attach(IFINDEX_LO, fd2, XDP_FLAGS_REPLACE, &opts);
if (CHECK(!err, "load_fail", "load with expected id didn't fail"))
goto out;
- opts.old_fd = fd1;
- err = bpf_set_link_xdp_fd_opts(IFINDEX_LO, fd2, 0, &opts);
+ opts.old_prog_fd = fd1;
+ err = bpf_xdp_attach(IFINDEX_LO, fd2, 0, &opts);
if (CHECK(err, "replace_ok", "replace valid old_fd failed"))
goto out;
- err = bpf_get_link_xdp_id(IFINDEX_LO, &id0, 0);
+ err = bpf_xdp_query_id(IFINDEX_LO, 0, &id0);
if (CHECK(err || id0 != id2, "id2_check",
"loaded prog id %u != id2 %u, err %d", id0, id2, err))
goto out_close;
- err = bpf_set_link_xdp_fd_opts(IFINDEX_LO, fd3, 0, &opts);
+ err = bpf_xdp_attach(IFINDEX_LO, fd3, 0, &opts);
if (CHECK(!err, "replace_fail", "replace invalid old_fd didn't fail"))
goto out;
- err = bpf_set_link_xdp_fd_opts(IFINDEX_LO, -1, 0, &opts);
+ err = bpf_xdp_detach(IFINDEX_LO, 0, &opts);
if (CHECK(!err, "remove_fail", "remove invalid old_fd didn't fail"))
goto out;
- opts.old_fd = fd2;
- err = bpf_set_link_xdp_fd_opts(IFINDEX_LO, -1, 0, &opts);
+ opts.old_prog_fd = fd2;
+ err = bpf_xdp_detach(IFINDEX_LO, 0, &opts);
if (CHECK(err, "remove_ok", "remove valid old_fd failed"))
goto out;
- err = bpf_get_link_xdp_id(IFINDEX_LO, &id0, 0);
+ err = bpf_xdp_query_id(IFINDEX_LO, 0, &id0);
if (CHECK(err || id0 != 0, "unload_check",
"loaded prog id %u != 0, err %d", id0, err))
goto out_close;
out:
- bpf_set_link_xdp_fd(IFINDEX_LO, -1, 0);
+ bpf_xdp_detach(IFINDEX_LO, 0, NULL);
out_close:
bpf_object__close(obj3);
out_2:
@@ -88,3 +85,75 @@ out_2:
out_1:
bpf_object__close(obj1);
}
+
+#define ERRMSG_LEN 64
+
+struct xdp_errmsg {
+ char msg[ERRMSG_LEN];
+};
+
+static void on_xdp_errmsg(void *ctx, int cpu, void *data, __u32 size)
+{
+ struct xdp_errmsg *ctx_errmg = ctx, *tp_errmsg = data;
+
+ memcpy(&ctx_errmg->msg, &tp_errmsg->msg, ERRMSG_LEN);
+}
+
+static const char tgt_errmsg[] = "Invalid XDP flags for BPF link attachment";
+
+static void test_xdp_attach_fail(const char *file)
+{
+ struct test_xdp_attach_fail *skel = NULL;
+ struct xdp_errmsg errmsg = {};
+ struct perf_buffer *pb = NULL;
+ struct bpf_object *obj = NULL;
+ int err, fd_xdp;
+
+ LIBBPF_OPTS(bpf_link_create_opts, opts);
+
+ skel = test_xdp_attach_fail__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "test_xdp_attach_fail__open_and_load"))
+ goto out_close;
+
+ err = test_xdp_attach_fail__attach(skel);
+ if (!ASSERT_EQ(err, 0, "test_xdp_attach_fail__attach"))
+ goto out_close;
+
+ /* set up perf buffer */
+ pb = perf_buffer__new(bpf_map__fd(skel->maps.xdp_errmsg_pb), 1,
+ on_xdp_errmsg, NULL, &errmsg, NULL);
+ if (!ASSERT_OK_PTR(pb, "perf_buffer__new"))
+ goto out_close;
+
+ err = bpf_prog_test_load(file, BPF_PROG_TYPE_XDP, &obj, &fd_xdp);
+ if (!ASSERT_EQ(err, 0, "bpf_prog_test_load"))
+ goto out_close;
+
+ opts.flags = 0xFF; // invalid flags to fail to attach XDP prog
+ err = bpf_link_create(fd_xdp, IFINDEX_LO, BPF_XDP, &opts);
+ if (!ASSERT_EQ(err, -EINVAL, "bpf_link_create"))
+ goto out_close;
+
+ /* read perf buffer */
+ err = perf_buffer__poll(pb, 100);
+ if (!ASSERT_GT(err, -1, "perf_buffer__poll"))
+ goto out_close;
+
+ ASSERT_STRNEQ((const char *) errmsg.msg, tgt_errmsg,
+ 42 /* strlen(tgt_errmsg) */, "check error message");
+
+out_close:
+ perf_buffer__free(pb);
+ bpf_object__close(obj);
+ test_xdp_attach_fail__destroy(skel);
+}
+
+void serial_test_xdp_attach(void)
+{
+ if (test__start_subtest("xdp_attach"))
+ test_xdp_attach("./test_xdp.bpf.o");
+ if (test__start_subtest("xdp_attach_dynptr"))
+ test_xdp_attach("./test_xdp_dynptr.bpf.o");
+ if (test__start_subtest("xdp_attach_failed"))
+ test_xdp_attach_fail("./xdp_dummy.bpf.o");
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/xdp_bonding.c b/tools/testing/selftests/bpf/prog_tests/xdp_bonding.c
new file mode 100644
index 000000000000..6d8b54124cb3
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/xdp_bonding.c
@@ -0,0 +1,691 @@
+// SPDX-License-Identifier: GPL-2.0
+
+/**
+ * Test XDP bonding support
+ *
+ * Sets up two bonded veth pairs between two fresh namespaces
+ * and verifies that XDP_TX program loaded on a bond device
+ * are correctly loaded onto the slave devices and XDP_TX'd
+ * packets are balanced using bonding.
+ */
+
+#define _GNU_SOURCE
+#include <sched.h>
+#include <net/if.h>
+#include <linux/if_link.h>
+#include "test_progs.h"
+#include "network_helpers.h"
+#include <linux/if_bonding.h>
+#include <linux/limits.h>
+#include <linux/udp.h>
+#include <uapi/linux/netdev.h>
+
+#include "xdp_dummy.skel.h"
+#include "xdp_redirect_multi_kern.skel.h"
+#include "xdp_tx.skel.h"
+
+#define BOND1_MAC {0x00, 0x11, 0x22, 0x33, 0x44, 0x55}
+#define BOND1_MAC_STR "00:11:22:33:44:55"
+#define BOND2_MAC {0x00, 0x22, 0x33, 0x44, 0x55, 0x66}
+#define BOND2_MAC_STR "00:22:33:44:55:66"
+#define NPACKETS 100
+
+static int root_netns_fd = -1;
+
+static void restore_root_netns(void)
+{
+ ASSERT_OK(setns(root_netns_fd, CLONE_NEWNET), "restore_root_netns");
+}
+
+static int setns_by_name(char *name)
+{
+ int nsfd, err;
+ char nspath[PATH_MAX];
+
+ snprintf(nspath, sizeof(nspath), "%s/%s", "/var/run/netns", name);
+ nsfd = open(nspath, O_RDONLY | O_CLOEXEC);
+ if (nsfd < 0)
+ return -1;
+
+ err = setns(nsfd, CLONE_NEWNET);
+ close(nsfd);
+ return err;
+}
+
+static int get_rx_packets(const char *iface)
+{
+ FILE *f;
+ char line[512];
+ int iface_len = strlen(iface);
+
+ f = fopen("/proc/net/dev", "r");
+ if (!f)
+ return -1;
+
+ while (fgets(line, sizeof(line), f)) {
+ char *p = line;
+
+ while (*p == ' ')
+ p++; /* skip whitespace */
+ if (!strncmp(p, iface, iface_len)) {
+ p += iface_len;
+ if (*p++ != ':')
+ continue;
+ while (*p == ' ')
+ p++; /* skip whitespace */
+ while (*p && *p != ' ')
+ p++; /* skip rx bytes */
+ while (*p == ' ')
+ p++; /* skip whitespace */
+ fclose(f);
+ return atoi(p);
+ }
+ }
+ fclose(f);
+ return -1;
+}
+
+#define MAX_BPF_LINKS 8
+
+struct skeletons {
+ struct xdp_dummy *xdp_dummy;
+ struct xdp_tx *xdp_tx;
+ struct xdp_redirect_multi_kern *xdp_redirect_multi_kern;
+
+ int nlinks;
+ struct bpf_link *links[MAX_BPF_LINKS];
+};
+
+static int xdp_attach(struct skeletons *skeletons, struct bpf_program *prog, char *iface)
+{
+ struct bpf_link *link;
+ int ifindex;
+
+ ifindex = if_nametoindex(iface);
+ if (!ASSERT_GT(ifindex, 0, "get ifindex"))
+ return -1;
+
+ if (!ASSERT_LE(skeletons->nlinks+1, MAX_BPF_LINKS, "too many XDP programs attached"))
+ return -1;
+
+ link = bpf_program__attach_xdp(prog, ifindex);
+ if (!ASSERT_OK_PTR(link, "attach xdp program"))
+ return -1;
+
+ skeletons->links[skeletons->nlinks++] = link;
+ return 0;
+}
+
+enum {
+ BOND_ONE_NO_ATTACH = 0,
+ BOND_BOTH_AND_ATTACH,
+};
+
+static const char * const mode_names[] = {
+ [BOND_MODE_ROUNDROBIN] = "balance-rr",
+ [BOND_MODE_ACTIVEBACKUP] = "active-backup",
+ [BOND_MODE_XOR] = "balance-xor",
+ [BOND_MODE_BROADCAST] = "broadcast",
+ [BOND_MODE_8023AD] = "802.3ad",
+ [BOND_MODE_TLB] = "balance-tlb",
+ [BOND_MODE_ALB] = "balance-alb",
+};
+
+static const char * const xmit_policy_names[] = {
+ [BOND_XMIT_POLICY_LAYER2] = "layer2",
+ [BOND_XMIT_POLICY_LAYER34] = "layer3+4",
+ [BOND_XMIT_POLICY_LAYER23] = "layer2+3",
+ [BOND_XMIT_POLICY_ENCAP23] = "encap2+3",
+ [BOND_XMIT_POLICY_ENCAP34] = "encap3+4",
+};
+
+static int bonding_setup(struct skeletons *skeletons, int mode, int xmit_policy,
+ int bond_both_attach)
+{
+ SYS(fail, "ip netns add ns_dst");
+ SYS(fail, "ip link add veth1_1 type veth peer name veth2_1 netns ns_dst");
+ SYS(fail, "ip link add veth1_2 type veth peer name veth2_2 netns ns_dst");
+
+ SYS(fail, "ip link add bond1 type bond mode %s xmit_hash_policy %s",
+ mode_names[mode], xmit_policy_names[xmit_policy]);
+ SYS(fail, "ip link set bond1 up address " BOND1_MAC_STR " addrgenmode none");
+ SYS(fail, "ip -netns ns_dst link add bond2 type bond mode %s xmit_hash_policy %s",
+ mode_names[mode], xmit_policy_names[xmit_policy]);
+ SYS(fail, "ip -netns ns_dst link set bond2 up address " BOND2_MAC_STR " addrgenmode none");
+
+ SYS(fail, "ip link set veth1_1 master bond1");
+ if (bond_both_attach == BOND_BOTH_AND_ATTACH) {
+ SYS(fail, "ip link set veth1_2 master bond1");
+ } else {
+ SYS(fail, "ip link set veth1_2 up addrgenmode none");
+
+ if (xdp_attach(skeletons, skeletons->xdp_dummy->progs.xdp_dummy_prog, "veth1_2"))
+ return -1;
+ }
+
+ SYS(fail, "ip -netns ns_dst link set veth2_1 master bond2");
+
+ if (bond_both_attach == BOND_BOTH_AND_ATTACH)
+ SYS(fail, "ip -netns ns_dst link set veth2_2 master bond2");
+ else
+ SYS(fail, "ip -netns ns_dst link set veth2_2 up addrgenmode none");
+
+ /* Load a dummy program on sending side as with veth peer needs to have a
+ * XDP program loaded as well.
+ */
+ if (xdp_attach(skeletons, skeletons->xdp_dummy->progs.xdp_dummy_prog, "bond1"))
+ return -1;
+
+ if (bond_both_attach == BOND_BOTH_AND_ATTACH) {
+ if (!ASSERT_OK(setns_by_name("ns_dst"), "set netns to ns_dst"))
+ return -1;
+
+ if (xdp_attach(skeletons, skeletons->xdp_tx->progs.xdp_tx, "bond2"))
+ return -1;
+
+ restore_root_netns();
+ }
+
+ return 0;
+fail:
+ return -1;
+}
+
+static void bonding_cleanup(struct skeletons *skeletons)
+{
+ restore_root_netns();
+ while (skeletons->nlinks) {
+ skeletons->nlinks--;
+ bpf_link__destroy(skeletons->links[skeletons->nlinks]);
+ }
+ ASSERT_OK(system("ip link delete bond1"), "delete bond1");
+ ASSERT_OK(system("ip link delete veth1_1"), "delete veth1_1");
+ ASSERT_OK(system("ip link delete veth1_2"), "delete veth1_2");
+ ASSERT_OK(system("ip netns delete ns_dst"), "delete ns_dst");
+}
+
+static int send_udp_packets(int vary_dst_ip)
+{
+ struct ethhdr eh = {
+ .h_source = BOND1_MAC,
+ .h_dest = BOND2_MAC,
+ .h_proto = htons(ETH_P_IP),
+ };
+ struct iphdr iph = {};
+ struct udphdr uh = {};
+ uint8_t buf[128];
+ int i, s = -1;
+ int ifindex;
+
+ s = socket(AF_PACKET, SOCK_RAW, IPPROTO_RAW);
+ if (!ASSERT_GE(s, 0, "socket"))
+ goto err;
+
+ ifindex = if_nametoindex("bond1");
+ if (!ASSERT_GT(ifindex, 0, "get bond1 ifindex"))
+ goto err;
+
+ iph.ihl = 5;
+ iph.version = 4;
+ iph.tos = 16;
+ iph.id = 1;
+ iph.ttl = 64;
+ iph.protocol = IPPROTO_UDP;
+ iph.saddr = 1;
+ iph.daddr = 2;
+ iph.tot_len = htons(sizeof(buf) - ETH_HLEN);
+ iph.check = 0;
+
+ for (i = 1; i <= NPACKETS; i++) {
+ int n;
+ struct sockaddr_ll saddr_ll = {
+ .sll_ifindex = ifindex,
+ .sll_halen = ETH_ALEN,
+ .sll_addr = BOND2_MAC,
+ };
+
+ /* vary the UDP destination port for even distribution with roundrobin/xor modes */
+ uh.dest++;
+
+ if (vary_dst_ip)
+ iph.daddr++;
+
+ /* construct a packet */
+ memcpy(buf, &eh, sizeof(eh));
+ memcpy(buf + sizeof(eh), &iph, sizeof(iph));
+ memcpy(buf + sizeof(eh) + sizeof(iph), &uh, sizeof(uh));
+
+ n = sendto(s, buf, sizeof(buf), 0, (struct sockaddr *)&saddr_ll, sizeof(saddr_ll));
+ if (!ASSERT_EQ(n, sizeof(buf), "sendto"))
+ goto err;
+ }
+
+ return 0;
+
+err:
+ if (s >= 0)
+ close(s);
+ return -1;
+}
+
+static void test_xdp_bonding_with_mode(struct skeletons *skeletons, int mode, int xmit_policy)
+{
+ int bond1_rx;
+
+ if (bonding_setup(skeletons, mode, xmit_policy, BOND_BOTH_AND_ATTACH))
+ goto out;
+
+ if (send_udp_packets(xmit_policy != BOND_XMIT_POLICY_LAYER34))
+ goto out;
+
+ bond1_rx = get_rx_packets("bond1");
+ ASSERT_EQ(bond1_rx, NPACKETS, "expected more received packets");
+
+ switch (mode) {
+ case BOND_MODE_ROUNDROBIN:
+ case BOND_MODE_XOR: {
+ int veth1_rx = get_rx_packets("veth1_1");
+ int veth2_rx = get_rx_packets("veth1_2");
+ int diff = abs(veth1_rx - veth2_rx);
+
+ ASSERT_GE(veth1_rx + veth2_rx, NPACKETS, "expected more packets");
+
+ switch (xmit_policy) {
+ case BOND_XMIT_POLICY_LAYER2:
+ ASSERT_GE(diff, NPACKETS,
+ "expected packets on only one of the interfaces");
+ break;
+ case BOND_XMIT_POLICY_LAYER23:
+ case BOND_XMIT_POLICY_LAYER34:
+ ASSERT_LT(diff, NPACKETS/2,
+ "expected even distribution of packets");
+ break;
+ default:
+ PRINT_FAIL("Unimplemented xmit_policy=%d\n", xmit_policy);
+ break;
+ }
+ break;
+ }
+ case BOND_MODE_ACTIVEBACKUP: {
+ int veth1_rx = get_rx_packets("veth1_1");
+ int veth2_rx = get_rx_packets("veth1_2");
+ int diff = abs(veth1_rx - veth2_rx);
+
+ ASSERT_GE(diff, NPACKETS,
+ "expected packets on only one of the interfaces");
+ break;
+ }
+ default:
+ PRINT_FAIL("Unimplemented xmit_policy=%d\n", xmit_policy);
+ break;
+ }
+
+out:
+ bonding_cleanup(skeletons);
+}
+
+/* Test the broadcast redirection using xdp_redirect_map_multi_prog and adding
+ * all the interfaces to it and checking that broadcasting won't send the packet
+ * to neither the ingress bond device (bond2) or its slave (veth2_1).
+ */
+static void test_xdp_bonding_redirect_multi(struct skeletons *skeletons)
+{
+ static const char * const ifaces[] = {"bond2", "veth2_1", "veth2_2"};
+ int veth1_1_rx, veth1_2_rx;
+ int err;
+
+ if (bonding_setup(skeletons, BOND_MODE_ROUNDROBIN, BOND_XMIT_POLICY_LAYER23,
+ BOND_ONE_NO_ATTACH))
+ goto out;
+
+
+ if (!ASSERT_OK(setns_by_name("ns_dst"), "could not set netns to ns_dst"))
+ goto out;
+
+ /* populate the devmap with the relevant interfaces */
+ for (int i = 0; i < ARRAY_SIZE(ifaces); i++) {
+ int ifindex = if_nametoindex(ifaces[i]);
+ int map_fd = bpf_map__fd(skeletons->xdp_redirect_multi_kern->maps.map_all);
+
+ if (!ASSERT_GT(ifindex, 0, "could not get interface index"))
+ goto out;
+
+ err = bpf_map_update_elem(map_fd, &ifindex, &ifindex, 0);
+ if (!ASSERT_OK(err, "add interface to map_all"))
+ goto out;
+ }
+
+ if (xdp_attach(skeletons,
+ skeletons->xdp_redirect_multi_kern->progs.xdp_redirect_map_multi_prog,
+ "bond2"))
+ goto out;
+
+ restore_root_netns();
+
+ if (send_udp_packets(BOND_MODE_ROUNDROBIN))
+ goto out;
+
+ veth1_1_rx = get_rx_packets("veth1_1");
+ veth1_2_rx = get_rx_packets("veth1_2");
+
+ ASSERT_EQ(veth1_1_rx, 0, "expected no packets on veth1_1");
+ ASSERT_GE(veth1_2_rx, NPACKETS, "expected packets on veth1_2");
+
+out:
+ restore_root_netns();
+ bonding_cleanup(skeletons);
+}
+
+/* Test that XDP programs cannot be attached to both the bond master and slaves simultaneously */
+static void test_xdp_bonding_attach(struct skeletons *skeletons)
+{
+ struct bpf_link *link = NULL;
+ struct bpf_link *link2 = NULL;
+ int veth, bond, err;
+
+ if (!ASSERT_OK(system("ip link add veth type veth"), "add veth"))
+ goto out;
+ if (!ASSERT_OK(system("ip link add bond type bond"), "add bond"))
+ goto out;
+
+ veth = if_nametoindex("veth");
+ if (!ASSERT_GE(veth, 0, "if_nametoindex veth"))
+ goto out;
+ bond = if_nametoindex("bond");
+ if (!ASSERT_GE(bond, 0, "if_nametoindex bond"))
+ goto out;
+
+ /* enslaving with a XDP program loaded is allowed */
+ link = bpf_program__attach_xdp(skeletons->xdp_dummy->progs.xdp_dummy_prog, veth);
+ if (!ASSERT_OK_PTR(link, "attach program to veth"))
+ goto out;
+
+ err = system("ip link set veth master bond");
+ if (!ASSERT_OK(err, "set veth master"))
+ goto out;
+
+ bpf_link__destroy(link);
+ link = NULL;
+
+ /* attaching to slave when master has no program is allowed */
+ link = bpf_program__attach_xdp(skeletons->xdp_dummy->progs.xdp_dummy_prog, veth);
+ if (!ASSERT_OK_PTR(link, "attach program to slave when enslaved"))
+ goto out;
+
+ /* attaching to master not allowed when slave has program loaded */
+ link2 = bpf_program__attach_xdp(skeletons->xdp_dummy->progs.xdp_dummy_prog, bond);
+ if (!ASSERT_ERR_PTR(link2, "attach program to master when slave has program"))
+ goto out;
+
+ bpf_link__destroy(link);
+ link = NULL;
+
+ /* attaching XDP program to master allowed when slave has no program */
+ link = bpf_program__attach_xdp(skeletons->xdp_dummy->progs.xdp_dummy_prog, bond);
+ if (!ASSERT_OK_PTR(link, "attach program to master"))
+ goto out;
+
+ /* attaching to slave not allowed when master has program loaded */
+ link2 = bpf_program__attach_xdp(skeletons->xdp_dummy->progs.xdp_dummy_prog, veth);
+ if (!ASSERT_ERR_PTR(link2, "attach program to slave when master has program"))
+ goto out;
+
+ bpf_link__destroy(link);
+ link = NULL;
+
+ /* test program unwinding with a non-XDP slave */
+ if (!ASSERT_OK(system("ip link add vxlan type vxlan id 1 remote 1.2.3.4 dstport 0 dev lo"),
+ "add vxlan"))
+ goto out;
+
+ err = system("ip link set vxlan master bond");
+ if (!ASSERT_OK(err, "set vxlan master"))
+ goto out;
+
+ /* attaching not allowed when one slave does not support XDP */
+ link = bpf_program__attach_xdp(skeletons->xdp_dummy->progs.xdp_dummy_prog, bond);
+ if (!ASSERT_ERR_PTR(link, "attach program to master when slave does not support XDP"))
+ goto out;
+
+out:
+ bpf_link__destroy(link);
+ bpf_link__destroy(link2);
+
+ system("ip link del veth");
+ system("ip link del bond");
+ system("ip link del vxlan");
+}
+
+/* Test with nested bonding devices to catch issue with negative jump label count */
+static void test_xdp_bonding_nested(struct skeletons *skeletons)
+{
+ struct bpf_link *link = NULL;
+ int bond, err;
+
+ if (!ASSERT_OK(system("ip link add bond type bond"), "add bond"))
+ goto out;
+
+ bond = if_nametoindex("bond");
+ if (!ASSERT_GE(bond, 0, "if_nametoindex bond"))
+ goto out;
+
+ if (!ASSERT_OK(system("ip link add bond_nest1 type bond"), "add bond_nest1"))
+ goto out;
+
+ err = system("ip link set bond_nest1 master bond");
+ if (!ASSERT_OK(err, "set bond_nest1 master"))
+ goto out;
+
+ if (!ASSERT_OK(system("ip link add bond_nest2 type bond"), "add bond_nest1"))
+ goto out;
+
+ err = system("ip link set bond_nest2 master bond_nest1");
+ if (!ASSERT_OK(err, "set bond_nest2 master"))
+ goto out;
+
+ link = bpf_program__attach_xdp(skeletons->xdp_dummy->progs.xdp_dummy_prog, bond);
+ ASSERT_OK_PTR(link, "attach program to master");
+
+out:
+ bpf_link__destroy(link);
+ system("ip link del bond");
+ system("ip link del bond_nest1");
+ system("ip link del bond_nest2");
+}
+
+static void test_xdp_bonding_features(struct skeletons *skeletons)
+{
+ LIBBPF_OPTS(bpf_xdp_query_opts, query_opts);
+ int bond_idx, veth1_idx, err;
+ struct bpf_link *link = NULL;
+
+ if (!ASSERT_OK(system("ip link add bond type bond"), "add bond"))
+ goto out;
+
+ bond_idx = if_nametoindex("bond");
+ if (!ASSERT_GE(bond_idx, 0, "if_nametoindex bond"))
+ goto out;
+
+ /* query default xdp-feature for bond device */
+ err = bpf_xdp_query(bond_idx, XDP_FLAGS_DRV_MODE, &query_opts);
+ if (!ASSERT_OK(err, "bond bpf_xdp_query"))
+ goto out;
+
+ if (!ASSERT_EQ(query_opts.feature_flags, 0,
+ "bond query_opts.feature_flags"))
+ goto out;
+
+ if (!ASSERT_OK(system("ip link add veth0 type veth peer name veth1"),
+ "add veth{0,1} pair"))
+ goto out;
+
+ if (!ASSERT_OK(system("ip link add veth2 type veth peer name veth3"),
+ "add veth{2,3} pair"))
+ goto out;
+
+ if (!ASSERT_OK(system("ip link set veth0 master bond"),
+ "add veth0 to master bond"))
+ goto out;
+
+ /* xdp-feature for bond device should be obtained from the single slave
+ * device (veth0)
+ */
+ err = bpf_xdp_query(bond_idx, XDP_FLAGS_DRV_MODE, &query_opts);
+ if (!ASSERT_OK(err, "bond bpf_xdp_query"))
+ goto out;
+
+ if (!ASSERT_EQ(query_opts.feature_flags,
+ NETDEV_XDP_ACT_BASIC | NETDEV_XDP_ACT_REDIRECT |
+ NETDEV_XDP_ACT_RX_SG,
+ "bond query_opts.feature_flags"))
+ goto out;
+
+ veth1_idx = if_nametoindex("veth1");
+ if (!ASSERT_GE(veth1_idx, 0, "if_nametoindex veth1"))
+ goto out;
+
+ link = bpf_program__attach_xdp(skeletons->xdp_dummy->progs.xdp_dummy_prog,
+ veth1_idx);
+ if (!ASSERT_OK_PTR(link, "attach program to veth1"))
+ goto out;
+
+ /* xdp-feature for veth0 are changed */
+ err = bpf_xdp_query(bond_idx, XDP_FLAGS_DRV_MODE, &query_opts);
+ if (!ASSERT_OK(err, "bond bpf_xdp_query"))
+ goto out;
+
+ if (!ASSERT_EQ(query_opts.feature_flags,
+ NETDEV_XDP_ACT_BASIC | NETDEV_XDP_ACT_REDIRECT |
+ NETDEV_XDP_ACT_RX_SG | NETDEV_XDP_ACT_NDO_XMIT |
+ NETDEV_XDP_ACT_NDO_XMIT_SG,
+ "bond query_opts.feature_flags"))
+ goto out;
+
+ if (!ASSERT_OK(system("ip link set veth2 master bond"),
+ "add veth2 to master bond"))
+ goto out;
+
+ err = bpf_xdp_query(bond_idx, XDP_FLAGS_DRV_MODE, &query_opts);
+ if (!ASSERT_OK(err, "bond bpf_xdp_query"))
+ goto out;
+
+ /* xdp-feature for bond device should be set to the most restrict
+ * value obtained from attached slave devices (veth0 and veth2)
+ */
+ if (!ASSERT_EQ(query_opts.feature_flags,
+ NETDEV_XDP_ACT_BASIC | NETDEV_XDP_ACT_REDIRECT |
+ NETDEV_XDP_ACT_RX_SG,
+ "bond query_opts.feature_flags"))
+ goto out;
+
+ if (!ASSERT_OK(system("ip link set veth2 nomaster"),
+ "del veth2 to master bond"))
+ goto out;
+
+ err = bpf_xdp_query(bond_idx, XDP_FLAGS_DRV_MODE, &query_opts);
+ if (!ASSERT_OK(err, "bond bpf_xdp_query"))
+ goto out;
+
+ if (!ASSERT_EQ(query_opts.feature_flags,
+ NETDEV_XDP_ACT_BASIC | NETDEV_XDP_ACT_REDIRECT |
+ NETDEV_XDP_ACT_RX_SG | NETDEV_XDP_ACT_NDO_XMIT |
+ NETDEV_XDP_ACT_NDO_XMIT_SG,
+ "bond query_opts.feature_flags"))
+ goto out;
+
+ if (!ASSERT_OK(system("ip link set veth0 nomaster"),
+ "del veth0 to master bond"))
+ goto out;
+
+ err = bpf_xdp_query(bond_idx, XDP_FLAGS_DRV_MODE, &query_opts);
+ if (!ASSERT_OK(err, "bond bpf_xdp_query"))
+ goto out;
+
+ ASSERT_EQ(query_opts.feature_flags, 0,
+ "bond query_opts.feature_flags");
+out:
+ bpf_link__destroy(link);
+ system("ip link del veth0");
+ system("ip link del veth2");
+ system("ip link del bond");
+}
+
+static int libbpf_debug_print(enum libbpf_print_level level,
+ const char *format, va_list args)
+{
+ if (level != LIBBPF_WARN)
+ vprintf(format, args);
+ return 0;
+}
+
+struct bond_test_case {
+ char *name;
+ int mode;
+ int xmit_policy;
+};
+
+static struct bond_test_case bond_test_cases[] = {
+ { "xdp_bonding_roundrobin", BOND_MODE_ROUNDROBIN, BOND_XMIT_POLICY_LAYER23, },
+ { "xdp_bonding_activebackup", BOND_MODE_ACTIVEBACKUP, BOND_XMIT_POLICY_LAYER23 },
+
+ { "xdp_bonding_xor_layer2", BOND_MODE_XOR, BOND_XMIT_POLICY_LAYER2, },
+ { "xdp_bonding_xor_layer23", BOND_MODE_XOR, BOND_XMIT_POLICY_LAYER23, },
+ { "xdp_bonding_xor_layer34", BOND_MODE_XOR, BOND_XMIT_POLICY_LAYER34, },
+};
+
+void serial_test_xdp_bonding(void)
+{
+ libbpf_print_fn_t old_print_fn;
+ struct skeletons skeletons = {};
+ int i;
+
+ old_print_fn = libbpf_set_print(libbpf_debug_print);
+
+ root_netns_fd = open("/proc/self/ns/net", O_RDONLY);
+ if (!ASSERT_GE(root_netns_fd, 0, "open /proc/self/ns/net"))
+ goto out;
+
+ skeletons.xdp_dummy = xdp_dummy__open_and_load();
+ if (!ASSERT_OK_PTR(skeletons.xdp_dummy, "xdp_dummy__open_and_load"))
+ goto out;
+
+ skeletons.xdp_tx = xdp_tx__open_and_load();
+ if (!ASSERT_OK_PTR(skeletons.xdp_tx, "xdp_tx__open_and_load"))
+ goto out;
+
+ skeletons.xdp_redirect_multi_kern = xdp_redirect_multi_kern__open_and_load();
+ if (!ASSERT_OK_PTR(skeletons.xdp_redirect_multi_kern,
+ "xdp_redirect_multi_kern__open_and_load"))
+ goto out;
+
+ if (test__start_subtest("xdp_bonding_attach"))
+ test_xdp_bonding_attach(&skeletons);
+
+ if (test__start_subtest("xdp_bonding_nested"))
+ test_xdp_bonding_nested(&skeletons);
+
+ if (test__start_subtest("xdp_bonding_features"))
+ test_xdp_bonding_features(&skeletons);
+
+ for (i = 0; i < ARRAY_SIZE(bond_test_cases); i++) {
+ struct bond_test_case *test_case = &bond_test_cases[i];
+
+ if (test__start_subtest(test_case->name))
+ test_xdp_bonding_with_mode(
+ &skeletons,
+ test_case->mode,
+ test_case->xmit_policy);
+ }
+
+ if (test__start_subtest("xdp_bonding_redirect_multi"))
+ test_xdp_bonding_redirect_multi(&skeletons);
+
+out:
+ xdp_dummy__destroy(skeletons.xdp_dummy);
+ xdp_tx__destroy(skeletons.xdp_tx);
+ xdp_redirect_multi_kern__destroy(skeletons.xdp_redirect_multi_kern);
+
+ libbpf_set_print(old_print_fn);
+ if (root_netns_fd >= 0)
+ close(root_netns_fd);
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/xdp_bpf2bpf.c b/tools/testing/selftests/bpf/prog_tests/xdp_bpf2bpf.c
index 2c6c570b21f8..76967d8ace9c 100644
--- a/tools/testing/selftests/bpf/prog_tests/xdp_bpf2bpf.c
+++ b/tools/testing/selftests/bpf/prog_tests/xdp_bpf2bpf.c
@@ -10,50 +10,110 @@ struct meta {
int pkt_len;
};
+struct test_ctx_s {
+ bool passed;
+ int pkt_size;
+};
+
+struct test_ctx_s test_ctx;
+
static void on_sample(void *ctx, int cpu, void *data, __u32 size)
{
- int duration = 0;
struct meta *meta = (struct meta *)data;
struct ipv4_packet *trace_pkt_v4 = data + sizeof(*meta);
+ unsigned char *raw_pkt = data + sizeof(*meta);
+ struct test_ctx_s *tst_ctx = ctx;
+
+ ASSERT_GE(size, sizeof(pkt_v4) + sizeof(*meta), "check_size");
+ ASSERT_EQ(meta->ifindex, if_nametoindex("lo"), "check_meta_ifindex");
+ ASSERT_EQ(meta->pkt_len, tst_ctx->pkt_size, "check_meta_pkt_len");
+ ASSERT_EQ(memcmp(trace_pkt_v4, &pkt_v4, sizeof(pkt_v4)), 0,
+ "check_packet_content");
+
+ if (meta->pkt_len > sizeof(pkt_v4)) {
+ for (int i = 0; i < meta->pkt_len - sizeof(pkt_v4); i++)
+ ASSERT_EQ(raw_pkt[i + sizeof(pkt_v4)], (unsigned char)i,
+ "check_packet_content");
+ }
+
+ tst_ctx->passed = true;
+}
- if (CHECK(size < sizeof(pkt_v4) + sizeof(*meta),
- "check_size", "size %u < %zu\n",
- size, sizeof(pkt_v4) + sizeof(*meta)))
- return;
+#define BUF_SZ 9000
- if (CHECK(meta->ifindex != if_nametoindex("lo"), "check_meta_ifindex",
- "meta->ifindex = %d\n", meta->ifindex))
+static void run_xdp_bpf2bpf_pkt_size(int pkt_fd, struct perf_buffer *pb,
+ struct test_xdp_bpf2bpf *ftrace_skel,
+ int pkt_size)
+{
+ __u8 *buf, *buf_in;
+ int err;
+ LIBBPF_OPTS(bpf_test_run_opts, topts);
+
+ if (!ASSERT_LE(pkt_size, BUF_SZ, "pkt_size") ||
+ !ASSERT_GE(pkt_size, sizeof(pkt_v4), "pkt_size"))
return;
- if (CHECK(meta->pkt_len != sizeof(pkt_v4), "check_meta_pkt_len",
- "meta->pkt_len = %zd\n", sizeof(pkt_v4)))
+ buf_in = malloc(BUF_SZ);
+ if (!ASSERT_OK_PTR(buf_in, "buf_in malloc()"))
return;
- if (CHECK(memcmp(trace_pkt_v4, &pkt_v4, sizeof(pkt_v4)),
- "check_packet_content", "content not the same\n"))
+ buf = malloc(BUF_SZ);
+ if (!ASSERT_OK_PTR(buf, "buf malloc()")) {
+ free(buf_in);
return;
+ }
+
+ test_ctx.passed = false;
+ test_ctx.pkt_size = pkt_size;
+
+ memcpy(buf_in, &pkt_v4, sizeof(pkt_v4));
+ if (pkt_size > sizeof(pkt_v4)) {
+ for (int i = 0; i < (pkt_size - sizeof(pkt_v4)); i++)
+ buf_in[i + sizeof(pkt_v4)] = i;
+ }
+
+ /* Run test program */
+ topts.data_in = buf_in;
+ topts.data_size_in = pkt_size;
+ topts.data_out = buf;
+ topts.data_size_out = BUF_SZ;
+
+ err = bpf_prog_test_run_opts(pkt_fd, &topts);
+
+ ASSERT_OK(err, "ipv4");
+ ASSERT_EQ(topts.retval, XDP_PASS, "ipv4 retval");
+ ASSERT_EQ(topts.data_size_out, pkt_size, "ipv4 size");
+
+ /* Make sure bpf_xdp_output() was triggered and it sent the expected
+ * data to the perf ring buffer.
+ */
+ err = perf_buffer__poll(pb, 100);
- *(bool *)ctx = true;
+ ASSERT_GE(err, 0, "perf_buffer__poll");
+ ASSERT_TRUE(test_ctx.passed, "test passed");
+ /* Verify test results */
+ ASSERT_EQ(ftrace_skel->bss->test_result_fentry, if_nametoindex("lo"),
+ "fentry result");
+ ASSERT_EQ(ftrace_skel->bss->test_result_fexit, XDP_PASS, "fexit result");
+
+ free(buf);
+ free(buf_in);
}
void test_xdp_bpf2bpf(void)
{
- __u32 duration = 0, retval, size;
- char buf[128];
int err, pkt_fd, map_fd;
- bool passed = false;
- struct iphdr *iph = (void *)buf + sizeof(struct ethhdr);
- struct iptnl_info value4 = {.family = AF_INET};
+ int pkt_sizes[] = {sizeof(pkt_v4), 1024, 4100, 8200};
+ struct iptnl_info value4 = {.family = AF_INET6};
struct test_xdp *pkt_skel = NULL;
struct test_xdp_bpf2bpf *ftrace_skel = NULL;
struct vip key4 = {.protocol = 6, .family = AF_INET};
struct bpf_program *prog;
struct perf_buffer *pb = NULL;
- struct perf_buffer_opts pb_opts = {};
/* Load XDP program to introspect */
pkt_skel = test_xdp__open_and_load();
- if (CHECK(!pkt_skel, "pkt_skel_load", "test_xdp skeleton failed\n"))
+ if (!ASSERT_OK_PTR(pkt_skel, "test_xdp__open_and_load"))
return;
pkt_fd = bpf_program__fd(pkt_skel->progs._xdp_tx_iptunnel);
@@ -63,7 +123,7 @@ void test_xdp_bpf2bpf(void)
/* Load trace program */
ftrace_skel = test_xdp_bpf2bpf__open();
- if (CHECK(!ftrace_skel, "__open", "ftrace skeleton failed\n"))
+ if (!ASSERT_OK_PTR(ftrace_skel, "test_xdp_bpf2bpf__open"))
goto out;
/* Demonstrate the bpf_program__set_attach_target() API rather than
@@ -78,52 +138,24 @@ void test_xdp_bpf2bpf(void)
bpf_program__set_attach_target(prog, pkt_fd, "_xdp_tx_iptunnel");
err = test_xdp_bpf2bpf__load(ftrace_skel);
- if (CHECK(err, "__load", "ftrace skeleton failed\n"))
+ if (!ASSERT_OK(err, "test_xdp_bpf2bpf__load"))
goto out;
err = test_xdp_bpf2bpf__attach(ftrace_skel);
- if (CHECK(err, "ftrace_attach", "ftrace attach failed: %d\n", err))
+ if (!ASSERT_OK(err, "test_xdp_bpf2bpf__attach"))
goto out;
/* Set up perf buffer */
- pb_opts.sample_cb = on_sample;
- pb_opts.ctx = &passed;
- pb = perf_buffer__new(bpf_map__fd(ftrace_skel->maps.perf_buf_map),
- 1, &pb_opts);
- if (CHECK(IS_ERR(pb), "perf_buf__new", "err %ld\n", PTR_ERR(pb)))
+ pb = perf_buffer__new(bpf_map__fd(ftrace_skel->maps.perf_buf_map), 8,
+ on_sample, NULL, &test_ctx, NULL);
+ if (!ASSERT_OK_PTR(pb, "perf_buf__new"))
goto out;
- /* Run test program */
- err = bpf_prog_test_run(pkt_fd, 1, &pkt_v4, sizeof(pkt_v4),
- buf, &size, &retval, &duration);
-
- if (CHECK(err || retval != XDP_TX || size != 74 ||
- iph->protocol != IPPROTO_IPIP, "ipv4",
- "err %d errno %d retval %d size %d\n",
- err, errno, retval, size))
- goto out;
-
- /* Make sure bpf_xdp_output() was triggered and it sent the expected
- * data to the perf ring buffer.
- */
- err = perf_buffer__poll(pb, 100);
- if (CHECK(err < 0, "perf_buffer__poll", "err %d\n", err))
- goto out;
-
- CHECK_FAIL(!passed);
-
- /* Verify test results */
- if (CHECK(ftrace_skel->bss->test_result_fentry != if_nametoindex("lo"),
- "result", "fentry failed err %llu\n",
- ftrace_skel->bss->test_result_fentry))
- goto out;
-
- CHECK(ftrace_skel->bss->test_result_fexit != XDP_TX, "result",
- "fexit failed err %llu\n", ftrace_skel->bss->test_result_fexit);
-
+ for (int i = 0; i < ARRAY_SIZE(pkt_sizes); i++)
+ run_xdp_bpf2bpf_pkt_size(pkt_fd, pb, ftrace_skel,
+ pkt_sizes[i]);
out:
- if (pb)
- perf_buffer__free(pb);
+ perf_buffer__free(pb);
test_xdp__destroy(pkt_skel);
test_xdp_bpf2bpf__destroy(ftrace_skel);
}
diff --git a/tools/testing/selftests/bpf/prog_tests/xdp_context_test_run.c b/tools/testing/selftests/bpf/prog_tests/xdp_context_test_run.c
new file mode 100644
index 000000000000..e6a783c7f5db
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/xdp_context_test_run.c
@@ -0,0 +1,105 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <test_progs.h>
+#include <network_helpers.h>
+#include "test_xdp_context_test_run.skel.h"
+
+void test_xdp_context_error(int prog_fd, struct bpf_test_run_opts opts,
+ __u32 data_meta, __u32 data, __u32 data_end,
+ __u32 ingress_ifindex, __u32 rx_queue_index,
+ __u32 egress_ifindex)
+{
+ struct xdp_md ctx = {
+ .data = data,
+ .data_end = data_end,
+ .data_meta = data_meta,
+ .ingress_ifindex = ingress_ifindex,
+ .rx_queue_index = rx_queue_index,
+ .egress_ifindex = egress_ifindex,
+ };
+ int err;
+
+ opts.ctx_in = &ctx;
+ opts.ctx_size_in = sizeof(ctx);
+ err = bpf_prog_test_run_opts(prog_fd, &opts);
+ ASSERT_EQ(errno, EINVAL, "errno-EINVAL");
+ ASSERT_ERR(err, "bpf_prog_test_run");
+}
+
+void test_xdp_context_test_run(void)
+{
+ struct test_xdp_context_test_run *skel = NULL;
+ char data[sizeof(pkt_v4) + sizeof(__u32)];
+ char bad_ctx[sizeof(struct xdp_md) + 1];
+ struct xdp_md ctx_in, ctx_out;
+ DECLARE_LIBBPF_OPTS(bpf_test_run_opts, opts,
+ .data_in = &data,
+ .data_size_in = sizeof(data),
+ .ctx_out = &ctx_out,
+ .ctx_size_out = sizeof(ctx_out),
+ .repeat = 1,
+ );
+ int err, prog_fd;
+
+ skel = test_xdp_context_test_run__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "skel"))
+ return;
+ prog_fd = bpf_program__fd(skel->progs.xdp_context);
+
+ /* Data past the end of the kernel's struct xdp_md must be 0 */
+ bad_ctx[sizeof(bad_ctx) - 1] = 1;
+ opts.ctx_in = bad_ctx;
+ opts.ctx_size_in = sizeof(bad_ctx);
+ err = bpf_prog_test_run_opts(prog_fd, &opts);
+ ASSERT_EQ(errno, E2BIG, "extradata-errno");
+ ASSERT_ERR(err, "bpf_prog_test_run(extradata)");
+
+ *(__u32 *)data = XDP_PASS;
+ *(struct ipv4_packet *)(data + sizeof(__u32)) = pkt_v4;
+ opts.ctx_in = &ctx_in;
+ opts.ctx_size_in = sizeof(ctx_in);
+ memset(&ctx_in, 0, sizeof(ctx_in));
+ ctx_in.data_meta = 0;
+ ctx_in.data = sizeof(__u32);
+ ctx_in.data_end = ctx_in.data + sizeof(pkt_v4);
+ err = bpf_prog_test_run_opts(prog_fd, &opts);
+ ASSERT_OK(err, "bpf_prog_test_run(valid)");
+ ASSERT_EQ(opts.retval, XDP_PASS, "valid-retval");
+ ASSERT_EQ(opts.data_size_out, sizeof(pkt_v4), "valid-datasize");
+ ASSERT_EQ(opts.ctx_size_out, opts.ctx_size_in, "valid-ctxsize");
+ ASSERT_EQ(ctx_out.data_meta, 0, "valid-datameta");
+ ASSERT_EQ(ctx_out.data, 0, "valid-data");
+ ASSERT_EQ(ctx_out.data_end, sizeof(pkt_v4), "valid-dataend");
+
+ /* Meta data's size must be a multiple of 4 */
+ test_xdp_context_error(prog_fd, opts, 0, 1, sizeof(data), 0, 0, 0);
+
+ /* data_meta must reference the start of data */
+ test_xdp_context_error(prog_fd, opts, 4, sizeof(__u32), sizeof(data),
+ 0, 0, 0);
+
+ /* Meta data must be 255 bytes or smaller */
+ test_xdp_context_error(prog_fd, opts, 0, 256, sizeof(data), 0, 0, 0);
+
+ /* Total size of data must match data_end - data_meta */
+ test_xdp_context_error(prog_fd, opts, 0, sizeof(__u32),
+ sizeof(data) - 1, 0, 0, 0);
+ test_xdp_context_error(prog_fd, opts, 0, sizeof(__u32),
+ sizeof(data) + 1, 0, 0, 0);
+
+ /* RX queue cannot be specified without specifying an ingress */
+ test_xdp_context_error(prog_fd, opts, 0, sizeof(__u32), sizeof(data),
+ 0, 1, 0);
+
+ /* Interface 1 is always the loopback interface which always has only
+ * one RX queue (index 0). This makes index 1 an invalid rx queue index
+ * for interface 1.
+ */
+ test_xdp_context_error(prog_fd, opts, 0, sizeof(__u32), sizeof(data),
+ 1, 1, 0);
+
+ /* The egress cannot be specified */
+ test_xdp_context_error(prog_fd, opts, 0, sizeof(__u32), sizeof(data),
+ 0, 0, 1);
+
+ test_xdp_context_test_run__destroy(skel);
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/xdp_cpumap_attach.c b/tools/testing/selftests/bpf/prog_tests/xdp_cpumap_attach.c
new file mode 100644
index 000000000000..481626a875d1
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/xdp_cpumap_attach.c
@@ -0,0 +1,121 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <uapi/linux/bpf.h>
+#include <linux/if_link.h>
+#include <test_progs.h>
+
+#include "test_xdp_with_cpumap_frags_helpers.skel.h"
+#include "test_xdp_with_cpumap_helpers.skel.h"
+
+#define IFINDEX_LO 1
+
+static void test_xdp_with_cpumap_helpers(void)
+{
+ struct test_xdp_with_cpumap_helpers *skel;
+ struct bpf_prog_info info = {};
+ __u32 len = sizeof(info);
+ struct bpf_cpumap_val val = {
+ .qsize = 192,
+ };
+ int err, prog_fd, map_fd;
+ __u32 idx = 0;
+
+ skel = test_xdp_with_cpumap_helpers__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "test_xdp_with_cpumap_helpers__open_and_load"))
+ return;
+
+ prog_fd = bpf_program__fd(skel->progs.xdp_redir_prog);
+ err = bpf_xdp_attach(IFINDEX_LO, prog_fd, XDP_FLAGS_SKB_MODE, NULL);
+ if (!ASSERT_OK(err, "Generic attach of program with 8-byte CPUMAP"))
+ goto out_close;
+
+ err = bpf_xdp_detach(IFINDEX_LO, XDP_FLAGS_SKB_MODE, NULL);
+ ASSERT_OK(err, "XDP program detach");
+
+ prog_fd = bpf_program__fd(skel->progs.xdp_dummy_cm);
+ map_fd = bpf_map__fd(skel->maps.cpu_map);
+ err = bpf_prog_get_info_by_fd(prog_fd, &info, &len);
+ if (!ASSERT_OK(err, "bpf_prog_get_info_by_fd"))
+ goto out_close;
+
+ val.bpf_prog.fd = prog_fd;
+ err = bpf_map_update_elem(map_fd, &idx, &val, 0);
+ ASSERT_OK(err, "Add program to cpumap entry");
+
+ err = bpf_map_lookup_elem(map_fd, &idx, &val);
+ ASSERT_OK(err, "Read cpumap entry");
+ ASSERT_EQ(info.id, val.bpf_prog.id, "Match program id to cpumap entry prog_id");
+
+ /* can not attach BPF_XDP_CPUMAP program to a device */
+ err = bpf_xdp_attach(IFINDEX_LO, prog_fd, XDP_FLAGS_SKB_MODE, NULL);
+ if (!ASSERT_NEQ(err, 0, "Attach of BPF_XDP_CPUMAP program"))
+ bpf_xdp_detach(IFINDEX_LO, XDP_FLAGS_SKB_MODE, NULL);
+
+ val.qsize = 192;
+ val.bpf_prog.fd = bpf_program__fd(skel->progs.xdp_dummy_prog);
+ err = bpf_map_update_elem(map_fd, &idx, &val, 0);
+ ASSERT_NEQ(err, 0, "Add non-BPF_XDP_CPUMAP program to cpumap entry");
+
+ /* Try to attach BPF_XDP program with frags to cpumap when we have
+ * already loaded a BPF_XDP program on the map
+ */
+ idx = 1;
+ val.qsize = 192;
+ val.bpf_prog.fd = bpf_program__fd(skel->progs.xdp_dummy_cm_frags);
+ err = bpf_map_update_elem(map_fd, &idx, &val, 0);
+ ASSERT_NEQ(err, 0, "Add BPF_XDP program with frags to cpumap entry");
+
+out_close:
+ test_xdp_with_cpumap_helpers__destroy(skel);
+}
+
+static void test_xdp_with_cpumap_frags_helpers(void)
+{
+ struct test_xdp_with_cpumap_frags_helpers *skel;
+ struct bpf_prog_info info = {};
+ __u32 len = sizeof(info);
+ struct bpf_cpumap_val val = {
+ .qsize = 192,
+ };
+ int err, frags_prog_fd, map_fd;
+ __u32 idx = 0;
+
+ skel = test_xdp_with_cpumap_frags_helpers__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "test_xdp_with_cpumap_helpers__open_and_load"))
+ return;
+
+ frags_prog_fd = bpf_program__fd(skel->progs.xdp_dummy_cm_frags);
+ map_fd = bpf_map__fd(skel->maps.cpu_map);
+ err = bpf_prog_get_info_by_fd(frags_prog_fd, &info, &len);
+ if (!ASSERT_OK(err, "bpf_prog_get_info_by_fd"))
+ goto out_close;
+
+ val.bpf_prog.fd = frags_prog_fd;
+ err = bpf_map_update_elem(map_fd, &idx, &val, 0);
+ ASSERT_OK(err, "Add program to cpumap entry");
+
+ err = bpf_map_lookup_elem(map_fd, &idx, &val);
+ ASSERT_OK(err, "Read cpumap entry");
+ ASSERT_EQ(info.id, val.bpf_prog.id,
+ "Match program id to cpumap entry prog_id");
+
+ /* Try to attach BPF_XDP program to cpumap when we have
+ * already loaded a BPF_XDP program with frags on the map
+ */
+ idx = 1;
+ val.qsize = 192;
+ val.bpf_prog.fd = bpf_program__fd(skel->progs.xdp_dummy_cm);
+ err = bpf_map_update_elem(map_fd, &idx, &val, 0);
+ ASSERT_NEQ(err, 0, "Add BPF_XDP program to cpumap entry");
+
+out_close:
+ test_xdp_with_cpumap_frags_helpers__destroy(skel);
+}
+
+void serial_test_xdp_cpumap_attach(void)
+{
+ if (test__start_subtest("CPUMAP with programs in entries"))
+ test_xdp_with_cpumap_helpers();
+
+ if (test__start_subtest("CPUMAP with frags programs in entries"))
+ test_xdp_with_cpumap_frags_helpers();
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/xdp_dev_bound_only.c b/tools/testing/selftests/bpf/prog_tests/xdp_dev_bound_only.c
new file mode 100644
index 000000000000..7dd18c6d06c6
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/xdp_dev_bound_only.c
@@ -0,0 +1,61 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <net/if.h>
+#include <test_progs.h>
+#include <network_helpers.h>
+
+#define LOCAL_NETNS "xdp_dev_bound_only_netns"
+
+static int load_dummy_prog(char *name, __u32 ifindex, __u32 flags)
+{
+ struct bpf_insn insns[] = { BPF_MOV64_IMM(BPF_REG_0, 0), BPF_EXIT_INSN() };
+ LIBBPF_OPTS(bpf_prog_load_opts, opts);
+
+ opts.prog_flags = flags;
+ opts.prog_ifindex = ifindex;
+ return bpf_prog_load(BPF_PROG_TYPE_XDP, name, "GPL", insns, ARRAY_SIZE(insns), &opts);
+}
+
+/* A test case for bpf_offload_netdev->offload handling bug:
+ * - create a veth device (does not support offload);
+ * - create a device bound XDP program with BPF_F_XDP_DEV_BOUND_ONLY flag
+ * (such programs are not offloaded);
+ * - create a device bound XDP program without flags (such programs are offloaded).
+ * This might lead to 'BUG: kernel NULL pointer dereference'.
+ */
+void test_xdp_dev_bound_only_offdev(void)
+{
+ struct nstoken *tok = NULL;
+ __u32 ifindex;
+ int fd1 = -1;
+ int fd2 = -1;
+
+ SYS(out, "ip netns add " LOCAL_NETNS);
+ tok = open_netns(LOCAL_NETNS);
+ if (!ASSERT_OK_PTR(tok, "open_netns"))
+ goto out;
+ SYS(out, "ip link add eth42 type veth");
+ ifindex = if_nametoindex("eth42");
+ if (!ASSERT_NEQ(ifindex, 0, "if_nametoindex")) {
+ perror("if_nametoindex");
+ goto out;
+ }
+ fd1 = load_dummy_prog("dummy1", ifindex, BPF_F_XDP_DEV_BOUND_ONLY);
+ if (!ASSERT_GE(fd1, 0, "load_dummy_prog #1")) {
+ perror("load_dummy_prog #1");
+ goto out;
+ }
+ /* Program with ifindex is considered offloaded, however veth
+ * does not support offload => error should be reported.
+ */
+ fd2 = load_dummy_prog("dummy2", ifindex, 0);
+ ASSERT_EQ(fd2, -EINVAL, "load_dummy_prog #2 (offloaded)");
+
+out:
+ close(fd1);
+ close(fd2);
+ close_netns(tok);
+ /* eth42 was added inside netns, removing the netns will
+ * also remove eth42 veth pair.
+ */
+ SYS_NOFAIL("ip netns del " LOCAL_NETNS);
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/xdp_devmap_attach.c b/tools/testing/selftests/bpf/prog_tests/xdp_devmap_attach.c
index 88ef3ec8ac4c..ce6812558287 100644
--- a/tools/testing/selftests/bpf/prog_tests/xdp_devmap_attach.c
+++ b/tools/testing/selftests/bpf/prog_tests/xdp_devmap_attach.c
@@ -4,11 +4,12 @@
#include <test_progs.h>
#include "test_xdp_devmap_helpers.skel.h"
+#include "test_xdp_with_devmap_frags_helpers.skel.h"
#include "test_xdp_with_devmap_helpers.skel.h"
#define IFINDEX_LO 1
-void test_xdp_with_devmap_helpers(void)
+static void test_xdp_with_devmap_helpers(void)
{
struct test_xdp_with_devmap_helpers *skel;
struct bpf_prog_info info = {};
@@ -16,74 +17,121 @@ void test_xdp_with_devmap_helpers(void)
.ifindex = IFINDEX_LO,
};
__u32 len = sizeof(info);
- __u32 duration = 0, idx = 0;
int err, dm_fd, map_fd;
+ __u32 idx = 0;
skel = test_xdp_with_devmap_helpers__open_and_load();
- if (CHECK_FAIL(!skel)) {
- perror("test_xdp_with_devmap_helpers__open_and_load");
+ if (!ASSERT_OK_PTR(skel, "test_xdp_with_devmap_helpers__open_and_load"))
return;
- }
- /* can not attach program with DEVMAPs that allow programs
- * as xdp generic
- */
dm_fd = bpf_program__fd(skel->progs.xdp_redir_prog);
- err = bpf_set_link_xdp_fd(IFINDEX_LO, dm_fd, XDP_FLAGS_SKB_MODE);
- CHECK(err == 0, "Generic attach of program with 8-byte devmap",
- "should have failed\n");
+ err = bpf_xdp_attach(IFINDEX_LO, dm_fd, XDP_FLAGS_SKB_MODE, NULL);
+ if (!ASSERT_OK(err, "Generic attach of program with 8-byte devmap"))
+ goto out_close;
+
+ err = bpf_xdp_detach(IFINDEX_LO, XDP_FLAGS_SKB_MODE, NULL);
+ ASSERT_OK(err, "XDP program detach");
dm_fd = bpf_program__fd(skel->progs.xdp_dummy_dm);
map_fd = bpf_map__fd(skel->maps.dm_ports);
- err = bpf_obj_get_info_by_fd(dm_fd, &info, &len);
- if (CHECK_FAIL(err))
+ err = bpf_prog_get_info_by_fd(dm_fd, &info, &len);
+ if (!ASSERT_OK(err, "bpf_prog_get_info_by_fd"))
goto out_close;
val.bpf_prog.fd = dm_fd;
err = bpf_map_update_elem(map_fd, &idx, &val, 0);
- CHECK(err, "Add program to devmap entry",
- "err %d errno %d\n", err, errno);
+ ASSERT_OK(err, "Add program to devmap entry");
err = bpf_map_lookup_elem(map_fd, &idx, &val);
- CHECK(err, "Read devmap entry", "err %d errno %d\n", err, errno);
- CHECK(info.id != val.bpf_prog.id, "Expected program id in devmap entry",
- "expected %u read %u\n", info.id, val.bpf_prog.id);
+ ASSERT_OK(err, "Read devmap entry");
+ ASSERT_EQ(info.id, val.bpf_prog.id, "Match program id to devmap entry prog_id");
/* can not attach BPF_XDP_DEVMAP program to a device */
- err = bpf_set_link_xdp_fd(IFINDEX_LO, dm_fd, XDP_FLAGS_SKB_MODE);
- CHECK(err == 0, "Attach of BPF_XDP_DEVMAP program",
- "should have failed\n");
+ err = bpf_xdp_attach(IFINDEX_LO, dm_fd, XDP_FLAGS_SKB_MODE, NULL);
+ if (!ASSERT_NEQ(err, 0, "Attach of BPF_XDP_DEVMAP program"))
+ bpf_xdp_detach(IFINDEX_LO, XDP_FLAGS_SKB_MODE, NULL);
val.ifindex = 1;
val.bpf_prog.fd = bpf_program__fd(skel->progs.xdp_dummy_prog);
err = bpf_map_update_elem(map_fd, &idx, &val, 0);
- CHECK(err == 0, "Add non-BPF_XDP_DEVMAP program to devmap entry",
- "should have failed\n");
+ ASSERT_NEQ(err, 0, "Add non-BPF_XDP_DEVMAP program to devmap entry");
+
+ /* Try to attach BPF_XDP program with frags to devmap when we have
+ * already loaded a BPF_XDP program on the map
+ */
+ idx = 1;
+ val.ifindex = 1;
+ val.bpf_prog.fd = bpf_program__fd(skel->progs.xdp_dummy_dm_frags);
+ err = bpf_map_update_elem(map_fd, &idx, &val, 0);
+ ASSERT_NEQ(err, 0, "Add BPF_XDP program with frags to devmap entry");
out_close:
test_xdp_with_devmap_helpers__destroy(skel);
}
-void test_neg_xdp_devmap_helpers(void)
+static void test_neg_xdp_devmap_helpers(void)
{
struct test_xdp_devmap_helpers *skel;
- __u32 duration = 0;
skel = test_xdp_devmap_helpers__open_and_load();
- if (CHECK(skel,
- "Load of XDP program accessing egress ifindex without attach type",
- "should have failed\n")) {
+ if (!ASSERT_EQ(skel, NULL,
+ "Load of XDP program accessing egress ifindex without attach type")) {
test_xdp_devmap_helpers__destroy(skel);
}
}
+static void test_xdp_with_devmap_frags_helpers(void)
+{
+ struct test_xdp_with_devmap_frags_helpers *skel;
+ struct bpf_prog_info info = {};
+ struct bpf_devmap_val val = {
+ .ifindex = IFINDEX_LO,
+ };
+ __u32 len = sizeof(info);
+ int err, dm_fd_frags, map_fd;
+ __u32 idx = 0;
+
+ skel = test_xdp_with_devmap_frags_helpers__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "test_xdp_with_devmap_helpers__open_and_load"))
+ return;
-void test_xdp_devmap_attach(void)
+ dm_fd_frags = bpf_program__fd(skel->progs.xdp_dummy_dm_frags);
+ map_fd = bpf_map__fd(skel->maps.dm_ports);
+ err = bpf_prog_get_info_by_fd(dm_fd_frags, &info, &len);
+ if (!ASSERT_OK(err, "bpf_prog_get_info_by_fd"))
+ goto out_close;
+
+ val.bpf_prog.fd = dm_fd_frags;
+ err = bpf_map_update_elem(map_fd, &idx, &val, 0);
+ ASSERT_OK(err, "Add frags program to devmap entry");
+
+ err = bpf_map_lookup_elem(map_fd, &idx, &val);
+ ASSERT_OK(err, "Read devmap entry");
+ ASSERT_EQ(info.id, val.bpf_prog.id,
+ "Match program id to devmap entry prog_id");
+
+ /* Try to attach BPF_XDP program to devmap when we have
+ * already loaded a BPF_XDP program with frags on the map
+ */
+ idx = 1;
+ val.ifindex = 1;
+ val.bpf_prog.fd = bpf_program__fd(skel->progs.xdp_dummy_dm);
+ err = bpf_map_update_elem(map_fd, &idx, &val, 0);
+ ASSERT_NEQ(err, 0, "Add BPF_XDP program to devmap entry");
+
+out_close:
+ test_xdp_with_devmap_frags_helpers__destroy(skel);
+}
+
+void serial_test_xdp_devmap_attach(void)
{
if (test__start_subtest("DEVMAP with programs in entries"))
test_xdp_with_devmap_helpers();
+ if (test__start_subtest("DEVMAP with frags programs in entries"))
+ test_xdp_with_devmap_frags_helpers();
+
if (test__start_subtest("Verifier check of DEVMAP programs"))
test_neg_xdp_devmap_helpers();
}
diff --git a/tools/testing/selftests/bpf/prog_tests/xdp_do_redirect.c b/tools/testing/selftests/bpf/prog_tests/xdp_do_redirect.c
new file mode 100644
index 000000000000..498d3bdaa4b0
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/xdp_do_redirect.c
@@ -0,0 +1,248 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <test_progs.h>
+#include <network_helpers.h>
+#include <net/if.h>
+#include <linux/if_ether.h>
+#include <linux/if_packet.h>
+#include <linux/if_link.h>
+#include <linux/ipv6.h>
+#include <linux/in6.h>
+#include <linux/udp.h>
+#include <bpf/bpf_endian.h>
+#include <uapi/linux/netdev.h>
+#include "test_xdp_do_redirect.skel.h"
+
+struct udp_packet {
+ struct ethhdr eth;
+ struct ipv6hdr iph;
+ struct udphdr udp;
+ __u8 payload[64 - sizeof(struct udphdr)
+ - sizeof(struct ethhdr) - sizeof(struct ipv6hdr)];
+} __packed;
+
+static struct udp_packet pkt_udp = {
+ .eth.h_proto = __bpf_constant_htons(ETH_P_IPV6),
+ .eth.h_dest = {0x00, 0x11, 0x22, 0x33, 0x44, 0x55},
+ .eth.h_source = {0x66, 0x77, 0x88, 0x99, 0xaa, 0xbb},
+ .iph.version = 6,
+ .iph.nexthdr = IPPROTO_UDP,
+ .iph.payload_len = bpf_htons(sizeof(struct udp_packet)
+ - offsetof(struct udp_packet, udp)),
+ .iph.hop_limit = 2,
+ .iph.saddr.s6_addr16 = {bpf_htons(0xfc00), 0, 0, 0, 0, 0, 0, bpf_htons(1)},
+ .iph.daddr.s6_addr16 = {bpf_htons(0xfc00), 0, 0, 0, 0, 0, 0, bpf_htons(2)},
+ .udp.source = bpf_htons(1),
+ .udp.dest = bpf_htons(1),
+ .udp.len = bpf_htons(sizeof(struct udp_packet)
+ - offsetof(struct udp_packet, udp)),
+ .payload = {0x42}, /* receiver XDP program matches on this */
+};
+
+static int attach_tc_prog(struct bpf_tc_hook *hook, int fd)
+{
+ DECLARE_LIBBPF_OPTS(bpf_tc_opts, opts, .handle = 1, .priority = 1, .prog_fd = fd);
+ int ret;
+
+ ret = bpf_tc_hook_create(hook);
+ if (!ASSERT_OK(ret, "create tc hook"))
+ return ret;
+
+ ret = bpf_tc_attach(hook, &opts);
+ if (!ASSERT_OK(ret, "bpf_tc_attach")) {
+ bpf_tc_hook_destroy(hook);
+ return ret;
+ }
+
+ return 0;
+}
+
+/* The maximum permissible size is: PAGE_SIZE - sizeof(struct xdp_page_head) -
+ * SKB_DATA_ALIGN(sizeof(struct skb_shared_info)) - XDP_PACKET_HEADROOM =
+ * 3408 bytes for 64-byte cacheline and 3216 for 256-byte one.
+ */
+#if defined(__s390x__)
+#define MAX_PKT_SIZE 3216
+#else
+#define MAX_PKT_SIZE 3408
+#endif
+static void test_max_pkt_size(int fd)
+{
+ char data[MAX_PKT_SIZE + 1] = {};
+ int err;
+ DECLARE_LIBBPF_OPTS(bpf_test_run_opts, opts,
+ .data_in = &data,
+ .data_size_in = MAX_PKT_SIZE,
+ .flags = BPF_F_TEST_XDP_LIVE_FRAMES,
+ .repeat = 1,
+ );
+ err = bpf_prog_test_run_opts(fd, &opts);
+ ASSERT_OK(err, "prog_run_max_size");
+
+ opts.data_size_in += 1;
+ err = bpf_prog_test_run_opts(fd, &opts);
+ ASSERT_EQ(err, -EINVAL, "prog_run_too_big");
+}
+
+#define NUM_PKTS 10000
+void test_xdp_do_redirect(void)
+{
+ int err, xdp_prog_fd, tc_prog_fd, ifindex_src, ifindex_dst;
+ char data[sizeof(pkt_udp) + sizeof(__u64)];
+ struct test_xdp_do_redirect *skel = NULL;
+ struct nstoken *nstoken = NULL;
+ struct bpf_link *link;
+ LIBBPF_OPTS(bpf_xdp_query_opts, query_opts);
+ struct xdp_md ctx_in = { .data = sizeof(__u64),
+ .data_end = sizeof(data) };
+ DECLARE_LIBBPF_OPTS(bpf_test_run_opts, opts,
+ .data_in = &data,
+ .data_size_in = sizeof(data),
+ .ctx_in = &ctx_in,
+ .ctx_size_in = sizeof(ctx_in),
+ .flags = BPF_F_TEST_XDP_LIVE_FRAMES,
+ .repeat = NUM_PKTS,
+ .batch_size = 64,
+ );
+ DECLARE_LIBBPF_OPTS(bpf_tc_hook, tc_hook,
+ .attach_point = BPF_TC_INGRESS);
+
+ memcpy(&data[sizeof(__u64)], &pkt_udp, sizeof(pkt_udp));
+ *((__u32 *)data) = 0x42; /* metadata test value */
+ *((__u32 *)data + 4) = 0;
+
+ skel = test_xdp_do_redirect__open();
+ if (!ASSERT_OK_PTR(skel, "skel"))
+ return;
+
+ /* The XDP program we run with bpf_prog_run() will cycle through all
+ * three xmit (PASS/TX/REDIRECT) return codes starting from above, and
+ * ending up with PASS, so we should end up with two packets on the dst
+ * iface and NUM_PKTS-2 in the TC hook. We match the packets on the UDP
+ * payload.
+ */
+ SYS(out, "ip netns add testns");
+ nstoken = open_netns("testns");
+ if (!ASSERT_OK_PTR(nstoken, "setns"))
+ goto out;
+
+ SYS(out, "ip link add veth_src type veth peer name veth_dst");
+ SYS(out, "ip link set dev veth_src address 00:11:22:33:44:55");
+ SYS(out, "ip link set dev veth_dst address 66:77:88:99:aa:bb");
+ SYS(out, "ip link set dev veth_src up");
+ SYS(out, "ip link set dev veth_dst up");
+ SYS(out, "ip addr add dev veth_src fc00::1/64");
+ SYS(out, "ip addr add dev veth_dst fc00::2/64");
+ SYS(out, "ip neigh add fc00::2 dev veth_src lladdr 66:77:88:99:aa:bb");
+
+ /* We enable forwarding in the test namespace because that will cause
+ * the packets that go through the kernel stack (with XDP_PASS) to be
+ * forwarded back out the same interface (because of the packet dst
+ * combined with the interface addresses). When this happens, the
+ * regular forwarding path will end up going through the same
+ * veth_xdp_xmit() call as the XDP_REDIRECT code, which can cause a
+ * deadlock if it happens on the same CPU. There's a local_bh_disable()
+ * in the test_run code to prevent this, but an earlier version of the
+ * code didn't have this, so we keep the test behaviour to make sure the
+ * bug doesn't resurface.
+ */
+ SYS(out, "sysctl -qw net.ipv6.conf.all.forwarding=1");
+
+ ifindex_src = if_nametoindex("veth_src");
+ ifindex_dst = if_nametoindex("veth_dst");
+ if (!ASSERT_NEQ(ifindex_src, 0, "ifindex_src") ||
+ !ASSERT_NEQ(ifindex_dst, 0, "ifindex_dst"))
+ goto out;
+
+ /* Check xdp features supported by veth driver */
+ err = bpf_xdp_query(ifindex_src, XDP_FLAGS_DRV_MODE, &query_opts);
+ if (!ASSERT_OK(err, "veth_src bpf_xdp_query"))
+ goto out;
+
+ if (!ASSERT_EQ(query_opts.feature_flags,
+ NETDEV_XDP_ACT_BASIC | NETDEV_XDP_ACT_REDIRECT |
+ NETDEV_XDP_ACT_RX_SG,
+ "veth_src query_opts.feature_flags"))
+ goto out;
+
+ err = bpf_xdp_query(ifindex_dst, XDP_FLAGS_DRV_MODE, &query_opts);
+ if (!ASSERT_OK(err, "veth_dst bpf_xdp_query"))
+ goto out;
+
+ if (!ASSERT_EQ(query_opts.feature_flags,
+ NETDEV_XDP_ACT_BASIC | NETDEV_XDP_ACT_REDIRECT |
+ NETDEV_XDP_ACT_RX_SG,
+ "veth_dst query_opts.feature_flags"))
+ goto out;
+
+ /* Enable GRO */
+ SYS(out, "ethtool -K veth_src gro on");
+ SYS(out, "ethtool -K veth_dst gro on");
+
+ err = bpf_xdp_query(ifindex_src, XDP_FLAGS_DRV_MODE, &query_opts);
+ if (!ASSERT_OK(err, "veth_src bpf_xdp_query gro on"))
+ goto out;
+
+ if (!ASSERT_EQ(query_opts.feature_flags,
+ NETDEV_XDP_ACT_BASIC | NETDEV_XDP_ACT_REDIRECT |
+ NETDEV_XDP_ACT_NDO_XMIT | NETDEV_XDP_ACT_RX_SG |
+ NETDEV_XDP_ACT_NDO_XMIT_SG,
+ "veth_src query_opts.feature_flags gro on"))
+ goto out;
+
+ err = bpf_xdp_query(ifindex_dst, XDP_FLAGS_DRV_MODE, &query_opts);
+ if (!ASSERT_OK(err, "veth_dst bpf_xdp_query gro on"))
+ goto out;
+
+ if (!ASSERT_EQ(query_opts.feature_flags,
+ NETDEV_XDP_ACT_BASIC | NETDEV_XDP_ACT_REDIRECT |
+ NETDEV_XDP_ACT_NDO_XMIT | NETDEV_XDP_ACT_RX_SG |
+ NETDEV_XDP_ACT_NDO_XMIT_SG,
+ "veth_dst query_opts.feature_flags gro on"))
+ goto out;
+
+ memcpy(skel->rodata->expect_dst, &pkt_udp.eth.h_dest, ETH_ALEN);
+ skel->rodata->ifindex_out = ifindex_src; /* redirect back to the same iface */
+ skel->rodata->ifindex_in = ifindex_src;
+ ctx_in.ingress_ifindex = ifindex_src;
+ tc_hook.ifindex = ifindex_src;
+
+ if (!ASSERT_OK(test_xdp_do_redirect__load(skel), "load"))
+ goto out;
+
+ link = bpf_program__attach_xdp(skel->progs.xdp_count_pkts, ifindex_dst);
+ if (!ASSERT_OK_PTR(link, "prog_attach"))
+ goto out;
+ skel->links.xdp_count_pkts = link;
+
+ tc_prog_fd = bpf_program__fd(skel->progs.tc_count_pkts);
+ if (attach_tc_prog(&tc_hook, tc_prog_fd))
+ goto out;
+
+ xdp_prog_fd = bpf_program__fd(skel->progs.xdp_redirect);
+ err = bpf_prog_test_run_opts(xdp_prog_fd, &opts);
+ if (!ASSERT_OK(err, "prog_run"))
+ goto out_tc;
+
+ /* wait for the packets to be flushed */
+ kern_sync_rcu();
+
+ /* There will be one packet sent through XDP_REDIRECT and one through
+ * XDP_TX; these will show up on the XDP counting program, while the
+ * rest will be counted at the TC ingress hook (and the counting program
+ * resets the packet payload so they don't get counted twice even though
+ * they are re-xmited out the veth device
+ */
+ ASSERT_EQ(skel->bss->pkts_seen_xdp, 2, "pkt_count_xdp");
+ ASSERT_EQ(skel->bss->pkts_seen_zero, 2, "pkt_count_zero");
+ ASSERT_EQ(skel->bss->pkts_seen_tc, NUM_PKTS - 2, "pkt_count_tc");
+
+ test_max_pkt_size(bpf_program__fd(skel->progs.xdp_count_pkts));
+
+out_tc:
+ bpf_tc_hook_destroy(&tc_hook);
+out:
+ if (nstoken)
+ close_netns(nstoken);
+ SYS_NOFAIL("ip netns del testns");
+ test_xdp_do_redirect__destroy(skel);
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/xdp_info.c b/tools/testing/selftests/bpf/prog_tests/xdp_info.c
index d2d7a283d72f..1dbddcab87a8 100644
--- a/tools/testing/selftests/bpf/prog_tests/xdp_info.c
+++ b/tools/testing/selftests/bpf/prog_tests/xdp_info.c
@@ -4,23 +4,24 @@
#define IFINDEX_LO 1
-void test_xdp_info(void)
+void serial_test_xdp_info(void)
{
__u32 len = sizeof(struct bpf_prog_info), duration = 0, prog_id;
- const char *file = "./xdp_dummy.o";
+ const char *file = "./xdp_dummy.bpf.o";
+ LIBBPF_OPTS(bpf_xdp_query_opts, opts);
struct bpf_prog_info info = {};
struct bpf_object *obj;
int err, prog_fd;
/* Get prog_id for XDP_ATTACHED_NONE mode */
- err = bpf_get_link_xdp_id(IFINDEX_LO, &prog_id, 0);
+ err = bpf_xdp_query_id(IFINDEX_LO, 0, &prog_id);
if (CHECK(err, "get_xdp_none", "errno=%d\n", errno))
return;
if (CHECK(prog_id, "prog_id_none", "unexpected prog_id=%u\n", prog_id))
return;
- err = bpf_get_link_xdp_id(IFINDEX_LO, &prog_id, XDP_FLAGS_SKB_MODE);
+ err = bpf_xdp_query_id(IFINDEX_LO, XDP_FLAGS_SKB_MODE, &prog_id);
if (CHECK(err, "get_xdp_none_skb", "errno=%d\n", errno))
return;
if (CHECK(prog_id, "prog_id_none_skb", "unexpected prog_id=%u\n",
@@ -29,40 +30,47 @@ void test_xdp_info(void)
/* Setup prog */
- err = bpf_prog_load(file, BPF_PROG_TYPE_XDP, &obj, &prog_fd);
+ err = bpf_prog_test_load(file, BPF_PROG_TYPE_XDP, &obj, &prog_fd);
if (CHECK_FAIL(err))
return;
- err = bpf_obj_get_info_by_fd(prog_fd, &info, &len);
+ err = bpf_prog_get_info_by_fd(prog_fd, &info, &len);
if (CHECK(err, "get_prog_info", "errno=%d\n", errno))
goto out_close;
- err = bpf_set_link_xdp_fd(IFINDEX_LO, prog_fd, XDP_FLAGS_SKB_MODE);
+ err = bpf_xdp_attach(IFINDEX_LO, prog_fd, XDP_FLAGS_SKB_MODE, NULL);
if (CHECK(err, "set_xdp_skb", "errno=%d\n", errno))
goto out_close;
/* Get prog_id for single prog mode */
- err = bpf_get_link_xdp_id(IFINDEX_LO, &prog_id, 0);
+ err = bpf_xdp_query_id(IFINDEX_LO, 0, &prog_id);
if (CHECK(err, "get_xdp", "errno=%d\n", errno))
goto out;
if (CHECK(prog_id != info.id, "prog_id", "prog_id not available\n"))
goto out;
- err = bpf_get_link_xdp_id(IFINDEX_LO, &prog_id, XDP_FLAGS_SKB_MODE);
+ err = bpf_xdp_query_id(IFINDEX_LO, XDP_FLAGS_SKB_MODE, &prog_id);
if (CHECK(err, "get_xdp_skb", "errno=%d\n", errno))
goto out;
if (CHECK(prog_id != info.id, "prog_id_skb", "prog_id not available\n"))
goto out;
- err = bpf_get_link_xdp_id(IFINDEX_LO, &prog_id, XDP_FLAGS_DRV_MODE);
+ err = bpf_xdp_query_id(IFINDEX_LO, XDP_FLAGS_DRV_MODE, &prog_id);
if (CHECK(err, "get_xdp_drv", "errno=%d\n", errno))
goto out;
if (CHECK(prog_id, "prog_id_drv", "unexpected prog_id=%u\n", prog_id))
goto out;
+ /* Check xdp features supported by lo device */
+ opts.feature_flags = ~0;
+ err = bpf_xdp_query(IFINDEX_LO, XDP_FLAGS_DRV_MODE, &opts);
+ if (!ASSERT_OK(err, "bpf_xdp_query"))
+ goto out;
+
+ ASSERT_EQ(opts.feature_flags, 0, "opts.feature_flags");
out:
- bpf_set_link_xdp_fd(IFINDEX_LO, -1, 0);
+ bpf_xdp_detach(IFINDEX_LO, 0, NULL);
out_close:
bpf_object__close(obj);
}
diff --git a/tools/testing/selftests/bpf/prog_tests/xdp_link.c b/tools/testing/selftests/bpf/prog_tests/xdp_link.c
new file mode 100644
index 000000000000..e7e9f3c22edf
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/xdp_link.c
@@ -0,0 +1,152 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2020 Facebook */
+#include <uapi/linux/if_link.h>
+#include <test_progs.h>
+#include "test_xdp_link.skel.h"
+
+#define IFINDEX_LO 1
+
+void serial_test_xdp_link(void)
+{
+ struct test_xdp_link *skel1 = NULL, *skel2 = NULL;
+ __u32 id1, id2, id0 = 0, prog_fd1, prog_fd2;
+ LIBBPF_OPTS(bpf_xdp_attach_opts, opts);
+ struct bpf_link_info link_info;
+ struct bpf_prog_info prog_info;
+ struct bpf_link *link;
+ int err;
+ __u32 link_info_len = sizeof(link_info);
+ __u32 prog_info_len = sizeof(prog_info);
+
+ skel1 = test_xdp_link__open_and_load();
+ if (!ASSERT_OK_PTR(skel1, "skel_load"))
+ goto cleanup;
+ prog_fd1 = bpf_program__fd(skel1->progs.xdp_handler);
+
+ skel2 = test_xdp_link__open_and_load();
+ if (!ASSERT_OK_PTR(skel2, "skel_load"))
+ goto cleanup;
+ prog_fd2 = bpf_program__fd(skel2->progs.xdp_handler);
+
+ memset(&prog_info, 0, sizeof(prog_info));
+ err = bpf_prog_get_info_by_fd(prog_fd1, &prog_info, &prog_info_len);
+ if (!ASSERT_OK(err, "fd_info1"))
+ goto cleanup;
+ id1 = prog_info.id;
+
+ memset(&prog_info, 0, sizeof(prog_info));
+ err = bpf_prog_get_info_by_fd(prog_fd2, &prog_info, &prog_info_len);
+ if (!ASSERT_OK(err, "fd_info2"))
+ goto cleanup;
+ id2 = prog_info.id;
+
+ /* set initial prog attachment */
+ err = bpf_xdp_attach(IFINDEX_LO, prog_fd1, XDP_FLAGS_REPLACE, &opts);
+ if (!ASSERT_OK(err, "fd_attach"))
+ goto cleanup;
+
+ /* validate prog ID */
+ err = bpf_xdp_query_id(IFINDEX_LO, 0, &id0);
+ if (!ASSERT_OK(err, "id1_check_err") || !ASSERT_EQ(id0, id1, "id1_check_val"))
+ goto cleanup;
+
+ /* BPF link is not allowed to replace prog attachment */
+ link = bpf_program__attach_xdp(skel1->progs.xdp_handler, IFINDEX_LO);
+ if (!ASSERT_ERR_PTR(link, "link_attach_should_fail")) {
+ bpf_link__destroy(link);
+ /* best-effort detach prog */
+ opts.old_prog_fd = prog_fd1;
+ bpf_xdp_detach(IFINDEX_LO, XDP_FLAGS_REPLACE, &opts);
+ goto cleanup;
+ }
+
+ /* detach BPF program */
+ opts.old_prog_fd = prog_fd1;
+ err = bpf_xdp_detach(IFINDEX_LO, XDP_FLAGS_REPLACE, &opts);
+ if (!ASSERT_OK(err, "prog_detach"))
+ goto cleanup;
+
+ /* now BPF link should attach successfully */
+ link = bpf_program__attach_xdp(skel1->progs.xdp_handler, IFINDEX_LO);
+ if (!ASSERT_OK_PTR(link, "link_attach"))
+ goto cleanup;
+ skel1->links.xdp_handler = link;
+
+ /* validate prog ID */
+ err = bpf_xdp_query_id(IFINDEX_LO, 0, &id0);
+ if (!ASSERT_OK(err, "id1_check_err") || !ASSERT_EQ(id0, id1, "id1_check_val"))
+ goto cleanup;
+
+ /* BPF prog attach is not allowed to replace BPF link */
+ opts.old_prog_fd = prog_fd1;
+ err = bpf_xdp_attach(IFINDEX_LO, prog_fd2, XDP_FLAGS_REPLACE, &opts);
+ if (!ASSERT_ERR(err, "prog_attach_fail"))
+ goto cleanup;
+
+ /* Can't force-update when BPF link is active */
+ err = bpf_xdp_attach(IFINDEX_LO, prog_fd2, 0, NULL);
+ if (!ASSERT_ERR(err, "prog_update_fail"))
+ goto cleanup;
+
+ /* Can't force-detach when BPF link is active */
+ err = bpf_xdp_detach(IFINDEX_LO, 0, NULL);
+ if (!ASSERT_ERR(err, "prog_detach_fail"))
+ goto cleanup;
+
+ /* BPF link is not allowed to replace another BPF link */
+ link = bpf_program__attach_xdp(skel2->progs.xdp_handler, IFINDEX_LO);
+ if (!ASSERT_ERR_PTR(link, "link_attach_should_fail")) {
+ bpf_link__destroy(link);
+ goto cleanup;
+ }
+
+ bpf_link__destroy(skel1->links.xdp_handler);
+ skel1->links.xdp_handler = NULL;
+
+ /* new link attach should succeed */
+ link = bpf_program__attach_xdp(skel2->progs.xdp_handler, IFINDEX_LO);
+ if (!ASSERT_OK_PTR(link, "link_attach"))
+ goto cleanup;
+ skel2->links.xdp_handler = link;
+
+ err = bpf_xdp_query_id(IFINDEX_LO, 0, &id0);
+ if (!ASSERT_OK(err, "id2_check_err") || !ASSERT_EQ(id0, id2, "id2_check_val"))
+ goto cleanup;
+
+ /* updating program under active BPF link works as expected */
+ err = bpf_link__update_program(link, skel1->progs.xdp_handler);
+ if (!ASSERT_OK(err, "link_upd"))
+ goto cleanup;
+
+ memset(&link_info, 0, sizeof(link_info));
+ err = bpf_link_get_info_by_fd(bpf_link__fd(link),
+ &link_info, &link_info_len);
+ if (!ASSERT_OK(err, "link_info"))
+ goto cleanup;
+
+ ASSERT_EQ(link_info.type, BPF_LINK_TYPE_XDP, "link_type");
+ ASSERT_EQ(link_info.prog_id, id1, "link_prog_id");
+ ASSERT_EQ(link_info.xdp.ifindex, IFINDEX_LO, "link_ifindex");
+
+ /* updating program under active BPF link with different type fails */
+ err = bpf_link__update_program(link, skel1->progs.tc_handler);
+ if (!ASSERT_ERR(err, "link_upd_invalid"))
+ goto cleanup;
+
+ err = bpf_link__detach(link);
+ if (!ASSERT_OK(err, "link_detach"))
+ goto cleanup;
+
+ memset(&link_info, 0, sizeof(link_info));
+ err = bpf_link_get_info_by_fd(bpf_link__fd(link),
+ &link_info, &link_info_len);
+
+ ASSERT_OK(err, "link_info");
+ ASSERT_EQ(link_info.prog_id, id1, "link_prog_id");
+ /* ifindex should be zeroed out */
+ ASSERT_EQ(link_info.xdp.ifindex, 0, "link_ifindex");
+
+cleanup:
+ test_xdp_link__destroy(skel1);
+ test_xdp_link__destroy(skel2);
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/xdp_metadata.c b/tools/testing/selftests/bpf/prog_tests/xdp_metadata.c
new file mode 100644
index 000000000000..05edcf32f528
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/xdp_metadata.c
@@ -0,0 +1,525 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <test_progs.h>
+#include <network_helpers.h>
+#include "xdp_metadata.skel.h"
+#include "xdp_metadata2.skel.h"
+#include "xdp_metadata.h"
+#include "xsk.h"
+
+#include <bpf/btf.h>
+#include <linux/errqueue.h>
+#include <linux/if_link.h>
+#include <linux/net_tstamp.h>
+#include <linux/udp.h>
+#include <sys/mman.h>
+#include <net/if.h>
+#include <poll.h>
+
+#define TX_NAME "veTX"
+#define RX_NAME "veRX"
+
+#define UDP_PAYLOAD_BYTES 4
+
+#define UDP_SOURCE_PORT 1234
+#define AF_XDP_CONSUMER_PORT 8080
+
+#define UMEM_NUM 16
+#define UMEM_FRAME_SIZE XSK_UMEM__DEFAULT_FRAME_SIZE
+#define UMEM_SIZE (UMEM_FRAME_SIZE * UMEM_NUM)
+#define XDP_FLAGS XDP_FLAGS_DRV_MODE
+#define QUEUE_ID 0
+
+#define TX_ADDR "10.0.0.1"
+#define RX_ADDR "10.0.0.2"
+#define PREFIX_LEN "8"
+#define FAMILY AF_INET
+#define TX_NETNS_NAME "xdp_metadata_tx"
+#define RX_NETNS_NAME "xdp_metadata_rx"
+#define TX_MAC "00:00:00:00:00:01"
+#define RX_MAC "00:00:00:00:00:02"
+
+#define VLAN_ID 59
+#define VLAN_PROTO "802.1Q"
+#define VLAN_PID htons(ETH_P_8021Q)
+#define TX_NAME_VLAN TX_NAME "." TO_STR(VLAN_ID)
+
+#define XDP_RSS_TYPE_L4 BIT(3)
+#define VLAN_VID_MASK 0xfff
+
+struct xsk {
+ void *umem_area;
+ struct xsk_umem *umem;
+ struct xsk_ring_prod fill;
+ struct xsk_ring_cons comp;
+ struct xsk_ring_prod tx;
+ struct xsk_ring_cons rx;
+ struct xsk_socket *socket;
+};
+
+static int open_xsk(int ifindex, struct xsk *xsk)
+{
+ int mmap_flags = MAP_PRIVATE | MAP_ANONYMOUS | MAP_NORESERVE;
+ const struct xsk_socket_config socket_config = {
+ .rx_size = XSK_RING_PROD__DEFAULT_NUM_DESCS,
+ .tx_size = XSK_RING_PROD__DEFAULT_NUM_DESCS,
+ .bind_flags = XDP_COPY,
+ };
+ const struct xsk_umem_config umem_config = {
+ .fill_size = XSK_RING_PROD__DEFAULT_NUM_DESCS,
+ .comp_size = XSK_RING_CONS__DEFAULT_NUM_DESCS,
+ .frame_size = XSK_UMEM__DEFAULT_FRAME_SIZE,
+ .flags = XDP_UMEM_UNALIGNED_CHUNK_FLAG | XDP_UMEM_TX_SW_CSUM,
+ .tx_metadata_len = sizeof(struct xsk_tx_metadata),
+ };
+ __u32 idx;
+ u64 addr;
+ int ret;
+ int i;
+
+ xsk->umem_area = mmap(NULL, UMEM_SIZE, PROT_READ | PROT_WRITE, mmap_flags, -1, 0);
+ if (!ASSERT_NEQ(xsk->umem_area, MAP_FAILED, "mmap"))
+ return -1;
+
+ ret = xsk_umem__create(&xsk->umem,
+ xsk->umem_area, UMEM_SIZE,
+ &xsk->fill,
+ &xsk->comp,
+ &umem_config);
+ if (!ASSERT_OK(ret, "xsk_umem__create"))
+ return ret;
+
+ ret = xsk_socket__create(&xsk->socket, ifindex, QUEUE_ID,
+ xsk->umem,
+ &xsk->rx,
+ &xsk->tx,
+ &socket_config);
+ if (!ASSERT_OK(ret, "xsk_socket__create"))
+ return ret;
+
+ /* First half of umem is for TX. This way address matches 1-to-1
+ * to the completion queue index.
+ */
+
+ for (i = 0; i < UMEM_NUM / 2; i++) {
+ addr = i * UMEM_FRAME_SIZE;
+ printf("%p: tx_desc[%d] -> %lx\n", xsk, i, addr);
+ }
+
+ /* Second half of umem is for RX. */
+
+ ret = xsk_ring_prod__reserve(&xsk->fill, UMEM_NUM / 2, &idx);
+ if (!ASSERT_EQ(UMEM_NUM / 2, ret, "xsk_ring_prod__reserve"))
+ return ret;
+ if (!ASSERT_EQ(idx, 0, "fill idx != 0"))
+ return -1;
+
+ for (i = 0; i < UMEM_NUM / 2; i++) {
+ addr = (UMEM_NUM / 2 + i) * UMEM_FRAME_SIZE;
+ printf("%p: rx_desc[%d] -> %lx\n", xsk, i, addr);
+ *xsk_ring_prod__fill_addr(&xsk->fill, i) = addr;
+ }
+ xsk_ring_prod__submit(&xsk->fill, ret);
+
+ return 0;
+}
+
+static void close_xsk(struct xsk *xsk)
+{
+ if (xsk->umem)
+ xsk_umem__delete(xsk->umem);
+ if (xsk->socket)
+ xsk_socket__delete(xsk->socket);
+ munmap(xsk->umem_area, UMEM_SIZE);
+}
+
+static void ip_csum(struct iphdr *iph)
+{
+ __u32 sum = 0;
+ __u16 *p;
+ int i;
+
+ iph->check = 0;
+ p = (void *)iph;
+ for (i = 0; i < sizeof(*iph) / sizeof(*p); i++)
+ sum += p[i];
+
+ while (sum >> 16)
+ sum = (sum & 0xffff) + (sum >> 16);
+
+ iph->check = ~sum;
+}
+
+static int generate_packet(struct xsk *xsk, __u16 dst_port)
+{
+ struct xsk_tx_metadata *meta;
+ struct xdp_desc *tx_desc;
+ struct udphdr *udph;
+ struct ethhdr *eth;
+ struct iphdr *iph;
+ void *data;
+ __u32 idx;
+ int ret;
+
+ ret = xsk_ring_prod__reserve(&xsk->tx, 1, &idx);
+ if (!ASSERT_EQ(ret, 1, "xsk_ring_prod__reserve"))
+ return -1;
+
+ tx_desc = xsk_ring_prod__tx_desc(&xsk->tx, idx);
+ tx_desc->addr = idx % (UMEM_NUM / 2) * UMEM_FRAME_SIZE + sizeof(struct xsk_tx_metadata);
+ printf("%p: tx_desc[%u]->addr=%llx\n", xsk, idx, tx_desc->addr);
+ data = xsk_umem__get_data(xsk->umem_area, tx_desc->addr);
+
+ meta = data - sizeof(struct xsk_tx_metadata);
+ memset(meta, 0, sizeof(*meta));
+ meta->flags = XDP_TXMD_FLAGS_TIMESTAMP;
+
+ eth = data;
+ iph = (void *)(eth + 1);
+ udph = (void *)(iph + 1);
+
+ memcpy(eth->h_dest, "\x00\x00\x00\x00\x00\x02", ETH_ALEN);
+ memcpy(eth->h_source, "\x00\x00\x00\x00\x00\x01", ETH_ALEN);
+ eth->h_proto = htons(ETH_P_IP);
+
+ iph->version = 0x4;
+ iph->ihl = 0x5;
+ iph->tos = 0x9;
+ iph->tot_len = htons(sizeof(*iph) + sizeof(*udph) + UDP_PAYLOAD_BYTES);
+ iph->id = 0;
+ iph->frag_off = 0;
+ iph->ttl = 0;
+ iph->protocol = IPPROTO_UDP;
+ ASSERT_EQ(inet_pton(FAMILY, TX_ADDR, &iph->saddr), 1, "inet_pton(TX_ADDR)");
+ ASSERT_EQ(inet_pton(FAMILY, RX_ADDR, &iph->daddr), 1, "inet_pton(RX_ADDR)");
+ ip_csum(iph);
+
+ udph->source = htons(UDP_SOURCE_PORT);
+ udph->dest = htons(dst_port);
+ udph->len = htons(sizeof(*udph) + UDP_PAYLOAD_BYTES);
+ udph->check = ~csum_tcpudp_magic(iph->saddr, iph->daddr,
+ ntohs(udph->len), IPPROTO_UDP, 0);
+
+ memset(udph + 1, 0xAA, UDP_PAYLOAD_BYTES);
+
+ meta->flags |= XDP_TXMD_FLAGS_CHECKSUM;
+ meta->request.csum_start = sizeof(*eth) + sizeof(*iph);
+ meta->request.csum_offset = offsetof(struct udphdr, check);
+
+ tx_desc->len = sizeof(*eth) + sizeof(*iph) + sizeof(*udph) + UDP_PAYLOAD_BYTES;
+ tx_desc->options |= XDP_TX_METADATA;
+ xsk_ring_prod__submit(&xsk->tx, 1);
+
+ ret = sendto(xsk_socket__fd(xsk->socket), NULL, 0, MSG_DONTWAIT, NULL, 0);
+ if (!ASSERT_GE(ret, 0, "sendto"))
+ return ret;
+
+ return 0;
+}
+
+static int generate_packet_inet(void)
+{
+ char udp_payload[UDP_PAYLOAD_BYTES];
+ struct sockaddr_in rx_addr;
+ int sock_fd, err = 0;
+
+ /* Build a packet */
+ memset(udp_payload, 0xAA, UDP_PAYLOAD_BYTES);
+ rx_addr.sin_addr.s_addr = inet_addr(RX_ADDR);
+ rx_addr.sin_family = AF_INET;
+ rx_addr.sin_port = htons(AF_XDP_CONSUMER_PORT);
+
+ sock_fd = socket(AF_INET, SOCK_DGRAM, IPPROTO_UDP);
+ if (!ASSERT_GE(sock_fd, 0, "socket(AF_INET, SOCK_DGRAM, IPPROTO_UDP)"))
+ return sock_fd;
+
+ err = sendto(sock_fd, udp_payload, UDP_PAYLOAD_BYTES, MSG_DONTWAIT,
+ (void *)&rx_addr, sizeof(rx_addr));
+ ASSERT_GE(err, 0, "sendto");
+
+ close(sock_fd);
+ return err;
+}
+
+static void complete_tx(struct xsk *xsk)
+{
+ struct xsk_tx_metadata *meta;
+ __u64 addr;
+ void *data;
+ __u32 idx;
+
+ if (ASSERT_EQ(xsk_ring_cons__peek(&xsk->comp, 1, &idx), 1, "xsk_ring_cons__peek")) {
+ addr = *xsk_ring_cons__comp_addr(&xsk->comp, idx);
+
+ printf("%p: complete tx idx=%u addr=%llx\n", xsk, idx, addr);
+
+ data = xsk_umem__get_data(xsk->umem_area, addr);
+ meta = data - sizeof(struct xsk_tx_metadata);
+
+ ASSERT_NEQ(meta->completion.tx_timestamp, 0, "tx_timestamp");
+
+ xsk_ring_cons__release(&xsk->comp, 1);
+ }
+}
+
+static void refill_rx(struct xsk *xsk, __u64 addr)
+{
+ __u32 idx;
+
+ if (ASSERT_EQ(xsk_ring_prod__reserve(&xsk->fill, 1, &idx), 1, "xsk_ring_prod__reserve")) {
+ printf("%p: complete idx=%u addr=%llx\n", xsk, idx, addr);
+ *xsk_ring_prod__fill_addr(&xsk->fill, idx) = addr;
+ xsk_ring_prod__submit(&xsk->fill, 1);
+ }
+}
+
+static int verify_xsk_metadata(struct xsk *xsk, bool sent_from_af_xdp)
+{
+ const struct xdp_desc *rx_desc;
+ struct pollfd fds = {};
+ struct xdp_meta *meta;
+ struct udphdr *udph;
+ struct ethhdr *eth;
+ struct iphdr *iph;
+ __u64 comp_addr;
+ void *data;
+ __u64 addr;
+ __u32 idx = 0;
+ int ret;
+
+ ret = recvfrom(xsk_socket__fd(xsk->socket), NULL, 0, MSG_DONTWAIT, NULL, NULL);
+ if (!ASSERT_EQ(ret, 0, "recvfrom"))
+ return -1;
+
+ fds.fd = xsk_socket__fd(xsk->socket);
+ fds.events = POLLIN;
+
+ ret = poll(&fds, 1, 1000);
+ if (!ASSERT_GT(ret, 0, "poll"))
+ return -1;
+
+ ret = xsk_ring_cons__peek(&xsk->rx, 1, &idx);
+ if (!ASSERT_EQ(ret, 1, "xsk_ring_cons__peek"))
+ return -2;
+
+ rx_desc = xsk_ring_cons__rx_desc(&xsk->rx, idx);
+ comp_addr = xsk_umem__extract_addr(rx_desc->addr);
+ addr = xsk_umem__add_offset_to_addr(rx_desc->addr);
+ printf("%p: rx_desc[%u]->addr=%llx addr=%llx comp_addr=%llx\n",
+ xsk, idx, rx_desc->addr, addr, comp_addr);
+ data = xsk_umem__get_data(xsk->umem_area, addr);
+
+ /* Make sure we got the packet offset correctly. */
+
+ eth = data;
+ ASSERT_EQ(eth->h_proto, htons(ETH_P_IP), "eth->h_proto");
+ iph = (void *)(eth + 1);
+ ASSERT_EQ((int)iph->version, 4, "iph->version");
+ udph = (void *)(iph + 1);
+
+ /* custom metadata */
+
+ meta = data - sizeof(struct xdp_meta);
+
+ if (!ASSERT_NEQ(meta->rx_timestamp, 0, "rx_timestamp"))
+ return -1;
+
+ if (!ASSERT_NEQ(meta->rx_hash, 0, "rx_hash"))
+ return -1;
+
+ if (!sent_from_af_xdp) {
+ if (!ASSERT_NEQ(meta->rx_hash_type & XDP_RSS_TYPE_L4, 0, "rx_hash_type"))
+ return -1;
+
+ if (!ASSERT_EQ(meta->rx_vlan_tci & VLAN_VID_MASK, VLAN_ID, "rx_vlan_tci"))
+ return -1;
+
+ if (!ASSERT_EQ(meta->rx_vlan_proto, VLAN_PID, "rx_vlan_proto"))
+ return -1;
+ goto done;
+ }
+
+ ASSERT_EQ(meta->rx_hash_type, 0, "rx_hash_type");
+
+ /* checksum offload */
+ ASSERT_EQ(udph->check, htons(0x721c), "csum");
+
+done:
+ xsk_ring_cons__release(&xsk->rx, 1);
+ refill_rx(xsk, comp_addr);
+
+ return 0;
+}
+
+static void switch_ns_to_rx(struct nstoken **tok)
+{
+ close_netns(*tok);
+ *tok = open_netns(RX_NETNS_NAME);
+}
+
+static void switch_ns_to_tx(struct nstoken **tok)
+{
+ close_netns(*tok);
+ *tok = open_netns(TX_NETNS_NAME);
+}
+
+void test_xdp_metadata(void)
+{
+ struct xdp_metadata2 *bpf_obj2 = NULL;
+ struct xdp_metadata *bpf_obj = NULL;
+ struct bpf_program *new_prog, *prog;
+ struct nstoken *tok = NULL;
+ __u32 queue_id = QUEUE_ID;
+ struct bpf_map *prog_arr;
+ struct xsk tx_xsk = {};
+ struct xsk rx_xsk = {};
+ __u32 val, key = 0;
+ int retries = 10;
+ int rx_ifindex;
+ int tx_ifindex;
+ int sock_fd;
+ int ret;
+
+ /* Setup new networking namespaces, with a veth pair. */
+ SYS(out, "ip netns add " TX_NETNS_NAME);
+ SYS(out, "ip netns add " RX_NETNS_NAME);
+
+ tok = open_netns(TX_NETNS_NAME);
+ SYS(out, "ip link add numtxqueues 1 numrxqueues 1 " TX_NAME
+ " type veth peer " RX_NAME " numtxqueues 1 numrxqueues 1");
+ SYS(out, "ip link set " RX_NAME " netns " RX_NETNS_NAME);
+
+ SYS(out, "ip link set dev " TX_NAME " address " TX_MAC);
+ SYS(out, "ip link set dev " TX_NAME " up");
+
+ SYS(out, "ip link add link " TX_NAME " " TX_NAME_VLAN
+ " type vlan proto " VLAN_PROTO " id " TO_STR(VLAN_ID));
+ SYS(out, "ip link set dev " TX_NAME_VLAN " up");
+ SYS(out, "ip addr add " TX_ADDR "/" PREFIX_LEN " dev " TX_NAME_VLAN);
+
+ /* Avoid ARP calls */
+ SYS(out, "ip -4 neigh add " RX_ADDR " lladdr " RX_MAC " dev " TX_NAME_VLAN);
+
+ switch_ns_to_rx(&tok);
+
+ SYS(out, "ip link set dev " RX_NAME " address " RX_MAC);
+ SYS(out, "ip link set dev " RX_NAME " up");
+ SYS(out, "ip addr add " RX_ADDR "/" PREFIX_LEN " dev " RX_NAME);
+
+ rx_ifindex = if_nametoindex(RX_NAME);
+
+ /* Setup separate AF_XDP for RX interface. */
+
+ ret = open_xsk(rx_ifindex, &rx_xsk);
+ if (!ASSERT_OK(ret, "open_xsk(RX_NAME)"))
+ goto out;
+
+ bpf_obj = xdp_metadata__open();
+ if (!ASSERT_OK_PTR(bpf_obj, "open skeleton"))
+ goto out;
+
+ prog = bpf_object__find_program_by_name(bpf_obj->obj, "rx");
+ bpf_program__set_ifindex(prog, rx_ifindex);
+ bpf_program__set_flags(prog, BPF_F_XDP_DEV_BOUND_ONLY);
+
+ if (!ASSERT_OK(xdp_metadata__load(bpf_obj), "load skeleton"))
+ goto out;
+
+ /* Make sure we can't add dev-bound programs to prog maps. */
+ prog_arr = bpf_object__find_map_by_name(bpf_obj->obj, "prog_arr");
+ if (!ASSERT_OK_PTR(prog_arr, "no prog_arr map"))
+ goto out;
+
+ val = bpf_program__fd(prog);
+ if (!ASSERT_ERR(bpf_map__update_elem(prog_arr, &key, sizeof(key),
+ &val, sizeof(val), BPF_ANY),
+ "update prog_arr"))
+ goto out;
+
+ /* Attach BPF program to RX interface. */
+
+ ret = bpf_xdp_attach(rx_ifindex,
+ bpf_program__fd(bpf_obj->progs.rx),
+ XDP_FLAGS, NULL);
+ if (!ASSERT_GE(ret, 0, "bpf_xdp_attach"))
+ goto out;
+
+ sock_fd = xsk_socket__fd(rx_xsk.socket);
+ ret = bpf_map_update_elem(bpf_map__fd(bpf_obj->maps.xsk), &queue_id, &sock_fd, 0);
+ if (!ASSERT_GE(ret, 0, "bpf_map_update_elem"))
+ goto out;
+
+ switch_ns_to_tx(&tok);
+
+ /* Setup separate AF_XDP for TX interface nad send packet to the RX socket. */
+ tx_ifindex = if_nametoindex(TX_NAME);
+ ret = open_xsk(tx_ifindex, &tx_xsk);
+ if (!ASSERT_OK(ret, "open_xsk(TX_NAME)"))
+ goto out;
+
+ if (!ASSERT_GE(generate_packet(&tx_xsk, AF_XDP_CONSUMER_PORT), 0,
+ "generate AF_XDP_CONSUMER_PORT"))
+ goto out;
+
+ switch_ns_to_rx(&tok);
+
+ /* Verify packet sent from AF_XDP has proper metadata. */
+ if (!ASSERT_GE(verify_xsk_metadata(&rx_xsk, true), 0,
+ "verify_xsk_metadata"))
+ goto out;
+
+ switch_ns_to_tx(&tok);
+ complete_tx(&tx_xsk);
+
+ /* Now check metadata of packet, generated with network stack */
+ if (!ASSERT_GE(generate_packet_inet(), 0, "generate UDP packet"))
+ goto out;
+
+ switch_ns_to_rx(&tok);
+
+ if (!ASSERT_GE(verify_xsk_metadata(&rx_xsk, false), 0,
+ "verify_xsk_metadata"))
+ goto out;
+
+ /* Make sure freplace correctly picks up original bound device
+ * and doesn't crash.
+ */
+
+ bpf_obj2 = xdp_metadata2__open();
+ if (!ASSERT_OK_PTR(bpf_obj2, "open skeleton"))
+ goto out;
+
+ new_prog = bpf_object__find_program_by_name(bpf_obj2->obj, "freplace_rx");
+ bpf_program__set_attach_target(new_prog, bpf_program__fd(prog), "rx");
+
+ if (!ASSERT_OK(xdp_metadata2__load(bpf_obj2), "load freplace skeleton"))
+ goto out;
+
+ if (!ASSERT_OK(xdp_metadata2__attach(bpf_obj2), "attach freplace"))
+ goto out;
+
+ switch_ns_to_tx(&tok);
+
+ /* Send packet to trigger . */
+ if (!ASSERT_GE(generate_packet(&tx_xsk, AF_XDP_CONSUMER_PORT), 0,
+ "generate freplace packet"))
+ goto out;
+
+ switch_ns_to_rx(&tok);
+
+ while (!retries--) {
+ if (bpf_obj2->bss->called)
+ break;
+ usleep(10);
+ }
+ ASSERT_GT(bpf_obj2->bss->called, 0, "not called");
+
+out:
+ close_xsk(&rx_xsk);
+ close_xsk(&tx_xsk);
+ xdp_metadata2__destroy(bpf_obj2);
+ xdp_metadata__destroy(bpf_obj);
+ if (tok)
+ close_netns(tok);
+ SYS_NOFAIL("ip netns del " RX_NETNS_NAME);
+ SYS_NOFAIL("ip netns del " TX_NETNS_NAME);
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/xdp_noinline.c b/tools/testing/selftests/bpf/prog_tests/xdp_noinline.c
index f284f72158ef..92ef0aa50866 100644
--- a/tools/testing/selftests/bpf/prog_tests/xdp_noinline.c
+++ b/tools/testing/selftests/bpf/prog_tests/xdp_noinline.c
@@ -1,11 +1,12 @@
// SPDX-License-Identifier: GPL-2.0
#include <test_progs.h>
#include <network_helpers.h>
+#include "test_xdp_noinline.skel.h"
void test_xdp_noinline(void)
{
- const char *file = "./test_xdp_noinline.o";
unsigned int nr_cpus = bpf_num_possible_cpus();
+ struct test_xdp_noinline *skel;
struct vip key = {.protocol = 6};
struct vip_meta {
__u32 flags;
@@ -24,59 +25,49 @@ void test_xdp_noinline(void)
__u8 flags;
} real_def = {.dst = MAGIC_VAL};
__u32 ch_key = 11, real_num = 3;
- __u32 duration, retval, size;
- int err, i, prog_fd, map_fd;
+ int err, i;
__u64 bytes = 0, pkts = 0;
- struct bpf_object *obj;
char buf[128];
u32 *magic = (u32 *)buf;
+ LIBBPF_OPTS(bpf_test_run_opts, topts,
+ .data_in = &pkt_v4,
+ .data_size_in = sizeof(pkt_v4),
+ .data_out = buf,
+ .data_size_out = sizeof(buf),
+ .repeat = NUM_ITER,
+ );
- err = bpf_prog_load(file, BPF_PROG_TYPE_XDP, &obj, &prog_fd);
- if (CHECK_FAIL(err))
+ skel = test_xdp_noinline__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "skel_open_and_load"))
return;
- map_fd = bpf_find_map(__func__, obj, "vip_map");
- if (map_fd < 0)
- goto out;
- bpf_map_update_elem(map_fd, &key, &value, 0);
+ bpf_map_update_elem(bpf_map__fd(skel->maps.vip_map), &key, &value, 0);
+ bpf_map_update_elem(bpf_map__fd(skel->maps.ch_rings), &ch_key, &real_num, 0);
+ bpf_map_update_elem(bpf_map__fd(skel->maps.reals), &real_num, &real_def, 0);
- map_fd = bpf_find_map(__func__, obj, "ch_rings");
- if (map_fd < 0)
- goto out;
- bpf_map_update_elem(map_fd, &ch_key, &real_num, 0);
+ err = bpf_prog_test_run_opts(bpf_program__fd(skel->progs.balancer_ingress_v4), &topts);
+ ASSERT_OK(err, "ipv4 test_run");
+ ASSERT_EQ(topts.retval, 1, "ipv4 test_run retval");
+ ASSERT_EQ(topts.data_size_out, 54, "ipv4 test_run data_size_out");
+ ASSERT_EQ(*magic, MAGIC_VAL, "ipv4 test_run magic");
- map_fd = bpf_find_map(__func__, obj, "reals");
- if (map_fd < 0)
- goto out;
- bpf_map_update_elem(map_fd, &real_num, &real_def, 0);
+ topts.data_in = &pkt_v6;
+ topts.data_size_in = sizeof(pkt_v6);
+ topts.data_out = buf;
+ topts.data_size_out = sizeof(buf);
- err = bpf_prog_test_run(prog_fd, NUM_ITER, &pkt_v4, sizeof(pkt_v4),
- buf, &size, &retval, &duration);
- CHECK(err || retval != 1 || size != 54 ||
- *magic != MAGIC_VAL, "ipv4",
- "err %d errno %d retval %d size %d magic %x\n",
- err, errno, retval, size, *magic);
+ err = bpf_prog_test_run_opts(bpf_program__fd(skel->progs.balancer_ingress_v6), &topts);
+ ASSERT_OK(err, "ipv6 test_run");
+ ASSERT_EQ(topts.retval, 1, "ipv6 test_run retval");
+ ASSERT_EQ(topts.data_size_out, 74, "ipv6 test_run data_size_out");
+ ASSERT_EQ(*magic, MAGIC_VAL, "ipv6 test_run magic");
- err = bpf_prog_test_run(prog_fd, NUM_ITER, &pkt_v6, sizeof(pkt_v6),
- buf, &size, &retval, &duration);
- CHECK(err || retval != 1 || size != 74 ||
- *magic != MAGIC_VAL, "ipv6",
- "err %d errno %d retval %d size %d magic %x\n",
- err, errno, retval, size, *magic);
-
- map_fd = bpf_find_map(__func__, obj, "stats");
- if (map_fd < 0)
- goto out;
- bpf_map_lookup_elem(map_fd, &stats_key, stats);
+ bpf_map_lookup_elem(bpf_map__fd(skel->maps.stats), &stats_key, stats);
for (i = 0; i < nr_cpus; i++) {
bytes += stats[i].bytes;
pkts += stats[i].pkts;
}
- if (CHECK_FAIL(bytes != MAGIC_BYTES * NUM_ITER * 2 ||
- pkts != NUM_ITER * 2)) {
- printf("test_xdp_noinline:FAIL:stats %lld %lld\n",
- bytes, pkts);
- }
-out:
- bpf_object__close(obj);
+ ASSERT_EQ(bytes, MAGIC_BYTES * NUM_ITER * 2, "stats bytes");
+ ASSERT_EQ(pkts, NUM_ITER * 2, "stats pkts");
+ test_xdp_noinline__destroy(skel);
}
diff --git a/tools/testing/selftests/bpf/prog_tests/xdp_perf.c b/tools/testing/selftests/bpf/prog_tests/xdp_perf.c
index 7185bee16fe4..ec5369f247cb 100644
--- a/tools/testing/selftests/bpf/prog_tests/xdp_perf.c
+++ b/tools/testing/selftests/bpf/prog_tests/xdp_perf.c
@@ -3,23 +3,26 @@
void test_xdp_perf(void)
{
- const char *file = "./xdp_dummy.o";
- __u32 duration, retval, size;
+ const char *file = "./xdp_dummy.bpf.o";
struct bpf_object *obj;
char in[128], out[128];
int err, prog_fd;
+ LIBBPF_OPTS(bpf_test_run_opts, topts,
+ .data_in = in,
+ .data_size_in = sizeof(in),
+ .data_out = out,
+ .data_size_out = sizeof(out),
+ .repeat = 1000000,
+ );
- err = bpf_prog_load(file, BPF_PROG_TYPE_XDP, &obj, &prog_fd);
+ err = bpf_prog_test_load(file, BPF_PROG_TYPE_XDP, &obj, &prog_fd);
if (CHECK_FAIL(err))
return;
- err = bpf_prog_test_run(prog_fd, 1000000, &in[0], 128,
- out, &size, &retval, &duration);
-
- CHECK(err || retval != XDP_PASS || size != 128,
- "xdp-perf",
- "err %d errno %d retval %d size %d\n",
- err, errno, retval, size);
+ err = bpf_prog_test_run_opts(prog_fd, &topts);
+ ASSERT_OK(err, "test_run");
+ ASSERT_EQ(topts.retval, XDP_PASS, "test_run retval");
+ ASSERT_EQ(topts.data_size_out, 128, "test_run data_size_out");
bpf_object__close(obj);
}
diff --git a/tools/testing/selftests/bpf/prog_tests/xdp_synproxy.c b/tools/testing/selftests/bpf/prog_tests/xdp_synproxy.c
new file mode 100644
index 000000000000..8b50a992d233
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/xdp_synproxy.c
@@ -0,0 +1,178 @@
+// SPDX-License-Identifier: LGPL-2.1 OR BSD-2-Clause
+/* Copyright (c) 2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved. */
+
+#define _GNU_SOURCE
+#include <test_progs.h>
+#include <network_helpers.h>
+#include <ctype.h>
+
+#define CMD_OUT_BUF_SIZE 1023
+
+#define SYS_OUT(cmd, ...) ({ \
+ char buf[1024]; \
+ snprintf(buf, sizeof(buf), (cmd), ##__VA_ARGS__); \
+ FILE *f = popen(buf, "r"); \
+ if (!ASSERT_OK_PTR(f, buf)) \
+ goto out; \
+ f; \
+})
+
+/* out must be at least `size * 4 + 1` bytes long */
+static void escape_str(char *out, const char *in, size_t size)
+{
+ static const char *hex = "0123456789ABCDEF";
+ size_t i;
+
+ for (i = 0; i < size; i++) {
+ if (isprint(in[i]) && in[i] != '\\' && in[i] != '\'') {
+ *out++ = in[i];
+ } else {
+ *out++ = '\\';
+ *out++ = 'x';
+ *out++ = hex[(in[i] >> 4) & 0xf];
+ *out++ = hex[in[i] & 0xf];
+ }
+ }
+ *out++ = '\0';
+}
+
+static bool expect_str(char *buf, size_t size, const char *str, const char *name)
+{
+ static char escbuf_expected[CMD_OUT_BUF_SIZE * 4];
+ static char escbuf_actual[CMD_OUT_BUF_SIZE * 4];
+ static int duration = 0;
+ bool ok;
+
+ ok = size == strlen(str) && !memcmp(buf, str, size);
+
+ if (!ok) {
+ escape_str(escbuf_expected, str, strlen(str));
+ escape_str(escbuf_actual, buf, size);
+ }
+ CHECK(!ok, name, "unexpected %s: actual '%s' != expected '%s'\n",
+ name, escbuf_actual, escbuf_expected);
+
+ return ok;
+}
+
+static void test_synproxy(bool xdp)
+{
+ int server_fd = -1, client_fd = -1, accept_fd = -1;
+ char *prog_id = NULL, *prog_id_end;
+ struct nstoken *ns = NULL;
+ FILE *ctrl_file = NULL;
+ char buf[CMD_OUT_BUF_SIZE];
+ size_t size;
+
+ SYS(out, "ip netns add synproxy");
+
+ SYS(out, "ip link add tmp0 type veth peer name tmp1");
+ SYS(out, "ip link set tmp1 netns synproxy");
+ SYS(out, "ip link set tmp0 up");
+ SYS(out, "ip addr replace 198.18.0.1/24 dev tmp0");
+
+ /* When checksum offload is enabled, the XDP program sees wrong
+ * checksums and drops packets.
+ */
+ SYS(out, "ethtool -K tmp0 tx off");
+ if (xdp)
+ /* Workaround required for veth. */
+ SYS(out, "ip link set tmp0 xdp object xdp_dummy.bpf.o section xdp 2> /dev/null");
+
+ ns = open_netns("synproxy");
+ if (!ASSERT_OK_PTR(ns, "setns"))
+ goto out;
+
+ SYS(out, "ip link set lo up");
+ SYS(out, "ip link set tmp1 up");
+ SYS(out, "ip addr replace 198.18.0.2/24 dev tmp1");
+ SYS(out, "sysctl -w net.ipv4.tcp_syncookies=2");
+ SYS(out, "sysctl -w net.ipv4.tcp_timestamps=1");
+ SYS(out, "sysctl -w net.netfilter.nf_conntrack_tcp_loose=0");
+ SYS(out, "iptables-legacy -t raw -I PREROUTING \
+ -i tmp1 -p tcp -m tcp --syn --dport 8080 -j CT --notrack");
+ SYS(out, "iptables-legacy -t filter -A INPUT \
+ -i tmp1 -p tcp -m tcp --dport 8080 -m state --state INVALID,UNTRACKED \
+ -j SYNPROXY --sack-perm --timestamp --wscale 7 --mss 1460");
+ SYS(out, "iptables-legacy -t filter -A INPUT \
+ -i tmp1 -m state --state INVALID -j DROP");
+
+ ctrl_file = SYS_OUT("./xdp_synproxy --iface tmp1 --ports 8080 \
+ --single --mss4 1460 --mss6 1440 \
+ --wscale 7 --ttl 64%s", xdp ? "" : " --tc");
+ size = fread(buf, 1, sizeof(buf), ctrl_file);
+ pclose(ctrl_file);
+ if (!expect_str(buf, size, "Total SYNACKs generated: 0\n",
+ "initial SYNACKs"))
+ goto out;
+
+ if (!xdp) {
+ ctrl_file = SYS_OUT("tc filter show dev tmp1 ingress");
+ size = fread(buf, 1, sizeof(buf), ctrl_file);
+ pclose(ctrl_file);
+ prog_id = memmem(buf, size, " id ", 4);
+ if (!ASSERT_OK_PTR(prog_id, "find prog id"))
+ goto out;
+ prog_id += 4;
+ if (!ASSERT_LT(prog_id, buf + size, "find prog id begin"))
+ goto out;
+ prog_id_end = prog_id;
+ while (prog_id_end < buf + size && *prog_id_end >= '0' &&
+ *prog_id_end <= '9')
+ prog_id_end++;
+ if (!ASSERT_LT(prog_id_end, buf + size, "find prog id end"))
+ goto out;
+ *prog_id_end = '\0';
+ }
+
+ server_fd = start_server(AF_INET, SOCK_STREAM, "198.18.0.2", 8080, 0);
+ if (!ASSERT_GE(server_fd, 0, "start_server"))
+ goto out;
+
+ close_netns(ns);
+ ns = NULL;
+
+ client_fd = connect_to_fd(server_fd, 10000);
+ if (!ASSERT_GE(client_fd, 0, "connect_to_fd"))
+ goto out;
+
+ accept_fd = accept(server_fd, NULL, NULL);
+ if (!ASSERT_GE(accept_fd, 0, "accept"))
+ goto out;
+
+ ns = open_netns("synproxy");
+ if (!ASSERT_OK_PTR(ns, "setns"))
+ goto out;
+
+ if (xdp)
+ ctrl_file = SYS_OUT("./xdp_synproxy --iface tmp1 --single");
+ else
+ ctrl_file = SYS_OUT("./xdp_synproxy --prog %s --single",
+ prog_id);
+ size = fread(buf, 1, sizeof(buf), ctrl_file);
+ pclose(ctrl_file);
+ if (!expect_str(buf, size, "Total SYNACKs generated: 1\n",
+ "SYNACKs after connection"))
+ goto out;
+
+out:
+ if (accept_fd >= 0)
+ close(accept_fd);
+ if (client_fd >= 0)
+ close(client_fd);
+ if (server_fd >= 0)
+ close(server_fd);
+ if (ns)
+ close_netns(ns);
+
+ SYS_NOFAIL("ip link del tmp0");
+ SYS_NOFAIL("ip netns del synproxy");
+}
+
+void test_xdp_synproxy(void)
+{
+ if (test__start_subtest("xdp"))
+ test_synproxy(true);
+ if (test__start_subtest("tc"))
+ test_synproxy(false);
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/xdpwall.c b/tools/testing/selftests/bpf/prog_tests/xdpwall.c
new file mode 100644
index 000000000000..4599154c8e9b
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/xdpwall.c
@@ -0,0 +1,15 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2021 Facebook */
+
+#include "test_progs.h"
+#include "xdpwall.skel.h"
+
+void test_xdpwall(void)
+{
+ struct xdpwall *skel;
+
+ skel = xdpwall__open_and_load();
+ ASSERT_OK_PTR(skel, "Does LLVM have https://github.com/llvm/llvm-project/commit/ea72b0319d7b0f0c2fcf41d121afa5d031b319d5?");
+
+ xdpwall__destroy(skel);
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/xfrm_info.c b/tools/testing/selftests/bpf/prog_tests/xfrm_info.c
new file mode 100644
index 000000000000..d37f5394e199
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/xfrm_info.c
@@ -0,0 +1,347 @@
+// SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause
+
+/*
+ * Topology:
+ * ---------
+ * NS0 namespace | NS1 namespace | NS2 namespace
+ * | |
+ * +---------------+ | +---------------+ |
+ * | ipsec0 |---------| ipsec0 | |
+ * | 192.168.1.100 | | | 192.168.1.200 | |
+ * | if_id: bpf | | +---------------+ |
+ * +---------------+ | |
+ * | | | +---------------+
+ * | | | | ipsec0 |
+ * \------------------------------------------| 192.168.1.200 |
+ * | | +---------------+
+ * | |
+ * | | (overlay network)
+ * ------------------------------------------------------
+ * | | (underlay network)
+ * +--------------+ | +--------------+ |
+ * | veth01 |----------| veth10 | |
+ * | 172.16.1.100 | | | 172.16.1.200 | |
+ * ---------------+ | +--------------+ |
+ * | |
+ * +--------------+ | | +--------------+
+ * | veth02 |-----------------------------------| veth20 |
+ * | 172.16.2.100 | | | | 172.16.2.200 |
+ * +--------------+ | | +--------------+
+ *
+ *
+ * Test Packet flow
+ * -----------
+ * The tests perform 'ping 192.168.1.200' from the NS0 namespace:
+ * 1) request is routed to NS0 ipsec0
+ * 2) NS0 ipsec0 tc egress BPF program is triggered and sets the if_id based
+ * on the requested value. This makes the ipsec0 device in external mode
+ * select the destination tunnel
+ * 3) ping reaches the other namespace (NS1 or NS2 based on which if_id was
+ * used) and response is sent
+ * 4) response is received on NS0 ipsec0, tc ingress program is triggered and
+ * records the response if_id
+ * 5) requested if_id is compared with received if_id
+ */
+
+#include <net/if.h>
+#include <linux/rtnetlink.h>
+#include <linux/if_link.h>
+
+#include "test_progs.h"
+#include "network_helpers.h"
+#include "xfrm_info.skel.h"
+
+#define NS0 "xfrm_test_ns0"
+#define NS1 "xfrm_test_ns1"
+#define NS2 "xfrm_test_ns2"
+
+#define IF_ID_0_TO_1 1
+#define IF_ID_0_TO_2 2
+#define IF_ID_1 3
+#define IF_ID_2 4
+
+#define IP4_ADDR_VETH01 "172.16.1.100"
+#define IP4_ADDR_VETH10 "172.16.1.200"
+#define IP4_ADDR_VETH02 "172.16.2.100"
+#define IP4_ADDR_VETH20 "172.16.2.200"
+
+#define ESP_DUMMY_PARAMS \
+ "proto esp aead 'rfc4106(gcm(aes))' " \
+ "0xe4d8f4b4da1df18a3510b3781496daa82488b713 128 mode tunnel "
+
+static int attach_tc_prog(struct bpf_tc_hook *hook, int igr_fd, int egr_fd)
+{
+ LIBBPF_OPTS(bpf_tc_opts, opts1, .handle = 1, .priority = 1,
+ .prog_fd = igr_fd);
+ LIBBPF_OPTS(bpf_tc_opts, opts2, .handle = 1, .priority = 1,
+ .prog_fd = egr_fd);
+ int ret;
+
+ ret = bpf_tc_hook_create(hook);
+ if (!ASSERT_OK(ret, "create tc hook"))
+ return ret;
+
+ if (igr_fd >= 0) {
+ hook->attach_point = BPF_TC_INGRESS;
+ ret = bpf_tc_attach(hook, &opts1);
+ if (!ASSERT_OK(ret, "bpf_tc_attach")) {
+ bpf_tc_hook_destroy(hook);
+ return ret;
+ }
+ }
+
+ if (egr_fd >= 0) {
+ hook->attach_point = BPF_TC_EGRESS;
+ ret = bpf_tc_attach(hook, &opts2);
+ if (!ASSERT_OK(ret, "bpf_tc_attach")) {
+ bpf_tc_hook_destroy(hook);
+ return ret;
+ }
+ }
+
+ return 0;
+}
+
+static void cleanup(void)
+{
+ SYS_NOFAIL("test -f /var/run/netns/" NS0 " && ip netns delete " NS0);
+ SYS_NOFAIL("test -f /var/run/netns/" NS1 " && ip netns delete " NS1);
+ SYS_NOFAIL("test -f /var/run/netns/" NS2 " && ip netns delete " NS2);
+}
+
+static int config_underlay(void)
+{
+ SYS(fail, "ip netns add " NS0);
+ SYS(fail, "ip netns add " NS1);
+ SYS(fail, "ip netns add " NS2);
+
+ /* NS0 <-> NS1 [veth01 <-> veth10] */
+ SYS(fail, "ip link add veth01 netns " NS0 " type veth peer name veth10 netns " NS1);
+ SYS(fail, "ip -net " NS0 " addr add " IP4_ADDR_VETH01 "/24 dev veth01");
+ SYS(fail, "ip -net " NS0 " link set dev veth01 up");
+ SYS(fail, "ip -net " NS1 " addr add " IP4_ADDR_VETH10 "/24 dev veth10");
+ SYS(fail, "ip -net " NS1 " link set dev veth10 up");
+
+ /* NS0 <-> NS2 [veth02 <-> veth20] */
+ SYS(fail, "ip link add veth02 netns " NS0 " type veth peer name veth20 netns " NS2);
+ SYS(fail, "ip -net " NS0 " addr add " IP4_ADDR_VETH02 "/24 dev veth02");
+ SYS(fail, "ip -net " NS0 " link set dev veth02 up");
+ SYS(fail, "ip -net " NS2 " addr add " IP4_ADDR_VETH20 "/24 dev veth20");
+ SYS(fail, "ip -net " NS2 " link set dev veth20 up");
+
+ return 0;
+fail:
+ return -1;
+}
+
+static int setup_xfrm_tunnel_ns(const char *ns, const char *ipv4_local,
+ const char *ipv4_remote, int if_id)
+{
+ /* State: local -> remote */
+ SYS(fail, "ip -net %s xfrm state add src %s dst %s spi 1 "
+ ESP_DUMMY_PARAMS "if_id %d", ns, ipv4_local, ipv4_remote, if_id);
+
+ /* State: local <- remote */
+ SYS(fail, "ip -net %s xfrm state add src %s dst %s spi 1 "
+ ESP_DUMMY_PARAMS "if_id %d", ns, ipv4_remote, ipv4_local, if_id);
+
+ /* Policy: local -> remote */
+ SYS(fail, "ip -net %s xfrm policy add dir out src 0.0.0.0/0 dst 0.0.0.0/0 "
+ "if_id %d tmpl src %s dst %s proto esp mode tunnel if_id %d", ns,
+ if_id, ipv4_local, ipv4_remote, if_id);
+
+ /* Policy: local <- remote */
+ SYS(fail, "ip -net %s xfrm policy add dir in src 0.0.0.0/0 dst 0.0.0.0/0 "
+ "if_id %d tmpl src %s dst %s proto esp mode tunnel if_id %d", ns,
+ if_id, ipv4_remote, ipv4_local, if_id);
+
+ return 0;
+fail:
+ return -1;
+}
+
+static int setup_xfrm_tunnel(const char *ns_a, const char *ns_b,
+ const char *ipv4_a, const char *ipv4_b,
+ int if_id_a, int if_id_b)
+{
+ return setup_xfrm_tunnel_ns(ns_a, ipv4_a, ipv4_b, if_id_a) ||
+ setup_xfrm_tunnel_ns(ns_b, ipv4_b, ipv4_a, if_id_b);
+}
+
+static struct rtattr *rtattr_add(struct nlmsghdr *nh, unsigned short type,
+ unsigned short len)
+{
+ struct rtattr *rta =
+ (struct rtattr *)((uint8_t *)nh + RTA_ALIGN(nh->nlmsg_len));
+ rta->rta_type = type;
+ rta->rta_len = RTA_LENGTH(len);
+ nh->nlmsg_len = RTA_ALIGN(nh->nlmsg_len) + RTA_ALIGN(rta->rta_len);
+ return rta;
+}
+
+static struct rtattr *rtattr_add_str(struct nlmsghdr *nh, unsigned short type,
+ const char *s)
+{
+ struct rtattr *rta = rtattr_add(nh, type, strlen(s));
+
+ memcpy(RTA_DATA(rta), s, strlen(s));
+ return rta;
+}
+
+static struct rtattr *rtattr_begin(struct nlmsghdr *nh, unsigned short type)
+{
+ return rtattr_add(nh, type, 0);
+}
+
+static void rtattr_end(struct nlmsghdr *nh, struct rtattr *attr)
+{
+ uint8_t *end = (uint8_t *)nh + nh->nlmsg_len;
+
+ attr->rta_len = end - (uint8_t *)attr;
+}
+
+static int setup_xfrmi_external_dev(const char *ns)
+{
+ struct {
+ struct nlmsghdr nh;
+ struct ifinfomsg info;
+ unsigned char data[128];
+ } req;
+ struct rtattr *link_info, *info_data;
+ struct nstoken *nstoken;
+ int ret = -1, sock = -1;
+ struct nlmsghdr *nh;
+
+ memset(&req, 0, sizeof(req));
+ nh = &req.nh;
+ nh->nlmsg_len = NLMSG_LENGTH(sizeof(req.info));
+ nh->nlmsg_type = RTM_NEWLINK;
+ nh->nlmsg_flags |= NLM_F_CREATE | NLM_F_REQUEST;
+
+ rtattr_add_str(nh, IFLA_IFNAME, "ipsec0");
+ link_info = rtattr_begin(nh, IFLA_LINKINFO);
+ rtattr_add_str(nh, IFLA_INFO_KIND, "xfrm");
+ info_data = rtattr_begin(nh, IFLA_INFO_DATA);
+ rtattr_add(nh, IFLA_XFRM_COLLECT_METADATA, 0);
+ rtattr_end(nh, info_data);
+ rtattr_end(nh, link_info);
+
+ nstoken = open_netns(ns);
+ if (!ASSERT_OK_PTR(nstoken, "setns"))
+ goto done;
+
+ sock = socket(AF_NETLINK, SOCK_RAW | SOCK_CLOEXEC, NETLINK_ROUTE);
+ if (!ASSERT_GE(sock, 0, "netlink socket"))
+ goto done;
+ ret = send(sock, nh, nh->nlmsg_len, 0);
+ if (!ASSERT_EQ(ret, nh->nlmsg_len, "netlink send length"))
+ goto done;
+
+ ret = 0;
+done:
+ if (sock != -1)
+ close(sock);
+ if (nstoken)
+ close_netns(nstoken);
+ return ret;
+}
+
+static int config_overlay(void)
+{
+ if (setup_xfrm_tunnel(NS0, NS1, IP4_ADDR_VETH01, IP4_ADDR_VETH10,
+ IF_ID_0_TO_1, IF_ID_1))
+ goto fail;
+ if (setup_xfrm_tunnel(NS0, NS2, IP4_ADDR_VETH02, IP4_ADDR_VETH20,
+ IF_ID_0_TO_2, IF_ID_2))
+ goto fail;
+
+ /* Older iproute2 doesn't support this option */
+ if (!ASSERT_OK(setup_xfrmi_external_dev(NS0), "xfrmi"))
+ goto fail;
+
+ SYS(fail, "ip -net " NS0 " addr add 192.168.1.100/24 dev ipsec0");
+ SYS(fail, "ip -net " NS0 " link set dev ipsec0 up");
+
+ SYS(fail, "ip -net " NS1 " link add ipsec0 type xfrm if_id %d", IF_ID_1);
+ SYS(fail, "ip -net " NS1 " addr add 192.168.1.200/24 dev ipsec0");
+ SYS(fail, "ip -net " NS1 " link set dev ipsec0 up");
+
+ SYS(fail, "ip -net " NS2 " link add ipsec0 type xfrm if_id %d", IF_ID_2);
+ SYS(fail, "ip -net " NS2 " addr add 192.168.1.200/24 dev ipsec0");
+ SYS(fail, "ip -net " NS2 " link set dev ipsec0 up");
+
+ return 0;
+fail:
+ return -1;
+}
+
+static int test_xfrm_ping(struct xfrm_info *skel, u32 if_id)
+{
+ skel->bss->req_if_id = if_id;
+
+ SYS(fail, "ping -i 0.01 -c 3 -w 10 -q 192.168.1.200 > /dev/null");
+
+ if (!ASSERT_EQ(skel->bss->resp_if_id, if_id, "if_id"))
+ goto fail;
+
+ return 0;
+fail:
+ return -1;
+}
+
+static void _test_xfrm_info(void)
+{
+ LIBBPF_OPTS(bpf_tc_hook, tc_hook, .attach_point = BPF_TC_INGRESS);
+ int get_xfrm_info_prog_fd, set_xfrm_info_prog_fd;
+ struct nstoken *nstoken = NULL;
+ struct xfrm_info *skel;
+ int ifindex;
+
+ /* load and attach bpf progs to ipsec dev tc hook point */
+ skel = xfrm_info__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "xfrm_info__open_and_load"))
+ goto done;
+ nstoken = open_netns(NS0);
+ if (!ASSERT_OK_PTR(nstoken, "setns " NS0))
+ goto done;
+ ifindex = if_nametoindex("ipsec0");
+ if (!ASSERT_NEQ(ifindex, 0, "ipsec0 ifindex"))
+ goto done;
+ tc_hook.ifindex = ifindex;
+ set_xfrm_info_prog_fd = bpf_program__fd(skel->progs.set_xfrm_info);
+ get_xfrm_info_prog_fd = bpf_program__fd(skel->progs.get_xfrm_info);
+ if (!ASSERT_GE(set_xfrm_info_prog_fd, 0, "bpf_program__fd"))
+ goto done;
+ if (!ASSERT_GE(get_xfrm_info_prog_fd, 0, "bpf_program__fd"))
+ goto done;
+ if (attach_tc_prog(&tc_hook, get_xfrm_info_prog_fd,
+ set_xfrm_info_prog_fd))
+ goto done;
+
+ /* perform test */
+ if (!ASSERT_EQ(test_xfrm_ping(skel, IF_ID_0_TO_1), 0, "ping " NS1))
+ goto done;
+ if (!ASSERT_EQ(test_xfrm_ping(skel, IF_ID_0_TO_2), 0, "ping " NS2))
+ goto done;
+
+done:
+ if (nstoken)
+ close_netns(nstoken);
+ xfrm_info__destroy(skel);
+}
+
+void test_xfrm_info(void)
+{
+ cleanup();
+
+ if (!ASSERT_OK(config_underlay(), "config_underlay"))
+ goto done;
+ if (!ASSERT_OK(config_overlay(), "config_overlay"))
+ goto done;
+
+ if (test__start_subtest("xfrm_info"))
+ _test_xfrm_info();
+
+done:
+ cleanup();
+}
diff --git a/tools/testing/selftests/bpf/progs/access_map_in_map.c b/tools/testing/selftests/bpf/progs/access_map_in_map.c
new file mode 100644
index 000000000000..1126871c2ebd
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/access_map_in_map.c
@@ -0,0 +1,93 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (C) 2023. Huawei Technologies Co., Ltd */
+#include <linux/bpf.h>
+#include <time.h>
+#include <bpf/bpf_helpers.h>
+
+#include "bpf_misc.h"
+
+struct inner_map_type {
+ __uint(type, BPF_MAP_TYPE_ARRAY);
+ __uint(key_size, 4);
+ __uint(value_size, 4);
+ __uint(max_entries, 1);
+} inner_map SEC(".maps");
+
+struct {
+ __uint(type, BPF_MAP_TYPE_ARRAY_OF_MAPS);
+ __type(key, int);
+ __type(value, int);
+ __uint(max_entries, 1);
+ __array(values, struct inner_map_type);
+} outer_array_map SEC(".maps") = {
+ .values = {
+ [0] = &inner_map,
+ },
+};
+
+struct {
+ __uint(type, BPF_MAP_TYPE_HASH_OF_MAPS);
+ __type(key, int);
+ __type(value, int);
+ __uint(max_entries, 1);
+ __array(values, struct inner_map_type);
+} outer_htab_map SEC(".maps") = {
+ .values = {
+ [0] = &inner_map,
+ },
+};
+
+char _license[] SEC("license") = "GPL";
+
+int tgid = 0;
+
+static int acc_map_in_map(void *outer_map)
+{
+ int i, key, value = 0xdeadbeef;
+ void *inner_map;
+
+ if ((bpf_get_current_pid_tgid() >> 32) != tgid)
+ return 0;
+
+ /* Find nonexistent inner map */
+ key = 1;
+ inner_map = bpf_map_lookup_elem(outer_map, &key);
+ if (inner_map)
+ return 0;
+
+ /* Find the old inner map */
+ key = 0;
+ inner_map = bpf_map_lookup_elem(outer_map, &key);
+ if (!inner_map)
+ return 0;
+
+ /* Wait for the old inner map to be replaced */
+ for (i = 0; i < 2048; i++)
+ bpf_map_update_elem(inner_map, &key, &value, 0);
+
+ return 0;
+}
+
+SEC("?kprobe/" SYS_PREFIX "sys_getpgid")
+int access_map_in_array(void *ctx)
+{
+ return acc_map_in_map(&outer_array_map);
+}
+
+SEC("?fentry.s/" SYS_PREFIX "sys_getpgid")
+int sleepable_access_map_in_array(void *ctx)
+{
+ return acc_map_in_map(&outer_array_map);
+}
+
+SEC("?kprobe/" SYS_PREFIX "sys_getpgid")
+int access_map_in_htab(void *ctx)
+{
+ return acc_map_in_map(&outer_htab_map);
+}
+
+SEC("?fentry.s/" SYS_PREFIX "sys_getpgid")
+int sleepable_access_map_in_htab(void *ctx)
+{
+ return acc_map_in_map(&outer_htab_map);
+}
diff --git a/tools/testing/selftests/bpf/progs/arena_htab.c b/tools/testing/selftests/bpf/progs/arena_htab.c
new file mode 100644
index 000000000000..b7bb712cacfd
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/arena_htab.c
@@ -0,0 +1,48 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2024 Meta Platforms, Inc. and affiliates. */
+#include <vmlinux.h>
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
+#include <bpf/bpf_core_read.h>
+#include "bpf_experimental.h"
+
+struct {
+ __uint(type, BPF_MAP_TYPE_ARENA);
+ __uint(map_flags, BPF_F_MMAPABLE);
+ __uint(max_entries, 100); /* number of pages */
+} arena SEC(".maps");
+
+#include "bpf_arena_htab.h"
+
+void __arena *htab_for_user;
+bool skip = false;
+
+int zero = 0;
+
+SEC("syscall")
+int arena_htab_llvm(void *ctx)
+{
+#if defined(__BPF_FEATURE_ARENA_CAST) || defined(BPF_ARENA_FORCE_ASM)
+ struct htab __arena *htab;
+ __u64 i;
+
+ htab = bpf_alloc(sizeof(*htab));
+ cast_kern(htab);
+ htab_init(htab);
+
+ /* first run. No old elems in the table */
+ for (i = zero; i < 1000; i++)
+ htab_update_elem(htab, i, i);
+
+ /* should replace all elems with new ones */
+ for (i = zero; i < 1000; i++)
+ htab_update_elem(htab, i, i);
+ cast_user(htab);
+ htab_for_user = htab;
+#else
+ skip = true;
+#endif
+ return 0;
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/arena_htab_asm.c b/tools/testing/selftests/bpf/progs/arena_htab_asm.c
new file mode 100644
index 000000000000..6cd70ea12f0d
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/arena_htab_asm.c
@@ -0,0 +1,5 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2024 Meta Platforms, Inc. and affiliates. */
+#define BPF_ARENA_FORCE_ASM
+#define arena_htab_llvm arena_htab_asm
+#include "arena_htab.c"
diff --git a/tools/testing/selftests/bpf/progs/arena_list.c b/tools/testing/selftests/bpf/progs/arena_list.c
new file mode 100644
index 000000000000..cd35b8448435
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/arena_list.c
@@ -0,0 +1,87 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2024 Meta Platforms, Inc. and affiliates. */
+#include <vmlinux.h>
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
+#include <bpf/bpf_core_read.h>
+#include "bpf_experimental.h"
+
+struct {
+ __uint(type, BPF_MAP_TYPE_ARENA);
+ __uint(map_flags, BPF_F_MMAPABLE);
+ __uint(max_entries, 100); /* number of pages */
+#ifdef __TARGET_ARCH_arm64
+ __ulong(map_extra, 0x1ull << 32); /* start of mmap() region */
+#else
+ __ulong(map_extra, 0x1ull << 44); /* start of mmap() region */
+#endif
+} arena SEC(".maps");
+
+#include "bpf_arena_alloc.h"
+#include "bpf_arena_list.h"
+
+struct elem {
+ struct arena_list_node node;
+ __u64 value;
+};
+
+struct arena_list_head __arena *list_head;
+int list_sum;
+int cnt;
+bool skip = false;
+
+#ifdef __BPF_FEATURE_ARENA_CAST
+long __arena arena_sum;
+int __arena test_val = 1;
+struct arena_list_head __arena global_head;
+#else
+long arena_sum SEC(".arena.1");
+int test_val SEC(".arena.1");
+#endif
+
+int zero;
+
+SEC("syscall")
+int arena_list_add(void *ctx)
+{
+#ifdef __BPF_FEATURE_ARENA_CAST
+ __u64 i;
+
+ list_head = &global_head;
+
+ for (i = zero; i < cnt; cond_break, i++) {
+ struct elem __arena *n = bpf_alloc(sizeof(*n));
+
+ test_val++;
+ n->value = i;
+ arena_sum += i;
+ list_add_head(&n->node, list_head);
+ }
+#else
+ skip = true;
+#endif
+ return 0;
+}
+
+SEC("syscall")
+int arena_list_del(void *ctx)
+{
+#ifdef __BPF_FEATURE_ARENA_CAST
+ struct elem __arena *n;
+ int sum = 0;
+
+ arena_sum = 0;
+ list_for_each_entry(n, list_head, node) {
+ sum += n->value;
+ arena_sum += n->value;
+ list_del(&n->node);
+ bpf_free(n);
+ }
+ list_sum = sum;
+#else
+ skip = true;
+#endif
+ return 0;
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/async_stack_depth.c b/tools/testing/selftests/bpf/progs/async_stack_depth.c
new file mode 100644
index 000000000000..36734683acbd
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/async_stack_depth.c
@@ -0,0 +1,61 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <vmlinux.h>
+#include <bpf/bpf_helpers.h>
+
+#include "bpf_misc.h"
+
+struct hmap_elem {
+ struct bpf_timer timer;
+};
+
+struct {
+ __uint(type, BPF_MAP_TYPE_HASH);
+ __uint(max_entries, 64);
+ __type(key, int);
+ __type(value, struct hmap_elem);
+} hmap SEC(".maps");
+
+__attribute__((noinline))
+static int timer_cb(void *map, int *key, struct bpf_timer *timer)
+{
+ volatile char buf[256] = {};
+ return buf[69];
+}
+
+__attribute__((noinline))
+static int bad_timer_cb(void *map, int *key, struct bpf_timer *timer)
+{
+ volatile char buf[300] = {};
+ return buf[255] + timer_cb(NULL, NULL, NULL);
+}
+
+SEC("tc")
+__failure __msg("combined stack size of 2 calls is")
+int pseudo_call_check(struct __sk_buff *ctx)
+{
+ struct hmap_elem *elem;
+ volatile char buf[256] = {};
+
+ elem = bpf_map_lookup_elem(&hmap, &(int){0});
+ if (!elem)
+ return 0;
+
+ timer_cb(NULL, NULL, NULL);
+ return bpf_timer_set_callback(&elem->timer, timer_cb) + buf[0];
+}
+
+SEC("tc")
+__failure __msg("combined stack size of 2 calls is")
+int async_call_root_check(struct __sk_buff *ctx)
+{
+ struct hmap_elem *elem;
+ volatile char buf[256] = {};
+
+ elem = bpf_map_lookup_elem(&hmap, &(int){0});
+ if (!elem)
+ return 0;
+
+ return bpf_timer_set_callback(&elem->timer, bad_timer_cb) + buf[0];
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/atomic_bounds.c b/tools/testing/selftests/bpf/progs/atomic_bounds.c
new file mode 100644
index 000000000000..e5fff7fc7f8f
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/atomic_bounds.c
@@ -0,0 +1,24 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <linux/bpf.h>
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
+#include <stdbool.h>
+
+#ifdef ENABLE_ATOMICS_TESTS
+bool skip_tests __attribute((__section__(".data"))) = false;
+#else
+bool skip_tests = true;
+#endif
+
+SEC("fentry/bpf_fentry_test1")
+int BPF_PROG(sub, int x)
+{
+#ifdef ENABLE_ATOMICS_TESTS
+ int a = 0;
+ int b = __sync_fetch_and_add(&a, 1);
+ /* b is certainly 0 here. Can the verifier tell? */
+ while (b)
+ continue;
+#endif
+ return 0;
+}
diff --git a/tools/testing/selftests/bpf/progs/atomics.c b/tools/testing/selftests/bpf/progs/atomics.c
new file mode 100644
index 000000000000..f89c7f0cc53b
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/atomics.c
@@ -0,0 +1,170 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <linux/bpf.h>
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
+#include <stdbool.h>
+
+#ifdef ENABLE_ATOMICS_TESTS
+bool skip_tests __attribute((__section__(".data"))) = false;
+#else
+bool skip_tests = true;
+#endif
+
+__u32 pid = 0;
+
+__u64 add64_value = 1;
+__u64 add64_result = 0;
+__u32 add32_value = 1;
+__u32 add32_result = 0;
+__u64 add_stack_value_copy = 0;
+__u64 add_stack_result = 0;
+__u64 add_noreturn_value = 1;
+
+SEC("raw_tp/sys_enter")
+int add(const void *ctx)
+{
+ if (pid != (bpf_get_current_pid_tgid() >> 32))
+ return 0;
+#ifdef ENABLE_ATOMICS_TESTS
+ __u64 add_stack_value = 1;
+
+ add64_result = __sync_fetch_and_add(&add64_value, 2);
+ add32_result = __sync_fetch_and_add(&add32_value, 2);
+ add_stack_result = __sync_fetch_and_add(&add_stack_value, 2);
+ add_stack_value_copy = add_stack_value;
+ __sync_fetch_and_add(&add_noreturn_value, 2);
+#endif
+
+ return 0;
+}
+
+__s64 sub64_value = 1;
+__s64 sub64_result = 0;
+__s32 sub32_value = 1;
+__s32 sub32_result = 0;
+__s64 sub_stack_value_copy = 0;
+__s64 sub_stack_result = 0;
+__s64 sub_noreturn_value = 1;
+
+SEC("raw_tp/sys_enter")
+int sub(const void *ctx)
+{
+ if (pid != (bpf_get_current_pid_tgid() >> 32))
+ return 0;
+#ifdef ENABLE_ATOMICS_TESTS
+ __u64 sub_stack_value = 1;
+
+ sub64_result = __sync_fetch_and_sub(&sub64_value, 2);
+ sub32_result = __sync_fetch_and_sub(&sub32_value, 2);
+ sub_stack_result = __sync_fetch_and_sub(&sub_stack_value, 2);
+ sub_stack_value_copy = sub_stack_value;
+ __sync_fetch_and_sub(&sub_noreturn_value, 2);
+#endif
+
+ return 0;
+}
+
+__u64 and64_value = (0x110ull << 32);
+__u64 and64_result = 0;
+__u32 and32_value = 0x110;
+__u32 and32_result = 0;
+__u64 and_noreturn_value = (0x110ull << 32);
+
+SEC("raw_tp/sys_enter")
+int and(const void *ctx)
+{
+ if (pid != (bpf_get_current_pid_tgid() >> 32))
+ return 0;
+#ifdef ENABLE_ATOMICS_TESTS
+
+ and64_result = __sync_fetch_and_and(&and64_value, 0x011ull << 32);
+ and32_result = __sync_fetch_and_and(&and32_value, 0x011);
+ __sync_fetch_and_and(&and_noreturn_value, 0x011ull << 32);
+#endif
+
+ return 0;
+}
+
+__u64 or64_value = (0x110ull << 32);
+__u64 or64_result = 0;
+__u32 or32_value = 0x110;
+__u32 or32_result = 0;
+__u64 or_noreturn_value = (0x110ull << 32);
+
+SEC("raw_tp/sys_enter")
+int or(const void *ctx)
+{
+ if (pid != (bpf_get_current_pid_tgid() >> 32))
+ return 0;
+#ifdef ENABLE_ATOMICS_TESTS
+ or64_result = __sync_fetch_and_or(&or64_value, 0x011ull << 32);
+ or32_result = __sync_fetch_and_or(&or32_value, 0x011);
+ __sync_fetch_and_or(&or_noreturn_value, 0x011ull << 32);
+#endif
+
+ return 0;
+}
+
+__u64 xor64_value = (0x110ull << 32);
+__u64 xor64_result = 0;
+__u32 xor32_value = 0x110;
+__u32 xor32_result = 0;
+__u64 xor_noreturn_value = (0x110ull << 32);
+
+SEC("raw_tp/sys_enter")
+int xor(const void *ctx)
+{
+ if (pid != (bpf_get_current_pid_tgid() >> 32))
+ return 0;
+#ifdef ENABLE_ATOMICS_TESTS
+ xor64_result = __sync_fetch_and_xor(&xor64_value, 0x011ull << 32);
+ xor32_result = __sync_fetch_and_xor(&xor32_value, 0x011);
+ __sync_fetch_and_xor(&xor_noreturn_value, 0x011ull << 32);
+#endif
+
+ return 0;
+}
+
+__u64 cmpxchg64_value = 1;
+__u64 cmpxchg64_result_fail = 0;
+__u64 cmpxchg64_result_succeed = 0;
+__u32 cmpxchg32_value = 1;
+__u32 cmpxchg32_result_fail = 0;
+__u32 cmpxchg32_result_succeed = 0;
+
+SEC("raw_tp/sys_enter")
+int cmpxchg(const void *ctx)
+{
+ if (pid != (bpf_get_current_pid_tgid() >> 32))
+ return 0;
+#ifdef ENABLE_ATOMICS_TESTS
+ cmpxchg64_result_fail = __sync_val_compare_and_swap(&cmpxchg64_value, 0, 3);
+ cmpxchg64_result_succeed = __sync_val_compare_and_swap(&cmpxchg64_value, 1, 2);
+
+ cmpxchg32_result_fail = __sync_val_compare_and_swap(&cmpxchg32_value, 0, 3);
+ cmpxchg32_result_succeed = __sync_val_compare_and_swap(&cmpxchg32_value, 1, 2);
+#endif
+
+ return 0;
+}
+
+__u64 xchg64_value = 1;
+__u64 xchg64_result = 0;
+__u32 xchg32_value = 1;
+__u32 xchg32_result = 0;
+
+SEC("raw_tp/sys_enter")
+int xchg(const void *ctx)
+{
+ if (pid != (bpf_get_current_pid_tgid() >> 32))
+ return 0;
+#ifdef ENABLE_ATOMICS_TESTS
+ __u64 val64 = 2;
+ __u32 val32 = 2;
+
+ xchg64_result = __sync_lock_test_and_set(&xchg64_value, val64);
+ xchg32_result = __sync_lock_test_and_set(&xchg32_value, val32);
+#endif
+
+ return 0;
+}
diff --git a/tools/testing/selftests/bpf/progs/bad_struct_ops.c b/tools/testing/selftests/bpf/progs/bad_struct_ops.c
new file mode 100644
index 000000000000..b7e175cd0af0
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/bad_struct_ops.c
@@ -0,0 +1,25 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include <vmlinux.h>
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
+#include "../bpf_testmod/bpf_testmod.h"
+
+char _license[] SEC("license") = "GPL";
+
+SEC("struct_ops/test_1")
+int BPF_PROG(test_1) { return 0; }
+
+SEC("struct_ops/test_2")
+int BPF_PROG(test_2) { return 0; }
+
+SEC(".struct_ops.link")
+struct bpf_testmod_ops testmod_1 = {
+ .test_1 = (void *)test_1,
+ .test_2 = (void *)test_2
+};
+
+SEC(".struct_ops.link")
+struct bpf_testmod_ops2 testmod_2 = {
+ .test_1 = (void *)test_1
+};
diff --git a/tools/testing/selftests/bpf/progs/bad_struct_ops2.c b/tools/testing/selftests/bpf/progs/bad_struct_ops2.c
new file mode 100644
index 000000000000..64a95f6be86d
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/bad_struct_ops2.c
@@ -0,0 +1,14 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include <linux/bpf.h>
+#include <bpf/bpf_helpers.h>
+
+char _license[] SEC("license") = "GPL";
+
+/* This is an unused struct_ops program, it lacks corresponding
+ * struct_ops map, which provides attachment information.
+ * W/o additional configuration attempt to load such
+ * BPF object file would fail.
+ */
+SEC("struct_ops/foo")
+void foo(void) {}
diff --git a/tools/testing/selftests/bpf/progs/bench_local_storage_create.c b/tools/testing/selftests/bpf/progs/bench_local_storage_create.c
new file mode 100644
index 000000000000..e4bfbba6c193
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/bench_local_storage_create.c
@@ -0,0 +1,82 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2023 Meta Platforms, Inc. and affiliates. */
+
+#include "vmlinux.h"
+#include "bpf_tracing_net.h"
+#include <bpf/bpf_tracing.h>
+#include <bpf/bpf_helpers.h>
+
+long create_errs = 0;
+long create_cnts = 0;
+long kmalloc_cnts = 0;
+__u32 bench_pid = 0;
+
+struct storage {
+ __u8 data[64];
+};
+
+struct {
+ __uint(type, BPF_MAP_TYPE_SK_STORAGE);
+ __uint(map_flags, BPF_F_NO_PREALLOC);
+ __type(key, int);
+ __type(value, struct storage);
+} sk_storage_map SEC(".maps");
+
+struct {
+ __uint(type, BPF_MAP_TYPE_TASK_STORAGE);
+ __uint(map_flags, BPF_F_NO_PREALLOC);
+ __type(key, int);
+ __type(value, struct storage);
+} task_storage_map SEC(".maps");
+
+SEC("raw_tp/kmalloc")
+int BPF_PROG(kmalloc, unsigned long call_site, const void *ptr,
+ size_t bytes_req, size_t bytes_alloc, gfp_t gfp_flags,
+ int node)
+{
+ __sync_fetch_and_add(&kmalloc_cnts, 1);
+
+ return 0;
+}
+
+SEC("tp_btf/sched_process_fork")
+int BPF_PROG(sched_process_fork, struct task_struct *parent, struct task_struct *child)
+{
+ struct storage *stg;
+
+ if (parent->tgid != bench_pid)
+ return 0;
+
+ stg = bpf_task_storage_get(&task_storage_map, child, NULL,
+ BPF_LOCAL_STORAGE_GET_F_CREATE);
+ if (stg)
+ __sync_fetch_and_add(&create_cnts, 1);
+ else
+ __sync_fetch_and_add(&create_errs, 1);
+
+ return 0;
+}
+
+SEC("lsm.s/socket_post_create")
+int BPF_PROG(socket_post_create, struct socket *sock, int family, int type,
+ int protocol, int kern)
+{
+ struct storage *stg;
+ __u32 pid;
+
+ pid = bpf_get_current_pid_tgid() >> 32;
+ if (pid != bench_pid)
+ return 0;
+
+ stg = bpf_sk_storage_get(&sk_storage_map, sock->sk, NULL,
+ BPF_LOCAL_STORAGE_GET_F_CREATE);
+
+ if (stg)
+ __sync_fetch_and_add(&create_cnts, 1);
+ else
+ __sync_fetch_and_add(&create_errs, 1);
+
+ return 0;
+}
+
+char __license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/bind4_prog.c b/tools/testing/selftests/bpf/progs/bind4_prog.c
new file mode 100644
index 000000000000..a487f60b73ac
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/bind4_prog.c
@@ -0,0 +1,159 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include <string.h>
+
+#include <linux/stddef.h>
+#include <linux/bpf.h>
+#include <linux/in.h>
+#include <linux/in6.h>
+#include <linux/if.h>
+#include <errno.h>
+
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_endian.h>
+
+#define SERV4_IP 0xc0a801feU /* 192.168.1.254 */
+#define SERV4_PORT 4040
+#define SERV4_REWRITE_IP 0x7f000001U /* 127.0.0.1 */
+#define SERV4_REWRITE_PORT 4444
+
+#ifndef IFNAMSIZ
+#define IFNAMSIZ 16
+#endif
+
+static __inline int bind_to_device(struct bpf_sock_addr *ctx)
+{
+ char veth1[IFNAMSIZ] = "test_sock_addr1";
+ char veth2[IFNAMSIZ] = "test_sock_addr2";
+ char missing[IFNAMSIZ] = "nonexistent_dev";
+ char del_bind[IFNAMSIZ] = "";
+ int veth1_idx, veth2_idx;
+
+ if (bpf_setsockopt(ctx, SOL_SOCKET, SO_BINDTODEVICE,
+ &veth1, sizeof(veth1)))
+ return 1;
+ if (bpf_getsockopt(ctx, SOL_SOCKET, SO_BINDTOIFINDEX,
+ &veth1_idx, sizeof(veth1_idx)) || !veth1_idx)
+ return 1;
+ if (bpf_setsockopt(ctx, SOL_SOCKET, SO_BINDTODEVICE,
+ &veth2, sizeof(veth2)))
+ return 1;
+ if (bpf_getsockopt(ctx, SOL_SOCKET, SO_BINDTOIFINDEX,
+ &veth2_idx, sizeof(veth2_idx)) || !veth2_idx ||
+ veth1_idx == veth2_idx)
+ return 1;
+ if (bpf_setsockopt(ctx, SOL_SOCKET, SO_BINDTODEVICE,
+ &missing, sizeof(missing)) != -ENODEV)
+ return 1;
+ if (bpf_setsockopt(ctx, SOL_SOCKET, SO_BINDTOIFINDEX,
+ &veth1_idx, sizeof(veth1_idx)))
+ return 1;
+ if (bpf_setsockopt(ctx, SOL_SOCKET, SO_BINDTODEVICE,
+ &del_bind, sizeof(del_bind)))
+ return 1;
+
+ return 0;
+}
+
+static __inline int bind_reuseport(struct bpf_sock_addr *ctx)
+{
+ int val = 1;
+
+ if (bpf_setsockopt(ctx, SOL_SOCKET, SO_REUSEPORT,
+ &val, sizeof(val)))
+ return 1;
+ if (bpf_getsockopt(ctx, SOL_SOCKET, SO_REUSEPORT,
+ &val, sizeof(val)) || !val)
+ return 1;
+ val = 0;
+ if (bpf_setsockopt(ctx, SOL_SOCKET, SO_REUSEPORT,
+ &val, sizeof(val)))
+ return 1;
+ if (bpf_getsockopt(ctx, SOL_SOCKET, SO_REUSEPORT,
+ &val, sizeof(val)) || val)
+ return 1;
+
+ return 0;
+}
+
+static __inline int misc_opts(struct bpf_sock_addr *ctx, int opt)
+{
+ int old, tmp, new = 0xeb9f;
+
+ /* Socket in test case has guarantee that old never equals to new. */
+ if (bpf_getsockopt(ctx, SOL_SOCKET, opt, &old, sizeof(old)) ||
+ old == new)
+ return 1;
+ if (bpf_setsockopt(ctx, SOL_SOCKET, opt, &new, sizeof(new)))
+ return 1;
+ if (bpf_getsockopt(ctx, SOL_SOCKET, opt, &tmp, sizeof(tmp)) ||
+ tmp != new)
+ return 1;
+ if (bpf_setsockopt(ctx, SOL_SOCKET, opt, &old, sizeof(old)))
+ return 1;
+
+ return 0;
+}
+
+SEC("cgroup/bind4")
+int bind_v4_prog(struct bpf_sock_addr *ctx)
+{
+ struct bpf_sock *sk;
+ __u32 user_ip4;
+ __u16 user_port;
+
+ sk = ctx->sk;
+ if (!sk)
+ return 0;
+
+ if (sk->family != AF_INET)
+ return 0;
+
+ if (ctx->type != SOCK_STREAM && ctx->type != SOCK_DGRAM)
+ return 0;
+
+ if (ctx->user_ip4 != bpf_htonl(SERV4_IP) ||
+ ctx->user_port != bpf_htons(SERV4_PORT))
+ return 0;
+
+ // u8 narrow loads:
+ user_ip4 = 0;
+ user_ip4 |= ((volatile __u8 *)&ctx->user_ip4)[0] << 0;
+ user_ip4 |= ((volatile __u8 *)&ctx->user_ip4)[1] << 8;
+ user_ip4 |= ((volatile __u8 *)&ctx->user_ip4)[2] << 16;
+ user_ip4 |= ((volatile __u8 *)&ctx->user_ip4)[3] << 24;
+ if (ctx->user_ip4 != user_ip4)
+ return 0;
+
+ user_port = 0;
+ user_port |= ((volatile __u8 *)&ctx->user_port)[0] << 0;
+ user_port |= ((volatile __u8 *)&ctx->user_port)[1] << 8;
+ if (ctx->user_port != user_port)
+ return 0;
+
+ // u16 narrow loads:
+ user_ip4 = 0;
+ user_ip4 |= ((volatile __u16 *)&ctx->user_ip4)[0] << 0;
+ user_ip4 |= ((volatile __u16 *)&ctx->user_ip4)[1] << 16;
+ if (ctx->user_ip4 != user_ip4)
+ return 0;
+
+ /* Bind to device and unbind it. */
+ if (bind_to_device(ctx))
+ return 0;
+
+ /* Test for misc socket options. */
+ if (misc_opts(ctx, SO_MARK) || misc_opts(ctx, SO_PRIORITY))
+ return 0;
+
+ /* Set reuseport and unset */
+ if (bind_reuseport(ctx))
+ return 0;
+
+ ctx->user_ip4 = bpf_htonl(SERV4_REWRITE_IP);
+ ctx->user_port = bpf_htons(SERV4_REWRITE_PORT);
+
+ return 1;
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/bind6_prog.c b/tools/testing/selftests/bpf/progs/bind6_prog.c
new file mode 100644
index 000000000000..d62cd9e9cf0e
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/bind6_prog.c
@@ -0,0 +1,176 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include <string.h>
+
+#include <linux/stddef.h>
+#include <linux/bpf.h>
+#include <linux/in.h>
+#include <linux/in6.h>
+#include <linux/if.h>
+#include <errno.h>
+
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_endian.h>
+
+#define SERV6_IP_0 0xfaceb00c /* face:b00c:1234:5678::abcd */
+#define SERV6_IP_1 0x12345678
+#define SERV6_IP_2 0x00000000
+#define SERV6_IP_3 0x0000abcd
+#define SERV6_PORT 6060
+#define SERV6_REWRITE_IP_0 0x00000000
+#define SERV6_REWRITE_IP_1 0x00000000
+#define SERV6_REWRITE_IP_2 0x00000000
+#define SERV6_REWRITE_IP_3 0x00000001
+#define SERV6_REWRITE_PORT 6666
+
+#ifndef IFNAMSIZ
+#define IFNAMSIZ 16
+#endif
+
+static __inline int bind_to_device(struct bpf_sock_addr *ctx)
+{
+ char veth1[IFNAMSIZ] = "test_sock_addr1";
+ char veth2[IFNAMSIZ] = "test_sock_addr2";
+ char missing[IFNAMSIZ] = "nonexistent_dev";
+ char del_bind[IFNAMSIZ] = "";
+ int veth1_idx, veth2_idx;
+
+ if (bpf_setsockopt(ctx, SOL_SOCKET, SO_BINDTODEVICE,
+ &veth1, sizeof(veth1)))
+ return 1;
+ if (bpf_getsockopt(ctx, SOL_SOCKET, SO_BINDTOIFINDEX,
+ &veth1_idx, sizeof(veth1_idx)) || !veth1_idx)
+ return 1;
+ if (bpf_setsockopt(ctx, SOL_SOCKET, SO_BINDTODEVICE,
+ &veth2, sizeof(veth2)))
+ return 1;
+ if (bpf_getsockopt(ctx, SOL_SOCKET, SO_BINDTOIFINDEX,
+ &veth2_idx, sizeof(veth2_idx)) || !veth2_idx ||
+ veth1_idx == veth2_idx)
+ return 1;
+ if (bpf_setsockopt(ctx, SOL_SOCKET, SO_BINDTODEVICE,
+ &missing, sizeof(missing)) != -ENODEV)
+ return 1;
+ if (bpf_setsockopt(ctx, SOL_SOCKET, SO_BINDTOIFINDEX,
+ &veth1_idx, sizeof(veth1_idx)))
+ return 1;
+ if (bpf_setsockopt(ctx, SOL_SOCKET, SO_BINDTODEVICE,
+ &del_bind, sizeof(del_bind)))
+ return 1;
+
+ return 0;
+}
+
+static __inline int bind_reuseport(struct bpf_sock_addr *ctx)
+{
+ int val = 1;
+
+ if (bpf_setsockopt(ctx, SOL_SOCKET, SO_REUSEPORT,
+ &val, sizeof(val)))
+ return 1;
+ if (bpf_getsockopt(ctx, SOL_SOCKET, SO_REUSEPORT,
+ &val, sizeof(val)) || !val)
+ return 1;
+ val = 0;
+ if (bpf_setsockopt(ctx, SOL_SOCKET, SO_REUSEPORT,
+ &val, sizeof(val)))
+ return 1;
+ if (bpf_getsockopt(ctx, SOL_SOCKET, SO_REUSEPORT,
+ &val, sizeof(val)) || val)
+ return 1;
+
+ return 0;
+}
+
+static __inline int misc_opts(struct bpf_sock_addr *ctx, int opt)
+{
+ int old, tmp, new = 0xeb9f;
+
+ /* Socket in test case has guarantee that old never equals to new. */
+ if (bpf_getsockopt(ctx, SOL_SOCKET, opt, &old, sizeof(old)) ||
+ old == new)
+ return 1;
+ if (bpf_setsockopt(ctx, SOL_SOCKET, opt, &new, sizeof(new)))
+ return 1;
+ if (bpf_getsockopt(ctx, SOL_SOCKET, opt, &tmp, sizeof(tmp)) ||
+ tmp != new)
+ return 1;
+ if (bpf_setsockopt(ctx, SOL_SOCKET, opt, &old, sizeof(old)))
+ return 1;
+
+ return 0;
+}
+
+SEC("cgroup/bind6")
+int bind_v6_prog(struct bpf_sock_addr *ctx)
+{
+ struct bpf_sock *sk;
+ __u32 user_ip6;
+ __u16 user_port;
+ int i;
+
+ sk = ctx->sk;
+ if (!sk)
+ return 0;
+
+ if (sk->family != AF_INET6)
+ return 0;
+
+ if (ctx->type != SOCK_STREAM && ctx->type != SOCK_DGRAM)
+ return 0;
+
+ if (ctx->user_ip6[0] != bpf_htonl(SERV6_IP_0) ||
+ ctx->user_ip6[1] != bpf_htonl(SERV6_IP_1) ||
+ ctx->user_ip6[2] != bpf_htonl(SERV6_IP_2) ||
+ ctx->user_ip6[3] != bpf_htonl(SERV6_IP_3) ||
+ ctx->user_port != bpf_htons(SERV6_PORT))
+ return 0;
+
+ // u8 narrow loads:
+ for (i = 0; i < 4; i++) {
+ user_ip6 = 0;
+ user_ip6 |= ((volatile __u8 *)&ctx->user_ip6[i])[0] << 0;
+ user_ip6 |= ((volatile __u8 *)&ctx->user_ip6[i])[1] << 8;
+ user_ip6 |= ((volatile __u8 *)&ctx->user_ip6[i])[2] << 16;
+ user_ip6 |= ((volatile __u8 *)&ctx->user_ip6[i])[3] << 24;
+ if (ctx->user_ip6[i] != user_ip6)
+ return 0;
+ }
+
+ user_port = 0;
+ user_port |= ((volatile __u8 *)&ctx->user_port)[0] << 0;
+ user_port |= ((volatile __u8 *)&ctx->user_port)[1] << 8;
+ if (ctx->user_port != user_port)
+ return 0;
+
+ // u16 narrow loads:
+ for (i = 0; i < 4; i++) {
+ user_ip6 = 0;
+ user_ip6 |= ((volatile __u16 *)&ctx->user_ip6[i])[0] << 0;
+ user_ip6 |= ((volatile __u16 *)&ctx->user_ip6[i])[1] << 16;
+ if (ctx->user_ip6[i] != user_ip6)
+ return 0;
+ }
+
+ /* Bind to device and unbind it. */
+ if (bind_to_device(ctx))
+ return 0;
+
+ /* Test for misc socket options. */
+ if (misc_opts(ctx, SO_MARK) || misc_opts(ctx, SO_PRIORITY))
+ return 0;
+
+ /* Set reuseport and unset */
+ if (bind_reuseport(ctx))
+ return 0;
+
+ ctx->user_ip6[0] = bpf_htonl(SERV6_REWRITE_IP_0);
+ ctx->user_ip6[1] = bpf_htonl(SERV6_REWRITE_IP_1);
+ ctx->user_ip6[2] = bpf_htonl(SERV6_REWRITE_IP_2);
+ ctx->user_ip6[3] = bpf_htonl(SERV6_REWRITE_IP_3);
+ ctx->user_port = bpf_htons(SERV6_REWRITE_PORT);
+
+ return 1;
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/bind_perm.c b/tools/testing/selftests/bpf/progs/bind_perm.c
new file mode 100644
index 000000000000..7bd2a027025d
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/bind_perm.c
@@ -0,0 +1,45 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include <linux/stddef.h>
+#include <linux/bpf.h>
+#include <sys/types.h>
+#include <sys/socket.h>
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_endian.h>
+
+static __always_inline int bind_prog(struct bpf_sock_addr *ctx, int family)
+{
+ struct bpf_sock *sk;
+
+ sk = ctx->sk;
+ if (!sk)
+ return 0;
+
+ if (sk->family != family)
+ return 0;
+
+ if (ctx->type != SOCK_STREAM)
+ return 0;
+
+ /* Return 1 OR'ed with the first bit set to indicate
+ * that CAP_NET_BIND_SERVICE should be bypassed.
+ */
+ if (ctx->user_port == bpf_htons(111))
+ return (1 | 2);
+
+ return 1;
+}
+
+SEC("cgroup/bind4")
+int bind_v4_prog(struct bpf_sock_addr *ctx)
+{
+ return bind_prog(ctx, AF_INET);
+}
+
+SEC("cgroup/bind6")
+int bind_v6_prog(struct bpf_sock_addr *ctx)
+{
+ return bind_prog(ctx, AF_INET6);
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/bloom_filter_bench.c b/tools/testing/selftests/bpf/progs/bloom_filter_bench.c
new file mode 100644
index 000000000000..7efcbdbe772d
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/bloom_filter_bench.c
@@ -0,0 +1,154 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2021 Facebook */
+
+#include <errno.h>
+#include <linux/bpf.h>
+#include <stdbool.h>
+#include <bpf/bpf_helpers.h>
+#include "bpf_misc.h"
+
+char _license[] SEC("license") = "GPL";
+
+struct bpf_map;
+
+__u8 rand_vals[2500000];
+const __u32 nr_rand_bytes = 2500000;
+
+struct {
+ __uint(type, BPF_MAP_TYPE_ARRAY);
+ __uint(key_size, sizeof(__u32));
+ /* max entries and value_size will be set programmatically.
+ * They are configurable from the userspace bench program.
+ */
+} array_map SEC(".maps");
+
+struct {
+ __uint(type, BPF_MAP_TYPE_BLOOM_FILTER);
+ /* max entries, value_size, and # of hash functions will be set
+ * programmatically. They are configurable from the userspace
+ * bench program.
+ */
+ __uint(map_extra, 3);
+} bloom_map SEC(".maps");
+
+struct {
+ __uint(type, BPF_MAP_TYPE_HASH);
+ /* max entries, key_size, and value_size, will be set
+ * programmatically. They are configurable from the userspace
+ * bench program.
+ */
+} hashmap SEC(".maps");
+
+struct callback_ctx {
+ struct bpf_map *map;
+ bool update;
+};
+
+/* Tracks the number of hits, drops, and false hits */
+struct {
+ __u32 stats[3];
+} __attribute__((__aligned__(256))) percpu_stats[256];
+
+const __u32 hit_key = 0;
+const __u32 drop_key = 1;
+const __u32 false_hit_key = 2;
+
+__u8 value_size;
+
+const volatile bool hashmap_use_bloom;
+const volatile bool count_false_hits;
+
+int error = 0;
+
+static __always_inline void log_result(__u32 key)
+{
+ __u32 cpu = bpf_get_smp_processor_id();
+
+ percpu_stats[cpu & 255].stats[key]++;
+}
+
+static __u64
+bloom_callback(struct bpf_map *map, __u32 *key, void *val,
+ struct callback_ctx *data)
+{
+ int err;
+
+ if (data->update)
+ err = bpf_map_push_elem(data->map, val, 0);
+ else
+ err = bpf_map_peek_elem(data->map, val);
+
+ if (err) {
+ error |= 1;
+ return 1; /* stop the iteration */
+ }
+
+ log_result(hit_key);
+
+ return 0;
+}
+
+SEC("fentry/" SYS_PREFIX "sys_getpgid")
+int bloom_lookup(void *ctx)
+{
+ struct callback_ctx data;
+
+ data.map = (struct bpf_map *)&bloom_map;
+ data.update = false;
+
+ bpf_for_each_map_elem(&array_map, bloom_callback, &data, 0);
+
+ return 0;
+}
+
+SEC("fentry/" SYS_PREFIX "sys_getpgid")
+int bloom_update(void *ctx)
+{
+ struct callback_ctx data;
+
+ data.map = (struct bpf_map *)&bloom_map;
+ data.update = true;
+
+ bpf_for_each_map_elem(&array_map, bloom_callback, &data, 0);
+
+ return 0;
+}
+
+SEC("fentry/" SYS_PREFIX "sys_getpgid")
+int bloom_hashmap_lookup(void *ctx)
+{
+ __u64 *result;
+ int i, err;
+
+ __u32 index = bpf_get_prandom_u32();
+ __u32 bitmask = (1ULL << 21) - 1;
+
+ for (i = 0; i < 1024; i++, index += value_size) {
+ index = index & bitmask;
+
+ if (hashmap_use_bloom) {
+ err = bpf_map_peek_elem(&bloom_map,
+ rand_vals + index);
+ if (err) {
+ if (err != -ENOENT) {
+ error |= 2;
+ return 0;
+ }
+ log_result(hit_key);
+ continue;
+ }
+ }
+
+ result = bpf_map_lookup_elem(&hashmap,
+ rand_vals + index);
+ if (result) {
+ log_result(hit_key);
+ } else {
+ if (hashmap_use_bloom && count_false_hits)
+ log_result(false_hit_key);
+ log_result(drop_key);
+ }
+ }
+
+ return 0;
+}
diff --git a/tools/testing/selftests/bpf/progs/bloom_filter_map.c b/tools/testing/selftests/bpf/progs/bloom_filter_map.c
new file mode 100644
index 000000000000..f245fcfe0c61
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/bloom_filter_map.c
@@ -0,0 +1,83 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2021 Facebook */
+
+#include <linux/bpf.h>
+#include <bpf/bpf_helpers.h>
+#include "bpf_misc.h"
+
+char _license[] SEC("license") = "GPL";
+
+struct bpf_map;
+
+struct {
+ __uint(type, BPF_MAP_TYPE_ARRAY);
+ __type(key, __u32);
+ __type(value, __u32);
+ __uint(max_entries, 1000);
+} map_random_data SEC(".maps");
+
+struct map_bloom_type {
+ __uint(type, BPF_MAP_TYPE_BLOOM_FILTER);
+ __type(value, __u32);
+ __uint(max_entries, 10000);
+ __uint(map_extra, 5);
+} map_bloom SEC(".maps");
+
+struct {
+ __uint(type, BPF_MAP_TYPE_ARRAY_OF_MAPS);
+ __type(key, int);
+ __type(value, int);
+ __uint(max_entries, 1);
+ __array(values, struct map_bloom_type);
+} outer_map SEC(".maps");
+
+struct callback_ctx {
+ struct bpf_map *map;
+};
+
+int error = 0;
+
+static __u64
+check_elem(struct bpf_map *map, __u32 *key, __u32 *val,
+ struct callback_ctx *data)
+{
+ int err;
+
+ err = bpf_map_peek_elem(data->map, val);
+ if (err) {
+ error |= 1;
+ return 1; /* stop the iteration */
+ }
+
+ return 0;
+}
+
+SEC("fentry/" SYS_PREFIX "sys_getpgid")
+int inner_map(void *ctx)
+{
+ struct bpf_map *inner_map;
+ struct callback_ctx data;
+ int key = 0;
+
+ inner_map = bpf_map_lookup_elem(&outer_map, &key);
+ if (!inner_map) {
+ error |= 2;
+ return 0;
+ }
+
+ data.map = inner_map;
+ bpf_for_each_map_elem(&map_random_data, check_elem, &data, 0);
+
+ return 0;
+}
+
+SEC("fentry/" SYS_PREFIX "sys_getpgid")
+int check_bloom(void *ctx)
+{
+ struct callback_ctx data;
+
+ data.map = (struct bpf_map *)&map_bloom;
+ bpf_for_each_map_elem(&map_random_data, check_elem, &data, 0);
+
+ return 0;
+}
diff --git a/tools/testing/selftests/bpf/progs/bpf_compiler.h b/tools/testing/selftests/bpf/progs/bpf_compiler.h
new file mode 100644
index 000000000000..a7c343dc82e6
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/bpf_compiler.h
@@ -0,0 +1,33 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef __BPF_COMPILER_H__
+#define __BPF_COMPILER_H__
+
+#define DO_PRAGMA_(X) _Pragma(#X)
+
+#if __clang__
+#define __pragma_loop_unroll DO_PRAGMA_(clang loop unroll(enable))
+#else
+/* In GCC -funroll-loops, which is enabled with -O2, should have the
+ same impact than the loop-unroll-enable pragma above. */
+#define __pragma_loop_unroll
+#endif
+
+#if __clang__
+#define __pragma_loop_unroll_count(N) DO_PRAGMA_(clang loop unroll_count(N))
+#else
+#define __pragma_loop_unroll_count(N) DO_PRAGMA_(GCC unroll N)
+#endif
+
+#if __clang__
+#define __pragma_loop_unroll_full DO_PRAGMA_(clang loop unroll(full))
+#else
+#define __pragma_loop_unroll_full DO_PRAGMA_(GCC unroll 65534)
+#endif
+
+#if __clang__
+#define __pragma_loop_no_unroll DO_PRAGMA_(clang loop unroll(disable))
+#else
+#define __pragma_loop_no_unroll DO_PRAGMA_(GCC unroll 1)
+#endif
+
+#endif
diff --git a/tools/testing/selftests/bpf/progs/bpf_cubic.c b/tools/testing/selftests/bpf/progs/bpf_cubic.c
index ef574087f1e1..c997e3e3d3fb 100644
--- a/tools/testing/selftests/bpf/progs/bpf_cubic.c
+++ b/tools/testing/selftests/bpf/progs/bpf_cubic.c
@@ -15,6 +15,8 @@
*/
#include <linux/bpf.h>
+#include <linux/stddef.h>
+#include <linux/tcp.h>
#include "bpf_tcp_helpers.h"
char _license[] SEC("license") = "GPL";
@@ -167,13 +169,9 @@ static __always_inline void bictcp_hystart_reset(struct sock *sk)
ca->sample_cnt = 0;
}
-/* "struct_ops/" prefix is not a requirement
- * It will be recognized as BPF_PROG_TYPE_STRUCT_OPS
- * as long as it is used in one of the func ptr
- * under SEC(".struct_ops").
- */
-SEC("struct_ops/bictcp_init")
-void BPF_PROG(bictcp_init, struct sock *sk)
+/* "struct_ops/" prefix is a requirement */
+SEC("struct_ops/bpf_cubic_init")
+void BPF_PROG(bpf_cubic_init, struct sock *sk)
{
struct bictcp *ca = inet_csk_ca(sk);
@@ -186,11 +184,9 @@ void BPF_PROG(bictcp_init, struct sock *sk)
tcp_sk(sk)->snd_ssthresh = initial_ssthresh;
}
-/* No prefix in SEC will also work.
- * The remaining tcp-cubic functions have an easier way.
- */
-SEC("no-sec-prefix-bictcp_cwnd_event")
-void BPF_PROG(bictcp_cwnd_event, struct sock *sk, enum tcp_ca_event event)
+/* "struct_ops" prefix is a requirement */
+SEC("struct_ops/bpf_cubic_cwnd_event")
+void BPF_PROG(bpf_cubic_cwnd_event, struct sock *sk, enum tcp_ca_event event)
{
if (event == CA_EVENT_TX_START) {
struct bictcp *ca = inet_csk_ca(sk);
@@ -382,7 +378,7 @@ tcp_friendliness:
}
/* Or simply use the BPF_STRUCT_OPS to avoid the SEC boiler plate. */
-void BPF_STRUCT_OPS(bictcp_cong_avoid, struct sock *sk, __u32 ack, __u32 acked)
+void BPF_STRUCT_OPS(bpf_cubic_cong_avoid, struct sock *sk, __u32 ack, __u32 acked)
{
struct tcp_sock *tp = tcp_sk(sk);
struct bictcp *ca = inet_csk_ca(sk);
@@ -401,7 +397,7 @@ void BPF_STRUCT_OPS(bictcp_cong_avoid, struct sock *sk, __u32 ack, __u32 acked)
tcp_cong_avoid_ai(tp, ca->cnt, acked);
}
-__u32 BPF_STRUCT_OPS(bictcp_recalc_ssthresh, struct sock *sk)
+__u32 BPF_STRUCT_OPS(bpf_cubic_recalc_ssthresh, struct sock *sk)
{
const struct tcp_sock *tp = tcp_sk(sk);
struct bictcp *ca = inet_csk_ca(sk);
@@ -418,7 +414,7 @@ __u32 BPF_STRUCT_OPS(bictcp_recalc_ssthresh, struct sock *sk)
return max((tp->snd_cwnd * beta) / BICTCP_BETA_SCALE, 2U);
}
-void BPF_STRUCT_OPS(bictcp_state, struct sock *sk, __u8 new_state)
+void BPF_STRUCT_OPS(bpf_cubic_state, struct sock *sk, __u8 new_state)
{
if (new_state == TCP_CA_Loss) {
bictcp_reset(inet_csk_ca(sk));
@@ -494,13 +490,16 @@ static __always_inline void hystart_update(struct sock *sk, __u32 delay)
}
}
-void BPF_STRUCT_OPS(bictcp_acked, struct sock *sk,
+int bpf_cubic_acked_called = 0;
+
+void BPF_STRUCT_OPS(bpf_cubic_acked, struct sock *sk,
const struct ack_sample *sample)
{
const struct tcp_sock *tp = tcp_sk(sk);
struct bictcp *ca = inet_csk_ca(sk);
__u32 delay;
+ bpf_cubic_acked_called = 1;
/* Some calls are for duplicates without timetamps */
if (sample->rtt_us < 0)
return;
@@ -523,21 +522,21 @@ void BPF_STRUCT_OPS(bictcp_acked, struct sock *sk,
hystart_update(sk, delay);
}
-__u32 BPF_STRUCT_OPS(tcp_reno_undo_cwnd, struct sock *sk)
-{
- const struct tcp_sock *tp = tcp_sk(sk);
+extern __u32 tcp_reno_undo_cwnd(struct sock *sk) __ksym;
- return max(tp->snd_cwnd, tp->prior_cwnd);
+__u32 BPF_STRUCT_OPS(bpf_cubic_undo_cwnd, struct sock *sk)
+{
+ return tcp_reno_undo_cwnd(sk);
}
SEC(".struct_ops")
struct tcp_congestion_ops cubic = {
- .init = (void *)bictcp_init,
- .ssthresh = (void *)bictcp_recalc_ssthresh,
- .cong_avoid = (void *)bictcp_cong_avoid,
- .set_state = (void *)bictcp_state,
- .undo_cwnd = (void *)tcp_reno_undo_cwnd,
- .cwnd_event = (void *)bictcp_cwnd_event,
- .pkts_acked = (void *)bictcp_acked,
+ .init = (void *)bpf_cubic_init,
+ .ssthresh = (void *)bpf_cubic_recalc_ssthresh,
+ .cong_avoid = (void *)bpf_cubic_cong_avoid,
+ .set_state = (void *)bpf_cubic_state,
+ .undo_cwnd = (void *)bpf_cubic_undo_cwnd,
+ .cwnd_event = (void *)bpf_cubic_cwnd_event,
+ .pkts_acked = (void *)bpf_cubic_acked,
.name = "bpf_cubic",
};
diff --git a/tools/testing/selftests/bpf/progs/bpf_dctcp.c b/tools/testing/selftests/bpf/progs/bpf_dctcp.c
index 3fb4260570b1..460682759aed 100644
--- a/tools/testing/selftests/bpf/progs/bpf_dctcp.c
+++ b/tools/testing/selftests/bpf/progs/bpf_dctcp.c
@@ -9,13 +9,22 @@
#include <stddef.h>
#include <linux/bpf.h>
#include <linux/types.h>
+#include <linux/stddef.h>
+#include <linux/tcp.h>
+#include <errno.h>
#include <bpf/bpf_helpers.h>
#include <bpf/bpf_tracing.h>
#include "bpf_tcp_helpers.h"
char _license[] SEC("license") = "GPL";
+volatile const char fallback[TCP_CA_NAME_MAX];
+const char bpf_dctcp[] = "bpf_dctcp";
+const char tcp_cdg[] = "cdg";
+char cc_res[TCP_CA_NAME_MAX];
+int tcp_cdg_res = 0;
int stg_result = 0;
+int ebusy_cnt = 0;
struct {
__uint(type, BPF_MAP_TYPE_SK_STORAGE);
@@ -55,6 +64,33 @@ void BPF_PROG(dctcp_init, struct sock *sk)
struct dctcp *ca = inet_csk_ca(sk);
int *stg;
+ if (!(tp->ecn_flags & TCP_ECN_OK) && fallback[0]) {
+ /* Switch to fallback */
+ if (bpf_setsockopt(sk, SOL_TCP, TCP_CONGESTION,
+ (void *)fallback, sizeof(fallback)) == -EBUSY)
+ ebusy_cnt++;
+
+ /* Switch back to myself and the recurred dctcp_init()
+ * will get -EBUSY for all bpf_setsockopt(TCP_CONGESTION),
+ * except the last "cdg" one.
+ */
+ if (bpf_setsockopt(sk, SOL_TCP, TCP_CONGESTION,
+ (void *)bpf_dctcp, sizeof(bpf_dctcp)) == -EBUSY)
+ ebusy_cnt++;
+
+ /* Switch back to fallback */
+ if (bpf_setsockopt(sk, SOL_TCP, TCP_CONGESTION,
+ (void *)fallback, sizeof(fallback)) == -EBUSY)
+ ebusy_cnt++;
+
+ /* Expecting -ENOTSUPP for tcp_cdg_res */
+ tcp_cdg_res = bpf_setsockopt(sk, SOL_TCP, TCP_CONGESTION,
+ (void *)tcp_cdg, sizeof(tcp_cdg));
+ bpf_getsockopt(sk, SOL_TCP, TCP_CONGESTION,
+ (void *)cc_res, sizeof(cc_res));
+ return;
+ }
+
ca->prior_rcv_nxt = tp->rcv_nxt;
ca->dctcp_alpha = min(dctcp_alpha_on_init, DCTCP_MAX_ALPHA);
ca->loss_cwnd = 0;
@@ -192,22 +228,12 @@ __u32 BPF_PROG(dctcp_cwnd_undo, struct sock *sk)
return max(tcp_sk(sk)->snd_cwnd, ca->loss_cwnd);
}
-SEC("struct_ops/tcp_reno_cong_avoid")
-void BPF_PROG(tcp_reno_cong_avoid, struct sock *sk, __u32 ack, __u32 acked)
-{
- struct tcp_sock *tp = tcp_sk(sk);
-
- if (!tcp_is_cwnd_limited(sk))
- return;
+extern void tcp_reno_cong_avoid(struct sock *sk, __u32 ack, __u32 acked) __ksym;
- /* In "safe" area, increase. */
- if (tcp_in_slow_start(tp)) {
- acked = tcp_slow_start(tp, acked);
- if (!acked)
- return;
- }
- /* In dangerous area, increase slowly. */
- tcp_cong_avoid_ai(tp, tp->snd_cwnd, acked);
+SEC("struct_ops/dctcp_reno_cong_avoid")
+void BPF_PROG(dctcp_cong_avoid, struct sock *sk, __u32 ack, __u32 acked)
+{
+ tcp_reno_cong_avoid(sk, ack, acked);
}
SEC(".struct_ops")
@@ -224,7 +250,7 @@ struct tcp_congestion_ops dctcp = {
.in_ack_event = (void *)dctcp_update_alpha,
.cwnd_event = (void *)dctcp_cwnd_event,
.ssthresh = (void *)dctcp_ssthresh,
- .cong_avoid = (void *)tcp_reno_cong_avoid,
+ .cong_avoid = (void *)dctcp_cong_avoid,
.undo_cwnd = (void *)dctcp_cwnd_undo,
.set_state = (void *)dctcp_state,
.flags = TCP_CONG_NEEDS_ECN,
diff --git a/tools/testing/selftests/bpf/progs/bpf_dctcp_release.c b/tools/testing/selftests/bpf/progs/bpf_dctcp_release.c
new file mode 100644
index 000000000000..d836f7c372f0
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/bpf_dctcp_release.c
@@ -0,0 +1,26 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2021 Facebook */
+
+#include <stddef.h>
+#include <linux/bpf.h>
+#include <linux/types.h>
+#include <linux/stddef.h>
+#include <linux/tcp.h>
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
+#include "bpf_tcp_helpers.h"
+
+char _license[] SEC("license") = "GPL";
+const char cubic[] = "cubic";
+
+void BPF_STRUCT_OPS(dctcp_nouse_release, struct sock *sk)
+{
+ bpf_setsockopt(sk, SOL_TCP, TCP_CONGESTION,
+ (void *)cubic, sizeof(cubic));
+}
+
+SEC(".struct_ops")
+struct tcp_congestion_ops dctcp_rel = {
+ .release = (void *)dctcp_nouse_release,
+ .name = "bpf_dctcp_rel",
+};
diff --git a/tools/testing/selftests/bpf/progs/bpf_flow.c b/tools/testing/selftests/bpf/progs/bpf_flow.c
index de6de9221518..b04e092fac94 100644
--- a/tools/testing/selftests/bpf/progs/bpf_flow.c
+++ b/tools/testing/selftests/bpf/progs/bpf_flow.c
@@ -19,9 +19,10 @@
#include <bpf/bpf_helpers.h>
#include <bpf/bpf_endian.h>
-int _version SEC("version") = 1;
#define PROG(F) PROG_(F, _##F)
-#define PROG_(NUM, NAME) SEC("flow_dissector/"#NUM) int bpf_func##NAME
+#define PROG_(NUM, NAME) SEC("flow_dissector") int flow_dissector_##NUM
+
+#define FLOW_CONTINUE_SADDR 0x7f00007f /* 127.0.0.127 */
/* These are the identifiers of the BPF programs that will be used in tail
* calls. Name is limited to 16 characters, with the terminating character and
@@ -118,18 +119,18 @@ static __always_inline int parse_eth_proto(struct __sk_buff *skb, __be16 proto)
switch (proto) {
case bpf_htons(ETH_P_IP):
- bpf_tail_call(skb, &jmp_table, IP);
+ bpf_tail_call_static(skb, &jmp_table, IP);
break;
case bpf_htons(ETH_P_IPV6):
- bpf_tail_call(skb, &jmp_table, IPV6);
+ bpf_tail_call_static(skb, &jmp_table, IPV6);
break;
case bpf_htons(ETH_P_MPLS_MC):
case bpf_htons(ETH_P_MPLS_UC):
- bpf_tail_call(skb, &jmp_table, MPLS);
+ bpf_tail_call_static(skb, &jmp_table, MPLS);
break;
case bpf_htons(ETH_P_8021Q):
case bpf_htons(ETH_P_8021AD):
- bpf_tail_call(skb, &jmp_table, VLAN);
+ bpf_tail_call_static(skb, &jmp_table, VLAN);
break;
default:
/* Protocol not supported */
@@ -144,6 +145,19 @@ int _dissect(struct __sk_buff *skb)
{
struct bpf_flow_keys *keys = skb->flow_keys;
+ if (keys->n_proto == bpf_htons(ETH_P_IP)) {
+ /* IP traffic from FLOW_CONTINUE_SADDR falls-back to
+ * standard dissector
+ */
+ struct iphdr *iph, _iph;
+
+ iph = bpf_flow_dissect_get_header(skb, sizeof(*iph), &_iph);
+ if (iph && iph->ihl == 5 &&
+ iph->saddr == bpf_htonl(FLOW_CONTINUE_SADDR)) {
+ return BPF_FLOW_DISSECTOR_CONTINUE;
+ }
+ }
+
return parse_eth_proto(skb, keys->n_proto);
}
@@ -246,10 +260,10 @@ static __always_inline int parse_ipv6_proto(struct __sk_buff *skb, __u8 nexthdr)
switch (nexthdr) {
case IPPROTO_HOPOPTS:
case IPPROTO_DSTOPTS:
- bpf_tail_call(skb, &jmp_table, IPV6OP);
+ bpf_tail_call_static(skb, &jmp_table, IPV6OP);
break;
case IPPROTO_FRAGMENT:
- bpf_tail_call(skb, &jmp_table, IPV6FR);
+ bpf_tail_call_static(skb, &jmp_table, IPV6FR);
break;
default:
return parse_ip_proto(skb, nexthdr);
@@ -323,7 +337,7 @@ PROG(IPV6)(struct __sk_buff *skb)
keys->ip_proto = ip6h->nexthdr;
keys->flow_label = ip6_flowlabel(ip6h);
- if (keys->flags & BPF_FLOW_DISSECTOR_F_STOP_AT_FLOW_LABEL)
+ if (keys->flow_label && keys->flags & BPF_FLOW_DISSECTOR_F_STOP_AT_FLOW_LABEL)
return export_flow_keys(keys, BPF_OK);
return parse_ipv6_proto(skb, ip6h->nexthdr);
@@ -368,6 +382,8 @@ PROG(IPV6FR)(struct __sk_buff *skb)
*/
if (!(keys->flags & BPF_FLOW_DISSECTOR_F_PARSE_1ST_FRAG))
return export_flow_keys(keys, BPF_OK);
+ } else {
+ return export_flow_keys(keys, BPF_OK);
}
return parse_ipv6_proto(skb, fragh->nexthdr);
diff --git a/tools/testing/selftests/bpf/progs/bpf_hashmap_full_update_bench.c b/tools/testing/selftests/bpf/progs/bpf_hashmap_full_update_bench.c
new file mode 100644
index 000000000000..56957557e3e1
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/bpf_hashmap_full_update_bench.c
@@ -0,0 +1,40 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2022 Bytedance */
+
+#include "vmlinux.h"
+#include <bpf/bpf_helpers.h>
+#include "bpf_misc.h"
+
+char _license[] SEC("license") = "GPL";
+
+#define MAX_ENTRIES 1000
+
+struct {
+ __uint(type, BPF_MAP_TYPE_HASH);
+ __type(key, u32);
+ __type(value, u64);
+ __uint(max_entries, MAX_ENTRIES);
+} hash_map_bench SEC(".maps");
+
+u64 __attribute__((__aligned__(256))) percpu_time[256];
+u64 nr_loops;
+
+static int loop_update_callback(__u32 index, u32 *key)
+{
+ u64 init_val = 1;
+
+ bpf_map_update_elem(&hash_map_bench, key, &init_val, BPF_ANY);
+ return 0;
+}
+
+SEC("fentry/" SYS_PREFIX "sys_getpgid")
+int benchmark(void *ctx)
+{
+ u32 cpu = bpf_get_smp_processor_id();
+ u32 key = cpu + MAX_ENTRIES;
+ u64 start_time = bpf_ktime_get_ns();
+
+ bpf_loop(nr_loops, loop_update_callback, &key, 0);
+ percpu_time[cpu & 255] = bpf_ktime_get_ns() - start_time;
+ return 0;
+}
diff --git a/tools/testing/selftests/bpf/progs/bpf_hashmap_lookup.c b/tools/testing/selftests/bpf/progs/bpf_hashmap_lookup.c
new file mode 100644
index 000000000000..1eb74ddca414
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/bpf_hashmap_lookup.c
@@ -0,0 +1,63 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2023 Isovalent */
+
+#include "vmlinux.h"
+
+#include <bpf/bpf_helpers.h>
+#include "bpf_misc.h"
+
+char _license[] SEC("license") = "GPL";
+
+struct {
+ __uint(type, BPF_MAP_TYPE_HASH);
+} hash_map_bench SEC(".maps");
+
+/* The number of slots to store times */
+#define NR_SLOTS 32
+#define NR_CPUS 256
+#define CPU_MASK (NR_CPUS-1)
+
+/* Configured by userspace */
+u64 nr_entries;
+u64 nr_loops;
+u32 __attribute__((__aligned__(8))) key[NR_CPUS];
+
+/* Filled by us */
+u64 __attribute__((__aligned__(256))) percpu_times_index[NR_CPUS];
+u64 __attribute__((__aligned__(256))) percpu_times[NR_CPUS][NR_SLOTS];
+
+static inline void patch_key(u32 i)
+{
+#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
+ key[0] = i + 1;
+#else
+ key[0] = __builtin_bswap32(i + 1);
+#endif
+ /* the rest of key is random and is configured by userspace */
+}
+
+static int lookup_callback(__u32 index, u32 *unused)
+{
+ patch_key(index);
+ return bpf_map_lookup_elem(&hash_map_bench, key) ? 0 : 1;
+}
+
+static int loop_lookup_callback(__u32 index, u32 *unused)
+{
+ return bpf_loop(nr_entries, lookup_callback, NULL, 0) ? 0 : 1;
+}
+
+SEC("fentry/" SYS_PREFIX "sys_getpgid")
+int benchmark(void *ctx)
+{
+ u32 cpu = bpf_get_smp_processor_id();
+ u32 times_index;
+ u64 start_time;
+
+ times_index = percpu_times_index[cpu & CPU_MASK] % NR_SLOTS;
+ start_time = bpf_ktime_get_ns();
+ bpf_loop(nr_loops, loop_lookup_callback, NULL, 0);
+ percpu_times[cpu & CPU_MASK][times_index] = bpf_ktime_get_ns() - start_time;
+ percpu_times_index[cpu & CPU_MASK] += 1;
+ return 0;
+}
diff --git a/tools/testing/selftests/bpf/progs/bpf_iter.h b/tools/testing/selftests/bpf/progs/bpf_iter.h
new file mode 100644
index 000000000000..c41ee80533ca
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/bpf_iter.h
@@ -0,0 +1,167 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* Copyright (c) 2020 Facebook */
+/* "undefine" structs in vmlinux.h, because we "override" them below */
+#define bpf_iter_meta bpf_iter_meta___not_used
+#define bpf_iter__bpf_map bpf_iter__bpf_map___not_used
+#define bpf_iter__ipv6_route bpf_iter__ipv6_route___not_used
+#define bpf_iter__netlink bpf_iter__netlink___not_used
+#define bpf_iter__task bpf_iter__task___not_used
+#define bpf_iter__task_file bpf_iter__task_file___not_used
+#define bpf_iter__task_vma bpf_iter__task_vma___not_used
+#define bpf_iter__tcp bpf_iter__tcp___not_used
+#define tcp6_sock tcp6_sock___not_used
+#define bpf_iter__udp bpf_iter__udp___not_used
+#define udp6_sock udp6_sock___not_used
+#define bpf_iter__unix bpf_iter__unix___not_used
+#define bpf_iter__bpf_map_elem bpf_iter__bpf_map_elem___not_used
+#define bpf_iter__bpf_sk_storage_map bpf_iter__bpf_sk_storage_map___not_used
+#define bpf_iter__sockmap bpf_iter__sockmap___not_used
+#define bpf_iter__bpf_link bpf_iter__bpf_link___not_used
+#define bpf_iter__cgroup bpf_iter__cgroup___not_used
+#define btf_ptr btf_ptr___not_used
+#define BTF_F_COMPACT BTF_F_COMPACT___not_used
+#define BTF_F_NONAME BTF_F_NONAME___not_used
+#define BTF_F_PTR_RAW BTF_F_PTR_RAW___not_used
+#define BTF_F_ZERO BTF_F_ZERO___not_used
+#define bpf_iter__ksym bpf_iter__ksym___not_used
+#include "vmlinux.h"
+#undef bpf_iter_meta
+#undef bpf_iter__bpf_map
+#undef bpf_iter__ipv6_route
+#undef bpf_iter__netlink
+#undef bpf_iter__task
+#undef bpf_iter__task_file
+#undef bpf_iter__task_vma
+#undef bpf_iter__tcp
+#undef tcp6_sock
+#undef bpf_iter__udp
+#undef udp6_sock
+#undef bpf_iter__unix
+#undef bpf_iter__bpf_map_elem
+#undef bpf_iter__bpf_sk_storage_map
+#undef bpf_iter__sockmap
+#undef bpf_iter__bpf_link
+#undef bpf_iter__cgroup
+#undef btf_ptr
+#undef BTF_F_COMPACT
+#undef BTF_F_NONAME
+#undef BTF_F_PTR_RAW
+#undef BTF_F_ZERO
+#undef bpf_iter__ksym
+
+struct bpf_iter_meta {
+ struct seq_file *seq;
+ __u64 session_id;
+ __u64 seq_num;
+} __attribute__((preserve_access_index));
+
+struct bpf_iter__ipv6_route {
+ struct bpf_iter_meta *meta;
+ struct fib6_info *rt;
+} __attribute__((preserve_access_index));
+
+struct bpf_iter__netlink {
+ struct bpf_iter_meta *meta;
+ struct netlink_sock *sk;
+} __attribute__((preserve_access_index));
+
+struct bpf_iter__task {
+ struct bpf_iter_meta *meta;
+ struct task_struct *task;
+} __attribute__((preserve_access_index));
+
+struct bpf_iter__task_file {
+ struct bpf_iter_meta *meta;
+ struct task_struct *task;
+ __u32 fd;
+ struct file *file;
+} __attribute__((preserve_access_index));
+
+struct bpf_iter__task_vma {
+ struct bpf_iter_meta *meta;
+ struct task_struct *task;
+ struct vm_area_struct *vma;
+} __attribute__((preserve_access_index));
+
+struct bpf_iter__bpf_map {
+ struct bpf_iter_meta *meta;
+ struct bpf_map *map;
+} __attribute__((preserve_access_index));
+
+struct bpf_iter__tcp {
+ struct bpf_iter_meta *meta;
+ struct sock_common *sk_common;
+ uid_t uid;
+} __attribute__((preserve_access_index));
+
+struct tcp6_sock {
+ struct tcp_sock tcp;
+ struct ipv6_pinfo inet6;
+} __attribute__((preserve_access_index));
+
+struct bpf_iter__udp {
+ struct bpf_iter_meta *meta;
+ struct udp_sock *udp_sk;
+ uid_t uid __attribute__((aligned(8)));
+ int bucket __attribute__((aligned(8)));
+} __attribute__((preserve_access_index));
+
+struct udp6_sock {
+ struct udp_sock udp;
+ struct ipv6_pinfo inet6;
+} __attribute__((preserve_access_index));
+
+struct bpf_iter__unix {
+ struct bpf_iter_meta *meta;
+ struct unix_sock *unix_sk;
+ uid_t uid;
+} __attribute__((preserve_access_index));
+
+struct bpf_iter__bpf_map_elem {
+ struct bpf_iter_meta *meta;
+ struct bpf_map *map;
+ void *key;
+ void *value;
+};
+
+struct bpf_iter__bpf_sk_storage_map {
+ struct bpf_iter_meta *meta;
+ struct bpf_map *map;
+ struct sock *sk;
+ void *value;
+};
+
+struct bpf_iter__sockmap {
+ struct bpf_iter_meta *meta;
+ struct bpf_map *map;
+ void *key;
+ struct sock *sk;
+};
+
+struct bpf_iter__bpf_link {
+ struct bpf_iter_meta *meta;
+ struct bpf_link *link;
+};
+
+struct bpf_iter__cgroup {
+ struct bpf_iter_meta *meta;
+ struct cgroup *cgroup;
+} __attribute__((preserve_access_index));
+
+struct btf_ptr {
+ void *ptr;
+ __u32 type_id;
+ __u32 flags;
+};
+
+enum {
+ BTF_F_COMPACT = (1ULL << 0),
+ BTF_F_NONAME = (1ULL << 1),
+ BTF_F_PTR_RAW = (1ULL << 2),
+ BTF_F_ZERO = (1ULL << 3),
+};
+
+struct bpf_iter__ksym {
+ struct bpf_iter_meta *meta;
+ struct kallsym_iter *ksym;
+};
diff --git a/tools/testing/selftests/bpf/progs/bpf_iter_bpf_array_map.c b/tools/testing/selftests/bpf/progs/bpf_iter_bpf_array_map.c
new file mode 100644
index 000000000000..c5969ca6f26b
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/bpf_iter_bpf_array_map.c
@@ -0,0 +1,59 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2020 Facebook */
+#include "bpf_iter.h"
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
+
+char _license[] SEC("license") = "GPL";
+
+struct key_t {
+ int a;
+ int b;
+ int c;
+};
+
+struct {
+ __uint(type, BPF_MAP_TYPE_ARRAY);
+ __uint(max_entries, 3);
+ __type(key, __u32);
+ __type(value, __u64);
+} arraymap1 SEC(".maps");
+
+struct {
+ __uint(type, BPF_MAP_TYPE_HASH);
+ __uint(max_entries, 10);
+ __type(key, __u64);
+ __type(value, __u32);
+} hashmap1 SEC(".maps");
+
+__u32 key_sum = 0;
+__u64 val_sum = 0;
+
+SEC("iter/bpf_map_elem")
+int dump_bpf_array_map(struct bpf_iter__bpf_map_elem *ctx)
+{
+ __u32 *hmap_val, *key = ctx->key;
+ __u64 *val = ctx->value;
+
+ if (key == (void *)0 || val == (void *)0)
+ return 0;
+
+ bpf_seq_write(ctx->meta->seq, key, sizeof(__u32));
+ bpf_seq_write(ctx->meta->seq, val, sizeof(__u64));
+ key_sum += *key;
+ val_sum += *val;
+
+ /* workaround - It's necessary to do this convoluted (val, key)
+ * write into hashmap1, instead of simply doing
+ * bpf_map_update_elem(&hashmap1, val, key, BPF_ANY);
+ * because key has MEM_RDONLY flag and bpf_map_update elem expects
+ * types without this flag
+ */
+ bpf_map_update_elem(&hashmap1, val, val, BPF_ANY);
+ hmap_val = bpf_map_lookup_elem(&hashmap1, val);
+ if (hmap_val)
+ *hmap_val = *key;
+
+ *val = *key;
+ return 0;
+}
diff --git a/tools/testing/selftests/bpf/progs/bpf_iter_bpf_hash_map.c b/tools/testing/selftests/bpf/progs/bpf_iter_bpf_hash_map.c
new file mode 100644
index 000000000000..d7a69217fb68
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/bpf_iter_bpf_hash_map.c
@@ -0,0 +1,123 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2020 Facebook */
+#include "bpf_iter.h"
+#include <bpf/bpf_helpers.h>
+
+char _license[] SEC("license") = "GPL";
+
+struct key_t {
+ int a;
+ int b;
+ int c;
+};
+
+struct {
+ __uint(type, BPF_MAP_TYPE_HASH);
+ __uint(max_entries, 3);
+ __type(key, struct key_t);
+ __type(value, __u64);
+} hashmap1 SEC(".maps");
+
+struct {
+ __uint(type, BPF_MAP_TYPE_HASH);
+ __uint(max_entries, 3);
+ __type(key, __u64);
+ __type(value, __u64);
+} hashmap2 SEC(".maps");
+
+struct {
+ __uint(type, BPF_MAP_TYPE_HASH);
+ __uint(max_entries, 3);
+ __type(key, struct key_t);
+ __type(value, __u32);
+} hashmap3 SEC(".maps");
+
+/* will set before prog run */
+bool in_test_mode = 0;
+
+/* will collect results during prog run */
+__u32 key_sum_a = 0, key_sum_b = 0, key_sum_c = 0;
+__u64 val_sum = 0;
+
+SEC("iter/bpf_map_elem")
+int dump_bpf_hash_map(struct bpf_iter__bpf_map_elem *ctx)
+{
+ struct seq_file *seq = ctx->meta->seq;
+ __u32 seq_num = ctx->meta->seq_num;
+ struct bpf_map *map = ctx->map;
+ struct key_t *key = ctx->key;
+ struct key_t tmp_key;
+ __u64 *val = ctx->value;
+ __u64 tmp_val = 0;
+ int ret;
+
+ if (in_test_mode) {
+ /* test mode is used by selftests to
+ * test functionality of bpf_hash_map iter.
+ *
+ * the above hashmap1 will have correct size
+ * and will be accepted, hashmap2 and hashmap3
+ * should be rejected due to smaller key/value
+ * size.
+ */
+ if (key == (void *)0 || val == (void *)0)
+ return 0;
+
+ /* update the value and then delete the <key, value> pair.
+ * it should not impact the existing 'val' which is still
+ * accessible under rcu.
+ */
+ __builtin_memcpy(&tmp_key, key, sizeof(struct key_t));
+ ret = bpf_map_update_elem(&hashmap1, &tmp_key, &tmp_val, 0);
+ if (ret)
+ return 0;
+ ret = bpf_map_delete_elem(&hashmap1, &tmp_key);
+ if (ret)
+ return 0;
+
+ key_sum_a += key->a;
+ key_sum_b += key->b;
+ key_sum_c += key->c;
+ val_sum += *val;
+ return 0;
+ }
+
+ /* non-test mode, the map is prepared with the
+ * below bpftool command sequence:
+ * bpftool map create /sys/fs/bpf/m1 type hash \
+ * key 12 value 8 entries 3 name map1
+ * bpftool map update id 77 key 0 0 0 1 0 0 0 0 0 0 0 1 \
+ * value 0 0 0 1 0 0 0 1
+ * bpftool map update id 77 key 0 0 0 1 0 0 0 0 0 0 0 2 \
+ * value 0 0 0 1 0 0 0 2
+ * The bpftool iter command line:
+ * bpftool iter pin ./bpf_iter_bpf_hash_map.o /sys/fs/bpf/p1 \
+ * map id 77
+ * The below output will be:
+ * map dump starts
+ * 77: (1000000 0 2000000) (200000001000000)
+ * 77: (1000000 0 1000000) (100000001000000)
+ * map dump ends
+ */
+ if (seq_num == 0)
+ BPF_SEQ_PRINTF(seq, "map dump starts\n");
+
+ if (key == (void *)0 || val == (void *)0) {
+ BPF_SEQ_PRINTF(seq, "map dump ends\n");
+ return 0;
+ }
+
+ BPF_SEQ_PRINTF(seq, "%d: (%x %d %x) (%llx)\n", map->id,
+ key->a, key->b, key->c, *val);
+
+ return 0;
+}
+
+SEC("iter.s/bpf_map_elem")
+int sleepable_dummy_dump(struct bpf_iter__bpf_map_elem *ctx)
+{
+ if (ctx->meta->seq_num == 0)
+ BPF_SEQ_PRINTF(ctx->meta->seq, "map dump starts\n");
+
+ return 0;
+}
diff --git a/tools/testing/selftests/bpf/progs/bpf_iter_bpf_link.c b/tools/testing/selftests/bpf/progs/bpf_iter_bpf_link.c
new file mode 100644
index 000000000000..e1af2f8f75a6
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/bpf_iter_bpf_link.c
@@ -0,0 +1,21 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2022 Red Hat, Inc. */
+#include "bpf_iter.h"
+#include <bpf/bpf_helpers.h>
+
+char _license[] SEC("license") = "GPL";
+
+SEC("iter/bpf_link")
+int dump_bpf_link(struct bpf_iter__bpf_link *ctx)
+{
+ struct seq_file *seq = ctx->meta->seq;
+ struct bpf_link *link = ctx->link;
+ int link_id;
+
+ if (!link)
+ return 0;
+
+ link_id = link->id;
+ bpf_seq_write(seq, &link_id, sizeof(link_id));
+ return 0;
+}
diff --git a/tools/testing/selftests/bpf/progs/bpf_iter_bpf_map.c b/tools/testing/selftests/bpf/progs/bpf_iter_bpf_map.c
index b57bd6fef208..6c39e86b666f 100644
--- a/tools/testing/selftests/bpf/progs/bpf_iter_bpf_map.c
+++ b/tools/testing/selftests/bpf/progs/bpf_iter_bpf_map.c
@@ -1,27 +1,10 @@
// SPDX-License-Identifier: GPL-2.0
/* Copyright (c) 2020 Facebook */
-/* "undefine" structs in vmlinux.h, because we "override" them below */
-#define bpf_iter_meta bpf_iter_meta___not_used
-#define bpf_iter__bpf_map bpf_iter__bpf_map___not_used
-#include "vmlinux.h"
-#undef bpf_iter_meta
-#undef bpf_iter__bpf_map
+#include "bpf_iter.h"
#include <bpf/bpf_helpers.h>
-#include <bpf/bpf_tracing.h>
char _license[] SEC("license") = "GPL";
-struct bpf_iter_meta {
- struct seq_file *seq;
- __u64 session_id;
- __u64 seq_num;
-} __attribute__((preserve_access_index));
-
-struct bpf_iter__bpf_map {
- struct bpf_iter_meta *meta;
- struct bpf_map *map;
-} __attribute__((preserve_access_index));
-
SEC("iter/bpf_map")
int dump_bpf_map(struct bpf_iter__bpf_map *ctx)
{
@@ -39,6 +22,6 @@ int dump_bpf_map(struct bpf_iter__bpf_map *ctx)
BPF_SEQ_PRINTF(seq, "%8u %8ld %8ld %10lu\n", map->id, map->refcnt.counter,
map->usercnt.counter,
- map->memory.user->locked_vm.counter);
+ 0LLU);
return 0;
}
diff --git a/tools/testing/selftests/bpf/progs/bpf_iter_bpf_percpu_array_map.c b/tools/testing/selftests/bpf/progs/bpf_iter_bpf_percpu_array_map.c
new file mode 100644
index 000000000000..85fa710fad90
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/bpf_iter_bpf_percpu_array_map.c
@@ -0,0 +1,46 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2020 Facebook */
+#include "bpf_iter.h"
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
+
+char _license[] SEC("license") = "GPL";
+
+struct key_t {
+ int a;
+ int b;
+ int c;
+};
+
+struct {
+ __uint(type, BPF_MAP_TYPE_PERCPU_ARRAY);
+ __uint(max_entries, 3);
+ __type(key, __u32);
+ __type(value, __u32);
+} arraymap1 SEC(".maps");
+
+/* will set before prog run */
+volatile const __u32 num_cpus = 0;
+
+__u32 key_sum = 0, val_sum = 0;
+
+SEC("iter/bpf_map_elem")
+int dump_bpf_percpu_array_map(struct bpf_iter__bpf_map_elem *ctx)
+{
+ __u32 *key = ctx->key;
+ void *pptr = ctx->value;
+ __u32 step;
+ int i;
+
+ if (key == (void *)0 || pptr == (void *)0)
+ return 0;
+
+ key_sum += *key;
+
+ step = 8;
+ for (i = 0; i < num_cpus; i++) {
+ val_sum += *(__u32 *)pptr;
+ pptr += step;
+ }
+ return 0;
+}
diff --git a/tools/testing/selftests/bpf/progs/bpf_iter_bpf_percpu_hash_map.c b/tools/testing/selftests/bpf/progs/bpf_iter_bpf_percpu_hash_map.c
new file mode 100644
index 000000000000..5014a17d6c02
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/bpf_iter_bpf_percpu_hash_map.c
@@ -0,0 +1,50 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2020 Facebook */
+#include "bpf_iter.h"
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
+
+char _license[] SEC("license") = "GPL";
+
+struct key_t {
+ int a;
+ int b;
+ int c;
+};
+
+struct {
+ __uint(type, BPF_MAP_TYPE_PERCPU_HASH);
+ __uint(max_entries, 3);
+ __type(key, struct key_t);
+ __type(value, __u32);
+} hashmap1 SEC(".maps");
+
+/* will set before prog run */
+volatile const __s32 num_cpus = 0;
+
+/* will collect results during prog run */
+__u32 key_sum_a = 0, key_sum_b = 0, key_sum_c = 0;
+__u32 val_sum = 0;
+
+SEC("iter/bpf_map_elem")
+int dump_bpf_percpu_hash_map(struct bpf_iter__bpf_map_elem *ctx)
+{
+ struct key_t *key = ctx->key;
+ void *pptr = ctx->value;
+ __u32 step;
+ int i;
+
+ if (key == (void *)0 || pptr == (void *)0)
+ return 0;
+
+ key_sum_a += key->a;
+ key_sum_b += key->b;
+ key_sum_c += key->c;
+
+ step = 8;
+ for (i = 0; i < num_cpus; i++) {
+ val_sum += *(__u32 *)pptr;
+ pptr += step;
+ }
+ return 0;
+}
diff --git a/tools/testing/selftests/bpf/progs/bpf_iter_bpf_sk_storage_helpers.c b/tools/testing/selftests/bpf/progs/bpf_iter_bpf_sk_storage_helpers.c
new file mode 100644
index 000000000000..6cecab2b32ba
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/bpf_iter_bpf_sk_storage_helpers.c
@@ -0,0 +1,65 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2020 Google LLC. */
+#include "bpf_iter.h"
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
+
+char _license[] SEC("license") = "GPL";
+
+struct {
+ __uint(type, BPF_MAP_TYPE_SK_STORAGE);
+ __uint(map_flags, BPF_F_NO_PREALLOC);
+ __type(key, int);
+ __type(value, int);
+} sk_stg_map SEC(".maps");
+
+SEC("iter/bpf_sk_storage_map")
+int delete_bpf_sk_storage_map(struct bpf_iter__bpf_sk_storage_map *ctx)
+{
+ if (ctx->sk)
+ bpf_sk_storage_delete(&sk_stg_map, ctx->sk);
+
+ return 0;
+}
+
+SEC("iter/task_file")
+int fill_socket_owner(struct bpf_iter__task_file *ctx)
+{
+ struct task_struct *task = ctx->task;
+ struct file *file = ctx->file;
+ struct socket *sock;
+ int *sock_tgid;
+
+ if (!task || !file)
+ return 0;
+
+ sock = bpf_sock_from_file(file);
+ if (!sock)
+ return 0;
+
+ sock_tgid = bpf_sk_storage_get(&sk_stg_map, sock->sk, 0, 0);
+ if (!sock_tgid)
+ return 0;
+
+ *sock_tgid = task->tgid;
+
+ return 0;
+}
+
+SEC("iter/tcp")
+int negate_socket_local_storage(struct bpf_iter__tcp *ctx)
+{
+ struct sock_common *sk_common = ctx->sk_common;
+ int *sock_tgid;
+
+ if (!sk_common)
+ return 0;
+
+ sock_tgid = bpf_sk_storage_get(&sk_stg_map, sk_common, 0, 0);
+ if (!sock_tgid)
+ return 0;
+
+ *sock_tgid = -*sock_tgid;
+
+ return 0;
+}
diff --git a/tools/testing/selftests/bpf/progs/bpf_iter_bpf_sk_storage_map.c b/tools/testing/selftests/bpf/progs/bpf_iter_bpf_sk_storage_map.c
new file mode 100644
index 000000000000..c7b8e006b171
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/bpf_iter_bpf_sk_storage_map.c
@@ -0,0 +1,52 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2020 Facebook */
+#include "bpf_iter.h"
+#include "bpf_tracing_net.h"
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
+
+char _license[] SEC("license") = "GPL";
+
+struct {
+ __uint(type, BPF_MAP_TYPE_SK_STORAGE);
+ __uint(map_flags, BPF_F_NO_PREALLOC);
+ __type(key, int);
+ __type(value, int);
+} sk_stg_map SEC(".maps");
+
+__u32 val_sum = 0;
+__u32 ipv6_sk_count = 0;
+__u32 to_add_val = 0;
+
+SEC("iter/bpf_sk_storage_map")
+int rw_bpf_sk_storage_map(struct bpf_iter__bpf_sk_storage_map *ctx)
+{
+ struct sock *sk = ctx->sk;
+ __u32 *val = ctx->value;
+
+ if (sk == NULL || val == NULL)
+ return 0;
+
+ if (sk->sk_family == AF_INET6)
+ ipv6_sk_count++;
+
+ val_sum += *val;
+
+ *val += to_add_val;
+
+ return 0;
+}
+
+SEC("iter/bpf_sk_storage_map")
+int oob_write_bpf_sk_storage_map(struct bpf_iter__bpf_sk_storage_map *ctx)
+{
+ struct sock *sk = ctx->sk;
+ __u32 *val = ctx->value;
+
+ if (sk == NULL || val == NULL)
+ return 0;
+
+ *(val + 1) = 0xdeadbeef;
+
+ return 0;
+}
diff --git a/tools/testing/selftests/bpf/progs/bpf_iter_ipv6_route.c b/tools/testing/selftests/bpf/progs/bpf_iter_ipv6_route.c
index c8e9ca74c87b..784a610ce039 100644
--- a/tools/testing/selftests/bpf/progs/bpf_iter_ipv6_route.c
+++ b/tools/testing/selftests/bpf/progs/bpf_iter_ipv6_route.c
@@ -1,35 +1,13 @@
// SPDX-License-Identifier: GPL-2.0
/* Copyright (c) 2020 Facebook */
-/* "undefine" structs in vmlinux.h, because we "override" them below */
-#define bpf_iter_meta bpf_iter_meta___not_used
-#define bpf_iter__ipv6_route bpf_iter__ipv6_route___not_used
-#include "vmlinux.h"
-#undef bpf_iter_meta
-#undef bpf_iter__ipv6_route
+#include "bpf_iter.h"
+#include "bpf_tracing_net.h"
#include <bpf/bpf_helpers.h>
-#include <bpf/bpf_tracing.h>
-
-struct bpf_iter_meta {
- struct seq_file *seq;
- __u64 session_id;
- __u64 seq_num;
-} __attribute__((preserve_access_index));
-
-struct bpf_iter__ipv6_route {
- struct bpf_iter_meta *meta;
- struct fib6_info *rt;
-} __attribute__((preserve_access_index));
char _license[] SEC("license") = "GPL";
extern bool CONFIG_IPV6_SUBTREES __kconfig __weak;
-#define RTF_GATEWAY 0x0002
-#define IFNAMSIZ 16
-#define fib_nh_gw_family nh_common.nhc_gw_family
-#define fib_nh_gw6 nh_common.nhc_gw.ipv6
-#define fib_nh_dev nh_common.nhc_dev
-
SEC("iter/ipv6_route")
int dump_ipv6_route(struct bpf_iter__ipv6_route *ctx)
{
diff --git a/tools/testing/selftests/bpf/progs/bpf_iter_ksym.c b/tools/testing/selftests/bpf/progs/bpf_iter_ksym.c
new file mode 100644
index 000000000000..521267818f4d
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/bpf_iter_ksym.c
@@ -0,0 +1,71 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2022, Oracle and/or its affiliates. */
+#include "bpf_iter.h"
+#include <bpf/bpf_helpers.h>
+
+char _license[] SEC("license") = "GPL";
+
+unsigned long last_sym_value = 0;
+
+static inline char to_lower(char c)
+{
+ if (c >= 'A' && c <= 'Z')
+ c += ('a' - 'A');
+ return c;
+}
+
+static inline char to_upper(char c)
+{
+ if (c >= 'a' && c <= 'z')
+ c -= ('a' - 'A');
+ return c;
+}
+
+/* Dump symbols with max size; the latter is calculated by caching symbol N value
+ * and when iterating on symbol N+1, we can print max size of symbol N via
+ * address of N+1 - address of N.
+ */
+SEC("iter/ksym")
+int dump_ksym(struct bpf_iter__ksym *ctx)
+{
+ struct seq_file *seq = ctx->meta->seq;
+ struct kallsym_iter *iter = ctx->ksym;
+ __u32 seq_num = ctx->meta->seq_num;
+ unsigned long value;
+ char type;
+
+ if (!iter)
+ return 0;
+
+ if (seq_num == 0) {
+ BPF_SEQ_PRINTF(seq, "ADDR TYPE NAME MODULE_NAME KIND MAX_SIZE\n");
+ return 0;
+ }
+ if (last_sym_value)
+ BPF_SEQ_PRINTF(seq, "0x%x\n", iter->value - last_sym_value);
+ else
+ BPF_SEQ_PRINTF(seq, "\n");
+
+ value = iter->show_value ? iter->value : 0;
+
+ last_sym_value = value;
+
+ type = iter->type;
+
+ if (iter->module_name[0]) {
+ type = iter->exported ? to_upper(type) : to_lower(type);
+ BPF_SEQ_PRINTF(seq, "0x%llx %c %s [ %s ] ",
+ value, type, iter->name, iter->module_name);
+ } else {
+ BPF_SEQ_PRINTF(seq, "0x%llx %c %s ", value, type, iter->name);
+ }
+ if (!iter->pos_mod_end || iter->pos_mod_end > iter->pos)
+ BPF_SEQ_PRINTF(seq, "MOD ");
+ else if (!iter->pos_ftrace_mod_end || iter->pos_ftrace_mod_end > iter->pos)
+ BPF_SEQ_PRINTF(seq, "FTRACE_MOD ");
+ else if (!iter->pos_bpf_end || iter->pos_bpf_end > iter->pos)
+ BPF_SEQ_PRINTF(seq, "BPF ");
+ else
+ BPF_SEQ_PRINTF(seq, "KPROBE ");
+ return 0;
+}
diff --git a/tools/testing/selftests/bpf/progs/bpf_iter_netlink.c b/tools/testing/selftests/bpf/progs/bpf_iter_netlink.c
index 75ecf956a2df..a28e51e2dcee 100644
--- a/tools/testing/selftests/bpf/progs/bpf_iter_netlink.c
+++ b/tools/testing/selftests/bpf/progs/bpf_iter_netlink.c
@@ -1,30 +1,11 @@
// SPDX-License-Identifier: GPL-2.0
/* Copyright (c) 2020 Facebook */
-/* "undefine" structs in vmlinux.h, because we "override" them below */
-#define bpf_iter_meta bpf_iter_meta___not_used
-#define bpf_iter__netlink bpf_iter__netlink___not_used
-#include "vmlinux.h"
-#undef bpf_iter_meta
-#undef bpf_iter__netlink
+#include "bpf_iter.h"
+#include "bpf_tracing_net.h"
#include <bpf/bpf_helpers.h>
-#include <bpf/bpf_tracing.h>
char _license[] SEC("license") = "GPL";
-#define sk_rmem_alloc sk_backlog.rmem_alloc
-#define sk_refcnt __sk_common.skc_refcnt
-
-struct bpf_iter_meta {
- struct seq_file *seq;
- __u64 session_id;
- __u64 seq_num;
-} __attribute__((preserve_access_index));
-
-struct bpf_iter__netlink {
- struct bpf_iter_meta *meta;
- struct netlink_sock *sk;
-} __attribute__((preserve_access_index));
-
static __attribute__((noinline)) struct inode *SOCK_INODE(struct socket *socket)
{
return &container_of(socket, struct socket_alloc, socket)->vfs_inode;
@@ -54,10 +35,10 @@ int dump_netlink(struct bpf_iter__netlink *ctx)
if (!nlk->groups) {
group = 0;
} else {
- /* FIXME: temporary use bpf_probe_read here, needs
+ /* FIXME: temporary use bpf_probe_read_kernel here, needs
* verifier support to do direct access.
*/
- bpf_probe_read(&group, sizeof(group), &nlk->groups[0]);
+ bpf_probe_read_kernel(&group, sizeof(group), &nlk->groups[0]);
}
BPF_SEQ_PRINTF(seq, "%-10u %08x %-8d %-8d %-5d %-8d ",
nlk->portid, (u32)group,
@@ -74,7 +55,7 @@ int dump_netlink(struct bpf_iter__netlink *ctx)
* with current verifier.
*/
inode = SOCK_INODE(sk);
- bpf_probe_read(&ino, sizeof(ino), &inode->i_ino);
+ bpf_probe_read_kernel(&ino, sizeof(ino), &inode->i_ino);
}
BPF_SEQ_PRINTF(seq, "%-8u %-8lu\n", s->sk_drops.counter, ino);
diff --git a/tools/testing/selftests/bpf/progs/bpf_iter_setsockopt.c b/tools/testing/selftests/bpf/progs/bpf_iter_setsockopt.c
new file mode 100644
index 000000000000..ec7f91850dec
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/bpf_iter_setsockopt.c
@@ -0,0 +1,71 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2021 Facebook */
+#include "bpf_iter.h"
+#include "bpf_tracing_net.h"
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_endian.h>
+
+#define bpf_tcp_sk(skc) ({ \
+ struct sock_common *_skc = skc; \
+ sk = NULL; \
+ tp = NULL; \
+ if (_skc) { \
+ tp = bpf_skc_to_tcp_sock(_skc); \
+ sk = (struct sock *)tp; \
+ } \
+ tp; \
+})
+
+unsigned short reuse_listen_hport = 0;
+unsigned short listen_hport = 0;
+char cubic_cc[TCP_CA_NAME_MAX] = "bpf_cubic";
+char dctcp_cc[TCP_CA_NAME_MAX] = "bpf_dctcp";
+bool random_retry = false;
+
+static bool tcp_cc_eq(const char *a, const char *b)
+{
+ int i;
+
+ for (i = 0; i < TCP_CA_NAME_MAX; i++) {
+ if (a[i] != b[i])
+ return false;
+ if (!a[i])
+ break;
+ }
+
+ return true;
+}
+
+SEC("iter/tcp")
+int change_tcp_cc(struct bpf_iter__tcp *ctx)
+{
+ char cur_cc[TCP_CA_NAME_MAX];
+ struct tcp_sock *tp;
+ struct sock *sk;
+
+ if (!bpf_tcp_sk(ctx->sk_common))
+ return 0;
+
+ if (sk->sk_family != AF_INET6 ||
+ (sk->sk_state != TCP_LISTEN &&
+ sk->sk_state != TCP_ESTABLISHED) ||
+ (sk->sk_num != reuse_listen_hport &&
+ sk->sk_num != listen_hport &&
+ bpf_ntohs(sk->sk_dport) != listen_hport))
+ return 0;
+
+ if (bpf_getsockopt(tp, SOL_TCP, TCP_CONGESTION,
+ cur_cc, sizeof(cur_cc)))
+ return 0;
+
+ if (!tcp_cc_eq(cur_cc, cubic_cc))
+ return 0;
+
+ if (random_retry && bpf_get_prandom_u32() % 4 == 1)
+ return 1;
+
+ bpf_setsockopt(tp, SOL_TCP, TCP_CONGESTION, dctcp_cc, sizeof(dctcp_cc));
+ return 0;
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/bpf_iter_setsockopt_unix.c b/tools/testing/selftests/bpf/progs/bpf_iter_setsockopt_unix.c
new file mode 100644
index 000000000000..eafc877ea460
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/bpf_iter_setsockopt_unix.c
@@ -0,0 +1,60 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright Amazon.com Inc. or its affiliates. */
+#include "bpf_iter.h"
+#include "bpf_tracing_net.h"
+#include <bpf/bpf_helpers.h>
+#include <limits.h>
+
+#define AUTOBIND_LEN 6
+char sun_path[AUTOBIND_LEN];
+
+#define NR_CASES 5
+int sndbuf_setsockopt[NR_CASES] = {-1, 0, 8192, INT_MAX / 2, INT_MAX};
+int sndbuf_getsockopt[NR_CASES] = {-1, -1, -1, -1, -1};
+int sndbuf_getsockopt_expected[NR_CASES];
+
+static inline int cmpname(struct unix_sock *unix_sk)
+{
+ int i;
+
+ for (i = 0; i < AUTOBIND_LEN; i++) {
+ if (unix_sk->addr->name->sun_path[i] != sun_path[i])
+ return -1;
+ }
+
+ return 0;
+}
+
+SEC("iter/unix")
+int change_sndbuf(struct bpf_iter__unix *ctx)
+{
+ struct unix_sock *unix_sk = ctx->unix_sk;
+ int i, err;
+
+ if (!unix_sk || !unix_sk->addr)
+ return 0;
+
+ if (unix_sk->addr->name->sun_path[0])
+ return 0;
+
+ if (cmpname(unix_sk))
+ return 0;
+
+ for (i = 0; i < NR_CASES; i++) {
+ err = bpf_setsockopt(unix_sk, SOL_SOCKET, SO_SNDBUF,
+ &sndbuf_setsockopt[i],
+ sizeof(sndbuf_setsockopt[i]));
+ if (err)
+ break;
+
+ err = bpf_getsockopt(unix_sk, SOL_SOCKET, SO_SNDBUF,
+ &sndbuf_getsockopt[i],
+ sizeof(sndbuf_getsockopt[i]));
+ if (err)
+ break;
+ }
+
+ return 0;
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/bpf_iter_sockmap.c b/tools/testing/selftests/bpf/progs/bpf_iter_sockmap.c
new file mode 100644
index 000000000000..f3af0e30cead
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/bpf_iter_sockmap.c
@@ -0,0 +1,59 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2020 Cloudflare */
+#include "bpf_iter.h"
+#include "bpf_tracing_net.h"
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
+#include <errno.h>
+
+char _license[] SEC("license") = "GPL";
+
+struct {
+ __uint(type, BPF_MAP_TYPE_SOCKMAP);
+ __uint(max_entries, 64);
+ __type(key, __u32);
+ __type(value, __u64);
+} sockmap SEC(".maps");
+
+struct {
+ __uint(type, BPF_MAP_TYPE_SOCKHASH);
+ __uint(max_entries, 64);
+ __type(key, __u32);
+ __type(value, __u64);
+} sockhash SEC(".maps");
+
+struct {
+ __uint(type, BPF_MAP_TYPE_SOCKHASH);
+ __uint(max_entries, 64);
+ __type(key, __u32);
+ __type(value, __u64);
+} dst SEC(".maps");
+
+__u32 elems = 0;
+__u32 socks = 0;
+
+SEC("iter/sockmap")
+int copy(struct bpf_iter__sockmap *ctx)
+{
+ struct sock *sk = ctx->sk;
+ __u32 tmp, *key = ctx->key;
+ int ret;
+
+ if (!key)
+ return 0;
+
+ elems++;
+
+ /* We need a temporary buffer on the stack, since the verifier doesn't
+ * let us use the pointer from the context as an argument to the helper.
+ */
+ tmp = *key;
+
+ if (sk) {
+ socks++;
+ return bpf_map_update_elem(&dst, &tmp, sk, 0) != 0;
+ }
+
+ ret = bpf_map_delete_elem(&dst, &tmp);
+ return ret && ret != -ENOENT;
+}
diff --git a/tools/testing/selftests/bpf/progs/bpf_iter_task.c b/tools/testing/selftests/bpf/progs/bpf_iter_task.c
deleted file mode 100644
index ee754021f98e..000000000000
--- a/tools/testing/selftests/bpf/progs/bpf_iter_task.c
+++ /dev/null
@@ -1,41 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/* Copyright (c) 2020 Facebook */
-/* "undefine" structs in vmlinux.h, because we "override" them below */
-#define bpf_iter_meta bpf_iter_meta___not_used
-#define bpf_iter__task bpf_iter__task___not_used
-#include "vmlinux.h"
-#undef bpf_iter_meta
-#undef bpf_iter__task
-#include <bpf/bpf_helpers.h>
-#include <bpf/bpf_tracing.h>
-
-char _license[] SEC("license") = "GPL";
-
-struct bpf_iter_meta {
- struct seq_file *seq;
- __u64 session_id;
- __u64 seq_num;
-} __attribute__((preserve_access_index));
-
-struct bpf_iter__task {
- struct bpf_iter_meta *meta;
- struct task_struct *task;
-} __attribute__((preserve_access_index));
-
-SEC("iter/task")
-int dump_task(struct bpf_iter__task *ctx)
-{
- struct seq_file *seq = ctx->meta->seq;
- struct task_struct *task = ctx->task;
-
- if (task == (void *)0) {
- BPF_SEQ_PRINTF(seq, " === END ===\n");
- return 0;
- }
-
- if (ctx->meta->seq_num == 0)
- BPF_SEQ_PRINTF(seq, " tgid gid\n");
-
- BPF_SEQ_PRINTF(seq, "%8d %8d\n", task->tgid, task->pid);
- return 0;
-}
diff --git a/tools/testing/selftests/bpf/progs/bpf_iter_task_btf.c b/tools/testing/selftests/bpf/progs/bpf_iter_task_btf.c
new file mode 100644
index 000000000000..bca8b889cb10
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/bpf_iter_task_btf.c
@@ -0,0 +1,49 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2020, Oracle and/or its affiliates. */
+#include "bpf_iter.h"
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_core_read.h>
+
+#include <errno.h>
+
+char _license[] SEC("license") = "GPL";
+
+long tasks = 0;
+long seq_err = 0;
+bool skip = false;
+
+SEC("iter/task")
+int dump_task_struct(struct bpf_iter__task *ctx)
+{
+ struct seq_file *seq = ctx->meta->seq;
+ struct task_struct *task = ctx->task;
+ static struct btf_ptr ptr = { };
+ long ret;
+
+#if __has_builtin(__builtin_btf_type_id)
+ ptr.type_id = bpf_core_type_id_kernel(struct task_struct);
+ ptr.ptr = task;
+
+ if (ctx->meta->seq_num == 0)
+ BPF_SEQ_PRINTF(seq, "Raw BTF task\n");
+
+ ret = bpf_seq_printf_btf(seq, &ptr, sizeof(ptr), 0);
+ switch (ret) {
+ case 0:
+ tasks++;
+ break;
+ case -ERANGE:
+ /* NULL task or task->fs, don't count it as an error. */
+ break;
+ case -E2BIG:
+ return 1;
+ default:
+ seq_err = ret;
+ break;
+ }
+#else
+ skip = true;
+#endif
+
+ return 0;
+}
diff --git a/tools/testing/selftests/bpf/progs/bpf_iter_task_file.c b/tools/testing/selftests/bpf/progs/bpf_iter_task_file.c
index 0f0ec3db20ba..b0255080662d 100644
--- a/tools/testing/selftests/bpf/progs/bpf_iter_task_file.c
+++ b/tools/testing/selftests/bpf/progs/bpf_iter_task_file.c
@@ -1,42 +1,38 @@
// SPDX-License-Identifier: GPL-2.0
/* Copyright (c) 2020 Facebook */
-/* "undefine" structs in vmlinux.h, because we "override" them below */
-#define bpf_iter_meta bpf_iter_meta___not_used
-#define bpf_iter__task_file bpf_iter__task_file___not_used
-#include "vmlinux.h"
-#undef bpf_iter_meta
-#undef bpf_iter__task_file
+#include "bpf_iter.h"
#include <bpf/bpf_helpers.h>
-#include <bpf/bpf_tracing.h>
char _license[] SEC("license") = "GPL";
-struct bpf_iter_meta {
- struct seq_file *seq;
- __u64 session_id;
- __u64 seq_num;
-} __attribute__((preserve_access_index));
-
-struct bpf_iter__task_file {
- struct bpf_iter_meta *meta;
- struct task_struct *task;
- __u32 fd;
- struct file *file;
-} __attribute__((preserve_access_index));
+int count = 0;
+int tgid = 0;
+int last_tgid = 0;
+int unique_tgid_count = 0;
SEC("iter/task_file")
int dump_task_file(struct bpf_iter__task_file *ctx)
{
struct seq_file *seq = ctx->meta->seq;
struct task_struct *task = ctx->task;
- __u32 fd = ctx->fd;
struct file *file = ctx->file;
+ __u32 fd = ctx->fd;
if (task == (void *)0 || file == (void *)0)
return 0;
- if (ctx->meta->seq_num == 0)
+ if (ctx->meta->seq_num == 0) {
+ count = 0;
BPF_SEQ_PRINTF(seq, " tgid gid fd file\n");
+ }
+
+ if (tgid == task->tgid && task->tgid != task->pid)
+ count++;
+
+ if (last_tgid != task->tgid) {
+ last_tgid = task->tgid;
+ unique_tgid_count++;
+ }
BPF_SEQ_PRINTF(seq, "%8d %8d %8d %lx\n", task->tgid, task->pid, fd,
(long)file->f_op);
diff --git a/tools/testing/selftests/bpf/progs/bpf_iter_task_stack.c b/tools/testing/selftests/bpf/progs/bpf_iter_task_stack.c
new file mode 100644
index 000000000000..442f4ca39fd7
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/bpf_iter_task_stack.c
@@ -0,0 +1,68 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2020 Facebook */
+#include "bpf_iter.h"
+#include <bpf/bpf_helpers.h>
+
+char _license[] SEC("license") = "GPL";
+
+#define MAX_STACK_TRACE_DEPTH 64
+unsigned long entries[MAX_STACK_TRACE_DEPTH] = {};
+#define SIZE_OF_ULONG (sizeof(unsigned long))
+
+SEC("iter/task")
+int dump_task_stack(struct bpf_iter__task *ctx)
+{
+ struct seq_file *seq = ctx->meta->seq;
+ struct task_struct *task = ctx->task;
+ long i, retlen;
+
+ if (task == (void *)0)
+ return 0;
+
+ retlen = bpf_get_task_stack(task, entries,
+ MAX_STACK_TRACE_DEPTH * SIZE_OF_ULONG, 0);
+ if (retlen < 0)
+ return 0;
+
+ BPF_SEQ_PRINTF(seq, "pid: %8u num_entries: %8u\n", task->pid,
+ retlen / SIZE_OF_ULONG);
+ for (i = 0; i < MAX_STACK_TRACE_DEPTH; i++) {
+ if (retlen > i * SIZE_OF_ULONG)
+ BPF_SEQ_PRINTF(seq, "[<0>] %pB\n", (void *)entries[i]);
+ }
+ BPF_SEQ_PRINTF(seq, "\n");
+
+ return 0;
+}
+
+int num_user_stacks = 0;
+
+SEC("iter/task")
+int get_task_user_stacks(struct bpf_iter__task *ctx)
+{
+ struct seq_file *seq = ctx->meta->seq;
+ struct task_struct *task = ctx->task;
+ uint64_t buf_sz = 0;
+ int64_t res;
+
+ if (task == (void *)0)
+ return 0;
+
+ res = bpf_get_task_stack(task, entries,
+ MAX_STACK_TRACE_DEPTH * SIZE_OF_ULONG, BPF_F_USER_STACK);
+ if (res <= 0)
+ return 0;
+
+ /* Only one task, the current one, should succeed */
+ ++num_user_stacks;
+
+ buf_sz += res;
+
+ /* If the verifier doesn't refine bpf_get_task_stack res, and instead
+ * assumes res is entirely unknown, this program will fail to load as
+ * the verifier will believe that max buf_sz value allows reading
+ * past the end of entries in bpf_seq_write call
+ */
+ bpf_seq_write(seq, &entries, buf_sz);
+ return 0;
+}
diff --git a/tools/testing/selftests/bpf/progs/bpf_iter_task_vmas.c b/tools/testing/selftests/bpf/progs/bpf_iter_task_vmas.c
new file mode 100644
index 000000000000..423b39e60b6f
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/bpf_iter_task_vmas.c
@@ -0,0 +1,62 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2020 Facebook */
+#include "bpf_iter.h"
+#include <bpf/bpf_helpers.h>
+
+char _license[] SEC("license") = "GPL";
+
+/* Copied from mm.h */
+#define VM_READ 0x00000001
+#define VM_WRITE 0x00000002
+#define VM_EXEC 0x00000004
+#define VM_MAYSHARE 0x00000080
+
+/* Copied from kdev_t.h */
+#define MINORBITS 20
+#define MINORMASK ((1U << MINORBITS) - 1)
+#define MAJOR(dev) ((unsigned int) ((dev) >> MINORBITS))
+#define MINOR(dev) ((unsigned int) ((dev) & MINORMASK))
+
+#define D_PATH_BUF_SIZE 1024
+char d_path_buf[D_PATH_BUF_SIZE] = {};
+__u32 pid = 0;
+__u32 one_task = 0;
+__u32 one_task_error = 0;
+
+SEC("iter/task_vma") int proc_maps(struct bpf_iter__task_vma *ctx)
+{
+ struct vm_area_struct *vma = ctx->vma;
+ struct seq_file *seq = ctx->meta->seq;
+ struct task_struct *task = ctx->task;
+ struct file *file;
+ char perm_str[] = "----";
+
+ if (task == (void *)0 || vma == (void *)0)
+ return 0;
+
+ file = vma->vm_file;
+ if (task->tgid != (pid_t)pid) {
+ if (one_task)
+ one_task_error = 1;
+ return 0;
+ }
+ perm_str[0] = (vma->vm_flags & VM_READ) ? 'r' : '-';
+ perm_str[1] = (vma->vm_flags & VM_WRITE) ? 'w' : '-';
+ perm_str[2] = (vma->vm_flags & VM_EXEC) ? 'x' : '-';
+ perm_str[3] = (vma->vm_flags & VM_MAYSHARE) ? 's' : 'p';
+ BPF_SEQ_PRINTF(seq, "%08llx-%08llx %s ", vma->vm_start, vma->vm_end, perm_str);
+
+ if (file) {
+ __u32 dev = file->f_inode->i_sb->s_dev;
+
+ bpf_d_path(&file->f_path, d_path_buf, D_PATH_BUF_SIZE);
+
+ BPF_SEQ_PRINTF(seq, "%08llx ", vma->vm_pgoff << 12);
+ BPF_SEQ_PRINTF(seq, "%02x:%02x %u", MAJOR(dev), MINOR(dev),
+ file->f_inode->i_ino);
+ BPF_SEQ_PRINTF(seq, "\t%s\n", d_path_buf);
+ } else {
+ BPF_SEQ_PRINTF(seq, "%08llx 00:00 0\n", 0ULL);
+ }
+ return 0;
+}
diff --git a/tools/testing/selftests/bpf/progs/bpf_iter_tasks.c b/tools/testing/selftests/bpf/progs/bpf_iter_tasks.c
new file mode 100644
index 000000000000..6cbb3393f243
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/bpf_iter_tasks.c
@@ -0,0 +1,88 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2020 Facebook */
+#include "bpf_iter.h"
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
+
+char _license[] SEC("license") = "GPL";
+
+uint32_t tid = 0;
+int num_unknown_tid = 0;
+int num_known_tid = 0;
+
+SEC("iter/task")
+int dump_task(struct bpf_iter__task *ctx)
+{
+ struct seq_file *seq = ctx->meta->seq;
+ struct task_struct *task = ctx->task;
+ static char info[] = " === END ===";
+
+ if (task == (void *)0) {
+ BPF_SEQ_PRINTF(seq, "%s\n", info);
+ return 0;
+ }
+
+ if (task->pid != (pid_t)tid)
+ num_unknown_tid++;
+ else
+ num_known_tid++;
+
+ if (ctx->meta->seq_num == 0)
+ BPF_SEQ_PRINTF(seq, " tgid gid\n");
+
+ BPF_SEQ_PRINTF(seq, "%8d %8d\n", task->tgid, task->pid);
+ return 0;
+}
+
+int num_expected_failure_copy_from_user_task = 0;
+int num_success_copy_from_user_task = 0;
+
+SEC("iter.s/task")
+int dump_task_sleepable(struct bpf_iter__task *ctx)
+{
+ struct seq_file *seq = ctx->meta->seq;
+ struct task_struct *task = ctx->task;
+ static const char info[] = " === END ===";
+ struct pt_regs *regs;
+ void *ptr;
+ uint32_t user_data = 0;
+ int ret;
+
+ if (task == (void *)0) {
+ BPF_SEQ_PRINTF(seq, "%s\n", info);
+ return 0;
+ }
+
+ /* Read an invalid pointer and ensure we get an error */
+ ptr = NULL;
+ ret = bpf_copy_from_user_task(&user_data, sizeof(uint32_t), ptr, task, 0);
+ if (ret) {
+ ++num_expected_failure_copy_from_user_task;
+ } else {
+ BPF_SEQ_PRINTF(seq, "%s\n", info);
+ return 0;
+ }
+
+ /* Try to read the contents of the task's instruction pointer from the
+ * remote task's address space.
+ */
+ regs = (struct pt_regs *)bpf_task_pt_regs(task);
+ if (regs == (void *)0) {
+ BPF_SEQ_PRINTF(seq, "%s\n", info);
+ return 0;
+ }
+ ptr = (void *)PT_REGS_IP(regs);
+
+ ret = bpf_copy_from_user_task(&user_data, sizeof(uint32_t), ptr, task, 0);
+ if (ret) {
+ BPF_SEQ_PRINTF(seq, "%s\n", info);
+ return 0;
+ }
+ ++num_success_copy_from_user_task;
+
+ if (ctx->meta->seq_num == 0)
+ BPF_SEQ_PRINTF(seq, " tgid gid data\n");
+
+ BPF_SEQ_PRINTF(seq, "%8d %8d %8d\n", task->tgid, task->pid, user_data);
+ return 0;
+}
diff --git a/tools/testing/selftests/bpf/progs/bpf_iter_tcp4.c b/tools/testing/selftests/bpf/progs/bpf_iter_tcp4.c
new file mode 100644
index 000000000000..92267abb462f
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/bpf_iter_tcp4.c
@@ -0,0 +1,233 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2020 Facebook */
+#include "bpf_iter.h"
+#include "bpf_tracing_net.h"
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_endian.h>
+
+char _license[] SEC("license") = "GPL";
+
+static int hlist_unhashed_lockless(const struct hlist_node *h)
+{
+ return !(h->pprev);
+}
+
+static int timer_pending(const struct timer_list * timer)
+{
+ return !hlist_unhashed_lockless(&timer->entry);
+}
+
+extern unsigned CONFIG_HZ __kconfig;
+
+#define USER_HZ 100
+#define NSEC_PER_SEC 1000000000ULL
+static clock_t jiffies_to_clock_t(unsigned long x)
+{
+ /* The implementation here tailored to a particular
+ * setting of USER_HZ.
+ */
+ u64 tick_nsec = (NSEC_PER_SEC + CONFIG_HZ/2) / CONFIG_HZ;
+ u64 user_hz_nsec = NSEC_PER_SEC / USER_HZ;
+
+ if ((tick_nsec % user_hz_nsec) == 0) {
+ if (CONFIG_HZ < USER_HZ)
+ return x * (USER_HZ / CONFIG_HZ);
+ else
+ return x / (CONFIG_HZ / USER_HZ);
+ }
+ return x * tick_nsec/user_hz_nsec;
+}
+
+static clock_t jiffies_delta_to_clock_t(long delta)
+{
+ if (delta <= 0)
+ return 0;
+
+ return jiffies_to_clock_t(delta);
+}
+
+static long sock_i_ino(const struct sock *sk)
+{
+ const struct socket *sk_socket = sk->sk_socket;
+ const struct inode *inode;
+ unsigned long ino;
+
+ if (!sk_socket)
+ return 0;
+
+ inode = &container_of(sk_socket, struct socket_alloc, socket)->vfs_inode;
+ bpf_probe_read_kernel(&ino, sizeof(ino), &inode->i_ino);
+ return ino;
+}
+
+static bool
+inet_csk_in_pingpong_mode(const struct inet_connection_sock *icsk)
+{
+ return icsk->icsk_ack.pingpong >= TCP_PINGPONG_THRESH;
+}
+
+static bool tcp_in_initial_slowstart(const struct tcp_sock *tcp)
+{
+ return tcp->snd_ssthresh >= TCP_INFINITE_SSTHRESH;
+}
+
+static int dump_tcp_sock(struct seq_file *seq, struct tcp_sock *tp,
+ uid_t uid, __u32 seq_num)
+{
+ const struct inet_connection_sock *icsk;
+ const struct fastopen_queue *fastopenq;
+ const struct inet_sock *inet;
+ unsigned long timer_expires;
+ const struct sock *sp;
+ __u16 destp, srcp;
+ __be32 dest, src;
+ int timer_active;
+ int rx_queue;
+ int state;
+
+ icsk = &tp->inet_conn;
+ inet = &icsk->icsk_inet;
+ sp = &inet->sk;
+ fastopenq = &icsk->icsk_accept_queue.fastopenq;
+
+ dest = inet->inet_daddr;
+ src = inet->inet_rcv_saddr;
+ destp = bpf_ntohs(inet->inet_dport);
+ srcp = bpf_ntohs(inet->inet_sport);
+
+ if (icsk->icsk_pending == ICSK_TIME_RETRANS ||
+ icsk->icsk_pending == ICSK_TIME_REO_TIMEOUT ||
+ icsk->icsk_pending == ICSK_TIME_LOSS_PROBE) {
+ timer_active = 1;
+ timer_expires = icsk->icsk_timeout;
+ } else if (icsk->icsk_pending == ICSK_TIME_PROBE0) {
+ timer_active = 4;
+ timer_expires = icsk->icsk_timeout;
+ } else if (timer_pending(&sp->sk_timer)) {
+ timer_active = 2;
+ timer_expires = sp->sk_timer.expires;
+ } else {
+ timer_active = 0;
+ timer_expires = bpf_jiffies64();
+ }
+
+ state = sp->sk_state;
+ if (state == TCP_LISTEN) {
+ rx_queue = sp->sk_ack_backlog;
+ } else {
+ rx_queue = tp->rcv_nxt - tp->copied_seq;
+ if (rx_queue < 0)
+ rx_queue = 0;
+ }
+
+ BPF_SEQ_PRINTF(seq, "%4d: %08X:%04X %08X:%04X ",
+ seq_num, src, srcp, dest, destp);
+ BPF_SEQ_PRINTF(seq, "%02X %08X:%08X %02X:%08lX %08X %5u %8d %lu %d ",
+ state,
+ tp->write_seq - tp->snd_una, rx_queue,
+ timer_active,
+ jiffies_delta_to_clock_t(timer_expires - bpf_jiffies64()),
+ icsk->icsk_retransmits, uid,
+ icsk->icsk_probes_out,
+ sock_i_ino(sp),
+ sp->sk_refcnt.refs.counter);
+ BPF_SEQ_PRINTF(seq, "%pK %lu %lu %u %u %d\n",
+ tp,
+ jiffies_to_clock_t(icsk->icsk_rto),
+ jiffies_to_clock_t(icsk->icsk_ack.ato),
+ (icsk->icsk_ack.quick << 1) | inet_csk_in_pingpong_mode(icsk),
+ tp->snd_cwnd,
+ state == TCP_LISTEN ? fastopenq->max_qlen
+ : (tcp_in_initial_slowstart(tp) ? -1 : tp->snd_ssthresh)
+ );
+
+ return 0;
+}
+
+static int dump_tw_sock(struct seq_file *seq, struct tcp_timewait_sock *ttw,
+ uid_t uid, __u32 seq_num)
+{
+ struct inet_timewait_sock *tw = &ttw->tw_sk;
+ __u16 destp, srcp;
+ __be32 dest, src;
+ long delta;
+
+ delta = tw->tw_timer.expires - bpf_jiffies64();
+ dest = tw->tw_daddr;
+ src = tw->tw_rcv_saddr;
+ destp = bpf_ntohs(tw->tw_dport);
+ srcp = bpf_ntohs(tw->tw_sport);
+
+ BPF_SEQ_PRINTF(seq, "%4d: %08X:%04X %08X:%04X ",
+ seq_num, src, srcp, dest, destp);
+
+ BPF_SEQ_PRINTF(seq, "%02X %08X:%08X %02X:%08lX %08X %5d %8d %d %d %pK\n",
+ tw->tw_substate, 0, 0,
+ 3, jiffies_delta_to_clock_t(delta), 0, 0, 0, 0,
+ tw->tw_refcnt.refs.counter, tw);
+
+ return 0;
+}
+
+static int dump_req_sock(struct seq_file *seq, struct tcp_request_sock *treq,
+ uid_t uid, __u32 seq_num)
+{
+ struct inet_request_sock *irsk = &treq->req;
+ struct request_sock *req = &irsk->req;
+ long ttd;
+
+ ttd = req->rsk_timer.expires - bpf_jiffies64();
+
+ if (ttd < 0)
+ ttd = 0;
+
+ BPF_SEQ_PRINTF(seq, "%4d: %08X:%04X %08X:%04X ",
+ seq_num, irsk->ir_loc_addr,
+ irsk->ir_num, irsk->ir_rmt_addr,
+ bpf_ntohs(irsk->ir_rmt_port));
+ BPF_SEQ_PRINTF(seq, "%02X %08X:%08X %02X:%08lX %08X %5d %8d %d %d %pK\n",
+ TCP_SYN_RECV, 0, 0, 1, jiffies_to_clock_t(ttd),
+ req->num_timeout, uid, 0, 0, 0, req);
+
+ return 0;
+}
+
+SEC("iter/tcp")
+int dump_tcp4(struct bpf_iter__tcp *ctx)
+{
+ struct sock_common *sk_common = ctx->sk_common;
+ struct seq_file *seq = ctx->meta->seq;
+ struct tcp_timewait_sock *tw;
+ struct tcp_request_sock *req;
+ struct tcp_sock *tp;
+ uid_t uid = ctx->uid;
+ __u32 seq_num;
+
+ if (sk_common == (void *)0)
+ return 0;
+
+ seq_num = ctx->meta->seq_num;
+ if (seq_num == 0)
+ BPF_SEQ_PRINTF(seq, " sl "
+ "local_address "
+ "rem_address "
+ "st tx_queue rx_queue tr tm->when retrnsmt"
+ " uid timeout inode\n");
+
+ if (sk_common->skc_family != AF_INET)
+ return 0;
+
+ tp = bpf_skc_to_tcp_sock(sk_common);
+ if (tp)
+ return dump_tcp_sock(seq, tp, uid, seq_num);
+
+ tw = bpf_skc_to_tcp_timewait_sock(sk_common);
+ if (tw)
+ return dump_tw_sock(seq, tw, uid, seq_num);
+
+ req = bpf_skc_to_tcp_request_sock(sk_common);
+ if (req)
+ return dump_req_sock(seq, req, uid, seq_num);
+
+ return 0;
+}
diff --git a/tools/testing/selftests/bpf/progs/bpf_iter_tcp6.c b/tools/testing/selftests/bpf/progs/bpf_iter_tcp6.c
new file mode 100644
index 000000000000..943f7bba180e
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/bpf_iter_tcp6.c
@@ -0,0 +1,249 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2020 Facebook */
+#include "bpf_iter.h"
+#include "bpf_tracing_net.h"
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_endian.h>
+
+char _license[] SEC("license") = "GPL";
+
+static int hlist_unhashed_lockless(const struct hlist_node *h)
+{
+ return !(h->pprev);
+}
+
+static int timer_pending(const struct timer_list * timer)
+{
+ return !hlist_unhashed_lockless(&timer->entry);
+}
+
+extern unsigned CONFIG_HZ __kconfig;
+
+#define USER_HZ 100
+#define NSEC_PER_SEC 1000000000ULL
+static clock_t jiffies_to_clock_t(unsigned long x)
+{
+ /* The implementation here tailored to a particular
+ * setting of USER_HZ.
+ */
+ u64 tick_nsec = (NSEC_PER_SEC + CONFIG_HZ/2) / CONFIG_HZ;
+ u64 user_hz_nsec = NSEC_PER_SEC / USER_HZ;
+
+ if ((tick_nsec % user_hz_nsec) == 0) {
+ if (CONFIG_HZ < USER_HZ)
+ return x * (USER_HZ / CONFIG_HZ);
+ else
+ return x / (CONFIG_HZ / USER_HZ);
+ }
+ return x * tick_nsec/user_hz_nsec;
+}
+
+static clock_t jiffies_delta_to_clock_t(long delta)
+{
+ if (delta <= 0)
+ return 0;
+
+ return jiffies_to_clock_t(delta);
+}
+
+static long sock_i_ino(const struct sock *sk)
+{
+ const struct socket *sk_socket = sk->sk_socket;
+ const struct inode *inode;
+ unsigned long ino;
+
+ if (!sk_socket)
+ return 0;
+
+ inode = &container_of(sk_socket, struct socket_alloc, socket)->vfs_inode;
+ bpf_probe_read_kernel(&ino, sizeof(ino), &inode->i_ino);
+ return ino;
+}
+
+static bool
+inet_csk_in_pingpong_mode(const struct inet_connection_sock *icsk)
+{
+ return icsk->icsk_ack.pingpong >= TCP_PINGPONG_THRESH;
+}
+
+static bool tcp_in_initial_slowstart(const struct tcp_sock *tcp)
+{
+ return tcp->snd_ssthresh >= TCP_INFINITE_SSTHRESH;
+}
+
+static int dump_tcp6_sock(struct seq_file *seq, struct tcp6_sock *tp,
+ uid_t uid, __u32 seq_num)
+{
+ const struct inet_connection_sock *icsk;
+ const struct fastopen_queue *fastopenq;
+ const struct in6_addr *dest, *src;
+ const struct inet_sock *inet;
+ unsigned long timer_expires;
+ const struct sock *sp;
+ __u16 destp, srcp;
+ int timer_active;
+ int rx_queue;
+ int state;
+
+ icsk = &tp->tcp.inet_conn;
+ inet = &icsk->icsk_inet;
+ sp = &inet->sk;
+ fastopenq = &icsk->icsk_accept_queue.fastopenq;
+
+ dest = &sp->sk_v6_daddr;
+ src = &sp->sk_v6_rcv_saddr;
+ destp = bpf_ntohs(inet->inet_dport);
+ srcp = bpf_ntohs(inet->inet_sport);
+
+ if (icsk->icsk_pending == ICSK_TIME_RETRANS ||
+ icsk->icsk_pending == ICSK_TIME_REO_TIMEOUT ||
+ icsk->icsk_pending == ICSK_TIME_LOSS_PROBE) {
+ timer_active = 1;
+ timer_expires = icsk->icsk_timeout;
+ } else if (icsk->icsk_pending == ICSK_TIME_PROBE0) {
+ timer_active = 4;
+ timer_expires = icsk->icsk_timeout;
+ } else if (timer_pending(&sp->sk_timer)) {
+ timer_active = 2;
+ timer_expires = sp->sk_timer.expires;
+ } else {
+ timer_active = 0;
+ timer_expires = bpf_jiffies64();
+ }
+
+ state = sp->sk_state;
+ if (state == TCP_LISTEN) {
+ rx_queue = sp->sk_ack_backlog;
+ } else {
+ rx_queue = tp->tcp.rcv_nxt - tp->tcp.copied_seq;
+ if (rx_queue < 0)
+ rx_queue = 0;
+ }
+
+ BPF_SEQ_PRINTF(seq, "%4d: %08X%08X%08X%08X:%04X %08X%08X%08X%08X:%04X ",
+ seq_num,
+ src->s6_addr32[0], src->s6_addr32[1],
+ src->s6_addr32[2], src->s6_addr32[3], srcp,
+ dest->s6_addr32[0], dest->s6_addr32[1],
+ dest->s6_addr32[2], dest->s6_addr32[3], destp);
+ BPF_SEQ_PRINTF(seq, "%02X %08X:%08X %02X:%08lX %08X %5u %8d %lu %d ",
+ state,
+ tp->tcp.write_seq - tp->tcp.snd_una, rx_queue,
+ timer_active,
+ jiffies_delta_to_clock_t(timer_expires - bpf_jiffies64()),
+ icsk->icsk_retransmits, uid,
+ icsk->icsk_probes_out,
+ sock_i_ino(sp),
+ sp->sk_refcnt.refs.counter);
+ BPF_SEQ_PRINTF(seq, "%pK %lu %lu %u %u %d\n",
+ tp,
+ jiffies_to_clock_t(icsk->icsk_rto),
+ jiffies_to_clock_t(icsk->icsk_ack.ato),
+ (icsk->icsk_ack.quick << 1) | inet_csk_in_pingpong_mode(icsk),
+ tp->tcp.snd_cwnd,
+ state == TCP_LISTEN ? fastopenq->max_qlen
+ : (tcp_in_initial_slowstart(&tp->tcp) ? -1
+ : tp->tcp.snd_ssthresh)
+ );
+
+ return 0;
+}
+
+static int dump_tw_sock(struct seq_file *seq, struct tcp_timewait_sock *ttw,
+ uid_t uid, __u32 seq_num)
+{
+ struct inet_timewait_sock *tw = &ttw->tw_sk;
+ const struct in6_addr *dest, *src;
+ __u16 destp, srcp;
+ long delta;
+
+ delta = tw->tw_timer.expires - bpf_jiffies64();
+ dest = &tw->tw_v6_daddr;
+ src = &tw->tw_v6_rcv_saddr;
+ destp = bpf_ntohs(tw->tw_dport);
+ srcp = bpf_ntohs(tw->tw_sport);
+
+ BPF_SEQ_PRINTF(seq, "%4d: %08X%08X%08X%08X:%04X %08X%08X%08X%08X:%04X ",
+ seq_num,
+ src->s6_addr32[0], src->s6_addr32[1],
+ src->s6_addr32[2], src->s6_addr32[3], srcp,
+ dest->s6_addr32[0], dest->s6_addr32[1],
+ dest->s6_addr32[2], dest->s6_addr32[3], destp);
+
+ BPF_SEQ_PRINTF(seq, "%02X %08X:%08X %02X:%08lX %08X %5d %8d %d %d %pK\n",
+ tw->tw_substate, 0, 0,
+ 3, jiffies_delta_to_clock_t(delta), 0, 0, 0, 0,
+ tw->tw_refcnt.refs.counter, tw);
+
+ return 0;
+}
+
+static int dump_req_sock(struct seq_file *seq, struct tcp_request_sock *treq,
+ uid_t uid, __u32 seq_num)
+{
+ struct inet_request_sock *irsk = &treq->req;
+ struct request_sock *req = &irsk->req;
+ struct in6_addr *src, *dest;
+ long ttd;
+
+ ttd = req->rsk_timer.expires - bpf_jiffies64();
+ src = &irsk->ir_v6_loc_addr;
+ dest = &irsk->ir_v6_rmt_addr;
+
+ if (ttd < 0)
+ ttd = 0;
+
+ BPF_SEQ_PRINTF(seq, "%4d: %08X%08X%08X%08X:%04X %08X%08X%08X%08X:%04X ",
+ seq_num,
+ src->s6_addr32[0], src->s6_addr32[1],
+ src->s6_addr32[2], src->s6_addr32[3],
+ irsk->ir_num,
+ dest->s6_addr32[0], dest->s6_addr32[1],
+ dest->s6_addr32[2], dest->s6_addr32[3],
+ bpf_ntohs(irsk->ir_rmt_port));
+ BPF_SEQ_PRINTF(seq, "%02X %08X:%08X %02X:%08lX %08X %5d %8d %d %d %pK\n",
+ TCP_SYN_RECV, 0, 0, 1, jiffies_to_clock_t(ttd),
+ req->num_timeout, uid, 0, 0, 0, req);
+
+ return 0;
+}
+
+SEC("iter/tcp")
+int dump_tcp6(struct bpf_iter__tcp *ctx)
+{
+ struct sock_common *sk_common = ctx->sk_common;
+ struct seq_file *seq = ctx->meta->seq;
+ struct tcp_timewait_sock *tw;
+ struct tcp_request_sock *req;
+ struct tcp6_sock *tp;
+ uid_t uid = ctx->uid;
+ __u32 seq_num;
+
+ if (sk_common == (void *)0)
+ return 0;
+
+ seq_num = ctx->meta->seq_num;
+ if (seq_num == 0)
+ BPF_SEQ_PRINTF(seq, " sl "
+ "local_address "
+ "remote_address "
+ "st tx_queue rx_queue tr tm->when retrnsmt"
+ " uid timeout inode\n");
+
+ if (sk_common->skc_family != AF_INET6)
+ return 0;
+
+ tp = bpf_skc_to_tcp6_sock(sk_common);
+ if (tp)
+ return dump_tcp6_sock(seq, tp, uid, seq_num);
+
+ tw = bpf_skc_to_tcp_timewait_sock(sk_common);
+ if (tw)
+ return dump_tw_sock(seq, tw, uid, seq_num);
+
+ req = bpf_skc_to_tcp_request_sock(sk_common);
+ if (req)
+ return dump_req_sock(seq, req, uid, seq_num);
+
+ return 0;
+}
diff --git a/tools/testing/selftests/bpf/progs/bpf_iter_test_kern3.c b/tools/testing/selftests/bpf/progs/bpf_iter_test_kern3.c
index 13c2c90c835f..2a4647f20c46 100644
--- a/tools/testing/selftests/bpf/progs/bpf_iter_test_kern3.c
+++ b/tools/testing/selftests/bpf/progs/bpf_iter_test_kern3.c
@@ -1,25 +1,10 @@
// SPDX-License-Identifier: GPL-2.0
/* Copyright (c) 2020 Facebook */
-#define bpf_iter_meta bpf_iter_meta___not_used
-#define bpf_iter__task bpf_iter__task___not_used
-#include "vmlinux.h"
-#undef bpf_iter_meta
-#undef bpf_iter__task
+#include "bpf_iter.h"
#include <bpf/bpf_helpers.h>
char _license[] SEC("license") = "GPL";
-struct bpf_iter_meta {
- struct seq_file *seq;
- __u64 session_id;
- __u64 seq_num;
-} __attribute__((preserve_access_index));
-
-struct bpf_iter__task {
- struct bpf_iter_meta *meta;
- struct task_struct *task;
-} __attribute__((preserve_access_index));
-
SEC("iter/task")
int dump_task(struct bpf_iter__task *ctx)
{
diff --git a/tools/testing/selftests/bpf/progs/bpf_iter_test_kern4.c b/tools/testing/selftests/bpf/progs/bpf_iter_test_kern4.c
index 0aa71b333cf3..dbf61c44acac 100644
--- a/tools/testing/selftests/bpf/progs/bpf_iter_test_kern4.c
+++ b/tools/testing/selftests/bpf/progs/bpf_iter_test_kern4.c
@@ -1,31 +1,16 @@
// SPDX-License-Identifier: GPL-2.0
/* Copyright (c) 2020 Facebook */
-#define bpf_iter_meta bpf_iter_meta___not_used
-#define bpf_iter__bpf_map bpf_iter__bpf_map___not_used
-#include "vmlinux.h"
-#undef bpf_iter_meta
-#undef bpf_iter__bpf_map
+#include "bpf_iter.h"
#include <bpf/bpf_helpers.h>
char _license[] SEC("license") = "GPL";
-struct bpf_iter_meta {
- struct seq_file *seq;
- __u64 session_id;
- __u64 seq_num;
-} __attribute__((preserve_access_index));
-
-struct bpf_iter__bpf_map {
- struct bpf_iter_meta *meta;
- struct bpf_map *map;
-} __attribute__((preserve_access_index));
-
__u32 map1_id = 0, map2_id = 0;
__u32 map1_accessed = 0, map2_accessed = 0;
__u64 map1_seqnum = 0, map2_seqnum1 = 0, map2_seqnum2 = 0;
-static volatile const __u32 print_len;
-static volatile const __u32 ret1;
+volatile const __u32 print_len;
+volatile const __u32 ret1;
SEC("iter/bpf_map")
int dump_bpf_map(struct bpf_iter__bpf_map *ctx)
@@ -60,7 +45,7 @@ int dump_bpf_map(struct bpf_iter__bpf_map *ctx)
}
/* fill seq_file buffer */
- for (i = 0; i < print_len; i++)
+ for (i = 0; i < (int)print_len; i++)
bpf_seq_write(seq, &seq_num, sizeof(seq_num));
return ret;
diff --git a/tools/testing/selftests/bpf/progs/bpf_iter_test_kern5.c b/tools/testing/selftests/bpf/progs/bpf_iter_test_kern5.c
new file mode 100644
index 000000000000..e3a7575e81d2
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/bpf_iter_test_kern5.c
@@ -0,0 +1,35 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2020 Facebook */
+#include "bpf_iter.h"
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
+
+char _license[] SEC("license") = "GPL";
+
+struct key_t {
+ int a;
+ int b;
+ int c;
+};
+
+struct {
+ __uint(type, BPF_MAP_TYPE_HASH);
+ __uint(max_entries, 3);
+ __type(key, struct key_t);
+ __type(value, __u64);
+} hashmap1 SEC(".maps");
+
+__u32 key_sum = 0;
+
+SEC("iter/bpf_map_elem")
+int dump_bpf_hash_map(struct bpf_iter__bpf_map_elem *ctx)
+{
+ void *key = ctx->key;
+
+ if (key == (void *)0)
+ return 0;
+
+ /* out of bound access w.r.t. hashmap1 */
+ key_sum += *(__u32 *)(key + sizeof(struct key_t));
+ return 0;
+}
diff --git a/tools/testing/selftests/bpf/progs/bpf_iter_test_kern6.c b/tools/testing/selftests/bpf/progs/bpf_iter_test_kern6.c
new file mode 100644
index 000000000000..1c7304f56b1e
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/bpf_iter_test_kern6.c
@@ -0,0 +1,21 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2020 Facebook */
+#include "bpf_iter.h"
+#include <bpf/bpf_helpers.h>
+
+char _license[] SEC("license") = "GPL";
+
+__u32 value_sum = 0;
+
+SEC("iter/bpf_map_elem")
+int dump_bpf_hash_map(struct bpf_iter__bpf_map_elem *ctx)
+{
+ void *value = ctx->value;
+
+ if (value == (void *)0)
+ return 0;
+
+ /* negative offset, verifier failure. */
+ value_sum += *(__u32 *)(value - 4);
+ return 0;
+}
diff --git a/tools/testing/selftests/bpf/progs/bpf_iter_test_kern_common.h b/tools/testing/selftests/bpf/progs/bpf_iter_test_kern_common.h
index dee1339e6905..d5e3df66ad9a 100644
--- a/tools/testing/selftests/bpf/progs/bpf_iter_test_kern_common.h
+++ b/tools/testing/selftests/bpf/progs/bpf_iter_test_kern_common.h
@@ -1,27 +1,11 @@
/* SPDX-License-Identifier: GPL-2.0 */
/* Copyright (c) 2020 Facebook */
-/* "undefine" structs in vmlinux.h, because we "override" them below */
-#define bpf_iter_meta bpf_iter_meta___not_used
-#define bpf_iter__task bpf_iter__task___not_used
-#include "vmlinux.h"
-#undef bpf_iter_meta
-#undef bpf_iter__task
+#include "bpf_iter.h"
#include <bpf/bpf_helpers.h>
char _license[] SEC("license") = "GPL";
int count = 0;
-struct bpf_iter_meta {
- struct seq_file *seq;
- __u64 session_id;
- __u64 seq_num;
-} __attribute__((preserve_access_index));
-
-struct bpf_iter__task {
- struct bpf_iter_meta *meta;
- struct task_struct *task;
-} __attribute__((preserve_access_index));
-
SEC("iter/task")
int dump_task(struct bpf_iter__task *ctx)
{
diff --git a/tools/testing/selftests/bpf/progs/bpf_iter_udp4.c b/tools/testing/selftests/bpf/progs/bpf_iter_udp4.c
new file mode 100644
index 000000000000..cf0c485b1ed7
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/bpf_iter_udp4.c
@@ -0,0 +1,70 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2020 Facebook */
+#include "bpf_iter.h"
+#include "bpf_tracing_net.h"
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_endian.h>
+
+char _license[] SEC("license") = "GPL";
+
+static long sock_i_ino(const struct sock *sk)
+{
+ const struct socket *sk_socket = sk->sk_socket;
+ const struct inode *inode;
+ unsigned long ino;
+
+ if (!sk_socket)
+ return 0;
+
+ inode = &container_of(sk_socket, struct socket_alloc, socket)->vfs_inode;
+ bpf_probe_read_kernel(&ino, sizeof(ino), &inode->i_ino);
+ return ino;
+}
+
+SEC("iter/udp")
+int dump_udp4(struct bpf_iter__udp *ctx)
+{
+ struct seq_file *seq = ctx->meta->seq;
+ struct udp_sock *udp_sk = ctx->udp_sk;
+ struct inet_sock *inet;
+ __u16 srcp, destp;
+ __be32 dest, src;
+ __u32 seq_num;
+ int rqueue;
+
+ if (udp_sk == (void *)0)
+ return 0;
+
+ seq_num = ctx->meta->seq_num;
+ if (seq_num == 0)
+ BPF_SEQ_PRINTF(seq,
+ " sl local_address rem_address st tx_queue "
+ "rx_queue tr tm->when retrnsmt uid timeout "
+ "inode ref pointer drops\n");
+
+ /* filter out udp6 sockets */
+ inet = &udp_sk->inet;
+ if (inet->sk.sk_family == AF_INET6)
+ return 0;
+
+ inet = &udp_sk->inet;
+ dest = inet->inet_daddr;
+ src = inet->inet_rcv_saddr;
+ srcp = bpf_ntohs(inet->inet_sport);
+ destp = bpf_ntohs(inet->inet_dport);
+ rqueue = inet->sk.sk_rmem_alloc.counter - udp_sk->forward_deficit;
+
+ BPF_SEQ_PRINTF(seq, "%5d: %08X:%04X %08X:%04X ",
+ ctx->bucket, src, srcp, dest, destp);
+
+ BPF_SEQ_PRINTF(seq, "%02X %08X:%08X %02X:%08lX %08X %5u %8d %lu %d %pK %u\n",
+ inet->sk.sk_state,
+ inet->sk.sk_wmem_alloc.refs.counter - 1,
+ rqueue,
+ 0, 0L, 0, ctx->uid, 0,
+ sock_i_ino(&inet->sk),
+ inet->sk.sk_refcnt.refs.counter, udp_sk,
+ inet->sk.sk_drops.counter);
+
+ return 0;
+}
diff --git a/tools/testing/selftests/bpf/progs/bpf_iter_udp6.c b/tools/testing/selftests/bpf/progs/bpf_iter_udp6.c
new file mode 100644
index 000000000000..5031e21c433f
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/bpf_iter_udp6.c
@@ -0,0 +1,78 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2020 Facebook */
+#include "bpf_iter.h"
+#include "bpf_tracing_net.h"
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_endian.h>
+
+char _license[] SEC("license") = "GPL";
+
+#define IPV6_SEQ_DGRAM_HEADER \
+ " sl " \
+ "local_address " \
+ "remote_address " \
+ "st tx_queue rx_queue tr tm->when retrnsmt" \
+ " uid timeout inode ref pointer drops\n"
+
+static long sock_i_ino(const struct sock *sk)
+{
+ const struct socket *sk_socket = sk->sk_socket;
+ const struct inode *inode;
+ unsigned long ino;
+
+ if (!sk_socket)
+ return 0;
+
+ inode = &container_of(sk_socket, struct socket_alloc, socket)->vfs_inode;
+ bpf_probe_read_kernel(&ino, sizeof(ino), &inode->i_ino);
+ return ino;
+}
+
+SEC("iter/udp")
+int dump_udp6(struct bpf_iter__udp *ctx)
+{
+ struct seq_file *seq = ctx->meta->seq;
+ struct udp_sock *udp_sk = ctx->udp_sk;
+ const struct in6_addr *dest, *src;
+ struct udp6_sock *udp6_sk;
+ struct inet_sock *inet;
+ __u16 srcp, destp;
+ __u32 seq_num;
+ int rqueue;
+
+ if (udp_sk == (void *)0)
+ return 0;
+
+ seq_num = ctx->meta->seq_num;
+ if (seq_num == 0)
+ BPF_SEQ_PRINTF(seq, IPV6_SEQ_DGRAM_HEADER);
+
+ udp6_sk = bpf_skc_to_udp6_sock(udp_sk);
+ if (udp6_sk == (void *)0)
+ return 0;
+
+ inet = &udp_sk->inet;
+ srcp = bpf_ntohs(inet->inet_sport);
+ destp = bpf_ntohs(inet->inet_dport);
+ rqueue = inet->sk.sk_rmem_alloc.counter - udp_sk->forward_deficit;
+ dest = &inet->sk.sk_v6_daddr;
+ src = &inet->sk.sk_v6_rcv_saddr;
+
+ BPF_SEQ_PRINTF(seq, "%5d: %08X%08X%08X%08X:%04X %08X%08X%08X%08X:%04X ",
+ ctx->bucket,
+ src->s6_addr32[0], src->s6_addr32[1],
+ src->s6_addr32[2], src->s6_addr32[3], srcp,
+ dest->s6_addr32[0], dest->s6_addr32[1],
+ dest->s6_addr32[2], dest->s6_addr32[3], destp);
+
+ BPF_SEQ_PRINTF(seq, "%02X %08X:%08X %02X:%08lX %08X %5u %8d %lu %d %pK %u\n",
+ inet->sk.sk_state,
+ inet->sk.sk_wmem_alloc.refs.counter - 1,
+ rqueue,
+ 0, 0L, 0, ctx->uid, 0,
+ sock_i_ino(&inet->sk),
+ inet->sk.sk_refcnt.refs.counter, udp_sk,
+ inet->sk.sk_drops.counter);
+
+ return 0;
+}
diff --git a/tools/testing/selftests/bpf/progs/bpf_iter_unix.c b/tools/testing/selftests/bpf/progs/bpf_iter_unix.c
new file mode 100644
index 000000000000..e6aefae38894
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/bpf_iter_unix.c
@@ -0,0 +1,80 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright Amazon.com Inc. or its affiliates. */
+#include "bpf_iter.h"
+#include "bpf_tracing_net.h"
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_endian.h>
+
+char _license[] SEC("license") = "GPL";
+
+static long sock_i_ino(const struct sock *sk)
+{
+ const struct socket *sk_socket = sk->sk_socket;
+ const struct inode *inode;
+ unsigned long ino;
+
+ if (!sk_socket)
+ return 0;
+
+ inode = &container_of(sk_socket, struct socket_alloc, socket)->vfs_inode;
+ bpf_probe_read_kernel(&ino, sizeof(ino), &inode->i_ino);
+ return ino;
+}
+
+SEC("iter/unix")
+int dump_unix(struct bpf_iter__unix *ctx)
+{
+ struct unix_sock *unix_sk = ctx->unix_sk;
+ struct sock *sk = (struct sock *)unix_sk;
+ struct seq_file *seq;
+ __u32 seq_num;
+
+ if (!unix_sk)
+ return 0;
+
+ seq = ctx->meta->seq;
+ seq_num = ctx->meta->seq_num;
+ if (seq_num == 0)
+ BPF_SEQ_PRINTF(seq, "Num RefCount Protocol Flags Type St Inode Path\n");
+
+ BPF_SEQ_PRINTF(seq, "%pK: %08X %08X %08X %04X %02X %8lu",
+ unix_sk,
+ sk->sk_refcnt.refs.counter,
+ 0,
+ sk->sk_state == TCP_LISTEN ? __SO_ACCEPTCON : 0,
+ sk->sk_type,
+ sk->sk_socket ?
+ (sk->sk_state == TCP_ESTABLISHED ? SS_CONNECTED : SS_UNCONNECTED) :
+ (sk->sk_state == TCP_ESTABLISHED ? SS_CONNECTING : SS_DISCONNECTING),
+ sock_i_ino(sk));
+
+ if (unix_sk->addr) {
+ if (unix_sk->addr->name->sun_path[0]) {
+ BPF_SEQ_PRINTF(seq, " %s", unix_sk->addr->name->sun_path);
+ } else {
+ /* The name of the abstract UNIX domain socket starts
+ * with '\0' and can contain '\0'. The null bytes
+ * should be escaped as done in unix_seq_show().
+ */
+ __u64 i, len;
+
+ len = unix_sk->addr->len - sizeof(short);
+
+ BPF_SEQ_PRINTF(seq, " @");
+
+ for (i = 1; i < len; i++) {
+ /* unix_validate_addr() tests this upper bound. */
+ if (i >= sizeof(struct sockaddr_un))
+ break;
+
+ BPF_SEQ_PRINTF(seq, "%c",
+ unix_sk->addr->name->sun_path[i] ?:
+ '@');
+ }
+ }
+ }
+
+ BPF_SEQ_PRINTF(seq, "\n");
+
+ return 0;
+}
diff --git a/tools/testing/selftests/bpf/progs/bpf_iter_vma_offset.c b/tools/testing/selftests/bpf/progs/bpf_iter_vma_offset.c
new file mode 100644
index 000000000000..ee7455d2623a
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/bpf_iter_vma_offset.c
@@ -0,0 +1,37 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2022 Meta Platforms, Inc. and affiliates. */
+#include "bpf_iter.h"
+#include <bpf/bpf_helpers.h>
+
+char _license[] SEC("license") = "GPL";
+
+__u32 unique_tgid_cnt = 0;
+uintptr_t address = 0;
+uintptr_t offset = 0;
+__u32 last_tgid = 0;
+__u32 pid = 0;
+__u32 page_shift = 0;
+
+SEC("iter/task_vma")
+int get_vma_offset(struct bpf_iter__task_vma *ctx)
+{
+ struct vm_area_struct *vma = ctx->vma;
+ struct seq_file *seq = ctx->meta->seq;
+ struct task_struct *task = ctx->task;
+
+ if (task == NULL || vma == NULL)
+ return 0;
+
+ if (last_tgid != task->tgid)
+ unique_tgid_cnt++;
+ last_tgid = task->tgid;
+
+ if (task->tgid != pid)
+ return 0;
+
+ if (vma->vm_start <= address && vma->vm_end > address) {
+ offset = address - vma->vm_start + (vma->vm_pgoff << page_shift);
+ BPF_SEQ_PRINTF(seq, "OK\n");
+ }
+ return 0;
+}
diff --git a/tools/testing/selftests/bpf/progs/bpf_loop.c b/tools/testing/selftests/bpf/progs/bpf_loop.c
new file mode 100644
index 000000000000..1d194455b109
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/bpf_loop.c
@@ -0,0 +1,225 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2021 Facebook */
+
+#include "vmlinux.h"
+#include <bpf/bpf_helpers.h>
+#include "bpf_misc.h"
+
+char _license[] SEC("license") = "GPL";
+
+struct callback_ctx {
+ int output;
+};
+
+struct {
+ __uint(type, BPF_MAP_TYPE_HASH);
+ __uint(max_entries, 32);
+ __type(key, int);
+ __type(value, int);
+} map1 SEC(".maps");
+
+/* These should be set by the user program */
+u32 nested_callback_nr_loops;
+u32 stop_index = -1;
+u32 nr_loops;
+int pid;
+int callback_selector;
+
+/* Making these global variables so that the userspace program
+ * can verify the output through the skeleton
+ */
+int nr_loops_returned;
+int g_output;
+int err;
+
+static int callback(__u32 index, void *data)
+{
+ struct callback_ctx *ctx = data;
+
+ if (index >= stop_index)
+ return 1;
+
+ ctx->output += index;
+
+ return 0;
+}
+
+static int empty_callback(__u32 index, void *data)
+{
+ return 0;
+}
+
+static int nested_callback2(__u32 index, void *data)
+{
+ nr_loops_returned += bpf_loop(nested_callback_nr_loops, callback, data, 0);
+
+ return 0;
+}
+
+static int nested_callback1(__u32 index, void *data)
+{
+ bpf_loop(nested_callback_nr_loops, nested_callback2, data, 0);
+ return 0;
+}
+
+SEC("fentry/" SYS_PREFIX "sys_nanosleep")
+int test_prog(void *ctx)
+{
+ struct callback_ctx data = {};
+
+ if (bpf_get_current_pid_tgid() >> 32 != pid)
+ return 0;
+
+ nr_loops_returned = bpf_loop(nr_loops, callback, &data, 0);
+
+ if (nr_loops_returned < 0)
+ err = nr_loops_returned;
+ else
+ g_output = data.output;
+
+ return 0;
+}
+
+SEC("fentry/" SYS_PREFIX "sys_nanosleep")
+int prog_null_ctx(void *ctx)
+{
+ if (bpf_get_current_pid_tgid() >> 32 != pid)
+ return 0;
+
+ nr_loops_returned = bpf_loop(nr_loops, empty_callback, NULL, 0);
+
+ return 0;
+}
+
+SEC("fentry/" SYS_PREFIX "sys_nanosleep")
+int prog_invalid_flags(void *ctx)
+{
+ struct callback_ctx data = {};
+
+ if (bpf_get_current_pid_tgid() >> 32 != pid)
+ return 0;
+
+ err = bpf_loop(nr_loops, callback, &data, 1);
+
+ return 0;
+}
+
+SEC("fentry/" SYS_PREFIX "sys_nanosleep")
+int prog_nested_calls(void *ctx)
+{
+ struct callback_ctx data = {};
+
+ if (bpf_get_current_pid_tgid() >> 32 != pid)
+ return 0;
+
+ nr_loops_returned = 0;
+ bpf_loop(nr_loops, nested_callback1, &data, 0);
+
+ g_output = data.output;
+
+ return 0;
+}
+
+static int callback_set_f0(int i, void *ctx)
+{
+ g_output = 0xF0;
+ return 0;
+}
+
+static int callback_set_0f(int i, void *ctx)
+{
+ g_output = 0x0F;
+ return 0;
+}
+
+/*
+ * non-constant callback is a corner case for bpf_loop inline logic
+ */
+SEC("fentry/" SYS_PREFIX "sys_nanosleep")
+int prog_non_constant_callback(void *ctx)
+{
+ if (bpf_get_current_pid_tgid() >> 32 != pid)
+ return 0;
+
+ int (*callback)(int i, void *ctx);
+
+ g_output = 0;
+
+ if (callback_selector == 0x0F)
+ callback = callback_set_0f;
+ else
+ callback = callback_set_f0;
+
+ bpf_loop(1, callback, NULL, 0);
+
+ return 0;
+}
+
+static int stack_check_inner_callback(void *ctx)
+{
+ return 0;
+}
+
+static int map1_lookup_elem(int key)
+{
+ int *val = bpf_map_lookup_elem(&map1, &key);
+
+ return val ? *val : -1;
+}
+
+static void map1_update_elem(int key, int val)
+{
+ bpf_map_update_elem(&map1, &key, &val, BPF_ANY);
+}
+
+static int stack_check_outer_callback(void *ctx)
+{
+ int a = map1_lookup_elem(1);
+ int b = map1_lookup_elem(2);
+ int c = map1_lookup_elem(3);
+ int d = map1_lookup_elem(4);
+ int e = map1_lookup_elem(5);
+ int f = map1_lookup_elem(6);
+
+ bpf_loop(1, stack_check_inner_callback, NULL, 0);
+
+ map1_update_elem(1, a + 1);
+ map1_update_elem(2, b + 1);
+ map1_update_elem(3, c + 1);
+ map1_update_elem(4, d + 1);
+ map1_update_elem(5, e + 1);
+ map1_update_elem(6, f + 1);
+
+ return 0;
+}
+
+/* Some of the local variables in stack_check and
+ * stack_check_outer_callback would be allocated on stack by
+ * compiler. This test should verify that stack content for these
+ * variables is preserved between calls to bpf_loop (might be an issue
+ * if loop inlining allocates stack slots incorrectly).
+ */
+SEC("fentry/" SYS_PREFIX "sys_nanosleep")
+int stack_check(void *ctx)
+{
+ if (bpf_get_current_pid_tgid() >> 32 != pid)
+ return 0;
+
+ int a = map1_lookup_elem(7);
+ int b = map1_lookup_elem(8);
+ int c = map1_lookup_elem(9);
+ int d = map1_lookup_elem(10);
+ int e = map1_lookup_elem(11);
+ int f = map1_lookup_elem(12);
+
+ bpf_loop(1, stack_check_outer_callback, NULL, 0);
+
+ map1_update_elem(7, a + 1);
+ map1_update_elem(8, b + 1);
+ map1_update_elem(9, c + 1);
+ map1_update_elem(10, d + 1);
+ map1_update_elem(11, e + 1);
+ map1_update_elem(12, f + 1);
+
+ return 0;
+}
diff --git a/tools/testing/selftests/bpf/progs/bpf_loop_bench.c b/tools/testing/selftests/bpf/progs/bpf_loop_bench.c
new file mode 100644
index 000000000000..d461746fd3c1
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/bpf_loop_bench.c
@@ -0,0 +1,30 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2021 Facebook */
+
+#include "vmlinux.h"
+#include <bpf/bpf_helpers.h>
+#include "bpf_misc.h"
+
+char _license[] SEC("license") = "GPL";
+
+u32 nr_loops;
+long hits;
+
+static int empty_callback(__u32 index, void *data)
+{
+ return 0;
+}
+
+static int outer_loop(__u32 index, void *data)
+{
+ bpf_loop(nr_loops, empty_callback, NULL, 0);
+ __sync_add_and_fetch(&hits, nr_loops);
+ return 0;
+}
+
+SEC("fentry/" SYS_PREFIX "sys_getpgid")
+int benchmark(void *ctx)
+{
+ bpf_loop(1000, outer_loop, NULL, 0);
+ return 0;
+}
diff --git a/tools/testing/selftests/bpf/progs/bpf_misc.h b/tools/testing/selftests/bpf/progs/bpf_misc.h
new file mode 100644
index 000000000000..fb2f5513e29e
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/bpf_misc.h
@@ -0,0 +1,138 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef __BPF_MISC_H__
+#define __BPF_MISC_H__
+
+/* This set of attributes controls behavior of the
+ * test_loader.c:test_loader__run_subtests().
+ *
+ * The test_loader sequentially loads each program in a skeleton.
+ * Programs could be loaded in privileged and unprivileged modes.
+ * - __success, __failure, __msg imply privileged mode;
+ * - __success_unpriv, __failure_unpriv, __msg_unpriv imply
+ * unprivileged mode.
+ * If combination of privileged and unprivileged attributes is present
+ * both modes are used. If none are present privileged mode is implied.
+ *
+ * See test_loader.c:drop_capabilities() for exact set of capabilities
+ * that differ between privileged and unprivileged modes.
+ *
+ * For test filtering purposes the name of the program loaded in
+ * unprivileged mode is derived from the usual program name by adding
+ * `@unpriv' suffix.
+ *
+ * __msg Message expected to be found in the verifier log.
+ * Multiple __msg attributes could be specified.
+ * __msg_unpriv Same as __msg but for unprivileged mode.
+ *
+ * __success Expect program load success in privileged mode.
+ * __success_unpriv Expect program load success in unprivileged mode.
+ *
+ * __failure Expect program load failure in privileged mode.
+ * __failure_unpriv Expect program load failure in unprivileged mode.
+ *
+ * __retval Execute the program using BPF_PROG_TEST_RUN command,
+ * expect return value to match passed parameter:
+ * - a decimal number
+ * - a hexadecimal number, when starts from 0x
+ * - literal INT_MIN
+ * - literal POINTER_VALUE (see definition below)
+ * - literal TEST_DATA_LEN (see definition below)
+ * __retval_unpriv Same, but load program in unprivileged mode.
+ *
+ * __description Text to be used instead of a program name for display
+ * and filtering purposes.
+ *
+ * __log_level Log level to use for the program, numeric value expected.
+ *
+ * __flag Adds one flag use for the program, the following values are valid:
+ * - BPF_F_STRICT_ALIGNMENT;
+ * - BPF_F_TEST_RND_HI32;
+ * - BPF_F_TEST_STATE_FREQ;
+ * - BPF_F_SLEEPABLE;
+ * - BPF_F_XDP_HAS_FRAGS;
+ * - A numeric value.
+ * Multiple __flag attributes could be specified, the final flags
+ * value is derived by applying binary "or" to all specified values.
+ *
+ * __auxiliary Annotated program is not a separate test, but used as auxiliary
+ * for some other test cases and should always be loaded.
+ * __auxiliary_unpriv Same, but load program in unprivileged mode.
+ */
+#define __msg(msg) __attribute__((btf_decl_tag("comment:test_expect_msg=" msg)))
+#define __failure __attribute__((btf_decl_tag("comment:test_expect_failure")))
+#define __success __attribute__((btf_decl_tag("comment:test_expect_success")))
+#define __description(desc) __attribute__((btf_decl_tag("comment:test_description=" desc)))
+#define __msg_unpriv(msg) __attribute__((btf_decl_tag("comment:test_expect_msg_unpriv=" msg)))
+#define __failure_unpriv __attribute__((btf_decl_tag("comment:test_expect_failure_unpriv")))
+#define __success_unpriv __attribute__((btf_decl_tag("comment:test_expect_success_unpriv")))
+#define __log_level(lvl) __attribute__((btf_decl_tag("comment:test_log_level="#lvl)))
+#define __flag(flag) __attribute__((btf_decl_tag("comment:test_prog_flags="#flag)))
+#define __retval(val) __attribute__((btf_decl_tag("comment:test_retval="#val)))
+#define __retval_unpriv(val) __attribute__((btf_decl_tag("comment:test_retval_unpriv="#val)))
+#define __auxiliary __attribute__((btf_decl_tag("comment:test_auxiliary")))
+#define __auxiliary_unpriv __attribute__((btf_decl_tag("comment:test_auxiliary_unpriv")))
+#define __btf_path(path) __attribute__((btf_decl_tag("comment:test_btf_path=" path)))
+
+/* Convenience macro for use with 'asm volatile' blocks */
+#define __naked __attribute__((naked))
+#define __clobber_all "r0", "r1", "r2", "r3", "r4", "r5", "r6", "r7", "r8", "r9", "memory"
+#define __clobber_common "r0", "r1", "r2", "r3", "r4", "r5", "memory"
+#define __imm(name) [name]"i"(name)
+#define __imm_const(name, expr) [name]"i"(expr)
+#define __imm_addr(name) [name]"i"(&name)
+#define __imm_ptr(name) [name]"r"(&name)
+#define __imm_insn(name, expr) [name]"i"(*(long *)&(expr))
+
+/* Magic constants used with __retval() */
+#define POINTER_VALUE 0xcafe4all
+#define TEST_DATA_LEN 64
+
+#ifndef __used
+#define __used __attribute__((used))
+#endif
+
+#if defined(__TARGET_ARCH_x86)
+#define SYSCALL_WRAPPER 1
+#define SYS_PREFIX "__x64_"
+#elif defined(__TARGET_ARCH_s390)
+#define SYSCALL_WRAPPER 1
+#define SYS_PREFIX "__s390x_"
+#elif defined(__TARGET_ARCH_arm64)
+#define SYSCALL_WRAPPER 1
+#define SYS_PREFIX "__arm64_"
+#elif defined(__TARGET_ARCH_riscv)
+#define SYSCALL_WRAPPER 1
+#define SYS_PREFIX "__riscv_"
+#else
+#define SYSCALL_WRAPPER 0
+#define SYS_PREFIX "__se_"
+#endif
+
+/* How many arguments are passed to function in register */
+#if defined(__TARGET_ARCH_x86) || defined(__x86_64__)
+#define FUNC_REG_ARG_CNT 6
+#elif defined(__i386__)
+#define FUNC_REG_ARG_CNT 3
+#elif defined(__TARGET_ARCH_s390) || defined(__s390x__)
+#define FUNC_REG_ARG_CNT 5
+#elif defined(__TARGET_ARCH_arm) || defined(__arm__)
+#define FUNC_REG_ARG_CNT 4
+#elif defined(__TARGET_ARCH_arm64) || defined(__aarch64__)
+#define FUNC_REG_ARG_CNT 8
+#elif defined(__TARGET_ARCH_mips) || defined(__mips__)
+#define FUNC_REG_ARG_CNT 8
+#elif defined(__TARGET_ARCH_powerpc) || defined(__powerpc__) || defined(__powerpc64__)
+#define FUNC_REG_ARG_CNT 8
+#elif defined(__TARGET_ARCH_sparc) || defined(__sparc__)
+#define FUNC_REG_ARG_CNT 6
+#elif defined(__TARGET_ARCH_riscv) || defined(__riscv__)
+#define FUNC_REG_ARG_CNT 8
+#else
+/* default to 5 for others */
+#define FUNC_REG_ARG_CNT 5
+#endif
+
+/* make it look to compiler like value is read and written */
+#define __sink(expr) asm volatile("" : "+g"(expr))
+
+#endif
diff --git a/tools/testing/selftests/bpf/progs/bpf_mod_race.c b/tools/testing/selftests/bpf/progs/bpf_mod_race.c
new file mode 100644
index 000000000000..82a5c6c6ba83
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/bpf_mod_race.c
@@ -0,0 +1,100 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <vmlinux.h>
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
+
+const volatile struct {
+ /* thread to activate trace programs for */
+ pid_t tgid;
+ /* return error from __init function */
+ int inject_error;
+ /* uffd monitored range start address */
+ void *fault_addr;
+} bpf_mod_race_config = { -1 };
+
+int bpf_blocking = 0;
+int res_try_get_module = -1;
+
+static __always_inline bool check_thread_id(void)
+{
+ struct task_struct *task = bpf_get_current_task_btf();
+
+ return task->tgid == bpf_mod_race_config.tgid;
+}
+
+/* The trace of execution is something like this:
+ *
+ * finit_module()
+ * load_module()
+ * prepare_coming_module()
+ * notifier_call(MODULE_STATE_COMING)
+ * btf_parse_module()
+ * btf_alloc_id() // Visible to userspace at this point
+ * list_add(btf_mod->list, &btf_modules)
+ * do_init_module()
+ * freeinit = kmalloc()
+ * ret = mod->init()
+ * bpf_prog_widen_race()
+ * bpf_copy_from_user()
+ * ...<sleep>...
+ * if (ret < 0)
+ * ...
+ * free_module()
+ * return ret
+ *
+ * At this point, module loading thread is blocked, we now load the program:
+ *
+ * bpf_check
+ * add_kfunc_call/check_pseudo_btf_id
+ * btf_try_get_module
+ * try_get_module_live == false
+ * return -ENXIO
+ *
+ * Without the fix (try_get_module_live in btf_try_get_module):
+ *
+ * bpf_check
+ * add_kfunc_call/check_pseudo_btf_id
+ * btf_try_get_module
+ * try_get_module == true
+ * <store module reference in btf_kfunc_tab or used_btf array>
+ * ...
+ * return fd
+ *
+ * Now, if we inject an error in the blocked program, our module will be freed
+ * (going straight from MODULE_STATE_COMING to MODULE_STATE_GOING).
+ * Later, when bpf program is freed, it will try to module_put already freed
+ * module. This is why try_get_module_live returns false if mod->state is not
+ * MODULE_STATE_LIVE.
+ */
+
+SEC("fmod_ret.s/bpf_fentry_test1")
+int BPF_PROG(widen_race, int a, int ret)
+{
+ char dst;
+
+ if (!check_thread_id())
+ return 0;
+ /* Indicate that we will attempt to block */
+ bpf_blocking = 1;
+ bpf_copy_from_user(&dst, 1, bpf_mod_race_config.fault_addr);
+ return bpf_mod_race_config.inject_error;
+}
+
+SEC("fexit/do_init_module")
+int BPF_PROG(fexit_init_module, struct module *mod, int ret)
+{
+ if (!check_thread_id())
+ return 0;
+ /* Indicate that we finished blocking */
+ bpf_blocking = 2;
+ return 0;
+}
+
+SEC("fexit/btf_try_get_module")
+int BPF_PROG(fexit_module_get, const struct btf *btf, struct module *mod)
+{
+ res_try_get_module = !!mod;
+ return 0;
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/bpf_syscall_macro.c b/tools/testing/selftests/bpf/progs/bpf_syscall_macro.c
new file mode 100644
index 000000000000..1a476d8ed354
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/bpf_syscall_macro.c
@@ -0,0 +1,110 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright 2022 Sony Group Corporation */
+#include <vmlinux.h>
+
+#include <bpf/bpf_core_read.h>
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
+#include "bpf_misc.h"
+
+int arg1 = 0;
+unsigned long arg2 = 0;
+unsigned long arg3 = 0;
+unsigned long arg4_cx = 0;
+unsigned long arg4 = 0;
+unsigned long arg5 = 0;
+
+int arg1_core = 0;
+unsigned long arg2_core = 0;
+unsigned long arg3_core = 0;
+unsigned long arg4_core_cx = 0;
+unsigned long arg4_core = 0;
+unsigned long arg5_core = 0;
+
+int option_syscall = 0;
+unsigned long arg2_syscall = 0;
+unsigned long arg3_syscall = 0;
+unsigned long arg4_syscall = 0;
+unsigned long arg5_syscall = 0;
+
+const volatile pid_t filter_pid = 0;
+
+SEC("kprobe/" SYS_PREFIX "sys_prctl")
+int BPF_KPROBE(handle_sys_prctl)
+{
+ struct pt_regs *real_regs;
+ pid_t pid = bpf_get_current_pid_tgid() >> 32;
+ unsigned long tmp = 0;
+
+ if (pid != filter_pid)
+ return 0;
+
+ real_regs = PT_REGS_SYSCALL_REGS(ctx);
+
+ /* test for PT_REGS_PARM */
+
+#if !defined(bpf_target_arm64) && !defined(bpf_target_s390)
+ bpf_probe_read_kernel(&tmp, sizeof(tmp), &PT_REGS_PARM1_SYSCALL(real_regs));
+#endif
+ arg1 = tmp;
+ bpf_probe_read_kernel(&arg2, sizeof(arg2), &PT_REGS_PARM2_SYSCALL(real_regs));
+ bpf_probe_read_kernel(&arg3, sizeof(arg3), &PT_REGS_PARM3_SYSCALL(real_regs));
+ bpf_probe_read_kernel(&arg4_cx, sizeof(arg4_cx), &PT_REGS_PARM4(real_regs));
+ bpf_probe_read_kernel(&arg4, sizeof(arg4), &PT_REGS_PARM4_SYSCALL(real_regs));
+ bpf_probe_read_kernel(&arg5, sizeof(arg5), &PT_REGS_PARM5_SYSCALL(real_regs));
+
+ /* test for the CORE variant of PT_REGS_PARM */
+ arg1_core = PT_REGS_PARM1_CORE_SYSCALL(real_regs);
+ arg2_core = PT_REGS_PARM2_CORE_SYSCALL(real_regs);
+ arg3_core = PT_REGS_PARM3_CORE_SYSCALL(real_regs);
+ arg4_core_cx = PT_REGS_PARM4_CORE(real_regs);
+ arg4_core = PT_REGS_PARM4_CORE_SYSCALL(real_regs);
+ arg5_core = PT_REGS_PARM5_CORE_SYSCALL(real_regs);
+
+ return 0;
+}
+
+SEC("ksyscall/prctl")
+int BPF_KSYSCALL(prctl_enter, int option, unsigned long arg2,
+ unsigned long arg3, unsigned long arg4, unsigned long arg5)
+{
+ pid_t pid = bpf_get_current_pid_tgid() >> 32;
+
+ if (pid != filter_pid)
+ return 0;
+
+ option_syscall = option;
+ arg2_syscall = arg2;
+ arg3_syscall = arg3;
+ arg4_syscall = arg4;
+ arg5_syscall = arg5;
+ return 0;
+}
+
+__u64 splice_fd_in;
+__u64 splice_off_in;
+__u64 splice_fd_out;
+__u64 splice_off_out;
+__u64 splice_len;
+__u64 splice_flags;
+
+SEC("ksyscall/splice")
+int BPF_KSYSCALL(splice_enter, int fd_in, loff_t *off_in, int fd_out,
+ loff_t *off_out, size_t len, unsigned int flags)
+{
+ pid_t pid = bpf_get_current_pid_tgid() >> 32;
+
+ if (pid != filter_pid)
+ return 0;
+
+ splice_fd_in = fd_in;
+ splice_off_in = (__u64)off_in;
+ splice_fd_out = fd_out;
+ splice_off_out = (__u64)off_out;
+ splice_len = len;
+ splice_flags = flags;
+
+ return 0;
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/bpf_tcp_nogpl.c b/tools/testing/selftests/bpf/progs/bpf_tcp_nogpl.c
new file mode 100644
index 000000000000..2ecd833dcd41
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/bpf_tcp_nogpl.c
@@ -0,0 +1,19 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include <linux/bpf.h>
+#include <linux/types.h>
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
+#include "bpf_tcp_helpers.h"
+
+char _license[] SEC("license") = "X";
+
+void BPF_STRUCT_OPS(nogpltcp_init, struct sock *sk)
+{
+}
+
+SEC(".struct_ops")
+struct tcp_congestion_ops bpf_nogpltcp = {
+ .init = (void *)nogpltcp_init,
+ .name = "bpf_nogpltcp",
+};
diff --git a/tools/testing/selftests/bpf/progs/bpf_tracing_net.h b/tools/testing/selftests/bpf/progs/bpf_tracing_net.h
new file mode 100644
index 000000000000..7001965d1cc3
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/bpf_tracing_net.h
@@ -0,0 +1,122 @@
+/* SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause) */
+#ifndef __BPF_TRACING_NET_H__
+#define __BPF_TRACING_NET_H__
+
+#define AF_INET 2
+#define AF_INET6 10
+
+#define SOL_SOCKET 1
+#define SO_REUSEADDR 2
+#define SO_SNDBUF 7
+#define SO_RCVBUF 8
+#define SO_KEEPALIVE 9
+#define SO_PRIORITY 12
+#define SO_REUSEPORT 15
+#define SO_RCVLOWAT 18
+#define SO_BINDTODEVICE 25
+#define SO_MARK 36
+#define SO_MAX_PACING_RATE 47
+#define SO_BINDTOIFINDEX 62
+#define SO_TXREHASH 74
+#define __SO_ACCEPTCON (1 << 16)
+
+#define IP_TOS 1
+
+#define IPV6_TCLASS 67
+#define IPV6_AUTOFLOWLABEL 70
+
+#define TC_ACT_UNSPEC (-1)
+#define TC_ACT_OK 0
+#define TC_ACT_SHOT 2
+
+#define SOL_TCP 6
+#define TCP_NODELAY 1
+#define TCP_MAXSEG 2
+#define TCP_KEEPIDLE 4
+#define TCP_KEEPINTVL 5
+#define TCP_KEEPCNT 6
+#define TCP_SYNCNT 7
+#define TCP_WINDOW_CLAMP 10
+#define TCP_CONGESTION 13
+#define TCP_THIN_LINEAR_TIMEOUTS 16
+#define TCP_USER_TIMEOUT 18
+#define TCP_NOTSENT_LOWAT 25
+#define TCP_SAVE_SYN 27
+#define TCP_SAVED_SYN 28
+#define TCP_CA_NAME_MAX 16
+#define TCP_NAGLE_OFF 1
+
+#define ICSK_TIME_RETRANS 1
+#define ICSK_TIME_PROBE0 3
+#define ICSK_TIME_LOSS_PROBE 5
+#define ICSK_TIME_REO_TIMEOUT 6
+
+#define ETH_ALEN 6
+#define ETH_HLEN 14
+#define ETH_P_IP 0x0800
+#define ETH_P_IPV6 0x86DD
+
+#define NEXTHDR_TCP 6
+
+#define TCPOPT_NOP 1
+#define TCPOPT_EOL 0
+#define TCPOPT_MSS 2
+#define TCPOPT_WINDOW 3
+#define TCPOPT_TIMESTAMP 8
+#define TCPOPT_SACK_PERM 4
+
+#define TCPOLEN_MSS 4
+#define TCPOLEN_WINDOW 3
+#define TCPOLEN_TIMESTAMP 10
+#define TCPOLEN_SACK_PERM 2
+
+#define CHECKSUM_NONE 0
+#define CHECKSUM_PARTIAL 3
+
+#define IFNAMSIZ 16
+
+#define RTF_GATEWAY 0x0002
+
+#define TCP_INFINITE_SSTHRESH 0x7fffffff
+#define TCP_PINGPONG_THRESH 3
+
+#define fib_nh_dev nh_common.nhc_dev
+#define fib_nh_gw_family nh_common.nhc_gw_family
+#define fib_nh_gw6 nh_common.nhc_gw.ipv6
+
+#define inet_daddr sk.__sk_common.skc_daddr
+#define inet_rcv_saddr sk.__sk_common.skc_rcv_saddr
+#define inet_dport sk.__sk_common.skc_dport
+
+#define udp_portaddr_hash inet.sk.__sk_common.skc_u16hashes[1]
+
+#define ir_loc_addr req.__req_common.skc_rcv_saddr
+#define ir_num req.__req_common.skc_num
+#define ir_rmt_addr req.__req_common.skc_daddr
+#define ir_rmt_port req.__req_common.skc_dport
+#define ir_v6_rmt_addr req.__req_common.skc_v6_daddr
+#define ir_v6_loc_addr req.__req_common.skc_v6_rcv_saddr
+
+#define sk_num __sk_common.skc_num
+#define sk_dport __sk_common.skc_dport
+#define sk_family __sk_common.skc_family
+#define sk_rmem_alloc sk_backlog.rmem_alloc
+#define sk_refcnt __sk_common.skc_refcnt
+#define sk_state __sk_common.skc_state
+#define sk_net __sk_common.skc_net
+#define sk_v6_daddr __sk_common.skc_v6_daddr
+#define sk_v6_rcv_saddr __sk_common.skc_v6_rcv_saddr
+#define sk_flags __sk_common.skc_flags
+#define sk_reuse __sk_common.skc_reuse
+#define sk_cookie __sk_common.skc_cookie
+
+#define s6_addr32 in6_u.u6_addr32
+
+#define tw_daddr __tw_common.skc_daddr
+#define tw_rcv_saddr __tw_common.skc_rcv_saddr
+#define tw_dport __tw_common.skc_dport
+#define tw_refcnt __tw_common.skc_refcnt
+#define tw_v6_daddr __tw_common.skc_v6_daddr
+#define tw_v6_rcv_saddr __tw_common.skc_v6_rcv_saddr
+
+#endif
diff --git a/tools/testing/selftests/bpf/progs/bprm_opts.c b/tools/testing/selftests/bpf/progs/bprm_opts.c
new file mode 100644
index 000000000000..418d9c6d4952
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/bprm_opts.c
@@ -0,0 +1,34 @@
+// SPDX-License-Identifier: GPL-2.0
+
+/*
+ * Copyright 2020 Google LLC.
+ */
+
+#include <linux/bpf.h>
+#include <errno.h>
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
+
+char _license[] SEC("license") = "GPL";
+
+struct {
+ __uint(type, BPF_MAP_TYPE_TASK_STORAGE);
+ __uint(map_flags, BPF_F_NO_PREALLOC);
+ __type(key, int);
+ __type(value, int);
+} secure_exec_task_map SEC(".maps");
+
+SEC("lsm/bprm_creds_for_exec")
+int BPF_PROG(secure_exec, struct linux_binprm *bprm)
+{
+ int *secureexec;
+
+ secureexec = bpf_task_storage_get(&secure_exec_task_map,
+ bpf_get_current_task_btf(), 0,
+ BPF_LOCAL_STORAGE_GET_F_CREATE);
+
+ if (secureexec && *secureexec)
+ bpf_bprm_opts_set(bprm, BPF_F_BPRM_SECUREEXEC);
+
+ return 0;
+}
diff --git a/tools/testing/selftests/bpf/progs/btf__core_reloc_enum64val.c b/tools/testing/selftests/bpf/progs/btf__core_reloc_enum64val.c
new file mode 100644
index 000000000000..888e79db6a77
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/btf__core_reloc_enum64val.c
@@ -0,0 +1,3 @@
+#include "core_reloc_types.h"
+
+void f(struct core_reloc_enum64val x) {}
diff --git a/tools/testing/selftests/bpf/progs/btf__core_reloc_enum64val___diff.c b/tools/testing/selftests/bpf/progs/btf__core_reloc_enum64val___diff.c
new file mode 100644
index 000000000000..194749130d87
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/btf__core_reloc_enum64val___diff.c
@@ -0,0 +1,3 @@
+#include "core_reloc_types.h"
+
+void f(struct core_reloc_enum64val___diff x) {}
diff --git a/tools/testing/selftests/bpf/progs/btf__core_reloc_enum64val___err_missing.c b/tools/testing/selftests/bpf/progs/btf__core_reloc_enum64val___err_missing.c
new file mode 100644
index 000000000000..3d732d4193e4
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/btf__core_reloc_enum64val___err_missing.c
@@ -0,0 +1,3 @@
+#include "core_reloc_types.h"
+
+void f(struct core_reloc_enum64val___err_missing x) {}
diff --git a/tools/testing/selftests/bpf/progs/btf__core_reloc_enum64val___val3_missing.c b/tools/testing/selftests/bpf/progs/btf__core_reloc_enum64val___val3_missing.c
new file mode 100644
index 000000000000..17cf5d6a848d
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/btf__core_reloc_enum64val___val3_missing.c
@@ -0,0 +1,3 @@
+#include "core_reloc_types.h"
+
+void f(struct core_reloc_enum64val___val3_missing x) {}
diff --git a/tools/testing/selftests/bpf/progs/btf__core_reloc_enumval.c b/tools/testing/selftests/bpf/progs/btf__core_reloc_enumval.c
new file mode 100644
index 000000000000..48e62f3f074f
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/btf__core_reloc_enumval.c
@@ -0,0 +1,3 @@
+#include "core_reloc_types.h"
+
+void f(struct core_reloc_enumval x) {}
diff --git a/tools/testing/selftests/bpf/progs/btf__core_reloc_enumval___diff.c b/tools/testing/selftests/bpf/progs/btf__core_reloc_enumval___diff.c
new file mode 100644
index 000000000000..53e5e5a76888
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/btf__core_reloc_enumval___diff.c
@@ -0,0 +1,3 @@
+#include "core_reloc_types.h"
+
+void f(struct core_reloc_enumval___diff x) {}
diff --git a/tools/testing/selftests/bpf/progs/btf__core_reloc_enumval___err_missing.c b/tools/testing/selftests/bpf/progs/btf__core_reloc_enumval___err_missing.c
new file mode 100644
index 000000000000..d024fb2ac06e
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/btf__core_reloc_enumval___err_missing.c
@@ -0,0 +1,3 @@
+#include "core_reloc_types.h"
+
+void f(struct core_reloc_enumval___err_missing x) {}
diff --git a/tools/testing/selftests/bpf/progs/btf__core_reloc_enumval___val3_missing.c b/tools/testing/selftests/bpf/progs/btf__core_reloc_enumval___val3_missing.c
new file mode 100644
index 000000000000..9de6595d250c
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/btf__core_reloc_enumval___val3_missing.c
@@ -0,0 +1,3 @@
+#include "core_reloc_types.h"
+
+void f(struct core_reloc_enumval___val3_missing x) {}
diff --git a/tools/testing/selftests/bpf/progs/btf__core_reloc_existence___err_wrong_arr_kind.c b/tools/testing/selftests/bpf/progs/btf__core_reloc_existence___err_wrong_arr_kind.c
deleted file mode 100644
index dd0ffa518f36..000000000000
--- a/tools/testing/selftests/bpf/progs/btf__core_reloc_existence___err_wrong_arr_kind.c
+++ /dev/null
@@ -1,3 +0,0 @@
-#include "core_reloc_types.h"
-
-void f(struct core_reloc_existence___err_wrong_arr_kind x) {}
diff --git a/tools/testing/selftests/bpf/progs/btf__core_reloc_existence___err_wrong_arr_value_type.c b/tools/testing/selftests/bpf/progs/btf__core_reloc_existence___err_wrong_arr_value_type.c
deleted file mode 100644
index bc83372088ad..000000000000
--- a/tools/testing/selftests/bpf/progs/btf__core_reloc_existence___err_wrong_arr_value_type.c
+++ /dev/null
@@ -1,3 +0,0 @@
-#include "core_reloc_types.h"
-
-void f(struct core_reloc_existence___err_wrong_arr_value_type x) {}
diff --git a/tools/testing/selftests/bpf/progs/btf__core_reloc_existence___err_wrong_int_kind.c b/tools/testing/selftests/bpf/progs/btf__core_reloc_existence___err_wrong_int_kind.c
deleted file mode 100644
index 917bec41be08..000000000000
--- a/tools/testing/selftests/bpf/progs/btf__core_reloc_existence___err_wrong_int_kind.c
+++ /dev/null
@@ -1,3 +0,0 @@
-#include "core_reloc_types.h"
-
-void f(struct core_reloc_existence___err_wrong_int_kind x) {}
diff --git a/tools/testing/selftests/bpf/progs/btf__core_reloc_existence___err_wrong_int_sz.c b/tools/testing/selftests/bpf/progs/btf__core_reloc_existence___err_wrong_int_sz.c
deleted file mode 100644
index 6ec7e6ec1c91..000000000000
--- a/tools/testing/selftests/bpf/progs/btf__core_reloc_existence___err_wrong_int_sz.c
+++ /dev/null
@@ -1,3 +0,0 @@
-#include "core_reloc_types.h"
-
-void f(struct core_reloc_existence___err_wrong_int_sz x) {}
diff --git a/tools/testing/selftests/bpf/progs/btf__core_reloc_existence___err_wrong_int_type.c b/tools/testing/selftests/bpf/progs/btf__core_reloc_existence___err_wrong_int_type.c
deleted file mode 100644
index 7bbcacf2b0d1..000000000000
--- a/tools/testing/selftests/bpf/progs/btf__core_reloc_existence___err_wrong_int_type.c
+++ /dev/null
@@ -1,3 +0,0 @@
-#include "core_reloc_types.h"
-
-void f(struct core_reloc_existence___err_wrong_int_type x) {}
diff --git a/tools/testing/selftests/bpf/progs/btf__core_reloc_existence___err_wrong_struct_type.c b/tools/testing/selftests/bpf/progs/btf__core_reloc_existence___err_wrong_struct_type.c
deleted file mode 100644
index f384dd38ec70..000000000000
--- a/tools/testing/selftests/bpf/progs/btf__core_reloc_existence___err_wrong_struct_type.c
+++ /dev/null
@@ -1,3 +0,0 @@
-#include "core_reloc_types.h"
-
-void f(struct core_reloc_existence___err_wrong_struct_type x) {}
diff --git a/tools/testing/selftests/bpf/progs/btf__core_reloc_existence___wrong_field_defs.c b/tools/testing/selftests/bpf/progs/btf__core_reloc_existence___wrong_field_defs.c
new file mode 100644
index 000000000000..d14b496190c3
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/btf__core_reloc_existence___wrong_field_defs.c
@@ -0,0 +1,3 @@
+#include "core_reloc_types.h"
+
+void f(struct core_reloc_existence___wrong_field_defs x) {}
diff --git a/tools/testing/selftests/bpf/progs/btf__core_reloc_size___diff_offs.c b/tools/testing/selftests/bpf/progs/btf__core_reloc_size___diff_offs.c
new file mode 100644
index 000000000000..3824345d82ab
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/btf__core_reloc_size___diff_offs.c
@@ -0,0 +1,3 @@
+#include "core_reloc_types.h"
+
+void f(struct core_reloc_size___diff_offs x) {}
diff --git a/tools/testing/selftests/bpf/progs/btf__core_reloc_size___err_ambiguous.c b/tools/testing/selftests/bpf/progs/btf__core_reloc_size___err_ambiguous.c
new file mode 100644
index 000000000000..f3e9904df9c2
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/btf__core_reloc_size___err_ambiguous.c
@@ -0,0 +1,4 @@
+#include "core_reloc_types.h"
+
+void f(struct core_reloc_size___err_ambiguous1 x,
+ struct core_reloc_size___err_ambiguous2 y) {}
diff --git a/tools/testing/selftests/bpf/progs/btf__core_reloc_type_based.c b/tools/testing/selftests/bpf/progs/btf__core_reloc_type_based.c
new file mode 100644
index 000000000000..fc3f69e58c71
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/btf__core_reloc_type_based.c
@@ -0,0 +1,3 @@
+#include "core_reloc_types.h"
+
+void f(struct core_reloc_type_based x) {}
diff --git a/tools/testing/selftests/bpf/progs/btf__core_reloc_type_based___all_missing.c b/tools/testing/selftests/bpf/progs/btf__core_reloc_type_based___all_missing.c
new file mode 100644
index 000000000000..51511648b4ec
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/btf__core_reloc_type_based___all_missing.c
@@ -0,0 +1,3 @@
+#include "core_reloc_types.h"
+
+void f(struct core_reloc_type_based___all_missing x) {}
diff --git a/tools/testing/selftests/bpf/progs/btf__core_reloc_type_based___diff.c b/tools/testing/selftests/bpf/progs/btf__core_reloc_type_based___diff.c
new file mode 100644
index 000000000000..57ae2c258928
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/btf__core_reloc_type_based___diff.c
@@ -0,0 +1,3 @@
+#include "core_reloc_types.h"
+
+void f(struct core_reloc_type_based___diff x) {}
diff --git a/tools/testing/selftests/bpf/progs/btf__core_reloc_type_based___diff_sz.c b/tools/testing/selftests/bpf/progs/btf__core_reloc_type_based___diff_sz.c
new file mode 100644
index 000000000000..67db3dceb279
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/btf__core_reloc_type_based___diff_sz.c
@@ -0,0 +1,3 @@
+#include "core_reloc_types.h"
+
+void f(struct core_reloc_type_based___diff_sz x) {}
diff --git a/tools/testing/selftests/bpf/progs/btf__core_reloc_type_based___fn_wrong_args.c b/tools/testing/selftests/bpf/progs/btf__core_reloc_type_based___fn_wrong_args.c
new file mode 100644
index 000000000000..b357fc65431d
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/btf__core_reloc_type_based___fn_wrong_args.c
@@ -0,0 +1,3 @@
+#include "core_reloc_types.h"
+
+void f(struct core_reloc_type_based___fn_wrong_args x) {}
diff --git a/tools/testing/selftests/bpf/progs/btf__core_reloc_type_based___incompat.c b/tools/testing/selftests/bpf/progs/btf__core_reloc_type_based___incompat.c
new file mode 100644
index 000000000000..8ddf20d33d9e
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/btf__core_reloc_type_based___incompat.c
@@ -0,0 +1,3 @@
+#include "core_reloc_types.h"
+
+void f(struct core_reloc_type_based___incompat x) {}
diff --git a/tools/testing/selftests/bpf/progs/btf__core_reloc_type_id.c b/tools/testing/selftests/bpf/progs/btf__core_reloc_type_id.c
new file mode 100644
index 000000000000..abbe5bddcefd
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/btf__core_reloc_type_id.c
@@ -0,0 +1,3 @@
+#include "core_reloc_types.h"
+
+void f(struct core_reloc_type_id x) {}
diff --git a/tools/testing/selftests/bpf/progs/btf__core_reloc_type_id___missing_targets.c b/tools/testing/selftests/bpf/progs/btf__core_reloc_type_id___missing_targets.c
new file mode 100644
index 000000000000..24e7caf4f013
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/btf__core_reloc_type_id___missing_targets.c
@@ -0,0 +1,3 @@
+#include "core_reloc_types.h"
+
+void f(struct core_reloc_type_id___missing_targets x) {}
diff --git a/tools/testing/selftests/bpf/progs/btf_data.c b/tools/testing/selftests/bpf/progs/btf_data.c
new file mode 100644
index 000000000000..baa525275bde
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/btf_data.c
@@ -0,0 +1,50 @@
+// SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause)
+
+struct S {
+ int a;
+ int b;
+ int c;
+};
+
+union U {
+ int a;
+ int b;
+ int c;
+};
+
+struct S1 {
+ int a;
+ int b;
+ int c;
+};
+
+union U1 {
+ int a;
+ int b;
+ int c;
+};
+
+typedef int T;
+typedef int S;
+typedef int U;
+typedef int T1;
+typedef int S1;
+typedef int U1;
+
+struct root_struct {
+ S m_1;
+ T m_2;
+ U m_3;
+ S1 m_4;
+ T1 m_5;
+ U1 m_6;
+ struct S m_7;
+ struct S1 m_8;
+ union U m_9;
+ union U1 m_10;
+};
+
+int func(struct root_struct *root)
+{
+ return 0;
+}
diff --git a/tools/testing/selftests/bpf/progs/btf_dump_test_case_bitfields.c b/tools/testing/selftests/bpf/progs/btf_dump_test_case_bitfields.c
index 8f44767a75fa..e01690618e1e 100644
--- a/tools/testing/selftests/bpf/progs/btf_dump_test_case_bitfields.c
+++ b/tools/testing/selftests/bpf/progs/btf_dump_test_case_bitfields.c
@@ -11,7 +11,7 @@
/*
*struct bitfields_only_mixed_types {
* int a: 3;
- * long int b: 2;
+ * long b: 2;
* _Bool c: 1;
* enum {
* A = 0,
@@ -27,7 +27,7 @@
struct bitfields_only_mixed_types {
int a: 3;
- long int b: 2;
+ long b: 2;
bool c: 1; /* it's really a _Bool type */
enum {
A, /* A = 0, dumper is very explicit */
@@ -44,8 +44,8 @@ struct bitfields_only_mixed_types {
* char: 4;
* int a: 4;
* short b;
- * long int c;
- * long int d: 8;
+ * long c;
+ * long d: 8;
* int e;
* int f;
*};
@@ -53,7 +53,7 @@ struct bitfields_only_mixed_types {
*/
/* ------ END-EXPECTED-OUTPUT ------ */
struct bitfield_mixed_with_others {
- long: 4; /* char is enough as a backing field */
+ char: 4; /* char is enough as a backing field */
int a: 4;
/* 8-bit implicit padding */
short b; /* combined with previous bitfield */
@@ -71,7 +71,7 @@ struct bitfield_mixed_with_others {
*struct bitfield_flushed {
* int a: 4;
* long: 60;
- * long int b: 16;
+ * long b: 16;
*};
*
*/
diff --git a/tools/testing/selftests/bpf/progs/btf_dump_test_case_packing.c b/tools/testing/selftests/bpf/progs/btf_dump_test_case_packing.c
index 1cef3bec1dc7..7998f27df7dd 100644
--- a/tools/testing/selftests/bpf/progs/btf_dump_test_case_packing.c
+++ b/tools/testing/selftests/bpf/progs/btf_dump_test_case_packing.c
@@ -29,7 +29,7 @@ struct non_packed_fields {
struct nested_packed {
char: 4;
int a: 4;
- long int b;
+ long b;
struct {
char c;
int d;
@@ -44,7 +44,7 @@ union union_is_never_packed {
union union_does_not_need_packing {
struct {
- long int a;
+ long a;
int b;
} __attribute__((packed));
int c;
@@ -58,7 +58,81 @@ union jump_code_union {
} __attribute__((packed));
};
-/*------ END-EXPECTED-OUTPUT ------ */
+/* ----- START-EXPECTED-OUTPUT ----- */
+/*
+ *struct nested_packed_but_aligned_struct {
+ * int x1;
+ * int x2;
+ *};
+ *
+ *struct outer_implicitly_packed_struct {
+ * char y1;
+ * struct nested_packed_but_aligned_struct y2;
+ *} __attribute__((packed));
+ *
+ */
+/* ------ END-EXPECTED-OUTPUT ------ */
+
+struct nested_packed_but_aligned_struct {
+ int x1;
+ int x2;
+} __attribute__((packed));
+
+struct outer_implicitly_packed_struct {
+ char y1;
+ struct nested_packed_but_aligned_struct y2;
+};
+/* ----- START-EXPECTED-OUTPUT ----- */
+/*
+ *struct usb_ss_ep_comp_descriptor {
+ * char: 8;
+ * char bDescriptorType;
+ * char bMaxBurst;
+ * short wBytesPerInterval;
+ *};
+ *
+ *struct usb_host_endpoint {
+ * long: 64;
+ * char: 8;
+ * struct usb_ss_ep_comp_descriptor ss_ep_comp;
+ * long: 0;
+ *} __attribute__((packed));
+ *
+ */
+/* ------ END-EXPECTED-OUTPUT ------ */
+
+struct usb_ss_ep_comp_descriptor {
+ char: 8;
+ char bDescriptorType;
+ char bMaxBurst;
+ int: 0;
+ short wBytesPerInterval;
+} __attribute__((packed));
+
+struct usb_host_endpoint {
+ long: 64;
+ char: 8;
+ struct usb_ss_ep_comp_descriptor ss_ep_comp;
+ long: 0;
+};
+
+/* ----- START-EXPECTED-OUTPUT ----- */
+struct nested_packed_struct {
+ int a;
+ char b;
+} __attribute__((packed));
+
+struct outer_nonpacked_struct {
+ short a;
+ struct nested_packed_struct b;
+};
+
+struct outer_packed_struct {
+ short a;
+ struct nested_packed_struct b;
+} __attribute__((packed));
+
+/* ------ END-EXPECTED-OUTPUT ------ */
int f(struct {
struct packed_trailing_space _1;
@@ -69,6 +143,10 @@ int f(struct {
union union_is_never_packed _6;
union union_does_not_need_packing _7;
union jump_code_union _8;
+ struct outer_implicitly_packed_struct _9;
+ struct usb_host_endpoint _10;
+ struct outer_nonpacked_struct _11;
+ struct outer_packed_struct _12;
} *_)
{
return 0;
diff --git a/tools/testing/selftests/bpf/progs/btf_dump_test_case_padding.c b/tools/testing/selftests/bpf/progs/btf_dump_test_case_padding.c
index 35c512818a56..79276fbe454a 100644
--- a/tools/testing/selftests/bpf/progs/btf_dump_test_case_padding.c
+++ b/tools/testing/selftests/bpf/progs/btf_dump_test_case_padding.c
@@ -9,7 +9,7 @@
/* ----- START-EXPECTED-OUTPUT ----- */
struct padded_implicitly {
int a;
- long int b;
+ long b;
char c;
};
@@ -19,7 +19,7 @@ struct padded_implicitly {
/*
*struct padded_explicitly {
* int a;
- * int: 32;
+ * long: 0;
* int b;
*};
*
@@ -28,41 +28,28 @@ struct padded_implicitly {
struct padded_explicitly {
int a;
- int: 1; /* algo will explicitly pad with full 32 bits here */
+ int: 1; /* algo will emit aligning `long: 0;` here */
int b;
};
/* ----- START-EXPECTED-OUTPUT ----- */
-/*
- *struct padded_a_lot {
- * int a;
- * long: 32;
- * long: 64;
- * long: 64;
- * int b;
- *};
- *
- */
-/* ------ END-EXPECTED-OUTPUT ------ */
-
struct padded_a_lot {
int a;
- /* 32 bit of implicit padding here, which algo will make explicit */
long: 64;
long: 64;
int b;
};
+/* ------ END-EXPECTED-OUTPUT ------ */
+
/* ----- START-EXPECTED-OUTPUT ----- */
/*
*struct padded_cache_line {
* int a;
- * long: 32;
* long: 64;
* long: 64;
* long: 64;
* int b;
- * long: 32;
* long: 64;
* long: 64;
* long: 64;
@@ -85,7 +72,7 @@ struct padded_cache_line {
*struct zone {
* int a;
* short b;
- * short: 16;
+ * long: 0;
* struct zone_padding __pad__;
*};
*
@@ -102,12 +89,160 @@ struct zone {
struct zone_padding __pad__;
};
+/* ----- START-EXPECTED-OUTPUT ----- */
+struct padding_wo_named_members {
+ long: 64;
+ long: 64;
+};
+
+struct padding_weird_1 {
+ int a;
+ long: 64;
+ short: 16;
+ short b;
+};
+
+/* ------ END-EXPECTED-OUTPUT ------ */
+
+/* ----- START-EXPECTED-OUTPUT ----- */
+/*
+ *struct padding_weird_2 {
+ * long: 56;
+ * char a;
+ * long: 56;
+ * char b;
+ * char: 8;
+ *};
+ *
+ */
+/* ------ END-EXPECTED-OUTPUT ------ */
+struct padding_weird_2 {
+ int: 32; /* these paddings will be collapsed into `long: 56;` */
+ short: 16;
+ char: 8;
+ char a;
+ int: 32; /* these paddings will be collapsed into `long: 56;` */
+ short: 16;
+ char: 8;
+ char b;
+ char: 8;
+};
+
+/* ----- START-EXPECTED-OUTPUT ----- */
+struct exact_1byte {
+ char x;
+};
+
+struct padded_1byte {
+ char: 8;
+};
+
+struct exact_2bytes {
+ short x;
+};
+
+struct padded_2bytes {
+ short: 16;
+};
+
+struct exact_4bytes {
+ int x;
+};
+
+struct padded_4bytes {
+ int: 32;
+};
+
+struct exact_8bytes {
+ long x;
+};
+
+struct padded_8bytes {
+ long: 64;
+};
+
+struct ff_periodic_effect {
+ int: 32;
+ short magnitude;
+ long: 0;
+ short phase;
+ long: 0;
+ int: 32;
+ int custom_len;
+ short *custom_data;
+};
+
+struct ib_wc {
+ long: 64;
+ long: 64;
+ int: 32;
+ int byte_len;
+ void *qp;
+ union {} ex;
+ long: 64;
+ int slid;
+ int wc_flags;
+ long: 64;
+ char smac[6];
+ long: 0;
+ char network_hdr_type;
+};
+
+struct acpi_object_method {
+ long: 64;
+ char: 8;
+ char type;
+ short reference_count;
+ char flags;
+ short: 0;
+ char: 8;
+ char sync_level;
+ long: 64;
+ void *node;
+ void *aml_start;
+ union {} dispatch;
+ long: 64;
+ int aml_length;
+};
+
+struct nested_unpacked {
+ int x;
+};
+
+struct nested_packed {
+ struct nested_unpacked a;
+ char c;
+} __attribute__((packed));
+
+struct outer_mixed_but_unpacked {
+ struct nested_packed b1;
+ short a1;
+ struct nested_packed b2;
+};
+
+/* ------ END-EXPECTED-OUTPUT ------ */
+
int f(struct {
struct padded_implicitly _1;
struct padded_explicitly _2;
struct padded_a_lot _3;
struct padded_cache_line _4;
struct zone _5;
+ struct padding_wo_named_members _6;
+ struct padding_weird_1 _7;
+ struct padding_weird_2 _8;
+ struct exact_1byte _100;
+ struct padded_1byte _101;
+ struct exact_2bytes _102;
+ struct padded_2bytes _103;
+ struct exact_4bytes _104;
+ struct padded_4bytes _105;
+ struct exact_8bytes _106;
+ struct padded_8bytes _107;
+ struct ff_periodic_effect _200;
+ struct ib_wc _201;
+ struct acpi_object_method _202;
+ struct outer_mixed_but_unpacked _203;
} *_)
{
return 0;
diff --git a/tools/testing/selftests/bpf/progs/btf_dump_test_case_syntax.c b/tools/testing/selftests/bpf/progs/btf_dump_test_case_syntax.c
index 31975c96e2c9..ad21ee8c7e23 100644
--- a/tools/testing/selftests/bpf/progs/btf_dump_test_case_syntax.c
+++ b/tools/testing/selftests/bpf/progs/btf_dump_test_case_syntax.c
@@ -25,6 +25,39 @@ typedef enum {
H = 2,
} e3_t;
+/* ----- START-EXPECTED-OUTPUT ----- */
+/*
+ *enum e_byte {
+ * EBYTE_1 = 0,
+ * EBYTE_2 = 1,
+ *} __attribute__((mode(byte)));
+ *
+ */
+/* ----- END-EXPECTED-OUTPUT ----- */
+enum e_byte {
+ EBYTE_1,
+ EBYTE_2,
+} __attribute__((mode(byte)));
+
+/* ----- START-EXPECTED-OUTPUT ----- */
+/*
+ *enum e_word {
+ * EWORD_1 = 0LL,
+ * EWORD_2 = 1LL,
+ *} __attribute__((mode(word)));
+ *
+ */
+/* ----- END-EXPECTED-OUTPUT ----- */
+enum e_word {
+ EWORD_1,
+ EWORD_2,
+} __attribute__((mode(word))); /* force to use 8-byte backing for this enum */
+
+/* ----- START-EXPECTED-OUTPUT ----- */
+enum e_big {
+ EBIG_1 = 1000000000000ULL,
+};
+
typedef int int_t;
typedef volatile const int * volatile const crazy_ptr_t;
@@ -51,7 +84,7 @@ typedef void (*printf_fn_t)(const char *, ...);
* typedef int (*fn_t)(int);
* typedef char * const * (*fn_ptr2_t)(s_t, fn_t);
*
- * - `fn_complext_t`: pointer to a function returning struct and accepting
+ * - `fn_complex_t`: pointer to a function returning struct and accepting
* union and struct. All structs and enum are anonymous and defined inline.
*
* - `signal_t: pointer to a function accepting a pointer to a function as an
@@ -94,7 +127,7 @@ typedef void (* (*signal_t)(int, void (*)(int)))(int);
typedef char * (*fn_ptr_arr1_t[10])(int **);
-typedef char * (* const (* const fn_ptr_arr2_t[5])())(char * (*)(int));
+typedef char * (* (* const fn_ptr_arr2_t[5])())(char * (*)(int));
struct struct_w_typedefs {
int_t a;
@@ -174,6 +207,12 @@ struct struct_in_struct {
};
};
+struct struct_in_array {};
+
+struct struct_in_array_typed {};
+
+typedef struct struct_in_array_typed struct_in_array_t[2];
+
struct struct_with_embedded_stuff {
int a;
struct {
@@ -183,7 +222,7 @@ struct struct_with_embedded_stuff {
const char *d;
} e;
union {
- volatile long int f;
+ volatile long f;
void * restrict g;
};
};
@@ -203,6 +242,14 @@ struct struct_with_embedded_stuff {
} r[5];
struct struct_in_struct s[10];
int t[11];
+ struct struct_in_array (*u)[2];
+ struct_in_array_t *v;
+};
+
+struct float_struct {
+ float f;
+ const double *d;
+ volatile long double *ld;
};
struct root_struct {
@@ -210,6 +257,9 @@ struct root_struct {
enum e2 _2;
e2_t _2_1;
e3_t _2_2;
+ enum e_byte _100;
+ enum e_word _101;
+ enum e_big _102;
struct struct_w_typedefs _3;
anon_struct_t _7;
struct struct_fwd *_8;
@@ -219,6 +269,7 @@ struct root_struct {
union_fwd_t *_12;
union_fwd_ptr_t _13;
struct struct_with_embedded_stuff _14;
+ struct float_struct _15;
};
/* ------ END-EXPECTED-OUTPUT ------ */
diff --git a/tools/testing/selftests/bpf/progs/btf_ptr.h b/tools/testing/selftests/bpf/progs/btf_ptr.h
new file mode 100644
index 000000000000..c3c9797c67db
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/btf_ptr.h
@@ -0,0 +1,27 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* Copyright (c) 2020, Oracle and/or its affiliates. */
+/* "undefine" structs in vmlinux.h, because we "override" them below */
+#define btf_ptr btf_ptr___not_used
+#define BTF_F_COMPACT BTF_F_COMPACT___not_used
+#define BTF_F_NONAME BTF_F_NONAME___not_used
+#define BTF_F_PTR_RAW BTF_F_PTR_RAW___not_used
+#define BTF_F_ZERO BTF_F_ZERO___not_used
+#include "vmlinux.h"
+#undef btf_ptr
+#undef BTF_F_COMPACT
+#undef BTF_F_NONAME
+#undef BTF_F_PTR_RAW
+#undef BTF_F_ZERO
+
+struct btf_ptr {
+ void *ptr;
+ __u32 type_id;
+ __u32 flags;
+};
+
+enum {
+ BTF_F_COMPACT = (1ULL << 0),
+ BTF_F_NONAME = (1ULL << 1),
+ BTF_F_PTR_RAW = (1ULL << 2),
+ BTF_F_ZERO = (1ULL << 3),
+};
diff --git a/tools/testing/selftests/bpf/progs/btf_type_tag.c b/tools/testing/selftests/bpf/progs/btf_type_tag.c
new file mode 100644
index 000000000000..1d488da7e920
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/btf_type_tag.c
@@ -0,0 +1,25 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2021 Facebook */
+#include "vmlinux.h"
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
+
+#if __has_attribute(btf_type_tag)
+#define __tag1 __attribute__((btf_type_tag("tag1")))
+#define __tag2 __attribute__((btf_type_tag("tag2")))
+volatile const bool skip_tests = false;
+#else
+#define __tag1
+#define __tag2
+volatile const bool skip_tests = true;
+#endif
+
+struct btf_type_tag_test {
+ int __tag1 * __tag1 __tag2 *p;
+} g;
+
+SEC("fentry/bpf_fentry_test1")
+int BPF_PROG(sub, int x)
+{
+ return 0;
+}
diff --git a/tools/testing/selftests/bpf/progs/btf_type_tag_percpu.c b/tools/testing/selftests/bpf/progs/btf_type_tag_percpu.c
new file mode 100644
index 000000000000..38f78d9345de
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/btf_type_tag_percpu.c
@@ -0,0 +1,67 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2022 Google */
+#include "vmlinux.h"
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
+
+struct bpf_testmod_btf_type_tag_1 {
+ int a;
+};
+
+struct bpf_testmod_btf_type_tag_2 {
+ struct bpf_testmod_btf_type_tag_1 *p;
+};
+
+__u64 g;
+
+SEC("fentry/bpf_testmod_test_btf_type_tag_percpu_1")
+int BPF_PROG(test_percpu1, struct bpf_testmod_btf_type_tag_1 *arg)
+{
+ g = arg->a;
+ return 0;
+}
+
+SEC("fentry/bpf_testmod_test_btf_type_tag_percpu_2")
+int BPF_PROG(test_percpu2, struct bpf_testmod_btf_type_tag_2 *arg)
+{
+ g = arg->p->a;
+ return 0;
+}
+
+/* trace_cgroup_mkdir(struct cgroup *cgrp, const char *path)
+ *
+ * struct cgroup_rstat_cpu {
+ * ...
+ * struct cgroup *updated_children;
+ * ...
+ * };
+ *
+ * struct cgroup {
+ * ...
+ * struct cgroup_rstat_cpu __percpu *rstat_cpu;
+ * ...
+ * };
+ */
+SEC("tp_btf/cgroup_mkdir")
+int BPF_PROG(test_percpu_load, struct cgroup *cgrp, const char *path)
+{
+ g = (__u64)cgrp->rstat_cpu->updated_children;
+ return 0;
+}
+
+SEC("tp_btf/cgroup_mkdir")
+int BPF_PROG(test_percpu_helper, struct cgroup *cgrp, const char *path)
+{
+ struct cgroup_rstat_cpu *rstat;
+ __u32 cpu;
+
+ cpu = bpf_get_smp_processor_id();
+ rstat = (struct cgroup_rstat_cpu *)bpf_per_cpu_ptr(cgrp->rstat_cpu, cpu);
+ if (rstat) {
+ /* READ_ONCE */
+ *(volatile int *)rstat;
+ }
+
+ return 0;
+}
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/btf_type_tag_user.c b/tools/testing/selftests/bpf/progs/btf_type_tag_user.c
new file mode 100644
index 000000000000..5523f77c5a44
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/btf_type_tag_user.c
@@ -0,0 +1,40 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2022 Facebook */
+#include "vmlinux.h"
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
+
+struct bpf_testmod_btf_type_tag_1 {
+ int a;
+};
+
+struct bpf_testmod_btf_type_tag_2 {
+ struct bpf_testmod_btf_type_tag_1 *p;
+};
+
+int g;
+
+SEC("fentry/bpf_testmod_test_btf_type_tag_user_1")
+int BPF_PROG(test_user1, struct bpf_testmod_btf_type_tag_1 *arg)
+{
+ g = arg->a;
+ return 0;
+}
+
+SEC("fentry/bpf_testmod_test_btf_type_tag_user_2")
+int BPF_PROG(test_user2, struct bpf_testmod_btf_type_tag_2 *arg)
+{
+ g = arg->p->a;
+ return 0;
+}
+
+/* int __sys_getsockname(int fd, struct sockaddr __user *usockaddr,
+ * int __user *usockaddr_len);
+ */
+SEC("fentry/__sys_getsockname")
+int BPF_PROG(test_sys_getsockname, int fd, struct sockaddr *usockaddr,
+ int *usockaddr_len)
+{
+ g = usockaddr->sa_family;
+ return 0;
+}
diff --git a/tools/testing/selftests/bpf/progs/cb_refs.c b/tools/testing/selftests/bpf/progs/cb_refs.c
new file mode 100644
index 000000000000..56c764df8196
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/cb_refs.c
@@ -0,0 +1,114 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <vmlinux.h>
+#include <bpf/bpf_tracing.h>
+#include <bpf/bpf_helpers.h>
+#include "../bpf_testmod/bpf_testmod_kfunc.h"
+
+struct map_value {
+ struct prog_test_ref_kfunc __kptr *ptr;
+};
+
+struct {
+ __uint(type, BPF_MAP_TYPE_ARRAY);
+ __type(key, int);
+ __type(value, struct map_value);
+ __uint(max_entries, 16);
+} array_map SEC(".maps");
+
+static __noinline int cb1(void *map, void *key, void *value, void *ctx)
+{
+ void *p = *(void **)ctx;
+ bpf_kfunc_call_test_release(p);
+ /* Without the fix this would cause underflow */
+ return 0;
+}
+
+SEC("?tc")
+int underflow_prog(void *ctx)
+{
+ struct prog_test_ref_kfunc *p;
+ unsigned long sl = 0;
+
+ p = bpf_kfunc_call_test_acquire(&sl);
+ if (!p)
+ return 0;
+ bpf_for_each_map_elem(&array_map, cb1, &p, 0);
+ bpf_kfunc_call_test_release(p);
+ return 0;
+}
+
+static __always_inline int cb2(void *map, void *key, void *value, void *ctx)
+{
+ unsigned long sl = 0;
+
+ *(void **)ctx = bpf_kfunc_call_test_acquire(&sl);
+ /* Without the fix this would leak memory */
+ return 0;
+}
+
+SEC("?tc")
+int leak_prog(void *ctx)
+{
+ struct prog_test_ref_kfunc *p;
+ struct map_value *v;
+
+ v = bpf_map_lookup_elem(&array_map, &(int){0});
+ if (!v)
+ return 0;
+
+ p = NULL;
+ bpf_for_each_map_elem(&array_map, cb2, &p, 0);
+ p = bpf_kptr_xchg(&v->ptr, p);
+ if (p)
+ bpf_kfunc_call_test_release(p);
+ return 0;
+}
+
+static __always_inline int cb(void *map, void *key, void *value, void *ctx)
+{
+ return 0;
+}
+
+static __always_inline int cb3(void *map, void *key, void *value, void *ctx)
+{
+ unsigned long sl = 0;
+ void *p;
+
+ bpf_kfunc_call_test_acquire(&sl);
+ bpf_for_each_map_elem(&array_map, cb, &p, 0);
+ /* It should only complain here, not in cb. This is why we need
+ * callback_ref to be set to frameno.
+ */
+ return 0;
+}
+
+SEC("?tc")
+int nested_cb(void *ctx)
+{
+ struct prog_test_ref_kfunc *p;
+ unsigned long sl = 0;
+ int sp = 0;
+
+ p = bpf_kfunc_call_test_acquire(&sl);
+ if (!p)
+ return 0;
+ bpf_for_each_map_elem(&array_map, cb3, &sp, 0);
+ bpf_kfunc_call_test_release(p);
+ return 0;
+}
+
+SEC("?tc")
+int non_cb_transfer_ref(void *ctx)
+{
+ struct prog_test_ref_kfunc *p;
+ unsigned long sl = 0;
+
+ p = bpf_kfunc_call_test_acquire(&sl);
+ if (!p)
+ return 0;
+ cb1(NULL, NULL, NULL, &p);
+ bpf_kfunc_call_test_acquire(&sl);
+ return 0;
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/cg_storage_multi.h b/tools/testing/selftests/bpf/progs/cg_storage_multi.h
new file mode 100644
index 000000000000..a0778fe7857a
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/cg_storage_multi.h
@@ -0,0 +1,13 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+
+#ifndef __PROGS_CG_STORAGE_MULTI_H
+#define __PROGS_CG_STORAGE_MULTI_H
+
+#include <asm/types.h>
+
+struct cgroup_value {
+ __u32 egress_pkts;
+ __u32 ingress_pkts;
+};
+
+#endif
diff --git a/tools/testing/selftests/bpf/progs/cg_storage_multi_egress_only.c b/tools/testing/selftests/bpf/progs/cg_storage_multi_egress_only.c
new file mode 100644
index 000000000000..44ad46b33539
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/cg_storage_multi_egress_only.c
@@ -0,0 +1,33 @@
+// SPDX-License-Identifier: GPL-2.0-only
+
+/*
+ * Copyright 2020 Google LLC.
+ */
+
+#include <errno.h>
+#include <linux/bpf.h>
+#include <linux/ip.h>
+#include <linux/udp.h>
+#include <bpf/bpf_helpers.h>
+
+#include "progs/cg_storage_multi.h"
+
+struct {
+ __uint(type, BPF_MAP_TYPE_CGROUP_STORAGE);
+ __type(key, struct bpf_cgroup_storage_key);
+ __type(value, struct cgroup_value);
+} cgroup_storage SEC(".maps");
+
+__u32 invocations = 0;
+
+SEC("cgroup_skb/egress")
+int egress(struct __sk_buff *skb)
+{
+ struct cgroup_value *ptr_cg_storage =
+ bpf_get_local_storage(&cgroup_storage, 0);
+
+ __sync_fetch_and_add(&ptr_cg_storage->egress_pkts, 1);
+ __sync_fetch_and_add(&invocations, 1);
+
+ return 1;
+}
diff --git a/tools/testing/selftests/bpf/progs/cg_storage_multi_isolated.c b/tools/testing/selftests/bpf/progs/cg_storage_multi_isolated.c
new file mode 100644
index 000000000000..3f81ff92184c
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/cg_storage_multi_isolated.c
@@ -0,0 +1,57 @@
+// SPDX-License-Identifier: GPL-2.0-only
+
+/*
+ * Copyright 2020 Google LLC.
+ */
+
+#include <errno.h>
+#include <linux/bpf.h>
+#include <linux/ip.h>
+#include <linux/udp.h>
+#include <bpf/bpf_helpers.h>
+
+#include "progs/cg_storage_multi.h"
+
+struct {
+ __uint(type, BPF_MAP_TYPE_CGROUP_STORAGE);
+ __type(key, struct bpf_cgroup_storage_key);
+ __type(value, struct cgroup_value);
+} cgroup_storage SEC(".maps");
+
+__u32 invocations = 0;
+
+SEC("cgroup_skb/egress")
+int egress1(struct __sk_buff *skb)
+{
+ struct cgroup_value *ptr_cg_storage =
+ bpf_get_local_storage(&cgroup_storage, 0);
+
+ __sync_fetch_and_add(&ptr_cg_storage->egress_pkts, 1);
+ __sync_fetch_and_add(&invocations, 1);
+
+ return 1;
+}
+
+SEC("cgroup_skb/egress")
+int egress2(struct __sk_buff *skb)
+{
+ struct cgroup_value *ptr_cg_storage =
+ bpf_get_local_storage(&cgroup_storage, 0);
+
+ __sync_fetch_and_add(&ptr_cg_storage->egress_pkts, 1);
+ __sync_fetch_and_add(&invocations, 1);
+
+ return 1;
+}
+
+SEC("cgroup_skb/ingress")
+int ingress(struct __sk_buff *skb)
+{
+ struct cgroup_value *ptr_cg_storage =
+ bpf_get_local_storage(&cgroup_storage, 0);
+
+ __sync_fetch_and_add(&ptr_cg_storage->ingress_pkts, 1);
+ __sync_fetch_and_add(&invocations, 1);
+
+ return 1;
+}
diff --git a/tools/testing/selftests/bpf/progs/cg_storage_multi_shared.c b/tools/testing/selftests/bpf/progs/cg_storage_multi_shared.c
new file mode 100644
index 000000000000..d662db27fe4a
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/cg_storage_multi_shared.c
@@ -0,0 +1,57 @@
+// SPDX-License-Identifier: GPL-2.0-only
+
+/*
+ * Copyright 2020 Google LLC.
+ */
+
+#include <errno.h>
+#include <linux/bpf.h>
+#include <linux/ip.h>
+#include <linux/udp.h>
+#include <bpf/bpf_helpers.h>
+
+#include "progs/cg_storage_multi.h"
+
+struct {
+ __uint(type, BPF_MAP_TYPE_CGROUP_STORAGE);
+ __type(key, __u64);
+ __type(value, struct cgroup_value);
+} cgroup_storage SEC(".maps");
+
+__u32 invocations = 0;
+
+SEC("cgroup_skb/egress")
+int egress1(struct __sk_buff *skb)
+{
+ struct cgroup_value *ptr_cg_storage =
+ bpf_get_local_storage(&cgroup_storage, 0);
+
+ __sync_fetch_and_add(&ptr_cg_storage->egress_pkts, 1);
+ __sync_fetch_and_add(&invocations, 1);
+
+ return 1;
+}
+
+SEC("cgroup_skb/egress")
+int egress2(struct __sk_buff *skb)
+{
+ struct cgroup_value *ptr_cg_storage =
+ bpf_get_local_storage(&cgroup_storage, 0);
+
+ __sync_fetch_and_add(&ptr_cg_storage->egress_pkts, 1);
+ __sync_fetch_and_add(&invocations, 1);
+
+ return 1;
+}
+
+SEC("cgroup_skb/ingress")
+int ingress(struct __sk_buff *skb)
+{
+ struct cgroup_value *ptr_cg_storage =
+ bpf_get_local_storage(&cgroup_storage, 0);
+
+ __sync_fetch_and_add(&ptr_cg_storage->ingress_pkts, 1);
+ __sync_fetch_and_add(&invocations, 1);
+
+ return 1;
+}
diff --git a/tools/testing/selftests/bpf/progs/cgroup_getset_retval_getsockopt.c b/tools/testing/selftests/bpf/progs/cgroup_getset_retval_getsockopt.c
new file mode 100644
index 000000000000..932b8ecd4ae3
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/cgroup_getset_retval_getsockopt.c
@@ -0,0 +1,58 @@
+// SPDX-License-Identifier: GPL-2.0-only
+
+/*
+ * Copyright 2021 Google LLC.
+ */
+
+#include <errno.h>
+#include <linux/bpf.h>
+#include <bpf/bpf_helpers.h>
+
+__u32 invocations = 0;
+__u32 assertion_error = 0;
+__u32 retval_value = 0;
+__u32 ctx_retval_value = 0;
+__u32 page_size = 0;
+
+SEC("cgroup/getsockopt")
+int get_retval(struct bpf_sockopt *ctx)
+{
+ retval_value = bpf_get_retval();
+ ctx_retval_value = ctx->retval;
+ __sync_fetch_and_add(&invocations, 1);
+
+ /* optval larger than PAGE_SIZE use kernel's buffer. */
+ if (ctx->optlen > page_size)
+ ctx->optlen = 0;
+
+ return 1;
+}
+
+SEC("cgroup/getsockopt")
+int set_eisconn(struct bpf_sockopt *ctx)
+{
+ __sync_fetch_and_add(&invocations, 1);
+
+ if (bpf_set_retval(-EISCONN))
+ assertion_error = 1;
+
+ /* optval larger than PAGE_SIZE use kernel's buffer. */
+ if (ctx->optlen > page_size)
+ ctx->optlen = 0;
+
+ return 1;
+}
+
+SEC("cgroup/getsockopt")
+int clear_retval(struct bpf_sockopt *ctx)
+{
+ __sync_fetch_and_add(&invocations, 1);
+
+ ctx->retval = 0;
+
+ /* optval larger than PAGE_SIZE use kernel's buffer. */
+ if (ctx->optlen > page_size)
+ ctx->optlen = 0;
+
+ return 1;
+}
diff --git a/tools/testing/selftests/bpf/progs/cgroup_getset_retval_hooks.c b/tools/testing/selftests/bpf/progs/cgroup_getset_retval_hooks.c
new file mode 100644
index 000000000000..13dfb4bbfd28
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/cgroup_getset_retval_hooks.c
@@ -0,0 +1,16 @@
+// SPDX-License-Identifier: GPL-2.0-only
+
+#include <linux/bpf.h>
+#include <bpf/bpf_helpers.h>
+
+#define BPF_RETVAL_HOOK(name, section, ctx, expected_err) \
+ __attribute__((__section__("?" section))) \
+ int name(struct ctx *_ctx) \
+ { \
+ bpf_set_retval(bpf_get_retval()); \
+ return 1; \
+ }
+
+#include "cgroup_getset_retval_hooks.h"
+
+#undef BPF_RETVAL_HOOK
diff --git a/tools/testing/selftests/bpf/progs/cgroup_getset_retval_setsockopt.c b/tools/testing/selftests/bpf/progs/cgroup_getset_retval_setsockopt.c
new file mode 100644
index 000000000000..45a0e9f492a9
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/cgroup_getset_retval_setsockopt.c
@@ -0,0 +1,69 @@
+// SPDX-License-Identifier: GPL-2.0-only
+
+/*
+ * Copyright 2021 Google LLC.
+ */
+
+#include <errno.h>
+#include <linux/bpf.h>
+#include <bpf/bpf_helpers.h>
+
+__u32 invocations = 0;
+__u32 assertion_error = 0;
+__u32 retval_value = 0;
+__s32 page_size = 0;
+
+SEC("cgroup/setsockopt")
+int get_retval(struct bpf_sockopt *ctx)
+{
+ retval_value = bpf_get_retval();
+ __sync_fetch_and_add(&invocations, 1);
+
+ /* optval larger than PAGE_SIZE use kernel's buffer. */
+ if (ctx->optlen > page_size)
+ ctx->optlen = 0;
+
+ return 1;
+}
+
+SEC("cgroup/setsockopt")
+int set_eunatch(struct bpf_sockopt *ctx)
+{
+ __sync_fetch_and_add(&invocations, 1);
+
+ if (bpf_set_retval(-EUNATCH))
+ assertion_error = 1;
+
+ /* optval larger than PAGE_SIZE use kernel's buffer. */
+ if (ctx->optlen > page_size)
+ ctx->optlen = 0;
+
+ return 0;
+}
+
+SEC("cgroup/setsockopt")
+int set_eisconn(struct bpf_sockopt *ctx)
+{
+ __sync_fetch_and_add(&invocations, 1);
+
+ if (bpf_set_retval(-EISCONN))
+ assertion_error = 1;
+
+ /* optval larger than PAGE_SIZE use kernel's buffer. */
+ if (ctx->optlen > page_size)
+ ctx->optlen = 0;
+
+ return 0;
+}
+
+SEC("cgroup/setsockopt")
+int legacy_eperm(struct bpf_sockopt *ctx)
+{
+ __sync_fetch_and_add(&invocations, 1);
+
+ /* optval larger than PAGE_SIZE use kernel's buffer. */
+ if (ctx->optlen > page_size)
+ ctx->optlen = 0;
+
+ return 0;
+}
diff --git a/tools/testing/selftests/bpf/progs/cgroup_hierarchical_stats.c b/tools/testing/selftests/bpf/progs/cgroup_hierarchical_stats.c
new file mode 100644
index 000000000000..c74362854948
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/cgroup_hierarchical_stats.c
@@ -0,0 +1,155 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright 2022 Google LLC.
+ */
+#include "vmlinux.h"
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
+#include <bpf/bpf_core_read.h>
+
+char _license[] SEC("license") = "GPL";
+
+struct percpu_attach_counter {
+ /* Previous percpu state, to figure out if we have new updates */
+ __u64 prev;
+ /* Current percpu state */
+ __u64 state;
+};
+
+struct attach_counter {
+ /* State propagated through children, pending aggregation */
+ __u64 pending;
+ /* Total state, including all cpus and all children */
+ __u64 state;
+};
+
+struct {
+ __uint(type, BPF_MAP_TYPE_PERCPU_HASH);
+ __uint(max_entries, 1024);
+ __type(key, __u64);
+ __type(value, struct percpu_attach_counter);
+} percpu_attach_counters SEC(".maps");
+
+struct {
+ __uint(type, BPF_MAP_TYPE_HASH);
+ __uint(max_entries, 1024);
+ __type(key, __u64);
+ __type(value, struct attach_counter);
+} attach_counters SEC(".maps");
+
+extern void cgroup_rstat_updated(struct cgroup *cgrp, int cpu) __ksym;
+extern void cgroup_rstat_flush(struct cgroup *cgrp) __ksym;
+
+static uint64_t cgroup_id(struct cgroup *cgrp)
+{
+ return cgrp->kn->id;
+}
+
+static int create_percpu_attach_counter(__u64 cg_id, __u64 state)
+{
+ struct percpu_attach_counter pcpu_init = {.state = state, .prev = 0};
+
+ return bpf_map_update_elem(&percpu_attach_counters, &cg_id,
+ &pcpu_init, BPF_NOEXIST);
+}
+
+static int create_attach_counter(__u64 cg_id, __u64 state, __u64 pending)
+{
+ struct attach_counter init = {.state = state, .pending = pending};
+
+ return bpf_map_update_elem(&attach_counters, &cg_id,
+ &init, BPF_NOEXIST);
+}
+
+SEC("fentry/cgroup_attach_task")
+int BPF_PROG(counter, struct cgroup *dst_cgrp, struct task_struct *leader,
+ bool threadgroup)
+{
+ __u64 cg_id = cgroup_id(dst_cgrp);
+ struct percpu_attach_counter *pcpu_counter = bpf_map_lookup_elem(
+ &percpu_attach_counters,
+ &cg_id);
+
+ if (pcpu_counter)
+ pcpu_counter->state += 1;
+ else if (create_percpu_attach_counter(cg_id, 1))
+ return 0;
+
+ cgroup_rstat_updated(dst_cgrp, bpf_get_smp_processor_id());
+ return 0;
+}
+
+SEC("fentry/bpf_rstat_flush")
+int BPF_PROG(flusher, struct cgroup *cgrp, struct cgroup *parent, int cpu)
+{
+ struct percpu_attach_counter *pcpu_counter;
+ struct attach_counter *total_counter, *parent_counter;
+ __u64 cg_id = cgroup_id(cgrp);
+ __u64 parent_cg_id = parent ? cgroup_id(parent) : 0;
+ __u64 state;
+ __u64 delta = 0;
+
+ /* Add CPU changes on this level since the last flush */
+ pcpu_counter = bpf_map_lookup_percpu_elem(&percpu_attach_counters,
+ &cg_id, cpu);
+ if (pcpu_counter) {
+ state = pcpu_counter->state;
+ delta += state - pcpu_counter->prev;
+ pcpu_counter->prev = state;
+ }
+
+ total_counter = bpf_map_lookup_elem(&attach_counters, &cg_id);
+ if (!total_counter) {
+ if (create_attach_counter(cg_id, delta, 0))
+ return 0;
+ goto update_parent;
+ }
+
+ /* Collect pending stats from subtree */
+ if (total_counter->pending) {
+ delta += total_counter->pending;
+ total_counter->pending = 0;
+ }
+
+ /* Propagate changes to this cgroup's total */
+ total_counter->state += delta;
+
+update_parent:
+ /* Skip if there are no changes to propagate, or no parent */
+ if (!delta || !parent_cg_id)
+ return 0;
+
+ /* Propagate changes to cgroup's parent */
+ parent_counter = bpf_map_lookup_elem(&attach_counters,
+ &parent_cg_id);
+ if (parent_counter)
+ parent_counter->pending += delta;
+ else
+ create_attach_counter(parent_cg_id, 0, delta);
+ return 0;
+}
+
+SEC("iter.s/cgroup")
+int BPF_PROG(dumper, struct bpf_iter_meta *meta, struct cgroup *cgrp)
+{
+ struct seq_file *seq = meta->seq;
+ struct attach_counter *total_counter;
+ __u64 cg_id = cgrp ? cgroup_id(cgrp) : 0;
+
+ /* Do nothing for the terminal call */
+ if (!cg_id)
+ return 1;
+
+ /* Flush the stats to make sure we get the most updated numbers */
+ cgroup_rstat_flush(cgrp);
+
+ total_counter = bpf_map_lookup_elem(&attach_counters, &cg_id);
+ if (!total_counter) {
+ BPF_SEQ_PRINTF(seq, "cg_id: %llu, attach_counter: 0\n",
+ cg_id);
+ } else {
+ BPF_SEQ_PRINTF(seq, "cg_id: %llu, attach_counter: %llu\n",
+ cg_id, total_counter->state);
+ }
+ return 0;
+}
diff --git a/tools/testing/selftests/bpf/progs/cgroup_iter.c b/tools/testing/selftests/bpf/progs/cgroup_iter.c
new file mode 100644
index 000000000000..de03997322a7
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/cgroup_iter.c
@@ -0,0 +1,39 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2022 Google */
+
+#include "bpf_iter.h"
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
+
+char _license[] SEC("license") = "GPL";
+int terminate_early = 0;
+u64 terminal_cgroup = 0;
+
+static inline u64 cgroup_id(struct cgroup *cgrp)
+{
+ return cgrp->kn->id;
+}
+
+SEC("iter/cgroup")
+int cgroup_id_printer(struct bpf_iter__cgroup *ctx)
+{
+ struct seq_file *seq = ctx->meta->seq;
+ struct cgroup *cgrp = ctx->cgroup;
+
+ /* epilogue */
+ if (cgrp == NULL) {
+ BPF_SEQ_PRINTF(seq, "epilogue\n");
+ return 0;
+ }
+
+ /* prologue */
+ if (ctx->meta->seq_num == 0)
+ BPF_SEQ_PRINTF(seq, "prologue\n");
+
+ BPF_SEQ_PRINTF(seq, "%8llu\n", cgroup_id(cgrp));
+
+ if (terminal_cgroup == cgroup_id(cgrp))
+ return 1;
+
+ return terminate_early ? 1 : 0;
+}
diff --git a/tools/testing/selftests/bpf/progs/cgroup_skb_sk_lookup_kern.c b/tools/testing/selftests/bpf/progs/cgroup_skb_sk_lookup_kern.c
index 3f757e30d7a0..ac86a8a61605 100644
--- a/tools/testing/selftests/bpf/progs/cgroup_skb_sk_lookup_kern.c
+++ b/tools/testing/selftests/bpf/progs/cgroup_skb_sk_lookup_kern.c
@@ -14,7 +14,6 @@
#include <sys/types.h>
#include <sys/socket.h>
-int _version SEC("version") = 1;
char _license[] SEC("license") = "GPL";
__u16 g_serv_port = 0;
@@ -67,7 +66,6 @@ static inline int is_allowed_peer_cg(struct __sk_buff *skb,
SEC("cgroup_skb/ingress")
int ingress_lookup(struct __sk_buff *skb)
{
- __u32 serv_port_key = 0;
struct ipv6hdr ip6h;
struct tcphdr tcph;
diff --git a/tools/testing/selftests/bpf/progs/cgroup_tcp_skb.c b/tools/testing/selftests/bpf/progs/cgroup_tcp_skb.c
new file mode 100644
index 000000000000..1e2e73f3b749
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/cgroup_tcp_skb.c
@@ -0,0 +1,382 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2023 Meta Platforms, Inc. and affiliates. */
+#include <linux/bpf.h>
+#include <bpf/bpf_endian.h>
+#include <bpf/bpf_helpers.h>
+
+#include <linux/if_ether.h>
+#include <linux/in.h>
+#include <linux/in6.h>
+#include <linux/ipv6.h>
+#include <linux/tcp.h>
+
+#include <sys/types.h>
+#include <sys/socket.h>
+
+#include "cgroup_tcp_skb.h"
+
+char _license[] SEC("license") = "GPL";
+
+__u16 g_sock_port = 0;
+__u32 g_sock_state = 0;
+int g_unexpected = 0;
+__u32 g_packet_count = 0;
+
+int needed_tcp_pkt(struct __sk_buff *skb, struct tcphdr *tcph)
+{
+ struct ipv6hdr ip6h;
+
+ if (skb->protocol != bpf_htons(ETH_P_IPV6))
+ return 0;
+ if (bpf_skb_load_bytes(skb, 0, &ip6h, sizeof(ip6h)))
+ return 0;
+
+ if (ip6h.nexthdr != IPPROTO_TCP)
+ return 0;
+
+ if (bpf_skb_load_bytes(skb, sizeof(ip6h), tcph, sizeof(*tcph)))
+ return 0;
+
+ if (tcph->source != bpf_htons(g_sock_port) &&
+ tcph->dest != bpf_htons(g_sock_port))
+ return 0;
+
+ return 1;
+}
+
+/* Run accept() on a socket in the cgroup to receive a new connection. */
+static int egress_accept(struct tcphdr *tcph)
+{
+ if (g_sock_state == SYN_RECV_SENDING_SYN_ACK) {
+ if (tcph->fin || !tcph->syn || !tcph->ack)
+ g_unexpected++;
+ else
+ g_sock_state = SYN_RECV;
+ return 1;
+ }
+
+ return 0;
+}
+
+static int ingress_accept(struct tcphdr *tcph)
+{
+ switch (g_sock_state) {
+ case INIT:
+ if (!tcph->syn || tcph->fin || tcph->ack)
+ g_unexpected++;
+ else
+ g_sock_state = SYN_RECV_SENDING_SYN_ACK;
+ break;
+ case SYN_RECV:
+ if (tcph->fin || tcph->syn || !tcph->ack)
+ g_unexpected++;
+ else
+ g_sock_state = ESTABLISHED;
+ break;
+ default:
+ return 0;
+ }
+
+ return 1;
+}
+
+/* Run connect() on a socket in the cgroup to start a new connection. */
+static int egress_connect(struct tcphdr *tcph)
+{
+ if (g_sock_state == INIT) {
+ if (!tcph->syn || tcph->fin || tcph->ack)
+ g_unexpected++;
+ else
+ g_sock_state = SYN_SENT;
+ return 1;
+ }
+
+ return 0;
+}
+
+static int ingress_connect(struct tcphdr *tcph)
+{
+ if (g_sock_state == SYN_SENT) {
+ if (tcph->fin || !tcph->syn || !tcph->ack)
+ g_unexpected++;
+ else
+ g_sock_state = ESTABLISHED;
+ return 1;
+ }
+
+ return 0;
+}
+
+/* The connection is closed by the peer outside the cgroup. */
+static int egress_close_remote(struct tcphdr *tcph)
+{
+ switch (g_sock_state) {
+ case ESTABLISHED:
+ break;
+ case CLOSE_WAIT_SENDING_ACK:
+ if (tcph->fin || tcph->syn || !tcph->ack)
+ g_unexpected++;
+ else
+ g_sock_state = CLOSE_WAIT;
+ break;
+ case CLOSE_WAIT:
+ if (!tcph->fin)
+ g_unexpected++;
+ else
+ g_sock_state = LAST_ACK;
+ break;
+ default:
+ return 0;
+ }
+
+ return 1;
+}
+
+static int ingress_close_remote(struct tcphdr *tcph)
+{
+ switch (g_sock_state) {
+ case ESTABLISHED:
+ if (tcph->fin)
+ g_sock_state = CLOSE_WAIT_SENDING_ACK;
+ break;
+ case LAST_ACK:
+ if (tcph->fin || tcph->syn || !tcph->ack)
+ g_unexpected++;
+ else
+ g_sock_state = CLOSED;
+ break;
+ default:
+ return 0;
+ }
+
+ return 1;
+}
+
+/* The connection is closed by the endpoint inside the cgroup. */
+static int egress_close_local(struct tcphdr *tcph)
+{
+ switch (g_sock_state) {
+ case ESTABLISHED:
+ if (tcph->fin)
+ g_sock_state = FIN_WAIT1;
+ break;
+ case TIME_WAIT_SENDING_ACK:
+ if (tcph->fin || tcph->syn || !tcph->ack)
+ g_unexpected++;
+ else
+ g_sock_state = TIME_WAIT;
+ break;
+ default:
+ return 0;
+ }
+
+ return 1;
+}
+
+static int ingress_close_local(struct tcphdr *tcph)
+{
+ switch (g_sock_state) {
+ case ESTABLISHED:
+ break;
+ case FIN_WAIT1:
+ if (tcph->fin || tcph->syn || !tcph->ack)
+ g_unexpected++;
+ else
+ g_sock_state = FIN_WAIT2;
+ break;
+ case FIN_WAIT2:
+ if (!tcph->fin || tcph->syn || !tcph->ack)
+ g_unexpected++;
+ else
+ g_sock_state = TIME_WAIT_SENDING_ACK;
+ break;
+ default:
+ return 0;
+ }
+
+ return 1;
+}
+
+/* Check the types of outgoing packets of a server socket to make sure they
+ * are consistent with the state of the server socket.
+ *
+ * The connection is closed by the client side.
+ */
+SEC("cgroup_skb/egress")
+int server_egress(struct __sk_buff *skb)
+{
+ struct tcphdr tcph;
+
+ if (!needed_tcp_pkt(skb, &tcph))
+ return 1;
+
+ g_packet_count++;
+
+ /* Egress of the server socket. */
+ if (egress_accept(&tcph) || egress_close_remote(&tcph))
+ return 1;
+
+ g_unexpected++;
+ return 1;
+}
+
+/* Check the types of incoming packets of a server socket to make sure they
+ * are consistent with the state of the server socket.
+ *
+ * The connection is closed by the client side.
+ */
+SEC("cgroup_skb/ingress")
+int server_ingress(struct __sk_buff *skb)
+{
+ struct tcphdr tcph;
+
+ if (!needed_tcp_pkt(skb, &tcph))
+ return 1;
+
+ g_packet_count++;
+
+ /* Ingress of the server socket. */
+ if (ingress_accept(&tcph) || ingress_close_remote(&tcph))
+ return 1;
+
+ g_unexpected++;
+ return 1;
+}
+
+/* Check the types of outgoing packets of a server socket to make sure they
+ * are consistent with the state of the server socket.
+ *
+ * The connection is closed by the server side.
+ */
+SEC("cgroup_skb/egress")
+int server_egress_srv(struct __sk_buff *skb)
+{
+ struct tcphdr tcph;
+
+ if (!needed_tcp_pkt(skb, &tcph))
+ return 1;
+
+ g_packet_count++;
+
+ /* Egress of the server socket. */
+ if (egress_accept(&tcph) || egress_close_local(&tcph))
+ return 1;
+
+ g_unexpected++;
+ return 1;
+}
+
+/* Check the types of incoming packets of a server socket to make sure they
+ * are consistent with the state of the server socket.
+ *
+ * The connection is closed by the server side.
+ */
+SEC("cgroup_skb/ingress")
+int server_ingress_srv(struct __sk_buff *skb)
+{
+ struct tcphdr tcph;
+
+ if (!needed_tcp_pkt(skb, &tcph))
+ return 1;
+
+ g_packet_count++;
+
+ /* Ingress of the server socket. */
+ if (ingress_accept(&tcph) || ingress_close_local(&tcph))
+ return 1;
+
+ g_unexpected++;
+ return 1;
+}
+
+/* Check the types of outgoing packets of a client socket to make sure they
+ * are consistent with the state of the client socket.
+ *
+ * The connection is closed by the server side.
+ */
+SEC("cgroup_skb/egress")
+int client_egress_srv(struct __sk_buff *skb)
+{
+ struct tcphdr tcph;
+
+ if (!needed_tcp_pkt(skb, &tcph))
+ return 1;
+
+ g_packet_count++;
+
+ /* Egress of the server socket. */
+ if (egress_connect(&tcph) || egress_close_remote(&tcph))
+ return 1;
+
+ g_unexpected++;
+ return 1;
+}
+
+/* Check the types of incoming packets of a client socket to make sure they
+ * are consistent with the state of the client socket.
+ *
+ * The connection is closed by the server side.
+ */
+SEC("cgroup_skb/ingress")
+int client_ingress_srv(struct __sk_buff *skb)
+{
+ struct tcphdr tcph;
+
+ if (!needed_tcp_pkt(skb, &tcph))
+ return 1;
+
+ g_packet_count++;
+
+ /* Ingress of the server socket. */
+ if (ingress_connect(&tcph) || ingress_close_remote(&tcph))
+ return 1;
+
+ g_unexpected++;
+ return 1;
+}
+
+/* Check the types of outgoing packets of a client socket to make sure they
+ * are consistent with the state of the client socket.
+ *
+ * The connection is closed by the client side.
+ */
+SEC("cgroup_skb/egress")
+int client_egress(struct __sk_buff *skb)
+{
+ struct tcphdr tcph;
+
+ if (!needed_tcp_pkt(skb, &tcph))
+ return 1;
+
+ g_packet_count++;
+
+ /* Egress of the server socket. */
+ if (egress_connect(&tcph) || egress_close_local(&tcph))
+ return 1;
+
+ g_unexpected++;
+ return 1;
+}
+
+/* Check the types of incoming packets of a client socket to make sure they
+ * are consistent with the state of the client socket.
+ *
+ * The connection is closed by the client side.
+ */
+SEC("cgroup_skb/ingress")
+int client_ingress(struct __sk_buff *skb)
+{
+ struct tcphdr tcph;
+
+ if (!needed_tcp_pkt(skb, &tcph))
+ return 1;
+
+ g_packet_count++;
+
+ /* Ingress of the server socket. */
+ if (ingress_connect(&tcph) || ingress_close_local(&tcph))
+ return 1;
+
+ g_unexpected++;
+ return 1;
+}
diff --git a/tools/testing/selftests/bpf/progs/cgrp_kfunc_common.h b/tools/testing/selftests/bpf/progs/cgrp_kfunc_common.h
new file mode 100644
index 000000000000..22914a70db54
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/cgrp_kfunc_common.h
@@ -0,0 +1,79 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* Copyright (c) 2022 Meta Platforms, Inc. and affiliates. */
+
+#ifndef _CGRP_KFUNC_COMMON_H
+#define _CGRP_KFUNC_COMMON_H
+
+#include <errno.h>
+#include <vmlinux.h>
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
+
+struct __cgrps_kfunc_map_value {
+ struct cgroup __kptr * cgrp;
+};
+
+struct hash_map {
+ __uint(type, BPF_MAP_TYPE_HASH);
+ __type(key, int);
+ __type(value, struct __cgrps_kfunc_map_value);
+ __uint(max_entries, 1);
+} __cgrps_kfunc_map SEC(".maps");
+
+struct cgroup *bpf_cgroup_acquire(struct cgroup *p) __ksym;
+void bpf_cgroup_release(struct cgroup *p) __ksym;
+struct cgroup *bpf_cgroup_ancestor(struct cgroup *cgrp, int level) __ksym;
+struct cgroup *bpf_cgroup_from_id(u64 cgid) __ksym;
+void bpf_rcu_read_lock(void) __ksym;
+void bpf_rcu_read_unlock(void) __ksym;
+
+static inline struct __cgrps_kfunc_map_value *cgrps_kfunc_map_value_lookup(struct cgroup *cgrp)
+{
+ s32 id;
+ long status;
+
+ status = bpf_probe_read_kernel(&id, sizeof(id), &cgrp->self.id);
+ if (status)
+ return NULL;
+
+ return bpf_map_lookup_elem(&__cgrps_kfunc_map, &id);
+}
+
+static inline int cgrps_kfunc_map_insert(struct cgroup *cgrp)
+{
+ struct __cgrps_kfunc_map_value local, *v;
+ long status;
+ struct cgroup *acquired, *old;
+ s32 id;
+
+ status = bpf_probe_read_kernel(&id, sizeof(id), &cgrp->self.id);
+ if (status)
+ return status;
+
+ local.cgrp = NULL;
+ status = bpf_map_update_elem(&__cgrps_kfunc_map, &id, &local, BPF_NOEXIST);
+ if (status)
+ return status;
+
+ v = bpf_map_lookup_elem(&__cgrps_kfunc_map, &id);
+ if (!v) {
+ bpf_map_delete_elem(&__cgrps_kfunc_map, &id);
+ return -ENOENT;
+ }
+
+ acquired = bpf_cgroup_acquire(cgrp);
+ if (!acquired) {
+ bpf_map_delete_elem(&__cgrps_kfunc_map, &id);
+ return -ENOENT;
+ }
+
+ old = bpf_kptr_xchg(&v->cgrp, acquired);
+ if (old) {
+ bpf_cgroup_release(old);
+ return -EEXIST;
+ }
+
+ return 0;
+}
+
+#endif /* _CGRP_KFUNC_COMMON_H */
diff --git a/tools/testing/selftests/bpf/progs/cgrp_kfunc_failure.c b/tools/testing/selftests/bpf/progs/cgrp_kfunc_failure.c
new file mode 100644
index 000000000000..9fe9c4a4e8f6
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/cgrp_kfunc_failure.c
@@ -0,0 +1,247 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2022 Meta Platforms, Inc. and affiliates. */
+
+#include <vmlinux.h>
+#include <bpf/bpf_tracing.h>
+#include <bpf/bpf_helpers.h>
+
+#include "bpf_misc.h"
+#include "cgrp_kfunc_common.h"
+
+char _license[] SEC("license") = "GPL";
+
+/* Prototype for all of the program trace events below:
+ *
+ * TRACE_EVENT(cgroup_mkdir,
+ * TP_PROTO(struct cgroup *cgrp, const char *path),
+ * TP_ARGS(cgrp, path)
+ */
+
+static struct __cgrps_kfunc_map_value *insert_lookup_cgrp(struct cgroup *cgrp)
+{
+ int status;
+
+ status = cgrps_kfunc_map_insert(cgrp);
+ if (status)
+ return NULL;
+
+ return cgrps_kfunc_map_value_lookup(cgrp);
+}
+
+SEC("tp_btf/cgroup_mkdir")
+__failure __msg("Possibly NULL pointer passed to trusted arg0")
+int BPF_PROG(cgrp_kfunc_acquire_untrusted, struct cgroup *cgrp, const char *path)
+{
+ struct cgroup *acquired;
+ struct __cgrps_kfunc_map_value *v;
+
+ v = insert_lookup_cgrp(cgrp);
+ if (!v)
+ return 0;
+
+ /* Can't invoke bpf_cgroup_acquire() on an untrusted pointer. */
+ acquired = bpf_cgroup_acquire(v->cgrp);
+ if (acquired)
+ bpf_cgroup_release(acquired);
+
+ return 0;
+}
+
+SEC("tp_btf/cgroup_mkdir")
+__failure __msg("Possibly NULL pointer passed to trusted arg0")
+int BPF_PROG(cgrp_kfunc_acquire_no_null_check, struct cgroup *cgrp, const char *path)
+{
+ struct cgroup *acquired;
+
+ acquired = bpf_cgroup_acquire(cgrp);
+ /*
+ * Can't invoke bpf_cgroup_release() without checking the return value
+ * of bpf_cgroup_acquire().
+ */
+ bpf_cgroup_release(acquired);
+
+ return 0;
+}
+
+SEC("tp_btf/cgroup_mkdir")
+__failure __msg("arg#0 pointer type STRUCT cgroup must point")
+int BPF_PROG(cgrp_kfunc_acquire_fp, struct cgroup *cgrp, const char *path)
+{
+ struct cgroup *acquired, *stack_cgrp = (struct cgroup *)&path;
+
+ /* Can't invoke bpf_cgroup_acquire() on a random frame pointer. */
+ acquired = bpf_cgroup_acquire((struct cgroup *)&stack_cgrp);
+ if (acquired)
+ bpf_cgroup_release(acquired);
+
+ return 0;
+}
+
+SEC("kretprobe/cgroup_destroy_locked")
+__failure __msg("calling kernel function bpf_cgroup_acquire is not allowed")
+int BPF_PROG(cgrp_kfunc_acquire_unsafe_kretprobe, struct cgroup *cgrp)
+{
+ struct cgroup *acquired;
+
+ /* Can't acquire an untrusted struct cgroup * pointer. */
+ acquired = bpf_cgroup_acquire(cgrp);
+ if (acquired)
+ bpf_cgroup_release(acquired);
+
+ return 0;
+}
+
+SEC("tp_btf/cgroup_mkdir")
+__failure __msg("cgrp_kfunc_acquire_trusted_walked")
+int BPF_PROG(cgrp_kfunc_acquire_trusted_walked, struct cgroup *cgrp, const char *path)
+{
+ struct cgroup *acquired;
+
+ /* Can't invoke bpf_cgroup_acquire() on a pointer obtained from walking a trusted cgroup. */
+ acquired = bpf_cgroup_acquire(cgrp->old_dom_cgrp);
+ if (acquired)
+ bpf_cgroup_release(acquired);
+
+ return 0;
+}
+
+SEC("tp_btf/cgroup_mkdir")
+__failure __msg("Possibly NULL pointer passed to trusted arg0")
+int BPF_PROG(cgrp_kfunc_acquire_null, struct cgroup *cgrp, const char *path)
+{
+ struct cgroup *acquired;
+
+ /* Can't invoke bpf_cgroup_acquire() on a NULL pointer. */
+ acquired = bpf_cgroup_acquire(NULL);
+ if (acquired)
+ bpf_cgroup_release(acquired);
+
+ return 0;
+}
+
+SEC("tp_btf/cgroup_mkdir")
+__failure __msg("Unreleased reference")
+int BPF_PROG(cgrp_kfunc_acquire_unreleased, struct cgroup *cgrp, const char *path)
+{
+ struct cgroup *acquired;
+
+ acquired = bpf_cgroup_acquire(cgrp);
+
+ /* Acquired cgroup is never released. */
+ __sink(acquired);
+
+ return 0;
+}
+
+SEC("tp_btf/cgroup_mkdir")
+__failure __msg("Unreleased reference")
+int BPF_PROG(cgrp_kfunc_xchg_unreleased, struct cgroup *cgrp, const char *path)
+{
+ struct cgroup *kptr;
+ struct __cgrps_kfunc_map_value *v;
+
+ v = insert_lookup_cgrp(cgrp);
+ if (!v)
+ return 0;
+
+ kptr = bpf_kptr_xchg(&v->cgrp, NULL);
+ if (!kptr)
+ return 0;
+
+ /* Kptr retrieved from map is never released. */
+
+ return 0;
+}
+
+SEC("tp_btf/cgroup_mkdir")
+__failure __msg("must be referenced or trusted")
+int BPF_PROG(cgrp_kfunc_rcu_get_release, struct cgroup *cgrp, const char *path)
+{
+ struct cgroup *kptr;
+ struct __cgrps_kfunc_map_value *v;
+
+ v = insert_lookup_cgrp(cgrp);
+ if (!v)
+ return 0;
+
+ bpf_rcu_read_lock();
+ kptr = v->cgrp;
+ if (kptr)
+ /* Can't release a cgroup kptr stored in a map. */
+ bpf_cgroup_release(kptr);
+ bpf_rcu_read_unlock();
+
+ return 0;
+}
+
+SEC("tp_btf/cgroup_mkdir")
+__failure __msg("Possibly NULL pointer passed to trusted arg0")
+int BPF_PROG(cgrp_kfunc_release_untrusted, struct cgroup *cgrp, const char *path)
+{
+ struct __cgrps_kfunc_map_value *v;
+
+ v = insert_lookup_cgrp(cgrp);
+ if (!v)
+ return 0;
+
+ /* Can't invoke bpf_cgroup_release() on an untrusted pointer. */
+ bpf_cgroup_release(v->cgrp);
+
+ return 0;
+}
+
+SEC("tp_btf/cgroup_mkdir")
+__failure __msg("arg#0 pointer type STRUCT cgroup must point")
+int BPF_PROG(cgrp_kfunc_release_fp, struct cgroup *cgrp, const char *path)
+{
+ struct cgroup *acquired = (struct cgroup *)&path;
+
+ /* Cannot release random frame pointer. */
+ bpf_cgroup_release(acquired);
+
+ return 0;
+}
+
+SEC("tp_btf/cgroup_mkdir")
+__failure __msg("Possibly NULL pointer passed to trusted arg0")
+int BPF_PROG(cgrp_kfunc_release_null, struct cgroup *cgrp, const char *path)
+{
+ struct __cgrps_kfunc_map_value local, *v;
+ long status;
+ struct cgroup *acquired, *old;
+ s32 id;
+
+ status = bpf_probe_read_kernel(&id, sizeof(id), &cgrp->self.id);
+ if (status)
+ return 0;
+
+ local.cgrp = NULL;
+ status = bpf_map_update_elem(&__cgrps_kfunc_map, &id, &local, BPF_NOEXIST);
+ if (status)
+ return status;
+
+ v = bpf_map_lookup_elem(&__cgrps_kfunc_map, &id);
+ if (!v)
+ return -ENOENT;
+
+ acquired = bpf_cgroup_acquire(cgrp);
+ if (!acquired)
+ return -ENOENT;
+
+ old = bpf_kptr_xchg(&v->cgrp, acquired);
+
+ /* old cannot be passed to bpf_cgroup_release() without a NULL check. */
+ bpf_cgroup_release(old);
+
+ return 0;
+}
+
+SEC("tp_btf/cgroup_mkdir")
+__failure __msg("release kernel function bpf_cgroup_release expects")
+int BPF_PROG(cgrp_kfunc_release_unacquired, struct cgroup *cgrp, const char *path)
+{
+ /* Cannot release trusted cgroup pointer which was not acquired. */
+ bpf_cgroup_release(cgrp);
+
+ return 0;
+}
diff --git a/tools/testing/selftests/bpf/progs/cgrp_kfunc_success.c b/tools/testing/selftests/bpf/progs/cgrp_kfunc_success.c
new file mode 100644
index 000000000000..5354455a01be
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/cgrp_kfunc_success.c
@@ -0,0 +1,223 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2022 Meta Platforms, Inc. and affiliates. */
+
+#include <vmlinux.h>
+#include <bpf/bpf_tracing.h>
+#include <bpf/bpf_helpers.h>
+
+#include "cgrp_kfunc_common.h"
+
+char _license[] SEC("license") = "GPL";
+
+int err, pid, invocations;
+
+/* Prototype for all of the program trace events below:
+ *
+ * TRACE_EVENT(cgroup_mkdir,
+ * TP_PROTO(struct cgroup *cgrp, const char *path),
+ * TP_ARGS(cgrp, path)
+ */
+
+static bool is_test_kfunc_task(void)
+{
+ int cur_pid = bpf_get_current_pid_tgid() >> 32;
+ bool same = pid == cur_pid;
+
+ if (same)
+ __sync_fetch_and_add(&invocations, 1);
+
+ return same;
+}
+
+SEC("tp_btf/cgroup_mkdir")
+int BPF_PROG(test_cgrp_acquire_release_argument, struct cgroup *cgrp, const char *path)
+{
+ struct cgroup *acquired;
+
+ if (!is_test_kfunc_task())
+ return 0;
+
+ acquired = bpf_cgroup_acquire(cgrp);
+ if (!acquired)
+ err = 1;
+ else
+ bpf_cgroup_release(acquired);
+
+ return 0;
+}
+
+SEC("tp_btf/cgroup_mkdir")
+int BPF_PROG(test_cgrp_acquire_leave_in_map, struct cgroup *cgrp, const char *path)
+{
+ long status;
+
+ if (!is_test_kfunc_task())
+ return 0;
+
+ status = cgrps_kfunc_map_insert(cgrp);
+ if (status)
+ err = 1;
+
+ return 0;
+}
+
+SEC("tp_btf/cgroup_mkdir")
+int BPF_PROG(test_cgrp_xchg_release, struct cgroup *cgrp, const char *path)
+{
+ struct cgroup *kptr, *cg;
+ struct __cgrps_kfunc_map_value *v;
+ long status;
+
+ if (!is_test_kfunc_task())
+ return 0;
+
+ status = cgrps_kfunc_map_insert(cgrp);
+ if (status) {
+ err = 1;
+ return 0;
+ }
+
+ v = cgrps_kfunc_map_value_lookup(cgrp);
+ if (!v) {
+ err = 2;
+ return 0;
+ }
+
+ kptr = v->cgrp;
+ if (!kptr) {
+ err = 4;
+ return 0;
+ }
+
+ cg = bpf_cgroup_ancestor(kptr, 1);
+ if (cg) /* verifier only check */
+ bpf_cgroup_release(cg);
+
+ kptr = bpf_kptr_xchg(&v->cgrp, NULL);
+ if (!kptr) {
+ err = 3;
+ return 0;
+ }
+
+ bpf_cgroup_release(kptr);
+
+ return 0;
+}
+
+SEC("tp_btf/cgroup_mkdir")
+int BPF_PROG(test_cgrp_get_release, struct cgroup *cgrp, const char *path)
+{
+ struct cgroup *kptr;
+ struct __cgrps_kfunc_map_value *v;
+ long status;
+
+ if (!is_test_kfunc_task())
+ return 0;
+
+ status = cgrps_kfunc_map_insert(cgrp);
+ if (status) {
+ err = 1;
+ return 0;
+ }
+
+ v = cgrps_kfunc_map_value_lookup(cgrp);
+ if (!v) {
+ err = 2;
+ return 0;
+ }
+
+ bpf_rcu_read_lock();
+ kptr = v->cgrp;
+ if (!kptr)
+ err = 3;
+ bpf_rcu_read_unlock();
+
+ return 0;
+}
+
+SEC("tp_btf/cgroup_mkdir")
+int BPF_PROG(test_cgrp_get_ancestors, struct cgroup *cgrp, const char *path)
+{
+ struct cgroup *self, *ancestor1, *invalid;
+
+ if (!is_test_kfunc_task())
+ return 0;
+
+ self = bpf_cgroup_ancestor(cgrp, cgrp->level);
+ if (!self) {
+ err = 1;
+ return 0;
+ }
+
+ if (self->self.id != cgrp->self.id) {
+ bpf_cgroup_release(self);
+ err = 2;
+ return 0;
+ }
+ bpf_cgroup_release(self);
+
+ ancestor1 = bpf_cgroup_ancestor(cgrp, cgrp->level - 1);
+ if (!ancestor1) {
+ err = 3;
+ return 0;
+ }
+ bpf_cgroup_release(ancestor1);
+
+ invalid = bpf_cgroup_ancestor(cgrp, 10000);
+ if (invalid) {
+ bpf_cgroup_release(invalid);
+ err = 4;
+ return 0;
+ }
+
+ invalid = bpf_cgroup_ancestor(cgrp, -1);
+ if (invalid) {
+ bpf_cgroup_release(invalid);
+ err = 5;
+ return 0;
+ }
+
+ return 0;
+}
+
+SEC("tp_btf/cgroup_mkdir")
+int BPF_PROG(test_cgrp_from_id, struct cgroup *cgrp, const char *path)
+{
+ struct cgroup *parent, *res;
+ u64 parent_cgid;
+
+ if (!is_test_kfunc_task())
+ return 0;
+
+ /* @cgrp's ID is not visible yet, let's test with the parent */
+ parent = bpf_cgroup_ancestor(cgrp, cgrp->level - 1);
+ if (!parent) {
+ err = 1;
+ return 0;
+ }
+
+ parent_cgid = parent->kn->id;
+ bpf_cgroup_release(parent);
+
+ res = bpf_cgroup_from_id(parent_cgid);
+ if (!res) {
+ err = 2;
+ return 0;
+ }
+
+ bpf_cgroup_release(res);
+
+ if (res != parent) {
+ err = 3;
+ return 0;
+ }
+
+ res = bpf_cgroup_from_id((u64)-1);
+ if (res) {
+ bpf_cgroup_release(res);
+ err = 4;
+ return 0;
+ }
+
+ return 0;
+}
diff --git a/tools/testing/selftests/bpf/progs/cgrp_ls_attach_cgroup.c b/tools/testing/selftests/bpf/progs/cgrp_ls_attach_cgroup.c
new file mode 100644
index 000000000000..8aeba1b75c83
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/cgrp_ls_attach_cgroup.c
@@ -0,0 +1,100 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2022 Meta Platforms, Inc. and affiliates. */
+
+#include "vmlinux.h"
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
+#include "bpf_tracing_net.h"
+
+char _license[] SEC("license") = "GPL";
+
+struct socket_cookie {
+ __u64 cookie_key;
+ __u64 cookie_value;
+};
+
+struct {
+ __uint(type, BPF_MAP_TYPE_CGRP_STORAGE);
+ __uint(map_flags, BPF_F_NO_PREALLOC);
+ __type(key, int);
+ __type(value, struct socket_cookie);
+} socket_cookies SEC(".maps");
+
+SEC("cgroup/connect6")
+int set_cookie(struct bpf_sock_addr *ctx)
+{
+ struct socket_cookie *p;
+ struct tcp_sock *tcp_sk;
+ struct bpf_sock *sk;
+
+ if (ctx->family != AF_INET6 || ctx->user_family != AF_INET6)
+ return 1;
+
+ sk = ctx->sk;
+ if (!sk)
+ return 1;
+
+ tcp_sk = bpf_skc_to_tcp_sock(sk);
+ if (!tcp_sk)
+ return 1;
+
+ p = bpf_cgrp_storage_get(&socket_cookies,
+ tcp_sk->inet_conn.icsk_inet.sk.sk_cgrp_data.cgroup, 0,
+ BPF_LOCAL_STORAGE_GET_F_CREATE);
+ if (!p)
+ return 1;
+
+ p->cookie_value = 0xF;
+ p->cookie_key = bpf_get_socket_cookie(ctx);
+ return 1;
+}
+
+SEC("sockops")
+int update_cookie_sockops(struct bpf_sock_ops *ctx)
+{
+ struct socket_cookie *p;
+ struct tcp_sock *tcp_sk;
+ struct bpf_sock *sk;
+
+ if (ctx->family != AF_INET6 || ctx->op != BPF_SOCK_OPS_TCP_CONNECT_CB)
+ return 1;
+
+ sk = ctx->sk;
+ if (!sk)
+ return 1;
+
+ tcp_sk = bpf_skc_to_tcp_sock(sk);
+ if (!tcp_sk)
+ return 1;
+
+ p = bpf_cgrp_storage_get(&socket_cookies,
+ tcp_sk->inet_conn.icsk_inet.sk.sk_cgrp_data.cgroup, 0, 0);
+ if (!p)
+ return 1;
+
+ if (p->cookie_key != bpf_get_socket_cookie(ctx))
+ return 1;
+
+ p->cookie_value |= (ctx->local_port << 8);
+ return 1;
+}
+
+SEC("fexit/inet_stream_connect")
+int BPF_PROG(update_cookie_tracing, struct socket *sock,
+ struct sockaddr *uaddr, int addr_len, int flags)
+{
+ struct socket_cookie *p;
+
+ if (uaddr->sa_family != AF_INET6)
+ return 0;
+
+ p = bpf_cgrp_storage_get(&socket_cookies, sock->sk->sk_cgrp_data.cgroup, 0, 0);
+ if (!p)
+ return 0;
+
+ if (p->cookie_key != bpf_get_socket_cookie(sock->sk))
+ return 0;
+
+ p->cookie_value |= 0xF0;
+ return 0;
+}
diff --git a/tools/testing/selftests/bpf/progs/cgrp_ls_negative.c b/tools/testing/selftests/bpf/progs/cgrp_ls_negative.c
new file mode 100644
index 000000000000..d41f90e2ab64
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/cgrp_ls_negative.c
@@ -0,0 +1,26 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2022 Meta Platforms, Inc. and affiliates. */
+
+#include "vmlinux.h"
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
+
+char _license[] SEC("license") = "GPL";
+
+struct {
+ __uint(type, BPF_MAP_TYPE_CGRP_STORAGE);
+ __uint(map_flags, BPF_F_NO_PREALLOC);
+ __type(key, int);
+ __type(value, long);
+} map_a SEC(".maps");
+
+SEC("tp_btf/sys_enter")
+int BPF_PROG(on_enter, struct pt_regs *regs, long id)
+{
+ struct task_struct *task;
+
+ task = bpf_get_current_task_btf();
+ (void)bpf_cgrp_storage_get(&map_a, (struct cgroup *)task, 0,
+ BPF_LOCAL_STORAGE_GET_F_CREATE);
+ return 0;
+}
diff --git a/tools/testing/selftests/bpf/progs/cgrp_ls_recursion.c b/tools/testing/selftests/bpf/progs/cgrp_ls_recursion.c
new file mode 100644
index 000000000000..3500e4b69ebe
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/cgrp_ls_recursion.c
@@ -0,0 +1,94 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2022 Meta Platforms, Inc. and affiliates. */
+
+#include "vmlinux.h"
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
+
+char _license[] SEC("license") = "GPL";
+
+struct {
+ __uint(type, BPF_MAP_TYPE_CGRP_STORAGE);
+ __uint(map_flags, BPF_F_NO_PREALLOC);
+ __type(key, int);
+ __type(value, long);
+} map_a SEC(".maps");
+
+struct {
+ __uint(type, BPF_MAP_TYPE_CGRP_STORAGE);
+ __uint(map_flags, BPF_F_NO_PREALLOC);
+ __type(key, int);
+ __type(value, long);
+} map_b SEC(".maps");
+
+int target_hid = 0;
+bool is_cgroup1 = 0;
+
+struct cgroup *bpf_task_get_cgroup1(struct task_struct *task, int hierarchy_id) __ksym;
+void bpf_cgroup_release(struct cgroup *cgrp) __ksym;
+
+static void __on_update(struct cgroup *cgrp)
+{
+ long *ptr;
+
+ ptr = bpf_cgrp_storage_get(&map_a, cgrp, 0, BPF_LOCAL_STORAGE_GET_F_CREATE);
+ if (ptr)
+ *ptr += 1;
+
+ ptr = bpf_cgrp_storage_get(&map_b, cgrp, 0, BPF_LOCAL_STORAGE_GET_F_CREATE);
+ if (ptr)
+ *ptr += 1;
+}
+
+SEC("fentry/bpf_local_storage_update")
+int BPF_PROG(on_update)
+{
+ struct task_struct *task = bpf_get_current_task_btf();
+ struct cgroup *cgrp;
+
+ if (is_cgroup1) {
+ cgrp = bpf_task_get_cgroup1(task, target_hid);
+ if (!cgrp)
+ return 0;
+
+ __on_update(cgrp);
+ bpf_cgroup_release(cgrp);
+ return 0;
+ }
+
+ __on_update(task->cgroups->dfl_cgrp);
+ return 0;
+}
+
+static void __on_enter(struct pt_regs *regs, long id, struct cgroup *cgrp)
+{
+ long *ptr;
+
+ ptr = bpf_cgrp_storage_get(&map_a, cgrp, 0, BPF_LOCAL_STORAGE_GET_F_CREATE);
+ if (ptr)
+ *ptr = 200;
+
+ ptr = bpf_cgrp_storage_get(&map_b, cgrp, 0, BPF_LOCAL_STORAGE_GET_F_CREATE);
+ if (ptr)
+ *ptr = 100;
+}
+
+SEC("tp_btf/sys_enter")
+int BPF_PROG(on_enter, struct pt_regs *regs, long id)
+{
+ struct task_struct *task = bpf_get_current_task_btf();
+ struct cgroup *cgrp;
+
+ if (is_cgroup1) {
+ cgrp = bpf_task_get_cgroup1(task, target_hid);
+ if (!cgrp)
+ return 0;
+
+ __on_enter(regs, id, cgrp);
+ bpf_cgroup_release(cgrp);
+ return 0;
+ }
+
+ __on_enter(regs, id, task->cgroups->dfl_cgrp);
+ return 0;
+}
diff --git a/tools/testing/selftests/bpf/progs/cgrp_ls_sleepable.c b/tools/testing/selftests/bpf/progs/cgrp_ls_sleepable.c
new file mode 100644
index 000000000000..5e282c16eadc
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/cgrp_ls_sleepable.c
@@ -0,0 +1,126 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2022 Meta Platforms, Inc. and affiliates. */
+
+#include "bpf_iter.h"
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
+#include "bpf_misc.h"
+
+char _license[] SEC("license") = "GPL";
+
+struct {
+ __uint(type, BPF_MAP_TYPE_CGRP_STORAGE);
+ __uint(map_flags, BPF_F_NO_PREALLOC);
+ __type(key, int);
+ __type(value, long);
+} map_a SEC(".maps");
+
+__s32 target_pid;
+__u64 cgroup_id;
+int target_hid;
+bool is_cgroup1;
+
+struct cgroup *bpf_task_get_cgroup1(struct task_struct *task, int hierarchy_id) __ksym;
+void bpf_cgroup_release(struct cgroup *cgrp) __ksym;
+void bpf_rcu_read_lock(void) __ksym;
+void bpf_rcu_read_unlock(void) __ksym;
+
+SEC("?iter.s/cgroup")
+int cgroup_iter(struct bpf_iter__cgroup *ctx)
+{
+ struct cgroup *cgrp = ctx->cgroup;
+ long *ptr;
+
+ if (cgrp == NULL)
+ return 0;
+
+ ptr = bpf_cgrp_storage_get(&map_a, cgrp, 0,
+ BPF_LOCAL_STORAGE_GET_F_CREATE);
+ if (ptr)
+ cgroup_id = cgrp->kn->id;
+ return 0;
+}
+
+static void __no_rcu_lock(struct cgroup *cgrp)
+{
+ long *ptr;
+
+ /* Note that trace rcu is held in sleepable prog, so we can use
+ * bpf_cgrp_storage_get() in sleepable prog.
+ */
+ ptr = bpf_cgrp_storage_get(&map_a, cgrp, 0,
+ BPF_LOCAL_STORAGE_GET_F_CREATE);
+ if (ptr)
+ cgroup_id = cgrp->kn->id;
+}
+
+SEC("?fentry.s/" SYS_PREFIX "sys_getpgid")
+int cgrp1_no_rcu_lock(void *ctx)
+{
+ struct task_struct *task;
+ struct cgroup *cgrp;
+
+ task = bpf_get_current_task_btf();
+ if (task->pid != target_pid)
+ return 0;
+
+ /* bpf_task_get_cgroup1 can work in sleepable prog */
+ cgrp = bpf_task_get_cgroup1(task, target_hid);
+ if (!cgrp)
+ return 0;
+
+ __no_rcu_lock(cgrp);
+ bpf_cgroup_release(cgrp);
+ return 0;
+}
+
+SEC("?fentry.s/" SYS_PREFIX "sys_getpgid")
+int no_rcu_lock(void *ctx)
+{
+ struct task_struct *task;
+
+ task = bpf_get_current_task_btf();
+ if (task->pid != target_pid)
+ return 0;
+
+ /* task->cgroups is untrusted in sleepable prog outside of RCU CS */
+ __no_rcu_lock(task->cgroups->dfl_cgrp);
+ return 0;
+}
+
+SEC("?fentry.s/" SYS_PREFIX "sys_getpgid")
+int yes_rcu_lock(void *ctx)
+{
+ struct task_struct *task;
+ struct cgroup *cgrp;
+ long *ptr;
+
+ task = bpf_get_current_task_btf();
+ if (task->pid != target_pid)
+ return 0;
+
+ if (is_cgroup1) {
+ bpf_rcu_read_lock();
+ cgrp = bpf_task_get_cgroup1(task, target_hid);
+ if (!cgrp) {
+ bpf_rcu_read_unlock();
+ return 0;
+ }
+
+ ptr = bpf_cgrp_storage_get(&map_a, cgrp, 0, BPF_LOCAL_STORAGE_GET_F_CREATE);
+ if (ptr)
+ cgroup_id = cgrp->kn->id;
+ bpf_cgroup_release(cgrp);
+ bpf_rcu_read_unlock();
+ return 0;
+ }
+
+ bpf_rcu_read_lock();
+ cgrp = task->cgroups->dfl_cgrp;
+ /* cgrp is trusted under RCU CS */
+ ptr = bpf_cgrp_storage_get(&map_a, cgrp, 0, BPF_LOCAL_STORAGE_GET_F_CREATE);
+ if (ptr)
+ cgroup_id = cgrp->kn->id;
+ bpf_rcu_read_unlock();
+ return 0;
+}
diff --git a/tools/testing/selftests/bpf/progs/cgrp_ls_tp_btf.c b/tools/testing/selftests/bpf/progs/cgrp_ls_tp_btf.c
new file mode 100644
index 000000000000..1c348f000f38
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/cgrp_ls_tp_btf.c
@@ -0,0 +1,126 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2022 Meta Platforms, Inc. and affiliates. */
+
+#include "vmlinux.h"
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
+
+char _license[] SEC("license") = "GPL";
+
+struct {
+ __uint(type, BPF_MAP_TYPE_CGRP_STORAGE);
+ __uint(map_flags, BPF_F_NO_PREALLOC);
+ __type(key, int);
+ __type(value, long);
+} map_a SEC(".maps");
+
+struct {
+ __uint(type, BPF_MAP_TYPE_CGRP_STORAGE);
+ __uint(map_flags, BPF_F_NO_PREALLOC);
+ __type(key, int);
+ __type(value, long);
+} map_b SEC(".maps");
+
+#define MAGIC_VALUE 0xabcd1234
+
+pid_t target_pid = 0;
+int mismatch_cnt = 0;
+int enter_cnt = 0;
+int exit_cnt = 0;
+int target_hid = 0;
+bool is_cgroup1 = 0;
+
+struct cgroup *bpf_task_get_cgroup1(struct task_struct *task, int hierarchy_id) __ksym;
+void bpf_cgroup_release(struct cgroup *cgrp) __ksym;
+
+static void __on_enter(struct pt_regs *regs, long id, struct cgroup *cgrp)
+{
+ long *ptr;
+ int err;
+
+ /* populate value 0 */
+ ptr = bpf_cgrp_storage_get(&map_a, cgrp, 0,
+ BPF_LOCAL_STORAGE_GET_F_CREATE);
+ if (!ptr)
+ return;
+
+ /* delete value 0 */
+ err = bpf_cgrp_storage_delete(&map_a, cgrp);
+ if (err)
+ return;
+
+ /* value is not available */
+ ptr = bpf_cgrp_storage_get(&map_a, cgrp, 0, 0);
+ if (ptr)
+ return;
+
+ /* re-populate the value */
+ ptr = bpf_cgrp_storage_get(&map_a, cgrp, 0,
+ BPF_LOCAL_STORAGE_GET_F_CREATE);
+ if (!ptr)
+ return;
+ __sync_fetch_and_add(&enter_cnt, 1);
+ *ptr = MAGIC_VALUE + enter_cnt;
+}
+
+SEC("tp_btf/sys_enter")
+int BPF_PROG(on_enter, struct pt_regs *regs, long id)
+{
+ struct task_struct *task;
+ struct cgroup *cgrp;
+
+ task = bpf_get_current_task_btf();
+ if (task->pid != target_pid)
+ return 0;
+
+ if (is_cgroup1) {
+ cgrp = bpf_task_get_cgroup1(task, target_hid);
+ if (!cgrp)
+ return 0;
+
+ __on_enter(regs, id, cgrp);
+ bpf_cgroup_release(cgrp);
+ return 0;
+ }
+
+ __on_enter(regs, id, task->cgroups->dfl_cgrp);
+ return 0;
+}
+
+static void __on_exit(struct pt_regs *regs, long id, struct cgroup *cgrp)
+{
+ long *ptr;
+
+ ptr = bpf_cgrp_storage_get(&map_a, cgrp, 0,
+ BPF_LOCAL_STORAGE_GET_F_CREATE);
+ if (!ptr)
+ return;
+
+ __sync_fetch_and_add(&exit_cnt, 1);
+ if (*ptr != MAGIC_VALUE + exit_cnt)
+ __sync_fetch_and_add(&mismatch_cnt, 1);
+}
+
+SEC("tp_btf/sys_exit")
+int BPF_PROG(on_exit, struct pt_regs *regs, long id)
+{
+ struct task_struct *task;
+ struct cgroup *cgrp;
+
+ task = bpf_get_current_task_btf();
+ if (task->pid != target_pid)
+ return 0;
+
+ if (is_cgroup1) {
+ cgrp = bpf_task_get_cgroup1(task, target_hid);
+ if (!cgrp)
+ return 0;
+
+ __on_exit(regs, id, cgrp);
+ bpf_cgroup_release(cgrp);
+ return 0;
+ }
+
+ __on_exit(regs, id, task->cgroups->dfl_cgrp);
+ return 0;
+}
diff --git a/tools/testing/selftests/bpf/progs/connect4_dropper.c b/tools/testing/selftests/bpf/progs/connect4_dropper.c
new file mode 100644
index 000000000000..d3f4c5e4fb69
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/connect4_dropper.c
@@ -0,0 +1,26 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include <string.h>
+
+#include <linux/stddef.h>
+#include <linux/bpf.h>
+
+#include <sys/socket.h>
+
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_endian.h>
+
+#define VERDICT_REJECT 0
+#define VERDICT_PROCEED 1
+
+SEC("cgroup/connect4")
+int connect_v4_dropper(struct bpf_sock_addr *ctx)
+{
+ if (ctx->type != SOCK_STREAM)
+ return VERDICT_PROCEED;
+ if (ctx->user_port == bpf_htons(60120))
+ return VERDICT_REJECT;
+ return VERDICT_PROCEED;
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/connect4_prog.c b/tools/testing/selftests/bpf/progs/connect4_prog.c
index 1ab2c5eba86c..7ef49ec04838 100644
--- a/tools/testing/selftests/bpf/progs/connect4_prog.c
+++ b/tools/testing/selftests/bpf/progs/connect4_prog.c
@@ -7,14 +7,15 @@
#include <linux/bpf.h>
#include <linux/in.h>
#include <linux/in6.h>
-#include <sys/socket.h>
-#include <netinet/tcp.h>
+#include <linux/tcp.h>
#include <linux/if.h>
#include <errno.h>
#include <bpf/bpf_helpers.h>
#include <bpf/bpf_endian.h>
+#include "bpf_tcp_helpers.h"
+
#define SRC_REWRITE_IP4 0x7f000004U
#define DST_REWRITE_IP4 0x7f000001U
#define DST_REWRITE_PORT4 4444
@@ -23,13 +24,15 @@
#define TCP_CA_NAME_MAX 16
#endif
+#ifndef TCP_NOTSENT_LOWAT
+#define TCP_NOTSENT_LOWAT 25
+#endif
+
#ifndef IFNAMSIZ
#define IFNAMSIZ 16
#endif
-int _version SEC("version") = 1;
-
-__attribute__ ((noinline))
+__attribute__ ((noinline)) __weak
int do_bind(struct bpf_sock_addr *ctx)
{
struct sockaddr_in sa = {};
@@ -104,6 +107,42 @@ static __inline int bind_to_device(struct bpf_sock_addr *ctx)
return 0;
}
+static __inline int set_keepalive(struct bpf_sock_addr *ctx)
+{
+ int zero = 0, one = 1;
+
+ if (bpf_setsockopt(ctx, SOL_SOCKET, SO_KEEPALIVE, &one, sizeof(one)))
+ return 1;
+ if (ctx->type == SOCK_STREAM) {
+ if (bpf_setsockopt(ctx, SOL_TCP, TCP_KEEPIDLE, &one, sizeof(one)))
+ return 1;
+ if (bpf_setsockopt(ctx, SOL_TCP, TCP_KEEPINTVL, &one, sizeof(one)))
+ return 1;
+ if (bpf_setsockopt(ctx, SOL_TCP, TCP_KEEPCNT, &one, sizeof(one)))
+ return 1;
+ if (bpf_setsockopt(ctx, SOL_TCP, TCP_SYNCNT, &one, sizeof(one)))
+ return 1;
+ if (bpf_setsockopt(ctx, SOL_TCP, TCP_USER_TIMEOUT, &one, sizeof(one)))
+ return 1;
+ }
+ if (bpf_setsockopt(ctx, SOL_SOCKET, SO_KEEPALIVE, &zero, sizeof(zero)))
+ return 1;
+
+ return 0;
+}
+
+static __inline int set_notsent_lowat(struct bpf_sock_addr *ctx)
+{
+ int lowat = 65535;
+
+ if (ctx->type == SOCK_STREAM) {
+ if (bpf_setsockopt(ctx, SOL_TCP, TCP_NOTSENT_LOWAT, &lowat, sizeof(lowat)))
+ return 1;
+ }
+
+ return 0;
+}
+
SEC("cgroup/connect4")
int connect_v4_prog(struct bpf_sock_addr *ctx)
{
@@ -121,6 +160,12 @@ int connect_v4_prog(struct bpf_sock_addr *ctx)
if (bind_to_device(ctx))
return 0;
+ if (set_keepalive(ctx))
+ return 0;
+
+ if (set_notsent_lowat(ctx))
+ return 0;
+
if (ctx->type != SOCK_STREAM && ctx->type != SOCK_DGRAM)
return 0;
else if (ctx->type == SOCK_STREAM)
diff --git a/tools/testing/selftests/bpf/progs/connect6_prog.c b/tools/testing/selftests/bpf/progs/connect6_prog.c
index 506d0f81a375..40266d2c737c 100644
--- a/tools/testing/selftests/bpf/progs/connect6_prog.c
+++ b/tools/testing/selftests/bpf/progs/connect6_prog.c
@@ -24,8 +24,6 @@
#define DST_REWRITE_PORT6 6666
-int _version SEC("version") = 1;
-
SEC("cgroup/connect6")
int connect_v6_prog(struct bpf_sock_addr *ctx)
{
diff --git a/tools/testing/selftests/bpf/progs/connect_force_port4.c b/tools/testing/selftests/bpf/progs/connect_force_port4.c
index 7396308677a3..27a632dd382e 100644
--- a/tools/testing/selftests/bpf/progs/connect_force_port4.c
+++ b/tools/testing/selftests/bpf/progs/connect_force_port4.c
@@ -10,8 +10,9 @@
#include <bpf/bpf_helpers.h>
#include <bpf/bpf_endian.h>
+#include <bpf_sockopt_helpers.h>
+
char _license[] SEC("license") = "GPL";
-int _version SEC("version") = 1;
struct svc_addr {
__be32 addr;
@@ -58,6 +59,9 @@ int connect4(struct bpf_sock_addr *ctx)
SEC("cgroup/getsockname4")
int getsockname4(struct bpf_sock_addr *ctx)
{
+ if (!get_set_sk_priority(ctx))
+ return 1;
+
/* Expose local server as 1.2.3.4:60000 to client. */
if (ctx->user_port == bpf_htons(60123)) {
ctx->user_ip4 = bpf_htonl(0x01020304);
@@ -71,6 +75,9 @@ int getpeername4(struct bpf_sock_addr *ctx)
{
struct svc_addr *orig;
+ if (!get_set_sk_priority(ctx))
+ return 1;
+
/* Expose service 1.2.3.4:60000 as peer instead of backend. */
if (ctx->user_port == bpf_htons(60123)) {
orig = bpf_sk_storage_get(&service_mapping, ctx->sk, 0, 0);
diff --git a/tools/testing/selftests/bpf/progs/connect_force_port6.c b/tools/testing/selftests/bpf/progs/connect_force_port6.c
index c1a2b555e9ad..19cad93e612f 100644
--- a/tools/testing/selftests/bpf/progs/connect_force_port6.c
+++ b/tools/testing/selftests/bpf/progs/connect_force_port6.c
@@ -9,8 +9,9 @@
#include <bpf/bpf_helpers.h>
#include <bpf/bpf_endian.h>
+#include <bpf_sockopt_helpers.h>
+
char _license[] SEC("license") = "GPL";
-int _version SEC("version") = 1;
struct svc_addr {
__be32 addr[4];
@@ -63,6 +64,9 @@ int connect6(struct bpf_sock_addr *ctx)
SEC("cgroup/getsockname6")
int getsockname6(struct bpf_sock_addr *ctx)
{
+ if (!get_set_sk_priority(ctx))
+ return 1;
+
/* Expose local server as [fc00::1]:60000 to client. */
if (ctx->user_port == bpf_htons(60124)) {
ctx->user_ip6[0] = bpf_htonl(0xfc000000);
@@ -79,6 +83,9 @@ int getpeername6(struct bpf_sock_addr *ctx)
{
struct svc_addr *orig;
+ if (!get_set_sk_priority(ctx))
+ return 1;
+
/* Expose service [fc00::1]:60000 as peer instead of backend. */
if (ctx->user_port == bpf_htons(60124)) {
orig = bpf_sk_storage_get(&service_mapping, ctx->sk, 0, 0);
diff --git a/tools/testing/selftests/bpf/progs/connect_ping.c b/tools/testing/selftests/bpf/progs/connect_ping.c
new file mode 100644
index 000000000000..60178192b672
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/connect_ping.c
@@ -0,0 +1,53 @@
+// SPDX-License-Identifier: GPL-2.0-only
+
+/*
+ * Copyright 2022 Google LLC.
+ */
+
+#include <linux/bpf.h>
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_endian.h>
+#include <netinet/in.h>
+#include <sys/socket.h>
+
+/* 2001:db8::1 */
+#define BINDADDR_V6 { { { 0x20,0x01,0x0d,0xb8,0,0,0,0,0,0,0,0,0,0,0,1 } } }
+
+__u32 do_bind = 0;
+__u32 has_error = 0;
+__u32 invocations_v4 = 0;
+__u32 invocations_v6 = 0;
+
+SEC("cgroup/connect4")
+int connect_v4_prog(struct bpf_sock_addr *ctx)
+{
+ struct sockaddr_in sa = {
+ .sin_family = AF_INET,
+ .sin_addr.s_addr = bpf_htonl(0x01010101),
+ };
+
+ __sync_fetch_and_add(&invocations_v4, 1);
+
+ if (do_bind && bpf_bind(ctx, (struct sockaddr *)&sa, sizeof(sa)))
+ has_error = 1;
+
+ return 1;
+}
+
+SEC("cgroup/connect6")
+int connect_v6_prog(struct bpf_sock_addr *ctx)
+{
+ struct sockaddr_in6 sa = {
+ .sin6_family = AF_INET6,
+ .sin6_addr = BINDADDR_V6,
+ };
+
+ __sync_fetch_and_add(&invocations_v6, 1);
+
+ if (do_bind && bpf_bind(ctx, (struct sockaddr *)&sa, sizeof(sa)))
+ has_error = 1;
+
+ return 1;
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/connect_unix_prog.c b/tools/testing/selftests/bpf/progs/connect_unix_prog.c
new file mode 100644
index 000000000000..2ef0e0c46d17
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/connect_unix_prog.c
@@ -0,0 +1,39 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2023 Meta Platforms, Inc. and affiliates. */
+
+#include "vmlinux.h"
+
+#include <string.h>
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_core_read.h>
+#include "bpf_kfuncs.h"
+
+__u8 SERVUN_REWRITE_ADDRESS[] = "\0bpf_cgroup_unix_test_rewrite";
+
+SEC("cgroup/connect_unix")
+int connect_unix_prog(struct bpf_sock_addr *ctx)
+{
+ struct bpf_sock_addr_kern *sa_kern = bpf_cast_to_kern_ctx(ctx);
+ struct sockaddr_un *sa_kern_unaddr;
+ __u32 unaddrlen = offsetof(struct sockaddr_un, sun_path) +
+ sizeof(SERVUN_REWRITE_ADDRESS) - 1;
+ int ret;
+
+ /* Rewrite destination. */
+ ret = bpf_sock_addr_set_sun_path(sa_kern, SERVUN_REWRITE_ADDRESS,
+ sizeof(SERVUN_REWRITE_ADDRESS) - 1);
+ if (ret)
+ return 0;
+
+ if (sa_kern->uaddrlen != unaddrlen)
+ return 0;
+
+ sa_kern_unaddr = bpf_core_cast(sa_kern->uaddr, struct sockaddr_un);
+ if (memcmp(sa_kern_unaddr->sun_path, SERVUN_REWRITE_ADDRESS,
+ sizeof(SERVUN_REWRITE_ADDRESS) - 1) != 0)
+ return 0;
+
+ return 1;
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/core_kern.c b/tools/testing/selftests/bpf/progs/core_kern.c
new file mode 100644
index 000000000000..004f2acef2eb
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/core_kern.c
@@ -0,0 +1,120 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2021 Facebook */
+#include "vmlinux.h"
+
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
+#include <bpf/bpf_core_read.h>
+
+#define ATTR __always_inline
+#include "test_jhash.h"
+
+struct {
+ __uint(type, BPF_MAP_TYPE_ARRAY);
+ __type(key, u32);
+ __type(value, u32);
+ __uint(max_entries, 256);
+} array1 SEC(".maps");
+
+struct {
+ __uint(type, BPF_MAP_TYPE_ARRAY);
+ __type(key, u32);
+ __type(value, u32);
+ __uint(max_entries, 256);
+} array2 SEC(".maps");
+
+static __noinline int randmap(int v, const struct net_device *dev)
+{
+ struct bpf_map *map = (struct bpf_map *)&array1;
+ int key = bpf_get_prandom_u32() & 0xff;
+ int *val;
+
+ if (bpf_get_prandom_u32() & 1)
+ map = (struct bpf_map *)&array2;
+
+ val = bpf_map_lookup_elem(map, &key);
+ if (val)
+ *val = bpf_get_prandom_u32() + v + dev->mtu;
+
+ return 0;
+}
+
+SEC("tp_btf/xdp_devmap_xmit")
+int BPF_PROG(tp_xdp_devmap_xmit_multi, const struct net_device
+ *from_dev, const struct net_device *to_dev, int sent, int drops,
+ int err)
+{
+ return randmap(from_dev->ifindex, from_dev);
+}
+
+SEC("fentry/eth_type_trans")
+int BPF_PROG(fentry_eth_type_trans, struct sk_buff *skb,
+ struct net_device *dev, unsigned short protocol)
+{
+ return randmap(dev->ifindex + skb->len, dev);
+}
+
+SEC("fexit/eth_type_trans")
+int BPF_PROG(fexit_eth_type_trans, struct sk_buff *skb,
+ struct net_device *dev, unsigned short protocol)
+{
+ return randmap(dev->ifindex + skb->len, dev);
+}
+
+volatile const int never;
+
+struct __sk_bUfF /* it will not exist in vmlinux */ {
+ int len;
+} __attribute__((preserve_access_index));
+
+struct bpf_testmod_test_read_ctx /* it exists in bpf_testmod */ {
+ size_t len;
+} __attribute__((preserve_access_index));
+
+SEC("tc")
+int balancer_ingress(struct __sk_buff *ctx)
+{
+ void *data_end = (void *)(long)ctx->data_end;
+ void *data = (void *)(long)ctx->data;
+ void *ptr;
+ int nh_off, i = 0;
+
+ nh_off = 14;
+
+ /* pragma unroll doesn't work on large loops */
+#define C do { \
+ ptr = data + i; \
+ if (ptr + nh_off > data_end) \
+ break; \
+ ctx->tc_index = jhash(ptr, nh_off, ctx->cb[0] + i++); \
+ if (never) { \
+ /* below is a dead code with unresolvable CO-RE relo */ \
+ i += ((struct __sk_bUfF *)ctx)->len; \
+ /* this CO-RE relo may or may not resolve
+ * depending on whether bpf_testmod is loaded.
+ */ \
+ i += ((struct bpf_testmod_test_read_ctx *)ctx)->len; \
+ } \
+ } while (0);
+#define C30 C;C;C;C;C;C;C;C;C;C;C;C;C;C;C;C;C;C;C;C;C;C;C;C;C;C;C;C;C;C;
+ C30;C30;C30; /* 90 calls */
+ return 0;
+}
+
+typedef int (*func_proto_typedef___match)(long);
+typedef int (*func_proto_typedef___doesnt_match)(char *);
+typedef int (*func_proto_typedef_nested1)(func_proto_typedef___match);
+
+int proto_out[3];
+
+SEC("raw_tracepoint/sys_enter")
+int core_relo_proto(void *ctx)
+{
+ proto_out[0] = bpf_core_type_exists(func_proto_typedef___match);
+ proto_out[1] = bpf_core_type_exists(func_proto_typedef___doesnt_match);
+ proto_out[2] = bpf_core_type_exists(func_proto_typedef_nested1);
+
+ return 0;
+}
+
+char LICENSE[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/core_kern_overflow.c b/tools/testing/selftests/bpf/progs/core_kern_overflow.c
new file mode 100644
index 000000000000..f0d5652256ba
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/core_kern_overflow.c
@@ -0,0 +1,22 @@
+// SPDX-License-Identifier: GPL-2.0
+#include "vmlinux.h"
+
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
+#include <bpf/bpf_core_read.h>
+
+typedef int (*func_proto_typedef)(long);
+typedef int (*func_proto_typedef_nested1)(func_proto_typedef);
+typedef int (*func_proto_typedef_nested2)(func_proto_typedef_nested1);
+
+int proto_out;
+
+SEC("raw_tracepoint/sys_enter")
+int core_relo_proto(void *ctx)
+{
+ proto_out = bpf_core_type_exists(func_proto_typedef_nested2);
+
+ return 0;
+}
+
+char LICENSE[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/core_reloc_types.h b/tools/testing/selftests/bpf/progs/core_reloc_types.h
index 34d84717c946..fd8e1b4c6762 100644
--- a/tools/testing/selftests/bpf/progs/core_reloc_types.h
+++ b/tools/testing/selftests/bpf/progs/core_reloc_types.h
@@ -1,5 +1,10 @@
#include <stdint.h>
#include <stdbool.h>
+
+void preserce_ptr_sz_fn(long x) {}
+
+#define __bpf_aligned __attribute__((aligned(8)))
+
/*
* KERNEL
*/
@@ -8,6 +13,24 @@ struct core_reloc_kernel_output {
int valid[10];
char comm[sizeof("test_progs")];
int comm_len;
+ bool local_task_struct_matches;
+};
+
+/*
+ * MODULE
+ */
+
+struct core_reloc_module_output {
+ long long len;
+ long long off;
+ int read_ctx_sz;
+ bool read_ctx_exists;
+ bool buf_exists;
+ bool len_exists;
+ bool off_exists;
+ /* we have test_progs[-flavor], so cut flavor part */
+ char comm[sizeof("test_progs")];
+ int comm_len;
};
/*
@@ -444,51 +467,51 @@ struct core_reloc_primitives {
char a;
int b;
enum core_reloc_primitives_enum c;
- void *d;
- int (*f)(const char *);
+ void *d __bpf_aligned;
+ int (*f)(const char *) __bpf_aligned;
};
struct core_reloc_primitives___diff_enum_def {
char a;
int b;
- void *d;
- int (*f)(const char *);
+ void *d __bpf_aligned;
+ int (*f)(const char *) __bpf_aligned;
enum {
X = 100,
Y = 200,
- } c; /* inline enum def with differing set of values */
+ } c __bpf_aligned; /* inline enum def with differing set of values */
};
struct core_reloc_primitives___diff_func_proto {
- void (*f)(int); /* incompatible function prototype */
- void *d;
- enum core_reloc_primitives_enum c;
+ void (*f)(int) __bpf_aligned; /* incompatible function prototype */
+ void *d __bpf_aligned;
+ enum core_reloc_primitives_enum c __bpf_aligned;
int b;
char a;
};
struct core_reloc_primitives___diff_ptr_type {
- const char * const d; /* different pointee type + modifiers */
- char a;
+ const char * const d __bpf_aligned; /* different pointee type + modifiers */
+ char a __bpf_aligned;
int b;
enum core_reloc_primitives_enum c;
- int (*f)(const char *);
+ int (*f)(const char *) __bpf_aligned;
};
struct core_reloc_primitives___err_non_enum {
char a[1];
int b;
int c; /* int instead of enum */
- void *d;
- int (*f)(const char *);
+ void *d __bpf_aligned;
+ int (*f)(const char *) __bpf_aligned;
};
struct core_reloc_primitives___err_non_int {
char a[1];
- int *b; /* ptr instead of int */
- enum core_reloc_primitives_enum c;
- void *d;
- int (*f)(const char *);
+ int *b __bpf_aligned; /* ptr instead of int */
+ enum core_reloc_primitives_enum c __bpf_aligned;
+ void *d __bpf_aligned;
+ int (*f)(const char *) __bpf_aligned;
};
struct core_reloc_primitives___err_non_ptr {
@@ -496,7 +519,7 @@ struct core_reloc_primitives___err_non_ptr {
int b;
enum core_reloc_primitives_enum c;
int d; /* int instead of ptr */
- int (*f)(const char *);
+ int (*f)(const char *) __bpf_aligned;
};
/*
@@ -507,7 +530,7 @@ struct core_reloc_mods_output {
};
typedef const int int_t;
-typedef const char *char_ptr_t;
+typedef const char *char_ptr_t __bpf_aligned;
typedef const int arr_t[7];
struct core_reloc_mods_substruct {
@@ -523,9 +546,9 @@ typedef struct {
struct core_reloc_mods {
int a;
int_t b;
- char *c;
+ char *c __bpf_aligned;
char_ptr_t d;
- int e[3];
+ int e[3] __bpf_aligned;
arr_t f;
struct core_reloc_mods_substruct g;
core_reloc_mods_substruct_t h;
@@ -535,9 +558,9 @@ struct core_reloc_mods {
struct core_reloc_mods___mod_swap {
int b;
int_t a;
- char *d;
+ char *d __bpf_aligned;
char_ptr_t c;
- int f[3];
+ int f[3] __bpf_aligned;
arr_t e;
struct {
int y;
@@ -555,7 +578,7 @@ typedef arr1_t arr2_t;
typedef arr2_t arr3_t;
typedef arr3_t arr4_t;
-typedef const char * const volatile fancy_char_ptr_t;
+typedef const char * const volatile fancy_char_ptr_t __bpf_aligned;
typedef core_reloc_mods_substruct_t core_reloc_mods_substruct_tt;
@@ -567,7 +590,7 @@ struct core_reloc_mods___typedefs {
arr4_t e;
fancy_char_ptr_t d;
fancy_char_ptr_t c;
- int3_t b;
+ int3_t b __bpf_aligned;
int3_t a;
};
@@ -647,7 +670,7 @@ struct core_reloc_misc_extensible {
};
/*
- * EXISTENCE
+ * FIELD EXISTENCE
*/
struct core_reloc_existence_output {
int a_exists;
@@ -678,27 +701,11 @@ struct core_reloc_existence___minimal {
int a;
};
-struct core_reloc_existence___err_wrong_int_sz {
- short a;
-};
-
-struct core_reloc_existence___err_wrong_int_type {
+struct core_reloc_existence___wrong_field_defs {
+ void *a;
int b[1];
-};
-
-struct core_reloc_existence___err_wrong_int_kind {
struct{ int x; } c;
-};
-
-struct core_reloc_existence___err_wrong_arr_kind {
int arr;
-};
-
-struct core_reloc_existence___err_wrong_arr_value_type {
- short arr[1];
-};
-
-struct core_reloc_existence___err_wrong_struct_type {
int s;
};
@@ -739,19 +746,19 @@ struct core_reloc_bitfields___bit_sz_change {
int8_t sb4: 1; /* 4 -> 1 */
int32_t sb20: 30; /* 20 -> 30 */
/* non-bitfields */
- uint16_t u32; /* 32 -> 16 */
- int64_t s32; /* 32 -> 64 */
+ uint16_t u32; /* 32 -> 16 */
+ int64_t s32 __bpf_aligned; /* 32 -> 64 */
};
/* turn bitfield into non-bitfield and vice versa */
struct core_reloc_bitfields___bitfield_vs_int {
uint64_t ub1; /* 3 -> 64 non-bitfield */
uint8_t ub2; /* 20 -> 8 non-bitfield */
- int64_t ub7; /* 7 -> 64 non-bitfield signed */
- int64_t sb4; /* 4 -> 64 non-bitfield signed */
- uint64_t sb20; /* 20 -> 16 non-bitfield unsigned */
- int32_t u32: 20; /* 32 non-bitfield -> 20 bitfield */
- uint64_t s32: 60; /* 32 non-bitfield -> 60 bitfield */
+ int64_t ub7 __bpf_aligned; /* 7 -> 64 non-bitfield signed */
+ int64_t sb4 __bpf_aligned; /* 4 -> 64 non-bitfield signed */
+ uint64_t sb20 __bpf_aligned; /* 20 -> 16 non-bitfield unsigned */
+ int32_t u32: 20; /* 32 non-bitfield -> 20 bitfield */
+ uint64_t s32: 60 __bpf_aligned; /* 32 non-bitfield -> 60 bitfield */
};
struct core_reloc_bitfields___just_big_enough {
@@ -779,12 +786,21 @@ struct core_reloc_bitfields___err_too_big_bitfield {
*/
struct core_reloc_size_output {
int int_sz;
+ int int_off;
int struct_sz;
+ int struct_off;
int union_sz;
+ int union_off;
int arr_sz;
+ int arr_off;
int arr_elem_sz;
+ int arr_elem_off;
int ptr_sz;
+ int ptr_off;
int enum_sz;
+ int enum_off;
+ int float_sz;
+ int float_off;
};
struct core_reloc_size {
@@ -794,6 +810,7 @@ struct core_reloc_size {
int arr_field[4];
void *ptr_field;
enum { VALUE = 123 } enum_field;
+ float float_field;
};
struct core_reloc_size___diff_sz {
@@ -803,4 +820,534 @@ struct core_reloc_size___diff_sz {
char arr_field[10];
void *ptr_field;
enum { OTHER_VALUE = 0xFFFFFFFFFFFFFFFF } enum_field;
+ double float_field;
+};
+
+struct core_reloc_size___diff_offs {
+ float float_field;
+ enum { YET_OTHER_VALUE = 123 } enum_field;
+ void *ptr_field;
+ int arr_field[4];
+ union { int x; } union_field;
+ struct { int x; } struct_field;
+ int int_field;
+};
+
+/* Error case of two candidates with the fields (int_field) at the same
+ * offset, but with differing final relocation values: size 4 vs size 1
+ */
+struct core_reloc_size___err_ambiguous1 {
+ /* int at offset 0 */
+ int int_field;
+
+ struct { int x; } struct_field;
+ union { int x; } union_field;
+ int arr_field[4];
+ void *ptr_field;
+ enum { VALUE___1 = 123 } enum_field;
+ float float_field;
+};
+
+struct core_reloc_size___err_ambiguous2 {
+ /* char at offset 0 */
+ char int_field;
+
+ struct { int x; } struct_field;
+ union { int x; } union_field;
+ int arr_field[4];
+ void *ptr_field;
+ enum { VALUE___2 = 123 } enum_field;
+ float float_field;
+};
+
+/*
+ * TYPE EXISTENCE, MATCH & SIZE
+ */
+struct core_reloc_type_based_output {
+ bool struct_exists;
+ bool complex_struct_exists;
+ bool union_exists;
+ bool enum_exists;
+ bool typedef_named_struct_exists;
+ bool typedef_anon_struct_exists;
+ bool typedef_struct_ptr_exists;
+ bool typedef_int_exists;
+ bool typedef_enum_exists;
+ bool typedef_void_ptr_exists;
+ bool typedef_restrict_ptr_exists;
+ bool typedef_func_proto_exists;
+ bool typedef_arr_exists;
+
+ bool struct_matches;
+ bool complex_struct_matches;
+ bool union_matches;
+ bool enum_matches;
+ bool typedef_named_struct_matches;
+ bool typedef_anon_struct_matches;
+ bool typedef_struct_ptr_matches;
+ bool typedef_int_matches;
+ bool typedef_enum_matches;
+ bool typedef_void_ptr_matches;
+ bool typedef_restrict_ptr_matches;
+ bool typedef_func_proto_matches;
+ bool typedef_arr_matches;
+
+ int struct_sz;
+ int union_sz;
+ int enum_sz;
+ int typedef_named_struct_sz;
+ int typedef_anon_struct_sz;
+ int typedef_struct_ptr_sz;
+ int typedef_int_sz;
+ int typedef_enum_sz;
+ int typedef_void_ptr_sz;
+ int typedef_func_proto_sz;
+ int typedef_arr_sz;
+};
+
+struct a_struct {
+ int x;
+};
+
+struct a_complex_struct {
+ union {
+ struct a_struct * restrict a;
+ void *b;
+ } x;
+ volatile long y;
+};
+
+union a_union {
+ int y;
+ int z;
+};
+
+typedef struct a_struct named_struct_typedef;
+
+typedef struct { int x, y, z; } anon_struct_typedef;
+
+typedef struct {
+ int a, b, c;
+} *struct_ptr_typedef;
+
+enum an_enum {
+ AN_ENUM_VAL1 = 1,
+ AN_ENUM_VAL2 = 2,
+ AN_ENUM_VAL3 = 3,
+};
+
+typedef int int_typedef;
+
+typedef enum { TYPEDEF_ENUM_VAL1, TYPEDEF_ENUM_VAL2 } enum_typedef;
+
+typedef void *void_ptr_typedef;
+typedef int *restrict restrict_ptr_typedef;
+
+typedef int (*func_proto_typedef)(long);
+
+typedef char arr_typedef[20];
+
+struct core_reloc_type_based {
+ struct a_struct f1;
+ struct a_complex_struct f2;
+ union a_union f3;
+ enum an_enum f4;
+ named_struct_typedef f5;
+ anon_struct_typedef f6;
+ struct_ptr_typedef f7;
+ int_typedef f8;
+ enum_typedef f9;
+ void_ptr_typedef f10;
+ restrict_ptr_typedef f11;
+ func_proto_typedef f12;
+ arr_typedef f13;
+};
+
+/* no types in target */
+struct core_reloc_type_based___all_missing {
+};
+
+/* different member orders, enum variant values, signedness, etc */
+struct a_struct___diff {
+ int x;
+ int a;
+};
+
+struct a_struct___forward;
+
+struct a_complex_struct___diff {
+ union {
+ struct a_struct___forward *a;
+ void *b;
+ } x;
+ volatile long y;
+};
+
+union a_union___diff {
+ int z;
+ int y;
+};
+
+typedef struct a_struct___diff named_struct_typedef___diff;
+
+typedef struct { int z, x, y; } anon_struct_typedef___diff;
+
+typedef struct {
+ int c;
+ int b;
+ int a;
+} *struct_ptr_typedef___diff;
+
+enum an_enum___diff {
+ AN_ENUM_VAL2___diff = 0,
+ AN_ENUM_VAL1___diff = 42,
+ AN_ENUM_VAL3___diff = 1,
+};
+
+typedef unsigned int int_typedef___diff;
+
+typedef enum { TYPEDEF_ENUM_VAL2___diff, TYPEDEF_ENUM_VAL1___diff = 50 } enum_typedef___diff;
+
+typedef const void *void_ptr_typedef___diff;
+
+typedef int_typedef___diff (*func_proto_typedef___diff)(long);
+
+typedef char arr_typedef___diff[3];
+
+struct core_reloc_type_based___diff {
+ struct a_struct___diff f1;
+ struct a_complex_struct___diff f2;
+ union a_union___diff f3;
+ enum an_enum___diff f4;
+ named_struct_typedef___diff f5;
+ anon_struct_typedef___diff f6;
+ struct_ptr_typedef___diff f7;
+ int_typedef___diff f8;
+ enum_typedef___diff f9;
+ void_ptr_typedef___diff f10;
+ func_proto_typedef___diff f11;
+ arr_typedef___diff f12;
+};
+
+/* different type sizes, extra modifiers, anon vs named enums, etc */
+struct a_struct___diff_sz {
+ long x;
+ int y;
+ char z;
+};
+
+union a_union___diff_sz {
+ char yy;
+ char zz;
+};
+
+typedef struct a_struct___diff_sz named_struct_typedef___diff_sz;
+
+typedef struct { long xx, yy, zzz; } anon_struct_typedef___diff_sz;
+
+typedef struct {
+ char aa[1], bb[2], cc[3];
+} *struct_ptr_typedef___diff_sz;
+
+enum an_enum___diff_sz {
+ AN_ENUM_VAL1___diff_sz = 0x123412341234,
+ AN_ENUM_VAL2___diff_sz = 2,
+};
+
+typedef unsigned long int_typedef___diff_sz;
+
+typedef enum an_enum___diff_sz enum_typedef___diff_sz;
+
+typedef const void * const void_ptr_typedef___diff_sz;
+
+typedef int_typedef___diff_sz (*func_proto_typedef___diff_sz)(char);
+
+typedef int arr_typedef___diff_sz[2];
+
+struct core_reloc_type_based___diff_sz {
+ struct a_struct___diff_sz f1;
+ union a_union___diff_sz f2;
+ enum an_enum___diff_sz f3;
+ named_struct_typedef___diff_sz f4;
+ anon_struct_typedef___diff_sz f5;
+ struct_ptr_typedef___diff_sz f6;
+ int_typedef___diff_sz f7;
+ enum_typedef___diff_sz f8;
+ void_ptr_typedef___diff_sz f9;
+ func_proto_typedef___diff_sz f10;
+ arr_typedef___diff_sz f11;
+};
+
+/* incompatibilities between target and local types */
+union a_struct___incompat { /* union instead of struct */
+ int x;
+};
+
+struct a_union___incompat { /* struct instead of union */
+ int y;
+ int z;
+};
+
+/* typedef to union, not to struct */
+typedef union a_struct___incompat named_struct_typedef___incompat;
+
+/* typedef to void pointer, instead of struct */
+typedef void *anon_struct_typedef___incompat;
+
+/* extra pointer indirection */
+typedef struct {
+ int a, b, c;
+} **struct_ptr_typedef___incompat;
+
+/* typedef of a struct with int, instead of int */
+typedef struct { int x; } int_typedef___incompat;
+
+/* typedef to func_proto, instead of enum */
+typedef int (*enum_typedef___incompat)(void);
+
+/* pointer to char instead of void */
+typedef char *void_ptr_typedef___incompat;
+
+/* void return type instead of int */
+typedef void (*func_proto_typedef___incompat)(long);
+
+/* multi-dimensional array instead of a single-dimensional */
+typedef int arr_typedef___incompat[20][2];
+
+struct core_reloc_type_based___incompat {
+ union a_struct___incompat f1;
+ struct a_union___incompat f2;
+ /* the only valid one is enum, to check that something still succeeds */
+ enum an_enum f3;
+ named_struct_typedef___incompat f4;
+ anon_struct_typedef___incompat f5;
+ struct_ptr_typedef___incompat f6;
+ int_typedef___incompat f7;
+ enum_typedef___incompat f8;
+ void_ptr_typedef___incompat f9;
+ func_proto_typedef___incompat f10;
+ arr_typedef___incompat f11;
+};
+
+/* func_proto with incompatible signature */
+typedef void (*func_proto_typedef___fn_wrong_ret1)(long);
+typedef int * (*func_proto_typedef___fn_wrong_ret2)(long);
+typedef struct { int x; } int_struct_typedef;
+typedef int_struct_typedef (*func_proto_typedef___fn_wrong_ret3)(long);
+typedef int (*func_proto_typedef___fn_wrong_arg)(void *);
+typedef int (*func_proto_typedef___fn_wrong_arg_cnt1)(long, long);
+typedef int (*func_proto_typedef___fn_wrong_arg_cnt2)(void);
+
+struct core_reloc_type_based___fn_wrong_args {
+ /* one valid type to make sure relos still work */
+ struct a_struct f1;
+ func_proto_typedef___fn_wrong_ret1 f2;
+ func_proto_typedef___fn_wrong_ret2 f3;
+ func_proto_typedef___fn_wrong_ret3 f4;
+ func_proto_typedef___fn_wrong_arg f5;
+ func_proto_typedef___fn_wrong_arg_cnt1 f6;
+ func_proto_typedef___fn_wrong_arg_cnt2 f7;
+};
+
+/*
+ * TYPE ID MAPPING (LOCAL AND TARGET)
+ */
+struct core_reloc_type_id_output {
+ int local_anon_struct;
+ int local_anon_union;
+ int local_anon_enum;
+ int local_anon_func_proto_ptr;
+ int local_anon_void_ptr;
+ int local_anon_arr;
+
+ int local_struct;
+ int local_union;
+ int local_enum;
+ int local_int;
+ int local_struct_typedef;
+ int local_func_proto_typedef;
+ int local_arr_typedef;
+
+ int targ_struct;
+ int targ_union;
+ int targ_enum;
+ int targ_int;
+ int targ_struct_typedef;
+ int targ_func_proto_typedef;
+ int targ_arr_typedef;
+};
+
+struct core_reloc_type_id {
+ struct a_struct f1;
+ union a_union f2;
+ enum an_enum f3;
+ named_struct_typedef f4;
+ func_proto_typedef f5;
+ arr_typedef f6;
+};
+
+struct core_reloc_type_id___missing_targets {
+ /* nothing */
+};
+
+/*
+ * ENUMERATOR VALUE EXISTENCE AND VALUE RELOCATION
+ */
+struct core_reloc_enumval_output {
+ bool named_val1_exists;
+ bool named_val2_exists;
+ bool named_val3_exists;
+ bool anon_val1_exists;
+ bool anon_val2_exists;
+ bool anon_val3_exists;
+
+ int named_val1;
+ int named_val2;
+ int anon_val1;
+ int anon_val2;
+};
+
+struct core_reloc_enum64val_output {
+ bool unsigned_val1_exists;
+ bool unsigned_val2_exists;
+ bool unsigned_val3_exists;
+ bool signed_val1_exists;
+ bool signed_val2_exists;
+ bool signed_val3_exists;
+
+ long unsigned_val1;
+ long unsigned_val2;
+ long signed_val1;
+ long signed_val2;
+};
+
+enum named_enum {
+ NAMED_ENUM_VAL1 = 1,
+ NAMED_ENUM_VAL2 = 2,
+ NAMED_ENUM_VAL3 = 3,
+};
+
+typedef enum {
+ ANON_ENUM_VAL1 = 0x10,
+ ANON_ENUM_VAL2 = 0x20,
+ ANON_ENUM_VAL3 = 0x30,
+} anon_enum;
+
+struct core_reloc_enumval {
+ enum named_enum f1;
+ anon_enum f2;
+};
+
+enum named_unsigned_enum64 {
+ UNSIGNED_ENUM64_VAL1 = 0x1ffffffffULL,
+ UNSIGNED_ENUM64_VAL2 = 0x2,
+ UNSIGNED_ENUM64_VAL3 = 0x3ffffffffULL,
+};
+
+enum named_signed_enum64 {
+ SIGNED_ENUM64_VAL1 = 0x1ffffffffLL,
+ SIGNED_ENUM64_VAL2 = -2,
+ SIGNED_ENUM64_VAL3 = 0x3ffffffffLL,
+};
+
+struct core_reloc_enum64val {
+ enum named_unsigned_enum64 f1;
+ enum named_signed_enum64 f2;
+};
+
+/* differing enumerator values */
+enum named_enum___diff {
+ NAMED_ENUM_VAL1___diff = 101,
+ NAMED_ENUM_VAL2___diff = 202,
+ NAMED_ENUM_VAL3___diff = 303,
+};
+
+typedef enum {
+ ANON_ENUM_VAL1___diff = 0x11,
+ ANON_ENUM_VAL2___diff = 0x22,
+ ANON_ENUM_VAL3___diff = 0x33,
+} anon_enum___diff;
+
+struct core_reloc_enumval___diff {
+ enum named_enum___diff f1;
+ anon_enum___diff f2;
+};
+
+enum named_unsigned_enum64___diff {
+ UNSIGNED_ENUM64_VAL1___diff = 0x101ffffffffULL,
+ UNSIGNED_ENUM64_VAL2___diff = 0x202ffffffffULL,
+ UNSIGNED_ENUM64_VAL3___diff = 0x303ffffffffULL,
+};
+
+enum named_signed_enum64___diff {
+ SIGNED_ENUM64_VAL1___diff = -101,
+ SIGNED_ENUM64_VAL2___diff = -202,
+ SIGNED_ENUM64_VAL3___diff = -303,
+};
+
+struct core_reloc_enum64val___diff {
+ enum named_unsigned_enum64___diff f1;
+ enum named_signed_enum64___diff f2;
+};
+
+/* missing (optional) third enum value */
+enum named_enum___val3_missing {
+ NAMED_ENUM_VAL1___val3_missing = 111,
+ NAMED_ENUM_VAL2___val3_missing = 222,
+};
+
+typedef enum {
+ ANON_ENUM_VAL1___val3_missing = 0x111,
+ ANON_ENUM_VAL2___val3_missing = 0x222,
+} anon_enum___val3_missing;
+
+struct core_reloc_enumval___val3_missing {
+ enum named_enum___val3_missing f1;
+ anon_enum___val3_missing f2;
+};
+
+enum named_unsigned_enum64___val3_missing {
+ UNSIGNED_ENUM64_VAL1___val3_missing = 0x111ffffffffULL,
+ UNSIGNED_ENUM64_VAL2___val3_missing = 0x222,
+};
+
+enum named_signed_enum64___val3_missing {
+ SIGNED_ENUM64_VAL1___val3_missing = 0x111ffffffffLL,
+ SIGNED_ENUM64_VAL2___val3_missing = -222,
+};
+
+struct core_reloc_enum64val___val3_missing {
+ enum named_unsigned_enum64___val3_missing f1;
+ enum named_signed_enum64___val3_missing f2;
+};
+
+/* missing (mandatory) second enum value, should fail */
+enum named_enum___err_missing {
+ NAMED_ENUM_VAL1___err_missing = 1,
+ NAMED_ENUM_VAL3___err_missing = 3,
+};
+
+typedef enum {
+ ANON_ENUM_VAL1___err_missing = 0x111,
+ ANON_ENUM_VAL3___err_missing = 0x222,
+} anon_enum___err_missing;
+
+struct core_reloc_enumval___err_missing {
+ enum named_enum___err_missing f1;
+ anon_enum___err_missing f2;
+};
+
+enum named_unsigned_enum64___err_missing {
+ UNSIGNED_ENUM64_VAL1___err_missing = 0x1ffffffffULL,
+ UNSIGNED_ENUM64_VAL3___err_missing = 0x3ffffffffULL,
+};
+
+enum named_signed_enum64___err_missing {
+ SIGNED_ENUM64_VAL1___err_missing = 0x1ffffffffLL,
+ SIGNED_ENUM64_VAL3___err_missing = -3,
+};
+
+struct core_reloc_enum64val___err_missing {
+ enum named_unsigned_enum64___err_missing f1;
+ enum named_signed_enum64___err_missing f2;
};
diff --git a/tools/testing/selftests/bpf/progs/cpumask_common.h b/tools/testing/selftests/bpf/progs/cpumask_common.h
new file mode 100644
index 000000000000..c705d8112a35
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/cpumask_common.h
@@ -0,0 +1,123 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* Copyright (c) 2023 Meta Platforms, Inc. and affiliates. */
+
+#ifndef _CPUMASK_COMMON_H
+#define _CPUMASK_COMMON_H
+
+#include "errno.h"
+#include <stdbool.h>
+
+int err;
+
+#define private(name) SEC(".bss." #name) __hidden __attribute__((aligned(8)))
+private(MASK) static struct bpf_cpumask __kptr * global_mask;
+
+struct __cpumask_map_value {
+ struct bpf_cpumask __kptr * cpumask;
+};
+
+struct array_map {
+ __uint(type, BPF_MAP_TYPE_ARRAY);
+ __type(key, int);
+ __type(value, struct __cpumask_map_value);
+ __uint(max_entries, 1);
+} __cpumask_map SEC(".maps");
+
+struct bpf_cpumask *bpf_cpumask_create(void) __ksym __weak;
+void bpf_cpumask_release(struct bpf_cpumask *cpumask) __ksym __weak;
+struct bpf_cpumask *bpf_cpumask_acquire(struct bpf_cpumask *cpumask) __ksym __weak;
+u32 bpf_cpumask_first(const struct cpumask *cpumask) __ksym __weak;
+u32 bpf_cpumask_first_zero(const struct cpumask *cpumask) __ksym __weak;
+u32 bpf_cpumask_first_and(const struct cpumask *src1,
+ const struct cpumask *src2) __ksym __weak;
+void bpf_cpumask_set_cpu(u32 cpu, struct bpf_cpumask *cpumask) __ksym __weak;
+void bpf_cpumask_clear_cpu(u32 cpu, struct bpf_cpumask *cpumask) __ksym __weak;
+bool bpf_cpumask_test_cpu(u32 cpu, const struct cpumask *cpumask) __ksym __weak;
+bool bpf_cpumask_test_and_set_cpu(u32 cpu, struct bpf_cpumask *cpumask) __ksym __weak;
+bool bpf_cpumask_test_and_clear_cpu(u32 cpu, struct bpf_cpumask *cpumask) __ksym __weak;
+void bpf_cpumask_setall(struct bpf_cpumask *cpumask) __ksym __weak;
+void bpf_cpumask_clear(struct bpf_cpumask *cpumask) __ksym __weak;
+bool bpf_cpumask_and(struct bpf_cpumask *cpumask,
+ const struct cpumask *src1,
+ const struct cpumask *src2) __ksym __weak;
+void bpf_cpumask_or(struct bpf_cpumask *cpumask,
+ const struct cpumask *src1,
+ const struct cpumask *src2) __ksym __weak;
+void bpf_cpumask_xor(struct bpf_cpumask *cpumask,
+ const struct cpumask *src1,
+ const struct cpumask *src2) __ksym __weak;
+bool bpf_cpumask_equal(const struct cpumask *src1, const struct cpumask *src2) __ksym __weak;
+bool bpf_cpumask_intersects(const struct cpumask *src1, const struct cpumask *src2) __ksym __weak;
+bool bpf_cpumask_subset(const struct cpumask *src1, const struct cpumask *src2) __ksym __weak;
+bool bpf_cpumask_empty(const struct cpumask *cpumask) __ksym __weak;
+bool bpf_cpumask_full(const struct cpumask *cpumask) __ksym __weak;
+void bpf_cpumask_copy(struct bpf_cpumask *dst, const struct cpumask *src) __ksym __weak;
+u32 bpf_cpumask_any_distribute(const struct cpumask *src) __ksym __weak;
+u32 bpf_cpumask_any_and_distribute(const struct cpumask *src1,
+ const struct cpumask *src2) __ksym __weak;
+u32 bpf_cpumask_weight(const struct cpumask *cpumask) __ksym __weak;
+
+void bpf_rcu_read_lock(void) __ksym __weak;
+void bpf_rcu_read_unlock(void) __ksym __weak;
+
+static inline const struct cpumask *cast(struct bpf_cpumask *cpumask)
+{
+ return (const struct cpumask *)cpumask;
+}
+
+static inline struct bpf_cpumask *create_cpumask(void)
+{
+ struct bpf_cpumask *cpumask;
+
+ cpumask = bpf_cpumask_create();
+ if (!cpumask) {
+ err = 1;
+ return NULL;
+ }
+
+ if (!bpf_cpumask_empty(cast(cpumask))) {
+ err = 2;
+ bpf_cpumask_release(cpumask);
+ return NULL;
+ }
+
+ return cpumask;
+}
+
+static inline struct __cpumask_map_value *cpumask_map_value_lookup(void)
+{
+ u32 key = 0;
+
+ return bpf_map_lookup_elem(&__cpumask_map, &key);
+}
+
+static inline int cpumask_map_insert(struct bpf_cpumask *mask)
+{
+ struct __cpumask_map_value local, *v;
+ long status;
+ struct bpf_cpumask *old;
+ u32 key = 0;
+
+ local.cpumask = NULL;
+ status = bpf_map_update_elem(&__cpumask_map, &key, &local, 0);
+ if (status) {
+ bpf_cpumask_release(mask);
+ return status;
+ }
+
+ v = bpf_map_lookup_elem(&__cpumask_map, &key);
+ if (!v) {
+ bpf_cpumask_release(mask);
+ return -ENOENT;
+ }
+
+ old = bpf_kptr_xchg(&v->cpumask, mask);
+ if (old) {
+ bpf_cpumask_release(old);
+ return -EEXIST;
+ }
+
+ return 0;
+}
+
+#endif /* _CPUMASK_COMMON_H */
diff --git a/tools/testing/selftests/bpf/progs/cpumask_failure.c b/tools/testing/selftests/bpf/progs/cpumask_failure.c
new file mode 100644
index 000000000000..a9bf6ea336cf
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/cpumask_failure.c
@@ -0,0 +1,192 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2023 Meta Platforms, Inc. and affiliates. */
+
+#include <vmlinux.h>
+#include <bpf/bpf_tracing.h>
+#include <bpf/bpf_helpers.h>
+#include "bpf_misc.h"
+
+#include "cpumask_common.h"
+
+char _license[] SEC("license") = "GPL";
+
+/* Prototype for all of the program trace events below:
+ *
+ * TRACE_EVENT(task_newtask,
+ * TP_PROTO(struct task_struct *p, u64 clone_flags)
+ */
+
+SEC("tp_btf/task_newtask")
+__failure __msg("Unreleased reference")
+int BPF_PROG(test_alloc_no_release, struct task_struct *task, u64 clone_flags)
+{
+ struct bpf_cpumask *cpumask;
+
+ cpumask = create_cpumask();
+ __sink(cpumask);
+
+ /* cpumask is never released. */
+ return 0;
+}
+
+SEC("tp_btf/task_newtask")
+__failure __msg("NULL pointer passed to trusted arg0")
+int BPF_PROG(test_alloc_double_release, struct task_struct *task, u64 clone_flags)
+{
+ struct bpf_cpumask *cpumask;
+
+ cpumask = create_cpumask();
+
+ /* cpumask is released twice. */
+ bpf_cpumask_release(cpumask);
+ bpf_cpumask_release(cpumask);
+
+ return 0;
+}
+
+SEC("tp_btf/task_newtask")
+__failure __msg("must be referenced")
+int BPF_PROG(test_acquire_wrong_cpumask, struct task_struct *task, u64 clone_flags)
+{
+ struct bpf_cpumask *cpumask;
+
+ /* Can't acquire a non-struct bpf_cpumask. */
+ cpumask = bpf_cpumask_acquire((struct bpf_cpumask *)task->cpus_ptr);
+ __sink(cpumask);
+
+ return 0;
+}
+
+SEC("tp_btf/task_newtask")
+__failure __msg("bpf_cpumask_set_cpu args#1 expected pointer to STRUCT bpf_cpumask")
+int BPF_PROG(test_mutate_cpumask, struct task_struct *task, u64 clone_flags)
+{
+ struct bpf_cpumask *cpumask;
+
+ /* Can't set the CPU of a non-struct bpf_cpumask. */
+ bpf_cpumask_set_cpu(0, (struct bpf_cpumask *)task->cpus_ptr);
+ __sink(cpumask);
+
+ return 0;
+}
+
+SEC("tp_btf/task_newtask")
+__failure __msg("Unreleased reference")
+int BPF_PROG(test_insert_remove_no_release, struct task_struct *task, u64 clone_flags)
+{
+ struct bpf_cpumask *cpumask;
+ struct __cpumask_map_value *v;
+
+ cpumask = create_cpumask();
+ if (!cpumask)
+ return 0;
+
+ if (cpumask_map_insert(cpumask))
+ return 0;
+
+ v = cpumask_map_value_lookup();
+ if (!v)
+ return 0;
+
+ cpumask = bpf_kptr_xchg(&v->cpumask, NULL);
+
+ /* cpumask is never released. */
+ return 0;
+}
+
+SEC("tp_btf/task_newtask")
+__failure __msg("NULL pointer passed to trusted arg0")
+int BPF_PROG(test_cpumask_null, struct task_struct *task, u64 clone_flags)
+{
+ /* NULL passed to KF_TRUSTED_ARGS kfunc. */
+ bpf_cpumask_empty(NULL);
+
+ return 0;
+}
+
+SEC("tp_btf/task_newtask")
+__failure __msg("R2 must be a rcu pointer")
+int BPF_PROG(test_global_mask_out_of_rcu, struct task_struct *task, u64 clone_flags)
+{
+ struct bpf_cpumask *local, *prev;
+
+ local = create_cpumask();
+ if (!local)
+ return 0;
+
+ prev = bpf_kptr_xchg(&global_mask, local);
+ if (prev) {
+ bpf_cpumask_release(prev);
+ err = 3;
+ return 0;
+ }
+
+ bpf_rcu_read_lock();
+ local = global_mask;
+ if (!local) {
+ err = 4;
+ bpf_rcu_read_unlock();
+ return 0;
+ }
+
+ bpf_rcu_read_unlock();
+
+ /* RCU region is exited before calling KF_RCU kfunc. */
+
+ bpf_cpumask_test_cpu(0, (const struct cpumask *)local);
+
+ return 0;
+}
+
+SEC("tp_btf/task_newtask")
+__failure __msg("NULL pointer passed to trusted arg1")
+int BPF_PROG(test_global_mask_no_null_check, struct task_struct *task, u64 clone_flags)
+{
+ struct bpf_cpumask *local, *prev;
+
+ local = create_cpumask();
+ if (!local)
+ return 0;
+
+ prev = bpf_kptr_xchg(&global_mask, local);
+ if (prev) {
+ bpf_cpumask_release(prev);
+ err = 3;
+ return 0;
+ }
+
+ bpf_rcu_read_lock();
+ local = global_mask;
+
+ /* No NULL check is performed on global cpumask kptr. */
+ bpf_cpumask_test_cpu(0, (const struct cpumask *)local);
+
+ bpf_rcu_read_unlock();
+
+ return 0;
+}
+
+SEC("tp_btf/task_newtask")
+__failure __msg("Possibly NULL pointer passed to helper arg2")
+int BPF_PROG(test_global_mask_rcu_no_null_check, struct task_struct *task, u64 clone_flags)
+{
+ struct bpf_cpumask *prev, *curr;
+
+ curr = bpf_cpumask_create();
+ if (!curr)
+ return 0;
+
+ prev = bpf_kptr_xchg(&global_mask, curr);
+ if (prev)
+ bpf_cpumask_release(prev);
+
+ bpf_rcu_read_lock();
+ curr = global_mask;
+ /* PTR_TO_BTF_ID | PTR_MAYBE_NULL | MEM_RCU passed to bpf_kptr_xchg() */
+ prev = bpf_kptr_xchg(&global_mask, curr);
+ bpf_rcu_read_unlock();
+ if (prev)
+ bpf_cpumask_release(prev);
+
+ return 0;
+}
diff --git a/tools/testing/selftests/bpf/progs/cpumask_success.c b/tools/testing/selftests/bpf/progs/cpumask_success.c
new file mode 100644
index 000000000000..7a1e64c6c065
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/cpumask_success.c
@@ -0,0 +1,527 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2023 Meta Platforms, Inc. and affiliates. */
+
+#include <vmlinux.h>
+#include <bpf/bpf_tracing.h>
+#include <bpf/bpf_helpers.h>
+
+#include "bpf_misc.h"
+#include "cpumask_common.h"
+
+char _license[] SEC("license") = "GPL";
+
+int pid, nr_cpus;
+
+static bool is_test_task(void)
+{
+ int cur_pid = bpf_get_current_pid_tgid() >> 32;
+
+ return pid == cur_pid;
+}
+
+static bool create_cpumask_set(struct bpf_cpumask **out1,
+ struct bpf_cpumask **out2,
+ struct bpf_cpumask **out3,
+ struct bpf_cpumask **out4)
+{
+ struct bpf_cpumask *mask1, *mask2, *mask3, *mask4;
+
+ mask1 = create_cpumask();
+ if (!mask1)
+ return false;
+
+ mask2 = create_cpumask();
+ if (!mask2) {
+ bpf_cpumask_release(mask1);
+ err = 3;
+ return false;
+ }
+
+ mask3 = create_cpumask();
+ if (!mask3) {
+ bpf_cpumask_release(mask1);
+ bpf_cpumask_release(mask2);
+ err = 4;
+ return false;
+ }
+
+ mask4 = create_cpumask();
+ if (!mask4) {
+ bpf_cpumask_release(mask1);
+ bpf_cpumask_release(mask2);
+ bpf_cpumask_release(mask3);
+ err = 5;
+ return false;
+ }
+
+ *out1 = mask1;
+ *out2 = mask2;
+ *out3 = mask3;
+ *out4 = mask4;
+
+ return true;
+}
+
+SEC("tp_btf/task_newtask")
+int BPF_PROG(test_alloc_free_cpumask, struct task_struct *task, u64 clone_flags)
+{
+ struct bpf_cpumask *cpumask;
+
+ if (!is_test_task())
+ return 0;
+
+ cpumask = create_cpumask();
+ if (!cpumask)
+ return 0;
+
+ bpf_cpumask_release(cpumask);
+ return 0;
+}
+
+SEC("tp_btf/task_newtask")
+int BPF_PROG(test_set_clear_cpu, struct task_struct *task, u64 clone_flags)
+{
+ struct bpf_cpumask *cpumask;
+
+ if (!is_test_task())
+ return 0;
+
+ cpumask = create_cpumask();
+ if (!cpumask)
+ return 0;
+
+ bpf_cpumask_set_cpu(0, cpumask);
+ if (!bpf_cpumask_test_cpu(0, cast(cpumask))) {
+ err = 3;
+ goto release_exit;
+ }
+
+ bpf_cpumask_clear_cpu(0, cpumask);
+ if (bpf_cpumask_test_cpu(0, cast(cpumask))) {
+ err = 4;
+ goto release_exit;
+ }
+
+release_exit:
+ bpf_cpumask_release(cpumask);
+ return 0;
+}
+
+SEC("tp_btf/task_newtask")
+int BPF_PROG(test_setall_clear_cpu, struct task_struct *task, u64 clone_flags)
+{
+ struct bpf_cpumask *cpumask;
+
+ if (!is_test_task())
+ return 0;
+
+ cpumask = create_cpumask();
+ if (!cpumask)
+ return 0;
+
+ bpf_cpumask_setall(cpumask);
+ if (!bpf_cpumask_full(cast(cpumask))) {
+ err = 3;
+ goto release_exit;
+ }
+
+ bpf_cpumask_clear(cpumask);
+ if (!bpf_cpumask_empty(cast(cpumask))) {
+ err = 4;
+ goto release_exit;
+ }
+
+release_exit:
+ bpf_cpumask_release(cpumask);
+ return 0;
+}
+
+SEC("tp_btf/task_newtask")
+int BPF_PROG(test_first_firstzero_cpu, struct task_struct *task, u64 clone_flags)
+{
+ struct bpf_cpumask *cpumask;
+
+ if (!is_test_task())
+ return 0;
+
+ cpumask = create_cpumask();
+ if (!cpumask)
+ return 0;
+
+ if (bpf_cpumask_first(cast(cpumask)) < nr_cpus) {
+ err = 3;
+ goto release_exit;
+ }
+
+ if (bpf_cpumask_first_zero(cast(cpumask)) != 0) {
+ bpf_printk("first zero: %d", bpf_cpumask_first_zero(cast(cpumask)));
+ err = 4;
+ goto release_exit;
+ }
+
+ bpf_cpumask_set_cpu(0, cpumask);
+ if (bpf_cpumask_first(cast(cpumask)) != 0) {
+ err = 5;
+ goto release_exit;
+ }
+
+ if (bpf_cpumask_first_zero(cast(cpumask)) != 1) {
+ err = 6;
+ goto release_exit;
+ }
+
+release_exit:
+ bpf_cpumask_release(cpumask);
+ return 0;
+}
+
+SEC("tp_btf/task_newtask")
+int BPF_PROG(test_firstand_nocpu, struct task_struct *task, u64 clone_flags)
+{
+ struct bpf_cpumask *mask1, *mask2;
+ u32 first;
+
+ if (!is_test_task())
+ return 0;
+
+ mask1 = create_cpumask();
+ if (!mask1)
+ return 0;
+
+ mask2 = create_cpumask();
+ if (!mask2)
+ goto release_exit;
+
+ bpf_cpumask_set_cpu(0, mask1);
+ bpf_cpumask_set_cpu(1, mask2);
+
+ first = bpf_cpumask_first_and(cast(mask1), cast(mask2));
+ if (first <= 1)
+ err = 3;
+
+release_exit:
+ if (mask1)
+ bpf_cpumask_release(mask1);
+ if (mask2)
+ bpf_cpumask_release(mask2);
+ return 0;
+}
+
+SEC("tp_btf/task_newtask")
+int BPF_PROG(test_test_and_set_clear, struct task_struct *task, u64 clone_flags)
+{
+ struct bpf_cpumask *cpumask;
+
+ if (!is_test_task())
+ return 0;
+
+ cpumask = create_cpumask();
+ if (!cpumask)
+ return 0;
+
+ if (bpf_cpumask_test_and_set_cpu(0, cpumask)) {
+ err = 3;
+ goto release_exit;
+ }
+
+ if (!bpf_cpumask_test_and_set_cpu(0, cpumask)) {
+ err = 4;
+ goto release_exit;
+ }
+
+ if (!bpf_cpumask_test_and_clear_cpu(0, cpumask)) {
+ err = 5;
+ goto release_exit;
+ }
+
+release_exit:
+ bpf_cpumask_release(cpumask);
+ return 0;
+}
+
+SEC("tp_btf/task_newtask")
+int BPF_PROG(test_and_or_xor, struct task_struct *task, u64 clone_flags)
+{
+ struct bpf_cpumask *mask1, *mask2, *dst1, *dst2;
+
+ if (!is_test_task())
+ return 0;
+
+ if (!create_cpumask_set(&mask1, &mask2, &dst1, &dst2))
+ return 0;
+
+ bpf_cpumask_set_cpu(0, mask1);
+ bpf_cpumask_set_cpu(1, mask2);
+
+ if (bpf_cpumask_and(dst1, cast(mask1), cast(mask2))) {
+ err = 6;
+ goto release_exit;
+ }
+ if (!bpf_cpumask_empty(cast(dst1))) {
+ err = 7;
+ goto release_exit;
+ }
+
+ bpf_cpumask_or(dst1, cast(mask1), cast(mask2));
+ if (!bpf_cpumask_test_cpu(0, cast(dst1))) {
+ err = 8;
+ goto release_exit;
+ }
+ if (!bpf_cpumask_test_cpu(1, cast(dst1))) {
+ err = 9;
+ goto release_exit;
+ }
+
+ bpf_cpumask_xor(dst2, cast(mask1), cast(mask2));
+ if (!bpf_cpumask_equal(cast(dst1), cast(dst2))) {
+ err = 10;
+ goto release_exit;
+ }
+
+release_exit:
+ bpf_cpumask_release(mask1);
+ bpf_cpumask_release(mask2);
+ bpf_cpumask_release(dst1);
+ bpf_cpumask_release(dst2);
+ return 0;
+}
+
+SEC("tp_btf/task_newtask")
+int BPF_PROG(test_intersects_subset, struct task_struct *task, u64 clone_flags)
+{
+ struct bpf_cpumask *mask1, *mask2, *dst1, *dst2;
+
+ if (!is_test_task())
+ return 0;
+
+ if (!create_cpumask_set(&mask1, &mask2, &dst1, &dst2))
+ return 0;
+
+ bpf_cpumask_set_cpu(0, mask1);
+ bpf_cpumask_set_cpu(1, mask2);
+ if (bpf_cpumask_intersects(cast(mask1), cast(mask2))) {
+ err = 6;
+ goto release_exit;
+ }
+
+ bpf_cpumask_or(dst1, cast(mask1), cast(mask2));
+ if (!bpf_cpumask_subset(cast(mask1), cast(dst1))) {
+ err = 7;
+ goto release_exit;
+ }
+
+ if (!bpf_cpumask_subset(cast(mask2), cast(dst1))) {
+ err = 8;
+ goto release_exit;
+ }
+
+ if (bpf_cpumask_subset(cast(dst1), cast(mask1))) {
+ err = 9;
+ goto release_exit;
+ }
+
+release_exit:
+ bpf_cpumask_release(mask1);
+ bpf_cpumask_release(mask2);
+ bpf_cpumask_release(dst1);
+ bpf_cpumask_release(dst2);
+ return 0;
+}
+
+SEC("tp_btf/task_newtask")
+int BPF_PROG(test_copy_any_anyand, struct task_struct *task, u64 clone_flags)
+{
+ struct bpf_cpumask *mask1, *mask2, *dst1, *dst2;
+ int cpu;
+
+ if (!is_test_task())
+ return 0;
+
+ if (!create_cpumask_set(&mask1, &mask2, &dst1, &dst2))
+ return 0;
+
+ bpf_cpumask_set_cpu(0, mask1);
+ bpf_cpumask_set_cpu(1, mask2);
+ bpf_cpumask_or(dst1, cast(mask1), cast(mask2));
+
+ cpu = bpf_cpumask_any_distribute(cast(mask1));
+ if (cpu != 0) {
+ err = 6;
+ goto release_exit;
+ }
+
+ cpu = bpf_cpumask_any_distribute(cast(dst2));
+ if (cpu < nr_cpus) {
+ err = 7;
+ goto release_exit;
+ }
+
+ bpf_cpumask_copy(dst2, cast(dst1));
+ if (!bpf_cpumask_equal(cast(dst1), cast(dst2))) {
+ err = 8;
+ goto release_exit;
+ }
+
+ cpu = bpf_cpumask_any_distribute(cast(dst2));
+ if (cpu > 1) {
+ err = 9;
+ goto release_exit;
+ }
+
+ cpu = bpf_cpumask_any_and_distribute(cast(mask1), cast(mask2));
+ if (cpu < nr_cpus) {
+ err = 10;
+ goto release_exit;
+ }
+
+release_exit:
+ bpf_cpumask_release(mask1);
+ bpf_cpumask_release(mask2);
+ bpf_cpumask_release(dst1);
+ bpf_cpumask_release(dst2);
+ return 0;
+}
+
+SEC("tp_btf/task_newtask")
+int BPF_PROG(test_insert_leave, struct task_struct *task, u64 clone_flags)
+{
+ struct bpf_cpumask *cpumask;
+
+ cpumask = create_cpumask();
+ if (!cpumask)
+ return 0;
+
+ if (cpumask_map_insert(cpumask))
+ err = 3;
+
+ return 0;
+}
+
+SEC("tp_btf/task_newtask")
+int BPF_PROG(test_insert_remove_release, struct task_struct *task, u64 clone_flags)
+{
+ struct bpf_cpumask *cpumask;
+ struct __cpumask_map_value *v;
+
+ cpumask = create_cpumask();
+ if (!cpumask)
+ return 0;
+
+ if (cpumask_map_insert(cpumask)) {
+ err = 3;
+ return 0;
+ }
+
+ v = cpumask_map_value_lookup();
+ if (!v) {
+ err = 4;
+ return 0;
+ }
+
+ cpumask = bpf_kptr_xchg(&v->cpumask, NULL);
+ if (cpumask)
+ bpf_cpumask_release(cpumask);
+ else
+ err = 5;
+
+ return 0;
+}
+
+SEC("tp_btf/task_newtask")
+int BPF_PROG(test_global_mask_rcu, struct task_struct *task, u64 clone_flags)
+{
+ struct bpf_cpumask *local, *prev;
+
+ if (!is_test_task())
+ return 0;
+
+ local = create_cpumask();
+ if (!local)
+ return 0;
+
+ prev = bpf_kptr_xchg(&global_mask, local);
+ if (prev) {
+ bpf_cpumask_release(prev);
+ err = 3;
+ return 0;
+ }
+
+ bpf_rcu_read_lock();
+ local = global_mask;
+ if (!local) {
+ err = 4;
+ bpf_rcu_read_unlock();
+ return 0;
+ }
+
+ bpf_cpumask_test_cpu(0, (const struct cpumask *)local);
+ bpf_rcu_read_unlock();
+
+ return 0;
+}
+
+SEC("tp_btf/task_newtask")
+int BPF_PROG(test_cpumask_weight, struct task_struct *task, u64 clone_flags)
+{
+ struct bpf_cpumask *local;
+
+ if (!is_test_task())
+ return 0;
+
+ local = create_cpumask();
+ if (!local)
+ return 0;
+
+ if (bpf_cpumask_weight(cast(local)) != 0) {
+ err = 3;
+ goto out;
+ }
+
+ bpf_cpumask_set_cpu(0, local);
+ if (bpf_cpumask_weight(cast(local)) != 1) {
+ err = 4;
+ goto out;
+ }
+
+ /*
+ * Make sure that adding additional CPUs changes the weight. Test to
+ * see whether the CPU was set to account for running on UP machines.
+ */
+ bpf_cpumask_set_cpu(1, local);
+ if (bpf_cpumask_test_cpu(1, cast(local)) && bpf_cpumask_weight(cast(local)) != 2) {
+ err = 5;
+ goto out;
+ }
+
+ bpf_cpumask_clear(local);
+ if (bpf_cpumask_weight(cast(local)) != 0) {
+ err = 6;
+ goto out;
+ }
+out:
+ bpf_cpumask_release(local);
+ return 0;
+}
+
+SEC("tp_btf/task_newtask")
+__success
+int BPF_PROG(test_refcount_null_tracking, struct task_struct *task, u64 clone_flags)
+{
+ struct bpf_cpumask *mask1, *mask2;
+
+ mask1 = bpf_cpumask_create();
+ mask2 = bpf_cpumask_create();
+
+ if (!mask1 || !mask2)
+ goto free_masks_return;
+
+ bpf_cpumask_test_cpu(0, (const struct cpumask *)mask1);
+ bpf_cpumask_test_cpu(0, (const struct cpumask *)mask2);
+
+free_masks_return:
+ if (mask1)
+ bpf_cpumask_release(mask1);
+ if (mask2)
+ bpf_cpumask_release(mask2);
+ return 0;
+}
diff --git a/tools/testing/selftests/bpf/progs/decap_sanity.c b/tools/testing/selftests/bpf/progs/decap_sanity.c
new file mode 100644
index 000000000000..bd3c657c58a7
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/decap_sanity.c
@@ -0,0 +1,68 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2022 Meta Platforms, Inc. and affiliates. */
+
+#include "vmlinux.h"
+#include "bpf_tracing_net.h"
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_endian.h>
+
+#define UDP_TEST_PORT 7777
+
+void *bpf_cast_to_kern_ctx(void *) __ksym;
+bool init_csum_partial = false;
+bool final_csum_none = false;
+bool broken_csum_start = false;
+
+static unsigned int skb_headlen(const struct sk_buff *skb)
+{
+ return skb->len - skb->data_len;
+}
+
+static unsigned int skb_headroom(const struct sk_buff *skb)
+{
+ return skb->data - skb->head;
+}
+
+static int skb_checksum_start_offset(const struct sk_buff *skb)
+{
+ return skb->csum_start - skb_headroom(skb);
+}
+
+SEC("tc")
+int decap_sanity(struct __sk_buff *skb)
+{
+ struct sk_buff *kskb;
+ struct ipv6hdr ip6h;
+ struct udphdr udph;
+ int err;
+
+ if (skb->protocol != __bpf_constant_htons(ETH_P_IPV6))
+ return TC_ACT_SHOT;
+
+ if (bpf_skb_load_bytes(skb, ETH_HLEN, &ip6h, sizeof(ip6h)))
+ return TC_ACT_SHOT;
+
+ if (ip6h.nexthdr != IPPROTO_UDP)
+ return TC_ACT_SHOT;
+
+ if (bpf_skb_load_bytes(skb, ETH_HLEN + sizeof(ip6h), &udph, sizeof(udph)))
+ return TC_ACT_SHOT;
+
+ if (udph.dest != __bpf_constant_htons(UDP_TEST_PORT))
+ return TC_ACT_SHOT;
+
+ kskb = bpf_cast_to_kern_ctx(skb);
+ init_csum_partial = (kskb->ip_summed == CHECKSUM_PARTIAL);
+ err = bpf_skb_adjust_room(skb, -(s32)(ETH_HLEN + sizeof(ip6h) + sizeof(udph)),
+ 1, BPF_F_ADJ_ROOM_FIXED_GSO);
+ if (err)
+ return TC_ACT_SHOT;
+ final_csum_none = (kskb->ip_summed == CHECKSUM_NONE);
+ if (kskb->ip_summed == CHECKSUM_PARTIAL &&
+ (unsigned int)skb_checksum_start_offset(kskb) >= skb_headlen(kskb))
+ broken_csum_start = true;
+
+ return TC_ACT_SHOT;
+}
+
+char __license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/dev_cgroup.c b/tools/testing/selftests/bpf/progs/dev_cgroup.c
index 8924e06bdef0..79b54a4fa244 100644
--- a/tools/testing/selftests/bpf/progs/dev_cgroup.c
+++ b/tools/testing/selftests/bpf/progs/dev_cgroup.c
@@ -57,4 +57,3 @@ int bpf_prog1(struct bpf_cgroup_dev_ctx *ctx)
}
char _license[] SEC("license") = "GPL";
-__u32 _version SEC("version") = LINUX_VERSION_CODE;
diff --git a/tools/testing/selftests/bpf/progs/dummy_st_ops_fail.c b/tools/testing/selftests/bpf/progs/dummy_st_ops_fail.c
new file mode 100644
index 000000000000..0bf969a0b5ed
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/dummy_st_ops_fail.c
@@ -0,0 +1,27 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2023 Meta Platforms, Inc. and affiliates. */
+
+#include "vmlinux.h"
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
+
+#include "bpf_misc.h"
+
+char _license[] SEC("license") = "GPL";
+
+SEC("struct_ops.s/test_2")
+__failure __msg("attach to unsupported member test_2 of struct bpf_dummy_ops")
+int BPF_PROG(test_unsupported_field_sleepable,
+ struct bpf_dummy_ops_state *state, int a1, unsigned short a2,
+ char a3, unsigned long a4)
+{
+ /* Tries to mark an unsleepable field in struct bpf_dummy_ops as sleepable. */
+ return 0;
+}
+
+SEC(".struct_ops")
+struct bpf_dummy_ops dummy_1 = {
+ .test_1 = NULL,
+ .test_2 = (void *)test_unsupported_field_sleepable,
+ .test_sleepable = (void *)NULL,
+};
diff --git a/tools/testing/selftests/bpf/progs/dummy_st_ops_success.c b/tools/testing/selftests/bpf/progs/dummy_st_ops_success.c
new file mode 100644
index 000000000000..1efa746c25dc
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/dummy_st_ops_success.c
@@ -0,0 +1,47 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (C) 2021. Huawei Technologies Co., Ltd */
+#include "vmlinux.h"
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
+
+char _license[] SEC("license") = "GPL";
+
+SEC("struct_ops/test_1")
+int BPF_PROG(test_1, struct bpf_dummy_ops_state *state)
+{
+ int ret;
+
+ if (!state)
+ return 0xf2f3f4f5;
+
+ ret = state->val;
+ state->val = 0x5a;
+ return ret;
+}
+
+__u64 test_2_args[5];
+
+SEC("struct_ops/test_2")
+int BPF_PROG(test_2, struct bpf_dummy_ops_state *state, int a1, unsigned short a2,
+ char a3, unsigned long a4)
+{
+ test_2_args[0] = (unsigned long)state;
+ test_2_args[1] = a1;
+ test_2_args[2] = a2;
+ test_2_args[3] = a3;
+ test_2_args[4] = a4;
+ return 0;
+}
+
+SEC("struct_ops.s/test_sleepable")
+int BPF_PROG(test_sleepable, struct bpf_dummy_ops_state *state)
+{
+ return 0;
+}
+
+SEC(".struct_ops")
+struct bpf_dummy_ops dummy_1 = {
+ .test_1 = (void *)test_1,
+ .test_2 = (void *)test_2,
+ .test_sleepable = (void *)test_sleepable,
+};
diff --git a/tools/testing/selftests/bpf/progs/dynptr_fail.c b/tools/testing/selftests/bpf/progs/dynptr_fail.c
new file mode 100644
index 000000000000..7ce7e827d5f0
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/dynptr_fail.c
@@ -0,0 +1,1688 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2022 Facebook */
+
+#include <errno.h>
+#include <string.h>
+#include <stdbool.h>
+#include <linux/bpf.h>
+#include <bpf/bpf_helpers.h>
+#include <linux/if_ether.h>
+#include "bpf_misc.h"
+#include "bpf_kfuncs.h"
+
+char _license[] SEC("license") = "GPL";
+
+struct test_info {
+ int x;
+ struct bpf_dynptr ptr;
+};
+
+struct {
+ __uint(type, BPF_MAP_TYPE_ARRAY);
+ __uint(max_entries, 1);
+ __type(key, __u32);
+ __type(value, struct bpf_dynptr);
+} array_map1 SEC(".maps");
+
+struct {
+ __uint(type, BPF_MAP_TYPE_ARRAY);
+ __uint(max_entries, 1);
+ __type(key, __u32);
+ __type(value, struct test_info);
+} array_map2 SEC(".maps");
+
+struct {
+ __uint(type, BPF_MAP_TYPE_ARRAY);
+ __uint(max_entries, 1);
+ __type(key, __u32);
+ __type(value, __u32);
+} array_map3 SEC(".maps");
+
+struct {
+ __uint(type, BPF_MAP_TYPE_ARRAY);
+ __uint(max_entries, 1);
+ __type(key, __u32);
+ __type(value, __u64);
+} array_map4 SEC(".maps");
+
+struct sample {
+ int pid;
+ long value;
+ char comm[16];
+};
+
+struct {
+ __uint(type, BPF_MAP_TYPE_RINGBUF);
+ __uint(max_entries, 4096);
+} ringbuf SEC(".maps");
+
+int err, val;
+
+static int get_map_val_dynptr(struct bpf_dynptr *ptr)
+{
+ __u32 key = 0, *map_val;
+
+ bpf_map_update_elem(&array_map3, &key, &val, 0);
+
+ map_val = bpf_map_lookup_elem(&array_map3, &key);
+ if (!map_val)
+ return -ENOENT;
+
+ bpf_dynptr_from_mem(map_val, sizeof(*map_val), 0, ptr);
+
+ return 0;
+}
+
+/* Every bpf_ringbuf_reserve_dynptr call must have a corresponding
+ * bpf_ringbuf_submit/discard_dynptr call
+ */
+SEC("?raw_tp")
+__failure __msg("Unreleased reference id=2")
+int ringbuf_missing_release1(void *ctx)
+{
+ struct bpf_dynptr ptr;
+
+ bpf_ringbuf_reserve_dynptr(&ringbuf, val, 0, &ptr);
+
+ /* missing a call to bpf_ringbuf_discard/submit_dynptr */
+
+ return 0;
+}
+
+SEC("?raw_tp")
+__failure __msg("Unreleased reference id=4")
+int ringbuf_missing_release2(void *ctx)
+{
+ struct bpf_dynptr ptr1, ptr2;
+ struct sample *sample;
+
+ bpf_ringbuf_reserve_dynptr(&ringbuf, sizeof(*sample), 0, &ptr1);
+ bpf_ringbuf_reserve_dynptr(&ringbuf, sizeof(*sample), 0, &ptr2);
+
+ sample = bpf_dynptr_data(&ptr1, 0, sizeof(*sample));
+ if (!sample) {
+ bpf_ringbuf_discard_dynptr(&ptr1, 0);
+ bpf_ringbuf_discard_dynptr(&ptr2, 0);
+ return 0;
+ }
+
+ bpf_ringbuf_submit_dynptr(&ptr1, 0);
+
+ /* missing a call to bpf_ringbuf_discard/submit_dynptr on ptr2 */
+
+ return 0;
+}
+
+static int missing_release_callback_fn(__u32 index, void *data)
+{
+ struct bpf_dynptr ptr;
+
+ bpf_ringbuf_reserve_dynptr(&ringbuf, val, 0, &ptr);
+
+ /* missing a call to bpf_ringbuf_discard/submit_dynptr */
+
+ return 0;
+}
+
+/* Any dynptr initialized within a callback must have bpf_dynptr_put called */
+SEC("?raw_tp")
+__failure __msg("Unreleased reference id")
+int ringbuf_missing_release_callback(void *ctx)
+{
+ bpf_loop(10, missing_release_callback_fn, NULL, 0);
+ return 0;
+}
+
+/* Can't call bpf_ringbuf_submit/discard_dynptr on a non-initialized dynptr */
+SEC("?raw_tp")
+__failure __msg("arg 1 is an unacquired reference")
+int ringbuf_release_uninit_dynptr(void *ctx)
+{
+ struct bpf_dynptr ptr;
+
+ /* this should fail */
+ bpf_ringbuf_submit_dynptr(&ptr, 0);
+
+ return 0;
+}
+
+/* A dynptr can't be used after it has been invalidated */
+SEC("?raw_tp")
+__failure __msg("Expected an initialized dynptr as arg #3")
+int use_after_invalid(void *ctx)
+{
+ struct bpf_dynptr ptr;
+ char read_data[64];
+
+ bpf_ringbuf_reserve_dynptr(&ringbuf, sizeof(read_data), 0, &ptr);
+
+ bpf_dynptr_read(read_data, sizeof(read_data), &ptr, 0, 0);
+
+ bpf_ringbuf_submit_dynptr(&ptr, 0);
+
+ /* this should fail */
+ bpf_dynptr_read(read_data, sizeof(read_data), &ptr, 0, 0);
+
+ return 0;
+}
+
+/* Can't call non-dynptr ringbuf APIs on a dynptr ringbuf sample */
+SEC("?raw_tp")
+__failure __msg("type=mem expected=ringbuf_mem")
+int ringbuf_invalid_api(void *ctx)
+{
+ struct bpf_dynptr ptr;
+ struct sample *sample;
+
+ bpf_ringbuf_reserve_dynptr(&ringbuf, sizeof(*sample), 0, &ptr);
+ sample = bpf_dynptr_data(&ptr, 0, sizeof(*sample));
+ if (!sample)
+ goto done;
+
+ sample->pid = 123;
+
+ /* invalid API use. need to use dynptr API to submit/discard */
+ bpf_ringbuf_submit(sample, 0);
+
+done:
+ bpf_ringbuf_discard_dynptr(&ptr, 0);
+ return 0;
+}
+
+/* Can't add a dynptr to a map */
+SEC("?raw_tp")
+__failure __msg("invalid indirect read from stack")
+int add_dynptr_to_map1(void *ctx)
+{
+ struct bpf_dynptr ptr;
+ int key = 0;
+
+ bpf_ringbuf_reserve_dynptr(&ringbuf, val, 0, &ptr);
+
+ /* this should fail */
+ bpf_map_update_elem(&array_map1, &key, &ptr, 0);
+
+ bpf_ringbuf_submit_dynptr(&ptr, 0);
+
+ return 0;
+}
+
+/* Can't add a struct with an embedded dynptr to a map */
+SEC("?raw_tp")
+__failure __msg("invalid indirect read from stack")
+int add_dynptr_to_map2(void *ctx)
+{
+ struct test_info x;
+ int key = 0;
+
+ bpf_ringbuf_reserve_dynptr(&ringbuf, val, 0, &x.ptr);
+
+ /* this should fail */
+ bpf_map_update_elem(&array_map2, &key, &x, 0);
+
+ bpf_ringbuf_submit_dynptr(&x.ptr, 0);
+
+ return 0;
+}
+
+/* A data slice can't be accessed out of bounds */
+SEC("?raw_tp")
+__failure __msg("value is outside of the allowed memory range")
+int data_slice_out_of_bounds_ringbuf(void *ctx)
+{
+ struct bpf_dynptr ptr;
+ void *data;
+
+ bpf_ringbuf_reserve_dynptr(&ringbuf, 8, 0, &ptr);
+
+ data = bpf_dynptr_data(&ptr, 0, 8);
+ if (!data)
+ goto done;
+
+ /* can't index out of bounds of the data slice */
+ val = *((char *)data + 8);
+
+done:
+ bpf_ringbuf_submit_dynptr(&ptr, 0);
+ return 0;
+}
+
+/* A data slice can't be accessed out of bounds */
+SEC("?tc")
+__failure __msg("value is outside of the allowed memory range")
+int data_slice_out_of_bounds_skb(struct __sk_buff *skb)
+{
+ struct bpf_dynptr ptr;
+ struct ethhdr *hdr;
+ char buffer[sizeof(*hdr)] = {};
+
+ bpf_dynptr_from_skb(skb, 0, &ptr);
+
+ hdr = bpf_dynptr_slice_rdwr(&ptr, 0, buffer, sizeof(buffer));
+ if (!hdr)
+ return SK_DROP;
+
+ /* this should fail */
+ *(__u8*)(hdr + 1) = 1;
+
+ return SK_PASS;
+}
+
+SEC("?raw_tp")
+__failure __msg("value is outside of the allowed memory range")
+int data_slice_out_of_bounds_map_value(void *ctx)
+{
+ __u32 map_val;
+ struct bpf_dynptr ptr;
+ void *data;
+
+ get_map_val_dynptr(&ptr);
+
+ data = bpf_dynptr_data(&ptr, 0, sizeof(map_val));
+ if (!data)
+ return 0;
+
+ /* can't index out of bounds of the data slice */
+ val = *((char *)data + (sizeof(map_val) + 1));
+
+ return 0;
+}
+
+/* A data slice can't be used after it has been released */
+SEC("?raw_tp")
+__failure __msg("invalid mem access 'scalar'")
+int data_slice_use_after_release1(void *ctx)
+{
+ struct bpf_dynptr ptr;
+ struct sample *sample;
+
+ bpf_ringbuf_reserve_dynptr(&ringbuf, sizeof(*sample), 0, &ptr);
+ sample = bpf_dynptr_data(&ptr, 0, sizeof(*sample));
+ if (!sample)
+ goto done;
+
+ sample->pid = 123;
+
+ bpf_ringbuf_submit_dynptr(&ptr, 0);
+
+ /* this should fail */
+ val = sample->pid;
+
+ return 0;
+
+done:
+ bpf_ringbuf_discard_dynptr(&ptr, 0);
+ return 0;
+}
+
+/* A data slice can't be used after it has been released.
+ *
+ * This tests the case where the data slice tracks a dynptr (ptr2)
+ * that is at a non-zero offset from the frame pointer (ptr1 is at fp,
+ * ptr2 is at fp - 16).
+ */
+SEC("?raw_tp")
+__failure __msg("invalid mem access 'scalar'")
+int data_slice_use_after_release2(void *ctx)
+{
+ struct bpf_dynptr ptr1, ptr2;
+ struct sample *sample;
+
+ bpf_ringbuf_reserve_dynptr(&ringbuf, 64, 0, &ptr1);
+ bpf_ringbuf_reserve_dynptr(&ringbuf, sizeof(*sample), 0, &ptr2);
+
+ sample = bpf_dynptr_data(&ptr2, 0, sizeof(*sample));
+ if (!sample)
+ goto done;
+
+ sample->pid = 23;
+
+ bpf_ringbuf_submit_dynptr(&ptr2, 0);
+
+ /* this should fail */
+ sample->pid = 23;
+
+ bpf_ringbuf_submit_dynptr(&ptr1, 0);
+
+ return 0;
+
+done:
+ bpf_ringbuf_discard_dynptr(&ptr2, 0);
+ bpf_ringbuf_discard_dynptr(&ptr1, 0);
+ return 0;
+}
+
+/* A data slice must be first checked for NULL */
+SEC("?raw_tp")
+__failure __msg("invalid mem access 'mem_or_null'")
+int data_slice_missing_null_check1(void *ctx)
+{
+ struct bpf_dynptr ptr;
+ void *data;
+
+ bpf_ringbuf_reserve_dynptr(&ringbuf, 8, 0, &ptr);
+
+ data = bpf_dynptr_data(&ptr, 0, 8);
+
+ /* missing if (!data) check */
+
+ /* this should fail */
+ *(__u8 *)data = 3;
+
+ bpf_ringbuf_submit_dynptr(&ptr, 0);
+ return 0;
+}
+
+/* A data slice can't be dereferenced if it wasn't checked for null */
+SEC("?raw_tp")
+__failure __msg("invalid mem access 'mem_or_null'")
+int data_slice_missing_null_check2(void *ctx)
+{
+ struct bpf_dynptr ptr;
+ __u64 *data1, *data2;
+
+ bpf_ringbuf_reserve_dynptr(&ringbuf, 16, 0, &ptr);
+
+ data1 = bpf_dynptr_data(&ptr, 0, 8);
+ data2 = bpf_dynptr_data(&ptr, 0, 8);
+ if (data1)
+ /* this should fail */
+ *data2 = 3;
+
+ bpf_ringbuf_discard_dynptr(&ptr, 0);
+ return 0;
+}
+
+/* Can't pass in a dynptr as an arg to a helper function that doesn't take in a
+ * dynptr argument
+ */
+SEC("?raw_tp")
+__failure __msg("invalid indirect read from stack")
+int invalid_helper1(void *ctx)
+{
+ struct bpf_dynptr ptr;
+
+ get_map_val_dynptr(&ptr);
+
+ /* this should fail */
+ bpf_strncmp((const char *)&ptr, sizeof(ptr), "hello!");
+
+ return 0;
+}
+
+/* A dynptr can't be passed into a helper function at a non-zero offset */
+SEC("?raw_tp")
+__failure __msg("cannot pass in dynptr at an offset=-8")
+int invalid_helper2(void *ctx)
+{
+ struct bpf_dynptr ptr;
+ char read_data[64];
+
+ get_map_val_dynptr(&ptr);
+
+ /* this should fail */
+ bpf_dynptr_read(read_data, sizeof(read_data), (void *)&ptr + 8, 0, 0);
+ return 0;
+}
+
+/* A bpf_dynptr is invalidated if it's been written into */
+SEC("?raw_tp")
+__failure __msg("Expected an initialized dynptr as arg #1")
+int invalid_write1(void *ctx)
+{
+ struct bpf_dynptr ptr;
+ void *data;
+ __u8 x = 0;
+
+ get_map_val_dynptr(&ptr);
+
+ memcpy(&ptr, &x, sizeof(x));
+
+ /* this should fail */
+ data = bpf_dynptr_data(&ptr, 0, 1);
+ __sink(data);
+
+ return 0;
+}
+
+/*
+ * A bpf_dynptr can't be used as a dynptr if it has been written into at a fixed
+ * offset
+ */
+SEC("?raw_tp")
+__failure __msg("cannot overwrite referenced dynptr")
+int invalid_write2(void *ctx)
+{
+ struct bpf_dynptr ptr;
+ char read_data[64];
+ __u8 x = 0;
+
+ bpf_ringbuf_reserve_dynptr(&ringbuf, 64, 0, &ptr);
+
+ memcpy((void *)&ptr + 8, &x, sizeof(x));
+
+ /* this should fail */
+ bpf_dynptr_read(read_data, sizeof(read_data), &ptr, 0, 0);
+
+ bpf_ringbuf_submit_dynptr(&ptr, 0);
+
+ return 0;
+}
+
+/*
+ * A bpf_dynptr can't be used as a dynptr if it has been written into at a
+ * non-const offset
+ */
+SEC("?raw_tp")
+__failure __msg("cannot overwrite referenced dynptr")
+int invalid_write3(void *ctx)
+{
+ struct bpf_dynptr ptr;
+ char stack_buf[16];
+ unsigned long len;
+ __u8 x = 0;
+
+ bpf_ringbuf_reserve_dynptr(&ringbuf, 8, 0, &ptr);
+
+ memcpy(stack_buf, &val, sizeof(val));
+ len = stack_buf[0] & 0xf;
+
+ memcpy((void *)&ptr + len, &x, sizeof(x));
+
+ /* this should fail */
+ bpf_ringbuf_submit_dynptr(&ptr, 0);
+
+ return 0;
+}
+
+static int invalid_write4_callback(__u32 index, void *data)
+{
+ *(__u32 *)data = 123;
+
+ return 0;
+}
+
+/* If the dynptr is written into in a callback function, it should
+ * be invalidated as a dynptr
+ */
+SEC("?raw_tp")
+__failure __msg("cannot overwrite referenced dynptr")
+int invalid_write4(void *ctx)
+{
+ struct bpf_dynptr ptr;
+
+ bpf_ringbuf_reserve_dynptr(&ringbuf, 64, 0, &ptr);
+
+ bpf_loop(10, invalid_write4_callback, &ptr, 0);
+
+ /* this should fail */
+ bpf_ringbuf_submit_dynptr(&ptr, 0);
+
+ return 0;
+}
+
+/* A globally-defined bpf_dynptr can't be used (it must reside as a stack frame) */
+struct bpf_dynptr global_dynptr;
+
+SEC("?raw_tp")
+__failure __msg("type=map_value expected=fp")
+int global(void *ctx)
+{
+ /* this should fail */
+ bpf_ringbuf_reserve_dynptr(&ringbuf, 16, 0, &global_dynptr);
+
+ bpf_ringbuf_discard_dynptr(&global_dynptr, 0);
+
+ return 0;
+}
+
+/* A direct read should fail */
+SEC("?raw_tp")
+__failure __msg("invalid read from stack")
+int invalid_read1(void *ctx)
+{
+ struct bpf_dynptr ptr;
+
+ bpf_ringbuf_reserve_dynptr(&ringbuf, 64, 0, &ptr);
+
+ /* this should fail */
+ val = *(int *)&ptr;
+
+ bpf_ringbuf_discard_dynptr(&ptr, 0);
+
+ return 0;
+}
+
+/* A direct read at an offset should fail */
+SEC("?raw_tp")
+__failure __msg("cannot pass in dynptr at an offset")
+int invalid_read2(void *ctx)
+{
+ struct bpf_dynptr ptr;
+ char read_data[64];
+
+ get_map_val_dynptr(&ptr);
+
+ /* this should fail */
+ bpf_dynptr_read(read_data, sizeof(read_data), (void *)&ptr + 1, 0, 0);
+
+ return 0;
+}
+
+/* A direct read at an offset into the lower stack slot should fail */
+SEC("?raw_tp")
+__failure __msg("invalid read from stack")
+int invalid_read3(void *ctx)
+{
+ struct bpf_dynptr ptr1, ptr2;
+
+ bpf_ringbuf_reserve_dynptr(&ringbuf, 16, 0, &ptr1);
+ bpf_ringbuf_reserve_dynptr(&ringbuf, 16, 0, &ptr2);
+
+ /* this should fail */
+ memcpy(&val, (void *)&ptr1 + 8, sizeof(val));
+
+ bpf_ringbuf_discard_dynptr(&ptr1, 0);
+ bpf_ringbuf_discard_dynptr(&ptr2, 0);
+
+ return 0;
+}
+
+static int invalid_read4_callback(__u32 index, void *data)
+{
+ /* this should fail */
+ val = *(__u32 *)data;
+
+ return 0;
+}
+
+/* A direct read within a callback function should fail */
+SEC("?raw_tp")
+__failure __msg("invalid read from stack")
+int invalid_read4(void *ctx)
+{
+ struct bpf_dynptr ptr;
+
+ bpf_ringbuf_reserve_dynptr(&ringbuf, 64, 0, &ptr);
+
+ bpf_loop(10, invalid_read4_callback, &ptr, 0);
+
+ bpf_ringbuf_submit_dynptr(&ptr, 0);
+
+ return 0;
+}
+
+/* Initializing a dynptr on an offset should fail */
+SEC("?raw_tp")
+__failure __msg("cannot pass in dynptr at an offset=0")
+int invalid_offset(void *ctx)
+{
+ struct bpf_dynptr ptr;
+
+ /* this should fail */
+ bpf_ringbuf_reserve_dynptr(&ringbuf, 64, 0, &ptr + 1);
+
+ bpf_ringbuf_discard_dynptr(&ptr, 0);
+
+ return 0;
+}
+
+/* Can't release a dynptr twice */
+SEC("?raw_tp")
+__failure __msg("arg 1 is an unacquired reference")
+int release_twice(void *ctx)
+{
+ struct bpf_dynptr ptr;
+
+ bpf_ringbuf_reserve_dynptr(&ringbuf, 16, 0, &ptr);
+
+ bpf_ringbuf_discard_dynptr(&ptr, 0);
+
+ /* this second release should fail */
+ bpf_ringbuf_discard_dynptr(&ptr, 0);
+
+ return 0;
+}
+
+static int release_twice_callback_fn(__u32 index, void *data)
+{
+ /* this should fail */
+ bpf_ringbuf_discard_dynptr(data, 0);
+
+ return 0;
+}
+
+/* Test that releasing a dynptr twice, where one of the releases happens
+ * within a callback function, fails
+ */
+SEC("?raw_tp")
+__failure __msg("arg 1 is an unacquired reference")
+int release_twice_callback(void *ctx)
+{
+ struct bpf_dynptr ptr;
+
+ bpf_ringbuf_reserve_dynptr(&ringbuf, 32, 0, &ptr);
+
+ bpf_ringbuf_discard_dynptr(&ptr, 0);
+
+ bpf_loop(10, release_twice_callback_fn, &ptr, 0);
+
+ return 0;
+}
+
+/* Reject unsupported local mem types for dynptr_from_mem API */
+SEC("?raw_tp")
+__failure __msg("Unsupported reg type fp for bpf_dynptr_from_mem data")
+int dynptr_from_mem_invalid_api(void *ctx)
+{
+ struct bpf_dynptr ptr;
+ int x = 0;
+
+ /* this should fail */
+ bpf_dynptr_from_mem(&x, sizeof(x), 0, &ptr);
+
+ return 0;
+}
+
+SEC("?tc")
+__failure __msg("cannot overwrite referenced dynptr") __log_level(2)
+int dynptr_pruning_overwrite(struct __sk_buff *ctx)
+{
+ asm volatile (
+ "r9 = 0xeB9F; \
+ r6 = %[ringbuf] ll; \
+ r1 = r6; \
+ r2 = 8; \
+ r3 = 0; \
+ r4 = r10; \
+ r4 += -16; \
+ call %[bpf_ringbuf_reserve_dynptr]; \
+ if r0 == 0 goto pjmp1; \
+ goto pjmp2; \
+ pjmp1: \
+ *(u64 *)(r10 - 16) = r9; \
+ pjmp2: \
+ r1 = r10; \
+ r1 += -16; \
+ r2 = 0; \
+ call %[bpf_ringbuf_discard_dynptr]; "
+ :
+ : __imm(bpf_ringbuf_reserve_dynptr),
+ __imm(bpf_ringbuf_discard_dynptr),
+ __imm_addr(ringbuf)
+ : __clobber_all
+ );
+ return 0;
+}
+
+SEC("?tc")
+__success __msg("12: safe") __log_level(2)
+int dynptr_pruning_stacksafe(struct __sk_buff *ctx)
+{
+ asm volatile (
+ "r9 = 0xeB9F; \
+ r6 = %[ringbuf] ll; \
+ r1 = r6; \
+ r2 = 8; \
+ r3 = 0; \
+ r4 = r10; \
+ r4 += -16; \
+ call %[bpf_ringbuf_reserve_dynptr]; \
+ if r0 == 0 goto stjmp1; \
+ goto stjmp2; \
+ stjmp1: \
+ r9 = r9; \
+ stjmp2: \
+ r1 = r10; \
+ r1 += -16; \
+ r2 = 0; \
+ call %[bpf_ringbuf_discard_dynptr]; "
+ :
+ : __imm(bpf_ringbuf_reserve_dynptr),
+ __imm(bpf_ringbuf_discard_dynptr),
+ __imm_addr(ringbuf)
+ : __clobber_all
+ );
+ return 0;
+}
+
+SEC("?tc")
+__failure __msg("cannot overwrite referenced dynptr") __log_level(2)
+int dynptr_pruning_type_confusion(struct __sk_buff *ctx)
+{
+ asm volatile (
+ "r6 = %[array_map4] ll; \
+ r7 = %[ringbuf] ll; \
+ r1 = r6; \
+ r2 = r10; \
+ r2 += -8; \
+ r9 = 0; \
+ *(u64 *)(r2 + 0) = r9; \
+ r3 = r10; \
+ r3 += -24; \
+ r9 = 0xeB9FeB9F; \
+ *(u64 *)(r10 - 16) = r9; \
+ *(u64 *)(r10 - 24) = r9; \
+ r9 = 0; \
+ r4 = 0; \
+ r8 = r2; \
+ call %[bpf_map_update_elem]; \
+ r1 = r6; \
+ r2 = r8; \
+ call %[bpf_map_lookup_elem]; \
+ if r0 != 0 goto tjmp1; \
+ exit; \
+ tjmp1: \
+ r8 = r0; \
+ r1 = r7; \
+ r2 = 8; \
+ r3 = 0; \
+ r4 = r10; \
+ r4 += -16; \
+ r0 = *(u64 *)(r0 + 0); \
+ call %[bpf_ringbuf_reserve_dynptr]; \
+ if r0 == 0 goto tjmp2; \
+ r8 = r8; \
+ r8 = r8; \
+ r8 = r8; \
+ r8 = r8; \
+ r8 = r8; \
+ r8 = r8; \
+ r8 = r8; \
+ goto tjmp3; \
+ tjmp2: \
+ *(u64 *)(r10 - 8) = r9; \
+ *(u64 *)(r10 - 16) = r9; \
+ r1 = r8; \
+ r1 += 8; \
+ r2 = 0; \
+ r3 = 0; \
+ r4 = r10; \
+ r4 += -16; \
+ call %[bpf_dynptr_from_mem]; \
+ tjmp3: \
+ r1 = r10; \
+ r1 += -16; \
+ r2 = 0; \
+ call %[bpf_ringbuf_discard_dynptr]; "
+ :
+ : __imm(bpf_map_update_elem),
+ __imm(bpf_map_lookup_elem),
+ __imm(bpf_ringbuf_reserve_dynptr),
+ __imm(bpf_dynptr_from_mem),
+ __imm(bpf_ringbuf_discard_dynptr),
+ __imm_addr(array_map4),
+ __imm_addr(ringbuf)
+ : __clobber_all
+ );
+ return 0;
+}
+
+SEC("?tc")
+__failure __msg("dynptr has to be at a constant offset") __log_level(2)
+int dynptr_var_off_overwrite(struct __sk_buff *ctx)
+{
+ asm volatile (
+ "r9 = 16; \
+ *(u32 *)(r10 - 4) = r9; \
+ r8 = *(u32 *)(r10 - 4); \
+ if r8 >= 0 goto vjmp1; \
+ r0 = 1; \
+ exit; \
+ vjmp1: \
+ if r8 <= 16 goto vjmp2; \
+ r0 = 1; \
+ exit; \
+ vjmp2: \
+ r8 &= 16; \
+ r1 = %[ringbuf] ll; \
+ r2 = 8; \
+ r3 = 0; \
+ r4 = r10; \
+ r4 += -32; \
+ r4 += r8; \
+ call %[bpf_ringbuf_reserve_dynptr]; \
+ r9 = 0xeB9F; \
+ *(u64 *)(r10 - 16) = r9; \
+ r1 = r10; \
+ r1 += -32; \
+ r1 += r8; \
+ r2 = 0; \
+ call %[bpf_ringbuf_discard_dynptr]; "
+ :
+ : __imm(bpf_ringbuf_reserve_dynptr),
+ __imm(bpf_ringbuf_discard_dynptr),
+ __imm_addr(ringbuf)
+ : __clobber_all
+ );
+ return 0;
+}
+
+SEC("?tc")
+__failure __msg("cannot overwrite referenced dynptr") __log_level(2)
+int dynptr_partial_slot_invalidate(struct __sk_buff *ctx)
+{
+ asm volatile (
+ "r6 = %[ringbuf] ll; \
+ r7 = %[array_map4] ll; \
+ r1 = r7; \
+ r2 = r10; \
+ r2 += -8; \
+ r9 = 0; \
+ *(u64 *)(r2 + 0) = r9; \
+ r3 = r2; \
+ r4 = 0; \
+ r8 = r2; \
+ call %[bpf_map_update_elem]; \
+ r1 = r7; \
+ r2 = r8; \
+ call %[bpf_map_lookup_elem]; \
+ if r0 != 0 goto sjmp1; \
+ exit; \
+ sjmp1: \
+ r7 = r0; \
+ r1 = r6; \
+ r2 = 8; \
+ r3 = 0; \
+ r4 = r10; \
+ r4 += -24; \
+ call %[bpf_ringbuf_reserve_dynptr]; \
+ *(u64 *)(r10 - 16) = r9; \
+ r1 = r7; \
+ r2 = 8; \
+ r3 = 0; \
+ r4 = r10; \
+ r4 += -16; \
+ call %[bpf_dynptr_from_mem]; \
+ r1 = r10; \
+ r1 += -512; \
+ r2 = 488; \
+ r3 = r10; \
+ r3 += -24; \
+ r4 = 0; \
+ r5 = 0; \
+ call %[bpf_dynptr_read]; \
+ r8 = 1; \
+ if r0 != 0 goto sjmp2; \
+ r8 = 0; \
+ sjmp2: \
+ r1 = r10; \
+ r1 += -24; \
+ r2 = 0; \
+ call %[bpf_ringbuf_discard_dynptr]; "
+ :
+ : __imm(bpf_map_update_elem),
+ __imm(bpf_map_lookup_elem),
+ __imm(bpf_ringbuf_reserve_dynptr),
+ __imm(bpf_ringbuf_discard_dynptr),
+ __imm(bpf_dynptr_from_mem),
+ __imm(bpf_dynptr_read),
+ __imm_addr(ringbuf),
+ __imm_addr(array_map4)
+ : __clobber_all
+ );
+ return 0;
+}
+
+/* Test that it is allowed to overwrite unreferenced dynptr. */
+SEC("?raw_tp")
+__success
+int dynptr_overwrite_unref(void *ctx)
+{
+ struct bpf_dynptr ptr;
+
+ if (get_map_val_dynptr(&ptr))
+ return 0;
+ if (get_map_val_dynptr(&ptr))
+ return 0;
+ if (get_map_val_dynptr(&ptr))
+ return 0;
+
+ return 0;
+}
+
+/* Test that slices are invalidated on reinitializing a dynptr. */
+SEC("?raw_tp")
+__failure __msg("invalid mem access 'scalar'")
+int dynptr_invalidate_slice_reinit(void *ctx)
+{
+ struct bpf_dynptr ptr;
+ __u8 *p;
+
+ if (get_map_val_dynptr(&ptr))
+ return 0;
+ p = bpf_dynptr_data(&ptr, 0, 1);
+ if (!p)
+ return 0;
+ if (get_map_val_dynptr(&ptr))
+ return 0;
+ /* this should fail */
+ return *p;
+}
+
+/* Invalidation of dynptr slices on destruction of dynptr should not miss
+ * mem_or_null pointers.
+ */
+SEC("?raw_tp")
+__failure __msg("R1 type=scalar expected=percpu_ptr_")
+int dynptr_invalidate_slice_or_null(void *ctx)
+{
+ struct bpf_dynptr ptr;
+ __u8 *p;
+
+ if (get_map_val_dynptr(&ptr))
+ return 0;
+
+ p = bpf_dynptr_data(&ptr, 0, 1);
+ *(__u8 *)&ptr = 0;
+ /* this should fail */
+ bpf_this_cpu_ptr(p);
+ return 0;
+}
+
+/* Destruction of dynptr should also any slices obtained from it */
+SEC("?raw_tp")
+__failure __msg("R7 invalid mem access 'scalar'")
+int dynptr_invalidate_slice_failure(void *ctx)
+{
+ struct bpf_dynptr ptr1;
+ struct bpf_dynptr ptr2;
+ __u8 *p1, *p2;
+
+ if (get_map_val_dynptr(&ptr1))
+ return 0;
+ if (get_map_val_dynptr(&ptr2))
+ return 0;
+
+ p1 = bpf_dynptr_data(&ptr1, 0, 1);
+ if (!p1)
+ return 0;
+ p2 = bpf_dynptr_data(&ptr2, 0, 1);
+ if (!p2)
+ return 0;
+
+ *(__u8 *)&ptr1 = 0;
+ /* this should fail */
+ return *p1;
+}
+
+/* Invalidation of slices should be scoped and should not prevent dereferencing
+ * slices of another dynptr after destroying unrelated dynptr
+ */
+SEC("?raw_tp")
+__success
+int dynptr_invalidate_slice_success(void *ctx)
+{
+ struct bpf_dynptr ptr1;
+ struct bpf_dynptr ptr2;
+ __u8 *p1, *p2;
+
+ if (get_map_val_dynptr(&ptr1))
+ return 1;
+ if (get_map_val_dynptr(&ptr2))
+ return 1;
+
+ p1 = bpf_dynptr_data(&ptr1, 0, 1);
+ if (!p1)
+ return 1;
+ p2 = bpf_dynptr_data(&ptr2, 0, 1);
+ if (!p2)
+ return 1;
+
+ *(__u8 *)&ptr1 = 0;
+ return *p2;
+}
+
+/* Overwriting referenced dynptr should be rejected */
+SEC("?raw_tp")
+__failure __msg("cannot overwrite referenced dynptr")
+int dynptr_overwrite_ref(void *ctx)
+{
+ struct bpf_dynptr ptr;
+
+ bpf_ringbuf_reserve_dynptr(&ringbuf, 64, 0, &ptr);
+ /* this should fail */
+ if (get_map_val_dynptr(&ptr))
+ bpf_ringbuf_discard_dynptr(&ptr, 0);
+ return 0;
+}
+
+/* Reject writes to dynptr slot from bpf_dynptr_read */
+SEC("?raw_tp")
+__failure __msg("potential write to dynptr at off=-16")
+int dynptr_read_into_slot(void *ctx)
+{
+ union {
+ struct {
+ char _pad[48];
+ struct bpf_dynptr ptr;
+ };
+ char buf[64];
+ } data;
+
+ bpf_ringbuf_reserve_dynptr(&ringbuf, 64, 0, &data.ptr);
+ /* this should fail */
+ bpf_dynptr_read(data.buf, sizeof(data.buf), &data.ptr, 0, 0);
+
+ return 0;
+}
+
+/* bpf_dynptr_slice()s are read-only and cannot be written to */
+SEC("?tc")
+__failure __msg("R0 cannot write into rdonly_mem")
+int skb_invalid_slice_write(struct __sk_buff *skb)
+{
+ struct bpf_dynptr ptr;
+ struct ethhdr *hdr;
+ char buffer[sizeof(*hdr)] = {};
+
+ bpf_dynptr_from_skb(skb, 0, &ptr);
+
+ hdr = bpf_dynptr_slice(&ptr, 0, buffer, sizeof(buffer));
+ if (!hdr)
+ return SK_DROP;
+
+ /* this should fail */
+ hdr->h_proto = 1;
+
+ return SK_PASS;
+}
+
+/* The read-only data slice is invalidated whenever a helper changes packet data */
+SEC("?tc")
+__failure __msg("invalid mem access 'scalar'")
+int skb_invalid_data_slice1(struct __sk_buff *skb)
+{
+ struct bpf_dynptr ptr;
+ struct ethhdr *hdr;
+ char buffer[sizeof(*hdr)] = {};
+
+ bpf_dynptr_from_skb(skb, 0, &ptr);
+
+ hdr = bpf_dynptr_slice(&ptr, 0, buffer, sizeof(buffer));
+ if (!hdr)
+ return SK_DROP;
+
+ val = hdr->h_proto;
+
+ if (bpf_skb_pull_data(skb, skb->len))
+ return SK_DROP;
+
+ /* this should fail */
+ val = hdr->h_proto;
+
+ return SK_PASS;
+}
+
+/* The read-write data slice is invalidated whenever a helper changes packet data */
+SEC("?tc")
+__failure __msg("invalid mem access 'scalar'")
+int skb_invalid_data_slice2(struct __sk_buff *skb)
+{
+ struct bpf_dynptr ptr;
+ struct ethhdr *hdr;
+ char buffer[sizeof(*hdr)] = {};
+
+ bpf_dynptr_from_skb(skb, 0, &ptr);
+
+ hdr = bpf_dynptr_slice_rdwr(&ptr, 0, buffer, sizeof(buffer));
+ if (!hdr)
+ return SK_DROP;
+
+ hdr->h_proto = 123;
+
+ if (bpf_skb_pull_data(skb, skb->len))
+ return SK_DROP;
+
+ /* this should fail */
+ hdr->h_proto = 1;
+
+ return SK_PASS;
+}
+
+/* The read-only data slice is invalidated whenever bpf_dynptr_write() is called */
+SEC("?tc")
+__failure __msg("invalid mem access 'scalar'")
+int skb_invalid_data_slice3(struct __sk_buff *skb)
+{
+ char write_data[64] = "hello there, world!!";
+ struct bpf_dynptr ptr;
+ struct ethhdr *hdr;
+ char buffer[sizeof(*hdr)] = {};
+
+ bpf_dynptr_from_skb(skb, 0, &ptr);
+
+ hdr = bpf_dynptr_slice(&ptr, 0, buffer, sizeof(buffer));
+ if (!hdr)
+ return SK_DROP;
+
+ val = hdr->h_proto;
+
+ bpf_dynptr_write(&ptr, 0, write_data, sizeof(write_data), 0);
+
+ /* this should fail */
+ val = hdr->h_proto;
+
+ return SK_PASS;
+}
+
+/* The read-write data slice is invalidated whenever bpf_dynptr_write() is called */
+SEC("?tc")
+__failure __msg("invalid mem access 'scalar'")
+int skb_invalid_data_slice4(struct __sk_buff *skb)
+{
+ char write_data[64] = "hello there, world!!";
+ struct bpf_dynptr ptr;
+ struct ethhdr *hdr;
+ char buffer[sizeof(*hdr)] = {};
+
+ bpf_dynptr_from_skb(skb, 0, &ptr);
+ hdr = bpf_dynptr_slice_rdwr(&ptr, 0, buffer, sizeof(buffer));
+ if (!hdr)
+ return SK_DROP;
+
+ hdr->h_proto = 123;
+
+ bpf_dynptr_write(&ptr, 0, write_data, sizeof(write_data), 0);
+
+ /* this should fail */
+ hdr->h_proto = 1;
+
+ return SK_PASS;
+}
+
+/* The read-only data slice is invalidated whenever a helper changes packet data */
+SEC("?xdp")
+__failure __msg("invalid mem access 'scalar'")
+int xdp_invalid_data_slice1(struct xdp_md *xdp)
+{
+ struct bpf_dynptr ptr;
+ struct ethhdr *hdr;
+ char buffer[sizeof(*hdr)] = {};
+
+ bpf_dynptr_from_xdp(xdp, 0, &ptr);
+ hdr = bpf_dynptr_slice(&ptr, 0, buffer, sizeof(buffer));
+ if (!hdr)
+ return SK_DROP;
+
+ val = hdr->h_proto;
+
+ if (bpf_xdp_adjust_head(xdp, 0 - (int)sizeof(*hdr)))
+ return XDP_DROP;
+
+ /* this should fail */
+ val = hdr->h_proto;
+
+ return XDP_PASS;
+}
+
+/* The read-write data slice is invalidated whenever a helper changes packet data */
+SEC("?xdp")
+__failure __msg("invalid mem access 'scalar'")
+int xdp_invalid_data_slice2(struct xdp_md *xdp)
+{
+ struct bpf_dynptr ptr;
+ struct ethhdr *hdr;
+ char buffer[sizeof(*hdr)] = {};
+
+ bpf_dynptr_from_xdp(xdp, 0, &ptr);
+ hdr = bpf_dynptr_slice_rdwr(&ptr, 0, buffer, sizeof(buffer));
+ if (!hdr)
+ return SK_DROP;
+
+ hdr->h_proto = 9;
+
+ if (bpf_xdp_adjust_head(xdp, 0 - (int)sizeof(*hdr)))
+ return XDP_DROP;
+
+ /* this should fail */
+ hdr->h_proto = 1;
+
+ return XDP_PASS;
+}
+
+/* Only supported prog type can create skb-type dynptrs */
+SEC("?raw_tp")
+__failure __msg("calling kernel function bpf_dynptr_from_skb is not allowed")
+int skb_invalid_ctx(void *ctx)
+{
+ struct bpf_dynptr ptr;
+
+ /* this should fail */
+ bpf_dynptr_from_skb(ctx, 0, &ptr);
+
+ return 0;
+}
+
+/* Reject writes to dynptr slot for uninit arg */
+SEC("?raw_tp")
+__failure __msg("potential write to dynptr at off=-16")
+int uninit_write_into_slot(void *ctx)
+{
+ struct {
+ char buf[64];
+ struct bpf_dynptr ptr;
+ } data;
+
+ bpf_ringbuf_reserve_dynptr(&ringbuf, 80, 0, &data.ptr);
+ /* this should fail */
+ bpf_get_current_comm(data.buf, 80);
+
+ return 0;
+}
+
+/* Only supported prog type can create xdp-type dynptrs */
+SEC("?raw_tp")
+__failure __msg("calling kernel function bpf_dynptr_from_xdp is not allowed")
+int xdp_invalid_ctx(void *ctx)
+{
+ struct bpf_dynptr ptr;
+
+ /* this should fail */
+ bpf_dynptr_from_xdp(ctx, 0, &ptr);
+
+ return 0;
+}
+
+__u32 hdr_size = sizeof(struct ethhdr);
+/* Can't pass in variable-sized len to bpf_dynptr_slice */
+SEC("?tc")
+__failure __msg("unbounded memory access")
+int dynptr_slice_var_len1(struct __sk_buff *skb)
+{
+ struct bpf_dynptr ptr;
+ struct ethhdr *hdr;
+ char buffer[sizeof(*hdr)] = {};
+
+ bpf_dynptr_from_skb(skb, 0, &ptr);
+
+ /* this should fail */
+ hdr = bpf_dynptr_slice(&ptr, 0, buffer, hdr_size);
+ if (!hdr)
+ return SK_DROP;
+
+ return SK_PASS;
+}
+
+/* Can't pass in variable-sized len to bpf_dynptr_slice */
+SEC("?tc")
+__failure __msg("must be a known constant")
+int dynptr_slice_var_len2(struct __sk_buff *skb)
+{
+ char buffer[sizeof(struct ethhdr)] = {};
+ struct bpf_dynptr ptr;
+ struct ethhdr *hdr;
+
+ bpf_dynptr_from_skb(skb, 0, &ptr);
+
+ if (hdr_size <= sizeof(buffer)) {
+ /* this should fail */
+ hdr = bpf_dynptr_slice_rdwr(&ptr, 0, buffer, hdr_size);
+ if (!hdr)
+ return SK_DROP;
+ hdr->h_proto = 12;
+ }
+
+ return SK_PASS;
+}
+
+static int callback(__u32 index, void *data)
+{
+ *(__u32 *)data = 123;
+
+ return 0;
+}
+
+/* If the dynptr is written into in a callback function, its data
+ * slices should be invalidated as well.
+ */
+SEC("?raw_tp")
+__failure __msg("invalid mem access 'scalar'")
+int invalid_data_slices(void *ctx)
+{
+ struct bpf_dynptr ptr;
+ __u32 *slice;
+
+ if (get_map_val_dynptr(&ptr))
+ return 0;
+
+ slice = bpf_dynptr_data(&ptr, 0, sizeof(__u32));
+ if (!slice)
+ return 0;
+
+ bpf_loop(10, callback, &ptr, 0);
+
+ /* this should fail */
+ *slice = 1;
+
+ return 0;
+}
+
+/* Program types that don't allow writes to packet data should fail if
+ * bpf_dynptr_slice_rdwr is called
+ */
+SEC("cgroup_skb/ingress")
+__failure __msg("the prog does not allow writes to packet data")
+int invalid_slice_rdwr_rdonly(struct __sk_buff *skb)
+{
+ char buffer[sizeof(struct ethhdr)] = {};
+ struct bpf_dynptr ptr;
+ struct ethhdr *hdr;
+
+ bpf_dynptr_from_skb(skb, 0, &ptr);
+
+ /* this should fail since cgroup_skb doesn't allow
+ * changing packet data
+ */
+ hdr = bpf_dynptr_slice_rdwr(&ptr, 0, buffer, sizeof(buffer));
+ __sink(hdr);
+
+ return 0;
+}
+
+/* bpf_dynptr_adjust can only be called on initialized dynptrs */
+SEC("?raw_tp")
+__failure __msg("Expected an initialized dynptr as arg #1")
+int dynptr_adjust_invalid(void *ctx)
+{
+ struct bpf_dynptr ptr;
+
+ /* this should fail */
+ bpf_dynptr_adjust(&ptr, 1, 2);
+
+ return 0;
+}
+
+/* bpf_dynptr_is_null can only be called on initialized dynptrs */
+SEC("?raw_tp")
+__failure __msg("Expected an initialized dynptr as arg #1")
+int dynptr_is_null_invalid(void *ctx)
+{
+ struct bpf_dynptr ptr;
+
+ /* this should fail */
+ bpf_dynptr_is_null(&ptr);
+
+ return 0;
+}
+
+/* bpf_dynptr_is_rdonly can only be called on initialized dynptrs */
+SEC("?raw_tp")
+__failure __msg("Expected an initialized dynptr as arg #1")
+int dynptr_is_rdonly_invalid(void *ctx)
+{
+ struct bpf_dynptr ptr;
+
+ /* this should fail */
+ bpf_dynptr_is_rdonly(&ptr);
+
+ return 0;
+}
+
+/* bpf_dynptr_size can only be called on initialized dynptrs */
+SEC("?raw_tp")
+__failure __msg("Expected an initialized dynptr as arg #1")
+int dynptr_size_invalid(void *ctx)
+{
+ struct bpf_dynptr ptr;
+
+ /* this should fail */
+ bpf_dynptr_size(&ptr);
+
+ return 0;
+}
+
+/* Only initialized dynptrs can be cloned */
+SEC("?raw_tp")
+__failure __msg("Expected an initialized dynptr as arg #1")
+int clone_invalid1(void *ctx)
+{
+ struct bpf_dynptr ptr1;
+ struct bpf_dynptr ptr2;
+
+ /* this should fail */
+ bpf_dynptr_clone(&ptr1, &ptr2);
+
+ return 0;
+}
+
+/* Can't overwrite an existing dynptr when cloning */
+SEC("?xdp")
+__failure __msg("cannot overwrite referenced dynptr")
+int clone_invalid2(struct xdp_md *xdp)
+{
+ struct bpf_dynptr ptr1;
+ struct bpf_dynptr clone;
+
+ bpf_dynptr_from_xdp(xdp, 0, &ptr1);
+
+ bpf_ringbuf_reserve_dynptr(&ringbuf, 64, 0, &clone);
+
+ /* this should fail */
+ bpf_dynptr_clone(&ptr1, &clone);
+
+ bpf_ringbuf_submit_dynptr(&clone, 0);
+
+ return 0;
+}
+
+/* Invalidating a dynptr should invalidate its clones */
+SEC("?raw_tp")
+__failure __msg("Expected an initialized dynptr as arg #3")
+int clone_invalidate1(void *ctx)
+{
+ struct bpf_dynptr clone;
+ struct bpf_dynptr ptr;
+ char read_data[64];
+
+ bpf_ringbuf_reserve_dynptr(&ringbuf, val, 0, &ptr);
+
+ bpf_dynptr_clone(&ptr, &clone);
+
+ bpf_ringbuf_submit_dynptr(&ptr, 0);
+
+ /* this should fail */
+ bpf_dynptr_read(read_data, sizeof(read_data), &clone, 0, 0);
+
+ return 0;
+}
+
+/* Invalidating a dynptr should invalidate its parent */
+SEC("?raw_tp")
+__failure __msg("Expected an initialized dynptr as arg #3")
+int clone_invalidate2(void *ctx)
+{
+ struct bpf_dynptr ptr;
+ struct bpf_dynptr clone;
+ char read_data[64];
+
+ bpf_ringbuf_reserve_dynptr(&ringbuf, val, 0, &ptr);
+
+ bpf_dynptr_clone(&ptr, &clone);
+
+ bpf_ringbuf_submit_dynptr(&clone, 0);
+
+ /* this should fail */
+ bpf_dynptr_read(read_data, sizeof(read_data), &ptr, 0, 0);
+
+ return 0;
+}
+
+/* Invalidating a dynptr should invalidate its siblings */
+SEC("?raw_tp")
+__failure __msg("Expected an initialized dynptr as arg #3")
+int clone_invalidate3(void *ctx)
+{
+ struct bpf_dynptr ptr;
+ struct bpf_dynptr clone1;
+ struct bpf_dynptr clone2;
+ char read_data[64];
+
+ bpf_ringbuf_reserve_dynptr(&ringbuf, val, 0, &ptr);
+
+ bpf_dynptr_clone(&ptr, &clone1);
+
+ bpf_dynptr_clone(&ptr, &clone2);
+
+ bpf_ringbuf_submit_dynptr(&clone2, 0);
+
+ /* this should fail */
+ bpf_dynptr_read(read_data, sizeof(read_data), &clone1, 0, 0);
+
+ return 0;
+}
+
+/* Invalidating a dynptr should invalidate any data slices
+ * of its clones
+ */
+SEC("?raw_tp")
+__failure __msg("invalid mem access 'scalar'")
+int clone_invalidate4(void *ctx)
+{
+ struct bpf_dynptr ptr;
+ struct bpf_dynptr clone;
+ int *data;
+
+ bpf_ringbuf_reserve_dynptr(&ringbuf, val, 0, &ptr);
+
+ bpf_dynptr_clone(&ptr, &clone);
+ data = bpf_dynptr_data(&clone, 0, sizeof(val));
+ if (!data)
+ return 0;
+
+ bpf_ringbuf_submit_dynptr(&ptr, 0);
+
+ /* this should fail */
+ *data = 123;
+
+ return 0;
+}
+
+/* Invalidating a dynptr should invalidate any data slices
+ * of its parent
+ */
+SEC("?raw_tp")
+__failure __msg("invalid mem access 'scalar'")
+int clone_invalidate5(void *ctx)
+{
+ struct bpf_dynptr ptr;
+ struct bpf_dynptr clone;
+ int *data;
+
+ bpf_ringbuf_reserve_dynptr(&ringbuf, val, 0, &ptr);
+ data = bpf_dynptr_data(&ptr, 0, sizeof(val));
+ if (!data)
+ return 0;
+
+ bpf_dynptr_clone(&ptr, &clone);
+
+ bpf_ringbuf_submit_dynptr(&clone, 0);
+
+ /* this should fail */
+ *data = 123;
+
+ return 0;
+}
+
+/* Invalidating a dynptr should invalidate any data slices
+ * of its sibling
+ */
+SEC("?raw_tp")
+__failure __msg("invalid mem access 'scalar'")
+int clone_invalidate6(void *ctx)
+{
+ struct bpf_dynptr ptr;
+ struct bpf_dynptr clone1;
+ struct bpf_dynptr clone2;
+ int *data;
+
+ bpf_ringbuf_reserve_dynptr(&ringbuf, val, 0, &ptr);
+
+ bpf_dynptr_clone(&ptr, &clone1);
+
+ bpf_dynptr_clone(&ptr, &clone2);
+
+ data = bpf_dynptr_data(&clone1, 0, sizeof(val));
+ if (!data)
+ return 0;
+
+ bpf_ringbuf_submit_dynptr(&clone2, 0);
+
+ /* this should fail */
+ *data = 123;
+
+ return 0;
+}
+
+/* A skb clone's data slices should be invalid anytime packet data changes */
+SEC("?tc")
+__failure __msg("invalid mem access 'scalar'")
+int clone_skb_packet_data(struct __sk_buff *skb)
+{
+ char buffer[sizeof(__u32)] = {};
+ struct bpf_dynptr clone;
+ struct bpf_dynptr ptr;
+ __u32 *data;
+
+ bpf_dynptr_from_skb(skb, 0, &ptr);
+
+ bpf_dynptr_clone(&ptr, &clone);
+ data = bpf_dynptr_slice_rdwr(&clone, 0, buffer, sizeof(buffer));
+ if (!data)
+ return XDP_DROP;
+
+ if (bpf_skb_pull_data(skb, skb->len))
+ return SK_DROP;
+
+ /* this should fail */
+ *data = 123;
+
+ return 0;
+}
+
+/* A xdp clone's data slices should be invalid anytime packet data changes */
+SEC("?xdp")
+__failure __msg("invalid mem access 'scalar'")
+int clone_xdp_packet_data(struct xdp_md *xdp)
+{
+ char buffer[sizeof(__u32)] = {};
+ struct bpf_dynptr clone;
+ struct bpf_dynptr ptr;
+ struct ethhdr *hdr;
+ __u32 *data;
+
+ bpf_dynptr_from_xdp(xdp, 0, &ptr);
+
+ bpf_dynptr_clone(&ptr, &clone);
+ data = bpf_dynptr_slice_rdwr(&clone, 0, buffer, sizeof(buffer));
+ if (!data)
+ return XDP_DROP;
+
+ if (bpf_xdp_adjust_head(xdp, 0 - (int)sizeof(*hdr)))
+ return XDP_DROP;
+
+ /* this should fail */
+ *data = 123;
+
+ return 0;
+}
+
+/* Buffers that are provided must be sufficiently long */
+SEC("?cgroup_skb/egress")
+__failure __msg("memory, len pair leads to invalid memory access")
+int test_dynptr_skb_small_buff(struct __sk_buff *skb)
+{
+ struct bpf_dynptr ptr;
+ char buffer[8] = {};
+ __u64 *data;
+
+ if (bpf_dynptr_from_skb(skb, 0, &ptr)) {
+ err = 1;
+ return 1;
+ }
+
+ /* This may return NULL. SKB may require a buffer */
+ data = bpf_dynptr_slice(&ptr, 0, buffer, 9);
+
+ return !!data;
+}
diff --git a/tools/testing/selftests/bpf/progs/dynptr_success.c b/tools/testing/selftests/bpf/progs/dynptr_success.c
new file mode 100644
index 000000000000..5985920d162e
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/dynptr_success.c
@@ -0,0 +1,546 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2022 Facebook */
+
+#include <string.h>
+#include <stdbool.h>
+#include <linux/bpf.h>
+#include <bpf/bpf_helpers.h>
+#include "bpf_misc.h"
+#include "bpf_kfuncs.h"
+#include "errno.h"
+
+char _license[] SEC("license") = "GPL";
+
+int pid, err, val;
+
+struct sample {
+ int pid;
+ int seq;
+ long value;
+ char comm[16];
+};
+
+struct {
+ __uint(type, BPF_MAP_TYPE_RINGBUF);
+ __uint(max_entries, 4096);
+} ringbuf SEC(".maps");
+
+struct {
+ __uint(type, BPF_MAP_TYPE_ARRAY);
+ __uint(max_entries, 1);
+ __type(key, __u32);
+ __type(value, __u32);
+} array_map SEC(".maps");
+
+SEC("?tp/syscalls/sys_enter_nanosleep")
+int test_read_write(void *ctx)
+{
+ char write_data[64] = "hello there, world!!";
+ char read_data[64] = {};
+ struct bpf_dynptr ptr;
+ int i;
+
+ if (bpf_get_current_pid_tgid() >> 32 != pid)
+ return 0;
+
+ bpf_ringbuf_reserve_dynptr(&ringbuf, sizeof(write_data), 0, &ptr);
+
+ /* Write data into the dynptr */
+ err = bpf_dynptr_write(&ptr, 0, write_data, sizeof(write_data), 0);
+
+ /* Read the data that was written into the dynptr */
+ err = err ?: bpf_dynptr_read(read_data, sizeof(read_data), &ptr, 0, 0);
+
+ /* Ensure the data we read matches the data we wrote */
+ for (i = 0; i < sizeof(read_data); i++) {
+ if (read_data[i] != write_data[i]) {
+ err = 1;
+ break;
+ }
+ }
+
+ bpf_ringbuf_discard_dynptr(&ptr, 0);
+ return 0;
+}
+
+SEC("?tp/syscalls/sys_enter_nanosleep")
+int test_dynptr_data(void *ctx)
+{
+ __u32 key = 0, val = 235, *map_val;
+ struct bpf_dynptr ptr;
+ __u32 map_val_size;
+ void *data;
+
+ map_val_size = sizeof(*map_val);
+
+ if (bpf_get_current_pid_tgid() >> 32 != pid)
+ return 0;
+
+ bpf_map_update_elem(&array_map, &key, &val, 0);
+
+ map_val = bpf_map_lookup_elem(&array_map, &key);
+ if (!map_val) {
+ err = 1;
+ return 0;
+ }
+
+ bpf_dynptr_from_mem(map_val, map_val_size, 0, &ptr);
+
+ /* Try getting a data slice that is out of range */
+ data = bpf_dynptr_data(&ptr, map_val_size + 1, 1);
+ if (data) {
+ err = 2;
+ return 0;
+ }
+
+ /* Try getting more bytes than available */
+ data = bpf_dynptr_data(&ptr, 0, map_val_size + 1);
+ if (data) {
+ err = 3;
+ return 0;
+ }
+
+ data = bpf_dynptr_data(&ptr, 0, sizeof(__u32));
+ if (!data) {
+ err = 4;
+ return 0;
+ }
+
+ *(__u32 *)data = 999;
+
+ err = bpf_probe_read_kernel(&val, sizeof(val), data);
+ if (err)
+ return 0;
+
+ if (val != *(int *)data)
+ err = 5;
+
+ return 0;
+}
+
+static int ringbuf_callback(__u32 index, void *data)
+{
+ struct sample *sample;
+
+ struct bpf_dynptr *ptr = (struct bpf_dynptr *)data;
+
+ sample = bpf_dynptr_data(ptr, 0, sizeof(*sample));
+ if (!sample)
+ err = 2;
+ else
+ sample->pid += index;
+
+ return 0;
+}
+
+SEC("?tp/syscalls/sys_enter_nanosleep")
+int test_ringbuf(void *ctx)
+{
+ struct bpf_dynptr ptr;
+ struct sample *sample;
+
+ if (bpf_get_current_pid_tgid() >> 32 != pid)
+ return 0;
+
+ val = 100;
+
+ /* check that you can reserve a dynamic size reservation */
+ err = bpf_ringbuf_reserve_dynptr(&ringbuf, val, 0, &ptr);
+
+ sample = err ? NULL : bpf_dynptr_data(&ptr, 0, sizeof(*sample));
+ if (!sample) {
+ err = 1;
+ goto done;
+ }
+
+ sample->pid = 10;
+
+ /* Can pass dynptr to callback functions */
+ bpf_loop(10, ringbuf_callback, &ptr, 0);
+
+ if (sample->pid != 55)
+ err = 2;
+
+done:
+ bpf_ringbuf_discard_dynptr(&ptr, 0);
+ return 0;
+}
+
+SEC("?cgroup_skb/egress")
+int test_skb_readonly(struct __sk_buff *skb)
+{
+ __u8 write_data[2] = {1, 2};
+ struct bpf_dynptr ptr;
+ int ret;
+
+ if (bpf_dynptr_from_skb(skb, 0, &ptr)) {
+ err = 1;
+ return 1;
+ }
+
+ /* since cgroup skbs are read only, writes should fail */
+ ret = bpf_dynptr_write(&ptr, 0, write_data, sizeof(write_data), 0);
+ if (ret != -EINVAL) {
+ err = 2;
+ return 1;
+ }
+
+ return 1;
+}
+
+SEC("?cgroup_skb/egress")
+int test_dynptr_skb_data(struct __sk_buff *skb)
+{
+ struct bpf_dynptr ptr;
+ __u64 *data;
+
+ if (bpf_dynptr_from_skb(skb, 0, &ptr)) {
+ err = 1;
+ return 1;
+ }
+
+ /* This should return NULL. Must use bpf_dynptr_slice API */
+ data = bpf_dynptr_data(&ptr, 0, 1);
+ if (data) {
+ err = 2;
+ return 1;
+ }
+
+ return 1;
+}
+
+SEC("tp/syscalls/sys_enter_nanosleep")
+int test_adjust(void *ctx)
+{
+ struct bpf_dynptr ptr;
+ __u32 bytes = 64;
+ __u32 off = 10;
+ __u32 trim = 15;
+
+ if (bpf_get_current_pid_tgid() >> 32 != pid)
+ return 0;
+
+ err = bpf_ringbuf_reserve_dynptr(&ringbuf, bytes, 0, &ptr);
+ if (err) {
+ err = 1;
+ goto done;
+ }
+
+ if (bpf_dynptr_size(&ptr) != bytes) {
+ err = 2;
+ goto done;
+ }
+
+ /* Advance the dynptr by off */
+ err = bpf_dynptr_adjust(&ptr, off, bpf_dynptr_size(&ptr));
+ if (err) {
+ err = 3;
+ goto done;
+ }
+
+ if (bpf_dynptr_size(&ptr) != bytes - off) {
+ err = 4;
+ goto done;
+ }
+
+ /* Trim the dynptr */
+ err = bpf_dynptr_adjust(&ptr, off, 15);
+ if (err) {
+ err = 5;
+ goto done;
+ }
+
+ /* Check that the size was adjusted correctly */
+ if (bpf_dynptr_size(&ptr) != trim - off) {
+ err = 6;
+ goto done;
+ }
+
+done:
+ bpf_ringbuf_discard_dynptr(&ptr, 0);
+ return 0;
+}
+
+SEC("tp/syscalls/sys_enter_nanosleep")
+int test_adjust_err(void *ctx)
+{
+ char write_data[45] = "hello there, world!!";
+ struct bpf_dynptr ptr;
+ __u32 size = 64;
+ __u32 off = 20;
+
+ if (bpf_get_current_pid_tgid() >> 32 != pid)
+ return 0;
+
+ if (bpf_ringbuf_reserve_dynptr(&ringbuf, size, 0, &ptr)) {
+ err = 1;
+ goto done;
+ }
+
+ /* Check that start can't be greater than end */
+ if (bpf_dynptr_adjust(&ptr, 5, 1) != -EINVAL) {
+ err = 2;
+ goto done;
+ }
+
+ /* Check that start can't be greater than size */
+ if (bpf_dynptr_adjust(&ptr, size + 1, size + 1) != -ERANGE) {
+ err = 3;
+ goto done;
+ }
+
+ /* Check that end can't be greater than size */
+ if (bpf_dynptr_adjust(&ptr, 0, size + 1) != -ERANGE) {
+ err = 4;
+ goto done;
+ }
+
+ if (bpf_dynptr_adjust(&ptr, off, size)) {
+ err = 5;
+ goto done;
+ }
+
+ /* Check that you can't write more bytes than available into the dynptr
+ * after you've adjusted it
+ */
+ if (bpf_dynptr_write(&ptr, 0, &write_data, sizeof(write_data), 0) != -E2BIG) {
+ err = 6;
+ goto done;
+ }
+
+ /* Check that even after adjusting, submitting/discarding
+ * a ringbuf dynptr works
+ */
+ bpf_ringbuf_submit_dynptr(&ptr, 0);
+ return 0;
+
+done:
+ bpf_ringbuf_discard_dynptr(&ptr, 0);
+ return 0;
+}
+
+SEC("tp/syscalls/sys_enter_nanosleep")
+int test_zero_size_dynptr(void *ctx)
+{
+ char write_data = 'x', read_data;
+ struct bpf_dynptr ptr;
+ __u32 size = 64;
+
+ if (bpf_get_current_pid_tgid() >> 32 != pid)
+ return 0;
+
+ if (bpf_ringbuf_reserve_dynptr(&ringbuf, size, 0, &ptr)) {
+ err = 1;
+ goto done;
+ }
+
+ /* After this, the dynptr has a size of 0 */
+ if (bpf_dynptr_adjust(&ptr, size, size)) {
+ err = 2;
+ goto done;
+ }
+
+ /* Test that reading + writing non-zero bytes is not ok */
+ if (bpf_dynptr_read(&read_data, sizeof(read_data), &ptr, 0, 0) != -E2BIG) {
+ err = 3;
+ goto done;
+ }
+
+ if (bpf_dynptr_write(&ptr, 0, &write_data, sizeof(write_data), 0) != -E2BIG) {
+ err = 4;
+ goto done;
+ }
+
+ /* Test that reading + writing 0 bytes from a 0-size dynptr is ok */
+ if (bpf_dynptr_read(&read_data, 0, &ptr, 0, 0)) {
+ err = 5;
+ goto done;
+ }
+
+ if (bpf_dynptr_write(&ptr, 0, &write_data, 0, 0)) {
+ err = 6;
+ goto done;
+ }
+
+ err = 0;
+
+done:
+ bpf_ringbuf_discard_dynptr(&ptr, 0);
+ return 0;
+}
+
+SEC("tp/syscalls/sys_enter_nanosleep")
+int test_dynptr_is_null(void *ctx)
+{
+ struct bpf_dynptr ptr1;
+ struct bpf_dynptr ptr2;
+ __u64 size = 4;
+
+ if (bpf_get_current_pid_tgid() >> 32 != pid)
+ return 0;
+
+ /* Pass in invalid flags, get back an invalid dynptr */
+ if (bpf_ringbuf_reserve_dynptr(&ringbuf, size, 123, &ptr1) != -EINVAL) {
+ err = 1;
+ goto exit_early;
+ }
+
+ /* Test that the invalid dynptr is null */
+ if (!bpf_dynptr_is_null(&ptr1)) {
+ err = 2;
+ goto exit_early;
+ }
+
+ /* Get a valid dynptr */
+ if (bpf_ringbuf_reserve_dynptr(&ringbuf, size, 0, &ptr2)) {
+ err = 3;
+ goto exit;
+ }
+
+ /* Test that the valid dynptr is not null */
+ if (bpf_dynptr_is_null(&ptr2)) {
+ err = 4;
+ goto exit;
+ }
+
+exit:
+ bpf_ringbuf_discard_dynptr(&ptr2, 0);
+exit_early:
+ bpf_ringbuf_discard_dynptr(&ptr1, 0);
+ return 0;
+}
+
+SEC("cgroup_skb/egress")
+int test_dynptr_is_rdonly(struct __sk_buff *skb)
+{
+ struct bpf_dynptr ptr1;
+ struct bpf_dynptr ptr2;
+ struct bpf_dynptr ptr3;
+
+ /* Pass in invalid flags, get back an invalid dynptr */
+ if (bpf_dynptr_from_skb(skb, 123, &ptr1) != -EINVAL) {
+ err = 1;
+ return 0;
+ }
+
+ /* Test that an invalid dynptr is_rdonly returns false */
+ if (bpf_dynptr_is_rdonly(&ptr1)) {
+ err = 2;
+ return 0;
+ }
+
+ /* Get a read-only dynptr */
+ if (bpf_dynptr_from_skb(skb, 0, &ptr2)) {
+ err = 3;
+ return 0;
+ }
+
+ /* Test that the dynptr is read-only */
+ if (!bpf_dynptr_is_rdonly(&ptr2)) {
+ err = 4;
+ return 0;
+ }
+
+ /* Get a read-writeable dynptr */
+ if (bpf_ringbuf_reserve_dynptr(&ringbuf, 64, 0, &ptr3)) {
+ err = 5;
+ goto done;
+ }
+
+ /* Test that the dynptr is read-only */
+ if (bpf_dynptr_is_rdonly(&ptr3)) {
+ err = 6;
+ goto done;
+ }
+
+done:
+ bpf_ringbuf_discard_dynptr(&ptr3, 0);
+ return 0;
+}
+
+SEC("cgroup_skb/egress")
+int test_dynptr_clone(struct __sk_buff *skb)
+{
+ struct bpf_dynptr ptr1;
+ struct bpf_dynptr ptr2;
+ __u32 off = 2, size;
+
+ /* Get a dynptr */
+ if (bpf_dynptr_from_skb(skb, 0, &ptr1)) {
+ err = 1;
+ return 0;
+ }
+
+ if (bpf_dynptr_adjust(&ptr1, off, bpf_dynptr_size(&ptr1))) {
+ err = 2;
+ return 0;
+ }
+
+ /* Clone the dynptr */
+ if (bpf_dynptr_clone(&ptr1, &ptr2)) {
+ err = 3;
+ return 0;
+ }
+
+ size = bpf_dynptr_size(&ptr1);
+
+ /* Check that the clone has the same size and rd-only */
+ if (bpf_dynptr_size(&ptr2) != size) {
+ err = 4;
+ return 0;
+ }
+
+ if (bpf_dynptr_is_rdonly(&ptr2) != bpf_dynptr_is_rdonly(&ptr1)) {
+ err = 5;
+ return 0;
+ }
+
+ /* Advance and trim the original dynptr */
+ bpf_dynptr_adjust(&ptr1, 5, 5);
+
+ /* Check that only original dynptr was affected, and the clone wasn't */
+ if (bpf_dynptr_size(&ptr2) != size) {
+ err = 6;
+ return 0;
+ }
+
+ return 0;
+}
+
+SEC("?cgroup_skb/egress")
+int test_dynptr_skb_no_buff(struct __sk_buff *skb)
+{
+ struct bpf_dynptr ptr;
+ __u64 *data;
+
+ if (bpf_dynptr_from_skb(skb, 0, &ptr)) {
+ err = 1;
+ return 1;
+ }
+
+ /* This may return NULL. SKB may require a buffer */
+ data = bpf_dynptr_slice(&ptr, 0, NULL, 1);
+
+ return !!data;
+}
+
+SEC("?cgroup_skb/egress")
+int test_dynptr_skb_strcmp(struct __sk_buff *skb)
+{
+ struct bpf_dynptr ptr;
+ char *data;
+
+ if (bpf_dynptr_from_skb(skb, 0, &ptr)) {
+ err = 1;
+ return 1;
+ }
+
+ /* This may return NULL. SKB may require a buffer */
+ data = bpf_dynptr_slice(&ptr, 0, NULL, 10);
+ if (data) {
+ bpf_strncmp(data, 10, "foo");
+ return 1;
+ }
+
+ return 1;
+}
diff --git a/tools/testing/selftests/bpf/progs/empty_skb.c b/tools/testing/selftests/bpf/progs/empty_skb.c
new file mode 100644
index 000000000000..4b0cd6753251
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/empty_skb.c
@@ -0,0 +1,37 @@
+// SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause
+#include <linux/bpf.h>
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_endian.h>
+
+char _license[] SEC("license") = "GPL";
+
+int ifindex;
+int ret;
+
+SEC("lwt_xmit")
+int redirect_ingress(struct __sk_buff *skb)
+{
+ ret = bpf_clone_redirect(skb, ifindex, BPF_F_INGRESS);
+ return 0;
+}
+
+SEC("lwt_xmit")
+int redirect_egress(struct __sk_buff *skb)
+{
+ ret = bpf_clone_redirect(skb, ifindex, 0);
+ return 0;
+}
+
+SEC("tc")
+int tc_redirect_ingress(struct __sk_buff *skb)
+{
+ ret = bpf_clone_redirect(skb, ifindex, BPF_F_INGRESS);
+ return 0;
+}
+
+SEC("tc")
+int tc_redirect_egress(struct __sk_buff *skb)
+{
+ ret = bpf_clone_redirect(skb, ifindex, 0);
+ return 0;
+}
diff --git a/tools/testing/selftests/bpf/progs/err.h b/tools/testing/selftests/bpf/progs/err.h
new file mode 100644
index 000000000000..d66d283d9e59
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/err.h
@@ -0,0 +1,18 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef __ERR_H__
+#define __ERR_H__
+
+#define MAX_ERRNO 4095
+#define IS_ERR_VALUE(x) (unsigned long)(void *)(x) >= (unsigned long)-MAX_ERRNO
+
+static inline int IS_ERR_OR_NULL(const void *ptr)
+{
+ return !ptr || IS_ERR_VALUE((unsigned long)ptr);
+}
+
+static inline long PTR_ERR(const void *ptr)
+{
+ return (long) ptr;
+}
+
+#endif /* __ERR_H__ */
diff --git a/tools/testing/selftests/bpf/progs/exceptions.c b/tools/testing/selftests/bpf/progs/exceptions.c
new file mode 100644
index 000000000000..f09cd14d8e04
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/exceptions.c
@@ -0,0 +1,368 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <vmlinux.h>
+#include <bpf/bpf_tracing.h>
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_core_read.h>
+#include <bpf/bpf_endian.h>
+#include "bpf_misc.h"
+#include "bpf_experimental.h"
+
+#ifndef ETH_P_IP
+#define ETH_P_IP 0x0800
+#endif
+
+struct {
+ __uint(type, BPF_MAP_TYPE_PROG_ARRAY);
+ __uint(max_entries, 4);
+ __uint(key_size, sizeof(__u32));
+ __uint(value_size, sizeof(__u32));
+} jmp_table SEC(".maps");
+
+static __noinline int static_func(u64 i)
+{
+ bpf_throw(32);
+ return i;
+}
+
+__noinline int global2static_simple(u64 i)
+{
+ static_func(i + 2);
+ return i - 1;
+}
+
+__noinline int global2static(u64 i)
+{
+ if (i == ETH_P_IP)
+ bpf_throw(16);
+ return static_func(i);
+}
+
+static __noinline int static2global(u64 i)
+{
+ return global2static(i) + i;
+}
+
+SEC("tc")
+int exception_throw_always_1(struct __sk_buff *ctx)
+{
+ bpf_throw(64);
+ return 0;
+}
+
+/* In this case, the global func will never be seen executing after call to
+ * static subprog, hence verifier will DCE the remaining instructions. Ensure we
+ * are resilient to that.
+ */
+SEC("tc")
+int exception_throw_always_2(struct __sk_buff *ctx)
+{
+ return global2static_simple(ctx->protocol);
+}
+
+SEC("tc")
+int exception_throw_unwind_1(struct __sk_buff *ctx)
+{
+ return static2global(bpf_ntohs(ctx->protocol));
+}
+
+SEC("tc")
+int exception_throw_unwind_2(struct __sk_buff *ctx)
+{
+ return static2global(bpf_ntohs(ctx->protocol) - 1);
+}
+
+SEC("tc")
+int exception_throw_default(struct __sk_buff *ctx)
+{
+ bpf_throw(0);
+ return 1;
+}
+
+SEC("tc")
+int exception_throw_default_value(struct __sk_buff *ctx)
+{
+ bpf_throw(5);
+ return 1;
+}
+
+SEC("tc")
+int exception_tail_call_target(struct __sk_buff *ctx)
+{
+ bpf_throw(16);
+ return 0;
+}
+
+static __noinline
+int exception_tail_call_subprog(struct __sk_buff *ctx)
+{
+ volatile int ret = 10;
+
+ bpf_tail_call_static(ctx, &jmp_table, 0);
+ return ret;
+}
+
+SEC("tc")
+int exception_tail_call(struct __sk_buff *ctx) {
+ volatile int ret = 0;
+
+ ret = exception_tail_call_subprog(ctx);
+ return ret + 8;
+}
+
+__noinline int exception_ext_global(struct __sk_buff *ctx)
+{
+ volatile int ret = 0;
+
+ return ret;
+}
+
+static __noinline int exception_ext_static(struct __sk_buff *ctx)
+{
+ return exception_ext_global(ctx);
+}
+
+SEC("tc")
+int exception_ext(struct __sk_buff *ctx)
+{
+ return exception_ext_static(ctx);
+}
+
+__noinline int exception_cb_mod_global(u64 cookie)
+{
+ volatile int ret = 0;
+
+ return ret;
+}
+
+/* Example of how the exception callback supplied during verification can still
+ * introduce extensions by calling to dummy global functions, and alter runtime
+ * behavior.
+ *
+ * Right now we don't allow freplace attachment to exception callback itself,
+ * but if the need arises this restriction is technically feasible to relax in
+ * the future.
+ */
+__noinline int exception_cb_mod(u64 cookie)
+{
+ return exception_cb_mod_global(cookie) + cookie + 10;
+}
+
+SEC("tc")
+__exception_cb(exception_cb_mod)
+int exception_ext_mod_cb_runtime(struct __sk_buff *ctx)
+{
+ bpf_throw(25);
+ return 0;
+}
+
+__noinline static int subprog(struct __sk_buff *ctx)
+{
+ return bpf_ktime_get_ns();
+}
+
+__noinline static int throwing_subprog(struct __sk_buff *ctx)
+{
+ if (ctx->tstamp)
+ bpf_throw(0);
+ return bpf_ktime_get_ns();
+}
+
+__noinline int global_subprog(struct __sk_buff *ctx)
+{
+ return bpf_ktime_get_ns();
+}
+
+__noinline int throwing_global_subprog(struct __sk_buff *ctx)
+{
+ if (ctx->tstamp)
+ bpf_throw(0);
+ return bpf_ktime_get_ns();
+}
+
+SEC("tc")
+int exception_throw_subprog(struct __sk_buff *ctx)
+{
+ switch (ctx->protocol) {
+ case 1:
+ return subprog(ctx);
+ case 2:
+ return global_subprog(ctx);
+ case 3:
+ return throwing_subprog(ctx);
+ case 4:
+ return throwing_global_subprog(ctx);
+ default:
+ break;
+ }
+ bpf_throw(1);
+ return 0;
+}
+
+__noinline int assert_nz_gfunc(u64 c)
+{
+ volatile u64 cookie = c;
+
+ bpf_assert(cookie != 0);
+ return 0;
+}
+
+__noinline int assert_zero_gfunc(u64 c)
+{
+ volatile u64 cookie = c;
+
+ bpf_assert(bpf_cmp_unlikely(cookie, ==, 0));
+ return 0;
+}
+
+__noinline int assert_neg_gfunc(s64 c)
+{
+ volatile s64 cookie = c;
+
+ bpf_assert(bpf_cmp_unlikely(cookie, <, 0));
+ return 0;
+}
+
+__noinline int assert_pos_gfunc(s64 c)
+{
+ volatile s64 cookie = c;
+
+ bpf_assert(bpf_cmp_unlikely(cookie, >, 0));
+ return 0;
+}
+
+__noinline int assert_negeq_gfunc(s64 c)
+{
+ volatile s64 cookie = c;
+
+ bpf_assert(bpf_cmp_unlikely(cookie, <=, -1));
+ return 0;
+}
+
+__noinline int assert_poseq_gfunc(s64 c)
+{
+ volatile s64 cookie = c;
+
+ bpf_assert(bpf_cmp_unlikely(cookie, >=, 1));
+ return 0;
+}
+
+__noinline int assert_nz_gfunc_with(u64 c)
+{
+ volatile u64 cookie = c;
+
+ bpf_assert_with(cookie != 0, cookie + 100);
+ return 0;
+}
+
+__noinline int assert_zero_gfunc_with(u64 c)
+{
+ volatile u64 cookie = c;
+
+ bpf_assert_with(bpf_cmp_unlikely(cookie, ==, 0), cookie + 100);
+ return 0;
+}
+
+__noinline int assert_neg_gfunc_with(s64 c)
+{
+ volatile s64 cookie = c;
+
+ bpf_assert_with(bpf_cmp_unlikely(cookie, <, 0), cookie + 100);
+ return 0;
+}
+
+__noinline int assert_pos_gfunc_with(s64 c)
+{
+ volatile s64 cookie = c;
+
+ bpf_assert_with(bpf_cmp_unlikely(cookie, >, 0), cookie + 100);
+ return 0;
+}
+
+__noinline int assert_negeq_gfunc_with(s64 c)
+{
+ volatile s64 cookie = c;
+
+ bpf_assert_with(bpf_cmp_unlikely(cookie, <=, -1), cookie + 100);
+ return 0;
+}
+
+__noinline int assert_poseq_gfunc_with(s64 c)
+{
+ volatile s64 cookie = c;
+
+ bpf_assert_with(bpf_cmp_unlikely(cookie, >=, 1), cookie + 100);
+ return 0;
+}
+
+#define check_assert(name, cookie, tag) \
+SEC("tc") \
+int exception##tag##name(struct __sk_buff *ctx) \
+{ \
+ return name(cookie) + 1; \
+}
+
+check_assert(assert_nz_gfunc, 5, _);
+check_assert(assert_zero_gfunc, 0, _);
+check_assert(assert_neg_gfunc, -100, _);
+check_assert(assert_pos_gfunc, 100, _);
+check_assert(assert_negeq_gfunc, -1, _);
+check_assert(assert_poseq_gfunc, 1, _);
+
+check_assert(assert_nz_gfunc_with, 5, _);
+check_assert(assert_zero_gfunc_with, 0, _);
+check_assert(assert_neg_gfunc_with, -100, _);
+check_assert(assert_pos_gfunc_with, 100, _);
+check_assert(assert_negeq_gfunc_with, -1, _);
+check_assert(assert_poseq_gfunc_with, 1, _);
+
+check_assert(assert_nz_gfunc, 0, _bad_);
+check_assert(assert_zero_gfunc, 5, _bad_);
+check_assert(assert_neg_gfunc, 100, _bad_);
+check_assert(assert_pos_gfunc, -100, _bad_);
+check_assert(assert_negeq_gfunc, 1, _bad_);
+check_assert(assert_poseq_gfunc, -1, _bad_);
+
+check_assert(assert_nz_gfunc_with, 0, _bad_);
+check_assert(assert_zero_gfunc_with, 5, _bad_);
+check_assert(assert_neg_gfunc_with, 100, _bad_);
+check_assert(assert_pos_gfunc_with, -100, _bad_);
+check_assert(assert_negeq_gfunc_with, 1, _bad_);
+check_assert(assert_poseq_gfunc_with, -1, _bad_);
+
+SEC("tc")
+int exception_assert_range(struct __sk_buff *ctx)
+{
+ u64 time = bpf_ktime_get_ns();
+
+ bpf_assert_range(time, 0, ~0ULL);
+ return 1;
+}
+
+SEC("tc")
+int exception_assert_range_with(struct __sk_buff *ctx)
+{
+ u64 time = bpf_ktime_get_ns();
+
+ bpf_assert_range_with(time, 0, ~0ULL, 10);
+ return 1;
+}
+
+SEC("tc")
+int exception_bad_assert_range(struct __sk_buff *ctx)
+{
+ u64 time = bpf_ktime_get_ns();
+
+ bpf_assert_range(time, -100, 100);
+ return 1;
+}
+
+SEC("tc")
+int exception_bad_assert_range_with(struct __sk_buff *ctx)
+{
+ u64 time = bpf_ktime_get_ns();
+
+ bpf_assert_range_with(time, -1000, 1000, 10);
+ return 1;
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/exceptions_assert.c b/tools/testing/selftests/bpf/progs/exceptions_assert.c
new file mode 100644
index 000000000000..5e0a1ca96d4e
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/exceptions_assert.c
@@ -0,0 +1,135 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <vmlinux.h>
+#include <limits.h>
+#include <bpf/bpf_tracing.h>
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_core_read.h>
+#include <bpf/bpf_endian.h>
+#include "bpf_misc.h"
+#include "bpf_experimental.h"
+
+#define check_assert(type, op, name, value) \
+ SEC("?tc") \
+ __log_level(2) __failure \
+ int check_assert_##name(void *ctx) \
+ { \
+ type num = bpf_ktime_get_ns(); \
+ bpf_assert(bpf_cmp_unlikely(num, op, value)); \
+ return *(u64 *)num; \
+ }
+
+__msg(": R0_w=0xffffffff80000000")
+check_assert(s64, ==, eq_int_min, INT_MIN);
+__msg(": R0_w=0x7fffffff")
+check_assert(s64, ==, eq_int_max, INT_MAX);
+__msg(": R0_w=0")
+check_assert(s64, ==, eq_zero, 0);
+__msg(": R0_w=0x8000000000000000 R1_w=0x8000000000000000")
+check_assert(s64, ==, eq_llong_min, LLONG_MIN);
+__msg(": R0_w=0x7fffffffffffffff R1_w=0x7fffffffffffffff")
+check_assert(s64, ==, eq_llong_max, LLONG_MAX);
+
+__msg(": R0_w=scalar(id=1,smax=0x7ffffffe)")
+check_assert(s64, <, lt_pos, INT_MAX);
+__msg(": R0_w=scalar(id=1,smax=-1,umin=0x8000000000000000,var_off=(0x8000000000000000; 0x7fffffffffffffff))")
+check_assert(s64, <, lt_zero, 0);
+__msg(": R0_w=scalar(id=1,smax=0xffffffff7fffffff")
+check_assert(s64, <, lt_neg, INT_MIN);
+
+__msg(": R0_w=scalar(id=1,smax=0x7fffffff)")
+check_assert(s64, <=, le_pos, INT_MAX);
+__msg(": R0_w=scalar(id=1,smax=0)")
+check_assert(s64, <=, le_zero, 0);
+__msg(": R0_w=scalar(id=1,smax=0xffffffff80000000")
+check_assert(s64, <=, le_neg, INT_MIN);
+
+__msg(": R0_w=scalar(id=1,smin=umin=0x80000000,umax=0x7fffffffffffffff,var_off=(0x0; 0x7fffffffffffffff))")
+check_assert(s64, >, gt_pos, INT_MAX);
+__msg(": R0_w=scalar(id=1,smin=umin=1,umax=0x7fffffffffffffff,var_off=(0x0; 0x7fffffffffffffff))")
+check_assert(s64, >, gt_zero, 0);
+__msg(": R0_w=scalar(id=1,smin=0xffffffff80000001")
+check_assert(s64, >, gt_neg, INT_MIN);
+
+__msg(": R0_w=scalar(id=1,smin=umin=0x7fffffff,umax=0x7fffffffffffffff,var_off=(0x0; 0x7fffffffffffffff))")
+check_assert(s64, >=, ge_pos, INT_MAX);
+__msg(": R0_w=scalar(id=1,smin=0,umax=0x7fffffffffffffff,var_off=(0x0; 0x7fffffffffffffff))")
+check_assert(s64, >=, ge_zero, 0);
+__msg(": R0_w=scalar(id=1,smin=0xffffffff80000000")
+check_assert(s64, >=, ge_neg, INT_MIN);
+
+SEC("?tc")
+__log_level(2) __failure
+__msg(": R0=0 R1=ctx() R2=scalar(smin=0xffffffff80000002,smax=smax32=0x7ffffffd,smin32=0x80000002) R10=fp0")
+int check_assert_range_s64(struct __sk_buff *ctx)
+{
+ struct bpf_sock *sk = ctx->sk;
+ s64 num;
+
+ _Static_assert(_Generic((sk->rx_queue_mapping), s32: 1, default: 0), "type match");
+ if (!sk)
+ return 0;
+ num = sk->rx_queue_mapping;
+ bpf_assert_range(num, INT_MIN + 2, INT_MAX - 2);
+ return *((u8 *)ctx + num);
+}
+
+SEC("?tc")
+__log_level(2) __failure
+__msg(": R1=ctx() R2=scalar(smin=umin=smin32=umin32=4096,smax=umax=smax32=umax32=8192,var_off=(0x0; 0x3fff))")
+int check_assert_range_u64(struct __sk_buff *ctx)
+{
+ u64 num = ctx->len;
+
+ bpf_assert_range(num, 4096, 8192);
+ return *((u8 *)ctx + num);
+}
+
+SEC("?tc")
+__log_level(2) __failure
+__msg(": R0=0 R1=ctx() R2=4096 R10=fp0")
+int check_assert_single_range_s64(struct __sk_buff *ctx)
+{
+ struct bpf_sock *sk = ctx->sk;
+ s64 num;
+
+ _Static_assert(_Generic((sk->rx_queue_mapping), s32: 1, default: 0), "type match");
+ if (!sk)
+ return 0;
+ num = sk->rx_queue_mapping;
+
+ bpf_assert_range(num, 4096, 4096);
+ return *((u8 *)ctx + num);
+}
+
+SEC("?tc")
+__log_level(2) __failure
+__msg(": R1=ctx() R2=4096 R10=fp0")
+int check_assert_single_range_u64(struct __sk_buff *ctx)
+{
+ u64 num = ctx->len;
+
+ bpf_assert_range(num, 4096, 4096);
+ return *((u8 *)ctx + num);
+}
+
+SEC("?tc")
+__log_level(2) __failure
+__msg(": R1=pkt(off=64,r=64) R2=pkt_end() R6=pkt(r=64) R10=fp0")
+int check_assert_generic(struct __sk_buff *ctx)
+{
+ u8 *data_end = (void *)(long)ctx->data_end;
+ u8 *data = (void *)(long)ctx->data;
+
+ bpf_assert(data + 64 <= data_end);
+ return data[128];
+}
+
+SEC("?fentry/bpf_check")
+__failure __msg("At program exit the register R1 has smin=64 smax=64")
+int check_assert_with_return(void *ctx)
+{
+ bpf_assert_with(!ctx, 64);
+ return 0;
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/exceptions_ext.c b/tools/testing/selftests/bpf/progs/exceptions_ext.c
new file mode 100644
index 000000000000..743c05185d9b
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/exceptions_ext.c
@@ -0,0 +1,72 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <vmlinux.h>
+#include <bpf/bpf_helpers.h>
+#include "bpf_experimental.h"
+
+SEC("?fentry")
+int pfentry(void *ctx)
+{
+ return 0;
+}
+
+SEC("?fentry")
+int throwing_fentry(void *ctx)
+{
+ bpf_throw(0);
+ return 0;
+}
+
+__noinline int exception_cb(u64 cookie)
+{
+ return cookie + 64;
+}
+
+SEC("?freplace")
+int extension(struct __sk_buff *ctx)
+{
+ return 0;
+}
+
+SEC("?freplace")
+__exception_cb(exception_cb)
+int throwing_exception_cb_extension(u64 cookie)
+{
+ bpf_throw(32);
+ return 0;
+}
+
+SEC("?freplace")
+__exception_cb(exception_cb)
+int throwing_extension(struct __sk_buff *ctx)
+{
+ bpf_throw(64);
+ return 0;
+}
+
+SEC("?fexit")
+int pfexit(void *ctx)
+{
+ return 0;
+}
+
+SEC("?fexit")
+int throwing_fexit(void *ctx)
+{
+ bpf_throw(0);
+ return 0;
+}
+
+SEC("?fmod_ret")
+int pfmod_ret(void *ctx)
+{
+ return 0;
+}
+
+SEC("?fmod_ret")
+int throwing_fmod_ret(void *ctx)
+{
+ bpf_throw(0);
+ return 0;
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/exceptions_fail.c b/tools/testing/selftests/bpf/progs/exceptions_fail.c
new file mode 100644
index 000000000000..9cceb6521143
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/exceptions_fail.c
@@ -0,0 +1,349 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <vmlinux.h>
+#include <bpf/bpf_tracing.h>
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_core_read.h>
+
+#include "bpf_misc.h"
+#include "bpf_experimental.h"
+
+extern void bpf_rcu_read_lock(void) __ksym;
+
+#define private(name) SEC(".bss." #name) __hidden __attribute__((aligned(8)))
+
+struct foo {
+ struct bpf_rb_node node;
+};
+
+struct hmap_elem {
+ struct bpf_timer timer;
+};
+
+struct {
+ __uint(type, BPF_MAP_TYPE_HASH);
+ __uint(max_entries, 64);
+ __type(key, int);
+ __type(value, struct hmap_elem);
+} hmap SEC(".maps");
+
+private(A) struct bpf_spin_lock lock;
+private(A) struct bpf_rb_root rbtree __contains(foo, node);
+
+__noinline void *exception_cb_bad_ret_type(u64 cookie)
+{
+ return NULL;
+}
+
+__noinline int exception_cb_bad_arg_0(void)
+{
+ return 0;
+}
+
+__noinline int exception_cb_bad_arg_2(int a, int b)
+{
+ return 0;
+}
+
+__noinline int exception_cb_ok_arg_small(int a)
+{
+ return 0;
+}
+
+SEC("?tc")
+__exception_cb(exception_cb_bad_ret_type)
+__failure __msg("Global function exception_cb_bad_ret_type() doesn't return scalar.")
+int reject_exception_cb_type_1(struct __sk_buff *ctx)
+{
+ bpf_throw(0);
+ return 0;
+}
+
+SEC("?tc")
+__exception_cb(exception_cb_bad_arg_0)
+__failure __msg("exception cb only supports single integer argument")
+int reject_exception_cb_type_2(struct __sk_buff *ctx)
+{
+ bpf_throw(0);
+ return 0;
+}
+
+SEC("?tc")
+__exception_cb(exception_cb_bad_arg_2)
+__failure __msg("exception cb only supports single integer argument")
+int reject_exception_cb_type_3(struct __sk_buff *ctx)
+{
+ bpf_throw(0);
+ return 0;
+}
+
+SEC("?tc")
+__exception_cb(exception_cb_ok_arg_small)
+__success
+int reject_exception_cb_type_4(struct __sk_buff *ctx)
+{
+ bpf_throw(0);
+ return 0;
+}
+
+__noinline
+static int timer_cb(void *map, int *key, struct bpf_timer *timer)
+{
+ bpf_throw(0);
+ return 0;
+}
+
+SEC("?tc")
+__failure __msg("cannot be called from callback subprog")
+int reject_async_callback_throw(struct __sk_buff *ctx)
+{
+ struct hmap_elem *elem;
+
+ elem = bpf_map_lookup_elem(&hmap, &(int){0});
+ if (!elem)
+ return 0;
+ return bpf_timer_set_callback(&elem->timer, timer_cb);
+}
+
+__noinline static int subprog_lock(struct __sk_buff *ctx)
+{
+ volatile int ret = 0;
+
+ bpf_spin_lock(&lock);
+ if (ctx->len)
+ bpf_throw(0);
+ return ret;
+}
+
+SEC("?tc")
+__failure __msg("function calls are not allowed while holding a lock")
+int reject_with_lock(void *ctx)
+{
+ bpf_spin_lock(&lock);
+ bpf_throw(0);
+ return 0;
+}
+
+SEC("?tc")
+__failure __msg("function calls are not allowed while holding a lock")
+int reject_subprog_with_lock(void *ctx)
+{
+ return subprog_lock(ctx);
+}
+
+SEC("?tc")
+__failure __msg("bpf_rcu_read_unlock is missing")
+int reject_with_rcu_read_lock(void *ctx)
+{
+ bpf_rcu_read_lock();
+ bpf_throw(0);
+ return 0;
+}
+
+__noinline static int throwing_subprog(struct __sk_buff *ctx)
+{
+ if (ctx->len)
+ bpf_throw(0);
+ return 0;
+}
+
+SEC("?tc")
+__failure __msg("bpf_rcu_read_unlock is missing")
+int reject_subprog_with_rcu_read_lock(void *ctx)
+{
+ bpf_rcu_read_lock();
+ return throwing_subprog(ctx);
+}
+
+static bool rbless(struct bpf_rb_node *n1, const struct bpf_rb_node *n2)
+{
+ bpf_throw(0);
+ return true;
+}
+
+SEC("?tc")
+__failure __msg("function calls are not allowed while holding a lock")
+int reject_with_rbtree_add_throw(void *ctx)
+{
+ struct foo *f;
+
+ f = bpf_obj_new(typeof(*f));
+ if (!f)
+ return 0;
+ bpf_spin_lock(&lock);
+ bpf_rbtree_add(&rbtree, &f->node, rbless);
+ bpf_spin_unlock(&lock);
+ return 0;
+}
+
+SEC("?tc")
+__failure __msg("Unreleased reference")
+int reject_with_reference(void *ctx)
+{
+ struct foo *f;
+
+ f = bpf_obj_new(typeof(*f));
+ if (!f)
+ return 0;
+ bpf_throw(0);
+ return 0;
+}
+
+__noinline static int subprog_ref(struct __sk_buff *ctx)
+{
+ struct foo *f;
+
+ f = bpf_obj_new(typeof(*f));
+ if (!f)
+ return 0;
+ bpf_throw(0);
+ return 0;
+}
+
+__noinline static int subprog_cb_ref(u32 i, void *ctx)
+{
+ bpf_throw(0);
+ return 0;
+}
+
+SEC("?tc")
+__failure __msg("Unreleased reference")
+int reject_with_cb_reference(void *ctx)
+{
+ struct foo *f;
+
+ f = bpf_obj_new(typeof(*f));
+ if (!f)
+ return 0;
+ bpf_loop(5, subprog_cb_ref, NULL, 0);
+ bpf_obj_drop(f);
+ return 0;
+}
+
+SEC("?tc")
+__failure __msg("cannot be called from callback")
+int reject_with_cb(void *ctx)
+{
+ bpf_loop(5, subprog_cb_ref, NULL, 0);
+ return 0;
+}
+
+SEC("?tc")
+__failure __msg("Unreleased reference")
+int reject_with_subprog_reference(void *ctx)
+{
+ return subprog_ref(ctx) + 1;
+}
+
+__noinline int throwing_exception_cb(u64 c)
+{
+ bpf_throw(0);
+ return c;
+}
+
+__noinline int exception_cb1(u64 c)
+{
+ return c;
+}
+
+__noinline int exception_cb2(u64 c)
+{
+ return c;
+}
+
+static __noinline int static_func(struct __sk_buff *ctx)
+{
+ return exception_cb1(ctx->tstamp);
+}
+
+__noinline int global_func(struct __sk_buff *ctx)
+{
+ return exception_cb1(ctx->tstamp);
+}
+
+SEC("?tc")
+__exception_cb(throwing_exception_cb)
+__failure __msg("cannot be called from callback subprog")
+int reject_throwing_exception_cb(struct __sk_buff *ctx)
+{
+ return 0;
+}
+
+SEC("?tc")
+__exception_cb(exception_cb1)
+__failure __msg("cannot call exception cb directly")
+int reject_exception_cb_call_global_func(struct __sk_buff *ctx)
+{
+ return global_func(ctx);
+}
+
+SEC("?tc")
+__exception_cb(exception_cb1)
+__failure __msg("cannot call exception cb directly")
+int reject_exception_cb_call_static_func(struct __sk_buff *ctx)
+{
+ return static_func(ctx);
+}
+
+SEC("?tc")
+__exception_cb(exception_cb1)
+__exception_cb(exception_cb2)
+__failure __msg("multiple exception callback tags for main subprog")
+int reject_multiple_exception_cb(struct __sk_buff *ctx)
+{
+ bpf_throw(0);
+ return 16;
+}
+
+__noinline int exception_cb_bad_ret(u64 c)
+{
+ return c;
+}
+
+SEC("?fentry/bpf_check")
+__exception_cb(exception_cb_bad_ret)
+__failure __msg("At program exit the register R0 has unknown scalar value should")
+int reject_set_exception_cb_bad_ret1(void *ctx)
+{
+ return 0;
+}
+
+SEC("?fentry/bpf_check")
+__failure __msg("At program exit the register R1 has smin=64 smax=64 should")
+int reject_set_exception_cb_bad_ret2(void *ctx)
+{
+ bpf_throw(64);
+ return 0;
+}
+
+__noinline static int loop_cb1(u32 index, int *ctx)
+{
+ bpf_throw(0);
+ return 0;
+}
+
+__noinline static int loop_cb2(u32 index, int *ctx)
+{
+ bpf_throw(0);
+ return 0;
+}
+
+SEC("?tc")
+__failure __msg("cannot be called from callback")
+int reject_exception_throw_cb(struct __sk_buff *ctx)
+{
+ bpf_loop(5, loop_cb1, NULL, 0);
+ return 0;
+}
+
+SEC("?tc")
+__failure __msg("cannot be called from callback")
+int reject_exception_throw_cb_diff(struct __sk_buff *ctx)
+{
+ if (ctx->protocol)
+ bpf_loop(5, loop_cb1, NULL, 0);
+ else
+ bpf_loop(5, loop_cb2, NULL, 0);
+ return 0;
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/exhandler_kern.c b/tools/testing/selftests/bpf/progs/exhandler_kern.c
new file mode 100644
index 000000000000..20d009e2d266
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/exhandler_kern.c
@@ -0,0 +1,52 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2021, Oracle and/or its affiliates. */
+
+#include "vmlinux.h"
+
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
+#include <bpf/bpf_core_read.h>
+
+char _license[] SEC("license") = "GPL";
+
+unsigned int exception_triggered;
+int test_pid;
+
+/* TRACE_EVENT(task_newtask,
+ * TP_PROTO(struct task_struct *p, u64 clone_flags)
+ */
+SEC("tp_btf/task_newtask")
+int BPF_PROG(trace_task_newtask, struct task_struct *task, u64 clone_flags)
+{
+ int pid = bpf_get_current_pid_tgid() >> 32;
+ struct callback_head *work;
+ void *func;
+
+ if (test_pid != pid)
+ return 0;
+
+ /* To verify we hit an exception we dereference task->task_works->func.
+ * If task work has been added,
+ * - task->task_works is non-NULL; and
+ * - task->task_works->func is non-NULL also (the callback function
+ * must be specified for the task work.
+ *
+ * However, for a newly-created task, task->task_works is NULLed,
+ * so we know the exception handler triggered if task_works is
+ * NULL and func is NULL.
+ */
+ work = task->task_works;
+ func = work->func;
+ /* Currently verifier will fail for `btf_ptr |= btf_ptr` * instruction.
+ * To workaround the issue, use barrier_var() and rewrite as below to
+ * prevent compiler from generating verifier-unfriendly code.
+ */
+ barrier_var(work);
+ if (work)
+ return 0;
+ barrier_var(func);
+ if (func)
+ return 0;
+ exception_triggered++;
+ return 0;
+}
diff --git a/tools/testing/selftests/bpf/progs/fentry_many_args.c b/tools/testing/selftests/bpf/progs/fentry_many_args.c
new file mode 100644
index 000000000000..b61bb92fee2c
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/fentry_many_args.c
@@ -0,0 +1,39 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2023 Tencent */
+#include <linux/bpf.h>
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
+
+char _license[] SEC("license") = "GPL";
+
+__u64 test1_result = 0;
+SEC("fentry/bpf_testmod_fentry_test7")
+int BPF_PROG(test1, __u64 a, void *b, short c, int d, void *e, char f,
+ int g)
+{
+ test1_result = a == 16 && b == (void *)17 && c == 18 && d == 19 &&
+ e == (void *)20 && f == 21 && g == 22;
+ return 0;
+}
+
+__u64 test2_result = 0;
+SEC("fentry/bpf_testmod_fentry_test11")
+int BPF_PROG(test2, __u64 a, void *b, short c, int d, void *e, char f,
+ int g, unsigned int h, long i, __u64 j, unsigned long k)
+{
+ test2_result = a == 16 && b == (void *)17 && c == 18 && d == 19 &&
+ e == (void *)20 && f == 21 && g == 22 && h == 23 &&
+ i == 24 && j == 25 && k == 26;
+ return 0;
+}
+
+__u64 test3_result = 0;
+SEC("fentry/bpf_testmod_fentry_test11")
+int BPF_PROG(test3, __u64 a, __u64 b, __u64 c, __u64 d, __u64 e, __u64 f,
+ __u64 g, __u64 h, __u64 i, __u64 j, __u64 k)
+{
+ test3_result = a == 16 && b == 17 && c == 18 && d == 19 &&
+ e == 20 && f == 21 && g == 22 && h == 23 &&
+ i == 24 && j == 25 && k == 26;
+ return 0;
+}
diff --git a/tools/testing/selftests/bpf/progs/fentry_recursive.c b/tools/testing/selftests/bpf/progs/fentry_recursive.c
new file mode 100644
index 000000000000..2c9fb5ac42b2
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/fentry_recursive.c
@@ -0,0 +1,14 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2023 Red Hat, Inc. */
+#include <linux/bpf.h>
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
+
+char _license[] SEC("license") = "GPL";
+
+/* Dummy fentry bpf prog for testing fentry attachment chains */
+SEC("fentry/XXX")
+int BPF_PROG(recursive_attach, int a)
+{
+ return 0;
+}
diff --git a/tools/testing/selftests/bpf/progs/fentry_recursive_target.c b/tools/testing/selftests/bpf/progs/fentry_recursive_target.c
new file mode 100644
index 000000000000..267c876d0aba
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/fentry_recursive_target.c
@@ -0,0 +1,25 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2023 Red Hat, Inc. */
+#include <linux/bpf.h>
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
+
+char _license[] SEC("license") = "GPL";
+
+/* Dummy fentry bpf prog for testing fentry attachment chains. It's going to be
+ * a start of the chain.
+ */
+SEC("fentry/bpf_testmod_fentry_test1")
+int BPF_PROG(test1, int a)
+{
+ return 0;
+}
+
+/* Dummy bpf prog for testing attach_btf presence when attaching an fentry
+ * program.
+ */
+SEC("raw_tp/sys_enter")
+int BPF_PROG(fentry_target, struct pt_regs *regs, long id)
+{
+ return 0;
+}
diff --git a/tools/testing/selftests/bpf/progs/fentry_test.c b/tools/testing/selftests/bpf/progs/fentry_test.c
index 5f645fdaba6f..52a550d281d9 100644
--- a/tools/testing/selftests/bpf/progs/fentry_test.c
+++ b/tools/testing/selftests/bpf/progs/fentry_test.c
@@ -64,7 +64,7 @@ __u64 test7_result = 0;
SEC("fentry/bpf_fentry_test7")
int BPF_PROG(test7, struct bpf_fentry_test_t *arg)
{
- if (arg == 0)
+ if (!arg)
test7_result = 1;
return 0;
}
diff --git a/tools/testing/selftests/bpf/progs/fexit_bpf2bpf.c b/tools/testing/selftests/bpf/progs/fexit_bpf2bpf.c
index 98e1efe14549..983b7c233382 100644
--- a/tools/testing/selftests/bpf/progs/fexit_bpf2bpf.c
+++ b/tools/testing/selftests/bpf/progs/fexit_bpf2bpf.c
@@ -1,8 +1,10 @@
// SPDX-License-Identifier: GPL-2.0
/* Copyright (c) 2019 Facebook */
#include <linux/stddef.h>
+#include <linux/if_ether.h>
#include <linux/ipv6.h>
#include <linux/bpf.h>
+#include <linux/tcp.h>
#include <bpf/bpf_helpers.h>
#include <bpf/bpf_endian.h>
#include <bpf/bpf_tracing.h>
@@ -71,10 +73,10 @@ int test_subprog2(struct args_subprog2 *ctx)
__builtin_preserve_access_index(&skb->len));
ret = ctx->ret;
- /* bpf_prog_load() loads "test_pkt_access.o" with BPF_F_TEST_RND_HI32
- * which randomizes upper 32 bits after BPF_ALU32 insns.
- * Hence after 'w0 <<= 1' upper bits of $rax are random.
- * That is expected and correct. Trim them.
+ /* bpf_prog_test_load() loads "test_pkt_access.bpf.o" with
+ * BPF_F_TEST_RND_HI32 which randomizes upper 32 bits after BPF_ALU32
+ * insns. Hence after 'w0 <<= 1' upper bits of $rax are random. That is
+ * expected and correct. Trim them.
*/
ret = (__u32) ret;
if (len != 74 || ret != 148)
@@ -118,8 +120,6 @@ int new_get_skb_ifindex(int val, struct __sk_buff *skb, int var)
void *data = (void *)(long)skb->data;
struct ipv6hdr ip6, *ip6p;
int ifindex = skb->ifindex;
- __u32 eth_proto;
- __u32 nh_off;
/* check that BPF extension can read packet via direct packet access */
if (data + 14 + sizeof(ip6) > data_end)
@@ -151,4 +151,29 @@ int new_get_constant(long val)
test_get_constant = 1;
return test_get_constant; /* original get_constant() returns val - 122 */
}
+
+__u64 test_pkt_write_access_subprog = 0;
+SEC("freplace/test_pkt_write_access_subprog")
+int new_test_pkt_write_access_subprog(struct __sk_buff *skb, __u32 off)
+{
+
+ void *data = (void *)(long)skb->data;
+ void *data_end = (void *)(long)skb->data_end;
+ struct tcphdr *tcp;
+
+ if (off > sizeof(struct ethhdr) + sizeof(struct ipv6hdr))
+ return -1;
+
+ tcp = data + off;
+ if (tcp + 1 > data_end)
+ return -1;
+
+ /* make modifications to the packet data */
+ tcp->check++;
+ tcp->syn = 0;
+
+ test_pkt_write_access_subprog = 1;
+ return 0;
+}
+
char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/fexit_many_args.c b/tools/testing/selftests/bpf/progs/fexit_many_args.c
new file mode 100644
index 000000000000..53b335c2dafb
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/fexit_many_args.c
@@ -0,0 +1,40 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2023 Tencent */
+#include <linux/bpf.h>
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
+
+char _license[] SEC("license") = "GPL";
+
+__u64 test1_result = 0;
+SEC("fexit/bpf_testmod_fentry_test7")
+int BPF_PROG(test1, __u64 a, void *b, short c, int d, void *e, char f,
+ int g, int ret)
+{
+ test1_result = a == 16 && b == (void *)17 && c == 18 && d == 19 &&
+ e == (void *)20 && f == 21 && g == 22 && ret == 133;
+ return 0;
+}
+
+__u64 test2_result = 0;
+SEC("fexit/bpf_testmod_fentry_test11")
+int BPF_PROG(test2, __u64 a, void *b, short c, int d, void *e, char f,
+ int g, unsigned int h, long i, __u64 j, unsigned long k,
+ int ret)
+{
+ test2_result = a == 16 && b == (void *)17 && c == 18 && d == 19 &&
+ e == (void *)20 && f == 21 && g == 22 && h == 23 &&
+ i == 24 && j == 25 && k == 26 && ret == 231;
+ return 0;
+}
+
+__u64 test3_result = 0;
+SEC("fexit/bpf_testmod_fentry_test11")
+int BPF_PROG(test3, __u64 a, __u64 b, __u64 c, __u64 d, __u64 e, __u64 f,
+ __u64 g, __u64 h, __u64 i, __u64 j, __u64 k, __u64 ret)
+{
+ test3_result = a == 16 && b == 17 && c == 18 && d == 19 &&
+ e == 20 && f == 21 && g == 22 && h == 23 &&
+ i == 24 && j == 25 && k == 26 && ret == 231;
+ return 0;
+}
diff --git a/tools/testing/selftests/bpf/progs/fexit_sleep.c b/tools/testing/selftests/bpf/progs/fexit_sleep.c
new file mode 100644
index 000000000000..106dc75efcc4
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/fexit_sleep.c
@@ -0,0 +1,32 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2021 Facebook */
+#include "vmlinux.h"
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
+#include "bpf_misc.h"
+
+char LICENSE[] SEC("license") = "GPL";
+
+int pid = 0;
+int fentry_cnt = 0;
+int fexit_cnt = 0;
+
+SEC("fentry/" SYS_PREFIX "sys_nanosleep")
+int nanosleep_fentry(void *ctx)
+{
+ if (bpf_get_current_pid_tgid() >> 32 != pid)
+ return 0;
+
+ fentry_cnt++;
+ return 0;
+}
+
+SEC("fexit/" SYS_PREFIX "sys_nanosleep")
+int nanosleep_fexit(void *ctx)
+{
+ if (bpf_get_current_pid_tgid() >> 32 != pid)
+ return 0;
+
+ fexit_cnt++;
+ return 0;
+}
diff --git a/tools/testing/selftests/bpf/progs/fexit_test.c b/tools/testing/selftests/bpf/progs/fexit_test.c
index 0952affb22a6..8f1ccb7302e1 100644
--- a/tools/testing/selftests/bpf/progs/fexit_test.c
+++ b/tools/testing/selftests/bpf/progs/fexit_test.c
@@ -65,7 +65,7 @@ __u64 test7_result = 0;
SEC("fexit/bpf_fentry_test7")
int BPF_PROG(test7, struct bpf_fentry_test_t *arg)
{
- if (arg == 0)
+ if (!arg)
test7_result = 1;
return 0;
}
@@ -74,7 +74,7 @@ __u64 test8_result = 0;
SEC("fexit/bpf_fentry_test8")
int BPF_PROG(test8, struct bpf_fentry_test_t *arg)
{
- if (arg->a == 0)
+ if (!arg->a)
test8_result = 1;
return 0;
}
diff --git a/tools/testing/selftests/bpf/progs/fib_lookup.c b/tools/testing/selftests/bpf/progs/fib_lookup.c
new file mode 100644
index 000000000000..c4514dd58c62
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/fib_lookup.c
@@ -0,0 +1,22 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2023 Meta Platforms, Inc. and affiliates. */
+
+#include <linux/types.h>
+#include <linux/bpf.h>
+#include <bpf/bpf_helpers.h>
+#include "bpf_tracing_net.h"
+
+struct bpf_fib_lookup fib_params = {};
+int fib_lookup_ret = 0;
+int lookup_flags = 0;
+
+SEC("tc")
+int fib_lookup(struct __sk_buff *skb)
+{
+ fib_lookup_ret = bpf_fib_lookup(skb, &fib_params, sizeof(fib_params),
+ lookup_flags);
+
+ return TC_ACT_SHOT;
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/find_vma.c b/tools/testing/selftests/bpf/progs/find_vma.c
new file mode 100644
index 000000000000..38034fb82530
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/find_vma.c
@@ -0,0 +1,69 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2021 Facebook */
+#include "vmlinux.h"
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
+
+char _license[] SEC("license") = "GPL";
+
+struct callback_ctx {
+ int dummy;
+};
+
+#define VM_EXEC 0x00000004
+#define DNAME_INLINE_LEN 32
+
+pid_t target_pid = 0;
+char d_iname[DNAME_INLINE_LEN] = {0};
+__u32 found_vm_exec = 0;
+__u64 addr = 0;
+int find_zero_ret = -1;
+int find_addr_ret = -1;
+
+static long check_vma(struct task_struct *task, struct vm_area_struct *vma,
+ struct callback_ctx *data)
+{
+ if (vma->vm_file)
+ bpf_probe_read_kernel_str(d_iname, DNAME_INLINE_LEN - 1,
+ vma->vm_file->f_path.dentry->d_iname);
+
+ /* check for VM_EXEC */
+ if (vma->vm_flags & VM_EXEC)
+ found_vm_exec = 1;
+
+ return 0;
+}
+
+SEC("raw_tp/sys_enter")
+int handle_getpid(void)
+{
+ struct task_struct *task = bpf_get_current_task_btf();
+ struct callback_ctx data = {};
+
+ if (task->pid != target_pid)
+ return 0;
+
+ find_addr_ret = bpf_find_vma(task, addr, check_vma, &data, 0);
+
+ /* this should return -ENOENT */
+ find_zero_ret = bpf_find_vma(task, 0, check_vma, &data, 0);
+ return 0;
+}
+
+SEC("perf_event")
+int handle_pe(void)
+{
+ struct task_struct *task = bpf_get_current_task_btf();
+ struct callback_ctx data = {};
+
+ if (task->pid != target_pid)
+ return 0;
+
+ find_addr_ret = bpf_find_vma(task, addr, check_vma, &data, 0);
+
+ /* In NMI, this should return -EBUSY, as the previous call is using
+ * the irq_work.
+ */
+ find_zero_ret = bpf_find_vma(task, 0, check_vma, &data, 0);
+ return 0;
+}
diff --git a/tools/testing/selftests/bpf/progs/find_vma_fail1.c b/tools/testing/selftests/bpf/progs/find_vma_fail1.c
new file mode 100644
index 000000000000..7ba9a428f228
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/find_vma_fail1.c
@@ -0,0 +1,30 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2021 Facebook */
+#include "vmlinux.h"
+#include <bpf/bpf_helpers.h>
+#define vm_flags vm_start
+
+char _license[] SEC("license") = "GPL";
+
+struct callback_ctx {
+ int dummy;
+};
+
+static long write_vma(struct task_struct *task, struct vm_area_struct *vma,
+ struct callback_ctx *data)
+{
+ /* writing to vma, which is illegal */
+ vma->vm_start = 0xffffffffff600000;
+
+ return 0;
+}
+
+SEC("raw_tp/sys_enter")
+int handle_getpid(void)
+{
+ struct task_struct *task = bpf_get_current_task_btf();
+ struct callback_ctx data = {};
+
+ bpf_find_vma(task, 0, write_vma, &data, 0);
+ return 0;
+}
diff --git a/tools/testing/selftests/bpf/progs/find_vma_fail2.c b/tools/testing/selftests/bpf/progs/find_vma_fail2.c
new file mode 100644
index 000000000000..9bcf3203e26b
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/find_vma_fail2.c
@@ -0,0 +1,29 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2021 Facebook */
+#include "vmlinux.h"
+#include <bpf/bpf_helpers.h>
+
+char _license[] SEC("license") = "GPL";
+
+struct callback_ctx {
+ int dummy;
+};
+
+static long write_task(struct task_struct *task, struct vm_area_struct *vma,
+ struct callback_ctx *data)
+{
+ /* writing to task, which is illegal */
+ task->mm = NULL;
+
+ return 0;
+}
+
+SEC("raw_tp/sys_enter")
+int handle_getpid(void)
+{
+ struct task_struct *task = bpf_get_current_task_btf();
+ struct callback_ctx data = {};
+
+ bpf_find_vma(task, 0, write_task, &data, 0);
+ return 0;
+}
diff --git a/tools/testing/selftests/bpf/progs/fmod_ret_freplace.c b/tools/testing/selftests/bpf/progs/fmod_ret_freplace.c
new file mode 100644
index 000000000000..c8943ccee6c0
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/fmod_ret_freplace.c
@@ -0,0 +1,14 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <linux/bpf.h>
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
+
+volatile __u64 test_fmod_ret = 0;
+SEC("fmod_ret/security_new_get_constant")
+int BPF_PROG(fmod_ret_test, long val, int ret)
+{
+ test_fmod_ret = 1;
+ return 120;
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/for_each_array_map_elem.c b/tools/testing/selftests/bpf/progs/for_each_array_map_elem.c
new file mode 100644
index 000000000000..52f6995ff29c
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/for_each_array_map_elem.c
@@ -0,0 +1,73 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2021 Facebook */
+#include "vmlinux.h"
+#include <bpf/bpf_helpers.h>
+
+char _license[] SEC("license") = "GPL";
+
+struct {
+ __uint(type, BPF_MAP_TYPE_ARRAY);
+ __uint(max_entries, 3);
+ __type(key, __u32);
+ __type(value, __u64);
+} arraymap SEC(".maps");
+
+struct {
+ __uint(type, BPF_MAP_TYPE_PERCPU_ARRAY);
+ __uint(max_entries, 1);
+ __type(key, __u32);
+ __type(value, __u64);
+} percpu_map SEC(".maps");
+
+struct callback_ctx {
+ int output;
+};
+
+const volatile int bypass_unused = 1;
+
+static __u64
+unused_subprog(struct bpf_map *map, __u32 *key, __u64 *val,
+ struct callback_ctx *data)
+{
+ data->output = 0;
+ return 1;
+}
+
+static __u64
+check_array_elem(struct bpf_map *map, __u32 *key, __u64 *val,
+ struct callback_ctx *data)
+{
+ data->output += *val;
+ if (*key == 1)
+ return 1; /* stop the iteration */
+ return 0;
+}
+
+__u32 cpu = 0;
+__u64 percpu_val = 0;
+
+static __u64
+check_percpu_elem(struct bpf_map *map, __u32 *key, __u64 *val,
+ struct callback_ctx *data)
+{
+ cpu = bpf_get_smp_processor_id();
+ percpu_val = *val;
+ return 0;
+}
+
+u32 arraymap_output = 0;
+
+SEC("tc")
+int test_pkt_access(struct __sk_buff *skb)
+{
+ struct callback_ctx data;
+
+ data.output = 0;
+ bpf_for_each_map_elem(&arraymap, check_array_elem, &data, 0);
+ if (!bypass_unused)
+ bpf_for_each_map_elem(&arraymap, unused_subprog, &data, 0);
+ arraymap_output = data.output;
+
+ bpf_for_each_map_elem(&percpu_map, check_percpu_elem, (void *)0, 0);
+ return 0;
+}
diff --git a/tools/testing/selftests/bpf/progs/for_each_hash_map_elem.c b/tools/testing/selftests/bpf/progs/for_each_hash_map_elem.c
new file mode 100644
index 000000000000..276994d5c0c7
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/for_each_hash_map_elem.c
@@ -0,0 +1,95 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2021 Facebook */
+#include "vmlinux.h"
+#include <bpf/bpf_helpers.h>
+
+char _license[] SEC("license") = "GPL";
+
+struct {
+ __uint(type, BPF_MAP_TYPE_HASH);
+ __uint(max_entries, 3);
+ __type(key, __u32);
+ __type(value, __u64);
+} hashmap SEC(".maps");
+
+struct {
+ __uint(type, BPF_MAP_TYPE_PERCPU_HASH);
+ __uint(max_entries, 1);
+ __type(key, __u32);
+ __type(value, __u64);
+} percpu_map SEC(".maps");
+
+struct callback_ctx {
+ struct __sk_buff *ctx;
+ int input;
+ int output;
+};
+
+static __u64
+check_hash_elem(struct bpf_map *map, __u32 *key, __u64 *val,
+ struct callback_ctx *data)
+{
+ struct __sk_buff *skb = data->ctx;
+ __u32 k;
+ __u64 v;
+
+ if (skb) {
+ k = *key;
+ v = *val;
+ if (skb->len == 10000 && k == 10 && v == 10)
+ data->output = 3; /* impossible path */
+ else
+ data->output = 4;
+ } else {
+ data->output = data->input;
+ bpf_map_delete_elem(map, key);
+ }
+
+ return 0;
+}
+
+__u32 cpu = 0;
+__u32 percpu_called = 0;
+__u32 percpu_key = 0;
+__u64 percpu_val = 0;
+int percpu_output = 0;
+
+static __u64
+check_percpu_elem(struct bpf_map *map, __u32 *key, __u64 *val,
+ struct callback_ctx *unused)
+{
+ struct callback_ctx data;
+
+ percpu_called++;
+ cpu = bpf_get_smp_processor_id();
+ percpu_key = *key;
+ percpu_val = *val;
+
+ data.ctx = 0;
+ data.input = 100;
+ data.output = 0;
+ bpf_for_each_map_elem(&hashmap, check_hash_elem, &data, 0);
+ percpu_output = data.output;
+
+ return 0;
+}
+
+int hashmap_output = 0;
+int hashmap_elems = 0;
+int percpu_map_elems = 0;
+
+SEC("tc")
+int test_pkt_access(struct __sk_buff *skb)
+{
+ struct callback_ctx data;
+
+ data.ctx = skb;
+ data.input = 10;
+ data.output = 0;
+ hashmap_elems = bpf_for_each_map_elem(&hashmap, check_hash_elem, &data, 0);
+ hashmap_output = data.output;
+
+ percpu_map_elems = bpf_for_each_map_elem(&percpu_map, check_percpu_elem,
+ (void *)0, 0);
+ return 0;
+}
diff --git a/tools/testing/selftests/bpf/progs/for_each_map_elem_write_key.c b/tools/testing/selftests/bpf/progs/for_each_map_elem_write_key.c
new file mode 100644
index 000000000000..8e545865ea33
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/for_each_map_elem_write_key.c
@@ -0,0 +1,27 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <vmlinux.h>
+#include <bpf/bpf_helpers.h>
+
+struct {
+ __uint(type, BPF_MAP_TYPE_ARRAY);
+ __uint(max_entries, 1);
+ __type(key, __u32);
+ __type(value, __u64);
+} array_map SEC(".maps");
+
+static __u64
+check_array_elem(struct bpf_map *map, __u32 *key, __u64 *val,
+ void *data)
+{
+ bpf_get_current_comm(key, sizeof(*key));
+ return 0;
+}
+
+SEC("raw_tp/sys_enter")
+int test_map_key_write(const void *ctx)
+{
+ bpf_for_each_map_elem(&array_map, check_array_elem, NULL, 0);
+ return 0;
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/freplace_attach_probe.c b/tools/testing/selftests/bpf/progs/freplace_attach_probe.c
new file mode 100644
index 000000000000..370a0e1922e0
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/freplace_attach_probe.c
@@ -0,0 +1,40 @@
+// SPDX-License-Identifier: GPL-2.0
+// Copyright (c) 2020 Facebook
+
+#include <linux/ptrace.h>
+#include <linux/bpf.h>
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
+
+#define VAR_NUM 2
+
+struct hmap_elem {
+ struct bpf_spin_lock lock;
+ int var[VAR_NUM];
+};
+
+struct {
+ __uint(type, BPF_MAP_TYPE_HASH);
+ __uint(max_entries, 1);
+ __type(key, __u32);
+ __type(value, struct hmap_elem);
+} hash_map SEC(".maps");
+
+SEC("freplace/handle_kprobe")
+int new_handle_kprobe(struct pt_regs *ctx)
+{
+ struct hmap_elem *val;
+ int key = 0;
+
+ val = bpf_map_lookup_elem(&hash_map, &key);
+ if (!val)
+ return 1;
+ /* spin_lock in hash map */
+ bpf_spin_lock(&val->lock);
+ val->var[0] = 99;
+ bpf_spin_unlock(&val->lock);
+
+ return 0;
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/freplace_cls_redirect.c b/tools/testing/selftests/bpf/progs/freplace_cls_redirect.c
new file mode 100644
index 000000000000..7e94412d47a5
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/freplace_cls_redirect.c
@@ -0,0 +1,34 @@
+// SPDX-License-Identifier: GPL-2.0
+// Copyright (c) 2020 Facebook
+
+#include <linux/stddef.h>
+#include <linux/bpf.h>
+#include <linux/pkt_cls.h>
+#include <bpf/bpf_endian.h>
+#include <bpf/bpf_helpers.h>
+
+struct {
+ __uint(type, BPF_MAP_TYPE_SOCKMAP);
+ __type(key, int);
+ __type(value, int);
+ __uint(max_entries, 2);
+} sock_map SEC(".maps");
+
+SEC("freplace/cls_redirect")
+int freplace_cls_redirect_test(struct __sk_buff *skb)
+{
+ int ret = 0;
+ const int zero = 0;
+ struct bpf_sock *sk;
+
+ sk = bpf_map_lookup_elem(&sock_map, &zero);
+ if (!sk)
+ return TC_ACT_SHOT;
+
+ ret = bpf_map_update_elem(&sock_map, &zero, sk, 0);
+ bpf_sk_release(sk);
+
+ return ret == 0 ? TC_ACT_OK : TC_ACT_SHOT;
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/freplace_connect_v4_prog.c b/tools/testing/selftests/bpf/progs/freplace_connect_v4_prog.c
new file mode 100644
index 000000000000..544e5ac90461
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/freplace_connect_v4_prog.c
@@ -0,0 +1,19 @@
+// SPDX-License-Identifier: GPL-2.0
+// Copyright (c) 2020 Facebook
+
+#include <linux/stddef.h>
+#include <linux/ipv6.h>
+#include <linux/bpf.h>
+#include <linux/in.h>
+#include <sys/socket.h>
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_endian.h>
+
+SEC("freplace/connect_v4_prog")
+int new_connect_v4_prog(struct bpf_sock_addr *ctx)
+{
+ // return value thats in invalid range
+ return 255;
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/freplace_dead_global_func.c b/tools/testing/selftests/bpf/progs/freplace_dead_global_func.c
new file mode 100644
index 000000000000..e6a75f86cac6
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/freplace_dead_global_func.c
@@ -0,0 +1,11 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <linux/bpf.h>
+#include <bpf/bpf_helpers.h>
+
+SEC("freplace")
+int freplace_prog(void)
+{
+ return 0;
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/freplace_get_constant.c b/tools/testing/selftests/bpf/progs/freplace_get_constant.c
new file mode 100644
index 000000000000..705e4b64dfc2
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/freplace_get_constant.c
@@ -0,0 +1,15 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <linux/bpf.h>
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_endian.h>
+
+volatile __u64 test_get_constant = 0;
+SEC("freplace/get_constant")
+int security_new_get_constant(long val)
+{
+ if (val != 123)
+ return 0;
+ test_get_constant = 1;
+ return test_get_constant; /* original get_constant() returns val - 122 */
+}
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/freplace_global_func.c b/tools/testing/selftests/bpf/progs/freplace_global_func.c
new file mode 100644
index 000000000000..96cb61a6ce87
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/freplace_global_func.c
@@ -0,0 +1,18 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <linux/bpf.h>
+#include <bpf/bpf_helpers.h>
+
+__noinline
+int test_ctx_global_func(struct __sk_buff *skb)
+{
+ volatile int retval = 1;
+ return retval;
+}
+
+SEC("freplace/test_pkt_access")
+int new_test_pkt_access(struct __sk_buff *skb)
+{
+ return test_ctx_global_func(skb);
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/freplace_progmap.c b/tools/testing/selftests/bpf/progs/freplace_progmap.c
new file mode 100644
index 000000000000..81b56b9aa7d6
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/freplace_progmap.c
@@ -0,0 +1,24 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <linux/bpf.h>
+#include <bpf/bpf_helpers.h>
+
+struct {
+ __uint(type, BPF_MAP_TYPE_CPUMAP);
+ __type(key, __u32);
+ __type(value, struct bpf_cpumap_val);
+ __uint(max_entries, 1);
+} cpu_map SEC(".maps");
+
+SEC("xdp/cpumap")
+int xdp_drop_prog(struct xdp_md *ctx)
+{
+ return XDP_DROP;
+}
+
+SEC("freplace")
+int xdp_cpumap_prog(struct xdp_md *ctx)
+{
+ return bpf_redirect_map(&cpu_map, 0, XDP_PASS);
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/freplace_unreliable_prog.c b/tools/testing/selftests/bpf/progs/freplace_unreliable_prog.c
new file mode 100644
index 000000000000..624078abf3de
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/freplace_unreliable_prog.c
@@ -0,0 +1,20 @@
+// SPDX-License-Identifier: GPL-2.0
+// Copyright (c) 2020 Facebook
+
+#include "vmlinux.h"
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
+
+SEC("freplace/btf_unreliable_kprobe")
+/* context type is what BPF verifier expects for kprobe context, but target
+ * program has `stuct whatever *ctx` argument, so freplace operation will be
+ * rejected with the following message:
+ *
+ * arg0 replace_btf_unreliable_kprobe(struct pt_regs *) doesn't match btf_unreliable_kprobe(struct whatever *)
+ */
+int replace_btf_unreliable_kprobe(bpf_user_pt_regs_t *ctx)
+{
+ return 0;
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/get_branch_snapshot.c b/tools/testing/selftests/bpf/progs/get_branch_snapshot.c
new file mode 100644
index 000000000000..511ac634eef0
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/get_branch_snapshot.c
@@ -0,0 +1,40 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2021 Facebook */
+#include "vmlinux.h"
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
+
+char _license[] SEC("license") = "GPL";
+
+__u64 test1_hits = 0;
+__u64 address_low = 0;
+__u64 address_high = 0;
+int wasted_entries = 0;
+long total_entries = 0;
+
+#define ENTRY_CNT 32
+struct perf_branch_entry entries[ENTRY_CNT] = {};
+
+static inline bool gbs_in_range(__u64 val)
+{
+ return (val >= address_low) && (val < address_high);
+}
+
+SEC("fexit/bpf_testmod_loop_test")
+int BPF_PROG(test1, int n, int ret)
+{
+ long i;
+
+ total_entries = bpf_get_branch_snapshot(entries, sizeof(entries), 0);
+ total_entries /= sizeof(struct perf_branch_entry);
+
+ for (i = 0; i < ENTRY_CNT; i++) {
+ if (i >= total_entries)
+ break;
+ if (gbs_in_range(entries[i].from) && gbs_in_range(entries[i].to))
+ test1_hits++;
+ else if (!test1_hits)
+ wasted_entries++;
+ }
+ return 0;
+}
diff --git a/tools/testing/selftests/bpf/progs/get_cgroup_id_kern.c b/tools/testing/selftests/bpf/progs/get_cgroup_id_kern.c
index 6b42db2fe391..68587b1de34e 100644
--- a/tools/testing/selftests/bpf/progs/get_cgroup_id_kern.c
+++ b/tools/testing/selftests/bpf/progs/get_cgroup_id_kern.c
@@ -37,4 +37,3 @@ int trace(void *ctx)
}
char _license[] SEC("license") = "GPL";
-__u32 _version SEC("version") = 1; /* ignored by tracepoints, required by libbpf.a */
diff --git a/tools/testing/selftests/bpf/progs/get_func_args_test.c b/tools/testing/selftests/bpf/progs/get_func_args_test.c
new file mode 100644
index 000000000000..e0f34a55e697
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/get_func_args_test.c
@@ -0,0 +1,123 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <linux/bpf.h>
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
+#include <errno.h>
+
+char _license[] SEC("license") = "GPL";
+
+__u64 test1_result = 0;
+SEC("fentry/bpf_fentry_test1")
+int BPF_PROG(test1)
+{
+ __u64 cnt = bpf_get_func_arg_cnt(ctx);
+ __u64 a = 0, z = 0, ret = 0;
+ __s64 err;
+
+ test1_result = cnt == 1;
+
+ /* valid arguments */
+ err = bpf_get_func_arg(ctx, 0, &a);
+
+ /* We need to cast access to traced function argument values with
+ * proper type cast, because trampoline uses type specific instruction
+ * to save it, like for 'int a' with 32-bit mov like:
+ *
+ * mov %edi,-0x8(%rbp)
+ *
+ * so the upper 4 bytes are not zeroed.
+ */
+ test1_result &= err == 0 && ((int) a == 1);
+
+ /* not valid argument */
+ err = bpf_get_func_arg(ctx, 1, &z);
+ test1_result &= err == -EINVAL;
+
+ /* return value fails in fentry */
+ err = bpf_get_func_ret(ctx, &ret);
+ test1_result &= err == -EOPNOTSUPP;
+ return 0;
+}
+
+__u64 test2_result = 0;
+SEC("fexit/bpf_fentry_test2")
+int BPF_PROG(test2)
+{
+ __u64 cnt = bpf_get_func_arg_cnt(ctx);
+ __u64 a = 0, b = 0, z = 0, ret = 0;
+ __s64 err;
+
+ test2_result = cnt == 2;
+
+ /* valid arguments */
+ err = bpf_get_func_arg(ctx, 0, &a);
+ test2_result &= err == 0 && (int) a == 2;
+
+ err = bpf_get_func_arg(ctx, 1, &b);
+ test2_result &= err == 0 && b == 3;
+
+ /* not valid argument */
+ err = bpf_get_func_arg(ctx, 2, &z);
+ test2_result &= err == -EINVAL;
+
+ /* return value */
+ err = bpf_get_func_ret(ctx, &ret);
+ test2_result &= err == 0 && ret == 5;
+ return 0;
+}
+
+__u64 test3_result = 0;
+SEC("fmod_ret/bpf_modify_return_test")
+int BPF_PROG(fmod_ret_test, int _a, int *_b, int _ret)
+{
+ __u64 cnt = bpf_get_func_arg_cnt(ctx);
+ __u64 a = 0, b = 0, z = 0, ret = 0;
+ __s64 err;
+
+ test3_result = cnt == 2;
+
+ /* valid arguments */
+ err = bpf_get_func_arg(ctx, 0, &a);
+ test3_result &= err == 0 && ((int) a == 1);
+
+ err = bpf_get_func_arg(ctx, 1, &b);
+ test3_result &= err == 0 && ((int *) b == _b);
+
+ /* not valid argument */
+ err = bpf_get_func_arg(ctx, 2, &z);
+ test3_result &= err == -EINVAL;
+
+ /* return value */
+ err = bpf_get_func_ret(ctx, &ret);
+ test3_result &= err == 0 && ret == 0;
+
+ /* change return value, it's checked in fexit_test program */
+ return 1234;
+}
+
+__u64 test4_result = 0;
+SEC("fexit/bpf_modify_return_test")
+int BPF_PROG(fexit_test, int _a, int *_b, int _ret)
+{
+ __u64 cnt = bpf_get_func_arg_cnt(ctx);
+ __u64 a = 0, b = 0, z = 0, ret = 0;
+ __s64 err;
+
+ test4_result = cnt == 2;
+
+ /* valid arguments */
+ err = bpf_get_func_arg(ctx, 0, &a);
+ test4_result &= err == 0 && ((int) a == 1);
+
+ err = bpf_get_func_arg(ctx, 1, &b);
+ test4_result &= err == 0 && ((int *) b == _b);
+
+ /* not valid argument */
+ err = bpf_get_func_arg(ctx, 2, &z);
+ test4_result &= err == -EINVAL;
+
+ /* return value */
+ err = bpf_get_func_ret(ctx, &ret);
+ test4_result &= err == 0 && ret == 1234;
+ return 0;
+}
diff --git a/tools/testing/selftests/bpf/progs/get_func_ip_test.c b/tools/testing/selftests/bpf/progs/get_func_ip_test.c
new file mode 100644
index 000000000000..8956eb78a226
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/get_func_ip_test.c
@@ -0,0 +1,106 @@
+// SPDX-License-Identifier: GPL-2.0
+#include "vmlinux.h"
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
+
+char _license[] SEC("license") = "GPL";
+
+extern const void bpf_fentry_test1 __ksym;
+extern const void bpf_fentry_test2 __ksym;
+extern const void bpf_fentry_test3 __ksym;
+extern const void bpf_fentry_test4 __ksym;
+extern const void bpf_modify_return_test __ksym;
+extern const void bpf_fentry_test6 __ksym;
+extern const void bpf_fentry_test7 __ksym;
+
+extern bool CONFIG_X86_KERNEL_IBT __kconfig __weak;
+
+/* This function is here to have CONFIG_X86_KERNEL_IBT
+ * used and added to object BTF.
+ */
+int unused(void)
+{
+ return CONFIG_X86_KERNEL_IBT ? 0 : 1;
+}
+
+__u64 test1_result = 0;
+SEC("fentry/bpf_fentry_test1")
+int BPF_PROG(test1, int a)
+{
+ __u64 addr = bpf_get_func_ip(ctx);
+
+ test1_result = (const void *) addr == &bpf_fentry_test1;
+ return 0;
+}
+
+__u64 test2_result = 0;
+SEC("fexit/bpf_fentry_test2")
+int BPF_PROG(test2, int a)
+{
+ __u64 addr = bpf_get_func_ip(ctx);
+
+ test2_result = (const void *) addr == &bpf_fentry_test2;
+ return 0;
+}
+
+__u64 test3_result = 0;
+SEC("kprobe/bpf_fentry_test3")
+int test3(struct pt_regs *ctx)
+{
+ __u64 addr = bpf_get_func_ip(ctx);
+
+ test3_result = (const void *) addr == &bpf_fentry_test3;
+ return 0;
+}
+
+__u64 test4_result = 0;
+SEC("kretprobe/bpf_fentry_test4")
+int BPF_KRETPROBE(test4)
+{
+ __u64 addr = bpf_get_func_ip(ctx);
+
+ test4_result = (const void *) addr == &bpf_fentry_test4;
+ return 0;
+}
+
+__u64 test5_result = 0;
+SEC("fmod_ret/bpf_modify_return_test")
+int BPF_PROG(test5, int a, int *b, int ret)
+{
+ __u64 addr = bpf_get_func_ip(ctx);
+
+ test5_result = (const void *) addr == &bpf_modify_return_test;
+ return ret;
+}
+
+__u64 test6_result = 0;
+SEC("?kprobe")
+int test6(struct pt_regs *ctx)
+{
+ __u64 addr = bpf_get_func_ip(ctx);
+
+ test6_result = (const void *) addr == 0;
+ return 0;
+}
+
+unsigned long uprobe_trigger;
+
+__u64 test7_result = 0;
+SEC("uprobe//proc/self/exe:uprobe_trigger")
+int BPF_UPROBE(test7)
+{
+ __u64 addr = bpf_get_func_ip(ctx);
+
+ test7_result = (const void *) addr == (const void *) uprobe_trigger;
+ return 0;
+}
+
+__u64 test8_result = 0;
+SEC("uretprobe//proc/self/exe:uprobe_trigger")
+int BPF_URETPROBE(test8, int ret)
+{
+ __u64 addr = bpf_get_func_ip(ctx);
+
+ test8_result = (const void *) addr == (const void *) uprobe_trigger;
+ return 0;
+}
diff --git a/tools/testing/selftests/bpf/progs/get_func_ip_uprobe_test.c b/tools/testing/selftests/bpf/progs/get_func_ip_uprobe_test.c
new file mode 100644
index 000000000000..052f8a4345a8
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/get_func_ip_uprobe_test.c
@@ -0,0 +1,18 @@
+// SPDX-License-Identifier: GPL-2.0
+#include "vmlinux.h"
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
+
+char _license[] SEC("license") = "GPL";
+
+unsigned long uprobe_trigger_body;
+
+__u64 test1_result = 0;
+SEC("uprobe//proc/self/exe:uprobe_trigger_body+1")
+int BPF_UPROBE(test1)
+{
+ __u64 addr = bpf_get_func_ip(ctx);
+
+ test1_result = (const void *) addr == (const void *) uprobe_trigger_body + 1;
+ return 0;
+}
diff --git a/tools/testing/selftests/bpf/progs/getpeername_unix_prog.c b/tools/testing/selftests/bpf/progs/getpeername_unix_prog.c
new file mode 100644
index 000000000000..5a76754f846b
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/getpeername_unix_prog.c
@@ -0,0 +1,38 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2023 Meta Platforms, Inc. and affiliates. */
+
+#include "vmlinux.h"
+
+#include <string.h>
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_core_read.h>
+#include "bpf_kfuncs.h"
+
+__u8 SERVUN_REWRITE_ADDRESS[] = "\0bpf_cgroup_unix_test_rewrite";
+
+SEC("cgroup/getpeername_unix")
+int getpeername_unix_prog(struct bpf_sock_addr *ctx)
+{
+ struct bpf_sock_addr_kern *sa_kern = bpf_cast_to_kern_ctx(ctx);
+ struct sockaddr_un *sa_kern_unaddr;
+ __u32 unaddrlen = offsetof(struct sockaddr_un, sun_path) +
+ sizeof(SERVUN_REWRITE_ADDRESS) - 1;
+ int ret;
+
+ ret = bpf_sock_addr_set_sun_path(sa_kern, SERVUN_REWRITE_ADDRESS,
+ sizeof(SERVUN_REWRITE_ADDRESS) - 1);
+ if (ret)
+ return 1;
+
+ if (sa_kern->uaddrlen != unaddrlen)
+ return 1;
+
+ sa_kern_unaddr = bpf_core_cast(sa_kern->uaddr, struct sockaddr_un);
+ if (memcmp(sa_kern_unaddr->sun_path, SERVUN_REWRITE_ADDRESS,
+ sizeof(SERVUN_REWRITE_ADDRESS) - 1) != 0)
+ return 1;
+
+ return 1;
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/getsockname_unix_prog.c b/tools/testing/selftests/bpf/progs/getsockname_unix_prog.c
new file mode 100644
index 000000000000..7867113c696f
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/getsockname_unix_prog.c
@@ -0,0 +1,38 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2023 Meta Platforms, Inc. and affiliates. */
+
+#include "vmlinux.h"
+
+#include <string.h>
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_core_read.h>
+#include "bpf_kfuncs.h"
+
+__u8 SERVUN_REWRITE_ADDRESS[] = "\0bpf_cgroup_unix_test_rewrite";
+
+SEC("cgroup/getsockname_unix")
+int getsockname_unix_prog(struct bpf_sock_addr *ctx)
+{
+ struct bpf_sock_addr_kern *sa_kern = bpf_cast_to_kern_ctx(ctx);
+ struct sockaddr_un *sa_kern_unaddr;
+ __u32 unaddrlen = offsetof(struct sockaddr_un, sun_path) +
+ sizeof(SERVUN_REWRITE_ADDRESS) - 1;
+ int ret;
+
+ ret = bpf_sock_addr_set_sun_path(sa_kern, SERVUN_REWRITE_ADDRESS,
+ sizeof(SERVUN_REWRITE_ADDRESS) - 1);
+ if (ret)
+ return 1;
+
+ if (sa_kern->uaddrlen != unaddrlen)
+ return 1;
+
+ sa_kern_unaddr = bpf_core_cast(sa_kern->uaddr, struct sockaddr_un);
+ if (memcmp(sa_kern_unaddr->sun_path, SERVUN_REWRITE_ADDRESS,
+ sizeof(SERVUN_REWRITE_ADDRESS) - 1) != 0)
+ return 1;
+
+ return 1;
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/htab_mem_bench.c b/tools/testing/selftests/bpf/progs/htab_mem_bench.c
new file mode 100644
index 000000000000..b1b721b14d67
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/htab_mem_bench.c
@@ -0,0 +1,105 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (C) 2023. Huawei Technologies Co., Ltd */
+#include <stdbool.h>
+#include <errno.h>
+#include <linux/types.h>
+#include <linux/bpf.h>
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
+
+#define OP_BATCH 64
+
+struct update_ctx {
+ unsigned int from;
+ unsigned int step;
+};
+
+struct {
+ __uint(type, BPF_MAP_TYPE_HASH);
+ __uint(key_size, 4);
+ __uint(map_flags, BPF_F_NO_PREALLOC);
+} htab SEC(".maps");
+
+char _license[] SEC("license") = "GPL";
+
+unsigned char zeroed_value[4096];
+unsigned int nr_thread = 0;
+long op_cnt = 0;
+
+static int write_htab(unsigned int i, struct update_ctx *ctx, unsigned int flags)
+{
+ bpf_map_update_elem(&htab, &ctx->from, zeroed_value, flags);
+ ctx->from += ctx->step;
+
+ return 0;
+}
+
+static int overwrite_htab(unsigned int i, struct update_ctx *ctx)
+{
+ return write_htab(i, ctx, 0);
+}
+
+static int newwrite_htab(unsigned int i, struct update_ctx *ctx)
+{
+ return write_htab(i, ctx, BPF_NOEXIST);
+}
+
+static int del_htab(unsigned int i, struct update_ctx *ctx)
+{
+ bpf_map_delete_elem(&htab, &ctx->from);
+ ctx->from += ctx->step;
+
+ return 0;
+}
+
+SEC("?tp/syscalls/sys_enter_getpgid")
+int overwrite(void *ctx)
+{
+ struct update_ctx update;
+
+ update.from = bpf_get_smp_processor_id();
+ update.step = nr_thread;
+ bpf_loop(OP_BATCH, overwrite_htab, &update, 0);
+ __sync_fetch_and_add(&op_cnt, 1);
+ return 0;
+}
+
+SEC("?tp/syscalls/sys_enter_getpgid")
+int batch_add_batch_del(void *ctx)
+{
+ struct update_ctx update;
+
+ update.from = bpf_get_smp_processor_id();
+ update.step = nr_thread;
+ bpf_loop(OP_BATCH, overwrite_htab, &update, 0);
+
+ update.from = bpf_get_smp_processor_id();
+ bpf_loop(OP_BATCH, del_htab, &update, 0);
+
+ __sync_fetch_and_add(&op_cnt, 2);
+ return 0;
+}
+
+SEC("?tp/syscalls/sys_enter_getpgid")
+int add_only(void *ctx)
+{
+ struct update_ctx update;
+
+ update.from = bpf_get_smp_processor_id() / 2;
+ update.step = nr_thread / 2;
+ bpf_loop(OP_BATCH, newwrite_htab, &update, 0);
+ __sync_fetch_and_add(&op_cnt, 1);
+ return 0;
+}
+
+SEC("?tp/syscalls/sys_enter_getppid")
+int del_only(void *ctx)
+{
+ struct update_ctx update;
+
+ update.from = bpf_get_smp_processor_id() / 2;
+ update.step = nr_thread / 2;
+ bpf_loop(OP_BATCH, del_htab, &update, 0);
+ __sync_fetch_and_add(&op_cnt, 1);
+ return 0;
+}
diff --git a/tools/testing/selftests/bpf/progs/htab_reuse.c b/tools/testing/selftests/bpf/progs/htab_reuse.c
new file mode 100644
index 000000000000..7f7368cb3095
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/htab_reuse.c
@@ -0,0 +1,19 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (C) 2023. Huawei Technologies Co., Ltd */
+#include <linux/bpf.h>
+#include <bpf/bpf_helpers.h>
+
+char _license[] SEC("license") = "GPL";
+
+struct htab_val {
+ struct bpf_spin_lock lock;
+ unsigned int data;
+};
+
+struct {
+ __uint(type, BPF_MAP_TYPE_HASH);
+ __uint(max_entries, 64);
+ __type(key, unsigned int);
+ __type(value, struct htab_val);
+ __uint(map_flags, BPF_F_NO_PREALLOC);
+} htab SEC(".maps");
diff --git a/tools/testing/selftests/bpf/progs/htab_update.c b/tools/testing/selftests/bpf/progs/htab_update.c
new file mode 100644
index 000000000000..7481bb30b29b
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/htab_update.c
@@ -0,0 +1,29 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (C) 2022. Huawei Technologies Co., Ltd */
+#include <linux/bpf.h>
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
+
+char _license[] SEC("license") = "GPL";
+
+struct {
+ __uint(type, BPF_MAP_TYPE_HASH);
+ __uint(max_entries, 1);
+ __uint(key_size, sizeof(__u32));
+ __uint(value_size, sizeof(__u32));
+} htab SEC(".maps");
+
+int pid = 0;
+int update_err = 0;
+
+SEC("?fentry/lookup_elem_raw")
+int lookup_elem_raw(void *ctx)
+{
+ __u32 key = 0, value = 1;
+
+ if ((bpf_get_current_pid_tgid() >> 32) != pid)
+ return 0;
+
+ update_err = bpf_map_update_elem(&htab, &key, &value, 0);
+ return 0;
+}
diff --git a/tools/testing/selftests/bpf/progs/ima.c b/tools/testing/selftests/bpf/progs/ima.c
new file mode 100644
index 000000000000..e16a2c208481
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/ima.c
@@ -0,0 +1,103 @@
+// SPDX-License-Identifier: GPL-2.0
+
+/*
+ * Copyright 2020 Google LLC.
+ */
+
+#include "vmlinux.h"
+#include <errno.h>
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
+
+u32 monitored_pid = 0;
+
+struct {
+ __uint(type, BPF_MAP_TYPE_RINGBUF);
+ __uint(max_entries, 1 << 12);
+} ringbuf SEC(".maps");
+
+char _license[] SEC("license") = "GPL";
+
+bool use_ima_file_hash;
+bool enable_bprm_creds_for_exec;
+bool enable_kernel_read_file;
+bool test_deny;
+
+static void ima_test_common(struct file *file)
+{
+ u64 ima_hash = 0;
+ u64 *sample;
+ int ret;
+ u32 pid;
+
+ pid = bpf_get_current_pid_tgid() >> 32;
+ if (pid == monitored_pid) {
+ if (!use_ima_file_hash)
+ ret = bpf_ima_inode_hash(file->f_inode, &ima_hash,
+ sizeof(ima_hash));
+ else
+ ret = bpf_ima_file_hash(file, &ima_hash,
+ sizeof(ima_hash));
+ if (ret < 0 || ima_hash == 0)
+ return;
+
+ sample = bpf_ringbuf_reserve(&ringbuf, sizeof(u64), 0);
+ if (!sample)
+ return;
+
+ *sample = ima_hash;
+ bpf_ringbuf_submit(sample, 0);
+ }
+
+ return;
+}
+
+static int ima_test_deny(void)
+{
+ u32 pid;
+
+ pid = bpf_get_current_pid_tgid() >> 32;
+ if (pid == monitored_pid && test_deny)
+ return -EPERM;
+
+ return 0;
+}
+
+SEC("lsm.s/bprm_committed_creds")
+void BPF_PROG(bprm_committed_creds, struct linux_binprm *bprm)
+{
+ ima_test_common(bprm->file);
+}
+
+SEC("lsm.s/bprm_creds_for_exec")
+int BPF_PROG(bprm_creds_for_exec, struct linux_binprm *bprm)
+{
+ if (!enable_bprm_creds_for_exec)
+ return 0;
+
+ ima_test_common(bprm->file);
+ return 0;
+}
+
+SEC("lsm.s/kernel_read_file")
+int BPF_PROG(kernel_read_file, struct file *file, enum kernel_read_file_id id,
+ bool contents)
+{
+ int ret;
+
+ if (!enable_kernel_read_file)
+ return 0;
+
+ if (!contents)
+ return 0;
+
+ if (id != READING_POLICY)
+ return 0;
+
+ ret = ima_test_deny();
+ if (ret < 0)
+ return ret;
+
+ ima_test_common(file);
+ return 0;
+}
diff --git a/tools/testing/selftests/bpf/progs/inner_array_lookup.c b/tools/testing/selftests/bpf/progs/inner_array_lookup.c
new file mode 100644
index 000000000000..c2c8f2fa451d
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/inner_array_lookup.c
@@ -0,0 +1,45 @@
+// SPDX-License-Identifier: GPL-2.0-only
+
+#include <linux/bpf.h>
+#include <bpf/bpf_helpers.h>
+
+struct inner_map {
+ __uint(type, BPF_MAP_TYPE_ARRAY);
+ __uint(max_entries, 5);
+ __type(key, int);
+ __type(value, int);
+} inner_map1 SEC(".maps");
+
+struct outer_map {
+ __uint(type, BPF_MAP_TYPE_HASH_OF_MAPS);
+ __uint(max_entries, 3);
+ __type(key, int);
+ __array(values, struct inner_map);
+} outer_map1 SEC(".maps") = {
+ .values = {
+ [2] = &inner_map1,
+ },
+};
+
+SEC("raw_tp/sys_enter")
+int handle__sys_enter(void *ctx)
+{
+ int outer_key = 2, inner_key = 3;
+ int *val;
+ void *map;
+
+ map = bpf_map_lookup_elem(&outer_map1, &outer_key);
+ if (!map)
+ return 1;
+
+ val = bpf_map_lookup_elem(map, &inner_key);
+ if (!val)
+ return 1;
+
+ if (*val == 1)
+ *val = 2;
+
+ return 0;
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/ip_check_defrag.c b/tools/testing/selftests/bpf/progs/ip_check_defrag.c
new file mode 100644
index 000000000000..1c2b6c1616b0
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/ip_check_defrag.c
@@ -0,0 +1,104 @@
+// SPDX-License-Identifier: GPL-2.0-only
+#include "vmlinux.h"
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_endian.h>
+#include "bpf_tracing_net.h"
+
+#define NF_DROP 0
+#define NF_ACCEPT 1
+#define ETH_P_IP 0x0800
+#define ETH_P_IPV6 0x86DD
+#define IP_MF 0x2000
+#define IP_OFFSET 0x1FFF
+#define NEXTHDR_FRAGMENT 44
+
+extern int bpf_dynptr_from_skb(struct sk_buff *skb, __u64 flags,
+ struct bpf_dynptr *ptr__uninit) __ksym;
+extern void *bpf_dynptr_slice(const struct bpf_dynptr *ptr, uint32_t offset,
+ void *buffer, uint32_t buffer__sz) __ksym;
+
+volatile int shootdowns = 0;
+
+static bool is_frag_v4(struct iphdr *iph)
+{
+ int offset;
+ int flags;
+
+ offset = bpf_ntohs(iph->frag_off);
+ flags = offset & ~IP_OFFSET;
+ offset &= IP_OFFSET;
+ offset <<= 3;
+
+ return (flags & IP_MF) || offset;
+}
+
+static bool is_frag_v6(struct ipv6hdr *ip6h)
+{
+ /* Simplifying assumption that there are no extension headers
+ * between fixed header and fragmentation header. This assumption
+ * is only valid in this test case. It saves us the hassle of
+ * searching all potential extension headers.
+ */
+ return ip6h->nexthdr == NEXTHDR_FRAGMENT;
+}
+
+static int handle_v4(struct sk_buff *skb)
+{
+ struct bpf_dynptr ptr;
+ u8 iph_buf[20] = {};
+ struct iphdr *iph;
+
+ if (bpf_dynptr_from_skb(skb, 0, &ptr))
+ return NF_DROP;
+
+ iph = bpf_dynptr_slice(&ptr, 0, iph_buf, sizeof(iph_buf));
+ if (!iph)
+ return NF_DROP;
+
+ /* Shootdown any frags */
+ if (is_frag_v4(iph)) {
+ shootdowns++;
+ return NF_DROP;
+ }
+
+ return NF_ACCEPT;
+}
+
+static int handle_v6(struct sk_buff *skb)
+{
+ struct bpf_dynptr ptr;
+ struct ipv6hdr *ip6h;
+ u8 ip6h_buf[40] = {};
+
+ if (bpf_dynptr_from_skb(skb, 0, &ptr))
+ return NF_DROP;
+
+ ip6h = bpf_dynptr_slice(&ptr, 0, ip6h_buf, sizeof(ip6h_buf));
+ if (!ip6h)
+ return NF_DROP;
+
+ /* Shootdown any frags */
+ if (is_frag_v6(ip6h)) {
+ shootdowns++;
+ return NF_DROP;
+ }
+
+ return NF_ACCEPT;
+}
+
+SEC("netfilter")
+int defrag(struct bpf_nf_ctx *ctx)
+{
+ struct sk_buff *skb = ctx->skb;
+
+ switch (bpf_ntohs(skb->protocol)) {
+ case ETH_P_IP:
+ return handle_v4(skb);
+ case ETH_P_IPV6:
+ return handle_v6(skb);
+ default:
+ return NF_ACCEPT;
+ }
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/iters.c b/tools/testing/selftests/bpf/progs/iters.c
new file mode 100644
index 000000000000..3db416606f2f
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/iters.c
@@ -0,0 +1,1437 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2023 Meta Platforms, Inc. and affiliates. */
+
+#include <stdbool.h>
+#include <linux/bpf.h>
+#include <bpf/bpf_helpers.h>
+#include "bpf_misc.h"
+#include "bpf_compiler.h"
+
+#define ARRAY_SIZE(x) (int)(sizeof(x) / sizeof((x)[0]))
+
+static volatile int zero = 0;
+
+int my_pid;
+int arr[256];
+int small_arr[16] SEC(".data.small_arr");
+
+struct {
+ __uint(type, BPF_MAP_TYPE_HASH);
+ __uint(max_entries, 10);
+ __type(key, int);
+ __type(value, int);
+} amap SEC(".maps");
+
+#ifdef REAL_TEST
+#define MY_PID_GUARD() if (my_pid != (bpf_get_current_pid_tgid() >> 32)) return 0
+#else
+#define MY_PID_GUARD() ({ })
+#endif
+
+SEC("?raw_tp")
+__failure __msg("math between map_value pointer and register with unbounded min value is not allowed")
+int iter_err_unsafe_c_loop(const void *ctx)
+{
+ struct bpf_iter_num it;
+ int *v, i = zero; /* obscure initial value of i */
+
+ MY_PID_GUARD();
+
+ bpf_iter_num_new(&it, 0, 1000);
+ while ((v = bpf_iter_num_next(&it))) {
+ i++;
+ }
+ bpf_iter_num_destroy(&it);
+
+ small_arr[i] = 123; /* invalid */
+
+ return 0;
+}
+
+SEC("?raw_tp")
+__failure __msg("unbounded memory access")
+int iter_err_unsafe_asm_loop(const void *ctx)
+{
+ struct bpf_iter_num it;
+
+ MY_PID_GUARD();
+
+ asm volatile (
+ "r6 = %[zero];" /* iteration counter */
+ "r1 = %[it];" /* iterator state */
+ "r2 = 0;"
+ "r3 = 1000;"
+ "r4 = 1;"
+ "call %[bpf_iter_num_new];"
+ "loop:"
+ "r1 = %[it];"
+ "call %[bpf_iter_num_next];"
+ "if r0 == 0 goto out;"
+ "r6 += 1;"
+ "goto loop;"
+ "out:"
+ "r1 = %[it];"
+ "call %[bpf_iter_num_destroy];"
+ "r1 = %[small_arr];"
+ "r2 = r6;"
+ "r2 <<= 2;"
+ "r1 += r2;"
+ "*(u32 *)(r1 + 0) = r6;" /* invalid */
+ :
+ : [it]"r"(&it),
+ [small_arr]"r"(small_arr),
+ [zero]"r"(zero),
+ __imm(bpf_iter_num_new),
+ __imm(bpf_iter_num_next),
+ __imm(bpf_iter_num_destroy)
+ : __clobber_common, "r6"
+ );
+
+ return 0;
+}
+
+SEC("raw_tp")
+__success
+int iter_while_loop(const void *ctx)
+{
+ struct bpf_iter_num it;
+ int *v;
+
+ MY_PID_GUARD();
+
+ bpf_iter_num_new(&it, 0, 3);
+ while ((v = bpf_iter_num_next(&it))) {
+ bpf_printk("ITER_BASIC: E1 VAL: v=%d", *v);
+ }
+ bpf_iter_num_destroy(&it);
+
+ return 0;
+}
+
+SEC("raw_tp")
+__success
+int iter_while_loop_auto_cleanup(const void *ctx)
+{
+ __attribute__((cleanup(bpf_iter_num_destroy))) struct bpf_iter_num it;
+ int *v;
+
+ MY_PID_GUARD();
+
+ bpf_iter_num_new(&it, 0, 3);
+ while ((v = bpf_iter_num_next(&it))) {
+ bpf_printk("ITER_BASIC: E1 VAL: v=%d", *v);
+ }
+ /* (!) no explicit bpf_iter_num_destroy() */
+
+ return 0;
+}
+
+SEC("raw_tp")
+__success
+int iter_for_loop(const void *ctx)
+{
+ struct bpf_iter_num it;
+ int *v;
+
+ MY_PID_GUARD();
+
+ bpf_iter_num_new(&it, 5, 10);
+ for (v = bpf_iter_num_next(&it); v; v = bpf_iter_num_next(&it)) {
+ bpf_printk("ITER_BASIC: E2 VAL: v=%d", *v);
+ }
+ bpf_iter_num_destroy(&it);
+
+ return 0;
+}
+
+SEC("raw_tp")
+__success
+int iter_bpf_for_each_macro(const void *ctx)
+{
+ int *v;
+
+ MY_PID_GUARD();
+
+ bpf_for_each(num, v, 5, 10) {
+ bpf_printk("ITER_BASIC: E2 VAL: v=%d", *v);
+ }
+
+ return 0;
+}
+
+SEC("raw_tp")
+__success
+int iter_bpf_for_macro(const void *ctx)
+{
+ int i;
+
+ MY_PID_GUARD();
+
+ bpf_for(i, 5, 10) {
+ bpf_printk("ITER_BASIC: E2 VAL: v=%d", i);
+ }
+
+ return 0;
+}
+
+SEC("raw_tp")
+__success
+int iter_pragma_unroll_loop(const void *ctx)
+{
+ struct bpf_iter_num it;
+ int *v, i;
+
+ MY_PID_GUARD();
+
+ bpf_iter_num_new(&it, 0, 2);
+ __pragma_loop_no_unroll
+ for (i = 0; i < 3; i++) {
+ v = bpf_iter_num_next(&it);
+ bpf_printk("ITER_BASIC: E3 VAL: i=%d v=%d", i, v ? *v : -1);
+ }
+ bpf_iter_num_destroy(&it);
+
+ return 0;
+}
+
+SEC("raw_tp")
+__success
+int iter_manual_unroll_loop(const void *ctx)
+{
+ struct bpf_iter_num it;
+ int *v;
+
+ MY_PID_GUARD();
+
+ bpf_iter_num_new(&it, 100, 200);
+ v = bpf_iter_num_next(&it);
+ bpf_printk("ITER_BASIC: E4 VAL: v=%d", v ? *v : -1);
+ v = bpf_iter_num_next(&it);
+ bpf_printk("ITER_BASIC: E4 VAL: v=%d", v ? *v : -1);
+ v = bpf_iter_num_next(&it);
+ bpf_printk("ITER_BASIC: E4 VAL: v=%d", v ? *v : -1);
+ v = bpf_iter_num_next(&it);
+ bpf_printk("ITER_BASIC: E4 VAL: v=%d\n", v ? *v : -1);
+ bpf_iter_num_destroy(&it);
+
+ return 0;
+}
+
+SEC("raw_tp")
+__success
+int iter_multiple_sequential_loops(const void *ctx)
+{
+ struct bpf_iter_num it;
+ int *v, i;
+
+ MY_PID_GUARD();
+
+ bpf_iter_num_new(&it, 0, 3);
+ while ((v = bpf_iter_num_next(&it))) {
+ bpf_printk("ITER_BASIC: E1 VAL: v=%d", *v);
+ }
+ bpf_iter_num_destroy(&it);
+
+ bpf_iter_num_new(&it, 5, 10);
+ for (v = bpf_iter_num_next(&it); v; v = bpf_iter_num_next(&it)) {
+ bpf_printk("ITER_BASIC: E2 VAL: v=%d", *v);
+ }
+ bpf_iter_num_destroy(&it);
+
+ bpf_iter_num_new(&it, 0, 2);
+ __pragma_loop_no_unroll
+ for (i = 0; i < 3; i++) {
+ v = bpf_iter_num_next(&it);
+ bpf_printk("ITER_BASIC: E3 VAL: i=%d v=%d", i, v ? *v : -1);
+ }
+ bpf_iter_num_destroy(&it);
+
+ bpf_iter_num_new(&it, 100, 200);
+ v = bpf_iter_num_next(&it);
+ bpf_printk("ITER_BASIC: E4 VAL: v=%d", v ? *v : -1);
+ v = bpf_iter_num_next(&it);
+ bpf_printk("ITER_BASIC: E4 VAL: v=%d", v ? *v : -1);
+ v = bpf_iter_num_next(&it);
+ bpf_printk("ITER_BASIC: E4 VAL: v=%d", v ? *v : -1);
+ v = bpf_iter_num_next(&it);
+ bpf_printk("ITER_BASIC: E4 VAL: v=%d\n", v ? *v : -1);
+ bpf_iter_num_destroy(&it);
+
+ return 0;
+}
+
+SEC("raw_tp")
+__success
+int iter_limit_cond_break_loop(const void *ctx)
+{
+ struct bpf_iter_num it;
+ int *v, i = 0, sum = 0;
+
+ MY_PID_GUARD();
+
+ bpf_iter_num_new(&it, 0, 10);
+ while ((v = bpf_iter_num_next(&it))) {
+ bpf_printk("ITER_SIMPLE: i=%d v=%d", i, *v);
+ sum += *v;
+
+ i++;
+ if (i > 3)
+ break;
+ }
+ bpf_iter_num_destroy(&it);
+
+ bpf_printk("ITER_SIMPLE: sum=%d\n", sum);
+
+ return 0;
+}
+
+SEC("raw_tp")
+__success
+int iter_obfuscate_counter(const void *ctx)
+{
+ struct bpf_iter_num it;
+ int *v, sum = 0;
+ /* Make i's initial value unknowable for verifier to prevent it from
+ * pruning if/else branch inside the loop body and marking i as precise.
+ */
+ int i = zero;
+
+ MY_PID_GUARD();
+
+ bpf_iter_num_new(&it, 0, 10);
+ while ((v = bpf_iter_num_next(&it))) {
+ int x;
+
+ i += 1;
+
+ /* If we initialized i as `int i = 0;` above, verifier would
+ * track that i becomes 1 on first iteration after increment
+ * above, and here verifier would eagerly prune else branch
+ * and mark i as precise, ruining open-coded iterator logic
+ * completely, as each next iteration would have a different
+ * *precise* value of i, and thus there would be no
+ * convergence of state. This would result in reaching maximum
+ * instruction limit, no matter what the limit is.
+ */
+ if (i == 1)
+ x = 123;
+ else
+ x = i * 3 + 1;
+
+ bpf_printk("ITER_OBFUSCATE_COUNTER: i=%d v=%d x=%d", i, *v, x);
+
+ sum += x;
+ }
+ bpf_iter_num_destroy(&it);
+
+ bpf_printk("ITER_OBFUSCATE_COUNTER: sum=%d\n", sum);
+
+ return 0;
+}
+
+SEC("raw_tp")
+__success
+int iter_search_loop(const void *ctx)
+{
+ struct bpf_iter_num it;
+ int *v, *elem = NULL;
+ bool found = false;
+
+ MY_PID_GUARD();
+
+ bpf_iter_num_new(&it, 0, 10);
+
+ while ((v = bpf_iter_num_next(&it))) {
+ bpf_printk("ITER_SEARCH_LOOP: v=%d", *v);
+
+ if (*v == 2) {
+ found = true;
+ elem = v;
+ barrier_var(elem);
+ }
+ }
+
+ /* should fail to verify if bpf_iter_num_destroy() is here */
+
+ if (found)
+ /* here found element will be wrong, we should have copied
+ * value to a variable, but here we want to make sure we can
+ * access memory after the loop anyways
+ */
+ bpf_printk("ITER_SEARCH_LOOP: FOUND IT = %d!\n", *elem);
+ else
+ bpf_printk("ITER_SEARCH_LOOP: NOT FOUND IT!\n");
+
+ bpf_iter_num_destroy(&it);
+
+ return 0;
+}
+
+SEC("raw_tp")
+__success
+int iter_array_fill(const void *ctx)
+{
+ int sum, i;
+
+ MY_PID_GUARD();
+
+ bpf_for(i, 0, ARRAY_SIZE(arr)) {
+ arr[i] = i * 2;
+ }
+
+ sum = 0;
+ bpf_for(i, 0, ARRAY_SIZE(arr)) {
+ sum += arr[i];
+ }
+
+ bpf_printk("ITER_ARRAY_FILL: sum=%d (should be %d)\n", sum, 255 * 256);
+
+ return 0;
+}
+
+static int arr2d[4][5];
+static int arr2d_row_sums[4];
+static int arr2d_col_sums[5];
+
+SEC("raw_tp")
+__success
+int iter_nested_iters(const void *ctx)
+{
+ int sum, row, col;
+
+ MY_PID_GUARD();
+
+ bpf_for(row, 0, ARRAY_SIZE(arr2d)) {
+ bpf_for( col, 0, ARRAY_SIZE(arr2d[0])) {
+ arr2d[row][col] = row * col;
+ }
+ }
+
+ /* zero-initialize sums */
+ sum = 0;
+ bpf_for(row, 0, ARRAY_SIZE(arr2d)) {
+ arr2d_row_sums[row] = 0;
+ }
+ bpf_for(col, 0, ARRAY_SIZE(arr2d[0])) {
+ arr2d_col_sums[col] = 0;
+ }
+
+ /* calculate sums */
+ bpf_for(row, 0, ARRAY_SIZE(arr2d)) {
+ bpf_for(col, 0, ARRAY_SIZE(arr2d[0])) {
+ sum += arr2d[row][col];
+ arr2d_row_sums[row] += arr2d[row][col];
+ arr2d_col_sums[col] += arr2d[row][col];
+ }
+ }
+
+ bpf_printk("ITER_NESTED_ITERS: total sum=%d", sum);
+ bpf_for(row, 0, ARRAY_SIZE(arr2d)) {
+ bpf_printk("ITER_NESTED_ITERS: row #%d sum=%d", row, arr2d_row_sums[row]);
+ }
+ bpf_for(col, 0, ARRAY_SIZE(arr2d[0])) {
+ bpf_printk("ITER_NESTED_ITERS: col #%d sum=%d%s",
+ col, arr2d_col_sums[col],
+ col == ARRAY_SIZE(arr2d[0]) - 1 ? "\n" : "");
+ }
+
+ return 0;
+}
+
+SEC("raw_tp")
+__success
+int iter_nested_deeply_iters(const void *ctx)
+{
+ int sum = 0;
+
+ MY_PID_GUARD();
+
+ bpf_repeat(10) {
+ bpf_repeat(10) {
+ bpf_repeat(10) {
+ bpf_repeat(10) {
+ bpf_repeat(10) {
+ sum += 1;
+ }
+ }
+ }
+ }
+ /* validate that we can break from inside bpf_repeat() */
+ break;
+ }
+
+ return sum;
+}
+
+static __noinline void fill_inner_dimension(int row)
+{
+ int col;
+
+ bpf_for(col, 0, ARRAY_SIZE(arr2d[0])) {
+ arr2d[row][col] = row * col;
+ }
+}
+
+static __noinline int sum_inner_dimension(int row)
+{
+ int sum = 0, col;
+
+ bpf_for(col, 0, ARRAY_SIZE(arr2d[0])) {
+ sum += arr2d[row][col];
+ arr2d_row_sums[row] += arr2d[row][col];
+ arr2d_col_sums[col] += arr2d[row][col];
+ }
+
+ return sum;
+}
+
+SEC("raw_tp")
+__success
+int iter_subprog_iters(const void *ctx)
+{
+ int sum, row, col;
+
+ MY_PID_GUARD();
+
+ bpf_for(row, 0, ARRAY_SIZE(arr2d)) {
+ fill_inner_dimension(row);
+ }
+
+ /* zero-initialize sums */
+ sum = 0;
+ bpf_for(row, 0, ARRAY_SIZE(arr2d)) {
+ arr2d_row_sums[row] = 0;
+ }
+ bpf_for(col, 0, ARRAY_SIZE(arr2d[0])) {
+ arr2d_col_sums[col] = 0;
+ }
+
+ /* calculate sums */
+ bpf_for(row, 0, ARRAY_SIZE(arr2d)) {
+ sum += sum_inner_dimension(row);
+ }
+
+ bpf_printk("ITER_SUBPROG_ITERS: total sum=%d", sum);
+ bpf_for(row, 0, ARRAY_SIZE(arr2d)) {
+ bpf_printk("ITER_SUBPROG_ITERS: row #%d sum=%d",
+ row, arr2d_row_sums[row]);
+ }
+ bpf_for(col, 0, ARRAY_SIZE(arr2d[0])) {
+ bpf_printk("ITER_SUBPROG_ITERS: col #%d sum=%d%s",
+ col, arr2d_col_sums[col],
+ col == ARRAY_SIZE(arr2d[0]) - 1 ? "\n" : "");
+ }
+
+ return 0;
+}
+
+struct {
+ __uint(type, BPF_MAP_TYPE_ARRAY);
+ __type(key, int);
+ __type(value, int);
+ __uint(max_entries, 1000);
+} arr_map SEC(".maps");
+
+SEC("?raw_tp")
+__failure __msg("invalid mem access 'scalar'")
+int iter_err_too_permissive1(const void *ctx)
+{
+ int *map_val = NULL;
+ int key = 0;
+
+ MY_PID_GUARD();
+
+ map_val = bpf_map_lookup_elem(&arr_map, &key);
+ if (!map_val)
+ return 0;
+
+ bpf_repeat(1000000) {
+ map_val = NULL;
+ }
+
+ *map_val = 123;
+
+ return 0;
+}
+
+SEC("?raw_tp")
+__failure __msg("invalid mem access 'map_value_or_null'")
+int iter_err_too_permissive2(const void *ctx)
+{
+ int *map_val = NULL;
+ int key = 0;
+
+ MY_PID_GUARD();
+
+ map_val = bpf_map_lookup_elem(&arr_map, &key);
+ if (!map_val)
+ return 0;
+
+ bpf_repeat(1000000) {
+ map_val = bpf_map_lookup_elem(&arr_map, &key);
+ }
+
+ *map_val = 123;
+
+ return 0;
+}
+
+SEC("?raw_tp")
+__failure __msg("invalid mem access 'map_value_or_null'")
+int iter_err_too_permissive3(const void *ctx)
+{
+ int *map_val = NULL;
+ int key = 0;
+ bool found = false;
+
+ MY_PID_GUARD();
+
+ bpf_repeat(1000000) {
+ map_val = bpf_map_lookup_elem(&arr_map, &key);
+ found = true;
+ }
+
+ if (found)
+ *map_val = 123;
+
+ return 0;
+}
+
+SEC("raw_tp")
+__success
+int iter_tricky_but_fine(const void *ctx)
+{
+ int *map_val = NULL;
+ int key = 0;
+ bool found = false;
+
+ MY_PID_GUARD();
+
+ bpf_repeat(1000000) {
+ map_val = bpf_map_lookup_elem(&arr_map, &key);
+ if (map_val) {
+ found = true;
+ break;
+ }
+ }
+
+ if (found)
+ *map_val = 123;
+
+ return 0;
+}
+
+#define __bpf_memzero(p, sz) bpf_probe_read_kernel((p), (sz), 0)
+
+SEC("raw_tp")
+__success
+int iter_stack_array_loop(const void *ctx)
+{
+ long arr1[16], arr2[16], sum = 0;
+ int i;
+
+ MY_PID_GUARD();
+
+ /* zero-init arr1 and arr2 in such a way that verifier doesn't know
+ * it's all zeros; if we don't do that, we'll make BPF verifier track
+ * all combination of zero/non-zero stack slots for arr1/arr2, which
+ * will lead to O(2^(ARRAY_SIZE(arr1)+ARRAY_SIZE(arr2))) different
+ * states
+ */
+ __bpf_memzero(arr1, sizeof(arr1));
+ __bpf_memzero(arr2, sizeof(arr1));
+
+ /* validate that we can break and continue when using bpf_for() */
+ bpf_for(i, 0, ARRAY_SIZE(arr1)) {
+ if (i & 1) {
+ arr1[i] = i;
+ continue;
+ } else {
+ arr2[i] = i;
+ break;
+ }
+ }
+
+ bpf_for(i, 0, ARRAY_SIZE(arr1)) {
+ sum += arr1[i] + arr2[i];
+ }
+
+ return sum;
+}
+
+static __noinline void fill(struct bpf_iter_num *it, int *arr, __u32 n, int mul)
+{
+ int *t, i;
+
+ while ((t = bpf_iter_num_next(it))) {
+ i = *t;
+ if (i >= n)
+ break;
+ arr[i] = i * mul;
+ }
+}
+
+static __noinline int sum(struct bpf_iter_num *it, int *arr, __u32 n)
+{
+ int *t, i, sum = 0;;
+
+ while ((t = bpf_iter_num_next(it))) {
+ i = *t;
+ if ((__u32)i >= n)
+ break;
+ sum += arr[i];
+ }
+
+ return sum;
+}
+
+SEC("raw_tp")
+__success
+int iter_pass_iter_ptr_to_subprog(const void *ctx)
+{
+ int arr1[16], arr2[32];
+ struct bpf_iter_num it;
+ int n, sum1, sum2;
+
+ MY_PID_GUARD();
+
+ /* fill arr1 */
+ n = ARRAY_SIZE(arr1);
+ bpf_iter_num_new(&it, 0, n);
+ fill(&it, arr1, n, 2);
+ bpf_iter_num_destroy(&it);
+
+ /* fill arr2 */
+ n = ARRAY_SIZE(arr2);
+ bpf_iter_num_new(&it, 0, n);
+ fill(&it, arr2, n, 10);
+ bpf_iter_num_destroy(&it);
+
+ /* sum arr1 */
+ n = ARRAY_SIZE(arr1);
+ bpf_iter_num_new(&it, 0, n);
+ sum1 = sum(&it, arr1, n);
+ bpf_iter_num_destroy(&it);
+
+ /* sum arr2 */
+ n = ARRAY_SIZE(arr2);
+ bpf_iter_num_new(&it, 0, n);
+ sum2 = sum(&it, arr2, n);
+ bpf_iter_num_destroy(&it);
+
+ bpf_printk("sum1=%d, sum2=%d", sum1, sum2);
+
+ return 0;
+}
+
+SEC("?raw_tp")
+__failure
+__msg("R1 type=scalar expected=fp")
+__naked int delayed_read_mark(void)
+{
+ /* This is equivalent to C program below.
+ * The call to bpf_iter_num_next() is reachable with r7 values &fp[-16] and 0xdead.
+ * State with r7=&fp[-16] is visited first and follows r6 != 42 ... continue branch.
+ * At this point iterator next() call is reached with r7 that has no read mark.
+ * Loop body with r7=0xdead would only be visited if verifier would decide to continue
+ * with second loop iteration. Absence of read mark on r7 might affect state
+ * equivalent logic used for iterator convergence tracking.
+ *
+ * r7 = &fp[-16]
+ * fp[-16] = 0
+ * r6 = bpf_get_prandom_u32()
+ * bpf_iter_num_new(&fp[-8], 0, 10)
+ * while (bpf_iter_num_next(&fp[-8])) {
+ * r6++
+ * if (r6 != 42) {
+ * r7 = 0xdead
+ * continue;
+ * }
+ * bpf_probe_read_user(r7, 8, 0xdeadbeef); // this is not safe
+ * }
+ * bpf_iter_num_destroy(&fp[-8])
+ * return 0
+ */
+ asm volatile (
+ "r7 = r10;"
+ "r7 += -16;"
+ "r0 = 0;"
+ "*(u64 *)(r7 + 0) = r0;"
+ "call %[bpf_get_prandom_u32];"
+ "r6 = r0;"
+ "r1 = r10;"
+ "r1 += -8;"
+ "r2 = 0;"
+ "r3 = 10;"
+ "call %[bpf_iter_num_new];"
+ "1:"
+ "r1 = r10;"
+ "r1 += -8;"
+ "call %[bpf_iter_num_next];"
+ "if r0 == 0 goto 2f;"
+ "r6 += 1;"
+ "if r6 != 42 goto 3f;"
+ "r7 = 0xdead;"
+ "goto 1b;"
+ "3:"
+ "r1 = r7;"
+ "r2 = 8;"
+ "r3 = 0xdeadbeef;"
+ "call %[bpf_probe_read_user];"
+ "goto 1b;"
+ "2:"
+ "r1 = r10;"
+ "r1 += -8;"
+ "call %[bpf_iter_num_destroy];"
+ "r0 = 0;"
+ "exit;"
+ :
+ : __imm(bpf_get_prandom_u32),
+ __imm(bpf_iter_num_new),
+ __imm(bpf_iter_num_next),
+ __imm(bpf_iter_num_destroy),
+ __imm(bpf_probe_read_user)
+ : __clobber_all
+ );
+}
+
+SEC("?raw_tp")
+__failure
+__msg("math between fp pointer and register with unbounded")
+__naked int delayed_precision_mark(void)
+{
+ /* This is equivalent to C program below.
+ * The test is similar to delayed_iter_mark but verifies that incomplete
+ * precision don't fool verifier.
+ * The call to bpf_iter_num_next() is reachable with r7 values -16 and -32.
+ * State with r7=-16 is visited first and follows r6 != 42 ... continue branch.
+ * At this point iterator next() call is reached with r7 that has no read
+ * and precision marks.
+ * Loop body with r7=-32 would only be visited if verifier would decide to continue
+ * with second loop iteration. Absence of precision mark on r7 might affect state
+ * equivalent logic used for iterator convergence tracking.
+ *
+ * r8 = 0
+ * fp[-16] = 0
+ * r7 = -16
+ * r6 = bpf_get_prandom_u32()
+ * bpf_iter_num_new(&fp[-8], 0, 10)
+ * while (bpf_iter_num_next(&fp[-8])) {
+ * if (r6 != 42) {
+ * r7 = -32
+ * r6 = bpf_get_prandom_u32()
+ * continue;
+ * }
+ * r0 = r10
+ * r0 += r7
+ * r8 = *(u64 *)(r0 + 0) // this is not safe
+ * r6 = bpf_get_prandom_u32()
+ * }
+ * bpf_iter_num_destroy(&fp[-8])
+ * return r8
+ */
+ asm volatile (
+ "r8 = 0;"
+ "*(u64 *)(r10 - 16) = r8;"
+ "r7 = -16;"
+ "call %[bpf_get_prandom_u32];"
+ "r6 = r0;"
+ "r1 = r10;"
+ "r1 += -8;"
+ "r2 = 0;"
+ "r3 = 10;"
+ "call %[bpf_iter_num_new];"
+ "1:"
+ "r1 = r10;"
+ "r1 += -8;\n"
+ "call %[bpf_iter_num_next];"
+ "if r0 == 0 goto 2f;"
+ "if r6 != 42 goto 3f;"
+ "r7 = -33;"
+ "call %[bpf_get_prandom_u32];"
+ "r6 = r0;"
+ "goto 1b;\n"
+ "3:"
+ "r0 = r10;"
+ "r0 += r7;"
+ "r8 = *(u64 *)(r0 + 0);"
+ "call %[bpf_get_prandom_u32];"
+ "r6 = r0;"
+ "goto 1b;\n"
+ "2:"
+ "r1 = r10;"
+ "r1 += -8;"
+ "call %[bpf_iter_num_destroy];"
+ "r0 = r8;"
+ "exit;"
+ :
+ : __imm(bpf_get_prandom_u32),
+ __imm(bpf_iter_num_new),
+ __imm(bpf_iter_num_next),
+ __imm(bpf_iter_num_destroy),
+ __imm(bpf_probe_read_user)
+ : __clobber_all
+ );
+}
+
+SEC("?raw_tp")
+__failure
+__msg("math between fp pointer and register with unbounded")
+__flag(BPF_F_TEST_STATE_FREQ)
+__naked int loop_state_deps1(void)
+{
+ /* This is equivalent to C program below.
+ *
+ * The case turns out to be tricky in a sense that:
+ * - states with c=-25 are explored only on a second iteration
+ * of the outer loop;
+ * - states with read+precise mark on c are explored only on
+ * second iteration of the inner loop and in a state which
+ * is pushed to states stack first.
+ *
+ * Depending on the details of iterator convergence logic
+ * verifier might stop states traversal too early and miss
+ * unsafe c=-25 memory access.
+ *
+ * j = iter_new(); // fp[-16]
+ * a = 0; // r6
+ * b = 0; // r7
+ * c = -24; // r8
+ * while (iter_next(j)) {
+ * i = iter_new(); // fp[-8]
+ * a = 0; // r6
+ * b = 0; // r7
+ * while (iter_next(i)) {
+ * if (a == 1) {
+ * a = 0;
+ * b = 1;
+ * } else if (a == 0) {
+ * a = 1;
+ * if (random() == 42)
+ * continue;
+ * if (b == 1) {
+ * *(r10 + c) = 7; // this is not safe
+ * iter_destroy(i);
+ * iter_destroy(j);
+ * return;
+ * }
+ * }
+ * }
+ * iter_destroy(i);
+ * a = 0;
+ * b = 0;
+ * c = -25;
+ * }
+ * iter_destroy(j);
+ * return;
+ */
+ asm volatile (
+ "r1 = r10;"
+ "r1 += -16;"
+ "r2 = 0;"
+ "r3 = 10;"
+ "call %[bpf_iter_num_new];"
+ "r6 = 0;"
+ "r7 = 0;"
+ "r8 = -24;"
+ "j_loop_%=:"
+ "r1 = r10;"
+ "r1 += -16;"
+ "call %[bpf_iter_num_next];"
+ "if r0 == 0 goto j_loop_end_%=;"
+ "r1 = r10;"
+ "r1 += -8;"
+ "r2 = 0;"
+ "r3 = 10;"
+ "call %[bpf_iter_num_new];"
+ "r6 = 0;"
+ "r7 = 0;"
+ "i_loop_%=:"
+ "r1 = r10;"
+ "r1 += -8;"
+ "call %[bpf_iter_num_next];"
+ "if r0 == 0 goto i_loop_end_%=;"
+ "check_one_r6_%=:"
+ "if r6 != 1 goto check_zero_r6_%=;"
+ "r6 = 0;"
+ "r7 = 1;"
+ "goto i_loop_%=;"
+ "check_zero_r6_%=:"
+ "if r6 != 0 goto i_loop_%=;"
+ "r6 = 1;"
+ "call %[bpf_get_prandom_u32];"
+ "if r0 != 42 goto check_one_r7_%=;"
+ "goto i_loop_%=;"
+ "check_one_r7_%=:"
+ "if r7 != 1 goto i_loop_%=;"
+ "r0 = r10;"
+ "r0 += r8;"
+ "r1 = 7;"
+ "*(u64 *)(r0 + 0) = r1;"
+ "r1 = r10;"
+ "r1 += -8;"
+ "call %[bpf_iter_num_destroy];"
+ "r1 = r10;"
+ "r1 += -16;"
+ "call %[bpf_iter_num_destroy];"
+ "r0 = 0;"
+ "exit;"
+ "i_loop_end_%=:"
+ "r1 = r10;"
+ "r1 += -8;"
+ "call %[bpf_iter_num_destroy];"
+ "r6 = 0;"
+ "r7 = 0;"
+ "r8 = -25;"
+ "goto j_loop_%=;"
+ "j_loop_end_%=:"
+ "r1 = r10;"
+ "r1 += -16;"
+ "call %[bpf_iter_num_destroy];"
+ "r0 = 0;"
+ "exit;"
+ :
+ : __imm(bpf_get_prandom_u32),
+ __imm(bpf_iter_num_new),
+ __imm(bpf_iter_num_next),
+ __imm(bpf_iter_num_destroy)
+ : __clobber_all
+ );
+}
+
+SEC("?raw_tp")
+__failure
+__msg("math between fp pointer and register with unbounded")
+__flag(BPF_F_TEST_STATE_FREQ)
+__naked int loop_state_deps2(void)
+{
+ /* This is equivalent to C program below.
+ *
+ * The case turns out to be tricky in a sense that:
+ * - states with read+precise mark on c are explored only on a second
+ * iteration of the first inner loop and in a state which is pushed to
+ * states stack first.
+ * - states with c=-25 are explored only on a second iteration of the
+ * second inner loop and in a state which is pushed to states stack
+ * first.
+ *
+ * Depending on the details of iterator convergence logic
+ * verifier might stop states traversal too early and miss
+ * unsafe c=-25 memory access.
+ *
+ * j = iter_new(); // fp[-16]
+ * a = 0; // r6
+ * b = 0; // r7
+ * c = -24; // r8
+ * while (iter_next(j)) {
+ * i = iter_new(); // fp[-8]
+ * a = 0; // r6
+ * b = 0; // r7
+ * while (iter_next(i)) {
+ * if (a == 1) {
+ * a = 0;
+ * b = 1;
+ * } else if (a == 0) {
+ * a = 1;
+ * if (random() == 42)
+ * continue;
+ * if (b == 1) {
+ * *(r10 + c) = 7; // this is not safe
+ * iter_destroy(i);
+ * iter_destroy(j);
+ * return;
+ * }
+ * }
+ * }
+ * iter_destroy(i);
+ * i = iter_new(); // fp[-8]
+ * a = 0; // r6
+ * b = 0; // r7
+ * while (iter_next(i)) {
+ * if (a == 1) {
+ * a = 0;
+ * b = 1;
+ * } else if (a == 0) {
+ * a = 1;
+ * if (random() == 42)
+ * continue;
+ * if (b == 1) {
+ * a = 0;
+ * c = -25;
+ * }
+ * }
+ * }
+ * iter_destroy(i);
+ * }
+ * iter_destroy(j);
+ * return;
+ */
+ asm volatile (
+ "r1 = r10;"
+ "r1 += -16;"
+ "r2 = 0;"
+ "r3 = 10;"
+ "call %[bpf_iter_num_new];"
+ "r6 = 0;"
+ "r7 = 0;"
+ "r8 = -24;"
+ "j_loop_%=:"
+ "r1 = r10;"
+ "r1 += -16;"
+ "call %[bpf_iter_num_next];"
+ "if r0 == 0 goto j_loop_end_%=;"
+
+ /* first inner loop */
+ "r1 = r10;"
+ "r1 += -8;"
+ "r2 = 0;"
+ "r3 = 10;"
+ "call %[bpf_iter_num_new];"
+ "r6 = 0;"
+ "r7 = 0;"
+ "i_loop_%=:"
+ "r1 = r10;"
+ "r1 += -8;"
+ "call %[bpf_iter_num_next];"
+ "if r0 == 0 goto i_loop_end_%=;"
+ "check_one_r6_%=:"
+ "if r6 != 1 goto check_zero_r6_%=;"
+ "r6 = 0;"
+ "r7 = 1;"
+ "goto i_loop_%=;"
+ "check_zero_r6_%=:"
+ "if r6 != 0 goto i_loop_%=;"
+ "r6 = 1;"
+ "call %[bpf_get_prandom_u32];"
+ "if r0 != 42 goto check_one_r7_%=;"
+ "goto i_loop_%=;"
+ "check_one_r7_%=:"
+ "if r7 != 1 goto i_loop_%=;"
+ "r0 = r10;"
+ "r0 += r8;"
+ "r1 = 7;"
+ "*(u64 *)(r0 + 0) = r1;"
+ "r1 = r10;"
+ "r1 += -8;"
+ "call %[bpf_iter_num_destroy];"
+ "r1 = r10;"
+ "r1 += -16;"
+ "call %[bpf_iter_num_destroy];"
+ "r0 = 0;"
+ "exit;"
+ "i_loop_end_%=:"
+ "r1 = r10;"
+ "r1 += -8;"
+ "call %[bpf_iter_num_destroy];"
+
+ /* second inner loop */
+ "r1 = r10;"
+ "r1 += -8;"
+ "r2 = 0;"
+ "r3 = 10;"
+ "call %[bpf_iter_num_new];"
+ "r6 = 0;"
+ "r7 = 0;"
+ "i2_loop_%=:"
+ "r1 = r10;"
+ "r1 += -8;"
+ "call %[bpf_iter_num_next];"
+ "if r0 == 0 goto i2_loop_end_%=;"
+ "check2_one_r6_%=:"
+ "if r6 != 1 goto check2_zero_r6_%=;"
+ "r6 = 0;"
+ "r7 = 1;"
+ "goto i2_loop_%=;"
+ "check2_zero_r6_%=:"
+ "if r6 != 0 goto i2_loop_%=;"
+ "r6 = 1;"
+ "call %[bpf_get_prandom_u32];"
+ "if r0 != 42 goto check2_one_r7_%=;"
+ "goto i2_loop_%=;"
+ "check2_one_r7_%=:"
+ "if r7 != 1 goto i2_loop_%=;"
+ "r6 = 0;"
+ "r8 = -25;"
+ "goto i2_loop_%=;"
+ "i2_loop_end_%=:"
+ "r1 = r10;"
+ "r1 += -8;"
+ "call %[bpf_iter_num_destroy];"
+
+ "r6 = 0;"
+ "r7 = 0;"
+ "goto j_loop_%=;"
+ "j_loop_end_%=:"
+ "r1 = r10;"
+ "r1 += -16;"
+ "call %[bpf_iter_num_destroy];"
+ "r0 = 0;"
+ "exit;"
+ :
+ : __imm(bpf_get_prandom_u32),
+ __imm(bpf_iter_num_new),
+ __imm(bpf_iter_num_next),
+ __imm(bpf_iter_num_destroy)
+ : __clobber_all
+ );
+}
+
+SEC("?raw_tp")
+__success
+__naked int triple_continue(void)
+{
+ /* This is equivalent to C program below.
+ * High branching factor of the loop body turned out to be
+ * problematic for one of the iterator convergence tracking
+ * algorithms explored.
+ *
+ * r6 = bpf_get_prandom_u32()
+ * bpf_iter_num_new(&fp[-8], 0, 10)
+ * while (bpf_iter_num_next(&fp[-8])) {
+ * if (bpf_get_prandom_u32() != 42)
+ * continue;
+ * if (bpf_get_prandom_u32() != 42)
+ * continue;
+ * if (bpf_get_prandom_u32() != 42)
+ * continue;
+ * r0 += 0;
+ * }
+ * bpf_iter_num_destroy(&fp[-8])
+ * return 0
+ */
+ asm volatile (
+ "r1 = r10;"
+ "r1 += -8;"
+ "r2 = 0;"
+ "r3 = 10;"
+ "call %[bpf_iter_num_new];"
+ "loop_%=:"
+ "r1 = r10;"
+ "r1 += -8;"
+ "call %[bpf_iter_num_next];"
+ "if r0 == 0 goto loop_end_%=;"
+ "call %[bpf_get_prandom_u32];"
+ "if r0 != 42 goto loop_%=;"
+ "call %[bpf_get_prandom_u32];"
+ "if r0 != 42 goto loop_%=;"
+ "call %[bpf_get_prandom_u32];"
+ "if r0 != 42 goto loop_%=;"
+ "r0 += 0;"
+ "goto loop_%=;"
+ "loop_end_%=:"
+ "r1 = r10;"
+ "r1 += -8;"
+ "call %[bpf_iter_num_destroy];"
+ "r0 = 0;"
+ "exit;"
+ :
+ : __imm(bpf_get_prandom_u32),
+ __imm(bpf_iter_num_new),
+ __imm(bpf_iter_num_next),
+ __imm(bpf_iter_num_destroy)
+ : __clobber_all
+ );
+}
+
+SEC("?raw_tp")
+__success
+__naked int widen_spill(void)
+{
+ /* This is equivalent to C program below.
+ * The counter is stored in fp[-16], if this counter is not widened
+ * verifier states representing loop iterations would never converge.
+ *
+ * fp[-16] = 0
+ * bpf_iter_num_new(&fp[-8], 0, 10)
+ * while (bpf_iter_num_next(&fp[-8])) {
+ * r0 = fp[-16];
+ * r0 += 1;
+ * fp[-16] = r0;
+ * }
+ * bpf_iter_num_destroy(&fp[-8])
+ * return 0
+ */
+ asm volatile (
+ "r0 = 0;"
+ "*(u64 *)(r10 - 16) = r0;"
+ "r1 = r10;"
+ "r1 += -8;"
+ "r2 = 0;"
+ "r3 = 10;"
+ "call %[bpf_iter_num_new];"
+ "loop_%=:"
+ "r1 = r10;"
+ "r1 += -8;"
+ "call %[bpf_iter_num_next];"
+ "if r0 == 0 goto loop_end_%=;"
+ "r0 = *(u64 *)(r10 - 16);"
+ "r0 += 1;"
+ "*(u64 *)(r10 - 16) = r0;"
+ "goto loop_%=;"
+ "loop_end_%=:"
+ "r1 = r10;"
+ "r1 += -8;"
+ "call %[bpf_iter_num_destroy];"
+ "r0 = 0;"
+ "exit;"
+ :
+ : __imm(bpf_iter_num_new),
+ __imm(bpf_iter_num_next),
+ __imm(bpf_iter_num_destroy)
+ : __clobber_all
+ );
+}
+
+SEC("raw_tp")
+__success
+__naked int checkpoint_states_deletion(void)
+{
+ /* This is equivalent to C program below.
+ *
+ * int *a, *b, *c, *d, *e, *f;
+ * int i, sum = 0;
+ * bpf_for(i, 0, 10) {
+ * a = bpf_map_lookup_elem(&amap, &i);
+ * b = bpf_map_lookup_elem(&amap, &i);
+ * c = bpf_map_lookup_elem(&amap, &i);
+ * d = bpf_map_lookup_elem(&amap, &i);
+ * e = bpf_map_lookup_elem(&amap, &i);
+ * f = bpf_map_lookup_elem(&amap, &i);
+ * if (a) sum += 1;
+ * if (b) sum += 1;
+ * if (c) sum += 1;
+ * if (d) sum += 1;
+ * if (e) sum += 1;
+ * if (f) sum += 1;
+ * }
+ * return 0;
+ *
+ * The body of the loop spawns multiple simulation paths
+ * with different combination of NULL/non-NULL information for a/b/c/d/e/f.
+ * Each combination is unique from states_equal() point of view.
+ * Explored states checkpoint is created after each iterator next call.
+ * Iterator convergence logic expects that eventually current state
+ * would get equal to one of the explored states and thus loop
+ * exploration would be finished (at-least for a specific path).
+ * Verifier evicts explored states with high miss to hit ratio
+ * to to avoid comparing current state with too many explored
+ * states per instruction.
+ * This test is designed to "stress test" eviction policy defined using formula:
+ *
+ * sl->miss_cnt > sl->hit_cnt * N + N // if true sl->state is evicted
+ *
+ * Currently N is set to 64, which allows for 6 variables in this test.
+ */
+ asm volatile (
+ "r6 = 0;" /* a */
+ "r7 = 0;" /* b */
+ "r8 = 0;" /* c */
+ "*(u64 *)(r10 - 24) = r6;" /* d */
+ "*(u64 *)(r10 - 32) = r6;" /* e */
+ "*(u64 *)(r10 - 40) = r6;" /* f */
+ "r9 = 0;" /* sum */
+ "r1 = r10;"
+ "r1 += -8;"
+ "r2 = 0;"
+ "r3 = 10;"
+ "call %[bpf_iter_num_new];"
+ "loop_%=:"
+ "r1 = r10;"
+ "r1 += -8;"
+ "call %[bpf_iter_num_next];"
+ "if r0 == 0 goto loop_end_%=;"
+
+ "*(u64 *)(r10 - 16) = r0;"
+
+ "r1 = %[amap] ll;"
+ "r2 = r10;"
+ "r2 += -16;"
+ "call %[bpf_map_lookup_elem];"
+ "r6 = r0;"
+
+ "r1 = %[amap] ll;"
+ "r2 = r10;"
+ "r2 += -16;"
+ "call %[bpf_map_lookup_elem];"
+ "r7 = r0;"
+
+ "r1 = %[amap] ll;"
+ "r2 = r10;"
+ "r2 += -16;"
+ "call %[bpf_map_lookup_elem];"
+ "r8 = r0;"
+
+ "r1 = %[amap] ll;"
+ "r2 = r10;"
+ "r2 += -16;"
+ "call %[bpf_map_lookup_elem];"
+ "*(u64 *)(r10 - 24) = r0;"
+
+ "r1 = %[amap] ll;"
+ "r2 = r10;"
+ "r2 += -16;"
+ "call %[bpf_map_lookup_elem];"
+ "*(u64 *)(r10 - 32) = r0;"
+
+ "r1 = %[amap] ll;"
+ "r2 = r10;"
+ "r2 += -16;"
+ "call %[bpf_map_lookup_elem];"
+ "*(u64 *)(r10 - 40) = r0;"
+
+ "if r6 == 0 goto +1;"
+ "r9 += 1;"
+ "if r7 == 0 goto +1;"
+ "r9 += 1;"
+ "if r8 == 0 goto +1;"
+ "r9 += 1;"
+ "r0 = *(u64 *)(r10 - 24);"
+ "if r0 == 0 goto +1;"
+ "r9 += 1;"
+ "r0 = *(u64 *)(r10 - 32);"
+ "if r0 == 0 goto +1;"
+ "r9 += 1;"
+ "r0 = *(u64 *)(r10 - 40);"
+ "if r0 == 0 goto +1;"
+ "r9 += 1;"
+
+ "goto loop_%=;"
+ "loop_end_%=:"
+ "r1 = r10;"
+ "r1 += -8;"
+ "call %[bpf_iter_num_destroy];"
+ "r0 = 0;"
+ "exit;"
+ :
+ : __imm(bpf_map_lookup_elem),
+ __imm(bpf_iter_num_new),
+ __imm(bpf_iter_num_next),
+ __imm(bpf_iter_num_destroy),
+ __imm_addr(amap)
+ : __clobber_all
+ );
+}
+
+struct {
+ int data[32];
+ int n;
+} loop_data;
+
+SEC("raw_tp")
+__success
+int iter_arr_with_actual_elem_count(const void *ctx)
+{
+ int i, n = loop_data.n, sum = 0;
+
+ if (n > ARRAY_SIZE(loop_data.data))
+ return 0;
+
+ bpf_for(i, 0, n) {
+ /* no rechecking of i against ARRAY_SIZE(loop_data.n) */
+ sum += loop_data.data[i];
+ }
+
+ return sum;
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/iters_css.c b/tools/testing/selftests/bpf/progs/iters_css.c
new file mode 100644
index 000000000000..ec1f6c2f590b
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/iters_css.c
@@ -0,0 +1,72 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (C) 2023 Chuyi Zhou <zhouchuyi@bytedance.com> */
+
+#include "vmlinux.h"
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
+#include "bpf_misc.h"
+#include "bpf_experimental.h"
+
+char _license[] SEC("license") = "GPL";
+
+pid_t target_pid;
+u64 root_cg_id, leaf_cg_id;
+u64 first_cg_id, last_cg_id;
+
+int pre_order_cnt, post_order_cnt, tree_high;
+
+struct cgroup *bpf_cgroup_from_id(u64 cgid) __ksym;
+void bpf_cgroup_release(struct cgroup *p) __ksym;
+void bpf_rcu_read_lock(void) __ksym;
+void bpf_rcu_read_unlock(void) __ksym;
+
+SEC("fentry.s/" SYS_PREFIX "sys_getpgid")
+int iter_css_for_each(const void *ctx)
+{
+ struct task_struct *cur_task = bpf_get_current_task_btf();
+ struct cgroup_subsys_state *root_css, *leaf_css, *pos;
+ struct cgroup *root_cgrp, *leaf_cgrp, *cur_cgrp;
+
+ if (cur_task->pid != target_pid)
+ return 0;
+
+ root_cgrp = bpf_cgroup_from_id(root_cg_id);
+
+ if (!root_cgrp)
+ return 0;
+
+ leaf_cgrp = bpf_cgroup_from_id(leaf_cg_id);
+
+ if (!leaf_cgrp) {
+ bpf_cgroup_release(root_cgrp);
+ return 0;
+ }
+ root_css = &root_cgrp->self;
+ leaf_css = &leaf_cgrp->self;
+ pre_order_cnt = post_order_cnt = tree_high = 0;
+ first_cg_id = last_cg_id = 0;
+
+ bpf_rcu_read_lock();
+ bpf_for_each(css, pos, root_css, BPF_CGROUP_ITER_DESCENDANTS_POST) {
+ cur_cgrp = pos->cgroup;
+ post_order_cnt++;
+ last_cg_id = cur_cgrp->kn->id;
+ }
+
+ bpf_for_each(css, pos, root_css, BPF_CGROUP_ITER_DESCENDANTS_PRE) {
+ cur_cgrp = pos->cgroup;
+ pre_order_cnt++;
+ if (!first_cg_id)
+ first_cg_id = cur_cgrp->kn->id;
+ }
+
+ bpf_for_each(css, pos, leaf_css, BPF_CGROUP_ITER_ANCESTORS_UP)
+ tree_high++;
+
+ bpf_for_each(css, pos, root_css, BPF_CGROUP_ITER_ANCESTORS_UP)
+ tree_high--;
+ bpf_rcu_read_unlock();
+ bpf_cgroup_release(root_cgrp);
+ bpf_cgroup_release(leaf_cgrp);
+ return 0;
+}
diff --git a/tools/testing/selftests/bpf/progs/iters_css_task.c b/tools/testing/selftests/bpf/progs/iters_css_task.c
new file mode 100644
index 000000000000..9ac758649cb8
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/iters_css_task.c
@@ -0,0 +1,102 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (C) 2023 Chuyi Zhou <zhouchuyi@bytedance.com> */
+
+#include "vmlinux.h"
+#include <errno.h>
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
+#include "bpf_misc.h"
+#include "bpf_experimental.h"
+
+char _license[] SEC("license") = "GPL";
+
+struct cgroup *bpf_cgroup_acquire(struct cgroup *p) __ksym;
+struct cgroup *bpf_cgroup_from_id(u64 cgid) __ksym;
+void bpf_cgroup_release(struct cgroup *p) __ksym;
+
+pid_t target_pid;
+int css_task_cnt;
+u64 cg_id;
+
+SEC("lsm/file_mprotect")
+int BPF_PROG(iter_css_task_for_each, struct vm_area_struct *vma,
+ unsigned long reqprot, unsigned long prot, int ret)
+{
+ struct task_struct *cur_task = bpf_get_current_task_btf();
+ struct cgroup_subsys_state *css;
+ struct task_struct *task;
+ struct cgroup *cgrp;
+
+ if (cur_task->pid != target_pid)
+ return ret;
+
+ cgrp = bpf_cgroup_from_id(cg_id);
+
+ if (!cgrp)
+ return -EPERM;
+
+ css = &cgrp->self;
+ css_task_cnt = 0;
+
+ bpf_for_each(css_task, task, css, CSS_TASK_ITER_PROCS)
+ if (task->pid == target_pid)
+ css_task_cnt++;
+
+ bpf_cgroup_release(cgrp);
+
+ return -EPERM;
+}
+
+static inline u64 cgroup_id(struct cgroup *cgrp)
+{
+ return cgrp->kn->id;
+}
+
+SEC("?iter/cgroup")
+int cgroup_id_printer(struct bpf_iter__cgroup *ctx)
+{
+ struct seq_file *seq = ctx->meta->seq;
+ struct cgroup *cgrp = ctx->cgroup;
+ struct cgroup_subsys_state *css;
+ struct task_struct *task;
+
+ /* epilogue */
+ if (cgrp == NULL) {
+ BPF_SEQ_PRINTF(seq, "epilogue\n");
+ return 0;
+ }
+
+ /* prologue */
+ if (ctx->meta->seq_num == 0)
+ BPF_SEQ_PRINTF(seq, "prologue\n");
+
+ BPF_SEQ_PRINTF(seq, "%8llu\n", cgroup_id(cgrp));
+
+ css = &cgrp->self;
+ css_task_cnt = 0;
+ bpf_for_each(css_task, task, css, CSS_TASK_ITER_PROCS) {
+ if (task->pid == target_pid)
+ css_task_cnt++;
+ }
+
+ return 0;
+}
+
+SEC("?fentry.s/" SYS_PREFIX "sys_getpgid")
+int BPF_PROG(iter_css_task_for_each_sleep)
+{
+ u64 cgrp_id = bpf_get_current_cgroup_id();
+ struct cgroup *cgrp = bpf_cgroup_from_id(cgrp_id);
+ struct cgroup_subsys_state *css;
+ struct task_struct *task;
+
+ if (cgrp == NULL)
+ return 0;
+ css = &cgrp->self;
+
+ bpf_for_each(css_task, task, css, CSS_TASK_ITER_PROCS) {
+
+ }
+ bpf_cgroup_release(cgrp);
+ return 0;
+}
diff --git a/tools/testing/selftests/bpf/progs/iters_looping.c b/tools/testing/selftests/bpf/progs/iters_looping.c
new file mode 100644
index 000000000000..05fa5ce7fc59
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/iters_looping.c
@@ -0,0 +1,163 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2023 Meta Platforms, Inc. and affiliates. */
+
+#include <errno.h>
+#include <string.h>
+#include <linux/bpf.h>
+#include <bpf/bpf_helpers.h>
+#include "bpf_misc.h"
+
+char _license[] SEC("license") = "GPL";
+
+#define ITER_HELPERS \
+ __imm(bpf_iter_num_new), \
+ __imm(bpf_iter_num_next), \
+ __imm(bpf_iter_num_destroy)
+
+SEC("?raw_tp")
+__success
+int force_clang_to_emit_btf_for_externs(void *ctx)
+{
+ /* we need this as a workaround to enforce compiler emitting BTF
+ * information for bpf_iter_num_{new,next,destroy}() kfuncs,
+ * as, apparently, it doesn't emit it for symbols only referenced from
+ * assembly (or cleanup attribute, for that matter, as well)
+ */
+ bpf_repeat(0);
+
+ return 0;
+}
+
+SEC("?raw_tp")
+__success
+int consume_first_item_only(void *ctx)
+{
+ struct bpf_iter_num iter;
+
+ asm volatile (
+ /* create iterator */
+ "r1 = %[iter];"
+ "r2 = 0;"
+ "r3 = 1000;"
+ "call %[bpf_iter_num_new];"
+
+ /* consume first item */
+ "r1 = %[iter];"
+ "call %[bpf_iter_num_next];"
+
+ "if r0 == 0 goto +1;"
+ "r0 = *(u32 *)(r0 + 0);"
+
+ /* destroy iterator */
+ "r1 = %[iter];"
+ "call %[bpf_iter_num_destroy];"
+ :
+ : __imm_ptr(iter), ITER_HELPERS
+ : __clobber_common
+ );
+
+ return 0;
+}
+
+SEC("?raw_tp")
+__failure __msg("R0 invalid mem access 'scalar'")
+int missing_null_check_fail(void *ctx)
+{
+ struct bpf_iter_num iter;
+
+ asm volatile (
+ /* create iterator */
+ "r1 = %[iter];"
+ "r2 = 0;"
+ "r3 = 1000;"
+ "call %[bpf_iter_num_new];"
+
+ /* consume first element */
+ "r1 = %[iter];"
+ "call %[bpf_iter_num_next];"
+
+ /* FAIL: deref with no NULL check */
+ "r1 = *(u32 *)(r0 + 0);"
+
+ /* destroy iterator */
+ "r1 = %[iter];"
+ "call %[bpf_iter_num_destroy];"
+ :
+ : __imm_ptr(iter), ITER_HELPERS
+ : __clobber_common
+ );
+
+ return 0;
+}
+
+SEC("?raw_tp")
+__failure
+__msg("invalid access to memory, mem_size=4 off=0 size=8")
+__msg("R0 min value is outside of the allowed memory range")
+int wrong_sized_read_fail(void *ctx)
+{
+ struct bpf_iter_num iter;
+
+ asm volatile (
+ /* create iterator */
+ "r1 = %[iter];"
+ "r2 = 0;"
+ "r3 = 1000;"
+ "call %[bpf_iter_num_new];"
+
+ /* consume first element */
+ "r1 = %[iter];"
+ "call %[bpf_iter_num_next];"
+
+ "if r0 == 0 goto +1;"
+ /* FAIL: deref more than available 4 bytes */
+ "r0 = *(u64 *)(r0 + 0);"
+
+ /* destroy iterator */
+ "r1 = %[iter];"
+ "call %[bpf_iter_num_destroy];"
+ :
+ : __imm_ptr(iter), ITER_HELPERS
+ : __clobber_common
+ );
+
+ return 0;
+}
+
+SEC("?raw_tp")
+__success __log_level(2)
+__flag(BPF_F_TEST_STATE_FREQ)
+int simplest_loop(void *ctx)
+{
+ struct bpf_iter_num iter;
+
+ asm volatile (
+ "r6 = 0;" /* init sum */
+
+ /* create iterator */
+ "r1 = %[iter];"
+ "r2 = 0;"
+ "r3 = 10;"
+ "call %[bpf_iter_num_new];"
+
+ "1:"
+ /* consume next item */
+ "r1 = %[iter];"
+ "call %[bpf_iter_num_next];"
+
+ "if r0 == 0 goto 2f;"
+ "r0 = *(u32 *)(r0 + 0);"
+ "r6 += r0;" /* accumulate sum */
+ "goto 1b;"
+
+ "2:"
+ /* destroy iterator */
+ "r1 = %[iter];"
+ "call %[bpf_iter_num_destroy];"
+ :
+ : __imm_ptr(iter), ITER_HELPERS
+ : __clobber_common, "r6"
+ );
+
+ return 0;
+}
diff --git a/tools/testing/selftests/bpf/progs/iters_num.c b/tools/testing/selftests/bpf/progs/iters_num.c
new file mode 100644
index 000000000000..7a77a8daee0d
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/iters_num.c
@@ -0,0 +1,242 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2023 Meta Platforms, Inc. and affiliates. */
+
+#include <limits.h>
+#include <linux/errno.h>
+#include "vmlinux.h"
+#include <bpf/bpf_helpers.h>
+#include "bpf_misc.h"
+
+const volatile __s64 exp_empty_zero = 0 + 1;
+__s64 res_empty_zero;
+
+SEC("raw_tp/sys_enter")
+int num_empty_zero(const void *ctx)
+{
+ __s64 sum = 0, i;
+
+ bpf_for(i, 0, 0) sum += i;
+ res_empty_zero = 1 + sum;
+
+ return 0;
+}
+
+const volatile __s64 exp_empty_int_min = 0 + 2;
+__s64 res_empty_int_min;
+
+SEC("raw_tp/sys_enter")
+int num_empty_int_min(const void *ctx)
+{
+ __s64 sum = 0, i;
+
+ bpf_for(i, INT_MIN, INT_MIN) sum += i;
+ res_empty_int_min = 2 + sum;
+
+ return 0;
+}
+
+const volatile __s64 exp_empty_int_max = 0 + 3;
+__s64 res_empty_int_max;
+
+SEC("raw_tp/sys_enter")
+int num_empty_int_max(const void *ctx)
+{
+ __s64 sum = 0, i;
+
+ bpf_for(i, INT_MAX, INT_MAX) sum += i;
+ res_empty_int_max = 3 + sum;
+
+ return 0;
+}
+
+const volatile __s64 exp_empty_minus_one = 0 + 4;
+__s64 res_empty_minus_one;
+
+SEC("raw_tp/sys_enter")
+int num_empty_minus_one(const void *ctx)
+{
+ __s64 sum = 0, i;
+
+ bpf_for(i, -1, -1) sum += i;
+ res_empty_minus_one = 4 + sum;
+
+ return 0;
+}
+
+const volatile __s64 exp_simple_sum = 9 * 10 / 2;
+__s64 res_simple_sum;
+
+SEC("raw_tp/sys_enter")
+int num_simple_sum(const void *ctx)
+{
+ __s64 sum = 0, i;
+
+ bpf_for(i, 0, 10) sum += i;
+ res_simple_sum = sum;
+
+ return 0;
+}
+
+const volatile __s64 exp_neg_sum = -11 * 10 / 2;
+__s64 res_neg_sum;
+
+SEC("raw_tp/sys_enter")
+int num_neg_sum(const void *ctx)
+{
+ __s64 sum = 0, i;
+
+ bpf_for(i, -10, 0) sum += i;
+ res_neg_sum = sum;
+
+ return 0;
+}
+
+const volatile __s64 exp_very_neg_sum = INT_MIN + (__s64)(INT_MIN + 1);
+__s64 res_very_neg_sum;
+
+SEC("raw_tp/sys_enter")
+int num_very_neg_sum(const void *ctx)
+{
+ __s64 sum = 0, i;
+
+ bpf_for(i, INT_MIN, INT_MIN + 2) sum += i;
+ res_very_neg_sum = sum;
+
+ return 0;
+}
+
+const volatile __s64 exp_very_big_sum = (__s64)(INT_MAX - 1) + (__s64)(INT_MAX - 2);
+__s64 res_very_big_sum;
+
+SEC("raw_tp/sys_enter")
+int num_very_big_sum(const void *ctx)
+{
+ __s64 sum = 0, i;
+
+ bpf_for(i, INT_MAX - 2, INT_MAX) sum += i;
+ res_very_big_sum = sum;
+
+ return 0;
+}
+
+const volatile __s64 exp_neg_pos_sum = -3;
+__s64 res_neg_pos_sum;
+
+SEC("raw_tp/sys_enter")
+int num_neg_pos_sum(const void *ctx)
+{
+ __s64 sum = 0, i;
+
+ bpf_for(i, -3, 3) sum += i;
+ res_neg_pos_sum = sum;
+
+ return 0;
+}
+
+const volatile __s64 exp_invalid_range = -EINVAL;
+__s64 res_invalid_range;
+
+SEC("raw_tp/sys_enter")
+int num_invalid_range(const void *ctx)
+{
+ struct bpf_iter_num it;
+
+ res_invalid_range = bpf_iter_num_new(&it, 1, 0);
+ bpf_iter_num_destroy(&it);
+
+ return 0;
+}
+
+const volatile __s64 exp_max_range = 0 + 10;
+__s64 res_max_range;
+
+SEC("raw_tp/sys_enter")
+int num_max_range(const void *ctx)
+{
+ struct bpf_iter_num it;
+
+ res_max_range = 10 + bpf_iter_num_new(&it, 0, BPF_MAX_LOOPS);
+ bpf_iter_num_destroy(&it);
+
+ return 0;
+}
+
+const volatile __s64 exp_e2big_range = -E2BIG;
+__s64 res_e2big_range;
+
+SEC("raw_tp/sys_enter")
+int num_e2big_range(const void *ctx)
+{
+ struct bpf_iter_num it;
+
+ res_e2big_range = bpf_iter_num_new(&it, -1, BPF_MAX_LOOPS);
+ bpf_iter_num_destroy(&it);
+
+ return 0;
+}
+
+const volatile __s64 exp_succ_elem_cnt = 10;
+__s64 res_succ_elem_cnt;
+
+SEC("raw_tp/sys_enter")
+int num_succ_elem_cnt(const void *ctx)
+{
+ struct bpf_iter_num it;
+ int cnt = 0, *v;
+
+ bpf_iter_num_new(&it, 0, 10);
+ while ((v = bpf_iter_num_next(&it))) {
+ cnt++;
+ }
+ bpf_iter_num_destroy(&it);
+
+ res_succ_elem_cnt = cnt;
+
+ return 0;
+}
+
+const volatile __s64 exp_overfetched_elem_cnt = 5;
+__s64 res_overfetched_elem_cnt;
+
+SEC("raw_tp/sys_enter")
+int num_overfetched_elem_cnt(const void *ctx)
+{
+ struct bpf_iter_num it;
+ int cnt = 0, *v, i;
+
+ bpf_iter_num_new(&it, 0, 5);
+ for (i = 0; i < 10; i++) {
+ v = bpf_iter_num_next(&it);
+ if (v)
+ cnt++;
+ }
+ bpf_iter_num_destroy(&it);
+
+ res_overfetched_elem_cnt = cnt;
+
+ return 0;
+}
+
+const volatile __s64 exp_fail_elem_cnt = 20 + 0;
+__s64 res_fail_elem_cnt;
+
+SEC("raw_tp/sys_enter")
+int num_fail_elem_cnt(const void *ctx)
+{
+ struct bpf_iter_num it;
+ int cnt = 0, *v, i;
+
+ bpf_iter_num_new(&it, 100, 10);
+ for (i = 0; i < 10; i++) {
+ v = bpf_iter_num_next(&it);
+ if (v)
+ cnt++;
+ }
+ bpf_iter_num_destroy(&it);
+
+ res_fail_elem_cnt = 20 + cnt;
+
+ return 0;
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/iters_state_safety.c b/tools/testing/selftests/bpf/progs/iters_state_safety.c
new file mode 100644
index 000000000000..d47e59aba6de
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/iters_state_safety.c
@@ -0,0 +1,426 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2022 Facebook */
+
+#include <errno.h>
+#include <string.h>
+#include <linux/bpf.h>
+#include <bpf/bpf_helpers.h>
+#include "bpf_misc.h"
+
+char _license[] SEC("license") = "GPL";
+
+#define ITER_HELPERS \
+ __imm(bpf_iter_num_new), \
+ __imm(bpf_iter_num_next), \
+ __imm(bpf_iter_num_destroy)
+
+SEC("?raw_tp")
+__success
+int force_clang_to_emit_btf_for_externs(void *ctx)
+{
+ /* we need this as a workaround to enforce compiler emitting BTF
+ * information for bpf_iter_num_{new,next,destroy}() kfuncs,
+ * as, apparently, it doesn't emit it for symbols only referenced from
+ * assembly (or cleanup attribute, for that matter, as well)
+ */
+ bpf_repeat(0);
+
+ return 0;
+}
+
+SEC("?raw_tp")
+__success __log_level(2)
+__msg("fp-8_w=iter_num(ref_id=1,state=active,depth=0)")
+int create_and_destroy(void *ctx)
+{
+ struct bpf_iter_num iter;
+
+ asm volatile (
+ /* create iterator */
+ "r1 = %[iter];"
+ "r2 = 0;"
+ "r3 = 1000;"
+ "call %[bpf_iter_num_new];"
+ /* destroy iterator */
+ "r1 = %[iter];"
+ "call %[bpf_iter_num_destroy];"
+ :
+ : __imm_ptr(iter), ITER_HELPERS
+ : __clobber_common
+ );
+
+ return 0;
+}
+
+SEC("?raw_tp")
+__failure __msg("Unreleased reference id=1")
+int create_and_forget_to_destroy_fail(void *ctx)
+{
+ struct bpf_iter_num iter;
+
+ asm volatile (
+ /* create iterator */
+ "r1 = %[iter];"
+ "r2 = 0;"
+ "r3 = 1000;"
+ "call %[bpf_iter_num_new];"
+ :
+ : __imm_ptr(iter), ITER_HELPERS
+ : __clobber_common
+ );
+
+ return 0;
+}
+
+SEC("?raw_tp")
+__failure __msg("expected an initialized iter_num as arg #1")
+int destroy_without_creating_fail(void *ctx)
+{
+ /* init with zeros to stop verifier complaining about uninit stack */
+ struct bpf_iter_num iter;
+
+ asm volatile (
+ "r1 = %[iter];"
+ "call %[bpf_iter_num_destroy];"
+ :
+ : __imm_ptr(iter), ITER_HELPERS
+ : __clobber_common
+ );
+
+ return 0;
+}
+
+SEC("?raw_tp")
+__failure __msg("expected an initialized iter_num as arg #1")
+int compromise_iter_w_direct_write_fail(void *ctx)
+{
+ struct bpf_iter_num iter;
+
+ asm volatile (
+ /* create iterator */
+ "r1 = %[iter];"
+ "r2 = 0;"
+ "r3 = 1000;"
+ "call %[bpf_iter_num_new];"
+
+ /* directly write over first half of iter state */
+ "*(u64 *)(%[iter] + 0) = r0;"
+
+ /* (attempt to) destroy iterator */
+ "r1 = %[iter];"
+ "call %[bpf_iter_num_destroy];"
+ :
+ : __imm_ptr(iter), ITER_HELPERS
+ : __clobber_common
+ );
+
+ return 0;
+}
+
+SEC("?raw_tp")
+__failure __msg("Unreleased reference id=1")
+int compromise_iter_w_direct_write_and_skip_destroy_fail(void *ctx)
+{
+ struct bpf_iter_num iter;
+
+ asm volatile (
+ /* create iterator */
+ "r1 = %[iter];"
+ "r2 = 0;"
+ "r3 = 1000;"
+ "call %[bpf_iter_num_new];"
+
+ /* directly write over first half of iter state */
+ "*(u64 *)(%[iter] + 0) = r0;"
+
+ /* don't destroy iter, leaking ref, which should fail */
+ :
+ : __imm_ptr(iter), ITER_HELPERS
+ : __clobber_common
+ );
+
+ return 0;
+}
+
+SEC("?raw_tp")
+__failure __msg("expected an initialized iter_num as arg #1")
+int compromise_iter_w_helper_write_fail(void *ctx)
+{
+ struct bpf_iter_num iter;
+
+ asm volatile (
+ /* create iterator */
+ "r1 = %[iter];"
+ "r2 = 0;"
+ "r3 = 1000;"
+ "call %[bpf_iter_num_new];"
+
+ /* overwrite 8th byte with bpf_probe_read_kernel() */
+ "r1 = %[iter];"
+ "r1 += 7;"
+ "r2 = 1;"
+ "r3 = 0;" /* NULL */
+ "call %[bpf_probe_read_kernel];"
+
+ /* (attempt to) destroy iterator */
+ "r1 = %[iter];"
+ "call %[bpf_iter_num_destroy];"
+ :
+ : __imm_ptr(iter), ITER_HELPERS, __imm(bpf_probe_read_kernel)
+ : __clobber_common
+ );
+
+ return 0;
+}
+
+static __noinline void subprog_with_iter(void)
+{
+ struct bpf_iter_num iter;
+
+ bpf_iter_num_new(&iter, 0, 1);
+
+ return;
+}
+
+SEC("?raw_tp")
+__failure
+/* ensure there was a call to subprog, which might happen without __noinline */
+__msg("returning from callee:")
+__msg("Unreleased reference id=1")
+int leak_iter_from_subprog_fail(void *ctx)
+{
+ subprog_with_iter();
+
+ return 0;
+}
+
+SEC("?raw_tp")
+__success __log_level(2)
+__msg("fp-8_w=iter_num(ref_id=1,state=active,depth=0)")
+int valid_stack_reuse(void *ctx)
+{
+ struct bpf_iter_num iter;
+
+ asm volatile (
+ /* create iterator */
+ "r1 = %[iter];"
+ "r2 = 0;"
+ "r3 = 1000;"
+ "call %[bpf_iter_num_new];"
+ /* destroy iterator */
+ "r1 = %[iter];"
+ "call %[bpf_iter_num_destroy];"
+
+ /* now reuse same stack slots */
+
+ /* create iterator */
+ "r1 = %[iter];"
+ "r2 = 0;"
+ "r3 = 1000;"
+ "call %[bpf_iter_num_new];"
+ /* destroy iterator */
+ "r1 = %[iter];"
+ "call %[bpf_iter_num_destroy];"
+ :
+ : __imm_ptr(iter), ITER_HELPERS
+ : __clobber_common
+ );
+
+ return 0;
+}
+
+SEC("?raw_tp")
+__failure __msg("expected uninitialized iter_num as arg #1")
+int double_create_fail(void *ctx)
+{
+ struct bpf_iter_num iter;
+
+ asm volatile (
+ /* create iterator */
+ "r1 = %[iter];"
+ "r2 = 0;"
+ "r3 = 1000;"
+ "call %[bpf_iter_num_new];"
+ /* (attempt to) create iterator again */
+ "r1 = %[iter];"
+ "r2 = 0;"
+ "r3 = 1000;"
+ "call %[bpf_iter_num_new];"
+ /* destroy iterator */
+ "r1 = %[iter];"
+ "call %[bpf_iter_num_destroy];"
+ :
+ : __imm_ptr(iter), ITER_HELPERS
+ : __clobber_common
+ );
+
+ return 0;
+}
+
+SEC("?raw_tp")
+__failure __msg("expected an initialized iter_num as arg #1")
+int double_destroy_fail(void *ctx)
+{
+ struct bpf_iter_num iter;
+
+ asm volatile (
+ /* create iterator */
+ "r1 = %[iter];"
+ "r2 = 0;"
+ "r3 = 1000;"
+ "call %[bpf_iter_num_new];"
+ /* destroy iterator */
+ "r1 = %[iter];"
+ "call %[bpf_iter_num_destroy];"
+ /* (attempt to) destroy iterator again */
+ "r1 = %[iter];"
+ "call %[bpf_iter_num_destroy];"
+ :
+ : __imm_ptr(iter), ITER_HELPERS
+ : __clobber_common
+ );
+
+ return 0;
+}
+
+SEC("?raw_tp")
+__failure __msg("expected an initialized iter_num as arg #1")
+int next_without_new_fail(void *ctx)
+{
+ struct bpf_iter_num iter;
+
+ asm volatile (
+ /* don't create iterator and try to iterate*/
+ "r1 = %[iter];"
+ "call %[bpf_iter_num_next];"
+ /* destroy iterator */
+ "r1 = %[iter];"
+ "call %[bpf_iter_num_destroy];"
+ :
+ : __imm_ptr(iter), ITER_HELPERS
+ : __clobber_common
+ );
+
+ return 0;
+}
+
+SEC("?raw_tp")
+__failure __msg("expected an initialized iter_num as arg #1")
+int next_after_destroy_fail(void *ctx)
+{
+ struct bpf_iter_num iter;
+
+ asm volatile (
+ /* create iterator */
+ "r1 = %[iter];"
+ "r2 = 0;"
+ "r3 = 1000;"
+ "call %[bpf_iter_num_new];"
+ /* destroy iterator */
+ "r1 = %[iter];"
+ "call %[bpf_iter_num_destroy];"
+ /* don't create iterator and try to iterate*/
+ "r1 = %[iter];"
+ "call %[bpf_iter_num_next];"
+ :
+ : __imm_ptr(iter), ITER_HELPERS
+ : __clobber_common
+ );
+
+ return 0;
+}
+
+SEC("?raw_tp")
+__failure __msg("invalid read from stack")
+int __naked read_from_iter_slot_fail(void)
+{
+ asm volatile (
+ /* r6 points to struct bpf_iter_num on the stack */
+ "r6 = r10;"
+ "r6 += -24;"
+
+ /* create iterator */
+ "r1 = r6;"
+ "r2 = 0;"
+ "r3 = 1000;"
+ "call %[bpf_iter_num_new];"
+
+ /* attemp to leak bpf_iter_num state */
+ "r7 = *(u64 *)(r6 + 0);"
+ "r8 = *(u64 *)(r6 + 8);"
+
+ /* destroy iterator */
+ "r1 = r6;"
+ "call %[bpf_iter_num_destroy];"
+
+ /* leak bpf_iter_num state */
+ "r0 = r7;"
+ "if r7 > r8 goto +1;"
+ "r0 = r8;"
+ "exit;"
+ :
+ : ITER_HELPERS
+ : __clobber_common, "r6", "r7", "r8"
+ );
+}
+
+int zero;
+
+SEC("?raw_tp")
+__failure
+__flag(BPF_F_TEST_STATE_FREQ)
+__msg("Unreleased reference")
+int stacksafe_should_not_conflate_stack_spill_and_iter(void *ctx)
+{
+ struct bpf_iter_num iter;
+
+ asm volatile (
+ /* Create a fork in logic, with general setup as follows:
+ * - fallthrough (first) path is valid;
+ * - branch (second) path is invalid.
+ * Then depending on what we do in fallthrough vs branch path,
+ * we try to detect bugs in func_states_equal(), regsafe(),
+ * refsafe(), stack_safe(), and similar by tricking verifier
+ * into believing that branch state is a valid subset of
+ * a fallthrough state. Verifier should reject overall
+ * validation, unless there is a bug somewhere in verifier
+ * logic.
+ */
+ "call %[bpf_get_prandom_u32];"
+ "r6 = r0;"
+ "call %[bpf_get_prandom_u32];"
+ "r7 = r0;"
+
+ "if r6 > r7 goto bad;" /* fork */
+
+ /* spill r6 into stack slot of bpf_iter_num var */
+ "*(u64 *)(%[iter] + 0) = r6;"
+
+ "goto skip_bad;"
+
+ "bad:"
+ /* create iterator in the same stack slot */
+ "r1 = %[iter];"
+ "r2 = 0;"
+ "r3 = 1000;"
+ "call %[bpf_iter_num_new];"
+
+ /* but then forget about it and overwrite it back to r6 spill */
+ "*(u64 *)(%[iter] + 0) = r6;"
+
+ "skip_bad:"
+ "goto +0;" /* force checkpoint */
+
+ /* corrupt stack slots, if they are really dynptr */
+ "*(u64 *)(%[iter] + 0) = r6;"
+ :
+ : __imm_ptr(iter),
+ __imm_addr(zero),
+ __imm(bpf_get_prandom_u32),
+ __imm(bpf_dynptr_from_mem),
+ ITER_HELPERS
+ : __clobber_common, "r6", "r7"
+ );
+
+ return 0;
+}
diff --git a/tools/testing/selftests/bpf/progs/iters_task.c b/tools/testing/selftests/bpf/progs/iters_task.c
new file mode 100644
index 000000000000..e4d53e40ff20
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/iters_task.c
@@ -0,0 +1,51 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (C) 2023 Chuyi Zhou <zhouchuyi@bytedance.com> */
+
+#include "vmlinux.h"
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
+#include "bpf_misc.h"
+#include "bpf_experimental.h"
+
+char _license[] SEC("license") = "GPL";
+
+pid_t target_pid;
+int procs_cnt, threads_cnt, proc_threads_cnt, invalid_cnt;
+
+void bpf_rcu_read_lock(void) __ksym;
+void bpf_rcu_read_unlock(void) __ksym;
+
+SEC("fentry.s/" SYS_PREFIX "sys_getpgid")
+int iter_task_for_each_sleep(void *ctx)
+{
+ struct task_struct *cur_task = bpf_get_current_task_btf();
+ struct task_struct *pos;
+
+ if (cur_task->pid != target_pid)
+ return 0;
+ procs_cnt = threads_cnt = proc_threads_cnt = 0;
+
+ bpf_rcu_read_lock();
+ bpf_for_each(task, pos, NULL, ~0U) {
+ /* Below instructions shouldn't be executed for invalid flags */
+ invalid_cnt++;
+ }
+
+ bpf_for_each(task, pos, NULL, BPF_TASK_ITER_PROC_THREADS) {
+ /* Below instructions shouldn't be executed for invalid task__nullable */
+ invalid_cnt++;
+ }
+
+ bpf_for_each(task, pos, NULL, BPF_TASK_ITER_ALL_PROCS)
+ if (pos->pid == target_pid)
+ procs_cnt++;
+
+ bpf_for_each(task, pos, cur_task, BPF_TASK_ITER_PROC_THREADS)
+ proc_threads_cnt++;
+
+ bpf_for_each(task, pos, NULL, BPF_TASK_ITER_ALL_THREADS)
+ if (pos->tgid == target_pid)
+ threads_cnt++;
+ bpf_rcu_read_unlock();
+ return 0;
+}
diff --git a/tools/testing/selftests/bpf/progs/iters_task_failure.c b/tools/testing/selftests/bpf/progs/iters_task_failure.c
new file mode 100644
index 000000000000..6b1588d70652
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/iters_task_failure.c
@@ -0,0 +1,105 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (C) 2023 Chuyi Zhou <zhouchuyi@bytedance.com> */
+
+#include "vmlinux.h"
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
+#include "bpf_misc.h"
+#include "bpf_experimental.h"
+
+char _license[] SEC("license") = "GPL";
+
+struct cgroup *bpf_cgroup_from_id(u64 cgid) __ksym;
+void bpf_cgroup_release(struct cgroup *p) __ksym;
+void bpf_rcu_read_lock(void) __ksym;
+void bpf_rcu_read_unlock(void) __ksym;
+
+SEC("?fentry.s/" SYS_PREFIX "sys_getpgid")
+__failure __msg("expected an RCU CS when using bpf_iter_task_next")
+int BPF_PROG(iter_tasks_without_lock)
+{
+ struct task_struct *pos;
+
+ bpf_for_each(task, pos, NULL, BPF_TASK_ITER_ALL_PROCS) {
+
+ }
+ return 0;
+}
+
+SEC("?fentry.s/" SYS_PREFIX "sys_getpgid")
+__failure __msg("expected an RCU CS when using bpf_iter_css_next")
+int BPF_PROG(iter_css_without_lock)
+{
+ u64 cg_id = bpf_get_current_cgroup_id();
+ struct cgroup *cgrp = bpf_cgroup_from_id(cg_id);
+ struct cgroup_subsys_state *root_css, *pos;
+
+ if (!cgrp)
+ return 0;
+ root_css = &cgrp->self;
+
+ bpf_for_each(css, pos, root_css, BPF_CGROUP_ITER_DESCENDANTS_POST) {
+
+ }
+ bpf_cgroup_release(cgrp);
+ return 0;
+}
+
+SEC("?fentry.s/" SYS_PREFIX "sys_getpgid")
+__failure __msg("expected an RCU CS when using bpf_iter_task_next")
+int BPF_PROG(iter_tasks_lock_and_unlock)
+{
+ struct task_struct *pos;
+
+ bpf_rcu_read_lock();
+ bpf_for_each(task, pos, NULL, BPF_TASK_ITER_ALL_PROCS) {
+ bpf_rcu_read_unlock();
+
+ bpf_rcu_read_lock();
+ }
+ bpf_rcu_read_unlock();
+ return 0;
+}
+
+SEC("?fentry.s/" SYS_PREFIX "sys_getpgid")
+__failure __msg("expected an RCU CS when using bpf_iter_css_next")
+int BPF_PROG(iter_css_lock_and_unlock)
+{
+ u64 cg_id = bpf_get_current_cgroup_id();
+ struct cgroup *cgrp = bpf_cgroup_from_id(cg_id);
+ struct cgroup_subsys_state *root_css, *pos;
+
+ if (!cgrp)
+ return 0;
+ root_css = &cgrp->self;
+
+ bpf_rcu_read_lock();
+ bpf_for_each(css, pos, root_css, BPF_CGROUP_ITER_DESCENDANTS_POST) {
+ bpf_rcu_read_unlock();
+
+ bpf_rcu_read_lock();
+ }
+ bpf_rcu_read_unlock();
+ bpf_cgroup_release(cgrp);
+ return 0;
+}
+
+SEC("?fentry/" SYS_PREFIX "sys_getpgid")
+__failure __msg("css_task_iter is only allowed in bpf_lsm, bpf_iter and sleepable progs")
+int BPF_PROG(iter_css_task_for_each)
+{
+ u64 cg_id = bpf_get_current_cgroup_id();
+ struct cgroup *cgrp = bpf_cgroup_from_id(cg_id);
+ struct cgroup_subsys_state *css;
+ struct task_struct *task;
+
+ if (cgrp == NULL)
+ return 0;
+ css = &cgrp->self;
+
+ bpf_for_each(css_task, task, css, CSS_TASK_ITER_PROCS) {
+
+ }
+ bpf_cgroup_release(cgrp);
+ return 0;
+}
diff --git a/tools/testing/selftests/bpf/progs/iters_task_vma.c b/tools/testing/selftests/bpf/progs/iters_task_vma.c
new file mode 100644
index 000000000000..dc0c3691dcc2
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/iters_task_vma.c
@@ -0,0 +1,43 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2023 Meta Platforms, Inc. and affiliates. */
+
+#include "vmlinux.h"
+#include "bpf_experimental.h"
+#include <bpf/bpf_helpers.h>
+#include "bpf_misc.h"
+
+pid_t target_pid = 0;
+unsigned int vmas_seen = 0;
+
+struct {
+ __u64 vm_start;
+ __u64 vm_end;
+} vm_ranges[1000];
+
+SEC("raw_tp/sys_enter")
+int iter_task_vma_for_each(const void *ctx)
+{
+ struct task_struct *task = bpf_get_current_task_btf();
+ struct vm_area_struct *vma;
+ unsigned int seen = 0;
+
+ if (task->pid != target_pid)
+ return 0;
+
+ if (vmas_seen)
+ return 0;
+
+ bpf_for_each(task_vma, vma, task, 0) {
+ if (bpf_cmp_unlikely(seen, >=, 1000))
+ break;
+
+ vm_ranges[seen].vm_start = vma->vm_start;
+ vm_ranges[seen].vm_end = vma->vm_end;
+ seen++;
+ }
+
+ vmas_seen = seen;
+ return 0;
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/iters_testmod_seq.c b/tools/testing/selftests/bpf/progs/iters_testmod_seq.c
new file mode 100644
index 000000000000..3873fb6c292a
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/iters_testmod_seq.c
@@ -0,0 +1,79 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2023 Meta Platforms, Inc. and affiliates. */
+
+#include "vmlinux.h"
+#include <bpf/bpf_helpers.h>
+#include "bpf_misc.h"
+
+struct bpf_iter_testmod_seq {
+ u64 :64;
+ u64 :64;
+};
+
+extern int bpf_iter_testmod_seq_new(struct bpf_iter_testmod_seq *it, s64 value, int cnt) __ksym;
+extern s64 *bpf_iter_testmod_seq_next(struct bpf_iter_testmod_seq *it) __ksym;
+extern void bpf_iter_testmod_seq_destroy(struct bpf_iter_testmod_seq *it) __ksym;
+
+const volatile __s64 exp_empty = 0 + 1;
+__s64 res_empty;
+
+SEC("raw_tp/sys_enter")
+__success __log_level(2)
+__msg("fp-16_w=iter_testmod_seq(ref_id=1,state=active,depth=0)")
+__msg("fp-16=iter_testmod_seq(ref_id=1,state=drained,depth=0)")
+__msg("call bpf_iter_testmod_seq_destroy")
+int testmod_seq_empty(const void *ctx)
+{
+ __s64 sum = 0, *i;
+
+ bpf_for_each(testmod_seq, i, 1000, 0) sum += *i;
+ res_empty = 1 + sum;
+
+ return 0;
+}
+
+const volatile __s64 exp_full = 1000000;
+__s64 res_full;
+
+SEC("raw_tp/sys_enter")
+__success __log_level(2)
+__msg("fp-16_w=iter_testmod_seq(ref_id=1,state=active,depth=0)")
+__msg("fp-16=iter_testmod_seq(ref_id=1,state=drained,depth=0)")
+__msg("call bpf_iter_testmod_seq_destroy")
+int testmod_seq_full(const void *ctx)
+{
+ __s64 sum = 0, *i;
+
+ bpf_for_each(testmod_seq, i, 1000, 1000) sum += *i;
+ res_full = sum;
+
+ return 0;
+}
+
+const volatile __s64 exp_truncated = 10 * 1000000;
+__s64 res_truncated;
+
+static volatile int zero = 0;
+
+SEC("raw_tp/sys_enter")
+__success __log_level(2)
+__msg("fp-16_w=iter_testmod_seq(ref_id=1,state=active,depth=0)")
+__msg("fp-16=iter_testmod_seq(ref_id=1,state=drained,depth=0)")
+__msg("call bpf_iter_testmod_seq_destroy")
+int testmod_seq_truncated(const void *ctx)
+{
+ __s64 sum = 0, *i;
+ int cnt = zero;
+
+ bpf_for_each(testmod_seq, i, 10, 2000000) {
+ sum += *i;
+ cnt++;
+ if (cnt >= 1000000)
+ break;
+ }
+ res_truncated = sum;
+
+ return 0;
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/jeq_infer_not_null_fail.c b/tools/testing/selftests/bpf/progs/jeq_infer_not_null_fail.c
new file mode 100644
index 000000000000..f46965053acb
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/jeq_infer_not_null_fail.c
@@ -0,0 +1,42 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include "vmlinux.h"
+#include <bpf/bpf_helpers.h>
+#include "bpf_misc.h"
+
+char _license[] SEC("license") = "GPL";
+
+struct {
+ __uint(type, BPF_MAP_TYPE_HASH);
+ __uint(max_entries, 1);
+ __type(key, u64);
+ __type(value, u64);
+} m_hash SEC(".maps");
+
+SEC("?raw_tp")
+__failure __msg("R8 invalid mem access 'map_value_or_null")
+int jeq_infer_not_null_ptr_to_btfid(void *ctx)
+{
+ struct bpf_map *map = (struct bpf_map *)&m_hash;
+ struct bpf_map *inner_map = map->inner_map_meta;
+ u64 key = 0, ret = 0, *val;
+
+ val = bpf_map_lookup_elem(map, &key);
+ /* Do not mark ptr as non-null if one of them is
+ * PTR_TO_BTF_ID (R9), reject because of invalid
+ * access to map value (R8).
+ *
+ * Here, we need to inline those insns to access
+ * R8 directly, since compiler may use other reg
+ * once it figures out val==inner_map.
+ */
+ asm volatile("r8 = %[val];\n"
+ "r9 = %[inner_map];\n"
+ "if r8 != r9 goto +1;\n"
+ "%[ret] = *(u64 *)(r8 +0);\n"
+ : [ret] "+r"(ret)
+ : [inner_map] "r"(inner_map), [val] "r"(val)
+ : "r8", "r9");
+
+ return ret;
+}
diff --git a/tools/testing/selftests/bpf/progs/jit_probe_mem.c b/tools/testing/selftests/bpf/progs/jit_probe_mem.c
new file mode 100644
index 000000000000..f9789e668297
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/jit_probe_mem.c
@@ -0,0 +1,59 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2022 Meta Platforms, Inc. and affiliates. */
+#include <vmlinux.h>
+#include <bpf/bpf_tracing.h>
+#include <bpf/bpf_helpers.h>
+#include "../bpf_testmod/bpf_testmod_kfunc.h"
+
+static struct prog_test_ref_kfunc __kptr *v;
+long total_sum = -1;
+
+SEC("tc")
+int test_jit_probe_mem(struct __sk_buff *ctx)
+{
+ struct prog_test_ref_kfunc *p;
+ unsigned long zero = 0, sum;
+
+ p = bpf_kfunc_call_test_acquire(&zero);
+ if (!p)
+ return 1;
+
+ p = bpf_kptr_xchg(&v, p);
+ if (p)
+ goto release_out;
+
+ /* Direct map value access of kptr, should be PTR_UNTRUSTED */
+ p = v;
+ if (!p)
+ return 1;
+
+ asm volatile (
+ "r9 = %[p];"
+ "%[sum] = 0;"
+
+ /* r8 = p->a */
+ "r8 = *(u32 *)(r9 + 0);"
+ "%[sum] += r8;"
+
+ /* r8 = p->b */
+ "r8 = *(u32 *)(r9 + 4);"
+ "%[sum] += r8;"
+
+ "r9 += 8;"
+ /* r9 = p->a */
+ "r9 = *(u32 *)(r9 - 8);"
+ "%[sum] += r9;"
+
+ : [sum] "=r"(sum)
+ : [p] "r"(p)
+ : "r8", "r9"
+ );
+
+ total_sum = sum;
+ return 0;
+release_out:
+ bpf_kfunc_call_test_release(p);
+ return 1;
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/kfree_skb.c b/tools/testing/selftests/bpf/progs/kfree_skb.c
index a46a264ce24e..7236da72ce80 100644
--- a/tools/testing/selftests/bpf/progs/kfree_skb.c
+++ b/tools/testing/selftests/bpf/progs/kfree_skb.c
@@ -9,8 +9,8 @@
char _license[] SEC("license") = "GPL";
struct {
__uint(type, BPF_MAP_TYPE_PERF_EVENT_ARRAY);
- __uint(key_size, sizeof(int));
- __uint(value_size, sizeof(int));
+ __type(key, int);
+ __type(value, int);
} perf_buf_map SEC(".maps");
#define _(P) (__builtin_preserve_access_index(P))
@@ -109,10 +109,10 @@ int BPF_PROG(trace_kfree_skb, struct sk_buff *skb, void *location)
return 0;
}
-static volatile struct {
+struct {
bool fentry_test_ok;
bool fexit_test_ok;
-} result;
+} result = {};
SEC("fentry/eth_type_trans")
int BPF_PROG(fentry_eth_type_trans, struct sk_buff *skb, struct net_device *dev,
diff --git a/tools/testing/selftests/bpf/progs/kfunc_call_destructive.c b/tools/testing/selftests/bpf/progs/kfunc_call_destructive.c
new file mode 100644
index 000000000000..7632d9ecb253
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/kfunc_call_destructive.c
@@ -0,0 +1,13 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <vmlinux.h>
+#include <bpf/bpf_helpers.h>
+#include "../bpf_testmod/bpf_testmod_kfunc.h"
+
+SEC("tc")
+int kfunc_destructive_test(void)
+{
+ bpf_kfunc_call_test_destructive();
+ return 0;
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/kfunc_call_fail.c b/tools/testing/selftests/bpf/progs/kfunc_call_fail.c
new file mode 100644
index 000000000000..4b0b7b79cdfb
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/kfunc_call_fail.c
@@ -0,0 +1,153 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2021 Facebook */
+#include <vmlinux.h>
+#include <bpf/bpf_helpers.h>
+#include "../bpf_testmod/bpf_testmod_kfunc.h"
+
+struct syscall_test_args {
+ __u8 data[16];
+ size_t size;
+};
+
+SEC("?syscall")
+int kfunc_syscall_test_fail(struct syscall_test_args *args)
+{
+ bpf_kfunc_call_test_mem_len_pass1(&args->data, sizeof(*args) + 1);
+
+ return 0;
+}
+
+SEC("?syscall")
+int kfunc_syscall_test_null_fail(struct syscall_test_args *args)
+{
+ /* Must be called with args as a NULL pointer
+ * we do not check for it to have the verifier consider that
+ * the pointer might not be null, and so we can load it.
+ *
+ * So the following can not be added:
+ *
+ * if (args)
+ * return -22;
+ */
+
+ bpf_kfunc_call_test_mem_len_pass1(args, sizeof(*args));
+
+ return 0;
+}
+
+SEC("?tc")
+int kfunc_call_test_get_mem_fail_rdonly(struct __sk_buff *skb)
+{
+ struct prog_test_ref_kfunc *pt;
+ unsigned long s = 0;
+ int *p = NULL;
+ int ret = 0;
+
+ pt = bpf_kfunc_call_test_acquire(&s);
+ if (pt) {
+ p = bpf_kfunc_call_test_get_rdonly_mem(pt, 2 * sizeof(int));
+ if (p)
+ p[0] = 42; /* this is a read-only buffer, so -EACCES */
+ else
+ ret = -1;
+
+ bpf_kfunc_call_test_release(pt);
+ }
+ return ret;
+}
+
+SEC("?tc")
+int kfunc_call_test_get_mem_fail_use_after_free(struct __sk_buff *skb)
+{
+ struct prog_test_ref_kfunc *pt;
+ unsigned long s = 0;
+ int *p = NULL;
+ int ret = 0;
+
+ pt = bpf_kfunc_call_test_acquire(&s);
+ if (pt) {
+ p = bpf_kfunc_call_test_get_rdwr_mem(pt, 2 * sizeof(int));
+ if (p) {
+ p[0] = 42;
+ ret = p[1]; /* 108 */
+ } else {
+ ret = -1;
+ }
+
+ bpf_kfunc_call_test_release(pt);
+ }
+ if (p)
+ ret = p[0]; /* p is not valid anymore */
+
+ return ret;
+}
+
+SEC("?tc")
+int kfunc_call_test_get_mem_fail_oob(struct __sk_buff *skb)
+{
+ struct prog_test_ref_kfunc *pt;
+ unsigned long s = 0;
+ int *p = NULL;
+ int ret = 0;
+
+ pt = bpf_kfunc_call_test_acquire(&s);
+ if (pt) {
+ p = bpf_kfunc_call_test_get_rdonly_mem(pt, 2 * sizeof(int));
+ if (p)
+ ret = p[2 * sizeof(int)]; /* oob access, so -EACCES */
+ else
+ ret = -1;
+
+ bpf_kfunc_call_test_release(pt);
+ }
+ return ret;
+}
+
+int not_const_size = 2 * sizeof(int);
+
+SEC("?tc")
+int kfunc_call_test_get_mem_fail_not_const(struct __sk_buff *skb)
+{
+ struct prog_test_ref_kfunc *pt;
+ unsigned long s = 0;
+ int *p = NULL;
+ int ret = 0;
+
+ pt = bpf_kfunc_call_test_acquire(&s);
+ if (pt) {
+ p = bpf_kfunc_call_test_get_rdonly_mem(pt, not_const_size); /* non const size, -EINVAL */
+ if (p)
+ ret = p[0];
+ else
+ ret = -1;
+
+ bpf_kfunc_call_test_release(pt);
+ }
+ return ret;
+}
+
+SEC("?tc")
+int kfunc_call_test_mem_acquire_fail(struct __sk_buff *skb)
+{
+ struct prog_test_ref_kfunc *pt;
+ unsigned long s = 0;
+ int *p = NULL;
+ int ret = 0;
+
+ pt = bpf_kfunc_call_test_acquire(&s);
+ if (pt) {
+ /* we are failing on this one, because we are not acquiring a PTR_TO_BTF_ID (a struct ptr) */
+ p = bpf_kfunc_call_test_acq_rdonly_mem(pt, 2 * sizeof(int));
+ if (p)
+ ret = p[0];
+ else
+ ret = -1;
+
+ bpf_kfunc_call_int_mem_release(p);
+
+ bpf_kfunc_call_test_release(pt);
+ }
+ return ret;
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/kfunc_call_race.c b/tools/testing/selftests/bpf/progs/kfunc_call_race.c
new file mode 100644
index 000000000000..d532af07decf
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/kfunc_call_race.c
@@ -0,0 +1,13 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <vmlinux.h>
+#include <bpf/bpf_helpers.h>
+#include "../bpf_testmod/bpf_testmod_kfunc.h"
+
+SEC("tc")
+int kfunc_call_fail(struct __sk_buff *ctx)
+{
+ bpf_testmod_test_mod_kfunc(0);
+ return 0;
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/kfunc_call_test.c b/tools/testing/selftests/bpf/progs/kfunc_call_test.c
new file mode 100644
index 000000000000..cf68d1e48a0f
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/kfunc_call_test.c
@@ -0,0 +1,180 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2021 Facebook */
+#include <vmlinux.h>
+#include <bpf/bpf_helpers.h>
+#include "../bpf_testmod/bpf_testmod_kfunc.h"
+
+SEC("tc")
+int kfunc_call_test4(struct __sk_buff *skb)
+{
+ struct bpf_sock *sk = skb->sk;
+ long tmp;
+
+ if (!sk)
+ return -1;
+
+ sk = bpf_sk_fullsock(sk);
+ if (!sk)
+ return -1;
+
+ tmp = bpf_kfunc_call_test4(-3, -30, -200, -1000);
+ return (tmp >> 32) + tmp;
+}
+
+SEC("tc")
+int kfunc_call_test2(struct __sk_buff *skb)
+{
+ struct bpf_sock *sk = skb->sk;
+
+ if (!sk)
+ return -1;
+
+ sk = bpf_sk_fullsock(sk);
+ if (!sk)
+ return -1;
+
+ return bpf_kfunc_call_test2((struct sock *)sk, 1, 2);
+}
+
+SEC("tc")
+int kfunc_call_test1(struct __sk_buff *skb)
+{
+ struct bpf_sock *sk = skb->sk;
+ __u64 a = 1ULL << 32;
+ __u32 ret;
+
+ if (!sk)
+ return -1;
+
+ sk = bpf_sk_fullsock(sk);
+ if (!sk)
+ return -1;
+
+ a = bpf_kfunc_call_test1((struct sock *)sk, 1, a | 2, 3, a | 4);
+ ret = a >> 32; /* ret should be 2 */
+ ret += (__u32)a; /* ret should be 12 */
+
+ return ret;
+}
+
+SEC("tc")
+int kfunc_call_test_ref_btf_id(struct __sk_buff *skb)
+{
+ struct prog_test_ref_kfunc *pt;
+ unsigned long s = 0;
+ int ret = 0;
+
+ pt = bpf_kfunc_call_test_acquire(&s);
+ if (pt) {
+ if (pt->a != 42 || pt->b != 108)
+ ret = -1;
+ bpf_kfunc_call_test_release(pt);
+ }
+ return ret;
+}
+
+SEC("tc")
+int kfunc_call_test_pass(struct __sk_buff *skb)
+{
+ struct prog_test_pass1 p1 = {};
+ struct prog_test_pass2 p2 = {};
+ short a = 0;
+ __u64 b = 0;
+ long c = 0;
+ char d = 0;
+ int e = 0;
+
+ bpf_kfunc_call_test_pass_ctx(skb);
+ bpf_kfunc_call_test_pass1(&p1);
+ bpf_kfunc_call_test_pass2(&p2);
+
+ bpf_kfunc_call_test_mem_len_pass1(&a, sizeof(a));
+ bpf_kfunc_call_test_mem_len_pass1(&b, sizeof(b));
+ bpf_kfunc_call_test_mem_len_pass1(&c, sizeof(c));
+ bpf_kfunc_call_test_mem_len_pass1(&d, sizeof(d));
+ bpf_kfunc_call_test_mem_len_pass1(&e, sizeof(e));
+ bpf_kfunc_call_test_mem_len_fail2(&b, -1);
+
+ return 0;
+}
+
+struct syscall_test_args {
+ __u8 data[16];
+ size_t size;
+};
+
+SEC("syscall")
+int kfunc_syscall_test(struct syscall_test_args *args)
+{
+ const long size = args->size;
+
+ if (size > sizeof(args->data))
+ return -7; /* -E2BIG */
+
+ bpf_kfunc_call_test_mem_len_pass1(&args->data, sizeof(args->data));
+ bpf_kfunc_call_test_mem_len_pass1(&args->data, sizeof(*args));
+ bpf_kfunc_call_test_mem_len_pass1(&args->data, size);
+
+ return 0;
+}
+
+SEC("syscall")
+int kfunc_syscall_test_null(struct syscall_test_args *args)
+{
+ /* Must be called with args as a NULL pointer
+ * we do not check for it to have the verifier consider that
+ * the pointer might not be null, and so we can load it.
+ *
+ * So the following can not be added:
+ *
+ * if (args)
+ * return -22;
+ */
+
+ bpf_kfunc_call_test_mem_len_pass1(args, 0);
+
+ return 0;
+}
+
+SEC("tc")
+int kfunc_call_test_get_mem(struct __sk_buff *skb)
+{
+ struct prog_test_ref_kfunc *pt;
+ unsigned long s = 0;
+ int *p = NULL;
+ int ret = 0;
+
+ pt = bpf_kfunc_call_test_acquire(&s);
+ if (pt) {
+ p = bpf_kfunc_call_test_get_rdwr_mem(pt, 2 * sizeof(int));
+ if (p) {
+ p[0] = 42;
+ ret = p[1]; /* 108 */
+ } else {
+ ret = -1;
+ }
+
+ if (ret >= 0) {
+ p = bpf_kfunc_call_test_get_rdonly_mem(pt, 2 * sizeof(int));
+ if (p)
+ ret = p[0]; /* 42 */
+ else
+ ret = -1;
+ }
+
+ bpf_kfunc_call_test_release(pt);
+ }
+ return ret;
+}
+
+SEC("tc")
+int kfunc_call_test_static_unused_arg(struct __sk_buff *skb)
+{
+
+ u32 expected = 5, actual;
+
+ actual = bpf_kfunc_call_test_static_unused_arg(expected, 0xdeadbeef);
+ return actual != expected ? -1 : 0;
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/kfunc_call_test_subprog.c b/tools/testing/selftests/bpf/progs/kfunc_call_test_subprog.c
new file mode 100644
index 000000000000..2380c75e74ce
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/kfunc_call_test_subprog.c
@@ -0,0 +1,37 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2021 Facebook */
+#include "../bpf_testmod/bpf_testmod_kfunc.h"
+
+extern const int bpf_prog_active __ksym;
+int active_res = -1;
+int sk_state_res = -1;
+
+int __noinline f1(struct __sk_buff *skb)
+{
+ struct bpf_sock *sk = skb->sk;
+ int *active;
+
+ if (!sk)
+ return -1;
+
+ sk = bpf_sk_fullsock(sk);
+ if (!sk)
+ return -1;
+
+ active = (int *)bpf_per_cpu_ptr(&bpf_prog_active,
+ bpf_get_smp_processor_id());
+ if (active)
+ active_res = *active;
+
+ sk_state_res = bpf_kfunc_call_test3((struct sock *)sk)->__sk_common.skc_state;
+
+ return (__u32)bpf_kfunc_call_test1((struct sock *)sk, 1, 2, 3, 4);
+}
+
+SEC("tc")
+int kfunc_call_test1(struct __sk_buff *skb)
+{
+ return f1(skb);
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/kprobe_multi.c b/tools/testing/selftests/bpf/progs/kprobe_multi.c
new file mode 100644
index 000000000000..9e1ca8e34913
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/kprobe_multi.c
@@ -0,0 +1,162 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <linux/bpf.h>
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
+#include <stdbool.h>
+
+char _license[] SEC("license") = "GPL";
+
+extern const void bpf_fentry_test1 __ksym;
+extern const void bpf_fentry_test2 __ksym;
+extern const void bpf_fentry_test3 __ksym;
+extern const void bpf_fentry_test4 __ksym;
+extern const void bpf_fentry_test5 __ksym;
+extern const void bpf_fentry_test6 __ksym;
+extern const void bpf_fentry_test7 __ksym;
+extern const void bpf_fentry_test8 __ksym;
+
+int pid = 0;
+bool test_cookie = false;
+
+__u64 kprobe_test1_result = 0;
+__u64 kprobe_test2_result = 0;
+__u64 kprobe_test3_result = 0;
+__u64 kprobe_test4_result = 0;
+__u64 kprobe_test5_result = 0;
+__u64 kprobe_test6_result = 0;
+__u64 kprobe_test7_result = 0;
+__u64 kprobe_test8_result = 0;
+
+__u64 kretprobe_test1_result = 0;
+__u64 kretprobe_test2_result = 0;
+__u64 kretprobe_test3_result = 0;
+__u64 kretprobe_test4_result = 0;
+__u64 kretprobe_test5_result = 0;
+__u64 kretprobe_test6_result = 0;
+__u64 kretprobe_test7_result = 0;
+__u64 kretprobe_test8_result = 0;
+
+static void kprobe_multi_check(void *ctx, bool is_return)
+{
+ if (bpf_get_current_pid_tgid() >> 32 != pid)
+ return;
+
+ __u64 cookie = test_cookie ? bpf_get_attach_cookie(ctx) : 0;
+ __u64 addr = bpf_get_func_ip(ctx);
+
+#define SET(__var, __addr, __cookie) ({ \
+ if (((const void *) addr == __addr) && \
+ (!test_cookie || (cookie == __cookie))) \
+ __var = 1; \
+})
+
+ if (is_return) {
+ SET(kretprobe_test1_result, &bpf_fentry_test1, 8);
+ SET(kretprobe_test2_result, &bpf_fentry_test2, 2);
+ SET(kretprobe_test3_result, &bpf_fentry_test3, 7);
+ SET(kretprobe_test4_result, &bpf_fentry_test4, 6);
+ SET(kretprobe_test5_result, &bpf_fentry_test5, 5);
+ SET(kretprobe_test6_result, &bpf_fentry_test6, 4);
+ SET(kretprobe_test7_result, &bpf_fentry_test7, 3);
+ SET(kretprobe_test8_result, &bpf_fentry_test8, 1);
+ } else {
+ SET(kprobe_test1_result, &bpf_fentry_test1, 1);
+ SET(kprobe_test2_result, &bpf_fentry_test2, 7);
+ SET(kprobe_test3_result, &bpf_fentry_test3, 2);
+ SET(kprobe_test4_result, &bpf_fentry_test4, 3);
+ SET(kprobe_test5_result, &bpf_fentry_test5, 4);
+ SET(kprobe_test6_result, &bpf_fentry_test6, 5);
+ SET(kprobe_test7_result, &bpf_fentry_test7, 6);
+ SET(kprobe_test8_result, &bpf_fentry_test8, 8);
+ }
+
+#undef SET
+}
+
+/*
+ * No tests in here, just to trigger 'bpf_fentry_test*'
+ * through tracing test_run
+ */
+SEC("fentry/bpf_modify_return_test")
+int BPF_PROG(trigger)
+{
+ return 0;
+}
+
+SEC("kprobe.multi/bpf_fentry_tes??")
+int test_kprobe(struct pt_regs *ctx)
+{
+ kprobe_multi_check(ctx, false);
+ return 0;
+}
+
+SEC("kretprobe.multi/bpf_fentry_test*")
+int test_kretprobe(struct pt_regs *ctx)
+{
+ kprobe_multi_check(ctx, true);
+ return 0;
+}
+
+SEC("kprobe.multi")
+int test_kprobe_manual(struct pt_regs *ctx)
+{
+ kprobe_multi_check(ctx, false);
+ return 0;
+}
+
+SEC("kretprobe.multi")
+int test_kretprobe_manual(struct pt_regs *ctx)
+{
+ kprobe_multi_check(ctx, true);
+ return 0;
+}
+
+extern const void bpf_testmod_fentry_test1 __ksym;
+extern const void bpf_testmod_fentry_test2 __ksym;
+extern const void bpf_testmod_fentry_test3 __ksym;
+
+__u64 kprobe_testmod_test1_result = 0;
+__u64 kprobe_testmod_test2_result = 0;
+__u64 kprobe_testmod_test3_result = 0;
+
+__u64 kretprobe_testmod_test1_result = 0;
+__u64 kretprobe_testmod_test2_result = 0;
+__u64 kretprobe_testmod_test3_result = 0;
+
+static void kprobe_multi_testmod_check(void *ctx, bool is_return)
+{
+ if (bpf_get_current_pid_tgid() >> 32 != pid)
+ return;
+
+ __u64 addr = bpf_get_func_ip(ctx);
+
+ if (is_return) {
+ if ((const void *) addr == &bpf_testmod_fentry_test1)
+ kretprobe_testmod_test1_result = 1;
+ if ((const void *) addr == &bpf_testmod_fentry_test2)
+ kretprobe_testmod_test2_result = 1;
+ if ((const void *) addr == &bpf_testmod_fentry_test3)
+ kretprobe_testmod_test3_result = 1;
+ } else {
+ if ((const void *) addr == &bpf_testmod_fentry_test1)
+ kprobe_testmod_test1_result = 1;
+ if ((const void *) addr == &bpf_testmod_fentry_test2)
+ kprobe_testmod_test2_result = 1;
+ if ((const void *) addr == &bpf_testmod_fentry_test3)
+ kprobe_testmod_test3_result = 1;
+ }
+}
+
+SEC("kprobe.multi")
+int test_kprobe_testmod(struct pt_regs *ctx)
+{
+ kprobe_multi_testmod_check(ctx, false);
+ return 0;
+}
+
+SEC("kretprobe.multi")
+int test_kretprobe_testmod(struct pt_regs *ctx)
+{
+ kprobe_multi_testmod_check(ctx, true);
+ return 0;
+}
diff --git a/tools/testing/selftests/bpf/progs/kprobe_multi_empty.c b/tools/testing/selftests/bpf/progs/kprobe_multi_empty.c
new file mode 100644
index 000000000000..e76e499aca39
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/kprobe_multi_empty.c
@@ -0,0 +1,12 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <linux/bpf.h>
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
+
+char _license[] SEC("license") = "GPL";
+
+SEC("kprobe.multi/")
+int test_kprobe_empty(struct pt_regs *ctx)
+{
+ return 0;
+}
diff --git a/tools/testing/selftests/bpf/progs/kprobe_multi_override.c b/tools/testing/selftests/bpf/progs/kprobe_multi_override.c
new file mode 100644
index 000000000000..28f8487c9059
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/kprobe_multi_override.c
@@ -0,0 +1,13 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <linux/bpf.h>
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
+
+char _license[] SEC("license") = "GPL";
+
+SEC("kprobe.multi")
+int test_override(struct pt_regs *ctx)
+{
+ bpf_override_return(ctx, 123);
+ return 0;
+}
diff --git a/tools/testing/selftests/bpf/progs/kptr_xchg_inline.c b/tools/testing/selftests/bpf/progs/kptr_xchg_inline.c
new file mode 100644
index 000000000000..2414ac20b6d5
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/kptr_xchg_inline.c
@@ -0,0 +1,48 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (C) 2023. Huawei Technologies Co., Ltd */
+#include <linux/types.h>
+#include <bpf/bpf_helpers.h>
+
+#include "bpf_experimental.h"
+#include "bpf_misc.h"
+
+char _license[] SEC("license") = "GPL";
+
+struct bin_data {
+ char blob[32];
+};
+
+#define private(name) SEC(".bss." #name) __hidden __attribute__((aligned(8)))
+private(kptr) struct bin_data __kptr * ptr;
+
+SEC("tc")
+__naked int kptr_xchg_inline(void)
+{
+ asm volatile (
+ "r1 = %[ptr] ll;"
+ "r2 = 0;"
+ "call %[bpf_kptr_xchg];"
+ "if r0 == 0 goto 1f;"
+ "r1 = r0;"
+ "r2 = 0;"
+ "call %[bpf_obj_drop_impl];"
+ "1:"
+ "r0 = 0;"
+ "exit;"
+ :
+ : __imm_addr(ptr),
+ __imm(bpf_kptr_xchg),
+ __imm(bpf_obj_drop_impl)
+ : __clobber_all
+ );
+}
+
+/* BTF FUNC records are not generated for kfuncs referenced
+ * from inline assembly. These records are necessary for
+ * libbpf to link the program. The function below is a hack
+ * to ensure that BTF FUNC records are generated.
+ */
+void __btf_root(void)
+{
+ bpf_obj_drop(NULL);
+}
diff --git a/tools/testing/selftests/bpf/progs/ksym_race.c b/tools/testing/selftests/bpf/progs/ksym_race.c
new file mode 100644
index 000000000000..def97f2fed90
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/ksym_race.c
@@ -0,0 +1,13 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <vmlinux.h>
+#include <bpf/bpf_helpers.h>
+
+extern int bpf_testmod_ksym_percpu __ksym;
+
+SEC("tc")
+int ksym_fail(struct __sk_buff *ctx)
+{
+ return *(int *)bpf_this_cpu_ptr(&bpf_testmod_ksym_percpu);
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/linked_funcs1.c b/tools/testing/selftests/bpf/progs/linked_funcs1.c
new file mode 100644
index 000000000000..cc79dddac182
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/linked_funcs1.c
@@ -0,0 +1,89 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2021 Facebook */
+
+#include "vmlinux.h"
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
+#include <bpf/bpf_core_read.h>
+#include "bpf_misc.h"
+
+/* weak and shared between two files */
+const volatile __u32 my_tid __weak;
+long syscall_id __weak;
+
+int output_val1;
+int output_ctx1;
+int output_weak1;
+
+/* same "subprog" name in all files, but it's ok because they all are static */
+static __noinline int subprog(int x)
+{
+ /* but different formula */
+ return x * 1;
+}
+
+/* Global functions can't be void */
+int set_output_val1(int x)
+{
+ output_val1 = x + subprog(x);
+ return x;
+}
+
+/* This function can't be verified as global, as it assumes raw_tp/sys_enter
+ * context and accesses syscall id (second argument). So we mark it as
+ * __hidden, so that libbpf will mark it as static in the final object file,
+ * right before verifying it in the kernel.
+ *
+ * But we don't mark it as __hidden here, rather at extern site. __hidden is
+ * "contaminating" visibility, so it will get propagated from either extern or
+ * actual definition (including from the losing __weak definition).
+ */
+void set_output_ctx1(__u64 *ctx)
+{
+ output_ctx1 = ctx[1]; /* long id, same as in BPF_PROG below */
+}
+
+/* this weak instance should win because it's the first one */
+__weak int set_output_weak(int x)
+{
+ static volatile int whatever;
+
+ /* make sure we use CO-RE relocations in a weak function, this used to
+ * cause problems for BPF static linker
+ */
+ whatever = bpf_core_type_size(struct task_struct);
+ __sink(whatever);
+
+ output_weak1 = x;
+ return x;
+}
+
+extern int set_output_val2(int x);
+
+/* here we'll force set_output_ctx2() to be __hidden in the final obj file */
+__hidden extern void set_output_ctx2(__u64 *ctx);
+
+SEC("?raw_tp/sys_enter")
+int BPF_PROG(handler1, struct pt_regs *regs, long id)
+{
+ static volatile int whatever;
+
+ if (my_tid != (u32)bpf_get_current_pid_tgid() || id != syscall_id)
+ return 0;
+
+ /* make sure we have CO-RE relocations in main program */
+ whatever = bpf_core_type_size(struct task_struct);
+ __sink(whatever);
+
+ set_output_val2(1000);
+ set_output_ctx2(ctx); /* ctx definition is hidden in BPF_PROG macro */
+
+ /* keep input value the same across both files to avoid dependency on
+ * handler call order; differentiate by output_weak1 vs output_weak2.
+ */
+ set_output_weak(42);
+
+ return 0;
+}
+
+char LICENSE[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/linked_funcs2.c b/tools/testing/selftests/bpf/progs/linked_funcs2.c
new file mode 100644
index 000000000000..942cc5526ddf
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/linked_funcs2.c
@@ -0,0 +1,89 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2021 Facebook */
+
+#include "vmlinux.h"
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
+#include <bpf/bpf_core_read.h>
+#include "bpf_misc.h"
+
+/* weak and shared between both files */
+const volatile int my_tid __weak;
+long syscall_id __weak;
+
+int output_val2;
+int output_ctx2;
+int output_weak2; /* should stay zero */
+
+/* same "subprog" name in all files, but it's ok because they all are static */
+static __noinline int subprog(int x)
+{
+ /* but different formula */
+ return x * 2;
+}
+
+/* Global functions can't be void */
+int set_output_val2(int x)
+{
+ output_val2 = 2 * x + 2 * subprog(x);
+ return 2 * x;
+}
+
+/* This function can't be verified as global, as it assumes raw_tp/sys_enter
+ * context and accesses syscall id (second argument). So we mark it as
+ * __hidden, so that libbpf will mark it as static in the final object file,
+ * right before verifying it in the kernel.
+ *
+ * But we don't mark it as __hidden here, rather at extern site. __hidden is
+ * "contaminating" visibility, so it will get propagated from either extern or
+ * actual definition (including from the losing __weak definition).
+ */
+void set_output_ctx2(__u64 *ctx)
+{
+ output_ctx2 = ctx[1]; /* long id, same as in BPF_PROG below */
+}
+
+/* this weak instance should lose, because it will be processed second */
+__weak int set_output_weak(int x)
+{
+ static volatile int whatever;
+
+ /* make sure we use CO-RE relocations in a weak function, this used to
+ * cause problems for BPF static linker
+ */
+ whatever = 2 * bpf_core_type_size(struct task_struct);
+ __sink(whatever);
+
+ output_weak2 = x;
+ return 2 * x;
+}
+
+extern int set_output_val1(int x);
+
+/* here we'll force set_output_ctx1() to be __hidden in the final obj file */
+__hidden extern void set_output_ctx1(__u64 *ctx);
+
+SEC("?raw_tp/sys_enter")
+int BPF_PROG(handler2, struct pt_regs *regs, long id)
+{
+ static volatile int whatever;
+
+ if (my_tid != (s32)bpf_get_current_pid_tgid() || id != syscall_id)
+ return 0;
+
+ /* make sure we have CO-RE relocations in main program */
+ whatever = bpf_core_type_size(struct task_struct);
+ __sink(whatever);
+
+ set_output_val1(2000);
+ set_output_ctx1(ctx); /* ctx definition is hidden in BPF_PROG macro */
+
+ /* keep input value the same across both files to avoid dependency on
+ * handler call order; differentiate by output_weak1 vs output_weak2.
+ */
+ set_output_weak(42);
+
+ return 0;
+}
+
+char LICENSE[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/linked_list.c b/tools/testing/selftests/bpf/progs/linked_list.c
new file mode 100644
index 000000000000..26205ca80679
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/linked_list.c
@@ -0,0 +1,381 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <vmlinux.h>
+#include <bpf/bpf_tracing.h>
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_core_read.h>
+#include "bpf_experimental.h"
+
+#ifndef ARRAY_SIZE
+#define ARRAY_SIZE(x) (int)(sizeof(x) / sizeof((x)[0]))
+#endif
+
+#include "linked_list.h"
+
+static __always_inline
+int list_push_pop(struct bpf_spin_lock *lock, struct bpf_list_head *head, bool leave_in_map)
+{
+ struct bpf_list_node *n;
+ struct foo *f;
+
+ f = bpf_obj_new(typeof(*f));
+ if (!f)
+ return 2;
+
+ bpf_spin_lock(lock);
+ n = bpf_list_pop_front(head);
+ bpf_spin_unlock(lock);
+ if (n) {
+ bpf_obj_drop(container_of(n, struct foo, node2));
+ bpf_obj_drop(f);
+ return 3;
+ }
+
+ bpf_spin_lock(lock);
+ n = bpf_list_pop_back(head);
+ bpf_spin_unlock(lock);
+ if (n) {
+ bpf_obj_drop(container_of(n, struct foo, node2));
+ bpf_obj_drop(f);
+ return 4;
+ }
+
+
+ bpf_spin_lock(lock);
+ f->data = 42;
+ bpf_list_push_front(head, &f->node2);
+ bpf_spin_unlock(lock);
+ if (leave_in_map)
+ return 0;
+ bpf_spin_lock(lock);
+ n = bpf_list_pop_back(head);
+ bpf_spin_unlock(lock);
+ if (!n)
+ return 5;
+ f = container_of(n, struct foo, node2);
+ if (f->data != 42) {
+ bpf_obj_drop(f);
+ return 6;
+ }
+
+ bpf_spin_lock(lock);
+ f->data = 13;
+ bpf_list_push_front(head, &f->node2);
+ bpf_spin_unlock(lock);
+ bpf_spin_lock(lock);
+ n = bpf_list_pop_front(head);
+ bpf_spin_unlock(lock);
+ if (!n)
+ return 7;
+ f = container_of(n, struct foo, node2);
+ if (f->data != 13) {
+ bpf_obj_drop(f);
+ return 8;
+ }
+ bpf_obj_drop(f);
+
+ bpf_spin_lock(lock);
+ n = bpf_list_pop_front(head);
+ bpf_spin_unlock(lock);
+ if (n) {
+ bpf_obj_drop(container_of(n, struct foo, node2));
+ return 9;
+ }
+
+ bpf_spin_lock(lock);
+ n = bpf_list_pop_back(head);
+ bpf_spin_unlock(lock);
+ if (n) {
+ bpf_obj_drop(container_of(n, struct foo, node2));
+ return 10;
+ }
+ return 0;
+}
+
+
+static __always_inline
+int list_push_pop_multiple(struct bpf_spin_lock *lock, struct bpf_list_head *head, bool leave_in_map)
+{
+ struct bpf_list_node *n;
+ struct foo *f[200], *pf;
+ int i;
+
+ /* Loop following this check adds nodes 2-at-a-time in order to
+ * validate multiple release_on_unlock release logic
+ */
+ if (ARRAY_SIZE(f) % 2)
+ return 10;
+
+ for (i = 0; i < ARRAY_SIZE(f); i += 2) {
+ f[i] = bpf_obj_new(typeof(**f));
+ if (!f[i])
+ return 2;
+ f[i]->data = i;
+
+ f[i + 1] = bpf_obj_new(typeof(**f));
+ if (!f[i + 1]) {
+ bpf_obj_drop(f[i]);
+ return 9;
+ }
+ f[i + 1]->data = i + 1;
+
+ bpf_spin_lock(lock);
+ bpf_list_push_front(head, &f[i]->node2);
+ bpf_list_push_front(head, &f[i + 1]->node2);
+ bpf_spin_unlock(lock);
+ }
+
+ for (i = 0; i < ARRAY_SIZE(f); i++) {
+ bpf_spin_lock(lock);
+ n = bpf_list_pop_front(head);
+ bpf_spin_unlock(lock);
+ if (!n)
+ return 3;
+ pf = container_of(n, struct foo, node2);
+ if (pf->data != (ARRAY_SIZE(f) - i - 1)) {
+ bpf_obj_drop(pf);
+ return 4;
+ }
+ bpf_spin_lock(lock);
+ bpf_list_push_back(head, &pf->node2);
+ bpf_spin_unlock(lock);
+ }
+
+ if (leave_in_map)
+ return 0;
+
+ for (i = 0; i < ARRAY_SIZE(f); i++) {
+ bpf_spin_lock(lock);
+ n = bpf_list_pop_back(head);
+ bpf_spin_unlock(lock);
+ if (!n)
+ return 5;
+ pf = container_of(n, struct foo, node2);
+ if (pf->data != i) {
+ bpf_obj_drop(pf);
+ return 6;
+ }
+ bpf_obj_drop(pf);
+ }
+ bpf_spin_lock(lock);
+ n = bpf_list_pop_back(head);
+ bpf_spin_unlock(lock);
+ if (n) {
+ bpf_obj_drop(container_of(n, struct foo, node2));
+ return 7;
+ }
+
+ bpf_spin_lock(lock);
+ n = bpf_list_pop_front(head);
+ bpf_spin_unlock(lock);
+ if (n) {
+ bpf_obj_drop(container_of(n, struct foo, node2));
+ return 8;
+ }
+ return 0;
+}
+
+static __always_inline
+int list_in_list(struct bpf_spin_lock *lock, struct bpf_list_head *head, bool leave_in_map)
+{
+ struct bpf_list_node *n;
+ struct bar *ba[8], *b;
+ struct foo *f;
+ int i;
+
+ f = bpf_obj_new(typeof(*f));
+ if (!f)
+ return 2;
+ for (i = 0; i < ARRAY_SIZE(ba); i++) {
+ b = bpf_obj_new(typeof(*b));
+ if (!b) {
+ bpf_obj_drop(f);
+ return 3;
+ }
+ b->data = i;
+ bpf_spin_lock(&f->lock);
+ bpf_list_push_back(&f->head, &b->node);
+ bpf_spin_unlock(&f->lock);
+ }
+
+ bpf_spin_lock(lock);
+ f->data = 42;
+ bpf_list_push_front(head, &f->node2);
+ bpf_spin_unlock(lock);
+
+ if (leave_in_map)
+ return 0;
+
+ bpf_spin_lock(lock);
+ n = bpf_list_pop_front(head);
+ bpf_spin_unlock(lock);
+ if (!n)
+ return 4;
+ f = container_of(n, struct foo, node2);
+ if (f->data != 42) {
+ bpf_obj_drop(f);
+ return 5;
+ }
+
+ for (i = 0; i < ARRAY_SIZE(ba); i++) {
+ bpf_spin_lock(&f->lock);
+ n = bpf_list_pop_front(&f->head);
+ bpf_spin_unlock(&f->lock);
+ if (!n) {
+ bpf_obj_drop(f);
+ return 6;
+ }
+ b = container_of(n, struct bar, node);
+ if (b->data != i) {
+ bpf_obj_drop(f);
+ bpf_obj_drop(b);
+ return 7;
+ }
+ bpf_obj_drop(b);
+ }
+ bpf_spin_lock(&f->lock);
+ n = bpf_list_pop_front(&f->head);
+ bpf_spin_unlock(&f->lock);
+ if (n) {
+ bpf_obj_drop(f);
+ bpf_obj_drop(container_of(n, struct bar, node));
+ return 8;
+ }
+ bpf_obj_drop(f);
+ return 0;
+}
+
+static __always_inline
+int test_list_push_pop(struct bpf_spin_lock *lock, struct bpf_list_head *head)
+{
+ int ret;
+
+ ret = list_push_pop(lock, head, false);
+ if (ret)
+ return ret;
+ return list_push_pop(lock, head, true);
+}
+
+static __always_inline
+int test_list_push_pop_multiple(struct bpf_spin_lock *lock, struct bpf_list_head *head)
+{
+ int ret;
+
+ ret = list_push_pop_multiple(lock, head, false);
+ if (ret)
+ return ret;
+ return list_push_pop_multiple(lock, head, true);
+}
+
+static __always_inline
+int test_list_in_list(struct bpf_spin_lock *lock, struct bpf_list_head *head)
+{
+ int ret;
+
+ ret = list_in_list(lock, head, false);
+ if (ret)
+ return ret;
+ return list_in_list(lock, head, true);
+}
+
+SEC("tc")
+int map_list_push_pop(void *ctx)
+{
+ struct map_value *v;
+
+ v = bpf_map_lookup_elem(&array_map, &(int){0});
+ if (!v)
+ return 1;
+ return test_list_push_pop(&v->lock, &v->head);
+}
+
+SEC("tc")
+int inner_map_list_push_pop(void *ctx)
+{
+ struct map_value *v;
+ void *map;
+
+ map = bpf_map_lookup_elem(&map_of_maps, &(int){0});
+ if (!map)
+ return 1;
+ v = bpf_map_lookup_elem(map, &(int){0});
+ if (!v)
+ return 1;
+ return test_list_push_pop(&v->lock, &v->head);
+}
+
+SEC("tc")
+int global_list_push_pop(void *ctx)
+{
+ return test_list_push_pop(&glock, &ghead);
+}
+
+SEC("tc")
+int map_list_push_pop_multiple(void *ctx)
+{
+ struct map_value *v;
+
+ v = bpf_map_lookup_elem(&array_map, &(int){0});
+ if (!v)
+ return 1;
+ return test_list_push_pop_multiple(&v->lock, &v->head);
+}
+
+SEC("tc")
+int inner_map_list_push_pop_multiple(void *ctx)
+{
+ struct map_value *v;
+ void *map;
+
+ map = bpf_map_lookup_elem(&map_of_maps, &(int){0});
+ if (!map)
+ return 1;
+ v = bpf_map_lookup_elem(map, &(int){0});
+ if (!v)
+ return 1;
+ return test_list_push_pop_multiple(&v->lock, &v->head);
+}
+
+SEC("tc")
+int global_list_push_pop_multiple(void *ctx)
+{
+ int ret;
+
+ ret = list_push_pop_multiple(&glock, &ghead, false);
+ if (ret)
+ return ret;
+ return list_push_pop_multiple(&glock, &ghead, true);
+}
+
+SEC("tc")
+int map_list_in_list(void *ctx)
+{
+ struct map_value *v;
+
+ v = bpf_map_lookup_elem(&array_map, &(int){0});
+ if (!v)
+ return 1;
+ return test_list_in_list(&v->lock, &v->head);
+}
+
+SEC("tc")
+int inner_map_list_in_list(void *ctx)
+{
+ struct map_value *v;
+ void *map;
+
+ map = bpf_map_lookup_elem(&map_of_maps, &(int){0});
+ if (!map)
+ return 1;
+ v = bpf_map_lookup_elem(map, &(int){0});
+ if (!v)
+ return 1;
+ return test_list_in_list(&v->lock, &v->head);
+}
+
+SEC("tc")
+int global_list_in_list(void *ctx)
+{
+ return test_list_in_list(&glock, &ghead);
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/linked_list.h b/tools/testing/selftests/bpf/progs/linked_list.h
new file mode 100644
index 000000000000..c0f3609a7ffa
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/linked_list.h
@@ -0,0 +1,56 @@
+// SPDX-License-Identifier: GPL-2.0
+#ifndef LINKED_LIST_H
+#define LINKED_LIST_H
+
+#include <vmlinux.h>
+#include <bpf/bpf_helpers.h>
+#include "bpf_experimental.h"
+
+struct bar {
+ struct bpf_list_node node;
+ int data;
+};
+
+struct foo {
+ struct bpf_list_node node;
+ struct bpf_list_head head __contains(bar, node);
+ struct bpf_spin_lock lock;
+ int data;
+ struct bpf_list_node node2;
+};
+
+struct map_value {
+ struct bpf_spin_lock lock;
+ int data;
+ struct bpf_list_head head __contains(foo, node2);
+};
+
+struct array_map {
+ __uint(type, BPF_MAP_TYPE_ARRAY);
+ __type(key, int);
+ __type(value, struct map_value);
+ __uint(max_entries, 1);
+};
+
+struct array_map array_map SEC(".maps");
+struct array_map inner_map SEC(".maps");
+
+struct {
+ __uint(type, BPF_MAP_TYPE_ARRAY_OF_MAPS);
+ __uint(max_entries, 1);
+ __type(key, int);
+ __type(value, int);
+ __array(values, struct array_map);
+} map_of_maps SEC(".maps") = {
+ .values = {
+ [0] = &inner_map,
+ },
+};
+
+#define private(name) SEC(".bss." #name) __hidden __attribute__((aligned(8)))
+
+private(A) struct bpf_spin_lock glock;
+private(A) struct bpf_list_head ghead __contains(foo, node2);
+private(B) struct bpf_spin_lock glock2;
+
+#endif
diff --git a/tools/testing/selftests/bpf/progs/linked_list_fail.c b/tools/testing/selftests/bpf/progs/linked_list_fail.c
new file mode 100644
index 000000000000..6438982b928b
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/linked_list_fail.c
@@ -0,0 +1,612 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <vmlinux.h>
+#include <bpf/bpf_tracing.h>
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_core_read.h>
+#include "bpf_experimental.h"
+
+#include "linked_list.h"
+
+#define INIT \
+ struct map_value *v, *v2, *iv, *iv2; \
+ struct foo *f, *f1, *f2; \
+ struct bar *b; \
+ void *map; \
+ \
+ map = bpf_map_lookup_elem(&map_of_maps, &(int){ 0 }); \
+ if (!map) \
+ return 0; \
+ v = bpf_map_lookup_elem(&array_map, &(int){ 0 }); \
+ if (!v) \
+ return 0; \
+ v2 = bpf_map_lookup_elem(&array_map, &(int){ 0 }); \
+ if (!v2) \
+ return 0; \
+ iv = bpf_map_lookup_elem(map, &(int){ 0 }); \
+ if (!iv) \
+ return 0; \
+ iv2 = bpf_map_lookup_elem(map, &(int){ 0 }); \
+ if (!iv2) \
+ return 0; \
+ f = bpf_obj_new(typeof(*f)); \
+ if (!f) \
+ return 0; \
+ f1 = f; \
+ f2 = bpf_obj_new(typeof(*f2)); \
+ if (!f2) { \
+ bpf_obj_drop(f1); \
+ return 0; \
+ } \
+ b = bpf_obj_new(typeof(*b)); \
+ if (!b) { \
+ bpf_obj_drop(f2); \
+ bpf_obj_drop(f1); \
+ return 0; \
+ }
+
+#define CHECK(test, op, hexpr) \
+ SEC("?tc") \
+ int test##_missing_lock_##op(void *ctx) \
+ { \
+ INIT; \
+ void (*p)(void *) = (void *)&bpf_list_##op; \
+ p(hexpr); \
+ return 0; \
+ }
+
+CHECK(kptr, pop_front, &f->head);
+CHECK(kptr, pop_back, &f->head);
+
+CHECK(global, pop_front, &ghead);
+CHECK(global, pop_back, &ghead);
+
+CHECK(map, pop_front, &v->head);
+CHECK(map, pop_back, &v->head);
+
+CHECK(inner_map, pop_front, &iv->head);
+CHECK(inner_map, pop_back, &iv->head);
+
+#undef CHECK
+
+#define CHECK(test, op, hexpr, nexpr) \
+ SEC("?tc") \
+ int test##_missing_lock_##op(void *ctx) \
+ { \
+ INIT; \
+ bpf_list_##op(hexpr, nexpr); \
+ return 0; \
+ }
+
+CHECK(kptr, push_front, &f->head, &b->node);
+CHECK(kptr, push_back, &f->head, &b->node);
+
+CHECK(global, push_front, &ghead, &f->node2);
+CHECK(global, push_back, &ghead, &f->node2);
+
+CHECK(map, push_front, &v->head, &f->node2);
+CHECK(map, push_back, &v->head, &f->node2);
+
+CHECK(inner_map, push_front, &iv->head, &f->node2);
+CHECK(inner_map, push_back, &iv->head, &f->node2);
+
+#undef CHECK
+
+#define CHECK(test, op, lexpr, hexpr) \
+ SEC("?tc") \
+ int test##_incorrect_lock_##op(void *ctx) \
+ { \
+ INIT; \
+ void (*p)(void *) = (void *)&bpf_list_##op; \
+ bpf_spin_lock(lexpr); \
+ p(hexpr); \
+ return 0; \
+ }
+
+#define CHECK_OP(op) \
+ CHECK(kptr_kptr, op, &f1->lock, &f2->head); \
+ CHECK(kptr_global, op, &f1->lock, &ghead); \
+ CHECK(kptr_map, op, &f1->lock, &v->head); \
+ CHECK(kptr_inner_map, op, &f1->lock, &iv->head); \
+ \
+ CHECK(global_global, op, &glock2, &ghead); \
+ CHECK(global_kptr, op, &glock, &f1->head); \
+ CHECK(global_map, op, &glock, &v->head); \
+ CHECK(global_inner_map, op, &glock, &iv->head); \
+ \
+ CHECK(map_map, op, &v->lock, &v2->head); \
+ CHECK(map_kptr, op, &v->lock, &f2->head); \
+ CHECK(map_global, op, &v->lock, &ghead); \
+ CHECK(map_inner_map, op, &v->lock, &iv->head); \
+ \
+ CHECK(inner_map_inner_map, op, &iv->lock, &iv2->head); \
+ CHECK(inner_map_kptr, op, &iv->lock, &f2->head); \
+ CHECK(inner_map_global, op, &iv->lock, &ghead); \
+ CHECK(inner_map_map, op, &iv->lock, &v->head);
+
+CHECK_OP(pop_front);
+CHECK_OP(pop_back);
+
+#undef CHECK
+#undef CHECK_OP
+
+#define CHECK(test, op, lexpr, hexpr, nexpr) \
+ SEC("?tc") \
+ int test##_incorrect_lock_##op(void *ctx) \
+ { \
+ INIT; \
+ bpf_spin_lock(lexpr); \
+ bpf_list_##op(hexpr, nexpr); \
+ return 0; \
+ }
+
+#define CHECK_OP(op) \
+ CHECK(kptr_kptr, op, &f1->lock, &f2->head, &b->node); \
+ CHECK(kptr_global, op, &f1->lock, &ghead, &f->node2); \
+ CHECK(kptr_map, op, &f1->lock, &v->head, &f->node2); \
+ CHECK(kptr_inner_map, op, &f1->lock, &iv->head, &f->node2); \
+ \
+ CHECK(global_global, op, &glock2, &ghead, &f->node2); \
+ CHECK(global_kptr, op, &glock, &f1->head, &b->node); \
+ CHECK(global_map, op, &glock, &v->head, &f->node2); \
+ CHECK(global_inner_map, op, &glock, &iv->head, &f->node2); \
+ \
+ CHECK(map_map, op, &v->lock, &v2->head, &f->node2); \
+ CHECK(map_kptr, op, &v->lock, &f2->head, &b->node); \
+ CHECK(map_global, op, &v->lock, &ghead, &f->node2); \
+ CHECK(map_inner_map, op, &v->lock, &iv->head, &f->node2); \
+ \
+ CHECK(inner_map_inner_map, op, &iv->lock, &iv2->head, &f->node2);\
+ CHECK(inner_map_kptr, op, &iv->lock, &f2->head, &b->node); \
+ CHECK(inner_map_global, op, &iv->lock, &ghead, &f->node2); \
+ CHECK(inner_map_map, op, &iv->lock, &v->head, &f->node2);
+
+CHECK_OP(push_front);
+CHECK_OP(push_back);
+
+#undef CHECK
+#undef CHECK_OP
+#undef INIT
+
+SEC("?kprobe/xyz")
+int map_compat_kprobe(void *ctx)
+{
+ bpf_list_push_front(&ghead, NULL);
+ return 0;
+}
+
+SEC("?kretprobe/xyz")
+int map_compat_kretprobe(void *ctx)
+{
+ bpf_list_push_front(&ghead, NULL);
+ return 0;
+}
+
+SEC("?tracepoint/xyz")
+int map_compat_tp(void *ctx)
+{
+ bpf_list_push_front(&ghead, NULL);
+ return 0;
+}
+
+SEC("?perf_event")
+int map_compat_perf(void *ctx)
+{
+ bpf_list_push_front(&ghead, NULL);
+ return 0;
+}
+
+SEC("?raw_tp/xyz")
+int map_compat_raw_tp(void *ctx)
+{
+ bpf_list_push_front(&ghead, NULL);
+ return 0;
+}
+
+SEC("?raw_tp.w/xyz")
+int map_compat_raw_tp_w(void *ctx)
+{
+ bpf_list_push_front(&ghead, NULL);
+ return 0;
+}
+
+SEC("?tc")
+int obj_type_id_oor(void *ctx)
+{
+ bpf_obj_new_impl(~0UL, NULL);
+ return 0;
+}
+
+SEC("?tc")
+int obj_new_no_composite(void *ctx)
+{
+ bpf_obj_new_impl(bpf_core_type_id_local(int), (void *)42);
+ return 0;
+}
+
+SEC("?tc")
+int obj_new_no_struct(void *ctx)
+{
+
+ bpf_obj_new(union { int data; unsigned udata; });
+ return 0;
+}
+
+SEC("?tc")
+int obj_drop_non_zero_off(void *ctx)
+{
+ void *f;
+
+ f = bpf_obj_new(struct foo);
+ if (!f)
+ return 0;
+ bpf_obj_drop(f+1);
+ return 0;
+}
+
+SEC("?tc")
+int new_null_ret(void *ctx)
+{
+ return bpf_obj_new(struct foo)->data;
+}
+
+SEC("?tc")
+int obj_new_acq(void *ctx)
+{
+ bpf_obj_new(struct foo);
+ return 0;
+}
+
+SEC("?tc")
+int use_after_drop(void *ctx)
+{
+ struct foo *f;
+
+ f = bpf_obj_new(typeof(*f));
+ if (!f)
+ return 0;
+ bpf_obj_drop(f);
+ return f->data;
+}
+
+SEC("?tc")
+int ptr_walk_scalar(void *ctx)
+{
+ struct test1 {
+ struct test2 {
+ struct test2 *next;
+ } *ptr;
+ } *p;
+
+ p = bpf_obj_new(typeof(*p));
+ if (!p)
+ return 0;
+ bpf_this_cpu_ptr(p->ptr);
+ return 0;
+}
+
+SEC("?tc")
+int direct_read_lock(void *ctx)
+{
+ struct foo *f;
+
+ f = bpf_obj_new(typeof(*f));
+ if (!f)
+ return 0;
+ return *(int *)&f->lock;
+}
+
+SEC("?tc")
+int direct_write_lock(void *ctx)
+{
+ struct foo *f;
+
+ f = bpf_obj_new(typeof(*f));
+ if (!f)
+ return 0;
+ *(int *)&f->lock = 0;
+ return 0;
+}
+
+SEC("?tc")
+int direct_read_head(void *ctx)
+{
+ struct foo *f;
+
+ f = bpf_obj_new(typeof(*f));
+ if (!f)
+ return 0;
+ return *(int *)&f->head;
+}
+
+SEC("?tc")
+int direct_write_head(void *ctx)
+{
+ struct foo *f;
+
+ f = bpf_obj_new(typeof(*f));
+ if (!f)
+ return 0;
+ *(int *)&f->head = 0;
+ return 0;
+}
+
+SEC("?tc")
+int direct_read_node(void *ctx)
+{
+ struct foo *f;
+
+ f = bpf_obj_new(typeof(*f));
+ if (!f)
+ return 0;
+ return *(int *)&f->node2;
+}
+
+SEC("?tc")
+int direct_write_node(void *ctx)
+{
+ struct foo *f;
+
+ f = bpf_obj_new(typeof(*f));
+ if (!f)
+ return 0;
+ *(int *)&f->node2 = 0;
+ return 0;
+}
+
+static __always_inline
+int use_after_unlock(bool push_front)
+{
+ struct foo *f;
+
+ f = bpf_obj_new(typeof(*f));
+ if (!f)
+ return 0;
+ bpf_spin_lock(&glock);
+ f->data = 42;
+ if (push_front)
+ bpf_list_push_front(&ghead, &f->node2);
+ else
+ bpf_list_push_back(&ghead, &f->node2);
+ bpf_spin_unlock(&glock);
+
+ return f->data;
+}
+
+SEC("?tc")
+int use_after_unlock_push_front(void *ctx)
+{
+ return use_after_unlock(true);
+}
+
+SEC("?tc")
+int use_after_unlock_push_back(void *ctx)
+{
+ return use_after_unlock(false);
+}
+
+static __always_inline
+int list_double_add(bool push_front)
+{
+ struct foo *f;
+
+ f = bpf_obj_new(typeof(*f));
+ if (!f)
+ return 0;
+ bpf_spin_lock(&glock);
+ if (push_front) {
+ bpf_list_push_front(&ghead, &f->node2);
+ bpf_list_push_front(&ghead, &f->node2);
+ } else {
+ bpf_list_push_back(&ghead, &f->node2);
+ bpf_list_push_back(&ghead, &f->node2);
+ }
+ bpf_spin_unlock(&glock);
+
+ return 0;
+}
+
+SEC("?tc")
+int double_push_front(void *ctx)
+{
+ return list_double_add(true);
+}
+
+SEC("?tc")
+int double_push_back(void *ctx)
+{
+ return list_double_add(false);
+}
+
+SEC("?tc")
+int no_node_value_type(void *ctx)
+{
+ void *p;
+
+ p = bpf_obj_new(struct { int data; });
+ if (!p)
+ return 0;
+ bpf_spin_lock(&glock);
+ bpf_list_push_front(&ghead, p);
+ bpf_spin_unlock(&glock);
+
+ return 0;
+}
+
+SEC("?tc")
+int incorrect_value_type(void *ctx)
+{
+ struct bar *b;
+
+ b = bpf_obj_new(typeof(*b));
+ if (!b)
+ return 0;
+ bpf_spin_lock(&glock);
+ bpf_list_push_front(&ghead, &b->node);
+ bpf_spin_unlock(&glock);
+
+ return 0;
+}
+
+SEC("?tc")
+int incorrect_node_var_off(struct __sk_buff *ctx)
+{
+ struct foo *f;
+
+ f = bpf_obj_new(typeof(*f));
+ if (!f)
+ return 0;
+ bpf_spin_lock(&glock);
+ bpf_list_push_front(&ghead, (void *)&f->node2 + ctx->protocol);
+ bpf_spin_unlock(&glock);
+
+ return 0;
+}
+
+SEC("?tc")
+int incorrect_node_off1(void *ctx)
+{
+ struct foo *f;
+
+ f = bpf_obj_new(typeof(*f));
+ if (!f)
+ return 0;
+ bpf_spin_lock(&glock);
+ bpf_list_push_front(&ghead, (void *)&f->node2 + 1);
+ bpf_spin_unlock(&glock);
+
+ return 0;
+}
+
+SEC("?tc")
+int incorrect_node_off2(void *ctx)
+{
+ struct foo *f;
+
+ f = bpf_obj_new(typeof(*f));
+ if (!f)
+ return 0;
+ bpf_spin_lock(&glock);
+ bpf_list_push_front(&ghead, &f->node);
+ bpf_spin_unlock(&glock);
+
+ return 0;
+}
+
+SEC("?tc")
+int no_head_type(void *ctx)
+{
+ void *p;
+
+ p = bpf_obj_new(typeof(struct { int data; }));
+ if (!p)
+ return 0;
+ bpf_spin_lock(&glock);
+ bpf_list_push_front(p, NULL);
+ bpf_spin_lock(&glock);
+
+ return 0;
+}
+
+SEC("?tc")
+int incorrect_head_var_off1(struct __sk_buff *ctx)
+{
+ struct foo *f;
+
+ f = bpf_obj_new(typeof(*f));
+ if (!f)
+ return 0;
+ bpf_spin_lock(&glock);
+ bpf_list_push_front((void *)&ghead + ctx->protocol, &f->node2);
+ bpf_spin_unlock(&glock);
+
+ return 0;
+}
+
+SEC("?tc")
+int incorrect_head_var_off2(struct __sk_buff *ctx)
+{
+ struct foo *f;
+
+ f = bpf_obj_new(typeof(*f));
+ if (!f)
+ return 0;
+ bpf_spin_lock(&glock);
+ bpf_list_push_front((void *)&f->head + ctx->protocol, &f->node2);
+ bpf_spin_unlock(&glock);
+
+ return 0;
+}
+
+SEC("?tc")
+int incorrect_head_off1(void *ctx)
+{
+ struct foo *f;
+ struct bar *b;
+
+ f = bpf_obj_new(typeof(*f));
+ if (!f)
+ return 0;
+ b = bpf_obj_new(typeof(*b));
+ if (!b) {
+ bpf_obj_drop(f);
+ return 0;
+ }
+
+ bpf_spin_lock(&f->lock);
+ bpf_list_push_front((void *)&f->head + 1, &b->node);
+ bpf_spin_unlock(&f->lock);
+
+ return 0;
+}
+
+SEC("?tc")
+int incorrect_head_off2(void *ctx)
+{
+ struct foo *f;
+
+ f = bpf_obj_new(typeof(*f));
+ if (!f)
+ return 0;
+
+ bpf_spin_lock(&glock);
+ bpf_list_push_front((void *)&ghead + 1, &f->node2);
+ bpf_spin_unlock(&glock);
+
+ return 0;
+}
+
+static __always_inline
+int pop_ptr_off(void *(*op)(void *head))
+{
+ struct {
+ struct bpf_list_head head __contains(foo, node2);
+ struct bpf_spin_lock lock;
+ } *p;
+ struct bpf_list_node *n;
+
+ p = bpf_obj_new(typeof(*p));
+ if (!p)
+ return 0;
+ bpf_spin_lock(&p->lock);
+ n = op(&p->head);
+ bpf_spin_unlock(&p->lock);
+
+ if (!n)
+ return 0;
+ bpf_spin_lock((void *)n);
+ return 0;
+}
+
+SEC("?tc")
+int pop_front_off(void *ctx)
+{
+ return pop_ptr_off((void *)bpf_list_pop_front);
+}
+
+SEC("?tc")
+int pop_back_off(void *ctx)
+{
+ return pop_ptr_off((void *)bpf_list_pop_back);
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/linked_maps1.c b/tools/testing/selftests/bpf/progs/linked_maps1.c
new file mode 100644
index 000000000000..00bf1ca95986
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/linked_maps1.c
@@ -0,0 +1,82 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2021 Facebook */
+
+#include "vmlinux.h"
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
+
+struct my_key { long x; };
+struct my_value { long x; };
+
+struct {
+ __uint(type, BPF_MAP_TYPE_HASH);
+ __type(key, struct my_key);
+ __type(value, struct my_value);
+ __uint(max_entries, 16);
+} map1 SEC(".maps");
+
+ /* Matches map2 definition in linked_maps2.c. Order of the attributes doesn't
+ * matter.
+ */
+typedef struct {
+ __uint(max_entries, 8);
+ __type(key, int);
+ __type(value, int);
+ __uint(type, BPF_MAP_TYPE_ARRAY);
+} map2_t;
+
+extern map2_t map2 SEC(".maps");
+
+/* This should be the winning map definition, but we have no way of verifying,
+ * so we just make sure that it links and works without errors
+ */
+struct {
+ __uint(type, BPF_MAP_TYPE_ARRAY);
+ __type(key, int);
+ __type(value, int);
+ __uint(max_entries, 16);
+} map_weak __weak SEC(".maps");
+
+int output_first1;
+int output_second1;
+int output_weak1;
+
+SEC("raw_tp/sys_enter")
+int BPF_PROG(handler_enter1)
+{
+ /* update values with key = 1 */
+ int key = 1, val = 1;
+ struct my_key key_struct = { .x = 1 };
+ struct my_value val_struct = { .x = 1000 };
+
+ bpf_map_update_elem(&map1, &key_struct, &val_struct, 0);
+ bpf_map_update_elem(&map2, &key, &val, 0);
+ bpf_map_update_elem(&map_weak, &key, &val, 0);
+
+ return 0;
+}
+
+SEC("raw_tp/sys_exit")
+int BPF_PROG(handler_exit1)
+{
+ /* lookup values with key = 2, set in another file */
+ int key = 2, *val;
+ struct my_key key_struct = { .x = 2 };
+ struct my_value *value_struct;
+
+ value_struct = bpf_map_lookup_elem(&map1, &key_struct);
+ if (value_struct)
+ output_first1 = value_struct->x;
+
+ val = bpf_map_lookup_elem(&map2, &key);
+ if (val)
+ output_second1 = *val;
+
+ val = bpf_map_lookup_elem(&map_weak, &key);
+ if (val)
+ output_weak1 = *val;
+
+ return 0;
+}
+
+char LICENSE[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/linked_maps2.c b/tools/testing/selftests/bpf/progs/linked_maps2.c
new file mode 100644
index 000000000000..0693687474ed
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/linked_maps2.c
@@ -0,0 +1,76 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2021 Facebook */
+
+#include "vmlinux.h"
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
+
+/* modifiers and typedefs are ignored when comparing key/value types */
+typedef struct my_key { long x; } key_type;
+typedef struct my_value { long x; } value_type;
+
+extern struct {
+ __uint(max_entries, 16);
+ __type(key, key_type);
+ __type(value, value_type);
+ __uint(type, BPF_MAP_TYPE_HASH);
+} map1 SEC(".maps");
+
+struct {
+ __uint(type, BPF_MAP_TYPE_ARRAY);
+ __type(key, int);
+ __type(value, int);
+ __uint(max_entries, 8);
+} map2 SEC(".maps");
+
+/* this definition will lose, but it has to exactly match the winner */
+struct {
+ __uint(type, BPF_MAP_TYPE_ARRAY);
+ __type(key, int);
+ __type(value, int);
+ __uint(max_entries, 16);
+} map_weak __weak SEC(".maps");
+
+int output_first2;
+int output_second2;
+int output_weak2;
+
+SEC("raw_tp/sys_enter")
+int BPF_PROG(handler_enter2)
+{
+ /* update values with key = 2 */
+ int key = 2, val = 2;
+ key_type key_struct = { .x = 2 };
+ value_type val_struct = { .x = 2000 };
+
+ bpf_map_update_elem(&map1, &key_struct, &val_struct, 0);
+ bpf_map_update_elem(&map2, &key, &val, 0);
+ bpf_map_update_elem(&map_weak, &key, &val, 0);
+
+ return 0;
+}
+
+SEC("raw_tp/sys_exit")
+int BPF_PROG(handler_exit2)
+{
+ /* lookup values with key = 1, set in another file */
+ int key = 1, *val;
+ key_type key_struct = { .x = 1 };
+ value_type *value_struct;
+
+ value_struct = bpf_map_lookup_elem(&map1, &key_struct);
+ if (value_struct)
+ output_first2 = value_struct->x;
+
+ val = bpf_map_lookup_elem(&map2, &key);
+ if (val)
+ output_second2 = *val;
+
+ val = bpf_map_lookup_elem(&map_weak, &key);
+ if (val)
+ output_weak2 = *val;
+
+ return 0;
+}
+
+char LICENSE[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/linked_vars1.c b/tools/testing/selftests/bpf/progs/linked_vars1.c
new file mode 100644
index 000000000000..ef9e9d0bb0ca
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/linked_vars1.c
@@ -0,0 +1,54 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2021 Facebook */
+
+#include "vmlinux.h"
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
+
+extern int LINUX_KERNEL_VERSION __kconfig;
+/* this weak extern will be strict due to the other file's strong extern */
+extern bool CONFIG_BPF_SYSCALL __kconfig __weak;
+extern const void bpf_link_fops __ksym __weak;
+
+int input_bss1;
+int input_data1 = 1;
+const volatile int input_rodata1 = 11;
+
+int input_bss_weak __weak;
+/* these two definitions should win */
+int input_data_weak __weak = 10;
+const volatile int input_rodata_weak __weak = 100;
+
+extern int input_bss2;
+extern int input_data2;
+extern const int input_rodata2;
+
+int output_bss1;
+int output_data1;
+int output_rodata1;
+
+long output_sink1;
+
+static __noinline int get_bss_res(void)
+{
+ /* just make sure all the relocations work against .text as well */
+ return input_bss1 + input_bss2 + input_bss_weak;
+}
+
+SEC("raw_tp/sys_enter")
+int BPF_PROG(handler1)
+{
+ output_bss1 = get_bss_res();
+ output_data1 = input_data1 + input_data2 + input_data_weak;
+ output_rodata1 = input_rodata1 + input_rodata2 + input_rodata_weak;
+
+ /* make sure we actually use above special externs, otherwise compiler
+ * will optimize them out
+ */
+ output_sink1 = LINUX_KERNEL_VERSION
+ + CONFIG_BPF_SYSCALL
+ + (long)&bpf_link_fops;
+ return 0;
+}
+
+char LICENSE[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/linked_vars2.c b/tools/testing/selftests/bpf/progs/linked_vars2.c
new file mode 100644
index 000000000000..e4f5bd388a3c
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/linked_vars2.c
@@ -0,0 +1,55 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2021 Facebook */
+
+#include "vmlinux.h"
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
+
+extern int LINUX_KERNEL_VERSION __kconfig;
+/* when an extern is defined as both strong and weak, resulting symbol will be strong */
+extern bool CONFIG_BPF_SYSCALL __kconfig;
+extern const void __start_BTF __ksym;
+
+int input_bss2;
+int input_data2 = 2;
+const volatile int input_rodata2 = 22;
+
+int input_bss_weak __weak;
+/* these two weak variables should lose */
+int input_data_weak __weak = 20;
+const volatile int input_rodata_weak __weak = 200;
+
+extern int input_bss1;
+extern int input_data1;
+extern const int input_rodata1;
+
+int output_bss2;
+int output_data2;
+int output_rodata2;
+
+int output_sink2;
+
+static __noinline int get_data_res(void)
+{
+ /* just make sure all the relocations work against .text as well */
+ return input_data1 + input_data2 + input_data_weak;
+}
+
+SEC("raw_tp/sys_enter")
+int BPF_PROG(handler2)
+{
+ output_bss2 = input_bss1 + input_bss2 + input_bss_weak;
+ output_data2 = get_data_res();
+ output_rodata2 = input_rodata1 + input_rodata2 + input_rodata_weak;
+
+ /* make sure we actually use above special externs, otherwise compiler
+ * will optimize them out
+ */
+ output_sink2 = LINUX_KERNEL_VERSION
+ + CONFIG_BPF_SYSCALL
+ + (long)&__start_BTF;
+
+ return 0;
+}
+
+char LICENSE[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/local_kptr_stash.c b/tools/testing/selftests/bpf/progs/local_kptr_stash.c
new file mode 100644
index 000000000000..75043ffc5dad
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/local_kptr_stash.c
@@ -0,0 +1,259 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2023 Meta Platforms, Inc. and affiliates. */
+
+#include <vmlinux.h>
+#include <bpf/bpf_tracing.h>
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_core_read.h>
+#include "../bpf_experimental.h"
+#include "../bpf_testmod/bpf_testmod_kfunc.h"
+
+struct node_data {
+ long key;
+ long data;
+ struct bpf_rb_node node;
+};
+
+struct refcounted_node {
+ long data;
+ struct bpf_rb_node rb_node;
+ struct bpf_refcount refcount;
+};
+
+struct stash {
+ struct bpf_spin_lock l;
+ struct refcounted_node __kptr *stashed;
+};
+
+struct {
+ __uint(type, BPF_MAP_TYPE_ARRAY);
+ __type(key, int);
+ __type(value, struct stash);
+ __uint(max_entries, 10);
+} refcounted_node_stash SEC(".maps");
+
+struct plain_local {
+ long key;
+ long data;
+};
+
+struct local_with_root {
+ long key;
+ struct bpf_spin_lock l;
+ struct bpf_rb_root r __contains(node_data, node);
+};
+
+struct map_value {
+ struct prog_test_ref_kfunc *not_kptr;
+ struct prog_test_ref_kfunc __kptr *val;
+ struct node_data __kptr *node;
+ struct plain_local __kptr *plain;
+ struct local_with_root __kptr *local_root;
+};
+
+/* This is necessary so that LLVM generates BTF for node_data struct
+ * If it's not included, a fwd reference for node_data will be generated but
+ * no struct. Example BTF of "node" field in map_value when not included:
+ *
+ * [10] PTR '(anon)' type_id=35
+ * [34] FWD 'node_data' fwd_kind=struct
+ * [35] TYPE_TAG 'kptr_ref' type_id=34
+ *
+ * (with no node_data struct defined)
+ * Had to do the same w/ bpf_kfunc_call_test_release below
+ */
+struct node_data *just_here_because_btf_bug;
+struct refcounted_node *just_here_because_btf_bug2;
+
+struct {
+ __uint(type, BPF_MAP_TYPE_ARRAY);
+ __type(key, int);
+ __type(value, struct map_value);
+ __uint(max_entries, 2);
+} some_nodes SEC(".maps");
+
+static bool less(struct bpf_rb_node *a, const struct bpf_rb_node *b)
+{
+ struct node_data *node_a;
+ struct node_data *node_b;
+
+ node_a = container_of(a, struct node_data, node);
+ node_b = container_of(b, struct node_data, node);
+
+ return node_a->key < node_b->key;
+}
+
+static int create_and_stash(int idx, int val)
+{
+ struct map_value *mapval;
+ struct node_data *res;
+
+ mapval = bpf_map_lookup_elem(&some_nodes, &idx);
+ if (!mapval)
+ return 1;
+
+ res = bpf_obj_new(typeof(*res));
+ if (!res)
+ return 1;
+ res->key = val;
+
+ res = bpf_kptr_xchg(&mapval->node, res);
+ if (res)
+ bpf_obj_drop(res);
+ return 0;
+}
+
+SEC("tc")
+long stash_rb_nodes(void *ctx)
+{
+ return create_and_stash(0, 41) ?: create_and_stash(1, 42);
+}
+
+SEC("tc")
+long stash_plain(void *ctx)
+{
+ struct map_value *mapval;
+ struct plain_local *res;
+ int idx = 0;
+
+ mapval = bpf_map_lookup_elem(&some_nodes, &idx);
+ if (!mapval)
+ return 1;
+
+ res = bpf_obj_new(typeof(*res));
+ if (!res)
+ return 1;
+ res->key = 41;
+
+ res = bpf_kptr_xchg(&mapval->plain, res);
+ if (res)
+ bpf_obj_drop(res);
+ return 0;
+}
+
+SEC("tc")
+long stash_local_with_root(void *ctx)
+{
+ struct local_with_root *res;
+ struct map_value *mapval;
+ struct node_data *n;
+ int idx = 0;
+
+ mapval = bpf_map_lookup_elem(&some_nodes, &idx);
+ if (!mapval)
+ return 1;
+
+ res = bpf_obj_new(typeof(*res));
+ if (!res)
+ return 2;
+ res->key = 41;
+
+ n = bpf_obj_new(typeof(*n));
+ if (!n) {
+ bpf_obj_drop(res);
+ return 3;
+ }
+
+ bpf_spin_lock(&res->l);
+ bpf_rbtree_add(&res->r, &n->node, less);
+ bpf_spin_unlock(&res->l);
+
+ res = bpf_kptr_xchg(&mapval->local_root, res);
+ if (res) {
+ bpf_obj_drop(res);
+ return 4;
+ }
+ return 0;
+}
+
+SEC("tc")
+long unstash_rb_node(void *ctx)
+{
+ struct map_value *mapval;
+ struct node_data *res;
+ long retval;
+ int key = 1;
+
+ mapval = bpf_map_lookup_elem(&some_nodes, &key);
+ if (!mapval)
+ return 1;
+
+ res = bpf_kptr_xchg(&mapval->node, NULL);
+ if (res) {
+ retval = res->key;
+ bpf_obj_drop(res);
+ return retval;
+ }
+ return 1;
+}
+
+SEC("tc")
+long stash_test_ref_kfunc(void *ctx)
+{
+ struct prog_test_ref_kfunc *res;
+ struct map_value *mapval;
+ int key = 0;
+
+ mapval = bpf_map_lookup_elem(&some_nodes, &key);
+ if (!mapval)
+ return 1;
+
+ res = bpf_kptr_xchg(&mapval->val, NULL);
+ if (res)
+ bpf_kfunc_call_test_release(res);
+ return 0;
+}
+
+SEC("tc")
+long refcount_acquire_without_unstash(void *ctx)
+{
+ struct refcounted_node *p;
+ struct stash *s;
+ int ret = 0;
+
+ s = bpf_map_lookup_elem(&refcounted_node_stash, &ret);
+ if (!s)
+ return 1;
+
+ if (!s->stashed)
+ /* refcount_acquire failure is expected when no refcounted_node
+ * has been stashed before this program executes
+ */
+ return 2;
+
+ p = bpf_refcount_acquire(s->stashed);
+ if (!p)
+ return 3;
+
+ ret = s->stashed ? s->stashed->data : -1;
+ bpf_obj_drop(p);
+ return ret;
+}
+
+/* Helper for refcount_acquire_without_unstash test */
+SEC("tc")
+long stash_refcounted_node(void *ctx)
+{
+ struct refcounted_node *p;
+ struct stash *s;
+ int key = 0;
+
+ s = bpf_map_lookup_elem(&refcounted_node_stash, &key);
+ if (!s)
+ return 1;
+
+ p = bpf_obj_new(typeof(*p));
+ if (!p)
+ return 2;
+ p->data = 42;
+
+ p = bpf_kptr_xchg(&s->stashed, p);
+ if (p) {
+ bpf_obj_drop(p);
+ return 3;
+ }
+
+ return 0;
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/local_kptr_stash_fail.c b/tools/testing/selftests/bpf/progs/local_kptr_stash_fail.c
new file mode 100644
index 000000000000..fcf7a7567da2
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/local_kptr_stash_fail.c
@@ -0,0 +1,85 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2023 Meta Platforms, Inc. and affiliates. */
+
+#include <vmlinux.h>
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
+#include <bpf/bpf_core_read.h>
+#include "../bpf_experimental.h"
+#include "bpf_misc.h"
+
+struct node_data {
+ long key;
+ long data;
+ struct bpf_rb_node node;
+};
+
+struct map_value {
+ struct node_data __kptr *node;
+};
+
+struct node_data2 {
+ long key[4];
+};
+
+/* This is necessary so that LLVM generates BTF for node_data struct
+ * If it's not included, a fwd reference for node_data will be generated but
+ * no struct. Example BTF of "node" field in map_value when not included:
+ *
+ * [10] PTR '(anon)' type_id=35
+ * [34] FWD 'node_data' fwd_kind=struct
+ * [35] TYPE_TAG 'kptr_ref' type_id=34
+ */
+struct node_data *just_here_because_btf_bug;
+
+struct {
+ __uint(type, BPF_MAP_TYPE_ARRAY);
+ __type(key, int);
+ __type(value, struct map_value);
+ __uint(max_entries, 2);
+} some_nodes SEC(".maps");
+
+SEC("tc")
+__failure __msg("invalid kptr access, R2 type=ptr_node_data2 expected=ptr_node_data")
+long stash_rb_nodes(void *ctx)
+{
+ struct map_value *mapval;
+ struct node_data2 *res;
+ int idx = 0;
+
+ mapval = bpf_map_lookup_elem(&some_nodes, &idx);
+ if (!mapval)
+ return 1;
+
+ res = bpf_obj_new(typeof(*res));
+ if (!res)
+ return 1;
+ res->key[0] = 40;
+
+ res = bpf_kptr_xchg(&mapval->node, res);
+ if (res)
+ bpf_obj_drop(res);
+ return 0;
+}
+
+SEC("tc")
+__failure __msg("R1 must have zero offset when passed to release func")
+long drop_rb_node_off(void *ctx)
+{
+ struct map_value *mapval;
+ struct node_data *res;
+ int idx = 0;
+
+ mapval = bpf_map_lookup_elem(&some_nodes, &idx);
+ if (!mapval)
+ return 1;
+
+ res = bpf_obj_new(typeof(*res));
+ if (!res)
+ return 1;
+ /* Try releasing with graph node offset */
+ bpf_obj_drop(&res->node);
+ return 0;
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/local_storage.c b/tools/testing/selftests/bpf/progs/local_storage.c
new file mode 100644
index 000000000000..e5e3a8b8dd07
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/local_storage.c
@@ -0,0 +1,226 @@
+// SPDX-License-Identifier: GPL-2.0
+
+/*
+ * Copyright 2020 Google LLC.
+ */
+
+#include "vmlinux.h"
+#include <errno.h>
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
+
+char _license[] SEC("license") = "GPL";
+
+#define DUMMY_STORAGE_VALUE 0xdeadbeef
+
+__u32 monitored_pid = 0;
+int inode_storage_result = -1;
+int sk_storage_result = -1;
+int task_storage_result = -1;
+
+struct local_storage {
+ struct inode *exec_inode;
+ __u32 value;
+};
+
+struct {
+ __uint(type, BPF_MAP_TYPE_INODE_STORAGE);
+ __uint(map_flags, BPF_F_NO_PREALLOC);
+ __type(key, int);
+ __type(value, struct local_storage);
+} inode_storage_map SEC(".maps");
+
+struct {
+ __uint(type, BPF_MAP_TYPE_SK_STORAGE);
+ __uint(map_flags, BPF_F_NO_PREALLOC | BPF_F_CLONE);
+ __type(key, int);
+ __type(value, struct local_storage);
+} sk_storage_map SEC(".maps");
+
+struct {
+ __uint(type, BPF_MAP_TYPE_SK_STORAGE);
+ __uint(map_flags, BPF_F_NO_PREALLOC | BPF_F_CLONE);
+ __type(key, int);
+ __type(value, struct local_storage);
+} sk_storage_map2 SEC(".maps");
+
+struct {
+ __uint(type, BPF_MAP_TYPE_TASK_STORAGE);
+ __uint(map_flags, BPF_F_NO_PREALLOC);
+ __type(key, int);
+ __type(value, struct local_storage);
+} task_storage_map SEC(".maps");
+
+struct {
+ __uint(type, BPF_MAP_TYPE_TASK_STORAGE);
+ __uint(map_flags, BPF_F_NO_PREALLOC);
+ __type(key, int);
+ __type(value, struct local_storage);
+} task_storage_map2 SEC(".maps");
+
+SEC("lsm/inode_unlink")
+int BPF_PROG(unlink_hook, struct inode *dir, struct dentry *victim)
+{
+ __u32 pid = bpf_get_current_pid_tgid() >> 32;
+ struct bpf_local_storage *local_storage;
+ struct local_storage *storage;
+ struct task_struct *task;
+ bool is_self_unlink;
+
+ if (pid != monitored_pid)
+ return 0;
+
+ task = bpf_get_current_task_btf();
+ if (!task)
+ return 0;
+
+ task_storage_result = -1;
+
+ storage = bpf_task_storage_get(&task_storage_map, task, 0, 0);
+ if (!storage)
+ return 0;
+
+ /* Don't let an executable delete itself */
+ is_self_unlink = storage->exec_inode == victim->d_inode;
+
+ storage = bpf_task_storage_get(&task_storage_map2, task, 0,
+ BPF_LOCAL_STORAGE_GET_F_CREATE);
+ if (!storage || storage->value)
+ return 0;
+
+ if (bpf_task_storage_delete(&task_storage_map, task))
+ return 0;
+
+ /* Ensure that the task_storage_map is disconnected from the storage.
+ * The storage memory should not be freed back to the
+ * bpf_mem_alloc.
+ */
+ local_storage = task->bpf_storage;
+ if (!local_storage || local_storage->smap)
+ return 0;
+
+ task_storage_result = 0;
+
+ return is_self_unlink ? -EPERM : 0;
+}
+
+SEC("lsm.s/inode_rename")
+int BPF_PROG(inode_rename, struct inode *old_dir, struct dentry *old_dentry,
+ struct inode *new_dir, struct dentry *new_dentry,
+ unsigned int flags)
+{
+ struct local_storage *storage;
+ int err;
+
+ /* new_dentry->d_inode can be NULL when the inode is renamed to a file
+ * that did not exist before. The helper should be able to handle this
+ * NULL pointer.
+ */
+ bpf_inode_storage_get(&inode_storage_map, new_dentry->d_inode, 0,
+ BPF_LOCAL_STORAGE_GET_F_CREATE);
+
+ storage = bpf_inode_storage_get(&inode_storage_map, old_dentry->d_inode,
+ 0, 0);
+ if (!storage)
+ return 0;
+
+ if (storage->value != DUMMY_STORAGE_VALUE)
+ inode_storage_result = -1;
+
+ err = bpf_inode_storage_delete(&inode_storage_map, old_dentry->d_inode);
+ if (!err)
+ inode_storage_result = err;
+
+ return 0;
+}
+
+SEC("lsm.s/socket_bind")
+int BPF_PROG(socket_bind, struct socket *sock, struct sockaddr *address,
+ int addrlen)
+{
+ __u32 pid = bpf_get_current_pid_tgid() >> 32;
+ struct local_storage *storage;
+
+ if (pid != monitored_pid)
+ return 0;
+
+ storage = bpf_sk_storage_get(&sk_storage_map, sock->sk, 0, 0);
+ if (!storage)
+ return 0;
+
+ sk_storage_result = -1;
+ if (storage->value != DUMMY_STORAGE_VALUE)
+ return 0;
+
+ /* This tests that we can associate multiple elements
+ * with the local storage.
+ */
+ storage = bpf_sk_storage_get(&sk_storage_map2, sock->sk, 0,
+ BPF_LOCAL_STORAGE_GET_F_CREATE);
+ if (!storage)
+ return 0;
+
+ if (bpf_sk_storage_delete(&sk_storage_map2, sock->sk))
+ return 0;
+
+ storage = bpf_sk_storage_get(&sk_storage_map2, sock->sk, 0,
+ BPF_LOCAL_STORAGE_GET_F_CREATE);
+ if (!storage)
+ return 0;
+
+ if (bpf_sk_storage_delete(&sk_storage_map, sock->sk))
+ return 0;
+
+ /* Ensure that the sk_storage_map is disconnected from the storage. */
+ if (!sock->sk->sk_bpf_storage || sock->sk->sk_bpf_storage->smap)
+ return 0;
+
+ sk_storage_result = 0;
+ return 0;
+}
+
+SEC("lsm.s/socket_post_create")
+int BPF_PROG(socket_post_create, struct socket *sock, int family, int type,
+ int protocol, int kern)
+{
+ __u32 pid = bpf_get_current_pid_tgid() >> 32;
+ struct local_storage *storage;
+
+ if (pid != monitored_pid)
+ return 0;
+
+ storage = bpf_sk_storage_get(&sk_storage_map, sock->sk, 0,
+ BPF_LOCAL_STORAGE_GET_F_CREATE);
+ if (!storage)
+ return 0;
+
+ storage->value = DUMMY_STORAGE_VALUE;
+
+ return 0;
+}
+
+/* This uses the local storage to remember the inode of the binary that a
+ * process was originally executing.
+ */
+SEC("lsm.s/bprm_committed_creds")
+void BPF_PROG(exec, struct linux_binprm *bprm)
+{
+ __u32 pid = bpf_get_current_pid_tgid() >> 32;
+ struct local_storage *storage;
+
+ if (pid != monitored_pid)
+ return;
+
+ storage = bpf_task_storage_get(&task_storage_map,
+ bpf_get_current_task_btf(), 0,
+ BPF_LOCAL_STORAGE_GET_F_CREATE);
+ if (storage)
+ storage->exec_inode = bprm->file->f_inode;
+
+ storage = bpf_inode_storage_get(&inode_storage_map, bprm->file->f_inode,
+ 0, BPF_LOCAL_STORAGE_GET_F_CREATE);
+ if (!storage)
+ return;
+
+ storage->value = DUMMY_STORAGE_VALUE;
+}
diff --git a/tools/testing/selftests/bpf/progs/local_storage_bench.c b/tools/testing/selftests/bpf/progs/local_storage_bench.c
new file mode 100644
index 000000000000..2c3234c5b73a
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/local_storage_bench.c
@@ -0,0 +1,104 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2022 Meta Platforms, Inc. and affiliates. */
+
+#include "vmlinux.h"
+#include <bpf/bpf_helpers.h>
+#include "bpf_misc.h"
+
+#define HASHMAP_SZ 4194304
+
+struct {
+ __uint(type, BPF_MAP_TYPE_ARRAY_OF_MAPS);
+ __uint(max_entries, 1000);
+ __type(key, int);
+ __type(value, int);
+ __array(values, struct {
+ __uint(type, BPF_MAP_TYPE_TASK_STORAGE);
+ __uint(map_flags, BPF_F_NO_PREALLOC);
+ __type(key, int);
+ __type(value, int);
+ });
+} array_of_local_storage_maps SEC(".maps");
+
+struct {
+ __uint(type, BPF_MAP_TYPE_ARRAY_OF_MAPS);
+ __uint(max_entries, 1000);
+ __type(key, int);
+ __type(value, int);
+ __array(values, struct {
+ __uint(type, BPF_MAP_TYPE_HASH);
+ __uint(max_entries, HASHMAP_SZ);
+ __type(key, int);
+ __type(value, int);
+ });
+} array_of_hash_maps SEC(".maps");
+
+long important_hits;
+long hits;
+
+/* set from user-space */
+const volatile unsigned int use_hashmap;
+const volatile unsigned int hashmap_num_keys;
+const volatile unsigned int num_maps;
+const volatile unsigned int interleave;
+
+struct loop_ctx {
+ struct task_struct *task;
+ long loop_hits;
+ long loop_important_hits;
+};
+
+static int do_lookup(unsigned int elem, struct loop_ctx *lctx)
+{
+ void *map, *inner_map;
+ int idx = 0;
+
+ if (use_hashmap)
+ map = &array_of_hash_maps;
+ else
+ map = &array_of_local_storage_maps;
+
+ inner_map = bpf_map_lookup_elem(map, &elem);
+ if (!inner_map)
+ return -1;
+
+ if (use_hashmap) {
+ idx = bpf_get_prandom_u32() % hashmap_num_keys;
+ bpf_map_lookup_elem(inner_map, &idx);
+ } else {
+ bpf_task_storage_get(inner_map, lctx->task, &idx,
+ BPF_LOCAL_STORAGE_GET_F_CREATE);
+ }
+
+ lctx->loop_hits++;
+ if (!elem)
+ lctx->loop_important_hits++;
+ return 0;
+}
+
+static long loop(u32 index, void *ctx)
+{
+ struct loop_ctx *lctx = (struct loop_ctx *)ctx;
+ unsigned int map_idx = index % num_maps;
+
+ do_lookup(map_idx, lctx);
+ if (interleave && map_idx % 3 == 0)
+ do_lookup(0, lctx);
+ return 0;
+}
+
+SEC("fentry/" SYS_PREFIX "sys_getpgid")
+int get_local(void *ctx)
+{
+ struct loop_ctx lctx;
+
+ lctx.task = bpf_get_current_task_btf();
+ lctx.loop_hits = 0;
+ lctx.loop_important_hits = 0;
+ bpf_loop(10000, &loop, &lctx, 0);
+ __sync_add_and_fetch(&hits, lctx.loop_hits);
+ __sync_add_and_fetch(&important_hits, lctx.loop_important_hits);
+ return 0;
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/local_storage_rcu_tasks_trace_bench.c b/tools/testing/selftests/bpf/progs/local_storage_rcu_tasks_trace_bench.c
new file mode 100644
index 000000000000..03bf69f49075
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/local_storage_rcu_tasks_trace_bench.c
@@ -0,0 +1,67 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2022 Meta Platforms, Inc. and affiliates. */
+
+#include "vmlinux.h"
+#include <bpf/bpf_helpers.h>
+#include "bpf_misc.h"
+
+struct {
+ __uint(type, BPF_MAP_TYPE_TASK_STORAGE);
+ __uint(map_flags, BPF_F_NO_PREALLOC);
+ __type(key, int);
+ __type(value, int);
+} task_storage SEC(".maps");
+
+long hits;
+long gp_hits;
+long gp_times;
+long current_gp_start;
+long unexpected;
+bool postgp_seen;
+
+SEC("fentry/" SYS_PREFIX "sys_getpgid")
+int get_local(void *ctx)
+{
+ struct task_struct *task;
+ int idx;
+ int *s;
+
+ idx = 0;
+ task = bpf_get_current_task_btf();
+ s = bpf_task_storage_get(&task_storage, task, &idx,
+ BPF_LOCAL_STORAGE_GET_F_CREATE);
+ if (!s)
+ return 0;
+
+ *s = 3;
+ bpf_task_storage_delete(&task_storage, task);
+ __sync_add_and_fetch(&hits, 1);
+ return 0;
+}
+
+SEC("fentry/rcu_tasks_trace_pregp_step")
+int pregp_step(struct pt_regs *ctx)
+{
+ current_gp_start = bpf_ktime_get_ns();
+ return 0;
+}
+
+SEC("fentry/rcu_tasks_trace_postgp")
+int postgp(struct pt_regs *ctx)
+{
+ if (!current_gp_start && postgp_seen) {
+ /* Will only happen if prog tracing rcu_tasks_trace_pregp_step doesn't
+ * execute before this prog
+ */
+ __sync_add_and_fetch(&unexpected, 1);
+ return 0;
+ }
+
+ __sync_add_and_fetch(&gp_times, bpf_ktime_get_ns() - current_gp_start);
+ __sync_add_and_fetch(&gp_hits, 1);
+ current_gp_start = 0;
+ postgp_seen = true;
+ return 0;
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/loop3.c b/tools/testing/selftests/bpf/progs/loop3.c
index 76e93b31c14b..717dab14322b 100644
--- a/tools/testing/selftests/bpf/progs/loop3.c
+++ b/tools/testing/selftests/bpf/progs/loop3.c
@@ -12,9 +12,9 @@
char _license[] SEC("license") = "GPL";
SEC("raw_tracepoint/consume_skb")
-int while_true(volatile struct pt_regs* ctx)
+int while_true(struct pt_regs *ctx)
{
- __u64 i = 0, sum = 0;
+ volatile __u64 i = 0, sum = 0;
do {
i++;
sum += PT_REGS_RC(ctx);
diff --git a/tools/testing/selftests/bpf/progs/loop4.c b/tools/testing/selftests/bpf/progs/loop4.c
index b35337926d66..0de0357f57cc 100644
--- a/tools/testing/selftests/bpf/progs/loop4.c
+++ b/tools/testing/selftests/bpf/progs/loop4.c
@@ -3,6 +3,8 @@
#include <linux/bpf.h>
#include <bpf/bpf_helpers.h>
+#include "bpf_compiler.h"
+
char _license[] SEC("license") = "GPL";
SEC("socket")
@@ -10,7 +12,7 @@ int combinations(volatile struct __sk_buff* skb)
{
int ret = 0, i;
-#pragma nounroll
+ __pragma_loop_no_unroll
for (i = 0; i < 20; i++)
if (skb->len)
ret |= 1 << i;
diff --git a/tools/testing/selftests/bpf/progs/loop5.c b/tools/testing/selftests/bpf/progs/loop5.c
index 913791923fa3..1b13f37f85ec 100644
--- a/tools/testing/selftests/bpf/progs/loop5.c
+++ b/tools/testing/selftests/bpf/progs/loop5.c
@@ -2,7 +2,6 @@
// Copyright (c) 2019 Facebook
#include <linux/bpf.h>
#include <bpf/bpf_helpers.h>
-#define barrier() __asm__ __volatile__("": : :"memory")
char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/loop6.c b/tools/testing/selftests/bpf/progs/loop6.c
new file mode 100644
index 000000000000..e4ff97fbcce1
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/loop6.c
@@ -0,0 +1,102 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include <linux/ptrace.h>
+#include <stddef.h>
+#include <linux/bpf.h>
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
+#include "bpf_misc.h"
+
+char _license[] SEC("license") = "GPL";
+
+/* typically virtio scsi has max SGs of 6 */
+#define VIRTIO_MAX_SGS 6
+
+/* Verifier will fail with SG_MAX = 128. The failure can be
+ * workarounded with a smaller SG_MAX, e.g. 10.
+ */
+#define WORKAROUND
+#ifdef WORKAROUND
+#define SG_MAX 10
+#else
+/* typically virtio blk has max SEG of 128 */
+#define SG_MAX 128
+#endif
+
+#define SG_CHAIN 0x01UL
+#define SG_END 0x02UL
+
+struct scatterlist {
+ unsigned long page_link;
+ unsigned int offset;
+ unsigned int length;
+};
+
+#define sg_is_chain(sg) ((sg)->page_link & SG_CHAIN)
+#define sg_is_last(sg) ((sg)->page_link & SG_END)
+#define sg_chain_ptr(sg) \
+ ((struct scatterlist *) ((sg)->page_link & ~(SG_CHAIN | SG_END)))
+
+static inline struct scatterlist *__sg_next(struct scatterlist *sgp)
+{
+ struct scatterlist sg;
+
+ bpf_probe_read_kernel(&sg, sizeof(sg), sgp);
+ if (sg_is_last(&sg))
+ return NULL;
+
+ sgp++;
+
+ bpf_probe_read_kernel(&sg, sizeof(sg), sgp);
+ if (sg_is_chain(&sg))
+ sgp = sg_chain_ptr(&sg);
+
+ return sgp;
+}
+
+static inline struct scatterlist *get_sgp(struct scatterlist **sgs, int i)
+{
+ struct scatterlist *sgp;
+
+ bpf_probe_read_kernel(&sgp, sizeof(sgp), sgs + i);
+ return sgp;
+}
+
+int config = 0;
+int result = 0;
+
+SEC("kprobe/virtqueue_add_sgs")
+int BPF_KPROBE(trace_virtqueue_add_sgs, void *unused, struct scatterlist **sgs,
+ unsigned int out_sgs, unsigned int in_sgs)
+{
+ struct scatterlist *sgp = NULL;
+ __u64 length1 = 0, length2 = 0;
+ unsigned int i, n, len;
+
+ if (config != 0)
+ return 0;
+
+ for (i = 0; (i < VIRTIO_MAX_SGS) && (i < out_sgs); i++) {
+ __sink(out_sgs);
+ for (n = 0, sgp = get_sgp(sgs, i); sgp && (n < SG_MAX);
+ sgp = __sg_next(sgp)) {
+ bpf_probe_read_kernel(&len, sizeof(len), &sgp->length);
+ length1 += len;
+ n++;
+ }
+ }
+
+ for (i = 0; (i < VIRTIO_MAX_SGS) && (i < in_sgs); i++) {
+ __sink(in_sgs);
+ for (n = 0, sgp = get_sgp(sgs, i); sgp && (n < SG_MAX);
+ sgp = __sg_next(sgp)) {
+ bpf_probe_read_kernel(&len, sizeof(len), &sgp->length);
+ length2 += len;
+ n++;
+ }
+ }
+
+ config = 1;
+ result = length2 - length1;
+ return 0;
+}
diff --git a/tools/testing/selftests/bpf/progs/lru_bug.c b/tools/testing/selftests/bpf/progs/lru_bug.c
new file mode 100644
index 000000000000..ad73029cb1e3
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/lru_bug.c
@@ -0,0 +1,49 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <vmlinux.h>
+#include <bpf/bpf_tracing.h>
+#include <bpf/bpf_helpers.h>
+
+struct map_value {
+ struct task_struct __kptr_untrusted *ptr;
+};
+
+struct {
+ __uint(type, BPF_MAP_TYPE_LRU_HASH);
+ __uint(max_entries, 1);
+ __type(key, int);
+ __type(value, struct map_value);
+} lru_map SEC(".maps");
+
+int pid = 0;
+int result = 1;
+
+SEC("fentry/bpf_ktime_get_ns")
+int printk(void *ctx)
+{
+ struct map_value v = {};
+
+ if (pid == bpf_get_current_task_btf()->pid)
+ bpf_map_update_elem(&lru_map, &(int){0}, &v, 0);
+ return 0;
+}
+
+SEC("fentry/do_nanosleep")
+int nanosleep(void *ctx)
+{
+ struct map_value val = {}, *v;
+ struct task_struct *current;
+
+ bpf_map_update_elem(&lru_map, &(int){0}, &val, 0);
+ v = bpf_map_lookup_elem(&lru_map, &(int){0});
+ if (!v)
+ return 0;
+ bpf_map_delete_elem(&lru_map, &(int){0});
+ current = bpf_get_current_task_btf();
+ v->ptr = current;
+ pid = current->pid;
+ bpf_ktime_get_ns();
+ result = !v->ptr;
+ return 0;
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/lsm.c b/tools/testing/selftests/bpf/progs/lsm.c
index b4598d4bc4f7..0c13b7409947 100644
--- a/tools/testing/selftests/bpf/progs/lsm.c
+++ b/tools/testing/selftests/bpf/progs/lsm.c
@@ -5,9 +5,79 @@
*/
#include "vmlinux.h"
+#include <errno.h>
+#include <bpf/bpf_core_read.h>
#include <bpf/bpf_helpers.h>
#include <bpf/bpf_tracing.h>
-#include <errno.h>
+#include "bpf_misc.h"
+
+struct {
+ __uint(type, BPF_MAP_TYPE_ARRAY);
+ __uint(max_entries, 1);
+ __type(key, __u32);
+ __type(value, __u64);
+} array SEC(".maps");
+
+struct {
+ __uint(type, BPF_MAP_TYPE_HASH);
+ __uint(max_entries, 1);
+ __type(key, __u32);
+ __type(value, __u64);
+} hash SEC(".maps");
+
+struct {
+ __uint(type, BPF_MAP_TYPE_LRU_HASH);
+ __uint(max_entries, 1);
+ __type(key, __u32);
+ __type(value, __u64);
+} lru_hash SEC(".maps");
+
+struct {
+ __uint(type, BPF_MAP_TYPE_PERCPU_ARRAY);
+ __uint(max_entries, 1);
+ __type(key, __u32);
+ __type(value, __u64);
+} percpu_array SEC(".maps");
+
+struct {
+ __uint(type, BPF_MAP_TYPE_PERCPU_HASH);
+ __uint(max_entries, 1);
+ __type(key, __u32);
+ __type(value, __u64);
+} percpu_hash SEC(".maps");
+
+struct {
+ __uint(type, BPF_MAP_TYPE_LRU_PERCPU_HASH);
+ __uint(max_entries, 1);
+ __type(key, __u32);
+ __type(value, __u64);
+} lru_percpu_hash SEC(".maps");
+
+struct inner_map {
+ __uint(type, BPF_MAP_TYPE_ARRAY);
+ __uint(max_entries, 1);
+ __type(key, int);
+ __type(value, __u64);
+} inner_map SEC(".maps");
+
+struct outer_arr {
+ __uint(type, BPF_MAP_TYPE_ARRAY_OF_MAPS);
+ __uint(max_entries, 1);
+ __uint(key_size, sizeof(int));
+ __uint(value_size, sizeof(int));
+ __array(values, struct inner_map);
+} outer_arr SEC(".maps") = {
+ .values = { [0] = &inner_map },
+};
+
+struct outer_hash {
+ __uint(type, BPF_MAP_TYPE_HASH_OF_MAPS);
+ __uint(max_entries, 1);
+ __uint(key_size, sizeof(int));
+ __array(values, struct inner_map);
+} outer_hash SEC(".maps") = {
+ .values = { [0] = &inner_map },
+};
char _license[] SEC("license") = "GPL";
@@ -22,7 +92,7 @@ int BPF_PROG(test_int_hook, struct vm_area_struct *vma,
if (ret != 0)
return ret;
- __u32 pid = bpf_get_current_pid_tgid() >> 32;
+ __s32 pid = bpf_get_current_pid_tgid() >> 32;
int is_stack = 0;
is_stack = (vma->vm_start <= vma->vm_mm->start_stack &&
@@ -36,13 +106,76 @@ int BPF_PROG(test_int_hook, struct vm_area_struct *vma,
return ret;
}
-SEC("lsm/bprm_committed_creds")
+SEC("lsm.s/bprm_committed_creds")
int BPF_PROG(test_void_hook, struct linux_binprm *bprm)
{
__u32 pid = bpf_get_current_pid_tgid() >> 32;
+ struct inner_map *inner_map;
+ char args[64];
+ __u32 key = 0;
+ __u64 *value;
if (monitored_pid == pid)
bprm_count++;
+ bpf_copy_from_user(args, sizeof(args), (void *)bprm->vma->vm_mm->arg_start);
+ bpf_copy_from_user(args, sizeof(args), (void *)bprm->mm->arg_start);
+
+ value = bpf_map_lookup_elem(&array, &key);
+ if (value)
+ *value = 0;
+ value = bpf_map_lookup_elem(&hash, &key);
+ if (value)
+ *value = 0;
+ value = bpf_map_lookup_elem(&lru_hash, &key);
+ if (value)
+ *value = 0;
+ value = bpf_map_lookup_elem(&percpu_array, &key);
+ if (value)
+ *value = 0;
+ value = bpf_map_lookup_elem(&percpu_hash, &key);
+ if (value)
+ *value = 0;
+ value = bpf_map_lookup_elem(&lru_percpu_hash, &key);
+ if (value)
+ *value = 0;
+ inner_map = bpf_map_lookup_elem(&outer_arr, &key);
+ if (inner_map) {
+ value = bpf_map_lookup_elem(inner_map, &key);
+ if (value)
+ *value = 0;
+ }
+ inner_map = bpf_map_lookup_elem(&outer_hash, &key);
+ if (inner_map) {
+ value = bpf_map_lookup_elem(inner_map, &key);
+ if (value)
+ *value = 0;
+ }
+
+ return 0;
+}
+SEC("lsm/task_free") /* lsm/ is ok, lsm.s/ fails */
+int BPF_PROG(test_task_free, struct task_struct *task)
+{
+ return 0;
+}
+
+int copy_test = 0;
+
+SEC("fentry.s/" SYS_PREFIX "sys_setdomainname")
+int BPF_PROG(test_sys_setdomainname, struct pt_regs *regs)
+{
+ void *ptr = (void *)PT_REGS_PARM1_SYSCALL(regs);
+ int len = PT_REGS_PARM2_SYSCALL(regs);
+ int buf = 0;
+ long ret;
+
+ ret = bpf_copy_from_user(&buf, sizeof(buf), ptr);
+ if (len == -2 && ret == 0 && buf == 1234)
+ copy_test++;
+ if (len == -3 && ret == -EFAULT)
+ copy_test++;
+ if (len == -4 && ret == -EFAULT)
+ copy_test++;
return 0;
}
diff --git a/tools/testing/selftests/bpf/progs/lsm_cgroup.c b/tools/testing/selftests/bpf/progs/lsm_cgroup.c
new file mode 100644
index 000000000000..02c11d16b692
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/lsm_cgroup.c
@@ -0,0 +1,188 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include "vmlinux.h"
+#include "bpf_tracing_net.h"
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
+
+char _license[] SEC("license") = "GPL";
+
+extern bool CONFIG_SECURITY_SELINUX __kconfig __weak;
+extern bool CONFIG_SECURITY_SMACK __kconfig __weak;
+extern bool CONFIG_SECURITY_APPARMOR __kconfig __weak;
+
+#ifndef AF_PACKET
+#define AF_PACKET 17
+#endif
+
+#ifndef AF_UNIX
+#define AF_UNIX 1
+#endif
+
+#ifndef EPERM
+#define EPERM 1
+#endif
+
+struct {
+ __uint(type, BPF_MAP_TYPE_CGROUP_STORAGE);
+ __type(key, __u64);
+ __type(value, __u64);
+} cgroup_storage SEC(".maps");
+
+int called_socket_post_create;
+int called_socket_post_create2;
+int called_socket_bind;
+int called_socket_bind2;
+int called_socket_alloc;
+int called_socket_clone;
+
+static __always_inline int test_local_storage(void)
+{
+ __u64 *val;
+
+ val = bpf_get_local_storage(&cgroup_storage, 0);
+ if (!val)
+ return 0;
+ *val += 1;
+
+ return 1;
+}
+
+static __always_inline int real_create(struct socket *sock, int family,
+ int protocol)
+{
+ struct sock *sk;
+ int prio = 123;
+
+ /* Reject non-tx-only AF_PACKET. */
+ if (family == AF_PACKET && protocol != 0)
+ return 0; /* EPERM */
+
+ sk = sock->sk;
+ if (!sk)
+ return 1;
+
+ /* The rest of the sockets get default policy. */
+ if (bpf_setsockopt(sk, SOL_SOCKET, SO_PRIORITY, &prio, sizeof(prio)))
+ return 0; /* EPERM */
+
+ /* Make sure bpf_getsockopt is allowed and works. */
+ prio = 0;
+ if (bpf_getsockopt(sk, SOL_SOCKET, SO_PRIORITY, &prio, sizeof(prio)))
+ return 0; /* EPERM */
+ if (prio != 123)
+ return 0; /* EPERM */
+
+ /* Can access cgroup local storage. */
+ if (!test_local_storage())
+ return 0; /* EPERM */
+
+ return 1;
+}
+
+/* __cgroup_bpf_run_lsm_socket */
+SEC("lsm_cgroup/socket_post_create")
+int BPF_PROG(socket_post_create, struct socket *sock, int family,
+ int type, int protocol, int kern)
+{
+ called_socket_post_create++;
+ return real_create(sock, family, protocol);
+}
+
+/* __cgroup_bpf_run_lsm_socket */
+SEC("lsm_cgroup/socket_post_create")
+int BPF_PROG(socket_post_create2, struct socket *sock, int family,
+ int type, int protocol, int kern)
+{
+ called_socket_post_create2++;
+ return real_create(sock, family, protocol);
+}
+
+static __always_inline int real_bind(struct socket *sock,
+ struct sockaddr *address,
+ int addrlen)
+{
+ struct sockaddr_ll sa = {};
+
+ if (sock->sk->__sk_common.skc_family != AF_PACKET)
+ return 1;
+
+ if (sock->sk->sk_kern_sock)
+ return 1;
+
+ bpf_probe_read_kernel(&sa, sizeof(sa), address);
+ if (sa.sll_protocol)
+ return 0; /* EPERM */
+
+ /* Can access cgroup local storage. */
+ if (!test_local_storage())
+ return 0; /* EPERM */
+
+ return 1;
+}
+
+/* __cgroup_bpf_run_lsm_socket */
+SEC("lsm_cgroup/socket_bind")
+int BPF_PROG(socket_bind, struct socket *sock, struct sockaddr *address,
+ int addrlen)
+{
+ called_socket_bind++;
+ return real_bind(sock, address, addrlen);
+}
+
+/* __cgroup_bpf_run_lsm_socket */
+SEC("lsm_cgroup/socket_bind")
+int BPF_PROG(socket_bind2, struct socket *sock, struct sockaddr *address,
+ int addrlen)
+{
+ called_socket_bind2++;
+ return real_bind(sock, address, addrlen);
+}
+
+/* __cgroup_bpf_run_lsm_current (via bpf_lsm_current_hooks) */
+SEC("lsm_cgroup/sk_alloc_security")
+int BPF_PROG(socket_alloc, struct sock *sk, int family, gfp_t priority)
+{
+ called_socket_alloc++;
+ /* if already have non-bpf lsms installed, EPERM will cause memory leak of non-bpf lsms */
+ if (CONFIG_SECURITY_SELINUX || CONFIG_SECURITY_SMACK || CONFIG_SECURITY_APPARMOR)
+ return 1;
+
+ if (family == AF_UNIX)
+ return 0; /* EPERM */
+
+ /* Can access cgroup local storage. */
+ if (!test_local_storage())
+ return 0; /* EPERM */
+
+ return 1;
+}
+
+/* __cgroup_bpf_run_lsm_sock */
+SEC("lsm_cgroup/inet_csk_clone")
+int BPF_PROG(socket_clone, struct sock *newsk, const struct request_sock *req)
+{
+ int prio = 234;
+
+ if (!newsk)
+ return 1;
+
+ /* Accepted request sockets get a different priority. */
+ if (bpf_setsockopt(newsk, SOL_SOCKET, SO_PRIORITY, &prio, sizeof(prio)))
+ return 1;
+
+ /* Make sure bpf_getsockopt is allowed and works. */
+ prio = 0;
+ if (bpf_getsockopt(newsk, SOL_SOCKET, SO_PRIORITY, &prio, sizeof(prio)))
+ return 1;
+ if (prio != 234)
+ return 1;
+
+ /* Can access cgroup local storage. */
+ if (!test_local_storage())
+ return 1;
+
+ called_socket_clone++;
+
+ return 1;
+}
diff --git a/tools/testing/selftests/bpf/progs/lsm_cgroup_nonvoid.c b/tools/testing/selftests/bpf/progs/lsm_cgroup_nonvoid.c
new file mode 100644
index 000000000000..6cb0f161f417
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/lsm_cgroup_nonvoid.c
@@ -0,0 +1,14 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include "vmlinux.h"
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
+
+char _license[] SEC("license") = "GPL";
+
+SEC("lsm_cgroup/inet_csk_clone")
+int BPF_PROG(nonvoid_socket_clone, struct sock *newsk, const struct request_sock *req)
+{
+ /* Can not return any errors from void LSM hooks. */
+ return 0;
+}
diff --git a/tools/testing/selftests/bpf/progs/map_in_map_btf.c b/tools/testing/selftests/bpf/progs/map_in_map_btf.c
new file mode 100644
index 000000000000..7a1336d7b16a
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/map_in_map_btf.c
@@ -0,0 +1,73 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (C) 2023. Huawei Technologies Co., Ltd */
+#include <vmlinux.h>
+#include <bpf/bpf_tracing.h>
+#include <bpf/bpf_helpers.h>
+
+#include "bpf_misc.h"
+#include "bpf_experimental.h"
+
+struct node_data {
+ __u64 data;
+ struct bpf_list_node node;
+};
+
+struct map_value {
+ struct bpf_list_head head __contains(node_data, node);
+ struct bpf_spin_lock lock;
+};
+
+struct inner_array_type {
+ __uint(type, BPF_MAP_TYPE_ARRAY);
+ __type(key, int);
+ __type(value, struct map_value);
+ __uint(max_entries, 1);
+} inner_array SEC(".maps");
+
+struct {
+ __uint(type, BPF_MAP_TYPE_ARRAY_OF_MAPS);
+ __uint(key_size, 4);
+ __uint(value_size, 4);
+ __uint(max_entries, 1);
+ __array(values, struct inner_array_type);
+} outer_array SEC(".maps") = {
+ .values = {
+ [0] = &inner_array,
+ },
+};
+
+char _license[] SEC("license") = "GPL";
+
+int pid = 0;
+bool done = false;
+
+SEC("fentry/" SYS_PREFIX "sys_nanosleep")
+int add_to_list_in_inner_array(void *ctx)
+{
+ struct map_value *value;
+ struct node_data *new;
+ struct bpf_map *map;
+ int zero = 0;
+
+ if (done || (u32)bpf_get_current_pid_tgid() != pid)
+ return 0;
+
+ map = bpf_map_lookup_elem(&outer_array, &zero);
+ if (!map)
+ return 0;
+
+ value = bpf_map_lookup_elem(map, &zero);
+ if (!value)
+ return 0;
+
+ new = bpf_obj_new(typeof(*new));
+ if (!new)
+ return 0;
+
+ bpf_spin_lock(&value->lock);
+ bpf_list_push_back(&value->head, &new->node);
+ bpf_spin_unlock(&value->lock);
+ done = true;
+
+ return 0;
+}
diff --git a/tools/testing/selftests/bpf/progs/map_kptr.c b/tools/testing/selftests/bpf/progs/map_kptr.c
new file mode 100644
index 000000000000..da30f0d59364
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/map_kptr.c
@@ -0,0 +1,530 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <vmlinux.h>
+#include <bpf/bpf_tracing.h>
+#include <bpf/bpf_helpers.h>
+#include "../bpf_testmod/bpf_testmod_kfunc.h"
+
+struct map_value {
+ struct prog_test_ref_kfunc __kptr_untrusted *unref_ptr;
+ struct prog_test_ref_kfunc __kptr *ref_ptr;
+};
+
+struct array_map {
+ __uint(type, BPF_MAP_TYPE_ARRAY);
+ __type(key, int);
+ __type(value, struct map_value);
+ __uint(max_entries, 1);
+} array_map SEC(".maps");
+
+struct pcpu_array_map {
+ __uint(type, BPF_MAP_TYPE_PERCPU_ARRAY);
+ __type(key, int);
+ __type(value, struct map_value);
+ __uint(max_entries, 1);
+} pcpu_array_map SEC(".maps");
+
+struct hash_map {
+ __uint(type, BPF_MAP_TYPE_HASH);
+ __type(key, int);
+ __type(value, struct map_value);
+ __uint(max_entries, 1);
+} hash_map SEC(".maps");
+
+struct pcpu_hash_map {
+ __uint(type, BPF_MAP_TYPE_PERCPU_HASH);
+ __type(key, int);
+ __type(value, struct map_value);
+ __uint(max_entries, 1);
+} pcpu_hash_map SEC(".maps");
+
+struct hash_malloc_map {
+ __uint(type, BPF_MAP_TYPE_HASH);
+ __type(key, int);
+ __type(value, struct map_value);
+ __uint(max_entries, 1);
+ __uint(map_flags, BPF_F_NO_PREALLOC);
+} hash_malloc_map SEC(".maps");
+
+struct pcpu_hash_malloc_map {
+ __uint(type, BPF_MAP_TYPE_PERCPU_HASH);
+ __type(key, int);
+ __type(value, struct map_value);
+ __uint(max_entries, 1);
+ __uint(map_flags, BPF_F_NO_PREALLOC);
+} pcpu_hash_malloc_map SEC(".maps");
+
+struct lru_hash_map {
+ __uint(type, BPF_MAP_TYPE_LRU_HASH);
+ __type(key, int);
+ __type(value, struct map_value);
+ __uint(max_entries, 1);
+} lru_hash_map SEC(".maps");
+
+struct lru_pcpu_hash_map {
+ __uint(type, BPF_MAP_TYPE_LRU_PERCPU_HASH);
+ __type(key, int);
+ __type(value, struct map_value);
+ __uint(max_entries, 1);
+} lru_pcpu_hash_map SEC(".maps");
+
+struct cgrp_ls_map {
+ __uint(type, BPF_MAP_TYPE_CGRP_STORAGE);
+ __uint(map_flags, BPF_F_NO_PREALLOC);
+ __type(key, int);
+ __type(value, struct map_value);
+} cgrp_ls_map SEC(".maps");
+
+struct task_ls_map {
+ __uint(type, BPF_MAP_TYPE_TASK_STORAGE);
+ __uint(map_flags, BPF_F_NO_PREALLOC);
+ __type(key, int);
+ __type(value, struct map_value);
+} task_ls_map SEC(".maps");
+
+struct inode_ls_map {
+ __uint(type, BPF_MAP_TYPE_INODE_STORAGE);
+ __uint(map_flags, BPF_F_NO_PREALLOC);
+ __type(key, int);
+ __type(value, struct map_value);
+} inode_ls_map SEC(".maps");
+
+struct sk_ls_map {
+ __uint(type, BPF_MAP_TYPE_SK_STORAGE);
+ __uint(map_flags, BPF_F_NO_PREALLOC);
+ __type(key, int);
+ __type(value, struct map_value);
+} sk_ls_map SEC(".maps");
+
+#define DEFINE_MAP_OF_MAP(map_type, inner_map_type, name) \
+ struct { \
+ __uint(type, map_type); \
+ __uint(max_entries, 1); \
+ __uint(key_size, sizeof(int)); \
+ __uint(value_size, sizeof(int)); \
+ __array(values, struct inner_map_type); \
+ } name SEC(".maps") = { \
+ .values = { [0] = &inner_map_type }, \
+ }
+
+DEFINE_MAP_OF_MAP(BPF_MAP_TYPE_ARRAY_OF_MAPS, array_map, array_of_array_maps);
+DEFINE_MAP_OF_MAP(BPF_MAP_TYPE_ARRAY_OF_MAPS, hash_map, array_of_hash_maps);
+DEFINE_MAP_OF_MAP(BPF_MAP_TYPE_ARRAY_OF_MAPS, hash_malloc_map, array_of_hash_malloc_maps);
+DEFINE_MAP_OF_MAP(BPF_MAP_TYPE_ARRAY_OF_MAPS, lru_hash_map, array_of_lru_hash_maps);
+DEFINE_MAP_OF_MAP(BPF_MAP_TYPE_HASH_OF_MAPS, array_map, hash_of_array_maps);
+DEFINE_MAP_OF_MAP(BPF_MAP_TYPE_HASH_OF_MAPS, hash_map, hash_of_hash_maps);
+DEFINE_MAP_OF_MAP(BPF_MAP_TYPE_HASH_OF_MAPS, hash_malloc_map, hash_of_hash_malloc_maps);
+DEFINE_MAP_OF_MAP(BPF_MAP_TYPE_HASH_OF_MAPS, lru_hash_map, hash_of_lru_hash_maps);
+
+#define WRITE_ONCE(x, val) ((*(volatile typeof(x) *) &(x)) = (val))
+
+static void test_kptr_unref(struct map_value *v)
+{
+ struct prog_test_ref_kfunc *p;
+
+ p = v->unref_ptr;
+ /* store untrusted_ptr_or_null_ */
+ WRITE_ONCE(v->unref_ptr, p);
+ if (!p)
+ return;
+ if (p->a + p->b > 100)
+ return;
+ /* store untrusted_ptr_ */
+ WRITE_ONCE(v->unref_ptr, p);
+ /* store NULL */
+ WRITE_ONCE(v->unref_ptr, NULL);
+}
+
+static void test_kptr_ref(struct map_value *v)
+{
+ struct prog_test_ref_kfunc *p;
+
+ p = v->ref_ptr;
+ /* store ptr_or_null_ */
+ WRITE_ONCE(v->unref_ptr, p);
+ if (!p)
+ return;
+ /*
+ * p is rcu_ptr_prog_test_ref_kfunc,
+ * because bpf prog is non-sleepable and runs in RCU CS.
+ * p can be passed to kfunc that requires KF_RCU.
+ */
+ bpf_kfunc_call_test_ref(p);
+ if (p->a + p->b > 100)
+ return;
+ /* store NULL */
+ p = bpf_kptr_xchg(&v->ref_ptr, NULL);
+ if (!p)
+ return;
+ /*
+ * p is trusted_ptr_prog_test_ref_kfunc.
+ * p can be passed to kfunc that requires KF_RCU.
+ */
+ bpf_kfunc_call_test_ref(p);
+ if (p->a + p->b > 100) {
+ bpf_kfunc_call_test_release(p);
+ return;
+ }
+ /* store ptr_ */
+ WRITE_ONCE(v->unref_ptr, p);
+ bpf_kfunc_call_test_release(p);
+
+ p = bpf_kfunc_call_test_acquire(&(unsigned long){0});
+ if (!p)
+ return;
+ /* store ptr_ */
+ p = bpf_kptr_xchg(&v->ref_ptr, p);
+ if (!p)
+ return;
+ if (p->a + p->b > 100) {
+ bpf_kfunc_call_test_release(p);
+ return;
+ }
+ bpf_kfunc_call_test_release(p);
+}
+
+static void test_kptr(struct map_value *v)
+{
+ test_kptr_unref(v);
+ test_kptr_ref(v);
+}
+
+SEC("tc")
+int test_map_kptr(struct __sk_buff *ctx)
+{
+ struct map_value *v;
+ int key = 0;
+
+#define TEST(map) \
+ v = bpf_map_lookup_elem(&map, &key); \
+ if (!v) \
+ return 0; \
+ test_kptr(v)
+
+ TEST(array_map);
+ TEST(hash_map);
+ TEST(hash_malloc_map);
+ TEST(lru_hash_map);
+
+#undef TEST
+ return 0;
+}
+
+SEC("tp_btf/cgroup_mkdir")
+int BPF_PROG(test_cgrp_map_kptr, struct cgroup *cgrp, const char *path)
+{
+ struct map_value *v;
+
+ v = bpf_cgrp_storage_get(&cgrp_ls_map, cgrp, NULL, BPF_LOCAL_STORAGE_GET_F_CREATE);
+ if (v)
+ test_kptr(v);
+ return 0;
+}
+
+SEC("lsm/inode_unlink")
+int BPF_PROG(test_task_map_kptr, struct inode *inode, struct dentry *victim)
+{
+ struct task_struct *task;
+ struct map_value *v;
+
+ task = bpf_get_current_task_btf();
+ if (!task)
+ return 0;
+ v = bpf_task_storage_get(&task_ls_map, task, NULL, BPF_LOCAL_STORAGE_GET_F_CREATE);
+ if (v)
+ test_kptr(v);
+ return 0;
+}
+
+SEC("lsm/inode_unlink")
+int BPF_PROG(test_inode_map_kptr, struct inode *inode, struct dentry *victim)
+{
+ struct map_value *v;
+
+ v = bpf_inode_storage_get(&inode_ls_map, inode, NULL, BPF_LOCAL_STORAGE_GET_F_CREATE);
+ if (v)
+ test_kptr(v);
+ return 0;
+}
+
+SEC("tc")
+int test_sk_map_kptr(struct __sk_buff *ctx)
+{
+ struct map_value *v;
+ struct bpf_sock *sk;
+
+ sk = ctx->sk;
+ if (!sk)
+ return 0;
+ v = bpf_sk_storage_get(&sk_ls_map, sk, NULL, BPF_LOCAL_STORAGE_GET_F_CREATE);
+ if (v)
+ test_kptr(v);
+ return 0;
+}
+
+SEC("tc")
+int test_map_in_map_kptr(struct __sk_buff *ctx)
+{
+ struct map_value *v;
+ int key = 0;
+ void *map;
+
+#define TEST(map_in_map) \
+ map = bpf_map_lookup_elem(&map_in_map, &key); \
+ if (!map) \
+ return 0; \
+ v = bpf_map_lookup_elem(map, &key); \
+ if (!v) \
+ return 0; \
+ test_kptr(v)
+
+ TEST(array_of_array_maps);
+ TEST(array_of_hash_maps);
+ TEST(array_of_hash_malloc_maps);
+ TEST(array_of_lru_hash_maps);
+ TEST(hash_of_array_maps);
+ TEST(hash_of_hash_maps);
+ TEST(hash_of_hash_malloc_maps);
+ TEST(hash_of_lru_hash_maps);
+
+#undef TEST
+ return 0;
+}
+
+int ref = 1;
+
+static __always_inline
+int test_map_kptr_ref_pre(struct map_value *v)
+{
+ struct prog_test_ref_kfunc *p, *p_st;
+ unsigned long arg = 0;
+ int ret;
+
+ p = bpf_kfunc_call_test_acquire(&arg);
+ if (!p)
+ return 1;
+ ref++;
+
+ p_st = p->next;
+ if (p_st->cnt.refs.counter != ref) {
+ ret = 2;
+ goto end;
+ }
+
+ p = bpf_kptr_xchg(&v->ref_ptr, p);
+ if (p) {
+ ret = 3;
+ goto end;
+ }
+ if (p_st->cnt.refs.counter != ref)
+ return 4;
+
+ p = bpf_kptr_xchg(&v->ref_ptr, NULL);
+ if (!p)
+ return 5;
+ bpf_kfunc_call_test_release(p);
+ ref--;
+ if (p_st->cnt.refs.counter != ref)
+ return 6;
+
+ p = bpf_kfunc_call_test_acquire(&arg);
+ if (!p)
+ return 7;
+ ref++;
+ p = bpf_kptr_xchg(&v->ref_ptr, p);
+ if (p) {
+ ret = 8;
+ goto end;
+ }
+ if (p_st->cnt.refs.counter != ref)
+ return 9;
+ /* Leave in map */
+
+ return 0;
+end:
+ ref--;
+ bpf_kfunc_call_test_release(p);
+ return ret;
+}
+
+static __always_inline
+int test_map_kptr_ref_post(struct map_value *v)
+{
+ struct prog_test_ref_kfunc *p, *p_st;
+
+ p_st = v->ref_ptr;
+ if (!p_st || p_st->cnt.refs.counter != ref)
+ return 1;
+
+ p = bpf_kptr_xchg(&v->ref_ptr, NULL);
+ if (!p)
+ return 2;
+ if (p_st->cnt.refs.counter != ref) {
+ bpf_kfunc_call_test_release(p);
+ return 3;
+ }
+
+ p = bpf_kptr_xchg(&v->ref_ptr, p);
+ if (p) {
+ bpf_kfunc_call_test_release(p);
+ return 4;
+ }
+ if (p_st->cnt.refs.counter != ref)
+ return 5;
+
+ return 0;
+}
+
+#define TEST(map) \
+ v = bpf_map_lookup_elem(&map, &key); \
+ if (!v) \
+ return -1; \
+ ret = test_map_kptr_ref_pre(v); \
+ if (ret) \
+ return ret;
+
+#define TEST_PCPU(map) \
+ v = bpf_map_lookup_percpu_elem(&map, &key, 0); \
+ if (!v) \
+ return -1; \
+ ret = test_map_kptr_ref_pre(v); \
+ if (ret) \
+ return ret;
+
+SEC("tc")
+int test_map_kptr_ref1(struct __sk_buff *ctx)
+{
+ struct map_value *v, val = {};
+ int key = 0, ret;
+
+ bpf_map_update_elem(&hash_map, &key, &val, 0);
+ bpf_map_update_elem(&hash_malloc_map, &key, &val, 0);
+ bpf_map_update_elem(&lru_hash_map, &key, &val, 0);
+
+ bpf_map_update_elem(&pcpu_hash_map, &key, &val, 0);
+ bpf_map_update_elem(&pcpu_hash_malloc_map, &key, &val, 0);
+ bpf_map_update_elem(&lru_pcpu_hash_map, &key, &val, 0);
+
+ TEST(array_map);
+ TEST(hash_map);
+ TEST(hash_malloc_map);
+ TEST(lru_hash_map);
+
+ TEST_PCPU(pcpu_array_map);
+ TEST_PCPU(pcpu_hash_map);
+ TEST_PCPU(pcpu_hash_malloc_map);
+ TEST_PCPU(lru_pcpu_hash_map);
+
+ return 0;
+}
+
+#undef TEST
+#undef TEST_PCPU
+
+#define TEST(map) \
+ v = bpf_map_lookup_elem(&map, &key); \
+ if (!v) \
+ return -1; \
+ ret = test_map_kptr_ref_post(v); \
+ if (ret) \
+ return ret;
+
+#define TEST_PCPU(map) \
+ v = bpf_map_lookup_percpu_elem(&map, &key, 0); \
+ if (!v) \
+ return -1; \
+ ret = test_map_kptr_ref_post(v); \
+ if (ret) \
+ return ret;
+
+SEC("tc")
+int test_map_kptr_ref2(struct __sk_buff *ctx)
+{
+ struct map_value *v;
+ int key = 0, ret;
+
+ TEST(array_map);
+ TEST(hash_map);
+ TEST(hash_malloc_map);
+ TEST(lru_hash_map);
+
+ TEST_PCPU(pcpu_array_map);
+ TEST_PCPU(pcpu_hash_map);
+ TEST_PCPU(pcpu_hash_malloc_map);
+ TEST_PCPU(lru_pcpu_hash_map);
+
+ return 0;
+}
+
+#undef TEST
+#undef TEST_PCPU
+
+SEC("tc")
+int test_map_kptr_ref3(struct __sk_buff *ctx)
+{
+ struct prog_test_ref_kfunc *p;
+ unsigned long sp = 0;
+
+ p = bpf_kfunc_call_test_acquire(&sp);
+ if (!p)
+ return 1;
+ ref++;
+ if (p->cnt.refs.counter != ref) {
+ bpf_kfunc_call_test_release(p);
+ return 2;
+ }
+ bpf_kfunc_call_test_release(p);
+ ref--;
+ return 0;
+}
+
+SEC("syscall")
+int test_ls_map_kptr_ref1(void *ctx)
+{
+ struct task_struct *current;
+ struct map_value *v;
+
+ current = bpf_get_current_task_btf();
+ if (!current)
+ return 100;
+ v = bpf_task_storage_get(&task_ls_map, current, NULL, 0);
+ if (v)
+ return 150;
+ v = bpf_task_storage_get(&task_ls_map, current, NULL, BPF_LOCAL_STORAGE_GET_F_CREATE);
+ if (!v)
+ return 200;
+ return test_map_kptr_ref_pre(v);
+}
+
+SEC("syscall")
+int test_ls_map_kptr_ref2(void *ctx)
+{
+ struct task_struct *current;
+ struct map_value *v;
+
+ current = bpf_get_current_task_btf();
+ if (!current)
+ return 100;
+ v = bpf_task_storage_get(&task_ls_map, current, NULL, 0);
+ if (!v)
+ return 200;
+ return test_map_kptr_ref_post(v);
+}
+
+SEC("syscall")
+int test_ls_map_kptr_ref_del(void *ctx)
+{
+ struct task_struct *current;
+ struct map_value *v;
+
+ current = bpf_get_current_task_btf();
+ if (!current)
+ return 100;
+ v = bpf_task_storage_get(&task_ls_map, current, NULL, 0);
+ if (!v)
+ return 200;
+ if (!v->ref_ptr)
+ return 300;
+ return bpf_task_storage_delete(&task_ls_map, current);
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/map_kptr_fail.c b/tools/testing/selftests/bpf/progs/map_kptr_fail.c
new file mode 100644
index 000000000000..450bb373b179
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/map_kptr_fail.c
@@ -0,0 +1,388 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <vmlinux.h>
+#include <bpf/bpf_tracing.h>
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_core_read.h>
+#include "bpf_misc.h"
+#include "../bpf_testmod/bpf_testmod_kfunc.h"
+
+struct map_value {
+ char buf[8];
+ struct prog_test_ref_kfunc __kptr_untrusted *unref_ptr;
+ struct prog_test_ref_kfunc __kptr *ref_ptr;
+ struct prog_test_member __kptr *ref_memb_ptr;
+};
+
+struct array_map {
+ __uint(type, BPF_MAP_TYPE_ARRAY);
+ __type(key, int);
+ __type(value, struct map_value);
+ __uint(max_entries, 1);
+} array_map SEC(".maps");
+
+SEC("?tc")
+__failure __msg("kptr access size must be BPF_DW")
+int size_not_bpf_dw(struct __sk_buff *ctx)
+{
+ struct map_value *v;
+ int key = 0;
+
+ v = bpf_map_lookup_elem(&array_map, &key);
+ if (!v)
+ return 0;
+
+ *(u32 *)&v->unref_ptr = 0;
+ return 0;
+}
+
+SEC("?tc")
+__failure __msg("kptr access cannot have variable offset")
+int non_const_var_off(struct __sk_buff *ctx)
+{
+ struct map_value *v;
+ int key = 0, id;
+
+ v = bpf_map_lookup_elem(&array_map, &key);
+ if (!v)
+ return 0;
+
+ id = ctx->protocol;
+ if (id < 4 || id > 12)
+ return 0;
+ *(u64 *)((void *)v + id) = 0;
+
+ return 0;
+}
+
+SEC("?tc")
+__failure __msg("R1 doesn't have constant offset. kptr has to be")
+int non_const_var_off_kptr_xchg(struct __sk_buff *ctx)
+{
+ struct map_value *v;
+ int key = 0, id;
+
+ v = bpf_map_lookup_elem(&array_map, &key);
+ if (!v)
+ return 0;
+
+ id = ctx->protocol;
+ if (id < 4 || id > 12)
+ return 0;
+ bpf_kptr_xchg((void *)v + id, NULL);
+
+ return 0;
+}
+
+SEC("?tc")
+__failure __msg("kptr access misaligned expected=8 off=7")
+int misaligned_access_write(struct __sk_buff *ctx)
+{
+ struct map_value *v;
+ int key = 0;
+
+ v = bpf_map_lookup_elem(&array_map, &key);
+ if (!v)
+ return 0;
+
+ *(void **)((void *)v + 7) = NULL;
+
+ return 0;
+}
+
+SEC("?tc")
+__failure __msg("kptr access misaligned expected=8 off=1")
+int misaligned_access_read(struct __sk_buff *ctx)
+{
+ struct map_value *v;
+ int key = 0;
+
+ v = bpf_map_lookup_elem(&array_map, &key);
+ if (!v)
+ return 0;
+
+ return *(u64 *)((void *)v + 1);
+}
+
+SEC("?tc")
+__failure __msg("variable untrusted_ptr_ access var_off=(0x0; 0x1e0)")
+int reject_var_off_store(struct __sk_buff *ctx)
+{
+ struct prog_test_ref_kfunc *unref_ptr;
+ struct map_value *v;
+ int key = 0, id;
+
+ v = bpf_map_lookup_elem(&array_map, &key);
+ if (!v)
+ return 0;
+
+ unref_ptr = v->unref_ptr;
+ if (!unref_ptr)
+ return 0;
+ id = ctx->protocol;
+ if (id < 4 || id > 12)
+ return 0;
+ unref_ptr += id;
+ v->unref_ptr = unref_ptr;
+
+ return 0;
+}
+
+SEC("?tc")
+__failure __msg("invalid kptr access, R1 type=untrusted_ptr_prog_test_ref_kfunc")
+int reject_bad_type_match(struct __sk_buff *ctx)
+{
+ struct prog_test_ref_kfunc *unref_ptr;
+ struct map_value *v;
+ int key = 0;
+
+ v = bpf_map_lookup_elem(&array_map, &key);
+ if (!v)
+ return 0;
+
+ unref_ptr = v->unref_ptr;
+ if (!unref_ptr)
+ return 0;
+ unref_ptr = (void *)unref_ptr + 4;
+ v->unref_ptr = unref_ptr;
+
+ return 0;
+}
+
+SEC("?tc")
+__failure __msg("R1 type=untrusted_ptr_or_null_ expected=percpu_ptr_")
+int marked_as_untrusted_or_null(struct __sk_buff *ctx)
+{
+ struct map_value *v;
+ int key = 0;
+
+ v = bpf_map_lookup_elem(&array_map, &key);
+ if (!v)
+ return 0;
+
+ bpf_this_cpu_ptr(v->unref_ptr);
+ return 0;
+}
+
+SEC("?tc")
+__failure __msg("access beyond struct prog_test_ref_kfunc at off 32 size 4")
+int correct_btf_id_check_size(struct __sk_buff *ctx)
+{
+ struct prog_test_ref_kfunc *p;
+ struct map_value *v;
+ int key = 0;
+
+ v = bpf_map_lookup_elem(&array_map, &key);
+ if (!v)
+ return 0;
+
+ p = v->unref_ptr;
+ if (!p)
+ return 0;
+ return *(int *)((void *)p + bpf_core_type_size(struct prog_test_ref_kfunc));
+}
+
+SEC("?tc")
+__failure __msg("R1 type=untrusted_ptr_ expected=percpu_ptr_")
+int inherit_untrusted_on_walk(struct __sk_buff *ctx)
+{
+ struct prog_test_ref_kfunc *unref_ptr;
+ struct map_value *v;
+ int key = 0;
+
+ v = bpf_map_lookup_elem(&array_map, &key);
+ if (!v)
+ return 0;
+
+ unref_ptr = v->unref_ptr;
+ if (!unref_ptr)
+ return 0;
+ unref_ptr = unref_ptr->next;
+ bpf_this_cpu_ptr(unref_ptr);
+ return 0;
+}
+
+SEC("?tc")
+__failure __msg("off=8 kptr isn't referenced kptr")
+int reject_kptr_xchg_on_unref(struct __sk_buff *ctx)
+{
+ struct map_value *v;
+ int key = 0;
+
+ v = bpf_map_lookup_elem(&array_map, &key);
+ if (!v)
+ return 0;
+
+ bpf_kptr_xchg(&v->unref_ptr, NULL);
+ return 0;
+}
+
+SEC("?tc")
+__failure __msg("R1 type=rcu_ptr_or_null_ expected=percpu_ptr_")
+int mark_ref_as_untrusted_or_null(struct __sk_buff *ctx)
+{
+ struct map_value *v;
+ int key = 0;
+
+ v = bpf_map_lookup_elem(&array_map, &key);
+ if (!v)
+ return 0;
+
+ bpf_this_cpu_ptr(v->ref_ptr);
+ return 0;
+}
+
+SEC("?tc")
+__failure __msg("store to referenced kptr disallowed")
+int reject_untrusted_store_to_ref(struct __sk_buff *ctx)
+{
+ struct prog_test_ref_kfunc *p;
+ struct map_value *v;
+ int key = 0;
+
+ v = bpf_map_lookup_elem(&array_map, &key);
+ if (!v)
+ return 0;
+
+ p = v->ref_ptr;
+ if (!p)
+ return 0;
+ /* Checkmate, clang */
+ *(struct prog_test_ref_kfunc * volatile *)&v->ref_ptr = p;
+ return 0;
+}
+
+SEC("?tc")
+__failure __msg("R2 must be referenced")
+int reject_untrusted_xchg(struct __sk_buff *ctx)
+{
+ struct prog_test_ref_kfunc *p;
+ struct map_value *v;
+ int key = 0;
+
+ v = bpf_map_lookup_elem(&array_map, &key);
+ if (!v)
+ return 0;
+
+ p = v->ref_ptr;
+ if (!p)
+ return 0;
+ bpf_kptr_xchg(&v->ref_ptr, p);
+ return 0;
+}
+
+SEC("?tc")
+__failure
+__msg("invalid kptr access, R2 type=ptr_prog_test_ref_kfunc expected=ptr_prog_test_member")
+int reject_bad_type_xchg(struct __sk_buff *ctx)
+{
+ struct prog_test_ref_kfunc *ref_ptr;
+ struct map_value *v;
+ int key = 0;
+
+ v = bpf_map_lookup_elem(&array_map, &key);
+ if (!v)
+ return 0;
+
+ ref_ptr = bpf_kfunc_call_test_acquire(&(unsigned long){0});
+ if (!ref_ptr)
+ return 0;
+ bpf_kptr_xchg(&v->ref_memb_ptr, ref_ptr);
+ return 0;
+}
+
+SEC("?tc")
+__failure __msg("invalid kptr access, R2 type=ptr_prog_test_ref_kfunc")
+int reject_member_of_ref_xchg(struct __sk_buff *ctx)
+{
+ struct prog_test_ref_kfunc *ref_ptr;
+ struct map_value *v;
+ int key = 0;
+
+ v = bpf_map_lookup_elem(&array_map, &key);
+ if (!v)
+ return 0;
+
+ ref_ptr = bpf_kfunc_call_test_acquire(&(unsigned long){0});
+ if (!ref_ptr)
+ return 0;
+ bpf_kptr_xchg(&v->ref_memb_ptr, &ref_ptr->memb);
+ return 0;
+}
+
+SEC("?syscall")
+__failure __msg("kptr cannot be accessed indirectly by helper")
+int reject_indirect_helper_access(struct __sk_buff *ctx)
+{
+ struct map_value *v;
+ int key = 0;
+
+ v = bpf_map_lookup_elem(&array_map, &key);
+ if (!v)
+ return 0;
+
+ bpf_get_current_comm(v, sizeof(v->buf) + 1);
+ return 0;
+}
+
+__noinline
+int write_func(int *p)
+{
+ return p ? *p = 42 : 0;
+}
+
+SEC("?tc")
+__failure __msg("kptr cannot be accessed indirectly by helper")
+int reject_indirect_global_func_access(struct __sk_buff *ctx)
+{
+ struct map_value *v;
+ int key = 0;
+
+ v = bpf_map_lookup_elem(&array_map, &key);
+ if (!v)
+ return 0;
+
+ return write_func((void *)v + 5);
+}
+
+SEC("?tc")
+__failure __msg("Unreleased reference id=5 alloc_insn=")
+int kptr_xchg_ref_state(struct __sk_buff *ctx)
+{
+ struct prog_test_ref_kfunc *p;
+ struct map_value *v;
+ int key = 0;
+
+ v = bpf_map_lookup_elem(&array_map, &key);
+ if (!v)
+ return 0;
+
+ p = bpf_kfunc_call_test_acquire(&(unsigned long){0});
+ if (!p)
+ return 0;
+ bpf_kptr_xchg(&v->ref_ptr, p);
+ return 0;
+}
+
+SEC("?tc")
+__failure __msg("Possibly NULL pointer passed to helper arg2")
+int kptr_xchg_possibly_null(struct __sk_buff *ctx)
+{
+ struct prog_test_ref_kfunc *p;
+ struct map_value *v;
+ int key = 0;
+
+ v = bpf_map_lookup_elem(&array_map, &key);
+ if (!v)
+ return 0;
+
+ p = bpf_kfunc_call_test_acquire(&(unsigned long){0});
+
+ /* PTR_TO_BTF_ID | PTR_MAYBE_NULL passed to bpf_kptr_xchg() */
+ p = bpf_kptr_xchg(&v->ref_ptr, p);
+ if (p)
+ bpf_kfunc_call_test_release(p);
+
+ return 0;
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/map_percpu_stats.c b/tools/testing/selftests/bpf/progs/map_percpu_stats.c
new file mode 100644
index 000000000000..10b2325c1720
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/map_percpu_stats.c
@@ -0,0 +1,24 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2023 Isovalent */
+
+#include "vmlinux.h"
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
+
+__u32 target_id;
+
+__s64 bpf_map_sum_elem_count(struct bpf_map *map) __ksym;
+
+SEC("iter/bpf_map")
+int dump_bpf_map(struct bpf_iter__bpf_map *ctx)
+{
+ struct seq_file *seq = ctx->meta->seq;
+ struct bpf_map *map = ctx->map;
+
+ if (map && map->id == target_id)
+ BPF_SEQ_PRINTF(seq, "%lld", bpf_map_sum_elem_count(map));
+
+ return 0;
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/map_ptr_kern.c b/tools/testing/selftests/bpf/progs/map_ptr_kern.c
new file mode 100644
index 000000000000..efaf622c28dd
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/map_ptr_kern.c
@@ -0,0 +1,703 @@
+// SPDX-License-Identifier: GPL-2.0
+// Copyright (c) 2020 Facebook
+
+#include <linux/bpf.h>
+#include <bpf/bpf_helpers.h>
+
+#define LOOP_BOUND 0xf
+#define MAX_ENTRIES 8
+#define HALF_ENTRIES (MAX_ENTRIES >> 1)
+
+_Static_assert(MAX_ENTRIES < LOOP_BOUND, "MAX_ENTRIES must be < LOOP_BOUND");
+
+enum bpf_map_type g_map_type = BPF_MAP_TYPE_UNSPEC;
+__u32 g_line = 0;
+int page_size = 0; /* userspace should set it */
+
+#define VERIFY_TYPE(type, func) ({ \
+ g_map_type = type; \
+ if (!func()) \
+ return 0; \
+})
+
+
+#define VERIFY(expr) ({ \
+ g_line = __LINE__; \
+ if (!(expr)) \
+ return 0; \
+})
+
+struct bpf_map {
+ enum bpf_map_type map_type;
+ __u32 key_size;
+ __u32 value_size;
+ __u32 max_entries;
+ __u32 id;
+} __attribute__((preserve_access_index));
+
+static inline int check_bpf_map_fields(struct bpf_map *map, __u32 key_size,
+ __u32 value_size, __u32 max_entries)
+{
+ VERIFY(map->map_type == g_map_type);
+ VERIFY(map->key_size == key_size);
+ VERIFY(map->value_size == value_size);
+ VERIFY(map->max_entries == max_entries);
+ VERIFY(map->id > 0);
+
+ return 1;
+}
+
+static inline int check_bpf_map_ptr(struct bpf_map *indirect,
+ struct bpf_map *direct)
+{
+ VERIFY(indirect->map_type == direct->map_type);
+ VERIFY(indirect->key_size == direct->key_size);
+ VERIFY(indirect->value_size == direct->value_size);
+ VERIFY(indirect->max_entries == direct->max_entries);
+ VERIFY(indirect->id == direct->id);
+
+ return 1;
+}
+
+static inline int check(struct bpf_map *indirect, struct bpf_map *direct,
+ __u32 key_size, __u32 value_size, __u32 max_entries)
+{
+ VERIFY(check_bpf_map_ptr(indirect, direct));
+ VERIFY(check_bpf_map_fields(indirect, key_size, value_size,
+ max_entries));
+ return 1;
+}
+
+static inline int check_default(struct bpf_map *indirect,
+ struct bpf_map *direct)
+{
+ VERIFY(check(indirect, direct, sizeof(__u32), sizeof(__u32),
+ MAX_ENTRIES));
+ return 1;
+}
+
+static __noinline int
+check_default_noinline(struct bpf_map *indirect, struct bpf_map *direct)
+{
+ VERIFY(check(indirect, direct, sizeof(__u32), sizeof(__u32),
+ MAX_ENTRIES));
+ return 1;
+}
+
+typedef struct {
+ int counter;
+} atomic_t;
+
+struct bpf_htab {
+ struct bpf_map map;
+ atomic_t count;
+ __u32 n_buckets;
+ __u32 elem_size;
+} __attribute__((preserve_access_index));
+
+struct {
+ __uint(type, BPF_MAP_TYPE_HASH);
+ __uint(map_flags, BPF_F_NO_PREALLOC); /* to test bpf_htab.count */
+ __uint(max_entries, MAX_ENTRIES);
+ __type(key, __u32);
+ __type(value, __u32);
+} m_hash SEC(".maps");
+
+__s64 bpf_map_sum_elem_count(struct bpf_map *map) __ksym;
+
+static inline int check_hash(void)
+{
+ struct bpf_htab *hash = (struct bpf_htab *)&m_hash;
+ struct bpf_map *map = (struct bpf_map *)&m_hash;
+ int i;
+
+ VERIFY(check_default_noinline(&hash->map, map));
+
+ VERIFY(hash->n_buckets == MAX_ENTRIES);
+ VERIFY(hash->elem_size == 64);
+
+ VERIFY(hash->count.counter == 0);
+ VERIFY(bpf_map_sum_elem_count(map) == 0);
+
+ for (i = 0; i < HALF_ENTRIES; ++i) {
+ const __u32 key = i;
+ const __u32 val = 1;
+
+ if (bpf_map_update_elem(hash, &key, &val, 0))
+ return 0;
+ }
+ VERIFY(hash->count.counter == HALF_ENTRIES);
+ VERIFY(bpf_map_sum_elem_count(map) == HALF_ENTRIES);
+
+ return 1;
+}
+
+struct bpf_array {
+ struct bpf_map map;
+ __u32 elem_size;
+} __attribute__((preserve_access_index));
+
+struct {
+ __uint(type, BPF_MAP_TYPE_ARRAY);
+ __uint(max_entries, MAX_ENTRIES);
+ __type(key, __u32);
+ __type(value, __u32);
+} m_array SEC(".maps");
+
+static inline int check_array(void)
+{
+ struct bpf_array *array = (struct bpf_array *)&m_array;
+ struct bpf_map *map = (struct bpf_map *)&m_array;
+ int i, n_lookups = 0, n_keys = 0;
+
+ VERIFY(check_default(&array->map, map));
+
+ VERIFY(array->elem_size == 8);
+
+ for (i = 0; i < array->map.max_entries && i < LOOP_BOUND; ++i) {
+ const __u32 key = i;
+ __u32 *val = bpf_map_lookup_elem(array, &key);
+
+ ++n_lookups;
+ if (val)
+ ++n_keys;
+ }
+
+ VERIFY(n_lookups == MAX_ENTRIES);
+ VERIFY(n_keys == MAX_ENTRIES);
+
+ return 1;
+}
+
+struct {
+ __uint(type, BPF_MAP_TYPE_PROG_ARRAY);
+ __uint(max_entries, MAX_ENTRIES);
+ __type(key, __u32);
+ __type(value, __u32);
+} m_prog_array SEC(".maps");
+
+static inline int check_prog_array(void)
+{
+ struct bpf_array *prog_array = (struct bpf_array *)&m_prog_array;
+ struct bpf_map *map = (struct bpf_map *)&m_prog_array;
+
+ VERIFY(check_default(&prog_array->map, map));
+
+ return 1;
+}
+
+struct {
+ __uint(type, BPF_MAP_TYPE_PERF_EVENT_ARRAY);
+ __uint(max_entries, MAX_ENTRIES);
+ __type(key, __u32);
+ __type(value, __u32);
+} m_perf_event_array SEC(".maps");
+
+static inline int check_perf_event_array(void)
+{
+ struct bpf_array *perf_event_array = (struct bpf_array *)&m_perf_event_array;
+ struct bpf_map *map = (struct bpf_map *)&m_perf_event_array;
+
+ VERIFY(check_default(&perf_event_array->map, map));
+
+ return 1;
+}
+
+struct {
+ __uint(type, BPF_MAP_TYPE_PERCPU_HASH);
+ __uint(max_entries, MAX_ENTRIES);
+ __type(key, __u32);
+ __type(value, __u32);
+} m_percpu_hash SEC(".maps");
+
+static inline int check_percpu_hash(void)
+{
+ struct bpf_htab *percpu_hash = (struct bpf_htab *)&m_percpu_hash;
+ struct bpf_map *map = (struct bpf_map *)&m_percpu_hash;
+
+ VERIFY(check_default(&percpu_hash->map, map));
+
+ return 1;
+}
+
+struct {
+ __uint(type, BPF_MAP_TYPE_PERCPU_ARRAY);
+ __uint(max_entries, MAX_ENTRIES);
+ __type(key, __u32);
+ __type(value, __u32);
+} m_percpu_array SEC(".maps");
+
+static inline int check_percpu_array(void)
+{
+ struct bpf_array *percpu_array = (struct bpf_array *)&m_percpu_array;
+ struct bpf_map *map = (struct bpf_map *)&m_percpu_array;
+
+ VERIFY(check_default(&percpu_array->map, map));
+
+ return 1;
+}
+
+struct bpf_stack_map {
+ struct bpf_map map;
+} __attribute__((preserve_access_index));
+
+struct {
+ __uint(type, BPF_MAP_TYPE_STACK_TRACE);
+ __uint(max_entries, MAX_ENTRIES);
+ __type(key, __u32);
+ __type(value, __u64);
+} m_stack_trace SEC(".maps");
+
+static inline int check_stack_trace(void)
+{
+ struct bpf_stack_map *stack_trace =
+ (struct bpf_stack_map *)&m_stack_trace;
+ struct bpf_map *map = (struct bpf_map *)&m_stack_trace;
+
+ VERIFY(check(&stack_trace->map, map, sizeof(__u32), sizeof(__u64),
+ MAX_ENTRIES));
+
+ return 1;
+}
+
+struct {
+ __uint(type, BPF_MAP_TYPE_CGROUP_ARRAY);
+ __uint(max_entries, MAX_ENTRIES);
+ __type(key, __u32);
+ __type(value, __u32);
+} m_cgroup_array SEC(".maps");
+
+static inline int check_cgroup_array(void)
+{
+ struct bpf_array *cgroup_array = (struct bpf_array *)&m_cgroup_array;
+ struct bpf_map *map = (struct bpf_map *)&m_cgroup_array;
+
+ VERIFY(check_default(&cgroup_array->map, map));
+
+ return 1;
+}
+
+struct {
+ __uint(type, BPF_MAP_TYPE_LRU_HASH);
+ __uint(max_entries, MAX_ENTRIES);
+ __type(key, __u32);
+ __type(value, __u32);
+} m_lru_hash SEC(".maps");
+
+static inline int check_lru_hash(void)
+{
+ struct bpf_htab *lru_hash = (struct bpf_htab *)&m_lru_hash;
+ struct bpf_map *map = (struct bpf_map *)&m_lru_hash;
+
+ VERIFY(check_default(&lru_hash->map, map));
+
+ return 1;
+}
+
+struct {
+ __uint(type, BPF_MAP_TYPE_LRU_PERCPU_HASH);
+ __uint(max_entries, MAX_ENTRIES);
+ __type(key, __u32);
+ __type(value, __u32);
+} m_lru_percpu_hash SEC(".maps");
+
+static inline int check_lru_percpu_hash(void)
+{
+ struct bpf_htab *lru_percpu_hash = (struct bpf_htab *)&m_lru_percpu_hash;
+ struct bpf_map *map = (struct bpf_map *)&m_lru_percpu_hash;
+
+ VERIFY(check_default(&lru_percpu_hash->map, map));
+
+ return 1;
+}
+
+struct lpm_trie {
+ struct bpf_map map;
+} __attribute__((preserve_access_index));
+
+struct lpm_key {
+ struct bpf_lpm_trie_key_hdr trie_key;
+ __u32 data;
+};
+
+struct {
+ __uint(type, BPF_MAP_TYPE_LPM_TRIE);
+ __uint(map_flags, BPF_F_NO_PREALLOC);
+ __uint(max_entries, MAX_ENTRIES);
+ __type(key, struct lpm_key);
+ __type(value, __u32);
+} m_lpm_trie SEC(".maps");
+
+static inline int check_lpm_trie(void)
+{
+ struct lpm_trie *lpm_trie = (struct lpm_trie *)&m_lpm_trie;
+ struct bpf_map *map = (struct bpf_map *)&m_lpm_trie;
+
+ VERIFY(check(&lpm_trie->map, map, sizeof(struct lpm_key), sizeof(__u32),
+ MAX_ENTRIES));
+
+ return 1;
+}
+
+#define INNER_MAX_ENTRIES 1234
+
+struct inner_map {
+ __uint(type, BPF_MAP_TYPE_ARRAY);
+ __uint(max_entries, INNER_MAX_ENTRIES);
+ __type(key, __u32);
+ __type(value, __u32);
+} inner_map SEC(".maps");
+
+struct {
+ __uint(type, BPF_MAP_TYPE_ARRAY_OF_MAPS);
+ __uint(max_entries, MAX_ENTRIES);
+ __type(key, __u32);
+ __type(value, __u32);
+ __array(values, struct {
+ __uint(type, BPF_MAP_TYPE_ARRAY);
+ __uint(max_entries, INNER_MAX_ENTRIES);
+ __type(key, __u32);
+ __type(value, __u32);
+ });
+} m_array_of_maps SEC(".maps") = {
+ .values = { (void *)&inner_map, 0, 0, 0, 0, 0, 0, 0, 0 },
+};
+
+static inline int check_array_of_maps(void)
+{
+ struct bpf_array *array_of_maps = (struct bpf_array *)&m_array_of_maps;
+ struct bpf_map *map = (struct bpf_map *)&m_array_of_maps;
+ struct bpf_array *inner_map;
+ int key = 0;
+
+ VERIFY(check_default(&array_of_maps->map, map));
+ inner_map = bpf_map_lookup_elem(array_of_maps, &key);
+ VERIFY(inner_map != NULL);
+ VERIFY(inner_map->map.max_entries == INNER_MAX_ENTRIES);
+
+ return 1;
+}
+
+struct {
+ __uint(type, BPF_MAP_TYPE_HASH_OF_MAPS);
+ __uint(max_entries, MAX_ENTRIES);
+ __type(key, __u32);
+ __type(value, __u32);
+ __array(values, struct inner_map);
+} m_hash_of_maps SEC(".maps") = {
+ .values = {
+ [2] = &inner_map,
+ },
+};
+
+static inline int check_hash_of_maps(void)
+{
+ struct bpf_htab *hash_of_maps = (struct bpf_htab *)&m_hash_of_maps;
+ struct bpf_map *map = (struct bpf_map *)&m_hash_of_maps;
+ struct bpf_htab *inner_map;
+ int key = 2;
+
+ VERIFY(check_default(&hash_of_maps->map, map));
+ inner_map = bpf_map_lookup_elem(hash_of_maps, &key);
+ VERIFY(inner_map != NULL);
+ VERIFY(inner_map->map.max_entries == INNER_MAX_ENTRIES);
+
+ return 1;
+}
+
+struct bpf_dtab {
+ struct bpf_map map;
+} __attribute__((preserve_access_index));
+
+struct {
+ __uint(type, BPF_MAP_TYPE_DEVMAP);
+ __uint(max_entries, MAX_ENTRIES);
+ __type(key, __u32);
+ __type(value, __u32);
+} m_devmap SEC(".maps");
+
+static inline int check_devmap(void)
+{
+ struct bpf_dtab *devmap = (struct bpf_dtab *)&m_devmap;
+ struct bpf_map *map = (struct bpf_map *)&m_devmap;
+
+ VERIFY(check_default(&devmap->map, map));
+
+ return 1;
+}
+
+struct bpf_stab {
+ struct bpf_map map;
+} __attribute__((preserve_access_index));
+
+struct {
+ __uint(type, BPF_MAP_TYPE_SOCKMAP);
+ __uint(max_entries, MAX_ENTRIES);
+ __type(key, __u32);
+ __type(value, __u32);
+} m_sockmap SEC(".maps");
+
+static inline int check_sockmap(void)
+{
+ struct bpf_stab *sockmap = (struct bpf_stab *)&m_sockmap;
+ struct bpf_map *map = (struct bpf_map *)&m_sockmap;
+
+ VERIFY(check_default(&sockmap->map, map));
+
+ return 1;
+}
+
+struct bpf_cpu_map {
+ struct bpf_map map;
+} __attribute__((preserve_access_index));
+
+struct {
+ __uint(type, BPF_MAP_TYPE_CPUMAP);
+ __uint(max_entries, MAX_ENTRIES);
+ __type(key, __u32);
+ __type(value, __u32);
+} m_cpumap SEC(".maps");
+
+static inline int check_cpumap(void)
+{
+ struct bpf_cpu_map *cpumap = (struct bpf_cpu_map *)&m_cpumap;
+ struct bpf_map *map = (struct bpf_map *)&m_cpumap;
+
+ VERIFY(check_default(&cpumap->map, map));
+
+ return 1;
+}
+
+struct xsk_map {
+ struct bpf_map map;
+} __attribute__((preserve_access_index));
+
+struct {
+ __uint(type, BPF_MAP_TYPE_XSKMAP);
+ __uint(max_entries, MAX_ENTRIES);
+ __type(key, __u32);
+ __type(value, __u32);
+} m_xskmap SEC(".maps");
+
+static inline int check_xskmap(void)
+{
+ struct xsk_map *xskmap = (struct xsk_map *)&m_xskmap;
+ struct bpf_map *map = (struct bpf_map *)&m_xskmap;
+
+ VERIFY(check_default(&xskmap->map, map));
+
+ return 1;
+}
+
+struct bpf_shtab {
+ struct bpf_map map;
+} __attribute__((preserve_access_index));
+
+struct {
+ __uint(type, BPF_MAP_TYPE_SOCKHASH);
+ __uint(max_entries, MAX_ENTRIES);
+ __type(key, __u32);
+ __type(value, __u32);
+} m_sockhash SEC(".maps");
+
+static inline int check_sockhash(void)
+{
+ struct bpf_shtab *sockhash = (struct bpf_shtab *)&m_sockhash;
+ struct bpf_map *map = (struct bpf_map *)&m_sockhash;
+
+ VERIFY(check_default(&sockhash->map, map));
+
+ return 1;
+}
+
+struct bpf_cgroup_storage_map {
+ struct bpf_map map;
+} __attribute__((preserve_access_index));
+
+struct {
+ __uint(type, BPF_MAP_TYPE_CGROUP_STORAGE);
+ __type(key, struct bpf_cgroup_storage_key);
+ __type(value, __u32);
+} m_cgroup_storage SEC(".maps");
+
+static inline int check_cgroup_storage(void)
+{
+ struct bpf_cgroup_storage_map *cgroup_storage =
+ (struct bpf_cgroup_storage_map *)&m_cgroup_storage;
+ struct bpf_map *map = (struct bpf_map *)&m_cgroup_storage;
+
+ VERIFY(check(&cgroup_storage->map, map,
+ sizeof(struct bpf_cgroup_storage_key), sizeof(__u32), 0));
+
+ return 1;
+}
+
+struct reuseport_array {
+ struct bpf_map map;
+} __attribute__((preserve_access_index));
+
+struct {
+ __uint(type, BPF_MAP_TYPE_REUSEPORT_SOCKARRAY);
+ __uint(max_entries, MAX_ENTRIES);
+ __type(key, __u32);
+ __type(value, __u32);
+} m_reuseport_sockarray SEC(".maps");
+
+static inline int check_reuseport_sockarray(void)
+{
+ struct reuseport_array *reuseport_sockarray =
+ (struct reuseport_array *)&m_reuseport_sockarray;
+ struct bpf_map *map = (struct bpf_map *)&m_reuseport_sockarray;
+
+ VERIFY(check_default(&reuseport_sockarray->map, map));
+
+ return 1;
+}
+
+struct {
+ __uint(type, BPF_MAP_TYPE_PERCPU_CGROUP_STORAGE);
+ __type(key, struct bpf_cgroup_storage_key);
+ __type(value, __u32);
+} m_percpu_cgroup_storage SEC(".maps");
+
+static inline int check_percpu_cgroup_storage(void)
+{
+ struct bpf_cgroup_storage_map *percpu_cgroup_storage =
+ (struct bpf_cgroup_storage_map *)&m_percpu_cgroup_storage;
+ struct bpf_map *map = (struct bpf_map *)&m_percpu_cgroup_storage;
+
+ VERIFY(check(&percpu_cgroup_storage->map, map,
+ sizeof(struct bpf_cgroup_storage_key), sizeof(__u32), 0));
+
+ return 1;
+}
+
+struct bpf_queue_stack {
+ struct bpf_map map;
+} __attribute__((preserve_access_index));
+
+struct {
+ __uint(type, BPF_MAP_TYPE_QUEUE);
+ __uint(max_entries, MAX_ENTRIES);
+ __type(value, __u32);
+} m_queue SEC(".maps");
+
+static inline int check_queue(void)
+{
+ struct bpf_queue_stack *queue = (struct bpf_queue_stack *)&m_queue;
+ struct bpf_map *map = (struct bpf_map *)&m_queue;
+
+ VERIFY(check(&queue->map, map, 0, sizeof(__u32), MAX_ENTRIES));
+
+ return 1;
+}
+
+struct {
+ __uint(type, BPF_MAP_TYPE_STACK);
+ __uint(max_entries, MAX_ENTRIES);
+ __type(value, __u32);
+} m_stack SEC(".maps");
+
+static inline int check_stack(void)
+{
+ struct bpf_queue_stack *stack = (struct bpf_queue_stack *)&m_stack;
+ struct bpf_map *map = (struct bpf_map *)&m_stack;
+
+ VERIFY(check(&stack->map, map, 0, sizeof(__u32), MAX_ENTRIES));
+
+ return 1;
+}
+
+struct bpf_local_storage_map {
+ struct bpf_map map;
+} __attribute__((preserve_access_index));
+
+struct {
+ __uint(type, BPF_MAP_TYPE_SK_STORAGE);
+ __uint(map_flags, BPF_F_NO_PREALLOC);
+ __type(key, __u32);
+ __type(value, __u32);
+} m_sk_storage SEC(".maps");
+
+static inline int check_sk_storage(void)
+{
+ struct bpf_local_storage_map *sk_storage =
+ (struct bpf_local_storage_map *)&m_sk_storage;
+ struct bpf_map *map = (struct bpf_map *)&m_sk_storage;
+
+ VERIFY(check(&sk_storage->map, map, sizeof(__u32), sizeof(__u32), 0));
+
+ return 1;
+}
+
+struct {
+ __uint(type, BPF_MAP_TYPE_DEVMAP_HASH);
+ __uint(max_entries, MAX_ENTRIES);
+ __type(key, __u32);
+ __type(value, __u32);
+} m_devmap_hash SEC(".maps");
+
+static inline int check_devmap_hash(void)
+{
+ struct bpf_dtab *devmap_hash = (struct bpf_dtab *)&m_devmap_hash;
+ struct bpf_map *map = (struct bpf_map *)&m_devmap_hash;
+
+ VERIFY(check_default(&devmap_hash->map, map));
+
+ return 1;
+}
+
+struct bpf_ringbuf_map {
+ struct bpf_map map;
+} __attribute__((preserve_access_index));
+
+struct {
+ __uint(type, BPF_MAP_TYPE_RINGBUF);
+} m_ringbuf SEC(".maps");
+
+static inline int check_ringbuf(void)
+{
+ struct bpf_ringbuf_map *ringbuf = (struct bpf_ringbuf_map *)&m_ringbuf;
+ struct bpf_map *map = (struct bpf_map *)&m_ringbuf;
+
+ VERIFY(check(&ringbuf->map, map, 0, 0, page_size));
+
+ return 1;
+}
+
+SEC("cgroup_skb/egress")
+int cg_skb(void *ctx)
+{
+ VERIFY_TYPE(BPF_MAP_TYPE_HASH, check_hash);
+ VERIFY_TYPE(BPF_MAP_TYPE_ARRAY, check_array);
+ VERIFY_TYPE(BPF_MAP_TYPE_PROG_ARRAY, check_prog_array);
+ VERIFY_TYPE(BPF_MAP_TYPE_PERF_EVENT_ARRAY, check_perf_event_array);
+ VERIFY_TYPE(BPF_MAP_TYPE_PERCPU_HASH, check_percpu_hash);
+ VERIFY_TYPE(BPF_MAP_TYPE_PERCPU_ARRAY, check_percpu_array);
+ VERIFY_TYPE(BPF_MAP_TYPE_STACK_TRACE, check_stack_trace);
+ VERIFY_TYPE(BPF_MAP_TYPE_CGROUP_ARRAY, check_cgroup_array);
+ VERIFY_TYPE(BPF_MAP_TYPE_LRU_HASH, check_lru_hash);
+ VERIFY_TYPE(BPF_MAP_TYPE_LRU_PERCPU_HASH, check_lru_percpu_hash);
+ VERIFY_TYPE(BPF_MAP_TYPE_LPM_TRIE, check_lpm_trie);
+ VERIFY_TYPE(BPF_MAP_TYPE_ARRAY_OF_MAPS, check_array_of_maps);
+ VERIFY_TYPE(BPF_MAP_TYPE_HASH_OF_MAPS, check_hash_of_maps);
+ VERIFY_TYPE(BPF_MAP_TYPE_DEVMAP, check_devmap);
+ VERIFY_TYPE(BPF_MAP_TYPE_SOCKMAP, check_sockmap);
+ VERIFY_TYPE(BPF_MAP_TYPE_CPUMAP, check_cpumap);
+ VERIFY_TYPE(BPF_MAP_TYPE_XSKMAP, check_xskmap);
+ VERIFY_TYPE(BPF_MAP_TYPE_SOCKHASH, check_sockhash);
+ VERIFY_TYPE(BPF_MAP_TYPE_CGROUP_STORAGE, check_cgroup_storage);
+ VERIFY_TYPE(BPF_MAP_TYPE_REUSEPORT_SOCKARRAY,
+ check_reuseport_sockarray);
+ VERIFY_TYPE(BPF_MAP_TYPE_PERCPU_CGROUP_STORAGE,
+ check_percpu_cgroup_storage);
+ VERIFY_TYPE(BPF_MAP_TYPE_QUEUE, check_queue);
+ VERIFY_TYPE(BPF_MAP_TYPE_STACK, check_stack);
+ VERIFY_TYPE(BPF_MAP_TYPE_SK_STORAGE, check_sk_storage);
+ VERIFY_TYPE(BPF_MAP_TYPE_DEVMAP_HASH, check_devmap_hash);
+ VERIFY_TYPE(BPF_MAP_TYPE_RINGBUF, check_ringbuf);
+
+ return 1;
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/metadata_unused.c b/tools/testing/selftests/bpf/progs/metadata_unused.c
new file mode 100644
index 000000000000..672a0d19f8d0
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/metadata_unused.c
@@ -0,0 +1,15 @@
+// SPDX-License-Identifier: GPL-2.0-only
+
+#include <linux/bpf.h>
+#include <bpf/bpf_helpers.h>
+
+volatile const char bpf_metadata_a[] SEC(".rodata") = "foo";
+volatile const int bpf_metadata_b SEC(".rodata") = 1;
+
+SEC("cgroup_skb/egress")
+int prog(struct xdp_md *ctx)
+{
+ return 0;
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/metadata_used.c b/tools/testing/selftests/bpf/progs/metadata_used.c
new file mode 100644
index 000000000000..b7198e65383d
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/metadata_used.c
@@ -0,0 +1,15 @@
+// SPDX-License-Identifier: GPL-2.0-only
+
+#include <linux/bpf.h>
+#include <bpf/bpf_helpers.h>
+
+volatile const char bpf_metadata_a[] SEC(".rodata") = "bar";
+volatile const int bpf_metadata_b SEC(".rodata") = 2;
+
+SEC("cgroup_skb/egress")
+int prog(struct xdp_md *ctx)
+{
+ return bpf_metadata_b ? 1 : 0;
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/missed_kprobe.c b/tools/testing/selftests/bpf/progs/missed_kprobe.c
new file mode 100644
index 000000000000..7f9ef701f5de
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/missed_kprobe.c
@@ -0,0 +1,30 @@
+// SPDX-License-Identifier: GPL-2.0
+#include "vmlinux.h"
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
+#include "../bpf_testmod/bpf_testmod_kfunc.h"
+
+char _license[] SEC("license") = "GPL";
+
+/*
+ * No tests in here, just to trigger 'bpf_fentry_test*'
+ * through tracing test_run
+ */
+SEC("fentry/bpf_modify_return_test")
+int BPF_PROG(trigger)
+{
+ return 0;
+}
+
+SEC("kprobe/bpf_fentry_test1")
+int test1(struct pt_regs *ctx)
+{
+ bpf_kfunc_common_test();
+ return 0;
+}
+
+SEC("kprobe/bpf_kfunc_common_test")
+int test2(struct pt_regs *ctx)
+{
+ return 0;
+}
diff --git a/tools/testing/selftests/bpf/progs/missed_kprobe_recursion.c b/tools/testing/selftests/bpf/progs/missed_kprobe_recursion.c
new file mode 100644
index 000000000000..8ea71cbd6c45
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/missed_kprobe_recursion.c
@@ -0,0 +1,48 @@
+// SPDX-License-Identifier: GPL-2.0
+#include "vmlinux.h"
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
+#include "../bpf_testmod/bpf_testmod_kfunc.h"
+
+char _license[] SEC("license") = "GPL";
+
+/*
+ * No tests in here, just to trigger 'bpf_fentry_test*'
+ * through tracing test_run
+ */
+SEC("fentry/bpf_modify_return_test")
+int BPF_PROG(trigger)
+{
+ return 0;
+}
+
+SEC("kprobe.multi/bpf_fentry_test1")
+int test1(struct pt_regs *ctx)
+{
+ bpf_kfunc_common_test();
+ return 0;
+}
+
+SEC("kprobe/bpf_kfunc_common_test")
+int test2(struct pt_regs *ctx)
+{
+ return 0;
+}
+
+SEC("kprobe/bpf_kfunc_common_test")
+int test3(struct pt_regs *ctx)
+{
+ return 0;
+}
+
+SEC("kprobe/bpf_kfunc_common_test")
+int test4(struct pt_regs *ctx)
+{
+ return 0;
+}
+
+SEC("kprobe.multi/bpf_kfunc_common_test")
+int test5(struct pt_regs *ctx)
+{
+ return 0;
+}
diff --git a/tools/testing/selftests/bpf/progs/missed_tp_recursion.c b/tools/testing/selftests/bpf/progs/missed_tp_recursion.c
new file mode 100644
index 000000000000..762385f827c5
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/missed_tp_recursion.c
@@ -0,0 +1,41 @@
+// SPDX-License-Identifier: GPL-2.0
+#include "vmlinux.h"
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
+
+char _license[] SEC("license") = "GPL";
+
+/*
+ * No tests in here, just to trigger 'bpf_fentry_test*'
+ * through tracing test_run
+ */
+SEC("fentry/bpf_modify_return_test")
+int BPF_PROG(trigger)
+{
+ return 0;
+}
+
+SEC("kprobe/bpf_fentry_test1")
+int test1(struct pt_regs *ctx)
+{
+ bpf_printk("test");
+ return 0;
+}
+
+SEC("tp/bpf_trace/bpf_trace_printk")
+int test2(struct pt_regs *ctx)
+{
+ return 0;
+}
+
+SEC("tp/bpf_trace/bpf_trace_printk")
+int test3(struct pt_regs *ctx)
+{
+ return 0;
+}
+
+SEC("tp/bpf_trace/bpf_trace_printk")
+int test4(struct pt_regs *ctx)
+{
+ return 0;
+}
diff --git a/tools/testing/selftests/bpf/progs/modify_return.c b/tools/testing/selftests/bpf/progs/modify_return.c
index 8b7466a15c6b..3376d4849f58 100644
--- a/tools/testing/selftests/bpf/progs/modify_return.c
+++ b/tools/testing/selftests/bpf/progs/modify_return.c
@@ -47,3 +47,43 @@ int BPF_PROG(fexit_test, int a, __u64 b, int ret)
return 0;
}
+
+static int sequence2;
+
+__u64 fentry_result2 = 0;
+SEC("fentry/bpf_modify_return_test2")
+int BPF_PROG(fentry_test2, int a, int *b, short c, int d, void *e, char f,
+ int g)
+{
+ sequence2++;
+ fentry_result2 = (sequence2 == 1);
+ return 0;
+}
+
+__u64 fmod_ret_result2 = 0;
+SEC("fmod_ret/bpf_modify_return_test2")
+int BPF_PROG(fmod_ret_test2, int a, int *b, short c, int d, void *e, char f,
+ int g, int ret)
+{
+ sequence2++;
+ /* This is the first fmod_ret program, the ret passed should be 0 */
+ fmod_ret_result2 = (sequence2 == 2 && ret == 0);
+ return input_retval;
+}
+
+__u64 fexit_result2 = 0;
+SEC("fexit/bpf_modify_return_test2")
+int BPF_PROG(fexit_test2, int a, int *b, short c, int d, void *e, char f,
+ int g, int ret)
+{
+ sequence2++;
+ /* If the input_reval is non-zero a successful modification should have
+ * occurred.
+ */
+ if (input_retval)
+ fexit_result2 = (sequence2 == 3 && ret == input_retval);
+ else
+ fexit_result2 = (sequence2 == 3 && ret == 29);
+
+ return 0;
+}
diff --git a/tools/testing/selftests/bpf/progs/mptcp_sock.c b/tools/testing/selftests/bpf/progs/mptcp_sock.c
new file mode 100644
index 000000000000..91a0d7eff2ac
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/mptcp_sock.c
@@ -0,0 +1,88 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2020, Tessares SA. */
+/* Copyright (c) 2022, SUSE. */
+
+#include <linux/bpf.h>
+#include <bpf/bpf_helpers.h>
+#include "bpf_tcp_helpers.h"
+
+char _license[] SEC("license") = "GPL";
+__u32 token = 0;
+
+struct mptcp_storage {
+ __u32 invoked;
+ __u32 is_mptcp;
+ struct sock *sk;
+ __u32 token;
+ struct sock *first;
+ char ca_name[TCP_CA_NAME_MAX];
+};
+
+struct {
+ __uint(type, BPF_MAP_TYPE_SK_STORAGE);
+ __uint(map_flags, BPF_F_NO_PREALLOC);
+ __type(key, int);
+ __type(value, struct mptcp_storage);
+} socket_storage_map SEC(".maps");
+
+SEC("sockops")
+int _sockops(struct bpf_sock_ops *ctx)
+{
+ struct mptcp_storage *storage;
+ struct mptcp_sock *msk;
+ int op = (int)ctx->op;
+ struct tcp_sock *tsk;
+ struct bpf_sock *sk;
+ bool is_mptcp;
+
+ if (op != BPF_SOCK_OPS_TCP_CONNECT_CB)
+ return 1;
+
+ sk = ctx->sk;
+ if (!sk)
+ return 1;
+
+ tsk = bpf_skc_to_tcp_sock(sk);
+ if (!tsk)
+ return 1;
+
+ is_mptcp = bpf_core_field_exists(tsk->is_mptcp) ? tsk->is_mptcp : 0;
+ if (!is_mptcp) {
+ storage = bpf_sk_storage_get(&socket_storage_map, sk, 0,
+ BPF_SK_STORAGE_GET_F_CREATE);
+ if (!storage)
+ return 1;
+
+ storage->token = 0;
+ __builtin_memset(storage->ca_name, 0, TCP_CA_NAME_MAX);
+ storage->first = NULL;
+ } else {
+ msk = bpf_skc_to_mptcp_sock(sk);
+ if (!msk)
+ return 1;
+
+ storage = bpf_sk_storage_get(&socket_storage_map, msk, 0,
+ BPF_SK_STORAGE_GET_F_CREATE);
+ if (!storage)
+ return 1;
+
+ storage->token = msk->token;
+ __builtin_memcpy(storage->ca_name, msk->ca_name, TCP_CA_NAME_MAX);
+ storage->first = msk->first;
+ }
+ storage->invoked++;
+ storage->is_mptcp = is_mptcp;
+ storage->sk = (struct sock *)sk;
+
+ return 1;
+}
+
+SEC("fentry/mptcp_pm_new_connection")
+int BPF_PROG(trace_mptcp_pm_new_connection, struct mptcp_sock *msk,
+ const struct sock *ssk, int server_side)
+{
+ if (!server_side)
+ token = msk->token;
+
+ return 0;
+}
diff --git a/tools/testing/selftests/bpf/progs/mptcpify.c b/tools/testing/selftests/bpf/progs/mptcpify.c
new file mode 100644
index 000000000000..53301ae8a8f7
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/mptcpify.c
@@ -0,0 +1,20 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2023, SUSE. */
+
+#include "vmlinux.h"
+#include <bpf/bpf_tracing.h>
+#include "bpf_tracing_net.h"
+
+char _license[] SEC("license") = "GPL";
+
+SEC("fmod_ret/update_socket_protocol")
+int BPF_PROG(mptcpify, int family, int type, int protocol)
+{
+ if ((family == AF_INET || family == AF_INET6) &&
+ type == SOCK_STREAM &&
+ (!protocol || protocol == IPPROTO_TCP)) {
+ return IPPROTO_MPTCP;
+ }
+
+ return protocol;
+}
diff --git a/tools/testing/selftests/bpf/progs/nested_trust_common.h b/tools/testing/selftests/bpf/progs/nested_trust_common.h
new file mode 100644
index 000000000000..83d33931136e
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/nested_trust_common.h
@@ -0,0 +1,12 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* Copyright (c) 2023 Meta Platforms, Inc. and affiliates. */
+
+#ifndef _NESTED_TRUST_COMMON_H
+#define _NESTED_TRUST_COMMON_H
+
+#include <stdbool.h>
+
+bool bpf_cpumask_test_cpu(unsigned int cpu, const struct cpumask *cpumask) __ksym;
+bool bpf_cpumask_first_zero(const struct cpumask *cpumask) __ksym;
+
+#endif /* _NESTED_TRUST_COMMON_H */
diff --git a/tools/testing/selftests/bpf/progs/nested_trust_failure.c b/tools/testing/selftests/bpf/progs/nested_trust_failure.c
new file mode 100644
index 000000000000..ea39497f11ed
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/nested_trust_failure.c
@@ -0,0 +1,49 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2023 Meta Platforms, Inc. and affiliates. */
+
+#include <vmlinux.h>
+#include <bpf/bpf_tracing.h>
+#include <bpf/bpf_helpers.h>
+#include "bpf_misc.h"
+
+#include "nested_trust_common.h"
+
+char _license[] SEC("license") = "GPL";
+
+struct {
+ __uint(type, BPF_MAP_TYPE_SK_STORAGE);
+ __uint(map_flags, BPF_F_NO_PREALLOC);
+ __type(key, int);
+ __type(value, u64);
+} sk_storage_map SEC(".maps");
+
+/* Prototype for all of the program trace events below:
+ *
+ * TRACE_EVENT(task_newtask,
+ * TP_PROTO(struct task_struct *p, u64 clone_flags)
+ */
+
+SEC("tp_btf/task_newtask")
+__failure __msg("R2 must be")
+int BPF_PROG(test_invalid_nested_user_cpus, struct task_struct *task, u64 clone_flags)
+{
+ bpf_cpumask_test_cpu(0, task->user_cpus_ptr);
+ return 0;
+}
+
+SEC("tp_btf/task_newtask")
+__failure __msg("R1 must have zero offset when passed to release func or trusted arg to kfunc")
+int BPF_PROG(test_invalid_nested_offset, struct task_struct *task, u64 clone_flags)
+{
+ bpf_cpumask_first_zero(&task->cpus_mask);
+ return 0;
+}
+
+/* Although R2 is of type sk_buff but sock_common is expected, we will hit untrusted ptr first. */
+SEC("tp_btf/tcp_probe")
+__failure __msg("R2 type=untrusted_ptr_ expected=ptr_, trusted_ptr_, rcu_ptr_")
+int BPF_PROG(test_invalid_skb_field, struct sock *sk, struct sk_buff *skb)
+{
+ bpf_sk_storage_get(&sk_storage_map, skb->next, 0, 0);
+ return 0;
+}
diff --git a/tools/testing/selftests/bpf/progs/nested_trust_success.c b/tools/testing/selftests/bpf/progs/nested_trust_success.c
new file mode 100644
index 000000000000..833840bffd3b
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/nested_trust_success.c
@@ -0,0 +1,34 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2023 Meta Platforms, Inc. and affiliates. */
+
+#include <vmlinux.h>
+#include <bpf/bpf_tracing.h>
+#include <bpf/bpf_helpers.h>
+#include "bpf_misc.h"
+
+#include "nested_trust_common.h"
+
+char _license[] SEC("license") = "GPL";
+
+struct {
+ __uint(type, BPF_MAP_TYPE_SK_STORAGE);
+ __uint(map_flags, BPF_F_NO_PREALLOC);
+ __type(key, int);
+ __type(value, u64);
+} sk_storage_map SEC(".maps");
+
+SEC("tp_btf/task_newtask")
+__success
+int BPF_PROG(test_read_cpumask, struct task_struct *task, u64 clone_flags)
+{
+ bpf_cpumask_test_cpu(0, task->cpus_ptr);
+ return 0;
+}
+
+SEC("tp_btf/tcp_probe")
+__success
+int BPF_PROG(test_skb_field, struct sock *sk, struct sk_buff *skb)
+{
+ bpf_sk_storage_get(&sk_storage_map, skb->sk, 0, 0);
+ return 0;
+}
diff --git a/tools/testing/selftests/bpf/progs/netcnt_prog.c b/tools/testing/selftests/bpf/progs/netcnt_prog.c
index d071adf178bd..f9ef8aee56f1 100644
--- a/tools/testing/selftests/bpf/progs/netcnt_prog.c
+++ b/tools/testing/selftests/bpf/progs/netcnt_prog.c
@@ -13,21 +13,20 @@
struct {
__uint(type, BPF_MAP_TYPE_PERCPU_CGROUP_STORAGE);
__type(key, struct bpf_cgroup_storage_key);
- __type(value, struct percpu_net_cnt);
+ __type(value, union percpu_net_cnt);
} percpu_netcnt SEC(".maps");
struct {
__uint(type, BPF_MAP_TYPE_CGROUP_STORAGE);
__type(key, struct bpf_cgroup_storage_key);
- __type(value, struct net_cnt);
+ __type(value, union net_cnt);
} netcnt SEC(".maps");
SEC("cgroup/skb")
int bpf_nextcnt(struct __sk_buff *skb)
{
- struct percpu_net_cnt *percpu_cnt;
- char fmt[] = "%d %llu %llu\n";
- struct net_cnt *cnt;
+ union percpu_net_cnt *percpu_cnt;
+ union net_cnt *cnt;
__u64 ts, dt;
int ret;
@@ -68,4 +67,3 @@ int bpf_nextcnt(struct __sk_buff *skb)
}
char _license[] SEC("license") = "GPL";
-__u32 _version SEC("version") = LINUX_VERSION_CODE;
diff --git a/tools/testing/selftests/bpf/progs/netif_receive_skb.c b/tools/testing/selftests/bpf/progs/netif_receive_skb.c
new file mode 100644
index 000000000000..c0062645fc68
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/netif_receive_skb.c
@@ -0,0 +1,255 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2020, Oracle and/or its affiliates. */
+
+#include "btf_ptr.h"
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
+#include <bpf/bpf_core_read.h>
+
+#include <errno.h>
+
+long ret = 0;
+int num_subtests = 0;
+int ran_subtests = 0;
+bool skip = false;
+
+#define STRSIZE 2048
+#define EXPECTED_STRSIZE 256
+
+#if defined(bpf_target_s390)
+/* NULL points to a readable struct lowcore on s390, so take the last page */
+#define BADPTR ((void *)0xFFFFFFFFFFFFF000ULL)
+#else
+#define BADPTR 0
+#endif
+
+#ifndef ARRAY_SIZE
+#define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0]))
+#endif
+
+struct {
+ __uint(type, BPF_MAP_TYPE_PERCPU_ARRAY);
+ __uint(max_entries, 1);
+ __type(key, __u32);
+ __type(value, char[STRSIZE]);
+} strdata SEC(".maps");
+
+static int __strncmp(const void *m1, const void *m2, size_t len)
+{
+ const unsigned char *s1 = m1;
+ const unsigned char *s2 = m2;
+ int i, delta = 0;
+
+ for (i = 0; i < len; i++) {
+ delta = s1[i] - s2[i];
+ if (delta || s1[i] == 0 || s2[i] == 0)
+ break;
+ }
+ return delta;
+}
+
+#if __has_builtin(__builtin_btf_type_id)
+#define TEST_BTF(_str, _type, _flags, _expected, ...) \
+ do { \
+ static const char _expectedval[EXPECTED_STRSIZE] = \
+ _expected; \
+ __u64 _hflags = _flags | BTF_F_COMPACT; \
+ static _type _ptrdata = __VA_ARGS__; \
+ static struct btf_ptr _ptr = { }; \
+ int _cmp; \
+ \
+ ++num_subtests; \
+ if (ret < 0) \
+ break; \
+ ++ran_subtests; \
+ _ptr.ptr = &_ptrdata; \
+ _ptr.type_id = bpf_core_type_id_kernel(_type); \
+ if (_ptr.type_id <= 0) { \
+ ret = -EINVAL; \
+ break; \
+ } \
+ ret = bpf_snprintf_btf(_str, STRSIZE, \
+ &_ptr, sizeof(_ptr), _hflags); \
+ if (ret) \
+ break; \
+ _cmp = __strncmp(_str, _expectedval, EXPECTED_STRSIZE); \
+ if (_cmp != 0) { \
+ bpf_printk("(%d) got %s", _cmp, _str); \
+ bpf_printk("(%d) expected %s", _cmp, \
+ _expectedval); \
+ ret = -EBADMSG; \
+ break; \
+ } \
+ } while (0)
+#endif
+
+/* Use where expected data string matches its stringified declaration */
+#define TEST_BTF_C(_str, _type, _flags, ...) \
+ TEST_BTF(_str, _type, _flags, "(" #_type ")" #__VA_ARGS__, \
+ __VA_ARGS__)
+
+/* TRACE_EVENT(netif_receive_skb,
+ * TP_PROTO(struct sk_buff *skb),
+ */
+SEC("tp_btf/netif_receive_skb")
+int BPF_PROG(trace_netif_receive_skb, struct sk_buff *skb)
+{
+ static __u64 flags[] = { 0, BTF_F_COMPACT, BTF_F_ZERO, BTF_F_PTR_RAW,
+ BTF_F_NONAME, BTF_F_COMPACT | BTF_F_ZERO |
+ BTF_F_PTR_RAW | BTF_F_NONAME };
+ static struct btf_ptr p = { };
+ __u32 key = 0;
+ int i, __ret;
+ char *str;
+
+#if __has_builtin(__builtin_btf_type_id)
+ str = bpf_map_lookup_elem(&strdata, &key);
+ if (!str)
+ return 0;
+
+ /* Ensure we can write skb string representation */
+ p.type_id = bpf_core_type_id_kernel(struct sk_buff);
+ p.ptr = skb;
+ for (i = 0; i < ARRAY_SIZE(flags); i++) {
+ ++num_subtests;
+ ret = bpf_snprintf_btf(str, STRSIZE, &p, sizeof(p), 0);
+ if (ret < 0)
+ bpf_printk("returned %d when writing skb", ret);
+ ++ran_subtests;
+ }
+
+ /* Check invalid ptr value */
+ p.ptr = BADPTR;
+ __ret = bpf_snprintf_btf(str, STRSIZE, &p, sizeof(p), 0);
+ if (__ret >= 0) {
+ bpf_printk("printing %llx should generate error, got (%d)",
+ (unsigned long long)BADPTR, __ret);
+ ret = -ERANGE;
+ }
+
+ /* Verify type display for various types. */
+
+ /* simple int */
+ TEST_BTF_C(str, int, 0, 1234);
+ TEST_BTF(str, int, BTF_F_NONAME, "1234", 1234);
+ /* zero value should be printed at toplevel */
+ TEST_BTF(str, int, 0, "(int)0", 0);
+ TEST_BTF(str, int, BTF_F_NONAME, "0", 0);
+ TEST_BTF(str, int, BTF_F_ZERO, "(int)0", 0);
+ TEST_BTF(str, int, BTF_F_NONAME | BTF_F_ZERO, "0", 0);
+ TEST_BTF_C(str, int, 0, -4567);
+ TEST_BTF(str, int, BTF_F_NONAME, "-4567", -4567);
+
+ /* simple char */
+ TEST_BTF_C(str, char, 0, 100);
+ TEST_BTF(str, char, BTF_F_NONAME, "100", 100);
+ /* zero value should be printed at toplevel */
+ TEST_BTF(str, char, 0, "(char)0", 0);
+ TEST_BTF(str, char, BTF_F_NONAME, "0", 0);
+ TEST_BTF(str, char, BTF_F_ZERO, "(char)0", 0);
+ TEST_BTF(str, char, BTF_F_NONAME | BTF_F_ZERO, "0", 0);
+
+ /* simple typedef */
+ TEST_BTF_C(str, uint64_t, 0, 100);
+ TEST_BTF(str, u64, BTF_F_NONAME, "1", 1);
+ /* zero value should be printed at toplevel */
+ TEST_BTF(str, u64, 0, "(u64)0", 0);
+ TEST_BTF(str, u64, BTF_F_NONAME, "0", 0);
+ TEST_BTF(str, u64, BTF_F_ZERO, "(u64)0", 0);
+ TEST_BTF(str, u64, BTF_F_NONAME|BTF_F_ZERO, "0", 0);
+
+ /* typedef struct */
+ TEST_BTF_C(str, atomic_t, 0, {.counter = (int)1,});
+ TEST_BTF(str, atomic_t, BTF_F_NONAME, "{1,}", {.counter = 1,});
+ /* typedef with 0 value should be printed at toplevel */
+ TEST_BTF(str, atomic_t, 0, "(atomic_t){}", {.counter = 0,});
+ TEST_BTF(str, atomic_t, BTF_F_NONAME, "{}", {.counter = 0,});
+ TEST_BTF(str, atomic_t, BTF_F_ZERO, "(atomic_t){.counter = (int)0,}",
+ {.counter = 0,});
+ TEST_BTF(str, atomic_t, BTF_F_NONAME|BTF_F_ZERO,
+ "{0,}", {.counter = 0,});
+
+ /* enum where enum value does (and does not) exist */
+ TEST_BTF_C(str, enum bpf_cmd, 0, BPF_MAP_CREATE);
+ TEST_BTF(str, enum bpf_cmd, 0, "(enum bpf_cmd)BPF_MAP_CREATE", 0);
+ TEST_BTF(str, enum bpf_cmd, BTF_F_NONAME, "BPF_MAP_CREATE",
+ BPF_MAP_CREATE);
+ TEST_BTF(str, enum bpf_cmd, BTF_F_NONAME|BTF_F_ZERO,
+ "BPF_MAP_CREATE", 0);
+
+ TEST_BTF(str, enum bpf_cmd, BTF_F_ZERO, "(enum bpf_cmd)BPF_MAP_CREATE",
+ BPF_MAP_CREATE);
+ TEST_BTF(str, enum bpf_cmd, BTF_F_NONAME|BTF_F_ZERO,
+ "BPF_MAP_CREATE", BPF_MAP_CREATE);
+ TEST_BTF_C(str, enum bpf_cmd, 0, 2000);
+ TEST_BTF(str, enum bpf_cmd, BTF_F_NONAME, "2000", 2000);
+
+ /* simple struct */
+ TEST_BTF_C(str, struct btf_enum, 0,
+ {.name_off = (__u32)3,.val = (__s32)-1,});
+ TEST_BTF(str, struct btf_enum, BTF_F_NONAME, "{3,-1,}",
+ { .name_off = 3, .val = -1,});
+ TEST_BTF(str, struct btf_enum, BTF_F_NONAME, "{-1,}",
+ { .name_off = 0, .val = -1,});
+ TEST_BTF(str, struct btf_enum, BTF_F_NONAME|BTF_F_ZERO, "{0,-1,}",
+ { .name_off = 0, .val = -1,});
+ /* empty struct should be printed */
+ TEST_BTF(str, struct btf_enum, 0, "(struct btf_enum){}",
+ { .name_off = 0, .val = 0,});
+ TEST_BTF(str, struct btf_enum, BTF_F_NONAME, "{}",
+ { .name_off = 0, .val = 0,});
+ TEST_BTF(str, struct btf_enum, BTF_F_ZERO,
+ "(struct btf_enum){.name_off = (__u32)0,.val = (__s32)0,}",
+ { .name_off = 0, .val = 0,});
+
+ /* struct with pointers */
+ TEST_BTF(str, struct list_head, BTF_F_PTR_RAW,
+ "(struct list_head){.next = (struct list_head *)0x0000000000000001,}",
+ { .next = (struct list_head *)1 });
+ /* NULL pointer should not be displayed */
+ TEST_BTF(str, struct list_head, BTF_F_PTR_RAW,
+ "(struct list_head){}",
+ { .next = (struct list_head *)0 });
+
+ /* struct with char array */
+ TEST_BTF(str, struct bpf_prog_info, 0,
+ "(struct bpf_prog_info){.name = (char[])['f','o','o',],}",
+ { .name = "foo",});
+ TEST_BTF(str, struct bpf_prog_info, BTF_F_NONAME,
+ "{['f','o','o',],}",
+ {.name = "foo",});
+ /* leading null char means do not display string */
+ TEST_BTF(str, struct bpf_prog_info, 0,
+ "(struct bpf_prog_info){}",
+ {.name = {'\0', 'f', 'o', 'o'}});
+ /* handle non-printable characters */
+ TEST_BTF(str, struct bpf_prog_info, 0,
+ "(struct bpf_prog_info){.name = (char[])[1,2,3,],}",
+ { .name = {1, 2, 3, 0}});
+
+ /* struct with non-char array */
+ TEST_BTF(str, struct __sk_buff, 0,
+ "(struct __sk_buff){.cb = (__u32[])[1,2,3,4,5,],}",
+ { .cb = {1, 2, 3, 4, 5,},});
+ TEST_BTF(str, struct __sk_buff, BTF_F_NONAME,
+ "{[1,2,3,4,5,],}",
+ { .cb = { 1, 2, 3, 4, 5},});
+ /* For non-char, arrays, show non-zero values only */
+ TEST_BTF(str, struct __sk_buff, 0,
+ "(struct __sk_buff){.cb = (__u32[])[1,],}",
+ { .cb = { 0, 0, 1, 0, 0},});
+
+ /* struct with bitfields */
+ TEST_BTF_C(str, struct bpf_insn, 0,
+ {.code = (__u8)1,.dst_reg = (__u8)0x2,.src_reg = (__u8)0x3,.off = (__s16)4,.imm = (__s32)5,});
+ TEST_BTF(str, struct bpf_insn, BTF_F_NONAME, "{1,0x2,0x3,4,5,}",
+ {.code = 1, .dst_reg = 0x2, .src_reg = 0x3, .off = 4,
+ .imm = 5,});
+#else
+ skip = true;
+#endif
+
+ return 0;
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/netns_cookie_prog.c b/tools/testing/selftests/bpf/progs/netns_cookie_prog.c
new file mode 100644
index 000000000000..aeff3a4f9287
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/netns_cookie_prog.c
@@ -0,0 +1,84 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include "vmlinux.h"
+
+#include <bpf/bpf_helpers.h>
+
+#define AF_INET6 10
+
+struct {
+ __uint(type, BPF_MAP_TYPE_SK_STORAGE);
+ __uint(map_flags, BPF_F_NO_PREALLOC);
+ __type(key, int);
+ __type(value, int);
+} sockops_netns_cookies SEC(".maps");
+
+struct {
+ __uint(type, BPF_MAP_TYPE_SK_STORAGE);
+ __uint(map_flags, BPF_F_NO_PREALLOC);
+ __type(key, int);
+ __type(value, int);
+} sk_msg_netns_cookies SEC(".maps");
+
+struct {
+ __uint(type, BPF_MAP_TYPE_SOCKMAP);
+ __uint(max_entries, 2);
+ __type(key, __u32);
+ __type(value, __u64);
+} sock_map SEC(".maps");
+
+SEC("sockops")
+int get_netns_cookie_sockops(struct bpf_sock_ops *ctx)
+{
+ struct bpf_sock *sk = ctx->sk;
+ int *cookie;
+ __u32 key = 0;
+
+ if (ctx->family != AF_INET6)
+ return 1;
+
+ if (!sk)
+ return 1;
+
+ switch (ctx->op) {
+ case BPF_SOCK_OPS_TCP_CONNECT_CB:
+ cookie = bpf_sk_storage_get(&sockops_netns_cookies, sk, 0,
+ BPF_SK_STORAGE_GET_F_CREATE);
+ if (!cookie)
+ return 1;
+
+ *cookie = bpf_get_netns_cookie(ctx);
+ break;
+ case BPF_SOCK_OPS_ACTIVE_ESTABLISHED_CB:
+ bpf_sock_map_update(ctx, &sock_map, &key, BPF_NOEXIST);
+ break;
+ default:
+ break;
+ }
+
+ return 1;
+}
+
+SEC("sk_msg")
+int get_netns_cookie_sk_msg(struct sk_msg_md *msg)
+{
+ struct bpf_sock *sk = msg->sk;
+ int *cookie;
+
+ if (msg->family != AF_INET6)
+ return 1;
+
+ if (!sk)
+ return 1;
+
+ cookie = bpf_sk_storage_get(&sk_msg_netns_cookies, sk, 0,
+ BPF_SK_STORAGE_GET_F_CREATE);
+ if (!cookie)
+ return 1;
+
+ *cookie = bpf_get_netns_cookie(msg);
+
+ return 1;
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/normal_map_btf.c b/tools/testing/selftests/bpf/progs/normal_map_btf.c
new file mode 100644
index 000000000000..a45c9299552c
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/normal_map_btf.c
@@ -0,0 +1,56 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (C) 2023. Huawei Technologies Co., Ltd */
+#include <vmlinux.h>
+#include <bpf/bpf_tracing.h>
+#include <bpf/bpf_helpers.h>
+
+#include "bpf_misc.h"
+#include "bpf_experimental.h"
+
+struct node_data {
+ __u64 data;
+ struct bpf_list_node node;
+};
+
+struct map_value {
+ struct bpf_list_head head __contains(node_data, node);
+ struct bpf_spin_lock lock;
+};
+
+struct {
+ __uint(type, BPF_MAP_TYPE_ARRAY);
+ __type(key, int);
+ __type(value, struct map_value);
+ __uint(max_entries, 1);
+} array SEC(".maps");
+
+char _license[] SEC("license") = "GPL";
+
+int pid = 0;
+bool done = false;
+
+SEC("fentry/" SYS_PREFIX "sys_nanosleep")
+int add_to_list_in_array(void *ctx)
+{
+ struct map_value *value;
+ struct node_data *new;
+ int zero = 0;
+
+ if (done || (int)bpf_get_current_pid_tgid() != pid)
+ return 0;
+
+ value = bpf_map_lookup_elem(&array, &zero);
+ if (!value)
+ return 0;
+
+ new = bpf_obj_new(typeof(*new));
+ if (!new)
+ return 0;
+
+ bpf_spin_lock(&value->lock);
+ bpf_list_push_back(&value->head, &new->node);
+ bpf_spin_unlock(&value->lock);
+ done = true;
+
+ return 0;
+}
diff --git a/tools/testing/selftests/bpf/progs/percpu_alloc_array.c b/tools/testing/selftests/bpf/progs/percpu_alloc_array.c
new file mode 100644
index 000000000000..37c2d2608ec0
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/percpu_alloc_array.c
@@ -0,0 +1,190 @@
+#include "bpf_experimental.h"
+
+struct val_t {
+ long b, c, d;
+};
+
+struct elem {
+ long sum;
+ struct val_t __percpu_kptr *pc;
+};
+
+struct {
+ __uint(type, BPF_MAP_TYPE_ARRAY);
+ __uint(max_entries, 1);
+ __type(key, int);
+ __type(value, struct elem);
+} array SEC(".maps");
+
+void bpf_rcu_read_lock(void) __ksym;
+void bpf_rcu_read_unlock(void) __ksym;
+
+const volatile int nr_cpus;
+
+/* Initialize the percpu object */
+SEC("?fentry/bpf_fentry_test1")
+int BPF_PROG(test_array_map_1)
+{
+ struct val_t __percpu_kptr *p;
+ struct elem *e;
+ int index = 0;
+
+ e = bpf_map_lookup_elem(&array, &index);
+ if (!e)
+ return 0;
+
+ p = bpf_percpu_obj_new(struct val_t);
+ if (!p)
+ return 0;
+
+ p = bpf_kptr_xchg(&e->pc, p);
+ if (p)
+ bpf_percpu_obj_drop(p);
+
+ return 0;
+}
+
+/* Update percpu data */
+SEC("?fentry/bpf_fentry_test2")
+int BPF_PROG(test_array_map_2)
+{
+ struct val_t __percpu_kptr *p;
+ struct val_t *v;
+ struct elem *e;
+ int index = 0;
+
+ e = bpf_map_lookup_elem(&array, &index);
+ if (!e)
+ return 0;
+
+ p = e->pc;
+ if (!p)
+ return 0;
+
+ v = bpf_per_cpu_ptr(p, 0);
+ if (!v)
+ return 0;
+ v->c = 1;
+ v->d = 2;
+
+ return 0;
+}
+
+int cpu0_field_d, sum_field_c;
+int my_pid;
+
+/* Summarize percpu data */
+SEC("?fentry/bpf_fentry_test3")
+int BPF_PROG(test_array_map_3)
+{
+ struct val_t __percpu_kptr *p;
+ int i, index = 0;
+ struct val_t *v;
+ struct elem *e;
+
+ if ((bpf_get_current_pid_tgid() >> 32) != my_pid)
+ return 0;
+
+ e = bpf_map_lookup_elem(&array, &index);
+ if (!e)
+ return 0;
+
+ p = e->pc;
+ if (!p)
+ return 0;
+
+ bpf_for(i, 0, nr_cpus) {
+ v = bpf_per_cpu_ptr(p, i);
+ if (v) {
+ if (i == 0)
+ cpu0_field_d = v->d;
+ sum_field_c += v->c;
+ }
+ }
+
+ return 0;
+}
+
+/* Explicitly free allocated percpu data */
+SEC("?fentry/bpf_fentry_test4")
+int BPF_PROG(test_array_map_4)
+{
+ struct val_t __percpu_kptr *p;
+ struct elem *e;
+ int index = 0;
+
+ e = bpf_map_lookup_elem(&array, &index);
+ if (!e)
+ return 0;
+
+ /* delete */
+ p = bpf_kptr_xchg(&e->pc, NULL);
+ if (p) {
+ bpf_percpu_obj_drop(p);
+ }
+
+ return 0;
+}
+
+SEC("?fentry.s/bpf_fentry_test1")
+int BPF_PROG(test_array_map_10)
+{
+ struct val_t __percpu_kptr *p, *p1;
+ int i, index = 0;
+ struct val_t *v;
+ struct elem *e;
+
+ if ((bpf_get_current_pid_tgid() >> 32) != my_pid)
+ return 0;
+
+ e = bpf_map_lookup_elem(&array, &index);
+ if (!e)
+ return 0;
+
+ bpf_rcu_read_lock();
+ p = e->pc;
+ if (!p) {
+ p = bpf_percpu_obj_new(struct val_t);
+ if (!p)
+ goto out;
+
+ p1 = bpf_kptr_xchg(&e->pc, p);
+ if (p1) {
+ /* race condition */
+ bpf_percpu_obj_drop(p1);
+ }
+ }
+
+ v = bpf_this_cpu_ptr(p);
+ v->c = 3;
+ v = bpf_this_cpu_ptr(p);
+ v->c = 0;
+
+ v = bpf_per_cpu_ptr(p, 0);
+ if (!v)
+ goto out;
+ v->c = 1;
+ v->d = 2;
+
+ /* delete */
+ p1 = bpf_kptr_xchg(&e->pc, NULL);
+ if (!p1)
+ goto out;
+
+ bpf_for(i, 0, nr_cpus) {
+ v = bpf_per_cpu_ptr(p, i);
+ if (v) {
+ if (i == 0)
+ cpu0_field_d = v->d;
+ sum_field_c += v->c;
+ }
+ }
+
+ /* finally release p */
+ bpf_percpu_obj_drop(p1);
+out:
+ bpf_rcu_read_unlock();
+ return 0;
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/percpu_alloc_cgrp_local_storage.c b/tools/testing/selftests/bpf/progs/percpu_alloc_cgrp_local_storage.c
new file mode 100644
index 000000000000..a2acf9aa6c24
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/percpu_alloc_cgrp_local_storage.c
@@ -0,0 +1,109 @@
+#include "bpf_experimental.h"
+
+struct val_t {
+ long b, c, d;
+};
+
+struct elem {
+ long sum;
+ struct val_t __percpu_kptr *pc;
+};
+
+struct {
+ __uint(type, BPF_MAP_TYPE_CGRP_STORAGE);
+ __uint(map_flags, BPF_F_NO_PREALLOC);
+ __type(key, int);
+ __type(value, struct elem);
+} cgrp SEC(".maps");
+
+const volatile int nr_cpus;
+
+/* Initialize the percpu object */
+SEC("fentry/bpf_fentry_test1")
+int BPF_PROG(test_cgrp_local_storage_1)
+{
+ struct task_struct *task;
+ struct val_t __percpu_kptr *p;
+ struct elem *e;
+
+ task = bpf_get_current_task_btf();
+ e = bpf_cgrp_storage_get(&cgrp, task->cgroups->dfl_cgrp, 0,
+ BPF_LOCAL_STORAGE_GET_F_CREATE);
+ if (!e)
+ return 0;
+
+ p = bpf_percpu_obj_new(struct val_t);
+ if (!p)
+ return 0;
+
+ p = bpf_kptr_xchg(&e->pc, p);
+ if (p)
+ bpf_percpu_obj_drop(p);
+
+ return 0;
+}
+
+/* Percpu data collection */
+SEC("fentry/bpf_fentry_test2")
+int BPF_PROG(test_cgrp_local_storage_2)
+{
+ struct task_struct *task;
+ struct val_t __percpu_kptr *p;
+ struct val_t *v;
+ struct elem *e;
+
+ task = bpf_get_current_task_btf();
+ e = bpf_cgrp_storage_get(&cgrp, task->cgroups->dfl_cgrp, 0, 0);
+ if (!e)
+ return 0;
+
+ p = e->pc;
+ if (!p)
+ return 0;
+
+ v = bpf_per_cpu_ptr(p, 0);
+ if (!v)
+ return 0;
+ v->c = 1;
+ v->d = 2;
+ return 0;
+}
+
+int cpu0_field_d, sum_field_c;
+int my_pid;
+
+/* Summarize percpu data collection */
+SEC("fentry/bpf_fentry_test3")
+int BPF_PROG(test_cgrp_local_storage_3)
+{
+ struct task_struct *task;
+ struct val_t __percpu_kptr *p;
+ struct val_t *v;
+ struct elem *e;
+ int i;
+
+ if ((bpf_get_current_pid_tgid() >> 32) != my_pid)
+ return 0;
+
+ task = bpf_get_current_task_btf();
+ e = bpf_cgrp_storage_get(&cgrp, task->cgroups->dfl_cgrp, 0, 0);
+ if (!e)
+ return 0;
+
+ p = e->pc;
+ if (!p)
+ return 0;
+
+ bpf_for(i, 0, nr_cpus) {
+ v = bpf_per_cpu_ptr(p, i);
+ if (v) {
+ if (i == 0)
+ cpu0_field_d = v->d;
+ sum_field_c += v->c;
+ }
+ }
+
+ return 0;
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/percpu_alloc_fail.c b/tools/testing/selftests/bpf/progs/percpu_alloc_fail.c
new file mode 100644
index 000000000000..f2b8eb2ff76f
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/percpu_alloc_fail.c
@@ -0,0 +1,182 @@
+#include "bpf_experimental.h"
+#include "bpf_misc.h"
+
+struct val_t {
+ long b, c, d;
+};
+
+struct val2_t {
+ long b;
+};
+
+struct val_with_ptr_t {
+ char *p;
+};
+
+struct val_with_rb_root_t {
+ struct bpf_spin_lock lock;
+};
+
+struct val_600b_t {
+ char b[600];
+};
+
+struct elem {
+ long sum;
+ struct val_t __percpu_kptr *pc;
+};
+
+struct {
+ __uint(type, BPF_MAP_TYPE_ARRAY);
+ __uint(max_entries, 1);
+ __type(key, int);
+ __type(value, struct elem);
+} array SEC(".maps");
+
+long ret;
+
+SEC("?fentry/bpf_fentry_test1")
+__failure __msg("store to referenced kptr disallowed")
+int BPF_PROG(test_array_map_1)
+{
+ struct val_t __percpu_kptr *p;
+ struct elem *e;
+ int index = 0;
+
+ e = bpf_map_lookup_elem(&array, &index);
+ if (!e)
+ return 0;
+
+ p = bpf_percpu_obj_new(struct val_t);
+ if (!p)
+ return 0;
+
+ p = bpf_kptr_xchg(&e->pc, p);
+ if (p)
+ bpf_percpu_obj_drop(p);
+
+ e->pc = (struct val_t __percpu_kptr *)ret;
+ return 0;
+}
+
+SEC("?fentry/bpf_fentry_test1")
+__failure __msg("invalid kptr access, R2 type=percpu_ptr_val2_t expected=ptr_val_t")
+int BPF_PROG(test_array_map_2)
+{
+ struct val2_t __percpu_kptr *p2;
+ struct val_t __percpu_kptr *p;
+ struct elem *e;
+ int index = 0;
+
+ e = bpf_map_lookup_elem(&array, &index);
+ if (!e)
+ return 0;
+
+ p2 = bpf_percpu_obj_new(struct val2_t);
+ if (!p2)
+ return 0;
+
+ p = bpf_kptr_xchg(&e->pc, p2);
+ if (p)
+ bpf_percpu_obj_drop(p);
+
+ return 0;
+}
+
+SEC("?fentry.s/bpf_fentry_test1")
+__failure __msg("R1 type=scalar expected=percpu_ptr_, percpu_rcu_ptr_, percpu_trusted_ptr_")
+int BPF_PROG(test_array_map_3)
+{
+ struct val_t __percpu_kptr *p, *p1;
+ struct val_t *v;
+ struct elem *e;
+ int index = 0;
+
+ e = bpf_map_lookup_elem(&array, &index);
+ if (!e)
+ return 0;
+
+ p = bpf_percpu_obj_new(struct val_t);
+ if (!p)
+ return 0;
+
+ p1 = bpf_kptr_xchg(&e->pc, p);
+ if (p1)
+ bpf_percpu_obj_drop(p1);
+
+ v = bpf_this_cpu_ptr(p);
+ ret = v->b;
+ return 0;
+}
+
+SEC("?fentry.s/bpf_fentry_test1")
+__failure __msg("arg#0 expected for bpf_percpu_obj_drop_impl()")
+int BPF_PROG(test_array_map_4)
+{
+ struct val_t __percpu_kptr *p;
+
+ p = bpf_percpu_obj_new(struct val_t);
+ if (!p)
+ return 0;
+
+ bpf_obj_drop(p);
+ return 0;
+}
+
+SEC("?fentry.s/bpf_fentry_test1")
+__failure __msg("arg#0 expected for bpf_obj_drop_impl()")
+int BPF_PROG(test_array_map_5)
+{
+ struct val_t *p;
+
+ p = bpf_obj_new(struct val_t);
+ if (!p)
+ return 0;
+
+ bpf_percpu_obj_drop(p);
+ return 0;
+}
+
+SEC("?fentry.s/bpf_fentry_test1")
+__failure __msg("bpf_percpu_obj_new type ID argument must be of a struct of scalars")
+int BPF_PROG(test_array_map_6)
+{
+ struct val_with_ptr_t __percpu_kptr *p;
+
+ p = bpf_percpu_obj_new(struct val_with_ptr_t);
+ if (!p)
+ return 0;
+
+ bpf_percpu_obj_drop(p);
+ return 0;
+}
+
+SEC("?fentry.s/bpf_fentry_test1")
+__failure __msg("bpf_percpu_obj_new type ID argument must not contain special fields")
+int BPF_PROG(test_array_map_7)
+{
+ struct val_with_rb_root_t __percpu_kptr *p;
+
+ p = bpf_percpu_obj_new(struct val_with_rb_root_t);
+ if (!p)
+ return 0;
+
+ bpf_percpu_obj_drop(p);
+ return 0;
+}
+
+SEC("?fentry.s/bpf_fentry_test1")
+__failure __msg("bpf_percpu_obj_new type size (600) is greater than 512")
+int BPF_PROG(test_array_map_8)
+{
+ struct val_600b_t __percpu_kptr *p;
+
+ p = bpf_percpu_obj_new(struct val_600b_t);
+ if (!p)
+ return 0;
+
+ bpf_percpu_obj_drop(p);
+ return 0;
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/perf_event_stackmap.c b/tools/testing/selftests/bpf/progs/perf_event_stackmap.c
new file mode 100644
index 000000000000..f793280a3238
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/perf_event_stackmap.c
@@ -0,0 +1,59 @@
+// SPDX-License-Identifier: GPL-2.0
+// Copyright (c) 2020 Facebook
+#include "vmlinux.h"
+#include <bpf/bpf_helpers.h>
+
+#ifndef PERF_MAX_STACK_DEPTH
+#define PERF_MAX_STACK_DEPTH 127
+#endif
+
+typedef __u64 stack_trace_t[PERF_MAX_STACK_DEPTH];
+struct {
+ __uint(type, BPF_MAP_TYPE_STACK_TRACE);
+ __uint(max_entries, 16384);
+ __type(key, __u32);
+ __type(value, stack_trace_t);
+} stackmap SEC(".maps");
+
+struct {
+ __uint(type, BPF_MAP_TYPE_PERCPU_ARRAY);
+ __uint(max_entries, 1);
+ __type(key, __u32);
+ __type(value, stack_trace_t);
+} stackdata_map SEC(".maps");
+
+long stackid_kernel = 1;
+long stackid_user = 1;
+long stack_kernel = 1;
+long stack_user = 1;
+
+SEC("perf_event")
+int oncpu(void *ctx)
+{
+ stack_trace_t *trace;
+ __u32 key = 0;
+ long val;
+
+ val = bpf_get_stackid(ctx, &stackmap, 0);
+ if (val >= 0)
+ stackid_kernel = 2;
+ val = bpf_get_stackid(ctx, &stackmap, BPF_F_USER_STACK);
+ if (val >= 0)
+ stackid_user = 2;
+
+ trace = bpf_map_lookup_elem(&stackdata_map, &key);
+ if (!trace)
+ return 0;
+
+ val = bpf_get_stack(ctx, trace, sizeof(stack_trace_t), 0);
+ if (val > 0)
+ stack_kernel = 2;
+
+ val = bpf_get_stack(ctx, trace, sizeof(stack_trace_t), BPF_F_USER_STACK);
+ if (val > 0)
+ stack_user = 2;
+
+ return 0;
+}
+
+char LICENSE[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/perfbuf_bench.c b/tools/testing/selftests/bpf/progs/perfbuf_bench.c
index e5ab4836a641..29c1639fc78a 100644
--- a/tools/testing/selftests/bpf/progs/perfbuf_bench.c
+++ b/tools/testing/selftests/bpf/progs/perfbuf_bench.c
@@ -4,6 +4,7 @@
#include <linux/bpf.h>
#include <stdint.h>
#include <bpf/bpf_helpers.h>
+#include "bpf_misc.h"
char _license[] SEC("license") = "GPL";
@@ -18,10 +19,9 @@ const volatile int batch_cnt = 0;
long sample_val = 42;
long dropped __attribute__((aligned(128))) = 0;
-SEC("fentry/__x64_sys_getpgid")
+SEC("fentry/" SYS_PREFIX "sys_getpgid")
int bench_perfbuf(void *ctx)
{
- __u64 *sample;
int i;
for (i = 0; i < batch_cnt; i++) {
diff --git a/tools/testing/selftests/bpf/progs/preempted_bpf_ma_op.c b/tools/testing/selftests/bpf/progs/preempted_bpf_ma_op.c
new file mode 100644
index 000000000000..55907ef961bf
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/preempted_bpf_ma_op.c
@@ -0,0 +1,106 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (C) 2023. Huawei Technologies Co., Ltd */
+#include <vmlinux.h>
+#include <bpf/bpf_tracing.h>
+#include <bpf/bpf_helpers.h>
+
+#include "bpf_experimental.h"
+
+struct bin_data {
+ char data[256];
+ struct bpf_spin_lock lock;
+};
+
+struct map_value {
+ struct bin_data __kptr * data;
+};
+
+struct {
+ __uint(type, BPF_MAP_TYPE_ARRAY);
+ __type(key, int);
+ __type(value, struct map_value);
+ __uint(max_entries, 2048);
+} array SEC(".maps");
+
+char _license[] SEC("license") = "GPL";
+
+bool nomem_err = false;
+
+static int del_array(unsigned int i, int *from)
+{
+ struct map_value *value;
+ struct bin_data *old;
+
+ value = bpf_map_lookup_elem(&array, from);
+ if (!value)
+ return 1;
+
+ old = bpf_kptr_xchg(&value->data, NULL);
+ if (old)
+ bpf_obj_drop(old);
+
+ (*from)++;
+ return 0;
+}
+
+static int add_array(unsigned int i, int *from)
+{
+ struct bin_data *old, *new;
+ struct map_value *value;
+
+ value = bpf_map_lookup_elem(&array, from);
+ if (!value)
+ return 1;
+
+ new = bpf_obj_new(typeof(*new));
+ if (!new) {
+ nomem_err = true;
+ return 1;
+ }
+
+ old = bpf_kptr_xchg(&value->data, new);
+ if (old)
+ bpf_obj_drop(old);
+
+ (*from)++;
+ return 0;
+}
+
+static void del_then_add_array(int from)
+{
+ int i;
+
+ i = from;
+ bpf_loop(512, del_array, &i, 0);
+
+ i = from;
+ bpf_loop(512, add_array, &i, 0);
+}
+
+SEC("fentry/bpf_fentry_test1")
+int BPF_PROG2(test0, int, a)
+{
+ del_then_add_array(0);
+ return 0;
+}
+
+SEC("fentry/bpf_fentry_test2")
+int BPF_PROG2(test1, int, a, u64, b)
+{
+ del_then_add_array(512);
+ return 0;
+}
+
+SEC("fentry/bpf_fentry_test3")
+int BPF_PROG2(test2, char, a, int, b, u64, c)
+{
+ del_then_add_array(1024);
+ return 0;
+}
+
+SEC("fentry/bpf_fentry_test4")
+int BPF_PROG2(test3, void *, a, char, b, int, c, u64, d)
+{
+ del_then_add_array(1536);
+ return 0;
+}
diff --git a/tools/testing/selftests/bpf/progs/priv_map.c b/tools/testing/selftests/bpf/progs/priv_map.c
new file mode 100644
index 000000000000..9085be50f03b
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/priv_map.c
@@ -0,0 +1,13 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2023 Meta Platforms, Inc. and affiliates. */
+
+#include "vmlinux.h"
+#include <bpf/bpf_helpers.h>
+
+char _license[] SEC("license") = "GPL";
+
+struct {
+ __uint(type, BPF_MAP_TYPE_QUEUE);
+ __uint(max_entries, 1);
+ __type(value, __u32);
+} priv_map SEC(".maps");
diff --git a/tools/testing/selftests/bpf/progs/priv_prog.c b/tools/testing/selftests/bpf/progs/priv_prog.c
new file mode 100644
index 000000000000..3c7b2b618c8a
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/priv_prog.c
@@ -0,0 +1,13 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2023 Meta Platforms, Inc. and affiliates. */
+
+#include "vmlinux.h"
+#include <bpf/bpf_helpers.h>
+
+char _license[] SEC("license") = "GPL";
+
+SEC("kprobe")
+int kprobe_prog(void *ctx)
+{
+ return 1;
+}
diff --git a/tools/testing/selftests/bpf/progs/profiler.h b/tools/testing/selftests/bpf/progs/profiler.h
new file mode 100644
index 000000000000..3bac4fdd4bdf
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/profiler.h
@@ -0,0 +1,177 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2020 Facebook */
+#pragma once
+
+#define TASK_COMM_LEN 16
+#define MAX_ANCESTORS 4
+#define MAX_PATH 256
+#define KILL_TARGET_LEN 64
+#define CTL_MAXNAME 10
+#define MAX_ARGS_LEN 4096
+#define MAX_FILENAME_LEN 512
+#define MAX_ENVIRON_LEN 8192
+#define MAX_PATH_DEPTH 32
+#define MAX_FILEPATH_LENGTH (MAX_PATH_DEPTH * MAX_PATH)
+#define MAX_CGROUPS_PATH_DEPTH 8
+
+#define MAX_METADATA_PAYLOAD_LEN TASK_COMM_LEN
+
+#define MAX_CGROUP_PAYLOAD_LEN \
+ (MAX_PATH * 2 + (MAX_PATH * MAX_CGROUPS_PATH_DEPTH))
+
+#define MAX_CAP_PAYLOAD_LEN (MAX_METADATA_PAYLOAD_LEN + MAX_CGROUP_PAYLOAD_LEN)
+
+#define MAX_SYSCTL_PAYLOAD_LEN \
+ (MAX_METADATA_PAYLOAD_LEN + MAX_CGROUP_PAYLOAD_LEN + CTL_MAXNAME + MAX_PATH)
+
+#define MAX_KILL_PAYLOAD_LEN \
+ (MAX_METADATA_PAYLOAD_LEN + MAX_CGROUP_PAYLOAD_LEN + TASK_COMM_LEN + \
+ KILL_TARGET_LEN)
+
+#define MAX_EXEC_PAYLOAD_LEN \
+ (MAX_METADATA_PAYLOAD_LEN + MAX_CGROUP_PAYLOAD_LEN + MAX_FILENAME_LEN + \
+ MAX_ARGS_LEN + MAX_ENVIRON_LEN)
+
+#define MAX_FILEMOD_PAYLOAD_LEN \
+ (MAX_METADATA_PAYLOAD_LEN + MAX_CGROUP_PAYLOAD_LEN + MAX_FILEPATH_LENGTH + \
+ MAX_FILEPATH_LENGTH)
+
+enum data_type {
+ INVALID_EVENT,
+ EXEC_EVENT,
+ FORK_EVENT,
+ KILL_EVENT,
+ SYSCTL_EVENT,
+ FILEMOD_EVENT,
+ MAX_DATA_TYPE_EVENT
+};
+
+enum filemod_type {
+ FMOD_OPEN,
+ FMOD_LINK,
+ FMOD_SYMLINK,
+};
+
+struct ancestors_data_t {
+ pid_t ancestor_pids[MAX_ANCESTORS];
+ uint32_t ancestor_exec_ids[MAX_ANCESTORS];
+ uint64_t ancestor_start_times[MAX_ANCESTORS];
+ uint32_t num_ancestors;
+};
+
+struct var_metadata_t {
+ enum data_type type;
+ pid_t pid;
+ uint32_t exec_id;
+ uid_t uid;
+ gid_t gid;
+ uint64_t start_time;
+ uint32_t cpu_id;
+ uint64_t bpf_stats_num_perf_events;
+ uint64_t bpf_stats_start_ktime_ns;
+ uint8_t comm_length;
+};
+
+struct cgroup_data_t {
+ ino_t cgroup_root_inode;
+ ino_t cgroup_proc_inode;
+ uint64_t cgroup_root_mtime;
+ uint64_t cgroup_proc_mtime;
+ uint16_t cgroup_root_length;
+ uint16_t cgroup_proc_length;
+ uint16_t cgroup_full_length;
+ int cgroup_full_path_root_pos;
+};
+
+struct var_sysctl_data_t {
+ struct var_metadata_t meta;
+ struct cgroup_data_t cgroup_data;
+ struct ancestors_data_t ancestors_info;
+ uint8_t sysctl_val_length;
+ uint16_t sysctl_path_length;
+ char payload[MAX_SYSCTL_PAYLOAD_LEN];
+};
+
+struct var_kill_data_t {
+ struct var_metadata_t meta;
+ struct cgroup_data_t cgroup_data;
+ struct ancestors_data_t ancestors_info;
+ pid_t kill_target_pid;
+ int kill_sig;
+ uint32_t kill_count;
+ uint64_t last_kill_time;
+ uint8_t kill_target_name_length;
+ uint8_t kill_target_cgroup_proc_length;
+ char payload[MAX_KILL_PAYLOAD_LEN];
+ size_t payload_length;
+};
+
+struct var_exec_data_t {
+ struct var_metadata_t meta;
+ struct cgroup_data_t cgroup_data;
+ pid_t parent_pid;
+ uint32_t parent_exec_id;
+ uid_t parent_uid;
+ uint64_t parent_start_time;
+ uint16_t bin_path_length;
+ uint16_t cmdline_length;
+ uint16_t environment_length;
+ char payload[MAX_EXEC_PAYLOAD_LEN];
+};
+
+struct var_fork_data_t {
+ struct var_metadata_t meta;
+ pid_t parent_pid;
+ uint32_t parent_exec_id;
+ uint64_t parent_start_time;
+ char payload[MAX_METADATA_PAYLOAD_LEN];
+};
+
+struct var_filemod_data_t {
+ struct var_metadata_t meta;
+ struct cgroup_data_t cgroup_data;
+ enum filemod_type fmod_type;
+ unsigned int dst_flags;
+ uint32_t src_device_id;
+ uint32_t dst_device_id;
+ ino_t src_inode;
+ ino_t dst_inode;
+ uint16_t src_filepath_length;
+ uint16_t dst_filepath_length;
+ char payload[MAX_FILEMOD_PAYLOAD_LEN];
+};
+
+struct profiler_config_struct {
+ bool fetch_cgroups_from_bpf;
+ ino_t cgroup_fs_inode;
+ ino_t cgroup_login_session_inode;
+ uint64_t kill_signals_mask;
+ ino_t inode_filter;
+ uint32_t stale_info_secs;
+ bool use_variable_buffers;
+ bool read_environ_from_exec;
+ bool enable_cgroup_v1_resolver;
+};
+
+struct bpf_func_stats_data {
+ uint64_t time_elapsed_ns;
+ uint64_t num_executions;
+ uint64_t num_perf_events;
+};
+
+struct bpf_func_stats_ctx {
+ uint64_t start_time_ns;
+ struct bpf_func_stats_data* bpf_func_stats_data_val;
+};
+
+enum bpf_function_id {
+ profiler_bpf_proc_sys_write,
+ profiler_bpf_sched_process_exec,
+ profiler_bpf_sched_process_exit,
+ profiler_bpf_sys_enter_kill,
+ profiler_bpf_do_filp_open_ret,
+ profiler_bpf_sched_process_fork,
+ profiler_bpf_vfs_link,
+ profiler_bpf_vfs_symlink,
+ profiler_bpf_max_function_id
+};
diff --git a/tools/testing/selftests/bpf/progs/profiler.inc.h b/tools/testing/selftests/bpf/progs/profiler.inc.h
new file mode 100644
index 000000000000..6957d9f2805e
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/profiler.inc.h
@@ -0,0 +1,963 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2020 Facebook */
+#include <vmlinux.h>
+#include <bpf/bpf_core_read.h>
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
+
+#include "profiler.h"
+#include "err.h"
+#include "bpf_experimental.h"
+#include "bpf_compiler.h"
+
+#ifndef NULL
+#define NULL 0
+#endif
+
+#define O_WRONLY 00000001
+#define O_RDWR 00000002
+#define O_DIRECTORY 00200000
+#define __O_TMPFILE 020000000
+#define O_TMPFILE (__O_TMPFILE | O_DIRECTORY)
+#define S_IFMT 00170000
+#define S_IFSOCK 0140000
+#define S_IFLNK 0120000
+#define S_IFREG 0100000
+#define S_IFBLK 0060000
+#define S_IFDIR 0040000
+#define S_IFCHR 0020000
+#define S_IFIFO 0010000
+#define S_ISUID 0004000
+#define S_ISGID 0002000
+#define S_ISVTX 0001000
+#define S_ISLNK(m) (((m)&S_IFMT) == S_IFLNK)
+#define S_ISDIR(m) (((m)&S_IFMT) == S_IFDIR)
+#define S_ISCHR(m) (((m)&S_IFMT) == S_IFCHR)
+#define S_ISBLK(m) (((m)&S_IFMT) == S_IFBLK)
+#define S_ISFIFO(m) (((m)&S_IFMT) == S_IFIFO)
+#define S_ISSOCK(m) (((m)&S_IFMT) == S_IFSOCK)
+
+#define KILL_DATA_ARRAY_SIZE 8
+
+struct var_kill_data_arr_t {
+ struct var_kill_data_t array[KILL_DATA_ARRAY_SIZE];
+};
+
+union any_profiler_data_t {
+ struct var_exec_data_t var_exec;
+ struct var_kill_data_t var_kill;
+ struct var_sysctl_data_t var_sysctl;
+ struct var_filemod_data_t var_filemod;
+ struct var_fork_data_t var_fork;
+ struct var_kill_data_arr_t var_kill_data_arr;
+};
+
+volatile struct profiler_config_struct bpf_config = {};
+
+#define FETCH_CGROUPS_FROM_BPF (bpf_config.fetch_cgroups_from_bpf)
+#define CGROUP_FS_INODE (bpf_config.cgroup_fs_inode)
+#define CGROUP_LOGIN_SESSION_INODE \
+ (bpf_config.cgroup_login_session_inode)
+#define KILL_SIGNALS (bpf_config.kill_signals_mask)
+#define STALE_INFO (bpf_config.stale_info_secs)
+#define INODE_FILTER (bpf_config.inode_filter)
+#define READ_ENVIRON_FROM_EXEC (bpf_config.read_environ_from_exec)
+#define ENABLE_CGROUP_V1_RESOLVER (bpf_config.enable_cgroup_v1_resolver)
+
+struct kernfs_iattrs___52 {
+ struct iattr ia_iattr;
+};
+
+struct kernfs_node___52 {
+ union /* kernfs_node_id */ {
+ struct {
+ u32 ino;
+ u32 generation;
+ };
+ u64 id;
+ } id;
+};
+
+struct {
+ __uint(type, BPF_MAP_TYPE_PERCPU_ARRAY);
+ __uint(max_entries, 1);
+ __type(key, u32);
+ __type(value, union any_profiler_data_t);
+} data_heap SEC(".maps");
+
+struct {
+ __uint(type, BPF_MAP_TYPE_PERF_EVENT_ARRAY);
+ __uint(key_size, sizeof(int));
+ __uint(value_size, sizeof(int));
+} events SEC(".maps");
+
+struct {
+ __uint(type, BPF_MAP_TYPE_HASH);
+ __uint(max_entries, KILL_DATA_ARRAY_SIZE);
+ __type(key, u32);
+ __type(value, struct var_kill_data_arr_t);
+} var_tpid_to_data SEC(".maps");
+
+struct {
+ __uint(type, BPF_MAP_TYPE_PERCPU_ARRAY);
+ __uint(max_entries, profiler_bpf_max_function_id);
+ __type(key, u32);
+ __type(value, struct bpf_func_stats_data);
+} bpf_func_stats SEC(".maps");
+
+struct {
+ __uint(type, BPF_MAP_TYPE_HASH);
+ __type(key, u32);
+ __type(value, bool);
+ __uint(max_entries, 16);
+} allowed_devices SEC(".maps");
+
+struct {
+ __uint(type, BPF_MAP_TYPE_HASH);
+ __type(key, u64);
+ __type(value, bool);
+ __uint(max_entries, 1024);
+} allowed_file_inodes SEC(".maps");
+
+struct {
+ __uint(type, BPF_MAP_TYPE_HASH);
+ __type(key, u64);
+ __type(value, bool);
+ __uint(max_entries, 1024);
+} allowed_directory_inodes SEC(".maps");
+
+struct {
+ __uint(type, BPF_MAP_TYPE_HASH);
+ __type(key, u32);
+ __type(value, bool);
+ __uint(max_entries, 16);
+} disallowed_exec_inodes SEC(".maps");
+
+#ifndef ARRAY_SIZE
+#define ARRAY_SIZE(arr) (int)(sizeof(arr) / sizeof(arr[0]))
+#endif
+
+static INLINE bool IS_ERR(const void* ptr)
+{
+ return IS_ERR_VALUE((unsigned long)ptr);
+}
+
+static INLINE u32 get_userspace_pid()
+{
+ return bpf_get_current_pid_tgid() >> 32;
+}
+
+static INLINE bool is_init_process(u32 tgid)
+{
+ return tgid == 1 || tgid == 0;
+}
+
+static INLINE unsigned long
+probe_read_lim(void* dst, void* src, unsigned long len, unsigned long max)
+{
+ len = len < max ? len : max;
+ if (len > 1) {
+ if (bpf_probe_read_kernel(dst, len, src))
+ return 0;
+ } else if (len == 1) {
+ if (bpf_probe_read_kernel(dst, 1, src))
+ return 0;
+ }
+ return len;
+}
+
+static INLINE int get_var_spid_index(struct var_kill_data_arr_t* arr_struct,
+ int spid)
+{
+#ifdef UNROLL
+ __pragma_loop_unroll
+#endif
+ for (int i = 0; i < ARRAY_SIZE(arr_struct->array); i++)
+ if (arr_struct->array[i].meta.pid == spid)
+ return i;
+ return -1;
+}
+
+static INLINE void populate_ancestors(struct task_struct* task,
+ struct ancestors_data_t* ancestors_data)
+{
+ struct task_struct* parent = task;
+ u32 num_ancestors, ppid;
+
+ ancestors_data->num_ancestors = 0;
+#ifdef UNROLL
+ __pragma_loop_unroll
+#endif
+ for (num_ancestors = 0; num_ancestors < MAX_ANCESTORS; num_ancestors++) {
+ parent = BPF_CORE_READ(parent, real_parent);
+ if (parent == NULL)
+ break;
+ ppid = BPF_CORE_READ(parent, tgid);
+ if (is_init_process(ppid))
+ break;
+ ancestors_data->ancestor_pids[num_ancestors] = ppid;
+ ancestors_data->ancestor_exec_ids[num_ancestors] =
+ BPF_CORE_READ(parent, self_exec_id);
+ ancestors_data->ancestor_start_times[num_ancestors] =
+ BPF_CORE_READ(parent, start_time);
+ ancestors_data->num_ancestors = num_ancestors;
+ }
+}
+
+static INLINE void* read_full_cgroup_path(struct kernfs_node* cgroup_node,
+ struct kernfs_node* cgroup_root_node,
+ void* payload,
+ int* root_pos)
+{
+ void* payload_start = payload;
+ size_t filepart_length;
+
+#ifdef UNROLL
+ __pragma_loop_unroll
+#endif
+ for (int i = 0; i < MAX_CGROUPS_PATH_DEPTH; i++) {
+ filepart_length =
+ bpf_probe_read_kernel_str(payload, MAX_PATH,
+ BPF_CORE_READ(cgroup_node, name));
+ if (!cgroup_node)
+ return payload;
+ if (cgroup_node == cgroup_root_node)
+ *root_pos = payload - payload_start;
+ if (bpf_cmp_likely(filepart_length, <=, MAX_PATH)) {
+ payload += filepart_length;
+ }
+ cgroup_node = BPF_CORE_READ(cgroup_node, parent);
+ }
+ return payload;
+}
+
+static ino_t get_inode_from_kernfs(struct kernfs_node* node)
+{
+ struct kernfs_node___52* node52 = (void*)node;
+
+ if (bpf_core_field_exists(node52->id.ino)) {
+ barrier_var(node52);
+ return BPF_CORE_READ(node52, id.ino);
+ } else {
+ barrier_var(node);
+ return (u64)BPF_CORE_READ(node, id);
+ }
+}
+
+extern bool CONFIG_CGROUP_PIDS __kconfig __weak;
+enum cgroup_subsys_id___local {
+ pids_cgrp_id___local = 123, /* value doesn't matter */
+};
+
+static INLINE void* populate_cgroup_info(struct cgroup_data_t* cgroup_data,
+ struct task_struct* task,
+ void* payload)
+{
+ struct kernfs_node* root_kernfs =
+ BPF_CORE_READ(task, nsproxy, cgroup_ns, root_cset, dfl_cgrp, kn);
+ struct kernfs_node* proc_kernfs = BPF_CORE_READ(task, cgroups, dfl_cgrp, kn);
+
+#if __has_builtin(__builtin_preserve_enum_value)
+ if (ENABLE_CGROUP_V1_RESOLVER && CONFIG_CGROUP_PIDS) {
+ int cgrp_id = bpf_core_enum_value(enum cgroup_subsys_id___local,
+ pids_cgrp_id___local);
+#ifdef UNROLL
+ __pragma_loop_unroll
+#endif
+ for (int i = 0; i < CGROUP_SUBSYS_COUNT; i++) {
+ struct cgroup_subsys_state* subsys =
+ BPF_CORE_READ(task, cgroups, subsys[i]);
+ if (subsys != NULL) {
+ int subsys_id = BPF_CORE_READ(subsys, ss, id);
+ if (subsys_id == cgrp_id) {
+ proc_kernfs = BPF_CORE_READ(subsys, cgroup, kn);
+ root_kernfs = BPF_CORE_READ(subsys, ss, root, kf_root, kn);
+ break;
+ }
+ }
+ }
+ }
+#endif
+
+ cgroup_data->cgroup_root_inode = get_inode_from_kernfs(root_kernfs);
+ cgroup_data->cgroup_proc_inode = get_inode_from_kernfs(proc_kernfs);
+
+ if (bpf_core_field_exists(root_kernfs->iattr->ia_mtime)) {
+ cgroup_data->cgroup_root_mtime =
+ BPF_CORE_READ(root_kernfs, iattr, ia_mtime.tv_nsec);
+ cgroup_data->cgroup_proc_mtime =
+ BPF_CORE_READ(proc_kernfs, iattr, ia_mtime.tv_nsec);
+ } else {
+ struct kernfs_iattrs___52* root_iattr =
+ (struct kernfs_iattrs___52*)BPF_CORE_READ(root_kernfs, iattr);
+ cgroup_data->cgroup_root_mtime =
+ BPF_CORE_READ(root_iattr, ia_iattr.ia_mtime.tv_nsec);
+
+ struct kernfs_iattrs___52* proc_iattr =
+ (struct kernfs_iattrs___52*)BPF_CORE_READ(proc_kernfs, iattr);
+ cgroup_data->cgroup_proc_mtime =
+ BPF_CORE_READ(proc_iattr, ia_iattr.ia_mtime.tv_nsec);
+ }
+
+ cgroup_data->cgroup_root_length = 0;
+ cgroup_data->cgroup_proc_length = 0;
+ cgroup_data->cgroup_full_length = 0;
+
+ size_t cgroup_root_length =
+ bpf_probe_read_kernel_str(payload, MAX_PATH,
+ BPF_CORE_READ(root_kernfs, name));
+ if (bpf_cmp_likely(cgroup_root_length, <=, MAX_PATH)) {
+ cgroup_data->cgroup_root_length = cgroup_root_length;
+ payload += cgroup_root_length;
+ }
+
+ size_t cgroup_proc_length =
+ bpf_probe_read_kernel_str(payload, MAX_PATH,
+ BPF_CORE_READ(proc_kernfs, name));
+ if (bpf_cmp_likely(cgroup_proc_length, <=, MAX_PATH)) {
+ cgroup_data->cgroup_proc_length = cgroup_proc_length;
+ payload += cgroup_proc_length;
+ }
+
+ if (FETCH_CGROUPS_FROM_BPF) {
+ cgroup_data->cgroup_full_path_root_pos = -1;
+ void* payload_end_pos = read_full_cgroup_path(proc_kernfs, root_kernfs, payload,
+ &cgroup_data->cgroup_full_path_root_pos);
+ cgroup_data->cgroup_full_length = payload_end_pos - payload;
+ payload = payload_end_pos;
+ }
+
+ return (void*)payload;
+}
+
+static INLINE void* populate_var_metadata(struct var_metadata_t* metadata,
+ struct task_struct* task,
+ u32 pid, void* payload)
+{
+ u64 uid_gid = bpf_get_current_uid_gid();
+
+ metadata->uid = (u32)uid_gid;
+ metadata->gid = uid_gid >> 32;
+ metadata->pid = pid;
+ metadata->exec_id = BPF_CORE_READ(task, self_exec_id);
+ metadata->start_time = BPF_CORE_READ(task, start_time);
+ metadata->comm_length = 0;
+
+ size_t comm_length = bpf_core_read_str(payload, TASK_COMM_LEN, &task->comm);
+ if (bpf_cmp_likely(comm_length, <=, TASK_COMM_LEN)) {
+ metadata->comm_length = comm_length;
+ payload += comm_length;
+ }
+
+ return (void*)payload;
+}
+
+static INLINE struct var_kill_data_t*
+get_var_kill_data(struct pt_regs* ctx, int spid, int tpid, int sig)
+{
+ int zero = 0;
+ struct var_kill_data_t* kill_data = bpf_map_lookup_elem(&data_heap, &zero);
+
+ if (kill_data == NULL)
+ return NULL;
+ struct task_struct* task = (struct task_struct*)bpf_get_current_task();
+
+ void* payload = populate_var_metadata(&kill_data->meta, task, spid, kill_data->payload);
+ payload = populate_cgroup_info(&kill_data->cgroup_data, task, payload);
+ size_t payload_length = payload - (void*)kill_data->payload;
+ kill_data->payload_length = payload_length;
+ populate_ancestors(task, &kill_data->ancestors_info);
+ kill_data->meta.type = KILL_EVENT;
+ kill_data->kill_target_pid = tpid;
+ kill_data->kill_sig = sig;
+ kill_data->kill_count = 1;
+ kill_data->last_kill_time = bpf_ktime_get_ns();
+ return kill_data;
+}
+
+static INLINE int trace_var_sys_kill(void* ctx, int tpid, int sig)
+{
+ if ((KILL_SIGNALS & (1ULL << sig)) == 0)
+ return 0;
+
+ u32 spid = get_userspace_pid();
+ struct var_kill_data_arr_t* arr_struct = bpf_map_lookup_elem(&var_tpid_to_data, &tpid);
+
+ if (arr_struct == NULL) {
+ struct var_kill_data_t* kill_data = get_var_kill_data(ctx, spid, tpid, sig);
+ int zero = 0;
+
+ if (kill_data == NULL)
+ return 0;
+ arr_struct = bpf_map_lookup_elem(&data_heap, &zero);
+ if (arr_struct == NULL)
+ return 0;
+ bpf_probe_read_kernel(&arr_struct->array[0],
+ sizeof(arr_struct->array[0]), kill_data);
+ } else {
+ int index = get_var_spid_index(arr_struct, spid);
+
+ if (index == -1) {
+ struct var_kill_data_t* kill_data =
+ get_var_kill_data(ctx, spid, tpid, sig);
+ if (kill_data == NULL)
+ return 0;
+#ifdef UNROLL
+ __pragma_loop_unroll
+#endif
+ for (int i = 0; i < ARRAY_SIZE(arr_struct->array); i++)
+ if (arr_struct->array[i].meta.pid == 0) {
+ bpf_probe_read_kernel(&arr_struct->array[i],
+ sizeof(arr_struct->array[i]),
+ kill_data);
+ bpf_map_update_elem(&var_tpid_to_data, &tpid,
+ arr_struct, 0);
+
+ return 0;
+ }
+ return 0;
+ }
+
+ struct var_kill_data_t* kill_data = &arr_struct->array[index];
+
+ u64 delta_sec =
+ (bpf_ktime_get_ns() - kill_data->last_kill_time) / 1000000000;
+
+ if (delta_sec < STALE_INFO) {
+ kill_data->kill_count++;
+ kill_data->last_kill_time = bpf_ktime_get_ns();
+ bpf_probe_read_kernel(&arr_struct->array[index],
+ sizeof(arr_struct->array[index]),
+ kill_data);
+ } else {
+ struct var_kill_data_t* kill_data =
+ get_var_kill_data(ctx, spid, tpid, sig);
+ if (kill_data == NULL)
+ return 0;
+ bpf_probe_read_kernel(&arr_struct->array[index],
+ sizeof(arr_struct->array[index]),
+ kill_data);
+ }
+ }
+ bpf_map_update_elem(&var_tpid_to_data, &tpid, arr_struct, 0);
+ return 0;
+}
+
+static INLINE void bpf_stats_enter(struct bpf_func_stats_ctx* bpf_stat_ctx,
+ enum bpf_function_id func_id)
+{
+ int func_id_key = func_id;
+
+ bpf_stat_ctx->start_time_ns = bpf_ktime_get_ns();
+ bpf_stat_ctx->bpf_func_stats_data_val =
+ bpf_map_lookup_elem(&bpf_func_stats, &func_id_key);
+ if (bpf_stat_ctx->bpf_func_stats_data_val)
+ bpf_stat_ctx->bpf_func_stats_data_val->num_executions++;
+}
+
+static INLINE void bpf_stats_exit(struct bpf_func_stats_ctx* bpf_stat_ctx)
+{
+ if (bpf_stat_ctx->bpf_func_stats_data_val)
+ bpf_stat_ctx->bpf_func_stats_data_val->time_elapsed_ns +=
+ bpf_ktime_get_ns() - bpf_stat_ctx->start_time_ns;
+}
+
+static INLINE void
+bpf_stats_pre_submit_var_perf_event(struct bpf_func_stats_ctx* bpf_stat_ctx,
+ struct var_metadata_t* meta)
+{
+ if (bpf_stat_ctx->bpf_func_stats_data_val) {
+ bpf_stat_ctx->bpf_func_stats_data_val->num_perf_events++;
+ meta->bpf_stats_num_perf_events =
+ bpf_stat_ctx->bpf_func_stats_data_val->num_perf_events;
+ }
+ meta->bpf_stats_start_ktime_ns = bpf_stat_ctx->start_time_ns;
+ meta->cpu_id = bpf_get_smp_processor_id();
+}
+
+static INLINE size_t
+read_absolute_file_path_from_dentry(struct dentry* filp_dentry, void* payload)
+{
+ size_t length = 0;
+ size_t filepart_length;
+ struct dentry* parent_dentry;
+
+#ifdef UNROLL
+ __pragma_loop_unroll
+#endif
+ for (int i = 0; i < MAX_PATH_DEPTH; i++) {
+ filepart_length =
+ bpf_probe_read_kernel_str(payload, MAX_PATH,
+ BPF_CORE_READ(filp_dentry, d_name.name));
+ bpf_nop_mov(filepart_length);
+ if (bpf_cmp_unlikely(filepart_length, >, MAX_PATH))
+ break;
+ payload += filepart_length;
+ length += filepart_length;
+
+ parent_dentry = BPF_CORE_READ(filp_dentry, d_parent);
+ if (filp_dentry == parent_dentry)
+ break;
+ filp_dentry = parent_dentry;
+ }
+
+ return length;
+}
+
+static INLINE bool
+is_ancestor_in_allowed_inodes(struct dentry* filp_dentry)
+{
+ struct dentry* parent_dentry;
+#ifdef UNROLL
+ __pragma_loop_unroll
+#endif
+ for (int i = 0; i < MAX_PATH_DEPTH; i++) {
+ u64 dir_ino = BPF_CORE_READ(filp_dentry, d_inode, i_ino);
+ bool* allowed_dir = bpf_map_lookup_elem(&allowed_directory_inodes, &dir_ino);
+
+ if (allowed_dir != NULL)
+ return true;
+ parent_dentry = BPF_CORE_READ(filp_dentry, d_parent);
+ if (filp_dentry == parent_dentry)
+ break;
+ filp_dentry = parent_dentry;
+ }
+ return false;
+}
+
+static INLINE bool is_dentry_allowed_for_filemod(struct dentry* file_dentry,
+ u32* device_id,
+ u64* file_ino)
+{
+ u32 dev_id = BPF_CORE_READ(file_dentry, d_sb, s_dev);
+ *device_id = dev_id;
+ bool* allowed_device = bpf_map_lookup_elem(&allowed_devices, &dev_id);
+
+ if (allowed_device == NULL)
+ return false;
+
+ u64 ino = BPF_CORE_READ(file_dentry, d_inode, i_ino);
+ *file_ino = ino;
+ bool* allowed_file = bpf_map_lookup_elem(&allowed_file_inodes, &ino);
+
+ if (allowed_file == NULL)
+ if (!is_ancestor_in_allowed_inodes(BPF_CORE_READ(file_dentry, d_parent)))
+ return false;
+ return true;
+}
+
+SEC("kprobe/proc_sys_write")
+ssize_t BPF_KPROBE(kprobe__proc_sys_write,
+ struct file* filp, const char* buf,
+ size_t count, loff_t* ppos)
+{
+ struct bpf_func_stats_ctx stats_ctx;
+ bpf_stats_enter(&stats_ctx, profiler_bpf_proc_sys_write);
+
+ u32 pid = get_userspace_pid();
+ int zero = 0;
+ struct var_sysctl_data_t* sysctl_data =
+ bpf_map_lookup_elem(&data_heap, &zero);
+ if (!sysctl_data)
+ goto out;
+
+ struct task_struct* task = (struct task_struct*)bpf_get_current_task();
+ sysctl_data->meta.type = SYSCTL_EVENT;
+ void* payload = populate_var_metadata(&sysctl_data->meta, task, pid, sysctl_data->payload);
+ payload = populate_cgroup_info(&sysctl_data->cgroup_data, task, payload);
+
+ populate_ancestors(task, &sysctl_data->ancestors_info);
+
+ sysctl_data->sysctl_val_length = 0;
+ sysctl_data->sysctl_path_length = 0;
+
+ size_t sysctl_val_length = bpf_probe_read_kernel_str(payload,
+ CTL_MAXNAME, buf);
+ if (bpf_cmp_likely(sysctl_val_length, <=, CTL_MAXNAME)) {
+ sysctl_data->sysctl_val_length = sysctl_val_length;
+ payload += sysctl_val_length;
+ }
+
+ size_t sysctl_path_length =
+ bpf_probe_read_kernel_str(payload, MAX_PATH,
+ BPF_CORE_READ(filp, f_path.dentry,
+ d_name.name));
+ if (bpf_cmp_likely(sysctl_path_length, <=, MAX_PATH)) {
+ sysctl_data->sysctl_path_length = sysctl_path_length;
+ payload += sysctl_path_length;
+ }
+
+ bpf_stats_pre_submit_var_perf_event(&stats_ctx, &sysctl_data->meta);
+ unsigned long data_len = payload - (void*)sysctl_data;
+ data_len = data_len > sizeof(struct var_sysctl_data_t)
+ ? sizeof(struct var_sysctl_data_t)
+ : data_len;
+ bpf_perf_event_output(ctx, &events, BPF_F_CURRENT_CPU, sysctl_data, data_len);
+out:
+ bpf_stats_exit(&stats_ctx);
+ return 0;
+}
+
+SEC("tracepoint/syscalls/sys_enter_kill")
+int tracepoint__syscalls__sys_enter_kill(struct syscall_trace_enter* ctx)
+{
+ struct bpf_func_stats_ctx stats_ctx;
+
+ bpf_stats_enter(&stats_ctx, profiler_bpf_sys_enter_kill);
+ int pid = ctx->args[0];
+ int sig = ctx->args[1];
+ int ret = trace_var_sys_kill(ctx, pid, sig);
+ bpf_stats_exit(&stats_ctx);
+ return ret;
+};
+
+SEC("raw_tracepoint/sched_process_exit")
+int raw_tracepoint__sched_process_exit(void* ctx)
+{
+ int zero = 0;
+ struct bpf_func_stats_ctx stats_ctx;
+ bpf_stats_enter(&stats_ctx, profiler_bpf_sched_process_exit);
+
+ u32 tpid = get_userspace_pid();
+
+ struct var_kill_data_arr_t* arr_struct = bpf_map_lookup_elem(&var_tpid_to_data, &tpid);
+ struct var_kill_data_t* kill_data = bpf_map_lookup_elem(&data_heap, &zero);
+
+ if (arr_struct == NULL || kill_data == NULL)
+ goto out;
+
+ struct task_struct* task = (struct task_struct*)bpf_get_current_task();
+ struct kernfs_node* proc_kernfs = BPF_CORE_READ(task, cgroups, dfl_cgrp, kn);
+
+#ifdef UNROLL
+ __pragma_loop_unroll
+#endif
+ for (int i = 0; i < ARRAY_SIZE(arr_struct->array); i++) {
+ struct var_kill_data_t* past_kill_data = &arr_struct->array[i];
+
+ if (past_kill_data != NULL && past_kill_data->kill_target_pid == (pid_t)tpid) {
+ bpf_probe_read_kernel(kill_data, sizeof(*past_kill_data),
+ past_kill_data);
+ void* payload = kill_data->payload;
+ size_t offset = kill_data->payload_length;
+ if (offset >= MAX_METADATA_PAYLOAD_LEN + MAX_CGROUP_PAYLOAD_LEN)
+ return 0;
+ payload += offset;
+
+ kill_data->kill_target_name_length = 0;
+ kill_data->kill_target_cgroup_proc_length = 0;
+
+ size_t comm_length = bpf_core_read_str(payload, TASK_COMM_LEN, &task->comm);
+ if (bpf_cmp_likely(comm_length, <=, TASK_COMM_LEN)) {
+ kill_data->kill_target_name_length = comm_length;
+ payload += comm_length;
+ }
+
+ size_t cgroup_proc_length =
+ bpf_probe_read_kernel_str(payload,
+ KILL_TARGET_LEN,
+ BPF_CORE_READ(proc_kernfs, name));
+ if (bpf_cmp_likely(cgroup_proc_length, <=, KILL_TARGET_LEN)) {
+ kill_data->kill_target_cgroup_proc_length = cgroup_proc_length;
+ payload += cgroup_proc_length;
+ }
+
+ bpf_stats_pre_submit_var_perf_event(&stats_ctx, &kill_data->meta);
+ unsigned long data_len = (void*)payload - (void*)kill_data;
+ data_len = data_len > sizeof(struct var_kill_data_t)
+ ? sizeof(struct var_kill_data_t)
+ : data_len;
+ bpf_perf_event_output(ctx, &events, BPF_F_CURRENT_CPU, kill_data, data_len);
+ }
+ }
+ bpf_map_delete_elem(&var_tpid_to_data, &tpid);
+out:
+ bpf_stats_exit(&stats_ctx);
+ return 0;
+}
+
+SEC("raw_tracepoint/sched_process_exec")
+int raw_tracepoint__sched_process_exec(struct bpf_raw_tracepoint_args* ctx)
+{
+ struct bpf_func_stats_ctx stats_ctx;
+ bpf_stats_enter(&stats_ctx, profiler_bpf_sched_process_exec);
+
+ struct linux_binprm* bprm = (struct linux_binprm*)ctx->args[2];
+ u64 inode = BPF_CORE_READ(bprm, file, f_inode, i_ino);
+
+ bool* should_filter_binprm = bpf_map_lookup_elem(&disallowed_exec_inodes, &inode);
+ if (should_filter_binprm != NULL)
+ goto out;
+
+ int zero = 0;
+ struct var_exec_data_t* proc_exec_data = bpf_map_lookup_elem(&data_heap, &zero);
+ if (!proc_exec_data)
+ goto out;
+
+ if (INODE_FILTER && inode != INODE_FILTER)
+ return 0;
+
+ u32 pid = get_userspace_pid();
+ struct task_struct* task = (struct task_struct*)bpf_get_current_task();
+
+ proc_exec_data->meta.type = EXEC_EVENT;
+ proc_exec_data->bin_path_length = 0;
+ proc_exec_data->cmdline_length = 0;
+ proc_exec_data->environment_length = 0;
+ void* payload = populate_var_metadata(&proc_exec_data->meta, task, pid,
+ proc_exec_data->payload);
+ payload = populate_cgroup_info(&proc_exec_data->cgroup_data, task, payload);
+
+ struct task_struct* parent_task = BPF_CORE_READ(task, real_parent);
+ proc_exec_data->parent_pid = BPF_CORE_READ(parent_task, tgid);
+ proc_exec_data->parent_uid = BPF_CORE_READ(parent_task, real_cred, uid.val);
+ proc_exec_data->parent_exec_id = BPF_CORE_READ(parent_task, self_exec_id);
+ proc_exec_data->parent_start_time = BPF_CORE_READ(parent_task, start_time);
+
+ const char* filename = BPF_CORE_READ(bprm, filename);
+ size_t bin_path_length =
+ bpf_probe_read_kernel_str(payload, MAX_FILENAME_LEN, filename);
+ if (bpf_cmp_likely(bin_path_length, <=, MAX_FILENAME_LEN)) {
+ proc_exec_data->bin_path_length = bin_path_length;
+ payload += bin_path_length;
+ }
+
+ void* arg_start = (void*)BPF_CORE_READ(task, mm, arg_start);
+ void* arg_end = (void*)BPF_CORE_READ(task, mm, arg_end);
+ unsigned int cmdline_length = probe_read_lim(payload, arg_start,
+ arg_end - arg_start, MAX_ARGS_LEN);
+
+ if (bpf_cmp_likely(cmdline_length, <=, MAX_ARGS_LEN)) {
+ proc_exec_data->cmdline_length = cmdline_length;
+ payload += cmdline_length;
+ }
+
+ if (READ_ENVIRON_FROM_EXEC) {
+ void* env_start = (void*)BPF_CORE_READ(task, mm, env_start);
+ void* env_end = (void*)BPF_CORE_READ(task, mm, env_end);
+ unsigned long env_len = probe_read_lim(payload, env_start,
+ env_end - env_start, MAX_ENVIRON_LEN);
+ if (cmdline_length <= MAX_ENVIRON_LEN) {
+ proc_exec_data->environment_length = env_len;
+ payload += env_len;
+ }
+ }
+
+ bpf_stats_pre_submit_var_perf_event(&stats_ctx, &proc_exec_data->meta);
+ unsigned long data_len = payload - (void*)proc_exec_data;
+ data_len = data_len > sizeof(struct var_exec_data_t)
+ ? sizeof(struct var_exec_data_t)
+ : data_len;
+ bpf_perf_event_output(ctx, &events, BPF_F_CURRENT_CPU, proc_exec_data, data_len);
+out:
+ bpf_stats_exit(&stats_ctx);
+ return 0;
+}
+
+SEC("kretprobe/do_filp_open")
+int kprobe_ret__do_filp_open(struct pt_regs* ctx)
+{
+ struct bpf_func_stats_ctx stats_ctx;
+ bpf_stats_enter(&stats_ctx, profiler_bpf_do_filp_open_ret);
+
+ struct file* filp = (struct file*)PT_REGS_RC_CORE(ctx);
+
+ if (filp == NULL || IS_ERR(filp))
+ goto out;
+ unsigned int flags = BPF_CORE_READ(filp, f_flags);
+ if ((flags & (O_RDWR | O_WRONLY)) == 0)
+ goto out;
+ if ((flags & O_TMPFILE) > 0)
+ goto out;
+ struct inode* file_inode = BPF_CORE_READ(filp, f_inode);
+ umode_t mode = BPF_CORE_READ(file_inode, i_mode);
+ if (S_ISDIR(mode) || S_ISCHR(mode) || S_ISBLK(mode) || S_ISFIFO(mode) ||
+ S_ISSOCK(mode))
+ goto out;
+
+ struct dentry* filp_dentry = BPF_CORE_READ(filp, f_path.dentry);
+ u32 device_id = 0;
+ u64 file_ino = 0;
+ if (!is_dentry_allowed_for_filemod(filp_dentry, &device_id, &file_ino))
+ goto out;
+
+ int zero = 0;
+ struct var_filemod_data_t* filemod_data = bpf_map_lookup_elem(&data_heap, &zero);
+ if (!filemod_data)
+ goto out;
+
+ u32 pid = get_userspace_pid();
+ struct task_struct* task = (struct task_struct*)bpf_get_current_task();
+
+ filemod_data->meta.type = FILEMOD_EVENT;
+ filemod_data->fmod_type = FMOD_OPEN;
+ filemod_data->dst_flags = flags;
+ filemod_data->src_inode = 0;
+ filemod_data->dst_inode = file_ino;
+ filemod_data->src_device_id = 0;
+ filemod_data->dst_device_id = device_id;
+ filemod_data->src_filepath_length = 0;
+ filemod_data->dst_filepath_length = 0;
+
+ void* payload = populate_var_metadata(&filemod_data->meta, task, pid,
+ filemod_data->payload);
+ payload = populate_cgroup_info(&filemod_data->cgroup_data, task, payload);
+
+ size_t len = read_absolute_file_path_from_dentry(filp_dentry, payload);
+ if (bpf_cmp_likely(len, <=, MAX_FILEPATH_LENGTH)) {
+ payload += len;
+ filemod_data->dst_filepath_length = len;
+ }
+ bpf_stats_pre_submit_var_perf_event(&stats_ctx, &filemod_data->meta);
+ unsigned long data_len = payload - (void*)filemod_data;
+ data_len = data_len > sizeof(*filemod_data) ? sizeof(*filemod_data) : data_len;
+ bpf_perf_event_output(ctx, &events, BPF_F_CURRENT_CPU, filemod_data, data_len);
+out:
+ bpf_stats_exit(&stats_ctx);
+ return 0;
+}
+
+SEC("kprobe/vfs_link")
+int BPF_KPROBE(kprobe__vfs_link,
+ struct dentry* old_dentry, struct mnt_idmap *idmap,
+ struct inode* dir, struct dentry* new_dentry,
+ struct inode** delegated_inode)
+{
+ struct bpf_func_stats_ctx stats_ctx;
+ bpf_stats_enter(&stats_ctx, profiler_bpf_vfs_link);
+
+ u32 src_device_id = 0;
+ u64 src_file_ino = 0;
+ u32 dst_device_id = 0;
+ u64 dst_file_ino = 0;
+ if (!is_dentry_allowed_for_filemod(old_dentry, &src_device_id, &src_file_ino) &&
+ !is_dentry_allowed_for_filemod(new_dentry, &dst_device_id, &dst_file_ino))
+ goto out;
+
+ int zero = 0;
+ struct var_filemod_data_t* filemod_data = bpf_map_lookup_elem(&data_heap, &zero);
+ if (!filemod_data)
+ goto out;
+
+ u32 pid = get_userspace_pid();
+ struct task_struct* task = (struct task_struct*)bpf_get_current_task();
+
+ filemod_data->meta.type = FILEMOD_EVENT;
+ filemod_data->fmod_type = FMOD_LINK;
+ filemod_data->dst_flags = 0;
+ filemod_data->src_inode = src_file_ino;
+ filemod_data->dst_inode = dst_file_ino;
+ filemod_data->src_device_id = src_device_id;
+ filemod_data->dst_device_id = dst_device_id;
+ filemod_data->src_filepath_length = 0;
+ filemod_data->dst_filepath_length = 0;
+
+ void* payload = populate_var_metadata(&filemod_data->meta, task, pid,
+ filemod_data->payload);
+ payload = populate_cgroup_info(&filemod_data->cgroup_data, task, payload);
+
+ size_t len = read_absolute_file_path_from_dentry(old_dentry, payload);
+ if (bpf_cmp_likely(len, <=, MAX_FILEPATH_LENGTH)) {
+ payload += len;
+ filemod_data->src_filepath_length = len;
+ }
+
+ len = read_absolute_file_path_from_dentry(new_dentry, payload);
+ if (bpf_cmp_likely(len, <=, MAX_FILEPATH_LENGTH)) {
+ payload += len;
+ filemod_data->dst_filepath_length = len;
+ }
+
+ bpf_stats_pre_submit_var_perf_event(&stats_ctx, &filemod_data->meta);
+ unsigned long data_len = payload - (void*)filemod_data;
+ data_len = data_len > sizeof(*filemod_data) ? sizeof(*filemod_data) : data_len;
+ bpf_perf_event_output(ctx, &events, BPF_F_CURRENT_CPU, filemod_data, data_len);
+out:
+ bpf_stats_exit(&stats_ctx);
+ return 0;
+}
+
+SEC("kprobe/vfs_symlink")
+int BPF_KPROBE(kprobe__vfs_symlink, struct inode* dir, struct dentry* dentry,
+ const char* oldname)
+{
+ struct bpf_func_stats_ctx stats_ctx;
+ bpf_stats_enter(&stats_ctx, profiler_bpf_vfs_symlink);
+
+ u32 dst_device_id = 0;
+ u64 dst_file_ino = 0;
+ if (!is_dentry_allowed_for_filemod(dentry, &dst_device_id, &dst_file_ino))
+ goto out;
+
+ int zero = 0;
+ struct var_filemod_data_t* filemod_data = bpf_map_lookup_elem(&data_heap, &zero);
+ if (!filemod_data)
+ goto out;
+
+ u32 pid = get_userspace_pid();
+ struct task_struct* task = (struct task_struct*)bpf_get_current_task();
+
+ filemod_data->meta.type = FILEMOD_EVENT;
+ filemod_data->fmod_type = FMOD_SYMLINK;
+ filemod_data->dst_flags = 0;
+ filemod_data->src_inode = 0;
+ filemod_data->dst_inode = dst_file_ino;
+ filemod_data->src_device_id = 0;
+ filemod_data->dst_device_id = dst_device_id;
+ filemod_data->src_filepath_length = 0;
+ filemod_data->dst_filepath_length = 0;
+
+ void* payload = populate_var_metadata(&filemod_data->meta, task, pid,
+ filemod_data->payload);
+ payload = populate_cgroup_info(&filemod_data->cgroup_data, task, payload);
+
+ size_t len = bpf_probe_read_kernel_str(payload, MAX_FILEPATH_LENGTH,
+ oldname);
+ if (bpf_cmp_likely(len, <=, MAX_FILEPATH_LENGTH)) {
+ payload += len;
+ filemod_data->src_filepath_length = len;
+ }
+ len = read_absolute_file_path_from_dentry(dentry, payload);
+ if (bpf_cmp_likely(len, <=, MAX_FILEPATH_LENGTH)) {
+ payload += len;
+ filemod_data->dst_filepath_length = len;
+ }
+ bpf_stats_pre_submit_var_perf_event(&stats_ctx, &filemod_data->meta);
+ unsigned long data_len = payload - (void*)filemod_data;
+ data_len = data_len > sizeof(*filemod_data) ? sizeof(*filemod_data) : data_len;
+ bpf_perf_event_output(ctx, &events, BPF_F_CURRENT_CPU, filemod_data, data_len);
+out:
+ bpf_stats_exit(&stats_ctx);
+ return 0;
+}
+
+SEC("raw_tracepoint/sched_process_fork")
+int raw_tracepoint__sched_process_fork(struct bpf_raw_tracepoint_args* ctx)
+{
+ struct bpf_func_stats_ctx stats_ctx;
+ bpf_stats_enter(&stats_ctx, profiler_bpf_sched_process_fork);
+
+ int zero = 0;
+ struct var_fork_data_t* fork_data = bpf_map_lookup_elem(&data_heap, &zero);
+ if (!fork_data)
+ goto out;
+
+ struct task_struct* parent = (struct task_struct*)ctx->args[0];
+ struct task_struct* child = (struct task_struct*)ctx->args[1];
+ fork_data->meta.type = FORK_EVENT;
+
+ void* payload = populate_var_metadata(&fork_data->meta, child,
+ BPF_CORE_READ(child, pid), fork_data->payload);
+ fork_data->parent_pid = BPF_CORE_READ(parent, pid);
+ fork_data->parent_exec_id = BPF_CORE_READ(parent, self_exec_id);
+ fork_data->parent_start_time = BPF_CORE_READ(parent, start_time);
+ bpf_stats_pre_submit_var_perf_event(&stats_ctx, &fork_data->meta);
+
+ unsigned long data_len = payload - (void*)fork_data;
+ data_len = data_len > sizeof(*fork_data) ? sizeof(*fork_data) : data_len;
+ bpf_perf_event_output(ctx, &events, BPF_F_CURRENT_CPU, fork_data, data_len);
+out:
+ bpf_stats_exit(&stats_ctx);
+ return 0;
+}
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/profiler1.c b/tools/testing/selftests/bpf/progs/profiler1.c
new file mode 100644
index 000000000000..fb6b13522949
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/profiler1.c
@@ -0,0 +1,5 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2020 Facebook */
+#define UNROLL
+#define INLINE __always_inline
+#include "profiler.inc.h"
diff --git a/tools/testing/selftests/bpf/progs/profiler2.c b/tools/testing/selftests/bpf/progs/profiler2.c
new file mode 100644
index 000000000000..0f32a3cbf556
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/profiler2.c
@@ -0,0 +1,6 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2020 Facebook */
+#define barrier_var(var) /**/
+/* undef #define UNROLL */
+#define INLINE /**/
+#include "profiler.inc.h"
diff --git a/tools/testing/selftests/bpf/progs/profiler3.c b/tools/testing/selftests/bpf/progs/profiler3.c
new file mode 100644
index 000000000000..6249fc31ccb0
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/profiler3.c
@@ -0,0 +1,6 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2020 Facebook */
+#define barrier_var(var) /**/
+#define UNROLL
+#define INLINE __noinline
+#include "profiler.inc.h"
diff --git a/tools/testing/selftests/bpf/progs/pyperf.h b/tools/testing/selftests/bpf/progs/pyperf.h
index cc615b82b56e..86484f07e1d1 100644
--- a/tools/testing/selftests/bpf/progs/pyperf.h
+++ b/tools/testing/selftests/bpf/progs/pyperf.h
@@ -7,6 +7,8 @@
#include <stdbool.h>
#include <linux/bpf.h>
#include <bpf/bpf_helpers.h>
+#include "bpf_misc.h"
+#include "bpf_compiler.h"
#define FUNCTION_NAME_LEN 64
#define FILE_NAME_LEN 128
@@ -67,7 +69,12 @@ typedef struct {
void* co_name; // PyCodeObject.co_name
} FrameData;
-static __always_inline void *get_thread_state(void *tls_base, PidData *pidData)
+#ifdef SUBPROGS
+__noinline
+#else
+__always_inline
+#endif
+static void *get_thread_state(void *tls_base, PidData *pidData)
{
void* thread_state;
int key;
@@ -154,8 +161,61 @@ struct {
__uint(value_size, sizeof(long long) * 127);
} stackmap SEC(".maps");
+#ifdef USE_BPF_LOOP
+struct process_frame_ctx {
+ int cur_cpu;
+ int32_t *symbol_counter;
+ void *frame_ptr;
+ FrameData *frame;
+ PidData *pidData;
+ Symbol *sym;
+ Event *event;
+ bool done;
+};
+
+static int process_frame_callback(__u32 i, struct process_frame_ctx *ctx)
+{
+ int zero = 0;
+ void *frame_ptr = ctx->frame_ptr;
+ PidData *pidData = ctx->pidData;
+ FrameData *frame = ctx->frame;
+ int32_t *symbol_counter = ctx->symbol_counter;
+ int cur_cpu = ctx->cur_cpu;
+ Event *event = ctx->event;
+ Symbol *sym = ctx->sym;
+
+ if (frame_ptr && get_frame_data(frame_ptr, pidData, frame, sym)) {
+ int32_t new_symbol_id = *symbol_counter * 64 + cur_cpu;
+ int32_t *symbol_id = bpf_map_lookup_elem(&symbolmap, sym);
+
+ if (!symbol_id) {
+ bpf_map_update_elem(&symbolmap, sym, &zero, 0);
+ symbol_id = bpf_map_lookup_elem(&symbolmap, sym);
+ if (!symbol_id) {
+ ctx->done = true;
+ return 1;
+ }
+ }
+ if (*symbol_id == new_symbol_id)
+ (*symbol_counter)++;
+
+ barrier_var(i);
+ if (i >= STACK_MAX_LEN)
+ return 1;
+
+ event->stack[i] = *symbol_id;
+
+ event->stack_len = i + 1;
+ frame_ptr = frame->f_back;
+ }
+ return 0;
+}
+#endif /* USE_BPF_LOOP */
+
#ifdef GLOBAL_FUNC
-__attribute__((noinline))
+__noinline
+#elif defined(SUBPROGS)
+static __noinline
#else
static __always_inline
#endif
@@ -221,13 +281,37 @@ int __on_event(struct bpf_raw_tracepoint_args *ctx)
int32_t* symbol_counter = bpf_map_lookup_elem(&symbolmap, &sym);
if (symbol_counter == NULL)
return 0;
-#ifdef NO_UNROLL
-#pragma clang loop unroll(disable)
+#ifdef USE_BPF_LOOP
+ struct process_frame_ctx ctx = {
+ .cur_cpu = cur_cpu,
+ .symbol_counter = symbol_counter,
+ .frame_ptr = frame_ptr,
+ .frame = &frame,
+ .pidData = pidData,
+ .sym = &sym,
+ .event = event,
+ };
+
+ bpf_loop(STACK_MAX_LEN, process_frame_callback, &ctx, 0);
+ if (ctx.done)
+ return 0;
#else
-#pragma clang loop unroll(full)
-#endif
+#if defined(USE_ITER)
+/* no for loop, no unrolling */
+#elif defined(NO_UNROLL)
+ __pragma_loop_no_unroll
+#elif defined(UNROLL_COUNT)
+ __pragma_loop_unroll_count(UNROLL_COUNT)
+#else
+ __pragma_loop_unroll_full
+#endif /* NO_UNROLL */
/* Unwind python stack */
+#ifdef USE_ITER
+ int i;
+ bpf_for(i, 0, STACK_MAX_LEN) {
+#else /* !USE_ITER */
for (int i = 0; i < STACK_MAX_LEN; ++i) {
+#endif
if (frame_ptr && get_frame_data(frame_ptr, pidData, &frame, &sym)) {
int32_t new_symbol_id = *symbol_counter * 64 + cur_cpu;
int32_t *symbol_id = bpf_map_lookup_elem(&symbolmap, &sym);
@@ -244,6 +328,7 @@ int __on_event(struct bpf_raw_tracepoint_args *ctx)
frame_ptr = frame.f_back;
}
}
+#endif /* USE_BPF_LOOP */
event->stack_complete = frame_ptr == NULL;
} else {
event->stack_complete = 1;
@@ -261,7 +346,7 @@ int __on_event(struct bpf_raw_tracepoint_args *ctx)
SEC("raw_tracepoint/kfree_skb")
int on_event(struct bpf_raw_tracepoint_args* ctx)
{
- int i, ret = 0;
+ int ret = 0;
ret |= __on_event(ctx);
ret |= __on_event(ctx);
ret |= __on_event(ctx);
diff --git a/tools/testing/selftests/bpf/progs/pyperf180.c b/tools/testing/selftests/bpf/progs/pyperf180.c
index c39f559d3100..42c4a8b62e36 100644
--- a/tools/testing/selftests/bpf/progs/pyperf180.c
+++ b/tools/testing/selftests/bpf/progs/pyperf180.c
@@ -1,4 +1,26 @@
// SPDX-License-Identifier: GPL-2.0
// Copyright (c) 2019 Facebook
#define STACK_MAX_LEN 180
+
+/* llvm upstream commit at clang18
+ * https://github.com/llvm/llvm-project/commit/1a2e77cf9e11dbf56b5720c607313a566eebb16e
+ * changed inlining behavior and caused compilation failure as some branch
+ * target distance exceeded 16bit representation which is the maximum for
+ * cpu v1/v2/v3. Macro __BPF_CPU_VERSION__ is later implemented in clang18
+ * to specify which cpu version is used for compilation. So a smaller
+ * unroll_count can be set if __BPF_CPU_VERSION__ is less than 4, which
+ * reduced some branch target distances and resolved the compilation failure.
+ *
+ * To capture the case where a developer/ci uses clang18 but the corresponding
+ * repo checkpoint does not have __BPF_CPU_VERSION__, a smaller unroll_count
+ * will be set as well to prevent potential compilation failures.
+ */
+#ifdef __BPF_CPU_VERSION__
+#if __BPF_CPU_VERSION__ < 4
+#define UNROLL_COUNT 90
+#endif
+#elif __clang_major__ == 18
+#define UNROLL_COUNT 90
+#endif
+
#include "pyperf.h"
diff --git a/tools/testing/selftests/bpf/progs/pyperf600.c b/tools/testing/selftests/bpf/progs/pyperf600.c
index cb49b89e37cd..ce1aa5189cc4 100644
--- a/tools/testing/selftests/bpf/progs/pyperf600.c
+++ b/tools/testing/selftests/bpf/progs/pyperf600.c
@@ -1,9 +1,12 @@
// SPDX-License-Identifier: GPL-2.0
// Copyright (c) 2019 Facebook
#define STACK_MAX_LEN 600
-/* clang will not unroll the loop 600 times.
- * Instead it will unroll it to the amount it deemed
- * appropriate, but the loop will still execute 600 times.
- * Total program size is around 90k insns
+/* Full unroll of 600 iterations will have total
+ * program size close to 298k insns and this may
+ * cause BPF_JMP insn out of 16-bit integer range.
+ * So limit the unroll size to 150 so the
+ * total program size is around 80k insns but
+ * the loop will still execute 600 times.
*/
+#define UNROLL_COUNT 150
#include "pyperf.h"
diff --git a/tools/testing/selftests/bpf/progs/pyperf600_bpf_loop.c b/tools/testing/selftests/bpf/progs/pyperf600_bpf_loop.c
new file mode 100644
index 000000000000..5c2059dc01af
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/pyperf600_bpf_loop.c
@@ -0,0 +1,6 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2021 Facebook */
+
+#define STACK_MAX_LEN 600
+#define USE_BPF_LOOP
+#include "pyperf.h"
diff --git a/tools/testing/selftests/bpf/progs/pyperf600_iter.c b/tools/testing/selftests/bpf/progs/pyperf600_iter.c
new file mode 100644
index 000000000000..d62e1b200c30
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/pyperf600_iter.c
@@ -0,0 +1,7 @@
+// SPDX-License-Identifier: GPL-2.0
+// Copyright (c) 2023 Meta Platforms, Inc. and affiliates.
+#define STACK_MAX_LEN 600
+#define SUBPROGS
+#define NO_UNROLL
+#define USE_ITER
+#include "pyperf.h"
diff --git a/tools/testing/selftests/bpf/progs/pyperf600_nounroll.c b/tools/testing/selftests/bpf/progs/pyperf600_nounroll.c
index 6beff7502f4d..520b58c4f8db 100644
--- a/tools/testing/selftests/bpf/progs/pyperf600_nounroll.c
+++ b/tools/testing/selftests/bpf/progs/pyperf600_nounroll.c
@@ -2,7 +2,4 @@
// Copyright (c) 2019 Facebook
#define STACK_MAX_LEN 600
#define NO_UNROLL
-/* clang will not unroll at all.
- * Total program size is around 2k insns
- */
#include "pyperf.h"
diff --git a/tools/testing/selftests/bpf/progs/pyperf_subprogs.c b/tools/testing/selftests/bpf/progs/pyperf_subprogs.c
new file mode 100644
index 000000000000..60e27a7f0cca
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/pyperf_subprogs.c
@@ -0,0 +1,5 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2020 Facebook */
+#define STACK_MAX_LEN 50
+#define SUBPROGS
+#include "pyperf.h"
diff --git a/tools/testing/selftests/bpf/progs/rbtree.c b/tools/testing/selftests/bpf/progs/rbtree.c
new file mode 100644
index 000000000000..b09f4fffe57c
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/rbtree.c
@@ -0,0 +1,246 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2022 Meta Platforms, Inc. and affiliates. */
+
+#include <vmlinux.h>
+#include <bpf/bpf_tracing.h>
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_core_read.h>
+#include "bpf_experimental.h"
+
+struct node_data {
+ long key;
+ long data;
+ struct bpf_rb_node node;
+};
+
+long less_callback_ran = -1;
+long removed_key = -1;
+long first_data[2] = {-1, -1};
+
+#define private(name) SEC(".data." #name) __hidden __attribute__((aligned(8)))
+private(A) struct bpf_spin_lock glock;
+private(A) struct bpf_rb_root groot __contains(node_data, node);
+
+static bool less(struct bpf_rb_node *a, const struct bpf_rb_node *b)
+{
+ struct node_data *node_a;
+ struct node_data *node_b;
+
+ node_a = container_of(a, struct node_data, node);
+ node_b = container_of(b, struct node_data, node);
+ less_callback_ran = 1;
+
+ return node_a->key < node_b->key;
+}
+
+static long __add_three(struct bpf_rb_root *root, struct bpf_spin_lock *lock)
+{
+ struct node_data *n, *m;
+
+ n = bpf_obj_new(typeof(*n));
+ if (!n)
+ return 1;
+ n->key = 5;
+
+ m = bpf_obj_new(typeof(*m));
+ if (!m) {
+ bpf_obj_drop(n);
+ return 2;
+ }
+ m->key = 1;
+
+ bpf_spin_lock(&glock);
+ bpf_rbtree_add(&groot, &n->node, less);
+ bpf_rbtree_add(&groot, &m->node, less);
+ bpf_spin_unlock(&glock);
+
+ n = bpf_obj_new(typeof(*n));
+ if (!n)
+ return 3;
+ n->key = 3;
+
+ bpf_spin_lock(&glock);
+ bpf_rbtree_add(&groot, &n->node, less);
+ bpf_spin_unlock(&glock);
+ return 0;
+}
+
+SEC("tc")
+long rbtree_add_nodes(void *ctx)
+{
+ return __add_three(&groot, &glock);
+}
+
+SEC("tc")
+long rbtree_add_and_remove(void *ctx)
+{
+ struct bpf_rb_node *res = NULL;
+ struct node_data *n, *m = NULL;
+
+ n = bpf_obj_new(typeof(*n));
+ if (!n)
+ goto err_out;
+ n->key = 5;
+
+ m = bpf_obj_new(typeof(*m));
+ if (!m)
+ goto err_out;
+ m->key = 3;
+
+ bpf_spin_lock(&glock);
+ bpf_rbtree_add(&groot, &n->node, less);
+ bpf_rbtree_add(&groot, &m->node, less);
+ res = bpf_rbtree_remove(&groot, &n->node);
+ bpf_spin_unlock(&glock);
+
+ if (!res)
+ return 1;
+
+ n = container_of(res, struct node_data, node);
+ removed_key = n->key;
+ bpf_obj_drop(n);
+
+ return 0;
+err_out:
+ if (n)
+ bpf_obj_drop(n);
+ if (m)
+ bpf_obj_drop(m);
+ return 1;
+}
+
+SEC("tc")
+long rbtree_first_and_remove(void *ctx)
+{
+ struct bpf_rb_node *res = NULL;
+ struct node_data *n, *m, *o;
+
+ n = bpf_obj_new(typeof(*n));
+ if (!n)
+ return 1;
+ n->key = 3;
+ n->data = 4;
+
+ m = bpf_obj_new(typeof(*m));
+ if (!m)
+ goto err_out;
+ m->key = 5;
+ m->data = 6;
+
+ o = bpf_obj_new(typeof(*o));
+ if (!o)
+ goto err_out;
+ o->key = 1;
+ o->data = 2;
+
+ bpf_spin_lock(&glock);
+ bpf_rbtree_add(&groot, &n->node, less);
+ bpf_rbtree_add(&groot, &m->node, less);
+ bpf_rbtree_add(&groot, &o->node, less);
+
+ res = bpf_rbtree_first(&groot);
+ if (!res) {
+ bpf_spin_unlock(&glock);
+ return 2;
+ }
+
+ o = container_of(res, struct node_data, node);
+ first_data[0] = o->data;
+
+ res = bpf_rbtree_remove(&groot, &o->node);
+ bpf_spin_unlock(&glock);
+
+ if (!res)
+ return 5;
+
+ o = container_of(res, struct node_data, node);
+ removed_key = o->key;
+ bpf_obj_drop(o);
+
+ bpf_spin_lock(&glock);
+ res = bpf_rbtree_first(&groot);
+ if (!res) {
+ bpf_spin_unlock(&glock);
+ return 3;
+ }
+
+ o = container_of(res, struct node_data, node);
+ first_data[1] = o->data;
+ bpf_spin_unlock(&glock);
+
+ return 0;
+err_out:
+ if (n)
+ bpf_obj_drop(n);
+ if (m)
+ bpf_obj_drop(m);
+ return 1;
+}
+
+SEC("tc")
+long rbtree_api_release_aliasing(void *ctx)
+{
+ struct node_data *n, *m, *o;
+ struct bpf_rb_node *res, *res2;
+
+ n = bpf_obj_new(typeof(*n));
+ if (!n)
+ return 1;
+ n->key = 41;
+ n->data = 42;
+
+ bpf_spin_lock(&glock);
+ bpf_rbtree_add(&groot, &n->node, less);
+ bpf_spin_unlock(&glock);
+
+ bpf_spin_lock(&glock);
+
+ /* m and o point to the same node,
+ * but verifier doesn't know this
+ */
+ res = bpf_rbtree_first(&groot);
+ if (!res)
+ goto err_out;
+ o = container_of(res, struct node_data, node);
+
+ res = bpf_rbtree_first(&groot);
+ if (!res)
+ goto err_out;
+ m = container_of(res, struct node_data, node);
+
+ res = bpf_rbtree_remove(&groot, &m->node);
+ /* Retval of previous remove returns an owning reference to m,
+ * which is the same node non-owning ref o is pointing at.
+ * We can safely try to remove o as the second rbtree_remove will
+ * return NULL since the node isn't in a tree.
+ *
+ * Previously we relied on the verifier type system + rbtree_remove
+ * invalidating non-owning refs to ensure that rbtree_remove couldn't
+ * fail, but now rbtree_remove does runtime checking so we no longer
+ * invalidate non-owning refs after remove.
+ */
+ res2 = bpf_rbtree_remove(&groot, &o->node);
+
+ bpf_spin_unlock(&glock);
+
+ if (res) {
+ o = container_of(res, struct node_data, node);
+ first_data[0] = o->data;
+ bpf_obj_drop(o);
+ }
+ if (res2) {
+ /* The second remove fails, so res2 is null and this doesn't
+ * execute
+ */
+ m = container_of(res2, struct node_data, node);
+ first_data[1] = m->data;
+ bpf_obj_drop(m);
+ }
+ return 0;
+
+err_out:
+ bpf_spin_unlock(&glock);
+ return 1;
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/rbtree_btf_fail__add_wrong_type.c b/tools/testing/selftests/bpf/progs/rbtree_btf_fail__add_wrong_type.c
new file mode 100644
index 000000000000..60079b202c07
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/rbtree_btf_fail__add_wrong_type.c
@@ -0,0 +1,52 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2022 Meta Platforms, Inc. and affiliates. */
+
+#include <vmlinux.h>
+#include <bpf/bpf_tracing.h>
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_core_read.h>
+#include "bpf_experimental.h"
+
+struct node_data {
+ int key;
+ int data;
+ struct bpf_rb_node node;
+};
+
+struct node_data2 {
+ int key;
+ struct bpf_rb_node node;
+ int data;
+};
+
+static bool less2(struct bpf_rb_node *a, const struct bpf_rb_node *b)
+{
+ struct node_data2 *node_a;
+ struct node_data2 *node_b;
+
+ node_a = container_of(a, struct node_data2, node);
+ node_b = container_of(b, struct node_data2, node);
+
+ return node_a->key < node_b->key;
+}
+
+#define private(name) SEC(".data." #name) __hidden __attribute__((aligned(8)))
+private(A) struct bpf_spin_lock glock;
+private(A) struct bpf_rb_root groot __contains(node_data, node);
+
+SEC("tc")
+long rbtree_api_add__add_wrong_type(void *ctx)
+{
+ struct node_data2 *n;
+
+ n = bpf_obj_new(typeof(*n));
+ if (!n)
+ return 1;
+
+ bpf_spin_lock(&glock);
+ bpf_rbtree_add(&groot, &n->node, less2);
+ bpf_spin_unlock(&glock);
+ return 0;
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/rbtree_btf_fail__wrong_node_type.c b/tools/testing/selftests/bpf/progs/rbtree_btf_fail__wrong_node_type.c
new file mode 100644
index 000000000000..7651843f5a80
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/rbtree_btf_fail__wrong_node_type.c
@@ -0,0 +1,38 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2022 Meta Platforms, Inc. and affiliates. */
+
+#include <vmlinux.h>
+#include <bpf/bpf_tracing.h>
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_core_read.h>
+#include "bpf_experimental.h"
+
+/* BTF load should fail as bpf_rb_root __contains this type and points to
+ * 'node', but 'node' is not a bpf_rb_node
+ */
+struct node_data {
+ int key;
+ int data;
+ struct bpf_list_node node;
+};
+
+#define private(name) SEC(".data." #name) __hidden __attribute__((aligned(8)))
+private(A) struct bpf_spin_lock glock;
+private(A) struct bpf_rb_root groot __contains(node_data, node);
+
+SEC("tc")
+long rbtree_api_add__wrong_node_type(void *ctx)
+{
+ struct node_data *n;
+
+ n = bpf_obj_new(typeof(*n));
+ if (!n)
+ return 1;
+
+ bpf_spin_lock(&glock);
+ bpf_rbtree_first(&groot);
+ bpf_spin_unlock(&glock);
+ return 0;
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/rbtree_fail.c b/tools/testing/selftests/bpf/progs/rbtree_fail.c
new file mode 100644
index 000000000000..3fecf1c6dfe5
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/rbtree_fail.c
@@ -0,0 +1,303 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <vmlinux.h>
+#include <bpf/bpf_tracing.h>
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_core_read.h>
+#include "bpf_experimental.h"
+#include "bpf_misc.h"
+
+struct node_data {
+ long key;
+ long data;
+ struct bpf_rb_node node;
+};
+
+#define private(name) SEC(".data." #name) __hidden __attribute__((aligned(8)))
+private(A) struct bpf_spin_lock glock;
+private(A) struct bpf_rb_root groot __contains(node_data, node);
+private(A) struct bpf_rb_root groot2 __contains(node_data, node);
+
+static bool less(struct bpf_rb_node *a, const struct bpf_rb_node *b)
+{
+ struct node_data *node_a;
+ struct node_data *node_b;
+
+ node_a = container_of(a, struct node_data, node);
+ node_b = container_of(b, struct node_data, node);
+
+ return node_a->key < node_b->key;
+}
+
+SEC("?tc")
+__failure __msg("bpf_spin_lock at off=16 must be held for bpf_rb_root")
+long rbtree_api_nolock_add(void *ctx)
+{
+ struct node_data *n;
+
+ n = bpf_obj_new(typeof(*n));
+ if (!n)
+ return 1;
+
+ bpf_rbtree_add(&groot, &n->node, less);
+ return 0;
+}
+
+SEC("?tc")
+__failure __msg("bpf_spin_lock at off=16 must be held for bpf_rb_root")
+long rbtree_api_nolock_remove(void *ctx)
+{
+ struct node_data *n;
+
+ n = bpf_obj_new(typeof(*n));
+ if (!n)
+ return 1;
+
+ bpf_spin_lock(&glock);
+ bpf_rbtree_add(&groot, &n->node, less);
+ bpf_spin_unlock(&glock);
+
+ bpf_rbtree_remove(&groot, &n->node);
+ return 0;
+}
+
+SEC("?tc")
+__failure __msg("bpf_spin_lock at off=16 must be held for bpf_rb_root")
+long rbtree_api_nolock_first(void *ctx)
+{
+ bpf_rbtree_first(&groot);
+ return 0;
+}
+
+SEC("?tc")
+__failure __msg("rbtree_remove node input must be non-owning ref")
+long rbtree_api_remove_unadded_node(void *ctx)
+{
+ struct node_data *n, *m;
+ struct bpf_rb_node *res;
+
+ n = bpf_obj_new(typeof(*n));
+ if (!n)
+ return 1;
+
+ m = bpf_obj_new(typeof(*m));
+ if (!m) {
+ bpf_obj_drop(n);
+ return 1;
+ }
+
+ bpf_spin_lock(&glock);
+ bpf_rbtree_add(&groot, &n->node, less);
+
+ /* This remove should pass verifier */
+ res = bpf_rbtree_remove(&groot, &n->node);
+ n = container_of(res, struct node_data, node);
+
+ /* This remove shouldn't, m isn't in an rbtree */
+ res = bpf_rbtree_remove(&groot, &m->node);
+ m = container_of(res, struct node_data, node);
+ bpf_spin_unlock(&glock);
+
+ if (n)
+ bpf_obj_drop(n);
+ if (m)
+ bpf_obj_drop(m);
+ return 0;
+}
+
+SEC("?tc")
+__failure __msg("Unreleased reference id=3 alloc_insn=10")
+long rbtree_api_remove_no_drop(void *ctx)
+{
+ struct bpf_rb_node *res;
+ struct node_data *n;
+
+ bpf_spin_lock(&glock);
+ res = bpf_rbtree_first(&groot);
+ if (!res)
+ goto unlock_err;
+
+ res = bpf_rbtree_remove(&groot, res);
+
+ if (res) {
+ n = container_of(res, struct node_data, node);
+ __sink(n);
+ }
+ bpf_spin_unlock(&glock);
+
+ /* if (res) { bpf_obj_drop(n); } is missing here */
+ return 0;
+
+unlock_err:
+ bpf_spin_unlock(&glock);
+ return 1;
+}
+
+SEC("?tc")
+__failure __msg("arg#1 expected pointer to allocated object")
+long rbtree_api_add_to_multiple_trees(void *ctx)
+{
+ struct node_data *n;
+
+ n = bpf_obj_new(typeof(*n));
+ if (!n)
+ return 1;
+
+ bpf_spin_lock(&glock);
+ bpf_rbtree_add(&groot, &n->node, less);
+
+ /* This add should fail since n already in groot's tree */
+ bpf_rbtree_add(&groot2, &n->node, less);
+ bpf_spin_unlock(&glock);
+ return 0;
+}
+
+SEC("?tc")
+__failure __msg("dereference of modified ptr_or_null_ ptr R2 off=16 disallowed")
+long rbtree_api_use_unchecked_remove_retval(void *ctx)
+{
+ struct bpf_rb_node *res;
+
+ bpf_spin_lock(&glock);
+
+ res = bpf_rbtree_first(&groot);
+ if (!res)
+ goto err_out;
+ res = bpf_rbtree_remove(&groot, res);
+
+ bpf_spin_unlock(&glock);
+
+ bpf_spin_lock(&glock);
+ /* Must check res for NULL before using in rbtree_add below */
+ bpf_rbtree_add(&groot, res, less);
+ bpf_spin_unlock(&glock);
+ return 0;
+
+err_out:
+ bpf_spin_unlock(&glock);
+ return 1;
+}
+
+SEC("?tc")
+__failure __msg("rbtree_remove node input must be non-owning ref")
+long rbtree_api_add_release_unlock_escape(void *ctx)
+{
+ struct node_data *n;
+
+ n = bpf_obj_new(typeof(*n));
+ if (!n)
+ return 1;
+
+ bpf_spin_lock(&glock);
+ bpf_rbtree_add(&groot, &n->node, less);
+ bpf_spin_unlock(&glock);
+
+ bpf_spin_lock(&glock);
+ /* After add() in previous critical section, n should be
+ * release_on_unlock and released after previous spin_unlock,
+ * so should not be possible to use it here
+ */
+ bpf_rbtree_remove(&groot, &n->node);
+ bpf_spin_unlock(&glock);
+ return 0;
+}
+
+SEC("?tc")
+__failure __msg("rbtree_remove node input must be non-owning ref")
+long rbtree_api_first_release_unlock_escape(void *ctx)
+{
+ struct bpf_rb_node *res;
+ struct node_data *n;
+
+ bpf_spin_lock(&glock);
+ res = bpf_rbtree_first(&groot);
+ if (!res) {
+ bpf_spin_unlock(&glock);
+ return 1;
+ }
+ n = container_of(res, struct node_data, node);
+ bpf_spin_unlock(&glock);
+
+ bpf_spin_lock(&glock);
+ /* After first() in previous critical section, n should be
+ * release_on_unlock and released after previous spin_unlock,
+ * so should not be possible to use it here
+ */
+ bpf_rbtree_remove(&groot, &n->node);
+ bpf_spin_unlock(&glock);
+ return 0;
+}
+
+static bool less__bad_fn_call_add(struct bpf_rb_node *a, const struct bpf_rb_node *b)
+{
+ struct node_data *node_a;
+ struct node_data *node_b;
+
+ node_a = container_of(a, struct node_data, node);
+ node_b = container_of(b, struct node_data, node);
+ bpf_rbtree_add(&groot, &node_a->node, less);
+
+ return node_a->key < node_b->key;
+}
+
+static bool less__bad_fn_call_remove(struct bpf_rb_node *a, const struct bpf_rb_node *b)
+{
+ struct node_data *node_a;
+ struct node_data *node_b;
+
+ node_a = container_of(a, struct node_data, node);
+ node_b = container_of(b, struct node_data, node);
+ bpf_rbtree_remove(&groot, &node_a->node);
+
+ return node_a->key < node_b->key;
+}
+
+static bool less__bad_fn_call_first_unlock_after(struct bpf_rb_node *a, const struct bpf_rb_node *b)
+{
+ struct node_data *node_a;
+ struct node_data *node_b;
+
+ node_a = container_of(a, struct node_data, node);
+ node_b = container_of(b, struct node_data, node);
+ bpf_rbtree_first(&groot);
+ bpf_spin_unlock(&glock);
+
+ return node_a->key < node_b->key;
+}
+
+static __always_inline
+long add_with_cb(bool (cb)(struct bpf_rb_node *a, const struct bpf_rb_node *b))
+{
+ struct node_data *n;
+
+ n = bpf_obj_new(typeof(*n));
+ if (!n)
+ return 1;
+
+ bpf_spin_lock(&glock);
+ bpf_rbtree_add(&groot, &n->node, cb);
+ bpf_spin_unlock(&glock);
+ return 0;
+}
+
+SEC("?tc")
+__failure __msg("arg#1 expected pointer to allocated object")
+long rbtree_api_add_bad_cb_bad_fn_call_add(void *ctx)
+{
+ return add_with_cb(less__bad_fn_call_add);
+}
+
+SEC("?tc")
+__failure __msg("rbtree_remove not allowed in rbtree cb")
+long rbtree_api_add_bad_cb_bad_fn_call_remove(void *ctx)
+{
+ return add_with_cb(less__bad_fn_call_remove);
+}
+
+SEC("?tc")
+__failure __msg("can't spin_{lock,unlock} in rbtree cb")
+long rbtree_api_add_bad_cb_bad_fn_call_first_unlock_after(void *ctx)
+{
+ return add_with_cb(less__bad_fn_call_first_unlock_after);
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/rcu_read_lock.c b/tools/testing/selftests/bpf/progs/rcu_read_lock.c
new file mode 100644
index 000000000000..ab3a532b7dd6
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/rcu_read_lock.c
@@ -0,0 +1,441 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2022 Meta Platforms, Inc. and affiliates. */
+
+#include "vmlinux.h"
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
+#include "bpf_tracing_net.h"
+#include "bpf_misc.h"
+
+char _license[] SEC("license") = "GPL";
+
+struct {
+ __uint(type, BPF_MAP_TYPE_TASK_STORAGE);
+ __uint(map_flags, BPF_F_NO_PREALLOC);
+ __type(key, int);
+ __type(value, long);
+} map_a SEC(".maps");
+
+__u32 user_data, key_serial, target_pid;
+__u64 flags, task_storage_val, cgroup_id;
+
+struct bpf_key *bpf_lookup_user_key(__u32 serial, __u64 flags) __ksym;
+void bpf_key_put(struct bpf_key *key) __ksym;
+void bpf_rcu_read_lock(void) __ksym;
+void bpf_rcu_read_unlock(void) __ksym;
+struct task_struct *bpf_task_acquire(struct task_struct *p) __ksym;
+void bpf_task_release(struct task_struct *p) __ksym;
+
+SEC("?fentry.s/" SYS_PREFIX "sys_getpgid")
+int get_cgroup_id(void *ctx)
+{
+ struct task_struct *task;
+ struct css_set *cgroups;
+
+ task = bpf_get_current_task_btf();
+ if (task->pid != target_pid)
+ return 0;
+
+ /* simulate bpf_get_current_cgroup_id() helper */
+ bpf_rcu_read_lock();
+ cgroups = task->cgroups;
+ if (!cgroups)
+ goto unlock;
+ cgroup_id = cgroups->dfl_cgrp->kn->id;
+unlock:
+ bpf_rcu_read_unlock();
+ return 0;
+}
+
+SEC("?fentry.s/" SYS_PREFIX "sys_getpgid")
+int task_succ(void *ctx)
+{
+ struct task_struct *task, *real_parent;
+ long init_val = 2;
+ long *ptr;
+
+ task = bpf_get_current_task_btf();
+ if (task->pid != target_pid)
+ return 0;
+
+ bpf_rcu_read_lock();
+ /* region including helper using rcu ptr real_parent */
+ real_parent = task->real_parent;
+ if (!real_parent)
+ goto out;
+ ptr = bpf_task_storage_get(&map_a, real_parent, &init_val,
+ BPF_LOCAL_STORAGE_GET_F_CREATE);
+ if (!ptr)
+ goto out;
+ ptr = bpf_task_storage_get(&map_a, real_parent, 0, 0);
+ if (!ptr)
+ goto out;
+ task_storage_val = *ptr;
+out:
+ bpf_rcu_read_unlock();
+ return 0;
+}
+
+SEC("?fentry.s/" SYS_PREFIX "sys_nanosleep")
+int no_lock(void *ctx)
+{
+ struct task_struct *task, *real_parent;
+
+ /* old style ptr_to_btf_id is not allowed in sleepable */
+ task = bpf_get_current_task_btf();
+ real_parent = task->real_parent;
+ (void)bpf_task_storage_get(&map_a, real_parent, 0, 0);
+ return 0;
+}
+
+SEC("?fentry.s/" SYS_PREFIX "sys_nanosleep")
+int two_regions(void *ctx)
+{
+ struct task_struct *task, *real_parent;
+
+ /* two regions */
+ task = bpf_get_current_task_btf();
+ bpf_rcu_read_lock();
+ bpf_rcu_read_unlock();
+ bpf_rcu_read_lock();
+ real_parent = task->real_parent;
+ if (!real_parent)
+ goto out;
+ (void)bpf_task_storage_get(&map_a, real_parent, 0, 0);
+out:
+ bpf_rcu_read_unlock();
+ return 0;
+}
+
+SEC("?fentry/" SYS_PREFIX "sys_getpgid")
+int non_sleepable_1(void *ctx)
+{
+ struct task_struct *task, *real_parent;
+
+ task = bpf_get_current_task_btf();
+ bpf_rcu_read_lock();
+ real_parent = task->real_parent;
+ if (!real_parent)
+ goto out;
+ (void)bpf_task_storage_get(&map_a, real_parent, 0, 0);
+out:
+ bpf_rcu_read_unlock();
+ return 0;
+}
+
+SEC("?fentry/" SYS_PREFIX "sys_getpgid")
+int non_sleepable_2(void *ctx)
+{
+ struct task_struct *task, *real_parent;
+
+ bpf_rcu_read_lock();
+ task = bpf_get_current_task_btf();
+ bpf_rcu_read_unlock();
+
+ bpf_rcu_read_lock();
+ real_parent = task->real_parent;
+ if (!real_parent)
+ goto out;
+ (void)bpf_task_storage_get(&map_a, real_parent, 0, 0);
+out:
+ bpf_rcu_read_unlock();
+ return 0;
+}
+
+SEC("?fentry.s/" SYS_PREFIX "sys_nanosleep")
+int task_acquire(void *ctx)
+{
+ struct task_struct *task, *real_parent, *gparent;
+
+ task = bpf_get_current_task_btf();
+ bpf_rcu_read_lock();
+ real_parent = task->real_parent;
+ if (!real_parent)
+ goto out;
+
+ /* rcu_ptr->rcu_field */
+ gparent = real_parent->real_parent;
+ if (!gparent)
+ goto out;
+
+ /* acquire a reference which can be used outside rcu read lock region */
+ gparent = bpf_task_acquire(gparent);
+ if (!gparent)
+ goto out;
+
+ (void)bpf_task_storage_get(&map_a, gparent, 0, 0);
+ bpf_task_release(gparent);
+out:
+ bpf_rcu_read_unlock();
+ return 0;
+}
+
+SEC("?fentry.s/" SYS_PREFIX "sys_getpgid")
+int miss_lock(void *ctx)
+{
+ struct task_struct *task;
+
+ /* missing bpf_rcu_read_lock() */
+ task = bpf_get_current_task_btf();
+ bpf_rcu_read_lock();
+ (void)bpf_task_storage_get(&map_a, task, 0, 0);
+ bpf_rcu_read_unlock();
+ bpf_rcu_read_unlock();
+ return 0;
+}
+
+SEC("?fentry.s/" SYS_PREFIX "sys_getpgid")
+int miss_unlock(void *ctx)
+{
+ struct task_struct *task;
+
+ /* missing bpf_rcu_read_unlock() */
+ task = bpf_get_current_task_btf();
+ bpf_rcu_read_lock();
+ (void)bpf_task_storage_get(&map_a, task, 0, 0);
+ return 0;
+}
+
+SEC("?fentry/" SYS_PREFIX "sys_getpgid")
+int non_sleepable_rcu_mismatch(void *ctx)
+{
+ struct task_struct *task, *real_parent;
+
+ task = bpf_get_current_task_btf();
+ /* non-sleepable: missing bpf_rcu_read_unlock() in one path */
+ bpf_rcu_read_lock();
+ real_parent = task->real_parent;
+ if (!real_parent)
+ goto out;
+ (void)bpf_task_storage_get(&map_a, real_parent, 0, 0);
+ if (real_parent)
+ bpf_rcu_read_unlock();
+out:
+ return 0;
+}
+
+SEC("?fentry.s/" SYS_PREFIX "sys_getpgid")
+int inproper_sleepable_helper(void *ctx)
+{
+ struct task_struct *task, *real_parent;
+ struct pt_regs *regs;
+ __u32 value = 0;
+ void *ptr;
+
+ task = bpf_get_current_task_btf();
+ /* sleepable helper in rcu read lock region */
+ bpf_rcu_read_lock();
+ real_parent = task->real_parent;
+ if (!real_parent)
+ goto out;
+ regs = (struct pt_regs *)bpf_task_pt_regs(real_parent);
+ if (!regs)
+ goto out;
+
+ ptr = (void *)PT_REGS_IP(regs);
+ (void)bpf_copy_from_user_task(&value, sizeof(uint32_t), ptr, task, 0);
+ user_data = value;
+ (void)bpf_task_storage_get(&map_a, real_parent, 0, 0);
+out:
+ bpf_rcu_read_unlock();
+ return 0;
+}
+
+SEC("?lsm.s/bpf")
+int BPF_PROG(inproper_sleepable_kfunc, int cmd, union bpf_attr *attr, unsigned int size)
+{
+ struct bpf_key *bkey;
+
+ /* sleepable kfunc in rcu read lock region */
+ bpf_rcu_read_lock();
+ bkey = bpf_lookup_user_key(key_serial, flags);
+ bpf_rcu_read_unlock();
+ if (!bkey)
+ return -1;
+ bpf_key_put(bkey);
+
+ return 0;
+}
+
+SEC("?fentry.s/" SYS_PREFIX "sys_nanosleep")
+int nested_rcu_region(void *ctx)
+{
+ struct task_struct *task, *real_parent;
+
+ /* nested rcu read lock regions */
+ task = bpf_get_current_task_btf();
+ bpf_rcu_read_lock();
+ bpf_rcu_read_lock();
+ real_parent = task->real_parent;
+ if (!real_parent)
+ goto out;
+ (void)bpf_task_storage_get(&map_a, real_parent, 0, 0);
+out:
+ bpf_rcu_read_unlock();
+ bpf_rcu_read_unlock();
+ return 0;
+}
+
+SEC("?fentry.s/" SYS_PREFIX "sys_getpgid")
+int task_trusted_non_rcuptr(void *ctx)
+{
+ struct task_struct *task, *group_leader;
+
+ task = bpf_get_current_task_btf();
+ bpf_rcu_read_lock();
+ /* the pointer group_leader is explicitly marked as trusted */
+ group_leader = task->real_parent->group_leader;
+ (void)bpf_task_storage_get(&map_a, group_leader, 0, 0);
+ bpf_rcu_read_unlock();
+ return 0;
+}
+
+SEC("?fentry.s/" SYS_PREFIX "sys_getpgid")
+int task_untrusted_rcuptr(void *ctx)
+{
+ struct task_struct *task, *real_parent;
+
+ task = bpf_get_current_task_btf();
+ bpf_rcu_read_lock();
+ real_parent = task->real_parent;
+ bpf_rcu_read_unlock();
+ /* helper use of rcu ptr outside the rcu read lock region */
+ (void)bpf_task_storage_get(&map_a, real_parent, 0, 0);
+ return 0;
+}
+
+SEC("?fentry.s/" SYS_PREFIX "sys_nanosleep")
+int cross_rcu_region(void *ctx)
+{
+ struct task_struct *task, *real_parent;
+
+ /* rcu ptr define/use in different regions */
+ task = bpf_get_current_task_btf();
+ bpf_rcu_read_lock();
+ real_parent = task->real_parent;
+ bpf_rcu_read_unlock();
+ bpf_rcu_read_lock();
+ (void)bpf_task_storage_get(&map_a, real_parent, 0, 0);
+ bpf_rcu_read_unlock();
+ return 0;
+}
+
+__noinline
+static int static_subprog(void *ctx)
+{
+ volatile int ret = 0;
+
+ if (bpf_get_prandom_u32())
+ return ret + 42;
+ return ret + bpf_get_prandom_u32();
+}
+
+__noinline
+int global_subprog(u64 a)
+{
+ volatile int ret = a;
+
+ return ret + static_subprog(NULL);
+}
+
+__noinline
+static int static_subprog_lock(void *ctx)
+{
+ volatile int ret = 0;
+
+ bpf_rcu_read_lock();
+ if (bpf_get_prandom_u32())
+ return ret + 42;
+ return ret + bpf_get_prandom_u32();
+}
+
+__noinline
+int global_subprog_lock(u64 a)
+{
+ volatile int ret = a;
+
+ return ret + static_subprog_lock(NULL);
+}
+
+__noinline
+static int static_subprog_unlock(void *ctx)
+{
+ volatile int ret = 0;
+
+ bpf_rcu_read_unlock();
+ if (bpf_get_prandom_u32())
+ return ret + 42;
+ return ret + bpf_get_prandom_u32();
+}
+
+__noinline
+int global_subprog_unlock(u64 a)
+{
+ volatile int ret = a;
+
+ return ret + static_subprog_unlock(NULL);
+}
+
+SEC("?fentry.s/" SYS_PREFIX "sys_getpgid")
+int rcu_read_lock_subprog(void *ctx)
+{
+ volatile int ret = 0;
+
+ bpf_rcu_read_lock();
+ if (bpf_get_prandom_u32())
+ ret += static_subprog(ctx);
+ bpf_rcu_read_unlock();
+ return 0;
+}
+
+SEC("?fentry.s/" SYS_PREFIX "sys_getpgid")
+int rcu_read_lock_global_subprog(void *ctx)
+{
+ volatile int ret = 0;
+
+ bpf_rcu_read_lock();
+ if (bpf_get_prandom_u32())
+ ret += global_subprog(ret);
+ bpf_rcu_read_unlock();
+ return 0;
+}
+
+SEC("?fentry.s/" SYS_PREFIX "sys_getpgid")
+int rcu_read_lock_subprog_lock(void *ctx)
+{
+ volatile int ret = 0;
+
+ ret += static_subprog_lock(ctx);
+ bpf_rcu_read_unlock();
+ return 0;
+}
+
+SEC("?fentry.s/" SYS_PREFIX "sys_getpgid")
+int rcu_read_lock_global_subprog_lock(void *ctx)
+{
+ volatile int ret = 0;
+
+ ret += global_subprog_lock(ret);
+ bpf_rcu_read_unlock();
+ return 0;
+}
+
+SEC("?fentry.s/" SYS_PREFIX "sys_getpgid")
+int rcu_read_lock_subprog_unlock(void *ctx)
+{
+ volatile int ret = 0;
+
+ bpf_rcu_read_lock();
+ ret += static_subprog_unlock(ctx);
+ return 0;
+}
+
+SEC("?fentry.s/" SYS_PREFIX "sys_getpgid")
+int rcu_read_lock_global_subprog_unlock(void *ctx)
+{
+ volatile int ret = 0;
+
+ bpf_rcu_read_lock();
+ ret += global_subprog_unlock(ret);
+ return 0;
+}
diff --git a/tools/testing/selftests/bpf/progs/rcu_tasks_trace_gp.c b/tools/testing/selftests/bpf/progs/rcu_tasks_trace_gp.c
new file mode 100644
index 000000000000..df4873558634
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/rcu_tasks_trace_gp.c
@@ -0,0 +1,36 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <vmlinux.h>
+#include <bpf/bpf_tracing.h>
+#include <bpf/bpf_helpers.h>
+
+struct task_ls_map {
+ __uint(type, BPF_MAP_TYPE_TASK_STORAGE);
+ __uint(map_flags, BPF_F_NO_PREALLOC);
+ __type(key, int);
+ __type(value, int);
+} task_ls_map SEC(".maps");
+
+long gp_seq;
+
+SEC("syscall")
+int do_call_rcu_tasks_trace(void *ctx)
+{
+ struct task_struct *current;
+ int *v;
+
+ current = bpf_get_current_task_btf();
+ v = bpf_task_storage_get(&task_ls_map, current, NULL, BPF_LOCAL_STORAGE_GET_F_CREATE);
+ if (!v)
+ return 1;
+ /* Invoke call_rcu_tasks_trace */
+ return bpf_task_storage_delete(&task_ls_map, current);
+}
+
+SEC("kprobe/rcu_tasks_trace_postgp")
+int rcu_tasks_trace_postgp(void *ctx)
+{
+ __sync_add_and_fetch(&gp_seq, 1);
+ return 0;
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/read_bpf_task_storage_busy.c b/tools/testing/selftests/bpf/progs/read_bpf_task_storage_busy.c
new file mode 100644
index 000000000000..76556e0b42b2
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/read_bpf_task_storage_busy.c
@@ -0,0 +1,38 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (C) 2022. Huawei Technologies Co., Ltd */
+#include "vmlinux.h"
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
+
+extern bool CONFIG_PREEMPT __kconfig __weak;
+extern const int bpf_task_storage_busy __ksym;
+
+char _license[] SEC("license") = "GPL";
+
+int pid = 0;
+int busy = 0;
+
+struct {
+ __uint(type, BPF_MAP_TYPE_TASK_STORAGE);
+ __uint(map_flags, BPF_F_NO_PREALLOC);
+ __type(key, int);
+ __type(value, long);
+} task SEC(".maps");
+
+SEC("raw_tp/sys_enter")
+int BPF_PROG(read_bpf_task_storage_busy)
+{
+ int *value;
+
+ if (!CONFIG_PREEMPT)
+ return 0;
+
+ if (bpf_get_current_pid_tgid() >> 32 != pid)
+ return 0;
+
+ value = bpf_this_cpu_ptr(&bpf_task_storage_busy);
+ if (value)
+ busy = *value;
+
+ return 0;
+}
diff --git a/tools/testing/selftests/bpf/progs/read_vsyscall.c b/tools/testing/selftests/bpf/progs/read_vsyscall.c
new file mode 100644
index 000000000000..986f96687ae1
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/read_vsyscall.c
@@ -0,0 +1,45 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (C) 2024. Huawei Technologies Co., Ltd */
+#include <linux/types.h>
+#include <bpf/bpf_helpers.h>
+
+#include "bpf_misc.h"
+
+int target_pid = 0;
+void *user_ptr = 0;
+int read_ret[8];
+
+char _license[] SEC("license") = "GPL";
+
+SEC("fentry/" SYS_PREFIX "sys_nanosleep")
+int do_probe_read(void *ctx)
+{
+ char buf[8];
+
+ if ((bpf_get_current_pid_tgid() >> 32) != target_pid)
+ return 0;
+
+ read_ret[0] = bpf_probe_read_kernel(buf, sizeof(buf), user_ptr);
+ read_ret[1] = bpf_probe_read_kernel_str(buf, sizeof(buf), user_ptr);
+ read_ret[2] = bpf_probe_read(buf, sizeof(buf), user_ptr);
+ read_ret[3] = bpf_probe_read_str(buf, sizeof(buf), user_ptr);
+ read_ret[4] = bpf_probe_read_user(buf, sizeof(buf), user_ptr);
+ read_ret[5] = bpf_probe_read_user_str(buf, sizeof(buf), user_ptr);
+
+ return 0;
+}
+
+SEC("fentry.s/" SYS_PREFIX "sys_nanosleep")
+int do_copy_from_user(void *ctx)
+{
+ char buf[8];
+
+ if ((bpf_get_current_pid_tgid() >> 32) != target_pid)
+ return 0;
+
+ read_ret[6] = bpf_copy_from_user(buf, sizeof(buf), user_ptr);
+ read_ret[7] = bpf_copy_from_user_task(buf, sizeof(buf), user_ptr,
+ bpf_get_current_task_btf(), 0);
+
+ return 0;
+}
diff --git a/tools/testing/selftests/bpf/progs/recursion.c b/tools/testing/selftests/bpf/progs/recursion.c
new file mode 100644
index 000000000000..3c2423bb19e2
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/recursion.c
@@ -0,0 +1,43 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2021 Facebook */
+
+#include "vmlinux.h"
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
+
+char _license[] SEC("license") = "GPL";
+
+struct {
+ __uint(type, BPF_MAP_TYPE_HASH);
+ __uint(max_entries, 1);
+ __type(key, int);
+ __type(value, long);
+} hash1 SEC(".maps");
+
+struct {
+ __uint(type, BPF_MAP_TYPE_HASH);
+ __uint(max_entries, 1);
+ __type(key, int);
+ __type(value, long);
+} hash2 SEC(".maps");
+
+int pass1 = 0;
+int pass2 = 0;
+
+SEC("fentry/htab_map_delete_elem")
+int BPF_PROG(on_delete, struct bpf_map *map)
+{
+ int key = 0;
+
+ if (map == (void *)&hash1) {
+ pass1++;
+ return 0;
+ }
+ if (map == (void *)&hash2) {
+ pass2++;
+ bpf_map_delete_elem(&hash2, &key);
+ return 0;
+ }
+
+ return 0;
+}
diff --git a/tools/testing/selftests/bpf/progs/recvmsg4_prog.c b/tools/testing/selftests/bpf/progs/recvmsg4_prog.c
new file mode 100644
index 000000000000..59748c95471a
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/recvmsg4_prog.c
@@ -0,0 +1,40 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include <linux/stddef.h>
+#include <linux/bpf.h>
+#include <linux/in.h>
+#include <sys/socket.h>
+
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_endian.h>
+
+#include <bpf_sockopt_helpers.h>
+
+#define SERV4_IP 0xc0a801feU /* 192.168.1.254 */
+#define SERV4_PORT 4040
+
+SEC("cgroup/recvmsg4")
+int recvmsg4_prog(struct bpf_sock_addr *ctx)
+{
+ struct bpf_sock *sk;
+
+ sk = ctx->sk;
+ if (!sk)
+ return 1;
+
+ if (sk->family != AF_INET)
+ return 1;
+
+ if (ctx->type != SOCK_STREAM && ctx->type != SOCK_DGRAM)
+ return 1;
+
+ if (!get_set_sk_priority(ctx))
+ return 1;
+
+ ctx->user_ip4 = bpf_htonl(SERV4_IP);
+ ctx->user_port = bpf_htons(SERV4_PORT);
+
+ return 1;
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/recvmsg6_prog.c b/tools/testing/selftests/bpf/progs/recvmsg6_prog.c
new file mode 100644
index 000000000000..d9a4016596d5
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/recvmsg6_prog.c
@@ -0,0 +1,46 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include <linux/stddef.h>
+#include <linux/bpf.h>
+#include <linux/in6.h>
+#include <sys/socket.h>
+
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_endian.h>
+
+#include <bpf_sockopt_helpers.h>
+
+#define SERV6_IP_0 0xfaceb00c /* face:b00c:1234:5678::abcd */
+#define SERV6_IP_1 0x12345678
+#define SERV6_IP_2 0x00000000
+#define SERV6_IP_3 0x0000abcd
+#define SERV6_PORT 6060
+
+SEC("cgroup/recvmsg6")
+int recvmsg6_prog(struct bpf_sock_addr *ctx)
+{
+ struct bpf_sock *sk;
+
+ sk = ctx->sk;
+ if (!sk)
+ return 1;
+
+ if (sk->family != AF_INET6)
+ return 1;
+
+ if (ctx->type != SOCK_STREAM && ctx->type != SOCK_DGRAM)
+ return 1;
+
+ if (!get_set_sk_priority(ctx))
+ return 1;
+
+ ctx->user_ip6[0] = bpf_htonl(SERV6_IP_0);
+ ctx->user_ip6[1] = bpf_htonl(SERV6_IP_1);
+ ctx->user_ip6[2] = bpf_htonl(SERV6_IP_2);
+ ctx->user_ip6[3] = bpf_htonl(SERV6_IP_3);
+ ctx->user_port = bpf_htons(SERV6_PORT);
+
+ return 1;
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/recvmsg_unix_prog.c b/tools/testing/selftests/bpf/progs/recvmsg_unix_prog.c
new file mode 100644
index 000000000000..1c7ab44bccfa
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/recvmsg_unix_prog.c
@@ -0,0 +1,38 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2023 Meta Platforms, Inc. and affiliates. */
+
+#include "vmlinux.h"
+
+#include <string.h>
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_core_read.h>
+#include "bpf_kfuncs.h"
+
+__u8 SERVUN_ADDRESS[] = "\0bpf_cgroup_unix_test";
+
+SEC("cgroup/recvmsg_unix")
+int recvmsg_unix_prog(struct bpf_sock_addr *ctx)
+{
+ struct bpf_sock_addr_kern *sa_kern = bpf_cast_to_kern_ctx(ctx);
+ struct sockaddr_un *sa_kern_unaddr;
+ __u32 unaddrlen = offsetof(struct sockaddr_un, sun_path) +
+ sizeof(SERVUN_ADDRESS) - 1;
+ int ret;
+
+ ret = bpf_sock_addr_set_sun_path(sa_kern, SERVUN_ADDRESS,
+ sizeof(SERVUN_ADDRESS) - 1);
+ if (ret)
+ return 1;
+
+ if (sa_kern->uaddrlen != unaddrlen)
+ return 1;
+
+ sa_kern_unaddr = bpf_core_cast(sa_kern->uaddr, struct sockaddr_un);
+ if (memcmp(sa_kern_unaddr->sun_path, SERVUN_ADDRESS,
+ sizeof(SERVUN_ADDRESS) - 1) != 0)
+ return 1;
+
+ return 1;
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/refcounted_kptr.c b/tools/testing/selftests/bpf/progs/refcounted_kptr.c
new file mode 100644
index 000000000000..893a4fdb4b6e
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/refcounted_kptr.c
@@ -0,0 +1,571 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2023 Meta Platforms, Inc. and affiliates. */
+
+#include <vmlinux.h>
+#include <bpf/bpf_tracing.h>
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_core_read.h>
+#include "bpf_misc.h"
+#include "bpf_experimental.h"
+
+extern void bpf_rcu_read_lock(void) __ksym;
+extern void bpf_rcu_read_unlock(void) __ksym;
+
+struct node_data {
+ long key;
+ long list_data;
+ struct bpf_rb_node r;
+ struct bpf_list_node l;
+ struct bpf_refcount ref;
+};
+
+struct map_value {
+ struct node_data __kptr *node;
+};
+
+struct {
+ __uint(type, BPF_MAP_TYPE_ARRAY);
+ __type(key, int);
+ __type(value, struct map_value);
+ __uint(max_entries, 2);
+} stashed_nodes SEC(".maps");
+
+struct node_acquire {
+ long key;
+ long data;
+ struct bpf_rb_node node;
+ struct bpf_refcount refcount;
+};
+
+#define private(name) SEC(".bss." #name) __hidden __attribute__((aligned(8)))
+private(A) struct bpf_spin_lock lock;
+private(A) struct bpf_rb_root root __contains(node_data, r);
+private(A) struct bpf_list_head head __contains(node_data, l);
+
+private(B) struct bpf_spin_lock alock;
+private(B) struct bpf_rb_root aroot __contains(node_acquire, node);
+
+private(C) struct bpf_spin_lock block;
+private(C) struct bpf_rb_root broot __contains(node_data, r);
+
+static bool less(struct bpf_rb_node *node_a, const struct bpf_rb_node *node_b)
+{
+ struct node_data *a;
+ struct node_data *b;
+
+ a = container_of(node_a, struct node_data, r);
+ b = container_of(node_b, struct node_data, r);
+
+ return a->key < b->key;
+}
+
+static bool less_a(struct bpf_rb_node *a, const struct bpf_rb_node *b)
+{
+ struct node_acquire *node_a;
+ struct node_acquire *node_b;
+
+ node_a = container_of(a, struct node_acquire, node);
+ node_b = container_of(b, struct node_acquire, node);
+
+ return node_a->key < node_b->key;
+}
+
+static long __insert_in_tree_and_list(struct bpf_list_head *head,
+ struct bpf_rb_root *root,
+ struct bpf_spin_lock *lock)
+{
+ struct node_data *n, *m;
+
+ n = bpf_obj_new(typeof(*n));
+ if (!n)
+ return -1;
+
+ m = bpf_refcount_acquire(n);
+ m->key = 123;
+ m->list_data = 456;
+
+ bpf_spin_lock(lock);
+ if (bpf_rbtree_add(root, &n->r, less)) {
+ /* Failure to insert - unexpected */
+ bpf_spin_unlock(lock);
+ bpf_obj_drop(m);
+ return -2;
+ }
+ bpf_spin_unlock(lock);
+
+ bpf_spin_lock(lock);
+ if (bpf_list_push_front(head, &m->l)) {
+ /* Failure to insert - unexpected */
+ bpf_spin_unlock(lock);
+ return -3;
+ }
+ bpf_spin_unlock(lock);
+ return 0;
+}
+
+static long __stash_map_insert_tree(int idx, int val, struct bpf_rb_root *root,
+ struct bpf_spin_lock *lock)
+{
+ struct map_value *mapval;
+ struct node_data *n, *m;
+
+ mapval = bpf_map_lookup_elem(&stashed_nodes, &idx);
+ if (!mapval)
+ return -1;
+
+ n = bpf_obj_new(typeof(*n));
+ if (!n)
+ return -2;
+
+ n->key = val;
+ m = bpf_refcount_acquire(n);
+
+ n = bpf_kptr_xchg(&mapval->node, n);
+ if (n) {
+ bpf_obj_drop(n);
+ bpf_obj_drop(m);
+ return -3;
+ }
+
+ bpf_spin_lock(lock);
+ if (bpf_rbtree_add(root, &m->r, less)) {
+ /* Failure to insert - unexpected */
+ bpf_spin_unlock(lock);
+ return -4;
+ }
+ bpf_spin_unlock(lock);
+ return 0;
+}
+
+static long __read_from_tree(struct bpf_rb_root *root,
+ struct bpf_spin_lock *lock,
+ bool remove_from_tree)
+{
+ struct bpf_rb_node *rb;
+ struct node_data *n;
+ long res = -99;
+
+ bpf_spin_lock(lock);
+
+ rb = bpf_rbtree_first(root);
+ if (!rb) {
+ bpf_spin_unlock(lock);
+ return -1;
+ }
+
+ n = container_of(rb, struct node_data, r);
+ res = n->key;
+
+ if (!remove_from_tree) {
+ bpf_spin_unlock(lock);
+ return res;
+ }
+
+ rb = bpf_rbtree_remove(root, rb);
+ bpf_spin_unlock(lock);
+ if (!rb)
+ return -2;
+ n = container_of(rb, struct node_data, r);
+ bpf_obj_drop(n);
+ return res;
+}
+
+static long __read_from_list(struct bpf_list_head *head,
+ struct bpf_spin_lock *lock,
+ bool remove_from_list)
+{
+ struct bpf_list_node *l;
+ struct node_data *n;
+ long res = -99;
+
+ bpf_spin_lock(lock);
+
+ l = bpf_list_pop_front(head);
+ if (!l) {
+ bpf_spin_unlock(lock);
+ return -1;
+ }
+
+ n = container_of(l, struct node_data, l);
+ res = n->list_data;
+
+ if (!remove_from_list) {
+ if (bpf_list_push_back(head, &n->l)) {
+ bpf_spin_unlock(lock);
+ return -2;
+ }
+ }
+
+ bpf_spin_unlock(lock);
+
+ if (remove_from_list)
+ bpf_obj_drop(n);
+ return res;
+}
+
+static long __read_from_unstash(int idx)
+{
+ struct node_data *n = NULL;
+ struct map_value *mapval;
+ long val = -99;
+
+ mapval = bpf_map_lookup_elem(&stashed_nodes, &idx);
+ if (!mapval)
+ return -1;
+
+ n = bpf_kptr_xchg(&mapval->node, n);
+ if (!n)
+ return -2;
+
+ val = n->key;
+ bpf_obj_drop(n);
+ return val;
+}
+
+#define INSERT_READ_BOTH(rem_tree, rem_list, desc) \
+SEC("tc") \
+__description(desc) \
+__success __retval(579) \
+long insert_and_remove_tree_##rem_tree##_list_##rem_list(void *ctx) \
+{ \
+ long err, tree_data, list_data; \
+ \
+ err = __insert_in_tree_and_list(&head, &root, &lock); \
+ if (err) \
+ return err; \
+ \
+ err = __read_from_tree(&root, &lock, rem_tree); \
+ if (err < 0) \
+ return err; \
+ else \
+ tree_data = err; \
+ \
+ err = __read_from_list(&head, &lock, rem_list); \
+ if (err < 0) \
+ return err; \
+ else \
+ list_data = err; \
+ \
+ return tree_data + list_data; \
+}
+
+/* After successful insert of struct node_data into both collections:
+ * - it should have refcount = 2
+ * - removing / not removing the node_data from a collection after
+ * reading should have no effect on ability to read / remove from
+ * the other collection
+ */
+INSERT_READ_BOTH(true, true, "insert_read_both: remove from tree + list");
+INSERT_READ_BOTH(false, false, "insert_read_both: remove from neither");
+INSERT_READ_BOTH(true, false, "insert_read_both: remove from tree");
+INSERT_READ_BOTH(false, true, "insert_read_both: remove from list");
+
+#undef INSERT_READ_BOTH
+#define INSERT_READ_BOTH(rem_tree, rem_list, desc) \
+SEC("tc") \
+__description(desc) \
+__success __retval(579) \
+long insert_and_remove_lf_tree_##rem_tree##_list_##rem_list(void *ctx) \
+{ \
+ long err, tree_data, list_data; \
+ \
+ err = __insert_in_tree_and_list(&head, &root, &lock); \
+ if (err) \
+ return err; \
+ \
+ err = __read_from_list(&head, &lock, rem_list); \
+ if (err < 0) \
+ return err; \
+ else \
+ list_data = err; \
+ \
+ err = __read_from_tree(&root, &lock, rem_tree); \
+ if (err < 0) \
+ return err; \
+ else \
+ tree_data = err; \
+ \
+ return tree_data + list_data; \
+}
+
+/* Similar to insert_read_both, but list data is read and possibly removed
+ * first
+ *
+ * Results should be no different than reading and possibly removing rbtree
+ * node first
+ */
+INSERT_READ_BOTH(true, true, "insert_read_both_list_first: remove from tree + list");
+INSERT_READ_BOTH(false, false, "insert_read_both_list_first: remove from neither");
+INSERT_READ_BOTH(true, false, "insert_read_both_list_first: remove from tree");
+INSERT_READ_BOTH(false, true, "insert_read_both_list_first: remove from list");
+
+#define INSERT_DOUBLE_READ_AND_DEL(read_fn, read_root, desc) \
+SEC("tc") \
+__description(desc) \
+__success __retval(-1) \
+long insert_double_##read_fn##_and_del_##read_root(void *ctx) \
+{ \
+ long err, list_data; \
+ \
+ err = __insert_in_tree_and_list(&head, &root, &lock); \
+ if (err) \
+ return err; \
+ \
+ err = read_fn(&read_root, &lock, true); \
+ if (err < 0) \
+ return err; \
+ else \
+ list_data = err; \
+ \
+ err = read_fn(&read_root, &lock, true); \
+ if (err < 0) \
+ return err; \
+ \
+ return err + list_data; \
+}
+
+/* Insert into both tree and list, then try reading-and-removing from either twice
+ *
+ * The second read-and-remove should fail on read step since the node has
+ * already been removed
+ */
+INSERT_DOUBLE_READ_AND_DEL(__read_from_tree, root, "insert_double_del: 2x read-and-del from tree");
+INSERT_DOUBLE_READ_AND_DEL(__read_from_list, head, "insert_double_del: 2x read-and-del from list");
+
+#define INSERT_STASH_READ(rem_tree, desc) \
+SEC("tc") \
+__description(desc) \
+__success __retval(84) \
+long insert_rbtree_and_stash__del_tree_##rem_tree(void *ctx) \
+{ \
+ long err, tree_data, map_data; \
+ \
+ err = __stash_map_insert_tree(0, 42, &root, &lock); \
+ if (err) \
+ return err; \
+ \
+ err = __read_from_tree(&root, &lock, rem_tree); \
+ if (err < 0) \
+ return err; \
+ else \
+ tree_data = err; \
+ \
+ err = __read_from_unstash(0); \
+ if (err < 0) \
+ return err; \
+ else \
+ map_data = err; \
+ \
+ return tree_data + map_data; \
+}
+
+/* Stash a refcounted node in map_val, insert same node into tree, then try
+ * reading data from tree then unstashed map_val, possibly removing from tree
+ *
+ * Removing from tree should have no effect on map_val kptr validity
+ */
+INSERT_STASH_READ(true, "insert_stash_read: remove from tree");
+INSERT_STASH_READ(false, "insert_stash_read: don't remove from tree");
+
+SEC("tc")
+__success
+long rbtree_refcounted_node_ref_escapes(void *ctx)
+{
+ struct node_acquire *n, *m;
+
+ n = bpf_obj_new(typeof(*n));
+ if (!n)
+ return 1;
+
+ bpf_spin_lock(&alock);
+ bpf_rbtree_add(&aroot, &n->node, less_a);
+ m = bpf_refcount_acquire(n);
+ bpf_spin_unlock(&alock);
+ if (!m)
+ return 2;
+
+ m->key = 2;
+ bpf_obj_drop(m);
+ return 0;
+}
+
+SEC("tc")
+__success
+long rbtree_refcounted_node_ref_escapes_owning_input(void *ctx)
+{
+ struct node_acquire *n, *m;
+
+ n = bpf_obj_new(typeof(*n));
+ if (!n)
+ return 1;
+
+ m = bpf_refcount_acquire(n);
+ m->key = 2;
+
+ bpf_spin_lock(&alock);
+ bpf_rbtree_add(&aroot, &n->node, less_a);
+ bpf_spin_unlock(&alock);
+
+ bpf_obj_drop(m);
+
+ return 0;
+}
+
+static long __stash_map_empty_xchg(struct node_data *n, int idx)
+{
+ struct map_value *mapval = bpf_map_lookup_elem(&stashed_nodes, &idx);
+
+ if (!mapval) {
+ bpf_obj_drop(n);
+ return 1;
+ }
+ n = bpf_kptr_xchg(&mapval->node, n);
+ if (n) {
+ bpf_obj_drop(n);
+ return 2;
+ }
+ return 0;
+}
+
+SEC("tc")
+long rbtree_wrong_owner_remove_fail_a1(void *ctx)
+{
+ struct node_data *n, *m;
+
+ n = bpf_obj_new(typeof(*n));
+ if (!n)
+ return 1;
+ m = bpf_refcount_acquire(n);
+
+ if (__stash_map_empty_xchg(n, 0)) {
+ bpf_obj_drop(m);
+ return 2;
+ }
+
+ if (__stash_map_empty_xchg(m, 1))
+ return 3;
+
+ return 0;
+}
+
+SEC("tc")
+long rbtree_wrong_owner_remove_fail_b(void *ctx)
+{
+ struct map_value *mapval;
+ struct node_data *n;
+ int idx = 0;
+
+ mapval = bpf_map_lookup_elem(&stashed_nodes, &idx);
+ if (!mapval)
+ return 1;
+
+ n = bpf_kptr_xchg(&mapval->node, NULL);
+ if (!n)
+ return 2;
+
+ bpf_spin_lock(&block);
+
+ bpf_rbtree_add(&broot, &n->r, less);
+
+ bpf_spin_unlock(&block);
+ return 0;
+}
+
+SEC("tc")
+long rbtree_wrong_owner_remove_fail_a2(void *ctx)
+{
+ struct map_value *mapval;
+ struct bpf_rb_node *res;
+ struct node_data *m;
+ int idx = 1;
+
+ mapval = bpf_map_lookup_elem(&stashed_nodes, &idx);
+ if (!mapval)
+ return 1;
+
+ m = bpf_kptr_xchg(&mapval->node, NULL);
+ if (!m)
+ return 2;
+ bpf_spin_lock(&lock);
+
+ /* make m non-owning ref */
+ bpf_list_push_back(&head, &m->l);
+ res = bpf_rbtree_remove(&root, &m->r);
+
+ bpf_spin_unlock(&lock);
+ if (res) {
+ bpf_obj_drop(container_of(res, struct node_data, r));
+ return 3;
+ }
+ return 0;
+}
+
+SEC("?fentry.s/bpf_testmod_test_read")
+__success
+int BPF_PROG(rbtree_sleepable_rcu,
+ struct file *file, struct kobject *kobj,
+ struct bin_attribute *bin_attr, char *buf, loff_t off, size_t len)
+{
+ struct bpf_rb_node *rb;
+ struct node_data *n, *m = NULL;
+
+ n = bpf_obj_new(typeof(*n));
+ if (!n)
+ return 0;
+
+ bpf_rcu_read_lock();
+ bpf_spin_lock(&lock);
+ bpf_rbtree_add(&root, &n->r, less);
+ rb = bpf_rbtree_first(&root);
+ if (!rb)
+ goto err_out;
+
+ rb = bpf_rbtree_remove(&root, rb);
+ if (!rb)
+ goto err_out;
+
+ m = container_of(rb, struct node_data, r);
+
+err_out:
+ bpf_spin_unlock(&lock);
+ bpf_rcu_read_unlock();
+ if (m)
+ bpf_obj_drop(m);
+ return 0;
+}
+
+SEC("?fentry.s/bpf_testmod_test_read")
+__success
+int BPF_PROG(rbtree_sleepable_rcu_no_explicit_rcu_lock,
+ struct file *file, struct kobject *kobj,
+ struct bin_attribute *bin_attr, char *buf, loff_t off, size_t len)
+{
+ struct bpf_rb_node *rb;
+ struct node_data *n, *m = NULL;
+
+ n = bpf_obj_new(typeof(*n));
+ if (!n)
+ return 0;
+
+ /* No explicit bpf_rcu_read_lock */
+ bpf_spin_lock(&lock);
+ bpf_rbtree_add(&root, &n->r, less);
+ rb = bpf_rbtree_first(&root);
+ if (!rb)
+ goto err_out;
+
+ rb = bpf_rbtree_remove(&root, rb);
+ if (!rb)
+ goto err_out;
+
+ m = container_of(rb, struct node_data, r);
+
+err_out:
+ bpf_spin_unlock(&lock);
+ /* No explicit bpf_rcu_read_unlock */
+ if (m)
+ bpf_obj_drop(m);
+ return 0;
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/refcounted_kptr_fail.c b/tools/testing/selftests/bpf/progs/refcounted_kptr_fail.c
new file mode 100644
index 000000000000..1553b9c16aa7
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/refcounted_kptr_fail.c
@@ -0,0 +1,121 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <vmlinux.h>
+#include <bpf/bpf_tracing.h>
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_core_read.h>
+#include "bpf_experimental.h"
+#include "bpf_misc.h"
+
+struct node_acquire {
+ long key;
+ long data;
+ struct bpf_rb_node node;
+ struct bpf_refcount refcount;
+};
+
+extern void bpf_rcu_read_lock(void) __ksym;
+extern void bpf_rcu_read_unlock(void) __ksym;
+
+#define private(name) SEC(".data." #name) __hidden __attribute__((aligned(8)))
+private(A) struct bpf_spin_lock glock;
+private(A) struct bpf_rb_root groot __contains(node_acquire, node);
+
+static bool less(struct bpf_rb_node *a, const struct bpf_rb_node *b)
+{
+ struct node_acquire *node_a;
+ struct node_acquire *node_b;
+
+ node_a = container_of(a, struct node_acquire, node);
+ node_b = container_of(b, struct node_acquire, node);
+
+ return node_a->key < node_b->key;
+}
+
+SEC("?tc")
+__failure __msg("Unreleased reference id=4 alloc_insn=21")
+long rbtree_refcounted_node_ref_escapes(void *ctx)
+{
+ struct node_acquire *n, *m;
+
+ n = bpf_obj_new(typeof(*n));
+ if (!n)
+ return 1;
+
+ bpf_spin_lock(&glock);
+ bpf_rbtree_add(&groot, &n->node, less);
+ /* m becomes an owning ref but is never drop'd or added to a tree */
+ m = bpf_refcount_acquire(n);
+ bpf_spin_unlock(&glock);
+ if (!m)
+ return 2;
+
+ m->key = 2;
+ return 0;
+}
+
+SEC("?tc")
+__failure __msg("Possibly NULL pointer passed to trusted arg0")
+long refcount_acquire_maybe_null(void *ctx)
+{
+ struct node_acquire *n, *m;
+
+ n = bpf_obj_new(typeof(*n));
+ /* Intentionally not testing !n
+ * it's MAYBE_NULL for refcount_acquire
+ */
+ m = bpf_refcount_acquire(n);
+ if (m)
+ bpf_obj_drop(m);
+ if (n)
+ bpf_obj_drop(n);
+
+ return 0;
+}
+
+SEC("?tc")
+__failure __msg("Unreleased reference id=3 alloc_insn=9")
+long rbtree_refcounted_node_ref_escapes_owning_input(void *ctx)
+{
+ struct node_acquire *n, *m;
+
+ n = bpf_obj_new(typeof(*n));
+ if (!n)
+ return 1;
+
+ /* m becomes an owning ref but is never drop'd or added to a tree */
+ m = bpf_refcount_acquire(n);
+ m->key = 2;
+
+ bpf_spin_lock(&glock);
+ bpf_rbtree_add(&groot, &n->node, less);
+ bpf_spin_unlock(&glock);
+
+ return 0;
+}
+
+SEC("?fentry.s/bpf_testmod_test_read")
+__failure __msg("function calls are not allowed while holding a lock")
+int BPF_PROG(rbtree_fail_sleepable_lock_across_rcu,
+ struct file *file, struct kobject *kobj,
+ struct bin_attribute *bin_attr, char *buf, loff_t off, size_t len)
+{
+ struct node_acquire *n;
+
+ n = bpf_obj_new(typeof(*n));
+ if (!n)
+ return 0;
+
+ /* spin_{lock,unlock} are in different RCU CS */
+ bpf_rcu_read_lock();
+ bpf_spin_lock(&glock);
+ bpf_rbtree_add(&groot, &n->node, less);
+ bpf_rcu_read_unlock();
+
+ bpf_rcu_read_lock();
+ bpf_spin_unlock(&glock);
+ bpf_rcu_read_unlock();
+
+ return 0;
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/ringbuf_bench.c b/tools/testing/selftests/bpf/progs/ringbuf_bench.c
index 123607d314d6..6a468496f539 100644
--- a/tools/testing/selftests/bpf/progs/ringbuf_bench.c
+++ b/tools/testing/selftests/bpf/progs/ringbuf_bench.c
@@ -4,6 +4,7 @@
#include <linux/bpf.h>
#include <stdint.h>
#include <bpf/bpf_helpers.h>
+#include "bpf_misc.h"
char _license[] SEC("license") = "GPL";
@@ -30,7 +31,7 @@ static __always_inline long get_flags()
return sz >= wakeup_data_size ? BPF_RB_FORCE_WAKEUP : BPF_RB_NO_WAKEUP;
}
-SEC("fentry/__x64_sys_getpgid")
+SEC("fentry/" SYS_PREFIX "sys_getpgid")
int bench_ringbuf(void *ctx)
{
long *sample, flags;
diff --git a/tools/testing/selftests/bpf/progs/sample_map_ret0.c b/tools/testing/selftests/bpf/progs/sample_map_ret0.c
index 1612a32007b6..495990d355ef 100644
--- a/tools/testing/selftests/bpf/progs/sample_map_ret0.c
+++ b/tools/testing/selftests/bpf/progs/sample_map_ret0.c
@@ -2,19 +2,19 @@
#include <linux/bpf.h>
#include <bpf/bpf_helpers.h>
-struct bpf_map_def SEC("maps") htab = {
- .type = BPF_MAP_TYPE_HASH,
- .key_size = sizeof(__u32),
- .value_size = sizeof(long),
- .max_entries = 2,
-};
+struct {
+ __uint(type, BPF_MAP_TYPE_HASH);
+ __type(key, __u32);
+ __type(value, long);
+ __uint(max_entries, 2);
+} htab SEC(".maps");
-struct bpf_map_def SEC("maps") array = {
- .type = BPF_MAP_TYPE_ARRAY,
- .key_size = sizeof(__u32),
- .value_size = sizeof(long),
- .max_entries = 2,
-};
+struct {
+ __uint(type, BPF_MAP_TYPE_ARRAY);
+ __type(key, __u32);
+ __type(value, long);
+ __uint(max_entries, 2);
+} array SEC(".maps");
/* Sample program which should always load for testing control paths. */
SEC(".text") int func()
diff --git a/tools/testing/selftests/bpf/progs/sendmsg4_prog.c b/tools/testing/selftests/bpf/progs/sendmsg4_prog.c
index 092d9da536f3..351e79aef2fa 100644
--- a/tools/testing/selftests/bpf/progs/sendmsg4_prog.c
+++ b/tools/testing/selftests/bpf/progs/sendmsg4_prog.c
@@ -8,6 +8,8 @@
#include <bpf/bpf_helpers.h>
#include <bpf/bpf_endian.h>
+#include <bpf_sockopt_helpers.h>
+
#define SRC1_IP4 0xAC100001U /* 172.16.0.1 */
#define SRC2_IP4 0x00000000U
#define SRC_REWRITE_IP4 0x7f000004U
@@ -16,14 +18,15 @@
#define DST_PORT 4040
#define DST_REWRITE_PORT4 4444
-int _version SEC("version") = 1;
-
SEC("cgroup/sendmsg4")
int sendmsg_v4_prog(struct bpf_sock_addr *ctx)
{
if (ctx->type != SOCK_DGRAM)
return 0;
+ if (!get_set_sk_priority(ctx))
+ return 0;
+
/* Rewrite source. */
if (ctx->msg_src_ip4 == bpf_htonl(SRC1_IP4) ||
ctx->msg_src_ip4 == bpf_htonl(SRC2_IP4)) {
diff --git a/tools/testing/selftests/bpf/progs/sendmsg6_prog.c b/tools/testing/selftests/bpf/progs/sendmsg6_prog.c
index 255a432bc163..bf9b46b806f6 100644
--- a/tools/testing/selftests/bpf/progs/sendmsg6_prog.c
+++ b/tools/testing/selftests/bpf/progs/sendmsg6_prog.c
@@ -8,6 +8,8 @@
#include <bpf/bpf_helpers.h>
#include <bpf/bpf_endian.h>
+#include <bpf_sockopt_helpers.h>
+
#define SRC_REWRITE_IP6_0 0
#define SRC_REWRITE_IP6_1 0
#define SRC_REWRITE_IP6_2 0
@@ -20,14 +22,15 @@
#define DST_REWRITE_PORT6 6666
-int _version SEC("version") = 1;
-
SEC("cgroup/sendmsg6")
int sendmsg_v6_prog(struct bpf_sock_addr *ctx)
{
if (ctx->type != SOCK_DGRAM)
return 0;
+ if (!get_set_sk_priority(ctx))
+ return 0;
+
/* Rewrite source. */
if (ctx->msg_src_ip6[3] == bpf_htonl(1) ||
ctx->msg_src_ip6[3] == bpf_htonl(0)) {
diff --git a/tools/testing/selftests/bpf/progs/sendmsg_unix_prog.c b/tools/testing/selftests/bpf/progs/sendmsg_unix_prog.c
new file mode 100644
index 000000000000..d8869b03dda9
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/sendmsg_unix_prog.c
@@ -0,0 +1,39 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2023 Meta Platforms, Inc. and affiliates. */
+
+#include "vmlinux.h"
+
+#include <string.h>
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_core_read.h>
+#include "bpf_kfuncs.h"
+
+__u8 SERVUN_REWRITE_ADDRESS[] = "\0bpf_cgroup_unix_test_rewrite";
+
+SEC("cgroup/sendmsg_unix")
+int sendmsg_unix_prog(struct bpf_sock_addr *ctx)
+{
+ struct bpf_sock_addr_kern *sa_kern = bpf_cast_to_kern_ctx(ctx);
+ struct sockaddr_un *sa_kern_unaddr;
+ __u32 unaddrlen = offsetof(struct sockaddr_un, sun_path) +
+ sizeof(SERVUN_REWRITE_ADDRESS) - 1;
+ int ret;
+
+ /* Rewrite destination. */
+ ret = bpf_sock_addr_set_sun_path(sa_kern, SERVUN_REWRITE_ADDRESS,
+ sizeof(SERVUN_REWRITE_ADDRESS) - 1);
+ if (ret)
+ return 0;
+
+ if (sa_kern->uaddrlen != unaddrlen)
+ return 0;
+
+ sa_kern_unaddr = bpf_core_cast(sa_kern->uaddr, struct sockaddr_un);
+ if (memcmp(sa_kern_unaddr->sun_path, SERVUN_REWRITE_ADDRESS,
+ sizeof(SERVUN_REWRITE_ADDRESS) - 1) != 0)
+ return 0;
+
+ return 1;
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/setget_sockopt.c b/tools/testing/selftests/bpf/progs/setget_sockopt.c
new file mode 100644
index 000000000000..7a438600ae98
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/setget_sockopt.c
@@ -0,0 +1,403 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) Meta Platforms, Inc. and affiliates. */
+
+#include "vmlinux.h"
+#include "bpf_tracing_net.h"
+#include <bpf/bpf_core_read.h>
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
+
+#ifndef ARRAY_SIZE
+#define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0]))
+#endif
+
+extern unsigned long CONFIG_HZ __kconfig;
+
+const volatile char veth[IFNAMSIZ];
+const volatile int veth_ifindex;
+
+int nr_listen;
+int nr_passive;
+int nr_active;
+int nr_connect;
+int nr_binddev;
+int nr_socket_post_create;
+int nr_fin_wait1;
+
+struct sockopt_test {
+ int opt;
+ int new;
+ int restore;
+ int expected;
+ int tcp_expected;
+ unsigned int flip:1;
+};
+
+static const char not_exist_cc[] = "not_exist";
+static const char cubic_cc[] = "cubic";
+static const char reno_cc[] = "reno";
+
+static const struct sockopt_test sol_socket_tests[] = {
+ { .opt = SO_REUSEADDR, .flip = 1, },
+ { .opt = SO_SNDBUF, .new = 8123, .expected = 8123 * 2, },
+ { .opt = SO_RCVBUF, .new = 8123, .expected = 8123 * 2, },
+ { .opt = SO_KEEPALIVE, .flip = 1, },
+ { .opt = SO_PRIORITY, .new = 0xeb9f, .expected = 0xeb9f, },
+ { .opt = SO_REUSEPORT, .flip = 1, },
+ { .opt = SO_RCVLOWAT, .new = 8123, .expected = 8123, },
+ { .opt = SO_MARK, .new = 0xeb9f, .expected = 0xeb9f, },
+ { .opt = SO_MAX_PACING_RATE, .new = 0xeb9f, .expected = 0xeb9f, },
+ { .opt = SO_TXREHASH, .flip = 1, },
+ { .opt = 0, },
+};
+
+static const struct sockopt_test sol_tcp_tests[] = {
+ { .opt = TCP_NODELAY, .flip = 1, },
+ { .opt = TCP_KEEPIDLE, .new = 123, .expected = 123, .restore = 321, },
+ { .opt = TCP_KEEPINTVL, .new = 123, .expected = 123, .restore = 321, },
+ { .opt = TCP_KEEPCNT, .new = 123, .expected = 123, .restore = 124, },
+ { .opt = TCP_SYNCNT, .new = 123, .expected = 123, .restore = 124, },
+ { .opt = TCP_WINDOW_CLAMP, .new = 8123, .expected = 8123, .restore = 8124, },
+ { .opt = TCP_CONGESTION, },
+ { .opt = TCP_THIN_LINEAR_TIMEOUTS, .flip = 1, },
+ { .opt = TCP_USER_TIMEOUT, .new = 123400, .expected = 123400, },
+ { .opt = TCP_NOTSENT_LOWAT, .new = 1314, .expected = 1314, },
+ { .opt = 0, },
+};
+
+static const struct sockopt_test sol_ip_tests[] = {
+ { .opt = IP_TOS, .new = 0xe1, .expected = 0xe1, .tcp_expected = 0xe0, },
+ { .opt = 0, },
+};
+
+static const struct sockopt_test sol_ipv6_tests[] = {
+ { .opt = IPV6_TCLASS, .new = 0xe1, .expected = 0xe1, .tcp_expected = 0xe0, },
+ { .opt = IPV6_AUTOFLOWLABEL, .flip = 1, },
+ { .opt = 0, },
+};
+
+struct loop_ctx {
+ void *ctx;
+ struct sock *sk;
+};
+
+static int bpf_test_sockopt_flip(void *ctx, struct sock *sk,
+ const struct sockopt_test *t,
+ int level)
+{
+ int old, tmp, new, opt = t->opt;
+
+ opt = t->opt;
+
+ if (bpf_getsockopt(ctx, level, opt, &old, sizeof(old)))
+ return 1;
+ /* kernel initialized txrehash to 255 */
+ if (level == SOL_SOCKET && opt == SO_TXREHASH && old != 0 && old != 1)
+ old = 1;
+
+ new = !old;
+ if (bpf_setsockopt(ctx, level, opt, &new, sizeof(new)))
+ return 1;
+ if (bpf_getsockopt(ctx, level, opt, &tmp, sizeof(tmp)) ||
+ tmp != new)
+ return 1;
+
+ if (bpf_setsockopt(ctx, level, opt, &old, sizeof(old)))
+ return 1;
+
+ return 0;
+}
+
+static int bpf_test_sockopt_int(void *ctx, struct sock *sk,
+ const struct sockopt_test *t,
+ int level)
+{
+ int old, tmp, new, expected, opt;
+
+ opt = t->opt;
+ new = t->new;
+ if (sk->sk_type == SOCK_STREAM && t->tcp_expected)
+ expected = t->tcp_expected;
+ else
+ expected = t->expected;
+
+ if (bpf_getsockopt(ctx, level, opt, &old, sizeof(old)) ||
+ old == new)
+ return 1;
+
+ if (bpf_setsockopt(ctx, level, opt, &new, sizeof(new)))
+ return 1;
+ if (bpf_getsockopt(ctx, level, opt, &tmp, sizeof(tmp)) ||
+ tmp != expected)
+ return 1;
+
+ if (t->restore)
+ old = t->restore;
+ if (bpf_setsockopt(ctx, level, opt, &old, sizeof(old)))
+ return 1;
+
+ return 0;
+}
+
+static int bpf_test_socket_sockopt(__u32 i, struct loop_ctx *lc)
+{
+ const struct sockopt_test *t;
+
+ if (i >= ARRAY_SIZE(sol_socket_tests))
+ return 1;
+
+ t = &sol_socket_tests[i];
+ if (!t->opt)
+ return 1;
+
+ if (t->flip)
+ return bpf_test_sockopt_flip(lc->ctx, lc->sk, t, SOL_SOCKET);
+
+ return bpf_test_sockopt_int(lc->ctx, lc->sk, t, SOL_SOCKET);
+}
+
+static int bpf_test_ip_sockopt(__u32 i, struct loop_ctx *lc)
+{
+ const struct sockopt_test *t;
+
+ if (i >= ARRAY_SIZE(sol_ip_tests))
+ return 1;
+
+ t = &sol_ip_tests[i];
+ if (!t->opt)
+ return 1;
+
+ if (t->flip)
+ return bpf_test_sockopt_flip(lc->ctx, lc->sk, t, IPPROTO_IP);
+
+ return bpf_test_sockopt_int(lc->ctx, lc->sk, t, IPPROTO_IP);
+}
+
+static int bpf_test_ipv6_sockopt(__u32 i, struct loop_ctx *lc)
+{
+ const struct sockopt_test *t;
+
+ if (i >= ARRAY_SIZE(sol_ipv6_tests))
+ return 1;
+
+ t = &sol_ipv6_tests[i];
+ if (!t->opt)
+ return 1;
+
+ if (t->flip)
+ return bpf_test_sockopt_flip(lc->ctx, lc->sk, t, IPPROTO_IPV6);
+
+ return bpf_test_sockopt_int(lc->ctx, lc->sk, t, IPPROTO_IPV6);
+}
+
+static int bpf_test_tcp_sockopt(__u32 i, struct loop_ctx *lc)
+{
+ const struct sockopt_test *t;
+ struct sock *sk;
+ void *ctx;
+
+ if (i >= ARRAY_SIZE(sol_tcp_tests))
+ return 1;
+
+ t = &sol_tcp_tests[i];
+ if (!t->opt)
+ return 1;
+
+ ctx = lc->ctx;
+ sk = lc->sk;
+
+ if (t->opt == TCP_CONGESTION) {
+ char old_cc[16], tmp_cc[16];
+ const char *new_cc;
+ int new_cc_len;
+
+ if (!bpf_setsockopt(ctx, IPPROTO_TCP, TCP_CONGESTION,
+ (void *)not_exist_cc, sizeof(not_exist_cc)))
+ return 1;
+ if (bpf_getsockopt(ctx, IPPROTO_TCP, TCP_CONGESTION, old_cc, sizeof(old_cc)))
+ return 1;
+ if (!bpf_strncmp(old_cc, sizeof(old_cc), cubic_cc)) {
+ new_cc = reno_cc;
+ new_cc_len = sizeof(reno_cc);
+ } else {
+ new_cc = cubic_cc;
+ new_cc_len = sizeof(cubic_cc);
+ }
+ if (bpf_setsockopt(ctx, IPPROTO_TCP, TCP_CONGESTION, (void *)new_cc,
+ new_cc_len))
+ return 1;
+ if (bpf_getsockopt(ctx, IPPROTO_TCP, TCP_CONGESTION, tmp_cc, sizeof(tmp_cc)))
+ return 1;
+ if (bpf_strncmp(tmp_cc, sizeof(tmp_cc), new_cc))
+ return 1;
+ if (bpf_setsockopt(ctx, IPPROTO_TCP, TCP_CONGESTION, old_cc, sizeof(old_cc)))
+ return 1;
+ return 0;
+ }
+
+ if (t->flip)
+ return bpf_test_sockopt_flip(ctx, sk, t, IPPROTO_TCP);
+
+ return bpf_test_sockopt_int(ctx, sk, t, IPPROTO_TCP);
+}
+
+static int bpf_test_sockopt(void *ctx, struct sock *sk)
+{
+ struct loop_ctx lc = { .ctx = ctx, .sk = sk, };
+ __u16 family, proto;
+ int n;
+
+ family = sk->sk_family;
+ proto = sk->sk_protocol;
+
+ n = bpf_loop(ARRAY_SIZE(sol_socket_tests), bpf_test_socket_sockopt, &lc, 0);
+ if (n != ARRAY_SIZE(sol_socket_tests))
+ return -1;
+
+ if (proto == IPPROTO_TCP) {
+ n = bpf_loop(ARRAY_SIZE(sol_tcp_tests), bpf_test_tcp_sockopt, &lc, 0);
+ if (n != ARRAY_SIZE(sol_tcp_tests))
+ return -1;
+ }
+
+ if (family == AF_INET) {
+ n = bpf_loop(ARRAY_SIZE(sol_ip_tests), bpf_test_ip_sockopt, &lc, 0);
+ if (n != ARRAY_SIZE(sol_ip_tests))
+ return -1;
+ } else {
+ n = bpf_loop(ARRAY_SIZE(sol_ipv6_tests), bpf_test_ipv6_sockopt, &lc, 0);
+ if (n != ARRAY_SIZE(sol_ipv6_tests))
+ return -1;
+ }
+
+ return 0;
+}
+
+static int binddev_test(void *ctx)
+{
+ const char empty_ifname[] = "";
+ int ifindex, zero = 0;
+
+ if (bpf_setsockopt(ctx, SOL_SOCKET, SO_BINDTODEVICE,
+ (void *)veth, sizeof(veth)))
+ return -1;
+ if (bpf_getsockopt(ctx, SOL_SOCKET, SO_BINDTOIFINDEX,
+ &ifindex, sizeof(int)) ||
+ ifindex != veth_ifindex)
+ return -1;
+
+ if (bpf_setsockopt(ctx, SOL_SOCKET, SO_BINDTODEVICE,
+ (void *)empty_ifname, sizeof(empty_ifname)))
+ return -1;
+ if (bpf_getsockopt(ctx, SOL_SOCKET, SO_BINDTOIFINDEX,
+ &ifindex, sizeof(int)) ||
+ ifindex != 0)
+ return -1;
+
+ if (bpf_setsockopt(ctx, SOL_SOCKET, SO_BINDTOIFINDEX,
+ (void *)&veth_ifindex, sizeof(int)))
+ return -1;
+ if (bpf_getsockopt(ctx, SOL_SOCKET, SO_BINDTOIFINDEX,
+ &ifindex, sizeof(int)) ||
+ ifindex != veth_ifindex)
+ return -1;
+
+ if (bpf_setsockopt(ctx, SOL_SOCKET, SO_BINDTOIFINDEX,
+ &zero, sizeof(int)))
+ return -1;
+ if (bpf_getsockopt(ctx, SOL_SOCKET, SO_BINDTOIFINDEX,
+ &ifindex, sizeof(int)) ||
+ ifindex != 0)
+ return -1;
+
+ return 0;
+}
+
+static int test_tcp_maxseg(void *ctx, struct sock *sk)
+{
+ int val = 1314, tmp;
+
+ if (sk->sk_state != TCP_ESTABLISHED)
+ return bpf_setsockopt(ctx, IPPROTO_TCP, TCP_MAXSEG,
+ &val, sizeof(val));
+
+ if (bpf_getsockopt(ctx, IPPROTO_TCP, TCP_MAXSEG, &tmp, sizeof(tmp)) ||
+ tmp > val)
+ return -1;
+
+ return 0;
+}
+
+static int test_tcp_saved_syn(void *ctx, struct sock *sk)
+{
+ __u8 saved_syn[20];
+ int one = 1;
+
+ if (sk->sk_state == TCP_LISTEN)
+ return bpf_setsockopt(ctx, IPPROTO_TCP, TCP_SAVE_SYN,
+ &one, sizeof(one));
+
+ return bpf_getsockopt(ctx, IPPROTO_TCP, TCP_SAVED_SYN,
+ saved_syn, sizeof(saved_syn));
+}
+
+SEC("lsm_cgroup/socket_post_create")
+int BPF_PROG(socket_post_create, struct socket *sock, int family,
+ int type, int protocol, int kern)
+{
+ struct sock *sk = sock->sk;
+
+ if (!sk)
+ return 1;
+
+ nr_socket_post_create += !bpf_test_sockopt(sk, sk);
+ nr_binddev += !binddev_test(sk);
+
+ return 1;
+}
+
+SEC("sockops")
+int skops_sockopt(struct bpf_sock_ops *skops)
+{
+ struct bpf_sock *bpf_sk = skops->sk;
+ struct sock *sk;
+
+ if (!bpf_sk)
+ return 1;
+
+ sk = (struct sock *)bpf_skc_to_tcp_sock(bpf_sk);
+ if (!sk)
+ return 1;
+
+ switch (skops->op) {
+ case BPF_SOCK_OPS_TCP_LISTEN_CB:
+ nr_listen += !(bpf_test_sockopt(skops, sk) ||
+ test_tcp_maxseg(skops, sk) ||
+ test_tcp_saved_syn(skops, sk));
+ break;
+ case BPF_SOCK_OPS_TCP_CONNECT_CB:
+ nr_connect += !(bpf_test_sockopt(skops, sk) ||
+ test_tcp_maxseg(skops, sk));
+ break;
+ case BPF_SOCK_OPS_ACTIVE_ESTABLISHED_CB:
+ nr_active += !(bpf_test_sockopt(skops, sk) ||
+ test_tcp_maxseg(skops, sk));
+ break;
+ case BPF_SOCK_OPS_PASSIVE_ESTABLISHED_CB:
+ nr_passive += !(bpf_test_sockopt(skops, sk) ||
+ test_tcp_maxseg(skops, sk) ||
+ test_tcp_saved_syn(skops, sk));
+ bpf_sock_ops_cb_flags_set(skops,
+ skops->bpf_sock_ops_cb_flags |
+ BPF_SOCK_OPS_STATE_CB_FLAG);
+ break;
+ case BPF_SOCK_OPS_STATE_CB:
+ if (skops->args[1] == BPF_TCP_CLOSE_WAIT)
+ nr_fin_wait1 += !bpf_test_sockopt(skops, sk);
+ break;
+ }
+
+ return 1;
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/sk_storage_omem_uncharge.c b/tools/testing/selftests/bpf/progs/sk_storage_omem_uncharge.c
new file mode 100644
index 000000000000..46d6eb2a3b17
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/sk_storage_omem_uncharge.c
@@ -0,0 +1,59 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2023 Facebook */
+#include "vmlinux.h"
+#include "bpf_tracing_net.h"
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
+#include <bpf/bpf_core_read.h>
+
+void *local_storage_ptr = NULL;
+void *sk_ptr = NULL;
+int cookie_found = 0;
+__u64 cookie = 0;
+__u32 omem = 0;
+
+struct {
+ __uint(type, BPF_MAP_TYPE_SK_STORAGE);
+ __uint(map_flags, BPF_F_NO_PREALLOC);
+ __type(key, int);
+ __type(value, int);
+} sk_storage SEC(".maps");
+
+SEC("fexit/bpf_local_storage_destroy")
+int BPF_PROG(bpf_local_storage_destroy, struct bpf_local_storage *local_storage)
+{
+ struct sock *sk;
+
+ if (local_storage_ptr != local_storage)
+ return 0;
+
+ sk = bpf_core_cast(sk_ptr, struct sock);
+ if (sk->sk_cookie.counter != cookie)
+ return 0;
+
+ cookie_found++;
+ omem = sk->sk_omem_alloc.counter;
+ local_storage_ptr = NULL;
+
+ return 0;
+}
+
+SEC("fentry/inet6_sock_destruct")
+int BPF_PROG(inet6_sock_destruct, struct sock *sk)
+{
+ int *value;
+
+ if (!cookie || sk->sk_cookie.counter != cookie)
+ return 0;
+
+ value = bpf_sk_storage_get(&sk_storage, sk, 0, 0);
+ if (value && *value == 0xdeadbeef) {
+ cookie_found++;
+ sk_ptr = sk;
+ local_storage_ptr = sk->sk_bpf_storage;
+ }
+
+ return 0;
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/skb_load_bytes.c b/tools/testing/selftests/bpf/progs/skb_load_bytes.c
new file mode 100644
index 000000000000..e4252fd973be
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/skb_load_bytes.c
@@ -0,0 +1,19 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include <linux/bpf.h>
+#include <bpf/bpf_helpers.h>
+
+char _license[] SEC("license") = "GPL";
+
+__u32 load_offset = 0;
+int test_result = 0;
+
+SEC("tc")
+int skb_process(struct __sk_buff *skb)
+{
+ char buf[16];
+
+ test_result = bpf_skb_load_bytes(skb, load_offset, buf, 10);
+
+ return 0;
+}
diff --git a/tools/testing/selftests/bpf/progs/skb_pkt_end.c b/tools/testing/selftests/bpf/progs/skb_pkt_end.c
new file mode 100644
index 000000000000..992b7861003a
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/skb_pkt_end.c
@@ -0,0 +1,53 @@
+// SPDX-License-Identifier: GPL-2.0
+#define BPF_NO_PRESERVE_ACCESS_INDEX
+#include <vmlinux.h>
+#include <bpf/bpf_core_read.h>
+#include <bpf/bpf_helpers.h>
+
+#define INLINE __always_inline
+
+#define skb_shorter(skb, len) ((void *)(long)(skb)->data + (len) > (void *)(long)skb->data_end)
+
+#define ETH_IPV4_TCP_SIZE (14 + sizeof(struct iphdr) + sizeof(struct tcphdr))
+
+static INLINE struct iphdr *get_iphdr(struct __sk_buff *skb)
+{
+ struct iphdr *ip = NULL;
+ struct ethhdr *eth;
+
+ if (skb_shorter(skb, ETH_IPV4_TCP_SIZE))
+ goto out;
+
+ eth = (void *)(long)skb->data;
+ ip = (void *)(eth + 1);
+
+out:
+ return ip;
+}
+
+SEC("tc")
+int main_prog(struct __sk_buff *skb)
+{
+ struct iphdr *ip = NULL;
+ struct tcphdr *tcp;
+ __u8 proto = 0;
+
+ if (!(ip = get_iphdr(skb)))
+ goto out;
+
+ proto = ip->protocol;
+
+ if (proto != IPPROTO_TCP)
+ goto out;
+
+ tcp = (void*)(ip + 1);
+ if (tcp->dest != 0)
+ goto out;
+ if (!tcp)
+ goto out;
+
+ return tcp->urg_ptr;
+out:
+ return -1;
+}
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/sock_destroy_prog.c b/tools/testing/selftests/bpf/progs/sock_destroy_prog.c
new file mode 100644
index 000000000000..9e0bf7a54cec
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/sock_destroy_prog.c
@@ -0,0 +1,145 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include "vmlinux.h"
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_endian.h>
+
+#include "bpf_tracing_net.h"
+
+__be16 serv_port = 0;
+
+int bpf_sock_destroy(struct sock_common *sk) __ksym;
+
+struct {
+ __uint(type, BPF_MAP_TYPE_ARRAY);
+ __uint(max_entries, 1);
+ __type(key, __u32);
+ __type(value, __u64);
+} tcp_conn_sockets SEC(".maps");
+
+struct {
+ __uint(type, BPF_MAP_TYPE_ARRAY);
+ __uint(max_entries, 1);
+ __type(key, __u32);
+ __type(value, __u64);
+} udp_conn_sockets SEC(".maps");
+
+SEC("cgroup/connect6")
+int sock_connect(struct bpf_sock_addr *ctx)
+{
+ __u64 sock_cookie = 0;
+ int key = 0;
+ __u32 keyc = 0;
+
+ if (ctx->family != AF_INET6 || ctx->user_family != AF_INET6)
+ return 1;
+
+ sock_cookie = bpf_get_socket_cookie(ctx);
+ if (ctx->protocol == IPPROTO_TCP)
+ bpf_map_update_elem(&tcp_conn_sockets, &key, &sock_cookie, 0);
+ else if (ctx->protocol == IPPROTO_UDP)
+ bpf_map_update_elem(&udp_conn_sockets, &keyc, &sock_cookie, 0);
+ else
+ return 1;
+
+ return 1;
+}
+
+SEC("iter/tcp")
+int iter_tcp6_client(struct bpf_iter__tcp *ctx)
+{
+ struct sock_common *sk_common = ctx->sk_common;
+ __u64 sock_cookie = 0;
+ __u64 *val;
+ int key = 0;
+
+ if (!sk_common)
+ return 0;
+
+ if (sk_common->skc_family != AF_INET6)
+ return 0;
+
+ sock_cookie = bpf_get_socket_cookie(sk_common);
+ val = bpf_map_lookup_elem(&tcp_conn_sockets, &key);
+ if (!val)
+ return 0;
+ /* Destroy connected client sockets. */
+ if (sock_cookie == *val)
+ bpf_sock_destroy(sk_common);
+
+ return 0;
+}
+
+SEC("iter/tcp")
+int iter_tcp6_server(struct bpf_iter__tcp *ctx)
+{
+ struct sock_common *sk_common = ctx->sk_common;
+ const struct inet_connection_sock *icsk;
+ const struct inet_sock *inet;
+ struct tcp6_sock *tcp_sk;
+ __be16 srcp;
+
+ if (!sk_common)
+ return 0;
+
+ if (sk_common->skc_family != AF_INET6)
+ return 0;
+
+ tcp_sk = bpf_skc_to_tcp6_sock(sk_common);
+ if (!tcp_sk)
+ return 0;
+
+ icsk = &tcp_sk->tcp.inet_conn;
+ inet = &icsk->icsk_inet;
+ srcp = inet->inet_sport;
+
+ /* Destroy server sockets. */
+ if (srcp == serv_port)
+ bpf_sock_destroy(sk_common);
+
+ return 0;
+}
+
+
+SEC("iter/udp")
+int iter_udp6_client(struct bpf_iter__udp *ctx)
+{
+ struct udp_sock *udp_sk = ctx->udp_sk;
+ struct sock *sk = (struct sock *) udp_sk;
+ __u64 sock_cookie = 0, *val;
+ int key = 0;
+
+ if (!sk)
+ return 0;
+
+ sock_cookie = bpf_get_socket_cookie(sk);
+ val = bpf_map_lookup_elem(&udp_conn_sockets, &key);
+ if (!val)
+ return 0;
+ /* Destroy connected client sockets. */
+ if (sock_cookie == *val)
+ bpf_sock_destroy((struct sock_common *)sk);
+
+ return 0;
+}
+
+SEC("iter/udp")
+int iter_udp6_server(struct bpf_iter__udp *ctx)
+{
+ struct udp_sock *udp_sk = ctx->udp_sk;
+ struct sock *sk = (struct sock *) udp_sk;
+ struct inet_sock *inet;
+ __be16 srcp;
+
+ if (!sk)
+ return 0;
+
+ inet = &udp_sk->inet;
+ srcp = inet->inet_sport;
+ if (srcp == serv_port)
+ bpf_sock_destroy((struct sock_common *)sk);
+
+ return 0;
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/sock_destroy_prog_fail.c b/tools/testing/selftests/bpf/progs/sock_destroy_prog_fail.c
new file mode 100644
index 000000000000..dd6850b58e25
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/sock_destroy_prog_fail.c
@@ -0,0 +1,22 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include "vmlinux.h"
+#include <bpf/bpf_tracing.h>
+#include <bpf/bpf_helpers.h>
+
+#include "bpf_misc.h"
+
+char _license[] SEC("license") = "GPL";
+
+int bpf_sock_destroy(struct sock_common *sk) __ksym;
+
+SEC("tp_btf/tcp_destroy_sock")
+__failure __msg("calling kernel function bpf_sock_destroy is not allowed")
+int BPF_PROG(trace_tcp_destroy_sock, struct sock *sk)
+{
+ /* should not load */
+ bpf_sock_destroy((struct sock_common *)sk);
+
+ return 0;
+}
+
diff --git a/tools/testing/selftests/bpf/progs/sock_iter_batch.c b/tools/testing/selftests/bpf/progs/sock_iter_batch.c
new file mode 100644
index 000000000000..96531b0d9d55
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/sock_iter_batch.c
@@ -0,0 +1,91 @@
+// SPDX-License-Identifier: GPL-2.0
+// Copyright (c) 2024 Meta
+
+#include "vmlinux.h"
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_core_read.h>
+#include <bpf/bpf_endian.h>
+#include "bpf_tracing_net.h"
+#include "bpf_kfuncs.h"
+
+#define ATTR __always_inline
+#include "test_jhash.h"
+
+static bool ipv6_addr_loopback(const struct in6_addr *a)
+{
+ return (a->s6_addr32[0] | a->s6_addr32[1] |
+ a->s6_addr32[2] | (a->s6_addr32[3] ^ bpf_htonl(1))) == 0;
+}
+
+volatile const __u16 ports[2];
+unsigned int bucket[2];
+
+SEC("iter/tcp")
+int iter_tcp_soreuse(struct bpf_iter__tcp *ctx)
+{
+ struct sock *sk = (struct sock *)ctx->sk_common;
+ struct inet_hashinfo *hinfo;
+ unsigned int hash;
+ struct net *net;
+ int idx;
+
+ if (!sk)
+ return 0;
+
+ sk = bpf_core_cast(sk, struct sock);
+ if (sk->sk_family != AF_INET6 ||
+ sk->sk_state != TCP_LISTEN ||
+ !ipv6_addr_loopback(&sk->sk_v6_rcv_saddr))
+ return 0;
+
+ if (sk->sk_num == ports[0])
+ idx = 0;
+ else if (sk->sk_num == ports[1])
+ idx = 1;
+ else
+ return 0;
+
+ /* bucket selection as in inet_lhash2_bucket_sk() */
+ net = sk->sk_net.net;
+ hash = jhash2(sk->sk_v6_rcv_saddr.s6_addr32, 4, net->hash_mix);
+ hash ^= sk->sk_num;
+ hinfo = net->ipv4.tcp_death_row.hashinfo;
+ bucket[idx] = hash & hinfo->lhash2_mask;
+ bpf_seq_write(ctx->meta->seq, &idx, sizeof(idx));
+
+ return 0;
+}
+
+#define udp_sk(ptr) container_of(ptr, struct udp_sock, inet.sk)
+
+SEC("iter/udp")
+int iter_udp_soreuse(struct bpf_iter__udp *ctx)
+{
+ struct sock *sk = (struct sock *)ctx->udp_sk;
+ struct udp_table *udptable;
+ int idx;
+
+ if (!sk)
+ return 0;
+
+ sk = bpf_core_cast(sk, struct sock);
+ if (sk->sk_family != AF_INET6 ||
+ !ipv6_addr_loopback(&sk->sk_v6_rcv_saddr))
+ return 0;
+
+ if (sk->sk_num == ports[0])
+ idx = 0;
+ else if (sk->sk_num == ports[1])
+ idx = 1;
+ else
+ return 0;
+
+ /* bucket selection as in udp_hashslot2() */
+ udptable = sk->sk_net.net->ipv4.udp_table;
+ bucket[idx] = udp_sk(sk)->udp_portaddr_hash & udptable->mask;
+ bpf_seq_write(ctx->meta->seq, &idx, sizeof(idx));
+
+ return 0;
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/socket_cookie_prog.c b/tools/testing/selftests/bpf/progs/socket_cookie_prog.c
index 0cb5656a22b0..35630a5aaf5f 100644
--- a/tools/testing/selftests/bpf/progs/socket_cookie_prog.c
+++ b/tools/testing/selftests/bpf/progs/socket_cookie_prog.c
@@ -1,11 +1,13 @@
// SPDX-License-Identifier: GPL-2.0
// Copyright (c) 2018 Facebook
-#include <linux/bpf.h>
-#include <sys/socket.h>
+#include "vmlinux.h"
#include <bpf/bpf_helpers.h>
#include <bpf/bpf_endian.h>
+#include <bpf/bpf_tracing.h>
+
+#define AF_INET6 10
struct socket_cookie {
__u64 cookie_key;
@@ -19,6 +21,14 @@ struct {
__type(value, struct socket_cookie);
} socket_cookies SEC(".maps");
+/*
+ * These three programs get executed in a row on connect() syscalls. The
+ * userspace side of the test creates a client socket, issues a connect() on it
+ * and then checks that the local storage associated with this socket has:
+ * cookie_value == local_port << 8 | 0xFF
+ * The different parts of this cookie_value are appended by those hooks if they
+ * all agree on the output of bpf_get_socket_cookie().
+ */
SEC("cgroup/connect6")
int set_cookie(struct bpf_sock_addr *ctx)
{
@@ -32,16 +42,16 @@ int set_cookie(struct bpf_sock_addr *ctx)
if (!p)
return 1;
- p->cookie_value = 0xFF;
+ p->cookie_value = 0xF;
p->cookie_key = bpf_get_socket_cookie(ctx);
return 1;
}
SEC("sockops")
-int update_cookie(struct bpf_sock_ops *ctx)
+int update_cookie_sockops(struct bpf_sock_ops *ctx)
{
- struct bpf_sock *sk;
+ struct bpf_sock *sk = ctx->sk;
struct socket_cookie *p;
if (ctx->family != AF_INET6)
@@ -50,21 +60,40 @@ int update_cookie(struct bpf_sock_ops *ctx)
if (ctx->op != BPF_SOCK_OPS_TCP_CONNECT_CB)
return 1;
- if (!ctx->sk)
+ if (!sk)
return 1;
- p = bpf_sk_storage_get(&socket_cookies, ctx->sk, 0, 0);
+ p = bpf_sk_storage_get(&socket_cookies, sk, 0, 0);
if (!p)
return 1;
if (p->cookie_key != bpf_get_socket_cookie(ctx))
return 1;
- p->cookie_value = (ctx->local_port << 8) | p->cookie_value;
+ p->cookie_value |= (ctx->local_port << 8);
return 1;
}
-int _version SEC("version") = 1;
+SEC("fexit/inet_stream_connect")
+int BPF_PROG(update_cookie_tracing, struct socket *sock,
+ struct sockaddr *uaddr, int addr_len, int flags)
+{
+ struct socket_cookie *p;
+
+ if (uaddr->sa_family != AF_INET6)
+ return 0;
+
+ p = bpf_sk_storage_get(&socket_cookies, sock->sk, 0, 0);
+ if (!p)
+ return 0;
+
+ if (p->cookie_key != bpf_get_socket_cookie(sock->sk))
+ return 0;
+
+ p->cookie_value |= 0xF0;
+
+ return 0;
+}
char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/sockmap_parse_prog.c b/tools/testing/selftests/bpf/progs/sockmap_parse_prog.c
index ca283af80d4e..c9abfe3a11af 100644
--- a/tools/testing/selftests/bpf/progs/sockmap_parse_prog.c
+++ b/tools/testing/selftests/bpf/progs/sockmap_parse_prog.c
@@ -2,15 +2,11 @@
#include <bpf/bpf_helpers.h>
#include <bpf/bpf_endian.h>
-int _version SEC("version") = 1;
-
SEC("sk_skb1")
int bpf_prog1(struct __sk_buff *skb)
{
void *data_end = (void *)(long) skb->data_end;
void *data = (void *)(long) skb->data;
- __u32 lport = skb->local_port;
- __u32 rport = skb->remote_port;
__u8 *d = data;
int err;
diff --git a/tools/testing/selftests/bpf/progs/sockmap_tcp_msg_prog.c b/tools/testing/selftests/bpf/progs/sockmap_tcp_msg_prog.c
index fdb4bf4408fa..80632954c5a1 100644
--- a/tools/testing/selftests/bpf/progs/sockmap_tcp_msg_prog.c
+++ b/tools/testing/selftests/bpf/progs/sockmap_tcp_msg_prog.c
@@ -3,23 +3,9 @@
#include <bpf/bpf_helpers.h>
#include <bpf/bpf_endian.h>
-int _version SEC("version") = 1;
-
SEC("sk_msg1")
int bpf_prog1(struct sk_msg_md *msg)
{
- void *data_end = (void *)(long) msg->data_end;
- void *data = (void *)(long) msg->data;
-
- char *d;
-
- if (data + 8 > data_end)
- return SK_DROP;
-
- bpf_printk("data length %i\n", (__u64)msg->data_end - (__u64)msg->data);
- d = (char *)data;
- bpf_printk("hello sendmsg hook %i %i\n", d[0], d[1]);
-
return SK_PASS;
}
diff --git a/tools/testing/selftests/bpf/progs/sockmap_verdict_prog.c b/tools/testing/selftests/bpf/progs/sockmap_verdict_prog.c
index 4797dc985064..0660f29dca95 100644
--- a/tools/testing/selftests/bpf/progs/sockmap_verdict_prog.c
+++ b/tools/testing/selftests/bpf/progs/sockmap_verdict_prog.c
@@ -1,28 +1,27 @@
#include <linux/bpf.h>
#include <bpf/bpf_helpers.h>
#include <bpf/bpf_endian.h>
-
-int _version SEC("version") = 1;
+#include "bpf_misc.h"
struct {
__uint(type, BPF_MAP_TYPE_SOCKMAP);
__uint(max_entries, 20);
- __uint(key_size, sizeof(int));
- __uint(value_size, sizeof(int));
+ __type(key, int);
+ __type(value, int);
} sock_map_rx SEC(".maps");
struct {
__uint(type, BPF_MAP_TYPE_SOCKMAP);
__uint(max_entries, 20);
- __uint(key_size, sizeof(int));
- __uint(value_size, sizeof(int));
+ __type(key, int);
+ __type(value, int);
} sock_map_tx SEC(".maps");
struct {
__uint(type, BPF_MAP_TYPE_SOCKMAP);
__uint(max_entries, 20);
- __uint(key_size, sizeof(int));
- __uint(value_size, sizeof(int));
+ __type(key, int);
+ __type(value, int);
} sock_map_msg SEC(".maps");
struct {
@@ -42,6 +41,9 @@ int bpf_prog2(struct __sk_buff *skb)
__u8 *d = data;
__u8 sk, map;
+ __sink(lport);
+ __sink(rport);
+
if (data + 8 > data_end)
return SK_DROP;
diff --git a/tools/testing/selftests/bpf/progs/sockopt_inherit.c b/tools/testing/selftests/bpf/progs/sockopt_inherit.c
index c6d428a8d785..a3434b840928 100644
--- a/tools/testing/selftests/bpf/progs/sockopt_inherit.c
+++ b/tools/testing/selftests/bpf/progs/sockopt_inherit.c
@@ -3,13 +3,14 @@
#include <bpf/bpf_helpers.h>
char _license[] SEC("license") = "GPL";
-__u32 _version SEC("version") = 1;
#define SOL_CUSTOM 0xdeadbeef
#define CUSTOM_INHERIT1 0
#define CUSTOM_INHERIT2 1
#define CUSTOM_LISTENER 2
+__s32 page_size = 0;
+
struct sockopt_inherit {
__u8 val;
};
@@ -56,7 +57,7 @@ int _getsockopt(struct bpf_sockopt *ctx)
__u8 *optval = ctx->optval;
if (ctx->level != SOL_CUSTOM)
- return 1; /* only interested in SOL_CUSTOM */
+ goto out; /* only interested in SOL_CUSTOM */
if (optval + 1 > optval_end)
return 0; /* EPERM, bounds check */
@@ -71,6 +72,12 @@ int _getsockopt(struct bpf_sockopt *ctx)
ctx->optlen = 1;
return 1;
+
+out:
+ /* optval larger than PAGE_SIZE use kernel's buffer. */
+ if (ctx->optlen > page_size)
+ ctx->optlen = 0;
+ return 1;
}
SEC("cgroup/setsockopt")
@@ -81,7 +88,7 @@ int _setsockopt(struct bpf_sockopt *ctx)
__u8 *optval = ctx->optval;
if (ctx->level != SOL_CUSTOM)
- return 1; /* only interested in SOL_CUSTOM */
+ goto out; /* only interested in SOL_CUSTOM */
if (optval + 1 > optval_end)
return 0; /* EPERM, bounds check */
@@ -94,4 +101,10 @@ int _setsockopt(struct bpf_sockopt *ctx)
ctx->optlen = -1;
return 1;
+
+out:
+ /* optval larger than PAGE_SIZE use kernel's buffer. */
+ if (ctx->optlen > page_size)
+ ctx->optlen = 0;
+ return 1;
}
diff --git a/tools/testing/selftests/bpf/progs/sockopt_multi.c b/tools/testing/selftests/bpf/progs/sockopt_multi.c
index 9d8c212dde9f..db67278e12d4 100644
--- a/tools/testing/selftests/bpf/progs/sockopt_multi.c
+++ b/tools/testing/selftests/bpf/progs/sockopt_multi.c
@@ -4,16 +4,17 @@
#include <bpf/bpf_helpers.h>
char _license[] SEC("license") = "GPL";
-__u32 _version SEC("version") = 1;
-SEC("cgroup/getsockopt/child")
+__s32 page_size = 0;
+
+SEC("cgroup/getsockopt")
int _getsockopt_child(struct bpf_sockopt *ctx)
{
__u8 *optval_end = ctx->optval_end;
__u8 *optval = ctx->optval;
if (ctx->level != SOL_IP || ctx->optname != IP_TOS)
- return 1;
+ goto out;
if (optval + 1 > optval_end)
return 0; /* EPERM, bounds check */
@@ -27,16 +28,22 @@ int _getsockopt_child(struct bpf_sockopt *ctx)
ctx->optlen = 1;
return 1;
+
+out:
+ /* optval larger than PAGE_SIZE use kernel's buffer. */
+ if (ctx->optlen > page_size)
+ ctx->optlen = 0;
+ return 1;
}
-SEC("cgroup/getsockopt/parent")
+SEC("cgroup/getsockopt")
int _getsockopt_parent(struct bpf_sockopt *ctx)
{
__u8 *optval_end = ctx->optval_end;
__u8 *optval = ctx->optval;
if (ctx->level != SOL_IP || ctx->optname != IP_TOS)
- return 1;
+ goto out;
if (optval + 1 > optval_end)
return 0; /* EPERM, bounds check */
@@ -50,6 +57,12 @@ int _getsockopt_parent(struct bpf_sockopt *ctx)
ctx->optlen = 1;
return 1;
+
+out:
+ /* optval larger than PAGE_SIZE use kernel's buffer. */
+ if (ctx->optlen > page_size)
+ ctx->optlen = 0;
+ return 1;
}
SEC("cgroup/setsockopt")
@@ -59,7 +72,7 @@ int _setsockopt(struct bpf_sockopt *ctx)
__u8 *optval = ctx->optval;
if (ctx->level != SOL_IP || ctx->optname != IP_TOS)
- return 1;
+ goto out;
if (optval + 1 > optval_end)
return 0; /* EPERM, bounds check */
@@ -68,4 +81,10 @@ int _setsockopt(struct bpf_sockopt *ctx)
ctx->optlen = 1;
return 1;
+
+out:
+ /* optval larger than PAGE_SIZE use kernel's buffer. */
+ if (ctx->optlen > page_size)
+ ctx->optlen = 0;
+ return 1;
}
diff --git a/tools/testing/selftests/bpf/progs/sockopt_qos_to_cc.c b/tools/testing/selftests/bpf/progs/sockopt_qos_to_cc.c
new file mode 100644
index 000000000000..83753b00a556
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/sockopt_qos_to_cc.c
@@ -0,0 +1,47 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2021 Facebook */
+#include <string.h>
+#include <linux/tcp.h>
+#include <netinet/in.h>
+#include <linux/bpf.h>
+#include <bpf/bpf_helpers.h>
+#include "bpf_tcp_helpers.h"
+
+char _license[] SEC("license") = "GPL";
+
+__s32 page_size = 0;
+
+SEC("cgroup/setsockopt")
+int sockopt_qos_to_cc(struct bpf_sockopt *ctx)
+{
+ void *optval_end = ctx->optval_end;
+ int *optval = ctx->optval;
+ char buf[TCP_CA_NAME_MAX];
+ char cc_reno[TCP_CA_NAME_MAX] = "reno";
+ char cc_cubic[TCP_CA_NAME_MAX] = "cubic";
+
+ if (ctx->level != SOL_IPV6 || ctx->optname != IPV6_TCLASS)
+ goto out;
+
+ if (optval + 1 > optval_end)
+ return 0; /* EPERM, bounds check */
+
+ if (bpf_getsockopt(ctx->sk, SOL_TCP, TCP_CONGESTION, &buf, sizeof(buf)))
+ return 0;
+
+ if (!tcp_cc_eq(buf, cc_cubic))
+ return 0;
+
+ if (*optval == 0x2d) {
+ if (bpf_setsockopt(ctx->sk, SOL_TCP, TCP_CONGESTION, &cc_reno,
+ sizeof(cc_reno)))
+ return 0;
+ }
+ return 1;
+
+out:
+ /* optval larger than PAGE_SIZE use kernel's buffer. */
+ if (ctx->optlen > page_size)
+ ctx->optlen = 0;
+ return 1;
+}
diff --git a/tools/testing/selftests/bpf/progs/sockopt_sk.c b/tools/testing/selftests/bpf/progs/sockopt_sk.c
index 712df7b49cb1..cb990a7d3d45 100644
--- a/tools/testing/selftests/bpf/progs/sockopt_sk.c
+++ b/tools/testing/selftests/bpf/progs/sockopt_sk.c
@@ -1,15 +1,16 @@
// SPDX-License-Identifier: GPL-2.0
#include <string.h>
-#include <netinet/in.h>
-#include <netinet/tcp.h>
+#include <linux/tcp.h>
#include <linux/bpf.h>
+#include <netinet/in.h>
#include <bpf/bpf_helpers.h>
char _license[] SEC("license") = "GPL";
-__u32 _version SEC("version") = 1;
-#ifndef PAGE_SIZE
-#define PAGE_SIZE 4096
+int page_size = 0; /* userspace should set it */
+
+#ifndef SOL_TCP
+#define SOL_TCP IPPROTO_TCP
#endif
#define SOL_CUSTOM 0xdeadbeef
@@ -31,14 +32,27 @@ int _getsockopt(struct bpf_sockopt *ctx)
__u8 *optval_end = ctx->optval_end;
__u8 *optval = ctx->optval;
struct sockopt_sk *storage;
+ struct bpf_sock *sk;
+
+ /* Bypass AF_NETLINK. */
+ sk = ctx->sk;
+ if (sk && sk->family == AF_NETLINK)
+ goto out;
+
+ /* Make sure bpf_get_netns_cookie is callable.
+ */
+ if (bpf_get_netns_cookie(NULL) == 0)
+ return 0;
+
+ if (bpf_get_netns_cookie(ctx) == 0)
+ return 0;
if (ctx->level == SOL_IP && ctx->optname == IP_TOS) {
/* Not interested in SOL_IP:IP_TOS;
* let next BPF program in the cgroup chain or kernel
* handle it.
*/
- ctx->optlen = 0; /* bypass optval>PAGE_SIZE */
- return 1;
+ goto out;
}
if (ctx->level == SOL_SOCKET && ctx->optname == SO_SNDBUF) {
@@ -46,7 +60,7 @@ int _getsockopt(struct bpf_sockopt *ctx)
* let next BPF program in the cgroup chain or kernel
* handle it.
*/
- return 1;
+ goto out;
}
if (ctx->level == SOL_TCP && ctx->optname == TCP_CONGESTION) {
@@ -54,12 +68,28 @@ int _getsockopt(struct bpf_sockopt *ctx)
* let next BPF program in the cgroup chain or kernel
* handle it.
*/
- return 1;
+ goto out;
+ }
+
+ if (ctx->level == SOL_TCP && ctx->optname == TCP_ZEROCOPY_RECEIVE) {
+ /* Verify that TCP_ZEROCOPY_RECEIVE triggers.
+ * It has a custom implementation for performance
+ * reasons.
+ */
+
+ /* Check that optval contains address (__u64) */
+ if (optval + sizeof(__u64) > optval_end)
+ return 0; /* bounds check */
+
+ if (((struct tcp_zerocopy_receive *)optval)->address != 0)
+ return 0; /* unexpected data */
+
+ goto out;
}
if (ctx->level == SOL_IP && ctx->optname == IP_FREEBIND) {
if (optval + 1 > optval_end)
- return 0; /* EPERM, bounds check */
+ return 0; /* bounds check */
ctx->retval = 0; /* Reset system call return value to zero */
@@ -71,25 +101,25 @@ int _getsockopt(struct bpf_sockopt *ctx)
* program can only see the first PAGE_SIZE
* bytes of data.
*/
- if (optval_end - optval != PAGE_SIZE)
- return 0; /* EPERM, unexpected data size */
+ if (optval_end - optval != page_size)
+ return 0; /* unexpected data size */
return 1;
}
if (ctx->level != SOL_CUSTOM)
- return 0; /* EPERM, deny everything except custom level */
+ return 0; /* deny everything except custom level */
if (optval + 1 > optval_end)
- return 0; /* EPERM, bounds check */
+ return 0; /* bounds check */
storage = bpf_sk_storage_get(&socket_storage_map, ctx->sk, 0,
BPF_SK_STORAGE_GET_F_CREATE);
if (!storage)
- return 0; /* EPERM, couldn't get sk storage */
+ return 0; /* couldn't get sk storage */
if (!ctx->retval)
- return 0; /* EPERM, kernel should not have handled
+ return 0; /* kernel should not have handled
* SOL_CUSTOM, something is wrong!
*/
ctx->retval = 0; /* Reset system call return value to zero */
@@ -98,6 +128,12 @@ int _getsockopt(struct bpf_sockopt *ctx)
ctx->optlen = 1;
return 1;
+
+out:
+ /* optval larger than PAGE_SIZE use kernel's buffer. */
+ if (ctx->optlen > page_size)
+ ctx->optlen = 0;
+ return 1;
}
SEC("cgroup/setsockopt")
@@ -106,6 +142,20 @@ int _setsockopt(struct bpf_sockopt *ctx)
__u8 *optval_end = ctx->optval_end;
__u8 *optval = ctx->optval;
struct sockopt_sk *storage;
+ struct bpf_sock *sk;
+
+ /* Bypass AF_NETLINK. */
+ sk = ctx->sk;
+ if (sk && sk->family == AF_NETLINK)
+ goto out;
+
+ /* Make sure bpf_get_netns_cookie is callable.
+ */
+ if (bpf_get_netns_cookie(NULL) == 0)
+ return 0;
+
+ if (bpf_get_netns_cookie(ctx) == 0)
+ return 0;
if (ctx->level == SOL_IP && ctx->optname == IP_TOS) {
/* Not interested in SOL_IP:IP_TOS;
@@ -120,7 +170,7 @@ int _setsockopt(struct bpf_sockopt *ctx)
/* Overwrite SO_SNDBUF value */
if (optval + sizeof(__u32) > optval_end)
- return 0; /* EPERM, bounds check */
+ return 0; /* bounds check */
*(__u32 *)optval = 0x55AA;
ctx->optlen = 4;
@@ -132,7 +182,7 @@ int _setsockopt(struct bpf_sockopt *ctx)
/* Always use cubic */
if (optval + 5 > optval_end)
- return 0; /* EPERM, bounds check */
+ return 0; /* bounds check */
memcpy(optval, "cubic", 5);
ctx->optlen = 5;
@@ -142,11 +192,11 @@ int _setsockopt(struct bpf_sockopt *ctx)
if (ctx->level == SOL_IP && ctx->optname == IP_FREEBIND) {
/* Original optlen is larger than PAGE_SIZE. */
- if (ctx->optlen != PAGE_SIZE * 2)
- return 0; /* EPERM, unexpected data size */
+ if (ctx->optlen != page_size * 2)
+ return 0; /* unexpected data size */
if (optval + 1 > optval_end)
- return 0; /* EPERM, bounds check */
+ return 0; /* bounds check */
/* Make sure we can trim the buffer. */
optval[0] = 0;
@@ -156,22 +206,22 @@ int _setsockopt(struct bpf_sockopt *ctx)
* program can only see the first PAGE_SIZE
* bytes of data.
*/
- if (optval_end - optval != PAGE_SIZE)
- return 0; /* EPERM, unexpected data size */
+ if (optval_end - optval != page_size)
+ return 0; /* unexpected data size */
return 1;
}
if (ctx->level != SOL_CUSTOM)
- return 0; /* EPERM, deny everything except custom level */
+ return 0; /* deny everything except custom level */
if (optval + 1 > optval_end)
- return 0; /* EPERM, bounds check */
+ return 0; /* bounds check */
storage = bpf_sk_storage_get(&socket_storage_map, ctx->sk, 0,
BPF_SK_STORAGE_GET_F_CREATE);
if (!storage)
- return 0; /* EPERM, couldn't get sk storage */
+ return 0; /* couldn't get sk storage */
storage->val = optval[0];
ctx->optlen = -1; /* BPF has consumed this option, don't call kernel
@@ -179,4 +229,10 @@ int _setsockopt(struct bpf_sockopt *ctx)
*/
return 1;
+
+out:
+ /* optval larger than PAGE_SIZE use kernel's buffer. */
+ if (ctx->optlen > page_size)
+ ctx->optlen = 0;
+ return 1;
}
diff --git a/tools/testing/selftests/bpf/progs/stacktrace_map_skip.c b/tools/testing/selftests/bpf/progs/stacktrace_map_skip.c
new file mode 100644
index 000000000000..2eb297df3dd6
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/stacktrace_map_skip.c
@@ -0,0 +1,68 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <vmlinux.h>
+#include <bpf/bpf_helpers.h>
+
+#define TEST_STACK_DEPTH 2
+#define TEST_MAX_ENTRIES 16384
+
+typedef __u64 stack_trace_t[TEST_STACK_DEPTH];
+
+struct {
+ __uint(type, BPF_MAP_TYPE_STACK_TRACE);
+ __uint(max_entries, TEST_MAX_ENTRIES);
+ __type(key, __u32);
+ __type(value, stack_trace_t);
+} stackmap SEC(".maps");
+
+struct {
+ __uint(type, BPF_MAP_TYPE_HASH);
+ __uint(max_entries, TEST_MAX_ENTRIES);
+ __type(key, __u32);
+ __type(value, __u32);
+} stackid_hmap SEC(".maps");
+
+struct {
+ __uint(type, BPF_MAP_TYPE_ARRAY);
+ __uint(max_entries, TEST_MAX_ENTRIES);
+ __type(key, __u32);
+ __type(value, stack_trace_t);
+} stack_amap SEC(".maps");
+
+int pid = 0;
+int control = 0;
+int failed = 0;
+
+SEC("tracepoint/sched/sched_switch")
+int oncpu(struct trace_event_raw_sched_switch *ctx)
+{
+ __u32 max_len = TEST_STACK_DEPTH * sizeof(__u64);
+ __u32 key = 0, val = 0;
+ __u64 *stack_p;
+
+ if (pid != (bpf_get_current_pid_tgid() >> 32))
+ return 0;
+
+ if (control)
+ return 0;
+
+ /* it should allow skipping whole buffer size entries */
+ key = bpf_get_stackid(ctx, &stackmap, TEST_STACK_DEPTH);
+ if ((int)key >= 0) {
+ /* The size of stackmap and stack_amap should be the same */
+ bpf_map_update_elem(&stackid_hmap, &key, &val, 0);
+ stack_p = bpf_map_lookup_elem(&stack_amap, &key);
+ if (stack_p) {
+ bpf_get_stack(ctx, stack_p, max_len, TEST_STACK_DEPTH);
+ /* it wrongly skipped all the entries and filled zero */
+ if (stack_p[0] == 0)
+ failed = 1;
+ }
+ } else {
+ /* old kernel doesn't support skipping that many entries */
+ failed = 2;
+ }
+
+ return 0;
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/strncmp_bench.c b/tools/testing/selftests/bpf/progs/strncmp_bench.c
new file mode 100644
index 000000000000..18373a7df76e
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/strncmp_bench.c
@@ -0,0 +1,50 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (C) 2021. Huawei Technologies Co., Ltd */
+#include <linux/types.h>
+#include <linux/bpf.h>
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
+
+#define STRNCMP_STR_SZ 4096
+
+/* Will be updated by benchmark before program loading */
+const volatile unsigned int cmp_str_len = 1;
+const char target[STRNCMP_STR_SZ];
+
+long hits = 0;
+char str[STRNCMP_STR_SZ];
+
+char _license[] SEC("license") = "GPL";
+
+static __always_inline int local_strncmp(const char *s1, unsigned int sz,
+ const char *s2)
+{
+ int ret = 0;
+ unsigned int i;
+
+ for (i = 0; i < sz; i++) {
+ /* E.g. 0xff > 0x31 */
+ ret = (unsigned char)s1[i] - (unsigned char)s2[i];
+ if (ret || !s1[i])
+ break;
+ }
+
+ return ret;
+}
+
+SEC("tp/syscalls/sys_enter_getpgid")
+int strncmp_no_helper(void *ctx)
+{
+ if (local_strncmp(str, cmp_str_len + 1, target) < 0)
+ __sync_add_and_fetch(&hits, 1);
+ return 0;
+}
+
+SEC("tp/syscalls/sys_enter_getpgid")
+int strncmp_helper(void *ctx)
+{
+ if (bpf_strncmp(str, cmp_str_len + 1, target) < 0)
+ __sync_add_and_fetch(&hits, 1);
+ return 0;
+}
+
diff --git a/tools/testing/selftests/bpf/progs/strncmp_test.c b/tools/testing/selftests/bpf/progs/strncmp_test.c
new file mode 100644
index 000000000000..769668feed48
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/strncmp_test.c
@@ -0,0 +1,54 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (C) 2021. Huawei Technologies Co., Ltd */
+#include <stdbool.h>
+#include <linux/types.h>
+#include <linux/bpf.h>
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
+
+#define STRNCMP_STR_SZ 8
+
+const char target[STRNCMP_STR_SZ] = "EEEEEEE";
+char str[STRNCMP_STR_SZ];
+int cmp_ret = 0;
+int target_pid = 0;
+
+const char no_str_target[STRNCMP_STR_SZ] = "12345678";
+char writable_target[STRNCMP_STR_SZ];
+unsigned int no_const_str_size = STRNCMP_STR_SZ;
+
+char _license[] SEC("license") = "GPL";
+
+SEC("?tp/syscalls/sys_enter_nanosleep")
+int do_strncmp(void *ctx)
+{
+ if ((bpf_get_current_pid_tgid() >> 32) != target_pid)
+ return 0;
+
+ cmp_ret = bpf_strncmp(str, STRNCMP_STR_SZ, target);
+ return 0;
+}
+
+SEC("?tp/syscalls/sys_enter_nanosleep")
+int strncmp_bad_not_const_str_size(void *ctx)
+{
+ /* The value of string size is not const, so will fail */
+ cmp_ret = bpf_strncmp(str, no_const_str_size, target);
+ return 0;
+}
+
+SEC("?tp/syscalls/sys_enter_nanosleep")
+int strncmp_bad_writable_target(void *ctx)
+{
+ /* Compared target is not read-only, so will fail */
+ cmp_ret = bpf_strncmp(str, STRNCMP_STR_SZ, writable_target);
+ return 0;
+}
+
+SEC("?tp/syscalls/sys_enter_nanosleep")
+int strncmp_bad_not_null_term_target(void *ctx)
+{
+ /* Compared target is not null-terminated, so will fail */
+ cmp_ret = bpf_strncmp(str, STRNCMP_STR_SZ, no_str_target);
+ return 0;
+}
diff --git a/tools/testing/selftests/bpf/progs/strobemeta.h b/tools/testing/selftests/bpf/progs/strobemeta.h
index ad61b722a9de..f74459eead26 100644
--- a/tools/testing/selftests/bpf/progs/strobemeta.h
+++ b/tools/testing/selftests/bpf/progs/strobemeta.h
@@ -10,6 +10,8 @@
#include <linux/types.h>
#include <bpf/bpf_helpers.h>
+#include "bpf_compiler.h"
+
typedef uint32_t pid_t;
struct task_struct {};
@@ -24,9 +26,11 @@ struct task_struct {};
#define STACK_TABLE_EPOCH_SHIFT 20
#define STROBE_MAX_STR_LEN 1
#define STROBE_MAX_CFGS 32
+#define READ_MAP_VAR_PAYLOAD_CAP \
+ ((1 + STROBE_MAX_MAP_ENTRIES * 2) * STROBE_MAX_STR_LEN)
#define STROBE_MAX_PAYLOAD \
(STROBE_MAX_STRS * STROBE_MAX_STR_LEN + \
- STROBE_MAX_MAPS * (1 + STROBE_MAX_MAP_ENTRIES * 2) * STROBE_MAX_STR_LEN)
+ STROBE_MAX_MAPS * READ_MAP_VAR_PAYLOAD_CAP)
struct strobe_value_header {
/*
@@ -135,7 +139,7 @@ struct strobe_value_loc {
* tpidr_el0 for aarch64).
* TLS_IMM_EXEC: absolute address of GOT entry containing offset
* from thread pointer;
- * TLS_GENERAL_DYN: absolute addres of double GOT entry
+ * TLS_GENERAL_DYN: absolute address of double GOT entry
* containing tls_index_t struct;
*/
int64_t offset;
@@ -266,8 +270,12 @@ struct tls_index {
uint64_t offset;
};
-static __always_inline void *calc_location(struct strobe_value_loc *loc,
- void *tls_base)
+#ifdef SUBPROGS
+__noinline
+#else
+__always_inline
+#endif
+static void *calc_location(struct strobe_value_loc *loc, void *tls_base)
{
/*
* tls_mode value is:
@@ -327,10 +335,15 @@ static __always_inline void *calc_location(struct strobe_value_loc *loc,
: NULL;
}
-static __always_inline void read_int_var(struct strobemeta_cfg *cfg,
- size_t idx, void *tls_base,
- struct strobe_value_generic *value,
- struct strobemeta_payload *data)
+#ifdef SUBPROGS
+__noinline
+#else
+__always_inline
+#endif
+static void read_int_var(struct strobemeta_cfg *cfg,
+ size_t idx, void *tls_base,
+ struct strobe_value_generic *value,
+ struct strobemeta_payload *data)
{
void *location = calc_location(&cfg->int_locs[idx], tls_base);
if (!location)
@@ -346,10 +359,10 @@ static __always_inline uint64_t read_str_var(struct strobemeta_cfg *cfg,
size_t idx, void *tls_base,
struct strobe_value_generic *value,
struct strobemeta_payload *data,
- void *payload)
+ size_t off)
{
void *location;
- uint32_t len;
+ uint64_t len;
data->str_lens[idx] = 0;
location = calc_location(&cfg->str_locs[idx], tls_base);
@@ -357,7 +370,7 @@ static __always_inline uint64_t read_str_var(struct strobemeta_cfg *cfg,
return 0;
bpf_probe_read_user(value, sizeof(struct strobe_value_generic), location);
- len = bpf_probe_read_user_str(payload, STROBE_MAX_STR_LEN, value->ptr);
+ len = bpf_probe_read_user_str(&data->payload[off], STROBE_MAX_STR_LEN, value->ptr);
/*
* if bpf_probe_read_user_str returns error (<0), due to casting to
* unsinged int, it will become big number, so next check is
@@ -369,31 +382,30 @@ static __always_inline uint64_t read_str_var(struct strobemeta_cfg *cfg,
return 0;
data->str_lens[idx] = len;
- return len;
+ return off + len;
}
-static __always_inline void *read_map_var(struct strobemeta_cfg *cfg,
- size_t idx, void *tls_base,
- struct strobe_value_generic *value,
- struct strobemeta_payload *data,
- void *payload)
+static __always_inline uint64_t read_map_var(struct strobemeta_cfg *cfg,
+ size_t idx, void *tls_base,
+ struct strobe_value_generic *value,
+ struct strobemeta_payload *data,
+ size_t off)
{
struct strobe_map_descr* descr = &data->map_descrs[idx];
struct strobe_map_raw map;
void *location;
- uint32_t len;
- int i;
+ uint64_t len;
descr->tag_len = 0; /* presume no tag is set */
descr->cnt = -1; /* presume no value is set */
location = calc_location(&cfg->map_locs[idx], tls_base);
if (!location)
- return payload;
+ return off;
bpf_probe_read_user(value, sizeof(struct strobe_value_generic), location);
if (bpf_probe_read_user(&map, sizeof(struct strobe_map_raw), value->ptr))
- return payload;
+ return off;
descr->id = map.id;
descr->cnt = map.cnt;
@@ -402,51 +414,109 @@ static __always_inline void *read_map_var(struct strobemeta_cfg *cfg,
data->req_meta_valid = 1;
}
- len = bpf_probe_read_user_str(payload, STROBE_MAX_STR_LEN, map.tag);
+ len = bpf_probe_read_user_str(&data->payload[off], STROBE_MAX_STR_LEN, map.tag);
if (len <= STROBE_MAX_STR_LEN) {
descr->tag_len = len;
- payload += len;
+ off += len;
}
#ifdef NO_UNROLL
-#pragma clang loop unroll(disable)
+ __pragma_loop_no_unroll
#else
-#pragma unroll
+ __pragma_loop_unroll
#endif
for (int i = 0; i < STROBE_MAX_MAP_ENTRIES; ++i) {
if (i >= map.cnt)
break;
descr->key_lens[i] = 0;
- len = bpf_probe_read_user_str(payload, STROBE_MAX_STR_LEN,
+ len = bpf_probe_read_user_str(&data->payload[off], STROBE_MAX_STR_LEN,
map.entries[i].key);
if (len <= STROBE_MAX_STR_LEN) {
descr->key_lens[i] = len;
- payload += len;
+ off += len;
}
descr->val_lens[i] = 0;
- len = bpf_probe_read_user_str(payload, STROBE_MAX_STR_LEN,
+ len = bpf_probe_read_user_str(&data->payload[off], STROBE_MAX_STR_LEN,
map.entries[i].val);
if (len <= STROBE_MAX_STR_LEN) {
descr->val_lens[i] = len;
- payload += len;
+ off += len;
}
}
- return payload;
+ return off;
+}
+
+#ifdef USE_BPF_LOOP
+enum read_type {
+ READ_INT_VAR,
+ READ_MAP_VAR,
+ READ_STR_VAR,
+};
+
+struct read_var_ctx {
+ struct strobemeta_payload *data;
+ void *tls_base;
+ struct strobemeta_cfg *cfg;
+ size_t payload_off;
+ /* value gets mutated */
+ struct strobe_value_generic *value;
+ enum read_type type;
+};
+
+static int read_var_callback(__u64 index, struct read_var_ctx *ctx)
+{
+ /* lose precision info for ctx->payload_off, verifier won't track
+ * double xor, barrier_var() is needed to force clang keep both xors.
+ */
+ ctx->payload_off ^= index;
+ barrier_var(ctx->payload_off);
+ ctx->payload_off ^= index;
+ switch (ctx->type) {
+ case READ_INT_VAR:
+ if (index >= STROBE_MAX_INTS)
+ return 1;
+ read_int_var(ctx->cfg, index, ctx->tls_base, ctx->value, ctx->data);
+ break;
+ case READ_MAP_VAR:
+ if (index >= STROBE_MAX_MAPS)
+ return 1;
+ if (ctx->payload_off > sizeof(ctx->data->payload) - READ_MAP_VAR_PAYLOAD_CAP)
+ return 1;
+ ctx->payload_off = read_map_var(ctx->cfg, index, ctx->tls_base,
+ ctx->value, ctx->data, ctx->payload_off);
+ break;
+ case READ_STR_VAR:
+ if (index >= STROBE_MAX_STRS)
+ return 1;
+ if (ctx->payload_off > sizeof(ctx->data->payload) - STROBE_MAX_STR_LEN)
+ return 1;
+ ctx->payload_off = read_str_var(ctx->cfg, index, ctx->tls_base,
+ ctx->value, ctx->data, ctx->payload_off);
+ break;
+ }
+ return 0;
}
+#endif /* USE_BPF_LOOP */
/*
* read_strobe_meta returns NULL, if no metadata was read; otherwise returns
* pointer to *right after* payload ends
*/
-static __always_inline void *read_strobe_meta(struct task_struct *task,
- struct strobemeta_payload *data)
+#ifdef SUBPROGS
+__noinline
+#else
+__always_inline
+#endif
+static void *read_strobe_meta(struct task_struct *task,
+ struct strobemeta_payload *data)
{
pid_t pid = bpf_get_current_pid_tgid() >> 32;
struct strobe_value_generic value = {0};
struct strobemeta_cfg *cfg;
- void *tls_base, *payload;
+ size_t payload_off;
+ void *tls_base;
cfg = bpf_map_lookup_elem(&strobemeta_cfgs, &pid);
if (!cfg)
@@ -454,42 +524,74 @@ static __always_inline void *read_strobe_meta(struct task_struct *task,
data->int_vals_set_mask = 0;
data->req_meta_valid = 0;
- payload = data->payload;
+ payload_off = 0;
/*
* we don't have struct task_struct definition, it should be:
* tls_base = (void *)task->thread.fsbase;
*/
tls_base = (void *)task;
+#ifdef USE_BPF_LOOP
+ struct read_var_ctx ctx = {
+ .cfg = cfg,
+ .tls_base = tls_base,
+ .value = &value,
+ .data = data,
+ .payload_off = 0,
+ };
+ int err;
+
+ ctx.type = READ_INT_VAR;
+ err = bpf_loop(STROBE_MAX_INTS, read_var_callback, &ctx, 0);
+ if (err != STROBE_MAX_INTS)
+ return NULL;
+
+ ctx.type = READ_STR_VAR;
+ err = bpf_loop(STROBE_MAX_STRS, read_var_callback, &ctx, 0);
+ if (err != STROBE_MAX_STRS)
+ return NULL;
+
+ ctx.type = READ_MAP_VAR;
+ err = bpf_loop(STROBE_MAX_MAPS, read_var_callback, &ctx, 0);
+ if (err != STROBE_MAX_MAPS)
+ return NULL;
+
+ payload_off = ctx.payload_off;
+ /* this should not really happen, here only to satisfy verifer */
+ if (payload_off > sizeof(data->payload))
+ payload_off = sizeof(data->payload);
+#else
#ifdef NO_UNROLL
-#pragma clang loop unroll(disable)
+ __pragma_loop_no_unroll
#else
-#pragma unroll
-#endif
+ __pragma_loop_unroll
+#endif /* NO_UNROLL */
for (int i = 0; i < STROBE_MAX_INTS; ++i) {
read_int_var(cfg, i, tls_base, &value, data);
}
#ifdef NO_UNROLL
-#pragma clang loop unroll(disable)
+ __pragma_loop_no_unroll
#else
-#pragma unroll
-#endif
+ __pragma_loop_unroll
+#endif /* NO_UNROLL */
for (int i = 0; i < STROBE_MAX_STRS; ++i) {
- payload += read_str_var(cfg, i, tls_base, &value, data, payload);
+ payload_off = read_str_var(cfg, i, tls_base, &value, data, payload_off);
}
#ifdef NO_UNROLL
-#pragma clang loop unroll(disable)
+ __pragma_loop_no_unroll
#else
-#pragma unroll
-#endif
+ __pragma_loop_unroll
+#endif /* NO_UNROLL */
for (int i = 0; i < STROBE_MAX_MAPS; ++i) {
- payload = read_map_var(cfg, i, tls_base, &value, data, payload);
+ payload_off = read_map_var(cfg, i, tls_base, &value, data, payload_off);
}
+#endif /* USE_BPF_LOOP */
+
/*
* return pointer right after end of payload, so it's possible to
* calculate exact amount of useful data that needs to be sent
*/
- return payload;
+ return &data->payload[payload_off];
}
SEC("raw_tracepoint/kfree_skb")
diff --git a/tools/testing/selftests/bpf/progs/strobemeta_bpf_loop.c b/tools/testing/selftests/bpf/progs/strobemeta_bpf_loop.c
new file mode 100644
index 000000000000..d18b992f0165
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/strobemeta_bpf_loop.c
@@ -0,0 +1,9 @@
+// SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause)
+/* Copyright (c) 2021 Facebook */
+
+#define STROBE_MAX_INTS 2
+#define STROBE_MAX_STRS 25
+#define STROBE_MAX_MAPS 100
+#define STROBE_MAX_MAP_ENTRIES 20
+#define USE_BPF_LOOP
+#include "strobemeta.h"
diff --git a/tools/testing/selftests/bpf/progs/strobemeta_subprogs.c b/tools/testing/selftests/bpf/progs/strobemeta_subprogs.c
new file mode 100644
index 000000000000..b6c01f8fc559
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/strobemeta_subprogs.c
@@ -0,0 +1,10 @@
+// SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause)
+// Copyright (c) 2019 Facebook
+
+#define STROBE_MAX_INTS 2
+#define STROBE_MAX_STRS 25
+#define STROBE_MAX_MAPS 13
+#define STROBE_MAX_MAP_ENTRIES 20
+#define NO_UNROLL
+#define SUBPROGS
+#include "strobemeta.h"
diff --git a/tools/testing/selftests/bpf/progs/struct_ops_autocreate.c b/tools/testing/selftests/bpf/progs/struct_ops_autocreate.c
new file mode 100644
index 000000000000..ba10c3896213
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/struct_ops_autocreate.c
@@ -0,0 +1,52 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include <linux/bpf.h>
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
+
+char _license[] SEC("license") = "GPL";
+
+int test_1_result = 0;
+
+SEC("struct_ops/test_1")
+int BPF_PROG(test_1)
+{
+ test_1_result = 42;
+ return 0;
+}
+
+SEC("struct_ops/test_1")
+int BPF_PROG(test_2)
+{
+ return 0;
+}
+
+struct bpf_testmod_ops___v1 {
+ int (*test_1)(void);
+};
+
+struct bpf_testmod_ops___v2 {
+ int (*test_1)(void);
+ int (*does_not_exist)(void);
+};
+
+SEC(".struct_ops.link")
+struct bpf_testmod_ops___v1 testmod_1 = {
+ .test_1 = (void *)test_1
+};
+
+SEC(".struct_ops.link")
+struct bpf_testmod_ops___v2 testmod_2 = {
+ .test_1 = (void *)test_1,
+ .does_not_exist = (void *)test_2
+};
+
+SEC("?.struct_ops")
+struct bpf_testmod_ops___v1 optional_map = {
+ .test_1 = (void *)test_1,
+};
+
+SEC("?.struct_ops.link")
+struct bpf_testmod_ops___v1 optional_map2 = {
+ .test_1 = (void *)test_1,
+};
diff --git a/tools/testing/selftests/bpf/progs/struct_ops_autocreate2.c b/tools/testing/selftests/bpf/progs/struct_ops_autocreate2.c
new file mode 100644
index 000000000000..6049d9c902d3
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/struct_ops_autocreate2.c
@@ -0,0 +1,32 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include <linux/bpf.h>
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
+
+char _license[] SEC("license") = "GPL";
+
+int test_1_result = 0;
+
+SEC("?struct_ops/test_1")
+int BPF_PROG(foo)
+{
+ test_1_result = 42;
+ return 0;
+}
+
+SEC("?struct_ops/test_1")
+int BPF_PROG(bar)
+{
+ test_1_result = 24;
+ return 0;
+}
+
+struct bpf_testmod_ops {
+ int (*test_1)(void);
+};
+
+SEC(".struct_ops.link")
+struct bpf_testmod_ops testmod_1 = {
+ .test_1 = (void *)bar
+};
diff --git a/tools/testing/selftests/bpf/progs/struct_ops_maybe_null.c b/tools/testing/selftests/bpf/progs/struct_ops_maybe_null.c
new file mode 100644
index 000000000000..b450f72e744a
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/struct_ops_maybe_null.c
@@ -0,0 +1,29 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2024 Meta Platforms, Inc. and affiliates. */
+#include <vmlinux.h>
+#include <bpf/bpf_tracing.h>
+#include "../bpf_testmod/bpf_testmod.h"
+
+char _license[] SEC("license") = "GPL";
+
+pid_t tgid = 0;
+
+/* This is a test BPF program that uses struct_ops to access an argument
+ * that may be NULL. This is a test for the verifier to ensure that it can
+ * rip PTR_MAYBE_NULL correctly.
+ */
+SEC("struct_ops/test_maybe_null")
+int BPF_PROG(test_maybe_null, int dummy,
+ struct task_struct *task)
+{
+ if (task)
+ tgid = task->tgid;
+
+ return 0;
+}
+
+SEC(".struct_ops.link")
+struct bpf_testmod_ops testmod_1 = {
+ .test_maybe_null = (void *)test_maybe_null,
+};
+
diff --git a/tools/testing/selftests/bpf/progs/struct_ops_maybe_null_fail.c b/tools/testing/selftests/bpf/progs/struct_ops_maybe_null_fail.c
new file mode 100644
index 000000000000..6283099ec383
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/struct_ops_maybe_null_fail.c
@@ -0,0 +1,24 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2024 Meta Platforms, Inc. and affiliates. */
+#include <vmlinux.h>
+#include <bpf/bpf_tracing.h>
+#include "../bpf_testmod/bpf_testmod.h"
+
+char _license[] SEC("license") = "GPL";
+
+pid_t tgid = 0;
+
+SEC("struct_ops/test_maybe_null_struct_ptr")
+int BPF_PROG(test_maybe_null_struct_ptr, int dummy,
+ struct task_struct *task)
+{
+ tgid = task->tgid;
+
+ return 0;
+}
+
+SEC(".struct_ops.link")
+struct bpf_testmod_ops testmod_struct_ptr = {
+ .test_maybe_null = (void *)test_maybe_null_struct_ptr,
+};
+
diff --git a/tools/testing/selftests/bpf/progs/struct_ops_module.c b/tools/testing/selftests/bpf/progs/struct_ops_module.c
new file mode 100644
index 000000000000..026cabfa7f1f
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/struct_ops_module.c
@@ -0,0 +1,56 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2024 Meta Platforms, Inc. and affiliates. */
+#include <vmlinux.h>
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
+#include "../bpf_testmod/bpf_testmod.h"
+
+char _license[] SEC("license") = "GPL";
+
+int test_1_result = 0;
+int test_2_result = 0;
+
+SEC("struct_ops/test_1")
+int BPF_PROG(test_1)
+{
+ test_1_result = 0xdeadbeef;
+ return 0;
+}
+
+SEC("struct_ops/test_2")
+void BPF_PROG(test_2, int a, int b)
+{
+ test_2_result = a + b;
+}
+
+SEC("struct_ops/test_3")
+int BPF_PROG(test_3, int a, int b)
+{
+ test_2_result = a + b + 3;
+ return a + b + 3;
+}
+
+SEC(".struct_ops.link")
+struct bpf_testmod_ops testmod_1 = {
+ .test_1 = (void *)test_1,
+ .test_2 = (void *)test_2,
+ .data = 0x1,
+};
+
+SEC("struct_ops/test_2")
+void BPF_PROG(test_2_v2, int a, int b)
+{
+ test_2_result = a * b;
+}
+
+struct bpf_testmod_ops___v2 {
+ int (*test_1)(void);
+ void (*test_2)(int a, int b);
+ int (*test_maybe_null)(int dummy, struct task_struct *task);
+};
+
+SEC(".struct_ops.link")
+struct bpf_testmod_ops___v2 testmod_2 = {
+ .test_1 = (void *)test_1,
+ .test_2 = (void *)test_2_v2,
+};
diff --git a/tools/testing/selftests/bpf/progs/struct_ops_multi_pages.c b/tools/testing/selftests/bpf/progs/struct_ops_multi_pages.c
new file mode 100644
index 000000000000..9efcc6e4d356
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/struct_ops_multi_pages.c
@@ -0,0 +1,102 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2024 Meta Platforms, Inc. and affiliates. */
+#include <vmlinux.h>
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
+#include "../bpf_testmod/bpf_testmod.h"
+
+char _license[] SEC("license") = "GPL";
+
+#define TRAMP(x) \
+ SEC("struct_ops/tramp_" #x) \
+ int BPF_PROG(tramp_ ## x, int a) \
+ { \
+ return a; \
+ }
+
+TRAMP(1)
+TRAMP(2)
+TRAMP(3)
+TRAMP(4)
+TRAMP(5)
+TRAMP(6)
+TRAMP(7)
+TRAMP(8)
+TRAMP(9)
+TRAMP(10)
+TRAMP(11)
+TRAMP(12)
+TRAMP(13)
+TRAMP(14)
+TRAMP(15)
+TRAMP(16)
+TRAMP(17)
+TRAMP(18)
+TRAMP(19)
+TRAMP(20)
+TRAMP(21)
+TRAMP(22)
+TRAMP(23)
+TRAMP(24)
+TRAMP(25)
+TRAMP(26)
+TRAMP(27)
+TRAMP(28)
+TRAMP(29)
+TRAMP(30)
+TRAMP(31)
+TRAMP(32)
+TRAMP(33)
+TRAMP(34)
+TRAMP(35)
+TRAMP(36)
+TRAMP(37)
+TRAMP(38)
+TRAMP(39)
+TRAMP(40)
+
+#define F_TRAMP(x) .tramp_ ## x = (void *)tramp_ ## x
+
+SEC(".struct_ops.link")
+struct bpf_testmod_ops multi_pages = {
+ F_TRAMP(1),
+ F_TRAMP(2),
+ F_TRAMP(3),
+ F_TRAMP(4),
+ F_TRAMP(5),
+ F_TRAMP(6),
+ F_TRAMP(7),
+ F_TRAMP(8),
+ F_TRAMP(9),
+ F_TRAMP(10),
+ F_TRAMP(11),
+ F_TRAMP(12),
+ F_TRAMP(13),
+ F_TRAMP(14),
+ F_TRAMP(15),
+ F_TRAMP(16),
+ F_TRAMP(17),
+ F_TRAMP(18),
+ F_TRAMP(19),
+ F_TRAMP(20),
+ F_TRAMP(21),
+ F_TRAMP(22),
+ F_TRAMP(23),
+ F_TRAMP(24),
+ F_TRAMP(25),
+ F_TRAMP(26),
+ F_TRAMP(27),
+ F_TRAMP(28),
+ F_TRAMP(29),
+ F_TRAMP(30),
+ F_TRAMP(31),
+ F_TRAMP(32),
+ F_TRAMP(33),
+ F_TRAMP(34),
+ F_TRAMP(35),
+ F_TRAMP(36),
+ F_TRAMP(37),
+ F_TRAMP(38),
+ F_TRAMP(39),
+ F_TRAMP(40),
+};
diff --git a/tools/testing/selftests/bpf/progs/syscall.c b/tools/testing/selftests/bpf/progs/syscall.c
new file mode 100644
index 000000000000..3d3cafdebe72
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/syscall.c
@@ -0,0 +1,207 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2021 Facebook */
+#include <linux/stddef.h>
+#include <linux/bpf.h>
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
+#include <../../../tools/include/linux/filter.h>
+#include <linux/btf.h>
+#include <string.h>
+#include <errno.h>
+
+char _license[] SEC("license") = "GPL";
+
+struct bpf_map {
+ int id;
+} __attribute__((preserve_access_index));
+
+struct args {
+ __u64 log_buf;
+ __u32 log_size;
+ int max_entries;
+ int map_fd;
+ int prog_fd;
+ int btf_fd;
+};
+
+#define BTF_INFO_ENC(kind, kind_flag, vlen) \
+ ((!!(kind_flag) << 31) | ((kind) << 24) | ((vlen) & BTF_MAX_VLEN))
+#define BTF_TYPE_ENC(name, info, size_or_type) (name), (info), (size_or_type)
+#define BTF_INT_ENC(encoding, bits_offset, nr_bits) \
+ ((encoding) << 24 | (bits_offset) << 16 | (nr_bits))
+#define BTF_TYPE_INT_ENC(name, encoding, bits_offset, bits, sz) \
+ BTF_TYPE_ENC(name, BTF_INFO_ENC(BTF_KIND_INT, 0, 0), sz), \
+ BTF_INT_ENC(encoding, bits_offset, bits)
+
+struct {
+ __uint(type, BPF_MAP_TYPE_ARRAY);
+ __type(key, int);
+ __type(value, union bpf_attr);
+ __uint(max_entries, 1);
+} bpf_attr_array SEC(".maps");
+
+struct inner_map_type {
+ __uint(type, BPF_MAP_TYPE_ARRAY);
+ __uint(key_size, 4);
+ __uint(value_size, 4);
+ __uint(max_entries, 1);
+} inner_map SEC(".maps");
+
+struct {
+ __uint(type, BPF_MAP_TYPE_ARRAY_OF_MAPS);
+ __type(key, int);
+ __type(value, int);
+ __uint(max_entries, 1);
+ __array(values, struct inner_map_type);
+} outer_array_map SEC(".maps") = {
+ .values = {
+ [0] = &inner_map,
+ },
+};
+
+static inline __u64 ptr_to_u64(const void *ptr)
+{
+ return (__u64) (unsigned long) ptr;
+}
+
+static int btf_load(void)
+{
+ struct btf_blob {
+ struct btf_header btf_hdr;
+ __u32 types[8];
+ __u32 str;
+ } raw_btf = {
+ .btf_hdr = {
+ .magic = BTF_MAGIC,
+ .version = BTF_VERSION,
+ .hdr_len = sizeof(struct btf_header),
+ .type_len = sizeof(__u32) * 8,
+ .str_off = sizeof(__u32) * 8,
+ .str_len = sizeof(__u32),
+ },
+ .types = {
+ /* long */
+ BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 64, 8), /* [1] */
+ /* unsigned long */
+ BTF_TYPE_INT_ENC(0, 0, 0, 64, 8), /* [2] */
+ },
+ };
+ static union bpf_attr btf_load_attr = {
+ .btf_size = sizeof(raw_btf),
+ };
+
+ btf_load_attr.btf = (long)&raw_btf;
+ return bpf_sys_bpf(BPF_BTF_LOAD, &btf_load_attr, sizeof(btf_load_attr));
+}
+
+SEC("syscall")
+int load_prog(struct args *ctx)
+{
+ static char license[] = "GPL";
+ static struct bpf_insn insns[] = {
+ BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
+ BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+ BPF_LD_MAP_FD(BPF_REG_1, 0),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ };
+ static union bpf_attr map_create_attr = {
+ .map_type = BPF_MAP_TYPE_HASH,
+ .key_size = 8,
+ .value_size = 8,
+ .btf_key_type_id = 1,
+ .btf_value_type_id = 2,
+ };
+ static union bpf_attr map_update_attr = { .map_fd = 1, };
+ static __u64 key = 12;
+ static __u64 value = 34;
+ static union bpf_attr prog_load_attr = {
+ .prog_type = BPF_PROG_TYPE_XDP,
+ .insn_cnt = sizeof(insns) / sizeof(insns[0]),
+ };
+ int ret;
+
+ ret = btf_load();
+ if (ret <= 0)
+ return ret;
+
+ ctx->btf_fd = ret;
+ map_create_attr.max_entries = ctx->max_entries;
+ map_create_attr.btf_fd = ret;
+
+ prog_load_attr.license = ptr_to_u64(license);
+ prog_load_attr.insns = ptr_to_u64(insns);
+ prog_load_attr.log_buf = ctx->log_buf;
+ prog_load_attr.log_size = ctx->log_size;
+ prog_load_attr.log_level = 1;
+
+ ret = bpf_sys_bpf(BPF_MAP_CREATE, &map_create_attr, sizeof(map_create_attr));
+ if (ret <= 0)
+ return ret;
+ ctx->map_fd = ret;
+ insns[3].imm = ret;
+
+ map_update_attr.map_fd = ret;
+ map_update_attr.key = ptr_to_u64(&key);
+ map_update_attr.value = ptr_to_u64(&value);
+ ret = bpf_sys_bpf(BPF_MAP_UPDATE_ELEM, &map_update_attr, sizeof(map_update_attr));
+ if (ret < 0)
+ return ret;
+
+ ret = bpf_sys_bpf(BPF_PROG_LOAD, &prog_load_attr, sizeof(prog_load_attr));
+ if (ret <= 0)
+ return ret;
+ ctx->prog_fd = ret;
+ return 1;
+}
+
+SEC("syscall")
+int update_outer_map(void *ctx)
+{
+ int zero = 0, ret = 0, outer_fd = -1, inner_fd = -1, err;
+ const int attr_sz = sizeof(union bpf_attr);
+ union bpf_attr *attr;
+
+ attr = bpf_map_lookup_elem((struct bpf_map *)&bpf_attr_array, &zero);
+ if (!attr)
+ goto out;
+
+ memset(attr, 0, attr_sz);
+ attr->map_id = ((struct bpf_map *)&outer_array_map)->id;
+ outer_fd = bpf_sys_bpf(BPF_MAP_GET_FD_BY_ID, attr, attr_sz);
+ if (outer_fd < 0)
+ goto out;
+
+ memset(attr, 0, attr_sz);
+ attr->map_type = BPF_MAP_TYPE_ARRAY;
+ attr->key_size = 4;
+ attr->value_size = 4;
+ attr->max_entries = 1;
+ inner_fd = bpf_sys_bpf(BPF_MAP_CREATE, attr, attr_sz);
+ if (inner_fd < 0)
+ goto out;
+
+ memset(attr, 0, attr_sz);
+ attr->map_fd = outer_fd;
+ attr->key = ptr_to_u64(&zero);
+ attr->value = ptr_to_u64(&inner_fd);
+ err = bpf_sys_bpf(BPF_MAP_UPDATE_ELEM, attr, attr_sz);
+ if (err)
+ goto out;
+
+ memset(attr, 0, attr_sz);
+ attr->map_fd = outer_fd;
+ attr->key = ptr_to_u64(&zero);
+ err = bpf_sys_bpf(BPF_MAP_DELETE_ELEM, attr, attr_sz);
+ if (err)
+ goto out;
+ ret = 1;
+out:
+ if (inner_fd >= 0)
+ bpf_sys_close(inner_fd);
+ if (outer_fd >= 0)
+ bpf_sys_close(outer_fd);
+ return ret;
+}
diff --git a/tools/testing/selftests/bpf/progs/tailcall1.c b/tools/testing/selftests/bpf/progs/tailcall1.c
index 1f407e65ae52..8159a0b4a69a 100644
--- a/tools/testing/selftests/bpf/progs/tailcall1.c
+++ b/tools/testing/selftests/bpf/progs/tailcall1.c
@@ -11,8 +11,8 @@ struct {
} jmp_table SEC(".maps");
#define TAIL_FUNC(x) \
- SEC("classifier/" #x) \
- int bpf_func_##x(struct __sk_buff *skb) \
+ SEC("tc") \
+ int classifier_##x(struct __sk_buff *skb) \
{ \
return x; \
}
@@ -20,29 +20,28 @@ TAIL_FUNC(0)
TAIL_FUNC(1)
TAIL_FUNC(2)
-SEC("classifier")
+SEC("tc")
int entry(struct __sk_buff *skb)
{
/* Multiple locations to make sure we patch
* all of them.
*/
- bpf_tail_call(skb, &jmp_table, 0);
- bpf_tail_call(skb, &jmp_table, 0);
- bpf_tail_call(skb, &jmp_table, 0);
- bpf_tail_call(skb, &jmp_table, 0);
+ bpf_tail_call_static(skb, &jmp_table, 0);
+ bpf_tail_call_static(skb, &jmp_table, 0);
+ bpf_tail_call_static(skb, &jmp_table, 0);
+ bpf_tail_call_static(skb, &jmp_table, 0);
- bpf_tail_call(skb, &jmp_table, 1);
- bpf_tail_call(skb, &jmp_table, 1);
- bpf_tail_call(skb, &jmp_table, 1);
- bpf_tail_call(skb, &jmp_table, 1);
+ bpf_tail_call_static(skb, &jmp_table, 1);
+ bpf_tail_call_static(skb, &jmp_table, 1);
+ bpf_tail_call_static(skb, &jmp_table, 1);
+ bpf_tail_call_static(skb, &jmp_table, 1);
- bpf_tail_call(skb, &jmp_table, 2);
- bpf_tail_call(skb, &jmp_table, 2);
- bpf_tail_call(skb, &jmp_table, 2);
- bpf_tail_call(skb, &jmp_table, 2);
+ bpf_tail_call_static(skb, &jmp_table, 2);
+ bpf_tail_call_static(skb, &jmp_table, 2);
+ bpf_tail_call_static(skb, &jmp_table, 2);
+ bpf_tail_call_static(skb, &jmp_table, 2);
return 3;
}
char __license[] SEC("license") = "GPL";
-int _version SEC("version") = 1;
diff --git a/tools/testing/selftests/bpf/progs/tailcall2.c b/tools/testing/selftests/bpf/progs/tailcall2.c
index a093e739cf0e..a5ff53e61702 100644
--- a/tools/testing/selftests/bpf/progs/tailcall2.c
+++ b/tools/testing/selftests/bpf/progs/tailcall2.c
@@ -10,50 +10,49 @@ struct {
__uint(value_size, sizeof(__u32));
} jmp_table SEC(".maps");
-SEC("classifier/0")
-int bpf_func_0(struct __sk_buff *skb)
+SEC("tc")
+int classifier_0(struct __sk_buff *skb)
{
- bpf_tail_call(skb, &jmp_table, 1);
+ bpf_tail_call_static(skb, &jmp_table, 1);
return 0;
}
-SEC("classifier/1")
-int bpf_func_1(struct __sk_buff *skb)
+SEC("tc")
+int classifier_1(struct __sk_buff *skb)
{
- bpf_tail_call(skb, &jmp_table, 2);
+ bpf_tail_call_static(skb, &jmp_table, 2);
return 1;
}
-SEC("classifier/2")
-int bpf_func_2(struct __sk_buff *skb)
+SEC("tc")
+int classifier_2(struct __sk_buff *skb)
{
return 2;
}
-SEC("classifier/3")
-int bpf_func_3(struct __sk_buff *skb)
+SEC("tc")
+int classifier_3(struct __sk_buff *skb)
{
- bpf_tail_call(skb, &jmp_table, 4);
+ bpf_tail_call_static(skb, &jmp_table, 4);
return 3;
}
-SEC("classifier/4")
-int bpf_func_4(struct __sk_buff *skb)
+SEC("tc")
+int classifier_4(struct __sk_buff *skb)
{
- bpf_tail_call(skb, &jmp_table, 3);
+ bpf_tail_call_static(skb, &jmp_table, 3);
return 4;
}
-SEC("classifier")
+SEC("tc")
int entry(struct __sk_buff *skb)
{
- bpf_tail_call(skb, &jmp_table, 0);
+ bpf_tail_call_static(skb, &jmp_table, 0);
/* Check multi-prog update. */
- bpf_tail_call(skb, &jmp_table, 2);
+ bpf_tail_call_static(skb, &jmp_table, 2);
/* Check tail call limit. */
- bpf_tail_call(skb, &jmp_table, 3);
+ bpf_tail_call_static(skb, &jmp_table, 3);
return 3;
}
char __license[] SEC("license") = "GPL";
-int _version SEC("version") = 1;
diff --git a/tools/testing/selftests/bpf/progs/tailcall3.c b/tools/testing/selftests/bpf/progs/tailcall3.c
index cabda877cf0a..f60bcd7b8d4b 100644
--- a/tools/testing/selftests/bpf/progs/tailcall3.c
+++ b/tools/testing/selftests/bpf/progs/tailcall3.c
@@ -10,22 +10,21 @@ struct {
__uint(value_size, sizeof(__u32));
} jmp_table SEC(".maps");
-static volatile int count;
+int count = 0;
-SEC("classifier/0")
-int bpf_func_0(struct __sk_buff *skb)
+SEC("tc")
+int classifier_0(struct __sk_buff *skb)
{
count++;
- bpf_tail_call(skb, &jmp_table, 0);
+ bpf_tail_call_static(skb, &jmp_table, 0);
return 1;
}
-SEC("classifier")
+SEC("tc")
int entry(struct __sk_buff *skb)
{
- bpf_tail_call(skb, &jmp_table, 0);
+ bpf_tail_call_static(skb, &jmp_table, 0);
return 0;
}
char __license[] SEC("license") = "GPL";
-int _version SEC("version") = 1;
diff --git a/tools/testing/selftests/bpf/progs/tailcall4.c b/tools/testing/selftests/bpf/progs/tailcall4.c
index f82075b47d7d..a56bbc2313ca 100644
--- a/tools/testing/selftests/bpf/progs/tailcall4.c
+++ b/tools/testing/selftests/bpf/progs/tailcall4.c
@@ -10,11 +10,11 @@ struct {
__uint(value_size, sizeof(__u32));
} jmp_table SEC(".maps");
-static volatile int selector;
+int selector = 0;
#define TAIL_FUNC(x) \
- SEC("classifier/" #x) \
- int bpf_func_##x(struct __sk_buff *skb) \
+ SEC("tc") \
+ int classifier_##x(struct __sk_buff *skb) \
{ \
return x; \
}
@@ -22,7 +22,7 @@ TAIL_FUNC(0)
TAIL_FUNC(1)
TAIL_FUNC(2)
-SEC("classifier")
+SEC("tc")
int entry(struct __sk_buff *skb)
{
bpf_tail_call(skb, &jmp_table, selector);
@@ -30,4 +30,3 @@ int entry(struct __sk_buff *skb)
}
char __license[] SEC("license") = "GPL";
-int _version SEC("version") = 1;
diff --git a/tools/testing/selftests/bpf/progs/tailcall5.c b/tools/testing/selftests/bpf/progs/tailcall5.c
index ce5450744fd4..8d03496eb6ca 100644
--- a/tools/testing/selftests/bpf/progs/tailcall5.c
+++ b/tools/testing/selftests/bpf/progs/tailcall5.c
@@ -10,11 +10,11 @@ struct {
__uint(value_size, sizeof(__u32));
} jmp_table SEC(".maps");
-static volatile int selector;
+int selector = 0;
#define TAIL_FUNC(x) \
- SEC("classifier/" #x) \
- int bpf_func_##x(struct __sk_buff *skb) \
+ SEC("tc") \
+ int classifier_##x(struct __sk_buff *skb) \
{ \
return x; \
}
@@ -22,7 +22,7 @@ TAIL_FUNC(0)
TAIL_FUNC(1)
TAIL_FUNC(2)
-SEC("classifier")
+SEC("tc")
int entry(struct __sk_buff *skb)
{
int idx = 0;
@@ -37,4 +37,3 @@ int entry(struct __sk_buff *skb)
}
char __license[] SEC("license") = "GPL";
-int _version SEC("version") = 1;
diff --git a/tools/testing/selftests/bpf/progs/tailcall6.c b/tools/testing/selftests/bpf/progs/tailcall6.c
new file mode 100644
index 000000000000..d77b8abd62f3
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/tailcall6.c
@@ -0,0 +1,34 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <linux/bpf.h>
+
+#include <bpf/bpf_helpers.h>
+
+struct {
+ __uint(type, BPF_MAP_TYPE_PROG_ARRAY);
+ __uint(max_entries, 1);
+ __uint(key_size, sizeof(__u32));
+ __uint(value_size, sizeof(__u32));
+} jmp_table SEC(".maps");
+
+int count, which;
+
+SEC("tc")
+int classifier_0(struct __sk_buff *skb)
+{
+ count++;
+ if (__builtin_constant_p(which))
+ __bpf_unreachable();
+ bpf_tail_call(skb, &jmp_table, which);
+ return 1;
+}
+
+SEC("tc")
+int entry(struct __sk_buff *skb)
+{
+ if (__builtin_constant_p(which))
+ __bpf_unreachable();
+ bpf_tail_call(skb, &jmp_table, which);
+ return 0;
+}
+
+char __license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/tailcall_bpf2bpf1.c b/tools/testing/selftests/bpf/progs/tailcall_bpf2bpf1.c
new file mode 100644
index 000000000000..8c91428deb90
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/tailcall_bpf2bpf1.c
@@ -0,0 +1,37 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <linux/bpf.h>
+#include <bpf/bpf_helpers.h>
+
+struct {
+ __uint(type, BPF_MAP_TYPE_PROG_ARRAY);
+ __uint(max_entries, 2);
+ __uint(key_size, sizeof(__u32));
+ __uint(value_size, sizeof(__u32));
+} jmp_table SEC(".maps");
+
+#define TAIL_FUNC(x) \
+ SEC("tc") \
+ int classifier_##x(struct __sk_buff *skb) \
+ { \
+ return x; \
+ }
+TAIL_FUNC(0)
+TAIL_FUNC(1)
+
+static __noinline
+int subprog_tail(struct __sk_buff *skb)
+{
+ bpf_tail_call_static(skb, &jmp_table, 0);
+
+ return skb->len * 2;
+}
+
+SEC("tc")
+int entry(struct __sk_buff *skb)
+{
+ bpf_tail_call_static(skb, &jmp_table, 1);
+
+ return subprog_tail(skb);
+}
+
+char __license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/tailcall_bpf2bpf2.c b/tools/testing/selftests/bpf/progs/tailcall_bpf2bpf2.c
new file mode 100644
index 000000000000..ce97d141daee
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/tailcall_bpf2bpf2.c
@@ -0,0 +1,40 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <linux/bpf.h>
+#include <bpf/bpf_helpers.h>
+#include "bpf_legacy.h"
+
+struct {
+ __uint(type, BPF_MAP_TYPE_PROG_ARRAY);
+ __uint(max_entries, 1);
+ __uint(key_size, sizeof(__u32));
+ __uint(value_size, sizeof(__u32));
+} jmp_table SEC(".maps");
+
+static __noinline
+int subprog_tail(struct __sk_buff *skb)
+{
+ if (load_byte(skb, 0))
+ bpf_tail_call_static(skb, &jmp_table, 1);
+ else
+ bpf_tail_call_static(skb, &jmp_table, 0);
+ return 1;
+}
+
+int count = 0;
+
+SEC("tc")
+int classifier_0(struct __sk_buff *skb)
+{
+ count++;
+ return subprog_tail(skb);
+}
+
+SEC("tc")
+int entry(struct __sk_buff *skb)
+{
+ bpf_tail_call_static(skb, &jmp_table, 0);
+
+ return 0;
+}
+
+char __license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/tailcall_bpf2bpf3.c b/tools/testing/selftests/bpf/progs/tailcall_bpf2bpf3.c
new file mode 100644
index 000000000000..99c8d1d8a187
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/tailcall_bpf2bpf3.c
@@ -0,0 +1,71 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <linux/bpf.h>
+#include <bpf/bpf_helpers.h>
+#include "bpf_legacy.h"
+#include "bpf_misc.h"
+
+struct {
+ __uint(type, BPF_MAP_TYPE_PROG_ARRAY);
+ __uint(max_entries, 2);
+ __uint(key_size, sizeof(__u32));
+ __uint(value_size, sizeof(__u32));
+} jmp_table SEC(".maps");
+
+__noinline
+int subprog_tail2(struct __sk_buff *skb)
+{
+ volatile char arr[64] = {};
+
+ if (load_word(skb, 0) || load_half(skb, 0))
+ bpf_tail_call_static(skb, &jmp_table, 10);
+ else
+ bpf_tail_call_static(skb, &jmp_table, 1);
+
+ __sink(arr[sizeof(arr) - 1]);
+
+ return skb->len;
+}
+
+static __noinline
+int subprog_tail(struct __sk_buff *skb)
+{
+ volatile char arr[64] = {};
+
+ bpf_tail_call_static(skb, &jmp_table, 0);
+
+ __sink(arr[sizeof(arr) - 1]);
+
+ return skb->len * 2;
+}
+
+SEC("tc")
+int classifier_0(struct __sk_buff *skb)
+{
+ volatile char arr[128] = {};
+
+ __sink(arr[sizeof(arr) - 1]);
+
+ return subprog_tail2(skb);
+}
+
+SEC("tc")
+int classifier_1(struct __sk_buff *skb)
+{
+ volatile char arr[128] = {};
+
+ __sink(arr[sizeof(arr) - 1]);
+
+ return skb->len * 3;
+}
+
+SEC("tc")
+int entry(struct __sk_buff *skb)
+{
+ volatile char arr[128] = {};
+
+ __sink(arr[sizeof(arr) - 1]);
+
+ return subprog_tail(skb);
+}
+
+char __license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/tailcall_bpf2bpf4.c b/tools/testing/selftests/bpf/progs/tailcall_bpf2bpf4.c
new file mode 100644
index 000000000000..a017d6b2f1dd
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/tailcall_bpf2bpf4.c
@@ -0,0 +1,78 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <linux/bpf.h>
+#include <bpf/bpf_helpers.h>
+
+struct {
+ __uint(type, BPF_MAP_TYPE_ARRAY);
+ __uint(max_entries, 1);
+ __uint(key_size, sizeof(__u32));
+ __uint(value_size, sizeof(__u32));
+} nop_table SEC(".maps");
+
+struct {
+ __uint(type, BPF_MAP_TYPE_PROG_ARRAY);
+ __uint(max_entries, 3);
+ __uint(key_size, sizeof(__u32));
+ __uint(value_size, sizeof(__u32));
+} jmp_table SEC(".maps");
+
+int count = 0;
+int noise = 0;
+
+static __always_inline int subprog_noise(void)
+{
+ __u32 key = 0;
+
+ bpf_map_lookup_elem(&nop_table, &key);
+ return 0;
+}
+
+__noinline
+int subprog_tail_2(struct __sk_buff *skb)
+{
+ if (noise)
+ subprog_noise();
+ bpf_tail_call_static(skb, &jmp_table, 2);
+ return skb->len * 3;
+}
+
+__noinline
+int subprog_tail_1(struct __sk_buff *skb)
+{
+ bpf_tail_call_static(skb, &jmp_table, 1);
+ return skb->len * 2;
+}
+
+__noinline
+int subprog_tail(struct __sk_buff *skb)
+{
+ bpf_tail_call_static(skb, &jmp_table, 0);
+ return skb->len;
+}
+
+SEC("tc")
+int classifier_1(struct __sk_buff *skb)
+{
+ return subprog_tail_2(skb);
+}
+
+SEC("tc")
+int classifier_2(struct __sk_buff *skb)
+{
+ count++;
+ return subprog_tail_2(skb);
+}
+
+SEC("tc")
+int classifier_0(struct __sk_buff *skb)
+{
+ return subprog_tail_1(skb);
+}
+
+SEC("tc")
+int entry(struct __sk_buff *skb)
+{
+ return subprog_tail(skb);
+}
+
+char __license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/tailcall_bpf2bpf6.c b/tools/testing/selftests/bpf/progs/tailcall_bpf2bpf6.c
new file mode 100644
index 000000000000..4a9f63bea66c
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/tailcall_bpf2bpf6.c
@@ -0,0 +1,45 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <linux/bpf.h>
+#include <bpf/bpf_helpers.h>
+#include "bpf_misc.h"
+
+#define __unused __attribute__((unused))
+
+struct {
+ __uint(type, BPF_MAP_TYPE_PROG_ARRAY);
+ __uint(max_entries, 1);
+ __uint(key_size, sizeof(__u32));
+ __uint(value_size, sizeof(__u32));
+} jmp_table SEC(".maps");
+
+int done = 0;
+
+SEC("tc")
+int classifier_0(struct __sk_buff *skb __unused)
+{
+ done = 1;
+ return 0;
+}
+
+static __noinline
+int subprog_tail(struct __sk_buff *skb)
+{
+ /* Don't propagate the constant to the caller */
+ volatile int ret = 1;
+
+ bpf_tail_call_static(skb, &jmp_table, 0);
+ return ret;
+}
+
+SEC("tc")
+int entry(struct __sk_buff *skb)
+{
+ /* Have data on stack which size is not a multiple of 8 */
+ volatile char arr[1] = {};
+
+ __sink(arr[0]);
+
+ return subprog_tail(skb);
+}
+
+char __license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/tailcall_bpf2bpf_fentry.c b/tools/testing/selftests/bpf/progs/tailcall_bpf2bpf_fentry.c
new file mode 100644
index 000000000000..8436c6729167
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/tailcall_bpf2bpf_fentry.c
@@ -0,0 +1,18 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright Leon Hwang */
+
+#include "vmlinux.h"
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
+
+int count = 0;
+
+SEC("fentry/subprog_tail")
+int BPF_PROG(fentry, struct sk_buff *skb)
+{
+ count++;
+
+ return 0;
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/tailcall_bpf2bpf_fexit.c b/tools/testing/selftests/bpf/progs/tailcall_bpf2bpf_fexit.c
new file mode 100644
index 000000000000..fe16412c6e6e
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/tailcall_bpf2bpf_fexit.c
@@ -0,0 +1,18 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright Leon Hwang */
+
+#include "vmlinux.h"
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
+
+int count = 0;
+
+SEC("fexit/subprog_tail")
+int BPF_PROG(fexit, struct sk_buff *skb)
+{
+ count++;
+
+ return 0;
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/tailcall_poke.c b/tools/testing/selftests/bpf/progs/tailcall_poke.c
new file mode 100644
index 000000000000..c78b94b75e83
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/tailcall_poke.c
@@ -0,0 +1,32 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <linux/bpf.h>
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
+
+char _license[] SEC("license") = "GPL";
+
+struct {
+ __uint(type, BPF_MAP_TYPE_PROG_ARRAY);
+ __uint(max_entries, 1);
+ __uint(key_size, sizeof(__u32));
+ __uint(value_size, sizeof(__u32));
+} jmp_table SEC(".maps");
+
+SEC("?fentry/bpf_fentry_test1")
+int BPF_PROG(test, int a)
+{
+ bpf_tail_call_static(ctx, &jmp_table, 0);
+ return 0;
+}
+
+SEC("fentry/bpf_fentry_test1")
+int BPF_PROG(call1, int a)
+{
+ return 0;
+}
+
+SEC("fentry/bpf_fentry_test1")
+int BPF_PROG(call2, int a)
+{
+ return 0;
+}
diff --git a/tools/testing/selftests/bpf/progs/task_kfunc_common.h b/tools/testing/selftests/bpf/progs/task_kfunc_common.h
new file mode 100644
index 000000000000..41f2d44f49cb
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/task_kfunc_common.h
@@ -0,0 +1,76 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* Copyright (c) 2022 Meta Platforms, Inc. and affiliates. */
+
+#ifndef _TASK_KFUNC_COMMON_H
+#define _TASK_KFUNC_COMMON_H
+
+#include <errno.h>
+#include <vmlinux.h>
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
+
+struct __tasks_kfunc_map_value {
+ struct task_struct __kptr * task;
+};
+
+struct hash_map {
+ __uint(type, BPF_MAP_TYPE_HASH);
+ __type(key, int);
+ __type(value, struct __tasks_kfunc_map_value);
+ __uint(max_entries, 1);
+} __tasks_kfunc_map SEC(".maps");
+
+struct task_struct *bpf_task_acquire(struct task_struct *p) __ksym;
+void bpf_task_release(struct task_struct *p) __ksym;
+struct task_struct *bpf_task_from_pid(s32 pid) __ksym;
+void bpf_rcu_read_lock(void) __ksym;
+void bpf_rcu_read_unlock(void) __ksym;
+
+static inline struct __tasks_kfunc_map_value *tasks_kfunc_map_value_lookup(struct task_struct *p)
+{
+ s32 pid;
+ long status;
+
+ status = bpf_probe_read_kernel(&pid, sizeof(pid), &p->pid);
+ if (status)
+ return NULL;
+
+ return bpf_map_lookup_elem(&__tasks_kfunc_map, &pid);
+}
+
+static inline int tasks_kfunc_map_insert(struct task_struct *p)
+{
+ struct __tasks_kfunc_map_value local, *v;
+ long status;
+ struct task_struct *acquired, *old;
+ s32 pid;
+
+ status = bpf_probe_read_kernel(&pid, sizeof(pid), &p->pid);
+ if (status)
+ return status;
+
+ local.task = NULL;
+ status = bpf_map_update_elem(&__tasks_kfunc_map, &pid, &local, BPF_NOEXIST);
+ if (status)
+ return status;
+
+ v = bpf_map_lookup_elem(&__tasks_kfunc_map, &pid);
+ if (!v) {
+ bpf_map_delete_elem(&__tasks_kfunc_map, &pid);
+ return -ENOENT;
+ }
+
+ acquired = bpf_task_acquire(p);
+ if (!acquired)
+ return -ENOENT;
+
+ old = bpf_kptr_xchg(&v->task, acquired);
+ if (old) {
+ bpf_task_release(old);
+ return -EEXIST;
+ }
+
+ return 0;
+}
+
+#endif /* _TASK_KFUNC_COMMON_H */
diff --git a/tools/testing/selftests/bpf/progs/task_kfunc_failure.c b/tools/testing/selftests/bpf/progs/task_kfunc_failure.c
new file mode 100644
index 000000000000..ad88a3796ddf
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/task_kfunc_failure.c
@@ -0,0 +1,326 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2022 Meta Platforms, Inc. and affiliates. */
+
+#include <vmlinux.h>
+#include <bpf/bpf_tracing.h>
+#include <bpf/bpf_helpers.h>
+
+#include "bpf_misc.h"
+#include "task_kfunc_common.h"
+
+char _license[] SEC("license") = "GPL";
+
+/* Prototype for all of the program trace events below:
+ *
+ * TRACE_EVENT(task_newtask,
+ * TP_PROTO(struct task_struct *p, u64 clone_flags)
+ */
+
+static struct __tasks_kfunc_map_value *insert_lookup_task(struct task_struct *task)
+{
+ int status;
+
+ status = tasks_kfunc_map_insert(task);
+ if (status)
+ return NULL;
+
+ return tasks_kfunc_map_value_lookup(task);
+}
+
+SEC("tp_btf/task_newtask")
+__failure __msg("Possibly NULL pointer passed to trusted arg0")
+int BPF_PROG(task_kfunc_acquire_untrusted, struct task_struct *task, u64 clone_flags)
+{
+ struct task_struct *acquired;
+ struct __tasks_kfunc_map_value *v;
+
+ v = insert_lookup_task(task);
+ if (!v)
+ return 0;
+
+ /* Can't invoke bpf_task_acquire() on an untrusted pointer. */
+ acquired = bpf_task_acquire(v->task);
+ if (!acquired)
+ return 0;
+
+ bpf_task_release(acquired);
+
+ return 0;
+}
+
+SEC("tp_btf/task_newtask")
+__failure __msg("arg#0 pointer type STRUCT task_struct must point")
+int BPF_PROG(task_kfunc_acquire_fp, struct task_struct *task, u64 clone_flags)
+{
+ struct task_struct *acquired, *stack_task = (struct task_struct *)&clone_flags;
+
+ /* Can't invoke bpf_task_acquire() on a random frame pointer. */
+ acquired = bpf_task_acquire((struct task_struct *)&stack_task);
+ if (!acquired)
+ return 0;
+
+ bpf_task_release(acquired);
+
+ return 0;
+}
+
+SEC("kretprobe/free_task")
+__failure __msg("calling kernel function bpf_task_acquire is not allowed")
+int BPF_PROG(task_kfunc_acquire_unsafe_kretprobe, struct task_struct *task, u64 clone_flags)
+{
+ struct task_struct *acquired;
+
+ /* Can't call bpf_task_acquire() or bpf_task_release() in an untrusted prog. */
+ acquired = bpf_task_acquire(task);
+ if (!acquired)
+ return 0;
+ bpf_task_release(acquired);
+
+ return 0;
+}
+
+SEC("kretprobe/free_task")
+__failure __msg("calling kernel function bpf_task_acquire is not allowed")
+int BPF_PROG(task_kfunc_acquire_unsafe_kretprobe_rcu, struct task_struct *task, u64 clone_flags)
+{
+ struct task_struct *acquired;
+
+ bpf_rcu_read_lock();
+ if (!task) {
+ bpf_rcu_read_unlock();
+ return 0;
+ }
+ /* Can't call bpf_task_acquire() or bpf_task_release() in an untrusted prog. */
+ acquired = bpf_task_acquire(task);
+ if (acquired)
+ bpf_task_release(acquired);
+ bpf_rcu_read_unlock();
+
+ return 0;
+}
+
+SEC("tp_btf/task_newtask")
+__failure __msg("Possibly NULL pointer passed to trusted arg0")
+int BPF_PROG(task_kfunc_acquire_null, struct task_struct *task, u64 clone_flags)
+{
+ struct task_struct *acquired;
+
+ /* Can't invoke bpf_task_acquire() on a NULL pointer. */
+ acquired = bpf_task_acquire(NULL);
+ if (!acquired)
+ return 0;
+ bpf_task_release(acquired);
+
+ return 0;
+}
+
+SEC("tp_btf/task_newtask")
+__failure __msg("Unreleased reference")
+int BPF_PROG(task_kfunc_acquire_unreleased, struct task_struct *task, u64 clone_flags)
+{
+ struct task_struct *acquired;
+
+ acquired = bpf_task_acquire(task);
+
+ /* Acquired task is never released. */
+ __sink(acquired);
+
+ return 0;
+}
+
+SEC("tp_btf/task_newtask")
+__failure __msg("Unreleased reference")
+int BPF_PROG(task_kfunc_xchg_unreleased, struct task_struct *task, u64 clone_flags)
+{
+ struct task_struct *kptr;
+ struct __tasks_kfunc_map_value *v;
+
+ v = insert_lookup_task(task);
+ if (!v)
+ return 0;
+
+ kptr = bpf_kptr_xchg(&v->task, NULL);
+ if (!kptr)
+ return 0;
+
+ /* Kptr retrieved from map is never released. */
+
+ return 0;
+}
+
+SEC("tp_btf/task_newtask")
+__failure __msg("Possibly NULL pointer passed to trusted arg0")
+int BPF_PROG(task_kfunc_acquire_release_no_null_check, struct task_struct *task, u64 clone_flags)
+{
+ struct task_struct *acquired;
+
+ acquired = bpf_task_acquire(task);
+ /* Can't invoke bpf_task_release() on an acquired task without a NULL check. */
+ bpf_task_release(acquired);
+
+ return 0;
+}
+
+SEC("tp_btf/task_newtask")
+__failure __msg("Possibly NULL pointer passed to trusted arg0")
+int BPF_PROG(task_kfunc_release_untrusted, struct task_struct *task, u64 clone_flags)
+{
+ struct __tasks_kfunc_map_value *v;
+
+ v = insert_lookup_task(task);
+ if (!v)
+ return 0;
+
+ /* Can't invoke bpf_task_release() on an untrusted pointer. */
+ bpf_task_release(v->task);
+
+ return 0;
+}
+
+SEC("tp_btf/task_newtask")
+__failure __msg("arg#0 pointer type STRUCT task_struct must point")
+int BPF_PROG(task_kfunc_release_fp, struct task_struct *task, u64 clone_flags)
+{
+ struct task_struct *acquired = (struct task_struct *)&clone_flags;
+
+ /* Cannot release random frame pointer. */
+ bpf_task_release(acquired);
+
+ return 0;
+}
+
+SEC("tp_btf/task_newtask")
+__failure __msg("Possibly NULL pointer passed to trusted arg0")
+int BPF_PROG(task_kfunc_release_null, struct task_struct *task, u64 clone_flags)
+{
+ struct __tasks_kfunc_map_value local, *v;
+ long status;
+ struct task_struct *acquired, *old;
+ s32 pid;
+
+ status = bpf_probe_read_kernel(&pid, sizeof(pid), &task->pid);
+ if (status)
+ return 0;
+
+ local.task = NULL;
+ status = bpf_map_update_elem(&__tasks_kfunc_map, &pid, &local, BPF_NOEXIST);
+ if (status)
+ return status;
+
+ v = bpf_map_lookup_elem(&__tasks_kfunc_map, &pid);
+ if (!v)
+ return -ENOENT;
+
+ acquired = bpf_task_acquire(task);
+ if (!acquired)
+ return -EEXIST;
+
+ old = bpf_kptr_xchg(&v->task, acquired);
+
+ /* old cannot be passed to bpf_task_release() without a NULL check. */
+ bpf_task_release(old);
+
+ return 0;
+}
+
+SEC("tp_btf/task_newtask")
+__failure __msg("release kernel function bpf_task_release expects")
+int BPF_PROG(task_kfunc_release_unacquired, struct task_struct *task, u64 clone_flags)
+{
+ /* Cannot release trusted task pointer which was not acquired. */
+ bpf_task_release(task);
+
+ return 0;
+}
+
+SEC("tp_btf/task_newtask")
+__failure __msg("Possibly NULL pointer passed to trusted arg0")
+int BPF_PROG(task_kfunc_from_pid_no_null_check, struct task_struct *task, u64 clone_flags)
+{
+ struct task_struct *acquired;
+
+ acquired = bpf_task_from_pid(task->pid);
+
+ /* Releasing bpf_task_from_pid() lookup without a NULL check. */
+ bpf_task_release(acquired);
+
+ return 0;
+}
+
+SEC("lsm/task_free")
+__failure __msg("R1 must be a rcu pointer")
+int BPF_PROG(task_kfunc_from_lsm_task_free, struct task_struct *task)
+{
+ struct task_struct *acquired;
+
+ /* the argument of lsm task_free hook is untrusted. */
+ acquired = bpf_task_acquire(task);
+ if (!acquired)
+ return 0;
+
+ bpf_task_release(acquired);
+ return 0;
+}
+
+SEC("tp_btf/task_newtask")
+__failure __msg("access beyond the end of member comm")
+int BPF_PROG(task_access_comm1, struct task_struct *task, u64 clone_flags)
+{
+ bpf_strncmp(task->comm, 17, "foo");
+ return 0;
+}
+
+SEC("tp_btf/task_newtask")
+__failure __msg("access beyond the end of member comm")
+int BPF_PROG(task_access_comm2, struct task_struct *task, u64 clone_flags)
+{
+ bpf_strncmp(task->comm + 1, 16, "foo");
+ return 0;
+}
+
+SEC("tp_btf/task_newtask")
+__failure __msg("write into memory")
+int BPF_PROG(task_access_comm3, struct task_struct *task, u64 clone_flags)
+{
+ bpf_probe_read_kernel(task->comm, 16, task->comm);
+ return 0;
+}
+
+SEC("fentry/__set_task_comm")
+__failure __msg("R1 type=ptr_ expected")
+int BPF_PROG(task_access_comm4, struct task_struct *task, const char *buf, bool exec)
+{
+ /*
+ * task->comm is a legacy ptr_to_btf_id. The verifier cannot guarantee
+ * its safety. Hence it cannot be accessed with normal load insns.
+ */
+ bpf_strncmp(task->comm, 16, "foo");
+ return 0;
+}
+
+SEC("tp_btf/task_newtask")
+__failure __msg("R1 must be referenced or trusted")
+int BPF_PROG(task_kfunc_release_in_map, struct task_struct *task, u64 clone_flags)
+{
+ struct task_struct *local;
+ struct __tasks_kfunc_map_value *v;
+
+ if (tasks_kfunc_map_insert(task))
+ return 0;
+
+ v = tasks_kfunc_map_value_lookup(task);
+ if (!v)
+ return 0;
+
+ bpf_rcu_read_lock();
+ local = v->task;
+ if (!local) {
+ bpf_rcu_read_unlock();
+ return 0;
+ }
+ /* Can't release a kptr that's still stored in a map. */
+ bpf_task_release(local);
+ bpf_rcu_read_unlock();
+
+ return 0;
+}
diff --git a/tools/testing/selftests/bpf/progs/task_kfunc_success.c b/tools/testing/selftests/bpf/progs/task_kfunc_success.c
new file mode 100644
index 000000000000..70df695312dc
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/task_kfunc_success.c
@@ -0,0 +1,316 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2022 Meta Platforms, Inc. and affiliates. */
+
+#include <vmlinux.h>
+#include <bpf/bpf_tracing.h>
+#include <bpf/bpf_helpers.h>
+
+#include "task_kfunc_common.h"
+
+char _license[] SEC("license") = "GPL";
+
+int err, pid;
+
+/* Prototype for all of the program trace events below:
+ *
+ * TRACE_EVENT(task_newtask,
+ * TP_PROTO(struct task_struct *p, u64 clone_flags)
+ */
+
+struct task_struct *bpf_task_acquire(struct task_struct *p) __ksym __weak;
+
+struct task_struct *bpf_task_acquire___one(struct task_struct *task) __ksym __weak;
+/* The two-param bpf_task_acquire doesn't exist */
+struct task_struct *bpf_task_acquire___two(struct task_struct *p, void *ctx) __ksym __weak;
+/* Incorrect type for first param */
+struct task_struct *bpf_task_acquire___three(void *ctx) __ksym __weak;
+
+void invalid_kfunc(void) __ksym __weak;
+void bpf_testmod_test_mod_kfunc(int i) __ksym __weak;
+
+static bool is_test_kfunc_task(void)
+{
+ int cur_pid = bpf_get_current_pid_tgid() >> 32;
+
+ return pid == cur_pid;
+}
+
+static int test_acquire_release(struct task_struct *task)
+{
+ struct task_struct *acquired = NULL;
+
+ if (!bpf_ksym_exists(bpf_task_acquire)) {
+ err = 3;
+ return 0;
+ }
+ if (!bpf_ksym_exists(bpf_testmod_test_mod_kfunc)) {
+ err = 4;
+ return 0;
+ }
+ if (bpf_ksym_exists(invalid_kfunc)) {
+ /* the verifier's dead code elimination should remove this */
+ err = 5;
+ asm volatile ("goto -1"); /* for (;;); */
+ }
+
+ acquired = bpf_task_acquire(task);
+ if (acquired)
+ bpf_task_release(acquired);
+ else
+ err = 6;
+
+ return 0;
+}
+
+SEC("tp_btf/task_newtask")
+int BPF_PROG(test_task_kfunc_flavor_relo, struct task_struct *task, u64 clone_flags)
+{
+ struct task_struct *acquired = NULL;
+ int fake_ctx = 42;
+
+ if (bpf_ksym_exists(bpf_task_acquire___one)) {
+ acquired = bpf_task_acquire___one(task);
+ } else if (bpf_ksym_exists(bpf_task_acquire___two)) {
+ /* Here, bpf_object__resolve_ksym_func_btf_id's find_ksym_btf_id
+ * call will find vmlinux's bpf_task_acquire, but subsequent
+ * bpf_core_types_are_compat will fail
+ */
+ acquired = bpf_task_acquire___two(task, &fake_ctx);
+ err = 3;
+ return 0;
+ } else if (bpf_ksym_exists(bpf_task_acquire___three)) {
+ /* bpf_core_types_are_compat will fail similarly to above case */
+ acquired = bpf_task_acquire___three(&fake_ctx);
+ err = 4;
+ return 0;
+ }
+
+ if (acquired)
+ bpf_task_release(acquired);
+ else
+ err = 5;
+ return 0;
+}
+
+SEC("tp_btf/task_newtask")
+int BPF_PROG(test_task_kfunc_flavor_relo_not_found, struct task_struct *task, u64 clone_flags)
+{
+ /* Neither symbol should successfully resolve.
+ * Success or failure of one ___flavor should not affect others
+ */
+ if (bpf_ksym_exists(bpf_task_acquire___two))
+ err = 1;
+ else if (bpf_ksym_exists(bpf_task_acquire___three))
+ err = 2;
+
+ return 0;
+}
+
+SEC("tp_btf/task_newtask")
+int BPF_PROG(test_task_acquire_release_argument, struct task_struct *task, u64 clone_flags)
+{
+ if (!is_test_kfunc_task())
+ return 0;
+
+ return test_acquire_release(task);
+}
+
+SEC("tp_btf/task_newtask")
+int BPF_PROG(test_task_acquire_release_current, struct task_struct *task, u64 clone_flags)
+{
+ if (!is_test_kfunc_task())
+ return 0;
+
+ return test_acquire_release(bpf_get_current_task_btf());
+}
+
+SEC("tp_btf/task_newtask")
+int BPF_PROG(test_task_acquire_leave_in_map, struct task_struct *task, u64 clone_flags)
+{
+ long status;
+
+ if (!is_test_kfunc_task())
+ return 0;
+
+ status = tasks_kfunc_map_insert(task);
+ if (status)
+ err = 1;
+
+ return 0;
+}
+
+SEC("tp_btf/task_newtask")
+int BPF_PROG(test_task_xchg_release, struct task_struct *task, u64 clone_flags)
+{
+ struct task_struct *kptr;
+ struct __tasks_kfunc_map_value *v;
+ long status;
+
+ if (!is_test_kfunc_task())
+ return 0;
+
+ status = tasks_kfunc_map_insert(task);
+ if (status) {
+ err = 1;
+ return 0;
+ }
+
+ v = tasks_kfunc_map_value_lookup(task);
+ if (!v) {
+ err = 2;
+ return 0;
+ }
+
+ kptr = bpf_kptr_xchg(&v->task, NULL);
+ if (!kptr) {
+ err = 3;
+ return 0;
+ }
+
+ bpf_task_release(kptr);
+
+ return 0;
+}
+
+SEC("tp_btf/task_newtask")
+int BPF_PROG(test_task_map_acquire_release, struct task_struct *task, u64 clone_flags)
+{
+ struct task_struct *kptr;
+ struct __tasks_kfunc_map_value *v;
+ long status;
+
+ if (!is_test_kfunc_task())
+ return 0;
+
+ status = tasks_kfunc_map_insert(task);
+ if (status) {
+ err = 1;
+ return 0;
+ }
+
+ v = tasks_kfunc_map_value_lookup(task);
+ if (!v) {
+ err = 2;
+ return 0;
+ }
+
+ bpf_rcu_read_lock();
+ kptr = v->task;
+ if (!kptr) {
+ err = 3;
+ } else {
+ kptr = bpf_task_acquire(kptr);
+ if (!kptr)
+ err = 4;
+ else
+ bpf_task_release(kptr);
+ }
+ bpf_rcu_read_unlock();
+
+ return 0;
+}
+
+SEC("tp_btf/task_newtask")
+int BPF_PROG(test_task_current_acquire_release, struct task_struct *task, u64 clone_flags)
+{
+ struct task_struct *current, *acquired;
+
+ if (!is_test_kfunc_task())
+ return 0;
+
+ current = bpf_get_current_task_btf();
+ acquired = bpf_task_acquire(current);
+ if (acquired)
+ bpf_task_release(acquired);
+ else
+ err = 1;
+
+ return 0;
+}
+
+static void lookup_compare_pid(const struct task_struct *p)
+{
+ struct task_struct *acquired;
+
+ acquired = bpf_task_from_pid(p->pid);
+ if (!acquired) {
+ err = 1;
+ return;
+ }
+
+ if (acquired->pid != p->pid)
+ err = 2;
+ bpf_task_release(acquired);
+}
+
+SEC("tp_btf/task_newtask")
+int BPF_PROG(test_task_from_pid_arg, struct task_struct *task, u64 clone_flags)
+{
+ if (!is_test_kfunc_task())
+ return 0;
+
+ lookup_compare_pid(task);
+ return 0;
+}
+
+SEC("tp_btf/task_newtask")
+int BPF_PROG(test_task_from_pid_current, struct task_struct *task, u64 clone_flags)
+{
+ if (!is_test_kfunc_task())
+ return 0;
+
+ lookup_compare_pid(bpf_get_current_task_btf());
+ return 0;
+}
+
+static int is_pid_lookup_valid(s32 pid)
+{
+ struct task_struct *acquired;
+
+ acquired = bpf_task_from_pid(pid);
+ if (acquired) {
+ bpf_task_release(acquired);
+ return 1;
+ }
+
+ return 0;
+}
+
+SEC("tp_btf/task_newtask")
+int BPF_PROG(test_task_from_pid_invalid, struct task_struct *task, u64 clone_flags)
+{
+ if (!is_test_kfunc_task())
+ return 0;
+
+ bpf_strncmp(task->comm, 12, "foo");
+ bpf_strncmp(task->comm, 16, "foo");
+ bpf_strncmp(&task->comm[8], 4, "foo");
+
+ if (is_pid_lookup_valid(-1)) {
+ err = 1;
+ return 0;
+ }
+
+ if (is_pid_lookup_valid(0xcafef00d)) {
+ err = 2;
+ return 0;
+ }
+
+ return 0;
+}
+
+SEC("tp_btf/task_newtask")
+int BPF_PROG(task_kfunc_acquire_trusted_walked, struct task_struct *task, u64 clone_flags)
+{
+ struct task_struct *acquired;
+
+ /* task->group_leader is listed as a trusted, non-NULL field of task struct. */
+ acquired = bpf_task_acquire(task->group_leader);
+ if (acquired)
+ bpf_task_release(acquired);
+ else
+ err = 1;
+
+
+ return 0;
+}
diff --git a/tools/testing/selftests/bpf/progs/task_local_storage.c b/tools/testing/selftests/bpf/progs/task_local_storage.c
new file mode 100644
index 000000000000..80a0a20db88d
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/task_local_storage.c
@@ -0,0 +1,64 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2021 Facebook */
+
+#include "vmlinux.h"
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
+
+char _license[] SEC("license") = "GPL";
+
+struct {
+ __uint(type, BPF_MAP_TYPE_TASK_STORAGE);
+ __uint(map_flags, BPF_F_NO_PREALLOC);
+ __type(key, int);
+ __type(value, long);
+} enter_id SEC(".maps");
+
+#define MAGIC_VALUE 0xabcd1234
+
+pid_t target_pid = 0;
+int mismatch_cnt = 0;
+int enter_cnt = 0;
+int exit_cnt = 0;
+
+SEC("tp_btf/sys_enter")
+int BPF_PROG(on_enter, struct pt_regs *regs, long id)
+{
+ struct task_struct *task;
+ long *ptr;
+
+ task = bpf_get_current_task_btf();
+ if (task->pid != target_pid)
+ return 0;
+
+ ptr = bpf_task_storage_get(&enter_id, task, 0,
+ BPF_LOCAL_STORAGE_GET_F_CREATE);
+ if (!ptr)
+ return 0;
+
+ __sync_fetch_and_add(&enter_cnt, 1);
+ *ptr = MAGIC_VALUE + enter_cnt;
+
+ return 0;
+}
+
+SEC("tp_btf/sys_exit")
+int BPF_PROG(on_exit, struct pt_regs *regs, long id)
+{
+ struct task_struct *task;
+ long *ptr;
+
+ task = bpf_get_current_task_btf();
+ if (task->pid != target_pid)
+ return 0;
+
+ ptr = bpf_task_storage_get(&enter_id, task, 0,
+ BPF_LOCAL_STORAGE_GET_F_CREATE);
+ if (!ptr)
+ return 0;
+
+ __sync_fetch_and_add(&exit_cnt, 1);
+ if (*ptr != MAGIC_VALUE + exit_cnt)
+ __sync_fetch_and_add(&mismatch_cnt, 1);
+ return 0;
+}
diff --git a/tools/testing/selftests/bpf/progs/task_local_storage_exit_creds.c b/tools/testing/selftests/bpf/progs/task_local_storage_exit_creds.c
new file mode 100644
index 000000000000..41d88ed222ff
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/task_local_storage_exit_creds.c
@@ -0,0 +1,35 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2021 Facebook */
+
+#include "vmlinux.h"
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
+
+char _license[] SEC("license") = "GPL";
+
+struct {
+ __uint(type, BPF_MAP_TYPE_TASK_STORAGE);
+ __uint(map_flags, BPF_F_NO_PREALLOC);
+ __type(key, int);
+ __type(value, __u64);
+} task_storage SEC(".maps");
+
+int run_count = 0;
+int valid_ptr_count = 0;
+int null_ptr_count = 0;
+
+SEC("fentry/exit_creds")
+int BPF_PROG(trace_exit_creds, struct task_struct *task)
+{
+ __u64 *ptr;
+
+ ptr = bpf_task_storage_get(&task_storage, task, 0,
+ BPF_LOCAL_STORAGE_GET_F_CREATE);
+ if (ptr)
+ __sync_fetch_and_add(&valid_ptr_count, 1);
+ else
+ __sync_fetch_and_add(&null_ptr_count, 1);
+
+ __sync_fetch_and_add(&run_count, 1);
+ return 0;
+}
diff --git a/tools/testing/selftests/bpf/progs/task_ls_recursion.c b/tools/testing/selftests/bpf/progs/task_ls_recursion.c
new file mode 100644
index 000000000000..f1853c38aada
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/task_ls_recursion.c
@@ -0,0 +1,90 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2021 Facebook */
+
+#include "vmlinux.h"
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
+
+#ifndef EBUSY
+#define EBUSY 16
+#endif
+
+char _license[] SEC("license") = "GPL";
+int nr_del_errs = 0;
+int test_pid = 0;
+
+struct {
+ __uint(type, BPF_MAP_TYPE_TASK_STORAGE);
+ __uint(map_flags, BPF_F_NO_PREALLOC);
+ __type(key, int);
+ __type(value, long);
+} map_a SEC(".maps");
+
+struct {
+ __uint(type, BPF_MAP_TYPE_TASK_STORAGE);
+ __uint(map_flags, BPF_F_NO_PREALLOC);
+ __type(key, int);
+ __type(value, long);
+} map_b SEC(".maps");
+
+SEC("fentry/bpf_local_storage_update")
+int BPF_PROG(on_update)
+{
+ struct task_struct *task = bpf_get_current_task_btf();
+ long *ptr;
+
+ if (!test_pid || task->pid != test_pid)
+ return 0;
+
+ ptr = bpf_task_storage_get(&map_a, task, 0,
+ BPF_LOCAL_STORAGE_GET_F_CREATE);
+ /* ptr will not be NULL when it is called from
+ * the bpf_task_storage_get(&map_b,...F_CREATE) in
+ * the BPF_PROG(on_enter) below. It is because
+ * the value can be found in map_a and the kernel
+ * does not need to acquire any spin_lock.
+ */
+ if (ptr) {
+ int err;
+
+ *ptr += 1;
+ err = bpf_task_storage_delete(&map_a, task);
+ if (err == -EBUSY)
+ nr_del_errs++;
+ }
+
+ /* This will still fail because map_b is empty and
+ * this BPF_PROG(on_update) has failed to acquire
+ * the percpu busy lock => meaning potential
+ * deadlock is detected and it will fail to create
+ * new storage.
+ */
+ ptr = bpf_task_storage_get(&map_b, task, 0,
+ BPF_LOCAL_STORAGE_GET_F_CREATE);
+ if (ptr)
+ *ptr += 1;
+
+ return 0;
+}
+
+SEC("tp_btf/sys_enter")
+int BPF_PROG(on_enter, struct pt_regs *regs, long id)
+{
+ struct task_struct *task;
+ long *ptr;
+
+ task = bpf_get_current_task_btf();
+ if (!test_pid || task->pid != test_pid)
+ return 0;
+
+ ptr = bpf_task_storage_get(&map_a, task, 0,
+ BPF_LOCAL_STORAGE_GET_F_CREATE);
+ if (ptr && !*ptr)
+ *ptr = 200;
+
+ ptr = bpf_task_storage_get(&map_b, task, 0,
+ BPF_LOCAL_STORAGE_GET_F_CREATE);
+ if (ptr && !*ptr)
+ *ptr = 100;
+ return 0;
+}
diff --git a/tools/testing/selftests/bpf/progs/task_storage_nodeadlock.c b/tools/testing/selftests/bpf/progs/task_storage_nodeadlock.c
new file mode 100644
index 000000000000..ea2dbb80f7b3
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/task_storage_nodeadlock.c
@@ -0,0 +1,47 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include "vmlinux.h"
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
+
+char _license[] SEC("license") = "GPL";
+
+#ifndef EBUSY
+#define EBUSY 16
+#endif
+
+extern bool CONFIG_PREEMPT __kconfig __weak;
+int nr_get_errs = 0;
+int nr_del_errs = 0;
+
+struct {
+ __uint(type, BPF_MAP_TYPE_TASK_STORAGE);
+ __uint(map_flags, BPF_F_NO_PREALLOC);
+ __type(key, int);
+ __type(value, int);
+} task_storage SEC(".maps");
+
+SEC("lsm.s/socket_post_create")
+int BPF_PROG(socket_post_create, struct socket *sock, int family, int type,
+ int protocol, int kern)
+{
+ struct task_struct *task;
+ int ret, zero = 0;
+ int *value;
+
+ if (!CONFIG_PREEMPT)
+ return 0;
+
+ task = bpf_get_current_task_btf();
+ value = bpf_task_storage_get(&task_storage, task, &zero,
+ BPF_LOCAL_STORAGE_GET_F_CREATE);
+ if (!value)
+ __sync_fetch_and_add(&nr_get_errs, 1);
+
+ ret = bpf_task_storage_delete(&task_storage,
+ bpf_get_current_task_btf());
+ if (ret == -EBUSY)
+ __sync_fetch_and_add(&nr_del_errs, 1);
+
+ return 0;
+}
diff --git a/tools/testing/selftests/bpf/progs/tcp_ca_incompl_cong_ops.c b/tools/testing/selftests/bpf/progs/tcp_ca_incompl_cong_ops.c
new file mode 100644
index 000000000000..7bb872fb22dd
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/tcp_ca_incompl_cong_ops.c
@@ -0,0 +1,35 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include "vmlinux.h"
+
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
+
+char _license[] SEC("license") = "GPL";
+
+static inline struct tcp_sock *tcp_sk(const struct sock *sk)
+{
+ return (struct tcp_sock *)sk;
+}
+
+SEC("struct_ops/incompl_cong_ops_ssthresh")
+__u32 BPF_PROG(incompl_cong_ops_ssthresh, struct sock *sk)
+{
+ return tcp_sk(sk)->snd_ssthresh;
+}
+
+SEC("struct_ops/incompl_cong_ops_undo_cwnd")
+__u32 BPF_PROG(incompl_cong_ops_undo_cwnd, struct sock *sk)
+{
+ return tcp_sk(sk)->snd_cwnd;
+}
+
+SEC(".struct_ops")
+struct tcp_congestion_ops incompl_cong_ops = {
+ /* Intentionally leaving out any of the required cong_avoid() and
+ * cong_control() here.
+ */
+ .ssthresh = (void *)incompl_cong_ops_ssthresh,
+ .undo_cwnd = (void *)incompl_cong_ops_undo_cwnd,
+ .name = "bpf_incompl_ops",
+};
diff --git a/tools/testing/selftests/bpf/progs/tcp_ca_unsupp_cong_op.c b/tools/testing/selftests/bpf/progs/tcp_ca_unsupp_cong_op.c
new file mode 100644
index 000000000000..c06f4a41c21a
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/tcp_ca_unsupp_cong_op.c
@@ -0,0 +1,21 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include "vmlinux.h"
+
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
+
+char _license[] SEC("license") = "GPL";
+
+SEC("struct_ops/unsupp_cong_op_get_info")
+size_t BPF_PROG(unsupp_cong_op_get_info, struct sock *sk, u32 ext, int *attr,
+ union tcp_cc_info *info)
+{
+ return 0;
+}
+
+SEC(".struct_ops")
+struct tcp_congestion_ops unsupp_cong_op = {
+ .get_info = (void *)unsupp_cong_op_get_info,
+ .name = "bpf_unsupp_op",
+};
diff --git a/tools/testing/selftests/bpf/progs/tcp_ca_update.c b/tools/testing/selftests/bpf/progs/tcp_ca_update.c
new file mode 100644
index 000000000000..b93a0ed33057
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/tcp_ca_update.c
@@ -0,0 +1,80 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include "vmlinux.h"
+
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
+
+char _license[] SEC("license") = "GPL";
+
+int ca1_cnt = 0;
+int ca2_cnt = 0;
+
+static inline struct tcp_sock *tcp_sk(const struct sock *sk)
+{
+ return (struct tcp_sock *)sk;
+}
+
+SEC("struct_ops/ca_update_1_init")
+void BPF_PROG(ca_update_1_init, struct sock *sk)
+{
+ ca1_cnt++;
+}
+
+SEC("struct_ops/ca_update_2_init")
+void BPF_PROG(ca_update_2_init, struct sock *sk)
+{
+ ca2_cnt++;
+}
+
+SEC("struct_ops/ca_update_cong_control")
+void BPF_PROG(ca_update_cong_control, struct sock *sk,
+ const struct rate_sample *rs)
+{
+}
+
+SEC("struct_ops/ca_update_ssthresh")
+__u32 BPF_PROG(ca_update_ssthresh, struct sock *sk)
+{
+ return tcp_sk(sk)->snd_ssthresh;
+}
+
+SEC("struct_ops/ca_update_undo_cwnd")
+__u32 BPF_PROG(ca_update_undo_cwnd, struct sock *sk)
+{
+ return tcp_sk(sk)->snd_cwnd;
+}
+
+SEC(".struct_ops.link")
+struct tcp_congestion_ops ca_update_1 = {
+ .init = (void *)ca_update_1_init,
+ .cong_control = (void *)ca_update_cong_control,
+ .ssthresh = (void *)ca_update_ssthresh,
+ .undo_cwnd = (void *)ca_update_undo_cwnd,
+ .name = "tcp_ca_update",
+};
+
+SEC(".struct_ops.link")
+struct tcp_congestion_ops ca_update_2 = {
+ .init = (void *)ca_update_2_init,
+ .cong_control = (void *)ca_update_cong_control,
+ .ssthresh = (void *)ca_update_ssthresh,
+ .undo_cwnd = (void *)ca_update_undo_cwnd,
+ .name = "tcp_ca_update",
+};
+
+SEC(".struct_ops.link")
+struct tcp_congestion_ops ca_wrong = {
+ .cong_control = (void *)ca_update_cong_control,
+ .ssthresh = (void *)ca_update_ssthresh,
+ .undo_cwnd = (void *)ca_update_undo_cwnd,
+ .name = "tcp_ca_wrong",
+};
+
+SEC(".struct_ops")
+struct tcp_congestion_ops ca_no_link = {
+ .cong_control = (void *)ca_update_cong_control,
+ .ssthresh = (void *)ca_update_ssthresh,
+ .undo_cwnd = (void *)ca_update_undo_cwnd,
+ .name = "tcp_ca_no_link",
+};
diff --git a/tools/testing/selftests/bpf/progs/tcp_ca_write_sk_pacing.c b/tools/testing/selftests/bpf/progs/tcp_ca_write_sk_pacing.c
new file mode 100644
index 000000000000..0724a79cec78
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/tcp_ca_write_sk_pacing.c
@@ -0,0 +1,71 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include "vmlinux.h"
+
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
+
+char _license[] SEC("license") = "GPL";
+
+#define USEC_PER_SEC 1000000UL
+
+#define min(a, b) ((a) < (b) ? (a) : (b))
+
+static inline struct tcp_sock *tcp_sk(const struct sock *sk)
+{
+ return (struct tcp_sock *)sk;
+}
+
+static inline unsigned int tcp_left_out(const struct tcp_sock *tp)
+{
+ return tp->sacked_out + tp->lost_out;
+}
+
+static inline unsigned int tcp_packets_in_flight(const struct tcp_sock *tp)
+{
+ return tp->packets_out - tcp_left_out(tp) + tp->retrans_out;
+}
+
+SEC("struct_ops/write_sk_pacing_init")
+void BPF_PROG(write_sk_pacing_init, struct sock *sk)
+{
+#ifdef ENABLE_ATOMICS_TESTS
+ __sync_bool_compare_and_swap(&sk->sk_pacing_status, SK_PACING_NONE,
+ SK_PACING_NEEDED);
+#else
+ sk->sk_pacing_status = SK_PACING_NEEDED;
+#endif
+}
+
+SEC("struct_ops/write_sk_pacing_cong_control")
+void BPF_PROG(write_sk_pacing_cong_control, struct sock *sk,
+ const struct rate_sample *rs)
+{
+ struct tcp_sock *tp = tcp_sk(sk);
+ unsigned long rate =
+ ((tp->snd_cwnd * tp->mss_cache * USEC_PER_SEC) << 3) /
+ (tp->srtt_us ?: 1U << 3);
+ sk->sk_pacing_rate = min(rate, sk->sk_max_pacing_rate);
+ tp->app_limited = (tp->delivered + tcp_packets_in_flight(tp)) ?: 1;
+}
+
+SEC("struct_ops/write_sk_pacing_ssthresh")
+__u32 BPF_PROG(write_sk_pacing_ssthresh, struct sock *sk)
+{
+ return tcp_sk(sk)->snd_ssthresh;
+}
+
+SEC("struct_ops/write_sk_pacing_undo_cwnd")
+__u32 BPF_PROG(write_sk_pacing_undo_cwnd, struct sock *sk)
+{
+ return tcp_sk(sk)->snd_cwnd;
+}
+
+SEC(".struct_ops")
+struct tcp_congestion_ops write_sk_pacing = {
+ .init = (void *)write_sk_pacing_init,
+ .cong_control = (void *)write_sk_pacing_cong_control,
+ .ssthresh = (void *)write_sk_pacing_ssthresh,
+ .undo_cwnd = (void *)write_sk_pacing_undo_cwnd,
+ .name = "bpf_w_sk_pacing",
+};
diff --git a/tools/testing/selftests/bpf/progs/tcp_rtt.c b/tools/testing/selftests/bpf/progs/tcp_rtt.c
index 0cb3204ddb18..0988d79f1587 100644
--- a/tools/testing/selftests/bpf/progs/tcp_rtt.c
+++ b/tools/testing/selftests/bpf/progs/tcp_rtt.c
@@ -3,7 +3,6 @@
#include <bpf/bpf_helpers.h>
char _license[] SEC("license") = "GPL";
-__u32 _version SEC("version") = 1;
struct tcp_rtt_storage {
__u32 invoked;
diff --git a/tools/testing/selftests/bpf/progs/test_access_variable_array.c b/tools/testing/selftests/bpf/progs/test_access_variable_array.c
new file mode 100644
index 000000000000..808c49b79889
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/test_access_variable_array.c
@@ -0,0 +1,19 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2023 Bytedance */
+
+#include "vmlinux.h"
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
+
+unsigned long span = 0;
+
+SEC("fentry/load_balance")
+int BPF_PROG(fentry_fentry, int this_cpu, struct rq *this_rq,
+ struct sched_domain *sd)
+{
+ span = sd->span[0];
+
+ return 0;
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/test_assign_reuse.c b/tools/testing/selftests/bpf/progs/test_assign_reuse.c
new file mode 100644
index 000000000000..4f2e2321ea06
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/test_assign_reuse.c
@@ -0,0 +1,142 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2023 Isovalent */
+#include <stdbool.h>
+#include <linux/bpf.h>
+#include <linux/if_ether.h>
+#include <linux/in.h>
+#include <linux/ip.h>
+#include <linux/ipv6.h>
+#include <linux/tcp.h>
+#include <linux/udp.h>
+#include <bpf/bpf_endian.h>
+#include <bpf/bpf_helpers.h>
+#include <linux/pkt_cls.h>
+
+char LICENSE[] SEC("license") = "GPL";
+
+__u64 sk_cookie_seen;
+__u64 reuseport_executed;
+union {
+ struct tcphdr tcp;
+ struct udphdr udp;
+} headers;
+
+const volatile __u16 dest_port;
+
+struct {
+ __uint(type, BPF_MAP_TYPE_SOCKMAP);
+ __uint(max_entries, 1);
+ __type(key, __u32);
+ __type(value, __u64);
+} sk_map SEC(".maps");
+
+SEC("sk_reuseport")
+int reuse_accept(struct sk_reuseport_md *ctx)
+{
+ reuseport_executed++;
+
+ if (ctx->ip_protocol == IPPROTO_TCP) {
+ if (ctx->data + sizeof(headers.tcp) > ctx->data_end)
+ return SK_DROP;
+
+ if (__builtin_memcmp(&headers.tcp, ctx->data, sizeof(headers.tcp)) != 0)
+ return SK_DROP;
+ } else if (ctx->ip_protocol == IPPROTO_UDP) {
+ if (ctx->data + sizeof(headers.udp) > ctx->data_end)
+ return SK_DROP;
+
+ if (__builtin_memcmp(&headers.udp, ctx->data, sizeof(headers.udp)) != 0)
+ return SK_DROP;
+ } else {
+ return SK_DROP;
+ }
+
+ sk_cookie_seen = bpf_get_socket_cookie(ctx->sk);
+ return SK_PASS;
+}
+
+SEC("sk_reuseport")
+int reuse_drop(struct sk_reuseport_md *ctx)
+{
+ reuseport_executed++;
+ sk_cookie_seen = 0;
+ return SK_DROP;
+}
+
+static int
+assign_sk(struct __sk_buff *skb)
+{
+ int zero = 0, ret = 0;
+ struct bpf_sock *sk;
+
+ sk = bpf_map_lookup_elem(&sk_map, &zero);
+ if (!sk)
+ return TC_ACT_SHOT;
+ ret = bpf_sk_assign(skb, sk, 0);
+ bpf_sk_release(sk);
+ return ret ? TC_ACT_SHOT : TC_ACT_OK;
+}
+
+static bool
+maybe_assign_tcp(struct __sk_buff *skb, struct tcphdr *th)
+{
+ if (th + 1 > (void *)(long)(skb->data_end))
+ return TC_ACT_SHOT;
+
+ if (!th->syn || th->ack || th->dest != bpf_htons(dest_port))
+ return TC_ACT_OK;
+
+ __builtin_memcpy(&headers.tcp, th, sizeof(headers.tcp));
+ return assign_sk(skb);
+}
+
+static bool
+maybe_assign_udp(struct __sk_buff *skb, struct udphdr *uh)
+{
+ if (uh + 1 > (void *)(long)(skb->data_end))
+ return TC_ACT_SHOT;
+
+ if (uh->dest != bpf_htons(dest_port))
+ return TC_ACT_OK;
+
+ __builtin_memcpy(&headers.udp, uh, sizeof(headers.udp));
+ return assign_sk(skb);
+}
+
+SEC("tc")
+int tc_main(struct __sk_buff *skb)
+{
+ void *data_end = (void *)(long)skb->data_end;
+ void *data = (void *)(long)skb->data;
+ struct ethhdr *eth;
+
+ eth = (struct ethhdr *)(data);
+ if (eth + 1 > data_end)
+ return TC_ACT_SHOT;
+
+ if (eth->h_proto == bpf_htons(ETH_P_IP)) {
+ struct iphdr *iph = (struct iphdr *)(data + sizeof(*eth));
+
+ if (iph + 1 > data_end)
+ return TC_ACT_SHOT;
+
+ if (iph->protocol == IPPROTO_TCP)
+ return maybe_assign_tcp(skb, (struct tcphdr *)(iph + 1));
+ else if (iph->protocol == IPPROTO_UDP)
+ return maybe_assign_udp(skb, (struct udphdr *)(iph + 1));
+ else
+ return TC_ACT_SHOT;
+ } else {
+ struct ipv6hdr *ip6h = (struct ipv6hdr *)(data + sizeof(*eth));
+
+ if (ip6h + 1 > data_end)
+ return TC_ACT_SHOT;
+
+ if (ip6h->nexthdr == IPPROTO_TCP)
+ return maybe_assign_tcp(skb, (struct tcphdr *)(ip6h + 1));
+ else if (ip6h->nexthdr == IPPROTO_UDP)
+ return maybe_assign_udp(skb, (struct udphdr *)(ip6h + 1));
+ else
+ return TC_ACT_SHOT;
+ }
+}
diff --git a/tools/testing/selftests/bpf/progs/test_attach_kprobe_sleepable.c b/tools/testing/selftests/bpf/progs/test_attach_kprobe_sleepable.c
new file mode 100644
index 000000000000..f548b7446218
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/test_attach_kprobe_sleepable.c
@@ -0,0 +1,23 @@
+// SPDX-License-Identifier: GPL-2.0
+// Copyright (c) 2017 Facebook
+
+#include "vmlinux.h"
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
+#include <bpf/bpf_core_read.h>
+#include "bpf_misc.h"
+
+int kprobe_res = 0;
+
+/**
+ * This program will be manually made sleepable on the userspace side
+ * and should thus be unattachable.
+ */
+SEC("kprobe/" SYS_PREFIX "sys_nanosleep")
+int handle_kprobe_sleepable(struct pt_regs *ctx)
+{
+ kprobe_res = 1;
+ return 0;
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/test_attach_probe.c b/tools/testing/selftests/bpf/progs/test_attach_probe.c
index 8056a4c6d918..68466a6ad18c 100644
--- a/tools/testing/selftests/bpf/progs/test_attach_probe.c
+++ b/tools/testing/selftests/bpf/progs/test_attach_probe.c
@@ -1,42 +1,125 @@
// SPDX-License-Identifier: GPL-2.0
// Copyright (c) 2017 Facebook
-#include <linux/ptrace.h>
-#include <linux/bpf.h>
+#include "vmlinux.h"
#include <bpf/bpf_helpers.h>
#include <bpf/bpf_tracing.h>
+#include <bpf/bpf_core_read.h>
+#include "bpf_misc.h"
-int kprobe_res = 0;
-int kretprobe_res = 0;
-int uprobe_res = 0;
-int uretprobe_res = 0;
+int kprobe2_res = 0;
+int kretprobe2_res = 0;
+int uprobe_byname_res = 0;
+int uretprobe_byname_res = 0;
+int uprobe_byname2_res = 0;
+int uretprobe_byname2_res = 0;
+int uprobe_byname3_sleepable_res = 0;
+int uprobe_byname3_res = 0;
+int uretprobe_byname3_sleepable_res = 0;
+int uretprobe_byname3_res = 0;
+void *user_ptr = 0;
-SEC("kprobe/sys_nanosleep")
-int handle_kprobe(struct pt_regs *ctx)
+SEC("ksyscall/nanosleep")
+int BPF_KSYSCALL(handle_kprobe_auto, struct __kernel_timespec *req, struct __kernel_timespec *rem)
{
- kprobe_res = 1;
+ kprobe2_res = 11;
return 0;
}
-SEC("kretprobe/sys_nanosleep")
-int BPF_KRETPROBE(handle_kretprobe)
+SEC("kretsyscall/nanosleep")
+int BPF_KRETPROBE(handle_kretprobe_auto, int ret)
+{
+ kretprobe2_res = 22;
+ return ret;
+}
+
+SEC("uprobe")
+int handle_uprobe_ref_ctr(struct pt_regs *ctx)
+{
+ return 0;
+}
+
+SEC("uretprobe")
+int handle_uretprobe_ref_ctr(struct pt_regs *ctx)
{
- kretprobe_res = 2;
return 0;
}
-SEC("uprobe/trigger_func")
-int handle_uprobe(struct pt_regs *ctx)
+SEC("uprobe")
+int handle_uprobe_byname(struct pt_regs *ctx)
{
- uprobe_res = 3;
+ uprobe_byname_res = 5;
return 0;
}
-SEC("uretprobe/trigger_func")
-int handle_uretprobe(struct pt_regs *ctx)
+/* use auto-attach format for section definition. */
+SEC("uretprobe//proc/self/exe:trigger_func2")
+int handle_uretprobe_byname(struct pt_regs *ctx)
{
- uretprobe_res = 4;
+ uretprobe_byname_res = 6;
return 0;
}
+SEC("uprobe")
+int BPF_UPROBE(handle_uprobe_byname2, const char *pathname, const char *mode)
+{
+ char mode_buf[2] = {};
+
+ /* verify fopen mode */
+ bpf_probe_read_user(mode_buf, sizeof(mode_buf), mode);
+ if (mode_buf[0] == 'r' && mode_buf[1] == 0)
+ uprobe_byname2_res = 7;
+ return 0;
+}
+
+SEC("uretprobe")
+int BPF_URETPROBE(handle_uretprobe_byname2, void *ret)
+{
+ uretprobe_byname2_res = 8;
+ return 0;
+}
+
+static __always_inline bool verify_sleepable_user_copy(void)
+{
+ char data[9];
+
+ bpf_copy_from_user(data, sizeof(data), user_ptr);
+ return bpf_strncmp(data, sizeof(data), "test_data") == 0;
+}
+
+SEC("uprobe.s//proc/self/exe:trigger_func3")
+int handle_uprobe_byname3_sleepable(struct pt_regs *ctx)
+{
+ if (verify_sleepable_user_copy())
+ uprobe_byname3_sleepable_res = 9;
+ return 0;
+}
+
+/**
+ * same target as the uprobe.s above to force sleepable and non-sleepable
+ * programs in the same bpf_prog_array
+ */
+SEC("uprobe//proc/self/exe:trigger_func3")
+int handle_uprobe_byname3(struct pt_regs *ctx)
+{
+ uprobe_byname3_res = 10;
+ return 0;
+}
+
+SEC("uretprobe.s//proc/self/exe:trigger_func3")
+int handle_uretprobe_byname3_sleepable(struct pt_regs *ctx)
+{
+ if (verify_sleepable_user_copy())
+ uretprobe_byname3_sleepable_res = 11;
+ return 0;
+}
+
+SEC("uretprobe//proc/self/exe:trigger_func3")
+int handle_uretprobe_byname3(struct pt_regs *ctx)
+{
+ uretprobe_byname3_res = 12;
+ return 0;
+}
+
+
char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/test_attach_probe_manual.c b/tools/testing/selftests/bpf/progs/test_attach_probe_manual.c
new file mode 100644
index 000000000000..7f08bce94596
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/test_attach_probe_manual.c
@@ -0,0 +1,53 @@
+// SPDX-License-Identifier: GPL-2.0
+// Copyright (c) 2017 Facebook
+
+#include "vmlinux.h"
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
+#include <bpf/bpf_core_read.h>
+#include "bpf_misc.h"
+
+int kprobe_res = 0;
+int kretprobe_res = 0;
+int uprobe_res = 0;
+int uretprobe_res = 0;
+int uprobe_byname_res = 0;
+void *user_ptr = 0;
+
+SEC("kprobe")
+int handle_kprobe(struct pt_regs *ctx)
+{
+ kprobe_res = 1;
+ return 0;
+}
+
+SEC("kretprobe")
+int handle_kretprobe(struct pt_regs *ctx)
+{
+ kretprobe_res = 2;
+ return 0;
+}
+
+SEC("uprobe")
+int handle_uprobe(struct pt_regs *ctx)
+{
+ uprobe_res = 3;
+ return 0;
+}
+
+SEC("uretprobe")
+int handle_uretprobe(struct pt_regs *ctx)
+{
+ uretprobe_res = 4;
+ return 0;
+}
+
+SEC("uprobe")
+int handle_uprobe_byname(struct pt_regs *ctx)
+{
+ uprobe_byname_res = 5;
+ return 0;
+}
+
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/test_autoattach.c b/tools/testing/selftests/bpf/progs/test_autoattach.c
new file mode 100644
index 000000000000..11a44493ebce
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/test_autoattach.c
@@ -0,0 +1,23 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2022 Google */
+
+#include "vmlinux.h"
+#include <bpf/bpf_tracing.h>
+
+bool prog1_called = false;
+bool prog2_called = false;
+
+SEC("raw_tp/sys_enter")
+int prog1(const void *ctx)
+{
+ prog1_called = true;
+ return 0;
+}
+
+SEC("raw_tp/sys_exit")
+int prog2(const void *ctx)
+{
+ prog2_called = true;
+ return 0;
+}
+
diff --git a/tools/testing/selftests/bpf/progs/test_autoload.c b/tools/testing/selftests/bpf/progs/test_autoload.c
new file mode 100644
index 000000000000..62c8cdec6d5d
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/test_autoload.c
@@ -0,0 +1,40 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2020 Facebook */
+
+#include "vmlinux.h"
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
+#include <bpf/bpf_core_read.h>
+
+bool prog1_called = false;
+bool prog2_called = false;
+bool prog3_called = false;
+
+SEC("raw_tp/sys_enter")
+int prog1(const void *ctx)
+{
+ prog1_called = true;
+ return 0;
+}
+
+SEC("raw_tp/sys_exit")
+int prog2(const void *ctx)
+{
+ prog2_called = true;
+ return 0;
+}
+
+struct fake_kernel_struct {
+ int whatever;
+} __attribute__((preserve_access_index));
+
+SEC("fentry/unexisting-kprobe-will-fail-if-loaded")
+int prog3(const void *ctx)
+{
+ struct fake_kernel_struct *fake = (void *)ctx;
+ fake->whatever = 123;
+ prog3_called = true;
+ return 0;
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/test_bpf_cookie.c b/tools/testing/selftests/bpf/progs/test_bpf_cookie.c
new file mode 100644
index 000000000000..5a3a80f751c4
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/test_bpf_cookie.c
@@ -0,0 +1,121 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2021 Facebook */
+
+#include "vmlinux.h"
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
+#include <errno.h>
+
+int my_tid;
+
+__u64 kprobe_res;
+__u64 kprobe_multi_res;
+__u64 kretprobe_res;
+__u64 uprobe_res;
+__u64 uretprobe_res;
+__u64 tp_res;
+__u64 pe_res;
+__u64 fentry_res;
+__u64 fexit_res;
+__u64 fmod_ret_res;
+__u64 lsm_res;
+
+static void update(void *ctx, __u64 *res)
+{
+ if (my_tid != (u32)bpf_get_current_pid_tgid())
+ return;
+
+ *res |= bpf_get_attach_cookie(ctx);
+}
+
+SEC("kprobe")
+int handle_kprobe(struct pt_regs *ctx)
+{
+ update(ctx, &kprobe_res);
+ return 0;
+}
+
+SEC("kretprobe")
+int handle_kretprobe(struct pt_regs *ctx)
+{
+ update(ctx, &kretprobe_res);
+ return 0;
+}
+
+SEC("uprobe")
+int handle_uprobe(struct pt_regs *ctx)
+{
+ update(ctx, &uprobe_res);
+ return 0;
+}
+
+SEC("uretprobe")
+int handle_uretprobe(struct pt_regs *ctx)
+{
+ update(ctx, &uretprobe_res);
+ return 0;
+}
+
+/* bpf_prog_array, used by kernel internally to keep track of attached BPF
+ * programs to a given BPF hook (e.g., for tracepoints) doesn't allow the same
+ * BPF program to be attached multiple times. So have three identical copies
+ * ready to attach to the same tracepoint.
+ */
+SEC("tp/syscalls/sys_enter_nanosleep")
+int handle_tp1(struct pt_regs *ctx)
+{
+ update(ctx, &tp_res);
+ return 0;
+}
+SEC("tp/syscalls/sys_enter_nanosleep")
+int handle_tp2(struct pt_regs *ctx)
+{
+ update(ctx, &tp_res);
+ return 0;
+}
+SEC("tp/syscalls/sys_enter_nanosleep")
+int handle_tp3(void *ctx)
+{
+ update(ctx, &tp_res);
+ return 1;
+}
+
+SEC("perf_event")
+int handle_pe(struct pt_regs *ctx)
+{
+ update(ctx, &pe_res);
+ return 0;
+}
+
+SEC("fentry/bpf_fentry_test1")
+int BPF_PROG(fentry_test1, int a)
+{
+ update(ctx, &fentry_res);
+ return 0;
+}
+
+SEC("fexit/bpf_fentry_test1")
+int BPF_PROG(fexit_test1, int a, int ret)
+{
+ update(ctx, &fexit_res);
+ return 0;
+}
+
+SEC("fmod_ret/bpf_modify_return_test")
+int BPF_PROG(fmod_ret_test, int _a, int *_b, int _ret)
+{
+ update(ctx, &fmod_ret_res);
+ return 1234;
+}
+
+SEC("lsm/file_mprotect")
+int BPF_PROG(test_int_hook, struct vm_area_struct *vma,
+ unsigned long reqprot, unsigned long prot, int ret)
+{
+ if (my_tid != (u32)bpf_get_current_pid_tgid())
+ return ret;
+ update(ctx, &lsm_res);
+ return -EPERM;
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/test_bpf_ma.c b/tools/testing/selftests/bpf/progs/test_bpf_ma.c
new file mode 100644
index 000000000000..3494ca30fa7f
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/test_bpf_ma.c
@@ -0,0 +1,289 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (C) 2023. Huawei Technologies Co., Ltd */
+#include <vmlinux.h>
+#include <bpf/bpf_tracing.h>
+#include <bpf/bpf_helpers.h>
+
+#include "bpf_experimental.h"
+#include "bpf_misc.h"
+
+#ifndef ARRAY_SIZE
+#define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0]))
+#endif
+
+struct generic_map_value {
+ void *data;
+};
+
+char _license[] SEC("license") = "GPL";
+
+const unsigned int data_sizes[] = {16, 32, 64, 96, 128, 192, 256, 512, 1024, 2048, 4096};
+const volatile unsigned int data_btf_ids[ARRAY_SIZE(data_sizes)] = {};
+
+const unsigned int percpu_data_sizes[] = {8, 16, 32, 64, 96, 128, 192, 256, 512};
+const volatile unsigned int percpu_data_btf_ids[ARRAY_SIZE(data_sizes)] = {};
+
+int err = 0;
+u32 pid = 0;
+
+#define DEFINE_ARRAY_WITH_KPTR(_size) \
+ struct bin_data_##_size { \
+ char data[_size - sizeof(void *)]; \
+ }; \
+ /* See Commit 5d8d6634ccc, force btf generation for type bin_data_##_size */ \
+ struct bin_data_##_size *__bin_data_##_size; \
+ struct map_value_##_size { \
+ struct bin_data_##_size __kptr * data; \
+ }; \
+ struct { \
+ __uint(type, BPF_MAP_TYPE_ARRAY); \
+ __type(key, int); \
+ __type(value, struct map_value_##_size); \
+ __uint(max_entries, 128); \
+ } array_##_size SEC(".maps")
+
+#define DEFINE_ARRAY_WITH_PERCPU_KPTR(_size) \
+ struct percpu_bin_data_##_size { \
+ char data[_size]; \
+ }; \
+ struct percpu_bin_data_##_size *__percpu_bin_data_##_size; \
+ struct map_value_percpu_##_size { \
+ struct percpu_bin_data_##_size __percpu_kptr * data; \
+ }; \
+ struct { \
+ __uint(type, BPF_MAP_TYPE_ARRAY); \
+ __type(key, int); \
+ __type(value, struct map_value_percpu_##_size); \
+ __uint(max_entries, 128); \
+ } array_percpu_##_size SEC(".maps")
+
+static __always_inline void batch_alloc(struct bpf_map *map, unsigned int batch, unsigned int idx)
+{
+ struct generic_map_value *value;
+ unsigned int i, key;
+ void *old, *new;
+
+ for (i = 0; i < batch; i++) {
+ key = i;
+ value = bpf_map_lookup_elem(map, &key);
+ if (!value) {
+ err = 1;
+ return;
+ }
+ new = bpf_obj_new_impl(data_btf_ids[idx], NULL);
+ if (!new) {
+ err = 2;
+ return;
+ }
+ old = bpf_kptr_xchg(&value->data, new);
+ if (old) {
+ bpf_obj_drop(old);
+ err = 3;
+ return;
+ }
+ }
+}
+
+static __always_inline void batch_free(struct bpf_map *map, unsigned int batch, unsigned int idx)
+{
+ struct generic_map_value *value;
+ unsigned int i, key;
+ void *old;
+
+ for (i = 0; i < batch; i++) {
+ key = i;
+ value = bpf_map_lookup_elem(map, &key);
+ if (!value) {
+ err = 4;
+ return;
+ }
+ old = bpf_kptr_xchg(&value->data, NULL);
+ if (!old) {
+ err = 5;
+ return;
+ }
+ bpf_obj_drop(old);
+ }
+}
+
+static __always_inline void batch_percpu_alloc(struct bpf_map *map, unsigned int batch,
+ unsigned int idx)
+{
+ struct generic_map_value *value;
+ unsigned int i, key;
+ void *old, *new;
+
+ for (i = 0; i < batch; i++) {
+ key = i;
+ value = bpf_map_lookup_elem(map, &key);
+ if (!value) {
+ err = 1;
+ return;
+ }
+ /* per-cpu allocator may not be able to refill in time */
+ new = bpf_percpu_obj_new_impl(percpu_data_btf_ids[idx], NULL);
+ if (!new)
+ continue;
+
+ old = bpf_kptr_xchg(&value->data, new);
+ if (old) {
+ bpf_percpu_obj_drop(old);
+ err = 2;
+ return;
+ }
+ }
+}
+
+static __always_inline void batch_percpu_free(struct bpf_map *map, unsigned int batch,
+ unsigned int idx)
+{
+ struct generic_map_value *value;
+ unsigned int i, key;
+ void *old;
+
+ for (i = 0; i < batch; i++) {
+ key = i;
+ value = bpf_map_lookup_elem(map, &key);
+ if (!value) {
+ err = 3;
+ return;
+ }
+ old = bpf_kptr_xchg(&value->data, NULL);
+ if (!old)
+ continue;
+ bpf_percpu_obj_drop(old);
+ }
+}
+
+#define CALL_BATCH_ALLOC(size, batch, idx) \
+ batch_alloc((struct bpf_map *)(&array_##size), batch, idx)
+
+#define CALL_BATCH_ALLOC_FREE(size, batch, idx) \
+ do { \
+ batch_alloc((struct bpf_map *)(&array_##size), batch, idx); \
+ batch_free((struct bpf_map *)(&array_##size), batch, idx); \
+ } while (0)
+
+#define CALL_BATCH_PERCPU_ALLOC(size, batch, idx) \
+ batch_percpu_alloc((struct bpf_map *)(&array_percpu_##size), batch, idx)
+
+#define CALL_BATCH_PERCPU_ALLOC_FREE(size, batch, idx) \
+ do { \
+ batch_percpu_alloc((struct bpf_map *)(&array_percpu_##size), batch, idx); \
+ batch_percpu_free((struct bpf_map *)(&array_percpu_##size), batch, idx); \
+ } while (0)
+
+/* kptr doesn't support bin_data_8 which is a zero-sized array */
+DEFINE_ARRAY_WITH_KPTR(16);
+DEFINE_ARRAY_WITH_KPTR(32);
+DEFINE_ARRAY_WITH_KPTR(64);
+DEFINE_ARRAY_WITH_KPTR(96);
+DEFINE_ARRAY_WITH_KPTR(128);
+DEFINE_ARRAY_WITH_KPTR(192);
+DEFINE_ARRAY_WITH_KPTR(256);
+DEFINE_ARRAY_WITH_KPTR(512);
+DEFINE_ARRAY_WITH_KPTR(1024);
+DEFINE_ARRAY_WITH_KPTR(2048);
+DEFINE_ARRAY_WITH_KPTR(4096);
+
+DEFINE_ARRAY_WITH_PERCPU_KPTR(8);
+DEFINE_ARRAY_WITH_PERCPU_KPTR(16);
+DEFINE_ARRAY_WITH_PERCPU_KPTR(32);
+DEFINE_ARRAY_WITH_PERCPU_KPTR(64);
+DEFINE_ARRAY_WITH_PERCPU_KPTR(96);
+DEFINE_ARRAY_WITH_PERCPU_KPTR(128);
+DEFINE_ARRAY_WITH_PERCPU_KPTR(192);
+DEFINE_ARRAY_WITH_PERCPU_KPTR(256);
+DEFINE_ARRAY_WITH_PERCPU_KPTR(512);
+
+SEC("?fentry/" SYS_PREFIX "sys_nanosleep")
+int test_batch_alloc_free(void *ctx)
+{
+ if ((u32)bpf_get_current_pid_tgid() != pid)
+ return 0;
+
+ /* Alloc 128 16-bytes objects in batch to trigger refilling,
+ * then free 128 16-bytes objects in batch to trigger freeing.
+ */
+ CALL_BATCH_ALLOC_FREE(16, 128, 0);
+ CALL_BATCH_ALLOC_FREE(32, 128, 1);
+ CALL_BATCH_ALLOC_FREE(64, 128, 2);
+ CALL_BATCH_ALLOC_FREE(96, 128, 3);
+ CALL_BATCH_ALLOC_FREE(128, 128, 4);
+ CALL_BATCH_ALLOC_FREE(192, 128, 5);
+ CALL_BATCH_ALLOC_FREE(256, 128, 6);
+ CALL_BATCH_ALLOC_FREE(512, 64, 7);
+ CALL_BATCH_ALLOC_FREE(1024, 32, 8);
+ CALL_BATCH_ALLOC_FREE(2048, 16, 9);
+ CALL_BATCH_ALLOC_FREE(4096, 8, 10);
+
+ return 0;
+}
+
+SEC("?fentry/" SYS_PREFIX "sys_nanosleep")
+int test_free_through_map_free(void *ctx)
+{
+ if ((u32)bpf_get_current_pid_tgid() != pid)
+ return 0;
+
+ /* Alloc 128 16-bytes objects in batch to trigger refilling,
+ * then free these objects through map free.
+ */
+ CALL_BATCH_ALLOC(16, 128, 0);
+ CALL_BATCH_ALLOC(32, 128, 1);
+ CALL_BATCH_ALLOC(64, 128, 2);
+ CALL_BATCH_ALLOC(96, 128, 3);
+ CALL_BATCH_ALLOC(128, 128, 4);
+ CALL_BATCH_ALLOC(192, 128, 5);
+ CALL_BATCH_ALLOC(256, 128, 6);
+ CALL_BATCH_ALLOC(512, 64, 7);
+ CALL_BATCH_ALLOC(1024, 32, 8);
+ CALL_BATCH_ALLOC(2048, 16, 9);
+ CALL_BATCH_ALLOC(4096, 8, 10);
+
+ return 0;
+}
+
+SEC("?fentry/" SYS_PREFIX "sys_nanosleep")
+int test_batch_percpu_alloc_free(void *ctx)
+{
+ if ((u32)bpf_get_current_pid_tgid() != pid)
+ return 0;
+
+ /* Alloc 128 8-bytes per-cpu objects in batch to trigger refilling,
+ * then free 128 8-bytes per-cpu objects in batch to trigger freeing.
+ */
+ CALL_BATCH_PERCPU_ALLOC_FREE(8, 128, 0);
+ CALL_BATCH_PERCPU_ALLOC_FREE(16, 128, 1);
+ CALL_BATCH_PERCPU_ALLOC_FREE(32, 128, 2);
+ CALL_BATCH_PERCPU_ALLOC_FREE(64, 128, 3);
+ CALL_BATCH_PERCPU_ALLOC_FREE(96, 128, 4);
+ CALL_BATCH_PERCPU_ALLOC_FREE(128, 128, 5);
+ CALL_BATCH_PERCPU_ALLOC_FREE(192, 128, 6);
+ CALL_BATCH_PERCPU_ALLOC_FREE(256, 128, 7);
+ CALL_BATCH_PERCPU_ALLOC_FREE(512, 64, 8);
+
+ return 0;
+}
+
+SEC("?fentry/" SYS_PREFIX "sys_nanosleep")
+int test_percpu_free_through_map_free(void *ctx)
+{
+ if ((u32)bpf_get_current_pid_tgid() != pid)
+ return 0;
+
+ /* Alloc 128 8-bytes per-cpu objects in batch to trigger refilling,
+ * then free these object through map free.
+ */
+ CALL_BATCH_PERCPU_ALLOC(8, 128, 0);
+ CALL_BATCH_PERCPU_ALLOC(16, 128, 1);
+ CALL_BATCH_PERCPU_ALLOC(32, 128, 2);
+ CALL_BATCH_PERCPU_ALLOC(64, 128, 3);
+ CALL_BATCH_PERCPU_ALLOC(96, 128, 4);
+ CALL_BATCH_PERCPU_ALLOC(128, 128, 5);
+ CALL_BATCH_PERCPU_ALLOC(192, 128, 6);
+ CALL_BATCH_PERCPU_ALLOC(256, 128, 7);
+ CALL_BATCH_PERCPU_ALLOC(512, 64, 8);
+
+ return 0;
+}
diff --git a/tools/testing/selftests/bpf/progs/test_bpf_nf.c b/tools/testing/selftests/bpf/progs/test_bpf_nf.c
new file mode 100644
index 000000000000..77ad8adf68da
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/test_bpf_nf.c
@@ -0,0 +1,237 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <vmlinux.h>
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_endian.h>
+
+#define EAFNOSUPPORT 97
+#define EPROTO 71
+#define ENONET 64
+#define EINVAL 22
+#define ENOENT 2
+
+extern unsigned long CONFIG_HZ __kconfig;
+
+int test_einval_bpf_tuple = 0;
+int test_einval_reserved = 0;
+int test_einval_netns_id = 0;
+int test_einval_len_opts = 0;
+int test_eproto_l4proto = 0;
+int test_enonet_netns_id = 0;
+int test_enoent_lookup = 0;
+int test_eafnosupport = 0;
+int test_alloc_entry = -EINVAL;
+int test_insert_entry = -EAFNOSUPPORT;
+int test_succ_lookup = -ENOENT;
+u32 test_delta_timeout = 0;
+u32 test_status = 0;
+u32 test_insert_lookup_mark = 0;
+int test_snat_addr = -EINVAL;
+int test_dnat_addr = -EINVAL;
+__be32 saddr = 0;
+__be16 sport = 0;
+__be32 daddr = 0;
+__be16 dport = 0;
+int test_exist_lookup = -ENOENT;
+u32 test_exist_lookup_mark = 0;
+
+enum nf_nat_manip_type___local {
+ NF_NAT_MANIP_SRC___local,
+ NF_NAT_MANIP_DST___local
+};
+
+struct nf_conn;
+
+struct bpf_ct_opts___local {
+ s32 netns_id;
+ s32 error;
+ u8 l4proto;
+ u8 reserved[3];
+} __attribute__((preserve_access_index));
+
+struct nf_conn *bpf_xdp_ct_alloc(struct xdp_md *, struct bpf_sock_tuple *, u32,
+ struct bpf_ct_opts___local *, u32) __ksym;
+struct nf_conn *bpf_xdp_ct_lookup(struct xdp_md *, struct bpf_sock_tuple *, u32,
+ struct bpf_ct_opts___local *, u32) __ksym;
+struct nf_conn *bpf_skb_ct_alloc(struct __sk_buff *, struct bpf_sock_tuple *, u32,
+ struct bpf_ct_opts___local *, u32) __ksym;
+struct nf_conn *bpf_skb_ct_lookup(struct __sk_buff *, struct bpf_sock_tuple *, u32,
+ struct bpf_ct_opts___local *, u32) __ksym;
+struct nf_conn *bpf_ct_insert_entry(struct nf_conn *) __ksym;
+void bpf_ct_release(struct nf_conn *) __ksym;
+void bpf_ct_set_timeout(struct nf_conn *, u32) __ksym;
+int bpf_ct_change_timeout(struct nf_conn *, u32) __ksym;
+int bpf_ct_set_status(struct nf_conn *, u32) __ksym;
+int bpf_ct_change_status(struct nf_conn *, u32) __ksym;
+int bpf_ct_set_nat_info(struct nf_conn *, union nf_inet_addr *,
+ int port, enum nf_nat_manip_type___local) __ksym;
+
+static __always_inline void
+nf_ct_test(struct nf_conn *(*lookup_fn)(void *, struct bpf_sock_tuple *, u32,
+ struct bpf_ct_opts___local *, u32),
+ struct nf_conn *(*alloc_fn)(void *, struct bpf_sock_tuple *, u32,
+ struct bpf_ct_opts___local *, u32),
+ void *ctx)
+{
+ struct bpf_ct_opts___local opts_def = { .l4proto = IPPROTO_TCP, .netns_id = -1 };
+ struct bpf_sock_tuple bpf_tuple;
+ struct nf_conn *ct;
+
+ __builtin_memset(&bpf_tuple, 0, sizeof(bpf_tuple.ipv4));
+
+ ct = lookup_fn(ctx, NULL, 0, &opts_def, sizeof(opts_def));
+ if (ct)
+ bpf_ct_release(ct);
+ else
+ test_einval_bpf_tuple = opts_def.error;
+
+ opts_def.reserved[0] = 1;
+ ct = lookup_fn(ctx, &bpf_tuple, sizeof(bpf_tuple.ipv4), &opts_def,
+ sizeof(opts_def));
+ opts_def.reserved[0] = 0;
+ opts_def.l4proto = IPPROTO_TCP;
+ if (ct)
+ bpf_ct_release(ct);
+ else
+ test_einval_reserved = opts_def.error;
+
+ opts_def.netns_id = -2;
+ ct = lookup_fn(ctx, &bpf_tuple, sizeof(bpf_tuple.ipv4), &opts_def,
+ sizeof(opts_def));
+ opts_def.netns_id = -1;
+ if (ct)
+ bpf_ct_release(ct);
+ else
+ test_einval_netns_id = opts_def.error;
+
+ ct = lookup_fn(ctx, &bpf_tuple, sizeof(bpf_tuple.ipv4), &opts_def,
+ sizeof(opts_def) - 1);
+ if (ct)
+ bpf_ct_release(ct);
+ else
+ test_einval_len_opts = opts_def.error;
+
+ opts_def.l4proto = IPPROTO_ICMP;
+ ct = lookup_fn(ctx, &bpf_tuple, sizeof(bpf_tuple.ipv4), &opts_def,
+ sizeof(opts_def));
+ opts_def.l4proto = IPPROTO_TCP;
+ if (ct)
+ bpf_ct_release(ct);
+ else
+ test_eproto_l4proto = opts_def.error;
+
+ opts_def.netns_id = 0xf00f;
+ ct = lookup_fn(ctx, &bpf_tuple, sizeof(bpf_tuple.ipv4), &opts_def,
+ sizeof(opts_def));
+ opts_def.netns_id = -1;
+ if (ct)
+ bpf_ct_release(ct);
+ else
+ test_enonet_netns_id = opts_def.error;
+
+ ct = lookup_fn(ctx, &bpf_tuple, sizeof(bpf_tuple.ipv4), &opts_def,
+ sizeof(opts_def));
+ if (ct)
+ bpf_ct_release(ct);
+ else
+ test_enoent_lookup = opts_def.error;
+
+ ct = lookup_fn(ctx, &bpf_tuple, sizeof(bpf_tuple.ipv4) - 1, &opts_def,
+ sizeof(opts_def));
+ if (ct)
+ bpf_ct_release(ct);
+ else
+ test_eafnosupport = opts_def.error;
+
+ bpf_tuple.ipv4.saddr = bpf_get_prandom_u32(); /* src IP */
+ bpf_tuple.ipv4.daddr = bpf_get_prandom_u32(); /* dst IP */
+ bpf_tuple.ipv4.sport = bpf_get_prandom_u32(); /* src port */
+ bpf_tuple.ipv4.dport = bpf_get_prandom_u32(); /* dst port */
+
+ ct = alloc_fn(ctx, &bpf_tuple, sizeof(bpf_tuple.ipv4), &opts_def,
+ sizeof(opts_def));
+ if (ct) {
+ __u16 sport = bpf_get_prandom_u32();
+ __u16 dport = bpf_get_prandom_u32();
+ union nf_inet_addr saddr = {};
+ union nf_inet_addr daddr = {};
+ struct nf_conn *ct_ins;
+
+ bpf_ct_set_timeout(ct, 10000);
+ ct->mark = 77;
+
+ /* snat */
+ saddr.ip = bpf_get_prandom_u32();
+ bpf_ct_set_nat_info(ct, &saddr, sport, NF_NAT_MANIP_SRC___local);
+ /* dnat */
+ daddr.ip = bpf_get_prandom_u32();
+ bpf_ct_set_nat_info(ct, &daddr, dport, NF_NAT_MANIP_DST___local);
+
+ ct_ins = bpf_ct_insert_entry(ct);
+ if (ct_ins) {
+ struct nf_conn *ct_lk;
+
+ ct_lk = lookup_fn(ctx, &bpf_tuple, sizeof(bpf_tuple.ipv4),
+ &opts_def, sizeof(opts_def));
+ if (ct_lk) {
+ struct nf_conntrack_tuple *tuple;
+
+ /* check snat and dnat addresses */
+ tuple = &ct_lk->tuplehash[IP_CT_DIR_REPLY].tuple;
+ if (tuple->dst.u3.ip == saddr.ip &&
+ tuple->dst.u.all == bpf_htons(sport))
+ test_snat_addr = 0;
+ if (tuple->src.u3.ip == daddr.ip &&
+ tuple->src.u.all == bpf_htons(dport))
+ test_dnat_addr = 0;
+
+ /* update ct entry timeout */
+ bpf_ct_change_timeout(ct_lk, 10000);
+ test_delta_timeout = ct_lk->timeout - bpf_jiffies64();
+ test_delta_timeout /= CONFIG_HZ;
+ test_insert_lookup_mark = ct_lk->mark;
+ bpf_ct_change_status(ct_lk,
+ IPS_CONFIRMED | IPS_SEEN_REPLY);
+ test_status = ct_lk->status;
+
+ bpf_ct_release(ct_lk);
+ test_succ_lookup = 0;
+ }
+ bpf_ct_release(ct_ins);
+ test_insert_entry = 0;
+ }
+ test_alloc_entry = 0;
+ }
+
+ bpf_tuple.ipv4.saddr = saddr;
+ bpf_tuple.ipv4.daddr = daddr;
+ bpf_tuple.ipv4.sport = sport;
+ bpf_tuple.ipv4.dport = dport;
+ ct = lookup_fn(ctx, &bpf_tuple, sizeof(bpf_tuple.ipv4), &opts_def,
+ sizeof(opts_def));
+ if (ct) {
+ test_exist_lookup = 0;
+ if (ct->mark == 42) {
+ ct->mark++;
+ test_exist_lookup_mark = ct->mark;
+ }
+ bpf_ct_release(ct);
+ } else {
+ test_exist_lookup = opts_def.error;
+ }
+}
+
+SEC("xdp")
+int nf_xdp_ct_test(struct xdp_md *ctx)
+{
+ nf_ct_test((void *)bpf_xdp_ct_lookup, (void *)bpf_xdp_ct_alloc, ctx);
+ return 0;
+}
+
+SEC("tc")
+int nf_skb_ct_test(struct __sk_buff *ctx)
+{
+ nf_ct_test((void *)bpf_skb_ct_lookup, (void *)bpf_skb_ct_alloc, ctx);
+ return 0;
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/test_bpf_nf_fail.c b/tools/testing/selftests/bpf/progs/test_bpf_nf_fail.c
new file mode 100644
index 000000000000..0e4759ab38ff
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/test_bpf_nf_fail.c
@@ -0,0 +1,148 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <vmlinux.h>
+#include <bpf/bpf_tracing.h>
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_core_read.h>
+
+struct nf_conn;
+
+struct bpf_ct_opts___local {
+ s32 netns_id;
+ s32 error;
+ u8 l4proto;
+ u8 reserved[3];
+} __attribute__((preserve_access_index));
+
+struct nf_conn *bpf_skb_ct_alloc(struct __sk_buff *, struct bpf_sock_tuple *, u32,
+ struct bpf_ct_opts___local *, u32) __ksym;
+struct nf_conn *bpf_skb_ct_lookup(struct __sk_buff *, struct bpf_sock_tuple *, u32,
+ struct bpf_ct_opts___local *, u32) __ksym;
+struct nf_conn *bpf_ct_insert_entry(struct nf_conn *) __ksym;
+void bpf_ct_release(struct nf_conn *) __ksym;
+void bpf_ct_set_timeout(struct nf_conn *, u32) __ksym;
+int bpf_ct_change_timeout(struct nf_conn *, u32) __ksym;
+int bpf_ct_set_status(struct nf_conn *, u32) __ksym;
+int bpf_ct_change_status(struct nf_conn *, u32) __ksym;
+
+SEC("?tc")
+int alloc_release(struct __sk_buff *ctx)
+{
+ struct bpf_ct_opts___local opts = {};
+ struct bpf_sock_tuple tup = {};
+ struct nf_conn *ct;
+
+ ct = bpf_skb_ct_alloc(ctx, &tup, sizeof(tup.ipv4), &opts, sizeof(opts));
+ if (!ct)
+ return 0;
+ bpf_ct_release(ct);
+ return 0;
+}
+
+SEC("?tc")
+int insert_insert(struct __sk_buff *ctx)
+{
+ struct bpf_ct_opts___local opts = {};
+ struct bpf_sock_tuple tup = {};
+ struct nf_conn *ct;
+
+ ct = bpf_skb_ct_alloc(ctx, &tup, sizeof(tup.ipv4), &opts, sizeof(opts));
+ if (!ct)
+ return 0;
+ ct = bpf_ct_insert_entry(ct);
+ if (!ct)
+ return 0;
+ ct = bpf_ct_insert_entry(ct);
+ return 0;
+}
+
+SEC("?tc")
+int lookup_insert(struct __sk_buff *ctx)
+{
+ struct bpf_ct_opts___local opts = {};
+ struct bpf_sock_tuple tup = {};
+ struct nf_conn *ct;
+
+ ct = bpf_skb_ct_lookup(ctx, &tup, sizeof(tup.ipv4), &opts, sizeof(opts));
+ if (!ct)
+ return 0;
+ bpf_ct_insert_entry(ct);
+ return 0;
+}
+
+SEC("?tc")
+int write_not_allowlisted_field(struct __sk_buff *ctx)
+{
+ struct bpf_ct_opts___local opts = {};
+ struct bpf_sock_tuple tup = {};
+ struct nf_conn *ct;
+
+ ct = bpf_skb_ct_lookup(ctx, &tup, sizeof(tup.ipv4), &opts, sizeof(opts));
+ if (!ct)
+ return 0;
+ ct->status = 0xF00;
+ return 0;
+}
+
+SEC("?tc")
+int set_timeout_after_insert(struct __sk_buff *ctx)
+{
+ struct bpf_ct_opts___local opts = {};
+ struct bpf_sock_tuple tup = {};
+ struct nf_conn *ct;
+
+ ct = bpf_skb_ct_alloc(ctx, &tup, sizeof(tup.ipv4), &opts, sizeof(opts));
+ if (!ct)
+ return 0;
+ ct = bpf_ct_insert_entry(ct);
+ if (!ct)
+ return 0;
+ bpf_ct_set_timeout(ct, 0);
+ return 0;
+}
+
+SEC("?tc")
+int set_status_after_insert(struct __sk_buff *ctx)
+{
+ struct bpf_ct_opts___local opts = {};
+ struct bpf_sock_tuple tup = {};
+ struct nf_conn *ct;
+
+ ct = bpf_skb_ct_alloc(ctx, &tup, sizeof(tup.ipv4), &opts, sizeof(opts));
+ if (!ct)
+ return 0;
+ ct = bpf_ct_insert_entry(ct);
+ if (!ct)
+ return 0;
+ bpf_ct_set_status(ct, 0);
+ return 0;
+}
+
+SEC("?tc")
+int change_timeout_after_alloc(struct __sk_buff *ctx)
+{
+ struct bpf_ct_opts___local opts = {};
+ struct bpf_sock_tuple tup = {};
+ struct nf_conn *ct;
+
+ ct = bpf_skb_ct_alloc(ctx, &tup, sizeof(tup.ipv4), &opts, sizeof(opts));
+ if (!ct)
+ return 0;
+ bpf_ct_change_timeout(ct, 0);
+ return 0;
+}
+
+SEC("?tc")
+int change_status_after_alloc(struct __sk_buff *ctx)
+{
+ struct bpf_ct_opts___local opts = {};
+ struct bpf_sock_tuple tup = {};
+ struct nf_conn *ct;
+
+ ct = bpf_skb_ct_alloc(ctx, &tup, sizeof(tup.ipv4), &opts, sizeof(opts));
+ if (!ct)
+ return 0;
+ bpf_ct_change_status(ct, 0);
+ return 0;
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/test_btf_decl_tag.c b/tools/testing/selftests/bpf/progs/test_btf_decl_tag.c
new file mode 100644
index 000000000000..c88ccc53529a
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/test_btf_decl_tag.c
@@ -0,0 +1,50 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2021 Facebook */
+#include "vmlinux.h"
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
+
+#if __has_attribute(btf_decl_tag)
+#define __tag1 __attribute__((btf_decl_tag("tag1")))
+#define __tag2 __attribute__((btf_decl_tag("tag2")))
+volatile const bool skip_tests __tag1 __tag2 = false;
+#else
+#define __tag1
+#define __tag2
+volatile const bool skip_tests = true;
+#endif
+
+struct key_t {
+ int a;
+ int b __tag1 __tag2;
+ int c;
+} __tag1 __tag2;
+
+typedef struct {
+ int a;
+ int b;
+} value_t __tag1 __tag2;
+
+struct {
+ __uint(type, BPF_MAP_TYPE_HASH);
+ __uint(max_entries, 3);
+ __type(key, struct key_t);
+ __type(value, value_t);
+} hashmap1 SEC(".maps");
+
+
+static __noinline int foo(int x __tag1 __tag2) __tag1 __tag2
+{
+ struct key_t key;
+ value_t val = {};
+
+ key.a = key.b = key.c = x;
+ bpf_map_update_elem(&hashmap1, &key, &val, 0);
+ return 0;
+}
+
+SEC("fentry/bpf_fentry_test1")
+int BPF_PROG(sub, int x)
+{
+ return foo(x);
+}
diff --git a/tools/testing/selftests/bpf/progs/test_btf_haskv.c b/tools/testing/selftests/bpf/progs/test_btf_haskv.c
deleted file mode 100644
index 31538c9ed193..000000000000
--- a/tools/testing/selftests/bpf/progs/test_btf_haskv.c
+++ /dev/null
@@ -1,50 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-/* Copyright (c) 2018 Facebook */
-#include <linux/bpf.h>
-#include <bpf/bpf_helpers.h>
-#include "bpf_legacy.h"
-
-int _version SEC("version") = 1;
-
-struct ipv_counts {
- unsigned int v4;
- unsigned int v6;
-};
-
-struct bpf_map_def SEC("maps") btf_map = {
- .type = BPF_MAP_TYPE_ARRAY,
- .key_size = sizeof(int),
- .value_size = sizeof(struct ipv_counts),
- .max_entries = 4,
-};
-
-BPF_ANNOTATE_KV_PAIR(btf_map, int, struct ipv_counts);
-
-__attribute__((noinline))
-int test_long_fname_2(void)
-{
- struct ipv_counts *counts;
- int key = 0;
-
- counts = bpf_map_lookup_elem(&btf_map, &key);
- if (!counts)
- return 0;
-
- counts->v6++;
-
- return 0;
-}
-
-__attribute__((noinline))
-int test_long_fname_1(void)
-{
- return test_long_fname_2();
-}
-
-SEC("dummy_tracepoint")
-int _dummy_tracepoint(void *arg)
-{
- return test_long_fname_1();
-}
-
-char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/test_btf_map_in_map.c b/tools/testing/selftests/bpf/progs/test_btf_map_in_map.c
index e5093796be97..c218cf8989a9 100644
--- a/tools/testing/selftests/bpf/progs/test_btf_map_in_map.c
+++ b/tools/testing/selftests/bpf/progs/test_btf_map_in_map.c
@@ -11,11 +11,18 @@ struct inner_map {
} inner_map1 SEC(".maps"),
inner_map2 SEC(".maps");
+struct inner_map_sz2 {
+ __uint(type, BPF_MAP_TYPE_ARRAY);
+ __uint(max_entries, 2);
+ __type(key, int);
+ __type(value, int);
+} inner_map_sz2 SEC(".maps");
+
struct outer_arr {
__uint(type, BPF_MAP_TYPE_ARRAY_OF_MAPS);
__uint(max_entries, 3);
- __uint(key_size, sizeof(int));
- __uint(value_size, sizeof(int));
+ __type(key, int);
+ __type(value, int);
/* it's possible to use anonymous struct as inner map definition here */
__array(values, struct {
__uint(type, BPF_MAP_TYPE_ARRAY);
@@ -34,10 +41,47 @@ struct outer_arr {
.values = { (void *)&inner_map1, 0, (void *)&inner_map2 },
};
+struct inner_map_sz3 {
+ __uint(type, BPF_MAP_TYPE_ARRAY);
+ __uint(map_flags, BPF_F_INNER_MAP);
+ __uint(max_entries, 3);
+ __type(key, int);
+ __type(value, int);
+} inner_map3 SEC(".maps"),
+ inner_map4 SEC(".maps");
+
+struct inner_map_sz4 {
+ __uint(type, BPF_MAP_TYPE_ARRAY);
+ __uint(map_flags, BPF_F_INNER_MAP);
+ __uint(max_entries, 5);
+ __type(key, int);
+ __type(value, int);
+} inner_map5 SEC(".maps");
+
+struct outer_arr_dyn {
+ __uint(type, BPF_MAP_TYPE_ARRAY_OF_MAPS);
+ __uint(max_entries, 3);
+ __type(key, int);
+ __type(value, int);
+ __array(values, struct {
+ __uint(type, BPF_MAP_TYPE_ARRAY);
+ __uint(map_flags, BPF_F_INNER_MAP);
+ __uint(max_entries, 1);
+ __type(key, int);
+ __type(value, int);
+ });
+} outer_arr_dyn SEC(".maps") = {
+ .values = {
+ [0] = (void *)&inner_map3,
+ [1] = (void *)&inner_map4,
+ [2] = (void *)&inner_map5,
+ },
+};
+
struct outer_hash {
__uint(type, BPF_MAP_TYPE_HASH_OF_MAPS);
__uint(max_entries, 5);
- __uint(key_size, sizeof(int));
+ __type(key, int);
/* Here everything works flawlessly due to reuse of struct inner_map
* and compiler will complain at the attempt to use non-inner_map
* references below. This is great experience.
@@ -50,6 +94,30 @@ struct outer_hash {
},
};
+struct sockarr_sz1 {
+ __uint(type, BPF_MAP_TYPE_REUSEPORT_SOCKARRAY);
+ __uint(max_entries, 1);
+ __type(key, int);
+ __type(value, int);
+} sockarr_sz1 SEC(".maps");
+
+struct sockarr_sz2 {
+ __uint(type, BPF_MAP_TYPE_REUSEPORT_SOCKARRAY);
+ __uint(max_entries, 2);
+ __type(key, int);
+ __type(value, int);
+} sockarr_sz2 SEC(".maps");
+
+struct outer_sockarr_sz1 {
+ __uint(type, BPF_MAP_TYPE_ARRAY_OF_MAPS);
+ __uint(max_entries, 1);
+ __type(key, int);
+ __type(value, int);
+ __array(values, struct sockarr_sz1);
+} outer_sockarr SEC(".maps") = {
+ .values = { (void *)&sockarr_sz1 },
+};
+
int input = 0;
SEC("raw_tp/sys_enter")
@@ -70,6 +138,12 @@ int handle__sys_enter(void *ctx)
val = input + 1;
bpf_map_update_elem(inner_map, &key, &val, 0);
+ inner_map = bpf_map_lookup_elem(&outer_arr_dyn, &key);
+ if (!inner_map)
+ return 1;
+ val = input + 2;
+ bpf_map_update_elem(inner_map, &key, &val, 0);
+
return 0;
}
diff --git a/tools/testing/selftests/bpf/progs/test_btf_newkv.c b/tools/testing/selftests/bpf/progs/test_btf_newkv.c
index 6c5560162746..251854a041b5 100644
--- a/tools/testing/selftests/bpf/progs/test_btf_newkv.c
+++ b/tools/testing/selftests/bpf/progs/test_btf_newkv.c
@@ -4,23 +4,11 @@
#include <bpf/bpf_helpers.h>
#include "bpf_legacy.h"
-int _version SEC("version") = 1;
-
struct ipv_counts {
unsigned int v4;
unsigned int v6;
};
-/* just to validate we can handle maps in multiple sections */
-struct bpf_map_def SEC("maps") btf_map_legacy = {
- .type = BPF_MAP_TYPE_ARRAY,
- .key_size = sizeof(int),
- .value_size = sizeof(long long),
- .max_entries = 4,
-};
-
-BPF_ANNOTATE_KV_PAIR(btf_map_legacy, int, struct ipv_counts);
-
struct {
__uint(type, BPF_MAP_TYPE_ARRAY);
__uint(max_entries, 4);
@@ -40,11 +28,6 @@ int test_long_fname_2(void)
counts->v6++;
- /* just verify we can reference both maps */
- counts = bpf_map_lookup_elem(&btf_map_legacy, &key);
- if (!counts)
- return 0;
-
return 0;
}
diff --git a/tools/testing/selftests/bpf/progs/test_btf_nokv.c b/tools/testing/selftests/bpf/progs/test_btf_nokv.c
index 506da7fd2da2..1dabb88f8cb4 100644
--- a/tools/testing/selftests/bpf/progs/test_btf_nokv.c
+++ b/tools/testing/selftests/bpf/progs/test_btf_nokv.c
@@ -3,19 +3,17 @@
#include <linux/bpf.h>
#include <bpf/bpf_helpers.h>
-int _version SEC("version") = 1;
-
struct ipv_counts {
unsigned int v4;
unsigned int v6;
};
-struct bpf_map_def SEC("maps") btf_map = {
- .type = BPF_MAP_TYPE_ARRAY,
- .key_size = sizeof(int),
- .value_size = sizeof(struct ipv_counts),
- .max_entries = 4,
-};
+struct {
+ __uint(type, BPF_MAP_TYPE_ARRAY);
+ __uint(key_size, sizeof(int));
+ __uint(value_size, sizeof(struct ipv_counts));
+ __uint(max_entries, 4);
+} btf_map SEC(".maps");
__attribute__((noinline))
int test_long_fname_2(void)
diff --git a/tools/testing/selftests/bpf/progs/test_btf_skc_cls_ingress.c b/tools/testing/selftests/bpf/progs/test_btf_skc_cls_ingress.c
new file mode 100644
index 000000000000..e2bea4da194b
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/test_btf_skc_cls_ingress.c
@@ -0,0 +1,174 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2020 Facebook */
+
+#include <string.h>
+#include <errno.h>
+#include <netinet/in.h>
+#include <linux/stddef.h>
+#include <linux/bpf.h>
+#include <linux/ipv6.h>
+#include <linux/tcp.h>
+#include <linux/if_ether.h>
+#include <linux/pkt_cls.h>
+
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_endian.h>
+#include "bpf_tcp_helpers.h"
+
+struct sockaddr_in6 srv_sa6 = {};
+__u16 listen_tp_sport = 0;
+__u16 req_sk_sport = 0;
+__u32 recv_cookie = 0;
+__u32 gen_cookie = 0;
+__u32 linum = 0;
+
+#define LOG() ({ if (!linum) linum = __LINE__; })
+
+static void test_syncookie_helper(struct ipv6hdr *ip6h, struct tcphdr *th,
+ struct tcp_sock *tp,
+ struct __sk_buff *skb)
+{
+ if (th->syn) {
+ __s64 mss_cookie;
+ void *data_end;
+
+ data_end = (void *)(long)(skb->data_end);
+
+ if (th->doff * 4 != 40) {
+ LOG();
+ return;
+ }
+
+ if ((void *)th + 40 > data_end) {
+ LOG();
+ return;
+ }
+
+ mss_cookie = bpf_tcp_gen_syncookie(tp, ip6h, sizeof(*ip6h),
+ th, 40);
+ if (mss_cookie < 0) {
+ if (mss_cookie != -ENOENT)
+ LOG();
+ } else {
+ gen_cookie = (__u32)mss_cookie;
+ }
+ } else if (gen_cookie) {
+ /* It was in cookie mode */
+ int ret = bpf_tcp_check_syncookie(tp, ip6h, sizeof(*ip6h),
+ th, sizeof(*th));
+
+ if (ret < 0) {
+ if (ret != -ENOENT)
+ LOG();
+ } else {
+ recv_cookie = bpf_ntohl(th->ack_seq) - 1;
+ }
+ }
+}
+
+static int handle_ip6_tcp(struct ipv6hdr *ip6h, struct __sk_buff *skb)
+{
+ struct bpf_sock_tuple *tuple;
+ struct bpf_sock *bpf_skc;
+ unsigned int tuple_len;
+ struct tcphdr *th;
+ void *data_end;
+
+ data_end = (void *)(long)(skb->data_end);
+
+ th = (struct tcphdr *)(ip6h + 1);
+ if (th + 1 > data_end)
+ return TC_ACT_OK;
+
+ /* Is it the testing traffic? */
+ if (th->dest != srv_sa6.sin6_port)
+ return TC_ACT_OK;
+
+ tuple_len = sizeof(tuple->ipv6);
+ tuple = (struct bpf_sock_tuple *)&ip6h->saddr;
+ if ((void *)tuple + tuple_len > data_end) {
+ LOG();
+ return TC_ACT_OK;
+ }
+
+ bpf_skc = bpf_skc_lookup_tcp(skb, tuple, tuple_len,
+ BPF_F_CURRENT_NETNS, 0);
+ if (!bpf_skc) {
+ LOG();
+ return TC_ACT_OK;
+ }
+
+ if (bpf_skc->state == BPF_TCP_NEW_SYN_RECV) {
+ struct request_sock *req_sk;
+
+ req_sk = (struct request_sock *)bpf_skc_to_tcp_request_sock(bpf_skc);
+ if (!req_sk) {
+ LOG();
+ goto release;
+ }
+
+ if (bpf_sk_assign(skb, req_sk, 0)) {
+ LOG();
+ goto release;
+ }
+
+ req_sk_sport = req_sk->__req_common.skc_num;
+
+ bpf_sk_release(req_sk);
+ return TC_ACT_OK;
+ } else if (bpf_skc->state == BPF_TCP_LISTEN) {
+ struct tcp_sock *tp;
+
+ tp = bpf_skc_to_tcp_sock(bpf_skc);
+ if (!tp) {
+ LOG();
+ goto release;
+ }
+
+ if (bpf_sk_assign(skb, tp, 0)) {
+ LOG();
+ goto release;
+ }
+
+ listen_tp_sport = tp->inet_conn.icsk_inet.sk.__sk_common.skc_num;
+
+ test_syncookie_helper(ip6h, th, tp, skb);
+ bpf_sk_release(tp);
+ return TC_ACT_OK;
+ }
+
+ if (bpf_sk_assign(skb, bpf_skc, 0))
+ LOG();
+
+release:
+ bpf_sk_release(bpf_skc);
+ return TC_ACT_OK;
+}
+
+SEC("tc")
+int cls_ingress(struct __sk_buff *skb)
+{
+ struct ipv6hdr *ip6h;
+ struct ethhdr *eth;
+ void *data_end;
+
+ data_end = (void *)(long)(skb->data_end);
+
+ eth = (struct ethhdr *)(long)(skb->data);
+ if (eth + 1 > data_end)
+ return TC_ACT_OK;
+
+ if (eth->h_proto != bpf_htons(ETH_P_IPV6))
+ return TC_ACT_OK;
+
+ ip6h = (struct ipv6hdr *)(eth + 1);
+ if (ip6h + 1 > data_end)
+ return TC_ACT_OK;
+
+ if (ip6h->nexthdr == IPPROTO_TCP)
+ return handle_ip6_tcp(ip6h, skb);
+
+ return TC_ACT_OK;
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/test_cgroup1_hierarchy.c b/tools/testing/selftests/bpf/progs/test_cgroup1_hierarchy.c
new file mode 100644
index 000000000000..44628865fe1d
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/test_cgroup1_hierarchy.c
@@ -0,0 +1,71 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (C) 2023 Yafang Shao <laoar.shao@gmail.com> */
+
+#include "vmlinux.h"
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
+#include <bpf/bpf_core_read.h>
+
+__u32 target_ancestor_level;
+__u64 target_ancestor_cgid;
+int target_pid, target_hid;
+
+struct cgroup *bpf_task_get_cgroup1(struct task_struct *task, int hierarchy_id) __ksym;
+struct cgroup *bpf_cgroup_ancestor(struct cgroup *cgrp, int level) __ksym;
+void bpf_cgroup_release(struct cgroup *cgrp) __ksym;
+
+static int bpf_link_create_verify(int cmd)
+{
+ struct cgroup *cgrp, *ancestor;
+ struct task_struct *task;
+ int ret = 0;
+
+ if (cmd != BPF_LINK_CREATE)
+ return 0;
+
+ task = bpf_get_current_task_btf();
+
+ /* Then it can run in parallel with others */
+ if (task->pid != target_pid)
+ return 0;
+
+ cgrp = bpf_task_get_cgroup1(task, target_hid);
+ if (!cgrp)
+ return 0;
+
+ /* Refuse it if its cgid or its ancestor's cgid is the target cgid */
+ if (cgrp->kn->id == target_ancestor_cgid)
+ ret = -1;
+
+ ancestor = bpf_cgroup_ancestor(cgrp, target_ancestor_level);
+ if (!ancestor)
+ goto out;
+
+ if (ancestor->kn->id == target_ancestor_cgid)
+ ret = -1;
+ bpf_cgroup_release(ancestor);
+
+out:
+ bpf_cgroup_release(cgrp);
+ return ret;
+}
+
+SEC("lsm/bpf")
+int BPF_PROG(lsm_run, int cmd, union bpf_attr *attr, unsigned int size)
+{
+ return bpf_link_create_verify(cmd);
+}
+
+SEC("lsm.s/bpf")
+int BPF_PROG(lsm_s_run, int cmd, union bpf_attr *attr, unsigned int size)
+{
+ return bpf_link_create_verify(cmd);
+}
+
+SEC("fentry")
+int BPF_PROG(fentry_run)
+{
+ return 0;
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/test_cgroup_link.c b/tools/testing/selftests/bpf/progs/test_cgroup_link.c
index 77e47b9e4446..4faba88e45a5 100644
--- a/tools/testing/selftests/bpf/progs/test_cgroup_link.c
+++ b/tools/testing/selftests/bpf/progs/test_cgroup_link.c
@@ -6,14 +6,14 @@
int calls = 0;
int alt_calls = 0;
-SEC("cgroup_skb/egress1")
+SEC("cgroup_skb/egress")
int egress(struct __sk_buff *skb)
{
__sync_fetch_and_add(&calls, 1);
return 1;
}
-SEC("cgroup_skb/egress2")
+SEC("cgroup_skb/egress")
int egress_alt(struct __sk_buff *skb)
{
__sync_fetch_and_add(&alt_calls, 1);
diff --git a/tools/testing/selftests/bpf/progs/test_check_mtu.c b/tools/testing/selftests/bpf/progs/test_check_mtu.c
new file mode 100644
index 000000000000..2ec1de11a3ae
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/test_check_mtu.c
@@ -0,0 +1,290 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2020 Jesper Dangaard Brouer */
+
+#include <linux/bpf.h>
+#include <bpf/bpf_helpers.h>
+#include <linux/if_ether.h>
+
+#include <stddef.h>
+#include <stdint.h>
+
+char _license[] SEC("license") = "GPL";
+
+/* Userspace will update with MTU it can see on device */
+volatile const int GLOBAL_USER_MTU;
+volatile const __u32 GLOBAL_USER_IFINDEX;
+
+/* BPF-prog will update these with MTU values it can see */
+__u32 global_bpf_mtu_xdp = 0;
+__u32 global_bpf_mtu_tc = 0;
+
+SEC("xdp")
+int xdp_use_helper_basic(struct xdp_md *ctx)
+{
+ __u32 mtu_len = 0;
+
+ if (bpf_check_mtu(ctx, 0, &mtu_len, 0, 0))
+ return XDP_ABORTED;
+
+ return XDP_PASS;
+}
+
+SEC("xdp")
+int xdp_use_helper(struct xdp_md *ctx)
+{
+ int retval = XDP_PASS; /* Expected retval on successful test */
+ __u32 mtu_len = 0;
+ __u32 ifindex = 0;
+ int delta = 0;
+
+ /* When ifindex is zero, save net_device lookup and use ctx netdev */
+ if (GLOBAL_USER_IFINDEX > 0)
+ ifindex = GLOBAL_USER_IFINDEX;
+
+ if (bpf_check_mtu(ctx, ifindex, &mtu_len, delta, 0)) {
+ /* mtu_len is also valid when check fail */
+ retval = XDP_ABORTED;
+ goto out;
+ }
+
+ if (mtu_len != GLOBAL_USER_MTU)
+ retval = XDP_DROP;
+
+out:
+ global_bpf_mtu_xdp = mtu_len;
+ return retval;
+}
+
+SEC("xdp")
+int xdp_exceed_mtu(struct xdp_md *ctx)
+{
+ void *data_end = (void *)(long)ctx->data_end;
+ void *data = (void *)(long)ctx->data;
+ __u32 ifindex = GLOBAL_USER_IFINDEX;
+ __u32 data_len = data_end - data;
+ int retval = XDP_ABORTED; /* Fail */
+ __u32 mtu_len = 0;
+ int delta;
+ int err;
+
+ /* Exceed MTU with 1 via delta adjust */
+ delta = GLOBAL_USER_MTU - (data_len - ETH_HLEN) + 1;
+
+ err = bpf_check_mtu(ctx, ifindex, &mtu_len, delta, 0);
+ if (err) {
+ retval = XDP_PASS; /* Success in exceeding MTU check */
+ if (err != BPF_MTU_CHK_RET_FRAG_NEEDED)
+ retval = XDP_DROP;
+ }
+
+ global_bpf_mtu_xdp = mtu_len;
+ return retval;
+}
+
+SEC("xdp")
+int xdp_minus_delta(struct xdp_md *ctx)
+{
+ int retval = XDP_PASS; /* Expected retval on successful test */
+ void *data_end = (void *)(long)ctx->data_end;
+ void *data = (void *)(long)ctx->data;
+ __u32 ifindex = GLOBAL_USER_IFINDEX;
+ __u32 data_len = data_end - data;
+ __u32 mtu_len = 0;
+ int delta;
+
+ /* Borderline test case: Minus delta exceeding packet length allowed */
+ delta = -((data_len - ETH_HLEN) + 1);
+
+ /* Minus length (adjusted via delta) still pass MTU check, other helpers
+ * are responsible for catching this, when doing actual size adjust
+ */
+ if (bpf_check_mtu(ctx, ifindex, &mtu_len, delta, 0))
+ retval = XDP_ABORTED;
+
+ global_bpf_mtu_xdp = mtu_len;
+ return retval;
+}
+
+SEC("xdp")
+int xdp_input_len(struct xdp_md *ctx)
+{
+ int retval = XDP_PASS; /* Expected retval on successful test */
+ void *data_end = (void *)(long)ctx->data_end;
+ void *data = (void *)(long)ctx->data;
+ __u32 ifindex = GLOBAL_USER_IFINDEX;
+ __u32 data_len = data_end - data;
+
+ /* API allow user give length to check as input via mtu_len param,
+ * resulting MTU value is still output in mtu_len param after call.
+ *
+ * Input len is L3, like MTU and iph->tot_len.
+ * Remember XDP data_len is L2.
+ */
+ __u32 mtu_len = data_len - ETH_HLEN;
+
+ if (bpf_check_mtu(ctx, ifindex, &mtu_len, 0, 0))
+ retval = XDP_ABORTED;
+
+ global_bpf_mtu_xdp = mtu_len;
+ return retval;
+}
+
+SEC("xdp")
+int xdp_input_len_exceed(struct xdp_md *ctx)
+{
+ int retval = XDP_ABORTED; /* Fail */
+ __u32 ifindex = GLOBAL_USER_IFINDEX;
+ int err;
+
+ /* API allow user give length to check as input via mtu_len param,
+ * resulting MTU value is still output in mtu_len param after call.
+ *
+ * Input length value is L3 size like MTU.
+ */
+ __u32 mtu_len = GLOBAL_USER_MTU;
+
+ mtu_len += 1; /* Exceed with 1 */
+
+ err = bpf_check_mtu(ctx, ifindex, &mtu_len, 0, 0);
+ if (err == BPF_MTU_CHK_RET_FRAG_NEEDED)
+ retval = XDP_PASS ; /* Success in exceeding MTU check */
+
+ global_bpf_mtu_xdp = mtu_len;
+ return retval;
+}
+
+SEC("tc")
+int tc_use_helper(struct __sk_buff *ctx)
+{
+ int retval = BPF_OK; /* Expected retval on successful test */
+ __u32 mtu_len = 0;
+ int delta = 0;
+
+ if (bpf_check_mtu(ctx, 0, &mtu_len, delta, 0)) {
+ retval = BPF_DROP;
+ goto out;
+ }
+
+ if (mtu_len != GLOBAL_USER_MTU)
+ retval = BPF_REDIRECT;
+out:
+ global_bpf_mtu_tc = mtu_len;
+ return retval;
+}
+
+SEC("tc")
+int tc_exceed_mtu(struct __sk_buff *ctx)
+{
+ __u32 ifindex = GLOBAL_USER_IFINDEX;
+ int retval = BPF_DROP; /* Fail */
+ __u32 skb_len = ctx->len;
+ __u32 mtu_len = 0;
+ int delta;
+ int err;
+
+ /* Exceed MTU with 1 via delta adjust */
+ delta = GLOBAL_USER_MTU - (skb_len - ETH_HLEN) + 1;
+
+ err = bpf_check_mtu(ctx, ifindex, &mtu_len, delta, 0);
+ if (err) {
+ retval = BPF_OK; /* Success in exceeding MTU check */
+ if (err != BPF_MTU_CHK_RET_FRAG_NEEDED)
+ retval = BPF_DROP;
+ }
+
+ global_bpf_mtu_tc = mtu_len;
+ return retval;
+}
+
+SEC("tc")
+int tc_exceed_mtu_da(struct __sk_buff *ctx)
+{
+ /* SKB Direct-Access variant */
+ void *data_end = (void *)(long)ctx->data_end;
+ void *data = (void *)(long)ctx->data;
+ __u32 ifindex = GLOBAL_USER_IFINDEX;
+ __u32 data_len = data_end - data;
+ int retval = BPF_DROP; /* Fail */
+ __u32 mtu_len = 0;
+ int delta;
+ int err;
+
+ /* Exceed MTU with 1 via delta adjust */
+ delta = GLOBAL_USER_MTU - (data_len - ETH_HLEN) + 1;
+
+ err = bpf_check_mtu(ctx, ifindex, &mtu_len, delta, 0);
+ if (err) {
+ retval = BPF_OK; /* Success in exceeding MTU check */
+ if (err != BPF_MTU_CHK_RET_FRAG_NEEDED)
+ retval = BPF_DROP;
+ }
+
+ global_bpf_mtu_tc = mtu_len;
+ return retval;
+}
+
+SEC("tc")
+int tc_minus_delta(struct __sk_buff *ctx)
+{
+ int retval = BPF_OK; /* Expected retval on successful test */
+ __u32 ifindex = GLOBAL_USER_IFINDEX;
+ __u32 skb_len = ctx->len;
+ __u32 mtu_len = 0;
+ int delta;
+
+ /* Borderline test case: Minus delta exceeding packet length allowed */
+ delta = -((skb_len - ETH_HLEN) + 1);
+
+ /* Minus length (adjusted via delta) still pass MTU check, other helpers
+ * are responsible for catching this, when doing actual size adjust
+ */
+ if (bpf_check_mtu(ctx, ifindex, &mtu_len, delta, 0))
+ retval = BPF_DROP;
+
+ global_bpf_mtu_xdp = mtu_len;
+ return retval;
+}
+
+SEC("tc")
+int tc_input_len(struct __sk_buff *ctx)
+{
+ int retval = BPF_OK; /* Expected retval on successful test */
+ __u32 ifindex = GLOBAL_USER_IFINDEX;
+
+ /* API allow user give length to check as input via mtu_len param,
+ * resulting MTU value is still output in mtu_len param after call.
+ *
+ * Input length value is L3 size.
+ */
+ __u32 mtu_len = GLOBAL_USER_MTU;
+
+ if (bpf_check_mtu(ctx, ifindex, &mtu_len, 0, 0))
+ retval = BPF_DROP;
+
+ global_bpf_mtu_xdp = mtu_len;
+ return retval;
+}
+
+SEC("tc")
+int tc_input_len_exceed(struct __sk_buff *ctx)
+{
+ int retval = BPF_DROP; /* Fail */
+ __u32 ifindex = GLOBAL_USER_IFINDEX;
+ int err;
+
+ /* API allow user give length to check as input via mtu_len param,
+ * resulting MTU value is still output in mtu_len param after call.
+ *
+ * Input length value is L3 size like MTU.
+ */
+ __u32 mtu_len = GLOBAL_USER_MTU;
+
+ mtu_len += 1; /* Exceed with 1 */
+
+ err = bpf_check_mtu(ctx, ifindex, &mtu_len, 0, 0);
+ if (err == BPF_MTU_CHK_RET_FRAG_NEEDED)
+ retval = BPF_OK; /* Success in exceeding MTU check */
+
+ global_bpf_mtu_xdp = mtu_len;
+ return retval;
+}
diff --git a/tools/testing/selftests/bpf/progs/test_cls_redirect.c b/tools/testing/selftests/bpf/progs/test_cls_redirect.c
index f0b72e86bee5..683c8aaa63da 100644
--- a/tools/testing/selftests/bpf/progs/test_cls_redirect.c
+++ b/tools/testing/selftests/bpf/progs/test_cls_redirect.c
@@ -20,8 +20,17 @@
#include <bpf/bpf_helpers.h>
#include <bpf/bpf_endian.h>
+#include "bpf_compiler.h"
#include "test_cls_redirect.h"
+#pragma GCC diagnostic ignored "-Waddress-of-packed-member"
+
+#ifdef SUBPROGS
+#define INLINING __noinline
+#else
+#define INLINING __always_inline
+#endif
+
#define offsetofend(TYPE, MEMBER) \
(offsetof(TYPE, MEMBER) + sizeof((((TYPE *)0)->MEMBER)))
@@ -33,8 +42,8 @@ char _license[] SEC("license") = "Dual BSD/GPL";
/**
* Destination port and IP used for UDP encapsulation.
*/
-static volatile const __be16 ENCAPSULATION_PORT;
-static volatile const __be32 ENCAPSULATION_IP;
+volatile const __be16 ENCAPSULATION_PORT;
+volatile const __be32 ENCAPSULATION_IP;
typedef struct {
uint64_t processed_packets_total;
@@ -64,6 +73,7 @@ typedef struct {
uint64_t errors_total_encap_adjust_failed;
uint64_t errors_total_encap_buffer_too_small;
uint64_t errors_total_redirect_loop;
+ uint64_t errors_total_encap_mtu_violate;
} metrics_t;
typedef enum {
@@ -125,7 +135,7 @@ typedef struct buf {
uint8_t *const tail;
} buf_t;
-static size_t buf_off(const buf_t *buf)
+static __always_inline size_t buf_off(const buf_t *buf)
{
/* Clang seems to optimize constructs like
* a - b + c
@@ -145,7 +155,7 @@ static size_t buf_off(const buf_t *buf)
return off;
}
-static bool buf_copy(buf_t *buf, void *dst, size_t len)
+static __always_inline bool buf_copy(buf_t *buf, void *dst, size_t len)
{
if (bpf_skb_load_bytes(buf->skb, buf_off(buf), dst, len)) {
return false;
@@ -155,7 +165,7 @@ static bool buf_copy(buf_t *buf, void *dst, size_t len)
return true;
}
-static bool buf_skip(buf_t *buf, const size_t len)
+static __always_inline bool buf_skip(buf_t *buf, const size_t len)
{
/* Check whether off + len is valid in the non-linear part. */
if (buf_off(buf) + len > buf->skb->len) {
@@ -173,7 +183,7 @@ static bool buf_skip(buf_t *buf, const size_t len)
* If scratch is not NULL, the function will attempt to load non-linear
* data via bpf_skb_load_bytes. On success, scratch is returned.
*/
-static void *buf_assign(buf_t *buf, const size_t len, void *scratch)
+static __always_inline void *buf_assign(buf_t *buf, const size_t len, void *scratch)
{
if (buf->head + len > buf->tail) {
if (scratch == NULL) {
@@ -188,7 +198,7 @@ static void *buf_assign(buf_t *buf, const size_t len, void *scratch)
return ptr;
}
-static bool pkt_skip_ipv4_options(buf_t *buf, const struct iphdr *ipv4)
+static INLINING bool pkt_skip_ipv4_options(buf_t *buf, const struct iphdr *ipv4)
{
if (ipv4->ihl <= 5) {
return true;
@@ -197,13 +207,13 @@ static bool pkt_skip_ipv4_options(buf_t *buf, const struct iphdr *ipv4)
return buf_skip(buf, (ipv4->ihl - 5) * 4);
}
-static bool ipv4_is_fragment(const struct iphdr *ip)
+static INLINING bool ipv4_is_fragment(const struct iphdr *ip)
{
uint16_t frag_off = ip->frag_off & bpf_htons(IP_OFFSET_MASK);
return (ip->frag_off & bpf_htons(IP_MF)) != 0 || frag_off > 0;
}
-static struct iphdr *pkt_parse_ipv4(buf_t *pkt, struct iphdr *scratch)
+static __always_inline struct iphdr *pkt_parse_ipv4(buf_t *pkt, struct iphdr *scratch)
{
struct iphdr *ipv4 = buf_assign(pkt, sizeof(*ipv4), scratch);
if (ipv4 == NULL) {
@@ -222,7 +232,7 @@ static struct iphdr *pkt_parse_ipv4(buf_t *pkt, struct iphdr *scratch)
}
/* Parse the L4 ports from a packet, assuming a layout like TCP or UDP. */
-static bool pkt_parse_icmp_l4_ports(buf_t *pkt, flow_ports_t *ports)
+static INLINING bool pkt_parse_icmp_l4_ports(buf_t *pkt, flow_ports_t *ports)
{
if (!buf_copy(pkt, ports, sizeof(*ports))) {
return false;
@@ -237,7 +247,7 @@ static bool pkt_parse_icmp_l4_ports(buf_t *pkt, flow_ports_t *ports)
return true;
}
-static uint16_t pkt_checksum_fold(uint32_t csum)
+static INLINING uint16_t pkt_checksum_fold(uint32_t csum)
{
/* The highest reasonable value for an IPv4 header
* checksum requires two folds, so we just do that always.
@@ -247,7 +257,7 @@ static uint16_t pkt_checksum_fold(uint32_t csum)
return (uint16_t)~csum;
}
-static void pkt_ipv4_checksum(struct iphdr *iph)
+static INLINING void pkt_ipv4_checksum(struct iphdr *iph)
{
iph->check = 0;
@@ -260,7 +270,7 @@ static void pkt_ipv4_checksum(struct iphdr *iph)
uint32_t acc = 0;
uint16_t *ipw = (uint16_t *)iph;
-#pragma clang loop unroll(full)
+ __pragma_loop_unroll_full
for (size_t i = 0; i < sizeof(struct iphdr) / 2; i++) {
acc += ipw[i];
}
@@ -268,10 +278,11 @@ static void pkt_ipv4_checksum(struct iphdr *iph)
iph->check = pkt_checksum_fold(acc);
}
-static bool pkt_skip_ipv6_extension_headers(buf_t *pkt,
- const struct ipv6hdr *ipv6,
- uint8_t *upper_proto,
- bool *is_fragment)
+static INLINING
+bool pkt_skip_ipv6_extension_headers(buf_t *pkt,
+ const struct ipv6hdr *ipv6,
+ uint8_t *upper_proto,
+ bool *is_fragment)
{
/* We understand five extension headers.
* https://tools.ietf.org/html/rfc8200#section-4.1 states that all
@@ -286,7 +297,7 @@ static bool pkt_skip_ipv6_extension_headers(buf_t *pkt,
};
*is_fragment = false;
-#pragma clang loop unroll(full)
+ __pragma_loop_unroll_full
for (int i = 0; i < 6; i++) {
switch (exthdr.next) {
case IPPROTO_FRAGMENT:
@@ -336,7 +347,7 @@ static bool pkt_skip_ipv6_extension_headers(buf_t *pkt,
* scratch is allocated on the stack. However, this usage should be safe since
* it's the callers stack after all.
*/
-static inline __attribute__((__always_inline__)) struct ipv6hdr *
+static __always_inline struct ipv6hdr *
pkt_parse_ipv6(buf_t *pkt, struct ipv6hdr *scratch, uint8_t *proto,
bool *is_fragment)
{
@@ -354,20 +365,20 @@ pkt_parse_ipv6(buf_t *pkt, struct ipv6hdr *scratch, uint8_t *proto,
/* Global metrics, per CPU
*/
-struct bpf_map_def metrics_map SEC("maps") = {
- .type = BPF_MAP_TYPE_PERCPU_ARRAY,
- .key_size = sizeof(unsigned int),
- .value_size = sizeof(metrics_t),
- .max_entries = 1,
-};
-
-static metrics_t *get_global_metrics(void)
+struct {
+ __uint(type, BPF_MAP_TYPE_PERCPU_ARRAY);
+ __uint(max_entries, 1);
+ __type(key, unsigned int);
+ __type(value, metrics_t);
+} metrics_map SEC(".maps");
+
+static INLINING metrics_t *get_global_metrics(void)
{
uint64_t key = 0;
return bpf_map_lookup_elem(&metrics_map, &key);
}
-static ret_t accept_locally(struct __sk_buff *skb, encap_headers_t *encap)
+static INLINING ret_t accept_locally(struct __sk_buff *skb, encap_headers_t *encap)
{
const int payload_off =
sizeof(*encap) +
@@ -388,8 +399,8 @@ static ret_t accept_locally(struct __sk_buff *skb, encap_headers_t *encap)
return bpf_redirect(skb->ifindex, BPF_F_INGRESS);
}
-static ret_t forward_with_gre(struct __sk_buff *skb, encap_headers_t *encap,
- struct in_addr *next_hop, metrics_t *metrics)
+static INLINING ret_t forward_with_gre(struct __sk_buff *skb, encap_headers_t *encap,
+ struct in_addr *next_hop, metrics_t *metrics)
{
metrics->forwarded_packets_total_gre++;
@@ -400,6 +411,7 @@ static ret_t forward_with_gre(struct __sk_buff *skb, encap_headers_t *encap,
payload_off - sizeof(struct ethhdr) - sizeof(struct iphdr);
int32_t delta = sizeof(struct gre_base_hdr) - encap_overhead;
uint16_t proto = ETH_P_IP;
+ uint32_t mtu_len = 0;
/* Loop protection: the inner packet's TTL is decremented as a safeguard
* against any forwarding loop. As the only interesting field is the TTL
@@ -472,6 +484,11 @@ static ret_t forward_with_gre(struct __sk_buff *skb, encap_headers_t *encap,
}
}
+ if (bpf_check_mtu(skb, skb->ifindex, &mtu_len, delta, 0)) {
+ metrics->errors_total_encap_mtu_violate++;
+ return TC_ACT_SHOT;
+ }
+
if (bpf_skb_adjust_room(skb, delta, BPF_ADJ_ROOM_NET,
BPF_F_ADJ_ROOM_FIXED_GSO |
BPF_F_ADJ_ROOM_NO_CSUM_RESET) ||
@@ -509,8 +526,8 @@ static ret_t forward_with_gre(struct __sk_buff *skb, encap_headers_t *encap,
return bpf_redirect(skb->ifindex, 0);
}
-static ret_t forward_to_next_hop(struct __sk_buff *skb, encap_headers_t *encap,
- struct in_addr *next_hop, metrics_t *metrics)
+static INLINING ret_t forward_to_next_hop(struct __sk_buff *skb, encap_headers_t *encap,
+ struct in_addr *next_hop, metrics_t *metrics)
{
/* swap L2 addresses */
/* This assumes that packets are received from a router.
@@ -546,7 +563,7 @@ static ret_t forward_to_next_hop(struct __sk_buff *skb, encap_headers_t *encap,
return bpf_redirect(skb->ifindex, 0);
}
-static ret_t skip_next_hops(buf_t *pkt, int n)
+static INLINING ret_t skip_next_hops(buf_t *pkt, int n)
{
switch (n) {
case 1:
@@ -566,8 +583,8 @@ static ret_t skip_next_hops(buf_t *pkt, int n)
* pkt is positioned just after the variable length GLB header
* iff the call is successful.
*/
-static ret_t get_next_hop(buf_t *pkt, encap_headers_t *encap,
- struct in_addr *next_hop)
+static INLINING ret_t get_next_hop(buf_t *pkt, encap_headers_t *encap,
+ struct in_addr *next_hop)
{
if (encap->unigue.next_hop > encap->unigue.hop_count) {
return TC_ACT_SHOT;
@@ -586,7 +603,7 @@ static ret_t get_next_hop(buf_t *pkt, encap_headers_t *encap,
return TC_ACT_SHOT;
}
- /* Skip the remainig next hops (may be zero). */
+ /* Skip the remaining next hops (may be zero). */
return skip_next_hops(pkt, encap->unigue.hop_count -
encap->unigue.next_hop - 1);
}
@@ -596,13 +613,13 @@ static ret_t get_next_hop(buf_t *pkt, encap_headers_t *encap,
*
* fill_tuple(&t, foo, sizeof(struct iphdr), 123, 321)
*
- * clang will substitue a costant for sizeof, which allows the verifier
- * to track it's value. Based on this, it can figure out the constant
+ * clang will substitute a constant for sizeof, which allows the verifier
+ * to track its value. Based on this, it can figure out the constant
* return value, and calling code works while still being "generic" to
* IPv4 and IPv6.
*/
-static uint64_t fill_tuple(struct bpf_sock_tuple *tuple, void *iph,
- uint64_t iphlen, uint16_t sport, uint16_t dport)
+static INLINING uint64_t fill_tuple(struct bpf_sock_tuple *tuple, void *iph,
+ uint64_t iphlen, uint16_t sport, uint16_t dport)
{
switch (iphlen) {
case sizeof(struct iphdr): {
@@ -630,9 +647,9 @@ static uint64_t fill_tuple(struct bpf_sock_tuple *tuple, void *iph,
}
}
-static verdict_t classify_tcp(struct __sk_buff *skb,
- struct bpf_sock_tuple *tuple, uint64_t tuplen,
- void *iph, struct tcphdr *tcp)
+static INLINING verdict_t classify_tcp(struct __sk_buff *skb,
+ struct bpf_sock_tuple *tuple, uint64_t tuplen,
+ void *iph, struct tcphdr *tcp)
{
struct bpf_sock *sk =
bpf_skc_lookup_tcp(skb, tuple, tuplen, BPF_F_CURRENT_NETNS, 0);
@@ -663,8 +680,8 @@ static verdict_t classify_tcp(struct __sk_buff *skb,
return UNKNOWN;
}
-static verdict_t classify_udp(struct __sk_buff *skb,
- struct bpf_sock_tuple *tuple, uint64_t tuplen)
+static INLINING verdict_t classify_udp(struct __sk_buff *skb,
+ struct bpf_sock_tuple *tuple, uint64_t tuplen)
{
struct bpf_sock *sk =
bpf_sk_lookup_udp(skb, tuple, tuplen, BPF_F_CURRENT_NETNS, 0);
@@ -681,9 +698,9 @@ static verdict_t classify_udp(struct __sk_buff *skb,
return UNKNOWN;
}
-static verdict_t classify_icmp(struct __sk_buff *skb, uint8_t proto,
- struct bpf_sock_tuple *tuple, uint64_t tuplen,
- metrics_t *metrics)
+static INLINING verdict_t classify_icmp(struct __sk_buff *skb, uint8_t proto,
+ struct bpf_sock_tuple *tuple, uint64_t tuplen,
+ metrics_t *metrics)
{
switch (proto) {
case IPPROTO_TCP:
@@ -698,7 +715,7 @@ static verdict_t classify_icmp(struct __sk_buff *skb, uint8_t proto,
}
}
-static verdict_t process_icmpv4(buf_t *pkt, metrics_t *metrics)
+static INLINING verdict_t process_icmpv4(buf_t *pkt, metrics_t *metrics)
{
struct icmphdr icmp;
if (!buf_copy(pkt, &icmp, sizeof(icmp))) {
@@ -745,7 +762,7 @@ static verdict_t process_icmpv4(buf_t *pkt, metrics_t *metrics)
sizeof(tuple.ipv4), metrics);
}
-static verdict_t process_icmpv6(buf_t *pkt, metrics_t *metrics)
+static INLINING verdict_t process_icmpv6(buf_t *pkt, metrics_t *metrics)
{
struct icmp6hdr icmp6;
if (!buf_copy(pkt, &icmp6, sizeof(icmp6))) {
@@ -797,8 +814,8 @@ static verdict_t process_icmpv6(buf_t *pkt, metrics_t *metrics)
metrics);
}
-static verdict_t process_tcp(buf_t *pkt, void *iph, uint64_t iphlen,
- metrics_t *metrics)
+static INLINING verdict_t process_tcp(buf_t *pkt, void *iph, uint64_t iphlen,
+ metrics_t *metrics)
{
metrics->l4_protocol_packets_total_tcp++;
@@ -819,8 +836,8 @@ static verdict_t process_tcp(buf_t *pkt, void *iph, uint64_t iphlen,
return classify_tcp(pkt->skb, &tuple, tuplen, iph, tcp);
}
-static verdict_t process_udp(buf_t *pkt, void *iph, uint64_t iphlen,
- metrics_t *metrics)
+static INLINING verdict_t process_udp(buf_t *pkt, void *iph, uint64_t iphlen,
+ metrics_t *metrics)
{
metrics->l4_protocol_packets_total_udp++;
@@ -837,7 +854,7 @@ static verdict_t process_udp(buf_t *pkt, void *iph, uint64_t iphlen,
return classify_udp(pkt->skb, &tuple, tuplen);
}
-static verdict_t process_ipv4(buf_t *pkt, metrics_t *metrics)
+static INLINING verdict_t process_ipv4(buf_t *pkt, metrics_t *metrics)
{
metrics->l3_protocol_packets_total_ipv4++;
@@ -874,7 +891,7 @@ static verdict_t process_ipv4(buf_t *pkt, metrics_t *metrics)
}
}
-static verdict_t process_ipv6(buf_t *pkt, metrics_t *metrics)
+static INLINING verdict_t process_ipv6(buf_t *pkt, metrics_t *metrics)
{
metrics->l3_protocol_packets_total_ipv6++;
@@ -914,7 +931,7 @@ static verdict_t process_ipv6(buf_t *pkt, metrics_t *metrics)
}
}
-SEC("classifier/cls_redirect")
+SEC("tc")
int cls_redirect(struct __sk_buff *skb)
{
metrics_t *metrics = get_global_metrics();
diff --git a/tools/testing/selftests/bpf/progs/test_cls_redirect.h b/tools/testing/selftests/bpf/progs/test_cls_redirect.h
index 76eab0aacba0..233b089d1fba 100644
--- a/tools/testing/selftests/bpf/progs/test_cls_redirect.h
+++ b/tools/testing/selftests/bpf/progs/test_cls_redirect.h
@@ -12,6 +12,15 @@
#include <linux/ipv6.h>
#include <linux/udp.h>
+/* offsetof() is used in static asserts, and the libbpf-redefined CO-RE
+ * friendly version breaks compilation for older clang versions <= 15
+ * when invoked in a static assert. Restore original here.
+ */
+#ifdef offsetof
+#undef offsetof
+#define offsetof(type, member) __builtin_offsetof(type, member)
+#endif
+
struct gre_base_hdr {
uint16_t flags;
uint16_t protocol;
diff --git a/tools/testing/selftests/bpf/progs/test_cls_redirect_dynptr.c b/tools/testing/selftests/bpf/progs/test_cls_redirect_dynptr.c
new file mode 100644
index 000000000000..da54c09e9a15
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/test_cls_redirect_dynptr.c
@@ -0,0 +1,981 @@
+// SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause
+// Copyright (c) 2019, 2020 Cloudflare
+
+#include <stdbool.h>
+#include <stddef.h>
+#include <stdint.h>
+#include <string.h>
+
+#include <linux/bpf.h>
+#include <linux/icmp.h>
+#include <linux/icmpv6.h>
+#include <linux/if_ether.h>
+#include <linux/in.h>
+#include <linux/ip.h>
+#include <linux/ipv6.h>
+#include <linux/pkt_cls.h>
+#include <linux/tcp.h>
+#include <linux/udp.h>
+
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_endian.h>
+
+#include "test_cls_redirect.h"
+#include "bpf_kfuncs.h"
+
+#pragma GCC diagnostic ignored "-Waddress-of-packed-member"
+
+#define offsetofend(TYPE, MEMBER) \
+ (offsetof(TYPE, MEMBER) + sizeof((((TYPE *)0)->MEMBER)))
+
+#define IP_OFFSET_MASK (0x1FFF)
+#define IP_MF (0x2000)
+
+char _license[] SEC("license") = "Dual BSD/GPL";
+
+/**
+ * Destination port and IP used for UDP encapsulation.
+ */
+volatile const __be16 ENCAPSULATION_PORT;
+volatile const __be32 ENCAPSULATION_IP;
+
+typedef struct {
+ uint64_t processed_packets_total;
+ uint64_t l3_protocol_packets_total_ipv4;
+ uint64_t l3_protocol_packets_total_ipv6;
+ uint64_t l4_protocol_packets_total_tcp;
+ uint64_t l4_protocol_packets_total_udp;
+ uint64_t accepted_packets_total_syn;
+ uint64_t accepted_packets_total_syn_cookies;
+ uint64_t accepted_packets_total_last_hop;
+ uint64_t accepted_packets_total_icmp_echo_request;
+ uint64_t accepted_packets_total_established;
+ uint64_t forwarded_packets_total_gue;
+ uint64_t forwarded_packets_total_gre;
+
+ uint64_t errors_total_unknown_l3_proto;
+ uint64_t errors_total_unknown_l4_proto;
+ uint64_t errors_total_malformed_ip;
+ uint64_t errors_total_fragmented_ip;
+ uint64_t errors_total_malformed_icmp;
+ uint64_t errors_total_unwanted_icmp;
+ uint64_t errors_total_malformed_icmp_pkt_too_big;
+ uint64_t errors_total_malformed_tcp;
+ uint64_t errors_total_malformed_udp;
+ uint64_t errors_total_icmp_echo_replies;
+ uint64_t errors_total_malformed_encapsulation;
+ uint64_t errors_total_encap_adjust_failed;
+ uint64_t errors_total_encap_buffer_too_small;
+ uint64_t errors_total_redirect_loop;
+ uint64_t errors_total_encap_mtu_violate;
+} metrics_t;
+
+typedef enum {
+ INVALID = 0,
+ UNKNOWN,
+ ECHO_REQUEST,
+ SYN,
+ SYN_COOKIE,
+ ESTABLISHED,
+} verdict_t;
+
+typedef struct {
+ uint16_t src, dst;
+} flow_ports_t;
+
+_Static_assert(
+ sizeof(flow_ports_t) !=
+ offsetofend(struct bpf_sock_tuple, ipv4.dport) -
+ offsetof(struct bpf_sock_tuple, ipv4.sport) - 1,
+ "flow_ports_t must match sport and dport in struct bpf_sock_tuple");
+_Static_assert(
+ sizeof(flow_ports_t) !=
+ offsetofend(struct bpf_sock_tuple, ipv6.dport) -
+ offsetof(struct bpf_sock_tuple, ipv6.sport) - 1,
+ "flow_ports_t must match sport and dport in struct bpf_sock_tuple");
+
+struct iphdr_info {
+ void *hdr;
+ __u64 len;
+};
+
+typedef int ret_t;
+
+/* This is a bit of a hack. We need a return value which allows us to
+ * indicate that the regular flow of the program should continue,
+ * while allowing functions to use XDP_PASS and XDP_DROP, etc.
+ */
+static const ret_t CONTINUE_PROCESSING = -1;
+
+/* Convenience macro to call functions which return ret_t.
+ */
+#define MAYBE_RETURN(x) \
+ do { \
+ ret_t __ret = x; \
+ if (__ret != CONTINUE_PROCESSING) \
+ return __ret; \
+ } while (0)
+
+static bool ipv4_is_fragment(const struct iphdr *ip)
+{
+ uint16_t frag_off = ip->frag_off & bpf_htons(IP_OFFSET_MASK);
+ return (ip->frag_off & bpf_htons(IP_MF)) != 0 || frag_off > 0;
+}
+
+static int pkt_parse_ipv4(struct bpf_dynptr *dynptr, __u64 *offset, struct iphdr *iphdr)
+{
+ if (bpf_dynptr_read(iphdr, sizeof(*iphdr), dynptr, *offset, 0))
+ return -1;
+
+ *offset += sizeof(*iphdr);
+
+ if (iphdr->ihl < 5)
+ return -1;
+
+ /* skip ipv4 options */
+ *offset += (iphdr->ihl - 5) * 4;
+
+ return 0;
+}
+
+/* Parse the L4 ports from a packet, assuming a layout like TCP or UDP. */
+static bool pkt_parse_icmp_l4_ports(struct bpf_dynptr *dynptr, __u64 *offset, flow_ports_t *ports)
+{
+ if (bpf_dynptr_read(ports, sizeof(*ports), dynptr, *offset, 0))
+ return false;
+
+ *offset += sizeof(*ports);
+
+ /* Ports in the L4 headers are reversed, since we are parsing an ICMP
+ * payload which is going towards the eyeball.
+ */
+ uint16_t dst = ports->src;
+ ports->src = ports->dst;
+ ports->dst = dst;
+ return true;
+}
+
+static uint16_t pkt_checksum_fold(uint32_t csum)
+{
+ /* The highest reasonable value for an IPv4 header
+ * checksum requires two folds, so we just do that always.
+ */
+ csum = (csum & 0xffff) + (csum >> 16);
+ csum = (csum & 0xffff) + (csum >> 16);
+ return (uint16_t)~csum;
+}
+
+static void pkt_ipv4_checksum(struct iphdr *iph)
+{
+ iph->check = 0;
+
+ /* An IP header without options is 20 bytes. Two of those
+ * are the checksum, which we always set to zero. Hence,
+ * the maximum accumulated value is 18 / 2 * 0xffff = 0x8fff7,
+ * which fits in 32 bit.
+ */
+ _Static_assert(sizeof(struct iphdr) == 20, "iphdr must be 20 bytes");
+ uint32_t acc = 0;
+ uint16_t *ipw = (uint16_t *)iph;
+
+ for (size_t i = 0; i < sizeof(struct iphdr) / 2; i++)
+ acc += ipw[i];
+
+ iph->check = pkt_checksum_fold(acc);
+}
+
+static bool pkt_skip_ipv6_extension_headers(struct bpf_dynptr *dynptr, __u64 *offset,
+ const struct ipv6hdr *ipv6, uint8_t *upper_proto,
+ bool *is_fragment)
+{
+ /* We understand five extension headers.
+ * https://tools.ietf.org/html/rfc8200#section-4.1 states that all
+ * headers should occur once, except Destination Options, which may
+ * occur twice. Hence we give up after 6 headers.
+ */
+ struct {
+ uint8_t next;
+ uint8_t len;
+ } exthdr = {
+ .next = ipv6->nexthdr,
+ };
+ *is_fragment = false;
+
+ for (int i = 0; i < 6; i++) {
+ switch (exthdr.next) {
+ case IPPROTO_FRAGMENT:
+ *is_fragment = true;
+ /* NB: We don't check that hdrlen == 0 as per spec. */
+ /* fallthrough; */
+
+ case IPPROTO_HOPOPTS:
+ case IPPROTO_ROUTING:
+ case IPPROTO_DSTOPTS:
+ case IPPROTO_MH:
+ if (bpf_dynptr_read(&exthdr, sizeof(exthdr), dynptr, *offset, 0))
+ return false;
+
+ /* hdrlen is in 8-octet units, and excludes the first 8 octets. */
+ *offset += (exthdr.len + 1) * 8;
+
+ /* Decode next header */
+ break;
+
+ default:
+ /* The next header is not one of the known extension
+ * headers, treat it as the upper layer header.
+ *
+ * This handles IPPROTO_NONE.
+ *
+ * Encapsulating Security Payload (50) and Authentication
+ * Header (51) also end up here (and will trigger an
+ * unknown proto error later). They have a custom header
+ * format and seem too esoteric to care about.
+ */
+ *upper_proto = exthdr.next;
+ return true;
+ }
+ }
+
+ /* We never found an upper layer header. */
+ return false;
+}
+
+static int pkt_parse_ipv6(struct bpf_dynptr *dynptr, __u64 *offset, struct ipv6hdr *ipv6,
+ uint8_t *proto, bool *is_fragment)
+{
+ if (bpf_dynptr_read(ipv6, sizeof(*ipv6), dynptr, *offset, 0))
+ return -1;
+
+ *offset += sizeof(*ipv6);
+
+ if (!pkt_skip_ipv6_extension_headers(dynptr, offset, ipv6, proto, is_fragment))
+ return -1;
+
+ return 0;
+}
+
+/* Global metrics, per CPU
+ */
+struct {
+ __uint(type, BPF_MAP_TYPE_PERCPU_ARRAY);
+ __uint(max_entries, 1);
+ __type(key, unsigned int);
+ __type(value, metrics_t);
+} metrics_map SEC(".maps");
+
+static metrics_t *get_global_metrics(void)
+{
+ uint64_t key = 0;
+ return bpf_map_lookup_elem(&metrics_map, &key);
+}
+
+static ret_t accept_locally(struct __sk_buff *skb, encap_headers_t *encap)
+{
+ const int payload_off =
+ sizeof(*encap) +
+ sizeof(struct in_addr) * encap->unigue.hop_count;
+ int32_t encap_overhead = payload_off - sizeof(struct ethhdr);
+
+ /* Changing the ethertype if the encapsulated packet is ipv6 */
+ if (encap->gue.proto_ctype == IPPROTO_IPV6)
+ encap->eth.h_proto = bpf_htons(ETH_P_IPV6);
+
+ if (bpf_skb_adjust_room(skb, -encap_overhead, BPF_ADJ_ROOM_MAC,
+ BPF_F_ADJ_ROOM_FIXED_GSO |
+ BPF_F_ADJ_ROOM_NO_CSUM_RESET) ||
+ bpf_csum_level(skb, BPF_CSUM_LEVEL_DEC))
+ return TC_ACT_SHOT;
+
+ return bpf_redirect(skb->ifindex, BPF_F_INGRESS);
+}
+
+static ret_t forward_with_gre(struct __sk_buff *skb, struct bpf_dynptr *dynptr,
+ encap_headers_t *encap, struct in_addr *next_hop,
+ metrics_t *metrics)
+{
+ const int payload_off =
+ sizeof(*encap) +
+ sizeof(struct in_addr) * encap->unigue.hop_count;
+ int32_t encap_overhead =
+ payload_off - sizeof(struct ethhdr) - sizeof(struct iphdr);
+ int32_t delta = sizeof(struct gre_base_hdr) - encap_overhead;
+ __u8 encap_buffer[sizeof(encap_gre_t)] = {};
+ uint16_t proto = ETH_P_IP;
+ uint32_t mtu_len = 0;
+ encap_gre_t *encap_gre;
+
+ metrics->forwarded_packets_total_gre++;
+
+ /* Loop protection: the inner packet's TTL is decremented as a safeguard
+ * against any forwarding loop. As the only interesting field is the TTL
+ * hop limit for IPv6, it is easier to use bpf_skb_load_bytes/bpf_skb_store_bytes
+ * as they handle the split packets if needed (no need for the data to be
+ * in the linear section).
+ */
+ if (encap->gue.proto_ctype == IPPROTO_IPV6) {
+ proto = ETH_P_IPV6;
+ uint8_t ttl;
+ int rc;
+
+ rc = bpf_skb_load_bytes(
+ skb, payload_off + offsetof(struct ipv6hdr, hop_limit),
+ &ttl, 1);
+ if (rc != 0) {
+ metrics->errors_total_malformed_encapsulation++;
+ return TC_ACT_SHOT;
+ }
+
+ if (ttl == 0) {
+ metrics->errors_total_redirect_loop++;
+ return TC_ACT_SHOT;
+ }
+
+ ttl--;
+ rc = bpf_skb_store_bytes(
+ skb, payload_off + offsetof(struct ipv6hdr, hop_limit),
+ &ttl, 1, 0);
+ if (rc != 0) {
+ metrics->errors_total_malformed_encapsulation++;
+ return TC_ACT_SHOT;
+ }
+ } else {
+ uint8_t ttl;
+ int rc;
+
+ rc = bpf_skb_load_bytes(
+ skb, payload_off + offsetof(struct iphdr, ttl), &ttl,
+ 1);
+ if (rc != 0) {
+ metrics->errors_total_malformed_encapsulation++;
+ return TC_ACT_SHOT;
+ }
+
+ if (ttl == 0) {
+ metrics->errors_total_redirect_loop++;
+ return TC_ACT_SHOT;
+ }
+
+ /* IPv4 also has a checksum to patch. While the TTL is only one byte,
+ * this function only works for 2 and 4 bytes arguments (the result is
+ * the same).
+ */
+ rc = bpf_l3_csum_replace(
+ skb, payload_off + offsetof(struct iphdr, check), ttl,
+ ttl - 1, 2);
+ if (rc != 0) {
+ metrics->errors_total_malformed_encapsulation++;
+ return TC_ACT_SHOT;
+ }
+
+ ttl--;
+ rc = bpf_skb_store_bytes(
+ skb, payload_off + offsetof(struct iphdr, ttl), &ttl, 1,
+ 0);
+ if (rc != 0) {
+ metrics->errors_total_malformed_encapsulation++;
+ return TC_ACT_SHOT;
+ }
+ }
+
+ if (bpf_check_mtu(skb, skb->ifindex, &mtu_len, delta, 0)) {
+ metrics->errors_total_encap_mtu_violate++;
+ return TC_ACT_SHOT;
+ }
+
+ if (bpf_skb_adjust_room(skb, delta, BPF_ADJ_ROOM_NET,
+ BPF_F_ADJ_ROOM_FIXED_GSO |
+ BPF_F_ADJ_ROOM_NO_CSUM_RESET) ||
+ bpf_csum_level(skb, BPF_CSUM_LEVEL_INC)) {
+ metrics->errors_total_encap_adjust_failed++;
+ return TC_ACT_SHOT;
+ }
+
+ if (bpf_skb_pull_data(skb, sizeof(encap_gre_t))) {
+ metrics->errors_total_encap_buffer_too_small++;
+ return TC_ACT_SHOT;
+ }
+
+ encap_gre = bpf_dynptr_slice_rdwr(dynptr, 0, encap_buffer, sizeof(encap_buffer));
+ if (!encap_gre) {
+ metrics->errors_total_encap_buffer_too_small++;
+ return TC_ACT_SHOT;
+ }
+
+ encap_gre->ip.protocol = IPPROTO_GRE;
+ encap_gre->ip.daddr = next_hop->s_addr;
+ encap_gre->ip.saddr = ENCAPSULATION_IP;
+ encap_gre->ip.tot_len =
+ bpf_htons(bpf_ntohs(encap_gre->ip.tot_len) + delta);
+ encap_gre->gre.flags = 0;
+ encap_gre->gre.protocol = bpf_htons(proto);
+ pkt_ipv4_checksum((void *)&encap_gre->ip);
+
+ if (encap_gre == encap_buffer)
+ bpf_dynptr_write(dynptr, 0, encap_buffer, sizeof(encap_buffer), 0);
+
+ return bpf_redirect(skb->ifindex, 0);
+}
+
+static ret_t forward_to_next_hop(struct __sk_buff *skb, struct bpf_dynptr *dynptr,
+ encap_headers_t *encap, struct in_addr *next_hop,
+ metrics_t *metrics)
+{
+ /* swap L2 addresses */
+ /* This assumes that packets are received from a router.
+ * So just swapping the MAC addresses here will make the packet go back to
+ * the router, which will send it to the appropriate machine.
+ */
+ unsigned char temp[ETH_ALEN];
+ memcpy(temp, encap->eth.h_dest, sizeof(temp));
+ memcpy(encap->eth.h_dest, encap->eth.h_source,
+ sizeof(encap->eth.h_dest));
+ memcpy(encap->eth.h_source, temp, sizeof(encap->eth.h_source));
+
+ if (encap->unigue.next_hop == encap->unigue.hop_count - 1 &&
+ encap->unigue.last_hop_gre) {
+ return forward_with_gre(skb, dynptr, encap, next_hop, metrics);
+ }
+
+ metrics->forwarded_packets_total_gue++;
+ uint32_t old_saddr = encap->ip.saddr;
+ encap->ip.saddr = encap->ip.daddr;
+ encap->ip.daddr = next_hop->s_addr;
+ if (encap->unigue.next_hop < encap->unigue.hop_count) {
+ encap->unigue.next_hop++;
+ }
+
+ /* Remove ip->saddr, add next_hop->s_addr */
+ const uint64_t off = offsetof(typeof(*encap), ip.check);
+ int ret = bpf_l3_csum_replace(skb, off, old_saddr, next_hop->s_addr, 4);
+ if (ret < 0) {
+ return TC_ACT_SHOT;
+ }
+
+ return bpf_redirect(skb->ifindex, 0);
+}
+
+static ret_t skip_next_hops(__u64 *offset, int n)
+{
+ switch (n) {
+ case 1:
+ *offset += sizeof(struct in_addr);
+ case 0:
+ return CONTINUE_PROCESSING;
+
+ default:
+ return TC_ACT_SHOT;
+ }
+}
+
+/* Get the next hop from the GLB header.
+ *
+ * Sets next_hop->s_addr to 0 if there are no more hops left.
+ * pkt is positioned just after the variable length GLB header
+ * iff the call is successful.
+ */
+static ret_t get_next_hop(struct bpf_dynptr *dynptr, __u64 *offset, encap_headers_t *encap,
+ struct in_addr *next_hop)
+{
+ if (encap->unigue.next_hop > encap->unigue.hop_count)
+ return TC_ACT_SHOT;
+
+ /* Skip "used" next hops. */
+ MAYBE_RETURN(skip_next_hops(offset, encap->unigue.next_hop));
+
+ if (encap->unigue.next_hop == encap->unigue.hop_count) {
+ /* No more next hops, we are at the end of the GLB header. */
+ next_hop->s_addr = 0;
+ return CONTINUE_PROCESSING;
+ }
+
+ if (bpf_dynptr_read(next_hop, sizeof(*next_hop), dynptr, *offset, 0))
+ return TC_ACT_SHOT;
+
+ *offset += sizeof(*next_hop);
+
+ /* Skip the remainig next hops (may be zero). */
+ return skip_next_hops(offset, encap->unigue.hop_count - encap->unigue.next_hop - 1);
+}
+
+/* Fill a bpf_sock_tuple to be used with the socket lookup functions.
+ * This is a kludge that let's us work around verifier limitations:
+ *
+ * fill_tuple(&t, foo, sizeof(struct iphdr), 123, 321)
+ *
+ * clang will substitue a costant for sizeof, which allows the verifier
+ * to track it's value. Based on this, it can figure out the constant
+ * return value, and calling code works while still being "generic" to
+ * IPv4 and IPv6.
+ */
+static uint64_t fill_tuple(struct bpf_sock_tuple *tuple, void *iph,
+ uint64_t iphlen, uint16_t sport, uint16_t dport)
+{
+ switch (iphlen) {
+ case sizeof(struct iphdr): {
+ struct iphdr *ipv4 = (struct iphdr *)iph;
+ tuple->ipv4.daddr = ipv4->daddr;
+ tuple->ipv4.saddr = ipv4->saddr;
+ tuple->ipv4.sport = sport;
+ tuple->ipv4.dport = dport;
+ return sizeof(tuple->ipv4);
+ }
+
+ case sizeof(struct ipv6hdr): {
+ struct ipv6hdr *ipv6 = (struct ipv6hdr *)iph;
+ memcpy(&tuple->ipv6.daddr, &ipv6->daddr,
+ sizeof(tuple->ipv6.daddr));
+ memcpy(&tuple->ipv6.saddr, &ipv6->saddr,
+ sizeof(tuple->ipv6.saddr));
+ tuple->ipv6.sport = sport;
+ tuple->ipv6.dport = dport;
+ return sizeof(tuple->ipv6);
+ }
+
+ default:
+ return 0;
+ }
+}
+
+static verdict_t classify_tcp(struct __sk_buff *skb, struct bpf_sock_tuple *tuple,
+ uint64_t tuplen, void *iph, struct tcphdr *tcp)
+{
+ struct bpf_sock *sk =
+ bpf_skc_lookup_tcp(skb, tuple, tuplen, BPF_F_CURRENT_NETNS, 0);
+
+ if (sk == NULL)
+ return UNKNOWN;
+
+ if (sk->state != BPF_TCP_LISTEN) {
+ bpf_sk_release(sk);
+ return ESTABLISHED;
+ }
+
+ if (iph != NULL && tcp != NULL) {
+ /* Kludge: we've run out of arguments, but need the length of the ip header. */
+ uint64_t iphlen = sizeof(struct iphdr);
+
+ if (tuplen == sizeof(tuple->ipv6))
+ iphlen = sizeof(struct ipv6hdr);
+
+ if (bpf_tcp_check_syncookie(sk, iph, iphlen, tcp,
+ sizeof(*tcp)) == 0) {
+ bpf_sk_release(sk);
+ return SYN_COOKIE;
+ }
+ }
+
+ bpf_sk_release(sk);
+ return UNKNOWN;
+}
+
+static verdict_t classify_udp(struct __sk_buff *skb, struct bpf_sock_tuple *tuple, uint64_t tuplen)
+{
+ struct bpf_sock *sk =
+ bpf_sk_lookup_udp(skb, tuple, tuplen, BPF_F_CURRENT_NETNS, 0);
+
+ if (sk == NULL)
+ return UNKNOWN;
+
+ if (sk->state == BPF_TCP_ESTABLISHED) {
+ bpf_sk_release(sk);
+ return ESTABLISHED;
+ }
+
+ bpf_sk_release(sk);
+ return UNKNOWN;
+}
+
+static verdict_t classify_icmp(struct __sk_buff *skb, uint8_t proto, struct bpf_sock_tuple *tuple,
+ uint64_t tuplen, metrics_t *metrics)
+{
+ switch (proto) {
+ case IPPROTO_TCP:
+ return classify_tcp(skb, tuple, tuplen, NULL, NULL);
+
+ case IPPROTO_UDP:
+ return classify_udp(skb, tuple, tuplen);
+
+ default:
+ metrics->errors_total_malformed_icmp++;
+ return INVALID;
+ }
+}
+
+static verdict_t process_icmpv4(struct __sk_buff *skb, struct bpf_dynptr *dynptr, __u64 *offset,
+ metrics_t *metrics)
+{
+ struct icmphdr icmp;
+ struct iphdr ipv4;
+
+ if (bpf_dynptr_read(&icmp, sizeof(icmp), dynptr, *offset, 0)) {
+ metrics->errors_total_malformed_icmp++;
+ return INVALID;
+ }
+
+ *offset += sizeof(icmp);
+
+ /* We should never receive encapsulated echo replies. */
+ if (icmp.type == ICMP_ECHOREPLY) {
+ metrics->errors_total_icmp_echo_replies++;
+ return INVALID;
+ }
+
+ if (icmp.type == ICMP_ECHO)
+ return ECHO_REQUEST;
+
+ if (icmp.type != ICMP_DEST_UNREACH || icmp.code != ICMP_FRAG_NEEDED) {
+ metrics->errors_total_unwanted_icmp++;
+ return INVALID;
+ }
+
+ if (pkt_parse_ipv4(dynptr, offset, &ipv4)) {
+ metrics->errors_total_malformed_icmp_pkt_too_big++;
+ return INVALID;
+ }
+
+ /* The source address in the outer IP header is from the entity that
+ * originated the ICMP message. Use the original IP header to restore
+ * the correct flow tuple.
+ */
+ struct bpf_sock_tuple tuple;
+ tuple.ipv4.saddr = ipv4.daddr;
+ tuple.ipv4.daddr = ipv4.saddr;
+
+ if (!pkt_parse_icmp_l4_ports(dynptr, offset, (flow_ports_t *)&tuple.ipv4.sport)) {
+ metrics->errors_total_malformed_icmp_pkt_too_big++;
+ return INVALID;
+ }
+
+ return classify_icmp(skb, ipv4.protocol, &tuple,
+ sizeof(tuple.ipv4), metrics);
+}
+
+static verdict_t process_icmpv6(struct bpf_dynptr *dynptr, __u64 *offset, struct __sk_buff *skb,
+ metrics_t *metrics)
+{
+ struct bpf_sock_tuple tuple;
+ struct ipv6hdr ipv6;
+ struct icmp6hdr icmp6;
+ bool is_fragment;
+ uint8_t l4_proto;
+
+ if (bpf_dynptr_read(&icmp6, sizeof(icmp6), dynptr, *offset, 0)) {
+ metrics->errors_total_malformed_icmp++;
+ return INVALID;
+ }
+
+ /* We should never receive encapsulated echo replies. */
+ if (icmp6.icmp6_type == ICMPV6_ECHO_REPLY) {
+ metrics->errors_total_icmp_echo_replies++;
+ return INVALID;
+ }
+
+ if (icmp6.icmp6_type == ICMPV6_ECHO_REQUEST) {
+ return ECHO_REQUEST;
+ }
+
+ if (icmp6.icmp6_type != ICMPV6_PKT_TOOBIG) {
+ metrics->errors_total_unwanted_icmp++;
+ return INVALID;
+ }
+
+ if (pkt_parse_ipv6(dynptr, offset, &ipv6, &l4_proto, &is_fragment)) {
+ metrics->errors_total_malformed_icmp_pkt_too_big++;
+ return INVALID;
+ }
+
+ if (is_fragment) {
+ metrics->errors_total_fragmented_ip++;
+ return INVALID;
+ }
+
+ /* Swap source and dest addresses. */
+ memcpy(&tuple.ipv6.saddr, &ipv6.daddr, sizeof(tuple.ipv6.saddr));
+ memcpy(&tuple.ipv6.daddr, &ipv6.saddr, sizeof(tuple.ipv6.daddr));
+
+ if (!pkt_parse_icmp_l4_ports(dynptr, offset, (flow_ports_t *)&tuple.ipv6.sport)) {
+ metrics->errors_total_malformed_icmp_pkt_too_big++;
+ return INVALID;
+ }
+
+ return classify_icmp(skb, l4_proto, &tuple, sizeof(tuple.ipv6),
+ metrics);
+}
+
+static verdict_t process_tcp(struct bpf_dynptr *dynptr, __u64 *offset, struct __sk_buff *skb,
+ struct iphdr_info *info, metrics_t *metrics)
+{
+ struct bpf_sock_tuple tuple;
+ struct tcphdr tcp;
+ uint64_t tuplen;
+
+ metrics->l4_protocol_packets_total_tcp++;
+
+ if (bpf_dynptr_read(&tcp, sizeof(tcp), dynptr, *offset, 0)) {
+ metrics->errors_total_malformed_tcp++;
+ return INVALID;
+ }
+
+ *offset += sizeof(tcp);
+
+ if (tcp.syn)
+ return SYN;
+
+ tuplen = fill_tuple(&tuple, info->hdr, info->len, tcp.source, tcp.dest);
+ return classify_tcp(skb, &tuple, tuplen, info->hdr, &tcp);
+}
+
+static verdict_t process_udp(struct bpf_dynptr *dynptr, __u64 *offset, struct __sk_buff *skb,
+ struct iphdr_info *info, metrics_t *metrics)
+{
+ struct bpf_sock_tuple tuple;
+ struct udphdr udph;
+ uint64_t tuplen;
+
+ metrics->l4_protocol_packets_total_udp++;
+
+ if (bpf_dynptr_read(&udph, sizeof(udph), dynptr, *offset, 0)) {
+ metrics->errors_total_malformed_udp++;
+ return INVALID;
+ }
+ *offset += sizeof(udph);
+
+ tuplen = fill_tuple(&tuple, info->hdr, info->len, udph.source, udph.dest);
+ return classify_udp(skb, &tuple, tuplen);
+}
+
+static verdict_t process_ipv4(struct __sk_buff *skb, struct bpf_dynptr *dynptr,
+ __u64 *offset, metrics_t *metrics)
+{
+ struct iphdr ipv4;
+ struct iphdr_info info = {
+ .hdr = &ipv4,
+ .len = sizeof(ipv4),
+ };
+
+ metrics->l3_protocol_packets_total_ipv4++;
+
+ if (pkt_parse_ipv4(dynptr, offset, &ipv4)) {
+ metrics->errors_total_malformed_ip++;
+ return INVALID;
+ }
+
+ if (ipv4.version != 4) {
+ metrics->errors_total_malformed_ip++;
+ return INVALID;
+ }
+
+ if (ipv4_is_fragment(&ipv4)) {
+ metrics->errors_total_fragmented_ip++;
+ return INVALID;
+ }
+
+ switch (ipv4.protocol) {
+ case IPPROTO_ICMP:
+ return process_icmpv4(skb, dynptr, offset, metrics);
+
+ case IPPROTO_TCP:
+ return process_tcp(dynptr, offset, skb, &info, metrics);
+
+ case IPPROTO_UDP:
+ return process_udp(dynptr, offset, skb, &info, metrics);
+
+ default:
+ metrics->errors_total_unknown_l4_proto++;
+ return INVALID;
+ }
+}
+
+static verdict_t process_ipv6(struct __sk_buff *skb, struct bpf_dynptr *dynptr,
+ __u64 *offset, metrics_t *metrics)
+{
+ struct ipv6hdr ipv6;
+ struct iphdr_info info = {
+ .hdr = &ipv6,
+ .len = sizeof(ipv6),
+ };
+ uint8_t l4_proto;
+ bool is_fragment;
+
+ metrics->l3_protocol_packets_total_ipv6++;
+
+ if (pkt_parse_ipv6(dynptr, offset, &ipv6, &l4_proto, &is_fragment)) {
+ metrics->errors_total_malformed_ip++;
+ return INVALID;
+ }
+
+ if (ipv6.version != 6) {
+ metrics->errors_total_malformed_ip++;
+ return INVALID;
+ }
+
+ if (is_fragment) {
+ metrics->errors_total_fragmented_ip++;
+ return INVALID;
+ }
+
+ switch (l4_proto) {
+ case IPPROTO_ICMPV6:
+ return process_icmpv6(dynptr, offset, skb, metrics);
+
+ case IPPROTO_TCP:
+ return process_tcp(dynptr, offset, skb, &info, metrics);
+
+ case IPPROTO_UDP:
+ return process_udp(dynptr, offset, skb, &info, metrics);
+
+ default:
+ metrics->errors_total_unknown_l4_proto++;
+ return INVALID;
+ }
+}
+
+SEC("tc")
+int cls_redirect(struct __sk_buff *skb)
+{
+ __u8 encap_buffer[sizeof(encap_headers_t)] = {};
+ struct bpf_dynptr dynptr;
+ struct in_addr next_hop;
+ /* Tracks offset of the dynptr. This will be unnecessary once
+ * bpf_dynptr_advance() is available.
+ */
+ __u64 off = 0;
+ ret_t ret;
+
+ bpf_dynptr_from_skb(skb, 0, &dynptr);
+
+ metrics_t *metrics = get_global_metrics();
+ if (metrics == NULL)
+ return TC_ACT_SHOT;
+
+ metrics->processed_packets_total++;
+
+ /* Pass bogus packets as long as we're not sure they're
+ * destined for us.
+ */
+ if (skb->protocol != bpf_htons(ETH_P_IP))
+ return TC_ACT_OK;
+
+ encap_headers_t *encap;
+
+ /* Make sure that all encapsulation headers are available in
+ * the linear portion of the skb. This makes it easy to manipulate them.
+ */
+ if (bpf_skb_pull_data(skb, sizeof(*encap)))
+ return TC_ACT_OK;
+
+ encap = bpf_dynptr_slice_rdwr(&dynptr, 0, encap_buffer, sizeof(encap_buffer));
+ if (!encap)
+ return TC_ACT_OK;
+
+ off += sizeof(*encap);
+
+ if (encap->ip.ihl != 5)
+ /* We never have any options. */
+ return TC_ACT_OK;
+
+ if (encap->ip.daddr != ENCAPSULATION_IP ||
+ encap->ip.protocol != IPPROTO_UDP)
+ return TC_ACT_OK;
+
+ /* TODO Check UDP length? */
+ if (encap->udp.dest != ENCAPSULATION_PORT)
+ return TC_ACT_OK;
+
+ /* We now know that the packet is destined to us, we can
+ * drop bogus ones.
+ */
+ if (ipv4_is_fragment((void *)&encap->ip)) {
+ metrics->errors_total_fragmented_ip++;
+ return TC_ACT_SHOT;
+ }
+
+ if (encap->gue.variant != 0) {
+ metrics->errors_total_malformed_encapsulation++;
+ return TC_ACT_SHOT;
+ }
+
+ if (encap->gue.control != 0) {
+ metrics->errors_total_malformed_encapsulation++;
+ return TC_ACT_SHOT;
+ }
+
+ if (encap->gue.flags != 0) {
+ metrics->errors_total_malformed_encapsulation++;
+ return TC_ACT_SHOT;
+ }
+
+ if (encap->gue.hlen !=
+ sizeof(encap->unigue) / 4 + encap->unigue.hop_count) {
+ metrics->errors_total_malformed_encapsulation++;
+ return TC_ACT_SHOT;
+ }
+
+ if (encap->unigue.version != 0) {
+ metrics->errors_total_malformed_encapsulation++;
+ return TC_ACT_SHOT;
+ }
+
+ if (encap->unigue.reserved != 0)
+ return TC_ACT_SHOT;
+
+ MAYBE_RETURN(get_next_hop(&dynptr, &off, encap, &next_hop));
+
+ if (next_hop.s_addr == 0) {
+ metrics->accepted_packets_total_last_hop++;
+ return accept_locally(skb, encap);
+ }
+
+ verdict_t verdict;
+ switch (encap->gue.proto_ctype) {
+ case IPPROTO_IPIP:
+ verdict = process_ipv4(skb, &dynptr, &off, metrics);
+ break;
+
+ case IPPROTO_IPV6:
+ verdict = process_ipv6(skb, &dynptr, &off, metrics);
+ break;
+
+ default:
+ metrics->errors_total_unknown_l3_proto++;
+ return TC_ACT_SHOT;
+ }
+
+ switch (verdict) {
+ case INVALID:
+ /* metrics have already been bumped */
+ return TC_ACT_SHOT;
+
+ case UNKNOWN:
+ return forward_to_next_hop(skb, &dynptr, encap, &next_hop, metrics);
+
+ case ECHO_REQUEST:
+ metrics->accepted_packets_total_icmp_echo_request++;
+ break;
+
+ case SYN:
+ if (encap->unigue.forward_syn) {
+ return forward_to_next_hop(skb, &dynptr, encap, &next_hop,
+ metrics);
+ }
+
+ metrics->accepted_packets_total_syn++;
+ break;
+
+ case SYN_COOKIE:
+ metrics->accepted_packets_total_syn_cookies++;
+ break;
+
+ case ESTABLISHED:
+ metrics->accepted_packets_total_established++;
+ break;
+ }
+
+ ret = accept_locally(skb, encap);
+
+ if (encap == encap_buffer)
+ bpf_dynptr_write(&dynptr, 0, encap_buffer, sizeof(encap_buffer), 0);
+
+ return ret;
+}
diff --git a/tools/testing/selftests/bpf/progs/test_cls_redirect_subprogs.c b/tools/testing/selftests/bpf/progs/test_cls_redirect_subprogs.c
new file mode 100644
index 000000000000..eed26b70e3a2
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/test_cls_redirect_subprogs.c
@@ -0,0 +1,2 @@
+#define SUBPROGS
+#include "test_cls_redirect.c"
diff --git a/tools/testing/selftests/bpf/progs/test_core_autosize.c b/tools/testing/selftests/bpf/progs/test_core_autosize.c
new file mode 100644
index 000000000000..9a7829c5e4a7
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/test_core_autosize.c
@@ -0,0 +1,182 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2020 Facebook */
+
+#include <linux/bpf.h>
+#include <stdint.h>
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_core_read.h>
+
+char _license[] SEC("license") = "GPL";
+
+/* fields of exactly the same size */
+struct test_struct___samesize {
+ void *ptr;
+ unsigned long long val1;
+ unsigned int val2;
+ unsigned short val3;
+ unsigned char val4;
+} __attribute((preserve_access_index));
+
+/* unsigned fields that have to be downsized by libbpf */
+struct test_struct___downsize {
+ void *ptr;
+ unsigned long val1;
+ unsigned long val2;
+ unsigned long val3;
+ unsigned long val4;
+ /* total sz: 40 */
+} __attribute__((preserve_access_index));
+
+/* fields with signed integers of wrong size, should be rejected */
+struct test_struct___signed {
+ void *ptr;
+ long val1;
+ long val2;
+ long val3;
+ long val4;
+} __attribute((preserve_access_index));
+
+/* real layout and sizes according to test's (32-bit) BTF */
+struct test_struct___real {
+ unsigned int ptr; /* can't use `void *`, it is always 8 byte in BPF target */
+ unsigned int val2;
+ unsigned long long val1;
+ unsigned short val3;
+ unsigned char val4;
+ unsigned char _pad;
+ /* total sz: 20 */
+};
+
+struct test_struct___real input = {
+ .ptr = 0x01020304,
+ .val1 = 0x1020304050607080,
+ .val2 = 0x0a0b0c0d,
+ .val3 = 0xfeed,
+ .val4 = 0xb9,
+ ._pad = 0xff, /* make sure no accidental zeros are present */
+};
+
+unsigned long long ptr_samesized = 0;
+unsigned long long val1_samesized = 0;
+unsigned long long val2_samesized = 0;
+unsigned long long val3_samesized = 0;
+unsigned long long val4_samesized = 0;
+struct test_struct___real output_samesized = {};
+
+unsigned long long ptr_downsized = 0;
+unsigned long long val1_downsized = 0;
+unsigned long long val2_downsized = 0;
+unsigned long long val3_downsized = 0;
+unsigned long long val4_downsized = 0;
+struct test_struct___real output_downsized = {};
+
+unsigned long long ptr_probed = 0;
+unsigned long long val1_probed = 0;
+unsigned long long val2_probed = 0;
+unsigned long long val3_probed = 0;
+unsigned long long val4_probed = 0;
+
+unsigned long long ptr_signed = 0;
+unsigned long long val1_signed = 0;
+unsigned long long val2_signed = 0;
+unsigned long long val3_signed = 0;
+unsigned long long val4_signed = 0;
+struct test_struct___real output_signed = {};
+
+SEC("raw_tp/sys_exit")
+int handle_samesize(void *ctx)
+{
+ struct test_struct___samesize *in = (void *)&input;
+ struct test_struct___samesize *out = (void *)&output_samesized;
+
+ ptr_samesized = (unsigned long long)in->ptr;
+ val1_samesized = in->val1;
+ val2_samesized = in->val2;
+ val3_samesized = in->val3;
+ val4_samesized = in->val4;
+
+ out->ptr = in->ptr;
+ out->val1 = in->val1;
+ out->val2 = in->val2;
+ out->val3 = in->val3;
+ out->val4 = in->val4;
+
+ return 0;
+}
+
+SEC("raw_tp/sys_exit")
+int handle_downsize(void *ctx)
+{
+ struct test_struct___downsize *in = (void *)&input;
+ struct test_struct___downsize *out = (void *)&output_downsized;
+
+ ptr_downsized = (unsigned long long)in->ptr;
+ val1_downsized = in->val1;
+ val2_downsized = in->val2;
+ val3_downsized = in->val3;
+ val4_downsized = in->val4;
+
+ out->ptr = in->ptr;
+ out->val1 = in->val1;
+ out->val2 = in->val2;
+ out->val3 = in->val3;
+ out->val4 = in->val4;
+
+ return 0;
+}
+
+#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
+#define bpf_core_read_int bpf_core_read
+#else
+#define bpf_core_read_int(dst, sz, src) ({ \
+ /* Prevent "subtraction from stack pointer prohibited" */ \
+ volatile long __off = sizeof(*dst) - (sz); \
+ bpf_core_read((char *)(dst) + __off, sz, src); \
+})
+#endif
+
+SEC("raw_tp/sys_enter")
+int handle_probed(void *ctx)
+{
+ struct test_struct___downsize *in = (void *)&input;
+ __u64 tmp;
+
+ tmp = 0;
+ bpf_core_read_int(&tmp, bpf_core_field_size(in->ptr), &in->ptr);
+ ptr_probed = tmp;
+
+ tmp = 0;
+ bpf_core_read_int(&tmp, bpf_core_field_size(in->val1), &in->val1);
+ val1_probed = tmp;
+
+ tmp = 0;
+ bpf_core_read_int(&tmp, bpf_core_field_size(in->val2), &in->val2);
+ val2_probed = tmp;
+
+ tmp = 0;
+ bpf_core_read_int(&tmp, bpf_core_field_size(in->val3), &in->val3);
+ val3_probed = tmp;
+
+ tmp = 0;
+ bpf_core_read_int(&tmp, bpf_core_field_size(in->val4), &in->val4);
+ val4_probed = tmp;
+
+ return 0;
+}
+
+SEC("raw_tp/sys_enter")
+int handle_signed(void *ctx)
+{
+ struct test_struct___signed *in = (void *)&input;
+ struct test_struct___signed *out = (void *)&output_signed;
+
+ val2_signed = in->val2;
+ val3_signed = in->val3;
+ val4_signed = in->val4;
+
+ out->val2= in->val2;
+ out->val3= in->val3;
+ out->val4= in->val4;
+
+ return 0;
+}
diff --git a/tools/testing/selftests/bpf/progs/test_core_extern.c b/tools/testing/selftests/bpf/progs/test_core_extern.c
index 3ac3603ad53d..a3c7c1042f35 100644
--- a/tools/testing/selftests/bpf/progs/test_core_extern.c
+++ b/tools/testing/selftests/bpf/progs/test_core_extern.c
@@ -11,6 +11,7 @@
static int (*bpf_missing_helper)(const void *arg1, int arg2) = (void *) 999;
extern int LINUX_KERNEL_VERSION __kconfig;
+extern int LINUX_UNKNOWN_VIRTUAL_EXTERN __kconfig __weak;
extern bool CONFIG_BPF_SYSCALL __kconfig; /* strong */
extern enum libbpf_tristate CONFIG_TRISTATE __kconfig __weak;
extern bool CONFIG_BOOL __kconfig __weak;
@@ -22,6 +23,7 @@ extern const char CONFIG_STR[8] __kconfig __weak;
extern uint64_t CONFIG_MISSING __kconfig __weak;
uint64_t kern_ver = -1;
+uint64_t unkn_virt_val = -1;
uint64_t bpf_syscall = -1;
uint64_t tristate_val = -1;
uint64_t bool_val = -1;
@@ -38,6 +40,7 @@ int handle_sys_enter(struct pt_regs *ctx)
int i;
kern_ver = LINUX_KERNEL_VERSION;
+ unkn_virt_val = LINUX_UNKNOWN_VIRTUAL_EXTERN;
bpf_syscall = CONFIG_BPF_SYSCALL;
tristate_val = CONFIG_TRISTATE;
bool_val = CONFIG_BOOL;
diff --git a/tools/testing/selftests/bpf/progs/test_core_read_macros.c b/tools/testing/selftests/bpf/progs/test_core_read_macros.c
new file mode 100644
index 000000000000..fd54caa17319
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/test_core_read_macros.c
@@ -0,0 +1,50 @@
+// SPDX-License-Identifier: GPL-2.0
+// Copyright (c) 2020 Facebook
+
+#include "vmlinux.h"
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_core_read.h>
+
+char _license[] SEC("license") = "GPL";
+
+/* shuffled layout for relocatable (CO-RE) reads */
+struct callback_head___shuffled {
+ void (*func)(struct callback_head___shuffled *head);
+ struct callback_head___shuffled *next;
+};
+
+struct callback_head k_probe_in = {};
+struct callback_head___shuffled k_core_in = {};
+
+struct callback_head *u_probe_in = 0;
+struct callback_head___shuffled *u_core_in = 0;
+
+long k_probe_out = 0;
+long u_probe_out = 0;
+
+long k_core_out = 0;
+long u_core_out = 0;
+
+int my_pid = 0;
+
+SEC("raw_tracepoint/sys_enter")
+int handler(void *ctx)
+{
+ int pid = bpf_get_current_pid_tgid() >> 32;
+
+ if (my_pid != pid)
+ return 0;
+
+ /* next pointers for kernel address space have to be initialized from
+ * BPF side, user-space mmaped addresses are stil user-space addresses
+ */
+ k_probe_in.next = &k_probe_in;
+ __builtin_preserve_access_index(({k_core_in.next = &k_core_in;}));
+
+ k_probe_out = (long)BPF_PROBE_READ(&k_probe_in, next, next, func);
+ k_core_out = (long)BPF_CORE_READ(&k_core_in, next, next, func);
+ u_probe_out = (long)BPF_PROBE_READ_USER(u_probe_in, next, next, func);
+ u_core_out = (long)BPF_CORE_READ_USER(u_core_in, next, next, func);
+
+ return 0;
+}
diff --git a/tools/testing/selftests/bpf/progs/test_core_reloc_bitfields_probed.c b/tools/testing/selftests/bpf/progs/test_core_reloc_bitfields_probed.c
index ab1e647aeb31..b86fdda2a6ea 100644
--- a/tools/testing/selftests/bpf/progs/test_core_reloc_bitfields_probed.c
+++ b/tools/testing/selftests/bpf/progs/test_core_reloc_bitfields_probed.c
@@ -42,7 +42,6 @@ int test_core_bitfields(void *ctx)
{
struct core_reloc_bitfields *in = (void *)&data.in;
struct core_reloc_bitfields_output *out = (void *)&data.out;
- uint64_t res;
out->ub1 = BPF_CORE_READ_BITFIELD_PROBED(in, ub1);
out->ub2 = BPF_CORE_READ_BITFIELD_PROBED(in, ub2);
diff --git a/tools/testing/selftests/bpf/progs/test_core_reloc_enum64val.c b/tools/testing/selftests/bpf/progs/test_core_reloc_enum64val.c
new file mode 100644
index 000000000000..63147fbfae6e
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/test_core_reloc_enum64val.c
@@ -0,0 +1,70 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2022 Meta Platforms, Inc. and affiliates. */
+
+#include <linux/bpf.h>
+#include <stdint.h>
+#include <stdbool.h>
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_core_read.h>
+
+char _license[] SEC("license") = "GPL";
+
+struct {
+ char in[256];
+ char out[256];
+ bool skip;
+} data = {};
+
+enum named_unsigned_enum64 {
+ UNSIGNED_ENUM64_VAL1 = 0x1ffffffffULL,
+ UNSIGNED_ENUM64_VAL2 = 0x2ffffffffULL,
+ UNSIGNED_ENUM64_VAL3 = 0x3ffffffffULL,
+};
+
+enum named_signed_enum64 {
+ SIGNED_ENUM64_VAL1 = 0x1ffffffffLL,
+ SIGNED_ENUM64_VAL2 = -2,
+ SIGNED_ENUM64_VAL3 = 0x3ffffffffLL,
+};
+
+struct core_reloc_enum64val_output {
+ bool unsigned_val1_exists;
+ bool unsigned_val2_exists;
+ bool unsigned_val3_exists;
+ bool signed_val1_exists;
+ bool signed_val2_exists;
+ bool signed_val3_exists;
+
+ long unsigned_val1;
+ long unsigned_val2;
+ long signed_val1;
+ long signed_val2;
+};
+
+SEC("raw_tracepoint/sys_enter")
+int test_core_enum64val(void *ctx)
+{
+#if __clang_major__ >= 15
+ struct core_reloc_enum64val_output *out = (void *)&data.out;
+ enum named_unsigned_enum64 named_unsigned = 0;
+ enum named_signed_enum64 named_signed = 0;
+
+ out->unsigned_val1_exists = bpf_core_enum_value_exists(named_unsigned, UNSIGNED_ENUM64_VAL1);
+ out->unsigned_val2_exists = bpf_core_enum_value_exists(enum named_unsigned_enum64, UNSIGNED_ENUM64_VAL2);
+ out->unsigned_val3_exists = bpf_core_enum_value_exists(enum named_unsigned_enum64, UNSIGNED_ENUM64_VAL3);
+ out->signed_val1_exists = bpf_core_enum_value_exists(named_signed, SIGNED_ENUM64_VAL1);
+ out->signed_val2_exists = bpf_core_enum_value_exists(enum named_signed_enum64, SIGNED_ENUM64_VAL2);
+ out->signed_val3_exists = bpf_core_enum_value_exists(enum named_signed_enum64, SIGNED_ENUM64_VAL3);
+
+ out->unsigned_val1 = bpf_core_enum_value(named_unsigned, UNSIGNED_ENUM64_VAL1);
+ out->unsigned_val2 = bpf_core_enum_value(named_unsigned, UNSIGNED_ENUM64_VAL2);
+ out->signed_val1 = bpf_core_enum_value(named_signed, SIGNED_ENUM64_VAL1);
+ out->signed_val2 = bpf_core_enum_value(named_signed, SIGNED_ENUM64_VAL2);
+ /* NAMED_ENUM64_VAL3 value is optional */
+
+#else
+ data.skip = true;
+#endif
+
+ return 0;
+}
diff --git a/tools/testing/selftests/bpf/progs/test_core_reloc_enumval.c b/tools/testing/selftests/bpf/progs/test_core_reloc_enumval.c
new file mode 100644
index 000000000000..e7ef3dada2bf
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/test_core_reloc_enumval.c
@@ -0,0 +1,72 @@
+// SPDX-License-Identifier: GPL-2.0
+// Copyright (c) 2020 Facebook
+
+#include <linux/bpf.h>
+#include <stdint.h>
+#include <stdbool.h>
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_core_read.h>
+
+char _license[] SEC("license") = "GPL";
+
+struct {
+ char in[256];
+ char out[256];
+ bool skip;
+} data = {};
+
+enum named_enum {
+ NAMED_ENUM_VAL1 = 1,
+ NAMED_ENUM_VAL2 = 2,
+ NAMED_ENUM_VAL3 = 3,
+};
+
+typedef enum {
+ ANON_ENUM_VAL1 = 0x10,
+ ANON_ENUM_VAL2 = 0x20,
+ ANON_ENUM_VAL3 = 0x30,
+} anon_enum;
+
+struct core_reloc_enumval_output {
+ bool named_val1_exists;
+ bool named_val2_exists;
+ bool named_val3_exists;
+ bool anon_val1_exists;
+ bool anon_val2_exists;
+ bool anon_val3_exists;
+
+ int named_val1;
+ int named_val2;
+ int anon_val1;
+ int anon_val2;
+};
+
+SEC("raw_tracepoint/sys_enter")
+int test_core_enumval(void *ctx)
+{
+#if __has_builtin(__builtin_preserve_enum_value)
+ struct core_reloc_enumval_output *out = (void *)&data.out;
+ enum named_enum named = 0;
+ anon_enum anon = 0;
+
+ out->named_val1_exists = bpf_core_enum_value_exists(named, NAMED_ENUM_VAL1);
+ out->named_val2_exists = bpf_core_enum_value_exists(enum named_enum, NAMED_ENUM_VAL2);
+ out->named_val3_exists = bpf_core_enum_value_exists(enum named_enum, NAMED_ENUM_VAL3);
+
+ out->anon_val1_exists = bpf_core_enum_value_exists(anon, ANON_ENUM_VAL1);
+ out->anon_val2_exists = bpf_core_enum_value_exists(anon_enum, ANON_ENUM_VAL2);
+ out->anon_val3_exists = bpf_core_enum_value_exists(anon_enum, ANON_ENUM_VAL3);
+
+ out->named_val1 = bpf_core_enum_value(named, NAMED_ENUM_VAL1);
+ out->named_val2 = bpf_core_enum_value(named, NAMED_ENUM_VAL2);
+ /* NAMED_ENUM_VAL3 value is optional */
+
+ out->anon_val1 = bpf_core_enum_value(anon, ANON_ENUM_VAL1);
+ out->anon_val2 = bpf_core_enum_value(anon, ANON_ENUM_VAL2);
+ /* ANON_ENUM_VAL3 value is optional */
+#else
+ data.skip = true;
+#endif
+
+ return 0;
+}
diff --git a/tools/testing/selftests/bpf/progs/test_core_reloc_existence.c b/tools/testing/selftests/bpf/progs/test_core_reloc_existence.c
index 7e45e2bdf6cd..5b8a75097ea3 100644
--- a/tools/testing/selftests/bpf/progs/test_core_reloc_existence.c
+++ b/tools/testing/selftests/bpf/progs/test_core_reloc_existence.c
@@ -45,35 +45,34 @@ int test_core_existence(void *ctx)
struct core_reloc_existence_output *out = (void *)&data.out;
out->a_exists = bpf_core_field_exists(in->a);
- if (bpf_core_field_exists(in->a))
+ if (bpf_core_field_exists(struct core_reloc_existence, a))
out->a_value = BPF_CORE_READ(in, a);
else
out->a_value = 0xff000001u;
out->b_exists = bpf_core_field_exists(in->b);
- if (bpf_core_field_exists(in->b))
+ if (bpf_core_field_exists(struct core_reloc_existence, b))
out->b_value = BPF_CORE_READ(in, b);
else
out->b_value = 0xff000002u;
out->c_exists = bpf_core_field_exists(in->c);
- if (bpf_core_field_exists(in->c))
+ if (bpf_core_field_exists(struct core_reloc_existence, c))
out->c_value = BPF_CORE_READ(in, c);
else
out->c_value = 0xff000003u;
out->arr_exists = bpf_core_field_exists(in->arr);
- if (bpf_core_field_exists(in->arr))
+ if (bpf_core_field_exists(struct core_reloc_existence, arr))
out->arr_value = BPF_CORE_READ(in, arr[0]);
else
out->arr_value = 0xff000004u;
out->s_exists = bpf_core_field_exists(in->s);
- if (bpf_core_field_exists(in->s))
+ if (bpf_core_field_exists(struct core_reloc_existence, s))
out->s_value = BPF_CORE_READ(in, s.x);
else
out->s_value = 0xff000005u;
return 0;
}
-
diff --git a/tools/testing/selftests/bpf/progs/test_core_reloc_kernel.c b/tools/testing/selftests/bpf/progs/test_core_reloc_kernel.c
index aba928fd60d3..ee4a601dcb06 100644
--- a/tools/testing/selftests/bpf/progs/test_core_reloc_kernel.c
+++ b/tools/testing/selftests/bpf/progs/test_core_reloc_kernel.c
@@ -3,6 +3,7 @@
#include <linux/bpf.h>
#include <stdint.h>
+#include <stdbool.h>
#include <bpf/bpf_helpers.h>
#include <bpf/bpf_core_read.h>
@@ -11,6 +12,7 @@ char _license[] SEC("license") = "GPL";
struct {
char in[256];
char out[256];
+ bool skip;
uint64_t my_pid_tgid;
} data = {};
@@ -19,6 +21,7 @@ struct core_reloc_kernel_output {
/* we have test_progs[-flavor], so cut flavor part */
char comm[sizeof("test_progs")];
int comm_len;
+ bool local_task_struct_matches;
};
struct task_struct {
@@ -28,15 +31,29 @@ struct task_struct {
struct task_struct *group_leader;
};
+struct mm_struct___wrong {
+ int abc_whatever_should_not_exist;
+};
+
+struct task_struct___local {
+ int pid;
+ struct mm_struct___wrong *mm;
+};
+
#define CORE_READ(dst, src) bpf_core_read(dst, sizeof(*(dst)), src)
SEC("raw_tracepoint/sys_enter")
int test_core_kernel(void *ctx)
{
+ /* Support for the BPF_TYPE_MATCHES argument to the
+ * __builtin_preserve_type_info builtin was added at some point during
+ * development of clang 15 and it's what we require for this test.
+ */
+#if __has_builtin(__builtin_preserve_type_info) && __clang_major__ >= 15
struct task_struct *task = (void *)bpf_get_current_task();
struct core_reloc_kernel_output *out = (void *)&data.out;
uint64_t pid_tgid = bpf_get_current_pid_tgid();
- uint32_t real_tgid = (uint32_t)pid_tgid;
+ int32_t real_tgid = (int32_t)pid_tgid;
int pid, tgid;
if (data.my_pid_tgid != pid_tgid)
@@ -91,6 +108,10 @@ int test_core_kernel(void *ctx)
group_leader, group_leader, group_leader, group_leader,
comm);
+ out->local_task_struct_matches = bpf_core_type_matches(struct task_struct___local);
+#else
+ data.skip = true;
+#endif
return 0;
}
diff --git a/tools/testing/selftests/bpf/progs/test_core_reloc_mods.c b/tools/testing/selftests/bpf/progs/test_core_reloc_mods.c
index 8b533db4a7a5..b2ded497572a 100644
--- a/tools/testing/selftests/bpf/progs/test_core_reloc_mods.c
+++ b/tools/testing/selftests/bpf/progs/test_core_reloc_mods.c
@@ -42,7 +42,16 @@ struct core_reloc_mods {
core_reloc_mods_substruct_t h;
};
+#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
#define CORE_READ(dst, src) bpf_core_read(dst, sizeof(*(dst)), src)
+#else
+#define CORE_READ(dst, src) ({ \
+ int __sz = sizeof(*(dst)) < sizeof(*(src)) ? sizeof(*(dst)) : \
+ sizeof(*(src)); \
+ bpf_core_read((char *)(dst) + sizeof(*(dst)) - __sz, __sz, \
+ (const char *)(src) + sizeof(*(src)) - __sz); \
+})
+#endif
SEC("raw_tracepoint/sys_enter")
int test_core_mods(void *ctx)
diff --git a/tools/testing/selftests/bpf/progs/test_core_reloc_module.c b/tools/testing/selftests/bpf/progs/test_core_reloc_module.c
new file mode 100644
index 000000000000..bcb31ff92dcc
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/test_core_reloc_module.c
@@ -0,0 +1,104 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2020 Facebook */
+
+#include "vmlinux.h"
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_core_read.h>
+#include <bpf/bpf_tracing.h>
+
+char _license[] SEC("license") = "GPL";
+
+struct bpf_testmod_test_read_ctx {
+ /* field order is mixed up */
+ size_t len;
+ char *buf;
+ loff_t off;
+} __attribute__((preserve_access_index));
+
+struct {
+ char in[256];
+ char out[256];
+ bool skip;
+ uint64_t my_pid_tgid;
+} data = {};
+
+struct core_reloc_module_output {
+ long long len;
+ long long off;
+ int read_ctx_sz;
+ bool read_ctx_exists;
+ bool buf_exists;
+ bool len_exists;
+ bool off_exists;
+ /* we have test_progs[-flavor], so cut flavor part */
+ char comm[sizeof("test_progs")];
+ int comm_len;
+};
+
+SEC("raw_tp/bpf_testmod_test_read")
+int BPF_PROG(test_core_module_probed,
+ struct task_struct *task,
+ struct bpf_testmod_test_read_ctx *read_ctx)
+{
+#if __has_builtin(__builtin_preserve_enum_value)
+ struct core_reloc_module_output *out = (void *)&data.out;
+ __u64 pid_tgid = bpf_get_current_pid_tgid();
+ __s32 real_tgid = (__s32)(pid_tgid >> 32);
+ __s32 real_pid = (__s32)pid_tgid;
+
+ if (data.my_pid_tgid != pid_tgid)
+ return 0;
+
+ if (BPF_CORE_READ(task, pid) != real_pid || BPF_CORE_READ(task, tgid) != real_tgid)
+ return 0;
+
+ out->len = BPF_CORE_READ(read_ctx, len);
+ out->off = BPF_CORE_READ(read_ctx, off);
+
+ out->read_ctx_sz = bpf_core_type_size(struct bpf_testmod_test_read_ctx);
+ out->read_ctx_exists = bpf_core_type_exists(struct bpf_testmod_test_read_ctx);
+ out->buf_exists = bpf_core_field_exists(read_ctx->buf);
+ out->off_exists = bpf_core_field_exists(read_ctx->off);
+ out->len_exists = bpf_core_field_exists(read_ctx->len);
+
+ out->comm_len = BPF_CORE_READ_STR_INTO(&out->comm, task, comm);
+#else
+ data.skip = true;
+#endif
+
+ return 0;
+}
+
+SEC("tp_btf/bpf_testmod_test_read")
+int BPF_PROG(test_core_module_direct,
+ struct task_struct *task,
+ struct bpf_testmod_test_read_ctx *read_ctx)
+{
+#if __has_builtin(__builtin_preserve_enum_value)
+ struct core_reloc_module_output *out = (void *)&data.out;
+ __u64 pid_tgid = bpf_get_current_pid_tgid();
+ __s32 real_tgid = (__s32)(pid_tgid >> 32);
+ __s32 real_pid = (__s32)pid_tgid;
+
+ if (data.my_pid_tgid != pid_tgid)
+ return 0;
+
+ if (task->pid != real_pid || task->tgid != real_tgid)
+ return 0;
+
+ out->len = read_ctx->len;
+ out->off = read_ctx->off;
+
+ out->read_ctx_sz = bpf_core_type_size(struct bpf_testmod_test_read_ctx);
+ out->read_ctx_exists = bpf_core_type_exists(struct bpf_testmod_test_read_ctx);
+ out->buf_exists = bpf_core_field_exists(read_ctx->buf);
+ out->off_exists = bpf_core_field_exists(read_ctx->off);
+ out->len_exists = bpf_core_field_exists(read_ctx->len);
+
+ out->comm_len = BPF_CORE_READ_STR_INTO(&out->comm, task, comm);
+#else
+ data.skip = true;
+#endif
+
+ return 0;
+}
diff --git a/tools/testing/selftests/bpf/progs/test_core_reloc_size.c b/tools/testing/selftests/bpf/progs/test_core_reloc_size.c
index d7fb6cfc7891..5b686053ce42 100644
--- a/tools/testing/selftests/bpf/progs/test_core_reloc_size.c
+++ b/tools/testing/selftests/bpf/progs/test_core_reloc_size.c
@@ -15,12 +15,21 @@ struct {
struct core_reloc_size_output {
int int_sz;
+ int int_off;
int struct_sz;
+ int struct_off;
int union_sz;
+ int union_off;
int arr_sz;
+ int arr_off;
int arr_elem_sz;
+ int arr_elem_off;
int ptr_sz;
+ int ptr_off;
int enum_sz;
+ int enum_off;
+ int float_sz;
+ int float_off;
};
struct core_reloc_size {
@@ -30,6 +39,7 @@ struct core_reloc_size {
int arr_field[4];
void *ptr_field;
enum { VALUE = 123 } enum_field;
+ float float_field;
};
SEC("raw_tracepoint/sys_enter")
@@ -39,12 +49,28 @@ int test_core_size(void *ctx)
struct core_reloc_size_output *out = (void *)&data.out;
out->int_sz = bpf_core_field_size(in->int_field);
+ out->int_off = bpf_core_field_offset(in->int_field);
+
out->struct_sz = bpf_core_field_size(in->struct_field);
+ out->struct_off = bpf_core_field_offset(in->struct_field);
+
out->union_sz = bpf_core_field_size(in->union_field);
+ out->union_off = bpf_core_field_offset(in->union_field);
+
out->arr_sz = bpf_core_field_size(in->arr_field);
- out->arr_elem_sz = bpf_core_field_size(in->arr_field[0]);
- out->ptr_sz = bpf_core_field_size(in->ptr_field);
- out->enum_sz = bpf_core_field_size(in->enum_field);
+ out->arr_off = bpf_core_field_offset(in->arr_field);
+
+ out->arr_elem_sz = bpf_core_field_size(struct core_reloc_size, arr_field[1]);
+ out->arr_elem_off = bpf_core_field_offset(struct core_reloc_size, arr_field[1]);
+
+ out->ptr_sz = bpf_core_field_size(struct core_reloc_size, ptr_field);
+ out->ptr_off = bpf_core_field_offset(struct core_reloc_size, ptr_field);
+
+ out->enum_sz = bpf_core_field_size(struct core_reloc_size, enum_field);
+ out->enum_off = bpf_core_field_offset(struct core_reloc_size, enum_field);
+
+ out->float_sz = bpf_core_field_size(struct core_reloc_size, float_field);
+ out->float_off = bpf_core_field_offset(struct core_reloc_size, float_field);
return 0;
}
diff --git a/tools/testing/selftests/bpf/progs/test_core_reloc_type_based.c b/tools/testing/selftests/bpf/progs/test_core_reloc_type_based.c
new file mode 100644
index 000000000000..2edb4df35e6e
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/test_core_reloc_type_based.c
@@ -0,0 +1,157 @@
+// SPDX-License-Identifier: GPL-2.0
+// Copyright (c) 2020 Facebook
+
+#include <linux/bpf.h>
+#include <stdint.h>
+#include <stdbool.h>
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_core_read.h>
+
+char _license[] SEC("license") = "GPL";
+
+struct {
+ char in[256];
+ char out[256];
+ bool skip;
+} data = {};
+
+struct a_struct {
+ int x;
+};
+
+struct a_complex_struct {
+ union {
+ struct a_struct *a;
+ void *b;
+ } x;
+ volatile long y;
+};
+
+union a_union {
+ int y;
+ int z;
+};
+
+typedef struct a_struct named_struct_typedef;
+
+typedef struct { int x, y, z; } anon_struct_typedef;
+
+typedef struct {
+ int a, b, c;
+} *struct_ptr_typedef;
+
+enum an_enum {
+ AN_ENUM_VAL1 = 1,
+ AN_ENUM_VAL2 = 2,
+ AN_ENUM_VAL3 = 3,
+};
+
+typedef int int_typedef;
+
+typedef enum { TYPEDEF_ENUM_VAL1, TYPEDEF_ENUM_VAL2 } enum_typedef;
+
+typedef void *void_ptr_typedef;
+typedef int *restrict restrict_ptr_typedef;
+
+typedef int (*func_proto_typedef)(long);
+
+typedef char arr_typedef[20];
+
+struct core_reloc_type_based_output {
+ bool struct_exists;
+ bool complex_struct_exists;
+ bool union_exists;
+ bool enum_exists;
+ bool typedef_named_struct_exists;
+ bool typedef_anon_struct_exists;
+ bool typedef_struct_ptr_exists;
+ bool typedef_int_exists;
+ bool typedef_enum_exists;
+ bool typedef_void_ptr_exists;
+ bool typedef_restrict_ptr_exists;
+ bool typedef_func_proto_exists;
+ bool typedef_arr_exists;
+
+ bool struct_matches;
+ bool complex_struct_matches;
+ bool union_matches;
+ bool enum_matches;
+ bool typedef_named_struct_matches;
+ bool typedef_anon_struct_matches;
+ bool typedef_struct_ptr_matches;
+ bool typedef_int_matches;
+ bool typedef_enum_matches;
+ bool typedef_void_ptr_matches;
+ bool typedef_restrict_ptr_matches;
+ bool typedef_func_proto_matches;
+ bool typedef_arr_matches;
+
+ int struct_sz;
+ int union_sz;
+ int enum_sz;
+ int typedef_named_struct_sz;
+ int typedef_anon_struct_sz;
+ int typedef_struct_ptr_sz;
+ int typedef_int_sz;
+ int typedef_enum_sz;
+ int typedef_void_ptr_sz;
+ int typedef_func_proto_sz;
+ int typedef_arr_sz;
+};
+
+SEC("raw_tracepoint/sys_enter")
+int test_core_type_based(void *ctx)
+{
+ /* Support for the BPF_TYPE_MATCHES argument to the
+ * __builtin_preserve_type_info builtin was added at some point during
+ * development of clang 15 and it's what we require for this test. Part of it
+ * could run with merely __builtin_preserve_type_info (which could be checked
+ * separately), but we have to find an upper bound.
+ */
+#if __has_builtin(__builtin_preserve_type_info) && __clang_major__ >= 15
+ struct core_reloc_type_based_output *out = (void *)&data.out;
+
+ out->struct_exists = bpf_core_type_exists(struct a_struct);
+ out->complex_struct_exists = bpf_core_type_exists(struct a_complex_struct);
+ out->union_exists = bpf_core_type_exists(union a_union);
+ out->enum_exists = bpf_core_type_exists(enum an_enum);
+ out->typedef_named_struct_exists = bpf_core_type_exists(named_struct_typedef);
+ out->typedef_anon_struct_exists = bpf_core_type_exists(anon_struct_typedef);
+ out->typedef_struct_ptr_exists = bpf_core_type_exists(struct_ptr_typedef);
+ out->typedef_int_exists = bpf_core_type_exists(int_typedef);
+ out->typedef_enum_exists = bpf_core_type_exists(enum_typedef);
+ out->typedef_void_ptr_exists = bpf_core_type_exists(void_ptr_typedef);
+ out->typedef_restrict_ptr_exists = bpf_core_type_exists(restrict_ptr_typedef);
+ out->typedef_func_proto_exists = bpf_core_type_exists(func_proto_typedef);
+ out->typedef_arr_exists = bpf_core_type_exists(arr_typedef);
+
+ out->struct_matches = bpf_core_type_matches(struct a_struct);
+ out->complex_struct_matches = bpf_core_type_matches(struct a_complex_struct);
+ out->union_matches = bpf_core_type_matches(union a_union);
+ out->enum_matches = bpf_core_type_matches(enum an_enum);
+ out->typedef_named_struct_matches = bpf_core_type_matches(named_struct_typedef);
+ out->typedef_anon_struct_matches = bpf_core_type_matches(anon_struct_typedef);
+ out->typedef_struct_ptr_matches = bpf_core_type_matches(struct_ptr_typedef);
+ out->typedef_int_matches = bpf_core_type_matches(int_typedef);
+ out->typedef_enum_matches = bpf_core_type_matches(enum_typedef);
+ out->typedef_void_ptr_matches = bpf_core_type_matches(void_ptr_typedef);
+ out->typedef_restrict_ptr_matches = bpf_core_type_matches(restrict_ptr_typedef);
+ out->typedef_func_proto_matches = bpf_core_type_matches(func_proto_typedef);
+ out->typedef_arr_matches = bpf_core_type_matches(arr_typedef);
+
+ out->struct_sz = bpf_core_type_size(struct a_struct);
+ out->union_sz = bpf_core_type_size(union a_union);
+ out->enum_sz = bpf_core_type_size(enum an_enum);
+ out->typedef_named_struct_sz = bpf_core_type_size(named_struct_typedef);
+ out->typedef_anon_struct_sz = bpf_core_type_size(anon_struct_typedef);
+ out->typedef_struct_ptr_sz = bpf_core_type_size(struct_ptr_typedef);
+ out->typedef_int_sz = bpf_core_type_size(int_typedef);
+ out->typedef_enum_sz = bpf_core_type_size(enum_typedef);
+ out->typedef_void_ptr_sz = bpf_core_type_size(void_ptr_typedef);
+ out->typedef_func_proto_sz = bpf_core_type_size(func_proto_typedef);
+ out->typedef_arr_sz = bpf_core_type_size(arr_typedef);
+#else
+ data.skip = true;
+#endif
+ return 0;
+}
diff --git a/tools/testing/selftests/bpf/progs/test_core_reloc_type_id.c b/tools/testing/selftests/bpf/progs/test_core_reloc_type_id.c
new file mode 100644
index 000000000000..6fc8b9d66e34
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/test_core_reloc_type_id.c
@@ -0,0 +1,115 @@
+// SPDX-License-Identifier: GPL-2.0
+// Copyright (c) 2020 Facebook
+
+#include <linux/bpf.h>
+#include <stdint.h>
+#include <stdbool.h>
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_core_read.h>
+
+char _license[] SEC("license") = "GPL";
+
+struct {
+ char in[256];
+ char out[256];
+ bool skip;
+} data = {};
+
+/* some types are shared with test_core_reloc_type_based.c */
+struct a_struct {
+ int x;
+};
+
+union a_union {
+ int y;
+ int z;
+};
+
+enum an_enum {
+ AN_ENUM_VAL1 = 1,
+ AN_ENUM_VAL2 = 2,
+ AN_ENUM_VAL3 = 3,
+};
+
+typedef struct a_struct named_struct_typedef;
+
+typedef int (*func_proto_typedef)(long);
+
+typedef char arr_typedef[20];
+
+struct core_reloc_type_id_output {
+ int local_anon_struct;
+ int local_anon_union;
+ int local_anon_enum;
+ int local_anon_func_proto_ptr;
+ int local_anon_void_ptr;
+ int local_anon_arr;
+
+ int local_struct;
+ int local_union;
+ int local_enum;
+ int local_int;
+ int local_struct_typedef;
+ int local_func_proto_typedef;
+ int local_arr_typedef;
+
+ int targ_struct;
+ int targ_union;
+ int targ_enum;
+ int targ_int;
+ int targ_struct_typedef;
+ int targ_func_proto_typedef;
+ int targ_arr_typedef;
+};
+
+/* preserve types even if Clang doesn't support built-in */
+struct a_struct t1 = {};
+union a_union t2 = {};
+enum an_enum t3 = 0;
+named_struct_typedef t4 = {};
+func_proto_typedef t5 = 0;
+arr_typedef t6 = {};
+
+SEC("raw_tracepoint/sys_enter")
+int test_core_type_id(void *ctx)
+{
+ /* We use __builtin_btf_type_id() in this tests, but up until the time
+ * __builtin_preserve_type_info() was added it contained a bug that
+ * would make this test fail. The bug was fixed ([0]) with addition of
+ * __builtin_preserve_type_info(), though, so that's what we are using
+ * to detect whether this test has to be executed, however strange
+ * that might look like.
+ *
+ * [0] https://github.com/llvm/llvm-project/commit/00602ee7ef0bf6c68d690a2bd729c12b95c95c99
+ */
+#if __has_builtin(__builtin_preserve_type_info)
+ struct core_reloc_type_id_output *out = (void *)&data.out;
+
+ out->local_anon_struct = bpf_core_type_id_local(struct { int marker_field; });
+ out->local_anon_union = bpf_core_type_id_local(union { int marker_field; });
+ out->local_anon_enum = bpf_core_type_id_local(enum { MARKER_ENUM_VAL = 123 });
+ out->local_anon_func_proto_ptr = bpf_core_type_id_local(_Bool(*)(int));
+ out->local_anon_void_ptr = bpf_core_type_id_local(void *);
+ out->local_anon_arr = bpf_core_type_id_local(_Bool[47]);
+
+ out->local_struct = bpf_core_type_id_local(struct a_struct);
+ out->local_union = bpf_core_type_id_local(union a_union);
+ out->local_enum = bpf_core_type_id_local(enum an_enum);
+ out->local_int = bpf_core_type_id_local(int);
+ out->local_struct_typedef = bpf_core_type_id_local(named_struct_typedef);
+ out->local_func_proto_typedef = bpf_core_type_id_local(func_proto_typedef);
+ out->local_arr_typedef = bpf_core_type_id_local(arr_typedef);
+
+ out->targ_struct = bpf_core_type_id_kernel(struct a_struct);
+ out->targ_union = bpf_core_type_id_kernel(union a_union);
+ out->targ_enum = bpf_core_type_id_kernel(enum an_enum);
+ out->targ_int = bpf_core_type_id_kernel(int);
+ out->targ_struct_typedef = bpf_core_type_id_kernel(named_struct_typedef);
+ out->targ_func_proto_typedef = bpf_core_type_id_kernel(func_proto_typedef);
+ out->targ_arr_typedef = bpf_core_type_id_kernel(arr_typedef);
+#else
+ data.skip = true;
+#endif
+
+ return 0;
+}
diff --git a/tools/testing/selftests/bpf/progs/test_core_retro.c b/tools/testing/selftests/bpf/progs/test_core_retro.c
new file mode 100644
index 000000000000..20861ec2f674
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/test_core_retro.c
@@ -0,0 +1,43 @@
+// SPDX-License-Identifier: GPL-2.0
+// Copyright (c) 2020 Facebook
+#include <linux/bpf.h>
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_core_read.h>
+
+struct task_struct {
+ int tgid;
+} __attribute__((preserve_access_index));
+
+struct {
+ __uint(type, BPF_MAP_TYPE_ARRAY);
+ __uint(max_entries, 1);
+ __type(key, int);
+ __type(value, int);
+} exp_tgid_map SEC(".maps");
+
+struct {
+ __uint(type, BPF_MAP_TYPE_ARRAY);
+ __uint(max_entries, 1);
+ __type(key, int);
+ __type(value, int);
+} results SEC(".maps");
+
+SEC("tp/raw_syscalls/sys_enter")
+int handle_sys_enter(void *ctx)
+{
+ struct task_struct *task = (void *)bpf_get_current_task();
+ int tgid = BPF_CORE_READ(task, tgid);
+ int zero = 0;
+ int real_tgid = bpf_get_current_pid_tgid() >> 32;
+ int *exp_tgid = bpf_map_lookup_elem(&exp_tgid_map, &zero);
+
+ /* only pass through sys_enters from test process */
+ if (!exp_tgid || *exp_tgid != real_tgid)
+ return 0;
+
+ bpf_map_update_elem(&results, &zero, &tgid, 0);
+
+ return 0;
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/test_custom_sec_handlers.c b/tools/testing/selftests/bpf/progs/test_custom_sec_handlers.c
new file mode 100644
index 000000000000..4061f701ca50
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/test_custom_sec_handlers.c
@@ -0,0 +1,63 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2022 Facebook */
+
+#include "vmlinux.h"
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
+
+const volatile int my_pid;
+
+bool abc1_called;
+bool abc2_called;
+bool custom1_called;
+bool custom2_called;
+bool kprobe1_called;
+bool xyz_called;
+
+SEC("abc")
+int abc1(void *ctx)
+{
+ abc1_called = true;
+ return 0;
+}
+
+SEC("abc/whatever")
+int abc2(void *ctx)
+{
+ abc2_called = true;
+ return 0;
+}
+
+SEC("custom")
+int custom1(void *ctx)
+{
+ custom1_called = true;
+ return 0;
+}
+
+SEC("custom/something")
+int custom2(void *ctx)
+{
+ custom2_called = true;
+ return 0;
+}
+
+SEC("kprobe")
+int kprobe1(void *ctx)
+{
+ kprobe1_called = true;
+ return 0;
+}
+
+SEC("xyz/blah")
+int xyz(void *ctx)
+{
+ int whatever;
+
+ /* use sleepable helper, custom handler should set sleepable flag */
+ bpf_copy_from_user(&whatever, sizeof(whatever), NULL);
+ xyz_called = true;
+ return 0;
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/test_d_path.c b/tools/testing/selftests/bpf/progs/test_d_path.c
new file mode 100644
index 000000000000..84e1f883f97b
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/test_d_path.c
@@ -0,0 +1,65 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include "vmlinux.h"
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
+
+#define MAX_PATH_LEN 128
+#define MAX_FILES 7
+
+pid_t my_pid = 0;
+__u32 cnt_stat = 0;
+__u32 cnt_close = 0;
+char paths_stat[MAX_FILES][MAX_PATH_LEN] = {};
+char paths_close[MAX_FILES][MAX_PATH_LEN] = {};
+int rets_stat[MAX_FILES] = {};
+int rets_close[MAX_FILES] = {};
+
+int called_stat = 0;
+int called_close = 0;
+
+SEC("fentry/security_inode_getattr")
+int BPF_PROG(prog_stat, struct path *path, struct kstat *stat,
+ __u32 request_mask, unsigned int query_flags)
+{
+ pid_t pid = bpf_get_current_pid_tgid() >> 32;
+ __u32 cnt = cnt_stat;
+ int ret;
+
+ called_stat = 1;
+
+ if (pid != my_pid)
+ return 0;
+
+ if (cnt >= MAX_FILES)
+ return 0;
+ ret = bpf_d_path(path, paths_stat[cnt], MAX_PATH_LEN);
+
+ rets_stat[cnt] = ret;
+ cnt_stat++;
+ return 0;
+}
+
+SEC("fentry/filp_close")
+int BPF_PROG(prog_close, struct file *file, void *id)
+{
+ pid_t pid = bpf_get_current_pid_tgid() >> 32;
+ __u32 cnt = cnt_close;
+ int ret;
+
+ called_close = 1;
+
+ if (pid != my_pid)
+ return 0;
+
+ if (cnt >= MAX_FILES)
+ return 0;
+ ret = bpf_d_path(&file->f_path,
+ paths_close[cnt], MAX_PATH_LEN);
+
+ rets_close[cnt] = ret;
+ cnt_close++;
+ return 0;
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/test_d_path_check_rdonly_mem.c b/tools/testing/selftests/bpf/progs/test_d_path_check_rdonly_mem.c
new file mode 100644
index 000000000000..27c27cff6a3a
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/test_d_path_check_rdonly_mem.c
@@ -0,0 +1,28 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2022 Google */
+
+#include "vmlinux.h"
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
+
+extern const int bpf_prog_active __ksym;
+
+SEC("fentry/security_inode_getattr")
+int BPF_PROG(d_path_check_rdonly_mem, struct path *path, struct kstat *stat,
+ __u32 request_mask, unsigned int query_flags)
+{
+ void *active;
+ __u32 cpu;
+
+ cpu = bpf_get_smp_processor_id();
+ active = (void *)bpf_per_cpu_ptr(&bpf_prog_active, cpu);
+ if (active) {
+ /* FAIL here! 'active' points to readonly memory. bpf helpers
+ * that update its arguments can not write into it.
+ */
+ bpf_d_path(path, active, sizeof(int));
+ }
+ return 0;
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/test_d_path_check_types.c b/tools/testing/selftests/bpf/progs/test_d_path_check_types.c
new file mode 100644
index 000000000000..7e02b7361307
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/test_d_path_check_types.c
@@ -0,0 +1,32 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include "vmlinux.h"
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
+
+extern const int bpf_prog_active __ksym;
+
+struct {
+ __uint(type, BPF_MAP_TYPE_RINGBUF);
+ __uint(max_entries, 1 << 12);
+} ringbuf SEC(".maps");
+
+SEC("fentry/security_inode_getattr")
+int BPF_PROG(d_path_check_rdonly_mem, struct path *path, struct kstat *stat,
+ __u32 request_mask, unsigned int query_flags)
+{
+ void *active;
+ u32 cpu;
+
+ cpu = bpf_get_smp_processor_id();
+ active = (void *)bpf_per_cpu_ptr(&bpf_prog_active, cpu);
+ if (active) {
+ /* FAIL here! 'active' points to 'regular' memory. It
+ * cannot be submitted to ring buffer.
+ */
+ bpf_ringbuf_submit(active, 0);
+ }
+ return 0;
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/test_deny_namespace.c b/tools/testing/selftests/bpf/progs/test_deny_namespace.c
new file mode 100644
index 000000000000..e96b901a733c
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/test_deny_namespace.c
@@ -0,0 +1,30 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <linux/bpf.h>
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
+#include <errno.h>
+#include <linux/capability.h>
+
+typedef struct { unsigned long long val; } kernel_cap_t;
+
+struct cred {
+ kernel_cap_t cap_effective;
+} __attribute__((preserve_access_index));
+
+char _license[] SEC("license") = "GPL";
+
+SEC("lsm.s/userns_create")
+int BPF_PROG(test_userns_create, const struct cred *cred, int ret)
+{
+ kernel_cap_t caps = cred->cap_effective;
+ __u64 cap_mask = 1ULL << CAP_SYS_ADMIN;
+
+ if (ret)
+ return 0;
+
+ ret = -EPERM;
+ if (caps.val & cap_mask)
+ return 0;
+
+ return -EPERM;
+}
diff --git a/tools/testing/selftests/bpf/progs/test_enable_stats.c b/tools/testing/selftests/bpf/progs/test_enable_stats.c
index 01a002ade529..1705097d01d7 100644
--- a/tools/testing/selftests/bpf/progs/test_enable_stats.c
+++ b/tools/testing/selftests/bpf/progs/test_enable_stats.c
@@ -13,6 +13,6 @@ __u64 count = 0;
SEC("raw_tracepoint/sys_enter")
int test_enable_stats(void *ctx)
{
- count += 1;
+ __sync_fetch_and_add(&count, 1);
return 0;
}
diff --git a/tools/testing/selftests/bpf/progs/test_endian.c b/tools/testing/selftests/bpf/progs/test_endian.c
new file mode 100644
index 000000000000..ddb687c5d125
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/test_endian.c
@@ -0,0 +1,37 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2020 Facebook */
+
+#include "vmlinux.h"
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_endian.h>
+
+#define IN16 0x1234
+#define IN32 0x12345678U
+#define IN64 0x123456789abcdef0ULL
+
+__u16 in16 = 0;
+__u32 in32 = 0;
+__u64 in64 = 0;
+
+__u16 out16 = 0;
+__u32 out32 = 0;
+__u64 out64 = 0;
+
+__u16 const16 = 0;
+__u32 const32 = 0;
+__u64 const64 = 0;
+
+SEC("raw_tp/sys_enter")
+int sys_enter(const void *ctx)
+{
+ out16 = __builtin_bswap16(in16);
+ out32 = __builtin_bswap32(in32);
+ out64 = __builtin_bswap64(in64);
+ const16 = ___bpf_swab16(IN16);
+ const32 = ___bpf_swab32(IN32);
+ const64 = ___bpf_swab64(IN64);
+
+ return 0;
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/test_fill_link_info.c b/tools/testing/selftests/bpf/progs/test_fill_link_info.c
new file mode 100644
index 000000000000..6afa834756e9
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/test_fill_link_info.c
@@ -0,0 +1,54 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (C) 2023 Yafang Shao <laoar.shao@gmail.com> */
+
+#include "vmlinux.h"
+#include <bpf/bpf_tracing.h>
+#include <stdbool.h>
+
+extern bool CONFIG_X86_KERNEL_IBT __kconfig __weak;
+
+/* This function is here to have CONFIG_X86_KERNEL_IBT
+ * used and added to object BTF.
+ */
+int unused(void)
+{
+ return CONFIG_X86_KERNEL_IBT ? 0 : 1;
+}
+
+SEC("kprobe")
+int BPF_PROG(kprobe_run)
+{
+ return 0;
+}
+
+SEC("uprobe")
+int BPF_PROG(uprobe_run)
+{
+ return 0;
+}
+
+SEC("tracepoint")
+int BPF_PROG(tp_run)
+{
+ return 0;
+}
+
+SEC("perf_event")
+int event_run(void *ctx)
+{
+ return 0;
+}
+
+SEC("kprobe.multi")
+int BPF_PROG(kmulti_run)
+{
+ return 0;
+}
+
+SEC("uprobe.multi")
+int BPF_PROG(umulti_run)
+{
+ return 0;
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/test_fsverity.c b/tools/testing/selftests/bpf/progs/test_fsverity.c
new file mode 100644
index 000000000000..9e0f73e8189c
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/test_fsverity.c
@@ -0,0 +1,48 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2023 Meta Platforms, Inc. and affiliates. */
+
+#include "vmlinux.h"
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
+#include "bpf_kfuncs.h"
+
+char _license[] SEC("license") = "GPL";
+
+#ifndef SHA256_DIGEST_SIZE
+#define SHA256_DIGEST_SIZE 32
+#endif
+
+#define SIZEOF_STRUCT_FSVERITY_DIGEST 4 /* sizeof(struct fsverity_digest) */
+
+char expected_digest[SIZEOF_STRUCT_FSVERITY_DIGEST + SHA256_DIGEST_SIZE];
+char digest[SIZEOF_STRUCT_FSVERITY_DIGEST + SHA256_DIGEST_SIZE];
+__u32 monitored_pid;
+__u32 got_fsverity;
+__u32 digest_matches;
+
+SEC("lsm.s/file_open")
+int BPF_PROG(test_file_open, struct file *f)
+{
+ struct bpf_dynptr digest_ptr;
+ __u32 pid;
+ int ret;
+ int i;
+
+ pid = bpf_get_current_pid_tgid() >> 32;
+ if (pid != monitored_pid)
+ return 0;
+
+ bpf_dynptr_from_mem(digest, sizeof(digest), 0, &digest_ptr);
+ ret = bpf_get_fsverity_digest(f, &digest_ptr);
+ if (ret < 0)
+ return 0;
+ got_fsverity = 1;
+
+ for (i = 0; i < (int)sizeof(digest); i++) {
+ if (digest[i] != expected_digest[i])
+ return 0;
+ }
+
+ digest_matches = 1;
+ return 0;
+}
diff --git a/tools/testing/selftests/bpf/progs/test_get_stack_rawtp.c b/tools/testing/selftests/bpf/progs/test_get_stack_rawtp.c
index 29817a703984..b6a6eb279e54 100644
--- a/tools/testing/selftests/bpf/progs/test_get_stack_rawtp.c
+++ b/tools/testing/selftests/bpf/progs/test_get_stack_rawtp.c
@@ -57,8 +57,9 @@ struct {
SEC("raw_tracepoint/sys_enter")
int bpf_prog1(void *ctx)
{
- int max_len, max_buildid_len, usize, ksize, total_size;
+ int max_len, max_buildid_len, total_size;
struct stack_trace_t *data;
+ long usize, ksize;
void *raw_data;
__u32 key = 0;
diff --git a/tools/testing/selftests/bpf/progs/test_get_xattr.c b/tools/testing/selftests/bpf/progs/test_get_xattr.c
new file mode 100644
index 000000000000..7eb2a4e5a3e5
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/test_get_xattr.c
@@ -0,0 +1,37 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2023 Meta Platforms, Inc. and affiliates. */
+
+#include "vmlinux.h"
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
+#include "bpf_kfuncs.h"
+
+char _license[] SEC("license") = "GPL";
+
+__u32 monitored_pid;
+__u32 found_xattr;
+
+static const char expected_value[] = "hello";
+char value[32];
+
+SEC("lsm.s/file_open")
+int BPF_PROG(test_file_open, struct file *f)
+{
+ struct bpf_dynptr value_ptr;
+ __u32 pid;
+ int ret;
+
+ pid = bpf_get_current_pid_tgid() >> 32;
+ if (pid != monitored_pid)
+ return 0;
+
+ bpf_dynptr_from_mem(value, sizeof(value), 0, &value_ptr);
+
+ ret = bpf_get_file_xattr(f, "user.kfuncs", &value_ptr);
+ if (ret != sizeof(expected_value))
+ return 0;
+ if (bpf_strncmp(value, ret, expected_value))
+ return 0;
+ found_xattr = 1;
+ return 0;
+}
diff --git a/tools/testing/selftests/bpf/progs/test_global_data.c b/tools/testing/selftests/bpf/progs/test_global_data.c
index 1319be1c54ba..719e314ef3e4 100644
--- a/tools/testing/selftests/bpf/progs/test_global_data.c
+++ b/tools/testing/selftests/bpf/progs/test_global_data.c
@@ -68,7 +68,7 @@ static struct foo struct3 = {
bpf_map_update_elem(&result_##map, &key, var, 0); \
} while (0)
-SEC("classifier/static_data_load")
+SEC("tc")
int load_static_data(struct __sk_buff *skb)
{
static const __u64 bar = ~0;
diff --git a/tools/testing/selftests/bpf/progs/test_global_func1.c b/tools/testing/selftests/bpf/progs/test_global_func1.c
index 880260f6d536..fc69ff18880d 100644
--- a/tools/testing/selftests/bpf/progs/test_global_func1.c
+++ b/tools/testing/selftests/bpf/progs/test_global_func1.c
@@ -3,14 +3,15 @@
#include <stddef.h>
#include <linux/bpf.h>
#include <bpf/bpf_helpers.h>
+#include "bpf_misc.h"
-#ifndef MAX_STACK
-#define MAX_STACK (512 - 3 * 32 + 8)
-#endif
+#define MAX_STACK 260
static __attribute__ ((noinline))
int f0(int var, struct __sk_buff *skb)
{
+ asm volatile ("");
+
return skb->len;
}
@@ -19,6 +20,8 @@ int f1(struct __sk_buff *skb)
{
volatile char buf[MAX_STACK] = {};
+ __sink(buf[MAX_STACK - 1]);
+
return f0(0, skb) + skb->len;
}
@@ -27,6 +30,10 @@ int f3(int, struct __sk_buff *skb, int);
__attribute__ ((noinline))
int f2(int val, struct __sk_buff *skb)
{
+ volatile char buf[MAX_STACK] = {};
+
+ __sink(buf[MAX_STACK - 1]);
+
return f1(skb) + f3(val, skb, 1);
}
@@ -35,11 +42,14 @@ int f3(int val, struct __sk_buff *skb, int var)
{
volatile char buf[MAX_STACK] = {};
+ __sink(buf[MAX_STACK - 1]);
+
return skb->ifindex * val * var;
}
-SEC("classifier/test")
-int test_cls(struct __sk_buff *skb)
+SEC("tc")
+__failure __msg("combined stack size of 3 calls is")
+int global_func1(struct __sk_buff *skb)
{
return f0(1, skb) + f1(skb) + f2(2, skb) + f3(3, skb, 4);
}
diff --git a/tools/testing/selftests/bpf/progs/test_global_func10.c b/tools/testing/selftests/bpf/progs/test_global_func10.c
new file mode 100644
index 000000000000..8fba3f3649e2
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/test_global_func10.c
@@ -0,0 +1,31 @@
+// SPDX-License-Identifier: GPL-2.0-only
+#include <stddef.h>
+#include <linux/bpf.h>
+#include <bpf/bpf_helpers.h>
+#include "bpf_misc.h"
+
+struct Small {
+ long x;
+};
+
+struct Big {
+ long x;
+ long y;
+};
+
+__noinline int foo(const struct Big *big)
+{
+ if (!big)
+ return 0;
+
+ return bpf_get_prandom_u32() < big->y;
+}
+
+SEC("cgroup_skb/ingress")
+__failure __msg("invalid indirect access to stack")
+int global_func10(struct __sk_buff *skb)
+{
+ const struct Small small = {.x = skb->len };
+
+ return foo((struct Big *)&small) ? 1 : 0;
+}
diff --git a/tools/testing/selftests/bpf/progs/test_global_func11.c b/tools/testing/selftests/bpf/progs/test_global_func11.c
new file mode 100644
index 000000000000..283e036dc401
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/test_global_func11.c
@@ -0,0 +1,21 @@
+// SPDX-License-Identifier: GPL-2.0-only
+#include <stddef.h>
+#include <linux/bpf.h>
+#include <bpf/bpf_helpers.h>
+#include "bpf_misc.h"
+
+struct S {
+ int x;
+};
+
+__noinline int foo(const struct S *s)
+{
+ return s ? bpf_get_prandom_u32() < s->x : 0;
+}
+
+SEC("cgroup_skb/ingress")
+__failure __msg("Caller passes invalid args into func#1")
+int global_func11(struct __sk_buff *skb)
+{
+ return foo((const void *)skb);
+}
diff --git a/tools/testing/selftests/bpf/progs/test_global_func12.c b/tools/testing/selftests/bpf/progs/test_global_func12.c
new file mode 100644
index 000000000000..6e03d42519a6
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/test_global_func12.c
@@ -0,0 +1,25 @@
+// SPDX-License-Identifier: GPL-2.0-only
+#include <stddef.h>
+#include <linux/bpf.h>
+#include <bpf/bpf_helpers.h>
+#include "bpf_misc.h"
+
+struct S {
+ int x;
+};
+
+__noinline int foo(const struct S *s)
+{
+ return bpf_get_prandom_u32() < s->x;
+}
+
+SEC("cgroup_skb/ingress")
+__failure __msg("invalid mem access 'mem_or_null'")
+int global_func12(struct __sk_buff *skb)
+{
+ const struct S s = {.x = skb->len };
+
+ foo(&s);
+
+ return 1;
+}
diff --git a/tools/testing/selftests/bpf/progs/test_global_func13.c b/tools/testing/selftests/bpf/progs/test_global_func13.c
new file mode 100644
index 000000000000..02ea80da75b5
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/test_global_func13.c
@@ -0,0 +1,26 @@
+// SPDX-License-Identifier: GPL-2.0-only
+#include <stddef.h>
+#include <linux/bpf.h>
+#include <bpf/bpf_helpers.h>
+#include "bpf_misc.h"
+
+struct S {
+ int x;
+};
+
+__noinline int foo(const struct S *s)
+{
+ if (s)
+ return bpf_get_prandom_u32() < s->x;
+
+ return 0;
+}
+
+SEC("cgroup_skb/ingress")
+__failure __msg("Caller passes invalid args into func#1")
+int global_func13(struct __sk_buff *skb)
+{
+ const struct S *s = (const struct S *)(0xbedabeda);
+
+ return foo(s);
+}
diff --git a/tools/testing/selftests/bpf/progs/test_global_func14.c b/tools/testing/selftests/bpf/progs/test_global_func14.c
new file mode 100644
index 000000000000..33b7d5efd7b2
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/test_global_func14.c
@@ -0,0 +1,23 @@
+// SPDX-License-Identifier: GPL-2.0-only
+#include <stddef.h>
+#include <linux/bpf.h>
+#include <bpf/bpf_helpers.h>
+#include "bpf_misc.h"
+
+struct S;
+
+__noinline int foo(const struct S *s)
+{
+ if (s)
+ return bpf_get_prandom_u32() < *(const int *) s;
+
+ return 0;
+}
+
+SEC("cgroup_skb/ingress")
+__failure __msg("reference type('FWD S') size cannot be determined")
+int global_func14(struct __sk_buff *skb)
+{
+
+ return foo(NULL);
+}
diff --git a/tools/testing/selftests/bpf/progs/test_global_func15.c b/tools/testing/selftests/bpf/progs/test_global_func15.c
new file mode 100644
index 000000000000..b4e089d6981d
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/test_global_func15.c
@@ -0,0 +1,56 @@
+// SPDX-License-Identifier: GPL-2.0-only
+#include <stddef.h>
+#include <linux/bpf.h>
+#include <bpf/bpf_helpers.h>
+#include "bpf_misc.h"
+
+__noinline int foo(unsigned int *v)
+{
+ if (v)
+ *v = bpf_get_prandom_u32();
+
+ return 0;
+}
+
+SEC("cgroup_skb/ingress")
+__failure __msg("At program exit the register R0 has ")
+int global_func15(struct __sk_buff *skb)
+{
+ unsigned int v = 1;
+
+ foo(&v);
+
+ return v;
+}
+
+SEC("cgroup_skb/ingress")
+__log_level(2) __flag(BPF_F_TEST_STATE_FREQ)
+__failure
+/* check that fallthrough code path marks r0 as precise */
+__msg("mark_precise: frame0: regs=r0 stack= before 2: (b7) r0 = 1")
+/* check that branch code path marks r0 as precise */
+__msg("mark_precise: frame0: regs=r0 stack= before 0: (85) call bpf_get_prandom_u32#7")
+__msg("At program exit the register R0 has ")
+__naked int global_func15_tricky_pruning(void)
+{
+ asm volatile (
+ "call %[bpf_get_prandom_u32];"
+ "if r0 s> 1000 goto 1f;"
+ "r0 = 1;"
+ "1:"
+ "goto +0;" /* checkpoint */
+ /* cgroup_skb/ingress program is expected to return [0, 1]
+ * values, so branch above makes sure that in a fallthrough
+ * case we have a valid 1 stored in R0 register, but in
+ * a branch case we assign some random value to R0. So if
+ * there is something wrong with precision tracking for R0 at
+ * program exit, we might erronenously prune branch case,
+ * because R0 in fallthrough case is imprecise (and thus any
+ * value is valid from POV of verifier is_state_equal() logic)
+ */
+ "exit;"
+ :
+ : __imm(bpf_get_prandom_u32)
+ : __clobber_common
+ );
+}
diff --git a/tools/testing/selftests/bpf/progs/test_global_func16.c b/tools/testing/selftests/bpf/progs/test_global_func16.c
new file mode 100644
index 000000000000..e3e64bc472cd
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/test_global_func16.c
@@ -0,0 +1,24 @@
+// SPDX-License-Identifier: GPL-2.0-only
+#include <stddef.h>
+#include <linux/bpf.h>
+#include <bpf/bpf_helpers.h>
+#include "bpf_misc.h"
+
+__noinline int foo(int (*arr)[10])
+{
+ if (arr)
+ return (*arr)[9];
+
+ return 0;
+}
+
+SEC("cgroup_skb/ingress")
+__success
+int global_func16(struct __sk_buff *skb)
+{
+ int array[10];
+
+ const int rv = foo(&array);
+
+ return rv ? 1 : 0;
+}
diff --git a/tools/testing/selftests/bpf/progs/test_global_func17.c b/tools/testing/selftests/bpf/progs/test_global_func17.c
new file mode 100644
index 000000000000..5de44b09e8ec
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/test_global_func17.c
@@ -0,0 +1,19 @@
+// SPDX-License-Identifier: GPL-2.0-only
+#include <vmlinux.h>
+#include <bpf/bpf_helpers.h>
+#include "bpf_misc.h"
+
+__noinline int foo(int *p)
+{
+ barrier_var(p);
+ return p ? (*p = 42) : 0;
+}
+
+const volatile int i;
+
+SEC("tc")
+__failure __msg("Caller passes invalid args into func#1")
+int global_func17(struct __sk_buff *skb)
+{
+ return foo((int *)&i);
+}
diff --git a/tools/testing/selftests/bpf/progs/test_global_func2.c b/tools/testing/selftests/bpf/progs/test_global_func2.c
index 2c18d82923a2..2beab9c3b68a 100644
--- a/tools/testing/selftests/bpf/progs/test_global_func2.c
+++ b/tools/testing/selftests/bpf/progs/test_global_func2.c
@@ -1,4 +1,49 @@
// SPDX-License-Identifier: GPL-2.0-only
/* Copyright (c) 2020 Facebook */
+#include <stddef.h>
+#include <linux/bpf.h>
+#include <bpf/bpf_helpers.h>
+#include "bpf_misc.h"
+
#define MAX_STACK (512 - 3 * 32)
-#include "test_global_func1.c"
+
+static __attribute__ ((noinline))
+int f0(int var, struct __sk_buff *skb)
+{
+ return skb->len;
+}
+
+__attribute__ ((noinline))
+int f1(struct __sk_buff *skb)
+{
+ volatile char buf[MAX_STACK] = {};
+
+ __sink(buf[MAX_STACK - 1]);
+
+ return f0(0, skb) + skb->len;
+}
+
+int f3(int, struct __sk_buff *skb, int);
+
+__attribute__ ((noinline))
+int f2(int val, struct __sk_buff *skb)
+{
+ return f1(skb) + f3(val, skb, 1);
+}
+
+__attribute__ ((noinline))
+int f3(int val, struct __sk_buff *skb, int var)
+{
+ volatile char buf[MAX_STACK] = {};
+
+ __sink(buf[MAX_STACK - 1]);
+
+ return skb->ifindex * val * var;
+}
+
+SEC("tc")
+__success
+int global_func2(struct __sk_buff *skb)
+{
+ return f0(1, skb) + f1(skb) + f2(2, skb) + f3(3, skb, 4);
+}
diff --git a/tools/testing/selftests/bpf/progs/test_global_func3.c b/tools/testing/selftests/bpf/progs/test_global_func3.c
index 86f0ecb304fc..142b682d3c2f 100644
--- a/tools/testing/selftests/bpf/progs/test_global_func3.c
+++ b/tools/testing/selftests/bpf/progs/test_global_func3.c
@@ -3,6 +3,7 @@
#include <stddef.h>
#include <linux/bpf.h>
#include <bpf/bpf_helpers.h>
+#include "bpf_misc.h"
__attribute__ ((noinline))
int f1(struct __sk_buff *skb)
@@ -46,20 +47,15 @@ int f7(struct __sk_buff *skb)
return f6(skb);
}
-#ifndef NO_FN8
__attribute__ ((noinline))
int f8(struct __sk_buff *skb)
{
return f7(skb);
}
-#endif
-SEC("classifier/test")
-int test_cls(struct __sk_buff *skb)
+SEC("tc")
+__failure __msg("the call stack of 8 frames")
+int global_func3(struct __sk_buff *skb)
{
-#ifndef NO_FN8
return f8(skb);
-#else
- return f7(skb);
-#endif
}
diff --git a/tools/testing/selftests/bpf/progs/test_global_func4.c b/tools/testing/selftests/bpf/progs/test_global_func4.c
index 610f75edf276..1733d87ad3f3 100644
--- a/tools/testing/selftests/bpf/progs/test_global_func4.c
+++ b/tools/testing/selftests/bpf/progs/test_global_func4.c
@@ -1,4 +1,55 @@
// SPDX-License-Identifier: GPL-2.0-only
/* Copyright (c) 2020 Facebook */
-#define NO_FN8
-#include "test_global_func3.c"
+#include <stddef.h>
+#include <linux/bpf.h>
+#include <bpf/bpf_helpers.h>
+#include "bpf_misc.h"
+
+__attribute__ ((noinline))
+int f1(struct __sk_buff *skb)
+{
+ return skb->len;
+}
+
+__attribute__ ((noinline))
+int f2(int val, struct __sk_buff *skb)
+{
+ return f1(skb) + val;
+}
+
+__attribute__ ((noinline))
+int f3(int val, struct __sk_buff *skb, int var)
+{
+ return f2(var, skb) + val;
+}
+
+__attribute__ ((noinline))
+int f4(struct __sk_buff *skb)
+{
+ return f3(1, skb, 2);
+}
+
+__attribute__ ((noinline))
+int f5(struct __sk_buff *skb)
+{
+ return f4(skb);
+}
+
+__attribute__ ((noinline))
+int f6(struct __sk_buff *skb)
+{
+ return f5(skb);
+}
+
+__attribute__ ((noinline))
+int f7(struct __sk_buff *skb)
+{
+ return f6(skb);
+}
+
+SEC("tc")
+__success
+int global_func4(struct __sk_buff *skb)
+{
+ return f7(skb);
+}
diff --git a/tools/testing/selftests/bpf/progs/test_global_func5.c b/tools/testing/selftests/bpf/progs/test_global_func5.c
index 260c25b827ef..257c0569ff98 100644
--- a/tools/testing/selftests/bpf/progs/test_global_func5.c
+++ b/tools/testing/selftests/bpf/progs/test_global_func5.c
@@ -3,6 +3,7 @@
#include <stddef.h>
#include <linux/bpf.h>
#include <bpf/bpf_helpers.h>
+#include "bpf_misc.h"
__attribute__ ((noinline))
int f1(struct __sk_buff *skb)
@@ -24,8 +25,9 @@ int f3(int val, struct __sk_buff *skb)
return skb->ifindex * val;
}
-SEC("classifier/test")
-int test_cls(struct __sk_buff *skb)
+SEC("tc")
+__failure __msg("expects pointer to ctx")
+int global_func5(struct __sk_buff *skb)
{
return f1(skb) + f2(2, skb) + f3(3, skb);
}
diff --git a/tools/testing/selftests/bpf/progs/test_global_func6.c b/tools/testing/selftests/bpf/progs/test_global_func6.c
index 69e19c64e10b..46c38c8f2cf0 100644
--- a/tools/testing/selftests/bpf/progs/test_global_func6.c
+++ b/tools/testing/selftests/bpf/progs/test_global_func6.c
@@ -3,6 +3,7 @@
#include <stddef.h>
#include <linux/bpf.h>
#include <bpf/bpf_helpers.h>
+#include "bpf_misc.h"
__attribute__ ((noinline))
int f1(struct __sk_buff *skb)
@@ -24,8 +25,9 @@ int f3(int val, struct __sk_buff *skb)
return skb->ifindex * val;
}
-SEC("classifier/test")
-int test_cls(struct __sk_buff *skb)
+SEC("tc")
+__failure __msg("modified ctx ptr R2")
+int global_func6(struct __sk_buff *skb)
{
return f1(skb) + f2(2, skb) + f3(3, skb);
}
diff --git a/tools/testing/selftests/bpf/progs/test_global_func7.c b/tools/testing/selftests/bpf/progs/test_global_func7.c
index 309b3f6136bd..f182febfde3c 100644
--- a/tools/testing/selftests/bpf/progs/test_global_func7.c
+++ b/tools/testing/selftests/bpf/progs/test_global_func7.c
@@ -3,6 +3,7 @@
#include <stddef.h>
#include <linux/bpf.h>
#include <bpf/bpf_helpers.h>
+#include "bpf_misc.h"
__attribute__ ((noinline))
void foo(struct __sk_buff *skb)
@@ -10,8 +11,9 @@ void foo(struct __sk_buff *skb)
skb->tc_index = 0;
}
-SEC("classifier/test")
-int test_cls(struct __sk_buff *skb)
+SEC("tc")
+__failure __msg("foo() doesn't return scalar")
+int global_func7(struct __sk_buff *skb)
{
foo(skb);
return 0;
diff --git a/tools/testing/selftests/bpf/progs/test_global_func8.c b/tools/testing/selftests/bpf/progs/test_global_func8.c
new file mode 100644
index 000000000000..9b9c57fa2dd3
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/test_global_func8.c
@@ -0,0 +1,21 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/* Copyright (c) 2020 Facebook */
+#include <stddef.h>
+#include <linux/bpf.h>
+#include <bpf/bpf_helpers.h>
+#include "bpf_misc.h"
+
+__noinline int foo(struct __sk_buff *skb)
+{
+ return bpf_get_prandom_u32();
+}
+
+SEC("cgroup_skb/ingress")
+__success
+int global_func8(struct __sk_buff *skb)
+{
+ if (!foo(skb))
+ return 0;
+
+ return 1;
+}
diff --git a/tools/testing/selftests/bpf/progs/test_global_func9.c b/tools/testing/selftests/bpf/progs/test_global_func9.c
new file mode 100644
index 000000000000..1f2cb0159b8d
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/test_global_func9.c
@@ -0,0 +1,134 @@
+// SPDX-License-Identifier: GPL-2.0-only
+#include <stddef.h>
+#include <linux/bpf.h>
+#include <bpf/bpf_helpers.h>
+#include "bpf_misc.h"
+
+struct S {
+ int x;
+};
+
+struct C {
+ int x;
+ int y;
+};
+
+struct {
+ __uint(type, BPF_MAP_TYPE_ARRAY);
+ __uint(max_entries, 1);
+ __type(key, __u32);
+ __type(value, struct S);
+} map SEC(".maps");
+
+enum E {
+ E_ITEM
+};
+
+static int global_data_x = 100;
+static int volatile global_data_y = 500;
+
+__noinline int foo(const struct S *s)
+{
+ if (s)
+ return bpf_get_prandom_u32() < s->x;
+
+ return 0;
+}
+
+__noinline int bar(int *x)
+{
+ if (x)
+ *x &= bpf_get_prandom_u32();
+
+ return 0;
+}
+__noinline int baz(volatile int *x)
+{
+ if (x)
+ *x &= bpf_get_prandom_u32();
+
+ return 0;
+}
+
+__noinline int qux(enum E *e)
+{
+ if (e)
+ return *e;
+
+ return 0;
+}
+
+__noinline int quux(int (*arr)[10])
+{
+ if (arr)
+ return (*arr)[9];
+
+ return 0;
+}
+
+__noinline int quuz(int **p)
+{
+ if (p)
+ *p = NULL;
+
+ return 0;
+}
+
+SEC("cgroup_skb/ingress")
+__success
+int global_func9(struct __sk_buff *skb)
+{
+ int result = 0;
+
+ {
+ const struct S s = {.x = skb->len };
+
+ result |= foo(&s);
+ }
+
+ {
+ const __u32 key = 1;
+ const struct S *s = bpf_map_lookup_elem(&map, &key);
+
+ result |= foo(s);
+ }
+
+ {
+ const struct C c = {.x = skb->len, .y = skb->family };
+
+ result |= foo((const struct S *)&c);
+ }
+
+ {
+ result |= foo(NULL);
+ }
+
+ {
+ bar(&result);
+ bar(&global_data_x);
+ }
+
+ {
+ result |= baz(&global_data_y);
+ }
+
+ {
+ enum E e = E_ITEM;
+
+ result |= qux(&e);
+ }
+
+ {
+ int array[10] = {0};
+
+ result |= quux(&array);
+ }
+
+ {
+ int *p;
+
+ result |= quuz(&p);
+ }
+
+ return result ? 1 : 0;
+}
diff --git a/tools/testing/selftests/bpf/progs/test_global_func_args.c b/tools/testing/selftests/bpf/progs/test_global_func_args.c
new file mode 100644
index 000000000000..e712bf77daae
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/test_global_func_args.c
@@ -0,0 +1,91 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include <linux/bpf.h>
+
+#include <bpf/bpf_helpers.h>
+
+struct S {
+ int v;
+};
+
+struct S global_variable = {};
+
+struct {
+ __uint(type, BPF_MAP_TYPE_ARRAY);
+ __uint(max_entries, 7);
+ __type(key, __u32);
+ __type(value, int);
+} values SEC(".maps");
+
+static void save_value(__u32 index, int value)
+{
+ bpf_map_update_elem(&values, &index, &value, 0);
+}
+
+__noinline int foo(__u32 index, struct S *s)
+{
+ if (s) {
+ save_value(index, s->v);
+ return ++s->v;
+ }
+
+ save_value(index, 0);
+
+ return 1;
+}
+
+__noinline int bar(__u32 index, volatile struct S *s)
+{
+ if (s) {
+ save_value(index, s->v);
+ return ++s->v;
+ }
+
+ save_value(index, 0);
+
+ return 1;
+}
+
+__noinline int baz(struct S **s)
+{
+ if (s)
+ *s = 0;
+
+ return 0;
+}
+
+SEC("cgroup_skb/ingress")
+int test_cls(struct __sk_buff *skb)
+{
+ __u32 index = 0;
+
+ {
+ const int v = foo(index++, 0);
+
+ save_value(index++, v);
+ }
+
+ {
+ struct S s = { .v = 100 };
+
+ foo(index++, &s);
+ save_value(index++, s.v);
+ }
+
+ {
+ global_variable.v = 42;
+ bar(index++, &global_variable);
+ save_value(index++, global_variable.v);
+ }
+
+ {
+ struct S v, *p = &v;
+
+ baz(&p);
+ save_value(index++, !p);
+ }
+
+ return 0;
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/test_global_func_ctx_args.c b/tools/testing/selftests/bpf/progs/test_global_func_ctx_args.c
new file mode 100644
index 000000000000..143c8a4852bf
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/test_global_func_ctx_args.c
@@ -0,0 +1,172 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2023 Meta Platforms, Inc. and affiliates. */
+#include "vmlinux.h"
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
+#include <bpf/bpf_core_read.h>
+#include "bpf_misc.h"
+
+char _license[] SEC("license") = "GPL";
+
+static long stack[256];
+
+/*
+ * KPROBE contexts
+ */
+
+__weak int kprobe_typedef_ctx_subprog(bpf_user_pt_regs_t *ctx)
+{
+ return bpf_get_stack(ctx, &stack, sizeof(stack), 0);
+}
+
+SEC("?kprobe")
+__success
+int kprobe_typedef_ctx(void *ctx)
+{
+ return kprobe_typedef_ctx_subprog(ctx);
+}
+
+/* s390x defines:
+ *
+ * typedef user_pt_regs bpf_user_pt_regs_t;
+ * typedef struct { ... } user_pt_regs;
+ *
+ * And so "canonical" underlying struct type is anonymous.
+ * So on s390x only valid ways to have PTR_TO_CTX argument in global subprogs
+ * are:
+ * - bpf_user_pt_regs_t *ctx (typedef);
+ * - struct bpf_user_pt_regs_t *ctx (backwards compatible struct hack);
+ * - void *ctx __arg_ctx (arg:ctx tag)
+ *
+ * Other architectures also allow using underlying struct types (e.g.,
+ * `struct pt_regs *ctx` for x86-64)
+ */
+#ifndef bpf_target_s390
+
+#define pt_regs_struct_t typeof(*(__PT_REGS_CAST((struct pt_regs *)NULL)))
+
+__weak int kprobe_struct_ctx_subprog(pt_regs_struct_t *ctx)
+{
+ return bpf_get_stack((void *)ctx, &stack, sizeof(stack), 0);
+}
+
+SEC("?kprobe")
+__success
+int kprobe_resolved_ctx(void *ctx)
+{
+ return kprobe_struct_ctx_subprog(ctx);
+}
+
+#endif
+
+/* this is current hack to make this work on old kernels */
+struct bpf_user_pt_regs_t {};
+
+__weak int kprobe_workaround_ctx_subprog(struct bpf_user_pt_regs_t *ctx)
+{
+ return bpf_get_stack(ctx, &stack, sizeof(stack), 0);
+}
+
+SEC("?kprobe")
+__success
+int kprobe_workaround_ctx(void *ctx)
+{
+ return kprobe_workaround_ctx_subprog(ctx);
+}
+
+/*
+ * RAW_TRACEPOINT contexts
+ */
+
+__weak int raw_tp_ctx_subprog(struct bpf_raw_tracepoint_args *ctx)
+{
+ return bpf_get_stack(ctx, &stack, sizeof(stack), 0);
+}
+
+SEC("?raw_tp")
+__success
+int raw_tp_ctx(void *ctx)
+{
+ return raw_tp_ctx_subprog(ctx);
+}
+
+/*
+ * RAW_TRACEPOINT_WRITABLE contexts
+ */
+
+__weak int raw_tp_writable_ctx_subprog(struct bpf_raw_tracepoint_args *ctx)
+{
+ return bpf_get_stack(ctx, &stack, sizeof(stack), 0);
+}
+
+SEC("?raw_tp")
+__success
+int raw_tp_writable_ctx(void *ctx)
+{
+ return raw_tp_writable_ctx_subprog(ctx);
+}
+
+/*
+ * PERF_EVENT contexts
+ */
+
+__weak int perf_event_ctx_subprog(struct bpf_perf_event_data *ctx)
+{
+ return bpf_get_stack(ctx, &stack, sizeof(stack), 0);
+}
+
+SEC("?perf_event")
+__success
+int perf_event_ctx(void *ctx)
+{
+ return perf_event_ctx_subprog(ctx);
+}
+
+/* this global subprog can be now called from many types of entry progs, each
+ * with different context type
+ */
+__weak int subprog_ctx_tag(void *ctx __arg_ctx)
+{
+ return bpf_get_stack(ctx, stack, sizeof(stack), 0);
+}
+
+struct my_struct { int x; };
+
+__weak int subprog_multi_ctx_tags(void *ctx1 __arg_ctx,
+ struct my_struct *mem,
+ void *ctx2 __arg_ctx)
+{
+ if (!mem)
+ return 0;
+
+ return bpf_get_stack(ctx1, stack, sizeof(stack), 0) +
+ mem->x +
+ bpf_get_stack(ctx2, stack, sizeof(stack), 0);
+}
+
+SEC("?raw_tp")
+__success __log_level(2)
+int arg_tag_ctx_raw_tp(void *ctx)
+{
+ struct my_struct x = { .x = 123 };
+
+ return subprog_ctx_tag(ctx) + subprog_multi_ctx_tags(ctx, &x, ctx);
+}
+
+SEC("?perf_event")
+__success __log_level(2)
+int arg_tag_ctx_perf(void *ctx)
+{
+ struct my_struct x = { .x = 123 };
+
+ return subprog_ctx_tag(ctx) + subprog_multi_ctx_tags(ctx, &x, ctx);
+}
+
+SEC("?kprobe")
+__success __log_level(2)
+int arg_tag_ctx_kprobe(void *ctx)
+{
+ struct my_struct x = { .x = 123 };
+
+ return subprog_ctx_tag(ctx) + subprog_multi_ctx_tags(ctx, &x, ctx);
+}
diff --git a/tools/testing/selftests/bpf/progs/test_global_map_resize.c b/tools/testing/selftests/bpf/progs/test_global_map_resize.c
new file mode 100644
index 000000000000..1fbb73d3e5d5
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/test_global_map_resize.c
@@ -0,0 +1,62 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2023 Meta Platforms, Inc. and affiliates. */
+
+#include "vmlinux.h"
+#include <bpf/bpf_helpers.h>
+
+char _license[] SEC("license") = "GPL";
+
+/* rodata section */
+const volatile pid_t pid;
+const volatile size_t bss_array_len;
+const volatile size_t data_array_len;
+
+/* bss section */
+int sum = 0;
+int array[1];
+
+/* custom data secton */
+int my_array[1] SEC(".data.custom");
+
+/* custom data section which should NOT be resizable,
+ * since it contains a single var which is not an array
+ */
+int my_int SEC(".data.non_array");
+
+/* custom data section which should NOT be resizable,
+ * since its last var is not an array
+ */
+int my_array_first[1] SEC(".data.array_not_last");
+int my_int_last SEC(".data.array_not_last");
+
+int percpu_arr[1] SEC(".data.percpu_arr");
+
+SEC("tp/syscalls/sys_enter_getpid")
+int bss_array_sum(void *ctx)
+{
+ if (pid != (bpf_get_current_pid_tgid() >> 32))
+ return 0;
+
+ /* this will be zero, we just rely on verifier not rejecting this */
+ sum = percpu_arr[bpf_get_smp_processor_id()];
+
+ for (size_t i = 0; i < bss_array_len; ++i)
+ sum += array[i];
+
+ return 0;
+}
+
+SEC("tp/syscalls/sys_enter_getuid")
+int data_array_sum(void *ctx)
+{
+ if (pid != (bpf_get_current_pid_tgid() >> 32))
+ return 0;
+
+ /* this will be zero, we just rely on verifier not rejecting this */
+ sum = percpu_arr[bpf_get_smp_processor_id()];
+
+ for (size_t i = 0; i < data_array_len; ++i)
+ sum += my_array[i];
+
+ return 0;
+}
diff --git a/tools/testing/selftests/bpf/progs/test_hash_large_key.c b/tools/testing/selftests/bpf/progs/test_hash_large_key.c
new file mode 100644
index 000000000000..8b438128f46b
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/test_hash_large_key.c
@@ -0,0 +1,44 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include <linux/bpf.h>
+#include <bpf/bpf_helpers.h>
+
+char _license[] SEC("license") = "GPL";
+
+struct {
+ __uint(type, BPF_MAP_TYPE_HASH);
+ __uint(max_entries, 2);
+ __type(key, struct bigelement);
+ __type(value, __u32);
+} hash_map SEC(".maps");
+
+struct {
+ __uint(type, BPF_MAP_TYPE_PERCPU_ARRAY);
+ __uint(max_entries, 1);
+ __type(key, __u32);
+ __type(value, struct bigelement);
+} key_map SEC(".maps");
+
+struct bigelement {
+ int a;
+ char b[4096];
+ long long c;
+};
+
+SEC("raw_tracepoint/sys_enter")
+int bpf_hash_large_key_test(void *ctx)
+{
+ int zero = 0, value = 42;
+ struct bigelement *key;
+
+ key = bpf_map_lookup_elem(&key_map, &zero);
+ if (!key)
+ return 0;
+
+ key->c = 1;
+ if (bpf_map_update_elem(&hash_map, key, &value, BPF_ANY))
+ return 0;
+
+ return 0;
+}
+
diff --git a/tools/testing/selftests/bpf/progs/test_helper_restricted.c b/tools/testing/selftests/bpf/progs/test_helper_restricted.c
new file mode 100644
index 000000000000..5715c569ec03
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/test_helper_restricted.c
@@ -0,0 +1,123 @@
+// SPDX-License-Identifier: GPL-2.0-only
+#include <time.h>
+#include <linux/bpf.h>
+#include <bpf/bpf_helpers.h>
+
+struct timer {
+ struct bpf_timer t;
+};
+
+struct lock {
+ struct bpf_spin_lock l;
+};
+
+struct {
+ __uint(type, BPF_MAP_TYPE_ARRAY);
+ __uint(max_entries, 1);
+ __type(key, __u32);
+ __type(value, struct timer);
+} timers SEC(".maps");
+
+struct {
+ __uint(type, BPF_MAP_TYPE_ARRAY);
+ __uint(max_entries, 1);
+ __type(key, __u32);
+ __type(value, struct lock);
+} locks SEC(".maps");
+
+static int timer_cb(void *map, int *key, struct timer *timer)
+{
+ return 0;
+}
+
+static void timer_work(void)
+{
+ struct timer *timer;
+ const int key = 0;
+
+ timer = bpf_map_lookup_elem(&timers, &key);
+ if (timer) {
+ bpf_timer_init(&timer->t, &timers, CLOCK_MONOTONIC);
+ bpf_timer_set_callback(&timer->t, timer_cb);
+ bpf_timer_start(&timer->t, 10E9, 0);
+ bpf_timer_cancel(&timer->t);
+ }
+}
+
+static void spin_lock_work(void)
+{
+ const int key = 0;
+ struct lock *lock;
+
+ lock = bpf_map_lookup_elem(&locks, &key);
+ if (lock) {
+ bpf_spin_lock(&lock->l);
+ bpf_spin_unlock(&lock->l);
+ }
+}
+
+SEC("?raw_tp/sys_enter")
+int raw_tp_timer(void *ctx)
+{
+ timer_work();
+
+ return 0;
+}
+
+SEC("?tp/syscalls/sys_enter_nanosleep")
+int tp_timer(void *ctx)
+{
+ timer_work();
+
+ return 0;
+}
+
+SEC("?kprobe")
+int kprobe_timer(void *ctx)
+{
+ timer_work();
+
+ return 0;
+}
+
+SEC("?perf_event")
+int perf_event_timer(void *ctx)
+{
+ timer_work();
+
+ return 0;
+}
+
+SEC("?raw_tp/sys_enter")
+int raw_tp_spin_lock(void *ctx)
+{
+ spin_lock_work();
+
+ return 0;
+}
+
+SEC("?tp/syscalls/sys_enter_nanosleep")
+int tp_spin_lock(void *ctx)
+{
+ spin_lock_work();
+
+ return 0;
+}
+
+SEC("?kprobe")
+int kprobe_spin_lock(void *ctx)
+{
+ spin_lock_work();
+
+ return 0;
+}
+
+SEC("?perf_event")
+int perf_event_spin_lock(void *ctx)
+{
+ spin_lock_work();
+
+ return 0;
+}
+
+const char LICENSE[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/test_jhash.h b/tools/testing/selftests/bpf/progs/test_jhash.h
index c300734d26f6..ef53559bbbdf 100644
--- a/tools/testing/selftests/bpf/progs/test_jhash.h
+++ b/tools/testing/selftests/bpf/progs/test_jhash.h
@@ -69,3 +69,34 @@ u32 jhash(const void *key, u32 length, u32 initval)
return c;
}
+
+static __always_inline u32 jhash2(const u32 *k, u32 length, u32 initval)
+{
+ u32 a, b, c;
+
+ /* Set up the internal state */
+ a = b = c = JHASH_INITVAL + (length<<2) + initval;
+
+ /* Handle most of the key */
+ while (length > 3) {
+ a += k[0];
+ b += k[1];
+ c += k[2];
+ __jhash_mix(a, b, c);
+ length -= 3;
+ k += 3;
+ }
+
+ /* Handle the last 3 u32's */
+ switch (length) {
+ case 3: c += k[2];
+ case 2: b += k[1];
+ case 1: a += k[0];
+ __jhash_final(a, b, c);
+ break;
+ case 0: /* Nothing left to add */
+ break;
+ }
+
+ return c;
+}
diff --git a/tools/testing/selftests/bpf/progs/test_kfunc_dynptr_param.c b/tools/testing/selftests/bpf/progs/test_kfunc_dynptr_param.c
new file mode 100644
index 000000000000..2dde8e3fe4c9
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/test_kfunc_dynptr_param.c
@@ -0,0 +1,86 @@
+// SPDX-License-Identifier: GPL-2.0
+
+/*
+ * Copyright (C) 2022 Huawei Technologies Duesseldorf GmbH
+ *
+ * Author: Roberto Sassu <roberto.sassu@huawei.com>
+ */
+
+#include "vmlinux.h"
+#include <errno.h>
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
+#include "bpf_misc.h"
+
+extern struct bpf_key *bpf_lookup_system_key(__u64 id) __ksym;
+extern void bpf_key_put(struct bpf_key *key) __ksym;
+extern int bpf_verify_pkcs7_signature(struct bpf_dynptr *data_ptr,
+ struct bpf_dynptr *sig_ptr,
+ struct bpf_key *trusted_keyring) __ksym;
+
+struct {
+ __uint(type, BPF_MAP_TYPE_RINGBUF);
+ __uint(max_entries, 4096);
+} ringbuf SEC(".maps");
+
+struct {
+ __uint(type, BPF_MAP_TYPE_ARRAY);
+ __uint(max_entries, 1);
+ __type(key, __u32);
+ __type(value, __u32);
+} array_map SEC(".maps");
+
+int err, pid;
+
+char _license[] SEC("license") = "GPL";
+
+SEC("?lsm.s/bpf")
+__failure __msg("cannot pass in dynptr at an offset=-8")
+int BPF_PROG(not_valid_dynptr, int cmd, union bpf_attr *attr, unsigned int size)
+{
+ unsigned long val;
+
+ return bpf_verify_pkcs7_signature((struct bpf_dynptr *)&val,
+ (struct bpf_dynptr *)&val, NULL);
+}
+
+SEC("?lsm.s/bpf")
+__failure __msg("arg#0 expected pointer to stack or dynptr_ptr")
+int BPF_PROG(not_ptr_to_stack, int cmd, union bpf_attr *attr, unsigned int size)
+{
+ unsigned long val = 0;
+
+ return bpf_verify_pkcs7_signature((struct bpf_dynptr *)val,
+ (struct bpf_dynptr *)val, NULL);
+}
+
+SEC("lsm.s/bpf")
+int BPF_PROG(dynptr_data_null, int cmd, union bpf_attr *attr, unsigned int size)
+{
+ struct bpf_key *trusted_keyring;
+ struct bpf_dynptr ptr;
+ __u32 *value;
+ int ret, zero = 0;
+
+ if (bpf_get_current_pid_tgid() >> 32 != pid)
+ return 0;
+
+ value = bpf_map_lookup_elem(&array_map, &zero);
+ if (!value)
+ return 0;
+
+ /* Pass invalid flags. */
+ ret = bpf_dynptr_from_mem(value, sizeof(*value), ((__u64)~0ULL), &ptr);
+ if (ret != -EINVAL)
+ return 0;
+
+ trusted_keyring = bpf_lookup_system_key(0);
+ if (!trusted_keyring)
+ return 0;
+
+ err = bpf_verify_pkcs7_signature(&ptr, &ptr, trusted_keyring);
+
+ bpf_key_put(trusted_keyring);
+
+ return 0;
+}
diff --git a/tools/testing/selftests/bpf/progs/test_ksyms.c b/tools/testing/selftests/bpf/progs/test_ksyms.c
new file mode 100644
index 000000000000..6c9cbb5a3bdf
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/test_ksyms.c
@@ -0,0 +1,32 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2019 Facebook */
+
+#include <stdbool.h>
+#include <linux/bpf.h>
+#include <bpf/bpf_helpers.h>
+
+__u64 out__bpf_link_fops = -1;
+__u64 out__bpf_link_fops1 = -1;
+__u64 out__btf_size = -1;
+__u64 out__per_cpu_start = -1;
+
+extern const void bpf_link_fops __ksym;
+extern const void __start_BTF __ksym;
+extern const void __stop_BTF __ksym;
+extern const void __per_cpu_start __ksym;
+/* non-existing symbol, weak, default to zero */
+extern const void bpf_link_fops1 __ksym __weak;
+
+SEC("raw_tp/sys_enter")
+int handler(const void *ctx)
+{
+ out__bpf_link_fops = (__u64)&bpf_link_fops;
+ out__btf_size = (__u64)(&__stop_BTF - &__start_BTF);
+ out__per_cpu_start = (__u64)&__per_cpu_start;
+
+ out__bpf_link_fops1 = (__u64)&bpf_link_fops1;
+
+ return 0;
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/test_ksyms_btf.c b/tools/testing/selftests/bpf/progs/test_ksyms_btf.c
new file mode 100644
index 000000000000..bb8ea9270f29
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/test_ksyms_btf.c
@@ -0,0 +1,55 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2020 Google */
+
+#include "vmlinux.h"
+
+#include <bpf/bpf_helpers.h>
+
+__u64 out__runqueues_addr = -1;
+__u64 out__bpf_prog_active_addr = -1;
+
+__u32 out__rq_cpu = -1; /* percpu struct fields */
+int out__bpf_prog_active = -1; /* percpu int */
+
+__u32 out__this_rq_cpu = -1;
+int out__this_bpf_prog_active = -1;
+
+__u32 out__cpu_0_rq_cpu = -1; /* cpu_rq(0)->cpu */
+
+extern const struct rq runqueues __ksym; /* struct type global var. */
+extern const int bpf_prog_active __ksym; /* int type global var. */
+
+SEC("raw_tp/sys_enter")
+int handler(const void *ctx)
+{
+ struct rq *rq;
+ int *active;
+ __u32 cpu;
+
+ out__runqueues_addr = (__u64)&runqueues;
+ out__bpf_prog_active_addr = (__u64)&bpf_prog_active;
+
+ cpu = bpf_get_smp_processor_id();
+
+ /* test bpf_per_cpu_ptr() */
+ rq = (struct rq *)bpf_per_cpu_ptr(&runqueues, cpu);
+ if (rq)
+ out__rq_cpu = rq->cpu;
+ active = (int *)bpf_per_cpu_ptr(&bpf_prog_active, cpu);
+ if (active)
+ out__bpf_prog_active = *active;
+
+ rq = (struct rq *)bpf_per_cpu_ptr(&runqueues, 0);
+ if (rq) /* should always be valid, but we can't spare the check. */
+ out__cpu_0_rq_cpu = rq->cpu;
+
+ /* test bpf_this_cpu_ptr */
+ rq = (struct rq *)bpf_this_cpu_ptr(&runqueues);
+ out__this_rq_cpu = rq->cpu;
+ active = (int *)bpf_this_cpu_ptr(&bpf_prog_active);
+ out__this_bpf_prog_active = *active;
+
+ return 0;
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/test_ksyms_btf_null_check.c b/tools/testing/selftests/bpf/progs/test_ksyms_btf_null_check.c
new file mode 100644
index 000000000000..8bc8f7c637bc
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/test_ksyms_btf_null_check.c
@@ -0,0 +1,31 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2020 Facebook */
+
+#include "vmlinux.h"
+
+#include <bpf/bpf_helpers.h>
+
+extern const struct rq runqueues __ksym; /* struct type global var. */
+extern const int bpf_prog_active __ksym; /* int type global var. */
+
+SEC("raw_tp/sys_enter")
+int handler(const void *ctx)
+{
+ struct rq *rq;
+ int *active;
+ __u32 cpu;
+
+ cpu = bpf_get_smp_processor_id();
+ rq = (struct rq *)bpf_per_cpu_ptr(&runqueues, cpu);
+ active = (int *)bpf_per_cpu_ptr(&bpf_prog_active, cpu);
+ if (active) {
+ /* READ_ONCE */
+ *(volatile int *)active;
+ /* !rq has not been tested, so verifier should reject. */
+ *(volatile int *)(&rq->cpu);
+ }
+
+ return 0;
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/test_ksyms_btf_write_check.c b/tools/testing/selftests/bpf/progs/test_ksyms_btf_write_check.c
new file mode 100644
index 000000000000..27109b877714
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/test_ksyms_btf_write_check.c
@@ -0,0 +1,44 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2021 Google */
+
+#include "vmlinux.h"
+
+#include <bpf/bpf_helpers.h>
+
+extern const int bpf_prog_active __ksym; /* int type global var. */
+
+SEC("raw_tp/sys_enter")
+int handler1(const void *ctx)
+{
+ int *active;
+ __u32 cpu;
+
+ cpu = bpf_get_smp_processor_id();
+ active = (int *)bpf_per_cpu_ptr(&bpf_prog_active, cpu);
+ if (active) {
+ /* Kernel memory obtained from bpf_{per,this}_cpu_ptr
+ * is read-only, should _not_ pass verification.
+ */
+ /* WRITE_ONCE */
+ *(volatile int *)active = -1;
+ }
+
+ return 0;
+}
+
+__noinline int write_active(int *p)
+{
+ return p ? (*p = 42) : 0;
+}
+
+SEC("raw_tp/sys_enter")
+int handler2(const void *ctx)
+{
+ int *active;
+
+ active = bpf_this_cpu_ptr(&bpf_prog_active);
+ write_active(active);
+ return 0;
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/test_ksyms_module.c b/tools/testing/selftests/bpf/progs/test_ksyms_module.c
new file mode 100644
index 000000000000..0650d918c096
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/test_ksyms_module.c
@@ -0,0 +1,50 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2021 Facebook */
+
+#include "vmlinux.h"
+#include <bpf/bpf_helpers.h>
+
+#define X_0(x)
+#define X_1(x) x X_0(x)
+#define X_2(x) x X_1(x)
+#define X_3(x) x X_2(x)
+#define X_4(x) x X_3(x)
+#define X_5(x) x X_4(x)
+#define X_6(x) x X_5(x)
+#define X_7(x) x X_6(x)
+#define X_8(x) x X_7(x)
+#define X_9(x) x X_8(x)
+#define X_10(x) x X_9(x)
+#define REPEAT_256(Y) X_2(X_10(X_10(Y))) X_5(X_10(Y)) X_6(Y)
+
+extern const int bpf_testmod_ksym_percpu __ksym;
+extern void bpf_testmod_test_mod_kfunc(int i) __ksym;
+extern void bpf_testmod_invalid_mod_kfunc(void) __ksym __weak;
+
+int out_bpf_testmod_ksym = 0;
+const volatile int x = 0;
+
+SEC("tc")
+int load(struct __sk_buff *skb)
+{
+ /* This will be kept by clang, but removed by verifier. Since it is
+ * marked as __weak, libbpf and gen_loader don't error out if BTF ID
+ * is not found for it, instead imm and off is set to 0 for it.
+ */
+ if (x)
+ bpf_testmod_invalid_mod_kfunc();
+ bpf_testmod_test_mod_kfunc(42);
+ out_bpf_testmod_ksym = *(int *)bpf_this_cpu_ptr(&bpf_testmod_ksym_percpu);
+ return 0;
+}
+
+SEC("tc")
+int load_256(struct __sk_buff *skb)
+{
+ /* this will fail if kfunc doesn't reuse its own btf fd index */
+ REPEAT_256(bpf_testmod_test_mod_kfunc(42););
+ bpf_testmod_test_mod_kfunc(42);
+ return 0;
+}
+
+char LICENSE[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/test_ksyms_weak.c b/tools/testing/selftests/bpf/progs/test_ksyms_weak.c
new file mode 100644
index 000000000000..d00268c91e19
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/test_ksyms_weak.c
@@ -0,0 +1,71 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Test weak ksyms.
+ *
+ * Copyright (c) 2021 Google
+ */
+
+#include "vmlinux.h"
+
+#include <bpf/bpf_helpers.h>
+
+int out__existing_typed = -1;
+__u64 out__existing_typeless = -1;
+
+__u64 out__non_existent_typeless = -1;
+__u64 out__non_existent_typed = -1;
+
+/* existing weak symbols */
+
+/* test existing weak symbols can be resolved. */
+extern const struct rq runqueues __ksym __weak; /* typed */
+extern const void bpf_prog_active __ksym __weak; /* typeless */
+struct task_struct *bpf_task_acquire(struct task_struct *p) __ksym __weak;
+void bpf_testmod_test_mod_kfunc(int i) __ksym __weak;
+
+
+/* non-existent weak symbols. */
+
+/* typeless symbols, default to zero. */
+extern const void bpf_link_fops1 __ksym __weak;
+
+/* typed symbols, default to zero. */
+extern const int bpf_link_fops2 __ksym __weak;
+void invalid_kfunc(void) __ksym __weak;
+
+SEC("raw_tp/sys_enter")
+int pass_handler(const void *ctx)
+{
+ struct rq *rq;
+
+ /* tests existing symbols. */
+ rq = (struct rq *)bpf_per_cpu_ptr(&runqueues, 0);
+ if (rq && bpf_ksym_exists(&runqueues))
+ out__existing_typed = rq->cpu;
+ out__existing_typeless = (__u64)&bpf_prog_active;
+
+ /* tests non-existent symbols. */
+ out__non_existent_typeless = (__u64)&bpf_link_fops1;
+
+ /* tests non-existent symbols. */
+ out__non_existent_typed = (__u64)&bpf_link_fops2;
+
+ if (&bpf_link_fops2) /* can't happen */
+ out__non_existent_typed = (__u64)bpf_per_cpu_ptr(&bpf_link_fops2, 0);
+
+ if (!bpf_ksym_exists(bpf_task_acquire))
+ /* dead code won't be seen by the verifier */
+ bpf_task_acquire(0);
+
+ if (!bpf_ksym_exists(bpf_testmod_test_mod_kfunc))
+ /* dead code won't be seen by the verifier */
+ bpf_testmod_test_mod_kfunc(0);
+
+ if (bpf_ksym_exists(invalid_kfunc))
+ /* dead code won't be seen by the verifier */
+ invalid_kfunc();
+
+ return 0;
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/test_l4lb.c b/tools/testing/selftests/bpf/progs/test_l4lb.c
index 33493911d87a..c26057ec46dc 100644
--- a/tools/testing/selftests/bpf/progs/test_l4lb.c
+++ b/tools/testing/selftests/bpf/progs/test_l4lb.c
@@ -21,8 +21,6 @@
#include "test_iptunnel_common.h"
#include <bpf/bpf_endian.h>
-int _version SEC("version") = 1;
-
static inline __u32 rol32(__u32 word, unsigned int shift)
{
return (word << shift) | (word >> ((-shift) & 31));
@@ -450,7 +448,7 @@ static __always_inline int process_packet(void *data, __u64 off, void *data_end,
return bpf_redirect(ifindex, 0);
}
-SEC("l4lb-demo")
+SEC("tc")
int balancer_ingress(struct __sk_buff *ctx)
{
void *data_end = (void *)(long)ctx->data_end;
diff --git a/tools/testing/selftests/bpf/progs/test_l4lb_noinline.c b/tools/testing/selftests/bpf/progs/test_l4lb_noinline.c
index 28351936a438..c8bc0c6947aa 100644
--- a/tools/testing/selftests/bpf/progs/test_l4lb_noinline.c
+++ b/tools/testing/selftests/bpf/progs/test_l4lb_noinline.c
@@ -17,9 +17,7 @@
#include "test_iptunnel_common.h"
#include <bpf/bpf_endian.h>
-int _version SEC("version") = 1;
-
-static __u32 rol32(__u32 word, unsigned int shift)
+static __always_inline __u32 rol32(__u32 word, unsigned int shift)
{
return (word << shift) | (word >> ((-shift) & 31));
}
@@ -52,7 +50,7 @@ static __u32 rol32(__u32 word, unsigned int shift)
typedef unsigned int u32;
-static u32 jhash(const void *key, u32 length, u32 initval)
+static __noinline u32 jhash(const void *key, u32 length, u32 initval)
{
u32 a, b, c;
const unsigned char *k = key;
@@ -88,7 +86,7 @@ static u32 jhash(const void *key, u32 length, u32 initval)
return c;
}
-static u32 __jhash_nwords(u32 a, u32 b, u32 c, u32 initval)
+static __noinline u32 __jhash_nwords(u32 a, u32 b, u32 c, u32 initval)
{
a += initval;
b += initval;
@@ -97,7 +95,7 @@ static u32 __jhash_nwords(u32 a, u32 b, u32 c, u32 initval)
return c;
}
-static u32 jhash_2words(u32 a, u32 b, u32 initval)
+static __noinline u32 jhash_2words(u32 a, u32 b, u32 initval)
{
return __jhash_nwords(a, b, 0, initval + JHASH_INITVAL + (2 << 2));
}
@@ -200,8 +198,7 @@ struct {
__type(value, struct ctl_value);
} ctl_array SEC(".maps");
-static __u32 get_packet_hash(struct packet_description *pckt,
- bool ipv6)
+static __noinline __u32 get_packet_hash(struct packet_description *pckt, bool ipv6)
{
if (ipv6)
return jhash_2words(jhash(pckt->srcv6, 16, MAX_VIPS),
@@ -210,10 +207,10 @@ static __u32 get_packet_hash(struct packet_description *pckt,
return jhash_2words(pckt->src, pckt->ports, CH_RINGS_SIZE);
}
-static bool get_packet_dst(struct real_definition **real,
- struct packet_description *pckt,
- struct vip_meta *vip_info,
- bool is_ipv6)
+static __noinline bool get_packet_dst(struct real_definition **real,
+ struct packet_description *pckt,
+ struct vip_meta *vip_info,
+ bool is_ipv6)
{
__u32 hash = get_packet_hash(pckt, is_ipv6);
__u32 key = RING_SIZE * vip_info->vip_num + hash % RING_SIZE;
@@ -221,7 +218,7 @@ static bool get_packet_dst(struct real_definition **real,
if (hash != 0x358459b7 /* jhash of ipv4 packet */ &&
hash != 0x2f4bc6bb /* jhash of ipv6 packet */)
- return 0;
+ return false;
real_pos = bpf_map_lookup_elem(&ch_rings, &key);
if (!real_pos)
@@ -233,8 +230,8 @@ static bool get_packet_dst(struct real_definition **real,
return true;
}
-static int parse_icmpv6(void *data, void *data_end, __u64 off,
- struct packet_description *pckt)
+static __noinline int parse_icmpv6(void *data, void *data_end, __u64 off,
+ struct packet_description *pckt)
{
struct icmp6hdr *icmp_hdr;
struct ipv6hdr *ip6h;
@@ -255,8 +252,8 @@ static int parse_icmpv6(void *data, void *data_end, __u64 off,
return TC_ACT_UNSPEC;
}
-static int parse_icmp(void *data, void *data_end, __u64 off,
- struct packet_description *pckt)
+static __noinline int parse_icmp(void *data, void *data_end, __u64 off,
+ struct packet_description *pckt)
{
struct icmphdr *icmp_hdr;
struct iphdr *iph;
@@ -280,8 +277,8 @@ static int parse_icmp(void *data, void *data_end, __u64 off,
return TC_ACT_UNSPEC;
}
-static bool parse_udp(void *data, __u64 off, void *data_end,
- struct packet_description *pckt)
+static __noinline bool parse_udp(void *data, __u64 off, void *data_end,
+ struct packet_description *pckt)
{
struct udphdr *udp;
udp = data + off;
@@ -299,8 +296,8 @@ static bool parse_udp(void *data, __u64 off, void *data_end,
return true;
}
-static bool parse_tcp(void *data, __u64 off, void *data_end,
- struct packet_description *pckt)
+static __noinline bool parse_tcp(void *data, __u64 off, void *data_end,
+ struct packet_description *pckt)
{
struct tcphdr *tcp;
@@ -321,8 +318,8 @@ static bool parse_tcp(void *data, __u64 off, void *data_end,
return true;
}
-static int process_packet(void *data, __u64 off, void *data_end,
- bool is_ipv6, struct __sk_buff *skb)
+static __noinline int process_packet(void *data, __u64 off, void *data_end,
+ bool is_ipv6, struct __sk_buff *skb)
{
void *pkt_start = (void *)(long)skb->data;
struct packet_description pckt = {};
@@ -450,7 +447,7 @@ static int process_packet(void *data, __u64 off, void *data_end,
return bpf_redirect(ifindex, 0);
}
-SEC("l4lb-demo")
+SEC("tc")
int balancer_ingress(struct __sk_buff *ctx)
{
void *data_end = (void *)(long)ctx->data_end;
diff --git a/tools/testing/selftests/bpf/progs/test_l4lb_noinline_dynptr.c b/tools/testing/selftests/bpf/progs/test_l4lb_noinline_dynptr.c
new file mode 100644
index 000000000000..f997f5080748
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/test_l4lb_noinline_dynptr.c
@@ -0,0 +1,487 @@
+// SPDX-License-Identifier: GPL-2.0
+// Copyright (c) 2017 Facebook
+#include <stddef.h>
+#include <stdbool.h>
+#include <string.h>
+#include <linux/pkt_cls.h>
+#include <linux/bpf.h>
+#include <linux/in.h>
+#include <linux/if_ether.h>
+#include <linux/ip.h>
+#include <linux/ipv6.h>
+#include <linux/icmp.h>
+#include <linux/icmpv6.h>
+#include <linux/tcp.h>
+#include <linux/udp.h>
+#include <bpf/bpf_helpers.h>
+#include "test_iptunnel_common.h"
+#include <bpf/bpf_endian.h>
+
+#include "bpf_kfuncs.h"
+
+static __always_inline __u32 rol32(__u32 word, unsigned int shift)
+{
+ return (word << shift) | (word >> ((-shift) & 31));
+}
+
+/* copy paste of jhash from kernel sources to make sure llvm
+ * can compile it into valid sequence of bpf instructions
+ */
+#define __jhash_mix(a, b, c) \
+{ \
+ a -= c; a ^= rol32(c, 4); c += b; \
+ b -= a; b ^= rol32(a, 6); a += c; \
+ c -= b; c ^= rol32(b, 8); b += a; \
+ a -= c; a ^= rol32(c, 16); c += b; \
+ b -= a; b ^= rol32(a, 19); a += c; \
+ c -= b; c ^= rol32(b, 4); b += a; \
+}
+
+#define __jhash_final(a, b, c) \
+{ \
+ c ^= b; c -= rol32(b, 14); \
+ a ^= c; a -= rol32(c, 11); \
+ b ^= a; b -= rol32(a, 25); \
+ c ^= b; c -= rol32(b, 16); \
+ a ^= c; a -= rol32(c, 4); \
+ b ^= a; b -= rol32(a, 14); \
+ c ^= b; c -= rol32(b, 24); \
+}
+
+#define JHASH_INITVAL 0xdeadbeef
+
+typedef unsigned int u32;
+
+static __noinline u32 jhash(const void *key, u32 length, u32 initval)
+{
+ u32 a, b, c;
+ const unsigned char *k = key;
+
+ a = b = c = JHASH_INITVAL + length + initval;
+
+ while (length > 12) {
+ a += *(u32 *)(k);
+ b += *(u32 *)(k + 4);
+ c += *(u32 *)(k + 8);
+ __jhash_mix(a, b, c);
+ length -= 12;
+ k += 12;
+ }
+ switch (length) {
+ case 12: c += (u32)k[11]<<24;
+ case 11: c += (u32)k[10]<<16;
+ case 10: c += (u32)k[9]<<8;
+ case 9: c += k[8];
+ case 8: b += (u32)k[7]<<24;
+ case 7: b += (u32)k[6]<<16;
+ case 6: b += (u32)k[5]<<8;
+ case 5: b += k[4];
+ case 4: a += (u32)k[3]<<24;
+ case 3: a += (u32)k[2]<<16;
+ case 2: a += (u32)k[1]<<8;
+ case 1: a += k[0];
+ __jhash_final(a, b, c);
+ case 0: /* Nothing left to add */
+ break;
+ }
+
+ return c;
+}
+
+static __noinline u32 __jhash_nwords(u32 a, u32 b, u32 c, u32 initval)
+{
+ a += initval;
+ b += initval;
+ c += initval;
+ __jhash_final(a, b, c);
+ return c;
+}
+
+static __noinline u32 jhash_2words(u32 a, u32 b, u32 initval)
+{
+ return __jhash_nwords(a, b, 0, initval + JHASH_INITVAL + (2 << 2));
+}
+
+#define PCKT_FRAGMENTED 65343
+#define IPV4_HDR_LEN_NO_OPT 20
+#define IPV4_PLUS_ICMP_HDR 28
+#define IPV6_PLUS_ICMP_HDR 48
+#define RING_SIZE 2
+#define MAX_VIPS 12
+#define MAX_REALS 5
+#define CTL_MAP_SIZE 16
+#define CH_RINGS_SIZE (MAX_VIPS * RING_SIZE)
+#define F_IPV6 (1 << 0)
+#define F_HASH_NO_SRC_PORT (1 << 0)
+#define F_ICMP (1 << 0)
+#define F_SYN_SET (1 << 1)
+
+struct packet_description {
+ union {
+ __be32 src;
+ __be32 srcv6[4];
+ };
+ union {
+ __be32 dst;
+ __be32 dstv6[4];
+ };
+ union {
+ __u32 ports;
+ __u16 port16[2];
+ };
+ __u8 proto;
+ __u8 flags;
+};
+
+struct ctl_value {
+ union {
+ __u64 value;
+ __u32 ifindex;
+ __u8 mac[6];
+ };
+};
+
+struct vip_meta {
+ __u32 flags;
+ __u32 vip_num;
+};
+
+struct real_definition {
+ union {
+ __be32 dst;
+ __be32 dstv6[4];
+ };
+ __u8 flags;
+};
+
+struct vip_stats {
+ __u64 bytes;
+ __u64 pkts;
+};
+
+struct eth_hdr {
+ unsigned char eth_dest[ETH_ALEN];
+ unsigned char eth_source[ETH_ALEN];
+ unsigned short eth_proto;
+};
+
+struct {
+ __uint(type, BPF_MAP_TYPE_HASH);
+ __uint(max_entries, MAX_VIPS);
+ __type(key, struct vip);
+ __type(value, struct vip_meta);
+} vip_map SEC(".maps");
+
+struct {
+ __uint(type, BPF_MAP_TYPE_ARRAY);
+ __uint(max_entries, CH_RINGS_SIZE);
+ __type(key, __u32);
+ __type(value, __u32);
+} ch_rings SEC(".maps");
+
+struct {
+ __uint(type, BPF_MAP_TYPE_ARRAY);
+ __uint(max_entries, MAX_REALS);
+ __type(key, __u32);
+ __type(value, struct real_definition);
+} reals SEC(".maps");
+
+struct {
+ __uint(type, BPF_MAP_TYPE_PERCPU_ARRAY);
+ __uint(max_entries, MAX_VIPS);
+ __type(key, __u32);
+ __type(value, struct vip_stats);
+} stats SEC(".maps");
+
+struct {
+ __uint(type, BPF_MAP_TYPE_ARRAY);
+ __uint(max_entries, CTL_MAP_SIZE);
+ __type(key, __u32);
+ __type(value, struct ctl_value);
+} ctl_array SEC(".maps");
+
+static __noinline __u32 get_packet_hash(struct packet_description *pckt, bool ipv6)
+{
+ if (ipv6)
+ return jhash_2words(jhash(pckt->srcv6, 16, MAX_VIPS),
+ pckt->ports, CH_RINGS_SIZE);
+ else
+ return jhash_2words(pckt->src, pckt->ports, CH_RINGS_SIZE);
+}
+
+static __noinline bool get_packet_dst(struct real_definition **real,
+ struct packet_description *pckt,
+ struct vip_meta *vip_info,
+ bool is_ipv6)
+{
+ __u32 hash = get_packet_hash(pckt, is_ipv6);
+ __u32 key = RING_SIZE * vip_info->vip_num + hash % RING_SIZE;
+ __u32 *real_pos;
+
+ if (hash != 0x358459b7 /* jhash of ipv4 packet */ &&
+ hash != 0x2f4bc6bb /* jhash of ipv6 packet */)
+ return false;
+
+ real_pos = bpf_map_lookup_elem(&ch_rings, &key);
+ if (!real_pos)
+ return false;
+ key = *real_pos;
+ *real = bpf_map_lookup_elem(&reals, &key);
+ if (!(*real))
+ return false;
+ return true;
+}
+
+static __noinline int parse_icmpv6(struct bpf_dynptr *skb_ptr, __u64 off,
+ struct packet_description *pckt)
+{
+ __u8 buffer[sizeof(struct ipv6hdr)] = {};
+ struct icmp6hdr *icmp_hdr;
+ struct ipv6hdr *ip6h;
+
+ icmp_hdr = bpf_dynptr_slice(skb_ptr, off, buffer, sizeof(buffer));
+ if (!icmp_hdr)
+ return TC_ACT_SHOT;
+
+ if (icmp_hdr->icmp6_type != ICMPV6_PKT_TOOBIG)
+ return TC_ACT_OK;
+ off += sizeof(struct icmp6hdr);
+ ip6h = bpf_dynptr_slice(skb_ptr, off, buffer, sizeof(buffer));
+ if (!ip6h)
+ return TC_ACT_SHOT;
+ pckt->proto = ip6h->nexthdr;
+ pckt->flags |= F_ICMP;
+ memcpy(pckt->srcv6, ip6h->daddr.s6_addr32, 16);
+ memcpy(pckt->dstv6, ip6h->saddr.s6_addr32, 16);
+ return TC_ACT_UNSPEC;
+}
+
+static __noinline int parse_icmp(struct bpf_dynptr *skb_ptr, __u64 off,
+ struct packet_description *pckt)
+{
+ __u8 buffer_icmp[sizeof(struct iphdr)] = {};
+ __u8 buffer_ip[sizeof(struct iphdr)] = {};
+ struct icmphdr *icmp_hdr;
+ struct iphdr *iph;
+
+ icmp_hdr = bpf_dynptr_slice(skb_ptr, off, buffer_icmp, sizeof(buffer_icmp));
+ if (!icmp_hdr)
+ return TC_ACT_SHOT;
+ if (icmp_hdr->type != ICMP_DEST_UNREACH ||
+ icmp_hdr->code != ICMP_FRAG_NEEDED)
+ return TC_ACT_OK;
+ off += sizeof(struct icmphdr);
+ iph = bpf_dynptr_slice(skb_ptr, off, buffer_ip, sizeof(buffer_ip));
+ if (!iph || iph->ihl != 5)
+ return TC_ACT_SHOT;
+ pckt->proto = iph->protocol;
+ pckt->flags |= F_ICMP;
+ pckt->src = iph->daddr;
+ pckt->dst = iph->saddr;
+ return TC_ACT_UNSPEC;
+}
+
+static __noinline bool parse_udp(struct bpf_dynptr *skb_ptr, __u64 off,
+ struct packet_description *pckt)
+{
+ __u8 buffer[sizeof(struct udphdr)] = {};
+ struct udphdr *udp;
+
+ udp = bpf_dynptr_slice(skb_ptr, off, buffer, sizeof(buffer));
+ if (!udp)
+ return false;
+
+ if (!(pckt->flags & F_ICMP)) {
+ pckt->port16[0] = udp->source;
+ pckt->port16[1] = udp->dest;
+ } else {
+ pckt->port16[0] = udp->dest;
+ pckt->port16[1] = udp->source;
+ }
+ return true;
+}
+
+static __noinline bool parse_tcp(struct bpf_dynptr *skb_ptr, __u64 off,
+ struct packet_description *pckt)
+{
+ __u8 buffer[sizeof(struct tcphdr)] = {};
+ struct tcphdr *tcp;
+
+ tcp = bpf_dynptr_slice(skb_ptr, off, buffer, sizeof(buffer));
+ if (!tcp)
+ return false;
+
+ if (tcp->syn)
+ pckt->flags |= F_SYN_SET;
+
+ if (!(pckt->flags & F_ICMP)) {
+ pckt->port16[0] = tcp->source;
+ pckt->port16[1] = tcp->dest;
+ } else {
+ pckt->port16[0] = tcp->dest;
+ pckt->port16[1] = tcp->source;
+ }
+ return true;
+}
+
+static __noinline int process_packet(struct bpf_dynptr *skb_ptr,
+ struct eth_hdr *eth, __u64 off,
+ bool is_ipv6, struct __sk_buff *skb)
+{
+ struct packet_description pckt = {};
+ struct bpf_tunnel_key tkey = {};
+ struct vip_stats *data_stats;
+ struct real_definition *dst;
+ struct vip_meta *vip_info;
+ struct ctl_value *cval;
+ __u32 v4_intf_pos = 1;
+ __u32 v6_intf_pos = 2;
+ struct ipv6hdr *ip6h;
+ struct vip vip = {};
+ struct iphdr *iph;
+ int tun_flag = 0;
+ __u16 pkt_bytes;
+ __u64 iph_len;
+ __u32 ifindex;
+ __u8 protocol;
+ __u32 vip_num;
+ int action;
+
+ tkey.tunnel_ttl = 64;
+ if (is_ipv6) {
+ __u8 buffer[sizeof(struct ipv6hdr)] = {};
+
+ ip6h = bpf_dynptr_slice(skb_ptr, off, buffer, sizeof(buffer));
+ if (!ip6h)
+ return TC_ACT_SHOT;
+
+ iph_len = sizeof(struct ipv6hdr);
+ protocol = ip6h->nexthdr;
+ pckt.proto = protocol;
+ pkt_bytes = bpf_ntohs(ip6h->payload_len);
+ off += iph_len;
+ if (protocol == IPPROTO_FRAGMENT) {
+ return TC_ACT_SHOT;
+ } else if (protocol == IPPROTO_ICMPV6) {
+ action = parse_icmpv6(skb_ptr, off, &pckt);
+ if (action >= 0)
+ return action;
+ off += IPV6_PLUS_ICMP_HDR;
+ } else {
+ memcpy(pckt.srcv6, ip6h->saddr.s6_addr32, 16);
+ memcpy(pckt.dstv6, ip6h->daddr.s6_addr32, 16);
+ }
+ } else {
+ __u8 buffer[sizeof(struct iphdr)] = {};
+
+ iph = bpf_dynptr_slice(skb_ptr, off, buffer, sizeof(buffer));
+ if (!iph || iph->ihl != 5)
+ return TC_ACT_SHOT;
+
+ protocol = iph->protocol;
+ pckt.proto = protocol;
+ pkt_bytes = bpf_ntohs(iph->tot_len);
+ off += IPV4_HDR_LEN_NO_OPT;
+
+ if (iph->frag_off & PCKT_FRAGMENTED)
+ return TC_ACT_SHOT;
+ if (protocol == IPPROTO_ICMP) {
+ action = parse_icmp(skb_ptr, off, &pckt);
+ if (action >= 0)
+ return action;
+ off += IPV4_PLUS_ICMP_HDR;
+ } else {
+ pckt.src = iph->saddr;
+ pckt.dst = iph->daddr;
+ }
+ }
+ protocol = pckt.proto;
+
+ if (protocol == IPPROTO_TCP) {
+ if (!parse_tcp(skb_ptr, off, &pckt))
+ return TC_ACT_SHOT;
+ } else if (protocol == IPPROTO_UDP) {
+ if (!parse_udp(skb_ptr, off, &pckt))
+ return TC_ACT_SHOT;
+ } else {
+ return TC_ACT_SHOT;
+ }
+
+ if (is_ipv6)
+ memcpy(vip.daddr.v6, pckt.dstv6, 16);
+ else
+ vip.daddr.v4 = pckt.dst;
+
+ vip.dport = pckt.port16[1];
+ vip.protocol = pckt.proto;
+ vip_info = bpf_map_lookup_elem(&vip_map, &vip);
+ if (!vip_info) {
+ vip.dport = 0;
+ vip_info = bpf_map_lookup_elem(&vip_map, &vip);
+ if (!vip_info)
+ return TC_ACT_SHOT;
+ pckt.port16[1] = 0;
+ }
+
+ if (vip_info->flags & F_HASH_NO_SRC_PORT)
+ pckt.port16[0] = 0;
+
+ if (!get_packet_dst(&dst, &pckt, vip_info, is_ipv6))
+ return TC_ACT_SHOT;
+
+ if (dst->flags & F_IPV6) {
+ cval = bpf_map_lookup_elem(&ctl_array, &v6_intf_pos);
+ if (!cval)
+ return TC_ACT_SHOT;
+ ifindex = cval->ifindex;
+ memcpy(tkey.remote_ipv6, dst->dstv6, 16);
+ tun_flag = BPF_F_TUNINFO_IPV6;
+ } else {
+ cval = bpf_map_lookup_elem(&ctl_array, &v4_intf_pos);
+ if (!cval)
+ return TC_ACT_SHOT;
+ ifindex = cval->ifindex;
+ tkey.remote_ipv4 = dst->dst;
+ }
+ vip_num = vip_info->vip_num;
+ data_stats = bpf_map_lookup_elem(&stats, &vip_num);
+ if (!data_stats)
+ return TC_ACT_SHOT;
+ data_stats->pkts++;
+ data_stats->bytes += pkt_bytes;
+ bpf_skb_set_tunnel_key(skb, &tkey, sizeof(tkey), tun_flag);
+ *(u32 *)eth->eth_dest = tkey.remote_ipv4;
+ return bpf_redirect(ifindex, 0);
+}
+
+SEC("tc")
+int balancer_ingress(struct __sk_buff *ctx)
+{
+ __u8 buffer[sizeof(struct eth_hdr)] = {};
+ struct bpf_dynptr ptr;
+ struct eth_hdr *eth;
+ __u32 eth_proto;
+ __u32 nh_off;
+ int err;
+
+ nh_off = sizeof(struct eth_hdr);
+
+ bpf_dynptr_from_skb(ctx, 0, &ptr);
+ eth = bpf_dynptr_slice_rdwr(&ptr, 0, buffer, sizeof(buffer));
+ if (!eth)
+ return TC_ACT_SHOT;
+ eth_proto = eth->eth_proto;
+ if (eth_proto == bpf_htons(ETH_P_IP))
+ err = process_packet(&ptr, eth, nh_off, false, ctx);
+ else if (eth_proto == bpf_htons(ETH_P_IPV6))
+ err = process_packet(&ptr, eth, nh_off, true, ctx);
+ else
+ return TC_ACT_SHOT;
+
+ if (eth == buffer)
+ bpf_dynptr_write(&ptr, 0, buffer, sizeof(buffer), 0);
+
+ return err;
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/test_ldsx_insn.c b/tools/testing/selftests/bpf/progs/test_ldsx_insn.c
new file mode 100644
index 000000000000..2a2a942737d7
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/test_ldsx_insn.c
@@ -0,0 +1,125 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2023 Meta Platforms, Inc. and affiliates. */
+
+#include "vmlinux.h"
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
+
+#if (defined(__TARGET_ARCH_arm64) || defined(__TARGET_ARCH_x86) || \
+ (defined(__TARGET_ARCH_riscv) && __riscv_xlen == 64) || \
+ defined(__TARGET_ARCH_s390) || defined(__TARGET_ARCH_loongarch)) && \
+ __clang_major__ >= 18
+const volatile int skip = 0;
+#else
+const volatile int skip = 1;
+#endif
+
+volatile const short val1 = -1;
+volatile const int val2 = -1;
+short val3 = -1;
+int val4 = -1;
+int done1, done2, ret1, ret2;
+
+SEC("?raw_tp/sys_enter")
+int rdonly_map_prog(const void *ctx)
+{
+ if (done1)
+ return 0;
+
+ done1 = 1;
+ /* val1/val2 readonly map */
+ if (val1 == val2)
+ ret1 = 1;
+ return 0;
+
+}
+
+SEC("?raw_tp/sys_enter")
+int map_val_prog(const void *ctx)
+{
+ if (done2)
+ return 0;
+
+ done2 = 1;
+ /* val1/val2 regular read/write map */
+ if (val3 == val4)
+ ret2 = 1;
+ return 0;
+
+}
+
+struct bpf_testmod_struct_arg_1 {
+ int a;
+};
+
+long long int_member;
+
+SEC("?fentry/bpf_testmod_test_arg_ptr_to_struct")
+int BPF_PROG2(test_ptr_struct_arg, struct bpf_testmod_struct_arg_1 *, p)
+{
+ /* probed memory access */
+ int_member = p->a;
+ return 0;
+}
+
+long long set_optlen, set_retval;
+
+SEC("?cgroup/getsockopt")
+int _getsockopt(volatile struct bpf_sockopt *ctx)
+{
+ int old_optlen, old_retval;
+
+ old_optlen = ctx->optlen;
+ old_retval = ctx->retval;
+
+ ctx->optlen = -1;
+ ctx->retval = -1;
+
+ /* sign extension for ctx member */
+ set_optlen = ctx->optlen;
+ set_retval = ctx->retval;
+
+ ctx->optlen = old_optlen;
+ ctx->retval = old_retval;
+
+ return 0;
+}
+
+long long set_mark;
+
+SEC("?tc")
+int _tc(volatile struct __sk_buff *skb)
+{
+ long long tmp_mark;
+ int old_mark;
+
+ old_mark = skb->mark;
+
+ skb->mark = 0xf6fe;
+
+ /* narrowed sign extension for ctx member */
+#if __clang_major__ >= 18
+ /* force narrow one-byte signed load. Otherwise, compiler may
+ * generate a 32-bit unsigned load followed by an s8 movsx.
+ */
+ asm volatile ("r1 = *(s8 *)(%[ctx] + %[off_mark])\n\t"
+ "%[tmp_mark] = r1"
+ : [tmp_mark]"=r"(tmp_mark)
+ : [ctx]"r"(skb),
+ [off_mark]"i"(offsetof(struct __sk_buff, mark)
+#if __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
+ + sizeof(skb->mark) - 1
+#endif
+ )
+ : "r1");
+#else
+ tmp_mark = (char)skb->mark;
+#endif
+ set_mark = tmp_mark;
+
+ skb->mark = old_mark;
+
+ return 0;
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/test_legacy_printk.c b/tools/testing/selftests/bpf/progs/test_legacy_printk.c
new file mode 100644
index 000000000000..42718cd8e6a4
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/test_legacy_printk.c
@@ -0,0 +1,73 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2021 Facebook */
+
+#include <linux/bpf.h>
+#define BPF_NO_GLOBAL_DATA
+#include <bpf/bpf_helpers.h>
+
+char LICENSE[] SEC("license") = "GPL";
+
+struct {
+ __uint(type, BPF_MAP_TYPE_ARRAY);
+ __type(key, int);
+ __type(value, int);
+ __uint(max_entries, 1);
+} my_pid_map SEC(".maps");
+
+struct {
+ __uint(type, BPF_MAP_TYPE_ARRAY);
+ __type(key, int);
+ __type(value, int);
+ __uint(max_entries, 1);
+} res_map SEC(".maps");
+
+volatile int my_pid_var = 0;
+volatile int res_var = 0;
+
+SEC("tp/raw_syscalls/sys_enter")
+int handle_legacy(void *ctx)
+{
+ int zero = 0, *my_pid, cur_pid, *my_res;
+
+ my_pid = bpf_map_lookup_elem(&my_pid_map, &zero);
+ if (!my_pid)
+ return 1;
+
+ cur_pid = bpf_get_current_pid_tgid() >> 32;
+ if (cur_pid != *my_pid)
+ return 1;
+
+ my_res = bpf_map_lookup_elem(&res_map, &zero);
+ if (!my_res)
+ return 1;
+
+ if (*my_res == 0)
+ /* use bpf_printk() in combination with BPF_NO_GLOBAL_DATA to
+ * force .rodata.str1.1 section that previously caused
+ * problems on old kernels due to libbpf always tried to
+ * create a global data map for it
+ */
+ bpf_printk("Legacy-case bpf_printk test, pid %d\n", cur_pid);
+ *my_res = 1;
+
+ return *my_res;
+}
+
+SEC("tp/raw_syscalls/sys_enter")
+int handle_modern(void *ctx)
+{
+ int cur_pid;
+
+ cur_pid = bpf_get_current_pid_tgid() >> 32;
+ if (cur_pid != my_pid_var)
+ return 1;
+
+ if (res_var == 0)
+ /* we need bpf_printk() to validate libbpf logic around unused
+ * global maps and legacy kernels; see comment in handle_legacy()
+ */
+ bpf_printk("Modern-case bpf_printk test, pid %d\n", cur_pid);
+ res_var = 1;
+
+ return res_var;
+}
diff --git a/tools/testing/selftests/bpf/progs/test_libbpf_get_fd_by_id_opts.c b/tools/testing/selftests/bpf/progs/test_libbpf_get_fd_by_id_opts.c
new file mode 100644
index 000000000000..f5ac5f3e8919
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/test_libbpf_get_fd_by_id_opts.c
@@ -0,0 +1,36 @@
+// SPDX-License-Identifier: GPL-2.0
+
+/*
+ * Copyright (C) 2022 Huawei Technologies Duesseldorf GmbH
+ *
+ * Author: Roberto Sassu <roberto.sassu@huawei.com>
+ */
+
+#include "vmlinux.h"
+#include <errno.h>
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
+
+/* From include/linux/mm.h. */
+#define FMODE_WRITE 0x2
+
+struct {
+ __uint(type, BPF_MAP_TYPE_ARRAY);
+ __uint(max_entries, 1);
+ __type(key, __u32);
+ __type(value, __u32);
+} data_input SEC(".maps");
+
+char _license[] SEC("license") = "GPL";
+
+SEC("lsm/bpf_map")
+int BPF_PROG(check_access, struct bpf_map *map, fmode_t fmode)
+{
+ if (map != (struct bpf_map *)&data_input)
+ return 0;
+
+ if (fmode & FMODE_WRITE)
+ return -EACCES;
+
+ return 0;
+}
diff --git a/tools/testing/selftests/bpf/progs/test_log_buf.c b/tools/testing/selftests/bpf/progs/test_log_buf.c
new file mode 100644
index 000000000000..199f459bd5ae
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/test_log_buf.c
@@ -0,0 +1,24 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2021 Facebook */
+
+#include <linux/bpf.h>
+#include <bpf/bpf_helpers.h>
+
+int a[4];
+const volatile int off = 4000;
+
+SEC("raw_tp/sys_enter")
+int good_prog(const void *ctx)
+{
+ a[0] = (int)(long)ctx;
+ return a[1];
+}
+
+SEC("raw_tp/sys_enter")
+int bad_prog(const void *ctx)
+{
+ /* out of bounds access */
+ return a[off];
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/test_log_fixup.c b/tools/testing/selftests/bpf/progs/test_log_fixup.c
new file mode 100644
index 000000000000..1bd48feaaa42
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/test_log_fixup.c
@@ -0,0 +1,74 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2022 Meta Platforms, Inc. and affiliates. */
+
+#include <linux/bpf.h>
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_core_read.h>
+
+struct task_struct___bad {
+ int pid;
+ int fake_field;
+ void *fake_field_subprog;
+} __attribute__((preserve_access_index));
+
+SEC("?raw_tp/sys_enter")
+int bad_relo(const void *ctx)
+{
+ static struct task_struct___bad *t;
+
+ return bpf_core_field_size(t->fake_field);
+}
+
+static __noinline int bad_subprog(void)
+{
+ static struct task_struct___bad *t;
+
+ /* ugliness below is a field offset relocation */
+ return (void *)&t->fake_field_subprog - (void *)t;
+}
+
+SEC("?raw_tp/sys_enter")
+int bad_relo_subprog(const void *ctx)
+{
+ static struct task_struct___bad *t;
+
+ return bad_subprog() + bpf_core_field_size(t->pid);
+}
+
+struct {
+ __uint(type, BPF_MAP_TYPE_ARRAY);
+ __uint(max_entries, 1);
+ __type(key, int);
+ __type(value, int);
+} existing_map SEC(".maps");
+
+struct {
+ __uint(type, BPF_MAP_TYPE_ARRAY);
+ __uint(max_entries, 1);
+ __type(key, int);
+ __type(value, int);
+} missing_map SEC(".maps");
+
+SEC("?raw_tp/sys_enter")
+int use_missing_map(const void *ctx)
+{
+ int zero = 0, *value;
+
+ value = bpf_map_lookup_elem(&existing_map, &zero);
+
+ value = bpf_map_lookup_elem(&missing_map, &zero);
+
+ return value != NULL;
+}
+
+extern int bpf_nonexistent_kfunc(void) __ksym __weak;
+
+SEC("?raw_tp/sys_enter")
+int use_missing_kfunc(const void *ctx)
+{
+ bpf_nonexistent_kfunc();
+
+ return 0;
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/test_lookup_and_delete.c b/tools/testing/selftests/bpf/progs/test_lookup_and_delete.c
new file mode 100644
index 000000000000..3a193f42c7e7
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/test_lookup_and_delete.c
@@ -0,0 +1,26 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include "vmlinux.h"
+#include <bpf/bpf_helpers.h>
+
+__u32 set_pid = 0;
+__u64 set_key = 0;
+__u64 set_value = 0;
+
+struct {
+ __uint(type, BPF_MAP_TYPE_HASH);
+ __uint(max_entries, 2);
+ __type(key, __u64);
+ __type(value, __u64);
+} hash_map SEC(".maps");
+
+SEC("tp/syscalls/sys_enter_getpgid")
+int bpf_lookup_and_delete_test(const void *ctx)
+{
+ if (set_pid == bpf_get_current_pid_tgid() >> 32)
+ bpf_map_update_elem(&hash_map, &set_key, &set_value, BPF_NOEXIST);
+
+ return 0;
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/test_lookup_key.c b/tools/testing/selftests/bpf/progs/test_lookup_key.c
new file mode 100644
index 000000000000..c73776990ae3
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/test_lookup_key.c
@@ -0,0 +1,46 @@
+// SPDX-License-Identifier: GPL-2.0
+
+/*
+ * Copyright (C) 2022 Huawei Technologies Duesseldorf GmbH
+ *
+ * Author: Roberto Sassu <roberto.sassu@huawei.com>
+ */
+
+#include "vmlinux.h"
+#include <errno.h>
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
+
+char _license[] SEC("license") = "GPL";
+
+__u32 monitored_pid;
+__u32 key_serial;
+__u32 key_id;
+__u64 flags;
+
+extern struct bpf_key *bpf_lookup_user_key(__u32 serial, __u64 flags) __ksym;
+extern struct bpf_key *bpf_lookup_system_key(__u64 id) __ksym;
+extern void bpf_key_put(struct bpf_key *key) __ksym;
+
+SEC("lsm.s/bpf")
+int BPF_PROG(bpf, int cmd, union bpf_attr *attr, unsigned int size)
+{
+ struct bpf_key *bkey;
+ __u32 pid;
+
+ pid = bpf_get_current_pid_tgid() >> 32;
+ if (pid != monitored_pid)
+ return 0;
+
+ if (key_serial)
+ bkey = bpf_lookup_user_key(key_serial, flags);
+ else
+ bkey = bpf_lookup_system_key(key_id);
+
+ if (!bkey)
+ return -ENOENT;
+
+ bpf_key_put(bkey);
+
+ return 0;
+}
diff --git a/tools/testing/selftests/bpf/progs/test_lwt_redirect.c b/tools/testing/selftests/bpf/progs/test_lwt_redirect.c
new file mode 100644
index 000000000000..8c895122f293
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/test_lwt_redirect.c
@@ -0,0 +1,90 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <linux/bpf.h>
+#include <bpf/bpf_endian.h>
+#include <bpf/bpf_helpers.h>
+#include <linux/ip.h>
+#include "bpf_tracing_net.h"
+
+/* We don't care about whether the packet can be received by network stack.
+ * Just care if the packet is sent to the correct device at correct direction
+ * and not panic the kernel.
+ */
+static int prepend_dummy_mac(struct __sk_buff *skb)
+{
+ char mac[] = {0x1, 0x2, 0x3, 0x4, 0x5, 0x6, 0xf,
+ 0xe, 0xd, 0xc, 0xb, 0xa, 0x08, 0x00};
+
+ if (bpf_skb_change_head(skb, ETH_HLEN, 0))
+ return -1;
+
+ if (bpf_skb_store_bytes(skb, 0, mac, sizeof(mac), 0))
+ return -1;
+
+ return 0;
+}
+
+/* Use the last byte of IP address to redirect the packet */
+static int get_redirect_target(struct __sk_buff *skb)
+{
+ struct iphdr *iph = NULL;
+ void *start = (void *)(long)skb->data;
+ void *end = (void *)(long)skb->data_end;
+
+ if (start + sizeof(*iph) > end)
+ return -1;
+
+ iph = (struct iphdr *)start;
+ return bpf_ntohl(iph->daddr) & 0xff;
+}
+
+SEC("redir_ingress")
+int test_lwt_redirect_in(struct __sk_buff *skb)
+{
+ int target = get_redirect_target(skb);
+
+ if (target < 0)
+ return BPF_OK;
+
+ if (prepend_dummy_mac(skb))
+ return BPF_DROP;
+
+ return bpf_redirect(target, BPF_F_INGRESS);
+}
+
+SEC("redir_egress")
+int test_lwt_redirect_out(struct __sk_buff *skb)
+{
+ int target = get_redirect_target(skb);
+
+ if (target < 0)
+ return BPF_OK;
+
+ if (prepend_dummy_mac(skb))
+ return BPF_DROP;
+
+ return bpf_redirect(target, 0);
+}
+
+SEC("redir_egress_nomac")
+int test_lwt_redirect_out_nomac(struct __sk_buff *skb)
+{
+ int target = get_redirect_target(skb);
+
+ if (target < 0)
+ return BPF_OK;
+
+ return bpf_redirect(target, 0);
+}
+
+SEC("redir_ingress_nomac")
+int test_lwt_redirect_in_nomac(struct __sk_buff *skb)
+{
+ int target = get_redirect_target(skb);
+
+ if (target < 0)
+ return BPF_OK;
+
+ return bpf_redirect(target, BPF_F_INGRESS);
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/test_lwt_reroute.c b/tools/testing/selftests/bpf/progs/test_lwt_reroute.c
new file mode 100644
index 000000000000..1dc64351929c
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/test_lwt_reroute.c
@@ -0,0 +1,36 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <inttypes.h>
+#include <linux/bpf.h>
+#include <bpf/bpf_endian.h>
+#include <bpf/bpf_helpers.h>
+#include <linux/if_ether.h>
+#include <linux/ip.h>
+
+/* This function extracts the last byte of the daddr, and uses it
+ * as output dev index.
+ */
+SEC("lwt_xmit")
+int test_lwt_reroute(struct __sk_buff *skb)
+{
+ struct iphdr *iph = NULL;
+ void *start = (void *)(long)skb->data;
+ void *end = (void *)(long)skb->data_end;
+
+ /* set mark at most once */
+ if (skb->mark != 0)
+ return BPF_OK;
+
+ if (start + sizeof(*iph) > end)
+ return BPF_DROP;
+
+ iph = (struct iphdr *)start;
+ skb->mark = bpf_ntohl(iph->daddr) & 0xff;
+
+ /* do not reroute x.x.x.0 packets */
+ if (skb->mark == 0)
+ return BPF_OK;
+
+ return BPF_LWT_REROUTE;
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/test_lwt_seg6local.c b/tools/testing/selftests/bpf/progs/test_lwt_seg6local.c
index 48ff2b2ad5e7..fed66f36adb6 100644
--- a/tools/testing/selftests/bpf/progs/test_lwt_seg6local.c
+++ b/tools/testing/selftests/bpf/progs/test_lwt_seg6local.c
@@ -6,6 +6,8 @@
#include <bpf/bpf_helpers.h>
#include <bpf/bpf_endian.h>
+#include "bpf_compiler.h"
+
/* Packet parsing state machine helpers. */
#define cursor_advance(_cursor, _len) \
({ void *_tmp = _cursor; _cursor += _len; _tmp; })
@@ -131,7 +133,7 @@ int is_valid_tlv_boundary(struct __sk_buff *skb, struct ip6_srh_t *srh,
*pad_off = 0;
// we can only go as far as ~10 TLVs due to the BPF max stack size
- #pragma clang loop unroll(full)
+ __pragma_loop_unroll_full
for (int i = 0; i < 10; i++) {
struct sr6_tlv_t tlv;
@@ -302,7 +304,7 @@ int __encap_srh(struct __sk_buff *skb)
seg = (struct ip6_addr_t *)((char *)srh + sizeof(*srh));
- #pragma clang loop unroll(full)
+ __pragma_loop_unroll_full
for (unsigned long long lo = 0; lo < 4; lo++) {
seg->lo = bpf_cpu_to_be64(4 - lo);
seg->hi = bpf_cpu_to_be64(hi);
diff --git a/tools/testing/selftests/bpf/progs/test_map_in_map.c b/tools/testing/selftests/bpf/progs/test_map_in_map.c
index 1cfeb940cf9f..b295f9b721bf 100644
--- a/tools/testing/selftests/bpf/progs/test_map_in_map.c
+++ b/tools/testing/selftests/bpf/progs/test_map_in_map.c
@@ -9,21 +9,45 @@ struct {
__uint(type, BPF_MAP_TYPE_ARRAY_OF_MAPS);
__uint(max_entries, 1);
__uint(map_flags, 0);
- __uint(key_size, sizeof(__u32));
- /* must be sizeof(__u32) for map in map */
- __uint(value_size, sizeof(__u32));
+ __type(key, __u32);
+ __type(value, __u32);
} mim_array SEC(".maps");
struct {
__uint(type, BPF_MAP_TYPE_HASH_OF_MAPS);
__uint(max_entries, 1);
__uint(map_flags, 0);
- __uint(key_size, sizeof(int));
- /* must be sizeof(__u32) for map in map */
- __uint(value_size, sizeof(__u32));
+ __type(key, int);
+ __type(value, __u32);
} mim_hash SEC(".maps");
-SEC("xdp_mimtest")
+/* The following three maps are used to test
+ * perf_event_array map can be an inner
+ * map of hash/array_of_maps.
+ */
+struct perf_event_array {
+ __uint(type, BPF_MAP_TYPE_PERF_EVENT_ARRAY);
+ __type(key, __u32);
+ __type(value, __u32);
+} inner_map0 SEC(".maps");
+
+struct {
+ __uint(type, BPF_MAP_TYPE_ARRAY_OF_MAPS);
+ __uint(max_entries, 1);
+ __type(key, __u32);
+ __array(values, struct perf_event_array);
+} mim_array_pe SEC(".maps") = {
+ .values = {&inner_map0}};
+
+struct {
+ __uint(type, BPF_MAP_TYPE_HASH_OF_MAPS);
+ __uint(max_entries, 1);
+ __type(key, __u32);
+ __array(values, struct perf_event_array);
+} mim_hash_pe SEC(".maps") = {
+ .values = {&inner_map0}};
+
+SEC("xdp")
int xdp_mimtest0(struct xdp_md *ctx)
{
int value = 123;
@@ -49,5 +73,4 @@ int xdp_mimtest0(struct xdp_md *ctx)
return XDP_PASS;
}
-int _version SEC("version") = 1;
char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/test_map_in_map_invalid.c b/tools/testing/selftests/bpf/progs/test_map_in_map_invalid.c
new file mode 100644
index 000000000000..9c7d75cf0bd6
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/test_map_in_map_invalid.c
@@ -0,0 +1,26 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2021 Isovalent, Inc. */
+#include <linux/bpf.h>
+#include <bpf/bpf_helpers.h>
+
+struct inner {
+ __uint(type, BPF_MAP_TYPE_ARRAY);
+ __type(key, __u32);
+ __type(value, int);
+ __uint(max_entries, 4);
+};
+
+struct {
+ __uint(type, BPF_MAP_TYPE_ARRAY_OF_MAPS);
+ __uint(max_entries, 0); /* This will make map creation to fail */
+ __type(key, __u32);
+ __array(values, struct inner);
+} mim SEC(".maps");
+
+SEC("xdp")
+int xdp_noop0(struct xdp_md *ctx)
+{
+ return XDP_PASS;
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/test_map_init.c b/tools/testing/selftests/bpf/progs/test_map_init.c
new file mode 100644
index 000000000000..c89d28ead673
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/test_map_init.c
@@ -0,0 +1,33 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2020 Tessares SA <http://www.tessares.net> */
+
+#include "vmlinux.h"
+#include <bpf/bpf_helpers.h>
+
+__u64 inKey = 0;
+__u64 inValue = 0;
+__u32 inPid = 0;
+
+struct {
+ __uint(type, BPF_MAP_TYPE_PERCPU_HASH);
+ __uint(max_entries, 2);
+ __type(key, __u64);
+ __type(value, __u64);
+} hashmap1 SEC(".maps");
+
+
+SEC("tp/syscalls/sys_enter_getpgid")
+int sysenter_getpgid(const void *ctx)
+{
+ /* Just do it for once, when called from our own test prog. This
+ * ensures the map value is only updated for a single CPU.
+ */
+ int cur_pid = bpf_get_current_pid_tgid() >> 32;
+
+ if (cur_pid == inPid)
+ bpf_map_update_elem(&hashmap1, &inKey, &inValue, BPF_NOEXIST);
+
+ return 0;
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/test_map_lock.c b/tools/testing/selftests/bpf/progs/test_map_lock.c
index b5c07ae7b68f..1c02511b73cd 100644
--- a/tools/testing/selftests/bpf/progs/test_map_lock.c
+++ b/tools/testing/selftests/bpf/progs/test_map_lock.c
@@ -30,10 +30,10 @@ struct {
__type(value, struct array_elem);
} array_map SEC(".maps");
-SEC("map_lock_demo")
+SEC("cgroup/skb")
int bpf_map_lock_test(struct __sk_buff *skb)
{
- struct hmap_elem zero = {}, *val;
+ struct hmap_elem *val;
int rnd = bpf_get_prandom_u32();
int key = 0, err = 1, i;
struct array_elem *q;
diff --git a/tools/testing/selftests/bpf/progs/test_map_lookup_percpu_elem.c b/tools/testing/selftests/bpf/progs/test_map_lookup_percpu_elem.c
new file mode 100644
index 000000000000..ca827b1092da
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/test_map_lookup_percpu_elem.c
@@ -0,0 +1,76 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2022 Bytedance */
+
+#include "vmlinux.h"
+#include <bpf/bpf_helpers.h>
+
+__u64 percpu_array_elem_sum = 0;
+__u64 percpu_hash_elem_sum = 0;
+__u64 percpu_lru_hash_elem_sum = 0;
+const volatile int nr_cpus;
+const volatile int my_pid;
+
+struct {
+ __uint(type, BPF_MAP_TYPE_PERCPU_ARRAY);
+ __uint(max_entries, 1);
+ __type(key, __u32);
+ __type(value, __u64);
+} percpu_array_map SEC(".maps");
+
+struct {
+ __uint(type, BPF_MAP_TYPE_PERCPU_HASH);
+ __uint(max_entries, 1);
+ __type(key, __u64);
+ __type(value, __u64);
+} percpu_hash_map SEC(".maps");
+
+struct {
+ __uint(type, BPF_MAP_TYPE_LRU_PERCPU_HASH);
+ __uint(max_entries, 1);
+ __type(key, __u64);
+ __type(value, __u64);
+} percpu_lru_hash_map SEC(".maps");
+
+struct read_percpu_elem_ctx {
+ void *map;
+ __u64 sum;
+};
+
+static int read_percpu_elem_callback(__u32 index, struct read_percpu_elem_ctx *ctx)
+{
+ __u64 key = 0;
+ __u64 *value;
+
+ value = bpf_map_lookup_percpu_elem(ctx->map, &key, index);
+ if (value)
+ ctx->sum += *value;
+ return 0;
+}
+
+SEC("tp/syscalls/sys_enter_getuid")
+int sysenter_getuid(const void *ctx)
+{
+ struct read_percpu_elem_ctx map_ctx;
+
+ if (my_pid != (bpf_get_current_pid_tgid() >> 32))
+ return 0;
+
+ map_ctx.map = &percpu_array_map;
+ map_ctx.sum = 0;
+ bpf_loop(nr_cpus, read_percpu_elem_callback, &map_ctx, 0);
+ percpu_array_elem_sum = map_ctx.sum;
+
+ map_ctx.map = &percpu_hash_map;
+ map_ctx.sum = 0;
+ bpf_loop(nr_cpus, read_percpu_elem_callback, &map_ctx, 0);
+ percpu_hash_elem_sum = map_ctx.sum;
+
+ map_ctx.map = &percpu_lru_hash_map;
+ map_ctx.sum = 0;
+ bpf_loop(nr_cpus, read_percpu_elem_callback, &map_ctx, 0);
+ percpu_lru_hash_elem_sum = map_ctx.sum;
+
+ return 0;
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/test_map_ops.c b/tools/testing/selftests/bpf/progs/test_map_ops.c
new file mode 100644
index 000000000000..b53b46a090c8
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/test_map_ops.c
@@ -0,0 +1,138 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2023 Meta Platforms, Inc. and affiliates. */
+
+#include "vmlinux.h"
+#include <bpf/bpf_helpers.h>
+
+char _license[] SEC("license") = "GPL";
+
+struct {
+ __uint(type, BPF_MAP_TYPE_HASH);
+ __uint(max_entries, 1);
+ __type(key, int);
+ __type(value, int);
+} hash_map SEC(".maps");
+
+struct {
+ __uint(type, BPF_MAP_TYPE_STACK);
+ __uint(max_entries, 1);
+ __type(value, int);
+} stack_map SEC(".maps");
+
+struct {
+ __uint(type, BPF_MAP_TYPE_ARRAY);
+ __uint(max_entries, 1);
+ __type(key, int);
+ __type(value, int);
+} array_map SEC(".maps");
+
+const volatile pid_t pid;
+long err = 0;
+
+static u64 callback(u64 map, u64 key, u64 val, u64 ctx, u64 flags)
+{
+ return 0;
+}
+
+SEC("tp/syscalls/sys_enter_getpid")
+int map_update(void *ctx)
+{
+ const int key = 0;
+ const int val = 1;
+
+ if (pid != (bpf_get_current_pid_tgid() >> 32))
+ return 0;
+
+ err = bpf_map_update_elem(&hash_map, &key, &val, BPF_NOEXIST);
+
+ return 0;
+}
+
+SEC("tp/syscalls/sys_enter_getppid")
+int map_delete(void *ctx)
+{
+ const int key = 0;
+
+ if (pid != (bpf_get_current_pid_tgid() >> 32))
+ return 0;
+
+ err = bpf_map_delete_elem(&hash_map, &key);
+
+ return 0;
+}
+
+SEC("tp/syscalls/sys_enter_getuid")
+int map_push(void *ctx)
+{
+ const int val = 1;
+
+ if (pid != (bpf_get_current_pid_tgid() >> 32))
+ return 0;
+
+ err = bpf_map_push_elem(&stack_map, &val, 0);
+
+ return 0;
+}
+
+SEC("tp/syscalls/sys_enter_geteuid")
+int map_pop(void *ctx)
+{
+ int val;
+
+ if (pid != (bpf_get_current_pid_tgid() >> 32))
+ return 0;
+
+ err = bpf_map_pop_elem(&stack_map, &val);
+
+ return 0;
+}
+
+SEC("tp/syscalls/sys_enter_getgid")
+int map_peek(void *ctx)
+{
+ int val;
+
+ if (pid != (bpf_get_current_pid_tgid() >> 32))
+ return 0;
+
+ err = bpf_map_peek_elem(&stack_map, &val);
+
+ return 0;
+}
+
+SEC("tp/syscalls/sys_enter_gettid")
+int map_for_each_pass(void *ctx)
+{
+ const int key = 0;
+ const int val = 1;
+ const u64 flags = 0;
+ int callback_ctx;
+
+ if (pid != (bpf_get_current_pid_tgid() >> 32))
+ return 0;
+
+ bpf_map_update_elem(&array_map, &key, &val, flags);
+
+ err = bpf_for_each_map_elem(&array_map, callback, &callback_ctx, flags);
+
+ return 0;
+}
+
+SEC("tp/syscalls/sys_enter_getpgid")
+int map_for_each_fail(void *ctx)
+{
+ const int key = 0;
+ const int val = 1;
+ const u64 flags = BPF_NOEXIST;
+ int callback_ctx;
+
+ if (pid != (bpf_get_current_pid_tgid() >> 32))
+ return 0;
+
+ bpf_map_update_elem(&array_map, &key, &val, flags);
+
+ /* calling for_each with non-zero flags will return error */
+ err = bpf_for_each_map_elem(&array_map, callback, &callback_ctx, flags);
+
+ return 0;
+}
diff --git a/tools/testing/selftests/bpf/progs/test_migrate_reuseport.c b/tools/testing/selftests/bpf/progs/test_migrate_reuseport.c
new file mode 100644
index 000000000000..27df571abf5b
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/test_migrate_reuseport.c
@@ -0,0 +1,135 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Check if we can migrate child sockets.
+ *
+ * 1. If reuse_md->migrating_sk is NULL (SYN packet),
+ * return SK_PASS without selecting a listener.
+ * 2. If reuse_md->migrating_sk is not NULL (socket migration),
+ * select a listener (reuseport_map[migrate_map[cookie]])
+ *
+ * Author: Kuniyuki Iwashima <kuniyu@amazon.co.jp>
+ */
+
+#include <stddef.h>
+#include <string.h>
+#include <linux/bpf.h>
+#include <linux/if_ether.h>
+#include <linux/ip.h>
+#include <linux/ipv6.h>
+#include <linux/tcp.h>
+#include <linux/in.h>
+#include <bpf/bpf_endian.h>
+#include <bpf/bpf_helpers.h>
+
+struct {
+ __uint(type, BPF_MAP_TYPE_REUSEPORT_SOCKARRAY);
+ __uint(max_entries, 256);
+ __type(key, int);
+ __type(value, __u64);
+} reuseport_map SEC(".maps");
+
+struct {
+ __uint(type, BPF_MAP_TYPE_HASH);
+ __uint(max_entries, 256);
+ __type(key, __u64);
+ __type(value, int);
+} migrate_map SEC(".maps");
+
+int migrated_at_close = 0;
+int migrated_at_close_fastopen = 0;
+int migrated_at_send_synack = 0;
+int migrated_at_recv_ack = 0;
+__be16 server_port;
+
+SEC("xdp")
+int drop_ack(struct xdp_md *xdp)
+{
+ void *data_end = (void *)(long)xdp->data_end;
+ void *data = (void *)(long)xdp->data;
+ struct ethhdr *eth = data;
+ struct tcphdr *tcp = NULL;
+
+ if (eth + 1 > data_end)
+ goto pass;
+
+ switch (bpf_ntohs(eth->h_proto)) {
+ case ETH_P_IP: {
+ struct iphdr *ip = (struct iphdr *)(eth + 1);
+
+ if (ip + 1 > data_end)
+ goto pass;
+
+ if (ip->protocol != IPPROTO_TCP)
+ goto pass;
+
+ tcp = (struct tcphdr *)((void *)ip + ip->ihl * 4);
+ break;
+ }
+ case ETH_P_IPV6: {
+ struct ipv6hdr *ipv6 = (struct ipv6hdr *)(eth + 1);
+
+ if (ipv6 + 1 > data_end)
+ goto pass;
+
+ if (ipv6->nexthdr != IPPROTO_TCP)
+ goto pass;
+
+ tcp = (struct tcphdr *)(ipv6 + 1);
+ break;
+ }
+ default:
+ goto pass;
+ }
+
+ if (tcp + 1 > data_end)
+ goto pass;
+
+ if (tcp->dest != server_port)
+ goto pass;
+
+ if (!tcp->syn && tcp->ack)
+ return XDP_DROP;
+
+pass:
+ return XDP_PASS;
+}
+
+SEC("sk_reuseport/migrate")
+int migrate_reuseport(struct sk_reuseport_md *reuse_md)
+{
+ int *key, flags = 0, state, err;
+ __u64 cookie;
+
+ if (!reuse_md->migrating_sk)
+ return SK_PASS;
+
+ state = reuse_md->migrating_sk->state;
+ cookie = bpf_get_socket_cookie(reuse_md->sk);
+
+ key = bpf_map_lookup_elem(&migrate_map, &cookie);
+ if (!key)
+ return SK_DROP;
+
+ err = bpf_sk_select_reuseport(reuse_md, &reuseport_map, key, flags);
+ if (err)
+ return SK_PASS;
+
+ switch (state) {
+ case BPF_TCP_ESTABLISHED:
+ __sync_fetch_and_add(&migrated_at_close, 1);
+ break;
+ case BPF_TCP_SYN_RECV:
+ __sync_fetch_and_add(&migrated_at_close_fastopen, 1);
+ break;
+ case BPF_TCP_NEW_SYN_RECV:
+ if (!reuse_md->len)
+ __sync_fetch_and_add(&migrated_at_send_synack, 1);
+ else
+ __sync_fetch_and_add(&migrated_at_recv_ack, 1);
+ break;
+ }
+
+ return SK_PASS;
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/test_misc_tcp_hdr_options.c b/tools/testing/selftests/bpf/progs/test_misc_tcp_hdr_options.c
new file mode 100644
index 000000000000..d487153a839d
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/test_misc_tcp_hdr_options.c
@@ -0,0 +1,329 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2020 Facebook */
+
+#include <stddef.h>
+#include <errno.h>
+#include <stdbool.h>
+#include <sys/types.h>
+#include <sys/socket.h>
+#include <linux/ipv6.h>
+#include <linux/tcp.h>
+#include <linux/socket.h>
+#include <linux/bpf.h>
+#include <linux/types.h>
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_endian.h>
+#define BPF_PROG_TEST_TCP_HDR_OPTIONS
+#include "test_tcp_hdr_options.h"
+
+__u16 last_addr16_n = __bpf_htons(1);
+__u16 active_lport_n = 0;
+__u16 active_lport_h = 0;
+__u16 passive_lport_n = 0;
+__u16 passive_lport_h = 0;
+
+/* options received at passive side */
+unsigned int nr_pure_ack = 0;
+unsigned int nr_data = 0;
+unsigned int nr_syn = 0;
+unsigned int nr_fin = 0;
+unsigned int nr_hwtstamp = 0;
+
+/* Check the header received from the active side */
+static int __check_active_hdr_in(struct bpf_sock_ops *skops, bool check_syn)
+{
+ union {
+ struct tcphdr th;
+ struct ipv6hdr ip6;
+ struct tcp_exprm_opt exprm_opt;
+ struct tcp_opt reg_opt;
+ __u8 data[100]; /* IPv6 (40) + Max TCP hdr (60) */
+ } hdr = {};
+ __u64 load_flags = check_syn ? BPF_LOAD_HDR_OPT_TCP_SYN : 0;
+ struct tcphdr *pth;
+ int ret;
+
+ hdr.reg_opt.kind = 0xB9;
+
+ /* The option is 4 bytes long instead of 2 bytes */
+ ret = bpf_load_hdr_opt(skops, &hdr.reg_opt, 2, load_flags);
+ if (ret != -ENOSPC)
+ RET_CG_ERR(ret);
+
+ /* Test searching magic with regular kind */
+ hdr.reg_opt.len = 4;
+ ret = bpf_load_hdr_opt(skops, &hdr.reg_opt, sizeof(hdr.reg_opt),
+ load_flags);
+ if (ret != -EINVAL)
+ RET_CG_ERR(ret);
+
+ hdr.reg_opt.len = 0;
+ ret = bpf_load_hdr_opt(skops, &hdr.reg_opt, sizeof(hdr.reg_opt),
+ load_flags);
+ if (ret != 4 || hdr.reg_opt.len != 4 || hdr.reg_opt.kind != 0xB9 ||
+ hdr.reg_opt.data[0] != 0xfa || hdr.reg_opt.data[1] != 0xce)
+ RET_CG_ERR(ret);
+
+ /* Test searching experimental option with invalid kind length */
+ hdr.exprm_opt.kind = TCPOPT_EXP;
+ hdr.exprm_opt.len = 5;
+ hdr.exprm_opt.magic = 0;
+ ret = bpf_load_hdr_opt(skops, &hdr.exprm_opt, sizeof(hdr.exprm_opt),
+ load_flags);
+ if (ret != -EINVAL)
+ RET_CG_ERR(ret);
+
+ /* Test searching experimental option with 0 magic value */
+ hdr.exprm_opt.len = 4;
+ ret = bpf_load_hdr_opt(skops, &hdr.exprm_opt, sizeof(hdr.exprm_opt),
+ load_flags);
+ if (ret != -ENOMSG)
+ RET_CG_ERR(ret);
+
+ hdr.exprm_opt.magic = __bpf_htons(0xeB9F);
+ ret = bpf_load_hdr_opt(skops, &hdr.exprm_opt, sizeof(hdr.exprm_opt),
+ load_flags);
+ if (ret != 4 || hdr.exprm_opt.len != 4 ||
+ hdr.exprm_opt.kind != TCPOPT_EXP ||
+ hdr.exprm_opt.magic != __bpf_htons(0xeB9F))
+ RET_CG_ERR(ret);
+
+ if (!check_syn)
+ return CG_OK;
+
+ /* Test loading from skops->syn_skb if sk_state == TCP_NEW_SYN_RECV
+ *
+ * Test loading from tp->saved_syn for other sk_state.
+ */
+ ret = bpf_getsockopt(skops, SOL_TCP, TCP_BPF_SYN_IP, &hdr.ip6,
+ sizeof(hdr.ip6));
+ if (ret != -ENOSPC)
+ RET_CG_ERR(ret);
+
+ if (hdr.ip6.saddr.s6_addr16[7] != last_addr16_n ||
+ hdr.ip6.daddr.s6_addr16[7] != last_addr16_n)
+ RET_CG_ERR(0);
+
+ ret = bpf_getsockopt(skops, SOL_TCP, TCP_BPF_SYN_IP, &hdr, sizeof(hdr));
+ if (ret < 0)
+ RET_CG_ERR(ret);
+
+ pth = (struct tcphdr *)(&hdr.ip6 + 1);
+ if (pth->dest != passive_lport_n || pth->source != active_lport_n)
+ RET_CG_ERR(0);
+
+ ret = bpf_getsockopt(skops, SOL_TCP, TCP_BPF_SYN, &hdr, sizeof(hdr));
+ if (ret < 0)
+ RET_CG_ERR(ret);
+
+ if (hdr.th.dest != passive_lport_n || hdr.th.source != active_lport_n)
+ RET_CG_ERR(0);
+
+ return CG_OK;
+}
+
+static int check_active_syn_in(struct bpf_sock_ops *skops)
+{
+ return __check_active_hdr_in(skops, true);
+}
+
+static int check_active_hdr_in(struct bpf_sock_ops *skops)
+{
+ struct tcphdr *th;
+
+ if (__check_active_hdr_in(skops, false) == CG_ERR)
+ return CG_ERR;
+
+ th = skops->skb_data;
+ if (th + 1 > skops->skb_data_end)
+ RET_CG_ERR(0);
+
+ if (tcp_hdrlen(th) < skops->skb_len)
+ nr_data++;
+
+ if (th->fin)
+ nr_fin++;
+
+ if (th->ack && !th->fin && tcp_hdrlen(th) == skops->skb_len)
+ nr_pure_ack++;
+
+ if (skops->skb_hwtstamp)
+ nr_hwtstamp++;
+
+ return CG_OK;
+}
+
+static int active_opt_len(struct bpf_sock_ops *skops)
+{
+ int err;
+
+ /* Reserve more than enough to allow the -EEXIST test in
+ * the write_active_opt().
+ */
+ err = bpf_reserve_hdr_opt(skops, 12, 0);
+ if (err)
+ RET_CG_ERR(err);
+
+ return CG_OK;
+}
+
+static int write_active_opt(struct bpf_sock_ops *skops)
+{
+ struct tcp_exprm_opt exprm_opt = {};
+ struct tcp_opt win_scale_opt = {};
+ struct tcp_opt reg_opt = {};
+ struct tcphdr *th;
+ int err, ret;
+
+ exprm_opt.kind = TCPOPT_EXP;
+ exprm_opt.len = 4;
+ exprm_opt.magic = __bpf_htons(0xeB9F);
+
+ reg_opt.kind = 0xB9;
+ reg_opt.len = 4;
+ reg_opt.data[0] = 0xfa;
+ reg_opt.data[1] = 0xce;
+
+ win_scale_opt.kind = TCPOPT_WINDOW;
+
+ err = bpf_store_hdr_opt(skops, &exprm_opt, sizeof(exprm_opt), 0);
+ if (err)
+ RET_CG_ERR(err);
+
+ /* Store the same exprm option */
+ err = bpf_store_hdr_opt(skops, &exprm_opt, sizeof(exprm_opt), 0);
+ if (err != -EEXIST)
+ RET_CG_ERR(err);
+
+ err = bpf_store_hdr_opt(skops, &reg_opt, sizeof(reg_opt), 0);
+ if (err)
+ RET_CG_ERR(err);
+ err = bpf_store_hdr_opt(skops, &reg_opt, sizeof(reg_opt), 0);
+ if (err != -EEXIST)
+ RET_CG_ERR(err);
+
+ /* Check the option has been written and can be searched */
+ ret = bpf_load_hdr_opt(skops, &exprm_opt, sizeof(exprm_opt), 0);
+ if (ret != 4 || exprm_opt.len != 4 || exprm_opt.kind != TCPOPT_EXP ||
+ exprm_opt.magic != __bpf_htons(0xeB9F))
+ RET_CG_ERR(ret);
+
+ reg_opt.len = 0;
+ ret = bpf_load_hdr_opt(skops, &reg_opt, sizeof(reg_opt), 0);
+ if (ret != 4 || reg_opt.len != 4 || reg_opt.kind != 0xB9 ||
+ reg_opt.data[0] != 0xfa || reg_opt.data[1] != 0xce)
+ RET_CG_ERR(ret);
+
+ th = skops->skb_data;
+ if (th + 1 > skops->skb_data_end)
+ RET_CG_ERR(0);
+
+ if (th->syn) {
+ active_lport_h = skops->local_port;
+ active_lport_n = th->source;
+
+ /* Search the win scale option written by kernel
+ * in the SYN packet.
+ */
+ ret = bpf_load_hdr_opt(skops, &win_scale_opt,
+ sizeof(win_scale_opt), 0);
+ if (ret != 3 || win_scale_opt.len != 3 ||
+ win_scale_opt.kind != TCPOPT_WINDOW)
+ RET_CG_ERR(ret);
+
+ /* Write the win scale option that kernel
+ * has already written.
+ */
+ err = bpf_store_hdr_opt(skops, &win_scale_opt,
+ sizeof(win_scale_opt), 0);
+ if (err != -EEXIST)
+ RET_CG_ERR(err);
+ }
+
+ return CG_OK;
+}
+
+static int handle_hdr_opt_len(struct bpf_sock_ops *skops)
+{
+ __u8 tcp_flags = skops_tcp_flags(skops);
+
+ if ((tcp_flags & TCPHDR_SYNACK) == TCPHDR_SYNACK)
+ /* Check the SYN from bpf_sock_ops_kern->syn_skb */
+ return check_active_syn_in(skops);
+
+ /* Passive side should have cleared the write hdr cb by now */
+ if (skops->local_port == passive_lport_h)
+ RET_CG_ERR(0);
+
+ return active_opt_len(skops);
+}
+
+static int handle_write_hdr_opt(struct bpf_sock_ops *skops)
+{
+ if (skops->local_port == passive_lport_h)
+ RET_CG_ERR(0);
+
+ return write_active_opt(skops);
+}
+
+static int handle_parse_hdr(struct bpf_sock_ops *skops)
+{
+ /* Passive side is not writing any non-standard/unknown
+ * option, so the active side should never be called.
+ */
+ if (skops->local_port == active_lport_h)
+ RET_CG_ERR(0);
+
+ return check_active_hdr_in(skops);
+}
+
+static int handle_passive_estab(struct bpf_sock_ops *skops)
+{
+ int err;
+
+ /* No more write hdr cb */
+ bpf_sock_ops_cb_flags_set(skops,
+ skops->bpf_sock_ops_cb_flags &
+ ~BPF_SOCK_OPS_WRITE_HDR_OPT_CB_FLAG);
+
+ /* Recheck the SYN but check the tp->saved_syn this time */
+ err = check_active_syn_in(skops);
+ if (err == CG_ERR)
+ return err;
+
+ nr_syn++;
+
+ /* The ack has header option written by the active side also */
+ return check_active_hdr_in(skops);
+}
+
+SEC("sockops")
+int misc_estab(struct bpf_sock_ops *skops)
+{
+ int true_val = 1;
+
+ switch (skops->op) {
+ case BPF_SOCK_OPS_TCP_LISTEN_CB:
+ passive_lport_h = skops->local_port;
+ passive_lport_n = __bpf_htons(passive_lport_h);
+ bpf_setsockopt(skops, SOL_TCP, TCP_SAVE_SYN,
+ &true_val, sizeof(true_val));
+ set_hdr_cb_flags(skops, 0);
+ break;
+ case BPF_SOCK_OPS_TCP_CONNECT_CB:
+ set_hdr_cb_flags(skops, 0);
+ break;
+ case BPF_SOCK_OPS_PARSE_HDR_OPT_CB:
+ return handle_parse_hdr(skops);
+ case BPF_SOCK_OPS_HDR_OPT_LEN_CB:
+ return handle_hdr_opt_len(skops);
+ case BPF_SOCK_OPS_WRITE_HDR_OPT_CB:
+ return handle_write_hdr_opt(skops);
+ case BPF_SOCK_OPS_PASSIVE_ESTABLISHED_CB:
+ return handle_passive_estab(skops);
+ }
+
+ return CG_OK;
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/test_mmap.c b/tools/testing/selftests/bpf/progs/test_mmap.c
index 4eb42cff5fe9..5a5cc19a15bf 100644
--- a/tools/testing/selftests/bpf/progs/test_mmap.c
+++ b/tools/testing/selftests/bpf/progs/test_mmap.c
@@ -9,7 +9,6 @@ char _license[] SEC("license") = "GPL";
struct {
__uint(type, BPF_MAP_TYPE_ARRAY);
- __uint(max_entries, 4096);
__uint(map_flags, BPF_F_MMAPABLE | BPF_F_RDONLY_PROG);
__type(key, __u32);
__type(value, char);
@@ -17,7 +16,6 @@ struct {
struct {
__uint(type, BPF_MAP_TYPE_ARRAY);
- __uint(max_entries, 512 * 4); /* at least 4 pages of data */
__uint(map_flags, BPF_F_MMAPABLE);
__type(key, __u32);
__type(value, __u64);
diff --git a/tools/testing/selftests/bpf/progs/test_module_attach.c b/tools/testing/selftests/bpf/progs/test_module_attach.c
new file mode 100644
index 000000000000..8a1b50f3a002
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/test_module_attach.c
@@ -0,0 +1,119 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2020 Facebook */
+
+#include "vmlinux.h"
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
+#include <bpf/bpf_core_read.h>
+#include "../bpf_testmod/bpf_testmod.h"
+
+__u32 raw_tp_read_sz = 0;
+
+SEC("raw_tp/bpf_testmod_test_read")
+int BPF_PROG(handle_raw_tp,
+ struct task_struct *task, struct bpf_testmod_test_read_ctx *read_ctx)
+{
+ raw_tp_read_sz = BPF_CORE_READ(read_ctx, len);
+ return 0;
+}
+
+__u32 raw_tp_bare_write_sz = 0;
+
+SEC("raw_tp/bpf_testmod_test_write_bare")
+int BPF_PROG(handle_raw_tp_bare,
+ struct task_struct *task, struct bpf_testmod_test_write_ctx *write_ctx)
+{
+ raw_tp_bare_write_sz = BPF_CORE_READ(write_ctx, len);
+ return 0;
+}
+
+int raw_tp_writable_bare_in_val = 0;
+int raw_tp_writable_bare_early_ret = 0;
+int raw_tp_writable_bare_out_val = 0;
+
+SEC("raw_tp.w/bpf_testmod_test_writable_bare")
+int BPF_PROG(handle_raw_tp_writable_bare,
+ struct bpf_testmod_test_writable_ctx *writable)
+{
+ raw_tp_writable_bare_in_val = writable->val;
+ writable->early_ret = raw_tp_writable_bare_early_ret;
+ writable->val = raw_tp_writable_bare_out_val;
+ return 0;
+}
+
+__u32 tp_btf_read_sz = 0;
+
+SEC("tp_btf/bpf_testmod_test_read")
+int BPF_PROG(handle_tp_btf,
+ struct task_struct *task, struct bpf_testmod_test_read_ctx *read_ctx)
+{
+ tp_btf_read_sz = read_ctx->len;
+ return 0;
+}
+
+__u32 fentry_read_sz = 0;
+
+SEC("fentry/bpf_testmod_test_read")
+int BPF_PROG(handle_fentry,
+ struct file *file, struct kobject *kobj,
+ struct bin_attribute *bin_attr, char *buf, loff_t off, size_t len)
+{
+ fentry_read_sz = len;
+ return 0;
+}
+
+__u32 fentry_manual_read_sz = 0;
+
+SEC("fentry")
+int BPF_PROG(handle_fentry_manual,
+ struct file *file, struct kobject *kobj,
+ struct bin_attribute *bin_attr, char *buf, loff_t off, size_t len)
+{
+ fentry_manual_read_sz = len;
+ return 0;
+}
+
+__u32 fexit_read_sz = 0;
+int fexit_ret = 0;
+
+SEC("fexit/bpf_testmod_test_read")
+int BPF_PROG(handle_fexit,
+ struct file *file, struct kobject *kobj,
+ struct bin_attribute *bin_attr, char *buf, loff_t off, size_t len,
+ int ret)
+{
+ fexit_read_sz = len;
+ fexit_ret = ret;
+ return 0;
+}
+
+SEC("fexit/bpf_testmod_return_ptr")
+int BPF_PROG(handle_fexit_ret, int arg, struct file *ret)
+{
+ long buf = 0;
+
+ bpf_probe_read_kernel(&buf, 8, ret);
+ bpf_probe_read_kernel(&buf, 8, (char *)ret + 256);
+ *(volatile long long *)ret;
+ *(volatile int *)&ret->f_mode;
+ return 0;
+}
+
+__u32 fmod_ret_read_sz = 0;
+
+SEC("fmod_ret/bpf_testmod_test_read")
+int BPF_PROG(handle_fmod_ret,
+ struct file *file, struct kobject *kobj,
+ struct bin_attribute *bin_attr, char *buf, loff_t off, size_t len)
+{
+ fmod_ret_read_sz = len;
+ return 0; /* don't override the exit code */
+}
+
+SEC("kprobe.multi/bpf_testmod_test_read")
+int BPF_PROG(kprobe_multi)
+{
+ return 0;
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/test_netfilter_link_attach.c b/tools/testing/selftests/bpf/progs/test_netfilter_link_attach.c
new file mode 100644
index 000000000000..03a475160abe
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/test_netfilter_link_attach.c
@@ -0,0 +1,14 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+
+#include "vmlinux.h"
+#include <bpf/bpf_helpers.h>
+
+#define NF_ACCEPT 1
+
+SEC("netfilter")
+int nf_link_attach_test(struct bpf_nf_ctx *ctx)
+{
+ return NF_ACCEPT;
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/test_ns_current_pid_tgid.c b/tools/testing/selftests/bpf/progs/test_ns_current_pid_tgid.c
index 1dca70a6de2f..0763d49f9c42 100644
--- a/tools/testing/selftests/bpf/progs/test_ns_current_pid_tgid.c
+++ b/tools/testing/selftests/bpf/progs/test_ns_current_pid_tgid.c
@@ -5,31 +5,21 @@
#include <stdint.h>
#include <bpf/bpf_helpers.h>
-static volatile struct {
- __u64 dev;
- __u64 ino;
- __u64 pid_tgid;
- __u64 user_pid_tgid;
-} res;
+__u64 user_pid = 0;
+__u64 user_tgid = 0;
+__u64 dev = 0;
+__u64 ino = 0;
-SEC("raw_tracepoint/sys_enter")
-int trace(void *ctx)
+SEC("tracepoint/syscalls/sys_enter_nanosleep")
+int handler(const void *ctx)
{
- __u64 ns_pid_tgid, expected_pid;
struct bpf_pidns_info nsdata;
- __u32 key = 0;
- if (bpf_get_ns_current_pid_tgid(res.dev, res.ino, &nsdata,
- sizeof(struct bpf_pidns_info)))
+ if (bpf_get_ns_current_pid_tgid(dev, ino, &nsdata, sizeof(struct bpf_pidns_info)))
return 0;
- ns_pid_tgid = (__u64)nsdata.tgid << 32 | nsdata.pid;
- expected_pid = res.user_pid_tgid;
-
- if (expected_pid != ns_pid_tgid)
- return 0;
-
- res.pid_tgid = ns_pid_tgid;
+ user_pid = nsdata.pid;
+ user_tgid = nsdata.tgid;
return 0;
}
diff --git a/tools/testing/selftests/bpf/progs/test_obj_id.c b/tools/testing/selftests/bpf/progs/test_obj_id.c
index ded71b3ff6b4..2850ae788a91 100644
--- a/tools/testing/selftests/bpf/progs/test_obj_id.c
+++ b/tools/testing/selftests/bpf/progs/test_obj_id.c
@@ -4,6 +4,7 @@
#include <stddef.h>
#include <linux/bpf.h>
#include <bpf/bpf_helpers.h>
+#include "bpf_misc.h"
struct {
__uint(type, BPF_MAP_TYPE_ARRAY);
@@ -19,6 +20,7 @@ int test_obj_id(void *ctx)
__u64 *value;
value = bpf_map_lookup_elem(&test_map_id, &key);
+ __sink(value);
return 0;
}
diff --git a/tools/testing/selftests/bpf/progs/test_overhead.c b/tools/testing/selftests/bpf/progs/test_overhead.c
index 42403d088abc..abb7344b531f 100644
--- a/tools/testing/selftests/bpf/progs/test_overhead.c
+++ b/tools/testing/selftests/bpf/progs/test_overhead.c
@@ -39,10 +39,4 @@ int BPF_PROG(prog5, struct task_struct *tsk, const char *buf, bool exec)
return 0;
}
-SEC("fmod_ret/__set_task_comm")
-int BPF_PROG(prog6, struct task_struct *tsk, const char *buf, bool exec)
-{
- return !tsk;
-}
-
char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/test_parse_tcp_hdr_opt.c b/tools/testing/selftests/bpf/progs/test_parse_tcp_hdr_opt.c
new file mode 100644
index 000000000000..d9b2ba7ac340
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/test_parse_tcp_hdr_opt.c
@@ -0,0 +1,118 @@
+// SPDX-License-Identifier: GPL-2.0
+
+/* This parsing logic is taken from the open source library katran, a layer 4
+ * load balancer.
+ *
+ * This code logic using dynptrs can be found in test_parse_tcp_hdr_opt_dynptr.c
+ *
+ * https://github.com/facebookincubator/katran/blob/main/katran/lib/bpf/pckt_parsing.h
+ */
+
+#include <linux/bpf.h>
+#include <bpf/bpf_helpers.h>
+#include <linux/tcp.h>
+#include <stdbool.h>
+#include <linux/ipv6.h>
+#include <linux/if_ether.h>
+#include "test_tcp_hdr_options.h"
+
+char _license[] SEC("license") = "GPL";
+
+/* Kind number used for experiments */
+const __u32 tcp_hdr_opt_kind_tpr = 0xFD;
+/* Length of the tcp header option */
+const __u32 tcp_hdr_opt_len_tpr = 6;
+/* maximum number of header options to check to lookup server_id */
+const __u32 tcp_hdr_opt_max_opt_checks = 15;
+
+__u32 server_id;
+
+struct hdr_opt_state {
+ __u32 server_id;
+ __u8 byte_offset;
+ __u8 hdr_bytes_remaining;
+};
+
+static int parse_hdr_opt(const struct xdp_md *xdp, struct hdr_opt_state *state)
+{
+ const void *data = (void *)(long)xdp->data;
+ const void *data_end = (void *)(long)xdp->data_end;
+ __u8 *tcp_opt, kind, hdr_len;
+
+ tcp_opt = (__u8 *)(data + state->byte_offset);
+ if (tcp_opt + 1 > data_end)
+ return -1;
+
+ kind = tcp_opt[0];
+
+ if (kind == TCPOPT_EOL)
+ return -1;
+
+ if (kind == TCPOPT_NOP) {
+ state->hdr_bytes_remaining--;
+ state->byte_offset++;
+ return 0;
+ }
+
+ if (state->hdr_bytes_remaining < 2 ||
+ tcp_opt + sizeof(__u8) + sizeof(__u8) > data_end)
+ return -1;
+
+ hdr_len = tcp_opt[1];
+ if (hdr_len > state->hdr_bytes_remaining)
+ return -1;
+
+ if (kind == tcp_hdr_opt_kind_tpr) {
+ if (hdr_len != tcp_hdr_opt_len_tpr)
+ return -1;
+
+ if (tcp_opt + tcp_hdr_opt_len_tpr > data_end)
+ return -1;
+
+ state->server_id = *(__u32 *)&tcp_opt[2];
+ return 1;
+ }
+
+ state->hdr_bytes_remaining -= hdr_len;
+ state->byte_offset += hdr_len;
+ return 0;
+}
+
+SEC("xdp")
+int xdp_ingress_v6(struct xdp_md *xdp)
+{
+ const void *data = (void *)(long)xdp->data;
+ const void *data_end = (void *)(long)xdp->data_end;
+ struct hdr_opt_state opt_state = {};
+ __u8 tcp_hdr_opt_len = 0;
+ struct tcphdr *tcp_hdr;
+ __u64 tcp_offset = 0;
+ int err;
+
+ tcp_offset = sizeof(struct ethhdr) + sizeof(struct ipv6hdr);
+ tcp_hdr = (struct tcphdr *)(data + tcp_offset);
+ if (tcp_hdr + 1 > data_end)
+ return XDP_DROP;
+
+ tcp_hdr_opt_len = (tcp_hdr->doff * 4) - sizeof(struct tcphdr);
+ if (tcp_hdr_opt_len < tcp_hdr_opt_len_tpr)
+ return XDP_DROP;
+
+ opt_state.hdr_bytes_remaining = tcp_hdr_opt_len;
+ opt_state.byte_offset = sizeof(struct tcphdr) + tcp_offset;
+
+ /* max number of bytes of options in tcp header is 40 bytes */
+ for (int i = 0; i < tcp_hdr_opt_max_opt_checks; i++) {
+ err = parse_hdr_opt(xdp, &opt_state);
+
+ if (err || !opt_state.hdr_bytes_remaining)
+ break;
+ }
+
+ if (!opt_state.server_id)
+ return XDP_DROP;
+
+ server_id = opt_state.server_id;
+
+ return XDP_PASS;
+}
diff --git a/tools/testing/selftests/bpf/progs/test_parse_tcp_hdr_opt_dynptr.c b/tools/testing/selftests/bpf/progs/test_parse_tcp_hdr_opt_dynptr.c
new file mode 100644
index 000000000000..dc6e43bc6a62
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/test_parse_tcp_hdr_opt_dynptr.c
@@ -0,0 +1,114 @@
+// SPDX-License-Identifier: GPL-2.0
+
+/* This logic is lifted from a real-world use case of packet parsing, used in
+ * the open source library katran, a layer 4 load balancer.
+ *
+ * This test demonstrates how to parse packet contents using dynptrs. The
+ * original code (parsing without dynptrs) can be found in test_parse_tcp_hdr_opt.c
+ */
+
+#include <linux/bpf.h>
+#include <bpf/bpf_helpers.h>
+#include <linux/tcp.h>
+#include <stdbool.h>
+#include <linux/ipv6.h>
+#include <linux/if_ether.h>
+#include "test_tcp_hdr_options.h"
+#include "bpf_kfuncs.h"
+
+char _license[] SEC("license") = "GPL";
+
+/* Kind number used for experiments */
+const __u32 tcp_hdr_opt_kind_tpr = 0xFD;
+/* Length of the tcp header option */
+const __u32 tcp_hdr_opt_len_tpr = 6;
+/* maximum number of header options to check to lookup server_id */
+const __u32 tcp_hdr_opt_max_opt_checks = 15;
+
+__u32 server_id;
+
+static int parse_hdr_opt(struct bpf_dynptr *ptr, __u32 *off, __u8 *hdr_bytes_remaining,
+ __u32 *server_id)
+{
+ __u8 kind, hdr_len;
+ __u8 buffer[sizeof(kind) + sizeof(hdr_len) + sizeof(*server_id)];
+ __u8 *data;
+
+ __builtin_memset(buffer, 0, sizeof(buffer));
+
+ data = bpf_dynptr_slice(ptr, *off, buffer, sizeof(buffer));
+ if (!data)
+ return -1;
+
+ kind = data[0];
+
+ if (kind == TCPOPT_EOL)
+ return -1;
+
+ if (kind == TCPOPT_NOP) {
+ *off += 1;
+ *hdr_bytes_remaining -= 1;
+ return 0;
+ }
+
+ if (*hdr_bytes_remaining < 2)
+ return -1;
+
+ hdr_len = data[1];
+ if (hdr_len > *hdr_bytes_remaining)
+ return -1;
+
+ if (kind == tcp_hdr_opt_kind_tpr) {
+ if (hdr_len != tcp_hdr_opt_len_tpr)
+ return -1;
+
+ __builtin_memcpy(server_id, (__u32 *)(data + 2), sizeof(*server_id));
+ return 1;
+ }
+
+ *off += hdr_len;
+ *hdr_bytes_remaining -= hdr_len;
+ return 0;
+}
+
+SEC("xdp")
+int xdp_ingress_v6(struct xdp_md *xdp)
+{
+ __u8 buffer[sizeof(struct tcphdr)] = {};
+ __u8 hdr_bytes_remaining;
+ struct tcphdr *tcp_hdr;
+ __u8 tcp_hdr_opt_len;
+ int err = 0;
+ __u32 off;
+
+ struct bpf_dynptr ptr;
+
+ bpf_dynptr_from_xdp(xdp, 0, &ptr);
+
+ off = sizeof(struct ethhdr) + sizeof(struct ipv6hdr);
+
+ tcp_hdr = bpf_dynptr_slice(&ptr, off, buffer, sizeof(buffer));
+ if (!tcp_hdr)
+ return XDP_DROP;
+
+ tcp_hdr_opt_len = (tcp_hdr->doff * 4) - sizeof(struct tcphdr);
+ if (tcp_hdr_opt_len < tcp_hdr_opt_len_tpr)
+ return XDP_DROP;
+
+ hdr_bytes_remaining = tcp_hdr_opt_len;
+
+ off += sizeof(struct tcphdr);
+
+ /* max number of bytes of options in tcp header is 40 bytes */
+ for (int i = 0; i < tcp_hdr_opt_max_opt_checks; i++) {
+ err = parse_hdr_opt(&ptr, &off, &hdr_bytes_remaining, &server_id);
+
+ if (err || !hdr_bytes_remaining)
+ break;
+ }
+
+ if (!server_id)
+ return XDP_DROP;
+
+ return XDP_PASS;
+}
diff --git a/tools/testing/selftests/bpf/progs/test_pe_preserve_elems.c b/tools/testing/selftests/bpf/progs/test_pe_preserve_elems.c
new file mode 100644
index 000000000000..1249a945699f
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/test_pe_preserve_elems.c
@@ -0,0 +1,38 @@
+// SPDX-License-Identifier: GPL-2.0
+// Copyright (c) 2020 Facebook
+#include <linux/bpf.h>
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
+
+struct {
+ __uint(type, BPF_MAP_TYPE_PERF_EVENT_ARRAY);
+ __uint(max_entries, 1);
+ __type(key, int);
+ __type(value, int);
+} array_1 SEC(".maps");
+
+struct {
+ __uint(type, BPF_MAP_TYPE_PERF_EVENT_ARRAY);
+ __uint(max_entries, 1);
+ __type(key, int);
+ __type(value, int);
+ __uint(map_flags, BPF_F_PRESERVE_ELEMS);
+} array_2 SEC(".maps");
+
+SEC("raw_tp/sched_switch")
+int BPF_PROG(read_array_1)
+{
+ struct bpf_perf_event_value val;
+
+ return bpf_perf_event_read_value(&array_1, 0, &val, sizeof(val));
+}
+
+SEC("raw_tp/task_rename")
+int BPF_PROG(read_array_2)
+{
+ struct bpf_perf_event_value val;
+
+ return bpf_perf_event_read_value(&array_2, 0, &val, sizeof(val));
+}
+
+char LICENSE[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/test_perf_buffer.c b/tools/testing/selftests/bpf/progs/test_perf_buffer.c
index ad59c4c9aba8..17d5b67744d5 100644
--- a/tools/testing/selftests/bpf/progs/test_perf_buffer.c
+++ b/tools/testing/selftests/bpf/progs/test_perf_buffer.c
@@ -7,19 +7,35 @@
#include <bpf/bpf_tracing.h>
struct {
+ __uint(type, BPF_MAP_TYPE_ARRAY);
+ __type(key, int);
+ __type(value, int);
+ __uint(max_entries, 1);
+} my_pid_map SEC(".maps");
+
+struct {
__uint(type, BPF_MAP_TYPE_PERF_EVENT_ARRAY);
- __uint(key_size, sizeof(int));
- __uint(value_size, sizeof(int));
+ __type(key, int);
+ __type(value, int);
} perf_buf_map SEC(".maps");
-SEC("kprobe/sys_nanosleep")
-int BPF_KPROBE(handle_sys_nanosleep_entry)
+SEC("tp/raw_syscalls/sys_enter")
+int handle_sys_enter(void *ctx)
{
+ int zero = 0, *my_pid, cur_pid;
int cpu = bpf_get_smp_processor_id();
+ my_pid = bpf_map_lookup_elem(&my_pid_map, &zero);
+ if (!my_pid)
+ return 1;
+
+ cur_pid = bpf_get_current_pid_tgid() >> 32;
+ if (cur_pid != *my_pid)
+ return 1;
+
bpf_perf_event_output(ctx, &perf_buf_map, BPF_F_CURRENT_CPU,
&cpu, sizeof(cpu));
- return 0;
+ return 1;
}
char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/test_perf_link.c b/tools/testing/selftests/bpf/progs/test_perf_link.c
new file mode 100644
index 000000000000..c1db9fd98d0c
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/test_perf_link.c
@@ -0,0 +1,16 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2021 Facebook */
+#include "vmlinux.h"
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
+
+int run_cnt = 0;
+
+SEC("perf_event")
+int handler(struct pt_regs *ctx)
+{
+ __sync_fetch_and_add(&run_cnt, 1);
+ return 0;
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/test_pinning.c b/tools/testing/selftests/bpf/progs/test_pinning.c
index 4ef2630292b2..0facea6cbbae 100644
--- a/tools/testing/selftests/bpf/progs/test_pinning.c
+++ b/tools/testing/selftests/bpf/progs/test_pinning.c
@@ -3,8 +3,6 @@
#include <linux/bpf.h>
#include <bpf/bpf_helpers.h>
-int _version SEC("version") = 1;
-
struct {
__uint(type, BPF_MAP_TYPE_ARRAY);
__uint(max_entries, 1);
diff --git a/tools/testing/selftests/bpf/progs/test_pinning_invalid.c b/tools/testing/selftests/bpf/progs/test_pinning_invalid.c
index 5412e0c732c7..2a56db1094b8 100644
--- a/tools/testing/selftests/bpf/progs/test_pinning_invalid.c
+++ b/tools/testing/selftests/bpf/progs/test_pinning_invalid.c
@@ -3,8 +3,6 @@
#include <linux/bpf.h>
#include <bpf/bpf_helpers.h>
-int _version SEC("version") = 1;
-
struct {
__uint(type, BPF_MAP_TYPE_ARRAY);
__uint(max_entries, 1);
diff --git a/tools/testing/selftests/bpf/progs/test_pkt_access.c b/tools/testing/selftests/bpf/progs/test_pkt_access.c
index e72eba4a93d2..bce7173152c6 100644
--- a/tools/testing/selftests/bpf/progs/test_pkt_access.c
+++ b/tools/testing/selftests/bpf/progs/test_pkt_access.c
@@ -13,9 +13,7 @@
#include <linux/pkt_cls.h>
#include <bpf/bpf_helpers.h>
#include <bpf/bpf_endian.h>
-
-#define barrier() __asm__ __volatile__("": : :"memory")
-int _version SEC("version") = 1;
+#include "bpf_misc.h"
/* llvm will optimize both subprograms into exactly the same BPF assembly
*
@@ -54,6 +52,8 @@ int get_skb_len(struct __sk_buff *skb)
{
volatile char buf[MAX_STACK] = {};
+ __sink(buf[MAX_STACK - 1]);
+
return skb->len;
}
@@ -76,10 +76,30 @@ int get_skb_ifindex(int val, struct __sk_buff *skb, int var)
{
volatile char buf[MAX_STACK] = {};
+ __sink(buf[MAX_STACK - 1]);
+
return skb->ifindex * val * var;
}
-SEC("classifier/test_pkt_access")
+__attribute__ ((noinline))
+int test_pkt_write_access_subprog(struct __sk_buff *skb, __u32 off)
+{
+ void *data = (void *)(long)skb->data;
+ void *data_end = (void *)(long)skb->data_end;
+ struct tcphdr *tcp = NULL;
+
+ if (off > sizeof(struct ethhdr) + sizeof(struct ipv6hdr))
+ return -1;
+
+ tcp = data + off;
+ if (tcp + 1 > data_end)
+ return -1;
+ /* make modification to the packet data */
+ tcp->check++;
+ return 0;
+}
+
+SEC("tc")
int test_pkt_access(struct __sk_buff *skb)
{
void *data_end = (void *)(long)skb->data_end;
@@ -117,6 +137,8 @@ int test_pkt_access(struct __sk_buff *skb)
if (test_pkt_access_subprog3(3, skb) != skb->len * 3 * skb->ifindex)
return TC_ACT_SHOT;
if (tcp) {
+ if (test_pkt_write_access_subprog(skb, (void *)tcp - data))
+ return TC_ACT_SHOT;
if (((void *)(tcp) + 20) > data_end || proto != 6)
return TC_ACT_SHOT;
barrier(); /* to force ordering of checks */
diff --git a/tools/testing/selftests/bpf/progs/test_pkt_md_access.c b/tools/testing/selftests/bpf/progs/test_pkt_md_access.c
index 610c74ea9f64..d1839366f3e1 100644
--- a/tools/testing/selftests/bpf/progs/test_pkt_md_access.c
+++ b/tools/testing/selftests/bpf/progs/test_pkt_md_access.c
@@ -7,8 +7,6 @@
#include <linux/pkt_cls.h>
#include <bpf/bpf_helpers.h>
-int _version SEC("version") = 1;
-
#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
#define TEST_FIELD(TYPE, FIELD, MASK) \
{ \
@@ -27,7 +25,7 @@ int _version SEC("version") = 1;
}
#endif
-SEC("classifier/test_pkt_md_access")
+SEC("tc")
int test_pkt_md_access(struct __sk_buff *skb)
{
TEST_FIELD(__u8, len, 0xFF);
diff --git a/tools/testing/selftests/bpf/progs/test_probe_read_user_str.c b/tools/testing/selftests/bpf/progs/test_probe_read_user_str.c
new file mode 100644
index 000000000000..3ae398b75dcd
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/test_probe_read_user_str.c
@@ -0,0 +1,25 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include <linux/bpf.h>
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
+
+#include <sys/types.h>
+
+pid_t pid = 0;
+long ret = 0;
+void *user_ptr = 0;
+char buf[256] = {};
+
+SEC("tracepoint/syscalls/sys_enter_nanosleep")
+int on_write(void *ctx)
+{
+ if (pid != (bpf_get_current_pid_tgid() >> 32))
+ return 0;
+
+ ret = bpf_probe_read_user_str(buf, sizeof(buf), user_ptr);
+
+ return 0;
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/test_probe_user.c b/tools/testing/selftests/bpf/progs/test_probe_user.c
index 89b3532ccc75..a8e501af9604 100644
--- a/tools/testing/selftests/bpf/progs/test_probe_user.c
+++ b/tools/testing/selftests/bpf/progs/test_probe_user.c
@@ -1,26 +1,47 @@
// SPDX-License-Identifier: GPL-2.0
-
-#include <linux/ptrace.h>
-#include <linux/bpf.h>
-
-#include <netinet/in.h>
-
+#include "vmlinux.h"
#include <bpf/bpf_helpers.h>
#include <bpf/bpf_tracing.h>
+#include <bpf/bpf_core_read.h>
+#include "bpf_misc.h"
static struct sockaddr_in old;
-SEC("kprobe/__sys_connect")
-int BPF_KPROBE(handle_sys_connect)
+static int handle_sys_connect_common(struct sockaddr_in *uservaddr)
{
- void *ptr = (void *)PT_REGS_PARM2(ctx);
struct sockaddr_in new;
- bpf_probe_read_user(&old, sizeof(old), ptr);
+ bpf_probe_read_user(&old, sizeof(old), uservaddr);
__builtin_memset(&new, 0xab, sizeof(new));
- bpf_probe_write_user(ptr, &new, sizeof(new));
+ bpf_probe_write_user(uservaddr, &new, sizeof(new));
+
+ return 0;
+}
+
+SEC("ksyscall/connect")
+int BPF_KSYSCALL(handle_sys_connect, int fd, struct sockaddr_in *uservaddr,
+ int addrlen)
+{
+ return handle_sys_connect_common(uservaddr);
+}
+
+#if defined(bpf_target_s390)
+#ifndef SYS_CONNECT
+#define SYS_CONNECT 3
+#endif
+
+SEC("ksyscall/socketcall")
+int BPF_KSYSCALL(handle_sys_socketcall, int call, unsigned long *args)
+{
+ if (call == SYS_CONNECT) {
+ struct sockaddr_in *uservaddr;
+
+ bpf_probe_read_user(&uservaddr, sizeof(uservaddr), &args[1]);
+ return handle_sys_connect_common(uservaddr);
+ }
return 0;
}
+#endif
char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/test_prog_array_init.c b/tools/testing/selftests/bpf/progs/test_prog_array_init.c
new file mode 100644
index 000000000000..2cd138356126
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/test_prog_array_init.c
@@ -0,0 +1,39 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* Copyright (c) 2021 Hengqi Chen */
+
+#include "vmlinux.h"
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
+
+const volatile pid_t my_pid = 0;
+int value = 0;
+
+SEC("raw_tp/sys_enter")
+int tailcall_1(void *ctx)
+{
+ value = 42;
+ return 0;
+}
+
+struct {
+ __uint(type, BPF_MAP_TYPE_PROG_ARRAY);
+ __uint(max_entries, 2);
+ __uint(key_size, sizeof(__u32));
+ __array(values, int (void *));
+} prog_array_init SEC(".maps") = {
+ .values = {
+ [1] = (void *)&tailcall_1,
+ },
+};
+
+SEC("raw_tp/sys_enter")
+int entry(void *ctx)
+{
+ pid_t pid = bpf_get_current_pid_tgid() >> 32;
+
+ if (pid != my_pid)
+ return 0;
+
+ bpf_tail_call(ctx, &prog_array_init, 1);
+ return 0;
+}
diff --git a/tools/testing/selftests/bpf/progs/test_ptr_untrusted.c b/tools/testing/selftests/bpf/progs/test_ptr_untrusted.c
new file mode 100644
index 000000000000..2fdc44e76624
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/test_ptr_untrusted.c
@@ -0,0 +1,29 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (C) 2023 Yafang Shao <laoar.shao@gmail.com> */
+
+#include "vmlinux.h"
+#include <bpf/bpf_tracing.h>
+
+char tp_name[128];
+
+SEC("lsm.s/bpf")
+int BPF_PROG(lsm_run, int cmd, union bpf_attr *attr, unsigned int size)
+{
+ switch (cmd) {
+ case BPF_RAW_TRACEPOINT_OPEN:
+ bpf_copy_from_user(tp_name, sizeof(tp_name) - 1,
+ (void *)attr->raw_tracepoint.name);
+ break;
+ default:
+ break;
+ }
+ return 0;
+}
+
+SEC("raw_tracepoint")
+int BPF_PROG(raw_tp_run)
+{
+ return 0;
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/test_queue_stack_map.h b/tools/testing/selftests/bpf/progs/test_queue_stack_map.h
index 4dd9806ad73b..648e8cab7a23 100644
--- a/tools/testing/selftests/bpf/progs/test_queue_stack_map.h
+++ b/tools/testing/selftests/bpf/progs/test_queue_stack_map.h
@@ -8,8 +8,6 @@
#include <linux/pkt_cls.h>
#include <bpf/bpf_helpers.h>
-int _version SEC("version") = 1;
-
struct {
__uint(type, MAP_TYPE);
__uint(max_entries, 32);
@@ -26,7 +24,7 @@ struct {
__uint(value_size, sizeof(__u32));
} map_out SEC(".maps");
-SEC("test")
+SEC("tc")
int _test(struct __sk_buff *skb)
{
void *data_end = (void *)(long)skb->data_end;
diff --git a/tools/testing/selftests/bpf/progs/test_raw_tp_test_run.c b/tools/testing/selftests/bpf/progs/test_raw_tp_test_run.c
new file mode 100644
index 000000000000..4c63cc87b9d0
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/test_raw_tp_test_run.c
@@ -0,0 +1,24 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2020 Facebook */
+
+#include "vmlinux.h"
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
+
+__u32 count = 0;
+__u32 on_cpu = 0xffffffff;
+
+SEC("raw_tp/task_rename")
+int BPF_PROG(rename, struct task_struct *task, char *comm)
+{
+
+ count++;
+ if ((__u64) task == 0x1234ULL && (__u64) comm == 0x5678ULL) {
+ on_cpu = bpf_get_smp_processor_id();
+ return (long)task + (long)comm;
+ }
+
+ return 0;
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/test_rdonly_maps.c b/tools/testing/selftests/bpf/progs/test_rdonly_maps.c
index ecbeea2df259..fc8e8a34a3db 100644
--- a/tools/testing/selftests/bpf/progs/test_rdonly_maps.c
+++ b/tools/testing/selftests/bpf/progs/test_rdonly_maps.c
@@ -5,7 +5,7 @@
#include <linux/bpf.h>
#include <bpf/bpf_helpers.h>
-static volatile const struct {
+const struct {
unsigned a[4];
/*
* if the struct's size is multiple of 16, compiler will put it into
@@ -15,11 +15,11 @@ static volatile const struct {
char _y;
} rdonly_values = { .a = {2, 3, 4, 5} };
-static volatile struct {
+struct {
unsigned did_run;
unsigned iters;
unsigned sum;
-} res;
+} res = {};
SEC("raw_tracepoint/sys_enter:skip_loop")
int skip_loop(struct pt_regs *ctx)
diff --git a/tools/testing/selftests/bpf/progs/test_ringbuf.c b/tools/testing/selftests/bpf/progs/test_ringbuf.c
index 8ba9959b036b..501cefa97633 100644
--- a/tools/testing/selftests/bpf/progs/test_ringbuf.c
+++ b/tools/testing/selftests/bpf/progs/test_ringbuf.c
@@ -3,6 +3,7 @@
#include <linux/bpf.h>
#include <bpf/bpf_helpers.h>
+#include "bpf_misc.h"
char _license[] SEC("license") = "GPL";
@@ -15,7 +16,6 @@ struct sample {
struct {
__uint(type, BPF_MAP_TYPE_RINGBUF);
- __uint(max_entries, 1 << 12);
} ringbuf SEC(".maps");
/* inputs */
@@ -36,12 +36,11 @@ long prod_pos = 0;
/* inner state */
long seq = 0;
-SEC("tp/syscalls/sys_enter_getpgid")
+SEC("fentry/" SYS_PREFIX "sys_getpgid")
int test_ringbuf(void *ctx)
{
int cur_pid = bpf_get_current_pid_tgid() >> 32;
struct sample *sample;
- int zero = 0;
if (cur_pid != pid)
return 0;
@@ -49,7 +48,7 @@ int test_ringbuf(void *ctx)
sample = bpf_ringbuf_reserve(&ringbuf, sizeof(*sample), 0);
if (!sample) {
__sync_fetch_and_add(&dropped, 1);
- return 1;
+ return 0;
}
sample->pid = pid;
diff --git a/tools/testing/selftests/bpf/progs/test_ringbuf_map_key.c b/tools/testing/selftests/bpf/progs/test_ringbuf_map_key.c
new file mode 100644
index 000000000000..21bb7da90ea5
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/test_ringbuf_map_key.c
@@ -0,0 +1,71 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2022 Meta Platforms, Inc. and affiliates. */
+
+#include <linux/bpf.h>
+#include <bpf/bpf_helpers.h>
+#include "bpf_misc.h"
+
+char _license[] SEC("license") = "GPL";
+
+struct sample {
+ int pid;
+ int seq;
+ long value;
+ char comm[16];
+};
+
+struct {
+ __uint(type, BPF_MAP_TYPE_RINGBUF);
+} ringbuf SEC(".maps");
+
+struct {
+ __uint(type, BPF_MAP_TYPE_HASH);
+ __uint(max_entries, 1000);
+ __type(key, struct sample);
+ __type(value, int);
+} hash_map SEC(".maps");
+
+/* inputs */
+int pid = 0;
+
+/* inner state */
+long seq = 0;
+
+SEC("fentry/" SYS_PREFIX "sys_getpgid")
+int test_ringbuf_mem_map_key(void *ctx)
+{
+ int cur_pid = bpf_get_current_pid_tgid() >> 32;
+ struct sample *sample, sample_copy;
+ int *lookup_val;
+
+ if (cur_pid != pid)
+ return 0;
+
+ sample = bpf_ringbuf_reserve(&ringbuf, sizeof(*sample), 0);
+ if (!sample)
+ return 0;
+
+ sample->pid = pid;
+ bpf_get_current_comm(sample->comm, sizeof(sample->comm));
+ sample->seq = ++seq;
+ sample->value = 42;
+
+ /* test using 'sample' (PTR_TO_MEM | MEM_ALLOC) as map key arg
+ */
+ lookup_val = (int *)bpf_map_lookup_elem(&hash_map, sample);
+ __sink(lookup_val);
+
+ /* workaround - memcpy is necessary so that verifier doesn't
+ * complain with:
+ * verifier internal error: more than one arg with ref_obj_id R3
+ * when trying to do bpf_map_update_elem(&hash_map, sample, &sample->seq, BPF_ANY);
+ *
+ * Since bpf_map_lookup_elem above uses 'sample' as key, test using
+ * sample field as value below
+ */
+ __builtin_memcpy(&sample_copy, sample, sizeof(struct sample));
+ bpf_map_update_elem(&hash_map, &sample_copy, &sample->seq, BPF_ANY);
+
+ bpf_ringbuf_submit(sample, 0);
+ return 0;
+}
diff --git a/tools/testing/selftests/bpf/progs/test_ringbuf_multi.c b/tools/testing/selftests/bpf/progs/test_ringbuf_multi.c
index edf3b6953533..9626baa6779c 100644
--- a/tools/testing/selftests/bpf/progs/test_ringbuf_multi.c
+++ b/tools/testing/selftests/bpf/progs/test_ringbuf_multi.c
@@ -15,7 +15,8 @@ struct sample {
struct ringbuf_map {
__uint(type, BPF_MAP_TYPE_RINGBUF);
- __uint(max_entries, 1 << 12);
+ /* libbpf will adjust to valid page size */
+ __uint(max_entries, 1000);
} ringbuf1 SEC(".maps"),
ringbuf2 SEC(".maps");
@@ -31,6 +32,17 @@ struct {
},
};
+struct {
+ __uint(type, BPF_MAP_TYPE_HASH_OF_MAPS);
+ __uint(max_entries, 1);
+ __type(key, int);
+ __array(values, struct ringbuf_map);
+} ringbuf_hash SEC(".maps") = {
+ .values = {
+ [0] = &ringbuf1,
+ },
+};
+
/* inputs */
int pid = 0;
int target_ring = 0;
@@ -47,7 +59,6 @@ int test_ringbuf(void *ctx)
int cur_pid = bpf_get_current_pid_tgid() >> 32;
struct sample *sample;
void *rb;
- int zero = 0;
if (cur_pid != pid)
return 0;
diff --git a/tools/testing/selftests/bpf/progs/test_seg6_loop.c b/tools/testing/selftests/bpf/progs/test_seg6_loop.c
index a7278f064368..5059050f74f6 100644
--- a/tools/testing/selftests/bpf/progs/test_seg6_loop.c
+++ b/tools/testing/selftests/bpf/progs/test_seg6_loop.c
@@ -6,6 +6,8 @@
#include <bpf/bpf_helpers.h>
#include <bpf/bpf_endian.h>
+#include "bpf_compiler.h"
+
/* Packet parsing state machine helpers. */
#define cursor_advance(_cursor, _len) \
({ void *_tmp = _cursor; _cursor += _len; _tmp; })
@@ -134,7 +136,7 @@ static __always_inline int is_valid_tlv_boundary(struct __sk_buff *skb,
// we can only go as far as ~10 TLVs due to the BPF max stack size
// workaround: define induction variable "i" as "long" instead
// of "int" to prevent alu32 sub-register spilling.
- #pragma clang loop unroll(disable)
+ __pragma_loop_no_unroll
for (long i = 0; i < 100; i++) {
struct sr6_tlv_t tlv;
diff --git a/tools/testing/selftests/bpf/progs/test_select_reuseport_kern.c b/tools/testing/selftests/bpf/progs/test_select_reuseport_kern.c
index 26e77dcc7e91..5eb25c6ad75b 100644
--- a/tools/testing/selftests/bpf/progs/test_select_reuseport_kern.c
+++ b/tools/testing/selftests/bpf/progs/test_select_reuseport_kern.c
@@ -15,8 +15,6 @@
#include <bpf/bpf_helpers.h>
#include "test_select_reuseport_common.h"
-int _version SEC("version") = 1;
-
#ifndef offsetof
#define offsetof(TYPE, MEMBER) ((size_t) &((TYPE *)0)->MEMBER)
#endif
@@ -24,8 +22,8 @@ int _version SEC("version") = 1;
struct {
__uint(type, BPF_MAP_TYPE_ARRAY_OF_MAPS);
__uint(max_entries, 1);
- __uint(key_size, sizeof(__u32));
- __uint(value_size, sizeof(__u32));
+ __type(key, __u32);
+ __type(value, __u32);
} outer_map SEC(".maps");
struct {
@@ -66,7 +64,7 @@ SEC("sk_reuseport")
int _select_by_skb_data(struct sk_reuseport_md *reuse_md)
{
__u32 linum, index = 0, flags = 0, index_zero = 0;
- __u32 *result_cnt, *linum_value;
+ __u32 *result_cnt;
struct data_check data_check = {};
struct cmd *cmd, cmd_copy;
void *data, *data_end;
diff --git a/tools/testing/selftests/bpf/progs/test_send_signal_kern.c b/tools/testing/selftests/bpf/progs/test_send_signal_kern.c
index b4233d3efac2..92354cd72044 100644
--- a/tools/testing/selftests/bpf/progs/test_send_signal_kern.c
+++ b/tools/testing/selftests/bpf/progs/test_send_signal_kern.c
@@ -10,7 +10,7 @@ static __always_inline int bpf_send_signal_test(void *ctx)
{
int ret;
- if (status != 0 || sig == 0 || pid == 0)
+ if (status != 0 || pid == 0)
return 0;
if ((bpf_get_current_pid_tgid() >> 32) == pid) {
diff --git a/tools/testing/selftests/bpf/progs/test_sig_in_xattr.c b/tools/testing/selftests/bpf/progs/test_sig_in_xattr.c
new file mode 100644
index 000000000000..2f0eb1334d65
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/test_sig_in_xattr.c
@@ -0,0 +1,83 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2023 Meta Platforms, Inc. and affiliates. */
+
+#include "vmlinux.h"
+#include <errno.h>
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
+#include "bpf_kfuncs.h"
+
+char _license[] SEC("license") = "GPL";
+
+#ifndef SHA256_DIGEST_SIZE
+#define SHA256_DIGEST_SIZE 32
+#endif
+
+#define MAX_SIG_SIZE 1024
+
+/* By default, "fsverity sign" signs a file with fsverity_formatted_digest
+ * of the file. fsverity_formatted_digest on the kernel side is only used
+ * with CONFIG_FS_VERITY_BUILTIN_SIGNATURES. However, BPF LSM doesn't not
+ * require CONFIG_FS_VERITY_BUILTIN_SIGNATURES, so vmlinux.h may not have
+ * fsverity_formatted_digest. In this test, we intentionally avoid using
+ * fsverity_formatted_digest.
+ *
+ * Luckily, fsverity_formatted_digest is simply 8-byte magic followed by
+ * fsverity_digest. We use a char array of size fsverity_formatted_digest
+ * plus SHA256_DIGEST_SIZE. The magic part of it is filled by user space,
+ * and the rest of it is filled by bpf_get_fsverity_digest.
+ *
+ * Note that, generating signatures based on fsverity_formatted_digest is
+ * the design choice of this selftest (and "fsverity sign"). With BPF
+ * LSM, we have the flexibility to generate signature based on other data
+ * sets, for example, fsverity_digest or only the digest[] part of it.
+ */
+#define MAGIC_SIZE 8
+#define SIZEOF_STRUCT_FSVERITY_DIGEST 4 /* sizeof(struct fsverity_digest) */
+char digest[MAGIC_SIZE + SIZEOF_STRUCT_FSVERITY_DIGEST + SHA256_DIGEST_SIZE];
+
+__u32 monitored_pid;
+char sig[MAX_SIG_SIZE];
+__u32 sig_size;
+__u32 user_keyring_serial;
+
+SEC("lsm.s/file_open")
+int BPF_PROG(test_file_open, struct file *f)
+{
+ struct bpf_dynptr digest_ptr, sig_ptr;
+ struct bpf_key *trusted_keyring;
+ __u32 pid;
+ int ret;
+
+ pid = bpf_get_current_pid_tgid() >> 32;
+ if (pid != monitored_pid)
+ return 0;
+
+ /* digest_ptr points to fsverity_digest */
+ bpf_dynptr_from_mem(digest + MAGIC_SIZE, sizeof(digest) - MAGIC_SIZE, 0, &digest_ptr);
+
+ ret = bpf_get_fsverity_digest(f, &digest_ptr);
+ /* No verity, allow access */
+ if (ret < 0)
+ return 0;
+
+ /* Move digest_ptr to fsverity_formatted_digest */
+ bpf_dynptr_from_mem(digest, sizeof(digest), 0, &digest_ptr);
+
+ /* Read signature from xattr */
+ bpf_dynptr_from_mem(sig, sizeof(sig), 0, &sig_ptr);
+ ret = bpf_get_file_xattr(f, "user.sig", &sig_ptr);
+ /* No signature, reject access */
+ if (ret < 0)
+ return -EPERM;
+
+ trusted_keyring = bpf_lookup_user_key(user_keyring_serial, 0);
+ if (!trusted_keyring)
+ return -ENOENT;
+
+ /* Verify signature */
+ ret = bpf_verify_pkcs7_signature(&digest_ptr, &sig_ptr, trusted_keyring);
+
+ bpf_key_put(trusted_keyring);
+ return ret;
+}
diff --git a/tools/testing/selftests/bpf/progs/test_siphash.h b/tools/testing/selftests/bpf/progs/test_siphash.h
new file mode 100644
index 000000000000..5d3a7ec36780
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/test_siphash.h
@@ -0,0 +1,64 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright Amazon.com Inc. or its affiliates. */
+
+#ifndef _TEST_SIPHASH_H
+#define _TEST_SIPHASH_H
+
+/* include/linux/bitops.h */
+static inline u64 rol64(u64 word, unsigned int shift)
+{
+ return (word << (shift & 63)) | (word >> ((-shift) & 63));
+}
+
+/* include/linux/siphash.h */
+#define SIPHASH_PERMUTATION(a, b, c, d) ( \
+ (a) += (b), (b) = rol64((b), 13), (b) ^= (a), (a) = rol64((a), 32), \
+ (c) += (d), (d) = rol64((d), 16), (d) ^= (c), \
+ (a) += (d), (d) = rol64((d), 21), (d) ^= (a), \
+ (c) += (b), (b) = rol64((b), 17), (b) ^= (c), (c) = rol64((c), 32))
+
+#define SIPHASH_CONST_0 0x736f6d6570736575ULL
+#define SIPHASH_CONST_1 0x646f72616e646f6dULL
+#define SIPHASH_CONST_2 0x6c7967656e657261ULL
+#define SIPHASH_CONST_3 0x7465646279746573ULL
+
+/* lib/siphash.c */
+#define SIPROUND SIPHASH_PERMUTATION(v0, v1, v2, v3)
+
+#define PREAMBLE(len) \
+ u64 v0 = SIPHASH_CONST_0; \
+ u64 v1 = SIPHASH_CONST_1; \
+ u64 v2 = SIPHASH_CONST_2; \
+ u64 v3 = SIPHASH_CONST_3; \
+ u64 b = ((u64)(len)) << 56; \
+ v3 ^= key->key[1]; \
+ v2 ^= key->key[0]; \
+ v1 ^= key->key[1]; \
+ v0 ^= key->key[0];
+
+#define POSTAMBLE \
+ v3 ^= b; \
+ SIPROUND; \
+ SIPROUND; \
+ v0 ^= b; \
+ v2 ^= 0xff; \
+ SIPROUND; \
+ SIPROUND; \
+ SIPROUND; \
+ SIPROUND; \
+ return (v0 ^ v1) ^ (v2 ^ v3);
+
+static inline u64 siphash_2u64(const u64 first, const u64 second, const siphash_key_t *key)
+{
+ PREAMBLE(16)
+ v3 ^= first;
+ SIPROUND;
+ SIPROUND;
+ v0 ^= first;
+ v3 ^= second;
+ SIPROUND;
+ SIPROUND;
+ v0 ^= second;
+ POSTAMBLE
+}
+#endif
diff --git a/tools/testing/selftests/bpf/progs/test_sk_assign.c b/tools/testing/selftests/bpf/progs/test_sk_assign.c
index 1ecd987005d2..3079244c7f96 100644
--- a/tools/testing/selftests/bpf/progs/test_sk_assign.c
+++ b/tools/testing/selftests/bpf/progs/test_sk_assign.c
@@ -15,7 +15,18 @@
#include <sys/socket.h>
#include <bpf/bpf_helpers.h>
#include <bpf/bpf_endian.h>
+#include "bpf_misc.h"
+#if defined(IPROUTE2_HAVE_LIBBPF)
+/* Use a new-style map definition. */
+struct {
+ __uint(type, BPF_MAP_TYPE_SOCKMAP);
+ __type(key, int);
+ __type(value, __u64);
+ __uint(pinning, LIBBPF_PIN_BY_NAME);
+ __uint(max_entries, 1);
+} server_map SEC(".maps");
+#else
/* Pin map under /sys/fs/bpf/tc/globals/<map name> */
#define PIN_GLOBAL_NS 2
@@ -35,8 +46,8 @@ struct {
.max_elem = 1,
.pinning = PIN_GLOBAL_NS,
};
+#endif
-int _version SEC("version") = 1;
char _license[] SEC("license") = "GPL";
/* Fill 'tuple' with L3 info, and attempt to find L4. On fail, return NULL. */
@@ -47,7 +58,6 @@ get_tuple(struct __sk_buff *skb, bool *ipv4, bool *tcp)
void *data = (void *)(long)skb->data;
struct bpf_sock_tuple *result;
struct ethhdr *eth;
- __u64 tuple_len;
__u8 proto = 0;
__u64 ihl_len;
@@ -84,13 +94,13 @@ get_tuple(struct __sk_buff *skb, bool *ipv4, bool *tcp)
return NULL;
*tcp = (proto == IPPROTO_TCP);
+ __sink(ihl_len);
return result;
}
static inline int
handle_udp(struct __sk_buff *skb, struct bpf_sock_tuple *tuple, bool ipv4)
{
- struct bpf_sock_tuple ln = {0};
struct bpf_sock *sk;
const int zero = 0;
size_t tuple_len;
@@ -122,7 +132,6 @@ assign:
static inline int
handle_tcp(struct __sk_buff *skb, struct bpf_sock_tuple *tuple, bool ipv4)
{
- struct bpf_sock_tuple ln = {0};
struct bpf_sock *sk;
const int zero = 0;
size_t tuple_len;
@@ -159,13 +168,12 @@ assign:
return ret;
}
-SEC("classifier/sk_assign_test")
+SEC("tc")
int bpf_sk_assign_test(struct __sk_buff *skb)
{
- struct bpf_sock_tuple *tuple, ln = {0};
+ struct bpf_sock_tuple *tuple;
bool ipv4 = false;
bool tcp = false;
- int tuple_len;
int ret = 0;
tuple = get_tuple(skb, &ipv4, &tcp);
diff --git a/tools/testing/selftests/bpf/progs/test_sk_assign_libbpf.c b/tools/testing/selftests/bpf/progs/test_sk_assign_libbpf.c
new file mode 100644
index 000000000000..dcf46adfda04
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/test_sk_assign_libbpf.c
@@ -0,0 +1,3 @@
+// SPDX-License-Identifier: GPL-2.0
+#define IPROUTE2_HAVE_LIBBPF
+#include "test_sk_assign.c"
diff --git a/tools/testing/selftests/bpf/progs/test_sk_lookup.c b/tools/testing/selftests/bpf/progs/test_sk_lookup.c
new file mode 100644
index 000000000000..71f844b9b902
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/test_sk_lookup.c
@@ -0,0 +1,660 @@
+// SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause
+// Copyright (c) 2020 Cloudflare
+
+#include <errno.h>
+#include <stdbool.h>
+#include <stddef.h>
+#include <linux/bpf.h>
+#include <linux/in.h>
+#include <sys/socket.h>
+
+#include <bpf/bpf_endian.h>
+#include <bpf/bpf_helpers.h>
+
+#define IP4(a, b, c, d) \
+ bpf_htonl((((__u32)(a) & 0xffU) << 24) | \
+ (((__u32)(b) & 0xffU) << 16) | \
+ (((__u32)(c) & 0xffU) << 8) | \
+ (((__u32)(d) & 0xffU) << 0))
+#define IP6(aaaa, bbbb, cccc, dddd) \
+ { bpf_htonl(aaaa), bpf_htonl(bbbb), bpf_htonl(cccc), bpf_htonl(dddd) }
+
+/* Macros for least-significant byte and word accesses. */
+#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
+#define LSE_INDEX(index, size) (index)
+#else
+#define LSE_INDEX(index, size) ((size) - (index) - 1)
+#endif
+#define LSB(value, index) \
+ (((__u8 *)&(value))[LSE_INDEX((index), sizeof(value))])
+#define LSW(value, index) \
+ (((__u16 *)&(value))[LSE_INDEX((index), sizeof(value) / 2)])
+
+#define MAX_SOCKS 32
+
+struct {
+ __uint(type, BPF_MAP_TYPE_SOCKMAP);
+ __uint(max_entries, MAX_SOCKS);
+ __type(key, __u32);
+ __type(value, __u64);
+} redir_map SEC(".maps");
+
+struct {
+ __uint(type, BPF_MAP_TYPE_ARRAY);
+ __uint(max_entries, 2);
+ __type(key, int);
+ __type(value, int);
+} run_map SEC(".maps");
+
+enum {
+ PROG1 = 0,
+ PROG2,
+};
+
+enum {
+ SERVER_A = 0,
+ SERVER_B,
+};
+
+/* Addressable key/value constants for convenience */
+static const int KEY_PROG1 = PROG1;
+static const int KEY_PROG2 = PROG2;
+static const int PROG_DONE = 1;
+
+static const __u32 KEY_SERVER_A = SERVER_A;
+static const __u32 KEY_SERVER_B = SERVER_B;
+
+static const __u16 SRC_PORT = bpf_htons(8008);
+static const __u32 SRC_IP4 = IP4(127, 0, 0, 2);
+static const __u32 SRC_IP6[] = IP6(0xfd000000, 0x0, 0x0, 0x00000002);
+
+static const __u16 DST_PORT = 7007; /* Host byte order */
+static const __u32 DST_IP4 = IP4(127, 0, 0, 1);
+static const __u32 DST_IP6[] = IP6(0xfd000000, 0x0, 0x0, 0x00000001);
+
+SEC("sk_lookup")
+int lookup_pass(struct bpf_sk_lookup *ctx)
+{
+ return SK_PASS;
+}
+
+SEC("sk_lookup")
+int lookup_drop(struct bpf_sk_lookup *ctx)
+{
+ return SK_DROP;
+}
+
+SEC("sk_lookup")
+int check_ifindex(struct bpf_sk_lookup *ctx)
+{
+ if (ctx->ingress_ifindex == 1)
+ return SK_DROP;
+ return SK_PASS;
+}
+
+SEC("sk_reuseport")
+int reuseport_pass(struct sk_reuseport_md *ctx)
+{
+ return SK_PASS;
+}
+
+SEC("sk_reuseport")
+int reuseport_drop(struct sk_reuseport_md *ctx)
+{
+ return SK_DROP;
+}
+
+/* Redirect packets destined for port DST_PORT to socket at redir_map[0]. */
+SEC("sk_lookup")
+int redir_port(struct bpf_sk_lookup *ctx)
+{
+ struct bpf_sock *sk;
+ int err;
+
+ if (ctx->local_port != DST_PORT)
+ return SK_PASS;
+
+ sk = bpf_map_lookup_elem(&redir_map, &KEY_SERVER_A);
+ if (!sk)
+ return SK_PASS;
+
+ err = bpf_sk_assign(ctx, sk, 0);
+ bpf_sk_release(sk);
+ return err ? SK_DROP : SK_PASS;
+}
+
+/* Redirect packets destined for DST_IP4 address to socket at redir_map[0]. */
+SEC("sk_lookup")
+int redir_ip4(struct bpf_sk_lookup *ctx)
+{
+ struct bpf_sock *sk;
+ int err;
+
+ if (ctx->family != AF_INET)
+ return SK_PASS;
+ if (ctx->local_port != DST_PORT)
+ return SK_PASS;
+ if (ctx->local_ip4 != DST_IP4)
+ return SK_PASS;
+
+ sk = bpf_map_lookup_elem(&redir_map, &KEY_SERVER_A);
+ if (!sk)
+ return SK_PASS;
+
+ err = bpf_sk_assign(ctx, sk, 0);
+ bpf_sk_release(sk);
+ return err ? SK_DROP : SK_PASS;
+}
+
+/* Redirect packets destined for DST_IP6 address to socket at redir_map[0]. */
+SEC("sk_lookup")
+int redir_ip6(struct bpf_sk_lookup *ctx)
+{
+ struct bpf_sock *sk;
+ int err;
+
+ if (ctx->family != AF_INET6)
+ return SK_PASS;
+ if (ctx->local_port != DST_PORT)
+ return SK_PASS;
+ if (ctx->local_ip6[0] != DST_IP6[0] ||
+ ctx->local_ip6[1] != DST_IP6[1] ||
+ ctx->local_ip6[2] != DST_IP6[2] ||
+ ctx->local_ip6[3] != DST_IP6[3])
+ return SK_PASS;
+
+ sk = bpf_map_lookup_elem(&redir_map, &KEY_SERVER_A);
+ if (!sk)
+ return SK_PASS;
+
+ err = bpf_sk_assign(ctx, sk, 0);
+ bpf_sk_release(sk);
+ return err ? SK_DROP : SK_PASS;
+}
+
+SEC("sk_lookup")
+int select_sock_a(struct bpf_sk_lookup *ctx)
+{
+ struct bpf_sock *sk;
+ int err;
+
+ sk = bpf_map_lookup_elem(&redir_map, &KEY_SERVER_A);
+ if (!sk)
+ return SK_PASS;
+
+ err = bpf_sk_assign(ctx, sk, 0);
+ bpf_sk_release(sk);
+ return err ? SK_DROP : SK_PASS;
+}
+
+SEC("sk_lookup")
+int select_sock_a_no_reuseport(struct bpf_sk_lookup *ctx)
+{
+ struct bpf_sock *sk;
+ int err;
+
+ sk = bpf_map_lookup_elem(&redir_map, &KEY_SERVER_A);
+ if (!sk)
+ return SK_DROP;
+
+ err = bpf_sk_assign(ctx, sk, BPF_SK_LOOKUP_F_NO_REUSEPORT);
+ bpf_sk_release(sk);
+ return err ? SK_DROP : SK_PASS;
+}
+
+SEC("sk_reuseport")
+int select_sock_b(struct sk_reuseport_md *ctx)
+{
+ __u32 key = KEY_SERVER_B;
+ int err;
+
+ err = bpf_sk_select_reuseport(ctx, &redir_map, &key, 0);
+ return err ? SK_DROP : SK_PASS;
+}
+
+/* Check that bpf_sk_assign() returns -EEXIST if socket already selected. */
+SEC("sk_lookup")
+int sk_assign_eexist(struct bpf_sk_lookup *ctx)
+{
+ struct bpf_sock *sk;
+ int err, ret;
+
+ ret = SK_DROP;
+ sk = bpf_map_lookup_elem(&redir_map, &KEY_SERVER_B);
+ if (!sk)
+ goto out;
+ err = bpf_sk_assign(ctx, sk, 0);
+ if (err)
+ goto out;
+ bpf_sk_release(sk);
+
+ sk = bpf_map_lookup_elem(&redir_map, &KEY_SERVER_A);
+ if (!sk)
+ goto out;
+ err = bpf_sk_assign(ctx, sk, 0);
+ if (err != -EEXIST) {
+ bpf_printk("sk_assign returned %d, expected %d\n",
+ err, -EEXIST);
+ goto out;
+ }
+
+ ret = SK_PASS; /* Success, redirect to KEY_SERVER_B */
+out:
+ if (sk)
+ bpf_sk_release(sk);
+ return ret;
+}
+
+/* Check that bpf_sk_assign(BPF_SK_LOOKUP_F_REPLACE) can override selection. */
+SEC("sk_lookup")
+int sk_assign_replace_flag(struct bpf_sk_lookup *ctx)
+{
+ struct bpf_sock *sk;
+ int err, ret;
+
+ ret = SK_DROP;
+ sk = bpf_map_lookup_elem(&redir_map, &KEY_SERVER_A);
+ if (!sk)
+ goto out;
+ err = bpf_sk_assign(ctx, sk, 0);
+ if (err)
+ goto out;
+ bpf_sk_release(sk);
+
+ sk = bpf_map_lookup_elem(&redir_map, &KEY_SERVER_B);
+ if (!sk)
+ goto out;
+ err = bpf_sk_assign(ctx, sk, BPF_SK_LOOKUP_F_REPLACE);
+ if (err) {
+ bpf_printk("sk_assign returned %d, expected 0\n", err);
+ goto out;
+ }
+
+ ret = SK_PASS; /* Success, redirect to KEY_SERVER_B */
+out:
+ if (sk)
+ bpf_sk_release(sk);
+ return ret;
+}
+
+/* Check that bpf_sk_assign(sk=NULL) is accepted. */
+SEC("sk_lookup")
+int sk_assign_null(struct bpf_sk_lookup *ctx)
+{
+ struct bpf_sock *sk = NULL;
+ int err, ret;
+
+ ret = SK_DROP;
+
+ err = bpf_sk_assign(ctx, NULL, 0);
+ if (err) {
+ bpf_printk("sk_assign returned %d, expected 0\n", err);
+ goto out;
+ }
+
+ sk = bpf_map_lookup_elem(&redir_map, &KEY_SERVER_B);
+ if (!sk)
+ goto out;
+ err = bpf_sk_assign(ctx, sk, BPF_SK_LOOKUP_F_REPLACE);
+ if (err) {
+ bpf_printk("sk_assign returned %d, expected 0\n", err);
+ goto out;
+ }
+
+ if (ctx->sk != sk)
+ goto out;
+ err = bpf_sk_assign(ctx, NULL, 0);
+ if (err != -EEXIST)
+ goto out;
+ err = bpf_sk_assign(ctx, NULL, BPF_SK_LOOKUP_F_REPLACE);
+ if (err)
+ goto out;
+ err = bpf_sk_assign(ctx, sk, BPF_SK_LOOKUP_F_REPLACE);
+ if (err)
+ goto out;
+
+ ret = SK_PASS; /* Success, redirect to KEY_SERVER_B */
+out:
+ if (sk)
+ bpf_sk_release(sk);
+ return ret;
+}
+
+/* Check that selected sk is accessible through context. */
+SEC("sk_lookup")
+int access_ctx_sk(struct bpf_sk_lookup *ctx)
+{
+ struct bpf_sock *sk1 = NULL, *sk2 = NULL;
+ int err, ret;
+
+ ret = SK_DROP;
+
+ /* Try accessing unassigned (NULL) ctx->sk field */
+ if (ctx->sk && ctx->sk->family != AF_INET)
+ goto out;
+
+ /* Assign a value to ctx->sk */
+ sk1 = bpf_map_lookup_elem(&redir_map, &KEY_SERVER_A);
+ if (!sk1)
+ goto out;
+ err = bpf_sk_assign(ctx, sk1, 0);
+ if (err)
+ goto out;
+ if (ctx->sk != sk1)
+ goto out;
+
+ /* Access ctx->sk fields */
+ if (ctx->sk->family != AF_INET ||
+ ctx->sk->type != SOCK_STREAM ||
+ ctx->sk->state != BPF_TCP_LISTEN)
+ goto out;
+
+ /* Reset selection */
+ err = bpf_sk_assign(ctx, NULL, BPF_SK_LOOKUP_F_REPLACE);
+ if (err)
+ goto out;
+ if (ctx->sk)
+ goto out;
+
+ /* Assign another socket */
+ sk2 = bpf_map_lookup_elem(&redir_map, &KEY_SERVER_B);
+ if (!sk2)
+ goto out;
+ err = bpf_sk_assign(ctx, sk2, BPF_SK_LOOKUP_F_REPLACE);
+ if (err)
+ goto out;
+ if (ctx->sk != sk2)
+ goto out;
+
+ /* Access reassigned ctx->sk fields */
+ if (ctx->sk->family != AF_INET ||
+ ctx->sk->type != SOCK_STREAM ||
+ ctx->sk->state != BPF_TCP_LISTEN)
+ goto out;
+
+ ret = SK_PASS; /* Success, redirect to KEY_SERVER_B */
+out:
+ if (sk1)
+ bpf_sk_release(sk1);
+ if (sk2)
+ bpf_sk_release(sk2);
+ return ret;
+}
+
+/* Check narrow loads from ctx fields that support them.
+ *
+ * Narrow loads of size >= target field size from a non-zero offset
+ * are not covered because they give bogus results, that is the
+ * verifier ignores the offset.
+ */
+SEC("sk_lookup")
+int ctx_narrow_access(struct bpf_sk_lookup *ctx)
+{
+ struct bpf_sock *sk;
+ __u32 val_u32;
+ bool v4;
+
+ v4 = (ctx->family == AF_INET);
+
+ /* Narrow loads from family field */
+ if (LSB(ctx->family, 0) != (v4 ? AF_INET : AF_INET6) ||
+ LSB(ctx->family, 1) != 0 || LSB(ctx->family, 2) != 0 || LSB(ctx->family, 3) != 0)
+ return SK_DROP;
+ if (LSW(ctx->family, 0) != (v4 ? AF_INET : AF_INET6))
+ return SK_DROP;
+
+ /* Narrow loads from protocol field */
+ if (LSB(ctx->protocol, 0) != IPPROTO_TCP ||
+ LSB(ctx->protocol, 1) != 0 || LSB(ctx->protocol, 2) != 0 || LSB(ctx->protocol, 3) != 0)
+ return SK_DROP;
+ if (LSW(ctx->protocol, 0) != IPPROTO_TCP)
+ return SK_DROP;
+
+ /* Narrow loads from remote_port field. Expect SRC_PORT. */
+ if (LSB(ctx->remote_port, 0) != ((SRC_PORT >> 0) & 0xff) ||
+ LSB(ctx->remote_port, 1) != ((SRC_PORT >> 8) & 0xff))
+ return SK_DROP;
+ if (LSW(ctx->remote_port, 0) != SRC_PORT)
+ return SK_DROP;
+
+ /*
+ * NOTE: 4-byte load from bpf_sk_lookup at remote_port offset
+ * is quirky. It gets rewritten by the access converter to a
+ * 2-byte load for backward compatibility. Treating the load
+ * result as a be16 value makes the code portable across
+ * little- and big-endian platforms.
+ */
+ val_u32 = *(__u32 *)&ctx->remote_port;
+ if (val_u32 != SRC_PORT)
+ return SK_DROP;
+
+ /* Narrow loads from local_port field. Expect DST_PORT. */
+ if (LSB(ctx->local_port, 0) != ((DST_PORT >> 0) & 0xff) ||
+ LSB(ctx->local_port, 1) != ((DST_PORT >> 8) & 0xff) ||
+ LSB(ctx->local_port, 2) != 0 || LSB(ctx->local_port, 3) != 0)
+ return SK_DROP;
+ if (LSW(ctx->local_port, 0) != DST_PORT)
+ return SK_DROP;
+
+ /* Narrow loads from IPv4 fields */
+ if (v4) {
+ /* Expect SRC_IP4 in remote_ip4 */
+ if (LSB(ctx->remote_ip4, 0) != ((SRC_IP4 >> 0) & 0xff) ||
+ LSB(ctx->remote_ip4, 1) != ((SRC_IP4 >> 8) & 0xff) ||
+ LSB(ctx->remote_ip4, 2) != ((SRC_IP4 >> 16) & 0xff) ||
+ LSB(ctx->remote_ip4, 3) != ((SRC_IP4 >> 24) & 0xff))
+ return SK_DROP;
+ if (LSW(ctx->remote_ip4, 0) != ((SRC_IP4 >> 0) & 0xffff) ||
+ LSW(ctx->remote_ip4, 1) != ((SRC_IP4 >> 16) & 0xffff))
+ return SK_DROP;
+
+ /* Expect DST_IP4 in local_ip4 */
+ if (LSB(ctx->local_ip4, 0) != ((DST_IP4 >> 0) & 0xff) ||
+ LSB(ctx->local_ip4, 1) != ((DST_IP4 >> 8) & 0xff) ||
+ LSB(ctx->local_ip4, 2) != ((DST_IP4 >> 16) & 0xff) ||
+ LSB(ctx->local_ip4, 3) != ((DST_IP4 >> 24) & 0xff))
+ return SK_DROP;
+ if (LSW(ctx->local_ip4, 0) != ((DST_IP4 >> 0) & 0xffff) ||
+ LSW(ctx->local_ip4, 1) != ((DST_IP4 >> 16) & 0xffff))
+ return SK_DROP;
+ } else {
+ /* Expect 0.0.0.0 IPs when family != AF_INET */
+ if (LSB(ctx->remote_ip4, 0) != 0 || LSB(ctx->remote_ip4, 1) != 0 ||
+ LSB(ctx->remote_ip4, 2) != 0 || LSB(ctx->remote_ip4, 3) != 0)
+ return SK_DROP;
+ if (LSW(ctx->remote_ip4, 0) != 0 || LSW(ctx->remote_ip4, 1) != 0)
+ return SK_DROP;
+
+ if (LSB(ctx->local_ip4, 0) != 0 || LSB(ctx->local_ip4, 1) != 0 ||
+ LSB(ctx->local_ip4, 2) != 0 || LSB(ctx->local_ip4, 3) != 0)
+ return SK_DROP;
+ if (LSW(ctx->local_ip4, 0) != 0 || LSW(ctx->local_ip4, 1) != 0)
+ return SK_DROP;
+ }
+
+ /* Narrow loads from IPv6 fields */
+ if (!v4) {
+ /* Expect SRC_IP6 in remote_ip6 */
+ if (LSB(ctx->remote_ip6[0], 0) != ((SRC_IP6[0] >> 0) & 0xff) ||
+ LSB(ctx->remote_ip6[0], 1) != ((SRC_IP6[0] >> 8) & 0xff) ||
+ LSB(ctx->remote_ip6[0], 2) != ((SRC_IP6[0] >> 16) & 0xff) ||
+ LSB(ctx->remote_ip6[0], 3) != ((SRC_IP6[0] >> 24) & 0xff) ||
+ LSB(ctx->remote_ip6[1], 0) != ((SRC_IP6[1] >> 0) & 0xff) ||
+ LSB(ctx->remote_ip6[1], 1) != ((SRC_IP6[1] >> 8) & 0xff) ||
+ LSB(ctx->remote_ip6[1], 2) != ((SRC_IP6[1] >> 16) & 0xff) ||
+ LSB(ctx->remote_ip6[1], 3) != ((SRC_IP6[1] >> 24) & 0xff) ||
+ LSB(ctx->remote_ip6[2], 0) != ((SRC_IP6[2] >> 0) & 0xff) ||
+ LSB(ctx->remote_ip6[2], 1) != ((SRC_IP6[2] >> 8) & 0xff) ||
+ LSB(ctx->remote_ip6[2], 2) != ((SRC_IP6[2] >> 16) & 0xff) ||
+ LSB(ctx->remote_ip6[2], 3) != ((SRC_IP6[2] >> 24) & 0xff) ||
+ LSB(ctx->remote_ip6[3], 0) != ((SRC_IP6[3] >> 0) & 0xff) ||
+ LSB(ctx->remote_ip6[3], 1) != ((SRC_IP6[3] >> 8) & 0xff) ||
+ LSB(ctx->remote_ip6[3], 2) != ((SRC_IP6[3] >> 16) & 0xff) ||
+ LSB(ctx->remote_ip6[3], 3) != ((SRC_IP6[3] >> 24) & 0xff))
+ return SK_DROP;
+ if (LSW(ctx->remote_ip6[0], 0) != ((SRC_IP6[0] >> 0) & 0xffff) ||
+ LSW(ctx->remote_ip6[0], 1) != ((SRC_IP6[0] >> 16) & 0xffff) ||
+ LSW(ctx->remote_ip6[1], 0) != ((SRC_IP6[1] >> 0) & 0xffff) ||
+ LSW(ctx->remote_ip6[1], 1) != ((SRC_IP6[1] >> 16) & 0xffff) ||
+ LSW(ctx->remote_ip6[2], 0) != ((SRC_IP6[2] >> 0) & 0xffff) ||
+ LSW(ctx->remote_ip6[2], 1) != ((SRC_IP6[2] >> 16) & 0xffff) ||
+ LSW(ctx->remote_ip6[3], 0) != ((SRC_IP6[3] >> 0) & 0xffff) ||
+ LSW(ctx->remote_ip6[3], 1) != ((SRC_IP6[3] >> 16) & 0xffff))
+ return SK_DROP;
+ /* Expect DST_IP6 in local_ip6 */
+ if (LSB(ctx->local_ip6[0], 0) != ((DST_IP6[0] >> 0) & 0xff) ||
+ LSB(ctx->local_ip6[0], 1) != ((DST_IP6[0] >> 8) & 0xff) ||
+ LSB(ctx->local_ip6[0], 2) != ((DST_IP6[0] >> 16) & 0xff) ||
+ LSB(ctx->local_ip6[0], 3) != ((DST_IP6[0] >> 24) & 0xff) ||
+ LSB(ctx->local_ip6[1], 0) != ((DST_IP6[1] >> 0) & 0xff) ||
+ LSB(ctx->local_ip6[1], 1) != ((DST_IP6[1] >> 8) & 0xff) ||
+ LSB(ctx->local_ip6[1], 2) != ((DST_IP6[1] >> 16) & 0xff) ||
+ LSB(ctx->local_ip6[1], 3) != ((DST_IP6[1] >> 24) & 0xff) ||
+ LSB(ctx->local_ip6[2], 0) != ((DST_IP6[2] >> 0) & 0xff) ||
+ LSB(ctx->local_ip6[2], 1) != ((DST_IP6[2] >> 8) & 0xff) ||
+ LSB(ctx->local_ip6[2], 2) != ((DST_IP6[2] >> 16) & 0xff) ||
+ LSB(ctx->local_ip6[2], 3) != ((DST_IP6[2] >> 24) & 0xff) ||
+ LSB(ctx->local_ip6[3], 0) != ((DST_IP6[3] >> 0) & 0xff) ||
+ LSB(ctx->local_ip6[3], 1) != ((DST_IP6[3] >> 8) & 0xff) ||
+ LSB(ctx->local_ip6[3], 2) != ((DST_IP6[3] >> 16) & 0xff) ||
+ LSB(ctx->local_ip6[3], 3) != ((DST_IP6[3] >> 24) & 0xff))
+ return SK_DROP;
+ if (LSW(ctx->local_ip6[0], 0) != ((DST_IP6[0] >> 0) & 0xffff) ||
+ LSW(ctx->local_ip6[0], 1) != ((DST_IP6[0] >> 16) & 0xffff) ||
+ LSW(ctx->local_ip6[1], 0) != ((DST_IP6[1] >> 0) & 0xffff) ||
+ LSW(ctx->local_ip6[1], 1) != ((DST_IP6[1] >> 16) & 0xffff) ||
+ LSW(ctx->local_ip6[2], 0) != ((DST_IP6[2] >> 0) & 0xffff) ||
+ LSW(ctx->local_ip6[2], 1) != ((DST_IP6[2] >> 16) & 0xffff) ||
+ LSW(ctx->local_ip6[3], 0) != ((DST_IP6[3] >> 0) & 0xffff) ||
+ LSW(ctx->local_ip6[3], 1) != ((DST_IP6[3] >> 16) & 0xffff))
+ return SK_DROP;
+ } else {
+ /* Expect :: IPs when family != AF_INET6 */
+ if (LSB(ctx->remote_ip6[0], 0) != 0 || LSB(ctx->remote_ip6[0], 1) != 0 ||
+ LSB(ctx->remote_ip6[0], 2) != 0 || LSB(ctx->remote_ip6[0], 3) != 0 ||
+ LSB(ctx->remote_ip6[1], 0) != 0 || LSB(ctx->remote_ip6[1], 1) != 0 ||
+ LSB(ctx->remote_ip6[1], 2) != 0 || LSB(ctx->remote_ip6[1], 3) != 0 ||
+ LSB(ctx->remote_ip6[2], 0) != 0 || LSB(ctx->remote_ip6[2], 1) != 0 ||
+ LSB(ctx->remote_ip6[2], 2) != 0 || LSB(ctx->remote_ip6[2], 3) != 0 ||
+ LSB(ctx->remote_ip6[3], 0) != 0 || LSB(ctx->remote_ip6[3], 1) != 0 ||
+ LSB(ctx->remote_ip6[3], 2) != 0 || LSB(ctx->remote_ip6[3], 3) != 0)
+ return SK_DROP;
+ if (LSW(ctx->remote_ip6[0], 0) != 0 || LSW(ctx->remote_ip6[0], 1) != 0 ||
+ LSW(ctx->remote_ip6[1], 0) != 0 || LSW(ctx->remote_ip6[1], 1) != 0 ||
+ LSW(ctx->remote_ip6[2], 0) != 0 || LSW(ctx->remote_ip6[2], 1) != 0 ||
+ LSW(ctx->remote_ip6[3], 0) != 0 || LSW(ctx->remote_ip6[3], 1) != 0)
+ return SK_DROP;
+
+ if (LSB(ctx->local_ip6[0], 0) != 0 || LSB(ctx->local_ip6[0], 1) != 0 ||
+ LSB(ctx->local_ip6[0], 2) != 0 || LSB(ctx->local_ip6[0], 3) != 0 ||
+ LSB(ctx->local_ip6[1], 0) != 0 || LSB(ctx->local_ip6[1], 1) != 0 ||
+ LSB(ctx->local_ip6[1], 2) != 0 || LSB(ctx->local_ip6[1], 3) != 0 ||
+ LSB(ctx->local_ip6[2], 0) != 0 || LSB(ctx->local_ip6[2], 1) != 0 ||
+ LSB(ctx->local_ip6[2], 2) != 0 || LSB(ctx->local_ip6[2], 3) != 0 ||
+ LSB(ctx->local_ip6[3], 0) != 0 || LSB(ctx->local_ip6[3], 1) != 0 ||
+ LSB(ctx->local_ip6[3], 2) != 0 || LSB(ctx->local_ip6[3], 3) != 0)
+ return SK_DROP;
+ if (LSW(ctx->remote_ip6[0], 0) != 0 || LSW(ctx->remote_ip6[0], 1) != 0 ||
+ LSW(ctx->remote_ip6[1], 0) != 0 || LSW(ctx->remote_ip6[1], 1) != 0 ||
+ LSW(ctx->remote_ip6[2], 0) != 0 || LSW(ctx->remote_ip6[2], 1) != 0 ||
+ LSW(ctx->remote_ip6[3], 0) != 0 || LSW(ctx->remote_ip6[3], 1) != 0)
+ return SK_DROP;
+ }
+
+ /* Success, redirect to KEY_SERVER_B */
+ sk = bpf_map_lookup_elem(&redir_map, &KEY_SERVER_B);
+ if (sk) {
+ bpf_sk_assign(ctx, sk, 0);
+ bpf_sk_release(sk);
+ }
+ return SK_PASS;
+}
+
+/* Check that sk_assign rejects SERVER_A socket with -ESOCKNOSUPPORT */
+SEC("sk_lookup")
+int sk_assign_esocknosupport(struct bpf_sk_lookup *ctx)
+{
+ struct bpf_sock *sk;
+ int err, ret;
+
+ ret = SK_DROP;
+ sk = bpf_map_lookup_elem(&redir_map, &KEY_SERVER_A);
+ if (!sk)
+ goto out;
+
+ err = bpf_sk_assign(ctx, sk, 0);
+ if (err != -ESOCKTNOSUPPORT) {
+ bpf_printk("sk_assign returned %d, expected %d\n",
+ err, -ESOCKTNOSUPPORT);
+ goto out;
+ }
+
+ ret = SK_PASS; /* Success, pass to regular lookup */
+out:
+ if (sk)
+ bpf_sk_release(sk);
+ return ret;
+}
+
+SEC("sk_lookup")
+int multi_prog_pass1(struct bpf_sk_lookup *ctx)
+{
+ bpf_map_update_elem(&run_map, &KEY_PROG1, &PROG_DONE, BPF_ANY);
+ return SK_PASS;
+}
+
+SEC("sk_lookup")
+int multi_prog_pass2(struct bpf_sk_lookup *ctx)
+{
+ bpf_map_update_elem(&run_map, &KEY_PROG2, &PROG_DONE, BPF_ANY);
+ return SK_PASS;
+}
+
+SEC("sk_lookup")
+int multi_prog_drop1(struct bpf_sk_lookup *ctx)
+{
+ bpf_map_update_elem(&run_map, &KEY_PROG1, &PROG_DONE, BPF_ANY);
+ return SK_DROP;
+}
+
+SEC("sk_lookup")
+int multi_prog_drop2(struct bpf_sk_lookup *ctx)
+{
+ bpf_map_update_elem(&run_map, &KEY_PROG2, &PROG_DONE, BPF_ANY);
+ return SK_DROP;
+}
+
+static __always_inline int select_server_a(struct bpf_sk_lookup *ctx)
+{
+ struct bpf_sock *sk;
+ int err;
+
+ sk = bpf_map_lookup_elem(&redir_map, &KEY_SERVER_A);
+ if (!sk)
+ return SK_DROP;
+
+ err = bpf_sk_assign(ctx, sk, 0);
+ bpf_sk_release(sk);
+ if (err)
+ return SK_DROP;
+
+ return SK_PASS;
+}
+
+SEC("sk_lookup")
+int multi_prog_redir1(struct bpf_sk_lookup *ctx)
+{
+ (void)select_server_a(ctx);
+ bpf_map_update_elem(&run_map, &KEY_PROG1, &PROG_DONE, BPF_ANY);
+ return SK_PASS;
+}
+
+SEC("sk_lookup")
+int multi_prog_redir2(struct bpf_sk_lookup *ctx)
+{
+ (void)select_server_a(ctx);
+ bpf_map_update_elem(&run_map, &KEY_PROG2, &PROG_DONE, BPF_ANY);
+ return SK_PASS;
+}
+
+char _license[] SEC("license") = "Dual BSD/GPL";
diff --git a/tools/testing/selftests/bpf/progs/test_sk_lookup_kern.c b/tools/testing/selftests/bpf/progs/test_sk_lookup_kern.c
index e83d0b48d80c..e9efc3263022 100644
--- a/tools/testing/selftests/bpf/progs/test_sk_lookup_kern.c
+++ b/tools/testing/selftests/bpf/progs/test_sk_lookup_kern.c
@@ -15,7 +15,6 @@
#include <bpf/bpf_helpers.h>
#include <bpf/bpf_endian.h>
-int _version SEC("version") = 1;
char _license[] SEC("license") = "GPL";
/* Fill 'tuple' with L3 info, and attempt to find L4. On fail, return NULL. */
@@ -24,8 +23,8 @@ static struct bpf_sock_tuple *get_tuple(void *data, __u64 nh_off,
bool *ipv4)
{
struct bpf_sock_tuple *result;
+ __u64 ihl_len = 0;
__u8 proto = 0;
- __u64 ihl_len;
if (eth_proto == bpf_htons(ETH_P_IP)) {
struct iphdr *iph = (struct iphdr *)(data + nh_off);
@@ -53,8 +52,8 @@ static struct bpf_sock_tuple *get_tuple(void *data, __u64 nh_off,
return result;
}
-SEC("classifier/sk_lookup_success")
-int bpf_sk_lookup_test0(struct __sk_buff *skb)
+SEC("?tc")
+int sk_lookup_success(struct __sk_buff *skb)
{
void *data_end = (void *)(long)skb->data_end;
void *data = (void *)(long)skb->data;
@@ -79,8 +78,8 @@ int bpf_sk_lookup_test0(struct __sk_buff *skb)
return sk ? TC_ACT_OK : TC_ACT_UNSPEC;
}
-SEC("classifier/sk_lookup_success_simple")
-int bpf_sk_lookup_test1(struct __sk_buff *skb)
+SEC("?tc")
+int sk_lookup_success_simple(struct __sk_buff *skb)
{
struct bpf_sock_tuple tuple = {};
struct bpf_sock *sk;
@@ -91,8 +90,8 @@ int bpf_sk_lookup_test1(struct __sk_buff *skb)
return 0;
}
-SEC("classifier/fail_use_after_free")
-int bpf_sk_lookup_uaf(struct __sk_buff *skb)
+SEC("?tc")
+int err_use_after_free(struct __sk_buff *skb)
{
struct bpf_sock_tuple tuple = {};
struct bpf_sock *sk;
@@ -106,12 +105,11 @@ int bpf_sk_lookup_uaf(struct __sk_buff *skb)
return family;
}
-SEC("classifier/fail_modify_sk_pointer")
-int bpf_sk_lookup_modptr(struct __sk_buff *skb)
+SEC("?tc")
+int err_modify_sk_pointer(struct __sk_buff *skb)
{
struct bpf_sock_tuple tuple = {};
struct bpf_sock *sk;
- __u32 family;
sk = bpf_sk_lookup_tcp(skb, &tuple, sizeof(tuple), BPF_F_CURRENT_NETNS, 0);
if (sk) {
@@ -121,12 +119,11 @@ int bpf_sk_lookup_modptr(struct __sk_buff *skb)
return 0;
}
-SEC("classifier/fail_modify_sk_or_null_pointer")
-int bpf_sk_lookup_modptr_or_null(struct __sk_buff *skb)
+SEC("?tc")
+int err_modify_sk_or_null_pointer(struct __sk_buff *skb)
{
struct bpf_sock_tuple tuple = {};
struct bpf_sock *sk;
- __u32 family;
sk = bpf_sk_lookup_tcp(skb, &tuple, sizeof(tuple), BPF_F_CURRENT_NETNS, 0);
sk += 1;
@@ -135,8 +132,8 @@ int bpf_sk_lookup_modptr_or_null(struct __sk_buff *skb)
return 0;
}
-SEC("classifier/fail_no_release")
-int bpf_sk_lookup_test2(struct __sk_buff *skb)
+SEC("?tc")
+int err_no_release(struct __sk_buff *skb)
{
struct bpf_sock_tuple tuple = {};
@@ -144,8 +141,8 @@ int bpf_sk_lookup_test2(struct __sk_buff *skb)
return 0;
}
-SEC("classifier/fail_release_twice")
-int bpf_sk_lookup_test3(struct __sk_buff *skb)
+SEC("?tc")
+int err_release_twice(struct __sk_buff *skb)
{
struct bpf_sock_tuple tuple = {};
struct bpf_sock *sk;
@@ -156,8 +153,8 @@ int bpf_sk_lookup_test3(struct __sk_buff *skb)
return 0;
}
-SEC("classifier/fail_release_unchecked")
-int bpf_sk_lookup_test4(struct __sk_buff *skb)
+SEC("?tc")
+int err_release_unchecked(struct __sk_buff *skb)
{
struct bpf_sock_tuple tuple = {};
struct bpf_sock *sk;
@@ -173,8 +170,8 @@ void lookup_no_release(struct __sk_buff *skb)
bpf_sk_lookup_tcp(skb, &tuple, sizeof(tuple), BPF_F_CURRENT_NETNS, 0);
}
-SEC("classifier/fail_no_release_subcall")
-int bpf_sk_lookup_test5(struct __sk_buff *skb)
+SEC("?tc")
+int err_no_release_subcall(struct __sk_buff *skb)
{
lookup_no_release(skb);
return 0;
diff --git a/tools/testing/selftests/bpf/progs/test_sk_storage_trace_itself.c b/tools/testing/selftests/bpf/progs/test_sk_storage_trace_itself.c
new file mode 100644
index 000000000000..59ef72d02a61
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/test_sk_storage_trace_itself.c
@@ -0,0 +1,29 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2020 Facebook */
+
+#include <vmlinux.h>
+#include <bpf/bpf_tracing.h>
+#include <bpf/bpf_helpers.h>
+
+struct {
+ __uint(type, BPF_MAP_TYPE_SK_STORAGE);
+ __uint(map_flags, BPF_F_NO_PREALLOC);
+ __type(key, int);
+ __type(value, int);
+} sk_stg_map SEC(".maps");
+
+SEC("fentry/bpf_sk_storage_free")
+int BPF_PROG(trace_bpf_sk_storage_free, struct sock *sk)
+{
+ int *value;
+
+ value = bpf_sk_storage_get(&sk_stg_map, sk, 0,
+ BPF_SK_STORAGE_GET_F_CREATE);
+
+ if (value)
+ *value = 1;
+
+ return 0;
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/test_sk_storage_tracing.c b/tools/testing/selftests/bpf/progs/test_sk_storage_tracing.c
new file mode 100644
index 000000000000..02e718f06e0f
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/test_sk_storage_tracing.c
@@ -0,0 +1,111 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2020 Facebook */
+
+#include <vmlinux.h>
+#include <bpf/bpf_tracing.h>
+#include <bpf/bpf_core_read.h>
+#include <bpf/bpf_helpers.h>
+
+struct sk_stg {
+ __u32 pid;
+ __u32 last_notclose_state;
+ char comm[16];
+};
+
+struct {
+ __uint(type, BPF_MAP_TYPE_SK_STORAGE);
+ __uint(map_flags, BPF_F_NO_PREALLOC);
+ __type(key, int);
+ __type(value, struct sk_stg);
+} sk_stg_map SEC(".maps");
+
+/* Testing delete */
+struct {
+ __uint(type, BPF_MAP_TYPE_SK_STORAGE);
+ __uint(map_flags, BPF_F_NO_PREALLOC);
+ __type(key, int);
+ __type(value, int);
+} del_sk_stg_map SEC(".maps");
+
+char task_comm[16] = "";
+
+SEC("tp_btf/inet_sock_set_state")
+int BPF_PROG(trace_inet_sock_set_state, struct sock *sk, int oldstate,
+ int newstate)
+{
+ struct sk_stg *stg;
+
+ if (newstate == BPF_TCP_CLOSE)
+ return 0;
+
+ stg = bpf_sk_storage_get(&sk_stg_map, sk, 0,
+ BPF_SK_STORAGE_GET_F_CREATE);
+ if (!stg)
+ return 0;
+
+ stg->last_notclose_state = newstate;
+
+ bpf_sk_storage_delete(&del_sk_stg_map, sk);
+
+ return 0;
+}
+
+static void set_task_info(struct sock *sk)
+{
+ struct task_struct *task;
+ struct sk_stg *stg;
+
+ stg = bpf_sk_storage_get(&sk_stg_map, sk, 0,
+ BPF_SK_STORAGE_GET_F_CREATE);
+ if (!stg)
+ return;
+
+ stg->pid = bpf_get_current_pid_tgid();
+
+ task = (struct task_struct *)bpf_get_current_task();
+ bpf_core_read_str(&stg->comm, sizeof(stg->comm), &task->comm);
+ bpf_core_read_str(&task_comm, sizeof(task_comm), &task->comm);
+}
+
+SEC("fentry/inet_csk_listen_start")
+int BPF_PROG(trace_inet_csk_listen_start, struct sock *sk)
+{
+ set_task_info(sk);
+
+ return 0;
+}
+
+SEC("fentry/tcp_connect")
+int BPF_PROG(trace_tcp_connect, struct sock *sk)
+{
+ set_task_info(sk);
+
+ return 0;
+}
+
+SEC("fexit/inet_csk_accept")
+int BPF_PROG(inet_csk_accept, struct sock *sk, int flags, int *err, bool kern,
+ struct sock *accepted_sk)
+{
+ set_task_info(accepted_sk);
+
+ return 0;
+}
+
+SEC("tp_btf/tcp_retransmit_synack")
+int BPF_PROG(tcp_retransmit_synack, struct sock* sk, struct request_sock* req)
+{
+ /* load only test */
+ bpf_sk_storage_get(&sk_stg_map, sk, 0, 0);
+ bpf_sk_storage_get(&sk_stg_map, req->sk, 0, 0);
+ return 0;
+}
+
+SEC("tp_btf/tcp_bad_csum")
+int BPF_PROG(tcp_bad_csum, struct sk_buff* skb)
+{
+ bpf_sk_storage_get(&sk_stg_map, skb->sk, 0, 0);
+ return 0;
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/test_skb_cgroup_id_kern.c b/tools/testing/selftests/bpf/progs/test_skb_cgroup_id_kern.c
index 552f2090665c..37aacc66cd68 100644
--- a/tools/testing/selftests/bpf/progs/test_skb_cgroup_id_kern.c
+++ b/tools/testing/selftests/bpf/progs/test_skb_cgroup_id_kern.c
@@ -10,12 +10,12 @@
#define NUM_CGROUP_LEVELS 4
-struct bpf_map_def SEC("maps") cgroup_ids = {
- .type = BPF_MAP_TYPE_ARRAY,
- .key_size = sizeof(__u32),
- .value_size = sizeof(__u64),
- .max_entries = NUM_CGROUP_LEVELS,
-};
+struct {
+ __uint(type, BPF_MAP_TYPE_ARRAY);
+ __type(key, __u32);
+ __type(value, __u64);
+ __uint(max_entries, NUM_CGROUP_LEVELS);
+} cgroup_ids SEC(".maps");
static __always_inline void log_nth_level(struct __sk_buff *skb, __u32 level)
{
@@ -42,6 +42,4 @@ int log_cgroup_id(struct __sk_buff *skb)
return TC_ACT_OK;
}
-int _version SEC("version") = 1;
-
char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/test_skb_ctx.c b/tools/testing/selftests/bpf/progs/test_skb_ctx.c
index b02ea589ce7e..a724a70c6700 100644
--- a/tools/testing/selftests/bpf/progs/test_skb_ctx.c
+++ b/tools/testing/selftests/bpf/progs/test_skb_ctx.c
@@ -3,13 +3,14 @@
#include <linux/bpf.h>
#include <bpf/bpf_helpers.h>
-int _version SEC("version") = 1;
+#include "bpf_compiler.h"
+
char _license[] SEC("license") = "GPL";
-SEC("skb_ctx")
+SEC("tc")
int process(struct __sk_buff *skb)
{
- #pragma clang loop unroll(full)
+ __pragma_loop_unroll_full
for (int i = 0; i < 5; i++) {
if (skb->cb[i] != i + 1)
return 1;
@@ -25,6 +26,12 @@ int process(struct __sk_buff *skb)
return 1;
if (skb->gso_size != 10)
return 1;
+ if (skb->ingress_ifindex != 11)
+ return 1;
+ if (skb->ifindex != 1)
+ return 1;
+ if (skb->hwtstamp != 11)
+ return 1;
return 0;
}
diff --git a/tools/testing/selftests/bpf/progs/test_skb_helpers.c b/tools/testing/selftests/bpf/progs/test_skb_helpers.c
index bb3fbf1a29e3..507215791c5b 100644
--- a/tools/testing/selftests/bpf/progs/test_skb_helpers.c
+++ b/tools/testing/selftests/bpf/progs/test_skb_helpers.c
@@ -14,7 +14,7 @@ struct {
char _license[] SEC("license") = "GPL";
-SEC("classifier/test_skb_helpers")
+SEC("tc")
int test_skb_helpers(struct __sk_buff *skb)
{
struct task_struct *task;
diff --git a/tools/testing/selftests/bpf/progs/test_skc_to_unix_sock.c b/tools/testing/selftests/bpf/progs/test_skc_to_unix_sock.c
new file mode 100644
index 000000000000..4cfa42aa9436
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/test_skc_to_unix_sock.c
@@ -0,0 +1,40 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* Copyright (c) 2021 Hengqi Chen */
+
+#include "vmlinux.h"
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
+#include "bpf_tracing_net.h"
+
+const volatile pid_t my_pid = 0;
+char path[256] = {};
+
+SEC("fentry/unix_listen")
+int BPF_PROG(unix_listen, struct socket *sock, int backlog)
+{
+ pid_t pid = bpf_get_current_pid_tgid() >> 32;
+ struct unix_sock *unix_sk;
+ int i, len;
+
+ if (pid != my_pid)
+ return 0;
+
+ unix_sk = (struct unix_sock *)bpf_skc_to_unix_sock(sock->sk);
+ if (!unix_sk)
+ return 0;
+
+ if (unix_sk->addr->name->sun_path[0])
+ return 0;
+
+ len = unix_sk->addr->len - sizeof(short);
+ path[0] = '@';
+ for (i = 1; i < len; i++) {
+ if (i >= (int)sizeof(struct sockaddr_un))
+ break;
+
+ path[i] = unix_sk->addr->name->sun_path[i];
+ }
+ return 0;
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/test_skeleton.c b/tools/testing/selftests/bpf/progs/test_skeleton.c
index 77ae86f44db5..adece9f91f58 100644
--- a/tools/testing/selftests/bpf/progs/test_skeleton.c
+++ b/tools/testing/selftests/bpf/progs/test_skeleton.c
@@ -5,6 +5,8 @@
#include <linux/bpf.h>
#include <bpf/bpf_helpers.h>
+#define __read_mostly SEC(".data.read_mostly")
+
struct s {
int a;
long long b;
@@ -20,7 +22,9 @@ long long in4 __attribute__((aligned(64))) = 0;
struct s in5 = {};
/* .rodata section */
-const volatile int in6 = 0;
+const volatile struct {
+ const int in6;
+} in = {};
/* .data section */
int out1 = -1;
@@ -36,21 +40,58 @@ extern int LINUX_KERNEL_VERSION __kconfig;
bool bpf_syscall = 0;
int kern_ver = 0;
+struct s out5 = {};
+
+
+const volatile int in_dynarr_sz SEC(".rodata.dyn");
+const volatile int in_dynarr[4] SEC(".rodata.dyn") = { -1, -2, -3, -4 };
+
+int out_dynarr[4] SEC(".data.dyn") = { 1, 2, 3, 4 };
+
+int read_mostly_var __read_mostly;
+int out_mostly_var;
+
+char huge_arr[16 * 1024 * 1024];
+
+/* non-mmapable custom .data section */
+
+struct my_value { int x, y, z; };
+
+__hidden int zero_key SEC(".data.non_mmapable");
+static struct my_value zero_value SEC(".data.non_mmapable");
+
+struct {
+ __uint(type, BPF_MAP_TYPE_ARRAY);
+ __type(key, int);
+ __type(value, struct my_value);
+ __uint(max_entries, 1);
+} my_map SEC(".maps");
+
SEC("raw_tp/sys_enter")
int handler(const void *ctx)
{
- static volatile struct s out5;
+ int i;
out1 = in1;
out2 = in2;
out3 = in3;
out4 = in4;
out5 = in5;
- out6 = in6;
+ out6 = in.in6;
bpf_syscall = CONFIG_BPF_SYSCALL;
kern_ver = LINUX_KERNEL_VERSION;
+ for (i = 0; i < in_dynarr_sz; i++)
+ out_dynarr[i] = in_dynarr[i];
+
+ out_mostly_var = read_mostly_var;
+
+ huge_arr[sizeof(huge_arr) - 1] = 123;
+
+ /* make sure zero_key and zero_value are not optimized out */
+ bpf_map_update_elem(&my_map, &zero_key, &zero_value, BPF_ANY);
+
return 0;
}
diff --git a/tools/testing/selftests/bpf/progs/test_snprintf.c b/tools/testing/selftests/bpf/progs/test_snprintf.c
new file mode 100644
index 000000000000..8fda07544023
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/test_snprintf.c
@@ -0,0 +1,77 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2021 Google LLC. */
+
+#include <linux/bpf.h>
+#include <bpf/bpf_helpers.h>
+
+__u32 pid = 0;
+
+char num_out[64] = {};
+long num_ret = 0;
+
+char ip_out[64] = {};
+long ip_ret = 0;
+
+char sym_out[64] = {};
+long sym_ret = 0;
+
+char addr_out[64] = {};
+long addr_ret = 0;
+
+char str_out[64] = {};
+long str_ret = 0;
+
+char over_out[6] = {};
+long over_ret = 0;
+
+char pad_out[10] = {};
+long pad_ret = 0;
+
+char noarg_out[64] = {};
+long noarg_ret = 0;
+
+long nobuf_ret = 0;
+
+extern const void schedule __ksym;
+
+SEC("raw_tp/sys_enter")
+int handler(const void *ctx)
+{
+ /* Convenient values to pretty-print */
+ const __u8 ex_ipv4[] = {127, 0, 0, 1};
+ const __u8 ex_ipv6[] = {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1};
+ static const char str1[] = "str1";
+ static const char longstr[] = "longstr";
+
+ if ((int)bpf_get_current_pid_tgid() != pid)
+ return 0;
+
+ /* Integer types */
+ num_ret = BPF_SNPRINTF(num_out, sizeof(num_out),
+ "%d %u %x %li %llu %lX",
+ -8, 9, 150, -424242, 1337, 0xDABBAD00);
+ /* IP addresses */
+ ip_ret = BPF_SNPRINTF(ip_out, sizeof(ip_out), "%pi4 %pI6",
+ &ex_ipv4, &ex_ipv6);
+ /* Symbol lookup formatting */
+ sym_ret = BPF_SNPRINTF(sym_out, sizeof(sym_out), "%ps %pS %pB",
+ &schedule, &schedule, &schedule);
+ /* Kernel pointers */
+ addr_ret = BPF_SNPRINTF(addr_out, sizeof(addr_out), "%pK %px %p",
+ 0, 0xFFFF00000ADD4E55, 0xFFFF00000ADD4E55);
+ /* Strings and single-byte character embedding */
+ str_ret = BPF_SNPRINTF(str_out, sizeof(str_out), "%s % 9c %+2c %-3c %04c %0c %+05s",
+ str1, 'a', 'b', 'c', 'd', 'e', longstr);
+ /* Overflow */
+ over_ret = BPF_SNPRINTF(over_out, sizeof(over_out), "%%overflow");
+ /* Padding of fixed width numbers */
+ pad_ret = BPF_SNPRINTF(pad_out, sizeof(pad_out), "%5d %0900000X", 4, 4);
+ /* No args */
+ noarg_ret = BPF_SNPRINTF(noarg_out, sizeof(noarg_out), "simple case");
+ /* No buffer */
+ nobuf_ret = BPF_SNPRINTF(NULL, 0, "only interested in length %d", 60);
+
+ return 0;
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/test_snprintf_single.c b/tools/testing/selftests/bpf/progs/test_snprintf_single.c
new file mode 100644
index 000000000000..3095837334d3
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/test_snprintf_single.c
@@ -0,0 +1,20 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2021 Google LLC. */
+
+#include <linux/bpf.h>
+#include <bpf/bpf_helpers.h>
+
+/* The format string is filled from the userspace such that loading fails */
+const char fmt[10];
+
+SEC("raw_tp/sys_enter")
+int handler(const void *ctx)
+{
+ unsigned long long arg = 42;
+
+ bpf_snprintf(NULL, 0, fmt, &arg, sizeof(arg));
+
+ return 0;
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/test_sock_fields_kern.c b/tools/testing/selftests/bpf/progs/test_sock_fields.c
index 9bcaa37f476a..f75e531bf36f 100644
--- a/tools/testing/selftests/bpf/progs/test_sock_fields_kern.c
+++ b/tools/testing/selftests/bpf/progs/test_sock_fields.c
@@ -7,49 +7,17 @@
#include <bpf/bpf_helpers.h>
#include <bpf/bpf_endian.h>
-
-enum bpf_addr_array_idx {
- ADDR_SRV_IDX,
- ADDR_CLI_IDX,
- __NR_BPF_ADDR_ARRAY_IDX,
-};
-
-enum bpf_result_array_idx {
- EGRESS_SRV_IDX,
- EGRESS_CLI_IDX,
- INGRESS_LISTEN_IDX,
- __NR_BPF_RESULT_ARRAY_IDX,
-};
+#include "bpf_tcp_helpers.h"
enum bpf_linum_array_idx {
EGRESS_LINUM_IDX,
INGRESS_LINUM_IDX,
+ READ_SK_DST_PORT_LINUM_IDX,
__NR_BPF_LINUM_ARRAY_IDX,
};
struct {
__uint(type, BPF_MAP_TYPE_ARRAY);
- __uint(max_entries, __NR_BPF_ADDR_ARRAY_IDX);
- __type(key, __u32);
- __type(value, struct sockaddr_in6);
-} addr_map SEC(".maps");
-
-struct {
- __uint(type, BPF_MAP_TYPE_ARRAY);
- __uint(max_entries, __NR_BPF_RESULT_ARRAY_IDX);
- __type(key, __u32);
- __type(value, struct bpf_sock);
-} sock_result_map SEC(".maps");
-
-struct {
- __uint(type, BPF_MAP_TYPE_ARRAY);
- __uint(max_entries, __NR_BPF_RESULT_ARRAY_IDX);
- __type(key, __u32);
- __type(value, struct bpf_tcp_sock);
-} tcp_sock_result_map SEC(".maps");
-
-struct {
- __uint(type, BPF_MAP_TYPE_ARRAY);
__uint(max_entries, __NR_BPF_LINUM_ARRAY_IDX);
__type(key, __u32);
__type(value, __u32);
@@ -74,6 +42,17 @@ struct {
__type(value, struct bpf_spinlock_cnt);
} sk_pkt_out_cnt10 SEC(".maps");
+struct bpf_tcp_sock listen_tp = {};
+struct sockaddr_in6 srv_sa6 = {};
+struct bpf_tcp_sock cli_tp = {};
+struct bpf_tcp_sock srv_tp = {};
+struct bpf_sock listen_sk = {};
+struct bpf_sock srv_sk = {};
+struct bpf_sock cli_sk = {};
+__u64 parent_cg_id = 0;
+__u64 child_cg_id = 0;
+__u64 lsndtime = 0;
+
static bool is_loopback6(__u32 *a6)
{
return !a6[0] && !a6[1] && !a6[2] && a6[3] == bpf_htonl(1);
@@ -130,62 +109,86 @@ static void tpcpy(struct bpf_tcp_sock *dst,
dst->bytes_acked = src->bytes_acked;
}
-#define RETURN { \
+/* Always return CG_OK so that no pkt will be filtered out */
+#define CG_OK 1
+
+#define RET_LOG() ({ \
linum = __LINE__; \
- bpf_map_update_elem(&linum_map, &linum_idx, &linum, 0); \
- return 1; \
-}
+ bpf_map_update_elem(&linum_map, &linum_idx, &linum, BPF_ANY); \
+ return CG_OK; \
+})
SEC("cgroup_skb/egress")
int egress_read_sock_fields(struct __sk_buff *skb)
{
- struct bpf_spinlock_cnt cli_cnt_init = { .lock = 0, .cnt = 0xeB9F };
- __u32 srv_idx = ADDR_SRV_IDX, cli_idx = ADDR_CLI_IDX, result_idx;
+ struct bpf_spinlock_cnt cli_cnt_init = { .lock = {}, .cnt = 0xeB9F };
struct bpf_spinlock_cnt *pkt_out_cnt, *pkt_out_cnt10;
- struct sockaddr_in6 *srv_sa6, *cli_sa6;
struct bpf_tcp_sock *tp, *tp_ret;
struct bpf_sock *sk, *sk_ret;
__u32 linum, linum_idx;
+ struct tcp_sock *ktp;
linum_idx = EGRESS_LINUM_IDX;
sk = skb->sk;
- if (!sk || sk->state == 10)
- RETURN;
+ if (!sk)
+ RET_LOG();
+ /* Not testing the egress traffic or the listening socket,
+ * which are covered by the cgroup_skb/ingress test program.
+ */
+ if (sk->family != AF_INET6 || !is_loopback6(sk->src_ip6) ||
+ sk->state == BPF_TCP_LISTEN)
+ return CG_OK;
+
+ if (sk->src_port == bpf_ntohs(srv_sa6.sin6_port)) {
+ /* Server socket */
+ sk_ret = &srv_sk;
+ tp_ret = &srv_tp;
+ } else if (sk->dst_port == srv_sa6.sin6_port) {
+ /* Client socket */
+ sk_ret = &cli_sk;
+ tp_ret = &cli_tp;
+ } else {
+ /* Not the testing egress traffic */
+ return CG_OK;
+ }
+
+ /* It must be a fullsock for cgroup_skb/egress prog */
sk = bpf_sk_fullsock(sk);
- if (!sk || sk->family != AF_INET6 || sk->protocol != IPPROTO_TCP ||
- !is_loopback6(sk->src_ip6))
- RETURN;
+ if (!sk)
+ RET_LOG();
+
+ /* Not the testing egress traffic */
+ if (sk->protocol != IPPROTO_TCP)
+ return CG_OK;
tp = bpf_tcp_sock(sk);
if (!tp)
- RETURN;
+ RET_LOG();
- srv_sa6 = bpf_map_lookup_elem(&addr_map, &srv_idx);
- cli_sa6 = bpf_map_lookup_elem(&addr_map, &cli_idx);
- if (!srv_sa6 || !cli_sa6)
- RETURN;
+ skcpy(sk_ret, sk);
+ tpcpy(tp_ret, tp);
- if (sk->src_port == bpf_ntohs(srv_sa6->sin6_port))
- result_idx = EGRESS_SRV_IDX;
- else if (sk->src_port == bpf_ntohs(cli_sa6->sin6_port))
- result_idx = EGRESS_CLI_IDX;
- else
- RETURN;
+ if (sk_ret == &srv_sk) {
+ ktp = bpf_skc_to_tcp_sock(sk);
- sk_ret = bpf_map_lookup_elem(&sock_result_map, &result_idx);
- tp_ret = bpf_map_lookup_elem(&tcp_sock_result_map, &result_idx);
- if (!sk_ret || !tp_ret)
- RETURN;
+ if (!ktp)
+ RET_LOG();
- skcpy(sk_ret, sk);
- tpcpy(tp_ret, tp);
+ lsndtime = ktp->lsndtime;
+
+ child_cg_id = bpf_sk_cgroup_id(ktp);
+ if (!child_cg_id)
+ RET_LOG();
+
+ parent_cg_id = bpf_sk_ancestor_cgroup_id(ktp, 2);
+ if (!parent_cg_id)
+ RET_LOG();
- if (result_idx == EGRESS_SRV_IDX) {
/* The userspace has created it for srv sk */
- pkt_out_cnt = bpf_sk_storage_get(&sk_pkt_out_cnt, sk, 0, 0);
- pkt_out_cnt10 = bpf_sk_storage_get(&sk_pkt_out_cnt10, sk,
+ pkt_out_cnt = bpf_sk_storage_get(&sk_pkt_out_cnt, ktp, 0, 0);
+ pkt_out_cnt10 = bpf_sk_storage_get(&sk_pkt_out_cnt10, ktp,
0, 0);
} else {
pkt_out_cnt = bpf_sk_storage_get(&sk_pkt_out_cnt, sk,
@@ -197,7 +200,7 @@ int egress_read_sock_fields(struct __sk_buff *skb)
}
if (!pkt_out_cnt || !pkt_out_cnt10)
- RETURN;
+ RET_LOG();
/* Even both cnt and cnt10 have lock defined in their BTF,
* intentionally one cnt takes lock while one does not
@@ -208,48 +211,97 @@ int egress_read_sock_fields(struct __sk_buff *skb)
pkt_out_cnt10->cnt += 10;
bpf_spin_unlock(&pkt_out_cnt10->lock);
- RETURN;
+ return CG_OK;
}
SEC("cgroup_skb/ingress")
int ingress_read_sock_fields(struct __sk_buff *skb)
{
- __u32 srv_idx = ADDR_SRV_IDX, result_idx = INGRESS_LISTEN_IDX;
- struct bpf_tcp_sock *tp, *tp_ret;
- struct bpf_sock *sk, *sk_ret;
- struct sockaddr_in6 *srv_sa6;
+ struct bpf_tcp_sock *tp;
__u32 linum, linum_idx;
+ struct bpf_sock *sk;
linum_idx = INGRESS_LINUM_IDX;
sk = skb->sk;
- if (!sk || sk->family != AF_INET6 || !is_loopback6(sk->src_ip6))
- RETURN;
+ if (!sk)
+ RET_LOG();
- srv_sa6 = bpf_map_lookup_elem(&addr_map, &srv_idx);
- if (!srv_sa6 || sk->src_port != bpf_ntohs(srv_sa6->sin6_port))
- RETURN;
+ /* Not the testing ingress traffic to the server */
+ if (sk->family != AF_INET6 || !is_loopback6(sk->src_ip6) ||
+ sk->src_port != bpf_ntohs(srv_sa6.sin6_port))
+ return CG_OK;
- if (sk->state != 10 && sk->state != 12)
- RETURN;
+ /* Only interested in the listening socket */
+ if (sk->state != BPF_TCP_LISTEN)
+ return CG_OK;
- sk = bpf_get_listener_sock(sk);
+ /* It must be a fullsock for cgroup_skb/ingress prog */
+ sk = bpf_sk_fullsock(sk);
if (!sk)
- RETURN;
+ RET_LOG();
tp = bpf_tcp_sock(sk);
if (!tp)
- RETURN;
+ RET_LOG();
- sk_ret = bpf_map_lookup_elem(&sock_result_map, &result_idx);
- tp_ret = bpf_map_lookup_elem(&tcp_sock_result_map, &result_idx);
- if (!sk_ret || !tp_ret)
- RETURN;
+ skcpy(&listen_sk, sk);
+ tpcpy(&listen_tp, tp);
- skcpy(sk_ret, sk);
- tpcpy(tp_ret, tp);
+ return CG_OK;
+}
+
+/*
+ * NOTE: 4-byte load from bpf_sock at dst_port offset is quirky. It
+ * gets rewritten by the access converter to a 2-byte load for
+ * backward compatibility. Treating the load result as a be16 value
+ * makes the code portable across little- and big-endian platforms.
+ */
+static __noinline bool sk_dst_port__load_word(struct bpf_sock *sk)
+{
+ __u32 *word = (__u32 *)&sk->dst_port;
+ return word[0] == bpf_htons(0xcafe);
+}
+
+static __noinline bool sk_dst_port__load_half(struct bpf_sock *sk)
+{
+ __u16 *half;
+
+ asm volatile ("");
+ half = (__u16 *)&sk->dst_port;
+ return half[0] == bpf_htons(0xcafe);
+}
+
+static __noinline bool sk_dst_port__load_byte(struct bpf_sock *sk)
+{
+ __u8 *byte = (__u8 *)&sk->dst_port;
+ return byte[0] == 0xca && byte[1] == 0xfe;
+}
+
+SEC("cgroup_skb/egress")
+int read_sk_dst_port(struct __sk_buff *skb)
+{
+ __u32 linum, linum_idx;
+ struct bpf_sock *sk;
+
+ linum_idx = READ_SK_DST_PORT_LINUM_IDX;
+
+ sk = skb->sk;
+ if (!sk)
+ RET_LOG();
+
+ /* Ignore everything but the SYN from the client socket */
+ if (sk->state != BPF_TCP_SYN_SENT)
+ return CG_OK;
+
+ if (!sk_dst_port__load_word(sk))
+ RET_LOG();
+ if (!sk_dst_port__load_half(sk))
+ RET_LOG();
+ if (!sk_dst_port__load_byte(sk))
+ RET_LOG();
- RETURN;
+ return CG_OK;
}
char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/test_sockmap_drop_prog.c b/tools/testing/selftests/bpf/progs/test_sockmap_drop_prog.c
new file mode 100644
index 000000000000..29314805ce42
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/test_sockmap_drop_prog.c
@@ -0,0 +1,32 @@
+#include <linux/bpf.h>
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_endian.h>
+
+struct {
+ __uint(type, BPF_MAP_TYPE_SOCKMAP);
+ __uint(max_entries, 20);
+ __type(key, int);
+ __type(value, int);
+} sock_map_rx SEC(".maps");
+
+struct {
+ __uint(type, BPF_MAP_TYPE_SOCKMAP);
+ __uint(max_entries, 20);
+ __type(key, int);
+ __type(value, int);
+} sock_map_tx SEC(".maps");
+
+struct {
+ __uint(type, BPF_MAP_TYPE_SOCKMAP);
+ __uint(max_entries, 20);
+ __type(key, int);
+ __type(value, int);
+} sock_map_msg SEC(".maps");
+
+SEC("sk_skb")
+int prog_skb_verdict(struct __sk_buff *skb)
+{
+ return SK_DROP;
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/test_sockmap_invalid_update.c b/tools/testing/selftests/bpf/progs/test_sockmap_invalid_update.c
new file mode 100644
index 000000000000..02a59e220cbc
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/test_sockmap_invalid_update.c
@@ -0,0 +1,23 @@
+// SPDX-License-Identifier: GPL-2.0
+// Copyright (c) 2020 Cloudflare
+#include "vmlinux.h"
+#include <bpf/bpf_helpers.h>
+
+struct {
+ __uint(type, BPF_MAP_TYPE_SOCKMAP);
+ __uint(max_entries, 1);
+ __type(key, __u32);
+ __type(value, __u64);
+} map SEC(".maps");
+
+SEC("sockops")
+int bpf_sockmap(struct bpf_sock_ops *skops)
+{
+ __u32 key = 0;
+
+ if (skops->sk)
+ bpf_map_update_elem(&map, &key, skops->sk, 0);
+ return 0;
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/test_sockmap_kern.h b/tools/testing/selftests/bpf/progs/test_sockmap_kern.h
index 3dca4c2e2418..99d2ea9fb658 100644
--- a/tools/testing/selftests/bpf/progs/test_sockmap_kern.h
+++ b/tools/testing/selftests/bpf/progs/test_sockmap_kern.h
@@ -14,6 +14,7 @@
#include <sys/socket.h>
#include <bpf/bpf_helpers.h>
#include <bpf/bpf_endian.h>
+#include "bpf_misc.h"
/* Sockmap sample program connects a client and a backend together
* using cgroups.
@@ -111,12 +112,15 @@ int bpf_prog2(struct __sk_buff *skb)
int len, *f, ret, zero = 0;
__u64 flags = 0;
+ __sink(rport);
if (lport == 10000)
ret = 10;
else
ret = 1;
len = (__u32)skb->data_end - (__u32)skb->data;
+ __sink(len);
+
f = bpf_map_lookup_elem(&sock_skb_opts, &zero);
if (f && *f) {
ret = 3;
@@ -131,40 +135,55 @@ int bpf_prog2(struct __sk_buff *skb)
}
-SEC("sk_skb3")
-int bpf_prog3(struct __sk_buff *skb)
+static inline void bpf_write_pass(struct __sk_buff *skb, int offset)
{
- const int one = 1;
- int err, *f, ret = SK_PASS;
+ int err = bpf_skb_pull_data(skb, 6 + offset);
void *data_end;
char *c;
- err = bpf_skb_pull_data(skb, 19);
if (err)
- goto tls_out;
+ return;
c = (char *)(long)skb->data;
data_end = (void *)(long)skb->data_end;
- if (c + 18 < data_end)
- memcpy(&c[13], "PASS", 4);
+ if (c + 5 + offset < data_end)
+ memcpy(c + offset, "PASS", 4);
+}
+
+SEC("sk_skb3")
+int bpf_prog3(struct __sk_buff *skb)
+{
+ int err, *f, ret = SK_PASS;
+ const int one = 1;
+
f = bpf_map_lookup_elem(&sock_skb_opts, &one);
if (f && *f) {
__u64 flags = 0;
ret = 0;
flags = *f;
+
+ err = bpf_skb_adjust_room(skb, -13, 0, 0);
+ if (err)
+ return SK_DROP;
+ err = bpf_skb_adjust_room(skb, 4, 0, 0);
+ if (err)
+ return SK_DROP;
+ bpf_write_pass(skb, 0);
#ifdef SOCKMAP
return bpf_sk_redirect_map(skb, &tls_sock_map, ret, flags);
#else
return bpf_sk_redirect_hash(skb, &tls_sock_map, &ret, flags);
#endif
}
-
f = bpf_map_lookup_elem(&sock_skb_opts, &one);
if (f && *f)
ret = SK_DROP;
-tls_out:
+ err = bpf_skb_adjust_room(skb, 4, 0, 0);
+ if (err)
+ return SK_DROP;
+ bpf_write_pass(skb, 13);
return ret;
}
@@ -172,8 +191,7 @@ SEC("sockops")
int bpf_sockmap(struct bpf_sock_ops *skops)
{
__u32 lport, rport;
- int op, err = 0, index, key, ret;
-
+ int op, ret;
op = (int) skops->op;
@@ -185,10 +203,10 @@ int bpf_sockmap(struct bpf_sock_ops *skops)
if (lport == 10000) {
ret = 1;
#ifdef SOCKMAP
- err = bpf_sock_map_update(skops, &sock_map, &ret,
+ bpf_sock_map_update(skops, &sock_map, &ret,
BPF_NOEXIST);
#else
- err = bpf_sock_hash_update(skops, &sock_map, &ret,
+ bpf_sock_hash_update(skops, &sock_map, &ret,
BPF_NOEXIST);
#endif
}
@@ -200,10 +218,10 @@ int bpf_sockmap(struct bpf_sock_ops *skops)
if (bpf_ntohl(rport) == 10001) {
ret = 10;
#ifdef SOCKMAP
- err = bpf_sock_map_update(skops, &sock_map, &ret,
+ bpf_sock_map_update(skops, &sock_map, &ret,
BPF_NOEXIST);
#else
- err = bpf_sock_hash_update(skops, &sock_map, &ret,
+ bpf_sock_hash_update(skops, &sock_map, &ret,
BPF_NOEXIST);
#endif
}
@@ -219,7 +237,7 @@ SEC("sk_msg1")
int bpf_prog4(struct sk_msg_md *msg)
{
int *bytes, zero = 0, one = 1, two = 2, three = 3, four = 4, five = 5;
- int *start, *end, *start_push, *end_push, *start_pop, *pop;
+ int *start, *end, *start_push, *end_push, *start_pop, *pop, err = 0;
bytes = bpf_map_lookup_elem(&sock_apply_bytes, &zero);
if (bytes)
@@ -233,8 +251,11 @@ int bpf_prog4(struct sk_msg_md *msg)
bpf_msg_pull_data(msg, *start, *end, 0);
start_push = bpf_map_lookup_elem(&sock_bytes, &two);
end_push = bpf_map_lookup_elem(&sock_bytes, &three);
- if (start_push && end_push)
- bpf_msg_push_data(msg, *start_push, *end_push, 0);
+ if (start_push && end_push) {
+ err = bpf_msg_push_data(msg, *start_push, *end_push, 0);
+ if (err)
+ return SK_DROP;
+ }
start_pop = bpf_map_lookup_elem(&sock_bytes, &four);
pop = bpf_map_lookup_elem(&sock_bytes, &five);
if (start_pop && pop)
@@ -247,6 +268,7 @@ int bpf_prog6(struct sk_msg_md *msg)
{
int zero = 0, one = 1, two = 2, three = 3, four = 4, five = 5, key = 0;
int *bytes, *start, *end, *start_push, *end_push, *start_pop, *pop, *f;
+ int err = 0;
__u64 flags = 0;
bytes = bpf_map_lookup_elem(&sock_apply_bytes, &zero);
@@ -263,8 +285,11 @@ int bpf_prog6(struct sk_msg_md *msg)
start_push = bpf_map_lookup_elem(&sock_bytes, &two);
end_push = bpf_map_lookup_elem(&sock_bytes, &three);
- if (start_push && end_push)
- bpf_msg_push_data(msg, *start_push, *end_push, 0);
+ if (start_push && end_push) {
+ err = bpf_msg_push_data(msg, *start_push, *end_push, 0);
+ if (err)
+ return SK_DROP;
+ }
start_pop = bpf_map_lookup_elem(&sock_bytes, &four);
pop = bpf_map_lookup_elem(&sock_bytes, &five);
@@ -298,6 +323,10 @@ int bpf_prog8(struct sk_msg_md *msg)
} else {
return SK_DROP;
}
+
+ __sink(data_end);
+ __sink(data);
+
return SK_PASS;
}
SEC("sk_msg4")
@@ -322,7 +351,7 @@ SEC("sk_msg5")
int bpf_prog10(struct sk_msg_md *msg)
{
int *bytes, *start, *end, *start_push, *end_push, *start_pop, *pop;
- int zero = 0, one = 1, two = 2, three = 3, four = 4, five = 5;
+ int zero = 0, one = 1, two = 2, three = 3, four = 4, five = 5, err = 0;
bytes = bpf_map_lookup_elem(&sock_apply_bytes, &zero);
if (bytes)
@@ -336,8 +365,11 @@ int bpf_prog10(struct sk_msg_md *msg)
bpf_msg_pull_data(msg, *start, *end, 0);
start_push = bpf_map_lookup_elem(&sock_bytes, &two);
end_push = bpf_map_lookup_elem(&sock_bytes, &three);
- if (start_push && end_push)
- bpf_msg_push_data(msg, *start_push, *end_push, 0);
+ if (start_push && end_push) {
+ err = bpf_msg_push_data(msg, *start_push, *end_push, 0);
+ if (err)
+ return SK_PASS;
+ }
start_pop = bpf_map_lookup_elem(&sock_bytes, &four);
pop = bpf_map_lookup_elem(&sock_bytes, &five);
if (start_pop && pop)
@@ -345,5 +377,4 @@ int bpf_prog10(struct sk_msg_md *msg)
return SK_DROP;
}
-int _version SEC("version") = 1;
char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/test_sockmap_listen.c b/tools/testing/selftests/bpf/progs/test_sockmap_listen.c
index a3a366c57ce1..b7250eb9c30c 100644
--- a/tools/testing/selftests/bpf/progs/test_sockmap_listen.c
+++ b/tools/testing/selftests/bpf/progs/test_sockmap_listen.c
@@ -15,6 +15,13 @@ struct {
} sock_map SEC(".maps");
struct {
+ __uint(type, BPF_MAP_TYPE_SOCKMAP);
+ __uint(max_entries, 2);
+ __type(key, __u32);
+ __type(value, __u64);
+} nop_map SEC(".maps");
+
+struct {
__uint(type, BPF_MAP_TYPE_SOCKHASH);
__uint(max_entries, 2);
__type(key, __u32);
@@ -28,16 +35,31 @@ struct {
__type(value, unsigned int);
} verdict_map SEC(".maps");
-static volatile bool test_sockmap; /* toggled by user-space */
+struct {
+ __uint(type, BPF_MAP_TYPE_ARRAY);
+ __uint(max_entries, 1);
+ __type(key, int);
+ __type(value, int);
+} parser_map SEC(".maps");
+
+bool test_sockmap = false; /* toggled by user-space */
+bool test_ingress = false; /* toggled by user-space */
SEC("sk_skb/stream_parser")
-int prog_skb_parser(struct __sk_buff *skb)
+int prog_stream_parser(struct __sk_buff *skb)
{
+ int *value;
+ __u32 key = 0;
+
+ value = bpf_map_lookup_elem(&parser_map, &key);
+ if (value && *value)
+ return *value;
+
return skb->len;
}
SEC("sk_skb/stream_verdict")
-int prog_skb_verdict(struct __sk_buff *skb)
+int prog_stream_verdict(struct __sk_buff *skb)
{
unsigned int *count;
__u32 zero = 0;
@@ -55,6 +77,27 @@ int prog_skb_verdict(struct __sk_buff *skb)
return verdict;
}
+SEC("sk_skb")
+int prog_skb_verdict(struct __sk_buff *skb)
+{
+ unsigned int *count;
+ __u32 zero = 0;
+ int verdict;
+
+ if (test_sockmap)
+ verdict = bpf_sk_redirect_map(skb, &sock_map, zero,
+ test_ingress ? BPF_F_INGRESS : 0);
+ else
+ verdict = bpf_sk_redirect_hash(skb, &sock_hash, &zero,
+ test_ingress ? BPF_F_INGRESS : 0);
+
+ count = bpf_map_lookup_elem(&verdict_map, &verdict);
+ if (count)
+ (*count)++;
+
+ return verdict;
+}
+
SEC("sk_msg")
int prog_msg_verdict(struct sk_msg_md *msg)
{
@@ -94,5 +137,4 @@ int prog_reuseport(struct sk_reuseport_md *reuse)
return verdict;
}
-int _version SEC("version") = 1;
char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/test_sockmap_pass_prog.c b/tools/testing/selftests/bpf/progs/test_sockmap_pass_prog.c
new file mode 100644
index 000000000000..1d86a717a290
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/test_sockmap_pass_prog.c
@@ -0,0 +1,32 @@
+#include <linux/bpf.h>
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_endian.h>
+
+struct {
+ __uint(type, BPF_MAP_TYPE_SOCKMAP);
+ __uint(max_entries, 20);
+ __type(key, int);
+ __type(value, int);
+} sock_map_rx SEC(".maps");
+
+struct {
+ __uint(type, BPF_MAP_TYPE_SOCKMAP);
+ __uint(max_entries, 20);
+ __type(key, int);
+ __type(value, int);
+} sock_map_tx SEC(".maps");
+
+struct {
+ __uint(type, BPF_MAP_TYPE_SOCKMAP);
+ __uint(max_entries, 20);
+ __type(key, int);
+ __type(value, int);
+} sock_map_msg SEC(".maps");
+
+SEC("sk_skb")
+int prog_skb_verdict(struct __sk_buff *skb)
+{
+ return SK_PASS;
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/test_sockmap_progs_query.c b/tools/testing/selftests/bpf/progs/test_sockmap_progs_query.c
new file mode 100644
index 000000000000..9d58d61c0dee
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/test_sockmap_progs_query.c
@@ -0,0 +1,24 @@
+// SPDX-License-Identifier: GPL-2.0
+#include "vmlinux.h"
+#include <bpf/bpf_helpers.h>
+
+struct {
+ __uint(type, BPF_MAP_TYPE_SOCKMAP);
+ __uint(max_entries, 1);
+ __type(key, __u32);
+ __type(value, __u64);
+} sock_map SEC(".maps");
+
+SEC("sk_skb")
+int prog_skb_verdict(struct __sk_buff *skb)
+{
+ return SK_PASS;
+}
+
+SEC("sk_msg")
+int prog_skmsg_verdict(struct sk_msg_md *msg)
+{
+ return SK_PASS;
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/test_sockmap_skb_verdict_attach.c b/tools/testing/selftests/bpf/progs/test_sockmap_skb_verdict_attach.c
new file mode 100644
index 000000000000..3c69aa971738
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/test_sockmap_skb_verdict_attach.c
@@ -0,0 +1,18 @@
+// SPDX-License-Identifier: GPL-2.0
+#include "vmlinux.h"
+#include <bpf/bpf_helpers.h>
+
+struct {
+ __uint(type, BPF_MAP_TYPE_SOCKMAP);
+ __uint(max_entries, 2);
+ __type(key, __u32);
+ __type(value, __u64);
+} sock_map SEC(".maps");
+
+SEC("sk_skb")
+int prog_skb_verdict(struct __sk_buff *skb)
+{
+ return SK_DROP;
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/test_sockmap_update.c b/tools/testing/selftests/bpf/progs/test_sockmap_update.c
new file mode 100644
index 000000000000..6d64ea536e3d
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/test_sockmap_update.c
@@ -0,0 +1,48 @@
+// SPDX-License-Identifier: GPL-2.0
+// Copyright (c) 2020 Cloudflare
+#include "vmlinux.h"
+#include <bpf/bpf_helpers.h>
+
+struct {
+ __uint(type, BPF_MAP_TYPE_SOCKMAP);
+ __uint(max_entries, 1);
+ __type(key, __u32);
+ __type(value, __u64);
+} src SEC(".maps");
+
+struct {
+ __uint(type, BPF_MAP_TYPE_SOCKMAP);
+ __uint(max_entries, 1);
+ __type(key, __u32);
+ __type(value, __u64);
+} dst_sock_map SEC(".maps");
+
+struct {
+ __uint(type, BPF_MAP_TYPE_SOCKHASH);
+ __uint(max_entries, 1);
+ __type(key, __u32);
+ __type(value, __u64);
+} dst_sock_hash SEC(".maps");
+
+SEC("tc")
+int copy_sock_map(void *ctx)
+{
+ struct bpf_sock *sk;
+ bool failed = false;
+ __u32 key = 0;
+
+ sk = bpf_map_lookup_elem(&src, &key);
+ if (!sk)
+ return SK_DROP;
+
+ if (bpf_map_update_elem(&dst_sock_map, &key, sk, 0))
+ failed = true;
+
+ if (bpf_map_update_elem(&dst_sock_hash, &key, sk, 0))
+ failed = true;
+
+ bpf_sk_release(sk);
+ return failed ? SK_DROP : SK_PASS;
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/test_spin_lock.c b/tools/testing/selftests/bpf/progs/test_spin_lock.c
index 0d31a3b3505f..d8d77bdffd3d 100644
--- a/tools/testing/selftests/bpf/progs/test_spin_lock.c
+++ b/tools/testing/selftests/bpf/progs/test_spin_lock.c
@@ -3,6 +3,7 @@
#include <linux/bpf.h>
#include <linux/version.h>
#include <bpf/bpf_helpers.h>
+#include "bpf_misc.h"
struct hmap_elem {
volatile int cnt;
@@ -45,8 +46,8 @@ struct {
#define CREDIT_PER_NS(delta, rate) (((delta) * rate) >> 20)
-SEC("spin_lock_demo")
-int bpf_sping_lock_test(struct __sk_buff *skb)
+SEC("cgroup_skb/ingress")
+int bpf_spin_lock_test(struct __sk_buff *skb)
{
volatile int credit = 0, max_credit = 100, pkt_len = 64;
struct hmap_elem zero = {}, *val;
@@ -89,6 +90,8 @@ int bpf_sping_lock_test(struct __sk_buff *skb)
credit = q->credit;
bpf_spin_unlock(&q->lock);
+ __sink(credit);
+
/* spin_lock in cgroup local storage */
cls = bpf_get_local_storage(&cls_map, 0);
bpf_spin_lock(&cls->lock);
@@ -98,4 +101,69 @@ int bpf_sping_lock_test(struct __sk_buff *skb)
err:
return err;
}
+
+struct bpf_spin_lock lockA __hidden SEC(".data.A");
+
+__noinline
+static int static_subprog(struct __sk_buff *ctx)
+{
+ volatile int ret = 0;
+
+ if (ctx->protocol)
+ return ret;
+ return ret + ctx->len;
+}
+
+__noinline
+static int static_subprog_lock(struct __sk_buff *ctx)
+{
+ volatile int ret = 0;
+
+ ret = static_subprog(ctx);
+ bpf_spin_lock(&lockA);
+ return ret + ctx->len;
+}
+
+__noinline
+static int static_subprog_unlock(struct __sk_buff *ctx)
+{
+ volatile int ret = 0;
+
+ ret = static_subprog(ctx);
+ bpf_spin_unlock(&lockA);
+ return ret + ctx->len;
+}
+
+SEC("tc")
+int lock_static_subprog_call(struct __sk_buff *ctx)
+{
+ int ret = 0;
+
+ bpf_spin_lock(&lockA);
+ if (ctx->mark == 42)
+ ret = static_subprog(ctx);
+ bpf_spin_unlock(&lockA);
+ return ret;
+}
+
+SEC("tc")
+int lock_static_subprog_lock(struct __sk_buff *ctx)
+{
+ int ret = 0;
+
+ ret = static_subprog_lock(ctx);
+ bpf_spin_unlock(&lockA);
+ return ret;
+}
+
+SEC("tc")
+int lock_static_subprog_unlock(struct __sk_buff *ctx)
+{
+ int ret = 0;
+
+ bpf_spin_lock(&lockA);
+ ret = static_subprog_unlock(ctx);
+ return ret;
+}
+
char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/test_spin_lock_fail.c b/tools/testing/selftests/bpf/progs/test_spin_lock_fail.c
new file mode 100644
index 000000000000..43f40c4fe241
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/test_spin_lock_fail.c
@@ -0,0 +1,248 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <vmlinux.h>
+#include <bpf/bpf_tracing.h>
+#include <bpf/bpf_helpers.h>
+#include "bpf_experimental.h"
+
+struct foo {
+ struct bpf_spin_lock lock;
+ int data;
+};
+
+struct array_map {
+ __uint(type, BPF_MAP_TYPE_ARRAY);
+ __type(key, int);
+ __type(value, struct foo);
+ __uint(max_entries, 1);
+} array_map SEC(".maps");
+
+struct {
+ __uint(type, BPF_MAP_TYPE_ARRAY_OF_MAPS);
+ __uint(max_entries, 1);
+ __type(key, int);
+ __type(value, int);
+ __array(values, struct array_map);
+} map_of_maps SEC(".maps") = {
+ .values = {
+ [0] = &array_map,
+ },
+};
+
+SEC(".data.A") struct bpf_spin_lock lockA;
+SEC(".data.B") struct bpf_spin_lock lockB;
+
+SEC("?tc")
+int lock_id_kptr_preserve(void *ctx)
+{
+ struct foo *f;
+
+ f = bpf_obj_new(typeof(*f));
+ if (!f)
+ return 0;
+ bpf_this_cpu_ptr(f);
+ return 0;
+}
+
+SEC("?tc")
+int lock_id_global_zero(void *ctx)
+{
+ bpf_this_cpu_ptr(&lockA);
+ return 0;
+}
+
+SEC("?tc")
+int lock_id_mapval_preserve(void *ctx)
+{
+ struct foo *f;
+ int key = 0;
+
+ f = bpf_map_lookup_elem(&array_map, &key);
+ if (!f)
+ return 0;
+ bpf_this_cpu_ptr(f);
+ return 0;
+}
+
+SEC("?tc")
+int lock_id_innermapval_preserve(void *ctx)
+{
+ struct foo *f;
+ int key = 0;
+ void *map;
+
+ map = bpf_map_lookup_elem(&map_of_maps, &key);
+ if (!map)
+ return 0;
+ f = bpf_map_lookup_elem(map, &key);
+ if (!f)
+ return 0;
+ bpf_this_cpu_ptr(f);
+ return 0;
+}
+
+#define CHECK(test, A, B) \
+ SEC("?tc") \
+ int lock_id_mismatch_##test(void *ctx) \
+ { \
+ struct foo *f1, *f2, *v, *iv; \
+ int key = 0; \
+ void *map; \
+ \
+ map = bpf_map_lookup_elem(&map_of_maps, &key); \
+ if (!map) \
+ return 0; \
+ iv = bpf_map_lookup_elem(map, &key); \
+ if (!iv) \
+ return 0; \
+ v = bpf_map_lookup_elem(&array_map, &key); \
+ if (!v) \
+ return 0; \
+ f1 = bpf_obj_new(typeof(*f1)); \
+ if (!f1) \
+ return 0; \
+ f2 = bpf_obj_new(typeof(*f2)); \
+ if (!f2) { \
+ bpf_obj_drop(f1); \
+ return 0; \
+ } \
+ bpf_spin_lock(A); \
+ bpf_spin_unlock(B); \
+ return 0; \
+ }
+
+CHECK(kptr_kptr, &f1->lock, &f2->lock);
+CHECK(kptr_global, &f1->lock, &lockA);
+CHECK(kptr_mapval, &f1->lock, &v->lock);
+CHECK(kptr_innermapval, &f1->lock, &iv->lock);
+
+CHECK(global_global, &lockA, &lockB);
+CHECK(global_kptr, &lockA, &f1->lock);
+CHECK(global_mapval, &lockA, &v->lock);
+CHECK(global_innermapval, &lockA, &iv->lock);
+
+SEC("?tc")
+int lock_id_mismatch_mapval_mapval(void *ctx)
+{
+ struct foo *f1, *f2;
+ int key = 0;
+
+ f1 = bpf_map_lookup_elem(&array_map, &key);
+ if (!f1)
+ return 0;
+ f2 = bpf_map_lookup_elem(&array_map, &key);
+ if (!f2)
+ return 0;
+
+ bpf_spin_lock(&f1->lock);
+ f1->data = 42;
+ bpf_spin_unlock(&f2->lock);
+
+ return 0;
+}
+
+CHECK(mapval_kptr, &v->lock, &f1->lock);
+CHECK(mapval_global, &v->lock, &lockB);
+CHECK(mapval_innermapval, &v->lock, &iv->lock);
+
+SEC("?tc")
+int lock_id_mismatch_innermapval_innermapval1(void *ctx)
+{
+ struct foo *f1, *f2;
+ int key = 0;
+ void *map;
+
+ map = bpf_map_lookup_elem(&map_of_maps, &key);
+ if (!map)
+ return 0;
+ f1 = bpf_map_lookup_elem(map, &key);
+ if (!f1)
+ return 0;
+ f2 = bpf_map_lookup_elem(map, &key);
+ if (!f2)
+ return 0;
+
+ bpf_spin_lock(&f1->lock);
+ f1->data = 42;
+ bpf_spin_unlock(&f2->lock);
+
+ return 0;
+}
+
+SEC("?tc")
+int lock_id_mismatch_innermapval_innermapval2(void *ctx)
+{
+ struct foo *f1, *f2;
+ int key = 0;
+ void *map;
+
+ map = bpf_map_lookup_elem(&map_of_maps, &key);
+ if (!map)
+ return 0;
+ f1 = bpf_map_lookup_elem(map, &key);
+ if (!f1)
+ return 0;
+ map = bpf_map_lookup_elem(&map_of_maps, &key);
+ if (!map)
+ return 0;
+ f2 = bpf_map_lookup_elem(map, &key);
+ if (!f2)
+ return 0;
+
+ bpf_spin_lock(&f1->lock);
+ f1->data = 42;
+ bpf_spin_unlock(&f2->lock);
+
+ return 0;
+}
+
+CHECK(innermapval_kptr, &iv->lock, &f1->lock);
+CHECK(innermapval_global, &iv->lock, &lockA);
+CHECK(innermapval_mapval, &iv->lock, &v->lock);
+
+#undef CHECK
+
+__noinline
+int global_subprog(struct __sk_buff *ctx)
+{
+ volatile int ret = 0;
+
+ if (ctx->protocol)
+ ret += ctx->protocol;
+ return ret + ctx->mark;
+}
+
+__noinline
+static int static_subprog_call_global(struct __sk_buff *ctx)
+{
+ volatile int ret = 0;
+
+ if (ctx->protocol)
+ return ret;
+ return ret + ctx->len + global_subprog(ctx);
+}
+
+SEC("?tc")
+int lock_global_subprog_call1(struct __sk_buff *ctx)
+{
+ int ret = 0;
+
+ bpf_spin_lock(&lockA);
+ if (ctx->mark == 42)
+ ret = global_subprog(ctx);
+ bpf_spin_unlock(&lockA);
+ return ret;
+}
+
+SEC("?tc")
+int lock_global_subprog_call2(struct __sk_buff *ctx)
+{
+ int ret = 0;
+
+ bpf_spin_lock(&lockA);
+ if (ctx->mark == 42)
+ ret = static_subprog_call_global(ctx);
+ bpf_spin_unlock(&lockA);
+ return ret;
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/test_stack_var_off.c b/tools/testing/selftests/bpf/progs/test_stack_var_off.c
new file mode 100644
index 000000000000..665e6ae09d37
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/test_stack_var_off.c
@@ -0,0 +1,51 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include <linux/bpf.h>
+#include <bpf/bpf_helpers.h>
+
+int probe_res;
+
+char input[4] = {};
+int test_pid;
+
+SEC("tracepoint/syscalls/sys_enter_nanosleep")
+int probe(void *ctx)
+{
+ /* This BPF program performs variable-offset reads and writes on a
+ * stack-allocated buffer.
+ */
+ char stack_buf[16];
+ unsigned long len;
+ unsigned long last;
+
+ if ((bpf_get_current_pid_tgid() >> 32) != test_pid)
+ return 0;
+
+ /* Copy the input to the stack. */
+ __builtin_memcpy(stack_buf, input, 4);
+
+ /* The first byte in the buffer indicates the length. */
+ len = stack_buf[0] & 0xf;
+ last = (len - 1) & 0xf;
+
+ /* Append something to the buffer. The offset where we write is not
+ * statically known; this is a variable-offset stack write.
+ */
+ stack_buf[len] = 42;
+
+ /* Index into the buffer at an unknown offset. This is a
+ * variable-offset stack read.
+ *
+ * Note that if it wasn't for the preceding variable-offset write, this
+ * read would be rejected because the stack slot cannot be verified as
+ * being initialized. With the preceding variable-offset write, the
+ * stack slot still cannot be verified, but the write inhibits the
+ * respective check on the reasoning that, if there was a
+ * variable-offset to a higher-or-equal spot, we're probably reading
+ * what we just wrote.
+ */
+ probe_res = stack_buf[last];
+ return 0;
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/test_stacktrace_build_id.c b/tools/testing/selftests/bpf/progs/test_stacktrace_build_id.c
index 0cf0134631b4..0c4426592a26 100644
--- a/tools/testing/selftests/bpf/progs/test_stacktrace_build_id.c
+++ b/tools/testing/selftests/bpf/progs/test_stacktrace_build_id.c
@@ -28,8 +28,8 @@ struct {
__uint(type, BPF_MAP_TYPE_STACK_TRACE);
__uint(max_entries, 128);
__uint(map_flags, BPF_F_STACK_BUILD_ID);
- __uint(key_size, sizeof(__u32));
- __uint(value_size, sizeof(stack_trace_t));
+ __type(key, __u32);
+ __type(value, stack_trace_t);
} stackmap SEC(".maps");
struct {
@@ -39,16 +39,8 @@ struct {
__type(value, stack_trace_t);
} stack_amap SEC(".maps");
-/* taken from /sys/kernel/debug/tracing/events/random/urandom_read/format */
-struct random_urandom_args {
- unsigned long long pad;
- int got_bits;
- int pool_left;
- int input_left;
-};
-
-SEC("tracepoint/random/urandom_read")
-int oncpu(struct random_urandom_args *args)
+SEC("kprobe/urandom_read_iter")
+int oncpu(struct pt_regs *args)
{
__u32 max_len = sizeof(struct bpf_stack_build_id)
* PERF_MAX_STACK_DEPTH;
@@ -73,4 +65,3 @@ int oncpu(struct random_urandom_args *args)
}
char _license[] SEC("license") = "GPL";
-__u32 _version SEC("version") = 1; /* ignored by tracepoints, required by libbpf.a */
diff --git a/tools/testing/selftests/bpf/progs/test_stacktrace_map.c b/tools/testing/selftests/bpf/progs/test_stacktrace_map.c
index 00ed48672620..47568007b668 100644
--- a/tools/testing/selftests/bpf/progs/test_stacktrace_map.c
+++ b/tools/testing/selftests/bpf/progs/test_stacktrace_map.c
@@ -1,7 +1,7 @@
// SPDX-License-Identifier: GPL-2.0
// Copyright (c) 2018 Facebook
-#include <linux/bpf.h>
+#include <vmlinux.h>
#include <bpf/bpf_helpers.h>
#ifndef PERF_MAX_STACK_DEPTH
@@ -27,8 +27,8 @@ typedef __u64 stack_trace_t[PERF_MAX_STACK_DEPTH];
struct {
__uint(type, BPF_MAP_TYPE_STACK_TRACE);
__uint(max_entries, 16384);
- __uint(key_size, sizeof(__u32));
- __uint(value_size, sizeof(stack_trace_t));
+ __type(key, __u32);
+ __type(value, stack_trace_t);
} stackmap SEC(".maps");
struct {
@@ -38,14 +38,14 @@ struct {
__type(value, stack_trace_t);
} stack_amap SEC(".maps");
-/* taken from /sys/kernel/debug/tracing/events/sched/sched_switch/format */
+/* taken from /sys/kernel/tracing/events/sched/sched_switch/format */
struct sched_switch_args {
unsigned long long pad;
- char prev_comm[16];
+ char prev_comm[TASK_COMM_LEN];
int prev_pid;
int prev_prio;
long long prev_state;
- char next_comm[16];
+ char next_comm[TASK_COMM_LEN];
int next_pid;
int next_prio;
};
diff --git a/tools/testing/selftests/bpf/progs/test_static_linked1.c b/tools/testing/selftests/bpf/progs/test_static_linked1.c
new file mode 100644
index 000000000000..4f0b612e1661
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/test_static_linked1.c
@@ -0,0 +1,30 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2021 Facebook */
+
+#include <linux/bpf.h>
+#include <bpf/bpf_helpers.h>
+
+/* 8-byte aligned .data */
+static volatile long static_var1 = 2;
+static volatile int static_var2 = 3;
+int var1 = -1;
+/* 4-byte aligned .rodata */
+const volatile int rovar1;
+
+/* same "subprog" name in both files */
+static __noinline int subprog(int x)
+{
+ /* but different formula */
+ return x * 2;
+}
+
+SEC("raw_tp/sys_enter")
+int handler1(const void *ctx)
+{
+ var1 = subprog(rovar1) + static_var1 + static_var2;
+
+ return 0;
+}
+
+char LICENSE[] SEC("license") = "GPL";
+int VERSION SEC("version") = 1;
diff --git a/tools/testing/selftests/bpf/progs/test_static_linked2.c b/tools/testing/selftests/bpf/progs/test_static_linked2.c
new file mode 100644
index 000000000000..766ebd502a60
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/test_static_linked2.c
@@ -0,0 +1,31 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2021 Facebook */
+
+#include <linux/bpf.h>
+#include <bpf/bpf_helpers.h>
+
+/* 4-byte aligned .data */
+static volatile int static_var1 = 5;
+static volatile int static_var2 = 6;
+int var2 = -1;
+/* 8-byte aligned .rodata */
+const volatile long rovar2;
+
+/* same "subprog" name in both files */
+static __noinline int subprog(int x)
+{
+ /* but different formula */
+ return x * 3;
+}
+
+SEC("raw_tp/sys_enter")
+int handler2(const void *ctx)
+{
+ var2 = subprog(rovar2) + static_var1 + static_var2;
+
+ return 0;
+}
+
+/* different name and/or type of the variable doesn't matter */
+char _license[] SEC("license") = "GPL";
+int _version SEC("version") = 1;
diff --git a/tools/testing/selftests/bpf/progs/test_subprogs.c b/tools/testing/selftests/bpf/progs/test_subprogs.c
new file mode 100644
index 000000000000..a8d602d7c88a
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/test_subprogs.c
@@ -0,0 +1,124 @@
+#include "vmlinux.h"
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_core_read.h>
+
+const char LICENSE[] SEC("license") = "GPL";
+
+struct {
+ __uint(type, BPF_MAP_TYPE_ARRAY);
+ __uint(max_entries, 1);
+ __type(key, __u32);
+ __type(value, __u64);
+} array SEC(".maps");
+
+__noinline int sub1(int x)
+{
+ int key = 0;
+
+ bpf_map_lookup_elem(&array, &key);
+ return x + 1;
+}
+
+static __noinline int sub5(int v);
+
+__noinline int sub2(int y)
+{
+ return sub5(y + 2);
+}
+
+static __noinline int sub3(int z)
+{
+ return z + 3 + sub1(4);
+}
+
+static __noinline int sub4(int w)
+{
+ int key = 0;
+
+ bpf_map_lookup_elem(&array, &key);
+ return w + sub3(5) + sub1(6);
+}
+
+/* sub5() is an identitify function, just to test weirder functions layout and
+ * call patterns
+ */
+static __noinline int sub5(int v)
+{
+ return sub1(v) - 1; /* compensates sub1()'s + 1 */
+}
+
+/* unfortunately verifier rejects `struct task_struct *t` as an unknown pointer
+ * type, so we need to accept pointer as integer and then cast it inside the
+ * function
+ */
+__noinline int get_task_tgid(uintptr_t t)
+{
+ /* this ensures that CO-RE relocs work in multi-subprogs .text */
+ return BPF_CORE_READ((struct task_struct *)(void *)t, tgid);
+}
+
+int res1 = 0;
+int res2 = 0;
+int res3 = 0;
+int res4 = 0;
+
+SEC("raw_tp/sys_enter")
+int prog1(void *ctx)
+{
+ /* perform some CO-RE relocations to ensure they work with multi-prog
+ * sections correctly
+ */
+ struct task_struct *t = (void *)bpf_get_current_task();
+
+ if (!BPF_CORE_READ(t, pid) || !get_task_tgid((uintptr_t)t))
+ return 1;
+
+ res1 = sub1(1) + sub3(2); /* (1 + 1) + (2 + 3 + (4 + 1)) = 12 */
+ return 0;
+}
+
+SEC("raw_tp/sys_exit")
+int prog2(void *ctx)
+{
+ struct task_struct *t = (void *)bpf_get_current_task();
+
+ if (!BPF_CORE_READ(t, pid) || !get_task_tgid((uintptr_t)t))
+ return 1;
+
+ res2 = sub2(3) + sub3(4); /* (3 + 2) + (4 + 3 + (4 + 1)) = 17 */
+ return 0;
+}
+
+static int empty_callback(__u32 index, void *data)
+{
+ return 0;
+}
+
+/* prog3 has the same section name as prog1 */
+SEC("raw_tp/sys_enter")
+int prog3(void *ctx)
+{
+ struct task_struct *t = (void *)bpf_get_current_task();
+
+ if (!BPF_CORE_READ(t, pid) || !get_task_tgid((uintptr_t)t))
+ return 1;
+
+ /* test that ld_imm64 with BPF_PSEUDO_FUNC doesn't get blinded */
+ bpf_loop(1, empty_callback, NULL, 0);
+
+ res3 = sub3(5) + 6; /* (5 + 3 + (4 + 1)) + 6 = 19 */
+ return 0;
+}
+
+/* prog4 has the same section name as prog2 */
+SEC("raw_tp/sys_exit")
+int prog4(void *ctx)
+{
+ struct task_struct *t = (void *)bpf_get_current_task();
+
+ if (!BPF_CORE_READ(t, pid) || !get_task_tgid((uintptr_t)t))
+ return 1;
+
+ res4 = sub4(7) + sub1(8); /* (7 + (5 + 3 + (4 + 1)) + (6 + 1)) + (8 + 1) = 36 */
+ return 0;
+}
diff --git a/tools/testing/selftests/bpf/progs/test_subprogs_extable.c b/tools/testing/selftests/bpf/progs/test_subprogs_extable.c
new file mode 100644
index 000000000000..e2a21fbd4e44
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/test_subprogs_extable.c
@@ -0,0 +1,51 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include "vmlinux.h"
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
+
+struct {
+ __uint(type, BPF_MAP_TYPE_ARRAY);
+ __uint(max_entries, 8);
+ __type(key, __u32);
+ __type(value, __u64);
+} test_array SEC(".maps");
+
+unsigned int triggered;
+
+static __u64 test_cb(struct bpf_map *map, __u32 *key, __u64 *val, void *data)
+{
+ return 1;
+}
+
+SEC("fexit/bpf_testmod_return_ptr")
+int BPF_PROG(handle_fexit_ret_subprogs, int arg, struct file *ret)
+{
+ *(volatile long *)ret;
+ *(volatile int *)&ret->f_mode;
+ bpf_for_each_map_elem(&test_array, test_cb, NULL, 0);
+ triggered++;
+ return 0;
+}
+
+SEC("fexit/bpf_testmod_return_ptr")
+int BPF_PROG(handle_fexit_ret_subprogs2, int arg, struct file *ret)
+{
+ *(volatile long *)ret;
+ *(volatile int *)&ret->f_mode;
+ bpf_for_each_map_elem(&test_array, test_cb, NULL, 0);
+ triggered++;
+ return 0;
+}
+
+SEC("fexit/bpf_testmod_return_ptr")
+int BPF_PROG(handle_fexit_ret_subprogs3, int arg, struct file *ret)
+{
+ *(volatile long *)ret;
+ *(volatile int *)&ret->f_mode;
+ bpf_for_each_map_elem(&test_array, test_cb, NULL, 0);
+ triggered++;
+ return 0;
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/test_subprogs_unused.c b/tools/testing/selftests/bpf/progs/test_subprogs_unused.c
new file mode 100644
index 000000000000..bc49e050d342
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/test_subprogs_unused.c
@@ -0,0 +1,21 @@
+#include "vmlinux.h"
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_core_read.h>
+
+const char LICENSE[] SEC("license") = "GPL";
+
+__attribute__((unused)) __noinline int unused1(int x)
+{
+ return x + 1;
+}
+
+static __attribute__((unused)) __noinline int unused2(int x)
+{
+ return x + 2;
+}
+
+SEC("raw_tp/sys_enter")
+int main_prog(void *ctx)
+{
+ return 0;
+}
diff --git a/tools/testing/selftests/bpf/progs/test_subskeleton.c b/tools/testing/selftests/bpf/progs/test_subskeleton.c
new file mode 100644
index 000000000000..006417974372
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/test_subskeleton.c
@@ -0,0 +1,28 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) Meta Platforms, Inc. and affiliates. */
+
+#include <stdbool.h>
+#include <linux/bpf.h>
+#include <bpf/bpf_helpers.h>
+
+/* volatile to force a read, compiler may assume 0 otherwise */
+const volatile int rovar1;
+int out1;
+
+/* Override weak symbol in test_subskeleton_lib */
+int var5 = 5;
+
+extern volatile bool CONFIG_BPF_SYSCALL __kconfig;
+
+extern int lib_routine(void);
+
+SEC("raw_tp/sys_enter")
+int handler1(const void *ctx)
+{
+ (void) CONFIG_BPF_SYSCALL;
+
+ out1 = lib_routine() * rovar1;
+ return 0;
+}
+
+char LICENSE[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/test_subskeleton_lib.c b/tools/testing/selftests/bpf/progs/test_subskeleton_lib.c
new file mode 100644
index 000000000000..ecfafe812c36
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/test_subskeleton_lib.c
@@ -0,0 +1,61 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) Meta Platforms, Inc. and affiliates. */
+
+#include <stdbool.h>
+#include <linux/bpf.h>
+#include <bpf/bpf_helpers.h>
+
+/* volatile to force a read */
+const volatile int var1;
+volatile int var2 = 1;
+struct {
+ int var3_1;
+ __s64 var3_2;
+} var3;
+int libout1;
+
+extern volatile bool CONFIG_BPF_SYSCALL __kconfig;
+
+int var4[4];
+
+__weak int var5 SEC(".data");
+
+/* Fully contained within library extern-and-definition */
+extern int var6;
+
+int var7 SEC(".data.custom");
+
+int (*fn_ptr)(void);
+
+struct {
+ __uint(type, BPF_MAP_TYPE_HASH);
+ __type(key, __u32);
+ __type(value, __u32);
+ __uint(max_entries, 16);
+} map1 SEC(".maps");
+
+extern struct {
+ __uint(type, BPF_MAP_TYPE_HASH);
+ __type(key, __u32);
+ __type(value, __u32);
+ __uint(max_entries, 16);
+} map2 SEC(".maps");
+
+int lib_routine(void)
+{
+ __u32 key = 1, value = 2;
+
+ (void) CONFIG_BPF_SYSCALL;
+ bpf_map_update_elem(&map2, &key, &value, BPF_ANY);
+
+ libout1 = var1 + var2 + var3.var3_1 + var3.var3_2 + var5 + var6;
+ return libout1;
+}
+
+SEC("perf_event")
+int lib_perf_handler(struct pt_regs *ctx)
+{
+ return 0;
+}
+
+char LICENSE[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/test_subskeleton_lib2.c b/tools/testing/selftests/bpf/progs/test_subskeleton_lib2.c
new file mode 100644
index 000000000000..80238486b7ce
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/test_subskeleton_lib2.c
@@ -0,0 +1,16 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) Meta Platforms, Inc. and affiliates. */
+
+#include <linux/bpf.h>
+#include <bpf/bpf_helpers.h>
+
+int var6 = 6;
+
+struct {
+ __uint(type, BPF_MAP_TYPE_HASH);
+ __type(key, __u32);
+ __type(value, __u32);
+ __uint(max_entries, 16);
+} map2 SEC(".maps");
+
+char LICENSE[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/test_sysctl_loop1.c b/tools/testing/selftests/bpf/progs/test_sysctl_loop1.c
index 458b0d69133e..7f74077d6622 100644
--- a/tools/testing/selftests/bpf/progs/test_sysctl_loop1.c
+++ b/tools/testing/selftests/bpf/progs/test_sysctl_loop1.c
@@ -9,6 +9,8 @@
#include <bpf/bpf_helpers.h>
+#include "bpf_compiler.h"
+
#ifndef ARRAY_SIZE
#define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0]))
#endif
@@ -18,11 +20,11 @@
#define MAX_ULONG_STR_LEN 7
#define MAX_VALUE_STR_LEN (TCP_MEM_LOOPS * MAX_ULONG_STR_LEN)
+const char tcp_mem_name[] = "net/ipv4/tcp_mem/very_very_very_very_long_pointless_string";
static __always_inline int is_tcp_mem(struct bpf_sysctl *ctx)
{
- volatile char tcp_mem_name[] = "net/ipv4/tcp_mem/very_very_very_very_long_pointless_string";
unsigned char i;
- char name[64];
+ char name[sizeof(tcp_mem_name)];
int ret;
memset(name, 0, sizeof(name));
@@ -30,7 +32,7 @@ static __always_inline int is_tcp_mem(struct bpf_sysctl *ctx)
if (ret < 0 || ret != sizeof(tcp_mem_name) - 1)
return 0;
-#pragma clang loop unroll(disable)
+ __pragma_loop_no_unroll
for (i = 0; i < sizeof(tcp_mem_name); ++i)
if (name[i] != tcp_mem_name[i])
return 0;
@@ -59,7 +61,7 @@ int sysctl_tcp_mem(struct bpf_sysctl *ctx)
if (ret < 0 || ret >= MAX_VALUE_STR_LEN)
return 0;
-#pragma clang loop unroll(disable)
+ __pragma_loop_no_unroll
for (i = 0; i < ARRAY_SIZE(tcp_mem); ++i) {
ret = bpf_strtoul(value + off, MAX_ULONG_STR_LEN, 0,
tcp_mem + i);
diff --git a/tools/testing/selftests/bpf/progs/test_sysctl_loop2.c b/tools/testing/selftests/bpf/progs/test_sysctl_loop2.c
index b2e6f9b0894d..68a75436e8af 100644
--- a/tools/testing/selftests/bpf/progs/test_sysctl_loop2.c
+++ b/tools/testing/selftests/bpf/progs/test_sysctl_loop2.c
@@ -9,6 +9,8 @@
#include <bpf/bpf_helpers.h>
+#include "bpf_compiler.h"
+
#ifndef ARRAY_SIZE
#define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0]))
#endif
@@ -18,11 +20,11 @@
#define MAX_ULONG_STR_LEN 7
#define MAX_VALUE_STR_LEN (TCP_MEM_LOOPS * MAX_ULONG_STR_LEN)
+const char tcp_mem_name[] = "net/ipv4/tcp_mem/very_very_very_very_long_pointless_string_to_stress_byte_loop";
static __attribute__((noinline)) int is_tcp_mem(struct bpf_sysctl *ctx)
{
- volatile char tcp_mem_name[] = "net/ipv4/tcp_mem/very_very_very_very_long_pointless_string_to_stress_byte_loop";
unsigned char i;
- char name[64];
+ char name[sizeof(tcp_mem_name)];
int ret;
memset(name, 0, sizeof(name));
@@ -30,7 +32,7 @@ static __attribute__((noinline)) int is_tcp_mem(struct bpf_sysctl *ctx)
if (ret < 0 || ret != sizeof(tcp_mem_name) - 1)
return 0;
-#pragma clang loop unroll(disable)
+ __pragma_loop_no_unroll
for (i = 0; i < sizeof(tcp_mem_name); ++i)
if (name[i] != tcp_mem_name[i])
return 0;
@@ -57,7 +59,7 @@ int sysctl_tcp_mem(struct bpf_sysctl *ctx)
if (ret < 0 || ret >= MAX_VALUE_STR_LEN)
return 0;
-#pragma clang loop unroll(disable)
+ __pragma_loop_no_unroll
for (i = 0; i < ARRAY_SIZE(tcp_mem); ++i) {
ret = bpf_strtoul(value + off, MAX_ULONG_STR_LEN, 0,
tcp_mem + i);
diff --git a/tools/testing/selftests/bpf/progs/test_sysctl_prog.c b/tools/testing/selftests/bpf/progs/test_sysctl_prog.c
index 50525235380e..efc3c61f7852 100644
--- a/tools/testing/selftests/bpf/progs/test_sysctl_prog.c
+++ b/tools/testing/selftests/bpf/progs/test_sysctl_prog.c
@@ -9,6 +9,8 @@
#include <bpf/bpf_helpers.h>
+#include "bpf_compiler.h"
+
/* Max supported length of a string with unsigned long in base 10 (pow2 - 1). */
#define MAX_ULONG_STR_LEN 0xF
@@ -19,11 +21,11 @@
#define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0]))
#endif
+const char tcp_mem_name[] = "net/ipv4/tcp_mem";
static __always_inline int is_tcp_mem(struct bpf_sysctl *ctx)
{
- char tcp_mem_name[] = "net/ipv4/tcp_mem";
unsigned char i;
- char name[64];
+ char name[sizeof(tcp_mem_name)];
int ret;
memset(name, 0, sizeof(name));
@@ -31,7 +33,7 @@ static __always_inline int is_tcp_mem(struct bpf_sysctl *ctx)
if (ret < 0 || ret != sizeof(tcp_mem_name) - 1)
return 0;
-#pragma clang loop unroll(full)
+ __pragma_loop_unroll_full
for (i = 0; i < sizeof(tcp_mem_name); ++i)
if (name[i] != tcp_mem_name[i])
return 0;
@@ -57,7 +59,7 @@ int sysctl_tcp_mem(struct bpf_sysctl *ctx)
if (ret < 0 || ret >= MAX_VALUE_STR_LEN)
return 0;
-#pragma clang loop unroll(full)
+ __pragma_loop_unroll_full
for (i = 0; i < ARRAY_SIZE(tcp_mem); ++i) {
ret = bpf_strtoul(value + off, MAX_ULONG_STR_LEN, 0,
tcp_mem + i);
diff --git a/tools/testing/selftests/bpf/progs/test_task_pt_regs.c b/tools/testing/selftests/bpf/progs/test_task_pt_regs.c
new file mode 100644
index 000000000000..1926facba122
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/test_task_pt_regs.c
@@ -0,0 +1,36 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include "vmlinux.h"
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
+
+#define PT_REGS_SIZE sizeof(struct pt_regs)
+
+/*
+ * The kernel struct pt_regs isn't exported in its entirety to userspace.
+ * Pass it as an array to task_pt_regs.c
+ */
+char current_regs[PT_REGS_SIZE] = {};
+char ctx_regs[PT_REGS_SIZE] = {};
+int uprobe_res = 0;
+
+SEC("uprobe")
+int handle_uprobe(struct pt_regs *ctx)
+{
+ struct task_struct *current;
+ struct pt_regs *regs;
+
+ current = bpf_get_current_task_btf();
+ regs = (struct pt_regs *) bpf_task_pt_regs(current);
+ if (bpf_probe_read_kernel(current_regs, PT_REGS_SIZE, regs))
+ return 0;
+ if (bpf_probe_read_kernel(ctx_regs, PT_REGS_SIZE, ctx))
+ return 0;
+
+ /* Prove that uprobe was run */
+ uprobe_res = 1;
+
+ return 0;
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/test_task_under_cgroup.c b/tools/testing/selftests/bpf/progs/test_task_under_cgroup.c
new file mode 100644
index 000000000000..7e750309ce27
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/test_task_under_cgroup.c
@@ -0,0 +1,77 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2023 Bytedance */
+
+#include <vmlinux.h>
+#include <bpf/bpf_tracing.h>
+#include <bpf/bpf_helpers.h>
+
+#include "bpf_misc.h"
+
+struct cgroup *bpf_cgroup_from_id(u64 cgid) __ksym;
+long bpf_task_under_cgroup(struct task_struct *task, struct cgroup *ancestor) __ksym;
+void bpf_cgroup_release(struct cgroup *p) __ksym;
+struct task_struct *bpf_task_acquire(struct task_struct *p) __ksym;
+void bpf_task_release(struct task_struct *p) __ksym;
+
+const volatile int local_pid;
+const volatile __u64 cgid;
+int remote_pid;
+
+SEC("tp_btf/task_newtask")
+int BPF_PROG(tp_btf_run, struct task_struct *task, u64 clone_flags)
+{
+ struct cgroup *cgrp = NULL;
+ struct task_struct *acquired;
+
+ if (local_pid != (bpf_get_current_pid_tgid() >> 32))
+ return 0;
+
+ acquired = bpf_task_acquire(task);
+ if (!acquired)
+ return 0;
+
+ if (local_pid == acquired->tgid)
+ goto out;
+
+ cgrp = bpf_cgroup_from_id(cgid);
+ if (!cgrp)
+ goto out;
+
+ if (bpf_task_under_cgroup(acquired, cgrp))
+ remote_pid = acquired->tgid;
+
+out:
+ if (cgrp)
+ bpf_cgroup_release(cgrp);
+ bpf_task_release(acquired);
+
+ return 0;
+}
+
+SEC("lsm.s/bpf")
+int BPF_PROG(lsm_run, int cmd, union bpf_attr *attr, unsigned int size)
+{
+ struct cgroup *cgrp = NULL;
+ struct task_struct *task;
+ int ret = 0;
+
+ task = bpf_get_current_task_btf();
+ if (local_pid != task->pid)
+ return 0;
+
+ if (cmd != BPF_LINK_CREATE)
+ return 0;
+
+ /* 1 is the root cgroup */
+ cgrp = bpf_cgroup_from_id(1);
+ if (!cgrp)
+ goto out;
+ if (!bpf_task_under_cgroup(task, cgrp))
+ ret = -1;
+ bpf_cgroup_release(cgrp);
+
+out:
+ return ret;
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/test_tc_bpf.c b/tools/testing/selftests/bpf/progs/test_tc_bpf.c
new file mode 100644
index 000000000000..ef7da419632a
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/test_tc_bpf.c
@@ -0,0 +1,25 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include <linux/bpf.h>
+#include <bpf/bpf_helpers.h>
+#include <linux/if_ether.h>
+#include <linux/ip.h>
+
+/* Dummy prog to test TC-BPF API */
+
+SEC("tc")
+int cls(struct __sk_buff *skb)
+{
+ return 0;
+}
+
+/* Prog to verify tc-bpf without cap_sys_admin and cap_perfmon */
+SEC("tcx/ingress")
+int pkt_ptr(struct __sk_buff *skb)
+{
+ struct iphdr *iph = (void *)(long)skb->data + sizeof(struct ethhdr);
+
+ if ((long)(iph + 1) > (long)skb->data_end)
+ return 1;
+ return 0;
+}
diff --git a/tools/testing/selftests/bpf/progs/test_tc_dtime.c b/tools/testing/selftests/bpf/progs/test_tc_dtime.c
new file mode 100644
index 000000000000..74ec09f040b7
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/test_tc_dtime.c
@@ -0,0 +1,397 @@
+// SPDX-License-Identifier: GPL-2.0
+// Copyright (c) 2022 Meta
+
+#include <stddef.h>
+#include <stdint.h>
+#include <stdbool.h>
+#include <linux/bpf.h>
+#include <linux/stddef.h>
+#include <linux/pkt_cls.h>
+#include <linux/if_ether.h>
+#include <linux/in.h>
+#include <linux/ip.h>
+#include <linux/ipv6.h>
+#include <linux/tcp.h>
+#include <linux/udp.h>
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_endian.h>
+
+/* veth_src --- veth_src_fwd --- veth_det_fwd --- veth_dst
+ * | |
+ * ns_src | ns_fwd | ns_dst
+ *
+ * ns_src and ns_dst: ENDHOST namespace
+ * ns_fwd: Fowarding namespace
+ */
+
+#define ctx_ptr(field) (void *)(long)(field)
+
+#define ip4_src __bpf_htonl(0xac100164) /* 172.16.1.100 */
+#define ip4_dst __bpf_htonl(0xac100264) /* 172.16.2.100 */
+
+#define ip6_src { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, \
+ 0x00, 0x01, 0xde, 0xad, 0xbe, 0xef, 0xca, 0xfe }
+#define ip6_dst { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, \
+ 0x00, 0x02, 0xde, 0xad, 0xbe, 0xef, 0xca, 0xfe }
+
+#define v6_equal(a, b) (a.s6_addr32[0] == b.s6_addr32[0] && \
+ a.s6_addr32[1] == b.s6_addr32[1] && \
+ a.s6_addr32[2] == b.s6_addr32[2] && \
+ a.s6_addr32[3] == b.s6_addr32[3])
+
+volatile const __u32 IFINDEX_SRC;
+volatile const __u32 IFINDEX_DST;
+
+#define EGRESS_ENDHOST_MAGIC 0x0b9fbeef
+#define INGRESS_FWDNS_MAGIC 0x1b9fbeef
+#define EGRESS_FWDNS_MAGIC 0x2b9fbeef
+
+enum {
+ INGRESS_FWDNS_P100,
+ INGRESS_FWDNS_P101,
+ EGRESS_FWDNS_P100,
+ EGRESS_FWDNS_P101,
+ INGRESS_ENDHOST,
+ EGRESS_ENDHOST,
+ SET_DTIME,
+ __MAX_CNT,
+};
+
+enum {
+ TCP_IP6_CLEAR_DTIME,
+ TCP_IP4,
+ TCP_IP6,
+ UDP_IP4,
+ UDP_IP6,
+ TCP_IP4_RT_FWD,
+ TCP_IP6_RT_FWD,
+ UDP_IP4_RT_FWD,
+ UDP_IP6_RT_FWD,
+ UKN_TEST,
+ __NR_TESTS,
+};
+
+enum {
+ SRC_NS = 1,
+ DST_NS,
+};
+
+__u32 dtimes[__NR_TESTS][__MAX_CNT] = {};
+__u32 errs[__NR_TESTS][__MAX_CNT] = {};
+__u32 test = 0;
+
+static void inc_dtimes(__u32 idx)
+{
+ if (test < __NR_TESTS)
+ dtimes[test][idx]++;
+ else
+ dtimes[UKN_TEST][idx]++;
+}
+
+static void inc_errs(__u32 idx)
+{
+ if (test < __NR_TESTS)
+ errs[test][idx]++;
+ else
+ errs[UKN_TEST][idx]++;
+}
+
+static int skb_proto(int type)
+{
+ return type & 0xff;
+}
+
+static int skb_ns(int type)
+{
+ return (type >> 8) & 0xff;
+}
+
+static bool fwdns_clear_dtime(void)
+{
+ return test == TCP_IP6_CLEAR_DTIME;
+}
+
+static bool bpf_fwd(void)
+{
+ return test < TCP_IP4_RT_FWD;
+}
+
+static __u8 get_proto(void)
+{
+ switch (test) {
+ case UDP_IP4:
+ case UDP_IP6:
+ case UDP_IP4_RT_FWD:
+ case UDP_IP6_RT_FWD:
+ return IPPROTO_UDP;
+ default:
+ return IPPROTO_TCP;
+ }
+}
+
+/* -1: parse error: TC_ACT_SHOT
+ * 0: not testing traffic: TC_ACT_OK
+ * >0: first byte is the inet_proto, second byte has the netns
+ * of the sender
+ */
+static int skb_get_type(struct __sk_buff *skb)
+{
+ __u16 dst_ns_port = __bpf_htons(50000 + test);
+ void *data_end = ctx_ptr(skb->data_end);
+ void *data = ctx_ptr(skb->data);
+ __u8 inet_proto = 0, ns = 0;
+ struct ipv6hdr *ip6h;
+ __u16 sport, dport;
+ struct iphdr *iph;
+ struct tcphdr *th;
+ struct udphdr *uh;
+ void *trans;
+
+ switch (skb->protocol) {
+ case __bpf_htons(ETH_P_IP):
+ iph = data + sizeof(struct ethhdr);
+ if (iph + 1 > data_end)
+ return -1;
+ if (iph->saddr == ip4_src)
+ ns = SRC_NS;
+ else if (iph->saddr == ip4_dst)
+ ns = DST_NS;
+ inet_proto = iph->protocol;
+ trans = iph + 1;
+ break;
+ case __bpf_htons(ETH_P_IPV6):
+ ip6h = data + sizeof(struct ethhdr);
+ if (ip6h + 1 > data_end)
+ return -1;
+ if (v6_equal(ip6h->saddr, (struct in6_addr){{ip6_src}}))
+ ns = SRC_NS;
+ else if (v6_equal(ip6h->saddr, (struct in6_addr){{ip6_dst}}))
+ ns = DST_NS;
+ inet_proto = ip6h->nexthdr;
+ trans = ip6h + 1;
+ break;
+ default:
+ return 0;
+ }
+
+ /* skb is not from src_ns or dst_ns.
+ * skb is not the testing IPPROTO.
+ */
+ if (!ns || inet_proto != get_proto())
+ return 0;
+
+ switch (inet_proto) {
+ case IPPROTO_TCP:
+ th = trans;
+ if (th + 1 > data_end)
+ return -1;
+ sport = th->source;
+ dport = th->dest;
+ break;
+ case IPPROTO_UDP:
+ uh = trans;
+ if (uh + 1 > data_end)
+ return -1;
+ sport = uh->source;
+ dport = uh->dest;
+ break;
+ default:
+ return 0;
+ }
+
+ /* The skb is the testing traffic */
+ if ((ns == SRC_NS && dport == dst_ns_port) ||
+ (ns == DST_NS && sport == dst_ns_port))
+ return (ns << 8 | inet_proto);
+
+ return 0;
+}
+
+/* format: direction@iface@netns
+ * egress@veth_(src|dst)@ns_(src|dst)
+ */
+SEC("tc")
+int egress_host(struct __sk_buff *skb)
+{
+ int skb_type;
+
+ skb_type = skb_get_type(skb);
+ if (skb_type == -1)
+ return TC_ACT_SHOT;
+ if (!skb_type)
+ return TC_ACT_OK;
+
+ if (skb_proto(skb_type) == IPPROTO_TCP) {
+ if (skb->tstamp_type == BPF_SKB_TSTAMP_DELIVERY_MONO &&
+ skb->tstamp)
+ inc_dtimes(EGRESS_ENDHOST);
+ else
+ inc_errs(EGRESS_ENDHOST);
+ } else {
+ if (skb->tstamp_type == BPF_SKB_TSTAMP_UNSPEC &&
+ skb->tstamp)
+ inc_dtimes(EGRESS_ENDHOST);
+ else
+ inc_errs(EGRESS_ENDHOST);
+ }
+
+ skb->tstamp = EGRESS_ENDHOST_MAGIC;
+
+ return TC_ACT_OK;
+}
+
+/* ingress@veth_(src|dst)@ns_(src|dst) */
+SEC("tc")
+int ingress_host(struct __sk_buff *skb)
+{
+ int skb_type;
+
+ skb_type = skb_get_type(skb);
+ if (skb_type == -1)
+ return TC_ACT_SHOT;
+ if (!skb_type)
+ return TC_ACT_OK;
+
+ if (skb->tstamp_type == BPF_SKB_TSTAMP_DELIVERY_MONO &&
+ skb->tstamp == EGRESS_FWDNS_MAGIC)
+ inc_dtimes(INGRESS_ENDHOST);
+ else
+ inc_errs(INGRESS_ENDHOST);
+
+ return TC_ACT_OK;
+}
+
+/* ingress@veth_(src|dst)_fwd@ns_fwd priority 100 */
+SEC("tc")
+int ingress_fwdns_prio100(struct __sk_buff *skb)
+{
+ int skb_type;
+
+ skb_type = skb_get_type(skb);
+ if (skb_type == -1)
+ return TC_ACT_SHOT;
+ if (!skb_type)
+ return TC_ACT_OK;
+
+ /* delivery_time is only available to the ingress
+ * if the tc-bpf checks the skb->tstamp_type.
+ */
+ if (skb->tstamp == EGRESS_ENDHOST_MAGIC)
+ inc_errs(INGRESS_FWDNS_P100);
+
+ if (fwdns_clear_dtime())
+ skb->tstamp = 0;
+
+ return TC_ACT_UNSPEC;
+}
+
+/* egress@veth_(src|dst)_fwd@ns_fwd priority 100 */
+SEC("tc")
+int egress_fwdns_prio100(struct __sk_buff *skb)
+{
+ int skb_type;
+
+ skb_type = skb_get_type(skb);
+ if (skb_type == -1)
+ return TC_ACT_SHOT;
+ if (!skb_type)
+ return TC_ACT_OK;
+
+ /* delivery_time is always available to egress even
+ * the tc-bpf did not use the tstamp_type.
+ */
+ if (skb->tstamp == INGRESS_FWDNS_MAGIC)
+ inc_dtimes(EGRESS_FWDNS_P100);
+ else
+ inc_errs(EGRESS_FWDNS_P100);
+
+ if (fwdns_clear_dtime())
+ skb->tstamp = 0;
+
+ return TC_ACT_UNSPEC;
+}
+
+/* ingress@veth_(src|dst)_fwd@ns_fwd priority 101 */
+SEC("tc")
+int ingress_fwdns_prio101(struct __sk_buff *skb)
+{
+ __u64 expected_dtime = EGRESS_ENDHOST_MAGIC;
+ int skb_type;
+
+ skb_type = skb_get_type(skb);
+ if (skb_type == -1 || !skb_type)
+ /* Should have handled in prio100 */
+ return TC_ACT_SHOT;
+
+ if (skb_proto(skb_type) == IPPROTO_UDP)
+ expected_dtime = 0;
+
+ if (skb->tstamp_type) {
+ if (fwdns_clear_dtime() ||
+ skb->tstamp_type != BPF_SKB_TSTAMP_DELIVERY_MONO ||
+ skb->tstamp != expected_dtime)
+ inc_errs(INGRESS_FWDNS_P101);
+ else
+ inc_dtimes(INGRESS_FWDNS_P101);
+ } else {
+ if (!fwdns_clear_dtime() && expected_dtime)
+ inc_errs(INGRESS_FWDNS_P101);
+ }
+
+ if (skb->tstamp_type == BPF_SKB_TSTAMP_DELIVERY_MONO) {
+ skb->tstamp = INGRESS_FWDNS_MAGIC;
+ } else {
+ if (bpf_skb_set_tstamp(skb, INGRESS_FWDNS_MAGIC,
+ BPF_SKB_TSTAMP_DELIVERY_MONO))
+ inc_errs(SET_DTIME);
+ if (!bpf_skb_set_tstamp(skb, INGRESS_FWDNS_MAGIC,
+ BPF_SKB_TSTAMP_UNSPEC))
+ inc_errs(SET_DTIME);
+ }
+
+ if (skb_ns(skb_type) == SRC_NS)
+ return bpf_fwd() ?
+ bpf_redirect_neigh(IFINDEX_DST, NULL, 0, 0) : TC_ACT_OK;
+ else
+ return bpf_fwd() ?
+ bpf_redirect_neigh(IFINDEX_SRC, NULL, 0, 0) : TC_ACT_OK;
+}
+
+/* egress@veth_(src|dst)_fwd@ns_fwd priority 101 */
+SEC("tc")
+int egress_fwdns_prio101(struct __sk_buff *skb)
+{
+ int skb_type;
+
+ skb_type = skb_get_type(skb);
+ if (skb_type == -1 || !skb_type)
+ /* Should have handled in prio100 */
+ return TC_ACT_SHOT;
+
+ if (skb->tstamp_type) {
+ if (fwdns_clear_dtime() ||
+ skb->tstamp_type != BPF_SKB_TSTAMP_DELIVERY_MONO ||
+ skb->tstamp != INGRESS_FWDNS_MAGIC)
+ inc_errs(EGRESS_FWDNS_P101);
+ else
+ inc_dtimes(EGRESS_FWDNS_P101);
+ } else {
+ if (!fwdns_clear_dtime())
+ inc_errs(EGRESS_FWDNS_P101);
+ }
+
+ if (skb->tstamp_type == BPF_SKB_TSTAMP_DELIVERY_MONO) {
+ skb->tstamp = EGRESS_FWDNS_MAGIC;
+ } else {
+ if (bpf_skb_set_tstamp(skb, EGRESS_FWDNS_MAGIC,
+ BPF_SKB_TSTAMP_DELIVERY_MONO))
+ inc_errs(SET_DTIME);
+ if (!bpf_skb_set_tstamp(skb, INGRESS_FWDNS_MAGIC,
+ BPF_SKB_TSTAMP_UNSPEC))
+ inc_errs(SET_DTIME);
+ }
+
+ return TC_ACT_OK;
+}
+
+char __license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/test_tc_edt.c b/tools/testing/selftests/bpf/progs/test_tc_edt.c
index bf28814bfde5..950a70b61e74 100644
--- a/tools/testing/selftests/bpf/progs/test_tc_edt.c
+++ b/tools/testing/selftests/bpf/progs/test_tc_edt.c
@@ -17,12 +17,12 @@
#define THROTTLE_RATE_BPS (5 * 1000 * 1000)
/* flow_key => last_tstamp timestamp used */
-struct bpf_map_def SEC("maps") flow_map = {
- .type = BPF_MAP_TYPE_HASH,
- .key_size = sizeof(uint32_t),
- .value_size = sizeof(uint64_t),
- .max_entries = 1,
-};
+struct {
+ __uint(type, BPF_MAP_TYPE_HASH);
+ __type(key, uint32_t);
+ __type(value, uint64_t);
+ __uint(max_entries, 1);
+} flow_map SEC(".maps");
static inline int throttle_flow(struct __sk_buff *skb)
{
diff --git a/tools/testing/selftests/bpf/progs/test_tc_link.c b/tools/testing/selftests/bpf/progs/test_tc_link.c
new file mode 100644
index 000000000000..992400acb957
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/test_tc_link.c
@@ -0,0 +1,69 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2023 Isovalent */
+#include <stdbool.h>
+
+#include <linux/bpf.h>
+#include <linux/if_ether.h>
+
+#include <bpf/bpf_endian.h>
+#include <bpf/bpf_helpers.h>
+
+char LICENSE[] SEC("license") = "GPL";
+
+bool seen_tc1;
+bool seen_tc2;
+bool seen_tc3;
+bool seen_tc4;
+bool seen_tc5;
+bool seen_tc6;
+bool seen_eth;
+
+SEC("tc/ingress")
+int tc1(struct __sk_buff *skb)
+{
+ struct ethhdr eth = {};
+
+ if (skb->protocol != __bpf_constant_htons(ETH_P_IP))
+ goto out;
+ if (bpf_skb_load_bytes(skb, 0, &eth, sizeof(eth)))
+ goto out;
+ seen_eth = eth.h_proto == bpf_htons(ETH_P_IP);
+out:
+ seen_tc1 = true;
+ return TCX_NEXT;
+}
+
+SEC("tc/egress")
+int tc2(struct __sk_buff *skb)
+{
+ seen_tc2 = true;
+ return TCX_NEXT;
+}
+
+SEC("tc/egress")
+int tc3(struct __sk_buff *skb)
+{
+ seen_tc3 = true;
+ return TCX_NEXT;
+}
+
+SEC("tc/egress")
+int tc4(struct __sk_buff *skb)
+{
+ seen_tc4 = true;
+ return TCX_NEXT;
+}
+
+SEC("tc/egress")
+int tc5(struct __sk_buff *skb)
+{
+ seen_tc5 = true;
+ return TCX_PASS;
+}
+
+SEC("tc/egress")
+int tc6(struct __sk_buff *skb)
+{
+ seen_tc6 = true;
+ return TCX_PASS;
+}
diff --git a/tools/testing/selftests/bpf/progs/test_tc_neigh.c b/tools/testing/selftests/bpf/progs/test_tc_neigh.c
new file mode 100644
index 000000000000..de15155f2609
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/test_tc_neigh.c
@@ -0,0 +1,136 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <stddef.h>
+#include <stdint.h>
+#include <stdbool.h>
+
+#include <linux/bpf.h>
+#include <linux/stddef.h>
+#include <linux/pkt_cls.h>
+#include <linux/if_ether.h>
+#include <linux/in.h>
+#include <linux/ip.h>
+#include <linux/ipv6.h>
+
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_endian.h>
+
+#ifndef ctx_ptr
+# define ctx_ptr(field) (void *)(long)(field)
+#endif
+
+#define ip4_src 0xac100164 /* 172.16.1.100 */
+#define ip4_dst 0xac100264 /* 172.16.2.100 */
+
+#define ip6_src { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, \
+ 0x00, 0x01, 0xde, 0xad, 0xbe, 0xef, 0xca, 0xfe }
+#define ip6_dst { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, \
+ 0x00, 0x02, 0xde, 0xad, 0xbe, 0xef, 0xca, 0xfe }
+
+#ifndef v6_equal
+# define v6_equal(a, b) (a.s6_addr32[0] == b.s6_addr32[0] && \
+ a.s6_addr32[1] == b.s6_addr32[1] && \
+ a.s6_addr32[2] == b.s6_addr32[2] && \
+ a.s6_addr32[3] == b.s6_addr32[3])
+#endif
+
+volatile const __u32 IFINDEX_SRC;
+volatile const __u32 IFINDEX_DST;
+
+static __always_inline bool is_remote_ep_v4(struct __sk_buff *skb,
+ __be32 addr)
+{
+ void *data_end = ctx_ptr(skb->data_end);
+ void *data = ctx_ptr(skb->data);
+ struct iphdr *ip4h;
+
+ if (data + sizeof(struct ethhdr) > data_end)
+ return false;
+
+ ip4h = (struct iphdr *)(data + sizeof(struct ethhdr));
+ if ((void *)(ip4h + 1) > data_end)
+ return false;
+
+ return ip4h->daddr == addr;
+}
+
+static __always_inline bool is_remote_ep_v6(struct __sk_buff *skb,
+ struct in6_addr addr)
+{
+ void *data_end = ctx_ptr(skb->data_end);
+ void *data = ctx_ptr(skb->data);
+ struct ipv6hdr *ip6h;
+
+ if (data + sizeof(struct ethhdr) > data_end)
+ return false;
+
+ ip6h = (struct ipv6hdr *)(data + sizeof(struct ethhdr));
+ if ((void *)(ip6h + 1) > data_end)
+ return false;
+
+ return v6_equal(ip6h->daddr, addr);
+}
+
+SEC("tc")
+int tc_chk(struct __sk_buff *skb)
+{
+ void *data_end = ctx_ptr(skb->data_end);
+ void *data = ctx_ptr(skb->data);
+ __u32 *raw = data;
+
+ if (data + sizeof(struct ethhdr) > data_end)
+ return TC_ACT_SHOT;
+
+ return !raw[0] && !raw[1] && !raw[2] ? TC_ACT_SHOT : TC_ACT_OK;
+}
+
+SEC("tc")
+int tc_dst(struct __sk_buff *skb)
+{
+ __u8 zero[ETH_ALEN * 2];
+ bool redirect = false;
+
+ switch (skb->protocol) {
+ case __bpf_constant_htons(ETH_P_IP):
+ redirect = is_remote_ep_v4(skb, __bpf_constant_htonl(ip4_src));
+ break;
+ case __bpf_constant_htons(ETH_P_IPV6):
+ redirect = is_remote_ep_v6(skb, (struct in6_addr){{ip6_src}});
+ break;
+ }
+
+ if (!redirect)
+ return TC_ACT_OK;
+
+ __builtin_memset(&zero, 0, sizeof(zero));
+ if (bpf_skb_store_bytes(skb, 0, &zero, sizeof(zero), 0) < 0)
+ return TC_ACT_SHOT;
+
+ return bpf_redirect_neigh(IFINDEX_SRC, NULL, 0, 0);
+}
+
+SEC("tc")
+int tc_src(struct __sk_buff *skb)
+{
+ __u8 zero[ETH_ALEN * 2];
+ bool redirect = false;
+
+ switch (skb->protocol) {
+ case __bpf_constant_htons(ETH_P_IP):
+ redirect = is_remote_ep_v4(skb, __bpf_constant_htonl(ip4_dst));
+ break;
+ case __bpf_constant_htons(ETH_P_IPV6):
+ redirect = is_remote_ep_v6(skb, (struct in6_addr){{ip6_dst}});
+ break;
+ }
+
+ if (!redirect)
+ return TC_ACT_OK;
+
+ __builtin_memset(&zero, 0, sizeof(zero));
+ if (bpf_skb_store_bytes(skb, 0, &zero, sizeof(zero), 0) < 0)
+ return TC_ACT_SHOT;
+
+ return bpf_redirect_neigh(IFINDEX_DST, NULL, 0, 0);
+}
+
+char __license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/test_tc_neigh_fib.c b/tools/testing/selftests/bpf/progs/test_tc_neigh_fib.c
new file mode 100644
index 000000000000..ec4cce19362d
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/test_tc_neigh_fib.c
@@ -0,0 +1,158 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <stdint.h>
+#include <stdbool.h>
+#include <stddef.h>
+
+#include <linux/bpf.h>
+#include <linux/stddef.h>
+#include <linux/pkt_cls.h>
+#include <linux/if_ether.h>
+#include <linux/in.h>
+#include <linux/ip.h>
+#include <linux/ipv6.h>
+
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_endian.h>
+
+#ifndef ctx_ptr
+# define ctx_ptr(field) (void *)(long)(field)
+#endif
+
+#define AF_INET 2
+#define AF_INET6 10
+
+static __always_inline int fill_fib_params_v4(struct __sk_buff *skb,
+ struct bpf_fib_lookup *fib_params)
+{
+ void *data_end = ctx_ptr(skb->data_end);
+ void *data = ctx_ptr(skb->data);
+ struct iphdr *ip4h;
+
+ if (data + sizeof(struct ethhdr) > data_end)
+ return -1;
+
+ ip4h = (struct iphdr *)(data + sizeof(struct ethhdr));
+ if ((void *)(ip4h + 1) > data_end)
+ return -1;
+
+ fib_params->family = AF_INET;
+ fib_params->tos = ip4h->tos;
+ fib_params->l4_protocol = ip4h->protocol;
+ fib_params->sport = 0;
+ fib_params->dport = 0;
+ fib_params->tot_len = bpf_ntohs(ip4h->tot_len);
+ fib_params->ipv4_src = ip4h->saddr;
+ fib_params->ipv4_dst = ip4h->daddr;
+
+ return 0;
+}
+
+static __always_inline int fill_fib_params_v6(struct __sk_buff *skb,
+ struct bpf_fib_lookup *fib_params)
+{
+ struct in6_addr *src = (struct in6_addr *)fib_params->ipv6_src;
+ struct in6_addr *dst = (struct in6_addr *)fib_params->ipv6_dst;
+ void *data_end = ctx_ptr(skb->data_end);
+ void *data = ctx_ptr(skb->data);
+ struct ipv6hdr *ip6h;
+
+ if (data + sizeof(struct ethhdr) > data_end)
+ return -1;
+
+ ip6h = (struct ipv6hdr *)(data + sizeof(struct ethhdr));
+ if ((void *)(ip6h + 1) > data_end)
+ return -1;
+
+ fib_params->family = AF_INET6;
+ fib_params->flowinfo = 0;
+ fib_params->l4_protocol = ip6h->nexthdr;
+ fib_params->sport = 0;
+ fib_params->dport = 0;
+ fib_params->tot_len = bpf_ntohs(ip6h->payload_len);
+ *src = ip6h->saddr;
+ *dst = ip6h->daddr;
+
+ return 0;
+}
+
+SEC("tc")
+int tc_chk(struct __sk_buff *skb)
+{
+ void *data_end = ctx_ptr(skb->data_end);
+ void *data = ctx_ptr(skb->data);
+ __u32 *raw = data;
+
+ if (data + sizeof(struct ethhdr) > data_end)
+ return TC_ACT_SHOT;
+
+ return !raw[0] && !raw[1] && !raw[2] ? TC_ACT_SHOT : TC_ACT_OK;
+}
+
+static __always_inline int tc_redir(struct __sk_buff *skb)
+{
+ struct bpf_fib_lookup fib_params = { .ifindex = skb->ingress_ifindex };
+ __u8 zero[ETH_ALEN * 2];
+ int ret = -1;
+
+ switch (skb->protocol) {
+ case __bpf_constant_htons(ETH_P_IP):
+ ret = fill_fib_params_v4(skb, &fib_params);
+ break;
+ case __bpf_constant_htons(ETH_P_IPV6):
+ ret = fill_fib_params_v6(skb, &fib_params);
+ break;
+ }
+
+ if (ret)
+ return TC_ACT_OK;
+
+ ret = bpf_fib_lookup(skb, &fib_params, sizeof(fib_params), 0);
+ if (ret == BPF_FIB_LKUP_RET_NOT_FWDED || ret < 0)
+ return TC_ACT_OK;
+
+ __builtin_memset(&zero, 0, sizeof(zero));
+ if (bpf_skb_store_bytes(skb, 0, &zero, sizeof(zero), 0) < 0)
+ return TC_ACT_SHOT;
+
+ if (ret == BPF_FIB_LKUP_RET_NO_NEIGH) {
+ struct bpf_redir_neigh nh_params = {};
+
+ nh_params.nh_family = fib_params.family;
+ __builtin_memcpy(&nh_params.ipv6_nh, &fib_params.ipv6_dst,
+ sizeof(nh_params.ipv6_nh));
+
+ return bpf_redirect_neigh(fib_params.ifindex, &nh_params,
+ sizeof(nh_params), 0);
+
+ } else if (ret == BPF_FIB_LKUP_RET_SUCCESS) {
+ void *data_end = ctx_ptr(skb->data_end);
+ struct ethhdr *eth = ctx_ptr(skb->data);
+
+ if (eth + 1 > data_end)
+ return TC_ACT_SHOT;
+
+ __builtin_memcpy(eth->h_dest, fib_params.dmac, ETH_ALEN);
+ __builtin_memcpy(eth->h_source, fib_params.smac, ETH_ALEN);
+
+ return bpf_redirect(fib_params.ifindex, 0);
+ }
+
+ return TC_ACT_SHOT;
+}
+
+/* these are identical, but keep them separate for compatibility with the
+ * section names expected by test_tc_redirect.sh
+ */
+SEC("tc")
+int tc_dst(struct __sk_buff *skb)
+{
+ return tc_redir(skb);
+}
+
+SEC("tc")
+int tc_src(struct __sk_buff *skb)
+{
+ return tc_redir(skb);
+}
+
+char __license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/test_tc_peer.c b/tools/testing/selftests/bpf/progs/test_tc_peer.c
new file mode 100644
index 000000000000..365eacb5dc34
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/test_tc_peer.c
@@ -0,0 +1,63 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <stdint.h>
+#include <stdbool.h>
+
+#include <linux/bpf.h>
+#include <linux/stddef.h>
+#include <linux/pkt_cls.h>
+#include <linux/if_ether.h>
+#include <linux/ip.h>
+
+#include <bpf/bpf_helpers.h>
+
+volatile const __u32 IFINDEX_SRC;
+volatile const __u32 IFINDEX_DST;
+
+static const __u8 src_mac[] = {0x00, 0x11, 0x22, 0x33, 0x44, 0x55};
+static const __u8 dst_mac[] = {0x00, 0x22, 0x33, 0x44, 0x55, 0x66};
+
+SEC("tc")
+int tc_chk(struct __sk_buff *skb)
+{
+ return TC_ACT_SHOT;
+}
+
+SEC("tc")
+int tc_dst(struct __sk_buff *skb)
+{
+ return bpf_redirect_peer(IFINDEX_SRC, 0);
+}
+
+SEC("tc")
+int tc_src(struct __sk_buff *skb)
+{
+ return bpf_redirect_peer(IFINDEX_DST, 0);
+}
+
+SEC("tc")
+int tc_dst_l3(struct __sk_buff *skb)
+{
+ return bpf_redirect(IFINDEX_SRC, 0);
+}
+
+SEC("tc")
+int tc_src_l3(struct __sk_buff *skb)
+{
+ __u16 proto = skb->protocol;
+
+ if (bpf_skb_change_head(skb, ETH_HLEN, 0) != 0)
+ return TC_ACT_SHOT;
+
+ if (bpf_skb_store_bytes(skb, 0, &src_mac, ETH_ALEN, 0) != 0)
+ return TC_ACT_SHOT;
+
+ if (bpf_skb_store_bytes(skb, ETH_ALEN, &dst_mac, ETH_ALEN, 0) != 0)
+ return TC_ACT_SHOT;
+
+ if (bpf_skb_store_bytes(skb, ETH_ALEN + ETH_ALEN, &proto, sizeof(__u16), 0) != 0)
+ return TC_ACT_SHOT;
+
+ return bpf_redirect_peer(IFINDEX_DST, 0);
+}
+
+char __license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/test_tc_tunnel.c b/tools/testing/selftests/bpf/progs/test_tc_tunnel.c
index 37bce7a7c394..404124a93892 100644
--- a/tools/testing/selftests/bpf/progs/test_tc_tunnel.c
+++ b/tools/testing/selftests/bpf/progs/test_tc_tunnel.c
@@ -19,19 +19,41 @@
#include <bpf/bpf_endian.h>
#include <bpf/bpf_helpers.h>
+#include "bpf_compiler.h"
+
+#pragma GCC diagnostic ignored "-Waddress-of-packed-member"
static const int cfg_port = 8000;
static const int cfg_udp_src = 20000;
+#define L2_PAD_SZ (sizeof(struct vxlanhdr) + ETH_HLEN)
+
#define UDP_PORT 5555
#define MPLS_OVER_UDP_PORT 6635
#define ETH_OVER_UDP_PORT 7777
+#define VXLAN_UDP_PORT 8472
+
+#define EXTPROTO_VXLAN 0x1
+
+#define VXLAN_N_VID (1u << 24)
+#define VXLAN_VNI_MASK bpf_htonl((VXLAN_N_VID - 1) << 8)
+#define VXLAN_FLAGS 0x8
+#define VXLAN_VNI 1
+
+#ifndef NEXTHDR_DEST
+#define NEXTHDR_DEST 60
+#endif
/* MPLS label 1000 with S bit (last label) set and ttl of 255. */
static const __u32 mpls_label = __bpf_constant_htonl(1000 << 12 |
MPLS_LS_S_MASK | 0xff);
+struct vxlanhdr {
+ __be32 vx_flags;
+ __be32 vx_vni;
+} __attribute__((packed));
+
struct gre_hdr {
__be16 flags;
__be16 protocol;
@@ -45,13 +67,13 @@ union l4hdr {
struct v4hdr {
struct iphdr ip;
union l4hdr l4hdr;
- __u8 pad[16]; /* enough space for L2 header */
+ __u8 pad[L2_PAD_SZ]; /* space for L2 header / vxlan header ... */
} __attribute__((packed));
struct v6hdr {
struct ipv6hdr ip;
union l4hdr l4hdr;
- __u8 pad[16]; /* enough space for L2 header */
+ __u8 pad[L2_PAD_SZ]; /* space for L2 header / vxlan header ... */
} __attribute__((packed));
static __always_inline void set_ipv4_csum(struct iphdr *iph)
@@ -62,21 +84,22 @@ static __always_inline void set_ipv4_csum(struct iphdr *iph)
iph->check = 0;
-#pragma clang loop unroll(full)
+ __pragma_loop_unroll_full
for (i = 0, csum = 0; i < sizeof(*iph) >> 1; i++)
csum += *iph16++;
iph->check = ~((csum & 0xffff) + (csum >> 16));
}
-static __always_inline int encap_ipv4(struct __sk_buff *skb, __u8 encap_proto,
- __u16 l2_proto)
+static __always_inline int __encap_ipv4(struct __sk_buff *skb, __u8 encap_proto,
+ __u16 l2_proto, __u16 ext_proto)
{
__u16 udp_dst = UDP_PORT;
struct iphdr iph_inner;
struct v4hdr h_outer;
struct tcphdr tcph;
int olen, l2_len;
+ __u8 *l2_hdr = NULL;
int tcp_off;
__u64 flags;
@@ -141,7 +164,11 @@ static __always_inline int encap_ipv4(struct __sk_buff *skb, __u8 encap_proto,
break;
case ETH_P_TEB:
l2_len = ETH_HLEN;
- udp_dst = ETH_OVER_UDP_PORT;
+ if (ext_proto & EXTPROTO_VXLAN) {
+ udp_dst = VXLAN_UDP_PORT;
+ l2_len += sizeof(struct vxlanhdr);
+ } else
+ udp_dst = ETH_OVER_UDP_PORT;
break;
}
flags |= BPF_F_ADJ_ROOM_ENCAP_L2(l2_len);
@@ -171,14 +198,26 @@ static __always_inline int encap_ipv4(struct __sk_buff *skb, __u8 encap_proto,
}
/* add L2 encap (if specified) */
+ l2_hdr = (__u8 *)&h_outer + olen;
switch (l2_proto) {
case ETH_P_MPLS_UC:
- *((__u32 *)((__u8 *)&h_outer + olen)) = mpls_label;
+ *(__u32 *)l2_hdr = mpls_label;
break;
case ETH_P_TEB:
- if (bpf_skb_load_bytes(skb, 0, (__u8 *)&h_outer + olen,
- ETH_HLEN))
+ flags |= BPF_F_ADJ_ROOM_ENCAP_L2_ETH;
+
+ if (ext_proto & EXTPROTO_VXLAN) {
+ struct vxlanhdr *vxlan_hdr = (struct vxlanhdr *)l2_hdr;
+
+ vxlan_hdr->vx_flags = VXLAN_FLAGS;
+ vxlan_hdr->vx_vni = bpf_htonl((VXLAN_VNI & VXLAN_VNI_MASK) << 8);
+
+ l2_hdr += sizeof(struct vxlanhdr);
+ }
+
+ if (bpf_skb_load_bytes(skb, 0, l2_hdr, ETH_HLEN))
return TC_ACT_SHOT;
+
break;
}
olen += l2_len;
@@ -214,14 +253,21 @@ static __always_inline int encap_ipv4(struct __sk_buff *skb, __u8 encap_proto,
return TC_ACT_OK;
}
-static __always_inline int encap_ipv6(struct __sk_buff *skb, __u8 encap_proto,
+static __always_inline int encap_ipv4(struct __sk_buff *skb, __u8 encap_proto,
__u16 l2_proto)
{
+ return __encap_ipv4(skb, encap_proto, l2_proto, 0);
+}
+
+static __always_inline int __encap_ipv6(struct __sk_buff *skb, __u8 encap_proto,
+ __u16 l2_proto, __u16 ext_proto)
+{
__u16 udp_dst = UDP_PORT;
struct ipv6hdr iph_inner;
struct v6hdr h_outer;
struct tcphdr tcph;
int olen, l2_len;
+ __u8 *l2_hdr = NULL;
__u16 tot_len;
__u64 flags;
@@ -249,7 +295,11 @@ static __always_inline int encap_ipv6(struct __sk_buff *skb, __u8 encap_proto,
break;
case ETH_P_TEB:
l2_len = ETH_HLEN;
- udp_dst = ETH_OVER_UDP_PORT;
+ if (ext_proto & EXTPROTO_VXLAN) {
+ udp_dst = VXLAN_UDP_PORT;
+ l2_len += sizeof(struct vxlanhdr);
+ } else
+ udp_dst = ETH_OVER_UDP_PORT;
break;
}
flags |= BPF_F_ADJ_ROOM_ENCAP_L2(l2_len);
@@ -267,7 +317,7 @@ static __always_inline int encap_ipv6(struct __sk_buff *skb, __u8 encap_proto,
h_outer.l4hdr.udp.source = __bpf_constant_htons(cfg_udp_src);
h_outer.l4hdr.udp.dest = bpf_htons(udp_dst);
tot_len = bpf_ntohs(iph_inner.payload_len) + sizeof(iph_inner) +
- sizeof(h_outer.l4hdr.udp);
+ sizeof(h_outer.l4hdr.udp) + l2_len;
h_outer.l4hdr.udp.check = 0;
h_outer.l4hdr.udp.len = bpf_htons(tot_len);
break;
@@ -278,13 +328,24 @@ static __always_inline int encap_ipv6(struct __sk_buff *skb, __u8 encap_proto,
}
/* add L2 encap (if specified) */
+ l2_hdr = (__u8 *)&h_outer + olen;
switch (l2_proto) {
case ETH_P_MPLS_UC:
- *((__u32 *)((__u8 *)&h_outer + olen)) = mpls_label;
+ *(__u32 *)l2_hdr = mpls_label;
break;
case ETH_P_TEB:
- if (bpf_skb_load_bytes(skb, 0, (__u8 *)&h_outer + olen,
- ETH_HLEN))
+ flags |= BPF_F_ADJ_ROOM_ENCAP_L2_ETH;
+
+ if (ext_proto & EXTPROTO_VXLAN) {
+ struct vxlanhdr *vxlan_hdr = (struct vxlanhdr *)l2_hdr;
+
+ vxlan_hdr->vx_flags = VXLAN_FLAGS;
+ vxlan_hdr->vx_vni = bpf_htonl((VXLAN_VNI & VXLAN_VNI_MASK) << 8);
+
+ l2_hdr += sizeof(struct vxlanhdr);
+ }
+
+ if (bpf_skb_load_bytes(skb, 0, l2_hdr, ETH_HLEN))
return TC_ACT_SHOT;
break;
}
@@ -309,6 +370,67 @@ static __always_inline int encap_ipv6(struct __sk_buff *skb, __u8 encap_proto,
return TC_ACT_OK;
}
+static int encap_ipv6_ipip6(struct __sk_buff *skb)
+{
+ struct iphdr iph_inner;
+ struct v6hdr h_outer;
+ struct tcphdr tcph;
+ struct ethhdr eth;
+ __u64 flags;
+ int olen;
+
+ if (bpf_skb_load_bytes(skb, ETH_HLEN, &iph_inner,
+ sizeof(iph_inner)) < 0)
+ return TC_ACT_OK;
+
+ /* filter only packets we want */
+ if (bpf_skb_load_bytes(skb, ETH_HLEN + (iph_inner.ihl << 2),
+ &tcph, sizeof(tcph)) < 0)
+ return TC_ACT_OK;
+
+ if (tcph.dest != __bpf_constant_htons(cfg_port))
+ return TC_ACT_OK;
+
+ olen = sizeof(h_outer.ip);
+
+ flags = BPF_F_ADJ_ROOM_FIXED_GSO | BPF_F_ADJ_ROOM_ENCAP_L3_IPV6;
+
+ /* add room between mac and network header */
+ if (bpf_skb_adjust_room(skb, olen, BPF_ADJ_ROOM_MAC, flags))
+ return TC_ACT_SHOT;
+
+ /* prepare new outer network header */
+ memset(&h_outer.ip, 0, sizeof(h_outer.ip));
+ h_outer.ip.version = 6;
+ h_outer.ip.hop_limit = iph_inner.ttl;
+ h_outer.ip.saddr.s6_addr[1] = 0xfd;
+ h_outer.ip.saddr.s6_addr[15] = 1;
+ h_outer.ip.daddr.s6_addr[1] = 0xfd;
+ h_outer.ip.daddr.s6_addr[15] = 2;
+ h_outer.ip.payload_len = iph_inner.tot_len;
+ h_outer.ip.nexthdr = IPPROTO_IPIP;
+
+ /* store new outer network header */
+ if (bpf_skb_store_bytes(skb, ETH_HLEN, &h_outer, olen,
+ BPF_F_INVALIDATE_HASH) < 0)
+ return TC_ACT_SHOT;
+
+ /* update eth->h_proto */
+ if (bpf_skb_load_bytes(skb, 0, &eth, sizeof(eth)) < 0)
+ return TC_ACT_SHOT;
+ eth.h_proto = bpf_htons(ETH_P_IPV6);
+ if (bpf_skb_store_bytes(skb, 0, &eth, sizeof(eth), 0) < 0)
+ return TC_ACT_SHOT;
+
+ return TC_ACT_OK;
+}
+
+static __always_inline int encap_ipv6(struct __sk_buff *skb, __u8 encap_proto,
+ __u16 l2_proto)
+{
+ return __encap_ipv6(skb, encap_proto, l2_proto, 0);
+}
+
SEC("encap_ipip_none")
int __encap_ipip_none(struct __sk_buff *skb)
{
@@ -372,6 +494,17 @@ int __encap_udp_eth(struct __sk_buff *skb)
return TC_ACT_OK;
}
+SEC("encap_vxlan_eth")
+int __encap_vxlan_eth(struct __sk_buff *skb)
+{
+ if (skb->protocol == __bpf_constant_htons(ETH_P_IP))
+ return __encap_ipv4(skb, IPPROTO_UDP,
+ ETH_P_TEB,
+ EXTPROTO_VXLAN);
+ else
+ return TC_ACT_OK;
+}
+
SEC("encap_sit_none")
int __encap_sit_none(struct __sk_buff *skb)
{
@@ -390,6 +523,15 @@ int __encap_ip6tnl_none(struct __sk_buff *skb)
return TC_ACT_OK;
}
+SEC("encap_ipip6_none")
+int __encap_ipip6_none(struct __sk_buff *skb)
+{
+ if (skb->protocol == __bpf_constant_htons(ETH_P_IP))
+ return encap_ipv6_ipip6(skb);
+ else
+ return TC_ACT_OK;
+}
+
SEC("encap_ip6gre_none")
int __encap_ip6gre_none(struct __sk_buff *skb)
{
@@ -444,16 +586,46 @@ int __encap_ip6udp_eth(struct __sk_buff *skb)
return TC_ACT_OK;
}
+SEC("encap_ip6vxlan_eth")
+int __encap_ip6vxlan_eth(struct __sk_buff *skb)
+{
+ if (skb->protocol == __bpf_constant_htons(ETH_P_IPV6))
+ return __encap_ipv6(skb, IPPROTO_UDP,
+ ETH_P_TEB,
+ EXTPROTO_VXLAN);
+ else
+ return TC_ACT_OK;
+}
+
static int decap_internal(struct __sk_buff *skb, int off, int len, char proto)
{
- char buf[sizeof(struct v6hdr)];
+ __u64 flags = BPF_F_ADJ_ROOM_FIXED_GSO;
+ struct ipv6_opt_hdr ip6_opt_hdr;
struct gre_hdr greh;
struct udphdr udph;
int olen = len;
switch (proto) {
case IPPROTO_IPIP:
+ flags |= BPF_F_ADJ_ROOM_DECAP_L3_IPV4;
+ break;
case IPPROTO_IPV6:
+ flags |= BPF_F_ADJ_ROOM_DECAP_L3_IPV6;
+ break;
+ case NEXTHDR_DEST:
+ if (bpf_skb_load_bytes(skb, off + len, &ip6_opt_hdr,
+ sizeof(ip6_opt_hdr)) < 0)
+ return TC_ACT_OK;
+ switch (ip6_opt_hdr.nexthdr) {
+ case IPPROTO_IPIP:
+ flags |= BPF_F_ADJ_ROOM_DECAP_L3_IPV4;
+ break;
+ case IPPROTO_IPV6:
+ flags |= BPF_F_ADJ_ROOM_DECAP_L3_IPV6;
+ break;
+ default:
+ return TC_ACT_OK;
+ }
break;
case IPPROTO_GRE:
olen += sizeof(struct gre_hdr);
@@ -479,14 +651,16 @@ static int decap_internal(struct __sk_buff *skb, int off, int len, char proto)
case ETH_OVER_UDP_PORT:
olen += ETH_HLEN;
break;
+ case VXLAN_UDP_PORT:
+ olen += ETH_HLEN + sizeof(struct vxlanhdr);
+ break;
}
break;
default:
return TC_ACT_OK;
}
- if (bpf_skb_adjust_room(skb, -olen, BPF_ADJ_ROOM_MAC,
- BPF_F_ADJ_ROOM_FIXED_GSO))
+ if (bpf_skb_adjust_room(skb, -olen, BPF_ADJ_ROOM_MAC, flags))
return TC_ACT_SHOT;
return TC_ACT_OK;
diff --git a/tools/testing/selftests/bpf/progs/test_tcp_check_syncookie_kern.c b/tools/testing/selftests/bpf/progs/test_tcp_check_syncookie_kern.c
index 47cbe2eeae43..6edebce563b5 100644
--- a/tools/testing/selftests/bpf/progs/test_tcp_check_syncookie_kern.c
+++ b/tools/testing/selftests/bpf/progs/test_tcp_check_syncookie_kern.c
@@ -16,12 +16,12 @@
#include <bpf/bpf_helpers.h>
#include <bpf/bpf_endian.h>
-struct bpf_map_def SEC("maps") results = {
- .type = BPF_MAP_TYPE_ARRAY,
- .key_size = sizeof(__u32),
- .value_size = sizeof(__u32),
- .max_entries = 3,
-};
+struct {
+ __uint(type, BPF_MAP_TYPE_ARRAY);
+ __type(key, __u32);
+ __type(value, __u32);
+ __uint(max_entries, 3);
+} results SEC(".maps");
static __always_inline __s64 gen_syncookie(void *data_end, struct bpf_sock *sk,
void *iph, __u32 ip_size,
@@ -148,7 +148,7 @@ release:
bpf_sk_release(sk);
}
-SEC("clsact/check_syncookie")
+SEC("tc")
int check_syncookie_clsact(struct __sk_buff *skb)
{
check_syncookie(skb, (void *)(long)skb->data,
@@ -156,7 +156,7 @@ int check_syncookie_clsact(struct __sk_buff *skb)
return TC_ACT_OK;
}
-SEC("xdp/check_syncookie")
+SEC("xdp")
int check_syncookie_xdp(struct xdp_md *ctx)
{
check_syncookie(ctx, (void *)(long)ctx->data,
diff --git a/tools/testing/selftests/bpf/progs/test_tcp_custom_syncookie.c b/tools/testing/selftests/bpf/progs/test_tcp_custom_syncookie.c
new file mode 100644
index 000000000000..c8e4553648bf
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/test_tcp_custom_syncookie.c
@@ -0,0 +1,595 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright Amazon.com Inc. or its affiliates. */
+
+#include "vmlinux.h"
+
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_endian.h>
+#include "bpf_tracing_net.h"
+#include "bpf_kfuncs.h"
+#include "test_siphash.h"
+#include "test_tcp_custom_syncookie.h"
+
+#define MAX_PACKET_OFF 0xffff
+
+/* Hash is calculated for each client and split into ISN and TS.
+ *
+ * MSB LSB
+ * ISN: | 31 ... 8 | 7 6 | 5 | 4 | 3 2 1 0 |
+ * | Hash_1 | MSS | ECN | SACK | WScale |
+ *
+ * TS: | 31 ... 8 | 7 ... 0 |
+ * | Random | Hash_2 |
+ */
+#define COOKIE_BITS 8
+#define COOKIE_MASK (((__u32)1 << COOKIE_BITS) - 1)
+
+enum {
+ /* 0xf is invalid thus means that SYN did not have WScale. */
+ BPF_SYNCOOKIE_WSCALE_MASK = (1 << 4) - 1,
+ BPF_SYNCOOKIE_SACK = (1 << 4),
+ BPF_SYNCOOKIE_ECN = (1 << 5),
+};
+
+#define MSS_LOCAL_IPV4 65495
+#define MSS_LOCAL_IPV6 65476
+
+const __u16 msstab4[] = {
+ 536,
+ 1300,
+ 1460,
+ MSS_LOCAL_IPV4,
+};
+
+const __u16 msstab6[] = {
+ 1280 - 60, /* IPV6_MIN_MTU - 60 */
+ 1480 - 60,
+ 9000 - 60,
+ MSS_LOCAL_IPV6,
+};
+
+static siphash_key_t test_key_siphash = {
+ { 0x0706050403020100ULL, 0x0f0e0d0c0b0a0908ULL }
+};
+
+struct tcp_syncookie {
+ struct __sk_buff *skb;
+ void *data;
+ void *data_end;
+ struct ethhdr *eth;
+ struct iphdr *ipv4;
+ struct ipv6hdr *ipv6;
+ struct tcphdr *tcp;
+ __be32 *ptr32;
+ struct bpf_tcp_req_attrs attrs;
+ u32 off;
+ u32 cookie;
+ u64 first;
+};
+
+bool handled_syn, handled_ack;
+
+static int tcp_load_headers(struct tcp_syncookie *ctx)
+{
+ ctx->data = (void *)(long)ctx->skb->data;
+ ctx->data_end = (void *)(long)ctx->skb->data_end;
+ ctx->eth = (struct ethhdr *)(long)ctx->skb->data;
+
+ if (ctx->eth + 1 > ctx->data_end)
+ goto err;
+
+ switch (bpf_ntohs(ctx->eth->h_proto)) {
+ case ETH_P_IP:
+ ctx->ipv4 = (struct iphdr *)(ctx->eth + 1);
+
+ if (ctx->ipv4 + 1 > ctx->data_end)
+ goto err;
+
+ if (ctx->ipv4->ihl != sizeof(*ctx->ipv4) / 4)
+ goto err;
+
+ if (ctx->ipv4->version != 4)
+ goto err;
+
+ if (ctx->ipv4->protocol != IPPROTO_TCP)
+ goto err;
+
+ ctx->tcp = (struct tcphdr *)(ctx->ipv4 + 1);
+ break;
+ case ETH_P_IPV6:
+ ctx->ipv6 = (struct ipv6hdr *)(ctx->eth + 1);
+
+ if (ctx->ipv6 + 1 > ctx->data_end)
+ goto err;
+
+ if (ctx->ipv6->version != 6)
+ goto err;
+
+ if (ctx->ipv6->nexthdr != NEXTHDR_TCP)
+ goto err;
+
+ ctx->tcp = (struct tcphdr *)(ctx->ipv6 + 1);
+ break;
+ default:
+ goto err;
+ }
+
+ if (ctx->tcp + 1 > ctx->data_end)
+ goto err;
+
+ return 0;
+err:
+ return -1;
+}
+
+static int tcp_reload_headers(struct tcp_syncookie *ctx)
+{
+ /* Without volatile,
+ * R3 32-bit pointer arithmetic prohibited
+ */
+ volatile u64 data_len = ctx->skb->data_end - ctx->skb->data;
+
+ if (ctx->tcp->doff < sizeof(*ctx->tcp) / 4)
+ goto err;
+
+ /* Needed to calculate csum and parse TCP options. */
+ if (bpf_skb_change_tail(ctx->skb, data_len + 60 - ctx->tcp->doff * 4, 0))
+ goto err;
+
+ ctx->data = (void *)(long)ctx->skb->data;
+ ctx->data_end = (void *)(long)ctx->skb->data_end;
+ ctx->eth = (struct ethhdr *)(long)ctx->skb->data;
+ if (ctx->ipv4) {
+ ctx->ipv4 = (struct iphdr *)(ctx->eth + 1);
+ ctx->ipv6 = NULL;
+ ctx->tcp = (struct tcphdr *)(ctx->ipv4 + 1);
+ } else {
+ ctx->ipv4 = NULL;
+ ctx->ipv6 = (struct ipv6hdr *)(ctx->eth + 1);
+ ctx->tcp = (struct tcphdr *)(ctx->ipv6 + 1);
+ }
+
+ if ((void *)ctx->tcp + 60 > ctx->data_end)
+ goto err;
+
+ return 0;
+err:
+ return -1;
+}
+
+static __sum16 tcp_v4_csum(struct tcp_syncookie *ctx, __wsum csum)
+{
+ return csum_tcpudp_magic(ctx->ipv4->saddr, ctx->ipv4->daddr,
+ ctx->tcp->doff * 4, IPPROTO_TCP, csum);
+}
+
+static __sum16 tcp_v6_csum(struct tcp_syncookie *ctx, __wsum csum)
+{
+ return csum_ipv6_magic(&ctx->ipv6->saddr, &ctx->ipv6->daddr,
+ ctx->tcp->doff * 4, IPPROTO_TCP, csum);
+}
+
+static int tcp_validate_header(struct tcp_syncookie *ctx)
+{
+ s64 csum;
+
+ if (tcp_reload_headers(ctx))
+ goto err;
+
+ csum = bpf_csum_diff(0, 0, (void *)ctx->tcp, ctx->tcp->doff * 4, 0);
+ if (csum < 0)
+ goto err;
+
+ if (ctx->ipv4) {
+ /* check tcp_v4_csum(csum) is 0 if not on lo. */
+
+ csum = bpf_csum_diff(0, 0, (void *)ctx->ipv4, ctx->ipv4->ihl * 4, 0);
+ if (csum < 0)
+ goto err;
+
+ if (csum_fold(csum) != 0)
+ goto err;
+ } else if (ctx->ipv6) {
+ /* check tcp_v6_csum(csum) is 0 if not on lo. */
+ }
+
+ return 0;
+err:
+ return -1;
+}
+
+static __always_inline void *next(struct tcp_syncookie *ctx, __u32 sz)
+{
+ __u64 off = ctx->off;
+ __u8 *data;
+
+ /* Verifier forbids access to packet when offset exceeds MAX_PACKET_OFF */
+ if (off > MAX_PACKET_OFF - sz)
+ return NULL;
+
+ data = ctx->data + off;
+ barrier_var(data);
+ if (data + sz >= ctx->data_end)
+ return NULL;
+
+ ctx->off += sz;
+ return data;
+}
+
+static int tcp_parse_option(__u32 index, struct tcp_syncookie *ctx)
+{
+ __u8 *opcode, *opsize, *wscale;
+ __u32 *tsval, *tsecr;
+ __u16 *mss;
+ __u32 off;
+
+ off = ctx->off;
+ opcode = next(ctx, 1);
+ if (!opcode)
+ goto stop;
+
+ if (*opcode == TCPOPT_EOL)
+ goto stop;
+
+ if (*opcode == TCPOPT_NOP)
+ goto next;
+
+ opsize = next(ctx, 1);
+ if (!opsize)
+ goto stop;
+
+ if (*opsize < 2)
+ goto stop;
+
+ switch (*opcode) {
+ case TCPOPT_MSS:
+ mss = next(ctx, 2);
+ if (*opsize == TCPOLEN_MSS && ctx->tcp->syn && mss)
+ ctx->attrs.mss = get_unaligned_be16(mss);
+ break;
+ case TCPOPT_WINDOW:
+ wscale = next(ctx, 1);
+ if (*opsize == TCPOLEN_WINDOW && ctx->tcp->syn && wscale) {
+ ctx->attrs.wscale_ok = 1;
+ ctx->attrs.snd_wscale = *wscale;
+ }
+ break;
+ case TCPOPT_TIMESTAMP:
+ tsval = next(ctx, 4);
+ tsecr = next(ctx, 4);
+ if (*opsize == TCPOLEN_TIMESTAMP && tsval && tsecr) {
+ ctx->attrs.rcv_tsval = get_unaligned_be32(tsval);
+ ctx->attrs.rcv_tsecr = get_unaligned_be32(tsecr);
+
+ if (ctx->tcp->syn && ctx->attrs.rcv_tsecr)
+ ctx->attrs.tstamp_ok = 0;
+ else
+ ctx->attrs.tstamp_ok = 1;
+ }
+ break;
+ case TCPOPT_SACK_PERM:
+ if (*opsize == TCPOLEN_SACK_PERM && ctx->tcp->syn)
+ ctx->attrs.sack_ok = 1;
+ break;
+ }
+
+ ctx->off = off + *opsize;
+next:
+ return 0;
+stop:
+ return 1;
+}
+
+static void tcp_parse_options(struct tcp_syncookie *ctx)
+{
+ ctx->off = (__u8 *)(ctx->tcp + 1) - (__u8 *)ctx->data,
+
+ bpf_loop(40, tcp_parse_option, ctx, 0);
+}
+
+static int tcp_validate_sysctl(struct tcp_syncookie *ctx)
+{
+ if ((ctx->ipv4 && ctx->attrs.mss != MSS_LOCAL_IPV4) ||
+ (ctx->ipv6 && ctx->attrs.mss != MSS_LOCAL_IPV6))
+ goto err;
+
+ if (!ctx->attrs.wscale_ok || ctx->attrs.snd_wscale != 7)
+ goto err;
+
+ if (!ctx->attrs.tstamp_ok)
+ goto err;
+
+ if (!ctx->attrs.sack_ok)
+ goto err;
+
+ if (!ctx->tcp->ece || !ctx->tcp->cwr)
+ goto err;
+
+ return 0;
+err:
+ return -1;
+}
+
+static void tcp_prepare_cookie(struct tcp_syncookie *ctx)
+{
+ u32 seq = bpf_ntohl(ctx->tcp->seq);
+ u64 first = 0, second;
+ int mssind = 0;
+ u32 hash;
+
+ if (ctx->ipv4) {
+ for (mssind = ARRAY_SIZE(msstab4) - 1; mssind; mssind--)
+ if (ctx->attrs.mss >= msstab4[mssind])
+ break;
+
+ ctx->attrs.mss = msstab4[mssind];
+
+ first = (u64)ctx->ipv4->saddr << 32 | ctx->ipv4->daddr;
+ } else if (ctx->ipv6) {
+ for (mssind = ARRAY_SIZE(msstab6) - 1; mssind; mssind--)
+ if (ctx->attrs.mss >= msstab6[mssind])
+ break;
+
+ ctx->attrs.mss = msstab6[mssind];
+
+ first = (u64)ctx->ipv6->saddr.in6_u.u6_addr8[0] << 32 |
+ ctx->ipv6->daddr.in6_u.u6_addr32[0];
+ }
+
+ second = (u64)seq << 32 | ctx->tcp->source << 16 | ctx->tcp->dest;
+ hash = siphash_2u64(first, second, &test_key_siphash);
+
+ if (ctx->attrs.tstamp_ok) {
+ ctx->attrs.rcv_tsecr = bpf_get_prandom_u32();
+ ctx->attrs.rcv_tsecr &= ~COOKIE_MASK;
+ ctx->attrs.rcv_tsecr |= hash & COOKIE_MASK;
+ }
+
+ hash &= ~COOKIE_MASK;
+ hash |= mssind << 6;
+
+ if (ctx->attrs.wscale_ok)
+ hash |= ctx->attrs.snd_wscale & BPF_SYNCOOKIE_WSCALE_MASK;
+
+ if (ctx->attrs.sack_ok)
+ hash |= BPF_SYNCOOKIE_SACK;
+
+ if (ctx->attrs.tstamp_ok && ctx->tcp->ece && ctx->tcp->cwr)
+ hash |= BPF_SYNCOOKIE_ECN;
+
+ ctx->cookie = hash;
+}
+
+static void tcp_write_options(struct tcp_syncookie *ctx)
+{
+ ctx->ptr32 = (__be32 *)(ctx->tcp + 1);
+
+ *ctx->ptr32++ = bpf_htonl(TCPOPT_MSS << 24 | TCPOLEN_MSS << 16 |
+ ctx->attrs.mss);
+
+ if (ctx->attrs.wscale_ok)
+ *ctx->ptr32++ = bpf_htonl(TCPOPT_NOP << 24 |
+ TCPOPT_WINDOW << 16 |
+ TCPOLEN_WINDOW << 8 |
+ ctx->attrs.snd_wscale);
+
+ if (ctx->attrs.tstamp_ok) {
+ if (ctx->attrs.sack_ok)
+ *ctx->ptr32++ = bpf_htonl(TCPOPT_SACK_PERM << 24 |
+ TCPOLEN_SACK_PERM << 16 |
+ TCPOPT_TIMESTAMP << 8 |
+ TCPOLEN_TIMESTAMP);
+ else
+ *ctx->ptr32++ = bpf_htonl(TCPOPT_NOP << 24 |
+ TCPOPT_NOP << 16 |
+ TCPOPT_TIMESTAMP << 8 |
+ TCPOLEN_TIMESTAMP);
+
+ *ctx->ptr32++ = bpf_htonl(ctx->attrs.rcv_tsecr);
+ *ctx->ptr32++ = bpf_htonl(ctx->attrs.rcv_tsval);
+ } else if (ctx->attrs.sack_ok) {
+ *ctx->ptr32++ = bpf_htonl(TCPOPT_NOP << 24 |
+ TCPOPT_NOP << 16 |
+ TCPOPT_SACK_PERM << 8 |
+ TCPOLEN_SACK_PERM);
+ }
+}
+
+static int tcp_handle_syn(struct tcp_syncookie *ctx)
+{
+ s64 csum;
+
+ if (tcp_validate_header(ctx))
+ goto err;
+
+ tcp_parse_options(ctx);
+
+ if (tcp_validate_sysctl(ctx))
+ goto err;
+
+ tcp_prepare_cookie(ctx);
+ tcp_write_options(ctx);
+
+ swap(ctx->tcp->source, ctx->tcp->dest);
+ ctx->tcp->check = 0;
+ ctx->tcp->ack_seq = bpf_htonl(bpf_ntohl(ctx->tcp->seq) + 1);
+ ctx->tcp->seq = bpf_htonl(ctx->cookie);
+ ctx->tcp->doff = ((long)ctx->ptr32 - (long)ctx->tcp) >> 2;
+ ctx->tcp->ack = 1;
+ if (!ctx->attrs.tstamp_ok || !ctx->tcp->ece || !ctx->tcp->cwr)
+ ctx->tcp->ece = 0;
+ ctx->tcp->cwr = 0;
+
+ csum = bpf_csum_diff(0, 0, (void *)ctx->tcp, ctx->tcp->doff * 4, 0);
+ if (csum < 0)
+ goto err;
+
+ if (ctx->ipv4) {
+ swap(ctx->ipv4->saddr, ctx->ipv4->daddr);
+ ctx->tcp->check = tcp_v4_csum(ctx, csum);
+
+ ctx->ipv4->check = 0;
+ ctx->ipv4->tos = 0;
+ ctx->ipv4->tot_len = bpf_htons((long)ctx->ptr32 - (long)ctx->ipv4);
+ ctx->ipv4->id = 0;
+ ctx->ipv4->ttl = 64;
+
+ csum = bpf_csum_diff(0, 0, (void *)ctx->ipv4, sizeof(*ctx->ipv4), 0);
+ if (csum < 0)
+ goto err;
+
+ ctx->ipv4->check = csum_fold(csum);
+ } else if (ctx->ipv6) {
+ swap(ctx->ipv6->saddr, ctx->ipv6->daddr);
+ ctx->tcp->check = tcp_v6_csum(ctx, csum);
+
+ *(__be32 *)ctx->ipv6 = bpf_htonl(0x60000000);
+ ctx->ipv6->payload_len = bpf_htons((long)ctx->ptr32 - (long)ctx->tcp);
+ ctx->ipv6->hop_limit = 64;
+ }
+
+ swap_array(ctx->eth->h_source, ctx->eth->h_dest);
+
+ if (bpf_skb_change_tail(ctx->skb, (long)ctx->ptr32 - (long)ctx->eth, 0))
+ goto err;
+
+ return bpf_redirect(ctx->skb->ifindex, 0);
+err:
+ return TC_ACT_SHOT;
+}
+
+static int tcp_validate_cookie(struct tcp_syncookie *ctx)
+{
+ u32 cookie = bpf_ntohl(ctx->tcp->ack_seq) - 1;
+ u32 seq = bpf_ntohl(ctx->tcp->seq) - 1;
+ u64 first = 0, second;
+ int mssind;
+ u32 hash;
+
+ if (ctx->ipv4)
+ first = (u64)ctx->ipv4->saddr << 32 | ctx->ipv4->daddr;
+ else if (ctx->ipv6)
+ first = (u64)ctx->ipv6->saddr.in6_u.u6_addr8[0] << 32 |
+ ctx->ipv6->daddr.in6_u.u6_addr32[0];
+
+ second = (u64)seq << 32 | ctx->tcp->source << 16 | ctx->tcp->dest;
+ hash = siphash_2u64(first, second, &test_key_siphash);
+
+ if (ctx->attrs.tstamp_ok)
+ hash -= ctx->attrs.rcv_tsecr & COOKIE_MASK;
+ else
+ hash &= ~COOKIE_MASK;
+
+ hash -= cookie & ~COOKIE_MASK;
+ if (hash)
+ goto err;
+
+ mssind = (cookie & (3 << 6)) >> 6;
+ if (ctx->ipv4) {
+ if (mssind > ARRAY_SIZE(msstab4))
+ goto err;
+
+ ctx->attrs.mss = msstab4[mssind];
+ } else {
+ if (mssind > ARRAY_SIZE(msstab6))
+ goto err;
+
+ ctx->attrs.mss = msstab6[mssind];
+ }
+
+ ctx->attrs.snd_wscale = cookie & BPF_SYNCOOKIE_WSCALE_MASK;
+ ctx->attrs.rcv_wscale = ctx->attrs.snd_wscale;
+ ctx->attrs.wscale_ok = ctx->attrs.snd_wscale == BPF_SYNCOOKIE_WSCALE_MASK;
+ ctx->attrs.sack_ok = cookie & BPF_SYNCOOKIE_SACK;
+ ctx->attrs.ecn_ok = cookie & BPF_SYNCOOKIE_ECN;
+
+ return 0;
+err:
+ return -1;
+}
+
+static int tcp_handle_ack(struct tcp_syncookie *ctx)
+{
+ struct bpf_sock_tuple tuple;
+ struct bpf_sock *skc;
+ int ret = TC_ACT_OK;
+ struct sock *sk;
+ u32 tuple_size;
+
+ if (ctx->ipv4) {
+ tuple.ipv4.saddr = ctx->ipv4->saddr;
+ tuple.ipv4.daddr = ctx->ipv4->daddr;
+ tuple.ipv4.sport = ctx->tcp->source;
+ tuple.ipv4.dport = ctx->tcp->dest;
+ tuple_size = sizeof(tuple.ipv4);
+ } else if (ctx->ipv6) {
+ __builtin_memcpy(tuple.ipv6.saddr, &ctx->ipv6->saddr, sizeof(tuple.ipv6.saddr));
+ __builtin_memcpy(tuple.ipv6.daddr, &ctx->ipv6->daddr, sizeof(tuple.ipv6.daddr));
+ tuple.ipv6.sport = ctx->tcp->source;
+ tuple.ipv6.dport = ctx->tcp->dest;
+ tuple_size = sizeof(tuple.ipv6);
+ } else {
+ goto out;
+ }
+
+ skc = bpf_skc_lookup_tcp(ctx->skb, &tuple, tuple_size, -1, 0);
+ if (!skc)
+ goto out;
+
+ if (skc->state != TCP_LISTEN)
+ goto release;
+
+ sk = (struct sock *)bpf_skc_to_tcp_sock(skc);
+ if (!sk)
+ goto err;
+
+ if (tcp_validate_header(ctx))
+ goto err;
+
+ tcp_parse_options(ctx);
+
+ if (tcp_validate_cookie(ctx))
+ goto err;
+
+ ret = bpf_sk_assign_tcp_reqsk(ctx->skb, sk, &ctx->attrs, sizeof(ctx->attrs));
+ if (ret < 0)
+ goto err;
+
+release:
+ bpf_sk_release(skc);
+out:
+ return ret;
+
+err:
+ ret = TC_ACT_SHOT;
+ goto release;
+}
+
+SEC("tc")
+int tcp_custom_syncookie(struct __sk_buff *skb)
+{
+ struct tcp_syncookie ctx = {
+ .skb = skb,
+ };
+
+ if (tcp_load_headers(&ctx))
+ return TC_ACT_OK;
+
+ if (ctx.tcp->rst)
+ return TC_ACT_OK;
+
+ if (ctx.tcp->syn) {
+ if (ctx.tcp->ack)
+ return TC_ACT_OK;
+
+ handled_syn = true;
+
+ return tcp_handle_syn(&ctx);
+ }
+
+ handled_ack = true;
+
+ return tcp_handle_ack(&ctx);
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/test_tcp_custom_syncookie.h b/tools/testing/selftests/bpf/progs/test_tcp_custom_syncookie.h
new file mode 100644
index 000000000000..29a6a53cf229
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/test_tcp_custom_syncookie.h
@@ -0,0 +1,140 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright Amazon.com Inc. or its affiliates. */
+
+#ifndef _TEST_TCP_SYNCOOKIE_H
+#define _TEST_TCP_SYNCOOKIE_H
+
+#define __packed __attribute__((__packed__))
+#define __force
+
+#define ARRAY_SIZE(arr) (sizeof(arr) / sizeof((arr)[0]))
+
+#define swap(a, b) \
+ do { \
+ typeof(a) __tmp = (a); \
+ (a) = (b); \
+ (b) = __tmp; \
+ } while (0)
+
+#define swap_array(a, b) \
+ do { \
+ typeof(a) __tmp[sizeof(a)]; \
+ __builtin_memcpy(__tmp, a, sizeof(a)); \
+ __builtin_memcpy(a, b, sizeof(a)); \
+ __builtin_memcpy(b, __tmp, sizeof(a)); \
+ } while (0)
+
+/* asm-generic/unaligned.h */
+#define __get_unaligned_t(type, ptr) ({ \
+ const struct { type x; } __packed * __pptr = (typeof(__pptr))(ptr); \
+ __pptr->x; \
+})
+
+#define get_unaligned(ptr) __get_unaligned_t(typeof(*(ptr)), (ptr))
+
+static inline u16 get_unaligned_be16(const void *p)
+{
+ return bpf_ntohs(__get_unaligned_t(__be16, p));
+}
+
+static inline u32 get_unaligned_be32(const void *p)
+{
+ return bpf_ntohl(__get_unaligned_t(__be32, p));
+}
+
+/* lib/checksum.c */
+static inline u32 from64to32(u64 x)
+{
+ /* add up 32-bit and 32-bit for 32+c bit */
+ x = (x & 0xffffffff) + (x >> 32);
+ /* add up carry.. */
+ x = (x & 0xffffffff) + (x >> 32);
+ return (u32)x;
+}
+
+static inline __wsum csum_tcpudp_nofold(__be32 saddr, __be32 daddr,
+ __u32 len, __u8 proto, __wsum sum)
+{
+ unsigned long long s = (__force u32)sum;
+
+ s += (__force u32)saddr;
+ s += (__force u32)daddr;
+#ifdef __BIG_ENDIAN
+ s += proto + len;
+#else
+ s += (proto + len) << 8;
+#endif
+ return (__force __wsum)from64to32(s);
+}
+
+/* asm-generic/checksum.h */
+static inline __sum16 csum_fold(__wsum csum)
+{
+ u32 sum = (__force u32)csum;
+
+ sum = (sum & 0xffff) + (sum >> 16);
+ sum = (sum & 0xffff) + (sum >> 16);
+ return (__force __sum16)~sum;
+}
+
+static inline __sum16 csum_tcpudp_magic(__be32 saddr, __be32 daddr, __u32 len,
+ __u8 proto, __wsum sum)
+{
+ return csum_fold(csum_tcpudp_nofold(saddr, daddr, len, proto, sum));
+}
+
+/* net/ipv6/ip6_checksum.c */
+static inline __sum16 csum_ipv6_magic(const struct in6_addr *saddr,
+ const struct in6_addr *daddr,
+ __u32 len, __u8 proto, __wsum csum)
+{
+ int carry;
+ __u32 ulen;
+ __u32 uproto;
+ __u32 sum = (__force u32)csum;
+
+ sum += (__force u32)saddr->in6_u.u6_addr32[0];
+ carry = (sum < (__force u32)saddr->in6_u.u6_addr32[0]);
+ sum += carry;
+
+ sum += (__force u32)saddr->in6_u.u6_addr32[1];
+ carry = (sum < (__force u32)saddr->in6_u.u6_addr32[1]);
+ sum += carry;
+
+ sum += (__force u32)saddr->in6_u.u6_addr32[2];
+ carry = (sum < (__force u32)saddr->in6_u.u6_addr32[2]);
+ sum += carry;
+
+ sum += (__force u32)saddr->in6_u.u6_addr32[3];
+ carry = (sum < (__force u32)saddr->in6_u.u6_addr32[3]);
+ sum += carry;
+
+ sum += (__force u32)daddr->in6_u.u6_addr32[0];
+ carry = (sum < (__force u32)daddr->in6_u.u6_addr32[0]);
+ sum += carry;
+
+ sum += (__force u32)daddr->in6_u.u6_addr32[1];
+ carry = (sum < (__force u32)daddr->in6_u.u6_addr32[1]);
+ sum += carry;
+
+ sum += (__force u32)daddr->in6_u.u6_addr32[2];
+ carry = (sum < (__force u32)daddr->in6_u.u6_addr32[2]);
+ sum += carry;
+
+ sum += (__force u32)daddr->in6_u.u6_addr32[3];
+ carry = (sum < (__force u32)daddr->in6_u.u6_addr32[3]);
+ sum += carry;
+
+ ulen = (__force u32)bpf_htonl((__u32)len);
+ sum += ulen;
+ carry = (sum < ulen);
+ sum += carry;
+
+ uproto = (__force u32)bpf_htonl(proto);
+ sum += uproto;
+ carry = (sum < uproto);
+ sum += carry;
+
+ return csum_fold((__force __wsum)sum);
+}
+#endif
diff --git a/tools/testing/selftests/bpf/progs/test_tcp_estats.c b/tools/testing/selftests/bpf/progs/test_tcp_estats.c
index adc83a54c352..e2ae049c2f85 100644
--- a/tools/testing/selftests/bpf/progs/test_tcp_estats.c
+++ b/tools/testing/selftests/bpf/progs/test_tcp_estats.c
@@ -244,7 +244,7 @@ static __always_inline void send_basic_event(struct sock *sk,
bpf_map_update_elem(&ev_record_map, &key, &ev, BPF_ANY);
}
-SEC("dummy_tracepoint")
+SEC("tp/dummy/tracepoint")
int _dummy_tracepoint(struct dummy_tracepoint_args *arg)
{
if (!arg->sock)
@@ -255,4 +255,3 @@ int _dummy_tracepoint(struct dummy_tracepoint_args *arg)
}
char _license[] SEC("license") = "GPL";
-__u32 _version SEC("version") = 1; /* ignored by tracepoints, required by libbpf.a */
diff --git a/tools/testing/selftests/bpf/progs/test_tcp_hdr_options.c b/tools/testing/selftests/bpf/progs/test_tcp_hdr_options.c
new file mode 100644
index 000000000000..5f4e87ee949a
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/test_tcp_hdr_options.c
@@ -0,0 +1,626 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2020 Facebook */
+
+#include <stddef.h>
+#include <errno.h>
+#include <stdbool.h>
+#include <sys/types.h>
+#include <sys/socket.h>
+#include <linux/tcp.h>
+#include <linux/socket.h>
+#include <linux/bpf.h>
+#include <linux/types.h>
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_endian.h>
+#define BPF_PROG_TEST_TCP_HDR_OPTIONS
+#include "test_tcp_hdr_options.h"
+
+#ifndef sizeof_field
+#define sizeof_field(TYPE, MEMBER) sizeof((((TYPE *)0)->MEMBER))
+#endif
+
+__u8 test_kind = TCPOPT_EXP;
+__u16 test_magic = 0xeB9F;
+__u32 inherit_cb_flags = 0;
+
+struct bpf_test_option passive_synack_out = {};
+struct bpf_test_option passive_fin_out = {};
+
+struct bpf_test_option passive_estab_in = {};
+struct bpf_test_option passive_fin_in = {};
+
+struct bpf_test_option active_syn_out = {};
+struct bpf_test_option active_fin_out = {};
+
+struct bpf_test_option active_estab_in = {};
+struct bpf_test_option active_fin_in = {};
+
+struct {
+ __uint(type, BPF_MAP_TYPE_SK_STORAGE);
+ __uint(map_flags, BPF_F_NO_PREALLOC);
+ __type(key, int);
+ __type(value, struct hdr_stg);
+} hdr_stg_map SEC(".maps");
+
+static bool skops_want_cookie(const struct bpf_sock_ops *skops)
+{
+ return skops->args[0] == BPF_WRITE_HDR_TCP_SYNACK_COOKIE;
+}
+
+static bool skops_current_mss(const struct bpf_sock_ops *skops)
+{
+ return skops->args[0] == BPF_WRITE_HDR_TCP_CURRENT_MSS;
+}
+
+static __u8 option_total_len(__u8 flags)
+{
+ __u8 i, len = 1; /* +1 for flags */
+
+ if (!flags)
+ return 0;
+
+ /* RESEND bit does not use a byte */
+ for (i = OPTION_RESEND + 1; i < __NR_OPTION_FLAGS; i++)
+ len += !!TEST_OPTION_FLAGS(flags, i);
+
+ if (test_kind == TCPOPT_EXP)
+ return len + TCP_BPF_EXPOPT_BASE_LEN;
+ else
+ return len + 2; /* +1 kind, +1 kind-len */
+}
+
+static void write_test_option(const struct bpf_test_option *test_opt,
+ __u8 *data)
+{
+ __u8 offset = 0;
+
+ data[offset++] = test_opt->flags;
+ if (TEST_OPTION_FLAGS(test_opt->flags, OPTION_MAX_DELACK_MS))
+ data[offset++] = test_opt->max_delack_ms;
+
+ if (TEST_OPTION_FLAGS(test_opt->flags, OPTION_RAND))
+ data[offset++] = test_opt->rand;
+}
+
+static int store_option(struct bpf_sock_ops *skops,
+ const struct bpf_test_option *test_opt)
+{
+ union {
+ struct tcp_exprm_opt exprm;
+ struct tcp_opt regular;
+ } write_opt;
+ int err;
+
+ if (test_kind == TCPOPT_EXP) {
+ write_opt.exprm.kind = TCPOPT_EXP;
+ write_opt.exprm.len = option_total_len(test_opt->flags);
+ write_opt.exprm.magic = __bpf_htons(test_magic);
+ write_opt.exprm.data32 = 0;
+ write_test_option(test_opt, write_opt.exprm.data);
+ err = bpf_store_hdr_opt(skops, &write_opt.exprm,
+ sizeof(write_opt.exprm), 0);
+ } else {
+ write_opt.regular.kind = test_kind;
+ write_opt.regular.len = option_total_len(test_opt->flags);
+ write_opt.regular.data32 = 0;
+ write_test_option(test_opt, write_opt.regular.data);
+ err = bpf_store_hdr_opt(skops, &write_opt.regular,
+ sizeof(write_opt.regular), 0);
+ }
+
+ if (err)
+ RET_CG_ERR(err);
+
+ return CG_OK;
+}
+
+static int parse_test_option(struct bpf_test_option *opt, const __u8 *start)
+{
+ opt->flags = *start++;
+
+ if (TEST_OPTION_FLAGS(opt->flags, OPTION_MAX_DELACK_MS))
+ opt->max_delack_ms = *start++;
+
+ if (TEST_OPTION_FLAGS(opt->flags, OPTION_RAND))
+ opt->rand = *start++;
+
+ return 0;
+}
+
+static int load_option(struct bpf_sock_ops *skops,
+ struct bpf_test_option *test_opt, bool from_syn)
+{
+ union {
+ struct tcp_exprm_opt exprm;
+ struct tcp_opt regular;
+ } search_opt;
+ int ret, load_flags = from_syn ? BPF_LOAD_HDR_OPT_TCP_SYN : 0;
+
+ if (test_kind == TCPOPT_EXP) {
+ search_opt.exprm.kind = TCPOPT_EXP;
+ search_opt.exprm.len = 4;
+ search_opt.exprm.magic = __bpf_htons(test_magic);
+ search_opt.exprm.data32 = 0;
+ ret = bpf_load_hdr_opt(skops, &search_opt.exprm,
+ sizeof(search_opt.exprm), load_flags);
+ if (ret < 0)
+ return ret;
+ return parse_test_option(test_opt, search_opt.exprm.data);
+ } else {
+ search_opt.regular.kind = test_kind;
+ search_opt.regular.len = 0;
+ search_opt.regular.data32 = 0;
+ ret = bpf_load_hdr_opt(skops, &search_opt.regular,
+ sizeof(search_opt.regular), load_flags);
+ if (ret < 0)
+ return ret;
+ return parse_test_option(test_opt, search_opt.regular.data);
+ }
+}
+
+static int synack_opt_len(struct bpf_sock_ops *skops)
+{
+ struct bpf_test_option test_opt = {};
+ __u8 optlen;
+ int err;
+
+ if (!passive_synack_out.flags)
+ return CG_OK;
+
+ err = load_option(skops, &test_opt, true);
+
+ /* bpf_test_option is not found */
+ if (err == -ENOMSG)
+ return CG_OK;
+
+ if (err)
+ RET_CG_ERR(err);
+
+ optlen = option_total_len(passive_synack_out.flags);
+ if (optlen) {
+ err = bpf_reserve_hdr_opt(skops, optlen, 0);
+ if (err)
+ RET_CG_ERR(err);
+ }
+
+ return CG_OK;
+}
+
+static int write_synack_opt(struct bpf_sock_ops *skops)
+{
+ struct bpf_test_option opt;
+
+ if (!passive_synack_out.flags)
+ /* We should not even be called since no header
+ * space has been reserved.
+ */
+ RET_CG_ERR(0);
+
+ opt = passive_synack_out;
+ if (skops_want_cookie(skops))
+ SET_OPTION_FLAGS(opt.flags, OPTION_RESEND);
+
+ return store_option(skops, &opt);
+}
+
+static int syn_opt_len(struct bpf_sock_ops *skops)
+{
+ __u8 optlen;
+ int err;
+
+ if (!active_syn_out.flags)
+ return CG_OK;
+
+ optlen = option_total_len(active_syn_out.flags);
+ if (optlen) {
+ err = bpf_reserve_hdr_opt(skops, optlen, 0);
+ if (err)
+ RET_CG_ERR(err);
+ }
+
+ return CG_OK;
+}
+
+static int write_syn_opt(struct bpf_sock_ops *skops)
+{
+ if (!active_syn_out.flags)
+ RET_CG_ERR(0);
+
+ return store_option(skops, &active_syn_out);
+}
+
+static int fin_opt_len(struct bpf_sock_ops *skops)
+{
+ struct bpf_test_option *opt;
+ struct hdr_stg *hdr_stg;
+ __u8 optlen;
+ int err;
+
+ if (!skops->sk)
+ RET_CG_ERR(0);
+
+ hdr_stg = bpf_sk_storage_get(&hdr_stg_map, skops->sk, NULL, 0);
+ if (!hdr_stg)
+ RET_CG_ERR(0);
+
+ if (hdr_stg->active)
+ opt = &active_fin_out;
+ else
+ opt = &passive_fin_out;
+
+ optlen = option_total_len(opt->flags);
+ if (optlen) {
+ err = bpf_reserve_hdr_opt(skops, optlen, 0);
+ if (err)
+ RET_CG_ERR(err);
+ }
+
+ return CG_OK;
+}
+
+static int write_fin_opt(struct bpf_sock_ops *skops)
+{
+ struct bpf_test_option *opt;
+ struct hdr_stg *hdr_stg;
+
+ if (!skops->sk)
+ RET_CG_ERR(0);
+
+ hdr_stg = bpf_sk_storage_get(&hdr_stg_map, skops->sk, NULL, 0);
+ if (!hdr_stg)
+ RET_CG_ERR(0);
+
+ if (hdr_stg->active)
+ opt = &active_fin_out;
+ else
+ opt = &passive_fin_out;
+
+ if (!opt->flags)
+ RET_CG_ERR(0);
+
+ return store_option(skops, opt);
+}
+
+static int resend_in_ack(struct bpf_sock_ops *skops)
+{
+ struct hdr_stg *hdr_stg;
+
+ if (!skops->sk)
+ return -1;
+
+ hdr_stg = bpf_sk_storage_get(&hdr_stg_map, skops->sk, NULL, 0);
+ if (!hdr_stg)
+ return -1;
+
+ return !!hdr_stg->resend_syn;
+}
+
+static int nodata_opt_len(struct bpf_sock_ops *skops)
+{
+ int resend;
+
+ resend = resend_in_ack(skops);
+ if (resend < 0)
+ RET_CG_ERR(0);
+
+ if (resend)
+ return syn_opt_len(skops);
+
+ return CG_OK;
+}
+
+static int write_nodata_opt(struct bpf_sock_ops *skops)
+{
+ int resend;
+
+ resend = resend_in_ack(skops);
+ if (resend < 0)
+ RET_CG_ERR(0);
+
+ if (resend)
+ return write_syn_opt(skops);
+
+ return CG_OK;
+}
+
+static int data_opt_len(struct bpf_sock_ops *skops)
+{
+ /* Same as the nodata version. Mostly to show
+ * an example usage on skops->skb_len.
+ */
+ return nodata_opt_len(skops);
+}
+
+static int write_data_opt(struct bpf_sock_ops *skops)
+{
+ return write_nodata_opt(skops);
+}
+
+static int current_mss_opt_len(struct bpf_sock_ops *skops)
+{
+ /* Reserve maximum that may be needed */
+ int err;
+
+ err = bpf_reserve_hdr_opt(skops, option_total_len(OPTION_MASK), 0);
+ if (err)
+ RET_CG_ERR(err);
+
+ return CG_OK;
+}
+
+static int handle_hdr_opt_len(struct bpf_sock_ops *skops)
+{
+ __u8 tcp_flags = skops_tcp_flags(skops);
+
+ if ((tcp_flags & TCPHDR_SYNACK) == TCPHDR_SYNACK)
+ return synack_opt_len(skops);
+
+ if (tcp_flags & TCPHDR_SYN)
+ return syn_opt_len(skops);
+
+ if (tcp_flags & TCPHDR_FIN)
+ return fin_opt_len(skops);
+
+ if (skops_current_mss(skops))
+ /* The kernel is calculating the MSS */
+ return current_mss_opt_len(skops);
+
+ if (skops->skb_len)
+ return data_opt_len(skops);
+
+ return nodata_opt_len(skops);
+}
+
+static int handle_write_hdr_opt(struct bpf_sock_ops *skops)
+{
+ __u8 tcp_flags = skops_tcp_flags(skops);
+ struct tcphdr *th;
+
+ if ((tcp_flags & TCPHDR_SYNACK) == TCPHDR_SYNACK)
+ return write_synack_opt(skops);
+
+ if (tcp_flags & TCPHDR_SYN)
+ return write_syn_opt(skops);
+
+ if (tcp_flags & TCPHDR_FIN)
+ return write_fin_opt(skops);
+
+ th = skops->skb_data;
+ if (th + 1 > skops->skb_data_end)
+ RET_CG_ERR(0);
+
+ if (skops->skb_len > tcp_hdrlen(th))
+ return write_data_opt(skops);
+
+ return write_nodata_opt(skops);
+}
+
+static int set_delack_max(struct bpf_sock_ops *skops, __u8 max_delack_ms)
+{
+ __u32 max_delack_us = max_delack_ms * 1000;
+
+ return bpf_setsockopt(skops, SOL_TCP, TCP_BPF_DELACK_MAX,
+ &max_delack_us, sizeof(max_delack_us));
+}
+
+static int set_rto_min(struct bpf_sock_ops *skops, __u8 peer_max_delack_ms)
+{
+ __u32 min_rto_us = peer_max_delack_ms * 1000;
+
+ return bpf_setsockopt(skops, SOL_TCP, TCP_BPF_RTO_MIN, &min_rto_us,
+ sizeof(min_rto_us));
+}
+
+static int handle_active_estab(struct bpf_sock_ops *skops)
+{
+ struct hdr_stg init_stg = {
+ .active = true,
+ };
+ int err;
+
+ err = load_option(skops, &active_estab_in, false);
+ if (err && err != -ENOMSG)
+ RET_CG_ERR(err);
+
+ init_stg.resend_syn = TEST_OPTION_FLAGS(active_estab_in.flags,
+ OPTION_RESEND);
+ if (!skops->sk || !bpf_sk_storage_get(&hdr_stg_map, skops->sk,
+ &init_stg,
+ BPF_SK_STORAGE_GET_F_CREATE))
+ RET_CG_ERR(0);
+
+ if (init_stg.resend_syn)
+ /* Don't clear the write_hdr cb now because
+ * the ACK may get lost and retransmit may
+ * be needed.
+ *
+ * PARSE_ALL_HDR cb flag is set to learn if this
+ * resend_syn option has received by the peer.
+ *
+ * The header option will be resent until a valid
+ * packet is received at handle_parse_hdr()
+ * and all hdr cb flags will be cleared in
+ * handle_parse_hdr().
+ */
+ set_parse_all_hdr_cb_flags(skops);
+ else if (!active_fin_out.flags)
+ /* No options will be written from now */
+ clear_hdr_cb_flags(skops);
+
+ if (active_syn_out.max_delack_ms) {
+ err = set_delack_max(skops, active_syn_out.max_delack_ms);
+ if (err)
+ RET_CG_ERR(err);
+ }
+
+ if (active_estab_in.max_delack_ms) {
+ err = set_rto_min(skops, active_estab_in.max_delack_ms);
+ if (err)
+ RET_CG_ERR(err);
+ }
+
+ return CG_OK;
+}
+
+static int handle_passive_estab(struct bpf_sock_ops *skops)
+{
+ struct hdr_stg init_stg = {};
+ struct tcphdr *th;
+ int err;
+
+ inherit_cb_flags = skops->bpf_sock_ops_cb_flags;
+
+ err = load_option(skops, &passive_estab_in, true);
+ if (err == -ENOENT) {
+ /* saved_syn is not found. It was in syncookie mode.
+ * We have asked the active side to resend the options
+ * in ACK, so try to find the bpf_test_option from ACK now.
+ */
+ err = load_option(skops, &passive_estab_in, false);
+ init_stg.syncookie = true;
+ }
+
+ /* ENOMSG: The bpf_test_option is not found which is fine.
+ * Bail out now for all other errors.
+ */
+ if (err && err != -ENOMSG)
+ RET_CG_ERR(err);
+
+ th = skops->skb_data;
+ if (th + 1 > skops->skb_data_end)
+ RET_CG_ERR(0);
+
+ if (th->syn) {
+ /* Fastopen */
+
+ /* Cannot clear cb_flags to stop write_hdr cb.
+ * synack is not sent yet for fast open.
+ * Even it was, the synack may need to be retransmitted.
+ *
+ * PARSE_ALL_HDR cb flag is set to learn
+ * if synack has reached the peer.
+ * All cb_flags will be cleared in handle_parse_hdr().
+ */
+ set_parse_all_hdr_cb_flags(skops);
+ init_stg.fastopen = true;
+ } else if (!passive_fin_out.flags) {
+ /* No options will be written from now */
+ clear_hdr_cb_flags(skops);
+ }
+
+ if (!skops->sk ||
+ !bpf_sk_storage_get(&hdr_stg_map, skops->sk, &init_stg,
+ BPF_SK_STORAGE_GET_F_CREATE))
+ RET_CG_ERR(0);
+
+ if (passive_synack_out.max_delack_ms) {
+ err = set_delack_max(skops, passive_synack_out.max_delack_ms);
+ if (err)
+ RET_CG_ERR(err);
+ }
+
+ if (passive_estab_in.max_delack_ms) {
+ err = set_rto_min(skops, passive_estab_in.max_delack_ms);
+ if (err)
+ RET_CG_ERR(err);
+ }
+
+ return CG_OK;
+}
+
+static int handle_parse_hdr(struct bpf_sock_ops *skops)
+{
+ struct hdr_stg *hdr_stg;
+ struct tcphdr *th;
+
+ if (!skops->sk)
+ RET_CG_ERR(0);
+
+ th = skops->skb_data;
+ if (th + 1 > skops->skb_data_end)
+ RET_CG_ERR(0);
+
+ hdr_stg = bpf_sk_storage_get(&hdr_stg_map, skops->sk, NULL, 0);
+ if (!hdr_stg)
+ RET_CG_ERR(0);
+
+ if (hdr_stg->resend_syn || hdr_stg->fastopen)
+ /* The PARSE_ALL_HDR cb flag was turned on
+ * to ensure that the previously written
+ * options have reached the peer.
+ * Those previously written option includes:
+ * - Active side: resend_syn in ACK during syncookie
+ * or
+ * - Passive side: SYNACK during fastopen
+ *
+ * A valid packet has been received here after
+ * the 3WHS, so the PARSE_ALL_HDR cb flag
+ * can be cleared now.
+ */
+ clear_parse_all_hdr_cb_flags(skops);
+
+ if (hdr_stg->resend_syn && !active_fin_out.flags)
+ /* Active side resent the syn option in ACK
+ * because the server was in syncookie mode.
+ * A valid packet has been received, so
+ * clear header cb flags if there is no
+ * more option to send.
+ */
+ clear_hdr_cb_flags(skops);
+
+ if (hdr_stg->fastopen && !passive_fin_out.flags)
+ /* Passive side was in fastopen.
+ * A valid packet has been received, so
+ * the SYNACK has reached the peer.
+ * Clear header cb flags if there is no more
+ * option to send.
+ */
+ clear_hdr_cb_flags(skops);
+
+ if (th->fin) {
+ struct bpf_test_option *fin_opt;
+ int err;
+
+ if (hdr_stg->active)
+ fin_opt = &active_fin_in;
+ else
+ fin_opt = &passive_fin_in;
+
+ err = load_option(skops, fin_opt, false);
+ if (err && err != -ENOMSG)
+ RET_CG_ERR(err);
+ }
+
+ return CG_OK;
+}
+
+SEC("sockops")
+int estab(struct bpf_sock_ops *skops)
+{
+ int true_val = 1;
+
+ switch (skops->op) {
+ case BPF_SOCK_OPS_TCP_LISTEN_CB:
+ bpf_setsockopt(skops, SOL_TCP, TCP_SAVE_SYN,
+ &true_val, sizeof(true_val));
+ set_hdr_cb_flags(skops, BPF_SOCK_OPS_STATE_CB_FLAG);
+ break;
+ case BPF_SOCK_OPS_TCP_CONNECT_CB:
+ set_hdr_cb_flags(skops, 0);
+ break;
+ case BPF_SOCK_OPS_PARSE_HDR_OPT_CB:
+ return handle_parse_hdr(skops);
+ case BPF_SOCK_OPS_HDR_OPT_LEN_CB:
+ return handle_hdr_opt_len(skops);
+ case BPF_SOCK_OPS_WRITE_HDR_OPT_CB:
+ return handle_write_hdr_opt(skops);
+ case BPF_SOCK_OPS_PASSIVE_ESTABLISHED_CB:
+ return handle_passive_estab(skops);
+ case BPF_SOCK_OPS_ACTIVE_ESTABLISHED_CB:
+ return handle_active_estab(skops);
+ }
+
+ return CG_OK;
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/test_tcpbpf_kern.c b/tools/testing/selftests/bpf/progs/test_tcpbpf_kern.c
index 1f1966e86e9f..a3f3f43fc195 100644
--- a/tools/testing/selftests/bpf/progs/test_tcpbpf_kern.c
+++ b/tools/testing/selftests/bpf/progs/test_tcpbpf_kern.c
@@ -12,81 +12,101 @@
#include <linux/tcp.h>
#include <bpf/bpf_helpers.h>
#include <bpf/bpf_endian.h>
+#include "bpf_tcp_helpers.h"
#include "test_tcpbpf.h"
-struct {
- __uint(type, BPF_MAP_TYPE_ARRAY);
- __uint(max_entries, 4);
- __type(key, __u32);
- __type(value, struct tcpbpf_globals);
-} global_map SEC(".maps");
-
-struct {
- __uint(type, BPF_MAP_TYPE_ARRAY);
- __uint(max_entries, 2);
- __type(key, __u32);
- __type(value, int);
-} sockopt_results SEC(".maps");
-
-static inline void update_event_map(int event)
+struct tcpbpf_globals global = {};
+
+/**
+ * SOL_TCP is defined in <netinet/tcp.h> while
+ * TCP_SAVED_SYN is defined in already included <linux/tcp.h>
+ */
+#ifndef SOL_TCP
+#define SOL_TCP 6
+#endif
+
+static __always_inline int get_tp_window_clamp(struct bpf_sock_ops *skops)
{
- __u32 key = 0;
- struct tcpbpf_globals g, *gp;
-
- gp = bpf_map_lookup_elem(&global_map, &key);
- if (gp == NULL) {
- struct tcpbpf_globals g = {0};
-
- g.event_map |= (1 << event);
- bpf_map_update_elem(&global_map, &key, &g,
- BPF_ANY);
- } else {
- g = *gp;
- g.event_map |= (1 << event);
- bpf_map_update_elem(&global_map, &key, &g,
- BPF_ANY);
- }
-}
+ struct bpf_sock *sk;
+ struct tcp_sock *tp;
-int _version SEC("version") = 1;
+ sk = skops->sk;
+ if (!sk)
+ return -1;
+ tp = bpf_skc_to_tcp_sock(sk);
+ if (!tp)
+ return -1;
+ return tp->window_clamp;
+}
SEC("sockops")
int bpf_testcb(struct bpf_sock_ops *skops)
{
char header[sizeof(struct ipv6hdr) + sizeof(struct tcphdr)];
+ struct bpf_sock_ops *reuse = skops;
struct tcphdr *thdr;
- int good_call_rv = 0;
- int bad_call_rv = 0;
+ int window_clamp = 9216;
int save_syn = 1;
int rv = -1;
int v = 0;
int op;
+ /* Test reading fields in bpf_sock_ops using single register */
+ asm volatile (
+ "%[reuse] = *(u32 *)(%[reuse] +96)"
+ : [reuse] "+r"(reuse)
+ :);
+
+ asm volatile (
+ "%[op] = *(u32 *)(%[skops] +96)"
+ : [op] "=r"(op)
+ : [skops] "r"(skops)
+ :);
+
+ asm volatile (
+ "r9 = %[skops];\n"
+ "r8 = *(u32 *)(r9 +164);\n"
+ "*(u32 *)(r9 +164) = r8;\n"
+ :: [skops] "r"(skops)
+ : "r9", "r8");
+
+ asm volatile (
+ "r1 = %[skops];\n"
+ "r1 = *(u64 *)(r1 +184);\n"
+ "if r1 == 0 goto +1;\n"
+ "r1 = *(u32 *)(r1 +4);\n"
+ :: [skops] "r"(skops):"r1");
+
+ asm volatile (
+ "r9 = %[skops];\n"
+ "r9 = *(u64 *)(r9 +184);\n"
+ "if r9 == 0 goto +1;\n"
+ "r9 = *(u32 *)(r9 +4);\n"
+ :: [skops] "r"(skops):"r9");
+
+ asm volatile (
+ "r1 = %[skops];\n"
+ "r2 = *(u64 *)(r1 +184);\n"
+ "if r2 == 0 goto +1;\n"
+ "r2 = *(u32 *)(r2 +4);\n"
+ :: [skops] "r"(skops):"r1", "r2");
+
op = (int) skops->op;
- update_event_map(op);
+ global.event_map |= (1 << op);
switch (op) {
+ case BPF_SOCK_OPS_TCP_CONNECT_CB:
+ rv = bpf_setsockopt(skops, SOL_TCP, TCP_WINDOW_CLAMP,
+ &window_clamp, sizeof(window_clamp));
+ global.window_clamp_client = get_tp_window_clamp(skops);
+ break;
case BPF_SOCK_OPS_ACTIVE_ESTABLISHED_CB:
/* Test failure to set largest cb flag (assumes not defined) */
- bad_call_rv = bpf_sock_ops_cb_flags_set(skops, 0x80);
+ global.bad_cb_test_rv = bpf_sock_ops_cb_flags_set(skops, 0x80);
/* Set callback */
- good_call_rv = bpf_sock_ops_cb_flags_set(skops,
+ global.good_cb_test_rv = bpf_sock_ops_cb_flags_set(skops,
BPF_SOCK_OPS_STATE_CB_FLAG);
- /* Update results */
- {
- __u32 key = 0;
- struct tcpbpf_globals g, *gp;
-
- gp = bpf_map_lookup_elem(&global_map, &key);
- if (!gp)
- break;
- g = *gp;
- g.bad_cb_test_rv = bad_call_rv;
- g.good_cb_test_rv = good_call_rv;
- bpf_map_update_elem(&global_map, &key, &g,
- BPF_ANY);
- }
break;
case BPF_SOCK_OPS_PASSIVE_ESTABLISHED_CB:
skops->sk_txhash = 0x12345f;
@@ -102,12 +122,14 @@ int bpf_testcb(struct bpf_sock_ops *skops)
thdr = (struct tcphdr *)(header + offset);
v = thdr->syn;
- __u32 key = 1;
- bpf_map_update_elem(&sockopt_results, &key, &v,
- BPF_ANY);
+ global.tcp_saved_syn = v;
}
}
+ rv = bpf_setsockopt(skops, SOL_TCP, TCP_WINDOW_CLAMP,
+ &window_clamp, sizeof(window_clamp));
+
+ global.window_clamp_server = get_tp_window_clamp(skops);
break;
case BPF_SOCK_OPS_RTO_CB:
break;
@@ -115,25 +137,16 @@ int bpf_testcb(struct bpf_sock_ops *skops)
break;
case BPF_SOCK_OPS_STATE_CB:
if (skops->args[1] == BPF_TCP_CLOSE) {
- __u32 key = 0;
- struct tcpbpf_globals g, *gp;
-
- gp = bpf_map_lookup_elem(&global_map, &key);
- if (!gp)
- break;
- g = *gp;
if (skops->args[0] == BPF_TCP_LISTEN) {
- g.num_listen++;
+ global.num_listen++;
} else {
- g.total_retrans = skops->total_retrans;
- g.data_segs_in = skops->data_segs_in;
- g.data_segs_out = skops->data_segs_out;
- g.bytes_received = skops->bytes_received;
- g.bytes_acked = skops->bytes_acked;
+ global.total_retrans = skops->total_retrans;
+ global.data_segs_in = skops->data_segs_in;
+ global.data_segs_out = skops->data_segs_out;
+ global.bytes_received = skops->bytes_received;
+ global.bytes_acked = skops->bytes_acked;
}
- g.num_close_events++;
- bpf_map_update_elem(&global_map, &key, &g,
- BPF_ANY);
+ global.num_close_events++;
}
break;
case BPF_SOCK_OPS_TCP_LISTEN_CB:
@@ -141,9 +154,7 @@ int bpf_testcb(struct bpf_sock_ops *skops)
v = bpf_setsockopt(skops, IPPROTO_TCP, TCP_SAVE_SYN,
&save_syn, sizeof(save_syn));
/* Update global map w/ result of setsock opt */
- __u32 key = 0;
-
- bpf_map_update_elem(&sockopt_results, &key, &v, BPF_ANY);
+ global.tcp_save_syn = v;
break;
default:
rv = -1;
diff --git a/tools/testing/selftests/bpf/progs/test_tcpnotify_kern.c b/tools/testing/selftests/bpf/progs/test_tcpnotify_kern.c
index ac63410bb541..540181c115a8 100644
--- a/tools/testing/selftests/bpf/progs/test_tcpnotify_kern.c
+++ b/tools/testing/selftests/bpf/progs/test_tcpnotify_kern.c
@@ -24,12 +24,10 @@ struct {
struct {
__uint(type, BPF_MAP_TYPE_PERF_EVENT_ARRAY);
__uint(max_entries, 2);
- __uint(key_size, sizeof(int));
- __uint(value_size, sizeof(__u32));
+ __type(key, int);
+ __type(value, __u32);
} perf_event_map SEC(".maps");
-int _version SEC("version") = 1;
-
SEC("sockops")
int bpf_testcb(struct bpf_sock_ops *skops)
{
diff --git a/tools/testing/selftests/bpf/progs/test_time_tai.c b/tools/testing/selftests/bpf/progs/test_time_tai.c
new file mode 100644
index 000000000000..7ea0863f3ddb
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/test_time_tai.c
@@ -0,0 +1,24 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (C) 2022 Linutronix GmbH */
+
+#include <linux/bpf.h>
+#include <bpf/bpf_helpers.h>
+
+char _license[] SEC("license") = "GPL";
+
+SEC("tc")
+int time_tai(struct __sk_buff *skb)
+{
+ __u64 ts1, ts2;
+
+ /* Get TAI timestamps */
+ ts1 = bpf_ktime_get_tai_ns();
+ ts2 = bpf_ktime_get_tai_ns();
+
+ /* Save TAI timestamps (Note: skb->hwtstamp is read-only) */
+ skb->tstamp = ts1;
+ skb->cb[0] = ts2 & 0xffffffff;
+ skb->cb[1] = ts2 >> 32;
+
+ return 0;
+}
diff --git a/tools/testing/selftests/bpf/progs/test_trace_ext.c b/tools/testing/selftests/bpf/progs/test_trace_ext.c
new file mode 100644
index 000000000000..d19a634d0e78
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/test_trace_ext.c
@@ -0,0 +1,18 @@
+// SPDX-License-Identifier: GPL-2.0
+// Copyright (c) 2019 Facebook
+#include <linux/bpf.h>
+#include <stdbool.h>
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_endian.h>
+#include <bpf/bpf_tracing.h>
+
+__u64 ext_called = 0;
+
+SEC("freplace/test_pkt_md_access")
+int test_pkt_md_access_new(struct __sk_buff *skb)
+{
+ ext_called = skb->len;
+ return 0;
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/test_trace_ext_tracing.c b/tools/testing/selftests/bpf/progs/test_trace_ext_tracing.c
new file mode 100644
index 000000000000..52f3baf98f20
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/test_trace_ext_tracing.c
@@ -0,0 +1,25 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include "vmlinux.h"
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
+
+__u64 fentry_called = 0;
+
+SEC("fentry/test_pkt_md_access_new")
+int BPF_PROG(fentry, struct sk_buff *skb)
+{
+ fentry_called = skb->len;
+ return 0;
+}
+
+__u64 fexit_called = 0;
+
+SEC("fexit/test_pkt_md_access_new")
+int BPF_PROG(fexit, struct sk_buff *skb)
+{
+ fexit_called = skb->len;
+ return 0;
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/test_tracepoint.c b/tools/testing/selftests/bpf/progs/test_tracepoint.c
index 4b825ee122cf..4cb8bbb6a320 100644
--- a/tools/testing/selftests/bpf/progs/test_tracepoint.c
+++ b/tools/testing/selftests/bpf/progs/test_tracepoint.c
@@ -1,17 +1,17 @@
// SPDX-License-Identifier: GPL-2.0
// Copyright (c) 2017 Facebook
-#include <linux/bpf.h>
+#include <vmlinux.h>
#include <bpf/bpf_helpers.h>
-/* taken from /sys/kernel/debug/tracing/events/sched/sched_switch/format */
+/* taken from /sys/kernel/tracing/events/sched/sched_switch/format */
struct sched_switch_args {
unsigned long long pad;
- char prev_comm[16];
+ char prev_comm[TASK_COMM_LEN];
int prev_pid;
int prev_prio;
long long prev_state;
- char next_comm[16];
+ char next_comm[TASK_COMM_LEN];
int next_pid;
int next_prio;
};
@@ -23,4 +23,3 @@ int oncpu(struct sched_switch_args *ctx)
}
char _license[] SEC("license") = "GPL";
-__u32 _version SEC("version") = 1; /* ignored by tracepoints, required by libbpf.a */
diff --git a/tools/testing/selftests/bpf/progs/test_trampoline_count.c b/tools/testing/selftests/bpf/progs/test_trampoline_count.c
index f030e469d05b..7765720da7d5 100644
--- a/tools/testing/selftests/bpf/progs/test_trampoline_count.c
+++ b/tools/testing/selftests/bpf/progs/test_trampoline_count.c
@@ -1,20 +1,22 @@
// SPDX-License-Identifier: GPL-2.0
-#include <stdbool.h>
-#include <stddef.h>
#include <linux/bpf.h>
#include <bpf/bpf_helpers.h>
#include <bpf/bpf_tracing.h>
-struct task_struct;
+SEC("fentry/bpf_modify_return_test")
+int BPF_PROG(fentry_test, int a, int *b)
+{
+ return 0;
+}
-SEC("fentry/__set_task_comm")
-int BPF_PROG(prog1, struct task_struct *tsk, const char *buf, bool exec)
+SEC("fmod_ret/bpf_modify_return_test")
+int BPF_PROG(fmod_ret_test, int a, int *b, int ret)
{
return 0;
}
-SEC("fexit/__set_task_comm")
-int BPF_PROG(prog2, struct task_struct *tsk, const char *buf, bool exec)
+SEC("fexit/bpf_modify_return_test")
+int BPF_PROG(fexit_test, int a, int *b, int ret)
{
return 0;
}
diff --git a/tools/testing/selftests/bpf/progs/test_tunnel_kern.c b/tools/testing/selftests/bpf/progs/test_tunnel_kern.c
index f48dbfe24ddc..3e436e6f7312 100644
--- a/tools/testing/selftests/bpf/progs/test_tunnel_kern.c
+++ b/tools/testing/selftests/bpf/progs/test_tunnel_kern.c
@@ -6,45 +6,44 @@
* modify it under the terms of version 2 of the GNU General Public
* License as published by the Free Software Foundation.
*/
-#include <stddef.h>
-#include <string.h>
-#include <arpa/inet.h>
-#include <linux/bpf.h>
-#include <linux/if_ether.h>
-#include <linux/if_packet.h>
-#include <linux/ip.h>
-#include <linux/ipv6.h>
-#include <linux/types.h>
-#include <linux/tcp.h>
-#include <linux/socket.h>
-#include <linux/pkt_cls.h>
-#include <linux/erspan.h>
+#include "vmlinux.h"
+#include <bpf/bpf_core_read.h>
#include <bpf/bpf_helpers.h>
#include <bpf/bpf_endian.h>
+#include "bpf_kfuncs.h"
+#include "bpf_tracing_net.h"
-#define ERROR(ret) do {\
- char fmt[] = "ERROR line:%d ret:%d\n";\
- bpf_trace_printk(fmt, sizeof(fmt), __LINE__, ret); \
- } while (0)
-
-int _version SEC("version") = 1;
-
-struct geneve_opt {
- __be16 opt_class;
- __u8 type;
- __u8 length:5;
- __u8 r3:1;
- __u8 r2:1;
- __u8 r1:1;
- __u8 opt_data[8]; /* hard-coded to 8 byte */
-};
-
-struct vxlan_metadata {
- __u32 gbp;
-};
-
-SEC("gre_set_tunnel")
-int _gre_set_tunnel(struct __sk_buff *skb)
+#define log_err(__ret) bpf_printk("ERROR line:%d ret:%d\n", __LINE__, __ret)
+
+#define VXLAN_UDP_PORT 4789
+#define ETH_P_IP 0x0800
+#define PACKET_HOST 0
+#define TUNNEL_CSUM bpf_htons(0x01)
+#define TUNNEL_KEY bpf_htons(0x04)
+
+/* Only IPv4 address assigned to veth1.
+ * 172.16.1.200
+ */
+#define ASSIGNED_ADDR_VETH1 0xac1001c8
+
+int bpf_skb_set_fou_encap(struct __sk_buff *skb_ctx,
+ struct bpf_fou_encap *encap, int type) __ksym;
+int bpf_skb_get_fou_encap(struct __sk_buff *skb_ctx,
+ struct bpf_fou_encap *encap) __ksym;
+struct xfrm_state *
+bpf_xdp_get_xfrm_state(struct xdp_md *ctx, struct bpf_xfrm_state_opts *opts,
+ u32 opts__sz) __ksym;
+void bpf_xdp_xfrm_state_release(struct xfrm_state *x) __ksym;
+
+struct {
+ __uint(type, BPF_MAP_TYPE_ARRAY);
+ __uint(max_entries, 1);
+ __type(key, __u32);
+ __type(value, __u32);
+} local_ip_map SEC(".maps");
+
+SEC("tc")
+int gre_set_tunnel(struct __sk_buff *skb)
{
int ret;
struct bpf_tunnel_key key;
@@ -58,32 +57,52 @@ int _gre_set_tunnel(struct __sk_buff *skb)
ret = bpf_skb_set_tunnel_key(skb, &key, sizeof(key),
BPF_F_ZERO_CSUM_TX | BPF_F_SEQ_NUMBER);
if (ret < 0) {
- ERROR(ret);
+ log_err(ret);
+ return TC_ACT_SHOT;
+ }
+
+ return TC_ACT_OK;
+}
+
+SEC("tc")
+int gre_set_tunnel_no_key(struct __sk_buff *skb)
+{
+ int ret;
+ struct bpf_tunnel_key key;
+
+ __builtin_memset(&key, 0x0, sizeof(key));
+ key.remote_ipv4 = 0xac100164; /* 172.16.1.100 */
+ key.tunnel_ttl = 64;
+
+ ret = bpf_skb_set_tunnel_key(skb, &key, sizeof(key),
+ BPF_F_ZERO_CSUM_TX | BPF_F_SEQ_NUMBER |
+ BPF_F_NO_TUNNEL_KEY);
+ if (ret < 0) {
+ log_err(ret);
return TC_ACT_SHOT;
}
return TC_ACT_OK;
}
-SEC("gre_get_tunnel")
-int _gre_get_tunnel(struct __sk_buff *skb)
+SEC("tc")
+int gre_get_tunnel(struct __sk_buff *skb)
{
int ret;
struct bpf_tunnel_key key;
- char fmt[] = "key %d remote ip 0x%x\n";
ret = bpf_skb_get_tunnel_key(skb, &key, sizeof(key), 0);
if (ret < 0) {
- ERROR(ret);
+ log_err(ret);
return TC_ACT_SHOT;
}
- bpf_trace_printk(fmt, sizeof(fmt), key.tunnel_id, key.remote_ipv4);
+ bpf_printk("key %d remote ip 0x%x\n", key.tunnel_id, key.remote_ipv4);
return TC_ACT_OK;
}
-SEC("ip6gretap_set_tunnel")
-int _ip6gretap_set_tunnel(struct __sk_buff *skb)
+SEC("tc")
+int ip6gretap_set_tunnel(struct __sk_buff *skb)
{
struct bpf_tunnel_key key;
int ret;
@@ -99,35 +118,34 @@ int _ip6gretap_set_tunnel(struct __sk_buff *skb)
BPF_F_TUNINFO_IPV6 | BPF_F_ZERO_CSUM_TX |
BPF_F_SEQ_NUMBER);
if (ret < 0) {
- ERROR(ret);
+ log_err(ret);
return TC_ACT_SHOT;
}
return TC_ACT_OK;
}
-SEC("ip6gretap_get_tunnel")
-int _ip6gretap_get_tunnel(struct __sk_buff *skb)
+SEC("tc")
+int ip6gretap_get_tunnel(struct __sk_buff *skb)
{
- char fmt[] = "key %d remote ip6 ::%x label %x\n";
struct bpf_tunnel_key key;
int ret;
ret = bpf_skb_get_tunnel_key(skb, &key, sizeof(key),
BPF_F_TUNINFO_IPV6);
if (ret < 0) {
- ERROR(ret);
+ log_err(ret);
return TC_ACT_SHOT;
}
- bpf_trace_printk(fmt, sizeof(fmt),
- key.tunnel_id, key.remote_ipv6[3], key.tunnel_label);
+ bpf_printk("key %d remote ip6 ::%x label %x\n",
+ key.tunnel_id, key.remote_ipv6[3], key.tunnel_label);
return TC_ACT_OK;
}
-SEC("erspan_set_tunnel")
-int _erspan_set_tunnel(struct __sk_buff *skb)
+SEC("tc")
+int erspan_set_tunnel(struct __sk_buff *skb)
{
struct bpf_tunnel_key key;
struct erspan_metadata md;
@@ -142,7 +160,7 @@ int _erspan_set_tunnel(struct __sk_buff *skb)
ret = bpf_skb_set_tunnel_key(skb, &key, sizeof(key),
BPF_F_ZERO_CSUM_TX);
if (ret < 0) {
- ERROR(ret);
+ log_err(ret);
return TC_ACT_SHOT;
}
@@ -155,63 +173,58 @@ int _erspan_set_tunnel(struct __sk_buff *skb)
__u8 hwid = 7;
md.version = 2;
- md.u.md2.dir = direction;
- md.u.md2.hwid = hwid & 0xf;
- md.u.md2.hwid_upper = (hwid >> 4) & 0x3;
+ BPF_CORE_WRITE_BITFIELD(&md.u.md2, dir, direction);
+ BPF_CORE_WRITE_BITFIELD(&md.u.md2, hwid, (hwid & 0xf));
+ BPF_CORE_WRITE_BITFIELD(&md.u.md2, hwid_upper, (hwid >> 4) & 0x3);
#endif
ret = bpf_skb_set_tunnel_opt(skb, &md, sizeof(md));
if (ret < 0) {
- ERROR(ret);
+ log_err(ret);
return TC_ACT_SHOT;
}
return TC_ACT_OK;
}
-SEC("erspan_get_tunnel")
-int _erspan_get_tunnel(struct __sk_buff *skb)
+SEC("tc")
+int erspan_get_tunnel(struct __sk_buff *skb)
{
- char fmt[] = "key %d remote ip 0x%x erspan version %d\n";
struct bpf_tunnel_key key;
struct erspan_metadata md;
- __u32 index;
int ret;
ret = bpf_skb_get_tunnel_key(skb, &key, sizeof(key), 0);
if (ret < 0) {
- ERROR(ret);
+ log_err(ret);
return TC_ACT_SHOT;
}
ret = bpf_skb_get_tunnel_opt(skb, &md, sizeof(md));
if (ret < 0) {
- ERROR(ret);
+ log_err(ret);
return TC_ACT_SHOT;
}
- bpf_trace_printk(fmt, sizeof(fmt),
- key.tunnel_id, key.remote_ipv4, md.version);
+ bpf_printk("key %d remote ip 0x%x erspan version %d\n",
+ key.tunnel_id, key.remote_ipv4, md.version);
#ifdef ERSPAN_V1
- char fmt2[] = "\tindex %x\n";
-
index = bpf_ntohl(md.u.index);
- bpf_trace_printk(fmt2, sizeof(fmt2), index);
+ bpf_printk("\tindex %x\n", index);
#else
- char fmt2[] = "\tdirection %d hwid %x timestamp %u\n";
-
- bpf_trace_printk(fmt2, sizeof(fmt2),
- md.u.md2.dir,
- (md.u.md2.hwid_upper << 4) + md.u.md2.hwid,
- bpf_ntohl(md.u.md2.timestamp));
+ bpf_printk("\tdirection %d hwid %x timestamp %u\n",
+ BPF_CORE_READ_BITFIELD(&md.u.md2, dir),
+ (BPF_CORE_READ_BITFIELD(&md.u.md2, hwid_upper) << 4) +
+ BPF_CORE_READ_BITFIELD(&md.u.md2, hwid),
+ bpf_ntohl(md.u.md2.timestamp));
#endif
return TC_ACT_OK;
}
-SEC("ip4ip6erspan_set_tunnel")
-int _ip4ip6erspan_set_tunnel(struct __sk_buff *skb)
+SEC("tc")
+int ip4ip6erspan_set_tunnel(struct __sk_buff *skb)
{
struct bpf_tunnel_key key;
struct erspan_metadata md;
@@ -226,7 +239,7 @@ int _ip4ip6erspan_set_tunnel(struct __sk_buff *skb)
ret = bpf_skb_set_tunnel_key(skb, &key, sizeof(key),
BPF_F_TUNINFO_IPV6);
if (ret < 0) {
- ERROR(ret);
+ log_err(ret);
return TC_ACT_SHOT;
}
@@ -240,70 +253,113 @@ int _ip4ip6erspan_set_tunnel(struct __sk_buff *skb)
__u8 hwid = 17;
md.version = 2;
- md.u.md2.dir = direction;
- md.u.md2.hwid = hwid & 0xf;
- md.u.md2.hwid_upper = (hwid >> 4) & 0x3;
+ BPF_CORE_WRITE_BITFIELD(&md.u.md2, dir, direction);
+ BPF_CORE_WRITE_BITFIELD(&md.u.md2, hwid, (hwid & 0xf));
+ BPF_CORE_WRITE_BITFIELD(&md.u.md2, hwid_upper, (hwid >> 4) & 0x3);
#endif
ret = bpf_skb_set_tunnel_opt(skb, &md, sizeof(md));
if (ret < 0) {
- ERROR(ret);
+ log_err(ret);
return TC_ACT_SHOT;
}
return TC_ACT_OK;
}
-SEC("ip4ip6erspan_get_tunnel")
-int _ip4ip6erspan_get_tunnel(struct __sk_buff *skb)
+SEC("tc")
+int ip4ip6erspan_get_tunnel(struct __sk_buff *skb)
{
- char fmt[] = "ip6erspan get key %d remote ip6 ::%x erspan version %d\n";
struct bpf_tunnel_key key;
struct erspan_metadata md;
- __u32 index;
int ret;
ret = bpf_skb_get_tunnel_key(skb, &key, sizeof(key),
BPF_F_TUNINFO_IPV6);
if (ret < 0) {
- ERROR(ret);
+ log_err(ret);
return TC_ACT_SHOT;
}
ret = bpf_skb_get_tunnel_opt(skb, &md, sizeof(md));
if (ret < 0) {
- ERROR(ret);
+ log_err(ret);
return TC_ACT_SHOT;
}
- bpf_trace_printk(fmt, sizeof(fmt),
- key.tunnel_id, key.remote_ipv4, md.version);
+ bpf_printk("ip6erspan get key %d remote ip6 ::%x erspan version %d\n",
+ key.tunnel_id, key.remote_ipv4, md.version);
#ifdef ERSPAN_V1
- char fmt2[] = "\tindex %x\n";
-
index = bpf_ntohl(md.u.index);
- bpf_trace_printk(fmt2, sizeof(fmt2), index);
+ bpf_printk("\tindex %x\n", index);
#else
- char fmt2[] = "\tdirection %d hwid %x timestamp %u\n";
-
- bpf_trace_printk(fmt2, sizeof(fmt2),
- md.u.md2.dir,
- (md.u.md2.hwid_upper << 4) + md.u.md2.hwid,
- bpf_ntohl(md.u.md2.timestamp));
+ bpf_printk("\tdirection %d hwid %x timestamp %u\n",
+ BPF_CORE_READ_BITFIELD(&md.u.md2, dir),
+ (BPF_CORE_READ_BITFIELD(&md.u.md2, hwid_upper) << 4) +
+ BPF_CORE_READ_BITFIELD(&md.u.md2, hwid),
+ bpf_ntohl(md.u.md2.timestamp));
#endif
return TC_ACT_OK;
}
-SEC("vxlan_set_tunnel")
-int _vxlan_set_tunnel(struct __sk_buff *skb)
+SEC("tc")
+int vxlan_set_tunnel_dst(struct __sk_buff *skb)
+{
+ struct bpf_tunnel_key key;
+ struct vxlan_metadata md;
+ __u32 index = 0;
+ __u32 *local_ip = NULL;
+ int ret = 0;
+
+ local_ip = bpf_map_lookup_elem(&local_ip_map, &index);
+ if (!local_ip) {
+ log_err(ret);
+ return TC_ACT_SHOT;
+ }
+
+ __builtin_memset(&key, 0x0, sizeof(key));
+ key.local_ipv4 = 0xac100164; /* 172.16.1.100 */
+ key.remote_ipv4 = *local_ip;
+ key.tunnel_id = 2;
+ key.tunnel_tos = 0;
+ key.tunnel_ttl = 64;
+
+ ret = bpf_skb_set_tunnel_key(skb, &key, sizeof(key),
+ BPF_F_ZERO_CSUM_TX);
+ if (ret < 0) {
+ log_err(ret);
+ return TC_ACT_SHOT;
+ }
+
+ md.gbp = 0x800FF; /* Set VXLAN Group Policy extension */
+ ret = bpf_skb_set_tunnel_opt(skb, &md, sizeof(md));
+ if (ret < 0) {
+ log_err(ret);
+ return TC_ACT_SHOT;
+ }
+
+ return TC_ACT_OK;
+}
+
+SEC("tc")
+int vxlan_set_tunnel_src(struct __sk_buff *skb)
{
- int ret;
struct bpf_tunnel_key key;
struct vxlan_metadata md;
+ __u32 index = 0;
+ __u32 *local_ip = NULL;
+ int ret = 0;
+
+ local_ip = bpf_map_lookup_elem(&local_ip_map, &index);
+ if (!local_ip) {
+ log_err(ret);
+ return TC_ACT_SHOT;
+ }
__builtin_memset(&key, 0x0, sizeof(key));
+ key.local_ipv4 = *local_ip;
key.remote_ipv4 = 0xac100164; /* 172.16.1.100 */
key.tunnel_id = 2;
key.tunnel_tos = 0;
@@ -312,53 +368,154 @@ int _vxlan_set_tunnel(struct __sk_buff *skb)
ret = bpf_skb_set_tunnel_key(skb, &key, sizeof(key),
BPF_F_ZERO_CSUM_TX);
if (ret < 0) {
- ERROR(ret);
+ log_err(ret);
return TC_ACT_SHOT;
}
md.gbp = 0x800FF; /* Set VXLAN Group Policy extension */
ret = bpf_skb_set_tunnel_opt(skb, &md, sizeof(md));
if (ret < 0) {
- ERROR(ret);
+ log_err(ret);
return TC_ACT_SHOT;
}
return TC_ACT_OK;
}
-SEC("vxlan_get_tunnel")
-int _vxlan_get_tunnel(struct __sk_buff *skb)
+SEC("tc")
+int vxlan_get_tunnel_src(struct __sk_buff *skb)
{
int ret;
struct bpf_tunnel_key key;
struct vxlan_metadata md;
- char fmt[] = "key %d remote ip 0x%x vxlan gbp 0x%x\n";
- ret = bpf_skb_get_tunnel_key(skb, &key, sizeof(key), 0);
+ ret = bpf_skb_get_tunnel_key(skb, &key, sizeof(key),
+ BPF_F_TUNINFO_FLAGS);
if (ret < 0) {
- ERROR(ret);
+ log_err(ret);
return TC_ACT_SHOT;
}
ret = bpf_skb_get_tunnel_opt(skb, &md, sizeof(md));
if (ret < 0) {
- ERROR(ret);
+ log_err(ret);
+ return TC_ACT_SHOT;
+ }
+
+ if (key.local_ipv4 != ASSIGNED_ADDR_VETH1 || md.gbp != 0x800FF ||
+ !(key.tunnel_flags & TUNNEL_KEY) ||
+ (key.tunnel_flags & TUNNEL_CSUM)) {
+ bpf_printk("vxlan key %d local ip 0x%x remote ip 0x%x gbp 0x%x flags 0x%x\n",
+ key.tunnel_id, key.local_ipv4,
+ key.remote_ipv4, md.gbp,
+ bpf_ntohs(key.tunnel_flags));
+ log_err(ret);
+ return TC_ACT_SHOT;
+ }
+
+ return TC_ACT_OK;
+}
+
+SEC("tc")
+int veth_set_outer_dst(struct __sk_buff *skb)
+{
+ struct ethhdr *eth = (struct ethhdr *)(long)skb->data;
+ __u32 assigned_ip = bpf_htonl(ASSIGNED_ADDR_VETH1);
+ void *data_end = (void *)(long)skb->data_end;
+ struct udphdr *udph;
+ struct iphdr *iph;
+ int ret = 0;
+ __s64 csum;
+
+ if ((void *)eth + sizeof(*eth) > data_end) {
+ log_err(ret);
+ return TC_ACT_SHOT;
+ }
+
+ if (eth->h_proto != bpf_htons(ETH_P_IP))
+ return TC_ACT_OK;
+
+ iph = (struct iphdr *)(eth + 1);
+ if ((void *)iph + sizeof(*iph) > data_end) {
+ log_err(ret);
+ return TC_ACT_SHOT;
+ }
+ if (iph->protocol != IPPROTO_UDP)
+ return TC_ACT_OK;
+
+ udph = (struct udphdr *)(iph + 1);
+ if ((void *)udph + sizeof(*udph) > data_end) {
+ log_err(ret);
+ return TC_ACT_SHOT;
+ }
+ if (udph->dest != bpf_htons(VXLAN_UDP_PORT))
+ return TC_ACT_OK;
+
+ if (iph->daddr != assigned_ip) {
+ csum = bpf_csum_diff(&iph->daddr, sizeof(__u32), &assigned_ip,
+ sizeof(__u32), 0);
+ if (bpf_skb_store_bytes(skb, ETH_HLEN + offsetof(struct iphdr, daddr),
+ &assigned_ip, sizeof(__u32), 0) < 0) {
+ log_err(ret);
+ return TC_ACT_SHOT;
+ }
+ if (bpf_l3_csum_replace(skb, ETH_HLEN + offsetof(struct iphdr, check),
+ 0, csum, 0) < 0) {
+ log_err(ret);
+ return TC_ACT_SHOT;
+ }
+ bpf_skb_change_type(skb, PACKET_HOST);
+ }
+ return TC_ACT_OK;
+}
+
+SEC("tc")
+int ip6vxlan_set_tunnel_dst(struct __sk_buff *skb)
+{
+ struct bpf_tunnel_key key;
+ __u32 index = 0;
+ __u32 *local_ip;
+ int ret = 0;
+
+ local_ip = bpf_map_lookup_elem(&local_ip_map, &index);
+ if (!local_ip) {
+ log_err(ret);
return TC_ACT_SHOT;
}
- bpf_trace_printk(fmt, sizeof(fmt),
- key.tunnel_id, key.remote_ipv4, md.gbp);
+ __builtin_memset(&key, 0x0, sizeof(key));
+ key.local_ipv6[3] = bpf_htonl(0x11); /* ::11 */
+ key.remote_ipv6[3] = bpf_htonl(*local_ip);
+ key.tunnel_id = 22;
+ key.tunnel_tos = 0;
+ key.tunnel_ttl = 64;
+
+ ret = bpf_skb_set_tunnel_key(skb, &key, sizeof(key),
+ BPF_F_TUNINFO_IPV6);
+ if (ret < 0) {
+ log_err(ret);
+ return TC_ACT_SHOT;
+ }
return TC_ACT_OK;
}
-SEC("ip6vxlan_set_tunnel")
-int _ip6vxlan_set_tunnel(struct __sk_buff *skb)
+SEC("tc")
+int ip6vxlan_set_tunnel_src(struct __sk_buff *skb)
{
struct bpf_tunnel_key key;
- int ret;
+ __u32 index = 0;
+ __u32 *local_ip;
+ int ret = 0;
+
+ local_ip = bpf_map_lookup_elem(&local_ip_map, &index);
+ if (!local_ip) {
+ log_err(ret);
+ return TC_ACT_SHOT;
+ }
__builtin_memset(&key, 0x0, sizeof(key));
+ key.local_ipv6[3] = bpf_htonl(*local_ip);
key.remote_ipv6[3] = bpf_htonl(0x11); /* ::11 */
key.tunnel_id = 22;
key.tunnel_tos = 0;
@@ -367,37 +524,53 @@ int _ip6vxlan_set_tunnel(struct __sk_buff *skb)
ret = bpf_skb_set_tunnel_key(skb, &key, sizeof(key),
BPF_F_TUNINFO_IPV6);
if (ret < 0) {
- ERROR(ret);
+ log_err(ret);
return TC_ACT_SHOT;
}
return TC_ACT_OK;
}
-SEC("ip6vxlan_get_tunnel")
-int _ip6vxlan_get_tunnel(struct __sk_buff *skb)
+SEC("tc")
+int ip6vxlan_get_tunnel_src(struct __sk_buff *skb)
{
- char fmt[] = "key %d remote ip6 ::%x label %x\n";
struct bpf_tunnel_key key;
- int ret;
+ __u32 index = 0;
+ __u32 *local_ip;
+ int ret = 0;
+
+ local_ip = bpf_map_lookup_elem(&local_ip_map, &index);
+ if (!local_ip) {
+ log_err(ret);
+ return TC_ACT_SHOT;
+ }
ret = bpf_skb_get_tunnel_key(skb, &key, sizeof(key),
- BPF_F_TUNINFO_IPV6);
+ BPF_F_TUNINFO_IPV6 | BPF_F_TUNINFO_FLAGS);
if (ret < 0) {
- ERROR(ret);
+ log_err(ret);
return TC_ACT_SHOT;
}
- bpf_trace_printk(fmt, sizeof(fmt),
- key.tunnel_id, key.remote_ipv6[3], key.tunnel_label);
+ if (bpf_ntohl(key.local_ipv6[3]) != *local_ip ||
+ !(key.tunnel_flags & TUNNEL_KEY) ||
+ !(key.tunnel_flags & TUNNEL_CSUM)) {
+ bpf_printk("ip6vxlan key %d local ip6 ::%x remote ip6 ::%x label 0x%x flags 0x%x\n",
+ key.tunnel_id, bpf_ntohl(key.local_ipv6[3]),
+ bpf_ntohl(key.remote_ipv6[3]), key.tunnel_label,
+ bpf_ntohs(key.tunnel_flags));
+ bpf_printk("local_ip 0x%x\n", *local_ip);
+ log_err(ret);
+ return TC_ACT_SHOT;
+ }
return TC_ACT_OK;
}
-SEC("geneve_set_tunnel")
-int _geneve_set_tunnel(struct __sk_buff *skb)
+SEC("tc")
+int geneve_set_tunnel(struct __sk_buff *skb)
{
- int ret, ret2;
+ int ret;
struct bpf_tunnel_key key;
struct geneve_opt gopt;
@@ -419,46 +592,43 @@ int _geneve_set_tunnel(struct __sk_buff *skb)
ret = bpf_skb_set_tunnel_key(skb, &key, sizeof(key),
BPF_F_ZERO_CSUM_TX);
if (ret < 0) {
- ERROR(ret);
+ log_err(ret);
return TC_ACT_SHOT;
}
ret = bpf_skb_set_tunnel_opt(skb, &gopt, sizeof(gopt));
if (ret < 0) {
- ERROR(ret);
+ log_err(ret);
return TC_ACT_SHOT;
}
return TC_ACT_OK;
}
-SEC("geneve_get_tunnel")
-int _geneve_get_tunnel(struct __sk_buff *skb)
+SEC("tc")
+int geneve_get_tunnel(struct __sk_buff *skb)
{
int ret;
struct bpf_tunnel_key key;
struct geneve_opt gopt;
- char fmt[] = "key %d remote ip 0x%x geneve class 0x%x\n";
ret = bpf_skb_get_tunnel_key(skb, &key, sizeof(key), 0);
if (ret < 0) {
- ERROR(ret);
+ log_err(ret);
return TC_ACT_SHOT;
}
ret = bpf_skb_get_tunnel_opt(skb, &gopt, sizeof(gopt));
- if (ret < 0) {
- ERROR(ret);
- return TC_ACT_SHOT;
- }
+ if (ret < 0)
+ gopt.opt_class = 0;
- bpf_trace_printk(fmt, sizeof(fmt),
- key.tunnel_id, key.remote_ipv4, gopt.opt_class);
+ bpf_printk("key %d remote ip 0x%x geneve class 0x%x\n",
+ key.tunnel_id, key.remote_ipv4, gopt.opt_class);
return TC_ACT_OK;
}
-SEC("ip6geneve_set_tunnel")
-int _ip6geneve_set_tunnel(struct __sk_buff *skb)
+SEC("tc")
+int ip6geneve_set_tunnel(struct __sk_buff *skb)
{
struct bpf_tunnel_key key;
struct geneve_opt gopt;
@@ -473,7 +643,7 @@ int _ip6geneve_set_tunnel(struct __sk_buff *skb)
ret = bpf_skb_set_tunnel_key(skb, &key, sizeof(key),
BPF_F_TUNINFO_IPV6);
if (ret < 0) {
- ERROR(ret);
+ log_err(ret);
return TC_ACT_SHOT;
}
@@ -488,17 +658,16 @@ int _ip6geneve_set_tunnel(struct __sk_buff *skb)
ret = bpf_skb_set_tunnel_opt(skb, &gopt, sizeof(gopt));
if (ret < 0) {
- ERROR(ret);
+ log_err(ret);
return TC_ACT_SHOT;
}
return TC_ACT_OK;
}
-SEC("ip6geneve_get_tunnel")
-int _ip6geneve_get_tunnel(struct __sk_buff *skb)
+SEC("tc")
+int ip6geneve_get_tunnel(struct __sk_buff *skb)
{
- char fmt[] = "key %d remote ip 0x%x geneve class 0x%x\n";
struct bpf_tunnel_key key;
struct geneve_opt gopt;
int ret;
@@ -506,208 +675,330 @@ int _ip6geneve_get_tunnel(struct __sk_buff *skb)
ret = bpf_skb_get_tunnel_key(skb, &key, sizeof(key),
BPF_F_TUNINFO_IPV6);
if (ret < 0) {
- ERROR(ret);
+ log_err(ret);
return TC_ACT_SHOT;
}
ret = bpf_skb_get_tunnel_opt(skb, &gopt, sizeof(gopt));
+ if (ret < 0)
+ gopt.opt_class = 0;
+
+ bpf_printk("key %d remote ip 0x%x geneve class 0x%x\n",
+ key.tunnel_id, key.remote_ipv4, gopt.opt_class);
+
+ return TC_ACT_OK;
+}
+
+SEC("tc")
+int ipip_set_tunnel(struct __sk_buff *skb)
+{
+ struct bpf_tunnel_key key = {};
+ void *data = (void *)(long)skb->data;
+ struct iphdr *iph = data;
+ void *data_end = (void *)(long)skb->data_end;
+ int ret;
+
+ /* single length check */
+ if (data + sizeof(*iph) > data_end) {
+ log_err(1);
+ return TC_ACT_SHOT;
+ }
+
+ key.tunnel_ttl = 64;
+ if (iph->protocol == IPPROTO_ICMP) {
+ key.remote_ipv4 = 0xac100164; /* 172.16.1.100 */
+ }
+
+ ret = bpf_skb_set_tunnel_key(skb, &key, sizeof(key), 0);
if (ret < 0) {
- ERROR(ret);
+ log_err(ret);
return TC_ACT_SHOT;
}
- bpf_trace_printk(fmt, sizeof(fmt),
- key.tunnel_id, key.remote_ipv4, gopt.opt_class);
+ return TC_ACT_OK;
+}
+
+SEC("tc")
+int ipip_get_tunnel(struct __sk_buff *skb)
+{
+ int ret;
+ struct bpf_tunnel_key key;
+ ret = bpf_skb_get_tunnel_key(skb, &key, sizeof(key), 0);
+ if (ret < 0) {
+ log_err(ret);
+ return TC_ACT_SHOT;
+ }
+
+ bpf_printk("remote ip 0x%x\n", key.remote_ipv4);
return TC_ACT_OK;
}
-SEC("ipip_set_tunnel")
-int _ipip_set_tunnel(struct __sk_buff *skb)
+SEC("tc")
+int ipip_gue_set_tunnel(struct __sk_buff *skb)
{
struct bpf_tunnel_key key = {};
+ struct bpf_fou_encap encap = {};
void *data = (void *)(long)skb->data;
struct iphdr *iph = data;
- struct tcphdr *tcp = data + sizeof(*iph);
void *data_end = (void *)(long)skb->data_end;
int ret;
- /* single length check */
- if (data + sizeof(*iph) + sizeof(*tcp) > data_end) {
- ERROR(1);
+ if (data + sizeof(*iph) > data_end) {
+ log_err(1);
return TC_ACT_SHOT;
}
key.tunnel_ttl = 64;
- if (iph->protocol == IPPROTO_ICMP) {
+ if (iph->protocol == IPPROTO_ICMP)
key.remote_ipv4 = 0xac100164; /* 172.16.1.100 */
- } else {
- if (iph->protocol != IPPROTO_TCP || iph->ihl != 5)
- return TC_ACT_SHOT;
- if (tcp->dest == bpf_htons(5200))
- key.remote_ipv4 = 0xac100164; /* 172.16.1.100 */
- else if (tcp->dest == bpf_htons(5201))
- key.remote_ipv4 = 0xac100165; /* 172.16.1.101 */
- else
- return TC_ACT_SHOT;
+ ret = bpf_skb_set_tunnel_key(skb, &key, sizeof(key), 0);
+ if (ret < 0) {
+ log_err(ret);
+ return TC_ACT_SHOT;
+ }
+
+ encap.sport = 0;
+ encap.dport = bpf_htons(5555);
+
+ ret = bpf_skb_set_fou_encap(skb, &encap, FOU_BPF_ENCAP_GUE);
+ if (ret < 0) {
+ log_err(ret);
+ return TC_ACT_SHOT;
+ }
+
+ return TC_ACT_OK;
+}
+
+SEC("tc")
+int ipip_fou_set_tunnel(struct __sk_buff *skb)
+{
+ struct bpf_tunnel_key key = {};
+ struct bpf_fou_encap encap = {};
+ void *data = (void *)(long)skb->data;
+ struct iphdr *iph = data;
+ void *data_end = (void *)(long)skb->data_end;
+ int ret;
+
+ if (data + sizeof(*iph) > data_end) {
+ log_err(1);
+ return TC_ACT_SHOT;
}
+ key.tunnel_ttl = 64;
+ if (iph->protocol == IPPROTO_ICMP)
+ key.remote_ipv4 = 0xac100164; /* 172.16.1.100 */
+
ret = bpf_skb_set_tunnel_key(skb, &key, sizeof(key), 0);
if (ret < 0) {
- ERROR(ret);
+ log_err(ret);
+ return TC_ACT_SHOT;
+ }
+
+ encap.sport = 0;
+ encap.dport = bpf_htons(5555);
+
+ ret = bpf_skb_set_fou_encap(skb, &encap, FOU_BPF_ENCAP_FOU);
+ if (ret < 0) {
+ log_err(ret);
return TC_ACT_SHOT;
}
return TC_ACT_OK;
}
-SEC("ipip_get_tunnel")
-int _ipip_get_tunnel(struct __sk_buff *skb)
+SEC("tc")
+int ipip_encap_get_tunnel(struct __sk_buff *skb)
{
int ret;
- struct bpf_tunnel_key key;
- char fmt[] = "remote ip 0x%x\n";
+ struct bpf_tunnel_key key = {};
+ struct bpf_fou_encap encap = {};
ret = bpf_skb_get_tunnel_key(skb, &key, sizeof(key), 0);
if (ret < 0) {
- ERROR(ret);
+ log_err(ret);
return TC_ACT_SHOT;
}
- bpf_trace_printk(fmt, sizeof(fmt), key.remote_ipv4);
+ ret = bpf_skb_get_fou_encap(skb, &encap);
+ if (ret < 0) {
+ log_err(ret);
+ return TC_ACT_SHOT;
+ }
+
+ if (bpf_ntohs(encap.dport) != 5555)
+ return TC_ACT_SHOT;
+
+ bpf_printk("%d remote ip 0x%x, sport %d, dport %d\n", ret,
+ key.remote_ipv4, bpf_ntohs(encap.sport),
+ bpf_ntohs(encap.dport));
return TC_ACT_OK;
}
-SEC("ipip6_set_tunnel")
-int _ipip6_set_tunnel(struct __sk_buff *skb)
+SEC("tc")
+int ipip6_set_tunnel(struct __sk_buff *skb)
{
struct bpf_tunnel_key key = {};
void *data = (void *)(long)skb->data;
struct iphdr *iph = data;
- struct tcphdr *tcp = data + sizeof(*iph);
void *data_end = (void *)(long)skb->data_end;
int ret;
/* single length check */
- if (data + sizeof(*iph) + sizeof(*tcp) > data_end) {
- ERROR(1);
+ if (data + sizeof(*iph) > data_end) {
+ log_err(1);
return TC_ACT_SHOT;
}
__builtin_memset(&key, 0x0, sizeof(key));
- key.remote_ipv6[3] = bpf_htonl(0x11); /* ::11 */
key.tunnel_ttl = 64;
+ if (iph->protocol == IPPROTO_ICMP) {
+ key.remote_ipv6[3] = bpf_htonl(0x11); /* ::11 */
+ }
ret = bpf_skb_set_tunnel_key(skb, &key, sizeof(key),
BPF_F_TUNINFO_IPV6);
if (ret < 0) {
- ERROR(ret);
+ log_err(ret);
return TC_ACT_SHOT;
}
return TC_ACT_OK;
}
-SEC("ipip6_get_tunnel")
-int _ipip6_get_tunnel(struct __sk_buff *skb)
+SEC("tc")
+int ipip6_get_tunnel(struct __sk_buff *skb)
{
int ret;
struct bpf_tunnel_key key;
- char fmt[] = "remote ip6 %x::%x\n";
ret = bpf_skb_get_tunnel_key(skb, &key, sizeof(key),
BPF_F_TUNINFO_IPV6);
if (ret < 0) {
- ERROR(ret);
+ log_err(ret);
return TC_ACT_SHOT;
}
- bpf_trace_printk(fmt, sizeof(fmt), bpf_htonl(key.remote_ipv6[0]),
- bpf_htonl(key.remote_ipv6[3]));
+ bpf_printk("remote ip6 %x::%x\n", bpf_htonl(key.remote_ipv6[0]),
+ bpf_htonl(key.remote_ipv6[3]));
return TC_ACT_OK;
}
-SEC("ip6ip6_set_tunnel")
-int _ip6ip6_set_tunnel(struct __sk_buff *skb)
+SEC("tc")
+int ip6ip6_set_tunnel(struct __sk_buff *skb)
{
struct bpf_tunnel_key key = {};
void *data = (void *)(long)skb->data;
struct ipv6hdr *iph = data;
- struct tcphdr *tcp = data + sizeof(*iph);
void *data_end = (void *)(long)skb->data_end;
int ret;
/* single length check */
- if (data + sizeof(*iph) + sizeof(*tcp) > data_end) {
- ERROR(1);
+ if (data + sizeof(*iph) > data_end) {
+ log_err(1);
return TC_ACT_SHOT;
}
- key.remote_ipv6[0] = bpf_htonl(0x2401db00);
key.tunnel_ttl = 64;
-
if (iph->nexthdr == 58 /* NEXTHDR_ICMP */) {
- key.remote_ipv6[3] = bpf_htonl(1);
- } else {
- if (iph->nexthdr != 6 /* NEXTHDR_TCP */) {
- ERROR(iph->nexthdr);
- return TC_ACT_SHOT;
- }
-
- if (tcp->dest == bpf_htons(5200)) {
- key.remote_ipv6[3] = bpf_htonl(1);
- } else if (tcp->dest == bpf_htons(5201)) {
- key.remote_ipv6[3] = bpf_htonl(2);
- } else {
- ERROR(tcp->dest);
- return TC_ACT_SHOT;
- }
+ key.remote_ipv6[3] = bpf_htonl(0x11); /* ::11 */
}
ret = bpf_skb_set_tunnel_key(skb, &key, sizeof(key),
BPF_F_TUNINFO_IPV6);
if (ret < 0) {
- ERROR(ret);
+ log_err(ret);
return TC_ACT_SHOT;
}
return TC_ACT_OK;
}
-SEC("ip6ip6_get_tunnel")
-int _ip6ip6_get_tunnel(struct __sk_buff *skb)
+SEC("tc")
+int ip6ip6_get_tunnel(struct __sk_buff *skb)
{
int ret;
struct bpf_tunnel_key key;
- char fmt[] = "remote ip6 %x::%x\n";
ret = bpf_skb_get_tunnel_key(skb, &key, sizeof(key),
BPF_F_TUNINFO_IPV6);
if (ret < 0) {
- ERROR(ret);
+ log_err(ret);
return TC_ACT_SHOT;
}
- bpf_trace_printk(fmt, sizeof(fmt), bpf_htonl(key.remote_ipv6[0]),
- bpf_htonl(key.remote_ipv6[3]));
+ bpf_printk("remote ip6 %x::%x\n", bpf_htonl(key.remote_ipv6[0]),
+ bpf_htonl(key.remote_ipv6[3]));
return TC_ACT_OK;
}
-SEC("xfrm_get_state")
-int _xfrm_get_state(struct __sk_buff *skb)
+volatile int xfrm_reqid = 0;
+volatile int xfrm_spi = 0;
+volatile int xfrm_remote_ip = 0;
+
+SEC("tc")
+int xfrm_get_state(struct __sk_buff *skb)
{
struct bpf_xfrm_state x;
- char fmt[] = "reqid %d spi 0x%x remote ip 0x%x\n";
int ret;
ret = bpf_skb_get_xfrm_state(skb, 0, &x, sizeof(x), 0);
if (ret < 0)
return TC_ACT_OK;
- bpf_trace_printk(fmt, sizeof(fmt), x.reqid, bpf_ntohl(x.spi),
- bpf_ntohl(x.remote_ipv4));
+ xfrm_reqid = x.reqid;
+ xfrm_spi = bpf_ntohl(x.spi);
+ xfrm_remote_ip = bpf_ntohl(x.remote_ipv4);
+
return TC_ACT_OK;
}
+volatile int xfrm_replay_window = 0;
+
+SEC("xdp")
+int xfrm_get_state_xdp(struct xdp_md *xdp)
+{
+ struct bpf_xfrm_state_opts opts = {};
+ struct xfrm_state *x = NULL;
+ struct ip_esp_hdr *esph;
+ struct bpf_dynptr ptr;
+ u8 esph_buf[8] = {};
+ u8 iph_buf[20] = {};
+ struct iphdr *iph;
+ u32 off;
+
+ if (bpf_dynptr_from_xdp(xdp, 0, &ptr))
+ goto out;
+
+ off = sizeof(struct ethhdr);
+ iph = bpf_dynptr_slice(&ptr, off, iph_buf, sizeof(iph_buf));
+ if (!iph || iph->protocol != IPPROTO_ESP)
+ goto out;
+
+ off += sizeof(struct iphdr);
+ esph = bpf_dynptr_slice(&ptr, off, esph_buf, sizeof(esph_buf));
+ if (!esph)
+ goto out;
+
+ opts.netns_id = BPF_F_CURRENT_NETNS;
+ opts.daddr.a4 = iph->daddr;
+ opts.spi = esph->spi;
+ opts.proto = IPPROTO_ESP;
+ opts.family = AF_INET;
+
+ x = bpf_xdp_get_xfrm_state(xdp, &opts, sizeof(opts));
+ if (!x)
+ goto out;
+
+ if (!x->replay_esn)
+ goto out;
+
+ xfrm_replay_window = x->replay_esn->replay_window;
+out:
+ if (x)
+ bpf_xdp_xfrm_state_release(x);
+ return XDP_PASS;
+}
+
char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/test_unpriv_bpf_disabled.c b/tools/testing/selftests/bpf/progs/test_unpriv_bpf_disabled.c
new file mode 100644
index 000000000000..fc423e43a3cd
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/test_unpriv_bpf_disabled.c
@@ -0,0 +1,83 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2022, Oracle and/or its affiliates. */
+
+#include "vmlinux.h"
+
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
+#include "bpf_misc.h"
+
+__u32 perfbuf_val = 0;
+__u32 ringbuf_val = 0;
+
+int test_pid;
+
+struct {
+ __uint(type, BPF_MAP_TYPE_ARRAY);
+ __uint(max_entries, 1);
+ __type(key, __u32);
+ __type(value, __u32);
+} array SEC(".maps");
+
+struct {
+ __uint(type, BPF_MAP_TYPE_PERCPU_ARRAY);
+ __uint(max_entries, 1);
+ __type(key, __u32);
+ __type(value, __u32);
+} percpu_array SEC(".maps");
+
+struct {
+ __uint(type, BPF_MAP_TYPE_HASH);
+ __uint(max_entries, 1);
+ __type(key, __u32);
+ __type(value, __u32);
+} hash SEC(".maps");
+
+struct {
+ __uint(type, BPF_MAP_TYPE_PERCPU_HASH);
+ __uint(max_entries, 1);
+ __type(key, __u32);
+ __type(value, __u32);
+} percpu_hash SEC(".maps");
+
+struct {
+ __uint(type, BPF_MAP_TYPE_PERF_EVENT_ARRAY);
+ __type(key, __u32);
+ __type(value, __u32);
+} perfbuf SEC(".maps");
+
+struct {
+ __uint(type, BPF_MAP_TYPE_RINGBUF);
+ __uint(max_entries, 1 << 12);
+} ringbuf SEC(".maps");
+
+struct {
+ __uint(type, BPF_MAP_TYPE_PROG_ARRAY);
+ __uint(max_entries, 1);
+ __uint(key_size, sizeof(__u32));
+ __uint(value_size, sizeof(__u32));
+} prog_array SEC(".maps");
+
+SEC("fentry/" SYS_PREFIX "sys_nanosleep")
+int sys_nanosleep_enter(void *ctx)
+{
+ int cur_pid;
+
+ cur_pid = bpf_get_current_pid_tgid() >> 32;
+
+ if (cur_pid != test_pid)
+ return 0;
+
+ bpf_perf_event_output(ctx, &perfbuf, BPF_F_CURRENT_CPU, &perfbuf_val, sizeof(perfbuf_val));
+ bpf_ringbuf_output(&ringbuf, &ringbuf_val, sizeof(ringbuf_val), 0);
+
+ return 0;
+}
+
+SEC("perf_event")
+int handle_perf_event(void *ctx)
+{
+ return 0;
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/test_uprobe.c b/tools/testing/selftests/bpf/progs/test_uprobe.c
new file mode 100644
index 000000000000..896c88a4960d
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/test_uprobe.c
@@ -0,0 +1,61 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2023 Hengqi Chen */
+
+#include "vmlinux.h"
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
+
+pid_t my_pid = 0;
+
+int test1_result = 0;
+int test2_result = 0;
+int test3_result = 0;
+int test4_result = 0;
+
+SEC("uprobe/./liburandom_read.so:urandlib_api_sameoffset")
+int BPF_UPROBE(test1)
+{
+ pid_t pid = bpf_get_current_pid_tgid() >> 32;
+
+ if (pid != my_pid)
+ return 0;
+
+ test1_result = 1;
+ return 0;
+}
+
+SEC("uprobe/./liburandom_read.so:urandlib_api_sameoffset@LIBURANDOM_READ_1.0.0")
+int BPF_UPROBE(test2)
+{
+ pid_t pid = bpf_get_current_pid_tgid() >> 32;
+
+ if (pid != my_pid)
+ return 0;
+
+ test2_result = 1;
+ return 0;
+}
+
+SEC("uretprobe/./liburandom_read.so:urandlib_api_sameoffset@@LIBURANDOM_READ_2.0.0")
+int BPF_URETPROBE(test3, int ret)
+{
+ pid_t pid = bpf_get_current_pid_tgid() >> 32;
+
+ if (pid != my_pid)
+ return 0;
+
+ test3_result = ret;
+ return 0;
+}
+
+SEC("uprobe")
+int BPF_UPROBE(test4)
+{
+ pid_t pid = bpf_get_current_pid_tgid() >> 32;
+
+ if (pid != my_pid)
+ return 0;
+
+ test4_result = 1;
+ return 0;
+}
diff --git a/tools/testing/selftests/bpf/progs/test_uprobe_autoattach.c b/tools/testing/selftests/bpf/progs/test_uprobe_autoattach.c
new file mode 100644
index 000000000000..da4bf89d004c
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/test_uprobe_autoattach.c
@@ -0,0 +1,117 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2022, Oracle and/or its affiliates. */
+
+#include "vmlinux.h"
+
+#include <bpf/bpf_core_read.h>
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
+#include "bpf_misc.h"
+
+int uprobe_byname_parm1 = 0;
+int uprobe_byname_ran = 0;
+int uretprobe_byname_rc = 0;
+int uretprobe_byname_ret = 0;
+int uretprobe_byname_ran = 0;
+u64 uprobe_byname2_parm1 = 0;
+int uprobe_byname2_ran = 0;
+u64 uretprobe_byname2_rc = 0;
+int uretprobe_byname2_ran = 0;
+
+int test_pid;
+
+int a[8];
+
+/* This program cannot auto-attach, but that should not stop other
+ * programs from attaching.
+ */
+SEC("uprobe")
+int handle_uprobe_noautoattach(struct pt_regs *ctx)
+{
+ return 0;
+}
+
+SEC("uprobe//proc/self/exe:autoattach_trigger_func")
+int BPF_UPROBE(handle_uprobe_byname
+ , int arg1
+ , int arg2
+ , int arg3
+#if FUNC_REG_ARG_CNT > 3
+ , int arg4
+#endif
+#if FUNC_REG_ARG_CNT > 4
+ , int arg5
+#endif
+#if FUNC_REG_ARG_CNT > 5
+ , int arg6
+#endif
+#if FUNC_REG_ARG_CNT > 6
+ , int arg7
+#endif
+#if FUNC_REG_ARG_CNT > 7
+ , int arg8
+#endif
+)
+{
+ uprobe_byname_parm1 = PT_REGS_PARM1_CORE(ctx);
+ uprobe_byname_ran = 1;
+
+ a[0] = arg1;
+ a[1] = arg2;
+ a[2] = arg3;
+#if FUNC_REG_ARG_CNT > 3
+ a[3] = arg4;
+#endif
+#if FUNC_REG_ARG_CNT > 4
+ a[4] = arg5;
+#endif
+#if FUNC_REG_ARG_CNT > 5
+ a[5] = arg6;
+#endif
+#if FUNC_REG_ARG_CNT > 6
+ a[6] = arg7;
+#endif
+#if FUNC_REG_ARG_CNT > 7
+ a[7] = arg8;
+#endif
+ return 0;
+}
+
+SEC("uretprobe//proc/self/exe:autoattach_trigger_func")
+int BPF_URETPROBE(handle_uretprobe_byname, int ret)
+{
+ uretprobe_byname_rc = PT_REGS_RC_CORE(ctx);
+ uretprobe_byname_ret = ret;
+ uretprobe_byname_ran = 2;
+
+ return 0;
+}
+
+
+SEC("uprobe/libc.so.6:fopen")
+int BPF_UPROBE(handle_uprobe_byname2, const char *pathname, const char *mode)
+{
+ int pid = bpf_get_current_pid_tgid() >> 32;
+
+ /* ignore irrelevant invocations */
+ if (test_pid != pid)
+ return 0;
+ uprobe_byname2_parm1 = (u64)(long)pathname;
+ uprobe_byname2_ran = 3;
+ return 0;
+}
+
+SEC("uretprobe/libc.so.6:fopen")
+int BPF_URETPROBE(handle_uretprobe_byname2, void *ret)
+{
+ int pid = bpf_get_current_pid_tgid() >> 32;
+
+ /* ignore irrelevant invocations */
+ if (test_pid != pid)
+ return 0;
+ uretprobe_byname2_rc = (u64)(long)ret;
+ uretprobe_byname2_ran = 4;
+ return 0;
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/test_urandom_usdt.c b/tools/testing/selftests/bpf/progs/test_urandom_usdt.c
new file mode 100644
index 000000000000..3539b02bd5f7
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/test_urandom_usdt.c
@@ -0,0 +1,70 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2022 Meta Platforms, Inc. and affiliates. */
+
+#include "vmlinux.h"
+#include <bpf/bpf_helpers.h>
+#include <bpf/usdt.bpf.h>
+
+int urand_pid;
+
+int urand_read_without_sema_call_cnt;
+int urand_read_without_sema_buf_sz_sum;
+
+SEC("usdt/./urandom_read:urand:read_without_sema")
+int BPF_USDT(urand_read_without_sema, int iter_num, int iter_cnt, int buf_sz)
+{
+ if (urand_pid != (bpf_get_current_pid_tgid() >> 32))
+ return 0;
+
+ __sync_fetch_and_add(&urand_read_without_sema_call_cnt, 1);
+ __sync_fetch_and_add(&urand_read_without_sema_buf_sz_sum, buf_sz);
+
+ return 0;
+}
+
+int urand_read_with_sema_call_cnt;
+int urand_read_with_sema_buf_sz_sum;
+
+SEC("usdt/./urandom_read:urand:read_with_sema")
+int BPF_USDT(urand_read_with_sema, int iter_num, int iter_cnt, int buf_sz)
+{
+ if (urand_pid != (bpf_get_current_pid_tgid() >> 32))
+ return 0;
+
+ __sync_fetch_and_add(&urand_read_with_sema_call_cnt, 1);
+ __sync_fetch_and_add(&urand_read_with_sema_buf_sz_sum, buf_sz);
+
+ return 0;
+}
+
+int urandlib_read_without_sema_call_cnt;
+int urandlib_read_without_sema_buf_sz_sum;
+
+SEC("usdt/./liburandom_read.so:urandlib:read_without_sema")
+int BPF_USDT(urandlib_read_without_sema, int iter_num, int iter_cnt, int buf_sz)
+{
+ if (urand_pid != (bpf_get_current_pid_tgid() >> 32))
+ return 0;
+
+ __sync_fetch_and_add(&urandlib_read_without_sema_call_cnt, 1);
+ __sync_fetch_and_add(&urandlib_read_without_sema_buf_sz_sum, buf_sz);
+
+ return 0;
+}
+
+int urandlib_read_with_sema_call_cnt;
+int urandlib_read_with_sema_buf_sz_sum;
+
+SEC("usdt/./liburandom_read.so:urandlib:read_with_sema")
+int BPF_USDT(urandlib_read_with_sema, int iter_num, int iter_cnt, int buf_sz)
+{
+ if (urand_pid != (bpf_get_current_pid_tgid() >> 32))
+ return 0;
+
+ __sync_fetch_and_add(&urandlib_read_with_sema_call_cnt, 1);
+ __sync_fetch_and_add(&urandlib_read_with_sema_buf_sz_sum, buf_sz);
+
+ return 0;
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/test_usdt.c b/tools/testing/selftests/bpf/progs/test_usdt.c
new file mode 100644
index 000000000000..505aab9a5234
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/test_usdt.c
@@ -0,0 +1,96 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2022 Meta Platforms, Inc. and affiliates. */
+
+#include "vmlinux.h"
+#include <bpf/bpf_helpers.h>
+#include <bpf/usdt.bpf.h>
+
+int my_pid;
+
+int usdt0_called;
+u64 usdt0_cookie;
+int usdt0_arg_cnt;
+int usdt0_arg_ret;
+
+SEC("usdt")
+int usdt0(struct pt_regs *ctx)
+{
+ long tmp;
+
+ if (my_pid != (bpf_get_current_pid_tgid() >> 32))
+ return 0;
+
+ __sync_fetch_and_add(&usdt0_called, 1);
+
+ usdt0_cookie = bpf_usdt_cookie(ctx);
+ usdt0_arg_cnt = bpf_usdt_arg_cnt(ctx);
+ /* should return -ENOENT for any arg_num */
+ usdt0_arg_ret = bpf_usdt_arg(ctx, bpf_get_prandom_u32(), &tmp);
+ return 0;
+}
+
+int usdt3_called;
+u64 usdt3_cookie;
+int usdt3_arg_cnt;
+int usdt3_arg_rets[3];
+u64 usdt3_args[3];
+
+SEC("usdt//proc/self/exe:test:usdt3")
+int usdt3(struct pt_regs *ctx)
+{
+ long tmp;
+
+ if (my_pid != (bpf_get_current_pid_tgid() >> 32))
+ return 0;
+
+ __sync_fetch_and_add(&usdt3_called, 1);
+
+ usdt3_cookie = bpf_usdt_cookie(ctx);
+ usdt3_arg_cnt = bpf_usdt_arg_cnt(ctx);
+
+ usdt3_arg_rets[0] = bpf_usdt_arg(ctx, 0, &tmp);
+ usdt3_args[0] = (int)tmp;
+
+ usdt3_arg_rets[1] = bpf_usdt_arg(ctx, 1, &tmp);
+ usdt3_args[1] = (long)tmp;
+
+ usdt3_arg_rets[2] = bpf_usdt_arg(ctx, 2, &tmp);
+ usdt3_args[2] = (uintptr_t)tmp;
+
+ return 0;
+}
+
+int usdt12_called;
+u64 usdt12_cookie;
+int usdt12_arg_cnt;
+u64 usdt12_args[12];
+
+SEC("usdt//proc/self/exe:test:usdt12")
+int BPF_USDT(usdt12, int a1, int a2, long a3, long a4, unsigned a5,
+ long a6, __u64 a7, uintptr_t a8, int a9, short a10,
+ short a11, signed char a12)
+{
+ if (my_pid != (bpf_get_current_pid_tgid() >> 32))
+ return 0;
+
+ __sync_fetch_and_add(&usdt12_called, 1);
+
+ usdt12_cookie = bpf_usdt_cookie(ctx);
+ usdt12_arg_cnt = bpf_usdt_arg_cnt(ctx);
+
+ usdt12_args[0] = a1;
+ usdt12_args[1] = a2;
+ usdt12_args[2] = a3;
+ usdt12_args[3] = a4;
+ usdt12_args[4] = a5;
+ usdt12_args[5] = a6;
+ usdt12_args[6] = a7;
+ usdt12_args[7] = a8;
+ usdt12_args[8] = a9;
+ usdt12_args[9] = a10;
+ usdt12_args[10] = a11;
+ usdt12_args[11] = a12;
+ return 0;
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/test_usdt_multispec.c b/tools/testing/selftests/bpf/progs/test_usdt_multispec.c
new file mode 100644
index 000000000000..962f3462066a
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/test_usdt_multispec.c
@@ -0,0 +1,30 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2022 Meta Platforms, Inc. and affiliates. */
+
+#include "vmlinux.h"
+#include <bpf/bpf_helpers.h>
+#include <bpf/usdt.bpf.h>
+
+/* this file is linked together with test_usdt.c to validate that usdt.bpf.h
+ * can be included in multiple .bpf.c files forming single final BPF object
+ * file
+ */
+
+extern int my_pid;
+
+int usdt_100_called;
+int usdt_100_sum;
+
+SEC("usdt//proc/self/exe:test:usdt_100")
+int BPF_USDT(usdt_100, int x)
+{
+ if (my_pid != (bpf_get_current_pid_tgid() >> 32))
+ return 0;
+
+ __sync_fetch_and_add(&usdt_100_called, 1);
+ __sync_fetch_and_add(&usdt_100_sum, x);
+
+ return 0;
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/test_user_ringbuf.h b/tools/testing/selftests/bpf/progs/test_user_ringbuf.h
new file mode 100644
index 000000000000..1643b4d59ba7
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/test_user_ringbuf.h
@@ -0,0 +1,35 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* Copyright (c) 2022 Meta Platforms, Inc. and affiliates. */
+
+#ifndef _TEST_USER_RINGBUF_H
+#define _TEST_USER_RINGBUF_H
+
+#define TEST_OP_64 4
+#define TEST_OP_32 2
+
+enum test_msg_op {
+ TEST_MSG_OP_INC64,
+ TEST_MSG_OP_INC32,
+ TEST_MSG_OP_MUL64,
+ TEST_MSG_OP_MUL32,
+
+ // Must come last.
+ TEST_MSG_OP_NUM_OPS,
+};
+
+struct test_msg {
+ enum test_msg_op msg_op;
+ union {
+ __s64 operand_64;
+ __s32 operand_32;
+ };
+};
+
+struct sample {
+ int pid;
+ int seq;
+ long value;
+ char comm[16];
+};
+
+#endif /* _TEST_USER_RINGBUF_H */
diff --git a/tools/testing/selftests/bpf/progs/test_varlen.c b/tools/testing/selftests/bpf/progs/test_varlen.c
new file mode 100644
index 000000000000..20eb7d422c41
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/test_varlen.c
@@ -0,0 +1,163 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2020 Facebook */
+
+#include "vmlinux.h"
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
+#include <bpf/bpf_core_read.h>
+
+#define MAX_LEN 256
+
+char buf_in1[MAX_LEN] = {};
+char buf_in2[MAX_LEN] = {};
+
+int test_pid = 0;
+bool capture = false;
+
+/* .bss */
+__u64 payload1_len1 = 0;
+__u64 payload1_len2 = 0;
+__u64 total1 = 0;
+char payload1[MAX_LEN + MAX_LEN] = {};
+__u64 ret_bad_read = 0;
+
+/* .data */
+int payload2_len1 = -1;
+int payload2_len2 = -1;
+int total2 = -1;
+char payload2[MAX_LEN + MAX_LEN] = { 1 };
+
+int payload3_len1 = -1;
+int payload3_len2 = -1;
+int total3= -1;
+char payload3[MAX_LEN + MAX_LEN] = { 1 };
+
+int payload4_len1 = -1;
+int payload4_len2 = -1;
+int total4= -1;
+char payload4[MAX_LEN + MAX_LEN] = { 1 };
+
+char payload_bad[5] = { 0x42, 0x42, 0x42, 0x42, 0x42 };
+
+SEC("raw_tp/sys_enter")
+int handler64_unsigned(void *regs)
+{
+ int pid = bpf_get_current_pid_tgid() >> 32;
+ void *payload = payload1;
+ long len;
+
+ /* ignore irrelevant invocations */
+ if (test_pid != pid || !capture)
+ return 0;
+
+ len = bpf_probe_read_kernel_str(payload, MAX_LEN, &buf_in1[0]);
+ if (len >= 0) {
+ payload += len;
+ payload1_len1 = len;
+ }
+
+ len = bpf_probe_read_kernel_str(payload, MAX_LEN, &buf_in2[0]);
+ if (len >= 0) {
+ payload += len;
+ payload1_len2 = len;
+ }
+
+ total1 = payload - (void *)payload1;
+
+ ret_bad_read = bpf_probe_read_kernel_str(payload_bad + 2, 1, (void *) -1);
+
+ return 0;
+}
+
+SEC("raw_tp/sys_exit")
+int handler64_signed(void *regs)
+{
+ int pid = bpf_get_current_pid_tgid() >> 32;
+ void *payload = payload3;
+ long len;
+
+ /* ignore irrelevant invocations */
+ if (test_pid != pid || !capture)
+ return 0;
+
+ len = bpf_probe_read_kernel_str(payload, MAX_LEN, &buf_in1[0]);
+ if (len >= 0) {
+ payload += len;
+ payload3_len1 = len;
+ }
+ len = bpf_probe_read_kernel_str(payload, MAX_LEN, &buf_in2[0]);
+ if (len >= 0) {
+ payload += len;
+ payload3_len2 = len;
+ }
+ total3 = payload - (void *)payload3;
+
+ return 0;
+}
+
+SEC("tp/raw_syscalls/sys_enter")
+int handler32_unsigned(void *regs)
+{
+ int pid = bpf_get_current_pid_tgid() >> 32;
+ void *payload = payload2;
+ u32 len;
+
+ /* ignore irrelevant invocations */
+ if (test_pid != pid || !capture)
+ return 0;
+
+ len = bpf_probe_read_kernel_str(payload, MAX_LEN, &buf_in1[0]);
+ if (len <= MAX_LEN) {
+ payload += len;
+ payload2_len1 = len;
+ }
+
+ len = bpf_probe_read_kernel_str(payload, MAX_LEN, &buf_in2[0]);
+ if (len <= MAX_LEN) {
+ payload += len;
+ payload2_len2 = len;
+ }
+
+ total2 = payload - (void *)payload2;
+
+ return 0;
+}
+
+SEC("tp/raw_syscalls/sys_exit")
+int handler32_signed(void *regs)
+{
+ int pid = bpf_get_current_pid_tgid() >> 32;
+ void *payload = payload4;
+ long len;
+
+ /* ignore irrelevant invocations */
+ if (test_pid != pid || !capture)
+ return 0;
+
+ len = bpf_probe_read_kernel_str(payload, MAX_LEN, &buf_in1[0]);
+ if (len >= 0) {
+ payload += len;
+ payload4_len1 = len;
+ }
+ len = bpf_probe_read_kernel_str(payload, MAX_LEN, &buf_in2[0]);
+ if (len >= 0) {
+ payload += len;
+ payload4_len2 = len;
+ }
+ total4 = payload - (void *)payload4;
+
+ return 0;
+}
+
+SEC("tp/syscalls/sys_exit_getpid")
+int handler_exit(void *regs)
+{
+ long bla;
+
+ if (bpf_probe_read_kernel(&bla, sizeof(bla), 0))
+ return 1;
+ else
+ return 0;
+}
+
+char LICENSE[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/test_verif_scale1.c b/tools/testing/selftests/bpf/progs/test_verif_scale1.c
index d38153dab3dd..323a73fb2e8c 100644
--- a/tools/testing/selftests/bpf/progs/test_verif_scale1.c
+++ b/tools/testing/selftests/bpf/progs/test_verif_scale1.c
@@ -5,13 +5,13 @@
#define ATTR __attribute__((noinline))
#include "test_jhash.h"
-SEC("scale90_noinline")
+SEC("tc")
int balancer_ingress(struct __sk_buff *ctx)
{
void *data_end = (void *)(long)ctx->data_end;
void *data = (void *)(long)ctx->data;
void *ptr;
- int ret = 0, nh_off, i = 0;
+ int nh_off, i = 0;
nh_off = 14;
diff --git a/tools/testing/selftests/bpf/progs/test_verif_scale2.c b/tools/testing/selftests/bpf/progs/test_verif_scale2.c
index f024154c7be7..f5318f757084 100644
--- a/tools/testing/selftests/bpf/progs/test_verif_scale2.c
+++ b/tools/testing/selftests/bpf/progs/test_verif_scale2.c
@@ -1,17 +1,17 @@
// SPDX-License-Identifier: GPL-2.0
// Copyright (c) 2019 Facebook
-#include <linux/bpf.h>
+#include "vmlinux.h"
#include <bpf/bpf_helpers.h>
#define ATTR __always_inline
#include "test_jhash.h"
-SEC("scale90_inline")
+SEC("tc")
int balancer_ingress(struct __sk_buff *ctx)
{
void *data_end = (void *)(long)ctx->data_end;
void *data = (void *)(long)ctx->data;
void *ptr;
- int ret = 0, nh_off, i = 0;
+ int nh_off, i = 0;
nh_off = 14;
diff --git a/tools/testing/selftests/bpf/progs/test_verif_scale3.c b/tools/testing/selftests/bpf/progs/test_verif_scale3.c
index 9beb5bf80373..2e06dbb1ad5c 100644
--- a/tools/testing/selftests/bpf/progs/test_verif_scale3.c
+++ b/tools/testing/selftests/bpf/progs/test_verif_scale3.c
@@ -5,13 +5,13 @@
#define ATTR __attribute__((noinline))
#include "test_jhash.h"
-SEC("scale90_noinline32")
+SEC("tc")
int balancer_ingress(struct __sk_buff *ctx)
{
void *data_end = (void *)(long)ctx->data_end;
void *data = (void *)(long)ctx->data;
void *ptr;
- int ret = 0, nh_off, i = 0;
+ int nh_off, i = 0;
nh_off = 32;
diff --git a/tools/testing/selftests/bpf/progs/test_verify_pkcs7_sig.c b/tools/testing/selftests/bpf/progs/test_verify_pkcs7_sig.c
new file mode 100644
index 000000000000..f42e9f3831a1
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/test_verify_pkcs7_sig.c
@@ -0,0 +1,88 @@
+// SPDX-License-Identifier: GPL-2.0
+
+/*
+ * Copyright (C) 2022 Huawei Technologies Duesseldorf GmbH
+ *
+ * Author: Roberto Sassu <roberto.sassu@huawei.com>
+ */
+
+#include "vmlinux.h"
+#include <errno.h>
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
+#include "bpf_kfuncs.h"
+
+#define MAX_DATA_SIZE (1024 * 1024)
+#define MAX_SIG_SIZE 1024
+
+__u32 monitored_pid;
+__u32 user_keyring_serial;
+__u64 system_keyring_id;
+
+struct data {
+ __u8 data[MAX_DATA_SIZE];
+ __u32 data_len;
+ __u8 sig[MAX_SIG_SIZE];
+ __u32 sig_len;
+};
+
+struct {
+ __uint(type, BPF_MAP_TYPE_ARRAY);
+ __uint(max_entries, 1);
+ __type(key, __u32);
+ __type(value, struct data);
+} data_input SEC(".maps");
+
+char _license[] SEC("license") = "GPL";
+
+SEC("lsm.s/bpf")
+int BPF_PROG(bpf, int cmd, union bpf_attr *attr, unsigned int size)
+{
+ struct bpf_dynptr data_ptr, sig_ptr;
+ struct data *data_val;
+ struct bpf_key *trusted_keyring;
+ __u32 pid;
+ __u64 value;
+ int ret, zero = 0;
+
+ pid = bpf_get_current_pid_tgid() >> 32;
+ if (pid != monitored_pid)
+ return 0;
+
+ data_val = bpf_map_lookup_elem(&data_input, &zero);
+ if (!data_val)
+ return 0;
+
+ ret = bpf_probe_read_kernel(&value, sizeof(value), &attr->value);
+ if (ret)
+ return ret;
+
+ ret = bpf_copy_from_user(data_val, sizeof(struct data),
+ (void *)(unsigned long)value);
+ if (ret)
+ return ret;
+
+ if (data_val->data_len > sizeof(data_val->data))
+ return -EINVAL;
+
+ bpf_dynptr_from_mem(data_val->data, data_val->data_len, 0, &data_ptr);
+
+ if (data_val->sig_len > sizeof(data_val->sig))
+ return -EINVAL;
+
+ bpf_dynptr_from_mem(data_val->sig, data_val->sig_len, 0, &sig_ptr);
+
+ if (user_keyring_serial)
+ trusted_keyring = bpf_lookup_user_key(user_keyring_serial, 0);
+ else
+ trusted_keyring = bpf_lookup_system_key(system_keyring_id);
+
+ if (!trusted_keyring)
+ return -ENOENT;
+
+ ret = bpf_verify_pkcs7_signature(&data_ptr, &sig_ptr, trusted_keyring);
+
+ bpf_key_put(trusted_keyring);
+
+ return ret;
+}
diff --git a/tools/testing/selftests/bpf/progs/test_vmlinux.c b/tools/testing/selftests/bpf/progs/test_vmlinux.c
index 5611b564d3b1..78b23934d9f8 100644
--- a/tools/testing/selftests/bpf/progs/test_vmlinux.c
+++ b/tools/testing/selftests/bpf/progs/test_vmlinux.c
@@ -16,15 +16,17 @@ bool kprobe_called = false;
bool fentry_called = false;
SEC("tp/syscalls/sys_enter_nanosleep")
-int handle__tp(struct trace_event_raw_sys_enter *args)
+int handle__tp(struct syscall_trace_enter *args)
{
struct __kernel_timespec *ts;
+ long tv_nsec;
- if (args->id != __NR_nanosleep)
+ if (args->nr != __NR_nanosleep)
return 0;
ts = (void *)args->args[0];
- if (BPF_CORE_READ(ts, tv_nsec) != MY_TV_NSEC)
+ if (bpf_probe_read_user(&tv_nsec, sizeof(ts->tv_nsec), &ts->tv_nsec) ||
+ tv_nsec != MY_TV_NSEC)
return 0;
tp_called = true;
@@ -35,12 +37,14 @@ SEC("raw_tp/sys_enter")
int BPF_PROG(handle__raw_tp, struct pt_regs *regs, long id)
{
struct __kernel_timespec *ts;
+ long tv_nsec;
if (id != __NR_nanosleep)
return 0;
- ts = (void *)PT_REGS_PARM1_CORE(regs);
- if (BPF_CORE_READ(ts, tv_nsec) != MY_TV_NSEC)
+ ts = (void *)PT_REGS_PARM1_CORE_SYSCALL(regs);
+ if (bpf_probe_read_user(&tv_nsec, sizeof(ts->tv_nsec), &ts->tv_nsec) ||
+ tv_nsec != MY_TV_NSEC)
return 0;
raw_tp_called = true;
@@ -51,32 +55,34 @@ SEC("tp_btf/sys_enter")
int BPF_PROG(handle__tp_btf, struct pt_regs *regs, long id)
{
struct __kernel_timespec *ts;
+ long tv_nsec;
if (id != __NR_nanosleep)
return 0;
- ts = (void *)PT_REGS_PARM1_CORE(regs);
- if (BPF_CORE_READ(ts, tv_nsec) != MY_TV_NSEC)
+ ts = (void *)PT_REGS_PARM1_CORE_SYSCALL(regs);
+ if (bpf_probe_read_user(&tv_nsec, sizeof(ts->tv_nsec), &ts->tv_nsec) ||
+ tv_nsec != MY_TV_NSEC)
return 0;
tp_btf_called = true;
return 0;
}
-SEC("kprobe/hrtimer_nanosleep")
-int BPF_KPROBE(handle__kprobe,
- ktime_t rqtp, enum hrtimer_mode mode, clockid_t clockid)
+SEC("kprobe/hrtimer_start_range_ns")
+int BPF_KPROBE(handle__kprobe, struct hrtimer *timer, ktime_t tim, u64 delta_ns,
+ const enum hrtimer_mode mode)
{
- if (rqtp == MY_TV_NSEC)
+ if (tim == MY_TV_NSEC)
kprobe_called = true;
return 0;
}
-SEC("fentry/hrtimer_nanosleep")
-int BPF_PROG(handle__fentry,
- ktime_t rqtp, enum hrtimer_mode mode, clockid_t clockid)
+SEC("fentry/hrtimer_start_range_ns")
+int BPF_PROG(handle__fentry, struct hrtimer *timer, ktime_t tim, u64 delta_ns,
+ const enum hrtimer_mode mode)
{
- if (rqtp == MY_TV_NSEC)
+ if (tim == MY_TV_NSEC)
fentry_called = true;
return 0;
}
diff --git a/tools/testing/selftests/bpf/progs/test_xdp.c b/tools/testing/selftests/bpf/progs/test_xdp.c
index 31f9bce37491..8caf58be5818 100644
--- a/tools/testing/selftests/bpf/progs/test_xdp.c
+++ b/tools/testing/selftests/bpf/progs/test_xdp.c
@@ -19,8 +19,7 @@
#include <bpf/bpf_helpers.h>
#include <bpf/bpf_endian.h>
#include "test_iptunnel_common.h"
-
-int _version SEC("version") = 1;
+#include "bpf_compiler.h"
struct {
__uint(type, BPF_MAP_TYPE_PERCPU_ARRAY);
@@ -139,7 +138,7 @@ static __always_inline int handle_ipv4(struct xdp_md *xdp)
iph->ttl = 8;
next_iph = (__u16 *)iph;
-#pragma clang loop unroll(full)
+ __pragma_loop_unroll_full
for (i = 0; i < sizeof(*iph) >> 1; i++)
csum += *next_iph++;
@@ -210,7 +209,7 @@ static __always_inline int handle_ipv6(struct xdp_md *xdp)
return XDP_TX;
}
-SEC("xdp_tx_iptunnel")
+SEC("xdp")
int _xdp_tx_iptunnel(struct xdp_md *xdp)
{
void *data_end = (void *)(long)xdp->data_end;
diff --git a/tools/testing/selftests/bpf/progs/test_xdp_adjust_tail_grow.c b/tools/testing/selftests/bpf/progs/test_xdp_adjust_tail_grow.c
index 3d66599eee2e..81bb38d72ced 100644
--- a/tools/testing/selftests/bpf/progs/test_xdp_adjust_tail_grow.c
+++ b/tools/testing/selftests/bpf/progs/test_xdp_adjust_tail_grow.c
@@ -2,16 +2,19 @@
#include <linux/bpf.h>
#include <bpf/bpf_helpers.h>
-SEC("xdp_adjust_tail_grow")
+SEC("xdp")
int _xdp_adjust_tail_grow(struct xdp_md *xdp)
{
- void *data_end = (void *)(long)xdp->data_end;
- void *data = (void *)(long)xdp->data;
- unsigned int data_len;
+ int data_len = bpf_xdp_get_buff_len(xdp);
int offset = 0;
+ /* SKB_DATA_ALIGN(sizeof(struct skb_shared_info)) */
+#if defined(__TARGET_ARCH_s390)
+ int tailroom = 512;
+#else
+ int tailroom = 320;
+#endif
/* Data length determine test case */
- data_len = data_end - data;
if (data_len == 54) { /* sizeof(pkt_v4) */
offset = 4096; /* test too large offset */
@@ -20,7 +23,12 @@ int _xdp_adjust_tail_grow(struct xdp_md *xdp)
} else if (data_len == 64) {
offset = 128;
} else if (data_len == 128) {
- offset = 4096 - 256 - 320 - data_len; /* Max tail grow 3520 */
+ /* Max tail grow 3520 */
+ offset = 4096 - 256 - tailroom - data_len;
+ } else if (data_len == 9000) {
+ offset = 10;
+ } else if (data_len == 9001) {
+ offset = 4096;
} else {
return XDP_ABORTED; /* No matching test */
}
diff --git a/tools/testing/selftests/bpf/progs/test_xdp_adjust_tail_shrink.c b/tools/testing/selftests/bpf/progs/test_xdp_adjust_tail_shrink.c
index 22065a9cfb25..ca68c038357c 100644
--- a/tools/testing/selftests/bpf/progs/test_xdp_adjust_tail_shrink.c
+++ b/tools/testing/selftests/bpf/progs/test_xdp_adjust_tail_shrink.c
@@ -9,19 +9,41 @@
#include <linux/if_ether.h>
#include <bpf/bpf_helpers.h>
-int _version SEC("version") = 1;
-
-SEC("xdp_adjust_tail_shrink")
+SEC("xdp")
int _xdp_adjust_tail_shrink(struct xdp_md *xdp)
{
- void *data_end = (void *)(long)xdp->data_end;
- void *data = (void *)(long)xdp->data;
+ __u8 *data_end = (void *)(long)xdp->data_end;
+ __u8 *data = (void *)(long)xdp->data;
int offset = 0;
- if (data_end - data == 54) /* sizeof(pkt_v4) */
+ switch (bpf_xdp_get_buff_len(xdp)) {
+ case 54:
+ /* sizeof(pkt_v4) */
offset = 256; /* shrink too much */
- else
+ break;
+ case 9000:
+ /* non-linear buff test cases */
+ if (data + 1 > data_end)
+ return XDP_DROP;
+
+ switch (data[0]) {
+ case 0:
+ offset = 10;
+ break;
+ case 1:
+ offset = 4100;
+ break;
+ case 2:
+ offset = 8200;
+ break;
+ default:
+ return XDP_DROP;
+ }
+ break;
+ default:
offset = 20;
+ break;
+ }
if (bpf_xdp_adjust_tail(xdp, 0 - offset))
return XDP_DROP;
return XDP_TX;
diff --git a/tools/testing/selftests/bpf/progs/test_xdp_attach_fail.c b/tools/testing/selftests/bpf/progs/test_xdp_attach_fail.c
new file mode 100644
index 000000000000..2ff1b596e87e
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/test_xdp_attach_fail.c
@@ -0,0 +1,54 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright Leon Hwang */
+
+#include <linux/bpf.h>
+#include <bpf/bpf_helpers.h>
+
+#define ERRMSG_LEN 64
+
+struct xdp_errmsg {
+ char msg[ERRMSG_LEN];
+};
+
+struct {
+ __uint(type, BPF_MAP_TYPE_PERF_EVENT_ARRAY);
+ __type(key, int);
+ __type(value, int);
+} xdp_errmsg_pb SEC(".maps");
+
+struct xdp_attach_error_ctx {
+ unsigned long unused;
+
+ /*
+ * bpf does not support tracepoint __data_loc directly.
+ *
+ * Actually, this field is a 32 bit integer whose value encodes
+ * information on where to find the actual data. The first 2 bytes is
+ * the size of the data. The last 2 bytes is the offset from the start
+ * of the tracepoint struct where the data begins.
+ * -- https://github.com/iovisor/bpftrace/pull/1542
+ */
+ __u32 msg; // __data_loc char[] msg;
+};
+
+/*
+ * Catch the error message at the tracepoint.
+ */
+
+SEC("tp/xdp/bpf_xdp_link_attach_failed")
+int tp__xdp__bpf_xdp_link_attach_failed(struct xdp_attach_error_ctx *ctx)
+{
+ char *msg = (void *)(__u64) ((void *) ctx + (__u16) ctx->msg);
+ struct xdp_errmsg errmsg = {};
+
+ bpf_probe_read_kernel_str(&errmsg.msg, ERRMSG_LEN, msg);
+ bpf_perf_event_output(ctx, &xdp_errmsg_pb, BPF_F_CURRENT_CPU, &errmsg,
+ ERRMSG_LEN);
+ return 0;
+}
+
+/*
+ * Reuse the XDP program in xdp_dummy.c.
+ */
+
+char LICENSE[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/test_xdp_bpf2bpf.c b/tools/testing/selftests/bpf/progs/test_xdp_bpf2bpf.c
index a038e827f850..ee48c4963971 100644
--- a/tools/testing/selftests/bpf/progs/test_xdp_bpf2bpf.c
+++ b/tools/testing/selftests/bpf/progs/test_xdp_bpf2bpf.c
@@ -36,8 +36,8 @@ struct meta {
struct {
__uint(type, BPF_MAP_TYPE_PERF_EVENT_ARRAY);
- __uint(key_size, sizeof(int));
- __uint(value_size, sizeof(int));
+ __type(key, int);
+ __type(value, int);
} perf_buf_map SEC(".maps");
__u64 test_result_fentry = 0;
@@ -45,11 +45,9 @@ SEC("fentry/FUNC")
int BPF_PROG(trace_on_entry, struct xdp_buff *xdp)
{
struct meta meta;
- void *data_end = (void *)(long)xdp->data_end;
- void *data = (void *)(long)xdp->data;
meta.ifindex = xdp->rxq->dev->ifindex;
- meta.pkt_len = data_end - data;
+ meta.pkt_len = bpf_xdp_get_buff_len((struct xdp_md *)xdp);
bpf_xdp_output(xdp, &perf_buf_map,
((__u64) meta.pkt_len << 32) |
BPF_F_CURRENT_CPU,
diff --git a/tools/testing/selftests/bpf/progs/test_xdp_context_test_run.c b/tools/testing/selftests/bpf/progs/test_xdp_context_test_run.c
new file mode 100644
index 000000000000..d7b88cd05afd
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/test_xdp_context_test_run.c
@@ -0,0 +1,20 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <linux/bpf.h>
+#include <bpf/bpf_helpers.h>
+
+SEC("xdp")
+int xdp_context(struct xdp_md *xdp)
+{
+ void *data = (void *)(long)xdp->data;
+ __u32 *metadata = (void *)(long)xdp->data_meta;
+ __u32 ret;
+
+ if (metadata + 1 > data)
+ return XDP_ABORTED;
+ ret = *metadata;
+ if (bpf_xdp_adjust_meta(xdp, 4))
+ return XDP_ABORTED;
+ return ret;
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/test_xdp_devmap_helpers.c b/tools/testing/selftests/bpf/progs/test_xdp_devmap_helpers.c
index b360ba2bd441..807bf895f42c 100644
--- a/tools/testing/selftests/bpf/progs/test_xdp_devmap_helpers.c
+++ b/tools/testing/selftests/bpf/progs/test_xdp_devmap_helpers.c
@@ -5,7 +5,7 @@
#include <linux/bpf.h>
#include <bpf/bpf_helpers.h>
-SEC("xdp_dm_log")
+SEC("xdp")
int xdpdm_devlog(struct xdp_md *ctx)
{
char fmt[] = "devmap redirect: dev %u -> dev %u len %u\n";
diff --git a/tools/testing/selftests/bpf/progs/test_xdp_do_redirect.c b/tools/testing/selftests/bpf/progs/test_xdp_do_redirect.c
new file mode 100644
index 000000000000..3abf068b8446
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/test_xdp_do_redirect.c
@@ -0,0 +1,116 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <vmlinux.h>
+#include <bpf/bpf_helpers.h>
+
+#define ETH_ALEN 6
+#define HDR_SZ (sizeof(struct ethhdr) + sizeof(struct ipv6hdr) + sizeof(struct udphdr))
+
+/**
+ * enum frame_mark - magics to distinguish page/packet paths
+ * @MARK_XMIT: page was recycled due to the frame being "xmitted" by the NIC.
+ * @MARK_IN: frame is being processed by the input XDP prog.
+ * @MARK_SKB: frame did hit the TC ingress hook as an skb.
+ */
+enum frame_mark {
+ MARK_XMIT = 0U,
+ MARK_IN = 0x42,
+ MARK_SKB = 0x45,
+};
+
+const volatile int ifindex_out;
+const volatile int ifindex_in;
+const volatile __u8 expect_dst[ETH_ALEN];
+volatile int pkts_seen_xdp = 0;
+volatile int pkts_seen_zero = 0;
+volatile int pkts_seen_tc = 0;
+volatile int retcode = XDP_REDIRECT;
+
+SEC("xdp")
+int xdp_redirect(struct xdp_md *xdp)
+{
+ __u32 *metadata = (void *)(long)xdp->data_meta;
+ void *data_end = (void *)(long)xdp->data_end;
+ void *data = (void *)(long)xdp->data;
+
+ __u8 *payload = data + HDR_SZ;
+ int ret = retcode;
+
+ if (payload + 1 > data_end)
+ return XDP_ABORTED;
+
+ if (xdp->ingress_ifindex != (__u32)ifindex_in)
+ return XDP_ABORTED;
+
+ if (metadata + 1 > data)
+ return XDP_ABORTED;
+
+ if (*metadata != 0x42)
+ return XDP_ABORTED;
+
+ if (*payload == MARK_XMIT)
+ pkts_seen_zero++;
+
+ *payload = MARK_IN;
+
+ if (bpf_xdp_adjust_meta(xdp, sizeof(__u64)))
+ return XDP_ABORTED;
+
+ if (retcode > XDP_PASS)
+ retcode--;
+
+ if (ret == XDP_REDIRECT)
+ return bpf_redirect(ifindex_out, 0);
+
+ return ret;
+}
+
+static bool check_pkt(void *data, void *data_end, const __u32 mark)
+{
+ struct ipv6hdr *iph = data + sizeof(struct ethhdr);
+ __u8 *payload = data + HDR_SZ;
+
+ if (payload + 1 > data_end)
+ return false;
+
+ if (iph->nexthdr != IPPROTO_UDP || *payload != MARK_IN)
+ return false;
+
+ /* reset the payload so the same packet doesn't get counted twice when
+ * it cycles back through the kernel path and out the dst veth
+ */
+ *payload = mark;
+ return true;
+}
+
+SEC("xdp")
+int xdp_count_pkts(struct xdp_md *xdp)
+{
+ void *data = (void *)(long)xdp->data;
+ void *data_end = (void *)(long)xdp->data_end;
+
+ if (check_pkt(data, data_end, MARK_XMIT))
+ pkts_seen_xdp++;
+
+ /* Return %XDP_DROP to recycle the data page with %MARK_XMIT, like
+ * it exited a physical NIC. Those pages will be counted in the
+ * pkts_seen_zero counter above.
+ */
+ return XDP_DROP;
+}
+
+SEC("tc")
+int tc_count_pkts(struct __sk_buff *skb)
+{
+ void *data = (void *)(long)skb->data;
+ void *data_end = (void *)(long)skb->data_end;
+
+ if (check_pkt(data, data_end, MARK_SKB))
+ pkts_seen_tc++;
+
+ /* Will be either recycled or freed, %MARK_SKB makes sure it won't
+ * hit any of the counters above.
+ */
+ return 0;
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/test_xdp_dynptr.c b/tools/testing/selftests/bpf/progs/test_xdp_dynptr.c
new file mode 100644
index 000000000000..67a77944ef29
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/test_xdp_dynptr.c
@@ -0,0 +1,256 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2022 Meta */
+#include <stddef.h>
+#include <string.h>
+#include <stdbool.h>
+#include <linux/bpf.h>
+#include <linux/if_ether.h>
+#include <linux/if_packet.h>
+#include <linux/ip.h>
+#include <linux/ipv6.h>
+#include <linux/in.h>
+#include <linux/udp.h>
+#include <linux/tcp.h>
+#include <linux/pkt_cls.h>
+#include <sys/socket.h>
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_endian.h>
+#include "test_iptunnel_common.h"
+#include "bpf_kfuncs.h"
+
+#define tcphdr_sz sizeof(struct tcphdr)
+#define udphdr_sz sizeof(struct udphdr)
+#define ethhdr_sz sizeof(struct ethhdr)
+#define iphdr_sz sizeof(struct iphdr)
+#define ipv6hdr_sz sizeof(struct ipv6hdr)
+
+struct {
+ __uint(type, BPF_MAP_TYPE_PERCPU_ARRAY);
+ __uint(max_entries, 256);
+ __type(key, __u32);
+ __type(value, __u64);
+} rxcnt SEC(".maps");
+
+struct {
+ __uint(type, BPF_MAP_TYPE_HASH);
+ __uint(max_entries, MAX_IPTNL_ENTRIES);
+ __type(key, struct vip);
+ __type(value, struct iptnl_info);
+} vip2tnl SEC(".maps");
+
+static __always_inline void count_tx(__u32 protocol)
+{
+ __u64 *rxcnt_count;
+
+ rxcnt_count = bpf_map_lookup_elem(&rxcnt, &protocol);
+ if (rxcnt_count)
+ *rxcnt_count += 1;
+}
+
+static __always_inline int get_dport(void *trans_data, __u8 protocol)
+{
+ struct tcphdr *th;
+ struct udphdr *uh;
+
+ switch (protocol) {
+ case IPPROTO_TCP:
+ th = (struct tcphdr *)trans_data;
+ return th->dest;
+ case IPPROTO_UDP:
+ uh = (struct udphdr *)trans_data;
+ return uh->dest;
+ default:
+ return 0;
+ }
+}
+
+static __always_inline void set_ethhdr(struct ethhdr *new_eth,
+ const struct ethhdr *old_eth,
+ const struct iptnl_info *tnl,
+ __be16 h_proto)
+{
+ memcpy(new_eth->h_source, old_eth->h_dest, sizeof(new_eth->h_source));
+ memcpy(new_eth->h_dest, tnl->dmac, sizeof(new_eth->h_dest));
+ new_eth->h_proto = h_proto;
+}
+
+static __always_inline int handle_ipv4(struct xdp_md *xdp, struct bpf_dynptr *xdp_ptr)
+{
+ __u8 eth_buffer[ethhdr_sz + iphdr_sz + ethhdr_sz];
+ __u8 iph_buffer_tcp[iphdr_sz + tcphdr_sz];
+ __u8 iph_buffer_udp[iphdr_sz + udphdr_sz];
+ struct bpf_dynptr new_xdp_ptr;
+ struct iptnl_info *tnl;
+ struct ethhdr *new_eth;
+ struct ethhdr *old_eth;
+ struct iphdr *iph;
+ __u16 *next_iph;
+ __u16 payload_len;
+ struct vip vip = {};
+ int dport;
+ __u32 csum = 0;
+ int i;
+
+ __builtin_memset(eth_buffer, 0, sizeof(eth_buffer));
+ __builtin_memset(iph_buffer_tcp, 0, sizeof(iph_buffer_tcp));
+ __builtin_memset(iph_buffer_udp, 0, sizeof(iph_buffer_udp));
+
+ if (ethhdr_sz + iphdr_sz + tcphdr_sz > xdp->data_end - xdp->data)
+ iph = bpf_dynptr_slice(xdp_ptr, ethhdr_sz, iph_buffer_udp, sizeof(iph_buffer_udp));
+ else
+ iph = bpf_dynptr_slice(xdp_ptr, ethhdr_sz, iph_buffer_tcp, sizeof(iph_buffer_tcp));
+
+ if (!iph)
+ return XDP_DROP;
+
+ dport = get_dport(iph + 1, iph->protocol);
+ if (dport == -1)
+ return XDP_DROP;
+
+ vip.protocol = iph->protocol;
+ vip.family = AF_INET;
+ vip.daddr.v4 = iph->daddr;
+ vip.dport = dport;
+ payload_len = bpf_ntohs(iph->tot_len);
+
+ tnl = bpf_map_lookup_elem(&vip2tnl, &vip);
+ /* It only does v4-in-v4 */
+ if (!tnl || tnl->family != AF_INET)
+ return XDP_PASS;
+
+ if (bpf_xdp_adjust_head(xdp, 0 - (int)iphdr_sz))
+ return XDP_DROP;
+
+ bpf_dynptr_from_xdp(xdp, 0, &new_xdp_ptr);
+ new_eth = bpf_dynptr_slice_rdwr(&new_xdp_ptr, 0, eth_buffer, sizeof(eth_buffer));
+ if (!new_eth)
+ return XDP_DROP;
+
+ iph = (struct iphdr *)(new_eth + 1);
+ old_eth = (struct ethhdr *)(iph + 1);
+
+ set_ethhdr(new_eth, old_eth, tnl, bpf_htons(ETH_P_IP));
+
+ if (new_eth == eth_buffer)
+ bpf_dynptr_write(&new_xdp_ptr, 0, eth_buffer, sizeof(eth_buffer), 0);
+
+ iph->version = 4;
+ iph->ihl = iphdr_sz >> 2;
+ iph->frag_off = 0;
+ iph->protocol = IPPROTO_IPIP;
+ iph->check = 0;
+ iph->tos = 0;
+ iph->tot_len = bpf_htons(payload_len + iphdr_sz);
+ iph->daddr = tnl->daddr.v4;
+ iph->saddr = tnl->saddr.v4;
+ iph->ttl = 8;
+
+ next_iph = (__u16 *)iph;
+ for (i = 0; i < iphdr_sz >> 1; i++)
+ csum += *next_iph++;
+
+ iph->check = ~((csum & 0xffff) + (csum >> 16));
+
+ count_tx(vip.protocol);
+
+ return XDP_TX;
+}
+
+static __always_inline int handle_ipv6(struct xdp_md *xdp, struct bpf_dynptr *xdp_ptr)
+{
+ __u8 eth_buffer[ethhdr_sz + ipv6hdr_sz + ethhdr_sz];
+ __u8 ip6h_buffer_tcp[ipv6hdr_sz + tcphdr_sz];
+ __u8 ip6h_buffer_udp[ipv6hdr_sz + udphdr_sz];
+ struct bpf_dynptr new_xdp_ptr;
+ struct iptnl_info *tnl;
+ struct ethhdr *new_eth;
+ struct ethhdr *old_eth;
+ struct ipv6hdr *ip6h;
+ __u16 payload_len;
+ struct vip vip = {};
+ int dport;
+
+ __builtin_memset(eth_buffer, 0, sizeof(eth_buffer));
+ __builtin_memset(ip6h_buffer_tcp, 0, sizeof(ip6h_buffer_tcp));
+ __builtin_memset(ip6h_buffer_udp, 0, sizeof(ip6h_buffer_udp));
+
+ if (ethhdr_sz + iphdr_sz + tcphdr_sz > xdp->data_end - xdp->data)
+ ip6h = bpf_dynptr_slice(xdp_ptr, ethhdr_sz, ip6h_buffer_udp, sizeof(ip6h_buffer_udp));
+ else
+ ip6h = bpf_dynptr_slice(xdp_ptr, ethhdr_sz, ip6h_buffer_tcp, sizeof(ip6h_buffer_tcp));
+
+ if (!ip6h)
+ return XDP_DROP;
+
+ dport = get_dport(ip6h + 1, ip6h->nexthdr);
+ if (dport == -1)
+ return XDP_DROP;
+
+ vip.protocol = ip6h->nexthdr;
+ vip.family = AF_INET6;
+ memcpy(vip.daddr.v6, ip6h->daddr.s6_addr32, sizeof(vip.daddr));
+ vip.dport = dport;
+ payload_len = ip6h->payload_len;
+
+ tnl = bpf_map_lookup_elem(&vip2tnl, &vip);
+ /* It only does v6-in-v6 */
+ if (!tnl || tnl->family != AF_INET6)
+ return XDP_PASS;
+
+ if (bpf_xdp_adjust_head(xdp, 0 - (int)ipv6hdr_sz))
+ return XDP_DROP;
+
+ bpf_dynptr_from_xdp(xdp, 0, &new_xdp_ptr);
+ new_eth = bpf_dynptr_slice_rdwr(&new_xdp_ptr, 0, eth_buffer, sizeof(eth_buffer));
+ if (!new_eth)
+ return XDP_DROP;
+
+ ip6h = (struct ipv6hdr *)(new_eth + 1);
+ old_eth = (struct ethhdr *)(ip6h + 1);
+
+ set_ethhdr(new_eth, old_eth, tnl, bpf_htons(ETH_P_IPV6));
+
+ if (new_eth == eth_buffer)
+ bpf_dynptr_write(&new_xdp_ptr, 0, eth_buffer, sizeof(eth_buffer), 0);
+
+ ip6h->version = 6;
+ ip6h->priority = 0;
+ memset(ip6h->flow_lbl, 0, sizeof(ip6h->flow_lbl));
+ ip6h->payload_len = bpf_htons(bpf_ntohs(payload_len) + ipv6hdr_sz);
+ ip6h->nexthdr = IPPROTO_IPV6;
+ ip6h->hop_limit = 8;
+ memcpy(ip6h->saddr.s6_addr32, tnl->saddr.v6, sizeof(tnl->saddr.v6));
+ memcpy(ip6h->daddr.s6_addr32, tnl->daddr.v6, sizeof(tnl->daddr.v6));
+
+ count_tx(vip.protocol);
+
+ return XDP_TX;
+}
+
+SEC("xdp")
+int _xdp_tx_iptunnel(struct xdp_md *xdp)
+{
+ __u8 buffer[ethhdr_sz];
+ struct bpf_dynptr ptr;
+ struct ethhdr *eth;
+ __u16 h_proto;
+
+ __builtin_memset(buffer, 0, sizeof(buffer));
+
+ bpf_dynptr_from_xdp(xdp, 0, &ptr);
+ eth = bpf_dynptr_slice(&ptr, 0, buffer, sizeof(buffer));
+ if (!eth)
+ return XDP_DROP;
+
+ h_proto = eth->h_proto;
+
+ if (h_proto == bpf_htons(ETH_P_IP))
+ return handle_ipv4(xdp, &ptr);
+ else if (h_proto == bpf_htons(ETH_P_IPV6))
+
+ return handle_ipv6(xdp, &ptr);
+ else
+ return XDP_DROP;
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/test_xdp_link.c b/tools/testing/selftests/bpf/progs/test_xdp_link.c
new file mode 100644
index 000000000000..64ff32eaae92
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/test_xdp_link.c
@@ -0,0 +1,18 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2020 Facebook */
+#include <linux/bpf.h>
+#include <bpf/bpf_helpers.h>
+
+char LICENSE[] SEC("license") = "GPL";
+
+SEC("xdp")
+int xdp_handler(struct xdp_md *xdp)
+{
+ return 0;
+}
+
+SEC("tc")
+int tc_handler(struct __sk_buff *skb)
+{
+ return 0;
+}
diff --git a/tools/testing/selftests/bpf/progs/test_xdp_loop.c b/tools/testing/selftests/bpf/progs/test_xdp_loop.c
index fcabcda30ba3..93267a68825b 100644
--- a/tools/testing/selftests/bpf/progs/test_xdp_loop.c
+++ b/tools/testing/selftests/bpf/progs/test_xdp_loop.c
@@ -15,8 +15,7 @@
#include <bpf/bpf_helpers.h>
#include <bpf/bpf_endian.h>
#include "test_iptunnel_common.h"
-
-int _version SEC("version") = 1;
+#include "bpf_compiler.h"
struct {
__uint(type, BPF_MAP_TYPE_PERCPU_ARRAY);
@@ -135,7 +134,7 @@ static __always_inline int handle_ipv4(struct xdp_md *xdp)
iph->ttl = 8;
next_iph = (__u16 *)iph;
-#pragma clang loop unroll(disable)
+ __pragma_loop_no_unroll
for (i = 0; i < sizeof(*iph) >> 1; i++)
csum += *next_iph++;
@@ -206,7 +205,7 @@ static __always_inline int handle_ipv6(struct xdp_md *xdp)
return XDP_TX;
}
-SEC("xdp_tx_iptunnel")
+SEC("xdp")
int _xdp_tx_iptunnel(struct xdp_md *xdp)
{
void *data_end = (void *)(long)xdp->data_end;
diff --git a/tools/testing/selftests/bpf/progs/test_xdp_noinline.c b/tools/testing/selftests/bpf/progs/test_xdp_noinline.c
index 8beecec166d9..5c7e4758a0ca 100644
--- a/tools/testing/selftests/bpf/progs/test_xdp_noinline.c
+++ b/tools/testing/selftests/bpf/progs/test_xdp_noinline.c
@@ -15,8 +15,9 @@
#include <linux/udp.h>
#include <bpf/bpf_helpers.h>
#include <bpf/bpf_endian.h>
+#include "bpf_compiler.h"
-static __u32 rol32(__u32 word, unsigned int shift)
+static __always_inline __u32 rol32(__u32 word, unsigned int shift)
{
return (word << shift) | (word >> ((-shift) & 31));
}
@@ -49,7 +50,7 @@ static __u32 rol32(__u32 word, unsigned int shift)
typedef unsigned int u32;
-static __attribute__ ((noinline))
+static __noinline
u32 jhash(const void *key, u32 length, u32 initval)
{
u32 a, b, c;
@@ -86,7 +87,7 @@ u32 jhash(const void *key, u32 length, u32 initval)
return c;
}
-__attribute__ ((noinline))
+__noinline
u32 __jhash_nwords(u32 a, u32 b, u32 c, u32 initval)
{
a += initval;
@@ -96,7 +97,7 @@ u32 __jhash_nwords(u32 a, u32 b, u32 c, u32 initval)
return c;
}
-__attribute__ ((noinline))
+__noinline
u32 jhash_2words(u32 a, u32 b, u32 initval)
{
return __jhash_nwords(a, b, 0, initval + JHASH_INITVAL + (2 << 2));
@@ -213,7 +214,7 @@ struct eth_hdr {
unsigned short eth_proto;
};
-static inline __u64 calc_offset(bool is_ipv6, bool is_icmp)
+static __noinline __u64 calc_offset(bool is_ipv6, bool is_icmp)
{
__u64 off = sizeof(struct eth_hdr);
if (is_ipv6) {
@@ -239,7 +240,7 @@ bool parse_udp(void *data, void *data_end,
udp = data + off;
if (udp + 1 > data_end)
- return 0;
+ return false;
if (!is_icmp) {
pckt->flow.port16[0] = udp->source;
pckt->flow.port16[1] = udp->dest;
@@ -247,7 +248,7 @@ bool parse_udp(void *data, void *data_end,
pckt->flow.port16[0] = udp->dest;
pckt->flow.port16[1] = udp->source;
}
- return 1;
+ return true;
}
static __attribute__ ((noinline))
@@ -261,7 +262,7 @@ bool parse_tcp(void *data, void *data_end,
tcp = data + off;
if (tcp + 1 > data_end)
- return 0;
+ return false;
if (tcp->syn)
pckt->flags |= (1 << 1);
if (!is_icmp) {
@@ -271,7 +272,7 @@ bool parse_tcp(void *data, void *data_end,
pckt->flow.port16[0] = tcp->dest;
pckt->flow.port16[1] = tcp->source;
}
- return 1;
+ return true;
}
static __attribute__ ((noinline))
@@ -287,7 +288,7 @@ bool encap_v6(struct xdp_md *xdp, struct ctl_value *cval,
void *data;
if (bpf_xdp_adjust_head(xdp, 0 - (int)sizeof(struct ipv6hdr)))
- return 0;
+ return false;
data = (void *)(long)xdp->data;
data_end = (void *)(long)xdp->data_end;
new_eth = data;
@@ -295,7 +296,7 @@ bool encap_v6(struct xdp_md *xdp, struct ctl_value *cval,
old_eth = data + sizeof(struct ipv6hdr);
if (new_eth + 1 > data_end ||
old_eth + 1 > data_end || ip6h + 1 > data_end)
- return 0;
+ return false;
memcpy(new_eth->eth_dest, cval->mac, 6);
memcpy(new_eth->eth_source, old_eth->eth_dest, 6);
new_eth->eth_proto = 56710;
@@ -314,7 +315,7 @@ bool encap_v6(struct xdp_md *xdp, struct ctl_value *cval,
ip6h->saddr.in6_u.u6_addr32[2] = 3;
ip6h->saddr.in6_u.u6_addr32[3] = ip_suffix;
memcpy(ip6h->daddr.in6_u.u6_addr32, dst->dstv6, 16);
- return 1;
+ return true;
}
static __attribute__ ((noinline))
@@ -335,7 +336,7 @@ bool encap_v4(struct xdp_md *xdp, struct ctl_value *cval,
ip_suffix <<= 15;
ip_suffix ^= pckt->flow.src;
if (bpf_xdp_adjust_head(xdp, 0 - (int)sizeof(struct iphdr)))
- return 0;
+ return false;
data = (void *)(long)xdp->data;
data_end = (void *)(long)xdp->data_end;
new_eth = data;
@@ -343,7 +344,7 @@ bool encap_v4(struct xdp_md *xdp, struct ctl_value *cval,
old_eth = data + sizeof(struct iphdr);
if (new_eth + 1 > data_end ||
old_eth + 1 > data_end || iph + 1 > data_end)
- return 0;
+ return false;
memcpy(new_eth->eth_dest, cval->mac, 6);
memcpy(new_eth->eth_source, old_eth->eth_dest, 6);
new_eth->eth_proto = 8;
@@ -362,52 +363,13 @@ bool encap_v4(struct xdp_md *xdp, struct ctl_value *cval,
iph->ttl = 4;
next_iph_u16 = (__u16 *) iph;
-#pragma clang loop unroll(full)
+ __pragma_loop_unroll_full
for (int i = 0; i < sizeof(struct iphdr) >> 1; i++)
csum += *next_iph_u16++;
iph->check = ~((csum & 0xffff) + (csum >> 16));
if (bpf_xdp_adjust_head(xdp, (int)sizeof(struct iphdr)))
- return 0;
- return 1;
-}
-
-static __attribute__ ((noinline))
-bool decap_v6(struct xdp_md *xdp, void **data, void **data_end, bool inner_v4)
-{
- struct eth_hdr *new_eth;
- struct eth_hdr *old_eth;
-
- old_eth = *data;
- new_eth = *data + sizeof(struct ipv6hdr);
- memcpy(new_eth->eth_source, old_eth->eth_source, 6);
- memcpy(new_eth->eth_dest, old_eth->eth_dest, 6);
- if (inner_v4)
- new_eth->eth_proto = 8;
- else
- new_eth->eth_proto = 56710;
- if (bpf_xdp_adjust_head(xdp, (int)sizeof(struct ipv6hdr)))
- return 0;
- *data = (void *)(long)xdp->data;
- *data_end = (void *)(long)xdp->data_end;
- return 1;
-}
-
-static __attribute__ ((noinline))
-bool decap_v4(struct xdp_md *xdp, void **data, void **data_end)
-{
- struct eth_hdr *new_eth;
- struct eth_hdr *old_eth;
-
- old_eth = *data;
- new_eth = *data + sizeof(struct iphdr);
- memcpy(new_eth->eth_source, old_eth->eth_source, 6);
- memcpy(new_eth->eth_dest, old_eth->eth_dest, 6);
- new_eth->eth_proto = 8;
- if (bpf_xdp_adjust_head(xdp, (int)sizeof(struct iphdr)))
- return 0;
- *data = (void *)(long)xdp->data;
- *data_end = (void *)(long)xdp->data_end;
- return 1;
+ return false;
+ return true;
}
static __attribute__ ((noinline))
@@ -430,7 +392,6 @@ int send_icmp_reply(void *data, void *data_end)
__u16 *next_iph_u16;
__u32 tmp_addr = 0;
struct iphdr *iph;
- __u32 csum1 = 0;
__u32 csum = 0;
__u64 off = 0;
@@ -449,7 +410,7 @@ int send_icmp_reply(void *data, void *data_end)
iph->saddr = tmp_addr;
iph->check = 0;
next_iph_u16 = (__u16 *) iph;
-#pragma clang loop unroll(full)
+ __pragma_loop_unroll_full
for (int i = 0; i < sizeof(struct iphdr) >> 1; i++)
csum += *next_iph_u16++;
iph->check = ~((csum & 0xffff) + (csum >> 16));
@@ -564,22 +525,22 @@ static bool get_packet_dst(struct real_definition **real,
hash = get_packet_hash(pckt, hash_16bytes);
if (hash != 0x358459b7 /* jhash of ipv4 packet */ &&
hash != 0x2f4bc6bb /* jhash of ipv6 packet */)
- return 0;
+ return false;
key = 2 * vip_info->vip_num + hash % 2;
real_pos = bpf_map_lookup_elem(&ch_rings, &key);
if (!real_pos)
- return 0;
+ return false;
key = *real_pos;
*real = bpf_map_lookup_elem(&reals, &key);
if (!(*real))
- return 0;
+ return false;
if (!(vip_info->flags & (1 << 1))) {
__u32 conn_rate_key = 512 + 2;
struct lb_stats *conn_rate_stats =
bpf_map_lookup_elem(&stats, &conn_rate_key);
if (!conn_rate_stats)
- return 1;
+ return true;
cur_time = bpf_ktime_get_ns();
if ((cur_time - conn_rate_stats->v2) >> 32 > 0xffFFFF) {
conn_rate_stats->v1 = 1;
@@ -587,14 +548,14 @@ static bool get_packet_dst(struct real_definition **real,
} else {
conn_rate_stats->v1 += 1;
if (conn_rate_stats->v1 >= 1)
- return 1;
+ return true;
}
if (pckt->flow.proto == IPPROTO_UDP)
new_dst_lru.atime = cur_time;
new_dst_lru.pos = key;
bpf_map_update_elem(lru_map, &pckt->flow, &new_dst_lru, 0);
}
- return 1;
+ return true;
}
__attribute__ ((noinline))
@@ -662,7 +623,6 @@ static int process_l3_headers_v4(struct packet_description *pckt,
void *data_end)
{
struct iphdr *iph;
- __u64 iph_len;
int action;
iph = data + off;
@@ -696,7 +656,6 @@ static int process_packet(void *data, __u64 off, void *data_end,
struct packet_description pckt = { };
struct vip_definition vip = { };
struct lb_stats *data_stats;
- struct eth_hdr *eth = data;
void *lru_map = &lru_cache;
struct vip_meta *vip_info;
__u32 lru_stats_key = 513;
@@ -704,7 +663,6 @@ static int process_packet(void *data, __u64 off, void *data_end,
__u32 stats_key = 512;
struct ctl_value *cval;
__u16 pkt_bytes;
- __u64 iph_len;
__u8 protocol;
__u32 vip_num;
int action;
@@ -797,8 +755,8 @@ out:
return XDP_DROP;
}
-__attribute__ ((section("xdp-test"), used))
-int balancer_ingress(struct xdp_md *ctx)
+SEC("xdp")
+int balancer_ingress_v4(struct xdp_md *ctx)
{
void *data = (void *)(long)ctx->data;
void *data_end = (void *)(long)ctx->data_end;
@@ -812,11 +770,27 @@ int balancer_ingress(struct xdp_md *ctx)
eth_proto = bpf_ntohs(eth->eth_proto);
if (eth_proto == ETH_P_IP)
return process_packet(data, nh_off, data_end, 0, ctx);
- else if (eth_proto == ETH_P_IPV6)
+ else
+ return XDP_DROP;
+}
+
+SEC("xdp")
+int balancer_ingress_v6(struct xdp_md *ctx)
+{
+ void *data = (void *)(long)ctx->data;
+ void *data_end = (void *)(long)ctx->data_end;
+ struct eth_hdr *eth = data;
+ __u32 eth_proto;
+ __u32 nh_off;
+
+ nh_off = sizeof(struct eth_hdr);
+ if (data + nh_off > data_end)
+ return XDP_DROP;
+ eth_proto = bpf_ntohs(eth->eth_proto);
+ if (eth_proto == ETH_P_IPV6)
return process_packet(data, nh_off, data_end, 1, ctx);
else
return XDP_DROP;
}
-char _license[] __attribute__ ((section("license"), used)) = "GPL";
-int _version __attribute__ ((section("version"), used)) = 1;
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/test_xdp_redirect.c b/tools/testing/selftests/bpf/progs/test_xdp_redirect.c
index a5337cd9400b..b778cad45485 100644
--- a/tools/testing/selftests/bpf/progs/test_xdp_redirect.c
+++ b/tools/testing/selftests/bpf/progs/test_xdp_redirect.c
@@ -12,8 +12,6 @@
#include <linux/bpf.h>
#include <bpf/bpf_helpers.h>
-int _version SEC("version") = 1;
-
SEC("redirect_to_111")
int xdp_redirect_to_111(struct xdp_md *xdp)
{
diff --git a/tools/testing/selftests/bpf/progs/test_xdp_update_frags.c b/tools/testing/selftests/bpf/progs/test_xdp_update_frags.c
new file mode 100644
index 000000000000..2a3496d8e327
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/test_xdp_update_frags.c
@@ -0,0 +1,42 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of version 2 of the GNU General Public
+ * License as published by the Free Software Foundation.
+ */
+#include <linux/bpf.h>
+#include <linux/if_ether.h>
+#include <bpf/bpf_helpers.h>
+
+int _version SEC("version") = 1;
+
+SEC("xdp.frags")
+int xdp_adjust_frags(struct xdp_md *xdp)
+{
+ __u8 *data_end = (void *)(long)xdp->data_end;
+ __u8 *data = (void *)(long)xdp->data;
+ __u8 val[16] = {};
+ __u32 offset;
+ int err;
+
+ if (data + sizeof(__u32) > data_end)
+ return XDP_DROP;
+
+ offset = *(__u32 *)data;
+ err = bpf_xdp_load_bytes(xdp, offset, val, sizeof(val));
+ if (err < 0)
+ return XDP_DROP;
+
+ if (val[0] != 0xaa || val[15] != 0xaa) /* marker */
+ return XDP_DROP;
+
+ val[0] = 0xbb; /* update the marker */
+ val[15] = 0xbb;
+ err = bpf_xdp_store_bytes(xdp, offset, val, sizeof(val));
+ if (err < 0)
+ return XDP_DROP;
+
+ return XDP_PASS;
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/test_xdp_vlan.c b/tools/testing/selftests/bpf/progs/test_xdp_vlan.c
index 134768f6b788..f3ec8086482d 100644
--- a/tools/testing/selftests/bpf/progs/test_xdp_vlan.c
+++ b/tools/testing/selftests/bpf/progs/test_xdp_vlan.c
@@ -98,7 +98,7 @@ bool parse_eth_frame(struct ethhdr *eth, void *data_end, struct parse_pkt *pkt)
return true;
}
-/* Hint, VLANs are choosen to hit network-byte-order issues */
+/* Hint, VLANs are chosen to hit network-byte-order issues */
#define TESTVLAN 4011 /* 0xFAB */
// #define TO_VLAN 4000 /* 0xFA0 (hint 0xOA0 = 160) */
@@ -195,7 +195,7 @@ int xdp_prognum2(struct xdp_md *ctx)
/* Moving Ethernet header, dest overlap with src, memmove handle this */
dest = data;
- dest+= VLAN_HDR_SZ;
+ dest += VLAN_HDR_SZ;
/*
* Notice: Taking over vlan_hdr->h_vlan_encapsulated_proto, by
* only moving two MAC addrs (12 bytes), not overwriting last 2 bytes
@@ -210,19 +210,6 @@ int xdp_prognum2(struct xdp_md *ctx)
}
static __always_inline
-void shift_mac_4bytes_16bit(void *data)
-{
- __u16 *p = data;
-
- p[7] = p[5]; /* delete p[7] was vlan_hdr->h_vlan_TCI */
- p[6] = p[4]; /* delete p[6] was ethhdr->h_proto */
- p[5] = p[3];
- p[4] = p[2];
- p[3] = p[1];
- p[2] = p[0];
-}
-
-static __always_inline
void shift_mac_4bytes_32bit(void *data)
{
__u32 *p = data;
diff --git a/tools/testing/selftests/bpf/progs/test_xdp_with_cpumap_frags_helpers.c b/tools/testing/selftests/bpf/progs/test_xdp_with_cpumap_frags_helpers.c
new file mode 100644
index 000000000000..97ed625bb70a
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/test_xdp_with_cpumap_frags_helpers.c
@@ -0,0 +1,27 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include <linux/bpf.h>
+#include <bpf/bpf_helpers.h>
+
+#define IFINDEX_LO 1
+
+struct {
+ __uint(type, BPF_MAP_TYPE_CPUMAP);
+ __uint(key_size, sizeof(__u32));
+ __uint(value_size, sizeof(struct bpf_cpumap_val));
+ __uint(max_entries, 4);
+} cpu_map SEC(".maps");
+
+SEC("xdp/cpumap")
+int xdp_dummy_cm(struct xdp_md *ctx)
+{
+ return XDP_PASS;
+}
+
+SEC("xdp.frags/cpumap")
+int xdp_dummy_cm_frags(struct xdp_md *ctx)
+{
+ return XDP_PASS;
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/test_xdp_with_cpumap_helpers.c b/tools/testing/selftests/bpf/progs/test_xdp_with_cpumap_helpers.c
new file mode 100644
index 000000000000..20ec6723df18
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/test_xdp_with_cpumap_helpers.c
@@ -0,0 +1,42 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include <linux/bpf.h>
+#include <bpf/bpf_helpers.h>
+
+#define IFINDEX_LO 1
+
+struct {
+ __uint(type, BPF_MAP_TYPE_CPUMAP);
+ __uint(key_size, sizeof(__u32));
+ __uint(value_size, sizeof(struct bpf_cpumap_val));
+ __uint(max_entries, 4);
+} cpu_map SEC(".maps");
+
+SEC("xdp")
+int xdp_redir_prog(struct xdp_md *ctx)
+{
+ return bpf_redirect_map(&cpu_map, 1, 0);
+}
+
+SEC("xdp")
+int xdp_dummy_prog(struct xdp_md *ctx)
+{
+ return XDP_PASS;
+}
+
+SEC("xdp/cpumap")
+int xdp_dummy_cm(struct xdp_md *ctx)
+{
+ if (ctx->ingress_ifindex == IFINDEX_LO)
+ return XDP_DROP;
+
+ return XDP_PASS;
+}
+
+SEC("xdp.frags/cpumap")
+int xdp_dummy_cm_frags(struct xdp_md *ctx)
+{
+ return XDP_PASS;
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/test_xdp_with_devmap_frags_helpers.c b/tools/testing/selftests/bpf/progs/test_xdp_with_devmap_frags_helpers.c
new file mode 100644
index 000000000000..cdcf7de7ec8c
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/test_xdp_with_devmap_frags_helpers.c
@@ -0,0 +1,27 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <linux/bpf.h>
+#include <bpf/bpf_helpers.h>
+
+struct {
+ __uint(type, BPF_MAP_TYPE_DEVMAP);
+ __uint(key_size, sizeof(__u32));
+ __uint(value_size, sizeof(struct bpf_devmap_val));
+ __uint(max_entries, 4);
+} dm_ports SEC(".maps");
+
+/* valid program on DEVMAP entry via SEC name;
+ * has access to egress and ingress ifindex
+ */
+SEC("xdp/devmap")
+int xdp_dummy_dm(struct xdp_md *ctx)
+{
+ return XDP_PASS;
+}
+
+SEC("xdp.frags/devmap")
+int xdp_dummy_dm_frags(struct xdp_md *ctx)
+{
+ return XDP_PASS;
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/test_xdp_with_devmap_helpers.c b/tools/testing/selftests/bpf/progs/test_xdp_with_devmap_helpers.c
index 0ac086497722..4139a14f9996 100644
--- a/tools/testing/selftests/bpf/progs/test_xdp_with_devmap_helpers.c
+++ b/tools/testing/selftests/bpf/progs/test_xdp_with_devmap_helpers.c
@@ -9,7 +9,7 @@ struct {
__uint(max_entries, 4);
} dm_ports SEC(".maps");
-SEC("xdp_redir")
+SEC("xdp")
int xdp_redir_prog(struct xdp_md *ctx)
{
return bpf_redirect_map(&dm_ports, 1, 0);
@@ -18,7 +18,7 @@ int xdp_redir_prog(struct xdp_md *ctx)
/* invalid program on DEVMAP entry;
* SEC name means expected attach type not set
*/
-SEC("xdp_dummy")
+SEC("xdp")
int xdp_dummy_prog(struct xdp_md *ctx)
{
return XDP_PASS;
@@ -27,7 +27,7 @@ int xdp_dummy_prog(struct xdp_md *ctx)
/* valid program on DEVMAP entry via SEC name;
* has access to egress and ingress ifindex
*/
-SEC("xdp_devmap/map_prog")
+SEC("xdp/devmap")
int xdp_dummy_dm(struct xdp_md *ctx)
{
char fmt[] = "devmap redirect: dev %u -> dev %u len %u\n";
@@ -40,4 +40,11 @@ int xdp_dummy_dm(struct xdp_md *ctx)
return XDP_PASS;
}
+
+SEC("xdp.frags/devmap")
+int xdp_dummy_dm_frags(struct xdp_md *ctx)
+{
+ return XDP_PASS;
+}
+
char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/timer.c b/tools/testing/selftests/bpf/progs/timer.c
new file mode 100644
index 000000000000..f615da97df26
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/timer.c
@@ -0,0 +1,424 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2021 Facebook */
+#include <linux/bpf.h>
+#include <time.h>
+#include <errno.h>
+#include <bpf/bpf_helpers.h>
+#include "bpf_tcp_helpers.h"
+
+char _license[] SEC("license") = "GPL";
+struct hmap_elem {
+ int counter;
+ struct bpf_timer timer;
+ struct bpf_spin_lock lock; /* unused */
+};
+
+struct {
+ __uint(type, BPF_MAP_TYPE_HASH);
+ __uint(max_entries, 1000);
+ __type(key, int);
+ __type(value, struct hmap_elem);
+} hmap SEC(".maps");
+
+struct {
+ __uint(type, BPF_MAP_TYPE_HASH);
+ __uint(map_flags, BPF_F_NO_PREALLOC);
+ __uint(max_entries, 1000);
+ __type(key, int);
+ __type(value, struct hmap_elem);
+} hmap_malloc SEC(".maps");
+
+struct elem {
+ struct bpf_timer t;
+};
+
+struct {
+ __uint(type, BPF_MAP_TYPE_ARRAY);
+ __uint(max_entries, 2);
+ __type(key, int);
+ __type(value, struct elem);
+} array SEC(".maps");
+
+struct {
+ __uint(type, BPF_MAP_TYPE_LRU_HASH);
+ __uint(max_entries, 4);
+ __type(key, int);
+ __type(value, struct elem);
+} lru SEC(".maps");
+
+struct {
+ __uint(type, BPF_MAP_TYPE_ARRAY);
+ __uint(max_entries, 1);
+ __type(key, int);
+ __type(value, struct elem);
+} abs_timer SEC(".maps"), soft_timer_pinned SEC(".maps"), abs_timer_pinned SEC(".maps"),
+ race_array SEC(".maps");
+
+__u64 bss_data;
+__u64 abs_data;
+__u64 err;
+__u64 ok;
+__u64 callback_check = 52;
+__u64 callback2_check = 52;
+__u64 pinned_callback_check;
+__s32 pinned_cpu;
+
+#define ARRAY 1
+#define HTAB 2
+#define HTAB_MALLOC 3
+#define LRU 4
+
+/* callback for array and lru timers */
+static int timer_cb1(void *map, int *key, struct bpf_timer *timer)
+{
+ /* increment bss variable twice.
+ * Once via array timer callback and once via lru timer callback
+ */
+ bss_data += 5;
+
+ /* *key == 0 - the callback was called for array timer.
+ * *key == 4 - the callback was called from lru timer.
+ */
+ if (*key == ARRAY) {
+ struct bpf_timer *lru_timer;
+ int lru_key = LRU;
+
+ /* rearm array timer to be called again in ~35 seconds */
+ if (bpf_timer_start(timer, 1ull << 35, 0) != 0)
+ err |= 1;
+
+ lru_timer = bpf_map_lookup_elem(&lru, &lru_key);
+ if (!lru_timer)
+ return 0;
+ bpf_timer_set_callback(lru_timer, timer_cb1);
+ if (bpf_timer_start(lru_timer, 0, 0) != 0)
+ err |= 2;
+ } else if (*key == LRU) {
+ int lru_key, i;
+
+ for (i = LRU + 1;
+ i <= 100 /* for current LRU eviction algorithm this number
+ * should be larger than ~ lru->max_entries * 2
+ */;
+ i++) {
+ struct elem init = {};
+
+ /* lru_key cannot be used as loop induction variable
+ * otherwise the loop will be unbounded.
+ */
+ lru_key = i;
+
+ /* add more elements into lru map to push out current
+ * element and force deletion of this timer
+ */
+ bpf_map_update_elem(map, &lru_key, &init, 0);
+ /* look it up to bump it into active list */
+ bpf_map_lookup_elem(map, &lru_key);
+
+ /* keep adding until *key changes underneath,
+ * which means that key/timer memory was reused
+ */
+ if (*key != LRU)
+ break;
+ }
+
+ /* check that the timer was removed */
+ if (bpf_timer_cancel(timer) != -EINVAL)
+ err |= 4;
+ ok |= 1;
+ }
+ return 0;
+}
+
+SEC("fentry/bpf_fentry_test1")
+int BPF_PROG2(test1, int, a)
+{
+ struct bpf_timer *arr_timer, *lru_timer;
+ struct elem init = {};
+ int lru_key = LRU;
+ int array_key = ARRAY;
+
+ arr_timer = bpf_map_lookup_elem(&array, &array_key);
+ if (!arr_timer)
+ return 0;
+ bpf_timer_init(arr_timer, &array, CLOCK_MONOTONIC);
+
+ bpf_map_update_elem(&lru, &lru_key, &init, 0);
+ lru_timer = bpf_map_lookup_elem(&lru, &lru_key);
+ if (!lru_timer)
+ return 0;
+ bpf_timer_init(lru_timer, &lru, CLOCK_MONOTONIC);
+
+ bpf_timer_set_callback(arr_timer, timer_cb1);
+ bpf_timer_start(arr_timer, 0 /* call timer_cb1 asap */, 0);
+
+ /* init more timers to check that array destruction
+ * doesn't leak timer memory.
+ */
+ array_key = 0;
+ arr_timer = bpf_map_lookup_elem(&array, &array_key);
+ if (!arr_timer)
+ return 0;
+ bpf_timer_init(arr_timer, &array, CLOCK_MONOTONIC);
+ return 0;
+}
+
+/* callback for prealloc and non-prealloca hashtab timers */
+static int timer_cb2(void *map, int *key, struct hmap_elem *val)
+{
+ if (*key == HTAB)
+ callback_check--;
+ else
+ callback2_check--;
+ if (val->counter > 0 && --val->counter) {
+ /* re-arm the timer again to execute after 1 usec */
+ bpf_timer_start(&val->timer, 1000, 0);
+ } else if (*key == HTAB) {
+ struct bpf_timer *arr_timer;
+ int array_key = ARRAY;
+
+ /* cancel arr_timer otherwise bpf_fentry_test1 prog
+ * will stay alive forever.
+ */
+ arr_timer = bpf_map_lookup_elem(&array, &array_key);
+ if (!arr_timer)
+ return 0;
+ if (bpf_timer_cancel(arr_timer) != 1)
+ /* bpf_timer_cancel should return 1 to indicate
+ * that arr_timer was active at this time
+ */
+ err |= 8;
+
+ /* try to cancel ourself. It shouldn't deadlock. */
+ if (bpf_timer_cancel(&val->timer) != -EDEADLK)
+ err |= 16;
+
+ /* delete this key and this timer anyway.
+ * It shouldn't deadlock either.
+ */
+ bpf_map_delete_elem(map, key);
+
+ /* in preallocated hashmap both 'key' and 'val' could have been
+ * reused to store another map element (like in LRU above),
+ * but in controlled test environment the below test works.
+ * It's not a use-after-free. The memory is owned by the map.
+ */
+ if (bpf_timer_start(&val->timer, 1000, 0) != -EINVAL)
+ err |= 32;
+ ok |= 2;
+ } else {
+ if (*key != HTAB_MALLOC)
+ err |= 64;
+
+ /* try to cancel ourself. It shouldn't deadlock. */
+ if (bpf_timer_cancel(&val->timer) != -EDEADLK)
+ err |= 128;
+
+ /* delete this key and this timer anyway.
+ * It shouldn't deadlock either.
+ */
+ bpf_map_delete_elem(map, key);
+
+ ok |= 4;
+ }
+ return 0;
+}
+
+int bpf_timer_test(void)
+{
+ struct hmap_elem *val;
+ int key = HTAB, key_malloc = HTAB_MALLOC;
+
+ val = bpf_map_lookup_elem(&hmap, &key);
+ if (val) {
+ if (bpf_timer_init(&val->timer, &hmap, CLOCK_BOOTTIME) != 0)
+ err |= 512;
+ bpf_timer_set_callback(&val->timer, timer_cb2);
+ bpf_timer_start(&val->timer, 1000, 0);
+ }
+ val = bpf_map_lookup_elem(&hmap_malloc, &key_malloc);
+ if (val) {
+ if (bpf_timer_init(&val->timer, &hmap_malloc, CLOCK_BOOTTIME) != 0)
+ err |= 1024;
+ bpf_timer_set_callback(&val->timer, timer_cb2);
+ bpf_timer_start(&val->timer, 1000, 0);
+ }
+ return 0;
+}
+
+SEC("fentry/bpf_fentry_test2")
+int BPF_PROG2(test2, int, a, int, b)
+{
+ struct hmap_elem init = {}, *val;
+ int key = HTAB, key_malloc = HTAB_MALLOC;
+
+ init.counter = 10; /* number of times to trigger timer_cb2 */
+ bpf_map_update_elem(&hmap, &key, &init, 0);
+ val = bpf_map_lookup_elem(&hmap, &key);
+ if (val)
+ bpf_timer_init(&val->timer, &hmap, CLOCK_BOOTTIME);
+ /* update the same key to free the timer */
+ bpf_map_update_elem(&hmap, &key, &init, 0);
+
+ bpf_map_update_elem(&hmap_malloc, &key_malloc, &init, 0);
+ val = bpf_map_lookup_elem(&hmap_malloc, &key_malloc);
+ if (val)
+ bpf_timer_init(&val->timer, &hmap_malloc, CLOCK_BOOTTIME);
+ /* update the same key to free the timer */
+ bpf_map_update_elem(&hmap_malloc, &key_malloc, &init, 0);
+
+ /* init more timers to check that htab operations
+ * don't leak timer memory.
+ */
+ key = 0;
+ bpf_map_update_elem(&hmap, &key, &init, 0);
+ val = bpf_map_lookup_elem(&hmap, &key);
+ if (val)
+ bpf_timer_init(&val->timer, &hmap, CLOCK_BOOTTIME);
+ bpf_map_delete_elem(&hmap, &key);
+ bpf_map_update_elem(&hmap, &key, &init, 0);
+ val = bpf_map_lookup_elem(&hmap, &key);
+ if (val)
+ bpf_timer_init(&val->timer, &hmap, CLOCK_BOOTTIME);
+
+ /* and with non-prealloc htab */
+ key_malloc = 0;
+ bpf_map_update_elem(&hmap_malloc, &key_malloc, &init, 0);
+ val = bpf_map_lookup_elem(&hmap_malloc, &key_malloc);
+ if (val)
+ bpf_timer_init(&val->timer, &hmap_malloc, CLOCK_BOOTTIME);
+ bpf_map_delete_elem(&hmap_malloc, &key_malloc);
+ bpf_map_update_elem(&hmap_malloc, &key_malloc, &init, 0);
+ val = bpf_map_lookup_elem(&hmap_malloc, &key_malloc);
+ if (val)
+ bpf_timer_init(&val->timer, &hmap_malloc, CLOCK_BOOTTIME);
+
+ return bpf_timer_test();
+}
+
+/* callback for absolute timer */
+static int timer_cb3(void *map, int *key, struct bpf_timer *timer)
+{
+ abs_data += 6;
+
+ if (abs_data < 12) {
+ bpf_timer_start(timer, bpf_ktime_get_boot_ns() + 1000,
+ BPF_F_TIMER_ABS);
+ } else {
+ /* Re-arm timer ~35 seconds in future */
+ bpf_timer_start(timer, bpf_ktime_get_boot_ns() + (1ull << 35),
+ BPF_F_TIMER_ABS);
+ }
+
+ return 0;
+}
+
+SEC("fentry/bpf_fentry_test3")
+int BPF_PROG2(test3, int, a)
+{
+ int key = 0;
+ struct bpf_timer *timer;
+
+ bpf_printk("test3");
+
+ timer = bpf_map_lookup_elem(&abs_timer, &key);
+ if (timer) {
+ if (bpf_timer_init(timer, &abs_timer, CLOCK_BOOTTIME) != 0)
+ err |= 2048;
+ bpf_timer_set_callback(timer, timer_cb3);
+ bpf_timer_start(timer, bpf_ktime_get_boot_ns() + 1000,
+ BPF_F_TIMER_ABS);
+ }
+
+ return 0;
+}
+
+/* callback for pinned timer */
+static int timer_cb_pinned(void *map, int *key, struct bpf_timer *timer)
+{
+ __s32 cpu = bpf_get_smp_processor_id();
+
+ if (cpu != pinned_cpu)
+ err |= 16384;
+
+ pinned_callback_check++;
+ return 0;
+}
+
+static void test_pinned_timer(bool soft)
+{
+ int key = 0;
+ void *map;
+ struct bpf_timer *timer;
+ __u64 flags = BPF_F_TIMER_CPU_PIN;
+ __u64 start_time;
+
+ if (soft) {
+ map = &soft_timer_pinned;
+ start_time = 0;
+ } else {
+ map = &abs_timer_pinned;
+ start_time = bpf_ktime_get_boot_ns();
+ flags |= BPF_F_TIMER_ABS;
+ }
+
+ timer = bpf_map_lookup_elem(map, &key);
+ if (timer) {
+ if (bpf_timer_init(timer, map, CLOCK_BOOTTIME) != 0)
+ err |= 4096;
+ bpf_timer_set_callback(timer, timer_cb_pinned);
+ pinned_cpu = bpf_get_smp_processor_id();
+ bpf_timer_start(timer, start_time + 1000, flags);
+ } else {
+ err |= 8192;
+ }
+}
+
+SEC("fentry/bpf_fentry_test4")
+int BPF_PROG2(test4, int, a)
+{
+ bpf_printk("test4");
+ test_pinned_timer(true);
+
+ return 0;
+}
+
+SEC("fentry/bpf_fentry_test5")
+int BPF_PROG2(test5, int, a)
+{
+ bpf_printk("test5");
+ test_pinned_timer(false);
+
+ return 0;
+}
+
+static int race_timer_callback(void *race_array, int *race_key, struct bpf_timer *timer)
+{
+ bpf_timer_start(timer, 1000000, 0);
+ return 0;
+}
+
+SEC("syscall")
+int race(void *ctx)
+{
+ struct bpf_timer *timer;
+ int err, race_key = 0;
+ struct elem init;
+
+ __builtin_memset(&init, 0, sizeof(struct elem));
+ bpf_map_update_elem(&race_array, &race_key, &init, BPF_ANY);
+
+ timer = bpf_map_lookup_elem(&race_array, &race_key);
+ if (!timer)
+ return 1;
+
+ err = bpf_timer_init(timer, &race_array, CLOCK_MONOTONIC);
+ if (err && err != -EBUSY)
+ return 1;
+
+ bpf_timer_set_callback(timer, race_timer_callback);
+ bpf_timer_start(timer, 0, 0);
+ bpf_timer_cancel(timer);
+
+ return 0;
+}
diff --git a/tools/testing/selftests/bpf/progs/timer_crash.c b/tools/testing/selftests/bpf/progs/timer_crash.c
new file mode 100644
index 000000000000..f8f7944e70da
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/timer_crash.c
@@ -0,0 +1,54 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include <vmlinux.h>
+#include <bpf/bpf_tracing.h>
+#include <bpf/bpf_helpers.h>
+
+struct map_elem {
+ struct bpf_timer timer;
+ struct bpf_spin_lock lock;
+};
+
+struct {
+ __uint(type, BPF_MAP_TYPE_ARRAY);
+ __uint(max_entries, 1);
+ __type(key, int);
+ __type(value, struct map_elem);
+} amap SEC(".maps");
+
+struct {
+ __uint(type, BPF_MAP_TYPE_HASH);
+ __uint(max_entries, 1);
+ __type(key, int);
+ __type(value, struct map_elem);
+} hmap SEC(".maps");
+
+int pid = 0;
+int crash_map = 0; /* 0 for amap, 1 for hmap */
+
+SEC("fentry/do_nanosleep")
+int sys_enter(void *ctx)
+{
+ struct map_elem *e, value = {};
+ void *map = crash_map ? (void *)&hmap : (void *)&amap;
+
+ if (bpf_get_current_task_btf()->tgid != pid)
+ return 0;
+
+ *(void **)&value = (void *)0xdeadcaf3;
+
+ bpf_map_update_elem(map, &(int){0}, &value, 0);
+ /* For array map, doing bpf_map_update_elem will do a
+ * check_and_free_timer_in_array, which will trigger the crash if timer
+ * pointer was overwritten, for hmap we need to use bpf_timer_cancel.
+ */
+ if (crash_map == 1) {
+ e = bpf_map_lookup_elem(map, &(int){0});
+ if (!e)
+ return 0;
+ bpf_timer_cancel(&e->timer);
+ }
+ return 0;
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/timer_failure.c b/tools/testing/selftests/bpf/progs/timer_failure.c
new file mode 100644
index 000000000000..0996c2486f05
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/timer_failure.c
@@ -0,0 +1,68 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2023 Meta Platforms, Inc. and affiliates. */
+
+#include <linux/bpf.h>
+#include <time.h>
+#include <errno.h>
+#include <bpf/bpf_helpers.h>
+#include "bpf_misc.h"
+#include "bpf_tcp_helpers.h"
+
+char _license[] SEC("license") = "GPL";
+
+struct elem {
+ struct bpf_timer t;
+};
+
+struct {
+ __uint(type, BPF_MAP_TYPE_ARRAY);
+ __uint(max_entries, 1);
+ __type(key, int);
+ __type(value, struct elem);
+} timer_map SEC(".maps");
+
+__naked __noinline __used
+static unsigned long timer_cb_ret_bad()
+{
+ asm volatile (
+ "call %[bpf_get_prandom_u32];"
+ "if r0 s> 1000 goto 1f;"
+ "r0 = 0;"
+ "1:"
+ "goto +0;" /* checkpoint */
+ /* async callback is expected to return 0, so branch above
+ * skipping r0 = 0; should lead to a failure, but if exit
+ * instruction doesn't enforce r0's precision, this callback
+ * will be successfully verified
+ */
+ "exit;"
+ :
+ : __imm(bpf_get_prandom_u32)
+ : __clobber_common
+ );
+}
+
+SEC("fentry/bpf_fentry_test1")
+__log_level(2)
+__flag(BPF_F_TEST_STATE_FREQ)
+__failure
+/* check that fallthrough code path marks r0 as precise */
+__msg("mark_precise: frame0: regs=r0 stack= before")
+__msg(": (85) call bpf_get_prandom_u32#7") /* anchor message */
+/* check that branch code path marks r0 as precise */
+__msg("mark_precise: frame0: regs=r0 stack= before ") __msg(": (85) call bpf_get_prandom_u32#7")
+__msg("should have been in [0, 0]")
+long BPF_PROG2(test_bad_ret, int, a)
+{
+ int key = 0;
+ struct bpf_timer *timer;
+
+ timer = bpf_map_lookup_elem(&timer_map, &key);
+ if (timer) {
+ bpf_timer_init(timer, &timer_map, CLOCK_BOOTTIME);
+ bpf_timer_set_callback(timer, timer_cb_ret_bad);
+ bpf_timer_start(timer, 1000, 0);
+ }
+
+ return 0;
+}
diff --git a/tools/testing/selftests/bpf/progs/timer_mim.c b/tools/testing/selftests/bpf/progs/timer_mim.c
new file mode 100644
index 000000000000..2fee7ab105ef
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/timer_mim.c
@@ -0,0 +1,88 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2021 Facebook */
+#include <linux/bpf.h>
+#include <time.h>
+#include <errno.h>
+#include <bpf/bpf_helpers.h>
+#include "bpf_tcp_helpers.h"
+
+char _license[] SEC("license") = "GPL";
+struct hmap_elem {
+ int pad; /* unused */
+ struct bpf_timer timer;
+};
+
+struct inner_map {
+ __uint(type, BPF_MAP_TYPE_HASH);
+ __uint(max_entries, 1024);
+ __type(key, int);
+ __type(value, struct hmap_elem);
+} inner_htab SEC(".maps");
+
+#define ARRAY_KEY 1
+#define HASH_KEY 1234
+
+struct outer_arr {
+ __uint(type, BPF_MAP_TYPE_ARRAY_OF_MAPS);
+ __uint(max_entries, 2);
+ __uint(key_size, sizeof(int));
+ __uint(value_size, sizeof(int));
+ __array(values, struct inner_map);
+} outer_arr SEC(".maps") = {
+ .values = { [ARRAY_KEY] = &inner_htab },
+};
+
+__u64 err;
+__u64 ok;
+__u64 cnt;
+
+static int timer_cb1(void *map, int *key, struct hmap_elem *val);
+
+static int timer_cb2(void *map, int *key, struct hmap_elem *val)
+{
+ cnt++;
+ bpf_timer_set_callback(&val->timer, timer_cb1);
+ if (bpf_timer_start(&val->timer, 1000, 0))
+ err |= 1;
+ ok |= 1;
+ return 0;
+}
+
+/* callback for inner hash map */
+static int timer_cb1(void *map, int *key, struct hmap_elem *val)
+{
+ cnt++;
+ bpf_timer_set_callback(&val->timer, timer_cb2);
+ if (bpf_timer_start(&val->timer, 1000, 0))
+ err |= 2;
+ /* Do a lookup to make sure 'map' and 'key' pointers are correct */
+ bpf_map_lookup_elem(map, key);
+ ok |= 2;
+ return 0;
+}
+
+SEC("fentry/bpf_fentry_test1")
+int BPF_PROG(test1, int a)
+{
+ struct hmap_elem init = {};
+ struct bpf_map *inner_map;
+ struct hmap_elem *val;
+ int array_key = ARRAY_KEY;
+ int hash_key = HASH_KEY;
+
+ inner_map = bpf_map_lookup_elem(&outer_arr, &array_key);
+ if (!inner_map)
+ return 0;
+
+ bpf_map_update_elem(inner_map, &hash_key, &init, 0);
+ val = bpf_map_lookup_elem(inner_map, &hash_key);
+ if (!val)
+ return 0;
+
+ bpf_timer_init(&val->timer, inner_map, CLOCK_MONOTONIC);
+ if (bpf_timer_set_callback(&val->timer, timer_cb1))
+ err |= 4;
+ if (bpf_timer_start(&val->timer, 0, 0))
+ err |= 8;
+ return 0;
+}
diff --git a/tools/testing/selftests/bpf/progs/timer_mim_reject.c b/tools/testing/selftests/bpf/progs/timer_mim_reject.c
new file mode 100644
index 000000000000..5d648e3d8a41
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/timer_mim_reject.c
@@ -0,0 +1,74 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2021 Facebook */
+#include <linux/bpf.h>
+#include <time.h>
+#include <errno.h>
+#include <bpf/bpf_helpers.h>
+#include "bpf_tcp_helpers.h"
+
+char _license[] SEC("license") = "GPL";
+struct hmap_elem {
+ int pad; /* unused */
+ struct bpf_timer timer;
+};
+
+struct inner_map {
+ __uint(type, BPF_MAP_TYPE_HASH);
+ __uint(max_entries, 1024);
+ __type(key, int);
+ __type(value, struct hmap_elem);
+} inner_htab SEC(".maps");
+
+#define ARRAY_KEY 1
+#define ARRAY_KEY2 2
+#define HASH_KEY 1234
+
+struct outer_arr {
+ __uint(type, BPF_MAP_TYPE_ARRAY_OF_MAPS);
+ __uint(max_entries, 2);
+ __uint(key_size, sizeof(int));
+ __uint(value_size, sizeof(int));
+ __array(values, struct inner_map);
+} outer_arr SEC(".maps") = {
+ .values = { [ARRAY_KEY] = &inner_htab },
+};
+
+__u64 err;
+__u64 ok;
+__u64 cnt;
+
+/* callback for inner hash map */
+static int timer_cb(void *map, int *key, struct hmap_elem *val)
+{
+ return 0;
+}
+
+SEC("fentry/bpf_fentry_test1")
+int BPF_PROG(test1, int a)
+{
+ struct hmap_elem init = {};
+ struct bpf_map *inner_map, *inner_map2;
+ struct hmap_elem *val;
+ int array_key = ARRAY_KEY;
+ int array_key2 = ARRAY_KEY2;
+ int hash_key = HASH_KEY;
+
+ inner_map = bpf_map_lookup_elem(&outer_arr, &array_key);
+ if (!inner_map)
+ return 0;
+
+ inner_map2 = bpf_map_lookup_elem(&outer_arr, &array_key2);
+ if (!inner_map2)
+ return 0;
+ bpf_map_update_elem(inner_map, &hash_key, &init, 0);
+ val = bpf_map_lookup_elem(inner_map, &hash_key);
+ if (!val)
+ return 0;
+
+ bpf_timer_init(&val->timer, inner_map2, CLOCK_MONOTONIC);
+ if (bpf_timer_set_callback(&val->timer, timer_cb))
+ err |= 4;
+ if (bpf_timer_start(&val->timer, 0, 0))
+ err |= 8;
+ return 0;
+}
diff --git a/tools/testing/selftests/bpf/progs/token_lsm.c b/tools/testing/selftests/bpf/progs/token_lsm.c
new file mode 100644
index 000000000000..e4d59b6ba743
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/token_lsm.c
@@ -0,0 +1,32 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2024 Meta Platforms, Inc. and affiliates. */
+
+#include "vmlinux.h"
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
+
+char _license[] SEC("license") = "GPL";
+
+int my_pid;
+bool reject_capable;
+bool reject_cmd;
+
+SEC("lsm/bpf_token_capable")
+int BPF_PROG(token_capable, struct bpf_token *token, int cap)
+{
+ if (my_pid == 0 || my_pid != (bpf_get_current_pid_tgid() >> 32))
+ return 0;
+ if (reject_capable)
+ return -1;
+ return 0;
+}
+
+SEC("lsm/bpf_token_cmd")
+int BPF_PROG(token_cmd, struct bpf_token *token, enum bpf_cmd cmd)
+{
+ if (my_pid == 0 || my_pid != (bpf_get_current_pid_tgid() >> 32))
+ return 0;
+ if (reject_cmd)
+ return -1;
+ return 0;
+}
diff --git a/tools/testing/selftests/bpf/progs/trace_dummy_st_ops.c b/tools/testing/selftests/bpf/progs/trace_dummy_st_ops.c
new file mode 100644
index 000000000000..00a4be9d3074
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/trace_dummy_st_ops.c
@@ -0,0 +1,21 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <linux/bpf.h>
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
+
+int val = 0;
+
+SEC("fentry/test_1")
+int BPF_PROG(fentry_test_1, __u64 *st_ops_ctx)
+{
+ __u64 state;
+
+ /* Read the traced st_ops arg1 which is a pointer */
+ bpf_probe_read_kernel(&state, sizeof(__u64), (void *)st_ops_ctx);
+ /* Read state->val */
+ bpf_probe_read_kernel(&val, sizeof(__u32), (void *)state);
+
+ return 0;
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/trace_printk.c b/tools/testing/selftests/bpf/progs/trace_printk.c
new file mode 100644
index 000000000000..6695478c2b25
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/trace_printk.c
@@ -0,0 +1,22 @@
+// SPDX-License-Identifier: GPL-2.0
+// Copyright (c) 2020, Oracle and/or its affiliates.
+
+#include "vmlinux.h"
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
+#include "bpf_misc.h"
+
+char _license[] SEC("license") = "GPL";
+
+int trace_printk_ret = 0;
+int trace_printk_ran = 0;
+
+const char fmt[] = "Testing,testing %d\n";
+
+SEC("fentry/" SYS_PREFIX "sys_nanosleep")
+int sys_enter(void *ctx)
+{
+ trace_printk_ret = bpf_trace_printk(fmt, sizeof(fmt),
+ ++trace_printk_ran);
+ return 0;
+}
diff --git a/tools/testing/selftests/bpf/progs/trace_vprintk.c b/tools/testing/selftests/bpf/progs/trace_vprintk.c
new file mode 100644
index 000000000000..969306cd4f33
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/trace_vprintk.c
@@ -0,0 +1,34 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2021 Facebook */
+
+#include "vmlinux.h"
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
+#include "bpf_misc.h"
+
+char _license[] SEC("license") = "GPL";
+
+int null_data_vprintk_ret = 0;
+int trace_vprintk_ret = 0;
+int trace_vprintk_ran = 0;
+
+SEC("fentry/" SYS_PREFIX "sys_nanosleep")
+int sys_enter(void *ctx)
+{
+ static const char one[] = "1";
+ static const char three[] = "3";
+ static const char five[] = "5";
+ static const char seven[] = "7";
+ static const char nine[] = "9";
+ static const char f[] = "%pS\n";
+
+ /* runner doesn't search for \t, just ensure it compiles */
+ bpf_printk("\t");
+
+ trace_vprintk_ret = __bpf_vprintk("%s,%d,%s,%d,%s,%d,%s,%d,%s,%d %d\n",
+ one, 2, three, 4, five, 6, seven, 8, nine, 10, ++trace_vprintk_ran);
+
+ /* non-NULL fmt w/ NULL data should result in error */
+ null_data_vprintk_ret = bpf_trace_vprintk(f, sizeof(f), NULL, 0);
+ return 0;
+}
diff --git a/tools/testing/selftests/bpf/progs/tracing_failure.c b/tools/testing/selftests/bpf/progs/tracing_failure.c
new file mode 100644
index 000000000000..d41665d2ec8c
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/tracing_failure.c
@@ -0,0 +1,20 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2024 Meta Platforms, Inc. and affiliates. */
+
+#include "vmlinux.h"
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
+
+char _license[] SEC("license") = "GPL";
+
+SEC("?fentry/bpf_spin_lock")
+int BPF_PROG(test_spin_lock, struct bpf_spin_lock *lock)
+{
+ return 0;
+}
+
+SEC("?fentry/bpf_spin_unlock")
+int BPF_PROG(test_spin_unlock, struct bpf_spin_lock *lock)
+{
+ return 0;
+}
diff --git a/tools/testing/selftests/bpf/progs/tracing_struct.c b/tools/testing/selftests/bpf/progs/tracing_struct.c
new file mode 100644
index 000000000000..515daef3c84b
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/tracing_struct.c
@@ -0,0 +1,187 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2022 Meta Platforms, Inc. and affiliates. */
+
+#include <vmlinux.h>
+#include <bpf/bpf_tracing.h>
+#include <bpf/bpf_helpers.h>
+
+struct bpf_testmod_struct_arg_1 {
+ int a;
+};
+struct bpf_testmod_struct_arg_2 {
+ long a;
+ long b;
+};
+
+struct bpf_testmod_struct_arg_3 {
+ int a;
+ int b[];
+};
+
+struct bpf_testmod_struct_arg_4 {
+ u64 a;
+ int b;
+};
+
+long t1_a_a, t1_a_b, t1_b, t1_c, t1_ret, t1_nregs;
+__u64 t1_reg0, t1_reg1, t1_reg2, t1_reg3;
+long t2_a, t2_b_a, t2_b_b, t2_c, t2_ret;
+long t3_a, t3_b, t3_c_a, t3_c_b, t3_ret;
+long t4_a_a, t4_b, t4_c, t4_d, t4_e_a, t4_e_b, t4_ret;
+long t5_ret;
+int t6;
+long t7_a, t7_b, t7_c, t7_d, t7_e, t7_f_a, t7_f_b, t7_ret;
+long t8_a, t8_b, t8_c, t8_d, t8_e, t8_f_a, t8_f_b, t8_g, t8_ret;
+
+
+SEC("fentry/bpf_testmod_test_struct_arg_1")
+int BPF_PROG2(test_struct_arg_1, struct bpf_testmod_struct_arg_2, a, int, b, int, c)
+{
+ t1_a_a = a.a;
+ t1_a_b = a.b;
+ t1_b = b;
+ t1_c = c;
+ return 0;
+}
+
+SEC("fexit/bpf_testmod_test_struct_arg_1")
+int BPF_PROG2(test_struct_arg_2, struct bpf_testmod_struct_arg_2, a, int, b, int, c, int, ret)
+{
+ t1_nregs = bpf_get_func_arg_cnt(ctx);
+ /* a.a */
+ bpf_get_func_arg(ctx, 0, &t1_reg0);
+ /* a.b */
+ bpf_get_func_arg(ctx, 1, &t1_reg1);
+ /* b */
+ bpf_get_func_arg(ctx, 2, &t1_reg2);
+ t1_reg2 = (int)t1_reg2;
+ /* c */
+ bpf_get_func_arg(ctx, 3, &t1_reg3);
+ t1_reg3 = (int)t1_reg3;
+
+ t1_ret = ret;
+ return 0;
+}
+
+SEC("fentry/bpf_testmod_test_struct_arg_2")
+int BPF_PROG2(test_struct_arg_3, int, a, struct bpf_testmod_struct_arg_2, b, int, c)
+{
+ t2_a = a;
+ t2_b_a = b.a;
+ t2_b_b = b.b;
+ t2_c = c;
+ return 0;
+}
+
+SEC("fexit/bpf_testmod_test_struct_arg_2")
+int BPF_PROG2(test_struct_arg_4, int, a, struct bpf_testmod_struct_arg_2, b, int, c, int, ret)
+{
+ t2_ret = ret;
+ return 0;
+}
+
+SEC("fentry/bpf_testmod_test_struct_arg_3")
+int BPF_PROG2(test_struct_arg_5, int, a, int, b, struct bpf_testmod_struct_arg_2, c)
+{
+ t3_a = a;
+ t3_b = b;
+ t3_c_a = c.a;
+ t3_c_b = c.b;
+ return 0;
+}
+
+SEC("fexit/bpf_testmod_test_struct_arg_3")
+int BPF_PROG2(test_struct_arg_6, int, a, int, b, struct bpf_testmod_struct_arg_2, c, int, ret)
+{
+ t3_ret = ret;
+ return 0;
+}
+
+SEC("fentry/bpf_testmod_test_struct_arg_4")
+int BPF_PROG2(test_struct_arg_7, struct bpf_testmod_struct_arg_1, a, int, b,
+ int, c, int, d, struct bpf_testmod_struct_arg_2, e)
+{
+ t4_a_a = a.a;
+ t4_b = b;
+ t4_c = c;
+ t4_d = d;
+ t4_e_a = e.a;
+ t4_e_b = e.b;
+ return 0;
+}
+
+SEC("fexit/bpf_testmod_test_struct_arg_4")
+int BPF_PROG2(test_struct_arg_8, struct bpf_testmod_struct_arg_1, a, int, b,
+ int, c, int, d, struct bpf_testmod_struct_arg_2, e, int, ret)
+{
+ t4_ret = ret;
+ return 0;
+}
+
+SEC("fentry/bpf_testmod_test_struct_arg_5")
+int BPF_PROG2(test_struct_arg_9)
+{
+ return 0;
+}
+
+SEC("fexit/bpf_testmod_test_struct_arg_5")
+int BPF_PROG2(test_struct_arg_10, int, ret)
+{
+ t5_ret = ret;
+ return 0;
+}
+
+SEC("fentry/bpf_testmod_test_struct_arg_6")
+int BPF_PROG2(test_struct_arg_11, struct bpf_testmod_struct_arg_3 *, a)
+{
+ t6 = a->b[0];
+ return 0;
+}
+
+SEC("fentry/bpf_testmod_test_struct_arg_7")
+int BPF_PROG2(test_struct_arg_12, __u64, a, void *, b, short, c, int, d,
+ void *, e, struct bpf_testmod_struct_arg_4, f)
+{
+ t7_a = a;
+ t7_b = (long)b;
+ t7_c = c;
+ t7_d = d;
+ t7_e = (long)e;
+ t7_f_a = f.a;
+ t7_f_b = f.b;
+ return 0;
+}
+
+SEC("fexit/bpf_testmod_test_struct_arg_7")
+int BPF_PROG2(test_struct_arg_13, __u64, a, void *, b, short, c, int, d,
+ void *, e, struct bpf_testmod_struct_arg_4, f, int, ret)
+{
+ t7_ret = ret;
+ return 0;
+}
+
+SEC("fentry/bpf_testmod_test_struct_arg_8")
+int BPF_PROG2(test_struct_arg_14, __u64, a, void *, b, short, c, int, d,
+ void *, e, struct bpf_testmod_struct_arg_4, f, int, g)
+{
+ t8_a = a;
+ t8_b = (long)b;
+ t8_c = c;
+ t8_d = d;
+ t8_e = (long)e;
+ t8_f_a = f.a;
+ t8_f_b = f.b;
+ t8_g = g;
+ return 0;
+}
+
+SEC("fexit/bpf_testmod_test_struct_arg_8")
+int BPF_PROG2(test_struct_arg_15, __u64, a, void *, b, short, c, int, d,
+ void *, e, struct bpf_testmod_struct_arg_4, f, int, g,
+ int, ret)
+{
+ t8_ret = ret;
+ return 0;
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/trigger_bench.c b/tools/testing/selftests/bpf/progs/trigger_bench.c
index 8b36b6640e7e..5fda43901033 100644
--- a/tools/testing/selftests/bpf/progs/trigger_bench.c
+++ b/tools/testing/selftests/bpf/progs/trigger_bench.c
@@ -5,6 +5,7 @@
#include <asm/unistd.h>
#include <bpf/bpf_helpers.h>
#include <bpf/bpf_tracing.h>
+#include "bpf_misc.h"
char _license[] SEC("license") = "GPL";
@@ -25,23 +26,65 @@ int BPF_PROG(bench_trigger_raw_tp, struct pt_regs *regs, long id)
return 0;
}
-SEC("kprobe/__x64_sys_getpgid")
+SEC("kprobe/" SYS_PREFIX "sys_getpgid")
int bench_trigger_kprobe(void *ctx)
{
__sync_add_and_fetch(&hits, 1);
return 0;
}
-SEC("fentry/__x64_sys_getpgid")
+SEC("kretprobe/" SYS_PREFIX "sys_getpgid")
+int bench_trigger_kretprobe(void *ctx)
+{
+ __sync_add_and_fetch(&hits, 1);
+ return 0;
+}
+
+SEC("kprobe.multi/" SYS_PREFIX "sys_getpgid")
+int bench_trigger_kprobe_multi(void *ctx)
+{
+ __sync_add_and_fetch(&hits, 1);
+ return 0;
+}
+
+SEC("kretprobe.multi/" SYS_PREFIX "sys_getpgid")
+int bench_trigger_kretprobe_multi(void *ctx)
+{
+ __sync_add_and_fetch(&hits, 1);
+ return 0;
+}
+
+SEC("fentry/" SYS_PREFIX "sys_getpgid")
int bench_trigger_fentry(void *ctx)
{
__sync_add_and_fetch(&hits, 1);
return 0;
}
-SEC("fmod_ret/__x64_sys_getpgid")
+SEC("fexit/" SYS_PREFIX "sys_getpgid")
+int bench_trigger_fexit(void *ctx)
+{
+ __sync_add_and_fetch(&hits, 1);
+ return 0;
+}
+
+SEC("fentry.s/" SYS_PREFIX "sys_getpgid")
+int bench_trigger_fentry_sleep(void *ctx)
+{
+ __sync_add_and_fetch(&hits, 1);
+ return 0;
+}
+
+SEC("fmod_ret/" SYS_PREFIX "sys_getpgid")
int bench_trigger_fmodret(void *ctx)
{
__sync_add_and_fetch(&hits, 1);
return -22;
}
+
+SEC("uprobe")
+int bench_trigger_uprobe(void *ctx)
+{
+ __sync_add_and_fetch(&hits, 1);
+ return 0;
+}
diff --git a/tools/testing/selftests/bpf/progs/twfw.c b/tools/testing/selftests/bpf/progs/twfw.c
new file mode 100644
index 000000000000..de1b18a62b46
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/twfw.c
@@ -0,0 +1,58 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2021 Facebook */
+#include <linux/types.h>
+#include <bpf/bpf_helpers.h>
+#include <linux/bpf.h>
+#include <stdint.h>
+
+#define TWFW_MAX_TIERS (64)
+/*
+ * load is successful
+ * #define TWFW_MAX_TIERS (64u)$
+ */
+
+struct twfw_tier_value {
+ unsigned long mask[1];
+};
+
+struct rule {
+ uint8_t seqnum;
+};
+
+struct rules_map {
+ __uint(type, BPF_MAP_TYPE_ARRAY);
+ __type(key, __u32);
+ __type(value, struct rule);
+ __uint(max_entries, 1);
+};
+
+struct tiers_map {
+ __uint(type, BPF_MAP_TYPE_ARRAY);
+ __type(key, __u32);
+ __type(value, struct twfw_tier_value);
+ __uint(max_entries, 1);
+};
+
+struct rules_map rules SEC(".maps");
+struct tiers_map tiers SEC(".maps");
+
+SEC("cgroup_skb/ingress")
+int twfw_verifier(struct __sk_buff* skb)
+{
+ const uint32_t key = 0;
+ const struct twfw_tier_value* tier = bpf_map_lookup_elem(&tiers, &key);
+ if (!tier)
+ return 1;
+
+ struct rule* rule = bpf_map_lookup_elem(&rules, &key);
+ if (!rule)
+ return 1;
+
+ if (rule && rule->seqnum < TWFW_MAX_TIERS) {
+ /* rule->seqnum / 64 should always be 0 */
+ unsigned long mask = tier->mask[rule->seqnum / 64];
+ if (mask)
+ return 0;
+ }
+ return 1;
+}
diff --git a/tools/testing/selftests/bpf/progs/type_cast.c b/tools/testing/selftests/bpf/progs/type_cast.c
new file mode 100644
index 000000000000..9d808b8f4ab0
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/type_cast.c
@@ -0,0 +1,79 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2022 Meta Platforms, Inc. and affiliates. */
+#include "vmlinux.h"
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
+#include <bpf/bpf_core_read.h>
+#include "bpf_kfuncs.h"
+
+struct {
+ __uint(type, BPF_MAP_TYPE_TASK_STORAGE);
+ __uint(map_flags, BPF_F_NO_PREALLOC);
+ __type(key, int);
+ __type(value, long);
+} enter_id SEC(".maps");
+
+#define IFNAMSIZ 16
+
+int ifindex, ingress_ifindex;
+char name[IFNAMSIZ];
+unsigned int inum;
+unsigned int meta_len, frag0_len, kskb_len, kskb2_len;
+
+SEC("?xdp")
+int md_xdp(struct xdp_md *ctx)
+{
+ struct xdp_buff *kctx = bpf_cast_to_kern_ctx(ctx);
+ struct net_device *dev;
+
+ dev = kctx->rxq->dev;
+ ifindex = dev->ifindex;
+ inum = dev->nd_net.net->ns.inum;
+ __builtin_memcpy(name, dev->name, IFNAMSIZ);
+ ingress_ifindex = ctx->ingress_ifindex;
+ return XDP_PASS;
+}
+
+SEC("?tc")
+int md_skb(struct __sk_buff *skb)
+{
+ struct sk_buff *kskb = bpf_cast_to_kern_ctx(skb);
+ struct skb_shared_info *shared_info;
+ struct sk_buff *kskb2;
+
+ kskb_len = kskb->len;
+
+ /* Simulate the following kernel macro:
+ * #define skb_shinfo(SKB) ((struct skb_shared_info *)(skb_end_pointer(SKB)))
+ */
+ shared_info = bpf_core_cast(kskb->head + kskb->end, struct skb_shared_info);
+ meta_len = shared_info->meta_len;
+ frag0_len = shared_info->frag_list->len;
+
+ /* kskb2 should be equal to kskb */
+ kskb2 = bpf_core_cast(kskb, typeof(*kskb2));
+ kskb2_len = kskb2->len;
+ return 0;
+}
+
+SEC("?tp_btf/sys_enter")
+int BPF_PROG(untrusted_ptr, struct pt_regs *regs, long id)
+{
+ struct task_struct *task, *task_dup;
+
+ task = bpf_get_current_task_btf();
+ task_dup = bpf_core_cast(task, struct task_struct);
+ (void)bpf_task_storage_get(&enter_id, task_dup, 0, 0);
+ return 0;
+}
+
+SEC("?tracepoint/syscalls/sys_enter_nanosleep")
+int kctx_u64(void *ctx)
+{
+ u64 *kctx = bpf_core_cast(ctx, u64);
+
+ (void)kctx;
+ return 0;
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/udp_limit.c b/tools/testing/selftests/bpf/progs/udp_limit.c
new file mode 100644
index 000000000000..4767451b59ac
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/udp_limit.c
@@ -0,0 +1,59 @@
+// SPDX-License-Identifier: GPL-2.0-only
+
+#include <sys/socket.h>
+#include <linux/bpf.h>
+#include <bpf/bpf_helpers.h>
+
+int invocations = 0, in_use = 0;
+
+struct {
+ __uint(type, BPF_MAP_TYPE_SK_STORAGE);
+ __uint(map_flags, BPF_F_NO_PREALLOC);
+ __type(key, int);
+ __type(value, int);
+} sk_map SEC(".maps");
+
+SEC("cgroup/sock_create")
+int sock(struct bpf_sock *ctx)
+{
+ int *sk_storage;
+
+ if (ctx->type != SOCK_DGRAM)
+ return 1;
+
+ sk_storage = bpf_sk_storage_get(&sk_map, ctx, 0,
+ BPF_SK_STORAGE_GET_F_CREATE);
+ if (!sk_storage)
+ return 0;
+ *sk_storage = 0xdeadbeef;
+
+ __sync_fetch_and_add(&invocations, 1);
+
+ if (in_use > 0) {
+ /* BPF_CGROUP_INET_SOCK_RELEASE is _not_ called
+ * when we return an error from the BPF
+ * program!
+ */
+ return 0;
+ }
+
+ __sync_fetch_and_add(&in_use, 1);
+ return 1;
+}
+
+SEC("cgroup/sock_release")
+int sock_release(struct bpf_sock *ctx)
+{
+ int *sk_storage;
+
+ if (ctx->type != SOCK_DGRAM)
+ return 1;
+
+ sk_storage = bpf_sk_storage_get(&sk_map, ctx, 0, 0);
+ if (!sk_storage || *sk_storage != 0xdeadbeef)
+ return 0;
+
+ __sync_fetch_and_add(&invocations, 1);
+ __sync_fetch_and_add(&in_use, -1);
+ return 1;
+}
diff --git a/tools/testing/selftests/bpf/progs/uninit_stack.c b/tools/testing/selftests/bpf/progs/uninit_stack.c
new file mode 100644
index 000000000000..8a403470e557
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/uninit_stack.c
@@ -0,0 +1,87 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include <linux/bpf.h>
+#include <bpf/bpf_helpers.h>
+#include "bpf_misc.h"
+
+/* Read an uninitialized value from stack at a fixed offset */
+SEC("socket")
+__naked int read_uninit_stack_fixed_off(void *ctx)
+{
+ asm volatile (" \
+ r0 = 0; \
+ /* force stack depth to be 128 */ \
+ *(u64*)(r10 - 128) = r1; \
+ r1 = *(u8 *)(r10 - 8 ); \
+ r0 += r1; \
+ r1 = *(u8 *)(r10 - 11); \
+ r1 = *(u8 *)(r10 - 13); \
+ r1 = *(u8 *)(r10 - 15); \
+ r1 = *(u16*)(r10 - 16); \
+ r1 = *(u32*)(r10 - 32); \
+ r1 = *(u64*)(r10 - 64); \
+ /* read from a spill of a wrong size, it is a separate \
+ * branch in check_stack_read_fixed_off() \
+ */ \
+ *(u32*)(r10 - 72) = r1; \
+ r1 = *(u64*)(r10 - 72); \
+ r0 = 0; \
+ exit; \
+"
+ ::: __clobber_all);
+}
+
+/* Read an uninitialized value from stack at a variable offset */
+SEC("socket")
+__naked int read_uninit_stack_var_off(void *ctx)
+{
+ asm volatile (" \
+ call %[bpf_get_prandom_u32]; \
+ /* force stack depth to be 64 */ \
+ *(u64*)(r10 - 64) = r0; \
+ r0 = -r0; \
+ /* give r0 a range [-31, -1] */ \
+ if r0 s<= -32 goto exit_%=; \
+ if r0 s>= 0 goto exit_%=; \
+ /* access stack using r0 */ \
+ r1 = r10; \
+ r1 += r0; \
+ r2 = *(u8*)(r1 + 0); \
+exit_%=: r0 = 0; \
+ exit; \
+"
+ :
+ : __imm(bpf_get_prandom_u32)
+ : __clobber_all);
+}
+
+static __noinline void dummy(void) {}
+
+/* Pass a pointer to uninitialized stack memory to a helper.
+ * Passed memory block should be marked as STACK_MISC after helper call.
+ */
+SEC("socket")
+__log_level(7) __msg("fp-104=mmmmmmmm")
+__naked int helper_uninit_to_misc(void *ctx)
+{
+ asm volatile (" \
+ /* force stack depth to be 128 */ \
+ *(u64*)(r10 - 128) = r1; \
+ r1 = r10; \
+ r1 += -128; \
+ r2 = 32; \
+ call %[bpf_trace_printk]; \
+ /* Call to dummy() forces print_verifier_state(..., true), \
+ * thus showing the stack state, matched by __msg(). \
+ */ \
+ call %[dummy]; \
+ r0 = 0; \
+ exit; \
+"
+ :
+ : __imm(bpf_trace_printk),
+ __imm(dummy)
+ : __clobber_all);
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/uprobe_multi.c b/tools/testing/selftests/bpf/progs/uprobe_multi.c
new file mode 100644
index 000000000000..419d9aa28fce
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/uprobe_multi.c
@@ -0,0 +1,101 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <linux/bpf.h>
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
+#include <stdbool.h>
+
+char _license[] SEC("license") = "GPL";
+
+__u64 uprobe_multi_func_1_addr = 0;
+__u64 uprobe_multi_func_2_addr = 0;
+__u64 uprobe_multi_func_3_addr = 0;
+
+__u64 uprobe_multi_func_1_result = 0;
+__u64 uprobe_multi_func_2_result = 0;
+__u64 uprobe_multi_func_3_result = 0;
+
+__u64 uretprobe_multi_func_1_result = 0;
+__u64 uretprobe_multi_func_2_result = 0;
+__u64 uretprobe_multi_func_3_result = 0;
+
+__u64 uprobe_multi_sleep_result = 0;
+
+int pid = 0;
+int child_pid = 0;
+
+bool test_cookie = false;
+void *user_ptr = 0;
+
+static __always_inline bool verify_sleepable_user_copy(void)
+{
+ char data[9];
+
+ bpf_copy_from_user(data, sizeof(data), user_ptr);
+ return bpf_strncmp(data, sizeof(data), "test_data") == 0;
+}
+
+static void uprobe_multi_check(void *ctx, bool is_return, bool is_sleep)
+{
+ child_pid = bpf_get_current_pid_tgid() >> 32;
+
+ if (pid && child_pid != pid)
+ return;
+
+ __u64 cookie = test_cookie ? bpf_get_attach_cookie(ctx) : 0;
+ __u64 addr = bpf_get_func_ip(ctx);
+
+#define SET(__var, __addr, __cookie) ({ \
+ if (addr == __addr && \
+ (!test_cookie || (cookie == __cookie))) \
+ __var += 1; \
+})
+
+ if (is_return) {
+ SET(uretprobe_multi_func_1_result, uprobe_multi_func_1_addr, 2);
+ SET(uretprobe_multi_func_2_result, uprobe_multi_func_2_addr, 3);
+ SET(uretprobe_multi_func_3_result, uprobe_multi_func_3_addr, 1);
+ } else {
+ SET(uprobe_multi_func_1_result, uprobe_multi_func_1_addr, 3);
+ SET(uprobe_multi_func_2_result, uprobe_multi_func_2_addr, 1);
+ SET(uprobe_multi_func_3_result, uprobe_multi_func_3_addr, 2);
+ }
+
+#undef SET
+
+ if (is_sleep && verify_sleepable_user_copy())
+ uprobe_multi_sleep_result += 1;
+}
+
+SEC("uprobe.multi//proc/self/exe:uprobe_multi_func_*")
+int uprobe(struct pt_regs *ctx)
+{
+ uprobe_multi_check(ctx, false, false);
+ return 0;
+}
+
+SEC("uretprobe.multi//proc/self/exe:uprobe_multi_func_*")
+int uretprobe(struct pt_regs *ctx)
+{
+ uprobe_multi_check(ctx, true, false);
+ return 0;
+}
+
+SEC("uprobe.multi.s//proc/self/exe:uprobe_multi_func_*")
+int uprobe_sleep(struct pt_regs *ctx)
+{
+ uprobe_multi_check(ctx, false, true);
+ return 0;
+}
+
+SEC("uretprobe.multi.s//proc/self/exe:uprobe_multi_func_*")
+int uretprobe_sleep(struct pt_regs *ctx)
+{
+ uprobe_multi_check(ctx, true, true);
+ return 0;
+}
+
+SEC("uprobe.multi//proc/self/exe:uprobe_multi_func_*")
+int uprobe_extra(struct pt_regs *ctx)
+{
+ return 0;
+}
diff --git a/tools/testing/selftests/bpf/progs/uprobe_multi_bench.c b/tools/testing/selftests/bpf/progs/uprobe_multi_bench.c
new file mode 100644
index 000000000000..5367f6105e30
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/uprobe_multi_bench.c
@@ -0,0 +1,15 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <linux/bpf.h>
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
+
+char _license[] SEC("license") = "GPL";
+
+int count;
+
+SEC("uprobe.multi/./uprobe_multi:uprobe_multi_func_*")
+int uprobe_bench(struct pt_regs *ctx)
+{
+ count++;
+ return 0;
+}
diff --git a/tools/testing/selftests/bpf/progs/uprobe_multi_usdt.c b/tools/testing/selftests/bpf/progs/uprobe_multi_usdt.c
new file mode 100644
index 000000000000..9e1c33d0bd2f
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/uprobe_multi_usdt.c
@@ -0,0 +1,16 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include "vmlinux.h"
+#include <bpf/bpf_helpers.h>
+#include <bpf/usdt.bpf.h>
+
+char _license[] SEC("license") = "GPL";
+
+int count;
+
+SEC("usdt")
+int usdt0(struct pt_regs *ctx)
+{
+ count++;
+ return 0;
+}
diff --git a/tools/testing/selftests/bpf/progs/user_ringbuf_fail.c b/tools/testing/selftests/bpf/progs/user_ringbuf_fail.c
new file mode 100644
index 000000000000..11ab25c42c36
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/user_ringbuf_fail.c
@@ -0,0 +1,223 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2022 Meta Platforms, Inc. and affiliates. */
+
+#include <linux/bpf.h>
+#include <bpf/bpf_helpers.h>
+#include "bpf_misc.h"
+
+char _license[] SEC("license") = "GPL";
+
+struct sample {
+ int pid;
+ int seq;
+ long value;
+ char comm[16];
+};
+
+struct {
+ __uint(type, BPF_MAP_TYPE_USER_RINGBUF);
+ __uint(max_entries, 4096);
+} user_ringbuf SEC(".maps");
+
+struct {
+ __uint(type, BPF_MAP_TYPE_RINGBUF);
+ __uint(max_entries, 2);
+} ringbuf SEC(".maps");
+
+static int map_value;
+
+static long
+bad_access1(struct bpf_dynptr *dynptr, void *context)
+{
+ const struct sample *sample;
+
+ sample = bpf_dynptr_data(dynptr - 1, 0, sizeof(*sample));
+ bpf_printk("Was able to pass bad pointer %lx\n", (__u64)dynptr - 1);
+
+ return 0;
+}
+
+/* A callback that accesses a dynptr in a bpf_user_ringbuf_drain callback should
+ * not be able to read before the pointer.
+ */
+SEC("?raw_tp")
+__failure __msg("negative offset dynptr_ptr ptr")
+int user_ringbuf_callback_bad_access1(void *ctx)
+{
+ bpf_user_ringbuf_drain(&user_ringbuf, bad_access1, NULL, 0);
+
+ return 0;
+}
+
+static long
+bad_access2(struct bpf_dynptr *dynptr, void *context)
+{
+ const struct sample *sample;
+
+ sample = bpf_dynptr_data(dynptr + 1, 0, sizeof(*sample));
+ bpf_printk("Was able to pass bad pointer %lx\n", (__u64)dynptr + 1);
+
+ return 0;
+}
+
+/* A callback that accesses a dynptr in a bpf_user_ringbuf_drain callback should
+ * not be able to read past the end of the pointer.
+ */
+SEC("?raw_tp")
+__failure __msg("dereference of modified dynptr_ptr ptr")
+int user_ringbuf_callback_bad_access2(void *ctx)
+{
+ bpf_user_ringbuf_drain(&user_ringbuf, bad_access2, NULL, 0);
+
+ return 0;
+}
+
+static long
+write_forbidden(struct bpf_dynptr *dynptr, void *context)
+{
+ *((long *)dynptr) = 0;
+
+ return 0;
+}
+
+/* A callback that accesses a dynptr in a bpf_user_ringbuf_drain callback should
+ * not be able to write to that pointer.
+ */
+SEC("?raw_tp")
+__failure __msg("invalid mem access 'dynptr_ptr'")
+int user_ringbuf_callback_write_forbidden(void *ctx)
+{
+ bpf_user_ringbuf_drain(&user_ringbuf, write_forbidden, NULL, 0);
+
+ return 0;
+}
+
+static long
+null_context_write(struct bpf_dynptr *dynptr, void *context)
+{
+ *((__u64 *)context) = 0;
+
+ return 0;
+}
+
+/* A callback that accesses a dynptr in a bpf_user_ringbuf_drain callback should
+ * not be able to write to that pointer.
+ */
+SEC("?raw_tp")
+__failure __msg("invalid mem access 'scalar'")
+int user_ringbuf_callback_null_context_write(void *ctx)
+{
+ bpf_user_ringbuf_drain(&user_ringbuf, null_context_write, NULL, 0);
+
+ return 0;
+}
+
+static long
+null_context_read(struct bpf_dynptr *dynptr, void *context)
+{
+ __u64 id = *((__u64 *)context);
+
+ bpf_printk("Read id %lu\n", id);
+
+ return 0;
+}
+
+/* A callback that accesses a dynptr in a bpf_user_ringbuf_drain callback should
+ * not be able to write to that pointer.
+ */
+SEC("?raw_tp")
+__failure __msg("invalid mem access 'scalar'")
+int user_ringbuf_callback_null_context_read(void *ctx)
+{
+ bpf_user_ringbuf_drain(&user_ringbuf, null_context_read, NULL, 0);
+
+ return 0;
+}
+
+static long
+try_discard_dynptr(struct bpf_dynptr *dynptr, void *context)
+{
+ bpf_ringbuf_discard_dynptr(dynptr, 0);
+
+ return 0;
+}
+
+/* A callback that accesses a dynptr in a bpf_user_ringbuf_drain callback should
+ * not be able to read past the end of the pointer.
+ */
+SEC("?raw_tp")
+__failure __msg("cannot release unowned const bpf_dynptr")
+int user_ringbuf_callback_discard_dynptr(void *ctx)
+{
+ bpf_user_ringbuf_drain(&user_ringbuf, try_discard_dynptr, NULL, 0);
+
+ return 0;
+}
+
+static long
+try_submit_dynptr(struct bpf_dynptr *dynptr, void *context)
+{
+ bpf_ringbuf_submit_dynptr(dynptr, 0);
+
+ return 0;
+}
+
+/* A callback that accesses a dynptr in a bpf_user_ringbuf_drain callback should
+ * not be able to read past the end of the pointer.
+ */
+SEC("?raw_tp")
+__failure __msg("cannot release unowned const bpf_dynptr")
+int user_ringbuf_callback_submit_dynptr(void *ctx)
+{
+ bpf_user_ringbuf_drain(&user_ringbuf, try_submit_dynptr, NULL, 0);
+
+ return 0;
+}
+
+static long
+invalid_drain_callback_return(struct bpf_dynptr *dynptr, void *context)
+{
+ return 2;
+}
+
+/* A callback that accesses a dynptr in a bpf_user_ringbuf_drain callback should
+ * not be able to write to that pointer.
+ */
+SEC("?raw_tp")
+__failure __msg("At callback return the register R0 has ")
+int user_ringbuf_callback_invalid_return(void *ctx)
+{
+ bpf_user_ringbuf_drain(&user_ringbuf, invalid_drain_callback_return, NULL, 0);
+
+ return 0;
+}
+
+static long
+try_reinit_dynptr_mem(struct bpf_dynptr *dynptr, void *context)
+{
+ bpf_dynptr_from_mem(&map_value, 4, 0, dynptr);
+ return 0;
+}
+
+static long
+try_reinit_dynptr_ringbuf(struct bpf_dynptr *dynptr, void *context)
+{
+ bpf_ringbuf_reserve_dynptr(&ringbuf, 8, 0, dynptr);
+ return 0;
+}
+
+SEC("?raw_tp")
+__failure __msg("Dynptr has to be an uninitialized dynptr")
+int user_ringbuf_callback_reinit_dynptr_mem(void *ctx)
+{
+ bpf_user_ringbuf_drain(&user_ringbuf, try_reinit_dynptr_mem, NULL, 0);
+ return 0;
+}
+
+SEC("?raw_tp")
+__failure __msg("Dynptr has to be an uninitialized dynptr")
+int user_ringbuf_callback_reinit_dynptr_ringbuf(void *ctx)
+{
+ bpf_user_ringbuf_drain(&user_ringbuf, try_reinit_dynptr_ringbuf, NULL, 0);
+ return 0;
+}
diff --git a/tools/testing/selftests/bpf/progs/user_ringbuf_success.c b/tools/testing/selftests/bpf/progs/user_ringbuf_success.c
new file mode 100644
index 000000000000..dd3bdf672633
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/user_ringbuf_success.c
@@ -0,0 +1,212 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2022 Meta Platforms, Inc. and affiliates. */
+
+#include <linux/bpf.h>
+#include <bpf/bpf_helpers.h>
+#include "bpf_misc.h"
+#include "test_user_ringbuf.h"
+
+char _license[] SEC("license") = "GPL";
+
+struct {
+ __uint(type, BPF_MAP_TYPE_USER_RINGBUF);
+} user_ringbuf SEC(".maps");
+
+struct {
+ __uint(type, BPF_MAP_TYPE_RINGBUF);
+} kernel_ringbuf SEC(".maps");
+
+/* inputs */
+int pid, err, val;
+
+int read = 0;
+
+/* Counter used for end-to-end protocol test */
+__u64 kern_mutated = 0;
+__u64 user_mutated = 0;
+__u64 expected_user_mutated = 0;
+
+static int
+is_test_process(void)
+{
+ int cur_pid = bpf_get_current_pid_tgid() >> 32;
+
+ return cur_pid == pid;
+}
+
+static long
+record_sample(struct bpf_dynptr *dynptr, void *context)
+{
+ const struct sample *sample = NULL;
+ struct sample stack_sample;
+ int status;
+ static int num_calls;
+
+ if (num_calls++ % 2 == 0) {
+ status = bpf_dynptr_read(&stack_sample, sizeof(stack_sample), dynptr, 0, 0);
+ if (status) {
+ bpf_printk("bpf_dynptr_read() failed: %d\n", status);
+ err = 1;
+ return 1;
+ }
+ } else {
+ sample = bpf_dynptr_data(dynptr, 0, sizeof(*sample));
+ if (!sample) {
+ bpf_printk("Unexpectedly failed to get sample\n");
+ err = 2;
+ return 1;
+ }
+ stack_sample = *sample;
+ }
+
+ __sync_fetch_and_add(&read, 1);
+ return 0;
+}
+
+static void
+handle_sample_msg(const struct test_msg *msg)
+{
+ switch (msg->msg_op) {
+ case TEST_MSG_OP_INC64:
+ kern_mutated += msg->operand_64;
+ break;
+ case TEST_MSG_OP_INC32:
+ kern_mutated += msg->operand_32;
+ break;
+ case TEST_MSG_OP_MUL64:
+ kern_mutated *= msg->operand_64;
+ break;
+ case TEST_MSG_OP_MUL32:
+ kern_mutated *= msg->operand_32;
+ break;
+ default:
+ bpf_printk("Unrecognized op %d\n", msg->msg_op);
+ err = 2;
+ }
+}
+
+static long
+read_protocol_msg(struct bpf_dynptr *dynptr, void *context)
+{
+ const struct test_msg *msg = NULL;
+
+ msg = bpf_dynptr_data(dynptr, 0, sizeof(*msg));
+ if (!msg) {
+ err = 1;
+ bpf_printk("Unexpectedly failed to get msg\n");
+ return 0;
+ }
+
+ handle_sample_msg(msg);
+
+ return 0;
+}
+
+static int publish_next_kern_msg(__u32 index, void *context)
+{
+ struct test_msg *msg = NULL;
+ int operand_64 = TEST_OP_64;
+ int operand_32 = TEST_OP_32;
+
+ msg = bpf_ringbuf_reserve(&kernel_ringbuf, sizeof(*msg), 0);
+ if (!msg) {
+ err = 4;
+ return 1;
+ }
+
+ switch (index % TEST_MSG_OP_NUM_OPS) {
+ case TEST_MSG_OP_INC64:
+ msg->operand_64 = operand_64;
+ msg->msg_op = TEST_MSG_OP_INC64;
+ expected_user_mutated += operand_64;
+ break;
+ case TEST_MSG_OP_INC32:
+ msg->operand_32 = operand_32;
+ msg->msg_op = TEST_MSG_OP_INC32;
+ expected_user_mutated += operand_32;
+ break;
+ case TEST_MSG_OP_MUL64:
+ msg->operand_64 = operand_64;
+ msg->msg_op = TEST_MSG_OP_MUL64;
+ expected_user_mutated *= operand_64;
+ break;
+ case TEST_MSG_OP_MUL32:
+ msg->operand_32 = operand_32;
+ msg->msg_op = TEST_MSG_OP_MUL32;
+ expected_user_mutated *= operand_32;
+ break;
+ default:
+ bpf_ringbuf_discard(msg, 0);
+ err = 5;
+ return 1;
+ }
+
+ bpf_ringbuf_submit(msg, 0);
+
+ return 0;
+}
+
+static void
+publish_kern_messages(void)
+{
+ if (expected_user_mutated != user_mutated) {
+ bpf_printk("%lu != %lu\n", expected_user_mutated, user_mutated);
+ err = 3;
+ return;
+ }
+
+ bpf_loop(8, publish_next_kern_msg, NULL, 0);
+}
+
+SEC("fentry/" SYS_PREFIX "sys_prctl")
+int test_user_ringbuf_protocol(void *ctx)
+{
+ long status = 0;
+
+ if (!is_test_process())
+ return 0;
+
+ status = bpf_user_ringbuf_drain(&user_ringbuf, read_protocol_msg, NULL, 0);
+ if (status < 0) {
+ bpf_printk("Drain returned: %ld\n", status);
+ err = 1;
+ return 0;
+ }
+
+ publish_kern_messages();
+
+ return 0;
+}
+
+SEC("fentry/" SYS_PREFIX "sys_getpgid")
+int test_user_ringbuf(void *ctx)
+{
+ if (!is_test_process())
+ return 0;
+
+ err = bpf_user_ringbuf_drain(&user_ringbuf, record_sample, NULL, 0);
+
+ return 0;
+}
+
+static long
+do_nothing_cb(struct bpf_dynptr *dynptr, void *context)
+{
+ __sync_fetch_and_add(&read, 1);
+ return 0;
+}
+
+SEC("fentry/" SYS_PREFIX "sys_prlimit64")
+int test_user_ringbuf_epoll(void *ctx)
+{
+ long num_samples;
+
+ if (!is_test_process())
+ return 0;
+
+ num_samples = bpf_user_ringbuf_drain(&user_ringbuf, do_nothing_cb, NULL, 0);
+ if (num_samples <= 0)
+ err = 1;
+
+ return 0;
+}
diff --git a/tools/testing/selftests/bpf/progs/verifier_and.c b/tools/testing/selftests/bpf/progs/verifier_and.c
new file mode 100644
index 000000000000..e97e518516b6
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/verifier_and.c
@@ -0,0 +1,107 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Converted from tools/testing/selftests/bpf/verifier/and.c */
+
+#include <linux/bpf.h>
+#include <bpf/bpf_helpers.h>
+#include "bpf_misc.h"
+
+#define MAX_ENTRIES 11
+
+struct test_val {
+ unsigned int index;
+ int foo[MAX_ENTRIES];
+};
+
+struct {
+ __uint(type, BPF_MAP_TYPE_HASH);
+ __uint(max_entries, 1);
+ __type(key, long long);
+ __type(value, struct test_val);
+} map_hash_48b SEC(".maps");
+
+SEC("socket")
+__description("invalid and of negative number")
+__failure __msg("R0 max value is outside of the allowed memory range")
+__failure_unpriv
+__flag(BPF_F_ANY_ALIGNMENT)
+__naked void invalid_and_of_negative_number(void)
+{
+ asm volatile (" \
+ r1 = 0; \
+ *(u64*)(r10 - 8) = r1; \
+ r2 = r10; \
+ r2 += -8; \
+ r1 = %[map_hash_48b] ll; \
+ call %[bpf_map_lookup_elem]; \
+ if r0 == 0 goto l0_%=; \
+ r1 = *(u8*)(r0 + 0); \
+ r1 &= -4; \
+ r1 <<= 2; \
+ r0 += r1; \
+l0_%=: r1 = %[test_val_foo]; \
+ *(u64*)(r0 + 0) = r1; \
+ exit; \
+" :
+ : __imm(bpf_map_lookup_elem),
+ __imm_addr(map_hash_48b),
+ __imm_const(test_val_foo, offsetof(struct test_val, foo))
+ : __clobber_all);
+}
+
+SEC("socket")
+__description("invalid range check")
+__failure __msg("R0 max value is outside of the allowed memory range")
+__failure_unpriv
+__flag(BPF_F_ANY_ALIGNMENT)
+__naked void invalid_range_check(void)
+{
+ asm volatile (" \
+ r1 = 0; \
+ *(u64*)(r10 - 8) = r1; \
+ r2 = r10; \
+ r2 += -8; \
+ r1 = %[map_hash_48b] ll; \
+ call %[bpf_map_lookup_elem]; \
+ if r0 == 0 goto l0_%=; \
+ r1 = *(u32*)(r0 + 0); \
+ r9 = 1; \
+ w1 %%= 2; \
+ w1 += 1; \
+ w9 &= w1; \
+ w9 += 1; \
+ w9 >>= 1; \
+ w3 = 1; \
+ w3 -= w9; \
+ w3 *= 0x10000000; \
+ r0 += r3; \
+ *(u32*)(r0 + 0) = r3; \
+l0_%=: r0 = r0; \
+ exit; \
+" :
+ : __imm(bpf_map_lookup_elem),
+ __imm_addr(map_hash_48b)
+ : __clobber_all);
+}
+
+SEC("socket")
+__description("check known subreg with unknown reg")
+__success __failure_unpriv __msg_unpriv("R1 !read_ok")
+__retval(0)
+__naked void known_subreg_with_unknown_reg(void)
+{
+ asm volatile (" \
+ call %[bpf_get_prandom_u32]; \
+ r0 <<= 32; \
+ r0 += 1; \
+ r0 &= 0xFFFF1234; \
+ /* Upper bits are unknown but AND above masks out 1 zero'ing lower bits */\
+ if w0 < 1 goto l0_%=; \
+ r1 = *(u32*)(r1 + 512); \
+l0_%=: r0 = 0; \
+ exit; \
+" :
+ : __imm(bpf_get_prandom_u32)
+ : __clobber_all);
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/verifier_arena.c b/tools/testing/selftests/bpf/progs/verifier_arena.c
new file mode 100644
index 000000000000..5540b05ff9ee
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/verifier_arena.c
@@ -0,0 +1,146 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2024 Meta Platforms, Inc. and affiliates. */
+
+#include <vmlinux.h>
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
+#include "bpf_misc.h"
+#include "bpf_experimental.h"
+#include "bpf_arena_common.h"
+
+struct {
+ __uint(type, BPF_MAP_TYPE_ARENA);
+ __uint(map_flags, BPF_F_MMAPABLE);
+ __uint(max_entries, 2); /* arena of two pages close to 32-bit boundary*/
+ __ulong(map_extra, (1ull << 44) | (~0u - __PAGE_SIZE * 2 + 1)); /* start of mmap() region */
+} arena SEC(".maps");
+
+SEC("syscall")
+__success __retval(0)
+int basic_alloc1(void *ctx)
+{
+#if defined(__BPF_FEATURE_ARENA_CAST)
+ volatile int __arena *page1, *page2, *no_page, *page3;
+
+ page1 = bpf_arena_alloc_pages(&arena, NULL, 1, NUMA_NO_NODE, 0);
+ if (!page1)
+ return 1;
+ *page1 = 1;
+ page2 = bpf_arena_alloc_pages(&arena, NULL, 1, NUMA_NO_NODE, 0);
+ if (!page2)
+ return 2;
+ *page2 = 2;
+ no_page = bpf_arena_alloc_pages(&arena, NULL, 1, NUMA_NO_NODE, 0);
+ if (no_page)
+ return 3;
+ if (*page1 != 1)
+ return 4;
+ if (*page2 != 2)
+ return 5;
+ bpf_arena_free_pages(&arena, (void __arena *)page2, 1);
+ if (*page1 != 1)
+ return 6;
+ if (*page2 != 0) /* use-after-free should return 0 */
+ return 7;
+ page3 = bpf_arena_alloc_pages(&arena, NULL, 1, NUMA_NO_NODE, 0);
+ if (!page3)
+ return 8;
+ *page3 = 3;
+ if (page2 != page3)
+ return 9;
+ if (*page1 != 1)
+ return 10;
+#endif
+ return 0;
+}
+
+SEC("syscall")
+__success __retval(0)
+int basic_alloc2(void *ctx)
+{
+#if defined(__BPF_FEATURE_ARENA_CAST)
+ volatile char __arena *page1, *page2, *page3, *page4;
+
+ page1 = bpf_arena_alloc_pages(&arena, NULL, 2, NUMA_NO_NODE, 0);
+ if (!page1)
+ return 1;
+ page2 = page1 + __PAGE_SIZE;
+ page3 = page1 + __PAGE_SIZE * 2;
+ page4 = page1 - __PAGE_SIZE;
+ *page1 = 1;
+ *page2 = 2;
+ *page3 = 3;
+ *page4 = 4;
+ if (*page1 != 1)
+ return 1;
+ if (*page2 != 2)
+ return 2;
+ if (*page3 != 0)
+ return 3;
+ if (*page4 != 0)
+ return 4;
+ bpf_arena_free_pages(&arena, (void __arena *)page1, 2);
+ if (*page1 != 0)
+ return 5;
+ if (*page2 != 0)
+ return 6;
+ if (*page3 != 0)
+ return 7;
+ if (*page4 != 0)
+ return 8;
+#endif
+ return 0;
+}
+
+struct bpf_arena___l {
+ struct bpf_map map;
+} __attribute__((preserve_access_index));
+
+SEC("syscall")
+__success __retval(0) __log_level(2)
+int basic_alloc3(void *ctx)
+{
+ struct bpf_arena___l *ar = (struct bpf_arena___l *)&arena;
+ volatile char __arena *pages;
+
+ pages = bpf_arena_alloc_pages(&ar->map, NULL, ar->map.max_entries, NUMA_NO_NODE, 0);
+ if (!pages)
+ return 1;
+ return 0;
+}
+
+SEC("iter.s/bpf_map")
+__success __log_level(2)
+int iter_maps1(struct bpf_iter__bpf_map *ctx)
+{
+ struct bpf_map *map = ctx->map;
+
+ if (!map)
+ return 0;
+ bpf_arena_alloc_pages(map, NULL, map->max_entries, 0, 0);
+ return 0;
+}
+
+SEC("iter.s/bpf_map")
+__failure __msg("expected pointer to STRUCT bpf_map")
+int iter_maps2(struct bpf_iter__bpf_map *ctx)
+{
+ struct seq_file *seq = ctx->meta->seq;
+
+ bpf_arena_alloc_pages((void *)seq, NULL, 1, 0, 0);
+ return 0;
+}
+
+SEC("iter.s/bpf_map")
+__failure __msg("untrusted_ptr_bpf_map")
+int iter_maps3(struct bpf_iter__bpf_map *ctx)
+{
+ struct bpf_map *map = ctx->map;
+
+ if (!map)
+ return 0;
+ bpf_arena_alloc_pages(map->inner_map_meta, NULL, map->max_entries, 0, 0);
+ return 0;
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/verifier_array_access.c b/tools/testing/selftests/bpf/progs/verifier_array_access.c
new file mode 100644
index 000000000000..95d7ecc12963
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/verifier_array_access.c
@@ -0,0 +1,529 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Converted from tools/testing/selftests/bpf/verifier/array_access.c */
+
+#include <linux/bpf.h>
+#include <bpf/bpf_helpers.h>
+#include "bpf_misc.h"
+
+#define MAX_ENTRIES 11
+
+struct test_val {
+ unsigned int index;
+ int foo[MAX_ENTRIES];
+};
+
+struct {
+ __uint(type, BPF_MAP_TYPE_ARRAY);
+ __uint(max_entries, 1);
+ __type(key, int);
+ __type(value, struct test_val);
+ __uint(map_flags, BPF_F_RDONLY_PROG);
+} map_array_ro SEC(".maps");
+
+struct {
+ __uint(type, BPF_MAP_TYPE_ARRAY);
+ __uint(max_entries, 1);
+ __type(key, int);
+ __type(value, struct test_val);
+ __uint(map_flags, BPF_F_WRONLY_PROG);
+} map_array_wo SEC(".maps");
+
+struct {
+ __uint(type, BPF_MAP_TYPE_HASH);
+ __uint(max_entries, 1);
+ __type(key, long long);
+ __type(value, struct test_val);
+} map_hash_48b SEC(".maps");
+
+SEC("socket")
+__description("valid map access into an array with a constant")
+__success __failure_unpriv __msg_unpriv("R0 leaks addr")
+__retval(0)
+__naked void an_array_with_a_constant_1(void)
+{
+ asm volatile (" \
+ r1 = 0; \
+ *(u64*)(r10 - 8) = r1; \
+ r2 = r10; \
+ r2 += -8; \
+ r1 = %[map_hash_48b] ll; \
+ call %[bpf_map_lookup_elem]; \
+ if r0 == 0 goto l0_%=; \
+ r1 = %[test_val_foo]; \
+ *(u64*)(r0 + 0) = r1; \
+l0_%=: exit; \
+" :
+ : __imm(bpf_map_lookup_elem),
+ __imm_addr(map_hash_48b),
+ __imm_const(test_val_foo, offsetof(struct test_val, foo))
+ : __clobber_all);
+}
+
+SEC("socket")
+__description("valid map access into an array with a register")
+__success __failure_unpriv __msg_unpriv("R0 leaks addr")
+__retval(0) __flag(BPF_F_ANY_ALIGNMENT)
+__naked void an_array_with_a_register_1(void)
+{
+ asm volatile (" \
+ r1 = 0; \
+ *(u64*)(r10 - 8) = r1; \
+ r2 = r10; \
+ r2 += -8; \
+ r1 = %[map_hash_48b] ll; \
+ call %[bpf_map_lookup_elem]; \
+ if r0 == 0 goto l0_%=; \
+ r1 = 4; \
+ r1 <<= 2; \
+ r0 += r1; \
+ r1 = %[test_val_foo]; \
+ *(u64*)(r0 + 0) = r1; \
+l0_%=: exit; \
+" :
+ : __imm(bpf_map_lookup_elem),
+ __imm_addr(map_hash_48b),
+ __imm_const(test_val_foo, offsetof(struct test_val, foo))
+ : __clobber_all);
+}
+
+SEC("socket")
+__description("valid map access into an array with a variable")
+__success __failure_unpriv __msg_unpriv("R0 leaks addr")
+__retval(0) __flag(BPF_F_ANY_ALIGNMENT)
+__naked void an_array_with_a_variable_1(void)
+{
+ asm volatile (" \
+ r1 = 0; \
+ *(u64*)(r10 - 8) = r1; \
+ r2 = r10; \
+ r2 += -8; \
+ r1 = %[map_hash_48b] ll; \
+ call %[bpf_map_lookup_elem]; \
+ if r0 == 0 goto l0_%=; \
+ r1 = *(u32*)(r0 + 0); \
+ if r1 >= %[max_entries] goto l0_%=; \
+ r1 <<= 2; \
+ r0 += r1; \
+ r1 = %[test_val_foo]; \
+ *(u64*)(r0 + 0) = r1; \
+l0_%=: exit; \
+" :
+ : __imm(bpf_map_lookup_elem),
+ __imm_addr(map_hash_48b),
+ __imm_const(max_entries, MAX_ENTRIES),
+ __imm_const(test_val_foo, offsetof(struct test_val, foo))
+ : __clobber_all);
+}
+
+SEC("socket")
+__description("valid map access into an array with a signed variable")
+__success __failure_unpriv __msg_unpriv("R0 leaks addr")
+__retval(0) __flag(BPF_F_ANY_ALIGNMENT)
+__naked void array_with_a_signed_variable(void)
+{
+ asm volatile (" \
+ r1 = 0; \
+ *(u64*)(r10 - 8) = r1; \
+ r2 = r10; \
+ r2 += -8; \
+ r1 = %[map_hash_48b] ll; \
+ call %[bpf_map_lookup_elem]; \
+ if r0 == 0 goto l0_%=; \
+ r1 = *(u32*)(r0 + 0); \
+ if w1 s> 0xffffffff goto l1_%=; \
+ w1 = 0; \
+l1_%=: w2 = %[max_entries]; \
+ if r2 s> r1 goto l2_%=; \
+ w1 = 0; \
+l2_%=: w1 <<= 2; \
+ r0 += r1; \
+ r1 = %[test_val_foo]; \
+ *(u64*)(r0 + 0) = r1; \
+l0_%=: exit; \
+" :
+ : __imm(bpf_map_lookup_elem),
+ __imm_addr(map_hash_48b),
+ __imm_const(max_entries, MAX_ENTRIES),
+ __imm_const(test_val_foo, offsetof(struct test_val, foo))
+ : __clobber_all);
+}
+
+SEC("socket")
+__description("invalid map access into an array with a constant")
+__failure __msg("invalid access to map value, value_size=48 off=48 size=8")
+__failure_unpriv
+__naked void an_array_with_a_constant_2(void)
+{
+ asm volatile (" \
+ r1 = 0; \
+ *(u64*)(r10 - 8) = r1; \
+ r2 = r10; \
+ r2 += -8; \
+ r1 = %[map_hash_48b] ll; \
+ call %[bpf_map_lookup_elem]; \
+ if r0 == 0 goto l0_%=; \
+ r1 = %[test_val_foo]; \
+ *(u64*)(r0 + %[__imm_0]) = r1; \
+l0_%=: exit; \
+" :
+ : __imm(bpf_map_lookup_elem),
+ __imm_addr(map_hash_48b),
+ __imm_const(__imm_0, (MAX_ENTRIES + 1) << 2),
+ __imm_const(test_val_foo, offsetof(struct test_val, foo))
+ : __clobber_all);
+}
+
+SEC("socket")
+__description("invalid map access into an array with a register")
+__failure __msg("R0 min value is outside of the allowed memory range")
+__failure_unpriv
+__flag(BPF_F_ANY_ALIGNMENT)
+__naked void an_array_with_a_register_2(void)
+{
+ asm volatile (" \
+ r1 = 0; \
+ *(u64*)(r10 - 8) = r1; \
+ r2 = r10; \
+ r2 += -8; \
+ r1 = %[map_hash_48b] ll; \
+ call %[bpf_map_lookup_elem]; \
+ if r0 == 0 goto l0_%=; \
+ r1 = %[__imm_0]; \
+ r1 <<= 2; \
+ r0 += r1; \
+ r1 = %[test_val_foo]; \
+ *(u64*)(r0 + 0) = r1; \
+l0_%=: exit; \
+" :
+ : __imm(bpf_map_lookup_elem),
+ __imm_addr(map_hash_48b),
+ __imm_const(__imm_0, MAX_ENTRIES + 1),
+ __imm_const(test_val_foo, offsetof(struct test_val, foo))
+ : __clobber_all);
+}
+
+SEC("socket")
+__description("invalid map access into an array with a variable")
+__failure
+__msg("R0 unbounded memory access, make sure to bounds check any such access")
+__failure_unpriv
+__flag(BPF_F_ANY_ALIGNMENT)
+__naked void an_array_with_a_variable_2(void)
+{
+ asm volatile (" \
+ r1 = 0; \
+ *(u64*)(r10 - 8) = r1; \
+ r2 = r10; \
+ r2 += -8; \
+ r1 = %[map_hash_48b] ll; \
+ call %[bpf_map_lookup_elem]; \
+ if r0 == 0 goto l0_%=; \
+ r1 = *(u32*)(r0 + 0); \
+ r1 <<= 2; \
+ r0 += r1; \
+ r1 = %[test_val_foo]; \
+ *(u64*)(r0 + 0) = r1; \
+l0_%=: exit; \
+" :
+ : __imm(bpf_map_lookup_elem),
+ __imm_addr(map_hash_48b),
+ __imm_const(test_val_foo, offsetof(struct test_val, foo))
+ : __clobber_all);
+}
+
+SEC("socket")
+__description("invalid map access into an array with no floor check")
+__failure __msg("R0 unbounded memory access")
+__failure_unpriv __msg_unpriv("R0 leaks addr")
+__flag(BPF_F_ANY_ALIGNMENT)
+__naked void array_with_no_floor_check(void)
+{
+ asm volatile (" \
+ r1 = 0; \
+ *(u64*)(r10 - 8) = r1; \
+ r2 = r10; \
+ r2 += -8; \
+ r1 = %[map_hash_48b] ll; \
+ call %[bpf_map_lookup_elem]; \
+ if r0 == 0 goto l0_%=; \
+ r1 = *(u64*)(r0 + 0); \
+ w2 = %[max_entries]; \
+ if r2 s> r1 goto l1_%=; \
+ w1 = 0; \
+l1_%=: w1 <<= 2; \
+ r0 += r1; \
+ r1 = %[test_val_foo]; \
+ *(u64*)(r0 + 0) = r1; \
+l0_%=: exit; \
+" :
+ : __imm(bpf_map_lookup_elem),
+ __imm_addr(map_hash_48b),
+ __imm_const(max_entries, MAX_ENTRIES),
+ __imm_const(test_val_foo, offsetof(struct test_val, foo))
+ : __clobber_all);
+}
+
+SEC("socket")
+__description("invalid map access into an array with a invalid max check")
+__failure __msg("invalid access to map value, value_size=48 off=44 size=8")
+__failure_unpriv __msg_unpriv("R0 leaks addr")
+__flag(BPF_F_ANY_ALIGNMENT)
+__naked void with_a_invalid_max_check_1(void)
+{
+ asm volatile (" \
+ r1 = 0; \
+ *(u64*)(r10 - 8) = r1; \
+ r2 = r10; \
+ r2 += -8; \
+ r1 = %[map_hash_48b] ll; \
+ call %[bpf_map_lookup_elem]; \
+ if r0 == 0 goto l0_%=; \
+ r1 = *(u32*)(r0 + 0); \
+ w2 = %[__imm_0]; \
+ if r2 > r1 goto l1_%=; \
+ w1 = 0; \
+l1_%=: w1 <<= 2; \
+ r0 += r1; \
+ r1 = %[test_val_foo]; \
+ *(u64*)(r0 + 0) = r1; \
+l0_%=: exit; \
+" :
+ : __imm(bpf_map_lookup_elem),
+ __imm_addr(map_hash_48b),
+ __imm_const(__imm_0, MAX_ENTRIES + 1),
+ __imm_const(test_val_foo, offsetof(struct test_val, foo))
+ : __clobber_all);
+}
+
+SEC("socket")
+__description("invalid map access into an array with a invalid max check")
+__failure __msg("R0 pointer += pointer")
+__failure_unpriv
+__flag(BPF_F_ANY_ALIGNMENT)
+__naked void with_a_invalid_max_check_2(void)
+{
+ asm volatile (" \
+ r1 = 0; \
+ *(u64*)(r10 - 8) = r1; \
+ r2 = r10; \
+ r2 += -8; \
+ r1 = %[map_hash_48b] ll; \
+ call %[bpf_map_lookup_elem]; \
+ if r0 == 0 goto l0_%=; \
+ r8 = r0; \
+ r1 = 0; \
+ *(u64*)(r10 - 8) = r1; \
+ r2 = r10; \
+ r2 += -8; \
+ r1 = %[map_hash_48b] ll; \
+ call %[bpf_map_lookup_elem]; \
+ if r0 == 0 goto l0_%=; \
+ r0 += r8; \
+ r0 = *(u32*)(r0 + %[test_val_foo]); \
+l0_%=: exit; \
+" :
+ : __imm(bpf_map_lookup_elem),
+ __imm_addr(map_hash_48b),
+ __imm_const(test_val_foo, offsetof(struct test_val, foo))
+ : __clobber_all);
+}
+
+SEC("socket")
+__description("valid read map access into a read-only array 1")
+__success __success_unpriv __retval(28)
+__naked void a_read_only_array_1_1(void)
+{
+ asm volatile (" \
+ r1 = 0; \
+ *(u64*)(r10 - 8) = r1; \
+ r2 = r10; \
+ r2 += -8; \
+ r1 = %[map_array_ro] ll; \
+ call %[bpf_map_lookup_elem]; \
+ if r0 == 0 goto l0_%=; \
+ r0 = *(u32*)(r0 + 0); \
+l0_%=: exit; \
+" :
+ : __imm(bpf_map_lookup_elem),
+ __imm_addr(map_array_ro)
+ : __clobber_all);
+}
+
+SEC("tc")
+__description("valid read map access into a read-only array 2")
+__success __retval(65507)
+__naked void a_read_only_array_2_1(void)
+{
+ asm volatile (" \
+ r1 = 0; \
+ *(u64*)(r10 - 8) = r1; \
+ r2 = r10; \
+ r2 += -8; \
+ r1 = %[map_array_ro] ll; \
+ call %[bpf_map_lookup_elem]; \
+ if r0 == 0 goto l0_%=; \
+ r1 = r0; \
+ r2 = 4; \
+ r3 = 0; \
+ r4 = 0; \
+ r5 = 0; \
+ call %[bpf_csum_diff]; \
+l0_%=: r0 &= 0xffff; \
+ exit; \
+" :
+ : __imm(bpf_csum_diff),
+ __imm(bpf_map_lookup_elem),
+ __imm_addr(map_array_ro)
+ : __clobber_all);
+}
+
+SEC("socket")
+__description("invalid write map access into a read-only array 1")
+__failure __msg("write into map forbidden")
+__failure_unpriv
+__naked void a_read_only_array_1_2(void)
+{
+ asm volatile (" \
+ r1 = 0; \
+ *(u64*)(r10 - 8) = r1; \
+ r2 = r10; \
+ r2 += -8; \
+ r1 = %[map_array_ro] ll; \
+ call %[bpf_map_lookup_elem]; \
+ if r0 == 0 goto l0_%=; \
+ r1 = 42; \
+ *(u64*)(r0 + 0) = r1; \
+l0_%=: exit; \
+" :
+ : __imm(bpf_map_lookup_elem),
+ __imm_addr(map_array_ro)
+ : __clobber_all);
+}
+
+SEC("tc")
+__description("invalid write map access into a read-only array 2")
+__failure __msg("write into map forbidden")
+__naked void a_read_only_array_2_2(void)
+{
+ asm volatile (" \
+ r6 = r1; \
+ r1 = 0; \
+ *(u64*)(r10 - 8) = r1; \
+ r2 = r10; \
+ r2 += -8; \
+ r1 = %[map_array_ro] ll; \
+ call %[bpf_map_lookup_elem]; \
+ if r0 == 0 goto l0_%=; \
+ r1 = r6; \
+ r2 = 0; \
+ r3 = r0; \
+ r4 = 8; \
+ call %[bpf_skb_load_bytes]; \
+l0_%=: exit; \
+" :
+ : __imm(bpf_map_lookup_elem),
+ __imm(bpf_skb_load_bytes),
+ __imm_addr(map_array_ro)
+ : __clobber_all);
+}
+
+SEC("socket")
+__description("valid write map access into a write-only array 1")
+__success __success_unpriv __retval(1)
+__naked void a_write_only_array_1_1(void)
+{
+ asm volatile (" \
+ r1 = 0; \
+ *(u64*)(r10 - 8) = r1; \
+ r2 = r10; \
+ r2 += -8; \
+ r1 = %[map_array_wo] ll; \
+ call %[bpf_map_lookup_elem]; \
+ if r0 == 0 goto l0_%=; \
+ r1 = 42; \
+ *(u64*)(r0 + 0) = r1; \
+l0_%=: r0 = 1; \
+ exit; \
+" :
+ : __imm(bpf_map_lookup_elem),
+ __imm_addr(map_array_wo)
+ : __clobber_all);
+}
+
+SEC("tc")
+__description("valid write map access into a write-only array 2")
+__success __retval(0)
+__naked void a_write_only_array_2_1(void)
+{
+ asm volatile (" \
+ r6 = r1; \
+ r1 = 0; \
+ *(u64*)(r10 - 8) = r1; \
+ r2 = r10; \
+ r2 += -8; \
+ r1 = %[map_array_wo] ll; \
+ call %[bpf_map_lookup_elem]; \
+ if r0 == 0 goto l0_%=; \
+ r1 = r6; \
+ r2 = 0; \
+ r3 = r0; \
+ r4 = 8; \
+ call %[bpf_skb_load_bytes]; \
+l0_%=: exit; \
+" :
+ : __imm(bpf_map_lookup_elem),
+ __imm(bpf_skb_load_bytes),
+ __imm_addr(map_array_wo)
+ : __clobber_all);
+}
+
+SEC("socket")
+__description("invalid read map access into a write-only array 1")
+__failure __msg("read from map forbidden")
+__failure_unpriv
+__naked void a_write_only_array_1_2(void)
+{
+ asm volatile (" \
+ r1 = 0; \
+ *(u64*)(r10 - 8) = r1; \
+ r2 = r10; \
+ r2 += -8; \
+ r1 = %[map_array_wo] ll; \
+ call %[bpf_map_lookup_elem]; \
+ if r0 == 0 goto l0_%=; \
+ r0 = *(u64*)(r0 + 0); \
+l0_%=: exit; \
+" :
+ : __imm(bpf_map_lookup_elem),
+ __imm_addr(map_array_wo)
+ : __clobber_all);
+}
+
+SEC("tc")
+__description("invalid read map access into a write-only array 2")
+__failure __msg("read from map forbidden")
+__naked void a_write_only_array_2_2(void)
+{
+ asm volatile (" \
+ r1 = 0; \
+ *(u64*)(r10 - 8) = r1; \
+ r2 = r10; \
+ r2 += -8; \
+ r1 = %[map_array_wo] ll; \
+ call %[bpf_map_lookup_elem]; \
+ if r0 == 0 goto l0_%=; \
+ r1 = r0; \
+ r2 = 4; \
+ r3 = 0; \
+ r4 = 0; \
+ r5 = 0; \
+ call %[bpf_csum_diff]; \
+l0_%=: exit; \
+" :
+ : __imm(bpf_csum_diff),
+ __imm(bpf_map_lookup_elem),
+ __imm_addr(map_array_wo)
+ : __clobber_all);
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/verifier_basic_stack.c b/tools/testing/selftests/bpf/progs/verifier_basic_stack.c
new file mode 100644
index 000000000000..8d77cc5323d3
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/verifier_basic_stack.c
@@ -0,0 +1,100 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Converted from tools/testing/selftests/bpf/verifier/basic_stack.c */
+
+#include <linux/bpf.h>
+#include <bpf/bpf_helpers.h>
+#include "bpf_misc.h"
+
+struct {
+ __uint(type, BPF_MAP_TYPE_HASH);
+ __uint(max_entries, 1);
+ __type(key, long long);
+ __type(value, long long);
+} map_hash_8b SEC(".maps");
+
+SEC("socket")
+__description("stack out of bounds")
+__failure __msg("invalid write to stack")
+__failure_unpriv
+__naked void stack_out_of_bounds(void)
+{
+ asm volatile (" \
+ r1 = 0; \
+ *(u64*)(r10 + 8) = r1; \
+ exit; \
+" ::: __clobber_all);
+}
+
+SEC("socket")
+__description("uninitialized stack1")
+__success __log_level(4) __msg("stack depth 8")
+__failure_unpriv __msg_unpriv("invalid indirect read from stack")
+__naked void uninitialized_stack1(void)
+{
+ asm volatile (" \
+ r2 = r10; \
+ r2 += -8; \
+ r1 = %[map_hash_8b] ll; \
+ call %[bpf_map_lookup_elem]; \
+ exit; \
+" :
+ : __imm(bpf_map_lookup_elem),
+ __imm_addr(map_hash_8b)
+ : __clobber_all);
+}
+
+SEC("socket")
+__description("uninitialized stack2")
+__success __log_level(4) __msg("stack depth 8")
+__failure_unpriv __msg_unpriv("invalid read from stack")
+__naked void uninitialized_stack2(void)
+{
+ asm volatile (" \
+ r2 = r10; \
+ r0 = *(u64*)(r2 - 8); \
+ exit; \
+" ::: __clobber_all);
+}
+
+SEC("socket")
+__description("invalid fp arithmetic")
+__failure __msg("R1 subtraction from stack pointer")
+__failure_unpriv
+__naked void invalid_fp_arithmetic(void)
+{
+ /* If this gets ever changed, make sure JITs can deal with it. */
+ asm volatile (" \
+ r0 = 0; \
+ r1 = r10; \
+ r1 -= 8; \
+ *(u64*)(r1 + 0) = r0; \
+ exit; \
+" ::: __clobber_all);
+}
+
+SEC("socket")
+__description("non-invalid fp arithmetic")
+__success __success_unpriv __retval(0)
+__naked void non_invalid_fp_arithmetic(void)
+{
+ asm volatile (" \
+ r0 = 0; \
+ *(u64*)(r10 - 8) = r0; \
+ exit; \
+" ::: __clobber_all);
+}
+
+SEC("socket")
+__description("misaligned read from stack")
+__failure __msg("misaligned stack access")
+__failure_unpriv
+__naked void misaligned_read_from_stack(void)
+{
+ asm volatile (" \
+ r2 = r10; \
+ r0 = *(u64*)(r2 - 4); \
+ exit; \
+" ::: __clobber_all);
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/verifier_bitfield_write.c b/tools/testing/selftests/bpf/progs/verifier_bitfield_write.c
new file mode 100644
index 000000000000..623f130a3198
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/verifier_bitfield_write.c
@@ -0,0 +1,100 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include <linux/bpf.h>
+#include <stdint.h>
+
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_core_read.h>
+
+#include "bpf_misc.h"
+
+struct core_reloc_bitfields {
+ /* unsigned bitfields */
+ uint8_t ub1: 1;
+ uint8_t ub2: 2;
+ uint32_t ub7: 7;
+ /* signed bitfields */
+ int8_t sb4: 4;
+ int32_t sb20: 20;
+ /* non-bitfields */
+ uint32_t u32;
+ int32_t s32;
+} __attribute__((preserve_access_index));
+
+SEC("tc")
+__description("single CO-RE bitfield roundtrip")
+__btf_path("btf__core_reloc_bitfields.bpf.o")
+__success
+__retval(3)
+int single_field_roundtrip(struct __sk_buff *ctx)
+{
+ struct core_reloc_bitfields bitfields;
+
+ __builtin_memset(&bitfields, 0, sizeof(bitfields));
+ BPF_CORE_WRITE_BITFIELD(&bitfields, ub2, 3);
+ return BPF_CORE_READ_BITFIELD(&bitfields, ub2);
+}
+
+SEC("tc")
+__description("multiple CO-RE bitfield roundtrip")
+__btf_path("btf__core_reloc_bitfields.bpf.o")
+__success
+__retval(0x3FD)
+int multiple_field_roundtrip(struct __sk_buff *ctx)
+{
+ struct core_reloc_bitfields bitfields;
+ uint8_t ub2;
+ int8_t sb4;
+
+ __builtin_memset(&bitfields, 0, sizeof(bitfields));
+ BPF_CORE_WRITE_BITFIELD(&bitfields, ub2, 1);
+ BPF_CORE_WRITE_BITFIELD(&bitfields, sb4, -1);
+
+ ub2 = BPF_CORE_READ_BITFIELD(&bitfields, ub2);
+ sb4 = BPF_CORE_READ_BITFIELD(&bitfields, sb4);
+
+ return (((uint8_t)sb4) << 2) | ub2;
+}
+
+SEC("tc")
+__description("adjacent CO-RE bitfield roundtrip")
+__btf_path("btf__core_reloc_bitfields.bpf.o")
+__success
+__retval(7)
+int adjacent_field_roundtrip(struct __sk_buff *ctx)
+{
+ struct core_reloc_bitfields bitfields;
+ uint8_t ub1, ub2;
+
+ __builtin_memset(&bitfields, 0, sizeof(bitfields));
+ BPF_CORE_WRITE_BITFIELD(&bitfields, ub1, 1);
+ BPF_CORE_WRITE_BITFIELD(&bitfields, ub2, 3);
+
+ ub1 = BPF_CORE_READ_BITFIELD(&bitfields, ub1);
+ ub2 = BPF_CORE_READ_BITFIELD(&bitfields, ub2);
+
+ return (ub2 << 1) | ub1;
+}
+
+SEC("tc")
+__description("multibyte CO-RE bitfield roundtrip")
+__btf_path("btf__core_reloc_bitfields.bpf.o")
+__success
+__retval(0x21)
+int multibyte_field_roundtrip(struct __sk_buff *ctx)
+{
+ struct core_reloc_bitfields bitfields;
+ uint32_t ub7;
+ uint8_t ub1;
+
+ __builtin_memset(&bitfields, 0, sizeof(bitfields));
+ BPF_CORE_WRITE_BITFIELD(&bitfields, ub1, 1);
+ BPF_CORE_WRITE_BITFIELD(&bitfields, ub7, 16);
+
+ ub1 = BPF_CORE_READ_BITFIELD(&bitfields, ub1);
+ ub7 = BPF_CORE_READ_BITFIELD(&bitfields, ub7);
+
+ return (ub7 << 1) | ub1;
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/verifier_bounds.c b/tools/testing/selftests/bpf/progs/verifier_bounds.c
new file mode 100644
index 000000000000..960998f16306
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/verifier_bounds.c
@@ -0,0 +1,1140 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Converted from tools/testing/selftests/bpf/verifier/bounds.c */
+
+#include <linux/bpf.h>
+#include <bpf/bpf_helpers.h>
+#include "bpf_misc.h"
+
+struct {
+ __uint(type, BPF_MAP_TYPE_HASH);
+ __uint(max_entries, 1);
+ __type(key, long long);
+ __type(value, long long);
+} map_hash_8b SEC(".maps");
+
+SEC("socket")
+__description("subtraction bounds (map value) variant 1")
+__failure __msg("R0 max value is outside of the allowed memory range")
+__failure_unpriv
+__naked void bounds_map_value_variant_1(void)
+{
+ asm volatile (" \
+ r1 = 0; \
+ *(u64*)(r10 - 8) = r1; \
+ r2 = r10; \
+ r2 += -8; \
+ r1 = %[map_hash_8b] ll; \
+ call %[bpf_map_lookup_elem]; \
+ if r0 == 0 goto l0_%=; \
+ r1 = *(u8*)(r0 + 0); \
+ if r1 > 0xff goto l0_%=; \
+ r3 = *(u8*)(r0 + 1); \
+ if r3 > 0xff goto l0_%=; \
+ r1 -= r3; \
+ r1 >>= 56; \
+ r0 += r1; \
+ r0 = *(u8*)(r0 + 0); \
+ exit; \
+l0_%=: r0 = 0; \
+ exit; \
+" :
+ : __imm(bpf_map_lookup_elem),
+ __imm_addr(map_hash_8b)
+ : __clobber_all);
+}
+
+SEC("socket")
+__description("subtraction bounds (map value) variant 2")
+__failure
+__msg("R0 min value is negative, either use unsigned index or do a if (index >=0) check.")
+__msg_unpriv("R1 has unknown scalar with mixed signed bounds")
+__naked void bounds_map_value_variant_2(void)
+{
+ asm volatile (" \
+ r1 = 0; \
+ *(u64*)(r10 - 8) = r1; \
+ r2 = r10; \
+ r2 += -8; \
+ r1 = %[map_hash_8b] ll; \
+ call %[bpf_map_lookup_elem]; \
+ if r0 == 0 goto l0_%=; \
+ r1 = *(u8*)(r0 + 0); \
+ if r1 > 0xff goto l0_%=; \
+ r3 = *(u8*)(r0 + 1); \
+ if r3 > 0xff goto l0_%=; \
+ r1 -= r3; \
+ r0 += r1; \
+ r0 = *(u8*)(r0 + 0); \
+ exit; \
+l0_%=: r0 = 0; \
+ exit; \
+" :
+ : __imm(bpf_map_lookup_elem),
+ __imm_addr(map_hash_8b)
+ : __clobber_all);
+}
+
+SEC("socket")
+__description("check subtraction on pointers for unpriv")
+__success __failure_unpriv __msg_unpriv("R9 pointer -= pointer prohibited")
+__retval(0)
+__naked void subtraction_on_pointers_for_unpriv(void)
+{
+ asm volatile (" \
+ r0 = 0; \
+ r1 = %[map_hash_8b] ll; \
+ r2 = r10; \
+ r2 += -8; \
+ r6 = 9; \
+ *(u64*)(r2 + 0) = r6; \
+ call %[bpf_map_lookup_elem]; \
+ r9 = r10; \
+ r9 -= r0; \
+ r1 = %[map_hash_8b] ll; \
+ r2 = r10; \
+ r2 += -8; \
+ r6 = 0; \
+ *(u64*)(r2 + 0) = r6; \
+ call %[bpf_map_lookup_elem]; \
+ if r0 != 0 goto l0_%=; \
+ exit; \
+l0_%=: *(u64*)(r0 + 0) = r9; \
+ r0 = 0; \
+ exit; \
+" :
+ : __imm(bpf_map_lookup_elem),
+ __imm_addr(map_hash_8b)
+ : __clobber_all);
+}
+
+SEC("socket")
+__description("bounds check based on zero-extended MOV")
+__success __success_unpriv __retval(0)
+__naked void based_on_zero_extended_mov(void)
+{
+ asm volatile (" \
+ r1 = 0; \
+ *(u64*)(r10 - 8) = r1; \
+ r2 = r10; \
+ r2 += -8; \
+ r1 = %[map_hash_8b] ll; \
+ call %[bpf_map_lookup_elem]; \
+ if r0 == 0 goto l0_%=; \
+ /* r2 = 0x0000'0000'ffff'ffff */ \
+ w2 = 0xffffffff; \
+ /* r2 = 0 */ \
+ r2 >>= 32; \
+ /* no-op */ \
+ r0 += r2; \
+ /* access at offset 0 */ \
+ r0 = *(u8*)(r0 + 0); \
+l0_%=: /* exit */ \
+ r0 = 0; \
+ exit; \
+" :
+ : __imm(bpf_map_lookup_elem),
+ __imm_addr(map_hash_8b)
+ : __clobber_all);
+}
+
+SEC("socket")
+__description("bounds check based on sign-extended MOV. test1")
+__failure __msg("map_value pointer and 4294967295")
+__failure_unpriv
+__naked void on_sign_extended_mov_test1(void)
+{
+ asm volatile (" \
+ r1 = 0; \
+ *(u64*)(r10 - 8) = r1; \
+ r2 = r10; \
+ r2 += -8; \
+ r1 = %[map_hash_8b] ll; \
+ call %[bpf_map_lookup_elem]; \
+ if r0 == 0 goto l0_%=; \
+ /* r2 = 0xffff'ffff'ffff'ffff */ \
+ r2 = 0xffffffff; \
+ /* r2 = 0xffff'ffff */ \
+ r2 >>= 32; \
+ /* r0 = <oob pointer> */ \
+ r0 += r2; \
+ /* access to OOB pointer */ \
+ r0 = *(u8*)(r0 + 0); \
+l0_%=: /* exit */ \
+ r0 = 0; \
+ exit; \
+" :
+ : __imm(bpf_map_lookup_elem),
+ __imm_addr(map_hash_8b)
+ : __clobber_all);
+}
+
+SEC("socket")
+__description("bounds check based on sign-extended MOV. test2")
+__failure __msg("R0 min value is outside of the allowed memory range")
+__failure_unpriv
+__naked void on_sign_extended_mov_test2(void)
+{
+ asm volatile (" \
+ r1 = 0; \
+ *(u64*)(r10 - 8) = r1; \
+ r2 = r10; \
+ r2 += -8; \
+ r1 = %[map_hash_8b] ll; \
+ call %[bpf_map_lookup_elem]; \
+ if r0 == 0 goto l0_%=; \
+ /* r2 = 0xffff'ffff'ffff'ffff */ \
+ r2 = 0xffffffff; \
+ /* r2 = 0xfff'ffff */ \
+ r2 >>= 36; \
+ /* r0 = <oob pointer> */ \
+ r0 += r2; \
+ /* access to OOB pointer */ \
+ r0 = *(u8*)(r0 + 0); \
+l0_%=: /* exit */ \
+ r0 = 0; \
+ exit; \
+" :
+ : __imm(bpf_map_lookup_elem),
+ __imm_addr(map_hash_8b)
+ : __clobber_all);
+}
+
+SEC("tc")
+__description("bounds check based on reg_off + var_off + insn_off. test1")
+__failure __msg("value_size=8 off=1073741825")
+__naked void var_off_insn_off_test1(void)
+{
+ asm volatile (" \
+ r6 = *(u32*)(r1 + %[__sk_buff_mark]); \
+ r1 = 0; \
+ *(u64*)(r10 - 8) = r1; \
+ r2 = r10; \
+ r2 += -8; \
+ r1 = %[map_hash_8b] ll; \
+ call %[bpf_map_lookup_elem]; \
+ if r0 == 0 goto l0_%=; \
+ r6 &= 1; \
+ r6 += %[__imm_0]; \
+ r0 += r6; \
+ r0 += %[__imm_0]; \
+l0_%=: r0 = *(u8*)(r0 + 3); \
+ r0 = 0; \
+ exit; \
+" :
+ : __imm(bpf_map_lookup_elem),
+ __imm_addr(map_hash_8b),
+ __imm_const(__imm_0, (1 << 29) - 1),
+ __imm_const(__sk_buff_mark, offsetof(struct __sk_buff, mark))
+ : __clobber_all);
+}
+
+SEC("tc")
+__description("bounds check based on reg_off + var_off + insn_off. test2")
+__failure __msg("value 1073741823")
+__naked void var_off_insn_off_test2(void)
+{
+ asm volatile (" \
+ r6 = *(u32*)(r1 + %[__sk_buff_mark]); \
+ r1 = 0; \
+ *(u64*)(r10 - 8) = r1; \
+ r2 = r10; \
+ r2 += -8; \
+ r1 = %[map_hash_8b] ll; \
+ call %[bpf_map_lookup_elem]; \
+ if r0 == 0 goto l0_%=; \
+ r6 &= 1; \
+ r6 += %[__imm_0]; \
+ r0 += r6; \
+ r0 += %[__imm_1]; \
+l0_%=: r0 = *(u8*)(r0 + 3); \
+ r0 = 0; \
+ exit; \
+" :
+ : __imm(bpf_map_lookup_elem),
+ __imm_addr(map_hash_8b),
+ __imm_const(__imm_0, (1 << 30) - 1),
+ __imm_const(__imm_1, (1 << 29) - 1),
+ __imm_const(__sk_buff_mark, offsetof(struct __sk_buff, mark))
+ : __clobber_all);
+}
+
+SEC("socket")
+__description("bounds check after truncation of non-boundary-crossing range")
+__success __success_unpriv __retval(0)
+__naked void of_non_boundary_crossing_range(void)
+{
+ asm volatile (" \
+ r1 = 0; \
+ *(u64*)(r10 - 8) = r1; \
+ r2 = r10; \
+ r2 += -8; \
+ r1 = %[map_hash_8b] ll; \
+ call %[bpf_map_lookup_elem]; \
+ if r0 == 0 goto l0_%=; \
+ /* r1 = [0x00, 0xff] */ \
+ r1 = *(u8*)(r0 + 0); \
+ r2 = 1; \
+ /* r2 = 0x10'0000'0000 */ \
+ r2 <<= 36; \
+ /* r1 = [0x10'0000'0000, 0x10'0000'00ff] */ \
+ r1 += r2; \
+ /* r1 = [0x10'7fff'ffff, 0x10'8000'00fe] */ \
+ r1 += 0x7fffffff; \
+ /* r1 = [0x00, 0xff] */ \
+ w1 -= 0x7fffffff; \
+ /* r1 = 0 */ \
+ r1 >>= 8; \
+ /* no-op */ \
+ r0 += r1; \
+ /* access at offset 0 */ \
+ r0 = *(u8*)(r0 + 0); \
+l0_%=: /* exit */ \
+ r0 = 0; \
+ exit; \
+" :
+ : __imm(bpf_map_lookup_elem),
+ __imm_addr(map_hash_8b)
+ : __clobber_all);
+}
+
+SEC("socket")
+__description("bounds check after truncation of boundary-crossing range (1)")
+__failure
+/* not actually fully unbounded, but the bound is very high */
+__msg("value -4294967168 makes map_value pointer be out of bounds")
+__failure_unpriv
+__naked void of_boundary_crossing_range_1(void)
+{
+ asm volatile (" \
+ r1 = 0; \
+ *(u64*)(r10 - 8) = r1; \
+ r2 = r10; \
+ r2 += -8; \
+ r1 = %[map_hash_8b] ll; \
+ call %[bpf_map_lookup_elem]; \
+ if r0 == 0 goto l0_%=; \
+ /* r1 = [0x00, 0xff] */ \
+ r1 = *(u8*)(r0 + 0); \
+ r1 += %[__imm_0]; \
+ /* r1 = [0xffff'ff80, 0x1'0000'007f] */ \
+ r1 += %[__imm_0]; \
+ /* r1 = [0xffff'ff80, 0xffff'ffff] or \
+ * [0x0000'0000, 0x0000'007f] \
+ */ \
+ w1 += 0; \
+ r1 -= %[__imm_0]; \
+ /* r1 = [0x00, 0xff] or \
+ * [0xffff'ffff'0000'0080, 0xffff'ffff'ffff'ffff]\
+ */ \
+ r1 -= %[__imm_0]; \
+ /* error on OOB pointer computation */ \
+ r0 += r1; \
+ /* exit */ \
+ r0 = 0; \
+l0_%=: exit; \
+" :
+ : __imm(bpf_map_lookup_elem),
+ __imm_addr(map_hash_8b),
+ __imm_const(__imm_0, 0xffffff80 >> 1)
+ : __clobber_all);
+}
+
+SEC("socket")
+__description("bounds check after truncation of boundary-crossing range (2)")
+__failure __msg("value -4294967168 makes map_value pointer be out of bounds")
+__failure_unpriv
+__naked void of_boundary_crossing_range_2(void)
+{
+ asm volatile (" \
+ r1 = 0; \
+ *(u64*)(r10 - 8) = r1; \
+ r2 = r10; \
+ r2 += -8; \
+ r1 = %[map_hash_8b] ll; \
+ call %[bpf_map_lookup_elem]; \
+ if r0 == 0 goto l0_%=; \
+ /* r1 = [0x00, 0xff] */ \
+ r1 = *(u8*)(r0 + 0); \
+ r1 += %[__imm_0]; \
+ /* r1 = [0xffff'ff80, 0x1'0000'007f] */ \
+ r1 += %[__imm_0]; \
+ /* r1 = [0xffff'ff80, 0xffff'ffff] or \
+ * [0x0000'0000, 0x0000'007f] \
+ * difference to previous test: truncation via MOV32\
+ * instead of ALU32. \
+ */ \
+ w1 = w1; \
+ r1 -= %[__imm_0]; \
+ /* r1 = [0x00, 0xff] or \
+ * [0xffff'ffff'0000'0080, 0xffff'ffff'ffff'ffff]\
+ */ \
+ r1 -= %[__imm_0]; \
+ /* error on OOB pointer computation */ \
+ r0 += r1; \
+ /* exit */ \
+ r0 = 0; \
+l0_%=: exit; \
+" :
+ : __imm(bpf_map_lookup_elem),
+ __imm_addr(map_hash_8b),
+ __imm_const(__imm_0, 0xffffff80 >> 1)
+ : __clobber_all);
+}
+
+SEC("socket")
+__description("bounds check after wrapping 32-bit addition")
+__success __success_unpriv __retval(0)
+__naked void after_wrapping_32_bit_addition(void)
+{
+ asm volatile (" \
+ r1 = 0; \
+ *(u64*)(r10 - 8) = r1; \
+ r2 = r10; \
+ r2 += -8; \
+ r1 = %[map_hash_8b] ll; \
+ call %[bpf_map_lookup_elem]; \
+ if r0 == 0 goto l0_%=; \
+ /* r1 = 0x7fff'ffff */ \
+ r1 = 0x7fffffff; \
+ /* r1 = 0xffff'fffe */ \
+ r1 += 0x7fffffff; \
+ /* r1 = 0 */ \
+ w1 += 2; \
+ /* no-op */ \
+ r0 += r1; \
+ /* access at offset 0 */ \
+ r0 = *(u8*)(r0 + 0); \
+l0_%=: /* exit */ \
+ r0 = 0; \
+ exit; \
+" :
+ : __imm(bpf_map_lookup_elem),
+ __imm_addr(map_hash_8b)
+ : __clobber_all);
+}
+
+SEC("socket")
+__description("bounds check after shift with oversized count operand")
+__failure __msg("R0 max value is outside of the allowed memory range")
+__failure_unpriv
+__naked void shift_with_oversized_count_operand(void)
+{
+ asm volatile (" \
+ r1 = 0; \
+ *(u64*)(r10 - 8) = r1; \
+ r2 = r10; \
+ r2 += -8; \
+ r1 = %[map_hash_8b] ll; \
+ call %[bpf_map_lookup_elem]; \
+ if r0 == 0 goto l0_%=; \
+ r2 = 32; \
+ r1 = 1; \
+ /* r1 = (u32)1 << (u32)32 = ? */ \
+ w1 <<= w2; \
+ /* r1 = [0x0000, 0xffff] */ \
+ r1 &= 0xffff; \
+ /* computes unknown pointer, potentially OOB */ \
+ r0 += r1; \
+ /* potentially OOB access */ \
+ r0 = *(u8*)(r0 + 0); \
+l0_%=: /* exit */ \
+ r0 = 0; \
+ exit; \
+" :
+ : __imm(bpf_map_lookup_elem),
+ __imm_addr(map_hash_8b)
+ : __clobber_all);
+}
+
+SEC("socket")
+__description("bounds check after right shift of maybe-negative number")
+__failure __msg("R0 unbounded memory access")
+__failure_unpriv
+__naked void shift_of_maybe_negative_number(void)
+{
+ asm volatile (" \
+ r1 = 0; \
+ *(u64*)(r10 - 8) = r1; \
+ r2 = r10; \
+ r2 += -8; \
+ r1 = %[map_hash_8b] ll; \
+ call %[bpf_map_lookup_elem]; \
+ if r0 == 0 goto l0_%=; \
+ /* r1 = [0x00, 0xff] */ \
+ r1 = *(u8*)(r0 + 0); \
+ /* r1 = [-0x01, 0xfe] */ \
+ r1 -= 1; \
+ /* r1 = 0 or 0xff'ffff'ffff'ffff */ \
+ r1 >>= 8; \
+ /* r1 = 0 or 0xffff'ffff'ffff */ \
+ r1 >>= 8; \
+ /* computes unknown pointer, potentially OOB */ \
+ r0 += r1; \
+ /* potentially OOB access */ \
+ r0 = *(u8*)(r0 + 0); \
+l0_%=: /* exit */ \
+ r0 = 0; \
+ exit; \
+" :
+ : __imm(bpf_map_lookup_elem),
+ __imm_addr(map_hash_8b)
+ : __clobber_all);
+}
+
+SEC("socket")
+__description("bounds check after 32-bit right shift with 64-bit input")
+__failure __msg("math between map_value pointer and 4294967294 is not allowed")
+__failure_unpriv
+__naked void shift_with_64_bit_input(void)
+{
+ asm volatile (" \
+ r1 = 0; \
+ *(u64*)(r10 - 8) = r1; \
+ r2 = r10; \
+ r2 += -8; \
+ r1 = %[map_hash_8b] ll; \
+ call %[bpf_map_lookup_elem]; \
+ if r0 == 0 goto l0_%=; \
+ r1 = 2; \
+ /* r1 = 1<<32 */ \
+ r1 <<= 31; \
+ /* r1 = 0 (NOT 2!) */ \
+ w1 >>= 31; \
+ /* r1 = 0xffff'fffe (NOT 0!) */ \
+ w1 -= 2; \
+ /* error on computing OOB pointer */ \
+ r0 += r1; \
+ /* exit */ \
+ r0 = 0; \
+l0_%=: exit; \
+" :
+ : __imm(bpf_map_lookup_elem),
+ __imm_addr(map_hash_8b)
+ : __clobber_all);
+}
+
+SEC("socket")
+__description("bounds check map access with off+size signed 32bit overflow. test1")
+__failure __msg("map_value pointer and 2147483646")
+__failure_unpriv
+__naked void size_signed_32bit_overflow_test1(void)
+{
+ asm volatile (" \
+ r1 = 0; \
+ *(u64*)(r10 - 8) = r1; \
+ r2 = r10; \
+ r2 += -8; \
+ r1 = %[map_hash_8b] ll; \
+ call %[bpf_map_lookup_elem]; \
+ if r0 != 0 goto l0_%=; \
+ exit; \
+l0_%=: r0 += 0x7ffffffe; \
+ r0 = *(u64*)(r0 + 0); \
+ goto l1_%=; \
+l1_%=: exit; \
+" :
+ : __imm(bpf_map_lookup_elem),
+ __imm_addr(map_hash_8b)
+ : __clobber_all);
+}
+
+SEC("socket")
+__description("bounds check map access with off+size signed 32bit overflow. test2")
+__failure __msg("pointer offset 1073741822")
+__msg_unpriv("R0 pointer arithmetic of map value goes out of range")
+__naked void size_signed_32bit_overflow_test2(void)
+{
+ asm volatile (" \
+ r1 = 0; \
+ *(u64*)(r10 - 8) = r1; \
+ r2 = r10; \
+ r2 += -8; \
+ r1 = %[map_hash_8b] ll; \
+ call %[bpf_map_lookup_elem]; \
+ if r0 != 0 goto l0_%=; \
+ exit; \
+l0_%=: r0 += 0x1fffffff; \
+ r0 += 0x1fffffff; \
+ r0 += 0x1fffffff; \
+ r0 = *(u64*)(r0 + 0); \
+ goto l1_%=; \
+l1_%=: exit; \
+" :
+ : __imm(bpf_map_lookup_elem),
+ __imm_addr(map_hash_8b)
+ : __clobber_all);
+}
+
+SEC("socket")
+__description("bounds check map access with off+size signed 32bit overflow. test3")
+__failure __msg("pointer offset -1073741822")
+__msg_unpriv("R0 pointer arithmetic of map value goes out of range")
+__naked void size_signed_32bit_overflow_test3(void)
+{
+ asm volatile (" \
+ r1 = 0; \
+ *(u64*)(r10 - 8) = r1; \
+ r2 = r10; \
+ r2 += -8; \
+ r1 = %[map_hash_8b] ll; \
+ call %[bpf_map_lookup_elem]; \
+ if r0 != 0 goto l0_%=; \
+ exit; \
+l0_%=: r0 -= 0x1fffffff; \
+ r0 -= 0x1fffffff; \
+ r0 = *(u64*)(r0 + 2); \
+ goto l1_%=; \
+l1_%=: exit; \
+" :
+ : __imm(bpf_map_lookup_elem),
+ __imm_addr(map_hash_8b)
+ : __clobber_all);
+}
+
+SEC("socket")
+__description("bounds check map access with off+size signed 32bit overflow. test4")
+__failure __msg("map_value pointer and 1000000000000")
+__failure_unpriv
+__naked void size_signed_32bit_overflow_test4(void)
+{
+ asm volatile (" \
+ r1 = 0; \
+ *(u64*)(r10 - 8) = r1; \
+ r2 = r10; \
+ r2 += -8; \
+ r1 = %[map_hash_8b] ll; \
+ call %[bpf_map_lookup_elem]; \
+ if r0 != 0 goto l0_%=; \
+ exit; \
+l0_%=: r1 = 1000000; \
+ r1 *= 1000000; \
+ r0 += r1; \
+ r0 = *(u64*)(r0 + 2); \
+ goto l1_%=; \
+l1_%=: exit; \
+" :
+ : __imm(bpf_map_lookup_elem),
+ __imm_addr(map_hash_8b)
+ : __clobber_all);
+}
+
+SEC("socket")
+__description("bounds check mixed 32bit and 64bit arithmetic. test1")
+__success __failure_unpriv __msg_unpriv("R0 invalid mem access 'scalar'")
+__retval(0)
+__naked void _32bit_and_64bit_arithmetic_test1(void)
+{
+ asm volatile (" \
+ r0 = 0; \
+ r1 = -1; \
+ r1 <<= 32; \
+ r1 += 1; \
+ /* r1 = 0xffffFFFF00000001 */ \
+ if w1 > 1 goto l0_%=; \
+ /* check ALU64 op keeps 32bit bounds */ \
+ r1 += 1; \
+ if w1 > 2 goto l0_%=; \
+ goto l1_%=; \
+l0_%=: /* invalid ldx if bounds are lost above */ \
+ r0 = *(u64*)(r0 - 1); \
+l1_%=: exit; \
+" ::: __clobber_all);
+}
+
+SEC("socket")
+__description("bounds check mixed 32bit and 64bit arithmetic. test2")
+__success __failure_unpriv __msg_unpriv("R0 invalid mem access 'scalar'")
+__retval(0)
+__naked void _32bit_and_64bit_arithmetic_test2(void)
+{
+ asm volatile (" \
+ r0 = 0; \
+ r1 = -1; \
+ r1 <<= 32; \
+ r1 += 1; \
+ /* r1 = 0xffffFFFF00000001 */ \
+ r2 = 3; \
+ /* r1 = 0x2 */ \
+ w1 += 1; \
+ /* check ALU32 op zero extends 64bit bounds */ \
+ if r1 > r2 goto l0_%=; \
+ goto l1_%=; \
+l0_%=: /* invalid ldx if bounds are lost above */ \
+ r0 = *(u64*)(r0 - 1); \
+l1_%=: exit; \
+" ::: __clobber_all);
+}
+
+SEC("tc")
+__description("assigning 32bit bounds to 64bit for wA = 0, wB = wA")
+__success __retval(0) __flag(BPF_F_ANY_ALIGNMENT)
+__naked void for_wa_0_wb_wa(void)
+{
+ asm volatile (" \
+ r8 = *(u32*)(r1 + %[__sk_buff_data_end]); \
+ r7 = *(u32*)(r1 + %[__sk_buff_data]); \
+ w9 = 0; \
+ w2 = w9; \
+ r6 = r7; \
+ r6 += r2; \
+ r3 = r6; \
+ r3 += 8; \
+ if r3 > r8 goto l0_%=; \
+ r5 = *(u32*)(r6 + 0); \
+l0_%=: r0 = 0; \
+ exit; \
+" :
+ : __imm_const(__sk_buff_data, offsetof(struct __sk_buff, data)),
+ __imm_const(__sk_buff_data_end, offsetof(struct __sk_buff, data_end))
+ : __clobber_all);
+}
+
+SEC("socket")
+__description("bounds check for reg = 0, reg xor 1")
+__success __failure_unpriv
+__msg_unpriv("R0 min value is outside of the allowed memory range")
+__retval(0)
+__naked void reg_0_reg_xor_1(void)
+{
+ asm volatile (" \
+ r1 = 0; \
+ *(u64*)(r10 - 8) = r1; \
+ r2 = r10; \
+ r2 += -8; \
+ r1 = %[map_hash_8b] ll; \
+ call %[bpf_map_lookup_elem]; \
+ if r0 != 0 goto l0_%=; \
+ exit; \
+l0_%=: r1 = 0; \
+ r1 ^= 1; \
+ if r1 != 0 goto l1_%=; \
+ r0 = *(u64*)(r0 + 8); \
+l1_%=: r0 = 0; \
+ exit; \
+" :
+ : __imm(bpf_map_lookup_elem),
+ __imm_addr(map_hash_8b)
+ : __clobber_all);
+}
+
+SEC("socket")
+__description("bounds check for reg32 = 0, reg32 xor 1")
+__success __failure_unpriv
+__msg_unpriv("R0 min value is outside of the allowed memory range")
+__retval(0)
+__naked void reg32_0_reg32_xor_1(void)
+{
+ asm volatile (" \
+ r1 = 0; \
+ *(u64*)(r10 - 8) = r1; \
+ r2 = r10; \
+ r2 += -8; \
+ r1 = %[map_hash_8b] ll; \
+ call %[bpf_map_lookup_elem]; \
+ if r0 != 0 goto l0_%=; \
+ exit; \
+l0_%=: w1 = 0; \
+ w1 ^= 1; \
+ if w1 != 0 goto l1_%=; \
+ r0 = *(u64*)(r0 + 8); \
+l1_%=: r0 = 0; \
+ exit; \
+" :
+ : __imm(bpf_map_lookup_elem),
+ __imm_addr(map_hash_8b)
+ : __clobber_all);
+}
+
+SEC("socket")
+__description("bounds check for reg = 2, reg xor 3")
+__success __failure_unpriv
+__msg_unpriv("R0 min value is outside of the allowed memory range")
+__retval(0)
+__naked void reg_2_reg_xor_3(void)
+{
+ asm volatile (" \
+ r1 = 0; \
+ *(u64*)(r10 - 8) = r1; \
+ r2 = r10; \
+ r2 += -8; \
+ r1 = %[map_hash_8b] ll; \
+ call %[bpf_map_lookup_elem]; \
+ if r0 != 0 goto l0_%=; \
+ exit; \
+l0_%=: r1 = 2; \
+ r1 ^= 3; \
+ if r1 > 0 goto l1_%=; \
+ r0 = *(u64*)(r0 + 8); \
+l1_%=: r0 = 0; \
+ exit; \
+" :
+ : __imm(bpf_map_lookup_elem),
+ __imm_addr(map_hash_8b)
+ : __clobber_all);
+}
+
+SEC("socket")
+__description("bounds check for reg = any, reg xor 3")
+__failure __msg("invalid access to map value")
+__msg_unpriv("invalid access to map value")
+__naked void reg_any_reg_xor_3(void)
+{
+ asm volatile (" \
+ r1 = 0; \
+ *(u64*)(r10 - 8) = r1; \
+ r2 = r10; \
+ r2 += -8; \
+ r1 = %[map_hash_8b] ll; \
+ call %[bpf_map_lookup_elem]; \
+ if r0 != 0 goto l0_%=; \
+ exit; \
+l0_%=: r1 = *(u64*)(r0 + 0); \
+ r1 ^= 3; \
+ if r1 != 0 goto l1_%=; \
+ r0 = *(u64*)(r0 + 8); \
+l1_%=: r0 = 0; \
+ exit; \
+" :
+ : __imm(bpf_map_lookup_elem),
+ __imm_addr(map_hash_8b)
+ : __clobber_all);
+}
+
+SEC("socket")
+__description("bounds check for reg32 = any, reg32 xor 3")
+__failure __msg("invalid access to map value")
+__msg_unpriv("invalid access to map value")
+__naked void reg32_any_reg32_xor_3(void)
+{
+ asm volatile (" \
+ r1 = 0; \
+ *(u64*)(r10 - 8) = r1; \
+ r2 = r10; \
+ r2 += -8; \
+ r1 = %[map_hash_8b] ll; \
+ call %[bpf_map_lookup_elem]; \
+ if r0 != 0 goto l0_%=; \
+ exit; \
+l0_%=: r1 = *(u64*)(r0 + 0); \
+ w1 ^= 3; \
+ if w1 != 0 goto l1_%=; \
+ r0 = *(u64*)(r0 + 8); \
+l1_%=: r0 = 0; \
+ exit; \
+" :
+ : __imm(bpf_map_lookup_elem),
+ __imm_addr(map_hash_8b)
+ : __clobber_all);
+}
+
+SEC("socket")
+__description("bounds check for reg > 0, reg xor 3")
+__success __failure_unpriv
+__msg_unpriv("R0 min value is outside of the allowed memory range")
+__retval(0)
+__naked void reg_0_reg_xor_3(void)
+{
+ asm volatile (" \
+ r1 = 0; \
+ *(u64*)(r10 - 8) = r1; \
+ r2 = r10; \
+ r2 += -8; \
+ r1 = %[map_hash_8b] ll; \
+ call %[bpf_map_lookup_elem]; \
+ if r0 != 0 goto l0_%=; \
+ exit; \
+l0_%=: r1 = *(u64*)(r0 + 0); \
+ if r1 <= 0 goto l1_%=; \
+ r1 ^= 3; \
+ if r1 >= 0 goto l1_%=; \
+ r0 = *(u64*)(r0 + 8); \
+l1_%=: r0 = 0; \
+ exit; \
+" :
+ : __imm(bpf_map_lookup_elem),
+ __imm_addr(map_hash_8b)
+ : __clobber_all);
+}
+
+SEC("socket")
+__description("bounds check for reg32 > 0, reg32 xor 3")
+__success __failure_unpriv
+__msg_unpriv("R0 min value is outside of the allowed memory range")
+__retval(0)
+__naked void reg32_0_reg32_xor_3(void)
+{
+ asm volatile (" \
+ r1 = 0; \
+ *(u64*)(r10 - 8) = r1; \
+ r2 = r10; \
+ r2 += -8; \
+ r1 = %[map_hash_8b] ll; \
+ call %[bpf_map_lookup_elem]; \
+ if r0 != 0 goto l0_%=; \
+ exit; \
+l0_%=: r1 = *(u64*)(r0 + 0); \
+ if w1 <= 0 goto l1_%=; \
+ w1 ^= 3; \
+ if w1 >= 0 goto l1_%=; \
+ r0 = *(u64*)(r0 + 8); \
+l1_%=: r0 = 0; \
+ exit; \
+" :
+ : __imm(bpf_map_lookup_elem),
+ __imm_addr(map_hash_8b)
+ : __clobber_all);
+}
+
+SEC("socket")
+__description("bounds checks after 32-bit truncation. test 1")
+__success __failure_unpriv __msg_unpriv("R0 leaks addr")
+__retval(0)
+__naked void _32_bit_truncation_test_1(void)
+{
+ asm volatile (" \
+ r1 = 0; \
+ *(u64*)(r10 - 8) = r1; \
+ r2 = r10; \
+ r2 += -8; \
+ r1 = %[map_hash_8b] ll; \
+ call %[bpf_map_lookup_elem]; \
+ if r0 == 0 goto l0_%=; \
+ r1 = *(u32*)(r0 + 0); \
+ /* This used to reduce the max bound to 0x7fffffff */\
+ if r1 == 0 goto l1_%=; \
+ if r1 > 0x7fffffff goto l0_%=; \
+l1_%=: r0 = 0; \
+l0_%=: exit; \
+" :
+ : __imm(bpf_map_lookup_elem),
+ __imm_addr(map_hash_8b)
+ : __clobber_all);
+}
+
+SEC("socket")
+__description("bounds checks after 32-bit truncation. test 2")
+__success __failure_unpriv __msg_unpriv("R0 leaks addr")
+__retval(0)
+__naked void _32_bit_truncation_test_2(void)
+{
+ asm volatile (" \
+ r1 = 0; \
+ *(u64*)(r10 - 8) = r1; \
+ r2 = r10; \
+ r2 += -8; \
+ r1 = %[map_hash_8b] ll; \
+ call %[bpf_map_lookup_elem]; \
+ if r0 == 0 goto l0_%=; \
+ r1 = *(u32*)(r0 + 0); \
+ if r1 s< 1 goto l1_%=; \
+ if w1 s< 0 goto l0_%=; \
+l1_%=: r0 = 0; \
+l0_%=: exit; \
+" :
+ : __imm(bpf_map_lookup_elem),
+ __imm_addr(map_hash_8b)
+ : __clobber_all);
+}
+
+SEC("xdp")
+__description("bound check with JMP_JLT for crossing 64-bit signed boundary")
+__success __retval(0)
+__naked void crossing_64_bit_signed_boundary_1(void)
+{
+ asm volatile (" \
+ r2 = *(u32*)(r1 + %[xdp_md_data]); \
+ r3 = *(u32*)(r1 + %[xdp_md_data_end]); \
+ r1 = r2; \
+ r1 += 1; \
+ if r1 > r3 goto l0_%=; \
+ r1 = *(u8*)(r2 + 0); \
+ r0 = 0x7fffffffffffff10 ll; \
+ r1 += r0; \
+ r0 = 0x8000000000000000 ll; \
+l1_%=: r0 += 1; \
+ /* r1 unsigned range is [0x7fffffffffffff10, 0x800000000000000f] */\
+ if r0 < r1 goto l1_%=; \
+l0_%=: r0 = 0; \
+ exit; \
+" :
+ : __imm_const(xdp_md_data, offsetof(struct xdp_md, data)),
+ __imm_const(xdp_md_data_end, offsetof(struct xdp_md, data_end))
+ : __clobber_all);
+}
+
+SEC("xdp")
+__description("bound check with JMP_JSLT for crossing 64-bit signed boundary")
+__success __retval(0)
+__flag(!BPF_F_TEST_REG_INVARIANTS) /* known invariants violation */
+__naked void crossing_64_bit_signed_boundary_2(void)
+{
+ asm volatile (" \
+ r2 = *(u32*)(r1 + %[xdp_md_data]); \
+ r3 = *(u32*)(r1 + %[xdp_md_data_end]); \
+ r1 = r2; \
+ r1 += 1; \
+ if r1 > r3 goto l0_%=; \
+ r1 = *(u8*)(r2 + 0); \
+ r0 = 0x7fffffffffffff10 ll; \
+ r1 += r0; \
+ r2 = 0x8000000000000fff ll; \
+ r0 = 0x8000000000000000 ll; \
+l1_%=: r0 += 1; \
+ if r0 s> r2 goto l0_%=; \
+ /* r1 signed range is [S64_MIN, S64_MAX] */ \
+ if r0 s< r1 goto l1_%=; \
+ r0 = 1; \
+ exit; \
+l0_%=: r0 = 0; \
+ exit; \
+" :
+ : __imm_const(xdp_md_data, offsetof(struct xdp_md, data)),
+ __imm_const(xdp_md_data_end, offsetof(struct xdp_md, data_end))
+ : __clobber_all);
+}
+
+SEC("xdp")
+__description("bound check for loop upper bound greater than U32_MAX")
+__success __retval(0)
+__naked void bound_greater_than_u32_max(void)
+{
+ asm volatile (" \
+ r2 = *(u32*)(r1 + %[xdp_md_data]); \
+ r3 = *(u32*)(r1 + %[xdp_md_data_end]); \
+ r1 = r2; \
+ r1 += 1; \
+ if r1 > r3 goto l0_%=; \
+ r1 = *(u8*)(r2 + 0); \
+ r0 = 0x100000000 ll; \
+ r1 += r0; \
+ r0 = 0x100000000 ll; \
+l1_%=: r0 += 1; \
+ if r0 < r1 goto l1_%=; \
+l0_%=: r0 = 0; \
+ exit; \
+" :
+ : __imm_const(xdp_md_data, offsetof(struct xdp_md, data)),
+ __imm_const(xdp_md_data_end, offsetof(struct xdp_md, data_end))
+ : __clobber_all);
+}
+
+SEC("xdp")
+__description("bound check with JMP32_JLT for crossing 32-bit signed boundary")
+__success __retval(0)
+__naked void crossing_32_bit_signed_boundary_1(void)
+{
+ asm volatile (" \
+ r2 = *(u32*)(r1 + %[xdp_md_data]); \
+ r3 = *(u32*)(r1 + %[xdp_md_data_end]); \
+ r1 = r2; \
+ r1 += 1; \
+ if r1 > r3 goto l0_%=; \
+ r1 = *(u8*)(r2 + 0); \
+ w0 = 0x7fffff10; \
+ w1 += w0; \
+ w0 = 0x80000000; \
+l1_%=: w0 += 1; \
+ /* r1 unsigned range is [0, 0x8000000f] */ \
+ if w0 < w1 goto l1_%=; \
+l0_%=: r0 = 0; \
+ exit; \
+" :
+ : __imm_const(xdp_md_data, offsetof(struct xdp_md, data)),
+ __imm_const(xdp_md_data_end, offsetof(struct xdp_md, data_end))
+ : __clobber_all);
+}
+
+SEC("xdp")
+__description("bound check with JMP32_JSLT for crossing 32-bit signed boundary")
+__success __retval(0)
+__flag(!BPF_F_TEST_REG_INVARIANTS) /* known invariants violation */
+__naked void crossing_32_bit_signed_boundary_2(void)
+{
+ asm volatile (" \
+ r2 = *(u32*)(r1 + %[xdp_md_data]); \
+ r3 = *(u32*)(r1 + %[xdp_md_data_end]); \
+ r1 = r2; \
+ r1 += 1; \
+ if r1 > r3 goto l0_%=; \
+ r1 = *(u8*)(r2 + 0); \
+ w0 = 0x7fffff10; \
+ w1 += w0; \
+ w2 = 0x80000fff; \
+ w0 = 0x80000000; \
+l1_%=: w0 += 1; \
+ if w0 s> w2 goto l0_%=; \
+ /* r1 signed range is [S32_MIN, S32_MAX] */ \
+ if w0 s< w1 goto l1_%=; \
+ r0 = 1; \
+ exit; \
+l0_%=: r0 = 0; \
+ exit; \
+" :
+ : __imm_const(xdp_md_data, offsetof(struct xdp_md, data)),
+ __imm_const(xdp_md_data_end, offsetof(struct xdp_md, data_end))
+ : __clobber_all);
+}
+
+SEC("tc")
+__description("bounds check with JMP_NE for reg edge")
+__success __retval(0)
+__naked void reg_not_equal_const(void)
+{
+ asm volatile (" \
+ r6 = r1; \
+ r1 = 0; \
+ *(u64*)(r10 - 8) = r1; \
+ call %[bpf_get_prandom_u32]; \
+ r4 = r0; \
+ r4 &= 7; \
+ if r4 != 0 goto l0_%=; \
+ r0 = 0; \
+ exit; \
+l0_%=: r1 = r6; \
+ r2 = 0; \
+ r3 = r10; \
+ r3 += -8; \
+ r5 = 0; \
+ /* The 4th argument of bpf_skb_store_bytes is defined as \
+ * ARG_CONST_SIZE, so 0 is not allowed. The 'r4 != 0' \
+ * is providing us this exclusion of zero from initial \
+ * [0, 7] range. \
+ */ \
+ call %[bpf_skb_store_bytes]; \
+ r0 = 0; \
+ exit; \
+" :
+ : __imm(bpf_get_prandom_u32),
+ __imm(bpf_skb_store_bytes)
+ : __clobber_all);
+}
+
+SEC("tc")
+__description("bounds check with JMP_EQ for reg edge")
+__success __retval(0)
+__naked void reg_equal_const(void)
+{
+ asm volatile (" \
+ r6 = r1; \
+ r1 = 0; \
+ *(u64*)(r10 - 8) = r1; \
+ call %[bpf_get_prandom_u32]; \
+ r4 = r0; \
+ r4 &= 7; \
+ if r4 == 0 goto l0_%=; \
+ r1 = r6; \
+ r2 = 0; \
+ r3 = r10; \
+ r3 += -8; \
+ r5 = 0; \
+ /* Just the same as what we do in reg_not_equal_const() */ \
+ call %[bpf_skb_store_bytes]; \
+l0_%=: r0 = 0; \
+ exit; \
+" :
+ : __imm(bpf_get_prandom_u32),
+ __imm(bpf_skb_store_bytes)
+ : __clobber_all);
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/verifier_bounds_deduction.c b/tools/testing/selftests/bpf/progs/verifier_bounds_deduction.c
new file mode 100644
index 000000000000..c506afbdd936
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/verifier_bounds_deduction.c
@@ -0,0 +1,171 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Converted from tools/testing/selftests/bpf/verifier/bounds_deduction.c */
+
+#include <linux/bpf.h>
+#include <bpf/bpf_helpers.h>
+#include "bpf_misc.h"
+
+SEC("socket")
+__description("check deducing bounds from const, 1")
+__failure __msg("R0 tried to subtract pointer from scalar")
+__msg_unpriv("R1 has pointer with unsupported alu operation")
+__naked void deducing_bounds_from_const_1(void)
+{
+ asm volatile (" \
+ r0 = 1; \
+ if r0 s>= 1 goto l0_%=; \
+l0_%=: r0 -= r1; \
+ exit; \
+" ::: __clobber_all);
+}
+
+SEC("socket")
+__description("check deducing bounds from const, 2")
+__success __failure_unpriv
+__msg_unpriv("R1 has pointer with unsupported alu operation")
+__retval(1)
+__naked void deducing_bounds_from_const_2(void)
+{
+ asm volatile (" \
+ r0 = 1; \
+ if r0 s>= 1 goto l0_%=; \
+ exit; \
+l0_%=: if r0 s<= 1 goto l1_%=; \
+ exit; \
+l1_%=: r1 -= r0; \
+ exit; \
+" ::: __clobber_all);
+}
+
+SEC("socket")
+__description("check deducing bounds from const, 3")
+__failure __msg("R0 tried to subtract pointer from scalar")
+__msg_unpriv("R1 has pointer with unsupported alu operation")
+__naked void deducing_bounds_from_const_3(void)
+{
+ asm volatile (" \
+ r0 = 0; \
+ if r0 s<= 0 goto l0_%=; \
+l0_%=: r0 -= r1; \
+ exit; \
+" ::: __clobber_all);
+}
+
+SEC("socket")
+__description("check deducing bounds from const, 4")
+__success __failure_unpriv
+__msg_unpriv("R6 has pointer with unsupported alu operation")
+__retval(0)
+__naked void deducing_bounds_from_const_4(void)
+{
+ asm volatile (" \
+ r6 = r1; \
+ r0 = 0; \
+ if r0 s<= 0 goto l0_%=; \
+ exit; \
+l0_%=: if r0 s>= 0 goto l1_%=; \
+ exit; \
+l1_%=: r6 -= r0; \
+ exit; \
+" ::: __clobber_all);
+}
+
+SEC("socket")
+__description("check deducing bounds from const, 5")
+__failure __msg("R0 tried to subtract pointer from scalar")
+__msg_unpriv("R1 has pointer with unsupported alu operation")
+__naked void deducing_bounds_from_const_5(void)
+{
+ asm volatile (" \
+ r0 = 0; \
+ if r0 s>= 1 goto l0_%=; \
+ r0 -= r1; \
+l0_%=: exit; \
+" ::: __clobber_all);
+}
+
+SEC("socket")
+__description("check deducing bounds from const, 6")
+__failure __msg("R0 tried to subtract pointer from scalar")
+__msg_unpriv("R1 has pointer with unsupported alu operation")
+__naked void deducing_bounds_from_const_6(void)
+{
+ asm volatile (" \
+ r0 = 0; \
+ if r0 s>= 0 goto l0_%=; \
+ exit; \
+l0_%=: r0 -= r1; \
+ exit; \
+" ::: __clobber_all);
+}
+
+SEC("socket")
+__description("check deducing bounds from const, 7")
+__failure __msg("dereference of modified ctx ptr")
+__msg_unpriv("R1 has pointer with unsupported alu operation")
+__flag(BPF_F_ANY_ALIGNMENT)
+__naked void deducing_bounds_from_const_7(void)
+{
+ asm volatile (" \
+ r0 = %[__imm_0]; \
+ if r0 s>= 0 goto l0_%=; \
+l0_%=: r1 -= r0; \
+ r0 = *(u32*)(r1 + %[__sk_buff_mark]); \
+ exit; \
+" :
+ : __imm_const(__imm_0, ~0),
+ __imm_const(__sk_buff_mark, offsetof(struct __sk_buff, mark))
+ : __clobber_all);
+}
+
+SEC("socket")
+__description("check deducing bounds from const, 8")
+__failure __msg("negative offset ctx ptr R1 off=-1 disallowed")
+__msg_unpriv("R1 has pointer with unsupported alu operation")
+__flag(BPF_F_ANY_ALIGNMENT)
+__naked void deducing_bounds_from_const_8(void)
+{
+ asm volatile (" \
+ r0 = %[__imm_0]; \
+ if r0 s>= 0 goto l0_%=; \
+ r1 += r0; \
+l0_%=: r0 = *(u32*)(r1 + %[__sk_buff_mark]); \
+ exit; \
+" :
+ : __imm_const(__imm_0, ~0),
+ __imm_const(__sk_buff_mark, offsetof(struct __sk_buff, mark))
+ : __clobber_all);
+}
+
+SEC("socket")
+__description("check deducing bounds from const, 9")
+__failure __msg("R0 tried to subtract pointer from scalar")
+__msg_unpriv("R1 has pointer with unsupported alu operation")
+__naked void deducing_bounds_from_const_9(void)
+{
+ asm volatile (" \
+ r0 = 0; \
+ if r0 s>= 0 goto l0_%=; \
+l0_%=: r0 -= r1; \
+ exit; \
+" ::: __clobber_all);
+}
+
+SEC("socket")
+__description("check deducing bounds from const, 10")
+__failure
+__msg("math between ctx pointer and register with unbounded min value is not allowed")
+__failure_unpriv
+__naked void deducing_bounds_from_const_10(void)
+{
+ asm volatile (" \
+ r0 = 0; \
+ if r0 s<= 0 goto l0_%=; \
+l0_%=: /* Marks reg as unknown. */ \
+ r0 = -r0; \
+ r0 -= r1; \
+ exit; \
+" ::: __clobber_all);
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/verifier_bounds_deduction_non_const.c b/tools/testing/selftests/bpf/progs/verifier_bounds_deduction_non_const.c
new file mode 100644
index 000000000000..823f727cf210
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/verifier_bounds_deduction_non_const.c
@@ -0,0 +1,639 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include <linux/bpf.h>
+#include <bpf/bpf_helpers.h>
+#include "bpf_misc.h"
+
+SEC("socket")
+__description("check deducing bounds from non-const, jmp64, <non_const> == <const>, 1")
+__success __retval(0)
+__naked void deducing_bounds_from_non_const_1(void)
+{
+ asm volatile (" \
+ call %[bpf_ktime_get_ns]; \
+ if r0 < 3 goto l0_%=; \
+ r2 = 2; \
+ if r0 == r2 goto l1_%=; \
+l0_%=: \
+ r0 = 0; \
+ exit; \
+l1_%=: \
+ r0 -= r1; \
+ exit; \
+" :
+ : __imm(bpf_ktime_get_ns)
+ : __clobber_all);
+}
+
+SEC("socket")
+__description("check deducing bounds from non-const, jmp64, <non_const> == <const>, 2")
+__success __retval(0)
+__naked void deducing_bounds_from_non_const_2(void)
+{
+ asm volatile (" \
+ call %[bpf_ktime_get_ns]; \
+ if r0 > 3 goto l0_%=; \
+ r2 = 4; \
+ if r0 == r2 goto l1_%=; \
+l0_%=: \
+ r0 = 0; \
+ exit; \
+l1_%=: \
+ r0 -= r1; \
+ exit; \
+" :
+ : __imm(bpf_ktime_get_ns)
+ : __clobber_all);
+}
+
+SEC("socket")
+__description("check deducing bounds from non-const, jmp64, <non_const> != <const>, 1")
+__success __retval(0)
+__naked void deducing_bounds_from_non_const_3(void)
+{
+ asm volatile (" \
+ call %[bpf_ktime_get_ns]; \
+ if r0 < 3 goto l0_%=; \
+ r2 = 2; \
+ if r0 != r2 goto l0_%=; \
+ goto l1_%=; \
+l0_%=: \
+ r0 = 0; \
+ exit; \
+l1_%=: \
+ r0 -= r1; \
+ exit; \
+" :
+ : __imm(bpf_ktime_get_ns)
+ : __clobber_all);
+}
+
+SEC("socket")
+__description("check deducing bounds from non-const, jmp64, <non_const> != <const>, 2")
+__success __retval(0)
+__naked void deducing_bounds_from_non_const_4(void)
+{
+ asm volatile (" \
+ call %[bpf_ktime_get_ns]; \
+ if r0 > 3 goto l0_%=; \
+ r2 = 4; \
+ if r0 != r2 goto l0_%=; \
+ goto l1_%=; \
+l0_%=: \
+ r0 = 0; \
+ exit; \
+l1_%=: \
+ r0 -= r1; \
+ exit; \
+" :
+ : __imm(bpf_ktime_get_ns)
+ : __clobber_all);
+}
+
+SEC("socket")
+__description("check deducing bounds from non-const, jmp32, <non_const> == <const>, 1")
+__success __retval(0)
+__naked void deducing_bounds_from_non_const_5(void)
+{
+ asm volatile (" \
+ call %[bpf_ktime_get_ns]; \
+ if w0 < 4 goto l0_%=; \
+ w2 = 3; \
+ if w0 == w2 goto l1_%=; \
+l0_%=: \
+ r0 = 0; \
+ exit; \
+l1_%=: \
+ r0 -= r1; \
+ exit; \
+" :
+ : __imm(bpf_ktime_get_ns)
+ : __clobber_all);
+}
+
+SEC("socket")
+__description("check deducing bounds from non-const, jmp32, <non_const> == <const>, 2")
+__success __retval(0)
+__naked void deducing_bounds_from_non_const_6(void)
+{
+ asm volatile (" \
+ call %[bpf_ktime_get_ns]; \
+ if w0 > 4 goto l0_%=; \
+ w2 = 5; \
+ if w0 == w2 goto l1_%=; \
+l0_%=: \
+ r0 = 0; \
+ exit; \
+l1_%=: \
+ r0 -= r1; \
+ exit; \
+" :
+ : __imm(bpf_ktime_get_ns)
+ : __clobber_all);
+}
+
+SEC("socket")
+__description("check deducing bounds from non-const, jmp32, <non_const> != <const>, 1")
+__success __retval(0)
+__naked void deducing_bounds_from_non_const_7(void)
+{
+ asm volatile (" \
+ call %[bpf_ktime_get_ns]; \
+ if w0 < 3 goto l0_%=; \
+ w2 = 2; \
+ if w0 != w2 goto l0_%=; \
+ goto l1_%=; \
+l0_%=: \
+ r0 = 0; \
+ exit; \
+l1_%=: \
+ r0 -= r1; \
+ exit; \
+" :
+ : __imm(bpf_ktime_get_ns)
+ : __clobber_all);
+}
+
+SEC("socket")
+__description("check deducing bounds from non-const, jmp32, <non_const> != <const>, 2")
+__success __retval(0)
+__naked void deducing_bounds_from_non_const_8(void)
+{
+ asm volatile (" \
+ call %[bpf_ktime_get_ns]; \
+ if w0 > 3 goto l0_%=; \
+ w2 = 4; \
+ if w0 != w2 goto l0_%=; \
+ goto l1_%=; \
+l0_%=: \
+ r0 = 0; \
+ exit; \
+l1_%=: \
+ r0 -= r1; \
+ exit; \
+" :
+ : __imm(bpf_ktime_get_ns)
+ : __clobber_all);
+}
+
+SEC("socket")
+__description("check deducing bounds from non-const, jmp64, <const> > <non_const>, 1")
+__success __retval(0)
+__naked void deducing_bounds_from_non_const_9(void)
+{
+ asm volatile (" \
+ call %[bpf_ktime_get_ns]; \
+ r2 = 0; \
+ if r2 > r0 goto l0_%=; \
+ r0 = 0; \
+ exit; \
+l0_%=: \
+ r0 -= r1; \
+ exit; \
+" :
+ : __imm(bpf_ktime_get_ns)
+ : __clobber_all);
+}
+
+SEC("socket")
+__description("check deducing bounds from non-const, jmp64, <const> > <non_const>, 2")
+__success __retval(0)
+__naked void deducing_bounds_from_non_const_10(void)
+{
+ asm volatile (" \
+ call %[bpf_ktime_get_ns]; \
+ if r0 < 4 goto l0_%=; \
+ r2 = 4; \
+ if r2 > r0 goto l1_%=; \
+l0_%=: \
+ r0 = 0; \
+ exit; \
+l1_%=: \
+ r0 -= r1; \
+ exit; \
+" :
+ : __imm(bpf_ktime_get_ns)
+ : __clobber_all);
+}
+
+SEC("socket")
+__description("check deducing bounds from non-const, jmp64, <const> >= <non_const>")
+__success __retval(0)
+__naked void deducing_bounds_from_non_const_11(void)
+{
+ asm volatile (" \
+ call %[bpf_ktime_get_ns]; \
+ if r0 < 4 goto l0_%=; \
+ r2 = 3; \
+ if r2 >= r0 goto l1_%=; \
+l0_%=: \
+ r0 = 0; \
+ exit; \
+l1_%=: \
+ r0 -= r1; \
+ exit; \
+" :
+ : __imm(bpf_ktime_get_ns)
+ : __clobber_all);
+}
+
+SEC("socket")
+__description("check deducing bounds from non-const, jmp64, <const> < <non_const>")
+__success __retval(0)
+__naked void deducing_bounds_from_non_const_12(void)
+{
+ asm volatile (" \
+ call %[bpf_ktime_get_ns]; \
+ if r0 > 4 goto l0_%=; \
+ r2 = 4; \
+ if r2 < r0 goto l1_%=; \
+l0_%=: \
+ r0 = 0; \
+ exit; \
+l1_%=: \
+ r0 -= r1; \
+ exit; \
+" :
+ : __imm(bpf_ktime_get_ns)
+ : __clobber_all);
+}
+
+SEC("socket")
+__description("check deducing bounds from non-const, jmp64, <const> <= <non_const>")
+__success __retval(0)
+__naked void deducing_bounds_from_non_const_13(void)
+{
+ asm volatile (" \
+ call %[bpf_ktime_get_ns]; \
+ if r0 >= 4 goto l0_%=; \
+ r2 = 4; \
+ if r2 <= r0 goto l1_%=; \
+l0_%=: \
+ r0 = 0; \
+ exit; \
+l1_%=: \
+ r0 -= r1; \
+ exit; \
+" :
+ : __imm(bpf_ktime_get_ns)
+ : __clobber_all);
+}
+
+SEC("socket")
+__description("check deducing bounds from non-const, jmp64, <const> == <non_const>")
+__success __retval(0)
+__naked void deducing_bounds_from_non_const_14(void)
+{
+ asm volatile (" \
+ call %[bpf_ktime_get_ns]; \
+ if r0 < 3 goto l0_%=; \
+ r2 = 2; \
+ if r2 == r0 goto l1_%=; \
+l0_%=: \
+ r0 = 0; \
+ exit; \
+l1_%=: \
+ r0 -= r1; \
+ exit; \
+" :
+ : __imm(bpf_ktime_get_ns)
+ : __clobber_all);
+}
+
+SEC("socket")
+__description("check deducing bounds from non-const, jmp64, <const> s> <non_const>")
+__success __retval(0)
+__naked void deducing_bounds_from_non_const_15(void)
+{
+ asm volatile (" \
+ call %[bpf_ktime_get_ns]; \
+ if r0 s< 4 goto l0_%=; \
+ r2 = 4; \
+ if r2 s> r0 goto l1_%=; \
+l0_%=: \
+ r0 = 0; \
+ exit; \
+l1_%=: \
+ r0 -= r1; \
+ exit; \
+" :
+ : __imm(bpf_ktime_get_ns)
+ : __clobber_all);
+}
+
+SEC("socket")
+__description("check deducing bounds from non-const, jmp64, <const> s>= <non_const>")
+__success __retval(0)
+__naked void deducing_bounds_from_non_const_16(void)
+{
+ asm volatile (" \
+ call %[bpf_ktime_get_ns]; \
+ if r0 s< 4 goto l0_%=; \
+ r2 = 3; \
+ if r2 s>= r0 goto l1_%=; \
+l0_%=: \
+ r0 = 0; \
+ exit; \
+l1_%=: \
+ r0 -= r1; \
+ exit; \
+" :
+ : __imm(bpf_ktime_get_ns)
+ : __clobber_all);
+}
+
+SEC("socket")
+__description("check deducing bounds from non-const, jmp64, <const> s< <non_const>")
+__success __retval(0)
+__naked void deducing_bounds_from_non_const_17(void)
+{
+ asm volatile (" \
+ call %[bpf_ktime_get_ns]; \
+ if r0 s> 4 goto l0_%=; \
+ r2 = 4; \
+ if r2 s< r0 goto l1_%=; \
+l0_%=: \
+ r0 = 0; \
+ exit; \
+l1_%=: \
+ r0 -= r1; \
+ exit; \
+" :
+ : __imm(bpf_ktime_get_ns)
+ : __clobber_all);
+}
+
+SEC("socket")
+__description("check deducing bounds from non-const, jmp64, <const> s<= <non_const>")
+__success __retval(0)
+__naked void deducing_bounds_from_non_const_18(void)
+{
+ asm volatile (" \
+ call %[bpf_ktime_get_ns]; \
+ if r0 s> 4 goto l0_%=; \
+ r2 = 5; \
+ if r2 s<= r0 goto l1_%=; \
+l0_%=: \
+ r0 = 0; \
+ exit; \
+l1_%=: \
+ r0 -= r1; \
+ exit; \
+" :
+ : __imm(bpf_ktime_get_ns)
+ : __clobber_all);
+}
+
+SEC("socket")
+__description("check deducing bounds from non-const, jmp64, <const> != <non_const>")
+__success __retval(0)
+__naked void deducing_bounds_from_non_const_19(void)
+{
+ asm volatile (" \
+ call %[bpf_ktime_get_ns]; \
+ if r0 < 3 goto l0_%=; \
+ r2 = 2; \
+ if r2 != r0 goto l0_%=; \
+ goto l1_%=; \
+l0_%=: \
+ r0 = 0; \
+ exit; \
+l1_%=: \
+ r0 -= r1; \
+ exit; \
+" :
+ : __imm(bpf_ktime_get_ns)
+ : __clobber_all);
+}
+
+SEC("socket")
+__description("check deducing bounds from non-const, jmp32, <const> > <non_const>, 1")
+__success __retval(0)
+__naked void deducing_bounds_from_non_const_20(void)
+{
+ asm volatile (" \
+ call %[bpf_ktime_get_ns]; \
+ w2 = 0; \
+ if w2 > w0 goto l0_%=; \
+ r0 = 0; \
+ exit; \
+l0_%=: \
+ r0 -= r1; \
+ exit; \
+" :
+ : __imm(bpf_ktime_get_ns)
+ : __clobber_all);
+}
+
+SEC("socket")
+__description("check deducing bounds from non-const, jmp32, <const> > <non_const>, 2")
+__success __retval(0)
+__naked void deducing_bounds_from_non_const_21(void)
+{
+ asm volatile (" \
+ call %[bpf_ktime_get_ns]; \
+ if w0 < 4 goto l0_%=; \
+ w2 = 4; \
+ if w2 > w0 goto l1_%=; \
+l0_%=: \
+ r0 = 0; \
+ exit; \
+l1_%=: \
+ r0 -= r1; \
+ exit; \
+" :
+ : __imm(bpf_ktime_get_ns)
+ : __clobber_all);
+}
+
+SEC("socket")
+__description("check deducing bounds from non-const, jmp32, <const> >= <non_const>")
+__success __retval(0)
+__naked void deducing_bounds_from_non_const_22(void)
+{
+ asm volatile (" \
+ call %[bpf_ktime_get_ns]; \
+ if w0 < 4 goto l0_%=; \
+ w2 = 3; \
+ if w2 >= w0 goto l1_%=; \
+l0_%=: \
+ r0 = 0; \
+ exit; \
+l1_%=: \
+ r0 -= r1; \
+ exit; \
+" :
+ : __imm(bpf_ktime_get_ns)
+ : __clobber_all);
+}
+
+SEC("socket")
+__description("check deducing bounds from non-const, jmp32, <const> < <non_const>")
+__success __retval(0)
+__naked void deducing_bounds_from_non_const_23(void)
+{
+ asm volatile (" \
+ call %[bpf_ktime_get_ns]; \
+ if w0 > 4 goto l0_%=; \
+ w2 = 4; \
+ if w2 < w0 goto l1_%=; \
+l0_%=: \
+ r0 = 0; \
+ exit; \
+l1_%=: \
+ r0 -= r1; \
+ exit; \
+" :
+ : __imm(bpf_ktime_get_ns)
+ : __clobber_all);
+}
+
+SEC("socket")
+__description("check deducing bounds from non-const, jmp32, <const> <= <non_const>")
+__success __retval(0)
+__naked void deducing_bounds_from_non_const_24(void)
+{
+ asm volatile (" \
+ call %[bpf_ktime_get_ns]; \
+ if w0 >= 4 goto l0_%=; \
+ w2 = 4; \
+ if w2 <= w0 goto l1_%=; \
+l0_%=: \
+ r0 = 0; \
+ exit; \
+l1_%=: \
+ r0 -= r1; \
+ exit; \
+" :
+ : __imm(bpf_ktime_get_ns)
+ : __clobber_all);
+}
+
+SEC("socket")
+__description("check deducing bounds from non-const, jmp32, <const> == <non_const>")
+__success __retval(0)
+__naked void deducing_bounds_from_non_const_25(void)
+{
+ asm volatile (" \
+ call %[bpf_ktime_get_ns]; \
+ if w0 < 4 goto l0_%=; \
+ w2 = 3; \
+ if w2 == w0 goto l1_%=; \
+l0_%=: \
+ r0 = 0; \
+ exit; \
+l1_%=: \
+ r0 -= r1; \
+ exit; \
+" :
+ : __imm(bpf_ktime_get_ns)
+ : __clobber_all);
+}
+
+SEC("socket")
+__description("check deducing bounds from non-const, jmp32, <const> s> <non_const>")
+__success __retval(0)
+__naked void deducing_bounds_from_non_const_26(void)
+{
+ asm volatile (" \
+ call %[bpf_ktime_get_ns]; \
+ if w0 s< 4 goto l0_%=; \
+ w2 = 4; \
+ if w2 s> w0 goto l1_%=; \
+l0_%=: \
+ r0 = 0; \
+ exit; \
+l1_%=: \
+ r0 -= r1; \
+ exit; \
+" :
+ : __imm(bpf_ktime_get_ns)
+ : __clobber_all);
+}
+
+SEC("socket")
+__description("check deducing bounds from non-const, jmp32, <const> s>= <non_const>")
+__success __retval(0)
+__naked void deducing_bounds_from_non_const_27(void)
+{
+ asm volatile (" \
+ call %[bpf_ktime_get_ns]; \
+ if w0 s< 4 goto l0_%=; \
+ w2 = 3; \
+ if w2 s>= w0 goto l1_%=; \
+l0_%=: \
+ r0 = 0; \
+ exit; \
+l1_%=: \
+ r0 -= r1; \
+ exit; \
+" :
+ : __imm(bpf_ktime_get_ns)
+ : __clobber_all);
+}
+
+SEC("socket")
+__description("check deducing bounds from non-const, jmp32, <const> s< <non_const>")
+__success __retval(0)
+__naked void deducing_bounds_from_non_const_28(void)
+{
+ asm volatile (" \
+ call %[bpf_ktime_get_ns]; \
+ if w0 s> 4 goto l0_%=; \
+ w2 = 5; \
+ if w2 s< w0 goto l1_%=; \
+l0_%=: \
+ r0 = 0; \
+ exit; \
+l1_%=: \
+ r0 -= r1; \
+ exit; \
+" :
+ : __imm(bpf_ktime_get_ns)
+ : __clobber_all);
+}
+
+SEC("socket")
+__description("check deducing bounds from non-const, jmp32, <const> s<= <non_const>")
+__success __retval(0)
+__naked void deducing_bounds_from_non_const_29(void)
+{
+ asm volatile (" \
+ call %[bpf_ktime_get_ns]; \
+ if w0 s>= 4 goto l0_%=; \
+ w2 = 4; \
+ if w2 s<= w0 goto l1_%=; \
+l0_%=: \
+ r0 = 0; \
+ exit; \
+l1_%=: \
+ r0 -= r1; \
+ exit; \
+" :
+ : __imm(bpf_ktime_get_ns)
+ : __clobber_all);
+}
+
+SEC("socket")
+__description("check deducing bounds from non-const, jmp32, <const> != <non_const>")
+__success __retval(0)
+__naked void deducing_bounds_from_non_const_30(void)
+{
+ asm volatile (" \
+ call %[bpf_ktime_get_ns]; \
+ if w0 < 3 goto l0_%=; \
+ w2 = 2; \
+ if w2 != w0 goto l0_%=; \
+ goto l1_%=; \
+l0_%=: \
+ r0 = 0; \
+ exit; \
+l1_%=: \
+ r0 -= r1; \
+ exit; \
+" :
+ : __imm(bpf_ktime_get_ns)
+ : __clobber_all);
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/verifier_bounds_mix_sign_unsign.c b/tools/testing/selftests/bpf/progs/verifier_bounds_mix_sign_unsign.c
new file mode 100644
index 000000000000..4f40144748a5
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/verifier_bounds_mix_sign_unsign.c
@@ -0,0 +1,554 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Converted from tools/testing/selftests/bpf/verifier/bounds_mix_sign_unsign.c */
+
+#include <linux/bpf.h>
+#include <bpf/bpf_helpers.h>
+#include "bpf_misc.h"
+
+struct {
+ __uint(type, BPF_MAP_TYPE_HASH);
+ __uint(max_entries, 1);
+ __type(key, long long);
+ __type(value, long long);
+} map_hash_8b SEC(".maps");
+
+SEC("socket")
+__description("bounds checks mixing signed and unsigned, positive bounds")
+__failure __msg("unbounded min value")
+__failure_unpriv
+__naked void signed_and_unsigned_positive_bounds(void)
+{
+ asm volatile (" \
+ call %[bpf_ktime_get_ns]; \
+ *(u64*)(r10 - 16) = r0; \
+ r1 = 0; \
+ *(u64*)(r10 - 8) = r1; \
+ r2 = r10; \
+ r2 += -8; \
+ r1 = %[map_hash_8b] ll; \
+ call %[bpf_map_lookup_elem]; \
+ if r0 == 0 goto l0_%=; \
+ r1 = *(u64*)(r10 - 16); \
+ r2 = 2; \
+ if r2 >= r1 goto l0_%=; \
+ if r1 s> 4 goto l0_%=; \
+ r0 += r1; \
+ r1 = 0; \
+ *(u8*)(r0 + 0) = r1; \
+l0_%=: r0 = 0; \
+ exit; \
+" :
+ : __imm(bpf_ktime_get_ns),
+ __imm(bpf_map_lookup_elem),
+ __imm_addr(map_hash_8b)
+ : __clobber_all);
+}
+
+SEC("socket")
+__description("bounds checks mixing signed and unsigned")
+__failure __msg("unbounded min value")
+__failure_unpriv
+__naked void checks_mixing_signed_and_unsigned(void)
+{
+ asm volatile (" \
+ call %[bpf_ktime_get_ns]; \
+ *(u64*)(r10 - 16) = r0; \
+ r1 = 0; \
+ *(u64*)(r10 - 8) = r1; \
+ r2 = r10; \
+ r2 += -8; \
+ r1 = %[map_hash_8b] ll; \
+ call %[bpf_map_lookup_elem]; \
+ if r0 == 0 goto l0_%=; \
+ r1 = *(u64*)(r10 - 16); \
+ r2 = -1; \
+ if r1 > r2 goto l0_%=; \
+ if r1 s> 1 goto l0_%=; \
+ r0 += r1; \
+ r1 = 0; \
+ *(u8*)(r0 + 0) = r1; \
+l0_%=: r0 = 0; \
+ exit; \
+" :
+ : __imm(bpf_ktime_get_ns),
+ __imm(bpf_map_lookup_elem),
+ __imm_addr(map_hash_8b)
+ : __clobber_all);
+}
+
+SEC("socket")
+__description("bounds checks mixing signed and unsigned, variant 2")
+__failure __msg("unbounded min value")
+__failure_unpriv
+__naked void signed_and_unsigned_variant_2(void)
+{
+ asm volatile (" \
+ call %[bpf_ktime_get_ns]; \
+ *(u64*)(r10 - 16) = r0; \
+ r1 = 0; \
+ *(u64*)(r10 - 8) = r1; \
+ r2 = r10; \
+ r2 += -8; \
+ r1 = %[map_hash_8b] ll; \
+ call %[bpf_map_lookup_elem]; \
+ if r0 == 0 goto l0_%=; \
+ r1 = *(u64*)(r10 - 16); \
+ r2 = -1; \
+ if r1 > r2 goto l0_%=; \
+ r8 = 0; \
+ r8 += r1; \
+ if r8 s> 1 goto l0_%=; \
+ r0 += r8; \
+ r0 = 0; \
+ *(u8*)(r8 + 0) = r0; \
+l0_%=: r0 = 0; \
+ exit; \
+" :
+ : __imm(bpf_ktime_get_ns),
+ __imm(bpf_map_lookup_elem),
+ __imm_addr(map_hash_8b)
+ : __clobber_all);
+}
+
+SEC("socket")
+__description("bounds checks mixing signed and unsigned, variant 3")
+__failure __msg("unbounded min value")
+__failure_unpriv
+__naked void signed_and_unsigned_variant_3(void)
+{
+ asm volatile (" \
+ call %[bpf_ktime_get_ns]; \
+ *(u64*)(r10 - 16) = r0; \
+ r1 = 0; \
+ *(u64*)(r10 - 8) = r1; \
+ r2 = r10; \
+ r2 += -8; \
+ r1 = %[map_hash_8b] ll; \
+ call %[bpf_map_lookup_elem]; \
+ if r0 == 0 goto l0_%=; \
+ r1 = *(u64*)(r10 - 16); \
+ r2 = -1; \
+ if r1 > r2 goto l0_%=; \
+ r8 = r1; \
+ if r8 s> 1 goto l0_%=; \
+ r0 += r8; \
+ r0 = 0; \
+ *(u8*)(r8 + 0) = r0; \
+l0_%=: r0 = 0; \
+ exit; \
+" :
+ : __imm(bpf_ktime_get_ns),
+ __imm(bpf_map_lookup_elem),
+ __imm_addr(map_hash_8b)
+ : __clobber_all);
+}
+
+SEC("socket")
+__description("bounds checks mixing signed and unsigned, variant 4")
+__success __success_unpriv __retval(0)
+__naked void signed_and_unsigned_variant_4(void)
+{
+ asm volatile (" \
+ call %[bpf_ktime_get_ns]; \
+ *(u64*)(r10 - 16) = r0; \
+ r1 = 0; \
+ *(u64*)(r10 - 8) = r1; \
+ r2 = r10; \
+ r2 += -8; \
+ r1 = %[map_hash_8b] ll; \
+ call %[bpf_map_lookup_elem]; \
+ if r0 == 0 goto l0_%=; \
+ r1 = *(u64*)(r10 - 16); \
+ r2 = 1; \
+ r1 &= r2; \
+ if r1 s> 1 goto l0_%=; \
+ r0 += r1; \
+ r1 = 0; \
+ *(u8*)(r0 + 0) = r1; \
+l0_%=: r0 = 0; \
+ exit; \
+" :
+ : __imm(bpf_ktime_get_ns),
+ __imm(bpf_map_lookup_elem),
+ __imm_addr(map_hash_8b)
+ : __clobber_all);
+}
+
+SEC("socket")
+__description("bounds checks mixing signed and unsigned, variant 5")
+__failure __msg("unbounded min value")
+__failure_unpriv
+__naked void signed_and_unsigned_variant_5(void)
+{
+ asm volatile (" \
+ call %[bpf_ktime_get_ns]; \
+ *(u64*)(r10 - 16) = r0; \
+ r1 = 0; \
+ *(u64*)(r10 - 8) = r1; \
+ r2 = r10; \
+ r2 += -8; \
+ r1 = %[map_hash_8b] ll; \
+ call %[bpf_map_lookup_elem]; \
+ if r0 == 0 goto l0_%=; \
+ r1 = *(u64*)(r10 - 16); \
+ r2 = -1; \
+ if r1 > r2 goto l0_%=; \
+ if r1 s> 1 goto l0_%=; \
+ r0 += 4; \
+ r0 -= r1; \
+ r1 = 0; \
+ *(u8*)(r0 + 0) = r1; \
+ r0 = 0; \
+l0_%=: exit; \
+" :
+ : __imm(bpf_ktime_get_ns),
+ __imm(bpf_map_lookup_elem),
+ __imm_addr(map_hash_8b)
+ : __clobber_all);
+}
+
+SEC("socket")
+__description("bounds checks mixing signed and unsigned, variant 6")
+__failure __msg("R4 min value is negative, either use unsigned")
+__failure_unpriv
+__naked void signed_and_unsigned_variant_6(void)
+{
+ asm volatile (" \
+ r9 = r1; \
+ call %[bpf_ktime_get_ns]; \
+ *(u64*)(r10 - 16) = r0; \
+ r1 = r9; \
+ r2 = 0; \
+ r3 = r10; \
+ r3 += -512; \
+ r4 = *(u64*)(r10 - 16); \
+ r6 = -1; \
+ if r4 > r6 goto l0_%=; \
+ if r4 s> 1 goto l0_%=; \
+ r4 += 1; \
+ r5 = 0; \
+ r6 = 0; \
+ *(u16*)(r10 - 512) = r6; \
+ call %[bpf_skb_load_bytes]; \
+l0_%=: r0 = 0; \
+ exit; \
+" :
+ : __imm(bpf_ktime_get_ns),
+ __imm(bpf_skb_load_bytes)
+ : __clobber_all);
+}
+
+SEC("socket")
+__description("bounds checks mixing signed and unsigned, variant 7")
+__success __success_unpriv __retval(0)
+__naked void signed_and_unsigned_variant_7(void)
+{
+ asm volatile (" \
+ call %[bpf_ktime_get_ns]; \
+ *(u64*)(r10 - 16) = r0; \
+ r1 = 0; \
+ *(u64*)(r10 - 8) = r1; \
+ r2 = r10; \
+ r2 += -8; \
+ r1 = %[map_hash_8b] ll; \
+ call %[bpf_map_lookup_elem]; \
+ if r0 == 0 goto l0_%=; \
+ r1 = *(u64*)(r10 - 16); \
+ r2 = %[__imm_0]; \
+ if r1 > r2 goto l0_%=; \
+ if r1 s> 1 goto l0_%=; \
+ r0 += r1; \
+ r1 = 0; \
+ *(u8*)(r0 + 0) = r1; \
+l0_%=: r0 = 0; \
+ exit; \
+" :
+ : __imm(bpf_ktime_get_ns),
+ __imm(bpf_map_lookup_elem),
+ __imm_addr(map_hash_8b),
+ __imm_const(__imm_0, 1024 * 1024 * 1024)
+ : __clobber_all);
+}
+
+SEC("socket")
+__description("bounds checks mixing signed and unsigned, variant 8")
+__failure __msg("unbounded min value")
+__failure_unpriv
+__naked void signed_and_unsigned_variant_8(void)
+{
+ asm volatile (" \
+ call %[bpf_ktime_get_ns]; \
+ *(u64*)(r10 - 16) = r0; \
+ r1 = 0; \
+ *(u64*)(r10 - 8) = r1; \
+ r2 = r10; \
+ r2 += -8; \
+ r1 = %[map_hash_8b] ll; \
+ call %[bpf_map_lookup_elem]; \
+ if r0 == 0 goto l0_%=; \
+ r1 = *(u64*)(r10 - 16); \
+ r2 = -1; \
+ if r2 > r1 goto l1_%=; \
+ r0 = 0; \
+ exit; \
+l1_%=: if r1 s> 1 goto l0_%=; \
+ r0 += r1; \
+ r1 = 0; \
+ *(u8*)(r0 + 0) = r1; \
+l0_%=: r0 = 0; \
+ exit; \
+" :
+ : __imm(bpf_ktime_get_ns),
+ __imm(bpf_map_lookup_elem),
+ __imm_addr(map_hash_8b)
+ : __clobber_all);
+}
+
+SEC("socket")
+__description("bounds checks mixing signed and unsigned, variant 9")
+__success __success_unpriv __retval(0)
+__naked void signed_and_unsigned_variant_9(void)
+{
+ asm volatile (" \
+ call %[bpf_ktime_get_ns]; \
+ *(u64*)(r10 - 16) = r0; \
+ r1 = 0; \
+ *(u64*)(r10 - 8) = r1; \
+ r2 = r10; \
+ r2 += -8; \
+ r1 = %[map_hash_8b] ll; \
+ call %[bpf_map_lookup_elem]; \
+ if r0 == 0 goto l0_%=; \
+ r1 = *(u64*)(r10 - 16); \
+ r2 = -9223372036854775808ULL ll; \
+ if r2 > r1 goto l1_%=; \
+ r0 = 0; \
+ exit; \
+l1_%=: if r1 s> 1 goto l0_%=; \
+ r0 += r1; \
+ r1 = 0; \
+ *(u8*)(r0 + 0) = r1; \
+l0_%=: r0 = 0; \
+ exit; \
+" :
+ : __imm(bpf_ktime_get_ns),
+ __imm(bpf_map_lookup_elem),
+ __imm_addr(map_hash_8b)
+ : __clobber_all);
+}
+
+SEC("socket")
+__description("bounds checks mixing signed and unsigned, variant 10")
+__failure __msg("unbounded min value")
+__failure_unpriv
+__naked void signed_and_unsigned_variant_10(void)
+{
+ asm volatile (" \
+ call %[bpf_ktime_get_ns]; \
+ *(u64*)(r10 - 16) = r0; \
+ r1 = 0; \
+ *(u64*)(r10 - 8) = r1; \
+ r2 = r10; \
+ r2 += -8; \
+ r1 = %[map_hash_8b] ll; \
+ call %[bpf_map_lookup_elem]; \
+ if r0 == 0 goto l0_%=; \
+ r1 = *(u64*)(r10 - 16); \
+ r2 = -1; \
+ if r2 > r1 goto l1_%=; \
+ r0 = 0; \
+ exit; \
+l1_%=: if r1 s> 1 goto l0_%=; \
+ r0 += r1; \
+ r1 = 0; \
+ *(u8*)(r0 + 0) = r1; \
+l0_%=: r0 = 0; \
+ exit; \
+" :
+ : __imm(bpf_ktime_get_ns),
+ __imm(bpf_map_lookup_elem),
+ __imm_addr(map_hash_8b)
+ : __clobber_all);
+}
+
+SEC("socket")
+__description("bounds checks mixing signed and unsigned, variant 11")
+__failure __msg("unbounded min value")
+__failure_unpriv
+__naked void signed_and_unsigned_variant_11(void)
+{
+ asm volatile (" \
+ call %[bpf_ktime_get_ns]; \
+ *(u64*)(r10 - 16) = r0; \
+ r1 = 0; \
+ *(u64*)(r10 - 8) = r1; \
+ r2 = r10; \
+ r2 += -8; \
+ r1 = %[map_hash_8b] ll; \
+ call %[bpf_map_lookup_elem]; \
+ if r0 == 0 goto l0_%=; \
+ r1 = *(u64*)(r10 - 16); \
+ r2 = -1; \
+ if r2 >= r1 goto l1_%=; \
+ /* Dead branch. */ \
+ r0 = 0; \
+ exit; \
+l1_%=: if r1 s> 1 goto l0_%=; \
+ r0 += r1; \
+ r1 = 0; \
+ *(u8*)(r0 + 0) = r1; \
+l0_%=: r0 = 0; \
+ exit; \
+" :
+ : __imm(bpf_ktime_get_ns),
+ __imm(bpf_map_lookup_elem),
+ __imm_addr(map_hash_8b)
+ : __clobber_all);
+}
+
+SEC("socket")
+__description("bounds checks mixing signed and unsigned, variant 12")
+__failure __msg("unbounded min value")
+__failure_unpriv
+__naked void signed_and_unsigned_variant_12(void)
+{
+ asm volatile (" \
+ call %[bpf_ktime_get_ns]; \
+ *(u64*)(r10 - 16) = r0; \
+ r1 = 0; \
+ *(u64*)(r10 - 8) = r1; \
+ r2 = r10; \
+ r2 += -8; \
+ r1 = %[map_hash_8b] ll; \
+ call %[bpf_map_lookup_elem]; \
+ if r0 == 0 goto l0_%=; \
+ r1 = *(u64*)(r10 - 16); \
+ r2 = -6; \
+ if r2 >= r1 goto l1_%=; \
+ r0 = 0; \
+ exit; \
+l1_%=: if r1 s> 1 goto l0_%=; \
+ r0 += r1; \
+ r1 = 0; \
+ *(u8*)(r0 + 0) = r1; \
+l0_%=: r0 = 0; \
+ exit; \
+" :
+ : __imm(bpf_ktime_get_ns),
+ __imm(bpf_map_lookup_elem),
+ __imm_addr(map_hash_8b)
+ : __clobber_all);
+}
+
+SEC("socket")
+__description("bounds checks mixing signed and unsigned, variant 13")
+__failure __msg("unbounded min value")
+__failure_unpriv
+__naked void signed_and_unsigned_variant_13(void)
+{
+ asm volatile (" \
+ call %[bpf_ktime_get_ns]; \
+ *(u64*)(r10 - 16) = r0; \
+ r1 = 0; \
+ *(u64*)(r10 - 8) = r1; \
+ r2 = r10; \
+ r2 += -8; \
+ r1 = %[map_hash_8b] ll; \
+ call %[bpf_map_lookup_elem]; \
+ if r0 == 0 goto l0_%=; \
+ r1 = *(u64*)(r10 - 16); \
+ r2 = 2; \
+ if r2 >= r1 goto l0_%=; \
+ r7 = 1; \
+ if r7 s> 0 goto l1_%=; \
+l0_%=: r0 = 0; \
+ exit; \
+l1_%=: r7 += r1; \
+ if r7 s> 4 goto l2_%=; \
+ r0 += r7; \
+ r1 = 0; \
+ *(u8*)(r0 + 0) = r1; \
+l2_%=: r0 = 0; \
+ exit; \
+" :
+ : __imm(bpf_ktime_get_ns),
+ __imm(bpf_map_lookup_elem),
+ __imm_addr(map_hash_8b)
+ : __clobber_all);
+}
+
+SEC("socket")
+__description("bounds checks mixing signed and unsigned, variant 14")
+__failure __msg("unbounded min value")
+__failure_unpriv
+__naked void signed_and_unsigned_variant_14(void)
+{
+ asm volatile (" \
+ r9 = *(u32*)(r1 + %[__sk_buff_mark]); \
+ call %[bpf_ktime_get_ns]; \
+ *(u64*)(r10 - 16) = r0; \
+ r1 = 0; \
+ *(u64*)(r10 - 8) = r1; \
+ r2 = r10; \
+ r2 += -8; \
+ r1 = %[map_hash_8b] ll; \
+ call %[bpf_map_lookup_elem]; \
+ if r0 == 0 goto l0_%=; \
+ r1 = *(u64*)(r10 - 16); \
+ r2 = -1; \
+ r8 = 2; \
+ if r9 == 42 goto l1_%=; \
+ if r8 s> r1 goto l2_%=; \
+l3_%=: if r1 s> 1 goto l2_%=; \
+ r0 += r1; \
+l0_%=: r1 = 0; \
+ *(u8*)(r0 + 0) = r1; \
+l2_%=: r0 = 0; \
+ exit; \
+l1_%=: if r1 > r2 goto l2_%=; \
+ goto l3_%=; \
+" :
+ : __imm(bpf_ktime_get_ns),
+ __imm(bpf_map_lookup_elem),
+ __imm_addr(map_hash_8b),
+ __imm_const(__sk_buff_mark, offsetof(struct __sk_buff, mark))
+ : __clobber_all);
+}
+
+SEC("socket")
+__description("bounds checks mixing signed and unsigned, variant 15")
+__failure __msg("unbounded min value")
+__failure_unpriv
+__naked void signed_and_unsigned_variant_15(void)
+{
+ asm volatile (" \
+ call %[bpf_ktime_get_ns]; \
+ *(u64*)(r10 - 16) = r0; \
+ r1 = 0; \
+ *(u64*)(r10 - 8) = r1; \
+ r2 = r10; \
+ r2 += -8; \
+ r1 = %[map_hash_8b] ll; \
+ call %[bpf_map_lookup_elem]; \
+ if r0 == 0 goto l0_%=; \
+ r1 = *(u64*)(r10 - 16); \
+ r2 = -6; \
+ if r2 >= r1 goto l1_%=; \
+l0_%=: r0 = 0; \
+ exit; \
+l1_%=: r0 += r1; \
+ if r0 > 1 goto l2_%=; \
+ r0 = 0; \
+ exit; \
+l2_%=: r1 = 0; \
+ *(u8*)(r0 + 0) = r1; \
+ r0 = 0; \
+ exit; \
+" :
+ : __imm(bpf_ktime_get_ns),
+ __imm(bpf_map_lookup_elem),
+ __imm_addr(map_hash_8b)
+ : __clobber_all);
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/verifier_bpf_get_stack.c b/tools/testing/selftests/bpf/progs/verifier_bpf_get_stack.c
new file mode 100644
index 000000000000..325a2bab4a71
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/verifier_bpf_get_stack.c
@@ -0,0 +1,124 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Converted from tools/testing/selftests/bpf/verifier/bpf_get_stack.c */
+
+#include <linux/bpf.h>
+#include <bpf/bpf_helpers.h>
+#include "bpf_misc.h"
+
+#define MAX_ENTRIES 11
+
+struct test_val {
+ unsigned int index;
+ int foo[MAX_ENTRIES];
+};
+
+struct {
+ __uint(type, BPF_MAP_TYPE_ARRAY);
+ __uint(max_entries, 1);
+ __type(key, int);
+ __type(value, struct test_val);
+} map_array_48b SEC(".maps");
+
+struct {
+ __uint(type, BPF_MAP_TYPE_HASH);
+ __uint(max_entries, 1);
+ __type(key, long long);
+ __type(value, struct test_val);
+} map_hash_48b SEC(".maps");
+
+SEC("tracepoint")
+__description("bpf_get_stack return R0 within range")
+__success
+__naked void stack_return_r0_within_range(void)
+{
+ asm volatile (" \
+ r6 = r1; \
+ r1 = 0; \
+ *(u64*)(r10 - 8) = r1; \
+ r2 = r10; \
+ r2 += -8; \
+ r1 = %[map_hash_48b] ll; \
+ call %[bpf_map_lookup_elem]; \
+ if r0 == 0 goto l0_%=; \
+ r7 = r0; \
+ r9 = %[__imm_0]; \
+ r1 = r6; \
+ r2 = r7; \
+ r3 = %[__imm_0]; \
+ r4 = 256; \
+ call %[bpf_get_stack]; \
+ r1 = 0; \
+ r8 = r0; \
+ r8 <<= 32; \
+ r8 s>>= 32; \
+ if r1 s> r8 goto l0_%=; \
+ r9 -= r8; \
+ r2 = r7; \
+ r2 += r8; \
+ r1 = r9; \
+ r1 <<= 32; \
+ r1 s>>= 32; \
+ r3 = r2; \
+ r3 += r1; \
+ r1 = r7; \
+ r5 = %[__imm_0]; \
+ r1 += r5; \
+ if r3 >= r1 goto l0_%=; \
+ r1 = r6; \
+ r3 = r9; \
+ r4 = 0; \
+ call %[bpf_get_stack]; \
+l0_%=: exit; \
+" :
+ : __imm(bpf_get_stack),
+ __imm(bpf_map_lookup_elem),
+ __imm_addr(map_hash_48b),
+ __imm_const(__imm_0, sizeof(struct test_val) / 2)
+ : __clobber_all);
+}
+
+SEC("iter/task")
+__description("bpf_get_task_stack return R0 range is refined")
+__success
+__naked void return_r0_range_is_refined(void)
+{
+ asm volatile (" \
+ r6 = *(u64*)(r1 + 0); \
+ r6 = *(u64*)(r6 + 0); /* ctx->meta->seq */\
+ r7 = *(u64*)(r1 + 8); /* ctx->task */\
+ r1 = %[map_array_48b] ll; /* fixup_map_array_48b */\
+ r2 = 0; \
+ *(u64*)(r10 - 8) = r2; \
+ r2 = r10; \
+ r2 += -8; \
+ call %[bpf_map_lookup_elem]; \
+ if r0 != 0 goto l0_%=; \
+ r0 = 0; \
+ exit; \
+l0_%=: if r7 != 0 goto l1_%=; \
+ r0 = 0; \
+ exit; \
+l1_%=: r1 = r7; \
+ r2 = r0; \
+ r9 = r0; /* keep buf for seq_write */\
+ r3 = 48; \
+ r4 = 0; \
+ call %[bpf_get_task_stack]; \
+ if r0 s> 0 goto l2_%=; \
+ r0 = 0; \
+ exit; \
+l2_%=: r1 = r6; \
+ r2 = r9; \
+ r3 = r0; \
+ call %[bpf_seq_write]; \
+ r0 = 0; \
+ exit; \
+" :
+ : __imm(bpf_get_task_stack),
+ __imm(bpf_map_lookup_elem),
+ __imm(bpf_seq_write),
+ __imm_addr(map_array_48b)
+ : __clobber_all);
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/verifier_bswap.c b/tools/testing/selftests/bpf/progs/verifier_bswap.c
new file mode 100644
index 000000000000..e61755656e8d
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/verifier_bswap.c
@@ -0,0 +1,63 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include <linux/bpf.h>
+#include <bpf/bpf_helpers.h>
+#include "bpf_misc.h"
+
+#if (defined(__TARGET_ARCH_arm64) || defined(__TARGET_ARCH_x86) || \
+ (defined(__TARGET_ARCH_riscv) && __riscv_xlen == 64) || \
+ defined(__TARGET_ARCH_arm) || defined(__TARGET_ARCH_s390) || \
+ defined(__TARGET_ARCH_loongarch)) && \
+ __clang_major__ >= 18
+
+SEC("socket")
+__description("BSWAP, 16")
+__success __success_unpriv __retval(0x23ff)
+__naked void bswap_16(void)
+{
+ asm volatile (" \
+ r0 = 0xff23; \
+ r0 = bswap16 r0; \
+ exit; \
+" ::: __clobber_all);
+}
+
+SEC("socket")
+__description("BSWAP, 32")
+__success __success_unpriv __retval(0x23ff0000)
+__naked void bswap_32(void)
+{
+ asm volatile (" \
+ r0 = 0xff23; \
+ r0 = bswap32 r0; \
+ exit; \
+" ::: __clobber_all);
+}
+
+SEC("socket")
+__description("BSWAP, 64")
+__success __success_unpriv __retval(0x34ff12ff)
+__naked void bswap_64(void)
+{
+ asm volatile (" \
+ r0 = %[u64_val] ll; \
+ r0 = bswap64 r0; \
+ exit; \
+" :
+ : [u64_val]"i"(0xff12ff34ff56ff78ull)
+ : __clobber_all);
+}
+
+#else
+
+SEC("socket")
+__description("cpuv4 is not supported by compiler or jit, use a dummy test")
+__success
+int dummy_test(void)
+{
+ return 0;
+}
+
+#endif
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/verifier_btf_ctx_access.c b/tools/testing/selftests/bpf/progs/verifier_btf_ctx_access.c
new file mode 100644
index 000000000000..a570e48b917a
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/verifier_btf_ctx_access.c
@@ -0,0 +1,32 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Converted from tools/testing/selftests/bpf/verifier/btf_ctx_access.c */
+
+#include <linux/bpf.h>
+#include <bpf/bpf_helpers.h>
+#include "bpf_misc.h"
+
+SEC("fentry/bpf_modify_return_test")
+__description("btf_ctx_access accept")
+__success __retval(0)
+__naked void btf_ctx_access_accept(void)
+{
+ asm volatile (" \
+ r2 = *(u32*)(r1 + 8); /* load 2nd argument value (int pointer) */\
+ r0 = 0; \
+ exit; \
+" ::: __clobber_all);
+}
+
+SEC("fentry/bpf_fentry_test9")
+__description("btf_ctx_access u32 pointer accept")
+__success __retval(0)
+__naked void ctx_access_u32_pointer_accept(void)
+{
+ asm volatile (" \
+ r2 = *(u32*)(r1 + 0); /* load 1nd argument value (u32 pointer) */\
+ r0 = 0; \
+ exit; \
+" ::: __clobber_all);
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/verifier_btf_unreliable_prog.c b/tools/testing/selftests/bpf/progs/verifier_btf_unreliable_prog.c
new file mode 100644
index 000000000000..36e033a2e02c
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/verifier_btf_unreliable_prog.c
@@ -0,0 +1,20 @@
+// SPDX-License-Identifier: GPL-2.0
+// Copyright (c) 2017 Facebook
+
+#include "vmlinux.h"
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
+#include <bpf/bpf_core_read.h>
+#include "bpf_misc.h"
+
+struct whatever {};
+
+SEC("kprobe")
+__success __log_level(2)
+/* context type is wrong, making it impossible to freplace this program */
+int btf_unreliable_kprobe(struct whatever *ctx)
+{
+ return 0;
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/verifier_cfg.c b/tools/testing/selftests/bpf/progs/verifier_cfg.c
new file mode 100644
index 000000000000..c1f55e1d80a4
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/verifier_cfg.c
@@ -0,0 +1,162 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Converted from tools/testing/selftests/bpf/verifier/cfg.c */
+
+#include <linux/bpf.h>
+#include <bpf/bpf_helpers.h>
+#include "bpf_misc.h"
+
+SEC("socket")
+__description("unreachable")
+__failure __msg("unreachable")
+__failure_unpriv
+__naked void unreachable(void)
+{
+ asm volatile (" \
+ exit; \
+ exit; \
+" ::: __clobber_all);
+}
+
+SEC("socket")
+__description("unreachable2")
+__failure __msg("unreachable")
+__failure_unpriv
+__naked void unreachable2(void)
+{
+ asm volatile (" \
+ goto l0_%=; \
+ goto l0_%=; \
+l0_%=: exit; \
+" ::: __clobber_all);
+}
+
+SEC("socket")
+__description("out of range jump")
+__failure __msg("jump out of range")
+__failure_unpriv
+__naked void out_of_range_jump(void)
+{
+ asm volatile (" \
+ goto l0_%=; \
+ exit; \
+l0_%=: \
+" ::: __clobber_all);
+}
+
+SEC("socket")
+__description("out of range jump2")
+__failure __msg("jump out of range")
+__failure_unpriv
+__naked void out_of_range_jump2(void)
+{
+ asm volatile (" \
+ goto -2; \
+ exit; \
+" ::: __clobber_all);
+}
+
+SEC("socket")
+__description("loop (back-edge)")
+__failure __msg("unreachable insn 1")
+__msg_unpriv("back-edge")
+__naked void loop_back_edge(void)
+{
+ asm volatile (" \
+l0_%=: goto l0_%=; \
+ exit; \
+" ::: __clobber_all);
+}
+
+SEC("socket")
+__description("loop2 (back-edge)")
+__failure __msg("unreachable insn 4")
+__msg_unpriv("back-edge")
+__naked void loop2_back_edge(void)
+{
+ asm volatile (" \
+l0_%=: r1 = r0; \
+ r2 = r0; \
+ r3 = r0; \
+ goto l0_%=; \
+ exit; \
+" ::: __clobber_all);
+}
+
+SEC("socket")
+__description("conditional loop")
+__failure __msg("infinite loop detected")
+__msg_unpriv("back-edge")
+__naked void conditional_loop(void)
+{
+ asm volatile (" \
+ r0 = r1; \
+l0_%=: r2 = r0; \
+ r3 = r0; \
+ if r1 == 0 goto l0_%=; \
+ exit; \
+" ::: __clobber_all);
+}
+
+SEC("socket")
+__description("conditional loop (2)")
+__success
+__failure_unpriv __msg_unpriv("back-edge from insn 10 to 11")
+__naked void conditional_loop2(void)
+{
+ asm volatile (" \
+ r9 = 2 ll; \
+ r3 = 0x20 ll; \
+ r4 = 0x35 ll; \
+ r8 = r4; \
+ goto l1_%=; \
+l0_%=: r9 -= r3; \
+ r9 -= r4; \
+ r9 -= r8; \
+l1_%=: r8 += r4; \
+ if r8 < 0x64 goto l0_%=; \
+ r0 = r9; \
+ exit; \
+" ::: __clobber_all);
+}
+
+SEC("socket")
+__description("unconditional loop after conditional jump")
+__failure __msg("infinite loop detected")
+__failure_unpriv __msg_unpriv("back-edge from insn 3 to 2")
+__naked void uncond_loop_after_cond_jmp(void)
+{
+ asm volatile (" \
+ r0 = 0; \
+ if r0 > 0 goto l1_%=; \
+l0_%=: r0 = 1; \
+ goto l0_%=; \
+l1_%=: exit; \
+" ::: __clobber_all);
+}
+
+
+__naked __noinline __used
+static unsigned long never_ending_subprog()
+{
+ asm volatile (" \
+ r0 = r1; \
+ goto -1; \
+" ::: __clobber_all);
+}
+
+SEC("socket")
+__description("unconditional loop after conditional jump")
+/* infinite loop is detected *after* check_cfg() */
+__failure __msg("infinite loop detected")
+__naked void uncond_loop_in_subprog_after_cond_jmp(void)
+{
+ asm volatile (" \
+ r0 = 0; \
+ if r0 > 0 goto l1_%=; \
+l0_%=: r0 += 1; \
+ call never_ending_subprog; \
+l1_%=: exit; \
+" ::: __clobber_all);
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/verifier_cgroup_inv_retcode.c b/tools/testing/selftests/bpf/progs/verifier_cgroup_inv_retcode.c
new file mode 100644
index 000000000000..6e0f349f8f15
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/verifier_cgroup_inv_retcode.c
@@ -0,0 +1,89 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Converted from tools/testing/selftests/bpf/verifier/cgroup_inv_retcode.c */
+
+#include <linux/bpf.h>
+#include <bpf/bpf_helpers.h>
+#include "bpf_misc.h"
+
+SEC("cgroup/sock")
+__description("bpf_exit with invalid return code. test1")
+__failure __msg("smin=0 smax=4294967295 should have been in [0, 1]")
+__naked void with_invalid_return_code_test1(void)
+{
+ asm volatile (" \
+ r0 = *(u32*)(r1 + 0); \
+ exit; \
+" ::: __clobber_all);
+}
+
+SEC("cgroup/sock")
+__description("bpf_exit with invalid return code. test2")
+__success
+__naked void with_invalid_return_code_test2(void)
+{
+ asm volatile (" \
+ r0 = *(u32*)(r1 + 0); \
+ r0 &= 1; \
+ exit; \
+" ::: __clobber_all);
+}
+
+SEC("cgroup/sock")
+__description("bpf_exit with invalid return code. test3")
+__failure __msg("smin=0 smax=3 should have been in [0, 1]")
+__naked void with_invalid_return_code_test3(void)
+{
+ asm volatile (" \
+ r0 = *(u32*)(r1 + 0); \
+ r0 &= 3; \
+ exit; \
+" ::: __clobber_all);
+}
+
+SEC("cgroup/sock")
+__description("bpf_exit with invalid return code. test4")
+__success
+__naked void with_invalid_return_code_test4(void)
+{
+ asm volatile (" \
+ r0 = 1; \
+ exit; \
+" ::: __clobber_all);
+}
+
+SEC("cgroup/sock")
+__description("bpf_exit with invalid return code. test5")
+__failure __msg("smin=2 smax=2 should have been in [0, 1]")
+__naked void with_invalid_return_code_test5(void)
+{
+ asm volatile (" \
+ r0 = 2; \
+ exit; \
+" ::: __clobber_all);
+}
+
+SEC("cgroup/sock")
+__description("bpf_exit with invalid return code. test6")
+__failure __msg("R0 is not a known value (ctx)")
+__naked void with_invalid_return_code_test6(void)
+{
+ asm volatile (" \
+ r0 = r1; \
+ exit; \
+" ::: __clobber_all);
+}
+
+SEC("cgroup/sock")
+__description("bpf_exit with invalid return code. test7")
+__failure __msg("R0 has unknown scalar value should have been in [0, 1]")
+__naked void with_invalid_return_code_test7(void)
+{
+ asm volatile (" \
+ r0 = *(u32*)(r1 + 0); \
+ r2 = *(u32*)(r1 + 4); \
+ r0 *= r2; \
+ exit; \
+" ::: __clobber_all);
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/verifier_cgroup_skb.c b/tools/testing/selftests/bpf/progs/verifier_cgroup_skb.c
new file mode 100644
index 000000000000..5ee3d349d6d0
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/verifier_cgroup_skb.c
@@ -0,0 +1,227 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Converted from tools/testing/selftests/bpf/verifier/cgroup_skb.c */
+
+#include <linux/bpf.h>
+#include <bpf/bpf_helpers.h>
+#include "bpf_misc.h"
+
+SEC("cgroup/skb")
+__description("direct packet read test#1 for CGROUP_SKB")
+__success __failure_unpriv
+__msg_unpriv("invalid bpf_context access off=76 size=4")
+__retval(0)
+__naked void test_1_for_cgroup_skb(void)
+{
+ asm volatile (" \
+ r2 = *(u32*)(r1 + %[__sk_buff_data]); \
+ r3 = *(u32*)(r1 + %[__sk_buff_data_end]); \
+ r4 = *(u32*)(r1 + %[__sk_buff_len]); \
+ r5 = *(u32*)(r1 + %[__sk_buff_pkt_type]); \
+ r6 = *(u32*)(r1 + %[__sk_buff_mark]); \
+ *(u32*)(r1 + %[__sk_buff_mark]) = r6; \
+ r7 = *(u32*)(r1 + %[__sk_buff_queue_mapping]); \
+ r8 = *(u32*)(r1 + %[__sk_buff_protocol]); \
+ r9 = *(u32*)(r1 + %[__sk_buff_vlan_present]); \
+ r0 = r2; \
+ r0 += 8; \
+ if r0 > r3 goto l0_%=; \
+ r0 = *(u8*)(r2 + 0); \
+l0_%=: r0 = 0; \
+ exit; \
+" :
+ : __imm_const(__sk_buff_data, offsetof(struct __sk_buff, data)),
+ __imm_const(__sk_buff_data_end, offsetof(struct __sk_buff, data_end)),
+ __imm_const(__sk_buff_len, offsetof(struct __sk_buff, len)),
+ __imm_const(__sk_buff_mark, offsetof(struct __sk_buff, mark)),
+ __imm_const(__sk_buff_pkt_type, offsetof(struct __sk_buff, pkt_type)),
+ __imm_const(__sk_buff_protocol, offsetof(struct __sk_buff, protocol)),
+ __imm_const(__sk_buff_queue_mapping, offsetof(struct __sk_buff, queue_mapping)),
+ __imm_const(__sk_buff_vlan_present, offsetof(struct __sk_buff, vlan_present))
+ : __clobber_all);
+}
+
+SEC("cgroup/skb")
+__description("direct packet read test#2 for CGROUP_SKB")
+__success __success_unpriv __retval(0)
+__naked void test_2_for_cgroup_skb(void)
+{
+ asm volatile (" \
+ r4 = *(u32*)(r1 + %[__sk_buff_vlan_tci]); \
+ r5 = *(u32*)(r1 + %[__sk_buff_vlan_proto]); \
+ r6 = *(u32*)(r1 + %[__sk_buff_priority]); \
+ *(u32*)(r1 + %[__sk_buff_priority]) = r6; \
+ r7 = *(u32*)(r1 + %[__sk_buff_ingress_ifindex]);\
+ r8 = *(u32*)(r1 + %[__sk_buff_tc_index]); \
+ r9 = *(u32*)(r1 + %[__sk_buff_hash]); \
+ r0 = 0; \
+ exit; \
+" :
+ : __imm_const(__sk_buff_hash, offsetof(struct __sk_buff, hash)),
+ __imm_const(__sk_buff_ingress_ifindex, offsetof(struct __sk_buff, ingress_ifindex)),
+ __imm_const(__sk_buff_priority, offsetof(struct __sk_buff, priority)),
+ __imm_const(__sk_buff_tc_index, offsetof(struct __sk_buff, tc_index)),
+ __imm_const(__sk_buff_vlan_proto, offsetof(struct __sk_buff, vlan_proto)),
+ __imm_const(__sk_buff_vlan_tci, offsetof(struct __sk_buff, vlan_tci))
+ : __clobber_all);
+}
+
+SEC("cgroup/skb")
+__description("direct packet read test#3 for CGROUP_SKB")
+__success __success_unpriv __retval(0)
+__naked void test_3_for_cgroup_skb(void)
+{
+ asm volatile (" \
+ r4 = *(u32*)(r1 + %[__sk_buff_cb_0]); \
+ r5 = *(u32*)(r1 + %[__sk_buff_cb_1]); \
+ r6 = *(u32*)(r1 + %[__sk_buff_cb_2]); \
+ r7 = *(u32*)(r1 + %[__sk_buff_cb_3]); \
+ r8 = *(u32*)(r1 + %[__sk_buff_cb_4]); \
+ r9 = *(u32*)(r1 + %[__sk_buff_napi_id]); \
+ *(u32*)(r1 + %[__sk_buff_cb_0]) = r4; \
+ *(u32*)(r1 + %[__sk_buff_cb_1]) = r5; \
+ *(u32*)(r1 + %[__sk_buff_cb_2]) = r6; \
+ *(u32*)(r1 + %[__sk_buff_cb_3]) = r7; \
+ *(u32*)(r1 + %[__sk_buff_cb_4]) = r8; \
+ r0 = 0; \
+ exit; \
+" :
+ : __imm_const(__sk_buff_cb_0, offsetof(struct __sk_buff, cb[0])),
+ __imm_const(__sk_buff_cb_1, offsetof(struct __sk_buff, cb[1])),
+ __imm_const(__sk_buff_cb_2, offsetof(struct __sk_buff, cb[2])),
+ __imm_const(__sk_buff_cb_3, offsetof(struct __sk_buff, cb[3])),
+ __imm_const(__sk_buff_cb_4, offsetof(struct __sk_buff, cb[4])),
+ __imm_const(__sk_buff_napi_id, offsetof(struct __sk_buff, napi_id))
+ : __clobber_all);
+}
+
+SEC("cgroup/skb")
+__description("direct packet read test#4 for CGROUP_SKB")
+__success __success_unpriv __retval(0)
+__naked void test_4_for_cgroup_skb(void)
+{
+ asm volatile (" \
+ r2 = *(u32*)(r1 + %[__sk_buff_family]); \
+ r3 = *(u32*)(r1 + %[__sk_buff_remote_ip4]); \
+ r4 = *(u32*)(r1 + %[__sk_buff_local_ip4]); \
+ r5 = *(u32*)(r1 + %[__sk_buff_remote_ip6_0]); \
+ r5 = *(u32*)(r1 + %[__sk_buff_remote_ip6_1]); \
+ r5 = *(u32*)(r1 + %[__sk_buff_remote_ip6_2]); \
+ r5 = *(u32*)(r1 + %[__sk_buff_remote_ip6_3]); \
+ r6 = *(u32*)(r1 + %[__sk_buff_local_ip6_0]); \
+ r6 = *(u32*)(r1 + %[__sk_buff_local_ip6_1]); \
+ r6 = *(u32*)(r1 + %[__sk_buff_local_ip6_2]); \
+ r6 = *(u32*)(r1 + %[__sk_buff_local_ip6_3]); \
+ r7 = *(u32*)(r1 + %[__sk_buff_remote_port]); \
+ r8 = *(u32*)(r1 + %[__sk_buff_local_port]); \
+ r0 = 0; \
+ exit; \
+" :
+ : __imm_const(__sk_buff_family, offsetof(struct __sk_buff, family)),
+ __imm_const(__sk_buff_local_ip4, offsetof(struct __sk_buff, local_ip4)),
+ __imm_const(__sk_buff_local_ip6_0, offsetof(struct __sk_buff, local_ip6[0])),
+ __imm_const(__sk_buff_local_ip6_1, offsetof(struct __sk_buff, local_ip6[1])),
+ __imm_const(__sk_buff_local_ip6_2, offsetof(struct __sk_buff, local_ip6[2])),
+ __imm_const(__sk_buff_local_ip6_3, offsetof(struct __sk_buff, local_ip6[3])),
+ __imm_const(__sk_buff_local_port, offsetof(struct __sk_buff, local_port)),
+ __imm_const(__sk_buff_remote_ip4, offsetof(struct __sk_buff, remote_ip4)),
+ __imm_const(__sk_buff_remote_ip6_0, offsetof(struct __sk_buff, remote_ip6[0])),
+ __imm_const(__sk_buff_remote_ip6_1, offsetof(struct __sk_buff, remote_ip6[1])),
+ __imm_const(__sk_buff_remote_ip6_2, offsetof(struct __sk_buff, remote_ip6[2])),
+ __imm_const(__sk_buff_remote_ip6_3, offsetof(struct __sk_buff, remote_ip6[3])),
+ __imm_const(__sk_buff_remote_port, offsetof(struct __sk_buff, remote_port))
+ : __clobber_all);
+}
+
+SEC("cgroup/skb")
+__description("invalid access of tc_classid for CGROUP_SKB")
+__failure __msg("invalid bpf_context access")
+__failure_unpriv
+__naked void tc_classid_for_cgroup_skb(void)
+{
+ asm volatile (" \
+ r0 = *(u32*)(r1 + %[__sk_buff_tc_classid]); \
+ r0 = 0; \
+ exit; \
+" :
+ : __imm_const(__sk_buff_tc_classid, offsetof(struct __sk_buff, tc_classid))
+ : __clobber_all);
+}
+
+SEC("cgroup/skb")
+__description("invalid access of data_meta for CGROUP_SKB")
+__failure __msg("invalid bpf_context access")
+__failure_unpriv
+__naked void data_meta_for_cgroup_skb(void)
+{
+ asm volatile (" \
+ r0 = *(u32*)(r1 + %[__sk_buff_data_meta]); \
+ r0 = 0; \
+ exit; \
+" :
+ : __imm_const(__sk_buff_data_meta, offsetof(struct __sk_buff, data_meta))
+ : __clobber_all);
+}
+
+SEC("cgroup/skb")
+__description("invalid access of flow_keys for CGROUP_SKB")
+__failure __msg("invalid bpf_context access")
+__failure_unpriv
+__naked void flow_keys_for_cgroup_skb(void)
+{
+ asm volatile (" \
+ r0 = *(u32*)(r1 + %[__sk_buff_flow_keys]); \
+ r0 = 0; \
+ exit; \
+" :
+ : __imm_const(__sk_buff_flow_keys, offsetof(struct __sk_buff, flow_keys))
+ : __clobber_all);
+}
+
+SEC("cgroup/skb")
+__description("invalid write access to napi_id for CGROUP_SKB")
+__failure __msg("invalid bpf_context access")
+__failure_unpriv
+__naked void napi_id_for_cgroup_skb(void)
+{
+ asm volatile (" \
+ r9 = *(u32*)(r1 + %[__sk_buff_napi_id]); \
+ *(u32*)(r1 + %[__sk_buff_napi_id]) = r9; \
+ r0 = 0; \
+ exit; \
+" :
+ : __imm_const(__sk_buff_napi_id, offsetof(struct __sk_buff, napi_id))
+ : __clobber_all);
+}
+
+SEC("cgroup/skb")
+__description("write tstamp from CGROUP_SKB")
+__success __failure_unpriv
+__msg_unpriv("invalid bpf_context access off=152 size=8")
+__retval(0)
+__naked void write_tstamp_from_cgroup_skb(void)
+{
+ asm volatile (" \
+ r0 = 0; \
+ *(u64*)(r1 + %[__sk_buff_tstamp]) = r0; \
+ r0 = 0; \
+ exit; \
+" :
+ : __imm_const(__sk_buff_tstamp, offsetof(struct __sk_buff, tstamp))
+ : __clobber_all);
+}
+
+SEC("cgroup/skb")
+__description("read tstamp from CGROUP_SKB")
+__success __success_unpriv __retval(0)
+__naked void read_tstamp_from_cgroup_skb(void)
+{
+ asm volatile (" \
+ r0 = *(u64*)(r1 + %[__sk_buff_tstamp]); \
+ r0 = 0; \
+ exit; \
+" :
+ : __imm_const(__sk_buff_tstamp, offsetof(struct __sk_buff, tstamp))
+ : __clobber_all);
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/verifier_cgroup_storage.c b/tools/testing/selftests/bpf/progs/verifier_cgroup_storage.c
new file mode 100644
index 000000000000..9a13f5c11ac7
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/verifier_cgroup_storage.c
@@ -0,0 +1,308 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Converted from tools/testing/selftests/bpf/verifier/cgroup_storage.c */
+
+#include <linux/bpf.h>
+#include <bpf/bpf_helpers.h>
+#include "../../../include/linux/filter.h"
+#include "bpf_misc.h"
+
+struct {
+ __uint(type, BPF_MAP_TYPE_CGROUP_STORAGE);
+ __uint(max_entries, 0);
+ __type(key, struct bpf_cgroup_storage_key);
+ __type(value, char[TEST_DATA_LEN]);
+} cgroup_storage SEC(".maps");
+
+struct {
+ __uint(type, BPF_MAP_TYPE_HASH);
+ __uint(max_entries, 1);
+ __type(key, long long);
+ __type(value, long long);
+} map_hash_8b SEC(".maps");
+
+struct {
+ __uint(type, BPF_MAP_TYPE_PERCPU_CGROUP_STORAGE);
+ __uint(max_entries, 0);
+ __type(key, struct bpf_cgroup_storage_key);
+ __type(value, char[64]);
+} percpu_cgroup_storage SEC(".maps");
+
+SEC("cgroup/skb")
+__description("valid cgroup storage access")
+__success __success_unpriv __retval(0)
+__naked void valid_cgroup_storage_access(void)
+{
+ asm volatile (" \
+ r2 = 0; \
+ r1 = %[cgroup_storage] ll; \
+ call %[bpf_get_local_storage]; \
+ r1 = *(u32*)(r0 + 0); \
+ r0 = r1; \
+ r0 &= 1; \
+ exit; \
+" :
+ : __imm(bpf_get_local_storage),
+ __imm_addr(cgroup_storage)
+ : __clobber_all);
+}
+
+SEC("cgroup/skb")
+__description("invalid cgroup storage access 1")
+__failure __msg("cannot pass map_type 1 into func bpf_get_local_storage")
+__failure_unpriv
+__naked void invalid_cgroup_storage_access_1(void)
+{
+ asm volatile (" \
+ r2 = 0; \
+ r1 = %[map_hash_8b] ll; \
+ call %[bpf_get_local_storage]; \
+ r1 = *(u32*)(r0 + 0); \
+ r0 = r1; \
+ r0 &= 1; \
+ exit; \
+" :
+ : __imm(bpf_get_local_storage),
+ __imm_addr(map_hash_8b)
+ : __clobber_all);
+}
+
+SEC("cgroup/skb")
+__description("invalid cgroup storage access 2")
+__failure __msg("fd 1 is not pointing to valid bpf_map")
+__failure_unpriv
+__naked void invalid_cgroup_storage_access_2(void)
+{
+ asm volatile (" \
+ r2 = 0; \
+ .8byte %[ld_map_fd]; \
+ .8byte 0; \
+ call %[bpf_get_local_storage]; \
+ r0 &= 1; \
+ exit; \
+" :
+ : __imm(bpf_get_local_storage),
+ __imm_insn(ld_map_fd, BPF_RAW_INSN(BPF_LD | BPF_DW | BPF_IMM, BPF_REG_1, BPF_PSEUDO_MAP_FD, 0, 1))
+ : __clobber_all);
+}
+
+SEC("cgroup/skb")
+__description("invalid cgroup storage access 3")
+__failure __msg("invalid access to map value, value_size=64 off=256 size=4")
+__failure_unpriv
+__naked void invalid_cgroup_storage_access_3(void)
+{
+ asm volatile (" \
+ r2 = 0; \
+ r1 = %[cgroup_storage] ll; \
+ call %[bpf_get_local_storage]; \
+ r1 = *(u32*)(r0 + 256); \
+ r1 += 1; \
+ r0 = 0; \
+ exit; \
+" :
+ : __imm(bpf_get_local_storage),
+ __imm_addr(cgroup_storage)
+ : __clobber_all);
+}
+
+SEC("cgroup/skb")
+__description("invalid cgroup storage access 4")
+__failure __msg("invalid access to map value, value_size=64 off=-2 size=4")
+__failure_unpriv
+__flag(BPF_F_ANY_ALIGNMENT)
+__naked void invalid_cgroup_storage_access_4(void)
+{
+ asm volatile (" \
+ r2 = 0; \
+ r1 = %[cgroup_storage] ll; \
+ call %[bpf_get_local_storage]; \
+ r1 = *(u32*)(r0 - 2); \
+ r0 = r1; \
+ r1 += 1; \
+ exit; \
+" :
+ : __imm(bpf_get_local_storage),
+ __imm_addr(cgroup_storage)
+ : __clobber_all);
+}
+
+SEC("cgroup/skb")
+__description("invalid cgroup storage access 5")
+__failure __msg("get_local_storage() doesn't support non-zero flags")
+__failure_unpriv
+__naked void invalid_cgroup_storage_access_5(void)
+{
+ asm volatile (" \
+ r2 = 7; \
+ r1 = %[cgroup_storage] ll; \
+ call %[bpf_get_local_storage]; \
+ r1 = *(u32*)(r0 + 0); \
+ r0 = r1; \
+ r0 &= 1; \
+ exit; \
+" :
+ : __imm(bpf_get_local_storage),
+ __imm_addr(cgroup_storage)
+ : __clobber_all);
+}
+
+SEC("cgroup/skb")
+__description("invalid cgroup storage access 6")
+__failure __msg("get_local_storage() doesn't support non-zero flags")
+__msg_unpriv("R2 leaks addr into helper function")
+__naked void invalid_cgroup_storage_access_6(void)
+{
+ asm volatile (" \
+ r2 = r1; \
+ r1 = %[cgroup_storage] ll; \
+ call %[bpf_get_local_storage]; \
+ r1 = *(u32*)(r0 + 0); \
+ r0 = r1; \
+ r0 &= 1; \
+ exit; \
+" :
+ : __imm(bpf_get_local_storage),
+ __imm_addr(cgroup_storage)
+ : __clobber_all);
+}
+
+SEC("cgroup/skb")
+__description("valid per-cpu cgroup storage access")
+__success __success_unpriv __retval(0)
+__naked void per_cpu_cgroup_storage_access(void)
+{
+ asm volatile (" \
+ r2 = 0; \
+ r1 = %[percpu_cgroup_storage] ll; \
+ call %[bpf_get_local_storage]; \
+ r1 = *(u32*)(r0 + 0); \
+ r0 = r1; \
+ r0 &= 1; \
+ exit; \
+" :
+ : __imm(bpf_get_local_storage),
+ __imm_addr(percpu_cgroup_storage)
+ : __clobber_all);
+}
+
+SEC("cgroup/skb")
+__description("invalid per-cpu cgroup storage access 1")
+__failure __msg("cannot pass map_type 1 into func bpf_get_local_storage")
+__failure_unpriv
+__naked void cpu_cgroup_storage_access_1(void)
+{
+ asm volatile (" \
+ r2 = 0; \
+ r1 = %[map_hash_8b] ll; \
+ call %[bpf_get_local_storage]; \
+ r1 = *(u32*)(r0 + 0); \
+ r0 = r1; \
+ r0 &= 1; \
+ exit; \
+" :
+ : __imm(bpf_get_local_storage),
+ __imm_addr(map_hash_8b)
+ : __clobber_all);
+}
+
+SEC("cgroup/skb")
+__description("invalid per-cpu cgroup storage access 2")
+__failure __msg("fd 1 is not pointing to valid bpf_map")
+__failure_unpriv
+__naked void cpu_cgroup_storage_access_2(void)
+{
+ asm volatile (" \
+ r2 = 0; \
+ .8byte %[ld_map_fd]; \
+ .8byte 0; \
+ call %[bpf_get_local_storage]; \
+ r0 &= 1; \
+ exit; \
+" :
+ : __imm(bpf_get_local_storage),
+ __imm_insn(ld_map_fd, BPF_RAW_INSN(BPF_LD | BPF_DW | BPF_IMM, BPF_REG_1, BPF_PSEUDO_MAP_FD, 0, 1))
+ : __clobber_all);
+}
+
+SEC("cgroup/skb")
+__description("invalid per-cpu cgroup storage access 3")
+__failure __msg("invalid access to map value, value_size=64 off=256 size=4")
+__failure_unpriv
+__naked void cpu_cgroup_storage_access_3(void)
+{
+ asm volatile (" \
+ r2 = 0; \
+ r1 = %[percpu_cgroup_storage] ll; \
+ call %[bpf_get_local_storage]; \
+ r1 = *(u32*)(r0 + 256); \
+ r1 += 1; \
+ r0 = 0; \
+ exit; \
+" :
+ : __imm(bpf_get_local_storage),
+ __imm_addr(percpu_cgroup_storage)
+ : __clobber_all);
+}
+
+SEC("cgroup/skb")
+__description("invalid per-cpu cgroup storage access 4")
+__failure __msg("invalid access to map value, value_size=64 off=-2 size=4")
+__failure_unpriv
+__flag(BPF_F_ANY_ALIGNMENT)
+__naked void cpu_cgroup_storage_access_4(void)
+{
+ asm volatile (" \
+ r2 = 0; \
+ r1 = %[cgroup_storage] ll; \
+ call %[bpf_get_local_storage]; \
+ r1 = *(u32*)(r0 - 2); \
+ r0 = r1; \
+ r1 += 1; \
+ exit; \
+" :
+ : __imm(bpf_get_local_storage),
+ __imm_addr(cgroup_storage)
+ : __clobber_all);
+}
+
+SEC("cgroup/skb")
+__description("invalid per-cpu cgroup storage access 5")
+__failure __msg("get_local_storage() doesn't support non-zero flags")
+__failure_unpriv
+__naked void cpu_cgroup_storage_access_5(void)
+{
+ asm volatile (" \
+ r2 = 7; \
+ r1 = %[percpu_cgroup_storage] ll; \
+ call %[bpf_get_local_storage]; \
+ r1 = *(u32*)(r0 + 0); \
+ r0 = r1; \
+ r0 &= 1; \
+ exit; \
+" :
+ : __imm(bpf_get_local_storage),
+ __imm_addr(percpu_cgroup_storage)
+ : __clobber_all);
+}
+
+SEC("cgroup/skb")
+__description("invalid per-cpu cgroup storage access 6")
+__failure __msg("get_local_storage() doesn't support non-zero flags")
+__msg_unpriv("R2 leaks addr into helper function")
+__naked void cpu_cgroup_storage_access_6(void)
+{
+ asm volatile (" \
+ r2 = r1; \
+ r1 = %[percpu_cgroup_storage] ll; \
+ call %[bpf_get_local_storage]; \
+ r1 = *(u32*)(r0 + 0); \
+ r0 = r1; \
+ r0 &= 1; \
+ exit; \
+" :
+ : __imm(bpf_get_local_storage),
+ __imm_addr(percpu_cgroup_storage)
+ : __clobber_all);
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/verifier_const_or.c b/tools/testing/selftests/bpf/progs/verifier_const_or.c
new file mode 100644
index 000000000000..ba8922b2eebd
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/verifier_const_or.c
@@ -0,0 +1,82 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Converted from tools/testing/selftests/bpf/verifier/const_or.c */
+
+#include <linux/bpf.h>
+#include <bpf/bpf_helpers.h>
+#include "bpf_misc.h"
+
+SEC("tracepoint")
+__description("constant register |= constant should keep constant type")
+__success
+__naked void constant_should_keep_constant_type(void)
+{
+ asm volatile (" \
+ r1 = r10; \
+ r1 += -48; \
+ r2 = 34; \
+ r2 |= 13; \
+ r3 = 0; \
+ call %[bpf_probe_read_kernel]; \
+ exit; \
+" :
+ : __imm(bpf_probe_read_kernel)
+ : __clobber_all);
+}
+
+SEC("tracepoint")
+__description("constant register |= constant should not bypass stack boundary checks")
+__failure __msg("invalid indirect access to stack R1 off=-48 size=58")
+__naked void not_bypass_stack_boundary_checks_1(void)
+{
+ asm volatile (" \
+ r1 = r10; \
+ r1 += -48; \
+ r2 = 34; \
+ r2 |= 24; \
+ r3 = 0; \
+ call %[bpf_probe_read_kernel]; \
+ exit; \
+" :
+ : __imm(bpf_probe_read_kernel)
+ : __clobber_all);
+}
+
+SEC("tracepoint")
+__description("constant register |= constant register should keep constant type")
+__success
+__naked void register_should_keep_constant_type(void)
+{
+ asm volatile (" \
+ r1 = r10; \
+ r1 += -48; \
+ r2 = 34; \
+ r4 = 13; \
+ r2 |= r4; \
+ r3 = 0; \
+ call %[bpf_probe_read_kernel]; \
+ exit; \
+" :
+ : __imm(bpf_probe_read_kernel)
+ : __clobber_all);
+}
+
+SEC("tracepoint")
+__description("constant register |= constant register should not bypass stack boundary checks")
+__failure __msg("invalid indirect access to stack R1 off=-48 size=58")
+__naked void not_bypass_stack_boundary_checks_2(void)
+{
+ asm volatile (" \
+ r1 = r10; \
+ r1 += -48; \
+ r2 = 34; \
+ r4 = 24; \
+ r2 |= r4; \
+ r3 = 0; \
+ call %[bpf_probe_read_kernel]; \
+ exit; \
+" :
+ : __imm(bpf_probe_read_kernel)
+ : __clobber_all);
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/verifier_ctx.c b/tools/testing/selftests/bpf/progs/verifier_ctx.c
new file mode 100644
index 000000000000..a83809a1dbbf
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/verifier_ctx.c
@@ -0,0 +1,221 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Converted from tools/testing/selftests/bpf/verifier/ctx.c */
+
+#include <linux/bpf.h>
+#include <bpf/bpf_helpers.h>
+#include "bpf_misc.h"
+
+SEC("tc")
+__description("context stores via BPF_ATOMIC")
+__failure __msg("BPF_ATOMIC stores into R1 ctx is not allowed")
+__naked void context_stores_via_bpf_atomic(void)
+{
+ asm volatile (" \
+ r0 = 0; \
+ lock *(u32 *)(r1 + %[__sk_buff_mark]) += w0; \
+ exit; \
+" :
+ : __imm_const(__sk_buff_mark, offsetof(struct __sk_buff, mark))
+ : __clobber_all);
+}
+
+SEC("tc")
+__description("arithmetic ops make PTR_TO_CTX unusable")
+__failure __msg("dereference of modified ctx ptr")
+__naked void make_ptr_to_ctx_unusable(void)
+{
+ asm volatile (" \
+ r1 += %[__imm_0]; \
+ r0 = *(u32*)(r1 + %[__sk_buff_mark]); \
+ exit; \
+" :
+ : __imm_const(__imm_0,
+ offsetof(struct __sk_buff, data) - offsetof(struct __sk_buff, mark)),
+ __imm_const(__sk_buff_mark, offsetof(struct __sk_buff, mark))
+ : __clobber_all);
+}
+
+SEC("tc")
+__description("pass unmodified ctx pointer to helper")
+__success __retval(0)
+__naked void unmodified_ctx_pointer_to_helper(void)
+{
+ asm volatile (" \
+ r2 = 0; \
+ call %[bpf_csum_update]; \
+ r0 = 0; \
+ exit; \
+" :
+ : __imm(bpf_csum_update)
+ : __clobber_all);
+}
+
+SEC("tc")
+__description("pass modified ctx pointer to helper, 1")
+__failure __msg("negative offset ctx ptr R1 off=-612 disallowed")
+__naked void ctx_pointer_to_helper_1(void)
+{
+ asm volatile (" \
+ r1 += -612; \
+ r2 = 0; \
+ call %[bpf_csum_update]; \
+ r0 = 0; \
+ exit; \
+" :
+ : __imm(bpf_csum_update)
+ : __clobber_all);
+}
+
+SEC("socket")
+__description("pass modified ctx pointer to helper, 2")
+__failure __msg("negative offset ctx ptr R1 off=-612 disallowed")
+__failure_unpriv __msg_unpriv("negative offset ctx ptr R1 off=-612 disallowed")
+__naked void ctx_pointer_to_helper_2(void)
+{
+ asm volatile (" \
+ r1 += -612; \
+ call %[bpf_get_socket_cookie]; \
+ r0 = 0; \
+ exit; \
+" :
+ : __imm(bpf_get_socket_cookie)
+ : __clobber_all);
+}
+
+SEC("tc")
+__description("pass modified ctx pointer to helper, 3")
+__failure __msg("variable ctx access var_off=(0x0; 0x4)")
+__naked void ctx_pointer_to_helper_3(void)
+{
+ asm volatile (" \
+ r3 = *(u32*)(r1 + 0); \
+ r3 &= 4; \
+ r1 += r3; \
+ r2 = 0; \
+ call %[bpf_csum_update]; \
+ r0 = 0; \
+ exit; \
+" :
+ : __imm(bpf_csum_update)
+ : __clobber_all);
+}
+
+SEC("cgroup/sendmsg6")
+__description("pass ctx or null check, 1: ctx")
+__success
+__naked void or_null_check_1_ctx(void)
+{
+ asm volatile (" \
+ call %[bpf_get_netns_cookie]; \
+ r0 = 0; \
+ exit; \
+" :
+ : __imm(bpf_get_netns_cookie)
+ : __clobber_all);
+}
+
+SEC("cgroup/sendmsg6")
+__description("pass ctx or null check, 2: null")
+__success
+__naked void or_null_check_2_null(void)
+{
+ asm volatile (" \
+ r1 = 0; \
+ call %[bpf_get_netns_cookie]; \
+ r0 = 0; \
+ exit; \
+" :
+ : __imm(bpf_get_netns_cookie)
+ : __clobber_all);
+}
+
+SEC("cgroup/sendmsg6")
+__description("pass ctx or null check, 3: 1")
+__failure __msg("R1 type=scalar expected=ctx")
+__naked void or_null_check_3_1(void)
+{
+ asm volatile (" \
+ r1 = 1; \
+ call %[bpf_get_netns_cookie]; \
+ r0 = 0; \
+ exit; \
+" :
+ : __imm(bpf_get_netns_cookie)
+ : __clobber_all);
+}
+
+SEC("cgroup/sendmsg6")
+__description("pass ctx or null check, 4: ctx - const")
+__failure __msg("negative offset ctx ptr R1 off=-612 disallowed")
+__naked void null_check_4_ctx_const(void)
+{
+ asm volatile (" \
+ r1 += -612; \
+ call %[bpf_get_netns_cookie]; \
+ r0 = 0; \
+ exit; \
+" :
+ : __imm(bpf_get_netns_cookie)
+ : __clobber_all);
+}
+
+SEC("cgroup/connect4")
+__description("pass ctx or null check, 5: null (connect)")
+__success
+__naked void null_check_5_null_connect(void)
+{
+ asm volatile (" \
+ r1 = 0; \
+ call %[bpf_get_netns_cookie]; \
+ r0 = 0; \
+ exit; \
+" :
+ : __imm(bpf_get_netns_cookie)
+ : __clobber_all);
+}
+
+SEC("cgroup/post_bind4")
+__description("pass ctx or null check, 6: null (bind)")
+__success
+__naked void null_check_6_null_bind(void)
+{
+ asm volatile (" \
+ r1 = 0; \
+ call %[bpf_get_netns_cookie]; \
+ r0 = 0; \
+ exit; \
+" :
+ : __imm(bpf_get_netns_cookie)
+ : __clobber_all);
+}
+
+SEC("cgroup/post_bind4")
+__description("pass ctx or null check, 7: ctx (bind)")
+__success
+__naked void null_check_7_ctx_bind(void)
+{
+ asm volatile (" \
+ call %[bpf_get_socket_cookie]; \
+ r0 = 0; \
+ exit; \
+" :
+ : __imm(bpf_get_socket_cookie)
+ : __clobber_all);
+}
+
+SEC("cgroup/post_bind4")
+__description("pass ctx or null check, 8: null (bind)")
+__failure __msg("R1 type=scalar expected=ctx")
+__naked void null_check_8_null_bind(void)
+{
+ asm volatile (" \
+ r1 = 0; \
+ call %[bpf_get_socket_cookie]; \
+ r0 = 0; \
+ exit; \
+" :
+ : __imm(bpf_get_socket_cookie)
+ : __clobber_all);
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/verifier_ctx_sk_msg.c b/tools/testing/selftests/bpf/progs/verifier_ctx_sk_msg.c
new file mode 100644
index 000000000000..65edc89799f9
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/verifier_ctx_sk_msg.c
@@ -0,0 +1,228 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Converted from tools/testing/selftests/bpf/verifier/ctx_sk_msg.c */
+
+#include <linux/bpf.h>
+#include <bpf/bpf_helpers.h>
+#include "bpf_misc.h"
+
+SEC("sk_msg")
+__description("valid access family in SK_MSG")
+__success
+__naked void access_family_in_sk_msg(void)
+{
+ asm volatile (" \
+ r0 = *(u32*)(r1 + %[sk_msg_md_family]); \
+ exit; \
+" :
+ : __imm_const(sk_msg_md_family, offsetof(struct sk_msg_md, family))
+ : __clobber_all);
+}
+
+SEC("sk_msg")
+__description("valid access remote_ip4 in SK_MSG")
+__success
+__naked void remote_ip4_in_sk_msg(void)
+{
+ asm volatile (" \
+ r0 = *(u32*)(r1 + %[sk_msg_md_remote_ip4]); \
+ exit; \
+" :
+ : __imm_const(sk_msg_md_remote_ip4, offsetof(struct sk_msg_md, remote_ip4))
+ : __clobber_all);
+}
+
+SEC("sk_msg")
+__description("valid access local_ip4 in SK_MSG")
+__success
+__naked void local_ip4_in_sk_msg(void)
+{
+ asm volatile (" \
+ r0 = *(u32*)(r1 + %[sk_msg_md_local_ip4]); \
+ exit; \
+" :
+ : __imm_const(sk_msg_md_local_ip4, offsetof(struct sk_msg_md, local_ip4))
+ : __clobber_all);
+}
+
+SEC("sk_msg")
+__description("valid access remote_port in SK_MSG")
+__success
+__naked void remote_port_in_sk_msg(void)
+{
+ asm volatile (" \
+ r0 = *(u32*)(r1 + %[sk_msg_md_remote_port]); \
+ exit; \
+" :
+ : __imm_const(sk_msg_md_remote_port, offsetof(struct sk_msg_md, remote_port))
+ : __clobber_all);
+}
+
+SEC("sk_msg")
+__description("valid access local_port in SK_MSG")
+__success
+__naked void local_port_in_sk_msg(void)
+{
+ asm volatile (" \
+ r0 = *(u32*)(r1 + %[sk_msg_md_local_port]); \
+ exit; \
+" :
+ : __imm_const(sk_msg_md_local_port, offsetof(struct sk_msg_md, local_port))
+ : __clobber_all);
+}
+
+SEC("sk_skb")
+__description("valid access remote_ip6 in SK_MSG")
+__success
+__naked void remote_ip6_in_sk_msg(void)
+{
+ asm volatile (" \
+ r0 = *(u32*)(r1 + %[sk_msg_md_remote_ip6_0]); \
+ r0 = *(u32*)(r1 + %[sk_msg_md_remote_ip6_1]); \
+ r0 = *(u32*)(r1 + %[sk_msg_md_remote_ip6_2]); \
+ r0 = *(u32*)(r1 + %[sk_msg_md_remote_ip6_3]); \
+ exit; \
+" :
+ : __imm_const(sk_msg_md_remote_ip6_0, offsetof(struct sk_msg_md, remote_ip6[0])),
+ __imm_const(sk_msg_md_remote_ip6_1, offsetof(struct sk_msg_md, remote_ip6[1])),
+ __imm_const(sk_msg_md_remote_ip6_2, offsetof(struct sk_msg_md, remote_ip6[2])),
+ __imm_const(sk_msg_md_remote_ip6_3, offsetof(struct sk_msg_md, remote_ip6[3]))
+ : __clobber_all);
+}
+
+SEC("sk_skb")
+__description("valid access local_ip6 in SK_MSG")
+__success
+__naked void local_ip6_in_sk_msg(void)
+{
+ asm volatile (" \
+ r0 = *(u32*)(r1 + %[sk_msg_md_local_ip6_0]); \
+ r0 = *(u32*)(r1 + %[sk_msg_md_local_ip6_1]); \
+ r0 = *(u32*)(r1 + %[sk_msg_md_local_ip6_2]); \
+ r0 = *(u32*)(r1 + %[sk_msg_md_local_ip6_3]); \
+ exit; \
+" :
+ : __imm_const(sk_msg_md_local_ip6_0, offsetof(struct sk_msg_md, local_ip6[0])),
+ __imm_const(sk_msg_md_local_ip6_1, offsetof(struct sk_msg_md, local_ip6[1])),
+ __imm_const(sk_msg_md_local_ip6_2, offsetof(struct sk_msg_md, local_ip6[2])),
+ __imm_const(sk_msg_md_local_ip6_3, offsetof(struct sk_msg_md, local_ip6[3]))
+ : __clobber_all);
+}
+
+SEC("sk_msg")
+__description("valid access size in SK_MSG")
+__success
+__naked void access_size_in_sk_msg(void)
+{
+ asm volatile (" \
+ r0 = *(u32*)(r1 + %[sk_msg_md_size]); \
+ exit; \
+" :
+ : __imm_const(sk_msg_md_size, offsetof(struct sk_msg_md, size))
+ : __clobber_all);
+}
+
+SEC("sk_msg")
+__description("invalid 64B read of size in SK_MSG")
+__failure __msg("invalid bpf_context access")
+__flag(BPF_F_ANY_ALIGNMENT)
+__naked void of_size_in_sk_msg(void)
+{
+ asm volatile (" \
+ r2 = *(u64*)(r1 + %[sk_msg_md_size]); \
+ exit; \
+" :
+ : __imm_const(sk_msg_md_size, offsetof(struct sk_msg_md, size))
+ : __clobber_all);
+}
+
+SEC("sk_msg")
+__description("invalid read past end of SK_MSG")
+__failure __msg("invalid bpf_context access")
+__naked void past_end_of_sk_msg(void)
+{
+ asm volatile (" \
+ r2 = *(u32*)(r1 + %[__imm_0]); \
+ exit; \
+" :
+ : __imm_const(__imm_0, offsetof(struct sk_msg_md, size) + 4)
+ : __clobber_all);
+}
+
+SEC("sk_msg")
+__description("invalid read offset in SK_MSG")
+__failure __msg("invalid bpf_context access")
+__flag(BPF_F_ANY_ALIGNMENT)
+__naked void read_offset_in_sk_msg(void)
+{
+ asm volatile (" \
+ r2 = *(u32*)(r1 + %[__imm_0]); \
+ exit; \
+" :
+ : __imm_const(__imm_0, offsetof(struct sk_msg_md, family) + 1)
+ : __clobber_all);
+}
+
+SEC("sk_msg")
+__description("direct packet read for SK_MSG")
+__success
+__naked void packet_read_for_sk_msg(void)
+{
+ asm volatile (" \
+ r2 = *(u64*)(r1 + %[sk_msg_md_data]); \
+ r3 = *(u64*)(r1 + %[sk_msg_md_data_end]); \
+ r0 = r2; \
+ r0 += 8; \
+ if r0 > r3 goto l0_%=; \
+ r0 = *(u8*)(r2 + 0); \
+l0_%=: r0 = 0; \
+ exit; \
+" :
+ : __imm_const(sk_msg_md_data, offsetof(struct sk_msg_md, data)),
+ __imm_const(sk_msg_md_data_end, offsetof(struct sk_msg_md, data_end))
+ : __clobber_all);
+}
+
+SEC("sk_msg")
+__description("direct packet write for SK_MSG")
+__success
+__naked void packet_write_for_sk_msg(void)
+{
+ asm volatile (" \
+ r2 = *(u64*)(r1 + %[sk_msg_md_data]); \
+ r3 = *(u64*)(r1 + %[sk_msg_md_data_end]); \
+ r0 = r2; \
+ r0 += 8; \
+ if r0 > r3 goto l0_%=; \
+ *(u8*)(r2 + 0) = r2; \
+l0_%=: r0 = 0; \
+ exit; \
+" :
+ : __imm_const(sk_msg_md_data, offsetof(struct sk_msg_md, data)),
+ __imm_const(sk_msg_md_data_end, offsetof(struct sk_msg_md, data_end))
+ : __clobber_all);
+}
+
+SEC("sk_msg")
+__description("overlapping checks for direct packet access SK_MSG")
+__success
+__naked void direct_packet_access_sk_msg(void)
+{
+ asm volatile (" \
+ r2 = *(u64*)(r1 + %[sk_msg_md_data]); \
+ r3 = *(u64*)(r1 + %[sk_msg_md_data_end]); \
+ r0 = r2; \
+ r0 += 8; \
+ if r0 > r3 goto l0_%=; \
+ r1 = r2; \
+ r1 += 6; \
+ if r1 > r3 goto l0_%=; \
+ r0 = *(u16*)(r2 + 6); \
+l0_%=: r0 = 0; \
+ exit; \
+" :
+ : __imm_const(sk_msg_md_data, offsetof(struct sk_msg_md, data)),
+ __imm_const(sk_msg_md_data_end, offsetof(struct sk_msg_md, data_end))
+ : __clobber_all);
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/verifier_d_path.c b/tools/testing/selftests/bpf/progs/verifier_d_path.c
new file mode 100644
index 000000000000..ec79cbcfde91
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/verifier_d_path.c
@@ -0,0 +1,48 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Converted from tools/testing/selftests/bpf/verifier/d_path.c */
+
+#include <linux/bpf.h>
+#include <bpf/bpf_helpers.h>
+#include "bpf_misc.h"
+
+SEC("fentry/dentry_open")
+__description("d_path accept")
+__success __retval(0)
+__naked void d_path_accept(void)
+{
+ asm volatile (" \
+ r1 = *(u32*)(r1 + 0); \
+ r2 = r10; \
+ r2 += -8; \
+ r6 = 0; \
+ *(u64*)(r2 + 0) = r6; \
+ r3 = 8 ll; \
+ call %[bpf_d_path]; \
+ r0 = 0; \
+ exit; \
+" :
+ : __imm(bpf_d_path)
+ : __clobber_all);
+}
+
+SEC("fentry/d_path")
+__description("d_path reject")
+__failure __msg("helper call is not allowed in probe")
+__naked void d_path_reject(void)
+{
+ asm volatile (" \
+ r1 = *(u32*)(r1 + 0); \
+ r2 = r10; \
+ r2 += -8; \
+ r6 = 0; \
+ *(u64*)(r2 + 0) = r6; \
+ r3 = 8 ll; \
+ call %[bpf_d_path]; \
+ r0 = 0; \
+ exit; \
+" :
+ : __imm(bpf_d_path)
+ : __clobber_all);
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/verifier_direct_packet_access.c b/tools/testing/selftests/bpf/progs/verifier_direct_packet_access.c
new file mode 100644
index 000000000000..28b602ac9cbe
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/verifier_direct_packet_access.c
@@ -0,0 +1,803 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Converted from tools/testing/selftests/bpf/verifier/direct_packet_access.c */
+
+#include <linux/bpf.h>
+#include <bpf/bpf_helpers.h>
+#include "bpf_misc.h"
+
+SEC("tc")
+__description("pkt_end - pkt_start is allowed")
+__success __retval(TEST_DATA_LEN)
+__naked void end_pkt_start_is_allowed(void)
+{
+ asm volatile (" \
+ r0 = *(u32*)(r1 + %[__sk_buff_data_end]); \
+ r2 = *(u32*)(r1 + %[__sk_buff_data]); \
+ r0 -= r2; \
+ exit; \
+" :
+ : __imm_const(__sk_buff_data, offsetof(struct __sk_buff, data)),
+ __imm_const(__sk_buff_data_end, offsetof(struct __sk_buff, data_end))
+ : __clobber_all);
+}
+
+SEC("tc")
+__description("direct packet access: test1")
+__success __retval(0)
+__naked void direct_packet_access_test1(void)
+{
+ asm volatile (" \
+ r2 = *(u32*)(r1 + %[__sk_buff_data]); \
+ r3 = *(u32*)(r1 + %[__sk_buff_data_end]); \
+ r0 = r2; \
+ r0 += 8; \
+ if r0 > r3 goto l0_%=; \
+ r0 = *(u8*)(r2 + 0); \
+l0_%=: r0 = 0; \
+ exit; \
+" :
+ : __imm_const(__sk_buff_data, offsetof(struct __sk_buff, data)),
+ __imm_const(__sk_buff_data_end, offsetof(struct __sk_buff, data_end))
+ : __clobber_all);
+}
+
+SEC("tc")
+__description("direct packet access: test2")
+__success __retval(0)
+__naked void direct_packet_access_test2(void)
+{
+ asm volatile (" \
+ r0 = 1; \
+ r4 = *(u32*)(r1 + %[__sk_buff_data_end]); \
+ r3 = *(u32*)(r1 + %[__sk_buff_data]); \
+ r5 = r3; \
+ r5 += 14; \
+ if r5 > r4 goto l0_%=; \
+ r0 = *(u8*)(r3 + 7); \
+ r4 = *(u8*)(r3 + 12); \
+ r4 *= 14; \
+ r3 = *(u32*)(r1 + %[__sk_buff_data]); \
+ r3 += r4; \
+ r2 = *(u32*)(r1 + %[__sk_buff_len]); \
+ r2 <<= 49; \
+ r2 >>= 49; \
+ r3 += r2; \
+ r2 = r3; \
+ r2 += 8; \
+ r1 = *(u32*)(r1 + %[__sk_buff_data_end]); \
+ if r2 > r1 goto l1_%=; \
+ r1 = *(u8*)(r3 + 4); \
+l1_%=: r0 = 0; \
+l0_%=: exit; \
+" :
+ : __imm_const(__sk_buff_data, offsetof(struct __sk_buff, data)),
+ __imm_const(__sk_buff_data_end, offsetof(struct __sk_buff, data_end)),
+ __imm_const(__sk_buff_len, offsetof(struct __sk_buff, len))
+ : __clobber_all);
+}
+
+SEC("socket")
+__description("direct packet access: test3")
+__failure __msg("invalid bpf_context access off=76")
+__failure_unpriv
+__naked void direct_packet_access_test3(void)
+{
+ asm volatile (" \
+ r2 = *(u32*)(r1 + %[__sk_buff_data]); \
+ r0 = 0; \
+ exit; \
+" :
+ : __imm_const(__sk_buff_data, offsetof(struct __sk_buff, data))
+ : __clobber_all);
+}
+
+SEC("tc")
+__description("direct packet access: test4 (write)")
+__success __retval(0)
+__naked void direct_packet_access_test4_write(void)
+{
+ asm volatile (" \
+ r2 = *(u32*)(r1 + %[__sk_buff_data]); \
+ r3 = *(u32*)(r1 + %[__sk_buff_data_end]); \
+ r0 = r2; \
+ r0 += 8; \
+ if r0 > r3 goto l0_%=; \
+ *(u8*)(r2 + 0) = r2; \
+l0_%=: r0 = 0; \
+ exit; \
+" :
+ : __imm_const(__sk_buff_data, offsetof(struct __sk_buff, data)),
+ __imm_const(__sk_buff_data_end, offsetof(struct __sk_buff, data_end))
+ : __clobber_all);
+}
+
+SEC("tc")
+__description("direct packet access: test5 (pkt_end >= reg, good access)")
+__success __retval(0)
+__naked void pkt_end_reg_good_access(void)
+{
+ asm volatile (" \
+ r2 = *(u32*)(r1 + %[__sk_buff_data]); \
+ r3 = *(u32*)(r1 + %[__sk_buff_data_end]); \
+ r0 = r2; \
+ r0 += 8; \
+ if r3 >= r0 goto l0_%=; \
+ r0 = 1; \
+ exit; \
+l0_%=: r0 = *(u8*)(r2 + 0); \
+ r0 = 0; \
+ exit; \
+" :
+ : __imm_const(__sk_buff_data, offsetof(struct __sk_buff, data)),
+ __imm_const(__sk_buff_data_end, offsetof(struct __sk_buff, data_end))
+ : __clobber_all);
+}
+
+SEC("tc")
+__description("direct packet access: test6 (pkt_end >= reg, bad access)")
+__failure __msg("invalid access to packet")
+__naked void pkt_end_reg_bad_access(void)
+{
+ asm volatile (" \
+ r2 = *(u32*)(r1 + %[__sk_buff_data]); \
+ r3 = *(u32*)(r1 + %[__sk_buff_data_end]); \
+ r0 = r2; \
+ r0 += 8; \
+ if r3 >= r0 goto l0_%=; \
+ r0 = *(u8*)(r2 + 0); \
+ r0 = 1; \
+ exit; \
+l0_%=: r0 = 0; \
+ exit; \
+" :
+ : __imm_const(__sk_buff_data, offsetof(struct __sk_buff, data)),
+ __imm_const(__sk_buff_data_end, offsetof(struct __sk_buff, data_end))
+ : __clobber_all);
+}
+
+SEC("tc")
+__description("direct packet access: test7 (pkt_end >= reg, both accesses)")
+__failure __msg("invalid access to packet")
+__naked void pkt_end_reg_both_accesses(void)
+{
+ asm volatile (" \
+ r2 = *(u32*)(r1 + %[__sk_buff_data]); \
+ r3 = *(u32*)(r1 + %[__sk_buff_data_end]); \
+ r0 = r2; \
+ r0 += 8; \
+ if r3 >= r0 goto l0_%=; \
+ r0 = *(u8*)(r2 + 0); \
+ r0 = 1; \
+ exit; \
+l0_%=: r0 = *(u8*)(r2 + 0); \
+ r0 = 0; \
+ exit; \
+" :
+ : __imm_const(__sk_buff_data, offsetof(struct __sk_buff, data)),
+ __imm_const(__sk_buff_data_end, offsetof(struct __sk_buff, data_end))
+ : __clobber_all);
+}
+
+SEC("tc")
+__description("direct packet access: test8 (double test, variant 1)")
+__success __retval(0)
+__naked void test8_double_test_variant_1(void)
+{
+ asm volatile (" \
+ r2 = *(u32*)(r1 + %[__sk_buff_data]); \
+ r3 = *(u32*)(r1 + %[__sk_buff_data_end]); \
+ r0 = r2; \
+ r0 += 8; \
+ if r3 >= r0 goto l0_%=; \
+ if r0 > r3 goto l1_%=; \
+ r0 = *(u8*)(r2 + 0); \
+l1_%=: r0 = 1; \
+ exit; \
+l0_%=: r0 = *(u8*)(r2 + 0); \
+ r0 = 0; \
+ exit; \
+" :
+ : __imm_const(__sk_buff_data, offsetof(struct __sk_buff, data)),
+ __imm_const(__sk_buff_data_end, offsetof(struct __sk_buff, data_end))
+ : __clobber_all);
+}
+
+SEC("tc")
+__description("direct packet access: test9 (double test, variant 2)")
+__success __retval(0)
+__naked void test9_double_test_variant_2(void)
+{
+ asm volatile (" \
+ r2 = *(u32*)(r1 + %[__sk_buff_data]); \
+ r3 = *(u32*)(r1 + %[__sk_buff_data_end]); \
+ r0 = r2; \
+ r0 += 8; \
+ if r3 >= r0 goto l0_%=; \
+ r0 = 1; \
+ exit; \
+l0_%=: if r0 > r3 goto l1_%=; \
+ r0 = *(u8*)(r2 + 0); \
+l1_%=: r0 = *(u8*)(r2 + 0); \
+ r0 = 0; \
+ exit; \
+" :
+ : __imm_const(__sk_buff_data, offsetof(struct __sk_buff, data)),
+ __imm_const(__sk_buff_data_end, offsetof(struct __sk_buff, data_end))
+ : __clobber_all);
+}
+
+SEC("tc")
+__description("direct packet access: test10 (write invalid)")
+__failure __msg("invalid access to packet")
+__naked void packet_access_test10_write_invalid(void)
+{
+ asm volatile (" \
+ r2 = *(u32*)(r1 + %[__sk_buff_data]); \
+ r3 = *(u32*)(r1 + %[__sk_buff_data_end]); \
+ r0 = r2; \
+ r0 += 8; \
+ if r0 > r3 goto l0_%=; \
+ r0 = 0; \
+ exit; \
+l0_%=: *(u8*)(r2 + 0) = r2; \
+ r0 = 0; \
+ exit; \
+" :
+ : __imm_const(__sk_buff_data, offsetof(struct __sk_buff, data)),
+ __imm_const(__sk_buff_data_end, offsetof(struct __sk_buff, data_end))
+ : __clobber_all);
+}
+
+SEC("tc")
+__description("direct packet access: test11 (shift, good access)")
+__success __retval(1)
+__naked void access_test11_shift_good_access(void)
+{
+ asm volatile (" \
+ r2 = *(u32*)(r1 + %[__sk_buff_data]); \
+ r3 = *(u32*)(r1 + %[__sk_buff_data_end]); \
+ r0 = r2; \
+ r0 += 22; \
+ if r0 > r3 goto l0_%=; \
+ r3 = 144; \
+ r5 = r3; \
+ r5 += 23; \
+ r5 >>= 3; \
+ r6 = r2; \
+ r6 += r5; \
+ r0 = 1; \
+ exit; \
+l0_%=: r0 = 0; \
+ exit; \
+" :
+ : __imm_const(__sk_buff_data, offsetof(struct __sk_buff, data)),
+ __imm_const(__sk_buff_data_end, offsetof(struct __sk_buff, data_end))
+ : __clobber_all);
+}
+
+SEC("tc")
+__description("direct packet access: test12 (and, good access)")
+__success __retval(1)
+__naked void access_test12_and_good_access(void)
+{
+ asm volatile (" \
+ r2 = *(u32*)(r1 + %[__sk_buff_data]); \
+ r3 = *(u32*)(r1 + %[__sk_buff_data_end]); \
+ r0 = r2; \
+ r0 += 22; \
+ if r0 > r3 goto l0_%=; \
+ r3 = 144; \
+ r5 = r3; \
+ r5 += 23; \
+ r5 &= 15; \
+ r6 = r2; \
+ r6 += r5; \
+ r0 = 1; \
+ exit; \
+l0_%=: r0 = 0; \
+ exit; \
+" :
+ : __imm_const(__sk_buff_data, offsetof(struct __sk_buff, data)),
+ __imm_const(__sk_buff_data_end, offsetof(struct __sk_buff, data_end))
+ : __clobber_all);
+}
+
+SEC("tc")
+__description("direct packet access: test13 (branches, good access)")
+__success __retval(1)
+__naked void access_test13_branches_good_access(void)
+{
+ asm volatile (" \
+ r2 = *(u32*)(r1 + %[__sk_buff_data]); \
+ r3 = *(u32*)(r1 + %[__sk_buff_data_end]); \
+ r0 = r2; \
+ r0 += 22; \
+ if r0 > r3 goto l0_%=; \
+ r3 = *(u32*)(r1 + %[__sk_buff_mark]); \
+ r4 = 1; \
+ if r3 > r4 goto l1_%=; \
+ r3 = 14; \
+ goto l2_%=; \
+l1_%=: r3 = 24; \
+l2_%=: r5 = r3; \
+ r5 += 23; \
+ r5 &= 15; \
+ r6 = r2; \
+ r6 += r5; \
+ r0 = 1; \
+ exit; \
+l0_%=: r0 = 0; \
+ exit; \
+" :
+ : __imm_const(__sk_buff_data, offsetof(struct __sk_buff, data)),
+ __imm_const(__sk_buff_data_end, offsetof(struct __sk_buff, data_end)),
+ __imm_const(__sk_buff_mark, offsetof(struct __sk_buff, mark))
+ : __clobber_all);
+}
+
+SEC("tc")
+__description("direct packet access: test14 (pkt_ptr += 0, CONST_IMM, good access)")
+__success __retval(1)
+__naked void _0_const_imm_good_access(void)
+{
+ asm volatile (" \
+ r2 = *(u32*)(r1 + %[__sk_buff_data]); \
+ r3 = *(u32*)(r1 + %[__sk_buff_data_end]); \
+ r0 = r2; \
+ r0 += 22; \
+ if r0 > r3 goto l0_%=; \
+ r5 = 12; \
+ r5 >>= 4; \
+ r6 = r2; \
+ r6 += r5; \
+ r0 = *(u8*)(r6 + 0); \
+ r0 = 1; \
+ exit; \
+l0_%=: r0 = 0; \
+ exit; \
+" :
+ : __imm_const(__sk_buff_data, offsetof(struct __sk_buff, data)),
+ __imm_const(__sk_buff_data_end, offsetof(struct __sk_buff, data_end))
+ : __clobber_all);
+}
+
+SEC("tc")
+__description("direct packet access: test15 (spill with xadd)")
+__failure __msg("R2 invalid mem access 'scalar'")
+__flag(BPF_F_ANY_ALIGNMENT)
+__naked void access_test15_spill_with_xadd(void)
+{
+ asm volatile (" \
+ r2 = *(u32*)(r1 + %[__sk_buff_data]); \
+ r3 = *(u32*)(r1 + %[__sk_buff_data_end]); \
+ r0 = r2; \
+ r0 += 8; \
+ if r0 > r3 goto l0_%=; \
+ r5 = 4096; \
+ r4 = r10; \
+ r4 += -8; \
+ *(u64*)(r4 + 0) = r2; \
+ lock *(u64 *)(r4 + 0) += r5; \
+ r2 = *(u64*)(r4 + 0); \
+ *(u32*)(r2 + 0) = r5; \
+ r0 = 0; \
+l0_%=: exit; \
+" :
+ : __imm_const(__sk_buff_data, offsetof(struct __sk_buff, data)),
+ __imm_const(__sk_buff_data_end, offsetof(struct __sk_buff, data_end))
+ : __clobber_all);
+}
+
+SEC("tc")
+__description("direct packet access: test16 (arith on data_end)")
+__failure __msg("R3 pointer arithmetic on pkt_end")
+__naked void test16_arith_on_data_end(void)
+{
+ asm volatile (" \
+ r2 = *(u32*)(r1 + %[__sk_buff_data]); \
+ r3 = *(u32*)(r1 + %[__sk_buff_data_end]); \
+ r0 = r2; \
+ r0 += 8; \
+ r3 += 16; \
+ if r0 > r3 goto l0_%=; \
+ *(u8*)(r2 + 0) = r2; \
+l0_%=: r0 = 0; \
+ exit; \
+" :
+ : __imm_const(__sk_buff_data, offsetof(struct __sk_buff, data)),
+ __imm_const(__sk_buff_data_end, offsetof(struct __sk_buff, data_end))
+ : __clobber_all);
+}
+
+SEC("tc")
+__description("direct packet access: test17 (pruning, alignment)")
+__failure __msg("misaligned packet access off 2+0+15+-4 size 4")
+__flag(BPF_F_STRICT_ALIGNMENT)
+__naked void packet_access_test17_pruning_alignment(void)
+{
+ asm volatile (" \
+ r2 = *(u32*)(r1 + %[__sk_buff_data]); \
+ r3 = *(u32*)(r1 + %[__sk_buff_data_end]); \
+ r7 = *(u32*)(r1 + %[__sk_buff_mark]); \
+ r0 = r2; \
+ r0 += 14; \
+ if r7 > 1 goto l0_%=; \
+l2_%=: if r0 > r3 goto l1_%=; \
+ *(u32*)(r0 - 4) = r0; \
+l1_%=: r0 = 0; \
+ exit; \
+l0_%=: r0 += 1; \
+ goto l2_%=; \
+" :
+ : __imm_const(__sk_buff_data, offsetof(struct __sk_buff, data)),
+ __imm_const(__sk_buff_data_end, offsetof(struct __sk_buff, data_end)),
+ __imm_const(__sk_buff_mark, offsetof(struct __sk_buff, mark))
+ : __clobber_all);
+}
+
+SEC("tc")
+__description("direct packet access: test18 (imm += pkt_ptr, 1)")
+__success __retval(0)
+__naked void test18_imm_pkt_ptr_1(void)
+{
+ asm volatile (" \
+ r2 = *(u32*)(r1 + %[__sk_buff_data]); \
+ r3 = *(u32*)(r1 + %[__sk_buff_data_end]); \
+ r0 = 8; \
+ r0 += r2; \
+ if r0 > r3 goto l0_%=; \
+ *(u8*)(r2 + 0) = r2; \
+l0_%=: r0 = 0; \
+ exit; \
+" :
+ : __imm_const(__sk_buff_data, offsetof(struct __sk_buff, data)),
+ __imm_const(__sk_buff_data_end, offsetof(struct __sk_buff, data_end))
+ : __clobber_all);
+}
+
+SEC("tc")
+__description("direct packet access: test19 (imm += pkt_ptr, 2)")
+__success __retval(0)
+__naked void test19_imm_pkt_ptr_2(void)
+{
+ asm volatile (" \
+ r2 = *(u32*)(r1 + %[__sk_buff_data]); \
+ r3 = *(u32*)(r1 + %[__sk_buff_data_end]); \
+ r0 = r2; \
+ r0 += 8; \
+ if r0 > r3 goto l0_%=; \
+ r4 = 4; \
+ r4 += r2; \
+ *(u8*)(r4 + 0) = r4; \
+l0_%=: r0 = 0; \
+ exit; \
+" :
+ : __imm_const(__sk_buff_data, offsetof(struct __sk_buff, data)),
+ __imm_const(__sk_buff_data_end, offsetof(struct __sk_buff, data_end))
+ : __clobber_all);
+}
+
+SEC("tc")
+__description("direct packet access: test20 (x += pkt_ptr, 1)")
+__success __retval(0) __flag(BPF_F_ANY_ALIGNMENT)
+__naked void test20_x_pkt_ptr_1(void)
+{
+ asm volatile (" \
+ r2 = *(u32*)(r1 + %[__sk_buff_data]); \
+ r3 = *(u32*)(r1 + %[__sk_buff_data_end]); \
+ r0 = 0xffffffff; \
+ *(u64*)(r10 - 8) = r0; \
+ r0 = *(u64*)(r10 - 8); \
+ r0 &= 0x7fff; \
+ r4 = r0; \
+ r4 += r2; \
+ r5 = r4; \
+ r4 += %[__imm_0]; \
+ if r4 > r3 goto l0_%=; \
+ *(u64*)(r5 + 0) = r4; \
+l0_%=: r0 = 0; \
+ exit; \
+" :
+ : __imm_const(__imm_0, 0x7fff - 1),
+ __imm_const(__sk_buff_data, offsetof(struct __sk_buff, data)),
+ __imm_const(__sk_buff_data_end, offsetof(struct __sk_buff, data_end))
+ : __clobber_all);
+}
+
+SEC("tc")
+__description("direct packet access: test21 (x += pkt_ptr, 2)")
+__success __retval(0) __flag(BPF_F_ANY_ALIGNMENT)
+__naked void test21_x_pkt_ptr_2(void)
+{
+ asm volatile (" \
+ r2 = *(u32*)(r1 + %[__sk_buff_data]); \
+ r3 = *(u32*)(r1 + %[__sk_buff_data_end]); \
+ r0 = r2; \
+ r0 += 8; \
+ if r0 > r3 goto l0_%=; \
+ r4 = 0xffffffff; \
+ *(u64*)(r10 - 8) = r4; \
+ r4 = *(u64*)(r10 - 8); \
+ r4 &= 0x7fff; \
+ r4 += r2; \
+ r5 = r4; \
+ r4 += %[__imm_0]; \
+ if r4 > r3 goto l0_%=; \
+ *(u64*)(r5 + 0) = r4; \
+l0_%=: r0 = 0; \
+ exit; \
+" :
+ : __imm_const(__imm_0, 0x7fff - 1),
+ __imm_const(__sk_buff_data, offsetof(struct __sk_buff, data)),
+ __imm_const(__sk_buff_data_end, offsetof(struct __sk_buff, data_end))
+ : __clobber_all);
+}
+
+SEC("tc")
+__description("direct packet access: test22 (x += pkt_ptr, 3)")
+__success __retval(0) __flag(BPF_F_ANY_ALIGNMENT)
+__naked void test22_x_pkt_ptr_3(void)
+{
+ asm volatile (" \
+ r2 = *(u32*)(r1 + %[__sk_buff_data]); \
+ r3 = *(u32*)(r1 + %[__sk_buff_data_end]); \
+ r0 = r2; \
+ r0 += 8; \
+ *(u64*)(r10 - 8) = r2; \
+ *(u64*)(r10 - 16) = r3; \
+ r3 = *(u64*)(r10 - 16); \
+ if r0 > r3 goto l0_%=; \
+ r2 = *(u64*)(r10 - 8); \
+ r4 = 0xffffffff; \
+ lock *(u64 *)(r10 - 8) += r4; \
+ r4 = *(u64*)(r10 - 8); \
+ r4 >>= 49; \
+ r4 += r2; \
+ r0 = r4; \
+ r0 += 2; \
+ if r0 > r3 goto l0_%=; \
+ r2 = 1; \
+ *(u16*)(r4 + 0) = r2; \
+l0_%=: r0 = 0; \
+ exit; \
+" :
+ : __imm_const(__sk_buff_data, offsetof(struct __sk_buff, data)),
+ __imm_const(__sk_buff_data_end, offsetof(struct __sk_buff, data_end))
+ : __clobber_all);
+}
+
+SEC("tc")
+__description("direct packet access: test23 (x += pkt_ptr, 4)")
+__failure __msg("invalid access to packet, off=0 size=8, R5(id=3,off=0,r=0)")
+__flag(BPF_F_ANY_ALIGNMENT)
+__naked void test23_x_pkt_ptr_4(void)
+{
+ asm volatile (" \
+ r2 = *(u32*)(r1 + %[__sk_buff_data]); \
+ r3 = *(u32*)(r1 + %[__sk_buff_data_end]); \
+ r0 = *(u32*)(r1 + %[__sk_buff_mark]); \
+ *(u64*)(r10 - 8) = r0; \
+ r0 = *(u64*)(r10 - 8); \
+ r0 &= 0xffff; \
+ r4 = r0; \
+ r0 = 31; \
+ r0 += r4; \
+ r0 += r2; \
+ r5 = r0; \
+ r0 += %[__imm_0]; \
+ if r0 > r3 goto l0_%=; \
+ *(u64*)(r5 + 0) = r0; \
+l0_%=: r0 = 0; \
+ exit; \
+" :
+ : __imm_const(__imm_0, 0xffff - 1),
+ __imm_const(__sk_buff_data, offsetof(struct __sk_buff, data)),
+ __imm_const(__sk_buff_data_end, offsetof(struct __sk_buff, data_end)),
+ __imm_const(__sk_buff_mark, offsetof(struct __sk_buff, mark))
+ : __clobber_all);
+}
+
+SEC("tc")
+__description("direct packet access: test24 (x += pkt_ptr, 5)")
+__success __retval(0) __flag(BPF_F_ANY_ALIGNMENT)
+__naked void test24_x_pkt_ptr_5(void)
+{
+ asm volatile (" \
+ r2 = *(u32*)(r1 + %[__sk_buff_data]); \
+ r3 = *(u32*)(r1 + %[__sk_buff_data_end]); \
+ r0 = 0xffffffff; \
+ *(u64*)(r10 - 8) = r0; \
+ r0 = *(u64*)(r10 - 8); \
+ r0 &= 0xff; \
+ r4 = r0; \
+ r0 = 64; \
+ r0 += r4; \
+ r0 += r2; \
+ r5 = r0; \
+ r0 += %[__imm_0]; \
+ if r0 > r3 goto l0_%=; \
+ *(u64*)(r5 + 0) = r0; \
+l0_%=: r0 = 0; \
+ exit; \
+" :
+ : __imm_const(__imm_0, 0x7fff - 1),
+ __imm_const(__sk_buff_data, offsetof(struct __sk_buff, data)),
+ __imm_const(__sk_buff_data_end, offsetof(struct __sk_buff, data_end))
+ : __clobber_all);
+}
+
+SEC("tc")
+__description("direct packet access: test25 (marking on <, good access)")
+__success __retval(0)
+__naked void test25_marking_on_good_access(void)
+{
+ asm volatile (" \
+ r2 = *(u32*)(r1 + %[__sk_buff_data]); \
+ r3 = *(u32*)(r1 + %[__sk_buff_data_end]); \
+ r0 = r2; \
+ r0 += 8; \
+ if r0 < r3 goto l0_%=; \
+l1_%=: r0 = 0; \
+ exit; \
+l0_%=: r0 = *(u8*)(r2 + 0); \
+ goto l1_%=; \
+" :
+ : __imm_const(__sk_buff_data, offsetof(struct __sk_buff, data)),
+ __imm_const(__sk_buff_data_end, offsetof(struct __sk_buff, data_end))
+ : __clobber_all);
+}
+
+SEC("tc")
+__description("direct packet access: test26 (marking on <, bad access)")
+__failure __msg("invalid access to packet")
+__naked void test26_marking_on_bad_access(void)
+{
+ asm volatile (" \
+ r2 = *(u32*)(r1 + %[__sk_buff_data]); \
+ r3 = *(u32*)(r1 + %[__sk_buff_data_end]); \
+ r0 = r2; \
+ r0 += 8; \
+ if r0 < r3 goto l0_%=; \
+ r0 = *(u8*)(r2 + 0); \
+l1_%=: r0 = 0; \
+ exit; \
+l0_%=: goto l1_%=; \
+" :
+ : __imm_const(__sk_buff_data, offsetof(struct __sk_buff, data)),
+ __imm_const(__sk_buff_data_end, offsetof(struct __sk_buff, data_end))
+ : __clobber_all);
+}
+
+SEC("tc")
+__description("direct packet access: test27 (marking on <=, good access)")
+__success __retval(1)
+__naked void test27_marking_on_good_access(void)
+{
+ asm volatile (" \
+ r2 = *(u32*)(r1 + %[__sk_buff_data]); \
+ r3 = *(u32*)(r1 + %[__sk_buff_data_end]); \
+ r0 = r2; \
+ r0 += 8; \
+ if r3 <= r0 goto l0_%=; \
+ r0 = *(u8*)(r2 + 0); \
+l0_%=: r0 = 1; \
+ exit; \
+" :
+ : __imm_const(__sk_buff_data, offsetof(struct __sk_buff, data)),
+ __imm_const(__sk_buff_data_end, offsetof(struct __sk_buff, data_end))
+ : __clobber_all);
+}
+
+SEC("tc")
+__description("direct packet access: test28 (marking on <=, bad access)")
+__failure __msg("invalid access to packet")
+__naked void test28_marking_on_bad_access(void)
+{
+ asm volatile (" \
+ r2 = *(u32*)(r1 + %[__sk_buff_data]); \
+ r3 = *(u32*)(r1 + %[__sk_buff_data_end]); \
+ r0 = r2; \
+ r0 += 8; \
+ if r3 <= r0 goto l0_%=; \
+l1_%=: r0 = 1; \
+ exit; \
+l0_%=: r0 = *(u8*)(r2 + 0); \
+ goto l1_%=; \
+" :
+ : __imm_const(__sk_buff_data, offsetof(struct __sk_buff, data)),
+ __imm_const(__sk_buff_data_end, offsetof(struct __sk_buff, data_end))
+ : __clobber_all);
+}
+
+SEC("tc")
+__description("direct packet access: test29 (reg > pkt_end in subprog)")
+__success __retval(0)
+__naked void reg_pkt_end_in_subprog(void)
+{
+ asm volatile (" \
+ r6 = *(u32*)(r1 + %[__sk_buff_data]); \
+ r2 = *(u32*)(r1 + %[__sk_buff_data_end]); \
+ r3 = r6; \
+ r3 += 8; \
+ call reg_pkt_end_in_subprog__1; \
+ if r0 == 0 goto l0_%=; \
+ r0 = *(u8*)(r6 + 0); \
+l0_%=: r0 = 0; \
+ exit; \
+" :
+ : __imm_const(__sk_buff_data, offsetof(struct __sk_buff, data)),
+ __imm_const(__sk_buff_data_end, offsetof(struct __sk_buff, data_end))
+ : __clobber_all);
+}
+
+static __naked __noinline __attribute__((used))
+void reg_pkt_end_in_subprog__1(void)
+{
+ asm volatile (" \
+ r0 = 0; \
+ if r3 > r2 goto l0_%=; \
+ r0 = 1; \
+l0_%=: exit; \
+" ::: __clobber_all);
+}
+
+SEC("tc")
+__description("direct packet access: test30 (check_id() in regsafe(), bad access)")
+__failure __msg("invalid access to packet, off=0 size=1, R2")
+__flag(BPF_F_TEST_STATE_FREQ)
+__naked void id_in_regsafe_bad_access(void)
+{
+ asm volatile (" \
+ /* r9 = ctx */ \
+ r9 = r1; \
+ /* r7 = ktime_get_ns() */ \
+ call %[bpf_ktime_get_ns]; \
+ r7 = r0; \
+ /* r6 = ktime_get_ns() */ \
+ call %[bpf_ktime_get_ns]; \
+ r6 = r0; \
+ /* r2 = ctx->data \
+ * r3 = ctx->data \
+ * r4 = ctx->data_end \
+ */ \
+ r2 = *(u32*)(r9 + %[__sk_buff_data]); \
+ r3 = *(u32*)(r9 + %[__sk_buff_data]); \
+ r4 = *(u32*)(r9 + %[__sk_buff_data_end]); \
+ /* if r6 > 100 goto exit \
+ * if r7 > 100 goto exit \
+ */ \
+ if r6 > 100 goto l0_%=; \
+ if r7 > 100 goto l0_%=; \
+ /* r2 += r6 ; this forces assignment of ID to r2\
+ * r2 += 1 ; get some fixed off for r2\
+ * r3 += r7 ; this forces assignment of ID to r3\
+ * r3 += 1 ; get some fixed off for r3\
+ */ \
+ r2 += r6; \
+ r2 += 1; \
+ r3 += r7; \
+ r3 += 1; \
+ /* if r6 > r7 goto +1 ; no new information about the state is derived from\
+ * ; this check, thus produced verifier states differ\
+ * ; only in 'insn_idx' \
+ * r2 = r3 ; optionally share ID between r2 and r3\
+ */ \
+ if r6 != r7 goto l1_%=; \
+ r2 = r3; \
+l1_%=: /* if r3 > ctx->data_end goto exit */ \
+ if r3 > r4 goto l0_%=; \
+ /* r5 = *(u8 *) (r2 - 1) ; access packet memory using r2,\
+ * ; this is not always safe\
+ */ \
+ r5 = *(u8*)(r2 - 1); \
+l0_%=: /* exit(0) */ \
+ r0 = 0; \
+ exit; \
+" :
+ : __imm(bpf_ktime_get_ns),
+ __imm_const(__sk_buff_data, offsetof(struct __sk_buff, data)),
+ __imm_const(__sk_buff_data_end, offsetof(struct __sk_buff, data_end))
+ : __clobber_all);
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/verifier_direct_stack_access_wraparound.c b/tools/testing/selftests/bpf/progs/verifier_direct_stack_access_wraparound.c
new file mode 100644
index 000000000000..c538c6893552
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/verifier_direct_stack_access_wraparound.c
@@ -0,0 +1,56 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Converted from tools/testing/selftests/bpf/verifier/direct_stack_access_wraparound.c */
+
+#include <linux/bpf.h>
+#include <bpf/bpf_helpers.h>
+#include "bpf_misc.h"
+
+SEC("socket")
+__description("direct stack access with 32-bit wraparound. test1")
+__failure __msg("fp pointer and 2147483647")
+__failure_unpriv
+__naked void with_32_bit_wraparound_test1(void)
+{
+ asm volatile (" \
+ r1 = r10; \
+ r1 += 0x7fffffff; \
+ r1 += 0x7fffffff; \
+ w0 = 0; \
+ *(u8*)(r1 + 0) = r0; \
+ exit; \
+" ::: __clobber_all);
+}
+
+SEC("socket")
+__description("direct stack access with 32-bit wraparound. test2")
+__failure __msg("fp pointer and 1073741823")
+__failure_unpriv
+__naked void with_32_bit_wraparound_test2(void)
+{
+ asm volatile (" \
+ r1 = r10; \
+ r1 += 0x3fffffff; \
+ r1 += 0x3fffffff; \
+ w0 = 0; \
+ *(u8*)(r1 + 0) = r0; \
+ exit; \
+" ::: __clobber_all);
+}
+
+SEC("socket")
+__description("direct stack access with 32-bit wraparound. test3")
+__failure __msg("fp pointer offset 1073741822")
+__msg_unpriv("R1 stack pointer arithmetic goes out of range")
+__naked void with_32_bit_wraparound_test3(void)
+{
+ asm volatile (" \
+ r1 = r10; \
+ r1 += 0x1fffffff; \
+ r1 += 0x1fffffff; \
+ w0 = 0; \
+ *(u8*)(r1 + 0) = r0; \
+ exit; \
+" ::: __clobber_all);
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/verifier_div0.c b/tools/testing/selftests/bpf/progs/verifier_div0.c
new file mode 100644
index 000000000000..cca5ea18fc28
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/verifier_div0.c
@@ -0,0 +1,213 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Converted from tools/testing/selftests/bpf/verifier/div0.c */
+
+#include <linux/bpf.h>
+#include <bpf/bpf_helpers.h>
+#include "bpf_misc.h"
+
+SEC("socket")
+__description("DIV32 by 0, zero check 1")
+__success __success_unpriv __retval(42)
+__naked void by_0_zero_check_1_1(void)
+{
+ asm volatile (" \
+ w0 = 42; \
+ w1 = 0; \
+ w2 = 1; \
+ w2 /= w1; \
+ exit; \
+" ::: __clobber_all);
+}
+
+SEC("socket")
+__description("DIV32 by 0, zero check 2")
+__success __success_unpriv __retval(42)
+__naked void by_0_zero_check_2_1(void)
+{
+ asm volatile (" \
+ w0 = 42; \
+ r1 = 0xffffffff00000000LL ll; \
+ w2 = 1; \
+ w2 /= w1; \
+ exit; \
+" ::: __clobber_all);
+}
+
+SEC("socket")
+__description("DIV64 by 0, zero check")
+__success __success_unpriv __retval(42)
+__naked void div64_by_0_zero_check(void)
+{
+ asm volatile (" \
+ w0 = 42; \
+ w1 = 0; \
+ w2 = 1; \
+ r2 /= r1; \
+ exit; \
+" ::: __clobber_all);
+}
+
+SEC("socket")
+__description("MOD32 by 0, zero check 1")
+__success __success_unpriv __retval(42)
+__naked void by_0_zero_check_1_2(void)
+{
+ asm volatile (" \
+ w0 = 42; \
+ w1 = 0; \
+ w2 = 1; \
+ w2 %%= w1; \
+ exit; \
+" ::: __clobber_all);
+}
+
+SEC("socket")
+__description("MOD32 by 0, zero check 2")
+__success __success_unpriv __retval(42)
+__naked void by_0_zero_check_2_2(void)
+{
+ asm volatile (" \
+ w0 = 42; \
+ r1 = 0xffffffff00000000LL ll; \
+ w2 = 1; \
+ w2 %%= w1; \
+ exit; \
+" ::: __clobber_all);
+}
+
+SEC("socket")
+__description("MOD64 by 0, zero check")
+__success __success_unpriv __retval(42)
+__naked void mod64_by_0_zero_check(void)
+{
+ asm volatile (" \
+ w0 = 42; \
+ w1 = 0; \
+ w2 = 1; \
+ r2 %%= r1; \
+ exit; \
+" ::: __clobber_all);
+}
+
+SEC("tc")
+__description("DIV32 by 0, zero check ok, cls")
+__success __retval(8)
+__naked void _0_zero_check_ok_cls_1(void)
+{
+ asm volatile (" \
+ w0 = 42; \
+ w1 = 2; \
+ w2 = 16; \
+ w2 /= w1; \
+ r0 = r2; \
+ exit; \
+" ::: __clobber_all);
+}
+
+SEC("tc")
+__description("DIV32 by 0, zero check 1, cls")
+__success __retval(0)
+__naked void _0_zero_check_1_cls_1(void)
+{
+ asm volatile (" \
+ w1 = 0; \
+ w0 = 1; \
+ w0 /= w1; \
+ exit; \
+" ::: __clobber_all);
+}
+
+SEC("tc")
+__description("DIV32 by 0, zero check 2, cls")
+__success __retval(0)
+__naked void _0_zero_check_2_cls_1(void)
+{
+ asm volatile (" \
+ r1 = 0xffffffff00000000LL ll; \
+ w0 = 1; \
+ w0 /= w1; \
+ exit; \
+" ::: __clobber_all);
+}
+
+SEC("tc")
+__description("DIV64 by 0, zero check, cls")
+__success __retval(0)
+__naked void by_0_zero_check_cls(void)
+{
+ asm volatile (" \
+ w1 = 0; \
+ w0 = 1; \
+ r0 /= r1; \
+ exit; \
+" ::: __clobber_all);
+}
+
+SEC("tc")
+__description("MOD32 by 0, zero check ok, cls")
+__success __retval(2)
+__naked void _0_zero_check_ok_cls_2(void)
+{
+ asm volatile (" \
+ w0 = 42; \
+ w1 = 3; \
+ w2 = 5; \
+ w2 %%= w1; \
+ r0 = r2; \
+ exit; \
+" ::: __clobber_all);
+}
+
+SEC("tc")
+__description("MOD32 by 0, zero check 1, cls")
+__success __retval(1)
+__naked void _0_zero_check_1_cls_2(void)
+{
+ asm volatile (" \
+ w1 = 0; \
+ w0 = 1; \
+ w0 %%= w1; \
+ exit; \
+" ::: __clobber_all);
+}
+
+SEC("tc")
+__description("MOD32 by 0, zero check 2, cls")
+__success __retval(1)
+__naked void _0_zero_check_2_cls_2(void)
+{
+ asm volatile (" \
+ r1 = 0xffffffff00000000LL ll; \
+ w0 = 1; \
+ w0 %%= w1; \
+ exit; \
+" ::: __clobber_all);
+}
+
+SEC("tc")
+__description("MOD64 by 0, zero check 1, cls")
+__success __retval(2)
+__naked void _0_zero_check_1_cls_3(void)
+{
+ asm volatile (" \
+ w1 = 0; \
+ w0 = 2; \
+ r0 %%= r1; \
+ exit; \
+" ::: __clobber_all);
+}
+
+SEC("tc")
+__description("MOD64 by 0, zero check 2, cls")
+__success __retval(-1)
+__naked void _0_zero_check_2_cls_3(void)
+{
+ asm volatile (" \
+ w1 = 0; \
+ w0 = -1; \
+ r0 %%= r1; \
+ exit; \
+" ::: __clobber_all);
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/verifier_div_overflow.c b/tools/testing/selftests/bpf/progs/verifier_div_overflow.c
new file mode 100644
index 000000000000..458984da804c
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/verifier_div_overflow.c
@@ -0,0 +1,144 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Converted from tools/testing/selftests/bpf/verifier/div_overflow.c */
+
+#include <linux/bpf.h>
+#include <bpf/bpf_helpers.h>
+#include <limits.h>
+#include "bpf_misc.h"
+
+/* Just make sure that JITs used udiv/umod as otherwise we get
+ * an exception from INT_MIN/-1 overflow similarly as with div
+ * by zero.
+ */
+
+SEC("tc")
+__description("DIV32 overflow, check 1")
+__success __retval(0)
+__naked void div32_overflow_check_1(void)
+{
+ asm volatile (" \
+ w1 = -1; \
+ w0 = %[int_min]; \
+ w0 /= w1; \
+ exit; \
+" :
+ : __imm_const(int_min, INT_MIN)
+ : __clobber_all);
+}
+
+SEC("tc")
+__description("DIV32 overflow, check 2")
+__success __retval(0)
+__naked void div32_overflow_check_2(void)
+{
+ asm volatile (" \
+ w0 = %[int_min]; \
+ w0 /= -1; \
+ exit; \
+" :
+ : __imm_const(int_min, INT_MIN)
+ : __clobber_all);
+}
+
+SEC("tc")
+__description("DIV64 overflow, check 1")
+__success __retval(0)
+__naked void div64_overflow_check_1(void)
+{
+ asm volatile (" \
+ r1 = -1; \
+ r2 = %[llong_min] ll; \
+ r2 /= r1; \
+ w0 = 0; \
+ if r0 == r2 goto l0_%=; \
+ w0 = 1; \
+l0_%=: exit; \
+" :
+ : __imm_const(llong_min, LLONG_MIN)
+ : __clobber_all);
+}
+
+SEC("tc")
+__description("DIV64 overflow, check 2")
+__success __retval(0)
+__naked void div64_overflow_check_2(void)
+{
+ asm volatile (" \
+ r1 = %[llong_min] ll; \
+ r1 /= -1; \
+ w0 = 0; \
+ if r0 == r1 goto l0_%=; \
+ w0 = 1; \
+l0_%=: exit; \
+" :
+ : __imm_const(llong_min, LLONG_MIN)
+ : __clobber_all);
+}
+
+SEC("tc")
+__description("MOD32 overflow, check 1")
+__success __retval(INT_MIN)
+__naked void mod32_overflow_check_1(void)
+{
+ asm volatile (" \
+ w1 = -1; \
+ w0 = %[int_min]; \
+ w0 %%= w1; \
+ exit; \
+" :
+ : __imm_const(int_min, INT_MIN)
+ : __clobber_all);
+}
+
+SEC("tc")
+__description("MOD32 overflow, check 2")
+__success __retval(INT_MIN)
+__naked void mod32_overflow_check_2(void)
+{
+ asm volatile (" \
+ w0 = %[int_min]; \
+ w0 %%= -1; \
+ exit; \
+" :
+ : __imm_const(int_min, INT_MIN)
+ : __clobber_all);
+}
+
+SEC("tc")
+__description("MOD64 overflow, check 1")
+__success __retval(1)
+__naked void mod64_overflow_check_1(void)
+{
+ asm volatile (" \
+ r1 = -1; \
+ r2 = %[llong_min] ll; \
+ r3 = r2; \
+ r2 %%= r1; \
+ w0 = 0; \
+ if r3 != r2 goto l0_%=; \
+ w0 = 1; \
+l0_%=: exit; \
+" :
+ : __imm_const(llong_min, LLONG_MIN)
+ : __clobber_all);
+}
+
+SEC("tc")
+__description("MOD64 overflow, check 2")
+__success __retval(1)
+__naked void mod64_overflow_check_2(void)
+{
+ asm volatile (" \
+ r2 = %[llong_min] ll; \
+ r3 = r2; \
+ r2 %%= -1; \
+ w0 = 0; \
+ if r3 != r2 goto l0_%=; \
+ w0 = 1; \
+l0_%=: exit; \
+" :
+ : __imm_const(llong_min, LLONG_MIN)
+ : __clobber_all);
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/verifier_global_ptr_args.c b/tools/testing/selftests/bpf/progs/verifier_global_ptr_args.c
new file mode 100644
index 000000000000..4ab0ef18d7eb
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/verifier_global_ptr_args.c
@@ -0,0 +1,182 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2024 Meta Platforms, Inc. and affiliates. */
+
+#include <vmlinux.h>
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
+#include <bpf/bpf_core_read.h>
+#include "bpf_misc.h"
+#include "xdp_metadata.h"
+#include "bpf_kfuncs.h"
+
+extern struct task_struct *bpf_task_acquire(struct task_struct *p) __ksym __weak;
+extern void bpf_task_release(struct task_struct *p) __ksym __weak;
+
+__weak int subprog_trusted_task_nullable(struct task_struct *task __arg_trusted __arg_nullable)
+{
+ if (!task)
+ return 0;
+ return task->pid + task->tgid;
+}
+
+__weak int subprog_trusted_task_nullable_extra_layer(struct task_struct *task __arg_trusted __arg_nullable)
+{
+ return subprog_trusted_task_nullable(task) + subprog_trusted_task_nullable(NULL);
+}
+
+SEC("?tp_btf/task_newtask")
+__success __log_level(2)
+__msg("Validating subprog_trusted_task_nullable() func#1...")
+__msg(": R1=trusted_ptr_or_null_task_struct(")
+int trusted_task_arg_nullable(void *ctx)
+{
+ struct task_struct *t1 = bpf_get_current_task_btf();
+ struct task_struct *t2 = bpf_task_acquire(t1);
+ int res = 0;
+
+ /* known NULL */
+ res += subprog_trusted_task_nullable(NULL);
+
+ /* known non-NULL */
+ res += subprog_trusted_task_nullable(t1);
+ res += subprog_trusted_task_nullable_extra_layer(t1);
+
+ /* unknown if NULL or not */
+ res += subprog_trusted_task_nullable(t2);
+ res += subprog_trusted_task_nullable_extra_layer(t2);
+
+ if (t2) {
+ /* known non-NULL after explicit NULL check, just in case */
+ res += subprog_trusted_task_nullable(t2);
+ res += subprog_trusted_task_nullable_extra_layer(t2);
+
+ bpf_task_release(t2);
+ }
+
+ return res;
+}
+
+__weak int subprog_trusted_task_nonnull(struct task_struct *task __arg_trusted)
+{
+ return task->pid + task->tgid;
+}
+
+SEC("?kprobe")
+__failure __log_level(2)
+__msg("R1 type=scalar expected=ptr_, trusted_ptr_, rcu_ptr_")
+__msg("Caller passes invalid args into func#1 ('subprog_trusted_task_nonnull')")
+int trusted_task_arg_nonnull_fail1(void *ctx)
+{
+ return subprog_trusted_task_nonnull(NULL);
+}
+
+SEC("?tp_btf/task_newtask")
+__failure __log_level(2)
+__msg("R1 type=ptr_or_null_ expected=ptr_, trusted_ptr_, rcu_ptr_")
+__msg("Caller passes invalid args into func#1 ('subprog_trusted_task_nonnull')")
+int trusted_task_arg_nonnull_fail2(void *ctx)
+{
+ struct task_struct *t = bpf_get_current_task_btf();
+ struct task_struct *nullable;
+ int res;
+
+ nullable = bpf_task_acquire(t);
+
+ /* should fail, PTR_TO_BTF_ID_OR_NULL */
+ res = subprog_trusted_task_nonnull(nullable);
+
+ if (nullable)
+ bpf_task_release(nullable);
+
+ return res;
+}
+
+SEC("?kprobe")
+__success __log_level(2)
+__msg("Validating subprog_trusted_task_nonnull() func#1...")
+__msg(": R1=trusted_ptr_task_struct(")
+int trusted_task_arg_nonnull(void *ctx)
+{
+ struct task_struct *t = bpf_get_current_task_btf();
+
+ return subprog_trusted_task_nonnull(t);
+}
+
+struct task_struct___local {} __attribute__((preserve_access_index));
+
+__weak int subprog_nullable_task_flavor(
+ struct task_struct___local *task __arg_trusted __arg_nullable)
+{
+ char buf[16];
+
+ if (!task)
+ return 0;
+
+ return bpf_copy_from_user_task(&buf, sizeof(buf), NULL, (void *)task, 0);
+}
+
+SEC("?uprobe.s")
+__success __log_level(2)
+__msg("Validating subprog_nullable_task_flavor() func#1...")
+__msg(": R1=trusted_ptr_or_null_task_struct(")
+int flavor_ptr_nullable(void *ctx)
+{
+ struct task_struct___local *t = (void *)bpf_get_current_task_btf();
+
+ return subprog_nullable_task_flavor(t);
+}
+
+__weak int subprog_nonnull_task_flavor(struct task_struct___local *task __arg_trusted)
+{
+ char buf[16];
+
+ return bpf_copy_from_user_task(&buf, sizeof(buf), NULL, (void *)task, 0);
+}
+
+SEC("?uprobe.s")
+__success __log_level(2)
+__msg("Validating subprog_nonnull_task_flavor() func#1...")
+__msg(": R1=trusted_ptr_task_struct(")
+int flavor_ptr_nonnull(void *ctx)
+{
+ struct task_struct *t = bpf_get_current_task_btf();
+
+ return subprog_nonnull_task_flavor((void *)t);
+}
+
+__weak int subprog_trusted_destroy(struct task_struct *task __arg_trusted)
+{
+ bpf_task_release(task); /* should be rejected */
+
+ return 0;
+}
+
+SEC("?tp_btf/task_newtask")
+__failure __log_level(2)
+__msg("release kernel function bpf_task_release expects refcounted PTR_TO_BTF_ID")
+int BPF_PROG(trusted_destroy_fail, struct task_struct *task, u64 clone_flags)
+{
+ return subprog_trusted_destroy(task);
+}
+
+__weak int subprog_trusted_acq_rel(struct task_struct *task __arg_trusted)
+{
+ struct task_struct *owned;
+
+ owned = bpf_task_acquire(task);
+ if (!owned)
+ return 0;
+
+ bpf_task_release(owned); /* this one is OK, we acquired it locally */
+
+ return 0;
+}
+
+SEC("?tp_btf/task_newtask")
+__success __log_level(2)
+int BPF_PROG(trusted_acq_rel, struct task_struct *task, u64 clone_flags)
+{
+ return subprog_trusted_acq_rel(task);
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/verifier_global_subprogs.c b/tools/testing/selftests/bpf/progs/verifier_global_subprogs.c
new file mode 100644
index 000000000000..baff5ffe9405
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/verifier_global_subprogs.c
@@ -0,0 +1,379 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2023 Meta Platforms, Inc. and affiliates. */
+
+#include <vmlinux.h>
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
+#include "bpf_misc.h"
+#include "xdp_metadata.h"
+#include "bpf_kfuncs.h"
+
+int arr[1];
+int unkn_idx;
+const volatile bool call_dead_subprog = false;
+
+__noinline long global_bad(void)
+{
+ return arr[unkn_idx]; /* BOOM */
+}
+
+__noinline long global_good(void)
+{
+ return arr[0];
+}
+
+__noinline long global_calls_bad(void)
+{
+ return global_good() + global_bad() /* does BOOM indirectly */;
+}
+
+__noinline long global_calls_good_only(void)
+{
+ return global_good();
+}
+
+__noinline long global_dead(void)
+{
+ return arr[0] * 2;
+}
+
+SEC("?raw_tp")
+__success __log_level(2)
+/* main prog is validated completely first */
+__msg("('global_calls_good_only') is global and assumed valid.")
+/* eventually global_good() is transitively validated as well */
+__msg("Validating global_good() func")
+__msg("('global_good') is safe for any args that match its prototype")
+int chained_global_func_calls_success(void)
+{
+ int sum = 0;
+
+ if (call_dead_subprog)
+ sum += global_dead();
+ return global_calls_good_only() + sum;
+}
+
+SEC("?raw_tp")
+__failure __log_level(2)
+/* main prog validated successfully first */
+__msg("('global_calls_bad') is global and assumed valid.")
+/* eventually we validate global_bad() and fail */
+__msg("Validating global_bad() func")
+__msg("math between map_value pointer and register") /* BOOM */
+int chained_global_func_calls_bad(void)
+{
+ return global_calls_bad();
+}
+
+/* do out of bounds access forcing verifier to fail verification if this
+ * global func is called
+ */
+__noinline int global_unsupp(const int *mem)
+{
+ if (!mem)
+ return 0;
+ return mem[100]; /* BOOM */
+}
+
+const volatile bool skip_unsupp_global = true;
+
+SEC("?raw_tp")
+__success
+int guarded_unsupp_global_called(void)
+{
+ if (!skip_unsupp_global)
+ return global_unsupp(NULL);
+ return 0;
+}
+
+SEC("?raw_tp")
+__failure __log_level(2)
+__msg("Func#1 ('global_unsupp') is global and assumed valid.")
+__msg("Validating global_unsupp() func#1...")
+__msg("value is outside of the allowed memory range")
+int unguarded_unsupp_global_called(void)
+{
+ int x = 0;
+
+ return global_unsupp(&x);
+}
+
+long stack[128];
+
+__weak int subprog_nullable_ptr_bad(int *p)
+{
+ return (*p) * 2; /* bad, missing null check */
+}
+
+SEC("?raw_tp")
+__failure __log_level(2)
+__msg("invalid mem access 'mem_or_null'")
+int arg_tag_nullable_ptr_fail(void *ctx)
+{
+ int x = 42;
+
+ return subprog_nullable_ptr_bad(&x);
+}
+
+typedef struct {
+ int x;
+} user_struct_t;
+
+__noinline __weak int subprog_user_anon_mem(user_struct_t *t)
+{
+ return t ? t->x : 0;
+}
+
+SEC("?tracepoint")
+__failure __log_level(2)
+__msg("invalid bpf_context access")
+__msg("Caller passes invalid args into func#1 ('subprog_user_anon_mem')")
+int anon_user_mem_invalid(void *ctx)
+{
+ /* can't pass PTR_TO_CTX as user memory */
+ return subprog_user_anon_mem(ctx);
+}
+
+SEC("?tracepoint")
+__success __log_level(2)
+__msg("Func#1 ('subprog_user_anon_mem') is safe for any args that match its prototype")
+int anon_user_mem_valid(void *ctx)
+{
+ user_struct_t t = { .x = 42 };
+
+ return subprog_user_anon_mem(&t);
+}
+
+__noinline __weak int subprog_nonnull_ptr_good(int *p1 __arg_nonnull, int *p2 __arg_nonnull)
+{
+ return (*p1) * (*p2); /* good, no need for NULL checks */
+}
+
+int x = 47;
+
+SEC("?raw_tp")
+__success __log_level(2)
+int arg_tag_nonnull_ptr_good(void *ctx)
+{
+ int y = 74;
+
+ return subprog_nonnull_ptr_good(&x, &y);
+}
+
+/* this global subprog can be now called from many types of entry progs, each
+ * with different context type
+ */
+__weak int subprog_ctx_tag(void *ctx __arg_ctx)
+{
+ return bpf_get_stack(ctx, stack, sizeof(stack), 0);
+}
+
+__weak int raw_tp_canonical(struct bpf_raw_tracepoint_args *ctx __arg_ctx)
+{
+ return 0;
+}
+
+__weak int raw_tp_u64_array(u64 *ctx __arg_ctx)
+{
+ return 0;
+}
+
+SEC("?raw_tp")
+__success __log_level(2)
+int arg_tag_ctx_raw_tp(void *ctx)
+{
+ return subprog_ctx_tag(ctx) + raw_tp_canonical(ctx) + raw_tp_u64_array(ctx);
+}
+
+SEC("?raw_tp.w")
+__success __log_level(2)
+int arg_tag_ctx_raw_tp_writable(void *ctx)
+{
+ return subprog_ctx_tag(ctx) + raw_tp_canonical(ctx) + raw_tp_u64_array(ctx);
+}
+
+SEC("?tp_btf/sys_enter")
+__success __log_level(2)
+int arg_tag_ctx_raw_tp_btf(void *ctx)
+{
+ return subprog_ctx_tag(ctx) + raw_tp_canonical(ctx) + raw_tp_u64_array(ctx);
+}
+
+struct whatever { };
+
+__weak int tp_whatever(struct whatever *ctx __arg_ctx)
+{
+ return 0;
+}
+
+SEC("?tp")
+__success __log_level(2)
+int arg_tag_ctx_tp(void *ctx)
+{
+ return subprog_ctx_tag(ctx) + tp_whatever(ctx);
+}
+
+__weak int kprobe_subprog_pt_regs(struct pt_regs *ctx __arg_ctx)
+{
+ return 0;
+}
+
+__weak int kprobe_subprog_typedef(bpf_user_pt_regs_t *ctx __arg_ctx)
+{
+ return 0;
+}
+
+SEC("?kprobe")
+__success __log_level(2)
+int arg_tag_ctx_kprobe(void *ctx)
+{
+ return subprog_ctx_tag(ctx) +
+ kprobe_subprog_pt_regs(ctx) +
+ kprobe_subprog_typedef(ctx);
+}
+
+__weak int perf_subprog_regs(
+#if defined(bpf_target_riscv)
+ struct user_regs_struct *ctx __arg_ctx
+#elif defined(bpf_target_s390)
+ /* user_pt_regs typedef is anonymous struct, so only `void *` works */
+ void *ctx __arg_ctx
+#elif defined(bpf_target_loongarch) || defined(bpf_target_arm64) || defined(bpf_target_powerpc)
+ struct user_pt_regs *ctx __arg_ctx
+#else
+ struct pt_regs *ctx __arg_ctx
+#endif
+)
+{
+ return 0;
+}
+
+__weak int perf_subprog_typedef(bpf_user_pt_regs_t *ctx __arg_ctx)
+{
+ return 0;
+}
+
+__weak int perf_subprog_canonical(struct bpf_perf_event_data *ctx __arg_ctx)
+{
+ return 0;
+}
+
+SEC("?perf_event")
+__success __log_level(2)
+int arg_tag_ctx_perf(void *ctx)
+{
+ return subprog_ctx_tag(ctx) +
+ perf_subprog_regs(ctx) +
+ perf_subprog_typedef(ctx) +
+ perf_subprog_canonical(ctx);
+}
+
+__weak int iter_subprog_void(void *ctx __arg_ctx)
+{
+ return 0;
+}
+
+__weak int iter_subprog_typed(struct bpf_iter__task *ctx __arg_ctx)
+{
+ return 0;
+}
+
+SEC("?iter/task")
+__success __log_level(2)
+int arg_tag_ctx_iter_task(struct bpf_iter__task *ctx)
+{
+ return (iter_subprog_void(ctx) + iter_subprog_typed(ctx)) & 1;
+}
+
+__weak int tracing_subprog_void(void *ctx __arg_ctx)
+{
+ return 0;
+}
+
+__weak int tracing_subprog_u64(u64 *ctx __arg_ctx)
+{
+ return 0;
+}
+
+int acc;
+
+SEC("?fentry/" SYS_PREFIX "sys_nanosleep")
+__success __log_level(2)
+int BPF_PROG(arg_tag_ctx_fentry)
+{
+ acc += tracing_subprog_void(ctx) + tracing_subprog_u64(ctx);
+ return 0;
+}
+
+SEC("?fexit/" SYS_PREFIX "sys_nanosleep")
+__success __log_level(2)
+int BPF_PROG(arg_tag_ctx_fexit)
+{
+ acc += tracing_subprog_void(ctx) + tracing_subprog_u64(ctx);
+ return 0;
+}
+
+SEC("?fmod_ret/" SYS_PREFIX "sys_nanosleep")
+__success __log_level(2)
+int BPF_PROG(arg_tag_ctx_fmod_ret)
+{
+ return tracing_subprog_void(ctx) + tracing_subprog_u64(ctx);
+}
+
+SEC("?lsm/bpf")
+__success __log_level(2)
+int BPF_PROG(arg_tag_ctx_lsm)
+{
+ return tracing_subprog_void(ctx) + tracing_subprog_u64(ctx);
+}
+
+SEC("?struct_ops/test_1")
+__success __log_level(2)
+int BPF_PROG(arg_tag_ctx_struct_ops)
+{
+ return tracing_subprog_void(ctx) + tracing_subprog_u64(ctx);
+}
+
+SEC(".struct_ops")
+struct bpf_dummy_ops dummy_1 = {
+ .test_1 = (void *)arg_tag_ctx_struct_ops,
+};
+
+SEC("?syscall")
+__success __log_level(2)
+int arg_tag_ctx_syscall(void *ctx)
+{
+ return tracing_subprog_void(ctx) + tracing_subprog_u64(ctx) + tp_whatever(ctx);
+}
+
+__weak int subprog_dynptr(struct bpf_dynptr *dptr)
+{
+ long *d, t, buf[1] = {};
+
+ d = bpf_dynptr_data(dptr, 0, sizeof(long));
+ if (!d)
+ return 0;
+
+ t = *d + 1;
+
+ d = bpf_dynptr_slice(dptr, 0, &buf, sizeof(long));
+ if (!d)
+ return t;
+
+ t = *d + 2;
+
+ return t;
+}
+
+SEC("?xdp")
+__success __log_level(2)
+int arg_tag_dynptr(struct xdp_md *ctx)
+{
+ struct bpf_dynptr dptr;
+
+ bpf_dynptr_from_xdp(ctx, 0, &dptr);
+
+ return subprog_dynptr(&dptr);
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/verifier_gotol.c b/tools/testing/selftests/bpf/progs/verifier_gotol.c
new file mode 100644
index 000000000000..05a329ee45ee
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/verifier_gotol.c
@@ -0,0 +1,67 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include <linux/bpf.h>
+#include <bpf/bpf_helpers.h>
+#include "bpf_misc.h"
+
+#if (defined(__TARGET_ARCH_arm64) || defined(__TARGET_ARCH_x86) || \
+ (defined(__TARGET_ARCH_riscv) && __riscv_xlen == 64) || \
+ defined(__TARGET_ARCH_arm) || defined(__TARGET_ARCH_s390) || \
+ defined(__TARGET_ARCH_loongarch)) && \
+ __clang_major__ >= 18
+
+SEC("socket")
+__description("gotol, small_imm")
+__success __success_unpriv __retval(1)
+__naked void gotol_small_imm(void)
+{
+ asm volatile (" \
+ call %[bpf_ktime_get_ns]; \
+ if r0 == 0 goto l0_%=; \
+ gotol l1_%=; \
+l2_%=: \
+ gotol l3_%=; \
+l1_%=: \
+ r0 = 1; \
+ gotol l2_%=; \
+l0_%=: \
+ r0 = 2; \
+l3_%=: \
+ exit; \
+" :
+ : __imm(bpf_ktime_get_ns)
+ : __clobber_all);
+}
+
+SEC("socket")
+__description("gotol, large_imm")
+__success __failure_unpriv __retval(40000)
+__naked void gotol_large_imm(void)
+{
+ asm volatile (" \
+ gotol 1f; \
+0: \
+ r0 = 0; \
+ .rept 40000; \
+ r0 += 1; \
+ .endr; \
+ exit; \
+1: gotol 0b; \
+" :
+ :
+ : __clobber_all);
+}
+
+#else
+
+SEC("socket")
+__description("cpuv4 is not supported by compiler or jit, use a dummy test")
+__success
+int dummy_test(void)
+{
+ return 0;
+}
+
+#endif
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/verifier_helper_access_var_len.c b/tools/testing/selftests/bpf/progs/verifier_helper_access_var_len.c
new file mode 100644
index 000000000000..50c6b22606f6
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/verifier_helper_access_var_len.c
@@ -0,0 +1,825 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Converted from tools/testing/selftests/bpf/verifier/helper_access_var_len.c */
+
+#include <linux/bpf.h>
+#include <bpf/bpf_helpers.h>
+#include "bpf_misc.h"
+
+#define MAX_ENTRIES 11
+
+struct test_val {
+ unsigned int index;
+ int foo[MAX_ENTRIES];
+};
+
+struct {
+ __uint(type, BPF_MAP_TYPE_HASH);
+ __uint(max_entries, 1);
+ __type(key, long long);
+ __type(value, struct test_val);
+} map_hash_48b SEC(".maps");
+
+struct {
+ __uint(type, BPF_MAP_TYPE_HASH);
+ __uint(max_entries, 1);
+ __type(key, long long);
+ __type(value, long long);
+} map_hash_8b SEC(".maps");
+
+struct {
+ __uint(type, BPF_MAP_TYPE_RINGBUF);
+ __uint(max_entries, 4096);
+} map_ringbuf SEC(".maps");
+
+SEC("tracepoint")
+__description("helper access to variable memory: stack, bitwise AND + JMP, correct bounds")
+__success
+__naked void bitwise_and_jmp_correct_bounds(void)
+{
+ asm volatile (" \
+ r1 = r10; \
+ r1 += -64; \
+ r0 = 0; \
+ *(u64*)(r10 - 64) = r0; \
+ *(u64*)(r10 - 56) = r0; \
+ *(u64*)(r10 - 48) = r0; \
+ *(u64*)(r10 - 40) = r0; \
+ *(u64*)(r10 - 32) = r0; \
+ *(u64*)(r10 - 24) = r0; \
+ *(u64*)(r10 - 16) = r0; \
+ *(u64*)(r10 - 8) = r0; \
+ r2 = 16; \
+ *(u64*)(r1 - 128) = r2; \
+ r2 = *(u64*)(r1 - 128); \
+ r2 &= 64; \
+ r4 = 0; \
+ if r4 >= r2 goto l0_%=; \
+ r3 = 0; \
+ call %[bpf_probe_read_kernel]; \
+l0_%=: r0 = 0; \
+ exit; \
+" :
+ : __imm(bpf_probe_read_kernel)
+ : __clobber_all);
+}
+
+SEC("socket")
+__description("helper access to variable memory: stack, bitwise AND, zero included")
+/* in privileged mode reads from uninitialized stack locations are permitted */
+__success __failure_unpriv
+__msg_unpriv("invalid indirect read from stack R2 off -64+0 size 64")
+__retval(0)
+__naked void stack_bitwise_and_zero_included(void)
+{
+ asm volatile (" \
+ /* set max stack size */ \
+ r6 = 0; \
+ *(u64*)(r10 - 128) = r6; \
+ /* set r3 to a random value */ \
+ call %[bpf_get_prandom_u32]; \
+ r3 = r0; \
+ /* use bitwise AND to limit r3 range to [0, 64] */\
+ r3 &= 64; \
+ r1 = %[map_ringbuf] ll; \
+ r2 = r10; \
+ r2 += -64; \
+ r4 = 0; \
+ /* Call bpf_ringbuf_output(), it is one of a few helper functions with\
+ * ARG_CONST_SIZE_OR_ZERO parameter allowed in unpriv mode.\
+ * For unpriv this should signal an error, because memory at &fp[-64] is\
+ * not initialized. \
+ */ \
+ call %[bpf_ringbuf_output]; \
+ exit; \
+" :
+ : __imm(bpf_get_prandom_u32),
+ __imm(bpf_ringbuf_output),
+ __imm_addr(map_ringbuf)
+ : __clobber_all);
+}
+
+SEC("tracepoint")
+__description("helper access to variable memory: stack, bitwise AND + JMP, wrong max")
+__failure __msg("invalid indirect access to stack R1 off=-64 size=65")
+__naked void bitwise_and_jmp_wrong_max(void)
+{
+ asm volatile (" \
+ r2 = *(u64*)(r1 + 8); \
+ r1 = r10; \
+ r1 += -64; \
+ *(u64*)(r1 - 128) = r2; \
+ r2 = *(u64*)(r1 - 128); \
+ r2 &= 65; \
+ r4 = 0; \
+ if r4 >= r2 goto l0_%=; \
+ r3 = 0; \
+ call %[bpf_probe_read_kernel]; \
+l0_%=: r0 = 0; \
+ exit; \
+" :
+ : __imm(bpf_probe_read_kernel)
+ : __clobber_all);
+}
+
+SEC("tracepoint")
+__description("helper access to variable memory: stack, JMP, correct bounds")
+__success
+__naked void memory_stack_jmp_correct_bounds(void)
+{
+ asm volatile (" \
+ r1 = r10; \
+ r1 += -64; \
+ r0 = 0; \
+ *(u64*)(r10 - 64) = r0; \
+ *(u64*)(r10 - 56) = r0; \
+ *(u64*)(r10 - 48) = r0; \
+ *(u64*)(r10 - 40) = r0; \
+ *(u64*)(r10 - 32) = r0; \
+ *(u64*)(r10 - 24) = r0; \
+ *(u64*)(r10 - 16) = r0; \
+ *(u64*)(r10 - 8) = r0; \
+ r2 = 16; \
+ *(u64*)(r1 - 128) = r2; \
+ r2 = *(u64*)(r1 - 128); \
+ if r2 > 64 goto l0_%=; \
+ r4 = 0; \
+ if r4 >= r2 goto l0_%=; \
+ r3 = 0; \
+ call %[bpf_probe_read_kernel]; \
+l0_%=: r0 = 0; \
+ exit; \
+" :
+ : __imm(bpf_probe_read_kernel)
+ : __clobber_all);
+}
+
+SEC("tracepoint")
+__description("helper access to variable memory: stack, JMP (signed), correct bounds")
+__success
+__naked void stack_jmp_signed_correct_bounds(void)
+{
+ asm volatile (" \
+ r1 = r10; \
+ r1 += -64; \
+ r0 = 0; \
+ *(u64*)(r10 - 64) = r0; \
+ *(u64*)(r10 - 56) = r0; \
+ *(u64*)(r10 - 48) = r0; \
+ *(u64*)(r10 - 40) = r0; \
+ *(u64*)(r10 - 32) = r0; \
+ *(u64*)(r10 - 24) = r0; \
+ *(u64*)(r10 - 16) = r0; \
+ *(u64*)(r10 - 8) = r0; \
+ r2 = 16; \
+ *(u64*)(r1 - 128) = r2; \
+ r2 = *(u64*)(r1 - 128); \
+ if r2 s> 64 goto l0_%=; \
+ r4 = 0; \
+ if r4 s>= r2 goto l0_%=; \
+ r3 = 0; \
+ call %[bpf_probe_read_kernel]; \
+l0_%=: r0 = 0; \
+ exit; \
+" :
+ : __imm(bpf_probe_read_kernel)
+ : __clobber_all);
+}
+
+SEC("tracepoint")
+__description("helper access to variable memory: stack, JMP, bounds + offset")
+__failure __msg("invalid indirect access to stack R1 off=-64 size=65")
+__naked void memory_stack_jmp_bounds_offset(void)
+{
+ asm volatile (" \
+ r2 = *(u64*)(r1 + 8); \
+ r1 = r10; \
+ r1 += -64; \
+ *(u64*)(r1 - 128) = r2; \
+ r2 = *(u64*)(r1 - 128); \
+ if r2 > 64 goto l0_%=; \
+ r4 = 0; \
+ if r4 >= r2 goto l0_%=; \
+ r2 += 1; \
+ r3 = 0; \
+ call %[bpf_probe_read_kernel]; \
+l0_%=: r0 = 0; \
+ exit; \
+" :
+ : __imm(bpf_probe_read_kernel)
+ : __clobber_all);
+}
+
+SEC("tracepoint")
+__description("helper access to variable memory: stack, JMP, wrong max")
+__failure __msg("invalid indirect access to stack R1 off=-64 size=65")
+__naked void memory_stack_jmp_wrong_max(void)
+{
+ asm volatile (" \
+ r2 = *(u64*)(r1 + 8); \
+ r1 = r10; \
+ r1 += -64; \
+ *(u64*)(r1 - 128) = r2; \
+ r2 = *(u64*)(r1 - 128); \
+ if r2 > 65 goto l0_%=; \
+ r4 = 0; \
+ if r4 >= r2 goto l0_%=; \
+ r3 = 0; \
+ call %[bpf_probe_read_kernel]; \
+l0_%=: r0 = 0; \
+ exit; \
+" :
+ : __imm(bpf_probe_read_kernel)
+ : __clobber_all);
+}
+
+SEC("tracepoint")
+__description("helper access to variable memory: stack, JMP, no max check")
+__failure
+/* because max wasn't checked, signed min is negative */
+__msg("R2 min value is negative, either use unsigned or 'var &= const'")
+__naked void stack_jmp_no_max_check(void)
+{
+ asm volatile (" \
+ r2 = *(u64*)(r1 + 8); \
+ r1 = r10; \
+ r1 += -64; \
+ *(u64*)(r1 - 128) = r2; \
+ r2 = *(u64*)(r1 - 128); \
+ r4 = 0; \
+ if r4 >= r2 goto l0_%=; \
+ r3 = 0; \
+ call %[bpf_probe_read_kernel]; \
+l0_%=: r0 = 0; \
+ exit; \
+" :
+ : __imm(bpf_probe_read_kernel)
+ : __clobber_all);
+}
+
+SEC("socket")
+__description("helper access to variable memory: stack, JMP, no min check")
+/* in privileged mode reads from uninitialized stack locations are permitted */
+__success __failure_unpriv
+__msg_unpriv("invalid indirect read from stack R2 off -64+0 size 64")
+__retval(0)
+__naked void stack_jmp_no_min_check(void)
+{
+ asm volatile (" \
+ /* set max stack size */ \
+ r6 = 0; \
+ *(u64*)(r10 - 128) = r6; \
+ /* set r3 to a random value */ \
+ call %[bpf_get_prandom_u32]; \
+ r3 = r0; \
+ /* use JMP to limit r3 range to [0, 64] */ \
+ if r3 > 64 goto l0_%=; \
+ r1 = %[map_ringbuf] ll; \
+ r2 = r10; \
+ r2 += -64; \
+ r4 = 0; \
+ /* Call bpf_ringbuf_output(), it is one of a few helper functions with\
+ * ARG_CONST_SIZE_OR_ZERO parameter allowed in unpriv mode.\
+ * For unpriv this should signal an error, because memory at &fp[-64] is\
+ * not initialized. \
+ */ \
+ call %[bpf_ringbuf_output]; \
+l0_%=: r0 = 0; \
+ exit; \
+" :
+ : __imm(bpf_get_prandom_u32),
+ __imm(bpf_ringbuf_output),
+ __imm_addr(map_ringbuf)
+ : __clobber_all);
+}
+
+SEC("tracepoint")
+__description("helper access to variable memory: stack, JMP (signed), no min check")
+__failure __msg("R2 min value is negative")
+__naked void jmp_signed_no_min_check(void)
+{
+ asm volatile (" \
+ r2 = *(u64*)(r1 + 8); \
+ r1 = r10; \
+ r1 += -64; \
+ *(u64*)(r1 - 128) = r2; \
+ r2 = *(u64*)(r1 - 128); \
+ if r2 s> 64 goto l0_%=; \
+ r3 = 0; \
+ call %[bpf_probe_read_kernel]; \
+ r0 = 0; \
+l0_%=: exit; \
+" :
+ : __imm(bpf_probe_read_kernel)
+ : __clobber_all);
+}
+
+SEC("tracepoint")
+__description("helper access to variable memory: map, JMP, correct bounds")
+__success
+__naked void memory_map_jmp_correct_bounds(void)
+{
+ asm volatile (" \
+ r2 = r10; \
+ r2 += -8; \
+ r1 = 0; \
+ *(u64*)(r2 + 0) = r1; \
+ r1 = %[map_hash_48b] ll; \
+ call %[bpf_map_lookup_elem]; \
+ if r0 == 0 goto l0_%=; \
+ r1 = r0; \
+ r2 = %[sizeof_test_val]; \
+ *(u64*)(r10 - 128) = r2; \
+ r2 = *(u64*)(r10 - 128); \
+ if r2 s> %[sizeof_test_val] goto l1_%=; \
+ r4 = 0; \
+ if r4 s>= r2 goto l1_%=; \
+ r3 = 0; \
+ call %[bpf_probe_read_kernel]; \
+l1_%=: r0 = 0; \
+l0_%=: exit; \
+" :
+ : __imm(bpf_map_lookup_elem),
+ __imm(bpf_probe_read_kernel),
+ __imm_addr(map_hash_48b),
+ __imm_const(sizeof_test_val, sizeof(struct test_val))
+ : __clobber_all);
+}
+
+SEC("tracepoint")
+__description("helper access to variable memory: map, JMP, wrong max")
+__failure __msg("invalid access to map value, value_size=48 off=0 size=49")
+__naked void memory_map_jmp_wrong_max(void)
+{
+ asm volatile (" \
+ r6 = *(u64*)(r1 + 8); \
+ r2 = r10; \
+ r2 += -8; \
+ r1 = 0; \
+ *(u64*)(r2 + 0) = r1; \
+ r1 = %[map_hash_48b] ll; \
+ call %[bpf_map_lookup_elem]; \
+ if r0 == 0 goto l0_%=; \
+ r1 = r0; \
+ r2 = r6; \
+ *(u64*)(r10 - 128) = r2; \
+ r2 = *(u64*)(r10 - 128); \
+ if r2 s> %[__imm_0] goto l1_%=; \
+ r4 = 0; \
+ if r4 s>= r2 goto l1_%=; \
+ r3 = 0; \
+ call %[bpf_probe_read_kernel]; \
+l1_%=: r0 = 0; \
+l0_%=: exit; \
+" :
+ : __imm(bpf_map_lookup_elem),
+ __imm(bpf_probe_read_kernel),
+ __imm_addr(map_hash_48b),
+ __imm_const(__imm_0, sizeof(struct test_val) + 1)
+ : __clobber_all);
+}
+
+SEC("tracepoint")
+__description("helper access to variable memory: map adjusted, JMP, correct bounds")
+__success
+__naked void map_adjusted_jmp_correct_bounds(void)
+{
+ asm volatile (" \
+ r2 = r10; \
+ r2 += -8; \
+ r1 = 0; \
+ *(u64*)(r2 + 0) = r1; \
+ r1 = %[map_hash_48b] ll; \
+ call %[bpf_map_lookup_elem]; \
+ if r0 == 0 goto l0_%=; \
+ r1 = r0; \
+ r1 += 20; \
+ r2 = %[sizeof_test_val]; \
+ *(u64*)(r10 - 128) = r2; \
+ r2 = *(u64*)(r10 - 128); \
+ if r2 s> %[__imm_0] goto l1_%=; \
+ r4 = 0; \
+ if r4 s>= r2 goto l1_%=; \
+ r3 = 0; \
+ call %[bpf_probe_read_kernel]; \
+l1_%=: r0 = 0; \
+l0_%=: exit; \
+" :
+ : __imm(bpf_map_lookup_elem),
+ __imm(bpf_probe_read_kernel),
+ __imm_addr(map_hash_48b),
+ __imm_const(__imm_0, sizeof(struct test_val) - 20),
+ __imm_const(sizeof_test_val, sizeof(struct test_val))
+ : __clobber_all);
+}
+
+SEC("tracepoint")
+__description("helper access to variable memory: map adjusted, JMP, wrong max")
+__failure __msg("R1 min value is outside of the allowed memory range")
+__naked void map_adjusted_jmp_wrong_max(void)
+{
+ asm volatile (" \
+ r6 = *(u64*)(r1 + 8); \
+ r2 = r10; \
+ r2 += -8; \
+ r1 = 0; \
+ *(u64*)(r2 + 0) = r1; \
+ r1 = %[map_hash_48b] ll; \
+ call %[bpf_map_lookup_elem]; \
+ if r0 == 0 goto l0_%=; \
+ r1 = r0; \
+ r1 += 20; \
+ r2 = r6; \
+ *(u64*)(r10 - 128) = r2; \
+ r2 = *(u64*)(r10 - 128); \
+ if r2 s> %[__imm_0] goto l1_%=; \
+ r4 = 0; \
+ if r4 s>= r2 goto l1_%=; \
+ r3 = 0; \
+ call %[bpf_probe_read_kernel]; \
+l1_%=: r0 = 0; \
+l0_%=: exit; \
+" :
+ : __imm(bpf_map_lookup_elem),
+ __imm(bpf_probe_read_kernel),
+ __imm_addr(map_hash_48b),
+ __imm_const(__imm_0, sizeof(struct test_val) - 19)
+ : __clobber_all);
+}
+
+SEC("tc")
+__description("helper access to variable memory: size = 0 allowed on NULL (ARG_PTR_TO_MEM_OR_NULL)")
+__success __retval(0)
+__naked void ptr_to_mem_or_null_1(void)
+{
+ asm volatile (" \
+ r1 = 0; \
+ r2 = 0; \
+ r3 = 0; \
+ r4 = 0; \
+ r5 = 0; \
+ call %[bpf_csum_diff]; \
+ exit; \
+" :
+ : __imm(bpf_csum_diff)
+ : __clobber_all);
+}
+
+SEC("tc")
+__description("helper access to variable memory: size > 0 not allowed on NULL (ARG_PTR_TO_MEM_OR_NULL)")
+__failure __msg("R1 type=scalar expected=fp")
+__naked void ptr_to_mem_or_null_2(void)
+{
+ asm volatile (" \
+ r2 = *(u32*)(r1 + 0); \
+ r1 = 0; \
+ *(u64*)(r10 - 128) = r2; \
+ r2 = *(u64*)(r10 - 128); \
+ r2 &= 64; \
+ r3 = 0; \
+ r4 = 0; \
+ r5 = 0; \
+ call %[bpf_csum_diff]; \
+ exit; \
+" :
+ : __imm(bpf_csum_diff)
+ : __clobber_all);
+}
+
+SEC("tc")
+__description("helper access to variable memory: size = 0 allowed on != NULL stack pointer (ARG_PTR_TO_MEM_OR_NULL)")
+__success __retval(0)
+__naked void ptr_to_mem_or_null_3(void)
+{
+ asm volatile (" \
+ r1 = r10; \
+ r1 += -8; \
+ r2 = 0; \
+ *(u64*)(r1 + 0) = r2; \
+ r2 &= 8; \
+ r3 = 0; \
+ r4 = 0; \
+ r5 = 0; \
+ call %[bpf_csum_diff]; \
+ exit; \
+" :
+ : __imm(bpf_csum_diff)
+ : __clobber_all);
+}
+
+SEC("tc")
+__description("helper access to variable memory: size = 0 allowed on != NULL map pointer (ARG_PTR_TO_MEM_OR_NULL)")
+__success __retval(0)
+__naked void ptr_to_mem_or_null_4(void)
+{
+ asm volatile (" \
+ r1 = 0; \
+ *(u64*)(r10 - 8) = r1; \
+ r2 = r10; \
+ r2 += -8; \
+ r1 = %[map_hash_8b] ll; \
+ call %[bpf_map_lookup_elem]; \
+ if r0 == 0 goto l0_%=; \
+ r1 = r0; \
+ r2 = 0; \
+ r3 = 0; \
+ r4 = 0; \
+ r5 = 0; \
+ call %[bpf_csum_diff]; \
+l0_%=: exit; \
+" :
+ : __imm(bpf_csum_diff),
+ __imm(bpf_map_lookup_elem),
+ __imm_addr(map_hash_8b)
+ : __clobber_all);
+}
+
+SEC("tc")
+__description("helper access to variable memory: size possible = 0 allowed on != NULL stack pointer (ARG_PTR_TO_MEM_OR_NULL)")
+__success __retval(0)
+__naked void ptr_to_mem_or_null_5(void)
+{
+ asm volatile (" \
+ r1 = 0; \
+ *(u64*)(r10 - 8) = r1; \
+ r2 = r10; \
+ r2 += -8; \
+ r1 = %[map_hash_8b] ll; \
+ call %[bpf_map_lookup_elem]; \
+ if r0 == 0 goto l0_%=; \
+ r2 = *(u64*)(r0 + 0); \
+ if r2 > 8 goto l0_%=; \
+ r1 = r10; \
+ r1 += -8; \
+ *(u64*)(r1 + 0) = r2; \
+ r3 = 0; \
+ r4 = 0; \
+ r5 = 0; \
+ call %[bpf_csum_diff]; \
+l0_%=: exit; \
+" :
+ : __imm(bpf_csum_diff),
+ __imm(bpf_map_lookup_elem),
+ __imm_addr(map_hash_8b)
+ : __clobber_all);
+}
+
+SEC("tc")
+__description("helper access to variable memory: size possible = 0 allowed on != NULL map pointer (ARG_PTR_TO_MEM_OR_NULL)")
+__success __retval(0)
+__naked void ptr_to_mem_or_null_6(void)
+{
+ asm volatile (" \
+ r1 = 0; \
+ *(u64*)(r10 - 8) = r1; \
+ r2 = r10; \
+ r2 += -8; \
+ r1 = %[map_hash_8b] ll; \
+ call %[bpf_map_lookup_elem]; \
+ if r0 == 0 goto l0_%=; \
+ r1 = r0; \
+ r2 = *(u64*)(r0 + 0); \
+ if r2 > 8 goto l0_%=; \
+ r3 = 0; \
+ r4 = 0; \
+ r5 = 0; \
+ call %[bpf_csum_diff]; \
+l0_%=: exit; \
+" :
+ : __imm(bpf_csum_diff),
+ __imm(bpf_map_lookup_elem),
+ __imm_addr(map_hash_8b)
+ : __clobber_all);
+}
+
+SEC("tc")
+__description("helper access to variable memory: size possible = 0 allowed on != NULL packet pointer (ARG_PTR_TO_MEM_OR_NULL)")
+__success __retval(0)
+/* csum_diff of 64-byte packet */
+__flag(BPF_F_ANY_ALIGNMENT)
+__naked void ptr_to_mem_or_null_7(void)
+{
+ asm volatile (" \
+ r6 = *(u32*)(r1 + %[__sk_buff_data]); \
+ r3 = *(u32*)(r1 + %[__sk_buff_data_end]); \
+ r0 = r6; \
+ r0 += 8; \
+ if r0 > r3 goto l0_%=; \
+ r1 = r6; \
+ r2 = *(u64*)(r6 + 0); \
+ if r2 > 8 goto l0_%=; \
+ r3 = 0; \
+ r4 = 0; \
+ r5 = 0; \
+ call %[bpf_csum_diff]; \
+l0_%=: exit; \
+" :
+ : __imm(bpf_csum_diff),
+ __imm_const(__sk_buff_data, offsetof(struct __sk_buff, data)),
+ __imm_const(__sk_buff_data_end, offsetof(struct __sk_buff, data_end))
+ : __clobber_all);
+}
+
+SEC("tracepoint")
+__description("helper access to variable memory: size = 0 not allowed on NULL (!ARG_PTR_TO_MEM_OR_NULL)")
+__failure __msg("R1 type=scalar expected=fp")
+__naked void ptr_to_mem_or_null_8(void)
+{
+ asm volatile (" \
+ r1 = 0; \
+ r2 = 0; \
+ r3 = 0; \
+ call %[bpf_probe_read_kernel]; \
+ exit; \
+" :
+ : __imm(bpf_probe_read_kernel)
+ : __clobber_all);
+}
+
+SEC("tracepoint")
+__description("helper access to variable memory: size > 0 not allowed on NULL (!ARG_PTR_TO_MEM_OR_NULL)")
+__failure __msg("R1 type=scalar expected=fp")
+__naked void ptr_to_mem_or_null_9(void)
+{
+ asm volatile (" \
+ r1 = 0; \
+ r2 = 1; \
+ r3 = 0; \
+ call %[bpf_probe_read_kernel]; \
+ exit; \
+" :
+ : __imm(bpf_probe_read_kernel)
+ : __clobber_all);
+}
+
+SEC("tracepoint")
+__description("helper access to variable memory: size = 0 allowed on != NULL stack pointer (!ARG_PTR_TO_MEM_OR_NULL)")
+__success
+__naked void ptr_to_mem_or_null_10(void)
+{
+ asm volatile (" \
+ r1 = r10; \
+ r1 += -8; \
+ r2 = 0; \
+ r3 = 0; \
+ call %[bpf_probe_read_kernel]; \
+ exit; \
+" :
+ : __imm(bpf_probe_read_kernel)
+ : __clobber_all);
+}
+
+SEC("tracepoint")
+__description("helper access to variable memory: size = 0 allowed on != NULL map pointer (!ARG_PTR_TO_MEM_OR_NULL)")
+__success
+__naked void ptr_to_mem_or_null_11(void)
+{
+ asm volatile (" \
+ r1 = 0; \
+ *(u64*)(r10 - 8) = r1; \
+ r2 = r10; \
+ r2 += -8; \
+ r1 = %[map_hash_8b] ll; \
+ call %[bpf_map_lookup_elem]; \
+ if r0 == 0 goto l0_%=; \
+ r1 = r0; \
+ r2 = 0; \
+ r3 = 0; \
+ call %[bpf_probe_read_kernel]; \
+l0_%=: exit; \
+" :
+ : __imm(bpf_map_lookup_elem),
+ __imm(bpf_probe_read_kernel),
+ __imm_addr(map_hash_8b)
+ : __clobber_all);
+}
+
+SEC("tracepoint")
+__description("helper access to variable memory: size possible = 0 allowed on != NULL stack pointer (!ARG_PTR_TO_MEM_OR_NULL)")
+__success
+__naked void ptr_to_mem_or_null_12(void)
+{
+ asm volatile (" \
+ r1 = 0; \
+ *(u64*)(r10 - 8) = r1; \
+ r2 = r10; \
+ r2 += -8; \
+ r1 = %[map_hash_8b] ll; \
+ call %[bpf_map_lookup_elem]; \
+ if r0 == 0 goto l0_%=; \
+ r2 = *(u64*)(r0 + 0); \
+ if r2 > 8 goto l0_%=; \
+ r1 = r10; \
+ r1 += -8; \
+ r3 = 0; \
+ call %[bpf_probe_read_kernel]; \
+l0_%=: exit; \
+" :
+ : __imm(bpf_map_lookup_elem),
+ __imm(bpf_probe_read_kernel),
+ __imm_addr(map_hash_8b)
+ : __clobber_all);
+}
+
+SEC("tracepoint")
+__description("helper access to variable memory: size possible = 0 allowed on != NULL map pointer (!ARG_PTR_TO_MEM_OR_NULL)")
+__success
+__naked void ptr_to_mem_or_null_13(void)
+{
+ asm volatile (" \
+ r1 = 0; \
+ *(u64*)(r10 - 8) = r1; \
+ r2 = r10; \
+ r2 += -8; \
+ r1 = %[map_hash_8b] ll; \
+ call %[bpf_map_lookup_elem]; \
+ if r0 == 0 goto l0_%=; \
+ r1 = r0; \
+ r2 = *(u64*)(r0 + 0); \
+ if r2 > 8 goto l0_%=; \
+ r3 = 0; \
+ call %[bpf_probe_read_kernel]; \
+l0_%=: exit; \
+" :
+ : __imm(bpf_map_lookup_elem),
+ __imm(bpf_probe_read_kernel),
+ __imm_addr(map_hash_8b)
+ : __clobber_all);
+}
+
+SEC("socket")
+__description("helper access to variable memory: 8 bytes leak")
+/* in privileged mode reads from uninitialized stack locations are permitted */
+__success __failure_unpriv
+__msg_unpriv("invalid indirect read from stack R2 off -64+32 size 64")
+__retval(0)
+__naked void variable_memory_8_bytes_leak(void)
+{
+ asm volatile (" \
+ /* set max stack size */ \
+ r6 = 0; \
+ *(u64*)(r10 - 128) = r6; \
+ /* set r3 to a random value */ \
+ call %[bpf_get_prandom_u32]; \
+ r3 = r0; \
+ r1 = %[map_ringbuf] ll; \
+ r2 = r10; \
+ r2 += -64; \
+ r0 = 0; \
+ *(u64*)(r10 - 64) = r0; \
+ *(u64*)(r10 - 56) = r0; \
+ *(u64*)(r10 - 48) = r0; \
+ *(u64*)(r10 - 40) = r0; \
+ /* Note: fp[-32] left uninitialized */ \
+ *(u64*)(r10 - 24) = r0; \
+ *(u64*)(r10 - 16) = r0; \
+ *(u64*)(r10 - 8) = r0; \
+ /* Limit r3 range to [1, 64] */ \
+ r3 &= 63; \
+ r3 += 1; \
+ r4 = 0; \
+ /* Call bpf_ringbuf_output(), it is one of a few helper functions with\
+ * ARG_CONST_SIZE_OR_ZERO parameter allowed in unpriv mode.\
+ * For unpriv this should signal an error, because memory region [1, 64]\
+ * at &fp[-64] is not fully initialized. \
+ */ \
+ call %[bpf_ringbuf_output]; \
+ r0 = 0; \
+ exit; \
+" :
+ : __imm(bpf_get_prandom_u32),
+ __imm(bpf_ringbuf_output),
+ __imm_addr(map_ringbuf)
+ : __clobber_all);
+}
+
+SEC("tracepoint")
+__description("helper access to variable memory: 8 bytes no leak (init memory)")
+__success
+__naked void bytes_no_leak_init_memory(void)
+{
+ asm volatile (" \
+ r1 = r10; \
+ r0 = 0; \
+ r0 = 0; \
+ *(u64*)(r10 - 64) = r0; \
+ *(u64*)(r10 - 56) = r0; \
+ *(u64*)(r10 - 48) = r0; \
+ *(u64*)(r10 - 40) = r0; \
+ *(u64*)(r10 - 32) = r0; \
+ *(u64*)(r10 - 24) = r0; \
+ *(u64*)(r10 - 16) = r0; \
+ *(u64*)(r10 - 8) = r0; \
+ r1 += -64; \
+ r2 = 0; \
+ r2 &= 32; \
+ r2 += 32; \
+ r3 = 0; \
+ call %[bpf_probe_read_kernel]; \
+ r1 = *(u64*)(r10 - 16); \
+ exit; \
+" :
+ : __imm(bpf_probe_read_kernel)
+ : __clobber_all);
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/verifier_helper_packet_access.c b/tools/testing/selftests/bpf/progs/verifier_helper_packet_access.c
new file mode 100644
index 000000000000..74f5f9cd153d
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/verifier_helper_packet_access.c
@@ -0,0 +1,550 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Converted from tools/testing/selftests/bpf/verifier/helper_packet_access.c */
+
+#include <linux/bpf.h>
+#include <bpf/bpf_helpers.h>
+#include "bpf_misc.h"
+
+struct {
+ __uint(type, BPF_MAP_TYPE_HASH);
+ __uint(max_entries, 1);
+ __type(key, long long);
+ __type(value, long long);
+} map_hash_8b SEC(".maps");
+
+SEC("xdp")
+__description("helper access to packet: test1, valid packet_ptr range")
+__success __retval(0)
+__naked void test1_valid_packet_ptr_range(void)
+{
+ asm volatile (" \
+ r2 = *(u32*)(r1 + %[xdp_md_data]); \
+ r3 = *(u32*)(r1 + %[xdp_md_data_end]); \
+ r1 = r2; \
+ r1 += 8; \
+ if r1 > r3 goto l0_%=; \
+ r1 = %[map_hash_8b] ll; \
+ r3 = r2; \
+ r4 = 0; \
+ call %[bpf_map_update_elem]; \
+l0_%=: r0 = 0; \
+ exit; \
+" :
+ : __imm(bpf_map_update_elem),
+ __imm_addr(map_hash_8b),
+ __imm_const(xdp_md_data, offsetof(struct xdp_md, data)),
+ __imm_const(xdp_md_data_end, offsetof(struct xdp_md, data_end))
+ : __clobber_all);
+}
+
+SEC("xdp")
+__description("helper access to packet: test2, unchecked packet_ptr")
+__failure __msg("invalid access to packet")
+__naked void packet_test2_unchecked_packet_ptr(void)
+{
+ asm volatile (" \
+ r2 = *(u32*)(r1 + %[xdp_md_data]); \
+ r1 = %[map_hash_8b] ll; \
+ call %[bpf_map_lookup_elem]; \
+ r0 = 0; \
+ exit; \
+" :
+ : __imm(bpf_map_lookup_elem),
+ __imm_addr(map_hash_8b),
+ __imm_const(xdp_md_data, offsetof(struct xdp_md, data))
+ : __clobber_all);
+}
+
+SEC("xdp")
+__description("helper access to packet: test3, variable add")
+__success __retval(0)
+__naked void to_packet_test3_variable_add(void)
+{
+ asm volatile (" \
+ r2 = *(u32*)(r1 + %[xdp_md_data]); \
+ r3 = *(u32*)(r1 + %[xdp_md_data_end]); \
+ r4 = r2; \
+ r4 += 8; \
+ if r4 > r3 goto l0_%=; \
+ r5 = *(u8*)(r2 + 0); \
+ r4 = r2; \
+ r4 += r5; \
+ r5 = r4; \
+ r5 += 8; \
+ if r5 > r3 goto l0_%=; \
+ r1 = %[map_hash_8b] ll; \
+ r2 = r4; \
+ call %[bpf_map_lookup_elem]; \
+l0_%=: r0 = 0; \
+ exit; \
+" :
+ : __imm(bpf_map_lookup_elem),
+ __imm_addr(map_hash_8b),
+ __imm_const(xdp_md_data, offsetof(struct xdp_md, data)),
+ __imm_const(xdp_md_data_end, offsetof(struct xdp_md, data_end))
+ : __clobber_all);
+}
+
+SEC("xdp")
+__description("helper access to packet: test4, packet_ptr with bad range")
+__failure __msg("invalid access to packet")
+__naked void packet_ptr_with_bad_range_1(void)
+{
+ asm volatile (" \
+ r2 = *(u32*)(r1 + %[xdp_md_data]); \
+ r3 = *(u32*)(r1 + %[xdp_md_data_end]); \
+ r4 = r2; \
+ r4 += 4; \
+ if r4 > r3 goto l0_%=; \
+ r0 = 0; \
+ exit; \
+l0_%=: r1 = %[map_hash_8b] ll; \
+ call %[bpf_map_lookup_elem]; \
+ r0 = 0; \
+ exit; \
+" :
+ : __imm(bpf_map_lookup_elem),
+ __imm_addr(map_hash_8b),
+ __imm_const(xdp_md_data, offsetof(struct xdp_md, data)),
+ __imm_const(xdp_md_data_end, offsetof(struct xdp_md, data_end))
+ : __clobber_all);
+}
+
+SEC("xdp")
+__description("helper access to packet: test5, packet_ptr with too short range")
+__failure __msg("invalid access to packet")
+__naked void ptr_with_too_short_range_1(void)
+{
+ asm volatile (" \
+ r2 = *(u32*)(r1 + %[xdp_md_data]); \
+ r3 = *(u32*)(r1 + %[xdp_md_data_end]); \
+ r2 += 1; \
+ r4 = r2; \
+ r4 += 7; \
+ if r4 > r3 goto l0_%=; \
+ r1 = %[map_hash_8b] ll; \
+ call %[bpf_map_lookup_elem]; \
+l0_%=: r0 = 0; \
+ exit; \
+" :
+ : __imm(bpf_map_lookup_elem),
+ __imm_addr(map_hash_8b),
+ __imm_const(xdp_md_data, offsetof(struct xdp_md, data)),
+ __imm_const(xdp_md_data_end, offsetof(struct xdp_md, data_end))
+ : __clobber_all);
+}
+
+SEC("tc")
+__description("helper access to packet: test6, cls valid packet_ptr range")
+__success __retval(0)
+__naked void cls_valid_packet_ptr_range(void)
+{
+ asm volatile (" \
+ r2 = *(u32*)(r1 + %[__sk_buff_data]); \
+ r3 = *(u32*)(r1 + %[__sk_buff_data_end]); \
+ r1 = r2; \
+ r1 += 8; \
+ if r1 > r3 goto l0_%=; \
+ r1 = %[map_hash_8b] ll; \
+ r3 = r2; \
+ r4 = 0; \
+ call %[bpf_map_update_elem]; \
+l0_%=: r0 = 0; \
+ exit; \
+" :
+ : __imm(bpf_map_update_elem),
+ __imm_addr(map_hash_8b),
+ __imm_const(__sk_buff_data, offsetof(struct __sk_buff, data)),
+ __imm_const(__sk_buff_data_end, offsetof(struct __sk_buff, data_end))
+ : __clobber_all);
+}
+
+SEC("tc")
+__description("helper access to packet: test7, cls unchecked packet_ptr")
+__failure __msg("invalid access to packet")
+__naked void test7_cls_unchecked_packet_ptr(void)
+{
+ asm volatile (" \
+ r2 = *(u32*)(r1 + %[__sk_buff_data]); \
+ r1 = %[map_hash_8b] ll; \
+ call %[bpf_map_lookup_elem]; \
+ r0 = 0; \
+ exit; \
+" :
+ : __imm(bpf_map_lookup_elem),
+ __imm_addr(map_hash_8b),
+ __imm_const(__sk_buff_data, offsetof(struct __sk_buff, data))
+ : __clobber_all);
+}
+
+SEC("tc")
+__description("helper access to packet: test8, cls variable add")
+__success __retval(0)
+__naked void packet_test8_cls_variable_add(void)
+{
+ asm volatile (" \
+ r2 = *(u32*)(r1 + %[__sk_buff_data]); \
+ r3 = *(u32*)(r1 + %[__sk_buff_data_end]); \
+ r4 = r2; \
+ r4 += 8; \
+ if r4 > r3 goto l0_%=; \
+ r5 = *(u8*)(r2 + 0); \
+ r4 = r2; \
+ r4 += r5; \
+ r5 = r4; \
+ r5 += 8; \
+ if r5 > r3 goto l0_%=; \
+ r1 = %[map_hash_8b] ll; \
+ r2 = r4; \
+ call %[bpf_map_lookup_elem]; \
+l0_%=: r0 = 0; \
+ exit; \
+" :
+ : __imm(bpf_map_lookup_elem),
+ __imm_addr(map_hash_8b),
+ __imm_const(__sk_buff_data, offsetof(struct __sk_buff, data)),
+ __imm_const(__sk_buff_data_end, offsetof(struct __sk_buff, data_end))
+ : __clobber_all);
+}
+
+SEC("tc")
+__description("helper access to packet: test9, cls packet_ptr with bad range")
+__failure __msg("invalid access to packet")
+__naked void packet_ptr_with_bad_range_2(void)
+{
+ asm volatile (" \
+ r2 = *(u32*)(r1 + %[__sk_buff_data]); \
+ r3 = *(u32*)(r1 + %[__sk_buff_data_end]); \
+ r4 = r2; \
+ r4 += 4; \
+ if r4 > r3 goto l0_%=; \
+ r0 = 0; \
+ exit; \
+l0_%=: r1 = %[map_hash_8b] ll; \
+ call %[bpf_map_lookup_elem]; \
+ r0 = 0; \
+ exit; \
+" :
+ : __imm(bpf_map_lookup_elem),
+ __imm_addr(map_hash_8b),
+ __imm_const(__sk_buff_data, offsetof(struct __sk_buff, data)),
+ __imm_const(__sk_buff_data_end, offsetof(struct __sk_buff, data_end))
+ : __clobber_all);
+}
+
+SEC("tc")
+__description("helper access to packet: test10, cls packet_ptr with too short range")
+__failure __msg("invalid access to packet")
+__naked void ptr_with_too_short_range_2(void)
+{
+ asm volatile (" \
+ r2 = *(u32*)(r1 + %[__sk_buff_data]); \
+ r3 = *(u32*)(r1 + %[__sk_buff_data_end]); \
+ r2 += 1; \
+ r4 = r2; \
+ r4 += 7; \
+ if r4 > r3 goto l0_%=; \
+ r1 = %[map_hash_8b] ll; \
+ call %[bpf_map_lookup_elem]; \
+l0_%=: r0 = 0; \
+ exit; \
+" :
+ : __imm(bpf_map_lookup_elem),
+ __imm_addr(map_hash_8b),
+ __imm_const(__sk_buff_data, offsetof(struct __sk_buff, data)),
+ __imm_const(__sk_buff_data_end, offsetof(struct __sk_buff, data_end))
+ : __clobber_all);
+}
+
+SEC("tc")
+__description("helper access to packet: test11, cls unsuitable helper 1")
+__failure __msg("helper access to the packet")
+__naked void test11_cls_unsuitable_helper_1(void)
+{
+ asm volatile (" \
+ r6 = *(u32*)(r1 + %[__sk_buff_data]); \
+ r7 = *(u32*)(r1 + %[__sk_buff_data_end]); \
+ r6 += 1; \
+ r3 = r6; \
+ r3 += 7; \
+ if r3 > r7 goto l0_%=; \
+ r2 = 0; \
+ r4 = 42; \
+ r5 = 0; \
+ call %[bpf_skb_store_bytes]; \
+l0_%=: r0 = 0; \
+ exit; \
+" :
+ : __imm(bpf_skb_store_bytes),
+ __imm_const(__sk_buff_data, offsetof(struct __sk_buff, data)),
+ __imm_const(__sk_buff_data_end, offsetof(struct __sk_buff, data_end))
+ : __clobber_all);
+}
+
+SEC("tc")
+__description("helper access to packet: test12, cls unsuitable helper 2")
+__failure __msg("helper access to the packet")
+__naked void test12_cls_unsuitable_helper_2(void)
+{
+ asm volatile (" \
+ r6 = *(u32*)(r1 + %[__sk_buff_data]); \
+ r7 = *(u32*)(r1 + %[__sk_buff_data_end]); \
+ r3 = r6; \
+ r6 += 8; \
+ if r6 > r7 goto l0_%=; \
+ r2 = 0; \
+ r4 = 4; \
+ call %[bpf_skb_load_bytes]; \
+l0_%=: r0 = 0; \
+ exit; \
+" :
+ : __imm(bpf_skb_load_bytes),
+ __imm_const(__sk_buff_data, offsetof(struct __sk_buff, data)),
+ __imm_const(__sk_buff_data_end, offsetof(struct __sk_buff, data_end))
+ : __clobber_all);
+}
+
+SEC("tc")
+__description("helper access to packet: test13, cls helper ok")
+__success __retval(0)
+__naked void packet_test13_cls_helper_ok(void)
+{
+ asm volatile (" \
+ r6 = *(u32*)(r1 + %[__sk_buff_data]); \
+ r7 = *(u32*)(r1 + %[__sk_buff_data_end]); \
+ r6 += 1; \
+ r1 = r6; \
+ r1 += 7; \
+ if r1 > r7 goto l0_%=; \
+ r1 = r6; \
+ r2 = 4; \
+ r3 = 0; \
+ r4 = 0; \
+ r5 = 0; \
+ call %[bpf_csum_diff]; \
+l0_%=: r0 = 0; \
+ exit; \
+" :
+ : __imm(bpf_csum_diff),
+ __imm_const(__sk_buff_data, offsetof(struct __sk_buff, data)),
+ __imm_const(__sk_buff_data_end, offsetof(struct __sk_buff, data_end))
+ : __clobber_all);
+}
+
+SEC("tc")
+__description("helper access to packet: test14, cls helper ok sub")
+__success __retval(0)
+__naked void test14_cls_helper_ok_sub(void)
+{
+ asm volatile (" \
+ r6 = *(u32*)(r1 + %[__sk_buff_data]); \
+ r7 = *(u32*)(r1 + %[__sk_buff_data_end]); \
+ r6 += 1; \
+ r1 = r6; \
+ r1 += 7; \
+ if r1 > r7 goto l0_%=; \
+ r1 -= 4; \
+ r2 = 4; \
+ r3 = 0; \
+ r4 = 0; \
+ r5 = 0; \
+ call %[bpf_csum_diff]; \
+l0_%=: r0 = 0; \
+ exit; \
+" :
+ : __imm(bpf_csum_diff),
+ __imm_const(__sk_buff_data, offsetof(struct __sk_buff, data)),
+ __imm_const(__sk_buff_data_end, offsetof(struct __sk_buff, data_end))
+ : __clobber_all);
+}
+
+SEC("tc")
+__description("helper access to packet: test15, cls helper fail sub")
+__failure __msg("invalid access to packet")
+__naked void test15_cls_helper_fail_sub(void)
+{
+ asm volatile (" \
+ r6 = *(u32*)(r1 + %[__sk_buff_data]); \
+ r7 = *(u32*)(r1 + %[__sk_buff_data_end]); \
+ r6 += 1; \
+ r1 = r6; \
+ r1 += 7; \
+ if r1 > r7 goto l0_%=; \
+ r1 -= 12; \
+ r2 = 4; \
+ r3 = 0; \
+ r4 = 0; \
+ r5 = 0; \
+ call %[bpf_csum_diff]; \
+l0_%=: r0 = 0; \
+ exit; \
+" :
+ : __imm(bpf_csum_diff),
+ __imm_const(__sk_buff_data, offsetof(struct __sk_buff, data)),
+ __imm_const(__sk_buff_data_end, offsetof(struct __sk_buff, data_end))
+ : __clobber_all);
+}
+
+SEC("tc")
+__description("helper access to packet: test16, cls helper fail range 1")
+__failure __msg("invalid access to packet")
+__naked void cls_helper_fail_range_1(void)
+{
+ asm volatile (" \
+ r6 = *(u32*)(r1 + %[__sk_buff_data]); \
+ r7 = *(u32*)(r1 + %[__sk_buff_data_end]); \
+ r6 += 1; \
+ r1 = r6; \
+ r1 += 7; \
+ if r1 > r7 goto l0_%=; \
+ r1 = r6; \
+ r2 = 8; \
+ r3 = 0; \
+ r4 = 0; \
+ r5 = 0; \
+ call %[bpf_csum_diff]; \
+l0_%=: r0 = 0; \
+ exit; \
+" :
+ : __imm(bpf_csum_diff),
+ __imm_const(__sk_buff_data, offsetof(struct __sk_buff, data)),
+ __imm_const(__sk_buff_data_end, offsetof(struct __sk_buff, data_end))
+ : __clobber_all);
+}
+
+SEC("tc")
+__description("helper access to packet: test17, cls helper fail range 2")
+__failure __msg("R2 min value is negative")
+__naked void cls_helper_fail_range_2(void)
+{
+ asm volatile (" \
+ r6 = *(u32*)(r1 + %[__sk_buff_data]); \
+ r7 = *(u32*)(r1 + %[__sk_buff_data_end]); \
+ r6 += 1; \
+ r1 = r6; \
+ r1 += 7; \
+ if r1 > r7 goto l0_%=; \
+ r1 = r6; \
+ r2 = -9; \
+ r3 = 0; \
+ r4 = 0; \
+ r5 = 0; \
+ call %[bpf_csum_diff]; \
+l0_%=: r0 = 0; \
+ exit; \
+" :
+ : __imm(bpf_csum_diff),
+ __imm_const(__sk_buff_data, offsetof(struct __sk_buff, data)),
+ __imm_const(__sk_buff_data_end, offsetof(struct __sk_buff, data_end))
+ : __clobber_all);
+}
+
+SEC("tc")
+__description("helper access to packet: test18, cls helper fail range 3")
+__failure __msg("R2 min value is negative")
+__naked void cls_helper_fail_range_3(void)
+{
+ asm volatile (" \
+ r6 = *(u32*)(r1 + %[__sk_buff_data]); \
+ r7 = *(u32*)(r1 + %[__sk_buff_data_end]); \
+ r6 += 1; \
+ r1 = r6; \
+ r1 += 7; \
+ if r1 > r7 goto l0_%=; \
+ r1 = r6; \
+ r2 = %[__imm_0]; \
+ r3 = 0; \
+ r4 = 0; \
+ r5 = 0; \
+ call %[bpf_csum_diff]; \
+l0_%=: r0 = 0; \
+ exit; \
+" :
+ : __imm(bpf_csum_diff),
+ __imm_const(__imm_0, ~0),
+ __imm_const(__sk_buff_data, offsetof(struct __sk_buff, data)),
+ __imm_const(__sk_buff_data_end, offsetof(struct __sk_buff, data_end))
+ : __clobber_all);
+}
+
+SEC("tc")
+__description("helper access to packet: test19, cls helper range zero")
+__success __retval(0)
+__naked void test19_cls_helper_range_zero(void)
+{
+ asm volatile (" \
+ r6 = *(u32*)(r1 + %[__sk_buff_data]); \
+ r7 = *(u32*)(r1 + %[__sk_buff_data_end]); \
+ r6 += 1; \
+ r1 = r6; \
+ r1 += 7; \
+ if r1 > r7 goto l0_%=; \
+ r1 = r6; \
+ r2 = 0; \
+ r3 = 0; \
+ r4 = 0; \
+ r5 = 0; \
+ call %[bpf_csum_diff]; \
+l0_%=: r0 = 0; \
+ exit; \
+" :
+ : __imm(bpf_csum_diff),
+ __imm_const(__sk_buff_data, offsetof(struct __sk_buff, data)),
+ __imm_const(__sk_buff_data_end, offsetof(struct __sk_buff, data_end))
+ : __clobber_all);
+}
+
+SEC("tc")
+__description("helper access to packet: test20, pkt end as input")
+__failure __msg("R1 type=pkt_end expected=fp")
+__naked void test20_pkt_end_as_input(void)
+{
+ asm volatile (" \
+ r6 = *(u32*)(r1 + %[__sk_buff_data]); \
+ r7 = *(u32*)(r1 + %[__sk_buff_data_end]); \
+ r6 += 1; \
+ r1 = r6; \
+ r1 += 7; \
+ if r1 > r7 goto l0_%=; \
+ r1 = r7; \
+ r2 = 4; \
+ r3 = 0; \
+ r4 = 0; \
+ r5 = 0; \
+ call %[bpf_csum_diff]; \
+l0_%=: r0 = 0; \
+ exit; \
+" :
+ : __imm(bpf_csum_diff),
+ __imm_const(__sk_buff_data, offsetof(struct __sk_buff, data)),
+ __imm_const(__sk_buff_data_end, offsetof(struct __sk_buff, data_end))
+ : __clobber_all);
+}
+
+SEC("tc")
+__description("helper access to packet: test21, wrong reg")
+__failure __msg("invalid access to packet")
+__naked void to_packet_test21_wrong_reg(void)
+{
+ asm volatile (" \
+ r6 = *(u32*)(r1 + %[__sk_buff_data]); \
+ r7 = *(u32*)(r1 + %[__sk_buff_data_end]); \
+ r6 += 1; \
+ r1 = r6; \
+ r1 += 7; \
+ if r1 > r7 goto l0_%=; \
+ r2 = 4; \
+ r3 = 0; \
+ r4 = 0; \
+ r5 = 0; \
+ call %[bpf_csum_diff]; \
+ r0 = 0; \
+l0_%=: exit; \
+" :
+ : __imm(bpf_csum_diff),
+ __imm_const(__sk_buff_data, offsetof(struct __sk_buff, data)),
+ __imm_const(__sk_buff_data_end, offsetof(struct __sk_buff, data_end))
+ : __clobber_all);
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/verifier_helper_restricted.c b/tools/testing/selftests/bpf/progs/verifier_helper_restricted.c
new file mode 100644
index 000000000000..0ede0ccd090c
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/verifier_helper_restricted.c
@@ -0,0 +1,279 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Converted from tools/testing/selftests/bpf/verifier/helper_restricted.c */
+
+#include <linux/bpf.h>
+#include <bpf/bpf_helpers.h>
+#include "bpf_misc.h"
+
+struct val {
+ int cnt;
+ struct bpf_spin_lock l;
+};
+
+struct {
+ __uint(type, BPF_MAP_TYPE_ARRAY);
+ __uint(max_entries, 1);
+ __type(key, int);
+ __type(value, struct val);
+} map_spin_lock SEC(".maps");
+
+struct timer {
+ struct bpf_timer t;
+};
+
+struct {
+ __uint(type, BPF_MAP_TYPE_ARRAY);
+ __uint(max_entries, 1);
+ __type(key, int);
+ __type(value, struct timer);
+} map_timer SEC(".maps");
+
+SEC("kprobe")
+__description("bpf_ktime_get_coarse_ns is forbidden in BPF_PROG_TYPE_KPROBE")
+__failure __msg("unknown func bpf_ktime_get_coarse_ns")
+__naked void in_bpf_prog_type_kprobe_1(void)
+{
+ asm volatile (" \
+ call %[bpf_ktime_get_coarse_ns]; \
+ r0 = 0; \
+ exit; \
+" :
+ : __imm(bpf_ktime_get_coarse_ns)
+ : __clobber_all);
+}
+
+SEC("tracepoint")
+__description("bpf_ktime_get_coarse_ns is forbidden in BPF_PROG_TYPE_TRACEPOINT")
+__failure __msg("unknown func bpf_ktime_get_coarse_ns")
+__naked void in_bpf_prog_type_tracepoint_1(void)
+{
+ asm volatile (" \
+ call %[bpf_ktime_get_coarse_ns]; \
+ r0 = 0; \
+ exit; \
+" :
+ : __imm(bpf_ktime_get_coarse_ns)
+ : __clobber_all);
+}
+
+SEC("perf_event")
+__description("bpf_ktime_get_coarse_ns is forbidden in BPF_PROG_TYPE_PERF_EVENT")
+__failure __msg("unknown func bpf_ktime_get_coarse_ns")
+__naked void bpf_prog_type_perf_event_1(void)
+{
+ asm volatile (" \
+ call %[bpf_ktime_get_coarse_ns]; \
+ r0 = 0; \
+ exit; \
+" :
+ : __imm(bpf_ktime_get_coarse_ns)
+ : __clobber_all);
+}
+
+SEC("raw_tracepoint")
+__description("bpf_ktime_get_coarse_ns is forbidden in BPF_PROG_TYPE_RAW_TRACEPOINT")
+__failure __msg("unknown func bpf_ktime_get_coarse_ns")
+__naked void bpf_prog_type_raw_tracepoint_1(void)
+{
+ asm volatile (" \
+ call %[bpf_ktime_get_coarse_ns]; \
+ r0 = 0; \
+ exit; \
+" :
+ : __imm(bpf_ktime_get_coarse_ns)
+ : __clobber_all);
+}
+
+SEC("kprobe")
+__description("bpf_timer_init isn restricted in BPF_PROG_TYPE_KPROBE")
+__failure __msg("tracing progs cannot use bpf_timer yet")
+__naked void in_bpf_prog_type_kprobe_2(void)
+{
+ asm volatile (" \
+ r2 = r10; \
+ r2 += -8; \
+ r1 = 0; \
+ *(u64*)(r2 + 0) = r1; \
+ r1 = %[map_timer] ll; \
+ call %[bpf_map_lookup_elem]; \
+ if r0 == 0 goto l0_%=; \
+ r1 = r0; \
+ r2 = %[map_timer] ll; \
+ r3 = 1; \
+l0_%=: call %[bpf_timer_init]; \
+ exit; \
+" :
+ : __imm(bpf_map_lookup_elem),
+ __imm(bpf_timer_init),
+ __imm_addr(map_timer)
+ : __clobber_all);
+}
+
+SEC("perf_event")
+__description("bpf_timer_init is forbidden in BPF_PROG_TYPE_PERF_EVENT")
+__failure __msg("tracing progs cannot use bpf_timer yet")
+__naked void bpf_prog_type_perf_event_2(void)
+{
+ asm volatile (" \
+ r2 = r10; \
+ r2 += -8; \
+ r1 = 0; \
+ *(u64*)(r2 + 0) = r1; \
+ r1 = %[map_timer] ll; \
+ call %[bpf_map_lookup_elem]; \
+ if r0 == 0 goto l0_%=; \
+ r1 = r0; \
+ r2 = %[map_timer] ll; \
+ r3 = 1; \
+l0_%=: call %[bpf_timer_init]; \
+ exit; \
+" :
+ : __imm(bpf_map_lookup_elem),
+ __imm(bpf_timer_init),
+ __imm_addr(map_timer)
+ : __clobber_all);
+}
+
+SEC("tracepoint")
+__description("bpf_timer_init is forbidden in BPF_PROG_TYPE_TRACEPOINT")
+__failure __msg("tracing progs cannot use bpf_timer yet")
+__naked void in_bpf_prog_type_tracepoint_2(void)
+{
+ asm volatile (" \
+ r2 = r10; \
+ r2 += -8; \
+ r1 = 0; \
+ *(u64*)(r2 + 0) = r1; \
+ r1 = %[map_timer] ll; \
+ call %[bpf_map_lookup_elem]; \
+ if r0 == 0 goto l0_%=; \
+ r1 = r0; \
+ r2 = %[map_timer] ll; \
+ r3 = 1; \
+l0_%=: call %[bpf_timer_init]; \
+ exit; \
+" :
+ : __imm(bpf_map_lookup_elem),
+ __imm(bpf_timer_init),
+ __imm_addr(map_timer)
+ : __clobber_all);
+}
+
+SEC("raw_tracepoint")
+__description("bpf_timer_init is forbidden in BPF_PROG_TYPE_RAW_TRACEPOINT")
+__failure __msg("tracing progs cannot use bpf_timer yet")
+__naked void bpf_prog_type_raw_tracepoint_2(void)
+{
+ asm volatile (" \
+ r2 = r10; \
+ r2 += -8; \
+ r1 = 0; \
+ *(u64*)(r2 + 0) = r1; \
+ r1 = %[map_timer] ll; \
+ call %[bpf_map_lookup_elem]; \
+ if r0 == 0 goto l0_%=; \
+ r1 = r0; \
+ r2 = %[map_timer] ll; \
+ r3 = 1; \
+l0_%=: call %[bpf_timer_init]; \
+ exit; \
+" :
+ : __imm(bpf_map_lookup_elem),
+ __imm(bpf_timer_init),
+ __imm_addr(map_timer)
+ : __clobber_all);
+}
+
+SEC("kprobe")
+__description("bpf_spin_lock is forbidden in BPF_PROG_TYPE_KPROBE")
+__failure __msg("tracing progs cannot use bpf_spin_lock yet")
+__naked void in_bpf_prog_type_kprobe_3(void)
+{
+ asm volatile (" \
+ r2 = r10; \
+ r2 += -8; \
+ r1 = 0; \
+ *(u64*)(r2 + 0) = r1; \
+ r1 = %[map_spin_lock] ll; \
+ call %[bpf_map_lookup_elem]; \
+ if r0 == 0 goto l0_%=; \
+ r1 = r0; \
+ call %[bpf_spin_lock]; \
+l0_%=: exit; \
+" :
+ : __imm(bpf_map_lookup_elem),
+ __imm(bpf_spin_lock),
+ __imm_addr(map_spin_lock)
+ : __clobber_all);
+}
+
+SEC("tracepoint")
+__description("bpf_spin_lock is forbidden in BPF_PROG_TYPE_TRACEPOINT")
+__failure __msg("tracing progs cannot use bpf_spin_lock yet")
+__naked void in_bpf_prog_type_tracepoint_3(void)
+{
+ asm volatile (" \
+ r2 = r10; \
+ r2 += -8; \
+ r1 = 0; \
+ *(u64*)(r2 + 0) = r1; \
+ r1 = %[map_spin_lock] ll; \
+ call %[bpf_map_lookup_elem]; \
+ if r0 == 0 goto l0_%=; \
+ r1 = r0; \
+ call %[bpf_spin_lock]; \
+l0_%=: exit; \
+" :
+ : __imm(bpf_map_lookup_elem),
+ __imm(bpf_spin_lock),
+ __imm_addr(map_spin_lock)
+ : __clobber_all);
+}
+
+SEC("perf_event")
+__description("bpf_spin_lock is forbidden in BPF_PROG_TYPE_PERF_EVENT")
+__failure __msg("tracing progs cannot use bpf_spin_lock yet")
+__naked void bpf_prog_type_perf_event_3(void)
+{
+ asm volatile (" \
+ r2 = r10; \
+ r2 += -8; \
+ r1 = 0; \
+ *(u64*)(r2 + 0) = r1; \
+ r1 = %[map_spin_lock] ll; \
+ call %[bpf_map_lookup_elem]; \
+ if r0 == 0 goto l0_%=; \
+ r1 = r0; \
+ call %[bpf_spin_lock]; \
+l0_%=: exit; \
+" :
+ : __imm(bpf_map_lookup_elem),
+ __imm(bpf_spin_lock),
+ __imm_addr(map_spin_lock)
+ : __clobber_all);
+}
+
+SEC("raw_tracepoint")
+__description("bpf_spin_lock is forbidden in BPF_PROG_TYPE_RAW_TRACEPOINT")
+__failure __msg("tracing progs cannot use bpf_spin_lock yet")
+__naked void bpf_prog_type_raw_tracepoint_3(void)
+{
+ asm volatile (" \
+ r2 = r10; \
+ r2 += -8; \
+ r1 = 0; \
+ *(u64*)(r2 + 0) = r1; \
+ r1 = %[map_spin_lock] ll; \
+ call %[bpf_map_lookup_elem]; \
+ if r0 == 0 goto l0_%=; \
+ r1 = r0; \
+ call %[bpf_spin_lock]; \
+l0_%=: exit; \
+" :
+ : __imm(bpf_map_lookup_elem),
+ __imm(bpf_spin_lock),
+ __imm_addr(map_spin_lock)
+ : __clobber_all);
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/verifier_helper_value_access.c b/tools/testing/selftests/bpf/progs/verifier_helper_value_access.c
new file mode 100644
index 000000000000..886498b5e6f3
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/verifier_helper_value_access.c
@@ -0,0 +1,1282 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Converted from tools/testing/selftests/bpf/verifier/helper_value_access.c */
+
+#include <linux/bpf.h>
+#include <bpf/bpf_helpers.h>
+#include "bpf_misc.h"
+
+struct other_val {
+ long long foo;
+ long long bar;
+};
+
+struct {
+ __uint(type, BPF_MAP_TYPE_HASH);
+ __uint(max_entries, 1);
+ __type(key, long long);
+ __type(value, struct other_val);
+} map_hash_16b SEC(".maps");
+
+#define MAX_ENTRIES 11
+
+struct test_val {
+ unsigned int index;
+ int foo[MAX_ENTRIES];
+};
+
+struct {
+ __uint(type, BPF_MAP_TYPE_HASH);
+ __uint(max_entries, 1);
+ __type(key, long long);
+ __type(value, struct test_val);
+} map_hash_48b SEC(".maps");
+
+struct {
+ __uint(type, BPF_MAP_TYPE_HASH);
+ __uint(max_entries, 1);
+ __type(key, long long);
+ __type(value, long long);
+} map_hash_8b SEC(".maps");
+
+SEC("tracepoint")
+__description("helper access to map: full range")
+__success
+__naked void access_to_map_full_range(void)
+{
+ asm volatile (" \
+ r2 = r10; \
+ r2 += -8; \
+ r1 = 0; \
+ *(u64*)(r2 + 0) = r1; \
+ r1 = %[map_hash_48b] ll; \
+ call %[bpf_map_lookup_elem]; \
+ if r0 == 0 goto l0_%=; \
+ r1 = r0; \
+ r2 = %[sizeof_test_val]; \
+ r3 = 0; \
+ call %[bpf_probe_read_kernel]; \
+l0_%=: exit; \
+" :
+ : __imm(bpf_map_lookup_elem),
+ __imm(bpf_probe_read_kernel),
+ __imm_addr(map_hash_48b),
+ __imm_const(sizeof_test_val, sizeof(struct test_val))
+ : __clobber_all);
+}
+
+SEC("tracepoint")
+__description("helper access to map: partial range")
+__success
+__naked void access_to_map_partial_range(void)
+{
+ asm volatile (" \
+ r2 = r10; \
+ r2 += -8; \
+ r1 = 0; \
+ *(u64*)(r2 + 0) = r1; \
+ r1 = %[map_hash_48b] ll; \
+ call %[bpf_map_lookup_elem]; \
+ if r0 == 0 goto l0_%=; \
+ r1 = r0; \
+ r2 = 8; \
+ r3 = 0; \
+ call %[bpf_probe_read_kernel]; \
+l0_%=: exit; \
+" :
+ : __imm(bpf_map_lookup_elem),
+ __imm(bpf_probe_read_kernel),
+ __imm_addr(map_hash_48b)
+ : __clobber_all);
+}
+
+/* Call a function taking a pointer and a size which doesn't allow the size to
+ * be zero (i.e. bpf_trace_printk() declares the second argument to be
+ * ARG_CONST_SIZE, not ARG_CONST_SIZE_OR_ZERO). We attempt to pass zero for the
+ * size and expect to fail.
+ */
+SEC("tracepoint")
+__description("helper access to map: empty range")
+__failure __msg("R2 invalid zero-sized read: u64=[0,0]")
+__naked void access_to_map_empty_range(void)
+{
+ asm volatile (" \
+ r2 = r10; \
+ r2 += -8; \
+ r1 = 0; \
+ *(u64*)(r2 + 0) = r1; \
+ r1 = %[map_hash_48b] ll; \
+ call %[bpf_map_lookup_elem]; \
+ if r0 == 0 goto l0_%=; \
+ r1 = r0; \
+ r2 = 0; \
+ call %[bpf_trace_printk]; \
+l0_%=: exit; \
+" :
+ : __imm(bpf_map_lookup_elem),
+ __imm(bpf_trace_printk),
+ __imm_addr(map_hash_48b)
+ : __clobber_all);
+}
+
+/* Like the test above, but this time the size register is not known to be zero;
+ * its lower-bound is zero though, which is still unacceptable.
+ */
+SEC("tracepoint")
+__description("helper access to map: possibly-empty ange")
+__failure __msg("R2 invalid zero-sized read: u64=[0,4]")
+__naked void access_to_map_possibly_empty_range(void)
+{
+ asm volatile (" \
+ r2 = r10; \
+ r2 += -8; \
+ r1 = 0; \
+ *(u64*)(r2 + 0) = r1; \
+ r1 = %[map_hash_48b] ll; \
+ call %[bpf_map_lookup_elem]; \
+ if r0 == 0 goto l0_%=; \
+ r1 = r0; \
+ /* Read an unknown value */ \
+ r7 = *(u64*)(r0 + 0); \
+ /* Make it small and positive, to avoid other errors */ \
+ r7 &= 4; \
+ r2 = 0; \
+ r2 += r7; \
+ call %[bpf_trace_printk]; \
+l0_%=: exit; \
+" :
+ : __imm(bpf_map_lookup_elem),
+ __imm(bpf_trace_printk),
+ __imm_addr(map_hash_48b)
+ : __clobber_all);
+}
+
+SEC("tracepoint")
+__description("helper access to map: out-of-bound range")
+__failure __msg("invalid access to map value, value_size=48 off=0 size=56")
+__naked void map_out_of_bound_range(void)
+{
+ asm volatile (" \
+ r2 = r10; \
+ r2 += -8; \
+ r1 = 0; \
+ *(u64*)(r2 + 0) = r1; \
+ r1 = %[map_hash_48b] ll; \
+ call %[bpf_map_lookup_elem]; \
+ if r0 == 0 goto l0_%=; \
+ r1 = r0; \
+ r2 = %[__imm_0]; \
+ r3 = 0; \
+ call %[bpf_probe_read_kernel]; \
+l0_%=: exit; \
+" :
+ : __imm(bpf_map_lookup_elem),
+ __imm(bpf_probe_read_kernel),
+ __imm_addr(map_hash_48b),
+ __imm_const(__imm_0, sizeof(struct test_val) + 8)
+ : __clobber_all);
+}
+
+SEC("tracepoint")
+__description("helper access to map: negative range")
+__failure __msg("R2 min value is negative")
+__naked void access_to_map_negative_range(void)
+{
+ asm volatile (" \
+ r2 = r10; \
+ r2 += -8; \
+ r1 = 0; \
+ *(u64*)(r2 + 0) = r1; \
+ r1 = %[map_hash_48b] ll; \
+ call %[bpf_map_lookup_elem]; \
+ if r0 == 0 goto l0_%=; \
+ r1 = r0; \
+ r2 = -8; \
+ r3 = 0; \
+ call %[bpf_probe_read_kernel]; \
+l0_%=: exit; \
+" :
+ : __imm(bpf_map_lookup_elem),
+ __imm(bpf_probe_read_kernel),
+ __imm_addr(map_hash_48b)
+ : __clobber_all);
+}
+
+SEC("tracepoint")
+__description("helper access to adjusted map (via const imm): full range")
+__success
+__naked void via_const_imm_full_range(void)
+{
+ asm volatile (" \
+ r2 = r10; \
+ r2 += -8; \
+ r1 = 0; \
+ *(u64*)(r2 + 0) = r1; \
+ r1 = %[map_hash_48b] ll; \
+ call %[bpf_map_lookup_elem]; \
+ if r0 == 0 goto l0_%=; \
+ r1 = r0; \
+ r1 += %[test_val_foo]; \
+ r2 = %[__imm_0]; \
+ r3 = 0; \
+ call %[bpf_probe_read_kernel]; \
+l0_%=: exit; \
+" :
+ : __imm(bpf_map_lookup_elem),
+ __imm(bpf_probe_read_kernel),
+ __imm_addr(map_hash_48b),
+ __imm_const(__imm_0, sizeof(struct test_val) - offsetof(struct test_val, foo)),
+ __imm_const(test_val_foo, offsetof(struct test_val, foo))
+ : __clobber_all);
+}
+
+SEC("tracepoint")
+__description("helper access to adjusted map (via const imm): partial range")
+__success
+__naked void via_const_imm_partial_range(void)
+{
+ asm volatile (" \
+ r2 = r10; \
+ r2 += -8; \
+ r1 = 0; \
+ *(u64*)(r2 + 0) = r1; \
+ r1 = %[map_hash_48b] ll; \
+ call %[bpf_map_lookup_elem]; \
+ if r0 == 0 goto l0_%=; \
+ r1 = r0; \
+ r1 += %[test_val_foo]; \
+ r2 = 8; \
+ r3 = 0; \
+ call %[bpf_probe_read_kernel]; \
+l0_%=: exit; \
+" :
+ : __imm(bpf_map_lookup_elem),
+ __imm(bpf_probe_read_kernel),
+ __imm_addr(map_hash_48b),
+ __imm_const(test_val_foo, offsetof(struct test_val, foo))
+ : __clobber_all);
+}
+
+SEC("tracepoint")
+__description("helper access to adjusted map (via const imm): empty range")
+__failure __msg("R2 invalid zero-sized read")
+__naked void via_const_imm_empty_range(void)
+{
+ asm volatile (" \
+ r2 = r10; \
+ r2 += -8; \
+ r1 = 0; \
+ *(u64*)(r2 + 0) = r1; \
+ r1 = %[map_hash_48b] ll; \
+ call %[bpf_map_lookup_elem]; \
+ if r0 == 0 goto l0_%=; \
+ r1 = r0; \
+ r1 += %[test_val_foo]; \
+ r2 = 0; \
+ call %[bpf_trace_printk]; \
+l0_%=: exit; \
+" :
+ : __imm(bpf_map_lookup_elem),
+ __imm(bpf_trace_printk),
+ __imm_addr(map_hash_48b),
+ __imm_const(test_val_foo, offsetof(struct test_val, foo))
+ : __clobber_all);
+}
+
+SEC("tracepoint")
+__description("helper access to adjusted map (via const imm): out-of-bound range")
+__failure __msg("invalid access to map value, value_size=48 off=4 size=52")
+__naked void imm_out_of_bound_range(void)
+{
+ asm volatile (" \
+ r2 = r10; \
+ r2 += -8; \
+ r1 = 0; \
+ *(u64*)(r2 + 0) = r1; \
+ r1 = %[map_hash_48b] ll; \
+ call %[bpf_map_lookup_elem]; \
+ if r0 == 0 goto l0_%=; \
+ r1 = r0; \
+ r1 += %[test_val_foo]; \
+ r2 = %[__imm_0]; \
+ r3 = 0; \
+ call %[bpf_probe_read_kernel]; \
+l0_%=: exit; \
+" :
+ : __imm(bpf_map_lookup_elem),
+ __imm(bpf_probe_read_kernel),
+ __imm_addr(map_hash_48b),
+ __imm_const(__imm_0, sizeof(struct test_val) - offsetof(struct test_val, foo) + 8),
+ __imm_const(test_val_foo, offsetof(struct test_val, foo))
+ : __clobber_all);
+}
+
+SEC("tracepoint")
+__description("helper access to adjusted map (via const imm): negative range (> adjustment)")
+__failure __msg("R2 min value is negative")
+__naked void const_imm_negative_range_adjustment_1(void)
+{
+ asm volatile (" \
+ r2 = r10; \
+ r2 += -8; \
+ r1 = 0; \
+ *(u64*)(r2 + 0) = r1; \
+ r1 = %[map_hash_48b] ll; \
+ call %[bpf_map_lookup_elem]; \
+ if r0 == 0 goto l0_%=; \
+ r1 = r0; \
+ r1 += %[test_val_foo]; \
+ r2 = -8; \
+ r3 = 0; \
+ call %[bpf_probe_read_kernel]; \
+l0_%=: exit; \
+" :
+ : __imm(bpf_map_lookup_elem),
+ __imm(bpf_probe_read_kernel),
+ __imm_addr(map_hash_48b),
+ __imm_const(test_val_foo, offsetof(struct test_val, foo))
+ : __clobber_all);
+}
+
+SEC("tracepoint")
+__description("helper access to adjusted map (via const imm): negative range (< adjustment)")
+__failure __msg("R2 min value is negative")
+__naked void const_imm_negative_range_adjustment_2(void)
+{
+ asm volatile (" \
+ r2 = r10; \
+ r2 += -8; \
+ r1 = 0; \
+ *(u64*)(r2 + 0) = r1; \
+ r1 = %[map_hash_48b] ll; \
+ call %[bpf_map_lookup_elem]; \
+ if r0 == 0 goto l0_%=; \
+ r1 = r0; \
+ r1 += %[test_val_foo]; \
+ r2 = -1; \
+ r3 = 0; \
+ call %[bpf_probe_read_kernel]; \
+l0_%=: exit; \
+" :
+ : __imm(bpf_map_lookup_elem),
+ __imm(bpf_probe_read_kernel),
+ __imm_addr(map_hash_48b),
+ __imm_const(test_val_foo, offsetof(struct test_val, foo))
+ : __clobber_all);
+}
+
+SEC("tracepoint")
+__description("helper access to adjusted map (via const reg): full range")
+__success
+__naked void via_const_reg_full_range(void)
+{
+ asm volatile (" \
+ r2 = r10; \
+ r2 += -8; \
+ r1 = 0; \
+ *(u64*)(r2 + 0) = r1; \
+ r1 = %[map_hash_48b] ll; \
+ call %[bpf_map_lookup_elem]; \
+ if r0 == 0 goto l0_%=; \
+ r1 = r0; \
+ r3 = %[test_val_foo]; \
+ r1 += r3; \
+ r2 = %[__imm_0]; \
+ r3 = 0; \
+ call %[bpf_probe_read_kernel]; \
+l0_%=: exit; \
+" :
+ : __imm(bpf_map_lookup_elem),
+ __imm(bpf_probe_read_kernel),
+ __imm_addr(map_hash_48b),
+ __imm_const(__imm_0, sizeof(struct test_val) - offsetof(struct test_val, foo)),
+ __imm_const(test_val_foo, offsetof(struct test_val, foo))
+ : __clobber_all);
+}
+
+SEC("tracepoint")
+__description("helper access to adjusted map (via const reg): partial range")
+__success
+__naked void via_const_reg_partial_range(void)
+{
+ asm volatile (" \
+ r2 = r10; \
+ r2 += -8; \
+ r1 = 0; \
+ *(u64*)(r2 + 0) = r1; \
+ r1 = %[map_hash_48b] ll; \
+ call %[bpf_map_lookup_elem]; \
+ if r0 == 0 goto l0_%=; \
+ r1 = r0; \
+ r3 = %[test_val_foo]; \
+ r1 += r3; \
+ r2 = 8; \
+ r3 = 0; \
+ call %[bpf_probe_read_kernel]; \
+l0_%=: exit; \
+" :
+ : __imm(bpf_map_lookup_elem),
+ __imm(bpf_probe_read_kernel),
+ __imm_addr(map_hash_48b),
+ __imm_const(test_val_foo, offsetof(struct test_val, foo))
+ : __clobber_all);
+}
+
+SEC("tracepoint")
+__description("helper access to adjusted map (via const reg): empty range")
+__failure __msg("R2 invalid zero-sized read")
+__naked void via_const_reg_empty_range(void)
+{
+ asm volatile (" \
+ r2 = r10; \
+ r2 += -8; \
+ r1 = 0; \
+ *(u64*)(r2 + 0) = r1; \
+ r1 = %[map_hash_48b] ll; \
+ call %[bpf_map_lookup_elem]; \
+ if r0 == 0 goto l0_%=; \
+ r1 = r0; \
+ r3 = 0; \
+ r1 += r3; \
+ r2 = 0; \
+ call %[bpf_trace_printk]; \
+l0_%=: exit; \
+" :
+ : __imm(bpf_map_lookup_elem),
+ __imm(bpf_trace_printk),
+ __imm_addr(map_hash_48b)
+ : __clobber_all);
+}
+
+SEC("tracepoint")
+__description("helper access to adjusted map (via const reg): out-of-bound range")
+__failure __msg("invalid access to map value, value_size=48 off=4 size=52")
+__naked void reg_out_of_bound_range(void)
+{
+ asm volatile (" \
+ r2 = r10; \
+ r2 += -8; \
+ r1 = 0; \
+ *(u64*)(r2 + 0) = r1; \
+ r1 = %[map_hash_48b] ll; \
+ call %[bpf_map_lookup_elem]; \
+ if r0 == 0 goto l0_%=; \
+ r1 = r0; \
+ r3 = %[test_val_foo]; \
+ r1 += r3; \
+ r2 = %[__imm_0]; \
+ r3 = 0; \
+ call %[bpf_probe_read_kernel]; \
+l0_%=: exit; \
+" :
+ : __imm(bpf_map_lookup_elem),
+ __imm(bpf_probe_read_kernel),
+ __imm_addr(map_hash_48b),
+ __imm_const(__imm_0, sizeof(struct test_val) - offsetof(struct test_val, foo) + 8),
+ __imm_const(test_val_foo, offsetof(struct test_val, foo))
+ : __clobber_all);
+}
+
+SEC("tracepoint")
+__description("helper access to adjusted map (via const reg): negative range (> adjustment)")
+__failure __msg("R2 min value is negative")
+__naked void const_reg_negative_range_adjustment_1(void)
+{
+ asm volatile (" \
+ r2 = r10; \
+ r2 += -8; \
+ r1 = 0; \
+ *(u64*)(r2 + 0) = r1; \
+ r1 = %[map_hash_48b] ll; \
+ call %[bpf_map_lookup_elem]; \
+ if r0 == 0 goto l0_%=; \
+ r1 = r0; \
+ r3 = %[test_val_foo]; \
+ r1 += r3; \
+ r2 = -8; \
+ r3 = 0; \
+ call %[bpf_probe_read_kernel]; \
+l0_%=: exit; \
+" :
+ : __imm(bpf_map_lookup_elem),
+ __imm(bpf_probe_read_kernel),
+ __imm_addr(map_hash_48b),
+ __imm_const(test_val_foo, offsetof(struct test_val, foo))
+ : __clobber_all);
+}
+
+SEC("tracepoint")
+__description("helper access to adjusted map (via const reg): negative range (< adjustment)")
+__failure __msg("R2 min value is negative")
+__naked void const_reg_negative_range_adjustment_2(void)
+{
+ asm volatile (" \
+ r2 = r10; \
+ r2 += -8; \
+ r1 = 0; \
+ *(u64*)(r2 + 0) = r1; \
+ r1 = %[map_hash_48b] ll; \
+ call %[bpf_map_lookup_elem]; \
+ if r0 == 0 goto l0_%=; \
+ r1 = r0; \
+ r3 = %[test_val_foo]; \
+ r1 += r3; \
+ r2 = -1; \
+ r3 = 0; \
+ call %[bpf_probe_read_kernel]; \
+l0_%=: exit; \
+" :
+ : __imm(bpf_map_lookup_elem),
+ __imm(bpf_probe_read_kernel),
+ __imm_addr(map_hash_48b),
+ __imm_const(test_val_foo, offsetof(struct test_val, foo))
+ : __clobber_all);
+}
+
+SEC("tracepoint")
+__description("helper access to adjusted map (via variable): full range")
+__success
+__naked void map_via_variable_full_range(void)
+{
+ asm volatile (" \
+ r2 = r10; \
+ r2 += -8; \
+ r1 = 0; \
+ *(u64*)(r2 + 0) = r1; \
+ r1 = %[map_hash_48b] ll; \
+ call %[bpf_map_lookup_elem]; \
+ if r0 == 0 goto l0_%=; \
+ r1 = r0; \
+ r3 = *(u32*)(r0 + 0); \
+ if r3 > %[test_val_foo] goto l0_%=; \
+ r1 += r3; \
+ r2 = %[__imm_0]; \
+ r3 = 0; \
+ call %[bpf_probe_read_kernel]; \
+l0_%=: exit; \
+" :
+ : __imm(bpf_map_lookup_elem),
+ __imm(bpf_probe_read_kernel),
+ __imm_addr(map_hash_48b),
+ __imm_const(__imm_0, sizeof(struct test_val) - offsetof(struct test_val, foo)),
+ __imm_const(test_val_foo, offsetof(struct test_val, foo))
+ : __clobber_all);
+}
+
+SEC("tracepoint")
+__description("helper access to adjusted map (via variable): partial range")
+__success
+__naked void map_via_variable_partial_range(void)
+{
+ asm volatile (" \
+ r2 = r10; \
+ r2 += -8; \
+ r1 = 0; \
+ *(u64*)(r2 + 0) = r1; \
+ r1 = %[map_hash_48b] ll; \
+ call %[bpf_map_lookup_elem]; \
+ if r0 == 0 goto l0_%=; \
+ r1 = r0; \
+ r3 = *(u32*)(r0 + 0); \
+ if r3 > %[test_val_foo] goto l0_%=; \
+ r1 += r3; \
+ r2 = 8; \
+ r3 = 0; \
+ call %[bpf_probe_read_kernel]; \
+l0_%=: exit; \
+" :
+ : __imm(bpf_map_lookup_elem),
+ __imm(bpf_probe_read_kernel),
+ __imm_addr(map_hash_48b),
+ __imm_const(test_val_foo, offsetof(struct test_val, foo))
+ : __clobber_all);
+}
+
+SEC("tracepoint")
+__description("helper access to adjusted map (via variable): empty range")
+__failure __msg("R2 invalid zero-sized read")
+__naked void map_via_variable_empty_range(void)
+{
+ asm volatile (" \
+ r2 = r10; \
+ r2 += -8; \
+ r1 = 0; \
+ *(u64*)(r2 + 0) = r1; \
+ r1 = %[map_hash_48b] ll; \
+ call %[bpf_map_lookup_elem]; \
+ if r0 == 0 goto l0_%=; \
+ r1 = r0; \
+ r3 = *(u32*)(r0 + 0); \
+ if r3 > %[test_val_foo] goto l0_%=; \
+ r1 += r3; \
+ r2 = 0; \
+ call %[bpf_trace_printk]; \
+l0_%=: exit; \
+" :
+ : __imm(bpf_map_lookup_elem),
+ __imm(bpf_trace_printk),
+ __imm_addr(map_hash_48b),
+ __imm_const(test_val_foo, offsetof(struct test_val, foo))
+ : __clobber_all);
+}
+
+SEC("tracepoint")
+__description("helper access to adjusted map (via variable): no max check")
+__failure __msg("R1 unbounded memory access")
+__naked void via_variable_no_max_check_1(void)
+{
+ asm volatile (" \
+ r2 = r10; \
+ r2 += -8; \
+ r1 = 0; \
+ *(u64*)(r2 + 0) = r1; \
+ r1 = %[map_hash_48b] ll; \
+ call %[bpf_map_lookup_elem]; \
+ if r0 == 0 goto l0_%=; \
+ r1 = r0; \
+ r3 = *(u32*)(r0 + 0); \
+ r1 += r3; \
+ r2 = 1; \
+ r3 = 0; \
+ call %[bpf_probe_read_kernel]; \
+l0_%=: exit; \
+" :
+ : __imm(bpf_map_lookup_elem),
+ __imm(bpf_probe_read_kernel),
+ __imm_addr(map_hash_48b)
+ : __clobber_all);
+}
+
+SEC("tracepoint")
+__description("helper access to adjusted map (via variable): wrong max check")
+__failure __msg("invalid access to map value, value_size=48 off=4 size=45")
+__naked void via_variable_wrong_max_check_1(void)
+{
+ asm volatile (" \
+ r2 = r10; \
+ r2 += -8; \
+ r1 = 0; \
+ *(u64*)(r2 + 0) = r1; \
+ r1 = %[map_hash_48b] ll; \
+ call %[bpf_map_lookup_elem]; \
+ if r0 == 0 goto l0_%=; \
+ r1 = r0; \
+ r3 = *(u32*)(r0 + 0); \
+ if r3 > %[test_val_foo] goto l0_%=; \
+ r1 += r3; \
+ r2 = %[__imm_0]; \
+ r3 = 0; \
+ call %[bpf_probe_read_kernel]; \
+l0_%=: exit; \
+" :
+ : __imm(bpf_map_lookup_elem),
+ __imm(bpf_probe_read_kernel),
+ __imm_addr(map_hash_48b),
+ __imm_const(__imm_0, sizeof(struct test_val) - offsetof(struct test_val, foo) + 1),
+ __imm_const(test_val_foo, offsetof(struct test_val, foo))
+ : __clobber_all);
+}
+
+SEC("tracepoint")
+__description("helper access to map: bounds check using <, good access")
+__success
+__naked void bounds_check_using_good_access_1(void)
+{
+ asm volatile (" \
+ r2 = r10; \
+ r2 += -8; \
+ r1 = 0; \
+ *(u64*)(r2 + 0) = r1; \
+ r1 = %[map_hash_48b] ll; \
+ call %[bpf_map_lookup_elem]; \
+ if r0 == 0 goto l0_%=; \
+ r1 = r0; \
+ r3 = *(u32*)(r0 + 0); \
+ if r3 < 32 goto l1_%=; \
+ r0 = 0; \
+l0_%=: exit; \
+l1_%=: r1 += r3; \
+ r0 = 0; \
+ *(u8*)(r1 + 0) = r0; \
+ r0 = 0; \
+ exit; \
+" :
+ : __imm(bpf_map_lookup_elem),
+ __imm_addr(map_hash_48b)
+ : __clobber_all);
+}
+
+SEC("tracepoint")
+__description("helper access to map: bounds check using <, bad access")
+__failure __msg("R1 unbounded memory access")
+__naked void bounds_check_using_bad_access_1(void)
+{
+ asm volatile (" \
+ r2 = r10; \
+ r2 += -8; \
+ r1 = 0; \
+ *(u64*)(r2 + 0) = r1; \
+ r1 = %[map_hash_48b] ll; \
+ call %[bpf_map_lookup_elem]; \
+ if r0 == 0 goto l0_%=; \
+ r1 = r0; \
+ r3 = *(u32*)(r0 + 0); \
+ if r3 < 32 goto l1_%=; \
+ r1 += r3; \
+l0_%=: r0 = 0; \
+ *(u8*)(r1 + 0) = r0; \
+ r0 = 0; \
+ exit; \
+l1_%=: r0 = 0; \
+ exit; \
+" :
+ : __imm(bpf_map_lookup_elem),
+ __imm_addr(map_hash_48b)
+ : __clobber_all);
+}
+
+SEC("tracepoint")
+__description("helper access to map: bounds check using <=, good access")
+__success
+__naked void bounds_check_using_good_access_2(void)
+{
+ asm volatile (" \
+ r2 = r10; \
+ r2 += -8; \
+ r1 = 0; \
+ *(u64*)(r2 + 0) = r1; \
+ r1 = %[map_hash_48b] ll; \
+ call %[bpf_map_lookup_elem]; \
+ if r0 == 0 goto l0_%=; \
+ r1 = r0; \
+ r3 = *(u32*)(r0 + 0); \
+ if r3 <= 32 goto l1_%=; \
+ r0 = 0; \
+l0_%=: exit; \
+l1_%=: r1 += r3; \
+ r0 = 0; \
+ *(u8*)(r1 + 0) = r0; \
+ r0 = 0; \
+ exit; \
+" :
+ : __imm(bpf_map_lookup_elem),
+ __imm_addr(map_hash_48b)
+ : __clobber_all);
+}
+
+SEC("tracepoint")
+__description("helper access to map: bounds check using <=, bad access")
+__failure __msg("R1 unbounded memory access")
+__naked void bounds_check_using_bad_access_2(void)
+{
+ asm volatile (" \
+ r2 = r10; \
+ r2 += -8; \
+ r1 = 0; \
+ *(u64*)(r2 + 0) = r1; \
+ r1 = %[map_hash_48b] ll; \
+ call %[bpf_map_lookup_elem]; \
+ if r0 == 0 goto l0_%=; \
+ r1 = r0; \
+ r3 = *(u32*)(r0 + 0); \
+ if r3 <= 32 goto l1_%=; \
+ r1 += r3; \
+l0_%=: r0 = 0; \
+ *(u8*)(r1 + 0) = r0; \
+ r0 = 0; \
+ exit; \
+l1_%=: r0 = 0; \
+ exit; \
+" :
+ : __imm(bpf_map_lookup_elem),
+ __imm_addr(map_hash_48b)
+ : __clobber_all);
+}
+
+SEC("tracepoint")
+__description("helper access to map: bounds check using s<, good access")
+__success
+__naked void check_using_s_good_access_1(void)
+{
+ asm volatile (" \
+ r2 = r10; \
+ r2 += -8; \
+ r1 = 0; \
+ *(u64*)(r2 + 0) = r1; \
+ r1 = %[map_hash_48b] ll; \
+ call %[bpf_map_lookup_elem]; \
+ if r0 == 0 goto l0_%=; \
+ r1 = r0; \
+ r3 = *(u32*)(r0 + 0); \
+ if r3 s< 32 goto l1_%=; \
+l2_%=: r0 = 0; \
+l0_%=: exit; \
+l1_%=: if r3 s< 0 goto l2_%=; \
+ r1 += r3; \
+ r0 = 0; \
+ *(u8*)(r1 + 0) = r0; \
+ r0 = 0; \
+ exit; \
+" :
+ : __imm(bpf_map_lookup_elem),
+ __imm_addr(map_hash_48b)
+ : __clobber_all);
+}
+
+SEC("tracepoint")
+__description("helper access to map: bounds check using s<, good access 2")
+__success
+__naked void using_s_good_access_2_1(void)
+{
+ asm volatile (" \
+ r2 = r10; \
+ r2 += -8; \
+ r1 = 0; \
+ *(u64*)(r2 + 0) = r1; \
+ r1 = %[map_hash_48b] ll; \
+ call %[bpf_map_lookup_elem]; \
+ if r0 == 0 goto l0_%=; \
+ r1 = r0; \
+ r3 = *(u32*)(r0 + 0); \
+ if r3 s< 32 goto l1_%=; \
+l2_%=: r0 = 0; \
+l0_%=: exit; \
+l1_%=: if r3 s< -3 goto l2_%=; \
+ r1 += r3; \
+ r0 = 0; \
+ *(u8*)(r1 + 0) = r0; \
+ r0 = 0; \
+ exit; \
+" :
+ : __imm(bpf_map_lookup_elem),
+ __imm_addr(map_hash_48b)
+ : __clobber_all);
+}
+
+SEC("tracepoint")
+__description("helper access to map: bounds check using s<, bad access")
+__failure __msg("R1 min value is negative")
+__naked void check_using_s_bad_access_1(void)
+{
+ asm volatile (" \
+ r2 = r10; \
+ r2 += -8; \
+ r1 = 0; \
+ *(u64*)(r2 + 0) = r1; \
+ r1 = %[map_hash_48b] ll; \
+ call %[bpf_map_lookup_elem]; \
+ if r0 == 0 goto l0_%=; \
+ r1 = r0; \
+ r3 = *(u64*)(r0 + 0); \
+ if r3 s< 32 goto l1_%=; \
+l2_%=: r0 = 0; \
+l0_%=: exit; \
+l1_%=: if r3 s< -3 goto l2_%=; \
+ r1 += r3; \
+ r0 = 0; \
+ *(u8*)(r1 + 0) = r0; \
+ r0 = 0; \
+ exit; \
+" :
+ : __imm(bpf_map_lookup_elem),
+ __imm_addr(map_hash_48b)
+ : __clobber_all);
+}
+
+SEC("tracepoint")
+__description("helper access to map: bounds check using s<=, good access")
+__success
+__naked void check_using_s_good_access_2(void)
+{
+ asm volatile (" \
+ r2 = r10; \
+ r2 += -8; \
+ r1 = 0; \
+ *(u64*)(r2 + 0) = r1; \
+ r1 = %[map_hash_48b] ll; \
+ call %[bpf_map_lookup_elem]; \
+ if r0 == 0 goto l0_%=; \
+ r1 = r0; \
+ r3 = *(u32*)(r0 + 0); \
+ if r3 s<= 32 goto l1_%=; \
+l2_%=: r0 = 0; \
+l0_%=: exit; \
+l1_%=: if r3 s<= 0 goto l2_%=; \
+ r1 += r3; \
+ r0 = 0; \
+ *(u8*)(r1 + 0) = r0; \
+ r0 = 0; \
+ exit; \
+" :
+ : __imm(bpf_map_lookup_elem),
+ __imm_addr(map_hash_48b)
+ : __clobber_all);
+}
+
+SEC("tracepoint")
+__description("helper access to map: bounds check using s<=, good access 2")
+__success
+__naked void using_s_good_access_2_2(void)
+{
+ asm volatile (" \
+ r2 = r10; \
+ r2 += -8; \
+ r1 = 0; \
+ *(u64*)(r2 + 0) = r1; \
+ r1 = %[map_hash_48b] ll; \
+ call %[bpf_map_lookup_elem]; \
+ if r0 == 0 goto l0_%=; \
+ r1 = r0; \
+ r3 = *(u32*)(r0 + 0); \
+ if r3 s<= 32 goto l1_%=; \
+l2_%=: r0 = 0; \
+l0_%=: exit; \
+l1_%=: if r3 s<= -3 goto l2_%=; \
+ r1 += r3; \
+ r0 = 0; \
+ *(u8*)(r1 + 0) = r0; \
+ r0 = 0; \
+ exit; \
+" :
+ : __imm(bpf_map_lookup_elem),
+ __imm_addr(map_hash_48b)
+ : __clobber_all);
+}
+
+SEC("tracepoint")
+__description("helper access to map: bounds check using s<=, bad access")
+__failure __msg("R1 min value is negative")
+__naked void check_using_s_bad_access_2(void)
+{
+ asm volatile (" \
+ r2 = r10; \
+ r2 += -8; \
+ r1 = 0; \
+ *(u64*)(r2 + 0) = r1; \
+ r1 = %[map_hash_48b] ll; \
+ call %[bpf_map_lookup_elem]; \
+ if r0 == 0 goto l0_%=; \
+ r1 = r0; \
+ r3 = *(u64*)(r0 + 0); \
+ if r3 s<= 32 goto l1_%=; \
+l2_%=: r0 = 0; \
+l0_%=: exit; \
+l1_%=: if r3 s<= -3 goto l2_%=; \
+ r1 += r3; \
+ r0 = 0; \
+ *(u8*)(r1 + 0) = r0; \
+ r0 = 0; \
+ exit; \
+" :
+ : __imm(bpf_map_lookup_elem),
+ __imm_addr(map_hash_48b)
+ : __clobber_all);
+}
+
+SEC("tracepoint")
+__description("map lookup helper access to map")
+__success
+__naked void lookup_helper_access_to_map(void)
+{
+ asm volatile (" \
+ r2 = r10; \
+ r2 += -8; \
+ r1 = 0; \
+ *(u64*)(r2 + 0) = r1; \
+ r1 = %[map_hash_16b] ll; \
+ call %[bpf_map_lookup_elem]; \
+ if r0 == 0 goto l0_%=; \
+ r2 = r0; \
+ r1 = %[map_hash_16b] ll; \
+ call %[bpf_map_lookup_elem]; \
+l0_%=: exit; \
+" :
+ : __imm(bpf_map_lookup_elem),
+ __imm_addr(map_hash_16b)
+ : __clobber_all);
+}
+
+SEC("tracepoint")
+__description("map update helper access to map")
+__success
+__naked void update_helper_access_to_map(void)
+{
+ asm volatile (" \
+ r2 = r10; \
+ r2 += -8; \
+ r1 = 0; \
+ *(u64*)(r2 + 0) = r1; \
+ r1 = %[map_hash_16b] ll; \
+ call %[bpf_map_lookup_elem]; \
+ if r0 == 0 goto l0_%=; \
+ r4 = 0; \
+ r3 = r0; \
+ r2 = r0; \
+ r1 = %[map_hash_16b] ll; \
+ call %[bpf_map_update_elem]; \
+l0_%=: exit; \
+" :
+ : __imm(bpf_map_lookup_elem),
+ __imm(bpf_map_update_elem),
+ __imm_addr(map_hash_16b)
+ : __clobber_all);
+}
+
+SEC("tracepoint")
+__description("map update helper access to map: wrong size")
+__failure __msg("invalid access to map value, value_size=8 off=0 size=16")
+__naked void access_to_map_wrong_size(void)
+{
+ asm volatile (" \
+ r2 = r10; \
+ r2 += -8; \
+ r1 = 0; \
+ *(u64*)(r2 + 0) = r1; \
+ r1 = %[map_hash_8b] ll; \
+ call %[bpf_map_lookup_elem]; \
+ if r0 == 0 goto l0_%=; \
+ r4 = 0; \
+ r3 = r0; \
+ r2 = r0; \
+ r1 = %[map_hash_16b] ll; \
+ call %[bpf_map_update_elem]; \
+l0_%=: exit; \
+" :
+ : __imm(bpf_map_lookup_elem),
+ __imm(bpf_map_update_elem),
+ __imm_addr(map_hash_16b),
+ __imm_addr(map_hash_8b)
+ : __clobber_all);
+}
+
+SEC("tracepoint")
+__description("map helper access to adjusted map (via const imm)")
+__success
+__naked void adjusted_map_via_const_imm(void)
+{
+ asm volatile (" \
+ r2 = r10; \
+ r2 += -8; \
+ r1 = 0; \
+ *(u64*)(r2 + 0) = r1; \
+ r1 = %[map_hash_16b] ll; \
+ call %[bpf_map_lookup_elem]; \
+ if r0 == 0 goto l0_%=; \
+ r2 = r0; \
+ r2 += %[other_val_bar]; \
+ r1 = %[map_hash_16b] ll; \
+ call %[bpf_map_lookup_elem]; \
+l0_%=: exit; \
+" :
+ : __imm(bpf_map_lookup_elem),
+ __imm_addr(map_hash_16b),
+ __imm_const(other_val_bar, offsetof(struct other_val, bar))
+ : __clobber_all);
+}
+
+SEC("tracepoint")
+__description("map helper access to adjusted map (via const imm): out-of-bound 1")
+__failure __msg("invalid access to map value, value_size=16 off=12 size=8")
+__naked void imm_out_of_bound_1(void)
+{
+ asm volatile (" \
+ r2 = r10; \
+ r2 += -8; \
+ r1 = 0; \
+ *(u64*)(r2 + 0) = r1; \
+ r1 = %[map_hash_16b] ll; \
+ call %[bpf_map_lookup_elem]; \
+ if r0 == 0 goto l0_%=; \
+ r2 = r0; \
+ r2 += %[__imm_0]; \
+ r1 = %[map_hash_16b] ll; \
+ call %[bpf_map_lookup_elem]; \
+l0_%=: exit; \
+" :
+ : __imm(bpf_map_lookup_elem),
+ __imm_addr(map_hash_16b),
+ __imm_const(__imm_0, sizeof(struct other_val) - 4)
+ : __clobber_all);
+}
+
+SEC("tracepoint")
+__description("map helper access to adjusted map (via const imm): out-of-bound 2")
+__failure __msg("invalid access to map value, value_size=16 off=-4 size=8")
+__naked void imm_out_of_bound_2(void)
+{
+ asm volatile (" \
+ r2 = r10; \
+ r2 += -8; \
+ r1 = 0; \
+ *(u64*)(r2 + 0) = r1; \
+ r1 = %[map_hash_16b] ll; \
+ call %[bpf_map_lookup_elem]; \
+ if r0 == 0 goto l0_%=; \
+ r2 = r0; \
+ r2 += -4; \
+ r1 = %[map_hash_16b] ll; \
+ call %[bpf_map_lookup_elem]; \
+l0_%=: exit; \
+" :
+ : __imm(bpf_map_lookup_elem),
+ __imm_addr(map_hash_16b)
+ : __clobber_all);
+}
+
+SEC("tracepoint")
+__description("map helper access to adjusted map (via const reg)")
+__success
+__naked void adjusted_map_via_const_reg(void)
+{
+ asm volatile (" \
+ r2 = r10; \
+ r2 += -8; \
+ r1 = 0; \
+ *(u64*)(r2 + 0) = r1; \
+ r1 = %[map_hash_16b] ll; \
+ call %[bpf_map_lookup_elem]; \
+ if r0 == 0 goto l0_%=; \
+ r2 = r0; \
+ r3 = %[other_val_bar]; \
+ r2 += r3; \
+ r1 = %[map_hash_16b] ll; \
+ call %[bpf_map_lookup_elem]; \
+l0_%=: exit; \
+" :
+ : __imm(bpf_map_lookup_elem),
+ __imm_addr(map_hash_16b),
+ __imm_const(other_val_bar, offsetof(struct other_val, bar))
+ : __clobber_all);
+}
+
+SEC("tracepoint")
+__description("map helper access to adjusted map (via const reg): out-of-bound 1")
+__failure __msg("invalid access to map value, value_size=16 off=12 size=8")
+__naked void reg_out_of_bound_1(void)
+{
+ asm volatile (" \
+ r2 = r10; \
+ r2 += -8; \
+ r1 = 0; \
+ *(u64*)(r2 + 0) = r1; \
+ r1 = %[map_hash_16b] ll; \
+ call %[bpf_map_lookup_elem]; \
+ if r0 == 0 goto l0_%=; \
+ r2 = r0; \
+ r3 = %[__imm_0]; \
+ r2 += r3; \
+ r1 = %[map_hash_16b] ll; \
+ call %[bpf_map_lookup_elem]; \
+l0_%=: exit; \
+" :
+ : __imm(bpf_map_lookup_elem),
+ __imm_addr(map_hash_16b),
+ __imm_const(__imm_0, sizeof(struct other_val) - 4)
+ : __clobber_all);
+}
+
+SEC("tracepoint")
+__description("map helper access to adjusted map (via const reg): out-of-bound 2")
+__failure __msg("invalid access to map value, value_size=16 off=-4 size=8")
+__naked void reg_out_of_bound_2(void)
+{
+ asm volatile (" \
+ r2 = r10; \
+ r2 += -8; \
+ r1 = 0; \
+ *(u64*)(r2 + 0) = r1; \
+ r1 = %[map_hash_16b] ll; \
+ call %[bpf_map_lookup_elem]; \
+ if r0 == 0 goto l0_%=; \
+ r2 = r0; \
+ r3 = -4; \
+ r2 += r3; \
+ r1 = %[map_hash_16b] ll; \
+ call %[bpf_map_lookup_elem]; \
+l0_%=: exit; \
+" :
+ : __imm(bpf_map_lookup_elem),
+ __imm_addr(map_hash_16b)
+ : __clobber_all);
+}
+
+SEC("tracepoint")
+__description("map helper access to adjusted map (via variable)")
+__success
+__naked void to_adjusted_map_via_variable(void)
+{
+ asm volatile (" \
+ r2 = r10; \
+ r2 += -8; \
+ r1 = 0; \
+ *(u64*)(r2 + 0) = r1; \
+ r1 = %[map_hash_16b] ll; \
+ call %[bpf_map_lookup_elem]; \
+ if r0 == 0 goto l0_%=; \
+ r2 = r0; \
+ r3 = *(u32*)(r0 + 0); \
+ if r3 > %[other_val_bar] goto l0_%=; \
+ r2 += r3; \
+ r1 = %[map_hash_16b] ll; \
+ call %[bpf_map_lookup_elem]; \
+l0_%=: exit; \
+" :
+ : __imm(bpf_map_lookup_elem),
+ __imm_addr(map_hash_16b),
+ __imm_const(other_val_bar, offsetof(struct other_val, bar))
+ : __clobber_all);
+}
+
+SEC("tracepoint")
+__description("map helper access to adjusted map (via variable): no max check")
+__failure
+__msg("R2 unbounded memory access, make sure to bounds check any such access")
+__naked void via_variable_no_max_check_2(void)
+{
+ asm volatile (" \
+ r2 = r10; \
+ r2 += -8; \
+ r1 = 0; \
+ *(u64*)(r2 + 0) = r1; \
+ r1 = %[map_hash_16b] ll; \
+ call %[bpf_map_lookup_elem]; \
+ if r0 == 0 goto l0_%=; \
+ r2 = r0; \
+ r3 = *(u32*)(r0 + 0); \
+ r2 += r3; \
+ r1 = %[map_hash_16b] ll; \
+ call %[bpf_map_lookup_elem]; \
+l0_%=: exit; \
+" :
+ : __imm(bpf_map_lookup_elem),
+ __imm_addr(map_hash_16b)
+ : __clobber_all);
+}
+
+SEC("tracepoint")
+__description("map helper access to adjusted map (via variable): wrong max check")
+__failure __msg("invalid access to map value, value_size=16 off=9 size=8")
+__naked void via_variable_wrong_max_check_2(void)
+{
+ asm volatile (" \
+ r2 = r10; \
+ r2 += -8; \
+ r1 = 0; \
+ *(u64*)(r2 + 0) = r1; \
+ r1 = %[map_hash_16b] ll; \
+ call %[bpf_map_lookup_elem]; \
+ if r0 == 0 goto l0_%=; \
+ r2 = r0; \
+ r3 = *(u32*)(r0 + 0); \
+ if r3 > %[__imm_0] goto l0_%=; \
+ r2 += r3; \
+ r1 = %[map_hash_16b] ll; \
+ call %[bpf_map_lookup_elem]; \
+l0_%=: exit; \
+" :
+ : __imm(bpf_map_lookup_elem),
+ __imm_addr(map_hash_16b),
+ __imm_const(__imm_0, offsetof(struct other_val, bar) + 1)
+ : __clobber_all);
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/verifier_int_ptr.c b/tools/testing/selftests/bpf/progs/verifier_int_ptr.c
new file mode 100644
index 000000000000..9fc3fae5cd83
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/verifier_int_ptr.c
@@ -0,0 +1,158 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Converted from tools/testing/selftests/bpf/verifier/int_ptr.c */
+
+#include <linux/bpf.h>
+#include <bpf/bpf_helpers.h>
+#include "bpf_misc.h"
+
+SEC("socket")
+__description("ARG_PTR_TO_LONG uninitialized")
+__success
+__failure_unpriv __msg_unpriv("invalid indirect read from stack R4 off -16+0 size 8")
+__naked void arg_ptr_to_long_uninitialized(void)
+{
+ asm volatile (" \
+ /* bpf_strtoul arg1 (buf) */ \
+ r7 = r10; \
+ r7 += -8; \
+ r0 = 0x00303036; \
+ *(u64*)(r7 + 0) = r0; \
+ r1 = r7; \
+ /* bpf_strtoul arg2 (buf_len) */ \
+ r2 = 4; \
+ /* bpf_strtoul arg3 (flags) */ \
+ r3 = 0; \
+ /* bpf_strtoul arg4 (res) */ \
+ r7 += -8; \
+ r4 = r7; \
+ /* bpf_strtoul() */ \
+ call %[bpf_strtoul]; \
+ r0 = 1; \
+ exit; \
+" :
+ : __imm(bpf_strtoul)
+ : __clobber_all);
+}
+
+SEC("socket")
+__description("ARG_PTR_TO_LONG half-uninitialized")
+/* in privileged mode reads from uninitialized stack locations are permitted */
+__success __failure_unpriv
+__msg_unpriv("invalid indirect read from stack R4 off -16+4 size 8")
+__retval(0)
+__naked void ptr_to_long_half_uninitialized(void)
+{
+ asm volatile (" \
+ /* bpf_strtoul arg1 (buf) */ \
+ r7 = r10; \
+ r7 += -8; \
+ r0 = 0x00303036; \
+ *(u64*)(r7 + 0) = r0; \
+ r1 = r7; \
+ /* bpf_strtoul arg2 (buf_len) */ \
+ r2 = 4; \
+ /* bpf_strtoul arg3 (flags) */ \
+ r3 = 0; \
+ /* bpf_strtoul arg4 (res) */ \
+ r7 += -8; \
+ *(u32*)(r7 + 0) = r0; \
+ r4 = r7; \
+ /* bpf_strtoul() */ \
+ call %[bpf_strtoul]; \
+ r0 = 0; \
+ exit; \
+" :
+ : __imm(bpf_strtoul)
+ : __clobber_all);
+}
+
+SEC("cgroup/sysctl")
+__description("ARG_PTR_TO_LONG misaligned")
+__failure __msg("misaligned stack access off 0+-20+0 size 8")
+__naked void arg_ptr_to_long_misaligned(void)
+{
+ asm volatile (" \
+ /* bpf_strtoul arg1 (buf) */ \
+ r7 = r10; \
+ r7 += -8; \
+ r0 = 0x00303036; \
+ *(u64*)(r7 + 0) = r0; \
+ r1 = r7; \
+ /* bpf_strtoul arg2 (buf_len) */ \
+ r2 = 4; \
+ /* bpf_strtoul arg3 (flags) */ \
+ r3 = 0; \
+ /* bpf_strtoul arg4 (res) */ \
+ r7 += -12; \
+ r0 = 0; \
+ *(u32*)(r7 + 0) = r0; \
+ *(u64*)(r7 + 4) = r0; \
+ r4 = r7; \
+ /* bpf_strtoul() */ \
+ call %[bpf_strtoul]; \
+ r0 = 1; \
+ exit; \
+" :
+ : __imm(bpf_strtoul)
+ : __clobber_all);
+}
+
+SEC("cgroup/sysctl")
+__description("ARG_PTR_TO_LONG size < sizeof(long)")
+__failure __msg("invalid indirect access to stack R4 off=-4 size=8")
+__naked void to_long_size_sizeof_long(void)
+{
+ asm volatile (" \
+ /* bpf_strtoul arg1 (buf) */ \
+ r7 = r10; \
+ r7 += -16; \
+ r0 = 0x00303036; \
+ *(u64*)(r7 + 0) = r0; \
+ r1 = r7; \
+ /* bpf_strtoul arg2 (buf_len) */ \
+ r2 = 4; \
+ /* bpf_strtoul arg3 (flags) */ \
+ r3 = 0; \
+ /* bpf_strtoul arg4 (res) */ \
+ r7 += 12; \
+ *(u32*)(r7 + 0) = r0; \
+ r4 = r7; \
+ /* bpf_strtoul() */ \
+ call %[bpf_strtoul]; \
+ r0 = 1; \
+ exit; \
+" :
+ : __imm(bpf_strtoul)
+ : __clobber_all);
+}
+
+SEC("cgroup/sysctl")
+__description("ARG_PTR_TO_LONG initialized")
+__success
+__naked void arg_ptr_to_long_initialized(void)
+{
+ asm volatile (" \
+ /* bpf_strtoul arg1 (buf) */ \
+ r7 = r10; \
+ r7 += -8; \
+ r0 = 0x00303036; \
+ *(u64*)(r7 + 0) = r0; \
+ r1 = r7; \
+ /* bpf_strtoul arg2 (buf_len) */ \
+ r2 = 4; \
+ /* bpf_strtoul arg3 (flags) */ \
+ r3 = 0; \
+ /* bpf_strtoul arg4 (res) */ \
+ r7 += -8; \
+ *(u64*)(r7 + 0) = r0; \
+ r4 = r7; \
+ /* bpf_strtoul() */ \
+ call %[bpf_strtoul]; \
+ r0 = 1; \
+ exit; \
+" :
+ : __imm(bpf_strtoul)
+ : __clobber_all);
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/verifier_iterating_callbacks.c b/tools/testing/selftests/bpf/progs/verifier_iterating_callbacks.c
new file mode 100644
index 000000000000..99e561f18f9b
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/verifier_iterating_callbacks.c
@@ -0,0 +1,409 @@
+// SPDX-License-Identifier: GPL-2.0
+#include "bpf_misc.h"
+#include "bpf_experimental.h"
+
+struct {
+ __uint(type, BPF_MAP_TYPE_ARRAY);
+ __uint(max_entries, 8);
+ __type(key, __u32);
+ __type(value, __u64);
+} map SEC(".maps");
+
+struct {
+ __uint(type, BPF_MAP_TYPE_USER_RINGBUF);
+ __uint(max_entries, 8);
+} ringbuf SEC(".maps");
+
+struct vm_area_struct;
+struct bpf_map;
+
+struct buf_context {
+ char *buf;
+};
+
+struct num_context {
+ __u64 i;
+ __u64 j;
+};
+
+__u8 choice_arr[2] = { 0, 1 };
+
+static int unsafe_on_2nd_iter_cb(__u32 idx, struct buf_context *ctx)
+{
+ if (idx == 0) {
+ ctx->buf = (char *)(0xDEAD);
+ return 0;
+ }
+
+ if (bpf_probe_read_user(ctx->buf, 8, (void *)(0xBADC0FFEE)))
+ return 1;
+
+ return 0;
+}
+
+SEC("?raw_tp")
+__failure __msg("R1 type=scalar expected=fp")
+int unsafe_on_2nd_iter(void *unused)
+{
+ char buf[4];
+ struct buf_context loop_ctx = { .buf = buf };
+
+ bpf_loop(100, unsafe_on_2nd_iter_cb, &loop_ctx, 0);
+ return 0;
+}
+
+static int unsafe_on_zero_iter_cb(__u32 idx, struct num_context *ctx)
+{
+ ctx->i = 0;
+ return 0;
+}
+
+SEC("?raw_tp")
+__failure __msg("invalid access to map value, value_size=2 off=32 size=1")
+int unsafe_on_zero_iter(void *unused)
+{
+ struct num_context loop_ctx = { .i = 32 };
+
+ bpf_loop(100, unsafe_on_zero_iter_cb, &loop_ctx, 0);
+ return choice_arr[loop_ctx.i];
+}
+
+static int widening_cb(__u32 idx, struct num_context *ctx)
+{
+ ++ctx->i;
+ return 0;
+}
+
+SEC("?raw_tp")
+__success
+int widening(void *unused)
+{
+ struct num_context loop_ctx = { .i = 0, .j = 1 };
+
+ bpf_loop(100, widening_cb, &loop_ctx, 0);
+ /* loop_ctx.j is not changed during callback iteration,
+ * verifier should not apply widening to it.
+ */
+ return choice_arr[loop_ctx.j];
+}
+
+static int loop_detection_cb(__u32 idx, struct num_context *ctx)
+{
+ for (;;) {}
+ return 0;
+}
+
+SEC("?raw_tp")
+__failure __msg("infinite loop detected")
+int loop_detection(void *unused)
+{
+ struct num_context loop_ctx = { .i = 0 };
+
+ bpf_loop(100, loop_detection_cb, &loop_ctx, 0);
+ return 0;
+}
+
+static __always_inline __u64 oob_state_machine(struct num_context *ctx)
+{
+ switch (ctx->i) {
+ case 0:
+ ctx->i = 1;
+ break;
+ case 1:
+ ctx->i = 32;
+ break;
+ }
+ return 0;
+}
+
+static __u64 for_each_map_elem_cb(struct bpf_map *map, __u32 *key, __u64 *val, void *data)
+{
+ return oob_state_machine(data);
+}
+
+SEC("?raw_tp")
+__failure __msg("invalid access to map value, value_size=2 off=32 size=1")
+int unsafe_for_each_map_elem(void *unused)
+{
+ struct num_context loop_ctx = { .i = 0 };
+
+ bpf_for_each_map_elem(&map, for_each_map_elem_cb, &loop_ctx, 0);
+ return choice_arr[loop_ctx.i];
+}
+
+static __u64 ringbuf_drain_cb(struct bpf_dynptr *dynptr, void *data)
+{
+ return oob_state_machine(data);
+}
+
+SEC("?raw_tp")
+__failure __msg("invalid access to map value, value_size=2 off=32 size=1")
+int unsafe_ringbuf_drain(void *unused)
+{
+ struct num_context loop_ctx = { .i = 0 };
+
+ bpf_user_ringbuf_drain(&ringbuf, ringbuf_drain_cb, &loop_ctx, 0);
+ return choice_arr[loop_ctx.i];
+}
+
+static __u64 find_vma_cb(struct task_struct *task, struct vm_area_struct *vma, void *data)
+{
+ return oob_state_machine(data);
+}
+
+SEC("?raw_tp")
+__failure __msg("invalid access to map value, value_size=2 off=32 size=1")
+int unsafe_find_vma(void *unused)
+{
+ struct task_struct *task = bpf_get_current_task_btf();
+ struct num_context loop_ctx = { .i = 0 };
+
+ bpf_find_vma(task, 0, find_vma_cb, &loop_ctx, 0);
+ return choice_arr[loop_ctx.i];
+}
+
+static int iter_limit_cb(__u32 idx, struct num_context *ctx)
+{
+ ctx->i++;
+ return 0;
+}
+
+SEC("?raw_tp")
+__success
+int bpf_loop_iter_limit_ok(void *unused)
+{
+ struct num_context ctx = { .i = 0 };
+
+ bpf_loop(1, iter_limit_cb, &ctx, 0);
+ return choice_arr[ctx.i];
+}
+
+SEC("?raw_tp")
+__failure __msg("invalid access to map value, value_size=2 off=2 size=1")
+int bpf_loop_iter_limit_overflow(void *unused)
+{
+ struct num_context ctx = { .i = 0 };
+
+ bpf_loop(2, iter_limit_cb, &ctx, 0);
+ return choice_arr[ctx.i];
+}
+
+static int iter_limit_level2a_cb(__u32 idx, struct num_context *ctx)
+{
+ ctx->i += 100;
+ return 0;
+}
+
+static int iter_limit_level2b_cb(__u32 idx, struct num_context *ctx)
+{
+ ctx->i += 10;
+ return 0;
+}
+
+static int iter_limit_level1_cb(__u32 idx, struct num_context *ctx)
+{
+ ctx->i += 1;
+ bpf_loop(1, iter_limit_level2a_cb, ctx, 0);
+ bpf_loop(1, iter_limit_level2b_cb, ctx, 0);
+ return 0;
+}
+
+/* Check that path visiting every callback function once had been
+ * reached by verifier. Variables 'ctx{1,2}i' below serve as flags,
+ * with each decimal digit corresponding to a callback visit marker.
+ */
+SEC("socket")
+__success __retval(111111)
+int bpf_loop_iter_limit_nested(void *unused)
+{
+ struct num_context ctx1 = { .i = 0 };
+ struct num_context ctx2 = { .i = 0 };
+ __u64 a, b, c;
+
+ bpf_loop(1, iter_limit_level1_cb, &ctx1, 0);
+ bpf_loop(1, iter_limit_level1_cb, &ctx2, 0);
+ a = ctx1.i;
+ b = ctx2.i;
+ /* Force 'ctx1.i' and 'ctx2.i' precise. */
+ c = choice_arr[(a + b) % 2];
+ /* This makes 'c' zero, but neither clang nor verifier know it. */
+ c /= 10;
+ /* Make sure that verifier does not visit 'impossible' states:
+ * enumerate all possible callback visit masks.
+ */
+ if (a != 0 && a != 1 && a != 11 && a != 101 && a != 111 &&
+ b != 0 && b != 1 && b != 11 && b != 101 && b != 111)
+ asm volatile ("r0 /= 0;" ::: "r0");
+ return 1000 * a + b + c;
+}
+
+struct iter_limit_bug_ctx {
+ __u64 a;
+ __u64 b;
+ __u64 c;
+};
+
+static __naked void iter_limit_bug_cb(void)
+{
+ /* This is the same as C code below, but written
+ * in assembly to control which branches are fall-through.
+ *
+ * switch (bpf_get_prandom_u32()) {
+ * case 1: ctx->a = 42; break;
+ * case 2: ctx->b = 42; break;
+ * default: ctx->c = 42; break;
+ * }
+ */
+ asm volatile (
+ "r9 = r2;"
+ "call %[bpf_get_prandom_u32];"
+ "r1 = r0;"
+ "r2 = 42;"
+ "r0 = 0;"
+ "if r1 == 0x1 goto 1f;"
+ "if r1 == 0x2 goto 2f;"
+ "*(u64 *)(r9 + 16) = r2;"
+ "exit;"
+ "1: *(u64 *)(r9 + 0) = r2;"
+ "exit;"
+ "2: *(u64 *)(r9 + 8) = r2;"
+ "exit;"
+ :
+ : __imm(bpf_get_prandom_u32)
+ : __clobber_all
+ );
+}
+
+SEC("tc")
+__failure
+__flag(BPF_F_TEST_STATE_FREQ)
+int iter_limit_bug(struct __sk_buff *skb)
+{
+ struct iter_limit_bug_ctx ctx = { 7, 7, 7 };
+
+ bpf_loop(2, iter_limit_bug_cb, &ctx, 0);
+
+ /* This is the same as C code below,
+ * written in assembly to guarantee checks order.
+ *
+ * if (ctx.a == 42 && ctx.b == 42 && ctx.c == 7)
+ * asm volatile("r1 /= 0;":::"r1");
+ */
+ asm volatile (
+ "r1 = *(u64 *)%[ctx_a];"
+ "if r1 != 42 goto 1f;"
+ "r1 = *(u64 *)%[ctx_b];"
+ "if r1 != 42 goto 1f;"
+ "r1 = *(u64 *)%[ctx_c];"
+ "if r1 != 7 goto 1f;"
+ "r1 /= 0;"
+ "1:"
+ :
+ : [ctx_a]"m"(ctx.a),
+ [ctx_b]"m"(ctx.b),
+ [ctx_c]"m"(ctx.c)
+ : "r1"
+ );
+ return 0;
+}
+
+#define ARR_SZ 1000000
+int zero;
+char arr[ARR_SZ];
+
+SEC("socket")
+__success __retval(0xd495cdc0)
+int cond_break1(const void *ctx)
+{
+ unsigned long i;
+ unsigned int sum = 0;
+
+ for (i = zero; i < ARR_SZ; cond_break, i++)
+ sum += i;
+ for (i = zero; i < ARR_SZ; i++) {
+ barrier_var(i);
+ sum += i + arr[i];
+ cond_break;
+ }
+
+ return sum;
+}
+
+SEC("socket")
+__success __retval(999000000)
+int cond_break2(const void *ctx)
+{
+ int i, j;
+ int sum = 0;
+
+ for (i = zero; i < 1000; cond_break, i++)
+ for (j = zero; j < 1000; j++) {
+ sum += i + j;
+ cond_break;
+ }
+
+ return sum;
+}
+
+static __noinline int loop(void)
+{
+ int i, sum = 0;
+
+ for (i = zero; i <= 1000000; i++, cond_break)
+ sum += i;
+
+ return sum;
+}
+
+SEC("socket")
+__success __retval(0x6a5a2920)
+int cond_break3(const void *ctx)
+{
+ return loop();
+}
+
+SEC("socket")
+__success __retval(1)
+int cond_break4(const void *ctx)
+{
+ int cnt = zero;
+
+ for (;;) {
+ /* should eventually break out of the loop */
+ cond_break;
+ cnt++;
+ }
+ /* if we looped a bit, it's a success */
+ return cnt > 1 ? 1 : 0;
+}
+
+static __noinline int static_subprog(void)
+{
+ int cnt = zero;
+
+ for (;;) {
+ cond_break;
+ cnt++;
+ }
+
+ return cnt;
+}
+
+SEC("socket")
+__success __retval(1)
+int cond_break5(const void *ctx)
+{
+ int cnt1 = zero, cnt2;
+
+ for (;;) {
+ cond_break;
+ cnt1++;
+ }
+
+ cnt2 = static_subprog();
+
+ /* main and subprog have to loop a bit */
+ return cnt1 > 1 && cnt2 > 1 ? 1 : 0;
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/verifier_jeq_infer_not_null.c b/tools/testing/selftests/bpf/progs/verifier_jeq_infer_not_null.c
new file mode 100644
index 000000000000..bf16b00502f2
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/verifier_jeq_infer_not_null.c
@@ -0,0 +1,213 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Converted from tools/testing/selftests/bpf/verifier/jeq_infer_not_null.c */
+
+#include <linux/bpf.h>
+#include <bpf/bpf_helpers.h>
+#include "bpf_misc.h"
+
+struct {
+ __uint(type, BPF_MAP_TYPE_XSKMAP);
+ __uint(max_entries, 1);
+ __type(key, int);
+ __type(value, int);
+} map_xskmap SEC(".maps");
+
+/* This is equivalent to the following program:
+ *
+ * r6 = skb->sk;
+ * r7 = sk_fullsock(r6);
+ * r0 = sk_fullsock(r6);
+ * if (r0 == 0) return 0; (a)
+ * if (r0 != r7) return 0; (b)
+ * *r7->type; (c)
+ * return 0;
+ *
+ * It is safe to dereference r7 at point (c), because of (a) and (b).
+ * The test verifies that relation r0 == r7 is propagated from (b) to (c).
+ */
+SEC("cgroup/skb")
+__description("jne/jeq infer not null, PTR_TO_SOCKET_OR_NULL -> PTR_TO_SOCKET for JNE false branch")
+__success __failure_unpriv __msg_unpriv("R7 pointer comparison")
+__retval(0)
+__naked void socket_for_jne_false_branch(void)
+{
+ asm volatile (" \
+ /* r6 = skb->sk; */ \
+ r6 = *(u64*)(r1 + %[__sk_buff_sk]); \
+ /* if (r6 == 0) return 0; */ \
+ if r6 == 0 goto l0_%=; \
+ /* r7 = sk_fullsock(skb); */ \
+ r1 = r6; \
+ call %[bpf_sk_fullsock]; \
+ r7 = r0; \
+ /* r0 = sk_fullsock(skb); */ \
+ r1 = r6; \
+ call %[bpf_sk_fullsock]; \
+ /* if (r0 == null) return 0; */ \
+ if r0 == 0 goto l0_%=; \
+ /* if (r0 == r7) r0 = *(r7->type); */ \
+ if r0 != r7 goto l0_%=; /* Use ! JNE ! */\
+ r0 = *(u32*)(r7 + %[bpf_sock_type]); \
+l0_%=: /* return 0 */ \
+ r0 = 0; \
+ exit; \
+" :
+ : __imm(bpf_sk_fullsock),
+ __imm_const(__sk_buff_sk, offsetof(struct __sk_buff, sk)),
+ __imm_const(bpf_sock_type, offsetof(struct bpf_sock, type))
+ : __clobber_all);
+}
+
+/* Same as above, but verify that another branch of JNE still
+ * prohibits access to PTR_MAYBE_NULL.
+ */
+SEC("cgroup/skb")
+__description("jne/jeq infer not null, PTR_TO_SOCKET_OR_NULL unchanged for JNE true branch")
+__failure __msg("R7 invalid mem access 'sock_or_null'")
+__failure_unpriv __msg_unpriv("R7 pointer comparison")
+__naked void unchanged_for_jne_true_branch(void)
+{
+ asm volatile (" \
+ /* r6 = skb->sk */ \
+ r6 = *(u64*)(r1 + %[__sk_buff_sk]); \
+ /* if (r6 == 0) return 0; */ \
+ if r6 == 0 goto l0_%=; \
+ /* r7 = sk_fullsock(skb); */ \
+ r1 = r6; \
+ call %[bpf_sk_fullsock]; \
+ r7 = r0; \
+ /* r0 = sk_fullsock(skb); */ \
+ r1 = r6; \
+ call %[bpf_sk_fullsock]; \
+ /* if (r0 == null) return 0; */ \
+ if r0 != 0 goto l0_%=; \
+ /* if (r0 == r7) return 0; */ \
+ if r0 != r7 goto l1_%=; /* Use ! JNE ! */\
+ goto l0_%=; \
+l1_%=: /* r0 = *(r7->type); */ \
+ r0 = *(u32*)(r7 + %[bpf_sock_type]); \
+l0_%=: /* return 0 */ \
+ r0 = 0; \
+ exit; \
+" :
+ : __imm(bpf_sk_fullsock),
+ __imm_const(__sk_buff_sk, offsetof(struct __sk_buff, sk)),
+ __imm_const(bpf_sock_type, offsetof(struct bpf_sock, type))
+ : __clobber_all);
+}
+
+/* Same as a first test, but not null should be inferred for JEQ branch */
+SEC("cgroup/skb")
+__description("jne/jeq infer not null, PTR_TO_SOCKET_OR_NULL -> PTR_TO_SOCKET for JEQ true branch")
+__success __failure_unpriv __msg_unpriv("R7 pointer comparison")
+__retval(0)
+__naked void socket_for_jeq_true_branch(void)
+{
+ asm volatile (" \
+ /* r6 = skb->sk; */ \
+ r6 = *(u64*)(r1 + %[__sk_buff_sk]); \
+ /* if (r6 == null) return 0; */ \
+ if r6 == 0 goto l0_%=; \
+ /* r7 = sk_fullsock(skb); */ \
+ r1 = r6; \
+ call %[bpf_sk_fullsock]; \
+ r7 = r0; \
+ /* r0 = sk_fullsock(skb); */ \
+ r1 = r6; \
+ call %[bpf_sk_fullsock]; \
+ /* if (r0 == null) return 0; */ \
+ if r0 == 0 goto l0_%=; \
+ /* if (r0 != r7) return 0; */ \
+ if r0 == r7 goto l1_%=; /* Use ! JEQ ! */\
+ goto l0_%=; \
+l1_%=: /* r0 = *(r7->type); */ \
+ r0 = *(u32*)(r7 + %[bpf_sock_type]); \
+l0_%=: /* return 0; */ \
+ r0 = 0; \
+ exit; \
+" :
+ : __imm(bpf_sk_fullsock),
+ __imm_const(__sk_buff_sk, offsetof(struct __sk_buff, sk)),
+ __imm_const(bpf_sock_type, offsetof(struct bpf_sock, type))
+ : __clobber_all);
+}
+
+/* Same as above, but verify that another branch of JNE still
+ * prohibits access to PTR_MAYBE_NULL.
+ */
+SEC("cgroup/skb")
+__description("jne/jeq infer not null, PTR_TO_SOCKET_OR_NULL unchanged for JEQ false branch")
+__failure __msg("R7 invalid mem access 'sock_or_null'")
+__failure_unpriv __msg_unpriv("R7 pointer comparison")
+__naked void unchanged_for_jeq_false_branch(void)
+{
+ asm volatile (" \
+ /* r6 = skb->sk; */ \
+ r6 = *(u64*)(r1 + %[__sk_buff_sk]); \
+ /* if (r6 == null) return 0; */ \
+ if r6 == 0 goto l0_%=; \
+ /* r7 = sk_fullsock(skb); */ \
+ r1 = r6; \
+ call %[bpf_sk_fullsock]; \
+ r7 = r0; \
+ /* r0 = sk_fullsock(skb); */ \
+ r1 = r6; \
+ call %[bpf_sk_fullsock]; \
+ /* if (r0 == null) return 0; */ \
+ if r0 == 0 goto l0_%=; \
+ /* if (r0 != r7) r0 = *(r7->type); */ \
+ if r0 == r7 goto l0_%=; /* Use ! JEQ ! */\
+ r0 = *(u32*)(r7 + %[bpf_sock_type]); \
+l0_%=: /* return 0; */ \
+ r0 = 0; \
+ exit; \
+" :
+ : __imm(bpf_sk_fullsock),
+ __imm_const(__sk_buff_sk, offsetof(struct __sk_buff, sk)),
+ __imm_const(bpf_sock_type, offsetof(struct bpf_sock, type))
+ : __clobber_all);
+}
+
+/* Maps are treated in a different branch of `mark_ptr_not_null_reg`,
+ * so separate test for maps case.
+ */
+SEC("xdp")
+__description("jne/jeq infer not null, PTR_TO_MAP_VALUE_OR_NULL -> PTR_TO_MAP_VALUE")
+__success __retval(0)
+__naked void null_ptr_to_map_value(void)
+{
+ asm volatile (" \
+ /* r9 = &some stack to use as key */ \
+ r1 = 0; \
+ *(u32*)(r10 - 8) = r1; \
+ r9 = r10; \
+ r9 += -8; \
+ /* r8 = process local map */ \
+ r8 = %[map_xskmap] ll; \
+ /* r6 = map_lookup_elem(r8, r9); */ \
+ r1 = r8; \
+ r2 = r9; \
+ call %[bpf_map_lookup_elem]; \
+ r6 = r0; \
+ /* r7 = map_lookup_elem(r8, r9); */ \
+ r1 = r8; \
+ r2 = r9; \
+ call %[bpf_map_lookup_elem]; \
+ r7 = r0; \
+ /* if (r6 == 0) return 0; */ \
+ if r6 == 0 goto l0_%=; \
+ /* if (r6 != r7) return 0; */ \
+ if r6 != r7 goto l0_%=; \
+ /* read *r7; */ \
+ r0 = *(u32*)(r7 + %[bpf_xdp_sock_queue_id]); \
+l0_%=: /* return 0; */ \
+ r0 = 0; \
+ exit; \
+" :
+ : __imm(bpf_map_lookup_elem),
+ __imm_addr(map_xskmap),
+ __imm_const(bpf_xdp_sock_queue_id, offsetof(struct bpf_xdp_sock, queue_id))
+ : __clobber_all);
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/verifier_ld_ind.c b/tools/testing/selftests/bpf/progs/verifier_ld_ind.c
new file mode 100644
index 000000000000..c925ba9a2e74
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/verifier_ld_ind.c
@@ -0,0 +1,110 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Converted from tools/testing/selftests/bpf/verifier/ld_ind.c */
+
+#include <linux/bpf.h>
+#include <bpf/bpf_helpers.h>
+#include "../../../include/linux/filter.h"
+#include "bpf_misc.h"
+
+SEC("socket")
+__description("ld_ind: check calling conv, r1")
+__failure __msg("R1 !read_ok")
+__failure_unpriv
+__naked void ind_check_calling_conv_r1(void)
+{
+ asm volatile (" \
+ r6 = r1; \
+ r1 = 1; \
+ .8byte %[ld_ind]; \
+ r0 = r1; \
+ exit; \
+" :
+ : __imm_insn(ld_ind, BPF_LD_IND(BPF_W, BPF_REG_1, -0x200000))
+ : __clobber_all);
+}
+
+SEC("socket")
+__description("ld_ind: check calling conv, r2")
+__failure __msg("R2 !read_ok")
+__failure_unpriv
+__naked void ind_check_calling_conv_r2(void)
+{
+ asm volatile (" \
+ r6 = r1; \
+ r2 = 1; \
+ .8byte %[ld_ind]; \
+ r0 = r2; \
+ exit; \
+" :
+ : __imm_insn(ld_ind, BPF_LD_IND(BPF_W, BPF_REG_2, -0x200000))
+ : __clobber_all);
+}
+
+SEC("socket")
+__description("ld_ind: check calling conv, r3")
+__failure __msg("R3 !read_ok")
+__failure_unpriv
+__naked void ind_check_calling_conv_r3(void)
+{
+ asm volatile (" \
+ r6 = r1; \
+ r3 = 1; \
+ .8byte %[ld_ind]; \
+ r0 = r3; \
+ exit; \
+" :
+ : __imm_insn(ld_ind, BPF_LD_IND(BPF_W, BPF_REG_3, -0x200000))
+ : __clobber_all);
+}
+
+SEC("socket")
+__description("ld_ind: check calling conv, r4")
+__failure __msg("R4 !read_ok")
+__failure_unpriv
+__naked void ind_check_calling_conv_r4(void)
+{
+ asm volatile (" \
+ r6 = r1; \
+ r4 = 1; \
+ .8byte %[ld_ind]; \
+ r0 = r4; \
+ exit; \
+" :
+ : __imm_insn(ld_ind, BPF_LD_IND(BPF_W, BPF_REG_4, -0x200000))
+ : __clobber_all);
+}
+
+SEC("socket")
+__description("ld_ind: check calling conv, r5")
+__failure __msg("R5 !read_ok")
+__failure_unpriv
+__naked void ind_check_calling_conv_r5(void)
+{
+ asm volatile (" \
+ r6 = r1; \
+ r5 = 1; \
+ .8byte %[ld_ind]; \
+ r0 = r5; \
+ exit; \
+" :
+ : __imm_insn(ld_ind, BPF_LD_IND(BPF_W, BPF_REG_5, -0x200000))
+ : __clobber_all);
+}
+
+SEC("socket")
+__description("ld_ind: check calling conv, r7")
+__success __success_unpriv __retval(1)
+__naked void ind_check_calling_conv_r7(void)
+{
+ asm volatile (" \
+ r6 = r1; \
+ r7 = 1; \
+ .8byte %[ld_ind]; \
+ r0 = r7; \
+ exit; \
+" :
+ : __imm_insn(ld_ind, BPF_LD_IND(BPF_W, BPF_REG_7, -0x200000))
+ : __clobber_all);
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/verifier_ldsx.c b/tools/testing/selftests/bpf/progs/verifier_ldsx.c
new file mode 100644
index 000000000000..d4427d8e1217
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/verifier_ldsx.c
@@ -0,0 +1,159 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include <linux/bpf.h>
+#include <bpf/bpf_helpers.h>
+#include "bpf_misc.h"
+
+#if (defined(__TARGET_ARCH_arm64) || defined(__TARGET_ARCH_x86) || \
+ (defined(__TARGET_ARCH_riscv) && __riscv_xlen == 64) || \
+ defined(__TARGET_ARCH_arm) || defined(__TARGET_ARCH_s390) || \
+ defined(__TARGET_ARCH_loongarch)) && \
+ __clang_major__ >= 18
+
+SEC("socket")
+__description("LDSX, S8")
+__success __success_unpriv __retval(-2)
+__naked void ldsx_s8(void)
+{
+ asm volatile (
+ "r1 = 0x3fe;"
+ "*(u64 *)(r10 - 8) = r1;"
+#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
+ "r0 = *(s8 *)(r10 - 8);"
+#else
+ "r0 = *(s8 *)(r10 - 1);"
+#endif
+ "exit;"
+ ::: __clobber_all);
+}
+
+SEC("socket")
+__description("LDSX, S16")
+__success __success_unpriv __retval(-2)
+__naked void ldsx_s16(void)
+{
+ asm volatile (
+ "r1 = 0x3fffe;"
+ "*(u64 *)(r10 - 8) = r1;"
+#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
+ "r0 = *(s16 *)(r10 - 8);"
+#else
+ "r0 = *(s16 *)(r10 - 2);"
+#endif
+ "exit;"
+ ::: __clobber_all);
+}
+
+SEC("socket")
+__description("LDSX, S32")
+__success __success_unpriv __retval(-1)
+__naked void ldsx_s32(void)
+{
+ asm volatile (
+ "r1 = 0xfffffffe;"
+ "*(u64 *)(r10 - 8) = r1;"
+#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
+ "r0 = *(s32 *)(r10 - 8);"
+#else
+ "r0 = *(s32 *)(r10 - 4);"
+#endif
+ "r0 >>= 1;"
+ "exit;"
+ ::: __clobber_all);
+}
+
+SEC("socket")
+__description("LDSX, S8 range checking, privileged")
+__log_level(2) __success __retval(1)
+__msg("R1_w=scalar(smin=smin32=-128,smax=smax32=127)")
+__naked void ldsx_s8_range_priv(void)
+{
+ asm volatile (
+ "call %[bpf_get_prandom_u32];"
+ "*(u64 *)(r10 - 8) = r0;"
+#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
+ "r1 = *(s8 *)(r10 - 8);"
+#else
+ "r1 = *(s8 *)(r10 - 1);"
+#endif
+ /* r1 with s8 range */
+ "if r1 s> 0x7f goto l0_%=;"
+ "if r1 s< -0x80 goto l0_%=;"
+ "r0 = 1;"
+"l1_%=:"
+ "exit;"
+"l0_%=:"
+ "r0 = 2;"
+ "goto l1_%=;"
+ :
+ : __imm(bpf_get_prandom_u32)
+ : __clobber_all);
+}
+
+SEC("socket")
+__description("LDSX, S16 range checking")
+__success __success_unpriv __retval(1)
+__naked void ldsx_s16_range(void)
+{
+ asm volatile (
+ "call %[bpf_get_prandom_u32];"
+ "*(u64 *)(r10 - 8) = r0;"
+#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
+ "r1 = *(s16 *)(r10 - 8);"
+#else
+ "r1 = *(s16 *)(r10 - 2);"
+#endif
+ /* r1 with s16 range */
+ "if r1 s> 0x7fff goto l0_%=;"
+ "if r1 s< -0x8000 goto l0_%=;"
+ "r0 = 1;"
+"l1_%=:"
+ "exit;"
+"l0_%=:"
+ "r0 = 2;"
+ "goto l1_%=;"
+ :
+ : __imm(bpf_get_prandom_u32)
+ : __clobber_all);
+}
+
+SEC("socket")
+__description("LDSX, S32 range checking")
+__success __success_unpriv __retval(1)
+__naked void ldsx_s32_range(void)
+{
+ asm volatile (
+ "call %[bpf_get_prandom_u32];"
+ "*(u64 *)(r10 - 8) = r0;"
+#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
+ "r1 = *(s32 *)(r10 - 8);"
+#else
+ "r1 = *(s32 *)(r10 - 4);"
+#endif
+ /* r1 with s16 range */
+ "if r1 s> 0x7fffFFFF goto l0_%=;"
+ "if r1 s< -0x80000000 goto l0_%=;"
+ "r0 = 1;"
+"l1_%=:"
+ "exit;"
+"l0_%=:"
+ "r0 = 2;"
+ "goto l1_%=;"
+ :
+ : __imm(bpf_get_prandom_u32)
+ : __clobber_all);
+}
+
+#else
+
+SEC("socket")
+__description("cpuv4 is not supported by compiler or jit, use a dummy test")
+__success
+int dummy_test(void)
+{
+ return 0;
+}
+
+#endif
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/verifier_leak_ptr.c b/tools/testing/selftests/bpf/progs/verifier_leak_ptr.c
new file mode 100644
index 000000000000..d153fbe50055
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/verifier_leak_ptr.c
@@ -0,0 +1,92 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Converted from tools/testing/selftests/bpf/verifier/leak_ptr.c */
+
+#include <linux/bpf.h>
+#include <bpf/bpf_helpers.h>
+#include "bpf_misc.h"
+
+struct {
+ __uint(type, BPF_MAP_TYPE_HASH);
+ __uint(max_entries, 1);
+ __type(key, long long);
+ __type(value, long long);
+} map_hash_8b SEC(".maps");
+
+SEC("socket")
+__description("leak pointer into ctx 1")
+__failure __msg("BPF_ATOMIC stores into R1 ctx is not allowed")
+__failure_unpriv __msg_unpriv("R2 leaks addr into mem")
+__naked void leak_pointer_into_ctx_1(void)
+{
+ asm volatile (" \
+ r0 = 0; \
+ *(u64*)(r1 + %[__sk_buff_cb_0]) = r0; \
+ r2 = %[map_hash_8b] ll; \
+ lock *(u64 *)(r1 + %[__sk_buff_cb_0]) += r2; \
+ exit; \
+" :
+ : __imm_addr(map_hash_8b),
+ __imm_const(__sk_buff_cb_0, offsetof(struct __sk_buff, cb[0]))
+ : __clobber_all);
+}
+
+SEC("socket")
+__description("leak pointer into ctx 2")
+__failure __msg("BPF_ATOMIC stores into R1 ctx is not allowed")
+__failure_unpriv __msg_unpriv("R10 leaks addr into mem")
+__naked void leak_pointer_into_ctx_2(void)
+{
+ asm volatile (" \
+ r0 = 0; \
+ *(u64*)(r1 + %[__sk_buff_cb_0]) = r0; \
+ lock *(u64 *)(r1 + %[__sk_buff_cb_0]) += r10; \
+ exit; \
+" :
+ : __imm_const(__sk_buff_cb_0, offsetof(struct __sk_buff, cb[0]))
+ : __clobber_all);
+}
+
+SEC("socket")
+__description("leak pointer into ctx 3")
+__success __failure_unpriv __msg_unpriv("R2 leaks addr into ctx")
+__retval(0)
+__naked void leak_pointer_into_ctx_3(void)
+{
+ asm volatile (" \
+ r0 = 0; \
+ r2 = %[map_hash_8b] ll; \
+ *(u64*)(r1 + %[__sk_buff_cb_0]) = r2; \
+ exit; \
+" :
+ : __imm_addr(map_hash_8b),
+ __imm_const(__sk_buff_cb_0, offsetof(struct __sk_buff, cb[0]))
+ : __clobber_all);
+}
+
+SEC("socket")
+__description("leak pointer into map val")
+__success __failure_unpriv __msg_unpriv("R6 leaks addr into mem")
+__retval(0)
+__naked void leak_pointer_into_map_val(void)
+{
+ asm volatile (" \
+ r6 = r1; \
+ r1 = 0; \
+ *(u64*)(r10 - 8) = r1; \
+ r2 = r10; \
+ r2 += -8; \
+ r1 = %[map_hash_8b] ll; \
+ call %[bpf_map_lookup_elem]; \
+ if r0 == 0 goto l0_%=; \
+ r3 = 0; \
+ *(u64*)(r0 + 0) = r3; \
+ lock *(u64 *)(r0 + 0) += r6; \
+l0_%=: r0 = 0; \
+ exit; \
+" :
+ : __imm(bpf_map_lookup_elem),
+ __imm_addr(map_hash_8b)
+ : __clobber_all);
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/verifier_loops1.c b/tools/testing/selftests/bpf/progs/verifier_loops1.c
new file mode 100644
index 000000000000..e07b43b78fd2
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/verifier_loops1.c
@@ -0,0 +1,286 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Converted from tools/testing/selftests/bpf/verifier/loops1.c */
+
+#include <linux/bpf.h>
+#include <bpf/bpf_helpers.h>
+#include "bpf_misc.h"
+
+SEC("xdp")
+__description("bounded loop, count to 4")
+__success __retval(4)
+__naked void bounded_loop_count_to_4(void)
+{
+ asm volatile (" \
+ r0 = 0; \
+l0_%=: r0 += 1; \
+ if r0 < 4 goto l0_%=; \
+ exit; \
+" ::: __clobber_all);
+}
+
+SEC("tracepoint")
+__description("bounded loop, count to 20")
+__success
+__naked void bounded_loop_count_to_20(void)
+{
+ asm volatile (" \
+ r0 = 0; \
+l0_%=: r0 += 3; \
+ if r0 < 20 goto l0_%=; \
+ exit; \
+" ::: __clobber_all);
+}
+
+SEC("tracepoint")
+__description("bounded loop, count from positive unknown to 4")
+__success
+__naked void from_positive_unknown_to_4(void)
+{
+ asm volatile (" \
+ call %[bpf_get_prandom_u32]; \
+ if r0 s< 0 goto l0_%=; \
+l1_%=: r0 += 1; \
+ if r0 < 4 goto l1_%=; \
+l0_%=: exit; \
+" :
+ : __imm(bpf_get_prandom_u32)
+ : __clobber_all);
+}
+
+SEC("tracepoint")
+__description("bounded loop, count from totally unknown to 4")
+__success
+__naked void from_totally_unknown_to_4(void)
+{
+ asm volatile (" \
+ call %[bpf_get_prandom_u32]; \
+l0_%=: r0 += 1; \
+ if r0 < 4 goto l0_%=; \
+ exit; \
+" :
+ : __imm(bpf_get_prandom_u32)
+ : __clobber_all);
+}
+
+SEC("tracepoint")
+__description("bounded loop, count to 4 with equality")
+__success
+__naked void count_to_4_with_equality(void)
+{
+ asm volatile (" \
+ r0 = 0; \
+l0_%=: r0 += 1; \
+ if r0 != 4 goto l0_%=; \
+ exit; \
+" ::: __clobber_all);
+}
+
+SEC("socket")
+__description("bounded loop, start in the middle")
+__success
+__failure_unpriv __msg_unpriv("back-edge")
+__naked void loop_start_in_the_middle(void)
+{
+ asm volatile (" \
+ r0 = 0; \
+ goto l0_%=; \
+l1_%=: r0 += 1; \
+l0_%=: if r0 < 4 goto l1_%=; \
+ exit; \
+" ::: __clobber_all);
+}
+
+SEC("xdp")
+__description("bounded loop containing a forward jump")
+__success __retval(4)
+__naked void loop_containing_a_forward_jump(void)
+{
+ asm volatile (" \
+ r0 = 0; \
+l1_%=: r0 += 1; \
+ if r0 == r0 goto l0_%=; \
+l0_%=: if r0 < 4 goto l1_%=; \
+ exit; \
+" ::: __clobber_all);
+}
+
+SEC("tracepoint")
+__description("bounded loop that jumps out rather than in")
+__success
+__naked void jumps_out_rather_than_in(void)
+{
+ asm volatile (" \
+ r6 = 0; \
+l1_%=: r6 += 1; \
+ if r6 > 10000 goto l0_%=; \
+ call %[bpf_get_prandom_u32]; \
+ goto l1_%=; \
+l0_%=: exit; \
+" :
+ : __imm(bpf_get_prandom_u32)
+ : __clobber_all);
+}
+
+SEC("tracepoint")
+__description("infinite loop after a conditional jump")
+__failure __msg("program is too large")
+__naked void loop_after_a_conditional_jump(void)
+{
+ asm volatile (" \
+ r0 = 5; \
+ if r0 < 4 goto l0_%=; \
+l1_%=: r0 += 1; \
+ goto l1_%=; \
+l0_%=: exit; \
+" ::: __clobber_all);
+}
+
+SEC("tracepoint")
+__description("bounded recursion")
+__failure
+/* verifier limitation in detecting max stack depth */
+__msg("the call stack of 8 frames is too deep !")
+__naked void bounded_recursion(void)
+{
+ asm volatile (" \
+ r1 = 0; \
+ call bounded_recursion__1; \
+ exit; \
+" ::: __clobber_all);
+}
+
+static __naked __noinline __attribute__((used))
+void bounded_recursion__1(void)
+{
+ asm volatile (" \
+ r1 += 1; \
+ r0 = r1; \
+ if r1 < 4 goto l0_%=; \
+ exit; \
+l0_%=: call bounded_recursion__1; \
+ exit; \
+" ::: __clobber_all);
+}
+
+SEC("tracepoint")
+__description("infinite loop in two jumps")
+__failure __msg("loop detected")
+__naked void infinite_loop_in_two_jumps(void)
+{
+ asm volatile (" \
+ r0 = 0; \
+l1_%=: goto l0_%=; \
+l0_%=: if r0 < 4 goto l1_%=; \
+ exit; \
+" ::: __clobber_all);
+}
+
+SEC("tracepoint")
+__description("infinite loop: three-jump trick")
+__failure __msg("loop detected")
+__naked void infinite_loop_three_jump_trick(void)
+{
+ asm volatile (" \
+ r0 = 0; \
+l2_%=: r0 += 1; \
+ r0 &= 1; \
+ if r0 < 2 goto l0_%=; \
+ exit; \
+l0_%=: r0 += 1; \
+ r0 &= 1; \
+ if r0 < 2 goto l1_%=; \
+ exit; \
+l1_%=: r0 += 1; \
+ r0 &= 1; \
+ if r0 < 2 goto l2_%=; \
+ exit; \
+" ::: __clobber_all);
+}
+
+SEC("xdp")
+__description("not-taken loop with back jump to 1st insn")
+__success __retval(123)
+__naked void back_jump_to_1st_insn_1(void)
+{
+ asm volatile (" \
+l0_%=: r0 = 123; \
+ if r0 == 4 goto l0_%=; \
+ exit; \
+" ::: __clobber_all);
+}
+
+SEC("xdp")
+__description("taken loop with back jump to 1st insn")
+__success __retval(55)
+__naked void back_jump_to_1st_insn_2(void)
+{
+ asm volatile (" \
+ r1 = 10; \
+ r2 = 0; \
+ call back_jump_to_1st_insn_2__1; \
+ exit; \
+" ::: __clobber_all);
+}
+
+static __naked __noinline __attribute__((used))
+void back_jump_to_1st_insn_2__1(void)
+{
+ asm volatile (" \
+l0_%=: r2 += r1; \
+ r1 -= 1; \
+ if r1 != 0 goto l0_%=; \
+ r0 = r2; \
+ exit; \
+" ::: __clobber_all);
+}
+
+SEC("xdp")
+__description("taken loop with back jump to 1st insn, 2")
+__success __retval(55)
+__naked void jump_to_1st_insn_2(void)
+{
+ asm volatile (" \
+ r1 = 10; \
+ r2 = 0; \
+ call jump_to_1st_insn_2__1; \
+ exit; \
+" ::: __clobber_all);
+}
+
+static __naked __noinline __attribute__((used))
+void jump_to_1st_insn_2__1(void)
+{
+ asm volatile (" \
+l0_%=: r2 += r1; \
+ r1 -= 1; \
+ if w1 != 0 goto l0_%=; \
+ r0 = r2; \
+ exit; \
+" ::: __clobber_all);
+}
+
+SEC("xdp")
+__success
+__naked void not_an_inifinite_loop(void)
+{
+ asm volatile (" \
+ call %[bpf_get_prandom_u32]; \
+ r0 &= 0xff; \
+ *(u64 *)(r10 - 8) = r0; \
+ r0 = 0; \
+loop_%=: \
+ r0 = *(u64 *)(r10 - 8); \
+ if r0 > 10 goto exit_%=; \
+ r0 += 1; \
+ *(u64 *)(r10 - 8) = r0; \
+ r0 = 0; \
+ goto loop_%=; \
+exit_%=: \
+ r0 = 0; \
+ exit; \
+" :
+ : __imm(bpf_get_prandom_u32)
+ : __clobber_all);
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/verifier_lwt.c b/tools/testing/selftests/bpf/progs/verifier_lwt.c
new file mode 100644
index 000000000000..5ab746307309
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/verifier_lwt.c
@@ -0,0 +1,234 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Converted from tools/testing/selftests/bpf/verifier/lwt.c */
+
+#include <linux/bpf.h>
+#include <bpf/bpf_helpers.h>
+#include "bpf_misc.h"
+
+SEC("lwt_in")
+__description("invalid direct packet write for LWT_IN")
+__failure __msg("cannot write into packet")
+__naked void packet_write_for_lwt_in(void)
+{
+ asm volatile (" \
+ r2 = *(u32*)(r1 + %[__sk_buff_data]); \
+ r3 = *(u32*)(r1 + %[__sk_buff_data_end]); \
+ r0 = r2; \
+ r0 += 8; \
+ if r0 > r3 goto l0_%=; \
+ *(u8*)(r2 + 0) = r2; \
+l0_%=: r0 = 0; \
+ exit; \
+" :
+ : __imm_const(__sk_buff_data, offsetof(struct __sk_buff, data)),
+ __imm_const(__sk_buff_data_end, offsetof(struct __sk_buff, data_end))
+ : __clobber_all);
+}
+
+SEC("lwt_out")
+__description("invalid direct packet write for LWT_OUT")
+__failure __msg("cannot write into packet")
+__naked void packet_write_for_lwt_out(void)
+{
+ asm volatile (" \
+ r2 = *(u32*)(r1 + %[__sk_buff_data]); \
+ r3 = *(u32*)(r1 + %[__sk_buff_data_end]); \
+ r0 = r2; \
+ r0 += 8; \
+ if r0 > r3 goto l0_%=; \
+ *(u8*)(r2 + 0) = r2; \
+l0_%=: r0 = 0; \
+ exit; \
+" :
+ : __imm_const(__sk_buff_data, offsetof(struct __sk_buff, data)),
+ __imm_const(__sk_buff_data_end, offsetof(struct __sk_buff, data_end))
+ : __clobber_all);
+}
+
+SEC("lwt_xmit")
+__description("direct packet write for LWT_XMIT")
+__success __retval(0)
+__naked void packet_write_for_lwt_xmit(void)
+{
+ asm volatile (" \
+ r2 = *(u32*)(r1 + %[__sk_buff_data]); \
+ r3 = *(u32*)(r1 + %[__sk_buff_data_end]); \
+ r0 = r2; \
+ r0 += 8; \
+ if r0 > r3 goto l0_%=; \
+ *(u8*)(r2 + 0) = r2; \
+l0_%=: r0 = 0; \
+ exit; \
+" :
+ : __imm_const(__sk_buff_data, offsetof(struct __sk_buff, data)),
+ __imm_const(__sk_buff_data_end, offsetof(struct __sk_buff, data_end))
+ : __clobber_all);
+}
+
+SEC("lwt_in")
+__description("direct packet read for LWT_IN")
+__success __retval(0)
+__naked void packet_read_for_lwt_in(void)
+{
+ asm volatile (" \
+ r2 = *(u32*)(r1 + %[__sk_buff_data]); \
+ r3 = *(u32*)(r1 + %[__sk_buff_data_end]); \
+ r0 = r2; \
+ r0 += 8; \
+ if r0 > r3 goto l0_%=; \
+ r0 = *(u8*)(r2 + 0); \
+l0_%=: r0 = 0; \
+ exit; \
+" :
+ : __imm_const(__sk_buff_data, offsetof(struct __sk_buff, data)),
+ __imm_const(__sk_buff_data_end, offsetof(struct __sk_buff, data_end))
+ : __clobber_all);
+}
+
+SEC("lwt_out")
+__description("direct packet read for LWT_OUT")
+__success __retval(0)
+__naked void packet_read_for_lwt_out(void)
+{
+ asm volatile (" \
+ r2 = *(u32*)(r1 + %[__sk_buff_data]); \
+ r3 = *(u32*)(r1 + %[__sk_buff_data_end]); \
+ r0 = r2; \
+ r0 += 8; \
+ if r0 > r3 goto l0_%=; \
+ r0 = *(u8*)(r2 + 0); \
+l0_%=: r0 = 0; \
+ exit; \
+" :
+ : __imm_const(__sk_buff_data, offsetof(struct __sk_buff, data)),
+ __imm_const(__sk_buff_data_end, offsetof(struct __sk_buff, data_end))
+ : __clobber_all);
+}
+
+SEC("lwt_xmit")
+__description("direct packet read for LWT_XMIT")
+__success __retval(0)
+__naked void packet_read_for_lwt_xmit(void)
+{
+ asm volatile (" \
+ r2 = *(u32*)(r1 + %[__sk_buff_data]); \
+ r3 = *(u32*)(r1 + %[__sk_buff_data_end]); \
+ r0 = r2; \
+ r0 += 8; \
+ if r0 > r3 goto l0_%=; \
+ r0 = *(u8*)(r2 + 0); \
+l0_%=: r0 = 0; \
+ exit; \
+" :
+ : __imm_const(__sk_buff_data, offsetof(struct __sk_buff, data)),
+ __imm_const(__sk_buff_data_end, offsetof(struct __sk_buff, data_end))
+ : __clobber_all);
+}
+
+SEC("lwt_xmit")
+__description("overlapping checks for direct packet access")
+__success __retval(0)
+__naked void checks_for_direct_packet_access(void)
+{
+ asm volatile (" \
+ r2 = *(u32*)(r1 + %[__sk_buff_data]); \
+ r3 = *(u32*)(r1 + %[__sk_buff_data_end]); \
+ r0 = r2; \
+ r0 += 8; \
+ if r0 > r3 goto l0_%=; \
+ r1 = r2; \
+ r1 += 6; \
+ if r1 > r3 goto l0_%=; \
+ r0 = *(u16*)(r2 + 6); \
+l0_%=: r0 = 0; \
+ exit; \
+" :
+ : __imm_const(__sk_buff_data, offsetof(struct __sk_buff, data)),
+ __imm_const(__sk_buff_data_end, offsetof(struct __sk_buff, data_end))
+ : __clobber_all);
+}
+
+SEC("lwt_xmit")
+__description("make headroom for LWT_XMIT")
+__success __retval(0)
+__naked void make_headroom_for_lwt_xmit(void)
+{
+ asm volatile (" \
+ r6 = r1; \
+ r2 = 34; \
+ r3 = 0; \
+ call %[bpf_skb_change_head]; \
+ /* split for s390 to succeed */ \
+ r1 = r6; \
+ r2 = 42; \
+ r3 = 0; \
+ call %[bpf_skb_change_head]; \
+ r0 = 0; \
+ exit; \
+" :
+ : __imm(bpf_skb_change_head)
+ : __clobber_all);
+}
+
+SEC("socket")
+__description("invalid access of tc_classid for LWT_IN")
+__failure __msg("invalid bpf_context access")
+__failure_unpriv
+__naked void tc_classid_for_lwt_in(void)
+{
+ asm volatile (" \
+ r0 = *(u32*)(r1 + %[__sk_buff_tc_classid]); \
+ exit; \
+" :
+ : __imm_const(__sk_buff_tc_classid, offsetof(struct __sk_buff, tc_classid))
+ : __clobber_all);
+}
+
+SEC("socket")
+__description("invalid access of tc_classid for LWT_OUT")
+__failure __msg("invalid bpf_context access")
+__failure_unpriv
+__naked void tc_classid_for_lwt_out(void)
+{
+ asm volatile (" \
+ r0 = *(u32*)(r1 + %[__sk_buff_tc_classid]); \
+ exit; \
+" :
+ : __imm_const(__sk_buff_tc_classid, offsetof(struct __sk_buff, tc_classid))
+ : __clobber_all);
+}
+
+SEC("socket")
+__description("invalid access of tc_classid for LWT_XMIT")
+__failure __msg("invalid bpf_context access")
+__failure_unpriv
+__naked void tc_classid_for_lwt_xmit(void)
+{
+ asm volatile (" \
+ r0 = *(u32*)(r1 + %[__sk_buff_tc_classid]); \
+ exit; \
+" :
+ : __imm_const(__sk_buff_tc_classid, offsetof(struct __sk_buff, tc_classid))
+ : __clobber_all);
+}
+
+SEC("lwt_in")
+__description("check skb->tc_classid half load not permitted for lwt prog")
+__failure __msg("invalid bpf_context access")
+__naked void not_permitted_for_lwt_prog(void)
+{
+ asm volatile (
+ "r0 = 0;"
+#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
+ "r0 = *(u16*)(r1 + %[__sk_buff_tc_classid]);"
+#else
+ "r0 = *(u16*)(r1 + %[__imm_0]);"
+#endif
+ "exit;"
+ :
+ : __imm_const(__imm_0, offsetof(struct __sk_buff, tc_classid) + 2),
+ __imm_const(__sk_buff_tc_classid, offsetof(struct __sk_buff, tc_classid))
+ : __clobber_all);
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/verifier_map_in_map.c b/tools/testing/selftests/bpf/progs/verifier_map_in_map.c
new file mode 100644
index 000000000000..4eaab1468eb7
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/verifier_map_in_map.c
@@ -0,0 +1,142 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Converted from tools/testing/selftests/bpf/verifier/map_in_map.c */
+
+#include <linux/bpf.h>
+#include <bpf/bpf_helpers.h>
+#include "bpf_misc.h"
+
+struct {
+ __uint(type, BPF_MAP_TYPE_ARRAY_OF_MAPS);
+ __uint(max_entries, 1);
+ __type(key, int);
+ __type(value, int);
+ __array(values, struct {
+ __uint(type, BPF_MAP_TYPE_ARRAY);
+ __uint(max_entries, 1);
+ __type(key, int);
+ __type(value, int);
+ });
+} map_in_map SEC(".maps");
+
+SEC("socket")
+__description("map in map access")
+__success __success_unpriv __retval(0)
+__naked void map_in_map_access(void)
+{
+ asm volatile (" \
+ r1 = 0; \
+ *(u32*)(r10 - 4) = r1; \
+ r2 = r10; \
+ r2 += -4; \
+ r1 = %[map_in_map] ll; \
+ call %[bpf_map_lookup_elem]; \
+ if r0 == 0 goto l0_%=; \
+ r1 = 0; \
+ *(u32*)(r10 - 4) = r1; \
+ r2 = r10; \
+ r2 += -4; \
+ r1 = r0; \
+ call %[bpf_map_lookup_elem]; \
+l0_%=: r0 = 0; \
+ exit; \
+" :
+ : __imm(bpf_map_lookup_elem),
+ __imm_addr(map_in_map)
+ : __clobber_all);
+}
+
+SEC("xdp")
+__description("map in map state pruning")
+__success __msg("processed 26 insns")
+__log_level(2) __retval(0) __flag(BPF_F_TEST_STATE_FREQ)
+__naked void map_in_map_state_pruning(void)
+{
+ asm volatile (" \
+ r1 = 0; \
+ *(u32*)(r10 - 4) = r1; \
+ r6 = r10; \
+ r6 += -4; \
+ r2 = r6; \
+ r1 = %[map_in_map] ll; \
+ call %[bpf_map_lookup_elem]; \
+ if r0 != 0 goto l0_%=; \
+ exit; \
+l0_%=: r2 = r6; \
+ r1 = r0; \
+ call %[bpf_map_lookup_elem]; \
+ if r0 != 0 goto l1_%=; \
+ r2 = r6; \
+ r1 = %[map_in_map] ll; \
+ call %[bpf_map_lookup_elem]; \
+ if r0 != 0 goto l2_%=; \
+ exit; \
+l2_%=: r2 = r6; \
+ r1 = r0; \
+ call %[bpf_map_lookup_elem]; \
+ if r0 != 0 goto l1_%=; \
+ exit; \
+l1_%=: r0 = *(u32*)(r0 + 0); \
+ exit; \
+" :
+ : __imm(bpf_map_lookup_elem),
+ __imm_addr(map_in_map)
+ : __clobber_all);
+}
+
+SEC("socket")
+__description("invalid inner map pointer")
+__failure __msg("R1 pointer arithmetic on map_ptr prohibited")
+__failure_unpriv
+__naked void invalid_inner_map_pointer(void)
+{
+ asm volatile (" \
+ r1 = 0; \
+ *(u32*)(r10 - 4) = r1; \
+ r2 = r10; \
+ r2 += -4; \
+ r1 = %[map_in_map] ll; \
+ call %[bpf_map_lookup_elem]; \
+ if r0 == 0 goto l0_%=; \
+ r1 = 0; \
+ *(u32*)(r10 - 4) = r1; \
+ r2 = r10; \
+ r2 += -4; \
+ r1 = r0; \
+ r1 += 8; \
+ call %[bpf_map_lookup_elem]; \
+l0_%=: r0 = 0; \
+ exit; \
+" :
+ : __imm(bpf_map_lookup_elem),
+ __imm_addr(map_in_map)
+ : __clobber_all);
+}
+
+SEC("socket")
+__description("forgot null checking on the inner map pointer")
+__failure __msg("R1 type=map_value_or_null expected=map_ptr")
+__failure_unpriv
+__naked void on_the_inner_map_pointer(void)
+{
+ asm volatile (" \
+ r1 = 0; \
+ *(u32*)(r10 - 4) = r1; \
+ r2 = r10; \
+ r2 += -4; \
+ r1 = %[map_in_map] ll; \
+ call %[bpf_map_lookup_elem]; \
+ r1 = 0; \
+ *(u32*)(r10 - 4) = r1; \
+ r2 = r10; \
+ r2 += -4; \
+ r1 = r0; \
+ call %[bpf_map_lookup_elem]; \
+ r0 = 0; \
+ exit; \
+" :
+ : __imm(bpf_map_lookup_elem),
+ __imm_addr(map_in_map)
+ : __clobber_all);
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/verifier_map_ptr.c b/tools/testing/selftests/bpf/progs/verifier_map_ptr.c
new file mode 100644
index 000000000000..11a079145966
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/verifier_map_ptr.c
@@ -0,0 +1,159 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Converted from tools/testing/selftests/bpf/verifier/map_ptr.c */
+
+#include <linux/bpf.h>
+#include <bpf/bpf_helpers.h>
+#include "bpf_misc.h"
+
+#define MAX_ENTRIES 11
+
+struct test_val {
+ unsigned int index;
+ int foo[MAX_ENTRIES];
+};
+
+struct {
+ __uint(type, BPF_MAP_TYPE_ARRAY);
+ __uint(max_entries, 1);
+ __type(key, int);
+ __type(value, struct test_val);
+} map_array_48b SEC(".maps");
+
+struct other_val {
+ long long foo;
+ long long bar;
+};
+
+struct {
+ __uint(type, BPF_MAP_TYPE_HASH);
+ __uint(max_entries, 1);
+ __type(key, long long);
+ __type(value, struct other_val);
+} map_hash_16b SEC(".maps");
+
+SEC("socket")
+__description("bpf_map_ptr: read with negative offset rejected")
+__failure __msg("R1 is bpf_array invalid negative access: off=-8")
+__failure_unpriv
+__msg_unpriv("access is allowed only to CAP_PERFMON and CAP_SYS_ADMIN")
+__naked void read_with_negative_offset_rejected(void)
+{
+ asm volatile (" \
+ r1 = r10; \
+ r1 = %[map_array_48b] ll; \
+ r6 = *(u64*)(r1 - 8); \
+ r0 = 1; \
+ exit; \
+" :
+ : __imm_addr(map_array_48b)
+ : __clobber_all);
+}
+
+SEC("socket")
+__description("bpf_map_ptr: write rejected")
+__failure __msg("only read from bpf_array is supported")
+__failure_unpriv
+__msg_unpriv("access is allowed only to CAP_PERFMON and CAP_SYS_ADMIN")
+__naked void bpf_map_ptr_write_rejected(void)
+{
+ asm volatile (" \
+ r0 = 0; \
+ *(u64*)(r10 - 8) = r0; \
+ r2 = r10; \
+ r2 += -8; \
+ r1 = %[map_array_48b] ll; \
+ *(u64*)(r1 + 0) = r2; \
+ r0 = 1; \
+ exit; \
+" :
+ : __imm_addr(map_array_48b)
+ : __clobber_all);
+}
+
+SEC("socket")
+__description("bpf_map_ptr: read non-existent field rejected")
+__failure
+__msg("cannot access ptr member ops with moff 0 in struct bpf_map with off 1 size 4")
+__failure_unpriv
+__msg_unpriv("access is allowed only to CAP_PERFMON and CAP_SYS_ADMIN")
+__flag(BPF_F_ANY_ALIGNMENT)
+__naked void read_non_existent_field_rejected(void)
+{
+ asm volatile (" \
+ r6 = 0; \
+ r1 = %[map_array_48b] ll; \
+ r6 = *(u32*)(r1 + 1); \
+ r0 = 1; \
+ exit; \
+" :
+ : __imm_addr(map_array_48b)
+ : __clobber_all);
+}
+
+SEC("socket")
+__description("bpf_map_ptr: read ops field accepted")
+__success __failure_unpriv
+__msg_unpriv("access is allowed only to CAP_PERFMON and CAP_SYS_ADMIN")
+__retval(1)
+__naked void ptr_read_ops_field_accepted(void)
+{
+ asm volatile (" \
+ r6 = 0; \
+ r1 = %[map_array_48b] ll; \
+ r6 = *(u64*)(r1 + 0); \
+ r0 = 1; \
+ exit; \
+" :
+ : __imm_addr(map_array_48b)
+ : __clobber_all);
+}
+
+SEC("socket")
+__description("bpf_map_ptr: r = 0, map_ptr = map_ptr + r")
+__success __failure_unpriv
+__msg_unpriv("R1 has pointer with unsupported alu operation")
+__retval(0)
+__naked void map_ptr_map_ptr_r(void)
+{
+ asm volatile (" \
+ r0 = 0; \
+ *(u64*)(r10 - 8) = r0; \
+ r2 = r10; \
+ r2 += -8; \
+ r0 = 0; \
+ r1 = %[map_hash_16b] ll; \
+ r1 += r0; \
+ call %[bpf_map_lookup_elem]; \
+ r0 = 0; \
+ exit; \
+" :
+ : __imm(bpf_map_lookup_elem),
+ __imm_addr(map_hash_16b)
+ : __clobber_all);
+}
+
+SEC("socket")
+__description("bpf_map_ptr: r = 0, r = r + map_ptr")
+__success __failure_unpriv
+__msg_unpriv("R0 has pointer with unsupported alu operation")
+__retval(0)
+__naked void _0_r_r_map_ptr(void)
+{
+ asm volatile (" \
+ r0 = 0; \
+ *(u64*)(r10 - 8) = r0; \
+ r2 = r10; \
+ r2 += -8; \
+ r1 = 0; \
+ r0 = %[map_hash_16b] ll; \
+ r1 += r0; \
+ call %[bpf_map_lookup_elem]; \
+ r0 = 0; \
+ exit; \
+" :
+ : __imm(bpf_map_lookup_elem),
+ __imm_addr(map_hash_16b)
+ : __clobber_all);
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/verifier_map_ptr_mixing.c b/tools/testing/selftests/bpf/progs/verifier_map_ptr_mixing.c
new file mode 100644
index 000000000000..c5a7c1ddc562
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/verifier_map_ptr_mixing.c
@@ -0,0 +1,265 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Converted from tools/testing/selftests/bpf/verifier/map_ptr_mixing.c */
+
+#include <linux/bpf.h>
+#include <bpf/bpf_helpers.h>
+#include "bpf_misc.h"
+
+#define MAX_ENTRIES 11
+
+struct test_val {
+ unsigned int index;
+ int foo[MAX_ENTRIES];
+};
+
+struct {
+ __uint(type, BPF_MAP_TYPE_ARRAY);
+ __uint(max_entries, 1);
+ __type(key, int);
+ __type(value, struct test_val);
+} map_array_48b SEC(".maps");
+
+struct {
+ __uint(type, BPF_MAP_TYPE_HASH);
+ __uint(max_entries, 1);
+ __type(key, long long);
+ __type(value, struct test_val);
+} map_hash_48b SEC(".maps");
+
+struct {
+ __uint(type, BPF_MAP_TYPE_ARRAY_OF_MAPS);
+ __uint(max_entries, 1);
+ __type(key, int);
+ __type(value, int);
+ __array(values, struct {
+ __uint(type, BPF_MAP_TYPE_ARRAY);
+ __uint(max_entries, 1);
+ __type(key, int);
+ __type(value, int);
+ });
+} map_in_map SEC(".maps");
+
+void dummy_prog_42_socket(void);
+void dummy_prog_24_socket(void);
+void dummy_prog_loop1_socket(void);
+void dummy_prog_loop2_socket(void);
+
+struct {
+ __uint(type, BPF_MAP_TYPE_PROG_ARRAY);
+ __uint(max_entries, 4);
+ __uint(key_size, sizeof(int));
+ __array(values, void (void));
+} map_prog1_socket SEC(".maps") = {
+ .values = {
+ [0] = (void *)&dummy_prog_42_socket,
+ [1] = (void *)&dummy_prog_loop1_socket,
+ [2] = (void *)&dummy_prog_24_socket,
+ },
+};
+
+struct {
+ __uint(type, BPF_MAP_TYPE_PROG_ARRAY);
+ __uint(max_entries, 8);
+ __uint(key_size, sizeof(int));
+ __array(values, void (void));
+} map_prog2_socket SEC(".maps") = {
+ .values = {
+ [1] = (void *)&dummy_prog_loop2_socket,
+ [2] = (void *)&dummy_prog_24_socket,
+ [7] = (void *)&dummy_prog_42_socket,
+ },
+};
+
+SEC("socket")
+__auxiliary __auxiliary_unpriv
+__naked void dummy_prog_42_socket(void)
+{
+ asm volatile ("r0 = 42; exit;");
+}
+
+SEC("socket")
+__auxiliary __auxiliary_unpriv
+__naked void dummy_prog_24_socket(void)
+{
+ asm volatile ("r0 = 24; exit;");
+}
+
+SEC("socket")
+__auxiliary __auxiliary_unpriv
+__naked void dummy_prog_loop1_socket(void)
+{
+ asm volatile (" \
+ r3 = 1; \
+ r2 = %[map_prog1_socket] ll; \
+ call %[bpf_tail_call]; \
+ r0 = 41; \
+ exit; \
+" :
+ : __imm(bpf_tail_call),
+ __imm_addr(map_prog1_socket)
+ : __clobber_all);
+}
+
+SEC("socket")
+__auxiliary __auxiliary_unpriv
+__naked void dummy_prog_loop2_socket(void)
+{
+ asm volatile (" \
+ r3 = 1; \
+ r2 = %[map_prog2_socket] ll; \
+ call %[bpf_tail_call]; \
+ r0 = 41; \
+ exit; \
+" :
+ : __imm(bpf_tail_call),
+ __imm_addr(map_prog2_socket)
+ : __clobber_all);
+}
+
+SEC("tc")
+__description("calls: two calls returning different map pointers for lookup (hash, array)")
+__success __retval(1)
+__naked void pointers_for_lookup_hash_array(void)
+{
+ asm volatile (" \
+ /* main prog */ \
+ if r1 != 0 goto l0_%=; \
+ call pointers_for_lookup_hash_array__1; \
+ goto l1_%=; \
+l0_%=: call pointers_for_lookup_hash_array__2; \
+l1_%=: r1 = r0; \
+ r2 = 0; \
+ *(u64*)(r10 - 8) = r2; \
+ r2 = r10; \
+ r2 += -8; \
+ call %[bpf_map_lookup_elem]; \
+ if r0 == 0 goto l2_%=; \
+ r1 = %[test_val_foo]; \
+ *(u64*)(r0 + 0) = r1; \
+ r0 = 1; \
+l2_%=: exit; \
+" :
+ : __imm(bpf_map_lookup_elem),
+ __imm_const(test_val_foo, offsetof(struct test_val, foo))
+ : __clobber_all);
+}
+
+static __naked __noinline __attribute__((used))
+void pointers_for_lookup_hash_array__1(void)
+{
+ asm volatile (" \
+ r0 = %[map_hash_48b] ll; \
+ exit; \
+" :
+ : __imm_addr(map_hash_48b)
+ : __clobber_all);
+}
+
+static __naked __noinline __attribute__((used))
+void pointers_for_lookup_hash_array__2(void)
+{
+ asm volatile (" \
+ r0 = %[map_array_48b] ll; \
+ exit; \
+" :
+ : __imm_addr(map_array_48b)
+ : __clobber_all);
+}
+
+SEC("tc")
+__description("calls: two calls returning different map pointers for lookup (hash, map in map)")
+__failure __msg("only read from bpf_array is supported")
+__naked void lookup_hash_map_in_map(void)
+{
+ asm volatile (" \
+ /* main prog */ \
+ if r1 != 0 goto l0_%=; \
+ call lookup_hash_map_in_map__1; \
+ goto l1_%=; \
+l0_%=: call lookup_hash_map_in_map__2; \
+l1_%=: r1 = r0; \
+ r2 = 0; \
+ *(u64*)(r10 - 8) = r2; \
+ r2 = r10; \
+ r2 += -8; \
+ call %[bpf_map_lookup_elem]; \
+ if r0 == 0 goto l2_%=; \
+ r1 = %[test_val_foo]; \
+ *(u64*)(r0 + 0) = r1; \
+ r0 = 1; \
+l2_%=: exit; \
+" :
+ : __imm(bpf_map_lookup_elem),
+ __imm_const(test_val_foo, offsetof(struct test_val, foo))
+ : __clobber_all);
+}
+
+static __naked __noinline __attribute__((used))
+void lookup_hash_map_in_map__1(void)
+{
+ asm volatile (" \
+ r0 = %[map_array_48b] ll; \
+ exit; \
+" :
+ : __imm_addr(map_array_48b)
+ : __clobber_all);
+}
+
+static __naked __noinline __attribute__((used))
+void lookup_hash_map_in_map__2(void)
+{
+ asm volatile (" \
+ r0 = %[map_in_map] ll; \
+ exit; \
+" :
+ : __imm_addr(map_in_map)
+ : __clobber_all);
+}
+
+SEC("socket")
+__description("cond: two branches returning different map pointers for lookup (tail, tail)")
+__success __failure_unpriv __msg_unpriv("tail_call abusing map_ptr")
+__retval(42)
+__naked void pointers_for_lookup_tail_tail_1(void)
+{
+ asm volatile (" \
+ r6 = *(u32*)(r1 + %[__sk_buff_mark]); \
+ if r6 != 0 goto l0_%=; \
+ r2 = %[map_prog2_socket] ll; \
+ goto l1_%=; \
+l0_%=: r2 = %[map_prog1_socket] ll; \
+l1_%=: r3 = 7; \
+ call %[bpf_tail_call]; \
+ r0 = 1; \
+ exit; \
+" :
+ : __imm(bpf_tail_call),
+ __imm_addr(map_prog1_socket),
+ __imm_addr(map_prog2_socket),
+ __imm_const(__sk_buff_mark, offsetof(struct __sk_buff, mark))
+ : __clobber_all);
+}
+
+SEC("socket")
+__description("cond: two branches returning same map pointers for lookup (tail, tail)")
+__success __success_unpriv __retval(42)
+__naked void pointers_for_lookup_tail_tail_2(void)
+{
+ asm volatile (" \
+ r6 = *(u32*)(r1 + %[__sk_buff_mark]); \
+ if r6 == 0 goto l0_%=; \
+ r2 = %[map_prog2_socket] ll; \
+ goto l1_%=; \
+l0_%=: r2 = %[map_prog2_socket] ll; \
+l1_%=: r3 = 7; \
+ call %[bpf_tail_call]; \
+ r0 = 1; \
+ exit; \
+" :
+ : __imm(bpf_tail_call),
+ __imm_addr(map_prog2_socket),
+ __imm_const(__sk_buff_mark, offsetof(struct __sk_buff, mark))
+ : __clobber_all);
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/verifier_map_ret_val.c b/tools/testing/selftests/bpf/progs/verifier_map_ret_val.c
new file mode 100644
index 000000000000..1639628b832d
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/verifier_map_ret_val.c
@@ -0,0 +1,110 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Converted from tools/testing/selftests/bpf/verifier/map_ret_val.c */
+
+#include <linux/bpf.h>
+#include <bpf/bpf_helpers.h>
+#include "../../../include/linux/filter.h"
+#include "bpf_misc.h"
+
+struct {
+ __uint(type, BPF_MAP_TYPE_HASH);
+ __uint(max_entries, 1);
+ __type(key, long long);
+ __type(value, long long);
+} map_hash_8b SEC(".maps");
+
+SEC("socket")
+__description("invalid map_fd for function call")
+__failure __msg("fd 0 is not pointing to valid bpf_map")
+__failure_unpriv
+__naked void map_fd_for_function_call(void)
+{
+ asm volatile (" \
+ r2 = 0; \
+ *(u64*)(r10 - 8) = r2; \
+ r2 = r10; \
+ r2 += -8; \
+ .8byte %[ld_map_fd]; \
+ .8byte 0; \
+ call %[bpf_map_delete_elem]; \
+ exit; \
+" :
+ : __imm(bpf_map_delete_elem),
+ __imm_insn(ld_map_fd, BPF_RAW_INSN(BPF_LD | BPF_DW | BPF_IMM, BPF_REG_1, BPF_PSEUDO_MAP_FD, 0, 0))
+ : __clobber_all);
+}
+
+SEC("socket")
+__description("don't check return value before access")
+__failure __msg("R0 invalid mem access 'map_value_or_null'")
+__failure_unpriv
+__naked void check_return_value_before_access(void)
+{
+ asm volatile (" \
+ r1 = 0; \
+ *(u64*)(r10 - 8) = r1; \
+ r2 = r10; \
+ r2 += -8; \
+ r1 = %[map_hash_8b] ll; \
+ call %[bpf_map_lookup_elem]; \
+ r1 = 0; \
+ *(u64*)(r0 + 0) = r1; \
+ exit; \
+" :
+ : __imm(bpf_map_lookup_elem),
+ __imm_addr(map_hash_8b)
+ : __clobber_all);
+}
+
+SEC("socket")
+__description("access memory with incorrect alignment")
+__failure __msg("misaligned value access")
+__failure_unpriv
+__flag(BPF_F_STRICT_ALIGNMENT)
+__naked void access_memory_with_incorrect_alignment_1(void)
+{
+ asm volatile (" \
+ r1 = 0; \
+ *(u64*)(r10 - 8) = r1; \
+ r2 = r10; \
+ r2 += -8; \
+ r1 = %[map_hash_8b] ll; \
+ call %[bpf_map_lookup_elem]; \
+ if r0 == 0 goto l0_%=; \
+ r1 = 0; \
+ *(u64*)(r0 + 4) = r1; \
+l0_%=: exit; \
+" :
+ : __imm(bpf_map_lookup_elem),
+ __imm_addr(map_hash_8b)
+ : __clobber_all);
+}
+
+SEC("socket")
+__description("sometimes access memory with incorrect alignment")
+__failure __msg("R0 invalid mem access")
+__msg_unpriv("R0 leaks addr")
+__flag(BPF_F_STRICT_ALIGNMENT)
+__naked void access_memory_with_incorrect_alignment_2(void)
+{
+ asm volatile (" \
+ r1 = 0; \
+ *(u64*)(r10 - 8) = r1; \
+ r2 = r10; \
+ r2 += -8; \
+ r1 = %[map_hash_8b] ll; \
+ call %[bpf_map_lookup_elem]; \
+ if r0 == 0 goto l0_%=; \
+ r1 = 0; \
+ *(u64*)(r0 + 0) = r1; \
+ exit; \
+l0_%=: r1 = 1; \
+ *(u64*)(r0 + 0) = r1; \
+ exit; \
+" :
+ : __imm(bpf_map_lookup_elem),
+ __imm_addr(map_hash_8b)
+ : __clobber_all);
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/verifier_masking.c b/tools/testing/selftests/bpf/progs/verifier_masking.c
new file mode 100644
index 000000000000..5732cc1b4c47
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/verifier_masking.c
@@ -0,0 +1,410 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Converted from tools/testing/selftests/bpf/verifier/masking.c */
+
+#include <linux/bpf.h>
+#include <bpf/bpf_helpers.h>
+#include "bpf_misc.h"
+
+SEC("socket")
+__description("masking, test out of bounds 1")
+__success __success_unpriv __retval(0)
+__naked void test_out_of_bounds_1(void)
+{
+ asm volatile (" \
+ w1 = 5; \
+ w2 = %[__imm_0]; \
+ r2 -= r1; \
+ r2 |= r1; \
+ r2 = -r2; \
+ r2 s>>= 63; \
+ r1 &= r2; \
+ r0 = r1; \
+ exit; \
+" :
+ : __imm_const(__imm_0, 5 - 1)
+ : __clobber_all);
+}
+
+SEC("socket")
+__description("masking, test out of bounds 2")
+__success __success_unpriv __retval(0)
+__naked void test_out_of_bounds_2(void)
+{
+ asm volatile (" \
+ w1 = 1; \
+ w2 = %[__imm_0]; \
+ r2 -= r1; \
+ r2 |= r1; \
+ r2 = -r2; \
+ r2 s>>= 63; \
+ r1 &= r2; \
+ r0 = r1; \
+ exit; \
+" :
+ : __imm_const(__imm_0, 1 - 1)
+ : __clobber_all);
+}
+
+SEC("socket")
+__description("masking, test out of bounds 3")
+__success __success_unpriv __retval(0)
+__naked void test_out_of_bounds_3(void)
+{
+ asm volatile (" \
+ w1 = 0xffffffff; \
+ w2 = %[__imm_0]; \
+ r2 -= r1; \
+ r2 |= r1; \
+ r2 = -r2; \
+ r2 s>>= 63; \
+ r1 &= r2; \
+ r0 = r1; \
+ exit; \
+" :
+ : __imm_const(__imm_0, 0xffffffff - 1)
+ : __clobber_all);
+}
+
+SEC("socket")
+__description("masking, test out of bounds 4")
+__success __success_unpriv __retval(0)
+__naked void test_out_of_bounds_4(void)
+{
+ asm volatile (" \
+ w1 = 0xffffffff; \
+ w2 = %[__imm_0]; \
+ r2 -= r1; \
+ r2 |= r1; \
+ r2 = -r2; \
+ r2 s>>= 63; \
+ r1 &= r2; \
+ r0 = r1; \
+ exit; \
+" :
+ : __imm_const(__imm_0, 1 - 1)
+ : __clobber_all);
+}
+
+SEC("socket")
+__description("masking, test out of bounds 5")
+__success __success_unpriv __retval(0)
+__naked void test_out_of_bounds_5(void)
+{
+ asm volatile (" \
+ w1 = -1; \
+ w2 = %[__imm_0]; \
+ r2 -= r1; \
+ r2 |= r1; \
+ r2 = -r2; \
+ r2 s>>= 63; \
+ r1 &= r2; \
+ r0 = r1; \
+ exit; \
+" :
+ : __imm_const(__imm_0, 1 - 1)
+ : __clobber_all);
+}
+
+SEC("socket")
+__description("masking, test out of bounds 6")
+__success __success_unpriv __retval(0)
+__naked void test_out_of_bounds_6(void)
+{
+ asm volatile (" \
+ w1 = -1; \
+ w2 = %[__imm_0]; \
+ r2 -= r1; \
+ r2 |= r1; \
+ r2 = -r2; \
+ r2 s>>= 63; \
+ r1 &= r2; \
+ r0 = r1; \
+ exit; \
+" :
+ : __imm_const(__imm_0, 0xffffffff - 1)
+ : __clobber_all);
+}
+
+SEC("socket")
+__description("masking, test out of bounds 7")
+__success __success_unpriv __retval(0)
+__naked void test_out_of_bounds_7(void)
+{
+ asm volatile (" \
+ r1 = 5; \
+ w2 = %[__imm_0]; \
+ r2 -= r1; \
+ r2 |= r1; \
+ r2 = -r2; \
+ r2 s>>= 63; \
+ r1 &= r2; \
+ r0 = r1; \
+ exit; \
+" :
+ : __imm_const(__imm_0, 5 - 1)
+ : __clobber_all);
+}
+
+SEC("socket")
+__description("masking, test out of bounds 8")
+__success __success_unpriv __retval(0)
+__naked void test_out_of_bounds_8(void)
+{
+ asm volatile (" \
+ r1 = 1; \
+ w2 = %[__imm_0]; \
+ r2 -= r1; \
+ r2 |= r1; \
+ r2 = -r2; \
+ r2 s>>= 63; \
+ r1 &= r2; \
+ r0 = r1; \
+ exit; \
+" :
+ : __imm_const(__imm_0, 1 - 1)
+ : __clobber_all);
+}
+
+SEC("socket")
+__description("masking, test out of bounds 9")
+__success __success_unpriv __retval(0)
+__naked void test_out_of_bounds_9(void)
+{
+ asm volatile (" \
+ r1 = 0xffffffff; \
+ w2 = %[__imm_0]; \
+ r2 -= r1; \
+ r2 |= r1; \
+ r2 = -r2; \
+ r2 s>>= 63; \
+ r1 &= r2; \
+ r0 = r1; \
+ exit; \
+" :
+ : __imm_const(__imm_0, 0xffffffff - 1)
+ : __clobber_all);
+}
+
+SEC("socket")
+__description("masking, test out of bounds 10")
+__success __success_unpriv __retval(0)
+__naked void test_out_of_bounds_10(void)
+{
+ asm volatile (" \
+ r1 = 0xffffffff; \
+ w2 = %[__imm_0]; \
+ r2 -= r1; \
+ r2 |= r1; \
+ r2 = -r2; \
+ r2 s>>= 63; \
+ r1 &= r2; \
+ r0 = r1; \
+ exit; \
+" :
+ : __imm_const(__imm_0, 1 - 1)
+ : __clobber_all);
+}
+
+SEC("socket")
+__description("masking, test out of bounds 11")
+__success __success_unpriv __retval(0)
+__naked void test_out_of_bounds_11(void)
+{
+ asm volatile (" \
+ r1 = -1; \
+ w2 = %[__imm_0]; \
+ r2 -= r1; \
+ r2 |= r1; \
+ r2 = -r2; \
+ r2 s>>= 63; \
+ r1 &= r2; \
+ r0 = r1; \
+ exit; \
+" :
+ : __imm_const(__imm_0, 1 - 1)
+ : __clobber_all);
+}
+
+SEC("socket")
+__description("masking, test out of bounds 12")
+__success __success_unpriv __retval(0)
+__naked void test_out_of_bounds_12(void)
+{
+ asm volatile (" \
+ r1 = -1; \
+ w2 = %[__imm_0]; \
+ r2 -= r1; \
+ r2 |= r1; \
+ r2 = -r2; \
+ r2 s>>= 63; \
+ r1 &= r2; \
+ r0 = r1; \
+ exit; \
+" :
+ : __imm_const(__imm_0, 0xffffffff - 1)
+ : __clobber_all);
+}
+
+SEC("socket")
+__description("masking, test in bounds 1")
+__success __success_unpriv __retval(4)
+__naked void masking_test_in_bounds_1(void)
+{
+ asm volatile (" \
+ w1 = 4; \
+ w2 = %[__imm_0]; \
+ r2 -= r1; \
+ r2 |= r1; \
+ r2 = -r2; \
+ r2 s>>= 63; \
+ r1 &= r2; \
+ r0 = r1; \
+ exit; \
+" :
+ : __imm_const(__imm_0, 5 - 1)
+ : __clobber_all);
+}
+
+SEC("socket")
+__description("masking, test in bounds 2")
+__success __success_unpriv __retval(0)
+__naked void masking_test_in_bounds_2(void)
+{
+ asm volatile (" \
+ w1 = 0; \
+ w2 = %[__imm_0]; \
+ r2 -= r1; \
+ r2 |= r1; \
+ r2 = -r2; \
+ r2 s>>= 63; \
+ r1 &= r2; \
+ r0 = r1; \
+ exit; \
+" :
+ : __imm_const(__imm_0, 0xffffffff - 1)
+ : __clobber_all);
+}
+
+SEC("socket")
+__description("masking, test in bounds 3")
+__success __success_unpriv __retval(0xfffffffe)
+__naked void masking_test_in_bounds_3(void)
+{
+ asm volatile (" \
+ w1 = 0xfffffffe; \
+ w2 = %[__imm_0]; \
+ r2 -= r1; \
+ r2 |= r1; \
+ r2 = -r2; \
+ r2 s>>= 63; \
+ r1 &= r2; \
+ r0 = r1; \
+ exit; \
+" :
+ : __imm_const(__imm_0, 0xffffffff - 1)
+ : __clobber_all);
+}
+
+SEC("socket")
+__description("masking, test in bounds 4")
+__success __success_unpriv __retval(0xabcde)
+__naked void masking_test_in_bounds_4(void)
+{
+ asm volatile (" \
+ w1 = 0xabcde; \
+ w2 = %[__imm_0]; \
+ r2 -= r1; \
+ r2 |= r1; \
+ r2 = -r2; \
+ r2 s>>= 63; \
+ r1 &= r2; \
+ r0 = r1; \
+ exit; \
+" :
+ : __imm_const(__imm_0, 0xabcdef - 1)
+ : __clobber_all);
+}
+
+SEC("socket")
+__description("masking, test in bounds 5")
+__success __success_unpriv __retval(0)
+__naked void masking_test_in_bounds_5(void)
+{
+ asm volatile (" \
+ w1 = 0; \
+ w2 = %[__imm_0]; \
+ r2 -= r1; \
+ r2 |= r1; \
+ r2 = -r2; \
+ r2 s>>= 63; \
+ r1 &= r2; \
+ r0 = r1; \
+ exit; \
+" :
+ : __imm_const(__imm_0, 1 - 1)
+ : __clobber_all);
+}
+
+SEC("socket")
+__description("masking, test in bounds 6")
+__success __success_unpriv __retval(46)
+__naked void masking_test_in_bounds_6(void)
+{
+ asm volatile (" \
+ w1 = 46; \
+ w2 = %[__imm_0]; \
+ r2 -= r1; \
+ r2 |= r1; \
+ r2 = -r2; \
+ r2 s>>= 63; \
+ r1 &= r2; \
+ r0 = r1; \
+ exit; \
+" :
+ : __imm_const(__imm_0, 47 - 1)
+ : __clobber_all);
+}
+
+SEC("socket")
+__description("masking, test in bounds 7")
+__success __success_unpriv __retval(46)
+__naked void masking_test_in_bounds_7(void)
+{
+ asm volatile (" \
+ r3 = -46; \
+ r3 *= -1; \
+ w2 = %[__imm_0]; \
+ r2 -= r3; \
+ r2 |= r3; \
+ r2 = -r2; \
+ r2 s>>= 63; \
+ r3 &= r2; \
+ r0 = r3; \
+ exit; \
+" :
+ : __imm_const(__imm_0, 47 - 1)
+ : __clobber_all);
+}
+
+SEC("socket")
+__description("masking, test in bounds 8")
+__success __success_unpriv __retval(0)
+__naked void masking_test_in_bounds_8(void)
+{
+ asm volatile (" \
+ r3 = -47; \
+ r3 *= -1; \
+ w2 = %[__imm_0]; \
+ r2 -= r3; \
+ r2 |= r3; \
+ r2 = -r2; \
+ r2 s>>= 63; \
+ r3 &= r2; \
+ r0 = r3; \
+ exit; \
+" :
+ : __imm_const(__imm_0, 47 - 1)
+ : __clobber_all);
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/verifier_meta_access.c b/tools/testing/selftests/bpf/progs/verifier_meta_access.c
new file mode 100644
index 000000000000..d81722fb5f19
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/verifier_meta_access.c
@@ -0,0 +1,284 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Converted from tools/testing/selftests/bpf/verifier/meta_access.c */
+
+#include <linux/bpf.h>
+#include <bpf/bpf_helpers.h>
+#include "bpf_misc.h"
+
+SEC("xdp")
+__description("meta access, test1")
+__success __retval(0)
+__naked void meta_access_test1(void)
+{
+ asm volatile (" \
+ r2 = *(u32*)(r1 + %[xdp_md_data_meta]); \
+ r3 = *(u32*)(r1 + %[xdp_md_data]); \
+ r0 = r2; \
+ r0 += 8; \
+ if r0 > r3 goto l0_%=; \
+ r0 = *(u8*)(r2 + 0); \
+l0_%=: r0 = 0; \
+ exit; \
+" :
+ : __imm_const(xdp_md_data, offsetof(struct xdp_md, data)),
+ __imm_const(xdp_md_data_meta, offsetof(struct xdp_md, data_meta))
+ : __clobber_all);
+}
+
+SEC("xdp")
+__description("meta access, test2")
+__failure __msg("invalid access to packet, off=-8")
+__naked void meta_access_test2(void)
+{
+ asm volatile (" \
+ r2 = *(u32*)(r1 + %[xdp_md_data_meta]); \
+ r3 = *(u32*)(r1 + %[xdp_md_data]); \
+ r0 = r2; \
+ r0 -= 8; \
+ r4 = r2; \
+ r4 += 8; \
+ if r4 > r3 goto l0_%=; \
+ r0 = *(u8*)(r0 + 0); \
+l0_%=: r0 = 0; \
+ exit; \
+" :
+ : __imm_const(xdp_md_data, offsetof(struct xdp_md, data)),
+ __imm_const(xdp_md_data_meta, offsetof(struct xdp_md, data_meta))
+ : __clobber_all);
+}
+
+SEC("xdp")
+__description("meta access, test3")
+__failure __msg("invalid access to packet")
+__naked void meta_access_test3(void)
+{
+ asm volatile (" \
+ r2 = *(u32*)(r1 + %[xdp_md_data_meta]); \
+ r3 = *(u32*)(r1 + %[xdp_md_data_end]); \
+ r0 = r2; \
+ r0 += 8; \
+ if r0 > r3 goto l0_%=; \
+ r0 = *(u8*)(r2 + 0); \
+l0_%=: r0 = 0; \
+ exit; \
+" :
+ : __imm_const(xdp_md_data_end, offsetof(struct xdp_md, data_end)),
+ __imm_const(xdp_md_data_meta, offsetof(struct xdp_md, data_meta))
+ : __clobber_all);
+}
+
+SEC("xdp")
+__description("meta access, test4")
+__failure __msg("invalid access to packet")
+__naked void meta_access_test4(void)
+{
+ asm volatile (" \
+ r2 = *(u32*)(r1 + %[xdp_md_data_meta]); \
+ r3 = *(u32*)(r1 + %[xdp_md_data_end]); \
+ r4 = *(u32*)(r1 + %[xdp_md_data]); \
+ r0 = r4; \
+ r0 += 8; \
+ if r0 > r3 goto l0_%=; \
+ r0 = *(u8*)(r2 + 0); \
+l0_%=: r0 = 0; \
+ exit; \
+" :
+ : __imm_const(xdp_md_data, offsetof(struct xdp_md, data)),
+ __imm_const(xdp_md_data_end, offsetof(struct xdp_md, data_end)),
+ __imm_const(xdp_md_data_meta, offsetof(struct xdp_md, data_meta))
+ : __clobber_all);
+}
+
+SEC("xdp")
+__description("meta access, test5")
+__failure __msg("R3 !read_ok")
+__naked void meta_access_test5(void)
+{
+ asm volatile (" \
+ r3 = *(u32*)(r1 + %[xdp_md_data_meta]); \
+ r4 = *(u32*)(r1 + %[xdp_md_data]); \
+ r0 = r3; \
+ r0 += 8; \
+ if r0 > r4 goto l0_%=; \
+ r2 = -8; \
+ call %[bpf_xdp_adjust_meta]; \
+ r0 = *(u8*)(r3 + 0); \
+l0_%=: r0 = 0; \
+ exit; \
+" :
+ : __imm(bpf_xdp_adjust_meta),
+ __imm_const(xdp_md_data, offsetof(struct xdp_md, data)),
+ __imm_const(xdp_md_data_meta, offsetof(struct xdp_md, data_meta))
+ : __clobber_all);
+}
+
+SEC("xdp")
+__description("meta access, test6")
+__failure __msg("invalid access to packet")
+__naked void meta_access_test6(void)
+{
+ asm volatile (" \
+ r2 = *(u32*)(r1 + %[xdp_md_data_meta]); \
+ r3 = *(u32*)(r1 + %[xdp_md_data]); \
+ r0 = r3; \
+ r0 += 8; \
+ r4 = r2; \
+ r4 += 8; \
+ if r4 > r0 goto l0_%=; \
+ r0 = *(u8*)(r2 + 0); \
+l0_%=: r0 = 0; \
+ exit; \
+" :
+ : __imm_const(xdp_md_data, offsetof(struct xdp_md, data)),
+ __imm_const(xdp_md_data_meta, offsetof(struct xdp_md, data_meta))
+ : __clobber_all);
+}
+
+SEC("xdp")
+__description("meta access, test7")
+__success __retval(0)
+__naked void meta_access_test7(void)
+{
+ asm volatile (" \
+ r2 = *(u32*)(r1 + %[xdp_md_data_meta]); \
+ r3 = *(u32*)(r1 + %[xdp_md_data]); \
+ r0 = r3; \
+ r0 += 8; \
+ r4 = r2; \
+ r4 += 8; \
+ if r4 > r3 goto l0_%=; \
+ r0 = *(u8*)(r2 + 0); \
+l0_%=: r0 = 0; \
+ exit; \
+" :
+ : __imm_const(xdp_md_data, offsetof(struct xdp_md, data)),
+ __imm_const(xdp_md_data_meta, offsetof(struct xdp_md, data_meta))
+ : __clobber_all);
+}
+
+SEC("xdp")
+__description("meta access, test8")
+__success __retval(0)
+__naked void meta_access_test8(void)
+{
+ asm volatile (" \
+ r2 = *(u32*)(r1 + %[xdp_md_data_meta]); \
+ r3 = *(u32*)(r1 + %[xdp_md_data]); \
+ r4 = r2; \
+ r4 += 0xFFFF; \
+ if r4 > r3 goto l0_%=; \
+ r0 = *(u8*)(r2 + 0); \
+l0_%=: r0 = 0; \
+ exit; \
+" :
+ : __imm_const(xdp_md_data, offsetof(struct xdp_md, data)),
+ __imm_const(xdp_md_data_meta, offsetof(struct xdp_md, data_meta))
+ : __clobber_all);
+}
+
+SEC("xdp")
+__description("meta access, test9")
+__failure __msg("invalid access to packet")
+__naked void meta_access_test9(void)
+{
+ asm volatile (" \
+ r2 = *(u32*)(r1 + %[xdp_md_data_meta]); \
+ r3 = *(u32*)(r1 + %[xdp_md_data]); \
+ r4 = r2; \
+ r4 += 0xFFFF; \
+ r4 += 1; \
+ if r4 > r3 goto l0_%=; \
+ r0 = *(u8*)(r2 + 0); \
+l0_%=: r0 = 0; \
+ exit; \
+" :
+ : __imm_const(xdp_md_data, offsetof(struct xdp_md, data)),
+ __imm_const(xdp_md_data_meta, offsetof(struct xdp_md, data_meta))
+ : __clobber_all);
+}
+
+SEC("xdp")
+__description("meta access, test10")
+__failure __msg("invalid access to packet")
+__naked void meta_access_test10(void)
+{
+ asm volatile (" \
+ r2 = *(u32*)(r1 + %[xdp_md_data_meta]); \
+ r3 = *(u32*)(r1 + %[xdp_md_data]); \
+ r4 = *(u32*)(r1 + %[xdp_md_data_end]); \
+ r5 = 42; \
+ r6 = 24; \
+ *(u64*)(r10 - 8) = r5; \
+ lock *(u64 *)(r10 - 8) += r6; \
+ r5 = *(u64*)(r10 - 8); \
+ if r5 > 100 goto l0_%=; \
+ r3 += r5; \
+ r5 = r3; \
+ r6 = r2; \
+ r6 += 8; \
+ if r6 > r5 goto l0_%=; \
+ r2 = *(u8*)(r2 + 0); \
+l0_%=: r0 = 0; \
+ exit; \
+" :
+ : __imm_const(xdp_md_data, offsetof(struct xdp_md, data)),
+ __imm_const(xdp_md_data_end, offsetof(struct xdp_md, data_end)),
+ __imm_const(xdp_md_data_meta, offsetof(struct xdp_md, data_meta))
+ : __clobber_all);
+}
+
+SEC("xdp")
+__description("meta access, test11")
+__success __retval(0)
+__naked void meta_access_test11(void)
+{
+ asm volatile (" \
+ r2 = *(u32*)(r1 + %[xdp_md_data_meta]); \
+ r3 = *(u32*)(r1 + %[xdp_md_data]); \
+ r5 = 42; \
+ r6 = 24; \
+ *(u64*)(r10 - 8) = r5; \
+ lock *(u64 *)(r10 - 8) += r6; \
+ r5 = *(u64*)(r10 - 8); \
+ if r5 > 100 goto l0_%=; \
+ r2 += r5; \
+ r5 = r2; \
+ r6 = r2; \
+ r6 += 8; \
+ if r6 > r3 goto l0_%=; \
+ r5 = *(u8*)(r5 + 0); \
+l0_%=: r0 = 0; \
+ exit; \
+" :
+ : __imm_const(xdp_md_data, offsetof(struct xdp_md, data)),
+ __imm_const(xdp_md_data_meta, offsetof(struct xdp_md, data_meta))
+ : __clobber_all);
+}
+
+SEC("xdp")
+__description("meta access, test12")
+__success __retval(0)
+__naked void meta_access_test12(void)
+{
+ asm volatile (" \
+ r2 = *(u32*)(r1 + %[xdp_md_data_meta]); \
+ r3 = *(u32*)(r1 + %[xdp_md_data]); \
+ r4 = *(u32*)(r1 + %[xdp_md_data_end]); \
+ r5 = r3; \
+ r5 += 16; \
+ if r5 > r4 goto l0_%=; \
+ r0 = *(u8*)(r3 + 0); \
+ r5 = r2; \
+ r5 += 16; \
+ if r5 > r3 goto l0_%=; \
+ r0 = *(u8*)(r2 + 0); \
+l0_%=: r0 = 0; \
+ exit; \
+" :
+ : __imm_const(xdp_md_data, offsetof(struct xdp_md, data)),
+ __imm_const(xdp_md_data_end, offsetof(struct xdp_md, data_end)),
+ __imm_const(xdp_md_data_meta, offsetof(struct xdp_md, data_meta))
+ : __clobber_all);
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/verifier_movsx.c b/tools/testing/selftests/bpf/progs/verifier_movsx.c
new file mode 100644
index 000000000000..cbb9d6714f53
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/verifier_movsx.c
@@ -0,0 +1,239 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include <linux/bpf.h>
+#include <bpf/bpf_helpers.h>
+#include "bpf_misc.h"
+
+#if (defined(__TARGET_ARCH_arm64) || defined(__TARGET_ARCH_x86) || \
+ (defined(__TARGET_ARCH_riscv) && __riscv_xlen == 64) || \
+ defined(__TARGET_ARCH_arm) || defined(__TARGET_ARCH_s390) || \
+ defined(__TARGET_ARCH_loongarch)) && \
+ __clang_major__ >= 18
+
+SEC("socket")
+__description("MOV32SX, S8")
+__success __success_unpriv __retval(0x23)
+__naked void mov32sx_s8(void)
+{
+ asm volatile (" \
+ w0 = 0xff23; \
+ w0 = (s8)w0; \
+ exit; \
+" ::: __clobber_all);
+}
+
+SEC("socket")
+__description("MOV32SX, S16")
+__success __success_unpriv __retval(0xFFFFff23)
+__naked void mov32sx_s16(void)
+{
+ asm volatile (" \
+ w0 = 0xff23; \
+ w0 = (s16)w0; \
+ exit; \
+" ::: __clobber_all);
+}
+
+SEC("socket")
+__description("MOV64SX, S8")
+__success __success_unpriv __retval(-2)
+__naked void mov64sx_s8(void)
+{
+ asm volatile (" \
+ r0 = 0x1fe; \
+ r0 = (s8)r0; \
+ exit; \
+" ::: __clobber_all);
+}
+
+SEC("socket")
+__description("MOV64SX, S16")
+__success __success_unpriv __retval(0xf23)
+__naked void mov64sx_s16(void)
+{
+ asm volatile (" \
+ r0 = 0xf0f23; \
+ r0 = (s16)r0; \
+ exit; \
+" ::: __clobber_all);
+}
+
+SEC("socket")
+__description("MOV64SX, S32")
+__success __success_unpriv __retval(-1)
+__naked void mov64sx_s32(void)
+{
+ asm volatile (" \
+ r0 = 0xfffffffe; \
+ r0 = (s32)r0; \
+ r0 >>= 1; \
+ exit; \
+" ::: __clobber_all);
+}
+
+SEC("socket")
+__description("MOV32SX, S8, range_check")
+__success __success_unpriv __retval(1)
+__naked void mov32sx_s8_range(void)
+{
+ asm volatile (" \
+ call %[bpf_get_prandom_u32]; \
+ w1 = (s8)w0; \
+ /* w1 with s8 range */ \
+ if w1 s> 0x7f goto l0_%=; \
+ if w1 s< -0x80 goto l0_%=; \
+ r0 = 1; \
+l1_%=: \
+ exit; \
+l0_%=: \
+ r0 = 2; \
+ goto l1_%=; \
+" :
+ : __imm(bpf_get_prandom_u32)
+ : __clobber_all);
+}
+
+SEC("socket")
+__description("MOV32SX, S16, range_check")
+__success __success_unpriv __retval(1)
+__naked void mov32sx_s16_range(void)
+{
+ asm volatile (" \
+ call %[bpf_get_prandom_u32]; \
+ w1 = (s16)w0; \
+ /* w1 with s16 range */ \
+ if w1 s> 0x7fff goto l0_%=; \
+ if w1 s< -0x80ff goto l0_%=; \
+ r0 = 1; \
+l1_%=: \
+ exit; \
+l0_%=: \
+ r0 = 2; \
+ goto l1_%=; \
+" :
+ : __imm(bpf_get_prandom_u32)
+ : __clobber_all);
+}
+
+SEC("socket")
+__description("MOV32SX, S16, range_check 2")
+__success __success_unpriv __retval(1)
+__naked void mov32sx_s16_range_2(void)
+{
+ asm volatile (" \
+ r1 = 65535; \
+ w2 = (s16)w1; \
+ r2 >>= 1; \
+ if r2 != 0x7fffFFFF goto l0_%=; \
+ r0 = 1; \
+l1_%=: \
+ exit; \
+l0_%=: \
+ r0 = 0; \
+ goto l1_%=; \
+" :
+ : __imm(bpf_get_prandom_u32)
+ : __clobber_all);
+}
+
+SEC("socket")
+__description("MOV64SX, S8, range_check")
+__success __success_unpriv __retval(1)
+__naked void mov64sx_s8_range(void)
+{
+ asm volatile (" \
+ call %[bpf_get_prandom_u32]; \
+ r1 = (s8)r0; \
+ /* r1 with s8 range */ \
+ if r1 s> 0x7f goto l0_%=; \
+ if r1 s< -0x80 goto l0_%=; \
+ r0 = 1; \
+l1_%=: \
+ exit; \
+l0_%=: \
+ r0 = 2; \
+ goto l1_%=; \
+" :
+ : __imm(bpf_get_prandom_u32)
+ : __clobber_all);
+}
+
+SEC("socket")
+__description("MOV64SX, S16, range_check")
+__success __success_unpriv __retval(1)
+__naked void mov64sx_s16_range(void)
+{
+ asm volatile (" \
+ call %[bpf_get_prandom_u32]; \
+ r1 = (s16)r0; \
+ /* r1 with s16 range */ \
+ if r1 s> 0x7fff goto l0_%=; \
+ if r1 s< -0x8000 goto l0_%=; \
+ r0 = 1; \
+l1_%=: \
+ exit; \
+l0_%=: \
+ r0 = 2; \
+ goto l1_%=; \
+" :
+ : __imm(bpf_get_prandom_u32)
+ : __clobber_all);
+}
+
+SEC("socket")
+__description("MOV64SX, S32, range_check")
+__success __success_unpriv __retval(1)
+__naked void mov64sx_s32_range(void)
+{
+ asm volatile (" \
+ call %[bpf_get_prandom_u32]; \
+ r1 = (s32)r0; \
+ /* r1 with s32 range */ \
+ if r1 s> 0x7fffffff goto l0_%=; \
+ if r1 s< -0x80000000 goto l0_%=; \
+ r0 = 1; \
+l1_%=: \
+ exit; \
+l0_%=: \
+ r0 = 2; \
+ goto l1_%=; \
+" :
+ : __imm(bpf_get_prandom_u32)
+ : __clobber_all);
+}
+
+SEC("socket")
+__description("MOV64SX, S16, R10 Sign Extension")
+__failure __msg("R1 type=scalar expected=fp, pkt, pkt_meta, map_key, map_value, mem, ringbuf_mem, buf, trusted_ptr_")
+__failure_unpriv __msg_unpriv("R10 sign-extension part of pointer")
+__naked void mov64sx_s16_r10(void)
+{
+ asm volatile (" \
+ r1 = 553656332; \
+ *(u32 *)(r10 - 8) = r1; \
+ r1 = (s16)r10; \
+ r1 += -8; \
+ r2 = 3; \
+ if r2 <= r1 goto l0_%=; \
+l0_%=: \
+ call %[bpf_trace_printk]; \
+ r0 = 0; \
+ exit; \
+" :
+ : __imm(bpf_trace_printk)
+ : __clobber_all);
+}
+
+#else
+
+SEC("socket")
+__description("cpuv4 is not supported by compiler or jit, use a dummy test")
+__success
+int dummy_test(void)
+{
+ return 0;
+}
+
+#endif
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/verifier_netfilter_ctx.c b/tools/testing/selftests/bpf/progs/verifier_netfilter_ctx.c
new file mode 100644
index 000000000000..65bba330e7e5
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/verifier_netfilter_ctx.c
@@ -0,0 +1,121 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include "vmlinux.h"
+
+#include "bpf_misc.h"
+
+#include <bpf/bpf_endian.h>
+#include <bpf/bpf_tracing.h>
+#include <bpf/bpf_helpers.h>
+
+SEC("netfilter")
+__description("netfilter invalid context access, size too short")
+__failure __msg("invalid bpf_context access")
+__naked void with_invalid_ctx_access_test1(void)
+{
+ asm volatile (" \
+ r2 = *(u8*)(r1 + %[__bpf_nf_ctx_state]); \
+ r0 = 0; \
+ exit; \
+" :
+ : __imm_const(__bpf_nf_ctx_state, offsetof(struct bpf_nf_ctx, state))
+ : __clobber_all);
+}
+
+SEC("netfilter")
+__description("netfilter invalid context access, size too short")
+__failure __msg("invalid bpf_context access")
+__naked void with_invalid_ctx_access_test2(void)
+{
+ asm volatile (" \
+ r2 = *(u16*)(r1 + %[__bpf_nf_ctx_skb]); \
+ r0 = 0; \
+ exit; \
+" :
+ : __imm_const(__bpf_nf_ctx_skb, offsetof(struct bpf_nf_ctx, skb))
+ : __clobber_all);
+}
+
+SEC("netfilter")
+__description("netfilter invalid context access, past end of ctx")
+__failure __msg("invalid bpf_context access")
+__naked void with_invalid_ctx_access_test3(void)
+{
+ asm volatile (" \
+ r2 = *(u64*)(r1 + %[__bpf_nf_ctx_size]); \
+ r0 = 0; \
+ exit; \
+" :
+ : __imm_const(__bpf_nf_ctx_size, sizeof(struct bpf_nf_ctx))
+ : __clobber_all);
+}
+
+SEC("netfilter")
+__description("netfilter invalid context, write")
+__failure __msg("invalid bpf_context access")
+__naked void with_invalid_ctx_access_test4(void)
+{
+ asm volatile (" \
+ r2 = r1; \
+ *(u64*)(r2 + 0) = r1; \
+ r0 = 1; \
+ exit; \
+" :
+ : __imm_const(__bpf_nf_ctx_skb, offsetof(struct bpf_nf_ctx, skb))
+ : __clobber_all);
+}
+
+#define NF_DROP 0
+#define NF_ACCEPT 1
+
+SEC("netfilter")
+__description("netfilter valid context read and invalid write")
+__failure __msg("only read is supported")
+int with_invalid_ctx_access_test5(struct bpf_nf_ctx *ctx)
+{
+ struct nf_hook_state *state = (void *)ctx->state;
+
+ state->sk = NULL;
+ return NF_ACCEPT;
+}
+
+extern int bpf_dynptr_from_skb(struct sk_buff *skb, __u64 flags,
+ struct bpf_dynptr *ptr__uninit) __ksym;
+extern void *bpf_dynptr_slice(const struct bpf_dynptr *ptr, uint32_t offset,
+ void *buffer, uint32_t buffer__sz) __ksym;
+
+SEC("netfilter")
+__description("netfilter test prog with skb and state read access")
+__success __failure_unpriv
+__retval(0)
+int with_valid_ctx_access_test6(struct bpf_nf_ctx *ctx)
+{
+ const struct nf_hook_state *state = ctx->state;
+ struct sk_buff *skb = ctx->skb;
+ const struct iphdr *iph;
+ const struct tcphdr *th;
+ u8 buffer_iph[20] = {};
+ u8 buffer_th[40] = {};
+ struct bpf_dynptr ptr;
+ uint8_t ihl;
+
+ if (skb->len <= 20 || bpf_dynptr_from_skb(skb, 0, &ptr))
+ return NF_ACCEPT;
+
+ iph = bpf_dynptr_slice(&ptr, 0, buffer_iph, sizeof(buffer_iph));
+ if (!iph)
+ return NF_ACCEPT;
+
+ if (state->pf != 2)
+ return NF_ACCEPT;
+
+ ihl = iph->ihl << 2;
+
+ th = bpf_dynptr_slice(&ptr, ihl, buffer_th, sizeof(buffer_th));
+ if (!th)
+ return NF_ACCEPT;
+
+ return th->dest == bpf_htons(22) ? NF_ACCEPT : NF_DROP;
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/verifier_netfilter_retcode.c b/tools/testing/selftests/bpf/progs/verifier_netfilter_retcode.c
new file mode 100644
index 000000000000..e1ffa5d32ff0
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/verifier_netfilter_retcode.c
@@ -0,0 +1,49 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include <linux/bpf.h>
+#include <bpf/bpf_helpers.h>
+#include "bpf_misc.h"
+
+SEC("netfilter")
+__description("bpf_exit with invalid return code. test1")
+__failure __msg("R0 is not a known value")
+__naked void with_invalid_return_code_test1(void)
+{
+ asm volatile (" \
+ r0 = *(u64*)(r1 + 0); \
+ exit; \
+" ::: __clobber_all);
+}
+
+SEC("netfilter")
+__description("bpf_exit with valid return code. test2")
+__success
+__naked void with_valid_return_code_test2(void)
+{
+ asm volatile (" \
+ r0 = 0; \
+ exit; \
+" ::: __clobber_all);
+}
+
+SEC("netfilter")
+__description("bpf_exit with valid return code. test3")
+__success
+__naked void with_valid_return_code_test3(void)
+{
+ asm volatile (" \
+ r0 = 1; \
+ exit; \
+" ::: __clobber_all);
+}
+
+SEC("netfilter")
+__description("bpf_exit with invalid return code. test4")
+__failure __msg("R0 has smin=2 smax=2 should have been in [0, 1]")
+__naked void with_invalid_return_code_test4(void)
+{
+ asm volatile (" \
+ r0 = 2; \
+ exit; \
+" ::: __clobber_all);
+}
diff --git a/tools/testing/selftests/bpf/progs/verifier_precision.c b/tools/testing/selftests/bpf/progs/verifier_precision.c
new file mode 100644
index 000000000000..6b564d4c0986
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/verifier_precision.c
@@ -0,0 +1,133 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (C) 2023 SUSE LLC */
+#include <linux/bpf.h>
+#include <bpf/bpf_helpers.h>
+#include "bpf_misc.h"
+
+SEC("?raw_tp")
+__success __log_level(2)
+__msg("mark_precise: frame0: regs=r2 stack= before 3: (bf) r1 = r10")
+__msg("mark_precise: frame0: regs=r2 stack= before 2: (55) if r2 != 0xfffffff8 goto pc+2")
+__msg("mark_precise: frame0: regs=r2 stack= before 1: (87) r2 = -r2")
+__msg("mark_precise: frame0: regs=r2 stack= before 0: (b7) r2 = 8")
+__naked int bpf_neg(void)
+{
+ asm volatile (
+ "r2 = 8;"
+ "r2 = -r2;"
+ "if r2 != -8 goto 1f;"
+ "r1 = r10;"
+ "r1 += r2;"
+ "1:"
+ "r0 = 0;"
+ "exit;"
+ ::: __clobber_all);
+}
+
+SEC("?raw_tp")
+__success __log_level(2)
+__msg("mark_precise: frame0: regs=r2 stack= before 3: (bf) r1 = r10")
+__msg("mark_precise: frame0: regs=r2 stack= before 2: (55) if r2 != 0x0 goto pc+2")
+__msg("mark_precise: frame0: regs=r2 stack= before 1: (d4) r2 = le16 r2")
+__msg("mark_precise: frame0: regs=r2 stack= before 0: (b7) r2 = 0")
+__naked int bpf_end_to_le(void)
+{
+ asm volatile (
+ "r2 = 0;"
+ "r2 = le16 r2;"
+ "if r2 != 0 goto 1f;"
+ "r1 = r10;"
+ "r1 += r2;"
+ "1:"
+ "r0 = 0;"
+ "exit;"
+ ::: __clobber_all);
+}
+
+
+SEC("?raw_tp")
+__success __log_level(2)
+__msg("mark_precise: frame0: regs=r2 stack= before 3: (bf) r1 = r10")
+__msg("mark_precise: frame0: regs=r2 stack= before 2: (55) if r2 != 0x0 goto pc+2")
+__msg("mark_precise: frame0: regs=r2 stack= before 1: (dc) r2 = be16 r2")
+__msg("mark_precise: frame0: regs=r2 stack= before 0: (b7) r2 = 0")
+__naked int bpf_end_to_be(void)
+{
+ asm volatile (
+ "r2 = 0;"
+ "r2 = be16 r2;"
+ "if r2 != 0 goto 1f;"
+ "r1 = r10;"
+ "r1 += r2;"
+ "1:"
+ "r0 = 0;"
+ "exit;"
+ ::: __clobber_all);
+}
+
+#if (defined(__TARGET_ARCH_arm64) || defined(__TARGET_ARCH_x86) || \
+ (defined(__TARGET_ARCH_riscv) && __riscv_xlen == 64) || \
+ defined(__TARGET_ARCH_arm) || defined(__TARGET_ARCH_s390)) && \
+ __clang_major__ >= 18
+
+SEC("?raw_tp")
+__success __log_level(2)
+__msg("mark_precise: frame0: regs=r2 stack= before 3: (bf) r1 = r10")
+__msg("mark_precise: frame0: regs=r2 stack= before 2: (55) if r2 != 0x0 goto pc+2")
+__msg("mark_precise: frame0: regs=r2 stack= before 1: (d7) r2 = bswap16 r2")
+__msg("mark_precise: frame0: regs=r2 stack= before 0: (b7) r2 = 0")
+__naked int bpf_end_bswap(void)
+{
+ asm volatile (
+ "r2 = 0;"
+ "r2 = bswap16 r2;"
+ "if r2 != 0 goto 1f;"
+ "r1 = r10;"
+ "r1 += r2;"
+ "1:"
+ "r0 = 0;"
+ "exit;"
+ ::: __clobber_all);
+}
+
+#endif /* v4 instruction */
+
+SEC("?raw_tp")
+__success __log_level(2)
+/*
+ * Without the bug fix there will be no history between "last_idx 3 first_idx 3"
+ * and "parent state regs=" lines. "R0_w=6" parts are here to help anchor
+ * expected log messages to the one specific mark_chain_precision operation.
+ *
+ * This is quite fragile: if verifier checkpointing heuristic changes, this
+ * might need adjusting.
+ */
+__msg("2: (07) r0 += 1 ; R0_w=6")
+__msg("3: (35) if r0 >= 0xa goto pc+1")
+__msg("mark_precise: frame0: last_idx 3 first_idx 3 subseq_idx -1")
+__msg("mark_precise: frame0: regs=r0 stack= before 2: (07) r0 += 1")
+__msg("mark_precise: frame0: regs=r0 stack= before 1: (07) r0 += 1")
+__msg("mark_precise: frame0: regs=r0 stack= before 4: (05) goto pc-4")
+__msg("mark_precise: frame0: regs=r0 stack= before 3: (35) if r0 >= 0xa goto pc+1")
+__msg("mark_precise: frame0: parent state regs= stack=: R0_rw=P4")
+__msg("3: R0_w=6")
+__naked int state_loop_first_last_equal(void)
+{
+ asm volatile (
+ "r0 = 0;"
+ "l0_%=:"
+ "r0 += 1;"
+ "r0 += 1;"
+ /* every few iterations we'll have a checkpoint here with
+ * first_idx == last_idx, potentially confusing precision
+ * backtracking logic
+ */
+ "if r0 >= 10 goto l1_%=;" /* checkpoint + mark_precise */
+ "goto l0_%=;"
+ "l1_%=:"
+ "exit;"
+ ::: __clobber_common
+ );
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/verifier_prevent_map_lookup.c b/tools/testing/selftests/bpf/progs/verifier_prevent_map_lookup.c
new file mode 100644
index 000000000000..8d27c780996f
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/verifier_prevent_map_lookup.c
@@ -0,0 +1,61 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Converted from tools/testing/selftests/bpf/verifier/prevent_map_lookup.c */
+
+#include <linux/bpf.h>
+#include <bpf/bpf_helpers.h>
+#include "bpf_misc.h"
+
+struct {
+ __uint(type, BPF_MAP_TYPE_STACK_TRACE);
+ __uint(max_entries, 1);
+ __type(key, __u32);
+ __type(value, __u64);
+} map_stacktrace SEC(".maps");
+
+struct {
+ __uint(type, BPF_MAP_TYPE_PROG_ARRAY);
+ __uint(max_entries, 8);
+ __uint(key_size, sizeof(int));
+ __array(values, void (void));
+} map_prog2_socket SEC(".maps");
+
+SEC("perf_event")
+__description("prevent map lookup in stack trace")
+__failure __msg("cannot pass map_type 7 into func bpf_map_lookup_elem")
+__naked void map_lookup_in_stack_trace(void)
+{
+ asm volatile (" \
+ r1 = 0; \
+ *(u64*)(r10 - 8) = r1; \
+ r2 = r10; \
+ r2 += -8; \
+ r1 = %[map_stacktrace] ll; \
+ call %[bpf_map_lookup_elem]; \
+ exit; \
+" :
+ : __imm(bpf_map_lookup_elem),
+ __imm_addr(map_stacktrace)
+ : __clobber_all);
+}
+
+SEC("socket")
+__description("prevent map lookup in prog array")
+__failure __msg("cannot pass map_type 3 into func bpf_map_lookup_elem")
+__failure_unpriv
+__naked void map_lookup_in_prog_array(void)
+{
+ asm volatile (" \
+ r1 = 0; \
+ *(u64*)(r10 - 8) = r1; \
+ r2 = r10; \
+ r2 += -8; \
+ r1 = %[map_prog2_socket] ll; \
+ call %[bpf_map_lookup_elem]; \
+ exit; \
+" :
+ : __imm(bpf_map_lookup_elem),
+ __imm_addr(map_prog2_socket)
+ : __clobber_all);
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/verifier_raw_stack.c b/tools/testing/selftests/bpf/progs/verifier_raw_stack.c
new file mode 100644
index 000000000000..7cc83acac727
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/verifier_raw_stack.c
@@ -0,0 +1,372 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Converted from tools/testing/selftests/bpf/verifier/raw_stack.c */
+
+#include <linux/bpf.h>
+#include <bpf/bpf_helpers.h>
+#include "bpf_misc.h"
+
+SEC("socket")
+__description("raw_stack: no skb_load_bytes")
+__success
+__failure_unpriv __msg_unpriv("invalid read from stack R6 off=-8 size=8")
+__naked void stack_no_skb_load_bytes(void)
+{
+ asm volatile (" \
+ r2 = 4; \
+ r6 = r10; \
+ r6 += -8; \
+ r3 = r6; \
+ r4 = 8; \
+ /* Call to skb_load_bytes() omitted. */ \
+ r0 = *(u64*)(r6 + 0); \
+ exit; \
+" ::: __clobber_all);
+}
+
+SEC("tc")
+__description("raw_stack: skb_load_bytes, negative len")
+__failure __msg("R4 min value is negative")
+__naked void skb_load_bytes_negative_len(void)
+{
+ asm volatile (" \
+ r2 = 4; \
+ r6 = r10; \
+ r6 += -8; \
+ r3 = r6; \
+ r4 = -8; \
+ call %[bpf_skb_load_bytes]; \
+ r0 = *(u64*)(r6 + 0); \
+ exit; \
+" :
+ : __imm(bpf_skb_load_bytes)
+ : __clobber_all);
+}
+
+SEC("tc")
+__description("raw_stack: skb_load_bytes, negative len 2")
+__failure __msg("R4 min value is negative")
+__naked void load_bytes_negative_len_2(void)
+{
+ asm volatile (" \
+ r2 = 4; \
+ r6 = r10; \
+ r6 += -8; \
+ r3 = r6; \
+ r4 = %[__imm_0]; \
+ call %[bpf_skb_load_bytes]; \
+ r0 = *(u64*)(r6 + 0); \
+ exit; \
+" :
+ : __imm(bpf_skb_load_bytes),
+ __imm_const(__imm_0, ~0)
+ : __clobber_all);
+}
+
+SEC("tc")
+__description("raw_stack: skb_load_bytes, zero len")
+__failure __msg("R4 invalid zero-sized read: u64=[0,0]")
+__naked void skb_load_bytes_zero_len(void)
+{
+ asm volatile (" \
+ r2 = 4; \
+ r6 = r10; \
+ r6 += -8; \
+ r3 = r6; \
+ r4 = 0; \
+ call %[bpf_skb_load_bytes]; \
+ r0 = *(u64*)(r6 + 0); \
+ exit; \
+" :
+ : __imm(bpf_skb_load_bytes)
+ : __clobber_all);
+}
+
+SEC("tc")
+__description("raw_stack: skb_load_bytes, no init")
+__success __retval(0)
+__naked void skb_load_bytes_no_init(void)
+{
+ asm volatile (" \
+ r2 = 4; \
+ r6 = r10; \
+ r6 += -8; \
+ r3 = r6; \
+ r4 = 8; \
+ call %[bpf_skb_load_bytes]; \
+ r0 = *(u64*)(r6 + 0); \
+ exit; \
+" :
+ : __imm(bpf_skb_load_bytes)
+ : __clobber_all);
+}
+
+SEC("tc")
+__description("raw_stack: skb_load_bytes, init")
+__success __retval(0)
+__naked void stack_skb_load_bytes_init(void)
+{
+ asm volatile (" \
+ r2 = 4; \
+ r6 = r10; \
+ r6 += -8; \
+ r3 = 0xcafe; \
+ *(u64*)(r6 + 0) = r3; \
+ r3 = r6; \
+ r4 = 8; \
+ call %[bpf_skb_load_bytes]; \
+ r0 = *(u64*)(r6 + 0); \
+ exit; \
+" :
+ : __imm(bpf_skb_load_bytes)
+ : __clobber_all);
+}
+
+SEC("tc")
+__description("raw_stack: skb_load_bytes, spilled regs around bounds")
+__success __retval(0)
+__naked void bytes_spilled_regs_around_bounds(void)
+{
+ asm volatile (" \
+ r2 = 4; \
+ r6 = r10; \
+ r6 += -16; \
+ *(u64*)(r6 - 8) = r1; \
+ *(u64*)(r6 + 8) = r1; \
+ r3 = r6; \
+ r4 = 8; \
+ call %[bpf_skb_load_bytes]; \
+ r0 = *(u64*)(r6 - 8); \
+ r2 = *(u64*)(r6 + 8); \
+ r0 = *(u32*)(r0 + %[__sk_buff_mark]); \
+ r2 = *(u32*)(r2 + %[__sk_buff_priority]); \
+ r0 += r2; \
+ exit; \
+" :
+ : __imm(bpf_skb_load_bytes),
+ __imm_const(__sk_buff_mark, offsetof(struct __sk_buff, mark)),
+ __imm_const(__sk_buff_priority, offsetof(struct __sk_buff, priority))
+ : __clobber_all);
+}
+
+SEC("tc")
+__description("raw_stack: skb_load_bytes, spilled regs corruption")
+__failure __msg("R0 invalid mem access 'scalar'")
+__flag(BPF_F_ANY_ALIGNMENT)
+__naked void load_bytes_spilled_regs_corruption(void)
+{
+ asm volatile (" \
+ r2 = 4; \
+ r6 = r10; \
+ r6 += -8; \
+ *(u64*)(r6 + 0) = r1; \
+ r3 = r6; \
+ r4 = 8; \
+ call %[bpf_skb_load_bytes]; \
+ r0 = *(u64*)(r6 + 0); \
+ r0 = *(u32*)(r0 + %[__sk_buff_mark]); \
+ exit; \
+" :
+ : __imm(bpf_skb_load_bytes),
+ __imm_const(__sk_buff_mark, offsetof(struct __sk_buff, mark))
+ : __clobber_all);
+}
+
+SEC("tc")
+__description("raw_stack: skb_load_bytes, spilled regs corruption 2")
+__failure __msg("R3 invalid mem access 'scalar'")
+__flag(BPF_F_ANY_ALIGNMENT)
+__naked void bytes_spilled_regs_corruption_2(void)
+{
+ asm volatile (" \
+ r2 = 4; \
+ r6 = r10; \
+ r6 += -16; \
+ *(u64*)(r6 - 8) = r1; \
+ *(u64*)(r6 + 0) = r1; \
+ *(u64*)(r6 + 8) = r1; \
+ r3 = r6; \
+ r4 = 8; \
+ call %[bpf_skb_load_bytes]; \
+ r0 = *(u64*)(r6 - 8); \
+ r2 = *(u64*)(r6 + 8); \
+ r3 = *(u64*)(r6 + 0); \
+ r0 = *(u32*)(r0 + %[__sk_buff_mark]); \
+ r2 = *(u32*)(r2 + %[__sk_buff_priority]); \
+ r0 += r2; \
+ r3 = *(u32*)(r3 + %[__sk_buff_pkt_type]); \
+ r0 += r3; \
+ exit; \
+" :
+ : __imm(bpf_skb_load_bytes),
+ __imm_const(__sk_buff_mark, offsetof(struct __sk_buff, mark)),
+ __imm_const(__sk_buff_pkt_type, offsetof(struct __sk_buff, pkt_type)),
+ __imm_const(__sk_buff_priority, offsetof(struct __sk_buff, priority))
+ : __clobber_all);
+}
+
+SEC("tc")
+__description("raw_stack: skb_load_bytes, spilled regs + data")
+__success __retval(0)
+__naked void load_bytes_spilled_regs_data(void)
+{
+ asm volatile (" \
+ r2 = 4; \
+ r6 = r10; \
+ r6 += -16; \
+ *(u64*)(r6 - 8) = r1; \
+ *(u64*)(r6 + 0) = r1; \
+ *(u64*)(r6 + 8) = r1; \
+ r3 = r6; \
+ r4 = 8; \
+ call %[bpf_skb_load_bytes]; \
+ r0 = *(u64*)(r6 - 8); \
+ r2 = *(u64*)(r6 + 8); \
+ r3 = *(u64*)(r6 + 0); \
+ r0 = *(u32*)(r0 + %[__sk_buff_mark]); \
+ r2 = *(u32*)(r2 + %[__sk_buff_priority]); \
+ r0 += r2; \
+ r0 += r3; \
+ exit; \
+" :
+ : __imm(bpf_skb_load_bytes),
+ __imm_const(__sk_buff_mark, offsetof(struct __sk_buff, mark)),
+ __imm_const(__sk_buff_priority, offsetof(struct __sk_buff, priority))
+ : __clobber_all);
+}
+
+SEC("tc")
+__description("raw_stack: skb_load_bytes, invalid access 1")
+__failure __msg("invalid indirect access to stack R3 off=-513 size=8")
+__naked void load_bytes_invalid_access_1(void)
+{
+ asm volatile (" \
+ r2 = 4; \
+ r6 = r10; \
+ r6 += -513; \
+ r3 = r6; \
+ r4 = 8; \
+ call %[bpf_skb_load_bytes]; \
+ r0 = *(u64*)(r6 + 0); \
+ exit; \
+" :
+ : __imm(bpf_skb_load_bytes)
+ : __clobber_all);
+}
+
+SEC("tc")
+__description("raw_stack: skb_load_bytes, invalid access 2")
+__failure __msg("invalid indirect access to stack R3 off=-1 size=8")
+__naked void load_bytes_invalid_access_2(void)
+{
+ asm volatile (" \
+ r2 = 4; \
+ r6 = r10; \
+ r6 += -1; \
+ r3 = r6; \
+ r4 = 8; \
+ call %[bpf_skb_load_bytes]; \
+ r0 = *(u64*)(r6 + 0); \
+ exit; \
+" :
+ : __imm(bpf_skb_load_bytes)
+ : __clobber_all);
+}
+
+SEC("tc")
+__description("raw_stack: skb_load_bytes, invalid access 3")
+__failure __msg("R4 min value is negative")
+__naked void load_bytes_invalid_access_3(void)
+{
+ asm volatile (" \
+ r2 = 4; \
+ r6 = r10; \
+ r6 += 0xffffffff; \
+ r3 = r6; \
+ r4 = 0xffffffff; \
+ call %[bpf_skb_load_bytes]; \
+ r0 = *(u64*)(r6 + 0); \
+ exit; \
+" :
+ : __imm(bpf_skb_load_bytes)
+ : __clobber_all);
+}
+
+SEC("tc")
+__description("raw_stack: skb_load_bytes, invalid access 4")
+__failure
+__msg("R4 unbounded memory access, use 'var &= const' or 'if (var < const)'")
+__naked void load_bytes_invalid_access_4(void)
+{
+ asm volatile (" \
+ r2 = 4; \
+ r6 = r10; \
+ r6 += -1; \
+ r3 = r6; \
+ r4 = 0x7fffffff; \
+ call %[bpf_skb_load_bytes]; \
+ r0 = *(u64*)(r6 + 0); \
+ exit; \
+" :
+ : __imm(bpf_skb_load_bytes)
+ : __clobber_all);
+}
+
+SEC("tc")
+__description("raw_stack: skb_load_bytes, invalid access 5")
+__failure
+__msg("R4 unbounded memory access, use 'var &= const' or 'if (var < const)'")
+__naked void load_bytes_invalid_access_5(void)
+{
+ asm volatile (" \
+ r2 = 4; \
+ r6 = r10; \
+ r6 += -512; \
+ r3 = r6; \
+ r4 = 0x7fffffff; \
+ call %[bpf_skb_load_bytes]; \
+ r0 = *(u64*)(r6 + 0); \
+ exit; \
+" :
+ : __imm(bpf_skb_load_bytes)
+ : __clobber_all);
+}
+
+SEC("tc")
+__description("raw_stack: skb_load_bytes, invalid access 6")
+__failure __msg("invalid zero-sized read")
+__naked void load_bytes_invalid_access_6(void)
+{
+ asm volatile (" \
+ r2 = 4; \
+ r6 = r10; \
+ r6 += -512; \
+ r3 = r6; \
+ r4 = 0; \
+ call %[bpf_skb_load_bytes]; \
+ r0 = *(u64*)(r6 + 0); \
+ exit; \
+" :
+ : __imm(bpf_skb_load_bytes)
+ : __clobber_all);
+}
+
+SEC("tc")
+__description("raw_stack: skb_load_bytes, large access")
+__success __retval(0)
+__naked void skb_load_bytes_large_access(void)
+{
+ asm volatile (" \
+ r2 = 4; \
+ r6 = r10; \
+ r6 += -512; \
+ r3 = r6; \
+ r4 = 512; \
+ call %[bpf_skb_load_bytes]; \
+ r0 = *(u64*)(r6 + 0); \
+ exit; \
+" :
+ : __imm(bpf_skb_load_bytes)
+ : __clobber_all);
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/verifier_raw_tp_writable.c b/tools/testing/selftests/bpf/progs/verifier_raw_tp_writable.c
new file mode 100644
index 000000000000..14a0172e2141
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/verifier_raw_tp_writable.c
@@ -0,0 +1,50 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Converted from tools/testing/selftests/bpf/verifier/raw_tp_writable.c */
+
+#include <linux/bpf.h>
+#include <bpf/bpf_helpers.h>
+#include "bpf_misc.h"
+
+struct {
+ __uint(type, BPF_MAP_TYPE_HASH);
+ __uint(max_entries, 1);
+ __type(key, long long);
+ __type(value, long long);
+} map_hash_8b SEC(".maps");
+
+SEC("raw_tracepoint.w")
+__description("raw_tracepoint_writable: reject variable offset")
+__failure
+__msg("R6 invalid variable buffer offset: off=0, var_off=(0x0; 0xffffffff)")
+__flag(BPF_F_ANY_ALIGNMENT)
+__naked void tracepoint_writable_reject_variable_offset(void)
+{
+ asm volatile (" \
+ /* r6 is our tp buffer */ \
+ r6 = *(u64*)(r1 + 0); \
+ r1 = %[map_hash_8b] ll; \
+ /* move the key (== 0) to r10-8 */ \
+ w0 = 0; \
+ r2 = r10; \
+ r2 += -8; \
+ *(u64*)(r2 + 0) = r0; \
+ /* lookup in the map */ \
+ call %[bpf_map_lookup_elem]; \
+ /* exit clean if null */ \
+ if r0 != 0 goto l0_%=; \
+ exit; \
+l0_%=: /* shift the buffer pointer to a variable location */\
+ r0 = *(u32*)(r0 + 0); \
+ r6 += r0; \
+ /* clobber whatever's there */ \
+ r7 = 4242; \
+ *(u64*)(r6 + 0) = r7; \
+ r0 = 0; \
+ exit; \
+" :
+ : __imm(bpf_map_lookup_elem),
+ __imm_addr(map_hash_8b)
+ : __clobber_all);
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/verifier_ref_tracking.c b/tools/testing/selftests/bpf/progs/verifier_ref_tracking.c
new file mode 100644
index 000000000000..c4c6da21265e
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/verifier_ref_tracking.c
@@ -0,0 +1,1495 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Converted from tools/testing/selftests/bpf/verifier/ref_tracking.c */
+
+#include <linux/bpf.h>
+#include <bpf/bpf_helpers.h>
+#include "../../../include/linux/filter.h"
+#include "bpf_misc.h"
+
+#define BPF_SK_LOOKUP(func) \
+ /* struct bpf_sock_tuple tuple = {} */ \
+ "r2 = 0;" \
+ "*(u32*)(r10 - 8) = r2;" \
+ "*(u64*)(r10 - 16) = r2;" \
+ "*(u64*)(r10 - 24) = r2;" \
+ "*(u64*)(r10 - 32) = r2;" \
+ "*(u64*)(r10 - 40) = r2;" \
+ "*(u64*)(r10 - 48) = r2;" \
+ /* sk = func(ctx, &tuple, sizeof tuple, 0, 0) */ \
+ "r2 = r10;" \
+ "r2 += -48;" \
+ "r3 = %[sizeof_bpf_sock_tuple];"\
+ "r4 = 0;" \
+ "r5 = 0;" \
+ "call %[" #func "];"
+
+struct bpf_key {} __attribute__((preserve_access_index));
+
+extern void bpf_key_put(struct bpf_key *key) __ksym;
+extern struct bpf_key *bpf_lookup_system_key(__u64 id) __ksym;
+extern struct bpf_key *bpf_lookup_user_key(__u32 serial, __u64 flags) __ksym;
+
+/* BTF FUNC records are not generated for kfuncs referenced
+ * from inline assembly. These records are necessary for
+ * libbpf to link the program. The function below is a hack
+ * to ensure that BTF FUNC records are generated.
+ */
+void __kfunc_btf_root(void)
+{
+ bpf_key_put(0);
+ bpf_lookup_system_key(0);
+ bpf_lookup_user_key(0, 0);
+}
+
+#define MAX_ENTRIES 11
+
+struct test_val {
+ unsigned int index;
+ int foo[MAX_ENTRIES];
+};
+
+struct {
+ __uint(type, BPF_MAP_TYPE_ARRAY);
+ __uint(max_entries, 1);
+ __type(key, int);
+ __type(value, struct test_val);
+} map_array_48b SEC(".maps");
+
+struct {
+ __uint(type, BPF_MAP_TYPE_RINGBUF);
+ __uint(max_entries, 4096);
+} map_ringbuf SEC(".maps");
+
+void dummy_prog_42_tc(void);
+void dummy_prog_24_tc(void);
+void dummy_prog_loop1_tc(void);
+
+struct {
+ __uint(type, BPF_MAP_TYPE_PROG_ARRAY);
+ __uint(max_entries, 4);
+ __uint(key_size, sizeof(int));
+ __array(values, void (void));
+} map_prog1_tc SEC(".maps") = {
+ .values = {
+ [0] = (void *)&dummy_prog_42_tc,
+ [1] = (void *)&dummy_prog_loop1_tc,
+ [2] = (void *)&dummy_prog_24_tc,
+ },
+};
+
+SEC("tc")
+__auxiliary
+__naked void dummy_prog_42_tc(void)
+{
+ asm volatile ("r0 = 42; exit;");
+}
+
+SEC("tc")
+__auxiliary
+__naked void dummy_prog_24_tc(void)
+{
+ asm volatile ("r0 = 24; exit;");
+}
+
+SEC("tc")
+__auxiliary
+__naked void dummy_prog_loop1_tc(void)
+{
+ asm volatile (" \
+ r3 = 1; \
+ r2 = %[map_prog1_tc] ll; \
+ call %[bpf_tail_call]; \
+ r0 = 41; \
+ exit; \
+" :
+ : __imm(bpf_tail_call),
+ __imm_addr(map_prog1_tc)
+ : __clobber_all);
+}
+
+SEC("tc")
+__description("reference tracking: leak potential reference")
+__failure __msg("Unreleased reference")
+__naked void reference_tracking_leak_potential_reference(void)
+{
+ asm volatile (
+ BPF_SK_LOOKUP(bpf_sk_lookup_tcp)
+" r6 = r0; /* leak reference */ \
+ exit; \
+" :
+ : __imm(bpf_sk_lookup_tcp),
+ __imm_const(sizeof_bpf_sock_tuple, sizeof(struct bpf_sock_tuple))
+ : __clobber_all);
+}
+
+SEC("tc")
+__description("reference tracking: leak potential reference to sock_common")
+__failure __msg("Unreleased reference")
+__naked void potential_reference_to_sock_common_1(void)
+{
+ asm volatile (
+ BPF_SK_LOOKUP(bpf_skc_lookup_tcp)
+" r6 = r0; /* leak reference */ \
+ exit; \
+" :
+ : __imm(bpf_skc_lookup_tcp),
+ __imm_const(sizeof_bpf_sock_tuple, sizeof(struct bpf_sock_tuple))
+ : __clobber_all);
+}
+
+SEC("tc")
+__description("reference tracking: leak potential reference on stack")
+__failure __msg("Unreleased reference")
+__naked void leak_potential_reference_on_stack(void)
+{
+ asm volatile (
+ BPF_SK_LOOKUP(bpf_sk_lookup_tcp)
+" r4 = r10; \
+ r4 += -8; \
+ *(u64*)(r4 + 0) = r0; \
+ r0 = 0; \
+ exit; \
+" :
+ : __imm(bpf_sk_lookup_tcp),
+ __imm_const(sizeof_bpf_sock_tuple, sizeof(struct bpf_sock_tuple))
+ : __clobber_all);
+}
+
+SEC("tc")
+__description("reference tracking: leak potential reference on stack 2")
+__failure __msg("Unreleased reference")
+__naked void potential_reference_on_stack_2(void)
+{
+ asm volatile (
+ BPF_SK_LOOKUP(bpf_sk_lookup_tcp)
+" r4 = r10; \
+ r4 += -8; \
+ *(u64*)(r4 + 0) = r0; \
+ r0 = 0; \
+ r1 = 0; \
+ *(u64*)(r4 + 0) = r1; \
+ exit; \
+" :
+ : __imm(bpf_sk_lookup_tcp),
+ __imm_const(sizeof_bpf_sock_tuple, sizeof(struct bpf_sock_tuple))
+ : __clobber_all);
+}
+
+SEC("tc")
+__description("reference tracking: zero potential reference")
+__failure __msg("Unreleased reference")
+__naked void reference_tracking_zero_potential_reference(void)
+{
+ asm volatile (
+ BPF_SK_LOOKUP(bpf_sk_lookup_tcp)
+" r0 = 0; /* leak reference */ \
+ exit; \
+" :
+ : __imm(bpf_sk_lookup_tcp),
+ __imm_const(sizeof_bpf_sock_tuple, sizeof(struct bpf_sock_tuple))
+ : __clobber_all);
+}
+
+SEC("tc")
+__description("reference tracking: zero potential reference to sock_common")
+__failure __msg("Unreleased reference")
+__naked void potential_reference_to_sock_common_2(void)
+{
+ asm volatile (
+ BPF_SK_LOOKUP(bpf_skc_lookup_tcp)
+" r0 = 0; /* leak reference */ \
+ exit; \
+" :
+ : __imm(bpf_skc_lookup_tcp),
+ __imm_const(sizeof_bpf_sock_tuple, sizeof(struct bpf_sock_tuple))
+ : __clobber_all);
+}
+
+SEC("tc")
+__description("reference tracking: copy and zero potential references")
+__failure __msg("Unreleased reference")
+__naked void copy_and_zero_potential_references(void)
+{
+ asm volatile (
+ BPF_SK_LOOKUP(bpf_sk_lookup_tcp)
+" r7 = r0; \
+ r0 = 0; \
+ r7 = 0; /* leak reference */ \
+ exit; \
+" :
+ : __imm(bpf_sk_lookup_tcp),
+ __imm_const(sizeof_bpf_sock_tuple, sizeof(struct bpf_sock_tuple))
+ : __clobber_all);
+}
+
+SEC("lsm.s/bpf")
+__description("reference tracking: acquire/release user key reference")
+__success
+__naked void acquire_release_user_key_reference(void)
+{
+ asm volatile (" \
+ r1 = -3; \
+ r2 = 0; \
+ call %[bpf_lookup_user_key]; \
+ if r0 == 0 goto l0_%=; \
+ r1 = r0; \
+ call %[bpf_key_put]; \
+l0_%=: r0 = 0; \
+ exit; \
+" :
+ : __imm(bpf_key_put),
+ __imm(bpf_lookup_user_key)
+ : __clobber_all);
+}
+
+SEC("lsm.s/bpf")
+__description("reference tracking: acquire/release system key reference")
+__success
+__naked void acquire_release_system_key_reference(void)
+{
+ asm volatile (" \
+ r1 = 1; \
+ call %[bpf_lookup_system_key]; \
+ if r0 == 0 goto l0_%=; \
+ r1 = r0; \
+ call %[bpf_key_put]; \
+l0_%=: r0 = 0; \
+ exit; \
+" :
+ : __imm(bpf_key_put),
+ __imm(bpf_lookup_system_key)
+ : __clobber_all);
+}
+
+SEC("lsm.s/bpf")
+__description("reference tracking: release user key reference without check")
+__failure __msg("Possibly NULL pointer passed to trusted arg0")
+__naked void user_key_reference_without_check(void)
+{
+ asm volatile (" \
+ r1 = -3; \
+ r2 = 0; \
+ call %[bpf_lookup_user_key]; \
+ r1 = r0; \
+ call %[bpf_key_put]; \
+ r0 = 0; \
+ exit; \
+" :
+ : __imm(bpf_key_put),
+ __imm(bpf_lookup_user_key)
+ : __clobber_all);
+}
+
+SEC("lsm.s/bpf")
+__description("reference tracking: release system key reference without check")
+__failure __msg("Possibly NULL pointer passed to trusted arg0")
+__naked void system_key_reference_without_check(void)
+{
+ asm volatile (" \
+ r1 = 1; \
+ call %[bpf_lookup_system_key]; \
+ r1 = r0; \
+ call %[bpf_key_put]; \
+ r0 = 0; \
+ exit; \
+" :
+ : __imm(bpf_key_put),
+ __imm(bpf_lookup_system_key)
+ : __clobber_all);
+}
+
+SEC("lsm.s/bpf")
+__description("reference tracking: release with NULL key pointer")
+__failure __msg("Possibly NULL pointer passed to trusted arg0")
+__naked void release_with_null_key_pointer(void)
+{
+ asm volatile (" \
+ r1 = 0; \
+ call %[bpf_key_put]; \
+ r0 = 0; \
+ exit; \
+" :
+ : __imm(bpf_key_put)
+ : __clobber_all);
+}
+
+SEC("lsm.s/bpf")
+__description("reference tracking: leak potential reference to user key")
+__failure __msg("Unreleased reference")
+__naked void potential_reference_to_user_key(void)
+{
+ asm volatile (" \
+ r1 = -3; \
+ r2 = 0; \
+ call %[bpf_lookup_user_key]; \
+ exit; \
+" :
+ : __imm(bpf_lookup_user_key)
+ : __clobber_all);
+}
+
+SEC("lsm.s/bpf")
+__description("reference tracking: leak potential reference to system key")
+__failure __msg("Unreleased reference")
+__naked void potential_reference_to_system_key(void)
+{
+ asm volatile (" \
+ r1 = 1; \
+ call %[bpf_lookup_system_key]; \
+ exit; \
+" :
+ : __imm(bpf_lookup_system_key)
+ : __clobber_all);
+}
+
+SEC("tc")
+__description("reference tracking: release reference without check")
+__failure __msg("type=sock_or_null expected=sock")
+__naked void tracking_release_reference_without_check(void)
+{
+ asm volatile (
+ BPF_SK_LOOKUP(bpf_sk_lookup_tcp)
+" /* reference in r0 may be NULL */ \
+ r1 = r0; \
+ r2 = 0; \
+ call %[bpf_sk_release]; \
+ exit; \
+" :
+ : __imm(bpf_sk_lookup_tcp),
+ __imm(bpf_sk_release),
+ __imm_const(sizeof_bpf_sock_tuple, sizeof(struct bpf_sock_tuple))
+ : __clobber_all);
+}
+
+SEC("tc")
+__description("reference tracking: release reference to sock_common without check")
+__failure __msg("type=sock_common_or_null expected=sock")
+__naked void to_sock_common_without_check(void)
+{
+ asm volatile (
+ BPF_SK_LOOKUP(bpf_skc_lookup_tcp)
+" /* reference in r0 may be NULL */ \
+ r1 = r0; \
+ r2 = 0; \
+ call %[bpf_sk_release]; \
+ exit; \
+" :
+ : __imm(bpf_sk_release),
+ __imm(bpf_skc_lookup_tcp),
+ __imm_const(sizeof_bpf_sock_tuple, sizeof(struct bpf_sock_tuple))
+ : __clobber_all);
+}
+
+SEC("tc")
+__description("reference tracking: release reference")
+__success __retval(0)
+__naked void reference_tracking_release_reference(void)
+{
+ asm volatile (
+ BPF_SK_LOOKUP(bpf_sk_lookup_tcp)
+" r1 = r0; \
+ if r0 == 0 goto l0_%=; \
+ call %[bpf_sk_release]; \
+l0_%=: exit; \
+" :
+ : __imm(bpf_sk_lookup_tcp),
+ __imm(bpf_sk_release),
+ __imm_const(sizeof_bpf_sock_tuple, sizeof(struct bpf_sock_tuple))
+ : __clobber_all);
+}
+
+SEC("tc")
+__description("reference tracking: release reference to sock_common")
+__success __retval(0)
+__naked void release_reference_to_sock_common(void)
+{
+ asm volatile (
+ BPF_SK_LOOKUP(bpf_skc_lookup_tcp)
+" r1 = r0; \
+ if r0 == 0 goto l0_%=; \
+ call %[bpf_sk_release]; \
+l0_%=: exit; \
+" :
+ : __imm(bpf_sk_release),
+ __imm(bpf_skc_lookup_tcp),
+ __imm_const(sizeof_bpf_sock_tuple, sizeof(struct bpf_sock_tuple))
+ : __clobber_all);
+}
+
+SEC("tc")
+__description("reference tracking: release reference 2")
+__success __retval(0)
+__naked void reference_tracking_release_reference_2(void)
+{
+ asm volatile (
+ BPF_SK_LOOKUP(bpf_sk_lookup_tcp)
+" r1 = r0; \
+ if r0 != 0 goto l0_%=; \
+ exit; \
+l0_%=: call %[bpf_sk_release]; \
+ exit; \
+" :
+ : __imm(bpf_sk_lookup_tcp),
+ __imm(bpf_sk_release),
+ __imm_const(sizeof_bpf_sock_tuple, sizeof(struct bpf_sock_tuple))
+ : __clobber_all);
+}
+
+SEC("tc")
+__description("reference tracking: release reference twice")
+__failure __msg("type=scalar expected=sock")
+__naked void reference_tracking_release_reference_twice(void)
+{
+ asm volatile (
+ BPF_SK_LOOKUP(bpf_sk_lookup_tcp)
+" r1 = r0; \
+ r6 = r0; \
+ if r0 == 0 goto l0_%=; \
+ call %[bpf_sk_release]; \
+l0_%=: r1 = r6; \
+ call %[bpf_sk_release]; \
+ exit; \
+" :
+ : __imm(bpf_sk_lookup_tcp),
+ __imm(bpf_sk_release),
+ __imm_const(sizeof_bpf_sock_tuple, sizeof(struct bpf_sock_tuple))
+ : __clobber_all);
+}
+
+SEC("tc")
+__description("reference tracking: release reference twice inside branch")
+__failure __msg("type=scalar expected=sock")
+__naked void release_reference_twice_inside_branch(void)
+{
+ asm volatile (
+ BPF_SK_LOOKUP(bpf_sk_lookup_tcp)
+" r1 = r0; \
+ r6 = r0; \
+ if r0 == 0 goto l0_%=; /* goto end */ \
+ call %[bpf_sk_release]; \
+ r1 = r6; \
+ call %[bpf_sk_release]; \
+l0_%=: exit; \
+" :
+ : __imm(bpf_sk_lookup_tcp),
+ __imm(bpf_sk_release),
+ __imm_const(sizeof_bpf_sock_tuple, sizeof(struct bpf_sock_tuple))
+ : __clobber_all);
+}
+
+SEC("tc")
+__description("reference tracking: alloc, check, free in one subbranch")
+__failure __msg("Unreleased reference")
+__flag(BPF_F_ANY_ALIGNMENT)
+__naked void check_free_in_one_subbranch(void)
+{
+ asm volatile (" \
+ r2 = *(u32*)(r1 + %[__sk_buff_data]); \
+ r3 = *(u32*)(r1 + %[__sk_buff_data_end]); \
+ r0 = r2; \
+ r0 += 16; \
+ /* if (offsetof(skb, mark) > data_len) exit; */ \
+ if r0 <= r3 goto l0_%=; \
+ exit; \
+l0_%=: r6 = *(u32*)(r2 + %[__sk_buff_mark]); \
+" BPF_SK_LOOKUP(bpf_sk_lookup_tcp)
+" if r6 == 0 goto l1_%=; /* mark == 0? */\
+ /* Leak reference in R0 */ \
+ exit; \
+l1_%=: if r0 == 0 goto l2_%=; /* sk NULL? */ \
+ r1 = r0; \
+ call %[bpf_sk_release]; \
+l2_%=: exit; \
+" :
+ : __imm(bpf_sk_lookup_tcp),
+ __imm(bpf_sk_release),
+ __imm_const(__sk_buff_data, offsetof(struct __sk_buff, data)),
+ __imm_const(__sk_buff_data_end, offsetof(struct __sk_buff, data_end)),
+ __imm_const(__sk_buff_mark, offsetof(struct __sk_buff, mark)),
+ __imm_const(sizeof_bpf_sock_tuple, sizeof(struct bpf_sock_tuple))
+ : __clobber_all);
+}
+
+SEC("tc")
+__description("reference tracking: alloc, check, free in both subbranches")
+__success __retval(0) __flag(BPF_F_ANY_ALIGNMENT)
+__naked void check_free_in_both_subbranches(void)
+{
+ asm volatile (" \
+ r2 = *(u32*)(r1 + %[__sk_buff_data]); \
+ r3 = *(u32*)(r1 + %[__sk_buff_data_end]); \
+ r0 = r2; \
+ r0 += 16; \
+ /* if (offsetof(skb, mark) > data_len) exit; */ \
+ if r0 <= r3 goto l0_%=; \
+ exit; \
+l0_%=: r6 = *(u32*)(r2 + %[__sk_buff_mark]); \
+" BPF_SK_LOOKUP(bpf_sk_lookup_tcp)
+" if r6 == 0 goto l1_%=; /* mark == 0? */\
+ if r0 == 0 goto l2_%=; /* sk NULL? */ \
+ r1 = r0; \
+ call %[bpf_sk_release]; \
+l2_%=: exit; \
+l1_%=: if r0 == 0 goto l3_%=; /* sk NULL? */ \
+ r1 = r0; \
+ call %[bpf_sk_release]; \
+l3_%=: exit; \
+" :
+ : __imm(bpf_sk_lookup_tcp),
+ __imm(bpf_sk_release),
+ __imm_const(__sk_buff_data, offsetof(struct __sk_buff, data)),
+ __imm_const(__sk_buff_data_end, offsetof(struct __sk_buff, data_end)),
+ __imm_const(__sk_buff_mark, offsetof(struct __sk_buff, mark)),
+ __imm_const(sizeof_bpf_sock_tuple, sizeof(struct bpf_sock_tuple))
+ : __clobber_all);
+}
+
+SEC("tc")
+__description("reference tracking in call: free reference in subprog")
+__success __retval(0)
+__naked void call_free_reference_in_subprog(void)
+{
+ asm volatile (
+ BPF_SK_LOOKUP(bpf_sk_lookup_tcp)
+" r1 = r0; /* unchecked reference */ \
+ call call_free_reference_in_subprog__1; \
+ r0 = 0; \
+ exit; \
+" :
+ : __imm(bpf_sk_lookup_tcp),
+ __imm_const(sizeof_bpf_sock_tuple, sizeof(struct bpf_sock_tuple))
+ : __clobber_all);
+}
+
+static __naked __noinline __attribute__((used))
+void call_free_reference_in_subprog__1(void)
+{
+ asm volatile (" \
+ /* subprog 1 */ \
+ r2 = r1; \
+ if r2 == 0 goto l0_%=; \
+ call %[bpf_sk_release]; \
+l0_%=: exit; \
+" :
+ : __imm(bpf_sk_release)
+ : __clobber_all);
+}
+
+SEC("tc")
+__description("reference tracking in call: free reference in subprog and outside")
+__failure __msg("type=scalar expected=sock")
+__naked void reference_in_subprog_and_outside(void)
+{
+ asm volatile (
+ BPF_SK_LOOKUP(bpf_sk_lookup_tcp)
+" r1 = r0; /* unchecked reference */ \
+ r6 = r0; \
+ call reference_in_subprog_and_outside__1; \
+ r1 = r6; \
+ call %[bpf_sk_release]; \
+ exit; \
+" :
+ : __imm(bpf_sk_lookup_tcp),
+ __imm(bpf_sk_release),
+ __imm_const(sizeof_bpf_sock_tuple, sizeof(struct bpf_sock_tuple))
+ : __clobber_all);
+}
+
+static __naked __noinline __attribute__((used))
+void reference_in_subprog_and_outside__1(void)
+{
+ asm volatile (" \
+ /* subprog 1 */ \
+ r2 = r1; \
+ if r2 == 0 goto l0_%=; \
+ call %[bpf_sk_release]; \
+l0_%=: exit; \
+" :
+ : __imm(bpf_sk_release)
+ : __clobber_all);
+}
+
+SEC("tc")
+__description("reference tracking in call: alloc & leak reference in subprog")
+__failure __msg("Unreleased reference")
+__naked void alloc_leak_reference_in_subprog(void)
+{
+ asm volatile (" \
+ r4 = r10; \
+ r4 += -8; \
+ call alloc_leak_reference_in_subprog__1; \
+ r1 = r0; \
+ r0 = 0; \
+ exit; \
+" ::: __clobber_all);
+}
+
+static __naked __noinline __attribute__((used))
+void alloc_leak_reference_in_subprog__1(void)
+{
+ asm volatile (" \
+ /* subprog 1 */ \
+ r6 = r4; \
+" BPF_SK_LOOKUP(bpf_sk_lookup_tcp)
+" /* spill unchecked sk_ptr into stack of caller */\
+ *(u64*)(r6 + 0) = r0; \
+ r1 = r0; \
+ exit; \
+" :
+ : __imm(bpf_sk_lookup_tcp),
+ __imm_const(sizeof_bpf_sock_tuple, sizeof(struct bpf_sock_tuple))
+ : __clobber_all);
+}
+
+SEC("tc")
+__description("reference tracking in call: alloc in subprog, release outside")
+__success __retval(POINTER_VALUE)
+__naked void alloc_in_subprog_release_outside(void)
+{
+ asm volatile (" \
+ r4 = r10; \
+ call alloc_in_subprog_release_outside__1; \
+ r1 = r0; \
+ if r0 == 0 goto l0_%=; \
+ call %[bpf_sk_release]; \
+l0_%=: exit; \
+" :
+ : __imm(bpf_sk_release)
+ : __clobber_all);
+}
+
+static __naked __noinline __attribute__((used))
+void alloc_in_subprog_release_outside__1(void)
+{
+ asm volatile (" \
+ /* subprog 1 */ \
+" BPF_SK_LOOKUP(bpf_sk_lookup_tcp)
+" exit; /* return sk */ \
+" :
+ : __imm(bpf_sk_lookup_tcp),
+ __imm_const(sizeof_bpf_sock_tuple, sizeof(struct bpf_sock_tuple))
+ : __clobber_all);
+}
+
+SEC("tc")
+__description("reference tracking in call: sk_ptr leak into caller stack")
+__failure __msg("Unreleased reference")
+__naked void ptr_leak_into_caller_stack(void)
+{
+ asm volatile (" \
+ r4 = r10; \
+ r4 += -8; \
+ call ptr_leak_into_caller_stack__1; \
+ r0 = 0; \
+ exit; \
+" ::: __clobber_all);
+}
+
+static __naked __noinline __attribute__((used))
+void ptr_leak_into_caller_stack__1(void)
+{
+ asm volatile (" \
+ /* subprog 1 */ \
+ r5 = r10; \
+ r5 += -8; \
+ *(u64*)(r5 + 0) = r4; \
+ call ptr_leak_into_caller_stack__2; \
+ /* spill unchecked sk_ptr into stack of caller */\
+ r5 = r10; \
+ r5 += -8; \
+ r4 = *(u64*)(r5 + 0); \
+ *(u64*)(r4 + 0) = r0; \
+ exit; \
+" ::: __clobber_all);
+}
+
+static __naked __noinline __attribute__((used))
+void ptr_leak_into_caller_stack__2(void)
+{
+ asm volatile (" \
+ /* subprog 2 */ \
+" BPF_SK_LOOKUP(bpf_sk_lookup_tcp)
+" exit; \
+" :
+ : __imm(bpf_sk_lookup_tcp),
+ __imm_const(sizeof_bpf_sock_tuple, sizeof(struct bpf_sock_tuple))
+ : __clobber_all);
+}
+
+SEC("tc")
+__description("reference tracking in call: sk_ptr spill into caller stack")
+__success __retval(0)
+__naked void ptr_spill_into_caller_stack(void)
+{
+ asm volatile (" \
+ r4 = r10; \
+ r4 += -8; \
+ call ptr_spill_into_caller_stack__1; \
+ r0 = 0; \
+ exit; \
+" ::: __clobber_all);
+}
+
+static __naked __noinline __attribute__((used))
+void ptr_spill_into_caller_stack__1(void)
+{
+ asm volatile (" \
+ /* subprog 1 */ \
+ r5 = r10; \
+ r5 += -8; \
+ *(u64*)(r5 + 0) = r4; \
+ call ptr_spill_into_caller_stack__2; \
+ /* spill unchecked sk_ptr into stack of caller */\
+ r5 = r10; \
+ r5 += -8; \
+ r4 = *(u64*)(r5 + 0); \
+ *(u64*)(r4 + 0) = r0; \
+ if r0 == 0 goto l0_%=; \
+ /* now the sk_ptr is verified, free the reference */\
+ r1 = *(u64*)(r4 + 0); \
+ call %[bpf_sk_release]; \
+l0_%=: exit; \
+" :
+ : __imm(bpf_sk_release)
+ : __clobber_all);
+}
+
+static __naked __noinline __attribute__((used))
+void ptr_spill_into_caller_stack__2(void)
+{
+ asm volatile (" \
+ /* subprog 2 */ \
+" BPF_SK_LOOKUP(bpf_sk_lookup_tcp)
+" exit; \
+" :
+ : __imm(bpf_sk_lookup_tcp),
+ __imm_const(sizeof_bpf_sock_tuple, sizeof(struct bpf_sock_tuple))
+ : __clobber_all);
+}
+
+SEC("tc")
+__description("reference tracking: allow LD_ABS")
+__success __retval(0)
+__naked void reference_tracking_allow_ld_abs(void)
+{
+ asm volatile (" \
+ r6 = r1; \
+" BPF_SK_LOOKUP(bpf_sk_lookup_tcp)
+" r1 = r0; \
+ if r0 == 0 goto l0_%=; \
+ call %[bpf_sk_release]; \
+l0_%=: r0 = *(u8*)skb[0]; \
+ r0 = *(u16*)skb[0]; \
+ r0 = *(u32*)skb[0]; \
+ exit; \
+" :
+ : __imm(bpf_sk_lookup_tcp),
+ __imm(bpf_sk_release),
+ __imm_const(sizeof_bpf_sock_tuple, sizeof(struct bpf_sock_tuple))
+ : __clobber_all);
+}
+
+SEC("tc")
+__description("reference tracking: forbid LD_ABS while holding reference")
+__failure __msg("BPF_LD_[ABS|IND] cannot be mixed with socket references")
+__naked void ld_abs_while_holding_reference(void)
+{
+ asm volatile (" \
+ r6 = r1; \
+" BPF_SK_LOOKUP(bpf_sk_lookup_tcp)
+" r0 = *(u8*)skb[0]; \
+ r0 = *(u16*)skb[0]; \
+ r0 = *(u32*)skb[0]; \
+ r1 = r0; \
+ if r0 == 0 goto l0_%=; \
+ call %[bpf_sk_release]; \
+l0_%=: exit; \
+" :
+ : __imm(bpf_sk_lookup_tcp),
+ __imm(bpf_sk_release),
+ __imm_const(sizeof_bpf_sock_tuple, sizeof(struct bpf_sock_tuple))
+ : __clobber_all);
+}
+
+SEC("tc")
+__description("reference tracking: allow LD_IND")
+__success __retval(1)
+__naked void reference_tracking_allow_ld_ind(void)
+{
+ asm volatile (" \
+ r6 = r1; \
+" BPF_SK_LOOKUP(bpf_sk_lookup_tcp)
+" r1 = r0; \
+ if r0 == 0 goto l0_%=; \
+ call %[bpf_sk_release]; \
+l0_%=: r7 = 1; \
+ .8byte %[ld_ind]; \
+ r0 = r7; \
+ exit; \
+" :
+ : __imm(bpf_sk_lookup_tcp),
+ __imm(bpf_sk_release),
+ __imm_const(sizeof_bpf_sock_tuple, sizeof(struct bpf_sock_tuple)),
+ __imm_insn(ld_ind, BPF_LD_IND(BPF_W, BPF_REG_7, -0x200000))
+ : __clobber_all);
+}
+
+SEC("tc")
+__description("reference tracking: forbid LD_IND while holding reference")
+__failure __msg("BPF_LD_[ABS|IND] cannot be mixed with socket references")
+__naked void ld_ind_while_holding_reference(void)
+{
+ asm volatile (" \
+ r6 = r1; \
+" BPF_SK_LOOKUP(bpf_sk_lookup_tcp)
+" r4 = r0; \
+ r7 = 1; \
+ .8byte %[ld_ind]; \
+ r0 = r7; \
+ r1 = r4; \
+ if r1 == 0 goto l0_%=; \
+ call %[bpf_sk_release]; \
+l0_%=: exit; \
+" :
+ : __imm(bpf_sk_lookup_tcp),
+ __imm(bpf_sk_release),
+ __imm_const(sizeof_bpf_sock_tuple, sizeof(struct bpf_sock_tuple)),
+ __imm_insn(ld_ind, BPF_LD_IND(BPF_W, BPF_REG_7, -0x200000))
+ : __clobber_all);
+}
+
+SEC("tc")
+__description("reference tracking: check reference or tail call")
+__success __retval(0)
+__naked void check_reference_or_tail_call(void)
+{
+ asm volatile (" \
+ r7 = r1; \
+" BPF_SK_LOOKUP(bpf_sk_lookup_tcp)
+" /* if (sk) bpf_sk_release() */ \
+ r1 = r0; \
+ if r1 != 0 goto l0_%=; \
+ /* bpf_tail_call() */ \
+ r3 = 3; \
+ r2 = %[map_prog1_tc] ll; \
+ r1 = r7; \
+ call %[bpf_tail_call]; \
+ r0 = 0; \
+ exit; \
+l0_%=: call %[bpf_sk_release]; \
+ exit; \
+" :
+ : __imm(bpf_sk_lookup_tcp),
+ __imm(bpf_sk_release),
+ __imm(bpf_tail_call),
+ __imm_addr(map_prog1_tc),
+ __imm_const(sizeof_bpf_sock_tuple, sizeof(struct bpf_sock_tuple))
+ : __clobber_all);
+}
+
+SEC("tc")
+__description("reference tracking: release reference then tail call")
+__success __retval(0)
+__naked void release_reference_then_tail_call(void)
+{
+ asm volatile (" \
+ r7 = r1; \
+" BPF_SK_LOOKUP(bpf_sk_lookup_tcp)
+" /* if (sk) bpf_sk_release() */ \
+ r1 = r0; \
+ if r1 == 0 goto l0_%=; \
+ call %[bpf_sk_release]; \
+l0_%=: /* bpf_tail_call() */ \
+ r3 = 3; \
+ r2 = %[map_prog1_tc] ll; \
+ r1 = r7; \
+ call %[bpf_tail_call]; \
+ r0 = 0; \
+ exit; \
+" :
+ : __imm(bpf_sk_lookup_tcp),
+ __imm(bpf_sk_release),
+ __imm(bpf_tail_call),
+ __imm_addr(map_prog1_tc),
+ __imm_const(sizeof_bpf_sock_tuple, sizeof(struct bpf_sock_tuple))
+ : __clobber_all);
+}
+
+SEC("tc")
+__description("reference tracking: leak possible reference over tail call")
+__failure __msg("tail_call would lead to reference leak")
+__naked void possible_reference_over_tail_call(void)
+{
+ asm volatile (" \
+ r7 = r1; \
+ /* Look up socket and store in REG_6 */ \
+" BPF_SK_LOOKUP(bpf_sk_lookup_tcp)
+" /* bpf_tail_call() */ \
+ r6 = r0; \
+ r3 = 3; \
+ r2 = %[map_prog1_tc] ll; \
+ r1 = r7; \
+ call %[bpf_tail_call]; \
+ r0 = 0; \
+ /* if (sk) bpf_sk_release() */ \
+ r1 = r6; \
+ if r1 == 0 goto l0_%=; \
+ call %[bpf_sk_release]; \
+l0_%=: exit; \
+" :
+ : __imm(bpf_sk_lookup_tcp),
+ __imm(bpf_sk_release),
+ __imm(bpf_tail_call),
+ __imm_addr(map_prog1_tc),
+ __imm_const(sizeof_bpf_sock_tuple, sizeof(struct bpf_sock_tuple))
+ : __clobber_all);
+}
+
+SEC("tc")
+__description("reference tracking: leak checked reference over tail call")
+__failure __msg("tail_call would lead to reference leak")
+__naked void checked_reference_over_tail_call(void)
+{
+ asm volatile (" \
+ r7 = r1; \
+ /* Look up socket and store in REG_6 */ \
+" BPF_SK_LOOKUP(bpf_sk_lookup_tcp)
+" r6 = r0; \
+ /* if (!sk) goto end */ \
+ if r0 == 0 goto l0_%=; \
+ /* bpf_tail_call() */ \
+ r3 = 0; \
+ r2 = %[map_prog1_tc] ll; \
+ r1 = r7; \
+ call %[bpf_tail_call]; \
+ r0 = 0; \
+ r1 = r6; \
+l0_%=: call %[bpf_sk_release]; \
+ exit; \
+" :
+ : __imm(bpf_sk_lookup_tcp),
+ __imm(bpf_sk_release),
+ __imm(bpf_tail_call),
+ __imm_addr(map_prog1_tc),
+ __imm_const(sizeof_bpf_sock_tuple, sizeof(struct bpf_sock_tuple))
+ : __clobber_all);
+}
+
+SEC("tc")
+__description("reference tracking: mangle and release sock_or_null")
+__failure __msg("R1 pointer arithmetic on sock_or_null prohibited")
+__naked void and_release_sock_or_null(void)
+{
+ asm volatile (
+ BPF_SK_LOOKUP(bpf_sk_lookup_tcp)
+" r1 = r0; \
+ r1 += 5; \
+ if r0 == 0 goto l0_%=; \
+ call %[bpf_sk_release]; \
+l0_%=: exit; \
+" :
+ : __imm(bpf_sk_lookup_tcp),
+ __imm(bpf_sk_release),
+ __imm_const(sizeof_bpf_sock_tuple, sizeof(struct bpf_sock_tuple))
+ : __clobber_all);
+}
+
+SEC("tc")
+__description("reference tracking: mangle and release sock")
+__failure __msg("R1 pointer arithmetic on sock prohibited")
+__naked void tracking_mangle_and_release_sock(void)
+{
+ asm volatile (
+ BPF_SK_LOOKUP(bpf_sk_lookup_tcp)
+" r1 = r0; \
+ if r0 == 0 goto l0_%=; \
+ r1 += 5; \
+ call %[bpf_sk_release]; \
+l0_%=: exit; \
+" :
+ : __imm(bpf_sk_lookup_tcp),
+ __imm(bpf_sk_release),
+ __imm_const(sizeof_bpf_sock_tuple, sizeof(struct bpf_sock_tuple))
+ : __clobber_all);
+}
+
+SEC("tc")
+__description("reference tracking: access member")
+__success __retval(0)
+__naked void reference_tracking_access_member(void)
+{
+ asm volatile (
+ BPF_SK_LOOKUP(bpf_sk_lookup_tcp)
+" r6 = r0; \
+ if r0 == 0 goto l0_%=; \
+ r2 = *(u32*)(r0 + 4); \
+ r1 = r6; \
+ call %[bpf_sk_release]; \
+l0_%=: exit; \
+" :
+ : __imm(bpf_sk_lookup_tcp),
+ __imm(bpf_sk_release),
+ __imm_const(sizeof_bpf_sock_tuple, sizeof(struct bpf_sock_tuple))
+ : __clobber_all);
+}
+
+SEC("tc")
+__description("reference tracking: write to member")
+__failure __msg("cannot write into sock")
+__naked void reference_tracking_write_to_member(void)
+{
+ asm volatile (
+ BPF_SK_LOOKUP(bpf_sk_lookup_tcp)
+" r6 = r0; \
+ if r0 == 0 goto l0_%=; \
+ r1 = r6; \
+ r2 = 42 ll; \
+ *(u32*)(r1 + %[bpf_sock_mark]) = r2; \
+ r1 = r6; \
+l0_%=: call %[bpf_sk_release]; \
+ r0 = 0 ll; \
+ exit; \
+" :
+ : __imm(bpf_sk_lookup_tcp),
+ __imm(bpf_sk_release),
+ __imm_const(bpf_sock_mark, offsetof(struct bpf_sock, mark)),
+ __imm_const(sizeof_bpf_sock_tuple, sizeof(struct bpf_sock_tuple))
+ : __clobber_all);
+}
+
+SEC("tc")
+__description("reference tracking: invalid 64-bit access of member")
+__failure __msg("invalid sock access off=0 size=8")
+__naked void _64_bit_access_of_member(void)
+{
+ asm volatile (
+ BPF_SK_LOOKUP(bpf_sk_lookup_tcp)
+" r6 = r0; \
+ if r0 == 0 goto l0_%=; \
+ r2 = *(u64*)(r0 + 0); \
+ r1 = r6; \
+ call %[bpf_sk_release]; \
+l0_%=: exit; \
+" :
+ : __imm(bpf_sk_lookup_tcp),
+ __imm(bpf_sk_release),
+ __imm_const(sizeof_bpf_sock_tuple, sizeof(struct bpf_sock_tuple))
+ : __clobber_all);
+}
+
+SEC("tc")
+__description("reference tracking: access after release")
+__failure __msg("!read_ok")
+__naked void reference_tracking_access_after_release(void)
+{
+ asm volatile (
+ BPF_SK_LOOKUP(bpf_sk_lookup_tcp)
+" r1 = r0; \
+ if r0 == 0 goto l0_%=; \
+ call %[bpf_sk_release]; \
+ r2 = *(u32*)(r1 + 0); \
+l0_%=: exit; \
+" :
+ : __imm(bpf_sk_lookup_tcp),
+ __imm(bpf_sk_release),
+ __imm_const(sizeof_bpf_sock_tuple, sizeof(struct bpf_sock_tuple))
+ : __clobber_all);
+}
+
+SEC("tc")
+__description("reference tracking: direct access for lookup")
+__success __retval(0)
+__naked void tracking_direct_access_for_lookup(void)
+{
+ asm volatile (" \
+ /* Check that the packet is at least 64B long */\
+ r2 = *(u32*)(r1 + %[__sk_buff_data]); \
+ r3 = *(u32*)(r1 + %[__sk_buff_data_end]); \
+ r0 = r2; \
+ r0 += 64; \
+ if r0 > r3 goto l0_%=; \
+ /* sk = sk_lookup_tcp(ctx, skb->data, ...) */ \
+ r3 = %[sizeof_bpf_sock_tuple]; \
+ r4 = 0; \
+ r5 = 0; \
+ call %[bpf_sk_lookup_tcp]; \
+ r6 = r0; \
+ if r0 == 0 goto l0_%=; \
+ r2 = *(u32*)(r0 + 4); \
+ r1 = r6; \
+ call %[bpf_sk_release]; \
+l0_%=: exit; \
+" :
+ : __imm(bpf_sk_lookup_tcp),
+ __imm(bpf_sk_release),
+ __imm_const(__sk_buff_data, offsetof(struct __sk_buff, data)),
+ __imm_const(__sk_buff_data_end, offsetof(struct __sk_buff, data_end)),
+ __imm_const(sizeof_bpf_sock_tuple, sizeof(struct bpf_sock_tuple))
+ : __clobber_all);
+}
+
+SEC("tc")
+__description("reference tracking: use ptr from bpf_tcp_sock() after release")
+__failure __msg("invalid mem access")
+__flag(BPF_F_ANY_ALIGNMENT)
+__naked void bpf_tcp_sock_after_release(void)
+{
+ asm volatile (
+ BPF_SK_LOOKUP(bpf_sk_lookup_tcp)
+" if r0 != 0 goto l0_%=; \
+ exit; \
+l0_%=: r6 = r0; \
+ r1 = r0; \
+ call %[bpf_tcp_sock]; \
+ if r0 != 0 goto l1_%=; \
+ r1 = r6; \
+ call %[bpf_sk_release]; \
+ exit; \
+l1_%=: r7 = r0; \
+ r1 = r6; \
+ call %[bpf_sk_release]; \
+ r0 = *(u32*)(r7 + %[bpf_tcp_sock_snd_cwnd]); \
+ exit; \
+" :
+ : __imm(bpf_sk_lookup_tcp),
+ __imm(bpf_sk_release),
+ __imm(bpf_tcp_sock),
+ __imm_const(bpf_tcp_sock_snd_cwnd, offsetof(struct bpf_tcp_sock, snd_cwnd)),
+ __imm_const(sizeof_bpf_sock_tuple, sizeof(struct bpf_sock_tuple))
+ : __clobber_all);
+}
+
+SEC("tc")
+__description("reference tracking: use ptr from bpf_sk_fullsock() after release")
+__failure __msg("invalid mem access")
+__flag(BPF_F_ANY_ALIGNMENT)
+__naked void bpf_sk_fullsock_after_release(void)
+{
+ asm volatile (
+ BPF_SK_LOOKUP(bpf_sk_lookup_tcp)
+" if r0 != 0 goto l0_%=; \
+ exit; \
+l0_%=: r6 = r0; \
+ r1 = r0; \
+ call %[bpf_sk_fullsock]; \
+ if r0 != 0 goto l1_%=; \
+ r1 = r6; \
+ call %[bpf_sk_release]; \
+ exit; \
+l1_%=: r7 = r0; \
+ r1 = r6; \
+ call %[bpf_sk_release]; \
+ r0 = *(u32*)(r7 + %[bpf_sock_type]); \
+ exit; \
+" :
+ : __imm(bpf_sk_fullsock),
+ __imm(bpf_sk_lookup_tcp),
+ __imm(bpf_sk_release),
+ __imm_const(bpf_sock_type, offsetof(struct bpf_sock, type)),
+ __imm_const(sizeof_bpf_sock_tuple, sizeof(struct bpf_sock_tuple))
+ : __clobber_all);
+}
+
+SEC("tc")
+__description("reference tracking: use ptr from bpf_sk_fullsock(tp) after release")
+__failure __msg("invalid mem access")
+__flag(BPF_F_ANY_ALIGNMENT)
+__naked void sk_fullsock_tp_after_release(void)
+{
+ asm volatile (
+ BPF_SK_LOOKUP(bpf_sk_lookup_tcp)
+" if r0 != 0 goto l0_%=; \
+ exit; \
+l0_%=: r6 = r0; \
+ r1 = r0; \
+ call %[bpf_tcp_sock]; \
+ if r0 != 0 goto l1_%=; \
+ r1 = r6; \
+ call %[bpf_sk_release]; \
+ exit; \
+l1_%=: r1 = r0; \
+ call %[bpf_sk_fullsock]; \
+ r1 = r6; \
+ r6 = r0; \
+ call %[bpf_sk_release]; \
+ if r6 != 0 goto l2_%=; \
+ exit; \
+l2_%=: r0 = *(u32*)(r6 + %[bpf_sock_type]); \
+ exit; \
+" :
+ : __imm(bpf_sk_fullsock),
+ __imm(bpf_sk_lookup_tcp),
+ __imm(bpf_sk_release),
+ __imm(bpf_tcp_sock),
+ __imm_const(bpf_sock_type, offsetof(struct bpf_sock, type)),
+ __imm_const(sizeof_bpf_sock_tuple, sizeof(struct bpf_sock_tuple))
+ : __clobber_all);
+}
+
+SEC("tc")
+__description("reference tracking: use sk after bpf_sk_release(tp)")
+__failure __msg("invalid mem access")
+__flag(BPF_F_ANY_ALIGNMENT)
+__naked void after_bpf_sk_release_tp(void)
+{
+ asm volatile (
+ BPF_SK_LOOKUP(bpf_sk_lookup_tcp)
+" if r0 != 0 goto l0_%=; \
+ exit; \
+l0_%=: r6 = r0; \
+ r1 = r0; \
+ call %[bpf_tcp_sock]; \
+ if r0 != 0 goto l1_%=; \
+ r1 = r6; \
+ call %[bpf_sk_release]; \
+ exit; \
+l1_%=: r1 = r0; \
+ call %[bpf_sk_release]; \
+ r0 = *(u32*)(r6 + %[bpf_sock_type]); \
+ exit; \
+" :
+ : __imm(bpf_sk_lookup_tcp),
+ __imm(bpf_sk_release),
+ __imm(bpf_tcp_sock),
+ __imm_const(bpf_sock_type, offsetof(struct bpf_sock, type)),
+ __imm_const(sizeof_bpf_sock_tuple, sizeof(struct bpf_sock_tuple))
+ : __clobber_all);
+}
+
+SEC("tc")
+__description("reference tracking: use ptr from bpf_get_listener_sock() after bpf_sk_release(sk)")
+__success __retval(0)
+__naked void after_bpf_sk_release_sk(void)
+{
+ asm volatile (
+ BPF_SK_LOOKUP(bpf_sk_lookup_tcp)
+" if r0 != 0 goto l0_%=; \
+ exit; \
+l0_%=: r6 = r0; \
+ r1 = r0; \
+ call %[bpf_get_listener_sock]; \
+ if r0 != 0 goto l1_%=; \
+ r1 = r6; \
+ call %[bpf_sk_release]; \
+ exit; \
+l1_%=: r1 = r6; \
+ r6 = r0; \
+ call %[bpf_sk_release]; \
+ r0 = *(u32*)(r6 + %[bpf_sock_src_port]); \
+ exit; \
+" :
+ : __imm(bpf_get_listener_sock),
+ __imm(bpf_sk_lookup_tcp),
+ __imm(bpf_sk_release),
+ __imm_const(bpf_sock_src_port, offsetof(struct bpf_sock, src_port)),
+ __imm_const(sizeof_bpf_sock_tuple, sizeof(struct bpf_sock_tuple))
+ : __clobber_all);
+}
+
+SEC("tc")
+__description("reference tracking: bpf_sk_release(listen_sk)")
+__failure __msg("R1 must be referenced when passed to release function")
+__naked void bpf_sk_release_listen_sk(void)
+{
+ asm volatile (
+ BPF_SK_LOOKUP(bpf_sk_lookup_tcp)
+" if r0 != 0 goto l0_%=; \
+ exit; \
+l0_%=: r6 = r0; \
+ r1 = r0; \
+ call %[bpf_get_listener_sock]; \
+ if r0 != 0 goto l1_%=; \
+ r1 = r6; \
+ call %[bpf_sk_release]; \
+ exit; \
+l1_%=: r1 = r0; \
+ call %[bpf_sk_release]; \
+ r0 = *(u32*)(r6 + %[bpf_sock_type]); \
+ r1 = r6; \
+ call %[bpf_sk_release]; \
+ exit; \
+" :
+ : __imm(bpf_get_listener_sock),
+ __imm(bpf_sk_lookup_tcp),
+ __imm(bpf_sk_release),
+ __imm_const(bpf_sock_type, offsetof(struct bpf_sock, type)),
+ __imm_const(sizeof_bpf_sock_tuple, sizeof(struct bpf_sock_tuple))
+ : __clobber_all);
+}
+
+/* !bpf_sk_fullsock(sk) is checked but !bpf_tcp_sock(sk) is not checked */
+SEC("tc")
+__description("reference tracking: tp->snd_cwnd after bpf_sk_fullsock(sk) and bpf_tcp_sock(sk)")
+__failure __msg("invalid mem access")
+__naked void and_bpf_tcp_sock_sk(void)
+{
+ asm volatile (
+ BPF_SK_LOOKUP(bpf_sk_lookup_tcp)
+" if r0 != 0 goto l0_%=; \
+ exit; \
+l0_%=: r6 = r0; \
+ r1 = r0; \
+ call %[bpf_sk_fullsock]; \
+ r7 = r0; \
+ r1 = r6; \
+ call %[bpf_tcp_sock]; \
+ r8 = r0; \
+ if r7 != 0 goto l1_%=; \
+ r1 = r6; \
+ call %[bpf_sk_release]; \
+ exit; \
+l1_%=: r0 = *(u32*)(r8 + %[bpf_tcp_sock_snd_cwnd]); \
+ r1 = r6; \
+ call %[bpf_sk_release]; \
+ exit; \
+" :
+ : __imm(bpf_sk_fullsock),
+ __imm(bpf_sk_lookup_tcp),
+ __imm(bpf_sk_release),
+ __imm(bpf_tcp_sock),
+ __imm_const(bpf_tcp_sock_snd_cwnd, offsetof(struct bpf_tcp_sock, snd_cwnd)),
+ __imm_const(sizeof_bpf_sock_tuple, sizeof(struct bpf_sock_tuple))
+ : __clobber_all);
+}
+
+SEC("tc")
+__description("reference tracking: branch tracking valid pointer null comparison")
+__success __retval(0)
+__naked void tracking_valid_pointer_null_comparison(void)
+{
+ asm volatile (
+ BPF_SK_LOOKUP(bpf_sk_lookup_tcp)
+" r6 = r0; \
+ r3 = 1; \
+ if r6 != 0 goto l0_%=; \
+ r3 = 0; \
+l0_%=: if r6 == 0 goto l1_%=; \
+ r1 = r6; \
+ call %[bpf_sk_release]; \
+l1_%=: exit; \
+" :
+ : __imm(bpf_sk_lookup_tcp),
+ __imm(bpf_sk_release),
+ __imm_const(sizeof_bpf_sock_tuple, sizeof(struct bpf_sock_tuple))
+ : __clobber_all);
+}
+
+SEC("tc")
+__description("reference tracking: branch tracking valid pointer value comparison")
+__failure __msg("Unreleased reference")
+__naked void tracking_valid_pointer_value_comparison(void)
+{
+ asm volatile (
+ BPF_SK_LOOKUP(bpf_sk_lookup_tcp)
+" r6 = r0; \
+ r3 = 1; \
+ if r6 == 0 goto l0_%=; \
+ r3 = 0; \
+ if r6 == 1234 goto l0_%=; \
+ r1 = r6; \
+ call %[bpf_sk_release]; \
+l0_%=: exit; \
+" :
+ : __imm(bpf_sk_lookup_tcp),
+ __imm(bpf_sk_release),
+ __imm_const(sizeof_bpf_sock_tuple, sizeof(struct bpf_sock_tuple))
+ : __clobber_all);
+}
+
+SEC("tc")
+__description("reference tracking: bpf_sk_release(btf_tcp_sock)")
+__success
+__retval(0)
+__naked void sk_release_btf_tcp_sock(void)
+{
+ asm volatile (
+ BPF_SK_LOOKUP(bpf_sk_lookup_tcp)
+" if r0 != 0 goto l0_%=; \
+ exit; \
+l0_%=: r6 = r0; \
+ r1 = r0; \
+ call %[bpf_skc_to_tcp_sock]; \
+ if r0 != 0 goto l1_%=; \
+ r1 = r6; \
+ call %[bpf_sk_release]; \
+ exit; \
+l1_%=: r1 = r0; \
+ call %[bpf_sk_release]; \
+ exit; \
+" :
+ : __imm(bpf_sk_lookup_tcp),
+ __imm(bpf_sk_release),
+ __imm(bpf_skc_to_tcp_sock),
+ __imm_const(sizeof_bpf_sock_tuple, sizeof(struct bpf_sock_tuple))
+ : __clobber_all);
+}
+
+SEC("tc")
+__description("reference tracking: use ptr from bpf_skc_to_tcp_sock() after release")
+__failure __msg("invalid mem access")
+__naked void to_tcp_sock_after_release(void)
+{
+ asm volatile (
+ BPF_SK_LOOKUP(bpf_sk_lookup_tcp)
+" if r0 != 0 goto l0_%=; \
+ exit; \
+l0_%=: r6 = r0; \
+ r1 = r0; \
+ call %[bpf_skc_to_tcp_sock]; \
+ if r0 != 0 goto l1_%=; \
+ r1 = r6; \
+ call %[bpf_sk_release]; \
+ exit; \
+l1_%=: r7 = r0; \
+ r1 = r6; \
+ call %[bpf_sk_release]; \
+ r0 = *(u8*)(r7 + 0); \
+ exit; \
+" :
+ : __imm(bpf_sk_lookup_tcp),
+ __imm(bpf_sk_release),
+ __imm(bpf_skc_to_tcp_sock),
+ __imm_const(sizeof_bpf_sock_tuple, sizeof(struct bpf_sock_tuple))
+ : __clobber_all);
+}
+
+SEC("socket")
+__description("reference tracking: try to leak released ptr reg")
+__success __failure_unpriv __msg_unpriv("R8 !read_ok")
+__retval(0)
+__naked void to_leak_released_ptr_reg(void)
+{
+ asm volatile (" \
+ r0 = 0; \
+ *(u32*)(r10 - 4) = r0; \
+ r2 = r10; \
+ r2 += -4; \
+ r1 = %[map_array_48b] ll; \
+ call %[bpf_map_lookup_elem]; \
+ if r0 != 0 goto l0_%=; \
+ exit; \
+l0_%=: r9 = r0; \
+ r0 = 0; \
+ r1 = %[map_ringbuf] ll; \
+ r2 = 8; \
+ r3 = 0; \
+ call %[bpf_ringbuf_reserve]; \
+ if r0 != 0 goto l1_%=; \
+ exit; \
+l1_%=: r8 = r0; \
+ r1 = r8; \
+ r2 = 0; \
+ call %[bpf_ringbuf_discard]; \
+ r0 = 0; \
+ *(u64*)(r9 + 0) = r8; \
+ exit; \
+" :
+ : __imm(bpf_map_lookup_elem),
+ __imm(bpf_ringbuf_discard),
+ __imm(bpf_ringbuf_reserve),
+ __imm_addr(map_array_48b),
+ __imm_addr(map_ringbuf)
+ : __clobber_all);
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/verifier_reg_equal.c b/tools/testing/selftests/bpf/progs/verifier_reg_equal.c
new file mode 100644
index 000000000000..dc1d8c30fb0e
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/verifier_reg_equal.c
@@ -0,0 +1,58 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include <linux/bpf.h>
+#include <bpf/bpf_helpers.h>
+#include "bpf_misc.h"
+
+SEC("socket")
+__description("check w reg equal if r reg upper32 bits 0")
+__success
+__naked void subreg_equality_1(void)
+{
+ asm volatile (" \
+ call %[bpf_ktime_get_ns]; \
+ *(u64 *)(r10 - 8) = r0; \
+ r2 = *(u32 *)(r10 - 8); \
+ /* At this point upper 4-bytes of r2 are 0, \
+ * thus insn w3 = w2 should propagate reg id, \
+ * and w2 < 9 comparison would also propagate \
+ * the range for r3. \
+ */ \
+ w3 = w2; \
+ if w2 < 9 goto l0_%=; \
+ exit; \
+l0_%=: if r3 < 9 goto l1_%=; \
+ /* r1 read is illegal at this point */ \
+ r0 -= r1; \
+l1_%=: exit; \
+" :
+ : __imm(bpf_ktime_get_ns)
+ : __clobber_all);
+}
+
+SEC("socket")
+__description("check w reg not equal if r reg upper32 bits not 0")
+__failure __msg("R1 !read_ok")
+__naked void subreg_equality_2(void)
+{
+ asm volatile (" \
+ call %[bpf_ktime_get_ns]; \
+ r2 = r0; \
+ /* Upper 4-bytes of r2 may not be 0, thus insn \
+ * w3 = w2 should not propagate reg id, and \
+ * w2 < 9 comparison should not propagate \
+ * the range for r3 either. \
+ */ \
+ w3 = w2; \
+ if w2 < 9 goto l0_%=; \
+ exit; \
+l0_%=: if r3 < 9 goto l1_%=; \
+ /* r1 read is illegal at this point */ \
+ r0 -= r1; \
+l1_%=: exit; \
+" :
+ : __imm(bpf_ktime_get_ns)
+ : __clobber_all);
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/verifier_regalloc.c b/tools/testing/selftests/bpf/progs/verifier_regalloc.c
new file mode 100644
index 000000000000..ee5ddea87c91
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/verifier_regalloc.c
@@ -0,0 +1,364 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Converted from tools/testing/selftests/bpf/verifier/regalloc.c */
+
+#include <linux/bpf.h>
+#include <bpf/bpf_helpers.h>
+#include "bpf_misc.h"
+
+#define MAX_ENTRIES 11
+
+struct test_val {
+ unsigned int index;
+ int foo[MAX_ENTRIES];
+};
+
+struct {
+ __uint(type, BPF_MAP_TYPE_HASH);
+ __uint(max_entries, 1);
+ __type(key, long long);
+ __type(value, struct test_val);
+} map_hash_48b SEC(".maps");
+
+SEC("tracepoint")
+__description("regalloc basic")
+__success __flag(BPF_F_ANY_ALIGNMENT)
+__naked void regalloc_basic(void)
+{
+ asm volatile (" \
+ r6 = r1; \
+ r1 = 0; \
+ *(u64*)(r10 - 8) = r1; \
+ r2 = r10; \
+ r2 += -8; \
+ r1 = %[map_hash_48b] ll; \
+ call %[bpf_map_lookup_elem]; \
+ if r0 == 0 goto l0_%=; \
+ r7 = r0; \
+ call %[bpf_get_prandom_u32]; \
+ r2 = r0; \
+ if r0 s> 20 goto l0_%=; \
+ if r2 s< 0 goto l0_%=; \
+ r7 += r0; \
+ r7 += r2; \
+ r0 = *(u64*)(r7 + 0); \
+l0_%=: exit; \
+" :
+ : __imm(bpf_get_prandom_u32),
+ __imm(bpf_map_lookup_elem),
+ __imm_addr(map_hash_48b)
+ : __clobber_all);
+}
+
+SEC("tracepoint")
+__description("regalloc negative")
+__failure __msg("invalid access to map value, value_size=48 off=48 size=1")
+__naked void regalloc_negative(void)
+{
+ asm volatile (" \
+ r6 = r1; \
+ r1 = 0; \
+ *(u64*)(r10 - 8) = r1; \
+ r2 = r10; \
+ r2 += -8; \
+ r1 = %[map_hash_48b] ll; \
+ call %[bpf_map_lookup_elem]; \
+ if r0 == 0 goto l0_%=; \
+ r7 = r0; \
+ call %[bpf_get_prandom_u32]; \
+ r2 = r0; \
+ if r0 s> 24 goto l0_%=; \
+ if r2 s< 0 goto l0_%=; \
+ r7 += r0; \
+ r7 += r2; \
+ r0 = *(u8*)(r7 + 0); \
+l0_%=: exit; \
+" :
+ : __imm(bpf_get_prandom_u32),
+ __imm(bpf_map_lookup_elem),
+ __imm_addr(map_hash_48b)
+ : __clobber_all);
+}
+
+SEC("tracepoint")
+__description("regalloc src_reg mark")
+__success __flag(BPF_F_ANY_ALIGNMENT)
+__naked void regalloc_src_reg_mark(void)
+{
+ asm volatile (" \
+ r6 = r1; \
+ r1 = 0; \
+ *(u64*)(r10 - 8) = r1; \
+ r2 = r10; \
+ r2 += -8; \
+ r1 = %[map_hash_48b] ll; \
+ call %[bpf_map_lookup_elem]; \
+ if r0 == 0 goto l0_%=; \
+ r7 = r0; \
+ call %[bpf_get_prandom_u32]; \
+ r2 = r0; \
+ if r0 s> 20 goto l0_%=; \
+ r3 = 0; \
+ if r3 s>= r2 goto l0_%=; \
+ r7 += r0; \
+ r7 += r2; \
+ r0 = *(u64*)(r7 + 0); \
+l0_%=: exit; \
+" :
+ : __imm(bpf_get_prandom_u32),
+ __imm(bpf_map_lookup_elem),
+ __imm_addr(map_hash_48b)
+ : __clobber_all);
+}
+
+SEC("tracepoint")
+__description("regalloc src_reg negative")
+__failure __msg("invalid access to map value, value_size=48 off=44 size=8")
+__flag(BPF_F_ANY_ALIGNMENT)
+__naked void regalloc_src_reg_negative(void)
+{
+ asm volatile (" \
+ r6 = r1; \
+ r1 = 0; \
+ *(u64*)(r10 - 8) = r1; \
+ r2 = r10; \
+ r2 += -8; \
+ r1 = %[map_hash_48b] ll; \
+ call %[bpf_map_lookup_elem]; \
+ if r0 == 0 goto l0_%=; \
+ r7 = r0; \
+ call %[bpf_get_prandom_u32]; \
+ r2 = r0; \
+ if r0 s> 22 goto l0_%=; \
+ r3 = 0; \
+ if r3 s>= r2 goto l0_%=; \
+ r7 += r0; \
+ r7 += r2; \
+ r0 = *(u64*)(r7 + 0); \
+l0_%=: exit; \
+" :
+ : __imm(bpf_get_prandom_u32),
+ __imm(bpf_map_lookup_elem),
+ __imm_addr(map_hash_48b)
+ : __clobber_all);
+}
+
+SEC("tracepoint")
+__description("regalloc and spill")
+__success __flag(BPF_F_ANY_ALIGNMENT)
+__naked void regalloc_and_spill(void)
+{
+ asm volatile (" \
+ r6 = r1; \
+ r1 = 0; \
+ *(u64*)(r10 - 8) = r1; \
+ r2 = r10; \
+ r2 += -8; \
+ r1 = %[map_hash_48b] ll; \
+ call %[bpf_map_lookup_elem]; \
+ if r0 == 0 goto l0_%=; \
+ r7 = r0; \
+ call %[bpf_get_prandom_u32]; \
+ r2 = r0; \
+ if r0 s> 20 goto l0_%=; \
+ /* r0 has upper bound that should propagate into r2 */\
+ *(u64*)(r10 - 8) = r2; /* spill r2 */ \
+ r0 = 0; \
+ r2 = 0; /* clear r0 and r2 */\
+ r3 = *(u64*)(r10 - 8); /* fill r3 */ \
+ if r0 s>= r3 goto l0_%=; \
+ /* r3 has lower and upper bounds */ \
+ r7 += r3; \
+ r0 = *(u64*)(r7 + 0); \
+l0_%=: exit; \
+" :
+ : __imm(bpf_get_prandom_u32),
+ __imm(bpf_map_lookup_elem),
+ __imm_addr(map_hash_48b)
+ : __clobber_all);
+}
+
+SEC("tracepoint")
+__description("regalloc and spill negative")
+__failure __msg("invalid access to map value, value_size=48 off=48 size=8")
+__flag(BPF_F_ANY_ALIGNMENT)
+__naked void regalloc_and_spill_negative(void)
+{
+ asm volatile (" \
+ r6 = r1; \
+ r1 = 0; \
+ *(u64*)(r10 - 8) = r1; \
+ r2 = r10; \
+ r2 += -8; \
+ r1 = %[map_hash_48b] ll; \
+ call %[bpf_map_lookup_elem]; \
+ if r0 == 0 goto l0_%=; \
+ r7 = r0; \
+ call %[bpf_get_prandom_u32]; \
+ r2 = r0; \
+ if r0 s> 48 goto l0_%=; \
+ /* r0 has upper bound that should propagate into r2 */\
+ *(u64*)(r10 - 8) = r2; /* spill r2 */ \
+ r0 = 0; \
+ r2 = 0; /* clear r0 and r2 */\
+ r3 = *(u64*)(r10 - 8); /* fill r3 */\
+ if r0 s>= r3 goto l0_%=; \
+ /* r3 has lower and upper bounds */ \
+ r7 += r3; \
+ r0 = *(u64*)(r7 + 0); \
+l0_%=: exit; \
+" :
+ : __imm(bpf_get_prandom_u32),
+ __imm(bpf_map_lookup_elem),
+ __imm_addr(map_hash_48b)
+ : __clobber_all);
+}
+
+SEC("tracepoint")
+__description("regalloc three regs")
+__success __flag(BPF_F_ANY_ALIGNMENT)
+__naked void regalloc_three_regs(void)
+{
+ asm volatile (" \
+ r6 = r1; \
+ r1 = 0; \
+ *(u64*)(r10 - 8) = r1; \
+ r2 = r10; \
+ r2 += -8; \
+ r1 = %[map_hash_48b] ll; \
+ call %[bpf_map_lookup_elem]; \
+ if r0 == 0 goto l0_%=; \
+ r7 = r0; \
+ call %[bpf_get_prandom_u32]; \
+ r2 = r0; \
+ r4 = r2; \
+ if r0 s> 12 goto l0_%=; \
+ if r2 s< 0 goto l0_%=; \
+ r7 += r0; \
+ r7 += r2; \
+ r7 += r4; \
+ r0 = *(u64*)(r7 + 0); \
+l0_%=: exit; \
+" :
+ : __imm(bpf_get_prandom_u32),
+ __imm(bpf_map_lookup_elem),
+ __imm_addr(map_hash_48b)
+ : __clobber_all);
+}
+
+SEC("tracepoint")
+__description("regalloc after call")
+__success __flag(BPF_F_ANY_ALIGNMENT)
+__naked void regalloc_after_call(void)
+{
+ asm volatile (" \
+ r6 = r1; \
+ r1 = 0; \
+ *(u64*)(r10 - 8) = r1; \
+ r2 = r10; \
+ r2 += -8; \
+ r1 = %[map_hash_48b] ll; \
+ call %[bpf_map_lookup_elem]; \
+ if r0 == 0 goto l0_%=; \
+ r7 = r0; \
+ call %[bpf_get_prandom_u32]; \
+ r8 = r0; \
+ r9 = r0; \
+ call regalloc_after_call__1; \
+ if r8 s> 20 goto l0_%=; \
+ if r9 s< 0 goto l0_%=; \
+ r7 += r8; \
+ r7 += r9; \
+ r0 = *(u64*)(r7 + 0); \
+l0_%=: exit; \
+" :
+ : __imm(bpf_get_prandom_u32),
+ __imm(bpf_map_lookup_elem),
+ __imm_addr(map_hash_48b)
+ : __clobber_all);
+}
+
+static __naked __noinline __attribute__((used))
+void regalloc_after_call__1(void)
+{
+ asm volatile (" \
+ r0 = 0; \
+ exit; \
+" ::: __clobber_all);
+}
+
+SEC("tracepoint")
+__description("regalloc in callee")
+__success __flag(BPF_F_ANY_ALIGNMENT)
+__naked void regalloc_in_callee(void)
+{
+ asm volatile (" \
+ r6 = r1; \
+ r1 = 0; \
+ *(u64*)(r10 - 8) = r1; \
+ r2 = r10; \
+ r2 += -8; \
+ r1 = %[map_hash_48b] ll; \
+ call %[bpf_map_lookup_elem]; \
+ if r0 == 0 goto l0_%=; \
+ r7 = r0; \
+ call %[bpf_get_prandom_u32]; \
+ r1 = r0; \
+ r2 = r0; \
+ r3 = r7; \
+ call regalloc_in_callee__1; \
+l0_%=: exit; \
+" :
+ : __imm(bpf_get_prandom_u32),
+ __imm(bpf_map_lookup_elem),
+ __imm_addr(map_hash_48b)
+ : __clobber_all);
+}
+
+static __naked __noinline __attribute__((used))
+void regalloc_in_callee__1(void)
+{
+ asm volatile (" \
+ if r1 s> 20 goto l0_%=; \
+ if r2 s< 0 goto l0_%=; \
+ r3 += r1; \
+ r3 += r2; \
+ r0 = *(u64*)(r3 + 0); \
+ exit; \
+l0_%=: r0 = 0; \
+ exit; \
+" ::: __clobber_all);
+}
+
+SEC("tracepoint")
+__description("regalloc, spill, JEQ")
+__success
+__naked void regalloc_spill_jeq(void)
+{
+ asm volatile (" \
+ r6 = r1; \
+ r1 = 0; \
+ *(u64*)(r10 - 8) = r1; \
+ r2 = r10; \
+ r2 += -8; \
+ r1 = %[map_hash_48b] ll; \
+ call %[bpf_map_lookup_elem]; \
+ *(u64*)(r10 - 8) = r0; /* spill r0 */ \
+ if r0 == 0 goto l0_%=; \
+l0_%=: /* The verifier will walk the rest twice with r0 == 0 and r0 == map_value */\
+ call %[bpf_get_prandom_u32]; \
+ r2 = r0; \
+ if r2 == 20 goto l1_%=; \
+l1_%=: /* The verifier will walk the rest two more times with r0 == 20 and r0 == unknown */\
+ r3 = *(u64*)(r10 - 8); /* fill r3 with map_value */\
+ if r3 == 0 goto l2_%=; /* skip ldx if map_value == NULL */\
+ /* Buggy verifier will think that r3 == 20 here */\
+ r0 = *(u64*)(r3 + 0); /* read from map_value */\
+l2_%=: exit; \
+" :
+ : __imm(bpf_get_prandom_u32),
+ __imm(bpf_map_lookup_elem),
+ __imm_addr(map_hash_48b)
+ : __clobber_all);
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/verifier_ringbuf.c b/tools/testing/selftests/bpf/progs/verifier_ringbuf.c
new file mode 100644
index 000000000000..ae1d521f326c
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/verifier_ringbuf.c
@@ -0,0 +1,131 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Converted from tools/testing/selftests/bpf/verifier/ringbuf.c */
+
+#include <linux/bpf.h>
+#include <bpf/bpf_helpers.h>
+#include "bpf_misc.h"
+
+struct {
+ __uint(type, BPF_MAP_TYPE_RINGBUF);
+ __uint(max_entries, 4096);
+} map_ringbuf SEC(".maps");
+
+SEC("socket")
+__description("ringbuf: invalid reservation offset 1")
+__failure __msg("R1 must have zero offset when passed to release func")
+__failure_unpriv
+__naked void ringbuf_invalid_reservation_offset_1(void)
+{
+ asm volatile (" \
+ /* reserve 8 byte ringbuf memory */ \
+ r1 = 0; \
+ *(u64*)(r10 - 8) = r1; \
+ r1 = %[map_ringbuf] ll; \
+ r2 = 8; \
+ r3 = 0; \
+ call %[bpf_ringbuf_reserve]; \
+ /* store a pointer to the reserved memory in R6 */\
+ r6 = r0; \
+ /* check whether the reservation was successful */\
+ if r0 == 0 goto l0_%=; \
+ /* spill R6(mem) into the stack */ \
+ *(u64*)(r10 - 8) = r6; \
+ /* fill it back in R7 */ \
+ r7 = *(u64*)(r10 - 8); \
+ /* should be able to access *(R7) = 0 */ \
+ r1 = 0; \
+ *(u64*)(r7 + 0) = r1; \
+ /* submit the reserved ringbuf memory */ \
+ r1 = r7; \
+ /* add invalid offset to reserved ringbuf memory */\
+ r1 += 0xcafe; \
+ r2 = 0; \
+ call %[bpf_ringbuf_submit]; \
+l0_%=: r0 = 0; \
+ exit; \
+" :
+ : __imm(bpf_ringbuf_reserve),
+ __imm(bpf_ringbuf_submit),
+ __imm_addr(map_ringbuf)
+ : __clobber_all);
+}
+
+SEC("socket")
+__description("ringbuf: invalid reservation offset 2")
+__failure __msg("R7 min value is outside of the allowed memory range")
+__failure_unpriv
+__naked void ringbuf_invalid_reservation_offset_2(void)
+{
+ asm volatile (" \
+ /* reserve 8 byte ringbuf memory */ \
+ r1 = 0; \
+ *(u64*)(r10 - 8) = r1; \
+ r1 = %[map_ringbuf] ll; \
+ r2 = 8; \
+ r3 = 0; \
+ call %[bpf_ringbuf_reserve]; \
+ /* store a pointer to the reserved memory in R6 */\
+ r6 = r0; \
+ /* check whether the reservation was successful */\
+ if r0 == 0 goto l0_%=; \
+ /* spill R6(mem) into the stack */ \
+ *(u64*)(r10 - 8) = r6; \
+ /* fill it back in R7 */ \
+ r7 = *(u64*)(r10 - 8); \
+ /* add invalid offset to reserved ringbuf memory */\
+ r7 += 0xcafe; \
+ /* should be able to access *(R7) = 0 */ \
+ r1 = 0; \
+ *(u64*)(r7 + 0) = r1; \
+ /* submit the reserved ringbuf memory */ \
+ r1 = r7; \
+ r2 = 0; \
+ call %[bpf_ringbuf_submit]; \
+l0_%=: r0 = 0; \
+ exit; \
+" :
+ : __imm(bpf_ringbuf_reserve),
+ __imm(bpf_ringbuf_submit),
+ __imm_addr(map_ringbuf)
+ : __clobber_all);
+}
+
+SEC("xdp")
+__description("ringbuf: check passing rb mem to helpers")
+__success __retval(0)
+__naked void passing_rb_mem_to_helpers(void)
+{
+ asm volatile (" \
+ r6 = r1; \
+ /* reserve 8 byte ringbuf memory */ \
+ r1 = 0; \
+ *(u64*)(r10 - 8) = r1; \
+ r1 = %[map_ringbuf] ll; \
+ r2 = 8; \
+ r3 = 0; \
+ call %[bpf_ringbuf_reserve]; \
+ r7 = r0; \
+ /* check whether the reservation was successful */\
+ if r0 != 0 goto l0_%=; \
+ exit; \
+l0_%=: /* pass allocated ring buffer memory to fib lookup */\
+ r1 = r6; \
+ r2 = r0; \
+ r3 = 8; \
+ r4 = 0; \
+ call %[bpf_fib_lookup]; \
+ /* submit the ringbuf memory */ \
+ r1 = r7; \
+ r2 = 0; \
+ call %[bpf_ringbuf_submit]; \
+ r0 = 0; \
+ exit; \
+" :
+ : __imm(bpf_fib_lookup),
+ __imm(bpf_ringbuf_reserve),
+ __imm(bpf_ringbuf_submit),
+ __imm_addr(map_ringbuf)
+ : __clobber_all);
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/verifier_runtime_jit.c b/tools/testing/selftests/bpf/progs/verifier_runtime_jit.c
new file mode 100644
index 000000000000..27ebfc1fd9ee
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/verifier_runtime_jit.c
@@ -0,0 +1,360 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Converted from tools/testing/selftests/bpf/verifier/runtime_jit.c */
+
+#include <linux/bpf.h>
+#include <bpf/bpf_helpers.h>
+#include "bpf_misc.h"
+
+void dummy_prog_42_socket(void);
+void dummy_prog_24_socket(void);
+void dummy_prog_loop1_socket(void);
+void dummy_prog_loop2_socket(void);
+
+struct {
+ __uint(type, BPF_MAP_TYPE_PROG_ARRAY);
+ __uint(max_entries, 4);
+ __uint(key_size, sizeof(int));
+ __array(values, void (void));
+} map_prog1_socket SEC(".maps") = {
+ .values = {
+ [0] = (void *)&dummy_prog_42_socket,
+ [1] = (void *)&dummy_prog_loop1_socket,
+ [2] = (void *)&dummy_prog_24_socket,
+ },
+};
+
+struct {
+ __uint(type, BPF_MAP_TYPE_PROG_ARRAY);
+ __uint(max_entries, 8);
+ __uint(key_size, sizeof(int));
+ __array(values, void (void));
+} map_prog2_socket SEC(".maps") = {
+ .values = {
+ [1] = (void *)&dummy_prog_loop2_socket,
+ [2] = (void *)&dummy_prog_24_socket,
+ [7] = (void *)&dummy_prog_42_socket,
+ },
+};
+
+SEC("socket")
+__auxiliary __auxiliary_unpriv
+__naked void dummy_prog_42_socket(void)
+{
+ asm volatile ("r0 = 42; exit;");
+}
+
+SEC("socket")
+__auxiliary __auxiliary_unpriv
+__naked void dummy_prog_24_socket(void)
+{
+ asm volatile ("r0 = 24; exit;");
+}
+
+SEC("socket")
+__auxiliary __auxiliary_unpriv
+__naked void dummy_prog_loop1_socket(void)
+{
+ asm volatile (" \
+ r3 = 1; \
+ r2 = %[map_prog1_socket] ll; \
+ call %[bpf_tail_call]; \
+ r0 = 41; \
+ exit; \
+" :
+ : __imm(bpf_tail_call),
+ __imm_addr(map_prog1_socket)
+ : __clobber_all);
+}
+
+SEC("socket")
+__auxiliary __auxiliary_unpriv
+__naked void dummy_prog_loop2_socket(void)
+{
+ asm volatile (" \
+ r3 = 1; \
+ r2 = %[map_prog2_socket] ll; \
+ call %[bpf_tail_call]; \
+ r0 = 41; \
+ exit; \
+" :
+ : __imm(bpf_tail_call),
+ __imm_addr(map_prog2_socket)
+ : __clobber_all);
+}
+
+SEC("socket")
+__description("runtime/jit: tail_call within bounds, prog once")
+__success __success_unpriv __retval(42)
+__naked void call_within_bounds_prog_once(void)
+{
+ asm volatile (" \
+ r3 = 0; \
+ r2 = %[map_prog1_socket] ll; \
+ call %[bpf_tail_call]; \
+ r0 = 1; \
+ exit; \
+" :
+ : __imm(bpf_tail_call),
+ __imm_addr(map_prog1_socket)
+ : __clobber_all);
+}
+
+SEC("socket")
+__description("runtime/jit: tail_call within bounds, prog loop")
+__success __success_unpriv __retval(41)
+__naked void call_within_bounds_prog_loop(void)
+{
+ asm volatile (" \
+ r3 = 1; \
+ r2 = %[map_prog1_socket] ll; \
+ call %[bpf_tail_call]; \
+ r0 = 1; \
+ exit; \
+" :
+ : __imm(bpf_tail_call),
+ __imm_addr(map_prog1_socket)
+ : __clobber_all);
+}
+
+SEC("socket")
+__description("runtime/jit: tail_call within bounds, no prog")
+__success __success_unpriv __retval(1)
+__naked void call_within_bounds_no_prog(void)
+{
+ asm volatile (" \
+ r3 = 3; \
+ r2 = %[map_prog1_socket] ll; \
+ call %[bpf_tail_call]; \
+ r0 = 1; \
+ exit; \
+" :
+ : __imm(bpf_tail_call),
+ __imm_addr(map_prog1_socket)
+ : __clobber_all);
+}
+
+SEC("socket")
+__description("runtime/jit: tail_call within bounds, key 2")
+__success __success_unpriv __retval(24)
+__naked void call_within_bounds_key_2(void)
+{
+ asm volatile (" \
+ r3 = 2; \
+ r2 = %[map_prog1_socket] ll; \
+ call %[bpf_tail_call]; \
+ r0 = 1; \
+ exit; \
+" :
+ : __imm(bpf_tail_call),
+ __imm_addr(map_prog1_socket)
+ : __clobber_all);
+}
+
+SEC("socket")
+__description("runtime/jit: tail_call within bounds, key 2 / key 2, first branch")
+__success __success_unpriv __retval(24)
+__naked void _2_key_2_first_branch(void)
+{
+ asm volatile (" \
+ r0 = 13; \
+ *(u8*)(r1 + %[__sk_buff_cb_0]) = r0; \
+ r0 = *(u8*)(r1 + %[__sk_buff_cb_0]); \
+ if r0 == 13 goto l0_%=; \
+ r3 = 2; \
+ r2 = %[map_prog1_socket] ll; \
+ goto l1_%=; \
+l0_%=: r3 = 2; \
+ r2 = %[map_prog1_socket] ll; \
+l1_%=: call %[bpf_tail_call]; \
+ r0 = 1; \
+ exit; \
+" :
+ : __imm(bpf_tail_call),
+ __imm_addr(map_prog1_socket),
+ __imm_const(__sk_buff_cb_0, offsetof(struct __sk_buff, cb[0]))
+ : __clobber_all);
+}
+
+SEC("socket")
+__description("runtime/jit: tail_call within bounds, key 2 / key 2, second branch")
+__success __success_unpriv __retval(24)
+__naked void _2_key_2_second_branch(void)
+{
+ asm volatile (" \
+ r0 = 14; \
+ *(u8*)(r1 + %[__sk_buff_cb_0]) = r0; \
+ r0 = *(u8*)(r1 + %[__sk_buff_cb_0]); \
+ if r0 == 13 goto l0_%=; \
+ r3 = 2; \
+ r2 = %[map_prog1_socket] ll; \
+ goto l1_%=; \
+l0_%=: r3 = 2; \
+ r2 = %[map_prog1_socket] ll; \
+l1_%=: call %[bpf_tail_call]; \
+ r0 = 1; \
+ exit; \
+" :
+ : __imm(bpf_tail_call),
+ __imm_addr(map_prog1_socket),
+ __imm_const(__sk_buff_cb_0, offsetof(struct __sk_buff, cb[0]))
+ : __clobber_all);
+}
+
+SEC("socket")
+__description("runtime/jit: tail_call within bounds, key 0 / key 2, first branch")
+__success __success_unpriv __retval(24)
+__naked void _0_key_2_first_branch(void)
+{
+ asm volatile (" \
+ r0 = 13; \
+ *(u8*)(r1 + %[__sk_buff_cb_0]) = r0; \
+ r0 = *(u8*)(r1 + %[__sk_buff_cb_0]); \
+ if r0 == 13 goto l0_%=; \
+ r3 = 0; \
+ r2 = %[map_prog1_socket] ll; \
+ goto l1_%=; \
+l0_%=: r3 = 2; \
+ r2 = %[map_prog1_socket] ll; \
+l1_%=: call %[bpf_tail_call]; \
+ r0 = 1; \
+ exit; \
+" :
+ : __imm(bpf_tail_call),
+ __imm_addr(map_prog1_socket),
+ __imm_const(__sk_buff_cb_0, offsetof(struct __sk_buff, cb[0]))
+ : __clobber_all);
+}
+
+SEC("socket")
+__description("runtime/jit: tail_call within bounds, key 0 / key 2, second branch")
+__success __success_unpriv __retval(42)
+__naked void _0_key_2_second_branch(void)
+{
+ asm volatile (" \
+ r0 = 14; \
+ *(u8*)(r1 + %[__sk_buff_cb_0]) = r0; \
+ r0 = *(u8*)(r1 + %[__sk_buff_cb_0]); \
+ if r0 == 13 goto l0_%=; \
+ r3 = 0; \
+ r2 = %[map_prog1_socket] ll; \
+ goto l1_%=; \
+l0_%=: r3 = 2; \
+ r2 = %[map_prog1_socket] ll; \
+l1_%=: call %[bpf_tail_call]; \
+ r0 = 1; \
+ exit; \
+" :
+ : __imm(bpf_tail_call),
+ __imm_addr(map_prog1_socket),
+ __imm_const(__sk_buff_cb_0, offsetof(struct __sk_buff, cb[0]))
+ : __clobber_all);
+}
+
+SEC("socket")
+__description("runtime/jit: tail_call within bounds, different maps, first branch")
+__success __failure_unpriv __msg_unpriv("tail_call abusing map_ptr")
+__retval(1)
+__naked void bounds_different_maps_first_branch(void)
+{
+ asm volatile (" \
+ r0 = 13; \
+ *(u8*)(r1 + %[__sk_buff_cb_0]) = r0; \
+ r0 = *(u8*)(r1 + %[__sk_buff_cb_0]); \
+ if r0 == 13 goto l0_%=; \
+ r3 = 0; \
+ r2 = %[map_prog1_socket] ll; \
+ goto l1_%=; \
+l0_%=: r3 = 0; \
+ r2 = %[map_prog2_socket] ll; \
+l1_%=: call %[bpf_tail_call]; \
+ r0 = 1; \
+ exit; \
+" :
+ : __imm(bpf_tail_call),
+ __imm_addr(map_prog1_socket),
+ __imm_addr(map_prog2_socket),
+ __imm_const(__sk_buff_cb_0, offsetof(struct __sk_buff, cb[0]))
+ : __clobber_all);
+}
+
+SEC("socket")
+__description("runtime/jit: tail_call within bounds, different maps, second branch")
+__success __failure_unpriv __msg_unpriv("tail_call abusing map_ptr")
+__retval(42)
+__naked void bounds_different_maps_second_branch(void)
+{
+ asm volatile (" \
+ r0 = 14; \
+ *(u8*)(r1 + %[__sk_buff_cb_0]) = r0; \
+ r0 = *(u8*)(r1 + %[__sk_buff_cb_0]); \
+ if r0 == 13 goto l0_%=; \
+ r3 = 0; \
+ r2 = %[map_prog1_socket] ll; \
+ goto l1_%=; \
+l0_%=: r3 = 0; \
+ r2 = %[map_prog2_socket] ll; \
+l1_%=: call %[bpf_tail_call]; \
+ r0 = 1; \
+ exit; \
+" :
+ : __imm(bpf_tail_call),
+ __imm_addr(map_prog1_socket),
+ __imm_addr(map_prog2_socket),
+ __imm_const(__sk_buff_cb_0, offsetof(struct __sk_buff, cb[0]))
+ : __clobber_all);
+}
+
+SEC("socket")
+__description("runtime/jit: tail_call out of bounds")
+__success __success_unpriv __retval(2)
+__naked void tail_call_out_of_bounds(void)
+{
+ asm volatile (" \
+ r3 = 256; \
+ r2 = %[map_prog1_socket] ll; \
+ call %[bpf_tail_call]; \
+ r0 = 2; \
+ exit; \
+" :
+ : __imm(bpf_tail_call),
+ __imm_addr(map_prog1_socket)
+ : __clobber_all);
+}
+
+SEC("socket")
+__description("runtime/jit: pass negative index to tail_call")
+__success __success_unpriv __retval(2)
+__naked void negative_index_to_tail_call(void)
+{
+ asm volatile (" \
+ r3 = -1; \
+ r2 = %[map_prog1_socket] ll; \
+ call %[bpf_tail_call]; \
+ r0 = 2; \
+ exit; \
+" :
+ : __imm(bpf_tail_call),
+ __imm_addr(map_prog1_socket)
+ : __clobber_all);
+}
+
+SEC("socket")
+__description("runtime/jit: pass > 32bit index to tail_call")
+__success __success_unpriv __retval(42)
+/* Verifier rewrite for unpriv skips tail call here. */
+__retval_unpriv(2)
+__naked void _32bit_index_to_tail_call(void)
+{
+ asm volatile (" \
+ r3 = 0x100000000 ll; \
+ r2 = %[map_prog1_socket] ll; \
+ call %[bpf_tail_call]; \
+ r0 = 2; \
+ exit; \
+" :
+ : __imm(bpf_tail_call),
+ __imm_addr(map_prog1_socket)
+ : __clobber_all);
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/verifier_scalar_ids.c b/tools/testing/selftests/bpf/progs/verifier_scalar_ids.c
new file mode 100644
index 000000000000..13b29a7faa71
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/verifier_scalar_ids.c
@@ -0,0 +1,659 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include <linux/bpf.h>
+#include <bpf/bpf_helpers.h>
+#include "bpf_misc.h"
+
+/* Check that precision marks propagate through scalar IDs.
+ * Registers r{0,1,2} have the same scalar ID at the moment when r0 is
+ * marked to be precise, this mark is immediately propagated to r{1,2}.
+ */
+SEC("socket")
+__success __log_level(2)
+__msg("frame0: regs=r0,r1,r2 stack= before 4: (bf) r3 = r10")
+__msg("frame0: regs=r0,r1,r2 stack= before 3: (bf) r2 = r0")
+__msg("frame0: regs=r0,r1 stack= before 2: (bf) r1 = r0")
+__msg("frame0: regs=r0 stack= before 1: (57) r0 &= 255")
+__msg("frame0: regs=r0 stack= before 0: (85) call bpf_ktime_get_ns")
+__flag(BPF_F_TEST_STATE_FREQ)
+__naked void precision_same_state(void)
+{
+ asm volatile (
+ /* r0 = random number up to 0xff */
+ "call %[bpf_ktime_get_ns];"
+ "r0 &= 0xff;"
+ /* tie r0.id == r1.id == r2.id */
+ "r1 = r0;"
+ "r2 = r0;"
+ /* force r0 to be precise, this immediately marks r1 and r2 as
+ * precise as well because of shared IDs
+ */
+ "r3 = r10;"
+ "r3 += r0;"
+ "r0 = 0;"
+ "exit;"
+ :
+ : __imm(bpf_ktime_get_ns)
+ : __clobber_all);
+}
+
+/* Same as precision_same_state, but mark propagates through state /
+ * parent state boundary.
+ */
+SEC("socket")
+__success __log_level(2)
+__msg("frame0: last_idx 6 first_idx 5 subseq_idx -1")
+__msg("frame0: regs=r0,r1,r2 stack= before 5: (bf) r3 = r10")
+__msg("frame0: parent state regs=r0,r1,r2 stack=:")
+__msg("frame0: regs=r0,r1,r2 stack= before 4: (05) goto pc+0")
+__msg("frame0: regs=r0,r1,r2 stack= before 3: (bf) r2 = r0")
+__msg("frame0: regs=r0,r1 stack= before 2: (bf) r1 = r0")
+__msg("frame0: regs=r0 stack= before 1: (57) r0 &= 255")
+__msg("frame0: parent state regs=r0 stack=:")
+__msg("frame0: regs=r0 stack= before 0: (85) call bpf_ktime_get_ns")
+__flag(BPF_F_TEST_STATE_FREQ)
+__naked void precision_cross_state(void)
+{
+ asm volatile (
+ /* r0 = random number up to 0xff */
+ "call %[bpf_ktime_get_ns];"
+ "r0 &= 0xff;"
+ /* tie r0.id == r1.id == r2.id */
+ "r1 = r0;"
+ "r2 = r0;"
+ /* force checkpoint */
+ "goto +0;"
+ /* force r0 to be precise, this immediately marks r1 and r2 as
+ * precise as well because of shared IDs
+ */
+ "r3 = r10;"
+ "r3 += r0;"
+ "r0 = 0;"
+ "exit;"
+ :
+ : __imm(bpf_ktime_get_ns)
+ : __clobber_all);
+}
+
+/* Same as precision_same_state, but break one of the
+ * links, note that r1 is absent from regs=... in __msg below.
+ */
+SEC("socket")
+__success __log_level(2)
+__msg("frame0: regs=r0,r2 stack= before 5: (bf) r3 = r10")
+__msg("frame0: regs=r0,r2 stack= before 4: (b7) r1 = 0")
+__msg("frame0: regs=r0,r2 stack= before 3: (bf) r2 = r0")
+__msg("frame0: regs=r0 stack= before 2: (bf) r1 = r0")
+__msg("frame0: regs=r0 stack= before 1: (57) r0 &= 255")
+__msg("frame0: regs=r0 stack= before 0: (85) call bpf_ktime_get_ns")
+__flag(BPF_F_TEST_STATE_FREQ)
+__naked void precision_same_state_broken_link(void)
+{
+ asm volatile (
+ /* r0 = random number up to 0xff */
+ "call %[bpf_ktime_get_ns];"
+ "r0 &= 0xff;"
+ /* tie r0.id == r1.id == r2.id */
+ "r1 = r0;"
+ "r2 = r0;"
+ /* break link for r1, this is the only line that differs
+ * compared to the previous test
+ */
+ "r1 = 0;"
+ /* force r0 to be precise, this immediately marks r1 and r2 as
+ * precise as well because of shared IDs
+ */
+ "r3 = r10;"
+ "r3 += r0;"
+ "r0 = 0;"
+ "exit;"
+ :
+ : __imm(bpf_ktime_get_ns)
+ : __clobber_all);
+}
+
+/* Same as precision_same_state_broken_link, but with state /
+ * parent state boundary.
+ */
+SEC("socket")
+__success __log_level(2)
+__msg("frame0: regs=r0,r2 stack= before 6: (bf) r3 = r10")
+__msg("frame0: regs=r0,r2 stack= before 5: (b7) r1 = 0")
+__msg("frame0: parent state regs=r0,r2 stack=:")
+__msg("frame0: regs=r0,r1,r2 stack= before 4: (05) goto pc+0")
+__msg("frame0: regs=r0,r1,r2 stack= before 3: (bf) r2 = r0")
+__msg("frame0: regs=r0,r1 stack= before 2: (bf) r1 = r0")
+__msg("frame0: regs=r0 stack= before 1: (57) r0 &= 255")
+__msg("frame0: parent state regs=r0 stack=:")
+__msg("frame0: regs=r0 stack= before 0: (85) call bpf_ktime_get_ns")
+__flag(BPF_F_TEST_STATE_FREQ)
+__naked void precision_cross_state_broken_link(void)
+{
+ asm volatile (
+ /* r0 = random number up to 0xff */
+ "call %[bpf_ktime_get_ns];"
+ "r0 &= 0xff;"
+ /* tie r0.id == r1.id == r2.id */
+ "r1 = r0;"
+ "r2 = r0;"
+ /* force checkpoint, although link between r1 and r{0,2} is
+ * broken by the next statement current precision tracking
+ * algorithm can't react to it and propagates mark for r1 to
+ * the parent state.
+ */
+ "goto +0;"
+ /* break link for r1, this is the only line that differs
+ * compared to precision_cross_state()
+ */
+ "r1 = 0;"
+ /* force r0 to be precise, this immediately marks r1 and r2 as
+ * precise as well because of shared IDs
+ */
+ "r3 = r10;"
+ "r3 += r0;"
+ "r0 = 0;"
+ "exit;"
+ :
+ : __imm(bpf_ktime_get_ns)
+ : __clobber_all);
+}
+
+/* Check that precision marks propagate through scalar IDs.
+ * Use the same scalar ID in multiple stack frames, check that
+ * precision information is propagated up the call stack.
+ */
+SEC("socket")
+__success __log_level(2)
+__msg("11: (0f) r2 += r1")
+/* Current state */
+__msg("frame2: last_idx 11 first_idx 10 subseq_idx -1")
+__msg("frame2: regs=r1 stack= before 10: (bf) r2 = r10")
+__msg("frame2: parent state regs=r1 stack=")
+/* frame1.r{6,7} are marked because mark_precise_scalar_ids()
+ * looks for all registers with frame2.r1.id in the current state
+ */
+__msg("frame1: parent state regs=r6,r7 stack=")
+__msg("frame0: parent state regs=r6 stack=")
+/* Parent state */
+__msg("frame2: last_idx 8 first_idx 8 subseq_idx 10")
+__msg("frame2: regs=r1 stack= before 8: (85) call pc+1")
+/* frame1.r1 is marked because of backtracking of call instruction */
+__msg("frame1: parent state regs=r1,r6,r7 stack=")
+__msg("frame0: parent state regs=r6 stack=")
+/* Parent state */
+__msg("frame1: last_idx 7 first_idx 6 subseq_idx 8")
+__msg("frame1: regs=r1,r6,r7 stack= before 7: (bf) r7 = r1")
+__msg("frame1: regs=r1,r6 stack= before 6: (bf) r6 = r1")
+__msg("frame1: parent state regs=r1 stack=")
+__msg("frame0: parent state regs=r6 stack=")
+/* Parent state */
+__msg("frame1: last_idx 4 first_idx 4 subseq_idx 6")
+__msg("frame1: regs=r1 stack= before 4: (85) call pc+1")
+__msg("frame0: parent state regs=r1,r6 stack=")
+/* Parent state */
+__msg("frame0: last_idx 3 first_idx 1 subseq_idx 4")
+__msg("frame0: regs=r0,r1,r6 stack= before 3: (bf) r6 = r0")
+__msg("frame0: regs=r0,r1 stack= before 2: (bf) r1 = r0")
+__msg("frame0: regs=r0 stack= before 1: (57) r0 &= 255")
+__flag(BPF_F_TEST_STATE_FREQ)
+__naked void precision_many_frames(void)
+{
+ asm volatile (
+ /* r0 = random number up to 0xff */
+ "call %[bpf_ktime_get_ns];"
+ "r0 &= 0xff;"
+ /* tie r0.id == r1.id == r6.id */
+ "r1 = r0;"
+ "r6 = r0;"
+ "call precision_many_frames__foo;"
+ "exit;"
+ :
+ : __imm(bpf_ktime_get_ns)
+ : __clobber_all);
+}
+
+static __naked __noinline __used
+void precision_many_frames__foo(void)
+{
+ asm volatile (
+ /* conflate one of the register numbers (r6) with outer frame,
+ * to verify that those are tracked independently
+ */
+ "r6 = r1;"
+ "r7 = r1;"
+ "call precision_many_frames__bar;"
+ "exit"
+ ::: __clobber_all);
+}
+
+static __naked __noinline __used
+void precision_many_frames__bar(void)
+{
+ asm volatile (
+ /* force r1 to be precise, this immediately marks:
+ * - bar frame r1
+ * - foo frame r{1,6,7}
+ * - main frame r{1,6}
+ */
+ "r2 = r10;"
+ "r2 += r1;"
+ "r0 = 0;"
+ "exit;"
+ ::: __clobber_all);
+}
+
+/* Check that scalars with the same IDs are marked precise on stack as
+ * well as in registers.
+ */
+SEC("socket")
+__success __log_level(2)
+/* foo frame */
+__msg("frame1: regs=r1 stack=-8,-16 before 9: (bf) r2 = r10")
+__msg("frame1: regs=r1 stack=-8,-16 before 8: (7b) *(u64 *)(r10 -16) = r1")
+__msg("frame1: regs=r1 stack=-8 before 7: (7b) *(u64 *)(r10 -8) = r1")
+__msg("frame1: regs=r1 stack= before 4: (85) call pc+2")
+/* main frame */
+__msg("frame0: regs=r0,r1 stack=-8 before 3: (7b) *(u64 *)(r10 -8) = r1")
+__msg("frame0: regs=r0,r1 stack= before 2: (bf) r1 = r0")
+__msg("frame0: regs=r0 stack= before 1: (57) r0 &= 255")
+__flag(BPF_F_TEST_STATE_FREQ)
+__naked void precision_stack(void)
+{
+ asm volatile (
+ /* r0 = random number up to 0xff */
+ "call %[bpf_ktime_get_ns];"
+ "r0 &= 0xff;"
+ /* tie r0.id == r1.id == fp[-8].id */
+ "r1 = r0;"
+ "*(u64*)(r10 - 8) = r1;"
+ "call precision_stack__foo;"
+ "r0 = 0;"
+ "exit;"
+ :
+ : __imm(bpf_ktime_get_ns)
+ : __clobber_all);
+}
+
+static __naked __noinline __used
+void precision_stack__foo(void)
+{
+ asm volatile (
+ /* conflate one of the register numbers (r6) with outer frame,
+ * to verify that those are tracked independently
+ */
+ "*(u64*)(r10 - 8) = r1;"
+ "*(u64*)(r10 - 16) = r1;"
+ /* force r1 to be precise, this immediately marks:
+ * - foo frame r1,fp{-8,-16}
+ * - main frame r1,fp{-8}
+ */
+ "r2 = r10;"
+ "r2 += r1;"
+ "exit"
+ ::: __clobber_all);
+}
+
+/* Use two separate scalar IDs to check that these are propagated
+ * independently.
+ */
+SEC("socket")
+__success __log_level(2)
+/* r{6,7} */
+__msg("11: (0f) r3 += r7")
+__msg("frame0: regs=r6,r7 stack= before 10: (bf) r3 = r10")
+/* ... skip some insns ... */
+__msg("frame0: regs=r6,r7 stack= before 3: (bf) r7 = r0")
+__msg("frame0: regs=r0,r6 stack= before 2: (bf) r6 = r0")
+/* r{8,9} */
+__msg("12: (0f) r3 += r9")
+__msg("frame0: regs=r8,r9 stack= before 11: (0f) r3 += r7")
+/* ... skip some insns ... */
+__msg("frame0: regs=r8,r9 stack= before 7: (bf) r9 = r0")
+__msg("frame0: regs=r0,r8 stack= before 6: (bf) r8 = r0")
+__flag(BPF_F_TEST_STATE_FREQ)
+__naked void precision_two_ids(void)
+{
+ asm volatile (
+ /* r6 = random number up to 0xff
+ * r6.id == r7.id
+ */
+ "call %[bpf_ktime_get_ns];"
+ "r0 &= 0xff;"
+ "r6 = r0;"
+ "r7 = r0;"
+ /* same, but for r{8,9} */
+ "call %[bpf_ktime_get_ns];"
+ "r0 &= 0xff;"
+ "r8 = r0;"
+ "r9 = r0;"
+ /* clear r0 id */
+ "r0 = 0;"
+ /* force checkpoint */
+ "goto +0;"
+ "r3 = r10;"
+ /* force r7 to be precise, this also marks r6 */
+ "r3 += r7;"
+ /* force r9 to be precise, this also marks r8 */
+ "r3 += r9;"
+ "exit;"
+ :
+ : __imm(bpf_ktime_get_ns)
+ : __clobber_all);
+}
+
+/* Verify that check_ids() is used by regsafe() for scalars.
+ *
+ * r9 = ... some pointer with range X ...
+ * r6 = ... unbound scalar ID=a ...
+ * r7 = ... unbound scalar ID=b ...
+ * if (r6 > r7) goto +1
+ * r7 = r6
+ * if (r7 > X) goto exit
+ * r9 += r6
+ * ... access memory using r9 ...
+ *
+ * The memory access is safe only if r7 is bounded,
+ * which is true for one branch and not true for another.
+ */
+SEC("socket")
+__failure __msg("register with unbounded min value")
+__flag(BPF_F_TEST_STATE_FREQ)
+__naked void check_ids_in_regsafe(void)
+{
+ asm volatile (
+ /* Bump allocated stack */
+ "r1 = 0;"
+ "*(u64*)(r10 - 8) = r1;"
+ /* r9 = pointer to stack */
+ "r9 = r10;"
+ "r9 += -8;"
+ /* r7 = ktime_get_ns() */
+ "call %[bpf_ktime_get_ns];"
+ "r7 = r0;"
+ /* r6 = ktime_get_ns() */
+ "call %[bpf_ktime_get_ns];"
+ "r6 = r0;"
+ /* if r6 > r7 is an unpredictable jump */
+ "if r6 > r7 goto l1_%=;"
+ "r7 = r6;"
+"l1_%=:"
+ /* if r7 > 4 ...; transfers range to r6 on one execution path
+ * but does not transfer on another
+ */
+ "if r7 > 4 goto l2_%=;"
+ /* Access memory at r9[r6], r6 is not always bounded */
+ "r9 += r6;"
+ "r0 = *(u8*)(r9 + 0);"
+"l2_%=:"
+ "r0 = 0;"
+ "exit;"
+ :
+ : __imm(bpf_ktime_get_ns)
+ : __clobber_all);
+}
+
+/* Similar to check_ids_in_regsafe.
+ * The l0 could be reached in two states:
+ *
+ * (1) r6{.id=A}, r7{.id=A}, r8{.id=B}
+ * (2) r6{.id=B}, r7{.id=A}, r8{.id=B}
+ *
+ * Where (2) is not safe, as "r7 > 4" check won't propagate range for it.
+ * This example would be considered safe without changes to
+ * mark_chain_precision() to track scalar values with equal IDs.
+ */
+SEC("socket")
+__failure __msg("register with unbounded min value")
+__flag(BPF_F_TEST_STATE_FREQ)
+__naked void check_ids_in_regsafe_2(void)
+{
+ asm volatile (
+ /* Bump allocated stack */
+ "r1 = 0;"
+ "*(u64*)(r10 - 8) = r1;"
+ /* r9 = pointer to stack */
+ "r9 = r10;"
+ "r9 += -8;"
+ /* r8 = ktime_get_ns() */
+ "call %[bpf_ktime_get_ns];"
+ "r8 = r0;"
+ /* r7 = ktime_get_ns() */
+ "call %[bpf_ktime_get_ns];"
+ "r7 = r0;"
+ /* r6 = ktime_get_ns() */
+ "call %[bpf_ktime_get_ns];"
+ "r6 = r0;"
+ /* scratch .id from r0 */
+ "r0 = 0;"
+ /* if r6 > r7 is an unpredictable jump */
+ "if r6 > r7 goto l1_%=;"
+ /* tie r6 and r7 .id */
+ "r6 = r7;"
+"l0_%=:"
+ /* if r7 > 4 exit(0) */
+ "if r7 > 4 goto l2_%=;"
+ /* Access memory at r9[r6] */
+ "r9 += r6;"
+ "r0 = *(u8*)(r9 + 0);"
+"l2_%=:"
+ "r0 = 0;"
+ "exit;"
+"l1_%=:"
+ /* tie r6 and r8 .id */
+ "r6 = r8;"
+ "goto l0_%=;"
+ :
+ : __imm(bpf_ktime_get_ns)
+ : __clobber_all);
+}
+
+/* Check that scalar IDs *are not* generated on register to register
+ * assignments if source register is a constant.
+ *
+ * If such IDs *are* generated the 'l1' below would be reached in
+ * two states:
+ *
+ * (1) r1{.id=A}, r2{.id=A}
+ * (2) r1{.id=C}, r2{.id=C}
+ *
+ * Thus forcing 'if r1 == r2' verification twice.
+ */
+SEC("socket")
+__success __log_level(2)
+__msg("11: (1d) if r3 == r4 goto pc+0")
+__msg("frame 0: propagating r3,r4")
+__msg("11: safe")
+__msg("processed 15 insns")
+__flag(BPF_F_TEST_STATE_FREQ)
+__naked void no_scalar_id_for_const(void)
+{
+ asm volatile (
+ "call %[bpf_ktime_get_ns];"
+ /* unpredictable jump */
+ "if r0 > 7 goto l0_%=;"
+ /* possibly generate same scalar ids for r3 and r4 */
+ "r1 = 0;"
+ "r1 = r1;"
+ "r3 = r1;"
+ "r4 = r1;"
+ "goto l1_%=;"
+"l0_%=:"
+ /* possibly generate different scalar ids for r3 and r4 */
+ "r1 = 0;"
+ "r2 = 0;"
+ "r3 = r1;"
+ "r4 = r2;"
+"l1_%=:"
+ /* predictable jump, marks r3 and r4 precise */
+ "if r3 == r4 goto +0;"
+ "r0 = 0;"
+ "exit;"
+ :
+ : __imm(bpf_ktime_get_ns)
+ : __clobber_all);
+}
+
+/* Same as no_scalar_id_for_const() but for 32-bit values */
+SEC("socket")
+__success __log_level(2)
+__msg("11: (1e) if w3 == w4 goto pc+0")
+__msg("frame 0: propagating r3,r4")
+__msg("11: safe")
+__msg("processed 15 insns")
+__flag(BPF_F_TEST_STATE_FREQ)
+__naked void no_scalar_id_for_const32(void)
+{
+ asm volatile (
+ "call %[bpf_ktime_get_ns];"
+ /* unpredictable jump */
+ "if r0 > 7 goto l0_%=;"
+ /* possibly generate same scalar ids for r3 and r4 */
+ "w1 = 0;"
+ "w1 = w1;"
+ "w3 = w1;"
+ "w4 = w1;"
+ "goto l1_%=;"
+"l0_%=:"
+ /* possibly generate different scalar ids for r3 and r4 */
+ "w1 = 0;"
+ "w2 = 0;"
+ "w3 = w1;"
+ "w4 = w2;"
+"l1_%=:"
+ /* predictable jump, marks r1 and r2 precise */
+ "if w3 == w4 goto +0;"
+ "r0 = 0;"
+ "exit;"
+ :
+ : __imm(bpf_ktime_get_ns)
+ : __clobber_all);
+}
+
+/* Check that unique scalar IDs are ignored when new verifier state is
+ * compared to cached verifier state. For this test:
+ * - cached state has no id on r1
+ * - new state has a unique id on r1
+ */
+SEC("socket")
+__success __log_level(2)
+__msg("6: (25) if r6 > 0x7 goto pc+1")
+__msg("7: (57) r1 &= 255")
+__msg("8: (bf) r2 = r10")
+__msg("from 6 to 8: safe")
+__msg("processed 12 insns")
+__flag(BPF_F_TEST_STATE_FREQ)
+__naked void ignore_unique_scalar_ids_cur(void)
+{
+ asm volatile (
+ "call %[bpf_ktime_get_ns];"
+ "r6 = r0;"
+ "call %[bpf_ktime_get_ns];"
+ "r0 &= 0xff;"
+ /* r1.id == r0.id */
+ "r1 = r0;"
+ /* make r1.id unique */
+ "r0 = 0;"
+ "if r6 > 7 goto l0_%=;"
+ /* clear r1 id, but keep the range compatible */
+ "r1 &= 0xff;"
+"l0_%=:"
+ /* get here in two states:
+ * - first: r1 has no id (cached state)
+ * - second: r1 has a unique id (should be considered equivalent)
+ */
+ "r2 = r10;"
+ "r2 += r1;"
+ "exit;"
+ :
+ : __imm(bpf_ktime_get_ns)
+ : __clobber_all);
+}
+
+/* Check that unique scalar IDs are ignored when new verifier state is
+ * compared to cached verifier state. For this test:
+ * - cached state has a unique id on r1
+ * - new state has no id on r1
+ */
+SEC("socket")
+__success __log_level(2)
+__msg("6: (25) if r6 > 0x7 goto pc+1")
+__msg("7: (05) goto pc+1")
+__msg("9: (bf) r2 = r10")
+__msg("9: safe")
+__msg("processed 13 insns")
+__flag(BPF_F_TEST_STATE_FREQ)
+__naked void ignore_unique_scalar_ids_old(void)
+{
+ asm volatile (
+ "call %[bpf_ktime_get_ns];"
+ "r6 = r0;"
+ "call %[bpf_ktime_get_ns];"
+ "r0 &= 0xff;"
+ /* r1.id == r0.id */
+ "r1 = r0;"
+ /* make r1.id unique */
+ "r0 = 0;"
+ "if r6 > 7 goto l1_%=;"
+ "goto l0_%=;"
+"l1_%=:"
+ /* clear r1 id, but keep the range compatible */
+ "r1 &= 0xff;"
+"l0_%=:"
+ /* get here in two states:
+ * - first: r1 has a unique id (cached state)
+ * - second: r1 has no id (should be considered equivalent)
+ */
+ "r2 = r10;"
+ "r2 += r1;"
+ "exit;"
+ :
+ : __imm(bpf_ktime_get_ns)
+ : __clobber_all);
+}
+
+/* Check that two different scalar IDs in a verified state can't be
+ * mapped to the same scalar ID in current state.
+ */
+SEC("socket")
+__success __log_level(2)
+/* The exit instruction should be reachable from two states,
+ * use two matches and "processed .. insns" to ensure this.
+ */
+__msg("13: (95) exit")
+__msg("13: (95) exit")
+__msg("processed 18 insns")
+__flag(BPF_F_TEST_STATE_FREQ)
+__naked void two_old_ids_one_cur_id(void)
+{
+ asm volatile (
+ /* Give unique scalar IDs to r{6,7} */
+ "call %[bpf_ktime_get_ns];"
+ "r0 &= 0xff;"
+ "r6 = r0;"
+ "call %[bpf_ktime_get_ns];"
+ "r0 &= 0xff;"
+ "r7 = r0;"
+ "r0 = 0;"
+ /* Maybe make r{6,7} IDs identical */
+ "if r6 > r7 goto l0_%=;"
+ "goto l1_%=;"
+"l0_%=:"
+ "r6 = r7;"
+"l1_%=:"
+ /* Mark r{6,7} precise.
+ * Get here in two states:
+ * - first: r6{.id=A}, r7{.id=B} (cached state)
+ * - second: r6{.id=A}, r7{.id=A}
+ * Currently we don't want to consider such states equivalent.
+ * Thus "exit;" would be verified twice.
+ */
+ "r2 = r10;"
+ "r2 += r6;"
+ "r2 += r7;"
+ "exit;"
+ :
+ : __imm(bpf_ktime_get_ns)
+ : __clobber_all);
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/verifier_sdiv.c b/tools/testing/selftests/bpf/progs/verifier_sdiv.c
new file mode 100644
index 000000000000..2a2271cf0294
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/verifier_sdiv.c
@@ -0,0 +1,785 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include <linux/bpf.h>
+#include <bpf/bpf_helpers.h>
+#include "bpf_misc.h"
+
+#if (defined(__TARGET_ARCH_arm64) || defined(__TARGET_ARCH_x86) || \
+ (defined(__TARGET_ARCH_riscv) && __riscv_xlen == 64) || \
+ defined(__TARGET_ARCH_arm) || defined(__TARGET_ARCH_s390) || \
+ defined(__TARGET_ARCH_loongarch)) && \
+ __clang_major__ >= 18
+
+SEC("socket")
+__description("SDIV32, non-zero imm divisor, check 1")
+__success __success_unpriv __retval(-20)
+__naked void sdiv32_non_zero_imm_1(void)
+{
+ asm volatile (" \
+ w0 = -41; \
+ w0 s/= 2; \
+ exit; \
+" ::: __clobber_all);
+}
+
+SEC("socket")
+__description("SDIV32, non-zero imm divisor, check 2")
+__success __success_unpriv __retval(-20)
+__naked void sdiv32_non_zero_imm_2(void)
+{
+ asm volatile (" \
+ w0 = 41; \
+ w0 s/= -2; \
+ exit; \
+" ::: __clobber_all);
+}
+
+SEC("socket")
+__description("SDIV32, non-zero imm divisor, check 3")
+__success __success_unpriv __retval(20)
+__naked void sdiv32_non_zero_imm_3(void)
+{
+ asm volatile (" \
+ w0 = -41; \
+ w0 s/= -2; \
+ exit; \
+" ::: __clobber_all);
+}
+
+SEC("socket")
+__description("SDIV32, non-zero imm divisor, check 4")
+__success __success_unpriv __retval(-21)
+__naked void sdiv32_non_zero_imm_4(void)
+{
+ asm volatile (" \
+ w0 = -42; \
+ w0 s/= 2; \
+ exit; \
+" ::: __clobber_all);
+}
+
+SEC("socket")
+__description("SDIV32, non-zero imm divisor, check 5")
+__success __success_unpriv __retval(-21)
+__naked void sdiv32_non_zero_imm_5(void)
+{
+ asm volatile (" \
+ w0 = 42; \
+ w0 s/= -2; \
+ exit; \
+" ::: __clobber_all);
+}
+
+SEC("socket")
+__description("SDIV32, non-zero imm divisor, check 6")
+__success __success_unpriv __retval(21)
+__naked void sdiv32_non_zero_imm_6(void)
+{
+ asm volatile (" \
+ w0 = -42; \
+ w0 s/= -2; \
+ exit; \
+" ::: __clobber_all);
+}
+
+SEC("socket")
+__description("SDIV32, non-zero imm divisor, check 7")
+__success __success_unpriv __retval(21)
+__naked void sdiv32_non_zero_imm_7(void)
+{
+ asm volatile (" \
+ w0 = 42; \
+ w0 s/= 2; \
+ exit; \
+" ::: __clobber_all);
+}
+
+SEC("socket")
+__description("SDIV32, non-zero imm divisor, check 8")
+__success __success_unpriv __retval(20)
+__naked void sdiv32_non_zero_imm_8(void)
+{
+ asm volatile (" \
+ w0 = 41; \
+ w0 s/= 2; \
+ exit; \
+" ::: __clobber_all);
+}
+
+SEC("socket")
+__description("SDIV32, non-zero reg divisor, check 1")
+__success __success_unpriv __retval(-20)
+__naked void sdiv32_non_zero_reg_1(void)
+{
+ asm volatile (" \
+ w0 = -41; \
+ w1 = 2; \
+ w0 s/= w1; \
+ exit; \
+" ::: __clobber_all);
+}
+
+SEC("socket")
+__description("SDIV32, non-zero reg divisor, check 2")
+__success __success_unpriv __retval(-20)
+__naked void sdiv32_non_zero_reg_2(void)
+{
+ asm volatile (" \
+ w0 = 41; \
+ w1 = -2; \
+ w0 s/= w1; \
+ exit; \
+" ::: __clobber_all);
+}
+
+SEC("socket")
+__description("SDIV32, non-zero reg divisor, check 3")
+__success __success_unpriv __retval(20)
+__naked void sdiv32_non_zero_reg_3(void)
+{
+ asm volatile (" \
+ w0 = -41; \
+ w1 = -2; \
+ w0 s/= w1; \
+ exit; \
+" ::: __clobber_all);
+}
+
+SEC("socket")
+__description("SDIV32, non-zero reg divisor, check 4")
+__success __success_unpriv __retval(-21)
+__naked void sdiv32_non_zero_reg_4(void)
+{
+ asm volatile (" \
+ w0 = -42; \
+ w1 = 2; \
+ w0 s/= w1; \
+ exit; \
+" ::: __clobber_all);
+}
+
+SEC("socket")
+__description("SDIV32, non-zero reg divisor, check 5")
+__success __success_unpriv __retval(-21)
+__naked void sdiv32_non_zero_reg_5(void)
+{
+ asm volatile (" \
+ w0 = 42; \
+ w1 = -2; \
+ w0 s/= w1; \
+ exit; \
+" ::: __clobber_all);
+}
+
+SEC("socket")
+__description("SDIV32, non-zero reg divisor, check 6")
+__success __success_unpriv __retval(21)
+__naked void sdiv32_non_zero_reg_6(void)
+{
+ asm volatile (" \
+ w0 = -42; \
+ w1 = -2; \
+ w0 s/= w1; \
+ exit; \
+" ::: __clobber_all);
+}
+
+SEC("socket")
+__description("SDIV32, non-zero reg divisor, check 7")
+__success __success_unpriv __retval(21)
+__naked void sdiv32_non_zero_reg_7(void)
+{
+ asm volatile (" \
+ w0 = 42; \
+ w1 = 2; \
+ w0 s/= w1; \
+ exit; \
+" ::: __clobber_all);
+}
+
+SEC("socket")
+__description("SDIV32, non-zero reg divisor, check 8")
+__success __success_unpriv __retval(20)
+__naked void sdiv32_non_zero_reg_8(void)
+{
+ asm volatile (" \
+ w0 = 41; \
+ w1 = 2; \
+ w0 s/= w1; \
+ exit; \
+" ::: __clobber_all);
+}
+
+SEC("socket")
+__description("SDIV64, non-zero imm divisor, check 1")
+__success __success_unpriv __retval(-20)
+__naked void sdiv64_non_zero_imm_1(void)
+{
+ asm volatile (" \
+ r0 = -41; \
+ r0 s/= 2; \
+ exit; \
+" ::: __clobber_all);
+}
+
+SEC("socket")
+__description("SDIV64, non-zero imm divisor, check 2")
+__success __success_unpriv __retval(-20)
+__naked void sdiv64_non_zero_imm_2(void)
+{
+ asm volatile (" \
+ r0 = 41; \
+ r0 s/= -2; \
+ exit; \
+" ::: __clobber_all);
+}
+
+SEC("socket")
+__description("SDIV64, non-zero imm divisor, check 3")
+__success __success_unpriv __retval(20)
+__naked void sdiv64_non_zero_imm_3(void)
+{
+ asm volatile (" \
+ r0 = -41; \
+ r0 s/= -2; \
+ exit; \
+" ::: __clobber_all);
+}
+
+SEC("socket")
+__description("SDIV64, non-zero imm divisor, check 4")
+__success __success_unpriv __retval(-21)
+__naked void sdiv64_non_zero_imm_4(void)
+{
+ asm volatile (" \
+ r0 = -42; \
+ r0 s/= 2; \
+ exit; \
+" ::: __clobber_all);
+}
+
+SEC("socket")
+__description("SDIV64, non-zero imm divisor, check 5")
+__success __success_unpriv __retval(-21)
+__naked void sdiv64_non_zero_imm_5(void)
+{
+ asm volatile (" \
+ r0 = 42; \
+ r0 s/= -2; \
+ exit; \
+" ::: __clobber_all);
+}
+
+SEC("socket")
+__description("SDIV64, non-zero imm divisor, check 6")
+__success __success_unpriv __retval(21)
+__naked void sdiv64_non_zero_imm_6(void)
+{
+ asm volatile (" \
+ r0 = -42; \
+ r0 s/= -2; \
+ exit; \
+" ::: __clobber_all);
+}
+
+SEC("socket")
+__description("SDIV64, non-zero reg divisor, check 1")
+__success __success_unpriv __retval(-20)
+__naked void sdiv64_non_zero_reg_1(void)
+{
+ asm volatile (" \
+ r0 = -41; \
+ r1 = 2; \
+ r0 s/= r1; \
+ exit; \
+" ::: __clobber_all);
+}
+
+SEC("socket")
+__description("SDIV64, non-zero reg divisor, check 2")
+__success __success_unpriv __retval(-20)
+__naked void sdiv64_non_zero_reg_2(void)
+{
+ asm volatile (" \
+ r0 = 41; \
+ r1 = -2; \
+ r0 s/= r1; \
+ exit; \
+" ::: __clobber_all);
+}
+
+SEC("socket")
+__description("SDIV64, non-zero reg divisor, check 3")
+__success __success_unpriv __retval(20)
+__naked void sdiv64_non_zero_reg_3(void)
+{
+ asm volatile (" \
+ r0 = -41; \
+ r1 = -2; \
+ r0 s/= r1; \
+ exit; \
+" ::: __clobber_all);
+}
+
+SEC("socket")
+__description("SDIV64, non-zero reg divisor, check 4")
+__success __success_unpriv __retval(-21)
+__naked void sdiv64_non_zero_reg_4(void)
+{
+ asm volatile (" \
+ r0 = -42; \
+ r1 = 2; \
+ r0 s/= r1; \
+ exit; \
+" ::: __clobber_all);
+}
+
+SEC("socket")
+__description("SDIV64, non-zero reg divisor, check 5")
+__success __success_unpriv __retval(-21)
+__naked void sdiv64_non_zero_reg_5(void)
+{
+ asm volatile (" \
+ r0 = 42; \
+ r1 = -2; \
+ r0 s/= r1; \
+ exit; \
+" ::: __clobber_all);
+}
+
+SEC("socket")
+__description("SDIV64, non-zero reg divisor, check 6")
+__success __success_unpriv __retval(21)
+__naked void sdiv64_non_zero_reg_6(void)
+{
+ asm volatile (" \
+ r0 = -42; \
+ r1 = -2; \
+ r0 s/= r1; \
+ exit; \
+" ::: __clobber_all);
+}
+
+SEC("socket")
+__description("SMOD32, non-zero imm divisor, check 1")
+__success __success_unpriv __retval(-1)
+__naked void smod32_non_zero_imm_1(void)
+{
+ asm volatile (" \
+ w0 = -41; \
+ w0 s%%= 2; \
+ exit; \
+" ::: __clobber_all);
+}
+
+SEC("socket")
+__description("SMOD32, non-zero imm divisor, check 2")
+__success __success_unpriv __retval(1)
+__naked void smod32_non_zero_imm_2(void)
+{
+ asm volatile (" \
+ w0 = 41; \
+ w0 s%%= -2; \
+ exit; \
+" ::: __clobber_all);
+}
+
+SEC("socket")
+__description("SMOD32, non-zero imm divisor, check 3")
+__success __success_unpriv __retval(-1)
+__naked void smod32_non_zero_imm_3(void)
+{
+ asm volatile (" \
+ w0 = -41; \
+ w0 s%%= -2; \
+ exit; \
+" ::: __clobber_all);
+}
+
+SEC("socket")
+__description("SMOD32, non-zero imm divisor, check 4")
+__success __success_unpriv __retval(0)
+__naked void smod32_non_zero_imm_4(void)
+{
+ asm volatile (" \
+ w0 = -42; \
+ w0 s%%= 2; \
+ exit; \
+" ::: __clobber_all);
+}
+
+SEC("socket")
+__description("SMOD32, non-zero imm divisor, check 5")
+__success __success_unpriv __retval(0)
+__naked void smod32_non_zero_imm_5(void)
+{
+ asm volatile (" \
+ w0 = 42; \
+ w0 s%%= -2; \
+ exit; \
+" ::: __clobber_all);
+}
+
+SEC("socket")
+__description("SMOD32, non-zero imm divisor, check 6")
+__success __success_unpriv __retval(0)
+__naked void smod32_non_zero_imm_6(void)
+{
+ asm volatile (" \
+ w0 = -42; \
+ w0 s%%= -2; \
+ exit; \
+" ::: __clobber_all);
+}
+
+SEC("socket")
+__description("SMOD32, non-zero reg divisor, check 1")
+__success __success_unpriv __retval(-1)
+__naked void smod32_non_zero_reg_1(void)
+{
+ asm volatile (" \
+ w0 = -41; \
+ w1 = 2; \
+ w0 s%%= w1; \
+ exit; \
+" ::: __clobber_all);
+}
+
+SEC("socket")
+__description("SMOD32, non-zero reg divisor, check 2")
+__success __success_unpriv __retval(1)
+__naked void smod32_non_zero_reg_2(void)
+{
+ asm volatile (" \
+ w0 = 41; \
+ w1 = -2; \
+ w0 s%%= w1; \
+ exit; \
+" ::: __clobber_all);
+}
+
+SEC("socket")
+__description("SMOD32, non-zero reg divisor, check 3")
+__success __success_unpriv __retval(-1)
+__naked void smod32_non_zero_reg_3(void)
+{
+ asm volatile (" \
+ w0 = -41; \
+ w1 = -2; \
+ w0 s%%= w1; \
+ exit; \
+" ::: __clobber_all);
+}
+
+SEC("socket")
+__description("SMOD32, non-zero reg divisor, check 4")
+__success __success_unpriv __retval(0)
+__naked void smod32_non_zero_reg_4(void)
+{
+ asm volatile (" \
+ w0 = -42; \
+ w1 = 2; \
+ w0 s%%= w1; \
+ exit; \
+" ::: __clobber_all);
+}
+
+SEC("socket")
+__description("SMOD32, non-zero reg divisor, check 5")
+__success __success_unpriv __retval(0)
+__naked void smod32_non_zero_reg_5(void)
+{
+ asm volatile (" \
+ w0 = 42; \
+ w1 = -2; \
+ w0 s%%= w1; \
+ exit; \
+" ::: __clobber_all);
+}
+
+SEC("socket")
+__description("SMOD32, non-zero reg divisor, check 6")
+__success __success_unpriv __retval(0)
+__naked void smod32_non_zero_reg_6(void)
+{
+ asm volatile (" \
+ w0 = -42; \
+ w1 = -2; \
+ w0 s%%= w1; \
+ exit; \
+" ::: __clobber_all);
+}
+
+SEC("socket")
+__description("SMOD64, non-zero imm divisor, check 1")
+__success __success_unpriv __retval(-1)
+__naked void smod64_non_zero_imm_1(void)
+{
+ asm volatile (" \
+ r0 = -41; \
+ r0 s%%= 2; \
+ exit; \
+" ::: __clobber_all);
+}
+
+SEC("socket")
+__description("SMOD64, non-zero imm divisor, check 2")
+__success __success_unpriv __retval(1)
+__naked void smod64_non_zero_imm_2(void)
+{
+ asm volatile (" \
+ r0 = 41; \
+ r0 s%%= -2; \
+ exit; \
+" ::: __clobber_all);
+}
+
+SEC("socket")
+__description("SMOD64, non-zero imm divisor, check 3")
+__success __success_unpriv __retval(-1)
+__naked void smod64_non_zero_imm_3(void)
+{
+ asm volatile (" \
+ r0 = -41; \
+ r0 s%%= -2; \
+ exit; \
+" ::: __clobber_all);
+}
+
+SEC("socket")
+__description("SMOD64, non-zero imm divisor, check 4")
+__success __success_unpriv __retval(0)
+__naked void smod64_non_zero_imm_4(void)
+{
+ asm volatile (" \
+ r0 = -42; \
+ r0 s%%= 2; \
+ exit; \
+" ::: __clobber_all);
+}
+
+SEC("socket")
+__description("SMOD64, non-zero imm divisor, check 5")
+__success __success_unpriv __retval(-0)
+__naked void smod64_non_zero_imm_5(void)
+{
+ asm volatile (" \
+ r0 = 42; \
+ r0 s%%= -2; \
+ exit; \
+" ::: __clobber_all);
+}
+
+SEC("socket")
+__description("SMOD64, non-zero imm divisor, check 6")
+__success __success_unpriv __retval(0)
+__naked void smod64_non_zero_imm_6(void)
+{
+ asm volatile (" \
+ r0 = -42; \
+ r0 s%%= -2; \
+ exit; \
+" ::: __clobber_all);
+}
+
+SEC("socket")
+__description("SMOD64, non-zero imm divisor, check 7")
+__success __success_unpriv __retval(0)
+__naked void smod64_non_zero_imm_7(void)
+{
+ asm volatile (" \
+ r0 = 42; \
+ r0 s%%= 2; \
+ exit; \
+" ::: __clobber_all);
+}
+
+SEC("socket")
+__description("SMOD64, non-zero imm divisor, check 8")
+__success __success_unpriv __retval(1)
+__naked void smod64_non_zero_imm_8(void)
+{
+ asm volatile (" \
+ r0 = 41; \
+ r0 s%%= 2; \
+ exit; \
+" ::: __clobber_all);
+}
+
+SEC("socket")
+__description("SMOD64, non-zero reg divisor, check 1")
+__success __success_unpriv __retval(-1)
+__naked void smod64_non_zero_reg_1(void)
+{
+ asm volatile (" \
+ r0 = -41; \
+ r1 = 2; \
+ r0 s%%= r1; \
+ exit; \
+" ::: __clobber_all);
+}
+
+SEC("socket")
+__description("SMOD64, non-zero reg divisor, check 2")
+__success __success_unpriv __retval(1)
+__naked void smod64_non_zero_reg_2(void)
+{
+ asm volatile (" \
+ r0 = 41; \
+ r1 = -2; \
+ r0 s%%= r1; \
+ exit; \
+" ::: __clobber_all);
+}
+
+SEC("socket")
+__description("SMOD64, non-zero reg divisor, check 3")
+__success __success_unpriv __retval(-1)
+__naked void smod64_non_zero_reg_3(void)
+{
+ asm volatile (" \
+ r0 = -41; \
+ r1 = -2; \
+ r0 s%%= r1; \
+ exit; \
+" ::: __clobber_all);
+}
+
+SEC("socket")
+__description("SMOD64, non-zero reg divisor, check 4")
+__success __success_unpriv __retval(0)
+__naked void smod64_non_zero_reg_4(void)
+{
+ asm volatile (" \
+ r0 = -42; \
+ r1 = 2; \
+ r0 s%%= r1; \
+ exit; \
+" ::: __clobber_all);
+}
+
+SEC("socket")
+__description("SMOD64, non-zero reg divisor, check 5")
+__success __success_unpriv __retval(0)
+__naked void smod64_non_zero_reg_5(void)
+{
+ asm volatile (" \
+ r0 = 42; \
+ r1 = -2; \
+ r0 s%%= r1; \
+ exit; \
+" ::: __clobber_all);
+}
+
+SEC("socket")
+__description("SMOD64, non-zero reg divisor, check 6")
+__success __success_unpriv __retval(0)
+__naked void smod64_non_zero_reg_6(void)
+{
+ asm volatile (" \
+ r0 = -42; \
+ r1 = -2; \
+ r0 s%%= r1; \
+ exit; \
+" ::: __clobber_all);
+}
+
+SEC("socket")
+__description("SMOD64, non-zero reg divisor, check 7")
+__success __success_unpriv __retval(0)
+__naked void smod64_non_zero_reg_7(void)
+{
+ asm volatile (" \
+ r0 = 42; \
+ r1 = 2; \
+ r0 s%%= r1; \
+ exit; \
+" ::: __clobber_all);
+}
+
+SEC("socket")
+__description("SMOD64, non-zero reg divisor, check 8")
+__success __success_unpriv __retval(1)
+__naked void smod64_non_zero_reg_8(void)
+{
+ asm volatile (" \
+ r0 = 41; \
+ r1 = 2; \
+ r0 s%%= r1; \
+ exit; \
+" ::: __clobber_all);
+}
+
+SEC("socket")
+__description("SDIV32, zero divisor")
+__success __success_unpriv __retval(0)
+__naked void sdiv32_zero_divisor(void)
+{
+ asm volatile (" \
+ w0 = 42; \
+ w1 = 0; \
+ w2 = -1; \
+ w2 s/= w1; \
+ w0 = w2; \
+ exit; \
+" ::: __clobber_all);
+}
+
+SEC("socket")
+__description("SDIV64, zero divisor")
+__success __success_unpriv __retval(0)
+__naked void sdiv64_zero_divisor(void)
+{
+ asm volatile (" \
+ r0 = 42; \
+ r1 = 0; \
+ r2 = -1; \
+ r2 s/= r1; \
+ r0 = r2; \
+ exit; \
+" ::: __clobber_all);
+}
+
+SEC("socket")
+__description("SMOD32, zero divisor")
+__success __success_unpriv __retval(-1)
+__naked void smod32_zero_divisor(void)
+{
+ asm volatile (" \
+ w0 = 42; \
+ w1 = 0; \
+ w2 = -1; \
+ w2 s%%= w1; \
+ w0 = w2; \
+ exit; \
+" ::: __clobber_all);
+}
+
+SEC("socket")
+__description("SMOD64, zero divisor")
+__success __success_unpriv __retval(-1)
+__naked void smod64_zero_divisor(void)
+{
+ asm volatile (" \
+ r0 = 42; \
+ r1 = 0; \
+ r2 = -1; \
+ r2 s%%= r1; \
+ r0 = r2; \
+ exit; \
+" ::: __clobber_all);
+}
+
+#else
+
+SEC("socket")
+__description("cpuv4 is not supported by compiler or jit, use a dummy test")
+__success
+int dummy_test(void)
+{
+ return 0;
+}
+
+#endif
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/verifier_search_pruning.c b/tools/testing/selftests/bpf/progs/verifier_search_pruning.c
new file mode 100644
index 000000000000..5a14498d352f
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/verifier_search_pruning.c
@@ -0,0 +1,339 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Converted from tools/testing/selftests/bpf/verifier/search_pruning.c */
+
+#include <linux/bpf.h>
+#include <bpf/bpf_helpers.h>
+#include "bpf_misc.h"
+
+#define MAX_ENTRIES 11
+
+struct test_val {
+ unsigned int index;
+ int foo[MAX_ENTRIES];
+};
+
+struct {
+ __uint(type, BPF_MAP_TYPE_HASH);
+ __uint(max_entries, 1);
+ __type(key, long long);
+ __type(value, struct test_val);
+} map_hash_48b SEC(".maps");
+
+struct {
+ __uint(type, BPF_MAP_TYPE_HASH);
+ __uint(max_entries, 1);
+ __type(key, long long);
+ __type(value, long long);
+} map_hash_8b SEC(".maps");
+
+SEC("socket")
+__description("pointer/scalar confusion in state equality check (way 1)")
+__success __failure_unpriv __msg_unpriv("R0 leaks addr as return value")
+__retval(POINTER_VALUE)
+__naked void state_equality_check_way_1(void)
+{
+ asm volatile (" \
+ r1 = 0; \
+ *(u64*)(r10 - 8) = r1; \
+ r2 = r10; \
+ r2 += -8; \
+ r1 = %[map_hash_8b] ll; \
+ call %[bpf_map_lookup_elem]; \
+ if r0 == 0 goto l0_%=; \
+ r0 = *(u64*)(r0 + 0); \
+ goto l1_%=; \
+l0_%=: r0 = r10; \
+l1_%=: goto l2_%=; \
+l2_%=: exit; \
+" :
+ : __imm(bpf_map_lookup_elem),
+ __imm_addr(map_hash_8b)
+ : __clobber_all);
+}
+
+SEC("socket")
+__description("pointer/scalar confusion in state equality check (way 2)")
+__success __failure_unpriv __msg_unpriv("R0 leaks addr as return value")
+__retval(POINTER_VALUE)
+__naked void state_equality_check_way_2(void)
+{
+ asm volatile (" \
+ r1 = 0; \
+ *(u64*)(r10 - 8) = r1; \
+ r2 = r10; \
+ r2 += -8; \
+ r1 = %[map_hash_8b] ll; \
+ call %[bpf_map_lookup_elem]; \
+ if r0 != 0 goto l0_%=; \
+ r0 = r10; \
+ goto l1_%=; \
+l0_%=: r0 = *(u64*)(r0 + 0); \
+l1_%=: exit; \
+" :
+ : __imm(bpf_map_lookup_elem),
+ __imm_addr(map_hash_8b)
+ : __clobber_all);
+}
+
+SEC("lwt_in")
+__description("liveness pruning and write screening")
+__failure __msg("R0 !read_ok")
+__naked void liveness_pruning_and_write_screening(void)
+{
+ asm volatile (" \
+ /* Get an unknown value */ \
+ r2 = *(u32*)(r1 + 0); \
+ /* branch conditions teach us nothing about R2 */\
+ if r2 >= 0 goto l0_%=; \
+ r0 = 0; \
+l0_%=: if r2 >= 0 goto l1_%=; \
+ r0 = 0; \
+l1_%=: exit; \
+" ::: __clobber_all);
+}
+
+SEC("socket")
+__description("varlen_map_value_access pruning")
+__failure __msg("R0 unbounded memory access")
+__failure_unpriv __msg_unpriv("R0 leaks addr")
+__flag(BPF_F_ANY_ALIGNMENT)
+__naked void varlen_map_value_access_pruning(void)
+{
+ asm volatile (" \
+ r1 = 0; \
+ *(u64*)(r10 - 8) = r1; \
+ r2 = r10; \
+ r2 += -8; \
+ r1 = %[map_hash_48b] ll; \
+ call %[bpf_map_lookup_elem]; \
+ if r0 == 0 goto l0_%=; \
+ r1 = *(u64*)(r0 + 0); \
+ w2 = %[max_entries]; \
+ if r2 s> r1 goto l1_%=; \
+ w1 = 0; \
+l1_%=: w1 <<= 2; \
+ r0 += r1; \
+ goto l2_%=; \
+l2_%=: r1 = %[test_val_foo]; \
+ *(u64*)(r0 + 0) = r1; \
+l0_%=: exit; \
+" :
+ : __imm(bpf_map_lookup_elem),
+ __imm_addr(map_hash_48b),
+ __imm_const(max_entries, MAX_ENTRIES),
+ __imm_const(test_val_foo, offsetof(struct test_val, foo))
+ : __clobber_all);
+}
+
+SEC("tracepoint")
+__description("search pruning: all branches should be verified (nop operation)")
+__failure __msg("R6 invalid mem access 'scalar'")
+__naked void should_be_verified_nop_operation(void)
+{
+ asm volatile (" \
+ r2 = r10; \
+ r2 += -8; \
+ r1 = 0; \
+ *(u64*)(r2 + 0) = r1; \
+ r1 = %[map_hash_8b] ll; \
+ call %[bpf_map_lookup_elem]; \
+ if r0 == 0 goto l0_%=; \
+ r3 = *(u64*)(r0 + 0); \
+ if r3 == 0xbeef goto l1_%=; \
+ r4 = 0; \
+ goto l2_%=; \
+l1_%=: r4 = 1; \
+l2_%=: *(u64*)(r10 - 16) = r4; \
+ call %[bpf_ktime_get_ns]; \
+ r5 = *(u64*)(r10 - 16); \
+ if r5 == 0 goto l0_%=; \
+ r6 = 0; \
+ r1 = 0xdead; \
+ *(u64*)(r6 + 0) = r1; \
+l0_%=: exit; \
+" :
+ : __imm(bpf_ktime_get_ns),
+ __imm(bpf_map_lookup_elem),
+ __imm_addr(map_hash_8b)
+ : __clobber_all);
+}
+
+SEC("socket")
+__description("search pruning: all branches should be verified (invalid stack access)")
+/* in privileged mode reads from uninitialized stack locations are permitted */
+__success __failure_unpriv
+__msg_unpriv("invalid read from stack off -16+0 size 8")
+__retval(0)
+__naked void be_verified_invalid_stack_access(void)
+{
+ asm volatile (" \
+ r2 = r10; \
+ r2 += -8; \
+ r1 = 0; \
+ *(u64*)(r2 + 0) = r1; \
+ r1 = %[map_hash_8b] ll; \
+ call %[bpf_map_lookup_elem]; \
+ if r0 == 0 goto l0_%=; \
+ r3 = *(u64*)(r0 + 0); \
+ r4 = 0; \
+ if r3 == 0xbeef goto l1_%=; \
+ *(u64*)(r10 - 16) = r4; \
+ goto l2_%=; \
+l1_%=: *(u64*)(r10 - 24) = r4; \
+l2_%=: call %[bpf_ktime_get_ns]; \
+ r5 = *(u64*)(r10 - 16); \
+l0_%=: exit; \
+" :
+ : __imm(bpf_ktime_get_ns),
+ __imm(bpf_map_lookup_elem),
+ __imm_addr(map_hash_8b)
+ : __clobber_all);
+}
+
+SEC("tracepoint")
+__description("precision tracking for u32 spill/fill")
+__failure __msg("R0 min value is outside of the allowed memory range")
+__naked void tracking_for_u32_spill_fill(void)
+{
+ asm volatile (" \
+ r7 = r1; \
+ call %[bpf_get_prandom_u32]; \
+ w6 = 32; \
+ if r0 == 0 goto l0_%=; \
+ w6 = 4; \
+l0_%=: /* Additional insns to introduce a pruning point. */\
+ call %[bpf_get_prandom_u32]; \
+ r3 = 0; \
+ r3 = 0; \
+ if r0 == 0 goto l1_%=; \
+ r3 = 0; \
+l1_%=: /* u32 spill/fill */ \
+ *(u32*)(r10 - 8) = r6; \
+ r8 = *(u32*)(r10 - 8); \
+ /* out-of-bound map value access for r6=32 */ \
+ r1 = 0; \
+ *(u64*)(r10 - 16) = r1; \
+ r2 = r10; \
+ r2 += -16; \
+ r1 = %[map_hash_8b] ll; \
+ call %[bpf_map_lookup_elem]; \
+ if r0 == 0 goto l2_%=; \
+ r0 += r8; \
+ r1 = *(u32*)(r0 + 0); \
+l2_%=: r0 = 0; \
+ exit; \
+" :
+ : __imm(bpf_get_prandom_u32),
+ __imm(bpf_map_lookup_elem),
+ __imm_addr(map_hash_8b)
+ : __clobber_all);
+}
+
+SEC("tracepoint")
+__description("precision tracking for u32 spills, u64 fill")
+__failure __msg("div by zero")
+__naked void for_u32_spills_u64_fill(void)
+{
+ asm volatile (" \
+ call %[bpf_get_prandom_u32]; \
+ r6 = r0; \
+ w7 = 0xffffffff; \
+ /* Additional insns to introduce a pruning point. */\
+ r3 = 1; \
+ r3 = 1; \
+ r3 = 1; \
+ r3 = 1; \
+ call %[bpf_get_prandom_u32]; \
+ if r0 == 0 goto l0_%=; \
+ r3 = 1; \
+l0_%=: w3 /= 0; \
+ /* u32 spills, u64 fill */ \
+ *(u32*)(r10 - 4) = r6; \
+ *(u32*)(r10 - 8) = r7; \
+ r8 = *(u64*)(r10 - 8); \
+ /* if r8 != X goto pc+1 r8 known in fallthrough branch */\
+ if r8 != 0xffffffff goto l1_%=; \
+ r3 = 1; \
+l1_%=: /* if r8 == X goto pc+1 condition always true on first\
+ * traversal, so starts backtracking to mark r8 as requiring\
+ * precision. r7 marked as needing precision. r6 not marked\
+ * since it's not tracked. \
+ */ \
+ if r8 == 0xffffffff goto l2_%=; \
+ /* fails if r8 correctly marked unknown after fill. */\
+ w3 /= 0; \
+l2_%=: r0 = 0; \
+ exit; \
+" :
+ : __imm(bpf_get_prandom_u32)
+ : __clobber_all);
+}
+
+SEC("socket")
+__description("allocated_stack")
+__success __msg("processed 15 insns")
+__success_unpriv __msg_unpriv("") __log_level(1) __retval(0)
+__naked void allocated_stack(void)
+{
+ asm volatile (" \
+ r6 = r1; \
+ call %[bpf_get_prandom_u32]; \
+ r7 = r0; \
+ if r0 == 0 goto l0_%=; \
+ r0 = 0; \
+ *(u64*)(r10 - 8) = r6; \
+ r6 = *(u64*)(r10 - 8); \
+ *(u8*)(r10 - 9) = r7; \
+ r7 = *(u8*)(r10 - 9); \
+l0_%=: if r0 != 0 goto l1_%=; \
+l1_%=: if r0 != 0 goto l2_%=; \
+l2_%=: if r0 != 0 goto l3_%=; \
+l3_%=: if r0 != 0 goto l4_%=; \
+l4_%=: exit; \
+" :
+ : __imm(bpf_get_prandom_u32)
+ : __clobber_all);
+}
+
+/* The test performs a conditional 64-bit write to a stack location
+ * fp[-8], this is followed by an unconditional 8-bit write to fp[-8],
+ * then data is read from fp[-8]. This sequence is unsafe.
+ *
+ * The test would be mistakenly marked as safe w/o dst register parent
+ * preservation in verifier.c:copy_register_state() function.
+ *
+ * Note the usage of BPF_F_TEST_STATE_FREQ to force creation of the
+ * checkpoint state after conditional 64-bit assignment.
+ */
+
+SEC("socket")
+__description("write tracking and register parent chain bug")
+/* in privileged mode reads from uninitialized stack locations are permitted */
+__success __failure_unpriv
+__msg_unpriv("invalid read from stack off -8+1 size 8")
+__retval(0) __flag(BPF_F_TEST_STATE_FREQ)
+__naked void and_register_parent_chain_bug(void)
+{
+ asm volatile (" \
+ /* r6 = ktime_get_ns() */ \
+ call %[bpf_ktime_get_ns]; \
+ r6 = r0; \
+ /* r0 = ktime_get_ns() */ \
+ call %[bpf_ktime_get_ns]; \
+ /* if r0 > r6 goto +1 */ \
+ if r0 > r6 goto l0_%=; \
+ /* *(u64 *)(r10 - 8) = 0xdeadbeef */ \
+ r0 = 0xdeadbeef; \
+ *(u64*)(r10 - 8) = r0; \
+l0_%=: r1 = 42; \
+ *(u8*)(r10 - 8) = r1; \
+ r2 = *(u64*)(r10 - 8); \
+ /* exit(0) */ \
+ r0 = 0; \
+ exit; \
+" :
+ : __imm(bpf_ktime_get_ns)
+ : __clobber_all);
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/verifier_sock.c b/tools/testing/selftests/bpf/progs/verifier_sock.c
new file mode 100644
index 000000000000..ee76b51005ab
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/verifier_sock.c
@@ -0,0 +1,980 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Converted from tools/testing/selftests/bpf/verifier/sock.c */
+
+#include <linux/bpf.h>
+#include <bpf/bpf_helpers.h>
+#include "bpf_misc.h"
+
+#define sizeof_field(TYPE, MEMBER) sizeof((((TYPE *)0)->MEMBER))
+#define offsetofend(TYPE, MEMBER) \
+ (offsetof(TYPE, MEMBER) + sizeof_field(TYPE, MEMBER))
+
+struct {
+ __uint(type, BPF_MAP_TYPE_REUSEPORT_SOCKARRAY);
+ __uint(max_entries, 1);
+ __type(key, __u32);
+ __type(value, __u64);
+} map_reuseport_array SEC(".maps");
+
+struct {
+ __uint(type, BPF_MAP_TYPE_SOCKHASH);
+ __uint(max_entries, 1);
+ __type(key, int);
+ __type(value, int);
+} map_sockhash SEC(".maps");
+
+struct {
+ __uint(type, BPF_MAP_TYPE_SOCKMAP);
+ __uint(max_entries, 1);
+ __type(key, int);
+ __type(value, int);
+} map_sockmap SEC(".maps");
+
+struct {
+ __uint(type, BPF_MAP_TYPE_XSKMAP);
+ __uint(max_entries, 1);
+ __type(key, int);
+ __type(value, int);
+} map_xskmap SEC(".maps");
+
+struct val {
+ int cnt;
+ struct bpf_spin_lock l;
+};
+
+struct {
+ __uint(type, BPF_MAP_TYPE_SK_STORAGE);
+ __uint(max_entries, 0);
+ __type(key, int);
+ __type(value, struct val);
+ __uint(map_flags, BPF_F_NO_PREALLOC);
+} sk_storage_map SEC(".maps");
+
+SEC("cgroup/skb")
+__description("skb->sk: no NULL check")
+__failure __msg("invalid mem access 'sock_common_or_null'")
+__failure_unpriv
+__naked void skb_sk_no_null_check(void)
+{
+ asm volatile (" \
+ r1 = *(u64*)(r1 + %[__sk_buff_sk]); \
+ r0 = *(u32*)(r1 + 0); \
+ r0 = 0; \
+ exit; \
+" :
+ : __imm_const(__sk_buff_sk, offsetof(struct __sk_buff, sk))
+ : __clobber_all);
+}
+
+SEC("cgroup/skb")
+__description("skb->sk: sk->family [non fullsock field]")
+__success __success_unpriv __retval(0)
+__naked void sk_family_non_fullsock_field_1(void)
+{
+ asm volatile (" \
+ r1 = *(u64*)(r1 + %[__sk_buff_sk]); \
+ if r1 != 0 goto l0_%=; \
+ r0 = 0; \
+ exit; \
+l0_%=: r0 = *(u32*)(r1 + %[bpf_sock_family]); \
+ r0 = 0; \
+ exit; \
+" :
+ : __imm_const(__sk_buff_sk, offsetof(struct __sk_buff, sk)),
+ __imm_const(bpf_sock_family, offsetof(struct bpf_sock, family))
+ : __clobber_all);
+}
+
+SEC("cgroup/skb")
+__description("skb->sk: sk->type [fullsock field]")
+__failure __msg("invalid sock_common access")
+__failure_unpriv
+__naked void sk_sk_type_fullsock_field_1(void)
+{
+ asm volatile (" \
+ r1 = *(u64*)(r1 + %[__sk_buff_sk]); \
+ if r1 != 0 goto l0_%=; \
+ r0 = 0; \
+ exit; \
+l0_%=: r0 = *(u32*)(r1 + %[bpf_sock_type]); \
+ r0 = 0; \
+ exit; \
+" :
+ : __imm_const(__sk_buff_sk, offsetof(struct __sk_buff, sk)),
+ __imm_const(bpf_sock_type, offsetof(struct bpf_sock, type))
+ : __clobber_all);
+}
+
+SEC("cgroup/skb")
+__description("bpf_sk_fullsock(skb->sk): no !skb->sk check")
+__failure __msg("type=sock_common_or_null expected=sock_common")
+__failure_unpriv
+__naked void sk_no_skb_sk_check_1(void)
+{
+ asm volatile (" \
+ r1 = *(u64*)(r1 + %[__sk_buff_sk]); \
+ call %[bpf_sk_fullsock]; \
+ r0 = 0; \
+ exit; \
+" :
+ : __imm(bpf_sk_fullsock),
+ __imm_const(__sk_buff_sk, offsetof(struct __sk_buff, sk))
+ : __clobber_all);
+}
+
+SEC("cgroup/skb")
+__description("sk_fullsock(skb->sk): no NULL check on ret")
+__failure __msg("invalid mem access 'sock_or_null'")
+__failure_unpriv
+__naked void no_null_check_on_ret_1(void)
+{
+ asm volatile (" \
+ r1 = *(u64*)(r1 + %[__sk_buff_sk]); \
+ if r1 != 0 goto l0_%=; \
+ r0 = 0; \
+ exit; \
+l0_%=: call %[bpf_sk_fullsock]; \
+ r0 = *(u32*)(r0 + %[bpf_sock_type]); \
+ r0 = 0; \
+ exit; \
+" :
+ : __imm(bpf_sk_fullsock),
+ __imm_const(__sk_buff_sk, offsetof(struct __sk_buff, sk)),
+ __imm_const(bpf_sock_type, offsetof(struct bpf_sock, type))
+ : __clobber_all);
+}
+
+SEC("cgroup/skb")
+__description("sk_fullsock(skb->sk): sk->type [fullsock field]")
+__success __success_unpriv __retval(0)
+__naked void sk_sk_type_fullsock_field_2(void)
+{
+ asm volatile (" \
+ r1 = *(u64*)(r1 + %[__sk_buff_sk]); \
+ if r1 != 0 goto l0_%=; \
+ r0 = 0; \
+ exit; \
+l0_%=: call %[bpf_sk_fullsock]; \
+ if r0 != 0 goto l1_%=; \
+ r0 = 0; \
+ exit; \
+l1_%=: r0 = *(u32*)(r0 + %[bpf_sock_type]); \
+ r0 = 0; \
+ exit; \
+" :
+ : __imm(bpf_sk_fullsock),
+ __imm_const(__sk_buff_sk, offsetof(struct __sk_buff, sk)),
+ __imm_const(bpf_sock_type, offsetof(struct bpf_sock, type))
+ : __clobber_all);
+}
+
+SEC("cgroup/skb")
+__description("sk_fullsock(skb->sk): sk->family [non fullsock field]")
+__success __success_unpriv __retval(0)
+__naked void sk_family_non_fullsock_field_2(void)
+{
+ asm volatile (" \
+ r1 = *(u64*)(r1 + %[__sk_buff_sk]); \
+ if r1 != 0 goto l0_%=; \
+ r0 = 0; \
+ exit; \
+l0_%=: call %[bpf_sk_fullsock]; \
+ if r0 != 0 goto l1_%=; \
+ exit; \
+l1_%=: r0 = *(u32*)(r0 + %[bpf_sock_family]); \
+ r0 = 0; \
+ exit; \
+" :
+ : __imm(bpf_sk_fullsock),
+ __imm_const(__sk_buff_sk, offsetof(struct __sk_buff, sk)),
+ __imm_const(bpf_sock_family, offsetof(struct bpf_sock, family))
+ : __clobber_all);
+}
+
+SEC("cgroup/skb")
+__description("sk_fullsock(skb->sk): sk->state [narrow load]")
+__success __success_unpriv __retval(0)
+__naked void sk_sk_state_narrow_load(void)
+{
+ asm volatile (" \
+ r1 = *(u64*)(r1 + %[__sk_buff_sk]); \
+ if r1 != 0 goto l0_%=; \
+ r0 = 0; \
+ exit; \
+l0_%=: call %[bpf_sk_fullsock]; \
+ if r0 != 0 goto l1_%=; \
+ r0 = 0; \
+ exit; \
+l1_%=: r0 = *(u8*)(r0 + %[bpf_sock_state]); \
+ r0 = 0; \
+ exit; \
+" :
+ : __imm(bpf_sk_fullsock),
+ __imm_const(__sk_buff_sk, offsetof(struct __sk_buff, sk)),
+ __imm_const(bpf_sock_state, offsetof(struct bpf_sock, state))
+ : __clobber_all);
+}
+
+SEC("cgroup/skb")
+__description("sk_fullsock(skb->sk): sk->dst_port [word load] (backward compatibility)")
+__success __success_unpriv __retval(0)
+__naked void port_word_load_backward_compatibility(void)
+{
+ asm volatile (" \
+ r1 = *(u64*)(r1 + %[__sk_buff_sk]); \
+ if r1 != 0 goto l0_%=; \
+ r0 = 0; \
+ exit; \
+l0_%=: call %[bpf_sk_fullsock]; \
+ if r0 != 0 goto l1_%=; \
+ r0 = 0; \
+ exit; \
+l1_%=: r0 = *(u32*)(r0 + %[bpf_sock_dst_port]); \
+ r0 = 0; \
+ exit; \
+" :
+ : __imm(bpf_sk_fullsock),
+ __imm_const(__sk_buff_sk, offsetof(struct __sk_buff, sk)),
+ __imm_const(bpf_sock_dst_port, offsetof(struct bpf_sock, dst_port))
+ : __clobber_all);
+}
+
+SEC("cgroup/skb")
+__description("sk_fullsock(skb->sk): sk->dst_port [half load]")
+__success __success_unpriv __retval(0)
+__naked void sk_dst_port_half_load(void)
+{
+ asm volatile (" \
+ r1 = *(u64*)(r1 + %[__sk_buff_sk]); \
+ if r1 != 0 goto l0_%=; \
+ r0 = 0; \
+ exit; \
+l0_%=: call %[bpf_sk_fullsock]; \
+ if r0 != 0 goto l1_%=; \
+ r0 = 0; \
+ exit; \
+l1_%=: r0 = *(u16*)(r0 + %[bpf_sock_dst_port]); \
+ r0 = 0; \
+ exit; \
+" :
+ : __imm(bpf_sk_fullsock),
+ __imm_const(__sk_buff_sk, offsetof(struct __sk_buff, sk)),
+ __imm_const(bpf_sock_dst_port, offsetof(struct bpf_sock, dst_port))
+ : __clobber_all);
+}
+
+SEC("cgroup/skb")
+__description("sk_fullsock(skb->sk): sk->dst_port [half load] (invalid)")
+__failure __msg("invalid sock access")
+__failure_unpriv
+__naked void dst_port_half_load_invalid_1(void)
+{
+ asm volatile (" \
+ r1 = *(u64*)(r1 + %[__sk_buff_sk]); \
+ if r1 != 0 goto l0_%=; \
+ r0 = 0; \
+ exit; \
+l0_%=: call %[bpf_sk_fullsock]; \
+ if r0 != 0 goto l1_%=; \
+ r0 = 0; \
+ exit; \
+l1_%=: r0 = *(u16*)(r0 + %[__imm_0]); \
+ r0 = 0; \
+ exit; \
+" :
+ : __imm(bpf_sk_fullsock),
+ __imm_const(__imm_0, offsetof(struct bpf_sock, dst_port) + 2),
+ __imm_const(__sk_buff_sk, offsetof(struct __sk_buff, sk))
+ : __clobber_all);
+}
+
+SEC("cgroup/skb")
+__description("sk_fullsock(skb->sk): sk->dst_port [byte load]")
+__success __success_unpriv __retval(0)
+__naked void sk_dst_port_byte_load(void)
+{
+ asm volatile (" \
+ r1 = *(u64*)(r1 + %[__sk_buff_sk]); \
+ if r1 != 0 goto l0_%=; \
+ r0 = 0; \
+ exit; \
+l0_%=: call %[bpf_sk_fullsock]; \
+ if r0 != 0 goto l1_%=; \
+ r0 = 0; \
+ exit; \
+l1_%=: r2 = *(u8*)(r0 + %[bpf_sock_dst_port]); \
+ r2 = *(u8*)(r0 + %[__imm_0]); \
+ r0 = 0; \
+ exit; \
+" :
+ : __imm(bpf_sk_fullsock),
+ __imm_const(__imm_0, offsetof(struct bpf_sock, dst_port) + 1),
+ __imm_const(__sk_buff_sk, offsetof(struct __sk_buff, sk)),
+ __imm_const(bpf_sock_dst_port, offsetof(struct bpf_sock, dst_port))
+ : __clobber_all);
+}
+
+SEC("cgroup/skb")
+__description("sk_fullsock(skb->sk): sk->dst_port [byte load] (invalid)")
+__failure __msg("invalid sock access")
+__failure_unpriv
+__naked void dst_port_byte_load_invalid(void)
+{
+ asm volatile (" \
+ r1 = *(u64*)(r1 + %[__sk_buff_sk]); \
+ if r1 != 0 goto l0_%=; \
+ r0 = 0; \
+ exit; \
+l0_%=: call %[bpf_sk_fullsock]; \
+ if r0 != 0 goto l1_%=; \
+ r0 = 0; \
+ exit; \
+l1_%=: r0 = *(u8*)(r0 + %[__imm_0]); \
+ r0 = 0; \
+ exit; \
+" :
+ : __imm(bpf_sk_fullsock),
+ __imm_const(__imm_0, offsetof(struct bpf_sock, dst_port) + 2),
+ __imm_const(__sk_buff_sk, offsetof(struct __sk_buff, sk))
+ : __clobber_all);
+}
+
+SEC("cgroup/skb")
+__description("sk_fullsock(skb->sk): past sk->dst_port [half load] (invalid)")
+__failure __msg("invalid sock access")
+__failure_unpriv
+__naked void dst_port_half_load_invalid_2(void)
+{
+ asm volatile (" \
+ r1 = *(u64*)(r1 + %[__sk_buff_sk]); \
+ if r1 != 0 goto l0_%=; \
+ r0 = 0; \
+ exit; \
+l0_%=: call %[bpf_sk_fullsock]; \
+ if r0 != 0 goto l1_%=; \
+ r0 = 0; \
+ exit; \
+l1_%=: r0 = *(u16*)(r0 + %[bpf_sock_dst_port__end]); \
+ r0 = 0; \
+ exit; \
+" :
+ : __imm(bpf_sk_fullsock),
+ __imm_const(__sk_buff_sk, offsetof(struct __sk_buff, sk)),
+ __imm_const(bpf_sock_dst_port__end, offsetofend(struct bpf_sock, dst_port))
+ : __clobber_all);
+}
+
+SEC("cgroup/skb")
+__description("sk_fullsock(skb->sk): sk->dst_ip6 [load 2nd byte]")
+__success __success_unpriv __retval(0)
+__naked void dst_ip6_load_2nd_byte(void)
+{
+ asm volatile (" \
+ r1 = *(u64*)(r1 + %[__sk_buff_sk]); \
+ if r1 != 0 goto l0_%=; \
+ r0 = 0; \
+ exit; \
+l0_%=: call %[bpf_sk_fullsock]; \
+ if r0 != 0 goto l1_%=; \
+ r0 = 0; \
+ exit; \
+l1_%=: r0 = *(u8*)(r0 + %[__imm_0]); \
+ r0 = 0; \
+ exit; \
+" :
+ : __imm(bpf_sk_fullsock),
+ __imm_const(__imm_0, offsetof(struct bpf_sock, dst_ip6[0]) + 1),
+ __imm_const(__sk_buff_sk, offsetof(struct __sk_buff, sk))
+ : __clobber_all);
+}
+
+SEC("cgroup/skb")
+__description("sk_fullsock(skb->sk): sk->type [narrow load]")
+__success __success_unpriv __retval(0)
+__naked void sk_sk_type_narrow_load(void)
+{
+ asm volatile (" \
+ r1 = *(u64*)(r1 + %[__sk_buff_sk]); \
+ if r1 != 0 goto l0_%=; \
+ r0 = 0; \
+ exit; \
+l0_%=: call %[bpf_sk_fullsock]; \
+ if r0 != 0 goto l1_%=; \
+ r0 = 0; \
+ exit; \
+l1_%=: r0 = *(u8*)(r0 + %[bpf_sock_type]); \
+ r0 = 0; \
+ exit; \
+" :
+ : __imm(bpf_sk_fullsock),
+ __imm_const(__sk_buff_sk, offsetof(struct __sk_buff, sk)),
+ __imm_const(bpf_sock_type, offsetof(struct bpf_sock, type))
+ : __clobber_all);
+}
+
+SEC("cgroup/skb")
+__description("sk_fullsock(skb->sk): sk->protocol [narrow load]")
+__success __success_unpriv __retval(0)
+__naked void sk_sk_protocol_narrow_load(void)
+{
+ asm volatile (" \
+ r1 = *(u64*)(r1 + %[__sk_buff_sk]); \
+ if r1 != 0 goto l0_%=; \
+ r0 = 0; \
+ exit; \
+l0_%=: call %[bpf_sk_fullsock]; \
+ if r0 != 0 goto l1_%=; \
+ r0 = 0; \
+ exit; \
+l1_%=: r0 = *(u8*)(r0 + %[bpf_sock_protocol]); \
+ r0 = 0; \
+ exit; \
+" :
+ : __imm(bpf_sk_fullsock),
+ __imm_const(__sk_buff_sk, offsetof(struct __sk_buff, sk)),
+ __imm_const(bpf_sock_protocol, offsetof(struct bpf_sock, protocol))
+ : __clobber_all);
+}
+
+SEC("cgroup/skb")
+__description("sk_fullsock(skb->sk): beyond last field")
+__failure __msg("invalid sock access")
+__failure_unpriv
+__naked void skb_sk_beyond_last_field_1(void)
+{
+ asm volatile (" \
+ r1 = *(u64*)(r1 + %[__sk_buff_sk]); \
+ if r1 != 0 goto l0_%=; \
+ r0 = 0; \
+ exit; \
+l0_%=: call %[bpf_sk_fullsock]; \
+ if r0 != 0 goto l1_%=; \
+ r0 = 0; \
+ exit; \
+l1_%=: r0 = *(u32*)(r0 + %[bpf_sock_rx_queue_mapping__end]);\
+ r0 = 0; \
+ exit; \
+" :
+ : __imm(bpf_sk_fullsock),
+ __imm_const(__sk_buff_sk, offsetof(struct __sk_buff, sk)),
+ __imm_const(bpf_sock_rx_queue_mapping__end, offsetofend(struct bpf_sock, rx_queue_mapping))
+ : __clobber_all);
+}
+
+SEC("cgroup/skb")
+__description("bpf_tcp_sock(skb->sk): no !skb->sk check")
+__failure __msg("type=sock_common_or_null expected=sock_common")
+__failure_unpriv
+__naked void sk_no_skb_sk_check_2(void)
+{
+ asm volatile (" \
+ r1 = *(u64*)(r1 + %[__sk_buff_sk]); \
+ call %[bpf_tcp_sock]; \
+ r0 = 0; \
+ exit; \
+" :
+ : __imm(bpf_tcp_sock),
+ __imm_const(__sk_buff_sk, offsetof(struct __sk_buff, sk))
+ : __clobber_all);
+}
+
+SEC("cgroup/skb")
+__description("bpf_tcp_sock(skb->sk): no NULL check on ret")
+__failure __msg("invalid mem access 'tcp_sock_or_null'")
+__failure_unpriv
+__naked void no_null_check_on_ret_2(void)
+{
+ asm volatile (" \
+ r1 = *(u64*)(r1 + %[__sk_buff_sk]); \
+ if r1 != 0 goto l0_%=; \
+ r0 = 0; \
+ exit; \
+l0_%=: call %[bpf_tcp_sock]; \
+ r0 = *(u32*)(r0 + %[bpf_tcp_sock_snd_cwnd]); \
+ r0 = 0; \
+ exit; \
+" :
+ : __imm(bpf_tcp_sock),
+ __imm_const(__sk_buff_sk, offsetof(struct __sk_buff, sk)),
+ __imm_const(bpf_tcp_sock_snd_cwnd, offsetof(struct bpf_tcp_sock, snd_cwnd))
+ : __clobber_all);
+}
+
+SEC("cgroup/skb")
+__description("bpf_tcp_sock(skb->sk): tp->snd_cwnd")
+__success __success_unpriv __retval(0)
+__naked void skb_sk_tp_snd_cwnd_1(void)
+{
+ asm volatile (" \
+ r1 = *(u64*)(r1 + %[__sk_buff_sk]); \
+ if r1 != 0 goto l0_%=; \
+ r0 = 0; \
+ exit; \
+l0_%=: call %[bpf_tcp_sock]; \
+ if r0 != 0 goto l1_%=; \
+ exit; \
+l1_%=: r0 = *(u32*)(r0 + %[bpf_tcp_sock_snd_cwnd]); \
+ r0 = 0; \
+ exit; \
+" :
+ : __imm(bpf_tcp_sock),
+ __imm_const(__sk_buff_sk, offsetof(struct __sk_buff, sk)),
+ __imm_const(bpf_tcp_sock_snd_cwnd, offsetof(struct bpf_tcp_sock, snd_cwnd))
+ : __clobber_all);
+}
+
+SEC("cgroup/skb")
+__description("bpf_tcp_sock(skb->sk): tp->bytes_acked")
+__success __success_unpriv __retval(0)
+__naked void skb_sk_tp_bytes_acked(void)
+{
+ asm volatile (" \
+ r1 = *(u64*)(r1 + %[__sk_buff_sk]); \
+ if r1 != 0 goto l0_%=; \
+ r0 = 0; \
+ exit; \
+l0_%=: call %[bpf_tcp_sock]; \
+ if r0 != 0 goto l1_%=; \
+ exit; \
+l1_%=: r0 = *(u64*)(r0 + %[bpf_tcp_sock_bytes_acked]); \
+ r0 = 0; \
+ exit; \
+" :
+ : __imm(bpf_tcp_sock),
+ __imm_const(__sk_buff_sk, offsetof(struct __sk_buff, sk)),
+ __imm_const(bpf_tcp_sock_bytes_acked, offsetof(struct bpf_tcp_sock, bytes_acked))
+ : __clobber_all);
+}
+
+SEC("cgroup/skb")
+__description("bpf_tcp_sock(skb->sk): beyond last field")
+__failure __msg("invalid tcp_sock access")
+__failure_unpriv
+__naked void skb_sk_beyond_last_field_2(void)
+{
+ asm volatile (" \
+ r1 = *(u64*)(r1 + %[__sk_buff_sk]); \
+ if r1 != 0 goto l0_%=; \
+ r0 = 0; \
+ exit; \
+l0_%=: call %[bpf_tcp_sock]; \
+ if r0 != 0 goto l1_%=; \
+ exit; \
+l1_%=: r0 = *(u64*)(r0 + %[bpf_tcp_sock_bytes_acked__end]);\
+ r0 = 0; \
+ exit; \
+" :
+ : __imm(bpf_tcp_sock),
+ __imm_const(__sk_buff_sk, offsetof(struct __sk_buff, sk)),
+ __imm_const(bpf_tcp_sock_bytes_acked__end, offsetofend(struct bpf_tcp_sock, bytes_acked))
+ : __clobber_all);
+}
+
+SEC("cgroup/skb")
+__description("bpf_tcp_sock(bpf_sk_fullsock(skb->sk)): tp->snd_cwnd")
+__success __success_unpriv __retval(0)
+__naked void skb_sk_tp_snd_cwnd_2(void)
+{
+ asm volatile (" \
+ r1 = *(u64*)(r1 + %[__sk_buff_sk]); \
+ if r1 != 0 goto l0_%=; \
+ r0 = 0; \
+ exit; \
+l0_%=: call %[bpf_sk_fullsock]; \
+ if r0 != 0 goto l1_%=; \
+ exit; \
+l1_%=: r1 = r0; \
+ call %[bpf_tcp_sock]; \
+ if r0 != 0 goto l2_%=; \
+ exit; \
+l2_%=: r0 = *(u32*)(r0 + %[bpf_tcp_sock_snd_cwnd]); \
+ r0 = 0; \
+ exit; \
+" :
+ : __imm(bpf_sk_fullsock),
+ __imm(bpf_tcp_sock),
+ __imm_const(__sk_buff_sk, offsetof(struct __sk_buff, sk)),
+ __imm_const(bpf_tcp_sock_snd_cwnd, offsetof(struct bpf_tcp_sock, snd_cwnd))
+ : __clobber_all);
+}
+
+SEC("tc")
+__description("bpf_sk_release(skb->sk)")
+__failure __msg("R1 must be referenced when passed to release function")
+__naked void bpf_sk_release_skb_sk(void)
+{
+ asm volatile (" \
+ r1 = *(u64*)(r1 + %[__sk_buff_sk]); \
+ if r1 == 0 goto l0_%=; \
+ call %[bpf_sk_release]; \
+l0_%=: r0 = 0; \
+ exit; \
+" :
+ : __imm(bpf_sk_release),
+ __imm_const(__sk_buff_sk, offsetof(struct __sk_buff, sk))
+ : __clobber_all);
+}
+
+SEC("tc")
+__description("bpf_sk_release(bpf_sk_fullsock(skb->sk))")
+__failure __msg("R1 must be referenced when passed to release function")
+__naked void bpf_sk_fullsock_skb_sk(void)
+{
+ asm volatile (" \
+ r1 = *(u64*)(r1 + %[__sk_buff_sk]); \
+ if r1 != 0 goto l0_%=; \
+ r0 = 0; \
+ exit; \
+l0_%=: call %[bpf_sk_fullsock]; \
+ if r0 != 0 goto l1_%=; \
+ exit; \
+l1_%=: r1 = r0; \
+ call %[bpf_sk_release]; \
+ r0 = 1; \
+ exit; \
+" :
+ : __imm(bpf_sk_fullsock),
+ __imm(bpf_sk_release),
+ __imm_const(__sk_buff_sk, offsetof(struct __sk_buff, sk))
+ : __clobber_all);
+}
+
+SEC("tc")
+__description("bpf_sk_release(bpf_tcp_sock(skb->sk))")
+__failure __msg("R1 must be referenced when passed to release function")
+__naked void bpf_tcp_sock_skb_sk(void)
+{
+ asm volatile (" \
+ r1 = *(u64*)(r1 + %[__sk_buff_sk]); \
+ if r1 != 0 goto l0_%=; \
+ r0 = 0; \
+ exit; \
+l0_%=: call %[bpf_tcp_sock]; \
+ if r0 != 0 goto l1_%=; \
+ exit; \
+l1_%=: r1 = r0; \
+ call %[bpf_sk_release]; \
+ r0 = 1; \
+ exit; \
+" :
+ : __imm(bpf_sk_release),
+ __imm(bpf_tcp_sock),
+ __imm_const(__sk_buff_sk, offsetof(struct __sk_buff, sk))
+ : __clobber_all);
+}
+
+SEC("tc")
+__description("sk_storage_get(map, skb->sk, NULL, 0): value == NULL")
+__success __retval(0)
+__naked void sk_null_0_value_null(void)
+{
+ asm volatile (" \
+ r1 = *(u64*)(r1 + %[__sk_buff_sk]); \
+ if r1 != 0 goto l0_%=; \
+ r0 = 0; \
+ exit; \
+l0_%=: call %[bpf_sk_fullsock]; \
+ if r0 != 0 goto l1_%=; \
+ r0 = 0; \
+ exit; \
+l1_%=: r4 = 0; \
+ r3 = 0; \
+ r2 = r0; \
+ r1 = %[sk_storage_map] ll; \
+ call %[bpf_sk_storage_get]; \
+ r0 = 0; \
+ exit; \
+" :
+ : __imm(bpf_sk_fullsock),
+ __imm(bpf_sk_storage_get),
+ __imm_addr(sk_storage_map),
+ __imm_const(__sk_buff_sk, offsetof(struct __sk_buff, sk))
+ : __clobber_all);
+}
+
+SEC("tc")
+__description("sk_storage_get(map, skb->sk, 1, 1): value == 1")
+__failure __msg("R3 type=scalar expected=fp")
+__naked void sk_1_1_value_1(void)
+{
+ asm volatile (" \
+ r1 = *(u64*)(r1 + %[__sk_buff_sk]); \
+ if r1 != 0 goto l0_%=; \
+ r0 = 0; \
+ exit; \
+l0_%=: call %[bpf_sk_fullsock]; \
+ if r0 != 0 goto l1_%=; \
+ r0 = 0; \
+ exit; \
+l1_%=: r4 = 1; \
+ r3 = 1; \
+ r2 = r0; \
+ r1 = %[sk_storage_map] ll; \
+ call %[bpf_sk_storage_get]; \
+ r0 = 0; \
+ exit; \
+" :
+ : __imm(bpf_sk_fullsock),
+ __imm(bpf_sk_storage_get),
+ __imm_addr(sk_storage_map),
+ __imm_const(__sk_buff_sk, offsetof(struct __sk_buff, sk))
+ : __clobber_all);
+}
+
+SEC("tc")
+__description("sk_storage_get(map, skb->sk, &stack_value, 1): stack_value")
+__success __retval(0)
+__naked void stack_value_1_stack_value(void)
+{
+ asm volatile (" \
+ r2 = 0; \
+ *(u64*)(r10 - 8) = r2; \
+ r1 = *(u64*)(r1 + %[__sk_buff_sk]); \
+ if r1 != 0 goto l0_%=; \
+ r0 = 0; \
+ exit; \
+l0_%=: call %[bpf_sk_fullsock]; \
+ if r0 != 0 goto l1_%=; \
+ r0 = 0; \
+ exit; \
+l1_%=: r4 = 1; \
+ r3 = r10; \
+ r3 += -8; \
+ r2 = r0; \
+ r1 = %[sk_storage_map] ll; \
+ call %[bpf_sk_storage_get]; \
+ r0 = 0; \
+ exit; \
+" :
+ : __imm(bpf_sk_fullsock),
+ __imm(bpf_sk_storage_get),
+ __imm_addr(sk_storage_map),
+ __imm_const(__sk_buff_sk, offsetof(struct __sk_buff, sk))
+ : __clobber_all);
+}
+
+SEC("tc")
+__description("bpf_map_lookup_elem(smap, &key)")
+__failure __msg("cannot pass map_type 24 into func bpf_map_lookup_elem")
+__naked void map_lookup_elem_smap_key(void)
+{
+ asm volatile (" \
+ r1 = 0; \
+ *(u32*)(r10 - 4) = r1; \
+ r2 = r10; \
+ r2 += -4; \
+ r1 = %[sk_storage_map] ll; \
+ call %[bpf_map_lookup_elem]; \
+ r0 = 0; \
+ exit; \
+" :
+ : __imm(bpf_map_lookup_elem),
+ __imm_addr(sk_storage_map)
+ : __clobber_all);
+}
+
+SEC("xdp")
+__description("bpf_map_lookup_elem(xskmap, &key); xs->queue_id")
+__success __retval(0)
+__naked void xskmap_key_xs_queue_id(void)
+{
+ asm volatile (" \
+ r1 = 0; \
+ *(u32*)(r10 - 8) = r1; \
+ r2 = r10; \
+ r2 += -8; \
+ r1 = %[map_xskmap] ll; \
+ call %[bpf_map_lookup_elem]; \
+ if r0 != 0 goto l0_%=; \
+ exit; \
+l0_%=: r0 = *(u32*)(r0 + %[bpf_xdp_sock_queue_id]); \
+ r0 = 0; \
+ exit; \
+" :
+ : __imm(bpf_map_lookup_elem),
+ __imm_addr(map_xskmap),
+ __imm_const(bpf_xdp_sock_queue_id, offsetof(struct bpf_xdp_sock, queue_id))
+ : __clobber_all);
+}
+
+SEC("sk_skb")
+__description("bpf_map_lookup_elem(sockmap, &key)")
+__failure __msg("Unreleased reference id=2 alloc_insn=6")
+__naked void map_lookup_elem_sockmap_key(void)
+{
+ asm volatile (" \
+ r1 = 0; \
+ *(u32*)(r10 - 4) = r1; \
+ r2 = r10; \
+ r2 += -4; \
+ r1 = %[map_sockmap] ll; \
+ call %[bpf_map_lookup_elem]; \
+ r0 = 0; \
+ exit; \
+" :
+ : __imm(bpf_map_lookup_elem),
+ __imm_addr(map_sockmap)
+ : __clobber_all);
+}
+
+SEC("sk_skb")
+__description("bpf_map_lookup_elem(sockhash, &key)")
+__failure __msg("Unreleased reference id=2 alloc_insn=6")
+__naked void map_lookup_elem_sockhash_key(void)
+{
+ asm volatile (" \
+ r1 = 0; \
+ *(u32*)(r10 - 4) = r1; \
+ r2 = r10; \
+ r2 += -4; \
+ r1 = %[map_sockhash] ll; \
+ call %[bpf_map_lookup_elem]; \
+ r0 = 0; \
+ exit; \
+" :
+ : __imm(bpf_map_lookup_elem),
+ __imm_addr(map_sockhash)
+ : __clobber_all);
+}
+
+SEC("sk_skb")
+__description("bpf_map_lookup_elem(sockmap, &key); sk->type [fullsock field]; bpf_sk_release(sk)")
+__success
+__naked void field_bpf_sk_release_sk_1(void)
+{
+ asm volatile (" \
+ r1 = 0; \
+ *(u32*)(r10 - 4) = r1; \
+ r2 = r10; \
+ r2 += -4; \
+ r1 = %[map_sockmap] ll; \
+ call %[bpf_map_lookup_elem]; \
+ if r0 != 0 goto l0_%=; \
+ exit; \
+l0_%=: r1 = r0; \
+ r0 = *(u32*)(r0 + %[bpf_sock_type]); \
+ call %[bpf_sk_release]; \
+ exit; \
+" :
+ : __imm(bpf_map_lookup_elem),
+ __imm(bpf_sk_release),
+ __imm_addr(map_sockmap),
+ __imm_const(bpf_sock_type, offsetof(struct bpf_sock, type))
+ : __clobber_all);
+}
+
+SEC("sk_skb")
+__description("bpf_map_lookup_elem(sockhash, &key); sk->type [fullsock field]; bpf_sk_release(sk)")
+__success
+__naked void field_bpf_sk_release_sk_2(void)
+{
+ asm volatile (" \
+ r1 = 0; \
+ *(u32*)(r10 - 4) = r1; \
+ r2 = r10; \
+ r2 += -4; \
+ r1 = %[map_sockhash] ll; \
+ call %[bpf_map_lookup_elem]; \
+ if r0 != 0 goto l0_%=; \
+ exit; \
+l0_%=: r1 = r0; \
+ r0 = *(u32*)(r0 + %[bpf_sock_type]); \
+ call %[bpf_sk_release]; \
+ exit; \
+" :
+ : __imm(bpf_map_lookup_elem),
+ __imm(bpf_sk_release),
+ __imm_addr(map_sockhash),
+ __imm_const(bpf_sock_type, offsetof(struct bpf_sock, type))
+ : __clobber_all);
+}
+
+SEC("sk_reuseport")
+__description("bpf_sk_select_reuseport(ctx, reuseport_array, &key, flags)")
+__success
+__naked void ctx_reuseport_array_key_flags(void)
+{
+ asm volatile (" \
+ r4 = 0; \
+ r2 = 0; \
+ *(u32*)(r10 - 4) = r2; \
+ r3 = r10; \
+ r3 += -4; \
+ r2 = %[map_reuseport_array] ll; \
+ call %[bpf_sk_select_reuseport]; \
+ exit; \
+" :
+ : __imm(bpf_sk_select_reuseport),
+ __imm_addr(map_reuseport_array)
+ : __clobber_all);
+}
+
+SEC("sk_reuseport")
+__description("bpf_sk_select_reuseport(ctx, sockmap, &key, flags)")
+__success
+__naked void reuseport_ctx_sockmap_key_flags(void)
+{
+ asm volatile (" \
+ r4 = 0; \
+ r2 = 0; \
+ *(u32*)(r10 - 4) = r2; \
+ r3 = r10; \
+ r3 += -4; \
+ r2 = %[map_sockmap] ll; \
+ call %[bpf_sk_select_reuseport]; \
+ exit; \
+" :
+ : __imm(bpf_sk_select_reuseport),
+ __imm_addr(map_sockmap)
+ : __clobber_all);
+}
+
+SEC("sk_reuseport")
+__description("bpf_sk_select_reuseport(ctx, sockhash, &key, flags)")
+__success
+__naked void reuseport_ctx_sockhash_key_flags(void)
+{
+ asm volatile (" \
+ r4 = 0; \
+ r2 = 0; \
+ *(u32*)(r10 - 4) = r2; \
+ r3 = r10; \
+ r3 += -4; \
+ r2 = %[map_sockmap] ll; \
+ call %[bpf_sk_select_reuseport]; \
+ exit; \
+" :
+ : __imm(bpf_sk_select_reuseport),
+ __imm_addr(map_sockmap)
+ : __clobber_all);
+}
+
+SEC("tc")
+__description("mark null check on return value of bpf_skc_to helpers")
+__failure __msg("invalid mem access")
+__naked void of_bpf_skc_to_helpers(void)
+{
+ asm volatile (" \
+ r1 = *(u64*)(r1 + %[__sk_buff_sk]); \
+ if r1 != 0 goto l0_%=; \
+ r0 = 0; \
+ exit; \
+l0_%=: r6 = r1; \
+ call %[bpf_skc_to_tcp_sock]; \
+ r7 = r0; \
+ r1 = r6; \
+ call %[bpf_skc_to_tcp_request_sock]; \
+ r8 = r0; \
+ if r8 != 0 goto l1_%=; \
+ r0 = 0; \
+ exit; \
+l1_%=: r0 = *(u8*)(r7 + 0); \
+ exit; \
+" :
+ : __imm(bpf_skc_to_tcp_request_sock),
+ __imm(bpf_skc_to_tcp_sock),
+ __imm_const(__sk_buff_sk, offsetof(struct __sk_buff, sk))
+ : __clobber_all);
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/verifier_spill_fill.c b/tools/testing/selftests/bpf/progs/verifier_spill_fill.c
new file mode 100644
index 000000000000..85e48069c9e6
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/verifier_spill_fill.c
@@ -0,0 +1,1247 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Converted from tools/testing/selftests/bpf/verifier/spill_fill.c */
+
+#include <linux/bpf.h>
+#include <bpf/bpf_helpers.h>
+#include "bpf_misc.h"
+#include <../../../tools/include/linux/filter.h>
+
+struct {
+ __uint(type, BPF_MAP_TYPE_RINGBUF);
+ __uint(max_entries, 4096);
+} map_ringbuf SEC(".maps");
+
+SEC("socket")
+__description("check valid spill/fill")
+__success __failure_unpriv __msg_unpriv("R0 leaks addr")
+__retval(POINTER_VALUE)
+__naked void check_valid_spill_fill(void)
+{
+ asm volatile (" \
+ /* spill R1(ctx) into stack */ \
+ *(u64*)(r10 - 8) = r1; \
+ /* fill it back into R2 */ \
+ r2 = *(u64*)(r10 - 8); \
+ /* should be able to access R0 = *(R2 + 8) */ \
+ /* BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_2, 8), */\
+ r0 = r2; \
+ exit; \
+" ::: __clobber_all);
+}
+
+SEC("socket")
+__description("check valid spill/fill, skb mark")
+__success __success_unpriv __retval(0)
+__naked void valid_spill_fill_skb_mark(void)
+{
+ asm volatile (" \
+ r6 = r1; \
+ *(u64*)(r10 - 8) = r6; \
+ r0 = *(u64*)(r10 - 8); \
+ r0 = *(u32*)(r0 + %[__sk_buff_mark]); \
+ exit; \
+" :
+ : __imm_const(__sk_buff_mark, offsetof(struct __sk_buff, mark))
+ : __clobber_all);
+}
+
+SEC("socket")
+__description("check valid spill/fill, ptr to mem")
+__success __success_unpriv __retval(0)
+__naked void spill_fill_ptr_to_mem(void)
+{
+ asm volatile (" \
+ /* reserve 8 byte ringbuf memory */ \
+ r1 = 0; \
+ *(u64*)(r10 - 8) = r1; \
+ r1 = %[map_ringbuf] ll; \
+ r2 = 8; \
+ r3 = 0; \
+ call %[bpf_ringbuf_reserve]; \
+ /* store a pointer to the reserved memory in R6 */\
+ r6 = r0; \
+ /* check whether the reservation was successful */\
+ if r0 == 0 goto l0_%=; \
+ /* spill R6(mem) into the stack */ \
+ *(u64*)(r10 - 8) = r6; \
+ /* fill it back in R7 */ \
+ r7 = *(u64*)(r10 - 8); \
+ /* should be able to access *(R7) = 0 */ \
+ r1 = 0; \
+ *(u64*)(r7 + 0) = r1; \
+ /* submit the reserved ringbuf memory */ \
+ r1 = r7; \
+ r2 = 0; \
+ call %[bpf_ringbuf_submit]; \
+l0_%=: r0 = 0; \
+ exit; \
+" :
+ : __imm(bpf_ringbuf_reserve),
+ __imm(bpf_ringbuf_submit),
+ __imm_addr(map_ringbuf)
+ : __clobber_all);
+}
+
+SEC("socket")
+__description("check with invalid reg offset 0")
+__failure __msg("R0 pointer arithmetic on ringbuf_mem_or_null prohibited")
+__failure_unpriv
+__naked void with_invalid_reg_offset_0(void)
+{
+ asm volatile (" \
+ /* reserve 8 byte ringbuf memory */ \
+ r1 = 0; \
+ *(u64*)(r10 - 8) = r1; \
+ r1 = %[map_ringbuf] ll; \
+ r2 = 8; \
+ r3 = 0; \
+ call %[bpf_ringbuf_reserve]; \
+ /* store a pointer to the reserved memory in R6 */\
+ r6 = r0; \
+ /* add invalid offset to memory or NULL */ \
+ r0 += 1; \
+ /* check whether the reservation was successful */\
+ if r0 == 0 goto l0_%=; \
+ /* should not be able to access *(R7) = 0 */ \
+ r1 = 0; \
+ *(u32*)(r6 + 0) = r1; \
+ /* submit the reserved ringbuf memory */ \
+ r1 = r6; \
+ r2 = 0; \
+ call %[bpf_ringbuf_submit]; \
+l0_%=: r0 = 0; \
+ exit; \
+" :
+ : __imm(bpf_ringbuf_reserve),
+ __imm(bpf_ringbuf_submit),
+ __imm_addr(map_ringbuf)
+ : __clobber_all);
+}
+
+SEC("socket")
+__description("check corrupted spill/fill")
+__failure __msg("R0 invalid mem access 'scalar'")
+__msg_unpriv("attempt to corrupt spilled")
+__flag(BPF_F_ANY_ALIGNMENT)
+__naked void check_corrupted_spill_fill(void)
+{
+ asm volatile (" \
+ /* spill R1(ctx) into stack */ \
+ *(u64*)(r10 - 8) = r1; \
+ /* mess up with R1 pointer on stack */ \
+ r0 = 0x23; \
+ *(u8*)(r10 - 7) = r0; \
+ /* fill back into R0 is fine for priv. \
+ * R0 now becomes SCALAR_VALUE. \
+ */ \
+ r0 = *(u64*)(r10 - 8); \
+ /* Load from R0 should fail. */ \
+ r0 = *(u64*)(r0 + 8); \
+ exit; \
+" ::: __clobber_all);
+}
+
+SEC("socket")
+__description("check corrupted spill/fill, LSB")
+__success __failure_unpriv __msg_unpriv("attempt to corrupt spilled")
+__retval(POINTER_VALUE)
+__naked void check_corrupted_spill_fill_lsb(void)
+{
+ asm volatile (" \
+ *(u64*)(r10 - 8) = r1; \
+ r0 = 0xcafe; \
+ *(u16*)(r10 - 8) = r0; \
+ r0 = *(u64*)(r10 - 8); \
+ exit; \
+" ::: __clobber_all);
+}
+
+SEC("socket")
+__description("check corrupted spill/fill, MSB")
+__success __failure_unpriv __msg_unpriv("attempt to corrupt spilled")
+__retval(POINTER_VALUE)
+__naked void check_corrupted_spill_fill_msb(void)
+{
+ asm volatile (" \
+ *(u64*)(r10 - 8) = r1; \
+ r0 = 0x12345678; \
+ *(u32*)(r10 - 4) = r0; \
+ r0 = *(u64*)(r10 - 8); \
+ exit; \
+" ::: __clobber_all);
+}
+
+SEC("tc")
+__description("Spill and refill a u32 const scalar. Offset to skb->data")
+__success __retval(0)
+__naked void scalar_offset_to_skb_data_1(void)
+{
+ asm volatile (" \
+ r2 = *(u32*)(r1 + %[__sk_buff_data]); \
+ r3 = *(u32*)(r1 + %[__sk_buff_data_end]); \
+ w4 = 20; \
+ *(u32*)(r10 - 8) = r4; \
+ r4 = *(u32*)(r10 - 8); \
+ r0 = r2; \
+ /* r0 += r4 R0=pkt R2=pkt R3=pkt_end R4=20 */ \
+ r0 += r4; \
+ /* if (r0 > r3) R0=pkt,off=20 R2=pkt R3=pkt_end R4=20 */\
+ if r0 > r3 goto l0_%=; \
+ /* r0 = *(u32 *)r2 R0=pkt,off=20,r=20 R2=pkt,r=20 R3=pkt_end R4=20 */\
+ r0 = *(u32*)(r2 + 0); \
+l0_%=: r0 = 0; \
+ exit; \
+" :
+ : __imm_const(__sk_buff_data, offsetof(struct __sk_buff, data)),
+ __imm_const(__sk_buff_data_end, offsetof(struct __sk_buff, data_end))
+ : __clobber_all);
+}
+
+SEC("socket")
+__description("Spill a u32 const, refill from another half of the uninit u32 from the stack")
+/* in privileged mode reads from uninitialized stack locations are permitted */
+__success __failure_unpriv
+__msg_unpriv("invalid read from stack off -4+0 size 4")
+__retval(0)
+__naked void uninit_u32_from_the_stack(void)
+{
+ asm volatile (" \
+ w4 = 20; \
+ *(u32*)(r10 - 8) = r4; \
+ /* r4 = *(u32 *)(r10 -4) fp-8=????rrrr*/ \
+ r4 = *(u32*)(r10 - 4); \
+ r0 = 0; \
+ exit; \
+" ::: __clobber_all);
+}
+
+SEC("tc")
+__description("Spill a u32 const scalar. Refill as u16. Offset to skb->data")
+__success __retval(0)
+__naked void u16_offset_to_skb_data(void)
+{
+ asm volatile (" \
+ r2 = *(u32*)(r1 + %[__sk_buff_data]); \
+ r3 = *(u32*)(r1 + %[__sk_buff_data_end]); \
+ w4 = 20; \
+ *(u32*)(r10 - 8) = r4; \
+ "
+#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
+ "r4 = *(u16*)(r10 - 8);"
+#else
+ "r4 = *(u16*)(r10 - 6);"
+#endif
+ " \
+ r0 = r2; \
+ /* r0 += r4 R0=pkt R2=pkt R3=pkt_end R4=20 */\
+ r0 += r4; \
+ /* if (r0 > r3) R0=pkt,off=20 R2=pkt R3=pkt_end R4=20 */\
+ if r0 > r3 goto l0_%=; \
+ /* r0 = *(u32 *)r2 R0=pkt,off=20 R2=pkt R3=pkt_end R4=20 */\
+ r0 = *(u32*)(r2 + 0); \
+l0_%=: r0 = 0; \
+ exit; \
+" :
+ : __imm_const(__sk_buff_data, offsetof(struct __sk_buff, data)),
+ __imm_const(__sk_buff_data_end, offsetof(struct __sk_buff, data_end))
+ : __clobber_all);
+}
+
+SEC("tc")
+__description("Spill u32 const scalars. Refill as u64. Offset to skb->data")
+__failure __msg("math between pkt pointer and register with unbounded min value is not allowed")
+__naked void u64_offset_to_skb_data(void)
+{
+ asm volatile (" \
+ r2 = *(u32*)(r1 + %[__sk_buff_data]); \
+ r3 = *(u32*)(r1 + %[__sk_buff_data_end]); \
+ w6 = 0; \
+ w7 = 20; \
+ *(u32*)(r10 - 4) = r6; \
+ *(u32*)(r10 - 8) = r7; \
+ r4 = *(u64*)(r10 - 8); \
+ r0 = r2; \
+ /* r0 += r4 R0=pkt R2=pkt R3=pkt_end R4= */ \
+ r0 += r4; \
+ if r0 > r3 goto l0_%=; \
+ r0 = *(u32*)(r2 + 0); \
+l0_%=: r0 = 0; \
+ exit; \
+" :
+ : __imm_const(__sk_buff_data, offsetof(struct __sk_buff, data)),
+ __imm_const(__sk_buff_data_end, offsetof(struct __sk_buff, data_end))
+ : __clobber_all);
+}
+
+SEC("tc")
+__description("Spill a u32 const scalar. Refill as u16 from MSB. Offset to skb->data")
+__failure __msg("invalid access to packet")
+__naked void _6_offset_to_skb_data(void)
+{
+ asm volatile (" \
+ r2 = *(u32*)(r1 + %[__sk_buff_data]); \
+ r3 = *(u32*)(r1 + %[__sk_buff_data_end]); \
+ w4 = 20; \
+ *(u32*)(r10 - 8) = r4; \
+ "
+#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
+ "r4 = *(u16*)(r10 - 6);"
+#else
+ "r4 = *(u16*)(r10 - 8);"
+#endif
+ " \
+ r0 = r2; \
+ /* r0 += r4 R0=pkt R2=pkt R3=pkt_end R4=umax=65535 */\
+ r0 += r4; \
+ /* if (r0 > r3) R0=pkt,umax=65535 R2=pkt R3=pkt_end R4=umax=65535 */\
+ if r0 > r3 goto l0_%=; \
+ /* r0 = *(u32 *)r2 R0=pkt,umax=65535 R2=pkt R3=pkt_end R4=20 */\
+ r0 = *(u32*)(r2 + 0); \
+l0_%=: r0 = 0; \
+ exit; \
+" :
+ : __imm_const(__sk_buff_data, offsetof(struct __sk_buff, data)),
+ __imm_const(__sk_buff_data_end, offsetof(struct __sk_buff, data_end))
+ : __clobber_all);
+}
+
+SEC("tc")
+__description("Spill and refill a u32 const scalar at non 8byte aligned stack addr. Offset to skb->data")
+__failure __msg("invalid access to packet")
+__naked void addr_offset_to_skb_data(void)
+{
+ asm volatile (" \
+ r2 = *(u32*)(r1 + %[__sk_buff_data]); \
+ r3 = *(u32*)(r1 + %[__sk_buff_data_end]); \
+ w4 = 20; \
+ *(u32*)(r10 - 8) = r4; \
+ *(u32*)(r10 - 4) = r4; \
+ r4 = *(u32*)(r10 - 4); \
+ r0 = r2; \
+ /* r0 += r4 R0=pkt R2=pkt R3=pkt_end R4=umax=U32_MAX */\
+ r0 += r4; \
+ /* if (r0 > r3) R0=pkt,umax=U32_MAX R2=pkt R3=pkt_end R4= */\
+ if r0 > r3 goto l0_%=; \
+ /* r0 = *(u32 *)r2 R0=pkt,umax=U32_MAX R2=pkt R3=pkt_end R4= */\
+ r0 = *(u32*)(r2 + 0); \
+l0_%=: r0 = 0; \
+ exit; \
+" :
+ : __imm_const(__sk_buff_data, offsetof(struct __sk_buff, data)),
+ __imm_const(__sk_buff_data_end, offsetof(struct __sk_buff, data_end))
+ : __clobber_all);
+}
+
+SEC("tc")
+__description("Spill and refill a umax=40 bounded scalar. Offset to skb->data")
+__success __retval(0)
+__naked void scalar_offset_to_skb_data_2(void)
+{
+ asm volatile (" \
+ r2 = *(u32*)(r1 + %[__sk_buff_data]); \
+ r3 = *(u32*)(r1 + %[__sk_buff_data_end]); \
+ r4 = *(u64*)(r1 + %[__sk_buff_tstamp]); \
+ if r4 <= 40 goto l0_%=; \
+ r0 = 0; \
+ exit; \
+l0_%=: /* *(u32 *)(r10 -8) = r4 R4=umax=40 */ \
+ *(u32*)(r10 - 8) = r4; \
+ /* r4 = (*u32 *)(r10 - 8) */ \
+ r4 = *(u32*)(r10 - 8); \
+ /* r2 += r4 R2=pkt R4=umax=40 */ \
+ r2 += r4; \
+ /* r0 = r2 R2=pkt,umax=40 R4=umax=40 */ \
+ r0 = r2; \
+ /* r2 += 20 R0=pkt,umax=40 R2=pkt,umax=40 */ \
+ r2 += 20; \
+ /* if (r2 > r3) R0=pkt,umax=40 R2=pkt,off=20,umax=40 */\
+ if r2 > r3 goto l1_%=; \
+ /* r0 = *(u32 *)r0 R0=pkt,r=20,umax=40 R2=pkt,off=20,r=20,umax=40 */\
+ r0 = *(u32*)(r0 + 0); \
+l1_%=: r0 = 0; \
+ exit; \
+" :
+ : __imm_const(__sk_buff_data, offsetof(struct __sk_buff, data)),
+ __imm_const(__sk_buff_data_end, offsetof(struct __sk_buff, data_end)),
+ __imm_const(__sk_buff_tstamp, offsetof(struct __sk_buff, tstamp))
+ : __clobber_all);
+}
+
+SEC("tc")
+__description("Spill a u32 scalar at fp-4 and then at fp-8")
+__success __retval(0)
+__naked void and_then_at_fp_8(void)
+{
+ asm volatile (" \
+ w4 = 4321; \
+ *(u32*)(r10 - 4) = r4; \
+ *(u32*)(r10 - 8) = r4; \
+ r4 = *(u64*)(r10 - 8); \
+ r0 = 0; \
+ exit; \
+" ::: __clobber_all);
+}
+
+SEC("xdp")
+__description("32-bit spill of 64-bit reg should clear ID")
+__failure __msg("math between ctx pointer and 4294967295 is not allowed")
+__naked void spill_32bit_of_64bit_fail(void)
+{
+ asm volatile (" \
+ r6 = r1; \
+ /* Roll one bit to force the verifier to track both branches. */\
+ call %[bpf_get_prandom_u32]; \
+ r0 &= 0x8; \
+ /* Put a large number into r1. */ \
+ r1 = 0xffffffff; \
+ r1 <<= 32; \
+ r1 += r0; \
+ /* Assign an ID to r1. */ \
+ r2 = r1; \
+ /* 32-bit spill r1 to stack - should clear the ID! */\
+ *(u32*)(r10 - 8) = r1; \
+ /* 32-bit fill r2 from stack. */ \
+ r2 = *(u32*)(r10 - 8); \
+ /* Compare r2 with another register to trigger find_equal_scalars.\
+ * Having one random bit is important here, otherwise the verifier cuts\
+ * the corners. If the ID was mistakenly preserved on spill, this would\
+ * cause the verifier to think that r1 is also equal to zero in one of\
+ * the branches, and equal to eight on the other branch.\
+ */ \
+ r3 = 0; \
+ if r2 != r3 goto l0_%=; \
+l0_%=: r1 >>= 32; \
+ /* At this point, if the verifier thinks that r1 is 0, an out-of-bounds\
+ * read will happen, because it actually contains 0xffffffff.\
+ */ \
+ r6 += r1; \
+ r0 = *(u32*)(r6 + 0); \
+ exit; \
+" :
+ : __imm(bpf_get_prandom_u32)
+ : __clobber_all);
+}
+
+SEC("xdp")
+__description("16-bit spill of 32-bit reg should clear ID")
+__failure __msg("dereference of modified ctx ptr R6 off=65535 disallowed")
+__naked void spill_16bit_of_32bit_fail(void)
+{
+ asm volatile (" \
+ r6 = r1; \
+ /* Roll one bit to force the verifier to track both branches. */\
+ call %[bpf_get_prandom_u32]; \
+ r0 &= 0x8; \
+ /* Put a large number into r1. */ \
+ w1 = 0xffff0000; \
+ r1 += r0; \
+ /* Assign an ID to r1. */ \
+ r2 = r1; \
+ /* 16-bit spill r1 to stack - should clear the ID! */\
+ *(u16*)(r10 - 8) = r1; \
+ /* 16-bit fill r2 from stack. */ \
+ r2 = *(u16*)(r10 - 8); \
+ /* Compare r2 with another register to trigger find_equal_scalars.\
+ * Having one random bit is important here, otherwise the verifier cuts\
+ * the corners. If the ID was mistakenly preserved on spill, this would\
+ * cause the verifier to think that r1 is also equal to zero in one of\
+ * the branches, and equal to eight on the other branch.\
+ */ \
+ r3 = 0; \
+ if r2 != r3 goto l0_%=; \
+l0_%=: r1 >>= 16; \
+ /* At this point, if the verifier thinks that r1 is 0, an out-of-bounds\
+ * read will happen, because it actually contains 0xffff.\
+ */ \
+ r6 += r1; \
+ r0 = *(u32*)(r6 + 0); \
+ exit; \
+" :
+ : __imm(bpf_get_prandom_u32)
+ : __clobber_all);
+}
+
+SEC("raw_tp")
+__log_level(2)
+__success
+__msg("fp-8=0m??scalar()")
+__msg("fp-16=00mm??scalar()")
+__msg("fp-24=00mm???scalar()")
+__naked void spill_subregs_preserve_stack_zero(void)
+{
+ asm volatile (
+ "call %[bpf_get_prandom_u32];"
+
+ /* 32-bit subreg spill with ZERO, MISC, and INVALID */
+ ".8byte %[fp1_u8_st_zero];" /* ZERO, LLVM-18+: *(u8 *)(r10 -1) = 0; */
+ "*(u8 *)(r10 -2) = r0;" /* MISC */
+ /* fp-3 and fp-4 stay INVALID */
+ "*(u32 *)(r10 -8) = r0;"
+
+ /* 16-bit subreg spill with ZERO, MISC, and INVALID */
+ ".8byte %[fp10_u16_st_zero];" /* ZERO, LLVM-18+: *(u16 *)(r10 -10) = 0; */
+ "*(u16 *)(r10 -12) = r0;" /* MISC */
+ /* fp-13 and fp-14 stay INVALID */
+ "*(u16 *)(r10 -16) = r0;"
+
+ /* 8-bit subreg spill with ZERO, MISC, and INVALID */
+ ".8byte %[fp18_u16_st_zero];" /* ZERO, LLVM-18+: *(u16 *)(r18 -10) = 0; */
+ "*(u16 *)(r10 -20) = r0;" /* MISC */
+ /* fp-21, fp-22, and fp-23 stay INVALID */
+ "*(u8 *)(r10 -24) = r0;"
+
+ "r0 = 0;"
+ "exit;"
+ :
+ : __imm(bpf_get_prandom_u32),
+ __imm_insn(fp1_u8_st_zero, BPF_ST_MEM(BPF_B, BPF_REG_FP, -1, 0)),
+ __imm_insn(fp10_u16_st_zero, BPF_ST_MEM(BPF_H, BPF_REG_FP, -10, 0)),
+ __imm_insn(fp18_u16_st_zero, BPF_ST_MEM(BPF_H, BPF_REG_FP, -18, 0))
+ : __clobber_all);
+}
+
+char single_byte_buf[1] SEC(".data.single_byte_buf");
+
+SEC("raw_tp")
+__log_level(2)
+__success
+/* fp-8 is spilled IMPRECISE value zero (represented by a zero value fake reg) */
+__msg("2: (7a) *(u64 *)(r10 -8) = 0 ; R10=fp0 fp-8_w=0")
+/* but fp-16 is spilled IMPRECISE zero const reg */
+__msg("4: (7b) *(u64 *)(r10 -16) = r0 ; R0_w=0 R10=fp0 fp-16_w=0")
+/* validate that assigning R2 from STACK_SPILL with zero value doesn't mark register
+ * precise immediately; if necessary, it will be marked precise later
+ */
+__msg("6: (71) r2 = *(u8 *)(r10 -1) ; R2_w=0 R10=fp0 fp-8_w=0")
+/* similarly, when R2 is assigned from spilled register, it is initially
+ * imprecise, but will be marked precise later once it is used in precise context
+ */
+__msg("10: (71) r2 = *(u8 *)(r10 -9) ; R2_w=0 R10=fp0 fp-16_w=0")
+__msg("11: (0f) r1 += r2")
+__msg("mark_precise: frame0: last_idx 11 first_idx 0 subseq_idx -1")
+__msg("mark_precise: frame0: regs=r2 stack= before 10: (71) r2 = *(u8 *)(r10 -9)")
+__msg("mark_precise: frame0: regs= stack=-16 before 9: (bf) r1 = r6")
+__msg("mark_precise: frame0: regs= stack=-16 before 8: (73) *(u8 *)(r1 +0) = r2")
+__msg("mark_precise: frame0: regs= stack=-16 before 7: (0f) r1 += r2")
+__msg("mark_precise: frame0: regs= stack=-16 before 6: (71) r2 = *(u8 *)(r10 -1)")
+__msg("mark_precise: frame0: regs= stack=-16 before 5: (bf) r1 = r6")
+__msg("mark_precise: frame0: regs= stack=-16 before 4: (7b) *(u64 *)(r10 -16) = r0")
+__msg("mark_precise: frame0: regs=r0 stack= before 3: (b7) r0 = 0")
+__naked void partial_stack_load_preserves_zeros(void)
+{
+ asm volatile (
+ /* fp-8 is value zero (represented by a zero value fake reg) */
+ ".8byte %[fp8_st_zero];" /* LLVM-18+: *(u64 *)(r10 -8) = 0; */
+
+ /* fp-16 is const zero register */
+ "r0 = 0;"
+ "*(u64 *)(r10 -16) = r0;"
+
+ /* load single U8 from non-aligned spilled value zero slot */
+ "r1 = %[single_byte_buf];"
+ "r2 = *(u8 *)(r10 -1);"
+ "r1 += r2;"
+ "*(u8 *)(r1 + 0) = r2;" /* this should be fine */
+
+ /* load single U8 from non-aligned ZERO REG slot */
+ "r1 = %[single_byte_buf];"
+ "r2 = *(u8 *)(r10 -9);"
+ "r1 += r2;"
+ "*(u8 *)(r1 + 0) = r2;" /* this should be fine */
+
+ /* load single U16 from non-aligned spilled value zero slot */
+ "r1 = %[single_byte_buf];"
+ "r2 = *(u16 *)(r10 -2);"
+ "r1 += r2;"
+ "*(u8 *)(r1 + 0) = r2;" /* this should be fine */
+
+ /* load single U16 from non-aligned ZERO REG slot */
+ "r1 = %[single_byte_buf];"
+ "r2 = *(u16 *)(r10 -10);"
+ "r1 += r2;"
+ "*(u8 *)(r1 + 0) = r2;" /* this should be fine */
+
+ /* load single U32 from non-aligned spilled value zero slot */
+ "r1 = %[single_byte_buf];"
+ "r2 = *(u32 *)(r10 -4);"
+ "r1 += r2;"
+ "*(u8 *)(r1 + 0) = r2;" /* this should be fine */
+
+ /* load single U32 from non-aligned ZERO REG slot */
+ "r1 = %[single_byte_buf];"
+ "r2 = *(u32 *)(r10 -12);"
+ "r1 += r2;"
+ "*(u8 *)(r1 + 0) = r2;" /* this should be fine */
+
+ /* for completeness, load U64 from STACK_ZERO slot */
+ "r1 = %[single_byte_buf];"
+ "r2 = *(u64 *)(r10 -8);"
+ "r1 += r2;"
+ "*(u8 *)(r1 + 0) = r2;" /* this should be fine */
+
+ /* for completeness, load U64 from ZERO REG slot */
+ "r1 = %[single_byte_buf];"
+ "r2 = *(u64 *)(r10 -16);"
+ "r1 += r2;"
+ "*(u8 *)(r1 + 0) = r2;" /* this should be fine */
+
+ "r0 = 0;"
+ "exit;"
+ :
+ : __imm_ptr(single_byte_buf),
+ __imm_insn(fp8_st_zero, BPF_ST_MEM(BPF_DW, BPF_REG_FP, -8, 0))
+ : __clobber_common);
+}
+
+SEC("raw_tp")
+__log_level(2)
+__success
+/* fp-4 is STACK_ZERO */
+__msg("2: (62) *(u32 *)(r10 -4) = 0 ; R10=fp0 fp-8=0000????")
+__msg("4: (71) r2 = *(u8 *)(r10 -1) ; R2_w=0 R10=fp0 fp-8=0000????")
+__msg("5: (0f) r1 += r2")
+__msg("mark_precise: frame0: last_idx 5 first_idx 0 subseq_idx -1")
+__msg("mark_precise: frame0: regs=r2 stack= before 4: (71) r2 = *(u8 *)(r10 -1)")
+__naked void partial_stack_load_preserves_partial_zeros(void)
+{
+ asm volatile (
+ /* fp-4 is value zero */
+ ".8byte %[fp4_st_zero];" /* LLVM-18+: *(u32 *)(r10 -4) = 0; */
+
+ /* load single U8 from non-aligned stack zero slot */
+ "r1 = %[single_byte_buf];"
+ "r2 = *(u8 *)(r10 -1);"
+ "r1 += r2;"
+ "*(u8 *)(r1 + 0) = r2;" /* this should be fine */
+
+ /* load single U16 from non-aligned stack zero slot */
+ "r1 = %[single_byte_buf];"
+ "r2 = *(u16 *)(r10 -2);"
+ "r1 += r2;"
+ "*(u8 *)(r1 + 0) = r2;" /* this should be fine */
+
+ /* load single U32 from non-aligned stack zero slot */
+ "r1 = %[single_byte_buf];"
+ "r2 = *(u32 *)(r10 -4);"
+ "r1 += r2;"
+ "*(u8 *)(r1 + 0) = r2;" /* this should be fine */
+
+ "r0 = 0;"
+ "exit;"
+ :
+ : __imm_ptr(single_byte_buf),
+ __imm_insn(fp4_st_zero, BPF_ST_MEM(BPF_W, BPF_REG_FP, -4, 0))
+ : __clobber_common);
+}
+
+char two_byte_buf[2] SEC(".data.two_byte_buf");
+
+SEC("raw_tp")
+__log_level(2) __flag(BPF_F_TEST_STATE_FREQ)
+__success
+/* make sure fp-8 is IMPRECISE fake register spill */
+__msg("3: (7a) *(u64 *)(r10 -8) = 1 ; R10=fp0 fp-8_w=1")
+/* and fp-16 is spilled IMPRECISE const reg */
+__msg("5: (7b) *(u64 *)(r10 -16) = r0 ; R0_w=1 R10=fp0 fp-16_w=1")
+/* validate load from fp-8, which was initialized using BPF_ST_MEM */
+__msg("8: (79) r2 = *(u64 *)(r10 -8) ; R2_w=1 R10=fp0 fp-8=1")
+__msg("9: (0f) r1 += r2")
+__msg("mark_precise: frame0: last_idx 9 first_idx 7 subseq_idx -1")
+__msg("mark_precise: frame0: regs=r2 stack= before 8: (79) r2 = *(u64 *)(r10 -8)")
+__msg("mark_precise: frame0: regs= stack=-8 before 7: (bf) r1 = r6")
+/* note, fp-8 is precise, fp-16 is not yet precise, we'll get there */
+__msg("mark_precise: frame0: parent state regs= stack=-8: R0_w=1 R1=ctx() R6_r=map_value(map=.data.two_byte_,ks=4,vs=2) R10=fp0 fp-8_rw=P1 fp-16_w=1")
+__msg("mark_precise: frame0: last_idx 6 first_idx 3 subseq_idx 7")
+__msg("mark_precise: frame0: regs= stack=-8 before 6: (05) goto pc+0")
+__msg("mark_precise: frame0: regs= stack=-8 before 5: (7b) *(u64 *)(r10 -16) = r0")
+__msg("mark_precise: frame0: regs= stack=-8 before 4: (b7) r0 = 1")
+__msg("mark_precise: frame0: regs= stack=-8 before 3: (7a) *(u64 *)(r10 -8) = 1")
+__msg("10: R1_w=map_value(map=.data.two_byte_,ks=4,vs=2,off=1) R2_w=1")
+/* validate load from fp-16, which was initialized using BPF_STX_MEM */
+__msg("12: (79) r2 = *(u64 *)(r10 -16) ; R2_w=1 R10=fp0 fp-16=1")
+__msg("13: (0f) r1 += r2")
+__msg("mark_precise: frame0: last_idx 13 first_idx 7 subseq_idx -1")
+__msg("mark_precise: frame0: regs=r2 stack= before 12: (79) r2 = *(u64 *)(r10 -16)")
+__msg("mark_precise: frame0: regs= stack=-16 before 11: (bf) r1 = r6")
+__msg("mark_precise: frame0: regs= stack=-16 before 10: (73) *(u8 *)(r1 +0) = r2")
+__msg("mark_precise: frame0: regs= stack=-16 before 9: (0f) r1 += r2")
+__msg("mark_precise: frame0: regs= stack=-16 before 8: (79) r2 = *(u64 *)(r10 -8)")
+__msg("mark_precise: frame0: regs= stack=-16 before 7: (bf) r1 = r6")
+/* now both fp-8 and fp-16 are precise, very good */
+__msg("mark_precise: frame0: parent state regs= stack=-16: R0_w=1 R1=ctx() R6_r=map_value(map=.data.two_byte_,ks=4,vs=2) R10=fp0 fp-8_rw=P1 fp-16_rw=P1")
+__msg("mark_precise: frame0: last_idx 6 first_idx 3 subseq_idx 7")
+__msg("mark_precise: frame0: regs= stack=-16 before 6: (05) goto pc+0")
+__msg("mark_precise: frame0: regs= stack=-16 before 5: (7b) *(u64 *)(r10 -16) = r0")
+__msg("mark_precise: frame0: regs=r0 stack= before 4: (b7) r0 = 1")
+__msg("14: R1_w=map_value(map=.data.two_byte_,ks=4,vs=2,off=1) R2_w=1")
+__naked void stack_load_preserves_const_precision(void)
+{
+ asm volatile (
+ /* establish checkpoint with state that has no stack slots;
+ * if we bubble up to this state without finding desired stack
+ * slot, then it's a bug and should be caught
+ */
+ "goto +0;"
+
+ /* fp-8 is const 1 *fake* register */
+ ".8byte %[fp8_st_one];" /* LLVM-18+: *(u64 *)(r10 -8) = 1; */
+
+ /* fp-16 is const 1 register */
+ "r0 = 1;"
+ "*(u64 *)(r10 -16) = r0;"
+
+ /* force checkpoint to check precision marks preserved in parent states */
+ "goto +0;"
+
+ /* load single U64 from aligned FAKE_REG=1 slot */
+ "r1 = %[two_byte_buf];"
+ "r2 = *(u64 *)(r10 -8);"
+ "r1 += r2;"
+ "*(u8 *)(r1 + 0) = r2;" /* this should be fine */
+
+ /* load single U64 from aligned REG=1 slot */
+ "r1 = %[two_byte_buf];"
+ "r2 = *(u64 *)(r10 -16);"
+ "r1 += r2;"
+ "*(u8 *)(r1 + 0) = r2;" /* this should be fine */
+
+ "r0 = 0;"
+ "exit;"
+ :
+ : __imm_ptr(two_byte_buf),
+ __imm_insn(fp8_st_one, BPF_ST_MEM(BPF_DW, BPF_REG_FP, -8, 1))
+ : __clobber_common);
+}
+
+SEC("raw_tp")
+__log_level(2) __flag(BPF_F_TEST_STATE_FREQ)
+__success
+/* make sure fp-8 is 32-bit FAKE subregister spill */
+__msg("3: (62) *(u32 *)(r10 -8) = 1 ; R10=fp0 fp-8=????1")
+/* but fp-16 is spilled IMPRECISE zero const reg */
+__msg("5: (63) *(u32 *)(r10 -16) = r0 ; R0_w=1 R10=fp0 fp-16=????1")
+/* validate load from fp-8, which was initialized using BPF_ST_MEM */
+__msg("8: (61) r2 = *(u32 *)(r10 -8) ; R2_w=1 R10=fp0 fp-8=????1")
+__msg("9: (0f) r1 += r2")
+__msg("mark_precise: frame0: last_idx 9 first_idx 7 subseq_idx -1")
+__msg("mark_precise: frame0: regs=r2 stack= before 8: (61) r2 = *(u32 *)(r10 -8)")
+__msg("mark_precise: frame0: regs= stack=-8 before 7: (bf) r1 = r6")
+__msg("mark_precise: frame0: parent state regs= stack=-8: R0_w=1 R1=ctx() R6_r=map_value(map=.data.two_byte_,ks=4,vs=2) R10=fp0 fp-8_r=????P1 fp-16=????1")
+__msg("mark_precise: frame0: last_idx 6 first_idx 3 subseq_idx 7")
+__msg("mark_precise: frame0: regs= stack=-8 before 6: (05) goto pc+0")
+__msg("mark_precise: frame0: regs= stack=-8 before 5: (63) *(u32 *)(r10 -16) = r0")
+__msg("mark_precise: frame0: regs= stack=-8 before 4: (b7) r0 = 1")
+__msg("mark_precise: frame0: regs= stack=-8 before 3: (62) *(u32 *)(r10 -8) = 1")
+__msg("10: R1_w=map_value(map=.data.two_byte_,ks=4,vs=2,off=1) R2_w=1")
+/* validate load from fp-16, which was initialized using BPF_STX_MEM */
+__msg("12: (61) r2 = *(u32 *)(r10 -16) ; R2_w=1 R10=fp0 fp-16=????1")
+__msg("13: (0f) r1 += r2")
+__msg("mark_precise: frame0: last_idx 13 first_idx 7 subseq_idx -1")
+__msg("mark_precise: frame0: regs=r2 stack= before 12: (61) r2 = *(u32 *)(r10 -16)")
+__msg("mark_precise: frame0: regs= stack=-16 before 11: (bf) r1 = r6")
+__msg("mark_precise: frame0: regs= stack=-16 before 10: (73) *(u8 *)(r1 +0) = r2")
+__msg("mark_precise: frame0: regs= stack=-16 before 9: (0f) r1 += r2")
+__msg("mark_precise: frame0: regs= stack=-16 before 8: (61) r2 = *(u32 *)(r10 -8)")
+__msg("mark_precise: frame0: regs= stack=-16 before 7: (bf) r1 = r6")
+__msg("mark_precise: frame0: parent state regs= stack=-16: R0_w=1 R1=ctx() R6_r=map_value(map=.data.two_byte_,ks=4,vs=2) R10=fp0 fp-8_r=????P1 fp-16_r=????P1")
+__msg("mark_precise: frame0: last_idx 6 first_idx 3 subseq_idx 7")
+__msg("mark_precise: frame0: regs= stack=-16 before 6: (05) goto pc+0")
+__msg("mark_precise: frame0: regs= stack=-16 before 5: (63) *(u32 *)(r10 -16) = r0")
+__msg("mark_precise: frame0: regs=r0 stack= before 4: (b7) r0 = 1")
+__msg("14: R1_w=map_value(map=.data.two_byte_,ks=4,vs=2,off=1) R2_w=1")
+__naked void stack_load_preserves_const_precision_subreg(void)
+{
+ asm volatile (
+ /* establish checkpoint with state that has no stack slots;
+ * if we bubble up to this state without finding desired stack
+ * slot, then it's a bug and should be caught
+ */
+ "goto +0;"
+
+ /* fp-8 is const 1 *fake* SUB-register */
+ ".8byte %[fp8_st_one];" /* LLVM-18+: *(u32 *)(r10 -8) = 1; */
+
+ /* fp-16 is const 1 SUB-register */
+ "r0 = 1;"
+ "*(u32 *)(r10 -16) = r0;"
+
+ /* force checkpoint to check precision marks preserved in parent states */
+ "goto +0;"
+
+ /* load single U32 from aligned FAKE_REG=1 slot */
+ "r1 = %[two_byte_buf];"
+ "r2 = *(u32 *)(r10 -8);"
+ "r1 += r2;"
+ "*(u8 *)(r1 + 0) = r2;" /* this should be fine */
+
+ /* load single U32 from aligned REG=1 slot */
+ "r1 = %[two_byte_buf];"
+ "r2 = *(u32 *)(r10 -16);"
+ "r1 += r2;"
+ "*(u8 *)(r1 + 0) = r2;" /* this should be fine */
+
+ "r0 = 0;"
+ "exit;"
+ :
+ : __imm_ptr(two_byte_buf),
+ __imm_insn(fp8_st_one, BPF_ST_MEM(BPF_W, BPF_REG_FP, -8, 1)) /* 32-bit spill */
+ : __clobber_common);
+}
+
+SEC("xdp")
+__description("32-bit spilled reg range should be tracked")
+__success __retval(0)
+__naked void spill_32bit_range_track(void)
+{
+ asm volatile(" \
+ call %[bpf_ktime_get_ns]; \
+ /* Make r0 bounded. */ \
+ r0 &= 65535; \
+ /* Assign an ID to r0. */ \
+ r1 = r0; \
+ /* 32-bit spill r0 to stack. */ \
+ *(u32*)(r10 - 8) = r0; \
+ /* Boundary check on r0. */ \
+ if r0 < 1 goto l0_%=; \
+ /* 32-bit fill r1 from stack. */ \
+ r1 = *(u32*)(r10 - 8); \
+ /* r1 == r0 => r1 >= 1 always. */ \
+ if r1 >= 1 goto l0_%=; \
+ /* Dead branch: the verifier should prune it. \
+ * Do an invalid memory access if the verifier \
+ * follows it. \
+ */ \
+ r0 = *(u64*)(r9 + 0); \
+l0_%=: r0 = 0; \
+ exit; \
+" :
+ : __imm(bpf_ktime_get_ns)
+ : __clobber_all);
+}
+
+SEC("xdp")
+__description("64-bit spill of 64-bit reg should assign ID")
+__success __retval(0)
+__naked void spill_64bit_of_64bit_ok(void)
+{
+ asm volatile (" \
+ /* Roll one bit to make the register inexact. */\
+ call %[bpf_get_prandom_u32]; \
+ r0 &= 0x80000000; \
+ r0 <<= 32; \
+ /* 64-bit spill r0 to stack - should assign an ID. */\
+ *(u64*)(r10 - 8) = r0; \
+ /* 64-bit fill r1 from stack - should preserve the ID. */\
+ r1 = *(u64*)(r10 - 8); \
+ /* Compare r1 with another register to trigger find_equal_scalars.\
+ * Having one random bit is important here, otherwise the verifier cuts\
+ * the corners. \
+ */ \
+ r2 = 0; \
+ if r1 != r2 goto l0_%=; \
+ /* The result of this comparison is predefined. */\
+ if r0 == r2 goto l0_%=; \
+ /* Dead branch: the verifier should prune it. Do an invalid memory\
+ * access if the verifier follows it. \
+ */ \
+ r0 = *(u64*)(r9 + 0); \
+ exit; \
+l0_%=: r0 = 0; \
+ exit; \
+" :
+ : __imm(bpf_get_prandom_u32)
+ : __clobber_all);
+}
+
+SEC("xdp")
+__description("32-bit spill of 32-bit reg should assign ID")
+__success __retval(0)
+__naked void spill_32bit_of_32bit_ok(void)
+{
+ asm volatile (" \
+ /* Roll one bit to make the register inexact. */\
+ call %[bpf_get_prandom_u32]; \
+ w0 &= 0x80000000; \
+ /* 32-bit spill r0 to stack - should assign an ID. */\
+ *(u32*)(r10 - 8) = r0; \
+ /* 32-bit fill r1 from stack - should preserve the ID. */\
+ r1 = *(u32*)(r10 - 8); \
+ /* Compare r1 with another register to trigger find_equal_scalars.\
+ * Having one random bit is important here, otherwise the verifier cuts\
+ * the corners. \
+ */ \
+ r2 = 0; \
+ if r1 != r2 goto l0_%=; \
+ /* The result of this comparison is predefined. */\
+ if r0 == r2 goto l0_%=; \
+ /* Dead branch: the verifier should prune it. Do an invalid memory\
+ * access if the verifier follows it. \
+ */ \
+ r0 = *(u64*)(r9 + 0); \
+ exit; \
+l0_%=: r0 = 0; \
+ exit; \
+" :
+ : __imm(bpf_get_prandom_u32)
+ : __clobber_all);
+}
+
+SEC("xdp")
+__description("16-bit spill of 16-bit reg should assign ID")
+__success __retval(0)
+__naked void spill_16bit_of_16bit_ok(void)
+{
+ asm volatile (" \
+ /* Roll one bit to make the register inexact. */\
+ call %[bpf_get_prandom_u32]; \
+ r0 &= 0x8000; \
+ /* 16-bit spill r0 to stack - should assign an ID. */\
+ *(u16*)(r10 - 8) = r0; \
+ /* 16-bit fill r1 from stack - should preserve the ID. */\
+ r1 = *(u16*)(r10 - 8); \
+ /* Compare r1 with another register to trigger find_equal_scalars.\
+ * Having one random bit is important here, otherwise the verifier cuts\
+ * the corners. \
+ */ \
+ r2 = 0; \
+ if r1 != r2 goto l0_%=; \
+ /* The result of this comparison is predefined. */\
+ if r0 == r2 goto l0_%=; \
+ /* Dead branch: the verifier should prune it. Do an invalid memory\
+ * access if the verifier follows it. \
+ */ \
+ r0 = *(u64*)(r9 + 0); \
+ exit; \
+l0_%=: r0 = 0; \
+ exit; \
+" :
+ : __imm(bpf_get_prandom_u32)
+ : __clobber_all);
+}
+
+SEC("xdp")
+__description("8-bit spill of 8-bit reg should assign ID")
+__success __retval(0)
+__naked void spill_8bit_of_8bit_ok(void)
+{
+ asm volatile (" \
+ /* Roll one bit to make the register inexact. */\
+ call %[bpf_get_prandom_u32]; \
+ r0 &= 0x80; \
+ /* 8-bit spill r0 to stack - should assign an ID. */\
+ *(u8*)(r10 - 8) = r0; \
+ /* 8-bit fill r1 from stack - should preserve the ID. */\
+ r1 = *(u8*)(r10 - 8); \
+ /* Compare r1 with another register to trigger find_equal_scalars.\
+ * Having one random bit is important here, otherwise the verifier cuts\
+ * the corners. \
+ */ \
+ r2 = 0; \
+ if r1 != r2 goto l0_%=; \
+ /* The result of this comparison is predefined. */\
+ if r0 == r2 goto l0_%=; \
+ /* Dead branch: the verifier should prune it. Do an invalid memory\
+ * access if the verifier follows it. \
+ */ \
+ r0 = *(u64*)(r9 + 0); \
+ exit; \
+l0_%=: r0 = 0; \
+ exit; \
+" :
+ : __imm(bpf_get_prandom_u32)
+ : __clobber_all);
+}
+
+SEC("xdp")
+__description("spill unbounded reg, then range check src")
+__success __retval(0)
+__naked void spill_unbounded(void)
+{
+ asm volatile (" \
+ /* Produce an unbounded scalar. */ \
+ call %[bpf_get_prandom_u32]; \
+ /* Spill r0 to stack. */ \
+ *(u64*)(r10 - 8) = r0; \
+ /* Boundary check on r0. */ \
+ if r0 > 16 goto l0_%=; \
+ /* Fill r0 from stack. */ \
+ r0 = *(u64*)(r10 - 8); \
+ /* Boundary check on r0 with predetermined result. */\
+ if r0 <= 16 goto l0_%=; \
+ /* Dead branch: the verifier should prune it. Do an invalid memory\
+ * access if the verifier follows it. \
+ */ \
+ r0 = *(u64*)(r9 + 0); \
+l0_%=: r0 = 0; \
+ exit; \
+" :
+ : __imm(bpf_get_prandom_u32)
+ : __clobber_all);
+}
+
+SEC("xdp")
+__description("32-bit fill after 64-bit spill")
+__success __retval(0)
+__naked void fill_32bit_after_spill_64bit(void)
+{
+ asm volatile(" \
+ /* Randomize the upper 32 bits. */ \
+ call %[bpf_get_prandom_u32]; \
+ r0 <<= 32; \
+ /* 64-bit spill r0 to stack. */ \
+ *(u64*)(r10 - 8) = r0; \
+ /* 32-bit fill r0 from stack. */ \
+ "
+#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
+ "r0 = *(u32*)(r10 - 8);"
+#else
+ "r0 = *(u32*)(r10 - 4);"
+#endif
+ " \
+ /* Boundary check on r0 with predetermined result. */\
+ if r0 == 0 goto l0_%=; \
+ /* Dead branch: the verifier should prune it. Do an invalid memory\
+ * access if the verifier follows it. \
+ */ \
+ r0 = *(u64*)(r9 + 0); \
+l0_%=: exit; \
+" :
+ : __imm(bpf_get_prandom_u32)
+ : __clobber_all);
+}
+
+SEC("xdp")
+__description("32-bit fill after 64-bit spill of 32-bit value should preserve ID")
+__success __retval(0)
+__naked void fill_32bit_after_spill_64bit_preserve_id(void)
+{
+ asm volatile (" \
+ /* Randomize the lower 32 bits. */ \
+ call %[bpf_get_prandom_u32]; \
+ w0 &= 0xffffffff; \
+ /* 64-bit spill r0 to stack - should assign an ID. */\
+ *(u64*)(r10 - 8) = r0; \
+ /* 32-bit fill r1 from stack - should preserve the ID. */\
+ "
+#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
+ "r1 = *(u32*)(r10 - 8);"
+#else
+ "r1 = *(u32*)(r10 - 4);"
+#endif
+ " \
+ /* Compare r1 with another register to trigger find_equal_scalars. */\
+ r2 = 0; \
+ if r1 != r2 goto l0_%=; \
+ /* The result of this comparison is predefined. */\
+ if r0 == r2 goto l0_%=; \
+ /* Dead branch: the verifier should prune it. Do an invalid memory\
+ * access if the verifier follows it. \
+ */ \
+ r0 = *(u64*)(r9 + 0); \
+ exit; \
+l0_%=: r0 = 0; \
+ exit; \
+" :
+ : __imm(bpf_get_prandom_u32)
+ : __clobber_all);
+}
+
+SEC("xdp")
+__description("32-bit fill after 64-bit spill should clear ID")
+__failure __msg("math between ctx pointer and 4294967295 is not allowed")
+__naked void fill_32bit_after_spill_64bit_clear_id(void)
+{
+ asm volatile (" \
+ r6 = r1; \
+ /* Roll one bit to force the verifier to track both branches. */\
+ call %[bpf_get_prandom_u32]; \
+ r0 &= 0x8; \
+ /* Put a large number into r1. */ \
+ r1 = 0xffffffff; \
+ r1 <<= 32; \
+ r1 += r0; \
+ /* 64-bit spill r1 to stack - should assign an ID. */\
+ *(u64*)(r10 - 8) = r1; \
+ /* 32-bit fill r2 from stack - should clear the ID. */\
+ "
+#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
+ "r2 = *(u32*)(r10 - 8);"
+#else
+ "r2 = *(u32*)(r10 - 4);"
+#endif
+ " \
+ /* Compare r2 with another register to trigger find_equal_scalars.\
+ * Having one random bit is important here, otherwise the verifier cuts\
+ * the corners. If the ID was mistakenly preserved on fill, this would\
+ * cause the verifier to think that r1 is also equal to zero in one of\
+ * the branches, and equal to eight on the other branch.\
+ */ \
+ r3 = 0; \
+ if r2 != r3 goto l0_%=; \
+l0_%=: r1 >>= 32; \
+ /* The verifier shouldn't propagate r2's range to r1, so it should\
+ * still remember r1 = 0xffffffff and reject the below.\
+ */ \
+ r6 += r1; \
+ r0 = *(u32*)(r6 + 0); \
+ exit; \
+" :
+ : __imm(bpf_get_prandom_u32)
+ : __clobber_all);
+}
+
+/* stacksafe(): check if stack spill of an imprecise scalar in old state
+ * is considered equivalent to STACK_{MISC,INVALID} in cur state.
+ */
+SEC("socket")
+__success __log_level(2)
+__msg("8: (79) r1 = *(u64 *)(r10 -8)")
+__msg("8: safe")
+__msg("processed 11 insns")
+/* STACK_INVALID should prevent verifier in unpriv mode from
+ * considering states equivalent and force an error on second
+ * verification path (entry - label 1 - label 2).
+ */
+__failure_unpriv
+__msg_unpriv("8: (79) r1 = *(u64 *)(r10 -8)")
+__msg_unpriv("9: (95) exit")
+__msg_unpriv("8: (79) r1 = *(u64 *)(r10 -8)")
+__msg_unpriv("invalid read from stack off -8+2 size 8")
+__flag(BPF_F_TEST_STATE_FREQ)
+__naked void old_imprecise_scalar_vs_cur_stack_misc(void)
+{
+ asm volatile(
+ /* get a random value for branching */
+ "call %[bpf_ktime_get_ns];"
+ "if r0 == 0 goto 1f;"
+ /* conjure scalar at fp-8 */
+ "r0 = 42;"
+ "*(u64*)(r10 - 8) = r0;"
+ "goto 2f;"
+"1:"
+ /* conjure STACK_{MISC,INVALID} at fp-8 */
+ "call %[bpf_ktime_get_ns];"
+ "*(u16*)(r10 - 8) = r0;"
+ "*(u16*)(r10 - 4) = r0;"
+"2:"
+ /* read fp-8, should be considered safe on second visit */
+ "r1 = *(u64*)(r10 - 8);"
+ "exit;"
+ :
+ : __imm(bpf_ktime_get_ns)
+ : __clobber_all);
+}
+
+/* stacksafe(): check that stack spill of a precise scalar in old state
+ * is not considered equivalent to STACK_MISC in cur state.
+ */
+SEC("socket")
+__success __log_level(2)
+/* verifier should visit 'if r1 == 0x2a ...' two times:
+ * - once for path entry - label 2;
+ * - once for path entry - label 1 - label 2.
+ */
+__msg("if r1 == 0x2a goto pc+0")
+__msg("if r1 == 0x2a goto pc+0")
+__msg("processed 15 insns")
+__flag(BPF_F_TEST_STATE_FREQ)
+__naked void old_precise_scalar_vs_cur_stack_misc(void)
+{
+ asm volatile(
+ /* get a random value for branching */
+ "call %[bpf_ktime_get_ns];"
+ "if r0 == 0 goto 1f;"
+ /* conjure scalar at fp-8 */
+ "r0 = 42;"
+ "*(u64*)(r10 - 8) = r0;"
+ "goto 2f;"
+"1:"
+ /* conjure STACK_MISC at fp-8 */
+ "call %[bpf_ktime_get_ns];"
+ "*(u64*)(r10 - 8) = r0;"
+ "*(u32*)(r10 - 4) = r0;"
+"2:"
+ /* read fp-8, should not be considered safe on second visit */
+ "r1 = *(u64*)(r10 - 8);"
+ /* use r1 in precise context */
+ "if r1 == 42 goto +0;"
+ "exit;"
+ :
+ : __imm(bpf_ktime_get_ns)
+ : __clobber_all);
+}
+
+/* stacksafe(): check if STACK_MISC in old state is considered
+ * equivalent to stack spill of a scalar in cur state.
+ */
+SEC("socket")
+__success __log_level(2)
+__msg("8: (79) r0 = *(u64 *)(r10 -8)")
+__msg("8: safe")
+__msg("processed 11 insns")
+__flag(BPF_F_TEST_STATE_FREQ)
+__naked void old_stack_misc_vs_cur_scalar(void)
+{
+ asm volatile(
+ /* get a random value for branching */
+ "call %[bpf_ktime_get_ns];"
+ "if r0 == 0 goto 1f;"
+ /* conjure STACK_{MISC,INVALID} at fp-8 */
+ "call %[bpf_ktime_get_ns];"
+ "*(u16*)(r10 - 8) = r0;"
+ "*(u16*)(r10 - 4) = r0;"
+ "goto 2f;"
+"1:"
+ /* conjure scalar at fp-8 */
+ "r0 = 42;"
+ "*(u64*)(r10 - 8) = r0;"
+"2:"
+ /* read fp-8, should be considered safe on second visit */
+ "r0 = *(u64*)(r10 - 8);"
+ "exit;"
+ :
+ : __imm(bpf_ktime_get_ns)
+ : __clobber_all);
+}
+
+/* stacksafe(): check that STACK_MISC in old state is not considered
+ * equivalent to stack spill of a non-scalar in cur state.
+ */
+SEC("socket")
+__success __log_level(2)
+/* verifier should process exit instructions twice:
+ * - once for path entry - label 2;
+ * - once for path entry - label 1 - label 2.
+ */
+__msg("r1 = *(u64 *)(r10 -8)")
+__msg("exit")
+__msg("r1 = *(u64 *)(r10 -8)")
+__msg("exit")
+__msg("processed 11 insns")
+__flag(BPF_F_TEST_STATE_FREQ)
+__naked void old_stack_misc_vs_cur_ctx_ptr(void)
+{
+ asm volatile(
+ /* remember context pointer in r9 */
+ "r9 = r1;"
+ /* get a random value for branching */
+ "call %[bpf_ktime_get_ns];"
+ "if r0 == 0 goto 1f;"
+ /* conjure STACK_MISC at fp-8 */
+ "call %[bpf_ktime_get_ns];"
+ "*(u64*)(r10 - 8) = r0;"
+ "*(u32*)(r10 - 4) = r0;"
+ "goto 2f;"
+"1:"
+ /* conjure context pointer in fp-8 */
+ "*(u64*)(r10 - 8) = r9;"
+"2:"
+ /* read fp-8, should not be considered safe on second visit */
+ "r1 = *(u64*)(r10 - 8);"
+ "exit;"
+ :
+ : __imm(bpf_ktime_get_ns)
+ : __clobber_all);
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/verifier_spin_lock.c b/tools/testing/selftests/bpf/progs/verifier_spin_lock.c
new file mode 100644
index 000000000000..fb316c080c84
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/verifier_spin_lock.c
@@ -0,0 +1,533 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Converted from tools/testing/selftests/bpf/verifier/spin_lock.c */
+
+#include <linux/bpf.h>
+#include <bpf/bpf_helpers.h>
+#include "bpf_misc.h"
+
+struct val {
+ int cnt;
+ struct bpf_spin_lock l;
+};
+
+struct {
+ __uint(type, BPF_MAP_TYPE_ARRAY);
+ __uint(max_entries, 1);
+ __type(key, int);
+ __type(value, struct val);
+} map_spin_lock SEC(".maps");
+
+SEC("cgroup/skb")
+__description("spin_lock: test1 success")
+__success __failure_unpriv __msg_unpriv("")
+__retval(0)
+__naked void spin_lock_test1_success(void)
+{
+ asm volatile (" \
+ r1 = 0; \
+ *(u32*)(r10 - 4) = r1; \
+ r2 = r10; \
+ r2 += -4; \
+ r1 = %[map_spin_lock] ll; \
+ call %[bpf_map_lookup_elem]; \
+ if r0 != 0 goto l0_%=; \
+ exit; \
+l0_%=: r6 = r0; \
+ r1 = r0; \
+ r1 += 4; \
+ call %[bpf_spin_lock]; \
+ r1 = r6; \
+ r1 += 4; \
+ r0 = *(u32*)(r6 + 0); \
+ call %[bpf_spin_unlock]; \
+ r0 = 0; \
+ exit; \
+" :
+ : __imm(bpf_map_lookup_elem),
+ __imm(bpf_spin_lock),
+ __imm(bpf_spin_unlock),
+ __imm_addr(map_spin_lock)
+ : __clobber_all);
+}
+
+SEC("cgroup/skb")
+__description("spin_lock: test2 direct ld/st")
+__failure __msg("cannot be accessed directly")
+__failure_unpriv __msg_unpriv("")
+__naked void lock_test2_direct_ld_st(void)
+{
+ asm volatile (" \
+ r1 = 0; \
+ *(u32*)(r10 - 4) = r1; \
+ r2 = r10; \
+ r2 += -4; \
+ r1 = %[map_spin_lock] ll; \
+ call %[bpf_map_lookup_elem]; \
+ if r0 != 0 goto l0_%=; \
+ exit; \
+l0_%=: r6 = r0; \
+ r1 = r0; \
+ r1 += 4; \
+ call %[bpf_spin_lock]; \
+ r1 = r6; \
+ r1 += 4; \
+ r0 = *(u32*)(r1 + 0); \
+ call %[bpf_spin_unlock]; \
+ r0 = 0; \
+ exit; \
+" :
+ : __imm(bpf_map_lookup_elem),
+ __imm(bpf_spin_lock),
+ __imm(bpf_spin_unlock),
+ __imm_addr(map_spin_lock)
+ : __clobber_all);
+}
+
+SEC("cgroup/skb")
+__description("spin_lock: test3 direct ld/st")
+__failure __msg("cannot be accessed directly")
+__failure_unpriv __msg_unpriv("")
+__flag(BPF_F_ANY_ALIGNMENT)
+__naked void lock_test3_direct_ld_st(void)
+{
+ asm volatile (" \
+ r1 = 0; \
+ *(u32*)(r10 - 4) = r1; \
+ r2 = r10; \
+ r2 += -4; \
+ r1 = %[map_spin_lock] ll; \
+ call %[bpf_map_lookup_elem]; \
+ if r0 != 0 goto l0_%=; \
+ exit; \
+l0_%=: r6 = r0; \
+ r1 = r0; \
+ r1 += 4; \
+ call %[bpf_spin_lock]; \
+ r1 = r6; \
+ r1 += 4; \
+ r0 = *(u32*)(r6 + 1); \
+ call %[bpf_spin_unlock]; \
+ r0 = 0; \
+ exit; \
+" :
+ : __imm(bpf_map_lookup_elem),
+ __imm(bpf_spin_lock),
+ __imm(bpf_spin_unlock),
+ __imm_addr(map_spin_lock)
+ : __clobber_all);
+}
+
+SEC("cgroup/skb")
+__description("spin_lock: test4 direct ld/st")
+__failure __msg("cannot be accessed directly")
+__failure_unpriv __msg_unpriv("")
+__flag(BPF_F_ANY_ALIGNMENT)
+__naked void lock_test4_direct_ld_st(void)
+{
+ asm volatile (" \
+ r1 = 0; \
+ *(u32*)(r10 - 4) = r1; \
+ r2 = r10; \
+ r2 += -4; \
+ r1 = %[map_spin_lock] ll; \
+ call %[bpf_map_lookup_elem]; \
+ if r0 != 0 goto l0_%=; \
+ exit; \
+l0_%=: r6 = r0; \
+ r1 = r0; \
+ r1 += 4; \
+ call %[bpf_spin_lock]; \
+ r1 = r6; \
+ r1 += 4; \
+ r0 = *(u16*)(r6 + 3); \
+ call %[bpf_spin_unlock]; \
+ r0 = 0; \
+ exit; \
+" :
+ : __imm(bpf_map_lookup_elem),
+ __imm(bpf_spin_lock),
+ __imm(bpf_spin_unlock),
+ __imm_addr(map_spin_lock)
+ : __clobber_all);
+}
+
+SEC("cgroup/skb")
+__description("spin_lock: test5 call within a locked region")
+__failure __msg("calls are not allowed")
+__failure_unpriv __msg_unpriv("")
+__naked void call_within_a_locked_region(void)
+{
+ asm volatile (" \
+ r1 = 0; \
+ *(u32*)(r10 - 4) = r1; \
+ r2 = r10; \
+ r2 += -4; \
+ r1 = %[map_spin_lock] ll; \
+ call %[bpf_map_lookup_elem]; \
+ if r0 != 0 goto l0_%=; \
+ exit; \
+l0_%=: r6 = r0; \
+ r1 = r0; \
+ r1 += 4; \
+ call %[bpf_spin_lock]; \
+ call %[bpf_get_prandom_u32]; \
+ r1 = r6; \
+ r1 += 4; \
+ call %[bpf_spin_unlock]; \
+ r0 = 0; \
+ exit; \
+" :
+ : __imm(bpf_get_prandom_u32),
+ __imm(bpf_map_lookup_elem),
+ __imm(bpf_spin_lock),
+ __imm(bpf_spin_unlock),
+ __imm_addr(map_spin_lock)
+ : __clobber_all);
+}
+
+SEC("cgroup/skb")
+__description("spin_lock: test6 missing unlock")
+__failure __msg("unlock is missing")
+__failure_unpriv __msg_unpriv("")
+__naked void spin_lock_test6_missing_unlock(void)
+{
+ asm volatile (" \
+ r1 = 0; \
+ *(u32*)(r10 - 4) = r1; \
+ r2 = r10; \
+ r2 += -4; \
+ r1 = %[map_spin_lock] ll; \
+ call %[bpf_map_lookup_elem]; \
+ if r0 != 0 goto l0_%=; \
+ exit; \
+l0_%=: r6 = r0; \
+ r1 = r0; \
+ r1 += 4; \
+ call %[bpf_spin_lock]; \
+ r1 = r6; \
+ r1 += 4; \
+ r0 = *(u32*)(r6 + 0); \
+ if r0 != 0 goto l1_%=; \
+ call %[bpf_spin_unlock]; \
+l1_%=: r0 = 0; \
+ exit; \
+" :
+ : __imm(bpf_map_lookup_elem),
+ __imm(bpf_spin_lock),
+ __imm(bpf_spin_unlock),
+ __imm_addr(map_spin_lock)
+ : __clobber_all);
+}
+
+SEC("cgroup/skb")
+__description("spin_lock: test7 unlock without lock")
+__failure __msg("without taking a lock")
+__failure_unpriv __msg_unpriv("")
+__naked void lock_test7_unlock_without_lock(void)
+{
+ asm volatile (" \
+ r1 = 0; \
+ *(u32*)(r10 - 4) = r1; \
+ r2 = r10; \
+ r2 += -4; \
+ r1 = %[map_spin_lock] ll; \
+ call %[bpf_map_lookup_elem]; \
+ if r0 != 0 goto l0_%=; \
+ exit; \
+l0_%=: r6 = r0; \
+ r1 = r0; \
+ r1 += 4; \
+ if r1 != 0 goto l1_%=; \
+ call %[bpf_spin_lock]; \
+l1_%=: r1 = r6; \
+ r1 += 4; \
+ r0 = *(u32*)(r6 + 0); \
+ call %[bpf_spin_unlock]; \
+ r0 = 0; \
+ exit; \
+" :
+ : __imm(bpf_map_lookup_elem),
+ __imm(bpf_spin_lock),
+ __imm(bpf_spin_unlock),
+ __imm_addr(map_spin_lock)
+ : __clobber_all);
+}
+
+SEC("cgroup/skb")
+__description("spin_lock: test8 double lock")
+__failure __msg("calls are not allowed")
+__failure_unpriv __msg_unpriv("")
+__naked void spin_lock_test8_double_lock(void)
+{
+ asm volatile (" \
+ r1 = 0; \
+ *(u32*)(r10 - 4) = r1; \
+ r2 = r10; \
+ r2 += -4; \
+ r1 = %[map_spin_lock] ll; \
+ call %[bpf_map_lookup_elem]; \
+ if r0 != 0 goto l0_%=; \
+ exit; \
+l0_%=: r6 = r0; \
+ r1 = r0; \
+ r1 += 4; \
+ call %[bpf_spin_lock]; \
+ r1 = r6; \
+ r1 += 4; \
+ call %[bpf_spin_lock]; \
+ r1 = r6; \
+ r1 += 4; \
+ r0 = *(u32*)(r6 + 0); \
+ call %[bpf_spin_unlock]; \
+ r0 = 0; \
+ exit; \
+" :
+ : __imm(bpf_map_lookup_elem),
+ __imm(bpf_spin_lock),
+ __imm(bpf_spin_unlock),
+ __imm_addr(map_spin_lock)
+ : __clobber_all);
+}
+
+SEC("cgroup/skb")
+__description("spin_lock: test9 different lock")
+__failure __msg("unlock of different lock")
+__failure_unpriv __msg_unpriv("")
+__naked void spin_lock_test9_different_lock(void)
+{
+ asm volatile (" \
+ r1 = 0; \
+ *(u32*)(r10 - 4) = r1; \
+ r2 = r10; \
+ r2 += -4; \
+ r1 = %[map_spin_lock] ll; \
+ call %[bpf_map_lookup_elem]; \
+ if r0 != 0 goto l0_%=; \
+ exit; \
+l0_%=: r6 = r0; \
+ r2 = r10; \
+ r2 += -4; \
+ r1 = %[map_spin_lock] ll; \
+ call %[bpf_map_lookup_elem]; \
+ if r0 != 0 goto l1_%=; \
+ exit; \
+l1_%=: r7 = r0; \
+ r1 = r6; \
+ r1 += 4; \
+ call %[bpf_spin_lock]; \
+ r1 = r7; \
+ r1 += 4; \
+ call %[bpf_spin_unlock]; \
+ r0 = 0; \
+ exit; \
+" :
+ : __imm(bpf_map_lookup_elem),
+ __imm(bpf_spin_lock),
+ __imm(bpf_spin_unlock),
+ __imm_addr(map_spin_lock)
+ : __clobber_all);
+}
+
+SEC("cgroup/skb")
+__description("spin_lock: test10 lock in subprog without unlock")
+__success
+__failure_unpriv __msg_unpriv("")
+__naked void lock_in_subprog_without_unlock(void)
+{
+ asm volatile (" \
+ r1 = 0; \
+ *(u32*)(r10 - 4) = r1; \
+ r2 = r10; \
+ r2 += -4; \
+ r1 = %[map_spin_lock] ll; \
+ call %[bpf_map_lookup_elem]; \
+ if r0 != 0 goto l0_%=; \
+ exit; \
+l0_%=: r6 = r0; \
+ r1 = r0; \
+ r1 += 4; \
+ call lock_in_subprog_without_unlock__1; \
+ r1 = r6; \
+ r1 += 4; \
+ call %[bpf_spin_unlock]; \
+ r0 = 1; \
+ exit; \
+" :
+ : __imm(bpf_map_lookup_elem),
+ __imm(bpf_spin_unlock),
+ __imm_addr(map_spin_lock)
+ : __clobber_all);
+}
+
+static __naked __noinline __attribute__((used))
+void lock_in_subprog_without_unlock__1(void)
+{
+ asm volatile (" \
+ call %[bpf_spin_lock]; \
+ r0 = 0; \
+ exit; \
+" :
+ : __imm(bpf_spin_lock)
+ : __clobber_all);
+}
+
+SEC("tc")
+__description("spin_lock: test11 ld_abs under lock")
+__failure __msg("inside bpf_spin_lock")
+__naked void test11_ld_abs_under_lock(void)
+{
+ asm volatile (" \
+ r6 = r1; \
+ r1 = 0; \
+ *(u32*)(r10 - 4) = r1; \
+ r2 = r10; \
+ r2 += -4; \
+ r1 = %[map_spin_lock] ll; \
+ call %[bpf_map_lookup_elem]; \
+ if r0 != 0 goto l0_%=; \
+ exit; \
+l0_%=: r7 = r0; \
+ r1 = r0; \
+ r1 += 4; \
+ call %[bpf_spin_lock]; \
+ r0 = *(u8*)skb[0]; \
+ r1 = r7; \
+ r1 += 4; \
+ call %[bpf_spin_unlock]; \
+ r0 = 0; \
+ exit; \
+" :
+ : __imm(bpf_map_lookup_elem),
+ __imm(bpf_spin_lock),
+ __imm(bpf_spin_unlock),
+ __imm_addr(map_spin_lock)
+ : __clobber_all);
+}
+
+SEC("tc")
+__description("spin_lock: regsafe compare reg->id for map value")
+__failure __msg("bpf_spin_unlock of different lock")
+__flag(BPF_F_TEST_STATE_FREQ)
+__naked void reg_id_for_map_value(void)
+{
+ asm volatile (" \
+ r6 = r1; \
+ r6 = *(u32*)(r6 + %[__sk_buff_mark]); \
+ r1 = %[map_spin_lock] ll; \
+ r9 = r1; \
+ r2 = 0; \
+ *(u32*)(r10 - 4) = r2; \
+ r2 = r10; \
+ r2 += -4; \
+ call %[bpf_map_lookup_elem]; \
+ if r0 != 0 goto l0_%=; \
+ exit; \
+l0_%=: r7 = r0; \
+ r1 = r9; \
+ r2 = r10; \
+ r2 += -4; \
+ call %[bpf_map_lookup_elem]; \
+ if r0 != 0 goto l1_%=; \
+ exit; \
+l1_%=: r8 = r0; \
+ r1 = r7; \
+ r1 += 4; \
+ call %[bpf_spin_lock]; \
+ if r6 == 0 goto l2_%=; \
+ goto l3_%=; \
+l2_%=: r7 = r8; \
+l3_%=: r1 = r7; \
+ r1 += 4; \
+ call %[bpf_spin_unlock]; \
+ r0 = 0; \
+ exit; \
+" :
+ : __imm(bpf_map_lookup_elem),
+ __imm(bpf_spin_lock),
+ __imm(bpf_spin_unlock),
+ __imm_addr(map_spin_lock),
+ __imm_const(__sk_buff_mark, offsetof(struct __sk_buff, mark))
+ : __clobber_all);
+}
+
+/* Make sure that regsafe() compares ids for spin lock records using
+ * check_ids():
+ * 1: r9 = map_lookup_elem(...) ; r9.id == 1
+ * 2: r8 = map_lookup_elem(...) ; r8.id == 2
+ * 3: r7 = ktime_get_ns()
+ * 4: r6 = ktime_get_ns()
+ * 5: if r6 > r7 goto <9>
+ * 6: spin_lock(r8)
+ * 7: r9 = r8
+ * 8: goto <10>
+ * 9: spin_lock(r9)
+ * 10: spin_unlock(r9) ; r9.id == 1 || r9.id == 2 and lock is active,
+ * ; second visit to (10) should be considered safe
+ * ; if check_ids() is used.
+ * 11: exit(0)
+ */
+
+SEC("cgroup/skb")
+__description("spin_lock: regsafe() check_ids() similar id mappings")
+__success __msg("29: safe")
+__failure_unpriv __msg_unpriv("")
+__log_level(2) __retval(0) __flag(BPF_F_TEST_STATE_FREQ)
+__naked void check_ids_similar_id_mappings(void)
+{
+ asm volatile (" \
+ r1 = 0; \
+ *(u32*)(r10 - 4) = r1; \
+ /* r9 = map_lookup_elem(...) */ \
+ r2 = r10; \
+ r2 += -4; \
+ r1 = %[map_spin_lock] ll; \
+ call %[bpf_map_lookup_elem]; \
+ if r0 == 0 goto l0_%=; \
+ r9 = r0; \
+ /* r8 = map_lookup_elem(...) */ \
+ r2 = r10; \
+ r2 += -4; \
+ r1 = %[map_spin_lock] ll; \
+ call %[bpf_map_lookup_elem]; \
+ if r0 == 0 goto l1_%=; \
+ r8 = r0; \
+ /* r7 = ktime_get_ns() */ \
+ call %[bpf_ktime_get_ns]; \
+ r7 = r0; \
+ /* r6 = ktime_get_ns() */ \
+ call %[bpf_ktime_get_ns]; \
+ r6 = r0; \
+ /* if r6 > r7 goto +5 ; no new information about the state is derived from\
+ * ; this check, thus produced verifier states differ\
+ * ; only in 'insn_idx' \
+ * spin_lock(r8) \
+ * r9 = r8 \
+ * goto unlock \
+ */ \
+ if r6 > r7 goto l2_%=; \
+ r1 = r8; \
+ r1 += 4; \
+ call %[bpf_spin_lock]; \
+ r9 = r8; \
+ goto l3_%=; \
+l2_%=: /* spin_lock(r9) */ \
+ r1 = r9; \
+ r1 += 4; \
+ call %[bpf_spin_lock]; \
+l3_%=: /* spin_unlock(r9) */ \
+ r1 = r9; \
+ r1 += 4; \
+ call %[bpf_spin_unlock]; \
+l0_%=: /* exit(0) */ \
+ r0 = 0; \
+l1_%=: exit; \
+" :
+ : __imm(bpf_ktime_get_ns),
+ __imm(bpf_map_lookup_elem),
+ __imm(bpf_spin_lock),
+ __imm(bpf_spin_unlock),
+ __imm_addr(map_spin_lock)
+ : __clobber_all);
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/verifier_stack_ptr.c b/tools/testing/selftests/bpf/progs/verifier_stack_ptr.c
new file mode 100644
index 000000000000..417c61cd4b19
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/verifier_stack_ptr.c
@@ -0,0 +1,484 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Converted from tools/testing/selftests/bpf/verifier/stack_ptr.c */
+
+#include <linux/bpf.h>
+#include <bpf/bpf_helpers.h>
+#include <limits.h>
+#include "bpf_misc.h"
+
+#define MAX_ENTRIES 11
+
+struct test_val {
+ unsigned int index;
+ int foo[MAX_ENTRIES];
+};
+
+struct {
+ __uint(type, BPF_MAP_TYPE_ARRAY);
+ __uint(max_entries, 1);
+ __type(key, int);
+ __type(value, struct test_val);
+} map_array_48b SEC(".maps");
+
+SEC("socket")
+__description("PTR_TO_STACK store/load")
+__success __success_unpriv __retval(0xfaceb00c)
+__naked void ptr_to_stack_store_load(void)
+{
+ asm volatile (" \
+ r1 = r10; \
+ r1 += -10; \
+ r0 = 0xfaceb00c; \
+ *(u64*)(r1 + 2) = r0; \
+ r0 = *(u64*)(r1 + 2); \
+ exit; \
+" ::: __clobber_all);
+}
+
+SEC("socket")
+__description("PTR_TO_STACK store/load - bad alignment on off")
+__failure __msg("misaligned stack access off 0+-8+2 size 8")
+__failure_unpriv
+__naked void load_bad_alignment_on_off(void)
+{
+ asm volatile (" \
+ r1 = r10; \
+ r1 += -8; \
+ r0 = 0xfaceb00c; \
+ *(u64*)(r1 + 2) = r0; \
+ r0 = *(u64*)(r1 + 2); \
+ exit; \
+" ::: __clobber_all);
+}
+
+SEC("socket")
+__description("PTR_TO_STACK store/load - bad alignment on reg")
+__failure __msg("misaligned stack access off 0+-10+8 size 8")
+__failure_unpriv
+__naked void load_bad_alignment_on_reg(void)
+{
+ asm volatile (" \
+ r1 = r10; \
+ r1 += -10; \
+ r0 = 0xfaceb00c; \
+ *(u64*)(r1 + 8) = r0; \
+ r0 = *(u64*)(r1 + 8); \
+ exit; \
+" ::: __clobber_all);
+}
+
+SEC("socket")
+__description("PTR_TO_STACK store/load - out of bounds low")
+__failure __msg("invalid write to stack R1 off=-79992 size=8")
+__msg_unpriv("R1 stack pointer arithmetic goes out of range")
+__naked void load_out_of_bounds_low(void)
+{
+ asm volatile (" \
+ r1 = r10; \
+ r1 += -80000; \
+ r0 = 0xfaceb00c; \
+ *(u64*)(r1 + 8) = r0; \
+ r0 = *(u64*)(r1 + 8); \
+ exit; \
+" ::: __clobber_all);
+}
+
+SEC("socket")
+__description("PTR_TO_STACK store/load - out of bounds high")
+__failure __msg("invalid write to stack R1 off=0 size=8")
+__failure_unpriv
+__naked void load_out_of_bounds_high(void)
+{
+ asm volatile (" \
+ r1 = r10; \
+ r1 += -8; \
+ r0 = 0xfaceb00c; \
+ *(u64*)(r1 + 8) = r0; \
+ r0 = *(u64*)(r1 + 8); \
+ exit; \
+" ::: __clobber_all);
+}
+
+SEC("socket")
+__description("PTR_TO_STACK check high 1")
+__success __success_unpriv __retval(42)
+__naked void to_stack_check_high_1(void)
+{
+ asm volatile (" \
+ r1 = r10; \
+ r1 += -1; \
+ r0 = 42; \
+ *(u8*)(r1 + 0) = r0; \
+ r0 = *(u8*)(r1 + 0); \
+ exit; \
+" ::: __clobber_all);
+}
+
+SEC("socket")
+__description("PTR_TO_STACK check high 2")
+__success __success_unpriv __retval(42)
+__naked void to_stack_check_high_2(void)
+{
+ asm volatile (" \
+ r1 = r10; \
+ r0 = 42; \
+ *(u8*)(r1 - 1) = r0; \
+ r0 = *(u8*)(r1 - 1); \
+ exit; \
+" ::: __clobber_all);
+}
+
+SEC("socket")
+__description("PTR_TO_STACK check high 3")
+__success __failure_unpriv
+__msg_unpriv("R1 stack pointer arithmetic goes out of range")
+__retval(42)
+__naked void to_stack_check_high_3(void)
+{
+ asm volatile (" \
+ r1 = r10; \
+ r1 += 0; \
+ r0 = 42; \
+ *(u8*)(r1 - 1) = r0; \
+ r0 = *(u8*)(r1 - 1); \
+ exit; \
+" ::: __clobber_all);
+}
+
+SEC("socket")
+__description("PTR_TO_STACK check high 4")
+__failure __msg("invalid write to stack R1 off=0 size=1")
+__msg_unpriv("R1 stack pointer arithmetic goes out of range")
+__naked void to_stack_check_high_4(void)
+{
+ asm volatile (" \
+ r1 = r10; \
+ r1 += 0; \
+ r0 = 42; \
+ *(u8*)(r1 + 0) = r0; \
+ r0 = *(u8*)(r1 + 0); \
+ exit; \
+" ::: __clobber_all);
+}
+
+SEC("socket")
+__description("PTR_TO_STACK check high 5")
+__failure __msg("invalid write to stack R1")
+__msg_unpriv("R1 stack pointer arithmetic goes out of range")
+__naked void to_stack_check_high_5(void)
+{
+ asm volatile (" \
+ r1 = r10; \
+ r1 += %[__imm_0]; \
+ r0 = 42; \
+ *(u8*)(r1 + 0) = r0; \
+ r0 = *(u8*)(r1 + 0); \
+ exit; \
+" :
+ : __imm_const(__imm_0, (1 << 29) - 1)
+ : __clobber_all);
+}
+
+SEC("socket")
+__description("PTR_TO_STACK check high 6")
+__failure __msg("invalid write to stack")
+__msg_unpriv("R1 stack pointer arithmetic goes out of range")
+__naked void to_stack_check_high_6(void)
+{
+ asm volatile (" \
+ r1 = r10; \
+ r1 += %[__imm_0]; \
+ r0 = 42; \
+ *(u8*)(r1 + %[shrt_max]) = r0; \
+ r0 = *(u8*)(r1 + %[shrt_max]); \
+ exit; \
+" :
+ : __imm_const(__imm_0, (1 << 29) - 1),
+ __imm_const(shrt_max, SHRT_MAX)
+ : __clobber_all);
+}
+
+SEC("socket")
+__description("PTR_TO_STACK check high 7")
+__failure __msg("fp pointer offset")
+__msg_unpriv("R1 stack pointer arithmetic goes out of range")
+__naked void to_stack_check_high_7(void)
+{
+ asm volatile (" \
+ r1 = r10; \
+ r1 += %[__imm_0]; \
+ r1 += %[__imm_0]; \
+ r0 = 42; \
+ *(u8*)(r1 + %[shrt_max]) = r0; \
+ r0 = *(u8*)(r1 + %[shrt_max]); \
+ exit; \
+" :
+ : __imm_const(__imm_0, (1 << 29) - 1),
+ __imm_const(shrt_max, SHRT_MAX)
+ : __clobber_all);
+}
+
+SEC("socket")
+__description("PTR_TO_STACK check low 1")
+__success __success_unpriv __retval(42)
+__naked void to_stack_check_low_1(void)
+{
+ asm volatile (" \
+ r1 = r10; \
+ r1 += -512; \
+ r0 = 42; \
+ *(u8*)(r1 + 0) = r0; \
+ r0 = *(u8*)(r1 + 0); \
+ exit; \
+" ::: __clobber_all);
+}
+
+SEC("socket")
+__description("PTR_TO_STACK check low 2")
+__success __failure_unpriv
+__msg_unpriv("R1 stack pointer arithmetic goes out of range")
+__retval(42)
+__naked void to_stack_check_low_2(void)
+{
+ asm volatile (" \
+ r1 = r10; \
+ r1 += -513; \
+ r0 = 42; \
+ *(u8*)(r1 + 1) = r0; \
+ r0 = *(u8*)(r1 + 1); \
+ exit; \
+" ::: __clobber_all);
+}
+
+SEC("socket")
+__description("PTR_TO_STACK check low 3")
+__failure __msg("invalid write to stack R1 off=-513 size=1")
+__msg_unpriv("R1 stack pointer arithmetic goes out of range")
+__naked void to_stack_check_low_3(void)
+{
+ asm volatile (" \
+ r1 = r10; \
+ r1 += -513; \
+ r0 = 42; \
+ *(u8*)(r1 + 0) = r0; \
+ r0 = *(u8*)(r1 + 0); \
+ exit; \
+" ::: __clobber_all);
+}
+
+SEC("socket")
+__description("PTR_TO_STACK check low 4")
+__failure __msg("math between fp pointer")
+__failure_unpriv
+__naked void to_stack_check_low_4(void)
+{
+ asm volatile (" \
+ r1 = r10; \
+ r1 += %[int_min]; \
+ r0 = 42; \
+ *(u8*)(r1 + 0) = r0; \
+ r0 = *(u8*)(r1 + 0); \
+ exit; \
+" :
+ : __imm_const(int_min, INT_MIN)
+ : __clobber_all);
+}
+
+SEC("socket")
+__description("PTR_TO_STACK check low 5")
+__failure __msg("invalid write to stack")
+__msg_unpriv("R1 stack pointer arithmetic goes out of range")
+__naked void to_stack_check_low_5(void)
+{
+ asm volatile (" \
+ r1 = r10; \
+ r1 += %[__imm_0]; \
+ r0 = 42; \
+ *(u8*)(r1 + 0) = r0; \
+ r0 = *(u8*)(r1 + 0); \
+ exit; \
+" :
+ : __imm_const(__imm_0, -((1 << 29) - 1))
+ : __clobber_all);
+}
+
+SEC("socket")
+__description("PTR_TO_STACK check low 6")
+__failure __msg("invalid write to stack")
+__msg_unpriv("R1 stack pointer arithmetic goes out of range")
+__naked void to_stack_check_low_6(void)
+{
+ asm volatile (" \
+ r1 = r10; \
+ r1 += %[__imm_0]; \
+ r0 = 42; \
+ *(u8*)(r1 %[shrt_min]) = r0; \
+ r0 = *(u8*)(r1 %[shrt_min]); \
+ exit; \
+" :
+ : __imm_const(__imm_0, -((1 << 29) - 1)),
+ __imm_const(shrt_min, SHRT_MIN)
+ : __clobber_all);
+}
+
+SEC("socket")
+__description("PTR_TO_STACK check low 7")
+__failure __msg("fp pointer offset")
+__msg_unpriv("R1 stack pointer arithmetic goes out of range")
+__naked void to_stack_check_low_7(void)
+{
+ asm volatile (" \
+ r1 = r10; \
+ r1 += %[__imm_0]; \
+ r1 += %[__imm_0]; \
+ r0 = 42; \
+ *(u8*)(r1 %[shrt_min]) = r0; \
+ r0 = *(u8*)(r1 %[shrt_min]); \
+ exit; \
+" :
+ : __imm_const(__imm_0, -((1 << 29) - 1)),
+ __imm_const(shrt_min, SHRT_MIN)
+ : __clobber_all);
+}
+
+SEC("socket")
+__description("PTR_TO_STACK mixed reg/k, 1")
+__success __success_unpriv __retval(42)
+__naked void stack_mixed_reg_k_1(void)
+{
+ asm volatile (" \
+ r1 = r10; \
+ r1 += -3; \
+ r2 = -3; \
+ r1 += r2; \
+ r0 = 42; \
+ *(u8*)(r1 + 0) = r0; \
+ r0 = *(u8*)(r1 + 0); \
+ exit; \
+" ::: __clobber_all);
+}
+
+SEC("socket")
+__description("PTR_TO_STACK mixed reg/k, 2")
+__success __success_unpriv __retval(42)
+__naked void stack_mixed_reg_k_2(void)
+{
+ asm volatile (" \
+ r0 = 0; \
+ *(u64*)(r10 - 8) = r0; \
+ r0 = 0; \
+ *(u64*)(r10 - 16) = r0; \
+ r1 = r10; \
+ r1 += -3; \
+ r2 = -3; \
+ r1 += r2; \
+ r0 = 42; \
+ *(u8*)(r1 + 0) = r0; \
+ r5 = r10; \
+ r0 = *(u8*)(r5 - 6); \
+ exit; \
+" ::: __clobber_all);
+}
+
+SEC("socket")
+__description("PTR_TO_STACK mixed reg/k, 3")
+__success __success_unpriv __retval(-3)
+__naked void stack_mixed_reg_k_3(void)
+{
+ asm volatile (" \
+ r1 = r10; \
+ r1 += -3; \
+ r2 = -3; \
+ r1 += r2; \
+ r0 = 42; \
+ *(u8*)(r1 + 0) = r0; \
+ r0 = r2; \
+ exit; \
+" ::: __clobber_all);
+}
+
+SEC("socket")
+__description("PTR_TO_STACK reg")
+__success __success_unpriv __retval(42)
+__naked void ptr_to_stack_reg(void)
+{
+ asm volatile (" \
+ r1 = r10; \
+ r2 = -3; \
+ r1 += r2; \
+ r0 = 42; \
+ *(u8*)(r1 + 0) = r0; \
+ r0 = *(u8*)(r1 + 0); \
+ exit; \
+" ::: __clobber_all);
+}
+
+SEC("socket")
+__description("stack pointer arithmetic")
+__success __success_unpriv __retval(0)
+__naked void stack_pointer_arithmetic(void)
+{
+ asm volatile (" \
+ r1 = 4; \
+ goto l0_%=; \
+l0_%=: r7 = r10; \
+ r7 += -10; \
+ r7 += -10; \
+ r2 = r7; \
+ r2 += r1; \
+ r0 = 0; \
+ *(u32*)(r2 + 4) = r0; \
+ r2 = r7; \
+ r2 += 8; \
+ r0 = 0; \
+ *(u32*)(r2 + 4) = r0; \
+ r0 = 0; \
+ exit; \
+" ::: __clobber_all);
+}
+
+SEC("tc")
+__description("store PTR_TO_STACK in R10 to array map using BPF_B")
+__success __retval(42)
+__naked void array_map_using_bpf_b(void)
+{
+ asm volatile (" \
+ /* Load pointer to map. */ \
+ r2 = r10; \
+ r2 += -8; \
+ r1 = 0; \
+ *(u64*)(r2 + 0) = r1; \
+ r1 = %[map_array_48b] ll; \
+ call %[bpf_map_lookup_elem]; \
+ if r0 != 0 goto l0_%=; \
+ r0 = 2; \
+ exit; \
+l0_%=: r1 = r0; \
+ /* Copy R10 to R9. */ \
+ r9 = r10; \
+ /* Pollute other registers with unaligned values. */\
+ r2 = -1; \
+ r3 = -1; \
+ r4 = -1; \
+ r5 = -1; \
+ r6 = -1; \
+ r7 = -1; \
+ r8 = -1; \
+ /* Store both R9 and R10 with BPF_B and read back. */\
+ *(u8*)(r1 + 0) = r10; \
+ r2 = *(u8*)(r1 + 0); \
+ *(u8*)(r1 + 0) = r9; \
+ r3 = *(u8*)(r1 + 0); \
+ /* Should read back as same value. */ \
+ if r2 == r3 goto l1_%=; \
+ r0 = 1; \
+ exit; \
+l1_%=: r0 = 42; \
+ exit; \
+" :
+ : __imm(bpf_map_lookup_elem),
+ __imm_addr(map_array_48b)
+ : __clobber_all);
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/verifier_subprog_precision.c b/tools/testing/selftests/bpf/progs/verifier_subprog_precision.c
new file mode 100644
index 000000000000..6f5d19665cf6
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/verifier_subprog_precision.c
@@ -0,0 +1,709 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2023 Meta Platforms, Inc. and affiliates. */
+
+#include <errno.h>
+#include <string.h>
+#include <linux/bpf.h>
+#include <bpf/bpf_helpers.h>
+#include "bpf_misc.h"
+
+#define ARRAY_SIZE(x) (sizeof(x) / sizeof(x[0]))
+
+int vals[] SEC(".data.vals") = {1, 2, 3, 4};
+
+__naked __noinline __used
+static unsigned long identity_subprog()
+{
+ /* the simplest *static* 64-bit identity function */
+ asm volatile (
+ "r0 = r1;"
+ "exit;"
+ );
+}
+
+__noinline __used
+unsigned long global_identity_subprog(__u64 x)
+{
+ /* the simplest *global* 64-bit identity function */
+ return x;
+}
+
+__naked __noinline __used
+static unsigned long callback_subprog()
+{
+ /* the simplest callback function */
+ asm volatile (
+ "r0 = 0;"
+ "exit;"
+ );
+}
+
+SEC("?raw_tp")
+__success __log_level(2)
+__msg("7: (0f) r1 += r0")
+__msg("mark_precise: frame0: regs=r0 stack= before 6: (bf) r1 = r7")
+__msg("mark_precise: frame0: regs=r0 stack= before 5: (27) r0 *= 4")
+__msg("mark_precise: frame0: regs=r0 stack= before 11: (95) exit")
+__msg("mark_precise: frame1: regs=r0 stack= before 10: (bf) r0 = r1")
+__msg("mark_precise: frame1: regs=r1 stack= before 4: (85) call pc+5")
+__msg("mark_precise: frame0: regs=r1 stack= before 3: (bf) r1 = r6")
+__msg("mark_precise: frame0: regs=r6 stack= before 2: (b7) r6 = 3")
+__naked int subprog_result_precise(void)
+{
+ asm volatile (
+ "r6 = 3;"
+ /* pass r6 through r1 into subprog to get it back as r0;
+ * this whole chain will have to be marked as precise later
+ */
+ "r1 = r6;"
+ "call identity_subprog;"
+ /* now use subprog's returned value (which is a
+ * r6 -> r1 -> r0 chain), as index into vals array, forcing
+ * all of that to be known precisely
+ */
+ "r0 *= 4;"
+ "r1 = %[vals];"
+ /* here r0->r1->r6 chain is forced to be precise and has to be
+ * propagated back to the beginning, including through the
+ * subprog call
+ */
+ "r1 += r0;"
+ "r0 = *(u32 *)(r1 + 0);"
+ "exit;"
+ :
+ : __imm_ptr(vals)
+ : __clobber_common, "r6"
+ );
+}
+
+SEC("?raw_tp")
+__success __log_level(2)
+__msg("9: (0f) r1 += r0")
+__msg("mark_precise: frame0: last_idx 9 first_idx 0")
+__msg("mark_precise: frame0: regs=r0 stack= before 8: (bf) r1 = r7")
+__msg("mark_precise: frame0: regs=r0 stack= before 7: (27) r0 *= 4")
+__msg("mark_precise: frame0: regs=r0 stack= before 5: (a5) if r0 < 0x4 goto pc+1")
+__msg("mark_precise: frame0: regs=r0 stack= before 4: (85) call pc+7")
+__naked int global_subprog_result_precise(void)
+{
+ asm volatile (
+ "r6 = 3;"
+ /* pass r6 through r1 into subprog to get it back as r0;
+ * given global_identity_subprog is global, precision won't
+ * propagate all the way back to r6
+ */
+ "r1 = r6;"
+ "call global_identity_subprog;"
+ /* now use subprog's returned value (which is unknown now, so
+ * we need to clamp it), as index into vals array, forcing r0
+ * to be marked precise (with no effect on r6, though)
+ */
+ "if r0 < %[vals_arr_sz] goto 1f;"
+ "r0 = %[vals_arr_sz] - 1;"
+ "1:"
+ "r0 *= 4;"
+ "r1 = %[vals];"
+ /* here r0 is forced to be precise and has to be
+ * propagated back to the global subprog call, but it
+ * shouldn't go all the way to mark r6 as precise
+ */
+ "r1 += r0;"
+ "r0 = *(u32 *)(r1 + 0);"
+ "exit;"
+ :
+ : __imm_ptr(vals),
+ __imm_const(vals_arr_sz, ARRAY_SIZE(vals))
+ : __clobber_common, "r6"
+ );
+}
+
+__naked __noinline __used
+static unsigned long loop_callback_bad()
+{
+ /* bpf_loop() callback that can return values outside of [0, 1] range */
+ asm volatile (
+ "call %[bpf_get_prandom_u32];"
+ "if r0 s> 1000 goto 1f;"
+ "r0 = 0;"
+ "1:"
+ "goto +0;" /* checkpoint */
+ /* bpf_loop() expects [0, 1] values, so branch above skipping
+ * r0 = 0; should lead to a failure, but if exit instruction
+ * doesn't enforce r0's precision, this callback will be
+ * successfully verified
+ */
+ "exit;"
+ :
+ : __imm(bpf_get_prandom_u32)
+ : __clobber_common
+ );
+}
+
+SEC("?raw_tp")
+__failure __log_level(2)
+__flag(BPF_F_TEST_STATE_FREQ)
+/* check that fallthrough code path marks r0 as precise */
+__msg("mark_precise: frame1: regs=r0 stack= before 11: (b7) r0 = 0")
+/* check that we have branch code path doing its own validation */
+__msg("from 10 to 12: frame1: R0=scalar(smin=umin=1001")
+/* check that branch code path marks r0 as precise, before failing */
+__msg("mark_precise: frame1: regs=r0 stack= before 9: (85) call bpf_get_prandom_u32#7")
+__msg("At callback return the register R0 has smin=1001 should have been in [0, 1]")
+__naked int callback_precise_return_fail(void)
+{
+ asm volatile (
+ "r1 = 1;" /* nr_loops */
+ "r2 = %[loop_callback_bad];" /* callback_fn */
+ "r3 = 0;" /* callback_ctx */
+ "r4 = 0;" /* flags */
+ "call %[bpf_loop];"
+
+ "r0 = 0;"
+ "exit;"
+ :
+ : __imm_ptr(loop_callback_bad),
+ __imm(bpf_loop)
+ : __clobber_common
+ );
+}
+
+SEC("?raw_tp")
+__success __log_level(2)
+/* First simulated path does not include callback body,
+ * r1 and r4 are always precise for bpf_loop() calls.
+ */
+__msg("9: (85) call bpf_loop#181")
+__msg("mark_precise: frame0: last_idx 9 first_idx 9 subseq_idx -1")
+__msg("mark_precise: frame0: parent state regs=r4 stack=:")
+__msg("mark_precise: frame0: last_idx 8 first_idx 0 subseq_idx 9")
+__msg("mark_precise: frame0: regs=r4 stack= before 8: (b7) r4 = 0")
+__msg("mark_precise: frame0: last_idx 9 first_idx 9 subseq_idx -1")
+__msg("mark_precise: frame0: parent state regs=r1 stack=:")
+__msg("mark_precise: frame0: last_idx 8 first_idx 0 subseq_idx 9")
+__msg("mark_precise: frame0: regs=r1 stack= before 8: (b7) r4 = 0")
+__msg("mark_precise: frame0: regs=r1 stack= before 7: (b7) r3 = 0")
+__msg("mark_precise: frame0: regs=r1 stack= before 6: (bf) r2 = r8")
+__msg("mark_precise: frame0: regs=r1 stack= before 5: (bf) r1 = r6")
+__msg("mark_precise: frame0: regs=r6 stack= before 4: (b7) r6 = 3")
+/* r6 precision propagation */
+__msg("14: (0f) r1 += r6")
+__msg("mark_precise: frame0: last_idx 14 first_idx 9")
+__msg("mark_precise: frame0: regs=r6 stack= before 13: (bf) r1 = r7")
+__msg("mark_precise: frame0: regs=r6 stack= before 12: (27) r6 *= 4")
+__msg("mark_precise: frame0: regs=r6 stack= before 11: (25) if r6 > 0x3 goto pc+4")
+__msg("mark_precise: frame0: regs=r6 stack= before 10: (bf) r6 = r0")
+__msg("mark_precise: frame0: regs=r0 stack= before 9: (85) call bpf_loop")
+/* State entering callback body popped from states stack */
+__msg("from 9 to 17: frame1:")
+__msg("17: frame1: R1=scalar() R2=0 R10=fp0 cb")
+__msg("17: (b7) r0 = 0")
+__msg("18: (95) exit")
+__msg("returning from callee:")
+__msg("to caller at 9:")
+__msg("frame 0: propagating r1,r4")
+__msg("mark_precise: frame0: last_idx 9 first_idx 9 subseq_idx -1")
+__msg("mark_precise: frame0: regs=r1,r4 stack= before 18: (95) exit")
+__msg("from 18 to 9: safe")
+__naked int callback_result_precise(void)
+{
+ asm volatile (
+ "r6 = 3;"
+
+ /* call subprog and use result; r0 shouldn't propagate back to
+ * callback_subprog
+ */
+ "r1 = r6;" /* nr_loops */
+ "r2 = %[callback_subprog];" /* callback_fn */
+ "r3 = 0;" /* callback_ctx */
+ "r4 = 0;" /* flags */
+ "call %[bpf_loop];"
+
+ "r6 = r0;"
+ "if r6 > 3 goto 1f;"
+ "r6 *= 4;"
+ "r1 = %[vals];"
+ /* here r6 is forced to be precise and has to be propagated
+ * back to the bpf_loop() call, but not beyond
+ */
+ "r1 += r6;"
+ "r0 = *(u32 *)(r1 + 0);"
+ "1:"
+ "exit;"
+ :
+ : __imm_ptr(vals),
+ __imm_ptr(callback_subprog),
+ __imm(bpf_loop)
+ : __clobber_common, "r6"
+ );
+}
+
+SEC("?raw_tp")
+__success __log_level(2)
+__msg("7: (0f) r1 += r6")
+__msg("mark_precise: frame0: last_idx 7 first_idx 0")
+__msg("mark_precise: frame0: regs=r6 stack= before 6: (bf) r1 = r7")
+__msg("mark_precise: frame0: regs=r6 stack= before 5: (27) r6 *= 4")
+__msg("mark_precise: frame0: regs=r6 stack= before 11: (95) exit")
+__msg("mark_precise: frame1: regs= stack= before 10: (bf) r0 = r1")
+__msg("mark_precise: frame1: regs= stack= before 4: (85) call pc+5")
+__msg("mark_precise: frame0: regs=r6 stack= before 3: (b7) r1 = 0")
+__msg("mark_precise: frame0: regs=r6 stack= before 2: (b7) r6 = 3")
+__naked int parent_callee_saved_reg_precise(void)
+{
+ asm volatile (
+ "r6 = 3;"
+
+ /* call subprog and ignore result; we need this call only to
+ * complicate jump history
+ */
+ "r1 = 0;"
+ "call identity_subprog;"
+
+ "r6 *= 4;"
+ "r1 = %[vals];"
+ /* here r6 is forced to be precise and has to be propagated
+ * back to the beginning, handling (and ignoring) subprog call
+ */
+ "r1 += r6;"
+ "r0 = *(u32 *)(r1 + 0);"
+ "exit;"
+ :
+ : __imm_ptr(vals)
+ : __clobber_common, "r6"
+ );
+}
+
+SEC("?raw_tp")
+__success __log_level(2)
+__msg("7: (0f) r1 += r6")
+__msg("mark_precise: frame0: last_idx 7 first_idx 0")
+__msg("mark_precise: frame0: regs=r6 stack= before 6: (bf) r1 = r7")
+__msg("mark_precise: frame0: regs=r6 stack= before 5: (27) r6 *= 4")
+__msg("mark_precise: frame0: regs=r6 stack= before 4: (85) call pc+5")
+__msg("mark_precise: frame0: regs=r6 stack= before 3: (b7) r1 = 0")
+__msg("mark_precise: frame0: regs=r6 stack= before 2: (b7) r6 = 3")
+__naked int parent_callee_saved_reg_precise_global(void)
+{
+ asm volatile (
+ "r6 = 3;"
+
+ /* call subprog and ignore result; we need this call only to
+ * complicate jump history
+ */
+ "r1 = 0;"
+ "call global_identity_subprog;"
+
+ "r6 *= 4;"
+ "r1 = %[vals];"
+ /* here r6 is forced to be precise and has to be propagated
+ * back to the beginning, handling (and ignoring) subprog call
+ */
+ "r1 += r6;"
+ "r0 = *(u32 *)(r1 + 0);"
+ "exit;"
+ :
+ : __imm_ptr(vals)
+ : __clobber_common, "r6"
+ );
+}
+
+SEC("?raw_tp")
+__success __log_level(2)
+/* First simulated path does not include callback body */
+__msg("12: (0f) r1 += r6")
+__msg("mark_precise: frame0: last_idx 12 first_idx 9")
+__msg("mark_precise: frame0: regs=r6 stack= before 11: (bf) r1 = r7")
+__msg("mark_precise: frame0: regs=r6 stack= before 10: (27) r6 *= 4")
+__msg("mark_precise: frame0: regs=r6 stack= before 9: (85) call bpf_loop")
+__msg("mark_precise: frame0: parent state regs=r6 stack=:")
+__msg("mark_precise: frame0: last_idx 8 first_idx 0 subseq_idx 9")
+__msg("mark_precise: frame0: regs=r6 stack= before 8: (b7) r4 = 0")
+__msg("mark_precise: frame0: regs=r6 stack= before 7: (b7) r3 = 0")
+__msg("mark_precise: frame0: regs=r6 stack= before 6: (bf) r2 = r8")
+__msg("mark_precise: frame0: regs=r6 stack= before 5: (b7) r1 = 1")
+__msg("mark_precise: frame0: regs=r6 stack= before 4: (b7) r6 = 3")
+/* State entering callback body popped from states stack */
+__msg("from 9 to 15: frame1:")
+__msg("15: frame1: R1=scalar() R2=0 R10=fp0 cb")
+__msg("15: (b7) r0 = 0")
+__msg("16: (95) exit")
+__msg("returning from callee:")
+__msg("to caller at 9:")
+/* r1, r4 are always precise for bpf_loop(),
+ * r6 was marked before backtracking to callback body.
+ */
+__msg("frame 0: propagating r1,r4,r6")
+__msg("mark_precise: frame0: last_idx 9 first_idx 9 subseq_idx -1")
+__msg("mark_precise: frame0: regs=r1,r4,r6 stack= before 16: (95) exit")
+__msg("mark_precise: frame1: regs= stack= before 15: (b7) r0 = 0")
+__msg("mark_precise: frame1: regs= stack= before 9: (85) call bpf_loop")
+__msg("mark_precise: frame0: parent state regs= stack=:")
+__msg("from 16 to 9: safe")
+__naked int parent_callee_saved_reg_precise_with_callback(void)
+{
+ asm volatile (
+ "r6 = 3;"
+
+ /* call subprog and ignore result; we need this call only to
+ * complicate jump history
+ */
+ "r1 = 1;" /* nr_loops */
+ "r2 = %[callback_subprog];" /* callback_fn */
+ "r3 = 0;" /* callback_ctx */
+ "r4 = 0;" /* flags */
+ "call %[bpf_loop];"
+
+ "r6 *= 4;"
+ "r1 = %[vals];"
+ /* here r6 is forced to be precise and has to be propagated
+ * back to the beginning, handling (and ignoring) callback call
+ */
+ "r1 += r6;"
+ "r0 = *(u32 *)(r1 + 0);"
+ "exit;"
+ :
+ : __imm_ptr(vals),
+ __imm_ptr(callback_subprog),
+ __imm(bpf_loop)
+ : __clobber_common, "r6"
+ );
+}
+
+SEC("?raw_tp")
+__success __log_level(2)
+__msg("9: (0f) r1 += r6")
+__msg("mark_precise: frame0: last_idx 9 first_idx 6")
+__msg("mark_precise: frame0: regs=r6 stack= before 8: (bf) r1 = r7")
+__msg("mark_precise: frame0: regs=r6 stack= before 7: (27) r6 *= 4")
+__msg("mark_precise: frame0: regs=r6 stack= before 6: (79) r6 = *(u64 *)(r10 -8)")
+__msg("mark_precise: frame0: parent state regs= stack=-8:")
+__msg("mark_precise: frame0: last_idx 13 first_idx 0")
+__msg("mark_precise: frame0: regs= stack=-8 before 13: (95) exit")
+__msg("mark_precise: frame1: regs= stack= before 12: (bf) r0 = r1")
+__msg("mark_precise: frame1: regs= stack= before 5: (85) call pc+6")
+__msg("mark_precise: frame0: regs= stack=-8 before 4: (b7) r1 = 0")
+__msg("mark_precise: frame0: regs= stack=-8 before 3: (7b) *(u64 *)(r10 -8) = r6")
+__msg("mark_precise: frame0: regs=r6 stack= before 2: (b7) r6 = 3")
+__naked int parent_stack_slot_precise(void)
+{
+ asm volatile (
+ /* spill reg */
+ "r6 = 3;"
+ "*(u64 *)(r10 - 8) = r6;"
+
+ /* call subprog and ignore result; we need this call only to
+ * complicate jump history
+ */
+ "r1 = 0;"
+ "call identity_subprog;"
+
+ /* restore reg from stack; in this case we'll be carrying
+ * stack mask when going back into subprog through jump
+ * history
+ */
+ "r6 = *(u64 *)(r10 - 8);"
+
+ "r6 *= 4;"
+ "r1 = %[vals];"
+ /* here r6 is forced to be precise and has to be propagated
+ * back to the beginning, handling (and ignoring) subprog call
+ */
+ "r1 += r6;"
+ "r0 = *(u32 *)(r1 + 0);"
+ "exit;"
+ :
+ : __imm_ptr(vals)
+ : __clobber_common, "r6"
+ );
+}
+
+SEC("?raw_tp")
+__success __log_level(2)
+__msg("9: (0f) r1 += r6")
+__msg("mark_precise: frame0: last_idx 9 first_idx 0")
+__msg("mark_precise: frame0: regs=r6 stack= before 8: (bf) r1 = r7")
+__msg("mark_precise: frame0: regs=r6 stack= before 7: (27) r6 *= 4")
+__msg("mark_precise: frame0: regs=r6 stack= before 6: (79) r6 = *(u64 *)(r10 -8)")
+__msg("mark_precise: frame0: regs= stack=-8 before 5: (85) call pc+6")
+__msg("mark_precise: frame0: regs= stack=-8 before 4: (b7) r1 = 0")
+__msg("mark_precise: frame0: regs= stack=-8 before 3: (7b) *(u64 *)(r10 -8) = r6")
+__msg("mark_precise: frame0: regs=r6 stack= before 2: (b7) r6 = 3")
+__naked int parent_stack_slot_precise_global(void)
+{
+ asm volatile (
+ /* spill reg */
+ "r6 = 3;"
+ "*(u64 *)(r10 - 8) = r6;"
+
+ /* call subprog and ignore result; we need this call only to
+ * complicate jump history
+ */
+ "r1 = 0;"
+ "call global_identity_subprog;"
+
+ /* restore reg from stack; in this case we'll be carrying
+ * stack mask when going back into subprog through jump
+ * history
+ */
+ "r6 = *(u64 *)(r10 - 8);"
+
+ "r6 *= 4;"
+ "r1 = %[vals];"
+ /* here r6 is forced to be precise and has to be propagated
+ * back to the beginning, handling (and ignoring) subprog call
+ */
+ "r1 += r6;"
+ "r0 = *(u32 *)(r1 + 0);"
+ "exit;"
+ :
+ : __imm_ptr(vals)
+ : __clobber_common, "r6"
+ );
+}
+
+SEC("?raw_tp")
+__success __log_level(2)
+/* First simulated path does not include callback body */
+__msg("14: (0f) r1 += r6")
+__msg("mark_precise: frame0: last_idx 14 first_idx 10")
+__msg("mark_precise: frame0: regs=r6 stack= before 13: (bf) r1 = r7")
+__msg("mark_precise: frame0: regs=r6 stack= before 12: (27) r6 *= 4")
+__msg("mark_precise: frame0: regs=r6 stack= before 11: (79) r6 = *(u64 *)(r10 -8)")
+__msg("mark_precise: frame0: regs= stack=-8 before 10: (85) call bpf_loop")
+__msg("mark_precise: frame0: parent state regs= stack=-8:")
+__msg("mark_precise: frame0: last_idx 9 first_idx 0 subseq_idx 10")
+__msg("mark_precise: frame0: regs= stack=-8 before 9: (b7) r4 = 0")
+__msg("mark_precise: frame0: regs= stack=-8 before 8: (b7) r3 = 0")
+__msg("mark_precise: frame0: regs= stack=-8 before 7: (bf) r2 = r8")
+__msg("mark_precise: frame0: regs= stack=-8 before 6: (bf) r1 = r6")
+__msg("mark_precise: frame0: regs= stack=-8 before 5: (7b) *(u64 *)(r10 -8) = r6")
+__msg("mark_precise: frame0: regs=r6 stack= before 4: (b7) r6 = 3")
+/* State entering callback body popped from states stack */
+__msg("from 10 to 17: frame1:")
+__msg("17: frame1: R1=scalar() R2=0 R10=fp0 cb")
+__msg("17: (b7) r0 = 0")
+__msg("18: (95) exit")
+__msg("returning from callee:")
+__msg("to caller at 10:")
+/* r1, r4 are always precise for bpf_loop(),
+ * fp-8 was marked before backtracking to callback body.
+ */
+__msg("frame 0: propagating r1,r4,fp-8")
+__msg("mark_precise: frame0: last_idx 10 first_idx 10 subseq_idx -1")
+__msg("mark_precise: frame0: regs=r1,r4 stack=-8 before 18: (95) exit")
+__msg("mark_precise: frame1: regs= stack= before 17: (b7) r0 = 0")
+__msg("mark_precise: frame1: regs= stack= before 10: (85) call bpf_loop#181")
+__msg("mark_precise: frame0: parent state regs= stack=:")
+__msg("from 18 to 10: safe")
+__naked int parent_stack_slot_precise_with_callback(void)
+{
+ asm volatile (
+ /* spill reg */
+ "r6 = 3;"
+ "*(u64 *)(r10 - 8) = r6;"
+
+ /* ensure we have callback frame in jump history */
+ "r1 = r6;" /* nr_loops */
+ "r2 = %[callback_subprog];" /* callback_fn */
+ "r3 = 0;" /* callback_ctx */
+ "r4 = 0;" /* flags */
+ "call %[bpf_loop];"
+
+ /* restore reg from stack; in this case we'll be carrying
+ * stack mask when going back into subprog through jump
+ * history
+ */
+ "r6 = *(u64 *)(r10 - 8);"
+
+ "r6 *= 4;"
+ "r1 = %[vals];"
+ /* here r6 is forced to be precise and has to be propagated
+ * back to the beginning, handling (and ignoring) subprog call
+ */
+ "r1 += r6;"
+ "r0 = *(u32 *)(r1 + 0);"
+ "exit;"
+ :
+ : __imm_ptr(vals),
+ __imm_ptr(callback_subprog),
+ __imm(bpf_loop)
+ : __clobber_common, "r6"
+ );
+}
+
+__noinline __used
+static __u64 subprog_with_precise_arg(__u64 x)
+{
+ return vals[x]; /* x is forced to be precise */
+}
+
+SEC("?raw_tp")
+__success __log_level(2)
+__msg("8: (0f) r2 += r1")
+__msg("mark_precise: frame1: last_idx 8 first_idx 0")
+__msg("mark_precise: frame1: regs=r1 stack= before 6: (18) r2 = ")
+__msg("mark_precise: frame1: regs=r1 stack= before 5: (67) r1 <<= 2")
+__msg("mark_precise: frame1: regs=r1 stack= before 2: (85) call pc+2")
+__msg("mark_precise: frame0: regs=r1 stack= before 1: (bf) r1 = r6")
+__msg("mark_precise: frame0: regs=r6 stack= before 0: (b7) r6 = 3")
+__naked int subprog_arg_precise(void)
+{
+ asm volatile (
+ "r6 = 3;"
+ "r1 = r6;"
+ /* subprog_with_precise_arg expects its argument to be
+ * precise, so r1->r6 will be marked precise from inside the
+ * subprog
+ */
+ "call subprog_with_precise_arg;"
+ "r0 += r6;"
+ "exit;"
+ :
+ :
+ : __clobber_common, "r6"
+ );
+}
+
+/* r1 is pointer to stack slot;
+ * r2 is a register to spill into that slot
+ * subprog also spills r2 into its own stack slot
+ */
+__naked __noinline __used
+static __u64 subprog_spill_reg_precise(void)
+{
+ asm volatile (
+ /* spill to parent stack */
+ "*(u64 *)(r1 + 0) = r2;"
+ /* spill to subprog stack (we use -16 offset to avoid
+ * accidental confusion with parent's -8 stack slot in
+ * verifier log output)
+ */
+ "*(u64 *)(r10 - 16) = r2;"
+ /* use both spills as return result to propagete precision everywhere */
+ "r0 = *(u64 *)(r10 - 16);"
+ "r2 = *(u64 *)(r1 + 0);"
+ "r0 += r2;"
+ "exit;"
+ );
+}
+
+SEC("?raw_tp")
+__success __log_level(2)
+__msg("10: (0f) r1 += r7")
+__msg("mark_precise: frame0: last_idx 10 first_idx 7 subseq_idx -1")
+__msg("mark_precise: frame0: regs=r7 stack= before 9: (bf) r1 = r8")
+__msg("mark_precise: frame0: regs=r7 stack= before 8: (27) r7 *= 4")
+__msg("mark_precise: frame0: regs=r7 stack= before 7: (79) r7 = *(u64 *)(r10 -8)")
+__msg("mark_precise: frame0: parent state regs= stack=-8: R0_w=2 R6_w=1 R8_rw=map_value(map=.data.vals,ks=4,vs=16) R10=fp0 fp-8_rw=P1")
+__msg("mark_precise: frame0: last_idx 18 first_idx 0 subseq_idx 7")
+__msg("mark_precise: frame0: regs= stack=-8 before 18: (95) exit")
+__msg("mark_precise: frame1: regs= stack= before 17: (0f) r0 += r2")
+__msg("mark_precise: frame1: regs= stack= before 16: (79) r2 = *(u64 *)(r1 +0)")
+__msg("mark_precise: frame1: regs= stack= before 15: (79) r0 = *(u64 *)(r10 -16)")
+__msg("mark_precise: frame1: regs= stack= before 14: (7b) *(u64 *)(r10 -16) = r2")
+__msg("mark_precise: frame1: regs= stack= before 13: (7b) *(u64 *)(r1 +0) = r2")
+__msg("mark_precise: frame1: regs=r2 stack= before 6: (85) call pc+6")
+__msg("mark_precise: frame0: regs=r2 stack= before 5: (bf) r2 = r6")
+__msg("mark_precise: frame0: regs=r6 stack= before 4: (07) r1 += -8")
+__msg("mark_precise: frame0: regs=r6 stack= before 3: (bf) r1 = r10")
+__msg("mark_precise: frame0: regs=r6 stack= before 2: (b7) r6 = 1")
+__naked int subprog_spill_into_parent_stack_slot_precise(void)
+{
+ asm volatile (
+ "r6 = 1;"
+
+ /* pass pointer to stack slot and r6 to subprog;
+ * r6 will be marked precise and spilled into fp-8 slot, which
+ * also should be marked precise
+ */
+ "r1 = r10;"
+ "r1 += -8;"
+ "r2 = r6;"
+ "call subprog_spill_reg_precise;"
+
+ /* restore reg from stack; in this case we'll be carrying
+ * stack mask when going back into subprog through jump
+ * history
+ */
+ "r7 = *(u64 *)(r10 - 8);"
+
+ "r7 *= 4;"
+ "r1 = %[vals];"
+ /* here r7 is forced to be precise and has to be propagated
+ * back to the beginning, handling subprog call and logic
+ */
+ "r1 += r7;"
+ "r0 = *(u32 *)(r1 + 0);"
+ "exit;"
+ :
+ : __imm_ptr(vals)
+ : __clobber_common, "r6", "r7"
+ );
+}
+
+SEC("?raw_tp")
+__success __log_level(2)
+__msg("17: (0f) r1 += r0")
+__msg("mark_precise: frame0: last_idx 17 first_idx 0 subseq_idx -1")
+__msg("mark_precise: frame0: regs=r0 stack= before 16: (bf) r1 = r7")
+__msg("mark_precise: frame0: regs=r0 stack= before 15: (27) r0 *= 4")
+__msg("mark_precise: frame0: regs=r0 stack= before 14: (79) r0 = *(u64 *)(r10 -16)")
+__msg("mark_precise: frame0: regs= stack=-16 before 13: (7b) *(u64 *)(r7 -8) = r0")
+__msg("mark_precise: frame0: regs=r0 stack= before 12: (79) r0 = *(u64 *)(r8 +16)")
+__msg("mark_precise: frame0: regs= stack=-16 before 11: (7b) *(u64 *)(r8 +16) = r0")
+__msg("mark_precise: frame0: regs=r0 stack= before 10: (79) r0 = *(u64 *)(r7 -8)")
+__msg("mark_precise: frame0: regs= stack=-16 before 9: (7b) *(u64 *)(r10 -16) = r0")
+__msg("mark_precise: frame0: regs=r0 stack= before 8: (07) r8 += -32")
+__msg("mark_precise: frame0: regs=r0 stack= before 7: (bf) r8 = r10")
+__msg("mark_precise: frame0: regs=r0 stack= before 6: (07) r7 += -8")
+__msg("mark_precise: frame0: regs=r0 stack= before 5: (bf) r7 = r10")
+__msg("mark_precise: frame0: regs=r0 stack= before 21: (95) exit")
+__msg("mark_precise: frame1: regs=r0 stack= before 20: (bf) r0 = r1")
+__msg("mark_precise: frame1: regs=r1 stack= before 4: (85) call pc+15")
+__msg("mark_precise: frame0: regs=r1 stack= before 3: (bf) r1 = r6")
+__msg("mark_precise: frame0: regs=r6 stack= before 2: (b7) r6 = 1")
+__naked int stack_slot_aliases_precision(void)
+{
+ asm volatile (
+ "r6 = 1;"
+ /* pass r6 through r1 into subprog to get it back as r0;
+ * this whole chain will have to be marked as precise later
+ */
+ "r1 = r6;"
+ "call identity_subprog;"
+ /* let's setup two registers that are aliased to r10 */
+ "r7 = r10;"
+ "r7 += -8;" /* r7 = r10 - 8 */
+ "r8 = r10;"
+ "r8 += -32;" /* r8 = r10 - 32 */
+ /* now spill subprog's return value (a r6 -> r1 -> r0 chain)
+ * a few times through different stack pointer regs, making
+ * sure to use r10, r7, and r8 both in LDX and STX insns, and
+ * *importantly* also using a combination of const var_off and
+ * insn->off to validate that we record final stack slot
+ * correctly, instead of relying on just insn->off derivation,
+ * which is only valid for r10-based stack offset
+ */
+ "*(u64 *)(r10 - 16) = r0;"
+ "r0 = *(u64 *)(r7 - 8);" /* r7 - 8 == r10 - 16 */
+ "*(u64 *)(r8 + 16) = r0;" /* r8 + 16 = r10 - 16 */
+ "r0 = *(u64 *)(r8 + 16);"
+ "*(u64 *)(r7 - 8) = r0;"
+ "r0 = *(u64 *)(r10 - 16);"
+ /* get ready to use r0 as an index into array to force precision */
+ "r0 *= 4;"
+ "r1 = %[vals];"
+ /* here r0->r1->r6 chain is forced to be precise and has to be
+ * propagated back to the beginning, including through the
+ * subprog call and all the stack spills and loads
+ */
+ "r1 += r0;"
+ "r0 = *(u32 *)(r1 + 0);"
+ "exit;"
+ :
+ : __imm_ptr(vals)
+ : __clobber_common, "r6"
+ );
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/verifier_subreg.c b/tools/testing/selftests/bpf/progs/verifier_subreg.c
new file mode 100644
index 000000000000..8613ea160dcd
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/verifier_subreg.c
@@ -0,0 +1,673 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Converted from tools/testing/selftests/bpf/verifier/subreg.c */
+
+#include <linux/bpf.h>
+#include <bpf/bpf_helpers.h>
+#include "bpf_misc.h"
+
+/* This file contains sub-register zero extension checks for insns defining
+ * sub-registers, meaning:
+ * - All insns under BPF_ALU class. Their BPF_ALU32 variants or narrow width
+ * forms (BPF_END) could define sub-registers.
+ * - Narrow direct loads, BPF_B/H/W | BPF_LDX.
+ * - BPF_LD is not exposed to JIT back-ends, so no need for testing.
+ *
+ * "get_prandom_u32" is used to initialize low 32-bit of some registers to
+ * prevent potential optimizations done by verifier or JIT back-ends which could
+ * optimize register back into constant when range info shows one register is a
+ * constant.
+ */
+
+SEC("socket")
+__description("add32 reg zero extend check")
+__success __success_unpriv __retval(0)
+__naked void add32_reg_zero_extend_check(void)
+{
+ asm volatile (" \
+ call %[bpf_get_prandom_u32]; \
+ r1 = r0; \
+ r0 = 0x100000000 ll; \
+ w0 += w1; \
+ r0 >>= 32; \
+ exit; \
+" :
+ : __imm(bpf_get_prandom_u32)
+ : __clobber_all);
+}
+
+SEC("socket")
+__description("add32 imm zero extend check")
+__success __success_unpriv __retval(0)
+__naked void add32_imm_zero_extend_check(void)
+{
+ asm volatile (" \
+ call %[bpf_get_prandom_u32]; \
+ r1 = 0x1000000000 ll; \
+ r0 |= r1; \
+ /* An insn could have no effect on the low 32-bit, for example:\
+ * a = a + 0 \
+ * a = a | 0 \
+ * a = a & -1 \
+ * But, they should still zero high 32-bit. \
+ */ \
+ w0 += 0; \
+ r0 >>= 32; \
+ r6 = r0; \
+ call %[bpf_get_prandom_u32]; \
+ r1 = 0x1000000000 ll; \
+ r0 |= r1; \
+ w0 += -2; \
+ r0 >>= 32; \
+ r0 |= r6; \
+ exit; \
+" :
+ : __imm(bpf_get_prandom_u32)
+ : __clobber_all);
+}
+
+SEC("socket")
+__description("sub32 reg zero extend check")
+__success __success_unpriv __retval(0)
+__naked void sub32_reg_zero_extend_check(void)
+{
+ asm volatile (" \
+ call %[bpf_get_prandom_u32]; \
+ r1 = r0; \
+ r0 = 0x1ffffffff ll; \
+ w0 -= w1; \
+ r0 >>= 32; \
+ exit; \
+" :
+ : __imm(bpf_get_prandom_u32)
+ : __clobber_all);
+}
+
+SEC("socket")
+__description("sub32 imm zero extend check")
+__success __success_unpriv __retval(0)
+__naked void sub32_imm_zero_extend_check(void)
+{
+ asm volatile (" \
+ call %[bpf_get_prandom_u32]; \
+ r1 = 0x1000000000 ll; \
+ r0 |= r1; \
+ w0 -= 0; \
+ r0 >>= 32; \
+ r6 = r0; \
+ call %[bpf_get_prandom_u32]; \
+ r1 = 0x1000000000 ll; \
+ r0 |= r1; \
+ w0 -= 1; \
+ r0 >>= 32; \
+ r0 |= r6; \
+ exit; \
+" :
+ : __imm(bpf_get_prandom_u32)
+ : __clobber_all);
+}
+
+SEC("socket")
+__description("mul32 reg zero extend check")
+__success __success_unpriv __retval(0)
+__naked void mul32_reg_zero_extend_check(void)
+{
+ asm volatile (" \
+ call %[bpf_get_prandom_u32]; \
+ r1 = r0; \
+ r0 = 0x100000001 ll; \
+ w0 *= w1; \
+ r0 >>= 32; \
+ exit; \
+" :
+ : __imm(bpf_get_prandom_u32)
+ : __clobber_all);
+}
+
+SEC("socket")
+__description("mul32 imm zero extend check")
+__success __success_unpriv __retval(0)
+__naked void mul32_imm_zero_extend_check(void)
+{
+ asm volatile (" \
+ call %[bpf_get_prandom_u32]; \
+ r1 = 0x1000000000 ll; \
+ r0 |= r1; \
+ w0 *= 1; \
+ r0 >>= 32; \
+ r6 = r0; \
+ call %[bpf_get_prandom_u32]; \
+ r1 = 0x1000000000 ll; \
+ r0 |= r1; \
+ w0 *= -1; \
+ r0 >>= 32; \
+ r0 |= r6; \
+ exit; \
+" :
+ : __imm(bpf_get_prandom_u32)
+ : __clobber_all);
+}
+
+SEC("socket")
+__description("div32 reg zero extend check")
+__success __success_unpriv __retval(0)
+__naked void div32_reg_zero_extend_check(void)
+{
+ asm volatile (" \
+ call %[bpf_get_prandom_u32]; \
+ r1 = r0; \
+ r0 = -1; \
+ w0 /= w1; \
+ r0 >>= 32; \
+ exit; \
+" :
+ : __imm(bpf_get_prandom_u32)
+ : __clobber_all);
+}
+
+SEC("socket")
+__description("div32 imm zero extend check")
+__success __success_unpriv __retval(0)
+__naked void div32_imm_zero_extend_check(void)
+{
+ asm volatile (" \
+ call %[bpf_get_prandom_u32]; \
+ r1 = 0x1000000000 ll; \
+ r0 |= r1; \
+ w0 /= 1; \
+ r0 >>= 32; \
+ r6 = r0; \
+ call %[bpf_get_prandom_u32]; \
+ r1 = 0x1000000000 ll; \
+ r0 |= r1; \
+ w0 /= 2; \
+ r0 >>= 32; \
+ r0 |= r6; \
+ exit; \
+" :
+ : __imm(bpf_get_prandom_u32)
+ : __clobber_all);
+}
+
+SEC("socket")
+__description("or32 reg zero extend check")
+__success __success_unpriv __retval(0)
+__naked void or32_reg_zero_extend_check(void)
+{
+ asm volatile (" \
+ call %[bpf_get_prandom_u32]; \
+ r1 = r0; \
+ r0 = 0x100000001 ll; \
+ w0 |= w1; \
+ r0 >>= 32; \
+ exit; \
+" :
+ : __imm(bpf_get_prandom_u32)
+ : __clobber_all);
+}
+
+SEC("socket")
+__description("or32 imm zero extend check")
+__success __success_unpriv __retval(0)
+__naked void or32_imm_zero_extend_check(void)
+{
+ asm volatile (" \
+ call %[bpf_get_prandom_u32]; \
+ r1 = 0x1000000000 ll; \
+ r0 |= r1; \
+ w0 |= 0; \
+ r0 >>= 32; \
+ r6 = r0; \
+ call %[bpf_get_prandom_u32]; \
+ r1 = 0x1000000000 ll; \
+ r0 |= r1; \
+ w0 |= 1; \
+ r0 >>= 32; \
+ r0 |= r6; \
+ exit; \
+" :
+ : __imm(bpf_get_prandom_u32)
+ : __clobber_all);
+}
+
+SEC("socket")
+__description("and32 reg zero extend check")
+__success __success_unpriv __retval(0)
+__naked void and32_reg_zero_extend_check(void)
+{
+ asm volatile (" \
+ call %[bpf_get_prandom_u32]; \
+ r1 = 0x100000000 ll; \
+ r1 |= r0; \
+ r0 = 0x1ffffffff ll; \
+ w0 &= w1; \
+ r0 >>= 32; \
+ exit; \
+" :
+ : __imm(bpf_get_prandom_u32)
+ : __clobber_all);
+}
+
+SEC("socket")
+__description("and32 imm zero extend check")
+__success __success_unpriv __retval(0)
+__naked void and32_imm_zero_extend_check(void)
+{
+ asm volatile (" \
+ call %[bpf_get_prandom_u32]; \
+ r1 = 0x1000000000 ll; \
+ r0 |= r1; \
+ w0 &= -1; \
+ r0 >>= 32; \
+ r6 = r0; \
+ call %[bpf_get_prandom_u32]; \
+ r1 = 0x1000000000 ll; \
+ r0 |= r1; \
+ w0 &= -2; \
+ r0 >>= 32; \
+ r0 |= r6; \
+ exit; \
+" :
+ : __imm(bpf_get_prandom_u32)
+ : __clobber_all);
+}
+
+SEC("socket")
+__description("lsh32 reg zero extend check")
+__success __success_unpriv __retval(0)
+__naked void lsh32_reg_zero_extend_check(void)
+{
+ asm volatile (" \
+ call %[bpf_get_prandom_u32]; \
+ r1 = 0x100000000 ll; \
+ r0 |= r1; \
+ r1 = 1; \
+ w0 <<= w1; \
+ r0 >>= 32; \
+ exit; \
+" :
+ : __imm(bpf_get_prandom_u32)
+ : __clobber_all);
+}
+
+SEC("socket")
+__description("lsh32 imm zero extend check")
+__success __success_unpriv __retval(0)
+__naked void lsh32_imm_zero_extend_check(void)
+{
+ asm volatile (" \
+ call %[bpf_get_prandom_u32]; \
+ r1 = 0x1000000000 ll; \
+ r0 |= r1; \
+ w0 <<= 0; \
+ r0 >>= 32; \
+ r6 = r0; \
+ call %[bpf_get_prandom_u32]; \
+ r1 = 0x1000000000 ll; \
+ r0 |= r1; \
+ w0 <<= 1; \
+ r0 >>= 32; \
+ r0 |= r6; \
+ exit; \
+" :
+ : __imm(bpf_get_prandom_u32)
+ : __clobber_all);
+}
+
+SEC("socket")
+__description("rsh32 reg zero extend check")
+__success __success_unpriv __retval(0)
+__naked void rsh32_reg_zero_extend_check(void)
+{
+ asm volatile (" \
+ call %[bpf_get_prandom_u32]; \
+ r1 = 0x1000000000 ll; \
+ r0 |= r1; \
+ r1 = 1; \
+ w0 >>= w1; \
+ r0 >>= 32; \
+ exit; \
+" :
+ : __imm(bpf_get_prandom_u32)
+ : __clobber_all);
+}
+
+SEC("socket")
+__description("rsh32 imm zero extend check")
+__success __success_unpriv __retval(0)
+__naked void rsh32_imm_zero_extend_check(void)
+{
+ asm volatile (" \
+ call %[bpf_get_prandom_u32]; \
+ r1 = 0x1000000000 ll; \
+ r0 |= r1; \
+ w0 >>= 0; \
+ r0 >>= 32; \
+ r6 = r0; \
+ call %[bpf_get_prandom_u32]; \
+ r1 = 0x1000000000 ll; \
+ r0 |= r1; \
+ w0 >>= 1; \
+ r0 >>= 32; \
+ r0 |= r6; \
+ exit; \
+" :
+ : __imm(bpf_get_prandom_u32)
+ : __clobber_all);
+}
+
+SEC("socket")
+__description("neg32 reg zero extend check")
+__success __success_unpriv __retval(0)
+__naked void neg32_reg_zero_extend_check(void)
+{
+ asm volatile (" \
+ call %[bpf_get_prandom_u32]; \
+ r1 = 0x1000000000 ll; \
+ r0 |= r1; \
+ w0 = -w0; \
+ r0 >>= 32; \
+ exit; \
+" :
+ : __imm(bpf_get_prandom_u32)
+ : __clobber_all);
+}
+
+SEC("socket")
+__description("mod32 reg zero extend check")
+__success __success_unpriv __retval(0)
+__naked void mod32_reg_zero_extend_check(void)
+{
+ asm volatile (" \
+ call %[bpf_get_prandom_u32]; \
+ r1 = r0; \
+ r0 = -1; \
+ w0 %%= w1; \
+ r0 >>= 32; \
+ exit; \
+" :
+ : __imm(bpf_get_prandom_u32)
+ : __clobber_all);
+}
+
+SEC("socket")
+__description("mod32 imm zero extend check")
+__success __success_unpriv __retval(0)
+__naked void mod32_imm_zero_extend_check(void)
+{
+ asm volatile (" \
+ call %[bpf_get_prandom_u32]; \
+ r1 = 0x1000000000 ll; \
+ r0 |= r1; \
+ w0 %%= 1; \
+ r0 >>= 32; \
+ r6 = r0; \
+ call %[bpf_get_prandom_u32]; \
+ r1 = 0x1000000000 ll; \
+ r0 |= r1; \
+ w0 %%= 2; \
+ r0 >>= 32; \
+ r0 |= r6; \
+ exit; \
+" :
+ : __imm(bpf_get_prandom_u32)
+ : __clobber_all);
+}
+
+SEC("socket")
+__description("xor32 reg zero extend check")
+__success __success_unpriv __retval(0)
+__naked void xor32_reg_zero_extend_check(void)
+{
+ asm volatile (" \
+ call %[bpf_get_prandom_u32]; \
+ r1 = r0; \
+ r0 = 0x100000000 ll; \
+ w0 ^= w1; \
+ r0 >>= 32; \
+ exit; \
+" :
+ : __imm(bpf_get_prandom_u32)
+ : __clobber_all);
+}
+
+SEC("socket")
+__description("xor32 imm zero extend check")
+__success __success_unpriv __retval(0)
+__naked void xor32_imm_zero_extend_check(void)
+{
+ asm volatile (" \
+ call %[bpf_get_prandom_u32]; \
+ r1 = 0x1000000000 ll; \
+ r0 |= r1; \
+ w0 ^= 1; \
+ r0 >>= 32; \
+ exit; \
+" :
+ : __imm(bpf_get_prandom_u32)
+ : __clobber_all);
+}
+
+SEC("socket")
+__description("mov32 reg zero extend check")
+__success __success_unpriv __retval(0)
+__naked void mov32_reg_zero_extend_check(void)
+{
+ asm volatile (" \
+ call %[bpf_get_prandom_u32]; \
+ r1 = 0x100000000 ll; \
+ r1 |= r0; \
+ r0 = 0x100000000 ll; \
+ w0 = w1; \
+ r0 >>= 32; \
+ exit; \
+" :
+ : __imm(bpf_get_prandom_u32)
+ : __clobber_all);
+}
+
+SEC("socket")
+__description("mov32 imm zero extend check")
+__success __success_unpriv __retval(0)
+__naked void mov32_imm_zero_extend_check(void)
+{
+ asm volatile (" \
+ call %[bpf_get_prandom_u32]; \
+ r1 = 0x1000000000 ll; \
+ r0 |= r1; \
+ w0 = 0; \
+ r0 >>= 32; \
+ r6 = r0; \
+ call %[bpf_get_prandom_u32]; \
+ r1 = 0x1000000000 ll; \
+ r0 |= r1; \
+ w0 = 1; \
+ r0 >>= 32; \
+ r0 |= r6; \
+ exit; \
+" :
+ : __imm(bpf_get_prandom_u32)
+ : __clobber_all);
+}
+
+SEC("socket")
+__description("arsh32 reg zero extend check")
+__success __success_unpriv __retval(0)
+__naked void arsh32_reg_zero_extend_check(void)
+{
+ asm volatile (" \
+ call %[bpf_get_prandom_u32]; \
+ r1 = 0x1000000000 ll; \
+ r0 |= r1; \
+ r1 = 1; \
+ w0 s>>= w1; \
+ r0 >>= 32; \
+ exit; \
+" :
+ : __imm(bpf_get_prandom_u32)
+ : __clobber_all);
+}
+
+SEC("socket")
+__description("arsh32 imm zero extend check")
+__success __success_unpriv __retval(0)
+__naked void arsh32_imm_zero_extend_check(void)
+{
+ asm volatile (" \
+ call %[bpf_get_prandom_u32]; \
+ r1 = 0x1000000000 ll; \
+ r0 |= r1; \
+ w0 s>>= 0; \
+ r0 >>= 32; \
+ r6 = r0; \
+ call %[bpf_get_prandom_u32]; \
+ r1 = 0x1000000000 ll; \
+ r0 |= r1; \
+ w0 s>>= 1; \
+ r0 >>= 32; \
+ r0 |= r6; \
+ exit; \
+" :
+ : __imm(bpf_get_prandom_u32)
+ : __clobber_all);
+}
+
+SEC("socket")
+__description("end16 (to_le) reg zero extend check")
+__success __success_unpriv __retval(0)
+__naked void le_reg_zero_extend_check_1(void)
+{
+ asm volatile (" \
+ call %[bpf_get_prandom_u32]; \
+ r6 = r0; \
+ r6 <<= 32; \
+ call %[bpf_get_prandom_u32]; \
+ r0 |= r6; \
+ r0 = le16 r0; \
+ r0 >>= 32; \
+ exit; \
+" :
+ : __imm(bpf_get_prandom_u32)
+ : __clobber_all);
+}
+
+SEC("socket")
+__description("end32 (to_le) reg zero extend check")
+__success __success_unpriv __retval(0)
+__naked void le_reg_zero_extend_check_2(void)
+{
+ asm volatile (" \
+ call %[bpf_get_prandom_u32]; \
+ r6 = r0; \
+ r6 <<= 32; \
+ call %[bpf_get_prandom_u32]; \
+ r0 |= r6; \
+ r0 = le32 r0; \
+ r0 >>= 32; \
+ exit; \
+" :
+ : __imm(bpf_get_prandom_u32)
+ : __clobber_all);
+}
+
+SEC("socket")
+__description("end16 (to_be) reg zero extend check")
+__success __success_unpriv __retval(0)
+__naked void be_reg_zero_extend_check_1(void)
+{
+ asm volatile (" \
+ call %[bpf_get_prandom_u32]; \
+ r6 = r0; \
+ r6 <<= 32; \
+ call %[bpf_get_prandom_u32]; \
+ r0 |= r6; \
+ r0 = be16 r0; \
+ r0 >>= 32; \
+ exit; \
+" :
+ : __imm(bpf_get_prandom_u32)
+ : __clobber_all);
+}
+
+SEC("socket")
+__description("end32 (to_be) reg zero extend check")
+__success __success_unpriv __retval(0)
+__naked void be_reg_zero_extend_check_2(void)
+{
+ asm volatile (" \
+ call %[bpf_get_prandom_u32]; \
+ r6 = r0; \
+ r6 <<= 32; \
+ call %[bpf_get_prandom_u32]; \
+ r0 |= r6; \
+ r0 = be32 r0; \
+ r0 >>= 32; \
+ exit; \
+" :
+ : __imm(bpf_get_prandom_u32)
+ : __clobber_all);
+}
+
+SEC("socket")
+__description("ldx_b zero extend check")
+__success __success_unpriv __retval(0)
+__naked void ldx_b_zero_extend_check(void)
+{
+ asm volatile (" \
+ r6 = r10; \
+ r6 += -4; \
+ r7 = 0xfaceb00c; \
+ *(u32*)(r6 + 0) = r7; \
+ call %[bpf_get_prandom_u32]; \
+ r1 = 0x1000000000 ll; \
+ r0 |= r1; \
+ r0 = *(u8*)(r6 + 0); \
+ r0 >>= 32; \
+ exit; \
+" :
+ : __imm(bpf_get_prandom_u32)
+ : __clobber_all);
+}
+
+SEC("socket")
+__description("ldx_h zero extend check")
+__success __success_unpriv __retval(0)
+__naked void ldx_h_zero_extend_check(void)
+{
+ asm volatile (" \
+ r6 = r10; \
+ r6 += -4; \
+ r7 = 0xfaceb00c; \
+ *(u32*)(r6 + 0) = r7; \
+ call %[bpf_get_prandom_u32]; \
+ r1 = 0x1000000000 ll; \
+ r0 |= r1; \
+ r0 = *(u16*)(r6 + 0); \
+ r0 >>= 32; \
+ exit; \
+" :
+ : __imm(bpf_get_prandom_u32)
+ : __clobber_all);
+}
+
+SEC("socket")
+__description("ldx_w zero extend check")
+__success __success_unpriv __retval(0)
+__naked void ldx_w_zero_extend_check(void)
+{
+ asm volatile (" \
+ r6 = r10; \
+ r6 += -4; \
+ r7 = 0xfaceb00c; \
+ *(u32*)(r6 + 0) = r7; \
+ call %[bpf_get_prandom_u32]; \
+ r1 = 0x1000000000 ll; \
+ r0 |= r1; \
+ r0 = *(u32*)(r6 + 0); \
+ r0 >>= 32; \
+ exit; \
+" :
+ : __imm(bpf_get_prandom_u32)
+ : __clobber_all);
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/verifier_typedef.c b/tools/testing/selftests/bpf/progs/verifier_typedef.c
new file mode 100644
index 000000000000..08481cfaac4b
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/verifier_typedef.c
@@ -0,0 +1,23 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include <vmlinux.h>
+#include <bpf/bpf_helpers.h>
+#include "bpf_misc.h"
+
+SEC("fentry/bpf_fentry_test_sinfo")
+__description("typedef: resolve")
+__success __retval(0)
+__naked void resolve_typedef(void)
+{
+ asm volatile (" \
+ r1 = *(u64 *)(r1 +0); \
+ r2 = *(u64 *)(r1 +%[frags_offs]); \
+ r0 = 0; \
+ exit; \
+" :
+ : __imm_const(frags_offs,
+ offsetof(struct skb_shared_info, frags))
+ : __clobber_all);
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/verifier_uninit.c b/tools/testing/selftests/bpf/progs/verifier_uninit.c
new file mode 100644
index 000000000000..7718cd7d19ce
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/verifier_uninit.c
@@ -0,0 +1,61 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Converted from tools/testing/selftests/bpf/verifier/uninit.c */
+
+#include <linux/bpf.h>
+#include <bpf/bpf_helpers.h>
+#include "../../../include/linux/filter.h"
+#include "bpf_misc.h"
+
+SEC("socket")
+__description("read uninitialized register")
+__failure __msg("R2 !read_ok")
+__failure_unpriv
+__naked void read_uninitialized_register(void)
+{
+ asm volatile (" \
+ r0 = r2; \
+ exit; \
+" ::: __clobber_all);
+}
+
+SEC("socket")
+__description("read invalid register")
+__failure __msg("R15 is invalid")
+__failure_unpriv
+__naked void read_invalid_register(void)
+{
+ asm volatile (" \
+ .8byte %[mov64_reg]; \
+ exit; \
+" :
+ : __imm_insn(mov64_reg, BPF_MOV64_REG(BPF_REG_0, -1))
+ : __clobber_all);
+}
+
+SEC("socket")
+__description("program doesn't init R0 before exit")
+__failure __msg("R0 !read_ok")
+__failure_unpriv
+__naked void t_init_r0_before_exit(void)
+{
+ asm volatile (" \
+ r2 = r1; \
+ exit; \
+" ::: __clobber_all);
+}
+
+SEC("socket")
+__description("program doesn't init R0 before exit in all branches")
+__failure __msg("R0 !read_ok")
+__msg_unpriv("R1 pointer comparison")
+__naked void before_exit_in_all_branches(void)
+{
+ asm volatile (" \
+ if r1 >= 0 goto l0_%=; \
+ r0 = 1; \
+ r0 += 2; \
+l0_%=: exit; \
+" ::: __clobber_all);
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/verifier_unpriv.c b/tools/testing/selftests/bpf/progs/verifier_unpriv.c
new file mode 100644
index 000000000000..7ea535bfbacd
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/verifier_unpriv.c
@@ -0,0 +1,726 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Converted from tools/testing/selftests/bpf/verifier/unpriv.c */
+
+#include <linux/bpf.h>
+#include <bpf/bpf_helpers.h>
+#include "../../../include/linux/filter.h"
+#include "bpf_misc.h"
+
+#define BPF_SK_LOOKUP(func) \
+ /* struct bpf_sock_tuple tuple = {} */ \
+ "r2 = 0;" \
+ "*(u32*)(r10 - 8) = r2;" \
+ "*(u64*)(r10 - 16) = r2;" \
+ "*(u64*)(r10 - 24) = r2;" \
+ "*(u64*)(r10 - 32) = r2;" \
+ "*(u64*)(r10 - 40) = r2;" \
+ "*(u64*)(r10 - 48) = r2;" \
+ /* sk = func(ctx, &tuple, sizeof tuple, 0, 0) */ \
+ "r2 = r10;" \
+ "r2 += -48;" \
+ "r3 = %[sizeof_bpf_sock_tuple];"\
+ "r4 = 0;" \
+ "r5 = 0;" \
+ "call %[" #func "];"
+
+struct {
+ __uint(type, BPF_MAP_TYPE_HASH);
+ __uint(max_entries, 1);
+ __type(key, long long);
+ __type(value, long long);
+} map_hash_8b SEC(".maps");
+
+void dummy_prog_42_socket(void);
+void dummy_prog_24_socket(void);
+void dummy_prog_loop1_socket(void);
+
+struct {
+ __uint(type, BPF_MAP_TYPE_PROG_ARRAY);
+ __uint(max_entries, 4);
+ __uint(key_size, sizeof(int));
+ __array(values, void (void));
+} map_prog1_socket SEC(".maps") = {
+ .values = {
+ [0] = (void *)&dummy_prog_42_socket,
+ [1] = (void *)&dummy_prog_loop1_socket,
+ [2] = (void *)&dummy_prog_24_socket,
+ },
+};
+
+SEC("socket")
+__auxiliary __auxiliary_unpriv
+__naked void dummy_prog_42_socket(void)
+{
+ asm volatile ("r0 = 42; exit;");
+}
+
+SEC("socket")
+__auxiliary __auxiliary_unpriv
+__naked void dummy_prog_24_socket(void)
+{
+ asm volatile ("r0 = 24; exit;");
+}
+
+SEC("socket")
+__auxiliary __auxiliary_unpriv
+__naked void dummy_prog_loop1_socket(void)
+{
+ asm volatile (" \
+ r3 = 1; \
+ r2 = %[map_prog1_socket] ll; \
+ call %[bpf_tail_call]; \
+ r0 = 41; \
+ exit; \
+" :
+ : __imm(bpf_tail_call),
+ __imm_addr(map_prog1_socket)
+ : __clobber_all);
+}
+
+SEC("socket")
+__description("unpriv: return pointer")
+__success __failure_unpriv __msg_unpriv("R0 leaks addr")
+__retval(POINTER_VALUE)
+__naked void unpriv_return_pointer(void)
+{
+ asm volatile (" \
+ r0 = r10; \
+ exit; \
+" ::: __clobber_all);
+}
+
+SEC("socket")
+__description("unpriv: add const to pointer")
+__success __success_unpriv __retval(0)
+__naked void unpriv_add_const_to_pointer(void)
+{
+ asm volatile (" \
+ r1 += 8; \
+ r0 = 0; \
+ exit; \
+" ::: __clobber_all);
+}
+
+SEC("socket")
+__description("unpriv: add pointer to pointer")
+__failure __msg("R1 pointer += pointer")
+__failure_unpriv
+__naked void unpriv_add_pointer_to_pointer(void)
+{
+ asm volatile (" \
+ r1 += r10; \
+ r0 = 0; \
+ exit; \
+" ::: __clobber_all);
+}
+
+SEC("socket")
+__description("unpriv: neg pointer")
+__success __failure_unpriv __msg_unpriv("R1 pointer arithmetic")
+__retval(0)
+__naked void unpriv_neg_pointer(void)
+{
+ asm volatile (" \
+ r1 = -r1; \
+ r0 = 0; \
+ exit; \
+" ::: __clobber_all);
+}
+
+SEC("socket")
+__description("unpriv: cmp pointer with const")
+__success __failure_unpriv __msg_unpriv("R1 pointer comparison")
+__retval(0)
+__naked void unpriv_cmp_pointer_with_const(void)
+{
+ asm volatile (" \
+ if r1 == 0 goto l0_%=; \
+l0_%=: r0 = 0; \
+ exit; \
+" ::: __clobber_all);
+}
+
+SEC("socket")
+__description("unpriv: cmp pointer with pointer")
+__success __failure_unpriv __msg_unpriv("R10 pointer comparison")
+__retval(0)
+__naked void unpriv_cmp_pointer_with_pointer(void)
+{
+ asm volatile (" \
+ if r1 == r10 goto l0_%=; \
+l0_%=: r0 = 0; \
+ exit; \
+" ::: __clobber_all);
+}
+
+SEC("tracepoint")
+__description("unpriv: check that printk is disallowed")
+__success
+__naked void check_that_printk_is_disallowed(void)
+{
+ asm volatile (" \
+ r1 = 0; \
+ *(u64*)(r10 - 8) = r1; \
+ r1 = r10; \
+ r1 += -8; \
+ r2 = 8; \
+ r3 = r1; \
+ call %[bpf_trace_printk]; \
+ r0 = 0; \
+ exit; \
+" :
+ : __imm(bpf_trace_printk)
+ : __clobber_all);
+}
+
+SEC("socket")
+__description("unpriv: pass pointer to helper function")
+__success __failure_unpriv __msg_unpriv("R4 leaks addr")
+__retval(0)
+__naked void pass_pointer_to_helper_function(void)
+{
+ asm volatile (" \
+ r1 = 0; \
+ *(u64*)(r10 - 8) = r1; \
+ r2 = r10; \
+ r2 += -8; \
+ r1 = %[map_hash_8b] ll; \
+ r3 = r2; \
+ r4 = r2; \
+ call %[bpf_map_update_elem]; \
+ r0 = 0; \
+ exit; \
+" :
+ : __imm(bpf_map_update_elem),
+ __imm_addr(map_hash_8b)
+ : __clobber_all);
+}
+
+SEC("socket")
+__description("unpriv: indirectly pass pointer on stack to helper function")
+__success __failure_unpriv
+__msg_unpriv("invalid indirect read from stack R2 off -8+0 size 8")
+__retval(0)
+__naked void on_stack_to_helper_function(void)
+{
+ asm volatile (" \
+ *(u64*)(r10 - 8) = r10; \
+ r2 = r10; \
+ r2 += -8; \
+ r1 = %[map_hash_8b] ll; \
+ call %[bpf_map_lookup_elem]; \
+ r0 = 0; \
+ exit; \
+" :
+ : __imm(bpf_map_lookup_elem),
+ __imm_addr(map_hash_8b)
+ : __clobber_all);
+}
+
+SEC("socket")
+__description("unpriv: mangle pointer on stack 1")
+__success __failure_unpriv __msg_unpriv("attempt to corrupt spilled")
+__retval(0)
+__naked void mangle_pointer_on_stack_1(void)
+{
+ asm volatile (" \
+ *(u64*)(r10 - 8) = r10; \
+ r0 = 0; \
+ *(u32*)(r10 - 8) = r0; \
+ r0 = 0; \
+ exit; \
+" ::: __clobber_all);
+}
+
+SEC("socket")
+__description("unpriv: mangle pointer on stack 2")
+__success __failure_unpriv __msg_unpriv("attempt to corrupt spilled")
+__retval(0)
+__naked void mangle_pointer_on_stack_2(void)
+{
+ asm volatile (" \
+ *(u64*)(r10 - 8) = r10; \
+ r0 = 0; \
+ *(u8*)(r10 - 1) = r0; \
+ r0 = 0; \
+ exit; \
+" ::: __clobber_all);
+}
+
+SEC("socket")
+__description("unpriv: read pointer from stack in small chunks")
+__failure __msg("invalid size")
+__failure_unpriv
+__naked void from_stack_in_small_chunks(void)
+{
+ asm volatile (" \
+ *(u64*)(r10 - 8) = r10; \
+ r0 = *(u32*)(r10 - 8); \
+ r0 = 0; \
+ exit; \
+" ::: __clobber_all);
+}
+
+SEC("socket")
+__description("unpriv: write pointer into ctx")
+__failure __msg("invalid bpf_context access")
+__failure_unpriv __msg_unpriv("R1 leaks addr")
+__naked void unpriv_write_pointer_into_ctx(void)
+{
+ asm volatile (" \
+ *(u64*)(r1 + 0) = r1; \
+ r0 = 0; \
+ exit; \
+" ::: __clobber_all);
+}
+
+SEC("socket")
+__description("unpriv: spill/fill of ctx")
+__success __success_unpriv __retval(0)
+__naked void unpriv_spill_fill_of_ctx(void)
+{
+ asm volatile (" \
+ r6 = r10; \
+ r6 += -8; \
+ *(u64*)(r6 + 0) = r1; \
+ r1 = *(u64*)(r6 + 0); \
+ r0 = 0; \
+ exit; \
+" ::: __clobber_all);
+}
+
+SEC("tc")
+__description("unpriv: spill/fill of ctx 2")
+__success __retval(0)
+__naked void spill_fill_of_ctx_2(void)
+{
+ asm volatile (" \
+ r6 = r10; \
+ r6 += -8; \
+ *(u64*)(r6 + 0) = r1; \
+ r1 = *(u64*)(r6 + 0); \
+ call %[bpf_get_hash_recalc]; \
+ r0 = 0; \
+ exit; \
+" :
+ : __imm(bpf_get_hash_recalc)
+ : __clobber_all);
+}
+
+SEC("tc")
+__description("unpriv: spill/fill of ctx 3")
+__failure __msg("R1 type=fp expected=ctx")
+__naked void spill_fill_of_ctx_3(void)
+{
+ asm volatile (" \
+ r6 = r10; \
+ r6 += -8; \
+ *(u64*)(r6 + 0) = r1; \
+ *(u64*)(r6 + 0) = r10; \
+ r1 = *(u64*)(r6 + 0); \
+ call %[bpf_get_hash_recalc]; \
+ exit; \
+" :
+ : __imm(bpf_get_hash_recalc)
+ : __clobber_all);
+}
+
+SEC("tc")
+__description("unpriv: spill/fill of ctx 4")
+__failure __msg("R1 type=scalar expected=ctx")
+__naked void spill_fill_of_ctx_4(void)
+{
+ asm volatile (" \
+ r6 = r10; \
+ r6 += -8; \
+ *(u64*)(r6 + 0) = r1; \
+ r0 = 1; \
+ lock *(u64 *)(r10 - 8) += r0; \
+ r1 = *(u64*)(r6 + 0); \
+ call %[bpf_get_hash_recalc]; \
+ exit; \
+" :
+ : __imm(bpf_get_hash_recalc)
+ : __clobber_all);
+}
+
+SEC("tc")
+__description("unpriv: spill/fill of different pointers stx")
+__failure __msg("same insn cannot be used with different pointers")
+__naked void fill_of_different_pointers_stx(void)
+{
+ asm volatile (" \
+ r3 = 42; \
+ r6 = r10; \
+ r6 += -8; \
+ if r1 == 0 goto l0_%=; \
+ r2 = r10; \
+ r2 += -16; \
+ *(u64*)(r6 + 0) = r2; \
+l0_%=: if r1 != 0 goto l1_%=; \
+ *(u64*)(r6 + 0) = r1; \
+l1_%=: r1 = *(u64*)(r6 + 0); \
+ *(u32*)(r1 + %[__sk_buff_mark]) = r3; \
+ r0 = 0; \
+ exit; \
+" :
+ : __imm_const(__sk_buff_mark, offsetof(struct __sk_buff, mark))
+ : __clobber_all);
+}
+
+/* Same as above, but use BPF_ST_MEM to save 42
+ * instead of BPF_STX_MEM.
+ */
+SEC("tc")
+__description("unpriv: spill/fill of different pointers st")
+__failure __msg("same insn cannot be used with different pointers")
+__naked void fill_of_different_pointers_st(void)
+{
+ asm volatile (" \
+ r6 = r10; \
+ r6 += -8; \
+ if r1 == 0 goto l0_%=; \
+ r2 = r10; \
+ r2 += -16; \
+ *(u64*)(r6 + 0) = r2; \
+l0_%=: if r1 != 0 goto l1_%=; \
+ *(u64*)(r6 + 0) = r1; \
+l1_%=: r1 = *(u64*)(r6 + 0); \
+ .8byte %[st_mem]; \
+ r0 = 0; \
+ exit; \
+" :
+ : __imm_const(__sk_buff_mark, offsetof(struct __sk_buff, mark)),
+ __imm_insn(st_mem,
+ BPF_ST_MEM(BPF_W, BPF_REG_1, offsetof(struct __sk_buff, mark), 42))
+ : __clobber_all);
+}
+
+SEC("tc")
+__description("unpriv: spill/fill of different pointers stx - ctx and sock")
+__failure __msg("type=ctx expected=sock")
+__naked void pointers_stx_ctx_and_sock(void)
+{
+ asm volatile (" \
+ r8 = r1; \
+ /* struct bpf_sock *sock = bpf_sock_lookup(...); */\
+" BPF_SK_LOOKUP(bpf_sk_lookup_tcp)
+" r2 = r0; \
+ /* u64 foo; */ \
+ /* void *target = &foo; */ \
+ r6 = r10; \
+ r6 += -8; \
+ r1 = r8; \
+ /* if (skb == NULL) *target = sock; */ \
+ if r1 == 0 goto l0_%=; \
+ *(u64*)(r6 + 0) = r2; \
+l0_%=: /* else *target = skb; */ \
+ if r1 != 0 goto l1_%=; \
+ *(u64*)(r6 + 0) = r1; \
+l1_%=: /* struct __sk_buff *skb = *target; */ \
+ r1 = *(u64*)(r6 + 0); \
+ /* skb->mark = 42; */ \
+ r3 = 42; \
+ *(u32*)(r1 + %[__sk_buff_mark]) = r3; \
+ /* if (sk) bpf_sk_release(sk) */ \
+ if r1 == 0 goto l2_%=; \
+ call %[bpf_sk_release]; \
+l2_%=: r0 = 0; \
+ exit; \
+" :
+ : __imm(bpf_sk_lookup_tcp),
+ __imm(bpf_sk_release),
+ __imm_const(__sk_buff_mark, offsetof(struct __sk_buff, mark)),
+ __imm_const(sizeof_bpf_sock_tuple, sizeof(struct bpf_sock_tuple))
+ : __clobber_all);
+}
+
+SEC("tc")
+__description("unpriv: spill/fill of different pointers stx - leak sock")
+__failure
+//.errstr = "same insn cannot be used with different pointers",
+__msg("Unreleased reference")
+__naked void different_pointers_stx_leak_sock(void)
+{
+ asm volatile (" \
+ r8 = r1; \
+ /* struct bpf_sock *sock = bpf_sock_lookup(...); */\
+" BPF_SK_LOOKUP(bpf_sk_lookup_tcp)
+" r2 = r0; \
+ /* u64 foo; */ \
+ /* void *target = &foo; */ \
+ r6 = r10; \
+ r6 += -8; \
+ r1 = r8; \
+ /* if (skb == NULL) *target = sock; */ \
+ if r1 == 0 goto l0_%=; \
+ *(u64*)(r6 + 0) = r2; \
+l0_%=: /* else *target = skb; */ \
+ if r1 != 0 goto l1_%=; \
+ *(u64*)(r6 + 0) = r1; \
+l1_%=: /* struct __sk_buff *skb = *target; */ \
+ r1 = *(u64*)(r6 + 0); \
+ /* skb->mark = 42; */ \
+ r3 = 42; \
+ *(u32*)(r1 + %[__sk_buff_mark]) = r3; \
+ exit; \
+" :
+ : __imm(bpf_sk_lookup_tcp),
+ __imm_const(__sk_buff_mark, offsetof(struct __sk_buff, mark)),
+ __imm_const(sizeof_bpf_sock_tuple, sizeof(struct bpf_sock_tuple))
+ : __clobber_all);
+}
+
+SEC("tc")
+__description("unpriv: spill/fill of different pointers stx - sock and ctx (read)")
+__failure __msg("same insn cannot be used with different pointers")
+__naked void stx_sock_and_ctx_read(void)
+{
+ asm volatile (" \
+ r8 = r1; \
+ /* struct bpf_sock *sock = bpf_sock_lookup(...); */\
+" BPF_SK_LOOKUP(bpf_sk_lookup_tcp)
+" r2 = r0; \
+ /* u64 foo; */ \
+ /* void *target = &foo; */ \
+ r6 = r10; \
+ r6 += -8; \
+ r1 = r8; \
+ /* if (skb) *target = skb */ \
+ if r1 == 0 goto l0_%=; \
+ *(u64*)(r6 + 0) = r1; \
+l0_%=: /* else *target = sock */ \
+ if r1 != 0 goto l1_%=; \
+ *(u64*)(r6 + 0) = r2; \
+l1_%=: /* struct bpf_sock *sk = *target; */ \
+ r1 = *(u64*)(r6 + 0); \
+ /* if (sk) u32 foo = sk->mark; bpf_sk_release(sk); */\
+ if r1 == 0 goto l2_%=; \
+ r3 = *(u32*)(r1 + %[bpf_sock_mark]); \
+ call %[bpf_sk_release]; \
+l2_%=: r0 = 0; \
+ exit; \
+" :
+ : __imm(bpf_sk_lookup_tcp),
+ __imm(bpf_sk_release),
+ __imm_const(bpf_sock_mark, offsetof(struct bpf_sock, mark)),
+ __imm_const(sizeof_bpf_sock_tuple, sizeof(struct bpf_sock_tuple))
+ : __clobber_all);
+}
+
+SEC("tc")
+__description("unpriv: spill/fill of different pointers stx - sock and ctx (write)")
+__failure
+//.errstr = "same insn cannot be used with different pointers",
+__msg("cannot write into sock")
+__naked void stx_sock_and_ctx_write(void)
+{
+ asm volatile (" \
+ r8 = r1; \
+ /* struct bpf_sock *sock = bpf_sock_lookup(...); */\
+" BPF_SK_LOOKUP(bpf_sk_lookup_tcp)
+" r2 = r0; \
+ /* u64 foo; */ \
+ /* void *target = &foo; */ \
+ r6 = r10; \
+ r6 += -8; \
+ r1 = r8; \
+ /* if (skb) *target = skb */ \
+ if r1 == 0 goto l0_%=; \
+ *(u64*)(r6 + 0) = r1; \
+l0_%=: /* else *target = sock */ \
+ if r1 != 0 goto l1_%=; \
+ *(u64*)(r6 + 0) = r2; \
+l1_%=: /* struct bpf_sock *sk = *target; */ \
+ r1 = *(u64*)(r6 + 0); \
+ /* if (sk) sk->mark = 42; bpf_sk_release(sk); */\
+ if r1 == 0 goto l2_%=; \
+ r3 = 42; \
+ *(u32*)(r1 + %[bpf_sock_mark]) = r3; \
+ call %[bpf_sk_release]; \
+l2_%=: r0 = 0; \
+ exit; \
+" :
+ : __imm(bpf_sk_lookup_tcp),
+ __imm(bpf_sk_release),
+ __imm_const(bpf_sock_mark, offsetof(struct bpf_sock, mark)),
+ __imm_const(sizeof_bpf_sock_tuple, sizeof(struct bpf_sock_tuple))
+ : __clobber_all);
+}
+
+SEC("socket")
+__description("unpriv: write pointer into map elem value")
+__success __failure_unpriv __msg_unpriv("R0 leaks addr")
+__retval(0)
+__naked void pointer_into_map_elem_value(void)
+{
+ asm volatile (" \
+ r1 = 0; \
+ *(u64*)(r10 - 8) = r1; \
+ r2 = r10; \
+ r2 += -8; \
+ r1 = %[map_hash_8b] ll; \
+ call %[bpf_map_lookup_elem]; \
+ if r0 == 0 goto l0_%=; \
+ *(u64*)(r0 + 0) = r0; \
+l0_%=: exit; \
+" :
+ : __imm(bpf_map_lookup_elem),
+ __imm_addr(map_hash_8b)
+ : __clobber_all);
+}
+
+SEC("socket")
+__description("alu32: mov u32 const")
+__success __failure_unpriv __msg_unpriv("R7 invalid mem access 'scalar'")
+__retval(0)
+__naked void alu32_mov_u32_const(void)
+{
+ asm volatile (" \
+ w7 = 0; \
+ w7 &= 1; \
+ w0 = w7; \
+ if r0 == 0 goto l0_%=; \
+ r0 = *(u64*)(r7 + 0); \
+l0_%=: exit; \
+" ::: __clobber_all);
+}
+
+SEC("socket")
+__description("unpriv: partial copy of pointer")
+__success __failure_unpriv __msg_unpriv("R10 partial copy")
+__retval(0)
+__naked void unpriv_partial_copy_of_pointer(void)
+{
+ asm volatile (" \
+ w1 = w10; \
+ r0 = 0; \
+ exit; \
+" ::: __clobber_all);
+}
+
+SEC("socket")
+__description("unpriv: pass pointer to tail_call")
+__success __failure_unpriv __msg_unpriv("R3 leaks addr into helper")
+__retval(0)
+__naked void pass_pointer_to_tail_call(void)
+{
+ asm volatile (" \
+ r3 = r1; \
+ r2 = %[map_prog1_socket] ll; \
+ call %[bpf_tail_call]; \
+ r0 = 0; \
+ exit; \
+" :
+ : __imm(bpf_tail_call),
+ __imm_addr(map_prog1_socket)
+ : __clobber_all);
+}
+
+SEC("socket")
+__description("unpriv: cmp map pointer with zero")
+__success __failure_unpriv __msg_unpriv("R1 pointer comparison")
+__retval(0)
+__naked void cmp_map_pointer_with_zero(void)
+{
+ asm volatile (" \
+ r1 = 0; \
+ r1 = %[map_hash_8b] ll; \
+ if r1 == 0 goto l0_%=; \
+l0_%=: r0 = 0; \
+ exit; \
+" :
+ : __imm_addr(map_hash_8b)
+ : __clobber_all);
+}
+
+SEC("socket")
+__description("unpriv: write into frame pointer")
+__failure __msg("frame pointer is read only")
+__failure_unpriv
+__naked void unpriv_write_into_frame_pointer(void)
+{
+ asm volatile (" \
+ r10 = r1; \
+ r0 = 0; \
+ exit; \
+" ::: __clobber_all);
+}
+
+SEC("socket")
+__description("unpriv: spill/fill frame pointer")
+__failure __msg("frame pointer is read only")
+__failure_unpriv
+__naked void unpriv_spill_fill_frame_pointer(void)
+{
+ asm volatile (" \
+ r6 = r10; \
+ r6 += -8; \
+ *(u64*)(r6 + 0) = r10; \
+ r10 = *(u64*)(r6 + 0); \
+ r0 = 0; \
+ exit; \
+" ::: __clobber_all);
+}
+
+SEC("socket")
+__description("unpriv: cmp of frame pointer")
+__success __failure_unpriv __msg_unpriv("R10 pointer comparison")
+__retval(0)
+__naked void unpriv_cmp_of_frame_pointer(void)
+{
+ asm volatile (" \
+ if r10 == 0 goto l0_%=; \
+l0_%=: r0 = 0; \
+ exit; \
+" ::: __clobber_all);
+}
+
+SEC("socket")
+__description("unpriv: adding of fp, reg")
+__success __failure_unpriv
+__msg_unpriv("R1 stack pointer arithmetic goes out of range")
+__retval(0)
+__naked void unpriv_adding_of_fp_reg(void)
+{
+ asm volatile (" \
+ r0 = 0; \
+ r1 = 0; \
+ r1 += r10; \
+ *(u64*)(r1 - 8) = r0; \
+ exit; \
+" ::: __clobber_all);
+}
+
+SEC("socket")
+__description("unpriv: adding of fp, imm")
+__success __failure_unpriv
+__msg_unpriv("R1 stack pointer arithmetic goes out of range")
+__retval(0)
+__naked void unpriv_adding_of_fp_imm(void)
+{
+ asm volatile (" \
+ r0 = 0; \
+ r1 = r10; \
+ r1 += 0; \
+ *(u64*)(r1 - 8) = r0; \
+ exit; \
+" ::: __clobber_all);
+}
+
+SEC("socket")
+__description("unpriv: cmp of stack pointer")
+__success __failure_unpriv __msg_unpriv("R2 pointer comparison")
+__retval(0)
+__naked void unpriv_cmp_of_stack_pointer(void)
+{
+ asm volatile (" \
+ r2 = r10; \
+ r2 += -8; \
+ if r2 == 0 goto l0_%=; \
+l0_%=: r0 = 0; \
+ exit; \
+" ::: __clobber_all);
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/verifier_unpriv_perf.c b/tools/testing/selftests/bpf/progs/verifier_unpriv_perf.c
new file mode 100644
index 000000000000..4d77407a0a79
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/verifier_unpriv_perf.c
@@ -0,0 +1,34 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Converted from tools/testing/selftests/bpf/verifier/unpriv.c */
+
+#include "vmlinux.h"
+#include <bpf/bpf_helpers.h>
+#include "bpf_misc.h"
+
+SEC("perf_event")
+__description("unpriv: spill/fill of different pointers ldx")
+__failure __msg("same insn cannot be used with different pointers")
+__naked void fill_of_different_pointers_ldx(void)
+{
+ asm volatile (" \
+ r6 = r10; \
+ r6 += -8; \
+ if r1 == 0 goto l0_%=; \
+ r2 = r10; \
+ r2 += %[__imm_0]; \
+ *(u64*)(r6 + 0) = r2; \
+l0_%=: if r1 != 0 goto l1_%=; \
+ *(u64*)(r6 + 0) = r1; \
+l1_%=: r1 = *(u64*)(r6 + 0); \
+ r1 = *(u64*)(r1 + %[sample_period]); \
+ r0 = 0; \
+ exit; \
+" :
+ : __imm_const(__imm_0,
+ -(__s32) offsetof(struct bpf_perf_event_data, sample_period) - 8),
+ __imm_const(sample_period,
+ offsetof(struct bpf_perf_event_data, sample_period))
+ : __clobber_all);
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/verifier_value.c b/tools/testing/selftests/bpf/progs/verifier_value.c
new file mode 100644
index 000000000000..b5af6b6f5acd
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/verifier_value.c
@@ -0,0 +1,158 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Converted from tools/testing/selftests/bpf/verifier/value.c */
+
+#include <linux/bpf.h>
+#include <bpf/bpf_helpers.h>
+#include "bpf_misc.h"
+
+#define MAX_ENTRIES 11
+
+struct test_val {
+ unsigned int index;
+ int foo[MAX_ENTRIES];
+};
+
+struct {
+ __uint(type, BPF_MAP_TYPE_HASH);
+ __uint(max_entries, 1);
+ __type(key, long long);
+ __type(value, struct test_val);
+} map_hash_48b SEC(".maps");
+
+SEC("socket")
+__description("map element value store of cleared call register")
+__failure __msg("R1 !read_ok")
+__failure_unpriv __msg_unpriv("R1 !read_ok")
+__naked void store_of_cleared_call_register(void)
+{
+ asm volatile (" \
+ r2 = r10; \
+ r2 += -8; \
+ r1 = 0; \
+ *(u64*)(r2 + 0) = r1; \
+ r1 = %[map_hash_48b] ll; \
+ call %[bpf_map_lookup_elem]; \
+ if r0 == 0 goto l0_%=; \
+ *(u64*)(r0 + 0) = r1; \
+l0_%=: exit; \
+" :
+ : __imm(bpf_map_lookup_elem),
+ __imm_addr(map_hash_48b)
+ : __clobber_all);
+}
+
+SEC("socket")
+__description("map element value with unaligned store")
+__success __failure_unpriv __msg_unpriv("R0 leaks addr")
+__retval(0) __flag(BPF_F_ANY_ALIGNMENT)
+__naked void element_value_with_unaligned_store(void)
+{
+ asm volatile (" \
+ r2 = r10; \
+ r2 += -8; \
+ r1 = 0; \
+ *(u64*)(r2 + 0) = r1; \
+ r1 = %[map_hash_48b] ll; \
+ call %[bpf_map_lookup_elem]; \
+ if r0 == 0 goto l0_%=; \
+ r0 += 3; \
+ r1 = 42; \
+ *(u64*)(r0 + 0) = r1; \
+ r1 = 43; \
+ *(u64*)(r0 + 2) = r1; \
+ r1 = 44; \
+ *(u64*)(r0 - 2) = r1; \
+ r8 = r0; \
+ r1 = 32; \
+ *(u64*)(r8 + 0) = r1; \
+ r1 = 33; \
+ *(u64*)(r8 + 2) = r1; \
+ r1 = 34; \
+ *(u64*)(r8 - 2) = r1; \
+ r8 += 5; \
+ r1 = 22; \
+ *(u64*)(r8 + 0) = r1; \
+ r1 = 23; \
+ *(u64*)(r8 + 4) = r1; \
+ r1 = 24; \
+ *(u64*)(r8 - 7) = r1; \
+ r7 = r8; \
+ r7 += 3; \
+ r1 = 22; \
+ *(u64*)(r7 + 0) = r1; \
+ r1 = 23; \
+ *(u64*)(r7 + 4) = r1; \
+ r1 = 24; \
+ *(u64*)(r7 - 4) = r1; \
+l0_%=: exit; \
+" :
+ : __imm(bpf_map_lookup_elem),
+ __imm_addr(map_hash_48b)
+ : __clobber_all);
+}
+
+SEC("socket")
+__description("map element value with unaligned load")
+__success __failure_unpriv __msg_unpriv("R0 leaks addr")
+__retval(0) __flag(BPF_F_ANY_ALIGNMENT)
+__naked void element_value_with_unaligned_load(void)
+{
+ asm volatile (" \
+ r2 = r10; \
+ r2 += -8; \
+ r1 = 0; \
+ *(u64*)(r2 + 0) = r1; \
+ r1 = %[map_hash_48b] ll; \
+ call %[bpf_map_lookup_elem]; \
+ if r0 == 0 goto l0_%=; \
+ r1 = *(u32*)(r0 + 0); \
+ if r1 >= %[max_entries] goto l0_%=; \
+ r0 += 3; \
+ r7 = *(u64*)(r0 + 0); \
+ r7 = *(u64*)(r0 + 2); \
+ r8 = r0; \
+ r7 = *(u64*)(r8 + 0); \
+ r7 = *(u64*)(r8 + 2); \
+ r0 += 5; \
+ r7 = *(u64*)(r0 + 0); \
+ r7 = *(u64*)(r0 + 4); \
+l0_%=: exit; \
+" :
+ : __imm(bpf_map_lookup_elem),
+ __imm_addr(map_hash_48b),
+ __imm_const(max_entries, MAX_ENTRIES)
+ : __clobber_all);
+}
+
+SEC("socket")
+__description("map element value is preserved across register spilling")
+__success __failure_unpriv __msg_unpriv("R0 leaks addr")
+__retval(0) __flag(BPF_F_ANY_ALIGNMENT)
+__naked void is_preserved_across_register_spilling(void)
+{
+ asm volatile (" \
+ r2 = r10; \
+ r2 += -8; \
+ r1 = 0; \
+ *(u64*)(r2 + 0) = r1; \
+ r1 = %[map_hash_48b] ll; \
+ call %[bpf_map_lookup_elem]; \
+ if r0 == 0 goto l0_%=; \
+ r0 += %[test_val_foo]; \
+ r1 = 42; \
+ *(u64*)(r0 + 0) = r1; \
+ r1 = r10; \
+ r1 += -184; \
+ *(u64*)(r1 + 0) = r0; \
+ r3 = *(u64*)(r1 + 0); \
+ r1 = 42; \
+ *(u64*)(r3 + 0) = r1; \
+l0_%=: exit; \
+" :
+ : __imm(bpf_map_lookup_elem),
+ __imm_addr(map_hash_48b),
+ __imm_const(test_val_foo, offsetof(struct test_val, foo))
+ : __clobber_all);
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/verifier_value_adj_spill.c b/tools/testing/selftests/bpf/progs/verifier_value_adj_spill.c
new file mode 100644
index 000000000000..d7a5ba9bbe6a
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/verifier_value_adj_spill.c
@@ -0,0 +1,78 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Converted from tools/testing/selftests/bpf/verifier/value_adj_spill.c */
+
+#include <linux/bpf.h>
+#include <bpf/bpf_helpers.h>
+#include "bpf_misc.h"
+
+#define MAX_ENTRIES 11
+
+struct test_val {
+ unsigned int index;
+ int foo[MAX_ENTRIES];
+};
+
+struct {
+ __uint(type, BPF_MAP_TYPE_HASH);
+ __uint(max_entries, 1);
+ __type(key, long long);
+ __type(value, struct test_val);
+} map_hash_48b SEC(".maps");
+
+SEC("socket")
+__description("map element value is preserved across register spilling")
+__success __failure_unpriv __msg_unpriv("R0 leaks addr")
+__retval(0)
+__naked void is_preserved_across_register_spilling(void)
+{
+ asm volatile (" \
+ r2 = r10; \
+ r2 += -8; \
+ r1 = 0; \
+ *(u64*)(r2 + 0) = r1; \
+ r1 = %[map_hash_48b] ll; \
+ call %[bpf_map_lookup_elem]; \
+ if r0 == 0 goto l0_%=; \
+ r1 = 42; \
+ *(u64*)(r0 + 0) = r1; \
+ r1 = r10; \
+ r1 += -184; \
+ *(u64*)(r1 + 0) = r0; \
+ r3 = *(u64*)(r1 + 0); \
+ r1 = 42; \
+ *(u64*)(r3 + 0) = r1; \
+l0_%=: exit; \
+" :
+ : __imm(bpf_map_lookup_elem),
+ __imm_addr(map_hash_48b)
+ : __clobber_all);
+}
+
+SEC("socket")
+__description("map element value or null is marked on register spilling")
+__success __failure_unpriv __msg_unpriv("R0 leaks addr")
+__retval(0)
+__naked void is_marked_on_register_spilling(void)
+{
+ asm volatile (" \
+ r2 = r10; \
+ r2 += -8; \
+ r1 = 0; \
+ *(u64*)(r2 + 0) = r1; \
+ r1 = %[map_hash_48b] ll; \
+ call %[bpf_map_lookup_elem]; \
+ r1 = r10; \
+ r1 += -152; \
+ *(u64*)(r1 + 0) = r0; \
+ if r0 == 0 goto l0_%=; \
+ r3 = *(u64*)(r1 + 0); \
+ r1 = 42; \
+ *(u64*)(r3 + 0) = r1; \
+l0_%=: exit; \
+" :
+ : __imm(bpf_map_lookup_elem),
+ __imm_addr(map_hash_48b)
+ : __clobber_all);
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/verifier_value_illegal_alu.c b/tools/testing/selftests/bpf/progs/verifier_value_illegal_alu.c
new file mode 100644
index 000000000000..a9ab37d3b9e2
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/verifier_value_illegal_alu.c
@@ -0,0 +1,168 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Converted from tools/testing/selftests/bpf/verifier/value_illegal_alu.c */
+
+#include <linux/bpf.h>
+#include <bpf/bpf_helpers.h>
+#include "bpf_misc.h"
+
+#define MAX_ENTRIES 11
+
+struct test_val {
+ unsigned int index;
+ int foo[MAX_ENTRIES];
+};
+
+struct {
+ __uint(type, BPF_MAP_TYPE_HASH);
+ __uint(max_entries, 1);
+ __type(key, long long);
+ __type(value, struct test_val);
+} map_hash_48b SEC(".maps");
+
+SEC("socket")
+__description("map element value illegal alu op, 1")
+__failure __msg("R0 bitwise operator &= on pointer")
+__failure_unpriv
+__naked void value_illegal_alu_op_1(void)
+{
+ asm volatile (" \
+ r2 = r10; \
+ r2 += -8; \
+ r1 = 0; \
+ *(u64*)(r2 + 0) = r1; \
+ r1 = %[map_hash_48b] ll; \
+ call %[bpf_map_lookup_elem]; \
+ if r0 == 0 goto l0_%=; \
+ r0 &= 8; \
+ r1 = 22; \
+ *(u64*)(r0 + 0) = r1; \
+l0_%=: exit; \
+" :
+ : __imm(bpf_map_lookup_elem),
+ __imm_addr(map_hash_48b)
+ : __clobber_all);
+}
+
+SEC("socket")
+__description("map element value illegal alu op, 2")
+__failure __msg("R0 32-bit pointer arithmetic prohibited")
+__failure_unpriv
+__naked void value_illegal_alu_op_2(void)
+{
+ asm volatile (" \
+ r2 = r10; \
+ r2 += -8; \
+ r1 = 0; \
+ *(u64*)(r2 + 0) = r1; \
+ r1 = %[map_hash_48b] ll; \
+ call %[bpf_map_lookup_elem]; \
+ if r0 == 0 goto l0_%=; \
+ w0 += 0; \
+ r1 = 22; \
+ *(u64*)(r0 + 0) = r1; \
+l0_%=: exit; \
+" :
+ : __imm(bpf_map_lookup_elem),
+ __imm_addr(map_hash_48b)
+ : __clobber_all);
+}
+
+SEC("socket")
+__description("map element value illegal alu op, 3")
+__failure __msg("R0 pointer arithmetic with /= operator")
+__failure_unpriv
+__naked void value_illegal_alu_op_3(void)
+{
+ asm volatile (" \
+ r2 = r10; \
+ r2 += -8; \
+ r1 = 0; \
+ *(u64*)(r2 + 0) = r1; \
+ r1 = %[map_hash_48b] ll; \
+ call %[bpf_map_lookup_elem]; \
+ if r0 == 0 goto l0_%=; \
+ r0 /= 42; \
+ r1 = 22; \
+ *(u64*)(r0 + 0) = r1; \
+l0_%=: exit; \
+" :
+ : __imm(bpf_map_lookup_elem),
+ __imm_addr(map_hash_48b)
+ : __clobber_all);
+}
+
+SEC("socket")
+__description("map element value illegal alu op, 4")
+__failure __msg("invalid mem access 'scalar'")
+__failure_unpriv __msg_unpriv("R0 pointer arithmetic prohibited")
+__flag(BPF_F_ANY_ALIGNMENT)
+__naked void value_illegal_alu_op_4(void)
+{
+ asm volatile (" \
+ r2 = r10; \
+ r2 += -8; \
+ r1 = 0; \
+ *(u64*)(r2 + 0) = r1; \
+ r1 = %[map_hash_48b] ll; \
+ call %[bpf_map_lookup_elem]; \
+ if r0 == 0 goto l0_%=; \
+ r0 = be64 r0; \
+ r1 = 22; \
+ *(u64*)(r0 + 0) = r1; \
+l0_%=: exit; \
+" :
+ : __imm(bpf_map_lookup_elem),
+ __imm_addr(map_hash_48b)
+ : __clobber_all);
+}
+
+SEC("socket")
+__description("map element value illegal alu op, 5")
+__failure __msg("R0 invalid mem access 'scalar'")
+__msg_unpriv("leaking pointer from stack off -8")
+__flag(BPF_F_ANY_ALIGNMENT)
+__naked void value_illegal_alu_op_5(void)
+{
+ asm volatile (" \
+ r2 = r10; \
+ r2 += -8; \
+ r1 = 0; \
+ *(u64*)(r2 + 0) = r1; \
+ r1 = %[map_hash_48b] ll; \
+ call %[bpf_map_lookup_elem]; \
+ if r0 == 0 goto l0_%=; \
+ r3 = 4096; \
+ r2 = r10; \
+ r2 += -8; \
+ *(u64*)(r2 + 0) = r0; \
+ lock *(u64 *)(r2 + 0) += r3; \
+ r0 = *(u64*)(r2 + 0); \
+ r1 = 22; \
+ *(u64*)(r0 + 0) = r1; \
+l0_%=: exit; \
+" :
+ : __imm(bpf_map_lookup_elem),
+ __imm_addr(map_hash_48b)
+ : __clobber_all);
+}
+
+SEC("flow_dissector")
+__description("flow_keys illegal alu op with variable offset")
+__failure __msg("R7 pointer arithmetic on flow_keys prohibited")
+__naked void flow_keys_illegal_variable_offset_alu(void)
+{
+ asm volatile(" \
+ r6 = r1; \
+ r7 = *(u64*)(r6 + %[flow_keys_off]); \
+ r8 = 8; \
+ r8 /= 1; \
+ r8 &= 8; \
+ r7 += r8; \
+ r0 = *(u64*)(r7 + 0); \
+ exit; \
+" :
+ : __imm_const(flow_keys_off, offsetof(struct __sk_buff, flow_keys))
+ : __clobber_all);
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/verifier_value_or_null.c b/tools/testing/selftests/bpf/progs/verifier_value_or_null.c
new file mode 100644
index 000000000000..8ff668a242eb
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/verifier_value_or_null.c
@@ -0,0 +1,288 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Converted from tools/testing/selftests/bpf/verifier/value_or_null.c */
+
+#include <linux/bpf.h>
+#include <bpf/bpf_helpers.h>
+#include "bpf_misc.h"
+
+#define MAX_ENTRIES 11
+
+struct test_val {
+ unsigned int index;
+ int foo[MAX_ENTRIES];
+};
+
+struct {
+ __uint(type, BPF_MAP_TYPE_HASH);
+ __uint(max_entries, 1);
+ __type(key, long long);
+ __type(value, struct test_val);
+} map_hash_48b SEC(".maps");
+
+struct {
+ __uint(type, BPF_MAP_TYPE_HASH);
+ __uint(max_entries, 1);
+ __type(key, long long);
+ __type(value, long long);
+} map_hash_8b SEC(".maps");
+
+SEC("tc")
+__description("multiple registers share map_lookup_elem result")
+__success __retval(0)
+__naked void share_map_lookup_elem_result(void)
+{
+ asm volatile (" \
+ r1 = 10; \
+ *(u64*)(r10 - 8) = r1; \
+ r2 = r10; \
+ r2 += -8; \
+ r1 = %[map_hash_8b] ll; \
+ call %[bpf_map_lookup_elem]; \
+ r4 = r0; \
+ if r0 == 0 goto l0_%=; \
+ r1 = 0; \
+ *(u64*)(r4 + 0) = r1; \
+l0_%=: exit; \
+" :
+ : __imm(bpf_map_lookup_elem),
+ __imm_addr(map_hash_8b)
+ : __clobber_all);
+}
+
+SEC("tc")
+__description("alu ops on ptr_to_map_value_or_null, 1")
+__failure __msg("R4 pointer arithmetic on map_value_or_null")
+__naked void map_value_or_null_1(void)
+{
+ asm volatile (" \
+ r1 = 10; \
+ *(u64*)(r10 - 8) = r1; \
+ r2 = r10; \
+ r2 += -8; \
+ r1 = %[map_hash_8b] ll; \
+ call %[bpf_map_lookup_elem]; \
+ r4 = r0; \
+ r4 += -2; \
+ r4 += 2; \
+ if r0 == 0 goto l0_%=; \
+ r1 = 0; \
+ *(u64*)(r4 + 0) = r1; \
+l0_%=: exit; \
+" :
+ : __imm(bpf_map_lookup_elem),
+ __imm_addr(map_hash_8b)
+ : __clobber_all);
+}
+
+SEC("tc")
+__description("alu ops on ptr_to_map_value_or_null, 2")
+__failure __msg("R4 pointer arithmetic on map_value_or_null")
+__naked void map_value_or_null_2(void)
+{
+ asm volatile (" \
+ r1 = 10; \
+ *(u64*)(r10 - 8) = r1; \
+ r2 = r10; \
+ r2 += -8; \
+ r1 = %[map_hash_8b] ll; \
+ call %[bpf_map_lookup_elem]; \
+ r4 = r0; \
+ r4 &= -1; \
+ if r0 == 0 goto l0_%=; \
+ r1 = 0; \
+ *(u64*)(r4 + 0) = r1; \
+l0_%=: exit; \
+" :
+ : __imm(bpf_map_lookup_elem),
+ __imm_addr(map_hash_8b)
+ : __clobber_all);
+}
+
+SEC("tc")
+__description("alu ops on ptr_to_map_value_or_null, 3")
+__failure __msg("R4 pointer arithmetic on map_value_or_null")
+__naked void map_value_or_null_3(void)
+{
+ asm volatile (" \
+ r1 = 10; \
+ *(u64*)(r10 - 8) = r1; \
+ r2 = r10; \
+ r2 += -8; \
+ r1 = %[map_hash_8b] ll; \
+ call %[bpf_map_lookup_elem]; \
+ r4 = r0; \
+ r4 <<= 1; \
+ if r0 == 0 goto l0_%=; \
+ r1 = 0; \
+ *(u64*)(r4 + 0) = r1; \
+l0_%=: exit; \
+" :
+ : __imm(bpf_map_lookup_elem),
+ __imm_addr(map_hash_8b)
+ : __clobber_all);
+}
+
+SEC("tc")
+__description("invalid memory access with multiple map_lookup_elem calls")
+__failure __msg("R4 !read_ok")
+__naked void multiple_map_lookup_elem_calls(void)
+{
+ asm volatile (" \
+ r1 = 10; \
+ *(u64*)(r10 - 8) = r1; \
+ r2 = r10; \
+ r2 += -8; \
+ r1 = %[map_hash_8b] ll; \
+ r8 = r1; \
+ r7 = r2; \
+ call %[bpf_map_lookup_elem]; \
+ r4 = r0; \
+ r1 = r8; \
+ r2 = r7; \
+ call %[bpf_map_lookup_elem]; \
+ if r0 == 0 goto l0_%=; \
+ r1 = 0; \
+ *(u64*)(r4 + 0) = r1; \
+l0_%=: exit; \
+" :
+ : __imm(bpf_map_lookup_elem),
+ __imm_addr(map_hash_8b)
+ : __clobber_all);
+}
+
+SEC("tc")
+__description("valid indirect map_lookup_elem access with 2nd lookup in branch")
+__success __retval(0)
+__naked void with_2nd_lookup_in_branch(void)
+{
+ asm volatile (" \
+ r1 = 10; \
+ *(u64*)(r10 - 8) = r1; \
+ r2 = r10; \
+ r2 += -8; \
+ r1 = %[map_hash_8b] ll; \
+ r8 = r1; \
+ r7 = r2; \
+ call %[bpf_map_lookup_elem]; \
+ r2 = 10; \
+ if r2 != 0 goto l0_%=; \
+ r1 = r8; \
+ r2 = r7; \
+ call %[bpf_map_lookup_elem]; \
+l0_%=: r4 = r0; \
+ if r0 == 0 goto l1_%=; \
+ r1 = 0; \
+ *(u64*)(r4 + 0) = r1; \
+l1_%=: exit; \
+" :
+ : __imm(bpf_map_lookup_elem),
+ __imm_addr(map_hash_8b)
+ : __clobber_all);
+}
+
+SEC("socket")
+__description("invalid map access from else condition")
+__failure __msg("R0 unbounded memory access")
+__failure_unpriv __msg_unpriv("R0 leaks addr")
+__flag(BPF_F_ANY_ALIGNMENT)
+__naked void map_access_from_else_condition(void)
+{
+ asm volatile (" \
+ r1 = 0; \
+ *(u64*)(r10 - 8) = r1; \
+ r2 = r10; \
+ r2 += -8; \
+ r1 = %[map_hash_48b] ll; \
+ call %[bpf_map_lookup_elem]; \
+ if r0 == 0 goto l0_%=; \
+ r1 = *(u32*)(r0 + 0); \
+ if r1 >= %[__imm_0] goto l1_%=; \
+ r1 += 1; \
+l1_%=: r1 <<= 2; \
+ r0 += r1; \
+ r1 = %[test_val_foo]; \
+ *(u64*)(r0 + 0) = r1; \
+l0_%=: exit; \
+" :
+ : __imm(bpf_map_lookup_elem),
+ __imm_addr(map_hash_48b),
+ __imm_const(__imm_0, MAX_ENTRIES-1),
+ __imm_const(test_val_foo, offsetof(struct test_val, foo))
+ : __clobber_all);
+}
+
+SEC("tc")
+__description("map lookup and null branch prediction")
+__success __retval(0)
+__naked void lookup_and_null_branch_prediction(void)
+{
+ asm volatile (" \
+ r1 = 10; \
+ *(u64*)(r10 - 8) = r1; \
+ r2 = r10; \
+ r2 += -8; \
+ r1 = %[map_hash_8b] ll; \
+ call %[bpf_map_lookup_elem]; \
+ r6 = r0; \
+ if r6 == 0 goto l0_%=; \
+ if r6 != 0 goto l0_%=; \
+ r10 += 10; \
+l0_%=: exit; \
+" :
+ : __imm(bpf_map_lookup_elem),
+ __imm_addr(map_hash_8b)
+ : __clobber_all);
+}
+
+SEC("cgroup/skb")
+__description("MAP_VALUE_OR_NULL check_ids() in regsafe()")
+__failure __msg("R8 invalid mem access 'map_value_or_null'")
+__failure_unpriv __msg_unpriv("")
+__flag(BPF_F_TEST_STATE_FREQ)
+__naked void null_check_ids_in_regsafe(void)
+{
+ asm volatile (" \
+ r1 = 0; \
+ *(u64*)(r10 - 8) = r1; \
+ /* r9 = map_lookup_elem(...) */ \
+ r2 = r10; \
+ r2 += -8; \
+ r1 = %[map_hash_8b] ll; \
+ call %[bpf_map_lookup_elem]; \
+ r9 = r0; \
+ /* r8 = map_lookup_elem(...) */ \
+ r2 = r10; \
+ r2 += -8; \
+ r1 = %[map_hash_8b] ll; \
+ call %[bpf_map_lookup_elem]; \
+ r8 = r0; \
+ /* r7 = ktime_get_ns() */ \
+ call %[bpf_ktime_get_ns]; \
+ r7 = r0; \
+ /* r6 = ktime_get_ns() */ \
+ call %[bpf_ktime_get_ns]; \
+ r6 = r0; \
+ /* if r6 > r7 goto +1 ; no new information about the state is derived from\
+ * ; this check, thus produced verifier states differ\
+ * ; only in 'insn_idx' \
+ * r9 = r8 ; optionally share ID between r9 and r8\
+ */ \
+ if r6 > r7 goto l0_%=; \
+ r9 = r8; \
+l0_%=: /* if r9 == 0 goto <exit> */ \
+ if r9 == 0 goto l1_%=; \
+ /* read map value via r8, this is not always \
+ * safe because r8 might be not equal to r9. \
+ */ \
+ r0 = *(u64*)(r8 + 0); \
+l1_%=: /* exit 0 */ \
+ r0 = 0; \
+ exit; \
+" :
+ : __imm(bpf_ktime_get_ns),
+ __imm(bpf_map_lookup_elem),
+ __imm_addr(map_hash_8b)
+ : __clobber_all);
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/verifier_value_ptr_arith.c b/tools/testing/selftests/bpf/progs/verifier_value_ptr_arith.c
new file mode 100644
index 000000000000..5ba6e53571c8
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/verifier_value_ptr_arith.c
@@ -0,0 +1,1423 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Converted from tools/testing/selftests/bpf/verifier/value_ptr_arith.c */
+
+#include <linux/bpf.h>
+#include <bpf/bpf_helpers.h>
+#include <errno.h>
+#include "bpf_misc.h"
+
+#define MAX_ENTRIES 11
+
+struct test_val {
+ unsigned int index;
+ int foo[MAX_ENTRIES];
+};
+
+struct {
+ __uint(type, BPF_MAP_TYPE_ARRAY);
+ __uint(max_entries, 1);
+ __type(key, int);
+ __type(value, struct test_val);
+} map_array_48b SEC(".maps");
+
+struct other_val {
+ long long foo;
+ long long bar;
+};
+
+struct {
+ __uint(type, BPF_MAP_TYPE_HASH);
+ __uint(max_entries, 1);
+ __type(key, long long);
+ __type(value, struct other_val);
+} map_hash_16b SEC(".maps");
+
+struct {
+ __uint(type, BPF_MAP_TYPE_HASH);
+ __uint(max_entries, 1);
+ __type(key, long long);
+ __type(value, struct test_val);
+} map_hash_48b SEC(".maps");
+
+SEC("socket")
+__description("map access: known scalar += value_ptr unknown vs const")
+__success __failure_unpriv
+__msg_unpriv("R1 tried to add from different maps, paths or scalars")
+__retval(1)
+__naked void value_ptr_unknown_vs_const(void)
+{
+ asm volatile (" \
+ r0 = *(u32*)(r1 + %[__sk_buff_len]); \
+ r1 = 0; \
+ *(u64*)(r10 - 8) = r1; \
+ r2 = r10; \
+ r2 += -8; \
+ if r0 == 1 goto l0_%=; \
+ r1 = %[map_hash_16b] ll; \
+ if r0 != 1 goto l1_%=; \
+l0_%=: r1 = %[map_array_48b] ll; \
+l1_%=: call %[bpf_map_lookup_elem]; \
+ if r0 == 0 goto l2_%=; \
+ r4 = *(u8*)(r0 + 0); \
+ if r4 == 1 goto l3_%=; \
+ r1 = 6; \
+ r1 = -r1; \
+ r1 &= 0x7; \
+ goto l4_%=; \
+l3_%=: r1 = 3; \
+l4_%=: r1 += r0; \
+ r0 = *(u8*)(r1 + 0); \
+l2_%=: r0 = 1; \
+ exit; \
+" :
+ : __imm(bpf_map_lookup_elem),
+ __imm_addr(map_array_48b),
+ __imm_addr(map_hash_16b),
+ __imm_const(__sk_buff_len, offsetof(struct __sk_buff, len))
+ : __clobber_all);
+}
+
+SEC("socket")
+__description("map access: known scalar += value_ptr const vs unknown")
+__success __failure_unpriv
+__msg_unpriv("R1 tried to add from different maps, paths or scalars")
+__retval(1)
+__naked void value_ptr_const_vs_unknown(void)
+{
+ asm volatile (" \
+ r0 = *(u32*)(r1 + %[__sk_buff_len]); \
+ r1 = 0; \
+ *(u64*)(r10 - 8) = r1; \
+ r2 = r10; \
+ r2 += -8; \
+ if r0 == 1 goto l0_%=; \
+ r1 = %[map_hash_16b] ll; \
+ if r0 != 1 goto l1_%=; \
+l0_%=: r1 = %[map_array_48b] ll; \
+l1_%=: call %[bpf_map_lookup_elem]; \
+ if r0 == 0 goto l2_%=; \
+ r4 = *(u8*)(r0 + 0); \
+ if r4 == 1 goto l3_%=; \
+ r1 = 3; \
+ goto l4_%=; \
+l3_%=: r1 = 6; \
+ r1 = -r1; \
+ r1 &= 0x7; \
+l4_%=: r1 += r0; \
+ r0 = *(u8*)(r1 + 0); \
+l2_%=: r0 = 1; \
+ exit; \
+" :
+ : __imm(bpf_map_lookup_elem),
+ __imm_addr(map_array_48b),
+ __imm_addr(map_hash_16b),
+ __imm_const(__sk_buff_len, offsetof(struct __sk_buff, len))
+ : __clobber_all);
+}
+
+SEC("socket")
+__description("map access: known scalar += value_ptr const vs const (ne)")
+__success __failure_unpriv
+__msg_unpriv("R1 tried to add from different maps, paths or scalars")
+__retval(1)
+__naked void ptr_const_vs_const_ne(void)
+{
+ asm volatile (" \
+ r0 = *(u32*)(r1 + %[__sk_buff_len]); \
+ r1 = 0; \
+ *(u64*)(r10 - 8) = r1; \
+ r2 = r10; \
+ r2 += -8; \
+ if r0 == 1 goto l0_%=; \
+ r1 = %[map_hash_16b] ll; \
+ if r0 != 1 goto l1_%=; \
+l0_%=: r1 = %[map_array_48b] ll; \
+l1_%=: call %[bpf_map_lookup_elem]; \
+ if r0 == 0 goto l2_%=; \
+ r4 = *(u8*)(r0 + 0); \
+ if r4 == 1 goto l3_%=; \
+ r1 = 3; \
+ goto l4_%=; \
+l3_%=: r1 = 5; \
+l4_%=: r1 += r0; \
+ r0 = *(u8*)(r1 + 0); \
+l2_%=: r0 = 1; \
+ exit; \
+" :
+ : __imm(bpf_map_lookup_elem),
+ __imm_addr(map_array_48b),
+ __imm_addr(map_hash_16b),
+ __imm_const(__sk_buff_len, offsetof(struct __sk_buff, len))
+ : __clobber_all);
+}
+
+SEC("socket")
+__description("map access: known scalar += value_ptr const vs const (eq)")
+__success __success_unpriv __retval(1)
+__naked void ptr_const_vs_const_eq(void)
+{
+ asm volatile (" \
+ r0 = *(u32*)(r1 + %[__sk_buff_len]); \
+ r1 = 0; \
+ *(u64*)(r10 - 8) = r1; \
+ r2 = r10; \
+ r2 += -8; \
+ if r0 == 1 goto l0_%=; \
+ r1 = %[map_hash_16b] ll; \
+ if r0 != 1 goto l1_%=; \
+l0_%=: r1 = %[map_array_48b] ll; \
+l1_%=: call %[bpf_map_lookup_elem]; \
+ if r0 == 0 goto l2_%=; \
+ r4 = *(u8*)(r0 + 0); \
+ if r4 == 1 goto l3_%=; \
+ r1 = 5; \
+ goto l4_%=; \
+l3_%=: r1 = 5; \
+l4_%=: r1 += r0; \
+ r0 = *(u8*)(r1 + 0); \
+l2_%=: r0 = 1; \
+ exit; \
+" :
+ : __imm(bpf_map_lookup_elem),
+ __imm_addr(map_array_48b),
+ __imm_addr(map_hash_16b),
+ __imm_const(__sk_buff_len, offsetof(struct __sk_buff, len))
+ : __clobber_all);
+}
+
+SEC("socket")
+__description("map access: known scalar += value_ptr unknown vs unknown (eq)")
+__success __success_unpriv __retval(1)
+__naked void ptr_unknown_vs_unknown_eq(void)
+{
+ asm volatile (" \
+ r0 = *(u32*)(r1 + %[__sk_buff_len]); \
+ r1 = 0; \
+ *(u64*)(r10 - 8) = r1; \
+ r2 = r10; \
+ r2 += -8; \
+ if r0 == 1 goto l0_%=; \
+ r1 = %[map_hash_16b] ll; \
+ if r0 != 1 goto l1_%=; \
+l0_%=: r1 = %[map_array_48b] ll; \
+l1_%=: call %[bpf_map_lookup_elem]; \
+ if r0 == 0 goto l2_%=; \
+ r4 = *(u8*)(r0 + 0); \
+ if r4 == 1 goto l3_%=; \
+ r1 = 6; \
+ r1 = -r1; \
+ r1 &= 0x7; \
+ goto l4_%=; \
+l3_%=: r1 = 6; \
+ r1 = -r1; \
+ r1 &= 0x7; \
+l4_%=: r1 += r0; \
+ r0 = *(u8*)(r1 + 0); \
+l2_%=: r0 = 1; \
+ exit; \
+" :
+ : __imm(bpf_map_lookup_elem),
+ __imm_addr(map_array_48b),
+ __imm_addr(map_hash_16b),
+ __imm_const(__sk_buff_len, offsetof(struct __sk_buff, len))
+ : __clobber_all);
+}
+
+SEC("socket")
+__description("map access: known scalar += value_ptr unknown vs unknown (lt)")
+__success __failure_unpriv
+__msg_unpriv("R1 tried to add from different maps, paths or scalars")
+__retval(1)
+__naked void ptr_unknown_vs_unknown_lt(void)
+{
+ asm volatile (" \
+ r0 = *(u32*)(r1 + %[__sk_buff_len]); \
+ r1 = 0; \
+ *(u64*)(r10 - 8) = r1; \
+ r2 = r10; \
+ r2 += -8; \
+ if r0 == 1 goto l0_%=; \
+ r1 = %[map_hash_16b] ll; \
+ if r0 != 1 goto l1_%=; \
+l0_%=: r1 = %[map_array_48b] ll; \
+l1_%=: call %[bpf_map_lookup_elem]; \
+ if r0 == 0 goto l2_%=; \
+ r4 = *(u8*)(r0 + 0); \
+ if r4 == 1 goto l3_%=; \
+ r1 = 6; \
+ r1 = -r1; \
+ r1 &= 0x3; \
+ goto l4_%=; \
+l3_%=: r1 = 6; \
+ r1 = -r1; \
+ r1 &= 0x7; \
+l4_%=: r1 += r0; \
+ r0 = *(u8*)(r1 + 0); \
+l2_%=: r0 = 1; \
+ exit; \
+" :
+ : __imm(bpf_map_lookup_elem),
+ __imm_addr(map_array_48b),
+ __imm_addr(map_hash_16b),
+ __imm_const(__sk_buff_len, offsetof(struct __sk_buff, len))
+ : __clobber_all);
+}
+
+SEC("socket")
+__description("map access: known scalar += value_ptr unknown vs unknown (gt)")
+__success __failure_unpriv
+__msg_unpriv("R1 tried to add from different maps, paths or scalars")
+__retval(1)
+__naked void ptr_unknown_vs_unknown_gt(void)
+{
+ asm volatile (" \
+ r0 = *(u32*)(r1 + %[__sk_buff_len]); \
+ r1 = 0; \
+ *(u64*)(r10 - 8) = r1; \
+ r2 = r10; \
+ r2 += -8; \
+ if r0 == 1 goto l0_%=; \
+ r1 = %[map_hash_16b] ll; \
+ if r0 != 1 goto l1_%=; \
+l0_%=: r1 = %[map_array_48b] ll; \
+l1_%=: call %[bpf_map_lookup_elem]; \
+ if r0 == 0 goto l2_%=; \
+ r4 = *(u8*)(r0 + 0); \
+ if r4 == 1 goto l3_%=; \
+ r1 = 6; \
+ r1 = -r1; \
+ r1 &= 0x7; \
+ goto l4_%=; \
+l3_%=: r1 = 6; \
+ r1 = -r1; \
+ r1 &= 0x3; \
+l4_%=: r1 += r0; \
+ r0 = *(u8*)(r1 + 0); \
+l2_%=: r0 = 1; \
+ exit; \
+" :
+ : __imm(bpf_map_lookup_elem),
+ __imm_addr(map_array_48b),
+ __imm_addr(map_hash_16b),
+ __imm_const(__sk_buff_len, offsetof(struct __sk_buff, len))
+ : __clobber_all);
+}
+
+SEC("socket")
+__description("map access: known scalar += value_ptr from different maps")
+__success __success_unpriv __retval(1)
+__naked void value_ptr_from_different_maps(void)
+{
+ asm volatile (" \
+ r0 = *(u32*)(r1 + %[__sk_buff_len]); \
+ r1 = 0; \
+ *(u64*)(r10 - 8) = r1; \
+ r2 = r10; \
+ r2 += -8; \
+ if r0 == 1 goto l0_%=; \
+ r1 = %[map_hash_16b] ll; \
+ if r0 != 1 goto l1_%=; \
+l0_%=: r1 = %[map_array_48b] ll; \
+l1_%=: call %[bpf_map_lookup_elem]; \
+ if r0 == 0 goto l2_%=; \
+ r1 = 4; \
+ r1 += r0; \
+ r0 = *(u8*)(r1 + 0); \
+l2_%=: r0 = 1; \
+ exit; \
+" :
+ : __imm(bpf_map_lookup_elem),
+ __imm_addr(map_array_48b),
+ __imm_addr(map_hash_16b),
+ __imm_const(__sk_buff_len, offsetof(struct __sk_buff, len))
+ : __clobber_all);
+}
+
+SEC("socket")
+__description("map access: value_ptr -= known scalar from different maps")
+__success __failure_unpriv
+__msg_unpriv("R0 min value is outside of the allowed memory range")
+__retval(1)
+__naked void known_scalar_from_different_maps(void)
+{
+ asm volatile (" \
+ r0 = *(u32*)(r1 + %[__sk_buff_len]); \
+ r1 = 0; \
+ *(u64*)(r10 - 8) = r1; \
+ r2 = r10; \
+ r2 += -8; \
+ if r0 == 1 goto l0_%=; \
+ r1 = %[map_hash_16b] ll; \
+ if r0 != 1 goto l1_%=; \
+l0_%=: r1 = %[map_array_48b] ll; \
+l1_%=: call %[bpf_map_lookup_elem]; \
+ if r0 == 0 goto l2_%=; \
+ r1 = 4; \
+ r0 -= r1; \
+ r0 += r1; \
+ r0 = *(u8*)(r0 + 0); \
+l2_%=: r0 = 1; \
+ exit; \
+" :
+ : __imm(bpf_map_lookup_elem),
+ __imm_addr(map_array_48b),
+ __imm_addr(map_hash_16b),
+ __imm_const(__sk_buff_len, offsetof(struct __sk_buff, len))
+ : __clobber_all);
+}
+
+SEC("socket")
+__description("map access: known scalar += value_ptr from different maps, but same value properties")
+__success __success_unpriv __retval(1)
+__naked void maps_but_same_value_properties(void)
+{
+ asm volatile (" \
+ r0 = *(u32*)(r1 + %[__sk_buff_len]); \
+ r1 = 0; \
+ *(u64*)(r10 - 8) = r1; \
+ r2 = r10; \
+ r2 += -8; \
+ if r0 == 1 goto l0_%=; \
+ r1 = %[map_hash_48b] ll; \
+ if r0 != 1 goto l1_%=; \
+l0_%=: r1 = %[map_array_48b] ll; \
+l1_%=: call %[bpf_map_lookup_elem]; \
+ if r0 == 0 goto l2_%=; \
+ r1 = 4; \
+ r1 += r0; \
+ r0 = *(u8*)(r1 + 0); \
+l2_%=: r0 = 1; \
+ exit; \
+" :
+ : __imm(bpf_map_lookup_elem),
+ __imm_addr(map_array_48b),
+ __imm_addr(map_hash_48b),
+ __imm_const(__sk_buff_len, offsetof(struct __sk_buff, len))
+ : __clobber_all);
+}
+
+SEC("socket")
+__description("map access: mixing value pointer and scalar, 1")
+__success __failure_unpriv __msg_unpriv("R2 pointer comparison prohibited")
+__retval(0)
+__naked void value_pointer_and_scalar_1(void)
+{
+ asm volatile (" \
+ /* load map value pointer into r0 and r2 */ \
+ r0 = 1; \
+ r1 = %[map_array_48b] ll; \
+ r2 = r10; \
+ r2 += -16; \
+ r6 = 0; \
+ *(u64*)(r10 - 16) = r6; \
+ call %[bpf_map_lookup_elem]; \
+ if r0 != 0 goto l0_%=; \
+ exit; \
+l0_%=: /* load some number from the map into r1 */ \
+ r1 = *(u8*)(r0 + 0); \
+ /* depending on r1, branch: */ \
+ if r1 != 0 goto l1_%=; \
+ /* branch A */ \
+ r2 = r0; \
+ r3 = 0; \
+ goto l2_%=; \
+l1_%=: /* branch B */ \
+ r2 = 0; \
+ r3 = 0x100000; \
+l2_%=: /* common instruction */ \
+ r2 += r3; \
+ /* depending on r1, branch: */ \
+ if r1 != 0 goto l3_%=; \
+ /* branch A */ \
+ goto l4_%=; \
+l3_%=: /* branch B */ \
+ r0 = 0x13371337; \
+ /* verifier follows fall-through */ \
+ if r2 != 0x100000 goto l4_%=; \
+ r0 = 0; \
+ exit; \
+l4_%=: /* fake-dead code; targeted from branch A to \
+ * prevent dead code sanitization \
+ */ \
+ r0 = *(u8*)(r0 + 0); \
+ r0 = 0; \
+ exit; \
+" :
+ : __imm(bpf_map_lookup_elem),
+ __imm_addr(map_array_48b)
+ : __clobber_all);
+}
+
+SEC("socket")
+__description("map access: mixing value pointer and scalar, 2")
+__success __failure_unpriv __msg_unpriv("R0 invalid mem access 'scalar'")
+__retval(0)
+__naked void value_pointer_and_scalar_2(void)
+{
+ asm volatile (" \
+ /* load map value pointer into r0 and r2 */ \
+ r0 = 1; \
+ r1 = %[map_array_48b] ll; \
+ r2 = r10; \
+ r2 += -16; \
+ r6 = 0; \
+ *(u64*)(r10 - 16) = r6; \
+ call %[bpf_map_lookup_elem]; \
+ if r0 != 0 goto l0_%=; \
+ exit; \
+l0_%=: /* load some number from the map into r1 */ \
+ r1 = *(u8*)(r0 + 0); \
+ /* depending on r1, branch: */ \
+ if r1 == 0 goto l1_%=; \
+ /* branch A */ \
+ r2 = 0; \
+ r3 = 0x100000; \
+ goto l2_%=; \
+l1_%=: /* branch B */ \
+ r2 = r0; \
+ r3 = 0; \
+l2_%=: /* common instruction */ \
+ r2 += r3; \
+ /* depending on r1, branch: */ \
+ if r1 != 0 goto l3_%=; \
+ /* branch A */ \
+ goto l4_%=; \
+l3_%=: /* branch B */ \
+ r0 = 0x13371337; \
+ /* verifier follows fall-through */ \
+ if r2 != 0x100000 goto l4_%=; \
+ r0 = 0; \
+ exit; \
+l4_%=: /* fake-dead code; targeted from branch A to \
+ * prevent dead code sanitization, rejected \
+ * via branch B however \
+ */ \
+ r0 = *(u8*)(r0 + 0); \
+ r0 = 0; \
+ exit; \
+" :
+ : __imm(bpf_map_lookup_elem),
+ __imm_addr(map_array_48b)
+ : __clobber_all);
+}
+
+SEC("socket")
+__description("sanitation: alu with different scalars 1")
+__success __success_unpriv __retval(0x100000)
+__naked void alu_with_different_scalars_1(void)
+{
+ asm volatile (" \
+ r0 = 1; \
+ r1 = %[map_array_48b] ll; \
+ r2 = r10; \
+ r2 += -16; \
+ r6 = 0; \
+ *(u64*)(r10 - 16) = r6; \
+ call %[bpf_map_lookup_elem]; \
+ if r0 != 0 goto l0_%=; \
+ exit; \
+l0_%=: r1 = *(u32*)(r0 + 0); \
+ if r1 == 0 goto l1_%=; \
+ r2 = 0; \
+ r3 = 0x100000; \
+ goto l2_%=; \
+l1_%=: r2 = 42; \
+ r3 = 0x100001; \
+l2_%=: r2 += r3; \
+ r0 = r2; \
+ exit; \
+" :
+ : __imm(bpf_map_lookup_elem),
+ __imm_addr(map_array_48b)
+ : __clobber_all);
+}
+
+SEC("socket")
+__description("sanitation: alu with different scalars 2")
+__success __success_unpriv __retval(0)
+__naked void alu_with_different_scalars_2(void)
+{
+ asm volatile (" \
+ r0 = 1; \
+ r1 = %[map_array_48b] ll; \
+ r6 = r1; \
+ r2 = r10; \
+ r2 += -16; \
+ r7 = 0; \
+ *(u64*)(r10 - 16) = r7; \
+ call %[bpf_map_delete_elem]; \
+ r7 = r0; \
+ r1 = r6; \
+ r2 = r10; \
+ r2 += -16; \
+ call %[bpf_map_delete_elem]; \
+ r6 = r0; \
+ r8 = r6; \
+ r8 += r7; \
+ r0 = r8; \
+ r0 += %[einval]; \
+ r0 += %[einval]; \
+ exit; \
+" :
+ : __imm(bpf_map_delete_elem),
+ __imm_addr(map_array_48b),
+ __imm_const(einval, EINVAL)
+ : __clobber_all);
+}
+
+SEC("socket")
+__description("sanitation: alu with different scalars 3")
+__success __success_unpriv __retval(0)
+__naked void alu_with_different_scalars_3(void)
+{
+ asm volatile (" \
+ r0 = %[einval]; \
+ r0 *= -1; \
+ r7 = r0; \
+ r0 = %[einval]; \
+ r0 *= -1; \
+ r6 = r0; \
+ r8 = r6; \
+ r8 += r7; \
+ r0 = r8; \
+ r0 += %[einval]; \
+ r0 += %[einval]; \
+ exit; \
+" :
+ : __imm_const(einval, EINVAL)
+ : __clobber_all);
+}
+
+SEC("socket")
+__description("map access: value_ptr += known scalar, upper oob arith, test 1")
+__success __failure_unpriv
+__msg_unpriv("R0 pointer arithmetic of map value goes out of range")
+__retval(1)
+__naked void upper_oob_arith_test_1(void)
+{
+ asm volatile (" \
+ r1 = 0; \
+ *(u64*)(r10 - 8) = r1; \
+ r2 = r10; \
+ r2 += -8; \
+ r1 = %[map_array_48b] ll; \
+ call %[bpf_map_lookup_elem]; \
+ if r0 == 0 goto l0_%=; \
+ r1 = 48; \
+ r0 += r1; \
+ r0 -= r1; \
+ r0 = *(u8*)(r0 + 0); \
+l0_%=: r0 = 1; \
+ exit; \
+" :
+ : __imm(bpf_map_lookup_elem),
+ __imm_addr(map_array_48b)
+ : __clobber_all);
+}
+
+SEC("socket")
+__description("map access: value_ptr += known scalar, upper oob arith, test 2")
+__success __failure_unpriv
+__msg_unpriv("R0 pointer arithmetic of map value goes out of range")
+__retval(1)
+__naked void upper_oob_arith_test_2(void)
+{
+ asm volatile (" \
+ r1 = 0; \
+ *(u64*)(r10 - 8) = r1; \
+ r2 = r10; \
+ r2 += -8; \
+ r1 = %[map_array_48b] ll; \
+ call %[bpf_map_lookup_elem]; \
+ if r0 == 0 goto l0_%=; \
+ r1 = 49; \
+ r0 += r1; \
+ r0 -= r1; \
+ r0 = *(u8*)(r0 + 0); \
+l0_%=: r0 = 1; \
+ exit; \
+" :
+ : __imm(bpf_map_lookup_elem),
+ __imm_addr(map_array_48b)
+ : __clobber_all);
+}
+
+SEC("socket")
+__description("map access: value_ptr += known scalar, upper oob arith, test 3")
+__success __success_unpriv __retval(1)
+__naked void upper_oob_arith_test_3(void)
+{
+ asm volatile (" \
+ r1 = 0; \
+ *(u64*)(r10 - 8) = r1; \
+ r2 = r10; \
+ r2 += -8; \
+ r1 = %[map_array_48b] ll; \
+ call %[bpf_map_lookup_elem]; \
+ if r0 == 0 goto l0_%=; \
+ r1 = 47; \
+ r0 += r1; \
+ r0 -= r1; \
+ r0 = *(u8*)(r0 + 0); \
+l0_%=: r0 = 1; \
+ exit; \
+" :
+ : __imm(bpf_map_lookup_elem),
+ __imm_addr(map_array_48b)
+ : __clobber_all);
+}
+
+SEC("socket")
+__description("map access: value_ptr -= known scalar, lower oob arith, test 1")
+__failure __msg("R0 min value is outside of the allowed memory range")
+__failure_unpriv
+__msg_unpriv("R0 pointer arithmetic of map value goes out of range")
+__naked void lower_oob_arith_test_1(void)
+{
+ asm volatile (" \
+ r1 = 0; \
+ *(u64*)(r10 - 8) = r1; \
+ r2 = r10; \
+ r2 += -8; \
+ r1 = %[map_array_48b] ll; \
+ call %[bpf_map_lookup_elem]; \
+ if r0 == 0 goto l0_%=; \
+ r1 = 47; \
+ r0 += r1; \
+ r1 = 48; \
+ r0 -= r1; \
+ r0 = *(u8*)(r0 + 0); \
+l0_%=: r0 = 1; \
+ exit; \
+" :
+ : __imm(bpf_map_lookup_elem),
+ __imm_addr(map_array_48b)
+ : __clobber_all);
+}
+
+SEC("socket")
+__description("map access: value_ptr -= known scalar, lower oob arith, test 2")
+__success __failure_unpriv
+__msg_unpriv("R0 pointer arithmetic of map value goes out of range")
+__retval(1)
+__naked void lower_oob_arith_test_2(void)
+{
+ asm volatile (" \
+ r1 = 0; \
+ *(u64*)(r10 - 8) = r1; \
+ r2 = r10; \
+ r2 += -8; \
+ r1 = %[map_array_48b] ll; \
+ call %[bpf_map_lookup_elem]; \
+ if r0 == 0 goto l0_%=; \
+ r1 = 47; \
+ r0 += r1; \
+ r1 = 48; \
+ r0 -= r1; \
+ r1 = 1; \
+ r0 += r1; \
+ r0 = *(u8*)(r0 + 0); \
+l0_%=: r0 = 1; \
+ exit; \
+" :
+ : __imm(bpf_map_lookup_elem),
+ __imm_addr(map_array_48b)
+ : __clobber_all);
+}
+
+SEC("socket")
+__description("map access: value_ptr -= known scalar, lower oob arith, test 3")
+__success __success_unpriv __retval(1)
+__naked void lower_oob_arith_test_3(void)
+{
+ asm volatile (" \
+ r1 = 0; \
+ *(u64*)(r10 - 8) = r1; \
+ r2 = r10; \
+ r2 += -8; \
+ r1 = %[map_array_48b] ll; \
+ call %[bpf_map_lookup_elem]; \
+ if r0 == 0 goto l0_%=; \
+ r1 = 47; \
+ r0 += r1; \
+ r1 = 47; \
+ r0 -= r1; \
+ r0 = *(u8*)(r0 + 0); \
+l0_%=: r0 = 1; \
+ exit; \
+" :
+ : __imm(bpf_map_lookup_elem),
+ __imm_addr(map_array_48b)
+ : __clobber_all);
+}
+
+SEC("socket")
+__description("map access: known scalar += value_ptr")
+__success __success_unpriv __retval(1)
+__naked void access_known_scalar_value_ptr_1(void)
+{
+ asm volatile (" \
+ r1 = 0; \
+ *(u64*)(r10 - 8) = r1; \
+ r2 = r10; \
+ r2 += -8; \
+ r1 = %[map_array_48b] ll; \
+ call %[bpf_map_lookup_elem]; \
+ if r0 == 0 goto l0_%=; \
+ r1 = 4; \
+ r1 += r0; \
+ r0 = *(u8*)(r1 + 0); \
+l0_%=: r0 = 1; \
+ exit; \
+" :
+ : __imm(bpf_map_lookup_elem),
+ __imm_addr(map_array_48b)
+ : __clobber_all);
+}
+
+SEC("socket")
+__description("map access: value_ptr += known scalar, 1")
+__success __success_unpriv __retval(1)
+__naked void value_ptr_known_scalar_1(void)
+{
+ asm volatile (" \
+ r1 = 0; \
+ *(u64*)(r10 - 8) = r1; \
+ r2 = r10; \
+ r2 += -8; \
+ r1 = %[map_array_48b] ll; \
+ call %[bpf_map_lookup_elem]; \
+ if r0 == 0 goto l0_%=; \
+ r1 = 4; \
+ r0 += r1; \
+ r1 = *(u8*)(r0 + 0); \
+l0_%=: r0 = 1; \
+ exit; \
+" :
+ : __imm(bpf_map_lookup_elem),
+ __imm_addr(map_array_48b)
+ : __clobber_all);
+}
+
+SEC("socket")
+__description("map access: value_ptr += known scalar, 2")
+__failure __msg("invalid access to map value")
+__failure_unpriv
+__naked void value_ptr_known_scalar_2_1(void)
+{
+ asm volatile (" \
+ r1 = 0; \
+ *(u64*)(r10 - 8) = r1; \
+ r2 = r10; \
+ r2 += -8; \
+ r1 = %[map_array_48b] ll; \
+ call %[bpf_map_lookup_elem]; \
+ if r0 == 0 goto l0_%=; \
+ r1 = 49; \
+ r0 += r1; \
+ r1 = *(u8*)(r0 + 0); \
+l0_%=: r0 = 1; \
+ exit; \
+" :
+ : __imm(bpf_map_lookup_elem),
+ __imm_addr(map_array_48b)
+ : __clobber_all);
+}
+
+SEC("socket")
+__description("map access: value_ptr += known scalar, 3")
+__failure __msg("invalid access to map value")
+__failure_unpriv
+__naked void value_ptr_known_scalar_3(void)
+{
+ asm volatile (" \
+ r1 = 0; \
+ *(u64*)(r10 - 8) = r1; \
+ r2 = r10; \
+ r2 += -8; \
+ r1 = %[map_array_48b] ll; \
+ call %[bpf_map_lookup_elem]; \
+ if r0 == 0 goto l0_%=; \
+ r1 = -1; \
+ r0 += r1; \
+ r1 = *(u8*)(r0 + 0); \
+l0_%=: r0 = 1; \
+ exit; \
+" :
+ : __imm(bpf_map_lookup_elem),
+ __imm_addr(map_array_48b)
+ : __clobber_all);
+}
+
+SEC("socket")
+__description("map access: value_ptr += known scalar, 4")
+__success __success_unpriv __retval(1)
+__naked void value_ptr_known_scalar_4(void)
+{
+ asm volatile (" \
+ r1 = 0; \
+ *(u64*)(r10 - 8) = r1; \
+ r2 = r10; \
+ r2 += -8; \
+ r1 = %[map_array_48b] ll; \
+ call %[bpf_map_lookup_elem]; \
+ if r0 == 0 goto l0_%=; \
+ r1 = 5; \
+ r0 += r1; \
+ r1 = -2; \
+ r0 += r1; \
+ r1 = -1; \
+ r0 += r1; \
+ r1 = *(u8*)(r0 + 0); \
+l0_%=: r0 = 1; \
+ exit; \
+" :
+ : __imm(bpf_map_lookup_elem),
+ __imm_addr(map_array_48b)
+ : __clobber_all);
+}
+
+SEC("socket")
+__description("map access: value_ptr += known scalar, 5")
+__success __success_unpriv __retval(0xabcdef12)
+__naked void value_ptr_known_scalar_5(void)
+{
+ asm volatile (" \
+ r1 = 0; \
+ *(u64*)(r10 - 8) = r1; \
+ r2 = r10; \
+ r2 += -8; \
+ r1 = %[map_array_48b] ll; \
+ call %[bpf_map_lookup_elem]; \
+ if r0 == 0 goto l0_%=; \
+ r1 = %[__imm_0]; \
+ r1 += r0; \
+ r0 = *(u32*)(r1 + 0); \
+l0_%=: exit; \
+" :
+ : __imm(bpf_map_lookup_elem),
+ __imm_addr(map_array_48b),
+ __imm_const(__imm_0, (6 + 1) * sizeof(int))
+ : __clobber_all);
+}
+
+SEC("socket")
+__description("map access: value_ptr += known scalar, 6")
+__success __success_unpriv __retval(0xabcdef12)
+__naked void value_ptr_known_scalar_6(void)
+{
+ asm volatile (" \
+ r1 = 0; \
+ *(u64*)(r10 - 8) = r1; \
+ r2 = r10; \
+ r2 += -8; \
+ r1 = %[map_array_48b] ll; \
+ call %[bpf_map_lookup_elem]; \
+ if r0 == 0 goto l0_%=; \
+ r1 = %[__imm_0]; \
+ r0 += r1; \
+ r1 = %[__imm_1]; \
+ r0 += r1; \
+ r0 = *(u32*)(r0 + 0); \
+l0_%=: exit; \
+" :
+ : __imm(bpf_map_lookup_elem),
+ __imm_addr(map_array_48b),
+ __imm_const(__imm_0, (3 + 1) * sizeof(int)),
+ __imm_const(__imm_1, 3 * sizeof(int))
+ : __clobber_all);
+}
+
+SEC("socket")
+__description("map access: value_ptr += N, value_ptr -= N known scalar")
+__success __success_unpriv __retval(0x12345678)
+__naked void value_ptr_n_known_scalar(void)
+{
+ asm volatile (" \
+ r1 = 0; \
+ *(u64*)(r10 - 8) = r1; \
+ r2 = r10; \
+ r2 += -8; \
+ r1 = %[map_array_48b] ll; \
+ call %[bpf_map_lookup_elem]; \
+ if r0 == 0 goto l0_%=; \
+ w1 = 0x12345678; \
+ *(u32*)(r0 + 0) = r1; \
+ r0 += 2; \
+ r1 = 2; \
+ r0 -= r1; \
+ r0 = *(u32*)(r0 + 0); \
+l0_%=: exit; \
+" :
+ : __imm(bpf_map_lookup_elem),
+ __imm_addr(map_array_48b)
+ : __clobber_all);
+}
+
+SEC("socket")
+__description("map access: unknown scalar += value_ptr, 1")
+__success __success_unpriv __retval(1)
+__naked void unknown_scalar_value_ptr_1(void)
+{
+ asm volatile (" \
+ r1 = 0; \
+ *(u64*)(r10 - 8) = r1; \
+ r2 = r10; \
+ r2 += -8; \
+ r1 = %[map_array_48b] ll; \
+ call %[bpf_map_lookup_elem]; \
+ if r0 == 0 goto l0_%=; \
+ r1 = *(u8*)(r0 + 0); \
+ r1 &= 0xf; \
+ r1 += r0; \
+ r0 = *(u8*)(r1 + 0); \
+l0_%=: r0 = 1; \
+ exit; \
+" :
+ : __imm(bpf_map_lookup_elem),
+ __imm_addr(map_array_48b)
+ : __clobber_all);
+}
+
+SEC("socket")
+__description("map access: unknown scalar += value_ptr, 2")
+__success __success_unpriv __retval(0xabcdef12) __flag(BPF_F_ANY_ALIGNMENT)
+__naked void unknown_scalar_value_ptr_2(void)
+{
+ asm volatile (" \
+ r1 = 0; \
+ *(u64*)(r10 - 8) = r1; \
+ r2 = r10; \
+ r2 += -8; \
+ r1 = %[map_array_48b] ll; \
+ call %[bpf_map_lookup_elem]; \
+ if r0 == 0 goto l0_%=; \
+ r1 = *(u32*)(r0 + 0); \
+ r1 &= 31; \
+ r1 += r0; \
+ r0 = *(u32*)(r1 + 0); \
+l0_%=: exit; \
+" :
+ : __imm(bpf_map_lookup_elem),
+ __imm_addr(map_array_48b)
+ : __clobber_all);
+}
+
+SEC("socket")
+__description("map access: unknown scalar += value_ptr, 3")
+__success __failure_unpriv
+__msg_unpriv("R0 pointer arithmetic of map value goes out of range")
+__retval(0xabcdef12) __flag(BPF_F_ANY_ALIGNMENT)
+__naked void unknown_scalar_value_ptr_3(void)
+{
+ asm volatile (" \
+ r1 = 0; \
+ *(u64*)(r10 - 8) = r1; \
+ r2 = r10; \
+ r2 += -8; \
+ r1 = %[map_array_48b] ll; \
+ call %[bpf_map_lookup_elem]; \
+ if r0 == 0 goto l0_%=; \
+ r1 = -1; \
+ r0 += r1; \
+ r1 = 1; \
+ r0 += r1; \
+ r1 = *(u32*)(r0 + 0); \
+ r1 &= 31; \
+ r1 += r0; \
+ r0 = *(u32*)(r1 + 0); \
+l0_%=: exit; \
+" :
+ : __imm(bpf_map_lookup_elem),
+ __imm_addr(map_array_48b)
+ : __clobber_all);
+}
+
+SEC("socket")
+__description("map access: unknown scalar += value_ptr, 4")
+__failure __msg("R1 max value is outside of the allowed memory range")
+__msg_unpriv("R1 pointer arithmetic of map value goes out of range")
+__flag(BPF_F_ANY_ALIGNMENT)
+__naked void unknown_scalar_value_ptr_4(void)
+{
+ asm volatile (" \
+ r1 = 0; \
+ *(u64*)(r10 - 8) = r1; \
+ r2 = r10; \
+ r2 += -8; \
+ r1 = %[map_array_48b] ll; \
+ call %[bpf_map_lookup_elem]; \
+ if r0 == 0 goto l0_%=; \
+ r1 = 19; \
+ r0 += r1; \
+ r1 = *(u32*)(r0 + 0); \
+ r1 &= 31; \
+ r1 += r0; \
+ r0 = *(u32*)(r1 + 0); \
+l0_%=: exit; \
+" :
+ : __imm(bpf_map_lookup_elem),
+ __imm_addr(map_array_48b)
+ : __clobber_all);
+}
+
+SEC("socket")
+__description("map access: value_ptr += unknown scalar, 1")
+__success __success_unpriv __retval(1)
+__naked void value_ptr_unknown_scalar_1(void)
+{
+ asm volatile (" \
+ r1 = 0; \
+ *(u64*)(r10 - 8) = r1; \
+ r2 = r10; \
+ r2 += -8; \
+ r1 = %[map_array_48b] ll; \
+ call %[bpf_map_lookup_elem]; \
+ if r0 == 0 goto l0_%=; \
+ r1 = *(u8*)(r0 + 0); \
+ r1 &= 0xf; \
+ r0 += r1; \
+ r1 = *(u8*)(r0 + 0); \
+l0_%=: r0 = 1; \
+ exit; \
+" :
+ : __imm(bpf_map_lookup_elem),
+ __imm_addr(map_array_48b)
+ : __clobber_all);
+}
+
+SEC("socket")
+__description("map access: value_ptr += unknown scalar, 2")
+__success __success_unpriv __retval(0xabcdef12) __flag(BPF_F_ANY_ALIGNMENT)
+__naked void value_ptr_unknown_scalar_2_1(void)
+{
+ asm volatile (" \
+ r1 = 0; \
+ *(u64*)(r10 - 8) = r1; \
+ r2 = r10; \
+ r2 += -8; \
+ r1 = %[map_array_48b] ll; \
+ call %[bpf_map_lookup_elem]; \
+ if r0 == 0 goto l0_%=; \
+ r1 = *(u32*)(r0 + 0); \
+ r1 &= 31; \
+ r0 += r1; \
+ r0 = *(u32*)(r0 + 0); \
+l0_%=: exit; \
+" :
+ : __imm(bpf_map_lookup_elem),
+ __imm_addr(map_array_48b)
+ : __clobber_all);
+}
+
+SEC("socket")
+__description("map access: value_ptr += unknown scalar, 3")
+__success __success_unpriv __retval(1)
+__naked void value_ptr_unknown_scalar_3(void)
+{
+ asm volatile (" \
+ r1 = 0; \
+ *(u64*)(r10 - 8) = r1; \
+ r2 = r10; \
+ r2 += -8; \
+ r1 = %[map_array_48b] ll; \
+ call %[bpf_map_lookup_elem]; \
+ if r0 == 0 goto l0_%=; \
+ r1 = *(u64*)(r0 + 0); \
+ r2 = *(u64*)(r0 + 8); \
+ r3 = *(u64*)(r0 + 16); \
+ r1 &= 0xf; \
+ r3 &= 1; \
+ r3 |= 1; \
+ if r2 > r3 goto l0_%=; \
+ r0 += r3; \
+ r0 = *(u8*)(r0 + 0); \
+ r0 = 1; \
+l1_%=: exit; \
+l0_%=: r0 = 2; \
+ goto l1_%=; \
+" :
+ : __imm(bpf_map_lookup_elem),
+ __imm_addr(map_array_48b)
+ : __clobber_all);
+}
+
+SEC("socket")
+__description("map access: value_ptr += value_ptr")
+__failure __msg("R0 pointer += pointer prohibited")
+__failure_unpriv
+__naked void access_value_ptr_value_ptr_1(void)
+{
+ asm volatile (" \
+ r1 = 0; \
+ *(u64*)(r10 - 8) = r1; \
+ r2 = r10; \
+ r2 += -8; \
+ r1 = %[map_array_48b] ll; \
+ call %[bpf_map_lookup_elem]; \
+ if r0 == 0 goto l0_%=; \
+ r0 += r0; \
+ r1 = *(u8*)(r0 + 0); \
+l0_%=: r0 = 1; \
+ exit; \
+" :
+ : __imm(bpf_map_lookup_elem),
+ __imm_addr(map_array_48b)
+ : __clobber_all);
+}
+
+SEC("socket")
+__description("map access: known scalar -= value_ptr")
+__failure __msg("R1 tried to subtract pointer from scalar")
+__failure_unpriv
+__naked void access_known_scalar_value_ptr_2(void)
+{
+ asm volatile (" \
+ r1 = 0; \
+ *(u64*)(r10 - 8) = r1; \
+ r2 = r10; \
+ r2 += -8; \
+ r1 = %[map_array_48b] ll; \
+ call %[bpf_map_lookup_elem]; \
+ if r0 == 0 goto l0_%=; \
+ r1 = 4; \
+ r1 -= r0; \
+ r0 = *(u8*)(r1 + 0); \
+l0_%=: r0 = 1; \
+ exit; \
+" :
+ : __imm(bpf_map_lookup_elem),
+ __imm_addr(map_array_48b)
+ : __clobber_all);
+}
+
+SEC("socket")
+__description("map access: value_ptr -= known scalar")
+__failure __msg("R0 min value is outside of the allowed memory range")
+__failure_unpriv
+__naked void access_value_ptr_known_scalar(void)
+{
+ asm volatile (" \
+ r1 = 0; \
+ *(u64*)(r10 - 8) = r1; \
+ r2 = r10; \
+ r2 += -8; \
+ r1 = %[map_array_48b] ll; \
+ call %[bpf_map_lookup_elem]; \
+ if r0 == 0 goto l0_%=; \
+ r1 = 4; \
+ r0 -= r1; \
+ r1 = *(u8*)(r0 + 0); \
+l0_%=: r0 = 1; \
+ exit; \
+" :
+ : __imm(bpf_map_lookup_elem),
+ __imm_addr(map_array_48b)
+ : __clobber_all);
+}
+
+SEC("socket")
+__description("map access: value_ptr -= known scalar, 2")
+__success __success_unpriv __retval(1)
+__naked void value_ptr_known_scalar_2_2(void)
+{
+ asm volatile (" \
+ r1 = 0; \
+ *(u64*)(r10 - 8) = r1; \
+ r2 = r10; \
+ r2 += -8; \
+ r1 = %[map_array_48b] ll; \
+ call %[bpf_map_lookup_elem]; \
+ if r0 == 0 goto l0_%=; \
+ r1 = 6; \
+ r2 = 4; \
+ r0 += r1; \
+ r0 -= r2; \
+ r1 = *(u8*)(r0 + 0); \
+l0_%=: r0 = 1; \
+ exit; \
+" :
+ : __imm(bpf_map_lookup_elem),
+ __imm_addr(map_array_48b)
+ : __clobber_all);
+}
+
+SEC("socket")
+__description("map access: unknown scalar -= value_ptr")
+__failure __msg("R1 tried to subtract pointer from scalar")
+__failure_unpriv
+__naked void access_unknown_scalar_value_ptr(void)
+{
+ asm volatile (" \
+ r1 = 0; \
+ *(u64*)(r10 - 8) = r1; \
+ r2 = r10; \
+ r2 += -8; \
+ r1 = %[map_array_48b] ll; \
+ call %[bpf_map_lookup_elem]; \
+ if r0 == 0 goto l0_%=; \
+ r1 = *(u8*)(r0 + 0); \
+ r1 &= 0xf; \
+ r1 -= r0; \
+ r0 = *(u8*)(r1 + 0); \
+l0_%=: r0 = 1; \
+ exit; \
+" :
+ : __imm(bpf_map_lookup_elem),
+ __imm_addr(map_array_48b)
+ : __clobber_all);
+}
+
+SEC("socket")
+__description("map access: value_ptr -= unknown scalar")
+__failure __msg("R0 min value is negative")
+__failure_unpriv
+__naked void access_value_ptr_unknown_scalar(void)
+{
+ asm volatile (" \
+ r1 = 0; \
+ *(u64*)(r10 - 8) = r1; \
+ r2 = r10; \
+ r2 += -8; \
+ r1 = %[map_array_48b] ll; \
+ call %[bpf_map_lookup_elem]; \
+ if r0 == 0 goto l0_%=; \
+ r1 = *(u8*)(r0 + 0); \
+ r1 &= 0xf; \
+ r0 -= r1; \
+ r1 = *(u8*)(r0 + 0); \
+l0_%=: r0 = 1; \
+ exit; \
+" :
+ : __imm(bpf_map_lookup_elem),
+ __imm_addr(map_array_48b)
+ : __clobber_all);
+}
+
+SEC("socket")
+__description("map access: value_ptr -= unknown scalar, 2")
+__success __failure_unpriv
+__msg_unpriv("R0 pointer arithmetic of map value goes out of range")
+__retval(1)
+__naked void value_ptr_unknown_scalar_2_2(void)
+{
+ asm volatile (" \
+ r1 = 0; \
+ *(u64*)(r10 - 8) = r1; \
+ r2 = r10; \
+ r2 += -8; \
+ r1 = %[map_array_48b] ll; \
+ call %[bpf_map_lookup_elem]; \
+ if r0 == 0 goto l0_%=; \
+ r1 = *(u8*)(r0 + 0); \
+ r1 &= 0xf; \
+ r1 |= 0x7; \
+ r0 += r1; \
+ r1 = *(u8*)(r0 + 0); \
+ r1 &= 0x7; \
+ r0 -= r1; \
+ r1 = *(u8*)(r0 + 0); \
+l0_%=: r0 = 1; \
+ exit; \
+" :
+ : __imm(bpf_map_lookup_elem),
+ __imm_addr(map_array_48b)
+ : __clobber_all);
+}
+
+SEC("socket")
+__description("map access: value_ptr -= value_ptr")
+__failure __msg("R0 invalid mem access 'scalar'")
+__msg_unpriv("R0 pointer -= pointer prohibited")
+__naked void access_value_ptr_value_ptr_2(void)
+{
+ asm volatile (" \
+ r1 = 0; \
+ *(u64*)(r10 - 8) = r1; \
+ r2 = r10; \
+ r2 += -8; \
+ r1 = %[map_array_48b] ll; \
+ call %[bpf_map_lookup_elem]; \
+ if r0 == 0 goto l0_%=; \
+ r0 -= r0; \
+ r1 = *(u8*)(r0 + 0); \
+l0_%=: r0 = 1; \
+ exit; \
+" :
+ : __imm(bpf_map_lookup_elem),
+ __imm_addr(map_array_48b)
+ : __clobber_all);
+}
+
+SEC("socket")
+__description("map access: trying to leak tainted dst reg")
+__failure __msg("math between map_value pointer and 4294967295 is not allowed")
+__failure_unpriv
+__naked void to_leak_tainted_dst_reg(void)
+{
+ asm volatile (" \
+ r0 = 0; \
+ r1 = 0; \
+ *(u64*)(r10 - 8) = r1; \
+ r2 = r10; \
+ r2 += -8; \
+ r1 = %[map_array_48b] ll; \
+ call %[bpf_map_lookup_elem]; \
+ if r0 != 0 goto l0_%=; \
+ exit; \
+l0_%=: r2 = r0; \
+ w1 = 0xFFFFFFFF; \
+ w1 = w1; \
+ r2 -= r1; \
+ *(u64*)(r0 + 0) = r2; \
+ r0 = 0; \
+ exit; \
+" :
+ : __imm(bpf_map_lookup_elem),
+ __imm_addr(map_array_48b)
+ : __clobber_all);
+}
+
+SEC("tc")
+__description("32bit pkt_ptr -= scalar")
+__success __retval(0) __flag(BPF_F_ANY_ALIGNMENT)
+__naked void _32bit_pkt_ptr_scalar(void)
+{
+ asm volatile (" \
+ r8 = *(u32*)(r1 + %[__sk_buff_data_end]); \
+ r7 = *(u32*)(r1 + %[__sk_buff_data]); \
+ r6 = r7; \
+ r6 += 40; \
+ if r6 > r8 goto l0_%=; \
+ w4 = w7; \
+ w6 -= w4; \
+l0_%=: r0 = 0; \
+ exit; \
+" :
+ : __imm_const(__sk_buff_data, offsetof(struct __sk_buff, data)),
+ __imm_const(__sk_buff_data_end, offsetof(struct __sk_buff, data_end))
+ : __clobber_all);
+}
+
+SEC("tc")
+__description("32bit scalar -= pkt_ptr")
+__success __retval(0) __flag(BPF_F_ANY_ALIGNMENT)
+__naked void _32bit_scalar_pkt_ptr(void)
+{
+ asm volatile (" \
+ r8 = *(u32*)(r1 + %[__sk_buff_data_end]); \
+ r7 = *(u32*)(r1 + %[__sk_buff_data]); \
+ r6 = r7; \
+ r6 += 40; \
+ if r6 > r8 goto l0_%=; \
+ w4 = w6; \
+ w4 -= w7; \
+l0_%=: r0 = 0; \
+ exit; \
+" :
+ : __imm_const(__sk_buff_data, offsetof(struct __sk_buff, data)),
+ __imm_const(__sk_buff_data_end, offsetof(struct __sk_buff, data_end))
+ : __clobber_all);
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/verifier_var_off.c b/tools/testing/selftests/bpf/progs/verifier_var_off.c
new file mode 100644
index 000000000000..c810f4f6f479
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/verifier_var_off.c
@@ -0,0 +1,418 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Converted from tools/testing/selftests/bpf/verifier/var_off.c */
+
+#include <linux/bpf.h>
+#include <bpf/bpf_helpers.h>
+#include "bpf_misc.h"
+
+struct {
+ __uint(type, BPF_MAP_TYPE_HASH);
+ __uint(max_entries, 1);
+ __type(key, long long);
+ __type(value, long long);
+} map_hash_8b SEC(".maps");
+
+SEC("lwt_in")
+__description("variable-offset ctx access")
+__failure __msg("variable ctx access var_off=(0x0; 0x4)")
+__naked void variable_offset_ctx_access(void)
+{
+ asm volatile (" \
+ /* Get an unknown value */ \
+ r2 = *(u32*)(r1 + 0); \
+ /* Make it small and 4-byte aligned */ \
+ r2 &= 4; \
+ /* add it to skb. We now have either &skb->len or\
+ * &skb->pkt_type, but we don't know which \
+ */ \
+ r1 += r2; \
+ /* dereference it */ \
+ r0 = *(u32*)(r1 + 0); \
+ exit; \
+" ::: __clobber_all);
+}
+
+SEC("cgroup/skb")
+__description("variable-offset stack read, priv vs unpriv")
+__success __failure_unpriv
+__msg_unpriv("R2 variable stack access prohibited for !root")
+__retval(0)
+__naked void stack_read_priv_vs_unpriv(void)
+{
+ asm volatile (" \
+ /* Fill the top 8 bytes of the stack */ \
+ r0 = 0; \
+ *(u64*)(r10 - 8) = r0; \
+ /* Get an unknown value */ \
+ r2 = *(u32*)(r1 + 0); \
+ /* Make it small and 4-byte aligned */ \
+ r2 &= 4; \
+ r2 -= 8; \
+ /* add it to fp. We now have either fp-4 or fp-8, but\
+ * we don't know which \
+ */ \
+ r2 += r10; \
+ /* dereference it for a stack read */ \
+ r0 = *(u32*)(r2 + 0); \
+ r0 = 0; \
+ exit; \
+" ::: __clobber_all);
+}
+
+SEC("cgroup/skb")
+__description("variable-offset stack read, uninitialized")
+__success
+__failure_unpriv __msg_unpriv("R2 variable stack access prohibited for !root")
+__naked void variable_offset_stack_read_uninitialized(void)
+{
+ asm volatile (" \
+ /* Get an unknown value */ \
+ r2 = *(u32*)(r1 + 0); \
+ /* Make it small and 4-byte aligned */ \
+ r2 &= 4; \
+ r2 -= 8; \
+ /* add it to fp. We now have either fp-4 or fp-8, but\
+ * we don't know which \
+ */ \
+ r2 += r10; \
+ /* dereference it for a stack read */ \
+ r0 = *(u32*)(r2 + 0); \
+ r0 = 0; \
+ exit; \
+" ::: __clobber_all);
+}
+
+SEC("socket")
+__description("variable-offset stack write, priv vs unpriv")
+__success
+/* Check that the maximum stack depth is correctly maintained according to the
+ * maximum possible variable offset.
+ */
+__log_level(4) __msg("stack depth 16")
+__failure_unpriv
+/* Variable stack access is rejected for unprivileged.
+ */
+__msg_unpriv("R2 variable stack access prohibited for !root")
+__retval(0)
+__naked void stack_write_priv_vs_unpriv(void)
+{
+ asm volatile (" \
+ /* Get an unknown value */ \
+ r2 = *(u32*)(r1 + 0); \
+ /* Make it small and 8-byte aligned */ \
+ r2 &= 8; \
+ r2 -= 16; \
+ /* Add it to fp. We now have either fp-8 or \
+ * fp-16, but we don't know which \
+ */ \
+ r2 += r10; \
+ /* Dereference it for a stack write */ \
+ r0 = 0; \
+ *(u64*)(r2 + 0) = r0; \
+ exit; \
+" ::: __clobber_all);
+}
+
+/* Similar to the previous test, but this time also perform a read from the
+ * address written to with a variable offset. The read is allowed, showing that,
+ * after a variable-offset write, a priviledged program can read the slots that
+ * were in the range of that write (even if the verifier doesn't actually know if
+ * the slot being read was really written to or not.
+ *
+ * Despite this test being mostly a superset, the previous test is also kept for
+ * the sake of it checking the stack depth in the case where there is no read.
+ */
+SEC("socket")
+__description("variable-offset stack write followed by read")
+__success
+/* Check that the maximum stack depth is correctly maintained according to the
+ * maximum possible variable offset.
+ */
+__log_level(4) __msg("stack depth 16")
+__failure_unpriv
+__msg_unpriv("R2 variable stack access prohibited for !root")
+__retval(0)
+__naked void stack_write_followed_by_read(void)
+{
+ asm volatile (" \
+ /* Get an unknown value */ \
+ r2 = *(u32*)(r1 + 0); \
+ /* Make it small and 8-byte aligned */ \
+ r2 &= 8; \
+ r2 -= 16; \
+ /* Add it to fp. We now have either fp-8 or fp-16, but\
+ * we don't know which \
+ */ \
+ r2 += r10; \
+ /* Dereference it for a stack write */ \
+ r0 = 0; \
+ *(u64*)(r2 + 0) = r0; \
+ /* Now read from the address we just wrote. */ \
+ r3 = *(u64*)(r2 + 0); \
+ r0 = 0; \
+ exit; \
+" ::: __clobber_all);
+}
+
+SEC("socket")
+__description("variable-offset stack write clobbers spilled regs")
+__failure
+/* In the priviledged case, dereferencing a spilled-and-then-filled
+ * register is rejected because the previous variable offset stack
+ * write might have overwritten the spilled pointer (i.e. we lose track
+ * of the spilled register when we analyze the write).
+ */
+__msg("R2 invalid mem access 'scalar'")
+__failure_unpriv
+/* The unprivileged case is not too interesting; variable
+ * stack access is rejected.
+ */
+__msg_unpriv("R2 variable stack access prohibited for !root")
+__naked void stack_write_clobbers_spilled_regs(void)
+{
+ asm volatile (" \
+ /* Dummy instruction; needed because we need to patch the next one\
+ * and we can't patch the first instruction. \
+ */ \
+ r6 = 0; \
+ /* Make R0 a map ptr */ \
+ r0 = %[map_hash_8b] ll; \
+ /* Get an unknown value */ \
+ r2 = *(u32*)(r1 + 0); \
+ /* Make it small and 8-byte aligned */ \
+ r2 &= 8; \
+ r2 -= 16; \
+ /* Add it to fp. We now have either fp-8 or fp-16, but\
+ * we don't know which. \
+ */ \
+ r2 += r10; \
+ /* Spill R0(map ptr) into stack */ \
+ *(u64*)(r10 - 8) = r0; \
+ /* Dereference the unknown value for a stack write */\
+ r0 = 0; \
+ *(u64*)(r2 + 0) = r0; \
+ /* Fill the register back into R2 */ \
+ r2 = *(u64*)(r10 - 8); \
+ /* Try to dereference R2 for a memory load */ \
+ r0 = *(u64*)(r2 + 8); \
+ exit; \
+" :
+ : __imm_addr(map_hash_8b)
+ : __clobber_all);
+}
+
+SEC("sockops")
+__description("indirect variable-offset stack access, unbounded")
+__failure __msg("invalid unbounded variable-offset indirect access to stack R4")
+__naked void variable_offset_stack_access_unbounded(void)
+{
+ asm volatile (" \
+ r2 = 6; \
+ r3 = 28; \
+ /* Fill the top 16 bytes of the stack. */ \
+ r4 = 0; \
+ *(u64*)(r10 - 16) = r4; \
+ r4 = 0; \
+ *(u64*)(r10 - 8) = r4; \
+ /* Get an unknown value. */ \
+ r4 = *(u64*)(r1 + %[bpf_sock_ops_bytes_received]);\
+ /* Check the lower bound but don't check the upper one. */\
+ if r4 s< 0 goto l0_%=; \
+ /* Point the lower bound to initialized stack. Offset is now in range\
+ * from fp-16 to fp+0x7fffffffffffffef, i.e. max value is unbounded.\
+ */ \
+ r4 -= 16; \
+ r4 += r10; \
+ r5 = 8; \
+ /* Dereference it indirectly. */ \
+ call %[bpf_getsockopt]; \
+l0_%=: r0 = 0; \
+ exit; \
+" :
+ : __imm(bpf_getsockopt),
+ __imm_const(bpf_sock_ops_bytes_received, offsetof(struct bpf_sock_ops, bytes_received))
+ : __clobber_all);
+}
+
+SEC("lwt_in")
+__description("indirect variable-offset stack access, max out of bound")
+__failure __msg("invalid variable-offset indirect access to stack R2")
+__naked void access_max_out_of_bound(void)
+{
+ asm volatile (" \
+ /* Fill the top 8 bytes of the stack */ \
+ r2 = 0; \
+ *(u64*)(r10 - 8) = r2; \
+ /* Get an unknown value */ \
+ r2 = *(u32*)(r1 + 0); \
+ /* Make it small and 4-byte aligned */ \
+ r2 &= 4; \
+ r2 -= 8; \
+ /* add it to fp. We now have either fp-4 or fp-8, but\
+ * we don't know which \
+ */ \
+ r2 += r10; \
+ /* dereference it indirectly */ \
+ r1 = %[map_hash_8b] ll; \
+ call %[bpf_map_lookup_elem]; \
+ r0 = 0; \
+ exit; \
+" :
+ : __imm(bpf_map_lookup_elem),
+ __imm_addr(map_hash_8b)
+ : __clobber_all);
+}
+
+/* Similar to the test above, but this time check the special case of a
+ * zero-sized stack access. We used to have a bug causing crashes for zero-sized
+ * out-of-bounds accesses.
+ */
+SEC("socket")
+__description("indirect variable-offset stack access, zero-sized, max out of bound")
+__failure __msg("invalid variable-offset indirect access to stack R1")
+__naked void zero_sized_access_max_out_of_bound(void)
+{
+ asm volatile (" \
+ r0 = 0; \
+ /* Fill some stack */ \
+ *(u64*)(r10 - 16) = r0; \
+ *(u64*)(r10 - 8) = r0; \
+ /* Get an unknown value */ \
+ r1 = *(u32*)(r1 + 0); \
+ r1 &= 63; \
+ r1 += -16; \
+ /* r1 is now anywhere in [-16,48) */ \
+ r1 += r10; \
+ r2 = 0; \
+ r3 = 0; \
+ call %[bpf_probe_read_kernel]; \
+ exit; \
+" :
+ : __imm(bpf_probe_read_kernel)
+ : __clobber_all);
+}
+
+SEC("lwt_in")
+__description("indirect variable-offset stack access, min out of bound")
+__failure __msg("invalid variable-offset indirect access to stack R2")
+__naked void access_min_out_of_bound(void)
+{
+ asm volatile (" \
+ /* Fill the top 8 bytes of the stack */ \
+ r2 = 0; \
+ *(u64*)(r10 - 8) = r2; \
+ /* Get an unknown value */ \
+ r2 = *(u32*)(r1 + 0); \
+ /* Make it small and 4-byte aligned */ \
+ r2 &= 4; \
+ r2 -= 516; \
+ /* add it to fp. We now have either fp-516 or fp-512, but\
+ * we don't know which \
+ */ \
+ r2 += r10; \
+ /* dereference it indirectly */ \
+ r1 = %[map_hash_8b] ll; \
+ call %[bpf_map_lookup_elem]; \
+ r0 = 0; \
+ exit; \
+" :
+ : __imm(bpf_map_lookup_elem),
+ __imm_addr(map_hash_8b)
+ : __clobber_all);
+}
+
+SEC("cgroup/skb")
+__description("indirect variable-offset stack access, min_off < min_initialized")
+__success
+__failure_unpriv __msg_unpriv("R2 variable stack access prohibited for !root")
+__naked void access_min_off_min_initialized(void)
+{
+ asm volatile (" \
+ /* Fill only the top 8 bytes of the stack. */ \
+ r2 = 0; \
+ *(u64*)(r10 - 8) = r2; \
+ /* Get an unknown value */ \
+ r2 = *(u32*)(r1 + 0); \
+ /* Make it small and 4-byte aligned. */ \
+ r2 &= 4; \
+ r2 -= 16; \
+ /* Add it to fp. We now have either fp-12 or fp-16, but we don't know\
+ * which. fp-16 size 8 is partially uninitialized stack.\
+ */ \
+ r2 += r10; \
+ /* Dereference it indirectly. */ \
+ r1 = %[map_hash_8b] ll; \
+ call %[bpf_map_lookup_elem]; \
+ r0 = 0; \
+ exit; \
+" :
+ : __imm(bpf_map_lookup_elem),
+ __imm_addr(map_hash_8b)
+ : __clobber_all);
+}
+
+SEC("cgroup/skb")
+__description("indirect variable-offset stack access, priv vs unpriv")
+__success __failure_unpriv
+__msg_unpriv("R2 variable stack access prohibited for !root")
+__retval(0)
+__naked void stack_access_priv_vs_unpriv(void)
+{
+ asm volatile (" \
+ /* Fill the top 16 bytes of the stack. */ \
+ r2 = 0; \
+ *(u64*)(r10 - 16) = r2; \
+ r2 = 0; \
+ *(u64*)(r10 - 8) = r2; \
+ /* Get an unknown value. */ \
+ r2 = *(u32*)(r1 + 0); \
+ /* Make it small and 4-byte aligned. */ \
+ r2 &= 4; \
+ r2 -= 16; \
+ /* Add it to fp. We now have either fp-12 or fp-16, we don't know\
+ * which, but either way it points to initialized stack.\
+ */ \
+ r2 += r10; \
+ /* Dereference it indirectly. */ \
+ r1 = %[map_hash_8b] ll; \
+ call %[bpf_map_lookup_elem]; \
+ r0 = 0; \
+ exit; \
+" :
+ : __imm(bpf_map_lookup_elem),
+ __imm_addr(map_hash_8b)
+ : __clobber_all);
+}
+
+SEC("lwt_in")
+__description("indirect variable-offset stack access, ok")
+__success __retval(0)
+__naked void variable_offset_stack_access_ok(void)
+{
+ asm volatile (" \
+ /* Fill the top 16 bytes of the stack. */ \
+ r2 = 0; \
+ *(u64*)(r10 - 16) = r2; \
+ r2 = 0; \
+ *(u64*)(r10 - 8) = r2; \
+ /* Get an unknown value. */ \
+ r2 = *(u32*)(r1 + 0); \
+ /* Make it small and 4-byte aligned. */ \
+ r2 &= 4; \
+ r2 -= 16; \
+ /* Add it to fp. We now have either fp-12 or fp-16, we don't know\
+ * which, but either way it points to initialized stack.\
+ */ \
+ r2 += r10; \
+ /* Dereference it indirectly. */ \
+ r1 = %[map_hash_8b] ll; \
+ call %[bpf_map_lookup_elem]; \
+ r0 = 0; \
+ exit; \
+" :
+ : __imm(bpf_map_lookup_elem),
+ __imm_addr(map_hash_8b)
+ : __clobber_all);
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/verifier_xadd.c b/tools/testing/selftests/bpf/progs/verifier_xadd.c
new file mode 100644
index 000000000000..05a0a55adb45
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/verifier_xadd.c
@@ -0,0 +1,124 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Converted from tools/testing/selftests/bpf/verifier/xadd.c */
+
+#include <linux/bpf.h>
+#include <bpf/bpf_helpers.h>
+#include "bpf_misc.h"
+
+struct {
+ __uint(type, BPF_MAP_TYPE_HASH);
+ __uint(max_entries, 1);
+ __type(key, long long);
+ __type(value, long long);
+} map_hash_8b SEC(".maps");
+
+SEC("tc")
+__description("xadd/w check unaligned stack")
+__failure __msg("misaligned stack access off")
+__naked void xadd_w_check_unaligned_stack(void)
+{
+ asm volatile (" \
+ r0 = 1; \
+ *(u64*)(r10 - 8) = r0; \
+ lock *(u32 *)(r10 - 7) += w0; \
+ r0 = *(u64*)(r10 - 8); \
+ exit; \
+" ::: __clobber_all);
+}
+
+SEC("tc")
+__description("xadd/w check unaligned map")
+__failure __msg("misaligned value access off")
+__naked void xadd_w_check_unaligned_map(void)
+{
+ asm volatile (" \
+ r1 = 0; \
+ *(u64*)(r10 - 8) = r1; \
+ r2 = r10; \
+ r2 += -8; \
+ r1 = %[map_hash_8b] ll; \
+ call %[bpf_map_lookup_elem]; \
+ if r0 != 0 goto l0_%=; \
+ exit; \
+l0_%=: r1 = 1; \
+ lock *(u32 *)(r0 + 3) += w1; \
+ r0 = *(u32*)(r0 + 3); \
+ exit; \
+" :
+ : __imm(bpf_map_lookup_elem),
+ __imm_addr(map_hash_8b)
+ : __clobber_all);
+}
+
+SEC("xdp")
+__description("xadd/w check unaligned pkt")
+__failure __msg("BPF_ATOMIC stores into R2 pkt is not allowed")
+__flag(BPF_F_ANY_ALIGNMENT)
+__naked void xadd_w_check_unaligned_pkt(void)
+{
+ asm volatile (" \
+ r2 = *(u32*)(r1 + %[xdp_md_data]); \
+ r3 = *(u32*)(r1 + %[xdp_md_data_end]); \
+ r1 = r2; \
+ r1 += 8; \
+ if r1 < r3 goto l0_%=; \
+ r0 = 99; \
+ goto l1_%=; \
+l0_%=: r0 = 1; \
+ r1 = 0; \
+ *(u32*)(r2 + 0) = r1; \
+ r1 = 0; \
+ *(u32*)(r2 + 3) = r1; \
+ lock *(u32 *)(r2 + 1) += w0; \
+ lock *(u32 *)(r2 + 2) += w0; \
+ r0 = *(u32*)(r2 + 1); \
+l1_%=: exit; \
+" :
+ : __imm_const(xdp_md_data, offsetof(struct xdp_md, data)),
+ __imm_const(xdp_md_data_end, offsetof(struct xdp_md, data_end))
+ : __clobber_all);
+}
+
+SEC("tc")
+__description("xadd/w check whether src/dst got mangled, 1")
+__success __retval(3)
+__naked void src_dst_got_mangled_1(void)
+{
+ asm volatile (" \
+ r0 = 1; \
+ r6 = r0; \
+ r7 = r10; \
+ *(u64*)(r10 - 8) = r0; \
+ lock *(u64 *)(r10 - 8) += r0; \
+ lock *(u64 *)(r10 - 8) += r0; \
+ if r6 != r0 goto l0_%=; \
+ if r7 != r10 goto l0_%=; \
+ r0 = *(u64*)(r10 - 8); \
+ exit; \
+l0_%=: r0 = 42; \
+ exit; \
+" ::: __clobber_all);
+}
+
+SEC("tc")
+__description("xadd/w check whether src/dst got mangled, 2")
+__success __retval(3)
+__naked void src_dst_got_mangled_2(void)
+{
+ asm volatile (" \
+ r0 = 1; \
+ r6 = r0; \
+ r7 = r10; \
+ *(u32*)(r10 - 8) = r0; \
+ lock *(u32 *)(r10 - 8) += w0; \
+ lock *(u32 *)(r10 - 8) += w0; \
+ if r6 != r0 goto l0_%=; \
+ if r7 != r10 goto l0_%=; \
+ r0 = *(u32*)(r10 - 8); \
+ exit; \
+l0_%=: r0 = 42; \
+ exit; \
+" ::: __clobber_all);
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/verifier_xdp.c b/tools/testing/selftests/bpf/progs/verifier_xdp.c
new file mode 100644
index 000000000000..50768ed179b3
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/verifier_xdp.c
@@ -0,0 +1,24 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Converted from tools/testing/selftests/bpf/verifier/xdp.c */
+
+#include <linux/bpf.h>
+#include <bpf/bpf_helpers.h>
+#include "bpf_misc.h"
+
+SEC("xdp")
+__description("XDP, using ifindex from netdev")
+__success __retval(1)
+__naked void xdp_using_ifindex_from_netdev(void)
+{
+ asm volatile (" \
+ r0 = 0; \
+ r2 = *(u32*)(r1 + %[xdp_md_ingress_ifindex]); \
+ if r2 < 1 goto l0_%=; \
+ r0 = 1; \
+l0_%=: exit; \
+" :
+ : __imm_const(xdp_md_ingress_ifindex, offsetof(struct xdp_md, ingress_ifindex))
+ : __clobber_all);
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/verifier_xdp_direct_packet_access.c b/tools/testing/selftests/bpf/progs/verifier_xdp_direct_packet_access.c
new file mode 100644
index 000000000000..df2dfd1b15d1
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/verifier_xdp_direct_packet_access.c
@@ -0,0 +1,1722 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Converted from tools/testing/selftests/bpf/verifier/xdp_direct_packet_access.c */
+
+#include <linux/bpf.h>
+#include <bpf/bpf_helpers.h>
+#include "bpf_misc.h"
+
+SEC("xdp")
+__description("XDP pkt read, pkt_end mangling, bad access 1")
+__failure __msg("R3 pointer arithmetic on pkt_end")
+__naked void end_mangling_bad_access_1(void)
+{
+ asm volatile (" \
+ r2 = *(u32*)(r1 + %[xdp_md_data]); \
+ r3 = *(u32*)(r1 + %[xdp_md_data_end]); \
+ r1 = r2; \
+ r1 += 8; \
+ r3 += 8; \
+ if r1 > r3 goto l0_%=; \
+ r0 = *(u64*)(r1 - 8); \
+l0_%=: r0 = 0; \
+ exit; \
+" :
+ : __imm_const(xdp_md_data, offsetof(struct xdp_md, data)),
+ __imm_const(xdp_md_data_end, offsetof(struct xdp_md, data_end))
+ : __clobber_all);
+}
+
+SEC("xdp")
+__description("XDP pkt read, pkt_end mangling, bad access 2")
+__failure __msg("R3 pointer arithmetic on pkt_end")
+__naked void end_mangling_bad_access_2(void)
+{
+ asm volatile (" \
+ r2 = *(u32*)(r1 + %[xdp_md_data]); \
+ r3 = *(u32*)(r1 + %[xdp_md_data_end]); \
+ r1 = r2; \
+ r1 += 8; \
+ r3 -= 8; \
+ if r1 > r3 goto l0_%=; \
+ r0 = *(u64*)(r1 - 8); \
+l0_%=: r0 = 0; \
+ exit; \
+" :
+ : __imm_const(xdp_md_data, offsetof(struct xdp_md, data)),
+ __imm_const(xdp_md_data_end, offsetof(struct xdp_md, data_end))
+ : __clobber_all);
+}
+
+SEC("xdp")
+__description("XDP pkt read, pkt_data' > pkt_end, corner case, good access")
+__success __retval(0) __flag(BPF_F_ANY_ALIGNMENT)
+__naked void end_corner_case_good_access_1(void)
+{
+ asm volatile (" \
+ r2 = *(u32*)(r1 + %[xdp_md_data]); \
+ r3 = *(u32*)(r1 + %[xdp_md_data_end]); \
+ r1 = r2; \
+ r1 += 8; \
+ if r1 > r3 goto l0_%=; \
+ r0 = *(u64*)(r1 - 8); \
+l0_%=: r0 = 0; \
+ exit; \
+" :
+ : __imm_const(xdp_md_data, offsetof(struct xdp_md, data)),
+ __imm_const(xdp_md_data_end, offsetof(struct xdp_md, data_end))
+ : __clobber_all);
+}
+
+SEC("xdp")
+__description("XDP pkt read, pkt_data' > pkt_end, bad access 1")
+__failure __msg("R1 offset is outside of the packet")
+__flag(BPF_F_ANY_ALIGNMENT)
+__naked void pkt_end_bad_access_1_1(void)
+{
+ asm volatile (" \
+ r2 = *(u32*)(r1 + %[xdp_md_data]); \
+ r3 = *(u32*)(r1 + %[xdp_md_data_end]); \
+ r1 = r2; \
+ r1 += 8; \
+ if r1 > r3 goto l0_%=; \
+ r0 = *(u64*)(r1 - 4); \
+l0_%=: r0 = 0; \
+ exit; \
+" :
+ : __imm_const(xdp_md_data, offsetof(struct xdp_md, data)),
+ __imm_const(xdp_md_data_end, offsetof(struct xdp_md, data_end))
+ : __clobber_all);
+}
+
+SEC("xdp")
+__description("XDP pkt read, pkt_data' > pkt_end, bad access 2")
+__failure __msg("R1 offset is outside of the packet")
+__flag(BPF_F_ANY_ALIGNMENT)
+__naked void pkt_end_bad_access_2_1(void)
+{
+ asm volatile (" \
+ r2 = *(u32*)(r1 + %[xdp_md_data]); \
+ r3 = *(u32*)(r1 + %[xdp_md_data_end]); \
+ r1 = r2; \
+ r1 += 8; \
+ if r1 > r3 goto l0_%=; \
+l0_%=: r0 = *(u64*)(r1 - 8); \
+ r0 = 0; \
+ exit; \
+" :
+ : __imm_const(xdp_md_data, offsetof(struct xdp_md, data)),
+ __imm_const(xdp_md_data_end, offsetof(struct xdp_md, data_end))
+ : __clobber_all);
+}
+
+SEC("xdp")
+__description("XDP pkt read, pkt_data' > pkt_end, corner case +1, good access")
+__success __retval(0) __flag(BPF_F_ANY_ALIGNMENT)
+__naked void corner_case_1_good_access_1(void)
+{
+ asm volatile (" \
+ r2 = *(u32*)(r1 + %[xdp_md_data]); \
+ r3 = *(u32*)(r1 + %[xdp_md_data_end]); \
+ r1 = r2; \
+ r1 += 9; \
+ if r1 > r3 goto l0_%=; \
+ r0 = *(u64*)(r1 - 9); \
+l0_%=: r0 = 0; \
+ exit; \
+" :
+ : __imm_const(xdp_md_data, offsetof(struct xdp_md, data)),
+ __imm_const(xdp_md_data_end, offsetof(struct xdp_md, data_end))
+ : __clobber_all);
+}
+
+SEC("xdp")
+__description("XDP pkt read, pkt_data' > pkt_end, corner case -1, bad access")
+__failure __msg("R1 offset is outside of the packet")
+__flag(BPF_F_ANY_ALIGNMENT)
+__naked void corner_case_1_bad_access_1(void)
+{
+ asm volatile (" \
+ r2 = *(u32*)(r1 + %[xdp_md_data]); \
+ r3 = *(u32*)(r1 + %[xdp_md_data_end]); \
+ r1 = r2; \
+ r1 += 7; \
+ if r1 > r3 goto l0_%=; \
+ r0 = *(u64*)(r1 - 7); \
+l0_%=: r0 = 0; \
+ exit; \
+" :
+ : __imm_const(xdp_md_data, offsetof(struct xdp_md, data)),
+ __imm_const(xdp_md_data_end, offsetof(struct xdp_md, data_end))
+ : __clobber_all);
+}
+
+SEC("xdp")
+__description("XDP pkt read, pkt_end > pkt_data', good access")
+__success __retval(0) __flag(BPF_F_ANY_ALIGNMENT)
+__naked void end_pkt_data_good_access_1(void)
+{
+ asm volatile (" \
+ r2 = *(u32*)(r1 + %[xdp_md_data]); \
+ r3 = *(u32*)(r1 + %[xdp_md_data_end]); \
+ r1 = r2; \
+ r1 += 8; \
+ if r3 > r1 goto l0_%=; \
+ goto l1_%=; \
+l0_%=: r0 = *(u32*)(r1 - 5); \
+l1_%=: r0 = 0; \
+ exit; \
+" :
+ : __imm_const(xdp_md_data, offsetof(struct xdp_md, data)),
+ __imm_const(xdp_md_data_end, offsetof(struct xdp_md, data_end))
+ : __clobber_all);
+}
+
+SEC("xdp")
+__description("XDP pkt read, pkt_end > pkt_data', corner case -1, bad access")
+__failure __msg("R1 offset is outside of the packet")
+__flag(BPF_F_ANY_ALIGNMENT)
+__naked void corner_case_1_bad_access_2(void)
+{
+ asm volatile (" \
+ r2 = *(u32*)(r1 + %[xdp_md_data]); \
+ r3 = *(u32*)(r1 + %[xdp_md_data_end]); \
+ r1 = r2; \
+ r1 += 6; \
+ if r3 > r1 goto l0_%=; \
+ goto l1_%=; \
+l0_%=: r0 = *(u64*)(r1 - 6); \
+l1_%=: r0 = 0; \
+ exit; \
+" :
+ : __imm_const(xdp_md_data, offsetof(struct xdp_md, data)),
+ __imm_const(xdp_md_data_end, offsetof(struct xdp_md, data_end))
+ : __clobber_all);
+}
+
+SEC("xdp")
+__description("XDP pkt read, pkt_end > pkt_data', bad access 2")
+__failure __msg("R1 offset is outside of the packet")
+__flag(BPF_F_ANY_ALIGNMENT)
+__naked void pkt_data_bad_access_2_1(void)
+{
+ asm volatile (" \
+ r2 = *(u32*)(r1 + %[xdp_md_data]); \
+ r3 = *(u32*)(r1 + %[xdp_md_data_end]); \
+ r1 = r2; \
+ r1 += 8; \
+ if r3 > r1 goto l0_%=; \
+ r0 = *(u64*)(r1 - 8); \
+l0_%=: r0 = 0; \
+ exit; \
+" :
+ : __imm_const(xdp_md_data, offsetof(struct xdp_md, data)),
+ __imm_const(xdp_md_data_end, offsetof(struct xdp_md, data_end))
+ : __clobber_all);
+}
+
+SEC("xdp")
+__description("XDP pkt read, pkt_end > pkt_data', corner case, good access")
+__success __retval(0) __flag(BPF_F_ANY_ALIGNMENT)
+__naked void data_corner_case_good_access_1(void)
+{
+ asm volatile (" \
+ r2 = *(u32*)(r1 + %[xdp_md_data]); \
+ r3 = *(u32*)(r1 + %[xdp_md_data_end]); \
+ r1 = r2; \
+ r1 += 7; \
+ if r3 > r1 goto l0_%=; \
+ goto l1_%=; \
+l0_%=: r0 = *(u64*)(r1 - 7); \
+l1_%=: r0 = 0; \
+ exit; \
+" :
+ : __imm_const(xdp_md_data, offsetof(struct xdp_md, data)),
+ __imm_const(xdp_md_data_end, offsetof(struct xdp_md, data_end))
+ : __clobber_all);
+}
+
+SEC("xdp")
+__description("XDP pkt read, pkt_end > pkt_data', corner case +1, good access")
+__success __retval(0) __flag(BPF_F_ANY_ALIGNMENT)
+__naked void corner_case_1_good_access_2(void)
+{
+ asm volatile (" \
+ r2 = *(u32*)(r1 + %[xdp_md_data]); \
+ r3 = *(u32*)(r1 + %[xdp_md_data_end]); \
+ r1 = r2; \
+ r1 += 8; \
+ if r3 > r1 goto l0_%=; \
+ goto l1_%=; \
+l0_%=: r0 = *(u64*)(r1 - 8); \
+l1_%=: r0 = 0; \
+ exit; \
+" :
+ : __imm_const(xdp_md_data, offsetof(struct xdp_md, data)),
+ __imm_const(xdp_md_data_end, offsetof(struct xdp_md, data_end))
+ : __clobber_all);
+}
+
+SEC("xdp")
+__description("XDP pkt read, pkt_data' < pkt_end, good access")
+__success __retval(0) __flag(BPF_F_ANY_ALIGNMENT)
+__naked void data_pkt_end_good_access_1(void)
+{
+ asm volatile (" \
+ r2 = *(u32*)(r1 + %[xdp_md_data]); \
+ r3 = *(u32*)(r1 + %[xdp_md_data_end]); \
+ r1 = r2; \
+ r1 += 8; \
+ if r1 < r3 goto l0_%=; \
+ goto l1_%=; \
+l0_%=: r0 = *(u32*)(r1 - 5); \
+l1_%=: r0 = 0; \
+ exit; \
+" :
+ : __imm_const(xdp_md_data, offsetof(struct xdp_md, data)),
+ __imm_const(xdp_md_data_end, offsetof(struct xdp_md, data_end))
+ : __clobber_all);
+}
+
+SEC("xdp")
+__description("XDP pkt read, pkt_data' < pkt_end, corner case -1, bad access")
+__failure __msg("R1 offset is outside of the packet")
+__flag(BPF_F_ANY_ALIGNMENT)
+__naked void corner_case_1_bad_access_3(void)
+{
+ asm volatile (" \
+ r2 = *(u32*)(r1 + %[xdp_md_data]); \
+ r3 = *(u32*)(r1 + %[xdp_md_data_end]); \
+ r1 = r2; \
+ r1 += 6; \
+ if r1 < r3 goto l0_%=; \
+ goto l1_%=; \
+l0_%=: r0 = *(u64*)(r1 - 6); \
+l1_%=: r0 = 0; \
+ exit; \
+" :
+ : __imm_const(xdp_md_data, offsetof(struct xdp_md, data)),
+ __imm_const(xdp_md_data_end, offsetof(struct xdp_md, data_end))
+ : __clobber_all);
+}
+
+SEC("xdp")
+__description("XDP pkt read, pkt_data' < pkt_end, bad access 2")
+__failure __msg("R1 offset is outside of the packet")
+__flag(BPF_F_ANY_ALIGNMENT)
+__naked void pkt_end_bad_access_2_2(void)
+{
+ asm volatile (" \
+ r2 = *(u32*)(r1 + %[xdp_md_data]); \
+ r3 = *(u32*)(r1 + %[xdp_md_data_end]); \
+ r1 = r2; \
+ r1 += 8; \
+ if r1 < r3 goto l0_%=; \
+ r0 = *(u64*)(r1 - 8); \
+l0_%=: r0 = 0; \
+ exit; \
+" :
+ : __imm_const(xdp_md_data, offsetof(struct xdp_md, data)),
+ __imm_const(xdp_md_data_end, offsetof(struct xdp_md, data_end))
+ : __clobber_all);
+}
+
+SEC("xdp")
+__description("XDP pkt read, pkt_data' < pkt_end, corner case, good access")
+__success __retval(0) __flag(BPF_F_ANY_ALIGNMENT)
+__naked void end_corner_case_good_access_2(void)
+{
+ asm volatile (" \
+ r2 = *(u32*)(r1 + %[xdp_md_data]); \
+ r3 = *(u32*)(r1 + %[xdp_md_data_end]); \
+ r1 = r2; \
+ r1 += 7; \
+ if r1 < r3 goto l0_%=; \
+ goto l1_%=; \
+l0_%=: r0 = *(u64*)(r1 - 7); \
+l1_%=: r0 = 0; \
+ exit; \
+" :
+ : __imm_const(xdp_md_data, offsetof(struct xdp_md, data)),
+ __imm_const(xdp_md_data_end, offsetof(struct xdp_md, data_end))
+ : __clobber_all);
+}
+
+SEC("xdp")
+__description("XDP pkt read, pkt_data' < pkt_end, corner case +1, good access")
+__success __retval(0) __flag(BPF_F_ANY_ALIGNMENT)
+__naked void corner_case_1_good_access_3(void)
+{
+ asm volatile (" \
+ r2 = *(u32*)(r1 + %[xdp_md_data]); \
+ r3 = *(u32*)(r1 + %[xdp_md_data_end]); \
+ r1 = r2; \
+ r1 += 8; \
+ if r1 < r3 goto l0_%=; \
+ goto l1_%=; \
+l0_%=: r0 = *(u64*)(r1 - 8); \
+l1_%=: r0 = 0; \
+ exit; \
+" :
+ : __imm_const(xdp_md_data, offsetof(struct xdp_md, data)),
+ __imm_const(xdp_md_data_end, offsetof(struct xdp_md, data_end))
+ : __clobber_all);
+}
+
+SEC("xdp")
+__description("XDP pkt read, pkt_end < pkt_data', corner case, good access")
+__success __retval(0) __flag(BPF_F_ANY_ALIGNMENT)
+__naked void data_corner_case_good_access_2(void)
+{
+ asm volatile (" \
+ r2 = *(u32*)(r1 + %[xdp_md_data]); \
+ r3 = *(u32*)(r1 + %[xdp_md_data_end]); \
+ r1 = r2; \
+ r1 += 8; \
+ if r3 < r1 goto l0_%=; \
+ r0 = *(u64*)(r1 - 8); \
+l0_%=: r0 = 0; \
+ exit; \
+" :
+ : __imm_const(xdp_md_data, offsetof(struct xdp_md, data)),
+ __imm_const(xdp_md_data_end, offsetof(struct xdp_md, data_end))
+ : __clobber_all);
+}
+
+SEC("xdp")
+__description("XDP pkt read, pkt_end < pkt_data', bad access 1")
+__failure __msg("R1 offset is outside of the packet")
+__flag(BPF_F_ANY_ALIGNMENT)
+__naked void pkt_data_bad_access_1_1(void)
+{
+ asm volatile (" \
+ r2 = *(u32*)(r1 + %[xdp_md_data]); \
+ r3 = *(u32*)(r1 + %[xdp_md_data_end]); \
+ r1 = r2; \
+ r1 += 8; \
+ if r3 < r1 goto l0_%=; \
+ r0 = *(u64*)(r1 - 4); \
+l0_%=: r0 = 0; \
+ exit; \
+" :
+ : __imm_const(xdp_md_data, offsetof(struct xdp_md, data)),
+ __imm_const(xdp_md_data_end, offsetof(struct xdp_md, data_end))
+ : __clobber_all);
+}
+
+SEC("xdp")
+__description("XDP pkt read, pkt_end < pkt_data', bad access 2")
+__failure __msg("R1 offset is outside of the packet")
+__flag(BPF_F_ANY_ALIGNMENT)
+__naked void pkt_data_bad_access_2_2(void)
+{
+ asm volatile (" \
+ r2 = *(u32*)(r1 + %[xdp_md_data]); \
+ r3 = *(u32*)(r1 + %[xdp_md_data_end]); \
+ r1 = r2; \
+ r1 += 8; \
+ if r3 < r1 goto l0_%=; \
+l0_%=: r0 = *(u64*)(r1 - 8); \
+ r0 = 0; \
+ exit; \
+" :
+ : __imm_const(xdp_md_data, offsetof(struct xdp_md, data)),
+ __imm_const(xdp_md_data_end, offsetof(struct xdp_md, data_end))
+ : __clobber_all);
+}
+
+SEC("xdp")
+__description("XDP pkt read, pkt_end < pkt_data', corner case +1, good access")
+__success __retval(0) __flag(BPF_F_ANY_ALIGNMENT)
+__naked void corner_case_1_good_access_4(void)
+{
+ asm volatile (" \
+ r2 = *(u32*)(r1 + %[xdp_md_data]); \
+ r3 = *(u32*)(r1 + %[xdp_md_data_end]); \
+ r1 = r2; \
+ r1 += 9; \
+ if r3 < r1 goto l0_%=; \
+ r0 = *(u64*)(r1 - 9); \
+l0_%=: r0 = 0; \
+ exit; \
+" :
+ : __imm_const(xdp_md_data, offsetof(struct xdp_md, data)),
+ __imm_const(xdp_md_data_end, offsetof(struct xdp_md, data_end))
+ : __clobber_all);
+}
+
+SEC("xdp")
+__description("XDP pkt read, pkt_end < pkt_data', corner case -1, bad access")
+__failure __msg("R1 offset is outside of the packet")
+__flag(BPF_F_ANY_ALIGNMENT)
+__naked void corner_case_1_bad_access_4(void)
+{
+ asm volatile (" \
+ r2 = *(u32*)(r1 + %[xdp_md_data]); \
+ r3 = *(u32*)(r1 + %[xdp_md_data_end]); \
+ r1 = r2; \
+ r1 += 7; \
+ if r3 < r1 goto l0_%=; \
+ r0 = *(u64*)(r1 - 7); \
+l0_%=: r0 = 0; \
+ exit; \
+" :
+ : __imm_const(xdp_md_data, offsetof(struct xdp_md, data)),
+ __imm_const(xdp_md_data_end, offsetof(struct xdp_md, data_end))
+ : __clobber_all);
+}
+
+SEC("xdp")
+__description("XDP pkt read, pkt_data' >= pkt_end, good access")
+__success __retval(0) __flag(BPF_F_ANY_ALIGNMENT)
+__naked void data_pkt_end_good_access_2(void)
+{
+ asm volatile (" \
+ r2 = *(u32*)(r1 + %[xdp_md_data]); \
+ r3 = *(u32*)(r1 + %[xdp_md_data_end]); \
+ r1 = r2; \
+ r1 += 8; \
+ if r1 >= r3 goto l0_%=; \
+ r0 = *(u32*)(r1 - 5); \
+l0_%=: r0 = 0; \
+ exit; \
+" :
+ : __imm_const(xdp_md_data, offsetof(struct xdp_md, data)),
+ __imm_const(xdp_md_data_end, offsetof(struct xdp_md, data_end))
+ : __clobber_all);
+}
+
+SEC("xdp")
+__description("XDP pkt read, pkt_data' >= pkt_end, corner case -1, bad access")
+__failure __msg("R1 offset is outside of the packet")
+__flag(BPF_F_ANY_ALIGNMENT)
+__naked void corner_case_1_bad_access_5(void)
+{
+ asm volatile (" \
+ r2 = *(u32*)(r1 + %[xdp_md_data]); \
+ r3 = *(u32*)(r1 + %[xdp_md_data_end]); \
+ r1 = r2; \
+ r1 += 6; \
+ if r1 >= r3 goto l0_%=; \
+ r0 = *(u64*)(r1 - 6); \
+l0_%=: r0 = 0; \
+ exit; \
+" :
+ : __imm_const(xdp_md_data, offsetof(struct xdp_md, data)),
+ __imm_const(xdp_md_data_end, offsetof(struct xdp_md, data_end))
+ : __clobber_all);
+}
+
+SEC("xdp")
+__description("XDP pkt read, pkt_data' >= pkt_end, bad access 2")
+__failure __msg("R1 offset is outside of the packet")
+__flag(BPF_F_ANY_ALIGNMENT)
+__naked void pkt_end_bad_access_2_3(void)
+{
+ asm volatile (" \
+ r2 = *(u32*)(r1 + %[xdp_md_data]); \
+ r3 = *(u32*)(r1 + %[xdp_md_data_end]); \
+ r1 = r2; \
+ r1 += 8; \
+ if r1 >= r3 goto l0_%=; \
+l0_%=: r0 = *(u32*)(r1 - 5); \
+ r0 = 0; \
+ exit; \
+" :
+ : __imm_const(xdp_md_data, offsetof(struct xdp_md, data)),
+ __imm_const(xdp_md_data_end, offsetof(struct xdp_md, data_end))
+ : __clobber_all);
+}
+
+SEC("xdp")
+__description("XDP pkt read, pkt_data' >= pkt_end, corner case, good access")
+__success __retval(0) __flag(BPF_F_ANY_ALIGNMENT)
+__naked void end_corner_case_good_access_3(void)
+{
+ asm volatile (" \
+ r2 = *(u32*)(r1 + %[xdp_md_data]); \
+ r3 = *(u32*)(r1 + %[xdp_md_data_end]); \
+ r1 = r2; \
+ r1 += 7; \
+ if r1 >= r3 goto l0_%=; \
+ r0 = *(u64*)(r1 - 7); \
+l0_%=: r0 = 0; \
+ exit; \
+" :
+ : __imm_const(xdp_md_data, offsetof(struct xdp_md, data)),
+ __imm_const(xdp_md_data_end, offsetof(struct xdp_md, data_end))
+ : __clobber_all);
+}
+
+SEC("xdp")
+__description("XDP pkt read, pkt_data' >= pkt_end, corner case +1, good access")
+__success __retval(0) __flag(BPF_F_ANY_ALIGNMENT)
+__naked void corner_case_1_good_access_5(void)
+{
+ asm volatile (" \
+ r2 = *(u32*)(r1 + %[xdp_md_data]); \
+ r3 = *(u32*)(r1 + %[xdp_md_data_end]); \
+ r1 = r2; \
+ r1 += 8; \
+ if r1 >= r3 goto l0_%=; \
+ r0 = *(u64*)(r1 - 8); \
+l0_%=: r0 = 0; \
+ exit; \
+" :
+ : __imm_const(xdp_md_data, offsetof(struct xdp_md, data)),
+ __imm_const(xdp_md_data_end, offsetof(struct xdp_md, data_end))
+ : __clobber_all);
+}
+
+SEC("xdp")
+__description("XDP pkt read, pkt_end >= pkt_data', corner case, good access")
+__success __retval(0) __flag(BPF_F_ANY_ALIGNMENT)
+__naked void data_corner_case_good_access_3(void)
+{
+ asm volatile (" \
+ r2 = *(u32*)(r1 + %[xdp_md_data]); \
+ r3 = *(u32*)(r1 + %[xdp_md_data_end]); \
+ r1 = r2; \
+ r1 += 8; \
+ if r3 >= r1 goto l0_%=; \
+ goto l1_%=; \
+l0_%=: r0 = *(u64*)(r1 - 8); \
+l1_%=: r0 = 0; \
+ exit; \
+" :
+ : __imm_const(xdp_md_data, offsetof(struct xdp_md, data)),
+ __imm_const(xdp_md_data_end, offsetof(struct xdp_md, data_end))
+ : __clobber_all);
+}
+
+SEC("xdp")
+__description("XDP pkt read, pkt_end >= pkt_data', bad access 1")
+__failure __msg("R1 offset is outside of the packet")
+__flag(BPF_F_ANY_ALIGNMENT)
+__naked void pkt_data_bad_access_1_2(void)
+{
+ asm volatile (" \
+ r2 = *(u32*)(r1 + %[xdp_md_data]); \
+ r3 = *(u32*)(r1 + %[xdp_md_data_end]); \
+ r1 = r2; \
+ r1 += 8; \
+ if r3 >= r1 goto l0_%=; \
+ goto l1_%=; \
+l0_%=: r0 = *(u64*)(r1 - 4); \
+l1_%=: r0 = 0; \
+ exit; \
+" :
+ : __imm_const(xdp_md_data, offsetof(struct xdp_md, data)),
+ __imm_const(xdp_md_data_end, offsetof(struct xdp_md, data_end))
+ : __clobber_all);
+}
+
+SEC("xdp")
+__description("XDP pkt read, pkt_end >= pkt_data', bad access 2")
+__failure __msg("R1 offset is outside of the packet")
+__flag(BPF_F_ANY_ALIGNMENT)
+__naked void pkt_data_bad_access_2_3(void)
+{
+ asm volatile (" \
+ r2 = *(u32*)(r1 + %[xdp_md_data]); \
+ r3 = *(u32*)(r1 + %[xdp_md_data_end]); \
+ r1 = r2; \
+ r1 += 8; \
+ if r3 >= r1 goto l0_%=; \
+ r0 = *(u64*)(r1 - 8); \
+l0_%=: r0 = 0; \
+ exit; \
+" :
+ : __imm_const(xdp_md_data, offsetof(struct xdp_md, data)),
+ __imm_const(xdp_md_data_end, offsetof(struct xdp_md, data_end))
+ : __clobber_all);
+}
+
+SEC("xdp")
+__description("XDP pkt read, pkt_end >= pkt_data', corner case +1, good access")
+__success __retval(0) __flag(BPF_F_ANY_ALIGNMENT)
+__naked void corner_case_1_good_access_6(void)
+{
+ asm volatile (" \
+ r2 = *(u32*)(r1 + %[xdp_md_data]); \
+ r3 = *(u32*)(r1 + %[xdp_md_data_end]); \
+ r1 = r2; \
+ r1 += 9; \
+ if r3 >= r1 goto l0_%=; \
+ goto l1_%=; \
+l0_%=: r0 = *(u64*)(r1 - 9); \
+l1_%=: r0 = 0; \
+ exit; \
+" :
+ : __imm_const(xdp_md_data, offsetof(struct xdp_md, data)),
+ __imm_const(xdp_md_data_end, offsetof(struct xdp_md, data_end))
+ : __clobber_all);
+}
+
+SEC("xdp")
+__description("XDP pkt read, pkt_end >= pkt_data', corner case -1, bad access")
+__failure __msg("R1 offset is outside of the packet")
+__flag(BPF_F_ANY_ALIGNMENT)
+__naked void corner_case_1_bad_access_6(void)
+{
+ asm volatile (" \
+ r2 = *(u32*)(r1 + %[xdp_md_data]); \
+ r3 = *(u32*)(r1 + %[xdp_md_data_end]); \
+ r1 = r2; \
+ r1 += 7; \
+ if r3 >= r1 goto l0_%=; \
+ goto l1_%=; \
+l0_%=: r0 = *(u64*)(r1 - 7); \
+l1_%=: r0 = 0; \
+ exit; \
+" :
+ : __imm_const(xdp_md_data, offsetof(struct xdp_md, data)),
+ __imm_const(xdp_md_data_end, offsetof(struct xdp_md, data_end))
+ : __clobber_all);
+}
+
+SEC("xdp")
+__description("XDP pkt read, pkt_data' <= pkt_end, corner case, good access")
+__success __retval(0) __flag(BPF_F_ANY_ALIGNMENT)
+__naked void end_corner_case_good_access_4(void)
+{
+ asm volatile (" \
+ r2 = *(u32*)(r1 + %[xdp_md_data]); \
+ r3 = *(u32*)(r1 + %[xdp_md_data_end]); \
+ r1 = r2; \
+ r1 += 8; \
+ if r1 <= r3 goto l0_%=; \
+ goto l1_%=; \
+l0_%=: r0 = *(u64*)(r1 - 8); \
+l1_%=: r0 = 0; \
+ exit; \
+" :
+ : __imm_const(xdp_md_data, offsetof(struct xdp_md, data)),
+ __imm_const(xdp_md_data_end, offsetof(struct xdp_md, data_end))
+ : __clobber_all);
+}
+
+SEC("xdp")
+__description("XDP pkt read, pkt_data' <= pkt_end, bad access 1")
+__failure __msg("R1 offset is outside of the packet")
+__flag(BPF_F_ANY_ALIGNMENT)
+__naked void pkt_end_bad_access_1_2(void)
+{
+ asm volatile (" \
+ r2 = *(u32*)(r1 + %[xdp_md_data]); \
+ r3 = *(u32*)(r1 + %[xdp_md_data_end]); \
+ r1 = r2; \
+ r1 += 8; \
+ if r1 <= r3 goto l0_%=; \
+ goto l1_%=; \
+l0_%=: r0 = *(u64*)(r1 - 4); \
+l1_%=: r0 = 0; \
+ exit; \
+" :
+ : __imm_const(xdp_md_data, offsetof(struct xdp_md, data)),
+ __imm_const(xdp_md_data_end, offsetof(struct xdp_md, data_end))
+ : __clobber_all);
+}
+
+SEC("xdp")
+__description("XDP pkt read, pkt_data' <= pkt_end, bad access 2")
+__failure __msg("R1 offset is outside of the packet")
+__flag(BPF_F_ANY_ALIGNMENT)
+__naked void pkt_end_bad_access_2_4(void)
+{
+ asm volatile (" \
+ r2 = *(u32*)(r1 + %[xdp_md_data]); \
+ r3 = *(u32*)(r1 + %[xdp_md_data_end]); \
+ r1 = r2; \
+ r1 += 8; \
+ if r1 <= r3 goto l0_%=; \
+ r0 = *(u64*)(r1 - 8); \
+l0_%=: r0 = 0; \
+ exit; \
+" :
+ : __imm_const(xdp_md_data, offsetof(struct xdp_md, data)),
+ __imm_const(xdp_md_data_end, offsetof(struct xdp_md, data_end))
+ : __clobber_all);
+}
+
+SEC("xdp")
+__description("XDP pkt read, pkt_data' <= pkt_end, corner case +1, good access")
+__success __retval(0) __flag(BPF_F_ANY_ALIGNMENT)
+__naked void corner_case_1_good_access_7(void)
+{
+ asm volatile (" \
+ r2 = *(u32*)(r1 + %[xdp_md_data]); \
+ r3 = *(u32*)(r1 + %[xdp_md_data_end]); \
+ r1 = r2; \
+ r1 += 9; \
+ if r1 <= r3 goto l0_%=; \
+ goto l1_%=; \
+l0_%=: r0 = *(u64*)(r1 - 9); \
+l1_%=: r0 = 0; \
+ exit; \
+" :
+ : __imm_const(xdp_md_data, offsetof(struct xdp_md, data)),
+ __imm_const(xdp_md_data_end, offsetof(struct xdp_md, data_end))
+ : __clobber_all);
+}
+
+SEC("xdp")
+__description("XDP pkt read, pkt_data' <= pkt_end, corner case -1, bad access")
+__failure __msg("R1 offset is outside of the packet")
+__flag(BPF_F_ANY_ALIGNMENT)
+__naked void corner_case_1_bad_access_7(void)
+{
+ asm volatile (" \
+ r2 = *(u32*)(r1 + %[xdp_md_data]); \
+ r3 = *(u32*)(r1 + %[xdp_md_data_end]); \
+ r1 = r2; \
+ r1 += 7; \
+ if r1 <= r3 goto l0_%=; \
+ goto l1_%=; \
+l0_%=: r0 = *(u64*)(r1 - 7); \
+l1_%=: r0 = 0; \
+ exit; \
+" :
+ : __imm_const(xdp_md_data, offsetof(struct xdp_md, data)),
+ __imm_const(xdp_md_data_end, offsetof(struct xdp_md, data_end))
+ : __clobber_all);
+}
+
+SEC("xdp")
+__description("XDP pkt read, pkt_end <= pkt_data', good access")
+__success __retval(0) __flag(BPF_F_ANY_ALIGNMENT)
+__naked void end_pkt_data_good_access_2(void)
+{
+ asm volatile (" \
+ r2 = *(u32*)(r1 + %[xdp_md_data]); \
+ r3 = *(u32*)(r1 + %[xdp_md_data_end]); \
+ r1 = r2; \
+ r1 += 8; \
+ if r3 <= r1 goto l0_%=; \
+ r0 = *(u32*)(r1 - 5); \
+l0_%=: r0 = 0; \
+ exit; \
+" :
+ : __imm_const(xdp_md_data, offsetof(struct xdp_md, data)),
+ __imm_const(xdp_md_data_end, offsetof(struct xdp_md, data_end))
+ : __clobber_all);
+}
+
+SEC("xdp")
+__description("XDP pkt read, pkt_end <= pkt_data', corner case -1, bad access")
+__failure __msg("R1 offset is outside of the packet")
+__flag(BPF_F_ANY_ALIGNMENT)
+__naked void corner_case_1_bad_access_8(void)
+{
+ asm volatile (" \
+ r2 = *(u32*)(r1 + %[xdp_md_data]); \
+ r3 = *(u32*)(r1 + %[xdp_md_data_end]); \
+ r1 = r2; \
+ r1 += 6; \
+ if r3 <= r1 goto l0_%=; \
+ r0 = *(u64*)(r1 - 6); \
+l0_%=: r0 = 0; \
+ exit; \
+" :
+ : __imm_const(xdp_md_data, offsetof(struct xdp_md, data)),
+ __imm_const(xdp_md_data_end, offsetof(struct xdp_md, data_end))
+ : __clobber_all);
+}
+
+SEC("xdp")
+__description("XDP pkt read, pkt_end <= pkt_data', bad access 2")
+__failure __msg("R1 offset is outside of the packet")
+__flag(BPF_F_ANY_ALIGNMENT)
+__naked void pkt_data_bad_access_2_4(void)
+{
+ asm volatile (" \
+ r2 = *(u32*)(r1 + %[xdp_md_data]); \
+ r3 = *(u32*)(r1 + %[xdp_md_data_end]); \
+ r1 = r2; \
+ r1 += 8; \
+ if r3 <= r1 goto l0_%=; \
+l0_%=: r0 = *(u32*)(r1 - 5); \
+ r0 = 0; \
+ exit; \
+" :
+ : __imm_const(xdp_md_data, offsetof(struct xdp_md, data)),
+ __imm_const(xdp_md_data_end, offsetof(struct xdp_md, data_end))
+ : __clobber_all);
+}
+
+SEC("xdp")
+__description("XDP pkt read, pkt_end <= pkt_data', corner case, good access")
+__success __retval(0) __flag(BPF_F_ANY_ALIGNMENT)
+__naked void data_corner_case_good_access_4(void)
+{
+ asm volatile (" \
+ r2 = *(u32*)(r1 + %[xdp_md_data]); \
+ r3 = *(u32*)(r1 + %[xdp_md_data_end]); \
+ r1 = r2; \
+ r1 += 7; \
+ if r3 <= r1 goto l0_%=; \
+ r0 = *(u64*)(r1 - 7); \
+l0_%=: r0 = 0; \
+ exit; \
+" :
+ : __imm_const(xdp_md_data, offsetof(struct xdp_md, data)),
+ __imm_const(xdp_md_data_end, offsetof(struct xdp_md, data_end))
+ : __clobber_all);
+}
+
+SEC("xdp")
+__description("XDP pkt read, pkt_end <= pkt_data', corner case +1, good access")
+__success __retval(0) __flag(BPF_F_ANY_ALIGNMENT)
+__naked void corner_case_1_good_access_8(void)
+{
+ asm volatile (" \
+ r2 = *(u32*)(r1 + %[xdp_md_data]); \
+ r3 = *(u32*)(r1 + %[xdp_md_data_end]); \
+ r1 = r2; \
+ r1 += 8; \
+ if r3 <= r1 goto l0_%=; \
+ r0 = *(u64*)(r1 - 8); \
+l0_%=: r0 = 0; \
+ exit; \
+" :
+ : __imm_const(xdp_md_data, offsetof(struct xdp_md, data)),
+ __imm_const(xdp_md_data_end, offsetof(struct xdp_md, data_end))
+ : __clobber_all);
+}
+
+SEC("xdp")
+__description("XDP pkt read, pkt_meta' > pkt_data, corner case, good access")
+__success __retval(0) __flag(BPF_F_ANY_ALIGNMENT)
+__naked void data_corner_case_good_access_5(void)
+{
+ asm volatile (" \
+ r2 = *(u32*)(r1 + %[xdp_md_data_meta]); \
+ r3 = *(u32*)(r1 + %[xdp_md_data]); \
+ r1 = r2; \
+ r1 += 8; \
+ if r1 > r3 goto l0_%=; \
+ r0 = *(u64*)(r1 - 8); \
+l0_%=: r0 = 0; \
+ exit; \
+" :
+ : __imm_const(xdp_md_data, offsetof(struct xdp_md, data)),
+ __imm_const(xdp_md_data_meta, offsetof(struct xdp_md, data_meta))
+ : __clobber_all);
+}
+
+SEC("xdp")
+__description("XDP pkt read, pkt_meta' > pkt_data, bad access 1")
+__failure __msg("R1 offset is outside of the packet")
+__flag(BPF_F_ANY_ALIGNMENT)
+__naked void pkt_data_bad_access_1_3(void)
+{
+ asm volatile (" \
+ r2 = *(u32*)(r1 + %[xdp_md_data_meta]); \
+ r3 = *(u32*)(r1 + %[xdp_md_data]); \
+ r1 = r2; \
+ r1 += 8; \
+ if r1 > r3 goto l0_%=; \
+ r0 = *(u64*)(r1 - 4); \
+l0_%=: r0 = 0; \
+ exit; \
+" :
+ : __imm_const(xdp_md_data, offsetof(struct xdp_md, data)),
+ __imm_const(xdp_md_data_meta, offsetof(struct xdp_md, data_meta))
+ : __clobber_all);
+}
+
+SEC("xdp")
+__description("XDP pkt read, pkt_meta' > pkt_data, bad access 2")
+__failure __msg("R1 offset is outside of the packet")
+__flag(BPF_F_ANY_ALIGNMENT)
+__naked void pkt_data_bad_access_2_5(void)
+{
+ asm volatile (" \
+ r2 = *(u32*)(r1 + %[xdp_md_data_meta]); \
+ r3 = *(u32*)(r1 + %[xdp_md_data]); \
+ r1 = r2; \
+ r1 += 8; \
+ if r1 > r3 goto l0_%=; \
+l0_%=: r0 = *(u64*)(r1 - 8); \
+ r0 = 0; \
+ exit; \
+" :
+ : __imm_const(xdp_md_data, offsetof(struct xdp_md, data)),
+ __imm_const(xdp_md_data_meta, offsetof(struct xdp_md, data_meta))
+ : __clobber_all);
+}
+
+SEC("xdp")
+__description("XDP pkt read, pkt_meta' > pkt_data, corner case +1, good access")
+__success __retval(0) __flag(BPF_F_ANY_ALIGNMENT)
+__naked void corner_case_1_good_access_9(void)
+{
+ asm volatile (" \
+ r2 = *(u32*)(r1 + %[xdp_md_data_meta]); \
+ r3 = *(u32*)(r1 + %[xdp_md_data]); \
+ r1 = r2; \
+ r1 += 9; \
+ if r1 > r3 goto l0_%=; \
+ r0 = *(u64*)(r1 - 9); \
+l0_%=: r0 = 0; \
+ exit; \
+" :
+ : __imm_const(xdp_md_data, offsetof(struct xdp_md, data)),
+ __imm_const(xdp_md_data_meta, offsetof(struct xdp_md, data_meta))
+ : __clobber_all);
+}
+
+SEC("xdp")
+__description("XDP pkt read, pkt_meta' > pkt_data, corner case -1, bad access")
+__failure __msg("R1 offset is outside of the packet")
+__flag(BPF_F_ANY_ALIGNMENT)
+__naked void corner_case_1_bad_access_9(void)
+{
+ asm volatile (" \
+ r2 = *(u32*)(r1 + %[xdp_md_data_meta]); \
+ r3 = *(u32*)(r1 + %[xdp_md_data]); \
+ r1 = r2; \
+ r1 += 7; \
+ if r1 > r3 goto l0_%=; \
+ r0 = *(u64*)(r1 - 7); \
+l0_%=: r0 = 0; \
+ exit; \
+" :
+ : __imm_const(xdp_md_data, offsetof(struct xdp_md, data)),
+ __imm_const(xdp_md_data_meta, offsetof(struct xdp_md, data_meta))
+ : __clobber_all);
+}
+
+SEC("xdp")
+__description("XDP pkt read, pkt_data > pkt_meta', good access")
+__success __retval(0) __flag(BPF_F_ANY_ALIGNMENT)
+__naked void data_pkt_meta_good_access_1(void)
+{
+ asm volatile (" \
+ r2 = *(u32*)(r1 + %[xdp_md_data_meta]); \
+ r3 = *(u32*)(r1 + %[xdp_md_data]); \
+ r1 = r2; \
+ r1 += 8; \
+ if r3 > r1 goto l0_%=; \
+ goto l1_%=; \
+l0_%=: r0 = *(u32*)(r1 - 5); \
+l1_%=: r0 = 0; \
+ exit; \
+" :
+ : __imm_const(xdp_md_data, offsetof(struct xdp_md, data)),
+ __imm_const(xdp_md_data_meta, offsetof(struct xdp_md, data_meta))
+ : __clobber_all);
+}
+
+SEC("xdp")
+__description("XDP pkt read, pkt_data > pkt_meta', corner case -1, bad access")
+__failure __msg("R1 offset is outside of the packet")
+__flag(BPF_F_ANY_ALIGNMENT)
+__naked void corner_case_1_bad_access_10(void)
+{
+ asm volatile (" \
+ r2 = *(u32*)(r1 + %[xdp_md_data_meta]); \
+ r3 = *(u32*)(r1 + %[xdp_md_data]); \
+ r1 = r2; \
+ r1 += 6; \
+ if r3 > r1 goto l0_%=; \
+ goto l1_%=; \
+l0_%=: r0 = *(u64*)(r1 - 6); \
+l1_%=: r0 = 0; \
+ exit; \
+" :
+ : __imm_const(xdp_md_data, offsetof(struct xdp_md, data)),
+ __imm_const(xdp_md_data_meta, offsetof(struct xdp_md, data_meta))
+ : __clobber_all);
+}
+
+SEC("xdp")
+__description("XDP pkt read, pkt_data > pkt_meta', bad access 2")
+__failure __msg("R1 offset is outside of the packet")
+__flag(BPF_F_ANY_ALIGNMENT)
+__naked void pkt_meta_bad_access_2_1(void)
+{
+ asm volatile (" \
+ r2 = *(u32*)(r1 + %[xdp_md_data_meta]); \
+ r3 = *(u32*)(r1 + %[xdp_md_data]); \
+ r1 = r2; \
+ r1 += 8; \
+ if r3 > r1 goto l0_%=; \
+ r0 = *(u64*)(r1 - 8); \
+l0_%=: r0 = 0; \
+ exit; \
+" :
+ : __imm_const(xdp_md_data, offsetof(struct xdp_md, data)),
+ __imm_const(xdp_md_data_meta, offsetof(struct xdp_md, data_meta))
+ : __clobber_all);
+}
+
+SEC("xdp")
+__description("XDP pkt read, pkt_data > pkt_meta', corner case, good access")
+__success __retval(0) __flag(BPF_F_ANY_ALIGNMENT)
+__naked void meta_corner_case_good_access_1(void)
+{
+ asm volatile (" \
+ r2 = *(u32*)(r1 + %[xdp_md_data_meta]); \
+ r3 = *(u32*)(r1 + %[xdp_md_data]); \
+ r1 = r2; \
+ r1 += 7; \
+ if r3 > r1 goto l0_%=; \
+ goto l1_%=; \
+l0_%=: r0 = *(u64*)(r1 - 7); \
+l1_%=: r0 = 0; \
+ exit; \
+" :
+ : __imm_const(xdp_md_data, offsetof(struct xdp_md, data)),
+ __imm_const(xdp_md_data_meta, offsetof(struct xdp_md, data_meta))
+ : __clobber_all);
+}
+
+SEC("xdp")
+__description("XDP pkt read, pkt_data > pkt_meta', corner case +1, good access")
+__success __retval(0) __flag(BPF_F_ANY_ALIGNMENT)
+__naked void corner_case_1_good_access_10(void)
+{
+ asm volatile (" \
+ r2 = *(u32*)(r1 + %[xdp_md_data_meta]); \
+ r3 = *(u32*)(r1 + %[xdp_md_data]); \
+ r1 = r2; \
+ r1 += 8; \
+ if r3 > r1 goto l0_%=; \
+ goto l1_%=; \
+l0_%=: r0 = *(u64*)(r1 - 8); \
+l1_%=: r0 = 0; \
+ exit; \
+" :
+ : __imm_const(xdp_md_data, offsetof(struct xdp_md, data)),
+ __imm_const(xdp_md_data_meta, offsetof(struct xdp_md, data_meta))
+ : __clobber_all);
+}
+
+SEC("xdp")
+__description("XDP pkt read, pkt_meta' < pkt_data, good access")
+__success __retval(0) __flag(BPF_F_ANY_ALIGNMENT)
+__naked void meta_pkt_data_good_access_1(void)
+{
+ asm volatile (" \
+ r2 = *(u32*)(r1 + %[xdp_md_data_meta]); \
+ r3 = *(u32*)(r1 + %[xdp_md_data]); \
+ r1 = r2; \
+ r1 += 8; \
+ if r1 < r3 goto l0_%=; \
+ goto l1_%=; \
+l0_%=: r0 = *(u32*)(r1 - 5); \
+l1_%=: r0 = 0; \
+ exit; \
+" :
+ : __imm_const(xdp_md_data, offsetof(struct xdp_md, data)),
+ __imm_const(xdp_md_data_meta, offsetof(struct xdp_md, data_meta))
+ : __clobber_all);
+}
+
+SEC("xdp")
+__description("XDP pkt read, pkt_meta' < pkt_data, corner case -1, bad access")
+__failure __msg("R1 offset is outside of the packet")
+__flag(BPF_F_ANY_ALIGNMENT)
+__naked void corner_case_1_bad_access_11(void)
+{
+ asm volatile (" \
+ r2 = *(u32*)(r1 + %[xdp_md_data_meta]); \
+ r3 = *(u32*)(r1 + %[xdp_md_data]); \
+ r1 = r2; \
+ r1 += 6; \
+ if r1 < r3 goto l0_%=; \
+ goto l1_%=; \
+l0_%=: r0 = *(u64*)(r1 - 6); \
+l1_%=: r0 = 0; \
+ exit; \
+" :
+ : __imm_const(xdp_md_data, offsetof(struct xdp_md, data)),
+ __imm_const(xdp_md_data_meta, offsetof(struct xdp_md, data_meta))
+ : __clobber_all);
+}
+
+SEC("xdp")
+__description("XDP pkt read, pkt_meta' < pkt_data, bad access 2")
+__failure __msg("R1 offset is outside of the packet")
+__flag(BPF_F_ANY_ALIGNMENT)
+__naked void pkt_data_bad_access_2_6(void)
+{
+ asm volatile (" \
+ r2 = *(u32*)(r1 + %[xdp_md_data_meta]); \
+ r3 = *(u32*)(r1 + %[xdp_md_data]); \
+ r1 = r2; \
+ r1 += 8; \
+ if r1 < r3 goto l0_%=; \
+ r0 = *(u64*)(r1 - 8); \
+l0_%=: r0 = 0; \
+ exit; \
+" :
+ : __imm_const(xdp_md_data, offsetof(struct xdp_md, data)),
+ __imm_const(xdp_md_data_meta, offsetof(struct xdp_md, data_meta))
+ : __clobber_all);
+}
+
+SEC("xdp")
+__description("XDP pkt read, pkt_meta' < pkt_data, corner case, good access")
+__success __retval(0) __flag(BPF_F_ANY_ALIGNMENT)
+__naked void data_corner_case_good_access_6(void)
+{
+ asm volatile (" \
+ r2 = *(u32*)(r1 + %[xdp_md_data_meta]); \
+ r3 = *(u32*)(r1 + %[xdp_md_data]); \
+ r1 = r2; \
+ r1 += 7; \
+ if r1 < r3 goto l0_%=; \
+ goto l1_%=; \
+l0_%=: r0 = *(u64*)(r1 - 7); \
+l1_%=: r0 = 0; \
+ exit; \
+" :
+ : __imm_const(xdp_md_data, offsetof(struct xdp_md, data)),
+ __imm_const(xdp_md_data_meta, offsetof(struct xdp_md, data_meta))
+ : __clobber_all);
+}
+
+SEC("xdp")
+__description("XDP pkt read, pkt_meta' < pkt_data, corner case +1, good access")
+__success __retval(0) __flag(BPF_F_ANY_ALIGNMENT)
+__naked void corner_case_1_good_access_11(void)
+{
+ asm volatile (" \
+ r2 = *(u32*)(r1 + %[xdp_md_data_meta]); \
+ r3 = *(u32*)(r1 + %[xdp_md_data]); \
+ r1 = r2; \
+ r1 += 8; \
+ if r1 < r3 goto l0_%=; \
+ goto l1_%=; \
+l0_%=: r0 = *(u64*)(r1 - 8); \
+l1_%=: r0 = 0; \
+ exit; \
+" :
+ : __imm_const(xdp_md_data, offsetof(struct xdp_md, data)),
+ __imm_const(xdp_md_data_meta, offsetof(struct xdp_md, data_meta))
+ : __clobber_all);
+}
+
+SEC("xdp")
+__description("XDP pkt read, pkt_data < pkt_meta', corner case, good access")
+__success __retval(0) __flag(BPF_F_ANY_ALIGNMENT)
+__naked void meta_corner_case_good_access_2(void)
+{
+ asm volatile (" \
+ r2 = *(u32*)(r1 + %[xdp_md_data_meta]); \
+ r3 = *(u32*)(r1 + %[xdp_md_data]); \
+ r1 = r2; \
+ r1 += 8; \
+ if r3 < r1 goto l0_%=; \
+ r0 = *(u64*)(r1 - 8); \
+l0_%=: r0 = 0; \
+ exit; \
+" :
+ : __imm_const(xdp_md_data, offsetof(struct xdp_md, data)),
+ __imm_const(xdp_md_data_meta, offsetof(struct xdp_md, data_meta))
+ : __clobber_all);
+}
+
+SEC("xdp")
+__description("XDP pkt read, pkt_data < pkt_meta', bad access 1")
+__failure __msg("R1 offset is outside of the packet")
+__flag(BPF_F_ANY_ALIGNMENT)
+__naked void pkt_meta_bad_access_1_1(void)
+{
+ asm volatile (" \
+ r2 = *(u32*)(r1 + %[xdp_md_data_meta]); \
+ r3 = *(u32*)(r1 + %[xdp_md_data]); \
+ r1 = r2; \
+ r1 += 8; \
+ if r3 < r1 goto l0_%=; \
+ r0 = *(u64*)(r1 - 4); \
+l0_%=: r0 = 0; \
+ exit; \
+" :
+ : __imm_const(xdp_md_data, offsetof(struct xdp_md, data)),
+ __imm_const(xdp_md_data_meta, offsetof(struct xdp_md, data_meta))
+ : __clobber_all);
+}
+
+SEC("xdp")
+__description("XDP pkt read, pkt_data < pkt_meta', bad access 2")
+__failure __msg("R1 offset is outside of the packet")
+__flag(BPF_F_ANY_ALIGNMENT)
+__naked void pkt_meta_bad_access_2_2(void)
+{
+ asm volatile (" \
+ r2 = *(u32*)(r1 + %[xdp_md_data_meta]); \
+ r3 = *(u32*)(r1 + %[xdp_md_data]); \
+ r1 = r2; \
+ r1 += 8; \
+ if r3 < r1 goto l0_%=; \
+l0_%=: r0 = *(u64*)(r1 - 8); \
+ r0 = 0; \
+ exit; \
+" :
+ : __imm_const(xdp_md_data, offsetof(struct xdp_md, data)),
+ __imm_const(xdp_md_data_meta, offsetof(struct xdp_md, data_meta))
+ : __clobber_all);
+}
+
+SEC("xdp")
+__description("XDP pkt read, pkt_data < pkt_meta', corner case +1, good access")
+__success __retval(0) __flag(BPF_F_ANY_ALIGNMENT)
+__naked void corner_case_1_good_access_12(void)
+{
+ asm volatile (" \
+ r2 = *(u32*)(r1 + %[xdp_md_data_meta]); \
+ r3 = *(u32*)(r1 + %[xdp_md_data]); \
+ r1 = r2; \
+ r1 += 9; \
+ if r3 < r1 goto l0_%=; \
+ r0 = *(u64*)(r1 - 9); \
+l0_%=: r0 = 0; \
+ exit; \
+" :
+ : __imm_const(xdp_md_data, offsetof(struct xdp_md, data)),
+ __imm_const(xdp_md_data_meta, offsetof(struct xdp_md, data_meta))
+ : __clobber_all);
+}
+
+SEC("xdp")
+__description("XDP pkt read, pkt_data < pkt_meta', corner case -1, bad access")
+__failure __msg("R1 offset is outside of the packet")
+__flag(BPF_F_ANY_ALIGNMENT)
+__naked void corner_case_1_bad_access_12(void)
+{
+ asm volatile (" \
+ r2 = *(u32*)(r1 + %[xdp_md_data_meta]); \
+ r3 = *(u32*)(r1 + %[xdp_md_data]); \
+ r1 = r2; \
+ r1 += 7; \
+ if r3 < r1 goto l0_%=; \
+ r0 = *(u64*)(r1 - 7); \
+l0_%=: r0 = 0; \
+ exit; \
+" :
+ : __imm_const(xdp_md_data, offsetof(struct xdp_md, data)),
+ __imm_const(xdp_md_data_meta, offsetof(struct xdp_md, data_meta))
+ : __clobber_all);
+}
+
+SEC("xdp")
+__description("XDP pkt read, pkt_meta' >= pkt_data, good access")
+__success __retval(0) __flag(BPF_F_ANY_ALIGNMENT)
+__naked void meta_pkt_data_good_access_2(void)
+{
+ asm volatile (" \
+ r2 = *(u32*)(r1 + %[xdp_md_data_meta]); \
+ r3 = *(u32*)(r1 + %[xdp_md_data]); \
+ r1 = r2; \
+ r1 += 8; \
+ if r1 >= r3 goto l0_%=; \
+ r0 = *(u32*)(r1 - 5); \
+l0_%=: r0 = 0; \
+ exit; \
+" :
+ : __imm_const(xdp_md_data, offsetof(struct xdp_md, data)),
+ __imm_const(xdp_md_data_meta, offsetof(struct xdp_md, data_meta))
+ : __clobber_all);
+}
+
+SEC("xdp")
+__description("XDP pkt read, pkt_meta' >= pkt_data, corner case -1, bad access")
+__failure __msg("R1 offset is outside of the packet")
+__flag(BPF_F_ANY_ALIGNMENT)
+__naked void corner_case_1_bad_access_13(void)
+{
+ asm volatile (" \
+ r2 = *(u32*)(r1 + %[xdp_md_data_meta]); \
+ r3 = *(u32*)(r1 + %[xdp_md_data]); \
+ r1 = r2; \
+ r1 += 6; \
+ if r1 >= r3 goto l0_%=; \
+ r0 = *(u64*)(r1 - 6); \
+l0_%=: r0 = 0; \
+ exit; \
+" :
+ : __imm_const(xdp_md_data, offsetof(struct xdp_md, data)),
+ __imm_const(xdp_md_data_meta, offsetof(struct xdp_md, data_meta))
+ : __clobber_all);
+}
+
+SEC("xdp")
+__description("XDP pkt read, pkt_meta' >= pkt_data, bad access 2")
+__failure __msg("R1 offset is outside of the packet")
+__flag(BPF_F_ANY_ALIGNMENT)
+__naked void pkt_data_bad_access_2_7(void)
+{
+ asm volatile (" \
+ r2 = *(u32*)(r1 + %[xdp_md_data_meta]); \
+ r3 = *(u32*)(r1 + %[xdp_md_data]); \
+ r1 = r2; \
+ r1 += 8; \
+ if r1 >= r3 goto l0_%=; \
+l0_%=: r0 = *(u32*)(r1 - 5); \
+ r0 = 0; \
+ exit; \
+" :
+ : __imm_const(xdp_md_data, offsetof(struct xdp_md, data)),
+ __imm_const(xdp_md_data_meta, offsetof(struct xdp_md, data_meta))
+ : __clobber_all);
+}
+
+SEC("xdp")
+__description("XDP pkt read, pkt_meta' >= pkt_data, corner case, good access")
+__success __retval(0) __flag(BPF_F_ANY_ALIGNMENT)
+__naked void data_corner_case_good_access_7(void)
+{
+ asm volatile (" \
+ r2 = *(u32*)(r1 + %[xdp_md_data_meta]); \
+ r3 = *(u32*)(r1 + %[xdp_md_data]); \
+ r1 = r2; \
+ r1 += 7; \
+ if r1 >= r3 goto l0_%=; \
+ r0 = *(u64*)(r1 - 7); \
+l0_%=: r0 = 0; \
+ exit; \
+" :
+ : __imm_const(xdp_md_data, offsetof(struct xdp_md, data)),
+ __imm_const(xdp_md_data_meta, offsetof(struct xdp_md, data_meta))
+ : __clobber_all);
+}
+
+SEC("xdp")
+__description("XDP pkt read, pkt_meta' >= pkt_data, corner case +1, good access")
+__success __retval(0) __flag(BPF_F_ANY_ALIGNMENT)
+__naked void corner_case_1_good_access_13(void)
+{
+ asm volatile (" \
+ r2 = *(u32*)(r1 + %[xdp_md_data_meta]); \
+ r3 = *(u32*)(r1 + %[xdp_md_data]); \
+ r1 = r2; \
+ r1 += 8; \
+ if r1 >= r3 goto l0_%=; \
+ r0 = *(u64*)(r1 - 8); \
+l0_%=: r0 = 0; \
+ exit; \
+" :
+ : __imm_const(xdp_md_data, offsetof(struct xdp_md, data)),
+ __imm_const(xdp_md_data_meta, offsetof(struct xdp_md, data_meta))
+ : __clobber_all);
+}
+
+SEC("xdp")
+__description("XDP pkt read, pkt_data >= pkt_meta', corner case, good access")
+__success __retval(0) __flag(BPF_F_ANY_ALIGNMENT)
+__naked void meta_corner_case_good_access_3(void)
+{
+ asm volatile (" \
+ r2 = *(u32*)(r1 + %[xdp_md_data_meta]); \
+ r3 = *(u32*)(r1 + %[xdp_md_data]); \
+ r1 = r2; \
+ r1 += 8; \
+ if r3 >= r1 goto l0_%=; \
+ goto l1_%=; \
+l0_%=: r0 = *(u64*)(r1 - 8); \
+l1_%=: r0 = 0; \
+ exit; \
+" :
+ : __imm_const(xdp_md_data, offsetof(struct xdp_md, data)),
+ __imm_const(xdp_md_data_meta, offsetof(struct xdp_md, data_meta))
+ : __clobber_all);
+}
+
+SEC("xdp")
+__description("XDP pkt read, pkt_data >= pkt_meta', bad access 1")
+__failure __msg("R1 offset is outside of the packet")
+__flag(BPF_F_ANY_ALIGNMENT)
+__naked void pkt_meta_bad_access_1_2(void)
+{
+ asm volatile (" \
+ r2 = *(u32*)(r1 + %[xdp_md_data_meta]); \
+ r3 = *(u32*)(r1 + %[xdp_md_data]); \
+ r1 = r2; \
+ r1 += 8; \
+ if r3 >= r1 goto l0_%=; \
+ goto l1_%=; \
+l0_%=: r0 = *(u64*)(r1 - 4); \
+l1_%=: r0 = 0; \
+ exit; \
+" :
+ : __imm_const(xdp_md_data, offsetof(struct xdp_md, data)),
+ __imm_const(xdp_md_data_meta, offsetof(struct xdp_md, data_meta))
+ : __clobber_all);
+}
+
+SEC("xdp")
+__description("XDP pkt read, pkt_data >= pkt_meta', bad access 2")
+__failure __msg("R1 offset is outside of the packet")
+__flag(BPF_F_ANY_ALIGNMENT)
+__naked void pkt_meta_bad_access_2_3(void)
+{
+ asm volatile (" \
+ r2 = *(u32*)(r1 + %[xdp_md_data_meta]); \
+ r3 = *(u32*)(r1 + %[xdp_md_data]); \
+ r1 = r2; \
+ r1 += 8; \
+ if r3 >= r1 goto l0_%=; \
+ r0 = *(u64*)(r1 - 8); \
+l0_%=: r0 = 0; \
+ exit; \
+" :
+ : __imm_const(xdp_md_data, offsetof(struct xdp_md, data)),
+ __imm_const(xdp_md_data_meta, offsetof(struct xdp_md, data_meta))
+ : __clobber_all);
+}
+
+SEC("xdp")
+__description("XDP pkt read, pkt_data >= pkt_meta', corner case +1, good access")
+__success __retval(0) __flag(BPF_F_ANY_ALIGNMENT)
+__naked void corner_case_1_good_access_14(void)
+{
+ asm volatile (" \
+ r2 = *(u32*)(r1 + %[xdp_md_data_meta]); \
+ r3 = *(u32*)(r1 + %[xdp_md_data]); \
+ r1 = r2; \
+ r1 += 9; \
+ if r3 >= r1 goto l0_%=; \
+ goto l1_%=; \
+l0_%=: r0 = *(u64*)(r1 - 9); \
+l1_%=: r0 = 0; \
+ exit; \
+" :
+ : __imm_const(xdp_md_data, offsetof(struct xdp_md, data)),
+ __imm_const(xdp_md_data_meta, offsetof(struct xdp_md, data_meta))
+ : __clobber_all);
+}
+
+SEC("xdp")
+__description("XDP pkt read, pkt_data >= pkt_meta', corner case -1, bad access")
+__failure __msg("R1 offset is outside of the packet")
+__flag(BPF_F_ANY_ALIGNMENT)
+__naked void corner_case_1_bad_access_14(void)
+{
+ asm volatile (" \
+ r2 = *(u32*)(r1 + %[xdp_md_data_meta]); \
+ r3 = *(u32*)(r1 + %[xdp_md_data]); \
+ r1 = r2; \
+ r1 += 7; \
+ if r3 >= r1 goto l0_%=; \
+ goto l1_%=; \
+l0_%=: r0 = *(u64*)(r1 - 7); \
+l1_%=: r0 = 0; \
+ exit; \
+" :
+ : __imm_const(xdp_md_data, offsetof(struct xdp_md, data)),
+ __imm_const(xdp_md_data_meta, offsetof(struct xdp_md, data_meta))
+ : __clobber_all);
+}
+
+SEC("xdp")
+__description("XDP pkt read, pkt_meta' <= pkt_data, corner case, good access")
+__success __retval(0) __flag(BPF_F_ANY_ALIGNMENT)
+__naked void data_corner_case_good_access_8(void)
+{
+ asm volatile (" \
+ r2 = *(u32*)(r1 + %[xdp_md_data_meta]); \
+ r3 = *(u32*)(r1 + %[xdp_md_data]); \
+ r1 = r2; \
+ r1 += 8; \
+ if r1 <= r3 goto l0_%=; \
+ goto l1_%=; \
+l0_%=: r0 = *(u64*)(r1 - 8); \
+l1_%=: r0 = 0; \
+ exit; \
+" :
+ : __imm_const(xdp_md_data, offsetof(struct xdp_md, data)),
+ __imm_const(xdp_md_data_meta, offsetof(struct xdp_md, data_meta))
+ : __clobber_all);
+}
+
+SEC("xdp")
+__description("XDP pkt read, pkt_meta' <= pkt_data, bad access 1")
+__failure __msg("R1 offset is outside of the packet")
+__flag(BPF_F_ANY_ALIGNMENT)
+__naked void pkt_data_bad_access_1_4(void)
+{
+ asm volatile (" \
+ r2 = *(u32*)(r1 + %[xdp_md_data_meta]); \
+ r3 = *(u32*)(r1 + %[xdp_md_data]); \
+ r1 = r2; \
+ r1 += 8; \
+ if r1 <= r3 goto l0_%=; \
+ goto l1_%=; \
+l0_%=: r0 = *(u64*)(r1 - 4); \
+l1_%=: r0 = 0; \
+ exit; \
+" :
+ : __imm_const(xdp_md_data, offsetof(struct xdp_md, data)),
+ __imm_const(xdp_md_data_meta, offsetof(struct xdp_md, data_meta))
+ : __clobber_all);
+}
+
+SEC("xdp")
+__description("XDP pkt read, pkt_meta' <= pkt_data, bad access 2")
+__failure __msg("R1 offset is outside of the packet")
+__flag(BPF_F_ANY_ALIGNMENT)
+__naked void pkt_data_bad_access_2_8(void)
+{
+ asm volatile (" \
+ r2 = *(u32*)(r1 + %[xdp_md_data_meta]); \
+ r3 = *(u32*)(r1 + %[xdp_md_data]); \
+ r1 = r2; \
+ r1 += 8; \
+ if r1 <= r3 goto l0_%=; \
+ r0 = *(u64*)(r1 - 8); \
+l0_%=: r0 = 0; \
+ exit; \
+" :
+ : __imm_const(xdp_md_data, offsetof(struct xdp_md, data)),
+ __imm_const(xdp_md_data_meta, offsetof(struct xdp_md, data_meta))
+ : __clobber_all);
+}
+
+SEC("xdp")
+__description("XDP pkt read, pkt_meta' <= pkt_data, corner case +1, good access")
+__success __retval(0) __flag(BPF_F_ANY_ALIGNMENT)
+__naked void corner_case_1_good_access_15(void)
+{
+ asm volatile (" \
+ r2 = *(u32*)(r1 + %[xdp_md_data_meta]); \
+ r3 = *(u32*)(r1 + %[xdp_md_data]); \
+ r1 = r2; \
+ r1 += 9; \
+ if r1 <= r3 goto l0_%=; \
+ goto l1_%=; \
+l0_%=: r0 = *(u64*)(r1 - 9); \
+l1_%=: r0 = 0; \
+ exit; \
+" :
+ : __imm_const(xdp_md_data, offsetof(struct xdp_md, data)),
+ __imm_const(xdp_md_data_meta, offsetof(struct xdp_md, data_meta))
+ : __clobber_all);
+}
+
+SEC("xdp")
+__description("XDP pkt read, pkt_meta' <= pkt_data, corner case -1, bad access")
+__failure __msg("R1 offset is outside of the packet")
+__flag(BPF_F_ANY_ALIGNMENT)
+__naked void corner_case_1_bad_access_15(void)
+{
+ asm volatile (" \
+ r2 = *(u32*)(r1 + %[xdp_md_data_meta]); \
+ r3 = *(u32*)(r1 + %[xdp_md_data]); \
+ r1 = r2; \
+ r1 += 7; \
+ if r1 <= r3 goto l0_%=; \
+ goto l1_%=; \
+l0_%=: r0 = *(u64*)(r1 - 7); \
+l1_%=: r0 = 0; \
+ exit; \
+" :
+ : __imm_const(xdp_md_data, offsetof(struct xdp_md, data)),
+ __imm_const(xdp_md_data_meta, offsetof(struct xdp_md, data_meta))
+ : __clobber_all);
+}
+
+SEC("xdp")
+__description("XDP pkt read, pkt_data <= pkt_meta', good access")
+__success __retval(0) __flag(BPF_F_ANY_ALIGNMENT)
+__naked void data_pkt_meta_good_access_2(void)
+{
+ asm volatile (" \
+ r2 = *(u32*)(r1 + %[xdp_md_data_meta]); \
+ r3 = *(u32*)(r1 + %[xdp_md_data]); \
+ r1 = r2; \
+ r1 += 8; \
+ if r3 <= r1 goto l0_%=; \
+ r0 = *(u32*)(r1 - 5); \
+l0_%=: r0 = 0; \
+ exit; \
+" :
+ : __imm_const(xdp_md_data, offsetof(struct xdp_md, data)),
+ __imm_const(xdp_md_data_meta, offsetof(struct xdp_md, data_meta))
+ : __clobber_all);
+}
+
+SEC("xdp")
+__description("XDP pkt read, pkt_data <= pkt_meta', corner case -1, bad access")
+__failure __msg("R1 offset is outside of the packet")
+__flag(BPF_F_ANY_ALIGNMENT)
+__naked void corner_case_1_bad_access_16(void)
+{
+ asm volatile (" \
+ r2 = *(u32*)(r1 + %[xdp_md_data_meta]); \
+ r3 = *(u32*)(r1 + %[xdp_md_data]); \
+ r1 = r2; \
+ r1 += 6; \
+ if r3 <= r1 goto l0_%=; \
+ r0 = *(u64*)(r1 - 6); \
+l0_%=: r0 = 0; \
+ exit; \
+" :
+ : __imm_const(xdp_md_data, offsetof(struct xdp_md, data)),
+ __imm_const(xdp_md_data_meta, offsetof(struct xdp_md, data_meta))
+ : __clobber_all);
+}
+
+SEC("xdp")
+__description("XDP pkt read, pkt_data <= pkt_meta', bad access 2")
+__failure __msg("R1 offset is outside of the packet")
+__flag(BPF_F_ANY_ALIGNMENT)
+__naked void pkt_meta_bad_access_2_4(void)
+{
+ asm volatile (" \
+ r2 = *(u32*)(r1 + %[xdp_md_data_meta]); \
+ r3 = *(u32*)(r1 + %[xdp_md_data]); \
+ r1 = r2; \
+ r1 += 8; \
+ if r3 <= r1 goto l0_%=; \
+l0_%=: r0 = *(u32*)(r1 - 5); \
+ r0 = 0; \
+ exit; \
+" :
+ : __imm_const(xdp_md_data, offsetof(struct xdp_md, data)),
+ __imm_const(xdp_md_data_meta, offsetof(struct xdp_md, data_meta))
+ : __clobber_all);
+}
+
+SEC("xdp")
+__description("XDP pkt read, pkt_data <= pkt_meta', corner case, good access")
+__success __retval(0) __flag(BPF_F_ANY_ALIGNMENT)
+__naked void meta_corner_case_good_access_4(void)
+{
+ asm volatile (" \
+ r2 = *(u32*)(r1 + %[xdp_md_data_meta]); \
+ r3 = *(u32*)(r1 + %[xdp_md_data]); \
+ r1 = r2; \
+ r1 += 7; \
+ if r3 <= r1 goto l0_%=; \
+ r0 = *(u64*)(r1 - 7); \
+l0_%=: r0 = 0; \
+ exit; \
+" :
+ : __imm_const(xdp_md_data, offsetof(struct xdp_md, data)),
+ __imm_const(xdp_md_data_meta, offsetof(struct xdp_md, data_meta))
+ : __clobber_all);
+}
+
+SEC("xdp")
+__description("XDP pkt read, pkt_data <= pkt_meta', corner case +1, good access")
+__success __retval(0) __flag(BPF_F_ANY_ALIGNMENT)
+__naked void corner_case_1_good_access_16(void)
+{
+ asm volatile (" \
+ r2 = *(u32*)(r1 + %[xdp_md_data_meta]); \
+ r3 = *(u32*)(r1 + %[xdp_md_data]); \
+ r1 = r2; \
+ r1 += 8; \
+ if r3 <= r1 goto l0_%=; \
+ r0 = *(u64*)(r1 - 8); \
+l0_%=: r0 = 0; \
+ exit; \
+" :
+ : __imm_const(xdp_md_data, offsetof(struct xdp_md, data)),
+ __imm_const(xdp_md_data_meta, offsetof(struct xdp_md, data_meta))
+ : __clobber_all);
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/vrf_socket_lookup.c b/tools/testing/selftests/bpf/progs/vrf_socket_lookup.c
new file mode 100644
index 000000000000..bcfb6feb38c0
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/vrf_socket_lookup.c
@@ -0,0 +1,89 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <linux/bpf.h>
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_endian.h>
+
+#include <linux/ip.h>
+#include <linux/in.h>
+#include <linux/if_ether.h>
+#include <linux/pkt_cls.h>
+#include <stdbool.h>
+
+int lookup_status;
+bool test_xdp;
+bool tcp_skc;
+
+#define CUR_NS BPF_F_CURRENT_NETNS
+
+static void socket_lookup(void *ctx, void *data_end, void *data)
+{
+ struct ethhdr *eth = data;
+ struct bpf_sock_tuple *tp;
+ struct bpf_sock *sk;
+ struct iphdr *iph;
+ int tplen;
+
+ if (eth + 1 > data_end)
+ return;
+
+ if (eth->h_proto != bpf_htons(ETH_P_IP))
+ return;
+
+ iph = (struct iphdr *)(eth + 1);
+ if (iph + 1 > data_end)
+ return;
+
+ tp = (struct bpf_sock_tuple *)&iph->saddr;
+ tplen = sizeof(tp->ipv4);
+ if ((void *)tp + tplen > data_end)
+ return;
+
+ switch (iph->protocol) {
+ case IPPROTO_TCP:
+ if (tcp_skc)
+ sk = bpf_skc_lookup_tcp(ctx, tp, tplen, CUR_NS, 0);
+ else
+ sk = bpf_sk_lookup_tcp(ctx, tp, tplen, CUR_NS, 0);
+ break;
+ case IPPROTO_UDP:
+ sk = bpf_sk_lookup_udp(ctx, tp, tplen, CUR_NS, 0);
+ break;
+ default:
+ return;
+ }
+
+ lookup_status = 0;
+
+ if (sk) {
+ bpf_sk_release(sk);
+ lookup_status = 1;
+ }
+}
+
+SEC("tc")
+int tc_socket_lookup(struct __sk_buff *skb)
+{
+ void *data_end = (void *)(long)skb->data_end;
+ void *data = (void *)(long)skb->data;
+
+ if (test_xdp)
+ return TC_ACT_UNSPEC;
+
+ socket_lookup(skb, data_end, data);
+ return TC_ACT_UNSPEC;
+}
+
+SEC("xdp")
+int xdp_socket_lookup(struct xdp_md *xdp)
+{
+ void *data_end = (void *)(long)xdp->data_end;
+ void *data = (void *)(long)xdp->data;
+
+ if (!test_xdp)
+ return XDP_PASS;
+
+ socket_lookup(xdp, data_end, data);
+ return XDP_PASS;
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/xdp_dummy.c b/tools/testing/selftests/bpf/progs/xdp_dummy.c
index ea25e8881992..d988b2e0cee8 100644
--- a/tools/testing/selftests/bpf/progs/xdp_dummy.c
+++ b/tools/testing/selftests/bpf/progs/xdp_dummy.c
@@ -4,7 +4,7 @@
#include <linux/bpf.h>
#include <bpf/bpf_helpers.h>
-SEC("xdp_dummy")
+SEC("xdp")
int xdp_dummy_prog(struct xdp_md *ctx)
{
return XDP_PASS;
diff --git a/tools/testing/selftests/bpf/progs/xdp_features.c b/tools/testing/selftests/bpf/progs/xdp_features.c
new file mode 100644
index 000000000000..67424084a38a
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/xdp_features.c
@@ -0,0 +1,268 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include <stdbool.h>
+#include <linux/bpf.h>
+#include <linux/netdev.h>
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_endian.h>
+#include <bpf/bpf_tracing.h>
+#include <linux/if_ether.h>
+#include <linux/ip.h>
+#include <linux/ipv6.h>
+#include <linux/in.h>
+#include <linux/in6.h>
+#include <linux/udp.h>
+#include <asm-generic/errno-base.h>
+
+#include "xdp_features.h"
+
+#define ipv6_addr_equal(a, b) ((a).s6_addr32[0] == (b).s6_addr32[0] && \
+ (a).s6_addr32[1] == (b).s6_addr32[1] && \
+ (a).s6_addr32[2] == (b).s6_addr32[2] && \
+ (a).s6_addr32[3] == (b).s6_addr32[3])
+
+struct net_device;
+struct bpf_prog;
+
+struct xdp_cpumap_stats {
+ unsigned int redirect;
+ unsigned int pass;
+ unsigned int drop;
+};
+
+struct {
+ __uint(type, BPF_MAP_TYPE_ARRAY);
+ __type(key, __u32);
+ __type(value, __u32);
+ __uint(max_entries, 1);
+} stats SEC(".maps");
+
+struct {
+ __uint(type, BPF_MAP_TYPE_ARRAY);
+ __type(key, __u32);
+ __type(value, __u32);
+ __uint(max_entries, 1);
+} dut_stats SEC(".maps");
+
+struct {
+ __uint(type, BPF_MAP_TYPE_CPUMAP);
+ __uint(key_size, sizeof(__u32));
+ __uint(value_size, sizeof(struct bpf_cpumap_val));
+ __uint(max_entries, 1);
+} cpu_map SEC(".maps");
+
+struct {
+ __uint(type, BPF_MAP_TYPE_DEVMAP);
+ __uint(key_size, sizeof(__u32));
+ __uint(value_size, sizeof(struct bpf_devmap_val));
+ __uint(max_entries, 1);
+} dev_map SEC(".maps");
+
+const volatile struct in6_addr tester_addr;
+const volatile struct in6_addr dut_addr;
+
+static __always_inline int
+xdp_process_echo_packet(struct xdp_md *xdp, bool dut)
+{
+ void *data_end = (void *)(long)xdp->data_end;
+ void *data = (void *)(long)xdp->data;
+ struct ethhdr *eh = data;
+ struct tlv_hdr *tlv;
+ struct udphdr *uh;
+ __be16 port;
+
+ if (eh + 1 > (struct ethhdr *)data_end)
+ return -EINVAL;
+
+ if (eh->h_proto == bpf_htons(ETH_P_IP)) {
+ struct iphdr *ih = (struct iphdr *)(eh + 1);
+ __be32 saddr = dut ? tester_addr.s6_addr32[3]
+ : dut_addr.s6_addr32[3];
+ __be32 daddr = dut ? dut_addr.s6_addr32[3]
+ : tester_addr.s6_addr32[3];
+
+ ih = (struct iphdr *)(eh + 1);
+ if (ih + 1 > (struct iphdr *)data_end)
+ return -EINVAL;
+
+ if (saddr != ih->saddr)
+ return -EINVAL;
+
+ if (daddr != ih->daddr)
+ return -EINVAL;
+
+ if (ih->protocol != IPPROTO_UDP)
+ return -EINVAL;
+
+ uh = (struct udphdr *)(ih + 1);
+ } else if (eh->h_proto == bpf_htons(ETH_P_IPV6)) {
+ struct in6_addr saddr = dut ? tester_addr : dut_addr;
+ struct in6_addr daddr = dut ? dut_addr : tester_addr;
+ struct ipv6hdr *ih6 = (struct ipv6hdr *)(eh + 1);
+
+ if (ih6 + 1 > (struct ipv6hdr *)data_end)
+ return -EINVAL;
+
+ if (!ipv6_addr_equal(saddr, ih6->saddr))
+ return -EINVAL;
+
+ if (!ipv6_addr_equal(daddr, ih6->daddr))
+ return -EINVAL;
+
+ if (ih6->nexthdr != IPPROTO_UDP)
+ return -EINVAL;
+
+ uh = (struct udphdr *)(ih6 + 1);
+ } else {
+ return -EINVAL;
+ }
+
+ if (uh + 1 > (struct udphdr *)data_end)
+ return -EINVAL;
+
+ port = dut ? uh->dest : uh->source;
+ if (port != bpf_htons(DUT_ECHO_PORT))
+ return -EINVAL;
+
+ tlv = (struct tlv_hdr *)(uh + 1);
+ if (tlv + 1 > data_end)
+ return -EINVAL;
+
+ return bpf_htons(tlv->type) == CMD_ECHO ? 0 : -EINVAL;
+}
+
+static __always_inline int
+xdp_update_stats(struct xdp_md *xdp, bool tx, bool dut)
+{
+ __u32 *val, key = 0;
+
+ if (xdp_process_echo_packet(xdp, tx))
+ return -EINVAL;
+
+ if (dut)
+ val = bpf_map_lookup_elem(&dut_stats, &key);
+ else
+ val = bpf_map_lookup_elem(&stats, &key);
+
+ if (val)
+ __sync_add_and_fetch(val, 1);
+
+ return 0;
+}
+
+/* Tester */
+
+SEC("xdp")
+int xdp_tester_check_tx(struct xdp_md *xdp)
+{
+ xdp_update_stats(xdp, true, false);
+
+ return XDP_PASS;
+}
+
+SEC("xdp")
+int xdp_tester_check_rx(struct xdp_md *xdp)
+{
+ xdp_update_stats(xdp, false, false);
+
+ return XDP_PASS;
+}
+
+/* DUT */
+
+SEC("xdp")
+int xdp_do_pass(struct xdp_md *xdp)
+{
+ xdp_update_stats(xdp, true, true);
+
+ return XDP_PASS;
+}
+
+SEC("xdp")
+int xdp_do_drop(struct xdp_md *xdp)
+{
+ if (xdp_update_stats(xdp, true, true))
+ return XDP_PASS;
+
+ return XDP_DROP;
+}
+
+SEC("xdp")
+int xdp_do_aborted(struct xdp_md *xdp)
+{
+ if (xdp_process_echo_packet(xdp, true))
+ return XDP_PASS;
+
+ return XDP_ABORTED;
+}
+
+SEC("xdp")
+int xdp_do_tx(struct xdp_md *xdp)
+{
+ void *data = (void *)(long)xdp->data;
+ struct ethhdr *eh = data;
+ __u8 tmp_mac[ETH_ALEN];
+
+ if (xdp_update_stats(xdp, true, true))
+ return XDP_PASS;
+
+ __builtin_memcpy(tmp_mac, eh->h_source, ETH_ALEN);
+ __builtin_memcpy(eh->h_source, eh->h_dest, ETH_ALEN);
+ __builtin_memcpy(eh->h_dest, tmp_mac, ETH_ALEN);
+
+ return XDP_TX;
+}
+
+SEC("xdp")
+int xdp_do_redirect(struct xdp_md *xdp)
+{
+ if (xdp_process_echo_packet(xdp, true))
+ return XDP_PASS;
+
+ return bpf_redirect_map(&cpu_map, 0, 0);
+}
+
+SEC("tp_btf/xdp_exception")
+int BPF_PROG(xdp_exception, const struct net_device *dev,
+ const struct bpf_prog *xdp, __u32 act)
+{
+ __u32 *val, key = 0;
+
+ val = bpf_map_lookup_elem(&dut_stats, &key);
+ if (val)
+ __sync_add_and_fetch(val, 1);
+
+ return 0;
+}
+
+SEC("tp_btf/xdp_cpumap_kthread")
+int BPF_PROG(tp_xdp_cpumap_kthread, int map_id, unsigned int processed,
+ unsigned int drops, int sched, struct xdp_cpumap_stats *xdp_stats)
+{
+ __u32 *val, key = 0;
+
+ val = bpf_map_lookup_elem(&dut_stats, &key);
+ if (val)
+ __sync_add_and_fetch(val, 1);
+
+ return 0;
+}
+
+SEC("xdp/cpumap")
+int xdp_do_redirect_cpumap(struct xdp_md *xdp)
+{
+ void *data = (void *)(long)xdp->data;
+ struct ethhdr *eh = data;
+ __u8 tmp_mac[ETH_ALEN];
+
+ if (xdp_process_echo_packet(xdp, true))
+ return XDP_PASS;
+
+ __builtin_memcpy(tmp_mac, eh->h_source, ETH_ALEN);
+ __builtin_memcpy(eh->h_source, eh->h_dest, ETH_ALEN);
+ __builtin_memcpy(eh->h_dest, tmp_mac, ETH_ALEN);
+
+ return bpf_redirect_map(&dev_map, 0, 0);
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/xdp_hw_metadata.c b/tools/testing/selftests/bpf/progs/xdp_hw_metadata.c
new file mode 100644
index 000000000000..330ece2eabdb
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/xdp_hw_metadata.c
@@ -0,0 +1,117 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include <vmlinux.h>
+#include "xdp_metadata.h"
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_endian.h>
+
+struct {
+ __uint(type, BPF_MAP_TYPE_XSKMAP);
+ __uint(max_entries, 256);
+ __type(key, __u32);
+ __type(value, __u32);
+} xsk SEC(".maps");
+
+__u64 pkts_skip = 0;
+__u64 pkts_fail = 0;
+__u64 pkts_redir = 0;
+
+extern int bpf_xdp_metadata_rx_timestamp(const struct xdp_md *ctx,
+ __u64 *timestamp) __ksym;
+extern int bpf_xdp_metadata_rx_hash(const struct xdp_md *ctx, __u32 *hash,
+ enum xdp_rss_hash_type *rss_type) __ksym;
+extern int bpf_xdp_metadata_rx_vlan_tag(const struct xdp_md *ctx,
+ __be16 *vlan_proto,
+ __u16 *vlan_tci) __ksym;
+
+SEC("xdp.frags")
+int rx(struct xdp_md *ctx)
+{
+ void *data, *data_meta, *data_end;
+ struct ipv6hdr *ip6h = NULL;
+ struct udphdr *udp = NULL;
+ struct iphdr *iph = NULL;
+ struct xdp_meta *meta;
+ struct ethhdr *eth;
+ int err;
+
+ data = (void *)(long)ctx->data;
+ data_end = (void *)(long)ctx->data_end;
+ eth = data;
+
+ if (eth + 1 < data_end && (eth->h_proto == bpf_htons(ETH_P_8021AD) ||
+ eth->h_proto == bpf_htons(ETH_P_8021Q)))
+ eth = (void *)eth + sizeof(struct vlan_hdr);
+
+ if (eth + 1 < data_end && eth->h_proto == bpf_htons(ETH_P_8021Q))
+ eth = (void *)eth + sizeof(struct vlan_hdr);
+
+ if (eth + 1 < data_end) {
+ if (eth->h_proto == bpf_htons(ETH_P_IP)) {
+ iph = (void *)(eth + 1);
+ if (iph + 1 < data_end && iph->protocol == IPPROTO_UDP)
+ udp = (void *)(iph + 1);
+ }
+ if (eth->h_proto == bpf_htons(ETH_P_IPV6)) {
+ ip6h = (void *)(eth + 1);
+ if (ip6h + 1 < data_end && ip6h->nexthdr == IPPROTO_UDP)
+ udp = (void *)(ip6h + 1);
+ }
+ if (udp && udp + 1 > data_end)
+ udp = NULL;
+ }
+
+ if (!udp) {
+ __sync_add_and_fetch(&pkts_skip, 1);
+ return XDP_PASS;
+ }
+
+ /* Forwarding UDP:9091 to AF_XDP */
+ if (udp->dest != bpf_htons(9091)) {
+ __sync_add_and_fetch(&pkts_skip, 1);
+ return XDP_PASS;
+ }
+
+ err = bpf_xdp_adjust_meta(ctx, -(int)sizeof(struct xdp_meta));
+ if (err) {
+ __sync_add_and_fetch(&pkts_fail, 1);
+ return XDP_PASS;
+ }
+
+ data = (void *)(long)ctx->data;
+ data_meta = (void *)(long)ctx->data_meta;
+ meta = data_meta;
+
+ if (meta + 1 > data) {
+ __sync_add_and_fetch(&pkts_fail, 1);
+ return XDP_PASS;
+ }
+
+ meta->hint_valid = 0;
+
+ meta->xdp_timestamp = bpf_ktime_get_tai_ns();
+ err = bpf_xdp_metadata_rx_timestamp(ctx, &meta->rx_timestamp);
+ if (err)
+ meta->rx_timestamp_err = err;
+ else
+ meta->hint_valid |= XDP_META_FIELD_TS;
+
+ err = bpf_xdp_metadata_rx_hash(ctx, &meta->rx_hash,
+ &meta->rx_hash_type);
+ if (err)
+ meta->rx_hash_err = err;
+ else
+ meta->hint_valid |= XDP_META_FIELD_RSS;
+
+ err = bpf_xdp_metadata_rx_vlan_tag(ctx, &meta->rx_vlan_proto,
+ &meta->rx_vlan_tci);
+ if (err)
+ meta->rx_vlan_tag_err = err;
+ else
+ meta->hint_valid |= XDP_META_FIELD_VLAN_TAG;
+
+ __sync_add_and_fetch(&pkts_redir, 1);
+ return bpf_redirect_map(&xsk, ctx->rx_queue_index, XDP_PASS);
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/xdp_metadata.c b/tools/testing/selftests/bpf/progs/xdp_metadata.c
new file mode 100644
index 000000000000..31ca229bb3c0
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/xdp_metadata.c
@@ -0,0 +1,98 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include <vmlinux.h>
+#include "xdp_metadata.h"
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_endian.h>
+
+struct {
+ __uint(type, BPF_MAP_TYPE_XSKMAP);
+ __uint(max_entries, 4);
+ __type(key, __u32);
+ __type(value, __u32);
+} xsk SEC(".maps");
+
+struct {
+ __uint(type, BPF_MAP_TYPE_PROG_ARRAY);
+ __uint(max_entries, 1);
+ __type(key, __u32);
+ __type(value, __u32);
+} prog_arr SEC(".maps");
+
+extern int bpf_xdp_metadata_rx_timestamp(const struct xdp_md *ctx,
+ __u64 *timestamp) __ksym;
+extern int bpf_xdp_metadata_rx_hash(const struct xdp_md *ctx, __u32 *hash,
+ enum xdp_rss_hash_type *rss_type) __ksym;
+extern int bpf_xdp_metadata_rx_vlan_tag(const struct xdp_md *ctx,
+ __be16 *vlan_proto,
+ __u16 *vlan_tci) __ksym;
+
+SEC("xdp")
+int rx(struct xdp_md *ctx)
+{
+ void *data, *data_meta, *data_end;
+ struct ipv6hdr *ip6h = NULL;
+ struct ethhdr *eth = NULL;
+ struct udphdr *udp = NULL;
+ struct iphdr *iph = NULL;
+ struct xdp_meta *meta;
+ u64 timestamp = -1;
+ int ret;
+
+ data = (void *)(long)ctx->data;
+ data_end = (void *)(long)ctx->data_end;
+ eth = data;
+ if (eth + 1 < data_end) {
+ if (eth->h_proto == bpf_htons(ETH_P_IP)) {
+ iph = (void *)(eth + 1);
+ if (iph + 1 < data_end && iph->protocol == IPPROTO_UDP)
+ udp = (void *)(iph + 1);
+ }
+ if (eth->h_proto == bpf_htons(ETH_P_IPV6)) {
+ ip6h = (void *)(eth + 1);
+ if (ip6h + 1 < data_end && ip6h->nexthdr == IPPROTO_UDP)
+ udp = (void *)(ip6h + 1);
+ }
+ if (udp && udp + 1 > data_end)
+ udp = NULL;
+ }
+
+ if (!udp)
+ return XDP_PASS;
+
+ /* Forwarding UDP:8080 to AF_XDP */
+ if (udp->dest != bpf_htons(8080))
+ return XDP_PASS;
+
+ /* Reserve enough for all custom metadata. */
+
+ ret = bpf_xdp_adjust_meta(ctx, -(int)sizeof(struct xdp_meta));
+ if (ret != 0)
+ return XDP_DROP;
+
+ data = (void *)(long)ctx->data;
+ data_meta = (void *)(long)ctx->data_meta;
+
+ if (data_meta + sizeof(struct xdp_meta) > data)
+ return XDP_DROP;
+
+ meta = data_meta;
+
+ /* Export metadata. */
+
+ /* We expect veth bpf_xdp_metadata_rx_timestamp to return 0 HW
+ * timestamp, so put some non-zero value into AF_XDP frame for
+ * the userspace.
+ */
+ bpf_xdp_metadata_rx_timestamp(ctx, &timestamp);
+ if (timestamp == 0)
+ meta->rx_timestamp = 1;
+
+ bpf_xdp_metadata_rx_hash(ctx, &meta->rx_hash, &meta->rx_hash_type);
+ bpf_xdp_metadata_rx_vlan_tag(ctx, &meta->rx_vlan_proto,
+ &meta->rx_vlan_tci);
+
+ return bpf_redirect_map(&xsk, ctx->rx_queue_index, XDP_PASS);
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/xdp_metadata2.c b/tools/testing/selftests/bpf/progs/xdp_metadata2.c
new file mode 100644
index 000000000000..85f88d9d7a78
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/xdp_metadata2.c
@@ -0,0 +1,24 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include <vmlinux.h>
+#include "xdp_metadata.h"
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_endian.h>
+
+extern int bpf_xdp_metadata_rx_hash(const struct xdp_md *ctx, __u32 *hash,
+ enum xdp_rss_hash_type *rss_type) __ksym;
+
+int called;
+
+SEC("freplace/rx")
+int freplace_rx(struct xdp_md *ctx)
+{
+ enum xdp_rss_hash_type type = 0;
+ u32 hash = 0;
+ /* Call _any_ metadata function to make sure we don't crash. */
+ bpf_xdp_metadata_rx_hash(ctx, &hash, &type);
+ called++;
+ return XDP_PASS;
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/xdp_redirect_multi_kern.c b/tools/testing/selftests/bpf/progs/xdp_redirect_multi_kern.c
new file mode 100644
index 000000000000..97b26a30b59a
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/xdp_redirect_multi_kern.c
@@ -0,0 +1,94 @@
+// SPDX-License-Identifier: GPL-2.0
+#define KBUILD_MODNAME "foo"
+#include <string.h>
+#include <linux/in.h>
+#include <linux/if_ether.h>
+#include <linux/if_packet.h>
+#include <linux/ip.h>
+#include <linux/ipv6.h>
+
+#include <linux/bpf.h>
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_endian.h>
+
+/* One map use devmap, another one use devmap_hash for testing */
+struct {
+ __uint(type, BPF_MAP_TYPE_DEVMAP);
+ __uint(key_size, sizeof(int));
+ __uint(value_size, sizeof(int));
+ __uint(max_entries, 1024);
+} map_all SEC(".maps");
+
+struct {
+ __uint(type, BPF_MAP_TYPE_DEVMAP_HASH);
+ __uint(key_size, sizeof(int));
+ __uint(value_size, sizeof(struct bpf_devmap_val));
+ __uint(max_entries, 128);
+} map_egress SEC(".maps");
+
+/* map to store egress interfaces mac addresses */
+struct {
+ __uint(type, BPF_MAP_TYPE_HASH);
+ __type(key, __u32);
+ __type(value, __be64);
+ __uint(max_entries, 128);
+} mac_map SEC(".maps");
+
+SEC("xdp")
+int xdp_redirect_map_multi_prog(struct xdp_md *ctx)
+{
+ void *data_end = (void *)(long)ctx->data_end;
+ void *data = (void *)(long)ctx->data;
+ int if_index = ctx->ingress_ifindex;
+ struct ethhdr *eth = data;
+ __u16 h_proto;
+ __u64 nh_off;
+
+ nh_off = sizeof(*eth);
+ if (data + nh_off > data_end)
+ return XDP_DROP;
+
+ h_proto = eth->h_proto;
+
+ /* Using IPv4 for (BPF_F_BROADCAST | BPF_F_EXCLUDE_INGRESS) testing */
+ if (h_proto == bpf_htons(ETH_P_IP))
+ return bpf_redirect_map(&map_all, 0,
+ BPF_F_BROADCAST | BPF_F_EXCLUDE_INGRESS);
+ /* Using IPv6 for none flag testing */
+ else if (h_proto == bpf_htons(ETH_P_IPV6))
+ return bpf_redirect_map(&map_all, if_index, 0);
+ /* All others for BPF_F_BROADCAST testing */
+ else
+ return bpf_redirect_map(&map_all, 0, BPF_F_BROADCAST);
+}
+
+/* The following 2 progs are for 2nd devmap prog testing */
+SEC("xdp")
+int xdp_redirect_map_all_prog(struct xdp_md *ctx)
+{
+ return bpf_redirect_map(&map_egress, 0,
+ BPF_F_BROADCAST | BPF_F_EXCLUDE_INGRESS);
+}
+
+SEC("xdp/devmap")
+int xdp_devmap_prog(struct xdp_md *ctx)
+{
+ void *data_end = (void *)(long)ctx->data_end;
+ void *data = (void *)(long)ctx->data;
+ __u32 key = ctx->egress_ifindex;
+ struct ethhdr *eth = data;
+ __u64 nh_off;
+ __be64 *mac;
+
+ nh_off = sizeof(*eth);
+ if (data + nh_off > data_end)
+ return XDP_DROP;
+
+ mac = bpf_map_lookup_elem(&mac_map, &key);
+ if (mac)
+ __builtin_memcpy(eth->h_source, mac, ETH_ALEN);
+
+ return XDP_PASS;
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/xdp_synproxy_kern.c b/tools/testing/selftests/bpf/progs/xdp_synproxy_kern.c
new file mode 100644
index 000000000000..7ea9785738b5
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/xdp_synproxy_kern.c
@@ -0,0 +1,865 @@
+// SPDX-License-Identifier: LGPL-2.1 OR BSD-2-Clause
+/* Copyright (c) 2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved. */
+
+#include "vmlinux.h"
+
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_endian.h>
+#include <asm/errno.h>
+
+#include "bpf_compiler.h"
+
+#define TC_ACT_OK 0
+#define TC_ACT_SHOT 2
+
+#define NSEC_PER_SEC 1000000000L
+
+#define ETH_ALEN 6
+#define ETH_P_IP 0x0800
+#define ETH_P_IPV6 0x86DD
+
+#define tcp_flag_word(tp) (((union tcp_word_hdr *)(tp))->words[3])
+
+#define IP_DF 0x4000
+#define IP_MF 0x2000
+#define IP_OFFSET 0x1fff
+
+#define NEXTHDR_TCP 6
+
+#define TCPOPT_NOP 1
+#define TCPOPT_EOL 0
+#define TCPOPT_MSS 2
+#define TCPOPT_WINDOW 3
+#define TCPOPT_SACK_PERM 4
+#define TCPOPT_TIMESTAMP 8
+
+#define TCPOLEN_MSS 4
+#define TCPOLEN_WINDOW 3
+#define TCPOLEN_SACK_PERM 2
+#define TCPOLEN_TIMESTAMP 10
+
+#define TCP_TS_HZ 1000
+#define TS_OPT_WSCALE_MASK 0xf
+#define TS_OPT_SACK (1 << 4)
+#define TS_OPT_ECN (1 << 5)
+#define TSBITS 6
+#define TSMASK (((__u32)1 << TSBITS) - 1)
+#define TCP_MAX_WSCALE 14U
+
+#define IPV4_MAXLEN 60
+#define TCP_MAXLEN 60
+
+#define DEFAULT_MSS4 1460
+#define DEFAULT_MSS6 1440
+#define DEFAULT_WSCALE 7
+#define DEFAULT_TTL 64
+#define MAX_ALLOWED_PORTS 8
+
+#define MAX_PACKET_OFF 0xffff
+
+#define swap(a, b) \
+ do { typeof(a) __tmp = (a); (a) = (b); (b) = __tmp; } while (0)
+
+#define __get_unaligned_t(type, ptr) ({ \
+ const struct { type x; } __attribute__((__packed__)) *__pptr = (typeof(__pptr))(ptr); \
+ __pptr->x; \
+})
+
+#define get_unaligned(ptr) __get_unaligned_t(typeof(*(ptr)), (ptr))
+
+struct {
+ __uint(type, BPF_MAP_TYPE_ARRAY);
+ __type(key, __u32);
+ __type(value, __u64);
+ __uint(max_entries, 2);
+} values SEC(".maps");
+
+struct {
+ __uint(type, BPF_MAP_TYPE_ARRAY);
+ __type(key, __u32);
+ __type(value, __u16);
+ __uint(max_entries, MAX_ALLOWED_PORTS);
+} allowed_ports SEC(".maps");
+
+/* Some symbols defined in net/netfilter/nf_conntrack_bpf.c are unavailable in
+ * vmlinux.h if CONFIG_NF_CONNTRACK=m, so they are redefined locally.
+ */
+
+struct bpf_ct_opts___local {
+ s32 netns_id;
+ s32 error;
+ u8 l4proto;
+ u8 dir;
+ u8 reserved[2];
+} __attribute__((preserve_access_index));
+
+#define BPF_F_CURRENT_NETNS (-1)
+
+extern struct nf_conn *bpf_xdp_ct_lookup(struct xdp_md *xdp_ctx,
+ struct bpf_sock_tuple *bpf_tuple,
+ __u32 len_tuple,
+ struct bpf_ct_opts___local *opts,
+ __u32 len_opts) __ksym;
+
+extern struct nf_conn *bpf_skb_ct_lookup(struct __sk_buff *skb_ctx,
+ struct bpf_sock_tuple *bpf_tuple,
+ u32 len_tuple,
+ struct bpf_ct_opts___local *opts,
+ u32 len_opts) __ksym;
+
+extern void bpf_ct_release(struct nf_conn *ct) __ksym;
+
+static __always_inline void swap_eth_addr(__u8 *a, __u8 *b)
+{
+ __u8 tmp[ETH_ALEN];
+
+ __builtin_memcpy(tmp, a, ETH_ALEN);
+ __builtin_memcpy(a, b, ETH_ALEN);
+ __builtin_memcpy(b, tmp, ETH_ALEN);
+}
+
+static __always_inline __u16 csum_fold(__u32 csum)
+{
+ csum = (csum & 0xffff) + (csum >> 16);
+ csum = (csum & 0xffff) + (csum >> 16);
+ return (__u16)~csum;
+}
+
+static __always_inline __u16 csum_tcpudp_magic(__be32 saddr, __be32 daddr,
+ __u32 len, __u8 proto,
+ __u32 csum)
+{
+ __u64 s = csum;
+
+ s += (__u32)saddr;
+ s += (__u32)daddr;
+#if __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
+ s += proto + len;
+#elif __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
+ s += (proto + len) << 8;
+#else
+#error Unknown endian
+#endif
+ s = (s & 0xffffffff) + (s >> 32);
+ s = (s & 0xffffffff) + (s >> 32);
+
+ return csum_fold((__u32)s);
+}
+
+static __always_inline __u16 csum_ipv6_magic(const struct in6_addr *saddr,
+ const struct in6_addr *daddr,
+ __u32 len, __u8 proto, __u32 csum)
+{
+ __u64 sum = csum;
+ int i;
+
+ __pragma_loop_unroll
+ for (i = 0; i < 4; i++)
+ sum += (__u32)saddr->in6_u.u6_addr32[i];
+
+ __pragma_loop_unroll
+ for (i = 0; i < 4; i++)
+ sum += (__u32)daddr->in6_u.u6_addr32[i];
+
+ /* Don't combine additions to avoid 32-bit overflow. */
+ sum += bpf_htonl(len);
+ sum += bpf_htonl(proto);
+
+ sum = (sum & 0xffffffff) + (sum >> 32);
+ sum = (sum & 0xffffffff) + (sum >> 32);
+
+ return csum_fold((__u32)sum);
+}
+
+static __always_inline __u64 tcp_clock_ns(void)
+{
+ return bpf_ktime_get_ns();
+}
+
+static __always_inline __u32 tcp_ns_to_ts(__u64 ns)
+{
+ return ns / (NSEC_PER_SEC / TCP_TS_HZ);
+}
+
+static __always_inline __u32 tcp_clock_ms(void)
+{
+ return tcp_ns_to_ts(tcp_clock_ns());
+}
+
+struct tcpopt_context {
+ void *data;
+ void *data_end;
+ __be32 *tsecr;
+ __u8 wscale;
+ bool option_timestamp;
+ bool option_sack;
+ __u32 off;
+};
+
+static __always_inline u8 *next(struct tcpopt_context *ctx, __u32 sz)
+{
+ __u64 off = ctx->off;
+ __u8 *data;
+
+ /* Verifier forbids access to packet when offset exceeds MAX_PACKET_OFF */
+ if (off > MAX_PACKET_OFF - sz)
+ return NULL;
+
+ data = ctx->data + off;
+ barrier_var(data);
+ if (data + sz >= ctx->data_end)
+ return NULL;
+
+ ctx->off += sz;
+ return data;
+}
+
+static int tscookie_tcpopt_parse(struct tcpopt_context *ctx)
+{
+ __u8 *opcode, *opsize, *wscale, *tsecr;
+ __u32 off = ctx->off;
+
+ opcode = next(ctx, 1);
+ if (!opcode)
+ return 1;
+
+ if (*opcode == TCPOPT_EOL)
+ return 1;
+ if (*opcode == TCPOPT_NOP)
+ return 0;
+
+ opsize = next(ctx, 1);
+ if (!opsize || *opsize < 2)
+ return 1;
+
+ switch (*opcode) {
+ case TCPOPT_WINDOW:
+ wscale = next(ctx, 1);
+ if (!wscale)
+ return 1;
+ if (*opsize == TCPOLEN_WINDOW)
+ ctx->wscale = *wscale < TCP_MAX_WSCALE ? *wscale : TCP_MAX_WSCALE;
+ break;
+ case TCPOPT_TIMESTAMP:
+ tsecr = next(ctx, 4);
+ if (!tsecr)
+ return 1;
+ if (*opsize == TCPOLEN_TIMESTAMP) {
+ ctx->option_timestamp = true;
+ /* Client's tsval becomes our tsecr. */
+ *ctx->tsecr = get_unaligned((__be32 *)tsecr);
+ }
+ break;
+ case TCPOPT_SACK_PERM:
+ if (*opsize == TCPOLEN_SACK_PERM)
+ ctx->option_sack = true;
+ break;
+ }
+
+ ctx->off = off + *opsize;
+
+ return 0;
+}
+
+static int tscookie_tcpopt_parse_batch(__u32 index, void *context)
+{
+ int i;
+
+ for (i = 0; i < 7; i++)
+ if (tscookie_tcpopt_parse(context))
+ return 1;
+ return 0;
+}
+
+static __always_inline bool tscookie_init(struct tcphdr *tcp_header,
+ __u16 tcp_len, __be32 *tsval,
+ __be32 *tsecr, void *data, void *data_end)
+{
+ struct tcpopt_context loop_ctx = {
+ .data = data,
+ .data_end = data_end,
+ .tsecr = tsecr,
+ .wscale = TS_OPT_WSCALE_MASK,
+ .option_timestamp = false,
+ .option_sack = false,
+ /* Note: currently verifier would track .off as unbound scalar.
+ * In case if verifier would at some point get smarter and
+ * compute bounded value for this var, beware that it might
+ * hinder bpf_loop() convergence validation.
+ */
+ .off = (__u8 *)(tcp_header + 1) - (__u8 *)data,
+ };
+ u32 cookie;
+
+ bpf_loop(6, tscookie_tcpopt_parse_batch, &loop_ctx, 0);
+
+ if (!loop_ctx.option_timestamp)
+ return false;
+
+ cookie = tcp_clock_ms() & ~TSMASK;
+ cookie |= loop_ctx.wscale & TS_OPT_WSCALE_MASK;
+ if (loop_ctx.option_sack)
+ cookie |= TS_OPT_SACK;
+ if (tcp_header->ece && tcp_header->cwr)
+ cookie |= TS_OPT_ECN;
+ *tsval = bpf_htonl(cookie);
+
+ return true;
+}
+
+static __always_inline void values_get_tcpipopts(__u16 *mss, __u8 *wscale,
+ __u8 *ttl, bool ipv6)
+{
+ __u32 key = 0;
+ __u64 *value;
+
+ value = bpf_map_lookup_elem(&values, &key);
+ if (value && *value != 0) {
+ if (ipv6)
+ *mss = (*value >> 32) & 0xffff;
+ else
+ *mss = *value & 0xffff;
+ *wscale = (*value >> 16) & 0xf;
+ *ttl = (*value >> 24) & 0xff;
+ return;
+ }
+
+ *mss = ipv6 ? DEFAULT_MSS6 : DEFAULT_MSS4;
+ *wscale = DEFAULT_WSCALE;
+ *ttl = DEFAULT_TTL;
+}
+
+static __always_inline void values_inc_synacks(void)
+{
+ __u32 key = 1;
+ __u64 *value;
+
+ value = bpf_map_lookup_elem(&values, &key);
+ if (value)
+ __sync_fetch_and_add(value, 1);
+}
+
+static __always_inline bool check_port_allowed(__u16 port)
+{
+ __u32 i;
+
+ for (i = 0; i < MAX_ALLOWED_PORTS; i++) {
+ __u32 key = i;
+ __u16 *value;
+
+ value = bpf_map_lookup_elem(&allowed_ports, &key);
+
+ if (!value)
+ break;
+ /* 0 is a terminator value. Check it first to avoid matching on
+ * a forbidden port == 0 and returning true.
+ */
+ if (*value == 0)
+ break;
+
+ if (*value == port)
+ return true;
+ }
+
+ return false;
+}
+
+struct header_pointers {
+ struct ethhdr *eth;
+ struct iphdr *ipv4;
+ struct ipv6hdr *ipv6;
+ struct tcphdr *tcp;
+ __u16 tcp_len;
+};
+
+static __always_inline int tcp_dissect(void *data, void *data_end,
+ struct header_pointers *hdr)
+{
+ hdr->eth = data;
+ if (hdr->eth + 1 > data_end)
+ return XDP_DROP;
+
+ switch (bpf_ntohs(hdr->eth->h_proto)) {
+ case ETH_P_IP:
+ hdr->ipv6 = NULL;
+
+ hdr->ipv4 = (void *)hdr->eth + sizeof(*hdr->eth);
+ if (hdr->ipv4 + 1 > data_end)
+ return XDP_DROP;
+ if (hdr->ipv4->ihl * 4 < sizeof(*hdr->ipv4))
+ return XDP_DROP;
+ if (hdr->ipv4->version != 4)
+ return XDP_DROP;
+
+ if (hdr->ipv4->protocol != IPPROTO_TCP)
+ return XDP_PASS;
+
+ hdr->tcp = (void *)hdr->ipv4 + hdr->ipv4->ihl * 4;
+ break;
+ case ETH_P_IPV6:
+ hdr->ipv4 = NULL;
+
+ hdr->ipv6 = (void *)hdr->eth + sizeof(*hdr->eth);
+ if (hdr->ipv6 + 1 > data_end)
+ return XDP_DROP;
+ if (hdr->ipv6->version != 6)
+ return XDP_DROP;
+
+ /* XXX: Extension headers are not supported and could circumvent
+ * XDP SYN flood protection.
+ */
+ if (hdr->ipv6->nexthdr != NEXTHDR_TCP)
+ return XDP_PASS;
+
+ hdr->tcp = (void *)hdr->ipv6 + sizeof(*hdr->ipv6);
+ break;
+ default:
+ /* XXX: VLANs will circumvent XDP SYN flood protection. */
+ return XDP_PASS;
+ }
+
+ if (hdr->tcp + 1 > data_end)
+ return XDP_DROP;
+ hdr->tcp_len = hdr->tcp->doff * 4;
+ if (hdr->tcp_len < sizeof(*hdr->tcp))
+ return XDP_DROP;
+
+ return XDP_TX;
+}
+
+static __always_inline int tcp_lookup(void *ctx, struct header_pointers *hdr, bool xdp)
+{
+ struct bpf_ct_opts___local ct_lookup_opts = {
+ .netns_id = BPF_F_CURRENT_NETNS,
+ .l4proto = IPPROTO_TCP,
+ };
+ struct bpf_sock_tuple tup = {};
+ struct nf_conn *ct;
+ __u32 tup_size;
+
+ if (hdr->ipv4) {
+ /* TCP doesn't normally use fragments, and XDP can't reassemble
+ * them.
+ */
+ if ((hdr->ipv4->frag_off & bpf_htons(IP_DF | IP_MF | IP_OFFSET)) != bpf_htons(IP_DF))
+ return XDP_DROP;
+
+ tup.ipv4.saddr = hdr->ipv4->saddr;
+ tup.ipv4.daddr = hdr->ipv4->daddr;
+ tup.ipv4.sport = hdr->tcp->source;
+ tup.ipv4.dport = hdr->tcp->dest;
+ tup_size = sizeof(tup.ipv4);
+ } else if (hdr->ipv6) {
+ __builtin_memcpy(tup.ipv6.saddr, &hdr->ipv6->saddr, sizeof(tup.ipv6.saddr));
+ __builtin_memcpy(tup.ipv6.daddr, &hdr->ipv6->daddr, sizeof(tup.ipv6.daddr));
+ tup.ipv6.sport = hdr->tcp->source;
+ tup.ipv6.dport = hdr->tcp->dest;
+ tup_size = sizeof(tup.ipv6);
+ } else {
+ /* The verifier can't track that either ipv4 or ipv6 is not
+ * NULL.
+ */
+ return XDP_ABORTED;
+ }
+ if (xdp)
+ ct = bpf_xdp_ct_lookup(ctx, &tup, tup_size, &ct_lookup_opts, sizeof(ct_lookup_opts));
+ else
+ ct = bpf_skb_ct_lookup(ctx, &tup, tup_size, &ct_lookup_opts, sizeof(ct_lookup_opts));
+ if (ct) {
+ unsigned long status = ct->status;
+
+ bpf_ct_release(ct);
+ if (status & IPS_CONFIRMED)
+ return XDP_PASS;
+ } else if (ct_lookup_opts.error != -ENOENT) {
+ return XDP_ABORTED;
+ }
+
+ /* error == -ENOENT || !(status & IPS_CONFIRMED) */
+ return XDP_TX;
+}
+
+static __always_inline __u8 tcp_mkoptions(__be32 *buf, __be32 *tsopt, __u16 mss,
+ __u8 wscale)
+{
+ __be32 *start = buf;
+
+ *buf++ = bpf_htonl((TCPOPT_MSS << 24) | (TCPOLEN_MSS << 16) | mss);
+
+ if (!tsopt)
+ return buf - start;
+
+ if (tsopt[0] & bpf_htonl(1 << 4))
+ *buf++ = bpf_htonl((TCPOPT_SACK_PERM << 24) |
+ (TCPOLEN_SACK_PERM << 16) |
+ (TCPOPT_TIMESTAMP << 8) |
+ TCPOLEN_TIMESTAMP);
+ else
+ *buf++ = bpf_htonl((TCPOPT_NOP << 24) |
+ (TCPOPT_NOP << 16) |
+ (TCPOPT_TIMESTAMP << 8) |
+ TCPOLEN_TIMESTAMP);
+ *buf++ = tsopt[0];
+ *buf++ = tsopt[1];
+
+ if ((tsopt[0] & bpf_htonl(0xf)) != bpf_htonl(0xf))
+ *buf++ = bpf_htonl((TCPOPT_NOP << 24) |
+ (TCPOPT_WINDOW << 16) |
+ (TCPOLEN_WINDOW << 8) |
+ wscale);
+
+ return buf - start;
+}
+
+static __always_inline void tcp_gen_synack(struct tcphdr *tcp_header,
+ __u32 cookie, __be32 *tsopt,
+ __u16 mss, __u8 wscale)
+{
+ void *tcp_options;
+
+ tcp_flag_word(tcp_header) = TCP_FLAG_SYN | TCP_FLAG_ACK;
+ if (tsopt && (tsopt[0] & bpf_htonl(1 << 5)))
+ tcp_flag_word(tcp_header) |= TCP_FLAG_ECE;
+ tcp_header->doff = 5; /* doff is part of tcp_flag_word. */
+ swap(tcp_header->source, tcp_header->dest);
+ tcp_header->ack_seq = bpf_htonl(bpf_ntohl(tcp_header->seq) + 1);
+ tcp_header->seq = bpf_htonl(cookie);
+ tcp_header->window = 0;
+ tcp_header->urg_ptr = 0;
+ tcp_header->check = 0; /* Calculate checksum later. */
+
+ tcp_options = (void *)(tcp_header + 1);
+ tcp_header->doff += tcp_mkoptions(tcp_options, tsopt, mss, wscale);
+}
+
+static __always_inline void tcpv4_gen_synack(struct header_pointers *hdr,
+ __u32 cookie, __be32 *tsopt)
+{
+ __u8 wscale;
+ __u16 mss;
+ __u8 ttl;
+
+ values_get_tcpipopts(&mss, &wscale, &ttl, false);
+
+ swap_eth_addr(hdr->eth->h_source, hdr->eth->h_dest);
+
+ swap(hdr->ipv4->saddr, hdr->ipv4->daddr);
+ hdr->ipv4->check = 0; /* Calculate checksum later. */
+ hdr->ipv4->tos = 0;
+ hdr->ipv4->id = 0;
+ hdr->ipv4->ttl = ttl;
+
+ tcp_gen_synack(hdr->tcp, cookie, tsopt, mss, wscale);
+
+ hdr->tcp_len = hdr->tcp->doff * 4;
+ hdr->ipv4->tot_len = bpf_htons(sizeof(*hdr->ipv4) + hdr->tcp_len);
+}
+
+static __always_inline void tcpv6_gen_synack(struct header_pointers *hdr,
+ __u32 cookie, __be32 *tsopt)
+{
+ __u8 wscale;
+ __u16 mss;
+ __u8 ttl;
+
+ values_get_tcpipopts(&mss, &wscale, &ttl, true);
+
+ swap_eth_addr(hdr->eth->h_source, hdr->eth->h_dest);
+
+ swap(hdr->ipv6->saddr, hdr->ipv6->daddr);
+ *(__be32 *)hdr->ipv6 = bpf_htonl(0x60000000);
+ hdr->ipv6->hop_limit = ttl;
+
+ tcp_gen_synack(hdr->tcp, cookie, tsopt, mss, wscale);
+
+ hdr->tcp_len = hdr->tcp->doff * 4;
+ hdr->ipv6->payload_len = bpf_htons(hdr->tcp_len);
+}
+
+static __always_inline int syncookie_handle_syn(struct header_pointers *hdr,
+ void *ctx,
+ void *data, void *data_end,
+ bool xdp)
+{
+ __u32 old_pkt_size, new_pkt_size;
+ /* Unlike clang 10, clang 11 and 12 generate code that doesn't pass the
+ * BPF verifier if tsopt is not volatile. Volatile forces it to store
+ * the pointer value and use it directly, otherwise tcp_mkoptions is
+ * (mis)compiled like this:
+ * if (!tsopt)
+ * return buf - start;
+ * reg = stored_return_value_of_tscookie_init;
+ * if (reg)
+ * tsopt = tsopt_buf;
+ * else
+ * tsopt = NULL;
+ * ...
+ * *buf++ = tsopt[1];
+ * It creates a dead branch where tsopt is assigned NULL, but the
+ * verifier can't prove it's dead and blocks the program.
+ */
+ __be32 * volatile tsopt = NULL;
+ __be32 tsopt_buf[2] = {};
+ __u16 ip_len;
+ __u32 cookie;
+ __s64 value;
+
+ /* Checksum is not yet verified, but both checksum failure and TCP
+ * header checks return XDP_DROP, so the order doesn't matter.
+ */
+ if (hdr->tcp->fin || hdr->tcp->rst)
+ return XDP_DROP;
+
+ /* Issue SYN cookies on allowed ports, drop SYN packets on blocked
+ * ports.
+ */
+ if (!check_port_allowed(bpf_ntohs(hdr->tcp->dest)))
+ return XDP_DROP;
+
+ if (hdr->ipv4) {
+ /* Check the IPv4 and TCP checksums before creating a SYNACK. */
+ value = bpf_csum_diff(0, 0, (void *)hdr->ipv4, hdr->ipv4->ihl * 4, 0);
+ if (value < 0)
+ return XDP_ABORTED;
+ if (csum_fold(value) != 0)
+ return XDP_DROP; /* Bad IPv4 checksum. */
+
+ value = bpf_csum_diff(0, 0, (void *)hdr->tcp, hdr->tcp_len, 0);
+ if (value < 0)
+ return XDP_ABORTED;
+ if (csum_tcpudp_magic(hdr->ipv4->saddr, hdr->ipv4->daddr,
+ hdr->tcp_len, IPPROTO_TCP, value) != 0)
+ return XDP_DROP; /* Bad TCP checksum. */
+
+ ip_len = sizeof(*hdr->ipv4);
+
+ value = bpf_tcp_raw_gen_syncookie_ipv4(hdr->ipv4, hdr->tcp,
+ hdr->tcp_len);
+ } else if (hdr->ipv6) {
+ /* Check the TCP checksum before creating a SYNACK. */
+ value = bpf_csum_diff(0, 0, (void *)hdr->tcp, hdr->tcp_len, 0);
+ if (value < 0)
+ return XDP_ABORTED;
+ if (csum_ipv6_magic(&hdr->ipv6->saddr, &hdr->ipv6->daddr,
+ hdr->tcp_len, IPPROTO_TCP, value) != 0)
+ return XDP_DROP; /* Bad TCP checksum. */
+
+ ip_len = sizeof(*hdr->ipv6);
+
+ value = bpf_tcp_raw_gen_syncookie_ipv6(hdr->ipv6, hdr->tcp,
+ hdr->tcp_len);
+ } else {
+ return XDP_ABORTED;
+ }
+
+ if (value < 0)
+ return XDP_ABORTED;
+ cookie = (__u32)value;
+
+ if (tscookie_init((void *)hdr->tcp, hdr->tcp_len,
+ &tsopt_buf[0], &tsopt_buf[1], data, data_end))
+ tsopt = tsopt_buf;
+
+ /* Check that there is enough space for a SYNACK. It also covers
+ * the check that the destination of the __builtin_memmove below
+ * doesn't overflow.
+ */
+ if (data + sizeof(*hdr->eth) + ip_len + TCP_MAXLEN > data_end)
+ return XDP_ABORTED;
+
+ if (hdr->ipv4) {
+ if (hdr->ipv4->ihl * 4 > sizeof(*hdr->ipv4)) {
+ struct tcphdr *new_tcp_header;
+
+ new_tcp_header = data + sizeof(*hdr->eth) + sizeof(*hdr->ipv4);
+ __builtin_memmove(new_tcp_header, hdr->tcp, sizeof(*hdr->tcp));
+ hdr->tcp = new_tcp_header;
+
+ hdr->ipv4->ihl = sizeof(*hdr->ipv4) / 4;
+ }
+
+ tcpv4_gen_synack(hdr, cookie, tsopt);
+ } else if (hdr->ipv6) {
+ tcpv6_gen_synack(hdr, cookie, tsopt);
+ } else {
+ return XDP_ABORTED;
+ }
+
+ /* Recalculate checksums. */
+ hdr->tcp->check = 0;
+ value = bpf_csum_diff(0, 0, (void *)hdr->tcp, hdr->tcp_len, 0);
+ if (value < 0)
+ return XDP_ABORTED;
+ if (hdr->ipv4) {
+ hdr->tcp->check = csum_tcpudp_magic(hdr->ipv4->saddr,
+ hdr->ipv4->daddr,
+ hdr->tcp_len,
+ IPPROTO_TCP,
+ value);
+
+ hdr->ipv4->check = 0;
+ value = bpf_csum_diff(0, 0, (void *)hdr->ipv4, sizeof(*hdr->ipv4), 0);
+ if (value < 0)
+ return XDP_ABORTED;
+ hdr->ipv4->check = csum_fold(value);
+ } else if (hdr->ipv6) {
+ hdr->tcp->check = csum_ipv6_magic(&hdr->ipv6->saddr,
+ &hdr->ipv6->daddr,
+ hdr->tcp_len,
+ IPPROTO_TCP,
+ value);
+ } else {
+ return XDP_ABORTED;
+ }
+
+ /* Set the new packet size. */
+ old_pkt_size = data_end - data;
+ new_pkt_size = sizeof(*hdr->eth) + ip_len + hdr->tcp->doff * 4;
+ if (xdp) {
+ if (bpf_xdp_adjust_tail(ctx, new_pkt_size - old_pkt_size))
+ return XDP_ABORTED;
+ } else {
+ if (bpf_skb_change_tail(ctx, new_pkt_size, 0))
+ return XDP_ABORTED;
+ }
+
+ values_inc_synacks();
+
+ return XDP_TX;
+}
+
+static __always_inline int syncookie_handle_ack(struct header_pointers *hdr)
+{
+ int err;
+
+ if (hdr->tcp->rst)
+ return XDP_DROP;
+
+ if (hdr->ipv4)
+ err = bpf_tcp_raw_check_syncookie_ipv4(hdr->ipv4, hdr->tcp);
+ else if (hdr->ipv6)
+ err = bpf_tcp_raw_check_syncookie_ipv6(hdr->ipv6, hdr->tcp);
+ else
+ return XDP_ABORTED;
+ if (err)
+ return XDP_DROP;
+
+ return XDP_PASS;
+}
+
+static __always_inline int syncookie_part1(void *ctx, void *data, void *data_end,
+ struct header_pointers *hdr, bool xdp)
+{
+ int ret;
+
+ ret = tcp_dissect(data, data_end, hdr);
+ if (ret != XDP_TX)
+ return ret;
+
+ ret = tcp_lookup(ctx, hdr, xdp);
+ if (ret != XDP_TX)
+ return ret;
+
+ /* Packet is TCP and doesn't belong to an established connection. */
+
+ if ((hdr->tcp->syn ^ hdr->tcp->ack) != 1)
+ return XDP_DROP;
+
+ /* Grow the TCP header to TCP_MAXLEN to be able to pass any hdr->tcp_len
+ * to bpf_tcp_raw_gen_syncookie_ipv{4,6} and pass the verifier.
+ */
+ if (xdp) {
+ if (bpf_xdp_adjust_tail(ctx, TCP_MAXLEN - hdr->tcp_len))
+ return XDP_ABORTED;
+ } else {
+ /* Without volatile the verifier throws this error:
+ * R9 32-bit pointer arithmetic prohibited
+ */
+ volatile u64 old_len = data_end - data;
+
+ if (bpf_skb_change_tail(ctx, old_len + TCP_MAXLEN - hdr->tcp_len, 0))
+ return XDP_ABORTED;
+ }
+
+ return XDP_TX;
+}
+
+static __always_inline int syncookie_part2(void *ctx, void *data, void *data_end,
+ struct header_pointers *hdr, bool xdp)
+{
+ if (hdr->ipv4) {
+ hdr->eth = data;
+ hdr->ipv4 = (void *)hdr->eth + sizeof(*hdr->eth);
+ /* IPV4_MAXLEN is needed when calculating checksum.
+ * At least sizeof(struct iphdr) is needed here to access ihl.
+ */
+ if ((void *)hdr->ipv4 + IPV4_MAXLEN > data_end)
+ return XDP_ABORTED;
+ hdr->tcp = (void *)hdr->ipv4 + hdr->ipv4->ihl * 4;
+ } else if (hdr->ipv6) {
+ hdr->eth = data;
+ hdr->ipv6 = (void *)hdr->eth + sizeof(*hdr->eth);
+ hdr->tcp = (void *)hdr->ipv6 + sizeof(*hdr->ipv6);
+ } else {
+ return XDP_ABORTED;
+ }
+
+ if ((void *)hdr->tcp + TCP_MAXLEN > data_end)
+ return XDP_ABORTED;
+
+ /* We run out of registers, tcp_len gets spilled to the stack, and the
+ * verifier forgets its min and max values checked above in tcp_dissect.
+ */
+ hdr->tcp_len = hdr->tcp->doff * 4;
+ if (hdr->tcp_len < sizeof(*hdr->tcp))
+ return XDP_ABORTED;
+
+ return hdr->tcp->syn ? syncookie_handle_syn(hdr, ctx, data, data_end, xdp) :
+ syncookie_handle_ack(hdr);
+}
+
+SEC("xdp")
+int syncookie_xdp(struct xdp_md *ctx)
+{
+ void *data_end = (void *)(long)ctx->data_end;
+ void *data = (void *)(long)ctx->data;
+ struct header_pointers hdr;
+ int ret;
+
+ ret = syncookie_part1(ctx, data, data_end, &hdr, true);
+ if (ret != XDP_TX)
+ return ret;
+
+ data_end = (void *)(long)ctx->data_end;
+ data = (void *)(long)ctx->data;
+
+ return syncookie_part2(ctx, data, data_end, &hdr, true);
+}
+
+SEC("tc")
+int syncookie_tc(struct __sk_buff *skb)
+{
+ void *data_end = (void *)(long)skb->data_end;
+ void *data = (void *)(long)skb->data;
+ struct header_pointers hdr;
+ int ret;
+
+ ret = syncookie_part1(skb, data, data_end, &hdr, false);
+ if (ret != XDP_TX)
+ return ret == XDP_PASS ? TC_ACT_OK : TC_ACT_SHOT;
+
+ data_end = (void *)(long)skb->data_end;
+ data = (void *)(long)skb->data;
+
+ ret = syncookie_part2(skb, data, data_end, &hdr, false);
+ switch (ret) {
+ case XDP_PASS:
+ return TC_ACT_OK;
+ case XDP_TX:
+ return bpf_redirect(skb->ifindex, 0);
+ default:
+ return TC_ACT_SHOT;
+ }
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/xdp_tx.c b/tools/testing/selftests/bpf/progs/xdp_tx.c
index 94e6c2b281cb..5f725c720e00 100644
--- a/tools/testing/selftests/bpf/progs/xdp_tx.c
+++ b/tools/testing/selftests/bpf/progs/xdp_tx.c
@@ -3,7 +3,7 @@
#include <linux/bpf.h>
#include <bpf/bpf_helpers.h>
-SEC("tx")
+SEC("xdp")
int xdp_tx(struct xdp_md *xdp)
{
return XDP_TX;
diff --git a/tools/testing/selftests/bpf/progs/xdping_kern.c b/tools/testing/selftests/bpf/progs/xdping_kern.c
index 6b9ca40bd1f4..44e2b0ef23ae 100644
--- a/tools/testing/selftests/bpf/progs/xdping_kern.c
+++ b/tools/testing/selftests/bpf/progs/xdping_kern.c
@@ -15,6 +15,7 @@
#include <bpf/bpf_helpers.h>
#include <bpf/bpf_endian.h>
+#include "bpf_compiler.h"
#include "xdping.h"
struct {
@@ -86,10 +87,9 @@ static __always_inline int icmp_check(struct xdp_md *ctx, int type)
return XDP_TX;
}
-SEC("xdpclient")
+SEC("xdp")
int xdping_client(struct xdp_md *ctx)
{
- void *data_end = (void *)(long)ctx->data_end;
void *data = (void *)(long)ctx->data;
struct pinginfo *pinginfo = NULL;
struct ethhdr *eth = data;
@@ -117,7 +117,7 @@ int xdping_client(struct xdp_md *ctx)
return XDP_PASS;
if (pinginfo->start) {
-#pragma clang loop unroll(full)
+ __pragma_loop_unroll_full
for (i = 0; i < XDPING_MAX_COUNT; i++) {
if (pinginfo->times[i] == 0)
break;
@@ -150,10 +150,9 @@ int xdping_client(struct xdp_md *ctx)
return XDP_TX;
}
-SEC("xdpserver")
+SEC("xdp")
int xdping_server(struct xdp_md *ctx)
{
- void *data_end = (void *)(long)ctx->data_end;
void *data = (void *)(long)ctx->data;
struct ethhdr *eth = data;
struct icmphdr *icmph;
diff --git a/tools/testing/selftests/bpf/progs/xdpwall.c b/tools/testing/selftests/bpf/progs/xdpwall.c
new file mode 100644
index 000000000000..c2dd0c28237a
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/xdpwall.c
@@ -0,0 +1,364 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2021 Facebook */
+#include <stdbool.h>
+#include <stdint.h>
+#include <linux/stddef.h>
+#include <linux/if_ether.h>
+#include <linux/in.h>
+#include <linux/in6.h>
+#include <linux/ip.h>
+#include <linux/ipv6.h>
+#include <linux/tcp.h>
+#include <linux/udp.h>
+#include <linux/bpf.h>
+#include <linux/types.h>
+#include <bpf/bpf_endian.h>
+#include <bpf/bpf_helpers.h>
+
+enum pkt_parse_err {
+ NO_ERR,
+ BAD_IP6_HDR,
+ BAD_IP4GUE_HDR,
+ BAD_IP6GUE_HDR,
+};
+
+enum pkt_flag {
+ TUNNEL = 0x1,
+ TCP_SYN = 0x2,
+ QUIC_INITIAL_FLAG = 0x4,
+ TCP_ACK = 0x8,
+ TCP_RST = 0x10
+};
+
+struct v4_lpm_key {
+ __u32 prefixlen;
+ __u32 src;
+};
+
+struct v4_lpm_val {
+ struct v4_lpm_key key;
+ __u8 val;
+};
+
+struct {
+ __uint(type, BPF_MAP_TYPE_HASH);
+ __uint(max_entries, 16);
+ __type(key, struct in6_addr);
+ __type(value, bool);
+} v6_addr_map SEC(".maps");
+
+struct {
+ __uint(type, BPF_MAP_TYPE_HASH);
+ __uint(max_entries, 16);
+ __type(key, __u32);
+ __type(value, bool);
+} v4_addr_map SEC(".maps");
+
+struct {
+ __uint(type, BPF_MAP_TYPE_LPM_TRIE);
+ __uint(max_entries, 16);
+ __uint(key_size, sizeof(struct v4_lpm_key));
+ __uint(value_size, sizeof(struct v4_lpm_val));
+ __uint(map_flags, BPF_F_NO_PREALLOC);
+} v4_lpm_val_map SEC(".maps");
+
+struct {
+ __uint(type, BPF_MAP_TYPE_ARRAY);
+ __uint(max_entries, 16);
+ __type(key, int);
+ __type(value, __u8);
+} tcp_port_map SEC(".maps");
+
+struct {
+ __uint(type, BPF_MAP_TYPE_ARRAY);
+ __uint(max_entries, 16);
+ __type(key, int);
+ __type(value, __u16);
+} udp_port_map SEC(".maps");
+
+enum ip_type { V4 = 1, V6 = 2 };
+
+struct fw_match_info {
+ __u8 v4_src_ip_match;
+ __u8 v6_src_ip_match;
+ __u8 v4_src_prefix_match;
+ __u8 v4_dst_prefix_match;
+ __u8 tcp_dp_match;
+ __u16 udp_sp_match;
+ __u16 udp_dp_match;
+ bool is_tcp;
+ bool is_tcp_syn;
+};
+
+struct pkt_info {
+ enum ip_type type;
+ union {
+ struct iphdr *ipv4;
+ struct ipv6hdr *ipv6;
+ } ip;
+ int sport;
+ int dport;
+ __u16 trans_hdr_offset;
+ __u8 proto;
+ __u8 flags;
+};
+
+static __always_inline struct ethhdr *parse_ethhdr(void *data, void *data_end)
+{
+ struct ethhdr *eth = data;
+
+ if (eth + 1 > data_end)
+ return NULL;
+
+ return eth;
+}
+
+static __always_inline __u8 filter_ipv6_addr(const struct in6_addr *ipv6addr)
+{
+ __u8 *leaf;
+
+ leaf = bpf_map_lookup_elem(&v6_addr_map, ipv6addr);
+
+ return leaf ? *leaf : 0;
+}
+
+static __always_inline __u8 filter_ipv4_addr(const __u32 ipaddr)
+{
+ __u8 *leaf;
+
+ leaf = bpf_map_lookup_elem(&v4_addr_map, &ipaddr);
+
+ return leaf ? *leaf : 0;
+}
+
+static __always_inline __u8 filter_ipv4_lpm(const __u32 ipaddr)
+{
+ struct v4_lpm_key v4_key = {};
+ struct v4_lpm_val *lpm_val;
+
+ v4_key.src = ipaddr;
+ v4_key.prefixlen = 32;
+
+ lpm_val = bpf_map_lookup_elem(&v4_lpm_val_map, &v4_key);
+
+ return lpm_val ? lpm_val->val : 0;
+}
+
+
+static __always_inline void
+filter_src_dst_ip(struct pkt_info* info, struct fw_match_info* match_info)
+{
+ if (info->type == V6) {
+ match_info->v6_src_ip_match =
+ filter_ipv6_addr(&info->ip.ipv6->saddr);
+ } else if (info->type == V4) {
+ match_info->v4_src_ip_match =
+ filter_ipv4_addr(info->ip.ipv4->saddr);
+ match_info->v4_src_prefix_match =
+ filter_ipv4_lpm(info->ip.ipv4->saddr);
+ match_info->v4_dst_prefix_match =
+ filter_ipv4_lpm(info->ip.ipv4->daddr);
+ }
+}
+
+static __always_inline void *
+get_transport_hdr(__u16 offset, void *data, void *data_end)
+{
+ if (offset > 255 || data + offset > data_end)
+ return NULL;
+
+ return data + offset;
+}
+
+static __always_inline bool tcphdr_only_contains_flag(struct tcphdr *tcp,
+ __u32 FLAG)
+{
+ return (tcp_flag_word(tcp) &
+ (TCP_FLAG_ACK | TCP_FLAG_RST | TCP_FLAG_SYN | TCP_FLAG_FIN)) == FLAG;
+}
+
+static __always_inline void set_tcp_flags(struct pkt_info *info,
+ struct tcphdr *tcp) {
+ if (tcphdr_only_contains_flag(tcp, TCP_FLAG_SYN))
+ info->flags |= TCP_SYN;
+ else if (tcphdr_only_contains_flag(tcp, TCP_FLAG_ACK))
+ info->flags |= TCP_ACK;
+ else if (tcphdr_only_contains_flag(tcp, TCP_FLAG_RST))
+ info->flags |= TCP_RST;
+}
+
+static __always_inline bool
+parse_tcp(struct pkt_info *info, void *transport_hdr, void *data_end)
+{
+ struct tcphdr *tcp = transport_hdr;
+
+ if (tcp + 1 > data_end)
+ return false;
+
+ info->sport = bpf_ntohs(tcp->source);
+ info->dport = bpf_ntohs(tcp->dest);
+ set_tcp_flags(info, tcp);
+
+ return true;
+}
+
+static __always_inline bool
+parse_udp(struct pkt_info *info, void *transport_hdr, void *data_end)
+{
+ struct udphdr *udp = transport_hdr;
+
+ if (udp + 1 > data_end)
+ return false;
+
+ info->sport = bpf_ntohs(udp->source);
+ info->dport = bpf_ntohs(udp->dest);
+
+ return true;
+}
+
+static __always_inline __u8 filter_tcp_port(int port)
+{
+ __u8 *leaf = bpf_map_lookup_elem(&tcp_port_map, &port);
+
+ return leaf ? *leaf : 0;
+}
+
+static __always_inline __u16 filter_udp_port(int port)
+{
+ __u16 *leaf = bpf_map_lookup_elem(&udp_port_map, &port);
+
+ return leaf ? *leaf : 0;
+}
+
+static __always_inline bool
+filter_transport_hdr(void *transport_hdr, void *data_end,
+ struct pkt_info *info, struct fw_match_info *match_info)
+{
+ if (info->proto == IPPROTO_TCP) {
+ if (!parse_tcp(info, transport_hdr, data_end))
+ return false;
+
+ match_info->is_tcp = true;
+ match_info->is_tcp_syn = (info->flags & TCP_SYN) > 0;
+
+ match_info->tcp_dp_match = filter_tcp_port(info->dport);
+ } else if (info->proto == IPPROTO_UDP) {
+ if (!parse_udp(info, transport_hdr, data_end))
+ return false;
+
+ match_info->udp_dp_match = filter_udp_port(info->dport);
+ match_info->udp_sp_match = filter_udp_port(info->sport);
+ }
+
+ return true;
+}
+
+static __always_inline __u8
+parse_gue_v6(struct pkt_info *info, struct ipv6hdr *ip6h, void *data_end)
+{
+ struct udphdr *udp = (struct udphdr *)(ip6h + 1);
+ void *encap_data = udp + 1;
+
+ if (udp + 1 > data_end)
+ return BAD_IP6_HDR;
+
+ if (udp->dest != bpf_htons(6666))
+ return NO_ERR;
+
+ info->flags |= TUNNEL;
+
+ if (encap_data + 1 > data_end)
+ return BAD_IP6GUE_HDR;
+
+ if (*(__u8 *)encap_data & 0x30) {
+ struct ipv6hdr *inner_ip6h = encap_data;
+
+ if (inner_ip6h + 1 > data_end)
+ return BAD_IP6GUE_HDR;
+
+ info->type = V6;
+ info->proto = inner_ip6h->nexthdr;
+ info->ip.ipv6 = inner_ip6h;
+ info->trans_hdr_offset += sizeof(struct ipv6hdr) + sizeof(struct udphdr);
+ } else {
+ struct iphdr *inner_ip4h = encap_data;
+
+ if (inner_ip4h + 1 > data_end)
+ return BAD_IP6GUE_HDR;
+
+ info->type = V4;
+ info->proto = inner_ip4h->protocol;
+ info->ip.ipv4 = inner_ip4h;
+ info->trans_hdr_offset += sizeof(struct iphdr) + sizeof(struct udphdr);
+ }
+
+ return NO_ERR;
+}
+
+static __always_inline __u8 parse_ipv6_gue(struct pkt_info *info,
+ void *data, void *data_end)
+{
+ struct ipv6hdr *ip6h = data + sizeof(struct ethhdr);
+
+ if (ip6h + 1 > data_end)
+ return BAD_IP6_HDR;
+
+ info->proto = ip6h->nexthdr;
+ info->ip.ipv6 = ip6h;
+ info->type = V6;
+ info->trans_hdr_offset = sizeof(struct ethhdr) + sizeof(struct ipv6hdr);
+
+ if (info->proto == IPPROTO_UDP)
+ return parse_gue_v6(info, ip6h, data_end);
+
+ return NO_ERR;
+}
+
+SEC("xdp")
+int edgewall(struct xdp_md *ctx)
+{
+ void *data_end = (void *)(long)(ctx->data_end);
+ void *data = (void *)(long)(ctx->data);
+ struct fw_match_info match_info = {};
+ struct pkt_info info = {};
+ void *transport_hdr;
+ struct ethhdr *eth;
+ bool filter_res;
+ __u32 proto;
+
+ eth = parse_ethhdr(data, data_end);
+ if (!eth)
+ return XDP_DROP;
+
+ proto = eth->h_proto;
+ if (proto != bpf_htons(ETH_P_IPV6))
+ return XDP_DROP;
+
+ if (parse_ipv6_gue(&info, data, data_end))
+ return XDP_DROP;
+
+ if (info.proto == IPPROTO_ICMPV6)
+ return XDP_PASS;
+
+ if (info.proto != IPPROTO_TCP && info.proto != IPPROTO_UDP)
+ return XDP_DROP;
+
+ filter_src_dst_ip(&info, &match_info);
+
+ transport_hdr = get_transport_hdr(info.trans_hdr_offset, data,
+ data_end);
+ if (!transport_hdr)
+ return XDP_DROP;
+
+ filter_res = filter_transport_hdr(transport_hdr, data_end,
+ &info, &match_info);
+ if (!filter_res)
+ return XDP_DROP;
+
+ if (match_info.is_tcp && !match_info.is_tcp_syn)
+ return XDP_PASS;
+
+ return XDP_DROP;
+}
+
+char LICENSE[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/xfrm_info.c b/tools/testing/selftests/bpf/progs/xfrm_info.c
new file mode 100644
index 000000000000..f6a501fbba2b
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/xfrm_info.c
@@ -0,0 +1,40 @@
+// SPDX-License-Identifier: GPL-2.0
+#include "vmlinux.h"
+#include "bpf_tracing_net.h"
+#include <bpf/bpf_helpers.h>
+
+struct bpf_xfrm_info___local {
+ u32 if_id;
+ int link;
+} __attribute__((preserve_access_index));
+
+__u32 req_if_id;
+__u32 resp_if_id;
+
+int bpf_skb_set_xfrm_info(struct __sk_buff *skb_ctx,
+ const struct bpf_xfrm_info___local *from) __ksym;
+int bpf_skb_get_xfrm_info(struct __sk_buff *skb_ctx,
+ struct bpf_xfrm_info___local *to) __ksym;
+
+SEC("tc")
+int set_xfrm_info(struct __sk_buff *skb)
+{
+ struct bpf_xfrm_info___local info = { .if_id = req_if_id };
+
+ return bpf_skb_set_xfrm_info(skb, &info) ? TC_ACT_SHOT : TC_ACT_UNSPEC;
+}
+
+SEC("tc")
+int get_xfrm_info(struct __sk_buff *skb)
+{
+ struct bpf_xfrm_info___local info = {};
+
+ if (bpf_skb_get_xfrm_info(skb, &info) < 0)
+ return TC_ACT_SHOT;
+
+ resp_if_id = info.if_id;
+
+ return TC_ACT_UNSPEC;
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/xsk_xdp_progs.c b/tools/testing/selftests/bpf/progs/xsk_xdp_progs.c
new file mode 100644
index 000000000000..ccde6a4c6319
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/xsk_xdp_progs.c
@@ -0,0 +1,73 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2022 Intel */
+
+#include <linux/bpf.h>
+#include <bpf/bpf_helpers.h>
+#include <linux/if_ether.h>
+#include "xsk_xdp_common.h"
+
+struct {
+ __uint(type, BPF_MAP_TYPE_XSKMAP);
+ __uint(max_entries, 2);
+ __uint(key_size, sizeof(int));
+ __uint(value_size, sizeof(int));
+} xsk SEC(".maps");
+
+static unsigned int idx;
+int count = 0;
+
+SEC("xdp.frags") int xsk_def_prog(struct xdp_md *xdp)
+{
+ return bpf_redirect_map(&xsk, 0, XDP_DROP);
+}
+
+SEC("xdp.frags") int xsk_xdp_drop(struct xdp_md *xdp)
+{
+ /* Drop every other packet */
+ if (idx++ % 2)
+ return XDP_DROP;
+
+ return bpf_redirect_map(&xsk, 0, XDP_DROP);
+}
+
+SEC("xdp.frags") int xsk_xdp_populate_metadata(struct xdp_md *xdp)
+{
+ void *data, *data_meta;
+ struct xdp_info *meta;
+ int err;
+
+ /* Reserve enough for all custom metadata. */
+ err = bpf_xdp_adjust_meta(xdp, -(int)sizeof(struct xdp_info));
+ if (err)
+ return XDP_DROP;
+
+ data = (void *)(long)xdp->data;
+ data_meta = (void *)(long)xdp->data_meta;
+
+ if (data_meta + sizeof(struct xdp_info) > data)
+ return XDP_DROP;
+
+ meta = data_meta;
+ meta->count = count++;
+
+ return bpf_redirect_map(&xsk, 0, XDP_DROP);
+}
+
+SEC("xdp") int xsk_xdp_shared_umem(struct xdp_md *xdp)
+{
+ void *data = (void *)(long)xdp->data;
+ void *data_end = (void *)(long)xdp->data_end;
+ struct ethhdr *eth = data;
+
+ if (eth + 1 > data_end)
+ return XDP_DROP;
+
+ /* Redirecting packets based on the destination MAC address */
+ idx = ((unsigned int)(eth->h_dest[5])) / 2;
+ if (idx > MAX_SOCKETS)
+ return XDP_DROP;
+
+ return bpf_redirect_map(&xsk, idx, XDP_DROP);
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/sdt-config.h b/tools/testing/selftests/bpf/sdt-config.h
new file mode 100644
index 000000000000..733045a52771
--- /dev/null
+++ b/tools/testing/selftests/bpf/sdt-config.h
@@ -0,0 +1,6 @@
+/* includes/sys/sdt-config.h. Generated from sdt-config.h.in by configure.
+
+ This file just defines _SDT_ASM_SECTION_AUTOGROUP_SUPPORT to 0 or 1 to
+ indicate whether the assembler supports "?" in .pushsection directives. */
+
+#define _SDT_ASM_SECTION_AUTOGROUP_SUPPORT 1
diff --git a/tools/testing/selftests/bpf/sdt.h b/tools/testing/selftests/bpf/sdt.h
new file mode 100644
index 000000000000..ca0162b4dc57
--- /dev/null
+++ b/tools/testing/selftests/bpf/sdt.h
@@ -0,0 +1,513 @@
+/* <sys/sdt.h> - Systemtap static probe definition macros.
+
+ This file is dedicated to the public domain, pursuant to CC0
+ (https://creativecommons.org/publicdomain/zero/1.0/)
+*/
+
+#ifndef _SYS_SDT_H
+#define _SYS_SDT_H 1
+
+/*
+ This file defines a family of macros
+
+ STAP_PROBEn(op1, ..., opn)
+
+ that emit a nop into the instruction stream, and some data into an auxiliary
+ note section. The data in the note section describes the operands, in terms
+ of size and location. Each location is encoded as assembler operand string.
+ Consumer tools such as gdb or systemtap insert breakpoints on top of
+ the nop, and decode the location operand-strings, like an assembler,
+ to find the values being passed.
+
+ The operand strings are selected by the compiler for each operand.
+ They are constrained by gcc inline-assembler codes. The default is:
+
+ #define STAP_SDT_ARG_CONSTRAINT nor
+
+ This is a good default if the operands tend to be integral and
+ moderate in number (smaller than number of registers). In other
+ cases, the compiler may report "'asm' requires impossible reload" or
+ similar. In this case, consider simplifying the macro call (fewer
+ and simpler operands), reduce optimization, or override the default
+ constraints string via:
+
+ #define STAP_SDT_ARG_CONSTRAINT g
+ #include <sys/sdt.h>
+
+ See also:
+ https://sourceware.org/systemtap/wiki/UserSpaceProbeImplementation
+ https://gcc.gnu.org/onlinedocs/gcc/Constraints.html
+ */
+
+
+
+#ifdef __ASSEMBLER__
+# define _SDT_PROBE(provider, name, n, arglist) \
+ _SDT_ASM_BODY(provider, name, _SDT_ASM_SUBSTR_1, (_SDT_DEPAREN_##n arglist)) \
+ _SDT_ASM_BASE
+# define _SDT_ASM_1(x) x;
+# define _SDT_ASM_2(a, b) a,b;
+# define _SDT_ASM_3(a, b, c) a,b,c;
+# define _SDT_ASM_5(a, b, c, d, e) a,b,c,d,e;
+# define _SDT_ASM_STRING_1(x) .asciz #x;
+# define _SDT_ASM_SUBSTR_1(x) .ascii #x;
+# define _SDT_DEPAREN_0() /* empty */
+# define _SDT_DEPAREN_1(a) a
+# define _SDT_DEPAREN_2(a,b) a b
+# define _SDT_DEPAREN_3(a,b,c) a b c
+# define _SDT_DEPAREN_4(a,b,c,d) a b c d
+# define _SDT_DEPAREN_5(a,b,c,d,e) a b c d e
+# define _SDT_DEPAREN_6(a,b,c,d,e,f) a b c d e f
+# define _SDT_DEPAREN_7(a,b,c,d,e,f,g) a b c d e f g
+# define _SDT_DEPAREN_8(a,b,c,d,e,f,g,h) a b c d e f g h
+# define _SDT_DEPAREN_9(a,b,c,d,e,f,g,h,i) a b c d e f g h i
+# define _SDT_DEPAREN_10(a,b,c,d,e,f,g,h,i,j) a b c d e f g h i j
+# define _SDT_DEPAREN_11(a,b,c,d,e,f,g,h,i,j,k) a b c d e f g h i j k
+# define _SDT_DEPAREN_12(a,b,c,d,e,f,g,h,i,j,k,l) a b c d e f g h i j k l
+#else
+#if defined _SDT_HAS_SEMAPHORES
+#define _SDT_NOTE_SEMAPHORE_USE(provider, name) \
+ __asm__ __volatile__ ("" :: "m" (provider##_##name##_semaphore));
+#else
+#define _SDT_NOTE_SEMAPHORE_USE(provider, name)
+#endif
+
+# define _SDT_PROBE(provider, name, n, arglist) \
+ do { \
+ _SDT_NOTE_SEMAPHORE_USE(provider, name); \
+ __asm__ __volatile__ (_SDT_ASM_BODY(provider, name, _SDT_ASM_ARGS, (n)) \
+ :: _SDT_ASM_OPERANDS_##n arglist); \
+ __asm__ __volatile__ (_SDT_ASM_BASE); \
+ } while (0)
+# define _SDT_S(x) #x
+# define _SDT_ASM_1(x) _SDT_S(x) "\n"
+# define _SDT_ASM_2(a, b) _SDT_S(a) "," _SDT_S(b) "\n"
+# define _SDT_ASM_3(a, b, c) _SDT_S(a) "," _SDT_S(b) "," \
+ _SDT_S(c) "\n"
+# define _SDT_ASM_5(a, b, c, d, e) _SDT_S(a) "," _SDT_S(b) "," \
+ _SDT_S(c) "," _SDT_S(d) "," \
+ _SDT_S(e) "\n"
+# define _SDT_ASM_ARGS(n) _SDT_ASM_TEMPLATE_##n
+# define _SDT_ASM_STRING_1(x) _SDT_ASM_1(.asciz #x)
+# define _SDT_ASM_SUBSTR_1(x) _SDT_ASM_1(.ascii #x)
+
+# define _SDT_ARGFMT(no) _SDT_ASM_1(_SDT_SIGN %n[_SDT_S##no]) \
+ _SDT_ASM_1(_SDT_SIZE %n[_SDT_S##no]) \
+ _SDT_ASM_1(_SDT_TYPE %n[_SDT_S##no]) \
+ _SDT_ASM_SUBSTR(_SDT_ARGTMPL(_SDT_A##no))
+
+
+# ifndef STAP_SDT_ARG_CONSTRAINT
+# if defined __powerpc__
+# define STAP_SDT_ARG_CONSTRAINT nZr
+# elif defined __arm__
+# define STAP_SDT_ARG_CONSTRAINT g
+# else
+# define STAP_SDT_ARG_CONSTRAINT nor
+# endif
+# endif
+
+# define _SDT_STRINGIFY(x) #x
+# define _SDT_ARG_CONSTRAINT_STRING(x) _SDT_STRINGIFY(x)
+/* _SDT_S encodes the size and type as 0xSSTT which is decoded by the assembler
+ macros _SDT_SIZE and _SDT_TYPE */
+# define _SDT_ARG(n, x) \
+ [_SDT_S##n] "n" ((_SDT_ARGSIGNED (x) ? (int)-1 : 1) * (-(((int) _SDT_ARGSIZE (x)) << 8) + (-(0x7f & __builtin_classify_type (x))))), \
+ [_SDT_A##n] _SDT_ARG_CONSTRAINT_STRING (STAP_SDT_ARG_CONSTRAINT) (_SDT_ARGVAL (x))
+#endif
+#define _SDT_ASM_STRING(x) _SDT_ASM_STRING_1(x)
+#define _SDT_ASM_SUBSTR(x) _SDT_ASM_SUBSTR_1(x)
+
+#define _SDT_ARGARRAY(x) (__builtin_classify_type (x) == 14 \
+ || __builtin_classify_type (x) == 5)
+
+#ifdef __cplusplus
+# define _SDT_ARGSIGNED(x) (!_SDT_ARGARRAY (x) \
+ && __sdt_type<__typeof (x)>::__sdt_signed)
+# define _SDT_ARGSIZE(x) (_SDT_ARGARRAY (x) \
+ ? sizeof (void *) : sizeof (x))
+# define _SDT_ARGVAL(x) (x)
+
+# include <cstddef>
+
+template<typename __sdt_T>
+struct __sdt_type
+{
+ static const bool __sdt_signed = false;
+};
+
+#define __SDT_ALWAYS_SIGNED(T) \
+template<> struct __sdt_type<T> { static const bool __sdt_signed = true; };
+#define __SDT_COND_SIGNED(T,CT) \
+template<> struct __sdt_type<T> { static const bool __sdt_signed = ((CT)(-1) < 1); };
+__SDT_ALWAYS_SIGNED(signed char)
+__SDT_ALWAYS_SIGNED(short)
+__SDT_ALWAYS_SIGNED(int)
+__SDT_ALWAYS_SIGNED(long)
+__SDT_ALWAYS_SIGNED(long long)
+__SDT_ALWAYS_SIGNED(volatile signed char)
+__SDT_ALWAYS_SIGNED(volatile short)
+__SDT_ALWAYS_SIGNED(volatile int)
+__SDT_ALWAYS_SIGNED(volatile long)
+__SDT_ALWAYS_SIGNED(volatile long long)
+__SDT_ALWAYS_SIGNED(const signed char)
+__SDT_ALWAYS_SIGNED(const short)
+__SDT_ALWAYS_SIGNED(const int)
+__SDT_ALWAYS_SIGNED(const long)
+__SDT_ALWAYS_SIGNED(const long long)
+__SDT_ALWAYS_SIGNED(const volatile signed char)
+__SDT_ALWAYS_SIGNED(const volatile short)
+__SDT_ALWAYS_SIGNED(const volatile int)
+__SDT_ALWAYS_SIGNED(const volatile long)
+__SDT_ALWAYS_SIGNED(const volatile long long)
+__SDT_COND_SIGNED(char, char)
+__SDT_COND_SIGNED(wchar_t, wchar_t)
+__SDT_COND_SIGNED(volatile char, char)
+__SDT_COND_SIGNED(volatile wchar_t, wchar_t)
+__SDT_COND_SIGNED(const char, char)
+__SDT_COND_SIGNED(const wchar_t, wchar_t)
+__SDT_COND_SIGNED(const volatile char, char)
+__SDT_COND_SIGNED(const volatile wchar_t, wchar_t)
+#if defined (__GNUC__) && (__GNUC__ > 4 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 4))
+/* __SDT_COND_SIGNED(char16_t) */
+/* __SDT_COND_SIGNED(char32_t) */
+#endif
+
+template<typename __sdt_E>
+struct __sdt_type<__sdt_E[]> : public __sdt_type<__sdt_E *> {};
+
+template<typename __sdt_E, size_t __sdt_N>
+struct __sdt_type<__sdt_E[__sdt_N]> : public __sdt_type<__sdt_E *> {};
+
+#elif !defined(__ASSEMBLER__)
+__extension__ extern unsigned long long __sdt_unsp;
+# define _SDT_ARGINTTYPE(x) \
+ __typeof (__builtin_choose_expr (((__builtin_classify_type (x) \
+ + 3) & -4) == 4, (x), 0U))
+# define _SDT_ARGSIGNED(x) \
+ (!__extension__ \
+ (__builtin_constant_p ((((unsigned long long) \
+ (_SDT_ARGINTTYPE (x)) __sdt_unsp) \
+ & ((unsigned long long)1 << (sizeof (unsigned long long) \
+ * __CHAR_BIT__ - 1))) == 0) \
+ || (_SDT_ARGINTTYPE (x)) -1 > (_SDT_ARGINTTYPE (x)) 0))
+# define _SDT_ARGSIZE(x) \
+ (_SDT_ARGARRAY (x) ? sizeof (void *) : sizeof (x))
+# define _SDT_ARGVAL(x) (x)
+#endif
+
+#if defined __powerpc__ || defined __powerpc64__
+# define _SDT_ARGTMPL(id) %I[id]%[id]
+#elif defined __i386__
+# define _SDT_ARGTMPL(id) %k[id] /* gcc.gnu.org/PR80115 sourceware.org/PR24541 */
+#else
+# define _SDT_ARGTMPL(id) %[id]
+#endif
+
+/* NB: gdb PR24541 highlighted an unspecified corner of the sdt.h
+ operand note format.
+
+ The named register may be a longer or shorter (!) alias for the
+ storage where the value in question is found. For example, on
+ i386, 64-bit value may be put in register pairs, and the register
+ name stored would identify just one of them. Previously, gcc was
+ asked to emit the %w[id] (16-bit alias of some registers holding
+ operands), even when a wider 32-bit value was used.
+
+ Bottom line: the byte-width given before the @ sign governs. If
+ there is a mismatch between that width and that of the named
+ register, then a sys/sdt.h note consumer may need to employ
+ architecture-specific heuristics to figure out where the compiler
+ has actually put the complete value.
+*/
+
+#ifdef __LP64__
+# define _SDT_ASM_ADDR .8byte
+#else
+# define _SDT_ASM_ADDR .4byte
+#endif
+
+/* The ia64 and s390 nop instructions take an argument. */
+#if defined(__ia64__) || defined(__s390__) || defined(__s390x__)
+#define _SDT_NOP nop 0
+#else
+#define _SDT_NOP nop
+#endif
+
+#define _SDT_NOTE_NAME "stapsdt"
+#define _SDT_NOTE_TYPE 3
+
+/* If the assembler supports the necessary feature, then we can play
+ nice with code in COMDAT sections, which comes up in C++ code.
+ Without that assembler support, some combinations of probe placements
+ in certain kinds of C++ code may produce link-time errors. */
+#include "sdt-config.h"
+#if _SDT_ASM_SECTION_AUTOGROUP_SUPPORT
+# define _SDT_ASM_AUTOGROUP "?"
+#else
+# define _SDT_ASM_AUTOGROUP ""
+#endif
+
+#define _SDT_DEF_MACROS \
+ _SDT_ASM_1(.altmacro) \
+ _SDT_ASM_1(.macro _SDT_SIGN x) \
+ _SDT_ASM_3(.pushsection .note.stapsdt,"","note") \
+ _SDT_ASM_1(.iflt \\x) \
+ _SDT_ASM_1(.ascii "-") \
+ _SDT_ASM_1(.endif) \
+ _SDT_ASM_1(.popsection) \
+ _SDT_ASM_1(.endm) \
+ _SDT_ASM_1(.macro _SDT_SIZE_ x) \
+ _SDT_ASM_3(.pushsection .note.stapsdt,"","note") \
+ _SDT_ASM_1(.ascii "\x") \
+ _SDT_ASM_1(.popsection) \
+ _SDT_ASM_1(.endm) \
+ _SDT_ASM_1(.macro _SDT_SIZE x) \
+ _SDT_ASM_1(_SDT_SIZE_ %%((-(-\\x*((-\\x>0)-(-\\x<0))))>>8)) \
+ _SDT_ASM_1(.endm) \
+ _SDT_ASM_1(.macro _SDT_TYPE_ x) \
+ _SDT_ASM_3(.pushsection .note.stapsdt,"","note") \
+ _SDT_ASM_2(.ifc 8,\\x) \
+ _SDT_ASM_1(.ascii "f") \
+ _SDT_ASM_1(.endif) \
+ _SDT_ASM_1(.ascii "@") \
+ _SDT_ASM_1(.popsection) \
+ _SDT_ASM_1(.endm) \
+ _SDT_ASM_1(.macro _SDT_TYPE x) \
+ _SDT_ASM_1(_SDT_TYPE_ %%((\\x)&(0xff))) \
+ _SDT_ASM_1(.endm)
+
+#define _SDT_UNDEF_MACROS \
+ _SDT_ASM_1(.purgem _SDT_SIGN) \
+ _SDT_ASM_1(.purgem _SDT_SIZE_) \
+ _SDT_ASM_1(.purgem _SDT_SIZE) \
+ _SDT_ASM_1(.purgem _SDT_TYPE_) \
+ _SDT_ASM_1(.purgem _SDT_TYPE)
+
+#define _SDT_ASM_BODY(provider, name, pack_args, args, ...) \
+ _SDT_DEF_MACROS \
+ _SDT_ASM_1(990: _SDT_NOP) \
+ _SDT_ASM_3( .pushsection .note.stapsdt,_SDT_ASM_AUTOGROUP,"note") \
+ _SDT_ASM_1( .balign 4) \
+ _SDT_ASM_3( .4byte 992f-991f, 994f-993f, _SDT_NOTE_TYPE) \
+ _SDT_ASM_1(991: .asciz _SDT_NOTE_NAME) \
+ _SDT_ASM_1(992: .balign 4) \
+ _SDT_ASM_1(993: _SDT_ASM_ADDR 990b) \
+ _SDT_ASM_1( _SDT_ASM_ADDR _.stapsdt.base) \
+ _SDT_SEMAPHORE(provider,name) \
+ _SDT_ASM_STRING(provider) \
+ _SDT_ASM_STRING(name) \
+ pack_args args \
+ _SDT_ASM_SUBSTR(\x00) \
+ _SDT_UNDEF_MACROS \
+ _SDT_ASM_1(994: .balign 4) \
+ _SDT_ASM_1( .popsection)
+
+#define _SDT_ASM_BASE \
+ _SDT_ASM_1(.ifndef _.stapsdt.base) \
+ _SDT_ASM_5( .pushsection .stapsdt.base,"aG","progbits", \
+ .stapsdt.base,comdat) \
+ _SDT_ASM_1( .weak _.stapsdt.base) \
+ _SDT_ASM_1( .hidden _.stapsdt.base) \
+ _SDT_ASM_1( _.stapsdt.base: .space 1) \
+ _SDT_ASM_2( .size _.stapsdt.base, 1) \
+ _SDT_ASM_1( .popsection) \
+ _SDT_ASM_1(.endif)
+
+#if defined _SDT_HAS_SEMAPHORES
+#define _SDT_SEMAPHORE(p,n) \
+ _SDT_ASM_1( _SDT_ASM_ADDR p##_##n##_semaphore)
+#else
+#define _SDT_SEMAPHORE(p,n) _SDT_ASM_1( _SDT_ASM_ADDR 0)
+#endif
+
+#define _SDT_ASM_BLANK _SDT_ASM_SUBSTR(\x20)
+#define _SDT_ASM_TEMPLATE_0 /* no arguments */
+#define _SDT_ASM_TEMPLATE_1 _SDT_ARGFMT(1)
+#define _SDT_ASM_TEMPLATE_2 _SDT_ASM_TEMPLATE_1 _SDT_ASM_BLANK _SDT_ARGFMT(2)
+#define _SDT_ASM_TEMPLATE_3 _SDT_ASM_TEMPLATE_2 _SDT_ASM_BLANK _SDT_ARGFMT(3)
+#define _SDT_ASM_TEMPLATE_4 _SDT_ASM_TEMPLATE_3 _SDT_ASM_BLANK _SDT_ARGFMT(4)
+#define _SDT_ASM_TEMPLATE_5 _SDT_ASM_TEMPLATE_4 _SDT_ASM_BLANK _SDT_ARGFMT(5)
+#define _SDT_ASM_TEMPLATE_6 _SDT_ASM_TEMPLATE_5 _SDT_ASM_BLANK _SDT_ARGFMT(6)
+#define _SDT_ASM_TEMPLATE_7 _SDT_ASM_TEMPLATE_6 _SDT_ASM_BLANK _SDT_ARGFMT(7)
+#define _SDT_ASM_TEMPLATE_8 _SDT_ASM_TEMPLATE_7 _SDT_ASM_BLANK _SDT_ARGFMT(8)
+#define _SDT_ASM_TEMPLATE_9 _SDT_ASM_TEMPLATE_8 _SDT_ASM_BLANK _SDT_ARGFMT(9)
+#define _SDT_ASM_TEMPLATE_10 _SDT_ASM_TEMPLATE_9 _SDT_ASM_BLANK _SDT_ARGFMT(10)
+#define _SDT_ASM_TEMPLATE_11 _SDT_ASM_TEMPLATE_10 _SDT_ASM_BLANK _SDT_ARGFMT(11)
+#define _SDT_ASM_TEMPLATE_12 _SDT_ASM_TEMPLATE_11 _SDT_ASM_BLANK _SDT_ARGFMT(12)
+#define _SDT_ASM_OPERANDS_0() [__sdt_dummy] "g" (0)
+#define _SDT_ASM_OPERANDS_1(arg1) _SDT_ARG(1, arg1)
+#define _SDT_ASM_OPERANDS_2(arg1, arg2) \
+ _SDT_ASM_OPERANDS_1(arg1), _SDT_ARG(2, arg2)
+#define _SDT_ASM_OPERANDS_3(arg1, arg2, arg3) \
+ _SDT_ASM_OPERANDS_2(arg1, arg2), _SDT_ARG(3, arg3)
+#define _SDT_ASM_OPERANDS_4(arg1, arg2, arg3, arg4) \
+ _SDT_ASM_OPERANDS_3(arg1, arg2, arg3), _SDT_ARG(4, arg4)
+#define _SDT_ASM_OPERANDS_5(arg1, arg2, arg3, arg4, arg5) \
+ _SDT_ASM_OPERANDS_4(arg1, arg2, arg3, arg4), _SDT_ARG(5, arg5)
+#define _SDT_ASM_OPERANDS_6(arg1, arg2, arg3, arg4, arg5, arg6) \
+ _SDT_ASM_OPERANDS_5(arg1, arg2, arg3, arg4, arg5), _SDT_ARG(6, arg6)
+#define _SDT_ASM_OPERANDS_7(arg1, arg2, arg3, arg4, arg5, arg6, arg7) \
+ _SDT_ASM_OPERANDS_6(arg1, arg2, arg3, arg4, arg5, arg6), _SDT_ARG(7, arg7)
+#define _SDT_ASM_OPERANDS_8(arg1, arg2, arg3, arg4, arg5, arg6, arg7, arg8) \
+ _SDT_ASM_OPERANDS_7(arg1, arg2, arg3, arg4, arg5, arg6, arg7), \
+ _SDT_ARG(8, arg8)
+#define _SDT_ASM_OPERANDS_9(arg1,arg2,arg3,arg4,arg5,arg6,arg7,arg8,arg9) \
+ _SDT_ASM_OPERANDS_8(arg1, arg2, arg3, arg4, arg5, arg6, arg7, arg8), \
+ _SDT_ARG(9, arg9)
+#define _SDT_ASM_OPERANDS_10(arg1,arg2,arg3,arg4,arg5,arg6,arg7,arg8,arg9,arg10) \
+ _SDT_ASM_OPERANDS_9(arg1, arg2, arg3, arg4, arg5, arg6, arg7, arg8, arg9), \
+ _SDT_ARG(10, arg10)
+#define _SDT_ASM_OPERANDS_11(arg1,arg2,arg3,arg4,arg5,arg6,arg7,arg8,arg9,arg10,arg11) \
+ _SDT_ASM_OPERANDS_10(arg1, arg2, arg3, arg4, arg5, arg6, arg7, arg8, arg9, arg10), \
+ _SDT_ARG(11, arg11)
+#define _SDT_ASM_OPERANDS_12(arg1,arg2,arg3,arg4,arg5,arg6,arg7,arg8,arg9,arg10,arg11,arg12) \
+ _SDT_ASM_OPERANDS_11(arg1, arg2, arg3, arg4, arg5, arg6, arg7, arg8, arg9, arg10, arg11), \
+ _SDT_ARG(12, arg12)
+
+/* These macros can be used in C, C++, or assembly code.
+ In assembly code the arguments should use normal assembly operand syntax. */
+
+#define STAP_PROBE(provider, name) \
+ _SDT_PROBE(provider, name, 0, ())
+#define STAP_PROBE1(provider, name, arg1) \
+ _SDT_PROBE(provider, name, 1, (arg1))
+#define STAP_PROBE2(provider, name, arg1, arg2) \
+ _SDT_PROBE(provider, name, 2, (arg1, arg2))
+#define STAP_PROBE3(provider, name, arg1, arg2, arg3) \
+ _SDT_PROBE(provider, name, 3, (arg1, arg2, arg3))
+#define STAP_PROBE4(provider, name, arg1, arg2, arg3, arg4) \
+ _SDT_PROBE(provider, name, 4, (arg1, arg2, arg3, arg4))
+#define STAP_PROBE5(provider, name, arg1, arg2, arg3, arg4, arg5) \
+ _SDT_PROBE(provider, name, 5, (arg1, arg2, arg3, arg4, arg5))
+#define STAP_PROBE6(provider, name, arg1, arg2, arg3, arg4, arg5, arg6) \
+ _SDT_PROBE(provider, name, 6, (arg1, arg2, arg3, arg4, arg5, arg6))
+#define STAP_PROBE7(provider, name, arg1, arg2, arg3, arg4, arg5, arg6, arg7) \
+ _SDT_PROBE(provider, name, 7, (arg1, arg2, arg3, arg4, arg5, arg6, arg7))
+#define STAP_PROBE8(provider,name,arg1,arg2,arg3,arg4,arg5,arg6,arg7,arg8) \
+ _SDT_PROBE(provider, name, 8, (arg1,arg2,arg3,arg4,arg5,arg6,arg7,arg8))
+#define STAP_PROBE9(provider,name,arg1,arg2,arg3,arg4,arg5,arg6,arg7,arg8,arg9)\
+ _SDT_PROBE(provider, name, 9, (arg1,arg2,arg3,arg4,arg5,arg6,arg7,arg8,arg9))
+#define STAP_PROBE10(provider,name,arg1,arg2,arg3,arg4,arg5,arg6,arg7,arg8,arg9,arg10) \
+ _SDT_PROBE(provider, name, 10, \
+ (arg1,arg2,arg3,arg4,arg5,arg6,arg7,arg8,arg9,arg10))
+#define STAP_PROBE11(provider,name,arg1,arg2,arg3,arg4,arg5,arg6,arg7,arg8,arg9,arg10,arg11) \
+ _SDT_PROBE(provider, name, 11, \
+ (arg1,arg2,arg3,arg4,arg5,arg6,arg7,arg8,arg9,arg10,arg11))
+#define STAP_PROBE12(provider,name,arg1,arg2,arg3,arg4,arg5,arg6,arg7,arg8,arg9,arg10,arg11,arg12) \
+ _SDT_PROBE(provider, name, 12, \
+ (arg1,arg2,arg3,arg4,arg5,arg6,arg7,arg8,arg9,arg10,arg11,arg12))
+
+/* This STAP_PROBEV macro can be used in variadic scenarios, where the
+ number of probe arguments is not known until compile time. Since
+ variadic macro support may vary with compiler options, you must
+ pre-#define SDT_USE_VARIADIC to enable this type of probe.
+
+ The trick to count __VA_ARGS__ was inspired by this post by
+ Laurent Deniau <laurent.deniau@cern.ch>:
+ http://groups.google.com/group/comp.std.c/msg/346fc464319b1ee5
+
+ Note that our _SDT_NARG is called with an extra 0 arg that's not
+ counted, so we don't have to worry about the behavior of macros
+ called without any arguments. */
+
+#define _SDT_NARG(...) __SDT_NARG(__VA_ARGS__, 12,11,10,9,8,7,6,5,4,3,2,1,0)
+#define __SDT_NARG(_0,_1,_2,_3,_4,_5,_6,_7,_8,_9,_10,_11,_12, N, ...) N
+#ifdef SDT_USE_VARIADIC
+#define _SDT_PROBE_N(provider, name, N, ...) \
+ _SDT_PROBE(provider, name, N, (__VA_ARGS__))
+#define STAP_PROBEV(provider, name, ...) \
+ _SDT_PROBE_N(provider, name, _SDT_NARG(0, ##__VA_ARGS__), ##__VA_ARGS__)
+#endif
+
+/* These macros are for use in asm statements. You must compile
+ with -std=gnu99 or -std=c99 to use the STAP_PROBE_ASM macro.
+
+ The STAP_PROBE_ASM macro generates a quoted string to be used in the
+ template portion of the asm statement, concatenated with strings that
+ contain the actual assembly code around the probe site.
+
+ For example:
+
+ asm ("before\n"
+ STAP_PROBE_ASM(provider, fooprobe, %eax 4(%esi))
+ "after");
+
+ emits the assembly code for "before\nafter", with a probe in between.
+ The probe arguments are the %eax register, and the value of the memory
+ word located 4 bytes past the address in the %esi register. Note that
+ because this is a simple asm, not a GNU C extended asm statement, these
+ % characters do not need to be doubled to generate literal %reg names.
+
+ In a GNU C extended asm statement, the probe arguments can be specified
+ using the macro STAP_PROBE_ASM_TEMPLATE(n) for n arguments. The paired
+ macro STAP_PROBE_ASM_OPERANDS gives the C values of these probe arguments,
+ and appears in the input operand list of the asm statement. For example:
+
+ asm ("someinsn %0,%1\n" // %0 is output operand, %1 is input operand
+ STAP_PROBE_ASM(provider, fooprobe, STAP_PROBE_ASM_TEMPLATE(3))
+ "otherinsn %[namedarg]"
+ : "r" (outvar)
+ : "g" (some_value), [namedarg] "i" (1234),
+ STAP_PROBE_ASM_OPERANDS(3, some_value, some_ptr->field, 1234));
+
+ This is just like writing:
+
+ STAP_PROBE3(provider, fooprobe, some_value, some_ptr->field, 1234));
+
+ but the probe site is right between "someinsn" and "otherinsn".
+
+ The probe arguments in STAP_PROBE_ASM can be given as assembly
+ operands instead, even inside a GNU C extended asm statement.
+ Note that these can use operand templates like %0 or %[name],
+ and likewise they must write %%reg for a literal operand of %reg. */
+
+#define _SDT_ASM_BODY_1(p,n,...) _SDT_ASM_BODY(p,n,_SDT_ASM_SUBSTR,(__VA_ARGS__))
+#define _SDT_ASM_BODY_2(p,n,...) _SDT_ASM_BODY(p,n,/*_SDT_ASM_STRING */,__VA_ARGS__)
+#define _SDT_ASM_BODY_N2(p,n,no,...) _SDT_ASM_BODY_ ## no(p,n,__VA_ARGS__)
+#define _SDT_ASM_BODY_N1(p,n,no,...) _SDT_ASM_BODY_N2(p,n,no,__VA_ARGS__)
+#define _SDT_ASM_BODY_N(p,n,...) _SDT_ASM_BODY_N1(p,n,_SDT_NARG(0, __VA_ARGS__),__VA_ARGS__)
+
+#if __STDC_VERSION__ >= 199901L
+# define STAP_PROBE_ASM(provider, name, ...) \
+ _SDT_ASM_BODY_N(provider, name, __VA_ARGS__) \
+ _SDT_ASM_BASE
+# define STAP_PROBE_ASM_OPERANDS(n, ...) _SDT_ASM_OPERANDS_##n(__VA_ARGS__)
+#else
+# define STAP_PROBE_ASM(provider, name, args) \
+ _SDT_ASM_BODY(provider, name, /* _SDT_ASM_STRING */, (args)) \
+ _SDT_ASM_BASE
+#endif
+#define STAP_PROBE_ASM_TEMPLATE(n) _SDT_ASM_TEMPLATE_##n,"use _SDT_ASM_TEMPLATE_"
+
+
+/* DTrace compatible macro names. */
+#define DTRACE_PROBE(provider,probe) \
+ STAP_PROBE(provider,probe)
+#define DTRACE_PROBE1(provider,probe,parm1) \
+ STAP_PROBE1(provider,probe,parm1)
+#define DTRACE_PROBE2(provider,probe,parm1,parm2) \
+ STAP_PROBE2(provider,probe,parm1,parm2)
+#define DTRACE_PROBE3(provider,probe,parm1,parm2,parm3) \
+ STAP_PROBE3(provider,probe,parm1,parm2,parm3)
+#define DTRACE_PROBE4(provider,probe,parm1,parm2,parm3,parm4) \
+ STAP_PROBE4(provider,probe,parm1,parm2,parm3,parm4)
+#define DTRACE_PROBE5(provider,probe,parm1,parm2,parm3,parm4,parm5) \
+ STAP_PROBE5(provider,probe,parm1,parm2,parm3,parm4,parm5)
+#define DTRACE_PROBE6(provider,probe,parm1,parm2,parm3,parm4,parm5,parm6) \
+ STAP_PROBE6(provider,probe,parm1,parm2,parm3,parm4,parm5,parm6)
+#define DTRACE_PROBE7(provider,probe,parm1,parm2,parm3,parm4,parm5,parm6,parm7) \
+ STAP_PROBE7(provider,probe,parm1,parm2,parm3,parm4,parm5,parm6,parm7)
+#define DTRACE_PROBE8(provider,probe,parm1,parm2,parm3,parm4,parm5,parm6,parm7,parm8) \
+ STAP_PROBE8(provider,probe,parm1,parm2,parm3,parm4,parm5,parm6,parm7,parm8)
+#define DTRACE_PROBE9(provider,probe,parm1,parm2,parm3,parm4,parm5,parm6,parm7,parm8,parm9) \
+ STAP_PROBE9(provider,probe,parm1,parm2,parm3,parm4,parm5,parm6,parm7,parm8,parm9)
+#define DTRACE_PROBE10(provider,probe,parm1,parm2,parm3,parm4,parm5,parm6,parm7,parm8,parm9,parm10) \
+ STAP_PROBE10(provider,probe,parm1,parm2,parm3,parm4,parm5,parm6,parm7,parm8,parm9,parm10)
+#define DTRACE_PROBE11(provider,probe,parm1,parm2,parm3,parm4,parm5,parm6,parm7,parm8,parm9,parm10,parm11) \
+ STAP_PROBE11(provider,probe,parm1,parm2,parm3,parm4,parm5,parm6,parm7,parm8,parm9,parm10,parm11)
+#define DTRACE_PROBE12(provider,probe,parm1,parm2,parm3,parm4,parm5,parm6,parm7,parm8,parm9,parm10,parm11,parm12) \
+ STAP_PROBE12(provider,probe,parm1,parm2,parm3,parm4,parm5,parm6,parm7,parm8,parm9,parm10,parm11,parm12)
+
+
+#endif /* sys/sdt.h */
diff --git a/tools/testing/selftests/powerpc/dscr/settings b/tools/testing/selftests/bpf/settings
index e7b9417537fb..e7b9417537fb 100644
--- a/tools/testing/selftests/powerpc/dscr/settings
+++ b/tools/testing/selftests/bpf/settings
diff --git a/tools/testing/selftests/bpf/task_local_storage_helpers.h b/tools/testing/selftests/bpf/task_local_storage_helpers.h
new file mode 100644
index 000000000000..281f86132766
--- /dev/null
+++ b/tools/testing/selftests/bpf/task_local_storage_helpers.h
@@ -0,0 +1,22 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef __TASK_LOCAL_STORAGE_HELPER_H
+#define __TASK_LOCAL_STORAGE_HELPER_H
+
+#include <unistd.h>
+#include <sys/syscall.h>
+#include <sys/types.h>
+
+#ifndef __NR_pidfd_open
+#ifdef __alpha__
+#define __NR_pidfd_open 544
+#else
+#define __NR_pidfd_open 434
+#endif
+#endif
+
+static inline int sys_pidfd_open(pid_t pid, unsigned int flags)
+{
+ return syscall(__NR_pidfd_open, pid, flags);
+}
+
+#endif
diff --git a/tools/testing/selftests/bpf/tcp_client.py b/tools/testing/selftests/bpf/tcp_client.py
deleted file mode 100755
index a53ed58528d6..000000000000
--- a/tools/testing/selftests/bpf/tcp_client.py
+++ /dev/null
@@ -1,50 +0,0 @@
-#!/usr/bin/env python3
-#
-# SPDX-License-Identifier: GPL-2.0
-#
-
-import sys, os, os.path, getopt
-import socket, time
-import subprocess
-import select
-
-def read(sock, n):
- buf = b''
- while len(buf) < n:
- rem = n - len(buf)
- try: s = sock.recv(rem)
- except (socket.error) as e: return b''
- buf += s
- return buf
-
-def send(sock, s):
- total = len(s)
- count = 0
- while count < total:
- try: n = sock.send(s)
- except (socket.error) as e: n = 0
- if n == 0:
- return count;
- count += n
- return count
-
-
-serverPort = int(sys.argv[1])
-
-# create active socket
-sock = socket.socket(socket.AF_INET6, socket.SOCK_STREAM)
-try:
- sock.connect(('localhost', serverPort))
-except socket.error as e:
- sys.exit(1)
-
-buf = b''
-n = 0
-while n < 1000:
- buf += b'+'
- n += 1
-
-sock.settimeout(1);
-n = send(sock, buf)
-n = read(sock, 500)
-sys.exit(0)
diff --git a/tools/testing/selftests/bpf/tcp_server.py b/tools/testing/selftests/bpf/tcp_server.py
deleted file mode 100755
index 0ca60d193bed..000000000000
--- a/tools/testing/selftests/bpf/tcp_server.py
+++ /dev/null
@@ -1,80 +0,0 @@
-#!/usr/bin/env python3
-#
-# SPDX-License-Identifier: GPL-2.0
-#
-
-import sys, os, os.path, getopt
-import socket, time
-import subprocess
-import select
-
-def read(sock, n):
- buf = b''
- while len(buf) < n:
- rem = n - len(buf)
- try: s = sock.recv(rem)
- except (socket.error) as e: return b''
- buf += s
- return buf
-
-def send(sock, s):
- total = len(s)
- count = 0
- while count < total:
- try: n = sock.send(s)
- except (socket.error) as e: n = 0
- if n == 0:
- return count;
- count += n
- return count
-
-
-SERVER_PORT = 12877
-MAX_PORTS = 2
-
-serverPort = SERVER_PORT
-serverSocket = None
-
-# create passive socket
-serverSocket = socket.socket(socket.AF_INET6, socket.SOCK_STREAM)
-
-try: serverSocket.bind(('localhost', 0))
-except socket.error as msg:
- print('bind fails: ' + str(msg))
-
-sn = serverSocket.getsockname()
-serverPort = sn[1]
-
-cmdStr = ("./tcp_client.py %d &") % (serverPort)
-os.system(cmdStr)
-
-buf = b''
-n = 0
-while n < 500:
- buf += b'.'
- n += 1
-
-serverSocket.listen(MAX_PORTS)
-readList = [serverSocket]
-
-while True:
- readyRead, readyWrite, inError = \
- select.select(readList, [], [], 2)
-
- if len(readyRead) > 0:
- waitCount = 0
- for sock in readyRead:
- if sock == serverSocket:
- (clientSocket, address) = serverSocket.accept()
- address = str(address[0])
- readList.append(clientSocket)
- else:
- sock.settimeout(1);
- s = read(sock, 1000)
- n = send(sock, buf)
- sock.close()
- serverSocket.close()
- sys.exit(0)
- else:
- print('Select timeout!')
- sys.exit(1)
diff --git a/tools/testing/selftests/bpf/test_bpftool.py b/tools/testing/selftests/bpf/test_bpftool.py
index 4fed2dc25c0a..1c2408ee1f5d 100644
--- a/tools/testing/selftests/bpf/test_bpftool.py
+++ b/tools/testing/selftests/bpf/test_bpftool.py
@@ -57,6 +57,11 @@ def default_iface(f):
return f(*args, iface, **kwargs)
return wrapper
+DMESG_EMITTING_HELPERS = [
+ "bpf_probe_write_user",
+ "bpf_trace_printk",
+ "bpf_trace_vprintk",
+ ]
class TestBpftool(unittest.TestCase):
@classmethod
@@ -67,10 +72,7 @@ class TestBpftool(unittest.TestCase):
@default_iface
def test_feature_dev_json(self, iface):
- unexpected_helpers = [
- "bpf_probe_write_user",
- "bpf_trace_printk",
- ]
+ unexpected_helpers = DMESG_EMITTING_HELPERS
expected_keys = [
"syscall_config",
"program_types",
@@ -94,10 +96,7 @@ class TestBpftool(unittest.TestCase):
bpftool_json(["feature", "probe"]),
bpftool_json(["feature"]),
]
- unexpected_helpers = [
- "bpf_probe_write_user",
- "bpf_trace_printk",
- ]
+ unexpected_helpers = DMESG_EMITTING_HELPERS
expected_keys = [
"syscall_config",
"system_config",
@@ -121,10 +120,7 @@ class TestBpftool(unittest.TestCase):
bpftool_json(["feature", "probe", "kernel", "full"]),
bpftool_json(["feature", "probe", "full"]),
]
- expected_helpers = [
- "bpf_probe_write_user",
- "bpf_trace_printk",
- ]
+ expected_helpers = DMESG_EMITTING_HELPERS
for tc in test_cases:
# Check if expected helpers are included at least once in any
@@ -157,7 +153,7 @@ class TestBpftool(unittest.TestCase):
not_full_set.add(helper)
self.assertCountEqual(full_set - not_full_set,
- {"bpf_probe_write_user", "bpf_trace_printk"})
+ set(DMESG_EMITTING_HELPERS))
self.assertCountEqual(not_full_set - full_set, set())
def test_feature_macros(self):
diff --git a/tools/testing/selftests/bpf/test_bpftool.sh b/tools/testing/selftests/bpf/test_bpftool.sh
index 66690778e36d..718f59692ccb 100755
--- a/tools/testing/selftests/bpf/test_bpftool.sh
+++ b/tools/testing/selftests/bpf/test_bpftool.sh
@@ -2,4 +2,10 @@
# SPDX-License-Identifier: GPL-2.0
# Copyright (c) 2020 SUSE LLC.
+# 'make -C tools/testing/selftests/bpf install' will install to SCRIPT_DIR
+SCRIPT_DIR=$(dirname $(realpath $0))
+
+# 'make -C tools/testing/selftests/bpf' will install to BPFTOOL_INSTALL_PATH
+BPFTOOL_INSTALL_PATH="$SCRIPT_DIR"/tools/sbin
+export PATH=$SCRIPT_DIR:$BPFTOOL_INSTALL_PATH:$PATH
python3 -m unittest -v test_bpftool.TestBpftool
diff --git a/tools/testing/selftests/bpf/test_bpftool_build.sh b/tools/testing/selftests/bpf/test_bpftool_build.sh
index ac349a5cea7e..1453a53ed547 100755
--- a/tools/testing/selftests/bpf/test_bpftool_build.sh
+++ b/tools/testing/selftests/bpf/test_bpftool_build.sh
@@ -22,7 +22,7 @@ KDIR_ROOT_DIR=$(realpath $PWD/$SCRIPT_REL_DIR/../../../../)
cd $KDIR_ROOT_DIR
if [ ! -e tools/bpf/bpftool/Makefile ]; then
echo -e "skip: bpftool files not found!\n"
- exit 0
+ exit 4 # KSFT_SKIP=4
fi
ERROR=0
@@ -90,6 +90,10 @@ echo -e "... through kbuild\n"
if [ -f ".config" ] ; then
make_and_clean tools/bpf
+ ## "make tools/bpf" sets $(OUTPUT) to ...tools/bpf/runqslower for
+ ## runqslower, but the default (used for the "clean" target) is .output.
+ ## Let's make sure we clean runqslower's directory properly.
+ make -C tools/bpf/runqslower OUTPUT=${KDIR_ROOT_DIR}/tools/bpf/runqslower/ clean
## $OUTPUT is overwritten in kbuild Makefile, and thus cannot be passed
## down from toplevel Makefile to bpftool's Makefile.
diff --git a/tools/testing/selftests/bpf/test_bpftool_metadata.sh b/tools/testing/selftests/bpf/test_bpftool_metadata.sh
new file mode 100755
index 000000000000..b5520692f41b
--- /dev/null
+++ b/tools/testing/selftests/bpf/test_bpftool_metadata.sh
@@ -0,0 +1,85 @@
+#!/bin/sh
+# SPDX-License-Identifier: GPL-2.0
+
+# Kselftest framework requirement - SKIP code is 4.
+ksft_skip=4
+
+BPF_FILE_USED="metadata_used.bpf.o"
+BPF_FILE_UNUSED="metadata_unused.bpf.o"
+
+TESTNAME=bpftool_metadata
+BPF_FS=$(awk '$3 == "bpf" {print $2; exit}' /proc/mounts)
+BPF_DIR=$BPF_FS/test_$TESTNAME
+
+_cleanup()
+{
+ set +e
+ rm -rf $BPF_DIR 2> /dev/null
+}
+
+cleanup_skip()
+{
+ echo "selftests: $TESTNAME [SKIP]"
+ _cleanup
+
+ exit $ksft_skip
+}
+
+cleanup()
+{
+ if [ "$?" = 0 ]; then
+ echo "selftests: $TESTNAME [PASS]"
+ else
+ echo "selftests: $TESTNAME [FAILED]"
+ fi
+ _cleanup
+}
+
+if [ $(id -u) -ne 0 ]; then
+ echo "selftests: $TESTNAME [SKIP] Need root privileges"
+ exit $ksft_skip
+fi
+
+if [ -z "$BPF_FS" ]; then
+ echo "selftests: $TESTNAME [SKIP] Could not run test without bpffs mounted"
+ exit $ksft_skip
+fi
+
+if ! bpftool version > /dev/null 2>&1; then
+ echo "selftests: $TESTNAME [SKIP] Could not run test without bpftool"
+ exit $ksft_skip
+fi
+
+set -e
+
+trap cleanup_skip EXIT
+
+mkdir $BPF_DIR
+
+trap cleanup EXIT
+
+bpftool prog load $BPF_FILE_UNUSED $BPF_DIR/unused
+
+METADATA_PLAIN="$(bpftool prog)"
+echo "$METADATA_PLAIN" | grep 'a = "foo"' > /dev/null
+echo "$METADATA_PLAIN" | grep 'b = 1' > /dev/null
+
+bpftool prog --json | grep '"metadata":{"a":"foo","b":1}' > /dev/null
+
+bpftool map | grep 'metadata.rodata' > /dev/null
+
+rm $BPF_DIR/unused
+
+bpftool prog load $BPF_FILE_USED $BPF_DIR/used
+
+METADATA_PLAIN="$(bpftool prog)"
+echo "$METADATA_PLAIN" | grep 'a = "bar"' > /dev/null
+echo "$METADATA_PLAIN" | grep 'b = 2' > /dev/null
+
+bpftool prog --json | grep '"metadata":{"a":"bar","b":2}' > /dev/null
+
+bpftool map | grep 'metadata.rodata' > /dev/null
+
+rm $BPF_DIR/used
+
+exit 0
diff --git a/tools/testing/selftests/bpf/test_bpftool_synctypes.py b/tools/testing/selftests/bpf/test_bpftool_synctypes.py
new file mode 100755
index 000000000000..0ed67b6b31dd
--- /dev/null
+++ b/tools/testing/selftests/bpf/test_bpftool_synctypes.py
@@ -0,0 +1,627 @@
+#!/usr/bin/env python3
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+#
+# Copyright (C) 2021 Isovalent, Inc.
+
+import argparse
+import re
+import os, sys
+
+LINUX_ROOT = os.path.abspath(os.path.join(__file__,
+ os.pardir, os.pardir, os.pardir, os.pardir, os.pardir))
+BPFTOOL_DIR = os.getenv('BPFTOOL_DIR',
+ os.path.join(LINUX_ROOT, 'tools/bpf/bpftool'))
+BPFTOOL_BASHCOMP_DIR = os.getenv('BPFTOOL_BASHCOMP_DIR',
+ os.path.join(BPFTOOL_DIR, 'bash-completion'))
+BPFTOOL_DOC_DIR = os.getenv('BPFTOOL_DOC_DIR',
+ os.path.join(BPFTOOL_DIR, 'Documentation'))
+INCLUDE_DIR = os.getenv('INCLUDE_DIR',
+ os.path.join(LINUX_ROOT, 'tools/include'))
+
+retval = 0
+
+class BlockParser(object):
+ """
+ A parser for extracting set of values from blocks such as enums.
+ @reader: a pointer to the open file to parse
+ """
+ def __init__(self, reader):
+ self.reader = reader
+
+ def search_block(self, start_marker):
+ """
+ Search for a given structure in a file.
+ @start_marker: regex marking the beginning of a structure to parse
+ """
+ offset = self.reader.tell()
+ array_start = re.search(start_marker, self.reader.read())
+ if array_start is None:
+ raise Exception('Failed to find start of block')
+ self.reader.seek(offset + array_start.start())
+
+ def parse(self, pattern, end_marker):
+ """
+ Parse a block and return a set of values. Values to extract must be
+ on separate lines in the file.
+ @pattern: pattern used to identify the values to extract
+ @end_marker: regex marking the end of the block to parse
+ """
+ entries = set()
+ while True:
+ line = self.reader.readline()
+ if not line or re.match(end_marker, line):
+ break
+ capture = pattern.search(line)
+ if capture and pattern.groups >= 1:
+ entries.add(capture.group(1))
+ return entries
+
+class ArrayParser(BlockParser):
+ """
+ A parser for extracting a set of values from some BPF-related arrays.
+ @reader: a pointer to the open file to parse
+ @array_name: name of the array to parse
+ """
+ end_marker = re.compile('^};')
+
+ def __init__(self, reader, array_name):
+ self.array_name = array_name
+ self.start_marker = re.compile(f'(static )?const bool {self.array_name}\[.*\] = {{\n')
+ super().__init__(reader)
+
+ def search_block(self):
+ """
+ Search for the given array in a file.
+ """
+ super().search_block(self.start_marker);
+
+ def parse(self):
+ """
+ Parse a block and return data as a dictionary. Items to extract must be
+ on separate lines in the file.
+ """
+ pattern = re.compile('\[(BPF_\w*)\]\s*= (true|false),?$')
+ entries = set()
+ while True:
+ line = self.reader.readline()
+ if line == '' or re.match(self.end_marker, line):
+ break
+ capture = pattern.search(line)
+ if capture:
+ entries |= {capture.group(1)}
+ return entries
+
+class InlineListParser(BlockParser):
+ """
+ A parser for extracting set of values from inline lists.
+ """
+ def parse(self, pattern, end_marker):
+ """
+ Parse a block and return a set of values. Multiple values to extract
+ can be on a same line in the file.
+ @pattern: pattern used to identify the values to extract
+ @end_marker: regex marking the end of the block to parse
+ """
+ entries = set()
+ while True:
+ line = self.reader.readline()
+ if not line:
+ break
+ entries.update(pattern.findall(line))
+ if re.search(end_marker, line):
+ break
+ return entries
+
+class FileExtractor(object):
+ """
+ A generic reader for extracting data from a given file. This class contains
+ several helper methods that wrap around parser objects to extract values
+ from different structures.
+ This class does not offer a way to set a filename, which is expected to be
+ defined in children classes.
+ """
+ def __init__(self):
+ self.reader = open(self.filename, 'r')
+
+ def close(self):
+ """
+ Close the file used by the parser.
+ """
+ self.reader.close()
+
+ def reset_read(self):
+ """
+ Reset the file position indicator for this parser. This is useful when
+ parsing several structures in the file without respecting the order in
+ which those structures appear in the file.
+ """
+ self.reader.seek(0)
+
+ def get_types_from_array(self, array_name):
+ """
+ Search for and parse a list of allowed BPF_* enum members, for example:
+
+ const bool prog_type_name[] = {
+ [BPF_PROG_TYPE_UNSPEC] = true,
+ [BPF_PROG_TYPE_SOCKET_FILTER] = true,
+ [BPF_PROG_TYPE_KPROBE] = true,
+ };
+
+ Return a set of the enum members, for example:
+
+ {'BPF_PROG_TYPE_UNSPEC',
+ 'BPF_PROG_TYPE_SOCKET_FILTER',
+ 'BPF_PROG_TYPE_KPROBE'}
+
+ @array_name: name of the array to parse
+ """
+ array_parser = ArrayParser(self.reader, array_name)
+ array_parser.search_block()
+ return array_parser.parse()
+
+ def get_enum(self, enum_name):
+ """
+ Search for and parse an enum containing BPF_* members, for example:
+
+ enum bpf_prog_type {
+ BPF_PROG_TYPE_UNSPEC,
+ BPF_PROG_TYPE_SOCKET_FILTER,
+ BPF_PROG_TYPE_KPROBE,
+ };
+
+ Return a set containing all member names, for example:
+
+ {'BPF_PROG_TYPE_UNSPEC',
+ 'BPF_PROG_TYPE_SOCKET_FILTER',
+ 'BPF_PROG_TYPE_KPROBE'}
+
+ @enum_name: name of the enum to parse
+ """
+ start_marker = re.compile(f'enum {enum_name} {{\n')
+ pattern = re.compile('^\s*(BPF_\w+),?(\s+/\*.*\*/)?$')
+ end_marker = re.compile('^};')
+ parser = BlockParser(self.reader)
+ parser.search_block(start_marker)
+ return parser.parse(pattern, end_marker)
+
+ def make_enum_map(self, names, enum_prefix):
+ """
+ Search for and parse an enum containing BPF_* members, just as get_enum
+ does. However, instead of just returning a set of the variant names,
+ also generate a textual representation from them by (assuming and)
+ removing a provided prefix and lowercasing the remainder. Then return a
+ dict mapping from name to textual representation.
+
+ @enum_values: a set of enum values; e.g., as retrieved by get_enum
+ @enum_prefix: the prefix to remove from each of the variants to infer
+ textual representation
+ """
+ mapping = {}
+ for name in names:
+ if not name.startswith(enum_prefix):
+ raise Exception(f"enum variant {name} does not start with {enum_prefix}")
+ text = name[len(enum_prefix):].lower()
+ mapping[name] = text
+
+ return mapping
+
+ def __get_description_list(self, start_marker, pattern, end_marker):
+ parser = InlineListParser(self.reader)
+ parser.search_block(start_marker)
+ return parser.parse(pattern, end_marker)
+
+ def get_rst_list(self, block_name):
+ """
+ Search for and parse a list of type names from RST documentation, for
+ example:
+
+ | *TYPE* := {
+ | **socket** | **kprobe** |
+ | **kretprobe**
+ | }
+
+ Return a set containing all type names, for example:
+
+ {'socket', 'kprobe', 'kretprobe'}
+
+ @block_name: name of the blog to parse, 'TYPE' in the example
+ """
+ start_marker = re.compile(f'\*{block_name}\* := {{')
+ pattern = re.compile('\*\*([\w/-]+)\*\*')
+ end_marker = re.compile('}\n')
+ return self.__get_description_list(start_marker, pattern, end_marker)
+
+ def get_help_list(self, block_name):
+ """
+ Search for and parse a list of type names from a help message in
+ bpftool, for example:
+
+ " TYPE := { socket | kprobe |\\n"
+ " kretprobe }\\n"
+
+ Return a set containing all type names, for example:
+
+ {'socket', 'kprobe', 'kretprobe'}
+
+ @block_name: name of the blog to parse, 'TYPE' in the example
+ """
+ start_marker = re.compile(f'"\s*{block_name} := {{')
+ pattern = re.compile('([\w/]+) [|}]')
+ end_marker = re.compile('}')
+ return self.__get_description_list(start_marker, pattern, end_marker)
+
+ def get_help_list_macro(self, macro):
+ """
+ Search for and parse a list of values from a help message starting with
+ a macro in bpftool, for example:
+
+ " " HELP_SPEC_OPTIONS " |\\n"
+ " {-f|--bpffs} | {-m|--mapcompat} | {-n|--nomount} }\\n"
+
+ Return a set containing all item names, for example:
+
+ {'-f', '--bpffs', '-m', '--mapcompat', '-n', '--nomount'}
+
+ @macro: macro starting the block, 'HELP_SPEC_OPTIONS' in the example
+ """
+ start_marker = re.compile(f'"\s*{macro}\s*" [|}}]')
+ pattern = re.compile('([\w-]+) ?(?:\||}[ }\]])')
+ end_marker = re.compile('}\\\\n')
+ return self.__get_description_list(start_marker, pattern, end_marker)
+
+ def get_bashcomp_list(self, block_name):
+ """
+ Search for and parse a list of type names from a variable in bash
+ completion file, for example:
+
+ local BPFTOOL_PROG_LOAD_TYPES='socket kprobe \\
+ kretprobe'
+
+ Return a set containing all type names, for example:
+
+ {'socket', 'kprobe', 'kretprobe'}
+
+ @block_name: name of the blog to parse, 'TYPE' in the example
+ """
+ start_marker = re.compile(f'local {block_name}=\'')
+ pattern = re.compile('(?:.*=\')?([\w/]+)')
+ end_marker = re.compile('\'$')
+ return self.__get_description_list(start_marker, pattern, end_marker)
+
+class SourceFileExtractor(FileExtractor):
+ """
+ An abstract extractor for a source file with usage message.
+ This class does not offer a way to set a filename, which is expected to be
+ defined in children classes.
+ """
+ def get_options(self):
+ return self.get_help_list_macro('HELP_SPEC_OPTIONS')
+
+class MainHeaderFileExtractor(SourceFileExtractor):
+ """
+ An extractor for bpftool's main.h
+ """
+ filename = os.path.join(BPFTOOL_DIR, 'main.h')
+
+ def get_common_options(self):
+ """
+ Parse the list of common options in main.h (options that apply to all
+ commands), which looks to the lists of options in other source files
+ but has different start and end markers:
+
+ "OPTIONS := { {-j|--json} [{-p|--pretty}] | {-d|--debug}"
+
+ Return a set containing all options, such as:
+
+ {'-p', '-d', '--pretty', '--debug', '--json', '-j'}
+ """
+ start_marker = re.compile(f'"OPTIONS :=')
+ pattern = re.compile('([\w-]+) ?(?:\||}[ }\]"])')
+ end_marker = re.compile('#define')
+
+ parser = InlineListParser(self.reader)
+ parser.search_block(start_marker)
+ return parser.parse(pattern, end_marker)
+
+class ManSubstitutionsExtractor(SourceFileExtractor):
+ """
+ An extractor for substitutions.rst
+ """
+ filename = os.path.join(BPFTOOL_DOC_DIR, 'substitutions.rst')
+
+ def get_common_options(self):
+ """
+ Parse the list of common options in substitutions.rst (options that
+ apply to all commands).
+
+ Return a set containing all options, such as:
+
+ {'-p', '-d', '--pretty', '--debug', '--json', '-j'}
+ """
+ start_marker = re.compile('\|COMMON_OPTIONS\| replace:: {')
+ pattern = re.compile('\*\*([\w/-]+)\*\*')
+ end_marker = re.compile('}$')
+
+ parser = InlineListParser(self.reader)
+ parser.search_block(start_marker)
+ return parser.parse(pattern, end_marker)
+
+class ProgFileExtractor(SourceFileExtractor):
+ """
+ An extractor for bpftool's prog.c.
+ """
+ filename = os.path.join(BPFTOOL_DIR, 'prog.c')
+
+ def get_attach_types(self):
+ types = self.get_types_from_array('attach_types')
+ return self.make_enum_map(types, 'BPF_')
+
+ def get_prog_attach_help(self):
+ return self.get_help_list('ATTACH_TYPE')
+
+class MapFileExtractor(SourceFileExtractor):
+ """
+ An extractor for bpftool's map.c.
+ """
+ filename = os.path.join(BPFTOOL_DIR, 'map.c')
+
+ def get_map_help(self):
+ return self.get_help_list('TYPE')
+
+class CgroupFileExtractor(SourceFileExtractor):
+ """
+ An extractor for bpftool's cgroup.c.
+ """
+ filename = os.path.join(BPFTOOL_DIR, 'cgroup.c')
+
+ def get_prog_attach_help(self):
+ return self.get_help_list('ATTACH_TYPE')
+
+class GenericSourceExtractor(SourceFileExtractor):
+ """
+ An extractor for generic source code files.
+ """
+ filename = ""
+
+ def __init__(self, filename):
+ self.filename = os.path.join(BPFTOOL_DIR, filename)
+ super().__init__()
+
+class BpfHeaderExtractor(FileExtractor):
+ """
+ An extractor for the UAPI BPF header.
+ """
+ filename = os.path.join(INCLUDE_DIR, 'uapi/linux/bpf.h')
+
+ def __init__(self):
+ super().__init__()
+ self.attach_types = {}
+
+ def get_prog_types(self):
+ return self.get_enum('bpf_prog_type')
+
+ def get_map_type_map(self):
+ names = self.get_enum('bpf_map_type')
+ return self.make_enum_map(names, 'BPF_MAP_TYPE_')
+
+ def get_attach_type_map(self):
+ if not self.attach_types:
+ names = self.get_enum('bpf_attach_type')
+ self.attach_types = self.make_enum_map(names, 'BPF_')
+ return self.attach_types
+
+ def get_cgroup_attach_type_map(self):
+ if not self.attach_types:
+ self.get_attach_type_map()
+ return {name: text for name, text in self.attach_types.items()
+ if name.startswith('BPF_CGROUP')}
+
+class ManPageExtractor(FileExtractor):
+ """
+ An abstract extractor for an RST documentation page.
+ This class does not offer a way to set a filename, which is expected to be
+ defined in children classes.
+ """
+ def get_options(self):
+ return self.get_rst_list('OPTIONS')
+
+class ManProgExtractor(ManPageExtractor):
+ """
+ An extractor for bpftool-prog.rst.
+ """
+ filename = os.path.join(BPFTOOL_DOC_DIR, 'bpftool-prog.rst')
+
+ def get_attach_types(self):
+ return self.get_rst_list('ATTACH_TYPE')
+
+class ManMapExtractor(ManPageExtractor):
+ """
+ An extractor for bpftool-map.rst.
+ """
+ filename = os.path.join(BPFTOOL_DOC_DIR, 'bpftool-map.rst')
+
+ def get_map_types(self):
+ return self.get_rst_list('TYPE')
+
+class ManCgroupExtractor(ManPageExtractor):
+ """
+ An extractor for bpftool-cgroup.rst.
+ """
+ filename = os.path.join(BPFTOOL_DOC_DIR, 'bpftool-cgroup.rst')
+
+ def get_attach_types(self):
+ return self.get_rst_list('ATTACH_TYPE')
+
+class ManGenericExtractor(ManPageExtractor):
+ """
+ An extractor for generic RST documentation pages.
+ """
+ filename = ""
+
+ def __init__(self, filename):
+ self.filename = os.path.join(BPFTOOL_DIR, filename)
+ super().__init__()
+
+class BashcompExtractor(FileExtractor):
+ """
+ An extractor for bpftool's bash completion file.
+ """
+ filename = os.path.join(BPFTOOL_BASHCOMP_DIR, 'bpftool')
+
+ def get_prog_attach_types(self):
+ return self.get_bashcomp_list('BPFTOOL_PROG_ATTACH_TYPES')
+
+def verify(first_set, second_set, message):
+ """
+ Print all values that differ between two sets.
+ @first_set: one set to compare
+ @second_set: another set to compare
+ @message: message to print for values belonging to only one of the sets
+ """
+ global retval
+ diff = first_set.symmetric_difference(second_set)
+ if diff:
+ print(message, diff)
+ retval = 1
+
+def main():
+ # No arguments supported at this time, but print usage for -h|--help
+ argParser = argparse.ArgumentParser(description="""
+ Verify that bpftool's code, help messages, documentation and bash
+ completion are all in sync on program types, map types, attach types, and
+ options. Also check that bpftool is in sync with the UAPI BPF header.
+ """)
+ args = argParser.parse_args()
+
+ bpf_info = BpfHeaderExtractor()
+
+ # Map types (names)
+
+ map_info = MapFileExtractor()
+ source_map_types = set(bpf_info.get_map_type_map().values())
+ source_map_types.discard('unspec')
+
+ # BPF_MAP_TYPE_CGROUP_STORAGE_DEPRECATED and BPF_MAP_TYPE_CGROUP_STORAGE
+ # share the same enum value and source_map_types picks
+ # BPF_MAP_TYPE_CGROUP_STORAGE_DEPRECATED/cgroup_storage_deprecated.
+ # Replace 'cgroup_storage_deprecated' with 'cgroup_storage'
+ # so it aligns with what `bpftool map help` shows.
+ source_map_types.remove('cgroup_storage_deprecated')
+ source_map_types.add('cgroup_storage')
+
+ # The same applied to BPF_MAP_TYPE_PERCPU_CGROUP_STORAGE_DEPRECATED and
+ # BPF_MAP_TYPE_PERCPU_CGROUP_STORAGE which share the same enum value
+ # and source_map_types picks
+ # BPF_MAP_TYPE_PERCPU_CGROUP_STORAGE_DEPRECATED/percpu_cgroup_storage_deprecated.
+ # Replace 'percpu_cgroup_storage_deprecated' with 'percpu_cgroup_storage'
+ # so it aligns with what `bpftool map help` shows.
+ source_map_types.remove('percpu_cgroup_storage_deprecated')
+ source_map_types.add('percpu_cgroup_storage')
+
+ help_map_types = map_info.get_map_help()
+ help_map_options = map_info.get_options()
+ map_info.close()
+
+ man_map_info = ManMapExtractor()
+ man_map_options = man_map_info.get_options()
+ man_map_types = man_map_info.get_map_types()
+ man_map_info.close()
+
+ verify(source_map_types, help_map_types,
+ f'Comparing {BpfHeaderExtractor.filename} (bpf_map_type) and {MapFileExtractor.filename} (do_help() TYPE):')
+ verify(source_map_types, man_map_types,
+ f'Comparing {BpfHeaderExtractor.filename} (bpf_map_type) and {ManMapExtractor.filename} (TYPE):')
+ verify(help_map_options, man_map_options,
+ f'Comparing {MapFileExtractor.filename} (do_help() OPTIONS) and {ManMapExtractor.filename} (OPTIONS):')
+
+ # Attach types (names)
+
+ prog_info = ProgFileExtractor()
+ source_prog_attach_types = set(prog_info.get_attach_types().values())
+
+ help_prog_attach_types = prog_info.get_prog_attach_help()
+ help_prog_options = prog_info.get_options()
+ prog_info.close()
+
+ man_prog_info = ManProgExtractor()
+ man_prog_options = man_prog_info.get_options()
+ man_prog_attach_types = man_prog_info.get_attach_types()
+ man_prog_info.close()
+
+
+ bashcomp_info = BashcompExtractor()
+ bashcomp_prog_attach_types = bashcomp_info.get_prog_attach_types()
+ bashcomp_info.close()
+
+ verify(source_prog_attach_types, help_prog_attach_types,
+ f'Comparing {ProgFileExtractor.filename} (bpf_attach_type) and {ProgFileExtractor.filename} (do_help() ATTACH_TYPE):')
+ verify(source_prog_attach_types, man_prog_attach_types,
+ f'Comparing {ProgFileExtractor.filename} (bpf_attach_type) and {ManProgExtractor.filename} (ATTACH_TYPE):')
+ verify(help_prog_options, man_prog_options,
+ f'Comparing {ProgFileExtractor.filename} (do_help() OPTIONS) and {ManProgExtractor.filename} (OPTIONS):')
+ verify(source_prog_attach_types, bashcomp_prog_attach_types,
+ f'Comparing {ProgFileExtractor.filename} (bpf_attach_type) and {BashcompExtractor.filename} (BPFTOOL_PROG_ATTACH_TYPES):')
+
+ # Cgroup attach types
+ source_cgroup_attach_types = set(bpf_info.get_cgroup_attach_type_map().values())
+ bpf_info.close()
+
+ cgroup_info = CgroupFileExtractor()
+ help_cgroup_attach_types = cgroup_info.get_prog_attach_help()
+ help_cgroup_options = cgroup_info.get_options()
+ cgroup_info.close()
+
+ man_cgroup_info = ManCgroupExtractor()
+ man_cgroup_options = man_cgroup_info.get_options()
+ man_cgroup_attach_types = man_cgroup_info.get_attach_types()
+ man_cgroup_info.close()
+
+ verify(source_cgroup_attach_types, help_cgroup_attach_types,
+ f'Comparing {BpfHeaderExtractor.filename} (bpf_attach_type) and {CgroupFileExtractor.filename} (do_help() ATTACH_TYPE):')
+ verify(source_cgroup_attach_types, man_cgroup_attach_types,
+ f'Comparing {BpfHeaderExtractor.filename} (bpf_attach_type) and {ManCgroupExtractor.filename} (ATTACH_TYPE):')
+ verify(help_cgroup_options, man_cgroup_options,
+ f'Comparing {CgroupFileExtractor.filename} (do_help() OPTIONS) and {ManCgroupExtractor.filename} (OPTIONS):')
+
+ # Options for remaining commands
+
+ for cmd in [ 'btf', 'feature', 'gen', 'iter', 'link', 'net', 'perf', 'struct_ops', ]:
+ source_info = GenericSourceExtractor(cmd + '.c')
+ help_cmd_options = source_info.get_options()
+ source_info.close()
+
+ man_cmd_info = ManGenericExtractor(os.path.join(BPFTOOL_DOC_DIR, 'bpftool-' + cmd + '.rst'))
+ man_cmd_options = man_cmd_info.get_options()
+ man_cmd_info.close()
+
+ verify(help_cmd_options, man_cmd_options,
+ f'Comparing {source_info.filename} (do_help() OPTIONS) and {man_cmd_info.filename} (OPTIONS):')
+
+ source_main_info = GenericSourceExtractor('main.c')
+ help_main_options = source_main_info.get_options()
+ source_main_info.close()
+
+ man_main_info = ManGenericExtractor(os.path.join(BPFTOOL_DOC_DIR, 'bpftool.rst'))
+ man_main_options = man_main_info.get_options()
+ man_main_info.close()
+
+ verify(help_main_options, man_main_options,
+ f'Comparing {source_main_info.filename} (do_help() OPTIONS) and {man_main_info.filename} (OPTIONS):')
+
+ # Compare common options (options that apply to all commands)
+
+ main_hdr_info = MainHeaderFileExtractor()
+ source_common_options = main_hdr_info.get_common_options()
+ main_hdr_info.close()
+
+ man_substitutions = ManSubstitutionsExtractor()
+ man_common_options = man_substitutions.get_common_options()
+ man_substitutions.close()
+
+ verify(source_common_options, man_common_options,
+ f'Comparing common options from {main_hdr_info.filename} (HELP_SPEC_OPTIONS) and {man_substitutions.filename}:')
+
+ sys.exit(retval)
+
+if __name__ == "__main__":
+ main()
diff --git a/tools/testing/selftests/bpf/test_btf.h b/tools/testing/selftests/bpf/test_btf.h
index 2023725f1962..fb4f4714eeb4 100644
--- a/tools/testing/selftests/bpf/test_btf.h
+++ b/tools/testing/selftests/bpf/test_btf.h
@@ -4,6 +4,8 @@
#ifndef _TEST_BTF_H
#define _TEST_BTF_H
+#define BTF_END_RAW 0xdeadbeef
+
#define BTF_INFO_ENC(kind, kind_flag, vlen) \
((!!(kind_flag) << 31) | ((kind) << 24) | ((vlen) & BTF_MAX_VLEN))
@@ -39,6 +41,7 @@
#define BTF_MEMBER_ENC(name, type, bits_offset) \
(name), (type), (bits_offset)
#define BTF_ENUM_ENC(name, val) (name), (val)
+#define BTF_ENUM64_ENC(name, val_lo32, val_hi32) (name), (val_lo32), (val_hi32)
#define BTF_MEMBER_OFFSET(bitfield_size, bits_offset) \
((bitfield_size) << 24 | (bits_offset))
@@ -66,4 +69,13 @@
#define BTF_FUNC_ENC(name, func_proto) \
BTF_TYPE_ENC(name, BTF_INFO_ENC(BTF_KIND_FUNC, 0, 0), func_proto)
+#define BTF_TYPE_FLOAT_ENC(name, sz) \
+ BTF_TYPE_ENC(name, BTF_INFO_ENC(BTF_KIND_FLOAT, 0, 0), sz)
+
+#define BTF_DECL_TAG_ENC(value, type, component_idx) \
+ BTF_TYPE_ENC(value, BTF_INFO_ENC(BTF_KIND_DECL_TAG, 0, 0), type), (component_idx)
+
+#define BTF_TYPE_TAG_ENC(value, type) \
+ BTF_TYPE_ENC(value, BTF_INFO_ENC(BTF_KIND_TYPE_TAG, 0, 0), type)
+
#endif /* _TEST_BTF_H */
diff --git a/tools/testing/selftests/bpf/test_cgroup_storage.c b/tools/testing/selftests/bpf/test_cgroup_storage.c
index 655729004391..0861ea60dcdd 100644
--- a/tools/testing/selftests/bpf/test_cgroup_storage.c
+++ b/tools/testing/selftests/bpf/test_cgroup_storage.c
@@ -6,8 +6,9 @@
#include <stdlib.h>
#include <sys/sysinfo.h>
-#include "bpf_rlimit.h"
+#include "bpf_util.h"
#include "cgroup_helpers.h"
+#include "testing_helpers.h"
char bpf_log_buf[BPF_LOG_BUF_SIZE];
@@ -29,13 +30,13 @@ int main(int argc, char **argv)
BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
BPF_FUNC_get_local_storage),
BPF_MOV64_IMM(BPF_REG_1, 1),
- BPF_STX_XADD(BPF_DW, BPF_REG_0, BPF_REG_1, 0),
+ BPF_ATOMIC_OP(BPF_DW, BPF_ADD, BPF_REG_0, BPF_REG_1, 0),
BPF_LDX_MEM(BPF_DW, BPF_REG_1, BPF_REG_0, 0),
BPF_ALU64_IMM(BPF_AND, BPF_REG_1, 0x1),
BPF_MOV64_REG(BPF_REG_0, BPF_REG_1),
BPF_EXIT_INSN(),
};
- size_t insns_cnt = sizeof(prog) / sizeof(struct bpf_insn);
+ size_t insns_cnt = ARRAY_SIZE(prog);
int error = EXIT_FAILURE;
int map_fd, percpu_map_fd, prog_fd, cgroup_fd;
struct bpf_cgroup_storage_key key;
@@ -43,22 +44,25 @@ int main(int argc, char **argv)
unsigned long long *percpu_value;
int cpu, nproc;
- nproc = get_nprocs_conf();
+ nproc = bpf_num_possible_cpus();
percpu_value = malloc(sizeof(*percpu_value) * nproc);
if (!percpu_value) {
printf("Not enough memory for per-cpu area (%d cpus)\n", nproc);
goto err;
}
- map_fd = bpf_create_map(BPF_MAP_TYPE_CGROUP_STORAGE, sizeof(key),
- sizeof(value), 0, 0);
+ /* Use libbpf 1.0 API mode */
+ libbpf_set_strict_mode(LIBBPF_STRICT_ALL);
+
+ map_fd = bpf_map_create(BPF_MAP_TYPE_CGROUP_STORAGE, NULL, sizeof(key),
+ sizeof(value), 0, NULL);
if (map_fd < 0) {
printf("Failed to create map: %s\n", strerror(errno));
goto out;
}
- percpu_map_fd = bpf_create_map(BPF_MAP_TYPE_PERCPU_CGROUP_STORAGE,
- sizeof(key), sizeof(value), 0, 0);
+ percpu_map_fd = bpf_map_create(BPF_MAP_TYPE_PERCPU_CGROUP_STORAGE, NULL,
+ sizeof(key), sizeof(value), 0, NULL);
if (percpu_map_fd < 0) {
printf("Failed to create map: %s\n", strerror(errno));
goto out;
@@ -66,7 +70,7 @@ int main(int argc, char **argv)
prog[0].imm = percpu_map_fd;
prog[7].imm = map_fd;
- prog_fd = bpf_load_program(BPF_PROG_TYPE_CGROUP_SKB,
+ prog_fd = bpf_test_load_program(BPF_PROG_TYPE_CGROUP_SKB,
prog, insns_cnt, "GPL", 0,
bpf_log_buf, BPF_LOG_BUF_SIZE);
if (prog_fd < 0) {
@@ -74,22 +78,7 @@ int main(int argc, char **argv)
goto out;
}
- if (setup_cgroup_environment()) {
- printf("Failed to setup cgroup environment\n");
- goto err;
- }
-
- /* Create a cgroup, get fd, and join it */
- cgroup_fd = create_and_get_cgroup(TEST_CGROUP);
- if (cgroup_fd < 0) {
- printf("Failed to create test cgroup\n");
- goto err;
- }
-
- if (join_cgroup(TEST_CGROUP)) {
- printf("Failed to join cgroup\n");
- goto err;
- }
+ cgroup_fd = cgroup_setup_and_join(TEST_CGROUP);
/* Attach the bpf program */
if (bpf_prog_attach(prog_fd, cgroup_fd, BPF_CGROUP_INET_EGRESS, 0)) {
diff --git a/tools/testing/selftests/bpf/test_cpp.cpp b/tools/testing/selftests/bpf/test_cpp.cpp
index a8d2e9a87fbf..f4936834f76f 100644
--- a/tools/testing/selftests/bpf/test_cpp.cpp
+++ b/tools/testing/selftests/bpf/test_cpp.cpp
@@ -1,15 +1,107 @@
/* SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause) */
#include <iostream>
+#include <unistd.h>
+#include <linux/bpf.h>
+#include <linux/btf.h>
#include <bpf/libbpf.h>
#include <bpf/bpf.h>
#include <bpf/btf.h>
#include "test_core_extern.skel.h"
-/* do nothing, just make sure we can link successfully */
+template <typename T>
+class Skeleton {
+private:
+ T *skel;
+public:
+ Skeleton(): skel(nullptr) { }
+
+ ~Skeleton() { if (skel) T::destroy(skel); }
+
+ int open(const struct bpf_object_open_opts *opts = nullptr)
+ {
+ int err;
+
+ if (skel)
+ return -EBUSY;
+
+ skel = T::open(opts);
+ err = libbpf_get_error(skel);
+ if (err) {
+ skel = nullptr;
+ return err;
+ }
+
+ return 0;
+ }
+
+ int load() { return T::load(skel); }
+
+ int attach() { return T::attach(skel); }
+
+ void detach() { return T::detach(skel); }
+
+ const T* operator->() const { return skel; }
+
+ T* operator->() { return skel; }
+
+ const T *get() const { return skel; }
+};
+
+static void dump_printf(void *ctx, const char *fmt, va_list args)
+{
+}
+
+static void try_skeleton_template()
+{
+ Skeleton<test_core_extern> skel;
+ std::string prog_name;
+ int err;
+ LIBBPF_OPTS(bpf_object_open_opts, opts);
+
+ err = skel.open(&opts);
+ if (err) {
+ fprintf(stderr, "Skeleton open failed: %d\n", err);
+ return;
+ }
+
+ skel->data->kern_ver = 123;
+ skel->data->int_val = skel->data->ushort_val;
+
+ err = skel.load();
+ if (err) {
+ fprintf(stderr, "Skeleton load failed: %d\n", err);
+ return;
+ }
+
+ if (!skel->kconfig->CONFIG_BPF_SYSCALL)
+ fprintf(stderr, "Seems like CONFIG_BPF_SYSCALL isn't set?!\n");
+
+ err = skel.attach();
+ if (err) {
+ fprintf(stderr, "Skeleton attach failed: %d\n", err);
+ return;
+ }
+
+ prog_name = bpf_program__name(skel->progs.handle_sys_enter);
+ if (prog_name != "handle_sys_enter")
+ fprintf(stderr, "Unexpected program name: %s\n", prog_name.c_str());
+
+ bpf_link__destroy(skel->links.handle_sys_enter);
+ skel->links.handle_sys_enter = bpf_program__attach(skel->progs.handle_sys_enter);
+
+ skel.detach();
+
+ /* destructor will destroy underlying skeleton */
+}
int main(int argc, char *argv[])
{
+ struct btf_dump_opts opts = { };
struct test_core_extern *skel;
+ struct btf *btf;
+ int fd;
+
+ try_skeleton_template();
/* libbpf.h */
libbpf_set_print(NULL);
@@ -18,12 +110,20 @@ int main(int argc, char *argv[])
bpf_prog_get_fd_by_id(0);
/* btf.h */
- btf__new(NULL, 0);
+ btf = btf__new(NULL, 0);
+ if (!libbpf_get_error(btf))
+ btf_dump__new(btf, dump_printf, nullptr, &opts);
/* BPF skeleton */
skel = test_core_extern__open_and_load();
test_core_extern__destroy(skel);
+ fd = bpf_enable_stats(BPF_STATS_RUN_TIME);
+ if (fd < 0)
+ std::cout << "FAILED to enable stats: " << fd << std::endl;
+ else
+ ::close(fd);
+
std::cout << "DONE!" << std::endl;
return 0;
diff --git a/tools/testing/selftests/bpf/test_current_pid_tgid_new_ns.c b/tools/testing/selftests/bpf/test_current_pid_tgid_new_ns.c
deleted file mode 100644
index ed253f252cd0..000000000000
--- a/tools/testing/selftests/bpf/test_current_pid_tgid_new_ns.c
+++ /dev/null
@@ -1,159 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/* Copyright (c) 2020 Carlos Neira cneirabustos@gmail.com */
-#define _GNU_SOURCE
-#include <sys/stat.h>
-#include <sys/types.h>
-#include <unistd.h>
-#include <sys/syscall.h>
-#include <sched.h>
-#include <sys/wait.h>
-#include <sys/mount.h>
-#include "test_progs.h"
-
-#define CHECK_NEWNS(condition, tag, format...) ({ \
- int __ret = !!(condition); \
- if (__ret) { \
- printf("%s:FAIL:%s ", __func__, tag); \
- printf(format); \
- } else { \
- printf("%s:PASS:%s\n", __func__, tag); \
- } \
- __ret; \
-})
-
-struct bss {
- __u64 dev;
- __u64 ino;
- __u64 pid_tgid;
- __u64 user_pid_tgid;
-};
-
-int main(int argc, char **argv)
-{
- pid_t pid;
- int exit_code = 1;
- struct stat st;
-
- printf("Testing bpf_get_ns_current_pid_tgid helper in new ns\n");
-
- if (stat("/proc/self/ns/pid", &st)) {
- perror("stat failed on /proc/self/ns/pid ns\n");
- printf("%s:FAILED\n", argv[0]);
- return exit_code;
- }
-
- if (CHECK_NEWNS(unshare(CLONE_NEWPID | CLONE_NEWNS),
- "unshare CLONE_NEWPID | CLONE_NEWNS", "error errno=%d\n", errno))
- return exit_code;
-
- pid = fork();
- if (pid == -1) {
- perror("Fork() failed\n");
- printf("%s:FAILED\n", argv[0]);
- return exit_code;
- }
-
- if (pid > 0) {
- int status;
-
- usleep(5);
- waitpid(pid, &status, 0);
- return 0;
- } else {
-
- pid = fork();
- if (pid == -1) {
- perror("Fork() failed\n");
- printf("%s:FAILED\n", argv[0]);
- return exit_code;
- }
-
- if (pid > 0) {
- int status;
- waitpid(pid, &status, 0);
- return 0;
- } else {
- if (CHECK_NEWNS(mount("none", "/proc", NULL, MS_PRIVATE|MS_REC, NULL),
- "Unmounting proc", "Cannot umount proc! errno=%d\n", errno))
- return exit_code;
-
- if (CHECK_NEWNS(mount("proc", "/proc", "proc", MS_NOSUID|MS_NOEXEC|MS_NODEV, NULL),
- "Mounting proc", "Cannot mount proc! errno=%d\n", errno))
- return exit_code;
-
- const char *probe_name = "raw_tracepoint/sys_enter";
- const char *file = "test_ns_current_pid_tgid.o";
- struct bpf_link *link = NULL;
- struct bpf_program *prog;
- struct bpf_map *bss_map;
- struct bpf_object *obj;
- int exit_code = 1;
- int err, key = 0;
- struct bss bss;
- struct stat st;
- __u64 id;
-
- obj = bpf_object__open_file(file, NULL);
- if (CHECK_NEWNS(IS_ERR(obj), "obj_open", "err %ld\n", PTR_ERR(obj)))
- return exit_code;
-
- err = bpf_object__load(obj);
- if (CHECK_NEWNS(err, "obj_load", "err %d errno %d\n", err, errno))
- goto cleanup;
-
- bss_map = bpf_object__find_map_by_name(obj, "test_ns_.bss");
- if (CHECK_NEWNS(!bss_map, "find_bss_map", "failed\n"))
- goto cleanup;
-
- prog = bpf_object__find_program_by_title(obj, probe_name);
- if (CHECK_NEWNS(!prog, "find_prog", "prog '%s' not found\n",
- probe_name))
- goto cleanup;
-
- memset(&bss, 0, sizeof(bss));
- pid_t tid = syscall(SYS_gettid);
- pid_t pid = getpid();
-
- id = (__u64) tid << 32 | pid;
- bss.user_pid_tgid = id;
-
- if (CHECK_NEWNS(stat("/proc/self/ns/pid", &st),
- "stat new ns", "Failed to stat /proc/self/ns/pid errno=%d\n", errno))
- goto cleanup;
-
- bss.dev = st.st_dev;
- bss.ino = st.st_ino;
-
- err = bpf_map_update_elem(bpf_map__fd(bss_map), &key, &bss, 0);
- if (CHECK_NEWNS(err, "setting_bss", "failed to set bss : %d\n", err))
- goto cleanup;
-
- link = bpf_program__attach_raw_tracepoint(prog, "sys_enter");
- if (CHECK_NEWNS(IS_ERR(link), "attach_raw_tp", "err %ld\n",
- PTR_ERR(link))) {
- link = NULL;
- goto cleanup;
- }
-
- /* trigger some syscalls */
- usleep(1);
-
- err = bpf_map_lookup_elem(bpf_map__fd(bss_map), &key, &bss);
- if (CHECK_NEWNS(err, "set_bss", "failed to get bss : %d\n", err))
- goto cleanup;
-
- if (CHECK_NEWNS(id != bss.pid_tgid, "Compare user pid/tgid vs. bpf pid/tgid",
- "User pid/tgid %llu BPF pid/tgid %llu\n", id, bss.pid_tgid))
- goto cleanup;
-
- exit_code = 0;
- printf("%s:PASS\n", argv[0]);
-cleanup:
- if (!link) {
- bpf_link__destroy(link);
- link = NULL;
- }
- bpf_object__close(obj);
- }
- }
-}
diff --git a/tools/testing/selftests/bpf/test_dev_cgroup.c b/tools/testing/selftests/bpf/test_dev_cgroup.c
index d850fb9076b5..adeaf63cb6fa 100644
--- a/tools/testing/selftests/bpf/test_dev_cgroup.c
+++ b/tools/testing/selftests/bpf/test_dev_cgroup.c
@@ -14,9 +14,9 @@
#include <bpf/libbpf.h>
#include "cgroup_helpers.h"
-#include "bpf_rlimit.h"
+#include "testing_helpers.h"
-#define DEV_CGROUP_PROG "./dev_cgroup.o"
+#define DEV_CGROUP_PROG "./dev_cgroup.bpf.o"
#define TEST_CGROUP "/test-bpf-based-device-cgroup/"
@@ -27,27 +27,19 @@ int main(int argc, char **argv)
int prog_fd, cgroup_fd;
__u32 prog_cnt;
- if (bpf_prog_load(DEV_CGROUP_PROG, BPF_PROG_TYPE_CGROUP_DEVICE,
+ /* Use libbpf 1.0 API mode */
+ libbpf_set_strict_mode(LIBBPF_STRICT_ALL);
+
+ if (bpf_prog_test_load(DEV_CGROUP_PROG, BPF_PROG_TYPE_CGROUP_DEVICE,
&obj, &prog_fd)) {
printf("Failed to load DEV_CGROUP program\n");
goto out;
}
- if (setup_cgroup_environment()) {
- printf("Failed to load DEV_CGROUP program\n");
- goto err;
- }
-
- /* Create a cgroup, get fd, and join it */
- cgroup_fd = create_and_get_cgroup(TEST_CGROUP);
+ cgroup_fd = cgroup_setup_and_join(TEST_CGROUP);
if (cgroup_fd < 0) {
printf("Failed to create test cgroup\n");
- goto err;
- }
-
- if (join_cgroup(TEST_CGROUP)) {
- printf("Failed to join cgroup\n");
- goto err;
+ goto out;
}
/* Attach bpf program */
diff --git a/tools/testing/selftests/bpf/test_doc_build.sh b/tools/testing/selftests/bpf/test_doc_build.sh
new file mode 100755
index 000000000000..679cf968c7d1
--- /dev/null
+++ b/tools/testing/selftests/bpf/test_doc_build.sh
@@ -0,0 +1,20 @@
+#!/bin/bash
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+set -e
+
+# Assume script is located under tools/testing/selftests/bpf/. We want to start
+# build attempts from the top of kernel repository.
+SCRIPT_REL_PATH=$(realpath $0)
+SCRIPT_REL_DIR=$(dirname $SCRIPT_REL_PATH)
+KDIR_ROOT_DIR=$(realpath $SCRIPT_REL_DIR/../../../../)
+SCRIPT_REL_DIR=$(dirname $(realpath --relative-to=$KDIR_ROOT_DIR $SCRIPT_REL_PATH))
+cd $KDIR_ROOT_DIR
+
+if [ ! -e $PWD/$SCRIPT_REL_DIR/Makefile ]; then
+ echo -e "skip: bpftool files not found!\n"
+ exit 4 # KSFT_SKIP=4
+fi
+
+for tgt in docs docs-clean; do
+ make -s -C $PWD/$SCRIPT_REL_DIR $tgt;
+done
diff --git a/tools/testing/selftests/bpf/test_flow_dissector.c b/tools/testing/selftests/bpf/test_flow_dissector.c
index 01f0c634d548..571cc076dd7d 100644
--- a/tools/testing/selftests/bpf/test_flow_dissector.c
+++ b/tools/testing/selftests/bpf/test_flow_dissector.c
@@ -503,7 +503,7 @@ static int do_rx(int fd)
if (rbuf != cfg_payload_char)
error(1, 0, "recv: payload mismatch");
num++;
- };
+ }
return num;
}
diff --git a/tools/testing/selftests/bpf/test_flow_dissector.sh b/tools/testing/selftests/bpf/test_flow_dissector.sh
index 174b72a64a4c..4b298863797a 100755
--- a/tools/testing/selftests/bpf/test_flow_dissector.sh
+++ b/tools/testing/selftests/bpf/test_flow_dissector.sh
@@ -2,6 +2,8 @@
# SPDX-License-Identifier: GPL-2.0
#
# Load BPF flow dissector and verify it correctly dissects traffic
+
+BPF_FILE="bpf_flow.bpf.o"
export TESTNAME=test_flow_dissector
unmount=0
@@ -22,26 +24,26 @@ if [[ -z $(ip netns identify $$) ]]; then
if bpftool="$(which bpftool)"; then
echo "Testing global flow dissector..."
- $bpftool prog loadall ./bpf_flow.o /sys/fs/bpf/flow \
+ $bpftool prog loadall $BPF_FILE /sys/fs/bpf/flow \
type flow_dissector
if ! unshare --net $bpftool prog attach pinned \
- /sys/fs/bpf/flow/flow_dissector flow_dissector; then
+ /sys/fs/bpf/flow/_dissect flow_dissector; then
echo "Unexpected unsuccessful attach in namespace" >&2
err=1
fi
- $bpftool prog attach pinned /sys/fs/bpf/flow/flow_dissector \
+ $bpftool prog attach pinned /sys/fs/bpf/flow/_dissect \
flow_dissector
if unshare --net $bpftool prog attach pinned \
- /sys/fs/bpf/flow/flow_dissector flow_dissector; then
+ /sys/fs/bpf/flow/_dissect flow_dissector; then
echo "Unexpected successful attach in namespace" >&2
err=1
fi
if ! $bpftool prog detach pinned \
- /sys/fs/bpf/flow/flow_dissector flow_dissector; then
+ /sys/fs/bpf/flow/_dissect flow_dissector; then
echo "Failed to detach flow dissector" >&2
err=1
fi
@@ -95,7 +97,7 @@ else
fi
# Attach BPF program
-./flow_dissector_load -p bpf_flow.o -s flow_dissector
+./flow_dissector_load -p $BPF_FILE -s _dissect
# Setup
tc qdisc add dev lo ingress
@@ -115,6 +117,14 @@ tc filter add dev lo parent ffff: protocol ip pref 1337 flower ip_proto \
# Send 10 IPv4/UDP packets from port 10. Filter should not drop any.
./test_flow_dissector -i 4 -f 10
+echo "Testing IPv4 from 127.0.0.127 (fallback to generic dissector)..."
+# Send 10 IPv4/UDP packets from port 8. Filter should not drop any.
+./test_flow_dissector -i 4 -S 127.0.0.127 -f 8
+# Send 10 IPv4/UDP packets from port 9. Filter should drop all.
+./test_flow_dissector -i 4 -S 127.0.0.127 -f 9 -F
+# Send 10 IPv4/UDP packets from port 10. Filter should not drop any.
+./test_flow_dissector -i 4 -S 127.0.0.127 -f 10
+
echo "Testing IPIP..."
# Send 10 IPv4/IPv4/UDP packets from port 8. Filter should not drop any.
./with_addr.sh ./with_tunnels.sh ./test_flow_dissector -o 4 -e bare -i 4 \
diff --git a/tools/testing/selftests/bpf/test_ftrace.sh b/tools/testing/selftests/bpf/test_ftrace.sh
index 20de7bb873bc..f5109eb0e951 100755
--- a/tools/testing/selftests/bpf/test_ftrace.sh
+++ b/tools/testing/selftests/bpf/test_ftrace.sh
@@ -1,6 +1,11 @@
#!/bin/bash
-TR=/sys/kernel/debug/tracing/
+if [[ -e /sys/kernel/tracing/trace ]]; then
+ TR=/sys/kernel/tracing/
+else
+ TR=/sys/kernel/debug/tracing/
+fi
+
clear_trace() { # reset trace output
echo > $TR/trace
}
diff --git a/tools/testing/selftests/bpf/test_kmod.sh b/tools/testing/selftests/bpf/test_kmod.sh
index 9df0d2ac45f8..50dca53ac536 100755
--- a/tools/testing/selftests/bpf/test_kmod.sh
+++ b/tools/testing/selftests/bpf/test_kmod.sh
@@ -1,6 +1,11 @@
#!/bin/sh
# SPDX-License-Identifier: GPL-2.0
+# Usage:
+# ./test_kmod.sh [module_param]...
+# Ex.: ./test_kmod.sh test_range=1,3
+# All the parameters are passed to the kernel module.
+
# Kselftest framework requirement - SKIP code is 4.
ksft_skip=4
@@ -10,7 +15,13 @@ if [ "$(id -u)" != "0" ]; then
exit $ksft_skip
fi
-SRC_TREE=../../../../
+if [ "$building_out_of_srctree" ]; then
+ # We are in linux-build/kselftest/bpf
+ OUTPUT=../../
+else
+ # We are in linux/tools/testing/selftests/bpf
+ OUTPUT=../../../../
+fi
test_run()
{
@@ -18,17 +29,18 @@ test_run()
sysctl -w net.core.bpf_jit_harden=$2 2>&1 > /dev/null
echo "[ JIT enabled:$1 hardened:$2 ]"
+ shift 2
dmesg -C
- if [ -f ${SRC_TREE}/lib/test_bpf.ko ]; then
- insmod ${SRC_TREE}/lib/test_bpf.ko 2> /dev/null
+ if [ -f ${OUTPUT}/lib/test_bpf.ko ]; then
+ insmod ${OUTPUT}/lib/test_bpf.ko "$@" 2> /dev/null
if [ $? -ne 0 ]; then
rc=1
fi
else
# Use modprobe dry run to check for missing test_bpf module
- if ! /sbin/modprobe -q -n test_bpf; then
+ if ! /sbin/modprobe -q -n test_bpf "$@"; then
echo "test_bpf: [SKIP]"
- elif /sbin/modprobe -q test_bpf; then
+ elif /sbin/modprobe -q test_bpf "$@"; then
echo "test_bpf: ok"
else
echo "test_bpf: [FAIL]"
@@ -53,9 +65,9 @@ test_restore()
rc=0
test_save
-test_run 0 0
-test_run 1 0
-test_run 1 1
-test_run 1 2
+test_run 0 0 "$@"
+test_run 1 0 "$@"
+test_run 1 1 "$@"
+test_run 1 2 "$@"
test_restore
exit $rc
diff --git a/tools/testing/selftests/bpf/test_lirc_mode2.sh b/tools/testing/selftests/bpf/test_lirc_mode2.sh
index ec4e15948e40..5252b91f48a1 100755
--- a/tools/testing/selftests/bpf/test_lirc_mode2.sh
+++ b/tools/testing/selftests/bpf/test_lirc_mode2.sh
@@ -3,6 +3,7 @@
# Kselftest framework requirement - SKIP code is 4.
ksft_skip=4
+ret=$ksft_skip
msg="skip all tests:"
if [ $UID != 0 ]; then
@@ -25,7 +26,7 @@ do
fi
done
-if [ -n $LIRCDEV ];
+if [ -n "$LIRCDEV" ];
then
TYPE=lirc_mode2
./test_lirc_mode2_user $LIRCDEV $INPUTDEV
@@ -36,3 +37,5 @@ then
echo -e ${GREEN}"PASS: $TYPE"${NC}
fi
fi
+
+exit $ret
diff --git a/tools/testing/selftests/bpf/test_lirc_mode2_user.c b/tools/testing/selftests/bpf/test_lirc_mode2_user.c
index fb5fd6841ef3..4694422aa76c 100644
--- a/tools/testing/selftests/bpf/test_lirc_mode2_user.c
+++ b/tools/testing/selftests/bpf/test_lirc_mode2_user.c
@@ -28,7 +28,6 @@
// 5. We can read keycode from same /dev/lirc device
#include <linux/bpf.h>
-#include <linux/lirc.h>
#include <linux/input.h>
#include <errno.h>
#include <stdio.h>
@@ -45,6 +44,8 @@
#include <bpf/bpf.h>
#include <bpf/libbpf.h>
+#include "testing_helpers.h"
+
int main(int argc, char **argv)
{
struct bpf_object *obj;
@@ -58,8 +59,8 @@ int main(int argc, char **argv)
return 2;
}
- ret = bpf_prog_load("test_lirc_mode2_kern.o",
- BPF_PROG_TYPE_LIRC_MODE2, &obj, &progfd);
+ ret = bpf_prog_test_load("test_lirc_mode2_kern.bpf.o",
+ BPF_PROG_TYPE_LIRC_MODE2, &obj, &progfd);
if (ret) {
printf("Failed to load bpf program\n");
return 1;
diff --git a/tools/testing/selftests/bpf/test_loader.c b/tools/testing/selftests/bpf/test_loader.c
new file mode 100644
index 000000000000..524c38e9cde4
--- /dev/null
+++ b/tools/testing/selftests/bpf/test_loader.c
@@ -0,0 +1,736 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/* Copyright (c) 2022 Meta Platforms, Inc. and affiliates. */
+#include <linux/capability.h>
+#include <stdlib.h>
+#include <test_progs.h>
+#include <bpf/btf.h>
+
+#include "autoconf_helper.h"
+#include "unpriv_helpers.h"
+#include "cap_helpers.h"
+
+#define str_has_pfx(str, pfx) \
+ (strncmp(str, pfx, __builtin_constant_p(pfx) ? sizeof(pfx) - 1 : strlen(pfx)) == 0)
+
+#define TEST_LOADER_LOG_BUF_SZ 2097152
+
+#define TEST_TAG_EXPECT_FAILURE "comment:test_expect_failure"
+#define TEST_TAG_EXPECT_SUCCESS "comment:test_expect_success"
+#define TEST_TAG_EXPECT_MSG_PFX "comment:test_expect_msg="
+#define TEST_TAG_EXPECT_FAILURE_UNPRIV "comment:test_expect_failure_unpriv"
+#define TEST_TAG_EXPECT_SUCCESS_UNPRIV "comment:test_expect_success_unpriv"
+#define TEST_TAG_EXPECT_MSG_PFX_UNPRIV "comment:test_expect_msg_unpriv="
+#define TEST_TAG_LOG_LEVEL_PFX "comment:test_log_level="
+#define TEST_TAG_PROG_FLAGS_PFX "comment:test_prog_flags="
+#define TEST_TAG_DESCRIPTION_PFX "comment:test_description="
+#define TEST_TAG_RETVAL_PFX "comment:test_retval="
+#define TEST_TAG_RETVAL_PFX_UNPRIV "comment:test_retval_unpriv="
+#define TEST_TAG_AUXILIARY "comment:test_auxiliary"
+#define TEST_TAG_AUXILIARY_UNPRIV "comment:test_auxiliary_unpriv"
+#define TEST_BTF_PATH "comment:test_btf_path="
+
+/* Warning: duplicated in bpf_misc.h */
+#define POINTER_VALUE 0xcafe4all
+#define TEST_DATA_LEN 64
+
+#ifdef CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS
+#define EFFICIENT_UNALIGNED_ACCESS 1
+#else
+#define EFFICIENT_UNALIGNED_ACCESS 0
+#endif
+
+static int sysctl_unpriv_disabled = -1;
+
+enum mode {
+ PRIV = 1,
+ UNPRIV = 2
+};
+
+struct test_subspec {
+ char *name;
+ bool expect_failure;
+ const char **expect_msgs;
+ size_t expect_msg_cnt;
+ int retval;
+ bool execute;
+};
+
+struct test_spec {
+ const char *prog_name;
+ struct test_subspec priv;
+ struct test_subspec unpriv;
+ const char *btf_custom_path;
+ int log_level;
+ int prog_flags;
+ int mode_mask;
+ bool auxiliary;
+ bool valid;
+};
+
+static int tester_init(struct test_loader *tester)
+{
+ if (!tester->log_buf) {
+ tester->log_buf_sz = TEST_LOADER_LOG_BUF_SZ;
+ tester->log_buf = calloc(tester->log_buf_sz, 1);
+ if (!ASSERT_OK_PTR(tester->log_buf, "tester_log_buf"))
+ return -ENOMEM;
+ }
+
+ return 0;
+}
+
+void test_loader_fini(struct test_loader *tester)
+{
+ if (!tester)
+ return;
+
+ free(tester->log_buf);
+}
+
+static void free_test_spec(struct test_spec *spec)
+{
+ free(spec->priv.name);
+ free(spec->unpriv.name);
+ free(spec->priv.expect_msgs);
+ free(spec->unpriv.expect_msgs);
+
+ spec->priv.name = NULL;
+ spec->unpriv.name = NULL;
+ spec->priv.expect_msgs = NULL;
+ spec->unpriv.expect_msgs = NULL;
+}
+
+static int push_msg(const char *msg, struct test_subspec *subspec)
+{
+ void *tmp;
+
+ tmp = realloc(subspec->expect_msgs, (1 + subspec->expect_msg_cnt) * sizeof(void *));
+ if (!tmp) {
+ ASSERT_FAIL("failed to realloc memory for messages\n");
+ return -ENOMEM;
+ }
+ subspec->expect_msgs = tmp;
+ subspec->expect_msgs[subspec->expect_msg_cnt++] = msg;
+
+ return 0;
+}
+
+static int parse_int(const char *str, int *val, const char *name)
+{
+ char *end;
+ long tmp;
+
+ errno = 0;
+ if (str_has_pfx(str, "0x"))
+ tmp = strtol(str + 2, &end, 16);
+ else
+ tmp = strtol(str, &end, 10);
+ if (errno || end[0] != '\0') {
+ PRINT_FAIL("failed to parse %s from '%s'\n", name, str);
+ return -EINVAL;
+ }
+ *val = tmp;
+ return 0;
+}
+
+static int parse_retval(const char *str, int *val, const char *name)
+{
+ struct {
+ char *name;
+ int val;
+ } named_values[] = {
+ { "INT_MIN" , INT_MIN },
+ { "POINTER_VALUE", POINTER_VALUE },
+ { "TEST_DATA_LEN", TEST_DATA_LEN },
+ };
+ int i;
+
+ for (i = 0; i < ARRAY_SIZE(named_values); ++i) {
+ if (strcmp(str, named_values[i].name) != 0)
+ continue;
+ *val = named_values[i].val;
+ return 0;
+ }
+
+ return parse_int(str, val, name);
+}
+
+static void update_flags(int *flags, int flag, bool clear)
+{
+ if (clear)
+ *flags &= ~flag;
+ else
+ *flags |= flag;
+}
+
+/* Uses btf_decl_tag attributes to describe the expected test
+ * behavior, see bpf_misc.h for detailed description of each attribute
+ * and attribute combinations.
+ */
+static int parse_test_spec(struct test_loader *tester,
+ struct bpf_object *obj,
+ struct bpf_program *prog,
+ struct test_spec *spec)
+{
+ const char *description = NULL;
+ bool has_unpriv_result = false;
+ bool has_unpriv_retval = false;
+ int func_id, i, err = 0;
+ struct btf *btf;
+
+ memset(spec, 0, sizeof(*spec));
+
+ spec->prog_name = bpf_program__name(prog);
+ spec->prog_flags = testing_prog_flags();
+
+ btf = bpf_object__btf(obj);
+ if (!btf) {
+ ASSERT_FAIL("BPF object has no BTF");
+ return -EINVAL;
+ }
+
+ func_id = btf__find_by_name_kind(btf, spec->prog_name, BTF_KIND_FUNC);
+ if (func_id < 0) {
+ ASSERT_FAIL("failed to find FUNC BTF type for '%s'", spec->prog_name);
+ return -EINVAL;
+ }
+
+ for (i = 1; i < btf__type_cnt(btf); i++) {
+ const char *s, *val, *msg;
+ const struct btf_type *t;
+ bool clear;
+ int flags;
+
+ t = btf__type_by_id(btf, i);
+ if (!btf_is_decl_tag(t))
+ continue;
+
+ if (t->type != func_id || btf_decl_tag(t)->component_idx != -1)
+ continue;
+
+ s = btf__str_by_offset(btf, t->name_off);
+ if (str_has_pfx(s, TEST_TAG_DESCRIPTION_PFX)) {
+ description = s + sizeof(TEST_TAG_DESCRIPTION_PFX) - 1;
+ } else if (strcmp(s, TEST_TAG_EXPECT_FAILURE) == 0) {
+ spec->priv.expect_failure = true;
+ spec->mode_mask |= PRIV;
+ } else if (strcmp(s, TEST_TAG_EXPECT_SUCCESS) == 0) {
+ spec->priv.expect_failure = false;
+ spec->mode_mask |= PRIV;
+ } else if (strcmp(s, TEST_TAG_EXPECT_FAILURE_UNPRIV) == 0) {
+ spec->unpriv.expect_failure = true;
+ spec->mode_mask |= UNPRIV;
+ has_unpriv_result = true;
+ } else if (strcmp(s, TEST_TAG_EXPECT_SUCCESS_UNPRIV) == 0) {
+ spec->unpriv.expect_failure = false;
+ spec->mode_mask |= UNPRIV;
+ has_unpriv_result = true;
+ } else if (strcmp(s, TEST_TAG_AUXILIARY) == 0) {
+ spec->auxiliary = true;
+ spec->mode_mask |= PRIV;
+ } else if (strcmp(s, TEST_TAG_AUXILIARY_UNPRIV) == 0) {
+ spec->auxiliary = true;
+ spec->mode_mask |= UNPRIV;
+ } else if (str_has_pfx(s, TEST_TAG_EXPECT_MSG_PFX)) {
+ msg = s + sizeof(TEST_TAG_EXPECT_MSG_PFX) - 1;
+ err = push_msg(msg, &spec->priv);
+ if (err)
+ goto cleanup;
+ spec->mode_mask |= PRIV;
+ } else if (str_has_pfx(s, TEST_TAG_EXPECT_MSG_PFX_UNPRIV)) {
+ msg = s + sizeof(TEST_TAG_EXPECT_MSG_PFX_UNPRIV) - 1;
+ err = push_msg(msg, &spec->unpriv);
+ if (err)
+ goto cleanup;
+ spec->mode_mask |= UNPRIV;
+ } else if (str_has_pfx(s, TEST_TAG_RETVAL_PFX)) {
+ val = s + sizeof(TEST_TAG_RETVAL_PFX) - 1;
+ err = parse_retval(val, &spec->priv.retval, "__retval");
+ if (err)
+ goto cleanup;
+ spec->priv.execute = true;
+ spec->mode_mask |= PRIV;
+ } else if (str_has_pfx(s, TEST_TAG_RETVAL_PFX_UNPRIV)) {
+ val = s + sizeof(TEST_TAG_RETVAL_PFX_UNPRIV) - 1;
+ err = parse_retval(val, &spec->unpriv.retval, "__retval_unpriv");
+ if (err)
+ goto cleanup;
+ spec->mode_mask |= UNPRIV;
+ spec->unpriv.execute = true;
+ has_unpriv_retval = true;
+ } else if (str_has_pfx(s, TEST_TAG_LOG_LEVEL_PFX)) {
+ val = s + sizeof(TEST_TAG_LOG_LEVEL_PFX) - 1;
+ err = parse_int(val, &spec->log_level, "test log level");
+ if (err)
+ goto cleanup;
+ } else if (str_has_pfx(s, TEST_TAG_PROG_FLAGS_PFX)) {
+ val = s + sizeof(TEST_TAG_PROG_FLAGS_PFX) - 1;
+
+ clear = val[0] == '!';
+ if (clear)
+ val++;
+
+ if (strcmp(val, "BPF_F_STRICT_ALIGNMENT") == 0) {
+ update_flags(&spec->prog_flags, BPF_F_STRICT_ALIGNMENT, clear);
+ } else if (strcmp(val, "BPF_F_ANY_ALIGNMENT") == 0) {
+ update_flags(&spec->prog_flags, BPF_F_ANY_ALIGNMENT, clear);
+ } else if (strcmp(val, "BPF_F_TEST_RND_HI32") == 0) {
+ update_flags(&spec->prog_flags, BPF_F_TEST_RND_HI32, clear);
+ } else if (strcmp(val, "BPF_F_TEST_STATE_FREQ") == 0) {
+ update_flags(&spec->prog_flags, BPF_F_TEST_STATE_FREQ, clear);
+ } else if (strcmp(val, "BPF_F_SLEEPABLE") == 0) {
+ update_flags(&spec->prog_flags, BPF_F_SLEEPABLE, clear);
+ } else if (strcmp(val, "BPF_F_XDP_HAS_FRAGS") == 0) {
+ update_flags(&spec->prog_flags, BPF_F_XDP_HAS_FRAGS, clear);
+ } else if (strcmp(val, "BPF_F_TEST_REG_INVARIANTS") == 0) {
+ update_flags(&spec->prog_flags, BPF_F_TEST_REG_INVARIANTS, clear);
+ } else /* assume numeric value */ {
+ err = parse_int(val, &flags, "test prog flags");
+ if (err)
+ goto cleanup;
+ update_flags(&spec->prog_flags, flags, clear);
+ }
+ } else if (str_has_pfx(s, TEST_BTF_PATH)) {
+ spec->btf_custom_path = s + sizeof(TEST_BTF_PATH) - 1;
+ }
+ }
+
+ if (spec->mode_mask == 0)
+ spec->mode_mask = PRIV;
+
+ if (!description)
+ description = spec->prog_name;
+
+ if (spec->mode_mask & PRIV) {
+ spec->priv.name = strdup(description);
+ if (!spec->priv.name) {
+ PRINT_FAIL("failed to allocate memory for priv.name\n");
+ err = -ENOMEM;
+ goto cleanup;
+ }
+ }
+
+ if (spec->mode_mask & UNPRIV) {
+ int descr_len = strlen(description);
+ const char *suffix = " @unpriv";
+ char *name;
+
+ name = malloc(descr_len + strlen(suffix) + 1);
+ if (!name) {
+ PRINT_FAIL("failed to allocate memory for unpriv.name\n");
+ err = -ENOMEM;
+ goto cleanup;
+ }
+
+ strcpy(name, description);
+ strcpy(&name[descr_len], suffix);
+ spec->unpriv.name = name;
+ }
+
+ if (spec->mode_mask & (PRIV | UNPRIV)) {
+ if (!has_unpriv_result)
+ spec->unpriv.expect_failure = spec->priv.expect_failure;
+
+ if (!has_unpriv_retval) {
+ spec->unpriv.retval = spec->priv.retval;
+ spec->unpriv.execute = spec->priv.execute;
+ }
+
+ if (!spec->unpriv.expect_msgs) {
+ size_t sz = spec->priv.expect_msg_cnt * sizeof(void *);
+
+ spec->unpriv.expect_msgs = malloc(sz);
+ if (!spec->unpriv.expect_msgs) {
+ PRINT_FAIL("failed to allocate memory for unpriv.expect_msgs\n");
+ err = -ENOMEM;
+ goto cleanup;
+ }
+ memcpy(spec->unpriv.expect_msgs, spec->priv.expect_msgs, sz);
+ spec->unpriv.expect_msg_cnt = spec->priv.expect_msg_cnt;
+ }
+ }
+
+ spec->valid = true;
+
+ return 0;
+
+cleanup:
+ free_test_spec(spec);
+ return err;
+}
+
+static void prepare_case(struct test_loader *tester,
+ struct test_spec *spec,
+ struct bpf_object *obj,
+ struct bpf_program *prog)
+{
+ int min_log_level = 0, prog_flags;
+
+ if (env.verbosity > VERBOSE_NONE)
+ min_log_level = 1;
+ if (env.verbosity > VERBOSE_VERY)
+ min_log_level = 2;
+
+ bpf_program__set_log_buf(prog, tester->log_buf, tester->log_buf_sz);
+
+ /* Make sure we set at least minimal log level, unless test requires
+ * even higher level already. Make sure to preserve independent log
+ * level 4 (verifier stats), though.
+ */
+ if ((spec->log_level & 3) < min_log_level)
+ bpf_program__set_log_level(prog, (spec->log_level & 4) | min_log_level);
+ else
+ bpf_program__set_log_level(prog, spec->log_level);
+
+ prog_flags = bpf_program__flags(prog);
+ bpf_program__set_flags(prog, prog_flags | spec->prog_flags);
+
+ tester->log_buf[0] = '\0';
+ tester->next_match_pos = 0;
+}
+
+static void emit_verifier_log(const char *log_buf, bool force)
+{
+ if (!force && env.verbosity == VERBOSE_NONE)
+ return;
+ fprintf(stdout, "VERIFIER LOG:\n=============\n%s=============\n", log_buf);
+}
+
+static void validate_case(struct test_loader *tester,
+ struct test_subspec *subspec,
+ struct bpf_object *obj,
+ struct bpf_program *prog,
+ int load_err)
+{
+ int i, j;
+
+ for (i = 0; i < subspec->expect_msg_cnt; i++) {
+ char *match;
+ const char *expect_msg;
+
+ expect_msg = subspec->expect_msgs[i];
+
+ match = strstr(tester->log_buf + tester->next_match_pos, expect_msg);
+ if (!ASSERT_OK_PTR(match, "expect_msg")) {
+ /* if we are in verbose mode, we've already emitted log */
+ if (env.verbosity == VERBOSE_NONE)
+ emit_verifier_log(tester->log_buf, true /*force*/);
+ for (j = 0; j < i; j++)
+ fprintf(stderr,
+ "MATCHED MSG: '%s'\n", subspec->expect_msgs[j]);
+ fprintf(stderr, "EXPECTED MSG: '%s'\n", expect_msg);
+ return;
+ }
+
+ tester->next_match_pos = match - tester->log_buf + strlen(expect_msg);
+ }
+}
+
+struct cap_state {
+ __u64 old_caps;
+ bool initialized;
+};
+
+static int drop_capabilities(struct cap_state *caps)
+{
+ const __u64 caps_to_drop = (1ULL << CAP_SYS_ADMIN | 1ULL << CAP_NET_ADMIN |
+ 1ULL << CAP_PERFMON | 1ULL << CAP_BPF);
+ int err;
+
+ err = cap_disable_effective(caps_to_drop, &caps->old_caps);
+ if (err) {
+ PRINT_FAIL("failed to drop capabilities: %i, %s\n", err, strerror(err));
+ return err;
+ }
+
+ caps->initialized = true;
+ return 0;
+}
+
+static int restore_capabilities(struct cap_state *caps)
+{
+ int err;
+
+ if (!caps->initialized)
+ return 0;
+
+ err = cap_enable_effective(caps->old_caps, NULL);
+ if (err)
+ PRINT_FAIL("failed to restore capabilities: %i, %s\n", err, strerror(err));
+ caps->initialized = false;
+ return err;
+}
+
+static bool can_execute_unpriv(struct test_loader *tester, struct test_spec *spec)
+{
+ if (sysctl_unpriv_disabled < 0)
+ sysctl_unpriv_disabled = get_unpriv_disabled() ? 1 : 0;
+ if (sysctl_unpriv_disabled)
+ return false;
+ if ((spec->prog_flags & BPF_F_ANY_ALIGNMENT) && !EFFICIENT_UNALIGNED_ACCESS)
+ return false;
+ return true;
+}
+
+static bool is_unpriv_capable_map(struct bpf_map *map)
+{
+ enum bpf_map_type type;
+ __u32 flags;
+
+ type = bpf_map__type(map);
+
+ switch (type) {
+ case BPF_MAP_TYPE_HASH:
+ case BPF_MAP_TYPE_PERCPU_HASH:
+ case BPF_MAP_TYPE_HASH_OF_MAPS:
+ flags = bpf_map__map_flags(map);
+ return !(flags & BPF_F_ZERO_SEED);
+ case BPF_MAP_TYPE_PERCPU_CGROUP_STORAGE:
+ case BPF_MAP_TYPE_ARRAY:
+ case BPF_MAP_TYPE_RINGBUF:
+ case BPF_MAP_TYPE_PROG_ARRAY:
+ case BPF_MAP_TYPE_CGROUP_ARRAY:
+ case BPF_MAP_TYPE_PERCPU_ARRAY:
+ case BPF_MAP_TYPE_USER_RINGBUF:
+ case BPF_MAP_TYPE_ARRAY_OF_MAPS:
+ case BPF_MAP_TYPE_CGROUP_STORAGE:
+ case BPF_MAP_TYPE_PERF_EVENT_ARRAY:
+ return true;
+ default:
+ return false;
+ }
+}
+
+static int do_prog_test_run(int fd_prog, int *retval, bool empty_opts)
+{
+ __u8 tmp_out[TEST_DATA_LEN << 2] = {};
+ __u8 tmp_in[TEST_DATA_LEN] = {};
+ int err, saved_errno;
+ LIBBPF_OPTS(bpf_test_run_opts, topts,
+ .data_in = tmp_in,
+ .data_size_in = sizeof(tmp_in),
+ .data_out = tmp_out,
+ .data_size_out = sizeof(tmp_out),
+ .repeat = 1,
+ );
+
+ if (empty_opts) {
+ memset(&topts, 0, sizeof(struct bpf_test_run_opts));
+ topts.sz = sizeof(struct bpf_test_run_opts);
+ }
+ err = bpf_prog_test_run_opts(fd_prog, &topts);
+ saved_errno = errno;
+
+ if (err) {
+ PRINT_FAIL("FAIL: Unexpected bpf_prog_test_run error: %d (%s) ",
+ saved_errno, strerror(saved_errno));
+ return err;
+ }
+
+ ASSERT_OK(0, "bpf_prog_test_run");
+ *retval = topts.retval;
+
+ return 0;
+}
+
+static bool should_do_test_run(struct test_spec *spec, struct test_subspec *subspec)
+{
+ if (!subspec->execute)
+ return false;
+
+ if (subspec->expect_failure)
+ return false;
+
+ if ((spec->prog_flags & BPF_F_ANY_ALIGNMENT) && !EFFICIENT_UNALIGNED_ACCESS) {
+ if (env.verbosity != VERBOSE_NONE)
+ printf("alignment prevents execution\n");
+ return false;
+ }
+
+ return true;
+}
+
+/* this function is forced noinline and has short generic name to look better
+ * in test_progs output (in case of a failure)
+ */
+static noinline
+void run_subtest(struct test_loader *tester,
+ struct bpf_object_open_opts *open_opts,
+ const void *obj_bytes,
+ size_t obj_byte_cnt,
+ struct test_spec *specs,
+ struct test_spec *spec,
+ bool unpriv)
+{
+ struct test_subspec *subspec = unpriv ? &spec->unpriv : &spec->priv;
+ struct bpf_program *tprog = NULL, *tprog_iter;
+ struct test_spec *spec_iter;
+ struct cap_state caps = {};
+ struct bpf_object *tobj;
+ struct bpf_map *map;
+ int retval, err, i;
+ bool should_load;
+
+ if (!test__start_subtest(subspec->name))
+ return;
+
+ if (unpriv) {
+ if (!can_execute_unpriv(tester, spec)) {
+ test__skip();
+ test__end_subtest();
+ return;
+ }
+ if (drop_capabilities(&caps)) {
+ test__end_subtest();
+ return;
+ }
+ }
+
+ /* Implicitly reset to NULL if next test case doesn't specify */
+ open_opts->btf_custom_path = spec->btf_custom_path;
+
+ tobj = bpf_object__open_mem(obj_bytes, obj_byte_cnt, open_opts);
+ if (!ASSERT_OK_PTR(tobj, "obj_open_mem")) /* shouldn't happen */
+ goto subtest_cleanup;
+
+ i = 0;
+ bpf_object__for_each_program(tprog_iter, tobj) {
+ spec_iter = &specs[i++];
+ should_load = false;
+
+ if (spec_iter->valid) {
+ if (strcmp(bpf_program__name(tprog_iter), spec->prog_name) == 0) {
+ tprog = tprog_iter;
+ should_load = true;
+ }
+
+ if (spec_iter->auxiliary &&
+ spec_iter->mode_mask & (unpriv ? UNPRIV : PRIV))
+ should_load = true;
+ }
+
+ bpf_program__set_autoload(tprog_iter, should_load);
+ }
+
+ prepare_case(tester, spec, tobj, tprog);
+
+ /* By default bpf_object__load() automatically creates all
+ * maps declared in the skeleton. Some map types are only
+ * allowed in priv mode. Disable autoload for such maps in
+ * unpriv mode.
+ */
+ bpf_object__for_each_map(map, tobj)
+ bpf_map__set_autocreate(map, !unpriv || is_unpriv_capable_map(map));
+
+ err = bpf_object__load(tobj);
+ if (subspec->expect_failure) {
+ if (!ASSERT_ERR(err, "unexpected_load_success")) {
+ emit_verifier_log(tester->log_buf, false /*force*/);
+ goto tobj_cleanup;
+ }
+ } else {
+ if (!ASSERT_OK(err, "unexpected_load_failure")) {
+ emit_verifier_log(tester->log_buf, true /*force*/);
+ goto tobj_cleanup;
+ }
+ }
+
+ emit_verifier_log(tester->log_buf, false /*force*/);
+ validate_case(tester, subspec, tobj, tprog, err);
+
+ if (should_do_test_run(spec, subspec)) {
+ /* For some reason test_verifier executes programs
+ * with all capabilities restored. Do the same here.
+ */
+ if (restore_capabilities(&caps))
+ goto tobj_cleanup;
+
+ if (tester->pre_execution_cb) {
+ err = tester->pre_execution_cb(tobj);
+ if (err) {
+ PRINT_FAIL("pre_execution_cb failed: %d\n", err);
+ goto tobj_cleanup;
+ }
+ }
+
+ do_prog_test_run(bpf_program__fd(tprog), &retval,
+ bpf_program__type(tprog) == BPF_PROG_TYPE_SYSCALL ? true : false);
+ if (retval != subspec->retval && subspec->retval != POINTER_VALUE) {
+ PRINT_FAIL("Unexpected retval: %d != %d\n", retval, subspec->retval);
+ goto tobj_cleanup;
+ }
+ }
+
+tobj_cleanup:
+ bpf_object__close(tobj);
+subtest_cleanup:
+ test__end_subtest();
+ restore_capabilities(&caps);
+}
+
+static void process_subtest(struct test_loader *tester,
+ const char *skel_name,
+ skel_elf_bytes_fn elf_bytes_factory)
+{
+ LIBBPF_OPTS(bpf_object_open_opts, open_opts, .object_name = skel_name);
+ struct test_spec *specs = NULL;
+ struct bpf_object *obj = NULL;
+ struct bpf_program *prog;
+ const void *obj_bytes;
+ int err, i, nr_progs;
+ size_t obj_byte_cnt;
+
+ if (tester_init(tester) < 0)
+ return; /* failed to initialize tester */
+
+ obj_bytes = elf_bytes_factory(&obj_byte_cnt);
+ obj = bpf_object__open_mem(obj_bytes, obj_byte_cnt, &open_opts);
+ if (!ASSERT_OK_PTR(obj, "obj_open_mem"))
+ return;
+
+ nr_progs = 0;
+ bpf_object__for_each_program(prog, obj)
+ ++nr_progs;
+
+ specs = calloc(nr_progs, sizeof(struct test_spec));
+ if (!ASSERT_OK_PTR(specs, "specs_alloc"))
+ return;
+
+ i = 0;
+ bpf_object__for_each_program(prog, obj) {
+ /* ignore tests for which we can't derive test specification */
+ err = parse_test_spec(tester, obj, prog, &specs[i++]);
+ if (err)
+ PRINT_FAIL("Can't parse test spec for program '%s'\n",
+ bpf_program__name(prog));
+ }
+
+ i = 0;
+ bpf_object__for_each_program(prog, obj) {
+ struct test_spec *spec = &specs[i++];
+
+ if (!spec->valid || spec->auxiliary)
+ continue;
+
+ if (spec->mode_mask & PRIV)
+ run_subtest(tester, &open_opts, obj_bytes, obj_byte_cnt,
+ specs, spec, false);
+ if (spec->mode_mask & UNPRIV)
+ run_subtest(tester, &open_opts, obj_bytes, obj_byte_cnt,
+ specs, spec, true);
+
+ }
+
+ for (i = 0; i < nr_progs; ++i)
+ free_test_spec(&specs[i]);
+ free(specs);
+ bpf_object__close(obj);
+}
+
+void test_loader__run_subtests(struct test_loader *tester,
+ const char *skel_name,
+ skel_elf_bytes_fn elf_bytes_factory)
+{
+ /* see comment in run_subtest() for why we do this function nesting */
+ process_subtest(tester, skel_name, elf_bytes_factory);
+}
diff --git a/tools/testing/selftests/bpf/test_lpm_map.c b/tools/testing/selftests/bpf/test_lpm_map.c
index 006be3963977..d98c72dc563e 100644
--- a/tools/testing/selftests/bpf/test_lpm_map.c
+++ b/tools/testing/selftests/bpf/test_lpm_map.c
@@ -26,7 +26,6 @@
#include <bpf/bpf.h>
#include "bpf_util.h"
-#include "bpf_rlimit.h"
struct tlpm_node {
struct tlpm_node *next;
@@ -208,9 +207,11 @@ static void test_lpm_order(void)
static void test_lpm_map(int keysize)
{
- size_t i, j, n_matches, n_matches_after_delete, n_nodes, n_lookups;
+ LIBBPF_OPTS(bpf_map_create_opts, opts, .map_flags = BPF_F_NO_PREALLOC);
+ volatile size_t n_matches, n_matches_after_delete;
+ size_t i, j, n_nodes, n_lookups;
struct tlpm_node *t, *list = NULL;
- struct bpf_lpm_trie_key *key;
+ struct bpf_lpm_trie_key_u8 *key;
uint8_t *data, *value;
int r, map;
@@ -233,11 +234,11 @@ static void test_lpm_map(int keysize)
key = alloca(sizeof(*key) + keysize);
memset(key, 0, sizeof(*key) + keysize);
- map = bpf_create_map(BPF_MAP_TYPE_LPM_TRIE,
+ map = bpf_map_create(BPF_MAP_TYPE_LPM_TRIE, NULL,
sizeof(*key) + keysize,
keysize + 1,
4096,
- BPF_F_NO_PREALLOC);
+ &opts);
assert(map >= 0);
for (i = 0; i < n_nodes; ++i) {
@@ -329,8 +330,9 @@ static void test_lpm_map(int keysize)
static void test_lpm_ipaddr(void)
{
- struct bpf_lpm_trie_key *key_ipv4;
- struct bpf_lpm_trie_key *key_ipv6;
+ LIBBPF_OPTS(bpf_map_create_opts, opts, .map_flags = BPF_F_NO_PREALLOC);
+ struct bpf_lpm_trie_key_u8 *key_ipv4;
+ struct bpf_lpm_trie_key_u8 *key_ipv6;
size_t key_size_ipv4;
size_t key_size_ipv6;
int map_fd_ipv4;
@@ -342,14 +344,14 @@ static void test_lpm_ipaddr(void)
key_ipv4 = alloca(key_size_ipv4);
key_ipv6 = alloca(key_size_ipv6);
- map_fd_ipv4 = bpf_create_map(BPF_MAP_TYPE_LPM_TRIE,
+ map_fd_ipv4 = bpf_map_create(BPF_MAP_TYPE_LPM_TRIE, NULL,
key_size_ipv4, sizeof(value),
- 100, BPF_F_NO_PREALLOC);
+ 100, &opts);
assert(map_fd_ipv4 >= 0);
- map_fd_ipv6 = bpf_create_map(BPF_MAP_TYPE_LPM_TRIE,
+ map_fd_ipv6 = bpf_map_create(BPF_MAP_TYPE_LPM_TRIE, NULL,
key_size_ipv6, sizeof(value),
- 100, BPF_F_NO_PREALLOC);
+ 100, &opts);
assert(map_fd_ipv6 >= 0);
/* Fill data some IPv4 and IPv6 address ranges */
@@ -406,16 +408,13 @@ static void test_lpm_ipaddr(void)
/* Test some lookups that should not match any entry */
inet_pton(AF_INET, "10.0.0.1", key_ipv4->data);
- assert(bpf_map_lookup_elem(map_fd_ipv4, key_ipv4, &value) == -1 &&
- errno == ENOENT);
+ assert(bpf_map_lookup_elem(map_fd_ipv4, key_ipv4, &value) == -ENOENT);
inet_pton(AF_INET, "11.11.11.11", key_ipv4->data);
- assert(bpf_map_lookup_elem(map_fd_ipv4, key_ipv4, &value) == -1 &&
- errno == ENOENT);
+ assert(bpf_map_lookup_elem(map_fd_ipv4, key_ipv4, &value) == -ENOENT);
inet_pton(AF_INET6, "2a00:ffff::", key_ipv6->data);
- assert(bpf_map_lookup_elem(map_fd_ipv6, key_ipv6, &value) == -1 &&
- errno == ENOENT);
+ assert(bpf_map_lookup_elem(map_fd_ipv6, key_ipv6, &value) == -ENOENT);
close(map_fd_ipv4);
close(map_fd_ipv6);
@@ -423,7 +422,8 @@ static void test_lpm_ipaddr(void)
static void test_lpm_delete(void)
{
- struct bpf_lpm_trie_key *key;
+ LIBBPF_OPTS(bpf_map_create_opts, opts, .map_flags = BPF_F_NO_PREALLOC);
+ struct bpf_lpm_trie_key_u8 *key;
size_t key_size;
int map_fd;
__u64 value;
@@ -431,9 +431,9 @@ static void test_lpm_delete(void)
key_size = sizeof(*key) + sizeof(__u32);
key = alloca(key_size);
- map_fd = bpf_create_map(BPF_MAP_TYPE_LPM_TRIE,
+ map_fd = bpf_map_create(BPF_MAP_TYPE_LPM_TRIE, NULL,
key_size, sizeof(value),
- 100, BPF_F_NO_PREALLOC);
+ 100, &opts);
assert(map_fd >= 0);
/* Add nodes:
@@ -471,18 +471,15 @@ static void test_lpm_delete(void)
/* remove non-existent node */
key->prefixlen = 32;
inet_pton(AF_INET, "10.0.0.1", key->data);
- assert(bpf_map_lookup_elem(map_fd, key, &value) == -1 &&
- errno == ENOENT);
+ assert(bpf_map_lookup_elem(map_fd, key, &value) == -ENOENT);
key->prefixlen = 30; // unused prefix so far
inet_pton(AF_INET, "192.255.0.0", key->data);
- assert(bpf_map_delete_elem(map_fd, key) == -1 &&
- errno == ENOENT);
+ assert(bpf_map_delete_elem(map_fd, key) == -ENOENT);
key->prefixlen = 16; // same prefix as the root node
inet_pton(AF_INET, "192.255.0.0", key->data);
- assert(bpf_map_delete_elem(map_fd, key) == -1 &&
- errno == ENOENT);
+ assert(bpf_map_delete_elem(map_fd, key) == -ENOENT);
/* assert initial lookup */
key->prefixlen = 32;
@@ -527,15 +524,15 @@ static void test_lpm_delete(void)
key->prefixlen = 32;
inet_pton(AF_INET, "192.168.128.1", key->data);
- assert(bpf_map_lookup_elem(map_fd, key, &value) == -1 &&
- errno == ENOENT);
+ assert(bpf_map_lookup_elem(map_fd, key, &value) == -ENOENT);
close(map_fd);
}
static void test_lpm_get_next_key(void)
{
- struct bpf_lpm_trie_key *key_p, *next_key_p;
+ LIBBPF_OPTS(bpf_map_create_opts, opts, .map_flags = BPF_F_NO_PREALLOC);
+ struct bpf_lpm_trie_key_u8 *key_p, *next_key_p;
size_t key_size;
__u32 value = 0;
int map_fd;
@@ -544,13 +541,11 @@ static void test_lpm_get_next_key(void)
key_p = alloca(key_size);
next_key_p = alloca(key_size);
- map_fd = bpf_create_map(BPF_MAP_TYPE_LPM_TRIE, key_size, sizeof(value),
- 100, BPF_F_NO_PREALLOC);
+ map_fd = bpf_map_create(BPF_MAP_TYPE_LPM_TRIE, NULL, key_size, sizeof(value), 100, &opts);
assert(map_fd >= 0);
/* empty tree. get_next_key should return ENOENT */
- assert(bpf_map_get_next_key(map_fd, NULL, key_p) == -1 &&
- errno == ENOENT);
+ assert(bpf_map_get_next_key(map_fd, NULL, key_p) == -ENOENT);
/* get and verify the first key, get the second one should fail. */
key_p->prefixlen = 16;
@@ -562,8 +557,7 @@ static void test_lpm_get_next_key(void)
assert(key_p->prefixlen == 16 && key_p->data[0] == 192 &&
key_p->data[1] == 168);
- assert(bpf_map_get_next_key(map_fd, key_p, next_key_p) == -1 &&
- errno == ENOENT);
+ assert(bpf_map_get_next_key(map_fd, key_p, next_key_p) == -ENOENT);
/* no exact matching key should get the first one in post order. */
key_p->prefixlen = 8;
@@ -587,8 +581,7 @@ static void test_lpm_get_next_key(void)
next_key_p->data[1] == 168);
memcpy(key_p, next_key_p, key_size);
- assert(bpf_map_get_next_key(map_fd, key_p, next_key_p) == -1 &&
- errno == ENOENT);
+ assert(bpf_map_get_next_key(map_fd, key_p, next_key_p) == -ENOENT);
/* Add one more element (total three) */
key_p->prefixlen = 24;
@@ -611,8 +604,7 @@ static void test_lpm_get_next_key(void)
next_key_p->data[1] == 168);
memcpy(key_p, next_key_p, key_size);
- assert(bpf_map_get_next_key(map_fd, key_p, next_key_p) == -1 &&
- errno == ENOENT);
+ assert(bpf_map_get_next_key(map_fd, key_p, next_key_p) == -ENOENT);
/* Add one more element (total four) */
key_p->prefixlen = 24;
@@ -640,8 +632,7 @@ static void test_lpm_get_next_key(void)
next_key_p->data[1] == 168);
memcpy(key_p, next_key_p, key_size);
- assert(bpf_map_get_next_key(map_fd, key_p, next_key_p) == -1 &&
- errno == ENOENT);
+ assert(bpf_map_get_next_key(map_fd, key_p, next_key_p) == -ENOENT);
/* Add one more element (total five) */
key_p->prefixlen = 28;
@@ -675,8 +666,7 @@ static void test_lpm_get_next_key(void)
next_key_p->data[1] == 168);
memcpy(key_p, next_key_p, key_size);
- assert(bpf_map_get_next_key(map_fd, key_p, next_key_p) == -1 &&
- errno == ENOENT);
+ assert(bpf_map_get_next_key(map_fd, key_p, next_key_p) == -ENOENT);
/* no exact matching key should return the first one in post order */
key_p->prefixlen = 22;
@@ -703,9 +693,9 @@ static void *lpm_test_command(void *arg)
{
int i, j, ret, iter, key_size;
struct lpm_mt_test_info *info = arg;
- struct bpf_lpm_trie_key *key_p;
+ struct bpf_lpm_trie_key_u8 *key_p;
- key_size = sizeof(struct bpf_lpm_trie_key) + sizeof(__u32);
+ key_size = sizeof(*key_p) + sizeof(__u32);
key_p = alloca(key_size);
for (iter = 0; iter < info->iter; iter++)
for (i = 0; i < MAX_TEST_KEYS; i++) {
@@ -727,7 +717,7 @@ static void *lpm_test_command(void *arg)
ret = bpf_map_lookup_elem(info->map_fd, key_p, &value);
assert(ret == 0 || errno == ENOENT);
} else {
- struct bpf_lpm_trie_key *next_key_p = alloca(key_size);
+ struct bpf_lpm_trie_key_u8 *next_key_p = alloca(key_size);
ret = bpf_map_get_next_key(info->map_fd, key_p, next_key_p);
assert(ret == 0 || errno == ENOENT || errno == ENOMEM);
}
@@ -753,6 +743,7 @@ static void setup_lpm_mt_test_info(struct lpm_mt_test_info *info, int map_fd)
static void test_lpm_multi_thread(void)
{
+ LIBBPF_OPTS(bpf_map_create_opts, opts, .map_flags = BPF_F_NO_PREALLOC);
struct lpm_mt_test_info info[4];
size_t key_size, value_size;
pthread_t thread_id[4];
@@ -761,9 +752,8 @@ static void test_lpm_multi_thread(void)
/* create a trie */
value_size = sizeof(__u32);
- key_size = sizeof(struct bpf_lpm_trie_key) + value_size;
- map_fd = bpf_create_map(BPF_MAP_TYPE_LPM_TRIE, key_size, value_size,
- 100, BPF_F_NO_PREALLOC);
+ key_size = sizeof(struct bpf_lpm_trie_key_hdr) + value_size;
+ map_fd = bpf_map_create(BPF_MAP_TYPE_LPM_TRIE, NULL, key_size, value_size, 100, &opts);
/* create 4 threads to test update, delete, lookup and get_next_key */
setup_lpm_mt_test_info(&info[0], map_fd);
@@ -787,6 +777,9 @@ int main(void)
/* we want predictable, pseudo random tests */
srand(0xf00ba1);
+ /* Use libbpf 1.0 API mode */
+ libbpf_set_strict_mode(LIBBPF_STRICT_ALL);
+
test_lpm_basic();
test_lpm_order();
diff --git a/tools/testing/selftests/bpf/test_lru_map.c b/tools/testing/selftests/bpf/test_lru_map.c
index 6a5349f9eb14..4d0650cfb5cd 100644
--- a/tools/testing/selftests/bpf/test_lru_map.c
+++ b/tools/testing/selftests/bpf/test_lru_map.c
@@ -18,7 +18,6 @@
#include <bpf/libbpf.h>
#include "bpf_util.h"
-#include "bpf_rlimit.h"
#include "../../../include/linux/filter.h"
#define LOCAL_FREE_TARGET (128)
@@ -28,13 +27,14 @@ static int nr_cpus;
static int create_map(int map_type, int map_flags, unsigned int size)
{
+ LIBBPF_OPTS(bpf_map_create_opts, opts, .map_flags = map_flags);
int map_fd;
- map_fd = bpf_create_map(map_type, sizeof(unsigned long long),
- sizeof(unsigned long long), size, map_flags);
+ map_fd = bpf_map_create(map_type, NULL, sizeof(unsigned long long),
+ sizeof(unsigned long long), size, &opts);
if (map_fd == -1)
- perror("bpf_create_map");
+ perror("bpf_map_create");
return map_fd;
}
@@ -42,8 +42,6 @@ static int create_map(int map_type, int map_flags, unsigned int size)
static int bpf_map_lookup_elem_with_ref_bit(int fd, unsigned long long key,
void *value)
{
- struct bpf_load_program_attr prog;
- struct bpf_create_map_attr map;
struct bpf_insn insns[] = {
BPF_LD_MAP_VALUE(BPF_REG_9, 0, 0),
BPF_LD_MAP_FD(BPF_REG_1, fd),
@@ -62,35 +60,26 @@ static int bpf_map_lookup_elem_with_ref_bit(int fd, unsigned long long key,
};
__u8 data[64] = {};
int mfd, pfd, ret, zero = 0;
- __u32 retval = 0;
+ LIBBPF_OPTS(bpf_test_run_opts, topts,
+ .data_in = data,
+ .data_size_in = sizeof(data),
+ .repeat = 1,
+ );
- memset(&map, 0, sizeof(map));
- map.map_type = BPF_MAP_TYPE_ARRAY;
- map.key_size = sizeof(int);
- map.value_size = sizeof(unsigned long long);
- map.max_entries = 1;
-
- mfd = bpf_create_map_xattr(&map);
+ mfd = bpf_map_create(BPF_MAP_TYPE_ARRAY, NULL, sizeof(int), sizeof(__u64), 1, NULL);
if (mfd < 0)
return -1;
insns[0].imm = mfd;
- memset(&prog, 0, sizeof(prog));
- prog.prog_type = BPF_PROG_TYPE_SCHED_CLS;
- prog.insns = insns;
- prog.insns_cnt = ARRAY_SIZE(insns);
- prog.license = "GPL";
-
- pfd = bpf_load_program_xattr(&prog, NULL, 0);
+ pfd = bpf_prog_load(BPF_PROG_TYPE_SCHED_CLS, NULL, "GPL", insns, ARRAY_SIZE(insns), NULL);
if (pfd < 0) {
close(mfd);
return -1;
}
- ret = bpf_prog_test_run(pfd, 1, data, sizeof(data),
- NULL, NULL, &retval, NULL);
- if (ret < 0 || retval != 42) {
+ ret = bpf_prog_test_run_opts(pfd, &topts);
+ if (ret < 0 || topts.retval != 42) {
ret = -1;
} else {
assert(!bpf_map_lookup_elem(mfd, &zero, value));
@@ -186,24 +175,20 @@ static void test_lru_sanity0(int map_type, int map_flags)
BPF_NOEXIST));
/* BPF_NOEXIST means: add new element if it doesn't exist */
- assert(bpf_map_update_elem(lru_map_fd, &key, value, BPF_NOEXIST) == -1
- /* key=1 already exists */
- && errno == EEXIST);
+ assert(bpf_map_update_elem(lru_map_fd, &key, value, BPF_NOEXIST) == -EEXIST);
+ /* key=1 already exists */
- assert(bpf_map_update_elem(lru_map_fd, &key, value, -1) == -1 &&
- errno == EINVAL);
+ assert(bpf_map_update_elem(lru_map_fd, &key, value, -1) == -EINVAL);
/* insert key=2 element */
/* check that key=2 is not found */
key = 2;
- assert(bpf_map_lookup_elem(lru_map_fd, &key, value) == -1 &&
- errno == ENOENT);
+ assert(bpf_map_lookup_elem(lru_map_fd, &key, value) == -ENOENT);
/* BPF_EXIST means: update existing element */
- assert(bpf_map_update_elem(lru_map_fd, &key, value, BPF_EXIST) == -1 &&
- /* key=2 is not there */
- errno == ENOENT);
+ assert(bpf_map_update_elem(lru_map_fd, &key, value, BPF_EXIST) == -ENOENT);
+ /* key=2 is not there */
assert(!bpf_map_update_elem(lru_map_fd, &key, value, BPF_NOEXIST));
@@ -211,8 +196,7 @@ static void test_lru_sanity0(int map_type, int map_flags)
/* check that key=3 is not found */
key = 3;
- assert(bpf_map_lookup_elem(lru_map_fd, &key, value) == -1 &&
- errno == ENOENT);
+ assert(bpf_map_lookup_elem(lru_map_fd, &key, value) == -ENOENT);
/* check that key=1 can be found and mark the ref bit to
* stop LRU from removing key=1
@@ -228,8 +212,15 @@ static void test_lru_sanity0(int map_type, int map_flags)
/* key=2 has been removed from the LRU */
key = 2;
- assert(bpf_map_lookup_elem(lru_map_fd, &key, value) == -1 &&
- errno == ENOENT);
+ assert(bpf_map_lookup_elem(lru_map_fd, &key, value) == -ENOENT);
+
+ /* lookup elem key=1 and delete it, then check it doesn't exist */
+ key = 1;
+ assert(!bpf_map_lookup_and_delete_elem(lru_map_fd, &key, &value));
+ assert(value[0] == 1234);
+
+ /* remove the same element from the expected map */
+ assert(!bpf_map_delete_elem(expected_map_fd, &key));
assert(map_equal(lru_map_fd, expected_map_fd));
@@ -384,8 +375,7 @@ static void test_lru_sanity2(int map_type, int map_flags, unsigned int tgt_free)
end_key = 1 + batch_size;
value[0] = 4321;
for (key = 1; key < end_key; key++) {
- assert(bpf_map_lookup_elem(lru_map_fd, &key, value) == -1 &&
- errno == ENOENT);
+ assert(bpf_map_lookup_elem(lru_map_fd, &key, value) == -ENOENT);
assert(!bpf_map_update_elem(lru_map_fd, &key, value,
BPF_NOEXIST));
assert(!bpf_map_lookup_elem_with_ref_bit(lru_map_fd, key, value));
@@ -565,8 +555,7 @@ static void do_test_lru_sanity5(unsigned long long last_key, int map_fd)
assert(!bpf_map_lookup_elem_with_ref_bit(map_fd, key, value));
/* Cannot find the last key because it was removed by LRU */
- assert(bpf_map_lookup_elem(map_fd, &last_key, value) == -1 &&
- errno == ENOENT);
+ assert(bpf_map_lookup_elem(map_fd, &last_key, value) == -ENOENT);
}
/* Test map with only one element */
@@ -714,21 +703,18 @@ static void test_lru_sanity7(int map_type, int map_flags)
BPF_NOEXIST));
/* BPF_NOEXIST means: add new element if it doesn't exist */
- assert(bpf_map_update_elem(lru_map_fd, &key, value, BPF_NOEXIST) == -1
- /* key=1 already exists */
- && errno == EEXIST);
+ assert(bpf_map_update_elem(lru_map_fd, &key, value, BPF_NOEXIST) == -EEXIST);
+ /* key=1 already exists */
/* insert key=2 element */
/* check that key=2 is not found */
key = 2;
- assert(bpf_map_lookup_elem(lru_map_fd, &key, value) == -1 &&
- errno == ENOENT);
+ assert(bpf_map_lookup_elem(lru_map_fd, &key, value) == -ENOENT);
/* BPF_EXIST means: update existing element */
- assert(bpf_map_update_elem(lru_map_fd, &key, value, BPF_EXIST) == -1 &&
- /* key=2 is not there */
- errno == ENOENT);
+ assert(bpf_map_update_elem(lru_map_fd, &key, value, BPF_EXIST) == -ENOENT);
+ /* key=2 is not there */
assert(!bpf_map_update_elem(lru_map_fd, &key, value, BPF_NOEXIST));
@@ -736,8 +722,7 @@ static void test_lru_sanity7(int map_type, int map_flags)
/* check that key=3 is not found */
key = 3;
- assert(bpf_map_lookup_elem(lru_map_fd, &key, value) == -1 &&
- errno == ENOENT);
+ assert(bpf_map_lookup_elem(lru_map_fd, &key, value) == -ENOENT);
/* check that key=1 can be found and mark the ref bit to
* stop LRU from removing key=1
@@ -760,8 +745,7 @@ static void test_lru_sanity7(int map_type, int map_flags)
/* key=2 has been removed from the LRU */
key = 2;
- assert(bpf_map_lookup_elem(lru_map_fd, &key, value) == -1 &&
- errno == ENOENT);
+ assert(bpf_map_lookup_elem(lru_map_fd, &key, value) == -ENOENT);
assert(map_equal(lru_map_fd, expected_map_fd));
@@ -808,21 +792,18 @@ static void test_lru_sanity8(int map_type, int map_flags)
assert(!bpf_map_update_elem(lru_map_fd, &key, value, BPF_NOEXIST));
/* BPF_NOEXIST means: add new element if it doesn't exist */
- assert(bpf_map_update_elem(lru_map_fd, &key, value, BPF_NOEXIST) == -1
- /* key=1 already exists */
- && errno == EEXIST);
+ assert(bpf_map_update_elem(lru_map_fd, &key, value, BPF_NOEXIST) == -EEXIST);
+ /* key=1 already exists */
/* insert key=2 element */
/* check that key=2 is not found */
key = 2;
- assert(bpf_map_lookup_elem(lru_map_fd, &key, value) == -1 &&
- errno == ENOENT);
+ assert(bpf_map_lookup_elem(lru_map_fd, &key, value) == -ENOENT);
/* BPF_EXIST means: update existing element */
- assert(bpf_map_update_elem(lru_map_fd, &key, value, BPF_EXIST) == -1 &&
- /* key=2 is not there */
- errno == ENOENT);
+ assert(bpf_map_update_elem(lru_map_fd, &key, value, BPF_EXIST) == -ENOENT);
+ /* key=2 is not there */
assert(!bpf_map_update_elem(lru_map_fd, &key, value, BPF_NOEXIST));
assert(!bpf_map_update_elem(expected_map_fd, &key, value,
@@ -832,8 +813,7 @@ static void test_lru_sanity8(int map_type, int map_flags)
/* check that key=3 is not found */
key = 3;
- assert(bpf_map_lookup_elem(lru_map_fd, &key, value) == -1 &&
- errno == ENOENT);
+ assert(bpf_map_lookup_elem(lru_map_fd, &key, value) == -ENOENT);
/* check that key=1 can be found and do _not_ mark ref bit.
* this will be evicted on next update.
@@ -856,8 +836,7 @@ static void test_lru_sanity8(int map_type, int map_flags)
/* key=1 has been removed from the LRU */
key = 1;
- assert(bpf_map_lookup_elem(lru_map_fd, &key, value) == -1 &&
- errno == ENOENT);
+ assert(bpf_map_lookup_elem(lru_map_fd, &key, value) == -ENOENT);
assert(map_equal(lru_map_fd, expected_map_fd));
@@ -880,11 +859,14 @@ int main(int argc, char **argv)
assert(nr_cpus != -1);
printf("nr_cpus:%d\n\n", nr_cpus);
- for (f = 0; f < sizeof(map_flags) / sizeof(*map_flags); f++) {
+ /* Use libbpf 1.0 API mode */
+ libbpf_set_strict_mode(LIBBPF_STRICT_ALL);
+
+ for (f = 0; f < ARRAY_SIZE(map_flags); f++) {
unsigned int tgt_free = (map_flags[f] & BPF_F_NO_COMMON_LRU) ?
PERCPU_FREE_TARGET : LOCAL_FREE_TARGET;
- for (t = 0; t < sizeof(map_types) / sizeof(*map_types); t++) {
+ for (t = 0; t < ARRAY_SIZE(map_types); t++) {
test_lru_sanity0(map_types[t], map_flags[f]);
test_lru_sanity1(map_types[t], map_flags[f], tgt_free);
test_lru_sanity2(map_types[t], map_flags[f], tgt_free);
diff --git a/tools/testing/selftests/bpf/test_lwt_ip_encap.sh b/tools/testing/selftests/bpf/test_lwt_ip_encap.sh
index 59ea56945e6c..1e565f47aca9 100755
--- a/tools/testing/selftests/bpf/test_lwt_ip_encap.sh
+++ b/tools/testing/selftests/bpf/test_lwt_ip_encap.sh
@@ -38,6 +38,7 @@
# ping: SRC->[encap at veth2:ingress]->GRE:decap->DST
# ping replies go DST->SRC directly
+BPF_FILE="test_lwt_ip_encap.bpf.o"
if [[ $EUID -ne 0 ]]; then
echo "This script must be run as root"
echo "FAIL"
@@ -112,6 +113,22 @@ setup()
ip netns add "${NS2}"
ip netns add "${NS3}"
+ # rp_filter gets confused by what these tests are doing, so disable it
+ ip netns exec ${NS1} sysctl -wq net.ipv4.conf.all.rp_filter=0
+ ip netns exec ${NS2} sysctl -wq net.ipv4.conf.all.rp_filter=0
+ ip netns exec ${NS3} sysctl -wq net.ipv4.conf.all.rp_filter=0
+ ip netns exec ${NS1} sysctl -wq net.ipv4.conf.default.rp_filter=0
+ ip netns exec ${NS2} sysctl -wq net.ipv4.conf.default.rp_filter=0
+ ip netns exec ${NS3} sysctl -wq net.ipv4.conf.default.rp_filter=0
+
+ # disable IPv6 DAD because it sometimes takes too long and fails tests
+ ip netns exec ${NS1} sysctl -wq net.ipv6.conf.all.accept_dad=0
+ ip netns exec ${NS2} sysctl -wq net.ipv6.conf.all.accept_dad=0
+ ip netns exec ${NS3} sysctl -wq net.ipv6.conf.all.accept_dad=0
+ ip netns exec ${NS1} sysctl -wq net.ipv6.conf.default.accept_dad=0
+ ip netns exec ${NS2} sysctl -wq net.ipv6.conf.default.accept_dad=0
+ ip netns exec ${NS3} sysctl -wq net.ipv6.conf.default.accept_dad=0
+
ip link add veth1 type veth peer name veth2
ip link add veth3 type veth peer name veth4
ip link add veth5 type veth peer name veth6
@@ -236,11 +253,6 @@ setup()
ip -netns ${NS1} -6 route add ${IPv6_GRE}/128 dev veth5 via ${IPv6_6} ${VRF}
ip -netns ${NS2} -6 route add ${IPv6_GRE}/128 dev veth7 via ${IPv6_8} ${VRF}
- # rp_filter gets confused by what these tests are doing, so disable it
- ip netns exec ${NS1} sysctl -wq net.ipv4.conf.all.rp_filter=0
- ip netns exec ${NS2} sysctl -wq net.ipv4.conf.all.rp_filter=0
- ip netns exec ${NS3} sysctl -wq net.ipv4.conf.all.rp_filter=0
-
TMPFILE=$(mktemp /tmp/test_lwt_ip_encap.XXXXXX)
sleep 1 # reduce flakiness
@@ -286,7 +298,7 @@ test_ping()
ip netns exec ${NS1} ping -c 1 -W 1 -I veth1 ${IPv4_DST} 2>&1 > /dev/null
RET=$?
elif [ "${PROTO}" == "IPv6" ] ; then
- ip netns exec ${NS1} ping6 -c 1 -W 6 -I veth1 ${IPv6_DST} 2>&1 > /dev/null
+ ip netns exec ${NS1} ping6 -c 1 -W 1 -I veth1 ${IPv6_DST} 2>&1 > /dev/null
RET=$?
else
echo " test_ping: unknown PROTO: ${PROTO}"
@@ -362,14 +374,14 @@ test_egress()
# install replacement routes (LWT/eBPF), pings succeed
if [ "${ENCAP}" == "IPv4" ] ; then
ip -netns ${NS1} route add ${IPv4_DST} encap bpf xmit obj \
- test_lwt_ip_encap.o sec encap_gre dev veth1 ${VRF}
+ ${BPF_FILE} sec encap_gre dev veth1 ${VRF}
ip -netns ${NS1} -6 route add ${IPv6_DST} encap bpf xmit obj \
- test_lwt_ip_encap.o sec encap_gre dev veth1 ${VRF}
+ ${BPF_FILE} sec encap_gre dev veth1 ${VRF}
elif [ "${ENCAP}" == "IPv6" ] ; then
ip -netns ${NS1} route add ${IPv4_DST} encap bpf xmit obj \
- test_lwt_ip_encap.o sec encap_gre6 dev veth1 ${VRF}
+ ${BPF_FILE} sec encap_gre6 dev veth1 ${VRF}
ip -netns ${NS1} -6 route add ${IPv6_DST} encap bpf xmit obj \
- test_lwt_ip_encap.o sec encap_gre6 dev veth1 ${VRF}
+ ${BPF_FILE} sec encap_gre6 dev veth1 ${VRF}
else
echo " unknown encap ${ENCAP}"
TEST_STATUS=1
@@ -420,14 +432,14 @@ test_ingress()
# install replacement routes (LWT/eBPF), pings succeed
if [ "${ENCAP}" == "IPv4" ] ; then
ip -netns ${NS2} route add ${IPv4_DST} encap bpf in obj \
- test_lwt_ip_encap.o sec encap_gre dev veth2 ${VRF}
+ ${BPF_FILE} sec encap_gre dev veth2 ${VRF}
ip -netns ${NS2} -6 route add ${IPv6_DST} encap bpf in obj \
- test_lwt_ip_encap.o sec encap_gre dev veth2 ${VRF}
+ ${BPF_FILE} sec encap_gre dev veth2 ${VRF}
elif [ "${ENCAP}" == "IPv6" ] ; then
ip -netns ${NS2} route add ${IPv4_DST} encap bpf in obj \
- test_lwt_ip_encap.o sec encap_gre6 dev veth2 ${VRF}
+ ${BPF_FILE} sec encap_gre6 dev veth2 ${VRF}
ip -netns ${NS2} -6 route add ${IPv6_DST} encap bpf in obj \
- test_lwt_ip_encap.o sec encap_gre6 dev veth2 ${VRF}
+ ${BPF_FILE} sec encap_gre6 dev veth2 ${VRF}
else
echo "FAIL: unknown encap ${ENCAP}"
TEST_STATUS=1
diff --git a/tools/testing/selftests/bpf/test_lwt_seg6local.sh b/tools/testing/selftests/bpf/test_lwt_seg6local.sh
index 785eabf2a593..0efea2292d6a 100755
--- a/tools/testing/selftests/bpf/test_lwt_seg6local.sh
+++ b/tools/testing/selftests/bpf/test_lwt_seg6local.sh
@@ -23,6 +23,13 @@
# Kselftest framework requirement - SKIP code is 4.
ksft_skip=4
+BPF_FILE="test_lwt_seg6local.bpf.o"
+readonly NS1="ns1-$(mktemp -u XXXXXX)"
+readonly NS2="ns2-$(mktemp -u XXXXXX)"
+readonly NS3="ns3-$(mktemp -u XXXXXX)"
+readonly NS4="ns4-$(mktemp -u XXXXXX)"
+readonly NS5="ns5-$(mktemp -u XXXXXX)"
+readonly NS6="ns6-$(mktemp -u XXXXXX)"
msg="skip all tests:"
if [ $UID != 0 ]; then
@@ -41,23 +48,23 @@ cleanup()
fi
set +e
- ip netns del ns1 2> /dev/null
- ip netns del ns2 2> /dev/null
- ip netns del ns3 2> /dev/null
- ip netns del ns4 2> /dev/null
- ip netns del ns5 2> /dev/null
- ip netns del ns6 2> /dev/null
+ ip netns del ${NS1} 2> /dev/null
+ ip netns del ${NS2} 2> /dev/null
+ ip netns del ${NS3} 2> /dev/null
+ ip netns del ${NS4} 2> /dev/null
+ ip netns del ${NS5} 2> /dev/null
+ ip netns del ${NS6} 2> /dev/null
rm -f $TMP_FILE
}
set -e
-ip netns add ns1
-ip netns add ns2
-ip netns add ns3
-ip netns add ns4
-ip netns add ns5
-ip netns add ns6
+ip netns add ${NS1}
+ip netns add ${NS2}
+ip netns add ${NS3}
+ip netns add ${NS4}
+ip netns add ${NS5}
+ip netns add ${NS6}
trap cleanup 0 2 3 6 9
@@ -67,80 +74,80 @@ ip link add veth5 type veth peer name veth6
ip link add veth7 type veth peer name veth8
ip link add veth9 type veth peer name veth10
-ip link set veth1 netns ns1
-ip link set veth2 netns ns2
-ip link set veth3 netns ns2
-ip link set veth4 netns ns3
-ip link set veth5 netns ns3
-ip link set veth6 netns ns4
-ip link set veth7 netns ns4
-ip link set veth8 netns ns5
-ip link set veth9 netns ns5
-ip link set veth10 netns ns6
-
-ip netns exec ns1 ip link set dev veth1 up
-ip netns exec ns2 ip link set dev veth2 up
-ip netns exec ns2 ip link set dev veth3 up
-ip netns exec ns3 ip link set dev veth4 up
-ip netns exec ns3 ip link set dev veth5 up
-ip netns exec ns4 ip link set dev veth6 up
-ip netns exec ns4 ip link set dev veth7 up
-ip netns exec ns5 ip link set dev veth8 up
-ip netns exec ns5 ip link set dev veth9 up
-ip netns exec ns6 ip link set dev veth10 up
-ip netns exec ns6 ip link set dev lo up
+ip link set veth1 netns ${NS1}
+ip link set veth2 netns ${NS2}
+ip link set veth3 netns ${NS2}
+ip link set veth4 netns ${NS3}
+ip link set veth5 netns ${NS3}
+ip link set veth6 netns ${NS4}
+ip link set veth7 netns ${NS4}
+ip link set veth8 netns ${NS5}
+ip link set veth9 netns ${NS5}
+ip link set veth10 netns ${NS6}
+
+ip netns exec ${NS1} ip link set dev veth1 up
+ip netns exec ${NS2} ip link set dev veth2 up
+ip netns exec ${NS2} ip link set dev veth3 up
+ip netns exec ${NS3} ip link set dev veth4 up
+ip netns exec ${NS3} ip link set dev veth5 up
+ip netns exec ${NS4} ip link set dev veth6 up
+ip netns exec ${NS4} ip link set dev veth7 up
+ip netns exec ${NS5} ip link set dev veth8 up
+ip netns exec ${NS5} ip link set dev veth9 up
+ip netns exec ${NS6} ip link set dev veth10 up
+ip netns exec ${NS6} ip link set dev lo up
# All link scope addresses and routes required between veths
-ip netns exec ns1 ip -6 addr add fb00::12/16 dev veth1 scope link
-ip netns exec ns1 ip -6 route add fb00::21 dev veth1 scope link
-ip netns exec ns2 ip -6 addr add fb00::21/16 dev veth2 scope link
-ip netns exec ns2 ip -6 addr add fb00::34/16 dev veth3 scope link
-ip netns exec ns2 ip -6 route add fb00::43 dev veth3 scope link
-ip netns exec ns3 ip -6 route add fb00::65 dev veth5 scope link
-ip netns exec ns3 ip -6 addr add fb00::43/16 dev veth4 scope link
-ip netns exec ns3 ip -6 addr add fb00::56/16 dev veth5 scope link
-ip netns exec ns4 ip -6 addr add fb00::65/16 dev veth6 scope link
-ip netns exec ns4 ip -6 addr add fb00::78/16 dev veth7 scope link
-ip netns exec ns4 ip -6 route add fb00::87 dev veth7 scope link
-ip netns exec ns5 ip -6 addr add fb00::87/16 dev veth8 scope link
-ip netns exec ns5 ip -6 addr add fb00::910/16 dev veth9 scope link
-ip netns exec ns5 ip -6 route add fb00::109 dev veth9 scope link
-ip netns exec ns5 ip -6 route add fb00::109 table 117 dev veth9 scope link
-ip netns exec ns6 ip -6 addr add fb00::109/16 dev veth10 scope link
-
-ip netns exec ns1 ip -6 addr add fb00::1/16 dev lo
-ip netns exec ns1 ip -6 route add fb00::6 dev veth1 via fb00::21
-
-ip netns exec ns2 ip -6 route add fb00::6 encap bpf in obj test_lwt_seg6local.o sec encap_srh dev veth2
-ip netns exec ns2 ip -6 route add fd00::1 dev veth3 via fb00::43 scope link
-
-ip netns exec ns3 ip -6 route add fc42::1 dev veth5 via fb00::65
-ip netns exec ns3 ip -6 route add fd00::1 encap seg6local action End.BPF endpoint obj test_lwt_seg6local.o sec add_egr_x dev veth4
-
-ip netns exec ns4 ip -6 route add fd00::2 encap seg6local action End.BPF endpoint obj test_lwt_seg6local.o sec pop_egr dev veth6
-ip netns exec ns4 ip -6 addr add fc42::1 dev lo
-ip netns exec ns4 ip -6 route add fd00::3 dev veth7 via fb00::87
-
-ip netns exec ns5 ip -6 route add fd00::4 table 117 dev veth9 via fb00::109
-ip netns exec ns5 ip -6 route add fd00::3 encap seg6local action End.BPF endpoint obj test_lwt_seg6local.o sec inspect_t dev veth8
-
-ip netns exec ns6 ip -6 addr add fb00::6/16 dev lo
-ip netns exec ns6 ip -6 addr add fd00::4/16 dev lo
-
-ip netns exec ns1 sysctl net.ipv6.conf.all.forwarding=1 > /dev/null
-ip netns exec ns2 sysctl net.ipv6.conf.all.forwarding=1 > /dev/null
-ip netns exec ns3 sysctl net.ipv6.conf.all.forwarding=1 > /dev/null
-ip netns exec ns4 sysctl net.ipv6.conf.all.forwarding=1 > /dev/null
-ip netns exec ns5 sysctl net.ipv6.conf.all.forwarding=1 > /dev/null
-
-ip netns exec ns6 sysctl net.ipv6.conf.all.seg6_enabled=1 > /dev/null
-ip netns exec ns6 sysctl net.ipv6.conf.lo.seg6_enabled=1 > /dev/null
-ip netns exec ns6 sysctl net.ipv6.conf.veth10.seg6_enabled=1 > /dev/null
-
-ip netns exec ns6 nc -l -6 -u -d 7330 > $TMP_FILE &
-ip netns exec ns1 bash -c "echo 'foobar' | nc -w0 -6 -u -p 2121 -s fb00::1 fb00::6 7330"
+ip netns exec ${NS1} ip -6 addr add fb00::12/16 dev veth1 scope link
+ip netns exec ${NS1} ip -6 route add fb00::21 dev veth1 scope link
+ip netns exec ${NS2} ip -6 addr add fb00::21/16 dev veth2 scope link
+ip netns exec ${NS2} ip -6 addr add fb00::34/16 dev veth3 scope link
+ip netns exec ${NS2} ip -6 route add fb00::43 dev veth3 scope link
+ip netns exec ${NS3} ip -6 route add fb00::65 dev veth5 scope link
+ip netns exec ${NS3} ip -6 addr add fb00::43/16 dev veth4 scope link
+ip netns exec ${NS3} ip -6 addr add fb00::56/16 dev veth5 scope link
+ip netns exec ${NS4} ip -6 addr add fb00::65/16 dev veth6 scope link
+ip netns exec ${NS4} ip -6 addr add fb00::78/16 dev veth7 scope link
+ip netns exec ${NS4} ip -6 route add fb00::87 dev veth7 scope link
+ip netns exec ${NS5} ip -6 addr add fb00::87/16 dev veth8 scope link
+ip netns exec ${NS5} ip -6 addr add fb00::910/16 dev veth9 scope link
+ip netns exec ${NS5} ip -6 route add fb00::109 dev veth9 scope link
+ip netns exec ${NS5} ip -6 route add fb00::109 table 117 dev veth9 scope link
+ip netns exec ${NS6} ip -6 addr add fb00::109/16 dev veth10 scope link
+
+ip netns exec ${NS1} ip -6 addr add fb00::1/16 dev lo
+ip netns exec ${NS1} ip -6 route add fb00::6 dev veth1 via fb00::21
+
+ip netns exec ${NS2} ip -6 route add fb00::6 encap bpf in obj ${BPF_FILE} sec encap_srh dev veth2
+ip netns exec ${NS2} ip -6 route add fd00::1 dev veth3 via fb00::43 scope link
+
+ip netns exec ${NS3} ip -6 route add fc42::1 dev veth5 via fb00::65
+ip netns exec ${NS3} ip -6 route add fd00::1 encap seg6local action End.BPF endpoint obj ${BPF_FILE} sec add_egr_x dev veth4
+
+ip netns exec ${NS4} ip -6 route add fd00::2 encap seg6local action End.BPF endpoint obj ${BPF_FILE} sec pop_egr dev veth6
+ip netns exec ${NS4} ip -6 addr add fc42::1 dev lo
+ip netns exec ${NS4} ip -6 route add fd00::3 dev veth7 via fb00::87
+
+ip netns exec ${NS5} ip -6 route add fd00::4 table 117 dev veth9 via fb00::109
+ip netns exec ${NS5} ip -6 route add fd00::3 encap seg6local action End.BPF endpoint obj ${BPF_FILE} sec inspect_t dev veth8
+
+ip netns exec ${NS6} ip -6 addr add fb00::6/16 dev lo
+ip netns exec ${NS6} ip -6 addr add fd00::4/16 dev lo
+
+ip netns exec ${NS1} sysctl net.ipv6.conf.all.forwarding=1 > /dev/null
+ip netns exec ${NS2} sysctl net.ipv6.conf.all.forwarding=1 > /dev/null
+ip netns exec ${NS3} sysctl net.ipv6.conf.all.forwarding=1 > /dev/null
+ip netns exec ${NS4} sysctl net.ipv6.conf.all.forwarding=1 > /dev/null
+ip netns exec ${NS5} sysctl net.ipv6.conf.all.forwarding=1 > /dev/null
+
+ip netns exec ${NS6} sysctl net.ipv6.conf.all.seg6_enabled=1 > /dev/null
+ip netns exec ${NS6} sysctl net.ipv6.conf.lo.seg6_enabled=1 > /dev/null
+ip netns exec ${NS6} sysctl net.ipv6.conf.veth10.seg6_enabled=1 > /dev/null
+
+ip netns exec ${NS6} nc -l -6 -u -d 7330 > $TMP_FILE &
+ip netns exec ${NS1} bash -c "echo 'foobar' | nc -w0 -6 -u -p 2121 -s fb00::1 fb00::6 7330"
sleep 5 # wait enough time to ensure the UDP datagram arrived to the last segment
-kill -INT $!
+kill -TERM $!
if [[ $(< $TMP_FILE) != "foobar" ]]; then
exit 1
diff --git a/tools/testing/selftests/bpf/test_maps.c b/tools/testing/selftests/bpf/test_maps.c
index 6a12a0e01e07..dfbab214f4d1 100644
--- a/tools/testing/selftests/bpf/test_maps.c
+++ b/tools/testing/selftests/bpf/test_maps.c
@@ -23,24 +23,23 @@
#include <bpf/libbpf.h>
#include "bpf_util.h"
-#include "bpf_rlimit.h"
#include "test_maps.h"
+#include "testing_helpers.h"
#ifndef ENOTSUPP
#define ENOTSUPP 524
#endif
-static int skips;
+int skips;
-static int map_flags;
+static struct bpf_map_create_opts map_opts = { .sz = sizeof(map_opts) };
static void test_hashmap(unsigned int task, void *data)
{
long long key, next_key, first_key, value;
int fd;
- fd = bpf_create_map(BPF_MAP_TYPE_HASH, sizeof(key), sizeof(value),
- 2, map_flags);
+ fd = bpf_map_create(BPF_MAP_TYPE_HASH, NULL, sizeof(key), sizeof(value), 2, &map_opts);
if (fd < 0) {
printf("Failed to create hashmap '%s'!\n", strerror(errno));
exit(1);
@@ -53,23 +52,30 @@ static void test_hashmap(unsigned int task, void *data)
value = 0;
/* BPF_NOEXIST means add new element if it doesn't exist. */
- assert(bpf_map_update_elem(fd, &key, &value, BPF_NOEXIST) == -1 &&
+ assert(bpf_map_update_elem(fd, &key, &value, BPF_NOEXIST) < 0 &&
/* key=1 already exists. */
errno == EEXIST);
/* -1 is an invalid flag. */
- assert(bpf_map_update_elem(fd, &key, &value, -1) == -1 &&
+ assert(bpf_map_update_elem(fd, &key, &value, -1) < 0 &&
errno == EINVAL);
/* Check that key=1 can be found. */
assert(bpf_map_lookup_elem(fd, &key, &value) == 0 && value == 1234);
key = 2;
+ value = 1234;
+ /* Insert key=2 element. */
+ assert(bpf_map_update_elem(fd, &key, &value, BPF_ANY) == 0);
+
+ /* Check that key=2 matches the value and delete it */
+ assert(bpf_map_lookup_and_delete_elem(fd, &key, &value) == 0 && value == 1234);
+
/* Check that key=2 is not found. */
- assert(bpf_map_lookup_elem(fd, &key, &value) == -1 && errno == ENOENT);
+ assert(bpf_map_lookup_elem(fd, &key, &value) < 0 && errno == ENOENT);
/* BPF_EXIST means update existing element. */
- assert(bpf_map_update_elem(fd, &key, &value, BPF_EXIST) == -1 &&
+ assert(bpf_map_update_elem(fd, &key, &value, BPF_EXIST) < 0 &&
/* key=2 is not there. */
errno == ENOENT);
@@ -80,7 +86,7 @@ static void test_hashmap(unsigned int task, void *data)
* inserted due to max_entries limit.
*/
key = 0;
- assert(bpf_map_update_elem(fd, &key, &value, BPF_NOEXIST) == -1 &&
+ assert(bpf_map_update_elem(fd, &key, &value, BPF_NOEXIST) < 0 &&
errno == E2BIG);
/* Update existing element, though the map is full. */
@@ -89,12 +95,12 @@ static void test_hashmap(unsigned int task, void *data)
key = 2;
assert(bpf_map_update_elem(fd, &key, &value, BPF_ANY) == 0);
key = 3;
- assert(bpf_map_update_elem(fd, &key, &value, BPF_NOEXIST) == -1 &&
+ assert(bpf_map_update_elem(fd, &key, &value, BPF_NOEXIST) < 0 &&
errno == E2BIG);
/* Check that key = 0 doesn't exist. */
key = 0;
- assert(bpf_map_delete_elem(fd, &key) == -1 && errno == ENOENT);
+ assert(bpf_map_delete_elem(fd, &key) < 0 && errno == ENOENT);
/* Iterate over two elements. */
assert(bpf_map_get_next_key(fd, NULL, &first_key) == 0 &&
@@ -104,7 +110,7 @@ static void test_hashmap(unsigned int task, void *data)
assert(bpf_map_get_next_key(fd, &next_key, &next_key) == 0 &&
(next_key == 1 || next_key == 2) &&
(next_key != first_key));
- assert(bpf_map_get_next_key(fd, &next_key, &next_key) == -1 &&
+ assert(bpf_map_get_next_key(fd, &next_key, &next_key) < 0 &&
errno == ENOENT);
/* Delete both elements. */
@@ -112,13 +118,13 @@ static void test_hashmap(unsigned int task, void *data)
assert(bpf_map_delete_elem(fd, &key) == 0);
key = 2;
assert(bpf_map_delete_elem(fd, &key) == 0);
- assert(bpf_map_delete_elem(fd, &key) == -1 && errno == ENOENT);
+ assert(bpf_map_delete_elem(fd, &key) < 0 && errno == ENOENT);
key = 0;
/* Check that map is empty. */
- assert(bpf_map_get_next_key(fd, NULL, &next_key) == -1 &&
+ assert(bpf_map_get_next_key(fd, NULL, &next_key) < 0 &&
errno == ENOENT);
- assert(bpf_map_get_next_key(fd, &key, &next_key) == -1 &&
+ assert(bpf_map_get_next_key(fd, &key, &next_key) < 0 &&
errno == ENOENT);
close(fd);
@@ -130,8 +136,7 @@ static void test_hashmap_sizes(unsigned int task, void *data)
for (i = 1; i <= 512; i <<= 1)
for (j = 1; j <= 1 << 18; j <<= 1) {
- fd = bpf_create_map(BPF_MAP_TYPE_HASH, i, j,
- 2, map_flags);
+ fd = bpf_map_create(BPF_MAP_TYPE_HASH, NULL, i, j, 2, &map_opts);
if (fd < 0) {
if (errno == ENOMEM)
return;
@@ -152,8 +157,8 @@ static void test_hashmap_percpu(unsigned int task, void *data)
int expected_key_mask = 0;
int fd, i;
- fd = bpf_create_map(BPF_MAP_TYPE_PERCPU_HASH, sizeof(key),
- sizeof(bpf_percpu(value, 0)), 2, map_flags);
+ fd = bpf_map_create(BPF_MAP_TYPE_PERCPU_HASH, NULL, sizeof(key),
+ sizeof(bpf_percpu(value, 0)), 2, &map_opts);
if (fd < 0) {
printf("Failed to create hashmap '%s'!\n", strerror(errno));
exit(1);
@@ -166,15 +171,25 @@ static void test_hashmap_percpu(unsigned int task, void *data)
/* Insert key=1 element. */
assert(!(expected_key_mask & key));
assert(bpf_map_update_elem(fd, &key, value, BPF_ANY) == 0);
+
+ /* Lookup and delete elem key=1 and check value. */
+ assert(bpf_map_lookup_and_delete_elem(fd, &key, value) == 0 &&
+ bpf_percpu(value,0) == 100);
+
+ for (i = 0; i < nr_cpus; i++)
+ bpf_percpu(value,i) = i + 100;
+
+ /* Insert key=1 element which should not exist. */
+ assert(bpf_map_update_elem(fd, &key, value, BPF_NOEXIST) == 0);
expected_key_mask |= key;
/* BPF_NOEXIST means add new element if it doesn't exist. */
- assert(bpf_map_update_elem(fd, &key, value, BPF_NOEXIST) == -1 &&
+ assert(bpf_map_update_elem(fd, &key, value, BPF_NOEXIST) < 0 &&
/* key=1 already exists. */
errno == EEXIST);
/* -1 is an invalid flag. */
- assert(bpf_map_update_elem(fd, &key, value, -1) == -1 &&
+ assert(bpf_map_update_elem(fd, &key, value, -1) < 0 &&
errno == EINVAL);
/* Check that key=1 can be found. Value could be 0 if the lookup
@@ -186,10 +201,10 @@ static void test_hashmap_percpu(unsigned int task, void *data)
key = 2;
/* Check that key=2 is not found. */
- assert(bpf_map_lookup_elem(fd, &key, value) == -1 && errno == ENOENT);
+ assert(bpf_map_lookup_elem(fd, &key, value) < 0 && errno == ENOENT);
/* BPF_EXIST means update existing element. */
- assert(bpf_map_update_elem(fd, &key, value, BPF_EXIST) == -1 &&
+ assert(bpf_map_update_elem(fd, &key, value, BPF_EXIST) < 0 &&
/* key=2 is not there. */
errno == ENOENT);
@@ -202,11 +217,11 @@ static void test_hashmap_percpu(unsigned int task, void *data)
* inserted due to max_entries limit.
*/
key = 0;
- assert(bpf_map_update_elem(fd, &key, value, BPF_NOEXIST) == -1 &&
+ assert(bpf_map_update_elem(fd, &key, value, BPF_NOEXIST) < 0 &&
errno == E2BIG);
/* Check that key = 0 doesn't exist. */
- assert(bpf_map_delete_elem(fd, &key) == -1 && errno == ENOENT);
+ assert(bpf_map_delete_elem(fd, &key) < 0 && errno == ENOENT);
/* Iterate over two elements. */
assert(bpf_map_get_next_key(fd, NULL, &first_key) == 0 &&
@@ -237,32 +252,33 @@ static void test_hashmap_percpu(unsigned int task, void *data)
assert(bpf_map_delete_elem(fd, &key) == 0);
key = 2;
assert(bpf_map_delete_elem(fd, &key) == 0);
- assert(bpf_map_delete_elem(fd, &key) == -1 && errno == ENOENT);
+ assert(bpf_map_delete_elem(fd, &key) < 0 && errno == ENOENT);
key = 0;
/* Check that map is empty. */
- assert(bpf_map_get_next_key(fd, NULL, &next_key) == -1 &&
+ assert(bpf_map_get_next_key(fd, NULL, &next_key) < 0 &&
errno == ENOENT);
- assert(bpf_map_get_next_key(fd, &key, &next_key) == -1 &&
+ assert(bpf_map_get_next_key(fd, &key, &next_key) < 0 &&
errno == ENOENT);
close(fd);
}
+#define VALUE_SIZE 3
static int helper_fill_hashmap(int max_entries)
{
int i, fd, ret;
- long long key, value;
+ long long key, value[VALUE_SIZE] = {};
- fd = bpf_create_map(BPF_MAP_TYPE_HASH, sizeof(key), sizeof(value),
- max_entries, map_flags);
+ fd = bpf_map_create(BPF_MAP_TYPE_HASH, NULL, sizeof(key), sizeof(value),
+ max_entries, &map_opts);
CHECK(fd < 0,
"failed to create hashmap",
- "err: %s, flags: 0x%x\n", strerror(errno), map_flags);
+ "err: %s, flags: 0x%x\n", strerror(errno), map_opts.map_flags);
for (i = 0; i < max_entries; i++) {
- key = i; value = key;
- ret = bpf_map_update_elem(fd, &key, &value, BPF_NOEXIST);
+ key = i; value[0] = key;
+ ret = bpf_map_update_elem(fd, &key, value, BPF_NOEXIST);
CHECK(ret != 0,
"can't update hashmap",
"err: %s\n", strerror(ret));
@@ -273,8 +289,8 @@ static int helper_fill_hashmap(int max_entries)
static void test_hashmap_walk(unsigned int task, void *data)
{
- int fd, i, max_entries = 1000;
- long long key, value, next_key;
+ int fd, i, max_entries = 10000;
+ long long key, value[VALUE_SIZE], next_key;
bool next_key_valid = true;
fd = helper_fill_hashmap(max_entries);
@@ -282,7 +298,7 @@ static void test_hashmap_walk(unsigned int task, void *data)
for (i = 0; bpf_map_get_next_key(fd, !i ? NULL : &key,
&next_key) == 0; i++) {
key = next_key;
- assert(bpf_map_lookup_elem(fd, &key, &value) == 0);
+ assert(bpf_map_lookup_elem(fd, &key, value) == 0);
}
assert(i == max_entries);
@@ -290,9 +306,9 @@ static void test_hashmap_walk(unsigned int task, void *data)
assert(bpf_map_get_next_key(fd, NULL, &key) == 0);
for (i = 0; next_key_valid; i++) {
next_key_valid = bpf_map_get_next_key(fd, &key, &next_key) == 0;
- assert(bpf_map_lookup_elem(fd, &key, &value) == 0);
- value++;
- assert(bpf_map_update_elem(fd, &key, &value, BPF_EXIST) == 0);
+ assert(bpf_map_lookup_elem(fd, &key, value) == 0);
+ value[0]++;
+ assert(bpf_map_update_elem(fd, &key, value, BPF_EXIST) == 0);
key = next_key;
}
@@ -301,8 +317,8 @@ static void test_hashmap_walk(unsigned int task, void *data)
for (i = 0; bpf_map_get_next_key(fd, !i ? NULL : &key,
&next_key) == 0; i++) {
key = next_key;
- assert(bpf_map_lookup_elem(fd, &key, &value) == 0);
- assert(value - 1 == key);
+ assert(bpf_map_lookup_elem(fd, &key, value) == 0);
+ assert(value[0] - 1 == key);
}
assert(i == max_entries);
@@ -314,8 +330,8 @@ static void test_hashmap_zero_seed(void)
int i, first, second, old_flags;
long long key, next_first, next_second;
- old_flags = map_flags;
- map_flags |= BPF_F_ZERO_SEED;
+ old_flags = map_opts.map_flags;
+ map_opts.map_flags |= BPF_F_ZERO_SEED;
first = helper_fill_hashmap(3);
second = helper_fill_hashmap(3);
@@ -337,7 +353,7 @@ static void test_hashmap_zero_seed(void)
key = next_first;
}
- map_flags = old_flags;
+ map_opts.map_flags = old_flags;
close(first);
close(second);
}
@@ -347,8 +363,7 @@ static void test_arraymap(unsigned int task, void *data)
int key, next_key, fd;
long long value;
- fd = bpf_create_map(BPF_MAP_TYPE_ARRAY, sizeof(key), sizeof(value),
- 2, 0);
+ fd = bpf_map_create(BPF_MAP_TYPE_ARRAY, NULL, sizeof(key), sizeof(value), 2, NULL);
if (fd < 0) {
printf("Failed to create arraymap '%s'!\n", strerror(errno));
exit(1);
@@ -360,7 +375,7 @@ static void test_arraymap(unsigned int task, void *data)
assert(bpf_map_update_elem(fd, &key, &value, BPF_ANY) == 0);
value = 0;
- assert(bpf_map_update_elem(fd, &key, &value, BPF_NOEXIST) == -1 &&
+ assert(bpf_map_update_elem(fd, &key, &value, BPF_NOEXIST) < 0 &&
errno == EEXIST);
/* Check that key=1 can be found. */
@@ -374,11 +389,11 @@ static void test_arraymap(unsigned int task, void *data)
* due to max_entries limit.
*/
key = 2;
- assert(bpf_map_update_elem(fd, &key, &value, BPF_EXIST) == -1 &&
+ assert(bpf_map_update_elem(fd, &key, &value, BPF_EXIST) < 0 &&
errno == E2BIG);
/* Check that key = 2 doesn't exist. */
- assert(bpf_map_lookup_elem(fd, &key, &value) == -1 && errno == ENOENT);
+ assert(bpf_map_lookup_elem(fd, &key, &value) < 0 && errno == ENOENT);
/* Iterate over two elements. */
assert(bpf_map_get_next_key(fd, NULL, &next_key) == 0 &&
@@ -387,12 +402,12 @@ static void test_arraymap(unsigned int task, void *data)
next_key == 0);
assert(bpf_map_get_next_key(fd, &next_key, &next_key) == 0 &&
next_key == 1);
- assert(bpf_map_get_next_key(fd, &next_key, &next_key) == -1 &&
+ assert(bpf_map_get_next_key(fd, &next_key, &next_key) < 0 &&
errno == ENOENT);
/* Delete shouldn't succeed. */
key = 1;
- assert(bpf_map_delete_elem(fd, &key) == -1 && errno == EINVAL);
+ assert(bpf_map_delete_elem(fd, &key) < 0 && errno == EINVAL);
close(fd);
}
@@ -403,8 +418,8 @@ static void test_arraymap_percpu(unsigned int task, void *data)
BPF_DECLARE_PERCPU(long, values);
int key, next_key, fd, i;
- fd = bpf_create_map(BPF_MAP_TYPE_PERCPU_ARRAY, sizeof(key),
- sizeof(bpf_percpu(values, 0)), 2, 0);
+ fd = bpf_map_create(BPF_MAP_TYPE_PERCPU_ARRAY, NULL, sizeof(key),
+ sizeof(bpf_percpu(values, 0)), 2, NULL);
if (fd < 0) {
printf("Failed to create arraymap '%s'!\n", strerror(errno));
exit(1);
@@ -418,7 +433,7 @@ static void test_arraymap_percpu(unsigned int task, void *data)
assert(bpf_map_update_elem(fd, &key, values, BPF_ANY) == 0);
bpf_percpu(values, 0) = 0;
- assert(bpf_map_update_elem(fd, &key, values, BPF_NOEXIST) == -1 &&
+ assert(bpf_map_update_elem(fd, &key, values, BPF_NOEXIST) < 0 &&
errno == EEXIST);
/* Check that key=1 can be found. */
@@ -433,11 +448,11 @@ static void test_arraymap_percpu(unsigned int task, void *data)
/* Check that key=2 cannot be inserted due to max_entries limit. */
key = 2;
- assert(bpf_map_update_elem(fd, &key, values, BPF_EXIST) == -1 &&
+ assert(bpf_map_update_elem(fd, &key, values, BPF_EXIST) < 0 &&
errno == E2BIG);
/* Check that key = 2 doesn't exist. */
- assert(bpf_map_lookup_elem(fd, &key, values) == -1 && errno == ENOENT);
+ assert(bpf_map_lookup_elem(fd, &key, values) < 0 && errno == ENOENT);
/* Iterate over two elements. */
assert(bpf_map_get_next_key(fd, NULL, &next_key) == 0 &&
@@ -446,12 +461,12 @@ static void test_arraymap_percpu(unsigned int task, void *data)
next_key == 0);
assert(bpf_map_get_next_key(fd, &next_key, &next_key) == 0 &&
next_key == 1);
- assert(bpf_map_get_next_key(fd, &next_key, &next_key) == -1 &&
+ assert(bpf_map_get_next_key(fd, &next_key, &next_key) < 0 &&
errno == ENOENT);
/* Delete shouldn't succeed. */
key = 1;
- assert(bpf_map_delete_elem(fd, &key) == -1 && errno == EINVAL);
+ assert(bpf_map_delete_elem(fd, &key) < 0 && errno == EINVAL);
close(fd);
}
@@ -466,8 +481,8 @@ static void test_arraymap_percpu_many_keys(void)
unsigned int nr_keys = 2000;
int key, fd, i;
- fd = bpf_create_map(BPF_MAP_TYPE_PERCPU_ARRAY, sizeof(key),
- sizeof(bpf_percpu(values, 0)), nr_keys, 0);
+ fd = bpf_map_create(BPF_MAP_TYPE_PERCPU_ARRAY, NULL, sizeof(key),
+ sizeof(bpf_percpu(values, 0)), nr_keys, NULL);
if (fd < 0) {
printf("Failed to create per-cpu arraymap '%s'!\n",
strerror(errno));
@@ -498,8 +513,7 @@ static void test_devmap(unsigned int task, void *data)
int fd;
__u32 key, value;
- fd = bpf_create_map(BPF_MAP_TYPE_DEVMAP, sizeof(key), sizeof(value),
- 2, 0);
+ fd = bpf_map_create(BPF_MAP_TYPE_DEVMAP, NULL, sizeof(key), sizeof(value), 2, NULL);
if (fd < 0) {
printf("Failed to create devmap '%s'!\n", strerror(errno));
exit(1);
@@ -513,8 +527,7 @@ static void test_devmap_hash(unsigned int task, void *data)
int fd;
__u32 key, value;
- fd = bpf_create_map(BPF_MAP_TYPE_DEVMAP_HASH, sizeof(key), sizeof(value),
- 2, 0);
+ fd = bpf_map_create(BPF_MAP_TYPE_DEVMAP_HASH, NULL, sizeof(key), sizeof(value), 2, NULL);
if (fd < 0) {
printf("Failed to create devmap_hash '%s'!\n", strerror(errno));
exit(1);
@@ -534,14 +547,12 @@ static void test_queuemap(unsigned int task, void *data)
vals[i] = rand();
/* Invalid key size */
- fd = bpf_create_map(BPF_MAP_TYPE_QUEUE, 4, sizeof(val), MAP_SIZE,
- map_flags);
+ fd = bpf_map_create(BPF_MAP_TYPE_QUEUE, NULL, 4, sizeof(val), MAP_SIZE, &map_opts);
assert(fd < 0 && errno == EINVAL);
- fd = bpf_create_map(BPF_MAP_TYPE_QUEUE, 0, sizeof(val), MAP_SIZE,
- map_flags);
+ fd = bpf_map_create(BPF_MAP_TYPE_QUEUE, NULL, 0, sizeof(val), MAP_SIZE, &map_opts);
/* Queue map does not support BPF_F_NO_PREALLOC */
- if (map_flags & BPF_F_NO_PREALLOC) {
+ if (map_opts.map_flags & BPF_F_NO_PREALLOC) {
assert(fd < 0 && errno == EINVAL);
return;
}
@@ -555,7 +566,7 @@ static void test_queuemap(unsigned int task, void *data)
assert(bpf_map_update_elem(fd, NULL, &vals[i], 0) == 0);
/* Check that element cannot be pushed due to max_entries limit */
- assert(bpf_map_update_elem(fd, NULL, &val, 0) == -1 &&
+ assert(bpf_map_update_elem(fd, NULL, &val, 0) < 0 &&
errno == E2BIG);
/* Peek element */
@@ -571,12 +582,12 @@ static void test_queuemap(unsigned int task, void *data)
val == vals[i]);
/* Check that there are not elements left */
- assert(bpf_map_lookup_and_delete_elem(fd, NULL, &val) == -1 &&
+ assert(bpf_map_lookup_and_delete_elem(fd, NULL, &val) < 0 &&
errno == ENOENT);
/* Check that non supported functions set errno to EINVAL */
- assert(bpf_map_delete_elem(fd, NULL) == -1 && errno == EINVAL);
- assert(bpf_map_get_next_key(fd, NULL, NULL) == -1 && errno == EINVAL);
+ assert(bpf_map_delete_elem(fd, NULL) < 0 && errno == EINVAL);
+ assert(bpf_map_get_next_key(fd, NULL, NULL) < 0 && errno == EINVAL);
close(fd);
}
@@ -592,14 +603,12 @@ static void test_stackmap(unsigned int task, void *data)
vals[i] = rand();
/* Invalid key size */
- fd = bpf_create_map(BPF_MAP_TYPE_STACK, 4, sizeof(val), MAP_SIZE,
- map_flags);
+ fd = bpf_map_create(BPF_MAP_TYPE_STACK, NULL, 4, sizeof(val), MAP_SIZE, &map_opts);
assert(fd < 0 && errno == EINVAL);
- fd = bpf_create_map(BPF_MAP_TYPE_STACK, 0, sizeof(val), MAP_SIZE,
- map_flags);
+ fd = bpf_map_create(BPF_MAP_TYPE_STACK, NULL, 0, sizeof(val), MAP_SIZE, &map_opts);
/* Stack map does not support BPF_F_NO_PREALLOC */
- if (map_flags & BPF_F_NO_PREALLOC) {
+ if (map_opts.map_flags & BPF_F_NO_PREALLOC) {
assert(fd < 0 && errno == EINVAL);
return;
}
@@ -613,7 +622,7 @@ static void test_stackmap(unsigned int task, void *data)
assert(bpf_map_update_elem(fd, NULL, &vals[i], 0) == 0);
/* Check that element cannot be pushed due to max_entries limit */
- assert(bpf_map_update_elem(fd, NULL, &val, 0) == -1 &&
+ assert(bpf_map_update_elem(fd, NULL, &val, 0) < 0 &&
errno == E2BIG);
/* Peek element */
@@ -629,12 +638,12 @@ static void test_stackmap(unsigned int task, void *data)
val == vals[i]);
/* Check that there are not elements left */
- assert(bpf_map_lookup_and_delete_elem(fd, NULL, &val) == -1 &&
+ assert(bpf_map_lookup_and_delete_elem(fd, NULL, &val) < 0 &&
errno == ENOENT);
/* Check that non supported functions set errno to EINVAL */
- assert(bpf_map_delete_elem(fd, NULL) == -1 && errno == EINVAL);
- assert(bpf_map_get_next_key(fd, NULL, NULL) == -1 && errno == EINVAL);
+ assert(bpf_map_delete_elem(fd, NULL) < 0 && errno == EINVAL);
+ assert(bpf_map_get_next_key(fd, NULL, NULL) < 0 && errno == EINVAL);
close(fd);
}
@@ -643,20 +652,20 @@ static void test_stackmap(unsigned int task, void *data)
#include <arpa/inet.h>
#include <sys/select.h>
#include <linux/err.h>
-#define SOCKMAP_PARSE_PROG "./sockmap_parse_prog.o"
-#define SOCKMAP_VERDICT_PROG "./sockmap_verdict_prog.o"
-#define SOCKMAP_TCP_MSG_PROG "./sockmap_tcp_msg_prog.o"
+#define SOCKMAP_PARSE_PROG "./sockmap_parse_prog.bpf.o"
+#define SOCKMAP_VERDICT_PROG "./sockmap_verdict_prog.bpf.o"
+#define SOCKMAP_TCP_MSG_PROG "./sockmap_tcp_msg_prog.bpf.o"
static void test_sockmap(unsigned int tasks, void *data)
{
struct bpf_map *bpf_map_rx, *bpf_map_tx, *bpf_map_msg, *bpf_map_break;
int map_fd_msg = 0, map_fd_rx = 0, map_fd_tx = 0, map_fd_break;
+ struct bpf_object *parse_obj, *verdict_obj, *msg_obj;
int ports[] = {50200, 50201, 50202, 50204};
int err, i, fd, udp, sfd[6] = {0xdeadbeef};
u8 buf[20] = {0x0, 0x5, 0x3, 0x2, 0x1, 0x0};
int parse_prog, verdict_prog, msg_prog;
struct sockaddr_in addr;
int one = 1, s, sc, rc;
- struct bpf_object *obj;
struct timeval to;
__u32 key, value;
pid_t pid[tasks];
@@ -726,11 +735,11 @@ static void test_sockmap(unsigned int tasks, void *data)
}
/* Test sockmap with connected sockets */
- fd = bpf_create_map(BPF_MAP_TYPE_SOCKMAP,
+ fd = bpf_map_create(BPF_MAP_TYPE_SOCKMAP, NULL,
sizeof(key), sizeof(value),
- 6, 0);
+ 6, NULL);
if (fd < 0) {
- if (!bpf_probe_map_type(BPF_MAP_TYPE_SOCKMAP, 0)) {
+ if (!libbpf_probe_bpf_map_type(BPF_MAP_TYPE_SOCKMAP, NULL)) {
printf("%s SKIP (unsupported map type BPF_MAP_TYPE_SOCKMAP)\n",
__func__);
skips++;
@@ -747,11 +756,12 @@ static void test_sockmap(unsigned int tasks, void *data)
udp = socket(AF_INET, SOCK_DGRAM, 0);
i = 0;
err = bpf_map_update_elem(fd, &i, &udp, BPF_ANY);
- if (!err) {
- printf("Failed socket SOCK_DGRAM allowed '%i:%i'\n",
+ if (err) {
+ printf("Failed socket update SOCK_DGRAM '%i:%i'\n",
i, udp);
goto out_sockmap;
}
+ close(udp);
/* Test update without programs */
for (i = 0; i < 6; i++) {
@@ -789,19 +799,19 @@ static void test_sockmap(unsigned int tasks, void *data)
}
err = bpf_prog_detach(fd, BPF_SK_SKB_STREAM_PARSER);
- if (err) {
+ if (!err) {
printf("Failed empty parser prog detach\n");
goto out_sockmap;
}
err = bpf_prog_detach(fd, BPF_SK_SKB_STREAM_VERDICT);
- if (err) {
+ if (!err) {
printf("Failed empty verdict prog detach\n");
goto out_sockmap;
}
err = bpf_prog_detach(fd, BPF_SK_MSG_VERDICT);
- if (err) {
+ if (!err) {
printf("Failed empty msg verdict prog detach\n");
goto out_sockmap;
}
@@ -813,29 +823,29 @@ static void test_sockmap(unsigned int tasks, void *data)
}
/* Load SK_SKB program and Attach */
- err = bpf_prog_load(SOCKMAP_PARSE_PROG,
- BPF_PROG_TYPE_SK_SKB, &obj, &parse_prog);
+ err = bpf_prog_test_load(SOCKMAP_PARSE_PROG,
+ BPF_PROG_TYPE_SK_SKB, &parse_obj, &parse_prog);
if (err) {
printf("Failed to load SK_SKB parse prog\n");
goto out_sockmap;
}
- err = bpf_prog_load(SOCKMAP_TCP_MSG_PROG,
- BPF_PROG_TYPE_SK_MSG, &obj, &msg_prog);
+ err = bpf_prog_test_load(SOCKMAP_TCP_MSG_PROG,
+ BPF_PROG_TYPE_SK_MSG, &msg_obj, &msg_prog);
if (err) {
printf("Failed to load SK_SKB msg prog\n");
goto out_sockmap;
}
- err = bpf_prog_load(SOCKMAP_VERDICT_PROG,
- BPF_PROG_TYPE_SK_SKB, &obj, &verdict_prog);
+ err = bpf_prog_test_load(SOCKMAP_VERDICT_PROG,
+ BPF_PROG_TYPE_SK_SKB, &verdict_obj, &verdict_prog);
if (err) {
printf("Failed to load SK_SKB verdict prog\n");
goto out_sockmap;
}
- bpf_map_rx = bpf_object__find_map_by_name(obj, "sock_map_rx");
- if (IS_ERR(bpf_map_rx)) {
+ bpf_map_rx = bpf_object__find_map_by_name(verdict_obj, "sock_map_rx");
+ if (!bpf_map_rx) {
printf("Failed to load map rx from verdict prog\n");
goto out_sockmap;
}
@@ -846,8 +856,8 @@ static void test_sockmap(unsigned int tasks, void *data)
goto out_sockmap;
}
- bpf_map_tx = bpf_object__find_map_by_name(obj, "sock_map_tx");
- if (IS_ERR(bpf_map_tx)) {
+ bpf_map_tx = bpf_object__find_map_by_name(verdict_obj, "sock_map_tx");
+ if (!bpf_map_tx) {
printf("Failed to load map tx from verdict prog\n");
goto out_sockmap;
}
@@ -858,8 +868,8 @@ static void test_sockmap(unsigned int tasks, void *data)
goto out_sockmap;
}
- bpf_map_msg = bpf_object__find_map_by_name(obj, "sock_map_msg");
- if (IS_ERR(bpf_map_msg)) {
+ bpf_map_msg = bpf_object__find_map_by_name(verdict_obj, "sock_map_msg");
+ if (!bpf_map_msg) {
printf("Failed to load map msg from msg_verdict prog\n");
goto out_sockmap;
}
@@ -870,8 +880,8 @@ static void test_sockmap(unsigned int tasks, void *data)
goto out_sockmap;
}
- bpf_map_break = bpf_object__find_map_by_name(obj, "sock_map_break");
- if (IS_ERR(bpf_map_break)) {
+ bpf_map_break = bpf_object__find_map_by_name(verdict_obj, "sock_map_break");
+ if (!bpf_map_break) {
printf("Failed to load map tx from verdict prog\n");
goto out_sockmap;
}
@@ -968,7 +978,7 @@ static void test_sockmap(unsigned int tasks, void *data)
FD_ZERO(&w);
FD_SET(sfd[3], &w);
- to.tv_sec = 1;
+ to.tv_sec = 30;
to.tv_usec = 0;
s = select(sfd[3] + 1, &w, NULL, NULL, &to);
if (s == -1) {
@@ -1090,19 +1100,19 @@ static void test_sockmap(unsigned int tasks, void *data)
assert(status == 0);
}
- err = bpf_prog_detach(map_fd_rx, __MAX_BPF_ATTACH_TYPE);
+ err = bpf_prog_detach2(parse_prog, map_fd_rx, __MAX_BPF_ATTACH_TYPE);
if (!err) {
printf("Detached an invalid prog type.\n");
goto out_sockmap;
}
- err = bpf_prog_detach(map_fd_rx, BPF_SK_SKB_STREAM_PARSER);
+ err = bpf_prog_detach2(parse_prog, map_fd_rx, BPF_SK_SKB_STREAM_PARSER);
if (err) {
printf("Failed parser prog detach\n");
goto out_sockmap;
}
- err = bpf_prog_detach(map_fd_rx, BPF_SK_SKB_STREAM_VERDICT);
+ err = bpf_prog_detach2(verdict_prog, map_fd_rx, BPF_SK_SKB_STREAM_VERDICT);
if (err) {
printf("Failed parser prog detach\n");
goto out_sockmap;
@@ -1116,7 +1126,9 @@ static void test_sockmap(unsigned int tasks, void *data)
}
close(fd);
close(map_fd_rx);
- bpf_object__close(obj);
+ bpf_object__close(parse_obj);
+ bpf_object__close(msg_obj);
+ bpf_object__close(verdict_obj);
return;
out:
for (i = 0; i < 6; i++)
@@ -1135,25 +1147,29 @@ out_sockmap:
exit(1);
}
-#define MAPINMAP_PROG "./test_map_in_map.o"
+#define MAPINMAP_PROG "./test_map_in_map.bpf.o"
+#define MAPINMAP_INVALID_PROG "./test_map_in_map_invalid.bpf.o"
static void test_map_in_map(void)
{
struct bpf_object *obj;
struct bpf_map *map;
int mim_fd, fd, err;
int pos = 0;
+ struct bpf_map_info info = {};
+ __u32 len = sizeof(info);
+ __u32 id = 0;
+ libbpf_print_fn_t old_print_fn;
obj = bpf_object__open(MAPINMAP_PROG);
- fd = bpf_create_map(BPF_MAP_TYPE_HASH, sizeof(int), sizeof(int),
- 2, 0);
+ fd = bpf_map_create(BPF_MAP_TYPE_HASH, NULL, sizeof(int), sizeof(int), 2, NULL);
if (fd < 0) {
printf("Failed to create hashmap '%s'!\n", strerror(errno));
exit(1);
}
map = bpf_object__find_map_by_name(obj, "mim_array");
- if (IS_ERR(map)) {
+ if (!map) {
printf("Failed to load array of maps from test prog\n");
goto out_map_in_map;
}
@@ -1164,7 +1180,7 @@ static void test_map_in_map(void)
}
map = bpf_object__find_map_by_name(obj, "mim_hash");
- if (IS_ERR(map)) {
+ if (!map) {
printf("Failed to load hash of maps from test prog\n");
goto out_map_in_map;
}
@@ -1174,10 +1190,14 @@ static void test_map_in_map(void)
goto out_map_in_map;
}
- bpf_object__load(obj);
+ err = bpf_object__load(obj);
+ if (err) {
+ printf("Failed to load test prog\n");
+ goto out_map_in_map;
+ }
map = bpf_object__find_map_by_name(obj, "mim_array");
- if (IS_ERR(map)) {
+ if (!map) {
printf("Failed to load array of maps from test prog\n");
goto out_map_in_map;
}
@@ -1194,7 +1214,7 @@ static void test_map_in_map(void)
}
map = bpf_object__find_map_by_name(obj, "mim_hash");
- if (IS_ERR(map)) {
+ if (!map) {
printf("Failed to load hash of maps from test prog\n");
goto out_map_in_map;
}
@@ -1211,11 +1231,75 @@ static void test_map_in_map(void)
}
close(fd);
+ fd = -1;
+ bpf_object__close(obj);
+
+ /* Test that failing bpf_object__create_map() destroys the inner map */
+ obj = bpf_object__open(MAPINMAP_INVALID_PROG);
+ err = libbpf_get_error(obj);
+ if (err) {
+ printf("Failed to load %s program: %d %d",
+ MAPINMAP_INVALID_PROG, err, errno);
+ goto out_map_in_map;
+ }
+
+ map = bpf_object__find_map_by_name(obj, "mim");
+ if (!map) {
+ printf("Failed to load array of maps from test prog\n");
+ goto out_map_in_map;
+ }
+
+ old_print_fn = libbpf_set_print(NULL);
+
+ err = bpf_object__load(obj);
+ if (!err) {
+ printf("Loading obj supposed to fail\n");
+ goto out_map_in_map;
+ }
+
+ libbpf_set_print(old_print_fn);
+
+ /* Iterate over all maps to check whether the internal map
+ * ("mim.internal") has been destroyed.
+ */
+ while (true) {
+ err = bpf_map_get_next_id(id, &id);
+ if (err) {
+ if (errno == ENOENT)
+ break;
+ printf("Failed to get next map: %d", errno);
+ goto out_map_in_map;
+ }
+
+ fd = bpf_map_get_fd_by_id(id);
+ if (fd < 0) {
+ if (errno == ENOENT)
+ continue;
+ printf("Failed to get map by id %u: %d", id, errno);
+ goto out_map_in_map;
+ }
+
+ err = bpf_map_get_info_by_fd(fd, &info, &len);
+ if (err) {
+ printf("Failed to get map info by fd %d: %d", fd,
+ errno);
+ goto out_map_in_map;
+ }
+
+ if (!strcmp(info.name, "mim.inner")) {
+ printf("Inner map mim.inner was not destroyed\n");
+ goto out_map_in_map;
+ }
+
+ close(fd);
+ }
+
bpf_object__close(obj);
return;
out_map_in_map:
- close(fd);
+ if (fd >= 0)
+ close(fd);
exit(1);
}
@@ -1223,15 +1307,16 @@ out_map_in_map:
static void test_map_large(void)
{
+
struct bigkey {
int a;
- char b[116];
+ char b[4096];
long long c;
} key;
int fd, i, value;
- fd = bpf_create_map(BPF_MAP_TYPE_HASH, sizeof(key), sizeof(value),
- MAP_SIZE, map_flags);
+ fd = bpf_map_create(BPF_MAP_TYPE_HASH, NULL, sizeof(key), sizeof(value),
+ MAP_SIZE, &map_opts);
if (fd < 0) {
printf("Failed to create large map '%s'!\n", strerror(errno));
exit(1);
@@ -1245,7 +1330,7 @@ static void test_map_large(void)
}
key.c = -1;
- assert(bpf_map_update_elem(fd, &key, &value, BPF_NOEXIST) == -1 &&
+ assert(bpf_map_update_elem(fd, &key, &value, BPF_NOEXIST) < 0 &&
errno == E2BIG);
/* Iterate through all elements. */
@@ -1253,12 +1338,12 @@ static void test_map_large(void)
key.c = -1;
for (i = 0; i < MAP_SIZE; i++)
assert(bpf_map_get_next_key(fd, &key, &key) == 0);
- assert(bpf_map_get_next_key(fd, &key, &key) == -1 && errno == ENOENT);
+ assert(bpf_map_get_next_key(fd, &key, &key) < 0 && errno == ENOENT);
key.c = 0;
assert(bpf_map_lookup_elem(fd, &key, &value) == 0 && value == 0);
key.a = 1;
- assert(bpf_map_lookup_elem(fd, &key, &value) == -1 && errno == ENOENT);
+ assert(bpf_map_lookup_elem(fd, &key, &value) < 0 && errno == ENOENT);
close(fd);
}
@@ -1274,6 +1359,8 @@ static void __run_parallel(unsigned int tasks,
pid_t pid[tasks];
int i;
+ fflush(stdout);
+
for (i = 0; i < tasks; i++) {
pid[i] = fork();
if (pid[i] == 0) {
@@ -1295,53 +1382,111 @@ static void __run_parallel(unsigned int tasks,
static void test_map_stress(void)
{
+ run_parallel(100, test_hashmap_walk, NULL);
run_parallel(100, test_hashmap, NULL);
run_parallel(100, test_hashmap_percpu, NULL);
run_parallel(100, test_hashmap_sizes, NULL);
- run_parallel(100, test_hashmap_walk, NULL);
run_parallel(100, test_arraymap, NULL);
run_parallel(100, test_arraymap_percpu, NULL);
}
-#define TASKS 1024
+#define TASKS 100
#define DO_UPDATE 1
#define DO_DELETE 0
+#define MAP_RETRIES 20
+#define MAX_DELAY_US 50000
+#define MIN_DELAY_RANGE_US 5000
+
+static bool retry_for_again_or_busy(int err)
+{
+ return (err == EAGAIN || err == EBUSY);
+}
+
+int map_update_retriable(int map_fd, const void *key, const void *value, int flags, int attempts,
+ retry_for_error_fn need_retry)
+{
+ int delay = rand() % MIN_DELAY_RANGE_US;
+
+ while (bpf_map_update_elem(map_fd, key, value, flags)) {
+ if (!attempts || !need_retry(errno))
+ return -errno;
+
+ if (delay <= MAX_DELAY_US / 2)
+ delay *= 2;
+
+ usleep(delay);
+ attempts--;
+ }
+
+ return 0;
+}
+
+static int map_delete_retriable(int map_fd, const void *key, int attempts)
+{
+ int delay = rand() % MIN_DELAY_RANGE_US;
+
+ while (bpf_map_delete_elem(map_fd, key)) {
+ if (!attempts || (errno != EAGAIN && errno != EBUSY))
+ return -errno;
+
+ if (delay <= MAX_DELAY_US / 2)
+ delay *= 2;
+
+ usleep(delay);
+ attempts--;
+ }
+
+ return 0;
+}
+
static void test_update_delete(unsigned int fn, void *data)
{
int do_update = ((int *)data)[1];
int fd = ((int *)data)[0];
- int i, key, value;
+ int i, key, value, err;
+ if (fn & 1)
+ test_hashmap_walk(fn, NULL);
for (i = fn; i < MAP_SIZE; i += TASKS) {
key = value = i;
if (do_update) {
- assert(bpf_map_update_elem(fd, &key, &value,
- BPF_NOEXIST) == 0);
- assert(bpf_map_update_elem(fd, &key, &value,
- BPF_EXIST) == 0);
+ err = map_update_retriable(fd, &key, &value, BPF_NOEXIST, MAP_RETRIES,
+ retry_for_again_or_busy);
+ if (err)
+ printf("error %d %d\n", err, errno);
+ assert(err == 0);
+ err = map_update_retriable(fd, &key, &value, BPF_EXIST, MAP_RETRIES,
+ retry_for_again_or_busy);
+ if (err)
+ printf("error %d %d\n", err, errno);
+ assert(err == 0);
} else {
- assert(bpf_map_delete_elem(fd, &key) == 0);
+ err = map_delete_retriable(fd, &key, MAP_RETRIES);
+ if (err)
+ printf("error %d %d\n", err, errno);
+ assert(err == 0);
}
}
}
static void test_map_parallel(void)
{
- int i, fd, key = 0, value = 0;
+ int i, fd, key = 0, value = 0, j = 0;
int data[2];
- fd = bpf_create_map(BPF_MAP_TYPE_HASH, sizeof(key), sizeof(value),
- MAP_SIZE, map_flags);
+ fd = bpf_map_create(BPF_MAP_TYPE_HASH, NULL, sizeof(key), sizeof(value),
+ MAP_SIZE, &map_opts);
if (fd < 0) {
printf("Failed to create map for parallel test '%s'!\n",
strerror(errno));
exit(1);
}
+again:
/* Use the same fd in children to add elements to this map:
* child_0 adds key=0, key=1024, key=2048, ...
* child_1 adds key=1, key=1025, key=2049, ...
@@ -1352,7 +1497,7 @@ static void test_map_parallel(void)
run_parallel(TASKS, test_update_delete, data);
/* Check that key=0 is already there. */
- assert(bpf_map_update_elem(fd, &key, &value, BPF_NOEXIST) == -1 &&
+ assert(bpf_map_update_elem(fd, &key, &value, BPF_NOEXIST) < 0 &&
errno == EEXIST);
/* Check that all elements were inserted. */
@@ -1360,7 +1505,7 @@ static void test_map_parallel(void)
key = -1;
for (i = 0; i < MAP_SIZE; i++)
assert(bpf_map_get_next_key(fd, &key, &key) == 0);
- assert(bpf_map_get_next_key(fd, &key, &key) == -1 && errno == ENOENT);
+ assert(bpf_map_get_next_key(fd, &key, &key) < 0 && errno == ENOENT);
/* Another check for all elements */
for (i = 0; i < MAP_SIZE; i++) {
@@ -1376,16 +1521,26 @@ static void test_map_parallel(void)
/* Nothing should be left. */
key = -1;
- assert(bpf_map_get_next_key(fd, NULL, &key) == -1 && errno == ENOENT);
- assert(bpf_map_get_next_key(fd, &key, &key) == -1 && errno == ENOENT);
+ assert(bpf_map_get_next_key(fd, NULL, &key) < 0 && errno == ENOENT);
+ assert(bpf_map_get_next_key(fd, &key, &key) < 0 && errno == ENOENT);
+
+ key = 0;
+ bpf_map_delete_elem(fd, &key);
+ if (j++ < 5)
+ goto again;
+ close(fd);
}
static void test_map_rdonly(void)
{
int fd, key = 0, value = 0;
+ __u32 old_flags;
- fd = bpf_create_map(BPF_MAP_TYPE_HASH, sizeof(key), sizeof(value),
- MAP_SIZE, map_flags | BPF_F_RDONLY);
+ old_flags = map_opts.map_flags;
+ map_opts.map_flags |= BPF_F_RDONLY;
+ fd = bpf_map_create(BPF_MAP_TYPE_HASH, NULL, sizeof(key), sizeof(value),
+ MAP_SIZE, &map_opts);
+ map_opts.map_flags = old_flags;
if (fd < 0) {
printf("Failed to create map for read only test '%s'!\n",
strerror(errno));
@@ -1395,12 +1550,12 @@ static void test_map_rdonly(void)
key = 1;
value = 1234;
/* Try to insert key=1 element. */
- assert(bpf_map_update_elem(fd, &key, &value, BPF_ANY) == -1 &&
+ assert(bpf_map_update_elem(fd, &key, &value, BPF_ANY) < 0 &&
errno == EPERM);
/* Check that key=1 is not found. */
- assert(bpf_map_lookup_elem(fd, &key, &value) == -1 && errno == ENOENT);
- assert(bpf_map_get_next_key(fd, &key, &value) == -1 && errno == ENOENT);
+ assert(bpf_map_lookup_elem(fd, &key, &value) < 0 && errno == ENOENT);
+ assert(bpf_map_get_next_key(fd, &key, &value) < 0 && errno == ENOENT);
close(fd);
}
@@ -1408,9 +1563,13 @@ static void test_map_rdonly(void)
static void test_map_wronly_hash(void)
{
int fd, key = 0, value = 0;
+ __u32 old_flags;
- fd = bpf_create_map(BPF_MAP_TYPE_HASH, sizeof(key), sizeof(value),
- MAP_SIZE, map_flags | BPF_F_WRONLY);
+ old_flags = map_opts.map_flags;
+ map_opts.map_flags |= BPF_F_WRONLY;
+ fd = bpf_map_create(BPF_MAP_TYPE_HASH, NULL, sizeof(key), sizeof(value),
+ MAP_SIZE, &map_opts);
+ map_opts.map_flags = old_flags;
if (fd < 0) {
printf("Failed to create map for write only test '%s'!\n",
strerror(errno));
@@ -1423,8 +1582,8 @@ static void test_map_wronly_hash(void)
assert(bpf_map_update_elem(fd, &key, &value, BPF_ANY) == 0);
/* Check that reading elements and keys from the map is not allowed. */
- assert(bpf_map_lookup_elem(fd, &key, &value) == -1 && errno == EPERM);
- assert(bpf_map_get_next_key(fd, &key, &value) == -1 && errno == EPERM);
+ assert(bpf_map_lookup_elem(fd, &key, &value) < 0 && errno == EPERM);
+ assert(bpf_map_get_next_key(fd, &key, &value) < 0 && errno == EPERM);
close(fd);
}
@@ -1432,13 +1591,17 @@ static void test_map_wronly_hash(void)
static void test_map_wronly_stack_or_queue(enum bpf_map_type map_type)
{
int fd, value = 0;
+ __u32 old_flags;
+
assert(map_type == BPF_MAP_TYPE_QUEUE ||
map_type == BPF_MAP_TYPE_STACK);
- fd = bpf_create_map(map_type, 0, sizeof(value), MAP_SIZE,
- map_flags | BPF_F_WRONLY);
+ old_flags = map_opts.map_flags;
+ map_opts.map_flags |= BPF_F_WRONLY;
+ fd = bpf_map_create(map_type, NULL, 0, sizeof(value), MAP_SIZE, &map_opts);
+ map_opts.map_flags = old_flags;
/* Stack/Queue maps do not support BPF_F_NO_PREALLOC */
- if (map_flags & BPF_F_NO_PREALLOC) {
+ if (map_opts.map_flags & BPF_F_NO_PREALLOC) {
assert(fd < 0 && errno == EINVAL);
return;
}
@@ -1451,10 +1614,10 @@ static void test_map_wronly_stack_or_queue(enum bpf_map_type map_type)
assert(bpf_map_update_elem(fd, NULL, &value, BPF_ANY) == 0);
/* Peek element should fail */
- assert(bpf_map_lookup_elem(fd, NULL, &value) == -1 && errno == EPERM);
+ assert(bpf_map_lookup_elem(fd, NULL, &value) < 0 && errno == EPERM);
/* Pop element should fail */
- assert(bpf_map_lookup_and_delete_elem(fd, NULL, &value) == -1 &&
+ assert(bpf_map_lookup_and_delete_elem(fd, NULL, &value) < 0 &&
errno == EPERM);
close(fd);
@@ -1508,7 +1671,7 @@ static void prepare_reuseport_grp(int type, int map_fd, size_t map_elem_size,
value = &fd32;
}
err = bpf_map_update_elem(map_fd, &index0, value, BPF_ANY);
- CHECK(err != -1 || errno != EINVAL,
+ CHECK(err >= 0 || errno != EINVAL,
"reuseport array update unbound sk",
"sock_type:%d err:%d errno:%d\n",
type, err, errno);
@@ -1537,7 +1700,7 @@ static void prepare_reuseport_grp(int type, int map_fd, size_t map_elem_size,
*/
err = bpf_map_update_elem(map_fd, &index0, value,
BPF_ANY);
- CHECK(err != -1 || errno != EINVAL,
+ CHECK(err >= 0 || errno != EINVAL,
"reuseport array update non-listening sk",
"sock_type:%d err:%d errno:%d\n",
type, err, errno);
@@ -1565,33 +1728,33 @@ static void test_reuseport_array(void)
__u32 fds_idx = 0;
int fd;
- map_fd = bpf_create_map(BPF_MAP_TYPE_REUSEPORT_SOCKARRAY,
- sizeof(__u32), sizeof(__u64), array_size, 0);
- CHECK(map_fd == -1, "reuseport array create",
+ map_fd = bpf_map_create(BPF_MAP_TYPE_REUSEPORT_SOCKARRAY, NULL,
+ sizeof(__u32), sizeof(__u64), array_size, NULL);
+ CHECK(map_fd < 0, "reuseport array create",
"map_fd:%d, errno:%d\n", map_fd, errno);
/* Test lookup/update/delete with invalid index */
err = bpf_map_delete_elem(map_fd, &bad_index);
- CHECK(err != -1 || errno != E2BIG, "reuseport array del >=max_entries",
+ CHECK(err >= 0 || errno != E2BIG, "reuseport array del >=max_entries",
"err:%d errno:%d\n", err, errno);
err = bpf_map_update_elem(map_fd, &bad_index, &fd64, BPF_ANY);
- CHECK(err != -1 || errno != E2BIG,
+ CHECK(err >= 0 || errno != E2BIG,
"reuseport array update >=max_entries",
"err:%d errno:%d\n", err, errno);
err = bpf_map_lookup_elem(map_fd, &bad_index, &map_cookie);
- CHECK(err != -1 || errno != ENOENT,
+ CHECK(err >= 0 || errno != ENOENT,
"reuseport array update >=max_entries",
"err:%d errno:%d\n", err, errno);
/* Test lookup/delete non existence elem */
err = bpf_map_lookup_elem(map_fd, &index3, &map_cookie);
- CHECK(err != -1 || errno != ENOENT,
+ CHECK(err >= 0 || errno != ENOENT,
"reuseport array lookup not-exist elem",
"err:%d errno:%d\n", err, errno);
err = bpf_map_delete_elem(map_fd, &index3);
- CHECK(err != -1 || errno != ENOENT,
+ CHECK(err >= 0 || errno != ENOENT,
"reuseport array del not-exist elem",
"err:%d errno:%d\n", err, errno);
@@ -1605,7 +1768,7 @@ static void test_reuseport_array(void)
/* BPF_EXIST failure case */
err = bpf_map_update_elem(map_fd, &index3, &grpa_fds64[fds_idx],
BPF_EXIST);
- CHECK(err != -1 || errno != ENOENT,
+ CHECK(err >= 0 || errno != ENOENT,
"reuseport array update empty elem BPF_EXIST",
"sock_type:%d err:%d errno:%d\n",
type, err, errno);
@@ -1614,7 +1777,7 @@ static void test_reuseport_array(void)
/* BPF_NOEXIST success case */
err = bpf_map_update_elem(map_fd, &index3, &grpa_fds64[fds_idx],
BPF_NOEXIST);
- CHECK(err == -1,
+ CHECK(err < 0,
"reuseport array update empty elem BPF_NOEXIST",
"sock_type:%d err:%d errno:%d\n",
type, err, errno);
@@ -1623,7 +1786,7 @@ static void test_reuseport_array(void)
/* BPF_EXIST success case. */
err = bpf_map_update_elem(map_fd, &index3, &grpa_fds64[fds_idx],
BPF_EXIST);
- CHECK(err == -1,
+ CHECK(err < 0,
"reuseport array update same elem BPF_EXIST",
"sock_type:%d err:%d errno:%d\n", type, err, errno);
fds_idx = REUSEPORT_FD_IDX(err, fds_idx);
@@ -1631,7 +1794,7 @@ static void test_reuseport_array(void)
/* BPF_NOEXIST failure case */
err = bpf_map_update_elem(map_fd, &index3, &grpa_fds64[fds_idx],
BPF_NOEXIST);
- CHECK(err != -1 || errno != EEXIST,
+ CHECK(err >= 0 || errno != EEXIST,
"reuseport array update non-empty elem BPF_NOEXIST",
"sock_type:%d err:%d errno:%d\n",
type, err, errno);
@@ -1640,7 +1803,7 @@ static void test_reuseport_array(void)
/* BPF_ANY case (always succeed) */
err = bpf_map_update_elem(map_fd, &index3, &grpa_fds64[fds_idx],
BPF_ANY);
- CHECK(err == -1,
+ CHECK(err < 0,
"reuseport array update same sk with BPF_ANY",
"sock_type:%d err:%d errno:%d\n", type, err, errno);
@@ -1649,32 +1812,32 @@ static void test_reuseport_array(void)
/* The same sk cannot be added to reuseport_array twice */
err = bpf_map_update_elem(map_fd, &index3, &fd64, BPF_ANY);
- CHECK(err != -1 || errno != EBUSY,
+ CHECK(err >= 0 || errno != EBUSY,
"reuseport array update same sk with same index",
"sock_type:%d err:%d errno:%d\n",
type, err, errno);
err = bpf_map_update_elem(map_fd, &index0, &fd64, BPF_ANY);
- CHECK(err != -1 || errno != EBUSY,
+ CHECK(err >= 0 || errno != EBUSY,
"reuseport array update same sk with different index",
"sock_type:%d err:%d errno:%d\n",
type, err, errno);
/* Test delete elem */
err = bpf_map_delete_elem(map_fd, &index3);
- CHECK(err == -1, "reuseport array delete sk",
+ CHECK(err < 0, "reuseport array delete sk",
"sock_type:%d err:%d errno:%d\n",
type, err, errno);
/* Add it back with BPF_NOEXIST */
err = bpf_map_update_elem(map_fd, &index3, &fd64, BPF_NOEXIST);
- CHECK(err == -1,
+ CHECK(err < 0,
"reuseport array re-add with BPF_NOEXIST after del",
"sock_type:%d err:%d errno:%d\n", type, err, errno);
/* Test cookie */
err = bpf_map_lookup_elem(map_fd, &index3, &map_cookie);
- CHECK(err == -1 || sk_cookie != map_cookie,
+ CHECK(err < 0 || sk_cookie != map_cookie,
"reuseport array lookup re-added sk",
"sock_type:%d err:%d errno:%d sk_cookie:0x%llx map_cookie:0x%llxn",
type, err, errno, sk_cookie, map_cookie);
@@ -1683,7 +1846,7 @@ static void test_reuseport_array(void)
for (f = 0; f < ARRAY_SIZE(grpa_fds64); f++)
close(grpa_fds64[f]);
err = bpf_map_lookup_elem(map_fd, &index3, &map_cookie);
- CHECK(err != -1 || errno != ENOENT,
+ CHECK(err >= 0 || errno != ENOENT,
"reuseport array lookup after close()",
"sock_type:%d err:%d errno:%d\n",
type, err, errno);
@@ -1694,7 +1857,7 @@ static void test_reuseport_array(void)
CHECK(fd64 == -1, "socket(SOCK_RAW)", "err:%d errno:%d\n",
err, errno);
err = bpf_map_update_elem(map_fd, &index3, &fd64, BPF_NOEXIST);
- CHECK(err != -1 || errno != ENOTSUPP, "reuseport array update SOCK_RAW",
+ CHECK(err >= 0 || errno != ENOTSUPP, "reuseport array update SOCK_RAW",
"err:%d errno:%d\n", err, errno);
close(fd64);
@@ -1702,18 +1865,18 @@ static void test_reuseport_array(void)
close(map_fd);
/* Test 32 bit fd */
- map_fd = bpf_create_map(BPF_MAP_TYPE_REUSEPORT_SOCKARRAY,
- sizeof(__u32), sizeof(__u32), array_size, 0);
- CHECK(map_fd == -1, "reuseport array create",
+ map_fd = bpf_map_create(BPF_MAP_TYPE_REUSEPORT_SOCKARRAY, NULL,
+ sizeof(__u32), sizeof(__u32), array_size, NULL);
+ CHECK(map_fd < 0, "reuseport array create",
"map_fd:%d, errno:%d\n", map_fd, errno);
prepare_reuseport_grp(SOCK_STREAM, map_fd, sizeof(__u32), &fd64,
&sk_cookie, 1);
fd = fd64;
err = bpf_map_update_elem(map_fd, &index3, &fd, BPF_NOEXIST);
- CHECK(err == -1, "reuseport array update 32 bit fd",
+ CHECK(err < 0, "reuseport array update 32 bit fd",
"err:%d errno:%d\n", err, errno);
err = bpf_map_lookup_elem(map_fd, &index3, &map_cookie);
- CHECK(err != -1 || errno != ENOSPC,
+ CHECK(err >= 0 || errno != ENOSPC,
"reuseport array lookup 32 bit fd",
"err:%d errno:%d\n", err, errno);
close(fd);
@@ -1759,10 +1922,12 @@ int main(void)
{
srand(time(NULL));
- map_flags = 0;
+ libbpf_set_strict_mode(LIBBPF_STRICT_ALL);
+
+ map_opts.map_flags = 0;
run_all_tests();
- map_flags = BPF_F_NO_PREALLOC;
+ map_opts.map_flags = BPF_F_NO_PREALLOC;
run_all_tests();
#define DEFINE_TEST(name) test_##name();
diff --git a/tools/testing/selftests/bpf/test_maps.h b/tools/testing/selftests/bpf/test_maps.h
index 77d8587ac4ed..e4ac704a536c 100644
--- a/tools/testing/selftests/bpf/test_maps.h
+++ b/tools/testing/selftests/bpf/test_maps.h
@@ -4,6 +4,7 @@
#include <stdio.h>
#include <stdlib.h>
+#include <stdbool.h>
#define CHECK(condition, tag, format...) ({ \
int __ret = !!(condition); \
@@ -14,4 +15,10 @@
} \
})
+extern int skips;
+
+typedef bool (*retry_for_error_fn)(int err);
+int map_update_retriable(int map_fd, const void *key, const void *value, int flags, int attempts,
+ retry_for_error_fn need_retry);
+
#endif
diff --git a/tools/testing/selftests/bpf/test_netcnt.c b/tools/testing/selftests/bpf/test_netcnt.c
deleted file mode 100644
index c1da5404454a..000000000000
--- a/tools/testing/selftests/bpf/test_netcnt.c
+++ /dev/null
@@ -1,161 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-#include <stdio.h>
-#include <stdlib.h>
-#include <string.h>
-#include <errno.h>
-#include <assert.h>
-#include <sys/sysinfo.h>
-#include <sys/time.h>
-
-#include <linux/bpf.h>
-#include <bpf/bpf.h>
-#include <bpf/libbpf.h>
-
-#include "cgroup_helpers.h"
-#include "bpf_rlimit.h"
-#include "netcnt_common.h"
-
-#define BPF_PROG "./netcnt_prog.o"
-#define TEST_CGROUP "/test-network-counters/"
-
-static int bpf_find_map(const char *test, struct bpf_object *obj,
- const char *name)
-{
- struct bpf_map *map;
-
- map = bpf_object__find_map_by_name(obj, name);
- if (!map) {
- printf("%s:FAIL:map '%s' not found\n", test, name);
- return -1;
- }
- return bpf_map__fd(map);
-}
-
-int main(int argc, char **argv)
-{
- struct percpu_net_cnt *percpu_netcnt;
- struct bpf_cgroup_storage_key key;
- int map_fd, percpu_map_fd;
- int error = EXIT_FAILURE;
- struct net_cnt netcnt;
- struct bpf_object *obj;
- int prog_fd, cgroup_fd;
- unsigned long packets;
- unsigned long bytes;
- int cpu, nproc;
- __u32 prog_cnt;
-
- nproc = get_nprocs_conf();
- percpu_netcnt = malloc(sizeof(*percpu_netcnt) * nproc);
- if (!percpu_netcnt) {
- printf("Not enough memory for per-cpu area (%d cpus)\n", nproc);
- goto err;
- }
-
- if (bpf_prog_load(BPF_PROG, BPF_PROG_TYPE_CGROUP_SKB,
- &obj, &prog_fd)) {
- printf("Failed to load bpf program\n");
- goto out;
- }
-
- if (setup_cgroup_environment()) {
- printf("Failed to load bpf program\n");
- goto err;
- }
-
- /* Create a cgroup, get fd, and join it */
- cgroup_fd = create_and_get_cgroup(TEST_CGROUP);
- if (cgroup_fd < 0) {
- printf("Failed to create test cgroup\n");
- goto err;
- }
-
- if (join_cgroup(TEST_CGROUP)) {
- printf("Failed to join cgroup\n");
- goto err;
- }
-
- /* Attach bpf program */
- if (bpf_prog_attach(prog_fd, cgroup_fd, BPF_CGROUP_INET_EGRESS, 0)) {
- printf("Failed to attach bpf program");
- goto err;
- }
-
- if (system("which ping6 &>/dev/null") == 0)
- assert(!system("ping6 localhost -c 10000 -f -q > /dev/null"));
- else
- assert(!system("ping -6 localhost -c 10000 -f -q > /dev/null"));
-
- if (bpf_prog_query(cgroup_fd, BPF_CGROUP_INET_EGRESS, 0, NULL, NULL,
- &prog_cnt)) {
- printf("Failed to query attached programs");
- goto err;
- }
-
- map_fd = bpf_find_map(__func__, obj, "netcnt");
- if (map_fd < 0) {
- printf("Failed to find bpf map with net counters");
- goto err;
- }
-
- percpu_map_fd = bpf_find_map(__func__, obj, "percpu_netcnt");
- if (percpu_map_fd < 0) {
- printf("Failed to find bpf map with percpu net counters");
- goto err;
- }
-
- if (bpf_map_get_next_key(map_fd, NULL, &key)) {
- printf("Failed to get key in cgroup storage\n");
- goto err;
- }
-
- if (bpf_map_lookup_elem(map_fd, &key, &netcnt)) {
- printf("Failed to lookup cgroup storage\n");
- goto err;
- }
-
- if (bpf_map_lookup_elem(percpu_map_fd, &key, &percpu_netcnt[0])) {
- printf("Failed to lookup percpu cgroup storage\n");
- goto err;
- }
-
- /* Some packets can be still in per-cpu cache, but not more than
- * MAX_PERCPU_PACKETS.
- */
- packets = netcnt.packets;
- bytes = netcnt.bytes;
- for (cpu = 0; cpu < nproc; cpu++) {
- if (percpu_netcnt[cpu].packets > MAX_PERCPU_PACKETS) {
- printf("Unexpected percpu value: %llu\n",
- percpu_netcnt[cpu].packets);
- goto err;
- }
-
- packets += percpu_netcnt[cpu].packets;
- bytes += percpu_netcnt[cpu].bytes;
- }
-
- /* No packets should be lost */
- if (packets != 10000) {
- printf("Unexpected packet count: %lu\n", packets);
- goto err;
- }
-
- /* Let's check that bytes counter matches the number of packets
- * multiplied by the size of ipv6 ICMP packet.
- */
- if (bytes != packets * 104) {
- printf("Unexpected bytes count: %lu\n", bytes);
- goto err;
- }
-
- error = 0;
- printf("test_netcnt:PASS\n");
-
-err:
- cleanup_cgroup_environment();
- free(percpu_netcnt);
-
-out:
- return error;
-}
diff --git a/tools/testing/selftests/bpf/test_offload.py b/tools/testing/selftests/bpf/test_offload.py
index 8294ae3ffb3c..6157f884d091 100755
--- a/tools/testing/selftests/bpf/test_offload.py
+++ b/tools/testing/selftests/bpf/test_offload.py
@@ -1,4 +1,4 @@
-#!/usr/bin/python3
+#!/usr/bin/env python3
# Copyright (C) 2017 Netronome Systems, Inc.
# Copyright (c) 2019 Mellanox Technologies. All rights reserved
@@ -169,12 +169,14 @@ def bpftool(args, JSON=True, ns="", fail=True, include_stderr=False):
return tool("bpftool", args, {"json":"-p"}, JSON=JSON, ns=ns,
fail=fail, include_stderr=include_stderr)
-def bpftool_prog_list(expected=None, ns=""):
+def bpftool_prog_list(expected=None, ns="", exclude_orphaned=True):
_, progs = bpftool("prog show", JSON=True, ns=ns, fail=True)
# Remove the base progs
for p in base_progs:
if p in progs:
progs.remove(p)
+ if exclude_orphaned:
+ progs = [ p for p in progs if not p['orphaned'] ]
if expected is not None:
if len(progs) != expected:
fail(True, "%d BPF programs loaded, expected %d" %
@@ -184,9 +186,7 @@ def bpftool_prog_list(expected=None, ns=""):
def bpftool_map_list(expected=None, ns=""):
_, maps = bpftool("map show", JSON=True, ns=ns, fail=True)
# Remove the base maps
- for m in base_maps:
- if m in maps:
- maps.remove(m)
+ maps = [m for m in maps if m not in base_maps and m.get('name') and m.get('name') not in base_map_names]
if expected is not None:
if len(maps) != expected:
fail(True, "%d BPF maps loaded, expected %d" %
@@ -318,6 +318,9 @@ class DebugfsDir:
continue
if os.path.isfile(p):
+ # We need to init trap_flow_action_cookie before read it
+ if f == "trap_flow_action_cookie":
+ cmd('echo deadbeef > %s/%s' % (path, f))
_, out = cmd('cat %s/%s' % (path, f))
dfs[f] = out.strip()
elif os.path.isdir(p):
@@ -611,11 +614,9 @@ def pin_map(file_name, idx=0, expected=1):
def check_dev_info_removed(prog_file=None, map_file=None):
bpftool_prog_list(expected=0)
+ bpftool_prog_list(expected=1, exclude_orphaned=False)
ret, err = bpftool("prog show pin %s" % (prog_file), fail=False)
- fail(ret == 0, "Showing prog with removed device did not fail")
- fail(err["error"].find("No such device") == -1,
- "Showing prog with removed device expected ENODEV, error is %s" %
- (err["error"]))
+ fail(ret != 0, "failed to show prog with removed device")
bpftool_map_list(expected=0)
ret, err = bpftool("map show pin %s" % (map_file), fail=False)
@@ -713,13 +714,11 @@ def test_multi_prog(simdev, sim, obj, modename, modeid):
fail(ret == 0, "Replaced one of programs without -force")
check_extack(err, "XDP program already attached.", args)
- if modename == "" or modename == "drv":
- othermode = "" if modename == "drv" else "drv"
- start_test("Test multi-attachment XDP - detach...")
- ret, _, err = sim.unset_xdp(othermode, force=True,
- fail=False, include_stderr=True)
- fail(ret == 0, "Removed program with a bad mode")
- check_extack(err, "program loaded with different flags.", args)
+ start_test("Test multi-attachment XDP - remove without mode...")
+ ret, _, err = sim.unset_xdp("", force=True,
+ fail=False, include_stderr=True)
+ fail(ret == 0, "Removed program without a mode flag")
+ check_extack(err, "More than one program loaded, unset mode is ambiguous.", args)
sim.unset_xdp("offload")
xdp = sim.ip_link_show(xdp=True)["xdp"]
@@ -769,10 +768,15 @@ ret, progs = bpftool("prog", fail=False)
skip(ret != 0, "bpftool not installed")
base_progs = progs
_, base_maps = bpftool("map")
+base_map_names = [
+ 'pid_iter.rodata', # created on each bpftool invocation
+ 'libbpf_det_bind', # created on each bpftool invocation
+]
# Check netdevsim
-ret, out = cmd("modprobe netdevsim", fail=False)
-skip(ret != 0, "netdevsim module could not be loaded")
+if not os.path.isdir("/sys/bus/netdevsim/"):
+ ret, out = cmd("modprobe netdevsim", fail=False)
+ skip(ret != 0, "netdevsim module could not be loaded")
# Check debugfs
_, out = cmd("mount")
@@ -780,7 +784,7 @@ if out.find("/sys/kernel/debug type debugfs") == -1:
cmd("mount -t debugfs none /sys/kernel/debug")
# Check samples are compiled
-samples = ["sample_ret0.o", "sample_map_ret0.o"]
+samples = ["sample_ret0.bpf.o", "sample_map_ret0.bpf.o"]
for s in samples:
ret, out = cmd("ls %s/%s" % (bpf_test_dir, s), fail=False)
skip(ret != 0, "sample %s/%s not found, please compile it" %
@@ -801,7 +805,7 @@ cmd("ip netns delete %s" % (ns))
netns = []
try:
- obj = bpf_obj("sample_ret0.o")
+ obj = bpf_obj("sample_ret0.bpf.o")
bytecode = bpf_bytecode("1,6 0 0 4294967295,")
start_test("Test destruction of generic XDP...")
@@ -910,11 +914,18 @@ try:
sim.tc_flush_filters()
+ start_test("Test TC offloads failure...")
+ sim.dfs["dev/bpf_bind_verifier_accept"] = 0
+ ret, _, err = sim.cls_bpf_add_filter(obj, verbose=True, skip_sw=True,
+ fail=False, include_stderr=True)
+ fail(ret == 0, "TC filter did not reject with TC offloads enabled")
+ check_verifier_log(err, "[netdevsim] Hello from netdevsim!")
+ sim.dfs["dev/bpf_bind_verifier_accept"] = 1
+
start_test("Test TC offloads work...")
ret, _, err = sim.cls_bpf_add_filter(obj, verbose=True, skip_sw=True,
fail=False, include_stderr=True)
fail(ret != 0, "TC filter did not load with TC offloads enabled")
- check_verifier_log(err, "[netdevsim] Hello from netdevsim!")
start_test("Test TC offload basics...")
dfs = simdev.dfs_get_bound_progs(expected=1)
@@ -938,6 +949,7 @@ try:
start_test("Test disabling TC offloads is rejected while filters installed...")
ret, _ = sim.set_ethtool_tc_offloads(False, fail=False)
fail(ret == 0, "Driver should refuse to disable TC offloads with filters installed...")
+ sim.set_ethtool_tc_offloads(True)
start_test("Test qdisc removal frees things...")
sim.tc_flush_filters()
@@ -996,18 +1008,8 @@ try:
fail=False, include_stderr=True)
fail(ret == 0, "Replaced XDP program with a program in different mode")
check_extack(err,
- "native and generic XDP can't be active at the same time.",
+ "Native and generic XDP can't be active at the same time.",
args)
- ret, _, err = sim.set_xdp(obj, "", force=True,
- fail=False, include_stderr=True)
- fail(ret == 0, "Replaced XDP program with a program in different mode")
- check_extack(err, "program loaded with different flags.", args)
-
- start_test("Test XDP prog remove with bad flags...")
- ret, _, err = sim.unset_xdp("", force=True,
- fail=False, include_stderr=True)
- fail(ret == 0, "Removed program with a bad mode")
- check_extack(err, "program loaded with different flags.", args)
start_test("Test MTU restrictions...")
ret, _ = sim.set_mtu(9000, fail=False)
@@ -1023,7 +1025,7 @@ try:
sim.wait_for_flush()
start_test("Test non-offload XDP attaching to HW...")
- bpftool_prog_load("sample_ret0.o", "/sys/fs/bpf/nooffload")
+ bpftool_prog_load("sample_ret0.bpf.o", "/sys/fs/bpf/nooffload")
nooffload = bpf_pinned("/sys/fs/bpf/nooffload")
ret, _, err = sim.set_xdp(nooffload, "offload",
fail=False, include_stderr=True)
@@ -1032,15 +1034,24 @@ try:
rm("/sys/fs/bpf/nooffload")
start_test("Test offload XDP attaching to drv...")
- bpftool_prog_load("sample_ret0.o", "/sys/fs/bpf/offload",
+ bpftool_prog_load("sample_ret0.bpf.o", "/sys/fs/bpf/offload",
dev=sim['ifname'])
offload = bpf_pinned("/sys/fs/bpf/offload")
ret, _, err = sim.set_xdp(offload, "drv", fail=False, include_stderr=True)
fail(ret == 0, "attached offloaded XDP program to drv")
- check_extack(err, "using device-bound program without HW_MODE flag is not supported.", args)
+ check_extack(err, "Using offloaded program without HW_MODE flag is not supported.", args)
rm("/sys/fs/bpf/offload")
sim.wait_for_flush()
+ start_test("Test XDP load failure...")
+ sim.dfs["dev/bpf_bind_verifier_accept"] = 0
+ ret, _, err = bpftool_prog_load("sample_ret0.bpf.o", "/sys/fs/bpf/offload",
+ dev=sim['ifname'], fail=False, include_stderr=True)
+ fail(ret == 0, "verifier should fail on load")
+ check_verifier_log(err, "[netdevsim] Hello from netdevsim!")
+ sim.dfs["dev/bpf_bind_verifier_accept"] = 1
+ sim.wait_for_flush()
+
start_test("Test XDP offload...")
_, _, err = sim.set_xdp(obj, "offload", verbose=True, include_stderr=True)
ipl = sim.ip_link_show(xdp=True)
@@ -1048,7 +1059,6 @@ try:
progs = bpftool_prog_list(expected=1)
prog = progs[0]
fail(link_xdp["id"] != prog["id"], "Loaded program has wrong ID")
- check_verifier_log(err, "[netdevsim] Hello from netdevsim!")
start_test("Test XDP offload is device bound...")
dfs = simdev.dfs_get_bound_progs(expected=1)
@@ -1078,12 +1088,12 @@ try:
ret, _, err = sim.set_xdp(pinned, "offload",
fail=False, include_stderr=True)
fail(ret == 0, "Pinned program loaded for a different device accepted")
- check_extack_nsim(err, "program bound to different dev.", args)
+ check_extack(err, "Program bound to different device.", args)
simdev2.remove()
ret, _, err = sim.set_xdp(pinned, "offload",
fail=False, include_stderr=True)
fail(ret == 0, "Pinned program loaded for a removed device accepted")
- check_extack_nsim(err, "xdpoffload of non-bound program.", args)
+ check_extack(err, "Program bound to different device.", args)
rm(pin_file)
bpftool_prog_list_wait(expected=0)
@@ -1161,7 +1171,7 @@ try:
simdev = NetdevSimDev()
sim, = simdev.nsims
- map_obj = bpf_obj("sample_map_ret0.o")
+ map_obj = bpf_obj("sample_map_ret0.bpf.o")
start_test("Test loading program with maps...")
sim.set_xdp(map_obj, "offload", JSON=False) # map fixup msg breaks JSON
@@ -1299,10 +1309,10 @@ try:
sims = (simA, simB1, simB2, simB3)
simB = (simB1, simB2, simB3)
- bpftool_prog_load("sample_map_ret0.o", "/sys/fs/bpf/nsimA",
+ bpftool_prog_load("sample_map_ret0.bpf.o", "/sys/fs/bpf/nsimA",
dev=simA['ifname'])
progA = bpf_pinned("/sys/fs/bpf/nsimA")
- bpftool_prog_load("sample_map_ret0.o", "/sys/fs/bpf/nsimB",
+ bpftool_prog_load("sample_map_ret0.bpf.o", "/sys/fs/bpf/nsimB",
dev=simB1['ifname'])
progB = bpf_pinned("/sys/fs/bpf/nsimB")
@@ -1324,26 +1334,26 @@ try:
ret, _, err = simA.set_xdp(progB, "offload", force=True, JSON=False,
fail=False, include_stderr=True)
fail(ret == 0, "cross-ASIC program allowed")
- check_extack_nsim(err, "program bound to different dev.", args)
+ check_extack(err, "Program bound to different device.", args)
for d in simdevB.nsims:
ret, _, err = d.set_xdp(progA, "offload", force=True, JSON=False,
fail=False, include_stderr=True)
fail(ret == 0, "cross-ASIC program allowed")
- check_extack_nsim(err, "program bound to different dev.", args)
+ check_extack(err, "Program bound to different device.", args)
start_test("Test multi-dev ASIC cross-dev map reuse...")
mapA = bpftool("prog show %s" % (progA))[1]["map_ids"][0]
mapB = bpftool("prog show %s" % (progB))[1]["map_ids"][0]
- ret, _ = bpftool_prog_load("sample_map_ret0.o", "/sys/fs/bpf/nsimB_",
+ ret, _ = bpftool_prog_load("sample_map_ret0.bpf.o", "/sys/fs/bpf/nsimB_",
dev=simB3['ifname'],
maps=["idx 0 id %d" % (mapB)],
fail=False)
fail(ret != 0, "couldn't reuse a map on the same ASIC")
rm("/sys/fs/bpf/nsimB_")
- ret, _, err = bpftool_prog_load("sample_map_ret0.o", "/sys/fs/bpf/nsimA_",
+ ret, _, err = bpftool_prog_load("sample_map_ret0.bpf.o", "/sys/fs/bpf/nsimA_",
dev=simA['ifname'],
maps=["idx 0 id %d" % (mapB)],
fail=False, include_stderr=True)
@@ -1351,7 +1361,7 @@ try:
fail(err.count("offload device mismatch between prog and map") == 0,
"error message missing for cross-ASIC map")
- ret, _, err = bpftool_prog_load("sample_map_ret0.o", "/sys/fs/bpf/nsimB_",
+ ret, _, err = bpftool_prog_load("sample_map_ret0.bpf.o", "/sys/fs/bpf/nsimB_",
dev=simB1['ifname'],
maps=["idx 0 id %d" % (mapA)],
fail=False, include_stderr=True)
@@ -1385,10 +1395,7 @@ try:
start_test("Test multi-dev ASIC cross-dev destruction - orphaned...")
ret, out = bpftool("prog show %s" % (progB), fail=False)
- fail(ret == 0, "got information about orphaned program")
- fail("error" not in out, "no error reported for get info on orphaned")
- fail(out["error"] != "can't get prog info: No such device",
- "wrong error for get info on orphaned")
+ fail(ret != 0, "couldn't get information about orphaned program")
print("%s: OK" % (os.path.basename(__file__)))
diff --git a/tools/testing/selftests/bpf/test_progs.c b/tools/testing/selftests/bpf/test_progs.c
index 54fa5fa688ce..89ff704e9dad 100644
--- a/tools/testing/selftests/bpf/test_progs.c
+++ b/tools/testing/selftests/bpf/test_progs.c
@@ -3,14 +3,133 @@
*/
#define _GNU_SOURCE
#include "test_progs.h"
+#include "testing_helpers.h"
#include "cgroup_helpers.h"
-#include "bpf_rlimit.h"
#include <argp.h>
#include <pthread.h>
#include <sched.h>
#include <signal.h>
#include <string.h>
#include <execinfo.h> /* backtrace */
+#include <sys/sysinfo.h> /* get_nprocs */
+#include <netinet/in.h>
+#include <sys/select.h>
+#include <sys/socket.h>
+#include <sys/un.h>
+#include <bpf/btf.h>
+#include "json_writer.h"
+
+static bool verbose(void)
+{
+ return env.verbosity > VERBOSE_NONE;
+}
+
+static void stdio_hijack_init(char **log_buf, size_t *log_cnt)
+{
+#ifdef __GLIBC__
+ if (verbose() && env.worker_id == -1) {
+ /* nothing to do, output to stdout by default */
+ return;
+ }
+
+ fflush(stdout);
+ fflush(stderr);
+
+ stdout = open_memstream(log_buf, log_cnt);
+ if (!stdout) {
+ stdout = env.stdout;
+ perror("open_memstream");
+ return;
+ }
+
+ if (env.subtest_state)
+ env.subtest_state->stdout = stdout;
+ else
+ env.test_state->stdout = stdout;
+
+ stderr = stdout;
+#endif
+}
+
+static void stdio_hijack(char **log_buf, size_t *log_cnt)
+{
+#ifdef __GLIBC__
+ if (verbose() && env.worker_id == -1) {
+ /* nothing to do, output to stdout by default */
+ return;
+ }
+
+ env.stdout = stdout;
+ env.stderr = stderr;
+
+ stdio_hijack_init(log_buf, log_cnt);
+#endif
+}
+
+static void stdio_restore_cleanup(void)
+{
+#ifdef __GLIBC__
+ if (verbose() && env.worker_id == -1) {
+ /* nothing to do, output to stdout by default */
+ return;
+ }
+
+ fflush(stdout);
+
+ if (env.subtest_state) {
+ fclose(env.subtest_state->stdout);
+ env.subtest_state->stdout = NULL;
+ stdout = env.test_state->stdout;
+ stderr = env.test_state->stdout;
+ } else {
+ fclose(env.test_state->stdout);
+ env.test_state->stdout = NULL;
+ }
+#endif
+}
+
+static void stdio_restore(void)
+{
+#ifdef __GLIBC__
+ if (verbose() && env.worker_id == -1) {
+ /* nothing to do, output to stdout by default */
+ return;
+ }
+
+ if (stdout == env.stdout)
+ return;
+
+ stdio_restore_cleanup();
+
+ stdout = env.stdout;
+ stderr = env.stderr;
+#endif
+}
+
+/* Adapted from perf/util/string.c */
+static bool glob_match(const char *str, const char *pat)
+{
+ while (*str && *pat && *pat != '*') {
+ if (*str != *pat)
+ return false;
+ str++;
+ pat++;
+ }
+ /* Check wild card */
+ if (*pat == '*') {
+ while (*pat == '*')
+ pat++;
+ if (!*pat) /* Tail wild card matches all */
+ return true;
+ while (*str)
+ if (glob_match(str++, pat))
+ return true;
+ }
+ return !*str && !*pat;
+}
+
+#define EXIT_NO_TEST 2
+#define EXIT_ERR_SETUP_INFRA 3
/* defined in test_progs.h */
struct test_env env = {};
@@ -19,17 +138,9 @@ struct prog_test_def {
const char *test_name;
int test_num;
void (*run_test)(void);
- bool force_log;
- int error_cnt;
- int skip_cnt;
- bool tested;
+ void (*run_serial_test)(void);
+ bool should_run;
bool need_cgroup_cleanup;
-
- char *subtest_name;
- int subtest_num;
-
- /* store counts before subtest started */
- int old_error_cnt;
};
/* Override C runtime library's usleep() implementation to ensure nanosleep()
@@ -51,12 +162,13 @@ static bool should_run(struct test_selector *sel, int num, const char *name)
int i;
for (i = 0; i < sel->blacklist.cnt; i++) {
- if (strstr(name, sel->blacklist.strs[i]))
+ if (glob_match(name, sel->blacklist.tests[i].name) &&
+ !sel->blacklist.tests[i].subtest_cnt)
return false;
}
for (i = 0; i < sel->whitelist.cnt; i++) {
- if (strstr(name, sel->whitelist.strs[i]))
+ if (glob_match(name, sel->whitelist.tests[i].name))
return true;
}
@@ -66,31 +178,182 @@ static bool should_run(struct test_selector *sel, int num, const char *name)
return num < sel->num_set_len && sel->num_set[num];
}
-static void dump_test_log(const struct prog_test_def *test, bool failed)
+static bool should_run_subtest(struct test_selector *sel,
+ struct test_selector *subtest_sel,
+ int subtest_num,
+ const char *test_name,
+ const char *subtest_name)
{
- if (stdout == env.stdout)
- return;
+ int i, j;
- fflush(stdout); /* exports env.log_buf & env.log_cnt */
+ for (i = 0; i < sel->blacklist.cnt; i++) {
+ if (glob_match(test_name, sel->blacklist.tests[i].name)) {
+ if (!sel->blacklist.tests[i].subtest_cnt)
+ return false;
+
+ for (j = 0; j < sel->blacklist.tests[i].subtest_cnt; j++) {
+ if (glob_match(subtest_name,
+ sel->blacklist.tests[i].subtests[j]))
+ return false;
+ }
+ }
+ }
- if (env.verbosity > VERBOSE_NONE || test->force_log || failed) {
- if (env.log_cnt) {
- env.log_buf[env.log_cnt] = '\0';
- fprintf(env.stdout, "%s", env.log_buf);
- if (env.log_buf[env.log_cnt - 1] != '\n')
- fprintf(env.stdout, "\n");
+ for (i = 0; i < sel->whitelist.cnt; i++) {
+ if (glob_match(test_name, sel->whitelist.tests[i].name)) {
+ if (!sel->whitelist.tests[i].subtest_cnt)
+ return true;
+
+ for (j = 0; j < sel->whitelist.tests[i].subtest_cnt; j++) {
+ if (glob_match(subtest_name,
+ sel->whitelist.tests[i].subtests[j]))
+ return true;
+ }
}
}
- fseeko(stdout, 0, SEEK_SET); /* rewind */
+ if (!sel->whitelist.cnt && !subtest_sel->num_set)
+ return true;
+
+ return subtest_num < subtest_sel->num_set_len && subtest_sel->num_set[subtest_num];
+}
+
+static char *test_result(bool failed, bool skipped)
+{
+ return failed ? "FAIL" : (skipped ? "SKIP" : "OK");
+}
+
+#define TEST_NUM_WIDTH 7
+
+static void print_test_result(const struct prog_test_def *test, const struct test_state *test_state)
+{
+ int skipped_cnt = test_state->skip_cnt;
+ int subtests_cnt = test_state->subtest_num;
+
+ fprintf(env.stdout, "#%-*d %s:", TEST_NUM_WIDTH, test->test_num, test->test_name);
+ if (test_state->error_cnt)
+ fprintf(env.stdout, "FAIL");
+ else if (!skipped_cnt)
+ fprintf(env.stdout, "OK");
+ else if (skipped_cnt == subtests_cnt || !subtests_cnt)
+ fprintf(env.stdout, "SKIP");
+ else
+ fprintf(env.stdout, "OK (SKIP: %d/%d)", skipped_cnt, subtests_cnt);
+
+ fprintf(env.stdout, "\n");
+}
+
+static void print_test_log(char *log_buf, size_t log_cnt)
+{
+ log_buf[log_cnt] = '\0';
+ fprintf(env.stdout, "%s", log_buf);
+ if (log_buf[log_cnt - 1] != '\n')
+ fprintf(env.stdout, "\n");
+}
+
+static void print_subtest_name(int test_num, int subtest_num,
+ const char *test_name, char *subtest_name,
+ char *result)
+{
+ char test_num_str[32];
+
+ snprintf(test_num_str, sizeof(test_num_str), "%d/%d", test_num, subtest_num);
+
+ fprintf(env.stdout, "#%-*s %s/%s",
+ TEST_NUM_WIDTH, test_num_str,
+ test_name, subtest_name);
+
+ if (result)
+ fprintf(env.stdout, ":%s", result);
+
+ fprintf(env.stdout, "\n");
+}
+
+static void jsonw_write_log_message(json_writer_t *w, char *log_buf, size_t log_cnt)
+{
+ /* open_memstream (from stdio_hijack_init) ensures that log_bug is terminated by a
+ * null byte. Yet in parallel mode, log_buf will be NULL if there is no message.
+ */
+ if (log_cnt) {
+ jsonw_string_field(w, "message", log_buf);
+ } else {
+ jsonw_string_field(w, "message", "");
+ }
}
-static void skip_account(void)
+static void dump_test_log(const struct prog_test_def *test,
+ const struct test_state *test_state,
+ bool skip_ok_subtests,
+ bool par_exec_result,
+ json_writer_t *w)
{
- if (env.test->skip_cnt) {
- env.skip_cnt++;
- env.test->skip_cnt = 0;
+ bool test_failed = test_state->error_cnt > 0;
+ bool force_log = test_state->force_log;
+ bool print_test = verbose() || force_log || test_failed;
+ int i;
+ struct subtest_state *subtest_state;
+ bool subtest_failed;
+ bool subtest_filtered;
+ bool print_subtest;
+
+ /* we do not print anything in the worker thread */
+ if (env.worker_id != -1)
+ return;
+
+ /* there is nothing to print when verbose log is used and execution
+ * is not in parallel mode
+ */
+ if (verbose() && !par_exec_result)
+ return;
+
+ if (test_state->log_cnt && print_test)
+ print_test_log(test_state->log_buf, test_state->log_cnt);
+
+ if (w && print_test) {
+ jsonw_start_object(w);
+ jsonw_string_field(w, "name", test->test_name);
+ jsonw_uint_field(w, "number", test->test_num);
+ jsonw_write_log_message(w, test_state->log_buf, test_state->log_cnt);
+ jsonw_bool_field(w, "failed", test_failed);
+ jsonw_name(w, "subtests");
+ jsonw_start_array(w);
+ }
+
+ for (i = 0; i < test_state->subtest_num; i++) {
+ subtest_state = &test_state->subtest_states[i];
+ subtest_failed = subtest_state->error_cnt;
+ subtest_filtered = subtest_state->filtered;
+ print_subtest = verbose() || force_log || subtest_failed;
+
+ if ((skip_ok_subtests && !subtest_failed) || subtest_filtered)
+ continue;
+
+ if (subtest_state->log_cnt && print_subtest) {
+ print_test_log(subtest_state->log_buf,
+ subtest_state->log_cnt);
+ }
+
+ print_subtest_name(test->test_num, i + 1,
+ test->test_name, subtest_state->name,
+ test_result(subtest_state->error_cnt,
+ subtest_state->skipped));
+
+ if (w && print_subtest) {
+ jsonw_start_object(w);
+ jsonw_string_field(w, "name", subtest_state->name);
+ jsonw_uint_field(w, "number", i+1);
+ jsonw_write_log_message(w, subtest_state->log_buf, subtest_state->log_cnt);
+ jsonw_bool_field(w, "failed", subtest_failed);
+ jsonw_end_object(w);
+ }
+ }
+
+ if (w && print_test) {
+ jsonw_end_array(w);
+ jsonw_end_object(w);
}
+
+ print_test_result(test, test_state);
}
static void stdio_restore(void);
@@ -98,8 +361,8 @@ static void stdio_restore(void);
/* A bunch of tests set custom affinity per-thread and/or per-process. Reset
* it after each test/sub-test.
*/
-static void reset_affinity() {
-
+static void reset_affinity(void)
+{
cpu_set_t cpuset;
int i, err;
@@ -111,82 +374,131 @@ static void reset_affinity() {
if (err < 0) {
stdio_restore();
fprintf(stderr, "Failed to reset process affinity: %d!\n", err);
- exit(-1);
+ exit(EXIT_ERR_SETUP_INFRA);
}
err = pthread_setaffinity_np(pthread_self(), sizeof(cpuset), &cpuset);
if (err < 0) {
stdio_restore();
fprintf(stderr, "Failed to reset thread affinity: %d!\n", err);
- exit(-1);
+ exit(EXIT_ERR_SETUP_INFRA);
}
}
-void test__end_subtest()
+static void save_netns(void)
{
- struct prog_test_def *test = env.test;
- int sub_error_cnt = test->error_cnt - test->old_error_cnt;
-
- if (sub_error_cnt)
- env.fail_cnt++;
- else
- env.sub_succ_cnt++;
- skip_account();
+ env.saved_netns_fd = open("/proc/self/ns/net", O_RDONLY);
+ if (env.saved_netns_fd == -1) {
+ perror("open(/proc/self/ns/net)");
+ exit(EXIT_ERR_SETUP_INFRA);
+ }
+}
- dump_test_log(test, sub_error_cnt);
+static void restore_netns(void)
+{
+ if (setns(env.saved_netns_fd, CLONE_NEWNET) == -1) {
+ stdio_restore();
+ perror("setns(CLONE_NEWNS)");
+ exit(EXIT_ERR_SETUP_INFRA);
+ }
+}
- fprintf(env.stdout, "#%d/%d %s:%s\n",
- test->test_num, test->subtest_num,
- test->subtest_name, sub_error_cnt ? "FAIL" : "OK");
+void test__end_subtest(void)
+{
+ struct prog_test_def *test = env.test;
+ struct test_state *test_state = env.test_state;
+ struct subtest_state *subtest_state = env.subtest_state;
+
+ if (subtest_state->error_cnt) {
+ test_state->error_cnt++;
+ } else {
+ if (!subtest_state->skipped)
+ test_state->sub_succ_cnt++;
+ else
+ test_state->skip_cnt++;
+ }
- reset_affinity();
+ if (verbose() && !env.workers)
+ print_subtest_name(test->test_num, test_state->subtest_num,
+ test->test_name, subtest_state->name,
+ test_result(subtest_state->error_cnt,
+ subtest_state->skipped));
- free(test->subtest_name);
- test->subtest_name = NULL;
+ stdio_restore_cleanup();
+ env.subtest_state = NULL;
}
-bool test__start_subtest(const char *name)
+bool test__start_subtest(const char *subtest_name)
{
struct prog_test_def *test = env.test;
+ struct test_state *state = env.test_state;
+ struct subtest_state *subtest_state;
+ size_t sub_state_size = sizeof(*subtest_state);
- if (test->subtest_name)
+ if (env.subtest_state)
test__end_subtest();
- test->subtest_num++;
+ state->subtest_num++;
+ state->subtest_states =
+ realloc(state->subtest_states,
+ state->subtest_num * sub_state_size);
+ if (!state->subtest_states) {
+ fprintf(stderr, "Not enough memory to allocate subtest result\n");
+ return false;
+ }
+
+ subtest_state = &state->subtest_states[state->subtest_num - 1];
+
+ memset(subtest_state, 0, sub_state_size);
- if (!name || !name[0]) {
+ if (!subtest_name || !subtest_name[0]) {
fprintf(env.stderr,
"Subtest #%d didn't provide sub-test name!\n",
- test->subtest_num);
+ state->subtest_num);
return false;
}
- if (!should_run(&env.subtest_selector, test->subtest_num, name))
- return false;
-
- test->subtest_name = strdup(name);
- if (!test->subtest_name) {
+ subtest_state->name = strdup(subtest_name);
+ if (!subtest_state->name) {
fprintf(env.stderr,
"Subtest #%d: failed to copy subtest name!\n",
- test->subtest_num);
+ state->subtest_num);
return false;
}
- env.test->old_error_cnt = env.test->error_cnt;
+
+ if (!should_run_subtest(&env.test_selector,
+ &env.subtest_selector,
+ state->subtest_num,
+ test->test_name,
+ subtest_name)) {
+ subtest_state->filtered = true;
+ return false;
+ }
+
+ env.subtest_state = subtest_state;
+ stdio_hijack_init(&subtest_state->log_buf, &subtest_state->log_cnt);
return true;
}
-void test__force_log() {
- env.test->force_log = true;
+void test__force_log(void)
+{
+ env.test_state->force_log = true;
}
void test__skip(void)
{
- env.test->skip_cnt++;
+ if (env.subtest_state)
+ env.subtest_state->skipped = true;
+ else
+ env.test_state->skip_cnt++;
}
void test__fail(void)
{
- env.test->error_cnt++;
+ if (env.subtest_state)
+ env.subtest_state->error_cnt++;
+ else
+ env.test_state->error_cnt++;
}
int test__join_cgroup(const char *path)
@@ -235,24 +547,6 @@ int bpf_find_map(const char *test, struct bpf_object *obj, const char *name)
return bpf_map__fd(map);
}
-static bool is_jit_enabled(void)
-{
- const char *jit_sysctl = "/proc/sys/net/core/bpf_jit_enable";
- bool enabled = false;
- int sysctl_fd;
-
- sysctl_fd = open(jit_sysctl, 0, O_RDONLY);
- if (sysctl_fd != -1) {
- char tmpc;
-
- if (read(sysctl_fd, &tmpc, sizeof(tmpc)) == 1)
- enabled = (tmpc != '0');
- close(sysctl_fd);
- }
-
- return enabled;
-}
-
int compare_map_keys(int map1_fd, int map2_fd)
{
__u32 key, next_key;
@@ -316,49 +610,36 @@ out:
return err;
}
-int extract_build_id(char *build_id, size_t size)
-{
- FILE *fp;
- char *line = NULL;
- size_t len = 0;
-
- fp = popen("readelf -n ./urandom_read | grep 'Build ID'", "r");
- if (fp == NULL)
- return -1;
-
- if (getline(&line, &len, fp) == -1)
- goto err;
- fclose(fp);
-
- if (len > size)
- len = size;
- memcpy(build_id, line, len);
- build_id[len] = '\0';
- free(line);
- return 0;
-err:
- fclose(fp);
- return -1;
-}
-
/* extern declarations for test funcs */
-#define DEFINE_TEST(name) extern void test_##name(void);
+#define DEFINE_TEST(name) \
+ extern void test_##name(void) __weak; \
+ extern void serial_test_##name(void) __weak;
#include <prog_tests/tests.h>
#undef DEFINE_TEST
static struct prog_test_def prog_test_defs[] = {
-#define DEFINE_TEST(name) { \
- .test_name = #name, \
- .run_test = &test_##name, \
+#define DEFINE_TEST(name) { \
+ .test_name = #name, \
+ .run_test = &test_##name, \
+ .run_serial_test = &serial_test_##name, \
},
#include <prog_tests/tests.h>
#undef DEFINE_TEST
};
-const int prog_test_cnt = ARRAY_SIZE(prog_test_defs);
+
+static const int prog_test_cnt = ARRAY_SIZE(prog_test_defs);
+
+static struct test_state test_states[ARRAY_SIZE(prog_test_defs)];
const char *argp_program_version = "test_progs 0.1";
const char *argp_program_bug_address = "<bpf@vger.kernel.org>";
-const char argp_program_doc[] = "BPF selftests test runner";
+static const char argp_program_doc[] =
+"BPF selftests test runner\v"
+"Options accepting the NAMES parameter take either a comma-separated list\n"
+"of test names, or a filename prefixed with @. The file contains one name\n"
+"(or wildcard pattern) per line, and comments beginning with # are ignored.\n"
+"\n"
+"These options can be passed repeatedly to read multiple files.\n";
enum ARG_KEYS {
ARG_TEST_NUM = 'n',
@@ -366,6 +647,13 @@ enum ARG_KEYS {
ARG_TEST_NAME_BLACKLIST = 'b',
ARG_VERIFIER_STATS = 's',
ARG_VERBOSE = 'v',
+ ARG_GET_TEST_CNT = 'c',
+ ARG_LIST_TEST_NAMES = 'l',
+ ARG_TEST_NAME_GLOB_ALLOWLIST = 'a',
+ ARG_TEST_NAME_GLOB_DENYLIST = 'd',
+ ARG_NUM_WORKERS = 'j',
+ ARG_DEBUG = -1,
+ ARG_JSON_SUMMARY = 'J'
};
static const struct argp_option opts[] = {
@@ -379,63 +667,112 @@ static const struct argp_option opts[] = {
"Output verifier statistics", },
{ "verbose", ARG_VERBOSE, "LEVEL", OPTION_ARG_OPTIONAL,
"Verbose output (use -vv or -vvv for progressively verbose output)" },
+ { "count", ARG_GET_TEST_CNT, NULL, 0,
+ "Get number of selected top-level tests " },
+ { "list", ARG_LIST_TEST_NAMES, NULL, 0,
+ "List test names that would run (without running them) " },
+ { "allow", ARG_TEST_NAME_GLOB_ALLOWLIST, "NAMES", 0,
+ "Run tests with name matching the pattern (supports '*' wildcard)." },
+ { "deny", ARG_TEST_NAME_GLOB_DENYLIST, "NAMES", 0,
+ "Don't run tests with name matching the pattern (supports '*' wildcard)." },
+ { "workers", ARG_NUM_WORKERS, "WORKERS", OPTION_ARG_OPTIONAL,
+ "Number of workers to run in parallel, default to number of cpus." },
+ { "debug", ARG_DEBUG, NULL, 0,
+ "print extra debug information for test_progs." },
+ { "json-summary", ARG_JSON_SUMMARY, "FILE", 0, "Write report in json format to this file."},
{},
};
+static FILE *libbpf_capture_stream;
+
+static struct {
+ char *buf;
+ size_t buf_sz;
+} libbpf_output_capture;
+
+/* Creates a global memstream capturing INFO and WARN level output
+ * passed to libbpf_print_fn.
+ * Returns 0 on success, negative value on failure.
+ * On failure the description is printed using PRINT_FAIL and
+ * current test case is marked as fail.
+ */
+int start_libbpf_log_capture(void)
+{
+ if (libbpf_capture_stream) {
+ PRINT_FAIL("%s: libbpf_capture_stream != NULL\n", __func__);
+ return -EINVAL;
+ }
+
+ libbpf_capture_stream = open_memstream(&libbpf_output_capture.buf,
+ &libbpf_output_capture.buf_sz);
+ if (!libbpf_capture_stream) {
+ PRINT_FAIL("%s: open_memstream failed errno=%d\n", __func__, errno);
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
+/* Destroys global memstream created by start_libbpf_log_capture().
+ * Returns a pointer to captured data which has to be freed.
+ * Returned buffer is null terminated.
+ */
+char *stop_libbpf_log_capture(void)
+{
+ char *buf;
+
+ if (!libbpf_capture_stream)
+ return NULL;
+
+ fputc(0, libbpf_capture_stream);
+ fclose(libbpf_capture_stream);
+ libbpf_capture_stream = NULL;
+ /* get 'buf' after fclose(), see open_memstream() documentation */
+ buf = libbpf_output_capture.buf;
+ memset(&libbpf_output_capture, 0, sizeof(libbpf_output_capture));
+ return buf;
+}
+
static int libbpf_print_fn(enum libbpf_print_level level,
const char *format, va_list args)
{
+ if (libbpf_capture_stream && level != LIBBPF_DEBUG) {
+ va_list args2;
+
+ va_copy(args2, args);
+ vfprintf(libbpf_capture_stream, format, args2);
+ }
+
if (env.verbosity < VERBOSE_VERY && level == LIBBPF_DEBUG)
return 0;
+
vfprintf(stdout, format, args);
return 0;
}
-static void free_str_set(const struct str_set *set)
+static void free_test_filter_set(const struct test_filter_set *set)
{
- int i;
+ int i, j;
if (!set)
return;
- for (i = 0; i < set->cnt; i++)
- free((void *)set->strs[i]);
- free(set->strs);
-}
-
-static int parse_str_list(const char *s, struct str_set *set)
-{
- char *input, *state = NULL, *next, **tmp, **strs = NULL;
- int cnt = 0;
+ for (i = 0; i < set->cnt; i++) {
+ free((void *)set->tests[i].name);
+ for (j = 0; j < set->tests[i].subtest_cnt; j++)
+ free((void *)set->tests[i].subtests[j]);
- input = strdup(s);
- if (!input)
- return -ENOMEM;
-
- set->cnt = 0;
- set->strs = NULL;
-
- while ((next = strtok_r(state ? NULL : input, ",", &state))) {
- tmp = realloc(strs, sizeof(*strs) * (cnt + 1));
- if (!tmp)
- goto err;
- strs = tmp;
-
- strs[cnt] = strdup(next);
- if (!strs[cnt])
- goto err;
-
- cnt++;
+ free((void *)set->tests[i].subtests);
}
- set->cnt = cnt;
- set->strs = (const char **)strs;
- free(input);
- return 0;
-err:
- free(strs);
- free(input);
- return -ENOMEM;
+ free((void *)set->tests);
+}
+
+static void free_test_selector(struct test_selector *test_selector)
+{
+ free_test_filter_set(&test_selector->blacklist);
+ free_test_filter_set(&test_selector->whitelist);
+ free(test_selector->num_set);
}
extern int extra_prog_load_log_flags;
@@ -443,6 +780,7 @@ extern int extra_prog_load_log_flags;
static error_t parse_arg(int key, char *arg, struct argp_state *state)
{
struct test_env *env = state->input;
+ int err = 0;
switch (key) {
case ARG_TEST_NUM: {
@@ -465,30 +803,30 @@ static error_t parse_arg(int key, char *arg, struct argp_state *state)
}
break;
}
+ case ARG_TEST_NAME_GLOB_ALLOWLIST:
case ARG_TEST_NAME: {
- char *subtest_str = strchr(arg, '/');
+ if (arg[0] == '@')
+ err = parse_test_list_file(arg + 1,
+ &env->test_selector.whitelist,
+ key == ARG_TEST_NAME_GLOB_ALLOWLIST);
+ else
+ err = parse_test_list(arg,
+ &env->test_selector.whitelist,
+ key == ARG_TEST_NAME_GLOB_ALLOWLIST);
- if (subtest_str) {
- *subtest_str = '\0';
- if (parse_str_list(subtest_str + 1,
- &env->subtest_selector.whitelist))
- return -ENOMEM;
- }
- if (parse_str_list(arg, &env->test_selector.whitelist))
- return -ENOMEM;
break;
}
+ case ARG_TEST_NAME_GLOB_DENYLIST:
case ARG_TEST_NAME_BLACKLIST: {
- char *subtest_str = strchr(arg, '/');
+ if (arg[0] == '@')
+ err = parse_test_list_file(arg + 1,
+ &env->test_selector.blacklist,
+ key == ARG_TEST_NAME_GLOB_DENYLIST);
+ else
+ err = parse_test_list(arg,
+ &env->test_selector.blacklist,
+ key == ARG_TEST_NAME_GLOB_DENYLIST);
- if (subtest_str) {
- *subtest_str = '\0';
- if (parse_str_list(subtest_str + 1,
- &env->subtest_selector.blacklist))
- return -ENOMEM;
- }
- if (parse_str_list(arg, &env->test_selector.blacklist))
- return -ENOMEM;
break;
}
case ARG_VERIFIER_STATS:
@@ -510,6 +848,43 @@ static error_t parse_arg(int key, char *arg, struct argp_state *state)
return -EINVAL;
}
}
+
+ if (verbose()) {
+ if (setenv("SELFTESTS_VERBOSE", "1", 1) == -1) {
+ fprintf(stderr,
+ "Unable to setenv SELFTESTS_VERBOSE=1 (errno=%d)",
+ errno);
+ return -EINVAL;
+ }
+ }
+
+ break;
+ case ARG_GET_TEST_CNT:
+ env->get_test_cnt = true;
+ break;
+ case ARG_LIST_TEST_NAMES:
+ env->list_test_names = true;
+ break;
+ case ARG_NUM_WORKERS:
+ if (arg) {
+ env->workers = atoi(arg);
+ if (!env->workers) {
+ fprintf(stderr, "Invalid number of worker: %s.", arg);
+ return -EINVAL;
+ }
+ } else {
+ env->workers = get_nprocs();
+ }
+ break;
+ case ARG_DEBUG:
+ env->debug = true;
+ break;
+ case ARG_JSON_SUMMARY:
+ env->json = fopen(arg, "w");
+ if (env->json == NULL) {
+ perror("Failed to open json summary file");
+ return -errno;
+ }
break;
case ARGP_KEY_ARG:
argp_usage(state);
@@ -519,49 +894,7 @@ static error_t parse_arg(int key, char *arg, struct argp_state *state)
default:
return ARGP_ERR_UNKNOWN;
}
- return 0;
-}
-
-static void stdio_hijack(void)
-{
-#ifdef __GLIBC__
- env.stdout = stdout;
- env.stderr = stderr;
-
- if (env.verbosity > VERBOSE_NONE) {
- /* nothing to do, output to stdout by default */
- return;
- }
-
- /* stdout and stderr -> buffer */
- fflush(stdout);
-
- stdout = open_memstream(&env.log_buf, &env.log_cnt);
- if (!stdout) {
- stdout = env.stdout;
- perror("open_memstream");
- return;
- }
-
- stderr = stdout;
-#endif
-}
-
-static void stdio_restore(void)
-{
-#ifdef __GLIBC__
- if (stdout == env.stdout)
- return;
-
- fclose(stdout);
- free(env.log_buf);
-
- env.log_buf = NULL;
- env.log_cnt = 0;
-
- stdout = env.stdout;
- stderr = env.stderr;
-#endif
+ return err;
}
/*
@@ -581,16 +914,113 @@ int cd_flavor_subdir(const char *exec_name)
const char *flavor = strrchr(exec_name, '/');
if (!flavor)
- return 0;
- flavor++;
+ flavor = exec_name;
+ else
+ flavor++;
+
flavor = strrchr(flavor, '-');
if (!flavor)
return 0;
flavor++;
- fprintf(stdout, "Switching to flavor '%s' subdirectory...\n", flavor);
+ if (verbose())
+ fprintf(stdout, "Switching to flavor '%s' subdirectory...\n", flavor);
+
return chdir(flavor);
}
+int trigger_module_test_read(int read_sz)
+{
+ int fd, err;
+
+ fd = open(BPF_TESTMOD_TEST_FILE, O_RDONLY);
+ err = -errno;
+ if (!ASSERT_GE(fd, 0, "testmod_file_open"))
+ return err;
+
+ read(fd, NULL, read_sz);
+ close(fd);
+
+ return 0;
+}
+
+int trigger_module_test_write(int write_sz)
+{
+ int fd, err;
+ char *buf = malloc(write_sz);
+
+ if (!buf)
+ return -ENOMEM;
+
+ memset(buf, 'a', write_sz);
+ buf[write_sz-1] = '\0';
+
+ fd = open(BPF_TESTMOD_TEST_FILE, O_WRONLY);
+ err = -errno;
+ if (!ASSERT_GE(fd, 0, "testmod_file_open")) {
+ free(buf);
+ return err;
+ }
+
+ write(fd, buf, write_sz);
+ close(fd);
+ free(buf);
+ return 0;
+}
+
+int write_sysctl(const char *sysctl, const char *value)
+{
+ int fd, err, len;
+
+ fd = open(sysctl, O_WRONLY);
+ if (!ASSERT_NEQ(fd, -1, "open sysctl"))
+ return -1;
+
+ len = strlen(value);
+ err = write(fd, value, len);
+ close(fd);
+ if (!ASSERT_EQ(err, len, "write sysctl"))
+ return -1;
+
+ return 0;
+}
+
+int get_bpf_max_tramp_links_from(struct btf *btf)
+{
+ const struct btf_enum *e;
+ const struct btf_type *t;
+ __u32 i, type_cnt;
+ const char *name;
+ __u16 j, vlen;
+
+ for (i = 1, type_cnt = btf__type_cnt(btf); i < type_cnt; i++) {
+ t = btf__type_by_id(btf, i);
+ if (!t || !btf_is_enum(t) || t->name_off)
+ continue;
+ e = btf_enum(t);
+ for (j = 0, vlen = btf_vlen(t); j < vlen; j++, e++) {
+ name = btf__str_by_offset(btf, e->name_off);
+ if (name && !strcmp(name, "BPF_MAX_TRAMP_LINKS"))
+ return e->val;
+ }
+ }
+
+ return -1;
+}
+
+int get_bpf_max_tramp_links(void)
+{
+ struct btf *vmlinux_btf;
+ int ret;
+
+ vmlinux_btf = btf__load_vmlinux_btf();
+ if (!ASSERT_OK_PTR(vmlinux_btf, "vmlinux btf"))
+ return -1;
+ ret = get_bpf_max_tramp_links_from(vmlinux_btf);
+ btf__free(vmlinux_btf);
+
+ return ret;
+}
+
#define MAX_BACKTRACE_SZ 128
void crash_handler(int signum)
{
@@ -599,15 +1029,634 @@ void crash_handler(int signum)
sz = backtrace(bt, ARRAY_SIZE(bt));
- if (env.test)
- dump_test_log(env.test, true);
if (env.stdout)
stdio_restore();
-
+ if (env.test) {
+ env.test_state->error_cnt++;
+ dump_test_log(env.test, env.test_state, true, false, NULL);
+ }
+ if (env.worker_id != -1)
+ fprintf(stderr, "[%d]: ", env.worker_id);
fprintf(stderr, "Caught signal #%d!\nStack trace:\n", signum);
backtrace_symbols_fd(bt, sz, STDERR_FILENO);
}
+static void sigint_handler(int signum)
+{
+ int i;
+
+ for (i = 0; i < env.workers; i++)
+ if (env.worker_socks[i] > 0)
+ close(env.worker_socks[i]);
+}
+
+static int current_test_idx;
+static pthread_mutex_t current_test_lock;
+static pthread_mutex_t stdout_output_lock;
+
+static inline const char *str_msg(const struct msg *msg, char *buf)
+{
+ switch (msg->type) {
+ case MSG_DO_TEST:
+ sprintf(buf, "MSG_DO_TEST %d", msg->do_test.num);
+ break;
+ case MSG_TEST_DONE:
+ sprintf(buf, "MSG_TEST_DONE %d (log: %d)",
+ msg->test_done.num,
+ msg->test_done.have_log);
+ break;
+ case MSG_SUBTEST_DONE:
+ sprintf(buf, "MSG_SUBTEST_DONE %d (log: %d)",
+ msg->subtest_done.num,
+ msg->subtest_done.have_log);
+ break;
+ case MSG_TEST_LOG:
+ sprintf(buf, "MSG_TEST_LOG (cnt: %zu, last: %d)",
+ strlen(msg->test_log.log_buf),
+ msg->test_log.is_last);
+ break;
+ case MSG_EXIT:
+ sprintf(buf, "MSG_EXIT");
+ break;
+ default:
+ sprintf(buf, "UNKNOWN");
+ break;
+ }
+
+ return buf;
+}
+
+static int send_message(int sock, const struct msg *msg)
+{
+ char buf[256];
+
+ if (env.debug)
+ fprintf(stderr, "Sending msg: %s\n", str_msg(msg, buf));
+ return send(sock, msg, sizeof(*msg), 0);
+}
+
+static int recv_message(int sock, struct msg *msg)
+{
+ int ret;
+ char buf[256];
+
+ memset(msg, 0, sizeof(*msg));
+ ret = recv(sock, msg, sizeof(*msg), 0);
+ if (ret >= 0) {
+ if (env.debug)
+ fprintf(stderr, "Received msg: %s\n", str_msg(msg, buf));
+ }
+ return ret;
+}
+
+static void run_one_test(int test_num)
+{
+ struct prog_test_def *test = &prog_test_defs[test_num];
+ struct test_state *state = &test_states[test_num];
+
+ env.test = test;
+ env.test_state = state;
+
+ stdio_hijack(&state->log_buf, &state->log_cnt);
+
+ if (test->run_test)
+ test->run_test();
+ else if (test->run_serial_test)
+ test->run_serial_test();
+
+ /* ensure last sub-test is finalized properly */
+ if (env.subtest_state)
+ test__end_subtest();
+
+ state->tested = true;
+
+ if (verbose() && env.worker_id == -1)
+ print_test_result(test, state);
+
+ reset_affinity();
+ restore_netns();
+ if (test->need_cgroup_cleanup)
+ cleanup_cgroup_environment();
+
+ stdio_restore();
+ free(stop_libbpf_log_capture());
+
+ dump_test_log(test, state, false, false, NULL);
+}
+
+struct dispatch_data {
+ int worker_id;
+ int sock_fd;
+};
+
+static int read_prog_test_msg(int sock_fd, struct msg *msg, enum msg_type type)
+{
+ if (recv_message(sock_fd, msg) < 0)
+ return 1;
+
+ if (msg->type != type) {
+ printf("%s: unexpected message type %d. expected %d\n", __func__, msg->type, type);
+ return 1;
+ }
+
+ return 0;
+}
+
+static int dispatch_thread_read_log(int sock_fd, char **log_buf, size_t *log_cnt)
+{
+ FILE *log_fp = NULL;
+ int result = 0;
+
+ log_fp = open_memstream(log_buf, log_cnt);
+ if (!log_fp)
+ return 1;
+
+ while (true) {
+ struct msg msg;
+
+ if (read_prog_test_msg(sock_fd, &msg, MSG_TEST_LOG)) {
+ result = 1;
+ goto out;
+ }
+
+ fprintf(log_fp, "%s", msg.test_log.log_buf);
+ if (msg.test_log.is_last)
+ break;
+ }
+
+out:
+ fclose(log_fp);
+ log_fp = NULL;
+ return result;
+}
+
+static int dispatch_thread_send_subtests(int sock_fd, struct test_state *state)
+{
+ struct msg msg;
+ struct subtest_state *subtest_state;
+ int subtest_num = state->subtest_num;
+
+ state->subtest_states = malloc(subtest_num * sizeof(*subtest_state));
+
+ for (int i = 0; i < subtest_num; i++) {
+ subtest_state = &state->subtest_states[i];
+
+ memset(subtest_state, 0, sizeof(*subtest_state));
+
+ if (read_prog_test_msg(sock_fd, &msg, MSG_SUBTEST_DONE))
+ return 1;
+
+ subtest_state->name = strdup(msg.subtest_done.name);
+ subtest_state->error_cnt = msg.subtest_done.error_cnt;
+ subtest_state->skipped = msg.subtest_done.skipped;
+ subtest_state->filtered = msg.subtest_done.filtered;
+
+ /* collect all logs */
+ if (msg.subtest_done.have_log)
+ if (dispatch_thread_read_log(sock_fd,
+ &subtest_state->log_buf,
+ &subtest_state->log_cnt))
+ return 1;
+ }
+
+ return 0;
+}
+
+static void *dispatch_thread(void *ctx)
+{
+ struct dispatch_data *data = ctx;
+ int sock_fd;
+
+ sock_fd = data->sock_fd;
+
+ while (true) {
+ int test_to_run = -1;
+ struct prog_test_def *test;
+ struct test_state *state;
+
+ /* grab a test */
+ {
+ pthread_mutex_lock(&current_test_lock);
+
+ if (current_test_idx >= prog_test_cnt) {
+ pthread_mutex_unlock(&current_test_lock);
+ goto done;
+ }
+
+ test = &prog_test_defs[current_test_idx];
+ test_to_run = current_test_idx;
+ current_test_idx++;
+
+ pthread_mutex_unlock(&current_test_lock);
+ }
+
+ if (!test->should_run || test->run_serial_test)
+ continue;
+
+ /* run test through worker */
+ {
+ struct msg msg_do_test;
+
+ memset(&msg_do_test, 0, sizeof(msg_do_test));
+ msg_do_test.type = MSG_DO_TEST;
+ msg_do_test.do_test.num = test_to_run;
+ if (send_message(sock_fd, &msg_do_test) < 0) {
+ perror("Fail to send command");
+ goto done;
+ }
+ env.worker_current_test[data->worker_id] = test_to_run;
+ }
+
+ /* wait for test done */
+ do {
+ struct msg msg;
+
+ if (read_prog_test_msg(sock_fd, &msg, MSG_TEST_DONE))
+ goto error;
+ if (test_to_run != msg.test_done.num)
+ goto error;
+
+ state = &test_states[test_to_run];
+ state->tested = true;
+ state->error_cnt = msg.test_done.error_cnt;
+ state->skip_cnt = msg.test_done.skip_cnt;
+ state->sub_succ_cnt = msg.test_done.sub_succ_cnt;
+ state->subtest_num = msg.test_done.subtest_num;
+
+ /* collect all logs */
+ if (msg.test_done.have_log) {
+ if (dispatch_thread_read_log(sock_fd,
+ &state->log_buf,
+ &state->log_cnt))
+ goto error;
+ }
+
+ /* collect all subtests and subtest logs */
+ if (!state->subtest_num)
+ break;
+
+ if (dispatch_thread_send_subtests(sock_fd, state))
+ goto error;
+ } while (false);
+
+ pthread_mutex_lock(&stdout_output_lock);
+ dump_test_log(test, state, false, true, NULL);
+ pthread_mutex_unlock(&stdout_output_lock);
+ } /* while (true) */
+error:
+ if (env.debug)
+ fprintf(stderr, "[%d]: Protocol/IO error: %s.\n", data->worker_id, strerror(errno));
+
+done:
+ {
+ struct msg msg_exit;
+
+ msg_exit.type = MSG_EXIT;
+ if (send_message(sock_fd, &msg_exit) < 0) {
+ if (env.debug)
+ fprintf(stderr, "[%d]: send_message msg_exit: %s.\n",
+ data->worker_id, strerror(errno));
+ }
+ }
+ return NULL;
+}
+
+static void calculate_summary_and_print_errors(struct test_env *env)
+{
+ int i;
+ int succ_cnt = 0, fail_cnt = 0, sub_succ_cnt = 0, skip_cnt = 0;
+ json_writer_t *w = NULL;
+
+ for (i = 0; i < prog_test_cnt; i++) {
+ struct test_state *state = &test_states[i];
+
+ if (!state->tested)
+ continue;
+
+ sub_succ_cnt += state->sub_succ_cnt;
+ skip_cnt += state->skip_cnt;
+
+ if (state->error_cnt)
+ fail_cnt++;
+ else
+ succ_cnt++;
+ }
+
+ if (env->json) {
+ w = jsonw_new(env->json);
+ if (!w)
+ fprintf(env->stderr, "Failed to create new JSON stream.");
+ }
+
+ if (w) {
+ jsonw_start_object(w);
+ jsonw_uint_field(w, "success", succ_cnt);
+ jsonw_uint_field(w, "success_subtest", sub_succ_cnt);
+ jsonw_uint_field(w, "skipped", skip_cnt);
+ jsonw_uint_field(w, "failed", fail_cnt);
+ jsonw_name(w, "results");
+ jsonw_start_array(w);
+ }
+
+ /*
+ * We only print error logs summary when there are failed tests and
+ * verbose mode is not enabled. Otherwise, results may be incosistent.
+ *
+ */
+ if (!verbose() && fail_cnt) {
+ printf("\nAll error logs:\n");
+
+ /* print error logs again */
+ for (i = 0; i < prog_test_cnt; i++) {
+ struct prog_test_def *test = &prog_test_defs[i];
+ struct test_state *state = &test_states[i];
+
+ if (!state->tested || !state->error_cnt)
+ continue;
+
+ dump_test_log(test, state, true, true, w);
+ }
+ }
+
+ if (w) {
+ jsonw_end_array(w);
+ jsonw_end_object(w);
+ jsonw_destroy(&w);
+ }
+
+ if (env->json)
+ fclose(env->json);
+
+ printf("Summary: %d/%d PASSED, %d SKIPPED, %d FAILED\n",
+ succ_cnt, sub_succ_cnt, skip_cnt, fail_cnt);
+
+ env->succ_cnt = succ_cnt;
+ env->sub_succ_cnt = sub_succ_cnt;
+ env->fail_cnt = fail_cnt;
+ env->skip_cnt = skip_cnt;
+}
+
+static void server_main(void)
+{
+ pthread_t *dispatcher_threads;
+ struct dispatch_data *data;
+ struct sigaction sigact_int = {
+ .sa_handler = sigint_handler,
+ .sa_flags = SA_RESETHAND,
+ };
+ int i;
+
+ sigaction(SIGINT, &sigact_int, NULL);
+
+ dispatcher_threads = calloc(sizeof(pthread_t), env.workers);
+ data = calloc(sizeof(struct dispatch_data), env.workers);
+
+ env.worker_current_test = calloc(sizeof(int), env.workers);
+ for (i = 0; i < env.workers; i++) {
+ int rc;
+
+ data[i].worker_id = i;
+ data[i].sock_fd = env.worker_socks[i];
+ rc = pthread_create(&dispatcher_threads[i], NULL, dispatch_thread, &data[i]);
+ if (rc < 0) {
+ perror("Failed to launch dispatcher thread");
+ exit(EXIT_ERR_SETUP_INFRA);
+ }
+ }
+
+ /* wait for all dispatcher to finish */
+ for (i = 0; i < env.workers; i++) {
+ while (true) {
+ int ret = pthread_tryjoin_np(dispatcher_threads[i], NULL);
+
+ if (!ret) {
+ break;
+ } else if (ret == EBUSY) {
+ if (env.debug)
+ fprintf(stderr, "Still waiting for thread %d (test %d).\n",
+ i, env.worker_current_test[i] + 1);
+ usleep(1000 * 1000);
+ continue;
+ } else {
+ fprintf(stderr, "Unexpected error joining dispatcher thread: %d", ret);
+ break;
+ }
+ }
+ }
+ free(dispatcher_threads);
+ free(env.worker_current_test);
+ free(data);
+
+ /* run serial tests */
+ save_netns();
+
+ for (int i = 0; i < prog_test_cnt; i++) {
+ struct prog_test_def *test = &prog_test_defs[i];
+
+ if (!test->should_run || !test->run_serial_test)
+ continue;
+
+ run_one_test(i);
+ }
+
+ /* generate summary */
+ fflush(stderr);
+ fflush(stdout);
+
+ calculate_summary_and_print_errors(&env);
+
+ /* reap all workers */
+ for (i = 0; i < env.workers; i++) {
+ int wstatus, pid;
+
+ pid = waitpid(env.worker_pids[i], &wstatus, 0);
+ if (pid != env.worker_pids[i])
+ perror("Unable to reap worker");
+ }
+}
+
+static void worker_main_send_log(int sock, char *log_buf, size_t log_cnt)
+{
+ char *src;
+ size_t slen;
+
+ src = log_buf;
+ slen = log_cnt;
+ while (slen) {
+ struct msg msg_log;
+ char *dest;
+ size_t len;
+
+ memset(&msg_log, 0, sizeof(msg_log));
+ msg_log.type = MSG_TEST_LOG;
+ dest = msg_log.test_log.log_buf;
+ len = slen >= MAX_LOG_TRUNK_SIZE ? MAX_LOG_TRUNK_SIZE : slen;
+ memcpy(dest, src, len);
+
+ src += len;
+ slen -= len;
+ if (!slen)
+ msg_log.test_log.is_last = true;
+
+ assert(send_message(sock, &msg_log) >= 0);
+ }
+}
+
+static void free_subtest_state(struct subtest_state *state)
+{
+ if (state->log_buf) {
+ free(state->log_buf);
+ state->log_buf = NULL;
+ state->log_cnt = 0;
+ }
+ free(state->name);
+ state->name = NULL;
+}
+
+static int worker_main_send_subtests(int sock, struct test_state *state)
+{
+ int i, result = 0;
+ struct msg msg;
+ struct subtest_state *subtest_state;
+
+ memset(&msg, 0, sizeof(msg));
+ msg.type = MSG_SUBTEST_DONE;
+
+ for (i = 0; i < state->subtest_num; i++) {
+ subtest_state = &state->subtest_states[i];
+
+ msg.subtest_done.num = i;
+
+ strncpy(msg.subtest_done.name, subtest_state->name, MAX_SUBTEST_NAME);
+
+ msg.subtest_done.error_cnt = subtest_state->error_cnt;
+ msg.subtest_done.skipped = subtest_state->skipped;
+ msg.subtest_done.filtered = subtest_state->filtered;
+ msg.subtest_done.have_log = false;
+
+ if (verbose() || state->force_log || subtest_state->error_cnt) {
+ if (subtest_state->log_cnt)
+ msg.subtest_done.have_log = true;
+ }
+
+ if (send_message(sock, &msg) < 0) {
+ perror("Fail to send message done");
+ result = 1;
+ goto out;
+ }
+
+ /* send logs */
+ if (msg.subtest_done.have_log)
+ worker_main_send_log(sock, subtest_state->log_buf, subtest_state->log_cnt);
+
+ free_subtest_state(subtest_state);
+ free(subtest_state->name);
+ }
+
+out:
+ for (; i < state->subtest_num; i++)
+ free_subtest_state(&state->subtest_states[i]);
+ free(state->subtest_states);
+ return result;
+}
+
+static int worker_main(int sock)
+{
+ save_netns();
+
+ while (true) {
+ /* receive command */
+ struct msg msg;
+
+ if (recv_message(sock, &msg) < 0)
+ goto out;
+
+ switch (msg.type) {
+ case MSG_EXIT:
+ if (env.debug)
+ fprintf(stderr, "[%d]: worker exit.\n",
+ env.worker_id);
+ goto out;
+ case MSG_DO_TEST: {
+ int test_to_run = msg.do_test.num;
+ struct prog_test_def *test = &prog_test_defs[test_to_run];
+ struct test_state *state = &test_states[test_to_run];
+ struct msg msg;
+
+ if (env.debug)
+ fprintf(stderr, "[%d]: #%d:%s running.\n",
+ env.worker_id,
+ test_to_run + 1,
+ test->test_name);
+
+ run_one_test(test_to_run);
+
+ memset(&msg, 0, sizeof(msg));
+ msg.type = MSG_TEST_DONE;
+ msg.test_done.num = test_to_run;
+ msg.test_done.error_cnt = state->error_cnt;
+ msg.test_done.skip_cnt = state->skip_cnt;
+ msg.test_done.sub_succ_cnt = state->sub_succ_cnt;
+ msg.test_done.subtest_num = state->subtest_num;
+ msg.test_done.have_log = false;
+
+ if (verbose() || state->force_log || state->error_cnt) {
+ if (state->log_cnt)
+ msg.test_done.have_log = true;
+ }
+ if (send_message(sock, &msg) < 0) {
+ perror("Fail to send message done");
+ goto out;
+ }
+
+ /* send logs */
+ if (msg.test_done.have_log)
+ worker_main_send_log(sock, state->log_buf, state->log_cnt);
+
+ if (state->log_buf) {
+ free(state->log_buf);
+ state->log_buf = NULL;
+ state->log_cnt = 0;
+ }
+
+ if (state->subtest_num)
+ if (worker_main_send_subtests(sock, state))
+ goto out;
+
+ if (env.debug)
+ fprintf(stderr, "[%d]: #%d:%s done.\n",
+ env.worker_id,
+ test_to_run + 1,
+ test->test_name);
+ break;
+ } /* case MSG_DO_TEST */
+ default:
+ if (env.debug)
+ fprintf(stderr, "[%d]: unknown message.\n", env.worker_id);
+ return -1;
+ }
+ }
+out:
+ return 0;
+}
+
+static void free_test_states(void)
+{
+ int i, j;
+
+ for (i = 0; i < ARRAY_SIZE(prog_test_defs); i++) {
+ struct test_state *test_state = &test_states[i];
+
+ for (j = 0; j < test_state->subtest_num; j++)
+ free_subtest_state(&test_state->subtest_states[j]);
+
+ free(test_state->subtest_states);
+ free(test_state->log_buf);
+ test_state->subtest_states = NULL;
+ test_state->log_buf = NULL;
+ }
+}
+
int main(int argc, char **argv)
{
static const struct argp argp = {
@@ -618,7 +1667,7 @@ int main(int argc, char **argv)
struct sigaction sigact = {
.sa_handler = crash_handler,
.sa_flags = SA_RESETHAND,
- };
+ };
int err, i;
sigaction(SIGSEGV, &sigact, NULL);
@@ -631,6 +1680,8 @@ int main(int argc, char **argv)
if (err)
return err;
+ /* Use libbpf 1.0 API mode */
+ libbpf_set_strict_mode(LIBBPF_STRICT_ALL);
libbpf_set_print(libbpf_print_fn);
srand(time(NULL));
@@ -643,49 +1694,122 @@ int main(int argc, char **argv)
return -1;
}
- stdio_hijack();
+ env.stdout = stdout;
+ env.stderr = stderr;
+
+ env.has_testmod = true;
+ if (!env.list_test_names) {
+ /* ensure previous instance of the module is unloaded */
+ unload_bpf_testmod(verbose());
+
+ if (load_bpf_testmod(verbose())) {
+ fprintf(env.stderr, "WARNING! Selftests relying on bpf_testmod.ko will be skipped.\n");
+ env.has_testmod = false;
+ }
+ }
+
+ /* initializing tests */
for (i = 0; i < prog_test_cnt; i++) {
struct prog_test_def *test = &prog_test_defs[i];
- env.test = test;
test->test_num = i + 1;
+ test->should_run = should_run(&env.test_selector,
+ test->test_num, test->test_name);
+
+ if ((test->run_test == NULL && test->run_serial_test == NULL) ||
+ (test->run_test != NULL && test->run_serial_test != NULL)) {
+ fprintf(stderr, "Test %d:%s must have either test_%s() or serial_test_%sl() defined.\n",
+ test->test_num, test->test_name, test->test_name, test->test_name);
+ exit(EXIT_ERR_SETUP_INFRA);
+ }
+ }
- if (!should_run(&env.test_selector,
- test->test_num, test->test_name))
- continue;
+ /* ignore workers if we are just listing */
+ if (env.get_test_cnt || env.list_test_names)
+ env.workers = 0;
+
+ /* launch workers if requested */
+ env.worker_id = -1; /* main process */
+ if (env.workers) {
+ env.worker_pids = calloc(sizeof(__pid_t), env.workers);
+ env.worker_socks = calloc(sizeof(int), env.workers);
+ if (env.debug)
+ fprintf(stdout, "Launching %d workers.\n", env.workers);
+ for (i = 0; i < env.workers; i++) {
+ int sv[2];
+ pid_t pid;
+
+ if (socketpair(AF_UNIX, SOCK_SEQPACKET | SOCK_CLOEXEC, 0, sv) < 0) {
+ perror("Fail to create worker socket");
+ return -1;
+ }
+ pid = fork();
+ if (pid < 0) {
+ perror("Failed to fork worker");
+ return -1;
+ } else if (pid != 0) { /* main process */
+ close(sv[1]);
+ env.worker_pids[i] = pid;
+ env.worker_socks[i] = sv[0];
+ } else { /* inside each worker process */
+ close(sv[0]);
+ env.worker_id = i;
+ return worker_main(sv[1]);
+ }
+ }
- test->run_test();
- /* ensure last sub-test is finalized properly */
- if (test->subtest_name)
- test__end_subtest();
+ if (env.worker_id == -1) {
+ server_main();
+ goto out;
+ }
+ }
- test->tested = true;
- if (test->error_cnt)
- env.fail_cnt++;
- else
+ /* The rest of the main process */
+
+ /* on single mode */
+ save_netns();
+
+ for (i = 0; i < prog_test_cnt; i++) {
+ struct prog_test_def *test = &prog_test_defs[i];
+
+ if (!test->should_run)
+ continue;
+
+ if (env.get_test_cnt) {
env.succ_cnt++;
- skip_account();
+ continue;
+ }
- dump_test_log(test, test->error_cnt);
+ if (env.list_test_names) {
+ fprintf(env.stdout, "%s\n", test->test_name);
+ env.succ_cnt++;
+ continue;
+ }
- fprintf(env.stdout, "#%d %s:%s\n",
- test->test_num, test->test_name,
- test->error_cnt ? "FAIL" : "OK");
+ run_one_test(i);
+ }
- reset_affinity();
- if (test->need_cgroup_cleanup)
- cleanup_cgroup_environment();
+ if (env.get_test_cnt) {
+ printf("%d\n", env.succ_cnt);
+ goto out;
}
- stdio_restore();
- fprintf(stdout, "Summary: %d/%d PASSED, %d SKIPPED, %d FAILED\n",
- env.succ_cnt, env.sub_succ_cnt, env.skip_cnt, env.fail_cnt);
-
- free_str_set(&env.test_selector.blacklist);
- free_str_set(&env.test_selector.whitelist);
- free(env.test_selector.num_set);
- free_str_set(&env.subtest_selector.blacklist);
- free_str_set(&env.subtest_selector.whitelist);
- free(env.subtest_selector.num_set);
+
+ if (env.list_test_names)
+ goto out;
+
+ calculate_summary_and_print_errors(&env);
+
+ close(env.saved_netns_fd);
+out:
+ if (!env.list_test_names && env.has_testmod)
+ unload_bpf_testmod(verbose());
+
+ free_test_selector(&env.test_selector);
+ free_test_selector(&env.subtest_selector);
+ free_test_states();
+
+ if (env.succ_cnt + env.fail_cnt + env.skip_cnt == 0)
+ return EXIT_NO_TEST;
return env.fail_cnt ? EXIT_FAILURE : EXIT_SUCCESS;
}
diff --git a/tools/testing/selftests/bpf/test_progs.h b/tools/testing/selftests/bpf/test_progs.h
index f4503c926aca..0ba5a20b19ba 100644
--- a/tools/testing/selftests/bpf/test_progs.h
+++ b/tools/testing/selftests/bpf/test_progs.h
@@ -1,4 +1,7 @@
/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef __TEST_PROGS_H
+#define __TEST_PROGS_H
+
#include <stdio.h>
#include <unistd.h>
#include <errno.h>
@@ -16,7 +19,6 @@ typedef __u16 __sum16;
#include <linux/if_packet.h>
#include <linux/ip.h>
#include <linux/ipv6.h>
-#include <netinet/tcp.h>
#include <linux/filter.h>
#include <linux/perf_event.h>
#include <linux/socket.h>
@@ -26,6 +28,7 @@ typedef __u16 __sum16;
#include <sys/wait.h>
#include <sys/types.h>
#include <sys/time.h>
+#include <sys/param.h>
#include <fcntl.h>
#include <pthread.h>
#include <linux/bpf.h>
@@ -38,7 +41,6 @@ typedef __u16 __sum16;
#include <bpf/bpf_endian.h>
#include "trace_helpers.h"
#include "testing_helpers.h"
-#include "flow_dissector_load.h"
enum verbosity {
VERBOSE_NONE,
@@ -47,46 +49,132 @@ enum verbosity {
VERBOSE_SUPER,
};
-struct str_set {
- const char **strs;
+struct test_filter {
+ char *name;
+ char **subtests;
+ int subtest_cnt;
+};
+
+struct test_filter_set {
+ struct test_filter *tests;
int cnt;
};
struct test_selector {
- struct str_set whitelist;
- struct str_set blacklist;
+ struct test_filter_set whitelist;
+ struct test_filter_set blacklist;
bool *num_set;
int num_set_len;
};
+struct subtest_state {
+ char *name;
+ size_t log_cnt;
+ char *log_buf;
+ int error_cnt;
+ bool skipped;
+ bool filtered;
+
+ FILE *stdout;
+};
+
+struct test_state {
+ bool tested;
+ bool force_log;
+
+ int error_cnt;
+ int skip_cnt;
+ int sub_succ_cnt;
+
+ struct subtest_state *subtest_states;
+ int subtest_num;
+
+ size_t log_cnt;
+ char *log_buf;
+
+ FILE *stdout;
+};
+
struct test_env {
struct test_selector test_selector;
struct test_selector subtest_selector;
bool verifier_stats;
+ bool debug;
enum verbosity verbosity;
bool jit_enabled;
+ bool has_testmod;
+ bool get_test_cnt;
+ bool list_test_names;
+
+ struct prog_test_def *test; /* current running test */
+ struct test_state *test_state; /* current running test state */
+ struct subtest_state *subtest_state; /* current running subtest state */
- struct prog_test_def *test;
FILE *stdout;
FILE *stderr;
- char *log_buf;
- size_t log_cnt;
int nr_cpus;
+ FILE *json;
int succ_cnt; /* successful tests */
int sub_succ_cnt; /* successful sub-tests */
int fail_cnt; /* total failed tests + sub-tests */
int skip_cnt; /* skipped tests */
+
+ int saved_netns_fd;
+ int workers; /* number of worker process */
+ int worker_id; /* id number of current worker, main process is -1 */
+ pid_t *worker_pids; /* array of worker pids */
+ int *worker_socks; /* array of worker socks */
+ int *worker_current_test; /* array of current running test for each worker */
+};
+
+#define MAX_LOG_TRUNK_SIZE 8192
+#define MAX_SUBTEST_NAME 1024
+enum msg_type {
+ MSG_DO_TEST = 0,
+ MSG_TEST_DONE = 1,
+ MSG_TEST_LOG = 2,
+ MSG_SUBTEST_DONE = 3,
+ MSG_EXIT = 255,
+};
+struct msg {
+ enum msg_type type;
+ union {
+ struct {
+ int num;
+ } do_test;
+ struct {
+ int num;
+ int sub_succ_cnt;
+ int error_cnt;
+ int skip_cnt;
+ bool have_log;
+ int subtest_num;
+ } test_done;
+ struct {
+ char log_buf[MAX_LOG_TRUNK_SIZE + 1];
+ bool is_last;
+ } test_log;
+ struct {
+ int num;
+ char name[MAX_SUBTEST_NAME + 1];
+ int error_cnt;
+ bool skipped;
+ bool filtered;
+ bool have_log;
+ } subtest_done;
+ };
};
extern struct test_env env;
-extern void test__force_log();
-extern bool test__start_subtest(const char *name);
-extern void test__skip(void);
-extern void test__fail(void);
-extern int test__join_cgroup(const char *path);
+void test__force_log(void);
+bool test__start_subtest(const char *name);
+void test__end_subtest(void);
+void test__skip(void);
+void test__fail(void);
+int test__join_cgroup(const char *path);
#define PRINT_FAIL(format...) \
({ \
@@ -126,20 +214,255 @@ extern int test__join_cgroup(const char *path);
#define CHECK_ATTR(condition, tag, format...) \
_CHECK(condition, tag, tattr.duration, format)
+#define ASSERT_FAIL(fmt, args...) ({ \
+ static int duration = 0; \
+ CHECK(false, "", fmt"\n", ##args); \
+ false; \
+})
+
+#define ASSERT_TRUE(actual, name) ({ \
+ static int duration = 0; \
+ bool ___ok = (actual); \
+ CHECK(!___ok, (name), "unexpected %s: got FALSE\n", (name)); \
+ ___ok; \
+})
+
+#define ASSERT_FALSE(actual, name) ({ \
+ static int duration = 0; \
+ bool ___ok = !(actual); \
+ CHECK(!___ok, (name), "unexpected %s: got TRUE\n", (name)); \
+ ___ok; \
+})
+
+#define ASSERT_EQ(actual, expected, name) ({ \
+ static int duration = 0; \
+ typeof(actual) ___act = (actual); \
+ typeof(expected) ___exp = (expected); \
+ bool ___ok = ___act == ___exp; \
+ CHECK(!___ok, (name), \
+ "unexpected %s: actual %lld != expected %lld\n", \
+ (name), (long long)(___act), (long long)(___exp)); \
+ ___ok; \
+})
+
+#define ASSERT_NEQ(actual, expected, name) ({ \
+ static int duration = 0; \
+ typeof(actual) ___act = (actual); \
+ typeof(expected) ___exp = (expected); \
+ bool ___ok = ___act != ___exp; \
+ CHECK(!___ok, (name), \
+ "unexpected %s: actual %lld == expected %lld\n", \
+ (name), (long long)(___act), (long long)(___exp)); \
+ ___ok; \
+})
+
+#define ASSERT_LT(actual, expected, name) ({ \
+ static int duration = 0; \
+ typeof(actual) ___act = (actual); \
+ typeof(expected) ___exp = (expected); \
+ bool ___ok = ___act < ___exp; \
+ CHECK(!___ok, (name), \
+ "unexpected %s: actual %lld >= expected %lld\n", \
+ (name), (long long)(___act), (long long)(___exp)); \
+ ___ok; \
+})
+
+#define ASSERT_LE(actual, expected, name) ({ \
+ static int duration = 0; \
+ typeof(actual) ___act = (actual); \
+ typeof(expected) ___exp = (expected); \
+ bool ___ok = ___act <= ___exp; \
+ CHECK(!___ok, (name), \
+ "unexpected %s: actual %lld > expected %lld\n", \
+ (name), (long long)(___act), (long long)(___exp)); \
+ ___ok; \
+})
+
+#define ASSERT_GT(actual, expected, name) ({ \
+ static int duration = 0; \
+ typeof(actual) ___act = (actual); \
+ typeof(expected) ___exp = (expected); \
+ bool ___ok = ___act > ___exp; \
+ CHECK(!___ok, (name), \
+ "unexpected %s: actual %lld <= expected %lld\n", \
+ (name), (long long)(___act), (long long)(___exp)); \
+ ___ok; \
+})
+
+#define ASSERT_GE(actual, expected, name) ({ \
+ static int duration = 0; \
+ typeof(actual) ___act = (actual); \
+ typeof(expected) ___exp = (expected); \
+ bool ___ok = ___act >= ___exp; \
+ CHECK(!___ok, (name), \
+ "unexpected %s: actual %lld < expected %lld\n", \
+ (name), (long long)(___act), (long long)(___exp)); \
+ ___ok; \
+})
+
+#define ASSERT_STREQ(actual, expected, name) ({ \
+ static int duration = 0; \
+ const char *___act = actual; \
+ const char *___exp = expected; \
+ bool ___ok = strcmp(___act, ___exp) == 0; \
+ CHECK(!___ok, (name), \
+ "unexpected %s: actual '%s' != expected '%s'\n", \
+ (name), ___act, ___exp); \
+ ___ok; \
+})
+
+#define ASSERT_STRNEQ(actual, expected, len, name) ({ \
+ static int duration = 0; \
+ const char *___act = actual; \
+ const char *___exp = expected; \
+ int ___len = len; \
+ bool ___ok = strncmp(___act, ___exp, ___len) == 0; \
+ CHECK(!___ok, (name), \
+ "unexpected %s: actual '%.*s' != expected '%.*s'\n", \
+ (name), ___len, ___act, ___len, ___exp); \
+ ___ok; \
+})
+
+#define ASSERT_HAS_SUBSTR(str, substr, name) ({ \
+ static int duration = 0; \
+ const char *___str = str; \
+ const char *___substr = substr; \
+ bool ___ok = strstr(___str, ___substr) != NULL; \
+ CHECK(!___ok, (name), \
+ "unexpected %s: '%s' is not a substring of '%s'\n", \
+ (name), ___substr, ___str); \
+ ___ok; \
+})
+
+#define ASSERT_OK(res, name) ({ \
+ static int duration = 0; \
+ long long ___res = (res); \
+ bool ___ok = ___res == 0; \
+ CHECK(!___ok, (name), "unexpected error: %lld (errno %d)\n", \
+ ___res, errno); \
+ ___ok; \
+})
+
+#define ASSERT_ERR(res, name) ({ \
+ static int duration = 0; \
+ long long ___res = (res); \
+ bool ___ok = ___res < 0; \
+ CHECK(!___ok, (name), "unexpected success: %lld\n", ___res); \
+ ___ok; \
+})
+
+#define ASSERT_NULL(ptr, name) ({ \
+ static int duration = 0; \
+ const void *___res = (ptr); \
+ bool ___ok = !___res; \
+ CHECK(!___ok, (name), "unexpected pointer: %p\n", ___res); \
+ ___ok; \
+})
+
+#define ASSERT_OK_PTR(ptr, name) ({ \
+ static int duration = 0; \
+ const void *___res = (ptr); \
+ int ___err = libbpf_get_error(___res); \
+ bool ___ok = ___err == 0; \
+ CHECK(!___ok, (name), "unexpected error: %d\n", ___err); \
+ ___ok; \
+})
+
+#define ASSERT_ERR_PTR(ptr, name) ({ \
+ static int duration = 0; \
+ const void *___res = (ptr); \
+ int ___err = libbpf_get_error(___res); \
+ bool ___ok = ___err != 0; \
+ CHECK(!___ok, (name), "unexpected pointer: %p\n", ___res); \
+ ___ok; \
+})
+
+#define SYS(goto_label, fmt, ...) \
+ ({ \
+ char cmd[1024]; \
+ snprintf(cmd, sizeof(cmd), fmt, ##__VA_ARGS__); \
+ if (!ASSERT_OK(system(cmd), cmd)) \
+ goto goto_label; \
+ })
+
+#define ALL_TO_DEV_NULL " >/dev/null 2>&1"
+
+#define SYS_NOFAIL(fmt, ...) \
+ ({ \
+ char cmd[1024]; \
+ int n; \
+ n = snprintf(cmd, sizeof(cmd), fmt, ##__VA_ARGS__); \
+ if (n < sizeof(cmd) && sizeof(cmd) - n >= sizeof(ALL_TO_DEV_NULL)) \
+ strcat(cmd, ALL_TO_DEV_NULL); \
+ system(cmd); \
+ })
+
+int start_libbpf_log_capture(void);
+char *stop_libbpf_log_capture(void);
+
static inline __u64 ptr_to_u64(const void *ptr)
{
return (__u64) (unsigned long) ptr;
}
+static inline void *u64_to_ptr(__u64 ptr)
+{
+ return (void *) (unsigned long) ptr;
+}
+
int bpf_find_map(const char *test, struct bpf_object *obj, const char *name);
int compare_map_keys(int map1_fd, int map2_fd);
int compare_stack_ips(int smap_fd, int amap_fd, int stack_trace_len);
-int extract_build_id(char *build_id, size_t size);
+int trigger_module_test_read(int read_sz);
+int trigger_module_test_write(int write_sz);
+int write_sysctl(const char *sysctl, const char *value);
+int get_bpf_max_tramp_links_from(struct btf *btf);
+int get_bpf_max_tramp_links(void);
#ifdef __x86_64__
#define SYS_NANOSLEEP_KPROBE_NAME "__x64_sys_nanosleep"
#elif defined(__s390x__)
#define SYS_NANOSLEEP_KPROBE_NAME "__s390x_sys_nanosleep"
+#elif defined(__aarch64__)
+#define SYS_NANOSLEEP_KPROBE_NAME "__arm64_sys_nanosleep"
+#elif defined(__riscv)
+#define SYS_NANOSLEEP_KPROBE_NAME "__riscv_sys_nanosleep"
#else
#define SYS_NANOSLEEP_KPROBE_NAME "sys_nanosleep"
#endif
+
+#define BPF_TESTMOD_TEST_FILE "/sys/kernel/bpf_testmod"
+
+typedef int (*pre_execution_cb)(struct bpf_object *obj);
+
+struct test_loader {
+ char *log_buf;
+ size_t log_buf_sz;
+ size_t next_match_pos;
+ pre_execution_cb pre_execution_cb;
+
+ struct bpf_object *obj;
+};
+
+static inline void test_loader__set_pre_execution_cb(struct test_loader *tester,
+ pre_execution_cb cb)
+{
+ tester->pre_execution_cb = cb;
+}
+
+typedef const void *(*skel_elf_bytes_fn)(size_t *sz);
+
+extern void test_loader__run_subtests(struct test_loader *tester,
+ const char *skel_name,
+ skel_elf_bytes_fn elf_bytes_factory);
+
+extern void test_loader_fini(struct test_loader *tester);
+
+#define RUN_TESTS(skel) ({ \
+ struct test_loader tester = {}; \
+ \
+ test_loader__run_subtests(&tester, #skel, skel##__elf_bytes); \
+ test_loader_fini(&tester); \
+})
+
+#endif /* __TEST_PROGS_H */
diff --git a/tools/testing/selftests/bpf/test_skb_cgroup_id.sh b/tools/testing/selftests/bpf/test_skb_cgroup_id.sh
index a9bc6f82abc1..515c2eafc97f 100755
--- a/tools/testing/selftests/bpf/test_skb_cgroup_id.sh
+++ b/tools/testing/selftests/bpf/test_skb_cgroup_id.sh
@@ -54,7 +54,7 @@ DIR=$(dirname $0)
TEST_IF="test_cgid_1"
TEST_IF_PEER="test_cgid_2"
MAX_PING_TRIES=5
-BPF_PROG_OBJ="${DIR}/test_skb_cgroup_id_kern.o"
+BPF_PROG_OBJ="${DIR}/test_skb_cgroup_id_kern.bpf.o"
BPF_PROG_SECTION="cgroup_id_logger"
BPF_PROG_ID=0
PROG="${DIR}/test_skb_cgroup_id_user"
diff --git a/tools/testing/selftests/bpf/test_skb_cgroup_id_user.c b/tools/testing/selftests/bpf/test_skb_cgroup_id_user.c
index 356351c0ac28..ed518d075d1d 100644
--- a/tools/testing/selftests/bpf/test_skb_cgroup_id_user.c
+++ b/tools/testing/selftests/bpf/test_skb_cgroup_id_user.c
@@ -15,7 +15,6 @@
#include <bpf/bpf.h>
#include <bpf/libbpf.h>
-#include "bpf_rlimit.h"
#include "cgroup_helpers.h"
#define CGROUP_PATH "/skb_cgroup_test"
@@ -94,7 +93,7 @@ int get_map_fd_by_prog_id(int prog_id)
info.nr_map_ids = 1;
info.map_ids = (__u64) (unsigned long) map_ids;
- if (bpf_obj_get_info_by_fd(prog_fd, &info, &info_len)) {
+ if (bpf_prog_get_info_by_fd(prog_fd, &info, &info_len)) {
log_err("Failed to get info by prog fd %d", prog_fd);
goto err;
}
@@ -160,16 +159,13 @@ int main(int argc, char **argv)
exit(EXIT_FAILURE);
}
- if (setup_cgroup_environment())
- goto err;
+ /* Use libbpf 1.0 API mode */
+ libbpf_set_strict_mode(LIBBPF_STRICT_ALL);
- cgfd = create_and_get_cgroup(CGROUP_PATH);
+ cgfd = cgroup_setup_and_join(CGROUP_PATH);
if (cgfd < 0)
goto err;
- if (join_cgroup(CGROUP_PATH))
- goto err;
-
if (send_packet(argv[1]))
goto err;
diff --git a/tools/testing/selftests/bpf/test_sock.c b/tools/testing/selftests/bpf/test_sock.c
index 52bf14955797..810c3740b2cc 100644
--- a/tools/testing/selftests/bpf/test_sock.c
+++ b/tools/testing/selftests/bpf/test_sock.c
@@ -14,7 +14,6 @@
#include "cgroup_helpers.h"
#include <bpf/bpf_endian.h>
-#include "bpf_rlimit.h"
#include "bpf_util.h"
#define CG_PATH "/foo"
@@ -35,18 +34,21 @@ struct sock_test {
/* Endpoint to bind() to */
const char *ip;
unsigned short port;
+ unsigned short port_retry;
/* Expected test result */
enum {
LOAD_REJECT,
ATTACH_REJECT,
BIND_REJECT,
SUCCESS,
+ RETRY_SUCCESS,
+ RETRY_REJECT
} result;
};
static struct sock_test tests[] = {
{
- "bind4 load with invalid access: src_ip6",
+ .descr = "bind4 load with invalid access: src_ip6",
.insns = {
BPF_MOV64_REG(BPF_REG_6, BPF_REG_1),
BPF_LDX_MEM(BPF_W, BPF_REG_7, BPF_REG_6,
@@ -54,16 +56,12 @@ static struct sock_test tests[] = {
BPF_MOV64_IMM(BPF_REG_0, 1),
BPF_EXIT_INSN(),
},
- BPF_CGROUP_INET4_POST_BIND,
- BPF_CGROUP_INET4_POST_BIND,
- 0,
- 0,
- NULL,
- 0,
- LOAD_REJECT,
+ .expected_attach_type = BPF_CGROUP_INET4_POST_BIND,
+ .attach_type = BPF_CGROUP_INET4_POST_BIND,
+ .result = LOAD_REJECT,
},
{
- "bind4 load with invalid access: mark",
+ .descr = "bind4 load with invalid access: mark",
.insns = {
BPF_MOV64_REG(BPF_REG_6, BPF_REG_1),
BPF_LDX_MEM(BPF_W, BPF_REG_7, BPF_REG_6,
@@ -71,16 +69,12 @@ static struct sock_test tests[] = {
BPF_MOV64_IMM(BPF_REG_0, 1),
BPF_EXIT_INSN(),
},
- BPF_CGROUP_INET4_POST_BIND,
- BPF_CGROUP_INET4_POST_BIND,
- 0,
- 0,
- NULL,
- 0,
- LOAD_REJECT,
+ .expected_attach_type = BPF_CGROUP_INET4_POST_BIND,
+ .attach_type = BPF_CGROUP_INET4_POST_BIND,
+ .result = LOAD_REJECT,
},
{
- "bind6 load with invalid access: src_ip4",
+ .descr = "bind6 load with invalid access: src_ip4",
.insns = {
BPF_MOV64_REG(BPF_REG_6, BPF_REG_1),
BPF_LDX_MEM(BPF_W, BPF_REG_7, BPF_REG_6,
@@ -88,16 +82,12 @@ static struct sock_test tests[] = {
BPF_MOV64_IMM(BPF_REG_0, 1),
BPF_EXIT_INSN(),
},
- BPF_CGROUP_INET6_POST_BIND,
- BPF_CGROUP_INET6_POST_BIND,
- 0,
- 0,
- NULL,
- 0,
- LOAD_REJECT,
+ .expected_attach_type = BPF_CGROUP_INET6_POST_BIND,
+ .attach_type = BPF_CGROUP_INET6_POST_BIND,
+ .result = LOAD_REJECT,
},
{
- "sock_create load with invalid access: src_port",
+ .descr = "sock_create load with invalid access: src_port",
.insns = {
BPF_MOV64_REG(BPF_REG_6, BPF_REG_1),
BPF_LDX_MEM(BPF_W, BPF_REG_7, BPF_REG_6,
@@ -105,128 +95,106 @@ static struct sock_test tests[] = {
BPF_MOV64_IMM(BPF_REG_0, 1),
BPF_EXIT_INSN(),
},
- BPF_CGROUP_INET_SOCK_CREATE,
- BPF_CGROUP_INET_SOCK_CREATE,
- 0,
- 0,
- NULL,
- 0,
- LOAD_REJECT,
+ .expected_attach_type = BPF_CGROUP_INET_SOCK_CREATE,
+ .attach_type = BPF_CGROUP_INET_SOCK_CREATE,
+ .result = LOAD_REJECT,
},
{
- "sock_create load w/o expected_attach_type (compat mode)",
+ .descr = "sock_create load w/o expected_attach_type (compat mode)",
.insns = {
BPF_MOV64_IMM(BPF_REG_0, 1),
BPF_EXIT_INSN(),
},
- 0,
- BPF_CGROUP_INET_SOCK_CREATE,
- AF_INET,
- SOCK_STREAM,
- "127.0.0.1",
- 8097,
- SUCCESS,
+ .expected_attach_type = 0,
+ .attach_type = BPF_CGROUP_INET_SOCK_CREATE,
+ .domain = AF_INET,
+ .type = SOCK_STREAM,
+ .ip = "127.0.0.1",
+ .port = 8097,
+ .result = SUCCESS,
},
{
- "sock_create load w/ expected_attach_type",
+ .descr = "sock_create load w/ expected_attach_type",
.insns = {
BPF_MOV64_IMM(BPF_REG_0, 1),
BPF_EXIT_INSN(),
},
- BPF_CGROUP_INET_SOCK_CREATE,
- BPF_CGROUP_INET_SOCK_CREATE,
- AF_INET,
- SOCK_STREAM,
- "127.0.0.1",
- 8097,
- SUCCESS,
+ .expected_attach_type = BPF_CGROUP_INET_SOCK_CREATE,
+ .attach_type = BPF_CGROUP_INET_SOCK_CREATE,
+ .domain = AF_INET,
+ .type = SOCK_STREAM,
+ .ip = "127.0.0.1",
+ .port = 8097,
+ .result = SUCCESS,
},
{
- "attach type mismatch bind4 vs bind6",
+ .descr = "attach type mismatch bind4 vs bind6",
.insns = {
BPF_MOV64_IMM(BPF_REG_0, 1),
BPF_EXIT_INSN(),
},
- BPF_CGROUP_INET4_POST_BIND,
- BPF_CGROUP_INET6_POST_BIND,
- 0,
- 0,
- NULL,
- 0,
- ATTACH_REJECT,
+ .expected_attach_type = BPF_CGROUP_INET4_POST_BIND,
+ .attach_type = BPF_CGROUP_INET6_POST_BIND,
+ .result = ATTACH_REJECT,
},
{
- "attach type mismatch bind6 vs bind4",
+ .descr = "attach type mismatch bind6 vs bind4",
.insns = {
BPF_MOV64_IMM(BPF_REG_0, 1),
BPF_EXIT_INSN(),
},
- BPF_CGROUP_INET6_POST_BIND,
- BPF_CGROUP_INET4_POST_BIND,
- 0,
- 0,
- NULL,
- 0,
- ATTACH_REJECT,
+ .expected_attach_type = BPF_CGROUP_INET6_POST_BIND,
+ .attach_type = BPF_CGROUP_INET4_POST_BIND,
+ .result = ATTACH_REJECT,
},
{
- "attach type mismatch default vs bind4",
+ .descr = "attach type mismatch default vs bind4",
.insns = {
BPF_MOV64_IMM(BPF_REG_0, 1),
BPF_EXIT_INSN(),
},
- 0,
- BPF_CGROUP_INET4_POST_BIND,
- 0,
- 0,
- NULL,
- 0,
- ATTACH_REJECT,
+ .expected_attach_type = 0,
+ .attach_type = BPF_CGROUP_INET4_POST_BIND,
+ .result = ATTACH_REJECT,
},
{
- "attach type mismatch bind6 vs sock_create",
+ .descr = "attach type mismatch bind6 vs sock_create",
.insns = {
BPF_MOV64_IMM(BPF_REG_0, 1),
BPF_EXIT_INSN(),
},
- BPF_CGROUP_INET6_POST_BIND,
- BPF_CGROUP_INET_SOCK_CREATE,
- 0,
- 0,
- NULL,
- 0,
- ATTACH_REJECT,
+ .expected_attach_type = BPF_CGROUP_INET6_POST_BIND,
+ .attach_type = BPF_CGROUP_INET_SOCK_CREATE,
+ .result = ATTACH_REJECT,
},
{
- "bind4 reject all",
+ .descr = "bind4 reject all",
.insns = {
BPF_MOV64_IMM(BPF_REG_0, 0),
BPF_EXIT_INSN(),
},
- BPF_CGROUP_INET4_POST_BIND,
- BPF_CGROUP_INET4_POST_BIND,
- AF_INET,
- SOCK_STREAM,
- "0.0.0.0",
- 0,
- BIND_REJECT,
+ .expected_attach_type = BPF_CGROUP_INET4_POST_BIND,
+ .attach_type = BPF_CGROUP_INET4_POST_BIND,
+ .domain = AF_INET,
+ .type = SOCK_STREAM,
+ .ip = "0.0.0.0",
+ .result = BIND_REJECT,
},
{
- "bind6 reject all",
+ .descr = "bind6 reject all",
.insns = {
BPF_MOV64_IMM(BPF_REG_0, 0),
BPF_EXIT_INSN(),
},
- BPF_CGROUP_INET6_POST_BIND,
- BPF_CGROUP_INET6_POST_BIND,
- AF_INET6,
- SOCK_STREAM,
- "::",
- 0,
- BIND_REJECT,
+ .expected_attach_type = BPF_CGROUP_INET6_POST_BIND,
+ .attach_type = BPF_CGROUP_INET6_POST_BIND,
+ .domain = AF_INET6,
+ .type = SOCK_STREAM,
+ .ip = "::",
+ .result = BIND_REJECT,
},
{
- "bind6 deny specific IP & port",
+ .descr = "bind6 deny specific IP & port",
.insns = {
BPF_MOV64_REG(BPF_REG_6, BPF_REG_1),
@@ -247,16 +215,16 @@ static struct sock_test tests[] = {
BPF_MOV64_IMM(BPF_REG_0, 1),
BPF_EXIT_INSN(),
},
- BPF_CGROUP_INET6_POST_BIND,
- BPF_CGROUP_INET6_POST_BIND,
- AF_INET6,
- SOCK_STREAM,
- "::1",
- 8193,
- BIND_REJECT,
+ .expected_attach_type = BPF_CGROUP_INET6_POST_BIND,
+ .attach_type = BPF_CGROUP_INET6_POST_BIND,
+ .domain = AF_INET6,
+ .type = SOCK_STREAM,
+ .ip = "::1",
+ .port = 8193,
+ .result = BIND_REJECT,
},
{
- "bind4 allow specific IP & port",
+ .descr = "bind4 allow specific IP & port",
.insns = {
BPF_MOV64_REG(BPF_REG_6, BPF_REG_1),
@@ -277,41 +245,132 @@ static struct sock_test tests[] = {
BPF_MOV64_IMM(BPF_REG_0, 0),
BPF_EXIT_INSN(),
},
- BPF_CGROUP_INET4_POST_BIND,
- BPF_CGROUP_INET4_POST_BIND,
- AF_INET,
- SOCK_STREAM,
- "127.0.0.1",
- 4098,
- SUCCESS,
+ .expected_attach_type = BPF_CGROUP_INET4_POST_BIND,
+ .attach_type = BPF_CGROUP_INET4_POST_BIND,
+ .domain = AF_INET,
+ .type = SOCK_STREAM,
+ .ip = "127.0.0.1",
+ .port = 4098,
+ .result = SUCCESS,
},
{
- "bind4 allow all",
+ .descr = "bind4 deny specific IP & port of TCP, and retry",
.insns = {
+ BPF_MOV64_REG(BPF_REG_6, BPF_REG_1),
+
+ /* if (ip == expected && port == expected) */
+ BPF_LDX_MEM(BPF_W, BPF_REG_7, BPF_REG_6,
+ offsetof(struct bpf_sock, src_ip4)),
+ BPF_JMP_IMM(BPF_JNE, BPF_REG_7,
+ __bpf_constant_ntohl(0x7F000001), 4),
+ BPF_LDX_MEM(BPF_W, BPF_REG_7, BPF_REG_6,
+ offsetof(struct bpf_sock, src_port)),
+ BPF_JMP_IMM(BPF_JNE, BPF_REG_7, 0x1002, 2),
+
+ /* return DENY; */
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_JMP_A(1),
+
+ /* else return ALLOW; */
BPF_MOV64_IMM(BPF_REG_0, 1),
BPF_EXIT_INSN(),
},
- BPF_CGROUP_INET4_POST_BIND,
- BPF_CGROUP_INET4_POST_BIND,
- AF_INET,
- SOCK_STREAM,
- "0.0.0.0",
- 0,
- SUCCESS,
+ .expected_attach_type = BPF_CGROUP_INET4_POST_BIND,
+ .attach_type = BPF_CGROUP_INET4_POST_BIND,
+ .domain = AF_INET,
+ .type = SOCK_STREAM,
+ .ip = "127.0.0.1",
+ .port = 4098,
+ .port_retry = 5000,
+ .result = RETRY_SUCCESS,
},
{
- "bind6 allow all",
+ .descr = "bind4 deny specific IP & port of UDP, and retry",
.insns = {
+ BPF_MOV64_REG(BPF_REG_6, BPF_REG_1),
+
+ /* if (ip == expected && port == expected) */
+ BPF_LDX_MEM(BPF_W, BPF_REG_7, BPF_REG_6,
+ offsetof(struct bpf_sock, src_ip4)),
+ BPF_JMP_IMM(BPF_JNE, BPF_REG_7,
+ __bpf_constant_ntohl(0x7F000001), 4),
+ BPF_LDX_MEM(BPF_W, BPF_REG_7, BPF_REG_6,
+ offsetof(struct bpf_sock, src_port)),
+ BPF_JMP_IMM(BPF_JNE, BPF_REG_7, 0x1002, 2),
+
+ /* return DENY; */
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_JMP_A(1),
+
+ /* else return ALLOW; */
BPF_MOV64_IMM(BPF_REG_0, 1),
BPF_EXIT_INSN(),
},
- BPF_CGROUP_INET6_POST_BIND,
- BPF_CGROUP_INET6_POST_BIND,
- AF_INET6,
- SOCK_STREAM,
- "::",
- 0,
- SUCCESS,
+ .expected_attach_type = BPF_CGROUP_INET4_POST_BIND,
+ .attach_type = BPF_CGROUP_INET4_POST_BIND,
+ .domain = AF_INET,
+ .type = SOCK_DGRAM,
+ .ip = "127.0.0.1",
+ .port = 4098,
+ .port_retry = 5000,
+ .result = RETRY_SUCCESS,
+ },
+ {
+ .descr = "bind6 deny specific IP & port, and retry",
+ .insns = {
+ BPF_MOV64_REG(BPF_REG_6, BPF_REG_1),
+
+ /* if (ip == expected && port == expected) */
+ BPF_LDX_MEM(BPF_W, BPF_REG_7, BPF_REG_6,
+ offsetof(struct bpf_sock, src_ip6[3])),
+ BPF_JMP_IMM(BPF_JNE, BPF_REG_7,
+ __bpf_constant_ntohl(0x00000001), 4),
+ BPF_LDX_MEM(BPF_W, BPF_REG_7, BPF_REG_6,
+ offsetof(struct bpf_sock, src_port)),
+ BPF_JMP_IMM(BPF_JNE, BPF_REG_7, 0x2001, 2),
+
+ /* return DENY; */
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_JMP_A(1),
+
+ /* else return ALLOW; */
+ BPF_MOV64_IMM(BPF_REG_0, 1),
+ BPF_EXIT_INSN(),
+ },
+ .expected_attach_type = BPF_CGROUP_INET6_POST_BIND,
+ .attach_type = BPF_CGROUP_INET6_POST_BIND,
+ .domain = AF_INET6,
+ .type = SOCK_STREAM,
+ .ip = "::1",
+ .port = 8193,
+ .port_retry = 9000,
+ .result = RETRY_SUCCESS,
+ },
+ {
+ .descr = "bind4 allow all",
+ .insns = {
+ BPF_MOV64_IMM(BPF_REG_0, 1),
+ BPF_EXIT_INSN(),
+ },
+ .expected_attach_type = BPF_CGROUP_INET4_POST_BIND,
+ .attach_type = BPF_CGROUP_INET4_POST_BIND,
+ .domain = AF_INET,
+ .type = SOCK_STREAM,
+ .ip = "0.0.0.0",
+ .result = SUCCESS,
+ },
+ {
+ .descr = "bind6 allow all",
+ .insns = {
+ BPF_MOV64_IMM(BPF_REG_0, 1),
+ BPF_EXIT_INSN(),
+ },
+ .expected_attach_type = BPF_CGROUP_INET6_POST_BIND,
+ .attach_type = BPF_CGROUP_INET6_POST_BIND,
+ .domain = AF_INET6,
+ .type = SOCK_STREAM,
+ .ip = "::",
+ .result = SUCCESS,
},
};
@@ -328,18 +387,17 @@ static size_t probe_prog_length(const struct bpf_insn *fp)
static int load_sock_prog(const struct bpf_insn *prog,
enum bpf_attach_type attach_type)
{
- struct bpf_load_program_attr attr;
- int ret;
-
- memset(&attr, 0, sizeof(struct bpf_load_program_attr));
- attr.prog_type = BPF_PROG_TYPE_CGROUP_SOCK;
- attr.expected_attach_type = attach_type;
- attr.insns = prog;
- attr.insns_cnt = probe_prog_length(attr.insns);
- attr.license = "GPL";
- attr.log_level = 2;
-
- ret = bpf_load_program_xattr(&attr, bpf_log_buf, BPF_LOG_BUF_SIZE);
+ LIBBPF_OPTS(bpf_prog_load_opts, opts);
+ int ret, insn_cnt;
+
+ insn_cnt = probe_prog_length(prog);
+
+ opts.expected_attach_type = attach_type;
+ opts.log_buf = bpf_log_buf;
+ opts.log_size = BPF_LOG_BUF_SIZE;
+ opts.log_level = 2;
+
+ ret = bpf_prog_load(BPF_PROG_TYPE_CGROUP_SOCK, NULL, "GPL", prog, insn_cnt, &opts);
if (verbose && ret < 0)
fprintf(stderr, "%s\n", bpf_log_buf);
@@ -352,14 +410,15 @@ static int attach_sock_prog(int cgfd, int progfd,
return bpf_prog_attach(progfd, cgfd, attach_type, BPF_F_ALLOW_OVERRIDE);
}
-static int bind_sock(int domain, int type, const char *ip, unsigned short port)
+static int bind_sock(int domain, int type, const char *ip,
+ unsigned short port, unsigned short port_retry)
{
struct sockaddr_storage addr;
struct sockaddr_in6 *addr6;
struct sockaddr_in *addr4;
int sockfd = -1;
socklen_t len;
- int err = 0;
+ int res = SUCCESS;
sockfd = socket(domain, type, 0);
if (sockfd < 0)
@@ -385,21 +444,44 @@ static int bind_sock(int domain, int type, const char *ip, unsigned short port)
goto err;
}
- if (bind(sockfd, (const struct sockaddr *)&addr, len) == -1)
- goto err;
+ if (bind(sockfd, (const struct sockaddr *)&addr, len) == -1) {
+ /* sys_bind() may fail for different reasons, errno has to be
+ * checked to confirm that BPF program rejected it.
+ */
+ if (errno != EPERM)
+ goto err;
+ if (port_retry)
+ goto retry;
+ res = BIND_REJECT;
+ goto out;
+ }
goto out;
+retry:
+ if (domain == AF_INET)
+ addr4->sin_port = htons(port_retry);
+ else
+ addr6->sin6_port = htons(port_retry);
+ if (bind(sockfd, (const struct sockaddr *)&addr, len) == -1) {
+ if (errno != EPERM)
+ goto err;
+ res = RETRY_REJECT;
+ } else {
+ res = RETRY_SUCCESS;
+ }
+ goto out;
err:
- err = -1;
+ res = -1;
out:
close(sockfd);
- return err;
+ return res;
}
static int run_test_case(int cgfd, const struct sock_test *test)
{
int progfd = -1;
int err = 0;
+ int res;
printf("Test case: %s .. ", test->descr);
progfd = load_sock_prog(test->insns, test->expected_attach_type);
@@ -410,28 +492,18 @@ static int run_test_case(int cgfd, const struct sock_test *test)
goto err;
}
- if (attach_sock_prog(cgfd, progfd, test->attach_type) == -1) {
+ if (attach_sock_prog(cgfd, progfd, test->attach_type) < 0) {
if (test->result == ATTACH_REJECT)
goto out;
else
goto err;
}
- if (bind_sock(test->domain, test->type, test->ip, test->port) == -1) {
- /* sys_bind() may fail for different reasons, errno has to be
- * checked to confirm that BPF program rejected it.
- */
- if (test->result == BIND_REJECT && errno == EPERM)
- goto out;
- else
- goto err;
- }
+ res = bind_sock(test->domain, test->type, test->ip, test->port,
+ test->port_retry);
+ if (res > 0 && test->result == res)
+ goto out;
-
- if (test->result != SUCCESS)
- goto err;
-
- goto out;
err:
err = -1;
out:
@@ -464,15 +536,12 @@ int main(int argc, char **argv)
int cgfd = -1;
int err = 0;
- if (setup_cgroup_environment())
- goto err;
-
- cgfd = create_and_get_cgroup(CG_PATH);
+ cgfd = cgroup_setup_and_join(CG_PATH);
if (cgfd < 0)
goto err;
- if (join_cgroup(CG_PATH))
- goto err;
+ /* Use libbpf 1.0 API mode */
+ libbpf_set_strict_mode(LIBBPF_STRICT_ALL);
if (run_tests(cgfd))
goto err;
diff --git a/tools/testing/selftests/bpf/test_sock_addr.c b/tools/testing/selftests/bpf/test_sock_addr.c
index 0358814c67dc..80c42583f597 100644
--- a/tools/testing/selftests/bpf/test_sock_addr.c
+++ b/tools/testing/selftests/bpf/test_sock_addr.c
@@ -19,7 +19,7 @@
#include <bpf/libbpf.h>
#include "cgroup_helpers.h"
-#include "bpf_rlimit.h"
+#include "testing_helpers.h"
#include "bpf_util.h"
#ifndef ENOTSUPP
@@ -27,10 +27,14 @@
#endif
#define CG_PATH "/foo"
-#define CONNECT4_PROG_PATH "./connect4_prog.o"
-#define CONNECT6_PROG_PATH "./connect6_prog.o"
-#define SENDMSG4_PROG_PATH "./sendmsg4_prog.o"
-#define SENDMSG6_PROG_PATH "./sendmsg6_prog.o"
+#define CONNECT4_PROG_PATH "./connect4_prog.bpf.o"
+#define CONNECT6_PROG_PATH "./connect6_prog.bpf.o"
+#define SENDMSG4_PROG_PATH "./sendmsg4_prog.bpf.o"
+#define SENDMSG6_PROG_PATH "./sendmsg6_prog.bpf.o"
+#define RECVMSG4_PROG_PATH "./recvmsg4_prog.bpf.o"
+#define RECVMSG6_PROG_PATH "./recvmsg6_prog.bpf.o"
+#define BIND4_PROG_PATH "./bind4_prog.bpf.o"
+#define BIND6_PROG_PATH "./bind6_prog.bpf.o"
#define SERV4_IP "192.168.1.254"
#define SERV4_REWRITE_IP "127.0.0.1"
@@ -92,10 +96,10 @@ static int sendmsg_deny_prog_load(const struct sock_addr_test *test);
static int recvmsg_allow_prog_load(const struct sock_addr_test *test);
static int recvmsg_deny_prog_load(const struct sock_addr_test *test);
static int sendmsg4_rw_asm_prog_load(const struct sock_addr_test *test);
-static int recvmsg4_rw_asm_prog_load(const struct sock_addr_test *test);
+static int recvmsg4_rw_c_prog_load(const struct sock_addr_test *test);
static int sendmsg4_rw_c_prog_load(const struct sock_addr_test *test);
static int sendmsg6_rw_asm_prog_load(const struct sock_addr_test *test);
-static int recvmsg6_rw_asm_prog_load(const struct sock_addr_test *test);
+static int recvmsg6_rw_c_prog_load(const struct sock_addr_test *test);
static int sendmsg6_rw_c_prog_load(const struct sock_addr_test *test);
static int sendmsg6_rw_v4mapped_prog_load(const struct sock_addr_test *test);
static int sendmsg6_rw_wildcard_prog_load(const struct sock_addr_test *test);
@@ -571,8 +575,8 @@ static struct sock_addr_test tests[] = {
LOAD_REJECT,
},
{
- "recvmsg4: rewrite IP & port (asm)",
- recvmsg4_rw_asm_prog_load,
+ "recvmsg4: rewrite IP & port (C)",
+ recvmsg4_rw_c_prog_load,
BPF_CGROUP_UDP4_RECVMSG,
BPF_CGROUP_UDP4_RECVMSG,
AF_INET,
@@ -585,8 +589,8 @@ static struct sock_addr_test tests[] = {
SUCCESS,
},
{
- "recvmsg6: rewrite IP & port (asm)",
- recvmsg6_rw_asm_prog_load,
+ "recvmsg6: rewrite IP & port (C)",
+ recvmsg6_rw_c_prog_load,
BPF_CGROUP_UDP6_RECVMSG,
BPF_CGROUP_UDP6_RECVMSG,
AF_INET6,
@@ -641,17 +645,14 @@ static int mk_sockaddr(int domain, const char *ip, unsigned short port,
static int load_insns(const struct sock_addr_test *test,
const struct bpf_insn *insns, size_t insns_cnt)
{
- struct bpf_load_program_attr load_attr;
+ LIBBPF_OPTS(bpf_prog_load_opts, opts);
int ret;
- memset(&load_attr, 0, sizeof(struct bpf_load_program_attr));
- load_attr.prog_type = BPF_PROG_TYPE_CGROUP_SOCK_ADDR;
- load_attr.expected_attach_type = test->expected_attach_type;
- load_attr.insns = insns;
- load_attr.insns_cnt = insns_cnt;
- load_attr.license = "GPL";
+ opts.expected_attach_type = test->expected_attach_type;
+ opts.log_buf = bpf_log_buf;
+ opts.log_size = BPF_LOG_BUF_SIZE;
- ret = bpf_load_program_xattr(&load_attr, bpf_log_buf, BPF_LOG_BUF_SIZE);
+ ret = bpf_prog_load(BPF_PROG_TYPE_CGROUP_SOCK_ADDR, NULL, "GPL", insns, insns_cnt, &opts);
if (ret < 0 && test->expected_result != LOAD_REJECT) {
log_err(">>> Loading program error.\n"
">>> Verifier output:\n%s\n-------\n", bpf_log_buf);
@@ -660,209 +661,48 @@ static int load_insns(const struct sock_addr_test *test,
return ret;
}
-/* [1] These testing programs try to read different context fields, including
- * narrow loads of different sizes from user_ip4 and user_ip6, and write to
- * those allowed to be overridden.
- *
- * [2] BPF_LD_IMM64 & BPF_JMP_REG are used below whenever there is a need to
- * compare a register with unsigned 32bit integer. BPF_JMP_IMM can't be used
- * in such cases since it accepts only _signed_ 32bit integer as IMM
- * argument. Also note that BPF_LD_IMM64 contains 2 instructions what matters
- * to count jumps properly.
- */
-
-static int bind4_prog_load(const struct sock_addr_test *test)
+static int load_path(const struct sock_addr_test *test, const char *path)
{
- union {
- uint8_t u4_addr8[4];
- uint16_t u4_addr16[2];
- uint32_t u4_addr32;
- } ip4, port;
- struct sockaddr_in addr4_rw;
-
- if (inet_pton(AF_INET, SERV4_IP, (void *)&ip4) != 1) {
- log_err("Invalid IPv4: %s", SERV4_IP);
- return -1;
- }
-
- port.u4_addr32 = htons(SERV4_PORT);
+ struct bpf_object *obj;
+ struct bpf_program *prog;
+ int err;
- if (mk_sockaddr(AF_INET, SERV4_REWRITE_IP, SERV4_REWRITE_PORT,
- (struct sockaddr *)&addr4_rw, sizeof(addr4_rw)) == -1)
+ obj = bpf_object__open_file(path, NULL);
+ err = libbpf_get_error(obj);
+ if (err) {
+ log_err(">>> Opening BPF object (%s) error.\n", path);
return -1;
+ }
- /* See [1]. */
- struct bpf_insn insns[] = {
- BPF_MOV64_REG(BPF_REG_6, BPF_REG_1),
-
- /* if (sk.family == AF_INET && */
- BPF_LDX_MEM(BPF_W, BPF_REG_7, BPF_REG_6,
- offsetof(struct bpf_sock_addr, family)),
- BPF_JMP_IMM(BPF_JNE, BPF_REG_7, AF_INET, 32),
-
- /* (sk.type == SOCK_DGRAM || sk.type == SOCK_STREAM) && */
- BPF_LDX_MEM(BPF_W, BPF_REG_7, BPF_REG_6,
- offsetof(struct bpf_sock_addr, type)),
- BPF_JMP_IMM(BPF_JNE, BPF_REG_7, SOCK_DGRAM, 1),
- BPF_JMP_A(1),
- BPF_JMP_IMM(BPF_JNE, BPF_REG_7, SOCK_STREAM, 28),
-
- /* 1st_byte_of_user_ip4 == expected && */
- BPF_LDX_MEM(BPF_B, BPF_REG_7, BPF_REG_6,
- offsetof(struct bpf_sock_addr, user_ip4)),
- BPF_JMP_IMM(BPF_JNE, BPF_REG_7, ip4.u4_addr8[0], 26),
-
- /* 2nd_byte_of_user_ip4 == expected && */
- BPF_LDX_MEM(BPF_B, BPF_REG_7, BPF_REG_6,
- offsetof(struct bpf_sock_addr, user_ip4) + 1),
- BPF_JMP_IMM(BPF_JNE, BPF_REG_7, ip4.u4_addr8[1], 24),
-
- /* 3rd_byte_of_user_ip4 == expected && */
- BPF_LDX_MEM(BPF_B, BPF_REG_7, BPF_REG_6,
- offsetof(struct bpf_sock_addr, user_ip4) + 2),
- BPF_JMP_IMM(BPF_JNE, BPF_REG_7, ip4.u4_addr8[2], 22),
-
- /* 4th_byte_of_user_ip4 == expected && */
- BPF_LDX_MEM(BPF_B, BPF_REG_7, BPF_REG_6,
- offsetof(struct bpf_sock_addr, user_ip4) + 3),
- BPF_JMP_IMM(BPF_JNE, BPF_REG_7, ip4.u4_addr8[3], 20),
-
- /* 1st_half_of_user_ip4 == expected && */
- BPF_LDX_MEM(BPF_H, BPF_REG_7, BPF_REG_6,
- offsetof(struct bpf_sock_addr, user_ip4)),
- BPF_JMP_IMM(BPF_JNE, BPF_REG_7, ip4.u4_addr16[0], 18),
-
- /* 2nd_half_of_user_ip4 == expected && */
- BPF_LDX_MEM(BPF_H, BPF_REG_7, BPF_REG_6,
- offsetof(struct bpf_sock_addr, user_ip4) + 2),
- BPF_JMP_IMM(BPF_JNE, BPF_REG_7, ip4.u4_addr16[1], 16),
-
- /* whole_user_ip4 == expected && */
- BPF_LDX_MEM(BPF_W, BPF_REG_7, BPF_REG_6,
- offsetof(struct bpf_sock_addr, user_ip4)),
- BPF_LD_IMM64(BPF_REG_8, ip4.u4_addr32), /* See [2]. */
- BPF_JMP_REG(BPF_JNE, BPF_REG_7, BPF_REG_8, 12),
-
- /* 1st_byte_of_user_port == expected && */
- BPF_LDX_MEM(BPF_B, BPF_REG_7, BPF_REG_6,
- offsetof(struct bpf_sock_addr, user_port)),
- BPF_JMP_IMM(BPF_JNE, BPF_REG_7, port.u4_addr8[0], 10),
-
- /* 1st_half_of_user_port == expected && */
- BPF_LDX_MEM(BPF_H, BPF_REG_7, BPF_REG_6,
- offsetof(struct bpf_sock_addr, user_port)),
- BPF_JMP_IMM(BPF_JNE, BPF_REG_7, port.u4_addr16[0], 8),
-
- /* user_port == expected) { */
- BPF_LDX_MEM(BPF_W, BPF_REG_7, BPF_REG_6,
- offsetof(struct bpf_sock_addr, user_port)),
- BPF_LD_IMM64(BPF_REG_8, port.u4_addr32), /* See [2]. */
- BPF_JMP_REG(BPF_JNE, BPF_REG_7, BPF_REG_8, 4),
-
- /* user_ip4 = addr4_rw.sin_addr */
- BPF_MOV32_IMM(BPF_REG_7, addr4_rw.sin_addr.s_addr),
- BPF_STX_MEM(BPF_W, BPF_REG_6, BPF_REG_7,
- offsetof(struct bpf_sock_addr, user_ip4)),
+ prog = bpf_object__next_program(obj, NULL);
+ if (!prog)
+ goto err_out;
- /* user_port = addr4_rw.sin_port */
- BPF_MOV32_IMM(BPF_REG_7, addr4_rw.sin_port),
- BPF_STX_MEM(BPF_W, BPF_REG_6, BPF_REG_7,
- offsetof(struct bpf_sock_addr, user_port)),
- /* } */
+ bpf_program__set_type(prog, BPF_PROG_TYPE_CGROUP_SOCK_ADDR);
+ bpf_program__set_expected_attach_type(prog, test->expected_attach_type);
+ bpf_program__set_flags(prog, testing_prog_flags());
- /* return 1 */
- BPF_MOV64_IMM(BPF_REG_0, 1),
- BPF_EXIT_INSN(),
- };
+ err = bpf_object__load(obj);
+ if (err) {
+ if (test->expected_result != LOAD_REJECT)
+ log_err(">>> Loading program (%s) error.\n", path);
+ goto err_out;
+ }
- return load_insns(test, insns, sizeof(insns) / sizeof(struct bpf_insn));
+ return bpf_program__fd(prog);
+err_out:
+ bpf_object__close(obj);
+ return -1;
}
-static int bind6_prog_load(const struct sock_addr_test *test)
+static int bind4_prog_load(const struct sock_addr_test *test)
{
- struct sockaddr_in6 addr6_rw;
- struct in6_addr ip6;
-
- if (inet_pton(AF_INET6, SERV6_IP, (void *)&ip6) != 1) {
- log_err("Invalid IPv6: %s", SERV6_IP);
- return -1;
- }
-
- if (mk_sockaddr(AF_INET6, SERV6_REWRITE_IP, SERV6_REWRITE_PORT,
- (struct sockaddr *)&addr6_rw, sizeof(addr6_rw)) == -1)
- return -1;
-
- /* See [1]. */
- struct bpf_insn insns[] = {
- BPF_MOV64_REG(BPF_REG_6, BPF_REG_1),
-
- /* if (sk.family == AF_INET6 && */
- BPF_LDX_MEM(BPF_W, BPF_REG_7, BPF_REG_6,
- offsetof(struct bpf_sock_addr, family)),
- BPF_JMP_IMM(BPF_JNE, BPF_REG_7, AF_INET6, 18),
-
- /* 5th_byte_of_user_ip6 == expected && */
- BPF_LDX_MEM(BPF_B, BPF_REG_7, BPF_REG_6,
- offsetof(struct bpf_sock_addr, user_ip6[1])),
- BPF_JMP_IMM(BPF_JNE, BPF_REG_7, ip6.s6_addr[4], 16),
-
- /* 3rd_half_of_user_ip6 == expected && */
- BPF_LDX_MEM(BPF_H, BPF_REG_7, BPF_REG_6,
- offsetof(struct bpf_sock_addr, user_ip6[1])),
- BPF_JMP_IMM(BPF_JNE, BPF_REG_7, ip6.s6_addr16[2], 14),
-
- /* last_word_of_user_ip6 == expected) { */
- BPF_LDX_MEM(BPF_W, BPF_REG_7, BPF_REG_6,
- offsetof(struct bpf_sock_addr, user_ip6[3])),
- BPF_LD_IMM64(BPF_REG_8, ip6.s6_addr32[3]), /* See [2]. */
- BPF_JMP_REG(BPF_JNE, BPF_REG_7, BPF_REG_8, 10),
-
-
-#define STORE_IPV6_WORD(N) \
- BPF_MOV32_IMM(BPF_REG_7, addr6_rw.sin6_addr.s6_addr32[N]), \
- BPF_STX_MEM(BPF_W, BPF_REG_6, BPF_REG_7, \
- offsetof(struct bpf_sock_addr, user_ip6[N]))
-
- /* user_ip6 = addr6_rw.sin6_addr */
- STORE_IPV6_WORD(0),
- STORE_IPV6_WORD(1),
- STORE_IPV6_WORD(2),
- STORE_IPV6_WORD(3),
-
- /* user_port = addr6_rw.sin6_port */
- BPF_MOV32_IMM(BPF_REG_7, addr6_rw.sin6_port),
- BPF_STX_MEM(BPF_W, BPF_REG_6, BPF_REG_7,
- offsetof(struct bpf_sock_addr, user_port)),
-
- /* } */
-
- /* return 1 */
- BPF_MOV64_IMM(BPF_REG_0, 1),
- BPF_EXIT_INSN(),
- };
-
- return load_insns(test, insns, sizeof(insns) / sizeof(struct bpf_insn));
+ return load_path(test, BIND4_PROG_PATH);
}
-static int load_path(const struct sock_addr_test *test, const char *path)
+static int bind6_prog_load(const struct sock_addr_test *test)
{
- struct bpf_prog_load_attr attr;
- struct bpf_object *obj;
- int prog_fd;
-
- memset(&attr, 0, sizeof(struct bpf_prog_load_attr));
- attr.file = path;
- attr.prog_type = BPF_PROG_TYPE_CGROUP_SOCK_ADDR;
- attr.expected_attach_type = test->expected_attach_type;
- attr.prog_flags = BPF_F_TEST_RND_HI32;
-
- if (bpf_prog_load_xattr(&attr, &obj, &prog_fd)) {
- if (test->expected_result != LOAD_REJECT)
- log_err(">>> Loading program (%s) error.\n", path);
- return -1;
- }
-
- return prog_fd;
+ return load_path(test, BIND6_PROG_PATH);
}
static int connect4_prog_load(const struct sock_addr_test *test)
@@ -883,7 +723,7 @@ static int xmsg_ret_only_prog_load(const struct sock_addr_test *test,
BPF_MOV64_IMM(BPF_REG_0, rc),
BPF_EXIT_INSN(),
};
- return load_insns(test, insns, sizeof(insns) / sizeof(struct bpf_insn));
+ return load_insns(test, insns, ARRAY_SIZE(insns));
}
static int sendmsg_allow_prog_load(const struct sock_addr_test *test)
@@ -955,48 +795,12 @@ static int sendmsg4_rw_asm_prog_load(const struct sock_addr_test *test)
BPF_EXIT_INSN(),
};
- return load_insns(test, insns, sizeof(insns) / sizeof(struct bpf_insn));
+ return load_insns(test, insns, ARRAY_SIZE(insns));
}
-static int recvmsg4_rw_asm_prog_load(const struct sock_addr_test *test)
+static int recvmsg4_rw_c_prog_load(const struct sock_addr_test *test)
{
- struct sockaddr_in src4_rw_addr;
-
- if (mk_sockaddr(AF_INET, SERV4_IP, SERV4_PORT,
- (struct sockaddr *)&src4_rw_addr,
- sizeof(src4_rw_addr)) == -1)
- return -1;
-
- struct bpf_insn insns[] = {
- BPF_MOV64_REG(BPF_REG_6, BPF_REG_1),
-
- /* if (sk.family == AF_INET && */
- BPF_LDX_MEM(BPF_W, BPF_REG_7, BPF_REG_6,
- offsetof(struct bpf_sock_addr, family)),
- BPF_JMP_IMM(BPF_JNE, BPF_REG_7, AF_INET, 6),
-
- /* sk.type == SOCK_DGRAM) { */
- BPF_LDX_MEM(BPF_W, BPF_REG_7, BPF_REG_6,
- offsetof(struct bpf_sock_addr, type)),
- BPF_JMP_IMM(BPF_JNE, BPF_REG_7, SOCK_DGRAM, 4),
-
- /* user_ip4 = src4_rw_addr.sin_addr */
- BPF_MOV32_IMM(BPF_REG_7, src4_rw_addr.sin_addr.s_addr),
- BPF_STX_MEM(BPF_W, BPF_REG_6, BPF_REG_7,
- offsetof(struct bpf_sock_addr, user_ip4)),
-
- /* user_port = src4_rw_addr.sin_port */
- BPF_MOV32_IMM(BPF_REG_7, src4_rw_addr.sin_port),
- BPF_STX_MEM(BPF_W, BPF_REG_6, BPF_REG_7,
- offsetof(struct bpf_sock_addr, user_port)),
- /* } */
-
- /* return 1 */
- BPF_MOV64_IMM(BPF_REG_0, 1),
- BPF_EXIT_INSN(),
- };
-
- return load_insns(test, insns, sizeof(insns) / sizeof(struct bpf_insn));
+ return load_path(test, RECVMSG4_PROG_PATH);
}
static int sendmsg4_rw_c_prog_load(const struct sock_addr_test *test)
@@ -1054,7 +858,7 @@ static int sendmsg6_rw_dst_asm_prog_load(const struct sock_addr_test *test,
BPF_EXIT_INSN(),
};
- return load_insns(test, insns, sizeof(insns) / sizeof(struct bpf_insn));
+ return load_insns(test, insns, ARRAY_SIZE(insns));
}
static int sendmsg6_rw_asm_prog_load(const struct sock_addr_test *test)
@@ -1062,37 +866,9 @@ static int sendmsg6_rw_asm_prog_load(const struct sock_addr_test *test)
return sendmsg6_rw_dst_asm_prog_load(test, SERV6_REWRITE_IP);
}
-static int recvmsg6_rw_asm_prog_load(const struct sock_addr_test *test)
+static int recvmsg6_rw_c_prog_load(const struct sock_addr_test *test)
{
- struct sockaddr_in6 src6_rw_addr;
-
- if (mk_sockaddr(AF_INET6, SERV6_IP, SERV6_PORT,
- (struct sockaddr *)&src6_rw_addr,
- sizeof(src6_rw_addr)) == -1)
- return -1;
-
- struct bpf_insn insns[] = {
- BPF_MOV64_REG(BPF_REG_6, BPF_REG_1),
-
- /* if (sk.family == AF_INET6) { */
- BPF_LDX_MEM(BPF_W, BPF_REG_7, BPF_REG_6,
- offsetof(struct bpf_sock_addr, family)),
- BPF_JMP_IMM(BPF_JNE, BPF_REG_7, AF_INET6, 10),
-
- STORE_IPV6(user_ip6, src6_rw_addr.sin6_addr.s6_addr32),
-
- /* user_port = dst6_rw_addr.sin6_port */
- BPF_MOV32_IMM(BPF_REG_7, src6_rw_addr.sin6_port),
- BPF_STX_MEM(BPF_W, BPF_REG_6, BPF_REG_7,
- offsetof(struct bpf_sock_addr, user_port)),
- /* } */
-
- /* return 1 */
- BPF_MOV64_IMM(BPF_REG_0, 1),
- BPF_EXIT_INSN(),
- };
-
- return load_insns(test, insns, sizeof(insns) / sizeof(struct bpf_insn));
+ return load_path(test, RECVMSG6_PROG_PATH);
}
static int sendmsg6_rw_v4mapped_prog_load(const struct sock_addr_test *test)
@@ -1638,15 +1414,12 @@ int main(int argc, char **argv)
exit(err);
}
- if (setup_cgroup_environment())
- goto err;
-
- cgfd = create_and_get_cgroup(CG_PATH);
+ cgfd = cgroup_setup_and_join(CG_PATH);
if (cgfd < 0)
goto err;
- if (join_cgroup(CG_PATH))
- goto err;
+ /* Use libbpf 1.0 API mode */
+ libbpf_set_strict_mode(LIBBPF_STRICT_ALL);
if (run_tests(cgfd))
goto err;
diff --git a/tools/testing/selftests/bpf/test_sock_fields.c b/tools/testing/selftests/bpf/test_sock_fields.c
deleted file mode 100644
index f0fc103261a4..000000000000
--- a/tools/testing/selftests/bpf/test_sock_fields.c
+++ /dev/null
@@ -1,490 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/* Copyright (c) 2019 Facebook */
-
-#include <sys/socket.h>
-#include <sys/epoll.h>
-#include <netinet/in.h>
-#include <arpa/inet.h>
-#include <unistd.h>
-#include <stdlib.h>
-#include <string.h>
-#include <errno.h>
-
-#include <bpf/bpf.h>
-#include <bpf/libbpf.h>
-
-#include "cgroup_helpers.h"
-#include "bpf_rlimit.h"
-
-enum bpf_addr_array_idx {
- ADDR_SRV_IDX,
- ADDR_CLI_IDX,
- __NR_BPF_ADDR_ARRAY_IDX,
-};
-
-enum bpf_result_array_idx {
- EGRESS_SRV_IDX,
- EGRESS_CLI_IDX,
- INGRESS_LISTEN_IDX,
- __NR_BPF_RESULT_ARRAY_IDX,
-};
-
-enum bpf_linum_array_idx {
- EGRESS_LINUM_IDX,
- INGRESS_LINUM_IDX,
- __NR_BPF_LINUM_ARRAY_IDX,
-};
-
-struct bpf_spinlock_cnt {
- struct bpf_spin_lock lock;
- __u32 cnt;
-};
-
-#define CHECK(condition, tag, format...) ({ \
- int __ret = !!(condition); \
- if (__ret) { \
- printf("%s(%d):FAIL:%s ", __func__, __LINE__, tag); \
- printf(format); \
- printf("\n"); \
- exit(-1); \
- } \
-})
-
-#define TEST_CGROUP "/test-bpf-sock-fields"
-#define DATA "Hello BPF!"
-#define DATA_LEN sizeof(DATA)
-
-static struct sockaddr_in6 srv_sa6, cli_sa6;
-static int sk_pkt_out_cnt10_fd;
-static int sk_pkt_out_cnt_fd;
-static int linum_map_fd;
-static int addr_map_fd;
-static int tp_map_fd;
-static int sk_map_fd;
-
-static __u32 addr_srv_idx = ADDR_SRV_IDX;
-static __u32 addr_cli_idx = ADDR_CLI_IDX;
-
-static __u32 egress_srv_idx = EGRESS_SRV_IDX;
-static __u32 egress_cli_idx = EGRESS_CLI_IDX;
-static __u32 ingress_listen_idx = INGRESS_LISTEN_IDX;
-
-static __u32 egress_linum_idx = EGRESS_LINUM_IDX;
-static __u32 ingress_linum_idx = INGRESS_LINUM_IDX;
-
-static void init_loopback6(struct sockaddr_in6 *sa6)
-{
- memset(sa6, 0, sizeof(*sa6));
- sa6->sin6_family = AF_INET6;
- sa6->sin6_addr = in6addr_loopback;
-}
-
-static void print_sk(const struct bpf_sock *sk)
-{
- char src_ip4[24], dst_ip4[24];
- char src_ip6[64], dst_ip6[64];
-
- inet_ntop(AF_INET, &sk->src_ip4, src_ip4, sizeof(src_ip4));
- inet_ntop(AF_INET6, &sk->src_ip6, src_ip6, sizeof(src_ip6));
- inet_ntop(AF_INET, &sk->dst_ip4, dst_ip4, sizeof(dst_ip4));
- inet_ntop(AF_INET6, &sk->dst_ip6, dst_ip6, sizeof(dst_ip6));
-
- printf("state:%u bound_dev_if:%u family:%u type:%u protocol:%u mark:%u priority:%u "
- "src_ip4:%x(%s) src_ip6:%x:%x:%x:%x(%s) src_port:%u "
- "dst_ip4:%x(%s) dst_ip6:%x:%x:%x:%x(%s) dst_port:%u\n",
- sk->state, sk->bound_dev_if, sk->family, sk->type, sk->protocol,
- sk->mark, sk->priority,
- sk->src_ip4, src_ip4,
- sk->src_ip6[0], sk->src_ip6[1], sk->src_ip6[2], sk->src_ip6[3],
- src_ip6, sk->src_port,
- sk->dst_ip4, dst_ip4,
- sk->dst_ip6[0], sk->dst_ip6[1], sk->dst_ip6[2], sk->dst_ip6[3],
- dst_ip6, ntohs(sk->dst_port));
-}
-
-static void print_tp(const struct bpf_tcp_sock *tp)
-{
- printf("snd_cwnd:%u srtt_us:%u rtt_min:%u snd_ssthresh:%u rcv_nxt:%u "
- "snd_nxt:%u snd:una:%u mss_cache:%u ecn_flags:%u "
- "rate_delivered:%u rate_interval_us:%u packets_out:%u "
- "retrans_out:%u total_retrans:%u segs_in:%u data_segs_in:%u "
- "segs_out:%u data_segs_out:%u lost_out:%u sacked_out:%u "
- "bytes_received:%llu bytes_acked:%llu\n",
- tp->snd_cwnd, tp->srtt_us, tp->rtt_min, tp->snd_ssthresh,
- tp->rcv_nxt, tp->snd_nxt, tp->snd_una, tp->mss_cache,
- tp->ecn_flags, tp->rate_delivered, tp->rate_interval_us,
- tp->packets_out, tp->retrans_out, tp->total_retrans,
- tp->segs_in, tp->data_segs_in, tp->segs_out,
- tp->data_segs_out, tp->lost_out, tp->sacked_out,
- tp->bytes_received, tp->bytes_acked);
-}
-
-static void check_result(void)
-{
- struct bpf_tcp_sock srv_tp, cli_tp, listen_tp;
- struct bpf_sock srv_sk, cli_sk, listen_sk;
- __u32 ingress_linum, egress_linum;
- int err;
-
- err = bpf_map_lookup_elem(linum_map_fd, &egress_linum_idx,
- &egress_linum);
- CHECK(err == -1, "bpf_map_lookup_elem(linum_map_fd)",
- "err:%d errno:%d", err, errno);
-
- err = bpf_map_lookup_elem(linum_map_fd, &ingress_linum_idx,
- &ingress_linum);
- CHECK(err == -1, "bpf_map_lookup_elem(linum_map_fd)",
- "err:%d errno:%d", err, errno);
-
- err = bpf_map_lookup_elem(sk_map_fd, &egress_srv_idx, &srv_sk);
- CHECK(err == -1, "bpf_map_lookup_elem(sk_map_fd, &egress_srv_idx)",
- "err:%d errno:%d", err, errno);
- err = bpf_map_lookup_elem(tp_map_fd, &egress_srv_idx, &srv_tp);
- CHECK(err == -1, "bpf_map_lookup_elem(tp_map_fd, &egress_srv_idx)",
- "err:%d errno:%d", err, errno);
-
- err = bpf_map_lookup_elem(sk_map_fd, &egress_cli_idx, &cli_sk);
- CHECK(err == -1, "bpf_map_lookup_elem(sk_map_fd, &egress_cli_idx)",
- "err:%d errno:%d", err, errno);
- err = bpf_map_lookup_elem(tp_map_fd, &egress_cli_idx, &cli_tp);
- CHECK(err == -1, "bpf_map_lookup_elem(tp_map_fd, &egress_cli_idx)",
- "err:%d errno:%d", err, errno);
-
- err = bpf_map_lookup_elem(sk_map_fd, &ingress_listen_idx, &listen_sk);
- CHECK(err == -1, "bpf_map_lookup_elem(sk_map_fd, &ingress_listen_idx)",
- "err:%d errno:%d", err, errno);
- err = bpf_map_lookup_elem(tp_map_fd, &ingress_listen_idx, &listen_tp);
- CHECK(err == -1, "bpf_map_lookup_elem(tp_map_fd, &ingress_listen_idx)",
- "err:%d errno:%d", err, errno);
-
- printf("listen_sk: ");
- print_sk(&listen_sk);
- printf("\n");
-
- printf("srv_sk: ");
- print_sk(&srv_sk);
- printf("\n");
-
- printf("cli_sk: ");
- print_sk(&cli_sk);
- printf("\n");
-
- printf("listen_tp: ");
- print_tp(&listen_tp);
- printf("\n");
-
- printf("srv_tp: ");
- print_tp(&srv_tp);
- printf("\n");
-
- printf("cli_tp: ");
- print_tp(&cli_tp);
- printf("\n");
-
- CHECK(listen_sk.state != 10 ||
- listen_sk.family != AF_INET6 ||
- listen_sk.protocol != IPPROTO_TCP ||
- memcmp(listen_sk.src_ip6, &in6addr_loopback,
- sizeof(listen_sk.src_ip6)) ||
- listen_sk.dst_ip6[0] || listen_sk.dst_ip6[1] ||
- listen_sk.dst_ip6[2] || listen_sk.dst_ip6[3] ||
- listen_sk.src_port != ntohs(srv_sa6.sin6_port) ||
- listen_sk.dst_port,
- "Unexpected listen_sk",
- "Check listen_sk output. ingress_linum:%u",
- ingress_linum);
-
- CHECK(srv_sk.state == 10 ||
- !srv_sk.state ||
- srv_sk.family != AF_INET6 ||
- srv_sk.protocol != IPPROTO_TCP ||
- memcmp(srv_sk.src_ip6, &in6addr_loopback,
- sizeof(srv_sk.src_ip6)) ||
- memcmp(srv_sk.dst_ip6, &in6addr_loopback,
- sizeof(srv_sk.dst_ip6)) ||
- srv_sk.src_port != ntohs(srv_sa6.sin6_port) ||
- srv_sk.dst_port != cli_sa6.sin6_port,
- "Unexpected srv_sk", "Check srv_sk output. egress_linum:%u",
- egress_linum);
-
- CHECK(cli_sk.state == 10 ||
- !cli_sk.state ||
- cli_sk.family != AF_INET6 ||
- cli_sk.protocol != IPPROTO_TCP ||
- memcmp(cli_sk.src_ip6, &in6addr_loopback,
- sizeof(cli_sk.src_ip6)) ||
- memcmp(cli_sk.dst_ip6, &in6addr_loopback,
- sizeof(cli_sk.dst_ip6)) ||
- cli_sk.src_port != ntohs(cli_sa6.sin6_port) ||
- cli_sk.dst_port != srv_sa6.sin6_port,
- "Unexpected cli_sk", "Check cli_sk output. egress_linum:%u",
- egress_linum);
-
- CHECK(listen_tp.data_segs_out ||
- listen_tp.data_segs_in ||
- listen_tp.total_retrans ||
- listen_tp.bytes_acked,
- "Unexpected listen_tp", "Check listen_tp output. ingress_linum:%u",
- ingress_linum);
-
- CHECK(srv_tp.data_segs_out != 2 ||
- srv_tp.data_segs_in ||
- srv_tp.snd_cwnd != 10 ||
- srv_tp.total_retrans ||
- srv_tp.bytes_acked != 2 * DATA_LEN,
- "Unexpected srv_tp", "Check srv_tp output. egress_linum:%u",
- egress_linum);
-
- CHECK(cli_tp.data_segs_out ||
- cli_tp.data_segs_in != 2 ||
- cli_tp.snd_cwnd != 10 ||
- cli_tp.total_retrans ||
- cli_tp.bytes_received != 2 * DATA_LEN,
- "Unexpected cli_tp", "Check cli_tp output. egress_linum:%u",
- egress_linum);
-}
-
-static void check_sk_pkt_out_cnt(int accept_fd, int cli_fd)
-{
- struct bpf_spinlock_cnt pkt_out_cnt = {}, pkt_out_cnt10 = {};
- int err;
-
- pkt_out_cnt.cnt = ~0;
- pkt_out_cnt10.cnt = ~0;
- err = bpf_map_lookup_elem(sk_pkt_out_cnt_fd, &accept_fd, &pkt_out_cnt);
- if (!err)
- err = bpf_map_lookup_elem(sk_pkt_out_cnt10_fd, &accept_fd,
- &pkt_out_cnt10);
-
- /* The bpf prog only counts for fullsock and
- * passive conneciton did not become fullsock until 3WHS
- * had been finished.
- * The bpf prog only counted two data packet out but we
- * specially init accept_fd's pkt_out_cnt by 2 in
- * init_sk_storage(). Hence, 4 here.
- */
- CHECK(err || pkt_out_cnt.cnt != 4 || pkt_out_cnt10.cnt != 40,
- "bpf_map_lookup_elem(sk_pkt_out_cnt, &accept_fd)",
- "err:%d errno:%d pkt_out_cnt:%u pkt_out_cnt10:%u",
- err, errno, pkt_out_cnt.cnt, pkt_out_cnt10.cnt);
-
- pkt_out_cnt.cnt = ~0;
- pkt_out_cnt10.cnt = ~0;
- err = bpf_map_lookup_elem(sk_pkt_out_cnt_fd, &cli_fd, &pkt_out_cnt);
- if (!err)
- err = bpf_map_lookup_elem(sk_pkt_out_cnt10_fd, &cli_fd,
- &pkt_out_cnt10);
- /* Active connection is fullsock from the beginning.
- * 1 SYN and 1 ACK during 3WHS
- * 2 Acks on data packet.
- *
- * The bpf_prog initialized it to 0xeB9F.
- */
- CHECK(err || pkt_out_cnt.cnt != 0xeB9F + 4 ||
- pkt_out_cnt10.cnt != 0xeB9F + 40,
- "bpf_map_lookup_elem(sk_pkt_out_cnt, &cli_fd)",
- "err:%d errno:%d pkt_out_cnt:%u pkt_out_cnt10:%u",
- err, errno, pkt_out_cnt.cnt, pkt_out_cnt10.cnt);
-}
-
-static void init_sk_storage(int sk_fd, __u32 pkt_out_cnt)
-{
- struct bpf_spinlock_cnt scnt = {};
- int err;
-
- scnt.cnt = pkt_out_cnt;
- err = bpf_map_update_elem(sk_pkt_out_cnt_fd, &sk_fd, &scnt,
- BPF_NOEXIST);
- CHECK(err, "bpf_map_update_elem(sk_pkt_out_cnt_fd)",
- "err:%d errno:%d", err, errno);
-
- scnt.cnt *= 10;
- err = bpf_map_update_elem(sk_pkt_out_cnt10_fd, &sk_fd, &scnt,
- BPF_NOEXIST);
- CHECK(err, "bpf_map_update_elem(sk_pkt_out_cnt10_fd)",
- "err:%d errno:%d", err, errno);
-}
-
-static void test(void)
-{
- int listen_fd, cli_fd, accept_fd, epfd, err;
- struct epoll_event ev;
- socklen_t addrlen;
- int i;
-
- addrlen = sizeof(struct sockaddr_in6);
- ev.events = EPOLLIN;
-
- epfd = epoll_create(1);
- CHECK(epfd == -1, "epoll_create()", "epfd:%d errno:%d", epfd, errno);
-
- /* Prepare listen_fd */
- listen_fd = socket(AF_INET6, SOCK_STREAM | SOCK_NONBLOCK, 0);
- CHECK(listen_fd == -1, "socket()", "listen_fd:%d errno:%d",
- listen_fd, errno);
-
- init_loopback6(&srv_sa6);
- err = bind(listen_fd, (struct sockaddr *)&srv_sa6, sizeof(srv_sa6));
- CHECK(err, "bind(listen_fd)", "err:%d errno:%d", err, errno);
-
- err = getsockname(listen_fd, (struct sockaddr *)&srv_sa6, &addrlen);
- CHECK(err, "getsockname(listen_fd)", "err:%d errno:%d", err, errno);
-
- err = listen(listen_fd, 1);
- CHECK(err, "listen(listen_fd)", "err:%d errno:%d", err, errno);
-
- /* Prepare cli_fd */
- cli_fd = socket(AF_INET6, SOCK_STREAM | SOCK_NONBLOCK, 0);
- CHECK(cli_fd == -1, "socket()", "cli_fd:%d errno:%d", cli_fd, errno);
-
- init_loopback6(&cli_sa6);
- err = bind(cli_fd, (struct sockaddr *)&cli_sa6, sizeof(cli_sa6));
- CHECK(err, "bind(cli_fd)", "err:%d errno:%d", err, errno);
-
- err = getsockname(cli_fd, (struct sockaddr *)&cli_sa6, &addrlen);
- CHECK(err, "getsockname(cli_fd)", "err:%d errno:%d",
- err, errno);
-
- /* Update addr_map with srv_sa6 and cli_sa6 */
- err = bpf_map_update_elem(addr_map_fd, &addr_srv_idx, &srv_sa6, 0);
- CHECK(err, "map_update", "err:%d errno:%d", err, errno);
-
- err = bpf_map_update_elem(addr_map_fd, &addr_cli_idx, &cli_sa6, 0);
- CHECK(err, "map_update", "err:%d errno:%d", err, errno);
-
- /* Connect from cli_sa6 to srv_sa6 */
- err = connect(cli_fd, (struct sockaddr *)&srv_sa6, addrlen);
- printf("srv_sa6.sin6_port:%u cli_sa6.sin6_port:%u\n\n",
- ntohs(srv_sa6.sin6_port), ntohs(cli_sa6.sin6_port));
- CHECK(err && errno != EINPROGRESS,
- "connect(cli_fd)", "err:%d errno:%d", err, errno);
-
- ev.data.fd = listen_fd;
- err = epoll_ctl(epfd, EPOLL_CTL_ADD, listen_fd, &ev);
- CHECK(err, "epoll_ctl(EPOLL_CTL_ADD, listen_fd)", "err:%d errno:%d",
- err, errno);
-
- /* Accept the connection */
- /* Have some timeout in accept(listen_fd). Just in case. */
- err = epoll_wait(epfd, &ev, 1, 1000);
- CHECK(err != 1 || ev.data.fd != listen_fd,
- "epoll_wait(listen_fd)",
- "err:%d errno:%d ev.data.fd:%d listen_fd:%d",
- err, errno, ev.data.fd, listen_fd);
-
- accept_fd = accept(listen_fd, NULL, NULL);
- CHECK(accept_fd == -1, "accept(listen_fd)", "accept_fd:%d errno:%d",
- accept_fd, errno);
- close(listen_fd);
-
- ev.data.fd = cli_fd;
- err = epoll_ctl(epfd, EPOLL_CTL_ADD, cli_fd, &ev);
- CHECK(err, "epoll_ctl(EPOLL_CTL_ADD, cli_fd)", "err:%d errno:%d",
- err, errno);
-
- init_sk_storage(accept_fd, 2);
-
- for (i = 0; i < 2; i++) {
- /* Send some data from accept_fd to cli_fd */
- err = send(accept_fd, DATA, DATA_LEN, 0);
- CHECK(err != DATA_LEN, "send(accept_fd)", "err:%d errno:%d",
- err, errno);
-
- /* Have some timeout in recv(cli_fd). Just in case. */
- err = epoll_wait(epfd, &ev, 1, 1000);
- CHECK(err != 1 || ev.data.fd != cli_fd,
- "epoll_wait(cli_fd)", "err:%d errno:%d ev.data.fd:%d cli_fd:%d",
- err, errno, ev.data.fd, cli_fd);
-
- err = recv(cli_fd, NULL, 0, MSG_TRUNC);
- CHECK(err, "recv(cli_fd)", "err:%d errno:%d", err, errno);
- }
-
- check_sk_pkt_out_cnt(accept_fd, cli_fd);
-
- close(epfd);
- close(accept_fd);
- close(cli_fd);
-
- check_result();
-}
-
-int main(int argc, char **argv)
-{
- struct bpf_prog_load_attr attr = {
- .file = "test_sock_fields_kern.o",
- .prog_type = BPF_PROG_TYPE_CGROUP_SKB,
- .prog_flags = BPF_F_TEST_RND_HI32,
- };
- int cgroup_fd, egress_fd, ingress_fd, err;
- struct bpf_program *ingress_prog;
- struct bpf_object *obj;
- struct bpf_map *map;
-
- err = setup_cgroup_environment();
- CHECK(err, "setup_cgroup_environment()", "err:%d errno:%d",
- err, errno);
-
- atexit(cleanup_cgroup_environment);
-
- /* Create a cgroup, get fd, and join it */
- cgroup_fd = create_and_get_cgroup(TEST_CGROUP);
- CHECK(cgroup_fd == -1, "create_and_get_cgroup()",
- "cgroup_fd:%d errno:%d", cgroup_fd, errno);
-
- err = join_cgroup(TEST_CGROUP);
- CHECK(err, "join_cgroup", "err:%d errno:%d", err, errno);
-
- err = bpf_prog_load_xattr(&attr, &obj, &egress_fd);
- CHECK(err, "bpf_prog_load_xattr()", "err:%d", err);
-
- ingress_prog = bpf_object__find_program_by_title(obj,
- "cgroup_skb/ingress");
- CHECK(!ingress_prog,
- "bpf_object__find_program_by_title(cgroup_skb/ingress)",
- "not found");
- ingress_fd = bpf_program__fd(ingress_prog);
-
- err = bpf_prog_attach(egress_fd, cgroup_fd, BPF_CGROUP_INET_EGRESS, 0);
- CHECK(err == -1, "bpf_prog_attach(CPF_CGROUP_INET_EGRESS)",
- "err:%d errno%d", err, errno);
-
- err = bpf_prog_attach(ingress_fd, cgroup_fd,
- BPF_CGROUP_INET_INGRESS, 0);
- CHECK(err == -1, "bpf_prog_attach(CPF_CGROUP_INET_INGRESS)",
- "err:%d errno%d", err, errno);
- close(cgroup_fd);
-
- map = bpf_object__find_map_by_name(obj, "addr_map");
- CHECK(!map, "cannot find addr_map", "(null)");
- addr_map_fd = bpf_map__fd(map);
-
- map = bpf_object__find_map_by_name(obj, "sock_result_map");
- CHECK(!map, "cannot find sock_result_map", "(null)");
- sk_map_fd = bpf_map__fd(map);
-
- map = bpf_object__find_map_by_name(obj, "tcp_sock_result_map");
- CHECK(!map, "cannot find tcp_sock_result_map", "(null)");
- tp_map_fd = bpf_map__fd(map);
-
- map = bpf_object__find_map_by_name(obj, "linum_map");
- CHECK(!map, "cannot find linum_map", "(null)");
- linum_map_fd = bpf_map__fd(map);
-
- map = bpf_object__find_map_by_name(obj, "sk_pkt_out_cnt");
- CHECK(!map, "cannot find sk_pkt_out_cnt", "(null)");
- sk_pkt_out_cnt_fd = bpf_map__fd(map);
-
- map = bpf_object__find_map_by_name(obj, "sk_pkt_out_cnt10");
- CHECK(!map, "cannot find sk_pkt_out_cnt10", "(null)");
- sk_pkt_out_cnt10_fd = bpf_map__fd(map);
-
- test();
-
- bpf_object__close(obj);
- cleanup_cgroup_environment();
-
- printf("PASS\n");
-
- return 0;
-}
diff --git a/tools/testing/selftests/bpf/test_socket_cookie.c b/tools/testing/selftests/bpf/test_socket_cookie.c
deleted file mode 100644
index 15653b0e26eb..000000000000
--- a/tools/testing/selftests/bpf/test_socket_cookie.c
+++ /dev/null
@@ -1,214 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-// Copyright (c) 2018 Facebook
-
-#include <string.h>
-#include <unistd.h>
-
-#include <arpa/inet.h>
-#include <netinet/in.h>
-#include <sys/types.h>
-#include <sys/socket.h>
-
-#include <bpf/bpf.h>
-#include <bpf/libbpf.h>
-
-#include "bpf_rlimit.h"
-#include "cgroup_helpers.h"
-
-#define CG_PATH "/foo"
-#define SOCKET_COOKIE_PROG "./socket_cookie_prog.o"
-
-struct socket_cookie {
- __u64 cookie_key;
- __u32 cookie_value;
-};
-
-static int start_server(void)
-{
- struct sockaddr_in6 addr;
- int fd;
-
- fd = socket(AF_INET6, SOCK_STREAM, 0);
- if (fd == -1) {
- log_err("Failed to create server socket");
- goto out;
- }
-
- memset(&addr, 0, sizeof(addr));
- addr.sin6_family = AF_INET6;
- addr.sin6_addr = in6addr_loopback;
- addr.sin6_port = 0;
-
- if (bind(fd, (const struct sockaddr *)&addr, sizeof(addr)) == -1) {
- log_err("Failed to bind server socket");
- goto close_out;
- }
-
- if (listen(fd, 128) == -1) {
- log_err("Failed to listen on server socket");
- goto close_out;
- }
-
- goto out;
-
-close_out:
- close(fd);
- fd = -1;
-out:
- return fd;
-}
-
-static int connect_to_server(int server_fd)
-{
- struct sockaddr_storage addr;
- socklen_t len = sizeof(addr);
- int fd;
-
- fd = socket(AF_INET6, SOCK_STREAM, 0);
- if (fd == -1) {
- log_err("Failed to create client socket");
- goto out;
- }
-
- if (getsockname(server_fd, (struct sockaddr *)&addr, &len)) {
- log_err("Failed to get server addr");
- goto close_out;
- }
-
- if (connect(fd, (const struct sockaddr *)&addr, len) == -1) {
- log_err("Fail to connect to server");
- goto close_out;
- }
-
- goto out;
-
-close_out:
- close(fd);
- fd = -1;
-out:
- return fd;
-}
-
-static int validate_map(struct bpf_map *map, int client_fd)
-{
- __u32 cookie_expected_value;
- struct sockaddr_in6 addr;
- socklen_t len = sizeof(addr);
- struct socket_cookie val;
- int err = 0;
- int map_fd;
-
- if (!map) {
- log_err("Map not found in BPF object");
- goto err;
- }
-
- map_fd = bpf_map__fd(map);
-
- err = bpf_map_lookup_elem(map_fd, &client_fd, &val);
-
- err = getsockname(client_fd, (struct sockaddr *)&addr, &len);
- if (err) {
- log_err("Can't get client local addr");
- goto out;
- }
-
- cookie_expected_value = (ntohs(addr.sin6_port) << 8) | 0xFF;
- if (val.cookie_value != cookie_expected_value) {
- log_err("Unexpected value in map: %x != %x", val.cookie_value,
- cookie_expected_value);
- goto err;
- }
-
- goto out;
-err:
- err = -1;
-out:
- return err;
-}
-
-static int run_test(int cgfd)
-{
- enum bpf_attach_type attach_type;
- struct bpf_prog_load_attr attr;
- struct bpf_program *prog;
- struct bpf_object *pobj;
- const char *prog_name;
- int server_fd = -1;
- int client_fd = -1;
- int prog_fd = -1;
- int err = 0;
-
- memset(&attr, 0, sizeof(attr));
- attr.file = SOCKET_COOKIE_PROG;
- attr.prog_type = BPF_PROG_TYPE_UNSPEC;
- attr.prog_flags = BPF_F_TEST_RND_HI32;
-
- err = bpf_prog_load_xattr(&attr, &pobj, &prog_fd);
- if (err) {
- log_err("Failed to load %s", attr.file);
- goto out;
- }
-
- bpf_object__for_each_program(prog, pobj) {
- prog_name = bpf_program__title(prog, /*needs_copy*/ false);
-
- if (libbpf_attach_type_by_name(prog_name, &attach_type))
- goto err;
-
- err = bpf_prog_attach(bpf_program__fd(prog), cgfd, attach_type,
- BPF_F_ALLOW_OVERRIDE);
- if (err) {
- log_err("Failed to attach prog %s", prog_name);
- goto out;
- }
- }
-
- server_fd = start_server();
- if (server_fd == -1)
- goto err;
-
- client_fd = connect_to_server(server_fd);
- if (client_fd == -1)
- goto err;
-
- if (validate_map(bpf_map__next(NULL, pobj), client_fd))
- goto err;
-
- goto out;
-err:
- err = -1;
-out:
- close(client_fd);
- close(server_fd);
- bpf_object__close(pobj);
- printf("%s\n", err ? "FAILED" : "PASSED");
- return err;
-}
-
-int main(int argc, char **argv)
-{
- int cgfd = -1;
- int err = 0;
-
- if (setup_cgroup_environment())
- goto err;
-
- cgfd = create_and_get_cgroup(CG_PATH);
- if (cgfd < 0)
- goto err;
-
- if (join_cgroup(CG_PATH))
- goto err;
-
- if (run_test(cgfd))
- goto err;
-
- goto out;
-err:
- err = -1;
-out:
- close(cgfd);
- cleanup_cgroup_environment();
- return err;
-}
diff --git a/tools/testing/selftests/bpf/test_sockmap.c b/tools/testing/selftests/bpf/test_sockmap.c
index 78789b27e573..024a0faafb3b 100644
--- a/tools/testing/selftests/bpf/test_sockmap.c
+++ b/tools/testing/selftests/bpf/test_sockmap.c
@@ -18,7 +18,6 @@
#include <sched.h>
#include <sys/time.h>
-#include <sys/resource.h>
#include <sys/types.h>
#include <sys/sendfile.h>
@@ -37,7 +36,6 @@
#include <bpf/libbpf.h>
#include "bpf_util.h"
-#include "bpf_rlimit.h"
#include "cgroup_helpers.h"
int running;
@@ -54,8 +52,8 @@ static void running_handler(int a);
#define S1_PORT 10000
#define S2_PORT 10001
-#define BPF_SOCKMAP_FILENAME "test_sockmap_kern.o"
-#define BPF_SOCKHASH_FILENAME "test_sockhash_kern.o"
+#define BPF_SOCKMAP_FILENAME "test_sockmap_kern.bpf.o"
+#define BPF_SOCKHASH_FILENAME "test_sockhash_kern.bpf.o"
#define CG_PATH "/sockmap"
/* global sockets */
@@ -86,6 +84,7 @@ int txmsg_ktls_skb_redir;
int ktls;
int peek_flag;
int skb_use_parser;
+int txmsg_omit_skb_parser;
static const struct option long_options[] = {
{"help", no_argument, NULL, 'h' },
@@ -111,6 +110,7 @@ static const struct option long_options[] = {
{"txmsg_redir_skb", no_argument, &txmsg_redir_skb, 1 },
{"ktls", no_argument, &ktls, 1 },
{"peek", no_argument, &peek_flag, 1 },
+ {"txmsg_omit_skb_parser", no_argument, &txmsg_omit_skb_parser, 1},
{"whitelist", required_argument, NULL, 'n' },
{"blacklist", required_argument, NULL, 'b' },
{0, 0, NULL, 0 }
@@ -137,6 +137,8 @@ struct sockmap_options {
bool sendpage;
bool data_test;
bool drop_expected;
+ bool check_recved_len;
+ bool tx_wait_mem;
int iov_count;
int iov_length;
int rate;
@@ -175,6 +177,7 @@ static void test_reset(void)
txmsg_apply = txmsg_cork = 0;
txmsg_ingress = txmsg_redir_skb = 0;
txmsg_ktls_skb = txmsg_ktls_skb_drop = txmsg_ktls_skb_redir = 0;
+ txmsg_omit_skb_parser = 0;
skb_use_parser = 0;
}
@@ -518,28 +521,13 @@ static int msg_verify_data(struct msghdr *msg, int size, int chunk_sz)
if (i == 0 && txmsg_ktls_skb) {
if (msg->msg_iov[i].iov_len < 4)
return -EIO;
- if (txmsg_ktls_skb_redir) {
- if (memcmp(&d[13], "PASS", 4) != 0) {
- fprintf(stderr,
- "detected redirect ktls_skb data error with skb ingress update @iov[%i]:%i \"%02x %02x %02x %02x\" != \"PASS\"\n", i, 0, d[13], d[14], d[15], d[16]);
- return -EIO;
- }
- d[13] = 0;
- d[14] = 1;
- d[15] = 2;
- d[16] = 3;
- j = 13;
- } else if (txmsg_ktls_skb) {
- if (memcmp(d, "PASS", 4) != 0) {
- fprintf(stderr,
- "detected ktls_skb data error with skb ingress update @iov[%i]:%i \"%02x %02x %02x %02x\" != \"PASS\"\n", i, 0, d[0], d[1], d[2], d[3]);
- return -EIO;
- }
- d[0] = 0;
- d[1] = 1;
- d[2] = 2;
- d[3] = 3;
+ if (memcmp(d, "PASS", 4) != 0) {
+ fprintf(stderr,
+ "detected skb data error with skb ingress update @iov[%i]:%i \"%02x %02x %02x %02x\" != \"PASS\"\n",
+ i, 0, d[0], d[1], d[2], d[3]);
+ return -EIO;
}
+ j = 4; /* advance index past PASS header */
}
for (; j < msg->msg_iov[i].iov_len && size; j++) {
@@ -568,8 +556,12 @@ static int msg_loop(int fd, int iov_count, int iov_length, int cnt,
int err, i, flags = MSG_NOSIGNAL;
bool drop = opt->drop_expected;
bool data = opt->data_test;
+ int iov_alloc_length = iov_length;
+
+ if (!tx && opt->check_recved_len)
+ iov_alloc_length *= 2;
- err = msg_alloc_iov(&msg, iov_count, iov_length, data, tx);
+ err = msg_alloc_iov(&msg, iov_count, iov_alloc_length, data, tx);
if (err)
goto out_errno;
if (peek_flag) {
@@ -587,6 +579,10 @@ static int msg_loop(int fd, int iov_count, int iov_length, int cnt,
sent = sendmsg(fd, &msg, flags);
if (!drop && sent < 0) {
+ if (opt->tx_wait_mem && errno == EACCES) {
+ errno = 0;
+ goto out_errno;
+ }
perror("sendmsg loop error");
goto out_errno;
} else if (drop && sent >= 0) {
@@ -653,6 +649,15 @@ static int msg_loop(int fd, int iov_count, int iov_length, int cnt,
goto out_errno;
}
+ if (opt->tx_wait_mem) {
+ FD_ZERO(&w);
+ FD_SET(fd, &w);
+ slct = select(max_fd + 1, NULL, NULL, &w, &timeout);
+ errno = 0;
+ close(fd);
+ goto out_errno;
+ }
+
errno = 0;
if (peek_flag) {
flags |= MSG_PEEK;
@@ -677,6 +682,13 @@ static int msg_loop(int fd, int iov_count, int iov_length, int cnt,
s->bytes_recvd += recv;
+ if (opt->check_recved_len && s->bytes_recvd > total_bytes) {
+ errno = EMSGSIZE;
+ fprintf(stderr, "recv failed(), bytes_recvd:%zd, total_bytes:%f\n",
+ s->bytes_recvd, total_bytes);
+ goto out_errno;
+ }
+
if (data) {
int chunk_sz = opt->sendpage ?
iov_length * cnt :
@@ -744,7 +756,7 @@ static int sendmsg_test(struct sockmap_options *opt)
* socket is not a valid test. So in this case lets not
* enable kTLS but still run the test.
*/
- if (!txmsg_redir || (txmsg_redir && txmsg_ingress)) {
+ if (!txmsg_redir || txmsg_ingress) {
err = sockmap_init_ktls(opt->verbose, rx_fd);
if (err)
return err;
@@ -754,9 +766,26 @@ static int sendmsg_test(struct sockmap_options *opt)
return err;
}
+ if (opt->tx_wait_mem) {
+ struct timeval timeout;
+ int rxtx_buf_len = 1024;
+
+ timeout.tv_sec = 3;
+ timeout.tv_usec = 0;
+
+ err = setsockopt(c2, SOL_SOCKET, SO_SNDTIMEO, &timeout, sizeof(struct timeval));
+ err |= setsockopt(c2, SOL_SOCKET, SO_SNDBUFFORCE, &rxtx_buf_len, sizeof(int));
+ err |= setsockopt(p2, SOL_SOCKET, SO_RCVBUFFORCE, &rxtx_buf_len, sizeof(int));
+ if (err) {
+ perror("setsockopt failed()");
+ return errno;
+ }
+ }
+
rxpid = fork();
if (rxpid == 0) {
- iov_buf -= (txmsg_pop - txmsg_start_pop + 1);
+ if (txmsg_pop || txmsg_start_pop)
+ iov_buf -= (txmsg_pop - txmsg_start_pop + 1);
if (opt->drop_expected || txmsg_ktls_skb_drop)
_exit(0);
@@ -789,6 +818,9 @@ static int sendmsg_test(struct sockmap_options *opt)
return errno;
}
+ if (opt->tx_wait_mem)
+ close(c2);
+
txpid = fork();
if (txpid == 0) {
if (opt->sendpage)
@@ -927,13 +959,15 @@ static int run_options(struct sockmap_options *options, int cg_fd, int test)
goto run;
/* Attach programs to sockmap */
- err = bpf_prog_attach(prog_fd[0], map_fd[0],
- BPF_SK_SKB_STREAM_PARSER, 0);
- if (err) {
- fprintf(stderr,
- "ERROR: bpf_prog_attach (sockmap %i->%i): %d (%s)\n",
- prog_fd[0], map_fd[0], err, strerror(errno));
- return err;
+ if (!txmsg_omit_skb_parser) {
+ err = bpf_prog_attach(prog_fd[0], map_fd[0],
+ BPF_SK_SKB_STREAM_PARSER, 0);
+ if (err) {
+ fprintf(stderr,
+ "ERROR: bpf_prog_attach (sockmap %i->%i): %d (%s)\n",
+ prog_fd[0], map_fd[0], err, strerror(errno));
+ return err;
+ }
}
err = bpf_prog_attach(prog_fd[1], map_fd[0],
@@ -946,13 +980,15 @@ static int run_options(struct sockmap_options *options, int cg_fd, int test)
/* Attach programs to TLS sockmap */
if (txmsg_ktls_skb) {
- err = bpf_prog_attach(prog_fd[0], map_fd[8],
- BPF_SK_SKB_STREAM_PARSER, 0);
- if (err) {
- fprintf(stderr,
- "ERROR: bpf_prog_attach (TLS sockmap %i->%i): %d (%s)\n",
- prog_fd[0], map_fd[8], err, strerror(errno));
- return err;
+ if (!txmsg_omit_skb_parser) {
+ err = bpf_prog_attach(prog_fd[0], map_fd[8],
+ BPF_SK_SKB_STREAM_PARSER, 0);
+ if (err) {
+ fprintf(stderr,
+ "ERROR: bpf_prog_attach (TLS sockmap %i->%i): %d (%s)\n",
+ prog_fd[0], map_fd[8], err, strerror(errno));
+ return err;
+ }
}
err = bpf_prog_attach(prog_fd[2], map_fd[8],
@@ -1281,6 +1317,16 @@ static char *test_to_str(int test)
return "unknown";
}
+static void append_str(char *dst, const char *src, size_t dst_cap)
+{
+ size_t avail = dst_cap - strlen(dst);
+
+ if (avail <= 1) /* just zero byte could be written */
+ return;
+
+ strncat(dst, src, avail - 1); /* strncat() adds + 1 for zero byte */
+}
+
#define OPTSTRING 60
static void test_options(char *options)
{
@@ -1289,42 +1335,42 @@ static void test_options(char *options)
memset(options, 0, OPTSTRING);
if (txmsg_pass)
- strncat(options, "pass,", OPTSTRING);
+ append_str(options, "pass,", OPTSTRING);
if (txmsg_redir)
- strncat(options, "redir,", OPTSTRING);
+ append_str(options, "redir,", OPTSTRING);
if (txmsg_drop)
- strncat(options, "drop,", OPTSTRING);
+ append_str(options, "drop,", OPTSTRING);
if (txmsg_apply) {
snprintf(tstr, OPTSTRING, "apply %d,", txmsg_apply);
- strncat(options, tstr, OPTSTRING);
+ append_str(options, tstr, OPTSTRING);
}
if (txmsg_cork) {
snprintf(tstr, OPTSTRING, "cork %d,", txmsg_cork);
- strncat(options, tstr, OPTSTRING);
+ append_str(options, tstr, OPTSTRING);
}
if (txmsg_start) {
snprintf(tstr, OPTSTRING, "start %d,", txmsg_start);
- strncat(options, tstr, OPTSTRING);
+ append_str(options, tstr, OPTSTRING);
}
if (txmsg_end) {
snprintf(tstr, OPTSTRING, "end %d,", txmsg_end);
- strncat(options, tstr, OPTSTRING);
+ append_str(options, tstr, OPTSTRING);
}
if (txmsg_start_pop) {
snprintf(tstr, OPTSTRING, "pop (%d,%d),",
txmsg_start_pop, txmsg_start_pop + txmsg_pop);
- strncat(options, tstr, OPTSTRING);
+ append_str(options, tstr, OPTSTRING);
}
if (txmsg_ingress)
- strncat(options, "ingress,", OPTSTRING);
+ append_str(options, "ingress,", OPTSTRING);
if (txmsg_redir_skb)
- strncat(options, "redir_skb,", OPTSTRING);
+ append_str(options, "redir_skb,", OPTSTRING);
if (txmsg_ktls_skb)
- strncat(options, "ktls_skb,", OPTSTRING);
+ append_str(options, "ktls_skb,", OPTSTRING);
if (ktls)
- strncat(options, "ktls,", OPTSTRING);
+ append_str(options, "ktls,", OPTSTRING);
if (peek_flag)
- strncat(options, "peek,", OPTSTRING);
+ append_str(options, "peek,", OPTSTRING);
}
static int __test_exec(int cgrp, int test, struct sockmap_options *opt)
@@ -1439,6 +1485,14 @@ static void test_txmsg_redir(int cgrp, struct sockmap_options *opt)
test_send(opt, cgrp);
}
+static void test_txmsg_redir_wait_sndmem(int cgrp, struct sockmap_options *opt)
+{
+ txmsg_redir = 1;
+ opt->tx_wait_mem = true;
+ test_send_large(opt, cgrp);
+ opt->tx_wait_mem = false;
+}
+
static void test_txmsg_drop(int cgrp, struct sockmap_options *opt)
{
txmsg_drop = 1;
@@ -1480,12 +1534,29 @@ static void test_txmsg_skb(int cgrp, struct sockmap_options *opt)
txmsg_ktls_skb_drop = 0;
txmsg_ktls_skb_redir = 1;
test_exec(cgrp, opt);
+ txmsg_ktls_skb_redir = 0;
+
+ /* Tests that omit skb_parser */
+ txmsg_omit_skb_parser = 1;
+ ktls = 0;
+ txmsg_ktls_skb = 0;
+ test_exec(cgrp, opt);
+
+ txmsg_ktls_skb_drop = 1;
+ test_exec(cgrp, opt);
+ txmsg_ktls_skb_drop = 0;
+
+ txmsg_ktls_skb_redir = 1;
+ test_exec(cgrp, opt);
+
+ ktls = 1;
+ test_exec(cgrp, opt);
+ txmsg_omit_skb_parser = 0;
opt->data_test = data;
ktls = k;
}
-
/* Test cork with hung data. This tests poor usage patterns where
* cork can leave data on the ring if user program is buggy and
* doesn't flush them somehow. They do take some time however
@@ -1619,24 +1690,42 @@ static void test_txmsg_apply(int cgrp, struct sockmap_options *opt)
{
txmsg_pass = 1;
txmsg_redir = 0;
+ txmsg_ingress = 0;
+ txmsg_apply = 1;
+ txmsg_cork = 0;
+ test_send_one(opt, cgrp);
+
+ txmsg_pass = 0;
+ txmsg_redir = 1;
+ txmsg_ingress = 0;
txmsg_apply = 1;
txmsg_cork = 0;
test_send_one(opt, cgrp);
txmsg_pass = 0;
txmsg_redir = 1;
+ txmsg_ingress = 1;
txmsg_apply = 1;
txmsg_cork = 0;
test_send_one(opt, cgrp);
txmsg_pass = 1;
txmsg_redir = 0;
+ txmsg_ingress = 0;
txmsg_apply = 1024;
txmsg_cork = 0;
test_send_large(opt, cgrp);
txmsg_pass = 0;
txmsg_redir = 1;
+ txmsg_ingress = 0;
+ txmsg_apply = 1024;
+ txmsg_cork = 0;
+ test_send_large(opt, cgrp);
+
+ txmsg_pass = 0;
+ txmsg_redir = 1;
+ txmsg_ingress = 1;
txmsg_apply = 1024;
txmsg_cork = 0;
test_send_large(opt, cgrp);
@@ -1661,12 +1750,27 @@ static void test_txmsg_ingress_parser(int cgrp, struct sockmap_options *opt)
{
txmsg_pass = 1;
skb_use_parser = 512;
+ if (ktls == 1)
+ skb_use_parser = 570;
opt->iov_length = 256;
opt->iov_count = 1;
opt->rate = 2;
test_exec(cgrp, opt);
}
+static void test_txmsg_ingress_parser2(int cgrp, struct sockmap_options *opt)
+{
+ if (ktls == 1)
+ return;
+ skb_use_parser = 10;
+ opt->iov_length = 20;
+ opt->iov_count = 1;
+ opt->rate = 1;
+ opt->check_recved_len = true;
+ test_exec(cgrp, opt);
+ opt->check_recved_len = false;
+}
+
char *map_names[] = {
"sock_map",
"sock_map_txmsg",
@@ -1739,7 +1843,7 @@ static int populate_progs(char *bpf_file)
i++;
}
- for (i = 0; i < sizeof(map_fd)/sizeof(int); i++) {
+ for (i = 0; i < ARRAY_SIZE(map_fd); i++) {
maps[i] = bpf_object__find_map_by_name(obj, map_names[i]);
map_fd[i] = bpf_map__fd(maps[i]);
if (map_fd[i] < 0) {
@@ -1755,6 +1859,7 @@ static int populate_progs(char *bpf_file)
struct _test test[] = {
{"txmsg test passthrough", test_txmsg_pass},
{"txmsg test redirect", test_txmsg_redir},
+ {"txmsg test redirect wait send mem", test_txmsg_redir_wait_sndmem},
{"txmsg test drop", test_txmsg_drop},
{"txmsg test ingress redirect", test_txmsg_ingress_redir},
{"txmsg test skb", test_txmsg_skb},
@@ -1765,7 +1870,8 @@ struct _test test[] = {
{"txmsg test pull-data", test_txmsg_pull},
{"txmsg test pop-data", test_txmsg_pop},
{"txmsg test push/pop data", test_txmsg_push_pop},
- {"txmsg text ingress parser", test_txmsg_ingress_parser},
+ {"txmsg test ingress parser", test_txmsg_ingress_parser},
+ {"txmsg test ingress parser2", test_txmsg_ingress_parser2},
};
static int check_whitelist(struct _test *t, struct sockmap_options *opt)
@@ -1819,7 +1925,7 @@ static int __test_selftests(int cg_fd, struct sockmap_options *opt)
}
/* Tests basic commands and APIs */
- for (i = 0; i < sizeof(test)/sizeof(struct _test); i++) {
+ for (i = 0; i < ARRAY_SIZE(test); i++) {
struct _test t = test[i];
if (check_whitelist(&t, opt) != 0)
@@ -1963,26 +2069,15 @@ int main(int argc, char **argv)
}
if (!cg_fd) {
- if (setup_cgroup_environment()) {
- fprintf(stderr, "ERROR: cgroup env failed\n");
- return -EINVAL;
- }
-
- cg_fd = create_and_get_cgroup(CG_PATH);
- if (cg_fd < 0) {
- fprintf(stderr,
- "ERROR: (%i) open cg path failed: %s\n",
- cg_fd, strerror(errno));
+ cg_fd = cgroup_setup_and_join(CG_PATH);
+ if (cg_fd < 0)
return cg_fd;
- }
-
- if (join_cgroup(CG_PATH)) {
- fprintf(stderr, "ERROR: failed to join cgroup\n");
- return -EINVAL;
- }
cg_created = 1;
}
+ /* Use libbpf 1.0 API mode */
+ libbpf_set_strict_mode(LIBBPF_STRICT_ALL);
+
if (test == SELFTESTS) {
err = test_selftest(cg_fd, &options);
goto out;
diff --git a/tools/testing/selftests/bpf/test_stub.c b/tools/testing/selftests/bpf/test_stub.c
deleted file mode 100644
index 47e132726203..000000000000
--- a/tools/testing/selftests/bpf/test_stub.c
+++ /dev/null
@@ -1,44 +0,0 @@
-// SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
-/* Copyright (C) 2019 Netronome Systems, Inc. */
-
-#include <bpf/bpf.h>
-#include <bpf/libbpf.h>
-#include <string.h>
-
-int extra_prog_load_log_flags = 0;
-
-int bpf_prog_test_load(const char *file, enum bpf_prog_type type,
- struct bpf_object **pobj, int *prog_fd)
-{
- struct bpf_prog_load_attr attr;
-
- memset(&attr, 0, sizeof(struct bpf_prog_load_attr));
- attr.file = file;
- attr.prog_type = type;
- attr.expected_attach_type = 0;
- attr.prog_flags = BPF_F_TEST_RND_HI32;
- attr.log_level = extra_prog_load_log_flags;
-
- return bpf_prog_load_xattr(&attr, pobj, prog_fd);
-}
-
-int bpf_test_load_program(enum bpf_prog_type type, const struct bpf_insn *insns,
- size_t insns_cnt, const char *license,
- __u32 kern_version, char *log_buf,
- size_t log_buf_sz)
-{
- struct bpf_load_program_attr load_attr;
-
- memset(&load_attr, 0, sizeof(struct bpf_load_program_attr));
- load_attr.prog_type = type;
- load_attr.expected_attach_type = 0;
- load_attr.name = NULL;
- load_attr.insns = insns;
- load_attr.insns_cnt = insns_cnt;
- load_attr.license = license;
- load_attr.kern_version = kern_version;
- load_attr.prog_flags = BPF_F_TEST_RND_HI32;
- load_attr.log_level = extra_prog_load_log_flags;
-
- return bpf_load_program_xattr(&load_attr, log_buf, log_buf_sz);
-}
diff --git a/tools/testing/selftests/bpf/test_sysctl.c b/tools/testing/selftests/bpf/test_sysctl.c
index d196e2a4a6e0..bcdbd27f22f0 100644
--- a/tools/testing/selftests/bpf/test_sysctl.c
+++ b/tools/testing/selftests/bpf/test_sysctl.c
@@ -14,9 +14,9 @@
#include <bpf/libbpf.h>
#include <bpf/bpf_endian.h>
-#include "bpf_rlimit.h"
#include "bpf_util.h"
#include "cgroup_helpers.h"
+#include "testing_helpers.h"
#define CG_PATH "/foo"
#define MAX_INSNS 512
@@ -124,7 +124,7 @@ static struct sysctl_test tests[] = {
.descr = "ctx:write sysctl:write read ok narrow",
.insns = {
/* u64 w = (u16)write & 1; */
-#if __BYTE_ORDER == __LITTLE_ENDIAN
+#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
BPF_LDX_MEM(BPF_H, BPF_REG_7, BPF_REG_1,
offsetof(struct bpf_sysctl, write)),
#else
@@ -184,7 +184,7 @@ static struct sysctl_test tests[] = {
.descr = "ctx:file_pos sysctl:read read ok narrow",
.insns = {
/* If (file_pos == X) */
-#if __BYTE_ORDER == __LITTLE_ENDIAN
+#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
BPF_LDX_MEM(BPF_B, BPF_REG_7, BPF_REG_1,
offsetof(struct bpf_sysctl, file_pos)),
#else
@@ -1372,7 +1372,7 @@ static struct sysctl_test tests[] = {
},
{
"C prog: deny all writes",
- .prog_file = "./test_sysctl_prog.o",
+ .prog_file = "./test_sysctl_prog.bpf.o",
.attach_type = BPF_CGROUP_SYSCTL,
.sysctl = "net/ipv4/tcp_mem",
.open_flags = O_WRONLY,
@@ -1381,7 +1381,7 @@ static struct sysctl_test tests[] = {
},
{
"C prog: deny access by name",
- .prog_file = "./test_sysctl_prog.o",
+ .prog_file = "./test_sysctl_prog.bpf.o",
.attach_type = BPF_CGROUP_SYSCTL,
.sysctl = "net/ipv4/route/mtu_expires",
.open_flags = O_RDONLY,
@@ -1389,7 +1389,7 @@ static struct sysctl_test tests[] = {
},
{
"C prog: read tcp_mem",
- .prog_file = "./test_sysctl_prog.o",
+ .prog_file = "./test_sysctl_prog.bpf.o",
.attach_type = BPF_CGROUP_SYSCTL,
.sysctl = "net/ipv4/tcp_mem",
.open_flags = O_RDONLY,
@@ -1435,14 +1435,10 @@ static int load_sysctl_prog_insns(struct sysctl_test *test,
const char *sysctl_path)
{
struct bpf_insn *prog = test->insns;
- struct bpf_load_program_attr attr;
- int ret;
+ LIBBPF_OPTS(bpf_prog_load_opts, opts);
+ int ret, insn_cnt;
- memset(&attr, 0, sizeof(struct bpf_load_program_attr));
- attr.prog_type = BPF_PROG_TYPE_CGROUP_SYSCTL;
- attr.insns = prog;
- attr.insns_cnt = probe_prog_length(attr.insns);
- attr.license = "GPL";
+ insn_cnt = probe_prog_length(prog);
if (test->fixup_value_insn) {
char buf[128];
@@ -1465,7 +1461,10 @@ static int load_sysctl_prog_insns(struct sysctl_test *test,
return -1;
}
- ret = bpf_load_program_xattr(&attr, bpf_log_buf, BPF_LOG_BUF_SIZE);
+ opts.log_buf = bpf_log_buf;
+ opts.log_size = BPF_LOG_BUF_SIZE;
+
+ ret = bpf_prog_load(BPF_PROG_TYPE_CGROUP_SYSCTL, NULL, "GPL", prog, insn_cnt, &opts);
if (ret < 0 && test->result != LOAD_REJECT) {
log_err(">>> Loading program error.\n"
">>> Verifier output:\n%s\n-------\n", bpf_log_buf);
@@ -1476,15 +1475,10 @@ static int load_sysctl_prog_insns(struct sysctl_test *test,
static int load_sysctl_prog_file(struct sysctl_test *test)
{
- struct bpf_prog_load_attr attr;
struct bpf_object *obj;
int prog_fd;
- memset(&attr, 0, sizeof(struct bpf_prog_load_attr));
- attr.file = test->prog_file;
- attr.prog_type = BPF_PROG_TYPE_CGROUP_SYSCTL;
-
- if (bpf_prog_load_xattr(&attr, &obj, &prog_fd)) {
+ if (bpf_prog_test_load(test->prog_file, BPF_PROG_TYPE_CGROUP_SYSCTL, &obj, &prog_fd)) {
if (test->result != LOAD_REJECT)
log_err(">>> Loading program (%s) error.\n",
test->prog_file);
@@ -1566,7 +1560,7 @@ static int run_test_case(int cgfd, struct sysctl_test *test)
goto err;
}
- if (bpf_prog_attach(progfd, cgfd, atype, BPF_F_ALLOW_OVERRIDE) == -1) {
+ if (bpf_prog_attach(progfd, cgfd, atype, BPF_F_ALLOW_OVERRIDE) < 0) {
if (test->result == ATTACH_REJECT)
goto out;
else
@@ -1619,15 +1613,12 @@ int main(int argc, char **argv)
int cgfd = -1;
int err = 0;
- if (setup_cgroup_environment())
- goto err;
-
- cgfd = create_and_get_cgroup(CG_PATH);
+ cgfd = cgroup_setup_and_join(CG_PATH);
if (cgfd < 0)
goto err;
- if (join_cgroup(CG_PATH))
- goto err;
+ /* Use libbpf 1.0 API mode */
+ libbpf_set_strict_mode(LIBBPF_STRICT_ALL);
if (run_tests(cgfd))
goto err;
diff --git a/tools/testing/selftests/bpf/test_tag.c b/tools/testing/selftests/bpf/test_tag.c
index 6272c784ca2a..5546b05a0486 100644
--- a/tools/testing/selftests/bpf/test_tag.c
+++ b/tools/testing/selftests/bpf/test_tag.c
@@ -20,7 +20,7 @@
#include <bpf/bpf.h>
#include "../../../include/linux/filter.h"
-#include "bpf_rlimit.h"
+#include "testing_helpers.h"
static struct bpf_insn prog[BPF_MAXINSNS];
@@ -57,7 +57,7 @@ static int bpf_try_load_prog(int insns, int fd_map,
int fd_prog;
bpf_filler(insns, fd_map);
- fd_prog = bpf_load_program(BPF_PROG_TYPE_SCHED_CLS, prog, insns, "", 0,
+ fd_prog = bpf_test_load_program(BPF_PROG_TYPE_SCHED_CLS, prog, insns, "", 0,
NULL, 0);
assert(fd_prog > 0);
if (fd_map > 0)
@@ -184,11 +184,15 @@ static void do_test(uint32_t *tests, int start_insns, int fd_map,
int main(void)
{
+ LIBBPF_OPTS(bpf_map_create_opts, opts, .map_flags = BPF_F_NO_PREALLOC);
uint32_t tests = 0;
int i, fd_map;
- fd_map = bpf_create_map(BPF_MAP_TYPE_HASH, sizeof(int),
- sizeof(int), 1, BPF_F_NO_PREALLOC);
+ /* Use libbpf 1.0 API mode */
+ libbpf_set_strict_mode(LIBBPF_STRICT_ALL);
+
+ fd_map = bpf_map_create(BPF_MAP_TYPE_HASH, NULL, sizeof(int),
+ sizeof(int), 1, &opts);
assert(fd_map > 0);
for (i = 0; i < 5; i++) {
diff --git a/tools/testing/selftests/bpf/test_tc_edt.sh b/tools/testing/selftests/bpf/test_tc_edt.sh
index daa7d1b8d309..76f0bd17061f 100755
--- a/tools/testing/selftests/bpf/test_tc_edt.sh
+++ b/tools/testing/selftests/bpf/test_tc_edt.sh
@@ -5,6 +5,7 @@
# with dst port = 9000 down to 5MBps. Then it measures actual
# throughput of the flow.
+BPF_FILE="test_tc_edt.bpf.o"
if [[ $EUID -ne 0 ]]; then
echo "This script must be run as root"
echo "FAIL"
@@ -54,7 +55,7 @@ ip -netns ${NS_DST} route add ${IP_SRC}/32 dev veth_dst
ip netns exec ${NS_SRC} tc qdisc add dev veth_src root fq
ip netns exec ${NS_SRC} tc qdisc add dev veth_src clsact
ip netns exec ${NS_SRC} tc filter add dev veth_src egress \
- bpf da obj test_tc_edt.o sec cls_test
+ bpf da obj ${BPF_FILE} sec cls_test
# start the listener
diff --git a/tools/testing/selftests/bpf/test_tc_tunnel.sh b/tools/testing/selftests/bpf/test_tc_tunnel.sh
index 7c76b841b17b..910044f08908 100755
--- a/tools/testing/selftests/bpf/test_tc_tunnel.sh
+++ b/tools/testing/selftests/bpf/test_tc_tunnel.sh
@@ -3,6 +3,7 @@
#
# In-place tunneling
+BPF_FILE="test_tc_tunnel.bpf.o"
# must match the port that the bpf program filters on
readonly port=8000
@@ -44,8 +45,8 @@ setup() {
# clamp route to reserve room for tunnel headers
ip -netns "${ns1}" -4 route flush table main
ip -netns "${ns1}" -6 route flush table main
- ip -netns "${ns1}" -4 route add "${ns2_v4}" mtu 1458 dev veth1
- ip -netns "${ns1}" -6 route add "${ns2_v6}" mtu 1438 dev veth1
+ ip -netns "${ns1}" -4 route add "${ns2_v4}" mtu 1450 dev veth1
+ ip -netns "${ns1}" -6 route add "${ns2_v6}" mtu 1430 dev veth1
sleep 1
@@ -69,7 +70,7 @@ cleanup() {
}
server_listen() {
- ip netns exec "${ns2}" nc "${netcat_opt}" -l -p "${port}" > "${outfile}" &
+ ip netns exec "${ns2}" nc "${netcat_opt}" -l "${port}" > "${outfile}" &
server_pid=$!
sleep 0.2
}
@@ -99,12 +100,21 @@ if [[ "$#" -eq "0" ]]; then
echo "ipip"
$0 ipv4 ipip none 100
+ echo "ipip6"
+ $0 ipv4 ipip6 none 100
+
echo "ip6ip6"
$0 ipv6 ip6tnl none 100
echo "sit"
$0 ipv6 sit none 100
+ echo "ip4 vxlan"
+ $0 ipv4 vxlan eth 2000
+
+ echo "ip6 vxlan"
+ $0 ipv6 ip6vxlan eth 2000
+
for mac in none mpls eth ; do
echo "ip gre $mac"
$0 ipv4 gre $mac 100
@@ -190,7 +200,7 @@ verify_data
# client can no longer connect
ip netns exec "${ns1}" tc qdisc add dev veth1 clsact
ip netns exec "${ns1}" tc filter add dev veth1 egress \
- bpf direct-action object-file ./test_tc_tunnel.o \
+ bpf direct-action object-file ${BPF_FILE} \
section "encap_${tuntype}_${mac}"
echo "test bpf encap without decap (expect failure)"
server_listen
@@ -214,6 +224,12 @@ if [[ "$tuntype" =~ "udp" ]]; then
targs="encap fou encap-sport auto encap-dport $dport"
elif [[ "$tuntype" =~ "gre" && "$mac" == "eth" ]]; then
ttype=$gretaptype
+elif [[ "$tuntype" =~ "vxlan" && "$mac" == "eth" ]]; then
+ ttype="vxlan"
+ targs="id 1 dstport 8472 udp6zerocsumrx"
+elif [[ "$tuntype" == "ipip6" ]]; then
+ ttype="ip6tnl"
+ targs=""
else
ttype=$tuntype
targs=""
@@ -223,6 +239,9 @@ fi
if [[ "${tuntype}" == "sit" ]]; then
link_addr1="${ns1_v4}"
link_addr2="${ns2_v4}"
+elif [[ "${tuntype}" == "ipip6" ]]; then
+ link_addr1="${ns1_v6}"
+ link_addr2="${ns2_v6}"
else
link_addr1="${addr1}"
link_addr2="${addr2}"
@@ -242,7 +261,7 @@ if [[ "$tuntype" == "ip6udp" && "$mac" == "mpls" ]]; then
elif [[ "$tuntype" =~ "udp" && "$mac" == "eth" ]]; then
# No support for TEB fou tunnel; expect failure.
expect_tun_fail=1
-elif [[ "$tuntype" =~ "gre" && "$mac" == "eth" ]]; then
+elif [[ "$tuntype" =~ (gre|vxlan) && "$mac" == "eth" ]]; then
# Share ethernet address between tunnel/veth2 so L2 decap works.
ethaddr=$(ip netns exec "${ns2}" ip link show veth2 | \
awk '/ether/ { print $2 }')
@@ -277,17 +296,11 @@ else
server_listen
fi
-# bpf_skb_net_shrink does not take tunnel flags yet, cannot update L3.
-if [[ "${tuntype}" == "sit" ]]; then
- echo OK
- exit 0
-fi
-
# serverside, use BPF for decap
ip netns exec "${ns2}" ip link del dev testtun0
ip netns exec "${ns2}" tc qdisc add dev veth2 clsact
ip netns exec "${ns2}" tc filter add dev veth2 ingress \
- bpf direct-action object-file ./test_tc_tunnel.o section decap
+ bpf direct-action object-file ${BPF_FILE} section decap
echo "test bpf encap with bpf decap"
client_connect
verify_data
diff --git a/tools/testing/selftests/bpf/test_tcp_check_syncookie.sh b/tools/testing/selftests/bpf/test_tcp_check_syncookie.sh
index 9b3617d770a5..b42c24282c25 100755
--- a/tools/testing/selftests/bpf/test_tcp_check_syncookie.sh
+++ b/tools/testing/selftests/bpf/test_tcp_check_syncookie.sh
@@ -4,6 +4,7 @@
# Copyright (c) 2019 Cloudflare
set -eu
+readonly NS1="ns1-$(mktemp -u XXXXXX)"
wait_for_ip()
{
@@ -28,12 +29,12 @@ get_prog_id()
ns1_exec()
{
- ip netns exec ns1 "$@"
+ ip netns exec ${NS1} "$@"
}
setup()
{
- ip netns add ns1
+ ip netns add ${NS1}
ns1_exec ip link set lo up
ns1_exec sysctl -w net.ipv4.tcp_syncookies=2
@@ -75,9 +76,9 @@ main()
DIR=$(dirname $0)
TEST_IF=lo
MAX_PING_TRIES=5
-BPF_PROG_OBJ="${DIR}/test_tcp_check_syncookie_kern.o"
-CLSACT_SECTION="clsact/check_syncookie"
-XDP_SECTION="xdp/check_syncookie"
+BPF_PROG_OBJ="${DIR}/test_tcp_check_syncookie_kern.bpf.o"
+CLSACT_SECTION="tc"
+XDP_SECTION="xdp"
BPF_PROG_ID=0
PROG="${DIR}/test_tcp_check_syncookie_user"
diff --git a/tools/testing/selftests/bpf/test_tcp_check_syncookie_user.c b/tools/testing/selftests/bpf/test_tcp_check_syncookie_user.c
index b9e991d43155..32df93747095 100644
--- a/tools/testing/selftests/bpf/test_tcp_check_syncookie_user.c
+++ b/tools/testing/selftests/bpf/test_tcp_check_syncookie_user.c
@@ -15,11 +15,11 @@
#include <bpf/bpf.h>
#include <bpf/libbpf.h>
-#include "bpf_rlimit.h"
#include "cgroup_helpers.h"
-static int start_server(const struct sockaddr *addr, socklen_t len)
+static int start_server(const struct sockaddr *addr, socklen_t len, bool dual)
{
+ int mode = !dual;
int fd;
fd = socket(addr->sa_family, SOCK_STREAM, 0);
@@ -28,6 +28,14 @@ static int start_server(const struct sockaddr *addr, socklen_t len)
goto out;
}
+ if (addr->sa_family == AF_INET6) {
+ if (setsockopt(fd, IPPROTO_IPV6, IPV6_V6ONLY, (char *)&mode,
+ sizeof(mode)) == -1) {
+ log_err("Failed to set the dual-stack mode");
+ goto close_out;
+ }
+ }
+
if (bind(fd, addr, len) == -1) {
log_err("Failed to bind server socket");
goto close_out;
@@ -47,24 +55,17 @@ out:
return fd;
}
-static int connect_to_server(int server_fd)
+static int connect_to_server(const struct sockaddr *addr, socklen_t len)
{
- struct sockaddr_storage addr;
- socklen_t len = sizeof(addr);
int fd = -1;
- if (getsockname(server_fd, (struct sockaddr *)&addr, &len)) {
- log_err("Failed to get server addr");
- goto out;
- }
-
- fd = socket(addr.ss_family, SOCK_STREAM, 0);
+ fd = socket(addr->sa_family, SOCK_STREAM, 0);
if (fd == -1) {
log_err("Failed to create client socket");
goto out;
}
- if (connect(fd, (const struct sockaddr *)&addr, len) == -1) {
+ if (connect(fd, (const struct sockaddr *)addr, len) == -1) {
log_err("Fail to connect to server");
goto close_out;
}
@@ -95,7 +96,7 @@ static int get_map_fd_by_prog_id(int prog_id, bool *xdp)
info.nr_map_ids = 1;
info.map_ids = (__u64)(unsigned long)map_ids;
- if (bpf_obj_get_info_by_fd(prog_fd, &info, &info_len)) {
+ if (bpf_prog_get_info_by_fd(prog_fd, &info, &info_len)) {
log_err("Failed to get info by prog fd %d", prog_fd);
goto err;
}
@@ -116,7 +117,8 @@ err:
return map_fd;
}
-static int run_test(int server_fd, int results_fd, bool xdp)
+static int run_test(int server_fd, int results_fd, bool xdp,
+ const struct sockaddr *addr, socklen_t len)
{
int client = -1, srv_client = -1;
int ret = 0;
@@ -142,7 +144,7 @@ static int run_test(int server_fd, int results_fd, bool xdp)
goto err;
}
- client = connect_to_server(server_fd);
+ client = connect_to_server(addr, len);
if (client == -1)
goto err;
@@ -199,12 +201,30 @@ out:
return ret;
}
+static bool get_port(int server_fd, in_port_t *port)
+{
+ struct sockaddr_in addr;
+ socklen_t len = sizeof(addr);
+
+ if (getsockname(server_fd, (struct sockaddr *)&addr, &len)) {
+ log_err("Failed to get server addr");
+ return false;
+ }
+
+ /* sin_port and sin6_port are located at the same offset. */
+ *port = addr.sin_port;
+ return true;
+}
+
int main(int argc, char **argv)
{
struct sockaddr_in addr4;
struct sockaddr_in6 addr6;
+ struct sockaddr_in addr4dual;
+ struct sockaddr_in6 addr6dual;
int server = -1;
int server_v6 = -1;
+ int server_dual = -1;
int results = -1;
int err = 0;
bool xdp;
@@ -214,6 +234,9 @@ int main(int argc, char **argv)
exit(1);
}
+ /* Use libbpf 1.0 API mode */
+ libbpf_set_strict_mode(LIBBPF_STRICT_ALL);
+
results = get_map_fd_by_prog_id(atoi(argv[1]), &xdp);
if (results < 0) {
log_err("Can't get map");
@@ -224,25 +247,43 @@ int main(int argc, char **argv)
addr4.sin_family = AF_INET;
addr4.sin_addr.s_addr = htonl(INADDR_LOOPBACK);
addr4.sin_port = 0;
+ memcpy(&addr4dual, &addr4, sizeof(addr4dual));
memset(&addr6, 0, sizeof(addr6));
addr6.sin6_family = AF_INET6;
addr6.sin6_addr = in6addr_loopback;
addr6.sin6_port = 0;
- server = start_server((const struct sockaddr *)&addr4, sizeof(addr4));
- if (server == -1)
+ memset(&addr6dual, 0, sizeof(addr6dual));
+ addr6dual.sin6_family = AF_INET6;
+ addr6dual.sin6_addr = in6addr_any;
+ addr6dual.sin6_port = 0;
+
+ server = start_server((const struct sockaddr *)&addr4, sizeof(addr4),
+ false);
+ if (server == -1 || !get_port(server, &addr4.sin_port))
goto err;
server_v6 = start_server((const struct sockaddr *)&addr6,
- sizeof(addr6));
- if (server_v6 == -1)
+ sizeof(addr6), false);
+ if (server_v6 == -1 || !get_port(server_v6, &addr6.sin6_port))
+ goto err;
+
+ server_dual = start_server((const struct sockaddr *)&addr6dual,
+ sizeof(addr6dual), true);
+ if (server_dual == -1 || !get_port(server_dual, &addr4dual.sin_port))
+ goto err;
+
+ if (run_test(server, results, xdp,
+ (const struct sockaddr *)&addr4, sizeof(addr4)))
goto err;
- if (run_test(server, results, xdp))
+ if (run_test(server_v6, results, xdp,
+ (const struct sockaddr *)&addr6, sizeof(addr6)))
goto err;
- if (run_test(server_v6, results, xdp))
+ if (run_test(server_dual, results, xdp,
+ (const struct sockaddr *)&addr4dual, sizeof(addr4dual)))
goto err;
printf("ok\n");
@@ -252,6 +293,7 @@ err:
out:
close(server);
close(server_v6);
+ close(server_dual);
close(results);
return err;
}
diff --git a/tools/testing/selftests/bpf/test_tcp_hdr_options.h b/tools/testing/selftests/bpf/test_tcp_hdr_options.h
new file mode 100644
index 000000000000..56c9f8a3ad3d
--- /dev/null
+++ b/tools/testing/selftests/bpf/test_tcp_hdr_options.h
@@ -0,0 +1,153 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* Copyright (c) 2020 Facebook */
+
+#ifndef _TEST_TCP_HDR_OPTIONS_H
+#define _TEST_TCP_HDR_OPTIONS_H
+
+struct bpf_test_option {
+ __u8 flags;
+ __u8 max_delack_ms;
+ __u8 rand;
+} __attribute__((packed));
+
+enum {
+ OPTION_RESEND,
+ OPTION_MAX_DELACK_MS,
+ OPTION_RAND,
+ __NR_OPTION_FLAGS,
+};
+
+#define OPTION_F_RESEND (1 << OPTION_RESEND)
+#define OPTION_F_MAX_DELACK_MS (1 << OPTION_MAX_DELACK_MS)
+#define OPTION_F_RAND (1 << OPTION_RAND)
+#define OPTION_MASK ((1 << __NR_OPTION_FLAGS) - 1)
+
+#define TEST_OPTION_FLAGS(flags, option) (1 & ((flags) >> (option)))
+#define SET_OPTION_FLAGS(flags, option) ((flags) |= (1 << (option)))
+
+/* Store in bpf_sk_storage */
+struct hdr_stg {
+ bool active;
+ bool resend_syn; /* active side only */
+ bool syncookie; /* passive side only */
+ bool fastopen; /* passive side only */
+};
+
+struct linum_err {
+ unsigned int linum;
+ int err;
+};
+
+#define TCPHDR_FIN 0x01
+#define TCPHDR_SYN 0x02
+#define TCPHDR_RST 0x04
+#define TCPHDR_PSH 0x08
+#define TCPHDR_ACK 0x10
+#define TCPHDR_URG 0x20
+#define TCPHDR_ECE 0x40
+#define TCPHDR_CWR 0x80
+#define TCPHDR_SYNACK (TCPHDR_SYN | TCPHDR_ACK)
+
+#define TCPOPT_EOL 0
+#define TCPOPT_NOP 1
+#define TCPOPT_MSS 2
+#define TCPOPT_WINDOW 3
+#define TCPOPT_EXP 254
+
+#define TCP_BPF_EXPOPT_BASE_LEN 4
+#define MAX_TCP_HDR_LEN 60
+#define MAX_TCP_OPTION_SPACE 40
+
+#ifdef BPF_PROG_TEST_TCP_HDR_OPTIONS
+
+#define CG_OK 1
+#define CG_ERR 0
+
+#ifndef SOL_TCP
+#define SOL_TCP 6
+#endif
+
+struct tcp_exprm_opt {
+ __u8 kind;
+ __u8 len;
+ __u16 magic;
+ union {
+ __u8 data[4];
+ __u32 data32;
+ };
+} __attribute__((packed));
+
+struct tcp_opt {
+ __u8 kind;
+ __u8 len;
+ union {
+ __u8 data[4];
+ __u32 data32;
+ };
+} __attribute__((packed));
+
+struct {
+ __uint(type, BPF_MAP_TYPE_HASH);
+ __uint(max_entries, 2);
+ __type(key, int);
+ __type(value, struct linum_err);
+} lport_linum_map SEC(".maps");
+
+static inline unsigned int tcp_hdrlen(const struct tcphdr *th)
+{
+ return th->doff << 2;
+}
+
+static inline __u8 skops_tcp_flags(const struct bpf_sock_ops *skops)
+{
+ return skops->skb_tcp_flags;
+}
+
+static inline void clear_hdr_cb_flags(struct bpf_sock_ops *skops)
+{
+ bpf_sock_ops_cb_flags_set(skops,
+ skops->bpf_sock_ops_cb_flags &
+ ~(BPF_SOCK_OPS_PARSE_UNKNOWN_HDR_OPT_CB_FLAG |
+ BPF_SOCK_OPS_WRITE_HDR_OPT_CB_FLAG));
+}
+
+static inline void set_hdr_cb_flags(struct bpf_sock_ops *skops, __u32 extra)
+{
+ bpf_sock_ops_cb_flags_set(skops,
+ skops->bpf_sock_ops_cb_flags |
+ BPF_SOCK_OPS_PARSE_UNKNOWN_HDR_OPT_CB_FLAG |
+ BPF_SOCK_OPS_WRITE_HDR_OPT_CB_FLAG |
+ extra);
+}
+static inline void
+clear_parse_all_hdr_cb_flags(struct bpf_sock_ops *skops)
+{
+ bpf_sock_ops_cb_flags_set(skops,
+ skops->bpf_sock_ops_cb_flags &
+ ~BPF_SOCK_OPS_PARSE_ALL_HDR_OPT_CB_FLAG);
+}
+
+static inline void
+set_parse_all_hdr_cb_flags(struct bpf_sock_ops *skops)
+{
+ bpf_sock_ops_cb_flags_set(skops,
+ skops->bpf_sock_ops_cb_flags |
+ BPF_SOCK_OPS_PARSE_ALL_HDR_OPT_CB_FLAG);
+}
+
+#define RET_CG_ERR(__err) ({ \
+ struct linum_err __linum_err; \
+ int __lport; \
+ \
+ __linum_err.linum = __LINE__; \
+ __linum_err.err = __err; \
+ __lport = skops->local_port; \
+ bpf_map_update_elem(&lport_linum_map, &__lport, &__linum_err, BPF_NOEXIST); \
+ clear_hdr_cb_flags(skops); \
+ clear_parse_all_hdr_cb_flags(skops); \
+ return CG_ERR; \
+})
+
+#endif /* BPF_PROG_TEST_TCP_HDR_OPTIONS */
+
+#endif /* _TEST_TCP_HDR_OPTIONS_H */
diff --git a/tools/testing/selftests/bpf/test_tcpbpf.h b/tools/testing/selftests/bpf/test_tcpbpf.h
index 6220b95cbd02..9dd9b5590f9d 100644
--- a/tools/testing/selftests/bpf/test_tcpbpf.h
+++ b/tools/testing/selftests/bpf/test_tcpbpf.h
@@ -14,5 +14,9 @@ struct tcpbpf_globals {
__u64 bytes_acked;
__u32 num_listen;
__u32 num_close_events;
+ __u32 tcp_save_syn;
+ __u32 tcp_saved_syn;
+ __u32 window_clamp_client;
+ __u32 window_clamp_server;
};
#endif
diff --git a/tools/testing/selftests/bpf/test_tcpbpf_user.c b/tools/testing/selftests/bpf/test_tcpbpf_user.c
deleted file mode 100644
index 3ae127620463..000000000000
--- a/tools/testing/selftests/bpf/test_tcpbpf_user.c
+++ /dev/null
@@ -1,171 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-#include <inttypes.h>
-#include <stdio.h>
-#include <stdlib.h>
-#include <unistd.h>
-#include <errno.h>
-#include <string.h>
-#include <linux/bpf.h>
-#include <sys/types.h>
-#include <bpf/bpf.h>
-#include <bpf/libbpf.h>
-
-#include "bpf_rlimit.h"
-#include "bpf_util.h"
-#include "cgroup_helpers.h"
-
-#include "test_tcpbpf.h"
-
-/* 3 comes from one listening socket + both ends of the connection */
-#define EXPECTED_CLOSE_EVENTS 3
-
-#define EXPECT_EQ(expected, actual, fmt) \
- do { \
- if ((expected) != (actual)) { \
- printf(" Value of: " #actual "\n" \
- " Actual: %" fmt "\n" \
- " Expected: %" fmt "\n", \
- (actual), (expected)); \
- ret--; \
- } \
- } while (0)
-
-int verify_result(const struct tcpbpf_globals *result)
-{
- __u32 expected_events;
- int ret = 0;
-
- expected_events = ((1 << BPF_SOCK_OPS_TIMEOUT_INIT) |
- (1 << BPF_SOCK_OPS_RWND_INIT) |
- (1 << BPF_SOCK_OPS_TCP_CONNECT_CB) |
- (1 << BPF_SOCK_OPS_ACTIVE_ESTABLISHED_CB) |
- (1 << BPF_SOCK_OPS_PASSIVE_ESTABLISHED_CB) |
- (1 << BPF_SOCK_OPS_NEEDS_ECN) |
- (1 << BPF_SOCK_OPS_STATE_CB) |
- (1 << BPF_SOCK_OPS_TCP_LISTEN_CB));
-
- EXPECT_EQ(expected_events, result->event_map, "#" PRIx32);
- EXPECT_EQ(501ULL, result->bytes_received, "llu");
- EXPECT_EQ(1002ULL, result->bytes_acked, "llu");
- EXPECT_EQ(1, result->data_segs_in, PRIu32);
- EXPECT_EQ(1, result->data_segs_out, PRIu32);
- EXPECT_EQ(0x80, result->bad_cb_test_rv, PRIu32);
- EXPECT_EQ(0, result->good_cb_test_rv, PRIu32);
- EXPECT_EQ(1, result->num_listen, PRIu32);
- EXPECT_EQ(EXPECTED_CLOSE_EVENTS, result->num_close_events, PRIu32);
-
- return ret;
-}
-
-int verify_sockopt_result(int sock_map_fd)
-{
- __u32 key = 0;
- int ret = 0;
- int res;
- int rv;
-
- /* check setsockopt for SAVE_SYN */
- rv = bpf_map_lookup_elem(sock_map_fd, &key, &res);
- EXPECT_EQ(0, rv, "d");
- EXPECT_EQ(0, res, "d");
- key = 1;
- /* check getsockopt for SAVED_SYN */
- rv = bpf_map_lookup_elem(sock_map_fd, &key, &res);
- EXPECT_EQ(0, rv, "d");
- EXPECT_EQ(1, res, "d");
- return ret;
-}
-
-static int bpf_find_map(const char *test, struct bpf_object *obj,
- const char *name)
-{
- struct bpf_map *map;
-
- map = bpf_object__find_map_by_name(obj, name);
- if (!map) {
- printf("%s:FAIL:map '%s' not found\n", test, name);
- return -1;
- }
- return bpf_map__fd(map);
-}
-
-int main(int argc, char **argv)
-{
- const char *file = "test_tcpbpf_kern.o";
- int prog_fd, map_fd, sock_map_fd;
- struct tcpbpf_globals g = {0};
- const char *cg_path = "/foo";
- int error = EXIT_FAILURE;
- struct bpf_object *obj;
- int cg_fd = -1;
- int retry = 10;
- __u32 key = 0;
- int rv;
-
- if (setup_cgroup_environment())
- goto err;
-
- cg_fd = create_and_get_cgroup(cg_path);
- if (cg_fd < 0)
- goto err;
-
- if (join_cgroup(cg_path))
- goto err;
-
- if (bpf_prog_load(file, BPF_PROG_TYPE_SOCK_OPS, &obj, &prog_fd)) {
- printf("FAILED: load_bpf_file failed for: %s\n", file);
- goto err;
- }
-
- rv = bpf_prog_attach(prog_fd, cg_fd, BPF_CGROUP_SOCK_OPS, 0);
- if (rv) {
- printf("FAILED: bpf_prog_attach: %d (%s)\n",
- error, strerror(errno));
- goto err;
- }
-
- if (system("./tcp_server.py")) {
- printf("FAILED: TCP server\n");
- goto err;
- }
-
- map_fd = bpf_find_map(__func__, obj, "global_map");
- if (map_fd < 0)
- goto err;
-
- sock_map_fd = bpf_find_map(__func__, obj, "sockopt_results");
- if (sock_map_fd < 0)
- goto err;
-
-retry_lookup:
- rv = bpf_map_lookup_elem(map_fd, &key, &g);
- if (rv != 0) {
- printf("FAILED: bpf_map_lookup_elem returns %d\n", rv);
- goto err;
- }
-
- if (g.num_close_events != EXPECTED_CLOSE_EVENTS && retry--) {
- printf("Unexpected number of close events (%d), retrying!\n",
- g.num_close_events);
- usleep(100);
- goto retry_lookup;
- }
-
- if (verify_result(&g)) {
- printf("FAILED: Wrong stats\n");
- goto err;
- }
-
- if (verify_sockopt_result(sock_map_fd)) {
- printf("FAILED: Wrong sockopt stats\n");
- goto err;
- }
-
- printf("PASSED!\n");
- error = 0;
-err:
- bpf_prog_detach(cg_fd, BPF_CGROUP_SOCK_OPS);
- close(cg_fd);
- cleanup_cgroup_environment();
- return error;
-}
diff --git a/tools/testing/selftests/bpf/test_tcpnotify_user.c b/tools/testing/selftests/bpf/test_tcpnotify_user.c
index f9765ddf0761..595194453ff8 100644
--- a/tools/testing/selftests/bpf/test_tcpnotify_user.c
+++ b/tools/testing/selftests/bpf/test_tcpnotify_user.c
@@ -19,12 +19,12 @@
#include <linux/perf_event.h>
#include <linux/err.h>
-#include "bpf_rlimit.h"
#include "bpf_util.h"
#include "cgroup_helpers.h"
#include "test_tcpnotify.h"
#include "trace_helpers.h"
+#include "testing_helpers.h"
#define SOCKET_BUFFER_SIZE (getpagesize() < 8192L ? getpagesize() : 8192L)
@@ -69,9 +69,8 @@ int verify_result(const struct tcpnotify_globals *result)
int main(int argc, char **argv)
{
- const char *file = "test_tcpnotify_kern.o";
+ const char *file = "test_tcpnotify_kern.bpf.o";
struct bpf_map *perf_map, *global_map;
- struct perf_buffer_opts pb_opts = {};
struct tcpnotify_globals g = {0};
struct perf_buffer *pb = NULL;
const char *cg_path = "/foo";
@@ -82,21 +81,17 @@ int main(int argc, char **argv)
cpu_set_t cpuset;
__u32 key = 0;
+ libbpf_set_strict_mode(LIBBPF_STRICT_ALL);
+
CPU_ZERO(&cpuset);
CPU_SET(0, &cpuset);
pthread_setaffinity_np(pthread_self(), sizeof(cpu_set_t), &cpuset);
- if (setup_cgroup_environment())
- goto err;
-
- cg_fd = create_and_get_cgroup(cg_path);
+ cg_fd = cgroup_setup_and_join(cg_path);
if (cg_fd < 0)
goto err;
- if (join_cgroup(cg_path))
- goto err;
-
- if (bpf_prog_load(file, BPF_PROG_TYPE_SOCK_OPS, &obj, &prog_fd)) {
+ if (bpf_prog_test_load(file, BPF_PROG_TYPE_SOCK_OPS, &obj, &prog_fd)) {
printf("FAILED: load_bpf_file failed for: %s\n", file);
goto err;
}
@@ -120,9 +115,8 @@ int main(int argc, char **argv)
return -1;
}
- pb_opts.sample_cb = dummyfn;
- pb = perf_buffer__new(bpf_map__fd(perf_map), 8, &pb_opts);
- if (IS_ERR(pb))
+ pb = perf_buffer__new(bpf_map__fd(perf_map), 8, dummyfn, NULL, NULL, NULL);
+ if (!pb)
goto err;
pthread_create(&tid, NULL, poller_thread, pb);
@@ -130,17 +124,24 @@ int main(int argc, char **argv)
sprintf(test_script,
"iptables -A INPUT -p tcp --dport %d -j DROP",
TESTPORT);
- system(test_script);
+ if (system(test_script)) {
+ printf("FAILED: execute command: %s, err %d\n", test_script, -errno);
+ goto err;
+ }
sprintf(test_script,
"nc 127.0.0.1 %d < /etc/passwd > /dev/null 2>&1 ",
TESTPORT);
- system(test_script);
+ if (system(test_script))
+ printf("execute command: %s, err %d\n", test_script, -errno);
sprintf(test_script,
"iptables -D INPUT -p tcp --dport %d -j DROP",
TESTPORT);
- system(test_script);
+ if (system(test_script)) {
+ printf("FAILED: execute command: %s, err %d\n", test_script, -errno);
+ goto err;
+ }
rv = bpf_map_lookup_elem(bpf_map__fd(global_map), &key, &g);
if (rv != 0) {
@@ -162,7 +163,6 @@ err:
bpf_prog_detach(cg_fd, BPF_CGROUP_SOCK_OPS);
close(cg_fd);
cleanup_cgroup_environment();
- if (!IS_ERR_OR_NULL(pb))
- perf_buffer__free(pb);
+ perf_buffer__free(pb);
return error;
}
diff --git a/tools/testing/selftests/bpf/test_tunnel.sh b/tools/testing/selftests/bpf/test_tunnel.sh
index bd12ec97a44d..d9661b9988ba 100755
--- a/tools/testing/selftests/bpf/test_tunnel.sh
+++ b/tools/testing/selftests/bpf/test_tunnel.sh
@@ -24,12 +24,12 @@
# Root namespace with metadata-mode tunnel + BPF
# Device names and addresses:
# veth1 IP: 172.16.1.200, IPv6: 00::22 (underlay)
-# tunnel dev <type>11, ex: gre11, IPv4: 10.1.1.200 (overlay)
+# tunnel dev <type>11, ex: gre11, IPv4: 10.1.1.200, IPv6: 1::22 (overlay)
#
# Namespace at_ns0 with native tunnel
# Device names and addresses:
# veth0 IPv4: 172.16.1.100, IPv6: 00::11 (underlay)
-# tunnel dev <type>00, ex: gre00, IPv4: 10.1.1.100 (overlay)
+# tunnel dev <type>00, ex: gre00, IPv4: 10.1.1.100, IPv6: 1::11 (overlay)
#
#
# End-to-end ping packet flow
@@ -39,12 +39,14 @@
# from root namespace, the following operations happen:
# 1) Route lookup shows 10.1.1.100/24 belongs to tnl dev, fwd to tnl dev.
# 2) Tnl device's egress BPF program is triggered and set the tunnel metadata,
-# with remote_ip=172.16.1.200 and others.
+# with remote_ip=172.16.1.100 and others.
# 3) Outer tunnel header is prepended and route the packet to veth1's egress
# 4) veth0's ingress queue receive the tunneled packet at namespace at_ns0
# 5) Tunnel protocol handler, ex: vxlan_rcv, decap the packet
# 6) Forward the packet to the overlay tnl dev
+BPF_FILE="test_tunnel_kern.bpf.o"
+BPF_PIN_TUNNEL_DIR="/sys/fs/bpf/tc/tunnel"
PING_ARG="-c 3 -w 10 -q"
ret=0
GREEN='\033[0;92m'
@@ -64,15 +66,20 @@ config_device()
add_gre_tunnel()
{
+ tun_key=
+ if [ -n "$1" ]; then
+ tun_key="key $1"
+ fi
+
# at_ns0 namespace
ip netns exec at_ns0 \
- ip link add dev $DEV_NS type $TYPE seq key 2 \
+ ip link add dev $DEV_NS type $TYPE seq $tun_key \
local 172.16.1.100 remote 172.16.1.200
ip netns exec at_ns0 ip link set dev $DEV_NS up
ip netns exec at_ns0 ip addr add dev $DEV_NS 10.1.1.100/24
# root namespace
- ip link add dev $DEV type $TYPE key 2 external
+ ip link add dev $DEV type $TYPE $tun_key external
ip link set dev $DEV up
ip addr add dev $DEV 10.1.1.200/24
}
@@ -155,51 +162,6 @@ add_ip6erspan_tunnel()
ip link set dev $DEV up
}
-add_vxlan_tunnel()
-{
- # Set static ARP entry here because iptables set-mark works
- # on L3 packet, as a result not applying to ARP packets,
- # causing errors at get_tunnel_{key/opt}.
-
- # at_ns0 namespace
- ip netns exec at_ns0 \
- ip link add dev $DEV_NS type $TYPE \
- id 2 dstport 4789 gbp remote 172.16.1.200
- ip netns exec at_ns0 \
- ip link set dev $DEV_NS address 52:54:00:d9:01:00 up
- ip netns exec at_ns0 ip addr add dev $DEV_NS 10.1.1.100/24
- ip netns exec at_ns0 arp -s 10.1.1.200 52:54:00:d9:02:00
- ip netns exec at_ns0 iptables -A OUTPUT -j MARK --set-mark 0x800FF
-
- # root namespace
- ip link add dev $DEV type $TYPE external gbp dstport 4789
- ip link set dev $DEV address 52:54:00:d9:02:00 up
- ip addr add dev $DEV 10.1.1.200/24
- arp -s 10.1.1.100 52:54:00:d9:01:00
-}
-
-add_ip6vxlan_tunnel()
-{
- #ip netns exec at_ns0 ip -4 addr del 172.16.1.100 dev veth0
- ip netns exec at_ns0 ip -6 addr add ::11/96 dev veth0
- ip netns exec at_ns0 ip link set dev veth0 up
- #ip -4 addr del 172.16.1.200 dev veth1
- ip -6 addr add dev veth1 ::22/96
- ip link set dev veth1 up
-
- # at_ns0 namespace
- ip netns exec at_ns0 \
- ip link add dev $DEV_NS type $TYPE id 22 dstport 4789 \
- local ::11 remote ::22
- ip netns exec at_ns0 ip addr add dev $DEV_NS 10.1.1.100/24
- ip netns exec at_ns0 ip link set dev $DEV_NS up
-
- # root namespace
- ip link add dev $DEV type $TYPE external dstport 4789
- ip addr add dev $DEV 10.1.1.200/24
- ip link set dev $DEV up
-}
-
add_geneve_tunnel()
{
# at_ns0 namespace
@@ -250,7 +212,7 @@ add_ipip_tunnel()
ip addr add dev $DEV 10.1.1.200/24
}
-add_ipip6tnl_tunnel()
+add_ip6tnl_tunnel()
{
ip netns exec at_ns0 ip addr add ::11/96 dev veth0
ip netns exec at_ns0 ip link set dev veth0 up
@@ -262,11 +224,13 @@ add_ipip6tnl_tunnel()
ip link add dev $DEV_NS type $TYPE \
local ::11 remote ::22
ip netns exec at_ns0 ip addr add dev $DEV_NS 10.1.1.100/24
+ ip netns exec at_ns0 ip addr add dev $DEV_NS 1::11/96
ip netns exec at_ns0 ip link set dev $DEV_NS up
# root namespace
ip link add dev $DEV type $TYPE external
ip addr add dev $DEV 10.1.1.200/24
+ ip addr add dev $DEV 1::22/96
ip link set dev $DEV up
}
@@ -279,7 +243,7 @@ test_gre()
check $TYPE
config_device
- add_gre_tunnel
+ add_gre_tunnel 2
attach_bpf $DEV gre_set_tunnel gre_get_tunnel
ping $PING_ARG 10.1.1.100
check_err $?
@@ -294,6 +258,30 @@ test_gre()
echo -e ${GREEN}"PASS: $TYPE"${NC}
}
+test_gre_no_tunnel_key()
+{
+ TYPE=gre
+ DEV_NS=gre00
+ DEV=gre11
+ ret=0
+
+ check $TYPE
+ config_device
+ add_gre_tunnel
+ attach_bpf $DEV gre_set_tunnel_no_key gre_get_tunnel
+ ping $PING_ARG 10.1.1.100
+ check_err $?
+ ip netns exec at_ns0 ping $PING_ARG 10.1.1.200
+ check_err $?
+ cleanup
+
+ if [ $ret -ne 0 ]; then
+ echo -e ${RED}"FAIL: $TYPE"${NC}
+ return 1
+ fi
+ echo -e ${GREEN}"PASS: $TYPE"${NC}
+}
+
test_ip6gre()
{
TYPE=ip6gre
@@ -400,58 +388,6 @@ test_ip6erspan()
echo -e ${GREEN}"PASS: $TYPE"${NC}
}
-test_vxlan()
-{
- TYPE=vxlan
- DEV_NS=vxlan00
- DEV=vxlan11
- ret=0
-
- check $TYPE
- config_device
- add_vxlan_tunnel
- attach_bpf $DEV vxlan_set_tunnel vxlan_get_tunnel
- ping $PING_ARG 10.1.1.100
- check_err $?
- ip netns exec at_ns0 ping $PING_ARG 10.1.1.200
- check_err $?
- cleanup
-
- if [ $ret -ne 0 ]; then
- echo -e ${RED}"FAIL: $TYPE"${NC}
- return 1
- fi
- echo -e ${GREEN}"PASS: $TYPE"${NC}
-}
-
-test_ip6vxlan()
-{
- TYPE=vxlan
- DEV_NS=ip6vxlan00
- DEV=ip6vxlan11
- ret=0
-
- check $TYPE
- config_device
- add_ip6vxlan_tunnel
- ip link set dev veth1 mtu 1500
- attach_bpf $DEV ip6vxlan_set_tunnel ip6vxlan_get_tunnel
- # underlay
- ping6 $PING_ARG ::11
- # ip4 over ip6
- ping $PING_ARG 10.1.1.100
- check_err $?
- ip netns exec at_ns0 ping $PING_ARG 10.1.1.200
- check_err $?
- cleanup
-
- if [ $ret -ne 0 ]; then
- echo -e ${RED}"FAIL: ip6$TYPE"${NC}
- return 1
- fi
- echo -e ${GREEN}"PASS: ip6$TYPE"${NC}
-}
-
test_geneve()
{
TYPE=geneve
@@ -534,7 +470,7 @@ test_ipip6()
check $TYPE
config_device
- add_ipip6tnl_tunnel
+ add_ip6tnl_tunnel
ip link set dev veth1 mtu 1500
attach_bpf $DEV ipip6_set_tunnel ipip6_get_tunnel
# underlay
@@ -553,81 +489,32 @@ test_ipip6()
echo -e ${GREEN}"PASS: $TYPE"${NC}
}
-setup_xfrm_tunnel()
+test_ip6ip6()
{
- auth=0x$(printf '1%.0s' {1..40})
- enc=0x$(printf '2%.0s' {1..32})
- spi_in_to_out=0x1
- spi_out_to_in=0x2
- # at_ns0 namespace
- # at_ns0 -> root
- ip netns exec at_ns0 \
- ip xfrm state add src 172.16.1.100 dst 172.16.1.200 proto esp \
- spi $spi_in_to_out reqid 1 mode tunnel \
- auth-trunc 'hmac(sha1)' $auth 96 enc 'cbc(aes)' $enc
- ip netns exec at_ns0 \
- ip xfrm policy add src 10.1.1.100/32 dst 10.1.1.200/32 dir out \
- tmpl src 172.16.1.100 dst 172.16.1.200 proto esp reqid 1 \
- mode tunnel
- # root -> at_ns0
- ip netns exec at_ns0 \
- ip xfrm state add src 172.16.1.200 dst 172.16.1.100 proto esp \
- spi $spi_out_to_in reqid 2 mode tunnel \
- auth-trunc 'hmac(sha1)' $auth 96 enc 'cbc(aes)' $enc
- ip netns exec at_ns0 \
- ip xfrm policy add src 10.1.1.200/32 dst 10.1.1.100/32 dir in \
- tmpl src 172.16.1.200 dst 172.16.1.100 proto esp reqid 2 \
- mode tunnel
- # address & route
- ip netns exec at_ns0 \
- ip addr add dev veth0 10.1.1.100/32
- ip netns exec at_ns0 \
- ip route add 10.1.1.200 dev veth0 via 172.16.1.200 \
- src 10.1.1.100
-
- # root namespace
- # at_ns0 -> root
- ip xfrm state add src 172.16.1.100 dst 172.16.1.200 proto esp \
- spi $spi_in_to_out reqid 1 mode tunnel \
- auth-trunc 'hmac(sha1)' $auth 96 enc 'cbc(aes)' $enc
- ip xfrm policy add src 10.1.1.100/32 dst 10.1.1.200/32 dir in \
- tmpl src 172.16.1.100 dst 172.16.1.200 proto esp reqid 1 \
- mode tunnel
- # root -> at_ns0
- ip xfrm state add src 172.16.1.200 dst 172.16.1.100 proto esp \
- spi $spi_out_to_in reqid 2 mode tunnel \
- auth-trunc 'hmac(sha1)' $auth 96 enc 'cbc(aes)' $enc
- ip xfrm policy add src 10.1.1.200/32 dst 10.1.1.100/32 dir out \
- tmpl src 172.16.1.200 dst 172.16.1.100 proto esp reqid 2 \
- mode tunnel
- # address & route
- ip addr add dev veth1 10.1.1.200/32
- ip route add 10.1.1.100 dev veth1 via 172.16.1.100 src 10.1.1.200
-}
+ TYPE=ip6tnl
+ DEV_NS=ip6ip6tnl00
+ DEV=ip6ip6tnl11
+ ret=0
-test_xfrm_tunnel()
-{
+ check $TYPE
config_device
- > /sys/kernel/debug/tracing/trace
- setup_xfrm_tunnel
- tc qdisc add dev veth1 clsact
- tc filter add dev veth1 proto ip ingress bpf da obj test_tunnel_kern.o \
- sec xfrm_get_state
- ip netns exec at_ns0 ping $PING_ARG 10.1.1.200
- sleep 1
- grep "reqid 1" /sys/kernel/debug/tracing/trace
- check_err $?
- grep "spi 0x1" /sys/kernel/debug/tracing/trace
+ add_ip6tnl_tunnel
+ ip link set dev veth1 mtu 1500
+ attach_bpf $DEV ip6ip6_set_tunnel ip6ip6_get_tunnel
+ # underlay
+ ping6 $PING_ARG ::11
+ # ip6 over ip6
+ ping6 $PING_ARG 1::11
check_err $?
- grep "remote ip 0xac100164" /sys/kernel/debug/tracing/trace
+ ip netns exec at_ns0 ping6 $PING_ARG 1::22
check_err $?
cleanup
if [ $ret -ne 0 ]; then
- echo -e ${RED}"FAIL: xfrm tunnel"${NC}
- return 1
- fi
- echo -e ${GREEN}"PASS: xfrm tunnel"${NC}
+ echo -e ${RED}"FAIL: ip6$TYPE"${NC}
+ return 1
+ fi
+ echo -e ${GREEN}"PASS: ip6$TYPE"${NC}
}
attach_bpf()
@@ -635,30 +522,30 @@ attach_bpf()
DEV=$1
SET=$2
GET=$3
+ mkdir -p ${BPF_PIN_TUNNEL_DIR}
+ bpftool prog loadall ${BPF_FILE} ${BPF_PIN_TUNNEL_DIR}/
tc qdisc add dev $DEV clsact
- tc filter add dev $DEV egress bpf da obj test_tunnel_kern.o sec $SET
- tc filter add dev $DEV ingress bpf da obj test_tunnel_kern.o sec $GET
+ tc filter add dev $DEV egress bpf da object-pinned ${BPF_PIN_TUNNEL_DIR}/$SET
+ tc filter add dev $DEV ingress bpf da object-pinned ${BPF_PIN_TUNNEL_DIR}/$GET
}
cleanup()
{
+ rm -rf ${BPF_PIN_TUNNEL_DIR}
+
ip netns delete at_ns0 2> /dev/null
ip link del veth1 2> /dev/null
ip link del ipip11 2> /dev/null
ip link del ipip6tnl11 2> /dev/null
+ ip link del ip6ip6tnl11 2> /dev/null
ip link del gretap11 2> /dev/null
+ ip link del gre11 2> /dev/null
ip link del ip6gre11 2> /dev/null
ip link del ip6gretap11 2> /dev/null
- ip link del vxlan11 2> /dev/null
- ip link del ip6vxlan11 2> /dev/null
ip link del geneve11 2> /dev/null
ip link del ip6geneve11 2> /dev/null
ip link del erspan11 2> /dev/null
ip link del ip6erspan11 2> /dev/null
- ip xfrm policy delete dir out src 10.1.1.200/32 dst 10.1.1.100/32 2> /dev/null
- ip xfrm policy delete dir in src 10.1.1.100/32 dst 10.1.1.200/32 2> /dev/null
- ip xfrm state delete src 172.16.1.100 dst 172.16.1.200 proto esp spi 0x1 2> /dev/null
- ip xfrm state delete src 172.16.1.200 dst 172.16.1.100 proto esp spi 0x2 2> /dev/null
}
cleanup_exit()
@@ -682,7 +569,6 @@ enable_debug()
{
echo 'file ip_gre.c +p' > /sys/kernel/debug/dynamic_debug/control
echo 'file ip6_gre.c +p' > /sys/kernel/debug/dynamic_debug/control
- echo 'file vxlan.c +p' > /sys/kernel/debug/dynamic_debug/control
echo 'file geneve.c +p' > /sys/kernel/debug/dynamic_debug/control
echo 'file ipip.c +p' > /sys/kernel/debug/dynamic_debug/control
}
@@ -702,6 +588,10 @@ bpf_tunnel_test()
test_gre
errors=$(( $errors + $? ))
+ echo "Testing GRE tunnel (without tunnel keys)..."
+ test_gre_no_tunnel_key
+ errors=$(( $errors + $? ))
+
echo "Testing IP6GRE tunnel..."
test_ip6gre
errors=$(( $errors + $? ))
@@ -718,14 +608,6 @@ bpf_tunnel_test()
test_ip6erspan v2
errors=$(( $errors + $? ))
- echo "Testing VXLAN tunnel..."
- test_vxlan
- errors=$(( $errors + $? ))
-
- echo "Testing IP6VXLAN tunnel..."
- test_ip6vxlan
- errors=$(( $errors + $? ))
-
echo "Testing GENEVE tunnel..."
test_geneve
errors=$(( $errors + $? ))
@@ -742,8 +624,8 @@ bpf_tunnel_test()
test_ipip6
errors=$(( $errors + $? ))
- echo "Testing IPSec tunnel..."
- test_xfrm_tunnel
+ echo "Testing IP6IP6 tunnel..."
+ test_ip6ip6
errors=$(( $errors + $? ))
return $errors
diff --git a/tools/testing/selftests/bpf/test_verifier.c b/tools/testing/selftests/bpf/test_verifier.c
index 78a6bae56ea6..df04bda1c927 100644
--- a/tools/testing/selftests/bpf/test_verifier.c
+++ b/tools/testing/selftests/bpf/test_verifier.c
@@ -22,8 +22,6 @@
#include <limits.h>
#include <assert.h>
-#include <sys/capability.h>
-
#include <linux/unistd.h>
#include <linux/filter.h>
#include <linux/bpf_perf_event.h>
@@ -31,42 +29,83 @@
#include <linux/if_ether.h>
#include <linux/btf.h>
+#include <bpf/btf.h>
#include <bpf/bpf.h>
#include <bpf/libbpf.h>
-#ifdef HAVE_GENHDR
-# include "autoconf.h"
-#else
-# if defined(__i386) || defined(__x86_64) || defined(__s390x__) || defined(__aarch64__)
-# define CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS 1
-# endif
-#endif
-#include "bpf_rlimit.h"
+#include "autoconf_helper.h"
+#include "unpriv_helpers.h"
+#include "cap_helpers.h"
#include "bpf_rand.h"
#include "bpf_util.h"
#include "test_btf.h"
#include "../../../include/linux/filter.h"
+#include "testing_helpers.h"
+
+#ifndef ENOTSUPP
+#define ENOTSUPP 524
+#endif
#define MAX_INSNS BPF_MAXINSNS
+#define MAX_EXPECTED_INSNS 32
+#define MAX_UNEXPECTED_INSNS 32
#define MAX_TEST_INSNS 1000000
#define MAX_FIXUPS 8
-#define MAX_NR_MAPS 20
+#define MAX_NR_MAPS 23
#define MAX_TEST_RUNS 8
#define POINTER_VALUE 0xcafe4all
#define TEST_DATA_LEN 64
+#define MAX_FUNC_INFOS 8
+#define MAX_BTF_STRINGS 256
+#define MAX_BTF_TYPES 256
+
+#define INSN_OFF_MASK ((__s16)0xFFFF)
+#define INSN_IMM_MASK ((__s32)0xFFFFFFFF)
+#define SKIP_INSNS() BPF_RAW_INSN(0xde, 0xa, 0xd, 0xbeef, 0xdeadbeef)
+
+#define DEFAULT_LIBBPF_LOG_LEVEL 4
#define F_NEEDS_EFFICIENT_UNALIGNED_ACCESS (1 << 0)
#define F_LOAD_WITH_STRICT_ALIGNMENT (1 << 1)
+#define F_NEEDS_JIT_ENABLED (1 << 2)
+/* need CAP_BPF, CAP_NET_ADMIN, CAP_PERFMON to load progs */
+#define ADMIN_CAPS (1ULL << CAP_NET_ADMIN | \
+ 1ULL << CAP_PERFMON | \
+ 1ULL << CAP_BPF)
#define UNPRIV_SYSCTL "kernel/unprivileged_bpf_disabled"
static bool unpriv_disabled = false;
+static bool jit_disabled;
static int skips;
static bool verbose = false;
+static int verif_log_level = 0;
+
+struct kfunc_btf_id_pair {
+ const char *kfunc;
+ int insn_idx;
+};
struct bpf_test {
const char *descr;
struct bpf_insn insns[MAX_INSNS];
struct bpf_insn *fill_insns;
+ /* If specified, test engine looks for this sequence of
+ * instructions in the BPF program after loading. Allows to
+ * test rewrites applied by verifier. Use values
+ * INSN_OFF_MASK and INSN_IMM_MASK to mask `off` and `imm`
+ * fields if content does not matter. The test case fails if
+ * specified instructions are not found.
+ *
+ * The sequence could be split into sub-sequences by adding
+ * SKIP_INSNS instruction at the end of each sub-sequence. In
+ * such case sub-sequences are searched for one after another.
+ */
+ struct bpf_insn expected_insns[MAX_EXPECTED_INSNS];
+ /* If specified, test engine applies same pattern matching
+ * logic as for `expected_insns`. If the specified pattern is
+ * matched test case is marked as failed.
+ */
+ struct bpf_insn unexpected_insns[MAX_UNEXPECTED_INSNS];
int fixup_map_hash_8b[MAX_FIXUPS];
int fixup_map_hash_48b[MAX_FIXUPS];
int fixup_map_hash_16b[MAX_FIXUPS];
@@ -87,6 +126,14 @@ struct bpf_test {
int fixup_sk_storage_map[MAX_FIXUPS];
int fixup_map_event_output[MAX_FIXUPS];
int fixup_map_reuseport_array[MAX_FIXUPS];
+ int fixup_map_ringbuf[MAX_FIXUPS];
+ int fixup_map_timer[MAX_FIXUPS];
+ int fixup_map_kptr[MAX_FIXUPS];
+ struct kfunc_btf_id_pair fixup_kfunc_btf_id[MAX_FIXUPS];
+ /* Expected verifier log output for result REJECT or VERBOSE_ACCEPT.
+ * Can be a tab-separated sequence of expected strings. An empty string
+ * means no log verification.
+ */
const char *errstr;
const char *errstr_unpriv;
uint32_t insn_processed;
@@ -100,7 +147,7 @@ struct bpf_test {
enum bpf_prog_type prog_type;
uint8_t flags;
void (*fill_helper)(struct bpf_test *self);
- uint8_t runs;
+ int runs;
#define bpf_testdata_struct_t \
struct { \
uint32_t retval, retval_unpriv; \
@@ -114,6 +161,15 @@ struct bpf_test {
bpf_testdata_struct_t retvals[MAX_TEST_RUNS];
};
enum bpf_attach_type expected_attach_type;
+ const char *kfunc;
+ struct bpf_func_info func_info[MAX_FUNC_INFOS];
+ int func_info_cnt;
+ char btf_strings[MAX_BTF_STRINGS];
+ /* A set of BTF types to load when specified,
+ * use macro definitions from test_btf.h,
+ * must end with BTF_END_RAW
+ */
+ __u32 btf_types[MAX_BTF_TYPES];
};
/* Note we want this to be 64 bit aligned so that the end of our array is
@@ -151,7 +207,7 @@ loop:
insn[i++] = BPF_MOV64_IMM(BPF_REG_2, 1);
insn[i++] = BPF_MOV64_IMM(BPF_REG_3, 2);
insn[i++] = BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
- BPF_FUNC_skb_vlan_push),
+ BPF_FUNC_skb_vlan_push);
insn[i] = BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, len - i - 3);
i++;
}
@@ -162,7 +218,7 @@ loop:
i++;
insn[i++] = BPF_MOV64_REG(BPF_REG_1, BPF_REG_6);
insn[i++] = BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
- BPF_FUNC_skb_vlan_pop),
+ BPF_FUNC_skb_vlan_pop);
insn[i] = BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, len - i - 3);
i++;
}
@@ -295,6 +351,117 @@ static void bpf_fill_scale(struct bpf_test *self)
}
}
+static int bpf_fill_torturous_jumps_insn_1(struct bpf_insn *insn)
+{
+ unsigned int len = 259, hlen = 128;
+ int i;
+
+ insn[0] = BPF_EMIT_CALL(BPF_FUNC_get_prandom_u32);
+ for (i = 1; i <= hlen; i++) {
+ insn[i] = BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, i, hlen);
+ insn[i + hlen] = BPF_JMP_A(hlen - i);
+ }
+ insn[len - 2] = BPF_MOV64_IMM(BPF_REG_0, 1);
+ insn[len - 1] = BPF_EXIT_INSN();
+
+ return len;
+}
+
+static int bpf_fill_torturous_jumps_insn_2(struct bpf_insn *insn)
+{
+ unsigned int len = 4100, jmp_off = 2048;
+ int i, j;
+
+ insn[0] = BPF_EMIT_CALL(BPF_FUNC_get_prandom_u32);
+ for (i = 1; i <= jmp_off; i++) {
+ insn[i] = BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, i, jmp_off);
+ }
+ insn[i++] = BPF_JMP_A(jmp_off);
+ for (; i <= jmp_off * 2 + 1; i+=16) {
+ for (j = 0; j < 16; j++) {
+ insn[i + j] = BPF_JMP_A(16 - j - 1);
+ }
+ }
+
+ insn[len - 2] = BPF_MOV64_IMM(BPF_REG_0, 2);
+ insn[len - 1] = BPF_EXIT_INSN();
+
+ return len;
+}
+
+static void bpf_fill_torturous_jumps(struct bpf_test *self)
+{
+ struct bpf_insn *insn = self->fill_insns;
+ int i = 0;
+
+ switch (self->retval) {
+ case 1:
+ self->prog_len = bpf_fill_torturous_jumps_insn_1(insn);
+ return;
+ case 2:
+ self->prog_len = bpf_fill_torturous_jumps_insn_2(insn);
+ return;
+ case 3:
+ /* main */
+ insn[i++] = BPF_RAW_INSN(BPF_JMP|BPF_CALL, 0, 1, 0, 4);
+ insn[i++] = BPF_RAW_INSN(BPF_JMP|BPF_CALL, 0, 1, 0, 262);
+ insn[i++] = BPF_ST_MEM(BPF_B, BPF_REG_10, -32, 0);
+ insn[i++] = BPF_MOV64_IMM(BPF_REG_0, 3);
+ insn[i++] = BPF_EXIT_INSN();
+
+ /* subprog 1 */
+ i += bpf_fill_torturous_jumps_insn_1(insn + i);
+
+ /* subprog 2 */
+ i += bpf_fill_torturous_jumps_insn_2(insn + i);
+
+ self->prog_len = i;
+ return;
+ default:
+ self->prog_len = 0;
+ break;
+ }
+}
+
+static void bpf_fill_big_prog_with_loop_1(struct bpf_test *self)
+{
+ struct bpf_insn *insn = self->fill_insns;
+ /* This test was added to catch a specific use after free
+ * error, which happened upon BPF program reallocation.
+ * Reallocation is handled by core.c:bpf_prog_realloc, which
+ * reuses old memory if page boundary is not crossed. The
+ * value of `len` is chosen to cross this boundary on bpf_loop
+ * patching.
+ */
+ const int len = getpagesize() - 25;
+ int callback_load_idx;
+ int callback_idx;
+ int i = 0;
+
+ insn[i++] = BPF_ALU64_IMM(BPF_MOV, BPF_REG_1, 1);
+ callback_load_idx = i;
+ insn[i++] = BPF_RAW_INSN(BPF_LD | BPF_IMM | BPF_DW,
+ BPF_REG_2, BPF_PSEUDO_FUNC, 0,
+ 777 /* filled below */);
+ insn[i++] = BPF_RAW_INSN(0, 0, 0, 0, 0);
+ insn[i++] = BPF_ALU64_IMM(BPF_MOV, BPF_REG_3, 0);
+ insn[i++] = BPF_ALU64_IMM(BPF_MOV, BPF_REG_4, 0);
+ insn[i++] = BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_loop);
+
+ while (i < len - 3)
+ insn[i++] = BPF_ALU64_IMM(BPF_MOV, BPF_REG_0, 0);
+ insn[i++] = BPF_EXIT_INSN();
+
+ callback_idx = i;
+ insn[i++] = BPF_ALU64_IMM(BPF_MOV, BPF_REG_0, 0);
+ insn[i++] = BPF_EXIT_INSN();
+
+ insn[callback_load_idx].imm = callback_idx - callback_load_idx - 1;
+ self->func_info[1].insn_off = callback_idx;
+ self->prog_len = i;
+ assert(i == len);
+}
+
/* BPF_SK_LOOKUP contains 13 instructions, if you need to fix up maps */
#define BPF_SK_LOOKUP(func) \
/* struct bpf_sock_tuple tuple = {} */ \
@@ -367,7 +534,7 @@ static int probe_filter_length(const struct bpf_insn *fp)
static bool skip_unsupported_map(enum bpf_map_type map_type)
{
- if (!bpf_probe_map_type(map_type, 0)) {
+ if (!libbpf_probe_bpf_map_type(map_type, NULL)) {
printf("SKIP (unsupported map type %d)\n", map_type);
skips++;
return true;
@@ -379,11 +546,11 @@ static int __create_map(uint32_t type, uint32_t size_key,
uint32_t size_value, uint32_t max_elem,
uint32_t extra_flags)
{
+ LIBBPF_OPTS(bpf_map_create_opts, opts);
int fd;
- fd = bpf_create_map(type, size_key, size_value, max_elem,
- (type == BPF_MAP_TYPE_HASH ?
- BPF_F_NO_PREALLOC : 0) | extra_flags);
+ opts.map_flags = (type == BPF_MAP_TYPE_HASH ? BPF_F_NO_PREALLOC : 0) | extra_flags;
+ fd = bpf_map_create(type, NULL, size_key, size_value, max_elem, &opts);
if (fd < 0) {
if (skip_unsupported_map(type))
return -1;
@@ -416,8 +583,7 @@ static int create_prog_dummy_simple(enum bpf_prog_type prog_type, int ret)
BPF_EXIT_INSN(),
};
- return bpf_load_program(prog_type, prog,
- ARRAY_SIZE(prog), "GPL", 0, NULL, 0);
+ return bpf_prog_load(prog_type, NULL, "GPL", prog, ARRAY_SIZE(prog), NULL);
}
static int create_prog_dummy_loop(enum bpf_prog_type prog_type, int mfd,
@@ -432,8 +598,7 @@ static int create_prog_dummy_loop(enum bpf_prog_type prog_type, int mfd,
BPF_EXIT_INSN(),
};
- return bpf_load_program(prog_type, prog,
- ARRAY_SIZE(prog), "GPL", 0, NULL, 0);
+ return bpf_prog_load(prog_type, NULL, "GPL", prog, ARRAY_SIZE(prog), NULL);
}
static int create_prog_array(enum bpf_prog_type prog_type, uint32_t max_elem,
@@ -441,8 +606,8 @@ static int create_prog_array(enum bpf_prog_type prog_type, uint32_t max_elem,
{
int mfd, p1fd, p2fd, p3fd;
- mfd = bpf_create_map(BPF_MAP_TYPE_PROG_ARRAY, sizeof(int),
- sizeof(int), max_elem, 0);
+ mfd = bpf_map_create(BPF_MAP_TYPE_PROG_ARRAY, NULL, sizeof(int),
+ sizeof(int), max_elem, NULL);
if (mfd < 0) {
if (skip_unsupported_map(BPF_MAP_TYPE_PROG_ARRAY))
return -1;
@@ -472,10 +637,11 @@ err:
static int create_map_in_map(void)
{
+ LIBBPF_OPTS(bpf_map_create_opts, opts);
int inner_map_fd, outer_map_fd;
- inner_map_fd = bpf_create_map(BPF_MAP_TYPE_ARRAY, sizeof(int),
- sizeof(int), 1, 0);
+ inner_map_fd = bpf_map_create(BPF_MAP_TYPE_ARRAY, NULL, sizeof(int),
+ sizeof(int), 1, NULL);
if (inner_map_fd < 0) {
if (skip_unsupported_map(BPF_MAP_TYPE_ARRAY))
return -1;
@@ -483,8 +649,9 @@ static int create_map_in_map(void)
return inner_map_fd;
}
- outer_map_fd = bpf_create_map_in_map(BPF_MAP_TYPE_ARRAY_OF_MAPS, NULL,
- sizeof(int), inner_map_fd, 1, 0);
+ opts.inner_map_fd = inner_map_fd;
+ outer_map_fd = bpf_map_create(BPF_MAP_TYPE_ARRAY_OF_MAPS, NULL,
+ sizeof(int), sizeof(int), 1, &opts);
if (outer_map_fd < 0) {
if (skip_unsupported_map(BPF_MAP_TYPE_ARRAY_OF_MAPS))
return -1;
@@ -503,8 +670,8 @@ static int create_cgroup_storage(bool percpu)
BPF_MAP_TYPE_CGROUP_STORAGE;
int fd;
- fd = bpf_create_map(type, sizeof(struct bpf_cgroup_storage_key),
- TEST_DATA_LEN, 0, 0);
+ fd = bpf_map_create(type, NULL, sizeof(struct bpf_cgroup_storage_key),
+ TEST_DATA_LEN, 0, NULL);
if (fd < 0) {
if (skip_unsupported_map(type))
return -1;
@@ -522,8 +689,22 @@ static int create_cgroup_storage(bool percpu)
* int cnt;
* struct bpf_spin_lock l;
* };
+ * struct bpf_timer {
+ * __u64 :64;
+ * __u64 :64;
+ * } __attribute__((aligned(8)));
+ * struct timer {
+ * struct bpf_timer t;
+ * };
+ * struct btf_ptr {
+ * struct prog_test_ref_kfunc __kptr_untrusted *ptr;
+ * struct prog_test_ref_kfunc __kptr *ptr;
+ * struct prog_test_member __kptr *ptr;
+ * }
*/
-static const char btf_str_sec[] = "\0bpf_spin_lock\0val\0cnt\0l";
+static const char btf_str_sec[] = "\0bpf_spin_lock\0val\0cnt\0l\0bpf_timer\0timer\0t"
+ "\0btf_ptr\0prog_test_ref_kfunc\0ptr\0kptr\0kptr_untrusted"
+ "\0prog_test_member";
static __u32 btf_raw_types[] = {
/* int */
BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [1] */
@@ -534,56 +715,104 @@ static __u32 btf_raw_types[] = {
BTF_TYPE_ENC(15, BTF_INFO_ENC(BTF_KIND_STRUCT, 0, 2), 8),
BTF_MEMBER_ENC(19, 1, 0), /* int cnt; */
BTF_MEMBER_ENC(23, 2, 32),/* struct bpf_spin_lock l; */
+ /* struct bpf_timer */ /* [4] */
+ BTF_TYPE_ENC(25, BTF_INFO_ENC(BTF_KIND_STRUCT, 0, 0), 16),
+ /* struct timer */ /* [5] */
+ BTF_TYPE_ENC(35, BTF_INFO_ENC(BTF_KIND_STRUCT, 0, 1), 16),
+ BTF_MEMBER_ENC(41, 4, 0), /* struct bpf_timer t; */
+ /* struct prog_test_ref_kfunc */ /* [6] */
+ BTF_STRUCT_ENC(51, 0, 0),
+ BTF_STRUCT_ENC(95, 0, 0), /* [7] */
+ /* type tag "kptr_untrusted" */
+ BTF_TYPE_TAG_ENC(80, 6), /* [8] */
+ /* type tag "kptr" */
+ BTF_TYPE_TAG_ENC(75, 6), /* [9] */
+ BTF_TYPE_TAG_ENC(75, 7), /* [10] */
+ BTF_PTR_ENC(8), /* [11] */
+ BTF_PTR_ENC(9), /* [12] */
+ BTF_PTR_ENC(10), /* [13] */
+ /* struct btf_ptr */ /* [14] */
+ BTF_STRUCT_ENC(43, 3, 24),
+ BTF_MEMBER_ENC(71, 11, 0), /* struct prog_test_ref_kfunc __kptr_untrusted *ptr; */
+ BTF_MEMBER_ENC(71, 12, 64), /* struct prog_test_ref_kfunc __kptr *ptr; */
+ BTF_MEMBER_ENC(71, 13, 128), /* struct prog_test_member __kptr *ptr; */
};
-static int load_btf(void)
+static char bpf_vlog[UINT_MAX >> 8];
+
+static int load_btf_spec(__u32 *types, int types_len,
+ const char *strings, int strings_len)
{
struct btf_header hdr = {
.magic = BTF_MAGIC,
.version = BTF_VERSION,
.hdr_len = sizeof(struct btf_header),
- .type_len = sizeof(btf_raw_types),
- .str_off = sizeof(btf_raw_types),
- .str_len = sizeof(btf_str_sec),
+ .type_len = types_len,
+ .str_off = types_len,
+ .str_len = strings_len,
};
void *ptr, *raw_btf;
int btf_fd;
+ LIBBPF_OPTS(bpf_btf_load_opts, opts,
+ .log_buf = bpf_vlog,
+ .log_size = sizeof(bpf_vlog),
+ .log_level = (verbose
+ ? verif_log_level
+ : DEFAULT_LIBBPF_LOG_LEVEL),
+ );
- ptr = raw_btf = malloc(sizeof(hdr) + sizeof(btf_raw_types) +
- sizeof(btf_str_sec));
+ raw_btf = malloc(sizeof(hdr) + types_len + strings_len);
+ ptr = raw_btf;
memcpy(ptr, &hdr, sizeof(hdr));
ptr += sizeof(hdr);
- memcpy(ptr, btf_raw_types, hdr.type_len);
+ memcpy(ptr, types, hdr.type_len);
ptr += hdr.type_len;
- memcpy(ptr, btf_str_sec, hdr.str_len);
+ memcpy(ptr, strings, hdr.str_len);
ptr += hdr.str_len;
- btf_fd = bpf_load_btf(raw_btf, ptr - raw_btf, 0, 0, 0);
- free(raw_btf);
+ btf_fd = bpf_btf_load(raw_btf, ptr - raw_btf, &opts);
if (btf_fd < 0)
- return -1;
- return btf_fd;
+ printf("Failed to load BTF spec: '%s'\n", strerror(errno));
+
+ free(raw_btf);
+
+ return btf_fd < 0 ? -1 : btf_fd;
+}
+
+static int load_btf(void)
+{
+ return load_btf_spec(btf_raw_types, sizeof(btf_raw_types),
+ btf_str_sec, sizeof(btf_str_sec));
+}
+
+static int load_btf_for_test(struct bpf_test *test)
+{
+ int types_num = 0;
+
+ while (types_num < MAX_BTF_TYPES &&
+ test->btf_types[types_num] != BTF_END_RAW)
+ ++types_num;
+
+ int types_len = types_num * sizeof(test->btf_types[0]);
+
+ return load_btf_spec(test->btf_types, types_len,
+ test->btf_strings, sizeof(test->btf_strings));
}
static int create_map_spin_lock(void)
{
- struct bpf_create_map_attr attr = {
- .name = "test_map",
- .map_type = BPF_MAP_TYPE_ARRAY,
- .key_size = 4,
- .value_size = 8,
- .max_entries = 1,
+ LIBBPF_OPTS(bpf_map_create_opts, opts,
.btf_key_type_id = 1,
.btf_value_type_id = 3,
- };
+ );
int fd, btf_fd;
btf_fd = load_btf();
if (btf_fd < 0)
return -1;
- attr.btf_fd = btf_fd;
- fd = bpf_create_map_xattr(&attr);
+ opts.btf_fd = btf_fd;
+ fd = bpf_map_create(BPF_MAP_TYPE_ARRAY, "test_map", 4, 8, 1, &opts);
if (fd < 0)
printf("Failed to create map with spin_lock\n");
return fd;
@@ -591,33 +820,196 @@ static int create_map_spin_lock(void)
static int create_sk_storage_map(void)
{
- struct bpf_create_map_attr attr = {
- .name = "test_map",
- .map_type = BPF_MAP_TYPE_SK_STORAGE,
- .key_size = 4,
- .value_size = 8,
- .max_entries = 0,
+ LIBBPF_OPTS(bpf_map_create_opts, opts,
.map_flags = BPF_F_NO_PREALLOC,
.btf_key_type_id = 1,
.btf_value_type_id = 3,
- };
+ );
int fd, btf_fd;
btf_fd = load_btf();
if (btf_fd < 0)
return -1;
- attr.btf_fd = btf_fd;
- fd = bpf_create_map_xattr(&attr);
- close(attr.btf_fd);
+ opts.btf_fd = btf_fd;
+ fd = bpf_map_create(BPF_MAP_TYPE_SK_STORAGE, "test_map", 4, 8, 0, &opts);
+ close(opts.btf_fd);
if (fd < 0)
printf("Failed to create sk_storage_map\n");
return fd;
}
-static char bpf_vlog[UINT_MAX >> 8];
+static int create_map_timer(void)
+{
+ LIBBPF_OPTS(bpf_map_create_opts, opts,
+ .btf_key_type_id = 1,
+ .btf_value_type_id = 5,
+ );
+ int fd, btf_fd;
+
+ btf_fd = load_btf();
+ if (btf_fd < 0)
+ return -1;
+
+ opts.btf_fd = btf_fd;
+ fd = bpf_map_create(BPF_MAP_TYPE_ARRAY, "test_map", 4, 16, 1, &opts);
+ if (fd < 0)
+ printf("Failed to create map with timer\n");
+ return fd;
+}
+
+static int create_map_kptr(void)
+{
+ LIBBPF_OPTS(bpf_map_create_opts, opts,
+ .btf_key_type_id = 1,
+ .btf_value_type_id = 14,
+ );
+ int fd, btf_fd;
+
+ btf_fd = load_btf();
+ if (btf_fd < 0)
+ return -1;
+
+ opts.btf_fd = btf_fd;
+ fd = bpf_map_create(BPF_MAP_TYPE_ARRAY, "test_map", 4, 24, 1, &opts);
+ if (fd < 0)
+ printf("Failed to create map with btf_id pointer\n");
+ return fd;
+}
+
+static void set_root(bool set)
+{
+ __u64 caps;
+
+ if (set) {
+ if (cap_enable_effective(1ULL << CAP_SYS_ADMIN, &caps))
+ perror("cap_disable_effective(CAP_SYS_ADMIN)");
+ } else {
+ if (cap_disable_effective(1ULL << CAP_SYS_ADMIN, &caps))
+ perror("cap_disable_effective(CAP_SYS_ADMIN)");
+ }
+}
+
+static __u64 ptr_to_u64(const void *ptr)
+{
+ return (uintptr_t) ptr;
+}
+
+static struct btf *btf__load_testmod_btf(struct btf *vmlinux)
+{
+ struct bpf_btf_info info;
+ __u32 len = sizeof(info);
+ struct btf *btf = NULL;
+ char name[64];
+ __u32 id = 0;
+ int err, fd;
+
+ /* Iterate all loaded BTF objects and find bpf_testmod,
+ * we need SYS_ADMIN cap for that.
+ */
+ set_root(true);
+
+ while (true) {
+ err = bpf_btf_get_next_id(id, &id);
+ if (err) {
+ if (errno == ENOENT)
+ break;
+ perror("bpf_btf_get_next_id failed");
+ break;
+ }
+
+ fd = bpf_btf_get_fd_by_id(id);
+ if (fd < 0) {
+ if (errno == ENOENT)
+ continue;
+ perror("bpf_btf_get_fd_by_id failed");
+ break;
+ }
+
+ memset(&info, 0, sizeof(info));
+ info.name_len = sizeof(name);
+ info.name = ptr_to_u64(name);
+ len = sizeof(info);
+
+ err = bpf_obj_get_info_by_fd(fd, &info, &len);
+ if (err) {
+ close(fd);
+ perror("bpf_obj_get_info_by_fd failed");
+ break;
+ }
+
+ if (strcmp("bpf_testmod", name)) {
+ close(fd);
+ continue;
+ }
+
+ btf = btf__load_from_kernel_by_id_split(id, vmlinux);
+ if (!btf) {
+ close(fd);
+ break;
+ }
+
+ /* We need the fd to stay open so it can be used in fd_array.
+ * The final cleanup call to btf__free will free btf object
+ * and close the file descriptor.
+ */
+ btf__set_fd(btf, fd);
+ break;
+ }
+
+ set_root(false);
+ return btf;
+}
+
+static struct btf *testmod_btf;
+static struct btf *vmlinux_btf;
+
+static void kfuncs_cleanup(void)
+{
+ btf__free(testmod_btf);
+ btf__free(vmlinux_btf);
+}
+
+static void fixup_prog_kfuncs(struct bpf_insn *prog, int *fd_array,
+ struct kfunc_btf_id_pair *fixup_kfunc_btf_id)
+{
+ /* Patch in kfunc BTF IDs */
+ while (fixup_kfunc_btf_id->kfunc) {
+ int btf_id = 0;
+
+ /* try to find kfunc in kernel BTF */
+ vmlinux_btf = vmlinux_btf ?: btf__load_vmlinux_btf();
+ if (vmlinux_btf) {
+ btf_id = btf__find_by_name_kind(vmlinux_btf,
+ fixup_kfunc_btf_id->kfunc,
+ BTF_KIND_FUNC);
+ btf_id = btf_id < 0 ? 0 : btf_id;
+ }
+
+ /* kfunc not found in kernel BTF, try bpf_testmod BTF */
+ if (!btf_id) {
+ testmod_btf = testmod_btf ?: btf__load_testmod_btf(vmlinux_btf);
+ if (testmod_btf) {
+ btf_id = btf__find_by_name_kind(testmod_btf,
+ fixup_kfunc_btf_id->kfunc,
+ BTF_KIND_FUNC);
+ btf_id = btf_id < 0 ? 0 : btf_id;
+ if (btf_id) {
+ /* We put bpf_testmod module fd into fd_array
+ * and its index 1 into instruction 'off'.
+ */
+ *fd_array = btf__fd(testmod_btf);
+ prog[fixup_kfunc_btf_id->insn_idx].off = 1;
+ }
+ }
+ }
+
+ prog[fixup_kfunc_btf_id->insn_idx].imm = btf_id;
+ fixup_kfunc_btf_id++;
+ }
+}
static void do_test_fixup(struct bpf_test *test, enum bpf_prog_type prog_type,
- struct bpf_insn *prog, int *map_fds)
+ struct bpf_insn *prog, int *map_fds, int *fd_array)
{
int *fixup_map_hash_8b = test->fixup_map_hash_8b;
int *fixup_map_hash_48b = test->fixup_map_hash_48b;
@@ -639,6 +1031,9 @@ static void do_test_fixup(struct bpf_test *test, enum bpf_prog_type prog_type,
int *fixup_sk_storage_map = test->fixup_sk_storage_map;
int *fixup_map_event_output = test->fixup_map_event_output;
int *fixup_map_reuseport_array = test->fixup_map_reuseport_array;
+ int *fixup_map_ringbuf = test->fixup_map_ringbuf;
+ int *fixup_map_timer = test->fixup_map_timer;
+ int *fixup_map_kptr = test->fixup_map_kptr;
if (test->fill_helper) {
test->fill_insns = calloc(MAX_TEST_INSNS, sizeof(struct bpf_insn));
@@ -816,6 +1211,30 @@ static void do_test_fixup(struct bpf_test *test, enum bpf_prog_type prog_type,
fixup_map_reuseport_array++;
} while (*fixup_map_reuseport_array);
}
+ if (*fixup_map_ringbuf) {
+ map_fds[20] = create_map(BPF_MAP_TYPE_RINGBUF, 0,
+ 0, getpagesize());
+ do {
+ prog[*fixup_map_ringbuf].imm = map_fds[20];
+ fixup_map_ringbuf++;
+ } while (*fixup_map_ringbuf);
+ }
+ if (*fixup_map_timer) {
+ map_fds[21] = create_map_timer();
+ do {
+ prog[*fixup_map_timer].imm = map_fds[21];
+ fixup_map_timer++;
+ } while (*fixup_map_timer);
+ }
+ if (*fixup_map_kptr) {
+ map_fds[22] = create_map_kptr();
+ do {
+ prog[*fixup_map_kptr].imm = map_fds[22];
+ fixup_map_kptr++;
+ } while (*fixup_map_kptr);
+ }
+
+ fixup_prog_kfuncs(prog, fd_array, test->fixup_kfunc_btf_id);
}
struct libcap {
@@ -825,47 +1244,19 @@ struct libcap {
static int set_admin(bool admin)
{
- cap_t caps;
- /* need CAP_BPF, CAP_NET_ADMIN, CAP_PERFMON to load progs */
- const cap_value_t cap_net_admin = CAP_NET_ADMIN;
- const cap_value_t cap_sys_admin = CAP_SYS_ADMIN;
- struct libcap *cap;
- int ret = -1;
-
- caps = cap_get_proc();
- if (!caps) {
- perror("cap_get_proc");
- return -1;
- }
- cap = (struct libcap *)caps;
- if (cap_set_flag(caps, CAP_EFFECTIVE, 1, &cap_sys_admin, CAP_CLEAR)) {
- perror("cap_set_flag clear admin");
- goto out;
- }
- if (cap_set_flag(caps, CAP_EFFECTIVE, 1, &cap_net_admin,
- admin ? CAP_SET : CAP_CLEAR)) {
- perror("cap_set_flag set_or_clear net");
- goto out;
- }
- /* libcap is likely old and simply ignores CAP_BPF and CAP_PERFMON,
- * so update effective bits manually
- */
+ int err;
+
if (admin) {
- cap->data[1].effective |= 1 << (38 /* CAP_PERFMON */ - 32);
- cap->data[1].effective |= 1 << (39 /* CAP_BPF */ - 32);
+ err = cap_enable_effective(ADMIN_CAPS, NULL);
+ if (err)
+ perror("cap_enable_effective(ADMIN_CAPS)");
} else {
- cap->data[1].effective &= ~(1 << (38 - 32));
- cap->data[1].effective &= ~(1 << (39 - 32));
+ err = cap_disable_effective(ADMIN_CAPS, NULL);
+ if (err)
+ perror("cap_disable_effective(ADMIN_CAPS)");
}
- if (cap_set_proc(caps)) {
- perror("cap_set_proc");
- goto out;
- }
- ret = 0;
-out:
- if (cap_free(caps))
- perror("cap_free");
- return ret;
+
+ return err;
}
static int do_prog_test_run(int fd_prog, bool unpriv, uint32_t expected_val,
@@ -873,35 +1264,62 @@ static int do_prog_test_run(int fd_prog, bool unpriv, uint32_t expected_val,
{
__u8 tmp[TEST_DATA_LEN << 2];
__u32 size_tmp = sizeof(tmp);
- uint32_t retval;
- int err;
+ int err, saved_errno;
+ LIBBPF_OPTS(bpf_test_run_opts, topts,
+ .data_in = data,
+ .data_size_in = size_data,
+ .data_out = tmp,
+ .data_size_out = size_tmp,
+ .repeat = 1,
+ );
if (unpriv)
set_admin(true);
- err = bpf_prog_test_run(fd_prog, 1, data, size_data,
- tmp, &size_tmp, &retval, NULL);
+ err = bpf_prog_test_run_opts(fd_prog, &topts);
+ saved_errno = errno;
+
if (unpriv)
set_admin(false);
- if (err && errno != 524/*ENOTSUPP*/ && errno != EPERM) {
- printf("Unexpected bpf_prog_test_run error ");
- return err;
+
+ if (err) {
+ switch (saved_errno) {
+ case ENOTSUPP:
+ printf("Did not run the program (not supported) ");
+ return 0;
+ case EPERM:
+ if (unpriv) {
+ printf("Did not run the program (no permission) ");
+ return 0;
+ }
+ /* fallthrough; */
+ default:
+ printf("FAIL: Unexpected bpf_prog_test_run error (%s) ",
+ strerror(saved_errno));
+ return err;
+ }
}
- if (!err && retval != expected_val &&
- expected_val != POINTER_VALUE) {
- printf("FAIL retval %d != %d ", retval, expected_val);
+
+ if (topts.retval != expected_val && expected_val != POINTER_VALUE) {
+ printf("FAIL retval %d != %d ", topts.retval, expected_val);
return 1;
}
return 0;
}
+/* Returns true if every part of exp (tab-separated) appears in log, in order.
+ *
+ * If exp is an empty string, returns true.
+ */
static bool cmp_str_seq(const char *log, const char *exp)
{
- char needle[80];
+ char needle[200];
const char *p, *q;
int len;
do {
+ if (!strlen(exp))
+ break;
p = strchr(exp, '\t');
if (!p)
p = exp + strlen(exp);
@@ -915,7 +1333,7 @@ static bool cmp_str_seq(const char *log, const char *exp)
needle[len] = 0;
q = strstr(log, needle);
if (!q) {
- printf("FAIL\nUnexpected verifier log in successful load!\n"
+ printf("FAIL\nUnexpected verifier log!\n"
"EXP: %s\nRES:\n", needle);
return false;
}
@@ -925,27 +1343,205 @@ static bool cmp_str_seq(const char *log, const char *exp)
return true;
}
+static bool is_null_insn(struct bpf_insn *insn)
+{
+ struct bpf_insn null_insn = {};
+
+ return memcmp(insn, &null_insn, sizeof(null_insn)) == 0;
+}
+
+static bool is_skip_insn(struct bpf_insn *insn)
+{
+ struct bpf_insn skip_insn = SKIP_INSNS();
+
+ return memcmp(insn, &skip_insn, sizeof(skip_insn)) == 0;
+}
+
+static int null_terminated_insn_len(struct bpf_insn *seq, int max_len)
+{
+ int i;
+
+ for (i = 0; i < max_len; ++i) {
+ if (is_null_insn(&seq[i]))
+ return i;
+ }
+ return max_len;
+}
+
+static bool compare_masked_insn(struct bpf_insn *orig, struct bpf_insn *masked)
+{
+ struct bpf_insn orig_masked;
+
+ memcpy(&orig_masked, orig, sizeof(orig_masked));
+ if (masked->imm == INSN_IMM_MASK)
+ orig_masked.imm = INSN_IMM_MASK;
+ if (masked->off == INSN_OFF_MASK)
+ orig_masked.off = INSN_OFF_MASK;
+
+ return memcmp(&orig_masked, masked, sizeof(orig_masked)) == 0;
+}
+
+static int find_insn_subseq(struct bpf_insn *seq, struct bpf_insn *subseq,
+ int seq_len, int subseq_len)
+{
+ int i, j;
+
+ if (subseq_len > seq_len)
+ return -1;
+
+ for (i = 0; i < seq_len - subseq_len + 1; ++i) {
+ bool found = true;
+
+ for (j = 0; j < subseq_len; ++j) {
+ if (!compare_masked_insn(&seq[i + j], &subseq[j])) {
+ found = false;
+ break;
+ }
+ }
+ if (found)
+ return i;
+ }
+
+ return -1;
+}
+
+static int find_skip_insn_marker(struct bpf_insn *seq, int len)
+{
+ int i;
+
+ for (i = 0; i < len; ++i)
+ if (is_skip_insn(&seq[i]))
+ return i;
+
+ return -1;
+}
+
+/* Return true if all sub-sequences in `subseqs` could be found in
+ * `seq` one after another. Sub-sequences are separated by a single
+ * nil instruction.
+ */
+static bool find_all_insn_subseqs(struct bpf_insn *seq, struct bpf_insn *subseqs,
+ int seq_len, int max_subseqs_len)
+{
+ int subseqs_len = null_terminated_insn_len(subseqs, max_subseqs_len);
+
+ while (subseqs_len > 0) {
+ int skip_idx = find_skip_insn_marker(subseqs, subseqs_len);
+ int cur_subseq_len = skip_idx < 0 ? subseqs_len : skip_idx;
+ int subseq_idx = find_insn_subseq(seq, subseqs,
+ seq_len, cur_subseq_len);
+
+ if (subseq_idx < 0)
+ return false;
+ seq += subseq_idx + cur_subseq_len;
+ seq_len -= subseq_idx + cur_subseq_len;
+ subseqs += cur_subseq_len + 1;
+ subseqs_len -= cur_subseq_len + 1;
+ }
+
+ return true;
+}
+
+static void print_insn(struct bpf_insn *buf, int cnt)
+{
+ int i;
+
+ printf(" addr op d s off imm\n");
+ for (i = 0; i < cnt; ++i) {
+ struct bpf_insn *insn = &buf[i];
+
+ if (is_null_insn(insn))
+ break;
+
+ if (is_skip_insn(insn))
+ printf(" ...\n");
+ else
+ printf(" %04x: %02x %1x %x %04hx %08x\n",
+ i, insn->code, insn->dst_reg,
+ insn->src_reg, insn->off, insn->imm);
+ }
+}
+
+static bool check_xlated_program(struct bpf_test *test, int fd_prog)
+{
+ struct bpf_insn *buf;
+ unsigned int cnt;
+ bool result = true;
+ bool check_expected = !is_null_insn(test->expected_insns);
+ bool check_unexpected = !is_null_insn(test->unexpected_insns);
+
+ if (!check_expected && !check_unexpected)
+ goto out;
+
+ if (get_xlated_program(fd_prog, &buf, &cnt)) {
+ printf("FAIL: can't get xlated program\n");
+ result = false;
+ goto out;
+ }
+
+ if (check_expected &&
+ !find_all_insn_subseqs(buf, test->expected_insns,
+ cnt, MAX_EXPECTED_INSNS)) {
+ printf("FAIL: can't find expected subsequence of instructions\n");
+ result = false;
+ if (verbose) {
+ printf("Program:\n");
+ print_insn(buf, cnt);
+ printf("Expected subsequence:\n");
+ print_insn(test->expected_insns, MAX_EXPECTED_INSNS);
+ }
+ }
+
+ if (check_unexpected &&
+ find_all_insn_subseqs(buf, test->unexpected_insns,
+ cnt, MAX_UNEXPECTED_INSNS)) {
+ printf("FAIL: found unexpected subsequence of instructions\n");
+ result = false;
+ if (verbose) {
+ printf("Program:\n");
+ print_insn(buf, cnt);
+ printf("Un-expected subsequence:\n");
+ print_insn(test->unexpected_insns, MAX_UNEXPECTED_INSNS);
+ }
+ }
+
+ free(buf);
+ out:
+ return result;
+}
+
static void do_test_single(struct bpf_test *test, bool unpriv,
int *passes, int *errors)
{
- int fd_prog, expected_ret, alignment_prevented_execution;
+ int fd_prog, btf_fd, expected_ret, alignment_prevented_execution;
int prog_len, prog_type = test->prog_type;
struct bpf_insn *prog = test->insns;
- struct bpf_load_program_attr attr;
+ LIBBPF_OPTS(bpf_prog_load_opts, opts);
int run_errs, run_successes;
int map_fds[MAX_NR_MAPS];
const char *expected_err;
+ int fd_array[2] = { -1, -1 };
+ int saved_errno;
int fixup_skips;
__u32 pflags;
int i, err;
+ if ((test->flags & F_NEEDS_JIT_ENABLED) && jit_disabled) {
+ printf("SKIP (requires BPF JIT)\n");
+ skips++;
+ sched_yield();
+ return;
+ }
+
+ fd_prog = -1;
for (i = 0; i < MAX_NR_MAPS; i++)
map_fds[i] = -1;
+ btf_fd = -1;
if (!prog_type)
prog_type = BPF_PROG_TYPE_SOCKET_FILTER;
fixup_skips = skips;
- do_test_fixup(test, prog_type, prog, map_fds);
+ do_test_fixup(test, prog_type, prog, map_fds, &fd_array[1]);
if (test->fill_insns) {
prog = test->fill_insns;
prog_len = test->prog_len;
@@ -958,7 +1554,7 @@ static void do_test_single(struct bpf_test *test, bool unpriv,
if (fixup_skips != skips)
return;
- pflags = BPF_F_TEST_RND_HI32;
+ pflags = testing_prog_flags();
if (test->flags & F_LOAD_WITH_STRICT_ALIGNMENT)
pflags |= BPF_F_STRICT_ALIGNMENT;
if (test->flags & F_NEEDS_EFFICIENT_UNALIGNED_ACCESS)
@@ -970,33 +1566,74 @@ static void do_test_single(struct bpf_test *test, bool unpriv,
test->result_unpriv : test->result;
expected_err = unpriv && test->errstr_unpriv ?
test->errstr_unpriv : test->errstr;
- memset(&attr, 0, sizeof(attr));
- attr.prog_type = prog_type;
- attr.expected_attach_type = test->expected_attach_type;
- attr.insns = prog;
- attr.insns_cnt = prog_len;
- attr.license = "GPL";
+
+ opts.expected_attach_type = test->expected_attach_type;
if (verbose)
- attr.log_level = 1;
+ opts.log_level = verif_log_level | 4; /* force stats */
else if (expected_ret == VERBOSE_ACCEPT)
- attr.log_level = 2;
+ opts.log_level = 2;
else
- attr.log_level = 4;
- attr.prog_flags = pflags;
+ opts.log_level = DEFAULT_LIBBPF_LOG_LEVEL;
+ opts.prog_flags = pflags;
+ if (fd_array[1] != -1)
+ opts.fd_array = &fd_array[0];
+
+ if ((prog_type == BPF_PROG_TYPE_TRACING ||
+ prog_type == BPF_PROG_TYPE_LSM) && test->kfunc) {
+ int attach_btf_id;
+
+ attach_btf_id = libbpf_find_vmlinux_btf_id(test->kfunc,
+ opts.expected_attach_type);
+ if (attach_btf_id < 0) {
+ printf("FAIL\nFailed to find BTF ID for '%s'!\n",
+ test->kfunc);
+ (*errors)++;
+ return;
+ }
+
+ opts.attach_btf_id = attach_btf_id;
+ }
+
+ if (test->btf_types[0] != 0) {
+ btf_fd = load_btf_for_test(test);
+ if (btf_fd < 0)
+ goto fail_log;
+ opts.prog_btf_fd = btf_fd;
+ }
- fd_prog = bpf_load_program_xattr(&attr, bpf_vlog, sizeof(bpf_vlog));
- if (fd_prog < 0 && !bpf_probe_prog_type(prog_type, 0)) {
+ if (test->func_info_cnt != 0) {
+ opts.func_info = test->func_info;
+ opts.func_info_cnt = test->func_info_cnt;
+ opts.func_info_rec_size = sizeof(test->func_info[0]);
+ }
+
+ opts.log_buf = bpf_vlog;
+ opts.log_size = sizeof(bpf_vlog);
+ fd_prog = bpf_prog_load(prog_type, NULL, "GPL", prog, prog_len, &opts);
+ saved_errno = errno;
+
+ /* BPF_PROG_TYPE_TRACING requires more setup and
+ * bpf_probe_prog_type won't give correct answer
+ */
+ if (fd_prog < 0 && prog_type != BPF_PROG_TYPE_TRACING &&
+ !libbpf_probe_bpf_prog_type(prog_type, NULL)) {
printf("SKIP (unsupported program type %d)\n", prog_type);
skips++;
goto close_fds;
}
+ if (fd_prog < 0 && saved_errno == ENOTSUPP) {
+ printf("SKIP (program uses an unsupported feature)\n");
+ skips++;
+ goto close_fds;
+ }
+
alignment_prevented_execution = 0;
if (expected_ret == ACCEPT || expected_ret == VERBOSE_ACCEPT) {
if (fd_prog < 0) {
printf("FAIL\nFailed to load prog '%s'!\n",
- strerror(errno));
+ strerror(saved_errno));
goto fail_log;
}
#ifndef CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS
@@ -1012,14 +1649,14 @@ static void do_test_single(struct bpf_test *test, bool unpriv,
printf("FAIL\nUnexpected success to load!\n");
goto fail_log;
}
- if (!expected_err || !strstr(bpf_vlog, expected_err)) {
+ if (!expected_err || !cmp_str_seq(bpf_vlog, expected_err)) {
printf("FAIL\nUnexpected error message!\n\tEXP: %s\n\tRES: %s\n",
expected_err, bpf_vlog);
goto fail_log;
}
}
- if (test->insn_processed) {
+ if (!unpriv && test->insn_processed) {
uint32_t insn_processed;
char *proc;
@@ -1035,9 +1672,12 @@ static void do_test_single(struct bpf_test *test, bool unpriv,
if (verbose)
printf(", verifier log:\n%s", bpf_vlog);
+ if (!check_xlated_program(test, fd_prog))
+ goto fail_log;
+
run_errs = 0;
run_successes = 0;
- if (!alignment_prevented_execution && fd_prog >= 0) {
+ if (!alignment_prevented_execution && fd_prog >= 0 && test->runs >= 0) {
uint32_t expected_val;
int i;
@@ -1078,6 +1718,7 @@ close_fds:
if (test->fill_insns)
free(test->fill_insns);
close(fd_prog);
+ close(btf_fd);
for (i = 0; i < MAX_NR_MAPS; i++)
close(map_fds[i]);
sched_yield();
@@ -1090,51 +1731,35 @@ fail_log:
static bool is_admin(void)
{
- cap_flag_value_t net_priv = CAP_CLEAR;
- bool perfmon_priv = false;
- bool bpf_priv = false;
- struct libcap *cap;
- cap_t caps;
-
-#ifdef CAP_IS_SUPPORTED
- if (!CAP_IS_SUPPORTED(CAP_SETFCAP)) {
- perror("cap_get_flag");
- return false;
- }
-#endif
- caps = cap_get_proc();
- if (!caps) {
- perror("cap_get_proc");
+ __u64 caps;
+
+ /* The test checks for finer cap as CAP_NET_ADMIN,
+ * CAP_PERFMON, and CAP_BPF instead of CAP_SYS_ADMIN.
+ * Thus, disable CAP_SYS_ADMIN at the beginning.
+ */
+ if (cap_disable_effective(1ULL << CAP_SYS_ADMIN, &caps)) {
+ perror("cap_disable_effective(CAP_SYS_ADMIN)");
return false;
}
- cap = (struct libcap *)caps;
- bpf_priv = cap->data[1].effective & (1 << (39/* CAP_BPF */ - 32));
- perfmon_priv = cap->data[1].effective & (1 << (38/* CAP_PERFMON */ - 32));
- if (cap_get_flag(caps, CAP_NET_ADMIN, CAP_EFFECTIVE, &net_priv))
- perror("cap_get_flag NET");
- if (cap_free(caps))
- perror("cap_free");
- return bpf_priv && perfmon_priv && net_priv == CAP_SET;
-}
-
-static void get_unpriv_disabled()
-{
- char buf[2];
- FILE *fd;
- fd = fopen("/proc/sys/"UNPRIV_SYSCTL, "r");
- if (!fd) {
- perror("fopen /proc/sys/"UNPRIV_SYSCTL);
- unpriv_disabled = true;
- return;
- }
- if (fgets(buf, 2, fd) == buf && atoi(buf))
- unpriv_disabled = true;
- fclose(fd);
+ return (caps & ADMIN_CAPS) == ADMIN_CAPS;
}
static bool test_as_unpriv(struct bpf_test *test)
{
+#ifndef CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS
+ /* Some architectures have strict alignment requirements. In
+ * that case, the BPF verifier detects if a program has
+ * unaligned accesses and rejects them. A user can pass
+ * BPF_F_ANY_ALIGNMENT to a program to override this
+ * check. That, however, will only work when a privileged user
+ * loads a program. An unprivileged user loading a program
+ * with this flag will be rejected prior entering the
+ * verifier.
+ */
+ if (test->flags & F_NEEDS_EFFICIENT_UNALIGNED_ACCESS)
+ return false;
+#endif
return !test->prog_type ||
test->prog_type == BPF_PROG_TYPE_SOCKET_FILTER ||
test->prog_type == BPF_PROG_TYPE_CGROUP_SKB;
@@ -1144,6 +1769,12 @@ static int do_test(bool unpriv, unsigned int from, unsigned int to)
{
int i, passes = 0, errors = 0;
+ /* ensure previous instance of the module is unloaded */
+ unload_bpf_testmod(verbose);
+
+ if (load_bpf_testmod(verbose))
+ return EXIT_FAILURE;
+
for (i = from; i < to; i++) {
struct bpf_test *test = &tests[i];
@@ -1171,6 +1802,9 @@ static int do_test(bool unpriv, unsigned int from, unsigned int to)
}
}
+ unload_bpf_testmod(verbose);
+ kfuncs_cleanup();
+
printf("Summary: %d PASSED, %d SKIPPED, %d FAILED\n", passes,
skips, errors);
return errors ? EXIT_FAILURE : EXIT_SUCCESS;
@@ -1185,6 +1819,13 @@ int main(int argc, char **argv)
if (argc > 1 && strcmp(argv[1], "-v") == 0) {
arg++;
verbose = true;
+ verif_log_level = 1;
+ argc--;
+ }
+ if (argc > 1 && strcmp(argv[1], "-vv") == 0) {
+ arg++;
+ verbose = true;
+ verif_log_level = 2;
argc--;
}
@@ -1205,13 +1846,18 @@ int main(int argc, char **argv)
}
}
- get_unpriv_disabled();
+ unpriv_disabled = get_unpriv_disabled();
if (unpriv && unpriv_disabled) {
printf("Cannot run as unprivileged user with sysctl %s.\n",
UNPRIV_SYSCTL);
return EXIT_FAILURE;
}
+ jit_disabled = !is_jit_enabled();
+
+ /* Use libbpf 1.0 API mode */
+ libbpf_set_strict_mode(LIBBPF_STRICT_ALL);
+
bpf_semi_rand_init();
return do_test(unpriv, from, to);
}
diff --git a/tools/testing/selftests/bpf/test_verifier_log.c b/tools/testing/selftests/bpf/test_verifier_log.c
deleted file mode 100644
index 8d6918c3b4a2..000000000000
--- a/tools/testing/selftests/bpf/test_verifier_log.c
+++ /dev/null
@@ -1,174 +0,0 @@
-#include <errno.h>
-#include <stdlib.h>
-#include <stdio.h>
-#include <string.h>
-#include <unistd.h>
-#include <sys/time.h>
-
-#include <linux/bpf.h>
-#include <linux/filter.h>
-#include <linux/unistd.h>
-
-#include <bpf/bpf.h>
-
-#include "bpf_rlimit.h"
-
-#define LOG_SIZE (1 << 20)
-
-#define err(str...) printf("ERROR: " str)
-
-static const struct bpf_insn code_sample[] = {
- /* We need a few instructions to pass the min log length */
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
- BPF_FUNC_map_lookup_elem),
- BPF_EXIT_INSN(),
-};
-
-static inline __u64 ptr_to_u64(const void *ptr)
-{
- return (__u64) (unsigned long) ptr;
-}
-
-static int load(char *log, size_t log_len, int log_level)
-{
- union bpf_attr attr;
-
- bzero(&attr, sizeof(attr));
- attr.prog_type = BPF_PROG_TYPE_SOCKET_FILTER;
- attr.insn_cnt = (__u32)(sizeof(code_sample) / sizeof(struct bpf_insn));
- attr.insns = ptr_to_u64(code_sample);
- attr.license = ptr_to_u64("GPL");
- attr.log_buf = ptr_to_u64(log);
- attr.log_size = log_len;
- attr.log_level = log_level;
-
- return syscall(__NR_bpf, BPF_PROG_LOAD, &attr, sizeof(attr));
-}
-
-static void check_ret(int ret, int exp_errno)
-{
- if (ret > 0) {
- close(ret);
- err("broken sample loaded successfully!?\n");
- exit(1);
- }
-
- if (!ret || errno != exp_errno) {
- err("Program load returned: ret:%d/errno:%d, expected ret:%d/errno:%d\n",
- ret, errno, -1, exp_errno);
- exit(1);
- }
-}
-
-static void check_ones(const char *buf, size_t len, const char *msg)
-{
- while (len--)
- if (buf[len] != 1) {
- err("%s", msg);
- exit(1);
- }
-}
-
-static void test_log_good(char *log, size_t buf_len, size_t log_len,
- size_t exp_len, int exp_errno, const char *full_log)
-{
- size_t len;
- int ret;
-
- memset(log, 1, buf_len);
-
- ret = load(log, log_len, 1);
- check_ret(ret, exp_errno);
-
- len = strnlen(log, buf_len);
- if (len == buf_len) {
- err("verifier did not NULL terminate the log\n");
- exit(1);
- }
- if (exp_len && len != exp_len) {
- err("incorrect log length expected:%zd have:%zd\n",
- exp_len, len);
- exit(1);
- }
-
- if (strchr(log, 1)) {
- err("verifier leaked a byte through\n");
- exit(1);
- }
-
- check_ones(log + len + 1, buf_len - len - 1,
- "verifier wrote bytes past NULL termination\n");
-
- if (memcmp(full_log, log, LOG_SIZE)) {
- err("log did not match expected output\n");
- exit(1);
- }
-}
-
-static void test_log_bad(char *log, size_t log_len, int log_level)
-{
- int ret;
-
- ret = load(log, log_len, log_level);
- check_ret(ret, EINVAL);
- if (log)
- check_ones(log, LOG_SIZE,
- "verifier touched log with bad parameters\n");
-}
-
-int main(int argc, char **argv)
-{
- char full_log[LOG_SIZE];
- char log[LOG_SIZE];
- size_t want_len;
- int i;
-
- memset(log, 1, LOG_SIZE);
-
- /* Test incorrect attr */
- printf("Test log_level 0...\n");
- test_log_bad(log, LOG_SIZE, 0);
-
- printf("Test log_size < 128...\n");
- test_log_bad(log, 15, 1);
-
- printf("Test log_buff = NULL...\n");
- test_log_bad(NULL, LOG_SIZE, 1);
-
- /* Test with log big enough */
- printf("Test oversized buffer...\n");
- test_log_good(full_log, LOG_SIZE, LOG_SIZE, 0, EACCES, full_log);
-
- want_len = strlen(full_log);
-
- printf("Test exact buffer...\n");
- test_log_good(log, LOG_SIZE, want_len + 2, want_len, EACCES, full_log);
-
- printf("Test undersized buffers...\n");
- for (i = 0; i < 64; i++) {
- full_log[want_len - i + 1] = 1;
- full_log[want_len - i] = 0;
-
- test_log_good(log, LOG_SIZE, want_len + 1 - i, want_len - i,
- ENOSPC, full_log);
- }
-
- printf("test_verifier_log: OK\n");
- return 0;
-}
diff --git a/tools/testing/selftests/bpf/test_xdp_features.sh b/tools/testing/selftests/bpf/test_xdp_features.sh
new file mode 100755
index 000000000000..0aa71c4455c0
--- /dev/null
+++ b/tools/testing/selftests/bpf/test_xdp_features.sh
@@ -0,0 +1,107 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+readonly NS="ns1-$(mktemp -u XXXXXX)"
+readonly V0_IP4=10.10.0.11
+readonly V1_IP4=10.10.0.1
+readonly V0_IP6=2001:db8::11
+readonly V1_IP6=2001:db8::1
+
+ret=1
+
+setup() {
+ {
+ ip netns add ${NS}
+
+ ip link add v1 type veth peer name v0 netns ${NS}
+
+ ip link set v1 up
+ ip addr add $V1_IP4/24 dev v1
+ ip addr add $V1_IP6/64 nodad dev v1
+ ip -n ${NS} link set dev v0 up
+ ip -n ${NS} addr add $V0_IP4/24 dev v0
+ ip -n ${NS} addr add $V0_IP6/64 nodad dev v0
+
+ # Enable XDP mode and disable checksum offload
+ ethtool -K v1 gro on
+ ethtool -K v1 tx-checksumming off
+ ip netns exec ${NS} ethtool -K v0 gro on
+ ip netns exec ${NS} ethtool -K v0 tx-checksumming off
+ } > /dev/null 2>&1
+}
+
+cleanup() {
+ ip link del v1 2> /dev/null
+ ip netns del ${NS} 2> /dev/null
+ [ "$(pidof xdp_features)" = "" ] || kill $(pidof xdp_features) 2> /dev/null
+}
+
+wait_for_dut_server() {
+ while sleep 1; do
+ ss -tlp | grep -q xdp_features
+ [ $? -eq 0 ] && break
+ done
+}
+
+test_xdp_features() {
+ setup
+
+ ## XDP_PASS
+ ./xdp_features -f XDP_PASS -D $V1_IP6 -T $V0_IP6 v1 &
+ wait_for_dut_server
+ ip netns exec ${NS} ./xdp_features -t -f XDP_PASS \
+ -D $V1_IP6 -C $V1_IP6 \
+ -T $V0_IP6 v0
+ [ $? -ne 0 ] && exit
+
+ ## XDP_DROP
+ ./xdp_features -f XDP_DROP -D ::ffff:$V1_IP4 -T ::ffff:$V0_IP4 v1 &
+ wait_for_dut_server
+ ip netns exec ${NS} ./xdp_features -t -f XDP_DROP \
+ -D ::ffff:$V1_IP4 \
+ -C ::ffff:$V1_IP4 \
+ -T ::ffff:$V0_IP4 v0
+ [ $? -ne 0 ] && exit
+
+ ## XDP_ABORTED
+ ./xdp_features -f XDP_ABORTED -D $V1_IP6 -T $V0_IP6 v1 &
+ wait_for_dut_server
+ ip netns exec ${NS} ./xdp_features -t -f XDP_ABORTED \
+ -D $V1_IP6 -C $V1_IP6 \
+ -T $V0_IP6 v0
+ [ $? -ne 0 ] && exit
+
+ ## XDP_TX
+ ./xdp_features -f XDP_TX -D ::ffff:$V1_IP4 -T ::ffff:$V0_IP4 v1 &
+ wait_for_dut_server
+ ip netns exec ${NS} ./xdp_features -t -f XDP_TX \
+ -D ::ffff:$V1_IP4 \
+ -C ::ffff:$V1_IP4 \
+ -T ::ffff:$V0_IP4 v0
+ [ $? -ne 0 ] && exit
+
+ ## XDP_REDIRECT
+ ./xdp_features -f XDP_REDIRECT -D $V1_IP6 -T $V0_IP6 v1 &
+ wait_for_dut_server
+ ip netns exec ${NS} ./xdp_features -t -f XDP_REDIRECT \
+ -D $V1_IP6 -C $V1_IP6 \
+ -T $V0_IP6 v0
+ [ $? -ne 0 ] && exit
+
+ ## XDP_NDO_XMIT
+ ./xdp_features -f XDP_NDO_XMIT -D ::ffff:$V1_IP4 -T ::ffff:$V0_IP4 v1 &
+ wait_for_dut_server
+ ip netns exec ${NS} ./xdp_features -t -f XDP_NDO_XMIT \
+ -D ::ffff:$V1_IP4 \
+ -C ::ffff:$V1_IP4 \
+ -T ::ffff:$V0_IP4 v0
+ ret=$?
+ cleanup
+}
+
+set -e
+trap cleanup 2 3 6 9
+
+test_xdp_features
+
+exit $ret
diff --git a/tools/testing/selftests/bpf/test_xdp_meta.sh b/tools/testing/selftests/bpf/test_xdp_meta.sh
index 637fcf4fe4e3..2740322c1878 100755
--- a/tools/testing/selftests/bpf/test_xdp_meta.sh
+++ b/tools/testing/selftests/bpf/test_xdp_meta.sh
@@ -1,5 +1,11 @@
#!/bin/sh
+BPF_FILE="test_xdp_meta.bpf.o"
+# Kselftest framework requirement - SKIP code is 4.
+readonly KSFT_SKIP=4
+readonly NS1="ns1-$(mktemp -u XXXXXX)"
+readonly NS2="ns2-$(mktemp -u XXXXXX)"
+
cleanup()
{
if [ "$?" = "0" ]; then
@@ -10,43 +16,43 @@ cleanup()
set +e
ip link del veth1 2> /dev/null
- ip netns del ns1 2> /dev/null
- ip netns del ns2 2> /dev/null
+ ip netns del ${NS1} 2> /dev/null
+ ip netns del ${NS2} 2> /dev/null
}
ip link set dev lo xdp off 2>/dev/null > /dev/null
if [ $? -ne 0 ];then
echo "selftests: [SKIP] Could not run test without the ip xdp support"
- exit 0
+ exit $KSFT_SKIP
fi
set -e
-ip netns add ns1
-ip netns add ns2
+ip netns add ${NS1}
+ip netns add ${NS2}
trap cleanup 0 2 3 6 9
ip link add veth1 type veth peer name veth2
-ip link set veth1 netns ns1
-ip link set veth2 netns ns2
+ip link set veth1 netns ${NS1}
+ip link set veth2 netns ${NS2}
-ip netns exec ns1 ip addr add 10.1.1.11/24 dev veth1
-ip netns exec ns2 ip addr add 10.1.1.22/24 dev veth2
+ip netns exec ${NS1} ip addr add 10.1.1.11/24 dev veth1
+ip netns exec ${NS2} ip addr add 10.1.1.22/24 dev veth2
-ip netns exec ns1 tc qdisc add dev veth1 clsact
-ip netns exec ns2 tc qdisc add dev veth2 clsact
+ip netns exec ${NS1} tc qdisc add dev veth1 clsact
+ip netns exec ${NS2} tc qdisc add dev veth2 clsact
-ip netns exec ns1 tc filter add dev veth1 ingress bpf da obj test_xdp_meta.o sec t
-ip netns exec ns2 tc filter add dev veth2 ingress bpf da obj test_xdp_meta.o sec t
+ip netns exec ${NS1} tc filter add dev veth1 ingress bpf da obj ${BPF_FILE} sec t
+ip netns exec ${NS2} tc filter add dev veth2 ingress bpf da obj ${BPF_FILE} sec t
-ip netns exec ns1 ip link set dev veth1 xdp obj test_xdp_meta.o sec x
-ip netns exec ns2 ip link set dev veth2 xdp obj test_xdp_meta.o sec x
+ip netns exec ${NS1} ip link set dev veth1 xdp obj ${BPF_FILE} sec x
+ip netns exec ${NS2} ip link set dev veth2 xdp obj ${BPF_FILE} sec x
-ip netns exec ns1 ip link set dev veth1 up
-ip netns exec ns2 ip link set dev veth2 up
+ip netns exec ${NS1} ip link set dev veth1 up
+ip netns exec ${NS2} ip link set dev veth2 up
-ip netns exec ns1 ping -c 1 10.1.1.22
-ip netns exec ns2 ping -c 1 10.1.1.11
+ip netns exec ${NS1} ping -c 1 10.1.1.22
+ip netns exec ${NS2} ping -c 1 10.1.1.11
exit 0
diff --git a/tools/testing/selftests/bpf/test_xdp_redirect.sh b/tools/testing/selftests/bpf/test_xdp_redirect.sh
index c4b17e08d431..0746a4fde9d3 100755
--- a/tools/testing/selftests/bpf/test_xdp_redirect.sh
+++ b/tools/testing/selftests/bpf/test_xdp_redirect.sh
@@ -1,4 +1,4 @@
-#!/bin/sh
+#!/bin/bash
# Create 2 namespaces with two veth peers, and
# forward packets in-between using generic XDP
#
@@ -10,52 +10,70 @@
# | xdp forwarding |
# ------------------
-cleanup()
+readonly NS1="ns1-$(mktemp -u XXXXXX)"
+readonly NS2="ns2-$(mktemp -u XXXXXX)"
+ret=0
+
+setup()
{
- if [ "$?" = "0" ]; then
- echo "selftests: test_xdp_redirect [PASS]";
- else
- echo "selftests: test_xdp_redirect [FAILED]";
- fi
- set +e
+ local xdpmode=$1
+
+ ip netns add ${NS1}
+ ip netns add ${NS2}
+
+ ip link add veth1 index 111 type veth peer name veth11 netns ${NS1}
+ ip link add veth2 index 222 type veth peer name veth22 netns ${NS2}
+
+ ip link set veth1 up
+ ip link set veth2 up
+ ip -n ${NS1} link set dev veth11 up
+ ip -n ${NS2} link set dev veth22 up
+
+ ip -n ${NS1} addr add 10.1.1.11/24 dev veth11
+ ip -n ${NS2} addr add 10.1.1.22/24 dev veth22
+}
+
+cleanup()
+{
ip link del veth1 2> /dev/null
ip link del veth2 2> /dev/null
- ip netns del ns1 2> /dev/null
- ip netns del ns2 2> /dev/null
+ ip netns del ${NS1} 2> /dev/null
+ ip netns del ${NS2} 2> /dev/null
}
-ip link set dev lo xdpgeneric off 2>/dev/null > /dev/null
-if [ $? -ne 0 ];then
- echo "selftests: [SKIP] Could not run test without the ip xdpgeneric support"
- exit 0
-fi
-set -e
-
-ip netns add ns1
-ip netns add ns2
-
-trap cleanup 0 2 3 6 9
+test_xdp_redirect()
+{
+ local xdpmode=$1
-ip link add veth1 index 111 type veth peer name veth11
-ip link add veth2 index 222 type veth peer name veth22
+ setup
-ip link set veth11 netns ns1
-ip link set veth22 netns ns2
+ ip link set dev veth1 $xdpmode off &> /dev/null
+ if [ $? -ne 0 ];then
+ echo "selftests: test_xdp_redirect $xdpmode [SKIP]"
+ return 0
+ fi
-ip link set veth1 up
-ip link set veth2 up
+ ip -n ${NS1} link set veth11 $xdpmode obj xdp_dummy.bpf.o sec xdp &> /dev/null
+ ip -n ${NS2} link set veth22 $xdpmode obj xdp_dummy.bpf.o sec xdp &> /dev/null
+ ip link set dev veth1 $xdpmode obj test_xdp_redirect.bpf.o sec redirect_to_222 &> /dev/null
+ ip link set dev veth2 $xdpmode obj test_xdp_redirect.bpf.o sec redirect_to_111 &> /dev/null
-ip netns exec ns1 ip addr add 10.1.1.11/24 dev veth11
-ip netns exec ns2 ip addr add 10.1.1.22/24 dev veth22
+ if ip netns exec ${NS1} ping -c 1 10.1.1.22 &> /dev/null &&
+ ip netns exec ${NS2} ping -c 1 10.1.1.11 &> /dev/null; then
+ echo "selftests: test_xdp_redirect $xdpmode [PASS]";
+ else
+ ret=1
+ echo "selftests: test_xdp_redirect $xdpmode [FAILED]";
+ fi
-ip netns exec ns1 ip link set dev veth11 up
-ip netns exec ns2 ip link set dev veth22 up
+ cleanup
+}
-ip link set dev veth1 xdpgeneric obj test_xdp_redirect.o sec redirect_to_222
-ip link set dev veth2 xdpgeneric obj test_xdp_redirect.o sec redirect_to_111
+set -e
+trap cleanup 2 3 6 9
-ip netns exec ns1 ping -c 1 10.1.1.22
-ip netns exec ns2 ping -c 1 10.1.1.11
+test_xdp_redirect xdpgeneric
+test_xdp_redirect xdpdrv
-exit 0
+exit $ret
diff --git a/tools/testing/selftests/bpf/test_xdp_redirect_multi.sh b/tools/testing/selftests/bpf/test_xdp_redirect_multi.sh
new file mode 100755
index 000000000000..4c3c3fdd2d73
--- /dev/null
+++ b/tools/testing/selftests/bpf/test_xdp_redirect_multi.sh
@@ -0,0 +1,214 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+#
+# Test topology:
+# - - - - - - - - - - - - - - - - - - -
+# | veth1 veth2 veth3 | ns0
+# - -| - - - - - - | - - - - - - | - -
+# --------- --------- ---------
+# | veth0 | | veth0 | | veth0 |
+# --------- --------- ---------
+# ns1 ns2 ns3
+#
+# Test modules:
+# XDP modes: generic, native, native + egress_prog
+#
+# Test cases:
+# ARP: Testing BPF_F_BROADCAST, the ingress interface also should receive
+# the redirects.
+# ns1 -> gw: ns1, ns2, ns3, should receive the arp request
+# IPv4: Testing BPF_F_BROADCAST | BPF_F_EXCLUDE_INGRESS, the ingress
+# interface should not receive the redirects.
+# ns1 -> gw: ns1 should not receive, ns2, ns3 should receive redirects.
+# IPv6: Testing none flag, all the pkts should be redirected back
+# ping test: ns1 -> ns2 (block), echo requests will be redirect back
+# egress_prog:
+# all src mac should be egress interface's mac
+
+# netns numbers
+NUM=3
+IFACES=""
+DRV_MODE="xdpgeneric xdpdrv xdpegress"
+PASS=0
+FAIL=0
+LOG_DIR=$(mktemp -d)
+declare -a NS
+NS[0]="ns0-$(mktemp -u XXXXXX)"
+NS[1]="ns1-$(mktemp -u XXXXXX)"
+NS[2]="ns2-$(mktemp -u XXXXXX)"
+NS[3]="ns3-$(mktemp -u XXXXXX)"
+
+test_pass()
+{
+ echo "Pass: $@"
+ PASS=$((PASS + 1))
+}
+
+test_fail()
+{
+ echo "fail: $@"
+ FAIL=$((FAIL + 1))
+}
+
+clean_up()
+{
+ for i in $(seq 0 $NUM); do
+ ip netns del ${NS[$i]} 2> /dev/null
+ done
+}
+
+# Kselftest framework requirement - SKIP code is 4.
+check_env()
+{
+ ip link set dev lo xdpgeneric off &>/dev/null
+ if [ $? -ne 0 ];then
+ echo "selftests: [SKIP] Could not run test without the ip xdpgeneric support"
+ exit 4
+ fi
+
+ which tcpdump &>/dev/null
+ if [ $? -ne 0 ];then
+ echo "selftests: [SKIP] Could not run test without tcpdump"
+ exit 4
+ fi
+}
+
+setup_ns()
+{
+ local mode=$1
+ IFACES=""
+
+ if [ "$mode" = "xdpegress" ]; then
+ mode="xdpdrv"
+ fi
+
+ ip netns add ${NS[0]}
+ for i in $(seq $NUM); do
+ ip netns add ${NS[$i]}
+ ip -n ${NS[$i]} link add veth0 type veth peer name veth$i netns ${NS[0]}
+ ip -n ${NS[$i]} link set veth0 up
+ ip -n ${NS[0]} link set veth$i up
+
+ ip -n ${NS[$i]} addr add 192.0.2.$i/24 dev veth0
+ ip -n ${NS[$i]} addr add 2001:db8::$i/64 dev veth0
+ # Add a neigh entry for IPv4 ping test
+ ip -n ${NS[$i]} neigh add 192.0.2.253 lladdr 00:00:00:00:00:01 dev veth0
+ ip -n ${NS[$i]} link set veth0 $mode obj \
+ xdp_dummy.bpf.o sec xdp &> /dev/null || \
+ { test_fail "Unable to load dummy xdp" && exit 1; }
+ IFACES="$IFACES veth$i"
+ veth_mac[$i]=$(ip -n ${NS[0]} link show veth$i | awk '/link\/ether/ {print $2}')
+ done
+}
+
+do_egress_tests()
+{
+ local mode=$1
+
+ # mac test
+ ip netns exec ${NS[2]} tcpdump -e -i veth0 -nn -l -e &> ${LOG_DIR}/mac_ns1-2_${mode}.log &
+ ip netns exec ${NS[3]} tcpdump -e -i veth0 -nn -l -e &> ${LOG_DIR}/mac_ns1-3_${mode}.log &
+ sleep 0.5
+ ip netns exec ${NS[1]} ping 192.0.2.254 -i 0.1 -c 4 &> /dev/null
+ sleep 0.5
+ pkill tcpdump
+
+ # mac check
+ grep -q "${veth_mac[2]} > ff:ff:ff:ff:ff:ff" ${LOG_DIR}/mac_ns1-2_${mode}.log && \
+ test_pass "$mode mac ns1-2" || test_fail "$mode mac ns1-2"
+ grep -q "${veth_mac[3]} > ff:ff:ff:ff:ff:ff" ${LOG_DIR}/mac_ns1-3_${mode}.log && \
+ test_pass "$mode mac ns1-3" || test_fail "$mode mac ns1-3"
+}
+
+do_ping_tests()
+{
+ local mode=$1
+
+ # ping6 test: echo request should be redirect back to itself, not others
+ ip netns exec ${NS[1]} ip neigh add 2001:db8::2 dev veth0 lladdr 00:00:00:00:00:02
+
+ ip netns exec ${NS[1]} tcpdump -i veth0 -nn -l -e &> ${LOG_DIR}/ns1-1_${mode}.log &
+ ip netns exec ${NS[2]} tcpdump -i veth0 -nn -l -e &> ${LOG_DIR}/ns1-2_${mode}.log &
+ ip netns exec ${NS[3]} tcpdump -i veth0 -nn -l -e &> ${LOG_DIR}/ns1-3_${mode}.log &
+ sleep 0.5
+ # ARP test
+ ip netns exec ${NS[1]} arping -q -c 2 -I veth0 192.0.2.254
+ # IPv4 test
+ ip netns exec ${NS[1]} ping 192.0.2.253 -i 0.1 -c 4 &> /dev/null
+ # IPv6 test
+ ip netns exec ${NS[1]} ping6 2001:db8::2 -i 0.1 -c 2 &> /dev/null
+ sleep 0.5
+ pkill tcpdump
+
+ # All netns should receive the redirect arp requests
+ [ $(grep -cF "who-has 192.0.2.254" ${LOG_DIR}/ns1-1_${mode}.log) -eq 4 ] && \
+ test_pass "$mode arp(F_BROADCAST) ns1-1" || \
+ test_fail "$mode arp(F_BROADCAST) ns1-1"
+ [ $(grep -cF "who-has 192.0.2.254" ${LOG_DIR}/ns1-2_${mode}.log) -eq 2 ] && \
+ test_pass "$mode arp(F_BROADCAST) ns1-2" || \
+ test_fail "$mode arp(F_BROADCAST) ns1-2"
+ [ $(grep -cF "who-has 192.0.2.254" ${LOG_DIR}/ns1-3_${mode}.log) -eq 2 ] && \
+ test_pass "$mode arp(F_BROADCAST) ns1-3" || \
+ test_fail "$mode arp(F_BROADCAST) ns1-3"
+
+ # ns1 should not receive the redirect echo request, others should
+ [ $(grep -c "ICMP echo request" ${LOG_DIR}/ns1-1_${mode}.log) -eq 4 ] && \
+ test_pass "$mode IPv4 (F_BROADCAST|F_EXCLUDE_INGRESS) ns1-1" || \
+ test_fail "$mode IPv4 (F_BROADCAST|F_EXCLUDE_INGRESS) ns1-1"
+ [ $(grep -c "ICMP echo request" ${LOG_DIR}/ns1-2_${mode}.log) -eq 4 ] && \
+ test_pass "$mode IPv4 (F_BROADCAST|F_EXCLUDE_INGRESS) ns1-2" || \
+ test_fail "$mode IPv4 (F_BROADCAST|F_EXCLUDE_INGRESS) ns1-2"
+ [ $(grep -c "ICMP echo request" ${LOG_DIR}/ns1-3_${mode}.log) -eq 4 ] && \
+ test_pass "$mode IPv4 (F_BROADCAST|F_EXCLUDE_INGRESS) ns1-3" || \
+ test_fail "$mode IPv4 (F_BROADCAST|F_EXCLUDE_INGRESS) ns1-3"
+
+ # ns1 should receive the echo request, ns2 should not
+ [ $(grep -c "ICMP6, echo request" ${LOG_DIR}/ns1-1_${mode}.log) -eq 4 ] && \
+ test_pass "$mode IPv6 (no flags) ns1-1" || \
+ test_fail "$mode IPv6 (no flags) ns1-1"
+ [ $(grep -c "ICMP6, echo request" ${LOG_DIR}/ns1-2_${mode}.log) -eq 0 ] && \
+ test_pass "$mode IPv6 (no flags) ns1-2" || \
+ test_fail "$mode IPv6 (no flags) ns1-2"
+}
+
+do_tests()
+{
+ local mode=$1
+ local drv_p
+
+ case ${mode} in
+ xdpdrv) drv_p="-N";;
+ xdpegress) drv_p="-X";;
+ xdpgeneric) drv_p="-S";;
+ esac
+
+ ip netns exec ${NS[0]} ./xdp_redirect_multi $drv_p $IFACES &> ${LOG_DIR}/xdp_redirect_${mode}.log &
+ xdp_pid=$!
+ sleep 1
+ if ! ps -p $xdp_pid > /dev/null; then
+ test_fail "$mode xdp_redirect_multi start failed"
+ return 1
+ fi
+
+ if [ "$mode" = "xdpegress" ]; then
+ do_egress_tests $mode
+ else
+ do_ping_tests $mode
+ fi
+
+ kill $xdp_pid
+}
+
+check_env
+
+trap clean_up EXIT
+
+for mode in ${DRV_MODE}; do
+ setup_ns $mode
+ do_tests $mode
+ clean_up
+done
+rm -rf ${LOG_DIR}
+
+echo "Summary: PASS $PASS, FAIL $FAIL"
+[ $FAIL -eq 0 ] && exit 0 || exit 1
diff --git a/tools/testing/selftests/bpf/test_xdp_veth.sh b/tools/testing/selftests/bpf/test_xdp_veth.sh
index ba8ffcdaac30..5211ca9a0239 100755
--- a/tools/testing/selftests/bpf/test_xdp_veth.sh
+++ b/tools/testing/selftests/bpf/test_xdp_veth.sh
@@ -22,6 +22,9 @@ ksft_skip=4
TESTNAME=xdp_veth
BPF_FS=$(awk '$3 == "bpf" {print $2; exit}' /proc/mounts)
BPF_DIR=$BPF_FS/test_$TESTNAME
+readonly NS1="ns1-$(mktemp -u XXXXXX)"
+readonly NS2="ns2-$(mktemp -u XXXXXX)"
+readonly NS3="ns3-$(mktemp -u XXXXXX)"
_cleanup()
{
@@ -29,9 +32,9 @@ _cleanup()
ip link del veth1 2> /dev/null
ip link del veth2 2> /dev/null
ip link del veth3 2> /dev/null
- ip netns del ns1 2> /dev/null
- ip netns del ns2 2> /dev/null
- ip netns del ns3 2> /dev/null
+ ip netns del ${NS1} 2> /dev/null
+ ip netns del ${NS2} 2> /dev/null
+ ip netns del ${NS3} 2> /dev/null
rm -rf $BPF_DIR 2> /dev/null
}
@@ -77,42 +80,42 @@ set -e
trap cleanup_skip EXIT
-ip netns add ns1
-ip netns add ns2
-ip netns add ns3
+ip netns add ${NS1}
+ip netns add ${NS2}
+ip netns add ${NS3}
-ip link add veth1 index 111 type veth peer name veth11 netns ns1
-ip link add veth2 index 122 type veth peer name veth22 netns ns2
-ip link add veth3 index 133 type veth peer name veth33 netns ns3
+ip link add veth1 index 111 type veth peer name veth11 netns ${NS1}
+ip link add veth2 index 122 type veth peer name veth22 netns ${NS2}
+ip link add veth3 index 133 type veth peer name veth33 netns ${NS3}
ip link set veth1 up
ip link set veth2 up
ip link set veth3 up
-ip -n ns1 addr add 10.1.1.11/24 dev veth11
-ip -n ns3 addr add 10.1.1.33/24 dev veth33
+ip -n ${NS1} addr add 10.1.1.11/24 dev veth11
+ip -n ${NS3} addr add 10.1.1.33/24 dev veth33
-ip -n ns1 link set dev veth11 up
-ip -n ns2 link set dev veth22 up
-ip -n ns3 link set dev veth33 up
+ip -n ${NS1} link set dev veth11 up
+ip -n ${NS2} link set dev veth22 up
+ip -n ${NS3} link set dev veth33 up
mkdir $BPF_DIR
bpftool prog loadall \
- xdp_redirect_map.o $BPF_DIR/progs type xdp \
+ xdp_redirect_map.bpf.o $BPF_DIR/progs type xdp \
pinmaps $BPF_DIR/maps
bpftool map update pinned $BPF_DIR/maps/tx_port key 0 0 0 0 value 122 0 0 0
bpftool map update pinned $BPF_DIR/maps/tx_port key 1 0 0 0 value 133 0 0 0
bpftool map update pinned $BPF_DIR/maps/tx_port key 2 0 0 0 value 111 0 0 0
-ip link set dev veth1 xdp pinned $BPF_DIR/progs/redirect_map_0
-ip link set dev veth2 xdp pinned $BPF_DIR/progs/redirect_map_1
-ip link set dev veth3 xdp pinned $BPF_DIR/progs/redirect_map_2
+ip link set dev veth1 xdp pinned $BPF_DIR/progs/xdp_redirect_map_0
+ip link set dev veth2 xdp pinned $BPF_DIR/progs/xdp_redirect_map_1
+ip link set dev veth3 xdp pinned $BPF_DIR/progs/xdp_redirect_map_2
-ip -n ns1 link set dev veth11 xdp obj xdp_dummy.o sec xdp_dummy
-ip -n ns2 link set dev veth22 xdp obj xdp_tx.o sec tx
-ip -n ns3 link set dev veth33 xdp obj xdp_dummy.o sec xdp_dummy
+ip -n ${NS1} link set dev veth11 xdp obj xdp_dummy.bpf.o sec xdp
+ip -n ${NS2} link set dev veth22 xdp obj xdp_tx.bpf.o sec xdp
+ip -n ${NS3} link set dev veth33 xdp obj xdp_dummy.bpf.o sec xdp
trap cleanup EXIT
-ip netns exec ns1 ping -c 1 -W 1 10.1.1.33
+ip netns exec ${NS1} ping -c 1 -W 1 10.1.1.33
exit 0
diff --git a/tools/testing/selftests/bpf/test_xdp_vlan.sh b/tools/testing/selftests/bpf/test_xdp_vlan.sh
index bb8b0da91686..fbcaa9f0120b 100755
--- a/tools/testing/selftests/bpf/test_xdp_vlan.sh
+++ b/tools/testing/selftests/bpf/test_xdp_vlan.sh
@@ -2,6 +2,11 @@
# SPDX-License-Identifier: GPL-2.0
# Author: Jesper Dangaard Brouer <hawk@kernel.org>
+# Kselftest framework requirement - SKIP code is 4.
+readonly KSFT_SKIP=4
+readonly NS1="ns1-$(mktemp -u XXXXXX)"
+readonly NS2="ns2-$(mktemp -u XXXXXX)"
+
# Allow wrapper scripts to name test
if [ -z "$TESTNAME" ]; then
TESTNAME=xdp_vlan
@@ -46,15 +51,15 @@ cleanup()
if [ -n "$INTERACTIVE" ]; then
echo "Namespace setup still active explore with:"
- echo " ip netns exec ns1 bash"
- echo " ip netns exec ns2 bash"
+ echo " ip netns exec ${NS1} bash"
+ echo " ip netns exec ${NS2} bash"
exit $status
fi
set +e
ip link del veth1 2> /dev/null
- ip netns del ns1 2> /dev/null
- ip netns del ns2 2> /dev/null
+ ip netns del ${NS1} 2> /dev/null
+ ip netns del ${NS2} 2> /dev/null
}
# Using external program "getopt" to get --long-options
@@ -94,7 +99,7 @@ while true; do
-h | --help )
usage;
echo "selftests: $TESTNAME [SKIP] usage help info requested"
- exit 0
+ exit $KSFT_SKIP
;;
* )
shift
@@ -117,14 +122,14 @@ fi
ip link set dev lo xdpgeneric off 2>/dev/null > /dev/null
if [ $? -ne 0 ]; then
echo "selftests: $TESTNAME [SKIP] need ip xdp support"
- exit 0
+ exit $KSFT_SKIP
fi
# Interactive mode likely require us to cleanup netns
if [ -n "$INTERACTIVE" ]; then
ip link del veth1 2> /dev/null
- ip netns del ns1 2> /dev/null
- ip netns del ns2 2> /dev/null
+ ip netns del ${NS1} 2> /dev/null
+ ip netns del ${NS2} 2> /dev/null
fi
# Exit on failure
@@ -141,8 +146,8 @@ if [ -n "$VERBOSE" ]; then
fi
# Create two namespaces
-ip netns add ns1
-ip netns add ns2
+ip netns add ${NS1}
+ip netns add ${NS2}
# Run cleanup if failing or on kill
trap cleanup 0 2 3 6 9
@@ -151,67 +156,67 @@ trap cleanup 0 2 3 6 9
ip link add veth1 type veth peer name veth2
# Move veth1 and veth2 into the respective namespaces
-ip link set veth1 netns ns1
-ip link set veth2 netns ns2
+ip link set veth1 netns ${NS1}
+ip link set veth2 netns ${NS2}
# NOTICE: XDP require VLAN header inside packet payload
# - Thus, disable VLAN offloading driver features
# - For veth REMEMBER TX side VLAN-offload
#
# Disable rx-vlan-offload (mostly needed on ns1)
-ip netns exec ns1 ethtool -K veth1 rxvlan off
-ip netns exec ns2 ethtool -K veth2 rxvlan off
+ip netns exec ${NS1} ethtool -K veth1 rxvlan off
+ip netns exec ${NS2} ethtool -K veth2 rxvlan off
#
# Disable tx-vlan-offload (mostly needed on ns2)
-ip netns exec ns2 ethtool -K veth2 txvlan off
-ip netns exec ns1 ethtool -K veth1 txvlan off
+ip netns exec ${NS2} ethtool -K veth2 txvlan off
+ip netns exec ${NS1} ethtool -K veth1 txvlan off
export IPADDR1=100.64.41.1
export IPADDR2=100.64.41.2
# In ns1/veth1 add IP-addr on plain net_device
-ip netns exec ns1 ip addr add ${IPADDR1}/24 dev veth1
-ip netns exec ns1 ip link set veth1 up
+ip netns exec ${NS1} ip addr add ${IPADDR1}/24 dev veth1
+ip netns exec ${NS1} ip link set veth1 up
# In ns2/veth2 create VLAN device
export VLAN=4011
export DEVNS2=veth2
-ip netns exec ns2 ip link add link $DEVNS2 name $DEVNS2.$VLAN type vlan id $VLAN
-ip netns exec ns2 ip addr add ${IPADDR2}/24 dev $DEVNS2.$VLAN
-ip netns exec ns2 ip link set $DEVNS2 up
-ip netns exec ns2 ip link set $DEVNS2.$VLAN up
+ip netns exec ${NS2} ip link add link $DEVNS2 name $DEVNS2.$VLAN type vlan id $VLAN
+ip netns exec ${NS2} ip addr add ${IPADDR2}/24 dev $DEVNS2.$VLAN
+ip netns exec ${NS2} ip link set $DEVNS2 up
+ip netns exec ${NS2} ip link set $DEVNS2.$VLAN up
# Bringup lo in netns (to avoids confusing people using --interactive)
-ip netns exec ns1 ip link set lo up
-ip netns exec ns2 ip link set lo up
+ip netns exec ${NS1} ip link set lo up
+ip netns exec ${NS2} ip link set lo up
# At this point, the hosts cannot reach each-other,
# because ns2 are using VLAN tags on the packets.
-ip netns exec ns2 sh -c 'ping -W 1 -c 1 100.64.41.1 || echo "Success: First ping must fail"'
+ip netns exec ${NS2} sh -c 'ping -W 1 -c 1 100.64.41.1 || echo "Success: First ping must fail"'
# Now we can use the test_xdp_vlan.c program to pop/push these VLAN tags
# ----------------------------------------------------------------------
# In ns1: ingress use XDP to remove VLAN tags
export DEVNS1=veth1
-export FILE=test_xdp_vlan.o
+export BPF_FILE=test_xdp_vlan.bpf.o
# First test: Remove VLAN by setting VLAN ID 0, using "xdp_vlan_change"
export XDP_PROG=xdp_vlan_change
-ip netns exec ns1 ip link set $DEVNS1 $XDP_MODE object $FILE section $XDP_PROG
+ip netns exec ${NS1} ip link set $DEVNS1 $XDP_MODE object $BPF_FILE section $XDP_PROG
# In ns1: egress use TC to add back VLAN tag 4011
# (del cmd)
# tc qdisc del dev $DEVNS1 clsact 2> /dev/null
#
-ip netns exec ns1 tc qdisc add dev $DEVNS1 clsact
-ip netns exec ns1 tc filter add dev $DEVNS1 egress \
- prio 1 handle 1 bpf da obj $FILE sec tc_vlan_push
+ip netns exec ${NS1} tc qdisc add dev $DEVNS1 clsact
+ip netns exec ${NS1} tc filter add dev $DEVNS1 egress \
+ prio 1 handle 1 bpf da obj $BPF_FILE sec tc_vlan_push
# Now the namespaces can reach each-other, test with ping:
-ip netns exec ns2 ping -i 0.2 -W 2 -c 2 $IPADDR1
-ip netns exec ns1 ping -i 0.2 -W 2 -c 2 $IPADDR2
+ip netns exec ${NS2} ping -i 0.2 -W 2 -c 2 $IPADDR1
+ip netns exec ${NS1} ping -i 0.2 -W 2 -c 2 $IPADDR2
# Second test: Replace xdp prog, that fully remove vlan header
#
@@ -220,9 +225,9 @@ ip netns exec ns1 ping -i 0.2 -W 2 -c 2 $IPADDR2
# ETH_P_8021Q indication, and this cause overwriting of our changes.
#
export XDP_PROG=xdp_vlan_remove_outer2
-ip netns exec ns1 ip link set $DEVNS1 $XDP_MODE off
-ip netns exec ns1 ip link set $DEVNS1 $XDP_MODE object $FILE section $XDP_PROG
+ip netns exec ${NS1} ip link set $DEVNS1 $XDP_MODE off
+ip netns exec ${NS1} ip link set $DEVNS1 $XDP_MODE object $BPF_FILE section $XDP_PROG
# Now the namespaces should still be able reach each-other, test with ping:
-ip netns exec ns2 ping -i 0.2 -W 2 -c 2 $IPADDR1
-ip netns exec ns1 ping -i 0.2 -W 2 -c 2 $IPADDR2
+ip netns exec ${NS2} ping -i 0.2 -W 2 -c 2 $IPADDR1
+ip netns exec ${NS1} ping -i 0.2 -W 2 -c 2 $IPADDR2
diff --git a/tools/testing/selftests/bpf/test_xdping.sh b/tools/testing/selftests/bpf/test_xdping.sh
index c2f0ddb45531..c3d82e0a7378 100755
--- a/tools/testing/selftests/bpf/test_xdping.sh
+++ b/tools/testing/selftests/bpf/test_xdping.sh
@@ -95,5 +95,9 @@ for server_args in "" "-I veth0 -s -S" ; do
test "$client_args" "$server_args"
done
+# Test drv mode
+test "-I veth1 -N" "-I veth0 -s -N"
+test "-I veth1 -N -c 10" "-I veth0 -s -N"
+
echo "OK. All tests passed"
exit 0
diff --git a/tools/testing/selftests/bpf/test_xsk.sh b/tools/testing/selftests/bpf/test_xsk.sh
new file mode 100755
index 000000000000..65aafe0003db
--- /dev/null
+++ b/tools/testing/selftests/bpf/test_xsk.sh
@@ -0,0 +1,244 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+# Copyright(c) 2020 Intel Corporation, Weqaar Janjua <weqaar.a.janjua@intel.com>
+
+# AF_XDP selftests based on veth
+#
+# End-to-end AF_XDP over Veth test
+#
+# Topology:
+# ---------
+# -----------
+# _ | Process | _
+# / ----------- \
+# / | \
+# / | \
+# ----------- | -----------
+# | Thread1 | | | Thread2 |
+# ----------- | -----------
+# | | |
+# ----------- | -----------
+# | xskX | | | xskY |
+# ----------- | -----------
+# | | |
+# ----------- | ----------
+# | vethX | --------- | vethY |
+# ----------- peer ----------
+#
+# AF_XDP is an address family optimized for high performance packet processing,
+# it is XDP’s user-space interface.
+#
+# An AF_XDP socket is linked to a single UMEM which is a region of virtual
+# contiguous memory, divided into equal-sized frames.
+#
+# Refer to AF_XDP Kernel Documentation for detailed information:
+# https://www.kernel.org/doc/html/latest/networking/af_xdp.html
+#
+# Prerequisites setup by script:
+#
+# Set up veth interfaces as per the topology shown ^^:
+# * setup two veth interfaces
+# ** veth<xxxx>
+# ** veth<yyyy>
+# *** xxxx and yyyy are randomly generated 4 digit numbers used to avoid
+# conflict with any existing interface
+# * tests the veth and xsk layers of the topology
+#
+# See the source xskxceiver.c for information on each test
+#
+# Kernel configuration:
+# ---------------------
+# See "config" file for recommended kernel config options.
+#
+# Turn on XDP sockets and veth support when compiling i.e.
+# Networking support -->
+# Networking options -->
+# [ * ] XDP sockets
+#
+# Executing Tests:
+# ----------------
+# Must run with CAP_NET_ADMIN capability.
+#
+# Run:
+# sudo ./test_xsk.sh
+#
+# If running from kselftests:
+# sudo make run_tests
+#
+# Run with verbose output:
+# sudo ./test_xsk.sh -v
+#
+# Set up veth interfaces and leave them up so xskxceiver can be launched in a debugger:
+# sudo ./test_xsk.sh -d
+#
+# Run test suite for physical device in loopback mode
+# sudo ./test_xsk.sh -i IFACE
+#
+# Run test suite in a specific mode only [skb,drv,zc]
+# sudo ./test_xsk.sh -m MODE
+#
+# List available tests
+# ./test_xsk.sh -l
+#
+# Run a specific test from the test suite
+# sudo ./test_xsk.sh -t TEST_NAME
+#
+# Display the available command line options
+# ./test_xsk.sh -h
+
+. xsk_prereqs.sh
+
+ETH=""
+
+while getopts "vi:dm:lt:h" flag
+do
+ case "${flag}" in
+ v) verbose=1;;
+ d) debug=1;;
+ i) ETH=${OPTARG};;
+ m) MODE=${OPTARG};;
+ l) list=1;;
+ t) TEST=${OPTARG};;
+ h) help=1;;
+ esac
+done
+
+TEST_NAME="PREREQUISITES"
+
+URANDOM=/dev/urandom
+[ ! -e "${URANDOM}" ] && { echo "${URANDOM} not found. Skipping tests."; test_exit $ksft_fail; }
+
+VETH0_POSTFIX=$(cat ${URANDOM} | tr -dc '0-9' | fold -w 256 | head -n 1 | head --bytes 4)
+VETH0=ve${VETH0_POSTFIX}
+VETH1_POSTFIX=$(cat ${URANDOM} | tr -dc '0-9' | fold -w 256 | head -n 1 | head --bytes 4)
+VETH1=ve${VETH1_POSTFIX}
+MTU=1500
+
+trap ctrl_c INT
+
+function ctrl_c() {
+ cleanup_exit ${VETH0} ${VETH1}
+ exit 1
+}
+
+setup_vethPairs() {
+ if [[ $verbose -eq 1 ]]; then
+ echo "setting up ${VETH0}"
+ fi
+ ip link add ${VETH0} numtxqueues 4 numrxqueues 4 type veth peer name ${VETH1} numtxqueues 4 numrxqueues 4
+ if [ -f /proc/net/if_inet6 ]; then
+ echo 1 > /proc/sys/net/ipv6/conf/${VETH0}/disable_ipv6
+ echo 1 > /proc/sys/net/ipv6/conf/${VETH1}/disable_ipv6
+ fi
+ if [[ $verbose -eq 1 ]]; then
+ echo "setting up ${VETH1}"
+ fi
+
+ if [[ $busy_poll -eq 1 ]]; then
+ echo 2 > /sys/class/net/${VETH0}/napi_defer_hard_irqs
+ echo 200000 > /sys/class/net/${VETH0}/gro_flush_timeout
+ echo 2 > /sys/class/net/${VETH1}/napi_defer_hard_irqs
+ echo 200000 > /sys/class/net/${VETH1}/gro_flush_timeout
+ fi
+
+ ip link set ${VETH1} mtu ${MTU}
+ ip link set ${VETH0} mtu ${MTU}
+ ip link set ${VETH1} up
+ ip link set ${VETH0} up
+}
+
+if [[ $list -eq 1 ]]; then
+ ./${XSKOBJ} -l
+ exit
+fi
+
+if [[ $help -eq 1 ]]; then
+ ./${XSKOBJ}
+ exit
+fi
+
+if [ ! -z $ETH ]; then
+ VETH0=${ETH}
+ VETH1=${ETH}
+else
+ validate_root_exec
+ validate_veth_support ${VETH0}
+ validate_ip_utility
+ setup_vethPairs
+
+ retval=$?
+ if [ $retval -ne 0 ]; then
+ test_status $retval "${TEST_NAME}"
+ cleanup_exit ${VETH0} ${VETH1}
+ exit $retval
+ fi
+fi
+
+
+if [[ $verbose -eq 1 ]]; then
+ ARGS+="-v "
+fi
+
+if [ -n "$MODE" ]; then
+ ARGS+="-m ${MODE} "
+fi
+
+if [ -n "$TEST" ]; then
+ ARGS+="-t ${TEST} "
+fi
+
+retval=$?
+test_status $retval "${TEST_NAME}"
+
+## START TESTS
+
+statusList=()
+
+TEST_NAME="XSK_SELFTESTS_${VETH0}_SOFTIRQ"
+
+if [[ $debug -eq 1 ]]; then
+ echo "-i" ${VETH0} "-i" ${VETH1}
+ exit
+fi
+
+exec_xskxceiver
+
+if [ -z $ETH ]; then
+ cleanup_exit ${VETH0} ${VETH1}
+else
+ cleanup_iface ${ETH} ${MTU}
+fi
+
+if [[ $list -eq 1 ]]; then
+ exit
+fi
+
+TEST_NAME="XSK_SELFTESTS_${VETH0}_BUSY_POLL"
+busy_poll=1
+
+if [ -z $ETH ]; then
+ setup_vethPairs
+fi
+exec_xskxceiver
+
+## END TESTS
+
+if [ -z $ETH ]; then
+ cleanup_exit ${VETH0} ${VETH1}
+else
+ cleanup_iface ${ETH} ${MTU}
+fi
+
+failures=0
+echo -e "\nSummary:"
+for i in "${!statusList[@]}"
+do
+ if [ ${statusList[$i]} -ne 0 ]; then
+ test_status ${statusList[$i]} ${nameList[$i]}
+ failures=1
+ fi
+done
+
+if [ $failures -eq 0 ]; then
+ echo "All tests successful!"
+fi
diff --git a/tools/testing/selftests/bpf/testing_helpers.c b/tools/testing/selftests/bpf/testing_helpers.c
index 0af6337a8962..28b6646662af 100644
--- a/tools/testing/selftests/bpf/testing_helpers.c
+++ b/tools/testing/selftests/bpf/testing_helpers.c
@@ -1,8 +1,15 @@
// SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause)
+/* Copyright (C) 2019 Netronome Systems, Inc. */
/* Copyright (C) 2020 Facebook, Inc. */
+#include <ctype.h>
#include <stdlib.h>
+#include <string.h>
#include <errno.h>
+#include <bpf/bpf.h>
+#include <bpf/libbpf.h>
+#include "test_progs.h"
#include "testing_helpers.h"
+#include <linux/membarrier.h>
int parse_num_list(const char *s, bool **num_set, int *num_set_len)
{
@@ -56,7 +63,7 @@ int parse_num_list(const char *s, bool **num_set, int *num_set_len)
set[i] = true;
}
- if (!set)
+ if (!set || parsing_end)
return -EINVAL;
*num_set = set;
@@ -64,3 +71,407 @@ int parse_num_list(const char *s, bool **num_set, int *num_set_len)
return 0;
}
+
+static int do_insert_test(struct test_filter_set *set,
+ char *test_str,
+ char *subtest_str)
+{
+ struct test_filter *tmp, *test;
+ char **ctmp;
+ int i;
+
+ for (i = 0; i < set->cnt; i++) {
+ test = &set->tests[i];
+
+ if (strcmp(test_str, test->name) == 0) {
+ free(test_str);
+ goto subtest;
+ }
+ }
+
+ tmp = realloc(set->tests, sizeof(*test) * (set->cnt + 1));
+ if (!tmp)
+ return -ENOMEM;
+
+ set->tests = tmp;
+ test = &set->tests[set->cnt];
+
+ test->name = test_str;
+ test->subtests = NULL;
+ test->subtest_cnt = 0;
+
+ set->cnt++;
+
+subtest:
+ if (!subtest_str)
+ return 0;
+
+ for (i = 0; i < test->subtest_cnt; i++) {
+ if (strcmp(subtest_str, test->subtests[i]) == 0) {
+ free(subtest_str);
+ return 0;
+ }
+ }
+
+ ctmp = realloc(test->subtests,
+ sizeof(*test->subtests) * (test->subtest_cnt + 1));
+ if (!ctmp)
+ return -ENOMEM;
+
+ test->subtests = ctmp;
+ test->subtests[test->subtest_cnt] = subtest_str;
+
+ test->subtest_cnt++;
+
+ return 0;
+}
+
+static int insert_test(struct test_filter_set *set,
+ char *test_spec,
+ bool is_glob_pattern)
+{
+ char *pattern, *subtest_str, *ext_test_str, *ext_subtest_str = NULL;
+ int glob_chars = 0;
+
+ if (is_glob_pattern) {
+ pattern = "%s";
+ } else {
+ pattern = "*%s*";
+ glob_chars = 2;
+ }
+
+ subtest_str = strchr(test_spec, '/');
+ if (subtest_str) {
+ *subtest_str = '\0';
+ subtest_str += 1;
+ }
+
+ ext_test_str = malloc(strlen(test_spec) + glob_chars + 1);
+ if (!ext_test_str)
+ goto err;
+
+ sprintf(ext_test_str, pattern, test_spec);
+
+ if (subtest_str) {
+ ext_subtest_str = malloc(strlen(subtest_str) + glob_chars + 1);
+ if (!ext_subtest_str)
+ goto err;
+
+ sprintf(ext_subtest_str, pattern, subtest_str);
+ }
+
+ return do_insert_test(set, ext_test_str, ext_subtest_str);
+
+err:
+ free(ext_test_str);
+ free(ext_subtest_str);
+
+ return -ENOMEM;
+}
+
+int parse_test_list_file(const char *path,
+ struct test_filter_set *set,
+ bool is_glob_pattern)
+{
+ char *buf = NULL, *capture_start, *capture_end, *scan_end;
+ size_t buflen = 0;
+ int err = 0;
+ FILE *f;
+
+ f = fopen(path, "r");
+ if (!f) {
+ err = -errno;
+ fprintf(stderr, "Failed to open '%s': %d\n", path, err);
+ return err;
+ }
+
+ while (getline(&buf, &buflen, f) != -1) {
+ capture_start = buf;
+
+ while (isspace(*capture_start))
+ ++capture_start;
+
+ capture_end = capture_start;
+ scan_end = capture_start;
+
+ while (*scan_end && *scan_end != '#') {
+ if (!isspace(*scan_end))
+ capture_end = scan_end;
+
+ ++scan_end;
+ }
+
+ if (capture_end == capture_start)
+ continue;
+
+ *(++capture_end) = '\0';
+
+ err = insert_test(set, capture_start, is_glob_pattern);
+ if (err)
+ break;
+ }
+
+ fclose(f);
+ return err;
+}
+
+int parse_test_list(const char *s,
+ struct test_filter_set *set,
+ bool is_glob_pattern)
+{
+ char *input, *state = NULL, *test_spec;
+ int err = 0;
+
+ input = strdup(s);
+ if (!input)
+ return -ENOMEM;
+
+ while ((test_spec = strtok_r(state ? NULL : input, ",", &state))) {
+ err = insert_test(set, test_spec, is_glob_pattern);
+ if (err)
+ break;
+ }
+
+ free(input);
+ return err;
+}
+
+__u32 link_info_prog_id(const struct bpf_link *link, struct bpf_link_info *info)
+{
+ __u32 info_len = sizeof(*info);
+ int err;
+
+ memset(info, 0, sizeof(*info));
+ err = bpf_link_get_info_by_fd(bpf_link__fd(link), info, &info_len);
+ if (err) {
+ printf("failed to get link info: %d\n", -errno);
+ return 0;
+ }
+ return info->prog_id;
+}
+
+int extra_prog_load_log_flags = 0;
+
+int testing_prog_flags(void)
+{
+ static int cached_flags = -1;
+ static int prog_flags[] = { BPF_F_TEST_RND_HI32, BPF_F_TEST_REG_INVARIANTS };
+ static struct bpf_insn insns[] = {
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ };
+ int insn_cnt = ARRAY_SIZE(insns), i, fd, flags = 0;
+ LIBBPF_OPTS(bpf_prog_load_opts, opts);
+
+ if (cached_flags >= 0)
+ return cached_flags;
+
+ for (i = 0; i < ARRAY_SIZE(prog_flags); i++) {
+ opts.prog_flags = prog_flags[i];
+ fd = bpf_prog_load(BPF_PROG_TYPE_SOCKET_FILTER, "flag-test", "GPL",
+ insns, insn_cnt, &opts);
+ if (fd >= 0) {
+ flags |= prog_flags[i];
+ close(fd);
+ }
+ }
+
+ cached_flags = flags;
+ return cached_flags;
+}
+
+int bpf_prog_test_load(const char *file, enum bpf_prog_type type,
+ struct bpf_object **pobj, int *prog_fd)
+{
+ LIBBPF_OPTS(bpf_object_open_opts, opts,
+ .kernel_log_level = extra_prog_load_log_flags,
+ );
+ struct bpf_object *obj;
+ struct bpf_program *prog;
+ __u32 flags;
+ int err;
+
+ obj = bpf_object__open_file(file, &opts);
+ if (!obj)
+ return -errno;
+
+ prog = bpf_object__next_program(obj, NULL);
+ if (!prog) {
+ err = -ENOENT;
+ goto err_out;
+ }
+
+ if (type != BPF_PROG_TYPE_UNSPEC && bpf_program__type(prog) != type)
+ bpf_program__set_type(prog, type);
+
+ flags = bpf_program__flags(prog) | testing_prog_flags();
+ bpf_program__set_flags(prog, flags);
+
+ err = bpf_object__load(obj);
+ if (err)
+ goto err_out;
+
+ *pobj = obj;
+ *prog_fd = bpf_program__fd(prog);
+
+ return 0;
+err_out:
+ bpf_object__close(obj);
+ return err;
+}
+
+int bpf_test_load_program(enum bpf_prog_type type, const struct bpf_insn *insns,
+ size_t insns_cnt, const char *license,
+ __u32 kern_version, char *log_buf,
+ size_t log_buf_sz)
+{
+ LIBBPF_OPTS(bpf_prog_load_opts, opts,
+ .kern_version = kern_version,
+ .prog_flags = testing_prog_flags(),
+ .log_level = extra_prog_load_log_flags,
+ .log_buf = log_buf,
+ .log_size = log_buf_sz,
+ );
+
+ return bpf_prog_load(type, NULL, license, insns, insns_cnt, &opts);
+}
+
+__u64 read_perf_max_sample_freq(void)
+{
+ __u64 sample_freq = 5000; /* fallback to 5000 on error */
+ FILE *f;
+
+ f = fopen("/proc/sys/kernel/perf_event_max_sample_rate", "r");
+ if (f == NULL) {
+ printf("Failed to open /proc/sys/kernel/perf_event_max_sample_rate: err %d\n"
+ "return default value: 5000\n", -errno);
+ return sample_freq;
+ }
+ if (fscanf(f, "%llu", &sample_freq) != 1) {
+ printf("Failed to parse /proc/sys/kernel/perf_event_max_sample_rate: err %d\n"
+ "return default value: 5000\n", -errno);
+ }
+
+ fclose(f);
+ return sample_freq;
+}
+
+int finit_module(int fd, const char *param_values, int flags)
+{
+ return syscall(__NR_finit_module, fd, param_values, flags);
+}
+
+int delete_module(const char *name, int flags)
+{
+ return syscall(__NR_delete_module, name, flags);
+}
+
+int unload_bpf_testmod(bool verbose)
+{
+ if (kern_sync_rcu())
+ fprintf(stdout, "Failed to trigger kernel-side RCU sync!\n");
+ if (delete_module("bpf_testmod", 0)) {
+ if (errno == ENOENT) {
+ if (verbose)
+ fprintf(stdout, "bpf_testmod.ko is already unloaded.\n");
+ return -1;
+ }
+ fprintf(stdout, "Failed to unload bpf_testmod.ko from kernel: %d\n", -errno);
+ return -1;
+ }
+ if (verbose)
+ fprintf(stdout, "Successfully unloaded bpf_testmod.ko.\n");
+ return 0;
+}
+
+int load_bpf_testmod(bool verbose)
+{
+ int fd;
+
+ if (verbose)
+ fprintf(stdout, "Loading bpf_testmod.ko...\n");
+
+ fd = open("bpf_testmod.ko", O_RDONLY);
+ if (fd < 0) {
+ fprintf(stdout, "Can't find bpf_testmod.ko kernel module: %d\n", -errno);
+ return -ENOENT;
+ }
+ if (finit_module(fd, "", 0)) {
+ fprintf(stdout, "Failed to load bpf_testmod.ko into the kernel: %d\n", -errno);
+ close(fd);
+ return -EINVAL;
+ }
+ close(fd);
+
+ if (verbose)
+ fprintf(stdout, "Successfully loaded bpf_testmod.ko.\n");
+ return 0;
+}
+
+/*
+ * Trigger synchronize_rcu() in kernel.
+ */
+int kern_sync_rcu(void)
+{
+ return syscall(__NR_membarrier, MEMBARRIER_CMD_SHARED, 0, 0);
+}
+
+int get_xlated_program(int fd_prog, struct bpf_insn **buf, __u32 *cnt)
+{
+ __u32 buf_element_size = sizeof(struct bpf_insn);
+ struct bpf_prog_info info = {};
+ __u32 info_len = sizeof(info);
+ __u32 xlated_prog_len;
+
+ if (bpf_prog_get_info_by_fd(fd_prog, &info, &info_len)) {
+ perror("bpf_prog_get_info_by_fd failed");
+ return -1;
+ }
+
+ xlated_prog_len = info.xlated_prog_len;
+ if (xlated_prog_len % buf_element_size) {
+ printf("Program length %u is not multiple of %u\n",
+ xlated_prog_len, buf_element_size);
+ return -1;
+ }
+
+ *cnt = xlated_prog_len / buf_element_size;
+ *buf = calloc(*cnt, buf_element_size);
+ if (!buf) {
+ perror("can't allocate xlated program buffer");
+ return -ENOMEM;
+ }
+
+ bzero(&info, sizeof(info));
+ info.xlated_prog_len = xlated_prog_len;
+ info.xlated_prog_insns = (__u64)(unsigned long)*buf;
+ if (bpf_prog_get_info_by_fd(fd_prog, &info, &info_len)) {
+ perror("second bpf_prog_get_info_by_fd failed");
+ goto out_free_buf;
+ }
+
+ return 0;
+
+out_free_buf:
+ free(*buf);
+ *buf = NULL;
+ return -1;
+}
+
+bool is_jit_enabled(void)
+{
+ const char *jit_sysctl = "/proc/sys/net/core/bpf_jit_enable";
+ bool enabled = false;
+ int sysctl_fd;
+
+ sysctl_fd = open(jit_sysctl, O_RDONLY);
+ if (sysctl_fd != -1) {
+ char tmpc;
+
+ if (read(sysctl_fd, &tmpc, sizeof(tmpc)) == 1)
+ enabled = (tmpc != '0');
+ close(sysctl_fd);
+ }
+
+ return enabled;
+}
diff --git a/tools/testing/selftests/bpf/testing_helpers.h b/tools/testing/selftests/bpf/testing_helpers.h
index 923b51762759..d55f6ab12433 100644
--- a/tools/testing/selftests/bpf/testing_helpers.h
+++ b/tools/testing/selftests/bpf/testing_helpers.h
@@ -1,5 +1,59 @@
/* SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause) */
/* Copyright (C) 2020 Facebook, Inc. */
+
+#ifndef __TESTING_HELPERS_H
+#define __TESTING_HELPERS_H
+
#include <stdbool.h>
+#include <bpf/bpf.h>
+#include <bpf/libbpf.h>
+#include <time.h>
+
+#define __TO_STR(x) #x
+#define TO_STR(x) __TO_STR(x)
int parse_num_list(const char *s, bool **set, int *set_len);
+__u32 link_info_prog_id(const struct bpf_link *link, struct bpf_link_info *info);
+int bpf_prog_test_load(const char *file, enum bpf_prog_type type,
+ struct bpf_object **pobj, int *prog_fd);
+int bpf_test_load_program(enum bpf_prog_type type, const struct bpf_insn *insns,
+ size_t insns_cnt, const char *license,
+ __u32 kern_version, char *log_buf,
+ size_t log_buf_sz);
+
+/*
+ * below function is exported for testing in prog_test test
+ */
+struct test_filter_set;
+int parse_test_list(const char *s,
+ struct test_filter_set *test_set,
+ bool is_glob_pattern);
+int parse_test_list_file(const char *path,
+ struct test_filter_set *test_set,
+ bool is_glob_pattern);
+
+__u64 read_perf_max_sample_freq(void);
+int load_bpf_testmod(bool verbose);
+int unload_bpf_testmod(bool verbose);
+int kern_sync_rcu(void);
+int finit_module(int fd, const char *param_values, int flags);
+int delete_module(const char *name, int flags);
+
+static inline __u64 get_time_ns(void)
+{
+ struct timespec t;
+
+ clock_gettime(CLOCK_MONOTONIC, &t);
+
+ return (u64)t.tv_sec * 1000000000 + t.tv_nsec;
+}
+
+struct bpf_insn;
+/* Request BPF program instructions after all rewrites are applied,
+ * e.g. verifier.c:convert_ctx_access() is done.
+ */
+int get_xlated_program(int fd_prog, struct bpf_insn **buf, __u32 *cnt);
+int testing_prog_flags(void);
+bool is_jit_enabled(void);
+
+#endif /* __TESTING_HELPERS_H */
diff --git a/tools/testing/selftests/bpf/trace_helpers.c b/tools/testing/selftests/bpf/trace_helpers.c
index 4d0e913bbb22..27fd7ed3e4b0 100644
--- a/tools/testing/selftests/bpf/trace_helpers.c
+++ b/tools/testing/selftests/bpf/trace_helpers.c
@@ -1,4 +1,5 @@
// SPDX-License-Identifier: GPL-2.0
+#include <ctype.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
@@ -6,95 +7,208 @@
#include <errno.h>
#include <fcntl.h>
#include <poll.h>
+#include <pthread.h>
#include <unistd.h>
#include <linux/perf_event.h>
#include <sys/mman.h>
#include "trace_helpers.h"
+#include <linux/limits.h>
+#include <libelf.h>
+#include <gelf.h>
+#include "bpf/libbpf_internal.h"
-#define DEBUGFS "/sys/kernel/debug/tracing/"
+#define TRACEFS_PIPE "/sys/kernel/tracing/trace_pipe"
+#define DEBUGFS_PIPE "/sys/kernel/debug/tracing/trace_pipe"
-#define MAX_SYMS 300000
-static struct ksym syms[MAX_SYMS];
-static int sym_cnt;
+struct ksyms {
+ struct ksym *syms;
+ size_t sym_cap;
+ size_t sym_cnt;
+};
+
+static struct ksyms *ksyms;
+static pthread_mutex_t ksyms_mutex = PTHREAD_MUTEX_INITIALIZER;
+
+static int ksyms__add_symbol(struct ksyms *ksyms, const char *name,
+ unsigned long addr)
+{
+ void *tmp;
+
+ tmp = strdup(name);
+ if (!tmp)
+ return -ENOMEM;
+ ksyms->syms[ksyms->sym_cnt].addr = addr;
+ ksyms->syms[ksyms->sym_cnt].name = tmp;
+ ksyms->sym_cnt++;
+ return 0;
+}
+
+void free_kallsyms_local(struct ksyms *ksyms)
+{
+ unsigned int i;
+
+ if (!ksyms)
+ return;
+
+ if (!ksyms->syms) {
+ free(ksyms);
+ return;
+ }
+
+ for (i = 0; i < ksyms->sym_cnt; i++)
+ free(ksyms->syms[i].name);
+ free(ksyms->syms);
+ free(ksyms);
+}
static int ksym_cmp(const void *p1, const void *p2)
{
return ((struct ksym *)p1)->addr - ((struct ksym *)p2)->addr;
}
-int load_kallsyms(void)
+struct ksyms *load_kallsyms_local(void)
{
- FILE *f = fopen("/proc/kallsyms", "r");
+ FILE *f;
char func[256], buf[256];
char symbol;
void *addr;
- int i = 0;
+ int ret;
+ struct ksyms *ksyms;
+ f = fopen("/proc/kallsyms", "r");
if (!f)
- return -ENOENT;
+ return NULL;
+
+ ksyms = calloc(1, sizeof(struct ksyms));
+ if (!ksyms) {
+ fclose(f);
+ return NULL;
+ }
while (fgets(buf, sizeof(buf), f)) {
if (sscanf(buf, "%p %c %s", &addr, &symbol, func) != 3)
break;
if (!addr)
continue;
- syms[i].addr = (long) addr;
- syms[i].name = strdup(func);
- i++;
+
+ ret = libbpf_ensure_mem((void **) &ksyms->syms, &ksyms->sym_cap,
+ sizeof(struct ksym), ksyms->sym_cnt + 1);
+ if (ret)
+ goto error;
+ ret = ksyms__add_symbol(ksyms, func, (unsigned long)addr);
+ if (ret)
+ goto error;
}
fclose(f);
- sym_cnt = i;
- qsort(syms, sym_cnt, sizeof(struct ksym), ksym_cmp);
- return 0;
+ qsort(ksyms->syms, ksyms->sym_cnt, sizeof(struct ksym), ksym_cmp);
+ return ksyms;
+
+error:
+ fclose(f);
+ free_kallsyms_local(ksyms);
+ return NULL;
}
-struct ksym *ksym_search(long key)
+int load_kallsyms(void)
+{
+ pthread_mutex_lock(&ksyms_mutex);
+ if (!ksyms)
+ ksyms = load_kallsyms_local();
+ pthread_mutex_unlock(&ksyms_mutex);
+ return ksyms ? 0 : 1;
+}
+
+struct ksym *ksym_search_local(struct ksyms *ksyms, long key)
{
- int start = 0, end = sym_cnt;
+ int start = 0, end = ksyms->sym_cnt;
int result;
/* kallsyms not loaded. return NULL */
- if (sym_cnt <= 0)
+ if (ksyms->sym_cnt <= 0)
return NULL;
while (start < end) {
size_t mid = start + (end - start) / 2;
- result = key - syms[mid].addr;
+ result = key - ksyms->syms[mid].addr;
if (result < 0)
end = mid;
else if (result > 0)
start = mid + 1;
else
- return &syms[mid];
+ return &ksyms->syms[mid];
}
- if (start >= 1 && syms[start - 1].addr < key &&
- key < syms[start].addr)
+ if (start >= 1 && ksyms->syms[start - 1].addr < key &&
+ key < ksyms->syms[start].addr)
/* valid ksym */
- return &syms[start - 1];
+ return &ksyms->syms[start - 1];
/* out of range. return _stext */
- return &syms[0];
+ return &ksyms->syms[0];
}
-long ksym_get_addr(const char *name)
+struct ksym *ksym_search(long key)
+{
+ if (!ksyms)
+ return NULL;
+ return ksym_search_local(ksyms, key);
+}
+
+long ksym_get_addr_local(struct ksyms *ksyms, const char *name)
{
int i;
- for (i = 0; i < sym_cnt; i++) {
- if (strcmp(syms[i].name, name) == 0)
- return syms[i].addr;
+ for (i = 0; i < ksyms->sym_cnt; i++) {
+ if (strcmp(ksyms->syms[i].name, name) == 0)
+ return ksyms->syms[i].addr;
}
return 0;
}
+long ksym_get_addr(const char *name)
+{
+ if (!ksyms)
+ return 0;
+ return ksym_get_addr_local(ksyms, name);
+}
+
+/* open kallsyms and read symbol addresses on the fly. Without caching all symbols,
+ * this is faster than load + find.
+ */
+int kallsyms_find(const char *sym, unsigned long long *addr)
+{
+ char type, name[500];
+ unsigned long long value;
+ int err = 0;
+ FILE *f;
+
+ f = fopen("/proc/kallsyms", "r");
+ if (!f)
+ return -EINVAL;
+
+ while (fscanf(f, "%llx %c %499s%*[^\n]\n", &value, &type, name) > 0) {
+ if (strcmp(name, sym) == 0) {
+ *addr = value;
+ goto out;
+ }
+ }
+ err = -ENOENT;
+
+out:
+ fclose(f);
+ return err;
+}
+
void read_trace_pipe(void)
{
int trace_fd;
- trace_fd = open(DEBUGFS "trace_pipe", O_RDONLY, 0);
+ if (access(TRACEFS_PIPE, F_OK) == 0)
+ trace_fd = open(TRACEFS_PIPE, O_RDONLY, 0);
+ else
+ trace_fd = open(DEBUGFS_PIPE, O_RDONLY, 0);
if (trace_fd < 0)
return;
@@ -109,3 +223,161 @@ void read_trace_pipe(void)
}
}
}
+
+ssize_t get_uprobe_offset(const void *addr)
+{
+ size_t start, end, base;
+ char buf[256];
+ bool found = false;
+ FILE *f;
+
+ f = fopen("/proc/self/maps", "r");
+ if (!f)
+ return -errno;
+
+ while (fscanf(f, "%zx-%zx %s %zx %*[^\n]\n", &start, &end, buf, &base) == 4) {
+ if (buf[2] == 'x' && (uintptr_t)addr >= start && (uintptr_t)addr < end) {
+ found = true;
+ break;
+ }
+ }
+
+ fclose(f);
+
+ if (!found)
+ return -ESRCH;
+
+#if defined(__powerpc64__) && defined(_CALL_ELF) && _CALL_ELF == 2
+
+#define OP_RT_RA_MASK 0xffff0000UL
+#define LIS_R2 0x3c400000UL
+#define ADDIS_R2_R12 0x3c4c0000UL
+#define ADDI_R2_R2 0x38420000UL
+
+ /*
+ * A PPC64 ABIv2 function may have a local and a global entry
+ * point. We need to use the local entry point when patching
+ * functions, so identify and step over the global entry point
+ * sequence.
+ *
+ * The global entry point sequence is always of the form:
+ *
+ * addis r2,r12,XXXX
+ * addi r2,r2,XXXX
+ *
+ * A linker optimisation may convert the addis to lis:
+ *
+ * lis r2,XXXX
+ * addi r2,r2,XXXX
+ */
+ {
+ const __u32 *insn = (const __u32 *)(uintptr_t)addr;
+
+ if ((((*insn & OP_RT_RA_MASK) == ADDIS_R2_R12) ||
+ ((*insn & OP_RT_RA_MASK) == LIS_R2)) &&
+ ((*(insn + 1) & OP_RT_RA_MASK) == ADDI_R2_R2))
+ return (uintptr_t)(insn + 2) - start + base;
+ }
+#endif
+ return (uintptr_t)addr - start + base;
+}
+
+ssize_t get_rel_offset(uintptr_t addr)
+{
+ size_t start, end, offset;
+ char buf[256];
+ FILE *f;
+
+ f = fopen("/proc/self/maps", "r");
+ if (!f)
+ return -errno;
+
+ while (fscanf(f, "%zx-%zx %s %zx %*[^\n]\n", &start, &end, buf, &offset) == 4) {
+ if (addr >= start && addr < end) {
+ fclose(f);
+ return (size_t)addr - start + offset;
+ }
+ }
+
+ fclose(f);
+ return -EINVAL;
+}
+
+static int
+parse_build_id_buf(const void *note_start, Elf32_Word note_size, char *build_id)
+{
+ Elf32_Word note_offs = 0;
+
+ while (note_offs + sizeof(Elf32_Nhdr) < note_size) {
+ Elf32_Nhdr *nhdr = (Elf32_Nhdr *)(note_start + note_offs);
+
+ if (nhdr->n_type == 3 && nhdr->n_namesz == sizeof("GNU") &&
+ !strcmp((char *)(nhdr + 1), "GNU") && nhdr->n_descsz > 0 &&
+ nhdr->n_descsz <= BPF_BUILD_ID_SIZE) {
+ memcpy(build_id, note_start + note_offs +
+ ALIGN(sizeof("GNU"), 4) + sizeof(Elf32_Nhdr), nhdr->n_descsz);
+ memset(build_id + nhdr->n_descsz, 0, BPF_BUILD_ID_SIZE - nhdr->n_descsz);
+ return (int) nhdr->n_descsz;
+ }
+
+ note_offs = note_offs + sizeof(Elf32_Nhdr) +
+ ALIGN(nhdr->n_namesz, 4) + ALIGN(nhdr->n_descsz, 4);
+ }
+
+ return -ENOENT;
+}
+
+/* Reads binary from *path* file and returns it in the *build_id* buffer
+ * with *size* which is expected to be at least BPF_BUILD_ID_SIZE bytes.
+ * Returns size of build id on success. On error the error value is
+ * returned.
+ */
+int read_build_id(const char *path, char *build_id, size_t size)
+{
+ int fd, err = -EINVAL;
+ Elf *elf = NULL;
+ GElf_Ehdr ehdr;
+ size_t max, i;
+
+ if (size < BPF_BUILD_ID_SIZE)
+ return -EINVAL;
+
+ fd = open(path, O_RDONLY | O_CLOEXEC);
+ if (fd < 0)
+ return -errno;
+
+ (void)elf_version(EV_CURRENT);
+
+ elf = elf_begin(fd, ELF_C_READ_MMAP, NULL);
+ if (!elf)
+ goto out;
+ if (elf_kind(elf) != ELF_K_ELF)
+ goto out;
+ if (!gelf_getehdr(elf, &ehdr))
+ goto out;
+
+ for (i = 0; i < ehdr.e_phnum; i++) {
+ GElf_Phdr mem, *phdr;
+ char *data;
+
+ phdr = gelf_getphdr(elf, i, &mem);
+ if (!phdr)
+ goto out;
+ if (phdr->p_type != PT_NOTE)
+ continue;
+ data = elf_rawfile(elf, &max);
+ if (!data)
+ goto out;
+ if (phdr->p_offset + phdr->p_memsz > max)
+ goto out;
+ err = parse_build_id_buf(data + phdr->p_offset, phdr->p_memsz, build_id);
+ if (err > 0)
+ break;
+ }
+
+out:
+ if (elf)
+ elf_end(elf);
+ close(fd);
+ return err;
+}
diff --git a/tools/testing/selftests/bpf/trace_helpers.h b/tools/testing/selftests/bpf/trace_helpers.h
index 25ef597dd03f..04fd1da7079d 100644
--- a/tools/testing/selftests/bpf/trace_helpers.h
+++ b/tools/testing/selftests/bpf/trace_helpers.h
@@ -4,14 +4,32 @@
#include <bpf/libbpf.h>
+#define __ALIGN_MASK(x, mask) (((x)+(mask))&~(mask))
+#define ALIGN(x, a) __ALIGN_MASK(x, (typeof(x))(a)-1)
+
struct ksym {
long addr;
char *name;
};
+struct ksyms;
int load_kallsyms(void);
struct ksym *ksym_search(long key);
long ksym_get_addr(const char *name);
+
+struct ksyms *load_kallsyms_local(void);
+struct ksym *ksym_search_local(struct ksyms *ksyms, long key);
+long ksym_get_addr_local(struct ksyms *ksyms, const char *name);
+void free_kallsyms_local(struct ksyms *ksyms);
+
+/* open kallsyms and find addresses on the fly, faster than load + search. */
+int kallsyms_find(const char *sym, unsigned long long *addr);
+
void read_trace_pipe(void);
+ssize_t get_uprobe_offset(const void *addr);
+ssize_t get_rel_offset(uintptr_t addr);
+
+int read_build_id(const char *path, char *build_id, size_t size);
+
#endif
diff --git a/tools/testing/selftests/bpf/unpriv_helpers.c b/tools/testing/selftests/bpf/unpriv_helpers.c
new file mode 100644
index 000000000000..b6d016461fb0
--- /dev/null
+++ b/tools/testing/selftests/bpf/unpriv_helpers.c
@@ -0,0 +1,57 @@
+// SPDX-License-Identifier: GPL-2.0-only
+
+#include <stdbool.h>
+#include <stdlib.h>
+#include <error.h>
+#include <stdio.h>
+#include <string.h>
+#include <unistd.h>
+#include <fcntl.h>
+
+#include "unpriv_helpers.h"
+
+static bool get_mitigations_off(void)
+{
+ char cmdline[4096], *c;
+ int fd, ret = false;
+
+ fd = open("/proc/cmdline", O_RDONLY);
+ if (fd < 0) {
+ perror("open /proc/cmdline");
+ return false;
+ }
+
+ if (read(fd, cmdline, sizeof(cmdline) - 1) < 0) {
+ perror("read /proc/cmdline");
+ goto out;
+ }
+
+ cmdline[sizeof(cmdline) - 1] = '\0';
+ for (c = strtok(cmdline, " \n"); c; c = strtok(NULL, " \n")) {
+ if (strncmp(c, "mitigations=off", strlen(c)))
+ continue;
+ ret = true;
+ break;
+ }
+out:
+ close(fd);
+ return ret;
+}
+
+bool get_unpriv_disabled(void)
+{
+ bool disabled;
+ char buf[2];
+ FILE *fd;
+
+ fd = fopen("/proc/sys/" UNPRIV_SYSCTL, "r");
+ if (fd) {
+ disabled = (fgets(buf, 2, fd) == buf && atoi(buf));
+ fclose(fd);
+ } else {
+ perror("fopen /proc/sys/" UNPRIV_SYSCTL);
+ disabled = true;
+ }
+
+ return disabled ? true : get_mitigations_off();
+}
diff --git a/tools/testing/selftests/bpf/unpriv_helpers.h b/tools/testing/selftests/bpf/unpriv_helpers.h
new file mode 100644
index 000000000000..151f67329665
--- /dev/null
+++ b/tools/testing/selftests/bpf/unpriv_helpers.h
@@ -0,0 +1,7 @@
+// SPDX-License-Identifier: GPL-2.0-only
+
+#include <stdbool.h>
+
+#define UNPRIV_SYSCTL "kernel/unprivileged_bpf_disabled"
+
+bool get_unpriv_disabled(void);
diff --git a/tools/testing/selftests/bpf/uprobe_multi.c b/tools/testing/selftests/bpf/uprobe_multi.c
new file mode 100644
index 000000000000..a61ceab60b68
--- /dev/null
+++ b/tools/testing/selftests/bpf/uprobe_multi.c
@@ -0,0 +1,91 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include <stdio.h>
+#include <string.h>
+#include <sdt.h>
+
+#define __PASTE(a, b) a##b
+#define PASTE(a, b) __PASTE(a, b)
+
+#define NAME(name, idx) PASTE(name, idx)
+
+#define DEF(name, idx) int NAME(name, idx)(void) { return 0; }
+#define CALL(name, idx) NAME(name, idx)();
+
+#define F(body, name, idx) body(name, idx)
+
+#define F10(body, name, idx) \
+ F(body, PASTE(name, idx), 0) F(body, PASTE(name, idx), 1) F(body, PASTE(name, idx), 2) \
+ F(body, PASTE(name, idx), 3) F(body, PASTE(name, idx), 4) F(body, PASTE(name, idx), 5) \
+ F(body, PASTE(name, idx), 6) F(body, PASTE(name, idx), 7) F(body, PASTE(name, idx), 8) \
+ F(body, PASTE(name, idx), 9)
+
+#define F100(body, name, idx) \
+ F10(body, PASTE(name, idx), 0) F10(body, PASTE(name, idx), 1) F10(body, PASTE(name, idx), 2) \
+ F10(body, PASTE(name, idx), 3) F10(body, PASTE(name, idx), 4) F10(body, PASTE(name, idx), 5) \
+ F10(body, PASTE(name, idx), 6) F10(body, PASTE(name, idx), 7) F10(body, PASTE(name, idx), 8) \
+ F10(body, PASTE(name, idx), 9)
+
+#define F1000(body, name, idx) \
+ F100(body, PASTE(name, idx), 0) F100(body, PASTE(name, idx), 1) F100(body, PASTE(name, idx), 2) \
+ F100(body, PASTE(name, idx), 3) F100(body, PASTE(name, idx), 4) F100(body, PASTE(name, idx), 5) \
+ F100(body, PASTE(name, idx), 6) F100(body, PASTE(name, idx), 7) F100(body, PASTE(name, idx), 8) \
+ F100(body, PASTE(name, idx), 9)
+
+#define F10000(body, name, idx) \
+ F1000(body, PASTE(name, idx), 0) F1000(body, PASTE(name, idx), 1) F1000(body, PASTE(name, idx), 2) \
+ F1000(body, PASTE(name, idx), 3) F1000(body, PASTE(name, idx), 4) F1000(body, PASTE(name, idx), 5) \
+ F1000(body, PASTE(name, idx), 6) F1000(body, PASTE(name, idx), 7) F1000(body, PASTE(name, idx), 8) \
+ F1000(body, PASTE(name, idx), 9)
+
+F10000(DEF, uprobe_multi_func_, 0)
+F10000(DEF, uprobe_multi_func_, 1)
+F10000(DEF, uprobe_multi_func_, 2)
+F10000(DEF, uprobe_multi_func_, 3)
+F10000(DEF, uprobe_multi_func_, 4)
+
+static int bench(void)
+{
+ F10000(CALL, uprobe_multi_func_, 0)
+ F10000(CALL, uprobe_multi_func_, 1)
+ F10000(CALL, uprobe_multi_func_, 2)
+ F10000(CALL, uprobe_multi_func_, 3)
+ F10000(CALL, uprobe_multi_func_, 4)
+ return 0;
+}
+
+#define PROBE STAP_PROBE(test, usdt);
+
+#define PROBE10 PROBE PROBE PROBE PROBE PROBE \
+ PROBE PROBE PROBE PROBE PROBE
+#define PROBE100 PROBE10 PROBE10 PROBE10 PROBE10 PROBE10 \
+ PROBE10 PROBE10 PROBE10 PROBE10 PROBE10
+#define PROBE1000 PROBE100 PROBE100 PROBE100 PROBE100 PROBE100 \
+ PROBE100 PROBE100 PROBE100 PROBE100 PROBE100
+#define PROBE10000 PROBE1000 PROBE1000 PROBE1000 PROBE1000 PROBE1000 \
+ PROBE1000 PROBE1000 PROBE1000 PROBE1000 PROBE1000
+
+static int usdt(void)
+{
+ PROBE10000
+ PROBE10000
+ PROBE10000
+ PROBE10000
+ PROBE10000
+ return 0;
+}
+
+int main(int argc, char **argv)
+{
+ if (argc != 2)
+ goto error;
+
+ if (!strcmp("bench", argv[1]))
+ return bench();
+ if (!strcmp("usdt", argv[1]))
+ return usdt();
+
+error:
+ fprintf(stderr, "usage: %s <bench|usdt>\n", argv[0]);
+ return -1;
+}
diff --git a/tools/testing/selftests/bpf/urandom_read.c b/tools/testing/selftests/bpf/urandom_read.c
index db781052758d..4ed795655b9f 100644
--- a/tools/testing/selftests/bpf/urandom_read.c
+++ b/tools/testing/selftests/bpf/urandom_read.c
@@ -1,35 +1,99 @@
+#include <stdbool.h>
#include <stdio.h>
#include <unistd.h>
+#include <errno.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <fcntl.h>
#include <stdlib.h>
+#include <signal.h>
+
+#define _SDT_HAS_SEMAPHORES 1
+#include "sdt.h"
+
+#define SHARED 1
+#include "bpf/libbpf_internal.h"
+
+#define SEC(name) __attribute__((section(name), used))
#define BUF_SIZE 256
-static __attribute__((noinline))
-void urandom_read(int fd, int count)
+/* defined in urandom_read_aux.c */
+void urand_read_without_sema(int iter_num, int iter_cnt, int read_sz);
+/* these are coming from urandom_read_lib{1,2}.c */
+void urandlib_read_with_sema(int iter_num, int iter_cnt, int read_sz);
+void urandlib_read_without_sema(int iter_num, int iter_cnt, int read_sz);
+
+int urandlib_api(void);
+COMPAT_VERSION(urandlib_api_old, urandlib_api, LIBURANDOM_READ_1.0.0)
+int urandlib_api_old(void);
+int urandlib_api_sameoffset(void);
+
+unsigned short urand_read_with_sema_semaphore SEC(".probes");
+
+static noinline void urandom_read(int fd, int count)
{
- char buf[BUF_SIZE];
- int i;
+ char buf[BUF_SIZE];
+ int i;
+
+ for (i = 0; i < count; ++i) {
+ read(fd, buf, BUF_SIZE);
- for (i = 0; i < count; ++i)
- read(fd, buf, BUF_SIZE);
+ /* trigger USDTs defined in executable itself */
+ urand_read_without_sema(i, count, BUF_SIZE);
+ STAP_PROBE3(urand, read_with_sema, i, count, BUF_SIZE);
+
+ /* trigger USDTs defined in shared lib */
+ urandlib_read_without_sema(i, count, BUF_SIZE);
+ urandlib_read_with_sema(i, count, BUF_SIZE);
+ }
+}
+
+static volatile bool parent_ready;
+
+static void handle_sigpipe(int sig)
+{
+ parent_ready = true;
}
int main(int argc, char *argv[])
{
int fd = open("/dev/urandom", O_RDONLY);
int count = 4;
+ bool report_pid = false;
if (fd < 0)
return 1;
- if (argc == 2)
+ if (argc >= 2)
count = atoi(argv[1]);
+ if (argc >= 3) {
+ report_pid = true;
+ /* install SIGPIPE handler to catch when parent closes their
+ * end of the pipe (on the other side of our stdout)
+ */
+ signal(SIGPIPE, handle_sigpipe);
+ }
+
+ /* report PID and wait for parent process to send us "signal" by
+ * closing stdout
+ */
+ if (report_pid) {
+ while (!parent_ready) {
+ fprintf(stdout, "%d\n", getpid());
+ fflush(stdout);
+ }
+ /* at this point stdout is closed, parent process knows our
+ * PID and is ready to trace us
+ */
+ }
urandom_read(fd, count);
+ urandlib_api();
+ urandlib_api_old();
+ urandlib_api_sameoffset();
+
close(fd);
return 0;
}
diff --git a/tools/testing/selftests/bpf/urandom_read_aux.c b/tools/testing/selftests/bpf/urandom_read_aux.c
new file mode 100644
index 000000000000..6132edcfea74
--- /dev/null
+++ b/tools/testing/selftests/bpf/urandom_read_aux.c
@@ -0,0 +1,9 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2022 Meta Platforms, Inc. and affiliates. */
+#include "sdt.h"
+
+void urand_read_without_sema(int iter_num, int iter_cnt, int read_sz)
+{
+ /* semaphore-less USDT */
+ STAP_PROBE3(urand, read_without_sema, iter_num, iter_cnt, read_sz);
+}
diff --git a/tools/testing/selftests/bpf/urandom_read_lib1.c b/tools/testing/selftests/bpf/urandom_read_lib1.c
new file mode 100644
index 000000000000..8c1356d8b4ee
--- /dev/null
+++ b/tools/testing/selftests/bpf/urandom_read_lib1.c
@@ -0,0 +1,35 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2022 Meta Platforms, Inc. and affiliates. */
+#define _SDT_HAS_SEMAPHORES 1
+#include "sdt.h"
+
+#define SHARED 1
+#include "bpf/libbpf_internal.h"
+
+#define SEC(name) __attribute__((section(name), used))
+
+unsigned short urandlib_read_with_sema_semaphore SEC(".probes");
+
+void urandlib_read_with_sema(int iter_num, int iter_cnt, int read_sz)
+{
+ STAP_PROBE3(urandlib, read_with_sema, iter_num, iter_cnt, read_sz);
+}
+
+COMPAT_VERSION(urandlib_api_v1, urandlib_api, LIBURANDOM_READ_1.0.0)
+int urandlib_api_v1(void)
+{
+ return 1;
+}
+
+DEFAULT_VERSION(urandlib_api_v2, urandlib_api, LIBURANDOM_READ_2.0.0)
+int urandlib_api_v2(void)
+{
+ return 2;
+}
+
+COMPAT_VERSION(urandlib_api_sameoffset, urandlib_api_sameoffset, LIBURANDOM_READ_1.0.0)
+DEFAULT_VERSION(urandlib_api_sameoffset, urandlib_api_sameoffset, LIBURANDOM_READ_2.0.0)
+int urandlib_api_sameoffset(void)
+{
+ return 3;
+}
diff --git a/tools/testing/selftests/bpf/urandom_read_lib2.c b/tools/testing/selftests/bpf/urandom_read_lib2.c
new file mode 100644
index 000000000000..9d401ad9838f
--- /dev/null
+++ b/tools/testing/selftests/bpf/urandom_read_lib2.c
@@ -0,0 +1,8 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2022 Meta Platforms, Inc. and affiliates. */
+#include "sdt.h"
+
+void urandlib_read_without_sema(int iter_num, int iter_cnt, int read_sz)
+{
+ STAP_PROBE3(urandlib, read_without_sema, iter_num, iter_cnt, read_sz);
+}
diff --git a/tools/testing/selftests/bpf/verifier/and.c b/tools/testing/selftests/bpf/verifier/and.c
deleted file mode 100644
index d781bc86e100..000000000000
--- a/tools/testing/selftests/bpf/verifier/and.c
+++ /dev/null
@@ -1,50 +0,0 @@
-{
- "invalid and of negative number",
- .insns = {
- BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
- BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
- BPF_LD_MAP_FD(BPF_REG_1, 0),
- BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
- BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 4),
- BPF_LDX_MEM(BPF_B, BPF_REG_1, BPF_REG_0, 0),
- BPF_ALU64_IMM(BPF_AND, BPF_REG_1, -4),
- BPF_ALU64_IMM(BPF_LSH, BPF_REG_1, 2),
- BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_1),
- BPF_ST_MEM(BPF_DW, BPF_REG_0, 0, offsetof(struct test_val, foo)),
- BPF_EXIT_INSN(),
- },
- .fixup_map_hash_48b = { 3 },
- .errstr = "R0 max value is outside of the allowed memory range",
- .result = REJECT,
- .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
-},
-{
- "invalid range check",
- .insns = {
- BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
- BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
- BPF_LD_MAP_FD(BPF_REG_1, 0),
- BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
- BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 12),
- BPF_LDX_MEM(BPF_W, BPF_REG_1, BPF_REG_0, 0),
- BPF_MOV64_IMM(BPF_REG_9, 1),
- BPF_ALU32_IMM(BPF_MOD, BPF_REG_1, 2),
- BPF_ALU32_IMM(BPF_ADD, BPF_REG_1, 1),
- BPF_ALU32_REG(BPF_AND, BPF_REG_9, BPF_REG_1),
- BPF_ALU32_IMM(BPF_ADD, BPF_REG_9, 1),
- BPF_ALU32_IMM(BPF_RSH, BPF_REG_9, 1),
- BPF_MOV32_IMM(BPF_REG_3, 1),
- BPF_ALU32_REG(BPF_SUB, BPF_REG_3, BPF_REG_9),
- BPF_ALU32_IMM(BPF_MUL, BPF_REG_3, 0x10000000),
- BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_3),
- BPF_STX_MEM(BPF_W, BPF_REG_0, BPF_REG_3, 0),
- BPF_MOV64_REG(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- },
- .fixup_map_hash_48b = { 3 },
- .errstr = "R0 max value is outside of the allowed memory range",
- .result = REJECT,
- .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
-},
diff --git a/tools/testing/selftests/bpf/verifier/array_access.c b/tools/testing/selftests/bpf/verifier/array_access.c
deleted file mode 100644
index 1c4b1939f5a8..000000000000
--- a/tools/testing/selftests/bpf/verifier/array_access.c
+++ /dev/null
@@ -1,378 +0,0 @@
-{
- "valid map access into an array with a constant",
- .insns = {
- BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
- BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
- BPF_LD_MAP_FD(BPF_REG_1, 0),
- BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
- BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 1),
- BPF_ST_MEM(BPF_DW, BPF_REG_0, 0, offsetof(struct test_val, foo)),
- BPF_EXIT_INSN(),
- },
- .fixup_map_hash_48b = { 3 },
- .errstr_unpriv = "R0 leaks addr",
- .result_unpriv = REJECT,
- .result = ACCEPT,
-},
-{
- "valid map access into an array with a register",
- .insns = {
- BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
- BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
- BPF_LD_MAP_FD(BPF_REG_1, 0),
- BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
- BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 4),
- BPF_MOV64_IMM(BPF_REG_1, 4),
- BPF_ALU64_IMM(BPF_LSH, BPF_REG_1, 2),
- BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_1),
- BPF_ST_MEM(BPF_DW, BPF_REG_0, 0, offsetof(struct test_val, foo)),
- BPF_EXIT_INSN(),
- },
- .fixup_map_hash_48b = { 3 },
- .errstr_unpriv = "R0 leaks addr",
- .result_unpriv = REJECT,
- .result = ACCEPT,
- .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
-},
-{
- "valid map access into an array with a variable",
- .insns = {
- BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
- BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
- BPF_LD_MAP_FD(BPF_REG_1, 0),
- BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
- BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 5),
- BPF_LDX_MEM(BPF_W, BPF_REG_1, BPF_REG_0, 0),
- BPF_JMP_IMM(BPF_JGE, BPF_REG_1, MAX_ENTRIES, 3),
- BPF_ALU64_IMM(BPF_LSH, BPF_REG_1, 2),
- BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_1),
- BPF_ST_MEM(BPF_DW, BPF_REG_0, 0, offsetof(struct test_val, foo)),
- BPF_EXIT_INSN(),
- },
- .fixup_map_hash_48b = { 3 },
- .errstr_unpriv = "R0 leaks addr",
- .result_unpriv = REJECT,
- .result = ACCEPT,
- .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
-},
-{
- "valid map access into an array with a signed variable",
- .insns = {
- BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
- BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
- BPF_LD_MAP_FD(BPF_REG_1, 0),
- BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
- BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 9),
- BPF_LDX_MEM(BPF_W, BPF_REG_1, BPF_REG_0, 0),
- BPF_JMP_IMM(BPF_JSGT, BPF_REG_1, 0xffffffff, 1),
- BPF_MOV32_IMM(BPF_REG_1, 0),
- BPF_MOV32_IMM(BPF_REG_2, MAX_ENTRIES),
- BPF_JMP_REG(BPF_JSGT, BPF_REG_2, BPF_REG_1, 1),
- BPF_MOV32_IMM(BPF_REG_1, 0),
- BPF_ALU32_IMM(BPF_LSH, BPF_REG_1, 2),
- BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_1),
- BPF_ST_MEM(BPF_DW, BPF_REG_0, 0, offsetof(struct test_val, foo)),
- BPF_EXIT_INSN(),
- },
- .fixup_map_hash_48b = { 3 },
- .errstr_unpriv = "R0 leaks addr",
- .result_unpriv = REJECT,
- .result = ACCEPT,
- .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
-},
-{
- "invalid map access into an array with a constant",
- .insns = {
- BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
- BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
- BPF_LD_MAP_FD(BPF_REG_1, 0),
- BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
- BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 1),
- BPF_ST_MEM(BPF_DW, BPF_REG_0, (MAX_ENTRIES + 1) << 2,
- offsetof(struct test_val, foo)),
- BPF_EXIT_INSN(),
- },
- .fixup_map_hash_48b = { 3 },
- .errstr = "invalid access to map value, value_size=48 off=48 size=8",
- .result = REJECT,
-},
-{
- "invalid map access into an array with a register",
- .insns = {
- BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
- BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
- BPF_LD_MAP_FD(BPF_REG_1, 0),
- BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
- BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 4),
- BPF_MOV64_IMM(BPF_REG_1, MAX_ENTRIES + 1),
- BPF_ALU64_IMM(BPF_LSH, BPF_REG_1, 2),
- BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_1),
- BPF_ST_MEM(BPF_DW, BPF_REG_0, 0, offsetof(struct test_val, foo)),
- BPF_EXIT_INSN(),
- },
- .fixup_map_hash_48b = { 3 },
- .errstr = "R0 min value is outside of the allowed memory range",
- .result = REJECT,
- .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
-},
-{
- "invalid map access into an array with a variable",
- .insns = {
- BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
- BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
- BPF_LD_MAP_FD(BPF_REG_1, 0),
- BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
- BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 4),
- BPF_LDX_MEM(BPF_W, BPF_REG_1, BPF_REG_0, 0),
- BPF_ALU64_IMM(BPF_LSH, BPF_REG_1, 2),
- BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_1),
- BPF_ST_MEM(BPF_DW, BPF_REG_0, 0, offsetof(struct test_val, foo)),
- BPF_EXIT_INSN(),
- },
- .fixup_map_hash_48b = { 3 },
- .errstr = "R0 unbounded memory access, make sure to bounds check any such access",
- .result = REJECT,
- .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
-},
-{
- "invalid map access into an array with no floor check",
- .insns = {
- BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
- BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
- BPF_LD_MAP_FD(BPF_REG_1, 0),
- BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
- BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 7),
- BPF_LDX_MEM(BPF_DW, BPF_REG_1, BPF_REG_0, 0),
- BPF_MOV32_IMM(BPF_REG_2, MAX_ENTRIES),
- BPF_JMP_REG(BPF_JSGT, BPF_REG_2, BPF_REG_1, 1),
- BPF_MOV32_IMM(BPF_REG_1, 0),
- BPF_ALU32_IMM(BPF_LSH, BPF_REG_1, 2),
- BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_1),
- BPF_ST_MEM(BPF_DW, BPF_REG_0, 0, offsetof(struct test_val, foo)),
- BPF_EXIT_INSN(),
- },
- .fixup_map_hash_48b = { 3 },
- .errstr_unpriv = "R0 leaks addr",
- .errstr = "R0 unbounded memory access",
- .result_unpriv = REJECT,
- .result = REJECT,
- .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
-},
-{
- "invalid map access into an array with a invalid max check",
- .insns = {
- BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
- BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
- BPF_LD_MAP_FD(BPF_REG_1, 0),
- BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
- BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 7),
- BPF_LDX_MEM(BPF_W, BPF_REG_1, BPF_REG_0, 0),
- BPF_MOV32_IMM(BPF_REG_2, MAX_ENTRIES + 1),
- BPF_JMP_REG(BPF_JGT, BPF_REG_2, BPF_REG_1, 1),
- BPF_MOV32_IMM(BPF_REG_1, 0),
- BPF_ALU32_IMM(BPF_LSH, BPF_REG_1, 2),
- BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_1),
- BPF_ST_MEM(BPF_DW, BPF_REG_0, 0, offsetof(struct test_val, foo)),
- BPF_EXIT_INSN(),
- },
- .fixup_map_hash_48b = { 3 },
- .errstr_unpriv = "R0 leaks addr",
- .errstr = "invalid access to map value, value_size=48 off=44 size=8",
- .result_unpriv = REJECT,
- .result = REJECT,
- .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
-},
-{
- "invalid map access into an array with a invalid max check",
- .insns = {
- BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
- BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
- BPF_LD_MAP_FD(BPF_REG_1, 0),
- BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
- BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 10),
- BPF_MOV64_REG(BPF_REG_8, BPF_REG_0),
- BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
- BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
- BPF_LD_MAP_FD(BPF_REG_1, 0),
- BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
- BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 2),
- BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_8),
- BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_0,
- offsetof(struct test_val, foo)),
- BPF_EXIT_INSN(),
- },
- .fixup_map_hash_48b = { 3, 11 },
- .errstr = "R0 pointer += pointer",
- .result = REJECT,
- .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
-},
-{
- "valid read map access into a read-only array 1",
- .insns = {
- BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
- BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
- BPF_LD_MAP_FD(BPF_REG_1, 0),
- BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
- BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 1),
- BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- },
- .fixup_map_array_ro = { 3 },
- .result = ACCEPT,
- .retval = 28,
-},
-{
- "valid read map access into a read-only array 2",
- .insns = {
- BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
- BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
- BPF_LD_MAP_FD(BPF_REG_1, 0),
- BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
- BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 6),
-
- BPF_MOV64_REG(BPF_REG_1, BPF_REG_0),
- BPF_MOV64_IMM(BPF_REG_2, 4),
- BPF_MOV64_IMM(BPF_REG_3, 0),
- BPF_MOV64_IMM(BPF_REG_4, 0),
- BPF_MOV64_IMM(BPF_REG_5, 0),
- BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
- BPF_FUNC_csum_diff),
- BPF_EXIT_INSN(),
- },
- .prog_type = BPF_PROG_TYPE_SCHED_CLS,
- .fixup_map_array_ro = { 3 },
- .result = ACCEPT,
- .retval = -29,
-},
-{
- "invalid write map access into a read-only array 1",
- .insns = {
- BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
- BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
- BPF_LD_MAP_FD(BPF_REG_1, 0),
- BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
- BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 1),
- BPF_ST_MEM(BPF_DW, BPF_REG_0, 0, 42),
- BPF_EXIT_INSN(),
- },
- .fixup_map_array_ro = { 3 },
- .result = REJECT,
- .errstr = "write into map forbidden",
-},
-{
- "invalid write map access into a read-only array 2",
- .insns = {
- BPF_MOV64_REG(BPF_REG_6, BPF_REG_1),
- BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
- BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
- BPF_LD_MAP_FD(BPF_REG_1, 0),
- BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
- BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 5),
- BPF_MOV64_REG(BPF_REG_1, BPF_REG_6),
- BPF_MOV64_IMM(BPF_REG_2, 0),
- BPF_MOV64_REG(BPF_REG_3, BPF_REG_0),
- BPF_MOV64_IMM(BPF_REG_4, 8),
- BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
- BPF_FUNC_skb_load_bytes),
- BPF_EXIT_INSN(),
- },
- .prog_type = BPF_PROG_TYPE_SCHED_CLS,
- .fixup_map_array_ro = { 4 },
- .result = REJECT,
- .errstr = "write into map forbidden",
-},
-{
- "valid write map access into a write-only array 1",
- .insns = {
- BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
- BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
- BPF_LD_MAP_FD(BPF_REG_1, 0),
- BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
- BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 1),
- BPF_ST_MEM(BPF_DW, BPF_REG_0, 0, 42),
- BPF_MOV64_IMM(BPF_REG_0, 1),
- BPF_EXIT_INSN(),
- },
- .fixup_map_array_wo = { 3 },
- .result = ACCEPT,
- .retval = 1,
-},
-{
- "valid write map access into a write-only array 2",
- .insns = {
- BPF_MOV64_REG(BPF_REG_6, BPF_REG_1),
- BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
- BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
- BPF_LD_MAP_FD(BPF_REG_1, 0),
- BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
- BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 5),
- BPF_MOV64_REG(BPF_REG_1, BPF_REG_6),
- BPF_MOV64_IMM(BPF_REG_2, 0),
- BPF_MOV64_REG(BPF_REG_3, BPF_REG_0),
- BPF_MOV64_IMM(BPF_REG_4, 8),
- BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
- BPF_FUNC_skb_load_bytes),
- BPF_EXIT_INSN(),
- },
- .prog_type = BPF_PROG_TYPE_SCHED_CLS,
- .fixup_map_array_wo = { 4 },
- .result = ACCEPT,
- .retval = 0,
-},
-{
- "invalid read map access into a write-only array 1",
- .insns = {
- BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
- BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
- BPF_LD_MAP_FD(BPF_REG_1, 0),
- BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
- BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 1),
- BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- },
- .fixup_map_array_wo = { 3 },
- .result = REJECT,
- .errstr = "read from map forbidden",
-},
-{
- "invalid read map access into a write-only array 2",
- .insns = {
- BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
- BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
- BPF_LD_MAP_FD(BPF_REG_1, 0),
- BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
- BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 6),
-
- BPF_MOV64_REG(BPF_REG_1, BPF_REG_0),
- BPF_MOV64_IMM(BPF_REG_2, 4),
- BPF_MOV64_IMM(BPF_REG_3, 0),
- BPF_MOV64_IMM(BPF_REG_4, 0),
- BPF_MOV64_IMM(BPF_REG_5, 0),
- BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
- BPF_FUNC_csum_diff),
- BPF_EXIT_INSN(),
- },
- .prog_type = BPF_PROG_TYPE_SCHED_CLS,
- .fixup_map_array_wo = { 3 },
- .result = REJECT,
- .errstr = "read from map forbidden",
-},
diff --git a/tools/testing/selftests/bpf/verifier/atomic_and.c b/tools/testing/selftests/bpf/verifier/atomic_and.c
new file mode 100644
index 000000000000..fe4bb70eb9c5
--- /dev/null
+++ b/tools/testing/selftests/bpf/verifier/atomic_and.c
@@ -0,0 +1,100 @@
+{
+ "BPF_ATOMIC_AND without fetch",
+ .insns = {
+ /* val = 0x110; */
+ BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0x110),
+ /* atomic_and(&val, 0x011); */
+ BPF_MOV64_IMM(BPF_REG_1, 0x011),
+ BPF_ATOMIC_OP(BPF_DW, BPF_AND, BPF_REG_10, BPF_REG_1, -8),
+ /* if (val != 0x010) exit(2); */
+ BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_10, -8),
+ BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0x010, 2),
+ BPF_MOV64_IMM(BPF_REG_0, 2),
+ BPF_EXIT_INSN(),
+ /* r1 should not be clobbered, no BPF_FETCH flag */
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, 0x011, 1),
+ BPF_MOV64_IMM(BPF_REG_0, 1),
+ BPF_EXIT_INSN(),
+ },
+ .result = ACCEPT,
+},
+{
+ "BPF_ATOMIC_AND with fetch",
+ .insns = {
+ BPF_MOV64_IMM(BPF_REG_0, 123),
+ /* val = 0x110; */
+ BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0x110),
+ /* old = atomic_fetch_and(&val, 0x011); */
+ BPF_MOV64_IMM(BPF_REG_1, 0x011),
+ BPF_ATOMIC_OP(BPF_DW, BPF_AND | BPF_FETCH, BPF_REG_10, BPF_REG_1, -8),
+ /* if (old != 0x110) exit(3); */
+ BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, 0x110, 2),
+ BPF_MOV64_IMM(BPF_REG_0, 3),
+ BPF_EXIT_INSN(),
+ /* if (val != 0x010) exit(2); */
+ BPF_LDX_MEM(BPF_DW, BPF_REG_1, BPF_REG_10, -8),
+ BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, 0x010, 2),
+ BPF_MOV64_IMM(BPF_REG_1, 2),
+ BPF_EXIT_INSN(),
+ /* Check R0 wasn't clobbered (for fear of x86 JIT bug) */
+ BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 123, 2),
+ BPF_MOV64_IMM(BPF_REG_0, 1),
+ BPF_EXIT_INSN(),
+ /* exit(0); */
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .result = ACCEPT,
+},
+{
+ "BPF_ATOMIC_AND with fetch 32bit",
+ .insns = {
+ /* r0 = (s64) -1 */
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_ALU64_IMM(BPF_SUB, BPF_REG_0, 1),
+ /* val = 0x110; */
+ BPF_ST_MEM(BPF_W, BPF_REG_10, -4, 0x110),
+ /* old = atomic_fetch_and(&val, 0x011); */
+ BPF_MOV32_IMM(BPF_REG_1, 0x011),
+ BPF_ATOMIC_OP(BPF_W, BPF_AND | BPF_FETCH, BPF_REG_10, BPF_REG_1, -4),
+ /* if (old != 0x110) exit(3); */
+ BPF_JMP32_IMM(BPF_JEQ, BPF_REG_1, 0x110, 2),
+ BPF_MOV32_IMM(BPF_REG_0, 3),
+ BPF_EXIT_INSN(),
+ /* if (val != 0x010) exit(2); */
+ BPF_LDX_MEM(BPF_W, BPF_REG_1, BPF_REG_10, -4),
+ BPF_JMP32_IMM(BPF_JEQ, BPF_REG_1, 0x010, 2),
+ BPF_MOV32_IMM(BPF_REG_1, 2),
+ BPF_EXIT_INSN(),
+ /* Check R0 wasn't clobbered (for fear of x86 JIT bug)
+ * It should be -1 so add 1 to get exit code.
+ */
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 1),
+ BPF_EXIT_INSN(),
+ },
+ .result = ACCEPT,
+},
+{
+ "BPF_ATOMIC_AND with fetch - r0 as source reg",
+ .insns = {
+ /* val = 0x110; */
+ BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0x110),
+ /* old = atomic_fetch_and(&val, 0x011); */
+ BPF_MOV64_IMM(BPF_REG_0, 0x011),
+ BPF_ATOMIC_OP(BPF_DW, BPF_AND | BPF_FETCH, BPF_REG_10, BPF_REG_0, -8),
+ /* if (old != 0x110) exit(3); */
+ BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0x110, 2),
+ BPF_MOV64_IMM(BPF_REG_0, 3),
+ BPF_EXIT_INSN(),
+ /* if (val != 0x010) exit(2); */
+ BPF_LDX_MEM(BPF_DW, BPF_REG_1, BPF_REG_10, -8),
+ BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, 0x010, 2),
+ BPF_MOV64_IMM(BPF_REG_1, 2),
+ BPF_EXIT_INSN(),
+ /* exit(0); */
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .result = ACCEPT,
+},
diff --git a/tools/testing/selftests/bpf/verifier/atomic_bounds.c b/tools/testing/selftests/bpf/verifier/atomic_bounds.c
new file mode 100644
index 000000000000..e82183e4914f
--- /dev/null
+++ b/tools/testing/selftests/bpf/verifier/atomic_bounds.c
@@ -0,0 +1,27 @@
+{
+ "BPF_ATOMIC bounds propagation, mem->reg",
+ .insns = {
+ /* a = 0; */
+ /*
+ * Note this is implemented with two separate instructions,
+ * where you might think one would suffice:
+ *
+ * BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
+ *
+ * This is because BPF_ST_MEM doesn't seem to set the stack slot
+ * type to 0 when storing an immediate.
+ */
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_0, -8),
+ /* b = atomic_fetch_add(&a, 1); */
+ BPF_MOV64_IMM(BPF_REG_1, 1),
+ BPF_ATOMIC_OP(BPF_DW, BPF_ADD | BPF_FETCH, BPF_REG_10, BPF_REG_1, -8),
+ /* Verifier should be able to tell that this infinite loop isn't reachable. */
+ /* if (b) while (true) continue; */
+ BPF_JMP_IMM(BPF_JNE, BPF_REG_1, 0, -1),
+ BPF_EXIT_INSN(),
+ },
+ .result = ACCEPT,
+ .result_unpriv = REJECT,
+ .errstr_unpriv = "back-edge",
+},
diff --git a/tools/testing/selftests/bpf/verifier/atomic_cmpxchg.c b/tools/testing/selftests/bpf/verifier/atomic_cmpxchg.c
new file mode 100644
index 000000000000..9a7b1106fda8
--- /dev/null
+++ b/tools/testing/selftests/bpf/verifier/atomic_cmpxchg.c
@@ -0,0 +1,235 @@
+{
+ "atomic compare-and-exchange smoketest - 64bit",
+ .insns = {
+ /* val = 3; */
+ BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 3),
+ /* old = atomic_cmpxchg(&val, 2, 4); */
+ BPF_MOV64_IMM(BPF_REG_1, 4),
+ BPF_MOV64_IMM(BPF_REG_0, 2),
+ BPF_ATOMIC_OP(BPF_DW, BPF_CMPXCHG, BPF_REG_10, BPF_REG_1, -8),
+ /* if (old != 3) exit(2); */
+ BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 3, 2),
+ BPF_MOV64_IMM(BPF_REG_0, 2),
+ BPF_EXIT_INSN(),
+ /* if (val != 3) exit(3); */
+ BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_10, -8),
+ BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 3, 2),
+ BPF_MOV64_IMM(BPF_REG_0, 3),
+ BPF_EXIT_INSN(),
+ /* old = atomic_cmpxchg(&val, 3, 4); */
+ BPF_MOV64_IMM(BPF_REG_1, 4),
+ BPF_MOV64_IMM(BPF_REG_0, 3),
+ BPF_ATOMIC_OP(BPF_DW, BPF_CMPXCHG, BPF_REG_10, BPF_REG_1, -8),
+ /* if (old != 3) exit(4); */
+ BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 3, 2),
+ BPF_MOV64_IMM(BPF_REG_0, 4),
+ BPF_EXIT_INSN(),
+ /* if (val != 4) exit(5); */
+ BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_10, -8),
+ BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 4, 2),
+ BPF_MOV64_IMM(BPF_REG_0, 5),
+ BPF_EXIT_INSN(),
+ /* exit(0); */
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .result = ACCEPT,
+},
+{
+ "atomic compare-and-exchange smoketest - 32bit",
+ .insns = {
+ /* val = 3; */
+ BPF_ST_MEM(BPF_W, BPF_REG_10, -4, 3),
+ /* old = atomic_cmpxchg(&val, 2, 4); */
+ BPF_MOV32_IMM(BPF_REG_1, 4),
+ BPF_MOV32_IMM(BPF_REG_0, 2),
+ BPF_ATOMIC_OP(BPF_W, BPF_CMPXCHG, BPF_REG_10, BPF_REG_1, -4),
+ /* if (old != 3) exit(2); */
+ BPF_JMP32_IMM(BPF_JEQ, BPF_REG_0, 3, 2),
+ BPF_MOV32_IMM(BPF_REG_0, 2),
+ BPF_EXIT_INSN(),
+ /* if (val != 3) exit(3); */
+ BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_10, -4),
+ BPF_JMP32_IMM(BPF_JEQ, BPF_REG_0, 3, 2),
+ BPF_MOV32_IMM(BPF_REG_0, 3),
+ BPF_EXIT_INSN(),
+ /* old = atomic_cmpxchg(&val, 3, 4); */
+ BPF_MOV32_IMM(BPF_REG_1, 4),
+ BPF_MOV32_IMM(BPF_REG_0, 3),
+ BPF_ATOMIC_OP(BPF_W, BPF_CMPXCHG, BPF_REG_10, BPF_REG_1, -4),
+ /* if (old != 3) exit(4); */
+ BPF_JMP32_IMM(BPF_JEQ, BPF_REG_0, 3, 2),
+ BPF_MOV32_IMM(BPF_REG_0, 4),
+ BPF_EXIT_INSN(),
+ /* if (val != 4) exit(5); */
+ BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_10, -4),
+ BPF_JMP32_IMM(BPF_JEQ, BPF_REG_0, 4, 2),
+ BPF_MOV32_IMM(BPF_REG_0, 5),
+ BPF_EXIT_INSN(),
+ /* exit(0); */
+ BPF_MOV32_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .result = ACCEPT,
+},
+{
+ "Can't use cmpxchg on uninit src reg",
+ .insns = {
+ BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 3),
+ BPF_MOV64_IMM(BPF_REG_0, 3),
+ BPF_ATOMIC_OP(BPF_DW, BPF_CMPXCHG, BPF_REG_10, BPF_REG_2, -8),
+ BPF_EXIT_INSN(),
+ },
+ .result = REJECT,
+ .errstr = "!read_ok",
+},
+{
+ "BPF_W cmpxchg should zero top 32 bits",
+ .insns = {
+ /* r0 = U64_MAX; */
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_ALU64_IMM(BPF_SUB, BPF_REG_0, 1),
+ /* u64 val = r0; */
+ BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_0, -8),
+ /* r0 = (u32)atomic_cmpxchg((u32 *)&val, r0, 1); */
+ BPF_MOV32_IMM(BPF_REG_1, 1),
+ BPF_ATOMIC_OP(BPF_W, BPF_CMPXCHG, BPF_REG_10, BPF_REG_1, -8),
+ /* r1 = 0x00000000FFFFFFFFull; */
+ BPF_MOV64_IMM(BPF_REG_1, 1),
+ BPF_ALU64_IMM(BPF_LSH, BPF_REG_1, 32),
+ BPF_ALU64_IMM(BPF_SUB, BPF_REG_1, 1),
+ /* if (r0 != r1) exit(1); */
+ BPF_JMP_REG(BPF_JEQ, BPF_REG_0, BPF_REG_1, 2),
+ BPF_MOV32_IMM(BPF_REG_0, 1),
+ BPF_EXIT_INSN(),
+ /* exit(0); */
+ BPF_MOV32_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .result = ACCEPT,
+},
+{
+ "Dest pointer in r0 - fail",
+ .insns = {
+ /* val = 0; */
+ BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
+ /* r0 = &val */
+ BPF_MOV64_REG(BPF_REG_0, BPF_REG_10),
+ /* r0 = atomic_cmpxchg(&val, r0, 1); */
+ BPF_MOV64_IMM(BPF_REG_1, 1),
+ BPF_ATOMIC_OP(BPF_DW, BPF_CMPXCHG, BPF_REG_10, BPF_REG_1, -8),
+ /* if (r0 != 0) exit(1); */
+ BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 2),
+ BPF_MOV64_IMM(BPF_REG_0, 1),
+ BPF_EXIT_INSN(),
+ /* exit(0); */
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .result = ACCEPT,
+ .result_unpriv = REJECT,
+ .errstr_unpriv = "R0 leaks addr into mem",
+},
+{
+ "Dest pointer in r0 - succeed",
+ .insns = {
+ /* r0 = &val */
+ BPF_MOV64_REG(BPF_REG_0, BPF_REG_10),
+ /* val = r0; */
+ BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_0, -8),
+ /* r0 = atomic_cmpxchg(&val, r0, 0); */
+ BPF_MOV64_IMM(BPF_REG_1, 0),
+ BPF_ATOMIC_OP(BPF_DW, BPF_CMPXCHG, BPF_REG_10, BPF_REG_1, -8),
+ /* r1 = *r0 */
+ BPF_LDX_MEM(BPF_DW, BPF_REG_1, BPF_REG_0, -8),
+ /* exit(0); */
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .result = ACCEPT,
+ .result_unpriv = REJECT,
+ .errstr_unpriv = "R0 leaks addr into mem",
+},
+{
+ "Dest pointer in r0 - succeed, check 2",
+ .insns = {
+ /* r0 = &val */
+ BPF_MOV64_REG(BPF_REG_0, BPF_REG_10),
+ /* val = r0; */
+ BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_0, -8),
+ /* r5 = &val */
+ BPF_MOV64_REG(BPF_REG_5, BPF_REG_10),
+ /* r0 = atomic_cmpxchg(&val, r0, r5); */
+ BPF_ATOMIC_OP(BPF_DW, BPF_CMPXCHG, BPF_REG_10, BPF_REG_5, -8),
+ /* r1 = *r0 */
+ BPF_LDX_MEM(BPF_DW, BPF_REG_1, BPF_REG_0, -8),
+ /* exit(0); */
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .result = ACCEPT,
+ .result_unpriv = REJECT,
+ .errstr_unpriv = "R0 leaks addr into mem",
+},
+{
+ "Dest pointer in r0 - succeed, check 3",
+ .insns = {
+ /* r0 = &val */
+ BPF_MOV64_REG(BPF_REG_0, BPF_REG_10),
+ /* val = r0; */
+ BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_0, -8),
+ /* r5 = &val */
+ BPF_MOV64_REG(BPF_REG_5, BPF_REG_10),
+ /* r0 = atomic_cmpxchg(&val, r0, r5); */
+ BPF_ATOMIC_OP(BPF_W, BPF_CMPXCHG, BPF_REG_10, BPF_REG_5, -8),
+ /* exit(0); */
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .result = REJECT,
+ .errstr = "invalid size of register fill",
+ .errstr_unpriv = "R0 leaks addr into mem",
+},
+{
+ "Dest pointer in r0 - succeed, check 4",
+ .insns = {
+ /* r0 = &val */
+ BPF_MOV32_REG(BPF_REG_0, BPF_REG_10),
+ /* val = r0; */
+ BPF_STX_MEM(BPF_W, BPF_REG_10, BPF_REG_0, -8),
+ /* r5 = &val */
+ BPF_MOV32_REG(BPF_REG_5, BPF_REG_10),
+ /* r0 = atomic_cmpxchg(&val, r0, r5); */
+ BPF_ATOMIC_OP(BPF_W, BPF_CMPXCHG, BPF_REG_10, BPF_REG_5, -8),
+ /* r1 = *r10 */
+ BPF_LDX_MEM(BPF_W, BPF_REG_1, BPF_REG_10, -8),
+ /* exit(0); */
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .result = ACCEPT,
+ .result_unpriv = REJECT,
+ .errstr_unpriv = "R10 partial copy of pointer",
+},
+{
+ "Dest pointer in r0 - succeed, check 5",
+ .insns = {
+ /* r0 = &val */
+ BPF_MOV32_REG(BPF_REG_0, BPF_REG_10),
+ /* val = r0; */
+ BPF_STX_MEM(BPF_W, BPF_REG_10, BPF_REG_0, -8),
+ /* r5 = &val */
+ BPF_MOV32_REG(BPF_REG_5, BPF_REG_10),
+ /* r0 = atomic_cmpxchg(&val, r0, r5); */
+ BPF_ATOMIC_OP(BPF_W, BPF_CMPXCHG, BPF_REG_10, BPF_REG_5, -8),
+ /* r1 = *r0 */
+ BPF_LDX_MEM(BPF_W, BPF_REG_1, BPF_REG_0, -8),
+ /* exit(0); */
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .result = REJECT,
+ .errstr = "R0 invalid mem access",
+ .errstr_unpriv = "R10 partial copy of pointer",
+ .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
+},
diff --git a/tools/testing/selftests/bpf/verifier/atomic_fetch.c b/tools/testing/selftests/bpf/verifier/atomic_fetch.c
new file mode 100644
index 000000000000..5bf03fb4fa2b
--- /dev/null
+++ b/tools/testing/selftests/bpf/verifier/atomic_fetch.c
@@ -0,0 +1,151 @@
+{
+ "atomic dw/fetch and address leakage of (map ptr & -1) via stack slot",
+ .insns = {
+ BPF_LD_IMM64(BPF_REG_1, -1),
+ BPF_LD_MAP_FD(BPF_REG_8, 0),
+ BPF_LD_MAP_FD(BPF_REG_9, 0),
+ BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+ BPF_STX_MEM(BPF_DW, BPF_REG_2, BPF_REG_9, 0),
+ BPF_ATOMIC_OP(BPF_DW, BPF_AND | BPF_FETCH, BPF_REG_2, BPF_REG_1, 0),
+ BPF_LDX_MEM(BPF_DW, BPF_REG_9, BPF_REG_2, 0),
+ BPF_ST_MEM(BPF_DW, BPF_REG_2, 0, 0),
+ BPF_MOV64_REG(BPF_REG_1, BPF_REG_8),
+ BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem),
+ BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 1),
+ BPF_STX_MEM(BPF_DW, BPF_REG_0, BPF_REG_9, 0),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .fixup_map_array_48b = { 2, 4 },
+ .result = ACCEPT,
+ .result_unpriv = REJECT,
+ .errstr_unpriv = "leaking pointer from stack off -8",
+},
+{
+ "atomic dw/fetch and address leakage of (map ptr & -1) via returned value",
+ .insns = {
+ BPF_LD_IMM64(BPF_REG_1, -1),
+ BPF_LD_MAP_FD(BPF_REG_8, 0),
+ BPF_LD_MAP_FD(BPF_REG_9, 0),
+ BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+ BPF_STX_MEM(BPF_DW, BPF_REG_2, BPF_REG_9, 0),
+ BPF_ATOMIC_OP(BPF_DW, BPF_AND | BPF_FETCH, BPF_REG_2, BPF_REG_1, 0),
+ BPF_MOV64_REG(BPF_REG_9, BPF_REG_1),
+ BPF_ST_MEM(BPF_DW, BPF_REG_2, 0, 0),
+ BPF_MOV64_REG(BPF_REG_1, BPF_REG_8),
+ BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem),
+ BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 1),
+ BPF_STX_MEM(BPF_DW, BPF_REG_0, BPF_REG_9, 0),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .fixup_map_array_48b = { 2, 4 },
+ .result = ACCEPT,
+ .result_unpriv = REJECT,
+ .errstr_unpriv = "leaking pointer from stack off -8",
+},
+{
+ "atomic w/fetch and address leakage of (map ptr & -1) via stack slot",
+ .insns = {
+ BPF_LD_IMM64(BPF_REG_1, -1),
+ BPF_LD_MAP_FD(BPF_REG_8, 0),
+ BPF_LD_MAP_FD(BPF_REG_9, 0),
+ BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+ BPF_STX_MEM(BPF_DW, BPF_REG_2, BPF_REG_9, 0),
+ BPF_ATOMIC_OP(BPF_W, BPF_AND | BPF_FETCH, BPF_REG_2, BPF_REG_1, 0),
+ BPF_LDX_MEM(BPF_DW, BPF_REG_9, BPF_REG_2, 0),
+ BPF_ST_MEM(BPF_DW, BPF_REG_2, 0, 0),
+ BPF_MOV64_REG(BPF_REG_1, BPF_REG_8),
+ BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem),
+ BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 1),
+ BPF_STX_MEM(BPF_DW, BPF_REG_0, BPF_REG_9, 0),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .fixup_map_array_48b = { 2, 4 },
+ .result = REJECT,
+ .errstr = "invalid size of register fill",
+},
+{
+ "atomic w/fetch and address leakage of (map ptr & -1) via returned value",
+ .insns = {
+ BPF_LD_IMM64(BPF_REG_1, -1),
+ BPF_LD_MAP_FD(BPF_REG_8, 0),
+ BPF_LD_MAP_FD(BPF_REG_9, 0),
+ BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+ BPF_STX_MEM(BPF_DW, BPF_REG_2, BPF_REG_9, 0),
+ BPF_ATOMIC_OP(BPF_W, BPF_AND | BPF_FETCH, BPF_REG_2, BPF_REG_1, 0),
+ BPF_MOV64_REG(BPF_REG_9, BPF_REG_1),
+ BPF_ST_MEM(BPF_DW, BPF_REG_2, 0, 0),
+ BPF_MOV64_REG(BPF_REG_1, BPF_REG_8),
+ BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem),
+ BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 1),
+ BPF_STX_MEM(BPF_DW, BPF_REG_0, BPF_REG_9, 0),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .fixup_map_array_48b = { 2, 4 },
+ .result = REJECT,
+ .errstr = "invalid size of register fill",
+},
+#define __ATOMIC_FETCH_OP_TEST(src_reg, dst_reg, operand1, op, operand2, expect) \
+ { \
+ "atomic fetch " #op ", src=" #dst_reg " dst=" #dst_reg, \
+ .insns = { \
+ /* u64 val = operan1; */ \
+ BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, operand1), \
+ /* u64 old = atomic_fetch_add(&val, operand2); */ \
+ BPF_MOV64_REG(dst_reg, BPF_REG_10), \
+ BPF_MOV64_IMM(src_reg, operand2), \
+ BPF_ATOMIC_OP(BPF_DW, op, \
+ dst_reg, src_reg, -8), \
+ /* if (old != operand1) exit(1); */ \
+ BPF_JMP_IMM(BPF_JEQ, src_reg, operand1, 2), \
+ BPF_MOV64_IMM(BPF_REG_0, 1), \
+ BPF_EXIT_INSN(), \
+ /* if (val != result) exit (2); */ \
+ BPF_LDX_MEM(BPF_DW, BPF_REG_1, BPF_REG_10, -8), \
+ BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, expect, 2), \
+ BPF_MOV64_IMM(BPF_REG_0, 2), \
+ BPF_EXIT_INSN(), \
+ /* exit(0); */ \
+ BPF_MOV64_IMM(BPF_REG_0, 0), \
+ BPF_EXIT_INSN(), \
+ }, \
+ .result = ACCEPT, \
+ }
+__ATOMIC_FETCH_OP_TEST(BPF_REG_1, BPF_REG_2, 1, BPF_ADD | BPF_FETCH, 2, 3),
+__ATOMIC_FETCH_OP_TEST(BPF_REG_0, BPF_REG_1, 1, BPF_ADD | BPF_FETCH, 2, 3),
+__ATOMIC_FETCH_OP_TEST(BPF_REG_1, BPF_REG_0, 1, BPF_ADD | BPF_FETCH, 2, 3),
+__ATOMIC_FETCH_OP_TEST(BPF_REG_2, BPF_REG_3, 1, BPF_ADD | BPF_FETCH, 2, 3),
+__ATOMIC_FETCH_OP_TEST(BPF_REG_4, BPF_REG_5, 1, BPF_ADD | BPF_FETCH, 2, 3),
+__ATOMIC_FETCH_OP_TEST(BPF_REG_9, BPF_REG_8, 1, BPF_ADD | BPF_FETCH, 2, 3),
+__ATOMIC_FETCH_OP_TEST(BPF_REG_1, BPF_REG_2, 0x010, BPF_AND | BPF_FETCH, 0x011, 0x010),
+__ATOMIC_FETCH_OP_TEST(BPF_REG_0, BPF_REG_1, 0x010, BPF_AND | BPF_FETCH, 0x011, 0x010),
+__ATOMIC_FETCH_OP_TEST(BPF_REG_1, BPF_REG_0, 0x010, BPF_AND | BPF_FETCH, 0x011, 0x010),
+__ATOMIC_FETCH_OP_TEST(BPF_REG_2, BPF_REG_3, 0x010, BPF_AND | BPF_FETCH, 0x011, 0x010),
+__ATOMIC_FETCH_OP_TEST(BPF_REG_4, BPF_REG_5, 0x010, BPF_AND | BPF_FETCH, 0x011, 0x010),
+__ATOMIC_FETCH_OP_TEST(BPF_REG_9, BPF_REG_8, 0x010, BPF_AND | BPF_FETCH, 0x011, 0x010),
+__ATOMIC_FETCH_OP_TEST(BPF_REG_1, BPF_REG_2, 0x010, BPF_OR | BPF_FETCH, 0x011, 0x011),
+__ATOMIC_FETCH_OP_TEST(BPF_REG_0, BPF_REG_1, 0x010, BPF_OR | BPF_FETCH, 0x011, 0x011),
+__ATOMIC_FETCH_OP_TEST(BPF_REG_1, BPF_REG_0, 0x010, BPF_OR | BPF_FETCH, 0x011, 0x011),
+__ATOMIC_FETCH_OP_TEST(BPF_REG_2, BPF_REG_3, 0x010, BPF_OR | BPF_FETCH, 0x011, 0x011),
+__ATOMIC_FETCH_OP_TEST(BPF_REG_4, BPF_REG_5, 0x010, BPF_OR | BPF_FETCH, 0x011, 0x011),
+__ATOMIC_FETCH_OP_TEST(BPF_REG_9, BPF_REG_8, 0x010, BPF_OR | BPF_FETCH, 0x011, 0x011),
+__ATOMIC_FETCH_OP_TEST(BPF_REG_1, BPF_REG_2, 0x010, BPF_XOR | BPF_FETCH, 0x011, 0x001),
+__ATOMIC_FETCH_OP_TEST(BPF_REG_0, BPF_REG_1, 0x010, BPF_XOR | BPF_FETCH, 0x011, 0x001),
+__ATOMIC_FETCH_OP_TEST(BPF_REG_1, BPF_REG_0, 0x010, BPF_XOR | BPF_FETCH, 0x011, 0x001),
+__ATOMIC_FETCH_OP_TEST(BPF_REG_2, BPF_REG_3, 0x010, BPF_XOR | BPF_FETCH, 0x011, 0x001),
+__ATOMIC_FETCH_OP_TEST(BPF_REG_4, BPF_REG_5, 0x010, BPF_XOR | BPF_FETCH, 0x011, 0x001),
+__ATOMIC_FETCH_OP_TEST(BPF_REG_9, BPF_REG_8, 0x010, BPF_XOR | BPF_FETCH, 0x011, 0x001),
+__ATOMIC_FETCH_OP_TEST(BPF_REG_1, BPF_REG_2, 0x010, BPF_XCHG, 0x011, 0x011),
+__ATOMIC_FETCH_OP_TEST(BPF_REG_0, BPF_REG_1, 0x010, BPF_XCHG, 0x011, 0x011),
+__ATOMIC_FETCH_OP_TEST(BPF_REG_1, BPF_REG_0, 0x010, BPF_XCHG, 0x011, 0x011),
+__ATOMIC_FETCH_OP_TEST(BPF_REG_2, BPF_REG_3, 0x010, BPF_XCHG, 0x011, 0x011),
+__ATOMIC_FETCH_OP_TEST(BPF_REG_4, BPF_REG_5, 0x010, BPF_XCHG, 0x011, 0x011),
+__ATOMIC_FETCH_OP_TEST(BPF_REG_9, BPF_REG_8, 0x010, BPF_XCHG, 0x011, 0x011),
+#undef __ATOMIC_FETCH_OP_TEST
diff --git a/tools/testing/selftests/bpf/verifier/atomic_fetch_add.c b/tools/testing/selftests/bpf/verifier/atomic_fetch_add.c
new file mode 100644
index 000000000000..a91de8cd9def
--- /dev/null
+++ b/tools/testing/selftests/bpf/verifier/atomic_fetch_add.c
@@ -0,0 +1,106 @@
+{
+ "BPF_ATOMIC_FETCH_ADD smoketest - 64bit",
+ .insns = {
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ /* Write 3 to stack */
+ BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 3),
+ /* Put a 1 in R1, add it to the 3 on the stack, and load the value back into R1 */
+ BPF_MOV64_IMM(BPF_REG_1, 1),
+ BPF_ATOMIC_OP(BPF_DW, BPF_ADD | BPF_FETCH, BPF_REG_10, BPF_REG_1, -8),
+ /* Check the value we loaded back was 3 */
+ BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, 3, 2),
+ BPF_MOV64_IMM(BPF_REG_0, 1),
+ BPF_EXIT_INSN(),
+ /* Load value from stack */
+ BPF_LDX_MEM(BPF_DW, BPF_REG_1, BPF_REG_10, -8),
+ /* Check value loaded from stack was 4 */
+ BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, 4, 1),
+ BPF_MOV64_IMM(BPF_REG_0, 2),
+ BPF_EXIT_INSN(),
+ },
+ .result = ACCEPT,
+},
+{
+ "BPF_ATOMIC_FETCH_ADD smoketest - 32bit",
+ .insns = {
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ /* Write 3 to stack */
+ BPF_ST_MEM(BPF_W, BPF_REG_10, -4, 3),
+ /* Put a 1 in R1, add it to the 3 on the stack, and load the value back into R1 */
+ BPF_MOV32_IMM(BPF_REG_1, 1),
+ BPF_ATOMIC_OP(BPF_W, BPF_ADD | BPF_FETCH, BPF_REG_10, BPF_REG_1, -4),
+ /* Check the value we loaded back was 3 */
+ BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, 3, 2),
+ BPF_MOV64_IMM(BPF_REG_0, 1),
+ BPF_EXIT_INSN(),
+ /* Load value from stack */
+ BPF_LDX_MEM(BPF_W, BPF_REG_1, BPF_REG_10, -4),
+ /* Check value loaded from stack was 4 */
+ BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, 4, 1),
+ BPF_MOV64_IMM(BPF_REG_0, 2),
+ BPF_EXIT_INSN(),
+ },
+ .result = ACCEPT,
+},
+{
+ "Can't use ATM_FETCH_ADD on frame pointer",
+ .insns = {
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 3),
+ BPF_ATOMIC_OP(BPF_DW, BPF_ADD | BPF_FETCH, BPF_REG_10, BPF_REG_10, -8),
+ BPF_EXIT_INSN(),
+ },
+ .result = REJECT,
+ .errstr_unpriv = "R10 leaks addr into mem",
+ .errstr = "frame pointer is read only",
+},
+{
+ "Can't use ATM_FETCH_ADD on uninit src reg",
+ .insns = {
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 3),
+ BPF_ATOMIC_OP(BPF_DW, BPF_ADD | BPF_FETCH, BPF_REG_10, BPF_REG_2, -8),
+ BPF_EXIT_INSN(),
+ },
+ .result = REJECT,
+ /* It happens that the address leak check is first, but it would also be
+ * complain about the fact that we're trying to modify R10.
+ */
+ .errstr = "!read_ok",
+},
+{
+ "Can't use ATM_FETCH_ADD on uninit dst reg",
+ .insns = {
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_ATOMIC_OP(BPF_DW, BPF_ADD | BPF_FETCH, BPF_REG_2, BPF_REG_0, -8),
+ BPF_EXIT_INSN(),
+ },
+ .result = REJECT,
+ /* It happens that the address leak check is first, but it would also be
+ * complain about the fact that we're trying to modify R10.
+ */
+ .errstr = "!read_ok",
+},
+{
+ "Can't use ATM_FETCH_ADD on kernel memory",
+ .insns = {
+ /* This is an fentry prog, context is array of the args of the
+ * kernel function being called. Load first arg into R2.
+ */
+ BPF_LDX_MEM(BPF_DW, BPF_REG_2, BPF_REG_1, 0),
+ /* First arg of bpf_fentry_test7 is a pointer to a struct.
+ * Attempt to modify that struct. Verifier shouldn't let us
+ * because it's kernel memory.
+ */
+ BPF_MOV64_IMM(BPF_REG_3, 1),
+ BPF_ATOMIC_OP(BPF_DW, BPF_ADD | BPF_FETCH, BPF_REG_2, BPF_REG_3, 0),
+ /* Done */
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .prog_type = BPF_PROG_TYPE_TRACING,
+ .expected_attach_type = BPF_TRACE_FENTRY,
+ .kfunc = "bpf_fentry_test7",
+ .result = REJECT,
+ .errstr = "only read is supported",
+},
diff --git a/tools/testing/selftests/bpf/verifier/atomic_invalid.c b/tools/testing/selftests/bpf/verifier/atomic_invalid.c
new file mode 100644
index 000000000000..25f4ac1c69ab
--- /dev/null
+++ b/tools/testing/selftests/bpf/verifier/atomic_invalid.c
@@ -0,0 +1,25 @@
+#define __INVALID_ATOMIC_ACCESS_TEST(op) \
+ { \
+ "atomic " #op " access through non-pointer ", \
+ .insns = { \
+ BPF_MOV64_IMM(BPF_REG_0, 1), \
+ BPF_MOV64_IMM(BPF_REG_1, 0), \
+ BPF_ATOMIC_OP(BPF_DW, op, BPF_REG_1, BPF_REG_0, -8), \
+ BPF_MOV64_IMM(BPF_REG_0, 0), \
+ BPF_EXIT_INSN(), \
+ }, \
+ .result = REJECT, \
+ .errstr = "R1 invalid mem access 'scalar'" \
+ }
+__INVALID_ATOMIC_ACCESS_TEST(BPF_ADD),
+__INVALID_ATOMIC_ACCESS_TEST(BPF_ADD | BPF_FETCH),
+__INVALID_ATOMIC_ACCESS_TEST(BPF_ADD),
+__INVALID_ATOMIC_ACCESS_TEST(BPF_ADD | BPF_FETCH),
+__INVALID_ATOMIC_ACCESS_TEST(BPF_AND),
+__INVALID_ATOMIC_ACCESS_TEST(BPF_AND | BPF_FETCH),
+__INVALID_ATOMIC_ACCESS_TEST(BPF_OR),
+__INVALID_ATOMIC_ACCESS_TEST(BPF_OR | BPF_FETCH),
+__INVALID_ATOMIC_ACCESS_TEST(BPF_XOR),
+__INVALID_ATOMIC_ACCESS_TEST(BPF_XOR | BPF_FETCH),
+__INVALID_ATOMIC_ACCESS_TEST(BPF_XCHG),
+__INVALID_ATOMIC_ACCESS_TEST(BPF_CMPXCHG),
diff --git a/tools/testing/selftests/bpf/verifier/atomic_or.c b/tools/testing/selftests/bpf/verifier/atomic_or.c
new file mode 100644
index 000000000000..9d0716ac5080
--- /dev/null
+++ b/tools/testing/selftests/bpf/verifier/atomic_or.c
@@ -0,0 +1,102 @@
+{
+ "BPF_ATOMIC OR without fetch",
+ .insns = {
+ /* val = 0x110; */
+ BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0x110),
+ /* atomic_or(&val, 0x011); */
+ BPF_MOV64_IMM(BPF_REG_1, 0x011),
+ BPF_ATOMIC_OP(BPF_DW, BPF_OR, BPF_REG_10, BPF_REG_1, -8),
+ /* if (val != 0x111) exit(2); */
+ BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_10, -8),
+ BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0x111, 2),
+ BPF_MOV64_IMM(BPF_REG_0, 2),
+ BPF_EXIT_INSN(),
+ /* r1 should not be clobbered, no BPF_FETCH flag */
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, 0x011, 1),
+ BPF_MOV64_IMM(BPF_REG_0, 1),
+ BPF_EXIT_INSN(),
+ },
+ .result = ACCEPT,
+},
+{
+ "BPF_ATOMIC OR with fetch",
+ .insns = {
+ BPF_MOV64_IMM(BPF_REG_0, 123),
+ /* val = 0x110; */
+ BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0x110),
+ /* old = atomic_fetch_or(&val, 0x011); */
+ BPF_MOV64_IMM(BPF_REG_1, 0x011),
+ BPF_ATOMIC_OP(BPF_DW, BPF_OR | BPF_FETCH, BPF_REG_10, BPF_REG_1, -8),
+ /* if (old != 0x110) exit(3); */
+ BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, 0x110, 2),
+ BPF_MOV64_IMM(BPF_REG_0, 3),
+ BPF_EXIT_INSN(),
+ /* if (val != 0x111) exit(2); */
+ BPF_LDX_MEM(BPF_DW, BPF_REG_1, BPF_REG_10, -8),
+ BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, 0x111, 2),
+ BPF_MOV64_IMM(BPF_REG_1, 2),
+ BPF_EXIT_INSN(),
+ /* Check R0 wasn't clobbered (for fear of x86 JIT bug) */
+ BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 123, 2),
+ BPF_MOV64_IMM(BPF_REG_0, 1),
+ BPF_EXIT_INSN(),
+ /* exit(0); */
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .result = ACCEPT,
+},
+{
+ "BPF_ATOMIC OR with fetch 32bit",
+ .insns = {
+ /* r0 = (s64) -1 */
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_ALU64_IMM(BPF_SUB, BPF_REG_0, 1),
+ /* val = 0x110; */
+ BPF_ST_MEM(BPF_W, BPF_REG_10, -4, 0x110),
+ /* old = atomic_fetch_or(&val, 0x011); */
+ BPF_MOV32_IMM(BPF_REG_1, 0x011),
+ BPF_ATOMIC_OP(BPF_W, BPF_OR | BPF_FETCH, BPF_REG_10, BPF_REG_1, -4),
+ /* if (old != 0x110) exit(3); */
+ BPF_JMP32_IMM(BPF_JEQ, BPF_REG_1, 0x110, 2),
+ BPF_MOV32_IMM(BPF_REG_0, 3),
+ BPF_EXIT_INSN(),
+ /* if (val != 0x111) exit(2); */
+ BPF_LDX_MEM(BPF_W, BPF_REG_1, BPF_REG_10, -4),
+ BPF_JMP32_IMM(BPF_JEQ, BPF_REG_1, 0x111, 2),
+ BPF_MOV32_IMM(BPF_REG_1, 2),
+ BPF_EXIT_INSN(),
+ /* Check R0 wasn't clobbered (for fear of x86 JIT bug)
+ * It should be -1 so add 1 to get exit code.
+ */
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 1),
+ BPF_EXIT_INSN(),
+ },
+ .result = ACCEPT,
+},
+{
+ "BPF_W atomic_fetch_or should zero top 32 bits",
+ .insns = {
+ /* r1 = U64_MAX; */
+ BPF_MOV64_IMM(BPF_REG_1, 0),
+ BPF_ALU64_IMM(BPF_SUB, BPF_REG_1, 1),
+ /* u64 val = r1; */
+ BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_1, -8),
+ /* r1 = (u32)atomic_fetch_or((u32 *)&val, 2); */
+ BPF_MOV32_IMM(BPF_REG_1, 2),
+ BPF_ATOMIC_OP(BPF_W, BPF_OR | BPF_FETCH, BPF_REG_10, BPF_REG_1, -8),
+ /* r2 = 0x00000000FFFFFFFF; */
+ BPF_MOV64_IMM(BPF_REG_2, 1),
+ BPF_ALU64_IMM(BPF_LSH, BPF_REG_2, 32),
+ BPF_ALU64_IMM(BPF_SUB, BPF_REG_2, 1),
+ /* if (r2 != r1) exit(1); */
+ BPF_JMP_REG(BPF_JEQ, BPF_REG_2, BPF_REG_1, 2),
+ BPF_MOV64_REG(BPF_REG_0, BPF_REG_1),
+ BPF_EXIT_INSN(),
+ /* exit(0); */
+ BPF_MOV32_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .result = ACCEPT,
+},
diff --git a/tools/testing/selftests/bpf/verifier/atomic_xchg.c b/tools/testing/selftests/bpf/verifier/atomic_xchg.c
new file mode 100644
index 000000000000..33e2d6c973ee
--- /dev/null
+++ b/tools/testing/selftests/bpf/verifier/atomic_xchg.c
@@ -0,0 +1,46 @@
+{
+ "atomic exchange smoketest - 64bit",
+ .insns = {
+ /* val = 3; */
+ BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 3),
+ /* old = atomic_xchg(&val, 4); */
+ BPF_MOV64_IMM(BPF_REG_1, 4),
+ BPF_ATOMIC_OP(BPF_DW, BPF_XCHG, BPF_REG_10, BPF_REG_1, -8),
+ /* if (old != 3) exit(1); */
+ BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, 3, 2),
+ BPF_MOV64_IMM(BPF_REG_0, 1),
+ BPF_EXIT_INSN(),
+ /* if (val != 4) exit(2); */
+ BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_10, -8),
+ BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 4, 2),
+ BPF_MOV64_IMM(BPF_REG_0, 2),
+ BPF_EXIT_INSN(),
+ /* exit(0); */
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .result = ACCEPT,
+},
+{
+ "atomic exchange smoketest - 32bit",
+ .insns = {
+ /* val = 3; */
+ BPF_ST_MEM(BPF_W, BPF_REG_10, -4, 3),
+ /* old = atomic_xchg(&val, 4); */
+ BPF_MOV32_IMM(BPF_REG_1, 4),
+ BPF_ATOMIC_OP(BPF_W, BPF_XCHG, BPF_REG_10, BPF_REG_1, -4),
+ /* if (old != 3) exit(1); */
+ BPF_JMP32_IMM(BPF_JEQ, BPF_REG_1, 3, 2),
+ BPF_MOV32_IMM(BPF_REG_0, 1),
+ BPF_EXIT_INSN(),
+ /* if (val != 4) exit(2); */
+ BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_10, -4),
+ BPF_JMP32_IMM(BPF_JEQ, BPF_REG_0, 4, 2),
+ BPF_MOV32_IMM(BPF_REG_0, 2),
+ BPF_EXIT_INSN(),
+ /* exit(0); */
+ BPF_MOV32_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .result = ACCEPT,
+},
diff --git a/tools/testing/selftests/bpf/verifier/atomic_xor.c b/tools/testing/selftests/bpf/verifier/atomic_xor.c
new file mode 100644
index 000000000000..74e8fb46694b
--- /dev/null
+++ b/tools/testing/selftests/bpf/verifier/atomic_xor.c
@@ -0,0 +1,77 @@
+{
+ "BPF_ATOMIC XOR without fetch",
+ .insns = {
+ /* val = 0x110; */
+ BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0x110),
+ /* atomic_xor(&val, 0x011); */
+ BPF_MOV64_IMM(BPF_REG_1, 0x011),
+ BPF_ATOMIC_OP(BPF_DW, BPF_XOR, BPF_REG_10, BPF_REG_1, -8),
+ /* if (val != 0x101) exit(2); */
+ BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_10, -8),
+ BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0x101, 2),
+ BPF_MOV64_IMM(BPF_REG_0, 2),
+ BPF_EXIT_INSN(),
+ /* r1 should not be clobbered, no BPF_FETCH flag */
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, 0x011, 1),
+ BPF_MOV64_IMM(BPF_REG_0, 1),
+ BPF_EXIT_INSN(),
+ },
+ .result = ACCEPT,
+},
+{
+ "BPF_ATOMIC XOR with fetch",
+ .insns = {
+ BPF_MOV64_IMM(BPF_REG_0, 123),
+ /* val = 0x110; */
+ BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0x110),
+ /* old = atomic_fetch_xor(&val, 0x011); */
+ BPF_MOV64_IMM(BPF_REG_1, 0x011),
+ BPF_ATOMIC_OP(BPF_DW, BPF_XOR | BPF_FETCH, BPF_REG_10, BPF_REG_1, -8),
+ /* if (old != 0x110) exit(3); */
+ BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, 0x110, 2),
+ BPF_MOV64_IMM(BPF_REG_0, 3),
+ BPF_EXIT_INSN(),
+ /* if (val != 0x101) exit(2); */
+ BPF_LDX_MEM(BPF_DW, BPF_REG_1, BPF_REG_10, -8),
+ BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, 0x101, 2),
+ BPF_MOV64_IMM(BPF_REG_1, 2),
+ BPF_EXIT_INSN(),
+ /* Check R0 wasn't clobbered (fxor fear of x86 JIT bug) */
+ BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 123, 2),
+ BPF_MOV64_IMM(BPF_REG_0, 1),
+ BPF_EXIT_INSN(),
+ /* exit(0); */
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .result = ACCEPT,
+},
+{
+ "BPF_ATOMIC XOR with fetch 32bit",
+ .insns = {
+ /* r0 = (s64) -1 */
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_ALU64_IMM(BPF_SUB, BPF_REG_0, 1),
+ /* val = 0x110; */
+ BPF_ST_MEM(BPF_W, BPF_REG_10, -4, 0x110),
+ /* old = atomic_fetch_xor(&val, 0x011); */
+ BPF_MOV32_IMM(BPF_REG_1, 0x011),
+ BPF_ATOMIC_OP(BPF_W, BPF_XOR | BPF_FETCH, BPF_REG_10, BPF_REG_1, -4),
+ /* if (old != 0x110) exit(3); */
+ BPF_JMP32_IMM(BPF_JEQ, BPF_REG_1, 0x110, 2),
+ BPF_MOV32_IMM(BPF_REG_0, 3),
+ BPF_EXIT_INSN(),
+ /* if (val != 0x101) exit(2); */
+ BPF_LDX_MEM(BPF_W, BPF_REG_1, BPF_REG_10, -4),
+ BPF_JMP32_IMM(BPF_JEQ, BPF_REG_1, 0x101, 2),
+ BPF_MOV32_IMM(BPF_REG_1, 2),
+ BPF_EXIT_INSN(),
+ /* Check R0 wasn't clobbered (fxor fear of x86 JIT bug)
+ * It should be -1 so add 1 to get exit code.
+ */
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 1),
+ BPF_EXIT_INSN(),
+ },
+ .result = ACCEPT,
+},
diff --git a/tools/testing/selftests/bpf/verifier/basic.c b/tools/testing/selftests/bpf/verifier/basic.c
index b8d18642653a..de84f0d57082 100644
--- a/tools/testing/selftests/bpf/verifier/basic.c
+++ b/tools/testing/selftests/bpf/verifier/basic.c
@@ -2,7 +2,7 @@
"empty prog",
.insns = {
},
- .errstr = "unknown opcode 00",
+ .errstr = "last insn is not an exit or jmp",
.result = REJECT,
},
{
diff --git a/tools/testing/selftests/bpf/verifier/basic_instr.c b/tools/testing/selftests/bpf/verifier/basic_instr.c
index 071dbc889e8c..bd928a72ad73 100644
--- a/tools/testing/selftests/bpf/verifier/basic_instr.c
+++ b/tools/testing/selftests/bpf/verifier/basic_instr.c
@@ -176,11 +176,11 @@
.retval = 1,
},
{
- "invalid 64-bit BPF_END",
+ "invalid 64-bit BPF_END with BPF_TO_BE",
.insns = {
BPF_MOV32_IMM(BPF_REG_0, 0),
{
- .code = BPF_ALU64 | BPF_END | BPF_TO_LE,
+ .code = BPF_ALU64 | BPF_END | BPF_TO_BE,
.dst_reg = BPF_REG_0,
.src_reg = 0,
.off = 0,
@@ -188,7 +188,7 @@
},
BPF_EXIT_INSN(),
},
- .errstr = "unknown opcode d7",
+ .errstr = "unknown opcode df",
.result = REJECT,
},
{
diff --git a/tools/testing/selftests/bpf/verifier/basic_stack.c b/tools/testing/selftests/bpf/verifier/basic_stack.c
deleted file mode 100644
index b56f8117c09d..000000000000
--- a/tools/testing/selftests/bpf/verifier/basic_stack.c
+++ /dev/null
@@ -1,64 +0,0 @@
-{
- "stack out of bounds",
- .insns = {
- BPF_ST_MEM(BPF_DW, BPF_REG_10, 8, 0),
- BPF_EXIT_INSN(),
- },
- .errstr = "invalid stack",
- .result = REJECT,
-},
-{
- "uninitialized stack1",
- .insns = {
- BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
- BPF_LD_MAP_FD(BPF_REG_1, 0),
- BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
- BPF_EXIT_INSN(),
- },
- .fixup_map_hash_8b = { 2 },
- .errstr = "invalid indirect read from stack",
- .result = REJECT,
-},
-{
- "uninitialized stack2",
- .insns = {
- BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
- BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_2, -8),
- BPF_EXIT_INSN(),
- },
- .errstr = "invalid read from stack",
- .result = REJECT,
-},
-{
- "invalid fp arithmetic",
- /* If this gets ever changed, make sure JITs can deal with it. */
- .insns = {
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_MOV64_REG(BPF_REG_1, BPF_REG_10),
- BPF_ALU64_IMM(BPF_SUB, BPF_REG_1, 8),
- BPF_STX_MEM(BPF_DW, BPF_REG_1, BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- },
- .errstr = "R1 subtraction from stack pointer",
- .result = REJECT,
-},
-{
- "non-invalid fp arithmetic",
- .insns = {
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_0, -8),
- BPF_EXIT_INSN(),
- },
- .result = ACCEPT,
-},
-{
- "misaligned read from stack",
- .insns = {
- BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
- BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_2, -4),
- BPF_EXIT_INSN(),
- },
- .errstr = "misaligned stack access",
- .result = REJECT,
-},
diff --git a/tools/testing/selftests/bpf/verifier/bounds.c b/tools/testing/selftests/bpf/verifier/bounds.c
deleted file mode 100644
index 4d6645f2874c..000000000000
--- a/tools/testing/selftests/bpf/verifier/bounds.c
+++ /dev/null
@@ -1,559 +0,0 @@
-{
- "subtraction bounds (map value) variant 1",
- .insns = {
- BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
- BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
- BPF_LD_MAP_FD(BPF_REG_1, 0),
- BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
- BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 9),
- BPF_LDX_MEM(BPF_B, BPF_REG_1, BPF_REG_0, 0),
- BPF_JMP_IMM(BPF_JGT, BPF_REG_1, 0xff, 7),
- BPF_LDX_MEM(BPF_B, BPF_REG_3, BPF_REG_0, 1),
- BPF_JMP_IMM(BPF_JGT, BPF_REG_3, 0xff, 5),
- BPF_ALU64_REG(BPF_SUB, BPF_REG_1, BPF_REG_3),
- BPF_ALU64_IMM(BPF_RSH, BPF_REG_1, 56),
- BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_1),
- BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- },
- .fixup_map_hash_8b = { 3 },
- .errstr = "R0 max value is outside of the allowed memory range",
- .result = REJECT,
-},
-{
- "subtraction bounds (map value) variant 2",
- .insns = {
- BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
- BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
- BPF_LD_MAP_FD(BPF_REG_1, 0),
- BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
- BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 8),
- BPF_LDX_MEM(BPF_B, BPF_REG_1, BPF_REG_0, 0),
- BPF_JMP_IMM(BPF_JGT, BPF_REG_1, 0xff, 6),
- BPF_LDX_MEM(BPF_B, BPF_REG_3, BPF_REG_0, 1),
- BPF_JMP_IMM(BPF_JGT, BPF_REG_3, 0xff, 4),
- BPF_ALU64_REG(BPF_SUB, BPF_REG_1, BPF_REG_3),
- BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_1),
- BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- },
- .fixup_map_hash_8b = { 3 },
- .errstr = "R0 min value is negative, either use unsigned index or do a if (index >=0) check.",
- .errstr_unpriv = "R1 has unknown scalar with mixed signed bounds",
- .result = REJECT,
-},
-{
- "check subtraction on pointers for unpriv",
- .insns = {
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_LD_MAP_FD(BPF_REG_ARG1, 0),
- BPF_MOV64_REG(BPF_REG_ARG2, BPF_REG_FP),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_ARG2, -8),
- BPF_ST_MEM(BPF_DW, BPF_REG_ARG2, 0, 9),
- BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
- BPF_MOV64_REG(BPF_REG_9, BPF_REG_FP),
- BPF_ALU64_REG(BPF_SUB, BPF_REG_9, BPF_REG_0),
- BPF_LD_MAP_FD(BPF_REG_ARG1, 0),
- BPF_MOV64_REG(BPF_REG_ARG2, BPF_REG_FP),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_ARG2, -8),
- BPF_ST_MEM(BPF_DW, BPF_REG_ARG2, 0, 0),
- BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
- BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 1),
- BPF_EXIT_INSN(),
- BPF_STX_MEM(BPF_DW, BPF_REG_0, BPF_REG_9, 0),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- },
- .fixup_map_hash_8b = { 1, 9 },
- .result = ACCEPT,
- .result_unpriv = REJECT,
- .errstr_unpriv = "R9 pointer -= pointer prohibited",
-},
-{
- "bounds check based on zero-extended MOV",
- .insns = {
- BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
- BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
- BPF_LD_MAP_FD(BPF_REG_1, 0),
- BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
- BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 4),
- /* r2 = 0x0000'0000'ffff'ffff */
- BPF_MOV32_IMM(BPF_REG_2, 0xffffffff),
- /* r2 = 0 */
- BPF_ALU64_IMM(BPF_RSH, BPF_REG_2, 32),
- /* no-op */
- BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_2),
- /* access at offset 0 */
- BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_0, 0),
- /* exit */
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- },
- .fixup_map_hash_8b = { 3 },
- .result = ACCEPT
-},
-{
- "bounds check based on sign-extended MOV. test1",
- .insns = {
- BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
- BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
- BPF_LD_MAP_FD(BPF_REG_1, 0),
- BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
- BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 4),
- /* r2 = 0xffff'ffff'ffff'ffff */
- BPF_MOV64_IMM(BPF_REG_2, 0xffffffff),
- /* r2 = 0xffff'ffff */
- BPF_ALU64_IMM(BPF_RSH, BPF_REG_2, 32),
- /* r0 = <oob pointer> */
- BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_2),
- /* access to OOB pointer */
- BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_0, 0),
- /* exit */
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- },
- .fixup_map_hash_8b = { 3 },
- .errstr = "map_value pointer and 4294967295",
- .result = REJECT
-},
-{
- "bounds check based on sign-extended MOV. test2",
- .insns = {
- BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
- BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
- BPF_LD_MAP_FD(BPF_REG_1, 0),
- BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
- BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 4),
- /* r2 = 0xffff'ffff'ffff'ffff */
- BPF_MOV64_IMM(BPF_REG_2, 0xffffffff),
- /* r2 = 0xfff'ffff */
- BPF_ALU64_IMM(BPF_RSH, BPF_REG_2, 36),
- /* r0 = <oob pointer> */
- BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_2),
- /* access to OOB pointer */
- BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_0, 0),
- /* exit */
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- },
- .fixup_map_hash_8b = { 3 },
- .errstr = "R0 min value is outside of the allowed memory range",
- .result = REJECT
-},
-{
- "bounds check based on reg_off + var_off + insn_off. test1",
- .insns = {
- BPF_LDX_MEM(BPF_W, BPF_REG_6, BPF_REG_1,
- offsetof(struct __sk_buff, mark)),
- BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
- BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
- BPF_LD_MAP_FD(BPF_REG_1, 0),
- BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
- BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 4),
- BPF_ALU64_IMM(BPF_AND, BPF_REG_6, 1),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_6, (1 << 29) - 1),
- BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_6),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, (1 << 29) - 1),
- BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_0, 3),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- },
- .fixup_map_hash_8b = { 4 },
- .errstr = "value_size=8 off=1073741825",
- .result = REJECT,
- .prog_type = BPF_PROG_TYPE_SCHED_CLS,
-},
-{
- "bounds check based on reg_off + var_off + insn_off. test2",
- .insns = {
- BPF_LDX_MEM(BPF_W, BPF_REG_6, BPF_REG_1,
- offsetof(struct __sk_buff, mark)),
- BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
- BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
- BPF_LD_MAP_FD(BPF_REG_1, 0),
- BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
- BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 4),
- BPF_ALU64_IMM(BPF_AND, BPF_REG_6, 1),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_6, (1 << 30) - 1),
- BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_6),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, (1 << 29) - 1),
- BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_0, 3),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- },
- .fixup_map_hash_8b = { 4 },
- .errstr = "value 1073741823",
- .result = REJECT,
- .prog_type = BPF_PROG_TYPE_SCHED_CLS,
-},
-{
- "bounds check after truncation of non-boundary-crossing range",
- .insns = {
- BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
- BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
- BPF_LD_MAP_FD(BPF_REG_1, 0),
- BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
- BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 9),
- /* r1 = [0x00, 0xff] */
- BPF_LDX_MEM(BPF_B, BPF_REG_1, BPF_REG_0, 0),
- BPF_MOV64_IMM(BPF_REG_2, 1),
- /* r2 = 0x10'0000'0000 */
- BPF_ALU64_IMM(BPF_LSH, BPF_REG_2, 36),
- /* r1 = [0x10'0000'0000, 0x10'0000'00ff] */
- BPF_ALU64_REG(BPF_ADD, BPF_REG_1, BPF_REG_2),
- /* r1 = [0x10'7fff'ffff, 0x10'8000'00fe] */
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 0x7fffffff),
- /* r1 = [0x00, 0xff] */
- BPF_ALU32_IMM(BPF_SUB, BPF_REG_1, 0x7fffffff),
- /* r1 = 0 */
- BPF_ALU64_IMM(BPF_RSH, BPF_REG_1, 8),
- /* no-op */
- BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_1),
- /* access at offset 0 */
- BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_0, 0),
- /* exit */
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- },
- .fixup_map_hash_8b = { 3 },
- .result = ACCEPT
-},
-{
- "bounds check after truncation of boundary-crossing range (1)",
- .insns = {
- BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
- BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
- BPF_LD_MAP_FD(BPF_REG_1, 0),
- BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
- BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 8),
- /* r1 = [0x00, 0xff] */
- BPF_LDX_MEM(BPF_B, BPF_REG_1, BPF_REG_0, 0),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 0xffffff80 >> 1),
- /* r1 = [0xffff'ff80, 0x1'0000'007f] */
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 0xffffff80 >> 1),
- /* r1 = [0xffff'ff80, 0xffff'ffff] or
- * [0x0000'0000, 0x0000'007f]
- */
- BPF_ALU32_IMM(BPF_ADD, BPF_REG_1, 0),
- BPF_ALU64_IMM(BPF_SUB, BPF_REG_1, 0xffffff80 >> 1),
- /* r1 = [0x00, 0xff] or
- * [0xffff'ffff'0000'0080, 0xffff'ffff'ffff'ffff]
- */
- BPF_ALU64_IMM(BPF_SUB, BPF_REG_1, 0xffffff80 >> 1),
- /* error on OOB pointer computation */
- BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_1),
- /* exit */
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- },
- .fixup_map_hash_8b = { 3 },
- /* not actually fully unbounded, but the bound is very high */
- .errstr_unpriv = "R1 has unknown scalar with mixed signed bounds, pointer arithmetic with it prohibited for !root",
- .result_unpriv = REJECT,
- .errstr = "value -4294967168 makes map_value pointer be out of bounds",
- .result = REJECT,
-},
-{
- "bounds check after truncation of boundary-crossing range (2)",
- .insns = {
- BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
- BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
- BPF_LD_MAP_FD(BPF_REG_1, 0),
- BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
- BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 8),
- /* r1 = [0x00, 0xff] */
- BPF_LDX_MEM(BPF_B, BPF_REG_1, BPF_REG_0, 0),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 0xffffff80 >> 1),
- /* r1 = [0xffff'ff80, 0x1'0000'007f] */
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 0xffffff80 >> 1),
- /* r1 = [0xffff'ff80, 0xffff'ffff] or
- * [0x0000'0000, 0x0000'007f]
- * difference to previous test: truncation via MOV32
- * instead of ALU32.
- */
- BPF_MOV32_REG(BPF_REG_1, BPF_REG_1),
- BPF_ALU64_IMM(BPF_SUB, BPF_REG_1, 0xffffff80 >> 1),
- /* r1 = [0x00, 0xff] or
- * [0xffff'ffff'0000'0080, 0xffff'ffff'ffff'ffff]
- */
- BPF_ALU64_IMM(BPF_SUB, BPF_REG_1, 0xffffff80 >> 1),
- /* error on OOB pointer computation */
- BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_1),
- /* exit */
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- },
- .fixup_map_hash_8b = { 3 },
- /* not actually fully unbounded, but the bound is very high */
- .errstr_unpriv = "R1 has unknown scalar with mixed signed bounds, pointer arithmetic with it prohibited for !root",
- .result_unpriv = REJECT,
- .errstr = "value -4294967168 makes map_value pointer be out of bounds",
- .result = REJECT,
-},
-{
- "bounds check after wrapping 32-bit addition",
- .insns = {
- BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
- BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
- BPF_LD_MAP_FD(BPF_REG_1, 0),
- BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
- BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 5),
- /* r1 = 0x7fff'ffff */
- BPF_MOV64_IMM(BPF_REG_1, 0x7fffffff),
- /* r1 = 0xffff'fffe */
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 0x7fffffff),
- /* r1 = 0 */
- BPF_ALU32_IMM(BPF_ADD, BPF_REG_1, 2),
- /* no-op */
- BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_1),
- /* access at offset 0 */
- BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_0, 0),
- /* exit */
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- },
- .fixup_map_hash_8b = { 3 },
- .result = ACCEPT
-},
-{
- "bounds check after shift with oversized count operand",
- .insns = {
- BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
- BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
- BPF_LD_MAP_FD(BPF_REG_1, 0),
- BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
- BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 6),
- BPF_MOV64_IMM(BPF_REG_2, 32),
- BPF_MOV64_IMM(BPF_REG_1, 1),
- /* r1 = (u32)1 << (u32)32 = ? */
- BPF_ALU32_REG(BPF_LSH, BPF_REG_1, BPF_REG_2),
- /* r1 = [0x0000, 0xffff] */
- BPF_ALU64_IMM(BPF_AND, BPF_REG_1, 0xffff),
- /* computes unknown pointer, potentially OOB */
- BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_1),
- /* potentially OOB access */
- BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_0, 0),
- /* exit */
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- },
- .fixup_map_hash_8b = { 3 },
- .errstr = "R0 max value is outside of the allowed memory range",
- .result = REJECT
-},
-{
- "bounds check after right shift of maybe-negative number",
- .insns = {
- BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
- BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
- BPF_LD_MAP_FD(BPF_REG_1, 0),
- BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
- BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 6),
- /* r1 = [0x00, 0xff] */
- BPF_LDX_MEM(BPF_B, BPF_REG_1, BPF_REG_0, 0),
- /* r1 = [-0x01, 0xfe] */
- BPF_ALU64_IMM(BPF_SUB, BPF_REG_1, 1),
- /* r1 = 0 or 0xff'ffff'ffff'ffff */
- BPF_ALU64_IMM(BPF_RSH, BPF_REG_1, 8),
- /* r1 = 0 or 0xffff'ffff'ffff */
- BPF_ALU64_IMM(BPF_RSH, BPF_REG_1, 8),
- /* computes unknown pointer, potentially OOB */
- BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_1),
- /* potentially OOB access */
- BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_0, 0),
- /* exit */
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- },
- .fixup_map_hash_8b = { 3 },
- .errstr = "R0 unbounded memory access",
- .result = REJECT
-},
-{
- "bounds check after 32-bit right shift with 64-bit input",
- .insns = {
- BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
- BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
- BPF_LD_MAP_FD(BPF_REG_1, 0),
- BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
- BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 6),
- /* r1 = 2 */
- BPF_MOV64_IMM(BPF_REG_1, 2),
- /* r1 = 1<<32 */
- BPF_ALU64_IMM(BPF_LSH, BPF_REG_1, 31),
- /* r1 = 0 (NOT 2!) */
- BPF_ALU32_IMM(BPF_RSH, BPF_REG_1, 31),
- /* r1 = 0xffff'fffe (NOT 0!) */
- BPF_ALU32_IMM(BPF_SUB, BPF_REG_1, 2),
- /* error on computing OOB pointer */
- BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_1),
- /* exit */
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- },
- .fixup_map_hash_8b = { 3 },
- .errstr = "math between map_value pointer and 4294967294 is not allowed",
- .result = REJECT,
-},
-{
- "bounds check map access with off+size signed 32bit overflow. test1",
- .insns = {
- BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
- BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
- BPF_LD_MAP_FD(BPF_REG_1, 0),
- BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
- BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 1),
- BPF_EXIT_INSN(),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 0x7ffffffe),
- BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_0, 0),
- BPF_JMP_A(0),
- BPF_EXIT_INSN(),
- },
- .fixup_map_hash_8b = { 3 },
- .errstr = "map_value pointer and 2147483646",
- .result = REJECT
-},
-{
- "bounds check map access with off+size signed 32bit overflow. test2",
- .insns = {
- BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
- BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
- BPF_LD_MAP_FD(BPF_REG_1, 0),
- BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
- BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 1),
- BPF_EXIT_INSN(),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 0x1fffffff),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 0x1fffffff),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 0x1fffffff),
- BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_0, 0),
- BPF_JMP_A(0),
- BPF_EXIT_INSN(),
- },
- .fixup_map_hash_8b = { 3 },
- .errstr = "pointer offset 1073741822",
- .errstr_unpriv = "R0 pointer arithmetic of map value goes out of range",
- .result = REJECT
-},
-{
- "bounds check map access with off+size signed 32bit overflow. test3",
- .insns = {
- BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
- BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
- BPF_LD_MAP_FD(BPF_REG_1, 0),
- BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
- BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 1),
- BPF_EXIT_INSN(),
- BPF_ALU64_IMM(BPF_SUB, BPF_REG_0, 0x1fffffff),
- BPF_ALU64_IMM(BPF_SUB, BPF_REG_0, 0x1fffffff),
- BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_0, 2),
- BPF_JMP_A(0),
- BPF_EXIT_INSN(),
- },
- .fixup_map_hash_8b = { 3 },
- .errstr = "pointer offset -1073741822",
- .errstr_unpriv = "R0 pointer arithmetic of map value goes out of range",
- .result = REJECT
-},
-{
- "bounds check map access with off+size signed 32bit overflow. test4",
- .insns = {
- BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
- BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
- BPF_LD_MAP_FD(BPF_REG_1, 0),
- BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
- BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 1),
- BPF_EXIT_INSN(),
- BPF_MOV64_IMM(BPF_REG_1, 1000000),
- BPF_ALU64_IMM(BPF_MUL, BPF_REG_1, 1000000),
- BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_1),
- BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_0, 2),
- BPF_JMP_A(0),
- BPF_EXIT_INSN(),
- },
- .fixup_map_hash_8b = { 3 },
- .errstr = "map_value pointer and 1000000000000",
- .result = REJECT
-},
-{
- "bounds check mixed 32bit and 64bit arithmetic. test1",
- .insns = {
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_MOV64_IMM(BPF_REG_1, -1),
- BPF_ALU64_IMM(BPF_LSH, BPF_REG_1, 32),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 1),
- /* r1 = 0xffffFFFF00000001 */
- BPF_JMP32_IMM(BPF_JGT, BPF_REG_1, 1, 3),
- /* check ALU64 op keeps 32bit bounds */
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 1),
- BPF_JMP32_IMM(BPF_JGT, BPF_REG_1, 2, 1),
- BPF_JMP_A(1),
- /* invalid ldx if bounds are lost above */
- BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_0, -1),
- BPF_EXIT_INSN(),
- },
- .result = ACCEPT
-},
-{
- "bounds check mixed 32bit and 64bit arithmetic. test2",
- .insns = {
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_MOV64_IMM(BPF_REG_1, -1),
- BPF_ALU64_IMM(BPF_LSH, BPF_REG_1, 32),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 1),
- /* r1 = 0xffffFFFF00000001 */
- BPF_MOV64_IMM(BPF_REG_2, 3),
- /* r1 = 0x2 */
- BPF_ALU32_IMM(BPF_ADD, BPF_REG_1, 1),
- /* check ALU32 op zero extends 64bit bounds */
- BPF_JMP_REG(BPF_JGT, BPF_REG_1, BPF_REG_2, 1),
- BPF_JMP_A(1),
- /* invalid ldx if bounds are lost above */
- BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_0, -1),
- BPF_EXIT_INSN(),
- },
- .result = ACCEPT
-},
-{
- "assigning 32bit bounds to 64bit for wA = 0, wB = wA",
- .insns = {
- BPF_LDX_MEM(BPF_W, BPF_REG_8, BPF_REG_1,
- offsetof(struct __sk_buff, data_end)),
- BPF_LDX_MEM(BPF_W, BPF_REG_7, BPF_REG_1,
- offsetof(struct __sk_buff, data)),
- BPF_MOV32_IMM(BPF_REG_9, 0),
- BPF_MOV32_REG(BPF_REG_2, BPF_REG_9),
- BPF_MOV64_REG(BPF_REG_6, BPF_REG_7),
- BPF_ALU64_REG(BPF_ADD, BPF_REG_6, BPF_REG_2),
- BPF_MOV64_REG(BPF_REG_3, BPF_REG_6),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_3, 8),
- BPF_JMP_REG(BPF_JGT, BPF_REG_3, BPF_REG_8, 1),
- BPF_LDX_MEM(BPF_W, BPF_REG_5, BPF_REG_6, 0),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- },
- .prog_type = BPF_PROG_TYPE_SCHED_CLS,
- .result = ACCEPT,
- .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
-},
diff --git a/tools/testing/selftests/bpf/verifier/bounds_deduction.c b/tools/testing/selftests/bpf/verifier/bounds_deduction.c
deleted file mode 100644
index 1fd07a4f27ac..000000000000
--- a/tools/testing/selftests/bpf/verifier/bounds_deduction.c
+++ /dev/null
@@ -1,124 +0,0 @@
-{
- "check deducing bounds from const, 1",
- .insns = {
- BPF_MOV64_IMM(BPF_REG_0, 1),
- BPF_JMP_IMM(BPF_JSGE, BPF_REG_0, 1, 0),
- BPF_ALU64_REG(BPF_SUB, BPF_REG_0, BPF_REG_1),
- BPF_EXIT_INSN(),
- },
- .result = REJECT,
- .errstr = "R0 tried to subtract pointer from scalar",
-},
-{
- "check deducing bounds from const, 2",
- .insns = {
- BPF_MOV64_IMM(BPF_REG_0, 1),
- BPF_JMP_IMM(BPF_JSGE, BPF_REG_0, 1, 1),
- BPF_EXIT_INSN(),
- BPF_JMP_IMM(BPF_JSLE, BPF_REG_0, 1, 1),
- BPF_EXIT_INSN(),
- BPF_ALU64_REG(BPF_SUB, BPF_REG_1, BPF_REG_0),
- BPF_EXIT_INSN(),
- },
- .result = ACCEPT,
- .retval = 1,
-},
-{
- "check deducing bounds from const, 3",
- .insns = {
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_JMP_IMM(BPF_JSLE, BPF_REG_0, 0, 0),
- BPF_ALU64_REG(BPF_SUB, BPF_REG_0, BPF_REG_1),
- BPF_EXIT_INSN(),
- },
- .result = REJECT,
- .errstr = "R0 tried to subtract pointer from scalar",
-},
-{
- "check deducing bounds from const, 4",
- .insns = {
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_JMP_IMM(BPF_JSLE, BPF_REG_0, 0, 1),
- BPF_EXIT_INSN(),
- BPF_JMP_IMM(BPF_JSGE, BPF_REG_0, 0, 1),
- BPF_EXIT_INSN(),
- BPF_ALU64_REG(BPF_SUB, BPF_REG_1, BPF_REG_0),
- BPF_EXIT_INSN(),
- },
- .result = ACCEPT,
-},
-{
- "check deducing bounds from const, 5",
- .insns = {
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_JMP_IMM(BPF_JSGE, BPF_REG_0, 1, 1),
- BPF_ALU64_REG(BPF_SUB, BPF_REG_0, BPF_REG_1),
- BPF_EXIT_INSN(),
- },
- .result = REJECT,
- .errstr = "R0 tried to subtract pointer from scalar",
-},
-{
- "check deducing bounds from const, 6",
- .insns = {
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_JMP_IMM(BPF_JSGE, BPF_REG_0, 0, 1),
- BPF_EXIT_INSN(),
- BPF_ALU64_REG(BPF_SUB, BPF_REG_0, BPF_REG_1),
- BPF_EXIT_INSN(),
- },
- .result = REJECT,
- .errstr = "R0 tried to subtract pointer from scalar",
-},
-{
- "check deducing bounds from const, 7",
- .insns = {
- BPF_MOV64_IMM(BPF_REG_0, ~0),
- BPF_JMP_IMM(BPF_JSGE, BPF_REG_0, 0, 0),
- BPF_ALU64_REG(BPF_SUB, BPF_REG_1, BPF_REG_0),
- BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1,
- offsetof(struct __sk_buff, mark)),
- BPF_EXIT_INSN(),
- },
- .result = REJECT,
- .errstr = "dereference of modified ctx ptr",
- .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
-},
-{
- "check deducing bounds from const, 8",
- .insns = {
- BPF_MOV64_IMM(BPF_REG_0, ~0),
- BPF_JMP_IMM(BPF_JSGE, BPF_REG_0, 0, 1),
- BPF_ALU64_REG(BPF_ADD, BPF_REG_1, BPF_REG_0),
- BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1,
- offsetof(struct __sk_buff, mark)),
- BPF_EXIT_INSN(),
- },
- .result = REJECT,
- .errstr = "dereference of modified ctx ptr",
- .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
-},
-{
- "check deducing bounds from const, 9",
- .insns = {
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_JMP_IMM(BPF_JSGE, BPF_REG_0, 0, 0),
- BPF_ALU64_REG(BPF_SUB, BPF_REG_0, BPF_REG_1),
- BPF_EXIT_INSN(),
- },
- .result = REJECT,
- .errstr = "R0 tried to subtract pointer from scalar",
-},
-{
- "check deducing bounds from const, 10",
- .insns = {
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_JMP_IMM(BPF_JSLE, BPF_REG_0, 0, 0),
- /* Marks reg as unknown. */
- BPF_ALU64_IMM(BPF_NEG, BPF_REG_0, 0),
- BPF_ALU64_REG(BPF_SUB, BPF_REG_0, BPF_REG_1),
- BPF_EXIT_INSN(),
- },
- .result = REJECT,
- .errstr = "math between ctx pointer and register with unbounded min value is not allowed",
-},
diff --git a/tools/testing/selftests/bpf/verifier/bounds_mix_sign_unsign.c b/tools/testing/selftests/bpf/verifier/bounds_mix_sign_unsign.c
deleted file mode 100644
index 9baca7a75c42..000000000000
--- a/tools/testing/selftests/bpf/verifier/bounds_mix_sign_unsign.c
+++ /dev/null
@@ -1,406 +0,0 @@
-{
- "bounds checks mixing signed and unsigned, positive bounds",
- .insns = {
- BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
- BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
- BPF_LD_MAP_FD(BPF_REG_1, 0),
- BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
- BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 7),
- BPF_ST_MEM(BPF_DW, BPF_REG_10, -16, -8),
- BPF_LDX_MEM(BPF_DW, BPF_REG_1, BPF_REG_10, -16),
- BPF_MOV64_IMM(BPF_REG_2, 2),
- BPF_JMP_REG(BPF_JGE, BPF_REG_2, BPF_REG_1, 3),
- BPF_JMP_IMM(BPF_JSGT, BPF_REG_1, 4, 2),
- BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_1),
- BPF_ST_MEM(BPF_B, BPF_REG_0, 0, 0),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- },
- .fixup_map_hash_8b = { 3 },
- .errstr = "unbounded min value",
- .errstr_unpriv = "R1 has unknown scalar with mixed signed bounds",
- .result = REJECT,
-},
-{
- "bounds checks mixing signed and unsigned",
- .insns = {
- BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
- BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
- BPF_LD_MAP_FD(BPF_REG_1, 0),
- BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
- BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 7),
- BPF_ST_MEM(BPF_DW, BPF_REG_10, -16, -8),
- BPF_LDX_MEM(BPF_DW, BPF_REG_1, BPF_REG_10, -16),
- BPF_MOV64_IMM(BPF_REG_2, -1),
- BPF_JMP_REG(BPF_JGT, BPF_REG_1, BPF_REG_2, 3),
- BPF_JMP_IMM(BPF_JSGT, BPF_REG_1, 1, 2),
- BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_1),
- BPF_ST_MEM(BPF_B, BPF_REG_0, 0, 0),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- },
- .fixup_map_hash_8b = { 3 },
- .errstr = "unbounded min value",
- .errstr_unpriv = "R1 has unknown scalar with mixed signed bounds",
- .result = REJECT,
-},
-{
- "bounds checks mixing signed and unsigned, variant 2",
- .insns = {
- BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
- BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
- BPF_LD_MAP_FD(BPF_REG_1, 0),
- BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
- BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 9),
- BPF_ST_MEM(BPF_DW, BPF_REG_10, -16, -8),
- BPF_LDX_MEM(BPF_DW, BPF_REG_1, BPF_REG_10, -16),
- BPF_MOV64_IMM(BPF_REG_2, -1),
- BPF_JMP_REG(BPF_JGT, BPF_REG_1, BPF_REG_2, 5),
- BPF_MOV64_IMM(BPF_REG_8, 0),
- BPF_ALU64_REG(BPF_ADD, BPF_REG_8, BPF_REG_1),
- BPF_JMP_IMM(BPF_JSGT, BPF_REG_8, 1, 2),
- BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_8),
- BPF_ST_MEM(BPF_B, BPF_REG_8, 0, 0),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- },
- .fixup_map_hash_8b = { 3 },
- .errstr = "unbounded min value",
- .errstr_unpriv = "R8 has unknown scalar with mixed signed bounds",
- .result = REJECT,
-},
-{
- "bounds checks mixing signed and unsigned, variant 3",
- .insns = {
- BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
- BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
- BPF_LD_MAP_FD(BPF_REG_1, 0),
- BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
- BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 8),
- BPF_ST_MEM(BPF_DW, BPF_REG_10, -16, -8),
- BPF_LDX_MEM(BPF_DW, BPF_REG_1, BPF_REG_10, -16),
- BPF_MOV64_IMM(BPF_REG_2, -1),
- BPF_JMP_REG(BPF_JGT, BPF_REG_1, BPF_REG_2, 4),
- BPF_MOV64_REG(BPF_REG_8, BPF_REG_1),
- BPF_JMP_IMM(BPF_JSGT, BPF_REG_8, 1, 2),
- BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_8),
- BPF_ST_MEM(BPF_B, BPF_REG_8, 0, 0),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- },
- .fixup_map_hash_8b = { 3 },
- .errstr = "unbounded min value",
- .errstr_unpriv = "R8 has unknown scalar with mixed signed bounds",
- .result = REJECT,
-},
-{
- "bounds checks mixing signed and unsigned, variant 4",
- .insns = {
- BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
- BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
- BPF_LD_MAP_FD(BPF_REG_1, 0),
- BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
- BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 7),
- BPF_ST_MEM(BPF_DW, BPF_REG_10, -16, -8),
- BPF_LDX_MEM(BPF_DW, BPF_REG_1, BPF_REG_10, -16),
- BPF_MOV64_IMM(BPF_REG_2, 1),
- BPF_ALU64_REG(BPF_AND, BPF_REG_1, BPF_REG_2),
- BPF_JMP_IMM(BPF_JSGT, BPF_REG_1, 1, 2),
- BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_1),
- BPF_ST_MEM(BPF_B, BPF_REG_0, 0, 0),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- },
- .fixup_map_hash_8b = { 3 },
- .result = ACCEPT,
-},
-{
- "bounds checks mixing signed and unsigned, variant 5",
- .insns = {
- BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
- BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
- BPF_LD_MAP_FD(BPF_REG_1, 0),
- BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
- BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 9),
- BPF_ST_MEM(BPF_DW, BPF_REG_10, -16, -8),
- BPF_LDX_MEM(BPF_DW, BPF_REG_1, BPF_REG_10, -16),
- BPF_MOV64_IMM(BPF_REG_2, -1),
- BPF_JMP_REG(BPF_JGT, BPF_REG_1, BPF_REG_2, 5),
- BPF_JMP_IMM(BPF_JSGT, BPF_REG_1, 1, 4),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 4),
- BPF_ALU64_REG(BPF_SUB, BPF_REG_0, BPF_REG_1),
- BPF_ST_MEM(BPF_B, BPF_REG_0, 0, 0),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- },
- .fixup_map_hash_8b = { 3 },
- .errstr = "unbounded min value",
- .errstr_unpriv = "R1 has unknown scalar with mixed signed bounds",
- .result = REJECT,
-},
-{
- "bounds checks mixing signed and unsigned, variant 6",
- .insns = {
- BPF_MOV64_IMM(BPF_REG_2, 0),
- BPF_MOV64_REG(BPF_REG_3, BPF_REG_10),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_3, -512),
- BPF_ST_MEM(BPF_DW, BPF_REG_10, -16, -8),
- BPF_LDX_MEM(BPF_DW, BPF_REG_4, BPF_REG_10, -16),
- BPF_MOV64_IMM(BPF_REG_6, -1),
- BPF_JMP_REG(BPF_JGT, BPF_REG_4, BPF_REG_6, 5),
- BPF_JMP_IMM(BPF_JSGT, BPF_REG_4, 1, 4),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_4, 1),
- BPF_MOV64_IMM(BPF_REG_5, 0),
- BPF_ST_MEM(BPF_H, BPF_REG_10, -512, 0),
- BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_skb_load_bytes),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- },
- .errstr = "R4 min value is negative, either use unsigned",
- .result = REJECT,
-},
-{
- "bounds checks mixing signed and unsigned, variant 7",
- .insns = {
- BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
- BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
- BPF_LD_MAP_FD(BPF_REG_1, 0),
- BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
- BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 7),
- BPF_ST_MEM(BPF_DW, BPF_REG_10, -16, -8),
- BPF_LDX_MEM(BPF_DW, BPF_REG_1, BPF_REG_10, -16),
- BPF_MOV64_IMM(BPF_REG_2, 1024 * 1024 * 1024),
- BPF_JMP_REG(BPF_JGT, BPF_REG_1, BPF_REG_2, 3),
- BPF_JMP_IMM(BPF_JSGT, BPF_REG_1, 1, 2),
- BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_1),
- BPF_ST_MEM(BPF_B, BPF_REG_0, 0, 0),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- },
- .fixup_map_hash_8b = { 3 },
- .result = ACCEPT,
-},
-{
- "bounds checks mixing signed and unsigned, variant 8",
- .insns = {
- BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
- BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
- BPF_LD_MAP_FD(BPF_REG_1, 0),
- BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
- BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 9),
- BPF_ST_MEM(BPF_DW, BPF_REG_10, -16, -8),
- BPF_LDX_MEM(BPF_DW, BPF_REG_1, BPF_REG_10, -16),
- BPF_MOV64_IMM(BPF_REG_2, -1),
- BPF_JMP_REG(BPF_JGT, BPF_REG_2, BPF_REG_1, 2),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- BPF_JMP_IMM(BPF_JSGT, BPF_REG_1, 1, 2),
- BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_1),
- BPF_ST_MEM(BPF_B, BPF_REG_0, 0, 0),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- },
- .fixup_map_hash_8b = { 3 },
- .errstr = "unbounded min value",
- .errstr_unpriv = "R1 has unknown scalar with mixed signed bounds",
- .result = REJECT,
-},
-{
- "bounds checks mixing signed and unsigned, variant 9",
- .insns = {
- BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
- BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
- BPF_LD_MAP_FD(BPF_REG_1, 0),
- BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
- BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 10),
- BPF_ST_MEM(BPF_DW, BPF_REG_10, -16, -8),
- BPF_LDX_MEM(BPF_DW, BPF_REG_1, BPF_REG_10, -16),
- BPF_LD_IMM64(BPF_REG_2, -9223372036854775808ULL),
- BPF_JMP_REG(BPF_JGT, BPF_REG_2, BPF_REG_1, 2),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- BPF_JMP_IMM(BPF_JSGT, BPF_REG_1, 1, 2),
- BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_1),
- BPF_ST_MEM(BPF_B, BPF_REG_0, 0, 0),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- },
- .fixup_map_hash_8b = { 3 },
- .result = ACCEPT,
-},
-{
- "bounds checks mixing signed and unsigned, variant 10",
- .insns = {
- BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
- BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
- BPF_LD_MAP_FD(BPF_REG_1, 0),
- BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
- BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 9),
- BPF_ST_MEM(BPF_DW, BPF_REG_10, -16, -8),
- BPF_LDX_MEM(BPF_DW, BPF_REG_1, BPF_REG_10, -16),
- BPF_MOV64_IMM(BPF_REG_2, 0),
- BPF_JMP_REG(BPF_JGT, BPF_REG_2, BPF_REG_1, 2),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- BPF_JMP_IMM(BPF_JSGT, BPF_REG_1, 1, 2),
- BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_1),
- BPF_ST_MEM(BPF_B, BPF_REG_0, 0, 0),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- },
- .fixup_map_hash_8b = { 3 },
- .errstr = "unbounded min value",
- .errstr_unpriv = "R1 has unknown scalar with mixed signed bounds",
- .result = REJECT,
-},
-{
- "bounds checks mixing signed and unsigned, variant 11",
- .insns = {
- BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
- BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
- BPF_LD_MAP_FD(BPF_REG_1, 0),
- BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
- BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 9),
- BPF_ST_MEM(BPF_DW, BPF_REG_10, -16, -8),
- BPF_LDX_MEM(BPF_DW, BPF_REG_1, BPF_REG_10, -16),
- BPF_MOV64_IMM(BPF_REG_2, -1),
- BPF_JMP_REG(BPF_JGE, BPF_REG_2, BPF_REG_1, 2),
- /* Dead branch. */
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- BPF_JMP_IMM(BPF_JSGT, BPF_REG_1, 1, 2),
- BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_1),
- BPF_ST_MEM(BPF_B, BPF_REG_0, 0, 0),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- },
- .fixup_map_hash_8b = { 3 },
- .errstr = "unbounded min value",
- .errstr_unpriv = "R1 has unknown scalar with mixed signed bounds",
- .result = REJECT,
-},
-{
- "bounds checks mixing signed and unsigned, variant 12",
- .insns = {
- BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
- BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
- BPF_LD_MAP_FD(BPF_REG_1, 0),
- BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
- BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 9),
- BPF_ST_MEM(BPF_DW, BPF_REG_10, -16, -8),
- BPF_LDX_MEM(BPF_DW, BPF_REG_1, BPF_REG_10, -16),
- BPF_MOV64_IMM(BPF_REG_2, -6),
- BPF_JMP_REG(BPF_JGE, BPF_REG_2, BPF_REG_1, 2),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- BPF_JMP_IMM(BPF_JSGT, BPF_REG_1, 1, 2),
- BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_1),
- BPF_ST_MEM(BPF_B, BPF_REG_0, 0, 0),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- },
- .fixup_map_hash_8b = { 3 },
- .errstr = "unbounded min value",
- .errstr_unpriv = "R1 has unknown scalar with mixed signed bounds",
- .result = REJECT,
-},
-{
- "bounds checks mixing signed and unsigned, variant 13",
- .insns = {
- BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
- BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
- BPF_LD_MAP_FD(BPF_REG_1, 0),
- BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
- BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 6),
- BPF_ST_MEM(BPF_DW, BPF_REG_10, -16, -8),
- BPF_LDX_MEM(BPF_DW, BPF_REG_1, BPF_REG_10, -16),
- BPF_MOV64_IMM(BPF_REG_2, 2),
- BPF_JMP_REG(BPF_JGE, BPF_REG_2, BPF_REG_1, 2),
- BPF_MOV64_IMM(BPF_REG_7, 1),
- BPF_JMP_IMM(BPF_JSGT, BPF_REG_7, 0, 2),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- BPF_ALU64_REG(BPF_ADD, BPF_REG_7, BPF_REG_1),
- BPF_JMP_IMM(BPF_JSGT, BPF_REG_7, 4, 2),
- BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_7),
- BPF_ST_MEM(BPF_B, BPF_REG_0, 0, 0),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- },
- .fixup_map_hash_8b = { 3 },
- .errstr = "unbounded min value",
- .errstr_unpriv = "R7 has unknown scalar with mixed signed bounds",
- .result = REJECT,
-},
-{
- "bounds checks mixing signed and unsigned, variant 14",
- .insns = {
- BPF_LDX_MEM(BPF_W, BPF_REG_9, BPF_REG_1,
- offsetof(struct __sk_buff, mark)),
- BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
- BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
- BPF_LD_MAP_FD(BPF_REG_1, 0),
- BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
- BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 8),
- BPF_ST_MEM(BPF_DW, BPF_REG_10, -16, -8),
- BPF_LDX_MEM(BPF_DW, BPF_REG_1, BPF_REG_10, -16),
- BPF_MOV64_IMM(BPF_REG_2, -1),
- BPF_MOV64_IMM(BPF_REG_8, 2),
- BPF_JMP_IMM(BPF_JEQ, BPF_REG_9, 42, 6),
- BPF_JMP_REG(BPF_JSGT, BPF_REG_8, BPF_REG_1, 3),
- BPF_JMP_IMM(BPF_JSGT, BPF_REG_1, 1, 2),
- BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_1),
- BPF_ST_MEM(BPF_B, BPF_REG_0, 0, 0),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- BPF_JMP_REG(BPF_JGT, BPF_REG_1, BPF_REG_2, -3),
- BPF_JMP_IMM(BPF_JA, 0, 0, -7),
- },
- .fixup_map_hash_8b = { 4 },
- .errstr = "unbounded min value",
- .errstr_unpriv = "R1 has unknown scalar with mixed signed bounds",
- .result = REJECT,
-},
-{
- "bounds checks mixing signed and unsigned, variant 15",
- .insns = {
- BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
- BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
- BPF_LD_MAP_FD(BPF_REG_1, 0),
- BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
- BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 4),
- BPF_ST_MEM(BPF_DW, BPF_REG_10, -16, -8),
- BPF_LDX_MEM(BPF_DW, BPF_REG_1, BPF_REG_10, -16),
- BPF_MOV64_IMM(BPF_REG_2, -6),
- BPF_JMP_REG(BPF_JGE, BPF_REG_2, BPF_REG_1, 2),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_1),
- BPF_JMP_IMM(BPF_JGT, BPF_REG_0, 1, 2),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- BPF_ST_MEM(BPF_B, BPF_REG_0, 0, 0),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- },
- .fixup_map_hash_8b = { 3 },
- .errstr = "unbounded min value",
- .errstr_unpriv = "R1 has unknown scalar with mixed signed bounds",
- .result = REJECT,
- .result_unpriv = REJECT,
-},
diff --git a/tools/testing/selftests/bpf/verifier/bpf_get_stack.c b/tools/testing/selftests/bpf/verifier/bpf_get_stack.c
deleted file mode 100644
index 69b048cf46d9..000000000000
--- a/tools/testing/selftests/bpf/verifier/bpf_get_stack.c
+++ /dev/null
@@ -1,44 +0,0 @@
-{
- "bpf_get_stack return R0 within range",
- .insns = {
- BPF_MOV64_REG(BPF_REG_6, BPF_REG_1),
- BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
- BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
- BPF_LD_MAP_FD(BPF_REG_1, 0),
- BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
- BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 28),
- BPF_MOV64_REG(BPF_REG_7, BPF_REG_0),
- BPF_MOV64_IMM(BPF_REG_9, sizeof(struct test_val)/2),
- BPF_MOV64_REG(BPF_REG_1, BPF_REG_6),
- BPF_MOV64_REG(BPF_REG_2, BPF_REG_7),
- BPF_MOV64_IMM(BPF_REG_3, sizeof(struct test_val)/2),
- BPF_MOV64_IMM(BPF_REG_4, 256),
- BPF_EMIT_CALL(BPF_FUNC_get_stack),
- BPF_MOV64_IMM(BPF_REG_1, 0),
- BPF_MOV64_REG(BPF_REG_8, BPF_REG_0),
- BPF_ALU64_IMM(BPF_LSH, BPF_REG_8, 32),
- BPF_ALU64_IMM(BPF_ARSH, BPF_REG_8, 32),
- BPF_JMP_REG(BPF_JSGT, BPF_REG_1, BPF_REG_8, 16),
- BPF_ALU64_REG(BPF_SUB, BPF_REG_9, BPF_REG_8),
- BPF_MOV64_REG(BPF_REG_2, BPF_REG_7),
- BPF_ALU64_REG(BPF_ADD, BPF_REG_2, BPF_REG_8),
- BPF_MOV64_REG(BPF_REG_1, BPF_REG_9),
- BPF_ALU64_IMM(BPF_LSH, BPF_REG_1, 32),
- BPF_ALU64_IMM(BPF_ARSH, BPF_REG_1, 32),
- BPF_MOV64_REG(BPF_REG_3, BPF_REG_2),
- BPF_ALU64_REG(BPF_ADD, BPF_REG_3, BPF_REG_1),
- BPF_MOV64_REG(BPF_REG_1, BPF_REG_7),
- BPF_MOV64_IMM(BPF_REG_5, sizeof(struct test_val)/2),
- BPF_ALU64_REG(BPF_ADD, BPF_REG_1, BPF_REG_5),
- BPF_JMP_REG(BPF_JGE, BPF_REG_3, BPF_REG_1, 4),
- BPF_MOV64_REG(BPF_REG_1, BPF_REG_6),
- BPF_MOV64_REG(BPF_REG_3, BPF_REG_9),
- BPF_MOV64_IMM(BPF_REG_4, 0),
- BPF_EMIT_CALL(BPF_FUNC_get_stack),
- BPF_EXIT_INSN(),
- },
- .fixup_map_hash_48b = { 4 },
- .result = ACCEPT,
- .prog_type = BPF_PROG_TYPE_TRACEPOINT,
-},
diff --git a/tools/testing/selftests/bpf/verifier/bpf_loop_inline.c b/tools/testing/selftests/bpf/verifier/bpf_loop_inline.c
new file mode 100644
index 000000000000..59125b22ae39
--- /dev/null
+++ b/tools/testing/selftests/bpf/verifier/bpf_loop_inline.c
@@ -0,0 +1,270 @@
+#define BTF_TYPES \
+ .btf_strings = "\0int\0i\0ctx\0callback\0main\0", \
+ .btf_types = { \
+ /* 1: int */ BTF_TYPE_INT_ENC(1, BTF_INT_SIGNED, 0, 32, 4), \
+ /* 2: int* */ BTF_PTR_ENC(1), \
+ /* 3: void* */ BTF_PTR_ENC(0), \
+ /* 4: int __(void*) */ BTF_FUNC_PROTO_ENC(1, 1), \
+ BTF_FUNC_PROTO_ARG_ENC(7, 3), \
+ /* 5: int __(int, int*) */ BTF_FUNC_PROTO_ENC(1, 2), \
+ BTF_FUNC_PROTO_ARG_ENC(5, 1), \
+ BTF_FUNC_PROTO_ARG_ENC(7, 2), \
+ /* 6: main */ BTF_FUNC_ENC(20, 4), \
+ /* 7: callback */ BTF_FUNC_ENC(11, 5), \
+ BTF_END_RAW \
+ }
+
+#define MAIN_TYPE 6
+#define CALLBACK_TYPE 7
+
+/* can't use BPF_CALL_REL, jit_subprogs adjusts IMM & OFF
+ * fields for pseudo calls
+ */
+#define PSEUDO_CALL_INSN() \
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, BPF_PSEUDO_CALL, \
+ INSN_OFF_MASK, INSN_IMM_MASK)
+
+/* can't use BPF_FUNC_loop constant,
+ * do_mix_fixups adjusts the IMM field
+ */
+#define HELPER_CALL_INSN() \
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, INSN_OFF_MASK, INSN_IMM_MASK)
+
+{
+ "inline simple bpf_loop call",
+ .insns = {
+ /* main */
+ /* force verifier state branching to verify logic on first and
+ * subsequent bpf_loop insn processing steps
+ */
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_jiffies64),
+ BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 777, 2),
+ BPF_ALU64_IMM(BPF_MOV, BPF_REG_1, 1),
+ BPF_JMP_IMM(BPF_JA, 0, 0, 1),
+ BPF_ALU64_IMM(BPF_MOV, BPF_REG_1, 2),
+
+ BPF_RAW_INSN(BPF_LD | BPF_IMM | BPF_DW, BPF_REG_2, BPF_PSEUDO_FUNC, 0, 6),
+ BPF_RAW_INSN(0, 0, 0, 0, 0),
+ BPF_ALU64_IMM(BPF_MOV, BPF_REG_3, 0),
+ BPF_ALU64_IMM(BPF_MOV, BPF_REG_4, 0),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_loop),
+ BPF_ALU64_IMM(BPF_MOV, BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ /* callback */
+ BPF_ALU64_IMM(BPF_MOV, BPF_REG_0, 1),
+ BPF_EXIT_INSN(),
+ },
+ .expected_insns = { PSEUDO_CALL_INSN() },
+ .unexpected_insns = { HELPER_CALL_INSN() },
+ .prog_type = BPF_PROG_TYPE_TRACEPOINT,
+ .flags = F_NEEDS_JIT_ENABLED,
+ .result = ACCEPT,
+ .runs = 0,
+ .func_info = { { 0, MAIN_TYPE }, { 12, CALLBACK_TYPE } },
+ .func_info_cnt = 2,
+ BTF_TYPES
+},
+{
+ "don't inline bpf_loop call, flags non-zero",
+ .insns = {
+ /* main */
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_jiffies64),
+ BPF_ALU64_REG(BPF_MOV, BPF_REG_6, BPF_REG_0),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_jiffies64),
+ BPF_ALU64_REG(BPF_MOV, BPF_REG_7, BPF_REG_0),
+ BPF_JMP_IMM(BPF_JNE, BPF_REG_6, 0, 9),
+ BPF_ALU64_IMM(BPF_MOV, BPF_REG_4, 0),
+ BPF_JMP_IMM(BPF_JNE, BPF_REG_7, 0, 0),
+ BPF_ALU64_IMM(BPF_MOV, BPF_REG_1, 1),
+ BPF_RAW_INSN(BPF_LD | BPF_IMM | BPF_DW, BPF_REG_2, BPF_PSEUDO_FUNC, 0, 7),
+ BPF_RAW_INSN(0, 0, 0, 0, 0),
+ BPF_ALU64_IMM(BPF_MOV, BPF_REG_3, 0),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_loop),
+ BPF_ALU64_IMM(BPF_MOV, BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ BPF_ALU64_IMM(BPF_MOV, BPF_REG_4, 1),
+ BPF_JMP_IMM(BPF_JA, 0, 0, -10),
+ /* callback */
+ BPF_ALU64_IMM(BPF_MOV, BPF_REG_0, 1),
+ BPF_EXIT_INSN(),
+ },
+ .expected_insns = { HELPER_CALL_INSN() },
+ .unexpected_insns = { PSEUDO_CALL_INSN() },
+ .prog_type = BPF_PROG_TYPE_TRACEPOINT,
+ .flags = F_NEEDS_JIT_ENABLED,
+ .result = ACCEPT,
+ .runs = 0,
+ .func_info = { { 0, MAIN_TYPE }, { 16, CALLBACK_TYPE } },
+ .func_info_cnt = 2,
+ BTF_TYPES
+},
+{
+ "don't inline bpf_loop call, callback non-constant",
+ .insns = {
+ /* main */
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_jiffies64),
+ BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 777, 4), /* pick a random callback */
+
+ BPF_ALU64_IMM(BPF_MOV, BPF_REG_1, 1),
+ BPF_RAW_INSN(BPF_LD | BPF_IMM | BPF_DW, BPF_REG_2, BPF_PSEUDO_FUNC, 0, 10),
+ BPF_RAW_INSN(0, 0, 0, 0, 0),
+ BPF_JMP_IMM(BPF_JA, 0, 0, 3),
+
+ BPF_ALU64_IMM(BPF_MOV, BPF_REG_1, 1),
+ BPF_RAW_INSN(BPF_LD | BPF_IMM | BPF_DW, BPF_REG_2, BPF_PSEUDO_FUNC, 0, 8),
+ BPF_RAW_INSN(0, 0, 0, 0, 0),
+
+ BPF_ALU64_IMM(BPF_MOV, BPF_REG_3, 0),
+ BPF_ALU64_IMM(BPF_MOV, BPF_REG_4, 0),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_loop),
+ BPF_ALU64_IMM(BPF_MOV, BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ /* callback */
+ BPF_ALU64_IMM(BPF_MOV, BPF_REG_0, 1),
+ BPF_EXIT_INSN(),
+ /* callback #2 */
+ BPF_ALU64_IMM(BPF_MOV, BPF_REG_0, 1),
+ BPF_EXIT_INSN(),
+ },
+ .expected_insns = { HELPER_CALL_INSN() },
+ .unexpected_insns = { PSEUDO_CALL_INSN() },
+ .prog_type = BPF_PROG_TYPE_TRACEPOINT,
+ .flags = F_NEEDS_JIT_ENABLED,
+ .result = ACCEPT,
+ .runs = 0,
+ .func_info = {
+ { 0, MAIN_TYPE },
+ { 14, CALLBACK_TYPE },
+ { 16, CALLBACK_TYPE }
+ },
+ .func_info_cnt = 3,
+ BTF_TYPES
+},
+{
+ "bpf_loop_inline and a dead func",
+ .insns = {
+ /* main */
+
+ /* A reference to callback #1 to make verifier count it as a func.
+ * This reference is overwritten below and callback #1 is dead.
+ */
+ BPF_RAW_INSN(BPF_LD | BPF_IMM | BPF_DW, BPF_REG_2, BPF_PSEUDO_FUNC, 0, 9),
+ BPF_RAW_INSN(0, 0, 0, 0, 0),
+ BPF_ALU64_IMM(BPF_MOV, BPF_REG_1, 1),
+ BPF_RAW_INSN(BPF_LD | BPF_IMM | BPF_DW, BPF_REG_2, BPF_PSEUDO_FUNC, 0, 8),
+ BPF_RAW_INSN(0, 0, 0, 0, 0),
+ BPF_ALU64_IMM(BPF_MOV, BPF_REG_3, 0),
+ BPF_ALU64_IMM(BPF_MOV, BPF_REG_4, 0),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_loop),
+ BPF_ALU64_IMM(BPF_MOV, BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ /* callback */
+ BPF_ALU64_IMM(BPF_MOV, BPF_REG_0, 1),
+ BPF_EXIT_INSN(),
+ /* callback #2 */
+ BPF_ALU64_IMM(BPF_MOV, BPF_REG_0, 1),
+ BPF_EXIT_INSN(),
+ },
+ .expected_insns = { PSEUDO_CALL_INSN() },
+ .unexpected_insns = { HELPER_CALL_INSN() },
+ .prog_type = BPF_PROG_TYPE_TRACEPOINT,
+ .flags = F_NEEDS_JIT_ENABLED,
+ .result = ACCEPT,
+ .runs = 0,
+ .func_info = {
+ { 0, MAIN_TYPE },
+ { 10, CALLBACK_TYPE },
+ { 12, CALLBACK_TYPE }
+ },
+ .func_info_cnt = 3,
+ BTF_TYPES
+},
+{
+ "bpf_loop_inline stack locations for loop vars",
+ .insns = {
+ /* main */
+ BPF_ST_MEM(BPF_W, BPF_REG_10, -12, 0x77),
+ /* bpf_loop call #1 */
+ BPF_ALU64_IMM(BPF_MOV, BPF_REG_1, 1),
+ BPF_RAW_INSN(BPF_LD | BPF_IMM | BPF_DW, BPF_REG_2, BPF_PSEUDO_FUNC, 0, 22),
+ BPF_RAW_INSN(0, 0, 0, 0, 0),
+ BPF_ALU64_IMM(BPF_MOV, BPF_REG_3, 0),
+ BPF_ALU64_IMM(BPF_MOV, BPF_REG_4, 0),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_loop),
+ /* bpf_loop call #2 */
+ BPF_ALU64_IMM(BPF_MOV, BPF_REG_1, 2),
+ BPF_RAW_INSN(BPF_LD | BPF_IMM | BPF_DW, BPF_REG_2, BPF_PSEUDO_FUNC, 0, 16),
+ BPF_RAW_INSN(0, 0, 0, 0, 0),
+ BPF_ALU64_IMM(BPF_MOV, BPF_REG_3, 0),
+ BPF_ALU64_IMM(BPF_MOV, BPF_REG_4, 0),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_loop),
+ /* call func and exit */
+ BPF_CALL_REL(2),
+ BPF_ALU64_IMM(BPF_MOV, BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ /* func */
+ BPF_ST_MEM(BPF_DW, BPF_REG_10, -32, 0x55),
+ BPF_ALU64_IMM(BPF_MOV, BPF_REG_1, 2),
+ BPF_RAW_INSN(BPF_LD | BPF_IMM | BPF_DW, BPF_REG_2, BPF_PSEUDO_FUNC, 0, 6),
+ BPF_RAW_INSN(0, 0, 0, 0, 0),
+ BPF_ALU64_IMM(BPF_MOV, BPF_REG_3, 0),
+ BPF_ALU64_IMM(BPF_MOV, BPF_REG_4, 0),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_loop),
+ BPF_ALU64_IMM(BPF_MOV, BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ /* callback */
+ BPF_ALU64_IMM(BPF_MOV, BPF_REG_0, 1),
+ BPF_EXIT_INSN(),
+ },
+ .expected_insns = {
+ BPF_ST_MEM(BPF_W, BPF_REG_10, -12, 0x77),
+ SKIP_INSNS(),
+ BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_6, -40),
+ BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_7, -32),
+ BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_8, -24),
+ SKIP_INSNS(),
+ /* offsets are the same as in the first call */
+ BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_6, -40),
+ BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_7, -32),
+ BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_8, -24),
+ SKIP_INSNS(),
+ BPF_ST_MEM(BPF_DW, BPF_REG_10, -32, 0x55),
+ SKIP_INSNS(),
+ /* offsets differ from main because of different offset
+ * in BPF_ST_MEM instruction
+ */
+ BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_6, -56),
+ BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_7, -48),
+ BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_8, -40),
+ },
+ .unexpected_insns = { HELPER_CALL_INSN() },
+ .prog_type = BPF_PROG_TYPE_TRACEPOINT,
+ .flags = F_NEEDS_JIT_ENABLED,
+ .result = ACCEPT,
+ .func_info = {
+ { 0, MAIN_TYPE },
+ { 16, MAIN_TYPE },
+ { 25, CALLBACK_TYPE },
+ },
+ .func_info_cnt = 3,
+ BTF_TYPES
+},
+{
+ "inline bpf_loop call in a big program",
+ .insns = {},
+ .fill_helper = bpf_fill_big_prog_with_loop_1,
+ .expected_insns = { PSEUDO_CALL_INSN() },
+ .unexpected_insns = { HELPER_CALL_INSN() },
+ .result = ACCEPT,
+ .prog_type = BPF_PROG_TYPE_TRACEPOINT,
+ .flags = F_NEEDS_JIT_ENABLED,
+ .func_info = { { 0, MAIN_TYPE }, { 16, CALLBACK_TYPE } },
+ .func_info_cnt = 2,
+ BTF_TYPES
+},
+
+#undef HELPER_CALL_INSN
+#undef PSEUDO_CALL_INSN
+#undef CALLBACK_TYPE
+#undef MAIN_TYPE
+#undef BTF_TYPES
diff --git a/tools/testing/selftests/bpf/verifier/bpf_st_mem.c b/tools/testing/selftests/bpf/verifier/bpf_st_mem.c
new file mode 100644
index 000000000000..b616575c3b00
--- /dev/null
+++ b/tools/testing/selftests/bpf/verifier/bpf_st_mem.c
@@ -0,0 +1,99 @@
+{
+ "BPF_ST_MEM stack imm non-zero",
+ .insns = {
+ BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 42),
+ BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_10, -8),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, -42),
+ /* if value is tracked correctly R0 is zero */
+ BPF_EXIT_INSN(),
+ },
+ .result = ACCEPT,
+ /* Use prog type that requires return value in range [0, 1] */
+ .prog_type = BPF_PROG_TYPE_SK_LOOKUP,
+ .expected_attach_type = BPF_SK_LOOKUP,
+ .runs = -1,
+},
+{
+ "BPF_ST_MEM stack imm zero",
+ .insns = {
+ /* mark stack 0000 0000 */
+ BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
+ /* read and sum a few bytes */
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_LDX_MEM(BPF_B, BPF_REG_1, BPF_REG_10, -8),
+ BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_1),
+ BPF_LDX_MEM(BPF_B, BPF_REG_1, BPF_REG_10, -4),
+ BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_1),
+ BPF_LDX_MEM(BPF_B, BPF_REG_1, BPF_REG_10, -1),
+ BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_1),
+ /* if value is tracked correctly R0 is zero */
+ BPF_EXIT_INSN(),
+ },
+ .result = ACCEPT,
+ /* Use prog type that requires return value in range [0, 1] */
+ .prog_type = BPF_PROG_TYPE_SK_LOOKUP,
+ .expected_attach_type = BPF_SK_LOOKUP,
+ .runs = -1,
+},
+{
+ "BPF_ST_MEM stack imm zero, variable offset",
+ .insns = {
+ /* set fp[-16], fp[-24] to zeros */
+ BPF_ST_MEM(BPF_DW, BPF_REG_10, -16, 0),
+ BPF_ST_MEM(BPF_DW, BPF_REG_10, -24, 0),
+ /* r0 = random value in range [-32, -15] */
+ BPF_EMIT_CALL(BPF_FUNC_get_prandom_u32),
+ BPF_JMP_IMM(BPF_JLE, BPF_REG_0, 16, 2),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ BPF_ALU64_IMM(BPF_SUB, BPF_REG_0, 32),
+ /* fp[r0] = 0, make a variable offset write of zero,
+ * this should preserve zero marks on stack.
+ */
+ BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_10),
+ BPF_ST_MEM(BPF_B, BPF_REG_0, 0, 0),
+ /* r0 = fp[-20], if variable offset write was tracked correctly
+ * r0 would be a known zero.
+ */
+ BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_10, -20),
+ /* Would fail return code verification if r0 range is not tracked correctly. */
+ BPF_EXIT_INSN(),
+ },
+ .result = ACCEPT,
+ /* Use prog type that requires return value in range [0, 1] */
+ .prog_type = BPF_PROG_TYPE_SK_LOOKUP,
+ .expected_attach_type = BPF_SK_LOOKUP,
+ .runs = -1,
+},
+{
+ "BPF_ST_MEM stack imm sign",
+ /* Check if verifier correctly reasons about sign of an
+ * immediate spilled to stack by BPF_ST instruction.
+ *
+ * fp[-8] = -44;
+ * r0 = fp[-8];
+ * if r0 s< 0 goto ret0;
+ * r0 = -1;
+ * exit;
+ * ret0:
+ * r0 = 0;
+ * exit;
+ */
+ .insns = {
+ BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, -44),
+ BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_10, -8),
+ BPF_JMP_IMM(BPF_JSLT, BPF_REG_0, 0, 2),
+ BPF_MOV64_IMM(BPF_REG_0, -1),
+ BPF_EXIT_INSN(),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ /* Use prog type that requires return value in range [0, 1] */
+ .prog_type = BPF_PROG_TYPE_SK_LOOKUP,
+ .expected_attach_type = BPF_SK_LOOKUP,
+ .result = VERBOSE_ACCEPT,
+ .runs = -1,
+ .errstr = "0: (7a) *(u64 *)(r10 -8) = -44 ; R10=fp0 fp-8_w=-44\
+ 2: (c5) if r0 s< 0x0 goto pc+2\
+ R0_w=-44",
+},
diff --git a/tools/testing/selftests/bpf/verifier/calls.c b/tools/testing/selftests/bpf/verifier/calls.c
index 94258c6b5235..ab25a81fd3a1 100644
--- a/tools/testing/selftests/bpf/verifier/calls.c
+++ b/tools/testing/selftests/bpf/verifier/calls.c
@@ -1,4 +1,281 @@
{
+ "calls: invalid kfunc call not eliminated",
+ .insns = {
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, BPF_PSEUDO_KFUNC_CALL, 0, 0),
+ BPF_MOV64_IMM(BPF_REG_0, 1),
+ BPF_EXIT_INSN(),
+ },
+ .prog_type = BPF_PROG_TYPE_TRACEPOINT,
+ .result = REJECT,
+ .errstr = "invalid kernel function call not eliminated in verifier pass",
+},
+{
+ "calls: invalid kfunc call unreachable",
+ .insns = {
+ BPF_MOV64_IMM(BPF_REG_0, 1),
+ BPF_JMP_IMM(BPF_JGT, BPF_REG_0, 0, 2),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, BPF_PSEUDO_KFUNC_CALL, 0, 0),
+ BPF_MOV64_IMM(BPF_REG_0, 1),
+ BPF_EXIT_INSN(),
+ },
+ .prog_type = BPF_PROG_TYPE_TRACEPOINT,
+ .result = ACCEPT,
+},
+{
+ "calls: invalid kfunc call: ptr_to_mem to struct with non-scalar",
+ .insns = {
+ BPF_MOV64_REG(BPF_REG_1, BPF_REG_10),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -8),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, BPF_PSEUDO_KFUNC_CALL, 0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .prog_type = BPF_PROG_TYPE_SCHED_CLS,
+ .result = REJECT,
+ .errstr = "arg#0 pointer type STRUCT prog_test_fail1 must point to scalar",
+ .fixup_kfunc_btf_id = {
+ { "bpf_kfunc_call_test_fail1", 2 },
+ },
+},
+{
+ "calls: invalid kfunc call: ptr_to_mem to struct with nesting depth > 4",
+ .insns = {
+ BPF_MOV64_REG(BPF_REG_1, BPF_REG_10),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -8),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, BPF_PSEUDO_KFUNC_CALL, 0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .prog_type = BPF_PROG_TYPE_SCHED_CLS,
+ .result = REJECT,
+ .errstr = "max struct nesting depth exceeded\narg#0 pointer type STRUCT prog_test_fail2",
+ .fixup_kfunc_btf_id = {
+ { "bpf_kfunc_call_test_fail2", 2 },
+ },
+},
+{
+ "calls: invalid kfunc call: ptr_to_mem to struct with FAM",
+ .insns = {
+ BPF_MOV64_REG(BPF_REG_1, BPF_REG_10),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -8),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, BPF_PSEUDO_KFUNC_CALL, 0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .prog_type = BPF_PROG_TYPE_SCHED_CLS,
+ .result = REJECT,
+ .errstr = "arg#0 pointer type STRUCT prog_test_fail3 must point to scalar",
+ .fixup_kfunc_btf_id = {
+ { "bpf_kfunc_call_test_fail3", 2 },
+ },
+},
+{
+ "calls: invalid kfunc call: reg->type != PTR_TO_CTX",
+ .insns = {
+ BPF_MOV64_REG(BPF_REG_1, BPF_REG_10),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -8),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, BPF_PSEUDO_KFUNC_CALL, 0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .prog_type = BPF_PROG_TYPE_SCHED_CLS,
+ .result = REJECT,
+ .errstr = "R1 must have zero offset when passed to release func or trusted arg to kfunc",
+ .fixup_kfunc_btf_id = {
+ { "bpf_kfunc_call_test_pass_ctx", 2 },
+ },
+},
+{
+ "calls: invalid kfunc call: void * not allowed in func proto without mem size arg",
+ .insns = {
+ BPF_MOV64_REG(BPF_REG_1, BPF_REG_10),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -8),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, BPF_PSEUDO_KFUNC_CALL, 0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .prog_type = BPF_PROG_TYPE_SCHED_CLS,
+ .result = REJECT,
+ .errstr = "arg#0 pointer type UNKNOWN must point to scalar",
+ .fixup_kfunc_btf_id = {
+ { "bpf_kfunc_call_test_mem_len_fail1", 2 },
+ },
+},
+{
+ "calls: trigger reg2btf_ids[reg->type] for reg->type > __BPF_REG_TYPE_MAX",
+ .insns = {
+ BPF_MOV64_REG(BPF_REG_1, BPF_REG_10),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -8),
+ BPF_ST_MEM(BPF_DW, BPF_REG_1, 0, 0),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, BPF_PSEUDO_KFUNC_CALL, 0, 0),
+ BPF_MOV64_REG(BPF_REG_1, BPF_REG_0),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, BPF_PSEUDO_KFUNC_CALL, 0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .prog_type = BPF_PROG_TYPE_SCHED_CLS,
+ .result = REJECT,
+ .errstr = "Possibly NULL pointer passed to trusted arg0",
+ .fixup_kfunc_btf_id = {
+ { "bpf_kfunc_call_test_acquire", 3 },
+ { "bpf_kfunc_call_test_release", 5 },
+ },
+},
+{
+ "calls: invalid kfunc call: reg->off must be zero when passed to release kfunc",
+ .insns = {
+ BPF_MOV64_REG(BPF_REG_1, BPF_REG_10),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -8),
+ BPF_ST_MEM(BPF_DW, BPF_REG_1, 0, 0),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, BPF_PSEUDO_KFUNC_CALL, 0, 0),
+ BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 1),
+ BPF_EXIT_INSN(),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 8),
+ BPF_MOV64_REG(BPF_REG_1, BPF_REG_0),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, BPF_PSEUDO_KFUNC_CALL, 0, 0),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .prog_type = BPF_PROG_TYPE_SCHED_CLS,
+ .result = REJECT,
+ .errstr = "R1 must have zero offset when passed to release func",
+ .fixup_kfunc_btf_id = {
+ { "bpf_kfunc_call_test_acquire", 3 },
+ { "bpf_kfunc_call_memb_release", 8 },
+ },
+},
+{
+ "calls: invalid kfunc call: don't match first member type when passed to release kfunc",
+ .insns = {
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, BPF_PSEUDO_KFUNC_CALL, 0, 0),
+ BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 1),
+ BPF_EXIT_INSN(),
+ BPF_MOV64_REG(BPF_REG_1, BPF_REG_0),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, BPF_PSEUDO_KFUNC_CALL, 0, 0),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .prog_type = BPF_PROG_TYPE_SCHED_CLS,
+ .result = REJECT,
+ .errstr = "kernel function bpf_kfunc_call_memb1_release args#0 expected pointer",
+ .fixup_kfunc_btf_id = {
+ { "bpf_kfunc_call_memb_acquire", 1 },
+ { "bpf_kfunc_call_memb1_release", 5 },
+ },
+},
+{
+ "calls: invalid kfunc call: PTR_TO_BTF_ID with negative offset",
+ .insns = {
+ BPF_MOV64_REG(BPF_REG_1, BPF_REG_10),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -8),
+ BPF_ST_MEM(BPF_DW, BPF_REG_1, 0, 0),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, BPF_PSEUDO_KFUNC_CALL, 0, 0),
+ BPF_MOV64_REG(BPF_REG_2, BPF_REG_0),
+ BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 1),
+ BPF_EXIT_INSN(),
+ BPF_MOV64_REG(BPF_REG_1, BPF_REG_0),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -4),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, BPF_PSEUDO_KFUNC_CALL, 0, 0),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_MOV64_REG(BPF_REG_1, BPF_REG_2),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, BPF_PSEUDO_KFUNC_CALL, 0, 0),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .prog_type = BPF_PROG_TYPE_SCHED_CLS,
+ .fixup_kfunc_btf_id = {
+ { "bpf_kfunc_call_test_acquire", 3 },
+ { "bpf_kfunc_call_test_offset", 9 },
+ { "bpf_kfunc_call_test_release", 12 },
+ },
+ .result_unpriv = REJECT,
+ .result = REJECT,
+ .errstr = "ptr R1 off=-4 disallowed",
+},
+{
+ "calls: invalid kfunc call: PTR_TO_BTF_ID with variable offset",
+ .insns = {
+ BPF_MOV64_REG(BPF_REG_1, BPF_REG_10),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -8),
+ BPF_ST_MEM(BPF_DW, BPF_REG_1, 0, 0),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, BPF_PSEUDO_KFUNC_CALL, 0, 0),
+ BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 1),
+ BPF_EXIT_INSN(),
+ BPF_MOV64_REG(BPF_REG_1, BPF_REG_0),
+ BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_0, 4),
+ BPF_JMP_IMM(BPF_JLE, BPF_REG_2, 4, 3),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, BPF_PSEUDO_KFUNC_CALL, 0, 0),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ BPF_JMP_IMM(BPF_JGE, BPF_REG_2, 0, 3),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, BPF_PSEUDO_KFUNC_CALL, 0, 0),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ BPF_ALU64_REG(BPF_ADD, BPF_REG_1, BPF_REG_2),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, BPF_PSEUDO_KFUNC_CALL, 0, 0),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .prog_type = BPF_PROG_TYPE_SCHED_CLS,
+ .fixup_kfunc_btf_id = {
+ { "bpf_kfunc_call_test_acquire", 3 },
+ { "bpf_kfunc_call_test_release", 9 },
+ { "bpf_kfunc_call_test_release", 13 },
+ { "bpf_kfunc_call_test_release", 17 },
+ },
+ .result_unpriv = REJECT,
+ .result = REJECT,
+ .errstr = "variable ptr_ access var_off=(0x0; 0x7) disallowed",
+},
+{
+ "calls: invalid kfunc call: referenced arg needs refcounted PTR_TO_BTF_ID",
+ .insns = {
+ BPF_MOV64_REG(BPF_REG_1, BPF_REG_10),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -8),
+ BPF_ST_MEM(BPF_DW, BPF_REG_1, 0, 0),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, BPF_PSEUDO_KFUNC_CALL, 0, 0),
+ BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 1),
+ BPF_EXIT_INSN(),
+ BPF_MOV64_REG(BPF_REG_6, BPF_REG_0),
+ BPF_MOV64_REG(BPF_REG_1, BPF_REG_0),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, BPF_PSEUDO_KFUNC_CALL, 0, 0),
+ BPF_LDX_MEM(BPF_DW, BPF_REG_1, BPF_REG_6, 16),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, BPF_PSEUDO_KFUNC_CALL, 0, 0),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .prog_type = BPF_PROG_TYPE_SCHED_CLS,
+ .fixup_kfunc_btf_id = {
+ { "bpf_kfunc_call_test_acquire", 3 },
+ { "bpf_kfunc_call_test_ref", 8 },
+ { "bpf_kfunc_call_test_ref", 10 },
+ },
+ .result_unpriv = REJECT,
+ .result = REJECT,
+ .errstr = "R1 must be",
+},
+{
+ "calls: valid kfunc call: referenced arg needs refcounted PTR_TO_BTF_ID",
+ .insns = {
+ BPF_MOV64_REG(BPF_REG_1, BPF_REG_10),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -8),
+ BPF_ST_MEM(BPF_DW, BPF_REG_1, 0, 0),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, BPF_PSEUDO_KFUNC_CALL, 0, 0),
+ BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 1),
+ BPF_EXIT_INSN(),
+ BPF_MOV64_REG(BPF_REG_6, BPF_REG_0),
+ BPF_MOV64_REG(BPF_REG_1, BPF_REG_0),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, BPF_PSEUDO_KFUNC_CALL, 0, 0),
+ BPF_MOV64_REG(BPF_REG_1, BPF_REG_6),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, BPF_PSEUDO_KFUNC_CALL, 0, 0),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .prog_type = BPF_PROG_TYPE_SCHED_CLS,
+ .fixup_kfunc_btf_id = {
+ { "bpf_kfunc_call_test_acquire", 3 },
+ { "bpf_kfunc_call_test_ref", 8 },
+ { "bpf_kfunc_call_test_release", 10 },
+ },
+ .result_unpriv = REJECT,
+ .result = ACCEPT,
+},
+{
"calls: basic sanity",
.insns = {
BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 2),
@@ -11,7 +288,7 @@
.result = ACCEPT,
},
{
- "calls: not on unpriviledged",
+ "calls: not on unprivileged",
.insns = {
BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 2),
BPF_MOV64_IMM(BPF_REG_0, 1),
@@ -19,7 +296,7 @@
BPF_MOV64_IMM(BPF_REG_0, 2),
BPF_EXIT_INSN(),
},
- .errstr_unpriv = "function calls to other bpf functions are allowed for",
+ .errstr_unpriv = "loading/calling other bpf or kernel functions are allowed for",
.result_unpriv = REJECT,
.result = ACCEPT,
.retval = 1,
@@ -71,7 +348,7 @@
},
.prog_type = BPF_PROG_TYPE_SCHED_CLS,
.result = REJECT,
- .errstr = "R0 invalid mem access 'inv'",
+ .errstr = "R0 invalid mem access 'scalar'",
},
{
"calls: multiple ret types in subprog 2",
@@ -136,7 +413,7 @@
{
"calls: wrong src reg",
.insns = {
- BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 2, 0, 0),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 3, 0, 0),
BPF_MOV64_IMM(BPF_REG_0, 1),
BPF_EXIT_INSN(),
},
@@ -165,7 +442,7 @@
BPF_EXIT_INSN(),
},
.prog_type = BPF_PROG_TYPE_TRACEPOINT,
- .errstr = "back-edge from insn 0 to 0",
+ .errstr = "the call stack of 9 frames is too deep",
.result = REJECT,
},
{
@@ -374,7 +651,7 @@
BPF_EXIT_INSN(),
},
.result = REJECT,
- .errstr = "R6 invalid mem access 'inv'",
+ .errstr = "R6 invalid mem access 'scalar'",
.prog_type = BPF_PROG_TYPE_XDP,
.flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
},
@@ -397,7 +674,7 @@
BPF_MOV64_IMM(BPF_REG_0, 1),
BPF_EXIT_INSN(),
},
- .errstr_unpriv = "function calls to other bpf functions are allowed for",
+ .errstr_unpriv = "loading/calling other bpf or kernel functions are allowed for",
.fixup_map_hash_48b = { 3 },
.result_unpriv = REJECT,
.result = ACCEPT,
@@ -522,7 +799,7 @@
BPF_EXIT_INSN(),
},
.prog_type = BPF_PROG_TYPE_TRACEPOINT,
- .errstr = "back-edge",
+ .errstr = "the call stack of 9 frames is too deep",
.result = REJECT,
},
{
@@ -534,7 +811,7 @@
BPF_EXIT_INSN(),
},
.prog_type = BPF_PROG_TYPE_TRACEPOINT,
- .errstr = "back-edge",
+ .errstr = "the call stack of 9 frames is too deep",
.result = REJECT,
},
{
@@ -647,13 +924,14 @@
.result = REJECT,
},
{
- "calls: ld_abs with changing ctx data in callee",
+ "calls: subprog call with ld_abs in main prog",
.insns = {
BPF_MOV64_REG(BPF_REG_6, BPF_REG_1),
BPF_LD_ABS(BPF_B, 0),
BPF_LD_ABS(BPF_H, 0),
BPF_LD_ABS(BPF_W, 0),
BPF_MOV64_REG(BPF_REG_7, BPF_REG_6),
+ BPF_MOV64_REG(BPF_REG_1, BPF_REG_6),
BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 5),
BPF_MOV64_REG(BPF_REG_6, BPF_REG_7),
BPF_LD_ABS(BPF_B, 0),
@@ -666,8 +944,7 @@
BPF_EXIT_INSN(),
},
.prog_type = BPF_PROG_TYPE_SCHED_CLS,
- .errstr = "BPF_LD_[ABS|IND] instructions cannot be mixed",
- .result = REJECT,
+ .result = ACCEPT,
},
{
"calls: two calls with bad fallthrough",
@@ -1228,7 +1505,9 @@
.prog_type = BPF_PROG_TYPE_XDP,
.fixup_map_hash_8b = { 23 },
.result = REJECT,
- .errstr = "invalid read from stack off -16+0 size 8",
+ .errstr = "R0 invalid mem access 'scalar'",
+ .result_unpriv = REJECT,
+ .errstr_unpriv = "invalid read from stack R7 off=-16 size=8",
},
{
"calls: two calls that receive map_value via arg=ptr_stack_of_caller. test1",
@@ -1580,7 +1859,7 @@
.prog_type = BPF_PROG_TYPE_SCHED_CLS,
.fixup_map_hash_8b = { 12, 22 },
.result = REJECT,
- .errstr = "R0 invalid mem access 'inv'",
+ .errstr = "R0 invalid mem access 'scalar'",
},
{
"calls: pkt_ptr spill into caller stack",
@@ -1948,19 +2227,22 @@
* that fp-8 stack slot was unused in the fall-through
* branch and will accept the program incorrectly
*/
- BPF_JMP_IMM(BPF_JGT, BPF_REG_1, 2, 2),
+ BPF_EMIT_CALL(BPF_FUNC_get_prandom_u32),
+ BPF_JMP_IMM(BPF_JGT, BPF_REG_0, 2, 2),
BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
BPF_JMP_IMM(BPF_JA, 0, 0, 0),
BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
BPF_LD_MAP_FD(BPF_REG_1, 0),
BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
BPF_EXIT_INSN(),
},
- .fixup_map_hash_48b = { 6 },
- .errstr = "invalid indirect read from stack off -8+0 size 8",
- .result = REJECT,
- .prog_type = BPF_PROG_TYPE_XDP,
+ .fixup_map_hash_48b = { 7 },
+ .errstr_unpriv = "invalid indirect read from stack R2 off -8+0 size 8",
+ .result_unpriv = REJECT,
+ /* in privileged mode reads from uninitialized stack locations are permitted */
+ .result = ACCEPT,
},
{
"calls: ctx read at start of subprog",
@@ -1977,7 +2259,7 @@
BPF_EXIT_INSN(),
},
.prog_type = BPF_PROG_TYPE_SOCKET_FILTER,
- .errstr_unpriv = "function calls to other bpf functions are allowed for",
+ .errstr_unpriv = "loading/calling other bpf or kernel functions are allowed for",
.result_unpriv = REJECT,
.result = ACCEPT,
},
@@ -2003,7 +2285,7 @@
BPF_EXIT_INSN(),
},
.prog_type = BPF_PROG_TYPE_SOCKET_FILTER,
- .errstr_unpriv = "function calls to other bpf functions are allowed for",
+ .errstr_unpriv = "loading/calling other bpf or kernel functions are allowed for",
.errstr = "!read_ok",
.result = REJECT,
},
@@ -2028,7 +2310,89 @@
BPF_EXIT_INSN(),
},
.prog_type = BPF_PROG_TYPE_SOCKET_FILTER,
- .errstr_unpriv = "function calls to other bpf functions are allowed for",
+ .errstr_unpriv = "loading/calling other bpf or kernel functions are allowed for",
.errstr = "!read_ok",
.result = REJECT,
},
+/* Make sure that verifier.c:states_equal() considers IDs from all
+ * frames when building 'idmap' for check_ids().
+ */
+{
+ "calls: check_ids() across call boundary",
+ .insns = {
+ /* Function main() */
+ BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
+ /* fp[-24] = map_lookup_elem(...) ; get a MAP_VALUE_PTR_OR_NULL with some ID */
+ BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+ BPF_LD_MAP_FD(BPF_REG_1,
+ 0),
+ BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem),
+ BPF_STX_MEM(BPF_DW, BPF_REG_FP, BPF_REG_0, -24),
+ /* fp[-32] = map_lookup_elem(...) ; get a MAP_VALUE_PTR_OR_NULL with some ID */
+ BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+ BPF_LD_MAP_FD(BPF_REG_1,
+ 0),
+ BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem),
+ BPF_STX_MEM(BPF_DW, BPF_REG_FP, BPF_REG_0, -32),
+ /* call foo(&fp[-24], &fp[-32]) ; both arguments have IDs in the current
+ * ; stack frame
+ */
+ BPF_MOV64_REG(BPF_REG_1, BPF_REG_FP),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -24),
+ BPF_MOV64_REG(BPF_REG_2, BPF_REG_FP),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -32),
+ BPF_CALL_REL(2),
+ /* exit 0 */
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ /* Function foo()
+ *
+ * r9 = &frame[0].fp[-24] ; save arguments in the callee saved registers,
+ * r8 = &frame[0].fp[-32] ; arguments are pointers to pointers to map value
+ */
+ BPF_MOV64_REG(BPF_REG_9, BPF_REG_1),
+ BPF_MOV64_REG(BPF_REG_8, BPF_REG_2),
+ /* r7 = ktime_get_ns() */
+ BPF_EMIT_CALL(BPF_FUNC_ktime_get_ns),
+ BPF_MOV64_REG(BPF_REG_7, BPF_REG_0),
+ /* r6 = ktime_get_ns() */
+ BPF_EMIT_CALL(BPF_FUNC_ktime_get_ns),
+ BPF_MOV64_REG(BPF_REG_6, BPF_REG_0),
+ /* if r6 > r7 goto +1 ; no new information about the state is derived from
+ * ; this check, thus produced verifier states differ
+ * ; only in 'insn_idx'
+ * r9 = r8
+ */
+ BPF_JMP_REG(BPF_JGT, BPF_REG_6, BPF_REG_7, 1),
+ BPF_MOV64_REG(BPF_REG_9, BPF_REG_8),
+ /* r9 = *r9 ; verifier get's to this point via two paths:
+ * ; (I) one including r9 = r8, verified first;
+ * ; (II) one excluding r9 = r8, verified next.
+ * ; After load of *r9 to r9 the frame[0].fp[-24].id == r9.id.
+ * ; Suppose that checkpoint is created here via path (I).
+ * ; When verifying via (II) the r9.id must be compared against
+ * ; frame[0].fp[-24].id, otherwise (I) and (II) would be
+ * ; incorrectly deemed equivalent.
+ * if r9 == 0 goto <exit>
+ */
+ BPF_LDX_MEM(BPF_DW, BPF_REG_9, BPF_REG_9, 0),
+ BPF_JMP_IMM(BPF_JEQ, BPF_REG_9, 0, 1),
+ /* r8 = *r8 ; read map value via r8, this is not safe
+ * r0 = *r8 ; because r8 might be not equal to r9.
+ */
+ BPF_LDX_MEM(BPF_DW, BPF_REG_8, BPF_REG_8, 0),
+ BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_8, 0),
+ /* exit 0 */
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .flags = BPF_F_TEST_STATE_FREQ,
+ .fixup_map_hash_8b = { 3, 9 },
+ .result = REJECT,
+ .errstr = "R8 invalid mem access 'map_value_or_null'",
+ .result_unpriv = REJECT,
+ .errstr_unpriv = "",
+ .prog_type = BPF_PROG_TYPE_CGROUP_SKB,
+},
diff --git a/tools/testing/selftests/bpf/verifier/cfg.c b/tools/testing/selftests/bpf/verifier/cfg.c
deleted file mode 100644
index 4eb76ed739ce..000000000000
--- a/tools/testing/selftests/bpf/verifier/cfg.c
+++ /dev/null
@@ -1,73 +0,0 @@
-{
- "unreachable",
- .insns = {
- BPF_EXIT_INSN(),
- BPF_EXIT_INSN(),
- },
- .errstr = "unreachable",
- .result = REJECT,
-},
-{
- "unreachable2",
- .insns = {
- BPF_JMP_IMM(BPF_JA, 0, 0, 1),
- BPF_JMP_IMM(BPF_JA, 0, 0, 0),
- BPF_EXIT_INSN(),
- },
- .errstr = "unreachable",
- .result = REJECT,
-},
-{
- "out of range jump",
- .insns = {
- BPF_JMP_IMM(BPF_JA, 0, 0, 1),
- BPF_EXIT_INSN(),
- },
- .errstr = "jump out of range",
- .result = REJECT,
-},
-{
- "out of range jump2",
- .insns = {
- BPF_JMP_IMM(BPF_JA, 0, 0, -2),
- BPF_EXIT_INSN(),
- },
- .errstr = "jump out of range",
- .result = REJECT,
-},
-{
- "loop (back-edge)",
- .insns = {
- BPF_JMP_IMM(BPF_JA, 0, 0, -1),
- BPF_EXIT_INSN(),
- },
- .errstr = "unreachable insn 1",
- .errstr_unpriv = "back-edge",
- .result = REJECT,
-},
-{
- "loop2 (back-edge)",
- .insns = {
- BPF_MOV64_REG(BPF_REG_1, BPF_REG_0),
- BPF_MOV64_REG(BPF_REG_2, BPF_REG_0),
- BPF_MOV64_REG(BPF_REG_3, BPF_REG_0),
- BPF_JMP_IMM(BPF_JA, 0, 0, -4),
- BPF_EXIT_INSN(),
- },
- .errstr = "unreachable insn 4",
- .errstr_unpriv = "back-edge",
- .result = REJECT,
-},
-{
- "conditional loop",
- .insns = {
- BPF_MOV64_REG(BPF_REG_0, BPF_REG_1),
- BPF_MOV64_REG(BPF_REG_2, BPF_REG_0),
- BPF_MOV64_REG(BPF_REG_3, BPF_REG_0),
- BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, 0, -3),
- BPF_EXIT_INSN(),
- },
- .errstr = "infinite loop detected",
- .errstr_unpriv = "back-edge",
- .result = REJECT,
-},
diff --git a/tools/testing/selftests/bpf/verifier/cgroup_inv_retcode.c b/tools/testing/selftests/bpf/verifier/cgroup_inv_retcode.c
deleted file mode 100644
index 6d65fe3e7321..000000000000
--- a/tools/testing/selftests/bpf/verifier/cgroup_inv_retcode.c
+++ /dev/null
@@ -1,72 +0,0 @@
-{
- "bpf_exit with invalid return code. test1",
- .insns = {
- BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1, 0),
- BPF_EXIT_INSN(),
- },
- .errstr = "R0 has value (0x0; 0xffffffff)",
- .result = REJECT,
- .prog_type = BPF_PROG_TYPE_CGROUP_SOCK,
-},
-{
- "bpf_exit with invalid return code. test2",
- .insns = {
- BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1, 0),
- BPF_ALU64_IMM(BPF_AND, BPF_REG_0, 1),
- BPF_EXIT_INSN(),
- },
- .result = ACCEPT,
- .prog_type = BPF_PROG_TYPE_CGROUP_SOCK,
-},
-{
- "bpf_exit with invalid return code. test3",
- .insns = {
- BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1, 0),
- BPF_ALU64_IMM(BPF_AND, BPF_REG_0, 3),
- BPF_EXIT_INSN(),
- },
- .errstr = "R0 has value (0x0; 0x3)",
- .result = REJECT,
- .prog_type = BPF_PROG_TYPE_CGROUP_SOCK,
-},
-{
- "bpf_exit with invalid return code. test4",
- .insns = {
- BPF_MOV64_IMM(BPF_REG_0, 1),
- BPF_EXIT_INSN(),
- },
- .result = ACCEPT,
- .prog_type = BPF_PROG_TYPE_CGROUP_SOCK,
-},
-{
- "bpf_exit with invalid return code. test5",
- .insns = {
- BPF_MOV64_IMM(BPF_REG_0, 2),
- BPF_EXIT_INSN(),
- },
- .errstr = "R0 has value (0x2; 0x0)",
- .result = REJECT,
- .prog_type = BPF_PROG_TYPE_CGROUP_SOCK,
-},
-{
- "bpf_exit with invalid return code. test6",
- .insns = {
- BPF_MOV64_REG(BPF_REG_0, BPF_REG_1),
- BPF_EXIT_INSN(),
- },
- .errstr = "R0 is not a known value (ctx)",
- .result = REJECT,
- .prog_type = BPF_PROG_TYPE_CGROUP_SOCK,
-},
-{
- "bpf_exit with invalid return code. test7",
- .insns = {
- BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1, 0),
- BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, 4),
- BPF_ALU64_REG(BPF_MUL, BPF_REG_0, BPF_REG_2),
- BPF_EXIT_INSN(),
- },
- .errstr = "R0 has unknown scalar value",
- .result = REJECT,
- .prog_type = BPF_PROG_TYPE_CGROUP_SOCK,
-},
diff --git a/tools/testing/selftests/bpf/verifier/cgroup_skb.c b/tools/testing/selftests/bpf/verifier/cgroup_skb.c
deleted file mode 100644
index 52e4c03b076b..000000000000
--- a/tools/testing/selftests/bpf/verifier/cgroup_skb.c
+++ /dev/null
@@ -1,197 +0,0 @@
-{
- "direct packet read test#1 for CGROUP_SKB",
- .insns = {
- BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1,
- offsetof(struct __sk_buff, data)),
- BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1,
- offsetof(struct __sk_buff, data_end)),
- BPF_LDX_MEM(BPF_W, BPF_REG_4, BPF_REG_1,
- offsetof(struct __sk_buff, len)),
- BPF_LDX_MEM(BPF_W, BPF_REG_5, BPF_REG_1,
- offsetof(struct __sk_buff, pkt_type)),
- BPF_LDX_MEM(BPF_W, BPF_REG_6, BPF_REG_1,
- offsetof(struct __sk_buff, mark)),
- BPF_STX_MEM(BPF_W, BPF_REG_1, BPF_REG_6,
- offsetof(struct __sk_buff, mark)),
- BPF_LDX_MEM(BPF_W, BPF_REG_7, BPF_REG_1,
- offsetof(struct __sk_buff, queue_mapping)),
- BPF_LDX_MEM(BPF_W, BPF_REG_8, BPF_REG_1,
- offsetof(struct __sk_buff, protocol)),
- BPF_LDX_MEM(BPF_W, BPF_REG_9, BPF_REG_1,
- offsetof(struct __sk_buff, vlan_present)),
- BPF_MOV64_REG(BPF_REG_0, BPF_REG_2),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 8),
- BPF_JMP_REG(BPF_JGT, BPF_REG_0, BPF_REG_3, 1),
- BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_2, 0),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- },
- .result = ACCEPT,
- .result_unpriv = REJECT,
- .errstr_unpriv = "invalid bpf_context access off=76 size=4",
- .prog_type = BPF_PROG_TYPE_CGROUP_SKB,
-},
-{
- "direct packet read test#2 for CGROUP_SKB",
- .insns = {
- BPF_LDX_MEM(BPF_W, BPF_REG_4, BPF_REG_1,
- offsetof(struct __sk_buff, vlan_tci)),
- BPF_LDX_MEM(BPF_W, BPF_REG_5, BPF_REG_1,
- offsetof(struct __sk_buff, vlan_proto)),
- BPF_LDX_MEM(BPF_W, BPF_REG_6, BPF_REG_1,
- offsetof(struct __sk_buff, priority)),
- BPF_STX_MEM(BPF_W, BPF_REG_1, BPF_REG_6,
- offsetof(struct __sk_buff, priority)),
- BPF_LDX_MEM(BPF_W, BPF_REG_7, BPF_REG_1,
- offsetof(struct __sk_buff, ingress_ifindex)),
- BPF_LDX_MEM(BPF_W, BPF_REG_8, BPF_REG_1,
- offsetof(struct __sk_buff, tc_index)),
- BPF_LDX_MEM(BPF_W, BPF_REG_9, BPF_REG_1,
- offsetof(struct __sk_buff, hash)),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- },
- .result = ACCEPT,
- .prog_type = BPF_PROG_TYPE_CGROUP_SKB,
-},
-{
- "direct packet read test#3 for CGROUP_SKB",
- .insns = {
- BPF_LDX_MEM(BPF_W, BPF_REG_4, BPF_REG_1,
- offsetof(struct __sk_buff, cb[0])),
- BPF_LDX_MEM(BPF_W, BPF_REG_5, BPF_REG_1,
- offsetof(struct __sk_buff, cb[1])),
- BPF_LDX_MEM(BPF_W, BPF_REG_6, BPF_REG_1,
- offsetof(struct __sk_buff, cb[2])),
- BPF_LDX_MEM(BPF_W, BPF_REG_7, BPF_REG_1,
- offsetof(struct __sk_buff, cb[3])),
- BPF_LDX_MEM(BPF_W, BPF_REG_8, BPF_REG_1,
- offsetof(struct __sk_buff, cb[4])),
- BPF_LDX_MEM(BPF_W, BPF_REG_9, BPF_REG_1,
- offsetof(struct __sk_buff, napi_id)),
- BPF_STX_MEM(BPF_W, BPF_REG_1, BPF_REG_4,
- offsetof(struct __sk_buff, cb[0])),
- BPF_STX_MEM(BPF_W, BPF_REG_1, BPF_REG_5,
- offsetof(struct __sk_buff, cb[1])),
- BPF_STX_MEM(BPF_W, BPF_REG_1, BPF_REG_6,
- offsetof(struct __sk_buff, cb[2])),
- BPF_STX_MEM(BPF_W, BPF_REG_1, BPF_REG_7,
- offsetof(struct __sk_buff, cb[3])),
- BPF_STX_MEM(BPF_W, BPF_REG_1, BPF_REG_8,
- offsetof(struct __sk_buff, cb[4])),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- },
- .result = ACCEPT,
- .prog_type = BPF_PROG_TYPE_CGROUP_SKB,
-},
-{
- "direct packet read test#4 for CGROUP_SKB",
- .insns = {
- BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1,
- offsetof(struct __sk_buff, family)),
- BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1,
- offsetof(struct __sk_buff, remote_ip4)),
- BPF_LDX_MEM(BPF_W, BPF_REG_4, BPF_REG_1,
- offsetof(struct __sk_buff, local_ip4)),
- BPF_LDX_MEM(BPF_W, BPF_REG_5, BPF_REG_1,
- offsetof(struct __sk_buff, remote_ip6[0])),
- BPF_LDX_MEM(BPF_W, BPF_REG_5, BPF_REG_1,
- offsetof(struct __sk_buff, remote_ip6[1])),
- BPF_LDX_MEM(BPF_W, BPF_REG_5, BPF_REG_1,
- offsetof(struct __sk_buff, remote_ip6[2])),
- BPF_LDX_MEM(BPF_W, BPF_REG_5, BPF_REG_1,
- offsetof(struct __sk_buff, remote_ip6[3])),
- BPF_LDX_MEM(BPF_W, BPF_REG_6, BPF_REG_1,
- offsetof(struct __sk_buff, local_ip6[0])),
- BPF_LDX_MEM(BPF_W, BPF_REG_6, BPF_REG_1,
- offsetof(struct __sk_buff, local_ip6[1])),
- BPF_LDX_MEM(BPF_W, BPF_REG_6, BPF_REG_1,
- offsetof(struct __sk_buff, local_ip6[2])),
- BPF_LDX_MEM(BPF_W, BPF_REG_6, BPF_REG_1,
- offsetof(struct __sk_buff, local_ip6[3])),
- BPF_LDX_MEM(BPF_W, BPF_REG_7, BPF_REG_1,
- offsetof(struct __sk_buff, remote_port)),
- BPF_LDX_MEM(BPF_W, BPF_REG_8, BPF_REG_1,
- offsetof(struct __sk_buff, local_port)),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- },
- .result = ACCEPT,
- .prog_type = BPF_PROG_TYPE_CGROUP_SKB,
-},
-{
- "invalid access of tc_classid for CGROUP_SKB",
- .insns = {
- BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1,
- offsetof(struct __sk_buff, tc_classid)),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- },
- .result = REJECT,
- .errstr = "invalid bpf_context access",
- .prog_type = BPF_PROG_TYPE_CGROUP_SKB,
-},
-{
- "invalid access of data_meta for CGROUP_SKB",
- .insns = {
- BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1,
- offsetof(struct __sk_buff, data_meta)),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- },
- .result = REJECT,
- .errstr = "invalid bpf_context access",
- .prog_type = BPF_PROG_TYPE_CGROUP_SKB,
-},
-{
- "invalid access of flow_keys for CGROUP_SKB",
- .insns = {
- BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1,
- offsetof(struct __sk_buff, flow_keys)),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- },
- .result = REJECT,
- .errstr = "invalid bpf_context access",
- .prog_type = BPF_PROG_TYPE_CGROUP_SKB,
-},
-{
- "invalid write access to napi_id for CGROUP_SKB",
- .insns = {
- BPF_LDX_MEM(BPF_W, BPF_REG_9, BPF_REG_1,
- offsetof(struct __sk_buff, napi_id)),
- BPF_STX_MEM(BPF_W, BPF_REG_1, BPF_REG_9,
- offsetof(struct __sk_buff, napi_id)),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- },
- .result = REJECT,
- .errstr = "invalid bpf_context access",
- .prog_type = BPF_PROG_TYPE_CGROUP_SKB,
-},
-{
- "write tstamp from CGROUP_SKB",
- .insns = {
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_STX_MEM(BPF_DW, BPF_REG_1, BPF_REG_0,
- offsetof(struct __sk_buff, tstamp)),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- },
- .result = ACCEPT,
- .result_unpriv = REJECT,
- .errstr_unpriv = "invalid bpf_context access off=152 size=8",
- .prog_type = BPF_PROG_TYPE_CGROUP_SKB,
-},
-{
- "read tstamp from CGROUP_SKB",
- .insns = {
- BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1,
- offsetof(struct __sk_buff, tstamp)),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- },
- .result = ACCEPT,
- .prog_type = BPF_PROG_TYPE_CGROUP_SKB,
-},
diff --git a/tools/testing/selftests/bpf/verifier/cgroup_storage.c b/tools/testing/selftests/bpf/verifier/cgroup_storage.c
deleted file mode 100644
index 97057c0a1b8a..000000000000
--- a/tools/testing/selftests/bpf/verifier/cgroup_storage.c
+++ /dev/null
@@ -1,220 +0,0 @@
-{
- "valid cgroup storage access",
- .insns = {
- BPF_MOV64_IMM(BPF_REG_2, 0),
- BPF_LD_MAP_FD(BPF_REG_1, 0),
- BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_get_local_storage),
- BPF_LDX_MEM(BPF_W, BPF_REG_1, BPF_REG_0, 0),
- BPF_MOV64_REG(BPF_REG_0, BPF_REG_1),
- BPF_ALU64_IMM(BPF_AND, BPF_REG_0, 1),
- BPF_EXIT_INSN(),
- },
- .fixup_cgroup_storage = { 1 },
- .result = ACCEPT,
- .prog_type = BPF_PROG_TYPE_CGROUP_SKB,
-},
-{
- "invalid cgroup storage access 1",
- .insns = {
- BPF_MOV64_IMM(BPF_REG_2, 0),
- BPF_LD_MAP_FD(BPF_REG_1, 0),
- BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_get_local_storage),
- BPF_LDX_MEM(BPF_W, BPF_REG_1, BPF_REG_0, 0),
- BPF_MOV64_REG(BPF_REG_0, BPF_REG_1),
- BPF_ALU64_IMM(BPF_AND, BPF_REG_0, 1),
- BPF_EXIT_INSN(),
- },
- .fixup_map_hash_8b = { 1 },
- .result = REJECT,
- .errstr = "cannot pass map_type 1 into func bpf_get_local_storage",
- .prog_type = BPF_PROG_TYPE_CGROUP_SKB,
-},
-{
- "invalid cgroup storage access 2",
- .insns = {
- BPF_MOV64_IMM(BPF_REG_2, 0),
- BPF_LD_MAP_FD(BPF_REG_1, 1),
- BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_get_local_storage),
- BPF_ALU64_IMM(BPF_AND, BPF_REG_0, 1),
- BPF_EXIT_INSN(),
- },
- .result = REJECT,
- .errstr = "fd 1 is not pointing to valid bpf_map",
- .prog_type = BPF_PROG_TYPE_CGROUP_SKB,
-},
-{
- "invalid cgroup storage access 3",
- .insns = {
- BPF_MOV64_IMM(BPF_REG_2, 0),
- BPF_LD_MAP_FD(BPF_REG_1, 0),
- BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_get_local_storage),
- BPF_LDX_MEM(BPF_W, BPF_REG_1, BPF_REG_0, 256),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 1),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- },
- .fixup_cgroup_storage = { 1 },
- .result = REJECT,
- .errstr = "invalid access to map value, value_size=64 off=256 size=4",
- .prog_type = BPF_PROG_TYPE_CGROUP_SKB,
-},
-{
- "invalid cgroup storage access 4",
- .insns = {
- BPF_MOV64_IMM(BPF_REG_2, 0),
- BPF_LD_MAP_FD(BPF_REG_1, 0),
- BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_get_local_storage),
- BPF_LDX_MEM(BPF_W, BPF_REG_1, BPF_REG_0, -2),
- BPF_MOV64_REG(BPF_REG_0, BPF_REG_1),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 1),
- BPF_EXIT_INSN(),
- },
- .fixup_cgroup_storage = { 1 },
- .result = REJECT,
- .errstr = "invalid access to map value, value_size=64 off=-2 size=4",
- .prog_type = BPF_PROG_TYPE_CGROUP_SKB,
- .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
-},
-{
- "invalid cgroup storage access 5",
- .insns = {
- BPF_MOV64_IMM(BPF_REG_2, 7),
- BPF_LD_MAP_FD(BPF_REG_1, 0),
- BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_get_local_storage),
- BPF_LDX_MEM(BPF_W, BPF_REG_1, BPF_REG_0, 0),
- BPF_MOV64_REG(BPF_REG_0, BPF_REG_1),
- BPF_ALU64_IMM(BPF_AND, BPF_REG_0, 1),
- BPF_EXIT_INSN(),
- },
- .fixup_cgroup_storage = { 1 },
- .result = REJECT,
- .errstr = "get_local_storage() doesn't support non-zero flags",
- .prog_type = BPF_PROG_TYPE_CGROUP_SKB,
-},
-{
- "invalid cgroup storage access 6",
- .insns = {
- BPF_MOV64_REG(BPF_REG_2, BPF_REG_1),
- BPF_LD_MAP_FD(BPF_REG_1, 0),
- BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_get_local_storage),
- BPF_LDX_MEM(BPF_W, BPF_REG_1, BPF_REG_0, 0),
- BPF_MOV64_REG(BPF_REG_0, BPF_REG_1),
- BPF_ALU64_IMM(BPF_AND, BPF_REG_0, 1),
- BPF_EXIT_INSN(),
- },
- .fixup_cgroup_storage = { 1 },
- .result = REJECT,
- .errstr = "get_local_storage() doesn't support non-zero flags",
- .errstr_unpriv = "R2 leaks addr into helper function",
- .prog_type = BPF_PROG_TYPE_CGROUP_SKB,
-},
-{
- "valid per-cpu cgroup storage access",
- .insns = {
- BPF_MOV64_IMM(BPF_REG_2, 0),
- BPF_LD_MAP_FD(BPF_REG_1, 0),
- BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_get_local_storage),
- BPF_LDX_MEM(BPF_W, BPF_REG_1, BPF_REG_0, 0),
- BPF_MOV64_REG(BPF_REG_0, BPF_REG_1),
- BPF_ALU64_IMM(BPF_AND, BPF_REG_0, 1),
- BPF_EXIT_INSN(),
- },
- .fixup_percpu_cgroup_storage = { 1 },
- .result = ACCEPT,
- .prog_type = BPF_PROG_TYPE_CGROUP_SKB,
-},
-{
- "invalid per-cpu cgroup storage access 1",
- .insns = {
- BPF_MOV64_IMM(BPF_REG_2, 0),
- BPF_LD_MAP_FD(BPF_REG_1, 0),
- BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_get_local_storage),
- BPF_LDX_MEM(BPF_W, BPF_REG_1, BPF_REG_0, 0),
- BPF_MOV64_REG(BPF_REG_0, BPF_REG_1),
- BPF_ALU64_IMM(BPF_AND, BPF_REG_0, 1),
- BPF_EXIT_INSN(),
- },
- .fixup_map_hash_8b = { 1 },
- .result = REJECT,
- .errstr = "cannot pass map_type 1 into func bpf_get_local_storage",
- .prog_type = BPF_PROG_TYPE_CGROUP_SKB,
-},
-{
- "invalid per-cpu cgroup storage access 2",
- .insns = {
- BPF_MOV64_IMM(BPF_REG_2, 0),
- BPF_LD_MAP_FD(BPF_REG_1, 1),
- BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_get_local_storage),
- BPF_ALU64_IMM(BPF_AND, BPF_REG_0, 1),
- BPF_EXIT_INSN(),
- },
- .result = REJECT,
- .errstr = "fd 1 is not pointing to valid bpf_map",
- .prog_type = BPF_PROG_TYPE_CGROUP_SKB,
-},
-{
- "invalid per-cpu cgroup storage access 3",
- .insns = {
- BPF_MOV64_IMM(BPF_REG_2, 0),
- BPF_LD_MAP_FD(BPF_REG_1, 0),
- BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_get_local_storage),
- BPF_LDX_MEM(BPF_W, BPF_REG_1, BPF_REG_0, 256),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 1),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- },
- .fixup_percpu_cgroup_storage = { 1 },
- .result = REJECT,
- .errstr = "invalid access to map value, value_size=64 off=256 size=4",
- .prog_type = BPF_PROG_TYPE_CGROUP_SKB,
-},
-{
- "invalid per-cpu cgroup storage access 4",
- .insns = {
- BPF_MOV64_IMM(BPF_REG_2, 0),
- BPF_LD_MAP_FD(BPF_REG_1, 0),
- BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_get_local_storage),
- BPF_LDX_MEM(BPF_W, BPF_REG_1, BPF_REG_0, -2),
- BPF_MOV64_REG(BPF_REG_0, BPF_REG_1),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 1),
- BPF_EXIT_INSN(),
- },
- .fixup_cgroup_storage = { 1 },
- .result = REJECT,
- .errstr = "invalid access to map value, value_size=64 off=-2 size=4",
- .prog_type = BPF_PROG_TYPE_CGROUP_SKB,
- .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
-},
-{
- "invalid per-cpu cgroup storage access 5",
- .insns = {
- BPF_MOV64_IMM(BPF_REG_2, 7),
- BPF_LD_MAP_FD(BPF_REG_1, 0),
- BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_get_local_storage),
- BPF_LDX_MEM(BPF_W, BPF_REG_1, BPF_REG_0, 0),
- BPF_MOV64_REG(BPF_REG_0, BPF_REG_1),
- BPF_ALU64_IMM(BPF_AND, BPF_REG_0, 1),
- BPF_EXIT_INSN(),
- },
- .fixup_percpu_cgroup_storage = { 1 },
- .result = REJECT,
- .errstr = "get_local_storage() doesn't support non-zero flags",
- .prog_type = BPF_PROG_TYPE_CGROUP_SKB,
-},
-{
- "invalid per-cpu cgroup storage access 6",
- .insns = {
- BPF_MOV64_REG(BPF_REG_2, BPF_REG_1),
- BPF_LD_MAP_FD(BPF_REG_1, 0),
- BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_get_local_storage),
- BPF_LDX_MEM(BPF_W, BPF_REG_1, BPF_REG_0, 0),
- BPF_MOV64_REG(BPF_REG_0, BPF_REG_1),
- BPF_ALU64_IMM(BPF_AND, BPF_REG_0, 1),
- BPF_EXIT_INSN(),
- },
- .fixup_percpu_cgroup_storage = { 1 },
- .result = REJECT,
- .errstr = "get_local_storage() doesn't support non-zero flags",
- .errstr_unpriv = "R2 leaks addr into helper function",
- .prog_type = BPF_PROG_TYPE_CGROUP_SKB,
-},
diff --git a/tools/testing/selftests/bpf/verifier/const_or.c b/tools/testing/selftests/bpf/verifier/const_or.c
deleted file mode 100644
index 6c214c58e8d4..000000000000
--- a/tools/testing/selftests/bpf/verifier/const_or.c
+++ /dev/null
@@ -1,60 +0,0 @@
-{
- "constant register |= constant should keep constant type",
- .insns = {
- BPF_MOV64_REG(BPF_REG_1, BPF_REG_10),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -48),
- BPF_MOV64_IMM(BPF_REG_2, 34),
- BPF_ALU64_IMM(BPF_OR, BPF_REG_2, 13),
- BPF_MOV64_IMM(BPF_REG_3, 0),
- BPF_EMIT_CALL(BPF_FUNC_probe_read_kernel),
- BPF_EXIT_INSN(),
- },
- .result = ACCEPT,
- .prog_type = BPF_PROG_TYPE_TRACEPOINT,
-},
-{
- "constant register |= constant should not bypass stack boundary checks",
- .insns = {
- BPF_MOV64_REG(BPF_REG_1, BPF_REG_10),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -48),
- BPF_MOV64_IMM(BPF_REG_2, 34),
- BPF_ALU64_IMM(BPF_OR, BPF_REG_2, 24),
- BPF_MOV64_IMM(BPF_REG_3, 0),
- BPF_EMIT_CALL(BPF_FUNC_probe_read_kernel),
- BPF_EXIT_INSN(),
- },
- .errstr = "invalid stack type R1 off=-48 access_size=58",
- .result = REJECT,
- .prog_type = BPF_PROG_TYPE_TRACEPOINT,
-},
-{
- "constant register |= constant register should keep constant type",
- .insns = {
- BPF_MOV64_REG(BPF_REG_1, BPF_REG_10),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -48),
- BPF_MOV64_IMM(BPF_REG_2, 34),
- BPF_MOV64_IMM(BPF_REG_4, 13),
- BPF_ALU64_REG(BPF_OR, BPF_REG_2, BPF_REG_4),
- BPF_MOV64_IMM(BPF_REG_3, 0),
- BPF_EMIT_CALL(BPF_FUNC_probe_read_kernel),
- BPF_EXIT_INSN(),
- },
- .result = ACCEPT,
- .prog_type = BPF_PROG_TYPE_TRACEPOINT,
-},
-{
- "constant register |= constant register should not bypass stack boundary checks",
- .insns = {
- BPF_MOV64_REG(BPF_REG_1, BPF_REG_10),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -48),
- BPF_MOV64_IMM(BPF_REG_2, 34),
- BPF_MOV64_IMM(BPF_REG_4, 24),
- BPF_ALU64_REG(BPF_OR, BPF_REG_2, BPF_REG_4),
- BPF_MOV64_IMM(BPF_REG_3, 0),
- BPF_EMIT_CALL(BPF_FUNC_probe_read_kernel),
- BPF_EXIT_INSN(),
- },
- .errstr = "invalid stack type R1 off=-48 access_size=58",
- .result = REJECT,
- .prog_type = BPF_PROG_TYPE_TRACEPOINT,
-},
diff --git a/tools/testing/selftests/bpf/verifier/ctx.c b/tools/testing/selftests/bpf/verifier/ctx.c
deleted file mode 100644
index 93d6b1641481..000000000000
--- a/tools/testing/selftests/bpf/verifier/ctx.c
+++ /dev/null
@@ -1,198 +0,0 @@
-{
- "context stores via ST",
- .insns = {
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_ST_MEM(BPF_DW, BPF_REG_1, offsetof(struct __sk_buff, mark), 0),
- BPF_EXIT_INSN(),
- },
- .errstr = "BPF_ST stores into R1 ctx is not allowed",
- .result = REJECT,
- .prog_type = BPF_PROG_TYPE_SCHED_CLS,
-},
-{
- "context stores via XADD",
- .insns = {
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_RAW_INSN(BPF_STX | BPF_XADD | BPF_W, BPF_REG_1,
- BPF_REG_0, offsetof(struct __sk_buff, mark), 0),
- BPF_EXIT_INSN(),
- },
- .errstr = "BPF_XADD stores into R1 ctx is not allowed",
- .result = REJECT,
- .prog_type = BPF_PROG_TYPE_SCHED_CLS,
-},
-{
- "arithmetic ops make PTR_TO_CTX unusable",
- .insns = {
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_1,
- offsetof(struct __sk_buff, data) -
- offsetof(struct __sk_buff, mark)),
- BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1,
- offsetof(struct __sk_buff, mark)),
- BPF_EXIT_INSN(),
- },
- .errstr = "dereference of modified ctx ptr",
- .result = REJECT,
- .prog_type = BPF_PROG_TYPE_SCHED_CLS,
-},
-{
- "pass unmodified ctx pointer to helper",
- .insns = {
- BPF_MOV64_IMM(BPF_REG_2, 0),
- BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
- BPF_FUNC_csum_update),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- },
- .prog_type = BPF_PROG_TYPE_SCHED_CLS,
- .result = ACCEPT,
-},
-{
- "pass modified ctx pointer to helper, 1",
- .insns = {
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -612),
- BPF_MOV64_IMM(BPF_REG_2, 0),
- BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
- BPF_FUNC_csum_update),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- },
- .prog_type = BPF_PROG_TYPE_SCHED_CLS,
- .result = REJECT,
- .errstr = "dereference of modified ctx ptr",
-},
-{
- "pass modified ctx pointer to helper, 2",
- .insns = {
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -612),
- BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
- BPF_FUNC_get_socket_cookie),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- },
- .result_unpriv = REJECT,
- .result = REJECT,
- .errstr_unpriv = "dereference of modified ctx ptr",
- .errstr = "dereference of modified ctx ptr",
-},
-{
- "pass modified ctx pointer to helper, 3",
- .insns = {
- BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, 0),
- BPF_ALU64_IMM(BPF_AND, BPF_REG_3, 4),
- BPF_ALU64_REG(BPF_ADD, BPF_REG_1, BPF_REG_3),
- BPF_MOV64_IMM(BPF_REG_2, 0),
- BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
- BPF_FUNC_csum_update),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- },
- .prog_type = BPF_PROG_TYPE_SCHED_CLS,
- .result = REJECT,
- .errstr = "variable ctx access var_off=(0x0; 0x4)",
-},
-{
- "pass ctx or null check, 1: ctx",
- .insns = {
- BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
- BPF_FUNC_get_netns_cookie),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- },
- .prog_type = BPF_PROG_TYPE_CGROUP_SOCK_ADDR,
- .expected_attach_type = BPF_CGROUP_UDP6_SENDMSG,
- .result = ACCEPT,
-},
-{
- "pass ctx or null check, 2: null",
- .insns = {
- BPF_MOV64_IMM(BPF_REG_1, 0),
- BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
- BPF_FUNC_get_netns_cookie),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- },
- .prog_type = BPF_PROG_TYPE_CGROUP_SOCK_ADDR,
- .expected_attach_type = BPF_CGROUP_UDP6_SENDMSG,
- .result = ACCEPT,
-},
-{
- "pass ctx or null check, 3: 1",
- .insns = {
- BPF_MOV64_IMM(BPF_REG_1, 1),
- BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
- BPF_FUNC_get_netns_cookie),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- },
- .prog_type = BPF_PROG_TYPE_CGROUP_SOCK_ADDR,
- .expected_attach_type = BPF_CGROUP_UDP6_SENDMSG,
- .result = REJECT,
- .errstr = "R1 type=inv expected=ctx",
-},
-{
- "pass ctx or null check, 4: ctx - const",
- .insns = {
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -612),
- BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
- BPF_FUNC_get_netns_cookie),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- },
- .prog_type = BPF_PROG_TYPE_CGROUP_SOCK_ADDR,
- .expected_attach_type = BPF_CGROUP_UDP6_SENDMSG,
- .result = REJECT,
- .errstr = "dereference of modified ctx ptr",
-},
-{
- "pass ctx or null check, 5: null (connect)",
- .insns = {
- BPF_MOV64_IMM(BPF_REG_1, 0),
- BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
- BPF_FUNC_get_netns_cookie),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- },
- .prog_type = BPF_PROG_TYPE_CGROUP_SOCK_ADDR,
- .expected_attach_type = BPF_CGROUP_INET4_CONNECT,
- .result = ACCEPT,
-},
-{
- "pass ctx or null check, 6: null (bind)",
- .insns = {
- BPF_MOV64_IMM(BPF_REG_1, 0),
- BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
- BPF_FUNC_get_netns_cookie),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- },
- .prog_type = BPF_PROG_TYPE_CGROUP_SOCK,
- .expected_attach_type = BPF_CGROUP_INET4_POST_BIND,
- .result = ACCEPT,
-},
-{
- "pass ctx or null check, 7: ctx (bind)",
- .insns = {
- BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
- BPF_FUNC_get_socket_cookie),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- },
- .prog_type = BPF_PROG_TYPE_CGROUP_SOCK,
- .expected_attach_type = BPF_CGROUP_INET4_POST_BIND,
- .result = ACCEPT,
-},
-{
- "pass ctx or null check, 8: null (bind)",
- .insns = {
- BPF_MOV64_IMM(BPF_REG_1, 0),
- BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
- BPF_FUNC_get_socket_cookie),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- },
- .prog_type = BPF_PROG_TYPE_CGROUP_SOCK,
- .expected_attach_type = BPF_CGROUP_INET4_POST_BIND,
- .result = REJECT,
- .errstr = "R1 type=inv expected=ctx",
-},
diff --git a/tools/testing/selftests/bpf/verifier/ctx_sk_lookup.c b/tools/testing/selftests/bpf/verifier/ctx_sk_lookup.c
new file mode 100644
index 000000000000..a2b006e2fd06
--- /dev/null
+++ b/tools/testing/selftests/bpf/verifier/ctx_sk_lookup.c
@@ -0,0 +1,532 @@
+{
+ "valid 1,2,4,8-byte reads from bpf_sk_lookup",
+ .insns = {
+ /* 1-byte read from family field */
+ BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1,
+ offsetof(struct bpf_sk_lookup, family)),
+ BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1,
+ offsetof(struct bpf_sk_lookup, family) + 1),
+ BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1,
+ offsetof(struct bpf_sk_lookup, family) + 2),
+ BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1,
+ offsetof(struct bpf_sk_lookup, family) + 3),
+ /* 2-byte read from family field */
+ BPF_LDX_MEM(BPF_H, BPF_REG_0, BPF_REG_1,
+ offsetof(struct bpf_sk_lookup, family)),
+ BPF_LDX_MEM(BPF_H, BPF_REG_0, BPF_REG_1,
+ offsetof(struct bpf_sk_lookup, family) + 2),
+ /* 4-byte read from family field */
+ BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1,
+ offsetof(struct bpf_sk_lookup, family)),
+
+ /* 1-byte read from protocol field */
+ BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1,
+ offsetof(struct bpf_sk_lookup, protocol)),
+ BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1,
+ offsetof(struct bpf_sk_lookup, protocol) + 1),
+ BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1,
+ offsetof(struct bpf_sk_lookup, protocol) + 2),
+ BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1,
+ offsetof(struct bpf_sk_lookup, protocol) + 3),
+ /* 2-byte read from protocol field */
+ BPF_LDX_MEM(BPF_H, BPF_REG_0, BPF_REG_1,
+ offsetof(struct bpf_sk_lookup, protocol)),
+ BPF_LDX_MEM(BPF_H, BPF_REG_0, BPF_REG_1,
+ offsetof(struct bpf_sk_lookup, protocol) + 2),
+ /* 4-byte read from protocol field */
+ BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1,
+ offsetof(struct bpf_sk_lookup, protocol)),
+
+ /* 1-byte read from remote_ip4 field */
+ BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1,
+ offsetof(struct bpf_sk_lookup, remote_ip4)),
+ BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1,
+ offsetof(struct bpf_sk_lookup, remote_ip4) + 1),
+ BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1,
+ offsetof(struct bpf_sk_lookup, remote_ip4) + 2),
+ BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1,
+ offsetof(struct bpf_sk_lookup, remote_ip4) + 3),
+ /* 2-byte read from remote_ip4 field */
+ BPF_LDX_MEM(BPF_H, BPF_REG_0, BPF_REG_1,
+ offsetof(struct bpf_sk_lookup, remote_ip4)),
+ BPF_LDX_MEM(BPF_H, BPF_REG_0, BPF_REG_1,
+ offsetof(struct bpf_sk_lookup, remote_ip4) + 2),
+ /* 4-byte read from remote_ip4 field */
+ BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1,
+ offsetof(struct bpf_sk_lookup, remote_ip4)),
+
+ /* 1-byte read from remote_ip6 field */
+ BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1,
+ offsetof(struct bpf_sk_lookup, remote_ip6)),
+ BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1,
+ offsetof(struct bpf_sk_lookup, remote_ip6) + 1),
+ BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1,
+ offsetof(struct bpf_sk_lookup, remote_ip6) + 2),
+ BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1,
+ offsetof(struct bpf_sk_lookup, remote_ip6) + 3),
+ BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1,
+ offsetof(struct bpf_sk_lookup, remote_ip6) + 4),
+ BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1,
+ offsetof(struct bpf_sk_lookup, remote_ip6) + 5),
+ BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1,
+ offsetof(struct bpf_sk_lookup, remote_ip6) + 6),
+ BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1,
+ offsetof(struct bpf_sk_lookup, remote_ip6) + 7),
+ BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1,
+ offsetof(struct bpf_sk_lookup, remote_ip6) + 8),
+ BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1,
+ offsetof(struct bpf_sk_lookup, remote_ip6) + 9),
+ BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1,
+ offsetof(struct bpf_sk_lookup, remote_ip6) + 10),
+ BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1,
+ offsetof(struct bpf_sk_lookup, remote_ip6) + 11),
+ BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1,
+ offsetof(struct bpf_sk_lookup, remote_ip6) + 12),
+ BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1,
+ offsetof(struct bpf_sk_lookup, remote_ip6) + 13),
+ BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1,
+ offsetof(struct bpf_sk_lookup, remote_ip6) + 14),
+ BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1,
+ offsetof(struct bpf_sk_lookup, remote_ip6) + 15),
+ /* 2-byte read from remote_ip6 field */
+ BPF_LDX_MEM(BPF_H, BPF_REG_0, BPF_REG_1,
+ offsetof(struct bpf_sk_lookup, remote_ip6)),
+ BPF_LDX_MEM(BPF_H, BPF_REG_0, BPF_REG_1,
+ offsetof(struct bpf_sk_lookup, remote_ip6) + 2),
+ BPF_LDX_MEM(BPF_H, BPF_REG_0, BPF_REG_1,
+ offsetof(struct bpf_sk_lookup, remote_ip6) + 4),
+ BPF_LDX_MEM(BPF_H, BPF_REG_0, BPF_REG_1,
+ offsetof(struct bpf_sk_lookup, remote_ip6) + 6),
+ BPF_LDX_MEM(BPF_H, BPF_REG_0, BPF_REG_1,
+ offsetof(struct bpf_sk_lookup, remote_ip6) + 8),
+ BPF_LDX_MEM(BPF_H, BPF_REG_0, BPF_REG_1,
+ offsetof(struct bpf_sk_lookup, remote_ip6) + 10),
+ BPF_LDX_MEM(BPF_H, BPF_REG_0, BPF_REG_1,
+ offsetof(struct bpf_sk_lookup, remote_ip6) + 12),
+ BPF_LDX_MEM(BPF_H, BPF_REG_0, BPF_REG_1,
+ offsetof(struct bpf_sk_lookup, remote_ip6) + 14),
+ /* 4-byte read from remote_ip6 field */
+ BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1,
+ offsetof(struct bpf_sk_lookup, remote_ip6)),
+ BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1,
+ offsetof(struct bpf_sk_lookup, remote_ip6) + 4),
+ BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1,
+ offsetof(struct bpf_sk_lookup, remote_ip6) + 8),
+ BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1,
+ offsetof(struct bpf_sk_lookup, remote_ip6) + 12),
+
+ /* 1-byte read from remote_port field */
+ BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1,
+ offsetof(struct bpf_sk_lookup, remote_port)),
+ BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1,
+ offsetof(struct bpf_sk_lookup, remote_port) + 1),
+ BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1,
+ offsetof(struct bpf_sk_lookup, remote_port) + 2),
+ BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1,
+ offsetof(struct bpf_sk_lookup, remote_port) + 3),
+ /* 2-byte read from remote_port field */
+ BPF_LDX_MEM(BPF_H, BPF_REG_0, BPF_REG_1,
+ offsetof(struct bpf_sk_lookup, remote_port)),
+ BPF_LDX_MEM(BPF_H, BPF_REG_0, BPF_REG_1,
+ offsetof(struct bpf_sk_lookup, remote_port) + 2),
+ /* 4-byte read from remote_port field */
+ BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1,
+ offsetof(struct bpf_sk_lookup, remote_port)),
+
+ /* 1-byte read from local_ip4 field */
+ BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1,
+ offsetof(struct bpf_sk_lookup, local_ip4)),
+ BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1,
+ offsetof(struct bpf_sk_lookup, local_ip4) + 1),
+ BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1,
+ offsetof(struct bpf_sk_lookup, local_ip4) + 2),
+ BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1,
+ offsetof(struct bpf_sk_lookup, local_ip4) + 3),
+ /* 2-byte read from local_ip4 field */
+ BPF_LDX_MEM(BPF_H, BPF_REG_0, BPF_REG_1,
+ offsetof(struct bpf_sk_lookup, local_ip4)),
+ BPF_LDX_MEM(BPF_H, BPF_REG_0, BPF_REG_1,
+ offsetof(struct bpf_sk_lookup, local_ip4) + 2),
+ /* 4-byte read from local_ip4 field */
+ BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1,
+ offsetof(struct bpf_sk_lookup, local_ip4)),
+
+ /* 1-byte read from local_ip6 field */
+ BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1,
+ offsetof(struct bpf_sk_lookup, local_ip6)),
+ BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1,
+ offsetof(struct bpf_sk_lookup, local_ip6) + 1),
+ BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1,
+ offsetof(struct bpf_sk_lookup, local_ip6) + 2),
+ BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1,
+ offsetof(struct bpf_sk_lookup, local_ip6) + 3),
+ BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1,
+ offsetof(struct bpf_sk_lookup, local_ip6) + 4),
+ BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1,
+ offsetof(struct bpf_sk_lookup, local_ip6) + 5),
+ BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1,
+ offsetof(struct bpf_sk_lookup, local_ip6) + 6),
+ BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1,
+ offsetof(struct bpf_sk_lookup, local_ip6) + 7),
+ BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1,
+ offsetof(struct bpf_sk_lookup, local_ip6) + 8),
+ BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1,
+ offsetof(struct bpf_sk_lookup, local_ip6) + 9),
+ BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1,
+ offsetof(struct bpf_sk_lookup, local_ip6) + 10),
+ BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1,
+ offsetof(struct bpf_sk_lookup, local_ip6) + 11),
+ BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1,
+ offsetof(struct bpf_sk_lookup, local_ip6) + 12),
+ BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1,
+ offsetof(struct bpf_sk_lookup, local_ip6) + 13),
+ BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1,
+ offsetof(struct bpf_sk_lookup, local_ip6) + 14),
+ BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1,
+ offsetof(struct bpf_sk_lookup, local_ip6) + 15),
+ /* 2-byte read from local_ip6 field */
+ BPF_LDX_MEM(BPF_H, BPF_REG_0, BPF_REG_1,
+ offsetof(struct bpf_sk_lookup, local_ip6)),
+ BPF_LDX_MEM(BPF_H, BPF_REG_0, BPF_REG_1,
+ offsetof(struct bpf_sk_lookup, local_ip6) + 2),
+ BPF_LDX_MEM(BPF_H, BPF_REG_0, BPF_REG_1,
+ offsetof(struct bpf_sk_lookup, local_ip6) + 4),
+ BPF_LDX_MEM(BPF_H, BPF_REG_0, BPF_REG_1,
+ offsetof(struct bpf_sk_lookup, local_ip6) + 6),
+ BPF_LDX_MEM(BPF_H, BPF_REG_0, BPF_REG_1,
+ offsetof(struct bpf_sk_lookup, local_ip6) + 8),
+ BPF_LDX_MEM(BPF_H, BPF_REG_0, BPF_REG_1,
+ offsetof(struct bpf_sk_lookup, local_ip6) + 10),
+ BPF_LDX_MEM(BPF_H, BPF_REG_0, BPF_REG_1,
+ offsetof(struct bpf_sk_lookup, local_ip6) + 12),
+ BPF_LDX_MEM(BPF_H, BPF_REG_0, BPF_REG_1,
+ offsetof(struct bpf_sk_lookup, local_ip6) + 14),
+ /* 4-byte read from local_ip6 field */
+ BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1,
+ offsetof(struct bpf_sk_lookup, local_ip6)),
+ BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1,
+ offsetof(struct bpf_sk_lookup, local_ip6) + 4),
+ BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1,
+ offsetof(struct bpf_sk_lookup, local_ip6) + 8),
+ BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1,
+ offsetof(struct bpf_sk_lookup, local_ip6) + 12),
+
+ /* 1-byte read from local_port field */
+ BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1,
+ offsetof(struct bpf_sk_lookup, local_port)),
+ BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1,
+ offsetof(struct bpf_sk_lookup, local_port) + 1),
+ BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1,
+ offsetof(struct bpf_sk_lookup, local_port) + 2),
+ BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1,
+ offsetof(struct bpf_sk_lookup, local_port) + 3),
+ /* 2-byte read from local_port field */
+ BPF_LDX_MEM(BPF_H, BPF_REG_0, BPF_REG_1,
+ offsetof(struct bpf_sk_lookup, local_port)),
+ BPF_LDX_MEM(BPF_H, BPF_REG_0, BPF_REG_1,
+ offsetof(struct bpf_sk_lookup, local_port) + 2),
+ /* 4-byte read from local_port field */
+ BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1,
+ offsetof(struct bpf_sk_lookup, local_port)),
+
+ /* 1-byte read from ingress_ifindex field */
+ BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1,
+ offsetof(struct bpf_sk_lookup, ingress_ifindex)),
+ BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1,
+ offsetof(struct bpf_sk_lookup, ingress_ifindex) + 1),
+ BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1,
+ offsetof(struct bpf_sk_lookup, ingress_ifindex) + 2),
+ BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1,
+ offsetof(struct bpf_sk_lookup, ingress_ifindex) + 3),
+ /* 2-byte read from ingress_ifindex field */
+ BPF_LDX_MEM(BPF_H, BPF_REG_0, BPF_REG_1,
+ offsetof(struct bpf_sk_lookup, ingress_ifindex)),
+ BPF_LDX_MEM(BPF_H, BPF_REG_0, BPF_REG_1,
+ offsetof(struct bpf_sk_lookup, ingress_ifindex) + 2),
+ /* 4-byte read from ingress_ifindex field */
+ BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1,
+ offsetof(struct bpf_sk_lookup, ingress_ifindex)),
+
+ /* 8-byte read from sk field */
+ BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1,
+ offsetof(struct bpf_sk_lookup, sk)),
+
+ BPF_MOV32_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .result = ACCEPT,
+ .prog_type = BPF_PROG_TYPE_SK_LOOKUP,
+ .expected_attach_type = BPF_SK_LOOKUP,
+ .runs = -1,
+},
+/* invalid 8-byte reads from a 4-byte fields in bpf_sk_lookup */
+{
+ "invalid 8-byte read from bpf_sk_lookup family field",
+ .insns = {
+ BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1,
+ offsetof(struct bpf_sk_lookup, family)),
+ BPF_MOV32_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .errstr = "invalid bpf_context access",
+ .result = REJECT,
+ .prog_type = BPF_PROG_TYPE_SK_LOOKUP,
+ .expected_attach_type = BPF_SK_LOOKUP,
+},
+{
+ "invalid 8-byte read from bpf_sk_lookup protocol field",
+ .insns = {
+ BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1,
+ offsetof(struct bpf_sk_lookup, protocol)),
+ BPF_MOV32_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .errstr = "invalid bpf_context access",
+ .result = REJECT,
+ .prog_type = BPF_PROG_TYPE_SK_LOOKUP,
+ .expected_attach_type = BPF_SK_LOOKUP,
+ .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
+},
+{
+ "invalid 8-byte read from bpf_sk_lookup remote_ip4 field",
+ .insns = {
+ BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1,
+ offsetof(struct bpf_sk_lookup, remote_ip4)),
+ BPF_MOV32_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .errstr = "invalid bpf_context access",
+ .result = REJECT,
+ .prog_type = BPF_PROG_TYPE_SK_LOOKUP,
+ .expected_attach_type = BPF_SK_LOOKUP,
+},
+{
+ "invalid 8-byte read from bpf_sk_lookup remote_ip6 field",
+ .insns = {
+ BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1,
+ offsetof(struct bpf_sk_lookup, remote_ip6)),
+ BPF_MOV32_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .errstr = "invalid bpf_context access",
+ .result = REJECT,
+ .prog_type = BPF_PROG_TYPE_SK_LOOKUP,
+ .expected_attach_type = BPF_SK_LOOKUP,
+ .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
+},
+{
+ "invalid 8-byte read from bpf_sk_lookup remote_port field",
+ .insns = {
+ BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1,
+ offsetof(struct bpf_sk_lookup, remote_port)),
+ BPF_MOV32_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .errstr = "invalid bpf_context access",
+ .result = REJECT,
+ .prog_type = BPF_PROG_TYPE_SK_LOOKUP,
+ .expected_attach_type = BPF_SK_LOOKUP,
+ .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
+},
+{
+ "invalid 8-byte read from bpf_sk_lookup local_ip4 field",
+ .insns = {
+ BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1,
+ offsetof(struct bpf_sk_lookup, local_ip4)),
+ BPF_MOV32_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .errstr = "invalid bpf_context access",
+ .result = REJECT,
+ .prog_type = BPF_PROG_TYPE_SK_LOOKUP,
+ .expected_attach_type = BPF_SK_LOOKUP,
+},
+{
+ "invalid 8-byte read from bpf_sk_lookup local_ip6 field",
+ .insns = {
+ BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1,
+ offsetof(struct bpf_sk_lookup, local_ip6)),
+ BPF_MOV32_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .errstr = "invalid bpf_context access",
+ .result = REJECT,
+ .prog_type = BPF_PROG_TYPE_SK_LOOKUP,
+ .expected_attach_type = BPF_SK_LOOKUP,
+ .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
+},
+{
+ "invalid 8-byte read from bpf_sk_lookup local_port field",
+ .insns = {
+ BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1,
+ offsetof(struct bpf_sk_lookup, local_port)),
+ BPF_MOV32_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .errstr = "invalid bpf_context access",
+ .result = REJECT,
+ .prog_type = BPF_PROG_TYPE_SK_LOOKUP,
+ .expected_attach_type = BPF_SK_LOOKUP,
+ .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
+},
+{
+ "invalid 8-byte read from bpf_sk_lookup ingress_ifindex field",
+ .insns = {
+ BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1,
+ offsetof(struct bpf_sk_lookup, ingress_ifindex)),
+ BPF_MOV32_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .errstr = "invalid bpf_context access",
+ .result = REJECT,
+ .prog_type = BPF_PROG_TYPE_SK_LOOKUP,
+ .expected_attach_type = BPF_SK_LOOKUP,
+ .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
+},
+/* invalid 1,2,4-byte reads from 8-byte fields in bpf_sk_lookup */
+{
+ "invalid 4-byte read from bpf_sk_lookup sk field",
+ .insns = {
+ BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1,
+ offsetof(struct bpf_sk_lookup, sk)),
+ BPF_MOV32_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .errstr = "invalid bpf_context access",
+ .result = REJECT,
+ .prog_type = BPF_PROG_TYPE_SK_LOOKUP,
+ .expected_attach_type = BPF_SK_LOOKUP,
+},
+{
+ "invalid 2-byte read from bpf_sk_lookup sk field",
+ .insns = {
+ BPF_LDX_MEM(BPF_H, BPF_REG_0, BPF_REG_1,
+ offsetof(struct bpf_sk_lookup, sk)),
+ BPF_MOV32_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .errstr = "invalid bpf_context access",
+ .result = REJECT,
+ .prog_type = BPF_PROG_TYPE_SK_LOOKUP,
+ .expected_attach_type = BPF_SK_LOOKUP,
+},
+{
+ "invalid 1-byte read from bpf_sk_lookup sk field",
+ .insns = {
+ BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1,
+ offsetof(struct bpf_sk_lookup, sk)),
+ BPF_MOV32_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .errstr = "invalid bpf_context access",
+ .result = REJECT,
+ .prog_type = BPF_PROG_TYPE_SK_LOOKUP,
+ .expected_attach_type = BPF_SK_LOOKUP,
+},
+/* out of bounds and unaligned reads from bpf_sk_lookup */
+{
+ "invalid 4-byte read past end of bpf_sk_lookup",
+ .insns = {
+ BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1,
+ sizeof(struct bpf_sk_lookup)),
+ BPF_MOV32_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .errstr = "invalid bpf_context access",
+ .result = REJECT,
+ .prog_type = BPF_PROG_TYPE_SK_LOOKUP,
+ .expected_attach_type = BPF_SK_LOOKUP,
+},
+{
+ "invalid 4-byte unaligned read from bpf_sk_lookup at odd offset",
+ .insns = {
+ BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1, 1),
+ BPF_MOV32_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .errstr = "invalid bpf_context access",
+ .result = REJECT,
+ .prog_type = BPF_PROG_TYPE_SK_LOOKUP,
+ .expected_attach_type = BPF_SK_LOOKUP,
+ .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
+},
+{
+ "invalid 4-byte unaligned read from bpf_sk_lookup at even offset",
+ .insns = {
+ BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1, 2),
+ BPF_MOV32_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .errstr = "invalid bpf_context access",
+ .result = REJECT,
+ .prog_type = BPF_PROG_TYPE_SK_LOOKUP,
+ .expected_attach_type = BPF_SK_LOOKUP,
+ .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
+},
+/* in-bound and out-of-bound writes to bpf_sk_lookup */
+{
+ "invalid 8-byte write to bpf_sk_lookup",
+ .insns = {
+ BPF_MOV64_IMM(BPF_REG_0, 0xcafe4a11U),
+ BPF_STX_MEM(BPF_DW, BPF_REG_1, BPF_REG_0, 0),
+ BPF_MOV32_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .errstr = "invalid bpf_context access",
+ .result = REJECT,
+ .prog_type = BPF_PROG_TYPE_SK_LOOKUP,
+ .expected_attach_type = BPF_SK_LOOKUP,
+},
+{
+ "invalid 4-byte write to bpf_sk_lookup",
+ .insns = {
+ BPF_MOV64_IMM(BPF_REG_0, 0xcafe4a11U),
+ BPF_STX_MEM(BPF_W, BPF_REG_1, BPF_REG_0, 0),
+ BPF_MOV32_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .errstr = "invalid bpf_context access",
+ .result = REJECT,
+ .prog_type = BPF_PROG_TYPE_SK_LOOKUP,
+ .expected_attach_type = BPF_SK_LOOKUP,
+},
+{
+ "invalid 2-byte write to bpf_sk_lookup",
+ .insns = {
+ BPF_MOV64_IMM(BPF_REG_0, 0xcafe4a11U),
+ BPF_STX_MEM(BPF_H, BPF_REG_1, BPF_REG_0, 0),
+ BPF_MOV32_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .errstr = "invalid bpf_context access",
+ .result = REJECT,
+ .prog_type = BPF_PROG_TYPE_SK_LOOKUP,
+ .expected_attach_type = BPF_SK_LOOKUP,
+},
+{
+ "invalid 1-byte write to bpf_sk_lookup",
+ .insns = {
+ BPF_MOV64_IMM(BPF_REG_0, 0xcafe4a11U),
+ BPF_STX_MEM(BPF_B, BPF_REG_1, BPF_REG_0, 0),
+ BPF_MOV32_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .errstr = "invalid bpf_context access",
+ .result = REJECT,
+ .prog_type = BPF_PROG_TYPE_SK_LOOKUP,
+ .expected_attach_type = BPF_SK_LOOKUP,
+},
+{
+ "invalid 4-byte write past end of bpf_sk_lookup",
+ .insns = {
+ BPF_MOV64_IMM(BPF_REG_0, 0xcafe4a11U),
+ BPF_STX_MEM(BPF_W, BPF_REG_1, BPF_REG_0,
+ sizeof(struct bpf_sk_lookup)),
+ BPF_MOV32_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .errstr = "invalid bpf_context access",
+ .result = REJECT,
+ .prog_type = BPF_PROG_TYPE_SK_LOOKUP,
+ .expected_attach_type = BPF_SK_LOOKUP,
+},
diff --git a/tools/testing/selftests/bpf/verifier/ctx_sk_msg.c b/tools/testing/selftests/bpf/verifier/ctx_sk_msg.c
deleted file mode 100644
index c6c69220a569..000000000000
--- a/tools/testing/selftests/bpf/verifier/ctx_sk_msg.c
+++ /dev/null
@@ -1,181 +0,0 @@
-{
- "valid access family in SK_MSG",
- .insns = {
- BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1,
- offsetof(struct sk_msg_md, family)),
- BPF_EXIT_INSN(),
- },
- .result = ACCEPT,
- .prog_type = BPF_PROG_TYPE_SK_MSG,
-},
-{
- "valid access remote_ip4 in SK_MSG",
- .insns = {
- BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1,
- offsetof(struct sk_msg_md, remote_ip4)),
- BPF_EXIT_INSN(),
- },
- .result = ACCEPT,
- .prog_type = BPF_PROG_TYPE_SK_MSG,
-},
-{
- "valid access local_ip4 in SK_MSG",
- .insns = {
- BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1,
- offsetof(struct sk_msg_md, local_ip4)),
- BPF_EXIT_INSN(),
- },
- .result = ACCEPT,
- .prog_type = BPF_PROG_TYPE_SK_MSG,
-},
-{
- "valid access remote_port in SK_MSG",
- .insns = {
- BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1,
- offsetof(struct sk_msg_md, remote_port)),
- BPF_EXIT_INSN(),
- },
- .result = ACCEPT,
- .prog_type = BPF_PROG_TYPE_SK_MSG,
-},
-{
- "valid access local_port in SK_MSG",
- .insns = {
- BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1,
- offsetof(struct sk_msg_md, local_port)),
- BPF_EXIT_INSN(),
- },
- .result = ACCEPT,
- .prog_type = BPF_PROG_TYPE_SK_MSG,
-},
-{
- "valid access remote_ip6 in SK_MSG",
- .insns = {
- BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1,
- offsetof(struct sk_msg_md, remote_ip6[0])),
- BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1,
- offsetof(struct sk_msg_md, remote_ip6[1])),
- BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1,
- offsetof(struct sk_msg_md, remote_ip6[2])),
- BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1,
- offsetof(struct sk_msg_md, remote_ip6[3])),
- BPF_EXIT_INSN(),
- },
- .result = ACCEPT,
- .prog_type = BPF_PROG_TYPE_SK_SKB,
-},
-{
- "valid access local_ip6 in SK_MSG",
- .insns = {
- BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1,
- offsetof(struct sk_msg_md, local_ip6[0])),
- BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1,
- offsetof(struct sk_msg_md, local_ip6[1])),
- BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1,
- offsetof(struct sk_msg_md, local_ip6[2])),
- BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1,
- offsetof(struct sk_msg_md, local_ip6[3])),
- BPF_EXIT_INSN(),
- },
- .result = ACCEPT,
- .prog_type = BPF_PROG_TYPE_SK_SKB,
-},
-{
- "valid access size in SK_MSG",
- .insns = {
- BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1,
- offsetof(struct sk_msg_md, size)),
- BPF_EXIT_INSN(),
- },
- .result = ACCEPT,
- .prog_type = BPF_PROG_TYPE_SK_MSG,
-},
-{
- "invalid 64B read of size in SK_MSG",
- .insns = {
- BPF_LDX_MEM(BPF_DW, BPF_REG_2, BPF_REG_1,
- offsetof(struct sk_msg_md, size)),
- BPF_EXIT_INSN(),
- },
- .errstr = "invalid bpf_context access",
- .result = REJECT,
- .prog_type = BPF_PROG_TYPE_SK_MSG,
- .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
-},
-{
- "invalid read past end of SK_MSG",
- .insns = {
- BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1,
- offsetof(struct sk_msg_md, size) + 4),
- BPF_EXIT_INSN(),
- },
- .errstr = "invalid bpf_context access",
- .result = REJECT,
- .prog_type = BPF_PROG_TYPE_SK_MSG,
-},
-{
- "invalid read offset in SK_MSG",
- .insns = {
- BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1,
- offsetof(struct sk_msg_md, family) + 1),
- BPF_EXIT_INSN(),
- },
- .errstr = "invalid bpf_context access",
- .result = REJECT,
- .prog_type = BPF_PROG_TYPE_SK_MSG,
- .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
-},
-{
- "direct packet read for SK_MSG",
- .insns = {
- BPF_LDX_MEM(BPF_DW, BPF_REG_2, BPF_REG_1,
- offsetof(struct sk_msg_md, data)),
- BPF_LDX_MEM(BPF_DW, BPF_REG_3, BPF_REG_1,
- offsetof(struct sk_msg_md, data_end)),
- BPF_MOV64_REG(BPF_REG_0, BPF_REG_2),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 8),
- BPF_JMP_REG(BPF_JGT, BPF_REG_0, BPF_REG_3, 1),
- BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_2, 0),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- },
- .result = ACCEPT,
- .prog_type = BPF_PROG_TYPE_SK_MSG,
-},
-{
- "direct packet write for SK_MSG",
- .insns = {
- BPF_LDX_MEM(BPF_DW, BPF_REG_2, BPF_REG_1,
- offsetof(struct sk_msg_md, data)),
- BPF_LDX_MEM(BPF_DW, BPF_REG_3, BPF_REG_1,
- offsetof(struct sk_msg_md, data_end)),
- BPF_MOV64_REG(BPF_REG_0, BPF_REG_2),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 8),
- BPF_JMP_REG(BPF_JGT, BPF_REG_0, BPF_REG_3, 1),
- BPF_STX_MEM(BPF_B, BPF_REG_2, BPF_REG_2, 0),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- },
- .result = ACCEPT,
- .prog_type = BPF_PROG_TYPE_SK_MSG,
-},
-{
- "overlapping checks for direct packet access SK_MSG",
- .insns = {
- BPF_LDX_MEM(BPF_DW, BPF_REG_2, BPF_REG_1,
- offsetof(struct sk_msg_md, data)),
- BPF_LDX_MEM(BPF_DW, BPF_REG_3, BPF_REG_1,
- offsetof(struct sk_msg_md, data_end)),
- BPF_MOV64_REG(BPF_REG_0, BPF_REG_2),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 8),
- BPF_JMP_REG(BPF_JGT, BPF_REG_0, BPF_REG_3, 4),
- BPF_MOV64_REG(BPF_REG_1, BPF_REG_2),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 6),
- BPF_JMP_REG(BPF_JGT, BPF_REG_1, BPF_REG_3, 1),
- BPF_LDX_MEM(BPF_H, BPF_REG_0, BPF_REG_2, 6),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- },
- .result = ACCEPT,
- .prog_type = BPF_PROG_TYPE_SK_MSG,
-},
diff --git a/tools/testing/selftests/bpf/verifier/ctx_skb.c b/tools/testing/selftests/bpf/verifier/ctx_skb.c
index 2e16b8e268f2..0b394a7f7a2d 100644
--- a/tools/testing/selftests/bpf/verifier/ctx_skb.c
+++ b/tools/testing/selftests/bpf/verifier/ctx_skb.c
@@ -502,7 +502,7 @@
"check skb->hash byte load permitted",
.insns = {
BPF_MOV64_IMM(BPF_REG_0, 0),
-#if __BYTE_ORDER == __LITTLE_ENDIAN
+#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1,
offsetof(struct __sk_buff, hash)),
#else
@@ -537,7 +537,7 @@
"check skb->hash byte load permitted 3",
.insns = {
BPF_MOV64_IMM(BPF_REG_0, 0),
-#if __BYTE_ORDER == __LITTLE_ENDIAN
+#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1,
offsetof(struct __sk_buff, hash) + 3),
#else
@@ -646,7 +646,7 @@
"check skb->hash half load permitted",
.insns = {
BPF_MOV64_IMM(BPF_REG_0, 0),
-#if __BYTE_ORDER == __LITTLE_ENDIAN
+#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
BPF_LDX_MEM(BPF_H, BPF_REG_0, BPF_REG_1,
offsetof(struct __sk_buff, hash)),
#else
@@ -661,7 +661,7 @@
"check skb->hash half load permitted 2",
.insns = {
BPF_MOV64_IMM(BPF_REG_0, 0),
-#if __BYTE_ORDER == __LITTLE_ENDIAN
+#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
BPF_LDX_MEM(BPF_H, BPF_REG_0, BPF_REG_1,
offsetof(struct __sk_buff, hash) + 2),
#else
@@ -676,7 +676,7 @@
"check skb->hash half load not permitted, unaligned 1",
.insns = {
BPF_MOV64_IMM(BPF_REG_0, 0),
-#if __BYTE_ORDER == __LITTLE_ENDIAN
+#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
BPF_LDX_MEM(BPF_H, BPF_REG_0, BPF_REG_1,
offsetof(struct __sk_buff, hash) + 1),
#else
@@ -693,7 +693,7 @@
"check skb->hash half load not permitted, unaligned 3",
.insns = {
BPF_MOV64_IMM(BPF_REG_0, 0),
-#if __BYTE_ORDER == __LITTLE_ENDIAN
+#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
BPF_LDX_MEM(BPF_H, BPF_REG_0, BPF_REG_1,
offsetof(struct __sk_buff, hash) + 3),
#else
@@ -951,7 +951,7 @@
"check skb->data half load not permitted",
.insns = {
BPF_MOV64_IMM(BPF_REG_0, 0),
-#if __BYTE_ORDER == __LITTLE_ENDIAN
+#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
BPF_LDX_MEM(BPF_H, BPF_REG_0, BPF_REG_1,
offsetof(struct __sk_buff, data)),
#else
@@ -1058,6 +1058,66 @@
.prog_type = BPF_PROG_TYPE_SCHED_CLS,
},
{
+ "padding after gso_size is not accessible",
+ .insns = {
+ BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1,
+ offsetofend(struct __sk_buff, gso_size)),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .result = REJECT,
+ .result_unpriv = REJECT,
+ .errstr = "invalid bpf_context access off=180 size=4",
+ .prog_type = BPF_PROG_TYPE_SCHED_CLS,
+},
+{
+ "read hwtstamp from CGROUP_SKB",
+ .insns = {
+ BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1,
+ offsetof(struct __sk_buff, hwtstamp)),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .result = ACCEPT,
+ .prog_type = BPF_PROG_TYPE_CGROUP_SKB,
+},
+{
+ "read hwtstamp from CGROUP_SKB",
+ .insns = {
+ BPF_LDX_MEM(BPF_DW, BPF_REG_1, BPF_REG_1,
+ offsetof(struct __sk_buff, hwtstamp)),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .result = ACCEPT,
+ .prog_type = BPF_PROG_TYPE_CGROUP_SKB,
+},
+{
+ "write hwtstamp from CGROUP_SKB",
+ .insns = {
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_STX_MEM(BPF_DW, BPF_REG_1, BPF_REG_0,
+ offsetof(struct __sk_buff, hwtstamp)),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .result = REJECT,
+ .result_unpriv = REJECT,
+ .errstr = "invalid bpf_context access off=184 size=8",
+ .prog_type = BPF_PROG_TYPE_CGROUP_SKB,
+},
+{
+ "read hwtstamp from CLS",
+ .insns = {
+ BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1,
+ offsetof(struct __sk_buff, hwtstamp)),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .result = ACCEPT,
+ .prog_type = BPF_PROG_TYPE_SCHED_CLS,
+},
+{
"check wire_len is not readable by sockets",
.insns = {
BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1,
@@ -1089,3 +1149,47 @@
.errstr_unpriv = "R1 leaks addr",
.result = REJECT,
},
+{
+ "pkt > pkt_end taken check",
+ .insns = {
+ BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, // 0. r2 = *(u32 *)(r1 + data_end)
+ offsetof(struct __sk_buff, data_end)),
+ BPF_LDX_MEM(BPF_W, BPF_REG_4, BPF_REG_1, // 1. r4 = *(u32 *)(r1 + data)
+ offsetof(struct __sk_buff, data)),
+ BPF_MOV64_REG(BPF_REG_3, BPF_REG_4), // 2. r3 = r4
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_3, 42), // 3. r3 += 42
+ BPF_MOV64_IMM(BPF_REG_1, 0), // 4. r1 = 0
+ BPF_JMP_REG(BPF_JGT, BPF_REG_3, BPF_REG_2, 2), // 5. if r3 > r2 goto 8
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_4, 14), // 6. r4 += 14
+ BPF_MOV64_REG(BPF_REG_1, BPF_REG_4), // 7. r1 = r4
+ BPF_JMP_REG(BPF_JGT, BPF_REG_3, BPF_REG_2, 1), // 8. if r3 > r2 goto 10
+ BPF_LDX_MEM(BPF_H, BPF_REG_2, BPF_REG_1, 9), // 9. r2 = *(u8 *)(r1 + 9)
+ BPF_MOV64_IMM(BPF_REG_0, 0), // 10. r0 = 0
+ BPF_EXIT_INSN(), // 11. exit
+ },
+ .result = ACCEPT,
+ .prog_type = BPF_PROG_TYPE_SK_SKB,
+ .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
+},
+{
+ "pkt_end < pkt taken check",
+ .insns = {
+ BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, // 0. r2 = *(u32 *)(r1 + data_end)
+ offsetof(struct __sk_buff, data_end)),
+ BPF_LDX_MEM(BPF_W, BPF_REG_4, BPF_REG_1, // 1. r4 = *(u32 *)(r1 + data)
+ offsetof(struct __sk_buff, data)),
+ BPF_MOV64_REG(BPF_REG_3, BPF_REG_4), // 2. r3 = r4
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_3, 42), // 3. r3 += 42
+ BPF_MOV64_IMM(BPF_REG_1, 0), // 4. r1 = 0
+ BPF_JMP_REG(BPF_JGT, BPF_REG_3, BPF_REG_2, 2), // 5. if r3 > r2 goto 8
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_4, 14), // 6. r4 += 14
+ BPF_MOV64_REG(BPF_REG_1, BPF_REG_4), // 7. r1 = r4
+ BPF_JMP_REG(BPF_JLT, BPF_REG_2, BPF_REG_3, 1), // 8. if r2 < r3 goto 10
+ BPF_LDX_MEM(BPF_H, BPF_REG_2, BPF_REG_1, 9), // 9. r2 = *(u8 *)(r1 + 9)
+ BPF_MOV64_IMM(BPF_REG_0, 0), // 10. r0 = 0
+ BPF_EXIT_INSN(), // 11. exit
+ },
+ .result = ACCEPT,
+ .prog_type = BPF_PROG_TYPE_SK_SKB,
+ .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
+},
diff --git a/tools/testing/selftests/bpf/verifier/dead_code.c b/tools/testing/selftests/bpf/verifier/dead_code.c
index 5cf361d8eb1c..ee454327e5c6 100644
--- a/tools/testing/selftests/bpf/verifier/dead_code.c
+++ b/tools/testing/selftests/bpf/verifier/dead_code.c
@@ -8,6 +8,8 @@
BPF_JMP_IMM(BPF_JGE, BPF_REG_0, 10, -4),
BPF_EXIT_INSN(),
},
+ .errstr_unpriv = "R9 !read_ok",
+ .result_unpriv = REJECT,
.result = ACCEPT,
.retval = 7,
},
@@ -85,7 +87,7 @@
BPF_MOV64_IMM(BPF_REG_0, 12),
BPF_EXIT_INSN(),
},
- .errstr_unpriv = "function calls to other bpf functions are allowed for",
+ .errstr_unpriv = "loading/calling other bpf or kernel functions are allowed for",
.result_unpriv = REJECT,
.result = ACCEPT,
.retval = 7,
@@ -103,7 +105,7 @@
BPF_MOV64_IMM(BPF_REG_0, 12),
BPF_EXIT_INSN(),
},
- .errstr_unpriv = "function calls to other bpf functions are allowed for",
+ .errstr_unpriv = "loading/calling other bpf or kernel functions are allowed for",
.result_unpriv = REJECT,
.result = ACCEPT,
.retval = 7,
@@ -121,7 +123,7 @@
BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, -5),
BPF_EXIT_INSN(),
},
- .errstr_unpriv = "function calls to other bpf functions are allowed for",
+ .errstr_unpriv = "loading/calling other bpf or kernel functions are allowed for",
.result_unpriv = REJECT,
.result = ACCEPT,
.retval = 7,
@@ -137,7 +139,7 @@
BPF_MOV64_REG(BPF_REG_0, BPF_REG_1),
BPF_EXIT_INSN(),
},
- .errstr_unpriv = "function calls to other bpf functions are allowed for",
+ .errstr_unpriv = "loading/calling other bpf or kernel functions are allowed for",
.result_unpriv = REJECT,
.result = ACCEPT,
.retval = 2,
@@ -152,8 +154,20 @@
BPF_MOV64_REG(BPF_REG_0, BPF_REG_1),
BPF_EXIT_INSN(),
},
- .errstr_unpriv = "function calls to other bpf functions are allowed for",
+ .errstr_unpriv = "loading/calling other bpf or kernel functions are allowed for",
.result_unpriv = REJECT,
.result = ACCEPT,
.retval = 2,
},
+{
+ "dead code: zero extension",
+ .insns = {
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_STX_MEM(BPF_W, BPF_REG_10, BPF_REG_0, -4),
+ BPF_JMP_IMM(BPF_JGE, BPF_REG_0, 0, 1),
+ BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_10, -4),
+ BPF_EXIT_INSN(),
+ },
+ .result = ACCEPT,
+ .retval = 0,
+},
diff --git a/tools/testing/selftests/bpf/verifier/direct_packet_access.c b/tools/testing/selftests/bpf/verifier/direct_packet_access.c
deleted file mode 100644
index 2c5fbe7bcd27..000000000000
--- a/tools/testing/selftests/bpf/verifier/direct_packet_access.c
+++ /dev/null
@@ -1,656 +0,0 @@
-{
- "pkt_end - pkt_start is allowed",
- .insns = {
- BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1,
- offsetof(struct __sk_buff, data_end)),
- BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1,
- offsetof(struct __sk_buff, data)),
- BPF_ALU64_REG(BPF_SUB, BPF_REG_0, BPF_REG_2),
- BPF_EXIT_INSN(),
- },
- .result = ACCEPT,
- .retval = TEST_DATA_LEN,
- .prog_type = BPF_PROG_TYPE_SCHED_CLS,
-},
-{
- "direct packet access: test1",
- .insns = {
- BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1,
- offsetof(struct __sk_buff, data)),
- BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1,
- offsetof(struct __sk_buff, data_end)),
- BPF_MOV64_REG(BPF_REG_0, BPF_REG_2),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 8),
- BPF_JMP_REG(BPF_JGT, BPF_REG_0, BPF_REG_3, 1),
- BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_2, 0),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- },
- .result = ACCEPT,
- .prog_type = BPF_PROG_TYPE_SCHED_CLS,
-},
-{
- "direct packet access: test2",
- .insns = {
- BPF_MOV64_IMM(BPF_REG_0, 1),
- BPF_LDX_MEM(BPF_W, BPF_REG_4, BPF_REG_1,
- offsetof(struct __sk_buff, data_end)),
- BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1,
- offsetof(struct __sk_buff, data)),
- BPF_MOV64_REG(BPF_REG_5, BPF_REG_3),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_5, 14),
- BPF_JMP_REG(BPF_JGT, BPF_REG_5, BPF_REG_4, 15),
- BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_3, 7),
- BPF_LDX_MEM(BPF_B, BPF_REG_4, BPF_REG_3, 12),
- BPF_ALU64_IMM(BPF_MUL, BPF_REG_4, 14),
- BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1,
- offsetof(struct __sk_buff, data)),
- BPF_ALU64_REG(BPF_ADD, BPF_REG_3, BPF_REG_4),
- BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1,
- offsetof(struct __sk_buff, len)),
- BPF_ALU64_IMM(BPF_LSH, BPF_REG_2, 49),
- BPF_ALU64_IMM(BPF_RSH, BPF_REG_2, 49),
- BPF_ALU64_REG(BPF_ADD, BPF_REG_3, BPF_REG_2),
- BPF_MOV64_REG(BPF_REG_2, BPF_REG_3),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, 8),
- BPF_LDX_MEM(BPF_W, BPF_REG_1, BPF_REG_1,
- offsetof(struct __sk_buff, data_end)),
- BPF_JMP_REG(BPF_JGT, BPF_REG_2, BPF_REG_1, 1),
- BPF_LDX_MEM(BPF_B, BPF_REG_1, BPF_REG_3, 4),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- },
- .result = ACCEPT,
- .prog_type = BPF_PROG_TYPE_SCHED_CLS,
-},
-{
- "direct packet access: test3",
- .insns = {
- BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1,
- offsetof(struct __sk_buff, data)),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- },
- .errstr = "invalid bpf_context access off=76",
- .result = REJECT,
- .prog_type = BPF_PROG_TYPE_SOCKET_FILTER,
-},
-{
- "direct packet access: test4 (write)",
- .insns = {
- BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1,
- offsetof(struct __sk_buff, data)),
- BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1,
- offsetof(struct __sk_buff, data_end)),
- BPF_MOV64_REG(BPF_REG_0, BPF_REG_2),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 8),
- BPF_JMP_REG(BPF_JGT, BPF_REG_0, BPF_REG_3, 1),
- BPF_STX_MEM(BPF_B, BPF_REG_2, BPF_REG_2, 0),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- },
- .result = ACCEPT,
- .prog_type = BPF_PROG_TYPE_SCHED_CLS,
-},
-{
- "direct packet access: test5 (pkt_end >= reg, good access)",
- .insns = {
- BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1,
- offsetof(struct __sk_buff, data)),
- BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1,
- offsetof(struct __sk_buff, data_end)),
- BPF_MOV64_REG(BPF_REG_0, BPF_REG_2),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 8),
- BPF_JMP_REG(BPF_JGE, BPF_REG_3, BPF_REG_0, 2),
- BPF_MOV64_IMM(BPF_REG_0, 1),
- BPF_EXIT_INSN(),
- BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_2, 0),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- },
- .result = ACCEPT,
- .prog_type = BPF_PROG_TYPE_SCHED_CLS,
-},
-{
- "direct packet access: test6 (pkt_end >= reg, bad access)",
- .insns = {
- BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1,
- offsetof(struct __sk_buff, data)),
- BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1,
- offsetof(struct __sk_buff, data_end)),
- BPF_MOV64_REG(BPF_REG_0, BPF_REG_2),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 8),
- BPF_JMP_REG(BPF_JGE, BPF_REG_3, BPF_REG_0, 3),
- BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_2, 0),
- BPF_MOV64_IMM(BPF_REG_0, 1),
- BPF_EXIT_INSN(),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- },
- .errstr = "invalid access to packet",
- .result = REJECT,
- .prog_type = BPF_PROG_TYPE_SCHED_CLS,
-},
-{
- "direct packet access: test7 (pkt_end >= reg, both accesses)",
- .insns = {
- BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1,
- offsetof(struct __sk_buff, data)),
- BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1,
- offsetof(struct __sk_buff, data_end)),
- BPF_MOV64_REG(BPF_REG_0, BPF_REG_2),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 8),
- BPF_JMP_REG(BPF_JGE, BPF_REG_3, BPF_REG_0, 3),
- BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_2, 0),
- BPF_MOV64_IMM(BPF_REG_0, 1),
- BPF_EXIT_INSN(),
- BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_2, 0),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- },
- .errstr = "invalid access to packet",
- .result = REJECT,
- .prog_type = BPF_PROG_TYPE_SCHED_CLS,
-},
-{
- "direct packet access: test8 (double test, variant 1)",
- .insns = {
- BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1,
- offsetof(struct __sk_buff, data)),
- BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1,
- offsetof(struct __sk_buff, data_end)),
- BPF_MOV64_REG(BPF_REG_0, BPF_REG_2),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 8),
- BPF_JMP_REG(BPF_JGE, BPF_REG_3, BPF_REG_0, 4),
- BPF_JMP_REG(BPF_JGT, BPF_REG_0, BPF_REG_3, 1),
- BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_2, 0),
- BPF_MOV64_IMM(BPF_REG_0, 1),
- BPF_EXIT_INSN(),
- BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_2, 0),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- },
- .result = ACCEPT,
- .prog_type = BPF_PROG_TYPE_SCHED_CLS,
-},
-{
- "direct packet access: test9 (double test, variant 2)",
- .insns = {
- BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1,
- offsetof(struct __sk_buff, data)),
- BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1,
- offsetof(struct __sk_buff, data_end)),
- BPF_MOV64_REG(BPF_REG_0, BPF_REG_2),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 8),
- BPF_JMP_REG(BPF_JGE, BPF_REG_3, BPF_REG_0, 2),
- BPF_MOV64_IMM(BPF_REG_0, 1),
- BPF_EXIT_INSN(),
- BPF_JMP_REG(BPF_JGT, BPF_REG_0, BPF_REG_3, 1),
- BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_2, 0),
- BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_2, 0),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- },
- .result = ACCEPT,
- .prog_type = BPF_PROG_TYPE_SCHED_CLS,
-},
-{
- "direct packet access: test10 (write invalid)",
- .insns = {
- BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1,
- offsetof(struct __sk_buff, data)),
- BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1,
- offsetof(struct __sk_buff, data_end)),
- BPF_MOV64_REG(BPF_REG_0, BPF_REG_2),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 8),
- BPF_JMP_REG(BPF_JGT, BPF_REG_0, BPF_REG_3, 2),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- BPF_STX_MEM(BPF_B, BPF_REG_2, BPF_REG_2, 0),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- },
- .errstr = "invalid access to packet",
- .result = REJECT,
- .prog_type = BPF_PROG_TYPE_SCHED_CLS,
-},
-{
- "direct packet access: test11 (shift, good access)",
- .insns = {
- BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1,
- offsetof(struct __sk_buff, data)),
- BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1,
- offsetof(struct __sk_buff, data_end)),
- BPF_MOV64_REG(BPF_REG_0, BPF_REG_2),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 22),
- BPF_JMP_REG(BPF_JGT, BPF_REG_0, BPF_REG_3, 8),
- BPF_MOV64_IMM(BPF_REG_3, 144),
- BPF_MOV64_REG(BPF_REG_5, BPF_REG_3),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_5, 23),
- BPF_ALU64_IMM(BPF_RSH, BPF_REG_5, 3),
- BPF_MOV64_REG(BPF_REG_6, BPF_REG_2),
- BPF_ALU64_REG(BPF_ADD, BPF_REG_6, BPF_REG_5),
- BPF_MOV64_IMM(BPF_REG_0, 1),
- BPF_EXIT_INSN(),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- },
- .result = ACCEPT,
- .prog_type = BPF_PROG_TYPE_SCHED_CLS,
- .retval = 1,
-},
-{
- "direct packet access: test12 (and, good access)",
- .insns = {
- BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1,
- offsetof(struct __sk_buff, data)),
- BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1,
- offsetof(struct __sk_buff, data_end)),
- BPF_MOV64_REG(BPF_REG_0, BPF_REG_2),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 22),
- BPF_JMP_REG(BPF_JGT, BPF_REG_0, BPF_REG_3, 8),
- BPF_MOV64_IMM(BPF_REG_3, 144),
- BPF_MOV64_REG(BPF_REG_5, BPF_REG_3),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_5, 23),
- BPF_ALU64_IMM(BPF_AND, BPF_REG_5, 15),
- BPF_MOV64_REG(BPF_REG_6, BPF_REG_2),
- BPF_ALU64_REG(BPF_ADD, BPF_REG_6, BPF_REG_5),
- BPF_MOV64_IMM(BPF_REG_0, 1),
- BPF_EXIT_INSN(),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- },
- .result = ACCEPT,
- .prog_type = BPF_PROG_TYPE_SCHED_CLS,
- .retval = 1,
-},
-{
- "direct packet access: test13 (branches, good access)",
- .insns = {
- BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1,
- offsetof(struct __sk_buff, data)),
- BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1,
- offsetof(struct __sk_buff, data_end)),
- BPF_MOV64_REG(BPF_REG_0, BPF_REG_2),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 22),
- BPF_JMP_REG(BPF_JGT, BPF_REG_0, BPF_REG_3, 13),
- BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1,
- offsetof(struct __sk_buff, mark)),
- BPF_MOV64_IMM(BPF_REG_4, 1),
- BPF_JMP_REG(BPF_JGT, BPF_REG_3, BPF_REG_4, 2),
- BPF_MOV64_IMM(BPF_REG_3, 14),
- BPF_JMP_IMM(BPF_JA, 0, 0, 1),
- BPF_MOV64_IMM(BPF_REG_3, 24),
- BPF_MOV64_REG(BPF_REG_5, BPF_REG_3),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_5, 23),
- BPF_ALU64_IMM(BPF_AND, BPF_REG_5, 15),
- BPF_MOV64_REG(BPF_REG_6, BPF_REG_2),
- BPF_ALU64_REG(BPF_ADD, BPF_REG_6, BPF_REG_5),
- BPF_MOV64_IMM(BPF_REG_0, 1),
- BPF_EXIT_INSN(),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- },
- .result = ACCEPT,
- .prog_type = BPF_PROG_TYPE_SCHED_CLS,
- .retval = 1,
-},
-{
- "direct packet access: test14 (pkt_ptr += 0, CONST_IMM, good access)",
- .insns = {
- BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1,
- offsetof(struct __sk_buff, data)),
- BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1,
- offsetof(struct __sk_buff, data_end)),
- BPF_MOV64_REG(BPF_REG_0, BPF_REG_2),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 22),
- BPF_JMP_REG(BPF_JGT, BPF_REG_0, BPF_REG_3, 7),
- BPF_MOV64_IMM(BPF_REG_5, 12),
- BPF_ALU64_IMM(BPF_RSH, BPF_REG_5, 4),
- BPF_MOV64_REG(BPF_REG_6, BPF_REG_2),
- BPF_ALU64_REG(BPF_ADD, BPF_REG_6, BPF_REG_5),
- BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_6, 0),
- BPF_MOV64_IMM(BPF_REG_0, 1),
- BPF_EXIT_INSN(),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- },
- .result = ACCEPT,
- .prog_type = BPF_PROG_TYPE_SCHED_CLS,
- .retval = 1,
-},
-{
- "direct packet access: test15 (spill with xadd)",
- .insns = {
- BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1,
- offsetof(struct __sk_buff, data)),
- BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1,
- offsetof(struct __sk_buff, data_end)),
- BPF_MOV64_REG(BPF_REG_0, BPF_REG_2),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 8),
- BPF_JMP_REG(BPF_JGT, BPF_REG_0, BPF_REG_3, 8),
- BPF_MOV64_IMM(BPF_REG_5, 4096),
- BPF_MOV64_REG(BPF_REG_4, BPF_REG_10),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_4, -8),
- BPF_STX_MEM(BPF_DW, BPF_REG_4, BPF_REG_2, 0),
- BPF_STX_XADD(BPF_DW, BPF_REG_4, BPF_REG_5, 0),
- BPF_LDX_MEM(BPF_DW, BPF_REG_2, BPF_REG_4, 0),
- BPF_STX_MEM(BPF_W, BPF_REG_2, BPF_REG_5, 0),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- },
- .errstr = "R2 invalid mem access 'inv'",
- .result = REJECT,
- .prog_type = BPF_PROG_TYPE_SCHED_CLS,
- .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
-},
-{
- "direct packet access: test16 (arith on data_end)",
- .insns = {
- BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1,
- offsetof(struct __sk_buff, data)),
- BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1,
- offsetof(struct __sk_buff, data_end)),
- BPF_MOV64_REG(BPF_REG_0, BPF_REG_2),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 8),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_3, 16),
- BPF_JMP_REG(BPF_JGT, BPF_REG_0, BPF_REG_3, 1),
- BPF_STX_MEM(BPF_B, BPF_REG_2, BPF_REG_2, 0),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- },
- .errstr = "R3 pointer arithmetic on pkt_end",
- .result = REJECT,
- .prog_type = BPF_PROG_TYPE_SCHED_CLS,
-},
-{
- "direct packet access: test17 (pruning, alignment)",
- .insns = {
- BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1,
- offsetof(struct __sk_buff, data)),
- BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1,
- offsetof(struct __sk_buff, data_end)),
- BPF_LDX_MEM(BPF_W, BPF_REG_7, BPF_REG_1,
- offsetof(struct __sk_buff, mark)),
- BPF_MOV64_REG(BPF_REG_0, BPF_REG_2),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 14),
- BPF_JMP_IMM(BPF_JGT, BPF_REG_7, 1, 4),
- BPF_JMP_REG(BPF_JGT, BPF_REG_0, BPF_REG_3, 1),
- BPF_STX_MEM(BPF_W, BPF_REG_0, BPF_REG_0, -4),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 1),
- BPF_JMP_A(-6),
- },
- .errstr = "misaligned packet access off 2+(0x0; 0x0)+15+-4 size 4",
- .result = REJECT,
- .prog_type = BPF_PROG_TYPE_SCHED_CLS,
- .flags = F_LOAD_WITH_STRICT_ALIGNMENT,
-},
-{
- "direct packet access: test18 (imm += pkt_ptr, 1)",
- .insns = {
- BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1,
- offsetof(struct __sk_buff, data)),
- BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1,
- offsetof(struct __sk_buff, data_end)),
- BPF_MOV64_IMM(BPF_REG_0, 8),
- BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_2),
- BPF_JMP_REG(BPF_JGT, BPF_REG_0, BPF_REG_3, 1),
- BPF_STX_MEM(BPF_B, BPF_REG_2, BPF_REG_2, 0),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- },
- .result = ACCEPT,
- .prog_type = BPF_PROG_TYPE_SCHED_CLS,
-},
-{
- "direct packet access: test19 (imm += pkt_ptr, 2)",
- .insns = {
- BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1,
- offsetof(struct __sk_buff, data)),
- BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1,
- offsetof(struct __sk_buff, data_end)),
- BPF_MOV64_REG(BPF_REG_0, BPF_REG_2),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 8),
- BPF_JMP_REG(BPF_JGT, BPF_REG_0, BPF_REG_3, 3),
- BPF_MOV64_IMM(BPF_REG_4, 4),
- BPF_ALU64_REG(BPF_ADD, BPF_REG_4, BPF_REG_2),
- BPF_STX_MEM(BPF_B, BPF_REG_4, BPF_REG_4, 0),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- },
- .result = ACCEPT,
- .prog_type = BPF_PROG_TYPE_SCHED_CLS,
-},
-{
- "direct packet access: test20 (x += pkt_ptr, 1)",
- .insns = {
- BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1,
- offsetof(struct __sk_buff, data)),
- BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1,
- offsetof(struct __sk_buff, data_end)),
- BPF_MOV64_IMM(BPF_REG_0, 0xffffffff),
- BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_0, -8),
- BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_10, -8),
- BPF_ALU64_IMM(BPF_AND, BPF_REG_0, 0x7fff),
- BPF_MOV64_REG(BPF_REG_4, BPF_REG_0),
- BPF_ALU64_REG(BPF_ADD, BPF_REG_4, BPF_REG_2),
- BPF_MOV64_REG(BPF_REG_5, BPF_REG_4),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_4, 0x7fff - 1),
- BPF_JMP_REG(BPF_JGT, BPF_REG_4, BPF_REG_3, 1),
- BPF_STX_MEM(BPF_DW, BPF_REG_5, BPF_REG_4, 0),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- },
- .prog_type = BPF_PROG_TYPE_SCHED_CLS,
- .result = ACCEPT,
- .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
-},
-{
- "direct packet access: test21 (x += pkt_ptr, 2)",
- .insns = {
- BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1,
- offsetof(struct __sk_buff, data)),
- BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1,
- offsetof(struct __sk_buff, data_end)),
- BPF_MOV64_REG(BPF_REG_0, BPF_REG_2),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 8),
- BPF_JMP_REG(BPF_JGT, BPF_REG_0, BPF_REG_3, 9),
- BPF_MOV64_IMM(BPF_REG_4, 0xffffffff),
- BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_4, -8),
- BPF_LDX_MEM(BPF_DW, BPF_REG_4, BPF_REG_10, -8),
- BPF_ALU64_IMM(BPF_AND, BPF_REG_4, 0x7fff),
- BPF_ALU64_REG(BPF_ADD, BPF_REG_4, BPF_REG_2),
- BPF_MOV64_REG(BPF_REG_5, BPF_REG_4),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_4, 0x7fff - 1),
- BPF_JMP_REG(BPF_JGT, BPF_REG_4, BPF_REG_3, 1),
- BPF_STX_MEM(BPF_DW, BPF_REG_5, BPF_REG_4, 0),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- },
- .prog_type = BPF_PROG_TYPE_SCHED_CLS,
- .result = ACCEPT,
- .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
-},
-{
- "direct packet access: test22 (x += pkt_ptr, 3)",
- .insns = {
- BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1,
- offsetof(struct __sk_buff, data)),
- BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1,
- offsetof(struct __sk_buff, data_end)),
- BPF_MOV64_REG(BPF_REG_0, BPF_REG_2),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 8),
- BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_2, -8),
- BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_3, -16),
- BPF_LDX_MEM(BPF_DW, BPF_REG_3, BPF_REG_10, -16),
- BPF_JMP_REG(BPF_JGT, BPF_REG_0, BPF_REG_3, 11),
- BPF_LDX_MEM(BPF_DW, BPF_REG_2, BPF_REG_10, -8),
- BPF_MOV64_IMM(BPF_REG_4, 0xffffffff),
- BPF_STX_XADD(BPF_DW, BPF_REG_10, BPF_REG_4, -8),
- BPF_LDX_MEM(BPF_DW, BPF_REG_4, BPF_REG_10, -8),
- BPF_ALU64_IMM(BPF_RSH, BPF_REG_4, 49),
- BPF_ALU64_REG(BPF_ADD, BPF_REG_4, BPF_REG_2),
- BPF_MOV64_REG(BPF_REG_0, BPF_REG_4),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 2),
- BPF_JMP_REG(BPF_JGT, BPF_REG_0, BPF_REG_3, 2),
- BPF_MOV64_IMM(BPF_REG_2, 1),
- BPF_STX_MEM(BPF_H, BPF_REG_4, BPF_REG_2, 0),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- },
- .prog_type = BPF_PROG_TYPE_SCHED_CLS,
- .result = ACCEPT,
- .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
-},
-{
- "direct packet access: test23 (x += pkt_ptr, 4)",
- .insns = {
- BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1,
- offsetof(struct __sk_buff, data)),
- BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1,
- offsetof(struct __sk_buff, data_end)),
- BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1,
- offsetof(struct __sk_buff, mark)),
- BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_0, -8),
- BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_10, -8),
- BPF_ALU64_IMM(BPF_AND, BPF_REG_0, 0xffff),
- BPF_MOV64_REG(BPF_REG_4, BPF_REG_0),
- BPF_MOV64_IMM(BPF_REG_0, 31),
- BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_4),
- BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_2),
- BPF_MOV64_REG(BPF_REG_5, BPF_REG_0),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 0xffff - 1),
- BPF_JMP_REG(BPF_JGT, BPF_REG_0, BPF_REG_3, 1),
- BPF_STX_MEM(BPF_DW, BPF_REG_5, BPF_REG_0, 0),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- },
- .prog_type = BPF_PROG_TYPE_SCHED_CLS,
- .result = REJECT,
- .errstr = "invalid access to packet, off=0 size=8, R5(id=1,off=0,r=0)",
- .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
-},
-{
- "direct packet access: test24 (x += pkt_ptr, 5)",
- .insns = {
- BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1,
- offsetof(struct __sk_buff, data)),
- BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1,
- offsetof(struct __sk_buff, data_end)),
- BPF_MOV64_IMM(BPF_REG_0, 0xffffffff),
- BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_0, -8),
- BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_10, -8),
- BPF_ALU64_IMM(BPF_AND, BPF_REG_0, 0xff),
- BPF_MOV64_REG(BPF_REG_4, BPF_REG_0),
- BPF_MOV64_IMM(BPF_REG_0, 64),
- BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_4),
- BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_2),
- BPF_MOV64_REG(BPF_REG_5, BPF_REG_0),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 0x7fff - 1),
- BPF_JMP_REG(BPF_JGT, BPF_REG_0, BPF_REG_3, 1),
- BPF_STX_MEM(BPF_DW, BPF_REG_5, BPF_REG_0, 0),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- },
- .prog_type = BPF_PROG_TYPE_SCHED_CLS,
- .result = ACCEPT,
- .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
-},
-{
- "direct packet access: test25 (marking on <, good access)",
- .insns = {
- BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1,
- offsetof(struct __sk_buff, data)),
- BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1,
- offsetof(struct __sk_buff, data_end)),
- BPF_MOV64_REG(BPF_REG_0, BPF_REG_2),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 8),
- BPF_JMP_REG(BPF_JLT, BPF_REG_0, BPF_REG_3, 2),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_2, 0),
- BPF_JMP_IMM(BPF_JA, 0, 0, -4),
- },
- .result = ACCEPT,
- .prog_type = BPF_PROG_TYPE_SCHED_CLS,
-},
-{
- "direct packet access: test26 (marking on <, bad access)",
- .insns = {
- BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1,
- offsetof(struct __sk_buff, data)),
- BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1,
- offsetof(struct __sk_buff, data_end)),
- BPF_MOV64_REG(BPF_REG_0, BPF_REG_2),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 8),
- BPF_JMP_REG(BPF_JLT, BPF_REG_0, BPF_REG_3, 3),
- BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_2, 0),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- BPF_JMP_IMM(BPF_JA, 0, 0, -3),
- },
- .result = REJECT,
- .errstr = "invalid access to packet",
- .prog_type = BPF_PROG_TYPE_SCHED_CLS,
-},
-{
- "direct packet access: test27 (marking on <=, good access)",
- .insns = {
- BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1,
- offsetof(struct __sk_buff, data)),
- BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1,
- offsetof(struct __sk_buff, data_end)),
- BPF_MOV64_REG(BPF_REG_0, BPF_REG_2),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 8),
- BPF_JMP_REG(BPF_JLE, BPF_REG_3, BPF_REG_0, 1),
- BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_2, 0),
- BPF_MOV64_IMM(BPF_REG_0, 1),
- BPF_EXIT_INSN(),
- },
- .result = ACCEPT,
- .prog_type = BPF_PROG_TYPE_SCHED_CLS,
- .retval = 1,
-},
-{
- "direct packet access: test28 (marking on <=, bad access)",
- .insns = {
- BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1,
- offsetof(struct __sk_buff, data)),
- BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1,
- offsetof(struct __sk_buff, data_end)),
- BPF_MOV64_REG(BPF_REG_0, BPF_REG_2),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 8),
- BPF_JMP_REG(BPF_JLE, BPF_REG_3, BPF_REG_0, 2),
- BPF_MOV64_IMM(BPF_REG_0, 1),
- BPF_EXIT_INSN(),
- BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_2, 0),
- BPF_JMP_IMM(BPF_JA, 0, 0, -4),
- },
- .result = REJECT,
- .errstr = "invalid access to packet",
- .prog_type = BPF_PROG_TYPE_SCHED_CLS,
-},
-{
- "direct packet access: test29 (reg > pkt_end in subprog)",
- .insns = {
- BPF_LDX_MEM(BPF_W, BPF_REG_6, BPF_REG_1,
- offsetof(struct __sk_buff, data)),
- BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1,
- offsetof(struct __sk_buff, data_end)),
- BPF_MOV64_REG(BPF_REG_3, BPF_REG_6),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_3, 8),
- BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 4),
- BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 1),
- BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_6, 0),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_JMP_REG(BPF_JGT, BPF_REG_3, BPF_REG_2, 1),
- BPF_MOV64_IMM(BPF_REG_0, 1),
- BPF_EXIT_INSN(),
- },
- .result = ACCEPT,
- .prog_type = BPF_PROG_TYPE_SCHED_CLS,
-},
diff --git a/tools/testing/selftests/bpf/verifier/direct_stack_access_wraparound.c b/tools/testing/selftests/bpf/verifier/direct_stack_access_wraparound.c
deleted file mode 100644
index 698e3779fdd2..000000000000
--- a/tools/testing/selftests/bpf/verifier/direct_stack_access_wraparound.c
+++ /dev/null
@@ -1,40 +0,0 @@
-{
- "direct stack access with 32-bit wraparound. test1",
- .insns = {
- BPF_MOV64_REG(BPF_REG_1, BPF_REG_10),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 0x7fffffff),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 0x7fffffff),
- BPF_MOV32_IMM(BPF_REG_0, 0),
- BPF_STX_MEM(BPF_B, BPF_REG_1, BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- },
- .errstr = "fp pointer and 2147483647",
- .result = REJECT
-},
-{
- "direct stack access with 32-bit wraparound. test2",
- .insns = {
- BPF_MOV64_REG(BPF_REG_1, BPF_REG_10),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 0x3fffffff),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 0x3fffffff),
- BPF_MOV32_IMM(BPF_REG_0, 0),
- BPF_STX_MEM(BPF_B, BPF_REG_1, BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- },
- .errstr = "fp pointer and 1073741823",
- .result = REJECT
-},
-{
- "direct stack access with 32-bit wraparound. test3",
- .insns = {
- BPF_MOV64_REG(BPF_REG_1, BPF_REG_10),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 0x1fffffff),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 0x1fffffff),
- BPF_MOV32_IMM(BPF_REG_0, 0),
- BPF_STX_MEM(BPF_B, BPF_REG_1, BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- },
- .errstr = "fp pointer offset 1073741822",
- .errstr_unpriv = "R1 stack pointer arithmetic goes out of range",
- .result = REJECT
-},
diff --git a/tools/testing/selftests/bpf/verifier/direct_value_access.c b/tools/testing/selftests/bpf/verifier/direct_value_access.c
index 988f46a1a4c7..c0648dc009b5 100644
--- a/tools/testing/selftests/bpf/verifier/direct_value_access.c
+++ b/tools/testing/selftests/bpf/verifier/direct_value_access.c
@@ -69,6 +69,7 @@
.fixup_map_array_48b = { 1 },
.result = REJECT,
.errstr = "R1 min value is outside of the allowed memory range",
+ .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
},
{
"direct map access, write test 7",
@@ -195,6 +196,7 @@
.fixup_map_array_48b = { 1, 3 },
.result = REJECT,
.errstr = "invalid access to map value, value_size=48 off=47 size=2",
+ .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
},
{
"direct map access, write test 17",
@@ -209,6 +211,7 @@
.fixup_map_array_48b = { 1, 3 },
.result = REJECT,
.errstr = "invalid access to map value, value_size=48 off=47 size=2",
+ .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
},
{
"direct map access, write test 18",
diff --git a/tools/testing/selftests/bpf/verifier/div0.c b/tools/testing/selftests/bpf/verifier/div0.c
deleted file mode 100644
index 7685edfbcf71..000000000000
--- a/tools/testing/selftests/bpf/verifier/div0.c
+++ /dev/null
@@ -1,184 +0,0 @@
-{
- "DIV32 by 0, zero check 1",
- .insns = {
- BPF_MOV32_IMM(BPF_REG_0, 42),
- BPF_MOV32_IMM(BPF_REG_1, 0),
- BPF_MOV32_IMM(BPF_REG_2, 1),
- BPF_ALU32_REG(BPF_DIV, BPF_REG_2, BPF_REG_1),
- BPF_EXIT_INSN(),
- },
- .result = ACCEPT,
- .retval = 42,
-},
-{
- "DIV32 by 0, zero check 2",
- .insns = {
- BPF_MOV32_IMM(BPF_REG_0, 42),
- BPF_LD_IMM64(BPF_REG_1, 0xffffffff00000000LL),
- BPF_MOV32_IMM(BPF_REG_2, 1),
- BPF_ALU32_REG(BPF_DIV, BPF_REG_2, BPF_REG_1),
- BPF_EXIT_INSN(),
- },
- .result = ACCEPT,
- .retval = 42,
-},
-{
- "DIV64 by 0, zero check",
- .insns = {
- BPF_MOV32_IMM(BPF_REG_0, 42),
- BPF_MOV32_IMM(BPF_REG_1, 0),
- BPF_MOV32_IMM(BPF_REG_2, 1),
- BPF_ALU64_REG(BPF_DIV, BPF_REG_2, BPF_REG_1),
- BPF_EXIT_INSN(),
- },
- .result = ACCEPT,
- .retval = 42,
-},
-{
- "MOD32 by 0, zero check 1",
- .insns = {
- BPF_MOV32_IMM(BPF_REG_0, 42),
- BPF_MOV32_IMM(BPF_REG_1, 0),
- BPF_MOV32_IMM(BPF_REG_2, 1),
- BPF_ALU32_REG(BPF_MOD, BPF_REG_2, BPF_REG_1),
- BPF_EXIT_INSN(),
- },
- .result = ACCEPT,
- .retval = 42,
-},
-{
- "MOD32 by 0, zero check 2",
- .insns = {
- BPF_MOV32_IMM(BPF_REG_0, 42),
- BPF_LD_IMM64(BPF_REG_1, 0xffffffff00000000LL),
- BPF_MOV32_IMM(BPF_REG_2, 1),
- BPF_ALU32_REG(BPF_MOD, BPF_REG_2, BPF_REG_1),
- BPF_EXIT_INSN(),
- },
- .result = ACCEPT,
- .retval = 42,
-},
-{
- "MOD64 by 0, zero check",
- .insns = {
- BPF_MOV32_IMM(BPF_REG_0, 42),
- BPF_MOV32_IMM(BPF_REG_1, 0),
- BPF_MOV32_IMM(BPF_REG_2, 1),
- BPF_ALU64_REG(BPF_MOD, BPF_REG_2, BPF_REG_1),
- BPF_EXIT_INSN(),
- },
- .result = ACCEPT,
- .retval = 42,
-},
-{
- "DIV32 by 0, zero check ok, cls",
- .insns = {
- BPF_MOV32_IMM(BPF_REG_0, 42),
- BPF_MOV32_IMM(BPF_REG_1, 2),
- BPF_MOV32_IMM(BPF_REG_2, 16),
- BPF_ALU32_REG(BPF_DIV, BPF_REG_2, BPF_REG_1),
- BPF_MOV64_REG(BPF_REG_0, BPF_REG_2),
- BPF_EXIT_INSN(),
- },
- .prog_type = BPF_PROG_TYPE_SCHED_CLS,
- .result = ACCEPT,
- .retval = 8,
-},
-{
- "DIV32 by 0, zero check 1, cls",
- .insns = {
- BPF_MOV32_IMM(BPF_REG_1, 0),
- BPF_MOV32_IMM(BPF_REG_0, 1),
- BPF_ALU32_REG(BPF_DIV, BPF_REG_0, BPF_REG_1),
- BPF_EXIT_INSN(),
- },
- .prog_type = BPF_PROG_TYPE_SCHED_CLS,
- .result = ACCEPT,
- .retval = 0,
-},
-{
- "DIV32 by 0, zero check 2, cls",
- .insns = {
- BPF_LD_IMM64(BPF_REG_1, 0xffffffff00000000LL),
- BPF_MOV32_IMM(BPF_REG_0, 1),
- BPF_ALU32_REG(BPF_DIV, BPF_REG_0, BPF_REG_1),
- BPF_EXIT_INSN(),
- },
- .prog_type = BPF_PROG_TYPE_SCHED_CLS,
- .result = ACCEPT,
- .retval = 0,
-},
-{
- "DIV64 by 0, zero check, cls",
- .insns = {
- BPF_MOV32_IMM(BPF_REG_1, 0),
- BPF_MOV32_IMM(BPF_REG_0, 1),
- BPF_ALU64_REG(BPF_DIV, BPF_REG_0, BPF_REG_1),
- BPF_EXIT_INSN(),
- },
- .prog_type = BPF_PROG_TYPE_SCHED_CLS,
- .result = ACCEPT,
- .retval = 0,
-},
-{
- "MOD32 by 0, zero check ok, cls",
- .insns = {
- BPF_MOV32_IMM(BPF_REG_0, 42),
- BPF_MOV32_IMM(BPF_REG_1, 3),
- BPF_MOV32_IMM(BPF_REG_2, 5),
- BPF_ALU32_REG(BPF_MOD, BPF_REG_2, BPF_REG_1),
- BPF_MOV64_REG(BPF_REG_0, BPF_REG_2),
- BPF_EXIT_INSN(),
- },
- .prog_type = BPF_PROG_TYPE_SCHED_CLS,
- .result = ACCEPT,
- .retval = 2,
-},
-{
- "MOD32 by 0, zero check 1, cls",
- .insns = {
- BPF_MOV32_IMM(BPF_REG_1, 0),
- BPF_MOV32_IMM(BPF_REG_0, 1),
- BPF_ALU32_REG(BPF_MOD, BPF_REG_0, BPF_REG_1),
- BPF_EXIT_INSN(),
- },
- .prog_type = BPF_PROG_TYPE_SCHED_CLS,
- .result = ACCEPT,
- .retval = 1,
-},
-{
- "MOD32 by 0, zero check 2, cls",
- .insns = {
- BPF_LD_IMM64(BPF_REG_1, 0xffffffff00000000LL),
- BPF_MOV32_IMM(BPF_REG_0, 1),
- BPF_ALU32_REG(BPF_MOD, BPF_REG_0, BPF_REG_1),
- BPF_EXIT_INSN(),
- },
- .prog_type = BPF_PROG_TYPE_SCHED_CLS,
- .result = ACCEPT,
- .retval = 1,
-},
-{
- "MOD64 by 0, zero check 1, cls",
- .insns = {
- BPF_MOV32_IMM(BPF_REG_1, 0),
- BPF_MOV32_IMM(BPF_REG_0, 2),
- BPF_ALU64_REG(BPF_MOD, BPF_REG_0, BPF_REG_1),
- BPF_EXIT_INSN(),
- },
- .prog_type = BPF_PROG_TYPE_SCHED_CLS,
- .result = ACCEPT,
- .retval = 2,
-},
-{
- "MOD64 by 0, zero check 2, cls",
- .insns = {
- BPF_MOV32_IMM(BPF_REG_1, 0),
- BPF_MOV32_IMM(BPF_REG_0, -1),
- BPF_ALU64_REG(BPF_MOD, BPF_REG_0, BPF_REG_1),
- BPF_EXIT_INSN(),
- },
- .prog_type = BPF_PROG_TYPE_SCHED_CLS,
- .result = ACCEPT,
- .retval = -1,
-},
diff --git a/tools/testing/selftests/bpf/verifier/div_overflow.c b/tools/testing/selftests/bpf/verifier/div_overflow.c
deleted file mode 100644
index acab4f00819f..000000000000
--- a/tools/testing/selftests/bpf/verifier/div_overflow.c
+++ /dev/null
@@ -1,110 +0,0 @@
-/* Just make sure that JITs used udiv/umod as otherwise we get
- * an exception from INT_MIN/-1 overflow similarly as with div
- * by zero.
- */
-{
- "DIV32 overflow, check 1",
- .insns = {
- BPF_MOV32_IMM(BPF_REG_1, -1),
- BPF_MOV32_IMM(BPF_REG_0, INT_MIN),
- BPF_ALU32_REG(BPF_DIV, BPF_REG_0, BPF_REG_1),
- BPF_EXIT_INSN(),
- },
- .prog_type = BPF_PROG_TYPE_SCHED_CLS,
- .result = ACCEPT,
- .retval = 0,
-},
-{
- "DIV32 overflow, check 2",
- .insns = {
- BPF_MOV32_IMM(BPF_REG_0, INT_MIN),
- BPF_ALU32_IMM(BPF_DIV, BPF_REG_0, -1),
- BPF_EXIT_INSN(),
- },
- .prog_type = BPF_PROG_TYPE_SCHED_CLS,
- .result = ACCEPT,
- .retval = 0,
-},
-{
- "DIV64 overflow, check 1",
- .insns = {
- BPF_MOV64_IMM(BPF_REG_1, -1),
- BPF_LD_IMM64(BPF_REG_2, LLONG_MIN),
- BPF_ALU64_REG(BPF_DIV, BPF_REG_2, BPF_REG_1),
- BPF_MOV32_IMM(BPF_REG_0, 0),
- BPF_JMP_REG(BPF_JEQ, BPF_REG_0, BPF_REG_2, 1),
- BPF_MOV32_IMM(BPF_REG_0, 1),
- BPF_EXIT_INSN(),
- },
- .prog_type = BPF_PROG_TYPE_SCHED_CLS,
- .result = ACCEPT,
- .retval = 0,
-},
-{
- "DIV64 overflow, check 2",
- .insns = {
- BPF_LD_IMM64(BPF_REG_1, LLONG_MIN),
- BPF_ALU64_IMM(BPF_DIV, BPF_REG_1, -1),
- BPF_MOV32_IMM(BPF_REG_0, 0),
- BPF_JMP_REG(BPF_JEQ, BPF_REG_0, BPF_REG_1, 1),
- BPF_MOV32_IMM(BPF_REG_0, 1),
- BPF_EXIT_INSN(),
- },
- .prog_type = BPF_PROG_TYPE_SCHED_CLS,
- .result = ACCEPT,
- .retval = 0,
-},
-{
- "MOD32 overflow, check 1",
- .insns = {
- BPF_MOV32_IMM(BPF_REG_1, -1),
- BPF_MOV32_IMM(BPF_REG_0, INT_MIN),
- BPF_ALU32_REG(BPF_MOD, BPF_REG_0, BPF_REG_1),
- BPF_EXIT_INSN(),
- },
- .prog_type = BPF_PROG_TYPE_SCHED_CLS,
- .result = ACCEPT,
- .retval = INT_MIN,
-},
-{
- "MOD32 overflow, check 2",
- .insns = {
- BPF_MOV32_IMM(BPF_REG_0, INT_MIN),
- BPF_ALU32_IMM(BPF_MOD, BPF_REG_0, -1),
- BPF_EXIT_INSN(),
- },
- .prog_type = BPF_PROG_TYPE_SCHED_CLS,
- .result = ACCEPT,
- .retval = INT_MIN,
-},
-{
- "MOD64 overflow, check 1",
- .insns = {
- BPF_MOV64_IMM(BPF_REG_1, -1),
- BPF_LD_IMM64(BPF_REG_2, LLONG_MIN),
- BPF_MOV64_REG(BPF_REG_3, BPF_REG_2),
- BPF_ALU64_REG(BPF_MOD, BPF_REG_2, BPF_REG_1),
- BPF_MOV32_IMM(BPF_REG_0, 0),
- BPF_JMP_REG(BPF_JNE, BPF_REG_3, BPF_REG_2, 1),
- BPF_MOV32_IMM(BPF_REG_0, 1),
- BPF_EXIT_INSN(),
- },
- .prog_type = BPF_PROG_TYPE_SCHED_CLS,
- .result = ACCEPT,
- .retval = 1,
-},
-{
- "MOD64 overflow, check 2",
- .insns = {
- BPF_LD_IMM64(BPF_REG_2, LLONG_MIN),
- BPF_MOV64_REG(BPF_REG_3, BPF_REG_2),
- BPF_ALU64_IMM(BPF_MOD, BPF_REG_2, -1),
- BPF_MOV32_IMM(BPF_REG_0, 0),
- BPF_JMP_REG(BPF_JNE, BPF_REG_3, BPF_REG_2, 1),
- BPF_MOV32_IMM(BPF_REG_0, 1),
- BPF_EXIT_INSN(),
- },
- .prog_type = BPF_PROG_TYPE_SCHED_CLS,
- .result = ACCEPT,
- .retval = 1,
-},
diff --git a/tools/testing/selftests/bpf/verifier/event_output.c b/tools/testing/selftests/bpf/verifier/event_output.c
index 99f8f582c02b..c5e805980409 100644
--- a/tools/testing/selftests/bpf/verifier/event_output.c
+++ b/tools/testing/selftests/bpf/verifier/event_output.c
@@ -112,6 +112,7 @@
"perfevent for cgroup sockopt",
.insns = { __PERF_EVENT_INSNS__ },
.prog_type = BPF_PROG_TYPE_CGROUP_SOCKOPT,
+ .expected_attach_type = BPF_CGROUP_SETSOCKOPT,
.fixup_map_event_output = { 4 },
.result = ACCEPT,
.retval = 1,
diff --git a/tools/testing/selftests/bpf/verifier/helper_access_var_len.c b/tools/testing/selftests/bpf/verifier/helper_access_var_len.c
deleted file mode 100644
index 87c4e7900083..000000000000
--- a/tools/testing/selftests/bpf/verifier/helper_access_var_len.c
+++ /dev/null
@@ -1,616 +0,0 @@
-{
- "helper access to variable memory: stack, bitwise AND + JMP, correct bounds",
- .insns = {
- BPF_MOV64_REG(BPF_REG_1, BPF_REG_10),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -64),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_0, -64),
- BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_0, -56),
- BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_0, -48),
- BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_0, -40),
- BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_0, -32),
- BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_0, -24),
- BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_0, -16),
- BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_0, -8),
- BPF_MOV64_IMM(BPF_REG_2, 16),
- BPF_STX_MEM(BPF_DW, BPF_REG_1, BPF_REG_2, -128),
- BPF_LDX_MEM(BPF_DW, BPF_REG_2, BPF_REG_1, -128),
- BPF_ALU64_IMM(BPF_AND, BPF_REG_2, 64),
- BPF_MOV64_IMM(BPF_REG_4, 0),
- BPF_JMP_REG(BPF_JGE, BPF_REG_4, BPF_REG_2, 2),
- BPF_MOV64_IMM(BPF_REG_3, 0),
- BPF_EMIT_CALL(BPF_FUNC_probe_read_kernel),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- },
- .result = ACCEPT,
- .prog_type = BPF_PROG_TYPE_TRACEPOINT,
-},
-{
- "helper access to variable memory: stack, bitwise AND, zero included",
- .insns = {
- BPF_LDX_MEM(BPF_DW, BPF_REG_2, BPF_REG_1, 8),
- BPF_MOV64_REG(BPF_REG_1, BPF_REG_10),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -64),
- BPF_STX_MEM(BPF_DW, BPF_REG_1, BPF_REG_2, -128),
- BPF_LDX_MEM(BPF_DW, BPF_REG_2, BPF_REG_1, -128),
- BPF_ALU64_IMM(BPF_AND, BPF_REG_2, 64),
- BPF_MOV64_IMM(BPF_REG_3, 0),
- BPF_EMIT_CALL(BPF_FUNC_probe_read_kernel),
- BPF_EXIT_INSN(),
- },
- .errstr = "invalid indirect read from stack off -64+0 size 64",
- .result = REJECT,
- .prog_type = BPF_PROG_TYPE_TRACEPOINT,
-},
-{
- "helper access to variable memory: stack, bitwise AND + JMP, wrong max",
- .insns = {
- BPF_LDX_MEM(BPF_DW, BPF_REG_2, BPF_REG_1, 8),
- BPF_MOV64_REG(BPF_REG_1, BPF_REG_10),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -64),
- BPF_STX_MEM(BPF_DW, BPF_REG_1, BPF_REG_2, -128),
- BPF_LDX_MEM(BPF_DW, BPF_REG_2, BPF_REG_1, -128),
- BPF_ALU64_IMM(BPF_AND, BPF_REG_2, 65),
- BPF_MOV64_IMM(BPF_REG_4, 0),
- BPF_JMP_REG(BPF_JGE, BPF_REG_4, BPF_REG_2, 2),
- BPF_MOV64_IMM(BPF_REG_3, 0),
- BPF_EMIT_CALL(BPF_FUNC_probe_read_kernel),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- },
- .errstr = "invalid stack type R1 off=-64 access_size=65",
- .result = REJECT,
- .prog_type = BPF_PROG_TYPE_TRACEPOINT,
-},
-{
- "helper access to variable memory: stack, JMP, correct bounds",
- .insns = {
- BPF_MOV64_REG(BPF_REG_1, BPF_REG_10),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -64),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_0, -64),
- BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_0, -56),
- BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_0, -48),
- BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_0, -40),
- BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_0, -32),
- BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_0, -24),
- BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_0, -16),
- BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_0, -8),
- BPF_MOV64_IMM(BPF_REG_2, 16),
- BPF_STX_MEM(BPF_DW, BPF_REG_1, BPF_REG_2, -128),
- BPF_LDX_MEM(BPF_DW, BPF_REG_2, BPF_REG_1, -128),
- BPF_JMP_IMM(BPF_JGT, BPF_REG_2, 64, 4),
- BPF_MOV64_IMM(BPF_REG_4, 0),
- BPF_JMP_REG(BPF_JGE, BPF_REG_4, BPF_REG_2, 2),
- BPF_MOV64_IMM(BPF_REG_3, 0),
- BPF_EMIT_CALL(BPF_FUNC_probe_read_kernel),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- },
- .result = ACCEPT,
- .prog_type = BPF_PROG_TYPE_TRACEPOINT,
-},
-{
- "helper access to variable memory: stack, JMP (signed), correct bounds",
- .insns = {
- BPF_MOV64_REG(BPF_REG_1, BPF_REG_10),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -64),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_0, -64),
- BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_0, -56),
- BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_0, -48),
- BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_0, -40),
- BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_0, -32),
- BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_0, -24),
- BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_0, -16),
- BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_0, -8),
- BPF_MOV64_IMM(BPF_REG_2, 16),
- BPF_STX_MEM(BPF_DW, BPF_REG_1, BPF_REG_2, -128),
- BPF_LDX_MEM(BPF_DW, BPF_REG_2, BPF_REG_1, -128),
- BPF_JMP_IMM(BPF_JSGT, BPF_REG_2, 64, 4),
- BPF_MOV64_IMM(BPF_REG_4, 0),
- BPF_JMP_REG(BPF_JSGE, BPF_REG_4, BPF_REG_2, 2),
- BPF_MOV64_IMM(BPF_REG_3, 0),
- BPF_EMIT_CALL(BPF_FUNC_probe_read_kernel),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- },
- .result = ACCEPT,
- .prog_type = BPF_PROG_TYPE_TRACEPOINT,
-},
-{
- "helper access to variable memory: stack, JMP, bounds + offset",
- .insns = {
- BPF_LDX_MEM(BPF_DW, BPF_REG_2, BPF_REG_1, 8),
- BPF_MOV64_REG(BPF_REG_1, BPF_REG_10),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -64),
- BPF_STX_MEM(BPF_DW, BPF_REG_1, BPF_REG_2, -128),
- BPF_LDX_MEM(BPF_DW, BPF_REG_2, BPF_REG_1, -128),
- BPF_JMP_IMM(BPF_JGT, BPF_REG_2, 64, 5),
- BPF_MOV64_IMM(BPF_REG_4, 0),
- BPF_JMP_REG(BPF_JGE, BPF_REG_4, BPF_REG_2, 3),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, 1),
- BPF_MOV64_IMM(BPF_REG_3, 0),
- BPF_EMIT_CALL(BPF_FUNC_probe_read_kernel),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- },
- .errstr = "invalid stack type R1 off=-64 access_size=65",
- .result = REJECT,
- .prog_type = BPF_PROG_TYPE_TRACEPOINT,
-},
-{
- "helper access to variable memory: stack, JMP, wrong max",
- .insns = {
- BPF_LDX_MEM(BPF_DW, BPF_REG_2, BPF_REG_1, 8),
- BPF_MOV64_REG(BPF_REG_1, BPF_REG_10),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -64),
- BPF_STX_MEM(BPF_DW, BPF_REG_1, BPF_REG_2, -128),
- BPF_LDX_MEM(BPF_DW, BPF_REG_2, BPF_REG_1, -128),
- BPF_JMP_IMM(BPF_JGT, BPF_REG_2, 65, 4),
- BPF_MOV64_IMM(BPF_REG_4, 0),
- BPF_JMP_REG(BPF_JGE, BPF_REG_4, BPF_REG_2, 2),
- BPF_MOV64_IMM(BPF_REG_3, 0),
- BPF_EMIT_CALL(BPF_FUNC_probe_read_kernel),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- },
- .errstr = "invalid stack type R1 off=-64 access_size=65",
- .result = REJECT,
- .prog_type = BPF_PROG_TYPE_TRACEPOINT,
-},
-{
- "helper access to variable memory: stack, JMP, no max check",
- .insns = {
- BPF_LDX_MEM(BPF_DW, BPF_REG_2, BPF_REG_1, 8),
- BPF_MOV64_REG(BPF_REG_1, BPF_REG_10),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -64),
- BPF_STX_MEM(BPF_DW, BPF_REG_1, BPF_REG_2, -128),
- BPF_LDX_MEM(BPF_DW, BPF_REG_2, BPF_REG_1, -128),
- BPF_MOV64_IMM(BPF_REG_4, 0),
- BPF_JMP_REG(BPF_JGE, BPF_REG_4, BPF_REG_2, 2),
- BPF_MOV64_IMM(BPF_REG_3, 0),
- BPF_EMIT_CALL(BPF_FUNC_probe_read_kernel),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- },
- /* because max wasn't checked, signed min is negative */
- .errstr = "R2 min value is negative, either use unsigned or 'var &= const'",
- .result = REJECT,
- .prog_type = BPF_PROG_TYPE_TRACEPOINT,
-},
-{
- "helper access to variable memory: stack, JMP, no min check",
- .insns = {
- BPF_LDX_MEM(BPF_DW, BPF_REG_2, BPF_REG_1, 8),
- BPF_MOV64_REG(BPF_REG_1, BPF_REG_10),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -64),
- BPF_STX_MEM(BPF_DW, BPF_REG_1, BPF_REG_2, -128),
- BPF_LDX_MEM(BPF_DW, BPF_REG_2, BPF_REG_1, -128),
- BPF_JMP_IMM(BPF_JGT, BPF_REG_2, 64, 3),
- BPF_MOV64_IMM(BPF_REG_3, 0),
- BPF_EMIT_CALL(BPF_FUNC_probe_read_kernel),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- },
- .errstr = "invalid indirect read from stack off -64+0 size 64",
- .result = REJECT,
- .prog_type = BPF_PROG_TYPE_TRACEPOINT,
-},
-{
- "helper access to variable memory: stack, JMP (signed), no min check",
- .insns = {
- BPF_LDX_MEM(BPF_DW, BPF_REG_2, BPF_REG_1, 8),
- BPF_MOV64_REG(BPF_REG_1, BPF_REG_10),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -64),
- BPF_STX_MEM(BPF_DW, BPF_REG_1, BPF_REG_2, -128),
- BPF_LDX_MEM(BPF_DW, BPF_REG_2, BPF_REG_1, -128),
- BPF_JMP_IMM(BPF_JSGT, BPF_REG_2, 64, 3),
- BPF_MOV64_IMM(BPF_REG_3, 0),
- BPF_EMIT_CALL(BPF_FUNC_probe_read_kernel),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- },
- .errstr = "R2 min value is negative",
- .result = REJECT,
- .prog_type = BPF_PROG_TYPE_TRACEPOINT,
-},
-{
- "helper access to variable memory: map, JMP, correct bounds",
- .insns = {
- BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
- BPF_ST_MEM(BPF_DW, BPF_REG_2, 0, 0),
- BPF_LD_MAP_FD(BPF_REG_1, 0),
- BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem),
- BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 10),
- BPF_MOV64_REG(BPF_REG_1, BPF_REG_0),
- BPF_MOV64_IMM(BPF_REG_2, sizeof(struct test_val)),
- BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_2, -128),
- BPF_LDX_MEM(BPF_DW, BPF_REG_2, BPF_REG_10, -128),
- BPF_JMP_IMM(BPF_JSGT, BPF_REG_2, sizeof(struct test_val), 4),
- BPF_MOV64_IMM(BPF_REG_4, 0),
- BPF_JMP_REG(BPF_JSGE, BPF_REG_4, BPF_REG_2, 2),
- BPF_MOV64_IMM(BPF_REG_3, 0),
- BPF_EMIT_CALL(BPF_FUNC_probe_read_kernel),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- },
- .fixup_map_hash_48b = { 3 },
- .result = ACCEPT,
- .prog_type = BPF_PROG_TYPE_TRACEPOINT,
-},
-{
- "helper access to variable memory: map, JMP, wrong max",
- .insns = {
- BPF_LDX_MEM(BPF_DW, BPF_REG_6, BPF_REG_1, 8),
- BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
- BPF_ST_MEM(BPF_DW, BPF_REG_2, 0, 0),
- BPF_LD_MAP_FD(BPF_REG_1, 0),
- BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem),
- BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 10),
- BPF_MOV64_REG(BPF_REG_1, BPF_REG_0),
- BPF_MOV64_REG(BPF_REG_2, BPF_REG_6),
- BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_2, -128),
- BPF_LDX_MEM(BPF_DW, BPF_REG_2, BPF_REG_10, -128),
- BPF_JMP_IMM(BPF_JSGT, BPF_REG_2, sizeof(struct test_val) + 1, 4),
- BPF_MOV64_IMM(BPF_REG_4, 0),
- BPF_JMP_REG(BPF_JSGE, BPF_REG_4, BPF_REG_2, 2),
- BPF_MOV64_IMM(BPF_REG_3, 0),
- BPF_EMIT_CALL(BPF_FUNC_probe_read_kernel),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- },
- .fixup_map_hash_48b = { 4 },
- .errstr = "invalid access to map value, value_size=48 off=0 size=49",
- .result = REJECT,
- .prog_type = BPF_PROG_TYPE_TRACEPOINT,
-},
-{
- "helper access to variable memory: map adjusted, JMP, correct bounds",
- .insns = {
- BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
- BPF_ST_MEM(BPF_DW, BPF_REG_2, 0, 0),
- BPF_LD_MAP_FD(BPF_REG_1, 0),
- BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem),
- BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 11),
- BPF_MOV64_REG(BPF_REG_1, BPF_REG_0),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 20),
- BPF_MOV64_IMM(BPF_REG_2, sizeof(struct test_val)),
- BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_2, -128),
- BPF_LDX_MEM(BPF_DW, BPF_REG_2, BPF_REG_10, -128),
- BPF_JMP_IMM(BPF_JSGT, BPF_REG_2, sizeof(struct test_val) - 20, 4),
- BPF_MOV64_IMM(BPF_REG_4, 0),
- BPF_JMP_REG(BPF_JSGE, BPF_REG_4, BPF_REG_2, 2),
- BPF_MOV64_IMM(BPF_REG_3, 0),
- BPF_EMIT_CALL(BPF_FUNC_probe_read_kernel),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- },
- .fixup_map_hash_48b = { 3 },
- .result = ACCEPT,
- .prog_type = BPF_PROG_TYPE_TRACEPOINT,
-},
-{
- "helper access to variable memory: map adjusted, JMP, wrong max",
- .insns = {
- BPF_LDX_MEM(BPF_DW, BPF_REG_6, BPF_REG_1, 8),
- BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
- BPF_ST_MEM(BPF_DW, BPF_REG_2, 0, 0),
- BPF_LD_MAP_FD(BPF_REG_1, 0),
- BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem),
- BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 11),
- BPF_MOV64_REG(BPF_REG_1, BPF_REG_0),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 20),
- BPF_MOV64_REG(BPF_REG_2, BPF_REG_6),
- BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_2, -128),
- BPF_LDX_MEM(BPF_DW, BPF_REG_2, BPF_REG_10, -128),
- BPF_JMP_IMM(BPF_JSGT, BPF_REG_2, sizeof(struct test_val) - 19, 4),
- BPF_MOV64_IMM(BPF_REG_4, 0),
- BPF_JMP_REG(BPF_JSGE, BPF_REG_4, BPF_REG_2, 2),
- BPF_MOV64_IMM(BPF_REG_3, 0),
- BPF_EMIT_CALL(BPF_FUNC_probe_read_kernel),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- },
- .fixup_map_hash_48b = { 4 },
- .errstr = "R1 min value is outside of the allowed memory range",
- .result = REJECT,
- .prog_type = BPF_PROG_TYPE_TRACEPOINT,
-},
-{
- "helper access to variable memory: size = 0 allowed on NULL (ARG_PTR_TO_MEM_OR_NULL)",
- .insns = {
- BPF_MOV64_IMM(BPF_REG_1, 0),
- BPF_MOV64_IMM(BPF_REG_2, 0),
- BPF_MOV64_IMM(BPF_REG_3, 0),
- BPF_MOV64_IMM(BPF_REG_4, 0),
- BPF_MOV64_IMM(BPF_REG_5, 0),
- BPF_EMIT_CALL(BPF_FUNC_csum_diff),
- BPF_EXIT_INSN(),
- },
- .result = ACCEPT,
- .prog_type = BPF_PROG_TYPE_SCHED_CLS,
-},
-{
- "helper access to variable memory: size > 0 not allowed on NULL (ARG_PTR_TO_MEM_OR_NULL)",
- .insns = {
- BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, 0),
- BPF_MOV64_IMM(BPF_REG_1, 0),
- BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_2, -128),
- BPF_LDX_MEM(BPF_DW, BPF_REG_2, BPF_REG_10, -128),
- BPF_ALU64_IMM(BPF_AND, BPF_REG_2, 64),
- BPF_MOV64_IMM(BPF_REG_3, 0),
- BPF_MOV64_IMM(BPF_REG_4, 0),
- BPF_MOV64_IMM(BPF_REG_5, 0),
- BPF_EMIT_CALL(BPF_FUNC_csum_diff),
- BPF_EXIT_INSN(),
- },
- .errstr = "R1 type=inv expected=fp",
- .result = REJECT,
- .prog_type = BPF_PROG_TYPE_SCHED_CLS,
-},
-{
- "helper access to variable memory: size = 0 allowed on != NULL stack pointer (ARG_PTR_TO_MEM_OR_NULL)",
- .insns = {
- BPF_MOV64_REG(BPF_REG_1, BPF_REG_10),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -8),
- BPF_MOV64_IMM(BPF_REG_2, 0),
- BPF_STX_MEM(BPF_DW, BPF_REG_1, BPF_REG_2, 0),
- BPF_ALU64_IMM(BPF_AND, BPF_REG_2, 8),
- BPF_MOV64_IMM(BPF_REG_3, 0),
- BPF_MOV64_IMM(BPF_REG_4, 0),
- BPF_MOV64_IMM(BPF_REG_5, 0),
- BPF_EMIT_CALL(BPF_FUNC_csum_diff),
- BPF_EXIT_INSN(),
- },
- .result = ACCEPT,
- .prog_type = BPF_PROG_TYPE_SCHED_CLS,
-},
-{
- "helper access to variable memory: size = 0 allowed on != NULL map pointer (ARG_PTR_TO_MEM_OR_NULL)",
- .insns = {
- BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
- BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
- BPF_LD_MAP_FD(BPF_REG_1, 0),
- BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
- BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 6),
- BPF_MOV64_REG(BPF_REG_1, BPF_REG_0),
- BPF_MOV64_IMM(BPF_REG_2, 0),
- BPF_MOV64_IMM(BPF_REG_3, 0),
- BPF_MOV64_IMM(BPF_REG_4, 0),
- BPF_MOV64_IMM(BPF_REG_5, 0),
- BPF_EMIT_CALL(BPF_FUNC_csum_diff),
- BPF_EXIT_INSN(),
- },
- .fixup_map_hash_8b = { 3 },
- .result = ACCEPT,
- .prog_type = BPF_PROG_TYPE_SCHED_CLS,
-},
-{
- "helper access to variable memory: size possible = 0 allowed on != NULL stack pointer (ARG_PTR_TO_MEM_OR_NULL)",
- .insns = {
- BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
- BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
- BPF_LD_MAP_FD(BPF_REG_1, 0),
- BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
- BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 9),
- BPF_LDX_MEM(BPF_DW, BPF_REG_2, BPF_REG_0, 0),
- BPF_JMP_IMM(BPF_JGT, BPF_REG_2, 8, 7),
- BPF_MOV64_REG(BPF_REG_1, BPF_REG_10),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -8),
- BPF_STX_MEM(BPF_DW, BPF_REG_1, BPF_REG_2, 0),
- BPF_MOV64_IMM(BPF_REG_3, 0),
- BPF_MOV64_IMM(BPF_REG_4, 0),
- BPF_MOV64_IMM(BPF_REG_5, 0),
- BPF_EMIT_CALL(BPF_FUNC_csum_diff),
- BPF_EXIT_INSN(),
- },
- .fixup_map_hash_8b = { 3 },
- .result = ACCEPT,
- .prog_type = BPF_PROG_TYPE_SCHED_CLS,
-},
-{
- "helper access to variable memory: size possible = 0 allowed on != NULL map pointer (ARG_PTR_TO_MEM_OR_NULL)",
- .insns = {
- BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
- BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
- BPF_LD_MAP_FD(BPF_REG_1, 0),
- BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
- BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 7),
- BPF_MOV64_REG(BPF_REG_1, BPF_REG_0),
- BPF_LDX_MEM(BPF_DW, BPF_REG_2, BPF_REG_0, 0),
- BPF_JMP_IMM(BPF_JGT, BPF_REG_2, 8, 4),
- BPF_MOV64_IMM(BPF_REG_3, 0),
- BPF_MOV64_IMM(BPF_REG_4, 0),
- BPF_MOV64_IMM(BPF_REG_5, 0),
- BPF_EMIT_CALL(BPF_FUNC_csum_diff),
- BPF_EXIT_INSN(),
- },
- .fixup_map_hash_8b = { 3 },
- .result = ACCEPT,
- .prog_type = BPF_PROG_TYPE_SCHED_CLS,
-},
-{
- "helper access to variable memory: size possible = 0 allowed on != NULL packet pointer (ARG_PTR_TO_MEM_OR_NULL)",
- .insns = {
- BPF_LDX_MEM(BPF_W, BPF_REG_6, BPF_REG_1,
- offsetof(struct __sk_buff, data)),
- BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1,
- offsetof(struct __sk_buff, data_end)),
- BPF_MOV64_REG(BPF_REG_0, BPF_REG_6),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 8),
- BPF_JMP_REG(BPF_JGT, BPF_REG_0, BPF_REG_3, 7),
- BPF_MOV64_REG(BPF_REG_1, BPF_REG_6),
- BPF_LDX_MEM(BPF_DW, BPF_REG_2, BPF_REG_6, 0),
- BPF_JMP_IMM(BPF_JGT, BPF_REG_2, 8, 4),
- BPF_MOV64_IMM(BPF_REG_3, 0),
- BPF_MOV64_IMM(BPF_REG_4, 0),
- BPF_MOV64_IMM(BPF_REG_5, 0),
- BPF_EMIT_CALL(BPF_FUNC_csum_diff),
- BPF_EXIT_INSN(),
- },
- .result = ACCEPT,
- .prog_type = BPF_PROG_TYPE_SCHED_CLS,
- .retval = 0 /* csum_diff of 64-byte packet */,
- .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
-},
-{
- "helper access to variable memory: size = 0 not allowed on NULL (!ARG_PTR_TO_MEM_OR_NULL)",
- .insns = {
- BPF_MOV64_IMM(BPF_REG_1, 0),
- BPF_MOV64_IMM(BPF_REG_2, 0),
- BPF_MOV64_IMM(BPF_REG_3, 0),
- BPF_EMIT_CALL(BPF_FUNC_probe_read_kernel),
- BPF_EXIT_INSN(),
- },
- .errstr = "R1 type=inv expected=fp",
- .result = REJECT,
- .prog_type = BPF_PROG_TYPE_TRACEPOINT,
-},
-{
- "helper access to variable memory: size > 0 not allowed on NULL (!ARG_PTR_TO_MEM_OR_NULL)",
- .insns = {
- BPF_MOV64_IMM(BPF_REG_1, 0),
- BPF_MOV64_IMM(BPF_REG_2, 1),
- BPF_MOV64_IMM(BPF_REG_3, 0),
- BPF_EMIT_CALL(BPF_FUNC_probe_read_kernel),
- BPF_EXIT_INSN(),
- },
- .errstr = "R1 type=inv expected=fp",
- .result = REJECT,
- .prog_type = BPF_PROG_TYPE_TRACEPOINT,
-},
-{
- "helper access to variable memory: size = 0 allowed on != NULL stack pointer (!ARG_PTR_TO_MEM_OR_NULL)",
- .insns = {
- BPF_MOV64_REG(BPF_REG_1, BPF_REG_10),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -8),
- BPF_MOV64_IMM(BPF_REG_2, 0),
- BPF_MOV64_IMM(BPF_REG_3, 0),
- BPF_EMIT_CALL(BPF_FUNC_probe_read_kernel),
- BPF_EXIT_INSN(),
- },
- .result = ACCEPT,
- .prog_type = BPF_PROG_TYPE_TRACEPOINT,
-},
-{
- "helper access to variable memory: size = 0 allowed on != NULL map pointer (!ARG_PTR_TO_MEM_OR_NULL)",
- .insns = {
- BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
- BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
- BPF_LD_MAP_FD(BPF_REG_1, 0),
- BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem),
- BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 4),
- BPF_MOV64_REG(BPF_REG_1, BPF_REG_0),
- BPF_MOV64_IMM(BPF_REG_2, 0),
- BPF_MOV64_IMM(BPF_REG_3, 0),
- BPF_EMIT_CALL(BPF_FUNC_probe_read_kernel),
- BPF_EXIT_INSN(),
- },
- .fixup_map_hash_8b = { 3 },
- .result = ACCEPT,
- .prog_type = BPF_PROG_TYPE_TRACEPOINT,
-},
-{
- "helper access to variable memory: size possible = 0 allowed on != NULL stack pointer (!ARG_PTR_TO_MEM_OR_NULL)",
- .insns = {
- BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
- BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
- BPF_LD_MAP_FD(BPF_REG_1, 0),
- BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem),
- BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 6),
- BPF_LDX_MEM(BPF_DW, BPF_REG_2, BPF_REG_0, 0),
- BPF_JMP_IMM(BPF_JGT, BPF_REG_2, 8, 4),
- BPF_MOV64_REG(BPF_REG_1, BPF_REG_10),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -8),
- BPF_MOV64_IMM(BPF_REG_3, 0),
- BPF_EMIT_CALL(BPF_FUNC_probe_read_kernel),
- BPF_EXIT_INSN(),
- },
- .fixup_map_hash_8b = { 3 },
- .result = ACCEPT,
- .prog_type = BPF_PROG_TYPE_TRACEPOINT,
-},
-{
- "helper access to variable memory: size possible = 0 allowed on != NULL map pointer (!ARG_PTR_TO_MEM_OR_NULL)",
- .insns = {
- BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
- BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
- BPF_LD_MAP_FD(BPF_REG_1, 0),
- BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem),
- BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 5),
- BPF_MOV64_REG(BPF_REG_1, BPF_REG_0),
- BPF_LDX_MEM(BPF_DW, BPF_REG_2, BPF_REG_0, 0),
- BPF_JMP_IMM(BPF_JGT, BPF_REG_2, 8, 2),
- BPF_MOV64_IMM(BPF_REG_3, 0),
- BPF_EMIT_CALL(BPF_FUNC_probe_read_kernel),
- BPF_EXIT_INSN(),
- },
- .fixup_map_hash_8b = { 3 },
- .result = ACCEPT,
- .prog_type = BPF_PROG_TYPE_TRACEPOINT,
-},
-{
- "helper access to variable memory: 8 bytes leak",
- .insns = {
- BPF_LDX_MEM(BPF_DW, BPF_REG_2, BPF_REG_1, 8),
- BPF_MOV64_REG(BPF_REG_1, BPF_REG_10),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -64),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_0, -64),
- BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_0, -56),
- BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_0, -48),
- BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_0, -40),
- BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_0, -24),
- BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_0, -16),
- BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_0, -8),
- BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_2, -128),
- BPF_LDX_MEM(BPF_DW, BPF_REG_2, BPF_REG_10, -128),
- BPF_ALU64_IMM(BPF_AND, BPF_REG_2, 63),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, 1),
- BPF_MOV64_IMM(BPF_REG_3, 0),
- BPF_EMIT_CALL(BPF_FUNC_probe_read_kernel),
- BPF_LDX_MEM(BPF_DW, BPF_REG_1, BPF_REG_10, -16),
- BPF_EXIT_INSN(),
- },
- .errstr = "invalid indirect read from stack off -64+32 size 64",
- .result = REJECT,
- .prog_type = BPF_PROG_TYPE_TRACEPOINT,
-},
-{
- "helper access to variable memory: 8 bytes no leak (init memory)",
- .insns = {
- BPF_MOV64_REG(BPF_REG_1, BPF_REG_10),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_0, -64),
- BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_0, -56),
- BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_0, -48),
- BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_0, -40),
- BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_0, -32),
- BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_0, -24),
- BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_0, -16),
- BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_0, -8),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -64),
- BPF_MOV64_IMM(BPF_REG_2, 0),
- BPF_ALU64_IMM(BPF_AND, BPF_REG_2, 32),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, 32),
- BPF_MOV64_IMM(BPF_REG_3, 0),
- BPF_EMIT_CALL(BPF_FUNC_probe_read_kernel),
- BPF_LDX_MEM(BPF_DW, BPF_REG_1, BPF_REG_10, -16),
- BPF_EXIT_INSN(),
- },
- .result = ACCEPT,
- .prog_type = BPF_PROG_TYPE_TRACEPOINT,
-},
diff --git a/tools/testing/selftests/bpf/verifier/helper_packet_access.c b/tools/testing/selftests/bpf/verifier/helper_packet_access.c
deleted file mode 100644
index ae54587e9829..000000000000
--- a/tools/testing/selftests/bpf/verifier/helper_packet_access.c
+++ /dev/null
@@ -1,460 +0,0 @@
-{
- "helper access to packet: test1, valid packet_ptr range",
- .insns = {
- BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, offsetof(struct xdp_md, data)),
- BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1,
- offsetof(struct xdp_md, data_end)),
- BPF_MOV64_REG(BPF_REG_1, BPF_REG_2),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 8),
- BPF_JMP_REG(BPF_JGT, BPF_REG_1, BPF_REG_3, 5),
- BPF_LD_MAP_FD(BPF_REG_1, 0),
- BPF_MOV64_REG(BPF_REG_3, BPF_REG_2),
- BPF_MOV64_IMM(BPF_REG_4, 0),
- BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_update_elem),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- },
- .fixup_map_hash_8b = { 5 },
- .result_unpriv = ACCEPT,
- .result = ACCEPT,
- .prog_type = BPF_PROG_TYPE_XDP,
-},
-{
- "helper access to packet: test2, unchecked packet_ptr",
- .insns = {
- BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, offsetof(struct xdp_md, data)),
- BPF_LD_MAP_FD(BPF_REG_1, 0),
- BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- },
- .fixup_map_hash_8b = { 1 },
- .result = REJECT,
- .errstr = "invalid access to packet",
- .prog_type = BPF_PROG_TYPE_XDP,
-},
-{
- "helper access to packet: test3, variable add",
- .insns = {
- BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, offsetof(struct xdp_md, data)),
- BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1,
- offsetof(struct xdp_md, data_end)),
- BPF_MOV64_REG(BPF_REG_4, BPF_REG_2),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_4, 8),
- BPF_JMP_REG(BPF_JGT, BPF_REG_4, BPF_REG_3, 10),
- BPF_LDX_MEM(BPF_B, BPF_REG_5, BPF_REG_2, 0),
- BPF_MOV64_REG(BPF_REG_4, BPF_REG_2),
- BPF_ALU64_REG(BPF_ADD, BPF_REG_4, BPF_REG_5),
- BPF_MOV64_REG(BPF_REG_5, BPF_REG_4),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_5, 8),
- BPF_JMP_REG(BPF_JGT, BPF_REG_5, BPF_REG_3, 4),
- BPF_LD_MAP_FD(BPF_REG_1, 0),
- BPF_MOV64_REG(BPF_REG_2, BPF_REG_4),
- BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- },
- .fixup_map_hash_8b = { 11 },
- .result = ACCEPT,
- .prog_type = BPF_PROG_TYPE_XDP,
-},
-{
- "helper access to packet: test4, packet_ptr with bad range",
- .insns = {
- BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, offsetof(struct xdp_md, data)),
- BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1,
- offsetof(struct xdp_md, data_end)),
- BPF_MOV64_REG(BPF_REG_4, BPF_REG_2),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_4, 4),
- BPF_JMP_REG(BPF_JGT, BPF_REG_4, BPF_REG_3, 2),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- BPF_LD_MAP_FD(BPF_REG_1, 0),
- BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- },
- .fixup_map_hash_8b = { 7 },
- .result = REJECT,
- .errstr = "invalid access to packet",
- .prog_type = BPF_PROG_TYPE_XDP,
-},
-{
- "helper access to packet: test5, packet_ptr with too short range",
- .insns = {
- BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, offsetof(struct xdp_md, data)),
- BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1,
- offsetof(struct xdp_md, data_end)),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, 1),
- BPF_MOV64_REG(BPF_REG_4, BPF_REG_2),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_4, 7),
- BPF_JMP_REG(BPF_JGT, BPF_REG_4, BPF_REG_3, 3),
- BPF_LD_MAP_FD(BPF_REG_1, 0),
- BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- },
- .fixup_map_hash_8b = { 6 },
- .result = REJECT,
- .errstr = "invalid access to packet",
- .prog_type = BPF_PROG_TYPE_XDP,
-},
-{
- "helper access to packet: test6, cls valid packet_ptr range",
- .insns = {
- BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1,
- offsetof(struct __sk_buff, data)),
- BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1,
- offsetof(struct __sk_buff, data_end)),
- BPF_MOV64_REG(BPF_REG_1, BPF_REG_2),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 8),
- BPF_JMP_REG(BPF_JGT, BPF_REG_1, BPF_REG_3, 5),
- BPF_LD_MAP_FD(BPF_REG_1, 0),
- BPF_MOV64_REG(BPF_REG_3, BPF_REG_2),
- BPF_MOV64_IMM(BPF_REG_4, 0),
- BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_update_elem),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- },
- .fixup_map_hash_8b = { 5 },
- .result = ACCEPT,
- .prog_type = BPF_PROG_TYPE_SCHED_CLS,
-},
-{
- "helper access to packet: test7, cls unchecked packet_ptr",
- .insns = {
- BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1,
- offsetof(struct __sk_buff, data)),
- BPF_LD_MAP_FD(BPF_REG_1, 0),
- BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- },
- .fixup_map_hash_8b = { 1 },
- .result = REJECT,
- .errstr = "invalid access to packet",
- .prog_type = BPF_PROG_TYPE_SCHED_CLS,
-},
-{
- "helper access to packet: test8, cls variable add",
- .insns = {
- BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1,
- offsetof(struct __sk_buff, data)),
- BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1,
- offsetof(struct __sk_buff, data_end)),
- BPF_MOV64_REG(BPF_REG_4, BPF_REG_2),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_4, 8),
- BPF_JMP_REG(BPF_JGT, BPF_REG_4, BPF_REG_3, 10),
- BPF_LDX_MEM(BPF_B, BPF_REG_5, BPF_REG_2, 0),
- BPF_MOV64_REG(BPF_REG_4, BPF_REG_2),
- BPF_ALU64_REG(BPF_ADD, BPF_REG_4, BPF_REG_5),
- BPF_MOV64_REG(BPF_REG_5, BPF_REG_4),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_5, 8),
- BPF_JMP_REG(BPF_JGT, BPF_REG_5, BPF_REG_3, 4),
- BPF_LD_MAP_FD(BPF_REG_1, 0),
- BPF_MOV64_REG(BPF_REG_2, BPF_REG_4),
- BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- },
- .fixup_map_hash_8b = { 11 },
- .result = ACCEPT,
- .prog_type = BPF_PROG_TYPE_SCHED_CLS,
-},
-{
- "helper access to packet: test9, cls packet_ptr with bad range",
- .insns = {
- BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1,
- offsetof(struct __sk_buff, data)),
- BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1,
- offsetof(struct __sk_buff, data_end)),
- BPF_MOV64_REG(BPF_REG_4, BPF_REG_2),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_4, 4),
- BPF_JMP_REG(BPF_JGT, BPF_REG_4, BPF_REG_3, 2),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- BPF_LD_MAP_FD(BPF_REG_1, 0),
- BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- },
- .fixup_map_hash_8b = { 7 },
- .result = REJECT,
- .errstr = "invalid access to packet",
- .prog_type = BPF_PROG_TYPE_SCHED_CLS,
-},
-{
- "helper access to packet: test10, cls packet_ptr with too short range",
- .insns = {
- BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1,
- offsetof(struct __sk_buff, data)),
- BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1,
- offsetof(struct __sk_buff, data_end)),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, 1),
- BPF_MOV64_REG(BPF_REG_4, BPF_REG_2),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_4, 7),
- BPF_JMP_REG(BPF_JGT, BPF_REG_4, BPF_REG_3, 3),
- BPF_LD_MAP_FD(BPF_REG_1, 0),
- BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- },
- .fixup_map_hash_8b = { 6 },
- .result = REJECT,
- .errstr = "invalid access to packet",
- .prog_type = BPF_PROG_TYPE_SCHED_CLS,
-},
-{
- "helper access to packet: test11, cls unsuitable helper 1",
- .insns = {
- BPF_LDX_MEM(BPF_W, BPF_REG_6, BPF_REG_1,
- offsetof(struct __sk_buff, data)),
- BPF_LDX_MEM(BPF_W, BPF_REG_7, BPF_REG_1,
- offsetof(struct __sk_buff, data_end)),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_6, 1),
- BPF_MOV64_REG(BPF_REG_3, BPF_REG_6),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_3, 7),
- BPF_JMP_REG(BPF_JGT, BPF_REG_3, BPF_REG_7, 4),
- BPF_MOV64_IMM(BPF_REG_2, 0),
- BPF_MOV64_IMM(BPF_REG_4, 42),
- BPF_MOV64_IMM(BPF_REG_5, 0),
- BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_skb_store_bytes),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- },
- .result = REJECT,
- .errstr = "helper access to the packet",
- .prog_type = BPF_PROG_TYPE_SCHED_CLS,
-},
-{
- "helper access to packet: test12, cls unsuitable helper 2",
- .insns = {
- BPF_LDX_MEM(BPF_W, BPF_REG_6, BPF_REG_1,
- offsetof(struct __sk_buff, data)),
- BPF_LDX_MEM(BPF_W, BPF_REG_7, BPF_REG_1,
- offsetof(struct __sk_buff, data_end)),
- BPF_MOV64_REG(BPF_REG_3, BPF_REG_6),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_6, 8),
- BPF_JMP_REG(BPF_JGT, BPF_REG_6, BPF_REG_7, 3),
- BPF_MOV64_IMM(BPF_REG_2, 0),
- BPF_MOV64_IMM(BPF_REG_4, 4),
- BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_skb_load_bytes),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- },
- .result = REJECT,
- .errstr = "helper access to the packet",
- .prog_type = BPF_PROG_TYPE_SCHED_CLS,
-},
-{
- "helper access to packet: test13, cls helper ok",
- .insns = {
- BPF_LDX_MEM(BPF_W, BPF_REG_6, BPF_REG_1,
- offsetof(struct __sk_buff, data)),
- BPF_LDX_MEM(BPF_W, BPF_REG_7, BPF_REG_1,
- offsetof(struct __sk_buff, data_end)),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_6, 1),
- BPF_MOV64_REG(BPF_REG_1, BPF_REG_6),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 7),
- BPF_JMP_REG(BPF_JGT, BPF_REG_1, BPF_REG_7, 6),
- BPF_MOV64_REG(BPF_REG_1, BPF_REG_6),
- BPF_MOV64_IMM(BPF_REG_2, 4),
- BPF_MOV64_IMM(BPF_REG_3, 0),
- BPF_MOV64_IMM(BPF_REG_4, 0),
- BPF_MOV64_IMM(BPF_REG_5, 0),
- BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_csum_diff),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- },
- .result = ACCEPT,
- .prog_type = BPF_PROG_TYPE_SCHED_CLS,
-},
-{
- "helper access to packet: test14, cls helper ok sub",
- .insns = {
- BPF_LDX_MEM(BPF_W, BPF_REG_6, BPF_REG_1,
- offsetof(struct __sk_buff, data)),
- BPF_LDX_MEM(BPF_W, BPF_REG_7, BPF_REG_1,
- offsetof(struct __sk_buff, data_end)),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_6, 1),
- BPF_MOV64_REG(BPF_REG_1, BPF_REG_6),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 7),
- BPF_JMP_REG(BPF_JGT, BPF_REG_1, BPF_REG_7, 6),
- BPF_ALU64_IMM(BPF_SUB, BPF_REG_1, 4),
- BPF_MOV64_IMM(BPF_REG_2, 4),
- BPF_MOV64_IMM(BPF_REG_3, 0),
- BPF_MOV64_IMM(BPF_REG_4, 0),
- BPF_MOV64_IMM(BPF_REG_5, 0),
- BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_csum_diff),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- },
- .result = ACCEPT,
- .prog_type = BPF_PROG_TYPE_SCHED_CLS,
-},
-{
- "helper access to packet: test15, cls helper fail sub",
- .insns = {
- BPF_LDX_MEM(BPF_W, BPF_REG_6, BPF_REG_1,
- offsetof(struct __sk_buff, data)),
- BPF_LDX_MEM(BPF_W, BPF_REG_7, BPF_REG_1,
- offsetof(struct __sk_buff, data_end)),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_6, 1),
- BPF_MOV64_REG(BPF_REG_1, BPF_REG_6),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 7),
- BPF_JMP_REG(BPF_JGT, BPF_REG_1, BPF_REG_7, 6),
- BPF_ALU64_IMM(BPF_SUB, BPF_REG_1, 12),
- BPF_MOV64_IMM(BPF_REG_2, 4),
- BPF_MOV64_IMM(BPF_REG_3, 0),
- BPF_MOV64_IMM(BPF_REG_4, 0),
- BPF_MOV64_IMM(BPF_REG_5, 0),
- BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_csum_diff),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- },
- .result = REJECT,
- .errstr = "invalid access to packet",
- .prog_type = BPF_PROG_TYPE_SCHED_CLS,
-},
-{
- "helper access to packet: test16, cls helper fail range 1",
- .insns = {
- BPF_LDX_MEM(BPF_W, BPF_REG_6, BPF_REG_1,
- offsetof(struct __sk_buff, data)),
- BPF_LDX_MEM(BPF_W, BPF_REG_7, BPF_REG_1,
- offsetof(struct __sk_buff, data_end)),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_6, 1),
- BPF_MOV64_REG(BPF_REG_1, BPF_REG_6),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 7),
- BPF_JMP_REG(BPF_JGT, BPF_REG_1, BPF_REG_7, 6),
- BPF_MOV64_REG(BPF_REG_1, BPF_REG_6),
- BPF_MOV64_IMM(BPF_REG_2, 8),
- BPF_MOV64_IMM(BPF_REG_3, 0),
- BPF_MOV64_IMM(BPF_REG_4, 0),
- BPF_MOV64_IMM(BPF_REG_5, 0),
- BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_csum_diff),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- },
- .result = REJECT,
- .errstr = "invalid access to packet",
- .prog_type = BPF_PROG_TYPE_SCHED_CLS,
-},
-{
- "helper access to packet: test17, cls helper fail range 2",
- .insns = {
- BPF_LDX_MEM(BPF_W, BPF_REG_6, BPF_REG_1,
- offsetof(struct __sk_buff, data)),
- BPF_LDX_MEM(BPF_W, BPF_REG_7, BPF_REG_1,
- offsetof(struct __sk_buff, data_end)),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_6, 1),
- BPF_MOV64_REG(BPF_REG_1, BPF_REG_6),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 7),
- BPF_JMP_REG(BPF_JGT, BPF_REG_1, BPF_REG_7, 6),
- BPF_MOV64_REG(BPF_REG_1, BPF_REG_6),
- BPF_MOV64_IMM(BPF_REG_2, -9),
- BPF_MOV64_IMM(BPF_REG_3, 0),
- BPF_MOV64_IMM(BPF_REG_4, 0),
- BPF_MOV64_IMM(BPF_REG_5, 0),
- BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_csum_diff),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- },
- .result = REJECT,
- .errstr = "R2 min value is negative",
- .prog_type = BPF_PROG_TYPE_SCHED_CLS,
-},
-{
- "helper access to packet: test18, cls helper fail range 3",
- .insns = {
- BPF_LDX_MEM(BPF_W, BPF_REG_6, BPF_REG_1,
- offsetof(struct __sk_buff, data)),
- BPF_LDX_MEM(BPF_W, BPF_REG_7, BPF_REG_1,
- offsetof(struct __sk_buff, data_end)),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_6, 1),
- BPF_MOV64_REG(BPF_REG_1, BPF_REG_6),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 7),
- BPF_JMP_REG(BPF_JGT, BPF_REG_1, BPF_REG_7, 6),
- BPF_MOV64_REG(BPF_REG_1, BPF_REG_6),
- BPF_MOV64_IMM(BPF_REG_2, ~0),
- BPF_MOV64_IMM(BPF_REG_3, 0),
- BPF_MOV64_IMM(BPF_REG_4, 0),
- BPF_MOV64_IMM(BPF_REG_5, 0),
- BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_csum_diff),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- },
- .result = REJECT,
- .errstr = "R2 min value is negative",
- .prog_type = BPF_PROG_TYPE_SCHED_CLS,
-},
-{
- "helper access to packet: test19, cls helper range zero",
- .insns = {
- BPF_LDX_MEM(BPF_W, BPF_REG_6, BPF_REG_1,
- offsetof(struct __sk_buff, data)),
- BPF_LDX_MEM(BPF_W, BPF_REG_7, BPF_REG_1,
- offsetof(struct __sk_buff, data_end)),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_6, 1),
- BPF_MOV64_REG(BPF_REG_1, BPF_REG_6),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 7),
- BPF_JMP_REG(BPF_JGT, BPF_REG_1, BPF_REG_7, 6),
- BPF_MOV64_REG(BPF_REG_1, BPF_REG_6),
- BPF_MOV64_IMM(BPF_REG_2, 0),
- BPF_MOV64_IMM(BPF_REG_3, 0),
- BPF_MOV64_IMM(BPF_REG_4, 0),
- BPF_MOV64_IMM(BPF_REG_5, 0),
- BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_csum_diff),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- },
- .result = ACCEPT,
- .prog_type = BPF_PROG_TYPE_SCHED_CLS,
-},
-{
- "helper access to packet: test20, pkt end as input",
- .insns = {
- BPF_LDX_MEM(BPF_W, BPF_REG_6, BPF_REG_1,
- offsetof(struct __sk_buff, data)),
- BPF_LDX_MEM(BPF_W, BPF_REG_7, BPF_REG_1,
- offsetof(struct __sk_buff, data_end)),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_6, 1),
- BPF_MOV64_REG(BPF_REG_1, BPF_REG_6),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 7),
- BPF_JMP_REG(BPF_JGT, BPF_REG_1, BPF_REG_7, 6),
- BPF_MOV64_REG(BPF_REG_1, BPF_REG_7),
- BPF_MOV64_IMM(BPF_REG_2, 4),
- BPF_MOV64_IMM(BPF_REG_3, 0),
- BPF_MOV64_IMM(BPF_REG_4, 0),
- BPF_MOV64_IMM(BPF_REG_5, 0),
- BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_csum_diff),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- },
- .result = REJECT,
- .errstr = "R1 type=pkt_end expected=fp",
- .prog_type = BPF_PROG_TYPE_SCHED_CLS,
-},
-{
- "helper access to packet: test21, wrong reg",
- .insns = {
- BPF_LDX_MEM(BPF_W, BPF_REG_6, BPF_REG_1,
- offsetof(struct __sk_buff, data)),
- BPF_LDX_MEM(BPF_W, BPF_REG_7, BPF_REG_1,
- offsetof(struct __sk_buff, data_end)),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_6, 1),
- BPF_MOV64_REG(BPF_REG_1, BPF_REG_6),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 7),
- BPF_JMP_REG(BPF_JGT, BPF_REG_1, BPF_REG_7, 6),
- BPF_MOV64_IMM(BPF_REG_2, 4),
- BPF_MOV64_IMM(BPF_REG_3, 0),
- BPF_MOV64_IMM(BPF_REG_4, 0),
- BPF_MOV64_IMM(BPF_REG_5, 0),
- BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_csum_diff),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- },
- .result = REJECT,
- .errstr = "invalid access to packet",
- .prog_type = BPF_PROG_TYPE_SCHED_CLS,
-},
diff --git a/tools/testing/selftests/bpf/verifier/helper_value_access.c b/tools/testing/selftests/bpf/verifier/helper_value_access.c
deleted file mode 100644
index 1c7882ddfa63..000000000000
--- a/tools/testing/selftests/bpf/verifier/helper_value_access.c
+++ /dev/null
@@ -1,953 +0,0 @@
-{
- "helper access to map: full range",
- .insns = {
- BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
- BPF_ST_MEM(BPF_DW, BPF_REG_2, 0, 0),
- BPF_LD_MAP_FD(BPF_REG_1, 0),
- BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem),
- BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 4),
- BPF_MOV64_REG(BPF_REG_1, BPF_REG_0),
- BPF_MOV64_IMM(BPF_REG_2, sizeof(struct test_val)),
- BPF_MOV64_IMM(BPF_REG_3, 0),
- BPF_EMIT_CALL(BPF_FUNC_probe_read_kernel),
- BPF_EXIT_INSN(),
- },
- .fixup_map_hash_48b = { 3 },
- .result = ACCEPT,
- .prog_type = BPF_PROG_TYPE_TRACEPOINT,
-},
-{
- "helper access to map: partial range",
- .insns = {
- BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
- BPF_ST_MEM(BPF_DW, BPF_REG_2, 0, 0),
- BPF_LD_MAP_FD(BPF_REG_1, 0),
- BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem),
- BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 4),
- BPF_MOV64_REG(BPF_REG_1, BPF_REG_0),
- BPF_MOV64_IMM(BPF_REG_2, 8),
- BPF_MOV64_IMM(BPF_REG_3, 0),
- BPF_EMIT_CALL(BPF_FUNC_probe_read_kernel),
- BPF_EXIT_INSN(),
- },
- .fixup_map_hash_48b = { 3 },
- .result = ACCEPT,
- .prog_type = BPF_PROG_TYPE_TRACEPOINT,
-},
-{
- "helper access to map: empty range",
- .insns = {
- BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
- BPF_ST_MEM(BPF_DW, BPF_REG_2, 0, 0),
- BPF_LD_MAP_FD(BPF_REG_1, 0),
- BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem),
- BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 3),
- BPF_MOV64_REG(BPF_REG_1, BPF_REG_0),
- BPF_MOV64_IMM(BPF_REG_2, 0),
- BPF_EMIT_CALL(BPF_FUNC_trace_printk),
- BPF_EXIT_INSN(),
- },
- .fixup_map_hash_48b = { 3 },
- .errstr = "invalid access to map value, value_size=48 off=0 size=0",
- .result = REJECT,
- .prog_type = BPF_PROG_TYPE_TRACEPOINT,
-},
-{
- "helper access to map: out-of-bound range",
- .insns = {
- BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
- BPF_ST_MEM(BPF_DW, BPF_REG_2, 0, 0),
- BPF_LD_MAP_FD(BPF_REG_1, 0),
- BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem),
- BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 4),
- BPF_MOV64_REG(BPF_REG_1, BPF_REG_0),
- BPF_MOV64_IMM(BPF_REG_2, sizeof(struct test_val) + 8),
- BPF_MOV64_IMM(BPF_REG_3, 0),
- BPF_EMIT_CALL(BPF_FUNC_probe_read_kernel),
- BPF_EXIT_INSN(),
- },
- .fixup_map_hash_48b = { 3 },
- .errstr = "invalid access to map value, value_size=48 off=0 size=56",
- .result = REJECT,
- .prog_type = BPF_PROG_TYPE_TRACEPOINT,
-},
-{
- "helper access to map: negative range",
- .insns = {
- BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
- BPF_ST_MEM(BPF_DW, BPF_REG_2, 0, 0),
- BPF_LD_MAP_FD(BPF_REG_1, 0),
- BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem),
- BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 4),
- BPF_MOV64_REG(BPF_REG_1, BPF_REG_0),
- BPF_MOV64_IMM(BPF_REG_2, -8),
- BPF_MOV64_IMM(BPF_REG_3, 0),
- BPF_EMIT_CALL(BPF_FUNC_probe_read_kernel),
- BPF_EXIT_INSN(),
- },
- .fixup_map_hash_48b = { 3 },
- .errstr = "R2 min value is negative",
- .result = REJECT,
- .prog_type = BPF_PROG_TYPE_TRACEPOINT,
-},
-{
- "helper access to adjusted map (via const imm): full range",
- .insns = {
- BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
- BPF_ST_MEM(BPF_DW, BPF_REG_2, 0, 0),
- BPF_LD_MAP_FD(BPF_REG_1, 0),
- BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem),
- BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 5),
- BPF_MOV64_REG(BPF_REG_1, BPF_REG_0),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, offsetof(struct test_val, foo)),
- BPF_MOV64_IMM(BPF_REG_2,
- sizeof(struct test_val) - offsetof(struct test_val, foo)),
- BPF_MOV64_IMM(BPF_REG_3, 0),
- BPF_EMIT_CALL(BPF_FUNC_probe_read_kernel),
- BPF_EXIT_INSN(),
- },
- .fixup_map_hash_48b = { 3 },
- .result = ACCEPT,
- .prog_type = BPF_PROG_TYPE_TRACEPOINT,
-},
-{
- "helper access to adjusted map (via const imm): partial range",
- .insns = {
- BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
- BPF_ST_MEM(BPF_DW, BPF_REG_2, 0, 0),
- BPF_LD_MAP_FD(BPF_REG_1, 0),
- BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem),
- BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 5),
- BPF_MOV64_REG(BPF_REG_1, BPF_REG_0),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, offsetof(struct test_val, foo)),
- BPF_MOV64_IMM(BPF_REG_2, 8),
- BPF_MOV64_IMM(BPF_REG_3, 0),
- BPF_EMIT_CALL(BPF_FUNC_probe_read_kernel),
- BPF_EXIT_INSN(),
- },
- .fixup_map_hash_48b = { 3 },
- .result = ACCEPT,
- .prog_type = BPF_PROG_TYPE_TRACEPOINT,
-},
-{
- "helper access to adjusted map (via const imm): empty range",
- .insns = {
- BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
- BPF_ST_MEM(BPF_DW, BPF_REG_2, 0, 0),
- BPF_LD_MAP_FD(BPF_REG_1, 0),
- BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem),
- BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 4),
- BPF_MOV64_REG(BPF_REG_1, BPF_REG_0),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, offsetof(struct test_val, foo)),
- BPF_MOV64_IMM(BPF_REG_2, 0),
- BPF_EMIT_CALL(BPF_FUNC_trace_printk),
- BPF_EXIT_INSN(),
- },
- .fixup_map_hash_48b = { 3 },
- .errstr = "invalid access to map value, value_size=48 off=4 size=0",
- .result = REJECT,
- .prog_type = BPF_PROG_TYPE_TRACEPOINT,
-},
-{
- "helper access to adjusted map (via const imm): out-of-bound range",
- .insns = {
- BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
- BPF_ST_MEM(BPF_DW, BPF_REG_2, 0, 0),
- BPF_LD_MAP_FD(BPF_REG_1, 0),
- BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem),
- BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 5),
- BPF_MOV64_REG(BPF_REG_1, BPF_REG_0),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, offsetof(struct test_val, foo)),
- BPF_MOV64_IMM(BPF_REG_2,
- sizeof(struct test_val) - offsetof(struct test_val, foo) + 8),
- BPF_MOV64_IMM(BPF_REG_3, 0),
- BPF_EMIT_CALL(BPF_FUNC_probe_read_kernel),
- BPF_EXIT_INSN(),
- },
- .fixup_map_hash_48b = { 3 },
- .errstr = "invalid access to map value, value_size=48 off=4 size=52",
- .result = REJECT,
- .prog_type = BPF_PROG_TYPE_TRACEPOINT,
-},
-{
- "helper access to adjusted map (via const imm): negative range (> adjustment)",
- .insns = {
- BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
- BPF_ST_MEM(BPF_DW, BPF_REG_2, 0, 0),
- BPF_LD_MAP_FD(BPF_REG_1, 0),
- BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem),
- BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 5),
- BPF_MOV64_REG(BPF_REG_1, BPF_REG_0),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, offsetof(struct test_val, foo)),
- BPF_MOV64_IMM(BPF_REG_2, -8),
- BPF_MOV64_IMM(BPF_REG_3, 0),
- BPF_EMIT_CALL(BPF_FUNC_probe_read_kernel),
- BPF_EXIT_INSN(),
- },
- .fixup_map_hash_48b = { 3 },
- .errstr = "R2 min value is negative",
- .result = REJECT,
- .prog_type = BPF_PROG_TYPE_TRACEPOINT,
-},
-{
- "helper access to adjusted map (via const imm): negative range (< adjustment)",
- .insns = {
- BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
- BPF_ST_MEM(BPF_DW, BPF_REG_2, 0, 0),
- BPF_LD_MAP_FD(BPF_REG_1, 0),
- BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem),
- BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 5),
- BPF_MOV64_REG(BPF_REG_1, BPF_REG_0),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, offsetof(struct test_val, foo)),
- BPF_MOV64_IMM(BPF_REG_2, -1),
- BPF_MOV64_IMM(BPF_REG_3, 0),
- BPF_EMIT_CALL(BPF_FUNC_probe_read_kernel),
- BPF_EXIT_INSN(),
- },
- .fixup_map_hash_48b = { 3 },
- .errstr = "R2 min value is negative",
- .result = REJECT,
- .prog_type = BPF_PROG_TYPE_TRACEPOINT,
-},
-{
- "helper access to adjusted map (via const reg): full range",
- .insns = {
- BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
- BPF_ST_MEM(BPF_DW, BPF_REG_2, 0, 0),
- BPF_LD_MAP_FD(BPF_REG_1, 0),
- BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem),
- BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 6),
- BPF_MOV64_REG(BPF_REG_1, BPF_REG_0),
- BPF_MOV64_IMM(BPF_REG_3, offsetof(struct test_val, foo)),
- BPF_ALU64_REG(BPF_ADD, BPF_REG_1, BPF_REG_3),
- BPF_MOV64_IMM(BPF_REG_2,
- sizeof(struct test_val) - offsetof(struct test_val, foo)),
- BPF_MOV64_IMM(BPF_REG_3, 0),
- BPF_EMIT_CALL(BPF_FUNC_probe_read_kernel),
- BPF_EXIT_INSN(),
- },
- .fixup_map_hash_48b = { 3 },
- .result = ACCEPT,
- .prog_type = BPF_PROG_TYPE_TRACEPOINT,
-},
-{
- "helper access to adjusted map (via const reg): partial range",
- .insns = {
- BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
- BPF_ST_MEM(BPF_DW, BPF_REG_2, 0, 0),
- BPF_LD_MAP_FD(BPF_REG_1, 0),
- BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem),
- BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 6),
- BPF_MOV64_REG(BPF_REG_1, BPF_REG_0),
- BPF_MOV64_IMM(BPF_REG_3, offsetof(struct test_val, foo)),
- BPF_ALU64_REG(BPF_ADD, BPF_REG_1, BPF_REG_3),
- BPF_MOV64_IMM(BPF_REG_2, 8),
- BPF_MOV64_IMM(BPF_REG_3, 0),
- BPF_EMIT_CALL(BPF_FUNC_probe_read_kernel),
- BPF_EXIT_INSN(),
- },
- .fixup_map_hash_48b = { 3 },
- .result = ACCEPT,
- .prog_type = BPF_PROG_TYPE_TRACEPOINT,
-},
-{
- "helper access to adjusted map (via const reg): empty range",
- .insns = {
- BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
- BPF_ST_MEM(BPF_DW, BPF_REG_2, 0, 0),
- BPF_LD_MAP_FD(BPF_REG_1, 0),
- BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem),
- BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 5),
- BPF_MOV64_REG(BPF_REG_1, BPF_REG_0),
- BPF_MOV64_IMM(BPF_REG_3, 0),
- BPF_ALU64_REG(BPF_ADD, BPF_REG_1, BPF_REG_3),
- BPF_MOV64_IMM(BPF_REG_2, 0),
- BPF_EMIT_CALL(BPF_FUNC_trace_printk),
- BPF_EXIT_INSN(),
- },
- .fixup_map_hash_48b = { 3 },
- .errstr = "R1 min value is outside of the allowed memory range",
- .result = REJECT,
- .prog_type = BPF_PROG_TYPE_TRACEPOINT,
-},
-{
- "helper access to adjusted map (via const reg): out-of-bound range",
- .insns = {
- BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
- BPF_ST_MEM(BPF_DW, BPF_REG_2, 0, 0),
- BPF_LD_MAP_FD(BPF_REG_1, 0),
- BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem),
- BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 6),
- BPF_MOV64_REG(BPF_REG_1, BPF_REG_0),
- BPF_MOV64_IMM(BPF_REG_3, offsetof(struct test_val, foo)),
- BPF_ALU64_REG(BPF_ADD, BPF_REG_1, BPF_REG_3),
- BPF_MOV64_IMM(BPF_REG_2,
- sizeof(struct test_val) -
- offsetof(struct test_val, foo) + 8),
- BPF_MOV64_IMM(BPF_REG_3, 0),
- BPF_EMIT_CALL(BPF_FUNC_probe_read_kernel),
- BPF_EXIT_INSN(),
- },
- .fixup_map_hash_48b = { 3 },
- .errstr = "invalid access to map value, value_size=48 off=4 size=52",
- .result = REJECT,
- .prog_type = BPF_PROG_TYPE_TRACEPOINT,
-},
-{
- "helper access to adjusted map (via const reg): negative range (> adjustment)",
- .insns = {
- BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
- BPF_ST_MEM(BPF_DW, BPF_REG_2, 0, 0),
- BPF_LD_MAP_FD(BPF_REG_1, 0),
- BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem),
- BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 6),
- BPF_MOV64_REG(BPF_REG_1, BPF_REG_0),
- BPF_MOV64_IMM(BPF_REG_3, offsetof(struct test_val, foo)),
- BPF_ALU64_REG(BPF_ADD, BPF_REG_1, BPF_REG_3),
- BPF_MOV64_IMM(BPF_REG_2, -8),
- BPF_MOV64_IMM(BPF_REG_3, 0),
- BPF_EMIT_CALL(BPF_FUNC_probe_read_kernel),
- BPF_EXIT_INSN(),
- },
- .fixup_map_hash_48b = { 3 },
- .errstr = "R2 min value is negative",
- .result = REJECT,
- .prog_type = BPF_PROG_TYPE_TRACEPOINT,
-},
-{
- "helper access to adjusted map (via const reg): negative range (< adjustment)",
- .insns = {
- BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
- BPF_ST_MEM(BPF_DW, BPF_REG_2, 0, 0),
- BPF_LD_MAP_FD(BPF_REG_1, 0),
- BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem),
- BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 6),
- BPF_MOV64_REG(BPF_REG_1, BPF_REG_0),
- BPF_MOV64_IMM(BPF_REG_3, offsetof(struct test_val, foo)),
- BPF_ALU64_REG(BPF_ADD, BPF_REG_1, BPF_REG_3),
- BPF_MOV64_IMM(BPF_REG_2, -1),
- BPF_MOV64_IMM(BPF_REG_3, 0),
- BPF_EMIT_CALL(BPF_FUNC_probe_read_kernel),
- BPF_EXIT_INSN(),
- },
- .fixup_map_hash_48b = { 3 },
- .errstr = "R2 min value is negative",
- .result = REJECT,
- .prog_type = BPF_PROG_TYPE_TRACEPOINT,
-},
-{
- "helper access to adjusted map (via variable): full range",
- .insns = {
- BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
- BPF_ST_MEM(BPF_DW, BPF_REG_2, 0, 0),
- BPF_LD_MAP_FD(BPF_REG_1, 0),
- BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem),
- BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 7),
- BPF_MOV64_REG(BPF_REG_1, BPF_REG_0),
- BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_0, 0),
- BPF_JMP_IMM(BPF_JGT, BPF_REG_3, offsetof(struct test_val, foo), 4),
- BPF_ALU64_REG(BPF_ADD, BPF_REG_1, BPF_REG_3),
- BPF_MOV64_IMM(BPF_REG_2,
- sizeof(struct test_val) - offsetof(struct test_val, foo)),
- BPF_MOV64_IMM(BPF_REG_3, 0),
- BPF_EMIT_CALL(BPF_FUNC_probe_read_kernel),
- BPF_EXIT_INSN(),
- },
- .fixup_map_hash_48b = { 3 },
- .result = ACCEPT,
- .prog_type = BPF_PROG_TYPE_TRACEPOINT,
-},
-{
- "helper access to adjusted map (via variable): partial range",
- .insns = {
- BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
- BPF_ST_MEM(BPF_DW, BPF_REG_2, 0, 0),
- BPF_LD_MAP_FD(BPF_REG_1, 0),
- BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem),
- BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 7),
- BPF_MOV64_REG(BPF_REG_1, BPF_REG_0),
- BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_0, 0),
- BPF_JMP_IMM(BPF_JGT, BPF_REG_3, offsetof(struct test_val, foo), 4),
- BPF_ALU64_REG(BPF_ADD, BPF_REG_1, BPF_REG_3),
- BPF_MOV64_IMM(BPF_REG_2, 8),
- BPF_MOV64_IMM(BPF_REG_3, 0),
- BPF_EMIT_CALL(BPF_FUNC_probe_read_kernel),
- BPF_EXIT_INSN(),
- },
- .fixup_map_hash_48b = { 3 },
- .result = ACCEPT,
- .prog_type = BPF_PROG_TYPE_TRACEPOINT,
-},
-{
- "helper access to adjusted map (via variable): empty range",
- .insns = {
- BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
- BPF_ST_MEM(BPF_DW, BPF_REG_2, 0, 0),
- BPF_LD_MAP_FD(BPF_REG_1, 0),
- BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem),
- BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 6),
- BPF_MOV64_REG(BPF_REG_1, BPF_REG_0),
- BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_0, 0),
- BPF_JMP_IMM(BPF_JGT, BPF_REG_3, offsetof(struct test_val, foo), 3),
- BPF_ALU64_REG(BPF_ADD, BPF_REG_1, BPF_REG_3),
- BPF_MOV64_IMM(BPF_REG_2, 0),
- BPF_EMIT_CALL(BPF_FUNC_trace_printk),
- BPF_EXIT_INSN(),
- },
- .fixup_map_hash_48b = { 3 },
- .errstr = "R1 min value is outside of the allowed memory range",
- .result = REJECT,
- .prog_type = BPF_PROG_TYPE_TRACEPOINT,
-},
-{
- "helper access to adjusted map (via variable): no max check",
- .insns = {
- BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
- BPF_ST_MEM(BPF_DW, BPF_REG_2, 0, 0),
- BPF_LD_MAP_FD(BPF_REG_1, 0),
- BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem),
- BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 6),
- BPF_MOV64_REG(BPF_REG_1, BPF_REG_0),
- BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_0, 0),
- BPF_ALU64_REG(BPF_ADD, BPF_REG_1, BPF_REG_3),
- BPF_MOV64_IMM(BPF_REG_2, 1),
- BPF_MOV64_IMM(BPF_REG_3, 0),
- BPF_EMIT_CALL(BPF_FUNC_probe_read_kernel),
- BPF_EXIT_INSN(),
- },
- .fixup_map_hash_48b = { 3 },
- .errstr = "R1 unbounded memory access",
- .result = REJECT,
- .prog_type = BPF_PROG_TYPE_TRACEPOINT,
-},
-{
- "helper access to adjusted map (via variable): wrong max check",
- .insns = {
- BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
- BPF_ST_MEM(BPF_DW, BPF_REG_2, 0, 0),
- BPF_LD_MAP_FD(BPF_REG_1, 0),
- BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem),
- BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 7),
- BPF_MOV64_REG(BPF_REG_1, BPF_REG_0),
- BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_0, 0),
- BPF_JMP_IMM(BPF_JGT, BPF_REG_3, offsetof(struct test_val, foo), 4),
- BPF_ALU64_REG(BPF_ADD, BPF_REG_1, BPF_REG_3),
- BPF_MOV64_IMM(BPF_REG_2,
- sizeof(struct test_val) -
- offsetof(struct test_val, foo) + 1),
- BPF_MOV64_IMM(BPF_REG_3, 0),
- BPF_EMIT_CALL(BPF_FUNC_probe_read_kernel),
- BPF_EXIT_INSN(),
- },
- .fixup_map_hash_48b = { 3 },
- .errstr = "invalid access to map value, value_size=48 off=4 size=45",
- .result = REJECT,
- .prog_type = BPF_PROG_TYPE_TRACEPOINT,
-},
-{
- "helper access to map: bounds check using <, good access",
- .insns = {
- BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
- BPF_ST_MEM(BPF_DW, BPF_REG_2, 0, 0),
- BPF_LD_MAP_FD(BPF_REG_1, 0),
- BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem),
- BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 4),
- BPF_MOV64_REG(BPF_REG_1, BPF_REG_0),
- BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_0, 0),
- BPF_JMP_IMM(BPF_JLT, BPF_REG_3, 32, 2),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- BPF_ALU64_REG(BPF_ADD, BPF_REG_1, BPF_REG_3),
- BPF_ST_MEM(BPF_B, BPF_REG_1, 0, 0),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- },
- .fixup_map_hash_48b = { 3 },
- .result = ACCEPT,
- .prog_type = BPF_PROG_TYPE_TRACEPOINT,
-},
-{
- "helper access to map: bounds check using <, bad access",
- .insns = {
- BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
- BPF_ST_MEM(BPF_DW, BPF_REG_2, 0, 0),
- BPF_LD_MAP_FD(BPF_REG_1, 0),
- BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem),
- BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 4),
- BPF_MOV64_REG(BPF_REG_1, BPF_REG_0),
- BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_0, 0),
- BPF_JMP_IMM(BPF_JLT, BPF_REG_3, 32, 4),
- BPF_ALU64_REG(BPF_ADD, BPF_REG_1, BPF_REG_3),
- BPF_ST_MEM(BPF_B, BPF_REG_1, 0, 0),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- },
- .fixup_map_hash_48b = { 3 },
- .result = REJECT,
- .errstr = "R1 unbounded memory access",
- .prog_type = BPF_PROG_TYPE_TRACEPOINT,
-},
-{
- "helper access to map: bounds check using <=, good access",
- .insns = {
- BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
- BPF_ST_MEM(BPF_DW, BPF_REG_2, 0, 0),
- BPF_LD_MAP_FD(BPF_REG_1, 0),
- BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem),
- BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 4),
- BPF_MOV64_REG(BPF_REG_1, BPF_REG_0),
- BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_0, 0),
- BPF_JMP_IMM(BPF_JLE, BPF_REG_3, 32, 2),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- BPF_ALU64_REG(BPF_ADD, BPF_REG_1, BPF_REG_3),
- BPF_ST_MEM(BPF_B, BPF_REG_1, 0, 0),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- },
- .fixup_map_hash_48b = { 3 },
- .result = ACCEPT,
- .prog_type = BPF_PROG_TYPE_TRACEPOINT,
-},
-{
- "helper access to map: bounds check using <=, bad access",
- .insns = {
- BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
- BPF_ST_MEM(BPF_DW, BPF_REG_2, 0, 0),
- BPF_LD_MAP_FD(BPF_REG_1, 0),
- BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem),
- BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 4),
- BPF_MOV64_REG(BPF_REG_1, BPF_REG_0),
- BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_0, 0),
- BPF_JMP_IMM(BPF_JLE, BPF_REG_3, 32, 4),
- BPF_ALU64_REG(BPF_ADD, BPF_REG_1, BPF_REG_3),
- BPF_ST_MEM(BPF_B, BPF_REG_1, 0, 0),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- },
- .fixup_map_hash_48b = { 3 },
- .result = REJECT,
- .errstr = "R1 unbounded memory access",
- .prog_type = BPF_PROG_TYPE_TRACEPOINT,
-},
-{
- "helper access to map: bounds check using s<, good access",
- .insns = {
- BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
- BPF_ST_MEM(BPF_DW, BPF_REG_2, 0, 0),
- BPF_LD_MAP_FD(BPF_REG_1, 0),
- BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem),
- BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 4),
- BPF_MOV64_REG(BPF_REG_1, BPF_REG_0),
- BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_0, 0),
- BPF_JMP_IMM(BPF_JSLT, BPF_REG_3, 32, 2),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- BPF_JMP_IMM(BPF_JSLT, BPF_REG_3, 0, -3),
- BPF_ALU64_REG(BPF_ADD, BPF_REG_1, BPF_REG_3),
- BPF_ST_MEM(BPF_B, BPF_REG_1, 0, 0),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- },
- .fixup_map_hash_48b = { 3 },
- .result = ACCEPT,
- .prog_type = BPF_PROG_TYPE_TRACEPOINT,
-},
-{
- "helper access to map: bounds check using s<, good access 2",
- .insns = {
- BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
- BPF_ST_MEM(BPF_DW, BPF_REG_2, 0, 0),
- BPF_LD_MAP_FD(BPF_REG_1, 0),
- BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem),
- BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 4),
- BPF_MOV64_REG(BPF_REG_1, BPF_REG_0),
- BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_0, 0),
- BPF_JMP_IMM(BPF_JSLT, BPF_REG_3, 32, 2),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- BPF_JMP_IMM(BPF_JSLT, BPF_REG_3, -3, -3),
- BPF_ALU64_REG(BPF_ADD, BPF_REG_1, BPF_REG_3),
- BPF_ST_MEM(BPF_B, BPF_REG_1, 0, 0),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- },
- .fixup_map_hash_48b = { 3 },
- .result = ACCEPT,
- .prog_type = BPF_PROG_TYPE_TRACEPOINT,
-},
-{
- "helper access to map: bounds check using s<, bad access",
- .insns = {
- BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
- BPF_ST_MEM(BPF_DW, BPF_REG_2, 0, 0),
- BPF_LD_MAP_FD(BPF_REG_1, 0),
- BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem),
- BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 4),
- BPF_MOV64_REG(BPF_REG_1, BPF_REG_0),
- BPF_LDX_MEM(BPF_DW, BPF_REG_3, BPF_REG_0, 0),
- BPF_JMP_IMM(BPF_JSLT, BPF_REG_3, 32, 2),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- BPF_JMP_IMM(BPF_JSLT, BPF_REG_3, -3, -3),
- BPF_ALU64_REG(BPF_ADD, BPF_REG_1, BPF_REG_3),
- BPF_ST_MEM(BPF_B, BPF_REG_1, 0, 0),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- },
- .fixup_map_hash_48b = { 3 },
- .result = REJECT,
- .errstr = "R1 min value is negative",
- .prog_type = BPF_PROG_TYPE_TRACEPOINT,
-},
-{
- "helper access to map: bounds check using s<=, good access",
- .insns = {
- BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
- BPF_ST_MEM(BPF_DW, BPF_REG_2, 0, 0),
- BPF_LD_MAP_FD(BPF_REG_1, 0),
- BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem),
- BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 4),
- BPF_MOV64_REG(BPF_REG_1, BPF_REG_0),
- BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_0, 0),
- BPF_JMP_IMM(BPF_JSLE, BPF_REG_3, 32, 2),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- BPF_JMP_IMM(BPF_JSLE, BPF_REG_3, 0, -3),
- BPF_ALU64_REG(BPF_ADD, BPF_REG_1, BPF_REG_3),
- BPF_ST_MEM(BPF_B, BPF_REG_1, 0, 0),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- },
- .fixup_map_hash_48b = { 3 },
- .result = ACCEPT,
- .prog_type = BPF_PROG_TYPE_TRACEPOINT,
-},
-{
- "helper access to map: bounds check using s<=, good access 2",
- .insns = {
- BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
- BPF_ST_MEM(BPF_DW, BPF_REG_2, 0, 0),
- BPF_LD_MAP_FD(BPF_REG_1, 0),
- BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem),
- BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 4),
- BPF_MOV64_REG(BPF_REG_1, BPF_REG_0),
- BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_0, 0),
- BPF_JMP_IMM(BPF_JSLE, BPF_REG_3, 32, 2),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- BPF_JMP_IMM(BPF_JSLE, BPF_REG_3, -3, -3),
- BPF_ALU64_REG(BPF_ADD, BPF_REG_1, BPF_REG_3),
- BPF_ST_MEM(BPF_B, BPF_REG_1, 0, 0),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- },
- .fixup_map_hash_48b = { 3 },
- .result = ACCEPT,
- .prog_type = BPF_PROG_TYPE_TRACEPOINT,
-},
-{
- "helper access to map: bounds check using s<=, bad access",
- .insns = {
- BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
- BPF_ST_MEM(BPF_DW, BPF_REG_2, 0, 0),
- BPF_LD_MAP_FD(BPF_REG_1, 0),
- BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem),
- BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 4),
- BPF_MOV64_REG(BPF_REG_1, BPF_REG_0),
- BPF_LDX_MEM(BPF_DW, BPF_REG_3, BPF_REG_0, 0),
- BPF_JMP_IMM(BPF_JSLE, BPF_REG_3, 32, 2),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- BPF_JMP_IMM(BPF_JSLE, BPF_REG_3, -3, -3),
- BPF_ALU64_REG(BPF_ADD, BPF_REG_1, BPF_REG_3),
- BPF_ST_MEM(BPF_B, BPF_REG_1, 0, 0),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- },
- .fixup_map_hash_48b = { 3 },
- .result = REJECT,
- .errstr = "R1 min value is negative",
- .prog_type = BPF_PROG_TYPE_TRACEPOINT,
-},
-{
- "map lookup helper access to map",
- .insns = {
- BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
- BPF_ST_MEM(BPF_DW, BPF_REG_2, 0, 0),
- BPF_LD_MAP_FD(BPF_REG_1, 0),
- BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem),
- BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 4),
- BPF_MOV64_REG(BPF_REG_2, BPF_REG_0),
- BPF_LD_MAP_FD(BPF_REG_1, 0),
- BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem),
- BPF_EXIT_INSN(),
- },
- .fixup_map_hash_16b = { 3, 8 },
- .result = ACCEPT,
- .prog_type = BPF_PROG_TYPE_TRACEPOINT,
-},
-{
- "map update helper access to map",
- .insns = {
- BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
- BPF_ST_MEM(BPF_DW, BPF_REG_2, 0, 0),
- BPF_LD_MAP_FD(BPF_REG_1, 0),
- BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem),
- BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 6),
- BPF_MOV64_IMM(BPF_REG_4, 0),
- BPF_MOV64_REG(BPF_REG_3, BPF_REG_0),
- BPF_MOV64_REG(BPF_REG_2, BPF_REG_0),
- BPF_LD_MAP_FD(BPF_REG_1, 0),
- BPF_EMIT_CALL(BPF_FUNC_map_update_elem),
- BPF_EXIT_INSN(),
- },
- .fixup_map_hash_16b = { 3, 10 },
- .result = ACCEPT,
- .prog_type = BPF_PROG_TYPE_TRACEPOINT,
-},
-{
- "map update helper access to map: wrong size",
- .insns = {
- BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
- BPF_ST_MEM(BPF_DW, BPF_REG_2, 0, 0),
- BPF_LD_MAP_FD(BPF_REG_1, 0),
- BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem),
- BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 6),
- BPF_MOV64_IMM(BPF_REG_4, 0),
- BPF_MOV64_REG(BPF_REG_3, BPF_REG_0),
- BPF_MOV64_REG(BPF_REG_2, BPF_REG_0),
- BPF_LD_MAP_FD(BPF_REG_1, 0),
- BPF_EMIT_CALL(BPF_FUNC_map_update_elem),
- BPF_EXIT_INSN(),
- },
- .fixup_map_hash_8b = { 3 },
- .fixup_map_hash_16b = { 10 },
- .result = REJECT,
- .errstr = "invalid access to map value, value_size=8 off=0 size=16",
- .prog_type = BPF_PROG_TYPE_TRACEPOINT,
-},
-{
- "map helper access to adjusted map (via const imm)",
- .insns = {
- BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
- BPF_ST_MEM(BPF_DW, BPF_REG_2, 0, 0),
- BPF_LD_MAP_FD(BPF_REG_1, 0),
- BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem),
- BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 5),
- BPF_MOV64_REG(BPF_REG_2, BPF_REG_0),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, offsetof(struct other_val, bar)),
- BPF_LD_MAP_FD(BPF_REG_1, 0),
- BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem),
- BPF_EXIT_INSN(),
- },
- .fixup_map_hash_16b = { 3, 9 },
- .result = ACCEPT,
- .prog_type = BPF_PROG_TYPE_TRACEPOINT,
-},
-{
- "map helper access to adjusted map (via const imm): out-of-bound 1",
- .insns = {
- BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
- BPF_ST_MEM(BPF_DW, BPF_REG_2, 0, 0),
- BPF_LD_MAP_FD(BPF_REG_1, 0),
- BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem),
- BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 5),
- BPF_MOV64_REG(BPF_REG_2, BPF_REG_0),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, sizeof(struct other_val) - 4),
- BPF_LD_MAP_FD(BPF_REG_1, 0),
- BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem),
- BPF_EXIT_INSN(),
- },
- .fixup_map_hash_16b = { 3, 9 },
- .result = REJECT,
- .errstr = "invalid access to map value, value_size=16 off=12 size=8",
- .prog_type = BPF_PROG_TYPE_TRACEPOINT,
-},
-{
- "map helper access to adjusted map (via const imm): out-of-bound 2",
- .insns = {
- BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
- BPF_ST_MEM(BPF_DW, BPF_REG_2, 0, 0),
- BPF_LD_MAP_FD(BPF_REG_1, 0),
- BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem),
- BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 5),
- BPF_MOV64_REG(BPF_REG_2, BPF_REG_0),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -4),
- BPF_LD_MAP_FD(BPF_REG_1, 0),
- BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem),
- BPF_EXIT_INSN(),
- },
- .fixup_map_hash_16b = { 3, 9 },
- .result = REJECT,
- .errstr = "invalid access to map value, value_size=16 off=-4 size=8",
- .prog_type = BPF_PROG_TYPE_TRACEPOINT,
-},
-{
- "map helper access to adjusted map (via const reg)",
- .insns = {
- BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
- BPF_ST_MEM(BPF_DW, BPF_REG_2, 0, 0),
- BPF_LD_MAP_FD(BPF_REG_1, 0),
- BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem),
- BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 6),
- BPF_MOV64_REG(BPF_REG_2, BPF_REG_0),
- BPF_MOV64_IMM(BPF_REG_3, offsetof(struct other_val, bar)),
- BPF_ALU64_REG(BPF_ADD, BPF_REG_2, BPF_REG_3),
- BPF_LD_MAP_FD(BPF_REG_1, 0),
- BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem),
- BPF_EXIT_INSN(),
- },
- .fixup_map_hash_16b = { 3, 10 },
- .result = ACCEPT,
- .prog_type = BPF_PROG_TYPE_TRACEPOINT,
-},
-{
- "map helper access to adjusted map (via const reg): out-of-bound 1",
- .insns = {
- BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
- BPF_ST_MEM(BPF_DW, BPF_REG_2, 0, 0),
- BPF_LD_MAP_FD(BPF_REG_1, 0),
- BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem),
- BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 6),
- BPF_MOV64_REG(BPF_REG_2, BPF_REG_0),
- BPF_MOV64_IMM(BPF_REG_3, sizeof(struct other_val) - 4),
- BPF_ALU64_REG(BPF_ADD, BPF_REG_2, BPF_REG_3),
- BPF_LD_MAP_FD(BPF_REG_1, 0),
- BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem),
- BPF_EXIT_INSN(),
- },
- .fixup_map_hash_16b = { 3, 10 },
- .result = REJECT,
- .errstr = "invalid access to map value, value_size=16 off=12 size=8",
- .prog_type = BPF_PROG_TYPE_TRACEPOINT,
-},
-{
- "map helper access to adjusted map (via const reg): out-of-bound 2",
- .insns = {
- BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
- BPF_ST_MEM(BPF_DW, BPF_REG_2, 0, 0),
- BPF_LD_MAP_FD(BPF_REG_1, 0),
- BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem),
- BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 6),
- BPF_MOV64_REG(BPF_REG_2, BPF_REG_0),
- BPF_MOV64_IMM(BPF_REG_3, -4),
- BPF_ALU64_REG(BPF_ADD, BPF_REG_2, BPF_REG_3),
- BPF_LD_MAP_FD(BPF_REG_1, 0),
- BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem),
- BPF_EXIT_INSN(),
- },
- .fixup_map_hash_16b = { 3, 10 },
- .result = REJECT,
- .errstr = "invalid access to map value, value_size=16 off=-4 size=8",
- .prog_type = BPF_PROG_TYPE_TRACEPOINT,
-},
-{
- "map helper access to adjusted map (via variable)",
- .insns = {
- BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
- BPF_ST_MEM(BPF_DW, BPF_REG_2, 0, 0),
- BPF_LD_MAP_FD(BPF_REG_1, 0),
- BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem),
- BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 7),
- BPF_MOV64_REG(BPF_REG_2, BPF_REG_0),
- BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_0, 0),
- BPF_JMP_IMM(BPF_JGT, BPF_REG_3, offsetof(struct other_val, bar), 4),
- BPF_ALU64_REG(BPF_ADD, BPF_REG_2, BPF_REG_3),
- BPF_LD_MAP_FD(BPF_REG_1, 0),
- BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem),
- BPF_EXIT_INSN(),
- },
- .fixup_map_hash_16b = { 3, 11 },
- .result = ACCEPT,
- .prog_type = BPF_PROG_TYPE_TRACEPOINT,
-},
-{
- "map helper access to adjusted map (via variable): no max check",
- .insns = {
- BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
- BPF_ST_MEM(BPF_DW, BPF_REG_2, 0, 0),
- BPF_LD_MAP_FD(BPF_REG_1, 0),
- BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem),
- BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 6),
- BPF_MOV64_REG(BPF_REG_2, BPF_REG_0),
- BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_0, 0),
- BPF_ALU64_REG(BPF_ADD, BPF_REG_2, BPF_REG_3),
- BPF_LD_MAP_FD(BPF_REG_1, 0),
- BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem),
- BPF_EXIT_INSN(),
- },
- .fixup_map_hash_16b = { 3, 10 },
- .result = REJECT,
- .errstr = "R2 unbounded memory access, make sure to bounds check any such access",
- .prog_type = BPF_PROG_TYPE_TRACEPOINT,
-},
-{
- "map helper access to adjusted map (via variable): wrong max check",
- .insns = {
- BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
- BPF_ST_MEM(BPF_DW, BPF_REG_2, 0, 0),
- BPF_LD_MAP_FD(BPF_REG_1, 0),
- BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem),
- BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 7),
- BPF_MOV64_REG(BPF_REG_2, BPF_REG_0),
- BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_0, 0),
- BPF_JMP_IMM(BPF_JGT, BPF_REG_3, offsetof(struct other_val, bar) + 1, 4),
- BPF_ALU64_REG(BPF_ADD, BPF_REG_2, BPF_REG_3),
- BPF_LD_MAP_FD(BPF_REG_1, 0),
- BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem),
- BPF_EXIT_INSN(),
- },
- .fixup_map_hash_16b = { 3, 11 },
- .result = REJECT,
- .errstr = "invalid access to map value, value_size=16 off=9 size=8",
- .prog_type = BPF_PROG_TYPE_TRACEPOINT,
-},
diff --git a/tools/testing/selftests/bpf/verifier/int_ptr.c b/tools/testing/selftests/bpf/verifier/int_ptr.c
deleted file mode 100644
index ca3b4729df66..000000000000
--- a/tools/testing/selftests/bpf/verifier/int_ptr.c
+++ /dev/null
@@ -1,160 +0,0 @@
-{
- "ARG_PTR_TO_LONG uninitialized",
- .insns = {
- /* bpf_strtoul arg1 (buf) */
- BPF_MOV64_REG(BPF_REG_7, BPF_REG_10),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_7, -8),
- BPF_MOV64_IMM(BPF_REG_0, 0x00303036),
- BPF_STX_MEM(BPF_DW, BPF_REG_7, BPF_REG_0, 0),
-
- BPF_MOV64_REG(BPF_REG_1, BPF_REG_7),
-
- /* bpf_strtoul arg2 (buf_len) */
- BPF_MOV64_IMM(BPF_REG_2, 4),
-
- /* bpf_strtoul arg3 (flags) */
- BPF_MOV64_IMM(BPF_REG_3, 0),
-
- /* bpf_strtoul arg4 (res) */
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_7, -8),
- BPF_MOV64_REG(BPF_REG_4, BPF_REG_7),
-
- /* bpf_strtoul() */
- BPF_EMIT_CALL(BPF_FUNC_strtoul),
-
- BPF_MOV64_IMM(BPF_REG_0, 1),
- BPF_EXIT_INSN(),
- },
- .result = REJECT,
- .prog_type = BPF_PROG_TYPE_CGROUP_SYSCTL,
- .errstr = "invalid indirect read from stack off -16+0 size 8",
-},
-{
- "ARG_PTR_TO_LONG half-uninitialized",
- .insns = {
- /* bpf_strtoul arg1 (buf) */
- BPF_MOV64_REG(BPF_REG_7, BPF_REG_10),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_7, -8),
- BPF_MOV64_IMM(BPF_REG_0, 0x00303036),
- BPF_STX_MEM(BPF_DW, BPF_REG_7, BPF_REG_0, 0),
-
- BPF_MOV64_REG(BPF_REG_1, BPF_REG_7),
-
- /* bpf_strtoul arg2 (buf_len) */
- BPF_MOV64_IMM(BPF_REG_2, 4),
-
- /* bpf_strtoul arg3 (flags) */
- BPF_MOV64_IMM(BPF_REG_3, 0),
-
- /* bpf_strtoul arg4 (res) */
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_7, -8),
- BPF_STX_MEM(BPF_W, BPF_REG_7, BPF_REG_0, 0),
- BPF_MOV64_REG(BPF_REG_4, BPF_REG_7),
-
- /* bpf_strtoul() */
- BPF_EMIT_CALL(BPF_FUNC_strtoul),
-
- BPF_MOV64_IMM(BPF_REG_0, 1),
- BPF_EXIT_INSN(),
- },
- .result = REJECT,
- .prog_type = BPF_PROG_TYPE_CGROUP_SYSCTL,
- .errstr = "invalid indirect read from stack off -16+4 size 8",
-},
-{
- "ARG_PTR_TO_LONG misaligned",
- .insns = {
- /* bpf_strtoul arg1 (buf) */
- BPF_MOV64_REG(BPF_REG_7, BPF_REG_10),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_7, -8),
- BPF_MOV64_IMM(BPF_REG_0, 0x00303036),
- BPF_STX_MEM(BPF_DW, BPF_REG_7, BPF_REG_0, 0),
-
- BPF_MOV64_REG(BPF_REG_1, BPF_REG_7),
-
- /* bpf_strtoul arg2 (buf_len) */
- BPF_MOV64_IMM(BPF_REG_2, 4),
-
- /* bpf_strtoul arg3 (flags) */
- BPF_MOV64_IMM(BPF_REG_3, 0),
-
- /* bpf_strtoul arg4 (res) */
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_7, -12),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_STX_MEM(BPF_W, BPF_REG_7, BPF_REG_0, 0),
- BPF_STX_MEM(BPF_DW, BPF_REG_7, BPF_REG_0, 4),
- BPF_MOV64_REG(BPF_REG_4, BPF_REG_7),
-
- /* bpf_strtoul() */
- BPF_EMIT_CALL(BPF_FUNC_strtoul),
-
- BPF_MOV64_IMM(BPF_REG_0, 1),
- BPF_EXIT_INSN(),
- },
- .result = REJECT,
- .prog_type = BPF_PROG_TYPE_CGROUP_SYSCTL,
- .errstr = "misaligned stack access off (0x0; 0x0)+-20+0 size 8",
-},
-{
- "ARG_PTR_TO_LONG size < sizeof(long)",
- .insns = {
- /* bpf_strtoul arg1 (buf) */
- BPF_MOV64_REG(BPF_REG_7, BPF_REG_10),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_7, -16),
- BPF_MOV64_IMM(BPF_REG_0, 0x00303036),
- BPF_STX_MEM(BPF_DW, BPF_REG_7, BPF_REG_0, 0),
-
- BPF_MOV64_REG(BPF_REG_1, BPF_REG_7),
-
- /* bpf_strtoul arg2 (buf_len) */
- BPF_MOV64_IMM(BPF_REG_2, 4),
-
- /* bpf_strtoul arg3 (flags) */
- BPF_MOV64_IMM(BPF_REG_3, 0),
-
- /* bpf_strtoul arg4 (res) */
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_7, 12),
- BPF_STX_MEM(BPF_W, BPF_REG_7, BPF_REG_0, 0),
- BPF_MOV64_REG(BPF_REG_4, BPF_REG_7),
-
- /* bpf_strtoul() */
- BPF_EMIT_CALL(BPF_FUNC_strtoul),
-
- BPF_MOV64_IMM(BPF_REG_0, 1),
- BPF_EXIT_INSN(),
- },
- .result = REJECT,
- .prog_type = BPF_PROG_TYPE_CGROUP_SYSCTL,
- .errstr = "invalid stack type R4 off=-4 access_size=8",
-},
-{
- "ARG_PTR_TO_LONG initialized",
- .insns = {
- /* bpf_strtoul arg1 (buf) */
- BPF_MOV64_REG(BPF_REG_7, BPF_REG_10),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_7, -8),
- BPF_MOV64_IMM(BPF_REG_0, 0x00303036),
- BPF_STX_MEM(BPF_DW, BPF_REG_7, BPF_REG_0, 0),
-
- BPF_MOV64_REG(BPF_REG_1, BPF_REG_7),
-
- /* bpf_strtoul arg2 (buf_len) */
- BPF_MOV64_IMM(BPF_REG_2, 4),
-
- /* bpf_strtoul arg3 (flags) */
- BPF_MOV64_IMM(BPF_REG_3, 0),
-
- /* bpf_strtoul arg4 (res) */
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_7, -8),
- BPF_STX_MEM(BPF_DW, BPF_REG_7, BPF_REG_0, 0),
- BPF_MOV64_REG(BPF_REG_4, BPF_REG_7),
-
- /* bpf_strtoul() */
- BPF_EMIT_CALL(BPF_FUNC_strtoul),
-
- BPF_MOV64_IMM(BPF_REG_0, 1),
- BPF_EXIT_INSN(),
- },
- .result = ACCEPT,
- .prog_type = BPF_PROG_TYPE_CGROUP_SYSCTL,
-},
diff --git a/tools/testing/selftests/bpf/verifier/jit.c b/tools/testing/selftests/bpf/verifier/jit.c
index c33adf344fae..8bf37e5207f1 100644
--- a/tools/testing/selftests/bpf/verifier/jit.c
+++ b/tools/testing/selftests/bpf/verifier/jit.c
@@ -21,6 +21,30 @@
.retval = 2,
},
{
+ "jit: lsh, rsh, arsh by reg",
+ .insns = {
+ BPF_MOV64_IMM(BPF_REG_0, 1),
+ BPF_MOV64_IMM(BPF_REG_4, 1),
+ BPF_MOV64_IMM(BPF_REG_1, 0xff),
+ BPF_ALU64_REG(BPF_LSH, BPF_REG_1, BPF_REG_0),
+ BPF_ALU32_REG(BPF_LSH, BPF_REG_1, BPF_REG_4),
+ BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, 0x3fc, 1),
+ BPF_EXIT_INSN(),
+ BPF_ALU64_REG(BPF_RSH, BPF_REG_1, BPF_REG_4),
+ BPF_MOV64_REG(BPF_REG_4, BPF_REG_1),
+ BPF_ALU32_REG(BPF_RSH, BPF_REG_4, BPF_REG_0),
+ BPF_JMP_IMM(BPF_JEQ, BPF_REG_4, 0xff, 1),
+ BPF_EXIT_INSN(),
+ BPF_ALU64_REG(BPF_ARSH, BPF_REG_4, BPF_REG_4),
+ BPF_JMP_IMM(BPF_JEQ, BPF_REG_4, 0, 1),
+ BPF_EXIT_INSN(),
+ BPF_MOV64_IMM(BPF_REG_0, 2),
+ BPF_EXIT_INSN(),
+ },
+ .result = ACCEPT,
+ .retval = 2,
+},
+{
"jit: mov32 for ldimm64, 1",
.insns = {
BPF_MOV64_IMM(BPF_REG_0, 2),
@@ -62,6 +86,11 @@
BPF_JMP_REG(BPF_JEQ, BPF_REG_3, BPF_REG_2, 2),
BPF_MOV64_IMM(BPF_REG_0, 1),
BPF_EXIT_INSN(),
+ BPF_LD_IMM64(BPF_REG_3, 0xfefefeULL),
+ BPF_ALU64_IMM(BPF_MUL, BPF_REG_3, 0xefefef),
+ BPF_JMP_REG(BPF_JEQ, BPF_REG_3, BPF_REG_2, 2),
+ BPF_MOV64_IMM(BPF_REG_0, 1),
+ BPF_EXIT_INSN(),
BPF_MOV32_REG(BPF_REG_2, BPF_REG_2),
BPF_LD_IMM64(BPF_REG_0, 0xfefefeULL),
BPF_ALU32_REG(BPF_MUL, BPF_REG_0, BPF_REG_1),
@@ -73,11 +102,69 @@
BPF_JMP_REG(BPF_JEQ, BPF_REG_3, BPF_REG_2, 2),
BPF_MOV64_IMM(BPF_REG_0, 1),
BPF_EXIT_INSN(),
+ BPF_LD_IMM64(BPF_REG_3, 0xfefefeULL),
+ BPF_ALU32_IMM(BPF_MUL, BPF_REG_3, 0xefefef),
+ BPF_JMP_REG(BPF_JEQ, BPF_REG_3, BPF_REG_2, 2),
+ BPF_MOV64_IMM(BPF_REG_0, 1),
+ BPF_EXIT_INSN(),
+ BPF_LD_IMM64(BPF_REG_0, 0xfefefeULL),
+ BPF_LD_IMM64(BPF_REG_2, 0x2ad4d4aaULL),
+ BPF_ALU32_IMM(BPF_MUL, BPF_REG_0, 0x2b),
+ BPF_JMP_REG(BPF_JEQ, BPF_REG_0, BPF_REG_2, 2),
+ BPF_MOV64_IMM(BPF_REG_0, 1),
+ BPF_EXIT_INSN(),
BPF_LD_IMM64(BPF_REG_0, 0x952a7bbcULL),
BPF_LD_IMM64(BPF_REG_1, 0xfefefeULL),
- BPF_LD_IMM64(BPF_REG_2, 0xeeff0d413122ULL),
- BPF_ALU32_REG(BPF_MUL, BPF_REG_2, BPF_REG_1),
- BPF_JMP_REG(BPF_JEQ, BPF_REG_2, BPF_REG_0, 2),
+ BPF_LD_IMM64(BPF_REG_5, 0xeeff0d413122ULL),
+ BPF_ALU32_REG(BPF_MUL, BPF_REG_5, BPF_REG_1),
+ BPF_JMP_REG(BPF_JEQ, BPF_REG_5, BPF_REG_0, 2),
+ BPF_MOV64_IMM(BPF_REG_0, 1),
+ BPF_EXIT_INSN(),
+ BPF_MOV64_IMM(BPF_REG_0, 2),
+ BPF_EXIT_INSN(),
+ },
+ .result = ACCEPT,
+ .retval = 2,
+},
+{
+ "jit: various div tests",
+ .insns = {
+ BPF_LD_IMM64(BPF_REG_2, 0xefeffeULL),
+ BPF_LD_IMM64(BPF_REG_0, 0xeeff0d413122ULL),
+ BPF_LD_IMM64(BPF_REG_1, 0xfefeeeULL),
+ BPF_ALU64_REG(BPF_DIV, BPF_REG_0, BPF_REG_1),
+ BPF_JMP_REG(BPF_JEQ, BPF_REG_0, BPF_REG_2, 2),
+ BPF_MOV64_IMM(BPF_REG_0, 1),
+ BPF_EXIT_INSN(),
+ BPF_LD_IMM64(BPF_REG_3, 0xeeff0d413122ULL),
+ BPF_ALU64_IMM(BPF_DIV, BPF_REG_3, 0xfefeeeULL),
+ BPF_JMP_REG(BPF_JEQ, BPF_REG_3, BPF_REG_2, 2),
+ BPF_MOV64_IMM(BPF_REG_0, 1),
+ BPF_EXIT_INSN(),
+ BPF_LD_IMM64(BPF_REG_2, 0xaa93ULL),
+ BPF_ALU64_IMM(BPF_MOD, BPF_REG_1, 0xbeefULL),
+ BPF_JMP_REG(BPF_JEQ, BPF_REG_1, BPF_REG_2, 2),
+ BPF_MOV64_IMM(BPF_REG_0, 1),
+ BPF_EXIT_INSN(),
+ BPF_LD_IMM64(BPF_REG_1, 0xfefeeeULL),
+ BPF_LD_IMM64(BPF_REG_3, 0xbeefULL),
+ BPF_ALU64_REG(BPF_MOD, BPF_REG_1, BPF_REG_3),
+ BPF_JMP_REG(BPF_JEQ, BPF_REG_1, BPF_REG_2, 2),
+ BPF_MOV64_IMM(BPF_REG_0, 1),
+ BPF_EXIT_INSN(),
+ BPF_LD_IMM64(BPF_REG_2, 0x5ee1dULL),
+ BPF_LD_IMM64(BPF_REG_1, 0xfefeeeULL),
+ BPF_LD_IMM64(BPF_REG_3, 0x2bULL),
+ BPF_ALU32_REG(BPF_DIV, BPF_REG_1, BPF_REG_3),
+ BPF_JMP_REG(BPF_JEQ, BPF_REG_1, BPF_REG_2, 2),
+ BPF_MOV64_IMM(BPF_REG_0, 1),
+ BPF_EXIT_INSN(),
+ BPF_ALU32_REG(BPF_DIV, BPF_REG_1, BPF_REG_1),
+ BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, 1, 2),
+ BPF_MOV64_IMM(BPF_REG_0, 1),
+ BPF_EXIT_INSN(),
+ BPF_ALU64_REG(BPF_MOD, BPF_REG_2, BPF_REG_2),
+ BPF_JMP_IMM(BPF_JEQ, BPF_REG_2, 0, 2),
BPF_MOV64_IMM(BPF_REG_0, 1),
BPF_EXIT_INSN(),
BPF_MOV64_IMM(BPF_REG_0, 2),
@@ -105,3 +192,27 @@
.result = ACCEPT,
.retval = 2,
},
+{
+ "jit: torturous jumps, imm8 nop jmp and pure jump padding",
+ .insns = { },
+ .fill_helper = bpf_fill_torturous_jumps,
+ .prog_type = BPF_PROG_TYPE_SCHED_CLS,
+ .result = ACCEPT,
+ .retval = 1,
+},
+{
+ "jit: torturous jumps, imm32 nop jmp and jmp_cond padding",
+ .insns = { },
+ .fill_helper = bpf_fill_torturous_jumps,
+ .prog_type = BPF_PROG_TYPE_SCHED_CLS,
+ .result = ACCEPT,
+ .retval = 2,
+},
+{
+ "jit: torturous jumps in subprog",
+ .insns = { },
+ .fill_helper = bpf_fill_torturous_jumps,
+ .prog_type = BPF_PROG_TYPE_SCHED_CLS,
+ .result = ACCEPT,
+ .retval = 3,
+},
diff --git a/tools/testing/selftests/bpf/verifier/jmp32.c b/tools/testing/selftests/bpf/verifier/jmp32.c
index bd5cae4a7f73..43776f6f92f4 100644
--- a/tools/testing/selftests/bpf/verifier/jmp32.c
+++ b/tools/testing/selftests/bpf/verifier/jmp32.c
@@ -87,6 +87,8 @@
BPF_LDX_MEM(BPF_B, BPF_REG_8, BPF_REG_9, 0),
BPF_EXIT_INSN(),
},
+ .errstr_unpriv = "R9 !read_ok",
+ .result_unpriv = REJECT,
.result = ACCEPT,
},
{
@@ -150,6 +152,8 @@
BPF_LDX_MEM(BPF_B, BPF_REG_8, BPF_REG_9, 0),
BPF_EXIT_INSN(),
},
+ .errstr_unpriv = "R9 !read_ok",
+ .result_unpriv = REJECT,
.result = ACCEPT,
},
{
@@ -213,6 +217,8 @@
BPF_LDX_MEM(BPF_B, BPF_REG_8, BPF_REG_9, 0),
BPF_EXIT_INSN(),
},
+ .errstr_unpriv = "R9 !read_ok",
+ .result_unpriv = REJECT,
.result = ACCEPT,
},
{
@@ -280,8 +286,11 @@
BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_0, 0),
BPF_EXIT_INSN(),
},
+ .errstr_unpriv = "R0 invalid mem access 'scalar'",
+ .result_unpriv = REJECT,
.result = ACCEPT,
.retval = 2,
+ .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
},
{
"jgt32: BPF_K",
@@ -348,8 +357,11 @@
BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_0, 0),
BPF_EXIT_INSN(),
},
+ .errstr_unpriv = "R0 invalid mem access 'scalar'",
+ .result_unpriv = REJECT,
.result = ACCEPT,
.retval = 2,
+ .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
},
{
"jle32: BPF_K",
@@ -416,8 +428,11 @@
BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_0, 0),
BPF_EXIT_INSN(),
},
+ .errstr_unpriv = "R0 invalid mem access 'scalar'",
+ .result_unpriv = REJECT,
.result = ACCEPT,
.retval = 2,
+ .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
},
{
"jlt32: BPF_K",
@@ -484,8 +499,11 @@
BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_0, 0),
BPF_EXIT_INSN(),
},
+ .errstr_unpriv = "R0 invalid mem access 'scalar'",
+ .result_unpriv = REJECT,
.result = ACCEPT,
.retval = 2,
+ .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
},
{
"jsge32: BPF_K",
@@ -552,8 +570,11 @@
BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_0, 0),
BPF_EXIT_INSN(),
},
+ .errstr_unpriv = "R0 invalid mem access 'scalar'",
+ .result_unpriv = REJECT,
.result = ACCEPT,
.retval = 2,
+ .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
},
{
"jsgt32: BPF_K",
@@ -620,8 +641,11 @@
BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_0, 0),
BPF_EXIT_INSN(),
},
+ .errstr_unpriv = "R0 invalid mem access 'scalar'",
+ .result_unpriv = REJECT,
.result = ACCEPT,
.retval = 2,
+ .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
},
{
"jsle32: BPF_K",
@@ -688,8 +712,11 @@
BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_0, 0),
BPF_EXIT_INSN(),
},
+ .errstr_unpriv = "R0 invalid mem access 'scalar'",
+ .result_unpriv = REJECT,
.result = ACCEPT,
.retval = 2,
+ .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
},
{
"jslt32: BPF_K",
@@ -756,8 +783,11 @@
BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_0, 0),
BPF_EXIT_INSN(),
},
+ .errstr_unpriv = "R0 invalid mem access 'scalar'",
+ .result_unpriv = REJECT,
.result = ACCEPT,
.retval = 2,
+ .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
},
{
"jgt32: range bound deduction, reg op imm",
@@ -842,3 +872,24 @@
.result = ACCEPT,
.flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
},
+{
+ "jeq32/jne32: bounds checking",
+ .insns = {
+ BPF_MOV64_IMM(BPF_REG_6, 563),
+ BPF_MOV64_IMM(BPF_REG_2, 0),
+ BPF_ALU64_IMM(BPF_NEG, BPF_REG_2, 0),
+ BPF_ALU64_IMM(BPF_NEG, BPF_REG_2, 0),
+ BPF_ALU32_REG(BPF_OR, BPF_REG_2, BPF_REG_6),
+ BPF_JMP32_IMM(BPF_JNE, BPF_REG_2, 8, 5),
+ BPF_JMP_IMM(BPF_JSGE, BPF_REG_2, 500, 2),
+ BPF_MOV64_IMM(BPF_REG_0, 2),
+ BPF_EXIT_INSN(),
+ BPF_MOV64_REG(BPF_REG_0, BPF_REG_4),
+ BPF_EXIT_INSN(),
+ BPF_MOV64_IMM(BPF_REG_0, 1),
+ BPF_EXIT_INSN(),
+ },
+ .prog_type = BPF_PROG_TYPE_SCHED_CLS,
+ .result = ACCEPT,
+ .retval = 1,
+},
diff --git a/tools/testing/selftests/bpf/verifier/jset.c b/tools/testing/selftests/bpf/verifier/jset.c
index 8dcd4e0383d5..11fc68da735e 100644
--- a/tools/testing/selftests/bpf/verifier/jset.c
+++ b/tools/testing/selftests/bpf/verifier/jset.c
@@ -82,8 +82,8 @@
BPF_EXIT_INSN(),
},
.prog_type = BPF_PROG_TYPE_SOCKET_FILTER,
- .retval_unpriv = 1,
- .result_unpriv = ACCEPT,
+ .errstr_unpriv = "R9 !read_ok",
+ .result_unpriv = REJECT,
.retval = 1,
.result = ACCEPT,
},
@@ -141,7 +141,8 @@
BPF_EXIT_INSN(),
},
.prog_type = BPF_PROG_TYPE_SOCKET_FILTER,
- .result_unpriv = ACCEPT,
+ .errstr_unpriv = "R9 !read_ok",
+ .result_unpriv = REJECT,
.result = ACCEPT,
},
{
@@ -162,6 +163,7 @@
BPF_EXIT_INSN(),
},
.prog_type = BPF_PROG_TYPE_SOCKET_FILTER,
- .result_unpriv = ACCEPT,
+ .errstr_unpriv = "R9 !read_ok",
+ .result_unpriv = REJECT,
.result = ACCEPT,
},
diff --git a/tools/testing/selftests/bpf/verifier/jump.c b/tools/testing/selftests/bpf/verifier/jump.c
index 6f951d1ff0a4..497fe17d2eaf 100644
--- a/tools/testing/selftests/bpf/verifier/jump.c
+++ b/tools/testing/selftests/bpf/verifier/jump.c
@@ -373,3 +373,25 @@
.result = ACCEPT,
.retval = 3,
},
+{
+ "jump & dead code elimination",
+ .insns = {
+ BPF_MOV64_IMM(BPF_REG_0, 1),
+ BPF_MOV64_IMM(BPF_REG_3, 0),
+ BPF_ALU64_IMM(BPF_NEG, BPF_REG_3, 0),
+ BPF_ALU64_IMM(BPF_NEG, BPF_REG_3, 0),
+ BPF_ALU64_IMM(BPF_OR, BPF_REG_3, 32767),
+ BPF_JMP_IMM(BPF_JSGE, BPF_REG_3, 0, 1),
+ BPF_EXIT_INSN(),
+ BPF_JMP_IMM(BPF_JSLE, BPF_REG_3, 0x8000, 1),
+ BPF_EXIT_INSN(),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_3, -32767),
+ BPF_MOV64_IMM(BPF_REG_0, 2),
+ BPF_JMP_IMM(BPF_JLE, BPF_REG_3, 0, 1),
+ BPF_MOV64_REG(BPF_REG_0, BPF_REG_4),
+ BPF_EXIT_INSN(),
+ },
+ .prog_type = BPF_PROG_TYPE_SCHED_CLS,
+ .result = ACCEPT,
+ .retval = 2,
+},
diff --git a/tools/testing/selftests/bpf/verifier/ld_imm64.c b/tools/testing/selftests/bpf/verifier/ld_imm64.c
index 3856dba733e9..78f19c255f20 100644
--- a/tools/testing/selftests/bpf/verifier/ld_imm64.c
+++ b/tools/testing/selftests/bpf/verifier/ld_imm64.c
@@ -9,8 +9,8 @@
BPF_MOV64_IMM(BPF_REG_0, 2),
BPF_EXIT_INSN(),
},
- .errstr = "invalid BPF_LD_IMM insn",
- .errstr_unpriv = "R1 pointer comparison",
+ .errstr = "jump into the middle of ldimm64 insn 1",
+ .errstr_unpriv = "jump into the middle of ldimm64 insn 1",
.result = REJECT,
},
{
@@ -23,8 +23,8 @@
BPF_LD_IMM64(BPF_REG_0, 1),
BPF_EXIT_INSN(),
},
- .errstr = "invalid BPF_LD_IMM insn",
- .errstr_unpriv = "R1 pointer comparison",
+ .errstr = "jump into the middle of ldimm64 insn 1",
+ .errstr_unpriv = "jump into the middle of ldimm64 insn 1",
.result = REJECT,
},
{
@@ -51,14 +51,6 @@
.result = REJECT,
},
{
- "test5 ld_imm64",
- .insns = {
- BPF_RAW_INSN(BPF_LD | BPF_IMM | BPF_DW, 0, 0, 0, 0),
- },
- .errstr = "invalid bpf_ld_imm64 insn",
- .result = REJECT,
-},
-{
"test6 ld_imm64",
.insns = {
BPF_RAW_INSN(BPF_LD | BPF_IMM | BPF_DW, 0, 0, 0, 0),
diff --git a/tools/testing/selftests/bpf/verifier/ld_ind.c b/tools/testing/selftests/bpf/verifier/ld_ind.c
deleted file mode 100644
index 079734227538..000000000000
--- a/tools/testing/selftests/bpf/verifier/ld_ind.c
+++ /dev/null
@@ -1,72 +0,0 @@
-{
- "ld_ind: check calling conv, r1",
- .insns = {
- BPF_MOV64_REG(BPF_REG_6, BPF_REG_1),
- BPF_MOV64_IMM(BPF_REG_1, 1),
- BPF_LD_IND(BPF_W, BPF_REG_1, -0x200000),
- BPF_MOV64_REG(BPF_REG_0, BPF_REG_1),
- BPF_EXIT_INSN(),
- },
- .errstr = "R1 !read_ok",
- .result = REJECT,
-},
-{
- "ld_ind: check calling conv, r2",
- .insns = {
- BPF_MOV64_REG(BPF_REG_6, BPF_REG_1),
- BPF_MOV64_IMM(BPF_REG_2, 1),
- BPF_LD_IND(BPF_W, BPF_REG_2, -0x200000),
- BPF_MOV64_REG(BPF_REG_0, BPF_REG_2),
- BPF_EXIT_INSN(),
- },
- .errstr = "R2 !read_ok",
- .result = REJECT,
-},
-{
- "ld_ind: check calling conv, r3",
- .insns = {
- BPF_MOV64_REG(BPF_REG_6, BPF_REG_1),
- BPF_MOV64_IMM(BPF_REG_3, 1),
- BPF_LD_IND(BPF_W, BPF_REG_3, -0x200000),
- BPF_MOV64_REG(BPF_REG_0, BPF_REG_3),
- BPF_EXIT_INSN(),
- },
- .errstr = "R3 !read_ok",
- .result = REJECT,
-},
-{
- "ld_ind: check calling conv, r4",
- .insns = {
- BPF_MOV64_REG(BPF_REG_6, BPF_REG_1),
- BPF_MOV64_IMM(BPF_REG_4, 1),
- BPF_LD_IND(BPF_W, BPF_REG_4, -0x200000),
- BPF_MOV64_REG(BPF_REG_0, BPF_REG_4),
- BPF_EXIT_INSN(),
- },
- .errstr = "R4 !read_ok",
- .result = REJECT,
-},
-{
- "ld_ind: check calling conv, r5",
- .insns = {
- BPF_MOV64_REG(BPF_REG_6, BPF_REG_1),
- BPF_MOV64_IMM(BPF_REG_5, 1),
- BPF_LD_IND(BPF_W, BPF_REG_5, -0x200000),
- BPF_MOV64_REG(BPF_REG_0, BPF_REG_5),
- BPF_EXIT_INSN(),
- },
- .errstr = "R5 !read_ok",
- .result = REJECT,
-},
-{
- "ld_ind: check calling conv, r7",
- .insns = {
- BPF_MOV64_REG(BPF_REG_6, BPF_REG_1),
- BPF_MOV64_IMM(BPF_REG_7, 1),
- BPF_LD_IND(BPF_W, BPF_REG_7, -0x200000),
- BPF_MOV64_REG(BPF_REG_0, BPF_REG_7),
- BPF_EXIT_INSN(),
- },
- .result = ACCEPT,
- .retval = 1,
-},
diff --git a/tools/testing/selftests/bpf/verifier/leak_ptr.c b/tools/testing/selftests/bpf/verifier/leak_ptr.c
deleted file mode 100644
index d6eec17f2cd2..000000000000
--- a/tools/testing/selftests/bpf/verifier/leak_ptr.c
+++ /dev/null
@@ -1,67 +0,0 @@
-{
- "leak pointer into ctx 1",
- .insns = {
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_STX_MEM(BPF_DW, BPF_REG_1, BPF_REG_0,
- offsetof(struct __sk_buff, cb[0])),
- BPF_LD_MAP_FD(BPF_REG_2, 0),
- BPF_STX_XADD(BPF_DW, BPF_REG_1, BPF_REG_2,
- offsetof(struct __sk_buff, cb[0])),
- BPF_EXIT_INSN(),
- },
- .fixup_map_hash_8b = { 2 },
- .errstr_unpriv = "R2 leaks addr into mem",
- .result_unpriv = REJECT,
- .result = REJECT,
- .errstr = "BPF_XADD stores into R1 ctx is not allowed",
-},
-{
- "leak pointer into ctx 2",
- .insns = {
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_STX_MEM(BPF_DW, BPF_REG_1, BPF_REG_0,
- offsetof(struct __sk_buff, cb[0])),
- BPF_STX_XADD(BPF_DW, BPF_REG_1, BPF_REG_10,
- offsetof(struct __sk_buff, cb[0])),
- BPF_EXIT_INSN(),
- },
- .errstr_unpriv = "R10 leaks addr into mem",
- .result_unpriv = REJECT,
- .result = REJECT,
- .errstr = "BPF_XADD stores into R1 ctx is not allowed",
-},
-{
- "leak pointer into ctx 3",
- .insns = {
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_LD_MAP_FD(BPF_REG_2, 0),
- BPF_STX_MEM(BPF_DW, BPF_REG_1, BPF_REG_2,
- offsetof(struct __sk_buff, cb[0])),
- BPF_EXIT_INSN(),
- },
- .fixup_map_hash_8b = { 1 },
- .errstr_unpriv = "R2 leaks addr into ctx",
- .result_unpriv = REJECT,
- .result = ACCEPT,
-},
-{
- "leak pointer into map val",
- .insns = {
- BPF_MOV64_REG(BPF_REG_6, BPF_REG_1),
- BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
- BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
- BPF_LD_MAP_FD(BPF_REG_1, 0),
- BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
- BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 3),
- BPF_MOV64_IMM(BPF_REG_3, 0),
- BPF_STX_MEM(BPF_DW, BPF_REG_0, BPF_REG_3, 0),
- BPF_STX_XADD(BPF_DW, BPF_REG_0, BPF_REG_6, 0),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- },
- .fixup_map_hash_8b = { 4 },
- .errstr_unpriv = "R6 leaks addr into mem",
- .result_unpriv = REJECT,
- .result = ACCEPT,
-},
diff --git a/tools/testing/selftests/bpf/verifier/loops1.c b/tools/testing/selftests/bpf/verifier/loops1.c
deleted file mode 100644
index 1af37187dc12..000000000000
--- a/tools/testing/selftests/bpf/verifier/loops1.c
+++ /dev/null
@@ -1,206 +0,0 @@
-{
- "bounded loop, count to 4",
- .insns = {
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 1),
- BPF_JMP_IMM(BPF_JLT, BPF_REG_0, 4, -2),
- BPF_EXIT_INSN(),
- },
- .result = ACCEPT,
- .prog_type = BPF_PROG_TYPE_TRACEPOINT,
- .retval = 4,
-},
-{
- "bounded loop, count to 20",
- .insns = {
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 3),
- BPF_JMP_IMM(BPF_JLT, BPF_REG_0, 20, -2),
- BPF_EXIT_INSN(),
- },
- .result = ACCEPT,
- .prog_type = BPF_PROG_TYPE_TRACEPOINT,
-},
-{
- "bounded loop, count from positive unknown to 4",
- .insns = {
- BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_get_prandom_u32),
- BPF_JMP_IMM(BPF_JSLT, BPF_REG_0, 0, 2),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 1),
- BPF_JMP_IMM(BPF_JLT, BPF_REG_0, 4, -2),
- BPF_EXIT_INSN(),
- },
- .result = ACCEPT,
- .prog_type = BPF_PROG_TYPE_TRACEPOINT,
- .retval = 4,
-},
-{
- "bounded loop, count from totally unknown to 4",
- .insns = {
- BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_get_prandom_u32),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 1),
- BPF_JMP_IMM(BPF_JLT, BPF_REG_0, 4, -2),
- BPF_EXIT_INSN(),
- },
- .result = ACCEPT,
- .prog_type = BPF_PROG_TYPE_TRACEPOINT,
-},
-{
- "bounded loop, count to 4 with equality",
- .insns = {
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 1),
- BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 4, -2),
- BPF_EXIT_INSN(),
- },
- .result = ACCEPT,
- .prog_type = BPF_PROG_TYPE_TRACEPOINT,
-},
-{
- "bounded loop, start in the middle",
- .insns = {
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_JMP_A(1),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 1),
- BPF_JMP_IMM(BPF_JLT, BPF_REG_0, 4, -2),
- BPF_EXIT_INSN(),
- },
- .result = REJECT,
- .errstr = "back-edge",
- .prog_type = BPF_PROG_TYPE_TRACEPOINT,
- .retval = 4,
-},
-{
- "bounded loop containing a forward jump",
- .insns = {
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 1),
- BPF_JMP_REG(BPF_JEQ, BPF_REG_0, BPF_REG_0, 0),
- BPF_JMP_IMM(BPF_JLT, BPF_REG_0, 4, -3),
- BPF_EXIT_INSN(),
- },
- .result = ACCEPT,
- .prog_type = BPF_PROG_TYPE_TRACEPOINT,
- .retval = 4,
-},
-{
- "bounded loop that jumps out rather than in",
- .insns = {
- BPF_MOV64_IMM(BPF_REG_6, 0),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_6, 1),
- BPF_JMP_IMM(BPF_JGT, BPF_REG_6, 10000, 2),
- BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_get_prandom_u32),
- BPF_JMP_A(-4),
- BPF_EXIT_INSN(),
- },
- .result = ACCEPT,
- .prog_type = BPF_PROG_TYPE_TRACEPOINT,
-},
-{
- "infinite loop after a conditional jump",
- .insns = {
- BPF_MOV64_IMM(BPF_REG_0, 5),
- BPF_JMP_IMM(BPF_JLT, BPF_REG_0, 4, 2),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 1),
- BPF_JMP_A(-2),
- BPF_EXIT_INSN(),
- },
- .result = REJECT,
- .errstr = "program is too large",
- .prog_type = BPF_PROG_TYPE_TRACEPOINT,
-},
-{
- "bounded recursion",
- .insns = {
- BPF_MOV64_IMM(BPF_REG_1, 0),
- BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 1),
- BPF_EXIT_INSN(),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 1),
- BPF_MOV64_REG(BPF_REG_0, BPF_REG_1),
- BPF_JMP_IMM(BPF_JLT, BPF_REG_1, 4, 1),
- BPF_EXIT_INSN(),
- BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, -5),
- BPF_EXIT_INSN(),
- },
- .result = REJECT,
- .errstr = "back-edge",
- .prog_type = BPF_PROG_TYPE_TRACEPOINT,
-},
-{
- "infinite loop in two jumps",
- .insns = {
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_JMP_A(0),
- BPF_JMP_IMM(BPF_JLT, BPF_REG_0, 4, -2),
- BPF_EXIT_INSN(),
- },
- .result = REJECT,
- .errstr = "loop detected",
- .prog_type = BPF_PROG_TYPE_TRACEPOINT,
-},
-{
- "infinite loop: three-jump trick",
- .insns = {
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 1),
- BPF_ALU64_IMM(BPF_AND, BPF_REG_0, 1),
- BPF_JMP_IMM(BPF_JLT, BPF_REG_0, 2, 1),
- BPF_EXIT_INSN(),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 1),
- BPF_ALU64_IMM(BPF_AND, BPF_REG_0, 1),
- BPF_JMP_IMM(BPF_JLT, BPF_REG_0, 2, 1),
- BPF_EXIT_INSN(),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 1),
- BPF_ALU64_IMM(BPF_AND, BPF_REG_0, 1),
- BPF_JMP_IMM(BPF_JLT, BPF_REG_0, 2, -11),
- BPF_EXIT_INSN(),
- },
- .result = REJECT,
- .errstr = "loop detected",
- .prog_type = BPF_PROG_TYPE_TRACEPOINT,
-},
-{
- "not-taken loop with back jump to 1st insn",
- .insns = {
- BPF_MOV64_IMM(BPF_REG_0, 123),
- BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 4, -2),
- BPF_EXIT_INSN(),
- },
- .result = ACCEPT,
- .prog_type = BPF_PROG_TYPE_XDP,
- .retval = 123,
-},
-{
- "taken loop with back jump to 1st insn",
- .insns = {
- BPF_MOV64_IMM(BPF_REG_1, 10),
- BPF_MOV64_IMM(BPF_REG_2, 0),
- BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 1),
- BPF_EXIT_INSN(),
- BPF_ALU64_REG(BPF_ADD, BPF_REG_2, BPF_REG_1),
- BPF_ALU64_IMM(BPF_SUB, BPF_REG_1, 1),
- BPF_JMP_IMM(BPF_JNE, BPF_REG_1, 0, -3),
- BPF_MOV64_REG(BPF_REG_0, BPF_REG_2),
- BPF_EXIT_INSN(),
- },
- .result = ACCEPT,
- .prog_type = BPF_PROG_TYPE_XDP,
- .retval = 55,
-},
-{
- "taken loop with back jump to 1st insn, 2",
- .insns = {
- BPF_MOV64_IMM(BPF_REG_1, 10),
- BPF_MOV64_IMM(BPF_REG_2, 0),
- BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 1),
- BPF_EXIT_INSN(),
- BPF_ALU64_REG(BPF_ADD, BPF_REG_2, BPF_REG_1),
- BPF_ALU64_IMM(BPF_SUB, BPF_REG_1, 1),
- BPF_JMP32_IMM(BPF_JNE, BPF_REG_1, 0, -3),
- BPF_MOV64_REG(BPF_REG_0, BPF_REG_2),
- BPF_EXIT_INSN(),
- },
- .result = ACCEPT,
- .prog_type = BPF_PROG_TYPE_XDP,
- .retval = 55,
-},
diff --git a/tools/testing/selftests/bpf/verifier/lwt.c b/tools/testing/selftests/bpf/verifier/lwt.c
deleted file mode 100644
index 2cab6a3966bb..000000000000
--- a/tools/testing/selftests/bpf/verifier/lwt.c
+++ /dev/null
@@ -1,189 +0,0 @@
-{
- "invalid direct packet write for LWT_IN",
- .insns = {
- BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1,
- offsetof(struct __sk_buff, data)),
- BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1,
- offsetof(struct __sk_buff, data_end)),
- BPF_MOV64_REG(BPF_REG_0, BPF_REG_2),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 8),
- BPF_JMP_REG(BPF_JGT, BPF_REG_0, BPF_REG_3, 1),
- BPF_STX_MEM(BPF_B, BPF_REG_2, BPF_REG_2, 0),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- },
- .errstr = "cannot write into packet",
- .result = REJECT,
- .prog_type = BPF_PROG_TYPE_LWT_IN,
-},
-{
- "invalid direct packet write for LWT_OUT",
- .insns = {
- BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1,
- offsetof(struct __sk_buff, data)),
- BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1,
- offsetof(struct __sk_buff, data_end)),
- BPF_MOV64_REG(BPF_REG_0, BPF_REG_2),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 8),
- BPF_JMP_REG(BPF_JGT, BPF_REG_0, BPF_REG_3, 1),
- BPF_STX_MEM(BPF_B, BPF_REG_2, BPF_REG_2, 0),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- },
- .errstr = "cannot write into packet",
- .result = REJECT,
- .prog_type = BPF_PROG_TYPE_LWT_OUT,
-},
-{
- "direct packet write for LWT_XMIT",
- .insns = {
- BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1,
- offsetof(struct __sk_buff, data)),
- BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1,
- offsetof(struct __sk_buff, data_end)),
- BPF_MOV64_REG(BPF_REG_0, BPF_REG_2),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 8),
- BPF_JMP_REG(BPF_JGT, BPF_REG_0, BPF_REG_3, 1),
- BPF_STX_MEM(BPF_B, BPF_REG_2, BPF_REG_2, 0),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- },
- .result = ACCEPT,
- .prog_type = BPF_PROG_TYPE_LWT_XMIT,
-},
-{
- "direct packet read for LWT_IN",
- .insns = {
- BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1,
- offsetof(struct __sk_buff, data)),
- BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1,
- offsetof(struct __sk_buff, data_end)),
- BPF_MOV64_REG(BPF_REG_0, BPF_REG_2),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 8),
- BPF_JMP_REG(BPF_JGT, BPF_REG_0, BPF_REG_3, 1),
- BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_2, 0),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- },
- .result = ACCEPT,
- .prog_type = BPF_PROG_TYPE_LWT_IN,
-},
-{
- "direct packet read for LWT_OUT",
- .insns = {
- BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1,
- offsetof(struct __sk_buff, data)),
- BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1,
- offsetof(struct __sk_buff, data_end)),
- BPF_MOV64_REG(BPF_REG_0, BPF_REG_2),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 8),
- BPF_JMP_REG(BPF_JGT, BPF_REG_0, BPF_REG_3, 1),
- BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_2, 0),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- },
- .result = ACCEPT,
- .prog_type = BPF_PROG_TYPE_LWT_OUT,
-},
-{
- "direct packet read for LWT_XMIT",
- .insns = {
- BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1,
- offsetof(struct __sk_buff, data)),
- BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1,
- offsetof(struct __sk_buff, data_end)),
- BPF_MOV64_REG(BPF_REG_0, BPF_REG_2),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 8),
- BPF_JMP_REG(BPF_JGT, BPF_REG_0, BPF_REG_3, 1),
- BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_2, 0),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- },
- .result = ACCEPT,
- .prog_type = BPF_PROG_TYPE_LWT_XMIT,
-},
-{
- "overlapping checks for direct packet access",
- .insns = {
- BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1,
- offsetof(struct __sk_buff, data)),
- BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1,
- offsetof(struct __sk_buff, data_end)),
- BPF_MOV64_REG(BPF_REG_0, BPF_REG_2),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 8),
- BPF_JMP_REG(BPF_JGT, BPF_REG_0, BPF_REG_3, 4),
- BPF_MOV64_REG(BPF_REG_1, BPF_REG_2),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 6),
- BPF_JMP_REG(BPF_JGT, BPF_REG_1, BPF_REG_3, 1),
- BPF_LDX_MEM(BPF_H, BPF_REG_0, BPF_REG_2, 6),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- },
- .result = ACCEPT,
- .prog_type = BPF_PROG_TYPE_LWT_XMIT,
-},
-{
- "make headroom for LWT_XMIT",
- .insns = {
- BPF_MOV64_REG(BPF_REG_6, BPF_REG_1),
- BPF_MOV64_IMM(BPF_REG_2, 34),
- BPF_MOV64_IMM(BPF_REG_3, 0),
- BPF_EMIT_CALL(BPF_FUNC_skb_change_head),
- /* split for s390 to succeed */
- BPF_MOV64_REG(BPF_REG_1, BPF_REG_6),
- BPF_MOV64_IMM(BPF_REG_2, 42),
- BPF_MOV64_IMM(BPF_REG_3, 0),
- BPF_EMIT_CALL(BPF_FUNC_skb_change_head),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- },
- .result = ACCEPT,
- .prog_type = BPF_PROG_TYPE_LWT_XMIT,
-},
-{
- "invalid access of tc_classid for LWT_IN",
- .insns = {
- BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1,
- offsetof(struct __sk_buff, tc_classid)),
- BPF_EXIT_INSN(),
- },
- .result = REJECT,
- .errstr = "invalid bpf_context access",
-},
-{
- "invalid access of tc_classid for LWT_OUT",
- .insns = {
- BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1,
- offsetof(struct __sk_buff, tc_classid)),
- BPF_EXIT_INSN(),
- },
- .result = REJECT,
- .errstr = "invalid bpf_context access",
-},
-{
- "invalid access of tc_classid for LWT_XMIT",
- .insns = {
- BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1,
- offsetof(struct __sk_buff, tc_classid)),
- BPF_EXIT_INSN(),
- },
- .result = REJECT,
- .errstr = "invalid bpf_context access",
-},
-{
- "check skb->tc_classid half load not permitted for lwt prog",
- .insns = {
- BPF_MOV64_IMM(BPF_REG_0, 0),
-#if __BYTE_ORDER == __LITTLE_ENDIAN
- BPF_LDX_MEM(BPF_H, BPF_REG_0, BPF_REG_1,
- offsetof(struct __sk_buff, tc_classid)),
-#else
- BPF_LDX_MEM(BPF_H, BPF_REG_0, BPF_REG_1,
- offsetof(struct __sk_buff, tc_classid) + 2),
-#endif
- BPF_EXIT_INSN(),
- },
- .result = REJECT,
- .errstr = "invalid bpf_context access",
- .prog_type = BPF_PROG_TYPE_LWT_IN,
-},
diff --git a/tools/testing/selftests/bpf/verifier/map_in_map.c b/tools/testing/selftests/bpf/verifier/map_in_map.c
deleted file mode 100644
index 2798927ee9ff..000000000000
--- a/tools/testing/selftests/bpf/verifier/map_in_map.c
+++ /dev/null
@@ -1,62 +0,0 @@
-{
- "map in map access",
- .insns = {
- BPF_ST_MEM(0, BPF_REG_10, -4, 0),
- BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -4),
- BPF_LD_MAP_FD(BPF_REG_1, 0),
- BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
- BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 5),
- BPF_ST_MEM(0, BPF_REG_10, -4, 0),
- BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -4),
- BPF_MOV64_REG(BPF_REG_1, BPF_REG_0),
- BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- },
- .fixup_map_in_map = { 3 },
- .result = ACCEPT,
-},
-{
- "invalid inner map pointer",
- .insns = {
- BPF_ST_MEM(0, BPF_REG_10, -4, 0),
- BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -4),
- BPF_LD_MAP_FD(BPF_REG_1, 0),
- BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
- BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 6),
- BPF_ST_MEM(0, BPF_REG_10, -4, 0),
- BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -4),
- BPF_MOV64_REG(BPF_REG_1, BPF_REG_0),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 8),
- BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- },
- .fixup_map_in_map = { 3 },
- .errstr = "R1 pointer arithmetic on map_ptr prohibited",
- .result = REJECT,
-},
-{
- "forgot null checking on the inner map pointer",
- .insns = {
- BPF_ST_MEM(0, BPF_REG_10, -4, 0),
- BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -4),
- BPF_LD_MAP_FD(BPF_REG_1, 0),
- BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
- BPF_ST_MEM(0, BPF_REG_10, -4, 0),
- BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -4),
- BPF_MOV64_REG(BPF_REG_1, BPF_REG_0),
- BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- },
- .fixup_map_in_map = { 3 },
- .errstr = "R1 type=map_value_or_null expected=map_ptr",
- .result = REJECT,
-},
diff --git a/tools/testing/selftests/bpf/verifier/map_kptr.c b/tools/testing/selftests/bpf/verifier/map_kptr.c
new file mode 100644
index 000000000000..d25c3e9605f1
--- /dev/null
+++ b/tools/testing/selftests/bpf/verifier/map_kptr.c
@@ -0,0 +1,444 @@
+/* Common tests */
+{
+ "map_kptr: BPF_ST imm != 0",
+ .insns = {
+ BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+ BPF_LD_MAP_FD(BPF_REG_6, 0),
+ BPF_MOV64_REG(BPF_REG_1, BPF_REG_6),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -4),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_ST_MEM(BPF_W, BPF_REG_2, 0, 0),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
+ BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 1),
+ BPF_EXIT_INSN(),
+ BPF_ST_MEM(BPF_DW, BPF_REG_0, 0, 1),
+ BPF_EXIT_INSN(),
+ },
+ .prog_type = BPF_PROG_TYPE_SCHED_CLS,
+ .fixup_map_kptr = { 1 },
+ .result = REJECT,
+ .errstr = "BPF_ST imm must be 0 when storing to kptr at off=0",
+},
+{
+ "map_kptr: size != bpf_size_to_bytes(BPF_DW)",
+ .insns = {
+ BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+ BPF_LD_MAP_FD(BPF_REG_6, 0),
+ BPF_MOV64_REG(BPF_REG_1, BPF_REG_6),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -4),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_ST_MEM(BPF_W, BPF_REG_2, 0, 0),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
+ BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 1),
+ BPF_EXIT_INSN(),
+ BPF_ST_MEM(BPF_W, BPF_REG_0, 0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .prog_type = BPF_PROG_TYPE_SCHED_CLS,
+ .fixup_map_kptr = { 1 },
+ .result = REJECT,
+ .errstr = "kptr access size must be BPF_DW",
+},
+{
+ "map_kptr: map_value non-const var_off",
+ .insns = {
+ BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+ BPF_LD_MAP_FD(BPF_REG_6, 0),
+ BPF_MOV64_REG(BPF_REG_1, BPF_REG_6),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -4),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_ST_MEM(BPF_W, BPF_REG_2, 0, 0),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
+ BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 1),
+ BPF_EXIT_INSN(),
+ BPF_MOV64_REG(BPF_REG_3, BPF_REG_0),
+ BPF_LDX_MEM(BPF_DW, BPF_REG_2, BPF_REG_0, 0),
+ BPF_JMP_IMM(BPF_JNE, BPF_REG_2, 0, 1),
+ BPF_EXIT_INSN(),
+ BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_2, 0),
+ BPF_JMP_IMM(BPF_JLE, BPF_REG_2, 4, 1),
+ BPF_EXIT_INSN(),
+ BPF_JMP_IMM(BPF_JGE, BPF_REG_2, 0, 1),
+ BPF_EXIT_INSN(),
+ BPF_ALU64_REG(BPF_ADD, BPF_REG_3, BPF_REG_2),
+ BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_3, 0),
+ BPF_EXIT_INSN(),
+ },
+ .prog_type = BPF_PROG_TYPE_SCHED_CLS,
+ .fixup_map_kptr = { 1 },
+ .result = REJECT,
+ .errstr = "kptr access cannot have variable offset",
+ .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
+},
+{
+ "map_kptr: bpf_kptr_xchg non-const var_off",
+ .insns = {
+ BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+ BPF_LD_MAP_FD(BPF_REG_6, 0),
+ BPF_MOV64_REG(BPF_REG_1, BPF_REG_6),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -4),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_ST_MEM(BPF_W, BPF_REG_2, 0, 0),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
+ BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 1),
+ BPF_EXIT_INSN(),
+ BPF_MOV64_REG(BPF_REG_3, BPF_REG_0),
+ BPF_LDX_MEM(BPF_DW, BPF_REG_2, BPF_REG_0, 0),
+ BPF_JMP_IMM(BPF_JNE, BPF_REG_2, 0, 1),
+ BPF_EXIT_INSN(),
+ BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_2, 0),
+ BPF_JMP_IMM(BPF_JLE, BPF_REG_2, 4, 1),
+ BPF_EXIT_INSN(),
+ BPF_JMP_IMM(BPF_JGE, BPF_REG_2, 0, 1),
+ BPF_EXIT_INSN(),
+ BPF_ALU64_REG(BPF_ADD, BPF_REG_3, BPF_REG_2),
+ BPF_MOV64_REG(BPF_REG_1, BPF_REG_3),
+ BPF_MOV64_IMM(BPF_REG_2, 0),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_kptr_xchg),
+ BPF_EXIT_INSN(),
+ },
+ .prog_type = BPF_PROG_TYPE_SCHED_CLS,
+ .fixup_map_kptr = { 1 },
+ .result = REJECT,
+ .errstr = "R1 doesn't have constant offset. kptr has to be at the constant offset",
+},
+{
+ "map_kptr: unaligned boundary load/store",
+ .insns = {
+ BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+ BPF_LD_MAP_FD(BPF_REG_6, 0),
+ BPF_MOV64_REG(BPF_REG_1, BPF_REG_6),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -4),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_ST_MEM(BPF_W, BPF_REG_2, 0, 0),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
+ BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 1),
+ BPF_EXIT_INSN(),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 7),
+ BPF_ST_MEM(BPF_DW, BPF_REG_0, 0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .prog_type = BPF_PROG_TYPE_SCHED_CLS,
+ .fixup_map_kptr = { 1 },
+ .result = REJECT,
+ .errstr = "kptr access misaligned expected=0 off=7",
+ .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
+},
+{
+ "map_kptr: reject var_off != 0",
+ .insns = {
+ BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+ BPF_LD_MAP_FD(BPF_REG_6, 0),
+ BPF_MOV64_REG(BPF_REG_1, BPF_REG_6),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -4),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_ST_MEM(BPF_W, BPF_REG_2, 0, 0),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
+ BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 1),
+ BPF_EXIT_INSN(),
+ BPF_LDX_MEM(BPF_DW, BPF_REG_1, BPF_REG_0, 0),
+ BPF_JMP_IMM(BPF_JNE, BPF_REG_1, 0, 1),
+ BPF_EXIT_INSN(),
+ BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, 0),
+ BPF_JMP_IMM(BPF_JLE, BPF_REG_2, 4, 1),
+ BPF_EXIT_INSN(),
+ BPF_JMP_IMM(BPF_JGE, BPF_REG_2, 0, 1),
+ BPF_EXIT_INSN(),
+ BPF_ALU64_REG(BPF_ADD, BPF_REG_1, BPF_REG_2),
+ BPF_STX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, 0),
+ BPF_EXIT_INSN(),
+ },
+ .prog_type = BPF_PROG_TYPE_SCHED_CLS,
+ .fixup_map_kptr = { 1 },
+ .result = REJECT,
+ .errstr = "variable untrusted_ptr_ access var_off=(0x0; 0x7) disallowed",
+},
+/* Tests for unreferened PTR_TO_BTF_ID */
+{
+ "map_kptr: unref: reject btf_struct_ids_match == false",
+ .insns = {
+ BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+ BPF_LD_MAP_FD(BPF_REG_6, 0),
+ BPF_MOV64_REG(BPF_REG_1, BPF_REG_6),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -4),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_ST_MEM(BPF_W, BPF_REG_2, 0, 0),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
+ BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 1),
+ BPF_EXIT_INSN(),
+ BPF_LDX_MEM(BPF_DW, BPF_REG_1, BPF_REG_0, 0),
+ BPF_JMP_IMM(BPF_JNE, BPF_REG_1, 0, 1),
+ BPF_EXIT_INSN(),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 4),
+ BPF_STX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, 0),
+ BPF_EXIT_INSN(),
+ },
+ .prog_type = BPF_PROG_TYPE_SCHED_CLS,
+ .fixup_map_kptr = { 1 },
+ .result = REJECT,
+ .errstr = "invalid kptr access, R1 type=untrusted_ptr_prog_test_ref_kfunc expected=ptr_prog_test",
+},
+{
+ "map_kptr: unref: loaded pointer marked as untrusted",
+ .insns = {
+ BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+ BPF_LD_MAP_FD(BPF_REG_6, 0),
+ BPF_MOV64_REG(BPF_REG_1, BPF_REG_6),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -4),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_ST_MEM(BPF_W, BPF_REG_2, 0, 0),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
+ BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 1),
+ BPF_EXIT_INSN(),
+ BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_0, 0),
+ BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .prog_type = BPF_PROG_TYPE_SCHED_CLS,
+ .fixup_map_kptr = { 1 },
+ .result = REJECT,
+ .errstr = "R0 invalid mem access 'untrusted_ptr_or_null_'",
+},
+{
+ "map_kptr: unref: correct in kernel type size",
+ .insns = {
+ BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+ BPF_LD_MAP_FD(BPF_REG_6, 0),
+ BPF_MOV64_REG(BPF_REG_1, BPF_REG_6),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -4),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_ST_MEM(BPF_W, BPF_REG_2, 0, 0),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
+ BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 1),
+ BPF_EXIT_INSN(),
+ BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_0, 0),
+ BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 1),
+ BPF_EXIT_INSN(),
+ BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_0, 32),
+ BPF_EXIT_INSN(),
+ },
+ .prog_type = BPF_PROG_TYPE_SCHED_CLS,
+ .fixup_map_kptr = { 1 },
+ .result = REJECT,
+ .errstr = "access beyond struct prog_test_ref_kfunc at off 32 size 8",
+},
+{
+ "map_kptr: unref: inherit PTR_UNTRUSTED on struct walk",
+ .insns = {
+ BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+ BPF_LD_MAP_FD(BPF_REG_6, 0),
+ BPF_MOV64_REG(BPF_REG_1, BPF_REG_6),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -4),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_ST_MEM(BPF_W, BPF_REG_2, 0, 0),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
+ BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 1),
+ BPF_EXIT_INSN(),
+ BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_0, 0),
+ BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 1),
+ BPF_EXIT_INSN(),
+ BPF_LDX_MEM(BPF_DW, BPF_REG_1, BPF_REG_0, 16),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_this_cpu_ptr),
+ BPF_EXIT_INSN(),
+ },
+ .prog_type = BPF_PROG_TYPE_SCHED_CLS,
+ .fixup_map_kptr = { 1 },
+ .result = REJECT,
+ .errstr = "R1 type=untrusted_ptr_ expected=percpu_ptr_",
+},
+{
+ "map_kptr: unref: no reference state created",
+ .insns = {
+ BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+ BPF_LD_MAP_FD(BPF_REG_6, 0),
+ BPF_MOV64_REG(BPF_REG_1, BPF_REG_6),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -4),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_ST_MEM(BPF_W, BPF_REG_2, 0, 0),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
+ BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 1),
+ BPF_EXIT_INSN(),
+ BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_0, 0),
+ BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 1),
+ BPF_EXIT_INSN(),
+ BPF_EXIT_INSN(),
+ },
+ .prog_type = BPF_PROG_TYPE_SCHED_CLS,
+ .fixup_map_kptr = { 1 },
+ .result = ACCEPT,
+},
+{
+ "map_kptr: unref: bpf_kptr_xchg rejected",
+ .insns = {
+ BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+ BPF_LD_MAP_FD(BPF_REG_6, 0),
+ BPF_MOV64_REG(BPF_REG_1, BPF_REG_6),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -4),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_ST_MEM(BPF_W, BPF_REG_2, 0, 0),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
+ BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 1),
+ BPF_EXIT_INSN(),
+ BPF_MOV64_REG(BPF_REG_1, BPF_REG_0),
+ BPF_MOV64_IMM(BPF_REG_2, 0),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_kptr_xchg),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .prog_type = BPF_PROG_TYPE_SCHED_CLS,
+ .fixup_map_kptr = { 1 },
+ .result = REJECT,
+ .errstr = "off=0 kptr isn't referenced kptr",
+},
+/* Tests for referenced PTR_TO_BTF_ID */
+{
+ "map_kptr: ref: loaded pointer marked as untrusted",
+ .insns = {
+ BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+ BPF_LD_MAP_FD(BPF_REG_6, 0),
+ BPF_MOV64_REG(BPF_REG_1, BPF_REG_6),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -4),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_ST_MEM(BPF_W, BPF_REG_2, 0, 0),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
+ BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 1),
+ BPF_EXIT_INSN(),
+ BPF_MOV64_IMM(BPF_REG_1, 0),
+ BPF_LDX_MEM(BPF_DW, BPF_REG_1, BPF_REG_0, 8),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_this_cpu_ptr),
+ BPF_EXIT_INSN(),
+ },
+ .prog_type = BPF_PROG_TYPE_SCHED_CLS,
+ .fixup_map_kptr = { 1 },
+ .result = REJECT,
+ .errstr = "R1 type=rcu_ptr_or_null_ expected=percpu_ptr_",
+},
+{
+ "map_kptr: ref: reject off != 0",
+ .insns = {
+ BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+ BPF_LD_MAP_FD(BPF_REG_6, 0),
+ BPF_MOV64_REG(BPF_REG_1, BPF_REG_6),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -4),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_ST_MEM(BPF_W, BPF_REG_2, 0, 0),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
+ BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 1),
+ BPF_EXIT_INSN(),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 8),
+ BPF_MOV64_REG(BPF_REG_7, BPF_REG_0),
+ BPF_MOV64_REG(BPF_REG_1, BPF_REG_0),
+ BPF_MOV64_IMM(BPF_REG_2, 0),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_kptr_xchg),
+ BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 1),
+ BPF_EXIT_INSN(),
+ BPF_MOV64_REG(BPF_REG_1, BPF_REG_7),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 8),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 8),
+ BPF_MOV64_REG(BPF_REG_2, BPF_REG_0),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_kptr_xchg),
+ BPF_EXIT_INSN(),
+ },
+ .prog_type = BPF_PROG_TYPE_SCHED_CLS,
+ .fixup_map_kptr = { 1 },
+ .result = REJECT,
+ .errstr = "invalid kptr access, R2 type=ptr_prog_test_ref_kfunc expected=ptr_prog_test_member",
+},
+{
+ "map_kptr: ref: reference state created and released on xchg",
+ .insns = {
+ BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+ BPF_LD_MAP_FD(BPF_REG_6, 0),
+ BPF_MOV64_REG(BPF_REG_1, BPF_REG_6),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -4),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_ST_MEM(BPF_W, BPF_REG_2, 0, 0),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
+ BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 1),
+ BPF_EXIT_INSN(),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 8),
+ BPF_MOV64_REG(BPF_REG_7, BPF_REG_0),
+ BPF_MOV64_REG(BPF_REG_1, BPF_REG_10),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -8),
+ BPF_ST_MEM(BPF_DW, BPF_REG_1, 0, 0),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, BPF_PSEUDO_KFUNC_CALL, 0, 0),
+ BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 1),
+ BPF_EXIT_INSN(),
+ BPF_MOV64_REG(BPF_REG_1, BPF_REG_7),
+ BPF_MOV64_REG(BPF_REG_2, BPF_REG_0),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_kptr_xchg),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .prog_type = BPF_PROG_TYPE_SCHED_CLS,
+ .fixup_map_kptr = { 1 },
+ .result = REJECT,
+ .errstr = "Unreleased reference id=5 alloc_insn=20",
+ .fixup_kfunc_btf_id = {
+ { "bpf_kfunc_call_test_acquire", 15 },
+ }
+},
+{
+ "map_kptr: ref: reject STX",
+ .insns = {
+ BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+ BPF_LD_MAP_FD(BPF_REG_6, 0),
+ BPF_MOV64_REG(BPF_REG_1, BPF_REG_6),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -4),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_ST_MEM(BPF_W, BPF_REG_2, 0, 0),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
+ BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 1),
+ BPF_EXIT_INSN(),
+ BPF_MOV64_REG(BPF_REG_1, 0),
+ BPF_STX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, 8),
+ BPF_EXIT_INSN(),
+ },
+ .prog_type = BPF_PROG_TYPE_SCHED_CLS,
+ .fixup_map_kptr = { 1 },
+ .result = REJECT,
+ .errstr = "store to referenced kptr disallowed",
+},
+{
+ "map_kptr: ref: reject ST",
+ .insns = {
+ BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+ BPF_LD_MAP_FD(BPF_REG_6, 0),
+ BPF_MOV64_REG(BPF_REG_1, BPF_REG_6),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -4),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_ST_MEM(BPF_W, BPF_REG_2, 0, 0),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
+ BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 1),
+ BPF_EXIT_INSN(),
+ BPF_ST_MEM(BPF_DW, BPF_REG_0, 8, 0),
+ BPF_EXIT_INSN(),
+ },
+ .prog_type = BPF_PROG_TYPE_SCHED_CLS,
+ .fixup_map_kptr = { 1 },
+ .result = REJECT,
+ .errstr = "store to referenced kptr disallowed",
+},
+{
+ "map_kptr: reject helper access to kptr",
+ .insns = {
+ BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+ BPF_LD_MAP_FD(BPF_REG_6, 0),
+ BPF_MOV64_REG(BPF_REG_1, BPF_REG_6),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -4),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_ST_MEM(BPF_W, BPF_REG_2, 0, 0),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
+ BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 1),
+ BPF_EXIT_INSN(),
+ BPF_MOV64_REG(BPF_REG_1, BPF_REG_6),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 2),
+ BPF_MOV64_REG(BPF_REG_2, BPF_REG_0),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_delete_elem),
+ BPF_EXIT_INSN(),
+ },
+ .prog_type = BPF_PROG_TYPE_SCHED_CLS,
+ .fixup_map_kptr = { 1 },
+ .result = REJECT,
+ .errstr = "kptr cannot be accessed indirectly by helper",
+},
diff --git a/tools/testing/selftests/bpf/verifier/map_ptr_mixing.c b/tools/testing/selftests/bpf/verifier/map_ptr_mixing.c
deleted file mode 100644
index cd26ee6b7b1d..000000000000
--- a/tools/testing/selftests/bpf/verifier/map_ptr_mixing.c
+++ /dev/null
@@ -1,100 +0,0 @@
-{
- "calls: two calls returning different map pointers for lookup (hash, array)",
- .insns = {
- /* main prog */
- BPF_JMP_IMM(BPF_JNE, BPF_REG_1, 0, 2),
- BPF_CALL_REL(11),
- BPF_JMP_IMM(BPF_JA, 0, 0, 1),
- BPF_CALL_REL(12),
- BPF_MOV64_REG(BPF_REG_1, BPF_REG_0),
- BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
- BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
- BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
- BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 2),
- BPF_ST_MEM(BPF_DW, BPF_REG_0, 0, offsetof(struct test_val, foo)),
- BPF_MOV64_IMM(BPF_REG_0, 1),
- BPF_EXIT_INSN(),
- /* subprog 1 */
- BPF_LD_MAP_FD(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- /* subprog 2 */
- BPF_LD_MAP_FD(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- },
- .prog_type = BPF_PROG_TYPE_SCHED_CLS,
- .fixup_map_hash_48b = { 13 },
- .fixup_map_array_48b = { 16 },
- .result = ACCEPT,
- .retval = 1,
-},
-{
- "calls: two calls returning different map pointers for lookup (hash, map in map)",
- .insns = {
- /* main prog */
- BPF_JMP_IMM(BPF_JNE, BPF_REG_1, 0, 2),
- BPF_CALL_REL(11),
- BPF_JMP_IMM(BPF_JA, 0, 0, 1),
- BPF_CALL_REL(12),
- BPF_MOV64_REG(BPF_REG_1, BPF_REG_0),
- BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
- BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
- BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
- BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 2),
- BPF_ST_MEM(BPF_DW, BPF_REG_0, 0, offsetof(struct test_val, foo)),
- BPF_MOV64_IMM(BPF_REG_0, 1),
- BPF_EXIT_INSN(),
- /* subprog 1 */
- BPF_LD_MAP_FD(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- /* subprog 2 */
- BPF_LD_MAP_FD(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- },
- .prog_type = BPF_PROG_TYPE_SCHED_CLS,
- .fixup_map_in_map = { 16 },
- .fixup_map_array_48b = { 13 },
- .result = REJECT,
- .errstr = "R0 invalid mem access 'map_ptr'",
-},
-{
- "cond: two branches returning different map pointers for lookup (tail, tail)",
- .insns = {
- BPF_LDX_MEM(BPF_W, BPF_REG_6, BPF_REG_1,
- offsetof(struct __sk_buff, mark)),
- BPF_JMP_IMM(BPF_JNE, BPF_REG_6, 0, 3),
- BPF_LD_MAP_FD(BPF_REG_2, 0),
- BPF_JMP_IMM(BPF_JA, 0, 0, 2),
- BPF_LD_MAP_FD(BPF_REG_2, 0),
- BPF_MOV64_IMM(BPF_REG_3, 7),
- BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_tail_call),
- BPF_MOV64_IMM(BPF_REG_0, 1),
- BPF_EXIT_INSN(),
- },
- .fixup_prog1 = { 5 },
- .fixup_prog2 = { 2 },
- .result_unpriv = REJECT,
- .errstr_unpriv = "tail_call abusing map_ptr",
- .result = ACCEPT,
- .retval = 42,
-},
-{
- "cond: two branches returning same map pointers for lookup (tail, tail)",
- .insns = {
- BPF_LDX_MEM(BPF_W, BPF_REG_6, BPF_REG_1,
- offsetof(struct __sk_buff, mark)),
- BPF_JMP_IMM(BPF_JEQ, BPF_REG_6, 0, 3),
- BPF_LD_MAP_FD(BPF_REG_2, 0),
- BPF_JMP_IMM(BPF_JA, 0, 0, 2),
- BPF_LD_MAP_FD(BPF_REG_2, 0),
- BPF_MOV64_IMM(BPF_REG_3, 7),
- BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_tail_call),
- BPF_MOV64_IMM(BPF_REG_0, 1),
- BPF_EXIT_INSN(),
- },
- .fixup_prog2 = { 2, 5 },
- .result_unpriv = ACCEPT,
- .result = ACCEPT,
- .retval = 42,
-},
diff --git a/tools/testing/selftests/bpf/verifier/map_ret_val.c b/tools/testing/selftests/bpf/verifier/map_ret_val.c
deleted file mode 100644
index bdd0e8d18333..000000000000
--- a/tools/testing/selftests/bpf/verifier/map_ret_val.c
+++ /dev/null
@@ -1,65 +0,0 @@
-{
- "invalid map_fd for function call",
- .insns = {
- BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
- BPF_ALU64_REG(BPF_MOV, BPF_REG_2, BPF_REG_10),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
- BPF_LD_MAP_FD(BPF_REG_1, 0),
- BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_delete_elem),
- BPF_EXIT_INSN(),
- },
- .errstr = "fd 0 is not pointing to valid bpf_map",
- .result = REJECT,
-},
-{
- "don't check return value before access",
- .insns = {
- BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
- BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
- BPF_LD_MAP_FD(BPF_REG_1, 0),
- BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
- BPF_ST_MEM(BPF_DW, BPF_REG_0, 0, 0),
- BPF_EXIT_INSN(),
- },
- .fixup_map_hash_8b = { 3 },
- .errstr = "R0 invalid mem access 'map_value_or_null'",
- .result = REJECT,
-},
-{
- "access memory with incorrect alignment",
- .insns = {
- BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
- BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
- BPF_LD_MAP_FD(BPF_REG_1, 0),
- BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
- BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 1),
- BPF_ST_MEM(BPF_DW, BPF_REG_0, 4, 0),
- BPF_EXIT_INSN(),
- },
- .fixup_map_hash_8b = { 3 },
- .errstr = "misaligned value access",
- .result = REJECT,
- .flags = F_LOAD_WITH_STRICT_ALIGNMENT,
-},
-{
- "sometimes access memory with incorrect alignment",
- .insns = {
- BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
- BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
- BPF_LD_MAP_FD(BPF_REG_1, 0),
- BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
- BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 2),
- BPF_ST_MEM(BPF_DW, BPF_REG_0, 0, 0),
- BPF_EXIT_INSN(),
- BPF_ST_MEM(BPF_DW, BPF_REG_0, 0, 1),
- BPF_EXIT_INSN(),
- },
- .fixup_map_hash_8b = { 3 },
- .errstr = "R0 invalid mem access",
- .errstr_unpriv = "R0 leaks addr",
- .result = REJECT,
- .flags = F_LOAD_WITH_STRICT_ALIGNMENT,
-},
diff --git a/tools/testing/selftests/bpf/verifier/masking.c b/tools/testing/selftests/bpf/verifier/masking.c
deleted file mode 100644
index 6e1358c544fd..000000000000
--- a/tools/testing/selftests/bpf/verifier/masking.c
+++ /dev/null
@@ -1,322 +0,0 @@
-{
- "masking, test out of bounds 1",
- .insns = {
- BPF_MOV32_IMM(BPF_REG_1, 5),
- BPF_MOV32_IMM(BPF_REG_2, 5 - 1),
- BPF_ALU64_REG(BPF_SUB, BPF_REG_2, BPF_REG_1),
- BPF_ALU64_REG(BPF_OR, BPF_REG_2, BPF_REG_1),
- BPF_ALU64_IMM(BPF_NEG, BPF_REG_2, 0),
- BPF_ALU64_IMM(BPF_ARSH, BPF_REG_2, 63),
- BPF_ALU64_REG(BPF_AND, BPF_REG_1, BPF_REG_2),
- BPF_MOV64_REG(BPF_REG_0, BPF_REG_1),
- BPF_EXIT_INSN(),
- },
- .result = ACCEPT,
- .retval = 0,
-},
-{
- "masking, test out of bounds 2",
- .insns = {
- BPF_MOV32_IMM(BPF_REG_1, 1),
- BPF_MOV32_IMM(BPF_REG_2, 1 - 1),
- BPF_ALU64_REG(BPF_SUB, BPF_REG_2, BPF_REG_1),
- BPF_ALU64_REG(BPF_OR, BPF_REG_2, BPF_REG_1),
- BPF_ALU64_IMM(BPF_NEG, BPF_REG_2, 0),
- BPF_ALU64_IMM(BPF_ARSH, BPF_REG_2, 63),
- BPF_ALU64_REG(BPF_AND, BPF_REG_1, BPF_REG_2),
- BPF_MOV64_REG(BPF_REG_0, BPF_REG_1),
- BPF_EXIT_INSN(),
- },
- .result = ACCEPT,
- .retval = 0,
-},
-{
- "masking, test out of bounds 3",
- .insns = {
- BPF_MOV32_IMM(BPF_REG_1, 0xffffffff),
- BPF_MOV32_IMM(BPF_REG_2, 0xffffffff - 1),
- BPF_ALU64_REG(BPF_SUB, BPF_REG_2, BPF_REG_1),
- BPF_ALU64_REG(BPF_OR, BPF_REG_2, BPF_REG_1),
- BPF_ALU64_IMM(BPF_NEG, BPF_REG_2, 0),
- BPF_ALU64_IMM(BPF_ARSH, BPF_REG_2, 63),
- BPF_ALU64_REG(BPF_AND, BPF_REG_1, BPF_REG_2),
- BPF_MOV64_REG(BPF_REG_0, BPF_REG_1),
- BPF_EXIT_INSN(),
- },
- .result = ACCEPT,
- .retval = 0,
-},
-{
- "masking, test out of bounds 4",
- .insns = {
- BPF_MOV32_IMM(BPF_REG_1, 0xffffffff),
- BPF_MOV32_IMM(BPF_REG_2, 1 - 1),
- BPF_ALU64_REG(BPF_SUB, BPF_REG_2, BPF_REG_1),
- BPF_ALU64_REG(BPF_OR, BPF_REG_2, BPF_REG_1),
- BPF_ALU64_IMM(BPF_NEG, BPF_REG_2, 0),
- BPF_ALU64_IMM(BPF_ARSH, BPF_REG_2, 63),
- BPF_ALU64_REG(BPF_AND, BPF_REG_1, BPF_REG_2),
- BPF_MOV64_REG(BPF_REG_0, BPF_REG_1),
- BPF_EXIT_INSN(),
- },
- .result = ACCEPT,
- .retval = 0,
-},
-{
- "masking, test out of bounds 5",
- .insns = {
- BPF_MOV32_IMM(BPF_REG_1, -1),
- BPF_MOV32_IMM(BPF_REG_2, 1 - 1),
- BPF_ALU64_REG(BPF_SUB, BPF_REG_2, BPF_REG_1),
- BPF_ALU64_REG(BPF_OR, BPF_REG_2, BPF_REG_1),
- BPF_ALU64_IMM(BPF_NEG, BPF_REG_2, 0),
- BPF_ALU64_IMM(BPF_ARSH, BPF_REG_2, 63),
- BPF_ALU64_REG(BPF_AND, BPF_REG_1, BPF_REG_2),
- BPF_MOV64_REG(BPF_REG_0, BPF_REG_1),
- BPF_EXIT_INSN(),
- },
- .result = ACCEPT,
- .retval = 0,
-},
-{
- "masking, test out of bounds 6",
- .insns = {
- BPF_MOV32_IMM(BPF_REG_1, -1),
- BPF_MOV32_IMM(BPF_REG_2, 0xffffffff - 1),
- BPF_ALU64_REG(BPF_SUB, BPF_REG_2, BPF_REG_1),
- BPF_ALU64_REG(BPF_OR, BPF_REG_2, BPF_REG_1),
- BPF_ALU64_IMM(BPF_NEG, BPF_REG_2, 0),
- BPF_ALU64_IMM(BPF_ARSH, BPF_REG_2, 63),
- BPF_ALU64_REG(BPF_AND, BPF_REG_1, BPF_REG_2),
- BPF_MOV64_REG(BPF_REG_0, BPF_REG_1),
- BPF_EXIT_INSN(),
- },
- .result = ACCEPT,
- .retval = 0,
-},
-{
- "masking, test out of bounds 7",
- .insns = {
- BPF_MOV64_IMM(BPF_REG_1, 5),
- BPF_MOV32_IMM(BPF_REG_2, 5 - 1),
- BPF_ALU64_REG(BPF_SUB, BPF_REG_2, BPF_REG_1),
- BPF_ALU64_REG(BPF_OR, BPF_REG_2, BPF_REG_1),
- BPF_ALU64_IMM(BPF_NEG, BPF_REG_2, 0),
- BPF_ALU64_IMM(BPF_ARSH, BPF_REG_2, 63),
- BPF_ALU64_REG(BPF_AND, BPF_REG_1, BPF_REG_2),
- BPF_MOV64_REG(BPF_REG_0, BPF_REG_1),
- BPF_EXIT_INSN(),
- },
- .result = ACCEPT,
- .retval = 0,
-},
-{
- "masking, test out of bounds 8",
- .insns = {
- BPF_MOV64_IMM(BPF_REG_1, 1),
- BPF_MOV32_IMM(BPF_REG_2, 1 - 1),
- BPF_ALU64_REG(BPF_SUB, BPF_REG_2, BPF_REG_1),
- BPF_ALU64_REG(BPF_OR, BPF_REG_2, BPF_REG_1),
- BPF_ALU64_IMM(BPF_NEG, BPF_REG_2, 0),
- BPF_ALU64_IMM(BPF_ARSH, BPF_REG_2, 63),
- BPF_ALU64_REG(BPF_AND, BPF_REG_1, BPF_REG_2),
- BPF_MOV64_REG(BPF_REG_0, BPF_REG_1),
- BPF_EXIT_INSN(),
- },
- .result = ACCEPT,
- .retval = 0,
-},
-{
- "masking, test out of bounds 9",
- .insns = {
- BPF_MOV64_IMM(BPF_REG_1, 0xffffffff),
- BPF_MOV32_IMM(BPF_REG_2, 0xffffffff - 1),
- BPF_ALU64_REG(BPF_SUB, BPF_REG_2, BPF_REG_1),
- BPF_ALU64_REG(BPF_OR, BPF_REG_2, BPF_REG_1),
- BPF_ALU64_IMM(BPF_NEG, BPF_REG_2, 0),
- BPF_ALU64_IMM(BPF_ARSH, BPF_REG_2, 63),
- BPF_ALU64_REG(BPF_AND, BPF_REG_1, BPF_REG_2),
- BPF_MOV64_REG(BPF_REG_0, BPF_REG_1),
- BPF_EXIT_INSN(),
- },
- .result = ACCEPT,
- .retval = 0,
-},
-{
- "masking, test out of bounds 10",
- .insns = {
- BPF_MOV64_IMM(BPF_REG_1, 0xffffffff),
- BPF_MOV32_IMM(BPF_REG_2, 1 - 1),
- BPF_ALU64_REG(BPF_SUB, BPF_REG_2, BPF_REG_1),
- BPF_ALU64_REG(BPF_OR, BPF_REG_2, BPF_REG_1),
- BPF_ALU64_IMM(BPF_NEG, BPF_REG_2, 0),
- BPF_ALU64_IMM(BPF_ARSH, BPF_REG_2, 63),
- BPF_ALU64_REG(BPF_AND, BPF_REG_1, BPF_REG_2),
- BPF_MOV64_REG(BPF_REG_0, BPF_REG_1),
- BPF_EXIT_INSN(),
- },
- .result = ACCEPT,
- .retval = 0,
-},
-{
- "masking, test out of bounds 11",
- .insns = {
- BPF_MOV64_IMM(BPF_REG_1, -1),
- BPF_MOV32_IMM(BPF_REG_2, 1 - 1),
- BPF_ALU64_REG(BPF_SUB, BPF_REG_2, BPF_REG_1),
- BPF_ALU64_REG(BPF_OR, BPF_REG_2, BPF_REG_1),
- BPF_ALU64_IMM(BPF_NEG, BPF_REG_2, 0),
- BPF_ALU64_IMM(BPF_ARSH, BPF_REG_2, 63),
- BPF_ALU64_REG(BPF_AND, BPF_REG_1, BPF_REG_2),
- BPF_MOV64_REG(BPF_REG_0, BPF_REG_1),
- BPF_EXIT_INSN(),
- },
- .result = ACCEPT,
- .retval = 0,
-},
-{
- "masking, test out of bounds 12",
- .insns = {
- BPF_MOV64_IMM(BPF_REG_1, -1),
- BPF_MOV32_IMM(BPF_REG_2, 0xffffffff - 1),
- BPF_ALU64_REG(BPF_SUB, BPF_REG_2, BPF_REG_1),
- BPF_ALU64_REG(BPF_OR, BPF_REG_2, BPF_REG_1),
- BPF_ALU64_IMM(BPF_NEG, BPF_REG_2, 0),
- BPF_ALU64_IMM(BPF_ARSH, BPF_REG_2, 63),
- BPF_ALU64_REG(BPF_AND, BPF_REG_1, BPF_REG_2),
- BPF_MOV64_REG(BPF_REG_0, BPF_REG_1),
- BPF_EXIT_INSN(),
- },
- .result = ACCEPT,
- .retval = 0,
-},
-{
- "masking, test in bounds 1",
- .insns = {
- BPF_MOV32_IMM(BPF_REG_1, 4),
- BPF_MOV32_IMM(BPF_REG_2, 5 - 1),
- BPF_ALU64_REG(BPF_SUB, BPF_REG_2, BPF_REG_1),
- BPF_ALU64_REG(BPF_OR, BPF_REG_2, BPF_REG_1),
- BPF_ALU64_IMM(BPF_NEG, BPF_REG_2, 0),
- BPF_ALU64_IMM(BPF_ARSH, BPF_REG_2, 63),
- BPF_ALU64_REG(BPF_AND, BPF_REG_1, BPF_REG_2),
- BPF_MOV64_REG(BPF_REG_0, BPF_REG_1),
- BPF_EXIT_INSN(),
- },
- .result = ACCEPT,
- .retval = 4,
-},
-{
- "masking, test in bounds 2",
- .insns = {
- BPF_MOV32_IMM(BPF_REG_1, 0),
- BPF_MOV32_IMM(BPF_REG_2, 0xffffffff - 1),
- BPF_ALU64_REG(BPF_SUB, BPF_REG_2, BPF_REG_1),
- BPF_ALU64_REG(BPF_OR, BPF_REG_2, BPF_REG_1),
- BPF_ALU64_IMM(BPF_NEG, BPF_REG_2, 0),
- BPF_ALU64_IMM(BPF_ARSH, BPF_REG_2, 63),
- BPF_ALU64_REG(BPF_AND, BPF_REG_1, BPF_REG_2),
- BPF_MOV64_REG(BPF_REG_0, BPF_REG_1),
- BPF_EXIT_INSN(),
- },
- .result = ACCEPT,
- .retval = 0,
-},
-{
- "masking, test in bounds 3",
- .insns = {
- BPF_MOV32_IMM(BPF_REG_1, 0xfffffffe),
- BPF_MOV32_IMM(BPF_REG_2, 0xffffffff - 1),
- BPF_ALU64_REG(BPF_SUB, BPF_REG_2, BPF_REG_1),
- BPF_ALU64_REG(BPF_OR, BPF_REG_2, BPF_REG_1),
- BPF_ALU64_IMM(BPF_NEG, BPF_REG_2, 0),
- BPF_ALU64_IMM(BPF_ARSH, BPF_REG_2, 63),
- BPF_ALU64_REG(BPF_AND, BPF_REG_1, BPF_REG_2),
- BPF_MOV64_REG(BPF_REG_0, BPF_REG_1),
- BPF_EXIT_INSN(),
- },
- .result = ACCEPT,
- .retval = 0xfffffffe,
-},
-{
- "masking, test in bounds 4",
- .insns = {
- BPF_MOV32_IMM(BPF_REG_1, 0xabcde),
- BPF_MOV32_IMM(BPF_REG_2, 0xabcdef - 1),
- BPF_ALU64_REG(BPF_SUB, BPF_REG_2, BPF_REG_1),
- BPF_ALU64_REG(BPF_OR, BPF_REG_2, BPF_REG_1),
- BPF_ALU64_IMM(BPF_NEG, BPF_REG_2, 0),
- BPF_ALU64_IMM(BPF_ARSH, BPF_REG_2, 63),
- BPF_ALU64_REG(BPF_AND, BPF_REG_1, BPF_REG_2),
- BPF_MOV64_REG(BPF_REG_0, BPF_REG_1),
- BPF_EXIT_INSN(),
- },
- .result = ACCEPT,
- .retval = 0xabcde,
-},
-{
- "masking, test in bounds 5",
- .insns = {
- BPF_MOV32_IMM(BPF_REG_1, 0),
- BPF_MOV32_IMM(BPF_REG_2, 1 - 1),
- BPF_ALU64_REG(BPF_SUB, BPF_REG_2, BPF_REG_1),
- BPF_ALU64_REG(BPF_OR, BPF_REG_2, BPF_REG_1),
- BPF_ALU64_IMM(BPF_NEG, BPF_REG_2, 0),
- BPF_ALU64_IMM(BPF_ARSH, BPF_REG_2, 63),
- BPF_ALU64_REG(BPF_AND, BPF_REG_1, BPF_REG_2),
- BPF_MOV64_REG(BPF_REG_0, BPF_REG_1),
- BPF_EXIT_INSN(),
- },
- .result = ACCEPT,
- .retval = 0,
-},
-{
- "masking, test in bounds 6",
- .insns = {
- BPF_MOV32_IMM(BPF_REG_1, 46),
- BPF_MOV32_IMM(BPF_REG_2, 47 - 1),
- BPF_ALU64_REG(BPF_SUB, BPF_REG_2, BPF_REG_1),
- BPF_ALU64_REG(BPF_OR, BPF_REG_2, BPF_REG_1),
- BPF_ALU64_IMM(BPF_NEG, BPF_REG_2, 0),
- BPF_ALU64_IMM(BPF_ARSH, BPF_REG_2, 63),
- BPF_ALU64_REG(BPF_AND, BPF_REG_1, BPF_REG_2),
- BPF_MOV64_REG(BPF_REG_0, BPF_REG_1),
- BPF_EXIT_INSN(),
- },
- .result = ACCEPT,
- .retval = 46,
-},
-{
- "masking, test in bounds 7",
- .insns = {
- BPF_MOV64_IMM(BPF_REG_3, -46),
- BPF_ALU64_IMM(BPF_MUL, BPF_REG_3, -1),
- BPF_MOV32_IMM(BPF_REG_2, 47 - 1),
- BPF_ALU64_REG(BPF_SUB, BPF_REG_2, BPF_REG_3),
- BPF_ALU64_REG(BPF_OR, BPF_REG_2, BPF_REG_3),
- BPF_ALU64_IMM(BPF_NEG, BPF_REG_2, 0),
- BPF_ALU64_IMM(BPF_ARSH, BPF_REG_2, 63),
- BPF_ALU64_REG(BPF_AND, BPF_REG_3, BPF_REG_2),
- BPF_MOV64_REG(BPF_REG_0, BPF_REG_3),
- BPF_EXIT_INSN(),
- },
- .result = ACCEPT,
- .retval = 46,
-},
-{
- "masking, test in bounds 8",
- .insns = {
- BPF_MOV64_IMM(BPF_REG_3, -47),
- BPF_ALU64_IMM(BPF_MUL, BPF_REG_3, -1),
- BPF_MOV32_IMM(BPF_REG_2, 47 - 1),
- BPF_ALU64_REG(BPF_SUB, BPF_REG_2, BPF_REG_3),
- BPF_ALU64_REG(BPF_OR, BPF_REG_2, BPF_REG_3),
- BPF_ALU64_IMM(BPF_NEG, BPF_REG_2, 0),
- BPF_ALU64_IMM(BPF_ARSH, BPF_REG_2, 63),
- BPF_ALU64_REG(BPF_AND, BPF_REG_3, BPF_REG_2),
- BPF_MOV64_REG(BPF_REG_0, BPF_REG_3),
- BPF_EXIT_INSN(),
- },
- .result = ACCEPT,
- .retval = 0,
-},
diff --git a/tools/testing/selftests/bpf/verifier/meta_access.c b/tools/testing/selftests/bpf/verifier/meta_access.c
deleted file mode 100644
index 205292b8dd65..000000000000
--- a/tools/testing/selftests/bpf/verifier/meta_access.c
+++ /dev/null
@@ -1,235 +0,0 @@
-{
- "meta access, test1",
- .insns = {
- BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1,
- offsetof(struct xdp_md, data_meta)),
- BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, offsetof(struct xdp_md, data)),
- BPF_MOV64_REG(BPF_REG_0, BPF_REG_2),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 8),
- BPF_JMP_REG(BPF_JGT, BPF_REG_0, BPF_REG_3, 1),
- BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_2, 0),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- },
- .result = ACCEPT,
- .prog_type = BPF_PROG_TYPE_XDP,
-},
-{
- "meta access, test2",
- .insns = {
- BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1,
- offsetof(struct xdp_md, data_meta)),
- BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, offsetof(struct xdp_md, data)),
- BPF_MOV64_REG(BPF_REG_0, BPF_REG_2),
- BPF_ALU64_IMM(BPF_SUB, BPF_REG_0, 8),
- BPF_MOV64_REG(BPF_REG_4, BPF_REG_2),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_4, 8),
- BPF_JMP_REG(BPF_JGT, BPF_REG_4, BPF_REG_3, 1),
- BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_0, 0),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- },
- .result = REJECT,
- .errstr = "invalid access to packet, off=-8",
- .prog_type = BPF_PROG_TYPE_XDP,
-},
-{
- "meta access, test3",
- .insns = {
- BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1,
- offsetof(struct xdp_md, data_meta)),
- BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1,
- offsetof(struct xdp_md, data_end)),
- BPF_MOV64_REG(BPF_REG_0, BPF_REG_2),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 8),
- BPF_JMP_REG(BPF_JGT, BPF_REG_0, BPF_REG_3, 1),
- BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_2, 0),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- },
- .result = REJECT,
- .errstr = "invalid access to packet",
- .prog_type = BPF_PROG_TYPE_XDP,
-},
-{
- "meta access, test4",
- .insns = {
- BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1,
- offsetof(struct xdp_md, data_meta)),
- BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1,
- offsetof(struct xdp_md, data_end)),
- BPF_LDX_MEM(BPF_W, BPF_REG_4, BPF_REG_1, offsetof(struct xdp_md, data)),
- BPF_MOV64_REG(BPF_REG_0, BPF_REG_4),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 8),
- BPF_JMP_REG(BPF_JGT, BPF_REG_0, BPF_REG_3, 1),
- BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_2, 0),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- },
- .result = REJECT,
- .errstr = "invalid access to packet",
- .prog_type = BPF_PROG_TYPE_XDP,
-},
-{
- "meta access, test5",
- .insns = {
- BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1,
- offsetof(struct xdp_md, data_meta)),
- BPF_LDX_MEM(BPF_W, BPF_REG_4, BPF_REG_1, offsetof(struct xdp_md, data)),
- BPF_MOV64_REG(BPF_REG_0, BPF_REG_3),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 8),
- BPF_JMP_REG(BPF_JGT, BPF_REG_0, BPF_REG_4, 3),
- BPF_MOV64_IMM(BPF_REG_2, -8),
- BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_xdp_adjust_meta),
- BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_3, 0),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- },
- .result = REJECT,
- .errstr = "R3 !read_ok",
- .prog_type = BPF_PROG_TYPE_XDP,
-},
-{
- "meta access, test6",
- .insns = {
- BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1,
- offsetof(struct xdp_md, data_meta)),
- BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, offsetof(struct xdp_md, data)),
- BPF_MOV64_REG(BPF_REG_0, BPF_REG_3),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 8),
- BPF_MOV64_REG(BPF_REG_4, BPF_REG_2),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_4, 8),
- BPF_JMP_REG(BPF_JGT, BPF_REG_4, BPF_REG_0, 1),
- BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_2, 0),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- },
- .result = REJECT,
- .errstr = "invalid access to packet",
- .prog_type = BPF_PROG_TYPE_XDP,
-},
-{
- "meta access, test7",
- .insns = {
- BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1,
- offsetof(struct xdp_md, data_meta)),
- BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, offsetof(struct xdp_md, data)),
- BPF_MOV64_REG(BPF_REG_0, BPF_REG_3),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 8),
- BPF_MOV64_REG(BPF_REG_4, BPF_REG_2),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_4, 8),
- BPF_JMP_REG(BPF_JGT, BPF_REG_4, BPF_REG_3, 1),
- BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_2, 0),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- },
- .result = ACCEPT,
- .prog_type = BPF_PROG_TYPE_XDP,
-},
-{
- "meta access, test8",
- .insns = {
- BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1,
- offsetof(struct xdp_md, data_meta)),
- BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, offsetof(struct xdp_md, data)),
- BPF_MOV64_REG(BPF_REG_4, BPF_REG_2),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_4, 0xFFFF),
- BPF_JMP_REG(BPF_JGT, BPF_REG_4, BPF_REG_3, 1),
- BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_2, 0),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- },
- .result = ACCEPT,
- .prog_type = BPF_PROG_TYPE_XDP,
-},
-{
- "meta access, test9",
- .insns = {
- BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1,
- offsetof(struct xdp_md, data_meta)),
- BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, offsetof(struct xdp_md, data)),
- BPF_MOV64_REG(BPF_REG_4, BPF_REG_2),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_4, 0xFFFF),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_4, 1),
- BPF_JMP_REG(BPF_JGT, BPF_REG_4, BPF_REG_3, 1),
- BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_2, 0),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- },
- .result = REJECT,
- .errstr = "invalid access to packet",
- .prog_type = BPF_PROG_TYPE_XDP,
-},
-{
- "meta access, test10",
- .insns = {
- BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1,
- offsetof(struct xdp_md, data_meta)),
- BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, offsetof(struct xdp_md, data)),
- BPF_LDX_MEM(BPF_W, BPF_REG_4, BPF_REG_1,
- offsetof(struct xdp_md, data_end)),
- BPF_MOV64_IMM(BPF_REG_5, 42),
- BPF_MOV64_IMM(BPF_REG_6, 24),
- BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_5, -8),
- BPF_STX_XADD(BPF_DW, BPF_REG_10, BPF_REG_6, -8),
- BPF_LDX_MEM(BPF_DW, BPF_REG_5, BPF_REG_10, -8),
- BPF_JMP_IMM(BPF_JGT, BPF_REG_5, 100, 6),
- BPF_ALU64_REG(BPF_ADD, BPF_REG_3, BPF_REG_5),
- BPF_MOV64_REG(BPF_REG_5, BPF_REG_3),
- BPF_MOV64_REG(BPF_REG_6, BPF_REG_2),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_6, 8),
- BPF_JMP_REG(BPF_JGT, BPF_REG_6, BPF_REG_5, 1),
- BPF_LDX_MEM(BPF_B, BPF_REG_2, BPF_REG_2, 0),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- },
- .result = REJECT,
- .errstr = "invalid access to packet",
- .prog_type = BPF_PROG_TYPE_XDP,
-},
-{
- "meta access, test11",
- .insns = {
- BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1,
- offsetof(struct xdp_md, data_meta)),
- BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, offsetof(struct xdp_md, data)),
- BPF_MOV64_IMM(BPF_REG_5, 42),
- BPF_MOV64_IMM(BPF_REG_6, 24),
- BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_5, -8),
- BPF_STX_XADD(BPF_DW, BPF_REG_10, BPF_REG_6, -8),
- BPF_LDX_MEM(BPF_DW, BPF_REG_5, BPF_REG_10, -8),
- BPF_JMP_IMM(BPF_JGT, BPF_REG_5, 100, 6),
- BPF_ALU64_REG(BPF_ADD, BPF_REG_2, BPF_REG_5),
- BPF_MOV64_REG(BPF_REG_5, BPF_REG_2),
- BPF_MOV64_REG(BPF_REG_6, BPF_REG_2),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_6, 8),
- BPF_JMP_REG(BPF_JGT, BPF_REG_6, BPF_REG_3, 1),
- BPF_LDX_MEM(BPF_B, BPF_REG_5, BPF_REG_5, 0),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- },
- .result = ACCEPT,
- .prog_type = BPF_PROG_TYPE_XDP,
-},
-{
- "meta access, test12",
- .insns = {
- BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1,
- offsetof(struct xdp_md, data_meta)),
- BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, offsetof(struct xdp_md, data)),
- BPF_LDX_MEM(BPF_W, BPF_REG_4, BPF_REG_1,
- offsetof(struct xdp_md, data_end)),
- BPF_MOV64_REG(BPF_REG_5, BPF_REG_3),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_5, 16),
- BPF_JMP_REG(BPF_JGT, BPF_REG_5, BPF_REG_4, 5),
- BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_3, 0),
- BPF_MOV64_REG(BPF_REG_5, BPF_REG_2),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_5, 16),
- BPF_JMP_REG(BPF_JGT, BPF_REG_5, BPF_REG_3, 1),
- BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_2, 0),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- },
- .result = ACCEPT,
- .prog_type = BPF_PROG_TYPE_XDP,
-},
diff --git a/tools/testing/selftests/bpf/verifier/perf_event_sample_period.c b/tools/testing/selftests/bpf/verifier/perf_event_sample_period.c
index 471c1a5950d8..d8a9b1a1f9a2 100644
--- a/tools/testing/selftests/bpf/verifier/perf_event_sample_period.c
+++ b/tools/testing/selftests/bpf/verifier/perf_event_sample_period.c
@@ -2,7 +2,7 @@
"check bpf_perf_event_data->sample_period byte load permitted",
.insns = {
BPF_MOV64_IMM(BPF_REG_0, 0),
-#if __BYTE_ORDER == __LITTLE_ENDIAN
+#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1,
offsetof(struct bpf_perf_event_data, sample_period)),
#else
@@ -18,7 +18,7 @@
"check bpf_perf_event_data->sample_period half load permitted",
.insns = {
BPF_MOV64_IMM(BPF_REG_0, 0),
-#if __BYTE_ORDER == __LITTLE_ENDIAN
+#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
BPF_LDX_MEM(BPF_H, BPF_REG_0, BPF_REG_1,
offsetof(struct bpf_perf_event_data, sample_period)),
#else
@@ -34,7 +34,7 @@
"check bpf_perf_event_data->sample_period word load permitted",
.insns = {
BPF_MOV64_IMM(BPF_REG_0, 0),
-#if __BYTE_ORDER == __LITTLE_ENDIAN
+#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1,
offsetof(struct bpf_perf_event_data, sample_period)),
#else
diff --git a/tools/testing/selftests/bpf/verifier/precise.c b/tools/testing/selftests/bpf/verifier/precise.c
index 6dc8003ffc70..0a9293a57211 100644
--- a/tools/testing/selftests/bpf/verifier/precise.c
+++ b/tools/testing/selftests/bpf/verifier/precise.c
@@ -27,7 +27,7 @@
BPF_JMP_IMM(BPF_JLT, BPF_REG_2, 8, 1),
BPF_EXIT_INSN(),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, 1), /* R2=inv(umin=1, umax=8) */
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, 1), /* R2=scalar(umin=1, umax=8) */
BPF_MOV64_REG(BPF_REG_1, BPF_REG_FP),
BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -8),
BPF_MOV64_IMM(BPF_REG_3, 0),
@@ -38,25 +38,24 @@
.fixup_map_array_48b = { 1 },
.result = VERBOSE_ACCEPT,
.errstr =
- "26: (85) call bpf_probe_read_kernel#113\
- last_idx 26 first_idx 20\
- regs=4 stack=0 before 25\
- regs=4 stack=0 before 24\
- regs=4 stack=0 before 23\
- regs=4 stack=0 before 22\
- regs=4 stack=0 before 20\
- parent didn't have regs=4 stack=0 marks\
- last_idx 19 first_idx 10\
- regs=4 stack=0 before 19\
- regs=200 stack=0 before 18\
- regs=300 stack=0 before 17\
- regs=201 stack=0 before 15\
- regs=201 stack=0 before 14\
- regs=200 stack=0 before 13\
- regs=200 stack=0 before 12\
- regs=200 stack=0 before 11\
- regs=200 stack=0 before 10\
- parent already had regs=0 stack=0 marks",
+ "mark_precise: frame0: last_idx 26 first_idx 20\
+ mark_precise: frame0: regs=r2 stack= before 25\
+ mark_precise: frame0: regs=r2 stack= before 24\
+ mark_precise: frame0: regs=r2 stack= before 23\
+ mark_precise: frame0: regs=r2 stack= before 22\
+ mark_precise: frame0: regs=r2 stack= before 20\
+ mark_precise: frame0: parent state regs=r2 stack=:\
+ mark_precise: frame0: last_idx 19 first_idx 10\
+ mark_precise: frame0: regs=r2,r9 stack= before 19\
+ mark_precise: frame0: regs=r9 stack= before 18\
+ mark_precise: frame0: regs=r8,r9 stack= before 17\
+ mark_precise: frame0: regs=r0,r9 stack= before 15\
+ mark_precise: frame0: regs=r0,r9 stack= before 14\
+ mark_precise: frame0: regs=r9 stack= before 13\
+ mark_precise: frame0: regs=r9 stack= before 12\
+ mark_precise: frame0: regs=r9 stack= before 11\
+ mark_precise: frame0: regs=r9 stack= before 10\
+ mark_precise: frame0: parent state regs= stack=:",
},
{
"precise: test 2",
@@ -87,7 +86,7 @@
BPF_JMP_IMM(BPF_JLT, BPF_REG_2, 8, 1),
BPF_EXIT_INSN(),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, 1), /* R2=inv(umin=1, umax=8) */
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, 1), /* R2=scalar(umin=1, umax=8) */
BPF_MOV64_REG(BPF_REG_1, BPF_REG_FP),
BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -8),
BPF_MOV64_IMM(BPF_REG_3, 0),
@@ -100,20 +99,20 @@
.flags = BPF_F_TEST_STATE_FREQ,
.errstr =
"26: (85) call bpf_probe_read_kernel#113\
- last_idx 26 first_idx 22\
- regs=4 stack=0 before 25\
- regs=4 stack=0 before 24\
- regs=4 stack=0 before 23\
- regs=4 stack=0 before 22\
- parent didn't have regs=4 stack=0 marks\
- last_idx 20 first_idx 20\
- regs=4 stack=0 before 20\
- parent didn't have regs=4 stack=0 marks\
- last_idx 19 first_idx 17\
- regs=4 stack=0 before 19\
- regs=200 stack=0 before 18\
- regs=300 stack=0 before 17\
- parent already had regs=0 stack=0 marks",
+ mark_precise: frame0: last_idx 26 first_idx 22\
+ mark_precise: frame0: regs=r2 stack= before 25\
+ mark_precise: frame0: regs=r2 stack= before 24\
+ mark_precise: frame0: regs=r2 stack= before 23\
+ mark_precise: frame0: regs=r2 stack= before 22\
+ mark_precise: frame0: parent state regs=r2 stack=:\
+ mark_precise: frame0: last_idx 20 first_idx 20\
+ mark_precise: frame0: regs=r2,r9 stack= before 20\
+ mark_precise: frame0: parent state regs=r2,r9 stack=:\
+ mark_precise: frame0: last_idx 19 first_idx 17\
+ mark_precise: frame0: regs=r2,r9 stack= before 19\
+ mark_precise: frame0: regs=r9 stack= before 18\
+ mark_precise: frame0: regs=r8,r9 stack= before 17\
+ mark_precise: frame0: parent state regs= stack=:",
},
{
"precise: cross frame pruning",
@@ -141,10 +140,11 @@
.result = REJECT,
},
{
- "precise: ST insn causing spi > allocated_stack",
+ "precise: ST zero to stack insn is supported",
.insns = {
BPF_MOV64_REG(BPF_REG_3, BPF_REG_10),
BPF_JMP_IMM(BPF_JNE, BPF_REG_3, 123, 0),
+ /* not a register spill, so we stop precision propagation for R4 here */
BPF_ST_MEM(BPF_DW, BPF_REG_3, -8, 0),
BPF_LDX_MEM(BPF_DW, BPF_REG_4, BPF_REG_10, -8),
BPF_MOV64_IMM(BPF_REG_0, -1),
@@ -153,15 +153,16 @@
},
.prog_type = BPF_PROG_TYPE_XDP,
.flags = BPF_F_TEST_STATE_FREQ,
- .errstr = "5: (2d) if r4 > r0 goto pc+0\
- last_idx 5 first_idx 5\
- parent didn't have regs=10 stack=0 marks\
- last_idx 4 first_idx 2\
- regs=10 stack=0 before 4\
- regs=10 stack=0 before 3\
- regs=0 stack=1 before 2\
- last_idx 5 first_idx 5\
- parent didn't have regs=1 stack=0 marks",
+ .errstr = "mark_precise: frame0: last_idx 5 first_idx 5\
+ mark_precise: frame0: parent state regs=r4 stack=:\
+ mark_precise: frame0: last_idx 4 first_idx 2\
+ mark_precise: frame0: regs=r4 stack= before 4\
+ mark_precise: frame0: regs=r4 stack= before 3\
+ mark_precise: frame0: last_idx 5 first_idx 5\
+ mark_precise: frame0: parent state regs=r0 stack=:\
+ mark_precise: frame0: last_idx 4 first_idx 2\
+ mark_precise: frame0: regs=r0 stack= before 4\
+ 5: R0=-1 R4=0",
.result = VERBOSE_ACCEPT,
.retval = -1,
},
@@ -169,6 +170,8 @@
"precise: STX insn causing spi > allocated_stack",
.insns = {
BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_get_prandom_u32),
+ /* make later reg spill more interesting by having somewhat known scalar */
+ BPF_ALU64_IMM(BPF_AND, BPF_REG_0, 0xff),
BPF_MOV64_REG(BPF_REG_3, BPF_REG_10),
BPF_JMP_IMM(BPF_JNE, BPF_REG_3, 123, 0),
BPF_STX_MEM(BPF_DW, BPF_REG_3, BPF_REG_0, -8),
@@ -179,16 +182,83 @@
},
.prog_type = BPF_PROG_TYPE_XDP,
.flags = BPF_F_TEST_STATE_FREQ,
- .errstr = "last_idx 6 first_idx 6\
- parent didn't have regs=10 stack=0 marks\
- last_idx 5 first_idx 3\
- regs=10 stack=0 before 5\
- regs=10 stack=0 before 4\
- regs=0 stack=1 before 3\
- last_idx 6 first_idx 6\
- parent didn't have regs=1 stack=0 marks\
- last_idx 5 first_idx 3\
- regs=1 stack=0 before 5",
+ .errstr = "mark_precise: frame0: last_idx 7 first_idx 7\
+ mark_precise: frame0: parent state regs=r4 stack=-8:\
+ mark_precise: frame0: last_idx 6 first_idx 4\
+ mark_precise: frame0: regs=r4 stack=-8 before 6: (b7) r0 = -1\
+ mark_precise: frame0: regs=r4 stack=-8 before 5: (79) r4 = *(u64 *)(r10 -8)\
+ mark_precise: frame0: regs= stack=-8 before 4: (7b) *(u64 *)(r3 -8) = r0\
+ mark_precise: frame0: parent state regs=r0 stack=:\
+ mark_precise: frame0: last_idx 3 first_idx 3\
+ mark_precise: frame0: regs=r0 stack= before 3: (55) if r3 != 0x7b goto pc+0\
+ mark_precise: frame0: regs=r0 stack= before 2: (bf) r3 = r10\
+ mark_precise: frame0: regs=r0 stack= before 1: (57) r0 &= 255\
+ mark_precise: frame0: parent state regs=r0 stack=:\
+ mark_precise: frame0: last_idx 0 first_idx 0\
+ mark_precise: frame0: regs=r0 stack= before 0: (85) call bpf_get_prandom_u32#7\
+ mark_precise: frame0: last_idx 7 first_idx 7\
+ mark_precise: frame0: parent state regs= stack=:",
.result = VERBOSE_ACCEPT,
.retval = -1,
},
+{
+ "precise: mark_chain_precision for ARG_CONST_ALLOC_SIZE_OR_ZERO",
+ .insns = {
+ BPF_LDX_MEM(BPF_W, BPF_REG_4, BPF_REG_1, offsetof(struct xdp_md, ingress_ifindex)),
+ BPF_LD_MAP_FD(BPF_REG_6, 0),
+ BPF_MOV64_REG(BPF_REG_1, BPF_REG_6),
+ BPF_MOV64_IMM(BPF_REG_2, 1),
+ BPF_MOV64_IMM(BPF_REG_3, 0),
+ BPF_JMP_IMM(BPF_JEQ, BPF_REG_4, 0, 1),
+ BPF_MOV64_IMM(BPF_REG_2, 0x1000),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_ringbuf_reserve),
+ BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 1),
+ BPF_EXIT_INSN(),
+ BPF_MOV64_REG(BPF_REG_1, BPF_REG_0),
+ BPF_LDX_MEM(BPF_DW, BPF_REG_2, BPF_REG_0, 42),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_ringbuf_submit),
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .fixup_map_ringbuf = { 1 },
+ .prog_type = BPF_PROG_TYPE_XDP,
+ .flags = BPF_F_TEST_STATE_FREQ | F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
+ .errstr = "invalid access to memory, mem_size=1 off=42 size=8",
+ .result = REJECT,
+},
+{
+ "precise: program doesn't prematurely prune branches",
+ .insns = {
+ BPF_ALU64_IMM(BPF_MOV, BPF_REG_6, 0x400),
+ BPF_ALU64_IMM(BPF_MOV, BPF_REG_7, 0),
+ BPF_ALU64_IMM(BPF_MOV, BPF_REG_8, 0),
+ BPF_ALU64_IMM(BPF_MOV, BPF_REG_9, 0x80000000),
+ BPF_ALU64_IMM(BPF_MOD, BPF_REG_6, 0x401),
+ BPF_JMP_IMM(BPF_JA, 0, 0, 0),
+ BPF_JMP_REG(BPF_JLE, BPF_REG_6, BPF_REG_9, 2),
+ BPF_ALU64_IMM(BPF_MOD, BPF_REG_6, 1),
+ BPF_ALU64_IMM(BPF_MOV, BPF_REG_9, 0),
+ BPF_JMP_REG(BPF_JLE, BPF_REG_6, BPF_REG_9, 1),
+ BPF_ALU64_IMM(BPF_MOV, BPF_REG_6, 0),
+ BPF_ALU64_IMM(BPF_MOV, BPF_REG_0, 0),
+ BPF_STX_MEM(BPF_W, BPF_REG_10, BPF_REG_0, -4),
+ BPF_LD_MAP_FD(BPF_REG_4, 0),
+ BPF_ALU64_REG(BPF_MOV, BPF_REG_1, BPF_REG_4),
+ BPF_ALU64_REG(BPF_MOV, BPF_REG_2, BPF_REG_10),
+ BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -4),
+ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
+ BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 1),
+ BPF_EXIT_INSN(),
+ BPF_ALU64_IMM(BPF_RSH, BPF_REG_6, 10),
+ BPF_ALU64_IMM(BPF_MUL, BPF_REG_6, 8192),
+ BPF_ALU64_REG(BPF_MOV, BPF_REG_1, BPF_REG_0),
+ BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_6),
+ BPF_LDX_MEM(BPF_DW, BPF_REG_3, BPF_REG_0, 0),
+ BPF_STX_MEM(BPF_DW, BPF_REG_1, BPF_REG_3, 0),
+ BPF_EXIT_INSN(),
+ },
+ .fixup_map_array_48b = { 13 },
+ .prog_type = BPF_PROG_TYPE_XDP,
+ .result = REJECT,
+ .errstr = "register with unbounded min value is not allowed",
+},
diff --git a/tools/testing/selftests/bpf/verifier/prevent_map_lookup.c b/tools/testing/selftests/bpf/verifier/prevent_map_lookup.c
deleted file mode 100644
index fc4e301260f6..000000000000
--- a/tools/testing/selftests/bpf/verifier/prevent_map_lookup.c
+++ /dev/null
@@ -1,29 +0,0 @@
-{
- "prevent map lookup in stack trace",
- .insns = {
- BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
- BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
- BPF_LD_MAP_FD(BPF_REG_1, 0),
- BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
- BPF_EXIT_INSN(),
- },
- .fixup_map_stacktrace = { 3 },
- .result = REJECT,
- .errstr = "cannot pass map_type 7 into func bpf_map_lookup_elem",
- .prog_type = BPF_PROG_TYPE_PERF_EVENT,
-},
-{
- "prevent map lookup in prog array",
- .insns = {
- BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
- BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
- BPF_LD_MAP_FD(BPF_REG_1, 0),
- BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
- BPF_EXIT_INSN(),
- },
- .fixup_prog2 = { 3 },
- .result = REJECT,
- .errstr = "cannot pass map_type 3 into func bpf_map_lookup_elem",
-},
diff --git a/tools/testing/selftests/bpf/verifier/raw_stack.c b/tools/testing/selftests/bpf/verifier/raw_stack.c
deleted file mode 100644
index 193d9e87d5a9..000000000000
--- a/tools/testing/selftests/bpf/verifier/raw_stack.c
+++ /dev/null
@@ -1,305 +0,0 @@
-{
- "raw_stack: no skb_load_bytes",
- .insns = {
- BPF_MOV64_IMM(BPF_REG_2, 4),
- BPF_ALU64_REG(BPF_MOV, BPF_REG_6, BPF_REG_10),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_6, -8),
- BPF_MOV64_REG(BPF_REG_3, BPF_REG_6),
- BPF_MOV64_IMM(BPF_REG_4, 8),
- /* Call to skb_load_bytes() omitted. */
- BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_6, 0),
- BPF_EXIT_INSN(),
- },
- .result = REJECT,
- .errstr = "invalid read from stack off -8+0 size 8",
- .prog_type = BPF_PROG_TYPE_SCHED_CLS,
-},
-{
- "raw_stack: skb_load_bytes, negative len",
- .insns = {
- BPF_MOV64_IMM(BPF_REG_2, 4),
- BPF_ALU64_REG(BPF_MOV, BPF_REG_6, BPF_REG_10),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_6, -8),
- BPF_MOV64_REG(BPF_REG_3, BPF_REG_6),
- BPF_MOV64_IMM(BPF_REG_4, -8),
- BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_skb_load_bytes),
- BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_6, 0),
- BPF_EXIT_INSN(),
- },
- .result = REJECT,
- .errstr = "R4 min value is negative",
- .prog_type = BPF_PROG_TYPE_SCHED_CLS,
-},
-{
- "raw_stack: skb_load_bytes, negative len 2",
- .insns = {
- BPF_MOV64_IMM(BPF_REG_2, 4),
- BPF_ALU64_REG(BPF_MOV, BPF_REG_6, BPF_REG_10),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_6, -8),
- BPF_MOV64_REG(BPF_REG_3, BPF_REG_6),
- BPF_MOV64_IMM(BPF_REG_4, ~0),
- BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_skb_load_bytes),
- BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_6, 0),
- BPF_EXIT_INSN(),
- },
- .result = REJECT,
- .errstr = "R4 min value is negative",
- .prog_type = BPF_PROG_TYPE_SCHED_CLS,
-},
-{
- "raw_stack: skb_load_bytes, zero len",
- .insns = {
- BPF_MOV64_IMM(BPF_REG_2, 4),
- BPF_ALU64_REG(BPF_MOV, BPF_REG_6, BPF_REG_10),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_6, -8),
- BPF_MOV64_REG(BPF_REG_3, BPF_REG_6),
- BPF_MOV64_IMM(BPF_REG_4, 0),
- BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_skb_load_bytes),
- BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_6, 0),
- BPF_EXIT_INSN(),
- },
- .result = REJECT,
- .errstr = "invalid stack type R3",
- .prog_type = BPF_PROG_TYPE_SCHED_CLS,
-},
-{
- "raw_stack: skb_load_bytes, no init",
- .insns = {
- BPF_MOV64_IMM(BPF_REG_2, 4),
- BPF_ALU64_REG(BPF_MOV, BPF_REG_6, BPF_REG_10),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_6, -8),
- BPF_MOV64_REG(BPF_REG_3, BPF_REG_6),
- BPF_MOV64_IMM(BPF_REG_4, 8),
- BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_skb_load_bytes),
- BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_6, 0),
- BPF_EXIT_INSN(),
- },
- .result = ACCEPT,
- .prog_type = BPF_PROG_TYPE_SCHED_CLS,
-},
-{
- "raw_stack: skb_load_bytes, init",
- .insns = {
- BPF_MOV64_IMM(BPF_REG_2, 4),
- BPF_ALU64_REG(BPF_MOV, BPF_REG_6, BPF_REG_10),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_6, -8),
- BPF_ST_MEM(BPF_DW, BPF_REG_6, 0, 0xcafe),
- BPF_MOV64_REG(BPF_REG_3, BPF_REG_6),
- BPF_MOV64_IMM(BPF_REG_4, 8),
- BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_skb_load_bytes),
- BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_6, 0),
- BPF_EXIT_INSN(),
- },
- .result = ACCEPT,
- .prog_type = BPF_PROG_TYPE_SCHED_CLS,
-},
-{
- "raw_stack: skb_load_bytes, spilled regs around bounds",
- .insns = {
- BPF_MOV64_IMM(BPF_REG_2, 4),
- BPF_ALU64_REG(BPF_MOV, BPF_REG_6, BPF_REG_10),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_6, -16),
- BPF_STX_MEM(BPF_DW, BPF_REG_6, BPF_REG_1, -8),
- BPF_STX_MEM(BPF_DW, BPF_REG_6, BPF_REG_1, 8),
- BPF_MOV64_REG(BPF_REG_3, BPF_REG_6),
- BPF_MOV64_IMM(BPF_REG_4, 8),
- BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_skb_load_bytes),
- BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_6, -8),
- BPF_LDX_MEM(BPF_DW, BPF_REG_2, BPF_REG_6, 8),
- BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_0,
- offsetof(struct __sk_buff, mark)),
- BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_2,
- offsetof(struct __sk_buff, priority)),
- BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_2),
- BPF_EXIT_INSN(),
- },
- .result = ACCEPT,
- .prog_type = BPF_PROG_TYPE_SCHED_CLS,
-},
-{
- "raw_stack: skb_load_bytes, spilled regs corruption",
- .insns = {
- BPF_MOV64_IMM(BPF_REG_2, 4),
- BPF_ALU64_REG(BPF_MOV, BPF_REG_6, BPF_REG_10),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_6, -8),
- BPF_STX_MEM(BPF_DW, BPF_REG_6, BPF_REG_1, 0),
- BPF_MOV64_REG(BPF_REG_3, BPF_REG_6),
- BPF_MOV64_IMM(BPF_REG_4, 8),
- BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_skb_load_bytes),
- BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_6, 0),
- BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_0,
- offsetof(struct __sk_buff, mark)),
- BPF_EXIT_INSN(),
- },
- .result = REJECT,
- .errstr = "R0 invalid mem access 'inv'",
- .prog_type = BPF_PROG_TYPE_SCHED_CLS,
- .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
-},
-{
- "raw_stack: skb_load_bytes, spilled regs corruption 2",
- .insns = {
- BPF_MOV64_IMM(BPF_REG_2, 4),
- BPF_ALU64_REG(BPF_MOV, BPF_REG_6, BPF_REG_10),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_6, -16),
- BPF_STX_MEM(BPF_DW, BPF_REG_6, BPF_REG_1, -8),
- BPF_STX_MEM(BPF_DW, BPF_REG_6, BPF_REG_1, 0),
- BPF_STX_MEM(BPF_DW, BPF_REG_6, BPF_REG_1, 8),
- BPF_MOV64_REG(BPF_REG_3, BPF_REG_6),
- BPF_MOV64_IMM(BPF_REG_4, 8),
- BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_skb_load_bytes),
- BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_6, -8),
- BPF_LDX_MEM(BPF_DW, BPF_REG_2, BPF_REG_6, 8),
- BPF_LDX_MEM(BPF_DW, BPF_REG_3, BPF_REG_6, 0),
- BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_0,
- offsetof(struct __sk_buff, mark)),
- BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_2,
- offsetof(struct __sk_buff, priority)),
- BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_2),
- BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_3,
- offsetof(struct __sk_buff, pkt_type)),
- BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_3),
- BPF_EXIT_INSN(),
- },
- .result = REJECT,
- .errstr = "R3 invalid mem access 'inv'",
- .prog_type = BPF_PROG_TYPE_SCHED_CLS,
- .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
-},
-{
- "raw_stack: skb_load_bytes, spilled regs + data",
- .insns = {
- BPF_MOV64_IMM(BPF_REG_2, 4),
- BPF_ALU64_REG(BPF_MOV, BPF_REG_6, BPF_REG_10),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_6, -16),
- BPF_STX_MEM(BPF_DW, BPF_REG_6, BPF_REG_1, -8),
- BPF_STX_MEM(BPF_DW, BPF_REG_6, BPF_REG_1, 0),
- BPF_STX_MEM(BPF_DW, BPF_REG_6, BPF_REG_1, 8),
- BPF_MOV64_REG(BPF_REG_3, BPF_REG_6),
- BPF_MOV64_IMM(BPF_REG_4, 8),
- BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_skb_load_bytes),
- BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_6, -8),
- BPF_LDX_MEM(BPF_DW, BPF_REG_2, BPF_REG_6, 8),
- BPF_LDX_MEM(BPF_DW, BPF_REG_3, BPF_REG_6, 0),
- BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_0,
- offsetof(struct __sk_buff, mark)),
- BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_2,
- offsetof(struct __sk_buff, priority)),
- BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_2),
- BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_3),
- BPF_EXIT_INSN(),
- },
- .result = ACCEPT,
- .prog_type = BPF_PROG_TYPE_SCHED_CLS,
-},
-{
- "raw_stack: skb_load_bytes, invalid access 1",
- .insns = {
- BPF_MOV64_IMM(BPF_REG_2, 4),
- BPF_ALU64_REG(BPF_MOV, BPF_REG_6, BPF_REG_10),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_6, -513),
- BPF_MOV64_REG(BPF_REG_3, BPF_REG_6),
- BPF_MOV64_IMM(BPF_REG_4, 8),
- BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_skb_load_bytes),
- BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_6, 0),
- BPF_EXIT_INSN(),
- },
- .result = REJECT,
- .errstr = "invalid stack type R3 off=-513 access_size=8",
- .prog_type = BPF_PROG_TYPE_SCHED_CLS,
-},
-{
- "raw_stack: skb_load_bytes, invalid access 2",
- .insns = {
- BPF_MOV64_IMM(BPF_REG_2, 4),
- BPF_ALU64_REG(BPF_MOV, BPF_REG_6, BPF_REG_10),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_6, -1),
- BPF_MOV64_REG(BPF_REG_3, BPF_REG_6),
- BPF_MOV64_IMM(BPF_REG_4, 8),
- BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_skb_load_bytes),
- BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_6, 0),
- BPF_EXIT_INSN(),
- },
- .result = REJECT,
- .errstr = "invalid stack type R3 off=-1 access_size=8",
- .prog_type = BPF_PROG_TYPE_SCHED_CLS,
-},
-{
- "raw_stack: skb_load_bytes, invalid access 3",
- .insns = {
- BPF_MOV64_IMM(BPF_REG_2, 4),
- BPF_ALU64_REG(BPF_MOV, BPF_REG_6, BPF_REG_10),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_6, 0xffffffff),
- BPF_MOV64_REG(BPF_REG_3, BPF_REG_6),
- BPF_MOV64_IMM(BPF_REG_4, 0xffffffff),
- BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_skb_load_bytes),
- BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_6, 0),
- BPF_EXIT_INSN(),
- },
- .result = REJECT,
- .errstr = "R4 min value is negative",
- .prog_type = BPF_PROG_TYPE_SCHED_CLS,
-},
-{
- "raw_stack: skb_load_bytes, invalid access 4",
- .insns = {
- BPF_MOV64_IMM(BPF_REG_2, 4),
- BPF_ALU64_REG(BPF_MOV, BPF_REG_6, BPF_REG_10),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_6, -1),
- BPF_MOV64_REG(BPF_REG_3, BPF_REG_6),
- BPF_MOV64_IMM(BPF_REG_4, 0x7fffffff),
- BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_skb_load_bytes),
- BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_6, 0),
- BPF_EXIT_INSN(),
- },
- .result = REJECT,
- .errstr = "R4 unbounded memory access, use 'var &= const' or 'if (var < const)'",
- .prog_type = BPF_PROG_TYPE_SCHED_CLS,
-},
-{
- "raw_stack: skb_load_bytes, invalid access 5",
- .insns = {
- BPF_MOV64_IMM(BPF_REG_2, 4),
- BPF_ALU64_REG(BPF_MOV, BPF_REG_6, BPF_REG_10),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_6, -512),
- BPF_MOV64_REG(BPF_REG_3, BPF_REG_6),
- BPF_MOV64_IMM(BPF_REG_4, 0x7fffffff),
- BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_skb_load_bytes),
- BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_6, 0),
- BPF_EXIT_INSN(),
- },
- .result = REJECT,
- .errstr = "R4 unbounded memory access, use 'var &= const' or 'if (var < const)'",
- .prog_type = BPF_PROG_TYPE_SCHED_CLS,
-},
-{
- "raw_stack: skb_load_bytes, invalid access 6",
- .insns = {
- BPF_MOV64_IMM(BPF_REG_2, 4),
- BPF_ALU64_REG(BPF_MOV, BPF_REG_6, BPF_REG_10),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_6, -512),
- BPF_MOV64_REG(BPF_REG_3, BPF_REG_6),
- BPF_MOV64_IMM(BPF_REG_4, 0),
- BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_skb_load_bytes),
- BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_6, 0),
- BPF_EXIT_INSN(),
- },
- .result = REJECT,
- .errstr = "invalid stack type R3 off=-512 access_size=0",
- .prog_type = BPF_PROG_TYPE_SCHED_CLS,
-},
-{
- "raw_stack: skb_load_bytes, large access",
- .insns = {
- BPF_MOV64_IMM(BPF_REG_2, 4),
- BPF_ALU64_REG(BPF_MOV, BPF_REG_6, BPF_REG_10),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_6, -512),
- BPF_MOV64_REG(BPF_REG_3, BPF_REG_6),
- BPF_MOV64_IMM(BPF_REG_4, 512),
- BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_skb_load_bytes),
- BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_6, 0),
- BPF_EXIT_INSN(),
- },
- .result = ACCEPT,
- .prog_type = BPF_PROG_TYPE_SCHED_CLS,
-},
diff --git a/tools/testing/selftests/bpf/verifier/raw_tp_writable.c b/tools/testing/selftests/bpf/verifier/raw_tp_writable.c
deleted file mode 100644
index 95b5d70a1dc1..000000000000
--- a/tools/testing/selftests/bpf/verifier/raw_tp_writable.c
+++ /dev/null
@@ -1,34 +0,0 @@
-{
- "raw_tracepoint_writable: reject variable offset",
- .insns = {
- /* r6 is our tp buffer */
- BPF_LDX_MEM(BPF_DW, BPF_REG_6, BPF_REG_1, 0),
-
- BPF_LD_MAP_FD(BPF_REG_1, 0),
- /* move the key (== 0) to r10-8 */
- BPF_MOV32_IMM(BPF_REG_0, 0),
- BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
- BPF_STX_MEM(BPF_DW, BPF_REG_2, BPF_REG_0, 0),
- /* lookup in the map */
- BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
- BPF_FUNC_map_lookup_elem),
-
- /* exit clean if null */
- BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 1),
- BPF_EXIT_INSN(),
-
- /* shift the buffer pointer to a variable location */
- BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_0, 0),
- BPF_ALU64_REG(BPF_ADD, BPF_REG_6, BPF_REG_0),
- /* clobber whatever's there */
- BPF_MOV64_IMM(BPF_REG_7, 4242),
- BPF_STX_MEM(BPF_DW, BPF_REG_6, BPF_REG_7, 0),
-
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- },
- .fixup_map_hash_8b = { 1, },
- .prog_type = BPF_PROG_TYPE_RAW_TRACEPOINT_WRITABLE,
- .errstr = "R6 invalid variable buffer offset: off=0, var_off=(0x0; 0xffffffff)",
-},
diff --git a/tools/testing/selftests/bpf/verifier/ref_tracking.c b/tools/testing/selftests/bpf/verifier/ref_tracking.c
deleted file mode 100644
index 056e0273bf12..000000000000
--- a/tools/testing/selftests/bpf/verifier/ref_tracking.c
+++ /dev/null
@@ -1,856 +0,0 @@
-{
- "reference tracking: leak potential reference",
- .insns = {
- BPF_SK_LOOKUP(sk_lookup_tcp),
- BPF_MOV64_REG(BPF_REG_6, BPF_REG_0), /* leak reference */
- BPF_EXIT_INSN(),
- },
- .prog_type = BPF_PROG_TYPE_SCHED_CLS,
- .errstr = "Unreleased reference",
- .result = REJECT,
-},
-{
- "reference tracking: leak potential reference to sock_common",
- .insns = {
- BPF_SK_LOOKUP(skc_lookup_tcp),
- BPF_MOV64_REG(BPF_REG_6, BPF_REG_0), /* leak reference */
- BPF_EXIT_INSN(),
- },
- .prog_type = BPF_PROG_TYPE_SCHED_CLS,
- .errstr = "Unreleased reference",
- .result = REJECT,
-},
-{
- "reference tracking: leak potential reference on stack",
- .insns = {
- BPF_SK_LOOKUP(sk_lookup_tcp),
- BPF_MOV64_REG(BPF_REG_4, BPF_REG_10),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_4, -8),
- BPF_STX_MEM(BPF_DW, BPF_REG_4, BPF_REG_0, 0),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- },
- .prog_type = BPF_PROG_TYPE_SCHED_CLS,
- .errstr = "Unreleased reference",
- .result = REJECT,
-},
-{
- "reference tracking: leak potential reference on stack 2",
- .insns = {
- BPF_SK_LOOKUP(sk_lookup_tcp),
- BPF_MOV64_REG(BPF_REG_4, BPF_REG_10),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_4, -8),
- BPF_STX_MEM(BPF_DW, BPF_REG_4, BPF_REG_0, 0),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_ST_MEM(BPF_DW, BPF_REG_4, 0, 0),
- BPF_EXIT_INSN(),
- },
- .prog_type = BPF_PROG_TYPE_SCHED_CLS,
- .errstr = "Unreleased reference",
- .result = REJECT,
-},
-{
- "reference tracking: zero potential reference",
- .insns = {
- BPF_SK_LOOKUP(sk_lookup_tcp),
- BPF_MOV64_IMM(BPF_REG_0, 0), /* leak reference */
- BPF_EXIT_INSN(),
- },
- .prog_type = BPF_PROG_TYPE_SCHED_CLS,
- .errstr = "Unreleased reference",
- .result = REJECT,
-},
-{
- "reference tracking: zero potential reference to sock_common",
- .insns = {
- BPF_SK_LOOKUP(skc_lookup_tcp),
- BPF_MOV64_IMM(BPF_REG_0, 0), /* leak reference */
- BPF_EXIT_INSN(),
- },
- .prog_type = BPF_PROG_TYPE_SCHED_CLS,
- .errstr = "Unreleased reference",
- .result = REJECT,
-},
-{
- "reference tracking: copy and zero potential references",
- .insns = {
- BPF_SK_LOOKUP(sk_lookup_tcp),
- BPF_MOV64_REG(BPF_REG_7, BPF_REG_0),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_MOV64_IMM(BPF_REG_7, 0), /* leak reference */
- BPF_EXIT_INSN(),
- },
- .prog_type = BPF_PROG_TYPE_SCHED_CLS,
- .errstr = "Unreleased reference",
- .result = REJECT,
-},
-{
- "reference tracking: release reference without check",
- .insns = {
- BPF_SK_LOOKUP(sk_lookup_tcp),
- /* reference in r0 may be NULL */
- BPF_MOV64_REG(BPF_REG_1, BPF_REG_0),
- BPF_MOV64_IMM(BPF_REG_2, 0),
- BPF_EMIT_CALL(BPF_FUNC_sk_release),
- BPF_EXIT_INSN(),
- },
- .prog_type = BPF_PROG_TYPE_SCHED_CLS,
- .errstr = "type=sock_or_null expected=sock",
- .result = REJECT,
-},
-{
- "reference tracking: release reference to sock_common without check",
- .insns = {
- BPF_SK_LOOKUP(skc_lookup_tcp),
- /* reference in r0 may be NULL */
- BPF_MOV64_REG(BPF_REG_1, BPF_REG_0),
- BPF_MOV64_IMM(BPF_REG_2, 0),
- BPF_EMIT_CALL(BPF_FUNC_sk_release),
- BPF_EXIT_INSN(),
- },
- .prog_type = BPF_PROG_TYPE_SCHED_CLS,
- .errstr = "type=sock_common_or_null expected=sock",
- .result = REJECT,
-},
-{
- "reference tracking: release reference",
- .insns = {
- BPF_SK_LOOKUP(sk_lookup_tcp),
- BPF_MOV64_REG(BPF_REG_1, BPF_REG_0),
- BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 1),
- BPF_EMIT_CALL(BPF_FUNC_sk_release),
- BPF_EXIT_INSN(),
- },
- .prog_type = BPF_PROG_TYPE_SCHED_CLS,
- .result = ACCEPT,
-},
-{
- "reference tracking: release reference to sock_common",
- .insns = {
- BPF_SK_LOOKUP(skc_lookup_tcp),
- BPF_MOV64_REG(BPF_REG_1, BPF_REG_0),
- BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 1),
- BPF_EMIT_CALL(BPF_FUNC_sk_release),
- BPF_EXIT_INSN(),
- },
- .prog_type = BPF_PROG_TYPE_SCHED_CLS,
- .result = ACCEPT,
-},
-{
- "reference tracking: release reference 2",
- .insns = {
- BPF_SK_LOOKUP(sk_lookup_tcp),
- BPF_MOV64_REG(BPF_REG_1, BPF_REG_0),
- BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 1),
- BPF_EXIT_INSN(),
- BPF_EMIT_CALL(BPF_FUNC_sk_release),
- BPF_EXIT_INSN(),
- },
- .prog_type = BPF_PROG_TYPE_SCHED_CLS,
- .result = ACCEPT,
-},
-{
- "reference tracking: release reference twice",
- .insns = {
- BPF_SK_LOOKUP(sk_lookup_tcp),
- BPF_MOV64_REG(BPF_REG_1, BPF_REG_0),
- BPF_MOV64_REG(BPF_REG_6, BPF_REG_0),
- BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 1),
- BPF_EMIT_CALL(BPF_FUNC_sk_release),
- BPF_MOV64_REG(BPF_REG_1, BPF_REG_6),
- BPF_EMIT_CALL(BPF_FUNC_sk_release),
- BPF_EXIT_INSN(),
- },
- .prog_type = BPF_PROG_TYPE_SCHED_CLS,
- .errstr = "type=inv expected=sock",
- .result = REJECT,
-},
-{
- "reference tracking: release reference twice inside branch",
- .insns = {
- BPF_SK_LOOKUP(sk_lookup_tcp),
- BPF_MOV64_REG(BPF_REG_1, BPF_REG_0),
- BPF_MOV64_REG(BPF_REG_6, BPF_REG_0),
- BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 3), /* goto end */
- BPF_EMIT_CALL(BPF_FUNC_sk_release),
- BPF_MOV64_REG(BPF_REG_1, BPF_REG_6),
- BPF_EMIT_CALL(BPF_FUNC_sk_release),
- BPF_EXIT_INSN(),
- },
- .prog_type = BPF_PROG_TYPE_SCHED_CLS,
- .errstr = "type=inv expected=sock",
- .result = REJECT,
-},
-{
- "reference tracking: alloc, check, free in one subbranch",
- .insns = {
- BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1,
- offsetof(struct __sk_buff, data)),
- BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1,
- offsetof(struct __sk_buff, data_end)),
- BPF_MOV64_REG(BPF_REG_0, BPF_REG_2),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 16),
- /* if (offsetof(skb, mark) > data_len) exit; */
- BPF_JMP_REG(BPF_JLE, BPF_REG_0, BPF_REG_3, 1),
- BPF_EXIT_INSN(),
- BPF_LDX_MEM(BPF_W, BPF_REG_6, BPF_REG_2,
- offsetof(struct __sk_buff, mark)),
- BPF_SK_LOOKUP(sk_lookup_tcp),
- BPF_JMP_IMM(BPF_JEQ, BPF_REG_6, 0, 1), /* mark == 0? */
- /* Leak reference in R0 */
- BPF_EXIT_INSN(),
- BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 2), /* sk NULL? */
- BPF_MOV64_REG(BPF_REG_1, BPF_REG_0),
- BPF_EMIT_CALL(BPF_FUNC_sk_release),
- BPF_EXIT_INSN(),
- },
- .prog_type = BPF_PROG_TYPE_SCHED_CLS,
- .errstr = "Unreleased reference",
- .result = REJECT,
- .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
-},
-{
- "reference tracking: alloc, check, free in both subbranches",
- .insns = {
- BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1,
- offsetof(struct __sk_buff, data)),
- BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1,
- offsetof(struct __sk_buff, data_end)),
- BPF_MOV64_REG(BPF_REG_0, BPF_REG_2),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 16),
- /* if (offsetof(skb, mark) > data_len) exit; */
- BPF_JMP_REG(BPF_JLE, BPF_REG_0, BPF_REG_3, 1),
- BPF_EXIT_INSN(),
- BPF_LDX_MEM(BPF_W, BPF_REG_6, BPF_REG_2,
- offsetof(struct __sk_buff, mark)),
- BPF_SK_LOOKUP(sk_lookup_tcp),
- BPF_JMP_IMM(BPF_JEQ, BPF_REG_6, 0, 4), /* mark == 0? */
- BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 2), /* sk NULL? */
- BPF_MOV64_REG(BPF_REG_1, BPF_REG_0),
- BPF_EMIT_CALL(BPF_FUNC_sk_release),
- BPF_EXIT_INSN(),
- BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 2), /* sk NULL? */
- BPF_MOV64_REG(BPF_REG_1, BPF_REG_0),
- BPF_EMIT_CALL(BPF_FUNC_sk_release),
- BPF_EXIT_INSN(),
- },
- .prog_type = BPF_PROG_TYPE_SCHED_CLS,
- .result = ACCEPT,
- .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
-},
-{
- "reference tracking in call: free reference in subprog",
- .insns = {
- BPF_SK_LOOKUP(sk_lookup_tcp),
- BPF_MOV64_REG(BPF_REG_1, BPF_REG_0), /* unchecked reference */
- BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 2),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
-
- /* subprog 1 */
- BPF_MOV64_REG(BPF_REG_2, BPF_REG_1),
- BPF_JMP_IMM(BPF_JEQ, BPF_REG_2, 0, 1),
- BPF_EMIT_CALL(BPF_FUNC_sk_release),
- BPF_EXIT_INSN(),
- },
- .prog_type = BPF_PROG_TYPE_SCHED_CLS,
- .result = ACCEPT,
-},
-{
- "reference tracking in call: free reference in subprog and outside",
- .insns = {
- BPF_SK_LOOKUP(sk_lookup_tcp),
- BPF_MOV64_REG(BPF_REG_1, BPF_REG_0), /* unchecked reference */
- BPF_MOV64_REG(BPF_REG_6, BPF_REG_0),
- BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 3),
- BPF_MOV64_REG(BPF_REG_1, BPF_REG_6),
- BPF_EMIT_CALL(BPF_FUNC_sk_release),
- BPF_EXIT_INSN(),
-
- /* subprog 1 */
- BPF_MOV64_REG(BPF_REG_2, BPF_REG_1),
- BPF_JMP_IMM(BPF_JEQ, BPF_REG_2, 0, 1),
- BPF_EMIT_CALL(BPF_FUNC_sk_release),
- BPF_EXIT_INSN(),
- },
- .prog_type = BPF_PROG_TYPE_SCHED_CLS,
- .errstr = "type=inv expected=sock",
- .result = REJECT,
-},
-{
- "reference tracking in call: alloc & leak reference in subprog",
- .insns = {
- BPF_MOV64_REG(BPF_REG_4, BPF_REG_10),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_4, -8),
- BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 3),
- BPF_MOV64_REG(BPF_REG_1, BPF_REG_0),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
-
- /* subprog 1 */
- BPF_MOV64_REG(BPF_REG_6, BPF_REG_4),
- BPF_SK_LOOKUP(sk_lookup_tcp),
- /* spill unchecked sk_ptr into stack of caller */
- BPF_STX_MEM(BPF_DW, BPF_REG_6, BPF_REG_0, 0),
- BPF_MOV64_REG(BPF_REG_1, BPF_REG_0),
- BPF_EXIT_INSN(),
- },
- .prog_type = BPF_PROG_TYPE_SCHED_CLS,
- .errstr = "Unreleased reference",
- .result = REJECT,
-},
-{
- "reference tracking in call: alloc in subprog, release outside",
- .insns = {
- BPF_MOV64_REG(BPF_REG_4, BPF_REG_10),
- BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 4),
- BPF_MOV64_REG(BPF_REG_1, BPF_REG_0),
- BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 1),
- BPF_EMIT_CALL(BPF_FUNC_sk_release),
- BPF_EXIT_INSN(),
-
- /* subprog 1 */
- BPF_SK_LOOKUP(sk_lookup_tcp),
- BPF_EXIT_INSN(), /* return sk */
- },
- .prog_type = BPF_PROG_TYPE_SCHED_CLS,
- .retval = POINTER_VALUE,
- .result = ACCEPT,
-},
-{
- "reference tracking in call: sk_ptr leak into caller stack",
- .insns = {
- BPF_MOV64_REG(BPF_REG_4, BPF_REG_10),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_4, -8),
- BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 2),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
-
- /* subprog 1 */
- BPF_MOV64_REG(BPF_REG_5, BPF_REG_10),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_5, -8),
- BPF_STX_MEM(BPF_DW, BPF_REG_5, BPF_REG_4, 0),
- BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 5),
- /* spill unchecked sk_ptr into stack of caller */
- BPF_MOV64_REG(BPF_REG_5, BPF_REG_10),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_5, -8),
- BPF_LDX_MEM(BPF_DW, BPF_REG_4, BPF_REG_5, 0),
- BPF_STX_MEM(BPF_DW, BPF_REG_4, BPF_REG_0, 0),
- BPF_EXIT_INSN(),
-
- /* subprog 2 */
- BPF_SK_LOOKUP(sk_lookup_tcp),
- BPF_EXIT_INSN(),
- },
- .prog_type = BPF_PROG_TYPE_SCHED_CLS,
- .errstr = "Unreleased reference",
- .result = REJECT,
-},
-{
- "reference tracking in call: sk_ptr spill into caller stack",
- .insns = {
- BPF_MOV64_REG(BPF_REG_4, BPF_REG_10),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_4, -8),
- BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 2),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
-
- /* subprog 1 */
- BPF_MOV64_REG(BPF_REG_5, BPF_REG_10),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_5, -8),
- BPF_STX_MEM(BPF_DW, BPF_REG_5, BPF_REG_4, 0),
- BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 8),
- /* spill unchecked sk_ptr into stack of caller */
- BPF_MOV64_REG(BPF_REG_5, BPF_REG_10),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_5, -8),
- BPF_LDX_MEM(BPF_DW, BPF_REG_4, BPF_REG_5, 0),
- BPF_STX_MEM(BPF_DW, BPF_REG_4, BPF_REG_0, 0),
- BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 2),
- /* now the sk_ptr is verified, free the reference */
- BPF_LDX_MEM(BPF_DW, BPF_REG_1, BPF_REG_4, 0),
- BPF_EMIT_CALL(BPF_FUNC_sk_release),
- BPF_EXIT_INSN(),
-
- /* subprog 2 */
- BPF_SK_LOOKUP(sk_lookup_tcp),
- BPF_EXIT_INSN(),
- },
- .prog_type = BPF_PROG_TYPE_SCHED_CLS,
- .result = ACCEPT,
-},
-{
- "reference tracking: allow LD_ABS",
- .insns = {
- BPF_MOV64_REG(BPF_REG_6, BPF_REG_1),
- BPF_SK_LOOKUP(sk_lookup_tcp),
- BPF_MOV64_REG(BPF_REG_1, BPF_REG_0),
- BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 1),
- BPF_EMIT_CALL(BPF_FUNC_sk_release),
- BPF_LD_ABS(BPF_B, 0),
- BPF_LD_ABS(BPF_H, 0),
- BPF_LD_ABS(BPF_W, 0),
- BPF_EXIT_INSN(),
- },
- .prog_type = BPF_PROG_TYPE_SCHED_CLS,
- .result = ACCEPT,
-},
-{
- "reference tracking: forbid LD_ABS while holding reference",
- .insns = {
- BPF_MOV64_REG(BPF_REG_6, BPF_REG_1),
- BPF_SK_LOOKUP(sk_lookup_tcp),
- BPF_LD_ABS(BPF_B, 0),
- BPF_LD_ABS(BPF_H, 0),
- BPF_LD_ABS(BPF_W, 0),
- BPF_MOV64_REG(BPF_REG_1, BPF_REG_0),
- BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 1),
- BPF_EMIT_CALL(BPF_FUNC_sk_release),
- BPF_EXIT_INSN(),
- },
- .prog_type = BPF_PROG_TYPE_SCHED_CLS,
- .errstr = "BPF_LD_[ABS|IND] cannot be mixed with socket references",
- .result = REJECT,
-},
-{
- "reference tracking: allow LD_IND",
- .insns = {
- BPF_MOV64_REG(BPF_REG_6, BPF_REG_1),
- BPF_SK_LOOKUP(sk_lookup_tcp),
- BPF_MOV64_REG(BPF_REG_1, BPF_REG_0),
- BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 1),
- BPF_EMIT_CALL(BPF_FUNC_sk_release),
- BPF_MOV64_IMM(BPF_REG_7, 1),
- BPF_LD_IND(BPF_W, BPF_REG_7, -0x200000),
- BPF_MOV64_REG(BPF_REG_0, BPF_REG_7),
- BPF_EXIT_INSN(),
- },
- .prog_type = BPF_PROG_TYPE_SCHED_CLS,
- .result = ACCEPT,
- .retval = 1,
-},
-{
- "reference tracking: forbid LD_IND while holding reference",
- .insns = {
- BPF_MOV64_REG(BPF_REG_6, BPF_REG_1),
- BPF_SK_LOOKUP(sk_lookup_tcp),
- BPF_MOV64_REG(BPF_REG_4, BPF_REG_0),
- BPF_MOV64_IMM(BPF_REG_7, 1),
- BPF_LD_IND(BPF_W, BPF_REG_7, -0x200000),
- BPF_MOV64_REG(BPF_REG_0, BPF_REG_7),
- BPF_MOV64_REG(BPF_REG_1, BPF_REG_4),
- BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, 0, 1),
- BPF_EMIT_CALL(BPF_FUNC_sk_release),
- BPF_EXIT_INSN(),
- },
- .prog_type = BPF_PROG_TYPE_SCHED_CLS,
- .errstr = "BPF_LD_[ABS|IND] cannot be mixed with socket references",
- .result = REJECT,
-},
-{
- "reference tracking: check reference or tail call",
- .insns = {
- BPF_MOV64_REG(BPF_REG_7, BPF_REG_1),
- BPF_SK_LOOKUP(sk_lookup_tcp),
- /* if (sk) bpf_sk_release() */
- BPF_MOV64_REG(BPF_REG_1, BPF_REG_0),
- BPF_JMP_IMM(BPF_JNE, BPF_REG_1, 0, 7),
- /* bpf_tail_call() */
- BPF_MOV64_IMM(BPF_REG_3, 3),
- BPF_LD_MAP_FD(BPF_REG_2, 0),
- BPF_MOV64_REG(BPF_REG_1, BPF_REG_7),
- BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_tail_call),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- BPF_EMIT_CALL(BPF_FUNC_sk_release),
- BPF_EXIT_INSN(),
- },
- .fixup_prog1 = { 17 },
- .prog_type = BPF_PROG_TYPE_SCHED_CLS,
- .result = ACCEPT,
-},
-{
- "reference tracking: release reference then tail call",
- .insns = {
- BPF_MOV64_REG(BPF_REG_7, BPF_REG_1),
- BPF_SK_LOOKUP(sk_lookup_tcp),
- /* if (sk) bpf_sk_release() */
- BPF_MOV64_REG(BPF_REG_1, BPF_REG_0),
- BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, 0, 1),
- BPF_EMIT_CALL(BPF_FUNC_sk_release),
- /* bpf_tail_call() */
- BPF_MOV64_IMM(BPF_REG_3, 3),
- BPF_LD_MAP_FD(BPF_REG_2, 0),
- BPF_MOV64_REG(BPF_REG_1, BPF_REG_7),
- BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_tail_call),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- },
- .fixup_prog1 = { 18 },
- .prog_type = BPF_PROG_TYPE_SCHED_CLS,
- .result = ACCEPT,
-},
-{
- "reference tracking: leak possible reference over tail call",
- .insns = {
- BPF_MOV64_REG(BPF_REG_7, BPF_REG_1),
- /* Look up socket and store in REG_6 */
- BPF_SK_LOOKUP(sk_lookup_tcp),
- /* bpf_tail_call() */
- BPF_MOV64_REG(BPF_REG_6, BPF_REG_0),
- BPF_MOV64_IMM(BPF_REG_3, 3),
- BPF_LD_MAP_FD(BPF_REG_2, 0),
- BPF_MOV64_REG(BPF_REG_1, BPF_REG_7),
- BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_tail_call),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- /* if (sk) bpf_sk_release() */
- BPF_MOV64_REG(BPF_REG_1, BPF_REG_6),
- BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, 0, 1),
- BPF_EMIT_CALL(BPF_FUNC_sk_release),
- BPF_EXIT_INSN(),
- },
- .fixup_prog1 = { 16 },
- .prog_type = BPF_PROG_TYPE_SCHED_CLS,
- .errstr = "tail_call would lead to reference leak",
- .result = REJECT,
-},
-{
- "reference tracking: leak checked reference over tail call",
- .insns = {
- BPF_MOV64_REG(BPF_REG_7, BPF_REG_1),
- /* Look up socket and store in REG_6 */
- BPF_SK_LOOKUP(sk_lookup_tcp),
- BPF_MOV64_REG(BPF_REG_6, BPF_REG_0),
- /* if (!sk) goto end */
- BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 7),
- /* bpf_tail_call() */
- BPF_MOV64_IMM(BPF_REG_3, 0),
- BPF_LD_MAP_FD(BPF_REG_2, 0),
- BPF_MOV64_REG(BPF_REG_1, BPF_REG_7),
- BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_tail_call),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_MOV64_REG(BPF_REG_1, BPF_REG_6),
- BPF_EMIT_CALL(BPF_FUNC_sk_release),
- BPF_EXIT_INSN(),
- },
- .fixup_prog1 = { 17 },
- .prog_type = BPF_PROG_TYPE_SCHED_CLS,
- .errstr = "tail_call would lead to reference leak",
- .result = REJECT,
-},
-{
- "reference tracking: mangle and release sock_or_null",
- .insns = {
- BPF_SK_LOOKUP(sk_lookup_tcp),
- BPF_MOV64_REG(BPF_REG_1, BPF_REG_0),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 5),
- BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 1),
- BPF_EMIT_CALL(BPF_FUNC_sk_release),
- BPF_EXIT_INSN(),
- },
- .prog_type = BPF_PROG_TYPE_SCHED_CLS,
- .errstr = "R1 pointer arithmetic on sock_or_null prohibited",
- .result = REJECT,
-},
-{
- "reference tracking: mangle and release sock",
- .insns = {
- BPF_SK_LOOKUP(sk_lookup_tcp),
- BPF_MOV64_REG(BPF_REG_1, BPF_REG_0),
- BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 2),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 5),
- BPF_EMIT_CALL(BPF_FUNC_sk_release),
- BPF_EXIT_INSN(),
- },
- .prog_type = BPF_PROG_TYPE_SCHED_CLS,
- .errstr = "R1 pointer arithmetic on sock prohibited",
- .result = REJECT,
-},
-{
- "reference tracking: access member",
- .insns = {
- BPF_SK_LOOKUP(sk_lookup_tcp),
- BPF_MOV64_REG(BPF_REG_6, BPF_REG_0),
- BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 3),
- BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_0, 4),
- BPF_MOV64_REG(BPF_REG_1, BPF_REG_6),
- BPF_EMIT_CALL(BPF_FUNC_sk_release),
- BPF_EXIT_INSN(),
- },
- .prog_type = BPF_PROG_TYPE_SCHED_CLS,
- .result = ACCEPT,
-},
-{
- "reference tracking: write to member",
- .insns = {
- BPF_SK_LOOKUP(sk_lookup_tcp),
- BPF_MOV64_REG(BPF_REG_6, BPF_REG_0),
- BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 5),
- BPF_MOV64_REG(BPF_REG_1, BPF_REG_6),
- BPF_LD_IMM64(BPF_REG_2, 42),
- BPF_STX_MEM(BPF_W, BPF_REG_1, BPF_REG_2,
- offsetof(struct bpf_sock, mark)),
- BPF_MOV64_REG(BPF_REG_1, BPF_REG_6),
- BPF_EMIT_CALL(BPF_FUNC_sk_release),
- BPF_LD_IMM64(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- },
- .prog_type = BPF_PROG_TYPE_SCHED_CLS,
- .errstr = "cannot write into sock",
- .result = REJECT,
-},
-{
- "reference tracking: invalid 64-bit access of member",
- .insns = {
- BPF_SK_LOOKUP(sk_lookup_tcp),
- BPF_MOV64_REG(BPF_REG_6, BPF_REG_0),
- BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 3),
- BPF_LDX_MEM(BPF_DW, BPF_REG_2, BPF_REG_0, 0),
- BPF_MOV64_REG(BPF_REG_1, BPF_REG_6),
- BPF_EMIT_CALL(BPF_FUNC_sk_release),
- BPF_EXIT_INSN(),
- },
- .prog_type = BPF_PROG_TYPE_SCHED_CLS,
- .errstr = "invalid sock access off=0 size=8",
- .result = REJECT,
-},
-{
- "reference tracking: access after release",
- .insns = {
- BPF_SK_LOOKUP(sk_lookup_tcp),
- BPF_MOV64_REG(BPF_REG_1, BPF_REG_0),
- BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 2),
- BPF_EMIT_CALL(BPF_FUNC_sk_release),
- BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, 0),
- BPF_EXIT_INSN(),
- },
- .prog_type = BPF_PROG_TYPE_SCHED_CLS,
- .errstr = "!read_ok",
- .result = REJECT,
-},
-{
- "reference tracking: direct access for lookup",
- .insns = {
- /* Check that the packet is at least 64B long */
- BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1,
- offsetof(struct __sk_buff, data)),
- BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1,
- offsetof(struct __sk_buff, data_end)),
- BPF_MOV64_REG(BPF_REG_0, BPF_REG_2),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 64),
- BPF_JMP_REG(BPF_JGT, BPF_REG_0, BPF_REG_3, 9),
- /* sk = sk_lookup_tcp(ctx, skb->data, ...) */
- BPF_MOV64_IMM(BPF_REG_3, sizeof(struct bpf_sock_tuple)),
- BPF_MOV64_IMM(BPF_REG_4, 0),
- BPF_MOV64_IMM(BPF_REG_5, 0),
- BPF_EMIT_CALL(BPF_FUNC_sk_lookup_tcp),
- BPF_MOV64_REG(BPF_REG_6, BPF_REG_0),
- BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 3),
- BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_0, 4),
- BPF_MOV64_REG(BPF_REG_1, BPF_REG_6),
- BPF_EMIT_CALL(BPF_FUNC_sk_release),
- BPF_EXIT_INSN(),
- },
- .prog_type = BPF_PROG_TYPE_SCHED_CLS,
- .result = ACCEPT,
-},
-{
- "reference tracking: use ptr from bpf_tcp_sock() after release",
- .insns = {
- BPF_SK_LOOKUP(sk_lookup_tcp),
- BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 1),
- BPF_EXIT_INSN(),
- BPF_MOV64_REG(BPF_REG_6, BPF_REG_0),
- BPF_MOV64_REG(BPF_REG_1, BPF_REG_0),
- BPF_EMIT_CALL(BPF_FUNC_tcp_sock),
- BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 3),
- BPF_MOV64_REG(BPF_REG_1, BPF_REG_6),
- BPF_EMIT_CALL(BPF_FUNC_sk_release),
- BPF_EXIT_INSN(),
- BPF_MOV64_REG(BPF_REG_7, BPF_REG_0),
- BPF_MOV64_REG(BPF_REG_1, BPF_REG_6),
- BPF_EMIT_CALL(BPF_FUNC_sk_release),
- BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_7, offsetof(struct bpf_tcp_sock, snd_cwnd)),
- BPF_EXIT_INSN(),
- },
- .prog_type = BPF_PROG_TYPE_SCHED_CLS,
- .result = REJECT,
- .errstr = "invalid mem access",
-},
-{
- "reference tracking: use ptr from bpf_sk_fullsock() after release",
- .insns = {
- BPF_SK_LOOKUP(sk_lookup_tcp),
- BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 1),
- BPF_EXIT_INSN(),
- BPF_MOV64_REG(BPF_REG_6, BPF_REG_0),
- BPF_MOV64_REG(BPF_REG_1, BPF_REG_0),
- BPF_EMIT_CALL(BPF_FUNC_sk_fullsock),
- BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 3),
- BPF_MOV64_REG(BPF_REG_1, BPF_REG_6),
- BPF_EMIT_CALL(BPF_FUNC_sk_release),
- BPF_EXIT_INSN(),
- BPF_MOV64_REG(BPF_REG_7, BPF_REG_0),
- BPF_MOV64_REG(BPF_REG_1, BPF_REG_6),
- BPF_EMIT_CALL(BPF_FUNC_sk_release),
- BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_7, offsetof(struct bpf_sock, type)),
- BPF_EXIT_INSN(),
- },
- .prog_type = BPF_PROG_TYPE_SCHED_CLS,
- .result = REJECT,
- .errstr = "invalid mem access",
-},
-{
- "reference tracking: use ptr from bpf_sk_fullsock(tp) after release",
- .insns = {
- BPF_SK_LOOKUP(sk_lookup_tcp),
- BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 1),
- BPF_EXIT_INSN(),
- BPF_MOV64_REG(BPF_REG_6, BPF_REG_0),
- BPF_MOV64_REG(BPF_REG_1, BPF_REG_0),
- BPF_EMIT_CALL(BPF_FUNC_tcp_sock),
- BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 3),
- BPF_MOV64_REG(BPF_REG_1, BPF_REG_6),
- BPF_EMIT_CALL(BPF_FUNC_sk_release),
- BPF_EXIT_INSN(),
- BPF_MOV64_REG(BPF_REG_1, BPF_REG_0),
- BPF_EMIT_CALL(BPF_FUNC_sk_fullsock),
- BPF_MOV64_REG(BPF_REG_1, BPF_REG_6),
- BPF_MOV64_REG(BPF_REG_6, BPF_REG_0),
- BPF_EMIT_CALL(BPF_FUNC_sk_release),
- BPF_JMP_IMM(BPF_JNE, BPF_REG_6, 0, 1),
- BPF_EXIT_INSN(),
- BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_6, offsetof(struct bpf_sock, type)),
- BPF_EXIT_INSN(),
- },
- .prog_type = BPF_PROG_TYPE_SCHED_CLS,
- .result = REJECT,
- .errstr = "invalid mem access",
-},
-{
- "reference tracking: use sk after bpf_sk_release(tp)",
- .insns = {
- BPF_SK_LOOKUP(sk_lookup_tcp),
- BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 1),
- BPF_EXIT_INSN(),
- BPF_MOV64_REG(BPF_REG_6, BPF_REG_0),
- BPF_MOV64_REG(BPF_REG_1, BPF_REG_0),
- BPF_EMIT_CALL(BPF_FUNC_tcp_sock),
- BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 3),
- BPF_MOV64_REG(BPF_REG_1, BPF_REG_6),
- BPF_EMIT_CALL(BPF_FUNC_sk_release),
- BPF_EXIT_INSN(),
- BPF_MOV64_REG(BPF_REG_1, BPF_REG_0),
- BPF_EMIT_CALL(BPF_FUNC_sk_release),
- BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_6, offsetof(struct bpf_sock, type)),
- BPF_EXIT_INSN(),
- },
- .prog_type = BPF_PROG_TYPE_SCHED_CLS,
- .result = REJECT,
- .errstr = "invalid mem access",
-},
-{
- "reference tracking: use ptr from bpf_get_listener_sock() after bpf_sk_release(sk)",
- .insns = {
- BPF_SK_LOOKUP(sk_lookup_tcp),
- BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 1),
- BPF_EXIT_INSN(),
- BPF_MOV64_REG(BPF_REG_6, BPF_REG_0),
- BPF_MOV64_REG(BPF_REG_1, BPF_REG_0),
- BPF_EMIT_CALL(BPF_FUNC_get_listener_sock),
- BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 3),
- BPF_MOV64_REG(BPF_REG_1, BPF_REG_6),
- BPF_EMIT_CALL(BPF_FUNC_sk_release),
- BPF_EXIT_INSN(),
- BPF_MOV64_REG(BPF_REG_1, BPF_REG_6),
- BPF_MOV64_REG(BPF_REG_6, BPF_REG_0),
- BPF_EMIT_CALL(BPF_FUNC_sk_release),
- BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_6, offsetof(struct bpf_sock, src_port)),
- BPF_EXIT_INSN(),
- },
- .prog_type = BPF_PROG_TYPE_SCHED_CLS,
- .result = ACCEPT,
-},
-{
- "reference tracking: bpf_sk_release(listen_sk)",
- .insns = {
- BPF_SK_LOOKUP(sk_lookup_tcp),
- BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 1),
- BPF_EXIT_INSN(),
- BPF_MOV64_REG(BPF_REG_6, BPF_REG_0),
- BPF_MOV64_REG(BPF_REG_1, BPF_REG_0),
- BPF_EMIT_CALL(BPF_FUNC_get_listener_sock),
- BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 3),
- BPF_MOV64_REG(BPF_REG_1, BPF_REG_6),
- BPF_EMIT_CALL(BPF_FUNC_sk_release),
- BPF_EXIT_INSN(),
- BPF_MOV64_REG(BPF_REG_1, BPF_REG_0),
- BPF_EMIT_CALL(BPF_FUNC_sk_release),
- BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_6, offsetof(struct bpf_sock, type)),
- BPF_MOV64_REG(BPF_REG_1, BPF_REG_6),
- BPF_EMIT_CALL(BPF_FUNC_sk_release),
- BPF_EXIT_INSN(),
- },
- .prog_type = BPF_PROG_TYPE_SCHED_CLS,
- .result = REJECT,
- .errstr = "reference has not been acquired before",
-},
-{
- /* !bpf_sk_fullsock(sk) is checked but !bpf_tcp_sock(sk) is not checked */
- "reference tracking: tp->snd_cwnd after bpf_sk_fullsock(sk) and bpf_tcp_sock(sk)",
- .insns = {
- BPF_SK_LOOKUP(sk_lookup_tcp),
- BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 1),
- BPF_EXIT_INSN(),
- BPF_MOV64_REG(BPF_REG_6, BPF_REG_0),
- BPF_MOV64_REG(BPF_REG_1, BPF_REG_0),
- BPF_EMIT_CALL(BPF_FUNC_sk_fullsock),
- BPF_MOV64_REG(BPF_REG_7, BPF_REG_0),
- BPF_MOV64_REG(BPF_REG_1, BPF_REG_6),
- BPF_EMIT_CALL(BPF_FUNC_tcp_sock),
- BPF_MOV64_REG(BPF_REG_8, BPF_REG_0),
- BPF_JMP_IMM(BPF_JNE, BPF_REG_7, 0, 3),
- BPF_MOV64_REG(BPF_REG_1, BPF_REG_6),
- BPF_EMIT_CALL(BPF_FUNC_sk_release),
- BPF_EXIT_INSN(),
- BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_8, offsetof(struct bpf_tcp_sock, snd_cwnd)),
- BPF_MOV64_REG(BPF_REG_1, BPF_REG_6),
- BPF_EMIT_CALL(BPF_FUNC_sk_release),
- BPF_EXIT_INSN(),
- },
- .prog_type = BPF_PROG_TYPE_SCHED_CLS,
- .result = REJECT,
- .errstr = "invalid mem access",
-},
-{
- "reference tracking: branch tracking valid pointer null comparison",
- .insns = {
- BPF_SK_LOOKUP(sk_lookup_tcp),
- BPF_MOV64_REG(BPF_REG_6, BPF_REG_0),
- BPF_MOV64_IMM(BPF_REG_3, 1),
- BPF_JMP_IMM(BPF_JNE, BPF_REG_6, 0, 1),
- BPF_MOV64_IMM(BPF_REG_3, 0),
- BPF_JMP_IMM(BPF_JEQ, BPF_REG_6, 0, 2),
- BPF_MOV64_REG(BPF_REG_1, BPF_REG_6),
- BPF_EMIT_CALL(BPF_FUNC_sk_release),
- BPF_EXIT_INSN(),
- },
- .prog_type = BPF_PROG_TYPE_SCHED_CLS,
- .result = ACCEPT,
-},
-{
- "reference tracking: branch tracking valid pointer value comparison",
- .insns = {
- BPF_SK_LOOKUP(sk_lookup_tcp),
- BPF_MOV64_REG(BPF_REG_6, BPF_REG_0),
- BPF_MOV64_IMM(BPF_REG_3, 1),
- BPF_JMP_IMM(BPF_JEQ, BPF_REG_6, 0, 4),
- BPF_MOV64_IMM(BPF_REG_3, 0),
- BPF_JMP_IMM(BPF_JEQ, BPF_REG_6, 1234, 2),
- BPF_MOV64_REG(BPF_REG_1, BPF_REG_6),
- BPF_EMIT_CALL(BPF_FUNC_sk_release),
- BPF_EXIT_INSN(),
- },
- .prog_type = BPF_PROG_TYPE_SCHED_CLS,
- .errstr = "Unreleased reference",
- .result = REJECT,
-},
diff --git a/tools/testing/selftests/bpf/verifier/runtime_jit.c b/tools/testing/selftests/bpf/verifier/runtime_jit.c
deleted file mode 100644
index 94c399d1faca..000000000000
--- a/tools/testing/selftests/bpf/verifier/runtime_jit.c
+++ /dev/null
@@ -1,231 +0,0 @@
-{
- "runtime/jit: tail_call within bounds, prog once",
- .insns = {
- BPF_MOV64_IMM(BPF_REG_3, 0),
- BPF_LD_MAP_FD(BPF_REG_2, 0),
- BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_tail_call),
- BPF_MOV64_IMM(BPF_REG_0, 1),
- BPF_EXIT_INSN(),
- },
- .fixup_prog1 = { 1 },
- .result = ACCEPT,
- .retval = 42,
-},
-{
- "runtime/jit: tail_call within bounds, prog loop",
- .insns = {
- BPF_MOV64_IMM(BPF_REG_3, 1),
- BPF_LD_MAP_FD(BPF_REG_2, 0),
- BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_tail_call),
- BPF_MOV64_IMM(BPF_REG_0, 1),
- BPF_EXIT_INSN(),
- },
- .fixup_prog1 = { 1 },
- .result = ACCEPT,
- .retval = 41,
-},
-{
- "runtime/jit: tail_call within bounds, no prog",
- .insns = {
- BPF_MOV64_IMM(BPF_REG_3, 3),
- BPF_LD_MAP_FD(BPF_REG_2, 0),
- BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_tail_call),
- BPF_MOV64_IMM(BPF_REG_0, 1),
- BPF_EXIT_INSN(),
- },
- .fixup_prog1 = { 1 },
- .result = ACCEPT,
- .retval = 1,
-},
-{
- "runtime/jit: tail_call within bounds, key 2",
- .insns = {
- BPF_MOV64_IMM(BPF_REG_3, 2),
- BPF_LD_MAP_FD(BPF_REG_2, 0),
- BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_tail_call),
- BPF_MOV64_IMM(BPF_REG_0, 1),
- BPF_EXIT_INSN(),
- },
- .fixup_prog1 = { 1 },
- .result = ACCEPT,
- .retval = 24,
-},
-{
- "runtime/jit: tail_call within bounds, key 2 / key 2, first branch",
- .insns = {
- BPF_MOV64_IMM(BPF_REG_0, 13),
- BPF_STX_MEM(BPF_B, BPF_REG_1, BPF_REG_0,
- offsetof(struct __sk_buff, cb[0])),
- BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1,
- offsetof(struct __sk_buff, cb[0])),
- BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 13, 4),
- BPF_MOV64_IMM(BPF_REG_3, 2),
- BPF_LD_MAP_FD(BPF_REG_2, 0),
- BPF_JMP_IMM(BPF_JA, 0, 0, 3),
- BPF_MOV64_IMM(BPF_REG_3, 2),
- BPF_LD_MAP_FD(BPF_REG_2, 0),
- BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_tail_call),
- BPF_MOV64_IMM(BPF_REG_0, 1),
- BPF_EXIT_INSN(),
- },
- .fixup_prog1 = { 5, 9 },
- .result = ACCEPT,
- .retval = 24,
-},
-{
- "runtime/jit: tail_call within bounds, key 2 / key 2, second branch",
- .insns = {
- BPF_MOV64_IMM(BPF_REG_0, 14),
- BPF_STX_MEM(BPF_B, BPF_REG_1, BPF_REG_0,
- offsetof(struct __sk_buff, cb[0])),
- BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1,
- offsetof(struct __sk_buff, cb[0])),
- BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 13, 4),
- BPF_MOV64_IMM(BPF_REG_3, 2),
- BPF_LD_MAP_FD(BPF_REG_2, 0),
- BPF_JMP_IMM(BPF_JA, 0, 0, 3),
- BPF_MOV64_IMM(BPF_REG_3, 2),
- BPF_LD_MAP_FD(BPF_REG_2, 0),
- BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_tail_call),
- BPF_MOV64_IMM(BPF_REG_0, 1),
- BPF_EXIT_INSN(),
- },
- .fixup_prog1 = { 5, 9 },
- .result = ACCEPT,
- .retval = 24,
-},
-{
- "runtime/jit: tail_call within bounds, key 0 / key 2, first branch",
- .insns = {
- BPF_MOV64_IMM(BPF_REG_0, 13),
- BPF_STX_MEM(BPF_B, BPF_REG_1, BPF_REG_0,
- offsetof(struct __sk_buff, cb[0])),
- BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1,
- offsetof(struct __sk_buff, cb[0])),
- BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 13, 4),
- BPF_MOV64_IMM(BPF_REG_3, 0),
- BPF_LD_MAP_FD(BPF_REG_2, 0),
- BPF_JMP_IMM(BPF_JA, 0, 0, 3),
- BPF_MOV64_IMM(BPF_REG_3, 2),
- BPF_LD_MAP_FD(BPF_REG_2, 0),
- BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_tail_call),
- BPF_MOV64_IMM(BPF_REG_0, 1),
- BPF_EXIT_INSN(),
- },
- .fixup_prog1 = { 5, 9 },
- .result = ACCEPT,
- .retval = 24,
-},
-{
- "runtime/jit: tail_call within bounds, key 0 / key 2, second branch",
- .insns = {
- BPF_MOV64_IMM(BPF_REG_0, 14),
- BPF_STX_MEM(BPF_B, BPF_REG_1, BPF_REG_0,
- offsetof(struct __sk_buff, cb[0])),
- BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1,
- offsetof(struct __sk_buff, cb[0])),
- BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 13, 4),
- BPF_MOV64_IMM(BPF_REG_3, 0),
- BPF_LD_MAP_FD(BPF_REG_2, 0),
- BPF_JMP_IMM(BPF_JA, 0, 0, 3),
- BPF_MOV64_IMM(BPF_REG_3, 2),
- BPF_LD_MAP_FD(BPF_REG_2, 0),
- BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_tail_call),
- BPF_MOV64_IMM(BPF_REG_0, 1),
- BPF_EXIT_INSN(),
- },
- .fixup_prog1 = { 5, 9 },
- .result = ACCEPT,
- .retval = 42,
-},
-{
- "runtime/jit: tail_call within bounds, different maps, first branch",
- .insns = {
- BPF_MOV64_IMM(BPF_REG_0, 13),
- BPF_STX_MEM(BPF_B, BPF_REG_1, BPF_REG_0,
- offsetof(struct __sk_buff, cb[0])),
- BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1,
- offsetof(struct __sk_buff, cb[0])),
- BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 13, 4),
- BPF_MOV64_IMM(BPF_REG_3, 0),
- BPF_LD_MAP_FD(BPF_REG_2, 0),
- BPF_JMP_IMM(BPF_JA, 0, 0, 3),
- BPF_MOV64_IMM(BPF_REG_3, 0),
- BPF_LD_MAP_FD(BPF_REG_2, 0),
- BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_tail_call),
- BPF_MOV64_IMM(BPF_REG_0, 1),
- BPF_EXIT_INSN(),
- },
- .fixup_prog1 = { 5 },
- .fixup_prog2 = { 9 },
- .result_unpriv = REJECT,
- .errstr_unpriv = "tail_call abusing map_ptr",
- .result = ACCEPT,
- .retval = 1,
-},
-{
- "runtime/jit: tail_call within bounds, different maps, second branch",
- .insns = {
- BPF_MOV64_IMM(BPF_REG_0, 14),
- BPF_STX_MEM(BPF_B, BPF_REG_1, BPF_REG_0,
- offsetof(struct __sk_buff, cb[0])),
- BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1,
- offsetof(struct __sk_buff, cb[0])),
- BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 13, 4),
- BPF_MOV64_IMM(BPF_REG_3, 0),
- BPF_LD_MAP_FD(BPF_REG_2, 0),
- BPF_JMP_IMM(BPF_JA, 0, 0, 3),
- BPF_MOV64_IMM(BPF_REG_3, 0),
- BPF_LD_MAP_FD(BPF_REG_2, 0),
- BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_tail_call),
- BPF_MOV64_IMM(BPF_REG_0, 1),
- BPF_EXIT_INSN(),
- },
- .fixup_prog1 = { 5 },
- .fixup_prog2 = { 9 },
- .result_unpriv = REJECT,
- .errstr_unpriv = "tail_call abusing map_ptr",
- .result = ACCEPT,
- .retval = 42,
-},
-{
- "runtime/jit: tail_call out of bounds",
- .insns = {
- BPF_MOV64_IMM(BPF_REG_3, 256),
- BPF_LD_MAP_FD(BPF_REG_2, 0),
- BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_tail_call),
- BPF_MOV64_IMM(BPF_REG_0, 2),
- BPF_EXIT_INSN(),
- },
- .fixup_prog1 = { 1 },
- .result = ACCEPT,
- .retval = 2,
-},
-{
- "runtime/jit: pass negative index to tail_call",
- .insns = {
- BPF_MOV64_IMM(BPF_REG_3, -1),
- BPF_LD_MAP_FD(BPF_REG_2, 0),
- BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_tail_call),
- BPF_MOV64_IMM(BPF_REG_0, 2),
- BPF_EXIT_INSN(),
- },
- .fixup_prog1 = { 1 },
- .result = ACCEPT,
- .retval = 2,
-},
-{
- "runtime/jit: pass > 32bit index to tail_call",
- .insns = {
- BPF_LD_IMM64(BPF_REG_3, 0x100000000ULL),
- BPF_LD_MAP_FD(BPF_REG_2, 0),
- BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_tail_call),
- BPF_MOV64_IMM(BPF_REG_0, 2),
- BPF_EXIT_INSN(),
- },
- .fixup_prog1 = { 2 },
- .result = ACCEPT,
- .retval = 42,
- /* Verifier rewrite for unpriv skips tail call here. */
- .retval_unpriv = 2,
-},
diff --git a/tools/testing/selftests/bpf/verifier/search_pruning.c b/tools/testing/selftests/bpf/verifier/search_pruning.c
deleted file mode 100644
index 7e50cb80873a..000000000000
--- a/tools/testing/selftests/bpf/verifier/search_pruning.c
+++ /dev/null
@@ -1,156 +0,0 @@
-{
- "pointer/scalar confusion in state equality check (way 1)",
- .insns = {
- BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
- BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
- BPF_LD_MAP_FD(BPF_REG_1, 0),
- BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
- BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 2),
- BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_0, 0),
- BPF_JMP_A(1),
- BPF_MOV64_REG(BPF_REG_0, BPF_REG_10),
- BPF_JMP_A(0),
- BPF_EXIT_INSN(),
- },
- .fixup_map_hash_8b = { 3 },
- .result = ACCEPT,
- .retval = POINTER_VALUE,
- .result_unpriv = REJECT,
- .errstr_unpriv = "R0 leaks addr as return value"
-},
-{
- "pointer/scalar confusion in state equality check (way 2)",
- .insns = {
- BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
- BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
- BPF_LD_MAP_FD(BPF_REG_1, 0),
- BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
- BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 2),
- BPF_MOV64_REG(BPF_REG_0, BPF_REG_10),
- BPF_JMP_A(1),
- BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- },
- .fixup_map_hash_8b = { 3 },
- .result = ACCEPT,
- .retval = POINTER_VALUE,
- .result_unpriv = REJECT,
- .errstr_unpriv = "R0 leaks addr as return value"
-},
-{
- "liveness pruning and write screening",
- .insns = {
- /* Get an unknown value */
- BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, 0),
- /* branch conditions teach us nothing about R2 */
- BPF_JMP_IMM(BPF_JGE, BPF_REG_2, 0, 1),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_JMP_IMM(BPF_JGE, BPF_REG_2, 0, 1),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- },
- .errstr = "R0 !read_ok",
- .result = REJECT,
- .prog_type = BPF_PROG_TYPE_LWT_IN,
-},
-{
- "varlen_map_value_access pruning",
- .insns = {
- BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
- BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
- BPF_LD_MAP_FD(BPF_REG_1, 0),
- BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
- BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 8),
- BPF_LDX_MEM(BPF_DW, BPF_REG_1, BPF_REG_0, 0),
- BPF_MOV32_IMM(BPF_REG_2, MAX_ENTRIES),
- BPF_JMP_REG(BPF_JSGT, BPF_REG_2, BPF_REG_1, 1),
- BPF_MOV32_IMM(BPF_REG_1, 0),
- BPF_ALU32_IMM(BPF_LSH, BPF_REG_1, 2),
- BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_1),
- BPF_JMP_IMM(BPF_JA, 0, 0, 0),
- BPF_ST_MEM(BPF_DW, BPF_REG_0, 0, offsetof(struct test_val, foo)),
- BPF_EXIT_INSN(),
- },
- .fixup_map_hash_48b = { 3 },
- .errstr_unpriv = "R0 leaks addr",
- .errstr = "R0 unbounded memory access",
- .result_unpriv = REJECT,
- .result = REJECT,
- .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
-},
-{
- "search pruning: all branches should be verified (nop operation)",
- .insns = {
- BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
- BPF_ST_MEM(BPF_DW, BPF_REG_2, 0, 0),
- BPF_LD_MAP_FD(BPF_REG_1, 0),
- BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem),
- BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 11),
- BPF_LDX_MEM(BPF_DW, BPF_REG_3, BPF_REG_0, 0),
- BPF_JMP_IMM(BPF_JEQ, BPF_REG_3, 0xbeef, 2),
- BPF_MOV64_IMM(BPF_REG_4, 0),
- BPF_JMP_A(1),
- BPF_MOV64_IMM(BPF_REG_4, 1),
- BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_4, -16),
- BPF_EMIT_CALL(BPF_FUNC_ktime_get_ns),
- BPF_LDX_MEM(BPF_DW, BPF_REG_5, BPF_REG_10, -16),
- BPF_JMP_IMM(BPF_JEQ, BPF_REG_5, 0, 2),
- BPF_MOV64_IMM(BPF_REG_6, 0),
- BPF_ST_MEM(BPF_DW, BPF_REG_6, 0, 0xdead),
- BPF_EXIT_INSN(),
- },
- .fixup_map_hash_8b = { 3 },
- .errstr = "R6 invalid mem access 'inv'",
- .result = REJECT,
- .prog_type = BPF_PROG_TYPE_TRACEPOINT,
-},
-{
- "search pruning: all branches should be verified (invalid stack access)",
- .insns = {
- BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
- BPF_ST_MEM(BPF_DW, BPF_REG_2, 0, 0),
- BPF_LD_MAP_FD(BPF_REG_1, 0),
- BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem),
- BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 8),
- BPF_LDX_MEM(BPF_DW, BPF_REG_3, BPF_REG_0, 0),
- BPF_MOV64_IMM(BPF_REG_4, 0),
- BPF_JMP_IMM(BPF_JEQ, BPF_REG_3, 0xbeef, 2),
- BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_4, -16),
- BPF_JMP_A(1),
- BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_4, -24),
- BPF_EMIT_CALL(BPF_FUNC_ktime_get_ns),
- BPF_LDX_MEM(BPF_DW, BPF_REG_5, BPF_REG_10, -16),
- BPF_EXIT_INSN(),
- },
- .fixup_map_hash_8b = { 3 },
- .errstr = "invalid read from stack off -16+0 size 8",
- .result = REJECT,
- .prog_type = BPF_PROG_TYPE_TRACEPOINT,
-},
-{
- "allocated_stack",
- .insns = {
- BPF_ALU64_REG(BPF_MOV, BPF_REG_6, BPF_REG_1),
- BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_get_prandom_u32),
- BPF_ALU64_REG(BPF_MOV, BPF_REG_7, BPF_REG_0),
- BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 5),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_6, -8),
- BPF_LDX_MEM(BPF_DW, BPF_REG_6, BPF_REG_10, -8),
- BPF_STX_MEM(BPF_B, BPF_REG_10, BPF_REG_7, -9),
- BPF_LDX_MEM(BPF_B, BPF_REG_7, BPF_REG_10, -9),
- BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 0),
- BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 0),
- BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 0),
- BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 0),
- BPF_EXIT_INSN(),
- },
- .result = ACCEPT,
- .result_unpriv = ACCEPT,
- .insn_processed = 15,
-},
diff --git a/tools/testing/selftests/bpf/verifier/sleepable.c b/tools/testing/selftests/bpf/verifier/sleepable.c
new file mode 100644
index 000000000000..1f0d2bdc673f
--- /dev/null
+++ b/tools/testing/selftests/bpf/verifier/sleepable.c
@@ -0,0 +1,91 @@
+{
+ "sleepable fentry accept",
+ .insns = {
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .prog_type = BPF_PROG_TYPE_TRACING,
+ .expected_attach_type = BPF_TRACE_FENTRY,
+ .kfunc = "bpf_fentry_test1",
+ .result = ACCEPT,
+ .flags = BPF_F_SLEEPABLE,
+ .runs = -1,
+},
+{
+ "sleepable fexit accept",
+ .insns = {
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .prog_type = BPF_PROG_TYPE_TRACING,
+ .expected_attach_type = BPF_TRACE_FENTRY,
+ .kfunc = "bpf_fentry_test1",
+ .result = ACCEPT,
+ .flags = BPF_F_SLEEPABLE,
+ .runs = -1,
+},
+{
+ "sleepable fmod_ret accept",
+ .insns = {
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .prog_type = BPF_PROG_TYPE_TRACING,
+ .expected_attach_type = BPF_MODIFY_RETURN,
+ .kfunc = "bpf_fentry_test1",
+ .result = ACCEPT,
+ .flags = BPF_F_SLEEPABLE,
+ .runs = -1,
+},
+{
+ "sleepable iter accept",
+ .insns = {
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .prog_type = BPF_PROG_TYPE_TRACING,
+ .expected_attach_type = BPF_TRACE_ITER,
+ .kfunc = "task",
+ .result = ACCEPT,
+ .flags = BPF_F_SLEEPABLE,
+ .runs = -1,
+},
+{
+ "sleepable lsm accept",
+ .insns = {
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .prog_type = BPF_PROG_TYPE_LSM,
+ .kfunc = "bpf",
+ .expected_attach_type = BPF_LSM_MAC,
+ .result = ACCEPT,
+ .flags = BPF_F_SLEEPABLE,
+ .runs = -1,
+},
+{
+ "sleepable uprobe accept",
+ .insns = {
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .prog_type = BPF_PROG_TYPE_KPROBE,
+ .kfunc = "bpf_fentry_test1",
+ .result = ACCEPT,
+ .flags = BPF_F_SLEEPABLE,
+ .runs = -1,
+},
+{
+ "sleepable raw tracepoint reject",
+ .insns = {
+ BPF_MOV64_IMM(BPF_REG_0, 0),
+ BPF_EXIT_INSN(),
+ },
+ .prog_type = BPF_PROG_TYPE_TRACING,
+ .expected_attach_type = BPF_TRACE_RAW_TP,
+ .kfunc = "sched_switch",
+ .result = REJECT,
+ .errstr = "Only fentry/fexit/fmod_ret, lsm, iter, uprobe, and struct_ops programs can be sleepable",
+ .flags = BPF_F_SLEEPABLE,
+ .runs = -1,
+},
diff --git a/tools/testing/selftests/bpf/verifier/sock.c b/tools/testing/selftests/bpf/verifier/sock.c
deleted file mode 100644
index b1aac2641498..000000000000
--- a/tools/testing/selftests/bpf/verifier/sock.c
+++ /dev/null
@@ -1,633 +0,0 @@
-{
- "skb->sk: no NULL check",
- .insns = {
- BPF_LDX_MEM(BPF_DW, BPF_REG_1, BPF_REG_1, offsetof(struct __sk_buff, sk)),
- BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1, 0),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- },
- .prog_type = BPF_PROG_TYPE_CGROUP_SKB,
- .result = REJECT,
- .errstr = "invalid mem access 'sock_common_or_null'",
-},
-{
- "skb->sk: sk->family [non fullsock field]",
- .insns = {
- BPF_LDX_MEM(BPF_DW, BPF_REG_1, BPF_REG_1, offsetof(struct __sk_buff, sk)),
- BPF_JMP_IMM(BPF_JNE, BPF_REG_1, 0, 2),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1, offsetof(struct bpf_sock, family)),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- },
- .prog_type = BPF_PROG_TYPE_CGROUP_SKB,
- .result = ACCEPT,
-},
-{
- "skb->sk: sk->type [fullsock field]",
- .insns = {
- BPF_LDX_MEM(BPF_DW, BPF_REG_1, BPF_REG_1, offsetof(struct __sk_buff, sk)),
- BPF_JMP_IMM(BPF_JNE, BPF_REG_1, 0, 2),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1, offsetof(struct bpf_sock, type)),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- },
- .prog_type = BPF_PROG_TYPE_CGROUP_SKB,
- .result = REJECT,
- .errstr = "invalid sock_common access",
-},
-{
- "bpf_sk_fullsock(skb->sk): no !skb->sk check",
- .insns = {
- BPF_LDX_MEM(BPF_DW, BPF_REG_1, BPF_REG_1, offsetof(struct __sk_buff, sk)),
- BPF_EMIT_CALL(BPF_FUNC_sk_fullsock),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- },
- .prog_type = BPF_PROG_TYPE_CGROUP_SKB,
- .result = REJECT,
- .errstr = "type=sock_common_or_null expected=sock_common",
-},
-{
- "sk_fullsock(skb->sk): no NULL check on ret",
- .insns = {
- BPF_LDX_MEM(BPF_DW, BPF_REG_1, BPF_REG_1, offsetof(struct __sk_buff, sk)),
- BPF_JMP_IMM(BPF_JNE, BPF_REG_1, 0, 2),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- BPF_EMIT_CALL(BPF_FUNC_sk_fullsock),
- BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_0, offsetof(struct bpf_sock, type)),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- },
- .prog_type = BPF_PROG_TYPE_CGROUP_SKB,
- .result = REJECT,
- .errstr = "invalid mem access 'sock_or_null'",
-},
-{
- "sk_fullsock(skb->sk): sk->type [fullsock field]",
- .insns = {
- BPF_LDX_MEM(BPF_DW, BPF_REG_1, BPF_REG_1, offsetof(struct __sk_buff, sk)),
- BPF_JMP_IMM(BPF_JNE, BPF_REG_1, 0, 2),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- BPF_EMIT_CALL(BPF_FUNC_sk_fullsock),
- BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 2),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_0, offsetof(struct bpf_sock, type)),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- },
- .prog_type = BPF_PROG_TYPE_CGROUP_SKB,
- .result = ACCEPT,
-},
-{
- "sk_fullsock(skb->sk): sk->family [non fullsock field]",
- .insns = {
- BPF_LDX_MEM(BPF_DW, BPF_REG_1, BPF_REG_1, offsetof(struct __sk_buff, sk)),
- BPF_JMP_IMM(BPF_JNE, BPF_REG_1, 0, 2),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- BPF_EMIT_CALL(BPF_FUNC_sk_fullsock),
- BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 1),
- BPF_EXIT_INSN(),
- BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_0, offsetof(struct bpf_sock, family)),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- },
- .prog_type = BPF_PROG_TYPE_CGROUP_SKB,
- .result = ACCEPT,
-},
-{
- "sk_fullsock(skb->sk): sk->state [narrow load]",
- .insns = {
- BPF_LDX_MEM(BPF_DW, BPF_REG_1, BPF_REG_1, offsetof(struct __sk_buff, sk)),
- BPF_JMP_IMM(BPF_JNE, BPF_REG_1, 0, 2),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- BPF_EMIT_CALL(BPF_FUNC_sk_fullsock),
- BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 2),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_0, offsetof(struct bpf_sock, state)),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- },
- .prog_type = BPF_PROG_TYPE_CGROUP_SKB,
- .result = ACCEPT,
-},
-{
- "sk_fullsock(skb->sk): sk->dst_port [narrow load]",
- .insns = {
- BPF_LDX_MEM(BPF_DW, BPF_REG_1, BPF_REG_1, offsetof(struct __sk_buff, sk)),
- BPF_JMP_IMM(BPF_JNE, BPF_REG_1, 0, 2),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- BPF_EMIT_CALL(BPF_FUNC_sk_fullsock),
- BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 2),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- BPF_LDX_MEM(BPF_H, BPF_REG_0, BPF_REG_0, offsetof(struct bpf_sock, dst_port)),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- },
- .prog_type = BPF_PROG_TYPE_CGROUP_SKB,
- .result = ACCEPT,
-},
-{
- "sk_fullsock(skb->sk): sk->dst_port [load 2nd byte]",
- .insns = {
- BPF_LDX_MEM(BPF_DW, BPF_REG_1, BPF_REG_1, offsetof(struct __sk_buff, sk)),
- BPF_JMP_IMM(BPF_JNE, BPF_REG_1, 0, 2),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- BPF_EMIT_CALL(BPF_FUNC_sk_fullsock),
- BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 2),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_0, offsetof(struct bpf_sock, dst_port) + 1),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- },
- .prog_type = BPF_PROG_TYPE_CGROUP_SKB,
- .result = REJECT,
- .errstr = "invalid sock access",
-},
-{
- "sk_fullsock(skb->sk): sk->dst_ip6 [load 2nd byte]",
- .insns = {
- BPF_LDX_MEM(BPF_DW, BPF_REG_1, BPF_REG_1, offsetof(struct __sk_buff, sk)),
- BPF_JMP_IMM(BPF_JNE, BPF_REG_1, 0, 2),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- BPF_EMIT_CALL(BPF_FUNC_sk_fullsock),
- BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 2),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_0, offsetof(struct bpf_sock, dst_ip6[0]) + 1),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- },
- .prog_type = BPF_PROG_TYPE_CGROUP_SKB,
- .result = ACCEPT,
-},
-{
- "sk_fullsock(skb->sk): sk->type [narrow load]",
- .insns = {
- BPF_LDX_MEM(BPF_DW, BPF_REG_1, BPF_REG_1, offsetof(struct __sk_buff, sk)),
- BPF_JMP_IMM(BPF_JNE, BPF_REG_1, 0, 2),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- BPF_EMIT_CALL(BPF_FUNC_sk_fullsock),
- BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 2),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_0, offsetof(struct bpf_sock, type)),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- },
- .prog_type = BPF_PROG_TYPE_CGROUP_SKB,
- .result = ACCEPT,
-},
-{
- "sk_fullsock(skb->sk): sk->protocol [narrow load]",
- .insns = {
- BPF_LDX_MEM(BPF_DW, BPF_REG_1, BPF_REG_1, offsetof(struct __sk_buff, sk)),
- BPF_JMP_IMM(BPF_JNE, BPF_REG_1, 0, 2),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- BPF_EMIT_CALL(BPF_FUNC_sk_fullsock),
- BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 2),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_0, offsetof(struct bpf_sock, protocol)),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- },
- .prog_type = BPF_PROG_TYPE_CGROUP_SKB,
- .result = ACCEPT,
-},
-{
- "sk_fullsock(skb->sk): beyond last field",
- .insns = {
- BPF_LDX_MEM(BPF_DW, BPF_REG_1, BPF_REG_1, offsetof(struct __sk_buff, sk)),
- BPF_JMP_IMM(BPF_JNE, BPF_REG_1, 0, 2),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- BPF_EMIT_CALL(BPF_FUNC_sk_fullsock),
- BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 2),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_0, offsetofend(struct bpf_sock, rx_queue_mapping)),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- },
- .prog_type = BPF_PROG_TYPE_CGROUP_SKB,
- .result = REJECT,
- .errstr = "invalid sock access",
-},
-{
- "bpf_tcp_sock(skb->sk): no !skb->sk check",
- .insns = {
- BPF_LDX_MEM(BPF_DW, BPF_REG_1, BPF_REG_1, offsetof(struct __sk_buff, sk)),
- BPF_EMIT_CALL(BPF_FUNC_tcp_sock),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- },
- .prog_type = BPF_PROG_TYPE_CGROUP_SKB,
- .result = REJECT,
- .errstr = "type=sock_common_or_null expected=sock_common",
-},
-{
- "bpf_tcp_sock(skb->sk): no NULL check on ret",
- .insns = {
- BPF_LDX_MEM(BPF_DW, BPF_REG_1, BPF_REG_1, offsetof(struct __sk_buff, sk)),
- BPF_JMP_IMM(BPF_JNE, BPF_REG_1, 0, 2),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- BPF_EMIT_CALL(BPF_FUNC_tcp_sock),
- BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_0, offsetof(struct bpf_tcp_sock, snd_cwnd)),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- },
- .prog_type = BPF_PROG_TYPE_CGROUP_SKB,
- .result = REJECT,
- .errstr = "invalid mem access 'tcp_sock_or_null'",
-},
-{
- "bpf_tcp_sock(skb->sk): tp->snd_cwnd",
- .insns = {
- BPF_LDX_MEM(BPF_DW, BPF_REG_1, BPF_REG_1, offsetof(struct __sk_buff, sk)),
- BPF_JMP_IMM(BPF_JNE, BPF_REG_1, 0, 2),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- BPF_EMIT_CALL(BPF_FUNC_tcp_sock),
- BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 1),
- BPF_EXIT_INSN(),
- BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_0, offsetof(struct bpf_tcp_sock, snd_cwnd)),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- },
- .prog_type = BPF_PROG_TYPE_CGROUP_SKB,
- .result = ACCEPT,
-},
-{
- "bpf_tcp_sock(skb->sk): tp->bytes_acked",
- .insns = {
- BPF_LDX_MEM(BPF_DW, BPF_REG_1, BPF_REG_1, offsetof(struct __sk_buff, sk)),
- BPF_JMP_IMM(BPF_JNE, BPF_REG_1, 0, 2),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- BPF_EMIT_CALL(BPF_FUNC_tcp_sock),
- BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 1),
- BPF_EXIT_INSN(),
- BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_0, offsetof(struct bpf_tcp_sock, bytes_acked)),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- },
- .prog_type = BPF_PROG_TYPE_CGROUP_SKB,
- .result = ACCEPT,
-},
-{
- "bpf_tcp_sock(skb->sk): beyond last field",
- .insns = {
- BPF_LDX_MEM(BPF_DW, BPF_REG_1, BPF_REG_1, offsetof(struct __sk_buff, sk)),
- BPF_JMP_IMM(BPF_JNE, BPF_REG_1, 0, 2),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- BPF_EMIT_CALL(BPF_FUNC_tcp_sock),
- BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 1),
- BPF_EXIT_INSN(),
- BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_0, offsetofend(struct bpf_tcp_sock, bytes_acked)),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- },
- .prog_type = BPF_PROG_TYPE_CGROUP_SKB,
- .result = REJECT,
- .errstr = "invalid tcp_sock access",
-},
-{
- "bpf_tcp_sock(bpf_sk_fullsock(skb->sk)): tp->snd_cwnd",
- .insns = {
- BPF_LDX_MEM(BPF_DW, BPF_REG_1, BPF_REG_1, offsetof(struct __sk_buff, sk)),
- BPF_JMP_IMM(BPF_JNE, BPF_REG_1, 0, 2),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- BPF_EMIT_CALL(BPF_FUNC_sk_fullsock),
- BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 1),
- BPF_EXIT_INSN(),
- BPF_MOV64_REG(BPF_REG_1, BPF_REG_0),
- BPF_EMIT_CALL(BPF_FUNC_tcp_sock),
- BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 1),
- BPF_EXIT_INSN(),
- BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_0, offsetof(struct bpf_tcp_sock, snd_cwnd)),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- },
- .prog_type = BPF_PROG_TYPE_CGROUP_SKB,
- .result = ACCEPT,
-},
-{
- "bpf_sk_release(skb->sk)",
- .insns = {
- BPF_LDX_MEM(BPF_DW, BPF_REG_1, BPF_REG_1, offsetof(struct __sk_buff, sk)),
- BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, 0, 1),
- BPF_EMIT_CALL(BPF_FUNC_sk_release),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- },
- .prog_type = BPF_PROG_TYPE_SCHED_CLS,
- .result = REJECT,
- .errstr = "reference has not been acquired before",
-},
-{
- "bpf_sk_release(bpf_sk_fullsock(skb->sk))",
- .insns = {
- BPF_LDX_MEM(BPF_DW, BPF_REG_1, BPF_REG_1, offsetof(struct __sk_buff, sk)),
- BPF_JMP_IMM(BPF_JNE, BPF_REG_1, 0, 2),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- BPF_EMIT_CALL(BPF_FUNC_sk_fullsock),
- BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 1),
- BPF_EXIT_INSN(),
- BPF_MOV64_REG(BPF_REG_1, BPF_REG_0),
- BPF_EMIT_CALL(BPF_FUNC_sk_release),
- BPF_MOV64_IMM(BPF_REG_0, 1),
- BPF_EXIT_INSN(),
- },
- .prog_type = BPF_PROG_TYPE_SCHED_CLS,
- .result = REJECT,
- .errstr = "reference has not been acquired before",
-},
-{
- "bpf_sk_release(bpf_tcp_sock(skb->sk))",
- .insns = {
- BPF_LDX_MEM(BPF_DW, BPF_REG_1, BPF_REG_1, offsetof(struct __sk_buff, sk)),
- BPF_JMP_IMM(BPF_JNE, BPF_REG_1, 0, 2),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- BPF_EMIT_CALL(BPF_FUNC_tcp_sock),
- BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 1),
- BPF_EXIT_INSN(),
- BPF_MOV64_REG(BPF_REG_1, BPF_REG_0),
- BPF_EMIT_CALL(BPF_FUNC_sk_release),
- BPF_MOV64_IMM(BPF_REG_0, 1),
- BPF_EXIT_INSN(),
- },
- .prog_type = BPF_PROG_TYPE_SCHED_CLS,
- .result = REJECT,
- .errstr = "reference has not been acquired before",
-},
-{
- "sk_storage_get(map, skb->sk, NULL, 0): value == NULL",
- .insns = {
- BPF_LDX_MEM(BPF_DW, BPF_REG_1, BPF_REG_1, offsetof(struct __sk_buff, sk)),
- BPF_JMP_IMM(BPF_JNE, BPF_REG_1, 0, 2),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- BPF_EMIT_CALL(BPF_FUNC_sk_fullsock),
- BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 2),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- BPF_MOV64_IMM(BPF_REG_4, 0),
- BPF_MOV64_IMM(BPF_REG_3, 0),
- BPF_MOV64_REG(BPF_REG_2, BPF_REG_0),
- BPF_LD_MAP_FD(BPF_REG_1, 0),
- BPF_EMIT_CALL(BPF_FUNC_sk_storage_get),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- },
- .fixup_sk_storage_map = { 11 },
- .prog_type = BPF_PROG_TYPE_SCHED_CLS,
- .result = ACCEPT,
-},
-{
- "sk_storage_get(map, skb->sk, 1, 1): value == 1",
- .insns = {
- BPF_LDX_MEM(BPF_DW, BPF_REG_1, BPF_REG_1, offsetof(struct __sk_buff, sk)),
- BPF_JMP_IMM(BPF_JNE, BPF_REG_1, 0, 2),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- BPF_EMIT_CALL(BPF_FUNC_sk_fullsock),
- BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 2),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- BPF_MOV64_IMM(BPF_REG_4, 1),
- BPF_MOV64_IMM(BPF_REG_3, 1),
- BPF_MOV64_REG(BPF_REG_2, BPF_REG_0),
- BPF_LD_MAP_FD(BPF_REG_1, 0),
- BPF_EMIT_CALL(BPF_FUNC_sk_storage_get),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- },
- .fixup_sk_storage_map = { 11 },
- .prog_type = BPF_PROG_TYPE_SCHED_CLS,
- .result = REJECT,
- .errstr = "R3 type=inv expected=fp",
-},
-{
- "sk_storage_get(map, skb->sk, &stack_value, 1): stack_value",
- .insns = {
- BPF_MOV64_IMM(BPF_REG_2, 0),
- BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_2, -8),
- BPF_LDX_MEM(BPF_DW, BPF_REG_1, BPF_REG_1, offsetof(struct __sk_buff, sk)),
- BPF_JMP_IMM(BPF_JNE, BPF_REG_1, 0, 2),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- BPF_EMIT_CALL(BPF_FUNC_sk_fullsock),
- BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 2),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- BPF_MOV64_IMM(BPF_REG_4, 1),
- BPF_MOV64_REG(BPF_REG_3, BPF_REG_10),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_3, -8),
- BPF_MOV64_REG(BPF_REG_2, BPF_REG_0),
- BPF_LD_MAP_FD(BPF_REG_1, 0),
- BPF_EMIT_CALL(BPF_FUNC_sk_storage_get),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- },
- .fixup_sk_storage_map = { 14 },
- .prog_type = BPF_PROG_TYPE_SCHED_CLS,
- .result = ACCEPT,
-},
-{
- "sk_storage_get(map, skb->sk, &stack_value, 1): partially init stack_value",
- .insns = {
- BPF_MOV64_IMM(BPF_REG_2, 0),
- BPF_STX_MEM(BPF_W, BPF_REG_10, BPF_REG_2, -8),
- BPF_LDX_MEM(BPF_DW, BPF_REG_1, BPF_REG_1, offsetof(struct __sk_buff, sk)),
- BPF_JMP_IMM(BPF_JNE, BPF_REG_1, 0, 2),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- BPF_EMIT_CALL(BPF_FUNC_sk_fullsock),
- BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 2),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- BPF_MOV64_IMM(BPF_REG_4, 1),
- BPF_MOV64_REG(BPF_REG_3, BPF_REG_10),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_3, -8),
- BPF_MOV64_REG(BPF_REG_2, BPF_REG_0),
- BPF_LD_MAP_FD(BPF_REG_1, 0),
- BPF_EMIT_CALL(BPF_FUNC_sk_storage_get),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- },
- .fixup_sk_storage_map = { 14 },
- .prog_type = BPF_PROG_TYPE_SCHED_CLS,
- .result = REJECT,
- .errstr = "invalid indirect read from stack",
-},
-{
- "bpf_map_lookup_elem(smap, &key)",
- .insns = {
- BPF_ST_MEM(BPF_W, BPF_REG_10, -4, 0),
- BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -4),
- BPF_LD_MAP_FD(BPF_REG_1, 0),
- BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- },
- .fixup_sk_storage_map = { 3 },
- .prog_type = BPF_PROG_TYPE_SCHED_CLS,
- .result = REJECT,
- .errstr = "cannot pass map_type 24 into func bpf_map_lookup_elem",
-},
-{
- "bpf_map_lookup_elem(xskmap, &key); xs->queue_id",
- .insns = {
- BPF_ST_MEM(BPF_W, BPF_REG_10, -8, 0),
- BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
- BPF_LD_MAP_FD(BPF_REG_1, 0),
- BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem),
- BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 1),
- BPF_EXIT_INSN(),
- BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_0, offsetof(struct bpf_xdp_sock, queue_id)),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- },
- .fixup_map_xskmap = { 3 },
- .prog_type = BPF_PROG_TYPE_XDP,
- .result = ACCEPT,
-},
-{
- "bpf_map_lookup_elem(sockmap, &key)",
- .insns = {
- BPF_ST_MEM(BPF_W, BPF_REG_10, -4, 0),
- BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -4),
- BPF_LD_MAP_FD(BPF_REG_1, 0),
- BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- },
- .fixup_map_sockmap = { 3 },
- .prog_type = BPF_PROG_TYPE_SK_SKB,
- .result = REJECT,
- .errstr = "Unreleased reference id=2 alloc_insn=5",
-},
-{
- "bpf_map_lookup_elem(sockhash, &key)",
- .insns = {
- BPF_ST_MEM(BPF_W, BPF_REG_10, -4, 0),
- BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -4),
- BPF_LD_MAP_FD(BPF_REG_1, 0),
- BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- },
- .fixup_map_sockhash = { 3 },
- .prog_type = BPF_PROG_TYPE_SK_SKB,
- .result = REJECT,
- .errstr = "Unreleased reference id=2 alloc_insn=5",
-},
-{
- "bpf_map_lookup_elem(sockmap, &key); sk->type [fullsock field]; bpf_sk_release(sk)",
- .insns = {
- BPF_ST_MEM(BPF_W, BPF_REG_10, -4, 0),
- BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -4),
- BPF_LD_MAP_FD(BPF_REG_1, 0),
- BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem),
- BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 1),
- BPF_EXIT_INSN(),
- BPF_MOV64_REG(BPF_REG_1, BPF_REG_0),
- BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_0, offsetof(struct bpf_sock, type)),
- BPF_EMIT_CALL(BPF_FUNC_sk_release),
- BPF_EXIT_INSN(),
- },
- .fixup_map_sockmap = { 3 },
- .prog_type = BPF_PROG_TYPE_SK_SKB,
- .result = ACCEPT,
-},
-{
- "bpf_map_lookup_elem(sockhash, &key); sk->type [fullsock field]; bpf_sk_release(sk)",
- .insns = {
- BPF_ST_MEM(BPF_W, BPF_REG_10, -4, 0),
- BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -4),
- BPF_LD_MAP_FD(BPF_REG_1, 0),
- BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem),
- BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 1),
- BPF_EXIT_INSN(),
- BPF_MOV64_REG(BPF_REG_1, BPF_REG_0),
- BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_0, offsetof(struct bpf_sock, type)),
- BPF_EMIT_CALL(BPF_FUNC_sk_release),
- BPF_EXIT_INSN(),
- },
- .fixup_map_sockhash = { 3 },
- .prog_type = BPF_PROG_TYPE_SK_SKB,
- .result = ACCEPT,
-},
-{
- "bpf_sk_select_reuseport(ctx, reuseport_array, &key, flags)",
- .insns = {
- BPF_MOV64_IMM(BPF_REG_4, 0),
- BPF_ST_MEM(BPF_W, BPF_REG_10, -4, 0),
- BPF_MOV64_REG(BPF_REG_3, BPF_REG_10),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_3, -4),
- BPF_LD_MAP_FD(BPF_REG_2, 0),
- BPF_EMIT_CALL(BPF_FUNC_sk_select_reuseport),
- BPF_EXIT_INSN(),
- },
- .fixup_map_reuseport_array = { 4 },
- .prog_type = BPF_PROG_TYPE_SK_REUSEPORT,
- .result = ACCEPT,
-},
-{
- "bpf_sk_select_reuseport(ctx, sockmap, &key, flags)",
- .insns = {
- BPF_MOV64_IMM(BPF_REG_4, 0),
- BPF_ST_MEM(BPF_W, BPF_REG_10, -4, 0),
- BPF_MOV64_REG(BPF_REG_3, BPF_REG_10),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_3, -4),
- BPF_LD_MAP_FD(BPF_REG_2, 0),
- BPF_EMIT_CALL(BPF_FUNC_sk_select_reuseport),
- BPF_EXIT_INSN(),
- },
- .fixup_map_sockmap = { 4 },
- .prog_type = BPF_PROG_TYPE_SK_REUSEPORT,
- .result = ACCEPT,
-},
-{
- "bpf_sk_select_reuseport(ctx, sockhash, &key, flags)",
- .insns = {
- BPF_MOV64_IMM(BPF_REG_4, 0),
- BPF_ST_MEM(BPF_W, BPF_REG_10, -4, 0),
- BPF_MOV64_REG(BPF_REG_3, BPF_REG_10),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_3, -4),
- BPF_LD_MAP_FD(BPF_REG_2, 0),
- BPF_EMIT_CALL(BPF_FUNC_sk_select_reuseport),
- BPF_EXIT_INSN(),
- },
- .fixup_map_sockmap = { 4 },
- .prog_type = BPF_PROG_TYPE_SK_REUSEPORT,
- .result = ACCEPT,
-},
diff --git a/tools/testing/selftests/bpf/verifier/spill_fill.c b/tools/testing/selftests/bpf/verifier/spill_fill.c
deleted file mode 100644
index 45d43bf82f26..000000000000
--- a/tools/testing/selftests/bpf/verifier/spill_fill.c
+++ /dev/null
@@ -1,76 +0,0 @@
-{
- "check valid spill/fill",
- .insns = {
- /* spill R1(ctx) into stack */
- BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_1, -8),
- /* fill it back into R2 */
- BPF_LDX_MEM(BPF_DW, BPF_REG_2, BPF_REG_10, -8),
- /* should be able to access R0 = *(R2 + 8) */
- /* BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_2, 8), */
- BPF_MOV64_REG(BPF_REG_0, BPF_REG_2),
- BPF_EXIT_INSN(),
- },
- .errstr_unpriv = "R0 leaks addr",
- .result = ACCEPT,
- .result_unpriv = REJECT,
- .retval = POINTER_VALUE,
-},
-{
- "check valid spill/fill, skb mark",
- .insns = {
- BPF_ALU64_REG(BPF_MOV, BPF_REG_6, BPF_REG_1),
- BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_6, -8),
- BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_10, -8),
- BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_0,
- offsetof(struct __sk_buff, mark)),
- BPF_EXIT_INSN(),
- },
- .result = ACCEPT,
- .result_unpriv = ACCEPT,
-},
-{
- "check corrupted spill/fill",
- .insns = {
- /* spill R1(ctx) into stack */
- BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_1, -8),
- /* mess up with R1 pointer on stack */
- BPF_ST_MEM(BPF_B, BPF_REG_10, -7, 0x23),
- /* fill back into R0 is fine for priv.
- * R0 now becomes SCALAR_VALUE.
- */
- BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_10, -8),
- /* Load from R0 should fail. */
- BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_0, 8),
- BPF_EXIT_INSN(),
- },
- .errstr_unpriv = "attempt to corrupt spilled",
- .errstr = "R0 invalid mem access 'inv",
- .result = REJECT,
- .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
-},
-{
- "check corrupted spill/fill, LSB",
- .insns = {
- BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_1, -8),
- BPF_ST_MEM(BPF_H, BPF_REG_10, -8, 0xcafe),
- BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_10, -8),
- BPF_EXIT_INSN(),
- },
- .errstr_unpriv = "attempt to corrupt spilled",
- .result_unpriv = REJECT,
- .result = ACCEPT,
- .retval = POINTER_VALUE,
-},
-{
- "check corrupted spill/fill, MSB",
- .insns = {
- BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_1, -8),
- BPF_ST_MEM(BPF_W, BPF_REG_10, -4, 0x12345678),
- BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_10, -8),
- BPF_EXIT_INSN(),
- },
- .errstr_unpriv = "attempt to corrupt spilled",
- .result_unpriv = REJECT,
- .result = ACCEPT,
- .retval = POINTER_VALUE,
-},
diff --git a/tools/testing/selftests/bpf/verifier/spin_lock.c b/tools/testing/selftests/bpf/verifier/spin_lock.c
deleted file mode 100644
index 781621facae4..000000000000
--- a/tools/testing/selftests/bpf/verifier/spin_lock.c
+++ /dev/null
@@ -1,333 +0,0 @@
-{
- "spin_lock: test1 success",
- .insns = {
- BPF_ST_MEM(BPF_W, BPF_REG_10, -4, 0),
- BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -4),
- BPF_LD_MAP_FD(BPF_REG_1,
- 0),
- BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
- BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 1),
- BPF_EXIT_INSN(),
- BPF_MOV64_REG(BPF_REG_6, BPF_REG_0),
- BPF_MOV64_REG(BPF_REG_1, BPF_REG_0),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 4),
- BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_spin_lock),
- BPF_MOV64_REG(BPF_REG_1, BPF_REG_6),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 4),
- BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_6, 0),
- BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_spin_unlock),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- },
- .fixup_map_spin_lock = { 3 },
- .result = ACCEPT,
- .result_unpriv = REJECT,
- .errstr_unpriv = "",
- .prog_type = BPF_PROG_TYPE_CGROUP_SKB,
-},
-{
- "spin_lock: test2 direct ld/st",
- .insns = {
- BPF_ST_MEM(BPF_W, BPF_REG_10, -4, 0),
- BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -4),
- BPF_LD_MAP_FD(BPF_REG_1,
- 0),
- BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
- BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 1),
- BPF_EXIT_INSN(),
- BPF_MOV64_REG(BPF_REG_6, BPF_REG_0),
- BPF_MOV64_REG(BPF_REG_1, BPF_REG_0),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 4),
- BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_spin_lock),
- BPF_MOV64_REG(BPF_REG_1, BPF_REG_6),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 4),
- BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1, 0),
- BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_spin_unlock),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- },
- .fixup_map_spin_lock = { 3 },
- .result = REJECT,
- .errstr = "cannot be accessed directly",
- .result_unpriv = REJECT,
- .errstr_unpriv = "",
- .prog_type = BPF_PROG_TYPE_CGROUP_SKB,
-},
-{
- "spin_lock: test3 direct ld/st",
- .insns = {
- BPF_ST_MEM(BPF_W, BPF_REG_10, -4, 0),
- BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -4),
- BPF_LD_MAP_FD(BPF_REG_1,
- 0),
- BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
- BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 1),
- BPF_EXIT_INSN(),
- BPF_MOV64_REG(BPF_REG_6, BPF_REG_0),
- BPF_MOV64_REG(BPF_REG_1, BPF_REG_0),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 4),
- BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_spin_lock),
- BPF_MOV64_REG(BPF_REG_1, BPF_REG_6),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 4),
- BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_6, 1),
- BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_spin_unlock),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- },
- .fixup_map_spin_lock = { 3 },
- .result = REJECT,
- .errstr = "cannot be accessed directly",
- .result_unpriv = REJECT,
- .errstr_unpriv = "",
- .prog_type = BPF_PROG_TYPE_CGROUP_SKB,
- .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
-},
-{
- "spin_lock: test4 direct ld/st",
- .insns = {
- BPF_ST_MEM(BPF_W, BPF_REG_10, -4, 0),
- BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -4),
- BPF_LD_MAP_FD(BPF_REG_1,
- 0),
- BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
- BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 1),
- BPF_EXIT_INSN(),
- BPF_MOV64_REG(BPF_REG_6, BPF_REG_0),
- BPF_MOV64_REG(BPF_REG_1, BPF_REG_0),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 4),
- BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_spin_lock),
- BPF_MOV64_REG(BPF_REG_1, BPF_REG_6),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 4),
- BPF_LDX_MEM(BPF_H, BPF_REG_0, BPF_REG_6, 3),
- BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_spin_unlock),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- },
- .fixup_map_spin_lock = { 3 },
- .result = REJECT,
- .errstr = "cannot be accessed directly",
- .result_unpriv = REJECT,
- .errstr_unpriv = "",
- .prog_type = BPF_PROG_TYPE_CGROUP_SKB,
- .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
-},
-{
- "spin_lock: test5 call within a locked region",
- .insns = {
- BPF_ST_MEM(BPF_W, BPF_REG_10, -4, 0),
- BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -4),
- BPF_LD_MAP_FD(BPF_REG_1,
- 0),
- BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
- BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 1),
- BPF_EXIT_INSN(),
- BPF_MOV64_REG(BPF_REG_6, BPF_REG_0),
- BPF_MOV64_REG(BPF_REG_1, BPF_REG_0),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 4),
- BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_spin_lock),
- BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_get_prandom_u32),
- BPF_MOV64_REG(BPF_REG_1, BPF_REG_6),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 4),
- BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_spin_unlock),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- },
- .fixup_map_spin_lock = { 3 },
- .result = REJECT,
- .errstr = "calls are not allowed",
- .result_unpriv = REJECT,
- .errstr_unpriv = "",
- .prog_type = BPF_PROG_TYPE_CGROUP_SKB,
-},
-{
- "spin_lock: test6 missing unlock",
- .insns = {
- BPF_ST_MEM(BPF_W, BPF_REG_10, -4, 0),
- BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -4),
- BPF_LD_MAP_FD(BPF_REG_1,
- 0),
- BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
- BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 1),
- BPF_EXIT_INSN(),
- BPF_MOV64_REG(BPF_REG_6, BPF_REG_0),
- BPF_MOV64_REG(BPF_REG_1, BPF_REG_0),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 4),
- BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_spin_lock),
- BPF_MOV64_REG(BPF_REG_1, BPF_REG_6),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 4),
- BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_6, 0),
- BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 1),
- BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_spin_unlock),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- },
- .fixup_map_spin_lock = { 3 },
- .result = REJECT,
- .errstr = "unlock is missing",
- .result_unpriv = REJECT,
- .errstr_unpriv = "",
- .prog_type = BPF_PROG_TYPE_CGROUP_SKB,
-},
-{
- "spin_lock: test7 unlock without lock",
- .insns = {
- BPF_ST_MEM(BPF_W, BPF_REG_10, -4, 0),
- BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -4),
- BPF_LD_MAP_FD(BPF_REG_1,
- 0),
- BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
- BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 1),
- BPF_EXIT_INSN(),
- BPF_MOV64_REG(BPF_REG_6, BPF_REG_0),
- BPF_MOV64_REG(BPF_REG_1, BPF_REG_0),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 4),
- BPF_JMP_IMM(BPF_JNE, BPF_REG_1, 0, 1),
- BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_spin_lock),
- BPF_MOV64_REG(BPF_REG_1, BPF_REG_6),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 4),
- BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_6, 0),
- BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_spin_unlock),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- },
- .fixup_map_spin_lock = { 3 },
- .result = REJECT,
- .errstr = "without taking a lock",
- .result_unpriv = REJECT,
- .errstr_unpriv = "",
- .prog_type = BPF_PROG_TYPE_CGROUP_SKB,
-},
-{
- "spin_lock: test8 double lock",
- .insns = {
- BPF_ST_MEM(BPF_W, BPF_REG_10, -4, 0),
- BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -4),
- BPF_LD_MAP_FD(BPF_REG_1,
- 0),
- BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
- BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 1),
- BPF_EXIT_INSN(),
- BPF_MOV64_REG(BPF_REG_6, BPF_REG_0),
- BPF_MOV64_REG(BPF_REG_1, BPF_REG_0),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 4),
- BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_spin_lock),
- BPF_MOV64_REG(BPF_REG_1, BPF_REG_6),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 4),
- BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_spin_lock),
- BPF_MOV64_REG(BPF_REG_1, BPF_REG_6),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 4),
- BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_6, 0),
- BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_spin_unlock),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- },
- .fixup_map_spin_lock = { 3 },
- .result = REJECT,
- .errstr = "calls are not allowed",
- .result_unpriv = REJECT,
- .errstr_unpriv = "",
- .prog_type = BPF_PROG_TYPE_CGROUP_SKB,
-},
-{
- "spin_lock: test9 different lock",
- .insns = {
- BPF_ST_MEM(BPF_W, BPF_REG_10, -4, 0),
- BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -4),
- BPF_LD_MAP_FD(BPF_REG_1,
- 0),
- BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
- BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 1),
- BPF_EXIT_INSN(),
- BPF_MOV64_REG(BPF_REG_6, BPF_REG_0),
- BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -4),
- BPF_LD_MAP_FD(BPF_REG_1,
- 0),
- BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
- BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 1),
- BPF_EXIT_INSN(),
- BPF_MOV64_REG(BPF_REG_7, BPF_REG_0),
- BPF_MOV64_REG(BPF_REG_1, BPF_REG_6),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 4),
- BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_spin_lock),
- BPF_MOV64_REG(BPF_REG_1, BPF_REG_7),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 4),
- BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_spin_unlock),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- },
- .fixup_map_spin_lock = { 3, 11 },
- .result = REJECT,
- .errstr = "unlock of different lock",
- .result_unpriv = REJECT,
- .errstr_unpriv = "",
- .prog_type = BPF_PROG_TYPE_CGROUP_SKB,
-},
-{
- "spin_lock: test10 lock in subprog without unlock",
- .insns = {
- BPF_ST_MEM(BPF_W, BPF_REG_10, -4, 0),
- BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -4),
- BPF_LD_MAP_FD(BPF_REG_1,
- 0),
- BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
- BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 1),
- BPF_EXIT_INSN(),
- BPF_MOV64_REG(BPF_REG_6, BPF_REG_0),
- BPF_MOV64_REG(BPF_REG_1, BPF_REG_0),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 4),
- BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, 5),
- BPF_MOV64_REG(BPF_REG_1, BPF_REG_6),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 4),
- BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_spin_unlock),
- BPF_MOV64_IMM(BPF_REG_0, 1),
- BPF_EXIT_INSN(),
- BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_spin_lock),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- },
- .fixup_map_spin_lock = { 3 },
- .result = REJECT,
- .errstr = "unlock is missing",
- .result_unpriv = REJECT,
- .errstr_unpriv = "",
- .prog_type = BPF_PROG_TYPE_CGROUP_SKB,
-},
-{
- "spin_lock: test11 ld_abs under lock",
- .insns = {
- BPF_MOV64_REG(BPF_REG_6, BPF_REG_1),
- BPF_ST_MEM(BPF_W, BPF_REG_10, -4, 0),
- BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -4),
- BPF_LD_MAP_FD(BPF_REG_1,
- 0),
- BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
- BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 1),
- BPF_EXIT_INSN(),
- BPF_MOV64_REG(BPF_REG_7, BPF_REG_0),
- BPF_MOV64_REG(BPF_REG_1, BPF_REG_0),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 4),
- BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_spin_lock),
- BPF_LD_ABS(BPF_B, 0),
- BPF_MOV64_REG(BPF_REG_1, BPF_REG_7),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 4),
- BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_spin_unlock),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- },
- .fixup_map_spin_lock = { 4 },
- .result = REJECT,
- .errstr = "inside bpf_spin_lock",
- .prog_type = BPF_PROG_TYPE_SCHED_CLS,
-},
diff --git a/tools/testing/selftests/bpf/verifier/stack_ptr.c b/tools/testing/selftests/bpf/verifier/stack_ptr.c
deleted file mode 100644
index 8bfeb77c60bd..000000000000
--- a/tools/testing/selftests/bpf/verifier/stack_ptr.c
+++ /dev/null
@@ -1,357 +0,0 @@
-{
- "PTR_TO_STACK store/load",
- .insns = {
- BPF_MOV64_REG(BPF_REG_1, BPF_REG_10),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -10),
- BPF_ST_MEM(BPF_DW, BPF_REG_1, 2, 0xfaceb00c),
- BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, 2),
- BPF_EXIT_INSN(),
- },
- .result = ACCEPT,
- .retval = 0xfaceb00c,
-},
-{
- "PTR_TO_STACK store/load - bad alignment on off",
- .insns = {
- BPF_MOV64_REG(BPF_REG_1, BPF_REG_10),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -8),
- BPF_ST_MEM(BPF_DW, BPF_REG_1, 2, 0xfaceb00c),
- BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, 2),
- BPF_EXIT_INSN(),
- },
- .result = REJECT,
- .errstr = "misaligned stack access off (0x0; 0x0)+-8+2 size 8",
-},
-{
- "PTR_TO_STACK store/load - bad alignment on reg",
- .insns = {
- BPF_MOV64_REG(BPF_REG_1, BPF_REG_10),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -10),
- BPF_ST_MEM(BPF_DW, BPF_REG_1, 8, 0xfaceb00c),
- BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, 8),
- BPF_EXIT_INSN(),
- },
- .result = REJECT,
- .errstr = "misaligned stack access off (0x0; 0x0)+-10+8 size 8",
-},
-{
- "PTR_TO_STACK store/load - out of bounds low",
- .insns = {
- BPF_MOV64_REG(BPF_REG_1, BPF_REG_10),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -80000),
- BPF_ST_MEM(BPF_DW, BPF_REG_1, 8, 0xfaceb00c),
- BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, 8),
- BPF_EXIT_INSN(),
- },
- .result = REJECT,
- .errstr = "invalid stack off=-79992 size=8",
- .errstr_unpriv = "R1 stack pointer arithmetic goes out of range",
-},
-{
- "PTR_TO_STACK store/load - out of bounds high",
- .insns = {
- BPF_MOV64_REG(BPF_REG_1, BPF_REG_10),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -8),
- BPF_ST_MEM(BPF_DW, BPF_REG_1, 8, 0xfaceb00c),
- BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, 8),
- BPF_EXIT_INSN(),
- },
- .result = REJECT,
- .errstr = "invalid stack off=0 size=8",
-},
-{
- "PTR_TO_STACK check high 1",
- .insns = {
- BPF_MOV64_REG(BPF_REG_1, BPF_REG_10),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -1),
- BPF_ST_MEM(BPF_B, BPF_REG_1, 0, 42),
- BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1, 0),
- BPF_EXIT_INSN(),
- },
- .result = ACCEPT,
- .retval = 42,
-},
-{
- "PTR_TO_STACK check high 2",
- .insns = {
- BPF_MOV64_REG(BPF_REG_1, BPF_REG_10),
- BPF_ST_MEM(BPF_B, BPF_REG_1, -1, 42),
- BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1, -1),
- BPF_EXIT_INSN(),
- },
- .result = ACCEPT,
- .retval = 42,
-},
-{
- "PTR_TO_STACK check high 3",
- .insns = {
- BPF_MOV64_REG(BPF_REG_1, BPF_REG_10),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 0),
- BPF_ST_MEM(BPF_B, BPF_REG_1, -1, 42),
- BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1, -1),
- BPF_EXIT_INSN(),
- },
- .errstr_unpriv = "R1 stack pointer arithmetic goes out of range",
- .result_unpriv = REJECT,
- .result = ACCEPT,
- .retval = 42,
-},
-{
- "PTR_TO_STACK check high 4",
- .insns = {
- BPF_MOV64_REG(BPF_REG_1, BPF_REG_10),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 0),
- BPF_ST_MEM(BPF_B, BPF_REG_1, 0, 42),
- BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1, 0),
- BPF_EXIT_INSN(),
- },
- .errstr_unpriv = "R1 stack pointer arithmetic goes out of range",
- .errstr = "invalid stack off=0 size=1",
- .result = REJECT,
-},
-{
- "PTR_TO_STACK check high 5",
- .insns = {
- BPF_MOV64_REG(BPF_REG_1, BPF_REG_10),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, (1 << 29) - 1),
- BPF_ST_MEM(BPF_B, BPF_REG_1, 0, 42),
- BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1, 0),
- BPF_EXIT_INSN(),
- },
- .result = REJECT,
- .errstr = "invalid stack off",
-},
-{
- "PTR_TO_STACK check high 6",
- .insns = {
- BPF_MOV64_REG(BPF_REG_1, BPF_REG_10),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, (1 << 29) - 1),
- BPF_ST_MEM(BPF_B, BPF_REG_1, SHRT_MAX, 42),
- BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1, SHRT_MAX),
- BPF_EXIT_INSN(),
- },
- .result = REJECT,
- .errstr = "invalid stack off",
-},
-{
- "PTR_TO_STACK check high 7",
- .insns = {
- BPF_MOV64_REG(BPF_REG_1, BPF_REG_10),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, (1 << 29) - 1),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, (1 << 29) - 1),
- BPF_ST_MEM(BPF_B, BPF_REG_1, SHRT_MAX, 42),
- BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1, SHRT_MAX),
- BPF_EXIT_INSN(),
- },
- .result = REJECT,
- .errstr_unpriv = "R1 stack pointer arithmetic goes out of range",
- .errstr = "fp pointer offset",
-},
-{
- "PTR_TO_STACK check low 1",
- .insns = {
- BPF_MOV64_REG(BPF_REG_1, BPF_REG_10),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -512),
- BPF_ST_MEM(BPF_B, BPF_REG_1, 0, 42),
- BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1, 0),
- BPF_EXIT_INSN(),
- },
- .result = ACCEPT,
- .retval = 42,
-},
-{
- "PTR_TO_STACK check low 2",
- .insns = {
- BPF_MOV64_REG(BPF_REG_1, BPF_REG_10),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -513),
- BPF_ST_MEM(BPF_B, BPF_REG_1, 1, 42),
- BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1, 1),
- BPF_EXIT_INSN(),
- },
- .result_unpriv = REJECT,
- .errstr_unpriv = "R1 stack pointer arithmetic goes out of range",
- .result = ACCEPT,
- .retval = 42,
-},
-{
- "PTR_TO_STACK check low 3",
- .insns = {
- BPF_MOV64_REG(BPF_REG_1, BPF_REG_10),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -513),
- BPF_ST_MEM(BPF_B, BPF_REG_1, 0, 42),
- BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1, 0),
- BPF_EXIT_INSN(),
- },
- .errstr_unpriv = "R1 stack pointer arithmetic goes out of range",
- .errstr = "invalid stack off=-513 size=1",
- .result = REJECT,
-},
-{
- "PTR_TO_STACK check low 4",
- .insns = {
- BPF_MOV64_REG(BPF_REG_1, BPF_REG_10),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, INT_MIN),
- BPF_ST_MEM(BPF_B, BPF_REG_1, 0, 42),
- BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1, 0),
- BPF_EXIT_INSN(),
- },
- .result = REJECT,
- .errstr = "math between fp pointer",
-},
-{
- "PTR_TO_STACK check low 5",
- .insns = {
- BPF_MOV64_REG(BPF_REG_1, BPF_REG_10),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -((1 << 29) - 1)),
- BPF_ST_MEM(BPF_B, BPF_REG_1, 0, 42),
- BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1, 0),
- BPF_EXIT_INSN(),
- },
- .result = REJECT,
- .errstr = "invalid stack off",
-},
-{
- "PTR_TO_STACK check low 6",
- .insns = {
- BPF_MOV64_REG(BPF_REG_1, BPF_REG_10),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -((1 << 29) - 1)),
- BPF_ST_MEM(BPF_B, BPF_REG_1, SHRT_MIN, 42),
- BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1, SHRT_MIN),
- BPF_EXIT_INSN(),
- },
- .result = REJECT,
- .errstr = "invalid stack off",
-},
-{
- "PTR_TO_STACK check low 7",
- .insns = {
- BPF_MOV64_REG(BPF_REG_1, BPF_REG_10),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -((1 << 29) - 1)),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -((1 << 29) - 1)),
- BPF_ST_MEM(BPF_B, BPF_REG_1, SHRT_MIN, 42),
- BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1, SHRT_MIN),
- BPF_EXIT_INSN(),
- },
- .result = REJECT,
- .errstr_unpriv = "R1 stack pointer arithmetic goes out of range",
- .errstr = "fp pointer offset",
-},
-{
- "PTR_TO_STACK mixed reg/k, 1",
- .insns = {
- BPF_MOV64_REG(BPF_REG_1, BPF_REG_10),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -3),
- BPF_MOV64_IMM(BPF_REG_2, -3),
- BPF_ALU64_REG(BPF_ADD, BPF_REG_1, BPF_REG_2),
- BPF_ST_MEM(BPF_B, BPF_REG_1, 0, 42),
- BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1, 0),
- BPF_EXIT_INSN(),
- },
- .result = ACCEPT,
- .retval = 42,
-},
-{
- "PTR_TO_STACK mixed reg/k, 2",
- .insns = {
- BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
- BPF_ST_MEM(BPF_DW, BPF_REG_10, -16, 0),
- BPF_MOV64_REG(BPF_REG_1, BPF_REG_10),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -3),
- BPF_MOV64_IMM(BPF_REG_2, -3),
- BPF_ALU64_REG(BPF_ADD, BPF_REG_1, BPF_REG_2),
- BPF_ST_MEM(BPF_B, BPF_REG_1, 0, 42),
- BPF_MOV64_REG(BPF_REG_5, BPF_REG_10),
- BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_5, -6),
- BPF_EXIT_INSN(),
- },
- .result = ACCEPT,
- .retval = 42,
-},
-{
- "PTR_TO_STACK mixed reg/k, 3",
- .insns = {
- BPF_MOV64_REG(BPF_REG_1, BPF_REG_10),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -3),
- BPF_MOV64_IMM(BPF_REG_2, -3),
- BPF_ALU64_REG(BPF_ADD, BPF_REG_1, BPF_REG_2),
- BPF_ST_MEM(BPF_B, BPF_REG_1, 0, 42),
- BPF_MOV64_REG(BPF_REG_0, BPF_REG_2),
- BPF_EXIT_INSN(),
- },
- .result = ACCEPT,
- .retval = -3,
-},
-{
- "PTR_TO_STACK reg",
- .insns = {
- BPF_MOV64_REG(BPF_REG_1, BPF_REG_10),
- BPF_MOV64_IMM(BPF_REG_2, -3),
- BPF_ALU64_REG(BPF_ADD, BPF_REG_1, BPF_REG_2),
- BPF_ST_MEM(BPF_B, BPF_REG_1, 0, 42),
- BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1, 0),
- BPF_EXIT_INSN(),
- },
- .result_unpriv = REJECT,
- .errstr_unpriv = "invalid stack off=0 size=1",
- .result = ACCEPT,
- .retval = 42,
-},
-{
- "stack pointer arithmetic",
- .insns = {
- BPF_MOV64_IMM(BPF_REG_1, 4),
- BPF_JMP_IMM(BPF_JA, 0, 0, 0),
- BPF_MOV64_REG(BPF_REG_7, BPF_REG_10),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_7, -10),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_7, -10),
- BPF_MOV64_REG(BPF_REG_2, BPF_REG_7),
- BPF_ALU64_REG(BPF_ADD, BPF_REG_2, BPF_REG_1),
- BPF_ST_MEM(0, BPF_REG_2, 4, 0),
- BPF_MOV64_REG(BPF_REG_2, BPF_REG_7),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, 8),
- BPF_ST_MEM(0, BPF_REG_2, 4, 0),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- },
- .result = ACCEPT,
-},
-{
- "store PTR_TO_STACK in R10 to array map using BPF_B",
- .insns = {
- /* Load pointer to map. */
- BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
- BPF_ST_MEM(BPF_DW, BPF_REG_2, 0, 0),
- BPF_LD_MAP_FD(BPF_REG_1, 0),
- BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem),
- BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 2),
- BPF_MOV64_IMM(BPF_REG_0, 2),
- BPF_EXIT_INSN(),
- BPF_MOV64_REG(BPF_REG_1, BPF_REG_0),
- /* Copy R10 to R9. */
- BPF_MOV64_REG(BPF_REG_9, BPF_REG_10),
- /* Pollute other registers with unaligned values. */
- BPF_MOV64_IMM(BPF_REG_2, -1),
- BPF_MOV64_IMM(BPF_REG_3, -1),
- BPF_MOV64_IMM(BPF_REG_4, -1),
- BPF_MOV64_IMM(BPF_REG_5, -1),
- BPF_MOV64_IMM(BPF_REG_6, -1),
- BPF_MOV64_IMM(BPF_REG_7, -1),
- BPF_MOV64_IMM(BPF_REG_8, -1),
- /* Store both R9 and R10 with BPF_B and read back. */
- BPF_STX_MEM(BPF_B, BPF_REG_1, BPF_REG_10, 0),
- BPF_LDX_MEM(BPF_B, BPF_REG_2, BPF_REG_1, 0),
- BPF_STX_MEM(BPF_B, BPF_REG_1, BPF_REG_9, 0),
- BPF_LDX_MEM(BPF_B, BPF_REG_3, BPF_REG_1, 0),
- /* Should read back as same value. */
- BPF_JMP_REG(BPF_JEQ, BPF_REG_2, BPF_REG_3, 2),
- BPF_MOV64_IMM(BPF_REG_0, 1),
- BPF_EXIT_INSN(),
- BPF_MOV64_IMM(BPF_REG_0, 42),
- BPF_EXIT_INSN(),
- },
- .fixup_map_array_48b = { 3 },
- .result = ACCEPT,
- .retval = 42,
- .prog_type = BPF_PROG_TYPE_SCHED_CLS,
-},
diff --git a/tools/testing/selftests/bpf/verifier/subreg.c b/tools/testing/selftests/bpf/verifier/subreg.c
deleted file mode 100644
index 4c4133c80440..000000000000
--- a/tools/testing/selftests/bpf/verifier/subreg.c
+++ /dev/null
@@ -1,533 +0,0 @@
-/* This file contains sub-register zero extension checks for insns defining
- * sub-registers, meaning:
- * - All insns under BPF_ALU class. Their BPF_ALU32 variants or narrow width
- * forms (BPF_END) could define sub-registers.
- * - Narrow direct loads, BPF_B/H/W | BPF_LDX.
- * - BPF_LD is not exposed to JIT back-ends, so no need for testing.
- *
- * "get_prandom_u32" is used to initialize low 32-bit of some registers to
- * prevent potential optimizations done by verifier or JIT back-ends which could
- * optimize register back into constant when range info shows one register is a
- * constant.
- */
-{
- "add32 reg zero extend check",
- .insns = {
- BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_get_prandom_u32),
- BPF_MOV64_REG(BPF_REG_1, BPF_REG_0),
- BPF_LD_IMM64(BPF_REG_0, 0x100000000ULL),
- BPF_ALU32_REG(BPF_ADD, BPF_REG_0, BPF_REG_1),
- BPF_ALU64_IMM(BPF_RSH, BPF_REG_0, 32),
- BPF_EXIT_INSN(),
- },
- .result = ACCEPT,
- .retval = 0,
-},
-{
- "add32 imm zero extend check",
- .insns = {
- BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_get_prandom_u32),
- BPF_LD_IMM64(BPF_REG_1, 0x1000000000ULL),
- BPF_ALU64_REG(BPF_OR, BPF_REG_0, BPF_REG_1),
- /* An insn could have no effect on the low 32-bit, for example:
- * a = a + 0
- * a = a | 0
- * a = a & -1
- * But, they should still zero high 32-bit.
- */
- BPF_ALU32_IMM(BPF_ADD, BPF_REG_0, 0),
- BPF_ALU64_IMM(BPF_RSH, BPF_REG_0, 32),
- BPF_MOV64_REG(BPF_REG_6, BPF_REG_0),
- BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_get_prandom_u32),
- BPF_LD_IMM64(BPF_REG_1, 0x1000000000ULL),
- BPF_ALU64_REG(BPF_OR, BPF_REG_0, BPF_REG_1),
- BPF_ALU32_IMM(BPF_ADD, BPF_REG_0, -2),
- BPF_ALU64_IMM(BPF_RSH, BPF_REG_0, 32),
- BPF_ALU64_REG(BPF_OR, BPF_REG_0, BPF_REG_6),
- BPF_EXIT_INSN(),
- },
- .result = ACCEPT,
- .retval = 0,
-},
-{
- "sub32 reg zero extend check",
- .insns = {
- BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_get_prandom_u32),
- BPF_MOV64_REG(BPF_REG_1, BPF_REG_0),
- BPF_LD_IMM64(BPF_REG_0, 0x1ffffffffULL),
- BPF_ALU32_REG(BPF_SUB, BPF_REG_0, BPF_REG_1),
- BPF_ALU64_IMM(BPF_RSH, BPF_REG_0, 32),
- BPF_EXIT_INSN(),
- },
- .result = ACCEPT,
- .retval = 0,
-},
-{
- "sub32 imm zero extend check",
- .insns = {
- BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_get_prandom_u32),
- BPF_LD_IMM64(BPF_REG_1, 0x1000000000ULL),
- BPF_ALU64_REG(BPF_OR, BPF_REG_0, BPF_REG_1),
- BPF_ALU32_IMM(BPF_SUB, BPF_REG_0, 0),
- BPF_ALU64_IMM(BPF_RSH, BPF_REG_0, 32),
- BPF_MOV64_REG(BPF_REG_6, BPF_REG_0),
- BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_get_prandom_u32),
- BPF_LD_IMM64(BPF_REG_1, 0x1000000000ULL),
- BPF_ALU64_REG(BPF_OR, BPF_REG_0, BPF_REG_1),
- BPF_ALU32_IMM(BPF_SUB, BPF_REG_0, 1),
- BPF_ALU64_IMM(BPF_RSH, BPF_REG_0, 32),
- BPF_ALU64_REG(BPF_OR, BPF_REG_0, BPF_REG_6),
- BPF_EXIT_INSN(),
- },
- .result = ACCEPT,
- .retval = 0,
-},
-{
- "mul32 reg zero extend check",
- .insns = {
- BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_get_prandom_u32),
- BPF_MOV64_REG(BPF_REG_1, BPF_REG_0),
- BPF_LD_IMM64(BPF_REG_0, 0x100000001ULL),
- BPF_ALU32_REG(BPF_MUL, BPF_REG_0, BPF_REG_1),
- BPF_ALU64_IMM(BPF_RSH, BPF_REG_0, 32),
- BPF_EXIT_INSN(),
- },
- .result = ACCEPT,
- .retval = 0,
-},
-{
- "mul32 imm zero extend check",
- .insns = {
- BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_get_prandom_u32),
- BPF_LD_IMM64(BPF_REG_1, 0x1000000000ULL),
- BPF_ALU64_REG(BPF_OR, BPF_REG_0, BPF_REG_1),
- BPF_ALU32_IMM(BPF_MUL, BPF_REG_0, 1),
- BPF_ALU64_IMM(BPF_RSH, BPF_REG_0, 32),
- BPF_MOV64_REG(BPF_REG_6, BPF_REG_0),
- BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_get_prandom_u32),
- BPF_LD_IMM64(BPF_REG_1, 0x1000000000ULL),
- BPF_ALU64_REG(BPF_OR, BPF_REG_0, BPF_REG_1),
- BPF_ALU32_IMM(BPF_MUL, BPF_REG_0, -1),
- BPF_ALU64_IMM(BPF_RSH, BPF_REG_0, 32),
- BPF_ALU64_REG(BPF_OR, BPF_REG_0, BPF_REG_6),
- BPF_EXIT_INSN(),
- },
- .result = ACCEPT,
- .retval = 0,
-},
-{
- "div32 reg zero extend check",
- .insns = {
- BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_get_prandom_u32),
- BPF_MOV64_REG(BPF_REG_1, BPF_REG_0),
- BPF_MOV64_IMM(BPF_REG_0, -1),
- BPF_ALU32_REG(BPF_DIV, BPF_REG_0, BPF_REG_1),
- BPF_ALU64_IMM(BPF_RSH, BPF_REG_0, 32),
- BPF_EXIT_INSN(),
- },
- .result = ACCEPT,
- .retval = 0,
-},
-{
- "div32 imm zero extend check",
- .insns = {
- BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_get_prandom_u32),
- BPF_LD_IMM64(BPF_REG_1, 0x1000000000ULL),
- BPF_ALU64_REG(BPF_OR, BPF_REG_0, BPF_REG_1),
- BPF_ALU32_IMM(BPF_DIV, BPF_REG_0, 1),
- BPF_ALU64_IMM(BPF_RSH, BPF_REG_0, 32),
- BPF_MOV64_REG(BPF_REG_6, BPF_REG_0),
- BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_get_prandom_u32),
- BPF_LD_IMM64(BPF_REG_1, 0x1000000000ULL),
- BPF_ALU64_REG(BPF_OR, BPF_REG_0, BPF_REG_1),
- BPF_ALU32_IMM(BPF_DIV, BPF_REG_0, 2),
- BPF_ALU64_IMM(BPF_RSH, BPF_REG_0, 32),
- BPF_ALU64_REG(BPF_OR, BPF_REG_0, BPF_REG_6),
- BPF_EXIT_INSN(),
- },
- .result = ACCEPT,
- .retval = 0,
-},
-{
- "or32 reg zero extend check",
- .insns = {
- BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_get_prandom_u32),
- BPF_MOV64_REG(BPF_REG_1, BPF_REG_0),
- BPF_LD_IMM64(BPF_REG_0, 0x100000001ULL),
- BPF_ALU32_REG(BPF_OR, BPF_REG_0, BPF_REG_1),
- BPF_ALU64_IMM(BPF_RSH, BPF_REG_0, 32),
- BPF_EXIT_INSN(),
- },
- .result = ACCEPT,
- .retval = 0,
-},
-{
- "or32 imm zero extend check",
- .insns = {
- BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_get_prandom_u32),
- BPF_LD_IMM64(BPF_REG_1, 0x1000000000ULL),
- BPF_ALU64_REG(BPF_OR, BPF_REG_0, BPF_REG_1),
- BPF_ALU32_IMM(BPF_OR, BPF_REG_0, 0),
- BPF_ALU64_IMM(BPF_RSH, BPF_REG_0, 32),
- BPF_MOV64_REG(BPF_REG_6, BPF_REG_0),
- BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_get_prandom_u32),
- BPF_LD_IMM64(BPF_REG_1, 0x1000000000ULL),
- BPF_ALU64_REG(BPF_OR, BPF_REG_0, BPF_REG_1),
- BPF_ALU32_IMM(BPF_OR, BPF_REG_0, 1),
- BPF_ALU64_IMM(BPF_RSH, BPF_REG_0, 32),
- BPF_ALU64_REG(BPF_OR, BPF_REG_0, BPF_REG_6),
- BPF_EXIT_INSN(),
- },
- .result = ACCEPT,
- .retval = 0,
-},
-{
- "and32 reg zero extend check",
- .insns = {
- BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_get_prandom_u32),
- BPF_LD_IMM64(BPF_REG_1, 0x100000000ULL),
- BPF_ALU64_REG(BPF_OR, BPF_REG_1, BPF_REG_0),
- BPF_LD_IMM64(BPF_REG_0, 0x1ffffffffULL),
- BPF_ALU32_REG(BPF_AND, BPF_REG_0, BPF_REG_1),
- BPF_ALU64_IMM(BPF_RSH, BPF_REG_0, 32),
- BPF_EXIT_INSN(),
- },
- .result = ACCEPT,
- .retval = 0,
-},
-{
- "and32 imm zero extend check",
- .insns = {
- BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_get_prandom_u32),
- BPF_LD_IMM64(BPF_REG_1, 0x1000000000ULL),
- BPF_ALU64_REG(BPF_OR, BPF_REG_0, BPF_REG_1),
- BPF_ALU32_IMM(BPF_AND, BPF_REG_0, -1),
- BPF_ALU64_IMM(BPF_RSH, BPF_REG_0, 32),
- BPF_MOV64_REG(BPF_REG_6, BPF_REG_0),
- BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_get_prandom_u32),
- BPF_LD_IMM64(BPF_REG_1, 0x1000000000ULL),
- BPF_ALU64_REG(BPF_OR, BPF_REG_0, BPF_REG_1),
- BPF_ALU32_IMM(BPF_AND, BPF_REG_0, -2),
- BPF_ALU64_IMM(BPF_RSH, BPF_REG_0, 32),
- BPF_ALU64_REG(BPF_OR, BPF_REG_0, BPF_REG_6),
- BPF_EXIT_INSN(),
- },
- .result = ACCEPT,
- .retval = 0,
-},
-{
- "lsh32 reg zero extend check",
- .insns = {
- BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_get_prandom_u32),
- BPF_LD_IMM64(BPF_REG_1, 0x100000000ULL),
- BPF_ALU64_REG(BPF_OR, BPF_REG_0, BPF_REG_1),
- BPF_MOV64_IMM(BPF_REG_1, 1),
- BPF_ALU32_REG(BPF_LSH, BPF_REG_0, BPF_REG_1),
- BPF_ALU64_IMM(BPF_RSH, BPF_REG_0, 32),
- BPF_EXIT_INSN(),
- },
- .result = ACCEPT,
- .retval = 0,
-},
-{
- "lsh32 imm zero extend check",
- .insns = {
- BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_get_prandom_u32),
- BPF_LD_IMM64(BPF_REG_1, 0x1000000000ULL),
- BPF_ALU64_REG(BPF_OR, BPF_REG_0, BPF_REG_1),
- BPF_ALU32_IMM(BPF_LSH, BPF_REG_0, 0),
- BPF_ALU64_IMM(BPF_RSH, BPF_REG_0, 32),
- BPF_MOV64_REG(BPF_REG_6, BPF_REG_0),
- BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_get_prandom_u32),
- BPF_LD_IMM64(BPF_REG_1, 0x1000000000ULL),
- BPF_ALU64_REG(BPF_OR, BPF_REG_0, BPF_REG_1),
- BPF_ALU32_IMM(BPF_LSH, BPF_REG_0, 1),
- BPF_ALU64_IMM(BPF_RSH, BPF_REG_0, 32),
- BPF_ALU64_REG(BPF_OR, BPF_REG_0, BPF_REG_6),
- BPF_EXIT_INSN(),
- },
- .result = ACCEPT,
- .retval = 0,
-},
-{
- "rsh32 reg zero extend check",
- .insns = {
- BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_get_prandom_u32),
- BPF_LD_IMM64(BPF_REG_1, 0x1000000000ULL),
- BPF_ALU64_REG(BPF_OR, BPF_REG_0, BPF_REG_1),
- BPF_MOV64_IMM(BPF_REG_1, 1),
- BPF_ALU32_REG(BPF_RSH, BPF_REG_0, BPF_REG_1),
- BPF_ALU64_IMM(BPF_RSH, BPF_REG_0, 32),
- BPF_EXIT_INSN(),
- },
- .result = ACCEPT,
- .retval = 0,
-},
-{
- "rsh32 imm zero extend check",
- .insns = {
- BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_get_prandom_u32),
- BPF_LD_IMM64(BPF_REG_1, 0x1000000000ULL),
- BPF_ALU64_REG(BPF_OR, BPF_REG_0, BPF_REG_1),
- BPF_ALU32_IMM(BPF_RSH, BPF_REG_0, 0),
- BPF_ALU64_IMM(BPF_RSH, BPF_REG_0, 32),
- BPF_MOV64_REG(BPF_REG_6, BPF_REG_0),
- BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_get_prandom_u32),
- BPF_LD_IMM64(BPF_REG_1, 0x1000000000ULL),
- BPF_ALU64_REG(BPF_OR, BPF_REG_0, BPF_REG_1),
- BPF_ALU32_IMM(BPF_RSH, BPF_REG_0, 1),
- BPF_ALU64_IMM(BPF_RSH, BPF_REG_0, 32),
- BPF_ALU64_REG(BPF_OR, BPF_REG_0, BPF_REG_6),
- BPF_EXIT_INSN(),
- },
- .result = ACCEPT,
- .retval = 0,
-},
-{
- "neg32 reg zero extend check",
- .insns = {
- BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_get_prandom_u32),
- BPF_LD_IMM64(BPF_REG_1, 0x1000000000ULL),
- BPF_ALU64_REG(BPF_OR, BPF_REG_0, BPF_REG_1),
- BPF_ALU32_IMM(BPF_NEG, BPF_REG_0, 0),
- BPF_ALU64_IMM(BPF_RSH, BPF_REG_0, 32),
- BPF_EXIT_INSN(),
- },
- .result = ACCEPT,
- .retval = 0,
-},
-{
- "mod32 reg zero extend check",
- .insns = {
- BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_get_prandom_u32),
- BPF_MOV64_REG(BPF_REG_1, BPF_REG_0),
- BPF_MOV64_IMM(BPF_REG_0, -1),
- BPF_ALU32_REG(BPF_MOD, BPF_REG_0, BPF_REG_1),
- BPF_ALU64_IMM(BPF_RSH, BPF_REG_0, 32),
- BPF_EXIT_INSN(),
- },
- .result = ACCEPT,
- .retval = 0,
-},
-{
- "mod32 imm zero extend check",
- .insns = {
- BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_get_prandom_u32),
- BPF_LD_IMM64(BPF_REG_1, 0x1000000000ULL),
- BPF_ALU64_REG(BPF_OR, BPF_REG_0, BPF_REG_1),
- BPF_ALU32_IMM(BPF_MOD, BPF_REG_0, 1),
- BPF_ALU64_IMM(BPF_RSH, BPF_REG_0, 32),
- BPF_MOV64_REG(BPF_REG_6, BPF_REG_0),
- BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_get_prandom_u32),
- BPF_LD_IMM64(BPF_REG_1, 0x1000000000ULL),
- BPF_ALU64_REG(BPF_OR, BPF_REG_0, BPF_REG_1),
- BPF_ALU32_IMM(BPF_MOD, BPF_REG_0, 2),
- BPF_ALU64_IMM(BPF_RSH, BPF_REG_0, 32),
- BPF_ALU64_REG(BPF_OR, BPF_REG_0, BPF_REG_6),
- BPF_EXIT_INSN(),
- },
- .result = ACCEPT,
- .retval = 0,
-},
-{
- "xor32 reg zero extend check",
- .insns = {
- BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_get_prandom_u32),
- BPF_MOV64_REG(BPF_REG_1, BPF_REG_0),
- BPF_LD_IMM64(BPF_REG_0, 0x100000000ULL),
- BPF_ALU32_REG(BPF_XOR, BPF_REG_0, BPF_REG_1),
- BPF_ALU64_IMM(BPF_RSH, BPF_REG_0, 32),
- BPF_EXIT_INSN(),
- },
- .result = ACCEPT,
- .retval = 0,
-},
-{
- "xor32 imm zero extend check",
- .insns = {
- BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_get_prandom_u32),
- BPF_LD_IMM64(BPF_REG_1, 0x1000000000ULL),
- BPF_ALU64_REG(BPF_OR, BPF_REG_0, BPF_REG_1),
- BPF_ALU32_IMM(BPF_XOR, BPF_REG_0, 1),
- BPF_ALU64_IMM(BPF_RSH, BPF_REG_0, 32),
- BPF_EXIT_INSN(),
- },
- .result = ACCEPT,
- .retval = 0,
-},
-{
- "mov32 reg zero extend check",
- .insns = {
- BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_get_prandom_u32),
- BPF_LD_IMM64(BPF_REG_1, 0x100000000ULL),
- BPF_ALU64_REG(BPF_OR, BPF_REG_1, BPF_REG_0),
- BPF_LD_IMM64(BPF_REG_0, 0x100000000ULL),
- BPF_MOV32_REG(BPF_REG_0, BPF_REG_1),
- BPF_ALU64_IMM(BPF_RSH, BPF_REG_0, 32),
- BPF_EXIT_INSN(),
- },
- .result = ACCEPT,
- .retval = 0,
-},
-{
- "mov32 imm zero extend check",
- .insns = {
- BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_get_prandom_u32),
- BPF_LD_IMM64(BPF_REG_1, 0x1000000000ULL),
- BPF_ALU64_REG(BPF_OR, BPF_REG_0, BPF_REG_1),
- BPF_MOV32_IMM(BPF_REG_0, 0),
- BPF_ALU64_IMM(BPF_RSH, BPF_REG_0, 32),
- BPF_MOV64_REG(BPF_REG_6, BPF_REG_0),
- BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_get_prandom_u32),
- BPF_LD_IMM64(BPF_REG_1, 0x1000000000ULL),
- BPF_ALU64_REG(BPF_OR, BPF_REG_0, BPF_REG_1),
- BPF_MOV32_IMM(BPF_REG_0, 1),
- BPF_ALU64_IMM(BPF_RSH, BPF_REG_0, 32),
- BPF_ALU64_REG(BPF_OR, BPF_REG_0, BPF_REG_6),
- BPF_EXIT_INSN(),
- },
- .result = ACCEPT,
- .retval = 0,
-},
-{
- "arsh32 reg zero extend check",
- .insns = {
- BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_get_prandom_u32),
- BPF_LD_IMM64(BPF_REG_1, 0x1000000000ULL),
- BPF_ALU64_REG(BPF_OR, BPF_REG_0, BPF_REG_1),
- BPF_MOV64_IMM(BPF_REG_1, 1),
- BPF_ALU32_REG(BPF_ARSH, BPF_REG_0, BPF_REG_1),
- BPF_ALU64_IMM(BPF_RSH, BPF_REG_0, 32),
- BPF_EXIT_INSN(),
- },
- .result = ACCEPT,
- .retval = 0,
-},
-{
- "arsh32 imm zero extend check",
- .insns = {
- BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_get_prandom_u32),
- BPF_LD_IMM64(BPF_REG_1, 0x1000000000ULL),
- BPF_ALU64_REG(BPF_OR, BPF_REG_0, BPF_REG_1),
- BPF_ALU32_IMM(BPF_ARSH, BPF_REG_0, 0),
- BPF_ALU64_IMM(BPF_RSH, BPF_REG_0, 32),
- BPF_MOV64_REG(BPF_REG_6, BPF_REG_0),
- BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_get_prandom_u32),
- BPF_LD_IMM64(BPF_REG_1, 0x1000000000ULL),
- BPF_ALU64_REG(BPF_OR, BPF_REG_0, BPF_REG_1),
- BPF_ALU32_IMM(BPF_ARSH, BPF_REG_0, 1),
- BPF_ALU64_IMM(BPF_RSH, BPF_REG_0, 32),
- BPF_ALU64_REG(BPF_OR, BPF_REG_0, BPF_REG_6),
- BPF_EXIT_INSN(),
- },
- .result = ACCEPT,
- .retval = 0,
-},
-{
- "end16 (to_le) reg zero extend check",
- .insns = {
- BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_get_prandom_u32),
- BPF_MOV64_REG(BPF_REG_6, BPF_REG_0),
- BPF_ALU64_IMM(BPF_LSH, BPF_REG_6, 32),
- BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_get_prandom_u32),
- BPF_ALU64_REG(BPF_OR, BPF_REG_0, BPF_REG_6),
- BPF_ENDIAN(BPF_TO_LE, BPF_REG_0, 16),
- BPF_ALU64_IMM(BPF_RSH, BPF_REG_0, 32),
- BPF_EXIT_INSN(),
- },
- .result = ACCEPT,
- .retval = 0,
-},
-{
- "end32 (to_le) reg zero extend check",
- .insns = {
- BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_get_prandom_u32),
- BPF_MOV64_REG(BPF_REG_6, BPF_REG_0),
- BPF_ALU64_IMM(BPF_LSH, BPF_REG_6, 32),
- BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_get_prandom_u32),
- BPF_ALU64_REG(BPF_OR, BPF_REG_0, BPF_REG_6),
- BPF_ENDIAN(BPF_TO_LE, BPF_REG_0, 32),
- BPF_ALU64_IMM(BPF_RSH, BPF_REG_0, 32),
- BPF_EXIT_INSN(),
- },
- .result = ACCEPT,
- .retval = 0,
-},
-{
- "end16 (to_be) reg zero extend check",
- .insns = {
- BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_get_prandom_u32),
- BPF_MOV64_REG(BPF_REG_6, BPF_REG_0),
- BPF_ALU64_IMM(BPF_LSH, BPF_REG_6, 32),
- BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_get_prandom_u32),
- BPF_ALU64_REG(BPF_OR, BPF_REG_0, BPF_REG_6),
- BPF_ENDIAN(BPF_TO_BE, BPF_REG_0, 16),
- BPF_ALU64_IMM(BPF_RSH, BPF_REG_0, 32),
- BPF_EXIT_INSN(),
- },
- .result = ACCEPT,
- .retval = 0,
-},
-{
- "end32 (to_be) reg zero extend check",
- .insns = {
- BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_get_prandom_u32),
- BPF_MOV64_REG(BPF_REG_6, BPF_REG_0),
- BPF_ALU64_IMM(BPF_LSH, BPF_REG_6, 32),
- BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_get_prandom_u32),
- BPF_ALU64_REG(BPF_OR, BPF_REG_0, BPF_REG_6),
- BPF_ENDIAN(BPF_TO_BE, BPF_REG_0, 32),
- BPF_ALU64_IMM(BPF_RSH, BPF_REG_0, 32),
- BPF_EXIT_INSN(),
- },
- .result = ACCEPT,
- .retval = 0,
-},
-{
- "ldx_b zero extend check",
- .insns = {
- BPF_MOV64_REG(BPF_REG_6, BPF_REG_10),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_6, -4),
- BPF_ST_MEM(BPF_W, BPF_REG_6, 0, 0xfaceb00c),
- BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_get_prandom_u32),
- BPF_LD_IMM64(BPF_REG_1, 0x1000000000ULL),
- BPF_ALU64_REG(BPF_OR, BPF_REG_0, BPF_REG_1),
- BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_6, 0),
- BPF_ALU64_IMM(BPF_RSH, BPF_REG_0, 32),
- BPF_EXIT_INSN(),
- },
- .result = ACCEPT,
- .retval = 0,
-},
-{
- "ldx_h zero extend check",
- .insns = {
- BPF_MOV64_REG(BPF_REG_6, BPF_REG_10),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_6, -4),
- BPF_ST_MEM(BPF_W, BPF_REG_6, 0, 0xfaceb00c),
- BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_get_prandom_u32),
- BPF_LD_IMM64(BPF_REG_1, 0x1000000000ULL),
- BPF_ALU64_REG(BPF_OR, BPF_REG_0, BPF_REG_1),
- BPF_LDX_MEM(BPF_H, BPF_REG_0, BPF_REG_6, 0),
- BPF_ALU64_IMM(BPF_RSH, BPF_REG_0, 32),
- BPF_EXIT_INSN(),
- },
- .result = ACCEPT,
- .retval = 0,
-},
-{
- "ldx_w zero extend check",
- .insns = {
- BPF_MOV64_REG(BPF_REG_6, BPF_REG_10),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_6, -4),
- BPF_ST_MEM(BPF_W, BPF_REG_6, 0, 0xfaceb00c),
- BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_get_prandom_u32),
- BPF_LD_IMM64(BPF_REG_1, 0x1000000000ULL),
- BPF_ALU64_REG(BPF_OR, BPF_REG_0, BPF_REG_1),
- BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_6, 0),
- BPF_ALU64_IMM(BPF_RSH, BPF_REG_0, 32),
- BPF_EXIT_INSN(),
- },
- .result = ACCEPT,
- .retval = 0,
-},
diff --git a/tools/testing/selftests/bpf/verifier/uninit.c b/tools/testing/selftests/bpf/verifier/uninit.c
deleted file mode 100644
index 987a5871ff1d..000000000000
--- a/tools/testing/selftests/bpf/verifier/uninit.c
+++ /dev/null
@@ -1,39 +0,0 @@
-{
- "read uninitialized register",
- .insns = {
- BPF_MOV64_REG(BPF_REG_0, BPF_REG_2),
- BPF_EXIT_INSN(),
- },
- .errstr = "R2 !read_ok",
- .result = REJECT,
-},
-{
- "read invalid register",
- .insns = {
- BPF_MOV64_REG(BPF_REG_0, -1),
- BPF_EXIT_INSN(),
- },
- .errstr = "R15 is invalid",
- .result = REJECT,
-},
-{
- "program doesn't init R0 before exit",
- .insns = {
- BPF_ALU64_REG(BPF_MOV, BPF_REG_2, BPF_REG_1),
- BPF_EXIT_INSN(),
- },
- .errstr = "R0 !read_ok",
- .result = REJECT,
-},
-{
- "program doesn't init R0 before exit in all branches",
- .insns = {
- BPF_JMP_IMM(BPF_JGE, BPF_REG_1, 0, 2),
- BPF_MOV64_IMM(BPF_REG_0, 1),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 2),
- BPF_EXIT_INSN(),
- },
- .errstr = "R0 !read_ok",
- .errstr_unpriv = "R1 pointer comparison",
- .result = REJECT,
-},
diff --git a/tools/testing/selftests/bpf/verifier/unpriv.c b/tools/testing/selftests/bpf/verifier/unpriv.c
deleted file mode 100644
index 91bb77c24a2e..000000000000
--- a/tools/testing/selftests/bpf/verifier/unpriv.c
+++ /dev/null
@@ -1,522 +0,0 @@
-{
- "unpriv: return pointer",
- .insns = {
- BPF_MOV64_REG(BPF_REG_0, BPF_REG_10),
- BPF_EXIT_INSN(),
- },
- .result = ACCEPT,
- .result_unpriv = REJECT,
- .errstr_unpriv = "R0 leaks addr",
- .retval = POINTER_VALUE,
-},
-{
- "unpriv: add const to pointer",
- .insns = {
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 8),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- },
- .result = ACCEPT,
-},
-{
- "unpriv: add pointer to pointer",
- .insns = {
- BPF_ALU64_REG(BPF_ADD, BPF_REG_1, BPF_REG_10),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- },
- .result = REJECT,
- .errstr = "R1 pointer += pointer",
-},
-{
- "unpriv: neg pointer",
- .insns = {
- BPF_ALU64_IMM(BPF_NEG, BPF_REG_1, 0),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- },
- .result = ACCEPT,
- .result_unpriv = REJECT,
- .errstr_unpriv = "R1 pointer arithmetic",
-},
-{
- "unpriv: cmp pointer with const",
- .insns = {
- BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, 0, 0),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- },
- .result = ACCEPT,
- .result_unpriv = REJECT,
- .errstr_unpriv = "R1 pointer comparison",
-},
-{
- "unpriv: cmp pointer with pointer",
- .insns = {
- BPF_JMP_REG(BPF_JEQ, BPF_REG_1, BPF_REG_10, 0),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- },
- .result = ACCEPT,
- .result_unpriv = REJECT,
- .errstr_unpriv = "R10 pointer comparison",
-},
-{
- "unpriv: check that printk is disallowed",
- .insns = {
- BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
- BPF_MOV64_REG(BPF_REG_1, BPF_REG_10),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -8),
- BPF_MOV64_IMM(BPF_REG_2, 8),
- BPF_MOV64_REG(BPF_REG_3, BPF_REG_1),
- BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_trace_printk),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- },
- .errstr_unpriv = "unknown func bpf_trace_printk#6",
- .result_unpriv = REJECT,
- .result = ACCEPT,
- .prog_type = BPF_PROG_TYPE_TRACEPOINT,
-},
-{
- "unpriv: pass pointer to helper function",
- .insns = {
- BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
- BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
- BPF_LD_MAP_FD(BPF_REG_1, 0),
- BPF_MOV64_REG(BPF_REG_3, BPF_REG_2),
- BPF_MOV64_REG(BPF_REG_4, BPF_REG_2),
- BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_update_elem),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- },
- .fixup_map_hash_8b = { 3 },
- .errstr_unpriv = "R4 leaks addr",
- .result_unpriv = REJECT,
- .result = ACCEPT,
-},
-{
- "unpriv: indirectly pass pointer on stack to helper function",
- .insns = {
- BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_10, -8),
- BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
- BPF_LD_MAP_FD(BPF_REG_1, 0),
- BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- },
- .fixup_map_hash_8b = { 3 },
- .errstr = "invalid indirect read from stack off -8+0 size 8",
- .result = REJECT,
-},
-{
- "unpriv: mangle pointer on stack 1",
- .insns = {
- BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_10, -8),
- BPF_ST_MEM(BPF_W, BPF_REG_10, -8, 0),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- },
- .errstr_unpriv = "attempt to corrupt spilled",
- .result_unpriv = REJECT,
- .result = ACCEPT,
-},
-{
- "unpriv: mangle pointer on stack 2",
- .insns = {
- BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_10, -8),
- BPF_ST_MEM(BPF_B, BPF_REG_10, -1, 0),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- },
- .errstr_unpriv = "attempt to corrupt spilled",
- .result_unpriv = REJECT,
- .result = ACCEPT,
-},
-{
- "unpriv: read pointer from stack in small chunks",
- .insns = {
- BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_10, -8),
- BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_10, -8),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- },
- .errstr = "invalid size",
- .result = REJECT,
-},
-{
- "unpriv: write pointer into ctx",
- .insns = {
- BPF_STX_MEM(BPF_DW, BPF_REG_1, BPF_REG_1, 0),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- },
- .errstr_unpriv = "R1 leaks addr",
- .result_unpriv = REJECT,
- .errstr = "invalid bpf_context access",
- .result = REJECT,
-},
-{
- "unpriv: spill/fill of ctx",
- .insns = {
- BPF_ALU64_REG(BPF_MOV, BPF_REG_6, BPF_REG_10),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_6, -8),
- BPF_STX_MEM(BPF_DW, BPF_REG_6, BPF_REG_1, 0),
- BPF_LDX_MEM(BPF_DW, BPF_REG_1, BPF_REG_6, 0),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- },
- .result = ACCEPT,
-},
-{
- "unpriv: spill/fill of ctx 2",
- .insns = {
- BPF_ALU64_REG(BPF_MOV, BPF_REG_6, BPF_REG_10),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_6, -8),
- BPF_STX_MEM(BPF_DW, BPF_REG_6, BPF_REG_1, 0),
- BPF_LDX_MEM(BPF_DW, BPF_REG_1, BPF_REG_6, 0),
- BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_get_hash_recalc),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- },
- .result = ACCEPT,
- .prog_type = BPF_PROG_TYPE_SCHED_CLS,
-},
-{
- "unpriv: spill/fill of ctx 3",
- .insns = {
- BPF_ALU64_REG(BPF_MOV, BPF_REG_6, BPF_REG_10),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_6, -8),
- BPF_STX_MEM(BPF_DW, BPF_REG_6, BPF_REG_1, 0),
- BPF_STX_MEM(BPF_DW, BPF_REG_6, BPF_REG_10, 0),
- BPF_LDX_MEM(BPF_DW, BPF_REG_1, BPF_REG_6, 0),
- BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_get_hash_recalc),
- BPF_EXIT_INSN(),
- },
- .result = REJECT,
- .errstr = "R1 type=fp expected=ctx",
- .prog_type = BPF_PROG_TYPE_SCHED_CLS,
-},
-{
- "unpriv: spill/fill of ctx 4",
- .insns = {
- BPF_ALU64_REG(BPF_MOV, BPF_REG_6, BPF_REG_10),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_6, -8),
- BPF_STX_MEM(BPF_DW, BPF_REG_6, BPF_REG_1, 0),
- BPF_MOV64_IMM(BPF_REG_0, 1),
- BPF_RAW_INSN(BPF_STX | BPF_XADD | BPF_DW, BPF_REG_10, BPF_REG_0, -8, 0),
- BPF_LDX_MEM(BPF_DW, BPF_REG_1, BPF_REG_6, 0),
- BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_get_hash_recalc),
- BPF_EXIT_INSN(),
- },
- .result = REJECT,
- .errstr = "R1 type=inv expected=ctx",
- .prog_type = BPF_PROG_TYPE_SCHED_CLS,
-},
-{
- "unpriv: spill/fill of different pointers stx",
- .insns = {
- BPF_MOV64_IMM(BPF_REG_3, 42),
- BPF_ALU64_REG(BPF_MOV, BPF_REG_6, BPF_REG_10),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_6, -8),
- BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, 0, 3),
- BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -16),
- BPF_STX_MEM(BPF_DW, BPF_REG_6, BPF_REG_2, 0),
- BPF_JMP_IMM(BPF_JNE, BPF_REG_1, 0, 1),
- BPF_STX_MEM(BPF_DW, BPF_REG_6, BPF_REG_1, 0),
- BPF_LDX_MEM(BPF_DW, BPF_REG_1, BPF_REG_6, 0),
- BPF_STX_MEM(BPF_W, BPF_REG_1, BPF_REG_3,
- offsetof(struct __sk_buff, mark)),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- },
- .result = REJECT,
- .errstr = "same insn cannot be used with different pointers",
- .prog_type = BPF_PROG_TYPE_SCHED_CLS,
-},
-{
- "unpriv: spill/fill of different pointers stx - ctx and sock",
- .insns = {
- BPF_MOV64_REG(BPF_REG_8, BPF_REG_1),
- /* struct bpf_sock *sock = bpf_sock_lookup(...); */
- BPF_SK_LOOKUP(sk_lookup_tcp),
- BPF_MOV64_REG(BPF_REG_2, BPF_REG_0),
- /* u64 foo; */
- /* void *target = &foo; */
- BPF_ALU64_REG(BPF_MOV, BPF_REG_6, BPF_REG_10),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_6, -8),
- BPF_MOV64_REG(BPF_REG_1, BPF_REG_8),
- /* if (skb == NULL) *target = sock; */
- BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, 0, 1),
- BPF_STX_MEM(BPF_DW, BPF_REG_6, BPF_REG_2, 0),
- /* else *target = skb; */
- BPF_JMP_IMM(BPF_JNE, BPF_REG_1, 0, 1),
- BPF_STX_MEM(BPF_DW, BPF_REG_6, BPF_REG_1, 0),
- /* struct __sk_buff *skb = *target; */
- BPF_LDX_MEM(BPF_DW, BPF_REG_1, BPF_REG_6, 0),
- /* skb->mark = 42; */
- BPF_MOV64_IMM(BPF_REG_3, 42),
- BPF_STX_MEM(BPF_W, BPF_REG_1, BPF_REG_3,
- offsetof(struct __sk_buff, mark)),
- /* if (sk) bpf_sk_release(sk) */
- BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, 0, 1),
- BPF_EMIT_CALL(BPF_FUNC_sk_release),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- },
- .result = REJECT,
- .errstr = "type=ctx expected=sock",
- .prog_type = BPF_PROG_TYPE_SCHED_CLS,
-},
-{
- "unpriv: spill/fill of different pointers stx - leak sock",
- .insns = {
- BPF_MOV64_REG(BPF_REG_8, BPF_REG_1),
- /* struct bpf_sock *sock = bpf_sock_lookup(...); */
- BPF_SK_LOOKUP(sk_lookup_tcp),
- BPF_MOV64_REG(BPF_REG_2, BPF_REG_0),
- /* u64 foo; */
- /* void *target = &foo; */
- BPF_ALU64_REG(BPF_MOV, BPF_REG_6, BPF_REG_10),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_6, -8),
- BPF_MOV64_REG(BPF_REG_1, BPF_REG_8),
- /* if (skb == NULL) *target = sock; */
- BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, 0, 1),
- BPF_STX_MEM(BPF_DW, BPF_REG_6, BPF_REG_2, 0),
- /* else *target = skb; */
- BPF_JMP_IMM(BPF_JNE, BPF_REG_1, 0, 1),
- BPF_STX_MEM(BPF_DW, BPF_REG_6, BPF_REG_1, 0),
- /* struct __sk_buff *skb = *target; */
- BPF_LDX_MEM(BPF_DW, BPF_REG_1, BPF_REG_6, 0),
- /* skb->mark = 42; */
- BPF_MOV64_IMM(BPF_REG_3, 42),
- BPF_STX_MEM(BPF_W, BPF_REG_1, BPF_REG_3,
- offsetof(struct __sk_buff, mark)),
- BPF_EXIT_INSN(),
- },
- .result = REJECT,
- //.errstr = "same insn cannot be used with different pointers",
- .errstr = "Unreleased reference",
- .prog_type = BPF_PROG_TYPE_SCHED_CLS,
-},
-{
- "unpriv: spill/fill of different pointers stx - sock and ctx (read)",
- .insns = {
- BPF_MOV64_REG(BPF_REG_8, BPF_REG_1),
- /* struct bpf_sock *sock = bpf_sock_lookup(...); */
- BPF_SK_LOOKUP(sk_lookup_tcp),
- BPF_MOV64_REG(BPF_REG_2, BPF_REG_0),
- /* u64 foo; */
- /* void *target = &foo; */
- BPF_ALU64_REG(BPF_MOV, BPF_REG_6, BPF_REG_10),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_6, -8),
- BPF_MOV64_REG(BPF_REG_1, BPF_REG_8),
- /* if (skb) *target = skb */
- BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, 0, 1),
- BPF_STX_MEM(BPF_DW, BPF_REG_6, BPF_REG_1, 0),
- /* else *target = sock */
- BPF_JMP_IMM(BPF_JNE, BPF_REG_1, 0, 1),
- BPF_STX_MEM(BPF_DW, BPF_REG_6, BPF_REG_2, 0),
- /* struct bpf_sock *sk = *target; */
- BPF_LDX_MEM(BPF_DW, BPF_REG_1, BPF_REG_6, 0),
- /* if (sk) u32 foo = sk->mark; bpf_sk_release(sk); */
- BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, 0, 2),
- BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1,
- offsetof(struct bpf_sock, mark)),
- BPF_EMIT_CALL(BPF_FUNC_sk_release),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- },
- .result = REJECT,
- .errstr = "same insn cannot be used with different pointers",
- .prog_type = BPF_PROG_TYPE_SCHED_CLS,
-},
-{
- "unpriv: spill/fill of different pointers stx - sock and ctx (write)",
- .insns = {
- BPF_MOV64_REG(BPF_REG_8, BPF_REG_1),
- /* struct bpf_sock *sock = bpf_sock_lookup(...); */
- BPF_SK_LOOKUP(sk_lookup_tcp),
- BPF_MOV64_REG(BPF_REG_2, BPF_REG_0),
- /* u64 foo; */
- /* void *target = &foo; */
- BPF_ALU64_REG(BPF_MOV, BPF_REG_6, BPF_REG_10),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_6, -8),
- BPF_MOV64_REG(BPF_REG_1, BPF_REG_8),
- /* if (skb) *target = skb */
- BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, 0, 1),
- BPF_STX_MEM(BPF_DW, BPF_REG_6, BPF_REG_1, 0),
- /* else *target = sock */
- BPF_JMP_IMM(BPF_JNE, BPF_REG_1, 0, 1),
- BPF_STX_MEM(BPF_DW, BPF_REG_6, BPF_REG_2, 0),
- /* struct bpf_sock *sk = *target; */
- BPF_LDX_MEM(BPF_DW, BPF_REG_1, BPF_REG_6, 0),
- /* if (sk) sk->mark = 42; bpf_sk_release(sk); */
- BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, 0, 3),
- BPF_MOV64_IMM(BPF_REG_3, 42),
- BPF_STX_MEM(BPF_W, BPF_REG_1, BPF_REG_3,
- offsetof(struct bpf_sock, mark)),
- BPF_EMIT_CALL(BPF_FUNC_sk_release),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- },
- .result = REJECT,
- //.errstr = "same insn cannot be used with different pointers",
- .errstr = "cannot write into sock",
- .prog_type = BPF_PROG_TYPE_SCHED_CLS,
-},
-{
- "unpriv: spill/fill of different pointers ldx",
- .insns = {
- BPF_ALU64_REG(BPF_MOV, BPF_REG_6, BPF_REG_10),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_6, -8),
- BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, 0, 3),
- BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_2,
- -(__s32)offsetof(struct bpf_perf_event_data,
- sample_period) - 8),
- BPF_STX_MEM(BPF_DW, BPF_REG_6, BPF_REG_2, 0),
- BPF_JMP_IMM(BPF_JNE, BPF_REG_1, 0, 1),
- BPF_STX_MEM(BPF_DW, BPF_REG_6, BPF_REG_1, 0),
- BPF_LDX_MEM(BPF_DW, BPF_REG_1, BPF_REG_6, 0),
- BPF_LDX_MEM(BPF_DW, BPF_REG_1, BPF_REG_1,
- offsetof(struct bpf_perf_event_data, sample_period)),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- },
- .result = REJECT,
- .errstr = "same insn cannot be used with different pointers",
- .prog_type = BPF_PROG_TYPE_PERF_EVENT,
-},
-{
- "unpriv: write pointer into map elem value",
- .insns = {
- BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
- BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
- BPF_LD_MAP_FD(BPF_REG_1, 0),
- BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
- BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 1),
- BPF_STX_MEM(BPF_DW, BPF_REG_0, BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- },
- .fixup_map_hash_8b = { 3 },
- .errstr_unpriv = "R0 leaks addr",
- .result_unpriv = REJECT,
- .result = ACCEPT,
-},
-{
- "alu32: mov u32 const",
- .insns = {
- BPF_MOV32_IMM(BPF_REG_7, 0),
- BPF_ALU32_IMM(BPF_AND, BPF_REG_7, 1),
- BPF_MOV32_REG(BPF_REG_0, BPF_REG_7),
- BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 1),
- BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_7, 0),
- BPF_EXIT_INSN(),
- },
- .result = ACCEPT,
- .retval = 0,
-},
-{
- "unpriv: partial copy of pointer",
- .insns = {
- BPF_MOV32_REG(BPF_REG_1, BPF_REG_10),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- },
- .errstr_unpriv = "R10 partial copy",
- .result_unpriv = REJECT,
- .result = ACCEPT,
-},
-{
- "unpriv: pass pointer to tail_call",
- .insns = {
- BPF_MOV64_REG(BPF_REG_3, BPF_REG_1),
- BPF_LD_MAP_FD(BPF_REG_2, 0),
- BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_tail_call),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- },
- .fixup_prog1 = { 1 },
- .errstr_unpriv = "R3 leaks addr into helper",
- .result_unpriv = REJECT,
- .result = ACCEPT,
-},
-{
- "unpriv: cmp map pointer with zero",
- .insns = {
- BPF_MOV64_IMM(BPF_REG_1, 0),
- BPF_LD_MAP_FD(BPF_REG_1, 0),
- BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, 0, 0),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- },
- .fixup_map_hash_8b = { 1 },
- .errstr_unpriv = "R1 pointer comparison",
- .result_unpriv = REJECT,
- .result = ACCEPT,
-},
-{
- "unpriv: write into frame pointer",
- .insns = {
- BPF_MOV64_REG(BPF_REG_10, BPF_REG_1),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- },
- .errstr = "frame pointer is read only",
- .result = REJECT,
-},
-{
- "unpriv: spill/fill frame pointer",
- .insns = {
- BPF_ALU64_REG(BPF_MOV, BPF_REG_6, BPF_REG_10),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_6, -8),
- BPF_STX_MEM(BPF_DW, BPF_REG_6, BPF_REG_10, 0),
- BPF_LDX_MEM(BPF_DW, BPF_REG_10, BPF_REG_6, 0),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- },
- .errstr = "frame pointer is read only",
- .result = REJECT,
-},
-{
- "unpriv: cmp of frame pointer",
- .insns = {
- BPF_JMP_IMM(BPF_JEQ, BPF_REG_10, 0, 0),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- },
- .errstr_unpriv = "R10 pointer comparison",
- .result_unpriv = REJECT,
- .result = ACCEPT,
-},
-{
- "unpriv: adding of fp",
- .insns = {
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_MOV64_IMM(BPF_REG_1, 0),
- BPF_ALU64_REG(BPF_ADD, BPF_REG_1, BPF_REG_10),
- BPF_STX_MEM(BPF_DW, BPF_REG_1, BPF_REG_0, -8),
- BPF_EXIT_INSN(),
- },
- .errstr_unpriv = "R1 stack pointer arithmetic goes out of range",
- .result_unpriv = REJECT,
- .result = ACCEPT,
-},
-{
- "unpriv: cmp of stack pointer",
- .insns = {
- BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
- BPF_JMP_IMM(BPF_JEQ, BPF_REG_2, 0, 0),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- },
- .errstr_unpriv = "R2 pointer comparison",
- .result_unpriv = REJECT,
- .result = ACCEPT,
-},
diff --git a/tools/testing/selftests/bpf/verifier/value.c b/tools/testing/selftests/bpf/verifier/value.c
deleted file mode 100644
index 0e42592b1218..000000000000
--- a/tools/testing/selftests/bpf/verifier/value.c
+++ /dev/null
@@ -1,104 +0,0 @@
-{
- "map element value store of cleared call register",
- .insns = {
- BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
- BPF_ST_MEM(BPF_DW, BPF_REG_2, 0, 0),
- BPF_LD_MAP_FD(BPF_REG_1, 0),
- BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem),
- BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 1),
- BPF_STX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, 0),
- BPF_EXIT_INSN(),
- },
- .fixup_map_hash_48b = { 3 },
- .errstr_unpriv = "R1 !read_ok",
- .errstr = "R1 !read_ok",
- .result = REJECT,
- .result_unpriv = REJECT,
-},
-{
- "map element value with unaligned store",
- .insns = {
- BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
- BPF_ST_MEM(BPF_DW, BPF_REG_2, 0, 0),
- BPF_LD_MAP_FD(BPF_REG_1, 0),
- BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem),
- BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 17),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 3),
- BPF_ST_MEM(BPF_DW, BPF_REG_0, 0, 42),
- BPF_ST_MEM(BPF_DW, BPF_REG_0, 2, 43),
- BPF_ST_MEM(BPF_DW, BPF_REG_0, -2, 44),
- BPF_MOV64_REG(BPF_REG_8, BPF_REG_0),
- BPF_ST_MEM(BPF_DW, BPF_REG_8, 0, 32),
- BPF_ST_MEM(BPF_DW, BPF_REG_8, 2, 33),
- BPF_ST_MEM(BPF_DW, BPF_REG_8, -2, 34),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_8, 5),
- BPF_ST_MEM(BPF_DW, BPF_REG_8, 0, 22),
- BPF_ST_MEM(BPF_DW, BPF_REG_8, 4, 23),
- BPF_ST_MEM(BPF_DW, BPF_REG_8, -7, 24),
- BPF_MOV64_REG(BPF_REG_7, BPF_REG_8),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_7, 3),
- BPF_ST_MEM(BPF_DW, BPF_REG_7, 0, 22),
- BPF_ST_MEM(BPF_DW, BPF_REG_7, 4, 23),
- BPF_ST_MEM(BPF_DW, BPF_REG_7, -4, 24),
- BPF_EXIT_INSN(),
- },
- .fixup_map_hash_48b = { 3 },
- .errstr_unpriv = "R0 leaks addr",
- .result = ACCEPT,
- .result_unpriv = REJECT,
- .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
-},
-{
- "map element value with unaligned load",
- .insns = {
- BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
- BPF_ST_MEM(BPF_DW, BPF_REG_2, 0, 0),
- BPF_LD_MAP_FD(BPF_REG_1, 0),
- BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem),
- BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 11),
- BPF_LDX_MEM(BPF_W, BPF_REG_1, BPF_REG_0, 0),
- BPF_JMP_IMM(BPF_JGE, BPF_REG_1, MAX_ENTRIES, 9),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 3),
- BPF_LDX_MEM(BPF_DW, BPF_REG_7, BPF_REG_0, 0),
- BPF_LDX_MEM(BPF_DW, BPF_REG_7, BPF_REG_0, 2),
- BPF_MOV64_REG(BPF_REG_8, BPF_REG_0),
- BPF_LDX_MEM(BPF_DW, BPF_REG_7, BPF_REG_8, 0),
- BPF_LDX_MEM(BPF_DW, BPF_REG_7, BPF_REG_8, 2),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 5),
- BPF_LDX_MEM(BPF_DW, BPF_REG_7, BPF_REG_0, 0),
- BPF_LDX_MEM(BPF_DW, BPF_REG_7, BPF_REG_0, 4),
- BPF_EXIT_INSN(),
- },
- .fixup_map_hash_48b = { 3 },
- .errstr_unpriv = "R0 leaks addr",
- .result = ACCEPT,
- .result_unpriv = REJECT,
- .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
-},
-{
- "map element value is preserved across register spilling",
- .insns = {
- BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
- BPF_ST_MEM(BPF_DW, BPF_REG_2, 0, 0),
- BPF_LD_MAP_FD(BPF_REG_1, 0),
- BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem),
- BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 7),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, offsetof(struct test_val, foo)),
- BPF_ST_MEM(BPF_DW, BPF_REG_0, 0, 42),
- BPF_MOV64_REG(BPF_REG_1, BPF_REG_10),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -184),
- BPF_STX_MEM(BPF_DW, BPF_REG_1, BPF_REG_0, 0),
- BPF_LDX_MEM(BPF_DW, BPF_REG_3, BPF_REG_1, 0),
- BPF_ST_MEM(BPF_DW, BPF_REG_3, 0, 42),
- BPF_EXIT_INSN(),
- },
- .fixup_map_hash_48b = { 3 },
- .errstr_unpriv = "R0 leaks addr",
- .result = ACCEPT,
- .result_unpriv = REJECT,
- .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
-},
diff --git a/tools/testing/selftests/bpf/verifier/value_adj_spill.c b/tools/testing/selftests/bpf/verifier/value_adj_spill.c
deleted file mode 100644
index 7135e8021b81..000000000000
--- a/tools/testing/selftests/bpf/verifier/value_adj_spill.c
+++ /dev/null
@@ -1,43 +0,0 @@
-{
- "map element value is preserved across register spilling",
- .insns = {
- BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
- BPF_ST_MEM(BPF_DW, BPF_REG_2, 0, 0),
- BPF_LD_MAP_FD(BPF_REG_1, 0),
- BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem),
- BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 6),
- BPF_ST_MEM(BPF_DW, BPF_REG_0, 0, 42),
- BPF_MOV64_REG(BPF_REG_1, BPF_REG_10),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -184),
- BPF_STX_MEM(BPF_DW, BPF_REG_1, BPF_REG_0, 0),
- BPF_LDX_MEM(BPF_DW, BPF_REG_3, BPF_REG_1, 0),
- BPF_ST_MEM(BPF_DW, BPF_REG_3, 0, 42),
- BPF_EXIT_INSN(),
- },
- .fixup_map_hash_48b = { 3 },
- .errstr_unpriv = "R0 leaks addr",
- .result = ACCEPT,
- .result_unpriv = REJECT,
-},
-{
- "map element value or null is marked on register spilling",
- .insns = {
- BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
- BPF_ST_MEM(BPF_DW, BPF_REG_2, 0, 0),
- BPF_LD_MAP_FD(BPF_REG_1, 0),
- BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem),
- BPF_MOV64_REG(BPF_REG_1, BPF_REG_10),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -152),
- BPF_STX_MEM(BPF_DW, BPF_REG_1, BPF_REG_0, 0),
- BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 2),
- BPF_LDX_MEM(BPF_DW, BPF_REG_3, BPF_REG_1, 0),
- BPF_ST_MEM(BPF_DW, BPF_REG_3, 0, 42),
- BPF_EXIT_INSN(),
- },
- .fixup_map_hash_48b = { 3 },
- .errstr_unpriv = "R0 leaks addr",
- .result = ACCEPT,
- .result_unpriv = REJECT,
-},
diff --git a/tools/testing/selftests/bpf/verifier/value_illegal_alu.c b/tools/testing/selftests/bpf/verifier/value_illegal_alu.c
deleted file mode 100644
index ed1c2cea1dea..000000000000
--- a/tools/testing/selftests/bpf/verifier/value_illegal_alu.c
+++ /dev/null
@@ -1,95 +0,0 @@
-{
- "map element value illegal alu op, 1",
- .insns = {
- BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
- BPF_ST_MEM(BPF_DW, BPF_REG_2, 0, 0),
- BPF_LD_MAP_FD(BPF_REG_1, 0),
- BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem),
- BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 2),
- BPF_ALU64_IMM(BPF_AND, BPF_REG_0, 8),
- BPF_ST_MEM(BPF_DW, BPF_REG_0, 0, 22),
- BPF_EXIT_INSN(),
- },
- .fixup_map_hash_48b = { 3 },
- .errstr = "R0 bitwise operator &= on pointer",
- .result = REJECT,
-},
-{
- "map element value illegal alu op, 2",
- .insns = {
- BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
- BPF_ST_MEM(BPF_DW, BPF_REG_2, 0, 0),
- BPF_LD_MAP_FD(BPF_REG_1, 0),
- BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem),
- BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 2),
- BPF_ALU32_IMM(BPF_ADD, BPF_REG_0, 0),
- BPF_ST_MEM(BPF_DW, BPF_REG_0, 0, 22),
- BPF_EXIT_INSN(),
- },
- .fixup_map_hash_48b = { 3 },
- .errstr = "R0 32-bit pointer arithmetic prohibited",
- .result = REJECT,
-},
-{
- "map element value illegal alu op, 3",
- .insns = {
- BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
- BPF_ST_MEM(BPF_DW, BPF_REG_2, 0, 0),
- BPF_LD_MAP_FD(BPF_REG_1, 0),
- BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem),
- BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 2),
- BPF_ALU64_IMM(BPF_DIV, BPF_REG_0, 42),
- BPF_ST_MEM(BPF_DW, BPF_REG_0, 0, 22),
- BPF_EXIT_INSN(),
- },
- .fixup_map_hash_48b = { 3 },
- .errstr = "R0 pointer arithmetic with /= operator",
- .result = REJECT,
-},
-{
- "map element value illegal alu op, 4",
- .insns = {
- BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
- BPF_ST_MEM(BPF_DW, BPF_REG_2, 0, 0),
- BPF_LD_MAP_FD(BPF_REG_1, 0),
- BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem),
- BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 2),
- BPF_ENDIAN(BPF_FROM_BE, BPF_REG_0, 64),
- BPF_ST_MEM(BPF_DW, BPF_REG_0, 0, 22),
- BPF_EXIT_INSN(),
- },
- .fixup_map_hash_48b = { 3 },
- .errstr_unpriv = "R0 pointer arithmetic prohibited",
- .errstr = "invalid mem access 'inv'",
- .result = REJECT,
- .result_unpriv = REJECT,
- .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
-},
-{
- "map element value illegal alu op, 5",
- .insns = {
- BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
- BPF_ST_MEM(BPF_DW, BPF_REG_2, 0, 0),
- BPF_LD_MAP_FD(BPF_REG_1, 0),
- BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem),
- BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 7),
- BPF_MOV64_IMM(BPF_REG_3, 4096),
- BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
- BPF_STX_MEM(BPF_DW, BPF_REG_2, BPF_REG_0, 0),
- BPF_STX_XADD(BPF_DW, BPF_REG_2, BPF_REG_3, 0),
- BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_2, 0),
- BPF_ST_MEM(BPF_DW, BPF_REG_0, 0, 22),
- BPF_EXIT_INSN(),
- },
- .fixup_map_hash_48b = { 3 },
- .errstr_unpriv = "leaking pointer from stack off -8",
- .errstr = "R0 invalid mem access 'inv'",
- .result = REJECT,
- .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
-},
diff --git a/tools/testing/selftests/bpf/verifier/value_or_null.c b/tools/testing/selftests/bpf/verifier/value_or_null.c
deleted file mode 100644
index 3ecb70a3d939..000000000000
--- a/tools/testing/selftests/bpf/verifier/value_or_null.c
+++ /dev/null
@@ -1,171 +0,0 @@
-{
- "multiple registers share map_lookup_elem result",
- .insns = {
- BPF_MOV64_IMM(BPF_REG_1, 10),
- BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_1, -8),
- BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
- BPF_LD_MAP_FD(BPF_REG_1, 0),
- BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
- BPF_MOV64_REG(BPF_REG_4, BPF_REG_0),
- BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 1),
- BPF_ST_MEM(BPF_DW, BPF_REG_4, 0, 0),
- BPF_EXIT_INSN(),
- },
- .fixup_map_hash_8b = { 4 },
- .result = ACCEPT,
- .prog_type = BPF_PROG_TYPE_SCHED_CLS
-},
-{
- "alu ops on ptr_to_map_value_or_null, 1",
- .insns = {
- BPF_MOV64_IMM(BPF_REG_1, 10),
- BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_1, -8),
- BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
- BPF_LD_MAP_FD(BPF_REG_1, 0),
- BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
- BPF_MOV64_REG(BPF_REG_4, BPF_REG_0),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_4, -2),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_4, 2),
- BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 1),
- BPF_ST_MEM(BPF_DW, BPF_REG_4, 0, 0),
- BPF_EXIT_INSN(),
- },
- .fixup_map_hash_8b = { 4 },
- .errstr = "R4 pointer arithmetic on map_value_or_null",
- .result = REJECT,
- .prog_type = BPF_PROG_TYPE_SCHED_CLS
-},
-{
- "alu ops on ptr_to_map_value_or_null, 2",
- .insns = {
- BPF_MOV64_IMM(BPF_REG_1, 10),
- BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_1, -8),
- BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
- BPF_LD_MAP_FD(BPF_REG_1, 0),
- BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
- BPF_MOV64_REG(BPF_REG_4, BPF_REG_0),
- BPF_ALU64_IMM(BPF_AND, BPF_REG_4, -1),
- BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 1),
- BPF_ST_MEM(BPF_DW, BPF_REG_4, 0, 0),
- BPF_EXIT_INSN(),
- },
- .fixup_map_hash_8b = { 4 },
- .errstr = "R4 pointer arithmetic on map_value_or_null",
- .result = REJECT,
- .prog_type = BPF_PROG_TYPE_SCHED_CLS
-},
-{
- "alu ops on ptr_to_map_value_or_null, 3",
- .insns = {
- BPF_MOV64_IMM(BPF_REG_1, 10),
- BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_1, -8),
- BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
- BPF_LD_MAP_FD(BPF_REG_1, 0),
- BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
- BPF_MOV64_REG(BPF_REG_4, BPF_REG_0),
- BPF_ALU64_IMM(BPF_LSH, BPF_REG_4, 1),
- BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 1),
- BPF_ST_MEM(BPF_DW, BPF_REG_4, 0, 0),
- BPF_EXIT_INSN(),
- },
- .fixup_map_hash_8b = { 4 },
- .errstr = "R4 pointer arithmetic on map_value_or_null",
- .result = REJECT,
- .prog_type = BPF_PROG_TYPE_SCHED_CLS
-},
-{
- "invalid memory access with multiple map_lookup_elem calls",
- .insns = {
- BPF_MOV64_IMM(BPF_REG_1, 10),
- BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_1, -8),
- BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
- BPF_LD_MAP_FD(BPF_REG_1, 0),
- BPF_MOV64_REG(BPF_REG_8, BPF_REG_1),
- BPF_MOV64_REG(BPF_REG_7, BPF_REG_2),
- BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
- BPF_MOV64_REG(BPF_REG_4, BPF_REG_0),
- BPF_MOV64_REG(BPF_REG_1, BPF_REG_8),
- BPF_MOV64_REG(BPF_REG_2, BPF_REG_7),
- BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
- BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 1),
- BPF_ST_MEM(BPF_DW, BPF_REG_4, 0, 0),
- BPF_EXIT_INSN(),
- },
- .fixup_map_hash_8b = { 4 },
- .result = REJECT,
- .errstr = "R4 !read_ok",
- .prog_type = BPF_PROG_TYPE_SCHED_CLS
-},
-{
- "valid indirect map_lookup_elem access with 2nd lookup in branch",
- .insns = {
- BPF_MOV64_IMM(BPF_REG_1, 10),
- BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_1, -8),
- BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
- BPF_LD_MAP_FD(BPF_REG_1, 0),
- BPF_MOV64_REG(BPF_REG_8, BPF_REG_1),
- BPF_MOV64_REG(BPF_REG_7, BPF_REG_2),
- BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
- BPF_MOV64_IMM(BPF_REG_2, 10),
- BPF_JMP_IMM(BPF_JNE, BPF_REG_2, 0, 3),
- BPF_MOV64_REG(BPF_REG_1, BPF_REG_8),
- BPF_MOV64_REG(BPF_REG_2, BPF_REG_7),
- BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
- BPF_MOV64_REG(BPF_REG_4, BPF_REG_0),
- BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 1),
- BPF_ST_MEM(BPF_DW, BPF_REG_4, 0, 0),
- BPF_EXIT_INSN(),
- },
- .fixup_map_hash_8b = { 4 },
- .result = ACCEPT,
- .prog_type = BPF_PROG_TYPE_SCHED_CLS
-},
-{
- "invalid map access from else condition",
- .insns = {
- BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
- BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
- BPF_LD_MAP_FD(BPF_REG_1, 0),
- BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
- BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 6),
- BPF_LDX_MEM(BPF_W, BPF_REG_1, BPF_REG_0, 0),
- BPF_JMP_IMM(BPF_JGE, BPF_REG_1, MAX_ENTRIES-1, 1),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 1),
- BPF_ALU64_IMM(BPF_LSH, BPF_REG_1, 2),
- BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_1),
- BPF_ST_MEM(BPF_DW, BPF_REG_0, 0, offsetof(struct test_val, foo)),
- BPF_EXIT_INSN(),
- },
- .fixup_map_hash_48b = { 3 },
- .errstr = "R0 unbounded memory access",
- .result = REJECT,
- .errstr_unpriv = "R0 leaks addr",
- .result_unpriv = REJECT,
- .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
-},
-{
- "map lookup and null branch prediction",
- .insns = {
- BPF_MOV64_IMM(BPF_REG_1, 10),
- BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_1, -8),
- BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
- BPF_LD_MAP_FD(BPF_REG_1, 0),
- BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
- BPF_MOV64_REG(BPF_REG_6, BPF_REG_0),
- BPF_JMP_IMM(BPF_JEQ, BPF_REG_6, 0, 2),
- BPF_JMP_IMM(BPF_JNE, BPF_REG_6, 0, 1),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_10, 10),
- BPF_EXIT_INSN(),
- },
- .fixup_map_hash_8b = { 4 },
- .prog_type = BPF_PROG_TYPE_SCHED_CLS,
- .result = ACCEPT,
-},
diff --git a/tools/testing/selftests/bpf/verifier/value_ptr_arith.c b/tools/testing/selftests/bpf/verifier/value_ptr_arith.c
deleted file mode 100644
index 97ee658e1242..000000000000
--- a/tools/testing/selftests/bpf/verifier/value_ptr_arith.c
+++ /dev/null
@@ -1,838 +0,0 @@
-{
- "map access: known scalar += value_ptr from different maps",
- .insns = {
- BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1,
- offsetof(struct __sk_buff, len)),
- BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
- BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
- BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 1, 3),
- BPF_LD_MAP_FD(BPF_REG_1, 0),
- BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 1, 2),
- BPF_LD_MAP_FD(BPF_REG_1, 0),
- BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
- BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 3),
- BPF_MOV64_IMM(BPF_REG_1, 4),
- BPF_ALU64_REG(BPF_ADD, BPF_REG_1, BPF_REG_0),
- BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1, 0),
- BPF_MOV64_IMM(BPF_REG_0, 1),
- BPF_EXIT_INSN(),
- },
- .fixup_map_hash_16b = { 5 },
- .fixup_map_array_48b = { 8 },
- .result = ACCEPT,
- .result_unpriv = REJECT,
- .errstr_unpriv = "R1 tried to add from different maps",
- .retval = 1,
-},
-{
- "map access: value_ptr -= known scalar from different maps",
- .insns = {
- BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1,
- offsetof(struct __sk_buff, len)),
- BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
- BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
- BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 1, 3),
- BPF_LD_MAP_FD(BPF_REG_1, 0),
- BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 1, 2),
- BPF_LD_MAP_FD(BPF_REG_1, 0),
- BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
- BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 4),
- BPF_MOV64_IMM(BPF_REG_1, 4),
- BPF_ALU64_REG(BPF_SUB, BPF_REG_0, BPF_REG_1),
- BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_1),
- BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_0, 0),
- BPF_MOV64_IMM(BPF_REG_0, 1),
- BPF_EXIT_INSN(),
- },
- .fixup_map_hash_16b = { 5 },
- .fixup_map_array_48b = { 8 },
- .result = ACCEPT,
- .result_unpriv = REJECT,
- .errstr_unpriv = "R0 min value is outside of the allowed memory range",
- .retval = 1,
-},
-{
- "map access: known scalar += value_ptr from different maps, but same value properties",
- .insns = {
- BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1,
- offsetof(struct __sk_buff, len)),
- BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
- BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
- BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 1, 3),
- BPF_LD_MAP_FD(BPF_REG_1, 0),
- BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 1, 2),
- BPF_LD_MAP_FD(BPF_REG_1, 0),
- BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
- BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 3),
- BPF_MOV64_IMM(BPF_REG_1, 4),
- BPF_ALU64_REG(BPF_ADD, BPF_REG_1, BPF_REG_0),
- BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1, 0),
- BPF_MOV64_IMM(BPF_REG_0, 1),
- BPF_EXIT_INSN(),
- },
- .fixup_map_hash_48b = { 5 },
- .fixup_map_array_48b = { 8 },
- .result = ACCEPT,
- .retval = 1,
-},
-{
- "map access: mixing value pointer and scalar, 1",
- .insns = {
- // load map value pointer into r0 and r2
- BPF_MOV64_IMM(BPF_REG_0, 1),
- BPF_LD_MAP_FD(BPF_REG_ARG1, 0),
- BPF_MOV64_REG(BPF_REG_ARG2, BPF_REG_FP),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_ARG2, -16),
- BPF_ST_MEM(BPF_DW, BPF_REG_FP, -16, 0),
- BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem),
- BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 1),
- BPF_EXIT_INSN(),
- // load some number from the map into r1
- BPF_LDX_MEM(BPF_B, BPF_REG_1, BPF_REG_0, 0),
- // depending on r1, branch:
- BPF_JMP_IMM(BPF_JNE, BPF_REG_1, 0, 3),
- // branch A
- BPF_MOV64_REG(BPF_REG_2, BPF_REG_0),
- BPF_MOV64_IMM(BPF_REG_3, 0),
- BPF_JMP_A(2),
- // branch B
- BPF_MOV64_IMM(BPF_REG_2, 0),
- BPF_MOV64_IMM(BPF_REG_3, 0x100000),
- // common instruction
- BPF_ALU64_REG(BPF_ADD, BPF_REG_2, BPF_REG_3),
- // depending on r1, branch:
- BPF_JMP_IMM(BPF_JNE, BPF_REG_1, 0, 1),
- // branch A
- BPF_JMP_A(4),
- // branch B
- BPF_MOV64_IMM(BPF_REG_0, 0x13371337),
- // verifier follows fall-through
- BPF_JMP_IMM(BPF_JNE, BPF_REG_2, 0x100000, 2),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- // fake-dead code; targeted from branch A to
- // prevent dead code sanitization
- BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_0, 0),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- },
- .fixup_map_array_48b = { 1 },
- .result = ACCEPT,
- .result_unpriv = REJECT,
- .errstr_unpriv = "R2 tried to add from different pointers or scalars",
- .retval = 0,
-},
-{
- "map access: mixing value pointer and scalar, 2",
- .insns = {
- // load map value pointer into r0 and r2
- BPF_MOV64_IMM(BPF_REG_0, 1),
- BPF_LD_MAP_FD(BPF_REG_ARG1, 0),
- BPF_MOV64_REG(BPF_REG_ARG2, BPF_REG_FP),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_ARG2, -16),
- BPF_ST_MEM(BPF_DW, BPF_REG_FP, -16, 0),
- BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem),
- BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 1),
- BPF_EXIT_INSN(),
- // load some number from the map into r1
- BPF_LDX_MEM(BPF_B, BPF_REG_1, BPF_REG_0, 0),
- // depending on r1, branch:
- BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, 0, 3),
- // branch A
- BPF_MOV64_IMM(BPF_REG_2, 0),
- BPF_MOV64_IMM(BPF_REG_3, 0x100000),
- BPF_JMP_A(2),
- // branch B
- BPF_MOV64_REG(BPF_REG_2, BPF_REG_0),
- BPF_MOV64_IMM(BPF_REG_3, 0),
- // common instruction
- BPF_ALU64_REG(BPF_ADD, BPF_REG_2, BPF_REG_3),
- // depending on r1, branch:
- BPF_JMP_IMM(BPF_JNE, BPF_REG_1, 0, 1),
- // branch A
- BPF_JMP_A(4),
- // branch B
- BPF_MOV64_IMM(BPF_REG_0, 0x13371337),
- // verifier follows fall-through
- BPF_JMP_IMM(BPF_JNE, BPF_REG_2, 0x100000, 2),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- // fake-dead code; targeted from branch A to
- // prevent dead code sanitization
- BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_0, 0),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- },
- .fixup_map_array_48b = { 1 },
- .result = ACCEPT,
- .result_unpriv = REJECT,
- .errstr_unpriv = "R2 tried to add from different maps or paths",
- .retval = 0,
-},
-{
- "sanitation: alu with different scalars 1",
- .insns = {
- BPF_MOV64_IMM(BPF_REG_0, 1),
- BPF_LD_MAP_FD(BPF_REG_ARG1, 0),
- BPF_MOV64_REG(BPF_REG_ARG2, BPF_REG_FP),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_ARG2, -16),
- BPF_ST_MEM(BPF_DW, BPF_REG_FP, -16, 0),
- BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem),
- BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 1),
- BPF_EXIT_INSN(),
- BPF_LDX_MEM(BPF_W, BPF_REG_1, BPF_REG_0, 0),
- BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, 0, 3),
- BPF_MOV64_IMM(BPF_REG_2, 0),
- BPF_MOV64_IMM(BPF_REG_3, 0x100000),
- BPF_JMP_A(2),
- BPF_MOV64_IMM(BPF_REG_2, 42),
- BPF_MOV64_IMM(BPF_REG_3, 0x100001),
- BPF_ALU64_REG(BPF_ADD, BPF_REG_2, BPF_REG_3),
- BPF_MOV64_REG(BPF_REG_0, BPF_REG_2),
- BPF_EXIT_INSN(),
- },
- .fixup_map_array_48b = { 1 },
- .result = ACCEPT,
- .retval = 0x100000,
-},
-{
- "sanitation: alu with different scalars 2",
- .insns = {
- BPF_MOV64_IMM(BPF_REG_0, 1),
- BPF_LD_MAP_FD(BPF_REG_1, 0),
- BPF_MOV64_REG(BPF_REG_6, BPF_REG_1),
- BPF_MOV64_REG(BPF_REG_2, BPF_REG_FP),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -16),
- BPF_ST_MEM(BPF_DW, BPF_REG_FP, -16, 0),
- BPF_EMIT_CALL(BPF_FUNC_map_delete_elem),
- BPF_MOV64_REG(BPF_REG_7, BPF_REG_0),
- BPF_MOV64_REG(BPF_REG_1, BPF_REG_6),
- BPF_MOV64_REG(BPF_REG_2, BPF_REG_FP),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -16),
- BPF_EMIT_CALL(BPF_FUNC_map_delete_elem),
- BPF_MOV64_REG(BPF_REG_6, BPF_REG_0),
- BPF_MOV64_REG(BPF_REG_8, BPF_REG_6),
- BPF_ALU64_REG(BPF_ADD, BPF_REG_8, BPF_REG_7),
- BPF_MOV64_REG(BPF_REG_0, BPF_REG_8),
- BPF_EXIT_INSN(),
- },
- .fixup_map_array_48b = { 1 },
- .result = ACCEPT,
- .retval = -EINVAL * 2,
-},
-{
- "sanitation: alu with different scalars 3",
- .insns = {
- BPF_MOV64_IMM(BPF_REG_0, EINVAL),
- BPF_ALU64_IMM(BPF_MUL, BPF_REG_0, -1),
- BPF_MOV64_REG(BPF_REG_7, BPF_REG_0),
- BPF_MOV64_IMM(BPF_REG_0, EINVAL),
- BPF_ALU64_IMM(BPF_MUL, BPF_REG_0, -1),
- BPF_MOV64_REG(BPF_REG_6, BPF_REG_0),
- BPF_MOV64_REG(BPF_REG_8, BPF_REG_6),
- BPF_ALU64_REG(BPF_ADD, BPF_REG_8, BPF_REG_7),
- BPF_MOV64_REG(BPF_REG_0, BPF_REG_8),
- BPF_EXIT_INSN(),
- },
- .result = ACCEPT,
- .retval = -EINVAL * 2,
-},
-{
- "map access: value_ptr += known scalar, upper oob arith, test 1",
- .insns = {
- BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
- BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
- BPF_LD_MAP_FD(BPF_REG_1, 0),
- BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
- BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 4),
- BPF_MOV64_IMM(BPF_REG_1, 48),
- BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_1),
- BPF_ALU64_REG(BPF_SUB, BPF_REG_0, BPF_REG_1),
- BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_0, 0),
- BPF_MOV64_IMM(BPF_REG_0, 1),
- BPF_EXIT_INSN(),
- },
- .fixup_map_array_48b = { 3 },
- .result = ACCEPT,
- .result_unpriv = REJECT,
- .errstr_unpriv = "R0 pointer arithmetic of map value goes out of range",
- .retval = 1,
-},
-{
- "map access: value_ptr += known scalar, upper oob arith, test 2",
- .insns = {
- BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
- BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
- BPF_LD_MAP_FD(BPF_REG_1, 0),
- BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
- BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 4),
- BPF_MOV64_IMM(BPF_REG_1, 49),
- BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_1),
- BPF_ALU64_REG(BPF_SUB, BPF_REG_0, BPF_REG_1),
- BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_0, 0),
- BPF_MOV64_IMM(BPF_REG_0, 1),
- BPF_EXIT_INSN(),
- },
- .fixup_map_array_48b = { 3 },
- .result = ACCEPT,
- .result_unpriv = REJECT,
- .errstr_unpriv = "R0 pointer arithmetic of map value goes out of range",
- .retval = 1,
-},
-{
- "map access: value_ptr += known scalar, upper oob arith, test 3",
- .insns = {
- BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
- BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
- BPF_LD_MAP_FD(BPF_REG_1, 0),
- BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
- BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 4),
- BPF_MOV64_IMM(BPF_REG_1, 47),
- BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_1),
- BPF_ALU64_REG(BPF_SUB, BPF_REG_0, BPF_REG_1),
- BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_0, 0),
- BPF_MOV64_IMM(BPF_REG_0, 1),
- BPF_EXIT_INSN(),
- },
- .fixup_map_array_48b = { 3 },
- .result = ACCEPT,
- .result_unpriv = REJECT,
- .errstr_unpriv = "R0 pointer arithmetic of map value goes out of range",
- .retval = 1,
-},
-{
- "map access: value_ptr -= known scalar, lower oob arith, test 1",
- .insns = {
- BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
- BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
- BPF_LD_MAP_FD(BPF_REG_1, 0),
- BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
- BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 5),
- BPF_MOV64_IMM(BPF_REG_1, 47),
- BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_1),
- BPF_MOV64_IMM(BPF_REG_1, 48),
- BPF_ALU64_REG(BPF_SUB, BPF_REG_0, BPF_REG_1),
- BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_0, 0),
- BPF_MOV64_IMM(BPF_REG_0, 1),
- BPF_EXIT_INSN(),
- },
- .fixup_map_array_48b = { 3 },
- .result = REJECT,
- .errstr = "R0 min value is outside of the allowed memory range",
- .result_unpriv = REJECT,
- .errstr_unpriv = "R0 pointer arithmetic of map value goes out of range",
-},
-{
- "map access: value_ptr -= known scalar, lower oob arith, test 2",
- .insns = {
- BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
- BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
- BPF_LD_MAP_FD(BPF_REG_1, 0),
- BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
- BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 7),
- BPF_MOV64_IMM(BPF_REG_1, 47),
- BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_1),
- BPF_MOV64_IMM(BPF_REG_1, 48),
- BPF_ALU64_REG(BPF_SUB, BPF_REG_0, BPF_REG_1),
- BPF_MOV64_IMM(BPF_REG_1, 1),
- BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_1),
- BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_0, 0),
- BPF_MOV64_IMM(BPF_REG_0, 1),
- BPF_EXIT_INSN(),
- },
- .fixup_map_array_48b = { 3 },
- .result = ACCEPT,
- .result_unpriv = REJECT,
- .errstr_unpriv = "R0 pointer arithmetic of map value goes out of range",
- .retval = 1,
-},
-{
- "map access: value_ptr -= known scalar, lower oob arith, test 3",
- .insns = {
- BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
- BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
- BPF_LD_MAP_FD(BPF_REG_1, 0),
- BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
- BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 5),
- BPF_MOV64_IMM(BPF_REG_1, 47),
- BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_1),
- BPF_MOV64_IMM(BPF_REG_1, 47),
- BPF_ALU64_REG(BPF_SUB, BPF_REG_0, BPF_REG_1),
- BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_0, 0),
- BPF_MOV64_IMM(BPF_REG_0, 1),
- BPF_EXIT_INSN(),
- },
- .fixup_map_array_48b = { 3 },
- .result = ACCEPT,
- .result_unpriv = REJECT,
- .errstr_unpriv = "R0 pointer arithmetic of map value goes out of range",
- .retval = 1,
-},
-{
- "map access: known scalar += value_ptr",
- .insns = {
- BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
- BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
- BPF_LD_MAP_FD(BPF_REG_1, 0),
- BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
- BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 3),
- BPF_MOV64_IMM(BPF_REG_1, 4),
- BPF_ALU64_REG(BPF_ADD, BPF_REG_1, BPF_REG_0),
- BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1, 0),
- BPF_MOV64_IMM(BPF_REG_0, 1),
- BPF_EXIT_INSN(),
- },
- .fixup_map_array_48b = { 3 },
- .result = ACCEPT,
- .retval = 1,
-},
-{
- "map access: value_ptr += known scalar, 1",
- .insns = {
- BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
- BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
- BPF_LD_MAP_FD(BPF_REG_1, 0),
- BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
- BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 3),
- BPF_MOV64_IMM(BPF_REG_1, 4),
- BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_1),
- BPF_LDX_MEM(BPF_B, BPF_REG_1, BPF_REG_0, 0),
- BPF_MOV64_IMM(BPF_REG_0, 1),
- BPF_EXIT_INSN(),
- },
- .fixup_map_array_48b = { 3 },
- .result = ACCEPT,
- .retval = 1,
-},
-{
- "map access: value_ptr += known scalar, 2",
- .insns = {
- BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
- BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
- BPF_LD_MAP_FD(BPF_REG_1, 0),
- BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
- BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 3),
- BPF_MOV64_IMM(BPF_REG_1, 49),
- BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_1),
- BPF_LDX_MEM(BPF_B, BPF_REG_1, BPF_REG_0, 0),
- BPF_MOV64_IMM(BPF_REG_0, 1),
- BPF_EXIT_INSN(),
- },
- .fixup_map_array_48b = { 3 },
- .result = REJECT,
- .errstr = "invalid access to map value",
-},
-{
- "map access: value_ptr += known scalar, 3",
- .insns = {
- BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
- BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
- BPF_LD_MAP_FD(BPF_REG_1, 0),
- BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
- BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 3),
- BPF_MOV64_IMM(BPF_REG_1, -1),
- BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_1),
- BPF_LDX_MEM(BPF_B, BPF_REG_1, BPF_REG_0, 0),
- BPF_MOV64_IMM(BPF_REG_0, 1),
- BPF_EXIT_INSN(),
- },
- .fixup_map_array_48b = { 3 },
- .result = REJECT,
- .errstr = "invalid access to map value",
-},
-{
- "map access: value_ptr += known scalar, 4",
- .insns = {
- BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
- BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
- BPF_LD_MAP_FD(BPF_REG_1, 0),
- BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
- BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 7),
- BPF_MOV64_IMM(BPF_REG_1, 5),
- BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_1),
- BPF_MOV64_IMM(BPF_REG_1, -2),
- BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_1),
- BPF_MOV64_IMM(BPF_REG_1, -1),
- BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_1),
- BPF_LDX_MEM(BPF_B, BPF_REG_1, BPF_REG_0, 0),
- BPF_MOV64_IMM(BPF_REG_0, 1),
- BPF_EXIT_INSN(),
- },
- .fixup_map_array_48b = { 3 },
- .result = ACCEPT,
- .result_unpriv = REJECT,
- .errstr_unpriv = "R0 pointer arithmetic of map value goes out of range",
- .retval = 1,
-},
-{
- "map access: value_ptr += known scalar, 5",
- .insns = {
- BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
- BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
- BPF_LD_MAP_FD(BPF_REG_1, 0),
- BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
- BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 3),
- BPF_MOV64_IMM(BPF_REG_1, (6 + 1) * sizeof(int)),
- BPF_ALU64_REG(BPF_ADD, BPF_REG_1, BPF_REG_0),
- BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1, 0),
- BPF_EXIT_INSN(),
- },
- .fixup_map_array_48b = { 3 },
- .result = ACCEPT,
- .retval = 0xabcdef12,
-},
-{
- "map access: value_ptr += known scalar, 6",
- .insns = {
- BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
- BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
- BPF_LD_MAP_FD(BPF_REG_1, 0),
- BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
- BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 5),
- BPF_MOV64_IMM(BPF_REG_1, (3 + 1) * sizeof(int)),
- BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_1),
- BPF_MOV64_IMM(BPF_REG_1, 3 * sizeof(int)),
- BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_1),
- BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- },
- .fixup_map_array_48b = { 3 },
- .result = ACCEPT,
- .retval = 0xabcdef12,
-},
-{
- "map access: unknown scalar += value_ptr, 1",
- .insns = {
- BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
- BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
- BPF_LD_MAP_FD(BPF_REG_1, 0),
- BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
- BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 4),
- BPF_LDX_MEM(BPF_B, BPF_REG_1, BPF_REG_0, 0),
- BPF_ALU64_IMM(BPF_AND, BPF_REG_1, 0xf),
- BPF_ALU64_REG(BPF_ADD, BPF_REG_1, BPF_REG_0),
- BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1, 0),
- BPF_MOV64_IMM(BPF_REG_0, 1),
- BPF_EXIT_INSN(),
- },
- .fixup_map_array_48b = { 3 },
- .result = ACCEPT,
- .retval = 1,
-},
-{
- "map access: unknown scalar += value_ptr, 2",
- .insns = {
- BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
- BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
- BPF_LD_MAP_FD(BPF_REG_1, 0),
- BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
- BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 4),
- BPF_LDX_MEM(BPF_W, BPF_REG_1, BPF_REG_0, 0),
- BPF_ALU64_IMM(BPF_AND, BPF_REG_1, 31),
- BPF_ALU64_REG(BPF_ADD, BPF_REG_1, BPF_REG_0),
- BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1, 0),
- BPF_EXIT_INSN(),
- },
- .fixup_map_array_48b = { 3 },
- .result = ACCEPT,
- .retval = 0xabcdef12,
- .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
-},
-{
- "map access: unknown scalar += value_ptr, 3",
- .insns = {
- BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
- BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
- BPF_LD_MAP_FD(BPF_REG_1, 0),
- BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
- BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 8),
- BPF_MOV64_IMM(BPF_REG_1, -1),
- BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_1),
- BPF_MOV64_IMM(BPF_REG_1, 1),
- BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_1),
- BPF_LDX_MEM(BPF_W, BPF_REG_1, BPF_REG_0, 0),
- BPF_ALU64_IMM(BPF_AND, BPF_REG_1, 31),
- BPF_ALU64_REG(BPF_ADD, BPF_REG_1, BPF_REG_0),
- BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1, 0),
- BPF_EXIT_INSN(),
- },
- .fixup_map_array_48b = { 3 },
- .result = ACCEPT,
- .result_unpriv = REJECT,
- .errstr_unpriv = "R0 pointer arithmetic of map value goes out of range",
- .retval = 0xabcdef12,
- .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
-},
-{
- "map access: unknown scalar += value_ptr, 4",
- .insns = {
- BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
- BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
- BPF_LD_MAP_FD(BPF_REG_1, 0),
- BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
- BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 6),
- BPF_MOV64_IMM(BPF_REG_1, 19),
- BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_1),
- BPF_LDX_MEM(BPF_W, BPF_REG_1, BPF_REG_0, 0),
- BPF_ALU64_IMM(BPF_AND, BPF_REG_1, 31),
- BPF_ALU64_REG(BPF_ADD, BPF_REG_1, BPF_REG_0),
- BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1, 0),
- BPF_EXIT_INSN(),
- },
- .fixup_map_array_48b = { 3 },
- .result = REJECT,
- .errstr = "R1 max value is outside of the allowed memory range",
- .errstr_unpriv = "R1 pointer arithmetic of map value goes out of range",
- .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
-},
-{
- "map access: value_ptr += unknown scalar, 1",
- .insns = {
- BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
- BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
- BPF_LD_MAP_FD(BPF_REG_1, 0),
- BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
- BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 4),
- BPF_LDX_MEM(BPF_B, BPF_REG_1, BPF_REG_0, 0),
- BPF_ALU64_IMM(BPF_AND, BPF_REG_1, 0xf),
- BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_1),
- BPF_LDX_MEM(BPF_B, BPF_REG_1, BPF_REG_0, 0),
- BPF_MOV64_IMM(BPF_REG_0, 1),
- BPF_EXIT_INSN(),
- },
- .fixup_map_array_48b = { 3 },
- .result = ACCEPT,
- .retval = 1,
-},
-{
- "map access: value_ptr += unknown scalar, 2",
- .insns = {
- BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
- BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
- BPF_LD_MAP_FD(BPF_REG_1, 0),
- BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
- BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 4),
- BPF_LDX_MEM(BPF_W, BPF_REG_1, BPF_REG_0, 0),
- BPF_ALU64_IMM(BPF_AND, BPF_REG_1, 31),
- BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_1),
- BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- },
- .fixup_map_array_48b = { 3 },
- .result = ACCEPT,
- .retval = 0xabcdef12,
- .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
-},
-{
- "map access: value_ptr += unknown scalar, 3",
- .insns = {
- BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
- BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
- BPF_LD_MAP_FD(BPF_REG_1, 0),
- BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
- BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 11),
- BPF_LDX_MEM(BPF_DW, BPF_REG_1, BPF_REG_0, 0),
- BPF_LDX_MEM(BPF_DW, BPF_REG_2, BPF_REG_0, 8),
- BPF_LDX_MEM(BPF_DW, BPF_REG_3, BPF_REG_0, 16),
- BPF_ALU64_IMM(BPF_AND, BPF_REG_1, 0xf),
- BPF_ALU64_IMM(BPF_AND, BPF_REG_3, 1),
- BPF_ALU64_IMM(BPF_OR, BPF_REG_3, 1),
- BPF_JMP_REG(BPF_JGT, BPF_REG_2, BPF_REG_3, 4),
- BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_3),
- BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_0, 0),
- BPF_MOV64_IMM(BPF_REG_0, 1),
- BPF_EXIT_INSN(),
- BPF_MOV64_IMM(BPF_REG_0, 2),
- BPF_JMP_IMM(BPF_JA, 0, 0, -3),
- },
- .fixup_map_array_48b = { 3 },
- .result = ACCEPT,
- .retval = 1,
-},
-{
- "map access: value_ptr += value_ptr",
- .insns = {
- BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
- BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
- BPF_LD_MAP_FD(BPF_REG_1, 0),
- BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
- BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 2),
- BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_0),
- BPF_LDX_MEM(BPF_B, BPF_REG_1, BPF_REG_0, 0),
- BPF_MOV64_IMM(BPF_REG_0, 1),
- BPF_EXIT_INSN(),
- },
- .fixup_map_array_48b = { 3 },
- .result = REJECT,
- .errstr = "R0 pointer += pointer prohibited",
-},
-{
- "map access: known scalar -= value_ptr",
- .insns = {
- BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
- BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
- BPF_LD_MAP_FD(BPF_REG_1, 0),
- BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
- BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 3),
- BPF_MOV64_IMM(BPF_REG_1, 4),
- BPF_ALU64_REG(BPF_SUB, BPF_REG_1, BPF_REG_0),
- BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1, 0),
- BPF_MOV64_IMM(BPF_REG_0, 1),
- BPF_EXIT_INSN(),
- },
- .fixup_map_array_48b = { 3 },
- .result = REJECT,
- .errstr = "R1 tried to subtract pointer from scalar",
-},
-{
- "map access: value_ptr -= known scalar",
- .insns = {
- BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
- BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
- BPF_LD_MAP_FD(BPF_REG_1, 0),
- BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
- BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 3),
- BPF_MOV64_IMM(BPF_REG_1, 4),
- BPF_ALU64_REG(BPF_SUB, BPF_REG_0, BPF_REG_1),
- BPF_LDX_MEM(BPF_B, BPF_REG_1, BPF_REG_0, 0),
- BPF_MOV64_IMM(BPF_REG_0, 1),
- BPF_EXIT_INSN(),
- },
- .fixup_map_array_48b = { 3 },
- .result = REJECT,
- .errstr = "R0 min value is outside of the allowed memory range",
-},
-{
- "map access: value_ptr -= known scalar, 2",
- .insns = {
- BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
- BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
- BPF_LD_MAP_FD(BPF_REG_1, 0),
- BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
- BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 5),
- BPF_MOV64_IMM(BPF_REG_1, 6),
- BPF_MOV64_IMM(BPF_REG_2, 4),
- BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_1),
- BPF_ALU64_REG(BPF_SUB, BPF_REG_0, BPF_REG_2),
- BPF_LDX_MEM(BPF_B, BPF_REG_1, BPF_REG_0, 0),
- BPF_MOV64_IMM(BPF_REG_0, 1),
- BPF_EXIT_INSN(),
- },
- .fixup_map_array_48b = { 3 },
- .result = ACCEPT,
- .result_unpriv = REJECT,
- .errstr_unpriv = "R0 pointer arithmetic of map value goes out of range",
- .retval = 1,
-},
-{
- "map access: unknown scalar -= value_ptr",
- .insns = {
- BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
- BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
- BPF_LD_MAP_FD(BPF_REG_1, 0),
- BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
- BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 4),
- BPF_LDX_MEM(BPF_B, BPF_REG_1, BPF_REG_0, 0),
- BPF_ALU64_IMM(BPF_AND, BPF_REG_1, 0xf),
- BPF_ALU64_REG(BPF_SUB, BPF_REG_1, BPF_REG_0),
- BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_1, 0),
- BPF_MOV64_IMM(BPF_REG_0, 1),
- BPF_EXIT_INSN(),
- },
- .fixup_map_array_48b = { 3 },
- .result = REJECT,
- .errstr = "R1 tried to subtract pointer from scalar",
-},
-{
- "map access: value_ptr -= unknown scalar",
- .insns = {
- BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
- BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
- BPF_LD_MAP_FD(BPF_REG_1, 0),
- BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
- BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 4),
- BPF_LDX_MEM(BPF_B, BPF_REG_1, BPF_REG_0, 0),
- BPF_ALU64_IMM(BPF_AND, BPF_REG_1, 0xf),
- BPF_ALU64_REG(BPF_SUB, BPF_REG_0, BPF_REG_1),
- BPF_LDX_MEM(BPF_B, BPF_REG_1, BPF_REG_0, 0),
- BPF_MOV64_IMM(BPF_REG_0, 1),
- BPF_EXIT_INSN(),
- },
- .fixup_map_array_48b = { 3 },
- .result = REJECT,
- .errstr = "R0 min value is negative",
-},
-{
- "map access: value_ptr -= unknown scalar, 2",
- .insns = {
- BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
- BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
- BPF_LD_MAP_FD(BPF_REG_1, 0),
- BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
- BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 8),
- BPF_LDX_MEM(BPF_B, BPF_REG_1, BPF_REG_0, 0),
- BPF_ALU64_IMM(BPF_AND, BPF_REG_1, 0xf),
- BPF_ALU64_IMM(BPF_OR, BPF_REG_1, 0x7),
- BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_1),
- BPF_LDX_MEM(BPF_B, BPF_REG_1, BPF_REG_0, 0),
- BPF_ALU64_IMM(BPF_AND, BPF_REG_1, 0x7),
- BPF_ALU64_REG(BPF_SUB, BPF_REG_0, BPF_REG_1),
- BPF_LDX_MEM(BPF_B, BPF_REG_1, BPF_REG_0, 0),
- BPF_MOV64_IMM(BPF_REG_0, 1),
- BPF_EXIT_INSN(),
- },
- .fixup_map_array_48b = { 3 },
- .result = ACCEPT,
- .result_unpriv = REJECT,
- .errstr_unpriv = "R0 pointer arithmetic of map value goes out of range",
- .retval = 1,
-},
-{
- "map access: value_ptr -= value_ptr",
- .insns = {
- BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
- BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
- BPF_LD_MAP_FD(BPF_REG_1, 0),
- BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
- BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 2),
- BPF_ALU64_REG(BPF_SUB, BPF_REG_0, BPF_REG_0),
- BPF_LDX_MEM(BPF_B, BPF_REG_1, BPF_REG_0, 0),
- BPF_MOV64_IMM(BPF_REG_0, 1),
- BPF_EXIT_INSN(),
- },
- .fixup_map_array_48b = { 3 },
- .result = REJECT,
- .errstr = "R0 invalid mem access 'inv'",
- .errstr_unpriv = "R0 pointer -= pointer prohibited",
-},
diff --git a/tools/testing/selftests/bpf/verifier/var_off.c b/tools/testing/selftests/bpf/verifier/var_off.c
deleted file mode 100644
index 8504ac937809..000000000000
--- a/tools/testing/selftests/bpf/verifier/var_off.c
+++ /dev/null
@@ -1,248 +0,0 @@
-{
- "variable-offset ctx access",
- .insns = {
- /* Get an unknown value */
- BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, 0),
- /* Make it small and 4-byte aligned */
- BPF_ALU64_IMM(BPF_AND, BPF_REG_2, 4),
- /* add it to skb. We now have either &skb->len or
- * &skb->pkt_type, but we don't know which
- */
- BPF_ALU64_REG(BPF_ADD, BPF_REG_1, BPF_REG_2),
- /* dereference it */
- BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1, 0),
- BPF_EXIT_INSN(),
- },
- .errstr = "variable ctx access var_off=(0x0; 0x4)",
- .result = REJECT,
- .prog_type = BPF_PROG_TYPE_LWT_IN,
-},
-{
- "variable-offset stack access",
- .insns = {
- /* Fill the top 8 bytes of the stack */
- BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
- /* Get an unknown value */
- BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, 0),
- /* Make it small and 4-byte aligned */
- BPF_ALU64_IMM(BPF_AND, BPF_REG_2, 4),
- BPF_ALU64_IMM(BPF_SUB, BPF_REG_2, 8),
- /* add it to fp. We now have either fp-4 or fp-8, but
- * we don't know which
- */
- BPF_ALU64_REG(BPF_ADD, BPF_REG_2, BPF_REG_10),
- /* dereference it */
- BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_2, 0),
- BPF_EXIT_INSN(),
- },
- .errstr = "variable stack access var_off=(0xfffffffffffffff8; 0x4)",
- .result = REJECT,
- .prog_type = BPF_PROG_TYPE_LWT_IN,
-},
-{
- "indirect variable-offset stack access, unbounded",
- .insns = {
- BPF_MOV64_IMM(BPF_REG_2, 6),
- BPF_MOV64_IMM(BPF_REG_3, 28),
- /* Fill the top 16 bytes of the stack. */
- BPF_ST_MEM(BPF_DW, BPF_REG_10, -16, 0),
- BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
- /* Get an unknown value. */
- BPF_LDX_MEM(BPF_DW, BPF_REG_4, BPF_REG_1, offsetof(struct bpf_sock_ops,
- bytes_received)),
- /* Check the lower bound but don't check the upper one. */
- BPF_JMP_IMM(BPF_JSLT, BPF_REG_4, 0, 4),
- /* Point the lower bound to initialized stack. Offset is now in range
- * from fp-16 to fp+0x7fffffffffffffef, i.e. max value is unbounded.
- */
- BPF_ALU64_IMM(BPF_SUB, BPF_REG_4, 16),
- BPF_ALU64_REG(BPF_ADD, BPF_REG_4, BPF_REG_10),
- BPF_MOV64_IMM(BPF_REG_5, 8),
- /* Dereference it indirectly. */
- BPF_EMIT_CALL(BPF_FUNC_getsockopt),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- },
- .errstr = "R4 unbounded indirect variable offset stack access",
- .result = REJECT,
- .prog_type = BPF_PROG_TYPE_SOCK_OPS,
-},
-{
- "indirect variable-offset stack access, max out of bound",
- .insns = {
- /* Fill the top 8 bytes of the stack */
- BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
- /* Get an unknown value */
- BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, 0),
- /* Make it small and 4-byte aligned */
- BPF_ALU64_IMM(BPF_AND, BPF_REG_2, 4),
- BPF_ALU64_IMM(BPF_SUB, BPF_REG_2, 8),
- /* add it to fp. We now have either fp-4 or fp-8, but
- * we don't know which
- */
- BPF_ALU64_REG(BPF_ADD, BPF_REG_2, BPF_REG_10),
- /* dereference it indirectly */
- BPF_LD_MAP_FD(BPF_REG_1, 0),
- BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- },
- .fixup_map_hash_8b = { 5 },
- .errstr = "R2 max value is outside of stack bound",
- .result = REJECT,
- .prog_type = BPF_PROG_TYPE_LWT_IN,
-},
-{
- "indirect variable-offset stack access, min out of bound",
- .insns = {
- /* Fill the top 8 bytes of the stack */
- BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
- /* Get an unknown value */
- BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, 0),
- /* Make it small and 4-byte aligned */
- BPF_ALU64_IMM(BPF_AND, BPF_REG_2, 4),
- BPF_ALU64_IMM(BPF_SUB, BPF_REG_2, 516),
- /* add it to fp. We now have either fp-516 or fp-512, but
- * we don't know which
- */
- BPF_ALU64_REG(BPF_ADD, BPF_REG_2, BPF_REG_10),
- /* dereference it indirectly */
- BPF_LD_MAP_FD(BPF_REG_1, 0),
- BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- },
- .fixup_map_hash_8b = { 5 },
- .errstr = "R2 min value is outside of stack bound",
- .result = REJECT,
- .prog_type = BPF_PROG_TYPE_LWT_IN,
-},
-{
- "indirect variable-offset stack access, max_off+size > max_initialized",
- .insns = {
- /* Fill only the second from top 8 bytes of the stack. */
- BPF_ST_MEM(BPF_DW, BPF_REG_10, -16, 0),
- /* Get an unknown value. */
- BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, 0),
- /* Make it small and 4-byte aligned. */
- BPF_ALU64_IMM(BPF_AND, BPF_REG_2, 4),
- BPF_ALU64_IMM(BPF_SUB, BPF_REG_2, 16),
- /* Add it to fp. We now have either fp-12 or fp-16, but we don't know
- * which. fp-12 size 8 is partially uninitialized stack.
- */
- BPF_ALU64_REG(BPF_ADD, BPF_REG_2, BPF_REG_10),
- /* Dereference it indirectly. */
- BPF_LD_MAP_FD(BPF_REG_1, 0),
- BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- },
- .fixup_map_hash_8b = { 5 },
- .errstr = "invalid indirect read from stack var_off",
- .result = REJECT,
- .prog_type = BPF_PROG_TYPE_LWT_IN,
-},
-{
- "indirect variable-offset stack access, min_off < min_initialized",
- .insns = {
- /* Fill only the top 8 bytes of the stack. */
- BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
- /* Get an unknown value */
- BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, 0),
- /* Make it small and 4-byte aligned. */
- BPF_ALU64_IMM(BPF_AND, BPF_REG_2, 4),
- BPF_ALU64_IMM(BPF_SUB, BPF_REG_2, 16),
- /* Add it to fp. We now have either fp-12 or fp-16, but we don't know
- * which. fp-16 size 8 is partially uninitialized stack.
- */
- BPF_ALU64_REG(BPF_ADD, BPF_REG_2, BPF_REG_10),
- /* Dereference it indirectly. */
- BPF_LD_MAP_FD(BPF_REG_1, 0),
- BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- },
- .fixup_map_hash_8b = { 5 },
- .errstr = "invalid indirect read from stack var_off",
- .result = REJECT,
- .prog_type = BPF_PROG_TYPE_LWT_IN,
-},
-{
- "indirect variable-offset stack access, priv vs unpriv",
- .insns = {
- /* Fill the top 16 bytes of the stack. */
- BPF_ST_MEM(BPF_DW, BPF_REG_10, -16, 0),
- BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
- /* Get an unknown value. */
- BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, 0),
- /* Make it small and 4-byte aligned. */
- BPF_ALU64_IMM(BPF_AND, BPF_REG_2, 4),
- BPF_ALU64_IMM(BPF_SUB, BPF_REG_2, 16),
- /* Add it to fp. We now have either fp-12 or fp-16, we don't know
- * which, but either way it points to initialized stack.
- */
- BPF_ALU64_REG(BPF_ADD, BPF_REG_2, BPF_REG_10),
- /* Dereference it indirectly. */
- BPF_LD_MAP_FD(BPF_REG_1, 0),
- BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- },
- .fixup_map_hash_8b = { 6 },
- .errstr_unpriv = "R2 stack pointer arithmetic goes out of range, prohibited for !root",
- .result_unpriv = REJECT,
- .result = ACCEPT,
- .prog_type = BPF_PROG_TYPE_CGROUP_SKB,
-},
-{
- "indirect variable-offset stack access, uninitialized",
- .insns = {
- BPF_MOV64_IMM(BPF_REG_2, 6),
- BPF_MOV64_IMM(BPF_REG_3, 28),
- /* Fill the top 16 bytes of the stack. */
- BPF_ST_MEM(BPF_W, BPF_REG_10, -16, 0),
- BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
- /* Get an unknown value. */
- BPF_LDX_MEM(BPF_W, BPF_REG_4, BPF_REG_1, 0),
- /* Make it small and 4-byte aligned. */
- BPF_ALU64_IMM(BPF_AND, BPF_REG_4, 4),
- BPF_ALU64_IMM(BPF_SUB, BPF_REG_4, 16),
- /* Add it to fp. We now have either fp-12 or fp-16, we don't know
- * which, but either way it points to initialized stack.
- */
- BPF_ALU64_REG(BPF_ADD, BPF_REG_4, BPF_REG_10),
- BPF_MOV64_IMM(BPF_REG_5, 8),
- /* Dereference it indirectly. */
- BPF_EMIT_CALL(BPF_FUNC_getsockopt),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- },
- .errstr = "invalid indirect read from stack var_off",
- .result = REJECT,
- .prog_type = BPF_PROG_TYPE_SOCK_OPS,
-},
-{
- "indirect variable-offset stack access, ok",
- .insns = {
- /* Fill the top 16 bytes of the stack. */
- BPF_ST_MEM(BPF_DW, BPF_REG_10, -16, 0),
- BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
- /* Get an unknown value. */
- BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, 0),
- /* Make it small and 4-byte aligned. */
- BPF_ALU64_IMM(BPF_AND, BPF_REG_2, 4),
- BPF_ALU64_IMM(BPF_SUB, BPF_REG_2, 16),
- /* Add it to fp. We now have either fp-12 or fp-16, we don't know
- * which, but either way it points to initialized stack.
- */
- BPF_ALU64_REG(BPF_ADD, BPF_REG_2, BPF_REG_10),
- /* Dereference it indirectly. */
- BPF_LD_MAP_FD(BPF_REG_1, 0),
- BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- },
- .fixup_map_hash_8b = { 6 },
- .result = ACCEPT,
- .prog_type = BPF_PROG_TYPE_LWT_IN,
-},
diff --git a/tools/testing/selftests/bpf/verifier/wide_access.c b/tools/testing/selftests/bpf/verifier/wide_access.c
index ccade9312d21..55af248efa93 100644
--- a/tools/testing/selftests/bpf/verifier/wide_access.c
+++ b/tools/testing/selftests/bpf/verifier/wide_access.c
@@ -1,4 +1,4 @@
-#define BPF_SOCK_ADDR_STORE(field, off, res, err) \
+#define BPF_SOCK_ADDR_STORE(field, off, res, err, flgs) \
{ \
"wide store to bpf_sock_addr." #field "[" #off "]", \
.insns = { \
@@ -11,31 +11,36 @@
.prog_type = BPF_PROG_TYPE_CGROUP_SOCK_ADDR, \
.expected_attach_type = BPF_CGROUP_UDP6_SENDMSG, \
.errstr = err, \
+ .flags = flgs, \
}
/* user_ip6[0] is u64 aligned */
BPF_SOCK_ADDR_STORE(user_ip6, 0, ACCEPT,
- NULL),
+ NULL, 0),
BPF_SOCK_ADDR_STORE(user_ip6, 1, REJECT,
- "invalid bpf_context access off=12 size=8"),
+ "invalid bpf_context access off=12 size=8",
+ F_NEEDS_EFFICIENT_UNALIGNED_ACCESS),
BPF_SOCK_ADDR_STORE(user_ip6, 2, ACCEPT,
- NULL),
+ NULL, 0),
BPF_SOCK_ADDR_STORE(user_ip6, 3, REJECT,
- "invalid bpf_context access off=20 size=8"),
+ "invalid bpf_context access off=20 size=8",
+ F_NEEDS_EFFICIENT_UNALIGNED_ACCESS),
/* msg_src_ip6[0] is _not_ u64 aligned */
BPF_SOCK_ADDR_STORE(msg_src_ip6, 0, REJECT,
- "invalid bpf_context access off=44 size=8"),
+ "invalid bpf_context access off=44 size=8",
+ F_NEEDS_EFFICIENT_UNALIGNED_ACCESS),
BPF_SOCK_ADDR_STORE(msg_src_ip6, 1, ACCEPT,
- NULL),
+ NULL, 0),
BPF_SOCK_ADDR_STORE(msg_src_ip6, 2, REJECT,
- "invalid bpf_context access off=52 size=8"),
+ "invalid bpf_context access off=52 size=8",
+ F_NEEDS_EFFICIENT_UNALIGNED_ACCESS),
BPF_SOCK_ADDR_STORE(msg_src_ip6, 3, REJECT,
- "invalid bpf_context access off=56 size=8"),
+ "invalid bpf_context access off=56 size=8", 0),
#undef BPF_SOCK_ADDR_STORE
-#define BPF_SOCK_ADDR_LOAD(field, off, res, err) \
+#define BPF_SOCK_ADDR_LOAD(field, off, res, err, flgs) \
{ \
"wide load from bpf_sock_addr." #field "[" #off "]", \
.insns = { \
@@ -48,26 +53,31 @@ BPF_SOCK_ADDR_STORE(msg_src_ip6, 3, REJECT,
.prog_type = BPF_PROG_TYPE_CGROUP_SOCK_ADDR, \
.expected_attach_type = BPF_CGROUP_UDP6_SENDMSG, \
.errstr = err, \
+ .flags = flgs, \
}
/* user_ip6[0] is u64 aligned */
BPF_SOCK_ADDR_LOAD(user_ip6, 0, ACCEPT,
- NULL),
+ NULL, 0),
BPF_SOCK_ADDR_LOAD(user_ip6, 1, REJECT,
- "invalid bpf_context access off=12 size=8"),
+ "invalid bpf_context access off=12 size=8",
+ F_NEEDS_EFFICIENT_UNALIGNED_ACCESS),
BPF_SOCK_ADDR_LOAD(user_ip6, 2, ACCEPT,
- NULL),
+ NULL, 0),
BPF_SOCK_ADDR_LOAD(user_ip6, 3, REJECT,
- "invalid bpf_context access off=20 size=8"),
+ "invalid bpf_context access off=20 size=8",
+ F_NEEDS_EFFICIENT_UNALIGNED_ACCESS),
/* msg_src_ip6[0] is _not_ u64 aligned */
BPF_SOCK_ADDR_LOAD(msg_src_ip6, 0, REJECT,
- "invalid bpf_context access off=44 size=8"),
+ "invalid bpf_context access off=44 size=8",
+ F_NEEDS_EFFICIENT_UNALIGNED_ACCESS),
BPF_SOCK_ADDR_LOAD(msg_src_ip6, 1, ACCEPT,
- NULL),
+ NULL, 0),
BPF_SOCK_ADDR_LOAD(msg_src_ip6, 2, REJECT,
- "invalid bpf_context access off=52 size=8"),
+ "invalid bpf_context access off=52 size=8",
+ F_NEEDS_EFFICIENT_UNALIGNED_ACCESS),
BPF_SOCK_ADDR_LOAD(msg_src_ip6, 3, REJECT,
- "invalid bpf_context access off=56 size=8"),
+ "invalid bpf_context access off=56 size=8", 0),
#undef BPF_SOCK_ADDR_LOAD
diff --git a/tools/testing/selftests/bpf/verifier/xadd.c b/tools/testing/selftests/bpf/verifier/xadd.c
deleted file mode 100644
index c5de2e62cc8b..000000000000
--- a/tools/testing/selftests/bpf/verifier/xadd.c
+++ /dev/null
@@ -1,97 +0,0 @@
-{
- "xadd/w check unaligned stack",
- .insns = {
- BPF_MOV64_IMM(BPF_REG_0, 1),
- BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_0, -8),
- BPF_STX_XADD(BPF_W, BPF_REG_10, BPF_REG_0, -7),
- BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_10, -8),
- BPF_EXIT_INSN(),
- },
- .result = REJECT,
- .errstr = "misaligned stack access off",
- .prog_type = BPF_PROG_TYPE_SCHED_CLS,
-},
-{
- "xadd/w check unaligned map",
- .insns = {
- BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
- BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
- BPF_LD_MAP_FD(BPF_REG_1, 0),
- BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
- BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 1),
- BPF_EXIT_INSN(),
- BPF_MOV64_IMM(BPF_REG_1, 1),
- BPF_STX_XADD(BPF_W, BPF_REG_0, BPF_REG_1, 3),
- BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_0, 3),
- BPF_EXIT_INSN(),
- },
- .fixup_map_hash_8b = { 3 },
- .result = REJECT,
- .errstr = "misaligned value access off",
- .prog_type = BPF_PROG_TYPE_SCHED_CLS,
-},
-{
- "xadd/w check unaligned pkt",
- .insns = {
- BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, offsetof(struct xdp_md, data)),
- BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1,
- offsetof(struct xdp_md, data_end)),
- BPF_MOV64_REG(BPF_REG_1, BPF_REG_2),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 8),
- BPF_JMP_REG(BPF_JLT, BPF_REG_1, BPF_REG_3, 2),
- BPF_MOV64_IMM(BPF_REG_0, 99),
- BPF_JMP_IMM(BPF_JA, 0, 0, 6),
- BPF_MOV64_IMM(BPF_REG_0, 1),
- BPF_ST_MEM(BPF_W, BPF_REG_2, 0, 0),
- BPF_ST_MEM(BPF_W, BPF_REG_2, 3, 0),
- BPF_STX_XADD(BPF_W, BPF_REG_2, BPF_REG_0, 1),
- BPF_STX_XADD(BPF_W, BPF_REG_2, BPF_REG_0, 2),
- BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_2, 1),
- BPF_EXIT_INSN(),
- },
- .result = REJECT,
- .errstr = "BPF_XADD stores into R2 pkt is not allowed",
- .prog_type = BPF_PROG_TYPE_XDP,
- .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
-},
-{
- "xadd/w check whether src/dst got mangled, 1",
- .insns = {
- BPF_MOV64_IMM(BPF_REG_0, 1),
- BPF_MOV64_REG(BPF_REG_6, BPF_REG_0),
- BPF_MOV64_REG(BPF_REG_7, BPF_REG_10),
- BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_0, -8),
- BPF_STX_XADD(BPF_DW, BPF_REG_10, BPF_REG_0, -8),
- BPF_STX_XADD(BPF_DW, BPF_REG_10, BPF_REG_0, -8),
- BPF_JMP_REG(BPF_JNE, BPF_REG_6, BPF_REG_0, 3),
- BPF_JMP_REG(BPF_JNE, BPF_REG_7, BPF_REG_10, 2),
- BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_10, -8),
- BPF_EXIT_INSN(),
- BPF_MOV64_IMM(BPF_REG_0, 42),
- BPF_EXIT_INSN(),
- },
- .result = ACCEPT,
- .prog_type = BPF_PROG_TYPE_SCHED_CLS,
- .retval = 3,
-},
-{
- "xadd/w check whether src/dst got mangled, 2",
- .insns = {
- BPF_MOV64_IMM(BPF_REG_0, 1),
- BPF_MOV64_REG(BPF_REG_6, BPF_REG_0),
- BPF_MOV64_REG(BPF_REG_7, BPF_REG_10),
- BPF_STX_MEM(BPF_W, BPF_REG_10, BPF_REG_0, -8),
- BPF_STX_XADD(BPF_W, BPF_REG_10, BPF_REG_0, -8),
- BPF_STX_XADD(BPF_W, BPF_REG_10, BPF_REG_0, -8),
- BPF_JMP_REG(BPF_JNE, BPF_REG_6, BPF_REG_0, 3),
- BPF_JMP_REG(BPF_JNE, BPF_REG_7, BPF_REG_10, 2),
- BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_10, -8),
- BPF_EXIT_INSN(),
- BPF_MOV64_IMM(BPF_REG_0, 42),
- BPF_EXIT_INSN(),
- },
- .result = ACCEPT,
- .prog_type = BPF_PROG_TYPE_SCHED_CLS,
- .retval = 3,
-},
diff --git a/tools/testing/selftests/bpf/verifier/xdp.c b/tools/testing/selftests/bpf/verifier/xdp.c
deleted file mode 100644
index 5ac390508139..000000000000
--- a/tools/testing/selftests/bpf/verifier/xdp.c
+++ /dev/null
@@ -1,14 +0,0 @@
-{
- "XDP, using ifindex from netdev",
- .insns = {
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1,
- offsetof(struct xdp_md, ingress_ifindex)),
- BPF_JMP_IMM(BPF_JLT, BPF_REG_2, 1, 1),
- BPF_MOV64_IMM(BPF_REG_0, 1),
- BPF_EXIT_INSN(),
- },
- .result = ACCEPT,
- .prog_type = BPF_PROG_TYPE_XDP,
- .retval = 1,
-},
diff --git a/tools/testing/selftests/bpf/verifier/xdp_direct_packet_access.c b/tools/testing/selftests/bpf/verifier/xdp_direct_packet_access.c
deleted file mode 100644
index bfb97383e6b5..000000000000
--- a/tools/testing/selftests/bpf/verifier/xdp_direct_packet_access.c
+++ /dev/null
@@ -1,900 +0,0 @@
-{
- "XDP pkt read, pkt_end mangling, bad access 1",
- .insns = {
- BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, offsetof(struct xdp_md, data)),
- BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1,
- offsetof(struct xdp_md, data_end)),
- BPF_MOV64_REG(BPF_REG_1, BPF_REG_2),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 8),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_3, 8),
- BPF_JMP_REG(BPF_JGT, BPF_REG_1, BPF_REG_3, 1),
- BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -8),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- },
- .errstr = "R3 pointer arithmetic on pkt_end",
- .result = REJECT,
- .prog_type = BPF_PROG_TYPE_XDP,
-},
-{
- "XDP pkt read, pkt_end mangling, bad access 2",
- .insns = {
- BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, offsetof(struct xdp_md, data)),
- BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1,
- offsetof(struct xdp_md, data_end)),
- BPF_MOV64_REG(BPF_REG_1, BPF_REG_2),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 8),
- BPF_ALU64_IMM(BPF_SUB, BPF_REG_3, 8),
- BPF_JMP_REG(BPF_JGT, BPF_REG_1, BPF_REG_3, 1),
- BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -8),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- },
- .errstr = "R3 pointer arithmetic on pkt_end",
- .result = REJECT,
- .prog_type = BPF_PROG_TYPE_XDP,
-},
-{
- "XDP pkt read, pkt_data' > pkt_end, good access",
- .insns = {
- BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, offsetof(struct xdp_md, data)),
- BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1,
- offsetof(struct xdp_md, data_end)),
- BPF_MOV64_REG(BPF_REG_1, BPF_REG_2),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 8),
- BPF_JMP_REG(BPF_JGT, BPF_REG_1, BPF_REG_3, 1),
- BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -8),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- },
- .result = ACCEPT,
- .prog_type = BPF_PROG_TYPE_XDP,
- .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
-},
-{
- "XDP pkt read, pkt_data' > pkt_end, bad access 1",
- .insns = {
- BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, offsetof(struct xdp_md, data)),
- BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1,
- offsetof(struct xdp_md, data_end)),
- BPF_MOV64_REG(BPF_REG_1, BPF_REG_2),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 8),
- BPF_JMP_REG(BPF_JGT, BPF_REG_1, BPF_REG_3, 1),
- BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -4),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- },
- .errstr = "R1 offset is outside of the packet",
- .result = REJECT,
- .prog_type = BPF_PROG_TYPE_XDP,
- .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
-},
-{
- "XDP pkt read, pkt_data' > pkt_end, bad access 2",
- .insns = {
- BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, offsetof(struct xdp_md, data)),
- BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1,
- offsetof(struct xdp_md, data_end)),
- BPF_MOV64_REG(BPF_REG_1, BPF_REG_2),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 8),
- BPF_JMP_REG(BPF_JGT, BPF_REG_1, BPF_REG_3, 0),
- BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -8),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- },
- .errstr = "R1 offset is outside of the packet",
- .result = REJECT,
- .prog_type = BPF_PROG_TYPE_XDP,
- .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
-},
-{
- "XDP pkt read, pkt_end > pkt_data', good access",
- .insns = {
- BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, offsetof(struct xdp_md, data)),
- BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1,
- offsetof(struct xdp_md, data_end)),
- BPF_MOV64_REG(BPF_REG_1, BPF_REG_2),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 8),
- BPF_JMP_REG(BPF_JGT, BPF_REG_3, BPF_REG_1, 1),
- BPF_JMP_IMM(BPF_JA, 0, 0, 1),
- BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1, -5),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- },
- .result = ACCEPT,
- .prog_type = BPF_PROG_TYPE_XDP,
- .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
-},
-{
- "XDP pkt read, pkt_end > pkt_data', bad access 1",
- .insns = {
- BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, offsetof(struct xdp_md, data)),
- BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1,
- offsetof(struct xdp_md, data_end)),
- BPF_MOV64_REG(BPF_REG_1, BPF_REG_2),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 8),
- BPF_JMP_REG(BPF_JGT, BPF_REG_3, BPF_REG_1, 1),
- BPF_JMP_IMM(BPF_JA, 0, 0, 1),
- BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -8),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- },
- .errstr = "R1 offset is outside of the packet",
- .result = REJECT,
- .prog_type = BPF_PROG_TYPE_XDP,
- .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
-},
-{
- "XDP pkt read, pkt_end > pkt_data', bad access 2",
- .insns = {
- BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, offsetof(struct xdp_md, data)),
- BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1,
- offsetof(struct xdp_md, data_end)),
- BPF_MOV64_REG(BPF_REG_1, BPF_REG_2),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 8),
- BPF_JMP_REG(BPF_JGT, BPF_REG_3, BPF_REG_1, 1),
- BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -8),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- },
- .errstr = "R1 offset is outside of the packet",
- .result = REJECT,
- .prog_type = BPF_PROG_TYPE_XDP,
- .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
-},
-{
- "XDP pkt read, pkt_data' < pkt_end, good access",
- .insns = {
- BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, offsetof(struct xdp_md, data)),
- BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1,
- offsetof(struct xdp_md, data_end)),
- BPF_MOV64_REG(BPF_REG_1, BPF_REG_2),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 8),
- BPF_JMP_REG(BPF_JLT, BPF_REG_1, BPF_REG_3, 1),
- BPF_JMP_IMM(BPF_JA, 0, 0, 1),
- BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1, -5),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- },
- .result = ACCEPT,
- .prog_type = BPF_PROG_TYPE_XDP,
- .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
-},
-{
- "XDP pkt read, pkt_data' < pkt_end, bad access 1",
- .insns = {
- BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, offsetof(struct xdp_md, data)),
- BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1,
- offsetof(struct xdp_md, data_end)),
- BPF_MOV64_REG(BPF_REG_1, BPF_REG_2),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 8),
- BPF_JMP_REG(BPF_JLT, BPF_REG_1, BPF_REG_3, 1),
- BPF_JMP_IMM(BPF_JA, 0, 0, 1),
- BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -8),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- },
- .errstr = "R1 offset is outside of the packet",
- .result = REJECT,
- .prog_type = BPF_PROG_TYPE_XDP,
- .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
-},
-{
- "XDP pkt read, pkt_data' < pkt_end, bad access 2",
- .insns = {
- BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, offsetof(struct xdp_md, data)),
- BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1,
- offsetof(struct xdp_md, data_end)),
- BPF_MOV64_REG(BPF_REG_1, BPF_REG_2),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 8),
- BPF_JMP_REG(BPF_JLT, BPF_REG_1, BPF_REG_3, 1),
- BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -8),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- },
- .errstr = "R1 offset is outside of the packet",
- .result = REJECT,
- .prog_type = BPF_PROG_TYPE_XDP,
- .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
-},
-{
- "XDP pkt read, pkt_end < pkt_data', good access",
- .insns = {
- BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, offsetof(struct xdp_md, data)),
- BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1,
- offsetof(struct xdp_md, data_end)),
- BPF_MOV64_REG(BPF_REG_1, BPF_REG_2),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 8),
- BPF_JMP_REG(BPF_JLT, BPF_REG_3, BPF_REG_1, 1),
- BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -8),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- },
- .result = ACCEPT,
- .prog_type = BPF_PROG_TYPE_XDP,
- .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
-},
-{
- "XDP pkt read, pkt_end < pkt_data', bad access 1",
- .insns = {
- BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, offsetof(struct xdp_md, data)),
- BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1,
- offsetof(struct xdp_md, data_end)),
- BPF_MOV64_REG(BPF_REG_1, BPF_REG_2),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 8),
- BPF_JMP_REG(BPF_JLT, BPF_REG_3, BPF_REG_1, 1),
- BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -4),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- },
- .errstr = "R1 offset is outside of the packet",
- .result = REJECT,
- .prog_type = BPF_PROG_TYPE_XDP,
- .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
-},
-{
- "XDP pkt read, pkt_end < pkt_data', bad access 2",
- .insns = {
- BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, offsetof(struct xdp_md, data)),
- BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1,
- offsetof(struct xdp_md, data_end)),
- BPF_MOV64_REG(BPF_REG_1, BPF_REG_2),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 8),
- BPF_JMP_REG(BPF_JLT, BPF_REG_3, BPF_REG_1, 0),
- BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -8),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- },
- .errstr = "R1 offset is outside of the packet",
- .result = REJECT,
- .prog_type = BPF_PROG_TYPE_XDP,
- .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
-},
-{
- "XDP pkt read, pkt_data' >= pkt_end, good access",
- .insns = {
- BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, offsetof(struct xdp_md, data)),
- BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1,
- offsetof(struct xdp_md, data_end)),
- BPF_MOV64_REG(BPF_REG_1, BPF_REG_2),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 8),
- BPF_JMP_REG(BPF_JGE, BPF_REG_1, BPF_REG_3, 1),
- BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1, -5),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- },
- .result = ACCEPT,
- .prog_type = BPF_PROG_TYPE_XDP,
- .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
-},
-{
- "XDP pkt read, pkt_data' >= pkt_end, bad access 1",
- .insns = {
- BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, offsetof(struct xdp_md, data)),
- BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1,
- offsetof(struct xdp_md, data_end)),
- BPF_MOV64_REG(BPF_REG_1, BPF_REG_2),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 8),
- BPF_JMP_REG(BPF_JGE, BPF_REG_1, BPF_REG_3, 1),
- BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -8),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- },
- .errstr = "R1 offset is outside of the packet",
- .result = REJECT,
- .prog_type = BPF_PROG_TYPE_XDP,
- .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
-},
-{
- "XDP pkt read, pkt_data' >= pkt_end, bad access 2",
- .insns = {
- BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, offsetof(struct xdp_md, data)),
- BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1,
- offsetof(struct xdp_md, data_end)),
- BPF_MOV64_REG(BPF_REG_1, BPF_REG_2),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 8),
- BPF_JMP_REG(BPF_JGE, BPF_REG_1, BPF_REG_3, 0),
- BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1, -5),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- },
- .errstr = "R1 offset is outside of the packet",
- .result = REJECT,
- .prog_type = BPF_PROG_TYPE_XDP,
- .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
-},
-{
- "XDP pkt read, pkt_end >= pkt_data', good access",
- .insns = {
- BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, offsetof(struct xdp_md, data)),
- BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1,
- offsetof(struct xdp_md, data_end)),
- BPF_MOV64_REG(BPF_REG_1, BPF_REG_2),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 8),
- BPF_JMP_REG(BPF_JGE, BPF_REG_3, BPF_REG_1, 1),
- BPF_JMP_IMM(BPF_JA, 0, 0, 1),
- BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -8),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- },
- .result = ACCEPT,
- .prog_type = BPF_PROG_TYPE_XDP,
- .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
-},
-{
- "XDP pkt read, pkt_end >= pkt_data', bad access 1",
- .insns = {
- BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, offsetof(struct xdp_md, data)),
- BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1,
- offsetof(struct xdp_md, data_end)),
- BPF_MOV64_REG(BPF_REG_1, BPF_REG_2),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 8),
- BPF_JMP_REG(BPF_JGE, BPF_REG_3, BPF_REG_1, 1),
- BPF_JMP_IMM(BPF_JA, 0, 0, 1),
- BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -4),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- },
- .errstr = "R1 offset is outside of the packet",
- .result = REJECT,
- .prog_type = BPF_PROG_TYPE_XDP,
- .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
-},
-{
- "XDP pkt read, pkt_end >= pkt_data', bad access 2",
- .insns = {
- BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, offsetof(struct xdp_md, data)),
- BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1,
- offsetof(struct xdp_md, data_end)),
- BPF_MOV64_REG(BPF_REG_1, BPF_REG_2),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 8),
- BPF_JMP_REG(BPF_JGE, BPF_REG_3, BPF_REG_1, 1),
- BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -8),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- },
- .errstr = "R1 offset is outside of the packet",
- .result = REJECT,
- .prog_type = BPF_PROG_TYPE_XDP,
- .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
-},
-{
- "XDP pkt read, pkt_data' <= pkt_end, good access",
- .insns = {
- BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, offsetof(struct xdp_md, data)),
- BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1,
- offsetof(struct xdp_md, data_end)),
- BPF_MOV64_REG(BPF_REG_1, BPF_REG_2),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 8),
- BPF_JMP_REG(BPF_JLE, BPF_REG_1, BPF_REG_3, 1),
- BPF_JMP_IMM(BPF_JA, 0, 0, 1),
- BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -8),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- },
- .result = ACCEPT,
- .prog_type = BPF_PROG_TYPE_XDP,
- .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
-},
-{
- "XDP pkt read, pkt_data' <= pkt_end, bad access 1",
- .insns = {
- BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, offsetof(struct xdp_md, data)),
- BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1,
- offsetof(struct xdp_md, data_end)),
- BPF_MOV64_REG(BPF_REG_1, BPF_REG_2),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 8),
- BPF_JMP_REG(BPF_JLE, BPF_REG_1, BPF_REG_3, 1),
- BPF_JMP_IMM(BPF_JA, 0, 0, 1),
- BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -4),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- },
- .errstr = "R1 offset is outside of the packet",
- .result = REJECT,
- .prog_type = BPF_PROG_TYPE_XDP,
- .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
-},
-{
- "XDP pkt read, pkt_data' <= pkt_end, bad access 2",
- .insns = {
- BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, offsetof(struct xdp_md, data)),
- BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1,
- offsetof(struct xdp_md, data_end)),
- BPF_MOV64_REG(BPF_REG_1, BPF_REG_2),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 8),
- BPF_JMP_REG(BPF_JLE, BPF_REG_1, BPF_REG_3, 1),
- BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -8),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- },
- .errstr = "R1 offset is outside of the packet",
- .result = REJECT,
- .prog_type = BPF_PROG_TYPE_XDP,
- .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
-},
-{
- "XDP pkt read, pkt_end <= pkt_data', good access",
- .insns = {
- BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, offsetof(struct xdp_md, data)),
- BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1,
- offsetof(struct xdp_md, data_end)),
- BPF_MOV64_REG(BPF_REG_1, BPF_REG_2),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 8),
- BPF_JMP_REG(BPF_JLE, BPF_REG_3, BPF_REG_1, 1),
- BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1, -5),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- },
- .result = ACCEPT,
- .prog_type = BPF_PROG_TYPE_XDP,
- .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
-},
-{
- "XDP pkt read, pkt_end <= pkt_data', bad access 1",
- .insns = {
- BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, offsetof(struct xdp_md, data)),
- BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1,
- offsetof(struct xdp_md, data_end)),
- BPF_MOV64_REG(BPF_REG_1, BPF_REG_2),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 8),
- BPF_JMP_REG(BPF_JLE, BPF_REG_3, BPF_REG_1, 1),
- BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -8),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- },
- .errstr = "R1 offset is outside of the packet",
- .result = REJECT,
- .prog_type = BPF_PROG_TYPE_XDP,
- .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
-},
-{
- "XDP pkt read, pkt_end <= pkt_data', bad access 2",
- .insns = {
- BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, offsetof(struct xdp_md, data)),
- BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1,
- offsetof(struct xdp_md, data_end)),
- BPF_MOV64_REG(BPF_REG_1, BPF_REG_2),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 8),
- BPF_JMP_REG(BPF_JLE, BPF_REG_3, BPF_REG_1, 0),
- BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1, -5),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- },
- .errstr = "R1 offset is outside of the packet",
- .result = REJECT,
- .prog_type = BPF_PROG_TYPE_XDP,
- .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
-},
-{
- "XDP pkt read, pkt_meta' > pkt_data, good access",
- .insns = {
- BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1,
- offsetof(struct xdp_md, data_meta)),
- BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, offsetof(struct xdp_md, data)),
- BPF_MOV64_REG(BPF_REG_1, BPF_REG_2),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 8),
- BPF_JMP_REG(BPF_JGT, BPF_REG_1, BPF_REG_3, 1),
- BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -8),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- },
- .result = ACCEPT,
- .prog_type = BPF_PROG_TYPE_XDP,
- .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
-},
-{
- "XDP pkt read, pkt_meta' > pkt_data, bad access 1",
- .insns = {
- BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1,
- offsetof(struct xdp_md, data_meta)),
- BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, offsetof(struct xdp_md, data)),
- BPF_MOV64_REG(BPF_REG_1, BPF_REG_2),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 8),
- BPF_JMP_REG(BPF_JGT, BPF_REG_1, BPF_REG_3, 1),
- BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -4),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- },
- .errstr = "R1 offset is outside of the packet",
- .result = REJECT,
- .prog_type = BPF_PROG_TYPE_XDP,
- .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
-},
-{
- "XDP pkt read, pkt_meta' > pkt_data, bad access 2",
- .insns = {
- BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1,
- offsetof(struct xdp_md, data_meta)),
- BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, offsetof(struct xdp_md, data)),
- BPF_MOV64_REG(BPF_REG_1, BPF_REG_2),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 8),
- BPF_JMP_REG(BPF_JGT, BPF_REG_1, BPF_REG_3, 0),
- BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -8),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- },
- .errstr = "R1 offset is outside of the packet",
- .result = REJECT,
- .prog_type = BPF_PROG_TYPE_XDP,
- .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
-},
-{
- "XDP pkt read, pkt_data > pkt_meta', good access",
- .insns = {
- BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1,
- offsetof(struct xdp_md, data_meta)),
- BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, offsetof(struct xdp_md, data)),
- BPF_MOV64_REG(BPF_REG_1, BPF_REG_2),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 8),
- BPF_JMP_REG(BPF_JGT, BPF_REG_3, BPF_REG_1, 1),
- BPF_JMP_IMM(BPF_JA, 0, 0, 1),
- BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1, -5),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- },
- .result = ACCEPT,
- .prog_type = BPF_PROG_TYPE_XDP,
- .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
-},
-{
- "XDP pkt read, pkt_data > pkt_meta', bad access 1",
- .insns = {
- BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1,
- offsetof(struct xdp_md, data_meta)),
- BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, offsetof(struct xdp_md, data)),
- BPF_MOV64_REG(BPF_REG_1, BPF_REG_2),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 8),
- BPF_JMP_REG(BPF_JGT, BPF_REG_3, BPF_REG_1, 1),
- BPF_JMP_IMM(BPF_JA, 0, 0, 1),
- BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -8),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- },
- .errstr = "R1 offset is outside of the packet",
- .result = REJECT,
- .prog_type = BPF_PROG_TYPE_XDP,
- .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
-},
-{
- "XDP pkt read, pkt_data > pkt_meta', bad access 2",
- .insns = {
- BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1,
- offsetof(struct xdp_md, data_meta)),
- BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, offsetof(struct xdp_md, data)),
- BPF_MOV64_REG(BPF_REG_1, BPF_REG_2),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 8),
- BPF_JMP_REG(BPF_JGT, BPF_REG_3, BPF_REG_1, 1),
- BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -8),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- },
- .errstr = "R1 offset is outside of the packet",
- .result = REJECT,
- .prog_type = BPF_PROG_TYPE_XDP,
- .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
-},
-{
- "XDP pkt read, pkt_meta' < pkt_data, good access",
- .insns = {
- BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1,
- offsetof(struct xdp_md, data_meta)),
- BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, offsetof(struct xdp_md, data)),
- BPF_MOV64_REG(BPF_REG_1, BPF_REG_2),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 8),
- BPF_JMP_REG(BPF_JLT, BPF_REG_1, BPF_REG_3, 1),
- BPF_JMP_IMM(BPF_JA, 0, 0, 1),
- BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1, -5),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- },
- .result = ACCEPT,
- .prog_type = BPF_PROG_TYPE_XDP,
- .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
-},
-{
- "XDP pkt read, pkt_meta' < pkt_data, bad access 1",
- .insns = {
- BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1,
- offsetof(struct xdp_md, data_meta)),
- BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, offsetof(struct xdp_md, data)),
- BPF_MOV64_REG(BPF_REG_1, BPF_REG_2),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 8),
- BPF_JMP_REG(BPF_JLT, BPF_REG_1, BPF_REG_3, 1),
- BPF_JMP_IMM(BPF_JA, 0, 0, 1),
- BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -8),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- },
- .errstr = "R1 offset is outside of the packet",
- .result = REJECT,
- .prog_type = BPF_PROG_TYPE_XDP,
- .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
-},
-{
- "XDP pkt read, pkt_meta' < pkt_data, bad access 2",
- .insns = {
- BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1,
- offsetof(struct xdp_md, data_meta)),
- BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, offsetof(struct xdp_md, data)),
- BPF_MOV64_REG(BPF_REG_1, BPF_REG_2),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 8),
- BPF_JMP_REG(BPF_JLT, BPF_REG_1, BPF_REG_3, 1),
- BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -8),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- },
- .errstr = "R1 offset is outside of the packet",
- .result = REJECT,
- .prog_type = BPF_PROG_TYPE_XDP,
- .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
-},
-{
- "XDP pkt read, pkt_data < pkt_meta', good access",
- .insns = {
- BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1,
- offsetof(struct xdp_md, data_meta)),
- BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, offsetof(struct xdp_md, data)),
- BPF_MOV64_REG(BPF_REG_1, BPF_REG_2),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 8),
- BPF_JMP_REG(BPF_JLT, BPF_REG_3, BPF_REG_1, 1),
- BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -8),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- },
- .result = ACCEPT,
- .prog_type = BPF_PROG_TYPE_XDP,
- .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
-},
-{
- "XDP pkt read, pkt_data < pkt_meta', bad access 1",
- .insns = {
- BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1,
- offsetof(struct xdp_md, data_meta)),
- BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, offsetof(struct xdp_md, data)),
- BPF_MOV64_REG(BPF_REG_1, BPF_REG_2),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 8),
- BPF_JMP_REG(BPF_JLT, BPF_REG_3, BPF_REG_1, 1),
- BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -4),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- },
- .errstr = "R1 offset is outside of the packet",
- .result = REJECT,
- .prog_type = BPF_PROG_TYPE_XDP,
- .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
-},
-{
- "XDP pkt read, pkt_data < pkt_meta', bad access 2",
- .insns = {
- BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1,
- offsetof(struct xdp_md, data_meta)),
- BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, offsetof(struct xdp_md, data)),
- BPF_MOV64_REG(BPF_REG_1, BPF_REG_2),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 8),
- BPF_JMP_REG(BPF_JLT, BPF_REG_3, BPF_REG_1, 0),
- BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -8),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- },
- .errstr = "R1 offset is outside of the packet",
- .result = REJECT,
- .prog_type = BPF_PROG_TYPE_XDP,
- .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
-},
-{
- "XDP pkt read, pkt_meta' >= pkt_data, good access",
- .insns = {
- BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1,
- offsetof(struct xdp_md, data_meta)),
- BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, offsetof(struct xdp_md, data)),
- BPF_MOV64_REG(BPF_REG_1, BPF_REG_2),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 8),
- BPF_JMP_REG(BPF_JGE, BPF_REG_1, BPF_REG_3, 1),
- BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1, -5),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- },
- .result = ACCEPT,
- .prog_type = BPF_PROG_TYPE_XDP,
- .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
-},
-{
- "XDP pkt read, pkt_meta' >= pkt_data, bad access 1",
- .insns = {
- BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1,
- offsetof(struct xdp_md, data_meta)),
- BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, offsetof(struct xdp_md, data)),
- BPF_MOV64_REG(BPF_REG_1, BPF_REG_2),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 8),
- BPF_JMP_REG(BPF_JGE, BPF_REG_1, BPF_REG_3, 1),
- BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -8),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- },
- .errstr = "R1 offset is outside of the packet",
- .result = REJECT,
- .prog_type = BPF_PROG_TYPE_XDP,
- .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
-},
-{
- "XDP pkt read, pkt_meta' >= pkt_data, bad access 2",
- .insns = {
- BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1,
- offsetof(struct xdp_md, data_meta)),
- BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, offsetof(struct xdp_md, data)),
- BPF_MOV64_REG(BPF_REG_1, BPF_REG_2),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 8),
- BPF_JMP_REG(BPF_JGE, BPF_REG_1, BPF_REG_3, 0),
- BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1, -5),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- },
- .errstr = "R1 offset is outside of the packet",
- .result = REJECT,
- .prog_type = BPF_PROG_TYPE_XDP,
- .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
-},
-{
- "XDP pkt read, pkt_data >= pkt_meta', good access",
- .insns = {
- BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1,
- offsetof(struct xdp_md, data_meta)),
- BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, offsetof(struct xdp_md, data)),
- BPF_MOV64_REG(BPF_REG_1, BPF_REG_2),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 8),
- BPF_JMP_REG(BPF_JGE, BPF_REG_3, BPF_REG_1, 1),
- BPF_JMP_IMM(BPF_JA, 0, 0, 1),
- BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -8),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- },
- .result = ACCEPT,
- .prog_type = BPF_PROG_TYPE_XDP,
- .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
-},
-{
- "XDP pkt read, pkt_data >= pkt_meta', bad access 1",
- .insns = {
- BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1,
- offsetof(struct xdp_md, data_meta)),
- BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, offsetof(struct xdp_md, data)),
- BPF_MOV64_REG(BPF_REG_1, BPF_REG_2),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 8),
- BPF_JMP_REG(BPF_JGE, BPF_REG_3, BPF_REG_1, 1),
- BPF_JMP_IMM(BPF_JA, 0, 0, 1),
- BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -4),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- },
- .errstr = "R1 offset is outside of the packet",
- .result = REJECT,
- .prog_type = BPF_PROG_TYPE_XDP,
- .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
-},
-{
- "XDP pkt read, pkt_data >= pkt_meta', bad access 2",
- .insns = {
- BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1,
- offsetof(struct xdp_md, data_meta)),
- BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, offsetof(struct xdp_md, data)),
- BPF_MOV64_REG(BPF_REG_1, BPF_REG_2),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 8),
- BPF_JMP_REG(BPF_JGE, BPF_REG_3, BPF_REG_1, 1),
- BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -8),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- },
- .errstr = "R1 offset is outside of the packet",
- .result = REJECT,
- .prog_type = BPF_PROG_TYPE_XDP,
- .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
-},
-{
- "XDP pkt read, pkt_meta' <= pkt_data, good access",
- .insns = {
- BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1,
- offsetof(struct xdp_md, data_meta)),
- BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, offsetof(struct xdp_md, data)),
- BPF_MOV64_REG(BPF_REG_1, BPF_REG_2),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 8),
- BPF_JMP_REG(BPF_JLE, BPF_REG_1, BPF_REG_3, 1),
- BPF_JMP_IMM(BPF_JA, 0, 0, 1),
- BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -8),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- },
- .result = ACCEPT,
- .prog_type = BPF_PROG_TYPE_XDP,
- .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
-},
-{
- "XDP pkt read, pkt_meta' <= pkt_data, bad access 1",
- .insns = {
- BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1,
- offsetof(struct xdp_md, data_meta)),
- BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, offsetof(struct xdp_md, data)),
- BPF_MOV64_REG(BPF_REG_1, BPF_REG_2),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 8),
- BPF_JMP_REG(BPF_JLE, BPF_REG_1, BPF_REG_3, 1),
- BPF_JMP_IMM(BPF_JA, 0, 0, 1),
- BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -4),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- },
- .errstr = "R1 offset is outside of the packet",
- .result = REJECT,
- .prog_type = BPF_PROG_TYPE_XDP,
- .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
-},
-{
- "XDP pkt read, pkt_meta' <= pkt_data, bad access 2",
- .insns = {
- BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1,
- offsetof(struct xdp_md, data_meta)),
- BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, offsetof(struct xdp_md, data)),
- BPF_MOV64_REG(BPF_REG_1, BPF_REG_2),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 8),
- BPF_JMP_REG(BPF_JLE, BPF_REG_1, BPF_REG_3, 1),
- BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -8),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- },
- .errstr = "R1 offset is outside of the packet",
- .result = REJECT,
- .prog_type = BPF_PROG_TYPE_XDP,
- .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
-},
-{
- "XDP pkt read, pkt_data <= pkt_meta', good access",
- .insns = {
- BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1,
- offsetof(struct xdp_md, data_meta)),
- BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, offsetof(struct xdp_md, data)),
- BPF_MOV64_REG(BPF_REG_1, BPF_REG_2),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 8),
- BPF_JMP_REG(BPF_JLE, BPF_REG_3, BPF_REG_1, 1),
- BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1, -5),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- },
- .result = ACCEPT,
- .prog_type = BPF_PROG_TYPE_XDP,
- .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
-},
-{
- "XDP pkt read, pkt_data <= pkt_meta', bad access 1",
- .insns = {
- BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1,
- offsetof(struct xdp_md, data_meta)),
- BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, offsetof(struct xdp_md, data)),
- BPF_MOV64_REG(BPF_REG_1, BPF_REG_2),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 8),
- BPF_JMP_REG(BPF_JLE, BPF_REG_3, BPF_REG_1, 1),
- BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -8),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- },
- .errstr = "R1 offset is outside of the packet",
- .result = REJECT,
- .prog_type = BPF_PROG_TYPE_XDP,
- .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
-},
-{
- "XDP pkt read, pkt_data <= pkt_meta', bad access 2",
- .insns = {
- BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1,
- offsetof(struct xdp_md, data_meta)),
- BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, offsetof(struct xdp_md, data)),
- BPF_MOV64_REG(BPF_REG_1, BPF_REG_2),
- BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 8),
- BPF_JMP_REG(BPF_JLE, BPF_REG_3, BPF_REG_1, 0),
- BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_1, -5),
- BPF_MOV64_IMM(BPF_REG_0, 0),
- BPF_EXIT_INSN(),
- },
- .errstr = "R1 offset is outside of the packet",
- .result = REJECT,
- .prog_type = BPF_PROG_TYPE_XDP,
- .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
-},
diff --git a/tools/testing/selftests/bpf/verify_sig_setup.sh b/tools/testing/selftests/bpf/verify_sig_setup.sh
new file mode 100755
index 000000000000..f2cac42298ba
--- /dev/null
+++ b/tools/testing/selftests/bpf/verify_sig_setup.sh
@@ -0,0 +1,129 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+set -e
+set -u
+set -o pipefail
+
+VERBOSE="${SELFTESTS_VERBOSE:=0}"
+LOG_FILE="$(mktemp /tmp/verify_sig_setup.log.XXXXXX)"
+
+x509_genkey_content="\
+[ req ]
+default_bits = 2048
+distinguished_name = req_distinguished_name
+prompt = no
+string_mask = utf8only
+x509_extensions = myexts
+
+[ req_distinguished_name ]
+CN = eBPF Signature Verification Testing Key
+
+[ myexts ]
+basicConstraints=critical,CA:FALSE
+keyUsage=digitalSignature
+subjectKeyIdentifier=hash
+authorityKeyIdentifier=keyid
+"
+
+usage()
+{
+ echo "Usage: $0 <setup|cleanup <existing_tmp_dir>"
+ exit 1
+}
+
+setup()
+{
+ local tmp_dir="$1"
+
+ echo "${x509_genkey_content}" > ${tmp_dir}/x509.genkey
+
+ openssl req -new -nodes -utf8 -sha256 -days 36500 \
+ -batch -x509 -config ${tmp_dir}/x509.genkey \
+ -outform PEM -out ${tmp_dir}/signing_key.pem \
+ -keyout ${tmp_dir}/signing_key.pem 2>&1
+
+ openssl x509 -in ${tmp_dir}/signing_key.pem -out \
+ ${tmp_dir}/signing_key.der -outform der
+
+ key_id=$(cat ${tmp_dir}/signing_key.der | keyctl padd asymmetric ebpf_testing_key @s)
+
+ keyring_id=$(keyctl newring ebpf_testing_keyring @s)
+ keyctl link $key_id $keyring_id
+}
+
+cleanup() {
+ local tmp_dir="$1"
+
+ keyctl unlink $(keyctl search @s asymmetric ebpf_testing_key) @s
+ keyctl unlink $(keyctl search @s keyring ebpf_testing_keyring) @s
+ rm -rf ${tmp_dir}
+}
+
+fsverity_create_sign_file() {
+ local tmp_dir="$1"
+
+ data_file=${tmp_dir}/data-file
+ sig_file=${tmp_dir}/sig-file
+ dd if=/dev/urandom of=$data_file bs=1 count=12345 2> /dev/null
+ fsverity sign --key ${tmp_dir}/signing_key.pem $data_file $sig_file
+
+ # We do not want to enable fsverity on $data_file yet. Try whether
+ # the file system support fsverity on a different file.
+ touch ${tmp_dir}/tmp-file
+ fsverity enable ${tmp_dir}/tmp-file
+}
+
+fsverity_enable_file() {
+ local tmp_dir="$1"
+
+ data_file=${tmp_dir}/data-file
+ fsverity enable $data_file
+}
+
+catch()
+{
+ local exit_code="$1"
+ local log_file="$2"
+
+ if [[ "${exit_code}" -ne 0 ]]; then
+ cat "${log_file}" >&3
+ fi
+
+ rm -f "${log_file}"
+ exit ${exit_code}
+}
+
+main()
+{
+ [[ $# -ne 2 ]] && usage
+
+ local action="$1"
+ local tmp_dir="$2"
+
+ [[ ! -d "${tmp_dir}" ]] && echo "Directory ${tmp_dir} doesn't exist" && exit 1
+
+ if [[ "${action}" == "setup" ]]; then
+ setup "${tmp_dir}"
+ elif [[ "${action}" == "cleanup" ]]; then
+ cleanup "${tmp_dir}"
+ elif [[ "${action}" == "fsverity-create-sign" ]]; then
+ fsverity_create_sign_file "${tmp_dir}"
+ elif [[ "${action}" == "fsverity-enable" ]]; then
+ fsverity_enable_file "${tmp_dir}"
+ else
+ echo "Unknown action: ${action}"
+ exit 1
+ fi
+}
+
+trap 'catch "$?" "${LOG_FILE}"' EXIT
+
+if [[ "${VERBOSE}" -eq 0 ]]; then
+ # Save the stderr to 3 so that we can output back to
+ # it incase of an error.
+ exec 3>&2 1>"${LOG_FILE}" 2>&1
+fi
+
+main "$@"
+rm -f "${LOG_FILE}"
diff --git a/tools/testing/selftests/bpf/veristat.c b/tools/testing/selftests/bpf/veristat.c
new file mode 100644
index 000000000000..244d4996e06e
--- /dev/null
+++ b/tools/testing/selftests/bpf/veristat.c
@@ -0,0 +1,2163 @@
+// SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+/* Copyright (c) 2022 Meta Platforms, Inc. and affiliates. */
+#define _GNU_SOURCE
+#include <argp.h>
+#include <string.h>
+#include <stdlib.h>
+#include <sched.h>
+#include <pthread.h>
+#include <dirent.h>
+#include <signal.h>
+#include <fcntl.h>
+#include <unistd.h>
+#include <sys/time.h>
+#include <sys/sysinfo.h>
+#include <sys/stat.h>
+#include <bpf/libbpf.h>
+#include <bpf/btf.h>
+#include <libelf.h>
+#include <gelf.h>
+#include <float.h>
+#include <math.h>
+
+#ifndef ARRAY_SIZE
+#define ARRAY_SIZE(arr) (sizeof(arr) / sizeof((arr)[0]))
+#endif
+
+enum stat_id {
+ VERDICT,
+ DURATION,
+ TOTAL_INSNS,
+ TOTAL_STATES,
+ PEAK_STATES,
+ MAX_STATES_PER_INSN,
+ MARK_READ_MAX_LEN,
+
+ FILE_NAME,
+ PROG_NAME,
+
+ ALL_STATS_CNT,
+ NUM_STATS_CNT = FILE_NAME - VERDICT,
+};
+
+/* In comparison mode each stat can specify up to four different values:
+ * - A side value;
+ * - B side value;
+ * - absolute diff value;
+ * - relative (percentage) diff value.
+ *
+ * When specifying stat specs in comparison mode, user can use one of the
+ * following variant suffixes to specify which exact variant should be used for
+ * ordering or filtering:
+ * - `_a` for A side value;
+ * - `_b` for B side value;
+ * - `_diff` for absolute diff value;
+ * - `_pct` for relative (percentage) diff value.
+ *
+ * If no variant suffix is provided, then `_b` (control data) is assumed.
+ *
+ * As an example, let's say instructions stat has the following output:
+ *
+ * Insns (A) Insns (B) Insns (DIFF)
+ * --------- --------- --------------
+ * 21547 20920 -627 (-2.91%)
+ *
+ * Then:
+ * - 21547 is A side value (insns_a);
+ * - 20920 is B side value (insns_b);
+ * - -627 is absolute diff value (insns_diff);
+ * - -2.91% is relative diff value (insns_pct).
+ *
+ * For verdict there is no verdict_pct variant.
+ * For file and program name, _a and _b variants are equivalent and there are
+ * no _diff or _pct variants.
+ */
+enum stat_variant {
+ VARIANT_A,
+ VARIANT_B,
+ VARIANT_DIFF,
+ VARIANT_PCT,
+};
+
+struct verif_stats {
+ char *file_name;
+ char *prog_name;
+
+ long stats[NUM_STATS_CNT];
+};
+
+/* joined comparison mode stats */
+struct verif_stats_join {
+ char *file_name;
+ char *prog_name;
+
+ const struct verif_stats *stats_a;
+ const struct verif_stats *stats_b;
+};
+
+struct stat_specs {
+ int spec_cnt;
+ enum stat_id ids[ALL_STATS_CNT];
+ enum stat_variant variants[ALL_STATS_CNT];
+ bool asc[ALL_STATS_CNT];
+ bool abs[ALL_STATS_CNT];
+ int lens[ALL_STATS_CNT * 3]; /* 3x for comparison mode */
+};
+
+enum resfmt {
+ RESFMT_TABLE,
+ RESFMT_TABLE_CALCLEN, /* fake format to pre-calculate table's column widths */
+ RESFMT_CSV,
+};
+
+enum filter_kind {
+ FILTER_NAME,
+ FILTER_STAT,
+};
+
+enum operator_kind {
+ OP_EQ, /* == or = */
+ OP_NEQ, /* != or <> */
+ OP_LT, /* < */
+ OP_LE, /* <= */
+ OP_GT, /* > */
+ OP_GE, /* >= */
+};
+
+struct filter {
+ enum filter_kind kind;
+ /* FILTER_NAME */
+ char *any_glob;
+ char *file_glob;
+ char *prog_glob;
+ /* FILTER_STAT */
+ enum operator_kind op;
+ int stat_id;
+ enum stat_variant stat_var;
+ long value;
+ bool abs;
+};
+
+static struct env {
+ char **filenames;
+ int filename_cnt;
+ bool verbose;
+ bool debug;
+ bool quiet;
+ bool force_checkpoints;
+ bool force_reg_invariants;
+ enum resfmt out_fmt;
+ bool show_version;
+ bool comparison_mode;
+ bool replay_mode;
+ int top_n;
+
+ int log_level;
+ int log_size;
+ bool log_fixed;
+
+ struct verif_stats *prog_stats;
+ int prog_stat_cnt;
+
+ /* baseline_stats is allocated and used only in comparison mode */
+ struct verif_stats *baseline_stats;
+ int baseline_stat_cnt;
+
+ struct verif_stats_join *join_stats;
+ int join_stat_cnt;
+
+ struct stat_specs output_spec;
+ struct stat_specs sort_spec;
+
+ struct filter *allow_filters;
+ struct filter *deny_filters;
+ int allow_filter_cnt;
+ int deny_filter_cnt;
+
+ int files_processed;
+ int files_skipped;
+ int progs_processed;
+ int progs_skipped;
+} env;
+
+static int libbpf_print_fn(enum libbpf_print_level level, const char *format, va_list args)
+{
+ if (!env.verbose)
+ return 0;
+ if (level == LIBBPF_DEBUG && !env.debug)
+ return 0;
+ return vfprintf(stderr, format, args);
+}
+
+#ifndef VERISTAT_VERSION
+#define VERISTAT_VERSION "<kernel>"
+#endif
+
+const char *argp_program_version = "veristat v" VERISTAT_VERSION;
+const char *argp_program_bug_address = "<bpf@vger.kernel.org>";
+const char argp_program_doc[] =
+"veristat BPF verifier stats collection and comparison tool.\n"
+"\n"
+"USAGE: veristat <obj-file> [<obj-file>...]\n"
+" OR: veristat -C <baseline.csv> <comparison.csv>\n"
+" OR: veristat -R <results.csv>\n";
+
+enum {
+ OPT_LOG_FIXED = 1000,
+ OPT_LOG_SIZE = 1001,
+};
+
+static const struct argp_option opts[] = {
+ { NULL, 'h', NULL, OPTION_HIDDEN, "Show the full help" },
+ { "version", 'V', NULL, 0, "Print version" },
+ { "verbose", 'v', NULL, 0, "Verbose mode" },
+ { "debug", 'd', NULL, 0, "Debug mode (turns on libbpf debug logging)" },
+ { "log-level", 'l', "LEVEL", 0, "Verifier log level (default 0 for normal mode, 1 for verbose mode)" },
+ { "log-fixed", OPT_LOG_FIXED, NULL, 0, "Disable verifier log rotation" },
+ { "log-size", OPT_LOG_SIZE, "BYTES", 0, "Customize verifier log size (default to 16MB)" },
+ { "top-n", 'n', "N", 0, "Emit only up to first N results." },
+ { "quiet", 'q', NULL, 0, "Quiet mode" },
+ { "emit", 'e', "SPEC", 0, "Specify stats to be emitted" },
+ { "sort", 's', "SPEC", 0, "Specify sort order" },
+ { "output-format", 'o', "FMT", 0, "Result output format (table, csv), default is table." },
+ { "compare", 'C', NULL, 0, "Comparison mode" },
+ { "replay", 'R', NULL, 0, "Replay mode" },
+ { "filter", 'f', "FILTER", 0, "Filter expressions (or @filename for file with expressions)." },
+ { "test-states", 't', NULL, 0,
+ "Force frequent BPF verifier state checkpointing (set BPF_F_TEST_STATE_FREQ program flag)" },
+ { "test-reg-invariants", 'r', NULL, 0,
+ "Force BPF verifier failure on register invariant violation (BPF_F_TEST_REG_INVARIANTS program flag)" },
+ {},
+};
+
+static int parse_stats(const char *stats_str, struct stat_specs *specs);
+static int append_filter(struct filter **filters, int *cnt, const char *str);
+static int append_filter_file(const char *path);
+
+static error_t parse_arg(int key, char *arg, struct argp_state *state)
+{
+ void *tmp;
+ int err;
+
+ switch (key) {
+ case 'h':
+ argp_state_help(state, stderr, ARGP_HELP_STD_HELP);
+ break;
+ case 'V':
+ env.show_version = true;
+ break;
+ case 'v':
+ env.verbose = true;
+ break;
+ case 'd':
+ env.debug = true;
+ env.verbose = true;
+ break;
+ case 'q':
+ env.quiet = true;
+ break;
+ case 'e':
+ err = parse_stats(arg, &env.output_spec);
+ if (err)
+ return err;
+ break;
+ case 's':
+ err = parse_stats(arg, &env.sort_spec);
+ if (err)
+ return err;
+ break;
+ case 'o':
+ if (strcmp(arg, "table") == 0) {
+ env.out_fmt = RESFMT_TABLE;
+ } else if (strcmp(arg, "csv") == 0) {
+ env.out_fmt = RESFMT_CSV;
+ } else {
+ fprintf(stderr, "Unrecognized output format '%s'\n", arg);
+ return -EINVAL;
+ }
+ break;
+ case 'l':
+ errno = 0;
+ env.log_level = strtol(arg, NULL, 10);
+ if (errno) {
+ fprintf(stderr, "invalid log level: %s\n", arg);
+ argp_usage(state);
+ }
+ break;
+ case OPT_LOG_FIXED:
+ env.log_fixed = true;
+ break;
+ case OPT_LOG_SIZE:
+ errno = 0;
+ env.log_size = strtol(arg, NULL, 10);
+ if (errno) {
+ fprintf(stderr, "invalid log size: %s\n", arg);
+ argp_usage(state);
+ }
+ break;
+ case 't':
+ env.force_checkpoints = true;
+ break;
+ case 'r':
+ env.force_reg_invariants = true;
+ break;
+ case 'n':
+ errno = 0;
+ env.top_n = strtol(arg, NULL, 10);
+ if (errno) {
+ fprintf(stderr, "invalid top N specifier: %s\n", arg);
+ argp_usage(state);
+ }
+ case 'C':
+ env.comparison_mode = true;
+ break;
+ case 'R':
+ env.replay_mode = true;
+ break;
+ case 'f':
+ if (arg[0] == '@')
+ err = append_filter_file(arg + 1);
+ else if (arg[0] == '!')
+ err = append_filter(&env.deny_filters, &env.deny_filter_cnt, arg + 1);
+ else
+ err = append_filter(&env.allow_filters, &env.allow_filter_cnt, arg);
+ if (err) {
+ fprintf(stderr, "Failed to collect program filter expressions: %d\n", err);
+ return err;
+ }
+ break;
+ case ARGP_KEY_ARG:
+ tmp = realloc(env.filenames, (env.filename_cnt + 1) * sizeof(*env.filenames));
+ if (!tmp)
+ return -ENOMEM;
+ env.filenames = tmp;
+ env.filenames[env.filename_cnt] = strdup(arg);
+ if (!env.filenames[env.filename_cnt])
+ return -ENOMEM;
+ env.filename_cnt++;
+ break;
+ default:
+ return ARGP_ERR_UNKNOWN;
+ }
+ return 0;
+}
+
+static const struct argp argp = {
+ .options = opts,
+ .parser = parse_arg,
+ .doc = argp_program_doc,
+};
+
+
+/* Adapted from perf/util/string.c */
+static bool glob_matches(const char *str, const char *pat)
+{
+ while (*str && *pat && *pat != '*') {
+ if (*str != *pat)
+ return false;
+ str++;
+ pat++;
+ }
+ /* Check wild card */
+ if (*pat == '*') {
+ while (*pat == '*')
+ pat++;
+ if (!*pat) /* Tail wild card matches all */
+ return true;
+ while (*str)
+ if (glob_matches(str++, pat))
+ return true;
+ }
+ return !*str && !*pat;
+}
+
+static bool is_bpf_obj_file(const char *path) {
+ Elf64_Ehdr *ehdr;
+ int fd, err = -EINVAL;
+ Elf *elf = NULL;
+
+ fd = open(path, O_RDONLY | O_CLOEXEC);
+ if (fd < 0)
+ return true; /* we'll fail later and propagate error */
+
+ /* ensure libelf is initialized */
+ (void)elf_version(EV_CURRENT);
+
+ elf = elf_begin(fd, ELF_C_READ, NULL);
+ if (!elf)
+ goto cleanup;
+
+ if (elf_kind(elf) != ELF_K_ELF || gelf_getclass(elf) != ELFCLASS64)
+ goto cleanup;
+
+ ehdr = elf64_getehdr(elf);
+ /* Old LLVM set e_machine to EM_NONE */
+ if (!ehdr || ehdr->e_type != ET_REL || (ehdr->e_machine && ehdr->e_machine != EM_BPF))
+ goto cleanup;
+
+ err = 0;
+cleanup:
+ if (elf)
+ elf_end(elf);
+ close(fd);
+ return err == 0;
+}
+
+static bool should_process_file_prog(const char *filename, const char *prog_name)
+{
+ struct filter *f;
+ int i, allow_cnt = 0;
+
+ for (i = 0; i < env.deny_filter_cnt; i++) {
+ f = &env.deny_filters[i];
+ if (f->kind != FILTER_NAME)
+ continue;
+
+ if (f->any_glob && glob_matches(filename, f->any_glob))
+ return false;
+ if (f->any_glob && prog_name && glob_matches(prog_name, f->any_glob))
+ return false;
+ if (f->file_glob && glob_matches(filename, f->file_glob))
+ return false;
+ if (f->prog_glob && prog_name && glob_matches(prog_name, f->prog_glob))
+ return false;
+ }
+
+ for (i = 0; i < env.allow_filter_cnt; i++) {
+ f = &env.allow_filters[i];
+ if (f->kind != FILTER_NAME)
+ continue;
+
+ allow_cnt++;
+ if (f->any_glob) {
+ if (glob_matches(filename, f->any_glob))
+ return true;
+ /* If we don't know program name yet, any_glob filter
+ * has to assume that current BPF object file might be
+ * relevant; we'll check again later on after opening
+ * BPF object file, at which point program name will
+ * be known finally.
+ */
+ if (!prog_name || glob_matches(prog_name, f->any_glob))
+ return true;
+ } else {
+ if (f->file_glob && !glob_matches(filename, f->file_glob))
+ continue;
+ if (f->prog_glob && prog_name && !glob_matches(prog_name, f->prog_glob))
+ continue;
+ return true;
+ }
+ }
+
+ /* if there are no file/prog name allow filters, allow all progs,
+ * unless they are denied earlier explicitly
+ */
+ return allow_cnt == 0;
+}
+
+static struct {
+ enum operator_kind op_kind;
+ const char *op_str;
+} operators[] = {
+ /* Order of these definitions matter to avoid situations like '<'
+ * matching part of what is actually a '<>' operator. That is,
+ * substrings should go last.
+ */
+ { OP_EQ, "==" },
+ { OP_NEQ, "!=" },
+ { OP_NEQ, "<>" },
+ { OP_LE, "<=" },
+ { OP_LT, "<" },
+ { OP_GE, ">=" },
+ { OP_GT, ">" },
+ { OP_EQ, "=" },
+};
+
+static bool parse_stat_id_var(const char *name, size_t len, int *id,
+ enum stat_variant *var, bool *is_abs);
+
+static int append_filter(struct filter **filters, int *cnt, const char *str)
+{
+ struct filter *f;
+ void *tmp;
+ const char *p;
+ int i;
+
+ tmp = realloc(*filters, (*cnt + 1) * sizeof(**filters));
+ if (!tmp)
+ return -ENOMEM;
+ *filters = tmp;
+
+ f = &(*filters)[*cnt];
+ memset(f, 0, sizeof(*f));
+
+ /* First, let's check if it's a stats filter of the following form:
+ * <stat><op><value, where:
+ * - <stat> is one of supported numerical stats (verdict is also
+ * considered numerical, failure == 0, success == 1);
+ * - <op> is comparison operator (see `operators` definitions);
+ * - <value> is an integer (or failure/success, or false/true as
+ * special aliases for 0 and 1, respectively).
+ * If the form doesn't match what user provided, we assume file/prog
+ * glob filter.
+ */
+ for (i = 0; i < ARRAY_SIZE(operators); i++) {
+ enum stat_variant var;
+ int id;
+ long val;
+ const char *end = str;
+ const char *op_str;
+ bool is_abs;
+
+ op_str = operators[i].op_str;
+ p = strstr(str, op_str);
+ if (!p)
+ continue;
+
+ if (!parse_stat_id_var(str, p - str, &id, &var, &is_abs)) {
+ fprintf(stderr, "Unrecognized stat name in '%s'!\n", str);
+ return -EINVAL;
+ }
+ if (id >= FILE_NAME) {
+ fprintf(stderr, "Non-integer stat is specified in '%s'!\n", str);
+ return -EINVAL;
+ }
+
+ p += strlen(op_str);
+
+ if (strcasecmp(p, "true") == 0 ||
+ strcasecmp(p, "t") == 0 ||
+ strcasecmp(p, "success") == 0 ||
+ strcasecmp(p, "succ") == 0 ||
+ strcasecmp(p, "s") == 0 ||
+ strcasecmp(p, "match") == 0 ||
+ strcasecmp(p, "m") == 0) {
+ val = 1;
+ } else if (strcasecmp(p, "false") == 0 ||
+ strcasecmp(p, "f") == 0 ||
+ strcasecmp(p, "failure") == 0 ||
+ strcasecmp(p, "fail") == 0 ||
+ strcasecmp(p, "mismatch") == 0 ||
+ strcasecmp(p, "mis") == 0) {
+ val = 0;
+ } else {
+ errno = 0;
+ val = strtol(p, (char **)&end, 10);
+ if (errno || end == p || *end != '\0' ) {
+ fprintf(stderr, "Invalid integer value in '%s'!\n", str);
+ return -EINVAL;
+ }
+ }
+
+ f->kind = FILTER_STAT;
+ f->stat_id = id;
+ f->stat_var = var;
+ f->op = operators[i].op_kind;
+ f->abs = true;
+ f->value = val;
+
+ *cnt += 1;
+ return 0;
+ }
+
+ /* File/prog filter can be specified either as '<glob>' or
+ * '<file-glob>/<prog-glob>'. In the former case <glob> is applied to
+ * both file and program names. This seems to be way more useful in
+ * practice. If user needs full control, they can use '/<prog-glob>'
+ * form to glob just program name, or '<file-glob>/' to glob only file
+ * name. But usually common <glob> seems to be the most useful and
+ * ergonomic way.
+ */
+ f->kind = FILTER_NAME;
+ p = strchr(str, '/');
+ if (!p) {
+ f->any_glob = strdup(str);
+ if (!f->any_glob)
+ return -ENOMEM;
+ } else {
+ if (str != p) {
+ /* non-empty file glob */
+ f->file_glob = strndup(str, p - str);
+ if (!f->file_glob)
+ return -ENOMEM;
+ }
+ if (strlen(p + 1) > 0) {
+ /* non-empty prog glob */
+ f->prog_glob = strdup(p + 1);
+ if (!f->prog_glob) {
+ free(f->file_glob);
+ f->file_glob = NULL;
+ return -ENOMEM;
+ }
+ }
+ }
+
+ *cnt += 1;
+ return 0;
+}
+
+static int append_filter_file(const char *path)
+{
+ char buf[1024];
+ FILE *f;
+ int err = 0;
+
+ f = fopen(path, "r");
+ if (!f) {
+ err = -errno;
+ fprintf(stderr, "Failed to open filters in '%s': %d\n", path, err);
+ return err;
+ }
+
+ while (fscanf(f, " %1023[^\n]\n", buf) == 1) {
+ /* lines starting with # are comments, skip them */
+ if (buf[0] == '\0' || buf[0] == '#')
+ continue;
+ /* lines starting with ! are negative match filters */
+ if (buf[0] == '!')
+ err = append_filter(&env.deny_filters, &env.deny_filter_cnt, buf + 1);
+ else
+ err = append_filter(&env.allow_filters, &env.allow_filter_cnt, buf);
+ if (err)
+ goto cleanup;
+ }
+
+cleanup:
+ fclose(f);
+ return err;
+}
+
+static const struct stat_specs default_output_spec = {
+ .spec_cnt = 7,
+ .ids = {
+ FILE_NAME, PROG_NAME, VERDICT, DURATION,
+ TOTAL_INSNS, TOTAL_STATES, PEAK_STATES,
+ },
+};
+
+static const struct stat_specs default_csv_output_spec = {
+ .spec_cnt = 9,
+ .ids = {
+ FILE_NAME, PROG_NAME, VERDICT, DURATION,
+ TOTAL_INSNS, TOTAL_STATES, PEAK_STATES,
+ MAX_STATES_PER_INSN, MARK_READ_MAX_LEN,
+ },
+};
+
+static const struct stat_specs default_sort_spec = {
+ .spec_cnt = 2,
+ .ids = {
+ FILE_NAME, PROG_NAME,
+ },
+ .asc = { true, true, },
+};
+
+/* sorting for comparison mode to join two data sets */
+static const struct stat_specs join_sort_spec = {
+ .spec_cnt = 2,
+ .ids = {
+ FILE_NAME, PROG_NAME,
+ },
+ .asc = { true, true, },
+};
+
+static struct stat_def {
+ const char *header;
+ const char *names[4];
+ bool asc_by_default;
+ bool left_aligned;
+} stat_defs[] = {
+ [FILE_NAME] = { "File", {"file_name", "filename", "file"}, true /* asc */, true /* left */ },
+ [PROG_NAME] = { "Program", {"prog_name", "progname", "prog"}, true /* asc */, true /* left */ },
+ [VERDICT] = { "Verdict", {"verdict"}, true /* asc: failure, success */, true /* left */ },
+ [DURATION] = { "Duration (us)", {"duration", "dur"}, },
+ [TOTAL_INSNS] = { "Insns", {"total_insns", "insns"}, },
+ [TOTAL_STATES] = { "States", {"total_states", "states"}, },
+ [PEAK_STATES] = { "Peak states", {"peak_states"}, },
+ [MAX_STATES_PER_INSN] = { "Max states per insn", {"max_states_per_insn"}, },
+ [MARK_READ_MAX_LEN] = { "Max mark read length", {"max_mark_read_len", "mark_read"}, },
+};
+
+static bool parse_stat_id_var(const char *name, size_t len, int *id,
+ enum stat_variant *var, bool *is_abs)
+{
+ static const char *var_sfxs[] = {
+ [VARIANT_A] = "_a",
+ [VARIANT_B] = "_b",
+ [VARIANT_DIFF] = "_diff",
+ [VARIANT_PCT] = "_pct",
+ };
+ int i, j, k;
+
+ /* |<stat>| means we take absolute value of given stat */
+ *is_abs = false;
+ if (len > 2 && name[0] == '|' && name[len - 1] == '|') {
+ *is_abs = true;
+ name += 1;
+ len -= 2;
+ }
+
+ for (i = 0; i < ARRAY_SIZE(stat_defs); i++) {
+ struct stat_def *def = &stat_defs[i];
+ size_t alias_len, sfx_len;
+ const char *alias;
+
+ for (j = 0; j < ARRAY_SIZE(stat_defs[i].names); j++) {
+ alias = def->names[j];
+ if (!alias)
+ continue;
+
+ alias_len = strlen(alias);
+ if (strncmp(name, alias, alias_len) != 0)
+ continue;
+
+ if (alias_len == len) {
+ /* If no variant suffix is specified, we
+ * assume control group (just in case we are
+ * in comparison mode. Variant is ignored in
+ * non-comparison mode.
+ */
+ *var = VARIANT_B;
+ *id = i;
+ return true;
+ }
+
+ for (k = 0; k < ARRAY_SIZE(var_sfxs); k++) {
+ sfx_len = strlen(var_sfxs[k]);
+ if (alias_len + sfx_len != len)
+ continue;
+
+ if (strncmp(name + alias_len, var_sfxs[k], sfx_len) == 0) {
+ *var = (enum stat_variant)k;
+ *id = i;
+ return true;
+ }
+ }
+ }
+ }
+
+ return false;
+}
+
+static bool is_asc_sym(char c)
+{
+ return c == '^';
+}
+
+static bool is_desc_sym(char c)
+{
+ return c == 'v' || c == 'V' || c == '.' || c == '!' || c == '_';
+}
+
+static int parse_stat(const char *stat_name, struct stat_specs *specs)
+{
+ int id;
+ bool has_order = false, is_asc = false, is_abs = false;
+ size_t len = strlen(stat_name);
+ enum stat_variant var;
+
+ if (specs->spec_cnt >= ARRAY_SIZE(specs->ids)) {
+ fprintf(stderr, "Can't specify more than %zd stats\n", ARRAY_SIZE(specs->ids));
+ return -E2BIG;
+ }
+
+ if (len > 1 && (is_asc_sym(stat_name[len - 1]) || is_desc_sym(stat_name[len - 1]))) {
+ has_order = true;
+ is_asc = is_asc_sym(stat_name[len - 1]);
+ len -= 1;
+ }
+
+ if (!parse_stat_id_var(stat_name, len, &id, &var, &is_abs)) {
+ fprintf(stderr, "Unrecognized stat name '%s'\n", stat_name);
+ return -ESRCH;
+ }
+
+ specs->ids[specs->spec_cnt] = id;
+ specs->variants[specs->spec_cnt] = var;
+ specs->asc[specs->spec_cnt] = has_order ? is_asc : stat_defs[id].asc_by_default;
+ specs->abs[specs->spec_cnt] = is_abs;
+ specs->spec_cnt++;
+
+ return 0;
+}
+
+static int parse_stats(const char *stats_str, struct stat_specs *specs)
+{
+ char *input, *state = NULL, *next;
+ int err;
+
+ input = strdup(stats_str);
+ if (!input)
+ return -ENOMEM;
+
+ while ((next = strtok_r(state ? NULL : input, ",", &state))) {
+ err = parse_stat(next, specs);
+ if (err)
+ return err;
+ }
+
+ return 0;
+}
+
+static void free_verif_stats(struct verif_stats *stats, size_t stat_cnt)
+{
+ int i;
+
+ if (!stats)
+ return;
+
+ for (i = 0; i < stat_cnt; i++) {
+ free(stats[i].file_name);
+ free(stats[i].prog_name);
+ }
+ free(stats);
+}
+
+static char verif_log_buf[64 * 1024];
+
+#define MAX_PARSED_LOG_LINES 100
+
+static int parse_verif_log(char * const buf, size_t buf_sz, struct verif_stats *s)
+{
+ const char *cur;
+ int pos, lines;
+
+ buf[buf_sz - 1] = '\0';
+
+ for (pos = strlen(buf) - 1, lines = 0; pos >= 0 && lines < MAX_PARSED_LOG_LINES; lines++) {
+ /* find previous endline or otherwise take the start of log buf */
+ for (cur = &buf[pos]; cur > buf && cur[0] != '\n'; cur--, pos--) {
+ }
+ /* next time start from end of previous line (or pos goes to <0) */
+ pos--;
+ /* if we found endline, point right after endline symbol;
+ * otherwise, stay at the beginning of log buf
+ */
+ if (cur[0] == '\n')
+ cur++;
+
+ if (1 == sscanf(cur, "verification time %ld usec\n", &s->stats[DURATION]))
+ continue;
+ if (6 == sscanf(cur, "processed %ld insns (limit %*d) max_states_per_insn %ld total_states %ld peak_states %ld mark_read %ld",
+ &s->stats[TOTAL_INSNS],
+ &s->stats[MAX_STATES_PER_INSN],
+ &s->stats[TOTAL_STATES],
+ &s->stats[PEAK_STATES],
+ &s->stats[MARK_READ_MAX_LEN]))
+ continue;
+ }
+
+ return 0;
+}
+
+static int guess_prog_type_by_ctx_name(const char *ctx_name,
+ enum bpf_prog_type *prog_type,
+ enum bpf_attach_type *attach_type)
+{
+ /* We need to guess program type based on its declared context type.
+ * This guess can't be perfect as many different program types might
+ * share the same context type. So we can only hope to reasonably
+ * well guess this and get lucky.
+ *
+ * Just in case, we support both UAPI-side type names and
+ * kernel-internal names.
+ */
+ static struct {
+ const char *uapi_name;
+ const char *kern_name;
+ enum bpf_prog_type prog_type;
+ enum bpf_attach_type attach_type;
+ } ctx_map[] = {
+ /* __sk_buff is most ambiguous, we assume TC program */
+ { "__sk_buff", "sk_buff", BPF_PROG_TYPE_SCHED_CLS },
+ { "bpf_sock", "sock", BPF_PROG_TYPE_CGROUP_SOCK, BPF_CGROUP_INET4_POST_BIND },
+ { "bpf_sock_addr", "bpf_sock_addr_kern", BPF_PROG_TYPE_CGROUP_SOCK_ADDR, BPF_CGROUP_INET4_BIND },
+ { "bpf_sock_ops", "bpf_sock_ops_kern", BPF_PROG_TYPE_SOCK_OPS, BPF_CGROUP_SOCK_OPS },
+ { "sk_msg_md", "sk_msg", BPF_PROG_TYPE_SK_MSG, BPF_SK_MSG_VERDICT },
+ { "bpf_cgroup_dev_ctx", "bpf_cgroup_dev_ctx", BPF_PROG_TYPE_CGROUP_DEVICE, BPF_CGROUP_DEVICE },
+ { "bpf_sysctl", "bpf_sysctl_kern", BPF_PROG_TYPE_CGROUP_SYSCTL, BPF_CGROUP_SYSCTL },
+ { "bpf_sockopt", "bpf_sockopt_kern", BPF_PROG_TYPE_CGROUP_SOCKOPT, BPF_CGROUP_SETSOCKOPT },
+ { "sk_reuseport_md", "sk_reuseport_kern", BPF_PROG_TYPE_SK_REUSEPORT, BPF_SK_REUSEPORT_SELECT_OR_MIGRATE },
+ { "bpf_sk_lookup", "bpf_sk_lookup_kern", BPF_PROG_TYPE_SK_LOOKUP, BPF_SK_LOOKUP },
+ { "xdp_md", "xdp_buff", BPF_PROG_TYPE_XDP, BPF_XDP },
+ /* tracing types with no expected attach type */
+ { "bpf_user_pt_regs_t", "pt_regs", BPF_PROG_TYPE_KPROBE },
+ { "bpf_perf_event_data", "bpf_perf_event_data_kern", BPF_PROG_TYPE_PERF_EVENT },
+ /* raw_tp programs use u64[] from kernel side, we don't want
+ * to match on that, probably; so NULL for kern-side type
+ */
+ { "bpf_raw_tracepoint_args", NULL, BPF_PROG_TYPE_RAW_TRACEPOINT },
+ };
+ int i;
+
+ if (!ctx_name)
+ return -EINVAL;
+
+ for (i = 0; i < ARRAY_SIZE(ctx_map); i++) {
+ if (strcmp(ctx_map[i].uapi_name, ctx_name) == 0 ||
+ (ctx_map[i].kern_name && strcmp(ctx_map[i].kern_name, ctx_name) == 0)) {
+ *prog_type = ctx_map[i].prog_type;
+ *attach_type = ctx_map[i].attach_type;
+ return 0;
+ }
+ }
+
+ return -ESRCH;
+}
+
+static void fixup_obj(struct bpf_object *obj, struct bpf_program *prog, const char *filename)
+{
+ struct bpf_map *map;
+
+ bpf_object__for_each_map(map, obj) {
+ /* disable pinning */
+ bpf_map__set_pin_path(map, NULL);
+
+ /* fix up map size, if necessary */
+ switch (bpf_map__type(map)) {
+ case BPF_MAP_TYPE_SK_STORAGE:
+ case BPF_MAP_TYPE_TASK_STORAGE:
+ case BPF_MAP_TYPE_INODE_STORAGE:
+ case BPF_MAP_TYPE_CGROUP_STORAGE:
+ break;
+ default:
+ if (bpf_map__max_entries(map) == 0)
+ bpf_map__set_max_entries(map, 1);
+ }
+ }
+
+ /* SEC(freplace) programs can't be loaded with veristat as is,
+ * but we can try guessing their target program's expected type by
+ * looking at the type of program's first argument and substituting
+ * corresponding program type
+ */
+ if (bpf_program__type(prog) == BPF_PROG_TYPE_EXT) {
+ const struct btf *btf = bpf_object__btf(obj);
+ const char *prog_name = bpf_program__name(prog);
+ enum bpf_prog_type prog_type;
+ enum bpf_attach_type attach_type;
+ const struct btf_type *t;
+ const char *ctx_name;
+ int id;
+
+ if (!btf)
+ goto skip_freplace_fixup;
+
+ id = btf__find_by_name_kind(btf, prog_name, BTF_KIND_FUNC);
+ t = btf__type_by_id(btf, id);
+ t = btf__type_by_id(btf, t->type);
+ if (!btf_is_func_proto(t) || btf_vlen(t) != 1)
+ goto skip_freplace_fixup;
+
+ /* context argument is a pointer to a struct/typedef */
+ t = btf__type_by_id(btf, btf_params(t)[0].type);
+ while (t && btf_is_mod(t))
+ t = btf__type_by_id(btf, t->type);
+ if (!t || !btf_is_ptr(t))
+ goto skip_freplace_fixup;
+ t = btf__type_by_id(btf, t->type);
+ while (t && btf_is_mod(t))
+ t = btf__type_by_id(btf, t->type);
+ if (!t)
+ goto skip_freplace_fixup;
+
+ ctx_name = btf__name_by_offset(btf, t->name_off);
+
+ if (guess_prog_type_by_ctx_name(ctx_name, &prog_type, &attach_type) == 0) {
+ bpf_program__set_type(prog, prog_type);
+ bpf_program__set_expected_attach_type(prog, attach_type);
+
+ if (!env.quiet) {
+ printf("Using guessed program type '%s' for %s/%s...\n",
+ libbpf_bpf_prog_type_str(prog_type),
+ filename, prog_name);
+ }
+ } else {
+ if (!env.quiet) {
+ printf("Failed to guess program type for freplace program with context type name '%s' for %s/%s. Consider using canonical type names to help veristat...\n",
+ ctx_name, filename, prog_name);
+ }
+ }
+ }
+skip_freplace_fixup:
+ return;
+}
+
+static int process_prog(const char *filename, struct bpf_object *obj, struct bpf_program *prog)
+{
+ const char *prog_name = bpf_program__name(prog);
+ const char *base_filename = basename(filename);
+ char *buf;
+ int buf_sz, log_level;
+ struct verif_stats *stats;
+ int err = 0;
+ void *tmp;
+
+ if (!should_process_file_prog(base_filename, bpf_program__name(prog))) {
+ env.progs_skipped++;
+ return 0;
+ }
+
+ tmp = realloc(env.prog_stats, (env.prog_stat_cnt + 1) * sizeof(*env.prog_stats));
+ if (!tmp)
+ return -ENOMEM;
+ env.prog_stats = tmp;
+ stats = &env.prog_stats[env.prog_stat_cnt++];
+ memset(stats, 0, sizeof(*stats));
+
+ if (env.verbose) {
+ buf_sz = env.log_size ? env.log_size : 16 * 1024 * 1024;
+ buf = malloc(buf_sz);
+ if (!buf)
+ return -ENOMEM;
+ /* ensure we always request stats */
+ log_level = env.log_level | 4 | (env.log_fixed ? 8 : 0);
+ } else {
+ buf = verif_log_buf;
+ buf_sz = sizeof(verif_log_buf);
+ /* request only verifier stats */
+ log_level = 4 | (env.log_fixed ? 8 : 0);
+ }
+ verif_log_buf[0] = '\0';
+
+ bpf_program__set_log_buf(prog, buf, buf_sz);
+ bpf_program__set_log_level(prog, log_level);
+
+ /* increase chances of successful BPF object loading */
+ fixup_obj(obj, prog, base_filename);
+
+ if (env.force_checkpoints)
+ bpf_program__set_flags(prog, bpf_program__flags(prog) | BPF_F_TEST_STATE_FREQ);
+ if (env.force_reg_invariants)
+ bpf_program__set_flags(prog, bpf_program__flags(prog) | BPF_F_TEST_REG_INVARIANTS);
+
+ err = bpf_object__load(obj);
+ env.progs_processed++;
+
+ stats->file_name = strdup(base_filename);
+ stats->prog_name = strdup(bpf_program__name(prog));
+ stats->stats[VERDICT] = err == 0; /* 1 - success, 0 - failure */
+ parse_verif_log(buf, buf_sz, stats);
+
+ if (env.verbose) {
+ printf("PROCESSING %s/%s, DURATION US: %ld, VERDICT: %s, VERIFIER LOG:\n%s\n",
+ filename, prog_name, stats->stats[DURATION],
+ err ? "failure" : "success", buf);
+ }
+
+ if (verif_log_buf != buf)
+ free(buf);
+
+ return 0;
+};
+
+static int process_obj(const char *filename)
+{
+ struct bpf_object *obj = NULL, *tobj;
+ struct bpf_program *prog, *tprog, *lprog;
+ libbpf_print_fn_t old_libbpf_print_fn;
+ LIBBPF_OPTS(bpf_object_open_opts, opts);
+ int err = 0, prog_cnt = 0;
+
+ if (!should_process_file_prog(basename(filename), NULL)) {
+ if (env.verbose)
+ printf("Skipping '%s' due to filters...\n", filename);
+ env.files_skipped++;
+ return 0;
+ }
+ if (!is_bpf_obj_file(filename)) {
+ if (env.verbose)
+ printf("Skipping '%s' as it's not a BPF object file...\n", filename);
+ env.files_skipped++;
+ return 0;
+ }
+
+ if (!env.quiet && env.out_fmt == RESFMT_TABLE)
+ printf("Processing '%s'...\n", basename(filename));
+
+ old_libbpf_print_fn = libbpf_set_print(libbpf_print_fn);
+ obj = bpf_object__open_file(filename, &opts);
+ if (!obj) {
+ /* if libbpf can't open BPF object file, it could be because
+ * that BPF object file is incomplete and has to be statically
+ * linked into a final BPF object file; instead of bailing
+ * out, report it into stderr, mark it as skipped, and
+ * proceed
+ */
+ fprintf(stderr, "Failed to open '%s': %d\n", filename, -errno);
+ env.files_skipped++;
+ err = 0;
+ goto cleanup;
+ }
+
+ env.files_processed++;
+
+ bpf_object__for_each_program(prog, obj) {
+ prog_cnt++;
+ }
+
+ if (prog_cnt == 1) {
+ prog = bpf_object__next_program(obj, NULL);
+ bpf_program__set_autoload(prog, true);
+ process_prog(filename, obj, prog);
+ goto cleanup;
+ }
+
+ bpf_object__for_each_program(prog, obj) {
+ const char *prog_name = bpf_program__name(prog);
+
+ tobj = bpf_object__open_file(filename, &opts);
+ if (!tobj) {
+ err = -errno;
+ fprintf(stderr, "Failed to open '%s': %d\n", filename, err);
+ goto cleanup;
+ }
+
+ lprog = NULL;
+ bpf_object__for_each_program(tprog, tobj) {
+ const char *tprog_name = bpf_program__name(tprog);
+
+ if (strcmp(prog_name, tprog_name) == 0) {
+ bpf_program__set_autoload(tprog, true);
+ lprog = tprog;
+ } else {
+ bpf_program__set_autoload(tprog, false);
+ }
+ }
+
+ process_prog(filename, tobj, lprog);
+ bpf_object__close(tobj);
+ }
+
+cleanup:
+ bpf_object__close(obj);
+ libbpf_set_print(old_libbpf_print_fn);
+ return err;
+}
+
+static int cmp_stat(const struct verif_stats *s1, const struct verif_stats *s2,
+ enum stat_id id, bool asc, bool abs)
+{
+ int cmp = 0;
+
+ switch (id) {
+ case FILE_NAME:
+ cmp = strcmp(s1->file_name, s2->file_name);
+ break;
+ case PROG_NAME:
+ cmp = strcmp(s1->prog_name, s2->prog_name);
+ break;
+ case VERDICT:
+ case DURATION:
+ case TOTAL_INSNS:
+ case TOTAL_STATES:
+ case PEAK_STATES:
+ case MAX_STATES_PER_INSN:
+ case MARK_READ_MAX_LEN: {
+ long v1 = s1->stats[id];
+ long v2 = s2->stats[id];
+
+ if (abs) {
+ v1 = v1 < 0 ? -v1 : v1;
+ v2 = v2 < 0 ? -v2 : v2;
+ }
+
+ if (v1 != v2)
+ cmp = v1 < v2 ? -1 : 1;
+ break;
+ }
+ default:
+ fprintf(stderr, "Unrecognized stat #%d\n", id);
+ exit(1);
+ }
+
+ return asc ? cmp : -cmp;
+}
+
+static int cmp_prog_stats(const void *v1, const void *v2)
+{
+ const struct verif_stats *s1 = v1, *s2 = v2;
+ int i, cmp;
+
+ for (i = 0; i < env.sort_spec.spec_cnt; i++) {
+ cmp = cmp_stat(s1, s2, env.sort_spec.ids[i],
+ env.sort_spec.asc[i], env.sort_spec.abs[i]);
+ if (cmp != 0)
+ return cmp;
+ }
+
+ /* always disambiguate with file+prog, which are unique */
+ cmp = strcmp(s1->file_name, s2->file_name);
+ if (cmp != 0)
+ return cmp;
+ return strcmp(s1->prog_name, s2->prog_name);
+}
+
+static void fetch_join_stat_value(const struct verif_stats_join *s,
+ enum stat_id id, enum stat_variant var,
+ const char **str_val,
+ double *num_val)
+{
+ long v1, v2;
+
+ if (id == FILE_NAME) {
+ *str_val = s->file_name;
+ return;
+ }
+ if (id == PROG_NAME) {
+ *str_val = s->prog_name;
+ return;
+ }
+
+ v1 = s->stats_a ? s->stats_a->stats[id] : 0;
+ v2 = s->stats_b ? s->stats_b->stats[id] : 0;
+
+ switch (var) {
+ case VARIANT_A:
+ if (!s->stats_a)
+ *num_val = -DBL_MAX;
+ else
+ *num_val = s->stats_a->stats[id];
+ return;
+ case VARIANT_B:
+ if (!s->stats_b)
+ *num_val = -DBL_MAX;
+ else
+ *num_val = s->stats_b->stats[id];
+ return;
+ case VARIANT_DIFF:
+ if (!s->stats_a || !s->stats_b)
+ *num_val = -DBL_MAX;
+ else if (id == VERDICT)
+ *num_val = v1 == v2 ? 1.0 /* MATCH */ : 0.0 /* MISMATCH */;
+ else
+ *num_val = (double)(v2 - v1);
+ return;
+ case VARIANT_PCT:
+ if (!s->stats_a || !s->stats_b) {
+ *num_val = -DBL_MAX;
+ } else if (v1 == 0) {
+ if (v1 == v2)
+ *num_val = 0.0;
+ else
+ *num_val = v2 < v1 ? -100.0 : 100.0;
+ } else {
+ *num_val = (v2 - v1) * 100.0 / v1;
+ }
+ return;
+ }
+}
+
+static int cmp_join_stat(const struct verif_stats_join *s1,
+ const struct verif_stats_join *s2,
+ enum stat_id id, enum stat_variant var,
+ bool asc, bool abs)
+{
+ const char *str1 = NULL, *str2 = NULL;
+ double v1 = 0.0, v2 = 0.0;
+ int cmp = 0;
+
+ fetch_join_stat_value(s1, id, var, &str1, &v1);
+ fetch_join_stat_value(s2, id, var, &str2, &v2);
+
+ if (abs) {
+ v1 = fabs(v1);
+ v2 = fabs(v2);
+ }
+
+ if (str1)
+ cmp = strcmp(str1, str2);
+ else if (v1 != v2)
+ cmp = v1 < v2 ? -1 : 1;
+
+ return asc ? cmp : -cmp;
+}
+
+static int cmp_join_stats(const void *v1, const void *v2)
+{
+ const struct verif_stats_join *s1 = v1, *s2 = v2;
+ int i, cmp;
+
+ for (i = 0; i < env.sort_spec.spec_cnt; i++) {
+ cmp = cmp_join_stat(s1, s2,
+ env.sort_spec.ids[i],
+ env.sort_spec.variants[i],
+ env.sort_spec.asc[i],
+ env.sort_spec.abs[i]);
+ if (cmp != 0)
+ return cmp;
+ }
+
+ /* always disambiguate with file+prog, which are unique */
+ cmp = strcmp(s1->file_name, s2->file_name);
+ if (cmp != 0)
+ return cmp;
+ return strcmp(s1->prog_name, s2->prog_name);
+}
+
+#define HEADER_CHAR '-'
+#define COLUMN_SEP " "
+
+static void output_header_underlines(void)
+{
+ int i, j, len;
+
+ for (i = 0; i < env.output_spec.spec_cnt; i++) {
+ len = env.output_spec.lens[i];
+
+ printf("%s", i == 0 ? "" : COLUMN_SEP);
+ for (j = 0; j < len; j++)
+ printf("%c", HEADER_CHAR);
+ }
+ printf("\n");
+}
+
+static void output_headers(enum resfmt fmt)
+{
+ const char *fmt_str;
+ int i, len;
+
+ for (i = 0; i < env.output_spec.spec_cnt; i++) {
+ int id = env.output_spec.ids[i];
+ int *max_len = &env.output_spec.lens[i];
+
+ switch (fmt) {
+ case RESFMT_TABLE_CALCLEN:
+ len = snprintf(NULL, 0, "%s", stat_defs[id].header);
+ if (len > *max_len)
+ *max_len = len;
+ break;
+ case RESFMT_TABLE:
+ fmt_str = stat_defs[id].left_aligned ? "%s%-*s" : "%s%*s";
+ printf(fmt_str, i == 0 ? "" : COLUMN_SEP, *max_len, stat_defs[id].header);
+ if (i == env.output_spec.spec_cnt - 1)
+ printf("\n");
+ break;
+ case RESFMT_CSV:
+ printf("%s%s", i == 0 ? "" : ",", stat_defs[id].names[0]);
+ if (i == env.output_spec.spec_cnt - 1)
+ printf("\n");
+ break;
+ }
+ }
+
+ if (fmt == RESFMT_TABLE)
+ output_header_underlines();
+}
+
+static void prepare_value(const struct verif_stats *s, enum stat_id id,
+ const char **str, long *val)
+{
+ switch (id) {
+ case FILE_NAME:
+ *str = s ? s->file_name : "N/A";
+ break;
+ case PROG_NAME:
+ *str = s ? s->prog_name : "N/A";
+ break;
+ case VERDICT:
+ if (!s)
+ *str = "N/A";
+ else
+ *str = s->stats[VERDICT] ? "success" : "failure";
+ break;
+ case DURATION:
+ case TOTAL_INSNS:
+ case TOTAL_STATES:
+ case PEAK_STATES:
+ case MAX_STATES_PER_INSN:
+ case MARK_READ_MAX_LEN:
+ *val = s ? s->stats[id] : 0;
+ break;
+ default:
+ fprintf(stderr, "Unrecognized stat #%d\n", id);
+ exit(1);
+ }
+}
+
+static void output_stats(const struct verif_stats *s, enum resfmt fmt, bool last)
+{
+ int i;
+
+ for (i = 0; i < env.output_spec.spec_cnt; i++) {
+ int id = env.output_spec.ids[i];
+ int *max_len = &env.output_spec.lens[i], len;
+ const char *str = NULL;
+ long val = 0;
+
+ prepare_value(s, id, &str, &val);
+
+ switch (fmt) {
+ case RESFMT_TABLE_CALCLEN:
+ if (str)
+ len = snprintf(NULL, 0, "%s", str);
+ else
+ len = snprintf(NULL, 0, "%ld", val);
+ if (len > *max_len)
+ *max_len = len;
+ break;
+ case RESFMT_TABLE:
+ if (str)
+ printf("%s%-*s", i == 0 ? "" : COLUMN_SEP, *max_len, str);
+ else
+ printf("%s%*ld", i == 0 ? "" : COLUMN_SEP, *max_len, val);
+ if (i == env.output_spec.spec_cnt - 1)
+ printf("\n");
+ break;
+ case RESFMT_CSV:
+ if (str)
+ printf("%s%s", i == 0 ? "" : ",", str);
+ else
+ printf("%s%ld", i == 0 ? "" : ",", val);
+ if (i == env.output_spec.spec_cnt - 1)
+ printf("\n");
+ break;
+ }
+ }
+
+ if (last && fmt == RESFMT_TABLE) {
+ output_header_underlines();
+ printf("Done. Processed %d files, %d programs. Skipped %d files, %d programs.\n",
+ env.files_processed, env.files_skipped, env.progs_processed, env.progs_skipped);
+ }
+}
+
+static int parse_stat_value(const char *str, enum stat_id id, struct verif_stats *st)
+{
+ switch (id) {
+ case FILE_NAME:
+ st->file_name = strdup(str);
+ if (!st->file_name)
+ return -ENOMEM;
+ break;
+ case PROG_NAME:
+ st->prog_name = strdup(str);
+ if (!st->prog_name)
+ return -ENOMEM;
+ break;
+ case VERDICT:
+ if (strcmp(str, "success") == 0) {
+ st->stats[VERDICT] = true;
+ } else if (strcmp(str, "failure") == 0) {
+ st->stats[VERDICT] = false;
+ } else {
+ fprintf(stderr, "Unrecognized verification verdict '%s'\n", str);
+ return -EINVAL;
+ }
+ break;
+ case DURATION:
+ case TOTAL_INSNS:
+ case TOTAL_STATES:
+ case PEAK_STATES:
+ case MAX_STATES_PER_INSN:
+ case MARK_READ_MAX_LEN: {
+ long val;
+ int err, n;
+
+ if (sscanf(str, "%ld %n", &val, &n) != 1 || n != strlen(str)) {
+ err = -errno;
+ fprintf(stderr, "Failed to parse '%s' as integer\n", str);
+ return err;
+ }
+
+ st->stats[id] = val;
+ break;
+ }
+ default:
+ fprintf(stderr, "Unrecognized stat #%d\n", id);
+ return -EINVAL;
+ }
+ return 0;
+}
+
+static int parse_stats_csv(const char *filename, struct stat_specs *specs,
+ struct verif_stats **statsp, int *stat_cntp)
+{
+ char line[4096];
+ FILE *f;
+ int err = 0;
+ bool header = true;
+
+ f = fopen(filename, "r");
+ if (!f) {
+ err = -errno;
+ fprintf(stderr, "Failed to open '%s': %d\n", filename, err);
+ return err;
+ }
+
+ *stat_cntp = 0;
+
+ while (fgets(line, sizeof(line), f)) {
+ char *input = line, *state = NULL, *next;
+ struct verif_stats *st = NULL;
+ int col = 0;
+
+ if (!header) {
+ void *tmp;
+
+ tmp = realloc(*statsp, (*stat_cntp + 1) * sizeof(**statsp));
+ if (!tmp) {
+ err = -ENOMEM;
+ goto cleanup;
+ }
+ *statsp = tmp;
+
+ st = &(*statsp)[*stat_cntp];
+ memset(st, 0, sizeof(*st));
+
+ *stat_cntp += 1;
+ }
+
+ while ((next = strtok_r(state ? NULL : input, ",\n", &state))) {
+ if (header) {
+ /* for the first line, set up spec stats */
+ err = parse_stat(next, specs);
+ if (err)
+ goto cleanup;
+ continue;
+ }
+
+ /* for all other lines, parse values based on spec */
+ if (col >= specs->spec_cnt) {
+ fprintf(stderr, "Found extraneous column #%d in row #%d of '%s'\n",
+ col, *stat_cntp, filename);
+ err = -EINVAL;
+ goto cleanup;
+ }
+ err = parse_stat_value(next, specs->ids[col], st);
+ if (err)
+ goto cleanup;
+ col++;
+ }
+
+ if (header) {
+ header = false;
+ continue;
+ }
+
+ if (col < specs->spec_cnt) {
+ fprintf(stderr, "Not enough columns in row #%d in '%s'\n",
+ *stat_cntp, filename);
+ err = -EINVAL;
+ goto cleanup;
+ }
+
+ if (!st->file_name || !st->prog_name) {
+ fprintf(stderr, "Row #%d in '%s' is missing file and/or program name\n",
+ *stat_cntp, filename);
+ err = -EINVAL;
+ goto cleanup;
+ }
+
+ /* in comparison mode we can only check filters after we
+ * parsed entire line; if row should be ignored we pretend we
+ * never parsed it
+ */
+ if (!should_process_file_prog(st->file_name, st->prog_name)) {
+ free(st->file_name);
+ free(st->prog_name);
+ *stat_cntp -= 1;
+ }
+ }
+
+ if (!feof(f)) {
+ err = -errno;
+ fprintf(stderr, "Failed I/O for '%s': %d\n", filename, err);
+ }
+
+cleanup:
+ fclose(f);
+ return err;
+}
+
+/* empty/zero stats for mismatched rows */
+static const struct verif_stats fallback_stats = { .file_name = "", .prog_name = "" };
+
+static bool is_key_stat(enum stat_id id)
+{
+ return id == FILE_NAME || id == PROG_NAME;
+}
+
+static void output_comp_header_underlines(void)
+{
+ int i, j, k;
+
+ for (i = 0; i < env.output_spec.spec_cnt; i++) {
+ int id = env.output_spec.ids[i];
+ int max_j = is_key_stat(id) ? 1 : 3;
+
+ for (j = 0; j < max_j; j++) {
+ int len = env.output_spec.lens[3 * i + j];
+
+ printf("%s", i + j == 0 ? "" : COLUMN_SEP);
+
+ for (k = 0; k < len; k++)
+ printf("%c", HEADER_CHAR);
+ }
+ }
+ printf("\n");
+}
+
+static void output_comp_headers(enum resfmt fmt)
+{
+ static const char *table_sfxs[3] = {" (A)", " (B)", " (DIFF)"};
+ static const char *name_sfxs[3] = {"_base", "_comp", "_diff"};
+ int i, j, len;
+
+ for (i = 0; i < env.output_spec.spec_cnt; i++) {
+ int id = env.output_spec.ids[i];
+ /* key stats don't have A/B/DIFF columns, they are common for both data sets */
+ int max_j = is_key_stat(id) ? 1 : 3;
+
+ for (j = 0; j < max_j; j++) {
+ int *max_len = &env.output_spec.lens[3 * i + j];
+ bool last = (i == env.output_spec.spec_cnt - 1) && (j == max_j - 1);
+ const char *sfx;
+
+ switch (fmt) {
+ case RESFMT_TABLE_CALCLEN:
+ sfx = is_key_stat(id) ? "" : table_sfxs[j];
+ len = snprintf(NULL, 0, "%s%s", stat_defs[id].header, sfx);
+ if (len > *max_len)
+ *max_len = len;
+ break;
+ case RESFMT_TABLE:
+ sfx = is_key_stat(id) ? "" : table_sfxs[j];
+ printf("%s%-*s%s", i + j == 0 ? "" : COLUMN_SEP,
+ *max_len - (int)strlen(sfx), stat_defs[id].header, sfx);
+ if (last)
+ printf("\n");
+ break;
+ case RESFMT_CSV:
+ sfx = is_key_stat(id) ? "" : name_sfxs[j];
+ printf("%s%s%s", i + j == 0 ? "" : ",", stat_defs[id].names[0], sfx);
+ if (last)
+ printf("\n");
+ break;
+ }
+ }
+ }
+
+ if (fmt == RESFMT_TABLE)
+ output_comp_header_underlines();
+}
+
+static void output_comp_stats(const struct verif_stats_join *join_stats,
+ enum resfmt fmt, bool last)
+{
+ const struct verif_stats *base = join_stats->stats_a;
+ const struct verif_stats *comp = join_stats->stats_b;
+ char base_buf[1024] = {}, comp_buf[1024] = {}, diff_buf[1024] = {};
+ int i;
+
+ for (i = 0; i < env.output_spec.spec_cnt; i++) {
+ int id = env.output_spec.ids[i], len;
+ int *max_len_base = &env.output_spec.lens[3 * i + 0];
+ int *max_len_comp = &env.output_spec.lens[3 * i + 1];
+ int *max_len_diff = &env.output_spec.lens[3 * i + 2];
+ const char *base_str = NULL, *comp_str = NULL;
+ long base_val = 0, comp_val = 0, diff_val = 0;
+
+ prepare_value(base, id, &base_str, &base_val);
+ prepare_value(comp, id, &comp_str, &comp_val);
+
+ /* normalize all the outputs to be in string buffers for simplicity */
+ if (is_key_stat(id)) {
+ /* key stats (file and program name) are always strings */
+ if (base)
+ snprintf(base_buf, sizeof(base_buf), "%s", base_str);
+ else
+ snprintf(base_buf, sizeof(base_buf), "%s", comp_str);
+ } else if (base_str) {
+ snprintf(base_buf, sizeof(base_buf), "%s", base_str);
+ snprintf(comp_buf, sizeof(comp_buf), "%s", comp_str);
+ if (!base || !comp)
+ snprintf(diff_buf, sizeof(diff_buf), "%s", "N/A");
+ else if (strcmp(base_str, comp_str) == 0)
+ snprintf(diff_buf, sizeof(diff_buf), "%s", "MATCH");
+ else
+ snprintf(diff_buf, sizeof(diff_buf), "%s", "MISMATCH");
+ } else {
+ double p = 0.0;
+
+ if (base)
+ snprintf(base_buf, sizeof(base_buf), "%ld", base_val);
+ else
+ snprintf(base_buf, sizeof(base_buf), "%s", "N/A");
+ if (comp)
+ snprintf(comp_buf, sizeof(comp_buf), "%ld", comp_val);
+ else
+ snprintf(comp_buf, sizeof(comp_buf), "%s", "N/A");
+
+ diff_val = comp_val - base_val;
+ if (!base || !comp) {
+ snprintf(diff_buf, sizeof(diff_buf), "%s", "N/A");
+ } else {
+ if (base_val == 0) {
+ if (comp_val == base_val)
+ p = 0.0; /* avoid +0 (+100%) case */
+ else
+ p = comp_val < base_val ? -100.0 : 100.0;
+ } else {
+ p = diff_val * 100.0 / base_val;
+ }
+ snprintf(diff_buf, sizeof(diff_buf), "%+ld (%+.2lf%%)", diff_val, p);
+ }
+ }
+
+ switch (fmt) {
+ case RESFMT_TABLE_CALCLEN:
+ len = strlen(base_buf);
+ if (len > *max_len_base)
+ *max_len_base = len;
+ if (!is_key_stat(id)) {
+ len = strlen(comp_buf);
+ if (len > *max_len_comp)
+ *max_len_comp = len;
+ len = strlen(diff_buf);
+ if (len > *max_len_diff)
+ *max_len_diff = len;
+ }
+ break;
+ case RESFMT_TABLE: {
+ /* string outputs are left-aligned, number outputs are right-aligned */
+ const char *fmt = base_str ? "%s%-*s" : "%s%*s";
+
+ printf(fmt, i == 0 ? "" : COLUMN_SEP, *max_len_base, base_buf);
+ if (!is_key_stat(id)) {
+ printf(fmt, COLUMN_SEP, *max_len_comp, comp_buf);
+ printf(fmt, COLUMN_SEP, *max_len_diff, diff_buf);
+ }
+ if (i == env.output_spec.spec_cnt - 1)
+ printf("\n");
+ break;
+ }
+ case RESFMT_CSV:
+ printf("%s%s", i == 0 ? "" : ",", base_buf);
+ if (!is_key_stat(id)) {
+ printf("%s%s", i == 0 ? "" : ",", comp_buf);
+ printf("%s%s", i == 0 ? "" : ",", diff_buf);
+ }
+ if (i == env.output_spec.spec_cnt - 1)
+ printf("\n");
+ break;
+ }
+ }
+
+ if (last && fmt == RESFMT_TABLE)
+ output_comp_header_underlines();
+}
+
+static int cmp_stats_key(const struct verif_stats *base, const struct verif_stats *comp)
+{
+ int r;
+
+ r = strcmp(base->file_name, comp->file_name);
+ if (r != 0)
+ return r;
+ return strcmp(base->prog_name, comp->prog_name);
+}
+
+static bool is_join_stat_filter_matched(struct filter *f, const struct verif_stats_join *stats)
+{
+ static const double eps = 1e-9;
+ const char *str = NULL;
+ double value = 0.0;
+
+ fetch_join_stat_value(stats, f->stat_id, f->stat_var, &str, &value);
+
+ if (f->abs)
+ value = fabs(value);
+
+ switch (f->op) {
+ case OP_EQ: return value > f->value - eps && value < f->value + eps;
+ case OP_NEQ: return value < f->value - eps || value > f->value + eps;
+ case OP_LT: return value < f->value - eps;
+ case OP_LE: return value <= f->value + eps;
+ case OP_GT: return value > f->value + eps;
+ case OP_GE: return value >= f->value - eps;
+ }
+
+ fprintf(stderr, "BUG: unknown filter op %d!\n", f->op);
+ return false;
+}
+
+static bool should_output_join_stats(const struct verif_stats_join *stats)
+{
+ struct filter *f;
+ int i, allow_cnt = 0;
+
+ for (i = 0; i < env.deny_filter_cnt; i++) {
+ f = &env.deny_filters[i];
+ if (f->kind != FILTER_STAT)
+ continue;
+
+ if (is_join_stat_filter_matched(f, stats))
+ return false;
+ }
+
+ for (i = 0; i < env.allow_filter_cnt; i++) {
+ f = &env.allow_filters[i];
+ if (f->kind != FILTER_STAT)
+ continue;
+ allow_cnt++;
+
+ if (is_join_stat_filter_matched(f, stats))
+ return true;
+ }
+
+ /* if there are no stat allowed filters, pass everything through */
+ return allow_cnt == 0;
+}
+
+static int handle_comparison_mode(void)
+{
+ struct stat_specs base_specs = {}, comp_specs = {};
+ struct stat_specs tmp_sort_spec;
+ enum resfmt cur_fmt;
+ int err, i, j, last_idx, cnt;
+
+ if (env.filename_cnt != 2) {
+ fprintf(stderr, "Comparison mode expects exactly two input CSV files!\n\n");
+ argp_help(&argp, stderr, ARGP_HELP_USAGE, "veristat");
+ return -EINVAL;
+ }
+
+ err = parse_stats_csv(env.filenames[0], &base_specs,
+ &env.baseline_stats, &env.baseline_stat_cnt);
+ if (err) {
+ fprintf(stderr, "Failed to parse stats from '%s': %d\n", env.filenames[0], err);
+ return err;
+ }
+ err = parse_stats_csv(env.filenames[1], &comp_specs,
+ &env.prog_stats, &env.prog_stat_cnt);
+ if (err) {
+ fprintf(stderr, "Failed to parse stats from '%s': %d\n", env.filenames[1], err);
+ return err;
+ }
+
+ /* To keep it simple we validate that the set and order of stats in
+ * both CSVs are exactly the same. This can be lifted with a bit more
+ * pre-processing later.
+ */
+ if (base_specs.spec_cnt != comp_specs.spec_cnt) {
+ fprintf(stderr, "Number of stats in '%s' and '%s' differs (%d != %d)!\n",
+ env.filenames[0], env.filenames[1],
+ base_specs.spec_cnt, comp_specs.spec_cnt);
+ return -EINVAL;
+ }
+ for (i = 0; i < base_specs.spec_cnt; i++) {
+ if (base_specs.ids[i] != comp_specs.ids[i]) {
+ fprintf(stderr, "Stats composition differs between '%s' and '%s' (%s != %s)!\n",
+ env.filenames[0], env.filenames[1],
+ stat_defs[base_specs.ids[i]].names[0],
+ stat_defs[comp_specs.ids[i]].names[0]);
+ return -EINVAL;
+ }
+ }
+
+ /* Replace user-specified sorting spec with file+prog sorting rule to
+ * be able to join two datasets correctly. Once we are done, we will
+ * restore the original sort spec.
+ */
+ tmp_sort_spec = env.sort_spec;
+ env.sort_spec = join_sort_spec;
+ qsort(env.prog_stats, env.prog_stat_cnt, sizeof(*env.prog_stats), cmp_prog_stats);
+ qsort(env.baseline_stats, env.baseline_stat_cnt, sizeof(*env.baseline_stats), cmp_prog_stats);
+ env.sort_spec = tmp_sort_spec;
+
+ /* Join two datasets together. If baseline and comparison datasets
+ * have different subset of rows (we match by 'object + prog' as
+ * a unique key) then assume empty/missing/zero value for rows that
+ * are missing in the opposite data set.
+ */
+ i = j = 0;
+ while (i < env.baseline_stat_cnt || j < env.prog_stat_cnt) {
+ const struct verif_stats *base, *comp;
+ struct verif_stats_join *join;
+ void *tmp;
+ int r;
+
+ base = i < env.baseline_stat_cnt ? &env.baseline_stats[i] : &fallback_stats;
+ comp = j < env.prog_stat_cnt ? &env.prog_stats[j] : &fallback_stats;
+
+ if (!base->file_name || !base->prog_name) {
+ fprintf(stderr, "Entry #%d in '%s' doesn't have file and/or program name specified!\n",
+ i, env.filenames[0]);
+ return -EINVAL;
+ }
+ if (!comp->file_name || !comp->prog_name) {
+ fprintf(stderr, "Entry #%d in '%s' doesn't have file and/or program name specified!\n",
+ j, env.filenames[1]);
+ return -EINVAL;
+ }
+
+ tmp = realloc(env.join_stats, (env.join_stat_cnt + 1) * sizeof(*env.join_stats));
+ if (!tmp)
+ return -ENOMEM;
+ env.join_stats = tmp;
+
+ join = &env.join_stats[env.join_stat_cnt];
+ memset(join, 0, sizeof(*join));
+
+ r = cmp_stats_key(base, comp);
+ if (r == 0) {
+ join->file_name = base->file_name;
+ join->prog_name = base->prog_name;
+ join->stats_a = base;
+ join->stats_b = comp;
+ i++;
+ j++;
+ } else if (base != &fallback_stats && (comp == &fallback_stats || r < 0)) {
+ join->file_name = base->file_name;
+ join->prog_name = base->prog_name;
+ join->stats_a = base;
+ join->stats_b = NULL;
+ i++;
+ } else if (comp != &fallback_stats && (base == &fallback_stats || r > 0)) {
+ join->file_name = comp->file_name;
+ join->prog_name = comp->prog_name;
+ join->stats_a = NULL;
+ join->stats_b = comp;
+ j++;
+ } else {
+ fprintf(stderr, "%s:%d: should never reach here i=%i, j=%i",
+ __FILE__, __LINE__, i, j);
+ return -EINVAL;
+ }
+ env.join_stat_cnt += 1;
+ }
+
+ /* now sort joined results according to sort spec */
+ qsort(env.join_stats, env.join_stat_cnt, sizeof(*env.join_stats), cmp_join_stats);
+
+ /* for human-readable table output we need to do extra pass to
+ * calculate column widths, so we substitute current output format
+ * with RESFMT_TABLE_CALCLEN and later revert it back to RESFMT_TABLE
+ * and do everything again.
+ */
+ if (env.out_fmt == RESFMT_TABLE)
+ cur_fmt = RESFMT_TABLE_CALCLEN;
+ else
+ cur_fmt = env.out_fmt;
+
+one_more_time:
+ output_comp_headers(cur_fmt);
+
+ last_idx = -1;
+ cnt = 0;
+ for (i = 0; i < env.join_stat_cnt; i++) {
+ const struct verif_stats_join *join = &env.join_stats[i];
+
+ if (!should_output_join_stats(join))
+ continue;
+
+ if (env.top_n && cnt >= env.top_n)
+ break;
+
+ if (cur_fmt == RESFMT_TABLE_CALCLEN)
+ last_idx = i;
+
+ output_comp_stats(join, cur_fmt, i == last_idx);
+
+ cnt++;
+ }
+
+ if (cur_fmt == RESFMT_TABLE_CALCLEN) {
+ cur_fmt = RESFMT_TABLE;
+ goto one_more_time; /* ... this time with feeling */
+ }
+
+ return 0;
+}
+
+static bool is_stat_filter_matched(struct filter *f, const struct verif_stats *stats)
+{
+ long value = stats->stats[f->stat_id];
+
+ if (f->abs)
+ value = value < 0 ? -value : value;
+
+ switch (f->op) {
+ case OP_EQ: return value == f->value;
+ case OP_NEQ: return value != f->value;
+ case OP_LT: return value < f->value;
+ case OP_LE: return value <= f->value;
+ case OP_GT: return value > f->value;
+ case OP_GE: return value >= f->value;
+ }
+
+ fprintf(stderr, "BUG: unknown filter op %d!\n", f->op);
+ return false;
+}
+
+static bool should_output_stats(const struct verif_stats *stats)
+{
+ struct filter *f;
+ int i, allow_cnt = 0;
+
+ for (i = 0; i < env.deny_filter_cnt; i++) {
+ f = &env.deny_filters[i];
+ if (f->kind != FILTER_STAT)
+ continue;
+
+ if (is_stat_filter_matched(f, stats))
+ return false;
+ }
+
+ for (i = 0; i < env.allow_filter_cnt; i++) {
+ f = &env.allow_filters[i];
+ if (f->kind != FILTER_STAT)
+ continue;
+ allow_cnt++;
+
+ if (is_stat_filter_matched(f, stats))
+ return true;
+ }
+
+ /* if there are no stat allowed filters, pass everything through */
+ return allow_cnt == 0;
+}
+
+static void output_prog_stats(void)
+{
+ const struct verif_stats *stats;
+ int i, last_stat_idx = 0, cnt = 0;
+
+ if (env.out_fmt == RESFMT_TABLE) {
+ /* calculate column widths */
+ output_headers(RESFMT_TABLE_CALCLEN);
+ for (i = 0; i < env.prog_stat_cnt; i++) {
+ stats = &env.prog_stats[i];
+ if (!should_output_stats(stats))
+ continue;
+ output_stats(stats, RESFMT_TABLE_CALCLEN, false);
+ last_stat_idx = i;
+ }
+ }
+
+ /* actually output the table */
+ output_headers(env.out_fmt);
+ for (i = 0; i < env.prog_stat_cnt; i++) {
+ stats = &env.prog_stats[i];
+ if (!should_output_stats(stats))
+ continue;
+ if (env.top_n && cnt >= env.top_n)
+ break;
+ output_stats(stats, env.out_fmt, i == last_stat_idx);
+ cnt++;
+ }
+}
+
+static int handle_verif_mode(void)
+{
+ int i, err;
+
+ if (env.filename_cnt == 0) {
+ fprintf(stderr, "Please provide path to BPF object file!\n\n");
+ argp_help(&argp, stderr, ARGP_HELP_USAGE, "veristat");
+ return -EINVAL;
+ }
+
+ for (i = 0; i < env.filename_cnt; i++) {
+ err = process_obj(env.filenames[i]);
+ if (err) {
+ fprintf(stderr, "Failed to process '%s': %d\n", env.filenames[i], err);
+ return err;
+ }
+ }
+
+ qsort(env.prog_stats, env.prog_stat_cnt, sizeof(*env.prog_stats), cmp_prog_stats);
+
+ output_prog_stats();
+
+ return 0;
+}
+
+static int handle_replay_mode(void)
+{
+ struct stat_specs specs = {};
+ int err;
+
+ if (env.filename_cnt != 1) {
+ fprintf(stderr, "Replay mode expects exactly one input CSV file!\n\n");
+ argp_help(&argp, stderr, ARGP_HELP_USAGE, "veristat");
+ return -EINVAL;
+ }
+
+ err = parse_stats_csv(env.filenames[0], &specs,
+ &env.prog_stats, &env.prog_stat_cnt);
+ if (err) {
+ fprintf(stderr, "Failed to parse stats from '%s': %d\n", env.filenames[0], err);
+ return err;
+ }
+
+ qsort(env.prog_stats, env.prog_stat_cnt, sizeof(*env.prog_stats), cmp_prog_stats);
+
+ output_prog_stats();
+
+ return 0;
+}
+
+int main(int argc, char **argv)
+{
+ int err = 0, i;
+
+ if (argp_parse(&argp, argc, argv, 0, NULL, NULL))
+ return 1;
+
+ if (env.show_version) {
+ printf("%s\n", argp_program_version);
+ return 0;
+ }
+
+ if (env.verbose && env.quiet) {
+ fprintf(stderr, "Verbose and quiet modes are incompatible, please specify just one or neither!\n\n");
+ argp_help(&argp, stderr, ARGP_HELP_USAGE, "veristat");
+ return 1;
+ }
+ if (env.verbose && env.log_level == 0)
+ env.log_level = 1;
+
+ if (env.output_spec.spec_cnt == 0) {
+ if (env.out_fmt == RESFMT_CSV)
+ env.output_spec = default_csv_output_spec;
+ else
+ env.output_spec = default_output_spec;
+ }
+ if (env.sort_spec.spec_cnt == 0)
+ env.sort_spec = default_sort_spec;
+
+ if (env.comparison_mode && env.replay_mode) {
+ fprintf(stderr, "Can't specify replay and comparison mode at the same time!\n\n");
+ argp_help(&argp, stderr, ARGP_HELP_USAGE, "veristat");
+ return 1;
+ }
+
+ if (env.comparison_mode)
+ err = handle_comparison_mode();
+ else if (env.replay_mode)
+ err = handle_replay_mode();
+ else
+ err = handle_verif_mode();
+
+ free_verif_stats(env.prog_stats, env.prog_stat_cnt);
+ free_verif_stats(env.baseline_stats, env.baseline_stat_cnt);
+ free(env.join_stats);
+ for (i = 0; i < env.filename_cnt; i++)
+ free(env.filenames[i]);
+ free(env.filenames);
+ for (i = 0; i < env.allow_filter_cnt; i++) {
+ free(env.allow_filters[i].any_glob);
+ free(env.allow_filters[i].file_glob);
+ free(env.allow_filters[i].prog_glob);
+ }
+ free(env.allow_filters);
+ for (i = 0; i < env.deny_filter_cnt; i++) {
+ free(env.deny_filters[i].any_glob);
+ free(env.deny_filters[i].file_glob);
+ free(env.deny_filters[i].prog_glob);
+ }
+ free(env.deny_filters);
+ return -err;
+}
diff --git a/tools/testing/selftests/bpf/veristat.cfg b/tools/testing/selftests/bpf/veristat.cfg
new file mode 100644
index 000000000000..1a385061618d
--- /dev/null
+++ b/tools/testing/selftests/bpf/veristat.cfg
@@ -0,0 +1,17 @@
+# pre-canned list of rather complex selftests/bpf BPF object files to monitor
+# BPF verifier's performance on
+bpf_flow*
+bpf_loop_bench*
+loop*
+netif_receive_skb*
+profiler*
+pyperf*
+strobemeta*
+test_cls_redirect*
+test_l4lb
+test_sysctl*
+test_tcp_hdr_*
+test_usdt*
+test_verif_scale*
+test_xdp_noinline*
+xdp_synproxy*
diff --git a/tools/testing/selftests/bpf/vmtest.sh b/tools/testing/selftests/bpf/vmtest.sh
new file mode 100755
index 000000000000..65d14f3bbe30
--- /dev/null
+++ b/tools/testing/selftests/bpf/vmtest.sh
@@ -0,0 +1,436 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+set -u
+set -e
+
+# This script currently only works for x86_64 and s390x, as
+# it is based on the VM image used by the BPF CI, which is
+# available only for these architectures.
+ARCH="$(uname -m)"
+case "${ARCH}" in
+s390x)
+ QEMU_BINARY=qemu-system-s390x
+ QEMU_CONSOLE="ttyS1"
+ QEMU_FLAGS=(-smp 2)
+ BZIMAGE="arch/s390/boot/vmlinux"
+ ;;
+x86_64)
+ QEMU_BINARY=qemu-system-x86_64
+ QEMU_CONSOLE="ttyS0,115200"
+ QEMU_FLAGS=(-cpu host -smp 8)
+ BZIMAGE="arch/x86/boot/bzImage"
+ ;;
+aarch64)
+ QEMU_BINARY=qemu-system-aarch64
+ QEMU_CONSOLE="ttyAMA0,115200"
+ QEMU_FLAGS=(-M virt,gic-version=3 -cpu host -smp 8)
+ BZIMAGE="arch/arm64/boot/Image"
+ ;;
+*)
+ echo "Unsupported architecture"
+ exit 1
+ ;;
+esac
+DEFAULT_COMMAND="./test_progs"
+MOUNT_DIR="mnt"
+ROOTFS_IMAGE="root.img"
+OUTPUT_DIR="$HOME/.bpf_selftests"
+KCONFIG_REL_PATHS=("tools/testing/selftests/bpf/config"
+ "tools/testing/selftests/bpf/config.vm"
+ "tools/testing/selftests/bpf/config.${ARCH}")
+INDEX_URL="https://raw.githubusercontent.com/libbpf/ci/master/INDEX"
+NUM_COMPILE_JOBS="$(nproc)"
+LOG_FILE_BASE="$(date +"bpf_selftests.%Y-%m-%d_%H-%M-%S")"
+LOG_FILE="${LOG_FILE_BASE}.log"
+EXIT_STATUS_FILE="${LOG_FILE_BASE}.exit_status"
+
+usage()
+{
+ cat <<EOF
+Usage: $0 [-i] [-s] [-d <output_dir>] -- [<command>]
+
+<command> is the command you would normally run when you are in
+tools/testing/selftests/bpf. e.g:
+
+ $0 -- ./test_progs -t test_lsm
+
+If no command is specified and a debug shell (-s) is not requested,
+"${DEFAULT_COMMAND}" will be run by default.
+
+If you build your kernel using KBUILD_OUTPUT= or O= options, these
+can be passed as environment variables to the script:
+
+ O=<kernel_build_path> $0 -- ./test_progs -t test_lsm
+
+or
+
+ KBUILD_OUTPUT=<kernel_build_path> $0 -- ./test_progs -t test_lsm
+
+Options:
+
+ -i) Update the rootfs image with a newer version.
+ -d) Update the output directory (default: ${OUTPUT_DIR})
+ -j) Number of jobs for compilation, similar to -j in make
+ (default: ${NUM_COMPILE_JOBS})
+ -s) Instead of powering off the VM, start an interactive
+ shell. If <command> is specified, the shell runs after
+ the command finishes executing
+EOF
+}
+
+unset URLS
+populate_url_map()
+{
+ if ! declare -p URLS &> /dev/null; then
+ # URLS contain the mapping from file names to URLs where
+ # those files can be downloaded from.
+ declare -gA URLS
+ while IFS=$'\t' read -r name url; do
+ URLS["$name"]="$url"
+ done < <(curl -Lsf ${INDEX_URL})
+ fi
+}
+
+download()
+{
+ local file="$1"
+
+ if [[ ! -v URLS[$file] ]]; then
+ echo "$file not found" >&2
+ return 1
+ fi
+
+ echo "Downloading $file..." >&2
+ curl -Lsf "${URLS[$file]}" "${@:2}"
+}
+
+newest_rootfs_version()
+{
+ {
+ for file in "${!URLS[@]}"; do
+ if [[ $file =~ ^"${ARCH}"/libbpf-vmtest-rootfs-(.*)\.tar\.zst$ ]]; then
+ echo "${BASH_REMATCH[1]}"
+ fi
+ done
+ } | sort -rV | head -1
+}
+
+download_rootfs()
+{
+ local rootfsversion="$1"
+ local dir="$2"
+
+ if ! which zstd &> /dev/null; then
+ echo 'Could not find "zstd" on the system, please install zstd'
+ exit 1
+ fi
+
+ download "${ARCH}/libbpf-vmtest-rootfs-$rootfsversion.tar.zst" |
+ zstd -d | sudo tar -C "$dir" -x
+}
+
+recompile_kernel()
+{
+ local kernel_checkout="$1"
+ local make_command="$2"
+
+ cd "${kernel_checkout}"
+
+ ${make_command} olddefconfig
+ ${make_command}
+}
+
+mount_image()
+{
+ local rootfs_img="${OUTPUT_DIR}/${ROOTFS_IMAGE}"
+ local mount_dir="${OUTPUT_DIR}/${MOUNT_DIR}"
+
+ sudo mount -o loop "${rootfs_img}" "${mount_dir}"
+}
+
+unmount_image()
+{
+ local mount_dir="${OUTPUT_DIR}/${MOUNT_DIR}"
+
+ sudo umount "${mount_dir}" &> /dev/null
+}
+
+update_selftests()
+{
+ local kernel_checkout="$1"
+ local selftests_dir="${kernel_checkout}/tools/testing/selftests/bpf"
+
+ cd "${selftests_dir}"
+ ${make_command}
+
+ # Mount the image and copy the selftests to the image.
+ mount_image
+ sudo rm -rf "${mount_dir}/root/bpf"
+ sudo cp -r "${selftests_dir}" "${mount_dir}/root"
+ unmount_image
+}
+
+update_init_script()
+{
+ local init_script_dir="${OUTPUT_DIR}/${MOUNT_DIR}/etc/rcS.d"
+ local init_script="${init_script_dir}/S50-startup"
+ local command="$1"
+ local exit_command="$2"
+
+ mount_image
+
+ if [[ ! -d "${init_script_dir}" ]]; then
+ cat <<EOF
+Could not find ${init_script_dir} in the mounted image.
+This likely indicates a bad rootfs image, Please download
+a new image by passing "-i" to the script
+EOF
+ exit 1
+
+ fi
+
+ sudo bash -c "echo '#!/bin/bash' > ${init_script}"
+
+ if [[ "${command}" != "" ]]; then
+ sudo bash -c "cat >>${init_script}" <<EOF
+# Have a default value in the exit status file
+# incase the VM is forcefully stopped.
+echo "130" > "/root/${EXIT_STATUS_FILE}"
+
+{
+ cd /root/bpf
+ echo ${command}
+ stdbuf -oL -eL ${command}
+ echo "\$?" > "/root/${EXIT_STATUS_FILE}"
+} 2>&1 | tee "/root/${LOG_FILE}"
+# Ensure that the logs are written to disk
+sync
+EOF
+ fi
+
+ sudo bash -c "echo ${exit_command} >> ${init_script}"
+ sudo chmod a+x "${init_script}"
+ unmount_image
+}
+
+create_vm_image()
+{
+ local rootfs_img="${OUTPUT_DIR}/${ROOTFS_IMAGE}"
+ local mount_dir="${OUTPUT_DIR}/${MOUNT_DIR}"
+
+ rm -rf "${rootfs_img}"
+ touch "${rootfs_img}"
+ chattr +C "${rootfs_img}" >/dev/null 2>&1 || true
+
+ truncate -s 2G "${rootfs_img}"
+ mkfs.ext4 -q "${rootfs_img}"
+
+ mount_image
+ download_rootfs "$(newest_rootfs_version)" "${mount_dir}"
+ unmount_image
+}
+
+run_vm()
+{
+ local kernel_bzimage="$1"
+ local rootfs_img="${OUTPUT_DIR}/${ROOTFS_IMAGE}"
+
+ if ! which "${QEMU_BINARY}" &> /dev/null; then
+ cat <<EOF
+Could not find ${QEMU_BINARY}
+Please install qemu or set the QEMU_BINARY environment variable.
+EOF
+ exit 1
+ fi
+
+ ${QEMU_BINARY} \
+ -nodefaults \
+ -display none \
+ -serial mon:stdio \
+ "${QEMU_FLAGS[@]}" \
+ -enable-kvm \
+ -m 4G \
+ -drive file="${rootfs_img}",format=raw,index=1,media=disk,if=virtio,cache=none \
+ -kernel "${kernel_bzimage}" \
+ -append "root=/dev/vda rw console=${QEMU_CONSOLE}"
+}
+
+copy_logs()
+{
+ local mount_dir="${OUTPUT_DIR}/${MOUNT_DIR}"
+ local log_file="${mount_dir}/root/${LOG_FILE}"
+ local exit_status_file="${mount_dir}/root/${EXIT_STATUS_FILE}"
+
+ mount_image
+ sudo cp ${log_file} "${OUTPUT_DIR}"
+ sudo cp ${exit_status_file} "${OUTPUT_DIR}"
+ sudo rm -f ${log_file}
+ unmount_image
+}
+
+is_rel_path()
+{
+ local path="$1"
+
+ [[ ${path:0:1} != "/" ]]
+}
+
+do_update_kconfig()
+{
+ local kernel_checkout="$1"
+ local kconfig_file="$2"
+
+ rm -f "$kconfig_file" 2> /dev/null
+
+ for config in "${KCONFIG_REL_PATHS[@]}"; do
+ local kconfig_src="${kernel_checkout}/${config}"
+ cat "$kconfig_src" >> "$kconfig_file"
+ done
+}
+
+update_kconfig()
+{
+ local kernel_checkout="$1"
+ local kconfig_file="$2"
+
+ if [[ -f "${kconfig_file}" ]]; then
+ local local_modified="$(stat -c %Y "${kconfig_file}")"
+
+ for config in "${KCONFIG_REL_PATHS[@]}"; do
+ local kconfig_src="${kernel_checkout}/${config}"
+ local src_modified="$(stat -c %Y "${kconfig_src}")"
+ # Only update the config if it has been updated after the
+ # previously cached config was created. This avoids
+ # unnecessarily compiling the kernel and selftests.
+ if [[ "${src_modified}" -gt "${local_modified}" ]]; then
+ do_update_kconfig "$kernel_checkout" "$kconfig_file"
+ # Once we have found one outdated configuration
+ # there is no need to check other ones.
+ break
+ fi
+ done
+ else
+ do_update_kconfig "$kernel_checkout" "$kconfig_file"
+ fi
+}
+
+catch()
+{
+ local exit_code=$1
+ local exit_status_file="${OUTPUT_DIR}/${EXIT_STATUS_FILE}"
+ # This is just a cleanup and the directory may
+ # have already been unmounted. So, don't let this
+ # clobber the error code we intend to return.
+ unmount_image || true
+ if [[ -f "${exit_status_file}" ]]; then
+ exit_code="$(cat ${exit_status_file})"
+ fi
+ exit ${exit_code}
+}
+
+main()
+{
+ local script_dir="$(cd -P -- "$(dirname -- "${BASH_SOURCE[0]}")" && pwd -P)"
+ local kernel_checkout=$(realpath "${script_dir}"/../../../../)
+ # By default the script searches for the kernel in the checkout directory but
+ # it also obeys environment variables O= and KBUILD_OUTPUT=
+ local kernel_bzimage="${kernel_checkout}/${BZIMAGE}"
+ local command="${DEFAULT_COMMAND}"
+ local update_image="no"
+ local exit_command="poweroff -f"
+ local debug_shell="no"
+
+ while getopts ':hskid:j:' opt; do
+ case ${opt} in
+ i)
+ update_image="yes"
+ ;;
+ d)
+ OUTPUT_DIR="$OPTARG"
+ ;;
+ j)
+ NUM_COMPILE_JOBS="$OPTARG"
+ ;;
+ s)
+ command=""
+ debug_shell="yes"
+ exit_command="bash"
+ ;;
+ h)
+ usage
+ exit 0
+ ;;
+ \? )
+ echo "Invalid Option: -$OPTARG"
+ usage
+ exit 1
+ ;;
+ : )
+ echo "Invalid Option: -$OPTARG requires an argument"
+ usage
+ exit 1
+ ;;
+ esac
+ done
+ shift $((OPTIND -1))
+
+ trap 'catch "$?"' EXIT
+
+ if [[ $# -eq 0 && "${debug_shell}" == "no" ]]; then
+ echo "No command specified, will run ${DEFAULT_COMMAND} in the vm"
+ else
+ command="$@"
+ fi
+
+ local kconfig_file="${OUTPUT_DIR}/latest.config"
+ local make_command="make -j ${NUM_COMPILE_JOBS} KCONFIG_CONFIG=${kconfig_file}"
+
+ # Figure out where the kernel is being built.
+ # O takes precedence over KBUILD_OUTPUT.
+ if [[ "${O:=""}" != "" ]]; then
+ if is_rel_path "${O}"; then
+ O="$(realpath "${PWD}/${O}")"
+ fi
+ kernel_bzimage="${O}/${BZIMAGE}"
+ make_command="${make_command} O=${O}"
+ elif [[ "${KBUILD_OUTPUT:=""}" != "" ]]; then
+ if is_rel_path "${KBUILD_OUTPUT}"; then
+ KBUILD_OUTPUT="$(realpath "${PWD}/${KBUILD_OUTPUT}")"
+ fi
+ kernel_bzimage="${KBUILD_OUTPUT}/${BZIMAGE}"
+ make_command="${make_command} KBUILD_OUTPUT=${KBUILD_OUTPUT}"
+ fi
+
+ populate_url_map
+
+ local rootfs_img="${OUTPUT_DIR}/${ROOTFS_IMAGE}"
+ local mount_dir="${OUTPUT_DIR}/${MOUNT_DIR}"
+
+ echo "Output directory: ${OUTPUT_DIR}"
+
+ mkdir -p "${OUTPUT_DIR}"
+ mkdir -p "${mount_dir}"
+ update_kconfig "${kernel_checkout}" "${kconfig_file}"
+
+ recompile_kernel "${kernel_checkout}" "${make_command}"
+
+ if [[ "${update_image}" == "no" && ! -f "${rootfs_img}" ]]; then
+ echo "rootfs image not found in ${rootfs_img}"
+ update_image="yes"
+ fi
+
+ if [[ "${update_image}" == "yes" ]]; then
+ create_vm_image
+ fi
+
+ update_selftests "${kernel_checkout}" "${make_command}"
+ update_init_script "${command}" "${exit_command}"
+ run_vm "${kernel_bzimage}"
+ if [[ "${command}" != "" ]]; then
+ copy_logs
+ echo "Logs saved in ${OUTPUT_DIR}/${LOG_FILE}"
+ fi
+}
+
+main "$@"
diff --git a/tools/testing/selftests/bpf/xdp_features.c b/tools/testing/selftests/bpf/xdp_features.c
new file mode 100644
index 000000000000..595c79141cf3
--- /dev/null
+++ b/tools/testing/selftests/bpf/xdp_features.c
@@ -0,0 +1,718 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <uapi/linux/bpf.h>
+#include <uapi/linux/netdev.h>
+#include <linux/if_link.h>
+#include <signal.h>
+#include <argp.h>
+#include <net/if.h>
+#include <sys/socket.h>
+#include <netinet/in.h>
+#include <netinet/tcp.h>
+#include <unistd.h>
+#include <arpa/inet.h>
+#include <bpf/bpf.h>
+#include <bpf/libbpf.h>
+#include <pthread.h>
+
+#include <network_helpers.h>
+
+#include "xdp_features.skel.h"
+#include "xdp_features.h"
+
+#define RED(str) "\033[0;31m" str "\033[0m"
+#define GREEN(str) "\033[0;32m" str "\033[0m"
+#define YELLOW(str) "\033[0;33m" str "\033[0m"
+
+static struct env {
+ bool verbosity;
+ char ifname[IF_NAMESIZE];
+ int ifindex;
+ bool is_tester;
+ struct {
+ enum netdev_xdp_act drv_feature;
+ enum xdp_action action;
+ } feature;
+ struct sockaddr_storage dut_ctrl_addr;
+ struct sockaddr_storage dut_addr;
+ struct sockaddr_storage tester_addr;
+} env;
+
+#define BUFSIZE 128
+
+void test__fail(void) { /* for network_helpers.c */ }
+
+static int libbpf_print_fn(enum libbpf_print_level level,
+ const char *format, va_list args)
+{
+ if (level == LIBBPF_DEBUG && !env.verbosity)
+ return 0;
+ return vfprintf(stderr, format, args);
+}
+
+static volatile bool exiting;
+
+static void sig_handler(int sig)
+{
+ exiting = true;
+}
+
+const char *argp_program_version = "xdp-features 0.0";
+const char argp_program_doc[] =
+"XDP features detection application.\n"
+"\n"
+"XDP features application checks the XDP advertised features match detected ones.\n"
+"\n"
+"USAGE: ./xdp-features [-vt] [-f <xdp-feature>] [-D <dut-data-ip>] [-T <tester-data-ip>] [-C <dut-ctrl-ip>] <iface-name>\n"
+"\n"
+"dut-data-ip, tester-data-ip, dut-ctrl-ip: IPv6 or IPv4-mapped-IPv6 addresses;\n"
+"\n"
+"XDP features\n:"
+"- XDP_PASS\n"
+"- XDP_DROP\n"
+"- XDP_ABORTED\n"
+"- XDP_REDIRECT\n"
+"- XDP_NDO_XMIT\n"
+"- XDP_TX\n";
+
+static const struct argp_option opts[] = {
+ { "verbose", 'v', NULL, 0, "Verbose debug output" },
+ { "tester", 't', NULL, 0, "Tester mode" },
+ { "feature", 'f', "XDP-FEATURE", 0, "XDP feature to test" },
+ { "dut_data_ip", 'D', "DUT-DATA-IP", 0, "DUT IP data channel" },
+ { "dut_ctrl_ip", 'C', "DUT-CTRL-IP", 0, "DUT IP control channel" },
+ { "tester_data_ip", 'T', "TESTER-DATA-IP", 0, "Tester IP data channel" },
+ {},
+};
+
+static int get_xdp_feature(const char *arg)
+{
+ if (!strcmp(arg, "XDP_PASS")) {
+ env.feature.action = XDP_PASS;
+ env.feature.drv_feature = NETDEV_XDP_ACT_BASIC;
+ } else if (!strcmp(arg, "XDP_DROP")) {
+ env.feature.drv_feature = NETDEV_XDP_ACT_BASIC;
+ env.feature.action = XDP_DROP;
+ } else if (!strcmp(arg, "XDP_ABORTED")) {
+ env.feature.drv_feature = NETDEV_XDP_ACT_BASIC;
+ env.feature.action = XDP_ABORTED;
+ } else if (!strcmp(arg, "XDP_TX")) {
+ env.feature.drv_feature = NETDEV_XDP_ACT_BASIC;
+ env.feature.action = XDP_TX;
+ } else if (!strcmp(arg, "XDP_REDIRECT")) {
+ env.feature.drv_feature = NETDEV_XDP_ACT_REDIRECT;
+ env.feature.action = XDP_REDIRECT;
+ } else if (!strcmp(arg, "XDP_NDO_XMIT")) {
+ env.feature.drv_feature = NETDEV_XDP_ACT_NDO_XMIT;
+ } else {
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
+static char *get_xdp_feature_str(void)
+{
+ switch (env.feature.action) {
+ case XDP_PASS:
+ return YELLOW("XDP_PASS");
+ case XDP_DROP:
+ return YELLOW("XDP_DROP");
+ case XDP_ABORTED:
+ return YELLOW("XDP_ABORTED");
+ case XDP_TX:
+ return YELLOW("XDP_TX");
+ case XDP_REDIRECT:
+ return YELLOW("XDP_REDIRECT");
+ default:
+ break;
+ }
+
+ if (env.feature.drv_feature == NETDEV_XDP_ACT_NDO_XMIT)
+ return YELLOW("XDP_NDO_XMIT");
+
+ return "";
+}
+
+static error_t parse_arg(int key, char *arg, struct argp_state *state)
+{
+ switch (key) {
+ case 'v':
+ env.verbosity = true;
+ break;
+ case 't':
+ env.is_tester = true;
+ break;
+ case 'f':
+ if (get_xdp_feature(arg) < 0) {
+ fprintf(stderr, "Invalid xdp feature: %s\n", arg);
+ argp_usage(state);
+ return ARGP_ERR_UNKNOWN;
+ }
+ break;
+ case 'D':
+ if (make_sockaddr(AF_INET6, arg, DUT_ECHO_PORT,
+ &env.dut_addr, NULL)) {
+ fprintf(stderr,
+ "Invalid address assigned to the Device Under Test: %s\n",
+ arg);
+ return ARGP_ERR_UNKNOWN;
+ }
+ break;
+ case 'C':
+ if (make_sockaddr(AF_INET6, arg, DUT_CTRL_PORT,
+ &env.dut_ctrl_addr, NULL)) {
+ fprintf(stderr,
+ "Invalid address assigned to the Device Under Test: %s\n",
+ arg);
+ return ARGP_ERR_UNKNOWN;
+ }
+ break;
+ case 'T':
+ if (make_sockaddr(AF_INET6, arg, 0, &env.tester_addr, NULL)) {
+ fprintf(stderr,
+ "Invalid address assigned to the Tester device: %s\n",
+ arg);
+ return ARGP_ERR_UNKNOWN;
+ }
+ break;
+ case ARGP_KEY_ARG:
+ errno = 0;
+ if (strlen(arg) >= IF_NAMESIZE) {
+ fprintf(stderr, "Invalid device name: %s\n", arg);
+ argp_usage(state);
+ return ARGP_ERR_UNKNOWN;
+ }
+
+ env.ifindex = if_nametoindex(arg);
+ if (!env.ifindex)
+ env.ifindex = strtoul(arg, NULL, 0);
+ if (!env.ifindex || !if_indextoname(env.ifindex, env.ifname)) {
+ fprintf(stderr,
+ "Bad interface index or name (%d): %s\n",
+ errno, strerror(errno));
+ argp_usage(state);
+ return ARGP_ERR_UNKNOWN;
+ }
+ break;
+ default:
+ return ARGP_ERR_UNKNOWN;
+ }
+
+ return 0;
+}
+
+static const struct argp argp = {
+ .options = opts,
+ .parser = parse_arg,
+ .doc = argp_program_doc,
+};
+
+static void set_env_default(void)
+{
+ env.feature.drv_feature = NETDEV_XDP_ACT_NDO_XMIT;
+ env.feature.action = -EINVAL;
+ env.ifindex = -ENODEV;
+ strcpy(env.ifname, "unknown");
+ make_sockaddr(AF_INET6, "::ffff:127.0.0.1", DUT_CTRL_PORT,
+ &env.dut_ctrl_addr, NULL);
+ make_sockaddr(AF_INET6, "::ffff:127.0.0.1", DUT_ECHO_PORT,
+ &env.dut_addr, NULL);
+ make_sockaddr(AF_INET6, "::ffff:127.0.0.1", 0, &env.tester_addr, NULL);
+}
+
+static void *dut_echo_thread(void *arg)
+{
+ unsigned char buf[sizeof(struct tlv_hdr)];
+ int sockfd = *(int *)arg;
+
+ while (!exiting) {
+ struct tlv_hdr *tlv = (struct tlv_hdr *)buf;
+ struct sockaddr_storage addr;
+ socklen_t addrlen;
+ size_t n;
+
+ n = recvfrom(sockfd, buf, sizeof(buf), MSG_WAITALL,
+ (struct sockaddr *)&addr, &addrlen);
+ if (n != ntohs(tlv->len))
+ continue;
+
+ if (ntohs(tlv->type) != CMD_ECHO)
+ continue;
+
+ sendto(sockfd, buf, sizeof(buf), MSG_NOSIGNAL | MSG_CONFIRM,
+ (struct sockaddr *)&addr, addrlen);
+ }
+
+ pthread_exit((void *)0);
+ close(sockfd);
+
+ return NULL;
+}
+
+static int dut_run_echo_thread(pthread_t *t, int *sockfd)
+{
+ int err;
+
+ sockfd = start_reuseport_server(AF_INET6, SOCK_DGRAM, NULL,
+ DUT_ECHO_PORT, 0, 1);
+ if (!sockfd) {
+ fprintf(stderr,
+ "Failed creating data UDP socket on device %s\n",
+ env.ifname);
+ return -errno;
+ }
+
+ /* start echo channel */
+ err = pthread_create(t, NULL, dut_echo_thread, sockfd);
+ if (err) {
+ fprintf(stderr,
+ "Failed creating data UDP thread on device %s: %s\n",
+ env.ifname, strerror(-err));
+ free_fds(sockfd, 1);
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
+static int dut_attach_xdp_prog(struct xdp_features *skel, int flags)
+{
+ enum xdp_action action = env.feature.action;
+ struct bpf_program *prog;
+ unsigned int key = 0;
+ int err, fd = 0;
+
+ if (env.feature.drv_feature == NETDEV_XDP_ACT_NDO_XMIT) {
+ struct bpf_devmap_val entry = {
+ .ifindex = env.ifindex,
+ };
+
+ err = bpf_map__update_elem(skel->maps.dev_map,
+ &key, sizeof(key),
+ &entry, sizeof(entry), 0);
+ if (err < 0)
+ return err;
+
+ fd = bpf_program__fd(skel->progs.xdp_do_redirect_cpumap);
+ action = XDP_REDIRECT;
+ }
+
+ switch (action) {
+ case XDP_TX:
+ prog = skel->progs.xdp_do_tx;
+ break;
+ case XDP_DROP:
+ prog = skel->progs.xdp_do_drop;
+ break;
+ case XDP_ABORTED:
+ prog = skel->progs.xdp_do_aborted;
+ break;
+ case XDP_PASS:
+ prog = skel->progs.xdp_do_pass;
+ break;
+ case XDP_REDIRECT: {
+ struct bpf_cpumap_val entry = {
+ .qsize = 2048,
+ .bpf_prog.fd = fd,
+ };
+
+ err = bpf_map__update_elem(skel->maps.cpu_map,
+ &key, sizeof(key),
+ &entry, sizeof(entry), 0);
+ if (err < 0)
+ return err;
+
+ prog = skel->progs.xdp_do_redirect;
+ break;
+ }
+ default:
+ return -EINVAL;
+ }
+
+ err = bpf_xdp_attach(env.ifindex, bpf_program__fd(prog), flags, NULL);
+ if (err)
+ fprintf(stderr, "Failed attaching XDP program to device %s\n",
+ env.ifname);
+ return err;
+}
+
+static int recv_msg(int sockfd, void *buf, size_t bufsize, void *val,
+ size_t val_size)
+{
+ struct tlv_hdr *tlv = (struct tlv_hdr *)buf;
+ size_t len;
+
+ len = recv(sockfd, buf, bufsize, 0);
+ if (len != ntohs(tlv->len) || len < sizeof(*tlv))
+ return -EINVAL;
+
+ if (val) {
+ len -= sizeof(*tlv);
+ if (len > val_size)
+ return -ENOMEM;
+
+ memcpy(val, tlv->data, len);
+ }
+
+ return 0;
+}
+
+static int dut_run(struct xdp_features *skel)
+{
+ int flags = XDP_FLAGS_UPDATE_IF_NOEXIST | XDP_FLAGS_DRV_MODE;
+ int state, err = 0, *sockfd, ctrl_sockfd, echo_sockfd;
+ struct sockaddr_storage ctrl_addr;
+ pthread_t dut_thread = 0;
+ socklen_t addrlen;
+
+ sockfd = start_reuseport_server(AF_INET6, SOCK_STREAM, NULL,
+ DUT_CTRL_PORT, 0, 1);
+ if (!sockfd) {
+ fprintf(stderr,
+ "Failed creating control socket on device %s\n", env.ifname);
+ return -errno;
+ }
+
+ ctrl_sockfd = accept(*sockfd, (struct sockaddr *)&ctrl_addr, &addrlen);
+ if (ctrl_sockfd < 0) {
+ fprintf(stderr,
+ "Failed accepting connections on device %s control socket\n",
+ env.ifname);
+ free_fds(sockfd, 1);
+ return -errno;
+ }
+
+ /* CTRL loop */
+ while (!exiting) {
+ unsigned char buf[BUFSIZE] = {};
+ struct tlv_hdr *tlv = (struct tlv_hdr *)buf;
+
+ err = recv_msg(ctrl_sockfd, buf, BUFSIZE, NULL, 0);
+ if (err)
+ continue;
+
+ switch (ntohs(tlv->type)) {
+ case CMD_START: {
+ if (state == CMD_START)
+ continue;
+
+ state = CMD_START;
+ /* Load the XDP program on the DUT */
+ err = dut_attach_xdp_prog(skel, flags);
+ if (err)
+ goto out;
+
+ err = dut_run_echo_thread(&dut_thread, &echo_sockfd);
+ if (err < 0)
+ goto out;
+
+ tlv->type = htons(CMD_ACK);
+ tlv->len = htons(sizeof(*tlv));
+ err = send(ctrl_sockfd, buf, sizeof(*tlv), 0);
+ if (err < 0)
+ goto end_thread;
+ break;
+ }
+ case CMD_STOP:
+ if (state != CMD_START)
+ break;
+
+ state = CMD_STOP;
+
+ exiting = true;
+ bpf_xdp_detach(env.ifindex, flags, NULL);
+
+ tlv->type = htons(CMD_ACK);
+ tlv->len = htons(sizeof(*tlv));
+ err = send(ctrl_sockfd, buf, sizeof(*tlv), 0);
+ goto end_thread;
+ case CMD_GET_XDP_CAP: {
+ LIBBPF_OPTS(bpf_xdp_query_opts, opts);
+ unsigned long long val;
+ size_t n;
+
+ err = bpf_xdp_query(env.ifindex, XDP_FLAGS_DRV_MODE,
+ &opts);
+ if (err) {
+ fprintf(stderr,
+ "Failed querying XDP cap for device %s\n",
+ env.ifname);
+ goto end_thread;
+ }
+
+ tlv->type = htons(CMD_ACK);
+ n = sizeof(*tlv) + sizeof(opts.feature_flags);
+ tlv->len = htons(n);
+
+ val = htobe64(opts.feature_flags);
+ memcpy(tlv->data, &val, sizeof(val));
+
+ err = send(ctrl_sockfd, buf, n, 0);
+ if (err < 0)
+ goto end_thread;
+ break;
+ }
+ case CMD_GET_STATS: {
+ unsigned int key = 0, val;
+ size_t n;
+
+ err = bpf_map__lookup_elem(skel->maps.dut_stats,
+ &key, sizeof(key),
+ &val, sizeof(val), 0);
+ if (err) {
+ fprintf(stderr,
+ "bpf_map_lookup_elem failed (%d)\n", err);
+ goto end_thread;
+ }
+
+ tlv->type = htons(CMD_ACK);
+ n = sizeof(*tlv) + sizeof(val);
+ tlv->len = htons(n);
+
+ val = htonl(val);
+ memcpy(tlv->data, &val, sizeof(val));
+
+ err = send(ctrl_sockfd, buf, n, 0);
+ if (err < 0)
+ goto end_thread;
+ break;
+ }
+ default:
+ break;
+ }
+ }
+
+end_thread:
+ pthread_join(dut_thread, NULL);
+out:
+ bpf_xdp_detach(env.ifindex, flags, NULL);
+ close(ctrl_sockfd);
+ free_fds(sockfd, 1);
+
+ return err;
+}
+
+static bool tester_collect_detected_cap(struct xdp_features *skel,
+ unsigned int dut_stats)
+{
+ unsigned int err, key = 0, val;
+
+ if (!dut_stats)
+ return false;
+
+ err = bpf_map__lookup_elem(skel->maps.stats, &key, sizeof(key),
+ &val, sizeof(val), 0);
+ if (err) {
+ fprintf(stderr, "bpf_map_lookup_elem failed (%d)\n", err);
+ return false;
+ }
+
+ switch (env.feature.action) {
+ case XDP_PASS:
+ case XDP_TX:
+ case XDP_REDIRECT:
+ return val > 0;
+ case XDP_DROP:
+ case XDP_ABORTED:
+ return val == 0;
+ default:
+ break;
+ }
+
+ if (env.feature.drv_feature == NETDEV_XDP_ACT_NDO_XMIT)
+ return val > 0;
+
+ return false;
+}
+
+static int send_and_recv_msg(int sockfd, enum test_commands cmd, void *val,
+ size_t val_size)
+{
+ unsigned char buf[BUFSIZE] = {};
+ struct tlv_hdr *tlv = (struct tlv_hdr *)buf;
+ int err;
+
+ tlv->type = htons(cmd);
+ tlv->len = htons(sizeof(*tlv));
+
+ err = send(sockfd, buf, sizeof(*tlv), 0);
+ if (err < 0)
+ return err;
+
+ err = recv_msg(sockfd, buf, BUFSIZE, val, val_size);
+ if (err < 0)
+ return err;
+
+ return ntohs(tlv->type) == CMD_ACK ? 0 : -EINVAL;
+}
+
+static int send_echo_msg(void)
+{
+ unsigned char buf[sizeof(struct tlv_hdr)];
+ struct tlv_hdr *tlv = (struct tlv_hdr *)buf;
+ int sockfd, n;
+
+ sockfd = socket(AF_INET6, SOCK_DGRAM, 0);
+ if (sockfd < 0) {
+ fprintf(stderr,
+ "Failed creating data UDP socket on device %s\n",
+ env.ifname);
+ return -errno;
+ }
+
+ tlv->type = htons(CMD_ECHO);
+ tlv->len = htons(sizeof(*tlv));
+
+ n = sendto(sockfd, buf, sizeof(*tlv), MSG_NOSIGNAL | MSG_CONFIRM,
+ (struct sockaddr *)&env.dut_addr, sizeof(env.dut_addr));
+ close(sockfd);
+
+ return n == ntohs(tlv->len) ? 0 : -EINVAL;
+}
+
+static int tester_run(struct xdp_features *skel)
+{
+ int flags = XDP_FLAGS_UPDATE_IF_NOEXIST | XDP_FLAGS_DRV_MODE;
+ unsigned long long advertised_feature;
+ struct bpf_program *prog;
+ unsigned int stats;
+ int i, err, sockfd;
+ bool detected_cap;
+
+ sockfd = socket(AF_INET6, SOCK_STREAM, 0);
+ if (sockfd < 0) {
+ fprintf(stderr,
+ "Failed creating tester service control socket\n");
+ return -errno;
+ }
+
+ if (settimeo(sockfd, 1000) < 0)
+ return -EINVAL;
+
+ err = connect(sockfd, (struct sockaddr *)&env.dut_ctrl_addr,
+ sizeof(env.dut_ctrl_addr));
+ if (err) {
+ fprintf(stderr,
+ "Failed connecting to the Device Under Test control socket\n");
+ return -errno;
+ }
+
+ err = send_and_recv_msg(sockfd, CMD_GET_XDP_CAP, &advertised_feature,
+ sizeof(advertised_feature));
+ if (err < 0) {
+ close(sockfd);
+ return err;
+ }
+
+ advertised_feature = be64toh(advertised_feature);
+
+ if (env.feature.drv_feature == NETDEV_XDP_ACT_NDO_XMIT ||
+ env.feature.action == XDP_TX)
+ prog = skel->progs.xdp_tester_check_tx;
+ else
+ prog = skel->progs.xdp_tester_check_rx;
+
+ err = bpf_xdp_attach(env.ifindex, bpf_program__fd(prog), flags, NULL);
+ if (err) {
+ fprintf(stderr, "Failed attaching XDP program to device %s\n",
+ env.ifname);
+ goto out;
+ }
+
+ err = send_and_recv_msg(sockfd, CMD_START, NULL, 0);
+ if (err)
+ goto out;
+
+ for (i = 0; i < 10 && !exiting; i++) {
+ err = send_echo_msg();
+ if (err < 0)
+ goto out;
+
+ sleep(1);
+ }
+
+ err = send_and_recv_msg(sockfd, CMD_GET_STATS, &stats, sizeof(stats));
+ if (err)
+ goto out;
+
+ /* stop the test */
+ err = send_and_recv_msg(sockfd, CMD_STOP, NULL, 0);
+ /* send a new echo message to wake echo thread of the dut */
+ send_echo_msg();
+
+ detected_cap = tester_collect_detected_cap(skel, ntohl(stats));
+
+ fprintf(stdout, "Feature %s: [%s][%s]\n", get_xdp_feature_str(),
+ detected_cap ? GREEN("DETECTED") : RED("NOT DETECTED"),
+ env.feature.drv_feature & advertised_feature ? GREEN("ADVERTISED")
+ : RED("NOT ADVERTISED"));
+out:
+ bpf_xdp_detach(env.ifindex, flags, NULL);
+ close(sockfd);
+ return err < 0 ? err : 0;
+}
+
+int main(int argc, char **argv)
+{
+ struct xdp_features *skel;
+ int err;
+
+ libbpf_set_strict_mode(LIBBPF_STRICT_ALL);
+ libbpf_set_print(libbpf_print_fn);
+
+ signal(SIGINT, sig_handler);
+ signal(SIGTERM, sig_handler);
+
+ set_env_default();
+
+ /* Parse command line arguments */
+ err = argp_parse(&argp, argc, argv, 0, NULL, NULL);
+ if (err)
+ return err;
+
+ if (env.ifindex < 0) {
+ fprintf(stderr, "Invalid device name %s\n", env.ifname);
+ return -ENODEV;
+ }
+
+ /* Load and verify BPF application */
+ skel = xdp_features__open();
+ if (!skel) {
+ fprintf(stderr, "Failed to open and load BPF skeleton\n");
+ return -EINVAL;
+ }
+
+ skel->rodata->tester_addr =
+ ((struct sockaddr_in6 *)&env.tester_addr)->sin6_addr;
+ skel->rodata->dut_addr =
+ ((struct sockaddr_in6 *)&env.dut_addr)->sin6_addr;
+
+ /* Load & verify BPF programs */
+ err = xdp_features__load(skel);
+ if (err) {
+ fprintf(stderr, "Failed to load and verify BPF skeleton\n");
+ goto cleanup;
+ }
+
+ err = xdp_features__attach(skel);
+ if (err) {
+ fprintf(stderr, "Failed to attach BPF skeleton\n");
+ goto cleanup;
+ }
+
+ if (env.is_tester) {
+ /* Tester */
+ fprintf(stdout, "Starting tester service on device %s\n",
+ env.ifname);
+ err = tester_run(skel);
+ } else {
+ /* DUT */
+ fprintf(stdout, "Starting test on device %s\n", env.ifname);
+ err = dut_run(skel);
+ }
+
+cleanup:
+ xdp_features__destroy(skel);
+
+ return err < 0 ? -err : 0;
+}
diff --git a/tools/testing/selftests/bpf/xdp_features.h b/tools/testing/selftests/bpf/xdp_features.h
new file mode 100644
index 000000000000..2670c541713b
--- /dev/null
+++ b/tools/testing/selftests/bpf/xdp_features.h
@@ -0,0 +1,20 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+
+/* test commands */
+enum test_commands {
+ CMD_STOP, /* CMD */
+ CMD_START, /* CMD */
+ CMD_ECHO, /* CMD */
+ CMD_ACK, /* CMD + data */
+ CMD_GET_XDP_CAP, /* CMD */
+ CMD_GET_STATS, /* CMD */
+};
+
+#define DUT_CTRL_PORT 12345
+#define DUT_ECHO_PORT 12346
+
+struct tlv_hdr {
+ __be16 type;
+ __be16 len;
+ __u8 data[];
+};
diff --git a/tools/testing/selftests/bpf/xdp_hw_metadata.c b/tools/testing/selftests/bpf/xdp_hw_metadata.c
new file mode 100644
index 000000000000..bdf5d8180067
--- /dev/null
+++ b/tools/testing/selftests/bpf/xdp_hw_metadata.c
@@ -0,0 +1,747 @@
+// SPDX-License-Identifier: GPL-2.0
+
+/* Reference program for verifying XDP metadata on real HW. Functional test
+ * only, doesn't test the performance.
+ *
+ * RX:
+ * - UDP 9091 packets are diverted into AF_XDP
+ * - Metadata verified:
+ * - rx_timestamp
+ * - rx_hash
+ *
+ * TX:
+ * - UDP 9091 packets trigger TX reply
+ * - TX HW timestamp is requested and reported back upon completion
+ * - TX checksum is requested
+ */
+
+#include <test_progs.h>
+#include <network_helpers.h>
+#include "xdp_hw_metadata.skel.h"
+#include "xsk.h"
+
+#include <error.h>
+#include <linux/kernel.h>
+#include <linux/bits.h>
+#include <linux/bitfield.h>
+#include <linux/errqueue.h>
+#include <linux/if_link.h>
+#include <linux/net_tstamp.h>
+#include <linux/udp.h>
+#include <linux/sockios.h>
+#include <linux/if_xdp.h>
+#include <sys/mman.h>
+#include <net/if.h>
+#include <ctype.h>
+#include <poll.h>
+#include <time.h>
+#include <unistd.h>
+#include <libgen.h>
+
+#include "xdp_metadata.h"
+
+#define UMEM_NUM 256
+#define UMEM_FRAME_SIZE XSK_UMEM__DEFAULT_FRAME_SIZE
+#define UMEM_SIZE (UMEM_FRAME_SIZE * UMEM_NUM)
+#define XDP_FLAGS (XDP_FLAGS_DRV_MODE | XDP_FLAGS_REPLACE)
+
+struct xsk {
+ void *umem_area;
+ struct xsk_umem *umem;
+ struct xsk_ring_prod fill;
+ struct xsk_ring_cons comp;
+ struct xsk_ring_prod tx;
+ struct xsk_ring_cons rx;
+ struct xsk_socket *socket;
+};
+
+struct xdp_hw_metadata *bpf_obj;
+__u16 bind_flags = XDP_USE_NEED_WAKEUP | XDP_ZEROCOPY;
+struct xsk *rx_xsk;
+const char *ifname;
+int ifindex;
+int rxq;
+bool skip_tx;
+__u64 last_hw_rx_timestamp;
+__u64 last_xdp_rx_timestamp;
+
+void test__fail(void) { /* for network_helpers.c */ }
+
+static int open_xsk(int ifindex, struct xsk *xsk, __u32 queue_id)
+{
+ int mmap_flags = MAP_PRIVATE | MAP_ANONYMOUS | MAP_NORESERVE;
+ const struct xsk_socket_config socket_config = {
+ .rx_size = XSK_RING_PROD__DEFAULT_NUM_DESCS,
+ .tx_size = XSK_RING_PROD__DEFAULT_NUM_DESCS,
+ .bind_flags = bind_flags,
+ };
+ const struct xsk_umem_config umem_config = {
+ .fill_size = XSK_RING_PROD__DEFAULT_NUM_DESCS,
+ .comp_size = XSK_RING_CONS__DEFAULT_NUM_DESCS,
+ .frame_size = XSK_UMEM__DEFAULT_FRAME_SIZE,
+ .flags = XSK_UMEM__DEFAULT_FLAGS,
+ .tx_metadata_len = sizeof(struct xsk_tx_metadata),
+ };
+ __u32 idx = 0;
+ u64 addr;
+ int ret;
+ int i;
+
+ xsk->umem_area = mmap(NULL, UMEM_SIZE, PROT_READ | PROT_WRITE, mmap_flags, -1, 0);
+ if (xsk->umem_area == MAP_FAILED)
+ return -ENOMEM;
+
+ ret = xsk_umem__create(&xsk->umem,
+ xsk->umem_area, UMEM_SIZE,
+ &xsk->fill,
+ &xsk->comp,
+ &umem_config);
+ if (ret)
+ return ret;
+
+ ret = xsk_socket__create(&xsk->socket, ifindex, queue_id,
+ xsk->umem,
+ &xsk->rx,
+ &xsk->tx,
+ &socket_config);
+ if (ret)
+ return ret;
+
+ /* First half of umem is for TX. This way address matches 1-to-1
+ * to the completion queue index.
+ */
+
+ for (i = 0; i < UMEM_NUM / 2; i++) {
+ addr = i * UMEM_FRAME_SIZE;
+ printf("%p: tx_desc[%d] -> %lx\n", xsk, i, addr);
+ }
+
+ /* Second half of umem is for RX. */
+
+ ret = xsk_ring_prod__reserve(&xsk->fill, UMEM_NUM / 2, &idx);
+ for (i = 0; i < UMEM_NUM / 2; i++) {
+ addr = (UMEM_NUM / 2 + i) * UMEM_FRAME_SIZE;
+ printf("%p: rx_desc[%d] -> %lx\n", xsk, i, addr);
+ *xsk_ring_prod__fill_addr(&xsk->fill, idx + i) = addr;
+ }
+ xsk_ring_prod__submit(&xsk->fill, ret);
+
+ return 0;
+}
+
+static void close_xsk(struct xsk *xsk)
+{
+ if (xsk->umem)
+ xsk_umem__delete(xsk->umem);
+ if (xsk->socket)
+ xsk_socket__delete(xsk->socket);
+ munmap(xsk->umem_area, UMEM_SIZE);
+}
+
+static void refill_rx(struct xsk *xsk, __u64 addr)
+{
+ __u32 idx;
+
+ if (xsk_ring_prod__reserve(&xsk->fill, 1, &idx) == 1) {
+ printf("%p: complete rx idx=%u addr=%llx\n", xsk, idx, addr);
+ *xsk_ring_prod__fill_addr(&xsk->fill, idx) = addr;
+ xsk_ring_prod__submit(&xsk->fill, 1);
+ }
+}
+
+static int kick_tx(struct xsk *xsk)
+{
+ return sendto(xsk_socket__fd(xsk->socket), NULL, 0, MSG_DONTWAIT, NULL, 0);
+}
+
+static int kick_rx(struct xsk *xsk)
+{
+ return recvfrom(xsk_socket__fd(xsk->socket), NULL, 0, MSG_DONTWAIT, NULL, NULL);
+}
+
+#define NANOSEC_PER_SEC 1000000000 /* 10^9 */
+static __u64 gettime(clockid_t clock_id)
+{
+ struct timespec t;
+ int res;
+
+ /* See man clock_gettime(2) for type of clock_id's */
+ res = clock_gettime(clock_id, &t);
+
+ if (res < 0)
+ error(res, errno, "Error with clock_gettime()");
+
+ return (__u64) t.tv_sec * NANOSEC_PER_SEC + t.tv_nsec;
+}
+
+static void print_tstamp_delta(const char *name, const char *refname,
+ __u64 tstamp, __u64 reference)
+{
+ __s64 delta = (__s64)reference - (__s64)tstamp;
+
+ printf("%s: %llu (sec:%0.4f) delta to %s sec:%0.4f (%0.3f usec)\n",
+ name, tstamp, (double)tstamp / NANOSEC_PER_SEC, refname,
+ (double)delta / NANOSEC_PER_SEC,
+ (double)delta / 1000);
+}
+
+#define VLAN_PRIO_MASK GENMASK(15, 13) /* Priority Code Point */
+#define VLAN_DEI_MASK GENMASK(12, 12) /* Drop Eligible Indicator */
+#define VLAN_VID_MASK GENMASK(11, 0) /* VLAN Identifier */
+static void print_vlan_tci(__u16 tag)
+{
+ __u16 vlan_id = FIELD_GET(VLAN_VID_MASK, tag);
+ __u8 pcp = FIELD_GET(VLAN_PRIO_MASK, tag);
+ bool dei = FIELD_GET(VLAN_DEI_MASK, tag);
+
+ printf("PCP=%u, DEI=%d, VID=0x%X\n", pcp, dei, vlan_id);
+}
+
+static void verify_xdp_metadata(void *data, clockid_t clock_id)
+{
+ struct xdp_meta *meta;
+
+ meta = data - sizeof(*meta);
+
+ if (meta->hint_valid & XDP_META_FIELD_RSS)
+ printf("rx_hash: 0x%X with RSS type:0x%X\n",
+ meta->rx_hash, meta->rx_hash_type);
+ else
+ printf("No rx_hash, err=%d\n", meta->rx_hash_err);
+
+ if (meta->hint_valid & XDP_META_FIELD_TS) {
+ __u64 ref_tstamp = gettime(clock_id);
+
+ /* store received timestamps to calculate a delta at tx */
+ last_hw_rx_timestamp = meta->rx_timestamp;
+ last_xdp_rx_timestamp = meta->xdp_timestamp;
+
+ print_tstamp_delta("HW RX-time", "User RX-time",
+ meta->rx_timestamp, ref_tstamp);
+ print_tstamp_delta("XDP RX-time", "User RX-time",
+ meta->xdp_timestamp, ref_tstamp);
+ } else {
+ printf("No rx_timestamp, err=%d\n", meta->rx_timestamp_err);
+ }
+
+ if (meta->hint_valid & XDP_META_FIELD_VLAN_TAG) {
+ printf("rx_vlan_proto: 0x%X\n", ntohs(meta->rx_vlan_proto));
+ printf("rx_vlan_tci: ");
+ print_vlan_tci(meta->rx_vlan_tci);
+ } else {
+ printf("No rx_vlan_tci or rx_vlan_proto, err=%d\n",
+ meta->rx_vlan_tag_err);
+ }
+}
+
+static void verify_skb_metadata(int fd)
+{
+ char cmsg_buf[1024];
+ char packet_buf[128];
+
+ struct scm_timestamping *ts;
+ struct iovec packet_iov;
+ struct cmsghdr *cmsg;
+ struct msghdr hdr;
+
+ memset(&hdr, 0, sizeof(hdr));
+ hdr.msg_iov = &packet_iov;
+ hdr.msg_iovlen = 1;
+ packet_iov.iov_base = packet_buf;
+ packet_iov.iov_len = sizeof(packet_buf);
+
+ hdr.msg_control = cmsg_buf;
+ hdr.msg_controllen = sizeof(cmsg_buf);
+
+ if (recvmsg(fd, &hdr, 0) < 0)
+ error(1, errno, "recvmsg");
+
+ for (cmsg = CMSG_FIRSTHDR(&hdr); cmsg != NULL;
+ cmsg = CMSG_NXTHDR(&hdr, cmsg)) {
+
+ if (cmsg->cmsg_level != SOL_SOCKET)
+ continue;
+
+ switch (cmsg->cmsg_type) {
+ case SCM_TIMESTAMPING:
+ ts = (struct scm_timestamping *)CMSG_DATA(cmsg);
+ if (ts->ts[2].tv_sec || ts->ts[2].tv_nsec) {
+ printf("found skb hwtstamp = %lu.%lu\n",
+ ts->ts[2].tv_sec, ts->ts[2].tv_nsec);
+ return;
+ }
+ break;
+ default:
+ break;
+ }
+ }
+
+ printf("skb hwtstamp is not found!\n");
+}
+
+static bool complete_tx(struct xsk *xsk, clockid_t clock_id)
+{
+ struct xsk_tx_metadata *meta;
+ __u64 addr;
+ void *data;
+ __u32 idx;
+
+ if (!xsk_ring_cons__peek(&xsk->comp, 1, &idx))
+ return false;
+
+ addr = *xsk_ring_cons__comp_addr(&xsk->comp, idx);
+ data = xsk_umem__get_data(xsk->umem_area, addr);
+ meta = data - sizeof(struct xsk_tx_metadata);
+
+ printf("%p: complete tx idx=%u addr=%llx\n", xsk, idx, addr);
+
+ if (meta->completion.tx_timestamp) {
+ __u64 ref_tstamp = gettime(clock_id);
+
+ print_tstamp_delta("HW TX-complete-time", "User TX-complete-time",
+ meta->completion.tx_timestamp, ref_tstamp);
+ print_tstamp_delta("XDP RX-time", "User TX-complete-time",
+ last_xdp_rx_timestamp, ref_tstamp);
+ print_tstamp_delta("HW RX-time", "HW TX-complete-time",
+ last_hw_rx_timestamp, meta->completion.tx_timestamp);
+ } else {
+ printf("No tx_timestamp\n");
+ }
+
+ xsk_ring_cons__release(&xsk->comp, 1);
+
+ return true;
+}
+
+#define swap(a, b, len) do { \
+ for (int i = 0; i < len; i++) { \
+ __u8 tmp = ((__u8 *)a)[i]; \
+ ((__u8 *)a)[i] = ((__u8 *)b)[i]; \
+ ((__u8 *)b)[i] = tmp; \
+ } \
+} while (0)
+
+static void ping_pong(struct xsk *xsk, void *rx_packet, clockid_t clock_id)
+{
+ struct xsk_tx_metadata *meta;
+ struct ipv6hdr *ip6h = NULL;
+ struct iphdr *iph = NULL;
+ struct xdp_desc *tx_desc;
+ struct udphdr *udph;
+ struct ethhdr *eth;
+ __sum16 want_csum;
+ void *data;
+ __u32 idx;
+ int ret;
+ int len;
+
+ ret = xsk_ring_prod__reserve(&xsk->tx, 1, &idx);
+ if (ret != 1) {
+ printf("%p: failed to reserve tx slot\n", xsk);
+ return;
+ }
+
+ tx_desc = xsk_ring_prod__tx_desc(&xsk->tx, idx);
+ tx_desc->addr = idx % (UMEM_NUM / 2) * UMEM_FRAME_SIZE + sizeof(struct xsk_tx_metadata);
+ data = xsk_umem__get_data(xsk->umem_area, tx_desc->addr);
+
+ meta = data - sizeof(struct xsk_tx_metadata);
+ memset(meta, 0, sizeof(*meta));
+ meta->flags = XDP_TXMD_FLAGS_TIMESTAMP;
+
+ eth = rx_packet;
+
+ if (eth->h_proto == htons(ETH_P_IP)) {
+ iph = (void *)(eth + 1);
+ udph = (void *)(iph + 1);
+ } else if (eth->h_proto == htons(ETH_P_IPV6)) {
+ ip6h = (void *)(eth + 1);
+ udph = (void *)(ip6h + 1);
+ } else {
+ printf("%p: failed to detect IP version for ping pong %04x\n", xsk, eth->h_proto);
+ xsk_ring_prod__cancel(&xsk->tx, 1);
+ return;
+ }
+
+ len = ETH_HLEN;
+ if (ip6h)
+ len += sizeof(*ip6h) + ntohs(ip6h->payload_len);
+ if (iph)
+ len += ntohs(iph->tot_len);
+
+ swap(eth->h_dest, eth->h_source, ETH_ALEN);
+ if (iph)
+ swap(&iph->saddr, &iph->daddr, 4);
+ else
+ swap(&ip6h->saddr, &ip6h->daddr, 16);
+ swap(&udph->source, &udph->dest, 2);
+
+ want_csum = udph->check;
+ if (ip6h)
+ udph->check = ~csum_ipv6_magic(&ip6h->saddr, &ip6h->daddr,
+ ntohs(udph->len), IPPROTO_UDP, 0);
+ else
+ udph->check = ~csum_tcpudp_magic(iph->saddr, iph->daddr,
+ ntohs(udph->len), IPPROTO_UDP, 0);
+
+ meta->flags |= XDP_TXMD_FLAGS_CHECKSUM;
+ if (iph)
+ meta->request.csum_start = sizeof(*eth) + sizeof(*iph);
+ else
+ meta->request.csum_start = sizeof(*eth) + sizeof(*ip6h);
+ meta->request.csum_offset = offsetof(struct udphdr, check);
+
+ printf("%p: ping-pong with csum=%04x (want %04x) csum_start=%d csum_offset=%d\n",
+ xsk, ntohs(udph->check), ntohs(want_csum),
+ meta->request.csum_start, meta->request.csum_offset);
+
+ memcpy(data, rx_packet, len); /* don't share umem chunk for simplicity */
+ tx_desc->options |= XDP_TX_METADATA;
+ tx_desc->len = len;
+
+ xsk_ring_prod__submit(&xsk->tx, 1);
+}
+
+static int verify_metadata(struct xsk *rx_xsk, int rxq, int server_fd, clockid_t clock_id)
+{
+ const struct xdp_desc *rx_desc;
+ struct pollfd fds[rxq + 1];
+ __u64 comp_addr;
+ __u64 addr;
+ __u32 idx = 0;
+ int ret;
+ int i;
+
+ for (i = 0; i < rxq; i++) {
+ fds[i].fd = xsk_socket__fd(rx_xsk[i].socket);
+ fds[i].events = POLLIN;
+ fds[i].revents = 0;
+ }
+
+ fds[rxq].fd = server_fd;
+ fds[rxq].events = POLLIN;
+ fds[rxq].revents = 0;
+
+ while (true) {
+ errno = 0;
+
+ for (i = 0; i < rxq; i++) {
+ ret = kick_rx(&rx_xsk[i]);
+ if (ret)
+ printf("kick_rx ret=%d\n", ret);
+ }
+
+ ret = poll(fds, rxq + 1, 1000);
+ printf("poll: %d (%d) skip=%llu fail=%llu redir=%llu\n",
+ ret, errno, bpf_obj->bss->pkts_skip,
+ bpf_obj->bss->pkts_fail, bpf_obj->bss->pkts_redir);
+ if (ret < 0)
+ break;
+ if (ret == 0)
+ continue;
+
+ if (fds[rxq].revents)
+ verify_skb_metadata(server_fd);
+
+ for (i = 0; i < rxq; i++) {
+ bool first_seg = true;
+ bool is_eop = true;
+
+ if (fds[i].revents == 0)
+ continue;
+
+ struct xsk *xsk = &rx_xsk[i];
+peek:
+ ret = xsk_ring_cons__peek(&xsk->rx, 1, &idx);
+ printf("xsk_ring_cons__peek: %d\n", ret);
+ if (ret != 1)
+ continue;
+
+ rx_desc = xsk_ring_cons__rx_desc(&xsk->rx, idx);
+ comp_addr = xsk_umem__extract_addr(rx_desc->addr);
+ addr = xsk_umem__add_offset_to_addr(rx_desc->addr);
+ is_eop = !(rx_desc->options & XDP_PKT_CONTD);
+ printf("%p: rx_desc[%u]->addr=%llx addr=%llx comp_addr=%llx%s\n",
+ xsk, idx, rx_desc->addr, addr, comp_addr, is_eop ? " EoP" : "");
+ if (first_seg) {
+ verify_xdp_metadata(xsk_umem__get_data(xsk->umem_area, addr),
+ clock_id);
+ first_seg = false;
+
+ if (!skip_tx) {
+ /* mirror first chunk back */
+ ping_pong(xsk, xsk_umem__get_data(xsk->umem_area, addr),
+ clock_id);
+
+ ret = kick_tx(xsk);
+ if (ret)
+ printf("kick_tx ret=%d\n", ret);
+
+ for (int j = 0; j < 500; j++) {
+ if (complete_tx(xsk, clock_id))
+ break;
+ usleep(10);
+ }
+ }
+ }
+
+ xsk_ring_cons__release(&xsk->rx, 1);
+ refill_rx(xsk, comp_addr);
+ if (!is_eop)
+ goto peek;
+ }
+ }
+
+ return 0;
+}
+
+struct ethtool_channels {
+ __u32 cmd;
+ __u32 max_rx;
+ __u32 max_tx;
+ __u32 max_other;
+ __u32 max_combined;
+ __u32 rx_count;
+ __u32 tx_count;
+ __u32 other_count;
+ __u32 combined_count;
+};
+
+#define ETHTOOL_GCHANNELS 0x0000003c /* Get no of channels */
+
+static int rxq_num(const char *ifname)
+{
+ struct ethtool_channels ch = {
+ .cmd = ETHTOOL_GCHANNELS,
+ };
+
+ struct ifreq ifr = {
+ .ifr_data = (void *)&ch,
+ };
+ strncpy(ifr.ifr_name, ifname, IF_NAMESIZE - 1);
+ int fd, ret;
+
+ fd = socket(AF_UNIX, SOCK_DGRAM, 0);
+ if (fd < 0)
+ error(1, errno, "socket");
+
+ ret = ioctl(fd, SIOCETHTOOL, &ifr);
+ if (ret < 0)
+ error(1, errno, "ioctl(SIOCETHTOOL)");
+
+ close(fd);
+
+ return ch.rx_count + ch.combined_count;
+}
+
+static void hwtstamp_ioctl(int op, const char *ifname, struct hwtstamp_config *cfg)
+{
+ struct ifreq ifr = {
+ .ifr_data = (void *)cfg,
+ };
+ strncpy(ifr.ifr_name, ifname, IF_NAMESIZE - 1);
+ int fd, ret;
+
+ fd = socket(AF_UNIX, SOCK_DGRAM, 0);
+ if (fd < 0)
+ error(1, errno, "socket");
+
+ ret = ioctl(fd, op, &ifr);
+ if (ret < 0)
+ error(1, errno, "ioctl(%d)", op);
+
+ close(fd);
+}
+
+static struct hwtstamp_config saved_hwtstamp_cfg;
+static const char *saved_hwtstamp_ifname;
+
+static void hwtstamp_restore(void)
+{
+ hwtstamp_ioctl(SIOCSHWTSTAMP, saved_hwtstamp_ifname, &saved_hwtstamp_cfg);
+}
+
+static void hwtstamp_enable(const char *ifname)
+{
+ struct hwtstamp_config cfg = {
+ .rx_filter = HWTSTAMP_FILTER_ALL,
+ };
+
+ hwtstamp_ioctl(SIOCGHWTSTAMP, ifname, &saved_hwtstamp_cfg);
+ saved_hwtstamp_ifname = strdup(ifname);
+ atexit(hwtstamp_restore);
+
+ hwtstamp_ioctl(SIOCSHWTSTAMP, ifname, &cfg);
+}
+
+static void cleanup(void)
+{
+ LIBBPF_OPTS(bpf_xdp_attach_opts, opts);
+ int ret;
+ int i;
+
+ if (bpf_obj) {
+ opts.old_prog_fd = bpf_program__fd(bpf_obj->progs.rx);
+ if (opts.old_prog_fd >= 0) {
+ printf("detaching bpf program....\n");
+ ret = bpf_xdp_detach(ifindex, XDP_FLAGS, &opts);
+ if (ret)
+ printf("failed to detach XDP program: %d\n", ret);
+ }
+ }
+
+ for (i = 0; i < rxq; i++)
+ close_xsk(&rx_xsk[i]);
+
+ if (bpf_obj)
+ xdp_hw_metadata__destroy(bpf_obj);
+}
+
+static void handle_signal(int sig)
+{
+ /* interrupting poll() is all we need */
+}
+
+static void timestamping_enable(int fd, int val)
+{
+ int ret;
+
+ ret = setsockopt(fd, SOL_SOCKET, SO_TIMESTAMPING, &val, sizeof(val));
+ if (ret < 0)
+ error(1, errno, "setsockopt(SO_TIMESTAMPING)");
+}
+
+static void print_usage(void)
+{
+ const char *usage =
+ "Usage: xdp_hw_metadata [OPTIONS] [IFNAME]\n"
+ " -c Run in copy mode (zerocopy is default)\n"
+ " -h Display this help and exit\n\n"
+ " -m Enable multi-buffer XDP for larger MTU\n"
+ " -r Don't generate AF_XDP reply (rx metadata only)\n"
+ "Generate test packets on the other machine with:\n"
+ " echo -n xdp | nc -u -q1 <dst_ip> 9091\n";
+
+ printf("%s", usage);
+}
+
+static void read_args(int argc, char *argv[])
+{
+ int opt;
+
+ while ((opt = getopt(argc, argv, "chmr")) != -1) {
+ switch (opt) {
+ case 'c':
+ bind_flags &= ~XDP_USE_NEED_WAKEUP;
+ bind_flags &= ~XDP_ZEROCOPY;
+ bind_flags |= XDP_COPY;
+ break;
+ case 'h':
+ print_usage();
+ exit(0);
+ case 'm':
+ bind_flags |= XDP_USE_SG;
+ break;
+ case 'r':
+ skip_tx = true;
+ break;
+ case '?':
+ if (isprint(optopt))
+ fprintf(stderr, "Unknown option: -%c\n", optopt);
+ fallthrough;
+ default:
+ print_usage();
+ error(-1, opterr, "Command line options error");
+ }
+ }
+
+ if (optind >= argc) {
+ fprintf(stderr, "No device name provided\n");
+ print_usage();
+ exit(-1);
+ }
+
+ ifname = argv[optind];
+ ifindex = if_nametoindex(ifname);
+
+ if (!ifname)
+ error(-1, errno, "Invalid interface name");
+}
+
+int main(int argc, char *argv[])
+{
+ clockid_t clock_id = CLOCK_TAI;
+ int server_fd = -1;
+ int ret;
+ int i;
+
+ struct bpf_program *prog;
+
+ read_args(argc, argv);
+
+ rxq = rxq_num(ifname);
+
+ printf("rxq: %d\n", rxq);
+
+ hwtstamp_enable(ifname);
+
+ rx_xsk = malloc(sizeof(struct xsk) * rxq);
+ if (!rx_xsk)
+ error(1, ENOMEM, "malloc");
+
+ for (i = 0; i < rxq; i++) {
+ printf("open_xsk(%s, %p, %d)\n", ifname, &rx_xsk[i], i);
+ ret = open_xsk(ifindex, &rx_xsk[i], i);
+ if (ret)
+ error(1, -ret, "open_xsk");
+
+ printf("xsk_socket__fd() -> %d\n", xsk_socket__fd(rx_xsk[i].socket));
+ }
+
+ printf("open bpf program...\n");
+ bpf_obj = xdp_hw_metadata__open();
+ if (libbpf_get_error(bpf_obj))
+ error(1, libbpf_get_error(bpf_obj), "xdp_hw_metadata__open");
+
+ prog = bpf_object__find_program_by_name(bpf_obj->obj, "rx");
+ bpf_program__set_ifindex(prog, ifindex);
+ bpf_program__set_flags(prog, BPF_F_XDP_DEV_BOUND_ONLY);
+
+ printf("load bpf program...\n");
+ ret = xdp_hw_metadata__load(bpf_obj);
+ if (ret)
+ error(1, -ret, "xdp_hw_metadata__load");
+
+ printf("prepare skb endpoint...\n");
+ server_fd = start_server(AF_INET6, SOCK_DGRAM, NULL, 9092, 1000);
+ if (server_fd < 0)
+ error(1, errno, "start_server");
+ timestamping_enable(server_fd,
+ SOF_TIMESTAMPING_SOFTWARE |
+ SOF_TIMESTAMPING_RAW_HARDWARE);
+
+ printf("prepare xsk map...\n");
+ for (i = 0; i < rxq; i++) {
+ int sock_fd = xsk_socket__fd(rx_xsk[i].socket);
+ __u32 queue_id = i;
+
+ printf("map[%d] = %d\n", queue_id, sock_fd);
+ ret = bpf_map_update_elem(bpf_map__fd(bpf_obj->maps.xsk), &queue_id, &sock_fd, 0);
+ if (ret)
+ error(1, -ret, "bpf_map_update_elem");
+ }
+
+ printf("attach bpf program...\n");
+ ret = bpf_xdp_attach(ifindex,
+ bpf_program__fd(bpf_obj->progs.rx),
+ XDP_FLAGS, NULL);
+ if (ret)
+ error(1, -ret, "bpf_xdp_attach");
+
+ signal(SIGINT, handle_signal);
+ ret = verify_metadata(rx_xsk, rxq, server_fd, clock_id);
+ close(server_fd);
+ cleanup();
+ if (ret)
+ error(1, -ret, "verify_metadata");
+}
diff --git a/tools/testing/selftests/bpf/xdp_metadata.h b/tools/testing/selftests/bpf/xdp_metadata.h
new file mode 100644
index 000000000000..87318ad1117a
--- /dev/null
+++ b/tools/testing/selftests/bpf/xdp_metadata.h
@@ -0,0 +1,52 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#pragma once
+
+#ifndef ETH_P_IP
+#define ETH_P_IP 0x0800
+#endif
+
+#ifndef ETH_P_IPV6
+#define ETH_P_IPV6 0x86DD
+#endif
+
+#ifndef ETH_P_8021Q
+#define ETH_P_8021Q 0x8100
+#endif
+
+#ifndef ETH_P_8021AD
+#define ETH_P_8021AD 0x88A8
+#endif
+
+#ifndef BIT
+#define BIT(nr) (1 << (nr))
+#endif
+
+/* Non-existent checksum status */
+#define XDP_CHECKSUM_MAGIC BIT(2)
+
+enum xdp_meta_field {
+ XDP_META_FIELD_TS = BIT(0),
+ XDP_META_FIELD_RSS = BIT(1),
+ XDP_META_FIELD_VLAN_TAG = BIT(2),
+};
+
+struct xdp_meta {
+ union {
+ __u64 rx_timestamp;
+ __s32 rx_timestamp_err;
+ };
+ __u64 xdp_timestamp;
+ __u32 rx_hash;
+ union {
+ __u32 rx_hash_type;
+ __s32 rx_hash_err;
+ };
+ union {
+ struct {
+ __be16 rx_vlan_proto;
+ __u16 rx_vlan_tci;
+ };
+ __s32 rx_vlan_tag_err;
+ };
+ enum xdp_meta_field hint_valid;
+};
diff --git a/tools/testing/selftests/bpf/xdp_redirect_multi.c b/tools/testing/selftests/bpf/xdp_redirect_multi.c
new file mode 100644
index 000000000000..c1fc44c87c30
--- /dev/null
+++ b/tools/testing/selftests/bpf/xdp_redirect_multi.c
@@ -0,0 +1,226 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <linux/bpf.h>
+#include <linux/if_link.h>
+#include <assert.h>
+#include <errno.h>
+#include <signal.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <net/if.h>
+#include <unistd.h>
+#include <libgen.h>
+#include <sys/ioctl.h>
+#include <sys/types.h>
+#include <sys/socket.h>
+#include <netinet/in.h>
+
+#include "bpf_util.h"
+#include <bpf/bpf.h>
+#include <bpf/libbpf.h>
+
+#define MAX_IFACE_NUM 32
+#define MAX_INDEX_NUM 1024
+
+static __u32 xdp_flags = XDP_FLAGS_UPDATE_IF_NOEXIST;
+static int ifaces[MAX_IFACE_NUM] = {};
+
+static void int_exit(int sig)
+{
+ __u32 prog_id = 0;
+ int i;
+
+ for (i = 0; ifaces[i] > 0; i++) {
+ if (bpf_xdp_query_id(ifaces[i], xdp_flags, &prog_id)) {
+ printf("bpf_xdp_query_id failed\n");
+ exit(1);
+ }
+ if (prog_id)
+ bpf_xdp_detach(ifaces[i], xdp_flags, NULL);
+ }
+
+ exit(0);
+}
+
+static int get_mac_addr(unsigned int ifindex, void *mac_addr)
+{
+ char ifname[IF_NAMESIZE];
+ struct ifreq ifr;
+ int fd, ret = -1;
+
+ fd = socket(AF_INET, SOCK_DGRAM, 0);
+ if (fd < 0)
+ return ret;
+
+ if (!if_indextoname(ifindex, ifname))
+ goto err_out;
+
+ strcpy(ifr.ifr_name, ifname);
+
+ if (ioctl(fd, SIOCGIFHWADDR, &ifr) != 0)
+ goto err_out;
+
+ memcpy(mac_addr, ifr.ifr_hwaddr.sa_data, 6 * sizeof(char));
+ ret = 0;
+
+err_out:
+ close(fd);
+ return ret;
+}
+
+static void usage(const char *prog)
+{
+ fprintf(stderr,
+ "usage: %s [OPTS] <IFNAME|IFINDEX> <IFNAME|IFINDEX> ...\n"
+ "OPTS:\n"
+ " -S use skb-mode\n"
+ " -N enforce native mode\n"
+ " -F force loading prog\n"
+ " -X load xdp program on egress\n",
+ prog);
+}
+
+int main(int argc, char **argv)
+{
+ int prog_fd, group_all, mac_map;
+ struct bpf_program *ingress_prog, *egress_prog;
+ int i, err, ret, opt, egress_prog_fd = 0;
+ struct bpf_devmap_val devmap_val;
+ bool attach_egress_prog = false;
+ unsigned char mac_addr[6];
+ char ifname[IF_NAMESIZE];
+ struct bpf_object *obj;
+ unsigned int ifindex;
+ char filename[256];
+
+ while ((opt = getopt(argc, argv, "SNFX")) != -1) {
+ switch (opt) {
+ case 'S':
+ xdp_flags |= XDP_FLAGS_SKB_MODE;
+ break;
+ case 'N':
+ /* default, set below */
+ break;
+ case 'F':
+ xdp_flags &= ~XDP_FLAGS_UPDATE_IF_NOEXIST;
+ break;
+ case 'X':
+ attach_egress_prog = true;
+ break;
+ default:
+ usage(basename(argv[0]));
+ return 1;
+ }
+ }
+
+ if (!(xdp_flags & XDP_FLAGS_SKB_MODE)) {
+ xdp_flags |= XDP_FLAGS_DRV_MODE;
+ } else if (attach_egress_prog) {
+ printf("Load xdp program on egress with SKB mode not supported yet\n");
+ goto err_out;
+ }
+
+ if (optind == argc) {
+ printf("usage: %s <IFNAME|IFINDEX> <IFNAME|IFINDEX> ...\n", argv[0]);
+ goto err_out;
+ }
+
+ printf("Get interfaces:");
+ for (i = 0; i < MAX_IFACE_NUM && argv[optind + i]; i++) {
+ ifaces[i] = if_nametoindex(argv[optind + i]);
+ if (!ifaces[i])
+ ifaces[i] = strtoul(argv[optind + i], NULL, 0);
+ if (!if_indextoname(ifaces[i], ifname)) {
+ perror("Invalid interface name or i");
+ goto err_out;
+ }
+ if (ifaces[i] > MAX_INDEX_NUM) {
+ printf(" interface index too large\n");
+ goto err_out;
+ }
+ printf(" %d", ifaces[i]);
+ }
+ printf("\n");
+
+ snprintf(filename, sizeof(filename), "%s_kern.bpf.o", argv[0]);
+ obj = bpf_object__open_file(filename, NULL);
+ err = libbpf_get_error(obj);
+ if (err)
+ goto err_out;
+ err = bpf_object__load(obj);
+ if (err)
+ goto err_out;
+ prog_fd = bpf_program__fd(bpf_object__next_program(obj, NULL));
+
+ if (attach_egress_prog)
+ group_all = bpf_object__find_map_fd_by_name(obj, "map_egress");
+ else
+ group_all = bpf_object__find_map_fd_by_name(obj, "map_all");
+ mac_map = bpf_object__find_map_fd_by_name(obj, "mac_map");
+
+ if (group_all < 0 || mac_map < 0) {
+ printf("bpf_object__find_map_fd_by_name failed\n");
+ goto err_out;
+ }
+
+ if (attach_egress_prog) {
+ /* Find ingress/egress prog for 2nd xdp prog */
+ ingress_prog = bpf_object__find_program_by_name(obj, "xdp_redirect_map_all_prog");
+ egress_prog = bpf_object__find_program_by_name(obj, "xdp_devmap_prog");
+ if (!ingress_prog || !egress_prog) {
+ printf("finding ingress/egress_prog in obj file failed\n");
+ goto err_out;
+ }
+ prog_fd = bpf_program__fd(ingress_prog);
+ egress_prog_fd = bpf_program__fd(egress_prog);
+ if (prog_fd < 0 || egress_prog_fd < 0) {
+ printf("find egress_prog fd failed\n");
+ goto err_out;
+ }
+ }
+
+ signal(SIGINT, int_exit);
+ signal(SIGTERM, int_exit);
+
+ /* Init forward multicast groups and exclude group */
+ for (i = 0; ifaces[i] > 0; i++) {
+ ifindex = ifaces[i];
+
+ if (attach_egress_prog) {
+ ret = get_mac_addr(ifindex, mac_addr);
+ if (ret < 0) {
+ printf("get interface %d mac failed\n", ifindex);
+ goto err_out;
+ }
+ ret = bpf_map_update_elem(mac_map, &ifindex, mac_addr, 0);
+ if (ret) {
+ perror("bpf_update_elem mac_map failed\n");
+ goto err_out;
+ }
+ }
+
+ /* Add all the interfaces to group all */
+ devmap_val.ifindex = ifindex;
+ devmap_val.bpf_prog.fd = egress_prog_fd;
+ ret = bpf_map_update_elem(group_all, &ifindex, &devmap_val, 0);
+ if (ret) {
+ perror("bpf_map_update_elem");
+ goto err_out;
+ }
+
+ /* bind prog_fd to each interface */
+ ret = bpf_xdp_attach(ifindex, prog_fd, xdp_flags, NULL);
+ if (ret) {
+ printf("Set xdp fd failed on %d\n", ifindex);
+ goto err_out;
+ }
+ }
+
+ /* sleep some time for testing */
+ sleep(999);
+
+ return 0;
+
+err_out:
+ return 1;
+}
diff --git a/tools/testing/selftests/bpf/xdp_synproxy.c b/tools/testing/selftests/bpf/xdp_synproxy.c
new file mode 100644
index 000000000000..ce68c342b56f
--- /dev/null
+++ b/tools/testing/selftests/bpf/xdp_synproxy.c
@@ -0,0 +1,471 @@
+// SPDX-License-Identifier: LGPL-2.1 OR BSD-2-Clause
+/* Copyright (c) 2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved. */
+
+#include <stdnoreturn.h>
+#include <stdlib.h>
+#include <stdio.h>
+#include <string.h>
+#include <errno.h>
+#include <unistd.h>
+#include <getopt.h>
+#include <signal.h>
+#include <sys/types.h>
+#include <bpf/bpf.h>
+#include <bpf/libbpf.h>
+#include <net/if.h>
+#include <linux/if_link.h>
+#include <linux/limits.h>
+
+static unsigned int ifindex;
+static __u32 attached_prog_id;
+static bool attached_tc;
+
+static void noreturn cleanup(int sig)
+{
+ LIBBPF_OPTS(bpf_xdp_attach_opts, opts);
+ int prog_fd;
+ int err;
+
+ if (attached_prog_id == 0)
+ exit(0);
+
+ if (attached_tc) {
+ LIBBPF_OPTS(bpf_tc_hook, hook,
+ .ifindex = ifindex,
+ .attach_point = BPF_TC_INGRESS);
+
+ err = bpf_tc_hook_destroy(&hook);
+ if (err < 0) {
+ fprintf(stderr, "Error: bpf_tc_hook_destroy: %s\n", strerror(-err));
+ fprintf(stderr, "Failed to destroy the TC hook\n");
+ exit(1);
+ }
+ exit(0);
+ }
+
+ prog_fd = bpf_prog_get_fd_by_id(attached_prog_id);
+ if (prog_fd < 0) {
+ fprintf(stderr, "Error: bpf_prog_get_fd_by_id: %s\n", strerror(-prog_fd));
+ err = bpf_xdp_attach(ifindex, -1, 0, NULL);
+ if (err < 0) {
+ fprintf(stderr, "Error: bpf_set_link_xdp_fd: %s\n", strerror(-err));
+ fprintf(stderr, "Failed to detach XDP program\n");
+ exit(1);
+ }
+ } else {
+ opts.old_prog_fd = prog_fd;
+ err = bpf_xdp_attach(ifindex, -1, XDP_FLAGS_REPLACE, &opts);
+ close(prog_fd);
+ if (err < 0) {
+ fprintf(stderr, "Error: bpf_set_link_xdp_fd_opts: %s\n", strerror(-err));
+ /* Not an error if already replaced by someone else. */
+ if (err != -EEXIST) {
+ fprintf(stderr, "Failed to detach XDP program\n");
+ exit(1);
+ }
+ }
+ }
+ exit(0);
+}
+
+static noreturn void usage(const char *progname)
+{
+ fprintf(stderr, "Usage: %s [--iface <iface>|--prog <prog_id>] [--mss4 <mss ipv4> --mss6 <mss ipv6> --wscale <wscale> --ttl <ttl>] [--ports <port1>,<port2>,...] [--single] [--tc]\n",
+ progname);
+ exit(1);
+}
+
+static unsigned long parse_arg_ul(const char *progname, const char *arg, unsigned long limit)
+{
+ unsigned long res;
+ char *endptr;
+
+ errno = 0;
+ res = strtoul(arg, &endptr, 10);
+ if (errno != 0 || *endptr != '\0' || arg[0] == '\0' || res > limit)
+ usage(progname);
+
+ return res;
+}
+
+static void parse_options(int argc, char *argv[], unsigned int *ifindex, __u32 *prog_id,
+ __u64 *tcpipopts, char **ports, bool *single, bool *tc)
+{
+ static struct option long_options[] = {
+ { "help", no_argument, NULL, 'h' },
+ { "iface", required_argument, NULL, 'i' },
+ { "prog", required_argument, NULL, 'x' },
+ { "mss4", required_argument, NULL, 4 },
+ { "mss6", required_argument, NULL, 6 },
+ { "wscale", required_argument, NULL, 'w' },
+ { "ttl", required_argument, NULL, 't' },
+ { "ports", required_argument, NULL, 'p' },
+ { "single", no_argument, NULL, 's' },
+ { "tc", no_argument, NULL, 'c' },
+ { NULL, 0, NULL, 0 },
+ };
+ unsigned long mss4, wscale, ttl;
+ unsigned long long mss6;
+ unsigned int tcpipopts_mask = 0;
+
+ if (argc < 2)
+ usage(argv[0]);
+
+ *ifindex = 0;
+ *prog_id = 0;
+ *tcpipopts = 0;
+ *ports = NULL;
+ *single = false;
+ *tc = false;
+
+ while (true) {
+ int opt;
+
+ opt = getopt_long(argc, argv, "", long_options, NULL);
+ if (opt == -1)
+ break;
+
+ switch (opt) {
+ case 'h':
+ usage(argv[0]);
+ break;
+ case 'i':
+ *ifindex = if_nametoindex(optarg);
+ if (*ifindex == 0)
+ usage(argv[0]);
+ break;
+ case 'x':
+ *prog_id = parse_arg_ul(argv[0], optarg, UINT32_MAX);
+ if (*prog_id == 0)
+ usage(argv[0]);
+ break;
+ case 4:
+ mss4 = parse_arg_ul(argv[0], optarg, UINT16_MAX);
+ tcpipopts_mask |= 1 << 0;
+ break;
+ case 6:
+ mss6 = parse_arg_ul(argv[0], optarg, UINT16_MAX);
+ tcpipopts_mask |= 1 << 1;
+ break;
+ case 'w':
+ wscale = parse_arg_ul(argv[0], optarg, 14);
+ tcpipopts_mask |= 1 << 2;
+ break;
+ case 't':
+ ttl = parse_arg_ul(argv[0], optarg, UINT8_MAX);
+ tcpipopts_mask |= 1 << 3;
+ break;
+ case 'p':
+ *ports = optarg;
+ break;
+ case 's':
+ *single = true;
+ break;
+ case 'c':
+ *tc = true;
+ break;
+ default:
+ usage(argv[0]);
+ }
+ }
+ if (optind < argc)
+ usage(argv[0]);
+
+ if (tcpipopts_mask == 0xf) {
+ if (mss4 == 0 || mss6 == 0 || wscale == 0 || ttl == 0)
+ usage(argv[0]);
+ *tcpipopts = (mss6 << 32) | (ttl << 24) | (wscale << 16) | mss4;
+ } else if (tcpipopts_mask != 0) {
+ usage(argv[0]);
+ }
+
+ if (*ifindex != 0 && *prog_id != 0)
+ usage(argv[0]);
+ if (*ifindex == 0 && *prog_id == 0)
+ usage(argv[0]);
+}
+
+static int syncookie_attach(const char *argv0, unsigned int ifindex, bool tc)
+{
+ struct bpf_prog_info info = {};
+ __u32 info_len = sizeof(info);
+ char xdp_filename[PATH_MAX];
+ struct bpf_program *prog;
+ struct bpf_object *obj;
+ int prog_fd;
+ int err;
+
+ snprintf(xdp_filename, sizeof(xdp_filename), "%s_kern.bpf.o", argv0);
+ obj = bpf_object__open_file(xdp_filename, NULL);
+ err = libbpf_get_error(obj);
+ if (err < 0) {
+ fprintf(stderr, "Error: bpf_object__open_file: %s\n", strerror(-err));
+ return err;
+ }
+
+ err = bpf_object__load(obj);
+ if (err < 0) {
+ fprintf(stderr, "Error: bpf_object__open_file: %s\n", strerror(-err));
+ return err;
+ }
+
+ prog = bpf_object__find_program_by_name(obj, tc ? "syncookie_tc" : "syncookie_xdp");
+ if (!prog) {
+ fprintf(stderr, "Error: bpf_object__find_program_by_name: program was not found\n");
+ return -ENOENT;
+ }
+
+ prog_fd = bpf_program__fd(prog);
+
+ err = bpf_prog_get_info_by_fd(prog_fd, &info, &info_len);
+ if (err < 0) {
+ fprintf(stderr, "Error: bpf_prog_get_info_by_fd: %s\n",
+ strerror(-err));
+ goto out;
+ }
+ attached_tc = tc;
+ attached_prog_id = info.id;
+ signal(SIGINT, cleanup);
+ signal(SIGTERM, cleanup);
+ if (tc) {
+ LIBBPF_OPTS(bpf_tc_hook, hook,
+ .ifindex = ifindex,
+ .attach_point = BPF_TC_INGRESS);
+ LIBBPF_OPTS(bpf_tc_opts, opts,
+ .handle = 1,
+ .priority = 1,
+ .prog_fd = prog_fd);
+
+ err = bpf_tc_hook_create(&hook);
+ if (err < 0) {
+ fprintf(stderr, "Error: bpf_tc_hook_create: %s\n",
+ strerror(-err));
+ goto fail;
+ }
+ err = bpf_tc_attach(&hook, &opts);
+ if (err < 0) {
+ fprintf(stderr, "Error: bpf_tc_attach: %s\n",
+ strerror(-err));
+ goto fail;
+ }
+
+ } else {
+ err = bpf_xdp_attach(ifindex, prog_fd,
+ XDP_FLAGS_UPDATE_IF_NOEXIST, NULL);
+ if (err < 0) {
+ fprintf(stderr, "Error: bpf_set_link_xdp_fd: %s\n",
+ strerror(-err));
+ goto fail;
+ }
+ }
+ err = 0;
+out:
+ bpf_object__close(obj);
+ return err;
+fail:
+ signal(SIGINT, SIG_DFL);
+ signal(SIGTERM, SIG_DFL);
+ attached_prog_id = 0;
+ goto out;
+}
+
+static int syncookie_open_bpf_maps(__u32 prog_id, int *values_map_fd, int *ports_map_fd)
+{
+ struct bpf_prog_info prog_info;
+ __u32 map_ids[8];
+ __u32 info_len;
+ int prog_fd;
+ int err;
+ int i;
+
+ *values_map_fd = -1;
+ *ports_map_fd = -1;
+
+ prog_fd = bpf_prog_get_fd_by_id(prog_id);
+ if (prog_fd < 0) {
+ fprintf(stderr, "Error: bpf_prog_get_fd_by_id: %s\n", strerror(-prog_fd));
+ return prog_fd;
+ }
+
+ prog_info = (struct bpf_prog_info) {
+ .nr_map_ids = 8,
+ .map_ids = (__u64)(unsigned long)map_ids,
+ };
+ info_len = sizeof(prog_info);
+
+ err = bpf_prog_get_info_by_fd(prog_fd, &prog_info, &info_len);
+ if (err != 0) {
+ fprintf(stderr, "Error: bpf_prog_get_info_by_fd: %s\n",
+ strerror(-err));
+ goto out;
+ }
+
+ if (prog_info.nr_map_ids < 2) {
+ fprintf(stderr, "Error: Found %u BPF maps, expected at least 2\n",
+ prog_info.nr_map_ids);
+ err = -ENOENT;
+ goto out;
+ }
+
+ for (i = 0; i < prog_info.nr_map_ids; i++) {
+ struct bpf_map_info map_info = {};
+ int map_fd;
+
+ err = bpf_map_get_fd_by_id(map_ids[i]);
+ if (err < 0) {
+ fprintf(stderr, "Error: bpf_map_get_fd_by_id: %s\n", strerror(-err));
+ goto err_close_map_fds;
+ }
+ map_fd = err;
+
+ info_len = sizeof(map_info);
+ err = bpf_map_get_info_by_fd(map_fd, &map_info, &info_len);
+ if (err != 0) {
+ fprintf(stderr, "Error: bpf_map_get_info_by_fd: %s\n",
+ strerror(-err));
+ close(map_fd);
+ goto err_close_map_fds;
+ }
+ if (strcmp(map_info.name, "values") == 0) {
+ *values_map_fd = map_fd;
+ continue;
+ }
+ if (strcmp(map_info.name, "allowed_ports") == 0) {
+ *ports_map_fd = map_fd;
+ continue;
+ }
+ close(map_fd);
+ }
+
+ if (*values_map_fd != -1 && *ports_map_fd != -1) {
+ err = 0;
+ goto out;
+ }
+
+ err = -ENOENT;
+
+err_close_map_fds:
+ if (*values_map_fd != -1)
+ close(*values_map_fd);
+ if (*ports_map_fd != -1)
+ close(*ports_map_fd);
+ *values_map_fd = -1;
+ *ports_map_fd = -1;
+
+out:
+ close(prog_fd);
+ return err;
+}
+
+int main(int argc, char *argv[])
+{
+ int values_map_fd, ports_map_fd;
+ __u64 tcpipopts;
+ bool firstiter;
+ __u64 prevcnt;
+ __u32 prog_id;
+ char *ports;
+ bool single;
+ int err = 0;
+ bool tc;
+
+ parse_options(argc, argv, &ifindex, &prog_id, &tcpipopts, &ports,
+ &single, &tc);
+
+ if (prog_id == 0) {
+ if (!tc) {
+ err = bpf_xdp_query_id(ifindex, 0, &prog_id);
+ if (err < 0) {
+ fprintf(stderr, "Error: bpf_get_link_xdp_id: %s\n",
+ strerror(-err));
+ goto out;
+ }
+ }
+ if (prog_id == 0) {
+ err = syncookie_attach(argv[0], ifindex, tc);
+ if (err < 0)
+ goto out;
+ prog_id = attached_prog_id;
+ }
+ }
+
+ err = syncookie_open_bpf_maps(prog_id, &values_map_fd, &ports_map_fd);
+ if (err < 0)
+ goto out;
+
+ if (ports) {
+ __u16 port_last = 0;
+ __u32 port_idx = 0;
+ char *p = ports;
+
+ fprintf(stderr, "Replacing allowed ports\n");
+
+ while (p && *p != '\0') {
+ char *token = strsep(&p, ",");
+ __u16 port;
+
+ port = parse_arg_ul(argv[0], token, UINT16_MAX);
+ err = bpf_map_update_elem(ports_map_fd, &port_idx, &port, BPF_ANY);
+ if (err != 0) {
+ fprintf(stderr, "Error: bpf_map_update_elem: %s\n", strerror(-err));
+ fprintf(stderr, "Failed to add port %u (index %u)\n",
+ port, port_idx);
+ goto out_close_maps;
+ }
+ fprintf(stderr, "Added port %u\n", port);
+ port_idx++;
+ }
+ err = bpf_map_update_elem(ports_map_fd, &port_idx, &port_last, BPF_ANY);
+ if (err != 0) {
+ fprintf(stderr, "Error: bpf_map_update_elem: %s\n", strerror(-err));
+ fprintf(stderr, "Failed to add the terminator value 0 (index %u)\n",
+ port_idx);
+ goto out_close_maps;
+ }
+ }
+
+ if (tcpipopts) {
+ __u32 key = 0;
+
+ fprintf(stderr, "Replacing TCP/IP options\n");
+
+ err = bpf_map_update_elem(values_map_fd, &key, &tcpipopts, BPF_ANY);
+ if (err != 0) {
+ fprintf(stderr, "Error: bpf_map_update_elem: %s\n", strerror(-err));
+ goto out_close_maps;
+ }
+ }
+
+ if ((ports || tcpipopts) && attached_prog_id == 0 && !single)
+ goto out_close_maps;
+
+ prevcnt = 0;
+ firstiter = true;
+ while (true) {
+ __u32 key = 1;
+ __u64 value;
+
+ err = bpf_map_lookup_elem(values_map_fd, &key, &value);
+ if (err != 0) {
+ fprintf(stderr, "Error: bpf_map_lookup_elem: %s\n", strerror(-err));
+ goto out_close_maps;
+ }
+ if (firstiter) {
+ prevcnt = value;
+ firstiter = false;
+ }
+ if (single) {
+ printf("Total SYNACKs generated: %llu\n", value);
+ break;
+ }
+ printf("SYNACKs generated: %llu (total %llu)\n", value - prevcnt, value);
+ prevcnt = value;
+ sleep(1);
+ }
+
+out_close_maps:
+ close(values_map_fd);
+ close(ports_map_fd);
+out:
+ return err == 0 ? 0 : 1;
+}
diff --git a/tools/testing/selftests/bpf/xdping.c b/tools/testing/selftests/bpf/xdping.c
index 842d9155d36c..1503a1d2faa0 100644
--- a/tools/testing/selftests/bpf/xdping.c
+++ b/tools/testing/selftests/bpf/xdping.c
@@ -12,7 +12,6 @@
#include <string.h>
#include <unistd.h>
#include <libgen.h>
-#include <sys/resource.h>
#include <net/if.h>
#include <sys/types.h>
#include <sys/socket.h>
@@ -22,13 +21,14 @@
#include "bpf/libbpf.h"
#include "xdping.h"
+#include "testing_helpers.h"
static int ifindex;
static __u32 xdp_flags = XDP_FLAGS_UPDATE_IF_NOEXIST;
static void cleanup(int sig)
{
- bpf_set_link_xdp_fd(ifindex, -1, xdp_flags);
+ bpf_xdp_detach(ifindex, xdp_flags, NULL);
if (sig)
exit(1);
}
@@ -88,7 +88,6 @@ int main(int argc, char **argv)
{
__u32 mode_flags = XDP_FLAGS_DRV_MODE | XDP_FLAGS_SKB_MODE;
struct addrinfo *a, hints = { .ai_family = AF_INET };
- struct rlimit r = {RLIM_INFINITY, RLIM_INFINITY};
__u16 count = XDPING_DEFAULT_COUNT;
struct pinginfo pinginfo = { 0 };
const char *optstr = "c:I:NsS";
@@ -166,21 +165,18 @@ int main(int argc, char **argv)
freeaddrinfo(a);
}
- if (setrlimit(RLIMIT_MEMLOCK, &r)) {
- perror("setrlimit(RLIMIT_MEMLOCK)");
- return 1;
- }
+ /* Use libbpf 1.0 API mode */
+ libbpf_set_strict_mode(LIBBPF_STRICT_ALL);
- snprintf(filename, sizeof(filename), "%s_kern.o", argv[0]);
+ snprintf(filename, sizeof(filename), "%s_kern.bpf.o", argv[0]);
- if (bpf_prog_load(filename, BPF_PROG_TYPE_XDP, &obj, &prog_fd)) {
+ if (bpf_prog_test_load(filename, BPF_PROG_TYPE_XDP, &obj, &prog_fd)) {
fprintf(stderr, "load of %s failed\n", filename);
return 1;
}
- main_prog = bpf_object__find_program_by_title(obj,
- server ? "xdpserver" :
- "xdpclient");
+ main_prog = bpf_object__find_program_by_name(obj,
+ server ? "xdping_server" : "xdping_client");
if (main_prog)
prog_fd = bpf_program__fd(main_prog);
if (!main_prog || prog_fd < 0) {
@@ -188,7 +184,7 @@ int main(int argc, char **argv)
return 1;
}
- map = bpf_map__next(NULL, obj);
+ map = bpf_object__next_map(obj, NULL);
if (map)
map_fd = bpf_map__fd(map);
if (!map || map_fd < 0) {
@@ -203,7 +199,7 @@ int main(int argc, char **argv)
printf("XDP setup disrupts network connectivity, hit Ctrl+C to quit\n");
- if (bpf_set_link_xdp_fd(ifindex, prog_fd, xdp_flags) < 0) {
+ if (bpf_xdp_attach(ifindex, prog_fd, xdp_flags, NULL) < 0) {
fprintf(stderr, "Link set xdp fd failed for %s\n", ifname);
goto done;
}
diff --git a/tools/testing/selftests/bpf/xsk.c b/tools/testing/selftests/bpf/xsk.c
new file mode 100644
index 000000000000..25d568abf0f2
--- /dev/null
+++ b/tools/testing/selftests/bpf/xsk.c
@@ -0,0 +1,781 @@
+// SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause)
+
+/*
+ * AF_XDP user-space access library.
+ *
+ * Copyright(c) 2018 - 2019 Intel Corporation.
+ *
+ * Author(s): Magnus Karlsson <magnus.karlsson@intel.com>
+ */
+
+#include <errno.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+#include <arpa/inet.h>
+#include <asm/barrier.h>
+#include <linux/compiler.h>
+#include <linux/ethtool.h>
+#include <linux/filter.h>
+#include <linux/if_ether.h>
+#include <linux/if_link.h>
+#include <linux/if_packet.h>
+#include <linux/if_xdp.h>
+#include <linux/kernel.h>
+#include <linux/list.h>
+#include <linux/netlink.h>
+#include <linux/rtnetlink.h>
+#include <linux/sockios.h>
+#include <net/if.h>
+#include <sys/ioctl.h>
+#include <sys/mman.h>
+#include <sys/socket.h>
+#include <sys/types.h>
+
+#include <bpf/bpf.h>
+#include <bpf/libbpf.h>
+#include "xsk.h"
+#include "bpf_util.h"
+
+#ifndef SOL_XDP
+ #define SOL_XDP 283
+#endif
+
+#ifndef AF_XDP
+ #define AF_XDP 44
+#endif
+
+#ifndef PF_XDP
+ #define PF_XDP AF_XDP
+#endif
+
+#define pr_warn(fmt, ...) fprintf(stderr, fmt, ##__VA_ARGS__)
+
+#define XSKMAP_SIZE 1
+
+struct xsk_umem {
+ struct xsk_ring_prod *fill_save;
+ struct xsk_ring_cons *comp_save;
+ char *umem_area;
+ struct xsk_umem_config config;
+ int fd;
+ int refcount;
+ struct list_head ctx_list;
+ bool rx_ring_setup_done;
+ bool tx_ring_setup_done;
+};
+
+struct xsk_ctx {
+ struct xsk_ring_prod *fill;
+ struct xsk_ring_cons *comp;
+ __u32 queue_id;
+ struct xsk_umem *umem;
+ int refcount;
+ int ifindex;
+ struct list_head list;
+};
+
+struct xsk_socket {
+ struct xsk_ring_cons *rx;
+ struct xsk_ring_prod *tx;
+ struct xsk_ctx *ctx;
+ struct xsk_socket_config config;
+ int fd;
+};
+
+struct nl_mtu_req {
+ struct nlmsghdr nh;
+ struct ifinfomsg msg;
+ char buf[512];
+};
+
+int xsk_umem__fd(const struct xsk_umem *umem)
+{
+ return umem ? umem->fd : -EINVAL;
+}
+
+int xsk_socket__fd(const struct xsk_socket *xsk)
+{
+ return xsk ? xsk->fd : -EINVAL;
+}
+
+static bool xsk_page_aligned(void *buffer)
+{
+ unsigned long addr = (unsigned long)buffer;
+
+ return !(addr & (getpagesize() - 1));
+}
+
+static void xsk_set_umem_config(struct xsk_umem_config *cfg,
+ const struct xsk_umem_config *usr_cfg)
+{
+ if (!usr_cfg) {
+ cfg->fill_size = XSK_RING_PROD__DEFAULT_NUM_DESCS;
+ cfg->comp_size = XSK_RING_CONS__DEFAULT_NUM_DESCS;
+ cfg->frame_size = XSK_UMEM__DEFAULT_FRAME_SIZE;
+ cfg->frame_headroom = XSK_UMEM__DEFAULT_FRAME_HEADROOM;
+ cfg->flags = XSK_UMEM__DEFAULT_FLAGS;
+ cfg->tx_metadata_len = 0;
+ return;
+ }
+
+ cfg->fill_size = usr_cfg->fill_size;
+ cfg->comp_size = usr_cfg->comp_size;
+ cfg->frame_size = usr_cfg->frame_size;
+ cfg->frame_headroom = usr_cfg->frame_headroom;
+ cfg->flags = usr_cfg->flags;
+ cfg->tx_metadata_len = usr_cfg->tx_metadata_len;
+}
+
+static int xsk_set_xdp_socket_config(struct xsk_socket_config *cfg,
+ const struct xsk_socket_config *usr_cfg)
+{
+ if (!usr_cfg) {
+ cfg->rx_size = XSK_RING_CONS__DEFAULT_NUM_DESCS;
+ cfg->tx_size = XSK_RING_PROD__DEFAULT_NUM_DESCS;
+ cfg->bind_flags = 0;
+ return 0;
+ }
+
+ cfg->rx_size = usr_cfg->rx_size;
+ cfg->tx_size = usr_cfg->tx_size;
+ cfg->bind_flags = usr_cfg->bind_flags;
+
+ return 0;
+}
+
+static int xsk_get_mmap_offsets(int fd, struct xdp_mmap_offsets *off)
+{
+ socklen_t optlen;
+ int err;
+
+ optlen = sizeof(*off);
+ err = getsockopt(fd, SOL_XDP, XDP_MMAP_OFFSETS, off, &optlen);
+ if (err)
+ return err;
+
+ if (optlen == sizeof(*off))
+ return 0;
+
+ return -EINVAL;
+}
+
+static int xsk_create_umem_rings(struct xsk_umem *umem, int fd,
+ struct xsk_ring_prod *fill,
+ struct xsk_ring_cons *comp)
+{
+ struct xdp_mmap_offsets off;
+ void *map;
+ int err;
+
+ err = setsockopt(fd, SOL_XDP, XDP_UMEM_FILL_RING,
+ &umem->config.fill_size,
+ sizeof(umem->config.fill_size));
+ if (err)
+ return -errno;
+
+ err = setsockopt(fd, SOL_XDP, XDP_UMEM_COMPLETION_RING,
+ &umem->config.comp_size,
+ sizeof(umem->config.comp_size));
+ if (err)
+ return -errno;
+
+ err = xsk_get_mmap_offsets(fd, &off);
+ if (err)
+ return -errno;
+
+ map = mmap(NULL, off.fr.desc + umem->config.fill_size * sizeof(__u64),
+ PROT_READ | PROT_WRITE, MAP_SHARED | MAP_POPULATE, fd,
+ XDP_UMEM_PGOFF_FILL_RING);
+ if (map == MAP_FAILED)
+ return -errno;
+
+ fill->mask = umem->config.fill_size - 1;
+ fill->size = umem->config.fill_size;
+ fill->producer = map + off.fr.producer;
+ fill->consumer = map + off.fr.consumer;
+ fill->flags = map + off.fr.flags;
+ fill->ring = map + off.fr.desc;
+ fill->cached_cons = umem->config.fill_size;
+
+ map = mmap(NULL, off.cr.desc + umem->config.comp_size * sizeof(__u64),
+ PROT_READ | PROT_WRITE, MAP_SHARED | MAP_POPULATE, fd,
+ XDP_UMEM_PGOFF_COMPLETION_RING);
+ if (map == MAP_FAILED) {
+ err = -errno;
+ goto out_mmap;
+ }
+
+ comp->mask = umem->config.comp_size - 1;
+ comp->size = umem->config.comp_size;
+ comp->producer = map + off.cr.producer;
+ comp->consumer = map + off.cr.consumer;
+ comp->flags = map + off.cr.flags;
+ comp->ring = map + off.cr.desc;
+
+ return 0;
+
+out_mmap:
+ munmap(map, off.fr.desc + umem->config.fill_size * sizeof(__u64));
+ return err;
+}
+
+int xsk_umem__create(struct xsk_umem **umem_ptr, void *umem_area,
+ __u64 size, struct xsk_ring_prod *fill,
+ struct xsk_ring_cons *comp,
+ const struct xsk_umem_config *usr_config)
+{
+ struct xdp_umem_reg mr;
+ struct xsk_umem *umem;
+ int err;
+
+ if (!umem_area || !umem_ptr || !fill || !comp)
+ return -EFAULT;
+ if (!size && !xsk_page_aligned(umem_area))
+ return -EINVAL;
+
+ umem = calloc(1, sizeof(*umem));
+ if (!umem)
+ return -ENOMEM;
+
+ umem->fd = socket(AF_XDP, SOCK_RAW | SOCK_CLOEXEC, 0);
+ if (umem->fd < 0) {
+ err = -errno;
+ goto out_umem_alloc;
+ }
+
+ umem->umem_area = umem_area;
+ INIT_LIST_HEAD(&umem->ctx_list);
+ xsk_set_umem_config(&umem->config, usr_config);
+
+ memset(&mr, 0, sizeof(mr));
+ mr.addr = (uintptr_t)umem_area;
+ mr.len = size;
+ mr.chunk_size = umem->config.frame_size;
+ mr.headroom = umem->config.frame_headroom;
+ mr.flags = umem->config.flags;
+ mr.tx_metadata_len = umem->config.tx_metadata_len;
+
+ err = setsockopt(umem->fd, SOL_XDP, XDP_UMEM_REG, &mr, sizeof(mr));
+ if (err) {
+ err = -errno;
+ goto out_socket;
+ }
+
+ err = xsk_create_umem_rings(umem, umem->fd, fill, comp);
+ if (err)
+ goto out_socket;
+
+ umem->fill_save = fill;
+ umem->comp_save = comp;
+ *umem_ptr = umem;
+ return 0;
+
+out_socket:
+ close(umem->fd);
+out_umem_alloc:
+ free(umem);
+ return err;
+}
+
+bool xsk_is_in_mode(u32 ifindex, int mode)
+{
+ LIBBPF_OPTS(bpf_xdp_query_opts, opts);
+ int ret;
+
+ ret = bpf_xdp_query(ifindex, mode, &opts);
+ if (ret) {
+ printf("XDP mode query returned error %s\n", strerror(errno));
+ return false;
+ }
+
+ if (mode == XDP_FLAGS_DRV_MODE)
+ return opts.attach_mode == XDP_ATTACHED_DRV;
+ else if (mode == XDP_FLAGS_SKB_MODE)
+ return opts.attach_mode == XDP_ATTACHED_SKB;
+
+ return false;
+}
+
+/* Lifted from netlink.c in tools/lib/bpf */
+static int netlink_recvmsg(int sock, struct msghdr *mhdr, int flags)
+{
+ int len;
+
+ do {
+ len = recvmsg(sock, mhdr, flags);
+ } while (len < 0 && (errno == EINTR || errno == EAGAIN));
+
+ if (len < 0)
+ return -errno;
+ return len;
+}
+
+/* Lifted from netlink.c in tools/lib/bpf */
+static int alloc_iov(struct iovec *iov, int len)
+{
+ void *nbuf;
+
+ nbuf = realloc(iov->iov_base, len);
+ if (!nbuf)
+ return -ENOMEM;
+
+ iov->iov_base = nbuf;
+ iov->iov_len = len;
+ return 0;
+}
+
+/* Original version lifted from netlink.c in tools/lib/bpf */
+static int netlink_recv(int sock)
+{
+ struct iovec iov = {};
+ struct msghdr mhdr = {
+ .msg_iov = &iov,
+ .msg_iovlen = 1,
+ };
+ bool multipart = true;
+ struct nlmsgerr *err;
+ struct nlmsghdr *nh;
+ int len, ret;
+
+ ret = alloc_iov(&iov, 4096);
+ if (ret)
+ goto done;
+
+ while (multipart) {
+ multipart = false;
+ len = netlink_recvmsg(sock, &mhdr, MSG_PEEK | MSG_TRUNC);
+ if (len < 0) {
+ ret = len;
+ goto done;
+ }
+
+ if (len > iov.iov_len) {
+ ret = alloc_iov(&iov, len);
+ if (ret)
+ goto done;
+ }
+
+ len = netlink_recvmsg(sock, &mhdr, 0);
+ if (len < 0) {
+ ret = len;
+ goto done;
+ }
+
+ if (len == 0)
+ break;
+
+ for (nh = (struct nlmsghdr *)iov.iov_base; NLMSG_OK(nh, len);
+ nh = NLMSG_NEXT(nh, len)) {
+ if (nh->nlmsg_flags & NLM_F_MULTI)
+ multipart = true;
+ switch (nh->nlmsg_type) {
+ case NLMSG_ERROR:
+ err = (struct nlmsgerr *)NLMSG_DATA(nh);
+ if (!err->error)
+ continue;
+ ret = err->error;
+ goto done;
+ case NLMSG_DONE:
+ ret = 0;
+ goto done;
+ default:
+ break;
+ }
+ }
+ }
+ ret = 0;
+done:
+ free(iov.iov_base);
+ return ret;
+}
+
+int xsk_set_mtu(int ifindex, int mtu)
+{
+ struct nl_mtu_req req;
+ struct rtattr *rta;
+ int fd, ret;
+
+ fd = socket(AF_NETLINK, SOCK_DGRAM, NETLINK_ROUTE);
+ if (fd < 0)
+ return fd;
+
+ memset(&req, 0, sizeof(req));
+ req.nh.nlmsg_len = NLMSG_LENGTH(sizeof(struct ifinfomsg));
+ req.nh.nlmsg_flags = NLM_F_REQUEST | NLM_F_ACK;
+ req.nh.nlmsg_type = RTM_NEWLINK;
+ req.msg.ifi_family = AF_UNSPEC;
+ req.msg.ifi_index = ifindex;
+ rta = (struct rtattr *)(((char *)&req) + NLMSG_ALIGN(req.nh.nlmsg_len));
+ rta->rta_type = IFLA_MTU;
+ rta->rta_len = RTA_LENGTH(sizeof(unsigned int));
+ req.nh.nlmsg_len = NLMSG_ALIGN(req.nh.nlmsg_len) + RTA_LENGTH(sizeof(mtu));
+ memcpy(RTA_DATA(rta), &mtu, sizeof(mtu));
+
+ ret = send(fd, &req, req.nh.nlmsg_len, 0);
+ if (ret < 0) {
+ close(fd);
+ return errno;
+ }
+
+ ret = netlink_recv(fd);
+ close(fd);
+ return ret;
+}
+
+int xsk_attach_xdp_program(struct bpf_program *prog, int ifindex, u32 xdp_flags)
+{
+ int prog_fd;
+
+ prog_fd = bpf_program__fd(prog);
+ return bpf_xdp_attach(ifindex, prog_fd, xdp_flags, NULL);
+}
+
+void xsk_detach_xdp_program(int ifindex, u32 xdp_flags)
+{
+ bpf_xdp_detach(ifindex, xdp_flags, NULL);
+}
+
+void xsk_clear_xskmap(struct bpf_map *map)
+{
+ u32 index = 0;
+ int map_fd;
+
+ map_fd = bpf_map__fd(map);
+ bpf_map_delete_elem(map_fd, &index);
+}
+
+int xsk_update_xskmap(struct bpf_map *map, struct xsk_socket *xsk, u32 index)
+{
+ int map_fd, sock_fd;
+
+ map_fd = bpf_map__fd(map);
+ sock_fd = xsk_socket__fd(xsk);
+
+ return bpf_map_update_elem(map_fd, &index, &sock_fd, 0);
+}
+
+static struct xsk_ctx *xsk_get_ctx(struct xsk_umem *umem, int ifindex,
+ __u32 queue_id)
+{
+ struct xsk_ctx *ctx;
+
+ if (list_empty(&umem->ctx_list))
+ return NULL;
+
+ list_for_each_entry(ctx, &umem->ctx_list, list) {
+ if (ctx->ifindex == ifindex && ctx->queue_id == queue_id) {
+ ctx->refcount++;
+ return ctx;
+ }
+ }
+
+ return NULL;
+}
+
+static void xsk_put_ctx(struct xsk_ctx *ctx, bool unmap)
+{
+ struct xsk_umem *umem = ctx->umem;
+ struct xdp_mmap_offsets off;
+ int err;
+
+ if (--ctx->refcount)
+ return;
+
+ if (!unmap)
+ goto out_free;
+
+ err = xsk_get_mmap_offsets(umem->fd, &off);
+ if (err)
+ goto out_free;
+
+ munmap(ctx->fill->ring - off.fr.desc, off.fr.desc + umem->config.fill_size *
+ sizeof(__u64));
+ munmap(ctx->comp->ring - off.cr.desc, off.cr.desc + umem->config.comp_size *
+ sizeof(__u64));
+
+out_free:
+ list_del(&ctx->list);
+ free(ctx);
+}
+
+static struct xsk_ctx *xsk_create_ctx(struct xsk_socket *xsk,
+ struct xsk_umem *umem, int ifindex,
+ __u32 queue_id,
+ struct xsk_ring_prod *fill,
+ struct xsk_ring_cons *comp)
+{
+ struct xsk_ctx *ctx;
+ int err;
+
+ ctx = calloc(1, sizeof(*ctx));
+ if (!ctx)
+ return NULL;
+
+ if (!umem->fill_save) {
+ err = xsk_create_umem_rings(umem, xsk->fd, fill, comp);
+ if (err) {
+ free(ctx);
+ return NULL;
+ }
+ } else if (umem->fill_save != fill || umem->comp_save != comp) {
+ /* Copy over rings to new structs. */
+ memcpy(fill, umem->fill_save, sizeof(*fill));
+ memcpy(comp, umem->comp_save, sizeof(*comp));
+ }
+
+ ctx->ifindex = ifindex;
+ ctx->refcount = 1;
+ ctx->umem = umem;
+ ctx->queue_id = queue_id;
+
+ ctx->fill = fill;
+ ctx->comp = comp;
+ list_add(&ctx->list, &umem->ctx_list);
+ return ctx;
+}
+
+int xsk_socket__create_shared(struct xsk_socket **xsk_ptr,
+ int ifindex,
+ __u32 queue_id, struct xsk_umem *umem,
+ struct xsk_ring_cons *rx,
+ struct xsk_ring_prod *tx,
+ struct xsk_ring_prod *fill,
+ struct xsk_ring_cons *comp,
+ const struct xsk_socket_config *usr_config)
+{
+ bool unmap, rx_setup_done = false, tx_setup_done = false;
+ void *rx_map = NULL, *tx_map = NULL;
+ struct sockaddr_xdp sxdp = {};
+ struct xdp_mmap_offsets off;
+ struct xsk_socket *xsk;
+ struct xsk_ctx *ctx;
+ int err;
+
+ if (!umem || !xsk_ptr || !(rx || tx))
+ return -EFAULT;
+
+ unmap = umem->fill_save != fill;
+
+ xsk = calloc(1, sizeof(*xsk));
+ if (!xsk)
+ return -ENOMEM;
+
+ err = xsk_set_xdp_socket_config(&xsk->config, usr_config);
+ if (err)
+ goto out_xsk_alloc;
+
+ if (umem->refcount++ > 0) {
+ xsk->fd = socket(AF_XDP, SOCK_RAW | SOCK_CLOEXEC, 0);
+ if (xsk->fd < 0) {
+ err = -errno;
+ goto out_xsk_alloc;
+ }
+ } else {
+ xsk->fd = umem->fd;
+ rx_setup_done = umem->rx_ring_setup_done;
+ tx_setup_done = umem->tx_ring_setup_done;
+ }
+
+ ctx = xsk_get_ctx(umem, ifindex, queue_id);
+ if (!ctx) {
+ if (!fill || !comp) {
+ err = -EFAULT;
+ goto out_socket;
+ }
+
+ ctx = xsk_create_ctx(xsk, umem, ifindex, queue_id, fill, comp);
+ if (!ctx) {
+ err = -ENOMEM;
+ goto out_socket;
+ }
+ }
+ xsk->ctx = ctx;
+
+ if (rx && !rx_setup_done) {
+ err = setsockopt(xsk->fd, SOL_XDP, XDP_RX_RING,
+ &xsk->config.rx_size,
+ sizeof(xsk->config.rx_size));
+ if (err) {
+ err = -errno;
+ goto out_put_ctx;
+ }
+ if (xsk->fd == umem->fd)
+ umem->rx_ring_setup_done = true;
+ }
+ if (tx && !tx_setup_done) {
+ err = setsockopt(xsk->fd, SOL_XDP, XDP_TX_RING,
+ &xsk->config.tx_size,
+ sizeof(xsk->config.tx_size));
+ if (err) {
+ err = -errno;
+ goto out_put_ctx;
+ }
+ if (xsk->fd == umem->fd)
+ umem->tx_ring_setup_done = true;
+ }
+
+ err = xsk_get_mmap_offsets(xsk->fd, &off);
+ if (err) {
+ err = -errno;
+ goto out_put_ctx;
+ }
+
+ if (rx) {
+ rx_map = mmap(NULL, off.rx.desc +
+ xsk->config.rx_size * sizeof(struct xdp_desc),
+ PROT_READ | PROT_WRITE, MAP_SHARED | MAP_POPULATE,
+ xsk->fd, XDP_PGOFF_RX_RING);
+ if (rx_map == MAP_FAILED) {
+ err = -errno;
+ goto out_put_ctx;
+ }
+
+ rx->mask = xsk->config.rx_size - 1;
+ rx->size = xsk->config.rx_size;
+ rx->producer = rx_map + off.rx.producer;
+ rx->consumer = rx_map + off.rx.consumer;
+ rx->flags = rx_map + off.rx.flags;
+ rx->ring = rx_map + off.rx.desc;
+ rx->cached_prod = *rx->producer;
+ rx->cached_cons = *rx->consumer;
+ }
+ xsk->rx = rx;
+
+ if (tx) {
+ tx_map = mmap(NULL, off.tx.desc +
+ xsk->config.tx_size * sizeof(struct xdp_desc),
+ PROT_READ | PROT_WRITE, MAP_SHARED | MAP_POPULATE,
+ xsk->fd, XDP_PGOFF_TX_RING);
+ if (tx_map == MAP_FAILED) {
+ err = -errno;
+ goto out_mmap_rx;
+ }
+
+ tx->mask = xsk->config.tx_size - 1;
+ tx->size = xsk->config.tx_size;
+ tx->producer = tx_map + off.tx.producer;
+ tx->consumer = tx_map + off.tx.consumer;
+ tx->flags = tx_map + off.tx.flags;
+ tx->ring = tx_map + off.tx.desc;
+ tx->cached_prod = *tx->producer;
+ /* cached_cons is r->size bigger than the real consumer pointer
+ * See xsk_prod_nb_free
+ */
+ tx->cached_cons = *tx->consumer + xsk->config.tx_size;
+ }
+ xsk->tx = tx;
+
+ sxdp.sxdp_family = PF_XDP;
+ sxdp.sxdp_ifindex = ctx->ifindex;
+ sxdp.sxdp_queue_id = ctx->queue_id;
+ if (umem->refcount > 1) {
+ sxdp.sxdp_flags |= XDP_SHARED_UMEM;
+ sxdp.sxdp_shared_umem_fd = umem->fd;
+ } else {
+ sxdp.sxdp_flags = xsk->config.bind_flags;
+ }
+
+ err = bind(xsk->fd, (struct sockaddr *)&sxdp, sizeof(sxdp));
+ if (err) {
+ err = -errno;
+ goto out_mmap_tx;
+ }
+
+ *xsk_ptr = xsk;
+ umem->fill_save = NULL;
+ umem->comp_save = NULL;
+ return 0;
+
+out_mmap_tx:
+ if (tx)
+ munmap(tx_map, off.tx.desc +
+ xsk->config.tx_size * sizeof(struct xdp_desc));
+out_mmap_rx:
+ if (rx)
+ munmap(rx_map, off.rx.desc +
+ xsk->config.rx_size * sizeof(struct xdp_desc));
+out_put_ctx:
+ xsk_put_ctx(ctx, unmap);
+out_socket:
+ if (--umem->refcount)
+ close(xsk->fd);
+out_xsk_alloc:
+ free(xsk);
+ return err;
+}
+
+int xsk_socket__create(struct xsk_socket **xsk_ptr, int ifindex,
+ __u32 queue_id, struct xsk_umem *umem,
+ struct xsk_ring_cons *rx, struct xsk_ring_prod *tx,
+ const struct xsk_socket_config *usr_config)
+{
+ if (!umem)
+ return -EFAULT;
+
+ return xsk_socket__create_shared(xsk_ptr, ifindex, queue_id, umem,
+ rx, tx, umem->fill_save,
+ umem->comp_save, usr_config);
+}
+
+int xsk_umem__delete(struct xsk_umem *umem)
+{
+ struct xdp_mmap_offsets off;
+ int err;
+
+ if (!umem)
+ return 0;
+
+ if (umem->refcount)
+ return -EBUSY;
+
+ err = xsk_get_mmap_offsets(umem->fd, &off);
+ if (!err && umem->fill_save && umem->comp_save) {
+ munmap(umem->fill_save->ring - off.fr.desc,
+ off.fr.desc + umem->config.fill_size * sizeof(__u64));
+ munmap(umem->comp_save->ring - off.cr.desc,
+ off.cr.desc + umem->config.comp_size * sizeof(__u64));
+ }
+
+ close(umem->fd);
+ free(umem);
+
+ return 0;
+}
+
+void xsk_socket__delete(struct xsk_socket *xsk)
+{
+ size_t desc_sz = sizeof(struct xdp_desc);
+ struct xdp_mmap_offsets off;
+ struct xsk_umem *umem;
+ struct xsk_ctx *ctx;
+ int err;
+
+ if (!xsk)
+ return;
+
+ ctx = xsk->ctx;
+ umem = ctx->umem;
+
+ xsk_put_ctx(ctx, true);
+
+ err = xsk_get_mmap_offsets(xsk->fd, &off);
+ if (!err) {
+ if (xsk->rx) {
+ munmap(xsk->rx->ring - off.rx.desc,
+ off.rx.desc + xsk->config.rx_size * desc_sz);
+ }
+ if (xsk->tx) {
+ munmap(xsk->tx->ring - off.tx.desc,
+ off.tx.desc + xsk->config.tx_size * desc_sz);
+ }
+ }
+
+ umem->refcount--;
+ /* Do not close an fd that also has an associated umem connected
+ * to it.
+ */
+ if (xsk->fd != umem->fd)
+ close(xsk->fd);
+ free(xsk);
+}
diff --git a/tools/testing/selftests/bpf/xsk.h b/tools/testing/selftests/bpf/xsk.h
new file mode 100644
index 000000000000..93c2cc413cfc
--- /dev/null
+++ b/tools/testing/selftests/bpf/xsk.h
@@ -0,0 +1,249 @@
+/* SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause) */
+
+/*
+ * AF_XDP user-space access library.
+ *
+ * Copyright (c) 2018 - 2019 Intel Corporation.
+ * Copyright (c) 2019 Facebook
+ *
+ * Author(s): Magnus Karlsson <magnus.karlsson@intel.com>
+ */
+
+#ifndef __XSK_H
+#define __XSK_H
+
+#include <stdio.h>
+#include <stdint.h>
+#include <stdbool.h>
+#include <linux/if_xdp.h>
+
+#include <bpf/libbpf.h>
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/* Do not access these members directly. Use the functions below. */
+#define DEFINE_XSK_RING(name) \
+struct name { \
+ __u32 cached_prod; \
+ __u32 cached_cons; \
+ __u32 mask; \
+ __u32 size; \
+ __u32 *producer; \
+ __u32 *consumer; \
+ void *ring; \
+ __u32 *flags; \
+}
+
+DEFINE_XSK_RING(xsk_ring_prod);
+DEFINE_XSK_RING(xsk_ring_cons);
+
+/* For a detailed explanation on the memory barriers associated with the
+ * ring, please take a look at net/xdp/xsk_queue.h.
+ */
+
+struct xsk_umem;
+struct xsk_socket;
+
+static inline __u64 *xsk_ring_prod__fill_addr(struct xsk_ring_prod *fill,
+ __u32 idx)
+{
+ __u64 *addrs = (__u64 *)fill->ring;
+
+ return &addrs[idx & fill->mask];
+}
+
+static inline const __u64 *
+xsk_ring_cons__comp_addr(const struct xsk_ring_cons *comp, __u32 idx)
+{
+ const __u64 *addrs = (const __u64 *)comp->ring;
+
+ return &addrs[idx & comp->mask];
+}
+
+static inline struct xdp_desc *xsk_ring_prod__tx_desc(struct xsk_ring_prod *tx,
+ __u32 idx)
+{
+ struct xdp_desc *descs = (struct xdp_desc *)tx->ring;
+
+ return &descs[idx & tx->mask];
+}
+
+static inline const struct xdp_desc *
+xsk_ring_cons__rx_desc(const struct xsk_ring_cons *rx, __u32 idx)
+{
+ const struct xdp_desc *descs = (const struct xdp_desc *)rx->ring;
+
+ return &descs[idx & rx->mask];
+}
+
+static inline int xsk_ring_prod__needs_wakeup(const struct xsk_ring_prod *r)
+{
+ return *r->flags & XDP_RING_NEED_WAKEUP;
+}
+
+static inline __u32 xsk_prod_nb_free(struct xsk_ring_prod *r, __u32 nb)
+{
+ __u32 free_entries = r->cached_cons - r->cached_prod;
+
+ if (free_entries >= nb)
+ return free_entries;
+
+ /* Refresh the local tail pointer.
+ * cached_cons is r->size bigger than the real consumer pointer so
+ * that this addition can be avoided in the more frequently
+ * executed code that computs free_entries in the beginning of
+ * this function. Without this optimization it whould have been
+ * free_entries = r->cached_prod - r->cached_cons + r->size.
+ */
+ r->cached_cons = __atomic_load_n(r->consumer, __ATOMIC_ACQUIRE);
+ r->cached_cons += r->size;
+
+ return r->cached_cons - r->cached_prod;
+}
+
+static inline __u32 xsk_cons_nb_avail(struct xsk_ring_cons *r, __u32 nb)
+{
+ __u32 entries = r->cached_prod - r->cached_cons;
+
+ if (entries == 0) {
+ r->cached_prod = __atomic_load_n(r->producer, __ATOMIC_ACQUIRE);
+ entries = r->cached_prod - r->cached_cons;
+ }
+
+ return (entries > nb) ? nb : entries;
+}
+
+static inline __u32 xsk_ring_prod__reserve(struct xsk_ring_prod *prod, __u32 nb, __u32 *idx)
+{
+ if (xsk_prod_nb_free(prod, nb) < nb)
+ return 0;
+
+ *idx = prod->cached_prod;
+ prod->cached_prod += nb;
+
+ return nb;
+}
+
+static inline void xsk_ring_prod__submit(struct xsk_ring_prod *prod, __u32 nb)
+{
+ /* Make sure everything has been written to the ring before indicating
+ * this to the kernel by writing the producer pointer.
+ */
+ __atomic_store_n(prod->producer, *prod->producer + nb, __ATOMIC_RELEASE);
+}
+
+static inline void xsk_ring_prod__cancel(struct xsk_ring_prod *prod, __u32 nb)
+{
+ prod->cached_prod -= nb;
+}
+
+static inline __u32 xsk_ring_cons__peek(struct xsk_ring_cons *cons, __u32 nb, __u32 *idx)
+{
+ __u32 entries = xsk_cons_nb_avail(cons, nb);
+
+ if (entries > 0) {
+ *idx = cons->cached_cons;
+ cons->cached_cons += entries;
+ }
+
+ return entries;
+}
+
+static inline void xsk_ring_cons__cancel(struct xsk_ring_cons *cons, __u32 nb)
+{
+ cons->cached_cons -= nb;
+}
+
+static inline void xsk_ring_cons__release(struct xsk_ring_cons *cons, __u32 nb)
+{
+ /* Make sure data has been read before indicating we are done
+ * with the entries by updating the consumer pointer.
+ */
+ __atomic_store_n(cons->consumer, *cons->consumer + nb, __ATOMIC_RELEASE);
+}
+
+static inline void *xsk_umem__get_data(void *umem_area, __u64 addr)
+{
+ return &((char *)umem_area)[addr];
+}
+
+static inline __u64 xsk_umem__extract_addr(__u64 addr)
+{
+ return addr & XSK_UNALIGNED_BUF_ADDR_MASK;
+}
+
+static inline __u64 xsk_umem__extract_offset(__u64 addr)
+{
+ return addr >> XSK_UNALIGNED_BUF_OFFSET_SHIFT;
+}
+
+static inline __u64 xsk_umem__add_offset_to_addr(__u64 addr)
+{
+ return xsk_umem__extract_addr(addr) + xsk_umem__extract_offset(addr);
+}
+
+int xsk_umem__fd(const struct xsk_umem *umem);
+int xsk_socket__fd(const struct xsk_socket *xsk);
+
+#define XSK_RING_CONS__DEFAULT_NUM_DESCS 2048
+#define XSK_RING_PROD__DEFAULT_NUM_DESCS 2048
+#define XSK_UMEM__DEFAULT_FRAME_SHIFT 12 /* 4096 bytes */
+#define XSK_UMEM__DEFAULT_FRAME_SIZE (1 << XSK_UMEM__DEFAULT_FRAME_SHIFT)
+#define XSK_UMEM__DEFAULT_FRAME_HEADROOM 0
+#define XSK_UMEM__DEFAULT_FLAGS 0
+
+struct xsk_umem_config {
+ __u32 fill_size;
+ __u32 comp_size;
+ __u32 frame_size;
+ __u32 frame_headroom;
+ __u32 flags;
+ __u32 tx_metadata_len;
+};
+
+int xsk_attach_xdp_program(struct bpf_program *prog, int ifindex, u32 xdp_flags);
+void xsk_detach_xdp_program(int ifindex, u32 xdp_flags);
+int xsk_update_xskmap(struct bpf_map *map, struct xsk_socket *xsk, u32 index);
+void xsk_clear_xskmap(struct bpf_map *map);
+bool xsk_is_in_mode(u32 ifindex, int mode);
+
+struct xsk_socket_config {
+ __u32 rx_size;
+ __u32 tx_size;
+ __u16 bind_flags;
+};
+
+/* Set config to NULL to get the default configuration. */
+int xsk_umem__create(struct xsk_umem **umem,
+ void *umem_area, __u64 size,
+ struct xsk_ring_prod *fill,
+ struct xsk_ring_cons *comp,
+ const struct xsk_umem_config *config);
+int xsk_socket__create(struct xsk_socket **xsk,
+ int ifindex, __u32 queue_id,
+ struct xsk_umem *umem,
+ struct xsk_ring_cons *rx,
+ struct xsk_ring_prod *tx,
+ const struct xsk_socket_config *config);
+int xsk_socket__create_shared(struct xsk_socket **xsk_ptr,
+ int ifindex,
+ __u32 queue_id, struct xsk_umem *umem,
+ struct xsk_ring_cons *rx,
+ struct xsk_ring_prod *tx,
+ struct xsk_ring_prod *fill,
+ struct xsk_ring_cons *comp,
+ const struct xsk_socket_config *config);
+
+/* Returns 0 for success and -EBUSY if the umem is still in use. */
+int xsk_umem__delete(struct xsk_umem *umem);
+void xsk_socket__delete(struct xsk_socket *xsk);
+
+int xsk_set_mtu(int ifindex, int mtu);
+
+#ifdef __cplusplus
+} /* extern "C" */
+#endif
+
+#endif /* __XSK_H */
diff --git a/tools/testing/selftests/bpf/xsk_prereqs.sh b/tools/testing/selftests/bpf/xsk_prereqs.sh
new file mode 100755
index 000000000000..47c7b8064f38
--- /dev/null
+++ b/tools/testing/selftests/bpf/xsk_prereqs.sh
@@ -0,0 +1,93 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+# Copyright(c) 2020 Intel Corporation.
+
+ksft_pass=0
+ksft_fail=1
+ksft_xfail=2
+ksft_xpass=3
+ksft_skip=4
+
+XSKOBJ=xskxceiver
+
+validate_root_exec()
+{
+ msg="skip all tests:"
+ if [ $UID != 0 ]; then
+ echo $msg must be run as root >&2
+ test_exit $ksft_fail
+ else
+ return $ksft_pass
+ fi
+}
+
+validate_veth_support()
+{
+ msg="skip all tests:"
+ if [ $(ip link add $1 type veth 2>/dev/null; echo $?;) != 0 ]; then
+ echo $msg veth kernel support not available >&2
+ test_exit $ksft_skip
+ else
+ ip link del $1
+ return $ksft_pass
+ fi
+}
+
+test_status()
+{
+ statusval=$1
+ if [ $statusval -eq $ksft_fail ]; then
+ echo "$2: [ FAIL ]"
+ elif [ $statusval -eq $ksft_skip ]; then
+ echo "$2: [ SKIPPED ]"
+ elif [ $statusval -eq $ksft_pass ]; then
+ echo "$2: [ PASS ]"
+ fi
+}
+
+test_exit()
+{
+ if [ $1 -ne 0 ]; then
+ test_status $1 $(basename $0)
+ fi
+ exit 1
+}
+
+cleanup_iface()
+{
+ ip link set $1 mtu $2
+ ip link set $1 xdp off
+ ip link set $1 xdpgeneric off
+}
+
+clear_configs()
+{
+ [ $(ip link show $1 &>/dev/null; echo $?;) == 0 ] &&
+ { ip link del $1; }
+}
+
+cleanup_exit()
+{
+ clear_configs $1 $2
+}
+
+validate_ip_utility()
+{
+ [ ! $(type -P ip) ] && { echo "'ip' not found. Skipping tests."; test_exit $ksft_skip; }
+}
+
+exec_xskxceiver()
+{
+ if [[ $busy_poll -eq 1 ]]; then
+ ARGS+="-b "
+ fi
+
+ ./${XSKOBJ} -i ${VETH0} -i ${VETH1} ${ARGS}
+ retval=$?
+
+ if [[ $list -ne 1 ]]; then
+ test_status $retval "${TEST_NAME}"
+ statusList+=($retval)
+ nameList+=(${TEST_NAME})
+ fi
+}
diff --git a/tools/testing/selftests/bpf/xsk_xdp_common.h b/tools/testing/selftests/bpf/xsk_xdp_common.h
new file mode 100644
index 000000000000..5a6f36f07383
--- /dev/null
+++ b/tools/testing/selftests/bpf/xsk_xdp_common.h
@@ -0,0 +1,12 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+
+#ifndef XSK_XDP_COMMON_H_
+#define XSK_XDP_COMMON_H_
+
+#define MAX_SOCKETS 2
+
+struct xdp_info {
+ __u64 count;
+} __attribute__((aligned(32)));
+
+#endif /* XSK_XDP_COMMON_H_ */
diff --git a/tools/testing/selftests/bpf/xskxceiver.c b/tools/testing/selftests/bpf/xskxceiver.c
new file mode 100644
index 000000000000..b1102ee13faa
--- /dev/null
+++ b/tools/testing/selftests/bpf/xskxceiver.c
@@ -0,0 +1,2595 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright(c) 2020 Intel Corporation. */
+
+/*
+ * Some functions in this program are taken from
+ * Linux kernel samples/bpf/xdpsock* and modified
+ * for use.
+ *
+ * See test_xsk.sh for detailed information on test topology
+ * and prerequisite network setup.
+ *
+ * This test program contains two threads, each thread is single socket with
+ * a unique UMEM. It validates in-order packet delivery and packet content
+ * by sending packets to each other.
+ *
+ * Tests Information:
+ * ------------------
+ * These selftests test AF_XDP SKB and Native/DRV modes using veth
+ * Virtual Ethernet interfaces.
+ *
+ * For each mode, the following tests are run:
+ * a. nopoll - soft-irq processing in run-to-completion mode
+ * b. poll - using poll() syscall
+ * c. Socket Teardown
+ * Create a Tx and a Rx socket, Tx from one socket, Rx on another. Destroy
+ * both sockets, then repeat multiple times. Only nopoll mode is used
+ * d. Bi-directional sockets
+ * Configure sockets as bi-directional tx/rx sockets, sets up fill and
+ * completion rings on each socket, tx/rx in both directions. Only nopoll
+ * mode is used
+ * e. Statistics
+ * Trigger some error conditions and ensure that the appropriate statistics
+ * are incremented. Within this test, the following statistics are tested:
+ * i. rx dropped
+ * Increase the UMEM frame headroom to a value which results in
+ * insufficient space in the rx buffer for both the packet and the headroom.
+ * ii. tx invalid
+ * Set the 'len' field of tx descriptors to an invalid value (umem frame
+ * size + 1).
+ * iii. rx ring full
+ * Reduce the size of the RX ring to a fraction of the fill ring size.
+ * iv. fill queue empty
+ * Do not populate the fill queue and then try to receive pkts.
+ * f. bpf_link resource persistence
+ * Configure sockets at indexes 0 and 1, run a traffic on queue ids 0,
+ * then remove xsk sockets from queue 0 on both veth interfaces and
+ * finally run a traffic on queues ids 1
+ * g. unaligned mode
+ * h. tests for invalid and corner case Tx descriptors so that the correct ones
+ * are discarded and let through, respectively.
+ * i. 2K frame size tests
+ * j. If multi-buffer is supported, send 9k packets divided into 3 frames
+ * k. If multi-buffer and huge pages are supported, send 9k packets in a single frame
+ * using unaligned mode
+ * l. If multi-buffer is supported, try various nasty combinations of descriptors to
+ * check if they pass the validation or not
+ *
+ * Flow:
+ * -----
+ * - Single process spawns two threads: Tx and Rx
+ * - Each of these two threads attach to a veth interface
+ * - Each thread creates one AF_XDP socket connected to a unique umem for each
+ * veth interface
+ * - Tx thread Transmits a number of packets from veth<xxxx> to veth<yyyy>
+ * - Rx thread verifies if all packets were received and delivered in-order,
+ * and have the right content
+ *
+ * Enable/disable packet dump mode:
+ * --------------------------
+ * To enable L2 - L4 headers and payload dump of each packet on STDOUT, add
+ * parameter -D to params array in test_xsk.sh, i.e. params=("-S" "-D")
+ */
+
+#define _GNU_SOURCE
+#include <assert.h>
+#include <fcntl.h>
+#include <errno.h>
+#include <getopt.h>
+#include <linux/if_link.h>
+#include <linux/if_ether.h>
+#include <linux/mman.h>
+#include <linux/netdev.h>
+#include <linux/bitmap.h>
+#include <arpa/inet.h>
+#include <net/if.h>
+#include <locale.h>
+#include <poll.h>
+#include <pthread.h>
+#include <signal.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <stddef.h>
+#include <sys/mman.h>
+#include <sys/socket.h>
+#include <sys/time.h>
+#include <sys/types.h>
+#include <unistd.h>
+
+#include "xsk_xdp_progs.skel.h"
+#include "xsk.h"
+#include "xskxceiver.h"
+#include <bpf/bpf.h>
+#include <linux/filter.h>
+#include "../kselftest.h"
+#include "xsk_xdp_common.h"
+
+static bool opt_verbose;
+static bool opt_print_tests;
+static enum test_mode opt_mode = TEST_MODE_ALL;
+static u32 opt_run_test = RUN_ALL_TESTS;
+
+static void __exit_with_error(int error, const char *file, const char *func, int line)
+{
+ ksft_test_result_fail("[%s:%s:%i]: ERROR: %d/\"%s\"\n", file, func, line, error,
+ strerror(error));
+ ksft_exit_xfail();
+}
+
+#define exit_with_error(error) __exit_with_error(error, __FILE__, __func__, __LINE__)
+#define busy_poll_string(test) (test)->ifobj_tx->busy_poll ? "BUSY-POLL " : ""
+static char *mode_string(struct test_spec *test)
+{
+ switch (test->mode) {
+ case TEST_MODE_SKB:
+ return "SKB";
+ case TEST_MODE_DRV:
+ return "DRV";
+ case TEST_MODE_ZC:
+ return "ZC";
+ default:
+ return "BOGUS";
+ }
+}
+
+static void report_failure(struct test_spec *test)
+{
+ if (test->fail)
+ return;
+
+ ksft_test_result_fail("FAIL: %s %s%s\n", mode_string(test), busy_poll_string(test),
+ test->name);
+ test->fail = true;
+}
+
+/* The payload is a word consisting of a packet sequence number in the upper
+ * 16-bits and a intra packet data sequence number in the lower 16 bits. So the 3rd packet's
+ * 5th word of data will contain the number (2<<16) | 4 as they are numbered from 0.
+ */
+static void write_payload(void *dest, u32 pkt_nb, u32 start, u32 size)
+{
+ u32 *ptr = (u32 *)dest, i;
+
+ start /= sizeof(*ptr);
+ size /= sizeof(*ptr);
+ for (i = 0; i < size; i++)
+ ptr[i] = htonl(pkt_nb << 16 | (i + start));
+}
+
+static void gen_eth_hdr(struct xsk_socket_info *xsk, struct ethhdr *eth_hdr)
+{
+ memcpy(eth_hdr->h_dest, xsk->dst_mac, ETH_ALEN);
+ memcpy(eth_hdr->h_source, xsk->src_mac, ETH_ALEN);
+ eth_hdr->h_proto = htons(ETH_P_LOOPBACK);
+}
+
+static bool is_umem_valid(struct ifobject *ifobj)
+{
+ return !!ifobj->umem->umem;
+}
+
+static u32 mode_to_xdp_flags(enum test_mode mode)
+{
+ return (mode == TEST_MODE_SKB) ? XDP_FLAGS_SKB_MODE : XDP_FLAGS_DRV_MODE;
+}
+
+static u64 umem_size(struct xsk_umem_info *umem)
+{
+ return umem->num_frames * umem->frame_size;
+}
+
+static int xsk_configure_umem(struct ifobject *ifobj, struct xsk_umem_info *umem, void *buffer,
+ u64 size)
+{
+ struct xsk_umem_config cfg = {
+ .fill_size = XSK_RING_PROD__DEFAULT_NUM_DESCS,
+ .comp_size = XSK_RING_CONS__DEFAULT_NUM_DESCS,
+ .frame_size = umem->frame_size,
+ .frame_headroom = umem->frame_headroom,
+ .flags = XSK_UMEM__DEFAULT_FLAGS
+ };
+ int ret;
+
+ if (umem->unaligned_mode)
+ cfg.flags |= XDP_UMEM_UNALIGNED_CHUNK_FLAG;
+
+ ret = xsk_umem__create(&umem->umem, buffer, size,
+ &umem->fq, &umem->cq, &cfg);
+ if (ret)
+ return ret;
+
+ umem->buffer = buffer;
+ if (ifobj->shared_umem && ifobj->rx_on) {
+ umem->base_addr = umem_size(umem);
+ umem->next_buffer = umem_size(umem);
+ }
+
+ return 0;
+}
+
+static u64 umem_alloc_buffer(struct xsk_umem_info *umem)
+{
+ u64 addr;
+
+ addr = umem->next_buffer;
+ umem->next_buffer += umem->frame_size;
+ if (umem->next_buffer >= umem->base_addr + umem_size(umem))
+ umem->next_buffer = umem->base_addr;
+
+ return addr;
+}
+
+static void umem_reset_alloc(struct xsk_umem_info *umem)
+{
+ umem->next_buffer = 0;
+}
+
+static void enable_busy_poll(struct xsk_socket_info *xsk)
+{
+ int sock_opt;
+
+ sock_opt = 1;
+ if (setsockopt(xsk_socket__fd(xsk->xsk), SOL_SOCKET, SO_PREFER_BUSY_POLL,
+ (void *)&sock_opt, sizeof(sock_opt)) < 0)
+ exit_with_error(errno);
+
+ sock_opt = 20;
+ if (setsockopt(xsk_socket__fd(xsk->xsk), SOL_SOCKET, SO_BUSY_POLL,
+ (void *)&sock_opt, sizeof(sock_opt)) < 0)
+ exit_with_error(errno);
+
+ sock_opt = BATCH_SIZE;
+ if (setsockopt(xsk_socket__fd(xsk->xsk), SOL_SOCKET, SO_BUSY_POLL_BUDGET,
+ (void *)&sock_opt, sizeof(sock_opt)) < 0)
+ exit_with_error(errno);
+}
+
+static int __xsk_configure_socket(struct xsk_socket_info *xsk, struct xsk_umem_info *umem,
+ struct ifobject *ifobject, bool shared)
+{
+ struct xsk_socket_config cfg = {};
+ struct xsk_ring_cons *rxr;
+ struct xsk_ring_prod *txr;
+
+ xsk->umem = umem;
+ cfg.rx_size = xsk->rxqsize;
+ cfg.tx_size = XSK_RING_PROD__DEFAULT_NUM_DESCS;
+ cfg.bind_flags = ifobject->bind_flags;
+ if (shared)
+ cfg.bind_flags |= XDP_SHARED_UMEM;
+ if (ifobject->mtu > MAX_ETH_PKT_SIZE)
+ cfg.bind_flags |= XDP_USE_SG;
+
+ txr = ifobject->tx_on ? &xsk->tx : NULL;
+ rxr = ifobject->rx_on ? &xsk->rx : NULL;
+ return xsk_socket__create(&xsk->xsk, ifobject->ifindex, 0, umem->umem, rxr, txr, &cfg);
+}
+
+static bool ifobj_zc_avail(struct ifobject *ifobject)
+{
+ size_t umem_sz = DEFAULT_UMEM_BUFFERS * XSK_UMEM__DEFAULT_FRAME_SIZE;
+ int mmap_flags = MAP_PRIVATE | MAP_ANONYMOUS | MAP_NORESERVE;
+ struct xsk_socket_info *xsk;
+ struct xsk_umem_info *umem;
+ bool zc_avail = false;
+ void *bufs;
+ int ret;
+
+ bufs = mmap(NULL, umem_sz, PROT_READ | PROT_WRITE, mmap_flags, -1, 0);
+ if (bufs == MAP_FAILED)
+ exit_with_error(errno);
+
+ umem = calloc(1, sizeof(struct xsk_umem_info));
+ if (!umem) {
+ munmap(bufs, umem_sz);
+ exit_with_error(ENOMEM);
+ }
+ umem->frame_size = XSK_UMEM__DEFAULT_FRAME_SIZE;
+ ret = xsk_configure_umem(ifobject, umem, bufs, umem_sz);
+ if (ret)
+ exit_with_error(-ret);
+
+ xsk = calloc(1, sizeof(struct xsk_socket_info));
+ if (!xsk)
+ goto out;
+ ifobject->bind_flags = XDP_USE_NEED_WAKEUP | XDP_ZEROCOPY;
+ ifobject->rx_on = true;
+ xsk->rxqsize = XSK_RING_CONS__DEFAULT_NUM_DESCS;
+ ret = __xsk_configure_socket(xsk, umem, ifobject, false);
+ if (!ret)
+ zc_avail = true;
+
+ xsk_socket__delete(xsk->xsk);
+ free(xsk);
+out:
+ munmap(umem->buffer, umem_sz);
+ xsk_umem__delete(umem->umem);
+ free(umem);
+ return zc_avail;
+}
+
+static struct option long_options[] = {
+ {"interface", required_argument, 0, 'i'},
+ {"busy-poll", no_argument, 0, 'b'},
+ {"verbose", no_argument, 0, 'v'},
+ {"mode", required_argument, 0, 'm'},
+ {"list", no_argument, 0, 'l'},
+ {"test", required_argument, 0, 't'},
+ {"help", no_argument, 0, 'h'},
+ {0, 0, 0, 0}
+};
+
+static void print_usage(char **argv)
+{
+ const char *str =
+ " Usage: xskxceiver [OPTIONS]\n"
+ " Options:\n"
+ " -i, --interface Use interface\n"
+ " -v, --verbose Verbose output\n"
+ " -b, --busy-poll Enable busy poll\n"
+ " -m, --mode Run only mode skb, drv, or zc\n"
+ " -l, --list List all available tests\n"
+ " -t, --test Run a specific test. Enter number from -l option.\n"
+ " -h, --help Display this help and exit\n";
+
+ ksft_print_msg(str, basename(argv[0]));
+ ksft_exit_xfail();
+}
+
+static bool validate_interface(struct ifobject *ifobj)
+{
+ if (!strcmp(ifobj->ifname, ""))
+ return false;
+ return true;
+}
+
+static void parse_command_line(struct ifobject *ifobj_tx, struct ifobject *ifobj_rx, int argc,
+ char **argv)
+{
+ struct ifobject *ifobj;
+ u32 interface_nb = 0;
+ int option_index, c;
+
+ opterr = 0;
+
+ for (;;) {
+ c = getopt_long(argc, argv, "i:vbm:lt:", long_options, &option_index);
+ if (c == -1)
+ break;
+
+ switch (c) {
+ case 'i':
+ if (interface_nb == 0)
+ ifobj = ifobj_tx;
+ else if (interface_nb == 1)
+ ifobj = ifobj_rx;
+ else
+ break;
+
+ memcpy(ifobj->ifname, optarg,
+ min_t(size_t, MAX_INTERFACE_NAME_CHARS, strlen(optarg)));
+
+ ifobj->ifindex = if_nametoindex(ifobj->ifname);
+ if (!ifobj->ifindex)
+ exit_with_error(errno);
+
+ interface_nb++;
+ break;
+ case 'v':
+ opt_verbose = true;
+ break;
+ case 'b':
+ ifobj_tx->busy_poll = true;
+ ifobj_rx->busy_poll = true;
+ break;
+ case 'm':
+ if (!strncmp("skb", optarg, strlen(optarg)))
+ opt_mode = TEST_MODE_SKB;
+ else if (!strncmp("drv", optarg, strlen(optarg)))
+ opt_mode = TEST_MODE_DRV;
+ else if (!strncmp("zc", optarg, strlen(optarg)))
+ opt_mode = TEST_MODE_ZC;
+ else
+ print_usage(argv);
+ break;
+ case 'l':
+ opt_print_tests = true;
+ break;
+ case 't':
+ errno = 0;
+ opt_run_test = strtol(optarg, NULL, 0);
+ if (errno)
+ print_usage(argv);
+ break;
+ case 'h':
+ default:
+ print_usage(argv);
+ }
+ }
+}
+
+static void __test_spec_init(struct test_spec *test, struct ifobject *ifobj_tx,
+ struct ifobject *ifobj_rx)
+{
+ u32 i, j;
+
+ for (i = 0; i < MAX_INTERFACES; i++) {
+ struct ifobject *ifobj = i ? ifobj_rx : ifobj_tx;
+
+ ifobj->xsk = &ifobj->xsk_arr[0];
+ ifobj->use_poll = false;
+ ifobj->use_fill_ring = true;
+ ifobj->release_rx = true;
+ ifobj->validation_func = NULL;
+ ifobj->use_metadata = false;
+
+ if (i == 0) {
+ ifobj->rx_on = false;
+ ifobj->tx_on = true;
+ } else {
+ ifobj->rx_on = true;
+ ifobj->tx_on = false;
+ }
+
+ memset(ifobj->umem, 0, sizeof(*ifobj->umem));
+ ifobj->umem->num_frames = DEFAULT_UMEM_BUFFERS;
+ ifobj->umem->frame_size = XSK_UMEM__DEFAULT_FRAME_SIZE;
+
+ for (j = 0; j < MAX_SOCKETS; j++) {
+ memset(&ifobj->xsk_arr[j], 0, sizeof(ifobj->xsk_arr[j]));
+ ifobj->xsk_arr[j].rxqsize = XSK_RING_CONS__DEFAULT_NUM_DESCS;
+ if (i == 0)
+ ifobj->xsk_arr[j].pkt_stream = test->tx_pkt_stream_default;
+ else
+ ifobj->xsk_arr[j].pkt_stream = test->rx_pkt_stream_default;
+
+ memcpy(ifobj->xsk_arr[j].src_mac, g_mac, ETH_ALEN);
+ memcpy(ifobj->xsk_arr[j].dst_mac, g_mac, ETH_ALEN);
+ ifobj->xsk_arr[j].src_mac[5] += ((j * 2) + 0);
+ ifobj->xsk_arr[j].dst_mac[5] += ((j * 2) + 1);
+ }
+ }
+
+ test->ifobj_tx = ifobj_tx;
+ test->ifobj_rx = ifobj_rx;
+ test->current_step = 0;
+ test->total_steps = 1;
+ test->nb_sockets = 1;
+ test->fail = false;
+ test->mtu = MAX_ETH_PKT_SIZE;
+ test->xdp_prog_rx = ifobj_rx->xdp_progs->progs.xsk_def_prog;
+ test->xskmap_rx = ifobj_rx->xdp_progs->maps.xsk;
+ test->xdp_prog_tx = ifobj_tx->xdp_progs->progs.xsk_def_prog;
+ test->xskmap_tx = ifobj_tx->xdp_progs->maps.xsk;
+}
+
+static void test_spec_init(struct test_spec *test, struct ifobject *ifobj_tx,
+ struct ifobject *ifobj_rx, enum test_mode mode,
+ const struct test_spec *test_to_run)
+{
+ struct pkt_stream *tx_pkt_stream;
+ struct pkt_stream *rx_pkt_stream;
+ u32 i;
+
+ tx_pkt_stream = test->tx_pkt_stream_default;
+ rx_pkt_stream = test->rx_pkt_stream_default;
+ memset(test, 0, sizeof(*test));
+ test->tx_pkt_stream_default = tx_pkt_stream;
+ test->rx_pkt_stream_default = rx_pkt_stream;
+
+ for (i = 0; i < MAX_INTERFACES; i++) {
+ struct ifobject *ifobj = i ? ifobj_rx : ifobj_tx;
+
+ ifobj->bind_flags = XDP_USE_NEED_WAKEUP;
+ if (mode == TEST_MODE_ZC)
+ ifobj->bind_flags |= XDP_ZEROCOPY;
+ else
+ ifobj->bind_flags |= XDP_COPY;
+ }
+
+ strncpy(test->name, test_to_run->name, MAX_TEST_NAME_SIZE);
+ test->test_func = test_to_run->test_func;
+ test->mode = mode;
+ __test_spec_init(test, ifobj_tx, ifobj_rx);
+}
+
+static void test_spec_reset(struct test_spec *test)
+{
+ __test_spec_init(test, test->ifobj_tx, test->ifobj_rx);
+}
+
+static void test_spec_set_xdp_prog(struct test_spec *test, struct bpf_program *xdp_prog_rx,
+ struct bpf_program *xdp_prog_tx, struct bpf_map *xskmap_rx,
+ struct bpf_map *xskmap_tx)
+{
+ test->xdp_prog_rx = xdp_prog_rx;
+ test->xdp_prog_tx = xdp_prog_tx;
+ test->xskmap_rx = xskmap_rx;
+ test->xskmap_tx = xskmap_tx;
+}
+
+static int test_spec_set_mtu(struct test_spec *test, int mtu)
+{
+ int err;
+
+ if (test->ifobj_rx->mtu != mtu) {
+ err = xsk_set_mtu(test->ifobj_rx->ifindex, mtu);
+ if (err)
+ return err;
+ test->ifobj_rx->mtu = mtu;
+ }
+ if (test->ifobj_tx->mtu != mtu) {
+ err = xsk_set_mtu(test->ifobj_tx->ifindex, mtu);
+ if (err)
+ return err;
+ test->ifobj_tx->mtu = mtu;
+ }
+
+ return 0;
+}
+
+static void pkt_stream_reset(struct pkt_stream *pkt_stream)
+{
+ if (pkt_stream) {
+ pkt_stream->current_pkt_nb = 0;
+ pkt_stream->nb_rx_pkts = 0;
+ }
+}
+
+static struct pkt *pkt_stream_get_next_tx_pkt(struct pkt_stream *pkt_stream)
+{
+ if (pkt_stream->current_pkt_nb >= pkt_stream->nb_pkts)
+ return NULL;
+
+ return &pkt_stream->pkts[pkt_stream->current_pkt_nb++];
+}
+
+static struct pkt *pkt_stream_get_next_rx_pkt(struct pkt_stream *pkt_stream, u32 *pkts_sent)
+{
+ while (pkt_stream->current_pkt_nb < pkt_stream->nb_pkts) {
+ (*pkts_sent)++;
+ if (pkt_stream->pkts[pkt_stream->current_pkt_nb].valid)
+ return &pkt_stream->pkts[pkt_stream->current_pkt_nb++];
+ pkt_stream->current_pkt_nb++;
+ }
+ return NULL;
+}
+
+static void pkt_stream_delete(struct pkt_stream *pkt_stream)
+{
+ free(pkt_stream->pkts);
+ free(pkt_stream);
+}
+
+static void pkt_stream_restore_default(struct test_spec *test)
+{
+ struct pkt_stream *tx_pkt_stream = test->ifobj_tx->xsk->pkt_stream;
+ struct pkt_stream *rx_pkt_stream = test->ifobj_rx->xsk->pkt_stream;
+
+ if (tx_pkt_stream != test->tx_pkt_stream_default) {
+ pkt_stream_delete(test->ifobj_tx->xsk->pkt_stream);
+ test->ifobj_tx->xsk->pkt_stream = test->tx_pkt_stream_default;
+ }
+
+ if (rx_pkt_stream != test->rx_pkt_stream_default) {
+ pkt_stream_delete(test->ifobj_rx->xsk->pkt_stream);
+ test->ifobj_rx->xsk->pkt_stream = test->rx_pkt_stream_default;
+ }
+}
+
+static struct pkt_stream *__pkt_stream_alloc(u32 nb_pkts)
+{
+ struct pkt_stream *pkt_stream;
+
+ pkt_stream = calloc(1, sizeof(*pkt_stream));
+ if (!pkt_stream)
+ return NULL;
+
+ pkt_stream->pkts = calloc(nb_pkts, sizeof(*pkt_stream->pkts));
+ if (!pkt_stream->pkts) {
+ free(pkt_stream);
+ return NULL;
+ }
+
+ pkt_stream->nb_pkts = nb_pkts;
+ return pkt_stream;
+}
+
+static bool pkt_continues(u32 options)
+{
+ return options & XDP_PKT_CONTD;
+}
+
+static u32 ceil_u32(u32 a, u32 b)
+{
+ return (a + b - 1) / b;
+}
+
+static u32 pkt_nb_frags(u32 frame_size, struct pkt_stream *pkt_stream, struct pkt *pkt)
+{
+ u32 nb_frags = 1, next_frag;
+
+ if (!pkt)
+ return 1;
+
+ if (!pkt_stream->verbatim) {
+ if (!pkt->valid || !pkt->len)
+ return 1;
+ return ceil_u32(pkt->len, frame_size);
+ }
+
+ /* Search for the end of the packet in verbatim mode */
+ if (!pkt_continues(pkt->options))
+ return nb_frags;
+
+ next_frag = pkt_stream->current_pkt_nb;
+ pkt++;
+ while (next_frag++ < pkt_stream->nb_pkts) {
+ nb_frags++;
+ if (!pkt_continues(pkt->options) || !pkt->valid)
+ break;
+ pkt++;
+ }
+ return nb_frags;
+}
+
+static bool set_pkt_valid(int offset, u32 len)
+{
+ return len <= MAX_ETH_JUMBO_SIZE;
+}
+
+static void pkt_set(struct pkt_stream *pkt_stream, struct pkt *pkt, int offset, u32 len)
+{
+ pkt->offset = offset;
+ pkt->len = len;
+ pkt->valid = set_pkt_valid(offset, len);
+}
+
+static void pkt_stream_pkt_set(struct pkt_stream *pkt_stream, struct pkt *pkt, int offset, u32 len)
+{
+ bool prev_pkt_valid = pkt->valid;
+
+ pkt_set(pkt_stream, pkt, offset, len);
+ pkt_stream->nb_valid_entries += pkt->valid - prev_pkt_valid;
+}
+
+static u32 pkt_get_buffer_len(struct xsk_umem_info *umem, u32 len)
+{
+ return ceil_u32(len, umem->frame_size) * umem->frame_size;
+}
+
+static struct pkt_stream *__pkt_stream_generate(u32 nb_pkts, u32 pkt_len, u32 nb_start, u32 nb_off)
+{
+ struct pkt_stream *pkt_stream;
+ u32 i;
+
+ pkt_stream = __pkt_stream_alloc(nb_pkts);
+ if (!pkt_stream)
+ exit_with_error(ENOMEM);
+
+ pkt_stream->nb_pkts = nb_pkts;
+ pkt_stream->max_pkt_len = pkt_len;
+ for (i = 0; i < nb_pkts; i++) {
+ struct pkt *pkt = &pkt_stream->pkts[i];
+
+ pkt_stream_pkt_set(pkt_stream, pkt, 0, pkt_len);
+ pkt->pkt_nb = nb_start + i * nb_off;
+ }
+
+ return pkt_stream;
+}
+
+static struct pkt_stream *pkt_stream_generate(u32 nb_pkts, u32 pkt_len)
+{
+ return __pkt_stream_generate(nb_pkts, pkt_len, 0, 1);
+}
+
+static struct pkt_stream *pkt_stream_clone(struct pkt_stream *pkt_stream)
+{
+ return pkt_stream_generate(pkt_stream->nb_pkts, pkt_stream->pkts[0].len);
+}
+
+static void pkt_stream_replace(struct test_spec *test, u32 nb_pkts, u32 pkt_len)
+{
+ struct pkt_stream *pkt_stream;
+
+ pkt_stream = pkt_stream_generate(nb_pkts, pkt_len);
+ test->ifobj_tx->xsk->pkt_stream = pkt_stream;
+ pkt_stream = pkt_stream_generate(nb_pkts, pkt_len);
+ test->ifobj_rx->xsk->pkt_stream = pkt_stream;
+}
+
+static void __pkt_stream_replace_half(struct ifobject *ifobj, u32 pkt_len,
+ int offset)
+{
+ struct pkt_stream *pkt_stream;
+ u32 i;
+
+ pkt_stream = pkt_stream_clone(ifobj->xsk->pkt_stream);
+ for (i = 1; i < ifobj->xsk->pkt_stream->nb_pkts; i += 2)
+ pkt_stream_pkt_set(pkt_stream, &pkt_stream->pkts[i], offset, pkt_len);
+
+ ifobj->xsk->pkt_stream = pkt_stream;
+}
+
+static void pkt_stream_replace_half(struct test_spec *test, u32 pkt_len, int offset)
+{
+ __pkt_stream_replace_half(test->ifobj_tx, pkt_len, offset);
+ __pkt_stream_replace_half(test->ifobj_rx, pkt_len, offset);
+}
+
+static void pkt_stream_receive_half(struct test_spec *test)
+{
+ struct pkt_stream *pkt_stream = test->ifobj_tx->xsk->pkt_stream;
+ u32 i;
+
+ test->ifobj_rx->xsk->pkt_stream = pkt_stream_generate(pkt_stream->nb_pkts,
+ pkt_stream->pkts[0].len);
+ pkt_stream = test->ifobj_rx->xsk->pkt_stream;
+ for (i = 1; i < pkt_stream->nb_pkts; i += 2)
+ pkt_stream->pkts[i].valid = false;
+
+ pkt_stream->nb_valid_entries /= 2;
+}
+
+static void pkt_stream_even_odd_sequence(struct test_spec *test)
+{
+ struct pkt_stream *pkt_stream;
+ u32 i;
+
+ for (i = 0; i < test->nb_sockets; i++) {
+ pkt_stream = test->ifobj_tx->xsk_arr[i].pkt_stream;
+ pkt_stream = __pkt_stream_generate(pkt_stream->nb_pkts / 2,
+ pkt_stream->pkts[0].len, i, 2);
+ test->ifobj_tx->xsk_arr[i].pkt_stream = pkt_stream;
+
+ pkt_stream = test->ifobj_rx->xsk_arr[i].pkt_stream;
+ pkt_stream = __pkt_stream_generate(pkt_stream->nb_pkts / 2,
+ pkt_stream->pkts[0].len, i, 2);
+ test->ifobj_rx->xsk_arr[i].pkt_stream = pkt_stream;
+ }
+}
+
+static u64 pkt_get_addr(struct pkt *pkt, struct xsk_umem_info *umem)
+{
+ if (!pkt->valid)
+ return pkt->offset;
+ return pkt->offset + umem_alloc_buffer(umem);
+}
+
+static void pkt_stream_cancel(struct pkt_stream *pkt_stream)
+{
+ pkt_stream->current_pkt_nb--;
+}
+
+static void pkt_generate(struct xsk_socket_info *xsk, struct xsk_umem_info *umem, u64 addr, u32 len,
+ u32 pkt_nb, u32 bytes_written)
+{
+ void *data = xsk_umem__get_data(umem->buffer, addr);
+
+ if (len < MIN_PKT_SIZE)
+ return;
+
+ if (!bytes_written) {
+ gen_eth_hdr(xsk, data);
+
+ len -= PKT_HDR_SIZE;
+ data += PKT_HDR_SIZE;
+ } else {
+ bytes_written -= PKT_HDR_SIZE;
+ }
+
+ write_payload(data, pkt_nb, bytes_written, len);
+}
+
+static struct pkt_stream *__pkt_stream_generate_custom(struct ifobject *ifobj, struct pkt *frames,
+ u32 nb_frames, bool verbatim)
+{
+ u32 i, len = 0, pkt_nb = 0, payload = 0;
+ struct pkt_stream *pkt_stream;
+
+ pkt_stream = __pkt_stream_alloc(nb_frames);
+ if (!pkt_stream)
+ exit_with_error(ENOMEM);
+
+ for (i = 0; i < nb_frames; i++) {
+ struct pkt *pkt = &pkt_stream->pkts[pkt_nb];
+ struct pkt *frame = &frames[i];
+
+ pkt->offset = frame->offset;
+ if (verbatim) {
+ *pkt = *frame;
+ pkt->pkt_nb = payload;
+ if (!frame->valid || !pkt_continues(frame->options))
+ payload++;
+ } else {
+ if (frame->valid)
+ len += frame->len;
+ if (frame->valid && pkt_continues(frame->options))
+ continue;
+
+ pkt->pkt_nb = pkt_nb;
+ pkt->len = len;
+ pkt->valid = frame->valid;
+ pkt->options = 0;
+
+ len = 0;
+ }
+
+ print_verbose("offset: %d len: %u valid: %u options: %u pkt_nb: %u\n",
+ pkt->offset, pkt->len, pkt->valid, pkt->options, pkt->pkt_nb);
+
+ if (pkt->valid && pkt->len > pkt_stream->max_pkt_len)
+ pkt_stream->max_pkt_len = pkt->len;
+
+ if (pkt->valid)
+ pkt_stream->nb_valid_entries++;
+
+ pkt_nb++;
+ }
+
+ pkt_stream->nb_pkts = pkt_nb;
+ pkt_stream->verbatim = verbatim;
+ return pkt_stream;
+}
+
+static void pkt_stream_generate_custom(struct test_spec *test, struct pkt *pkts, u32 nb_pkts)
+{
+ struct pkt_stream *pkt_stream;
+
+ pkt_stream = __pkt_stream_generate_custom(test->ifobj_tx, pkts, nb_pkts, true);
+ test->ifobj_tx->xsk->pkt_stream = pkt_stream;
+
+ pkt_stream = __pkt_stream_generate_custom(test->ifobj_rx, pkts, nb_pkts, false);
+ test->ifobj_rx->xsk->pkt_stream = pkt_stream;
+}
+
+static void pkt_print_data(u32 *data, u32 cnt)
+{
+ u32 i;
+
+ for (i = 0; i < cnt; i++) {
+ u32 seqnum, pkt_nb;
+
+ seqnum = ntohl(*data) & 0xffff;
+ pkt_nb = ntohl(*data) >> 16;
+ ksft_print_msg("%u:%u ", pkt_nb, seqnum);
+ data++;
+ }
+}
+
+static void pkt_dump(void *pkt, u32 len, bool eth_header)
+{
+ struct ethhdr *ethhdr = pkt;
+ u32 i, *data;
+
+ if (eth_header) {
+ /*extract L2 frame */
+ ksft_print_msg("DEBUG>> L2: dst mac: ");
+ for (i = 0; i < ETH_ALEN; i++)
+ ksft_print_msg("%02X", ethhdr->h_dest[i]);
+
+ ksft_print_msg("\nDEBUG>> L2: src mac: ");
+ for (i = 0; i < ETH_ALEN; i++)
+ ksft_print_msg("%02X", ethhdr->h_source[i]);
+
+ data = pkt + PKT_HDR_SIZE;
+ } else {
+ data = pkt;
+ }
+
+ /*extract L5 frame */
+ ksft_print_msg("\nDEBUG>> L5: seqnum: ");
+ pkt_print_data(data, PKT_DUMP_NB_TO_PRINT);
+ ksft_print_msg("....");
+ if (len > PKT_DUMP_NB_TO_PRINT * sizeof(u32)) {
+ ksft_print_msg("\n.... ");
+ pkt_print_data(data + len / sizeof(u32) - PKT_DUMP_NB_TO_PRINT,
+ PKT_DUMP_NB_TO_PRINT);
+ }
+ ksft_print_msg("\n---------------------------------------\n");
+}
+
+static bool is_offset_correct(struct xsk_umem_info *umem, struct pkt *pkt, u64 addr)
+{
+ u32 headroom = umem->unaligned_mode ? 0 : umem->frame_headroom;
+ u32 offset = addr % umem->frame_size, expected_offset;
+ int pkt_offset = pkt->valid ? pkt->offset : 0;
+
+ if (!umem->unaligned_mode)
+ pkt_offset = 0;
+
+ expected_offset = (pkt_offset + headroom + XDP_PACKET_HEADROOM) % umem->frame_size;
+
+ if (offset == expected_offset)
+ return true;
+
+ ksft_print_msg("[%s] expected [%u], got [%u]\n", __func__, expected_offset, offset);
+ return false;
+}
+
+static bool is_metadata_correct(struct pkt *pkt, void *buffer, u64 addr)
+{
+ void *data = xsk_umem__get_data(buffer, addr);
+ struct xdp_info *meta = data - sizeof(struct xdp_info);
+
+ if (meta->count != pkt->pkt_nb) {
+ ksft_print_msg("[%s] expected meta_count [%d], got meta_count [%llu]\n",
+ __func__, pkt->pkt_nb,
+ (unsigned long long)meta->count);
+ return false;
+ }
+
+ return true;
+}
+
+static bool is_frag_valid(struct xsk_umem_info *umem, u64 addr, u32 len, u32 expected_pkt_nb,
+ u32 bytes_processed)
+{
+ u32 seqnum, pkt_nb, *pkt_data, words_to_end, expected_seqnum;
+ void *data = xsk_umem__get_data(umem->buffer, addr);
+
+ addr -= umem->base_addr;
+
+ if (addr >= umem->num_frames * umem->frame_size ||
+ addr + len > umem->num_frames * umem->frame_size) {
+ ksft_print_msg("Frag invalid addr: %llx len: %u\n",
+ (unsigned long long)addr, len);
+ return false;
+ }
+ if (!umem->unaligned_mode && addr % umem->frame_size + len > umem->frame_size) {
+ ksft_print_msg("Frag crosses frame boundary addr: %llx len: %u\n",
+ (unsigned long long)addr, len);
+ return false;
+ }
+
+ pkt_data = data;
+ if (!bytes_processed) {
+ pkt_data += PKT_HDR_SIZE / sizeof(*pkt_data);
+ len -= PKT_HDR_SIZE;
+ } else {
+ bytes_processed -= PKT_HDR_SIZE;
+ }
+
+ expected_seqnum = bytes_processed / sizeof(*pkt_data);
+ seqnum = ntohl(*pkt_data) & 0xffff;
+ pkt_nb = ntohl(*pkt_data) >> 16;
+
+ if (expected_pkt_nb != pkt_nb) {
+ ksft_print_msg("[%s] expected pkt_nb [%u], got pkt_nb [%u]\n",
+ __func__, expected_pkt_nb, pkt_nb);
+ goto error;
+ }
+ if (expected_seqnum != seqnum) {
+ ksft_print_msg("[%s] expected seqnum at start [%u], got seqnum [%u]\n",
+ __func__, expected_seqnum, seqnum);
+ goto error;
+ }
+
+ words_to_end = len / sizeof(*pkt_data) - 1;
+ pkt_data += words_to_end;
+ seqnum = ntohl(*pkt_data) & 0xffff;
+ expected_seqnum += words_to_end;
+ if (expected_seqnum != seqnum) {
+ ksft_print_msg("[%s] expected seqnum at end [%u], got seqnum [%u]\n",
+ __func__, expected_seqnum, seqnum);
+ goto error;
+ }
+
+ return true;
+
+error:
+ pkt_dump(data, len, !bytes_processed);
+ return false;
+}
+
+static bool is_pkt_valid(struct pkt *pkt, void *buffer, u64 addr, u32 len)
+{
+ if (pkt->len != len) {
+ ksft_print_msg("[%s] expected packet length [%d], got length [%d]\n",
+ __func__, pkt->len, len);
+ pkt_dump(xsk_umem__get_data(buffer, addr), len, true);
+ return false;
+ }
+
+ return true;
+}
+
+static int kick_tx(struct xsk_socket_info *xsk)
+{
+ int ret;
+
+ ret = sendto(xsk_socket__fd(xsk->xsk), NULL, 0, MSG_DONTWAIT, NULL, 0);
+ if (ret >= 0)
+ return TEST_PASS;
+ if (errno == ENOBUFS || errno == EAGAIN || errno == EBUSY || errno == ENETDOWN) {
+ usleep(100);
+ return TEST_PASS;
+ }
+ return TEST_FAILURE;
+}
+
+static int kick_rx(struct xsk_socket_info *xsk)
+{
+ int ret;
+
+ ret = recvfrom(xsk_socket__fd(xsk->xsk), NULL, 0, MSG_DONTWAIT, NULL, NULL);
+ if (ret < 0)
+ return TEST_FAILURE;
+
+ return TEST_PASS;
+}
+
+static int complete_pkts(struct xsk_socket_info *xsk, int batch_size)
+{
+ unsigned int rcvd;
+ u32 idx;
+ int ret;
+
+ if (xsk_ring_prod__needs_wakeup(&xsk->tx)) {
+ ret = kick_tx(xsk);
+ if (ret)
+ return TEST_FAILURE;
+ }
+
+ rcvd = xsk_ring_cons__peek(&xsk->umem->cq, batch_size, &idx);
+ if (rcvd) {
+ if (rcvd > xsk->outstanding_tx) {
+ u64 addr = *xsk_ring_cons__comp_addr(&xsk->umem->cq, idx + rcvd - 1);
+
+ ksft_print_msg("[%s] Too many packets completed\n", __func__);
+ ksft_print_msg("Last completion address: %llx\n",
+ (unsigned long long)addr);
+ return TEST_FAILURE;
+ }
+
+ xsk_ring_cons__release(&xsk->umem->cq, rcvd);
+ xsk->outstanding_tx -= rcvd;
+ }
+
+ return TEST_PASS;
+}
+
+static int __receive_pkts(struct test_spec *test, struct xsk_socket_info *xsk)
+{
+ u32 frags_processed = 0, nb_frags = 0, pkt_len = 0;
+ u32 idx_rx = 0, idx_fq = 0, rcvd, pkts_sent = 0;
+ struct pkt_stream *pkt_stream = xsk->pkt_stream;
+ struct ifobject *ifobj = test->ifobj_rx;
+ struct xsk_umem_info *umem = xsk->umem;
+ struct pollfd fds = { };
+ struct pkt *pkt;
+ u64 first_addr = 0;
+ int ret;
+
+ fds.fd = xsk_socket__fd(xsk->xsk);
+ fds.events = POLLIN;
+
+ ret = kick_rx(xsk);
+ if (ret)
+ return TEST_FAILURE;
+
+ if (ifobj->use_poll) {
+ ret = poll(&fds, 1, POLL_TMOUT);
+ if (ret < 0)
+ return TEST_FAILURE;
+
+ if (!ret) {
+ if (!is_umem_valid(test->ifobj_tx))
+ return TEST_PASS;
+
+ ksft_print_msg("ERROR: [%s] Poll timed out\n", __func__);
+ return TEST_CONTINUE;
+ }
+
+ if (!(fds.revents & POLLIN))
+ return TEST_CONTINUE;
+ }
+
+ rcvd = xsk_ring_cons__peek(&xsk->rx, BATCH_SIZE, &idx_rx);
+ if (!rcvd)
+ return TEST_CONTINUE;
+
+ if (ifobj->use_fill_ring) {
+ ret = xsk_ring_prod__reserve(&umem->fq, rcvd, &idx_fq);
+ while (ret != rcvd) {
+ if (xsk_ring_prod__needs_wakeup(&umem->fq)) {
+ ret = poll(&fds, 1, POLL_TMOUT);
+ if (ret < 0)
+ return TEST_FAILURE;
+ }
+ ret = xsk_ring_prod__reserve(&umem->fq, rcvd, &idx_fq);
+ }
+ }
+
+ while (frags_processed < rcvd) {
+ const struct xdp_desc *desc = xsk_ring_cons__rx_desc(&xsk->rx, idx_rx++);
+ u64 addr = desc->addr, orig;
+
+ orig = xsk_umem__extract_addr(addr);
+ addr = xsk_umem__add_offset_to_addr(addr);
+
+ if (!nb_frags) {
+ pkt = pkt_stream_get_next_rx_pkt(pkt_stream, &pkts_sent);
+ if (!pkt) {
+ ksft_print_msg("[%s] received too many packets addr: %lx len %u\n",
+ __func__, addr, desc->len);
+ return TEST_FAILURE;
+ }
+ }
+
+ print_verbose("Rx: addr: %lx len: %u options: %u pkt_nb: %u valid: %u\n",
+ addr, desc->len, desc->options, pkt->pkt_nb, pkt->valid);
+
+ if (!is_frag_valid(umem, addr, desc->len, pkt->pkt_nb, pkt_len) ||
+ !is_offset_correct(umem, pkt, addr) || (ifobj->use_metadata &&
+ !is_metadata_correct(pkt, umem->buffer, addr)))
+ return TEST_FAILURE;
+
+ if (!nb_frags++)
+ first_addr = addr;
+ frags_processed++;
+ pkt_len += desc->len;
+ if (ifobj->use_fill_ring)
+ *xsk_ring_prod__fill_addr(&umem->fq, idx_fq++) = orig;
+
+ if (pkt_continues(desc->options))
+ continue;
+
+ /* The complete packet has been received */
+ if (!is_pkt_valid(pkt, umem->buffer, first_addr, pkt_len) ||
+ !is_offset_correct(umem, pkt, addr))
+ return TEST_FAILURE;
+
+ pkt_stream->nb_rx_pkts++;
+ nb_frags = 0;
+ pkt_len = 0;
+ }
+
+ if (nb_frags) {
+ /* In the middle of a packet. Start over from beginning of packet. */
+ idx_rx -= nb_frags;
+ xsk_ring_cons__cancel(&xsk->rx, nb_frags);
+ if (ifobj->use_fill_ring) {
+ idx_fq -= nb_frags;
+ xsk_ring_prod__cancel(&umem->fq, nb_frags);
+ }
+ frags_processed -= nb_frags;
+ }
+
+ if (ifobj->use_fill_ring)
+ xsk_ring_prod__submit(&umem->fq, frags_processed);
+ if (ifobj->release_rx)
+ xsk_ring_cons__release(&xsk->rx, frags_processed);
+
+ pthread_mutex_lock(&pacing_mutex);
+ pkts_in_flight -= pkts_sent;
+ pthread_mutex_unlock(&pacing_mutex);
+ pkts_sent = 0;
+
+return TEST_CONTINUE;
+}
+
+bool all_packets_received(struct test_spec *test, struct xsk_socket_info *xsk, u32 sock_num,
+ unsigned long *bitmap)
+{
+ struct pkt_stream *pkt_stream = xsk->pkt_stream;
+
+ if (!pkt_stream) {
+ __set_bit(sock_num, bitmap);
+ return false;
+ }
+
+ if (pkt_stream->nb_rx_pkts == pkt_stream->nb_valid_entries) {
+ __set_bit(sock_num, bitmap);
+ if (bitmap_full(bitmap, test->nb_sockets))
+ return true;
+ }
+
+ return false;
+}
+
+static int receive_pkts(struct test_spec *test)
+{
+ struct timeval tv_end, tv_now, tv_timeout = {THREAD_TMOUT, 0};
+ DECLARE_BITMAP(bitmap, test->nb_sockets);
+ struct xsk_socket_info *xsk;
+ u32 sock_num = 0;
+ int res, ret;
+
+ ret = gettimeofday(&tv_now, NULL);
+ if (ret)
+ exit_with_error(errno);
+
+ timeradd(&tv_now, &tv_timeout, &tv_end);
+
+ while (1) {
+ xsk = &test->ifobj_rx->xsk_arr[sock_num];
+
+ if ((all_packets_received(test, xsk, sock_num, bitmap)))
+ break;
+
+ res = __receive_pkts(test, xsk);
+ if (!(res == TEST_PASS || res == TEST_CONTINUE))
+ return res;
+
+ ret = gettimeofday(&tv_now, NULL);
+ if (ret)
+ exit_with_error(errno);
+
+ if (timercmp(&tv_now, &tv_end, >)) {
+ ksft_print_msg("ERROR: [%s] Receive loop timed out\n", __func__);
+ return TEST_FAILURE;
+ }
+ sock_num = (sock_num + 1) % test->nb_sockets;
+ }
+
+ return TEST_PASS;
+}
+
+static int __send_pkts(struct ifobject *ifobject, struct xsk_socket_info *xsk, bool timeout)
+{
+ u32 i, idx = 0, valid_pkts = 0, valid_frags = 0, buffer_len;
+ struct pkt_stream *pkt_stream = xsk->pkt_stream;
+ struct xsk_umem_info *umem = ifobject->umem;
+ bool use_poll = ifobject->use_poll;
+ struct pollfd fds = { };
+ int ret;
+
+ buffer_len = pkt_get_buffer_len(umem, pkt_stream->max_pkt_len);
+ /* pkts_in_flight might be negative if many invalid packets are sent */
+ if (pkts_in_flight >= (int)((umem_size(umem) - BATCH_SIZE * buffer_len) / buffer_len)) {
+ ret = kick_tx(xsk);
+ if (ret)
+ return TEST_FAILURE;
+ return TEST_CONTINUE;
+ }
+
+ fds.fd = xsk_socket__fd(xsk->xsk);
+ fds.events = POLLOUT;
+
+ while (xsk_ring_prod__reserve(&xsk->tx, BATCH_SIZE, &idx) < BATCH_SIZE) {
+ if (use_poll) {
+ ret = poll(&fds, 1, POLL_TMOUT);
+ if (timeout) {
+ if (ret < 0) {
+ ksft_print_msg("ERROR: [%s] Poll error %d\n",
+ __func__, errno);
+ return TEST_FAILURE;
+ }
+ if (ret == 0)
+ return TEST_PASS;
+ break;
+ }
+ if (ret <= 0) {
+ ksft_print_msg("ERROR: [%s] Poll error %d\n",
+ __func__, errno);
+ return TEST_FAILURE;
+ }
+ }
+
+ complete_pkts(xsk, BATCH_SIZE);
+ }
+
+ for (i = 0; i < BATCH_SIZE; i++) {
+ struct pkt *pkt = pkt_stream_get_next_tx_pkt(pkt_stream);
+ u32 nb_frags_left, nb_frags, bytes_written = 0;
+
+ if (!pkt)
+ break;
+
+ nb_frags = pkt_nb_frags(umem->frame_size, pkt_stream, pkt);
+ if (nb_frags > BATCH_SIZE - i) {
+ pkt_stream_cancel(pkt_stream);
+ xsk_ring_prod__cancel(&xsk->tx, BATCH_SIZE - i);
+ break;
+ }
+ nb_frags_left = nb_frags;
+
+ while (nb_frags_left--) {
+ struct xdp_desc *tx_desc = xsk_ring_prod__tx_desc(&xsk->tx, idx + i);
+
+ tx_desc->addr = pkt_get_addr(pkt, ifobject->umem);
+ if (pkt_stream->verbatim) {
+ tx_desc->len = pkt->len;
+ tx_desc->options = pkt->options;
+ } else if (nb_frags_left) {
+ tx_desc->len = umem->frame_size;
+ tx_desc->options = XDP_PKT_CONTD;
+ } else {
+ tx_desc->len = pkt->len - bytes_written;
+ tx_desc->options = 0;
+ }
+ if (pkt->valid)
+ pkt_generate(xsk, umem, tx_desc->addr, tx_desc->len, pkt->pkt_nb,
+ bytes_written);
+ bytes_written += tx_desc->len;
+
+ print_verbose("Tx addr: %llx len: %u options: %u pkt_nb: %u\n",
+ tx_desc->addr, tx_desc->len, tx_desc->options, pkt->pkt_nb);
+
+ if (nb_frags_left) {
+ i++;
+ if (pkt_stream->verbatim)
+ pkt = pkt_stream_get_next_tx_pkt(pkt_stream);
+ }
+ }
+
+ if (pkt && pkt->valid) {
+ valid_pkts++;
+ valid_frags += nb_frags;
+ }
+ }
+
+ pthread_mutex_lock(&pacing_mutex);
+ pkts_in_flight += valid_pkts;
+ pthread_mutex_unlock(&pacing_mutex);
+
+ xsk_ring_prod__submit(&xsk->tx, i);
+ xsk->outstanding_tx += valid_frags;
+
+ if (use_poll) {
+ ret = poll(&fds, 1, POLL_TMOUT);
+ if (ret <= 0) {
+ if (ret == 0 && timeout)
+ return TEST_PASS;
+
+ ksft_print_msg("ERROR: [%s] Poll error %d\n", __func__, ret);
+ return TEST_FAILURE;
+ }
+ }
+
+ if (!timeout) {
+ if (complete_pkts(xsk, i))
+ return TEST_FAILURE;
+
+ usleep(10);
+ return TEST_PASS;
+ }
+
+ return TEST_CONTINUE;
+}
+
+static int wait_for_tx_completion(struct xsk_socket_info *xsk)
+{
+ struct timeval tv_end, tv_now, tv_timeout = {THREAD_TMOUT, 0};
+ int ret;
+
+ ret = gettimeofday(&tv_now, NULL);
+ if (ret)
+ exit_with_error(errno);
+ timeradd(&tv_now, &tv_timeout, &tv_end);
+
+ while (xsk->outstanding_tx) {
+ ret = gettimeofday(&tv_now, NULL);
+ if (ret)
+ exit_with_error(errno);
+ if (timercmp(&tv_now, &tv_end, >)) {
+ ksft_print_msg("ERROR: [%s] Transmission loop timed out\n", __func__);
+ return TEST_FAILURE;
+ }
+
+ complete_pkts(xsk, BATCH_SIZE);
+ }
+
+ return TEST_PASS;
+}
+
+bool all_packets_sent(struct test_spec *test, unsigned long *bitmap)
+{
+ return bitmap_full(bitmap, test->nb_sockets);
+}
+
+static int send_pkts(struct test_spec *test, struct ifobject *ifobject)
+{
+ bool timeout = !is_umem_valid(test->ifobj_rx);
+ DECLARE_BITMAP(bitmap, test->nb_sockets);
+ u32 i, ret;
+
+ while (!(all_packets_sent(test, bitmap))) {
+ for (i = 0; i < test->nb_sockets; i++) {
+ struct pkt_stream *pkt_stream;
+
+ pkt_stream = ifobject->xsk_arr[i].pkt_stream;
+ if (!pkt_stream || pkt_stream->current_pkt_nb >= pkt_stream->nb_pkts) {
+ __set_bit(i, bitmap);
+ continue;
+ }
+ ret = __send_pkts(ifobject, &ifobject->xsk_arr[i], timeout);
+ if (ret == TEST_CONTINUE && !test->fail)
+ continue;
+
+ if ((ret || test->fail) && !timeout)
+ return TEST_FAILURE;
+
+ if (ret == TEST_PASS && timeout)
+ return ret;
+
+ ret = wait_for_tx_completion(&ifobject->xsk_arr[i]);
+ if (ret)
+ return TEST_FAILURE;
+ }
+ }
+
+ return TEST_PASS;
+}
+
+static int get_xsk_stats(struct xsk_socket *xsk, struct xdp_statistics *stats)
+{
+ int fd = xsk_socket__fd(xsk), err;
+ socklen_t optlen, expected_len;
+
+ optlen = sizeof(*stats);
+ err = getsockopt(fd, SOL_XDP, XDP_STATISTICS, stats, &optlen);
+ if (err) {
+ ksft_print_msg("[%s] getsockopt(XDP_STATISTICS) error %u %s\n",
+ __func__, -err, strerror(-err));
+ return TEST_FAILURE;
+ }
+
+ expected_len = sizeof(struct xdp_statistics);
+ if (optlen != expected_len) {
+ ksft_print_msg("[%s] getsockopt optlen error. Expected: %u got: %u\n",
+ __func__, expected_len, optlen);
+ return TEST_FAILURE;
+ }
+
+ return TEST_PASS;
+}
+
+static int validate_rx_dropped(struct ifobject *ifobject)
+{
+ struct xsk_socket *xsk = ifobject->xsk->xsk;
+ struct xdp_statistics stats;
+ int err;
+
+ err = kick_rx(ifobject->xsk);
+ if (err)
+ return TEST_FAILURE;
+
+ err = get_xsk_stats(xsk, &stats);
+ if (err)
+ return TEST_FAILURE;
+
+ /* The receiver calls getsockopt after receiving the last (valid)
+ * packet which is not the final packet sent in this test (valid and
+ * invalid packets are sent in alternating fashion with the final
+ * packet being invalid). Since the last packet may or may not have
+ * been dropped already, both outcomes must be allowed.
+ */
+ if (stats.rx_dropped == ifobject->xsk->pkt_stream->nb_pkts / 2 ||
+ stats.rx_dropped == ifobject->xsk->pkt_stream->nb_pkts / 2 - 1)
+ return TEST_PASS;
+
+ return TEST_FAILURE;
+}
+
+static int validate_rx_full(struct ifobject *ifobject)
+{
+ struct xsk_socket *xsk = ifobject->xsk->xsk;
+ struct xdp_statistics stats;
+ int err;
+
+ usleep(1000);
+ err = kick_rx(ifobject->xsk);
+ if (err)
+ return TEST_FAILURE;
+
+ err = get_xsk_stats(xsk, &stats);
+ if (err)
+ return TEST_FAILURE;
+
+ if (stats.rx_ring_full)
+ return TEST_PASS;
+
+ return TEST_FAILURE;
+}
+
+static int validate_fill_empty(struct ifobject *ifobject)
+{
+ struct xsk_socket *xsk = ifobject->xsk->xsk;
+ struct xdp_statistics stats;
+ int err;
+
+ usleep(1000);
+ err = kick_rx(ifobject->xsk);
+ if (err)
+ return TEST_FAILURE;
+
+ err = get_xsk_stats(xsk, &stats);
+ if (err)
+ return TEST_FAILURE;
+
+ if (stats.rx_fill_ring_empty_descs)
+ return TEST_PASS;
+
+ return TEST_FAILURE;
+}
+
+static int validate_tx_invalid_descs(struct ifobject *ifobject)
+{
+ struct xsk_socket *xsk = ifobject->xsk->xsk;
+ int fd = xsk_socket__fd(xsk);
+ struct xdp_statistics stats;
+ socklen_t optlen;
+ int err;
+
+ optlen = sizeof(stats);
+ err = getsockopt(fd, SOL_XDP, XDP_STATISTICS, &stats, &optlen);
+ if (err) {
+ ksft_print_msg("[%s] getsockopt(XDP_STATISTICS) error %u %s\n",
+ __func__, -err, strerror(-err));
+ return TEST_FAILURE;
+ }
+
+ if (stats.tx_invalid_descs != ifobject->xsk->pkt_stream->nb_pkts / 2) {
+ ksft_print_msg("[%s] tx_invalid_descs incorrect. Got [%llu] expected [%u]\n",
+ __func__,
+ (unsigned long long)stats.tx_invalid_descs,
+ ifobject->xsk->pkt_stream->nb_pkts);
+ return TEST_FAILURE;
+ }
+
+ return TEST_PASS;
+}
+
+static void xsk_configure_socket(struct test_spec *test, struct ifobject *ifobject,
+ struct xsk_umem_info *umem, bool tx)
+{
+ int i, ret;
+
+ for (i = 0; i < test->nb_sockets; i++) {
+ bool shared = (ifobject->shared_umem && tx) ? true : !!i;
+ u32 ctr = 0;
+
+ while (ctr++ < SOCK_RECONF_CTR) {
+ ret = __xsk_configure_socket(&ifobject->xsk_arr[i], umem,
+ ifobject, shared);
+ if (!ret)
+ break;
+
+ /* Retry if it fails as xsk_socket__create() is asynchronous */
+ if (ctr >= SOCK_RECONF_CTR)
+ exit_with_error(-ret);
+ usleep(USLEEP_MAX);
+ }
+ if (ifobject->busy_poll)
+ enable_busy_poll(&ifobject->xsk_arr[i]);
+ }
+}
+
+static void thread_common_ops_tx(struct test_spec *test, struct ifobject *ifobject)
+{
+ xsk_configure_socket(test, ifobject, test->ifobj_rx->umem, true);
+ ifobject->xsk = &ifobject->xsk_arr[0];
+ ifobject->xskmap = test->ifobj_rx->xskmap;
+ memcpy(ifobject->umem, test->ifobj_rx->umem, sizeof(struct xsk_umem_info));
+ ifobject->umem->base_addr = 0;
+}
+
+static void xsk_populate_fill_ring(struct xsk_umem_info *umem, struct pkt_stream *pkt_stream,
+ bool fill_up)
+{
+ u32 rx_frame_size = umem->frame_size - XDP_PACKET_HEADROOM;
+ u32 idx = 0, filled = 0, buffers_to_fill, nb_pkts;
+ int ret;
+
+ if (umem->num_frames < XSK_RING_PROD__DEFAULT_NUM_DESCS)
+ buffers_to_fill = umem->num_frames;
+ else
+ buffers_to_fill = XSK_RING_PROD__DEFAULT_NUM_DESCS;
+
+ ret = xsk_ring_prod__reserve(&umem->fq, buffers_to_fill, &idx);
+ if (ret != buffers_to_fill)
+ exit_with_error(ENOSPC);
+
+ while (filled < buffers_to_fill) {
+ struct pkt *pkt = pkt_stream_get_next_rx_pkt(pkt_stream, &nb_pkts);
+ u64 addr;
+ u32 i;
+
+ for (i = 0; i < pkt_nb_frags(rx_frame_size, pkt_stream, pkt); i++) {
+ if (!pkt) {
+ if (!fill_up)
+ break;
+ addr = filled * umem->frame_size + umem->base_addr;
+ } else if (pkt->offset >= 0) {
+ addr = pkt->offset % umem->frame_size + umem_alloc_buffer(umem);
+ } else {
+ addr = pkt->offset + umem_alloc_buffer(umem);
+ }
+
+ *xsk_ring_prod__fill_addr(&umem->fq, idx++) = addr;
+ if (++filled >= buffers_to_fill)
+ break;
+ }
+ }
+ xsk_ring_prod__submit(&umem->fq, filled);
+ xsk_ring_prod__cancel(&umem->fq, buffers_to_fill - filled);
+
+ pkt_stream_reset(pkt_stream);
+ umem_reset_alloc(umem);
+}
+
+static void thread_common_ops(struct test_spec *test, struct ifobject *ifobject)
+{
+ u64 umem_sz = ifobject->umem->num_frames * ifobject->umem->frame_size;
+ int mmap_flags = MAP_PRIVATE | MAP_ANONYMOUS | MAP_NORESERVE;
+ LIBBPF_OPTS(bpf_xdp_query_opts, opts);
+ void *bufs;
+ int ret;
+ u32 i;
+
+ if (ifobject->umem->unaligned_mode)
+ mmap_flags |= MAP_HUGETLB | MAP_HUGE_2MB;
+
+ if (ifobject->shared_umem)
+ umem_sz *= 2;
+
+ bufs = mmap(NULL, umem_sz, PROT_READ | PROT_WRITE, mmap_flags, -1, 0);
+ if (bufs == MAP_FAILED)
+ exit_with_error(errno);
+
+ ret = xsk_configure_umem(ifobject, ifobject->umem, bufs, umem_sz);
+ if (ret)
+ exit_with_error(-ret);
+
+ xsk_configure_socket(test, ifobject, ifobject->umem, false);
+
+ ifobject->xsk = &ifobject->xsk_arr[0];
+
+ if (!ifobject->rx_on)
+ return;
+
+ xsk_populate_fill_ring(ifobject->umem, ifobject->xsk->pkt_stream, ifobject->use_fill_ring);
+
+ for (i = 0; i < test->nb_sockets; i++) {
+ ifobject->xsk = &ifobject->xsk_arr[i];
+ ret = xsk_update_xskmap(ifobject->xskmap, ifobject->xsk->xsk, i);
+ if (ret)
+ exit_with_error(errno);
+ }
+}
+
+static void *worker_testapp_validate_tx(void *arg)
+{
+ struct test_spec *test = (struct test_spec *)arg;
+ struct ifobject *ifobject = test->ifobj_tx;
+ int err;
+
+ if (test->current_step == 1) {
+ if (!ifobject->shared_umem)
+ thread_common_ops(test, ifobject);
+ else
+ thread_common_ops_tx(test, ifobject);
+ }
+
+ err = send_pkts(test, ifobject);
+
+ if (!err && ifobject->validation_func)
+ err = ifobject->validation_func(ifobject);
+ if (err)
+ report_failure(test);
+
+ pthread_exit(NULL);
+}
+
+static void *worker_testapp_validate_rx(void *arg)
+{
+ struct test_spec *test = (struct test_spec *)arg;
+ struct ifobject *ifobject = test->ifobj_rx;
+ int err;
+
+ if (test->current_step == 1) {
+ thread_common_ops(test, ifobject);
+ } else {
+ xsk_clear_xskmap(ifobject->xskmap);
+ err = xsk_update_xskmap(ifobject->xskmap, ifobject->xsk->xsk, 0);
+ if (err) {
+ ksft_print_msg("Error: Failed to update xskmap, error %s\n",
+ strerror(-err));
+ exit_with_error(-err);
+ }
+ }
+
+ pthread_barrier_wait(&barr);
+
+ err = receive_pkts(test);
+
+ if (!err && ifobject->validation_func)
+ err = ifobject->validation_func(ifobject);
+ if (err)
+ report_failure(test);
+
+ pthread_exit(NULL);
+}
+
+static u64 ceil_u64(u64 a, u64 b)
+{
+ return (a + b - 1) / b;
+}
+
+static void testapp_clean_xsk_umem(struct ifobject *ifobj)
+{
+ u64 umem_sz = ifobj->umem->num_frames * ifobj->umem->frame_size;
+
+ if (ifobj->shared_umem)
+ umem_sz *= 2;
+
+ umem_sz = ceil_u64(umem_sz, HUGEPAGE_SIZE) * HUGEPAGE_SIZE;
+ xsk_umem__delete(ifobj->umem->umem);
+ munmap(ifobj->umem->buffer, umem_sz);
+}
+
+static void handler(int signum)
+{
+ pthread_exit(NULL);
+}
+
+static bool xdp_prog_changed_rx(struct test_spec *test)
+{
+ struct ifobject *ifobj = test->ifobj_rx;
+
+ return ifobj->xdp_prog != test->xdp_prog_rx || ifobj->mode != test->mode;
+}
+
+static bool xdp_prog_changed_tx(struct test_spec *test)
+{
+ struct ifobject *ifobj = test->ifobj_tx;
+
+ return ifobj->xdp_prog != test->xdp_prog_tx || ifobj->mode != test->mode;
+}
+
+static void xsk_reattach_xdp(struct ifobject *ifobj, struct bpf_program *xdp_prog,
+ struct bpf_map *xskmap, enum test_mode mode)
+{
+ int err;
+
+ xsk_detach_xdp_program(ifobj->ifindex, mode_to_xdp_flags(ifobj->mode));
+ err = xsk_attach_xdp_program(xdp_prog, ifobj->ifindex, mode_to_xdp_flags(mode));
+ if (err) {
+ ksft_print_msg("Error attaching XDP program\n");
+ exit_with_error(-err);
+ }
+
+ if (ifobj->mode != mode && (mode == TEST_MODE_DRV || mode == TEST_MODE_ZC))
+ if (!xsk_is_in_mode(ifobj->ifindex, XDP_FLAGS_DRV_MODE)) {
+ ksft_print_msg("ERROR: XDP prog not in DRV mode\n");
+ exit_with_error(EINVAL);
+ }
+
+ ifobj->xdp_prog = xdp_prog;
+ ifobj->xskmap = xskmap;
+ ifobj->mode = mode;
+}
+
+static void xsk_attach_xdp_progs(struct test_spec *test, struct ifobject *ifobj_rx,
+ struct ifobject *ifobj_tx)
+{
+ if (xdp_prog_changed_rx(test))
+ xsk_reattach_xdp(ifobj_rx, test->xdp_prog_rx, test->xskmap_rx, test->mode);
+
+ if (!ifobj_tx || ifobj_tx->shared_umem)
+ return;
+
+ if (xdp_prog_changed_tx(test))
+ xsk_reattach_xdp(ifobj_tx, test->xdp_prog_tx, test->xskmap_tx, test->mode);
+}
+
+static int __testapp_validate_traffic(struct test_spec *test, struct ifobject *ifobj1,
+ struct ifobject *ifobj2)
+{
+ pthread_t t0, t1;
+ int err;
+
+ if (test->mtu > MAX_ETH_PKT_SIZE) {
+ if (test->mode == TEST_MODE_ZC && (!ifobj1->multi_buff_zc_supp ||
+ (ifobj2 && !ifobj2->multi_buff_zc_supp))) {
+ ksft_test_result_skip("Multi buffer for zero-copy not supported.\n");
+ return TEST_SKIP;
+ }
+ if (test->mode != TEST_MODE_ZC && (!ifobj1->multi_buff_supp ||
+ (ifobj2 && !ifobj2->multi_buff_supp))) {
+ ksft_test_result_skip("Multi buffer not supported.\n");
+ return TEST_SKIP;
+ }
+ }
+ err = test_spec_set_mtu(test, test->mtu);
+ if (err) {
+ ksft_print_msg("Error, could not set mtu.\n");
+ exit_with_error(err);
+ }
+
+ if (ifobj2) {
+ if (pthread_barrier_init(&barr, NULL, 2))
+ exit_with_error(errno);
+ pkt_stream_reset(ifobj2->xsk->pkt_stream);
+ }
+
+ test->current_step++;
+ pkt_stream_reset(ifobj1->xsk->pkt_stream);
+ pkts_in_flight = 0;
+
+ signal(SIGUSR1, handler);
+ /*Spawn RX thread */
+ pthread_create(&t0, NULL, ifobj1->func_ptr, test);
+
+ if (ifobj2) {
+ pthread_barrier_wait(&barr);
+ if (pthread_barrier_destroy(&barr))
+ exit_with_error(errno);
+
+ /*Spawn TX thread */
+ pthread_create(&t1, NULL, ifobj2->func_ptr, test);
+
+ pthread_join(t1, NULL);
+ }
+
+ if (!ifobj2)
+ pthread_kill(t0, SIGUSR1);
+ else
+ pthread_join(t0, NULL);
+
+ if (test->total_steps == test->current_step || test->fail) {
+ u32 i;
+
+ if (ifobj2)
+ for (i = 0; i < test->nb_sockets; i++)
+ xsk_socket__delete(ifobj2->xsk_arr[i].xsk);
+
+ for (i = 0; i < test->nb_sockets; i++)
+ xsk_socket__delete(ifobj1->xsk_arr[i].xsk);
+
+ testapp_clean_xsk_umem(ifobj1);
+ if (ifobj2 && !ifobj2->shared_umem)
+ testapp_clean_xsk_umem(ifobj2);
+ }
+
+ return !!test->fail;
+}
+
+static int testapp_validate_traffic(struct test_spec *test)
+{
+ struct ifobject *ifobj_rx = test->ifobj_rx;
+ struct ifobject *ifobj_tx = test->ifobj_tx;
+
+ if ((ifobj_rx->umem->unaligned_mode && !ifobj_rx->unaligned_supp) ||
+ (ifobj_tx->umem->unaligned_mode && !ifobj_tx->unaligned_supp)) {
+ ksft_test_result_skip("No huge pages present.\n");
+ return TEST_SKIP;
+ }
+
+ xsk_attach_xdp_progs(test, ifobj_rx, ifobj_tx);
+ return __testapp_validate_traffic(test, ifobj_rx, ifobj_tx);
+}
+
+static int testapp_validate_traffic_single_thread(struct test_spec *test, struct ifobject *ifobj)
+{
+ return __testapp_validate_traffic(test, ifobj, NULL);
+}
+
+static int testapp_teardown(struct test_spec *test)
+{
+ int i;
+
+ for (i = 0; i < MAX_TEARDOWN_ITER; i++) {
+ if (testapp_validate_traffic(test))
+ return TEST_FAILURE;
+ test_spec_reset(test);
+ }
+
+ return TEST_PASS;
+}
+
+static void swap_directions(struct ifobject **ifobj1, struct ifobject **ifobj2)
+{
+ thread_func_t tmp_func_ptr = (*ifobj1)->func_ptr;
+ struct ifobject *tmp_ifobj = (*ifobj1);
+
+ (*ifobj1)->func_ptr = (*ifobj2)->func_ptr;
+ (*ifobj2)->func_ptr = tmp_func_ptr;
+
+ *ifobj1 = *ifobj2;
+ *ifobj2 = tmp_ifobj;
+}
+
+static int testapp_bidirectional(struct test_spec *test)
+{
+ int res;
+
+ test->ifobj_tx->rx_on = true;
+ test->ifobj_rx->tx_on = true;
+ test->total_steps = 2;
+ if (testapp_validate_traffic(test))
+ return TEST_FAILURE;
+
+ print_verbose("Switching Tx/Rx direction\n");
+ swap_directions(&test->ifobj_rx, &test->ifobj_tx);
+ res = __testapp_validate_traffic(test, test->ifobj_rx, test->ifobj_tx);
+
+ swap_directions(&test->ifobj_rx, &test->ifobj_tx);
+ return res;
+}
+
+static int swap_xsk_resources(struct test_spec *test)
+{
+ int ret;
+
+ test->ifobj_tx->xsk_arr[0].pkt_stream = NULL;
+ test->ifobj_rx->xsk_arr[0].pkt_stream = NULL;
+ test->ifobj_tx->xsk_arr[1].pkt_stream = test->tx_pkt_stream_default;
+ test->ifobj_rx->xsk_arr[1].pkt_stream = test->rx_pkt_stream_default;
+ test->ifobj_tx->xsk = &test->ifobj_tx->xsk_arr[1];
+ test->ifobj_rx->xsk = &test->ifobj_rx->xsk_arr[1];
+
+ ret = xsk_update_xskmap(test->ifobj_rx->xskmap, test->ifobj_rx->xsk->xsk, 0);
+ if (ret)
+ return TEST_FAILURE;
+
+ return TEST_PASS;
+}
+
+static int testapp_xdp_prog_cleanup(struct test_spec *test)
+{
+ test->total_steps = 2;
+ test->nb_sockets = 2;
+ if (testapp_validate_traffic(test))
+ return TEST_FAILURE;
+
+ if (swap_xsk_resources(test))
+ return TEST_FAILURE;
+ return testapp_validate_traffic(test);
+}
+
+static int testapp_headroom(struct test_spec *test)
+{
+ test->ifobj_rx->umem->frame_headroom = UMEM_HEADROOM_TEST_SIZE;
+ return testapp_validate_traffic(test);
+}
+
+static int testapp_stats_rx_dropped(struct test_spec *test)
+{
+ if (test->mode == TEST_MODE_ZC) {
+ ksft_test_result_skip("Can not run RX_DROPPED test for ZC mode\n");
+ return TEST_SKIP;
+ }
+
+ pkt_stream_replace_half(test, MIN_PKT_SIZE * 4, 0);
+ test->ifobj_rx->umem->frame_headroom = test->ifobj_rx->umem->frame_size -
+ XDP_PACKET_HEADROOM - MIN_PKT_SIZE * 3;
+ pkt_stream_receive_half(test);
+ test->ifobj_rx->validation_func = validate_rx_dropped;
+ return testapp_validate_traffic(test);
+}
+
+static int testapp_stats_tx_invalid_descs(struct test_spec *test)
+{
+ pkt_stream_replace_half(test, XSK_UMEM__INVALID_FRAME_SIZE, 0);
+ test->ifobj_tx->validation_func = validate_tx_invalid_descs;
+ return testapp_validate_traffic(test);
+}
+
+static int testapp_stats_rx_full(struct test_spec *test)
+{
+ pkt_stream_replace(test, DEFAULT_UMEM_BUFFERS + DEFAULT_UMEM_BUFFERS / 2, MIN_PKT_SIZE);
+ test->ifobj_rx->xsk->pkt_stream = pkt_stream_generate(DEFAULT_UMEM_BUFFERS, MIN_PKT_SIZE);
+
+ test->ifobj_rx->xsk->rxqsize = DEFAULT_UMEM_BUFFERS;
+ test->ifobj_rx->release_rx = false;
+ test->ifobj_rx->validation_func = validate_rx_full;
+ return testapp_validate_traffic(test);
+}
+
+static int testapp_stats_fill_empty(struct test_spec *test)
+{
+ pkt_stream_replace(test, DEFAULT_UMEM_BUFFERS + DEFAULT_UMEM_BUFFERS / 2, MIN_PKT_SIZE);
+ test->ifobj_rx->xsk->pkt_stream = pkt_stream_generate(DEFAULT_UMEM_BUFFERS, MIN_PKT_SIZE);
+
+ test->ifobj_rx->use_fill_ring = false;
+ test->ifobj_rx->validation_func = validate_fill_empty;
+ return testapp_validate_traffic(test);
+}
+
+static int testapp_send_receive_unaligned(struct test_spec *test)
+{
+ test->ifobj_tx->umem->unaligned_mode = true;
+ test->ifobj_rx->umem->unaligned_mode = true;
+ /* Let half of the packets straddle a 4K buffer boundary */
+ pkt_stream_replace_half(test, MIN_PKT_SIZE, -MIN_PKT_SIZE / 2);
+
+ return testapp_validate_traffic(test);
+}
+
+static int testapp_send_receive_unaligned_mb(struct test_spec *test)
+{
+ test->mtu = MAX_ETH_JUMBO_SIZE;
+ test->ifobj_tx->umem->unaligned_mode = true;
+ test->ifobj_rx->umem->unaligned_mode = true;
+ pkt_stream_replace(test, DEFAULT_PKT_CNT, MAX_ETH_JUMBO_SIZE);
+ return testapp_validate_traffic(test);
+}
+
+static int testapp_single_pkt(struct test_spec *test)
+{
+ struct pkt pkts[] = {{0, MIN_PKT_SIZE, 0, true}};
+
+ pkt_stream_generate_custom(test, pkts, ARRAY_SIZE(pkts));
+ return testapp_validate_traffic(test);
+}
+
+static int testapp_send_receive_mb(struct test_spec *test)
+{
+ test->mtu = MAX_ETH_JUMBO_SIZE;
+ pkt_stream_replace(test, DEFAULT_PKT_CNT, MAX_ETH_JUMBO_SIZE);
+
+ return testapp_validate_traffic(test);
+}
+
+static int testapp_invalid_desc_mb(struct test_spec *test)
+{
+ struct xsk_umem_info *umem = test->ifobj_tx->umem;
+ u64 umem_size = umem->num_frames * umem->frame_size;
+ struct pkt pkts[] = {
+ /* Valid packet for synch to start with */
+ {0, MIN_PKT_SIZE, 0, true, 0},
+ /* Zero frame len is not legal */
+ {0, XSK_UMEM__LARGE_FRAME_SIZE, 0, false, XDP_PKT_CONTD},
+ {0, XSK_UMEM__LARGE_FRAME_SIZE, 0, false, XDP_PKT_CONTD},
+ {0, 0, 0, false, 0},
+ /* Invalid address in the second frame */
+ {0, XSK_UMEM__LARGE_FRAME_SIZE, 0, false, XDP_PKT_CONTD},
+ {umem_size, XSK_UMEM__LARGE_FRAME_SIZE, 0, false, XDP_PKT_CONTD},
+ /* Invalid len in the middle */
+ {0, XSK_UMEM__LARGE_FRAME_SIZE, 0, false, XDP_PKT_CONTD},
+ {0, XSK_UMEM__INVALID_FRAME_SIZE, 0, false, XDP_PKT_CONTD},
+ /* Invalid options in the middle */
+ {0, XSK_UMEM__LARGE_FRAME_SIZE, 0, false, XDP_PKT_CONTD},
+ {0, XSK_UMEM__LARGE_FRAME_SIZE, 0, false, XSK_DESC__INVALID_OPTION},
+ /* Transmit 2 frags, receive 3 */
+ {0, XSK_UMEM__MAX_FRAME_SIZE, 0, true, XDP_PKT_CONTD},
+ {0, XSK_UMEM__MAX_FRAME_SIZE, 0, true, 0},
+ /* Middle frame crosses chunk boundary with small length */
+ {0, XSK_UMEM__LARGE_FRAME_SIZE, 0, false, XDP_PKT_CONTD},
+ {-MIN_PKT_SIZE / 2, MIN_PKT_SIZE, 0, false, 0},
+ /* Valid packet for synch so that something is received */
+ {0, MIN_PKT_SIZE, 0, true, 0}};
+
+ if (umem->unaligned_mode) {
+ /* Crossing a chunk boundary allowed */
+ pkts[12].valid = true;
+ pkts[13].valid = true;
+ }
+
+ test->mtu = MAX_ETH_JUMBO_SIZE;
+ pkt_stream_generate_custom(test, pkts, ARRAY_SIZE(pkts));
+ return testapp_validate_traffic(test);
+}
+
+static int testapp_invalid_desc(struct test_spec *test)
+{
+ struct xsk_umem_info *umem = test->ifobj_tx->umem;
+ u64 umem_size = umem->num_frames * umem->frame_size;
+ struct pkt pkts[] = {
+ /* Zero packet address allowed */
+ {0, MIN_PKT_SIZE, 0, true},
+ /* Allowed packet */
+ {0, MIN_PKT_SIZE, 0, true},
+ /* Straddling the start of umem */
+ {-2, MIN_PKT_SIZE, 0, false},
+ /* Packet too large */
+ {0, XSK_UMEM__INVALID_FRAME_SIZE, 0, false},
+ /* Up to end of umem allowed */
+ {umem_size - MIN_PKT_SIZE - 2 * umem->frame_size, MIN_PKT_SIZE, 0, true},
+ /* After umem ends */
+ {umem_size, MIN_PKT_SIZE, 0, false},
+ /* Straddle the end of umem */
+ {umem_size - MIN_PKT_SIZE / 2, MIN_PKT_SIZE, 0, false},
+ /* Straddle a 4K boundary */
+ {0x1000 - MIN_PKT_SIZE / 2, MIN_PKT_SIZE, 0, false},
+ /* Straddle a 2K boundary */
+ {0x800 - MIN_PKT_SIZE / 2, MIN_PKT_SIZE, 0, true},
+ /* Valid packet for synch so that something is received */
+ {0, MIN_PKT_SIZE, 0, true}};
+
+ if (umem->unaligned_mode) {
+ /* Crossing a page boundary allowed */
+ pkts[7].valid = true;
+ }
+ if (umem->frame_size == XSK_UMEM__DEFAULT_FRAME_SIZE / 2) {
+ /* Crossing a 2K frame size boundary not allowed */
+ pkts[8].valid = false;
+ }
+
+ if (test->ifobj_tx->shared_umem) {
+ pkts[4].offset += umem_size;
+ pkts[5].offset += umem_size;
+ pkts[6].offset += umem_size;
+ }
+
+ pkt_stream_generate_custom(test, pkts, ARRAY_SIZE(pkts));
+ return testapp_validate_traffic(test);
+}
+
+static int testapp_xdp_drop(struct test_spec *test)
+{
+ struct xsk_xdp_progs *skel_rx = test->ifobj_rx->xdp_progs;
+ struct xsk_xdp_progs *skel_tx = test->ifobj_tx->xdp_progs;
+
+ test_spec_set_xdp_prog(test, skel_rx->progs.xsk_xdp_drop, skel_tx->progs.xsk_xdp_drop,
+ skel_rx->maps.xsk, skel_tx->maps.xsk);
+
+ pkt_stream_receive_half(test);
+ return testapp_validate_traffic(test);
+}
+
+static int testapp_xdp_metadata_copy(struct test_spec *test)
+{
+ struct xsk_xdp_progs *skel_rx = test->ifobj_rx->xdp_progs;
+ struct xsk_xdp_progs *skel_tx = test->ifobj_tx->xdp_progs;
+ struct bpf_map *data_map;
+ int count = 0;
+ int key = 0;
+
+ test_spec_set_xdp_prog(test, skel_rx->progs.xsk_xdp_populate_metadata,
+ skel_tx->progs.xsk_xdp_populate_metadata,
+ skel_rx->maps.xsk, skel_tx->maps.xsk);
+ test->ifobj_rx->use_metadata = true;
+
+ data_map = bpf_object__find_map_by_name(skel_rx->obj, "xsk_xdp_.bss");
+ if (!data_map || !bpf_map__is_internal(data_map)) {
+ ksft_print_msg("Error: could not find bss section of XDP program\n");
+ return TEST_FAILURE;
+ }
+
+ if (bpf_map_update_elem(bpf_map__fd(data_map), &key, &count, BPF_ANY)) {
+ ksft_print_msg("Error: could not update count element\n");
+ return TEST_FAILURE;
+ }
+
+ return testapp_validate_traffic(test);
+}
+
+static int testapp_xdp_shared_umem(struct test_spec *test)
+{
+ struct xsk_xdp_progs *skel_rx = test->ifobj_rx->xdp_progs;
+ struct xsk_xdp_progs *skel_tx = test->ifobj_tx->xdp_progs;
+
+ test->total_steps = 1;
+ test->nb_sockets = 2;
+
+ test_spec_set_xdp_prog(test, skel_rx->progs.xsk_xdp_shared_umem,
+ skel_tx->progs.xsk_xdp_shared_umem,
+ skel_rx->maps.xsk, skel_tx->maps.xsk);
+
+ pkt_stream_even_odd_sequence(test);
+
+ return testapp_validate_traffic(test);
+}
+
+static int testapp_poll_txq_tmout(struct test_spec *test)
+{
+ test->ifobj_tx->use_poll = true;
+ /* create invalid frame by set umem frame_size and pkt length equal to 2048 */
+ test->ifobj_tx->umem->frame_size = 2048;
+ pkt_stream_replace(test, 2 * DEFAULT_PKT_CNT, 2048);
+ return testapp_validate_traffic_single_thread(test, test->ifobj_tx);
+}
+
+static int testapp_poll_rxq_tmout(struct test_spec *test)
+{
+ test->ifobj_rx->use_poll = true;
+ return testapp_validate_traffic_single_thread(test, test->ifobj_rx);
+}
+
+static int testapp_too_many_frags(struct test_spec *test)
+{
+ struct pkt pkts[2 * XSK_DESC__MAX_SKB_FRAGS + 2] = {};
+ u32 max_frags, i;
+
+ if (test->mode == TEST_MODE_ZC)
+ max_frags = test->ifobj_tx->xdp_zc_max_segs;
+ else
+ max_frags = XSK_DESC__MAX_SKB_FRAGS;
+
+ test->mtu = MAX_ETH_JUMBO_SIZE;
+
+ /* Valid packet for synch */
+ pkts[0].len = MIN_PKT_SIZE;
+ pkts[0].valid = true;
+
+ /* One valid packet with the max amount of frags */
+ for (i = 1; i < max_frags + 1; i++) {
+ pkts[i].len = MIN_PKT_SIZE;
+ pkts[i].options = XDP_PKT_CONTD;
+ pkts[i].valid = true;
+ }
+ pkts[max_frags].options = 0;
+
+ /* An invalid packet with the max amount of frags but signals packet
+ * continues on the last frag
+ */
+ for (i = max_frags + 1; i < 2 * max_frags + 1; i++) {
+ pkts[i].len = MIN_PKT_SIZE;
+ pkts[i].options = XDP_PKT_CONTD;
+ pkts[i].valid = false;
+ }
+
+ /* Valid packet for synch */
+ pkts[2 * max_frags + 1].len = MIN_PKT_SIZE;
+ pkts[2 * max_frags + 1].valid = true;
+
+ pkt_stream_generate_custom(test, pkts, 2 * max_frags + 2);
+ return testapp_validate_traffic(test);
+}
+
+static int xsk_load_xdp_programs(struct ifobject *ifobj)
+{
+ ifobj->xdp_progs = xsk_xdp_progs__open_and_load();
+ if (libbpf_get_error(ifobj->xdp_progs))
+ return libbpf_get_error(ifobj->xdp_progs);
+
+ return 0;
+}
+
+static void xsk_unload_xdp_programs(struct ifobject *ifobj)
+{
+ xsk_xdp_progs__destroy(ifobj->xdp_progs);
+}
+
+/* Simple test */
+static bool hugepages_present(void)
+{
+ size_t mmap_sz = 2 * DEFAULT_UMEM_BUFFERS * XSK_UMEM__DEFAULT_FRAME_SIZE;
+ void *bufs;
+
+ bufs = mmap(NULL, mmap_sz, PROT_READ | PROT_WRITE,
+ MAP_PRIVATE | MAP_ANONYMOUS | MAP_HUGETLB, -1, MAP_HUGE_2MB);
+ if (bufs == MAP_FAILED)
+ return false;
+
+ mmap_sz = ceil_u64(mmap_sz, HUGEPAGE_SIZE) * HUGEPAGE_SIZE;
+ munmap(bufs, mmap_sz);
+ return true;
+}
+
+static void init_iface(struct ifobject *ifobj, thread_func_t func_ptr)
+{
+ LIBBPF_OPTS(bpf_xdp_query_opts, query_opts);
+ int err;
+
+ ifobj->func_ptr = func_ptr;
+
+ err = xsk_load_xdp_programs(ifobj);
+ if (err) {
+ ksft_print_msg("Error loading XDP program\n");
+ exit_with_error(err);
+ }
+
+ if (hugepages_present())
+ ifobj->unaligned_supp = true;
+
+ err = bpf_xdp_query(ifobj->ifindex, XDP_FLAGS_DRV_MODE, &query_opts);
+ if (err) {
+ ksft_print_msg("Error querying XDP capabilities\n");
+ exit_with_error(-err);
+ }
+ if (query_opts.feature_flags & NETDEV_XDP_ACT_RX_SG)
+ ifobj->multi_buff_supp = true;
+ if (query_opts.feature_flags & NETDEV_XDP_ACT_XSK_ZEROCOPY) {
+ if (query_opts.xdp_zc_max_segs > 1) {
+ ifobj->multi_buff_zc_supp = true;
+ ifobj->xdp_zc_max_segs = query_opts.xdp_zc_max_segs;
+ } else {
+ ifobj->xdp_zc_max_segs = 0;
+ }
+ }
+}
+
+static int testapp_send_receive(struct test_spec *test)
+{
+ return testapp_validate_traffic(test);
+}
+
+static int testapp_send_receive_2k_frame(struct test_spec *test)
+{
+ test->ifobj_tx->umem->frame_size = 2048;
+ test->ifobj_rx->umem->frame_size = 2048;
+ pkt_stream_replace(test, DEFAULT_PKT_CNT, MIN_PKT_SIZE);
+ return testapp_validate_traffic(test);
+}
+
+static int testapp_poll_rx(struct test_spec *test)
+{
+ test->ifobj_rx->use_poll = true;
+ return testapp_validate_traffic(test);
+}
+
+static int testapp_poll_tx(struct test_spec *test)
+{
+ test->ifobj_tx->use_poll = true;
+ return testapp_validate_traffic(test);
+}
+
+static int testapp_aligned_inv_desc(struct test_spec *test)
+{
+ return testapp_invalid_desc(test);
+}
+
+static int testapp_aligned_inv_desc_2k_frame(struct test_spec *test)
+{
+ test->ifobj_tx->umem->frame_size = 2048;
+ test->ifobj_rx->umem->frame_size = 2048;
+ return testapp_invalid_desc(test);
+}
+
+static int testapp_unaligned_inv_desc(struct test_spec *test)
+{
+ test->ifobj_tx->umem->unaligned_mode = true;
+ test->ifobj_rx->umem->unaligned_mode = true;
+ return testapp_invalid_desc(test);
+}
+
+static int testapp_unaligned_inv_desc_4001_frame(struct test_spec *test)
+{
+ u64 page_size, umem_size;
+
+ /* Odd frame size so the UMEM doesn't end near a page boundary. */
+ test->ifobj_tx->umem->frame_size = 4001;
+ test->ifobj_rx->umem->frame_size = 4001;
+ test->ifobj_tx->umem->unaligned_mode = true;
+ test->ifobj_rx->umem->unaligned_mode = true;
+ /* This test exists to test descriptors that staddle the end of
+ * the UMEM but not a page.
+ */
+ page_size = sysconf(_SC_PAGESIZE);
+ umem_size = test->ifobj_tx->umem->num_frames * test->ifobj_tx->umem->frame_size;
+ assert(umem_size % page_size > MIN_PKT_SIZE);
+ assert(umem_size % page_size < page_size - MIN_PKT_SIZE);
+
+ return testapp_invalid_desc(test);
+}
+
+static int testapp_aligned_inv_desc_mb(struct test_spec *test)
+{
+ return testapp_invalid_desc_mb(test);
+}
+
+static int testapp_unaligned_inv_desc_mb(struct test_spec *test)
+{
+ test->ifobj_tx->umem->unaligned_mode = true;
+ test->ifobj_rx->umem->unaligned_mode = true;
+ return testapp_invalid_desc_mb(test);
+}
+
+static int testapp_xdp_metadata(struct test_spec *test)
+{
+ return testapp_xdp_metadata_copy(test);
+}
+
+static int testapp_xdp_metadata_mb(struct test_spec *test)
+{
+ test->mtu = MAX_ETH_JUMBO_SIZE;
+ return testapp_xdp_metadata_copy(test);
+}
+
+static void run_pkt_test(struct test_spec *test)
+{
+ int ret;
+
+ ret = test->test_func(test);
+
+ if (ret == TEST_PASS)
+ ksft_test_result_pass("PASS: %s %s%s\n", mode_string(test), busy_poll_string(test),
+ test->name);
+ pkt_stream_restore_default(test);
+}
+
+static struct ifobject *ifobject_create(void)
+{
+ struct ifobject *ifobj;
+
+ ifobj = calloc(1, sizeof(struct ifobject));
+ if (!ifobj)
+ return NULL;
+
+ ifobj->xsk_arr = calloc(MAX_SOCKETS, sizeof(*ifobj->xsk_arr));
+ if (!ifobj->xsk_arr)
+ goto out_xsk_arr;
+
+ ifobj->umem = calloc(1, sizeof(*ifobj->umem));
+ if (!ifobj->umem)
+ goto out_umem;
+
+ return ifobj;
+
+out_umem:
+ free(ifobj->xsk_arr);
+out_xsk_arr:
+ free(ifobj);
+ return NULL;
+}
+
+static void ifobject_delete(struct ifobject *ifobj)
+{
+ free(ifobj->umem);
+ free(ifobj->xsk_arr);
+ free(ifobj);
+}
+
+static bool is_xdp_supported(int ifindex)
+{
+ int flags = XDP_FLAGS_DRV_MODE;
+
+ LIBBPF_OPTS(bpf_link_create_opts, opts, .flags = flags);
+ struct bpf_insn insns[2] = {
+ BPF_MOV64_IMM(BPF_REG_0, XDP_PASS),
+ BPF_EXIT_INSN()
+ };
+ int prog_fd, insn_cnt = ARRAY_SIZE(insns);
+ int err;
+
+ prog_fd = bpf_prog_load(BPF_PROG_TYPE_XDP, NULL, "GPL", insns, insn_cnt, NULL);
+ if (prog_fd < 0)
+ return false;
+
+ err = bpf_xdp_attach(ifindex, prog_fd, flags, NULL);
+ if (err) {
+ close(prog_fd);
+ return false;
+ }
+
+ bpf_xdp_detach(ifindex, flags, NULL);
+ close(prog_fd);
+
+ return true;
+}
+
+static const struct test_spec tests[] = {
+ {.name = "SEND_RECEIVE", .test_func = testapp_send_receive},
+ {.name = "SEND_RECEIVE_2K_FRAME", .test_func = testapp_send_receive_2k_frame},
+ {.name = "SEND_RECEIVE_SINGLE_PKT", .test_func = testapp_single_pkt},
+ {.name = "POLL_RX", .test_func = testapp_poll_rx},
+ {.name = "POLL_TX", .test_func = testapp_poll_tx},
+ {.name = "POLL_RXQ_FULL", .test_func = testapp_poll_rxq_tmout},
+ {.name = "POLL_TXQ_FULL", .test_func = testapp_poll_txq_tmout},
+ {.name = "SEND_RECEIVE_UNALIGNED", .test_func = testapp_send_receive_unaligned},
+ {.name = "ALIGNED_INV_DESC", .test_func = testapp_aligned_inv_desc},
+ {.name = "ALIGNED_INV_DESC_2K_FRAME_SIZE", .test_func = testapp_aligned_inv_desc_2k_frame},
+ {.name = "UNALIGNED_INV_DESC", .test_func = testapp_unaligned_inv_desc},
+ {.name = "UNALIGNED_INV_DESC_4001_FRAME_SIZE",
+ .test_func = testapp_unaligned_inv_desc_4001_frame},
+ {.name = "UMEM_HEADROOM", .test_func = testapp_headroom},
+ {.name = "TEARDOWN", .test_func = testapp_teardown},
+ {.name = "BIDIRECTIONAL", .test_func = testapp_bidirectional},
+ {.name = "STAT_RX_DROPPED", .test_func = testapp_stats_rx_dropped},
+ {.name = "STAT_TX_INVALID", .test_func = testapp_stats_tx_invalid_descs},
+ {.name = "STAT_RX_FULL", .test_func = testapp_stats_rx_full},
+ {.name = "STAT_FILL_EMPTY", .test_func = testapp_stats_fill_empty},
+ {.name = "XDP_PROG_CLEANUP", .test_func = testapp_xdp_prog_cleanup},
+ {.name = "XDP_DROP_HALF", .test_func = testapp_xdp_drop},
+ {.name = "XDP_SHARED_UMEM", .test_func = testapp_xdp_shared_umem},
+ {.name = "XDP_METADATA_COPY", .test_func = testapp_xdp_metadata},
+ {.name = "XDP_METADATA_COPY_MULTI_BUFF", .test_func = testapp_xdp_metadata_mb},
+ {.name = "SEND_RECEIVE_9K_PACKETS", .test_func = testapp_send_receive_mb},
+ {.name = "SEND_RECEIVE_UNALIGNED_9K_PACKETS",
+ .test_func = testapp_send_receive_unaligned_mb},
+ {.name = "ALIGNED_INV_DESC_MULTI_BUFF", .test_func = testapp_aligned_inv_desc_mb},
+ {.name = "UNALIGNED_INV_DESC_MULTI_BUFF", .test_func = testapp_unaligned_inv_desc_mb},
+ {.name = "TOO_MANY_FRAGS", .test_func = testapp_too_many_frags},
+};
+
+static void print_tests(void)
+{
+ u32 i;
+
+ printf("Tests:\n");
+ for (i = 0; i < ARRAY_SIZE(tests); i++)
+ printf("%u: %s\n", i, tests[i].name);
+}
+
+int main(int argc, char **argv)
+{
+ struct pkt_stream *rx_pkt_stream_default;
+ struct pkt_stream *tx_pkt_stream_default;
+ struct ifobject *ifobj_tx, *ifobj_rx;
+ u32 i, j, failed_tests = 0, nb_tests;
+ int modes = TEST_MODE_SKB + 1;
+ struct test_spec test;
+ bool shared_netdev;
+
+ /* Use libbpf 1.0 API mode */
+ libbpf_set_strict_mode(LIBBPF_STRICT_ALL);
+
+ ifobj_tx = ifobject_create();
+ if (!ifobj_tx)
+ exit_with_error(ENOMEM);
+ ifobj_rx = ifobject_create();
+ if (!ifobj_rx)
+ exit_with_error(ENOMEM);
+
+ setlocale(LC_ALL, "");
+
+ parse_command_line(ifobj_tx, ifobj_rx, argc, argv);
+
+ if (opt_print_tests) {
+ print_tests();
+ ksft_exit_xpass();
+ }
+ if (opt_run_test != RUN_ALL_TESTS && opt_run_test >= ARRAY_SIZE(tests)) {
+ ksft_print_msg("Error: test %u does not exist.\n", opt_run_test);
+ ksft_exit_xfail();
+ }
+
+ shared_netdev = (ifobj_tx->ifindex == ifobj_rx->ifindex);
+ ifobj_tx->shared_umem = shared_netdev;
+ ifobj_rx->shared_umem = shared_netdev;
+
+ if (!validate_interface(ifobj_tx) || !validate_interface(ifobj_rx))
+ print_usage(argv);
+
+ if (is_xdp_supported(ifobj_tx->ifindex)) {
+ modes++;
+ if (ifobj_zc_avail(ifobj_tx))
+ modes++;
+ }
+
+ init_iface(ifobj_rx, worker_testapp_validate_rx);
+ init_iface(ifobj_tx, worker_testapp_validate_tx);
+
+ test_spec_init(&test, ifobj_tx, ifobj_rx, 0, &tests[0]);
+ tx_pkt_stream_default = pkt_stream_generate(DEFAULT_PKT_CNT, MIN_PKT_SIZE);
+ rx_pkt_stream_default = pkt_stream_generate(DEFAULT_PKT_CNT, MIN_PKT_SIZE);
+ if (!tx_pkt_stream_default || !rx_pkt_stream_default)
+ exit_with_error(ENOMEM);
+ test.tx_pkt_stream_default = tx_pkt_stream_default;
+ test.rx_pkt_stream_default = rx_pkt_stream_default;
+
+ if (opt_run_test == RUN_ALL_TESTS)
+ nb_tests = ARRAY_SIZE(tests);
+ else
+ nb_tests = 1;
+ if (opt_mode == TEST_MODE_ALL) {
+ ksft_set_plan(modes * nb_tests);
+ } else {
+ if (opt_mode == TEST_MODE_DRV && modes <= TEST_MODE_DRV) {
+ ksft_print_msg("Error: XDP_DRV mode not supported.\n");
+ ksft_exit_xfail();
+ }
+ if (opt_mode == TEST_MODE_ZC && modes <= TEST_MODE_ZC) {
+ ksft_print_msg("Error: zero-copy mode not supported.\n");
+ ksft_exit_xfail();
+ }
+
+ ksft_set_plan(nb_tests);
+ }
+
+ for (i = 0; i < modes; i++) {
+ if (opt_mode != TEST_MODE_ALL && i != opt_mode)
+ continue;
+
+ for (j = 0; j < ARRAY_SIZE(tests); j++) {
+ if (opt_run_test != RUN_ALL_TESTS && j != opt_run_test)
+ continue;
+
+ test_spec_init(&test, ifobj_tx, ifobj_rx, i, &tests[j]);
+ run_pkt_test(&test);
+ usleep(USLEEP_MAX);
+
+ if (test.fail)
+ failed_tests++;
+ }
+ }
+
+ pkt_stream_delete(tx_pkt_stream_default);
+ pkt_stream_delete(rx_pkt_stream_default);
+ xsk_unload_xdp_programs(ifobj_tx);
+ xsk_unload_xdp_programs(ifobj_rx);
+ ifobject_delete(ifobj_tx);
+ ifobject_delete(ifobj_rx);
+
+ if (failed_tests)
+ ksft_exit_fail();
+ else
+ ksft_exit_pass();
+}
diff --git a/tools/testing/selftests/bpf/xskxceiver.h b/tools/testing/selftests/bpf/xskxceiver.h
new file mode 100644
index 000000000000..f174df2d693f
--- /dev/null
+++ b/tools/testing/selftests/bpf/xskxceiver.h
@@ -0,0 +1,176 @@
+/* SPDX-License-Identifier: GPL-2.0
+ * Copyright(c) 2020 Intel Corporation.
+ */
+
+#ifndef XSKXCEIVER_H_
+#define XSKXCEIVER_H_
+
+#include <limits.h>
+
+#include "xsk_xdp_progs.skel.h"
+#include "xsk_xdp_common.h"
+
+#ifndef SOL_XDP
+#define SOL_XDP 283
+#endif
+
+#ifndef AF_XDP
+#define AF_XDP 44
+#endif
+
+#ifndef PF_XDP
+#define PF_XDP AF_XDP
+#endif
+
+#ifndef SO_BUSY_POLL_BUDGET
+#define SO_BUSY_POLL_BUDGET 70
+#endif
+
+#ifndef SO_PREFER_BUSY_POLL
+#define SO_PREFER_BUSY_POLL 69
+#endif
+
+#define TEST_PASS 0
+#define TEST_FAILURE -1
+#define TEST_CONTINUE 1
+#define TEST_SKIP 2
+#define MAX_INTERFACES 2
+#define MAX_INTERFACE_NAME_CHARS 16
+#define MAX_TEST_NAME_SIZE 48
+#define MAX_TEARDOWN_ITER 10
+#define PKT_HDR_SIZE (sizeof(struct ethhdr) + 2) /* Just to align the data in the packet */
+#define MIN_PKT_SIZE 64
+#define MAX_ETH_PKT_SIZE 1518
+#define MAX_ETH_JUMBO_SIZE 9000
+#define USLEEP_MAX 10000
+#define SOCK_RECONF_CTR 10
+#define BATCH_SIZE 64
+#define POLL_TMOUT 1000
+#define THREAD_TMOUT 3
+#define DEFAULT_PKT_CNT (4 * 1024)
+#define DEFAULT_UMEM_BUFFERS (DEFAULT_PKT_CNT / 4)
+#define RX_FULL_RXQSIZE 32
+#define UMEM_HEADROOM_TEST_SIZE 128
+#define XSK_UMEM__INVALID_FRAME_SIZE (MAX_ETH_JUMBO_SIZE + 1)
+#define XSK_UMEM__LARGE_FRAME_SIZE (3 * 1024)
+#define XSK_UMEM__MAX_FRAME_SIZE (4 * 1024)
+#define XSK_DESC__INVALID_OPTION (0xffff)
+#define XSK_DESC__MAX_SKB_FRAGS 18
+#define HUGEPAGE_SIZE (2 * 1024 * 1024)
+#define PKT_DUMP_NB_TO_PRINT 16
+#define RUN_ALL_TESTS UINT_MAX
+#define NUM_MAC_ADDRESSES 4
+
+#define print_verbose(x...) do { if (opt_verbose) ksft_print_msg(x); } while (0)
+
+enum test_mode {
+ TEST_MODE_SKB,
+ TEST_MODE_DRV,
+ TEST_MODE_ZC,
+ TEST_MODE_ALL
+};
+
+struct xsk_umem_info {
+ struct xsk_ring_prod fq;
+ struct xsk_ring_cons cq;
+ struct xsk_umem *umem;
+ u64 next_buffer;
+ u32 num_frames;
+ u32 frame_headroom;
+ void *buffer;
+ u32 frame_size;
+ u32 base_addr;
+ bool unaligned_mode;
+};
+
+struct xsk_socket_info {
+ struct xsk_ring_cons rx;
+ struct xsk_ring_prod tx;
+ struct xsk_umem_info *umem;
+ struct xsk_socket *xsk;
+ struct pkt_stream *pkt_stream;
+ u32 outstanding_tx;
+ u32 rxqsize;
+ u8 dst_mac[ETH_ALEN];
+ u8 src_mac[ETH_ALEN];
+};
+
+struct pkt {
+ int offset;
+ u32 len;
+ u32 pkt_nb;
+ bool valid;
+ u16 options;
+};
+
+struct pkt_stream {
+ u32 nb_pkts;
+ u32 current_pkt_nb;
+ struct pkt *pkts;
+ u32 max_pkt_len;
+ u32 nb_rx_pkts;
+ u32 nb_valid_entries;
+ bool verbatim;
+};
+
+struct ifobject;
+struct test_spec;
+typedef int (*validation_func_t)(struct ifobject *ifobj);
+typedef void *(*thread_func_t)(void *arg);
+typedef int (*test_func_t)(struct test_spec *test);
+
+struct ifobject {
+ char ifname[MAX_INTERFACE_NAME_CHARS];
+ struct xsk_socket_info *xsk;
+ struct xsk_socket_info *xsk_arr;
+ struct xsk_umem_info *umem;
+ thread_func_t func_ptr;
+ validation_func_t validation_func;
+ struct xsk_xdp_progs *xdp_progs;
+ struct bpf_map *xskmap;
+ struct bpf_program *xdp_prog;
+ enum test_mode mode;
+ int ifindex;
+ int mtu;
+ u32 bind_flags;
+ u32 xdp_zc_max_segs;
+ bool tx_on;
+ bool rx_on;
+ bool use_poll;
+ bool busy_poll;
+ bool use_fill_ring;
+ bool release_rx;
+ bool shared_umem;
+ bool use_metadata;
+ bool unaligned_supp;
+ bool multi_buff_supp;
+ bool multi_buff_zc_supp;
+};
+
+struct test_spec {
+ struct ifobject *ifobj_tx;
+ struct ifobject *ifobj_rx;
+ struct pkt_stream *tx_pkt_stream_default;
+ struct pkt_stream *rx_pkt_stream_default;
+ struct bpf_program *xdp_prog_rx;
+ struct bpf_program *xdp_prog_tx;
+ struct bpf_map *xskmap_rx;
+ struct bpf_map *xskmap_tx;
+ test_func_t test_func;
+ int mtu;
+ u16 total_steps;
+ u16 current_step;
+ u16 nb_sockets;
+ bool fail;
+ enum test_mode mode;
+ char name[MAX_TEST_NAME_SIZE];
+};
+
+pthread_barrier_t barr;
+pthread_mutex_t pacing_mutex = PTHREAD_MUTEX_INITIALIZER;
+
+int pkts_in_flight;
+
+static const u8 g_mac[ETH_ALEN] = {0x55, 0x44, 0x33, 0x22, 0x11, 0x00};
+
+#endif /* XSKXCEIVER_H_ */
diff --git a/tools/testing/selftests/breakpoints/breakpoint_test.c b/tools/testing/selftests/breakpoints/breakpoint_test.c
index 3266cc9293fe..d46962a24724 100644
--- a/tools/testing/selftests/breakpoints/breakpoint_test.c
+++ b/tools/testing/selftests/breakpoints/breakpoint_test.c
@@ -284,9 +284,9 @@ static void check_success(const char *msg)
nr_tests++;
if (ret)
- ksft_test_result_pass(msg);
+ ksft_test_result_pass("%s", msg);
else
- ksft_test_result_fail(msg);
+ ksft_test_result_fail("%s", msg);
}
static void launch_instruction_breakpoints(char *buf, int local, int global)
diff --git a/tools/testing/selftests/breakpoints/breakpoint_test_arm64.c b/tools/testing/selftests/breakpoints/breakpoint_test_arm64.c
index ad41ea69001b..e7041816085a 100644
--- a/tools/testing/selftests/breakpoints/breakpoint_test_arm64.c
+++ b/tools/testing/selftests/breakpoints/breakpoint_test_arm64.c
@@ -145,7 +145,7 @@ static bool run_test(int wr_size, int wp_size, int wr, int wp)
if (ptrace(PTRACE_CONT, pid, NULL, NULL) < 0) {
ksft_print_msg(
- "ptrace(PTRACE_SINGLESTEP) failed: %s\n",
+ "ptrace(PTRACE_CONT) failed: %s\n",
strerror(errno));
return false;
}
@@ -159,7 +159,7 @@ static bool run_test(int wr_size, int wp_size, int wr, int wp)
}
alarm(0);
if (WIFEXITED(status)) {
- ksft_print_msg("child did not single-step\n");
+ ksft_print_msg("child exited prematurely\n");
return false;
}
if (!WIFSTOPPED(status)) {
diff --git a/tools/testing/selftests/breakpoints/step_after_suspend_test.c b/tools/testing/selftests/breakpoints/step_after_suspend_test.c
index b3ead29c6089..b8703c499d28 100644
--- a/tools/testing/selftests/breakpoints/step_after_suspend_test.c
+++ b/tools/testing/selftests/breakpoints/step_after_suspend_test.c
@@ -47,7 +47,7 @@ void child(int cpu)
_exit(0);
}
-bool run_test(int cpu)
+int run_test(int cpu)
{
int status;
pid_t pid = fork();
@@ -55,7 +55,7 @@ bool run_test(int cpu)
if (pid < 0) {
ksft_print_msg("fork() failed: %s\n", strerror(errno));
- return false;
+ return KSFT_FAIL;
}
if (pid == 0)
child(cpu);
@@ -63,67 +63,68 @@ bool run_test(int cpu)
wpid = waitpid(pid, &status, __WALL);
if (wpid != pid) {
ksft_print_msg("waitpid() failed: %s\n", strerror(errno));
- return false;
+ return KSFT_FAIL;
}
if (!WIFSTOPPED(status)) {
ksft_print_msg("child did not stop: %s\n", strerror(errno));
- return false;
+ return KSFT_FAIL;
}
if (WSTOPSIG(status) != SIGSTOP) {
ksft_print_msg("child did not stop with SIGSTOP: %s\n",
strerror(errno));
- return false;
+ return KSFT_FAIL;
}
if (ptrace(PTRACE_SINGLESTEP, pid, NULL, NULL) < 0) {
if (errno == EIO) {
- ksft_exit_skip(
+ ksft_print_msg(
"ptrace(PTRACE_SINGLESTEP) not supported on this architecture: %s\n",
strerror(errno));
+ return KSFT_SKIP;
}
ksft_print_msg("ptrace(PTRACE_SINGLESTEP) failed: %s\n",
strerror(errno));
- return false;
+ return KSFT_FAIL;
}
wpid = waitpid(pid, &status, __WALL);
if (wpid != pid) {
- ksft_print_msg("waitpid() failed: $s\n", strerror(errno));
- return false;
+ ksft_print_msg("waitpid() failed: %s\n", strerror(errno));
+ return KSFT_FAIL;
}
if (WIFEXITED(status)) {
ksft_print_msg("child did not single-step: %s\n",
strerror(errno));
- return false;
+ return KSFT_FAIL;
}
if (!WIFSTOPPED(status)) {
ksft_print_msg("child did not stop: %s\n", strerror(errno));
- return false;
+ return KSFT_FAIL;
}
if (WSTOPSIG(status) != SIGTRAP) {
ksft_print_msg("child did not stop with SIGTRAP: %s\n",
strerror(errno));
- return false;
+ return KSFT_FAIL;
}
if (ptrace(PTRACE_CONT, pid, NULL, NULL) < 0) {
ksft_print_msg("ptrace(PTRACE_CONT) failed: %s\n",
strerror(errno));
- return false;
+ return KSFT_FAIL;
}
wpid = waitpid(pid, &status, __WALL);
if (wpid != pid) {
ksft_print_msg("waitpid() failed: %s\n", strerror(errno));
- return false;
+ return KSFT_FAIL;
}
if (!WIFEXITED(status)) {
ksft_print_msg("child did not exit after PTRACE_CONT: %s\n",
strerror(errno));
- return false;
+ return KSFT_FAIL;
}
- return true;
+ return KSFT_PASS;
}
void suspend(void)
@@ -183,32 +184,38 @@ int main(int argc, char **argv)
}
}
+ err = sched_getaffinity(0, sizeof(available_cpus), &available_cpus);
+ if (err < 0)
+ ksft_exit_fail_msg("sched_getaffinity() failed\n");
+
for (cpu = 0; cpu < CPU_SETSIZE; cpu++) {
if (!CPU_ISSET(cpu, &available_cpus))
continue;
tests++;
}
- ksft_set_plan(tests);
if (do_suspend)
suspend();
- err = sched_getaffinity(0, sizeof(available_cpus), &available_cpus);
- if (err < 0)
- ksft_exit_fail_msg("sched_getaffinity() failed\n");
-
+ ksft_set_plan(tests);
for (cpu = 0; cpu < CPU_SETSIZE; cpu++) {
- bool test_success;
+ int test_success;
if (!CPU_ISSET(cpu, &available_cpus))
continue;
test_success = run_test(cpu);
- if (test_success) {
+ switch (test_success) {
+ case KSFT_PASS:
ksft_test_result_pass("CPU %d\n", cpu);
- } else {
+ break;
+ case KSFT_SKIP:
+ ksft_test_result_skip("CPU %d\n", cpu);
+ break;
+ case KSFT_FAIL:
ksft_test_result_fail("CPU %d\n", cpu);
succeeded = false;
+ break;
}
}
diff --git a/tools/testing/selftests/android/ion/.gitignore b/tools/testing/selftests/cachestat/.gitignore
index 78eae9972bb1..d6c30b43a4bb 100644
--- a/tools/testing/selftests/android/ion/.gitignore
+++ b/tools/testing/selftests/cachestat/.gitignore
@@ -1,4 +1,2 @@
# SPDX-License-Identifier: GPL-2.0-only
-ionapp_export
-ionapp_import
-ionmap_test
+test_cachestat
diff --git a/tools/testing/selftests/cachestat/Makefile b/tools/testing/selftests/cachestat/Makefile
new file mode 100644
index 000000000000..778b54ebb036
--- /dev/null
+++ b/tools/testing/selftests/cachestat/Makefile
@@ -0,0 +1,8 @@
+# SPDX-License-Identifier: GPL-2.0
+TEST_GEN_PROGS := test_cachestat
+
+CFLAGS += $(KHDR_INCLUDES)
+CFLAGS += -Wall
+LDLIBS += -lrt
+
+include ../lib.mk
diff --git a/tools/testing/selftests/cachestat/test_cachestat.c b/tools/testing/selftests/cachestat/test_cachestat.c
new file mode 100644
index 000000000000..b171fd53b004
--- /dev/null
+++ b/tools/testing/selftests/cachestat/test_cachestat.c
@@ -0,0 +1,318 @@
+// SPDX-License-Identifier: GPL-2.0
+#define _GNU_SOURCE
+
+#include <stdio.h>
+#include <stdbool.h>
+#include <linux/kernel.h>
+#include <linux/magic.h>
+#include <linux/mman.h>
+#include <sys/mman.h>
+#include <sys/shm.h>
+#include <sys/syscall.h>
+#include <sys/vfs.h>
+#include <unistd.h>
+#include <string.h>
+#include <fcntl.h>
+#include <errno.h>
+
+#include "../kselftest.h"
+
+#define NR_TESTS 9
+
+static const char * const dev_files[] = {
+ "/dev/zero", "/dev/null", "/dev/urandom",
+ "/proc/version", "/proc"
+};
+
+void print_cachestat(struct cachestat *cs)
+{
+ ksft_print_msg(
+ "Using cachestat: Cached: %llu, Dirty: %llu, Writeback: %llu, Evicted: %llu, Recently Evicted: %llu\n",
+ cs->nr_cache, cs->nr_dirty, cs->nr_writeback,
+ cs->nr_evicted, cs->nr_recently_evicted);
+}
+
+bool write_exactly(int fd, size_t filesize)
+{
+ int random_fd = open("/dev/urandom", O_RDONLY);
+ char *cursor, *data;
+ int remained;
+ bool ret;
+
+ if (random_fd < 0) {
+ ksft_print_msg("Unable to access urandom.\n");
+ ret = false;
+ goto out;
+ }
+
+ data = malloc(filesize);
+ if (!data) {
+ ksft_print_msg("Unable to allocate data.\n");
+ ret = false;
+ goto close_random_fd;
+ }
+
+ remained = filesize;
+ cursor = data;
+
+ while (remained) {
+ ssize_t read_len = read(random_fd, cursor, remained);
+
+ if (read_len <= 0) {
+ ksft_print_msg("Unable to read from urandom.\n");
+ ret = false;
+ goto out_free_data;
+ }
+
+ remained -= read_len;
+ cursor += read_len;
+ }
+
+ /* write random data to fd */
+ remained = filesize;
+ cursor = data;
+ while (remained) {
+ ssize_t write_len = write(fd, cursor, remained);
+
+ if (write_len <= 0) {
+ ksft_print_msg("Unable write random data to file.\n");
+ ret = false;
+ goto out_free_data;
+ }
+
+ remained -= write_len;
+ cursor += write_len;
+ }
+
+ ret = true;
+out_free_data:
+ free(data);
+close_random_fd:
+ close(random_fd);
+out:
+ return ret;
+}
+
+/*
+ * fsync() is implemented via noop_fsync() on tmpfs. This makes the fsync()
+ * test fail below, so we need to check for test file living on a tmpfs.
+ */
+static bool is_on_tmpfs(int fd)
+{
+ struct statfs statfs_buf;
+
+ if (fstatfs(fd, &statfs_buf))
+ return false;
+
+ return statfs_buf.f_type == TMPFS_MAGIC;
+}
+
+/*
+ * Open/create the file at filename, (optionally) write random data to it
+ * (exactly num_pages), then test the cachestat syscall on this file.
+ *
+ * If test_fsync == true, fsync the file, then check the number of dirty
+ * pages.
+ */
+static int test_cachestat(const char *filename, bool write_random, bool create,
+ bool test_fsync, unsigned long num_pages,
+ int open_flags, mode_t open_mode)
+{
+ size_t PS = sysconf(_SC_PAGESIZE);
+ int filesize = num_pages * PS;
+ int ret = KSFT_PASS;
+ long syscall_ret;
+ struct cachestat cs;
+ struct cachestat_range cs_range = { 0, filesize };
+
+ int fd = open(filename, open_flags, open_mode);
+
+ if (fd == -1) {
+ ksft_print_msg("Unable to create/open file.\n");
+ ret = KSFT_FAIL;
+ goto out;
+ } else {
+ ksft_print_msg("Create/open %s\n", filename);
+ }
+
+ if (write_random) {
+ if (!write_exactly(fd, filesize)) {
+ ksft_print_msg("Unable to access urandom.\n");
+ ret = KSFT_FAIL;
+ goto out1;
+ }
+ }
+
+ syscall_ret = syscall(__NR_cachestat, fd, &cs_range, &cs, 0);
+
+ ksft_print_msg("Cachestat call returned %ld\n", syscall_ret);
+
+ if (syscall_ret) {
+ ksft_print_msg("Cachestat returned non-zero.\n");
+ ret = KSFT_FAIL;
+ goto out1;
+
+ } else {
+ print_cachestat(&cs);
+
+ if (write_random) {
+ if (cs.nr_cache + cs.nr_evicted != num_pages) {
+ ksft_print_msg(
+ "Total number of cached and evicted pages is off.\n");
+ ret = KSFT_FAIL;
+ }
+ }
+ }
+
+ if (test_fsync) {
+ if (is_on_tmpfs(fd)) {
+ ret = KSFT_SKIP;
+ } else if (fsync(fd)) {
+ ksft_print_msg("fsync fails.\n");
+ ret = KSFT_FAIL;
+ } else {
+ syscall_ret = syscall(__NR_cachestat, fd, &cs_range, &cs, 0);
+
+ ksft_print_msg("Cachestat call (after fsync) returned %ld\n",
+ syscall_ret);
+
+ if (!syscall_ret) {
+ print_cachestat(&cs);
+
+ if (cs.nr_dirty) {
+ ret = KSFT_FAIL;
+ ksft_print_msg(
+ "Number of dirty should be zero after fsync.\n");
+ }
+ } else {
+ ksft_print_msg("Cachestat (after fsync) returned non-zero.\n");
+ ret = KSFT_FAIL;
+ goto out1;
+ }
+ }
+ }
+
+out1:
+ close(fd);
+
+ if (create)
+ remove(filename);
+out:
+ return ret;
+}
+
+bool test_cachestat_shmem(void)
+{
+ size_t PS = sysconf(_SC_PAGESIZE);
+ size_t filesize = PS * 512 * 2; /* 2 2MB huge pages */
+ int syscall_ret;
+ size_t compute_len = PS * 512;
+ struct cachestat_range cs_range = { PS, compute_len };
+ char *filename = "tmpshmcstat";
+ struct cachestat cs;
+ bool ret = true;
+ unsigned long num_pages = compute_len / PS;
+ int fd = shm_open(filename, O_CREAT | O_RDWR, 0600);
+
+ if (fd < 0) {
+ ksft_print_msg("Unable to create shmem file.\n");
+ ret = false;
+ goto out;
+ }
+
+ if (ftruncate(fd, filesize)) {
+ ksft_print_msg("Unable to truncate shmem file.\n");
+ ret = false;
+ goto close_fd;
+ }
+
+ if (!write_exactly(fd, filesize)) {
+ ksft_print_msg("Unable to write to shmem file.\n");
+ ret = false;
+ goto close_fd;
+ }
+
+ syscall_ret = syscall(__NR_cachestat, fd, &cs_range, &cs, 0);
+
+ if (syscall_ret) {
+ ksft_print_msg("Cachestat returned non-zero.\n");
+ ret = false;
+ goto close_fd;
+ } else {
+ print_cachestat(&cs);
+ if (cs.nr_cache + cs.nr_evicted != num_pages) {
+ ksft_print_msg(
+ "Total number of cached and evicted pages is off.\n");
+ ret = false;
+ }
+ }
+
+close_fd:
+ shm_unlink(filename);
+out:
+ return ret;
+}
+
+int main(void)
+{
+ int ret;
+
+ ksft_print_header();
+
+ ret = syscall(__NR_cachestat, -1, NULL, NULL, 0);
+ if (ret == -1 && errno == ENOSYS)
+ ksft_exit_skip("cachestat syscall not available\n");
+
+ ksft_set_plan(NR_TESTS);
+
+ if (ret == -1 && errno == EBADF) {
+ ksft_test_result_pass("bad file descriptor recognized\n");
+ ret = 0;
+ } else {
+ ksft_test_result_fail("bad file descriptor ignored\n");
+ ret = 1;
+ }
+
+ for (int i = 0; i < 5; i++) {
+ const char *dev_filename = dev_files[i];
+
+ if (test_cachestat(dev_filename, false, false, false,
+ 4, O_RDONLY, 0400) == KSFT_PASS)
+ ksft_test_result_pass("cachestat works with %s\n", dev_filename);
+ else {
+ ksft_test_result_fail("cachestat fails with %s\n", dev_filename);
+ ret = 1;
+ }
+ }
+
+ if (test_cachestat("tmpfilecachestat", true, true,
+ false, 4, O_CREAT | O_RDWR, 0600) == KSFT_PASS)
+ ksft_test_result_pass("cachestat works with a normal file\n");
+ else {
+ ksft_test_result_fail("cachestat fails with normal file\n");
+ ret = 1;
+ }
+
+ switch (test_cachestat("tmpfilecachestat", true, true,
+ true, 4, O_CREAT | O_RDWR, 0600)) {
+ case KSFT_FAIL:
+ ksft_test_result_fail("cachestat fsync fails with normal file\n");
+ ret = KSFT_FAIL;
+ break;
+ case KSFT_PASS:
+ ksft_test_result_pass("cachestat fsync works with a normal file\n");
+ break;
+ case KSFT_SKIP:
+ ksft_test_result_skip("tmpfilecachestat is on tmpfs\n");
+ break;
+ }
+
+ if (test_cachestat_shmem())
+ ksft_test_result_pass("cachestat works with a shmem file\n");
+ else {
+ ksft_test_result_fail("cachestat fails with a shmem file\n");
+ ret = 1;
+ }
+
+ return ret;
+}
diff --git a/tools/testing/selftests/capabilities/Makefile b/tools/testing/selftests/capabilities/Makefile
index 6e9d98d457d5..411ac098308f 100644
--- a/tools/testing/selftests/capabilities/Makefile
+++ b/tools/testing/selftests/capabilities/Makefile
@@ -2,7 +2,7 @@
TEST_GEN_FILES := validate_cap
TEST_GEN_PROGS := test_execve
-CFLAGS += -O2 -g -std=gnu99 -Wall
+CFLAGS += -O2 -g -std=gnu99 -Wall $(KHDR_INCLUDES)
LDLIBS += -lcap-ng -lrt -ldl
include ../lib.mk
diff --git a/tools/testing/selftests/capabilities/test_execve.c b/tools/testing/selftests/capabilities/test_execve.c
index df0ef02b4036..7cde07a5df78 100644
--- a/tools/testing/selftests/capabilities/test_execve.c
+++ b/tools/testing/selftests/capabilities/test_execve.c
@@ -20,14 +20,6 @@
#include "../kselftest.h"
-#ifndef PR_CAP_AMBIENT
-#define PR_CAP_AMBIENT 47
-# define PR_CAP_AMBIENT_IS_SET 1
-# define PR_CAP_AMBIENT_RAISE 2
-# define PR_CAP_AMBIENT_LOWER 3
-# define PR_CAP_AMBIENT_CLEAR_ALL 4
-#endif
-
static int nerrs;
static pid_t mpid; /* main() pid is used to avoid duplicate test counts */
@@ -96,11 +88,7 @@ static bool create_and_enter_ns(uid_t inner_uid)
outer_uid = getuid();
outer_gid = getgid();
- /*
- * TODO: If we're already root, we could skip creating the userns.
- */
-
- if (unshare(CLONE_NEWNS) == 0) {
+ if (outer_uid == 0 && unshare(CLONE_NEWNS) == 0) {
ksft_print_msg("[NOTE]\tUsing global UIDs for tests\n");
if (prctl(PR_SET_KEEPCAPS, 1, 0, 0, 0) != 0)
ksft_exit_fail_msg("PR_SET_KEEPCAPS - %s\n",
diff --git a/tools/testing/selftests/capabilities/validate_cap.c b/tools/testing/selftests/capabilities/validate_cap.c
index cdfc94268fe6..60b4e7b716a7 100644
--- a/tools/testing/selftests/capabilities/validate_cap.c
+++ b/tools/testing/selftests/capabilities/validate_cap.c
@@ -9,14 +9,6 @@
#include "../kselftest.h"
-#ifndef PR_CAP_AMBIENT
-#define PR_CAP_AMBIENT 47
-# define PR_CAP_AMBIENT_IS_SET 1
-# define PR_CAP_AMBIENT_RAISE 2
-# define PR_CAP_AMBIENT_LOWER 3
-# define PR_CAP_AMBIENT_CLEAR_ALL 4
-#endif
-
#if __GLIBC__ > 2 || (__GLIBC__ == 2 && __GLIBC_MINOR__ >= 19)
# define HAVE_GETAUXVAL
#endif
diff --git a/tools/testing/selftests/cgroup/.gitignore b/tools/testing/selftests/cgroup/.gitignore
index aa6de65b0838..2732e0b29271 100644
--- a/tools/testing/selftests/cgroup/.gitignore
+++ b/tools/testing/selftests/cgroup/.gitignore
@@ -2,3 +2,10 @@
test_memcontrol
test_core
test_freezer
+test_kmem
+test_kill
+test_cpu
+test_cpuset
+test_zswap
+test_hugetlb_memcg
+wait_inotify
diff --git a/tools/testing/selftests/cgroup/Makefile b/tools/testing/selftests/cgroup/Makefile
index 967f268fde74..00b441928909 100644
--- a/tools/testing/selftests/cgroup/Makefile
+++ b/tools/testing/selftests/cgroup/Makefile
@@ -1,16 +1,31 @@
# SPDX-License-Identifier: GPL-2.0
CFLAGS += -Wall -pthread
-all:
+all: ${HELPER_PROGS}
TEST_FILES := with_stress.sh
-TEST_PROGS := test_stress.sh
+TEST_PROGS := test_stress.sh test_cpuset_prs.sh
+TEST_GEN_FILES := wait_inotify
TEST_GEN_PROGS = test_memcontrol
+TEST_GEN_PROGS += test_kmem
TEST_GEN_PROGS += test_core
TEST_GEN_PROGS += test_freezer
+TEST_GEN_PROGS += test_kill
+TEST_GEN_PROGS += test_cpu
+TEST_GEN_PROGS += test_cpuset
+TEST_GEN_PROGS += test_zswap
+TEST_GEN_PROGS += test_hugetlb_memcg
+
+LOCAL_HDRS += $(selfdir)/clone3/clone3_selftests.h $(selfdir)/pidfd/pidfd.h
include ../lib.mk
-$(OUTPUT)/test_memcontrol: cgroup_util.c ../clone3/clone3_selftests.h
-$(OUTPUT)/test_core: cgroup_util.c ../clone3/clone3_selftests.h
-$(OUTPUT)/test_freezer: cgroup_util.c ../clone3/clone3_selftests.h
+$(OUTPUT)/test_memcontrol: cgroup_util.c
+$(OUTPUT)/test_kmem: cgroup_util.c
+$(OUTPUT)/test_core: cgroup_util.c
+$(OUTPUT)/test_freezer: cgroup_util.c
+$(OUTPUT)/test_kill: cgroup_util.c
+$(OUTPUT)/test_cpu: cgroup_util.c
+$(OUTPUT)/test_cpuset: cgroup_util.c
+$(OUTPUT)/test_zswap: cgroup_util.c
+$(OUTPUT)/test_hugetlb_memcg: cgroup_util.c
diff --git a/tools/testing/selftests/cgroup/cgroup_util.c b/tools/testing/selftests/cgroup/cgroup_util.c
index 8a637ca7d73a..0340d4ca8f51 100644
--- a/tools/testing/selftests/cgroup/cgroup_util.c
+++ b/tools/testing/selftests/cgroup/cgroup_util.c
@@ -5,10 +5,12 @@
#include <errno.h>
#include <fcntl.h>
#include <linux/limits.h>
+#include <poll.h>
#include <signal.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
+#include <sys/inotify.h>
#include <sys/stat.h>
#include <sys/types.h>
#include <sys/wait.h>
@@ -17,6 +19,7 @@
#include "cgroup_util.h"
#include "../clone3/clone3_selftests.h"
+/* Returns read len on success, or -errno on failure. */
static ssize_t read_text(const char *path, char *buf, size_t max_len)
{
ssize_t len;
@@ -24,35 +27,29 @@ static ssize_t read_text(const char *path, char *buf, size_t max_len)
fd = open(path, O_RDONLY);
if (fd < 0)
- return fd;
+ return -errno;
len = read(fd, buf, max_len - 1);
- if (len < 0)
- goto out;
- buf[len] = 0;
-out:
+ if (len >= 0)
+ buf[len] = 0;
+
close(fd);
- return len;
+ return len < 0 ? -errno : len;
}
+/* Returns written len on success, or -errno on failure. */
static ssize_t write_text(const char *path, char *buf, ssize_t len)
{
int fd;
fd = open(path, O_WRONLY | O_APPEND);
if (fd < 0)
- return fd;
+ return -errno;
len = write(fd, buf, len);
- if (len < 0) {
- close(fd);
- return len;
- }
-
close(fd);
-
- return len;
+ return len < 0 ? -errno : len;
}
char *cg_name(const char *root, const char *name)
@@ -85,16 +82,16 @@ char *cg_control(const char *cgroup, const char *control)
return ret;
}
+/* Returns 0 on success, or -errno on failure. */
int cg_read(const char *cgroup, const char *control, char *buf, size_t len)
{
char path[PATH_MAX];
+ ssize_t ret;
snprintf(path, sizeof(path), "%s/%s", cgroup, control);
- if (read_text(path, buf, len) >= 0)
- return 0;
-
- return -1;
+ ret = read_text(path, buf, len);
+ return ret >= 0 ? 0 : ret;
}
int cg_read_strcmp(const char *cgroup, const char *control,
@@ -106,7 +103,7 @@ int cg_read_strcmp(const char *cgroup, const char *control,
/* Handle the case of comparing against empty string */
if (!expected)
- size = 32;
+ return -1;
else
size = strlen(expected) + 1;
@@ -175,17 +172,27 @@ long cg_read_lc(const char *cgroup, const char *control)
return cnt;
}
+/* Returns 0 on success, or -errno on failure. */
int cg_write(const char *cgroup, const char *control, char *buf)
{
char path[PATH_MAX];
- ssize_t len = strlen(buf);
+ ssize_t len = strlen(buf), ret;
snprintf(path, sizeof(path), "%s/%s", cgroup, control);
+ ret = write_text(path, buf, len);
+ return ret == len ? 0 : ret;
+}
- if (write_text(path, buf, len) == len)
- return 0;
+int cg_write_numeric(const char *cgroup, const char *control, long value)
+{
+ char buf[64];
+ int ret;
- return -1;
+ ret = sprintf(buf, "%lu", value);
+ if (ret < 0)
+ return ret;
+
+ return cg_write(cgroup, control, buf);
}
int cg_find_unified_root(char *root, size_t len)
@@ -219,7 +226,7 @@ int cg_find_unified_root(char *root, size_t len)
int cg_create(const char *cgroup)
{
- return mkdir(cgroup, 0644);
+ return mkdir(cgroup, 0755);
}
int cg_wait_for_proc_count(const char *cgroup, int count)
@@ -252,6 +259,10 @@ int cg_killall(const char *cgroup)
char buf[PAGE_SIZE];
char *ptr = buf;
+ /* If cgroup.kill exists use it. */
+ if (!cg_write(cgroup, "cgroup.kill", "1"))
+ return 0;
+
if (cg_read(cgroup, "cgroup.procs", buf, sizeof(buf)))
return -1;
@@ -275,6 +286,8 @@ int cg_destroy(const char *cgroup)
{
int ret;
+ if (!cgroup)
+ return 0;
retry:
ret = rmdir(cgroup);
if (ret && errno == EBUSY) {
@@ -337,13 +350,13 @@ pid_t clone_into_cgroup(int cgroup_fd)
#ifdef CLONE_ARGS_SIZE_VER2
pid_t pid;
- struct clone_args args = {
+ struct __clone_args args = {
.flags = CLONE_INTO_CGROUP,
.exit_signal = SIGCHLD,
.cgroup = cgroup_fd,
};
- pid = sys_clone3(&args, sizeof(struct clone_args));
+ pid = sys_clone3(&args, sizeof(struct __clone_args));
/*
* Verify that this is a genuine test failure:
* ENOSYS -> clone3() not available
@@ -529,9 +542,22 @@ int set_oom_adj_score(int pid, int score)
return 0;
}
+int proc_mount_contains(const char *option)
+{
+ char buf[4 * PAGE_SIZE];
+ ssize_t read;
+
+ read = read_text("/proc/mounts", buf, sizeof(buf));
+ if (read < 0)
+ return read;
+
+ return strstr(buf, option) != NULL;
+}
+
ssize_t proc_read_text(int pid, bool thread, const char *item, char *buf, size_t size)
{
char path[PATH_MAX];
+ ssize_t ret;
if (!pid)
snprintf(path, sizeof(path), "/proc/%s/%s",
@@ -539,7 +565,8 @@ ssize_t proc_read_text(int pid, bool thread, const char *item, char *buf, size_t
else
snprintf(path, sizeof(path), "/proc/%d/%s", pid, item);
- return read_text(path, buf, size);
+ ret = read_text(path, buf, size);
+ return ret < 0 ? -1 : ret;
}
int proc_read_strstr(int pid, bool thread, const char *item, const char *needle)
@@ -576,3 +603,57 @@ int clone_into_cgroup_run_wait(const char *cgroup)
(void)clone_reap(pid, WEXITED);
return 0;
}
+
+static int __prepare_for_wait(const char *cgroup, const char *filename)
+{
+ int fd, ret = -1;
+
+ fd = inotify_init1(0);
+ if (fd == -1)
+ return fd;
+
+ ret = inotify_add_watch(fd, cg_control(cgroup, filename), IN_MODIFY);
+ if (ret == -1) {
+ close(fd);
+ fd = -1;
+ }
+
+ return fd;
+}
+
+int cg_prepare_for_wait(const char *cgroup)
+{
+ return __prepare_for_wait(cgroup, "cgroup.events");
+}
+
+int memcg_prepare_for_wait(const char *cgroup)
+{
+ return __prepare_for_wait(cgroup, "memory.events");
+}
+
+int cg_wait_for(int fd)
+{
+ int ret = -1;
+ struct pollfd fds = {
+ .fd = fd,
+ .events = POLLIN,
+ };
+
+ while (true) {
+ ret = poll(&fds, 1, 10000);
+
+ if (ret == -1) {
+ if (errno == EINTR)
+ continue;
+
+ break;
+ }
+
+ if (ret > 0 && fds.revents & POLLIN) {
+ ret = 0;
+ break;
+ }
+ }
+
+ return ret;
+}
diff --git a/tools/testing/selftests/cgroup/cgroup_util.h b/tools/testing/selftests/cgroup/cgroup_util.h
index 5a1305dd1f0b..1df7f202214a 100644
--- a/tools/testing/selftests/cgroup/cgroup_util.h
+++ b/tools/testing/selftests/cgroup/cgroup_util.h
@@ -2,12 +2,17 @@
#include <stdbool.h>
#include <stdlib.h>
-#define PAGE_SIZE 4096
+#include "../kselftest.h"
-#define ARRAY_SIZE(arr) (sizeof(arr) / sizeof((arr)[0]))
+#define PAGE_SIZE 4096
#define MB(x) (x << 20)
+#define USEC_PER_SEC 1000000L
+#define NSEC_PER_SEC 1000000000L
+
+#define TEST_UID 65534 /* usually nobody, any !root is fine */
+
/*
* Checks if two given values differ by less than err% of their sum.
*/
@@ -32,6 +37,7 @@ extern long cg_read_long(const char *cgroup, const char *control);
long cg_read_key_long(const char *cgroup, const char *control, const char *key);
extern long cg_read_lc(const char *cgroup, const char *control);
extern int cg_write(const char *cgroup, const char *control, char *buf);
+int cg_write_numeric(const char *cgroup, const char *control, long value);
extern int cg_run(const char *cgroup,
int (*fn)(const char *cgroup, void *arg),
void *arg);
@@ -48,9 +54,13 @@ extern int is_swap_enabled(void);
extern int set_oom_adj_score(int pid, int score);
extern int cg_wait_for_proc_count(const char *cgroup, int count);
extern int cg_killall(const char *cgroup);
+int proc_mount_contains(const char *option);
extern ssize_t proc_read_text(int pid, bool thread, const char *item, char *buf, size_t size);
extern int proc_read_strstr(int pid, bool thread, const char *item, const char *needle);
extern pid_t clone_into_cgroup(int cgroup_fd);
extern int clone_reap(pid_t pid, int options);
extern int clone_into_cgroup_run_wait(const char *cgroup);
extern int dirfd_open_opath(const char *dir);
+extern int cg_prepare_for_wait(const char *cgroup);
+extern int memcg_prepare_for_wait(const char *cgroup);
+extern int cg_wait_for(int fd);
diff --git a/tools/testing/selftests/cgroup/config b/tools/testing/selftests/cgroup/config
new file mode 100644
index 000000000000..97d549ee894f
--- /dev/null
+++ b/tools/testing/selftests/cgroup/config
@@ -0,0 +1,7 @@
+CONFIG_CGROUPS=y
+CONFIG_CGROUP_CPUACCT=y
+CONFIG_CGROUP_FREEZER=y
+CONFIG_CGROUP_SCHED=y
+CONFIG_MEMCG=y
+CONFIG_MEMCG_KMEM=y
+CONFIG_PAGE_COUNTER=y
diff --git a/tools/testing/selftests/cgroup/memcg_protection.m b/tools/testing/selftests/cgroup/memcg_protection.m
new file mode 100644
index 000000000000..051daa3477b6
--- /dev/null
+++ b/tools/testing/selftests/cgroup/memcg_protection.m
@@ -0,0 +1,89 @@
+% SPDX-License-Identifier: GPL-2.0
+%
+% run as: octave-cli memcg_protection.m
+%
+% This script simulates reclaim protection behavior on a single level of memcg
+% hierarchy to illustrate how overcommitted protection spreads among siblings
+% (as it depends also on their current consumption).
+%
+% Simulation assumes siblings consumed the initial amount of memory (w/out
+% reclaim) and then the reclaim starts, all memory is reclaimable, i.e. treated
+% same. It simulates only non-low reclaim and assumes all memory.min = 0.
+%
+% Input configurations
+% --------------------
+% E number parent effective protection
+% n vector nominal protection of siblings set at the given level (memory.low)
+% c vector current consumption -,,- (memory.current)
+
+% example from testcase (values in GB)
+E = 50 / 1024;
+n = [75 25 0 500 ] / 1024;
+c = [50 50 50 0] / 1024;
+
+% Reclaim parameters
+% ------------------
+
+% Minimal reclaim amount (GB)
+cluster = 32*4 / 2**20;
+
+% Reclaim coefficient (think as 0.5^sc->priority)
+alpha = .1
+
+% Simulation parameters
+% ---------------------
+epsilon = 1e-7;
+timeout = 1000;
+
+% Simulation loop
+% ---------------
+
+ch = [];
+eh = [];
+rh = [];
+
+for t = 1:timeout
+ % low_usage
+ u = min(c, n);
+ siblings = sum(u);
+
+ % effective_protection()
+ protected = min(n, c); % start with nominal
+ e = protected * min(1, E / siblings); % normalize overcommit
+
+ % recursive protection
+ unclaimed = max(0, E - siblings);
+ parent_overuse = sum(c) - siblings;
+ if (unclaimed > 0 && parent_overuse > 0)
+ overuse = max(0, c - protected);
+ e += unclaimed * (overuse / parent_overuse);
+ endif
+
+ % get_scan_count()
+ r = alpha * c; % assume all memory is in a single LRU list
+
+ % commit 1bc63fb1272b ("mm, memcg: make scan aggression always exclude protection")
+ sz = max(e, c);
+ r .*= (1 - (e+epsilon) ./ (sz+epsilon));
+
+ % uncomment to debug prints
+ % e, c, r
+
+ % nothing to reclaim, reached equilibrium
+ if max(r) < epsilon
+ break;
+ endif
+
+ % SWAP_CLUSTER_MAX roundup
+ r = max(r, (r > epsilon) .* cluster);
+ % XXX here I do parallel reclaim of all siblings
+ % in reality reclaim is serialized and each sibling recalculates own residual
+ c = max(c - r, 0);
+
+ ch = [ch ; c];
+ eh = [eh ; e];
+ rh = [rh ; r];
+endfor
+
+t
+c, e
diff --git a/tools/testing/selftests/cgroup/test_core.c b/tools/testing/selftests/cgroup/test_core.c
index 3df648c37876..80aa6b2373b9 100644
--- a/tools/testing/selftests/cgroup/test_core.c
+++ b/tools/testing/selftests/cgroup/test_core.c
@@ -1,11 +1,14 @@
/* SPDX-License-Identifier: GPL-2.0 */
+#define _GNU_SOURCE
#include <linux/limits.h>
+#include <linux/sched.h>
#include <sys/types.h>
#include <sys/mman.h>
#include <sys/wait.h>
#include <unistd.h>
#include <fcntl.h>
+#include <sched.h>
#include <stdio.h>
#include <errno.h>
#include <signal.h>
@@ -674,6 +677,166 @@ cleanup:
return ret;
}
+/*
+ * cgroup migration permission check should be performed based on the
+ * credentials at the time of open instead of write.
+ */
+static int test_cgcore_lesser_euid_open(const char *root)
+{
+ const uid_t test_euid = TEST_UID;
+ int ret = KSFT_FAIL;
+ char *cg_test_a = NULL, *cg_test_b = NULL;
+ char *cg_test_a_procs = NULL, *cg_test_b_procs = NULL;
+ int cg_test_b_procs_fd = -1;
+ uid_t saved_uid;
+
+ cg_test_a = cg_name(root, "cg_test_a");
+ cg_test_b = cg_name(root, "cg_test_b");
+
+ if (!cg_test_a || !cg_test_b)
+ goto cleanup;
+
+ cg_test_a_procs = cg_name(cg_test_a, "cgroup.procs");
+ cg_test_b_procs = cg_name(cg_test_b, "cgroup.procs");
+
+ if (!cg_test_a_procs || !cg_test_b_procs)
+ goto cleanup;
+
+ if (cg_create(cg_test_a) || cg_create(cg_test_b))
+ goto cleanup;
+
+ if (cg_enter_current(cg_test_a))
+ goto cleanup;
+
+ if (chown(cg_test_a_procs, test_euid, -1) ||
+ chown(cg_test_b_procs, test_euid, -1))
+ goto cleanup;
+
+ saved_uid = geteuid();
+ if (seteuid(test_euid))
+ goto cleanup;
+
+ cg_test_b_procs_fd = open(cg_test_b_procs, O_RDWR);
+
+ if (seteuid(saved_uid))
+ goto cleanup;
+
+ if (cg_test_b_procs_fd < 0)
+ goto cleanup;
+
+ if (write(cg_test_b_procs_fd, "0", 1) >= 0 || errno != EACCES)
+ goto cleanup;
+
+ ret = KSFT_PASS;
+
+cleanup:
+ cg_enter_current(root);
+ if (cg_test_b_procs_fd >= 0)
+ close(cg_test_b_procs_fd);
+ if (cg_test_b)
+ cg_destroy(cg_test_b);
+ if (cg_test_a)
+ cg_destroy(cg_test_a);
+ free(cg_test_b_procs);
+ free(cg_test_a_procs);
+ free(cg_test_b);
+ free(cg_test_a);
+ return ret;
+}
+
+struct lesser_ns_open_thread_arg {
+ const char *path;
+ int fd;
+ int err;
+};
+
+static int lesser_ns_open_thread_fn(void *arg)
+{
+ struct lesser_ns_open_thread_arg *targ = arg;
+
+ targ->fd = open(targ->path, O_RDWR);
+ targ->err = errno;
+ return 0;
+}
+
+/*
+ * cgroup migration permission check should be performed based on the cgroup
+ * namespace at the time of open instead of write.
+ */
+static int test_cgcore_lesser_ns_open(const char *root)
+{
+ static char stack[65536];
+ const uid_t test_euid = 65534; /* usually nobody, any !root is fine */
+ int ret = KSFT_FAIL;
+ char *cg_test_a = NULL, *cg_test_b = NULL;
+ char *cg_test_a_procs = NULL, *cg_test_b_procs = NULL;
+ int cg_test_b_procs_fd = -1;
+ struct lesser_ns_open_thread_arg targ = { .fd = -1 };
+ pid_t pid;
+ int status;
+
+ cg_test_a = cg_name(root, "cg_test_a");
+ cg_test_b = cg_name(root, "cg_test_b");
+
+ if (!cg_test_a || !cg_test_b)
+ goto cleanup;
+
+ cg_test_a_procs = cg_name(cg_test_a, "cgroup.procs");
+ cg_test_b_procs = cg_name(cg_test_b, "cgroup.procs");
+
+ if (!cg_test_a_procs || !cg_test_b_procs)
+ goto cleanup;
+
+ if (cg_create(cg_test_a) || cg_create(cg_test_b))
+ goto cleanup;
+
+ if (cg_enter_current(cg_test_b))
+ goto cleanup;
+
+ if (chown(cg_test_a_procs, test_euid, -1) ||
+ chown(cg_test_b_procs, test_euid, -1))
+ goto cleanup;
+
+ targ.path = cg_test_b_procs;
+ pid = clone(lesser_ns_open_thread_fn, stack + sizeof(stack),
+ CLONE_NEWCGROUP | CLONE_FILES | CLONE_VM | SIGCHLD,
+ &targ);
+ if (pid < 0)
+ goto cleanup;
+
+ if (waitpid(pid, &status, 0) < 0)
+ goto cleanup;
+
+ if (!WIFEXITED(status))
+ goto cleanup;
+
+ cg_test_b_procs_fd = targ.fd;
+ if (cg_test_b_procs_fd < 0)
+ goto cleanup;
+
+ if (cg_enter_current(cg_test_a))
+ goto cleanup;
+
+ if ((status = write(cg_test_b_procs_fd, "0", 1)) >= 0 || errno != ENOENT)
+ goto cleanup;
+
+ ret = KSFT_PASS;
+
+cleanup:
+ cg_enter_current(root);
+ if (cg_test_b_procs_fd >= 0)
+ close(cg_test_b_procs_fd);
+ if (cg_test_b)
+ cg_destroy(cg_test_b);
+ if (cg_test_a)
+ cg_destroy(cg_test_a);
+ free(cg_test_b_procs);
+ free(cg_test_a_procs);
+ free(cg_test_b);
+ free(cg_test_a);
+ return ret;
+}
+
#define T(x) { x, #x }
struct corecg_test {
int (*fn)(const char *root);
@@ -689,6 +852,8 @@ struct corecg_test {
T(test_cgcore_proc_migration),
T(test_cgcore_thread_migration),
T(test_cgcore_destroy),
+ T(test_cgcore_lesser_euid_open),
+ T(test_cgcore_lesser_ns_open),
};
#undef T
diff --git a/tools/testing/selftests/cgroup/test_cpu.c b/tools/testing/selftests/cgroup/test_cpu.c
new file mode 100644
index 000000000000..24020a2c68dc
--- /dev/null
+++ b/tools/testing/selftests/cgroup/test_cpu.c
@@ -0,0 +1,726 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#define _GNU_SOURCE
+#include <linux/limits.h>
+#include <sys/sysinfo.h>
+#include <sys/wait.h>
+#include <errno.h>
+#include <pthread.h>
+#include <stdio.h>
+#include <time.h>
+
+#include "../kselftest.h"
+#include "cgroup_util.h"
+
+enum hog_clock_type {
+ // Count elapsed time using the CLOCK_PROCESS_CPUTIME_ID clock.
+ CPU_HOG_CLOCK_PROCESS,
+ // Count elapsed time using system wallclock time.
+ CPU_HOG_CLOCK_WALL,
+};
+
+struct cpu_hogger {
+ char *cgroup;
+ pid_t pid;
+ long usage;
+};
+
+struct cpu_hog_func_param {
+ int nprocs;
+ struct timespec ts;
+ enum hog_clock_type clock_type;
+};
+
+/*
+ * This test creates two nested cgroups with and without enabling
+ * the cpu controller.
+ */
+static int test_cpucg_subtree_control(const char *root)
+{
+ char *parent = NULL, *child = NULL, *parent2 = NULL, *child2 = NULL;
+ int ret = KSFT_FAIL;
+
+ // Create two nested cgroups with the cpu controller enabled.
+ parent = cg_name(root, "cpucg_test_0");
+ if (!parent)
+ goto cleanup;
+
+ if (cg_create(parent))
+ goto cleanup;
+
+ if (cg_write(parent, "cgroup.subtree_control", "+cpu"))
+ goto cleanup;
+
+ child = cg_name(parent, "cpucg_test_child");
+ if (!child)
+ goto cleanup;
+
+ if (cg_create(child))
+ goto cleanup;
+
+ if (cg_read_strstr(child, "cgroup.controllers", "cpu"))
+ goto cleanup;
+
+ // Create two nested cgroups without enabling the cpu controller.
+ parent2 = cg_name(root, "cpucg_test_1");
+ if (!parent2)
+ goto cleanup;
+
+ if (cg_create(parent2))
+ goto cleanup;
+
+ child2 = cg_name(parent2, "cpucg_test_child");
+ if (!child2)
+ goto cleanup;
+
+ if (cg_create(child2))
+ goto cleanup;
+
+ if (!cg_read_strstr(child2, "cgroup.controllers", "cpu"))
+ goto cleanup;
+
+ ret = KSFT_PASS;
+
+cleanup:
+ cg_destroy(child);
+ free(child);
+ cg_destroy(child2);
+ free(child2);
+ cg_destroy(parent);
+ free(parent);
+ cg_destroy(parent2);
+ free(parent2);
+
+ return ret;
+}
+
+static void *hog_cpu_thread_func(void *arg)
+{
+ while (1)
+ ;
+
+ return NULL;
+}
+
+static struct timespec
+timespec_sub(const struct timespec *lhs, const struct timespec *rhs)
+{
+ struct timespec zero = {
+ .tv_sec = 0,
+ .tv_nsec = 0,
+ };
+ struct timespec ret;
+
+ if (lhs->tv_sec < rhs->tv_sec)
+ return zero;
+
+ ret.tv_sec = lhs->tv_sec - rhs->tv_sec;
+
+ if (lhs->tv_nsec < rhs->tv_nsec) {
+ if (ret.tv_sec == 0)
+ return zero;
+
+ ret.tv_sec--;
+ ret.tv_nsec = NSEC_PER_SEC - rhs->tv_nsec + lhs->tv_nsec;
+ } else
+ ret.tv_nsec = lhs->tv_nsec - rhs->tv_nsec;
+
+ return ret;
+}
+
+static int hog_cpus_timed(const char *cgroup, void *arg)
+{
+ const struct cpu_hog_func_param *param =
+ (struct cpu_hog_func_param *)arg;
+ struct timespec ts_run = param->ts;
+ struct timespec ts_remaining = ts_run;
+ struct timespec ts_start;
+ int i, ret;
+
+ ret = clock_gettime(CLOCK_MONOTONIC, &ts_start);
+ if (ret != 0)
+ return ret;
+
+ for (i = 0; i < param->nprocs; i++) {
+ pthread_t tid;
+
+ ret = pthread_create(&tid, NULL, &hog_cpu_thread_func, NULL);
+ if (ret != 0)
+ return ret;
+ }
+
+ while (ts_remaining.tv_sec > 0 || ts_remaining.tv_nsec > 0) {
+ struct timespec ts_total;
+
+ ret = nanosleep(&ts_remaining, NULL);
+ if (ret && errno != EINTR)
+ return ret;
+
+ if (param->clock_type == CPU_HOG_CLOCK_PROCESS) {
+ ret = clock_gettime(CLOCK_PROCESS_CPUTIME_ID, &ts_total);
+ if (ret != 0)
+ return ret;
+ } else {
+ struct timespec ts_current;
+
+ ret = clock_gettime(CLOCK_MONOTONIC, &ts_current);
+ if (ret != 0)
+ return ret;
+
+ ts_total = timespec_sub(&ts_current, &ts_start);
+ }
+
+ ts_remaining = timespec_sub(&ts_run, &ts_total);
+ }
+
+ return 0;
+}
+
+/*
+ * Creates a cpu cgroup, burns a CPU for a few quanta, and verifies that
+ * cpu.stat shows the expected output.
+ */
+static int test_cpucg_stats(const char *root)
+{
+ int ret = KSFT_FAIL;
+ long usage_usec, user_usec, system_usec;
+ long usage_seconds = 2;
+ long expected_usage_usec = usage_seconds * USEC_PER_SEC;
+ char *cpucg;
+
+ cpucg = cg_name(root, "cpucg_test");
+ if (!cpucg)
+ goto cleanup;
+
+ if (cg_create(cpucg))
+ goto cleanup;
+
+ usage_usec = cg_read_key_long(cpucg, "cpu.stat", "usage_usec");
+ user_usec = cg_read_key_long(cpucg, "cpu.stat", "user_usec");
+ system_usec = cg_read_key_long(cpucg, "cpu.stat", "system_usec");
+ if (usage_usec != 0 || user_usec != 0 || system_usec != 0)
+ goto cleanup;
+
+ struct cpu_hog_func_param param = {
+ .nprocs = 1,
+ .ts = {
+ .tv_sec = usage_seconds,
+ .tv_nsec = 0,
+ },
+ .clock_type = CPU_HOG_CLOCK_PROCESS,
+ };
+ if (cg_run(cpucg, hog_cpus_timed, (void *)&param))
+ goto cleanup;
+
+ usage_usec = cg_read_key_long(cpucg, "cpu.stat", "usage_usec");
+ user_usec = cg_read_key_long(cpucg, "cpu.stat", "user_usec");
+ if (user_usec <= 0)
+ goto cleanup;
+
+ if (!values_close(usage_usec, expected_usage_usec, 1))
+ goto cleanup;
+
+ ret = KSFT_PASS;
+
+cleanup:
+ cg_destroy(cpucg);
+ free(cpucg);
+
+ return ret;
+}
+
+static int
+run_cpucg_weight_test(
+ const char *root,
+ pid_t (*spawn_child)(const struct cpu_hogger *child),
+ int (*validate)(const struct cpu_hogger *children, int num_children))
+{
+ int ret = KSFT_FAIL, i;
+ char *parent = NULL;
+ struct cpu_hogger children[3] = {NULL};
+
+ parent = cg_name(root, "cpucg_test_0");
+ if (!parent)
+ goto cleanup;
+
+ if (cg_create(parent))
+ goto cleanup;
+
+ if (cg_write(parent, "cgroup.subtree_control", "+cpu"))
+ goto cleanup;
+
+ for (i = 0; i < ARRAY_SIZE(children); i++) {
+ children[i].cgroup = cg_name_indexed(parent, "cpucg_child", i);
+ if (!children[i].cgroup)
+ goto cleanup;
+
+ if (cg_create(children[i].cgroup))
+ goto cleanup;
+
+ if (cg_write_numeric(children[i].cgroup, "cpu.weight",
+ 50 * (i + 1)))
+ goto cleanup;
+ }
+
+ for (i = 0; i < ARRAY_SIZE(children); i++) {
+ pid_t pid = spawn_child(&children[i]);
+ if (pid <= 0)
+ goto cleanup;
+ children[i].pid = pid;
+ }
+
+ for (i = 0; i < ARRAY_SIZE(children); i++) {
+ int retcode;
+
+ waitpid(children[i].pid, &retcode, 0);
+ if (!WIFEXITED(retcode))
+ goto cleanup;
+ if (WEXITSTATUS(retcode))
+ goto cleanup;
+ }
+
+ for (i = 0; i < ARRAY_SIZE(children); i++)
+ children[i].usage = cg_read_key_long(children[i].cgroup,
+ "cpu.stat", "usage_usec");
+
+ if (validate(children, ARRAY_SIZE(children)))
+ goto cleanup;
+
+ ret = KSFT_PASS;
+cleanup:
+ for (i = 0; i < ARRAY_SIZE(children); i++) {
+ cg_destroy(children[i].cgroup);
+ free(children[i].cgroup);
+ }
+ cg_destroy(parent);
+ free(parent);
+
+ return ret;
+}
+
+static pid_t weight_hog_ncpus(const struct cpu_hogger *child, int ncpus)
+{
+ long usage_seconds = 10;
+ struct cpu_hog_func_param param = {
+ .nprocs = ncpus,
+ .ts = {
+ .tv_sec = usage_seconds,
+ .tv_nsec = 0,
+ },
+ .clock_type = CPU_HOG_CLOCK_WALL,
+ };
+ return cg_run_nowait(child->cgroup, hog_cpus_timed, (void *)&param);
+}
+
+static pid_t weight_hog_all_cpus(const struct cpu_hogger *child)
+{
+ return weight_hog_ncpus(child, get_nprocs());
+}
+
+static int
+overprovision_validate(const struct cpu_hogger *children, int num_children)
+{
+ int ret = KSFT_FAIL, i;
+
+ for (i = 0; i < num_children - 1; i++) {
+ long delta;
+
+ if (children[i + 1].usage <= children[i].usage)
+ goto cleanup;
+
+ delta = children[i + 1].usage - children[i].usage;
+ if (!values_close(delta, children[0].usage, 35))
+ goto cleanup;
+ }
+
+ ret = KSFT_PASS;
+cleanup:
+ return ret;
+}
+
+/*
+ * First, this test creates the following hierarchy:
+ * A
+ * A/B cpu.weight = 50
+ * A/C cpu.weight = 100
+ * A/D cpu.weight = 150
+ *
+ * A separate process is then created for each child cgroup which spawns as
+ * many threads as there are cores, and hogs each CPU as much as possible
+ * for some time interval.
+ *
+ * Once all of the children have exited, we verify that each child cgroup
+ * was given proportional runtime as informed by their cpu.weight.
+ */
+static int test_cpucg_weight_overprovisioned(const char *root)
+{
+ return run_cpucg_weight_test(root, weight_hog_all_cpus,
+ overprovision_validate);
+}
+
+static pid_t weight_hog_one_cpu(const struct cpu_hogger *child)
+{
+ return weight_hog_ncpus(child, 1);
+}
+
+static int
+underprovision_validate(const struct cpu_hogger *children, int num_children)
+{
+ int ret = KSFT_FAIL, i;
+
+ for (i = 0; i < num_children - 1; i++) {
+ if (!values_close(children[i + 1].usage, children[0].usage, 15))
+ goto cleanup;
+ }
+
+ ret = KSFT_PASS;
+cleanup:
+ return ret;
+}
+
+/*
+ * First, this test creates the following hierarchy:
+ * A
+ * A/B cpu.weight = 50
+ * A/C cpu.weight = 100
+ * A/D cpu.weight = 150
+ *
+ * A separate process is then created for each child cgroup which spawns a
+ * single thread that hogs a CPU. The testcase is only run on systems that
+ * have at least one core per-thread in the child processes.
+ *
+ * Once all of the children have exited, we verify that each child cgroup
+ * had roughly the same runtime despite having different cpu.weight.
+ */
+static int test_cpucg_weight_underprovisioned(const char *root)
+{
+ // Only run the test if there are enough cores to avoid overprovisioning
+ // the system.
+ if (get_nprocs() < 4)
+ return KSFT_SKIP;
+
+ return run_cpucg_weight_test(root, weight_hog_one_cpu,
+ underprovision_validate);
+}
+
+static int
+run_cpucg_nested_weight_test(const char *root, bool overprovisioned)
+{
+ int ret = KSFT_FAIL, i;
+ char *parent = NULL, *child = NULL;
+ struct cpu_hogger leaf[3] = {NULL};
+ long nested_leaf_usage, child_usage;
+ int nprocs = get_nprocs();
+
+ if (!overprovisioned) {
+ if (nprocs < 4)
+ /*
+ * Only run the test if there are enough cores to avoid overprovisioning
+ * the system.
+ */
+ return KSFT_SKIP;
+ nprocs /= 4;
+ }
+
+ parent = cg_name(root, "cpucg_test");
+ child = cg_name(parent, "cpucg_child");
+ if (!parent || !child)
+ goto cleanup;
+
+ if (cg_create(parent))
+ goto cleanup;
+ if (cg_write(parent, "cgroup.subtree_control", "+cpu"))
+ goto cleanup;
+
+ if (cg_create(child))
+ goto cleanup;
+ if (cg_write(child, "cgroup.subtree_control", "+cpu"))
+ goto cleanup;
+ if (cg_write(child, "cpu.weight", "1000"))
+ goto cleanup;
+
+ for (i = 0; i < ARRAY_SIZE(leaf); i++) {
+ const char *ancestor;
+ long weight;
+
+ if (i == 0) {
+ ancestor = parent;
+ weight = 1000;
+ } else {
+ ancestor = child;
+ weight = 5000;
+ }
+ leaf[i].cgroup = cg_name_indexed(ancestor, "cpucg_leaf", i);
+ if (!leaf[i].cgroup)
+ goto cleanup;
+
+ if (cg_create(leaf[i].cgroup))
+ goto cleanup;
+
+ if (cg_write_numeric(leaf[i].cgroup, "cpu.weight", weight))
+ goto cleanup;
+ }
+
+ for (i = 0; i < ARRAY_SIZE(leaf); i++) {
+ pid_t pid;
+ struct cpu_hog_func_param param = {
+ .nprocs = nprocs,
+ .ts = {
+ .tv_sec = 10,
+ .tv_nsec = 0,
+ },
+ .clock_type = CPU_HOG_CLOCK_WALL,
+ };
+
+ pid = cg_run_nowait(leaf[i].cgroup, hog_cpus_timed,
+ (void *)&param);
+ if (pid <= 0)
+ goto cleanup;
+ leaf[i].pid = pid;
+ }
+
+ for (i = 0; i < ARRAY_SIZE(leaf); i++) {
+ int retcode;
+
+ waitpid(leaf[i].pid, &retcode, 0);
+ if (!WIFEXITED(retcode))
+ goto cleanup;
+ if (WEXITSTATUS(retcode))
+ goto cleanup;
+ }
+
+ for (i = 0; i < ARRAY_SIZE(leaf); i++) {
+ leaf[i].usage = cg_read_key_long(leaf[i].cgroup,
+ "cpu.stat", "usage_usec");
+ if (leaf[i].usage <= 0)
+ goto cleanup;
+ }
+
+ nested_leaf_usage = leaf[1].usage + leaf[2].usage;
+ if (overprovisioned) {
+ if (!values_close(leaf[0].usage, nested_leaf_usage, 15))
+ goto cleanup;
+ } else if (!values_close(leaf[0].usage * 2, nested_leaf_usage, 15))
+ goto cleanup;
+
+
+ child_usage = cg_read_key_long(child, "cpu.stat", "usage_usec");
+ if (child_usage <= 0)
+ goto cleanup;
+ if (!values_close(child_usage, nested_leaf_usage, 1))
+ goto cleanup;
+
+ ret = KSFT_PASS;
+cleanup:
+ for (i = 0; i < ARRAY_SIZE(leaf); i++) {
+ cg_destroy(leaf[i].cgroup);
+ free(leaf[i].cgroup);
+ }
+ cg_destroy(child);
+ free(child);
+ cg_destroy(parent);
+ free(parent);
+
+ return ret;
+}
+
+/*
+ * First, this test creates the following hierarchy:
+ * A
+ * A/B cpu.weight = 1000
+ * A/C cpu.weight = 1000
+ * A/C/D cpu.weight = 5000
+ * A/C/E cpu.weight = 5000
+ *
+ * A separate process is then created for each leaf, which spawn nproc threads
+ * that burn a CPU for a few seconds.
+ *
+ * Once all of those processes have exited, we verify that each of the leaf
+ * cgroups have roughly the same usage from cpu.stat.
+ */
+static int
+test_cpucg_nested_weight_overprovisioned(const char *root)
+{
+ return run_cpucg_nested_weight_test(root, true);
+}
+
+/*
+ * First, this test creates the following hierarchy:
+ * A
+ * A/B cpu.weight = 1000
+ * A/C cpu.weight = 1000
+ * A/C/D cpu.weight = 5000
+ * A/C/E cpu.weight = 5000
+ *
+ * A separate process is then created for each leaf, which nproc / 4 threads
+ * that burns a CPU for a few seconds.
+ *
+ * Once all of those processes have exited, we verify that each of the leaf
+ * cgroups have roughly the same usage from cpu.stat.
+ */
+static int
+test_cpucg_nested_weight_underprovisioned(const char *root)
+{
+ return run_cpucg_nested_weight_test(root, false);
+}
+
+/*
+ * This test creates a cgroup with some maximum value within a period, and
+ * verifies that a process in the cgroup is not overscheduled.
+ */
+static int test_cpucg_max(const char *root)
+{
+ int ret = KSFT_FAIL;
+ long usage_usec, user_usec;
+ long usage_seconds = 1;
+ long expected_usage_usec = usage_seconds * USEC_PER_SEC;
+ char *cpucg;
+
+ cpucg = cg_name(root, "cpucg_test");
+ if (!cpucg)
+ goto cleanup;
+
+ if (cg_create(cpucg))
+ goto cleanup;
+
+ if (cg_write(cpucg, "cpu.max", "1000"))
+ goto cleanup;
+
+ struct cpu_hog_func_param param = {
+ .nprocs = 1,
+ .ts = {
+ .tv_sec = usage_seconds,
+ .tv_nsec = 0,
+ },
+ .clock_type = CPU_HOG_CLOCK_WALL,
+ };
+ if (cg_run(cpucg, hog_cpus_timed, (void *)&param))
+ goto cleanup;
+
+ usage_usec = cg_read_key_long(cpucg, "cpu.stat", "usage_usec");
+ user_usec = cg_read_key_long(cpucg, "cpu.stat", "user_usec");
+ if (user_usec <= 0)
+ goto cleanup;
+
+ if (user_usec >= expected_usage_usec)
+ goto cleanup;
+
+ if (values_close(usage_usec, expected_usage_usec, 95))
+ goto cleanup;
+
+ ret = KSFT_PASS;
+
+cleanup:
+ cg_destroy(cpucg);
+ free(cpucg);
+
+ return ret;
+}
+
+/*
+ * This test verifies that a process inside of a nested cgroup whose parent
+ * group has a cpu.max value set, is properly throttled.
+ */
+static int test_cpucg_max_nested(const char *root)
+{
+ int ret = KSFT_FAIL;
+ long usage_usec, user_usec;
+ long usage_seconds = 1;
+ long expected_usage_usec = usage_seconds * USEC_PER_SEC;
+ char *parent, *child;
+
+ parent = cg_name(root, "cpucg_parent");
+ child = cg_name(parent, "cpucg_child");
+ if (!parent || !child)
+ goto cleanup;
+
+ if (cg_create(parent))
+ goto cleanup;
+
+ if (cg_write(parent, "cgroup.subtree_control", "+cpu"))
+ goto cleanup;
+
+ if (cg_create(child))
+ goto cleanup;
+
+ if (cg_write(parent, "cpu.max", "1000"))
+ goto cleanup;
+
+ struct cpu_hog_func_param param = {
+ .nprocs = 1,
+ .ts = {
+ .tv_sec = usage_seconds,
+ .tv_nsec = 0,
+ },
+ .clock_type = CPU_HOG_CLOCK_WALL,
+ };
+ if (cg_run(child, hog_cpus_timed, (void *)&param))
+ goto cleanup;
+
+ usage_usec = cg_read_key_long(child, "cpu.stat", "usage_usec");
+ user_usec = cg_read_key_long(child, "cpu.stat", "user_usec");
+ if (user_usec <= 0)
+ goto cleanup;
+
+ if (user_usec >= expected_usage_usec)
+ goto cleanup;
+
+ if (values_close(usage_usec, expected_usage_usec, 95))
+ goto cleanup;
+
+ ret = KSFT_PASS;
+
+cleanup:
+ cg_destroy(child);
+ free(child);
+ cg_destroy(parent);
+ free(parent);
+
+ return ret;
+}
+
+#define T(x) { x, #x }
+struct cpucg_test {
+ int (*fn)(const char *root);
+ const char *name;
+} tests[] = {
+ T(test_cpucg_subtree_control),
+ T(test_cpucg_stats),
+ T(test_cpucg_weight_overprovisioned),
+ T(test_cpucg_weight_underprovisioned),
+ T(test_cpucg_nested_weight_overprovisioned),
+ T(test_cpucg_nested_weight_underprovisioned),
+ T(test_cpucg_max),
+ T(test_cpucg_max_nested),
+};
+#undef T
+
+int main(int argc, char *argv[])
+{
+ char root[PATH_MAX];
+ int i, ret = EXIT_SUCCESS;
+
+ if (cg_find_unified_root(root, sizeof(root)))
+ ksft_exit_skip("cgroup v2 isn't mounted\n");
+
+ if (cg_read_strstr(root, "cgroup.subtree_control", "cpu"))
+ if (cg_write(root, "cgroup.subtree_control", "+cpu"))
+ ksft_exit_skip("Failed to set cpu controller\n");
+
+ for (i = 0; i < ARRAY_SIZE(tests); i++) {
+ switch (tests[i].fn(root)) {
+ case KSFT_PASS:
+ ksft_test_result_pass("%s\n", tests[i].name);
+ break;
+ case KSFT_SKIP:
+ ksft_test_result_skip("%s\n", tests[i].name);
+ break;
+ default:
+ ret = EXIT_FAILURE;
+ ksft_test_result_fail("%s\n", tests[i].name);
+ break;
+ }
+ }
+
+ return ret;
+}
diff --git a/tools/testing/selftests/cgroup/test_cpuset.c b/tools/testing/selftests/cgroup/test_cpuset.c
new file mode 100644
index 000000000000..b061ed1e05b4
--- /dev/null
+++ b/tools/testing/selftests/cgroup/test_cpuset.c
@@ -0,0 +1,275 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include <linux/limits.h>
+#include <signal.h>
+
+#include "../kselftest.h"
+#include "cgroup_util.h"
+
+static int idle_process_fn(const char *cgroup, void *arg)
+{
+ (void)pause();
+ return 0;
+}
+
+static int do_migration_fn(const char *cgroup, void *arg)
+{
+ int object_pid = (int)(size_t)arg;
+
+ if (setuid(TEST_UID))
+ return EXIT_FAILURE;
+
+ // XXX checking /proc/$pid/cgroup would be quicker than wait
+ if (cg_enter(cgroup, object_pid) ||
+ cg_wait_for_proc_count(cgroup, 1))
+ return EXIT_FAILURE;
+
+ return EXIT_SUCCESS;
+}
+
+static int do_controller_fn(const char *cgroup, void *arg)
+{
+ const char *child = cgroup;
+ const char *parent = arg;
+
+ if (setuid(TEST_UID))
+ return EXIT_FAILURE;
+
+ if (!cg_read_strstr(child, "cgroup.controllers", "cpuset"))
+ return EXIT_FAILURE;
+
+ if (cg_write(parent, "cgroup.subtree_control", "+cpuset"))
+ return EXIT_FAILURE;
+
+ if (cg_read_strstr(child, "cgroup.controllers", "cpuset"))
+ return EXIT_FAILURE;
+
+ if (cg_write(parent, "cgroup.subtree_control", "-cpuset"))
+ return EXIT_FAILURE;
+
+ if (!cg_read_strstr(child, "cgroup.controllers", "cpuset"))
+ return EXIT_FAILURE;
+
+ return EXIT_SUCCESS;
+}
+
+/*
+ * Migrate a process between two sibling cgroups.
+ * The success should only depend on the parent cgroup permissions and not the
+ * migrated process itself (cpuset controller is in place because it uses
+ * security_task_setscheduler() in cgroup v1).
+ *
+ * Deliberately don't set cpuset.cpus in children to avoid definining migration
+ * permissions between two different cpusets.
+ */
+static int test_cpuset_perms_object(const char *root, bool allow)
+{
+ char *parent = NULL, *child_src = NULL, *child_dst = NULL;
+ char *parent_procs = NULL, *child_src_procs = NULL, *child_dst_procs = NULL;
+ const uid_t test_euid = TEST_UID;
+ int object_pid = 0;
+ int ret = KSFT_FAIL;
+
+ parent = cg_name(root, "cpuset_test_0");
+ if (!parent)
+ goto cleanup;
+ parent_procs = cg_name(parent, "cgroup.procs");
+ if (!parent_procs)
+ goto cleanup;
+ if (cg_create(parent))
+ goto cleanup;
+
+ child_src = cg_name(parent, "cpuset_test_1");
+ if (!child_src)
+ goto cleanup;
+ child_src_procs = cg_name(child_src, "cgroup.procs");
+ if (!child_src_procs)
+ goto cleanup;
+ if (cg_create(child_src))
+ goto cleanup;
+
+ child_dst = cg_name(parent, "cpuset_test_2");
+ if (!child_dst)
+ goto cleanup;
+ child_dst_procs = cg_name(child_dst, "cgroup.procs");
+ if (!child_dst_procs)
+ goto cleanup;
+ if (cg_create(child_dst))
+ goto cleanup;
+
+ if (cg_write(parent, "cgroup.subtree_control", "+cpuset"))
+ goto cleanup;
+
+ if (cg_read_strstr(child_src, "cgroup.controllers", "cpuset") ||
+ cg_read_strstr(child_dst, "cgroup.controllers", "cpuset"))
+ goto cleanup;
+
+ /* Enable permissions along src->dst tree path */
+ if (chown(child_src_procs, test_euid, -1) ||
+ chown(child_dst_procs, test_euid, -1))
+ goto cleanup;
+
+ if (allow && chown(parent_procs, test_euid, -1))
+ goto cleanup;
+
+ /* Fork a privileged child as a test object */
+ object_pid = cg_run_nowait(child_src, idle_process_fn, NULL);
+ if (object_pid < 0)
+ goto cleanup;
+
+ /* Carry out migration in a child process that can drop all privileges
+ * (including capabilities), the main process must remain privileged for
+ * cleanup.
+ * Child process's cgroup is irrelevant but we place it into child_dst
+ * as hacky way to pass information about migration target to the child.
+ */
+ if (allow ^ (cg_run(child_dst, do_migration_fn, (void *)(size_t)object_pid) == EXIT_SUCCESS))
+ goto cleanup;
+
+ ret = KSFT_PASS;
+
+cleanup:
+ if (object_pid > 0) {
+ (void)kill(object_pid, SIGTERM);
+ (void)clone_reap(object_pid, WEXITED);
+ }
+
+ cg_destroy(child_dst);
+ free(child_dst_procs);
+ free(child_dst);
+
+ cg_destroy(child_src);
+ free(child_src_procs);
+ free(child_src);
+
+ cg_destroy(parent);
+ free(parent_procs);
+ free(parent);
+
+ return ret;
+}
+
+static int test_cpuset_perms_object_allow(const char *root)
+{
+ return test_cpuset_perms_object(root, true);
+}
+
+static int test_cpuset_perms_object_deny(const char *root)
+{
+ return test_cpuset_perms_object(root, false);
+}
+
+/*
+ * Migrate a process between parent and child implicitely
+ * Implicit migration happens when a controller is enabled/disabled.
+ *
+ */
+static int test_cpuset_perms_subtree(const char *root)
+{
+ char *parent = NULL, *child = NULL;
+ char *parent_procs = NULL, *parent_subctl = NULL, *child_procs = NULL;
+ const uid_t test_euid = TEST_UID;
+ int object_pid = 0;
+ int ret = KSFT_FAIL;
+
+ parent = cg_name(root, "cpuset_test_0");
+ if (!parent)
+ goto cleanup;
+ parent_procs = cg_name(parent, "cgroup.procs");
+ if (!parent_procs)
+ goto cleanup;
+ parent_subctl = cg_name(parent, "cgroup.subtree_control");
+ if (!parent_subctl)
+ goto cleanup;
+ if (cg_create(parent))
+ goto cleanup;
+
+ child = cg_name(parent, "cpuset_test_1");
+ if (!child)
+ goto cleanup;
+ child_procs = cg_name(child, "cgroup.procs");
+ if (!child_procs)
+ goto cleanup;
+ if (cg_create(child))
+ goto cleanup;
+
+ /* Enable permissions as in a delegated subtree */
+ if (chown(parent_procs, test_euid, -1) ||
+ chown(parent_subctl, test_euid, -1) ||
+ chown(child_procs, test_euid, -1))
+ goto cleanup;
+
+ /* Put a privileged child in the subtree and modify controller state
+ * from an unprivileged process, the main process remains privileged
+ * for cleanup.
+ * The unprivileged child runs in subtree too to avoid parent and
+ * internal-node constraing violation.
+ */
+ object_pid = cg_run_nowait(child, idle_process_fn, NULL);
+ if (object_pid < 0)
+ goto cleanup;
+
+ if (cg_run(child, do_controller_fn, parent) != EXIT_SUCCESS)
+ goto cleanup;
+
+ ret = KSFT_PASS;
+
+cleanup:
+ if (object_pid > 0) {
+ (void)kill(object_pid, SIGTERM);
+ (void)clone_reap(object_pid, WEXITED);
+ }
+
+ cg_destroy(child);
+ free(child_procs);
+ free(child);
+
+ cg_destroy(parent);
+ free(parent_subctl);
+ free(parent_procs);
+ free(parent);
+
+ return ret;
+}
+
+
+#define T(x) { x, #x }
+struct cpuset_test {
+ int (*fn)(const char *root);
+ const char *name;
+} tests[] = {
+ T(test_cpuset_perms_object_allow),
+ T(test_cpuset_perms_object_deny),
+ T(test_cpuset_perms_subtree),
+};
+#undef T
+
+int main(int argc, char *argv[])
+{
+ char root[PATH_MAX];
+ int i, ret = EXIT_SUCCESS;
+
+ if (cg_find_unified_root(root, sizeof(root)))
+ ksft_exit_skip("cgroup v2 isn't mounted\n");
+
+ if (cg_read_strstr(root, "cgroup.subtree_control", "cpuset"))
+ if (cg_write(root, "cgroup.subtree_control", "+cpuset"))
+ ksft_exit_skip("Failed to set cpuset controller\n");
+
+ for (i = 0; i < ARRAY_SIZE(tests); i++) {
+ switch (tests[i].fn(root)) {
+ case KSFT_PASS:
+ ksft_test_result_pass("%s\n", tests[i].name);
+ break;
+ case KSFT_SKIP:
+ ksft_test_result_skip("%s\n", tests[i].name);
+ break;
+ default:
+ ret = EXIT_FAILURE;
+ ksft_test_result_fail("%s\n", tests[i].name);
+ break;
+ }
+ }
+
+ return ret;
+}
diff --git a/tools/testing/selftests/cgroup/test_cpuset_prs.sh b/tools/testing/selftests/cgroup/test_cpuset_prs.sh
new file mode 100755
index 000000000000..b5eb1be2248c
--- /dev/null
+++ b/tools/testing/selftests/cgroup/test_cpuset_prs.sh
@@ -0,0 +1,936 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+#
+# Test for cpuset v2 partition root state (PRS)
+#
+# The sched verbose flag can be optionally set so that the console log
+# can be examined for the correct setting of scheduling domain.
+#
+
+skip_test() {
+ echo "$1"
+ echo "Test SKIPPED"
+ exit 4 # ksft_skip
+}
+
+[[ $(id -u) -eq 0 ]] || skip_test "Test must be run as root!"
+
+
+# Get wait_inotify location
+WAIT_INOTIFY=$(cd $(dirname $0); pwd)/wait_inotify
+
+# Find cgroup v2 mount point
+CGROUP2=$(mount -t cgroup2 | head -1 | awk -e '{print $3}')
+[[ -n "$CGROUP2" ]] || skip_test "Cgroup v2 mount point not found!"
+SUBPARTS_CPUS=$CGROUP2/.__DEBUG__.cpuset.cpus.subpartitions
+CPULIST=$(cat $CGROUP2/cpuset.cpus.effective)
+
+NR_CPUS=$(lscpu | grep "^CPU(s):" | sed -e "s/.*:[[:space:]]*//")
+[[ $NR_CPUS -lt 8 ]] && skip_test "Test needs at least 8 cpus available!"
+
+# Set verbose flag and delay factor
+PROG=$1
+VERBOSE=0
+DELAY_FACTOR=1
+SCHED_DEBUG=
+while [[ "$1" = -* ]]
+do
+ case "$1" in
+ -v) ((VERBOSE++))
+ # Enable sched/verbose can slow thing down
+ [[ $DELAY_FACTOR -eq 1 ]] &&
+ DELAY_FACTOR=2
+ ;;
+ -d) DELAY_FACTOR=$2
+ shift
+ ;;
+ *) echo "Usage: $PROG [-v] [-d <delay-factor>"
+ exit
+ ;;
+ esac
+ shift
+done
+
+# Set sched verbose flag if available when "-v" option is specified
+if [[ $VERBOSE -gt 0 && -d /sys/kernel/debug/sched ]]
+then
+ # Used to restore the original setting during cleanup
+ SCHED_DEBUG=$(cat /sys/kernel/debug/sched/verbose)
+ echo Y > /sys/kernel/debug/sched/verbose
+fi
+
+cd $CGROUP2
+echo +cpuset > cgroup.subtree_control
+
+#
+# If cpuset has been set up and used in child cgroups, we may not be able to
+# create partition under root cgroup because of the CPU exclusivity rule.
+# So we are going to skip the test if this is the case.
+#
+[[ -d test ]] || mkdir test
+echo 0-6 > test/cpuset.cpus
+echo root > test/cpuset.cpus.partition
+cat test/cpuset.cpus.partition | grep -q invalid
+RESULT=$?
+echo member > test/cpuset.cpus.partition
+echo "" > test/cpuset.cpus
+[[ $RESULT -eq 0 ]] && skip_test "Child cgroups are using cpuset!"
+
+cleanup()
+{
+ online_cpus
+ cd $CGROUP2
+ rmdir A1/A2/A3 A1/A2 A1 B1 > /dev/null 2>&1
+ rmdir test > /dev/null 2>&1
+ [[ -n "$SCHED_DEBUG" ]] &&
+ echo "$SCHED_DEBUG" > /sys/kernel/debug/sched/verbose
+}
+
+# Pause in ms
+pause()
+{
+ DELAY=$1
+ LOOP=0
+ while [[ $LOOP -lt $DELAY_FACTOR ]]
+ do
+ sleep $DELAY
+ ((LOOP++))
+ done
+ return 0
+}
+
+console_msg()
+{
+ MSG=$1
+ echo "$MSG"
+ echo "" > /dev/console
+ echo "$MSG" > /dev/console
+ pause 0.01
+}
+
+test_partition()
+{
+ EXPECTED_VAL=$1
+ echo $EXPECTED_VAL > cpuset.cpus.partition
+ [[ $? -eq 0 ]] || exit 1
+ ACTUAL_VAL=$(cat cpuset.cpus.partition)
+ [[ $ACTUAL_VAL != $EXPECTED_VAL ]] && {
+ echo "cpuset.cpus.partition: expect $EXPECTED_VAL, found $ACTUAL_VAL"
+ echo "Test FAILED"
+ exit 1
+ }
+}
+
+test_effective_cpus()
+{
+ EXPECTED_VAL=$1
+ ACTUAL_VAL=$(cat cpuset.cpus.effective)
+ [[ "$ACTUAL_VAL" != "$EXPECTED_VAL" ]] && {
+ echo "cpuset.cpus.effective: expect '$EXPECTED_VAL', found '$ACTUAL_VAL'"
+ echo "Test FAILED"
+ exit 1
+ }
+}
+
+# Adding current process to cgroup.procs as a test
+test_add_proc()
+{
+ OUTSTR="$1"
+ ERRMSG=$((echo $$ > cgroup.procs) |& cat)
+ echo $ERRMSG | grep -q "$OUTSTR"
+ [[ $? -ne 0 ]] && {
+ echo "cgroup.procs: expect '$OUTSTR', got '$ERRMSG'"
+ echo "Test FAILED"
+ exit 1
+ }
+ echo $$ > $CGROUP2/cgroup.procs # Move out the task
+}
+
+#
+# Cpuset controller state transition test matrix.
+#
+# Cgroup test hierarchy
+#
+# root -- A1 -- A2 -- A3
+# +- B1
+#
+# P<v> = set cpus.partition (0:member, 1:root, 2:isolated)
+# C<l> = add cpu-list to cpuset.cpus
+# X<l> = add cpu-list to cpuset.cpus.exclusive
+# S<p> = use prefix in subtree_control
+# T = put a task into cgroup
+# O<c>=<v> = Write <v> to CPU online file of <c>
+#
+SETUP_A123_PARTITIONS="C1-3:P1:S+ C2-3:P1:S+ C3:P1"
+TEST_MATRIX=(
+ # old-A1 old-A2 old-A3 old-B1 new-A1 new-A2 new-A3 new-B1 fail ECPUs Pstate ISOLCPUS
+ # ------ ------ ------ ------ ------ ------ ------ ------ ---- ----- ------ --------
+ " C0-1 . . C2-3 S+ C4-5 . . 0 A2:0-1"
+ " C0-1 . . C2-3 P1 . . . 0 "
+ " C0-1 . . C2-3 P1:S+ C0-1:P1 . . 0 "
+ " C0-1 . . C2-3 P1:S+ C1:P1 . . 0 "
+ " C0-1:S+ . . C2-3 . . . P1 0 "
+ " C0-1:P1 . . C2-3 S+ C1 . . 0 "
+ " C0-1:P1 . . C2-3 S+ C1:P1 . . 0 "
+ " C0-1:P1 . . C2-3 S+ C1:P1 . P1 0 "
+ " C0-1:P1 . . C2-3 C4-5 . . . 0 A1:4-5"
+ " C0-1:P1 . . C2-3 S+:C4-5 . . . 0 A1:4-5"
+ " C0-1 . . C2-3:P1 . . . C2 0 "
+ " C0-1 . . C2-3:P1 . . . C4-5 0 B1:4-5"
+ "C0-3:P1:S+ C2-3:P1 . . . . . . 0 A1:0-1,A2:2-3"
+ "C0-3:P1:S+ C2-3:P1 . . C1-3 . . . 0 A1:1,A2:2-3"
+ "C2-3:P1:S+ C3:P1 . . C3 . . . 0 A1:,A2:3 A1:P1,A2:P1"
+ "C2-3:P1:S+ C3:P1 . . C3 P0 . . 0 A1:3,A2:3 A1:P1,A2:P0"
+ "C2-3:P1:S+ C2:P1 . . C2-4 . . . 0 A1:3-4,A2:2"
+ "C2-3:P1:S+ C3:P1 . . C3 . . C0-2 0 A1:,B1:0-2 A1:P1,A2:P1"
+ "$SETUP_A123_PARTITIONS . C2-3 . . . 0 A1:,A2:2,A3:3 A1:P1,A2:P1,A3:P1"
+
+ # CPU offlining cases:
+ " C0-1 . . C2-3 S+ C4-5 . O2=0 0 A1:0-1,B1:3"
+ "C0-3:P1:S+ C2-3:P1 . . O2=0 . . . 0 A1:0-1,A2:3"
+ "C0-3:P1:S+ C2-3:P1 . . O2=0 O2=1 . . 0 A1:0-1,A2:2-3"
+ "C0-3:P1:S+ C2-3:P1 . . O1=0 . . . 0 A1:0,A2:2-3"
+ "C0-3:P1:S+ C2-3:P1 . . O1=0 O1=1 . . 0 A1:0-1,A2:2-3"
+ "C2-3:P1:S+ C3:P1 . . O3=0 O3=1 . . 0 A1:2,A2:3 A1:P1,A2:P1"
+ "C2-3:P1:S+ C3:P2 . . O3=0 O3=1 . . 0 A1:2,A2:3 A1:P1,A2:P2"
+ "C2-3:P1:S+ C3:P1 . . O2=0 O2=1 . . 0 A1:2,A2:3 A1:P1,A2:P1"
+ "C2-3:P1:S+ C3:P2 . . O2=0 O2=1 . . 0 A1:2,A2:3 A1:P1,A2:P2"
+ "C2-3:P1:S+ C3:P1 . . O2=0 . . . 0 A1:,A2:3 A1:P1,A2:P1"
+ "C2-3:P1:S+ C3:P1 . . O3=0 . . . 0 A1:2,A2: A1:P1,A2:P1"
+ "C2-3:P1:S+ C3:P1 . . T:O2=0 . . . 0 A1:3,A2:3 A1:P1,A2:P-1"
+ "C2-3:P1:S+ C3:P1 . . . T:O3=0 . . 0 A1:2,A2:2 A1:P1,A2:P-1"
+ "$SETUP_A123_PARTITIONS . O1=0 . . . 0 A1:,A2:2,A3:3 A1:P1,A2:P1,A3:P1"
+ "$SETUP_A123_PARTITIONS . O2=0 . . . 0 A1:1,A2:,A3:3 A1:P1,A2:P1,A3:P1"
+ "$SETUP_A123_PARTITIONS . O3=0 . . . 0 A1:1,A2:2,A3: A1:P1,A2:P1,A3:P1"
+ "$SETUP_A123_PARTITIONS . T:O1=0 . . . 0 A1:2-3,A2:2-3,A3:3 A1:P1,A2:P-1,A3:P-1"
+ "$SETUP_A123_PARTITIONS . . T:O2=0 . . 0 A1:1,A2:3,A3:3 A1:P1,A2:P1,A3:P-1"
+ "$SETUP_A123_PARTITIONS . . . T:O3=0 . 0 A1:1,A2:2,A3:2 A1:P1,A2:P1,A3:P-1"
+ "$SETUP_A123_PARTITIONS . T:O1=0 O1=1 . . 0 A1:1,A2:2,A3:3 A1:P1,A2:P1,A3:P1"
+ "$SETUP_A123_PARTITIONS . . T:O2=0 O2=1 . 0 A1:1,A2:2,A3:3 A1:P1,A2:P1,A3:P1"
+ "$SETUP_A123_PARTITIONS . . . T:O3=0 O3=1 0 A1:1,A2:2,A3:3 A1:P1,A2:P1,A3:P1"
+ "$SETUP_A123_PARTITIONS . T:O1=0 O2=0 O1=1 . 0 A1:1,A2:,A3:3 A1:P1,A2:P1,A3:P1"
+ "$SETUP_A123_PARTITIONS . T:O1=0 O2=0 O2=1 . 0 A1:2-3,A2:2-3,A3:3 A1:P1,A2:P-1,A3:P-1"
+
+ # old-A1 old-A2 old-A3 old-B1 new-A1 new-A2 new-A3 new-B1 fail ECPUs Pstate ISOLCPUS
+ # ------ ------ ------ ------ ------ ------ ------ ------ ---- ----- ------ --------
+ #
+ # Remote partition and cpuset.cpus.exclusive tests
+ #
+ " C0-3:S+ C1-3:S+ C2-3 . X2-3 . . . 0 A1:0-3,A2:1-3,A3:2-3,XA1:2-3"
+ " C0-3:S+ C1-3:S+ C2-3 . X2-3 X2-3:P2 . . 0 A1:0-1,A2:2-3,A3:2-3 A1:P0,A2:P2 2-3"
+ " C0-3:S+ C1-3:S+ C2-3 . X2-3 X3:P2 . . 0 A1:0-2,A2:3,A3:3 A1:P0,A2:P2 3"
+ " C0-3:S+ C1-3:S+ C2-3 . X2-3 X2-3 X2-3:P2 . 0 A1:0-1,A2:1,A3:2-3 A1:P0,A3:P2 2-3"
+ " C0-3:S+ C1-3:S+ C2-3 . X2-3 X2-3 X2-3:P2:C3 . 0 A1:0-2,A2:1-2,A3:3 A1:P0,A3:P2 3"
+ " C0-3:S+ C1-3:S+ C2-3 C2-3 . . . P2 0 A1:0-3,A2:1-3,A3:2-3,B1:2-3 A1:P0,A3:P0,B1:P-2"
+ " C0-3:S+ C1-3:S+ C2-3 C4-5 . . . P2 0 B1:4-5 B1:P2 4-5"
+ " C0-3:S+ C1-3:S+ C2-3 C4 X2-3 X2-3 X2-3:P2 P2 0 A3:2-3,B1:4 A3:P2,B1:P2 2-4"
+ " C0-3:S+ C1-3:S+ C2-3 C4 X2-3 X2-3 X2-3:P2:C1-3 P2 0 A3:2-3,B1:4 A3:P2,B1:P2 2-4"
+ " C0-3:S+ C1-3:S+ C2-3 C4 X1-3 X1-3:P2 P2 . 0 A2:1,A3:2-3 A2:P2,A3:P2 1-3"
+ " C0-3:S+ C1-3:S+ C2-3 C4 X2-3 X2-3 X2-3:P2 P2:C4-5 0 A3:2-3,B1:4-5 A3:P2,B1:P2 2-5"
+
+ # Nested remote/local partition tests
+ " C0-3:S+ C1-3:S+ C2-3 C4-5 X2-3 X2-3:P1 P2 P1 0 A1:0-1,A2:,A3:2-3,B1:4-5 \
+ A1:P0,A2:P1,A3:P2,B1:P1 2-3"
+ " C0-3:S+ C1-3:S+ C2-3 C4 X2-3 X2-3:P1 P2 P1 0 A1:0-1,A2:,A3:2-3,B1:4 \
+ A1:P0,A2:P1,A3:P2,B1:P1 2-4,2-3"
+ " C0-3:S+ C1-3:S+ C3 C4 X2-3 X2-3:P1 P2 P1 0 A1:0-1,A2:2,A3:3,B1:4 \
+ A1:P0,A2:P1,A3:P2,B1:P1 2-4,3"
+ " C0-4:S+ C1-4:S+ C2-4 . X2-4 X2-4:P2 X4:P1 . 0 A1:0-1,A2:2-3,A3:4 \
+ A1:P0,A2:P2,A3:P1 2-4,2-3"
+ " C0-4:X2-4:S+ C1-4:X2-4:S+:P2 C2-4:X4:P1 \
+ . . X5 . . 0 A1:0-4,A2:1-4,A3:2-4 \
+ A1:P0,A2:P-2,A3:P-1"
+ " C0-4:X2-4:S+ C1-4:X2-4:S+:P2 C2-4:X4:P1 \
+ . . . X1 . 0 A1:0-1,A2:2-4,A3:2-4 \
+ A1:P0,A2:P2,A3:P-1 2-4"
+
+ # Remote partition offline tests
+ " C0-3:S+ C1-3:S+ C2-3 . X2-3 X2-3 X2-3:P2:O2=0 . 0 A1:0-1,A2:1,A3:3 A1:P0,A3:P2 2-3"
+ " C0-3:S+ C1-3:S+ C2-3 . X2-3 X2-3 X2-3:P2:O2=0 O2=1 0 A1:0-1,A2:1,A3:2-3 A1:P0,A3:P2 2-3"
+ " C0-3:S+ C1-3:S+ C3 . X2-3 X2-3 P2:O3=0 . 0 A1:0-2,A2:1-2,A3: A1:P0,A3:P2 3"
+ " C0-3:S+ C1-3:S+ C3 . X2-3 X2-3 T:P2:O3=0 . 0 A1:0-2,A2:1-2,A3:1-2 A1:P0,A3:P-2 3,"
+
+ # An invalidated remote partition cannot self-recover from hotplug
+ " C0-3:S+ C1-3:S+ C2 . X2-3 X2-3 T:P2:O2=0 O2=1 0 A1:0-3,A2:1-3,A3:2 A1:P0,A3:P-2"
+
+ # cpus.exclusive.effective clearing test
+ " C0-3:S+ C1-3:S+ C2 . X2-3:X . . . 0 A1:0-3,A2:1-3,A3:2,XA1:"
+
+ # Invalid to valid remote partition transition test
+ " C0-3:S+ C1-3 . . . X3:P2 . . 0 A1:0-3,A2:1-3,XA2: A2:P-2"
+ " C0-3:S+ C1-3:X3:P2
+ . . X2-3 P2 . . 0 A1:0-2,A2:3,XA2:3 A2:P2 3"
+
+ # Invalid to valid local partition direct transition tests
+ " C1-3:S+:P2 C2-3:X1:P2 . . . . . . 0 A1:1-3,XA1:1-3,A2:2-3:XA2: A1:P2,A2:P-2 1-3"
+ " C1-3:S+:P2 C2-3:X1:P2 . . . X3:P2 . . 0 A1:1-2,XA1:1-3,A2:3:XA2:3 A1:P2,A2:P2 1-3"
+ " C0-3:P2 . . C4-6 C0-4 . . . 0 A1:0-4,B1:4-6 A1:P-2,B1:P0"
+ " C0-3:P2 . . C4-6 C0-4:C0-3 . . . 0 A1:0-3,B1:4-6 A1:P2,B1:P0 0-3"
+ " C0-3:P2 . . C3-5:C4-5 . . . . 0 A1:0-3,B1:4-5 A1:P2,B1:P0 0-3"
+
+ # Local partition invalidation tests
+ " C0-3:X1-3:S+:P2 C1-3:X2-3:S+:P2 C2-3:X3:P2 \
+ . . . . . 0 A1:1,A2:2,A3:3 A1:P2,A2:P2,A3:P2 1-3"
+ " C0-3:X1-3:S+:P2 C1-3:X2-3:S+:P2 C2-3:X3:P2 \
+ . . X4 . . 0 A1:1-3,A2:1-3,A3:2-3,XA2:,XA3: A1:P2,A2:P-2,A3:P-2 1-3"
+ " C0-3:X1-3:S+:P2 C1-3:X2-3:S+:P2 C2-3:X3:P2 \
+ . . C4 . . 0 A1:1-3,A2:1-3,A3:2-3,XA2:,XA3: A1:P2,A2:P-2,A3:P-2 1-3"
+ # Local partition CPU change tests
+ " C0-5:S+:P2 C4-5:S+:P1 . . . C3-5 . . 0 A1:0-2,A2:3-5 A1:P2,A2:P1 0-2"
+ " C0-5:S+:P2 C4-5:S+:P1 . . C1-5 . . . 0 A1:1-3,A2:4-5 A1:P2,A2:P1 1-3"
+
+ # cpus_allowed/exclusive_cpus update tests
+ " C0-3:X2-3:S+ C1-3:X2-3:S+ C2-3:X2-3 \
+ . C4 . P2 . 0 A1:4,A2:4,XA2:,XA3:,A3:4 \
+ A1:P0,A3:P-2"
+ " C0-3:X2-3:S+ C1-3:X2-3:S+ C2-3:X2-3 \
+ . X1 . P2 . 0 A1:0-3,A2:1-3,XA1:1,XA2:,XA3:,A3:2-3 \
+ A1:P0,A3:P-2"
+ " C0-3:X2-3:S+ C1-3:X2-3:S+ C2-3:X2-3 \
+ . . C3 P2 . 0 A1:0-2,A2:0-2,XA2:3,XA3:3,A3:3 \
+ A1:P0,A3:P2 3"
+ " C0-3:X2-3:S+ C1-3:X2-3:S+ C2-3:X2-3 \
+ . . X3 P2 . 0 A1:0-2,A2:1-2,XA2:3,XA3:3,A3:3 \
+ A1:P0,A3:P2 3"
+ " C0-3:X2-3:S+ C1-3:X2-3:S+ C2-3:X2-3:P2 \
+ . . X3 . . 0 A1:0-3,A2:1-3,XA2:3,XA3:3,A3:2-3 \
+ A1:P0,A3:P-2"
+ " C0-3:X2-3:S+ C1-3:X2-3:S+ C2-3:X2-3:P2 \
+ . . C3 . . 0 A1:0-3,A2:3,XA2:3,XA3:3,A3:3 \
+ A1:P0,A3:P-2"
+ " C0-3:X2-3:S+ C1-3:X2-3:S+ C2-3:X2-3:P2 \
+ . C4 . . . 0 A1:4,A2:4,A3:4,XA1:,XA2:,XA3 \
+ A1:P0,A3:P-2"
+
+ # old-A1 old-A2 old-A3 old-B1 new-A1 new-A2 new-A3 new-B1 fail ECPUs Pstate ISOLCPUS
+ # ------ ------ ------ ------ ------ ------ ------ ------ ---- ----- ------ --------
+ #
+ # Incorrect change to cpuset.cpus invalidates partition root
+ #
+ # Adding CPUs to partition root that are not in parent's
+ # cpuset.cpus is allowed, but those extra CPUs are ignored.
+ "C2-3:P1:S+ C3:P1 . . . C2-4 . . 0 A1:,A2:2-3 A1:P1,A2:P1"
+
+ # Taking away all CPUs from parent or itself if there are tasks
+ # will make the partition invalid.
+ "C2-3:P1:S+ C3:P1 . . T C2-3 . . 0 A1:2-3,A2:2-3 A1:P1,A2:P-1"
+ " C3:P1:S+ C3 . . T P1 . . 0 A1:3,A2:3 A1:P1,A2:P-1"
+ "$SETUP_A123_PARTITIONS . T:C2-3 . . . 0 A1:2-3,A2:2-3,A3:3 A1:P1,A2:P-1,A3:P-1"
+ "$SETUP_A123_PARTITIONS . T:C2-3:C1-3 . . . 0 A1:1,A2:2,A3:3 A1:P1,A2:P1,A3:P1"
+
+ # Changing a partition root to member makes child partitions invalid
+ "C2-3:P1:S+ C3:P1 . . P0 . . . 0 A1:2-3,A2:3 A1:P0,A2:P-1"
+ "$SETUP_A123_PARTITIONS . C2-3 P0 . . 0 A1:2-3,A2:2-3,A3:3 A1:P1,A2:P0,A3:P-1"
+
+ # cpuset.cpus can contains cpus not in parent's cpuset.cpus as long
+ # as they overlap.
+ "C2-3:P1:S+ . . . . C3-4:P1 . . 0 A1:2,A2:3 A1:P1,A2:P1"
+
+ # Deletion of CPUs distributed to child cgroup is allowed.
+ "C0-1:P1:S+ C1 . C2-3 C4-5 . . . 0 A1:4-5,A2:4-5"
+
+ # To become a valid partition root, cpuset.cpus must overlap parent's
+ # cpuset.cpus.
+ " C0-1:P1 . . C2-3 S+ C4-5:P1 . . 0 A1:0-1,A2:0-1 A1:P1,A2:P-1"
+
+ # Enabling partition with child cpusets is allowed
+ " C0-1:S+ C1 . C2-3 P1 . . . 0 A1:0-1,A2:1 A1:P1"
+
+ # A partition root with non-partition root parent is invalid, but it
+ # can be made valid if its parent becomes a partition root too.
+ " C0-1:S+ C1 . C2-3 . P2 . . 0 A1:0-1,A2:1 A1:P0,A2:P-2"
+ " C0-1:S+ C1:P2 . C2-3 P1 . . . 0 A1:0,A2:1 A1:P1,A2:P2"
+
+ # A non-exclusive cpuset.cpus change will invalidate partition and its siblings
+ " C0-1:P1 . . C2-3 C0-2 . . . 0 A1:0-2,B1:2-3 A1:P-1,B1:P0"
+ " C0-1:P1 . . P1:C2-3 C0-2 . . . 0 A1:0-2,B1:2-3 A1:P-1,B1:P-1"
+ " C0-1 . . P1:C2-3 C0-2 . . . 0 A1:0-2,B1:2-3 A1:P0,B1:P-1"
+
+ # old-A1 old-A2 old-A3 old-B1 new-A1 new-A2 new-A3 new-B1 fail ECPUs Pstate ISOLCPUS
+ # ------ ------ ------ ------ ------ ------ ------ ------ ---- ----- ------ --------
+ # Failure cases:
+
+ # A task cannot be added to a partition with no cpu
+ "C2-3:P1:S+ C3:P1 . . O2=0:T . . . 1 A1:,A2:3 A1:P1,A2:P1"
+
+ # Changes to cpuset.cpus.exclusive that violate exclusivity rule is rejected
+ " C0-3 . . C4-5 X0-3 . . X3-5 1 A1:0-3,B1:4-5"
+)
+
+#
+# Write to the cpu online file
+# $1 - <c>=<v> where <c> = cpu number, <v> value to be written
+#
+write_cpu_online()
+{
+ CPU=${1%=*}
+ VAL=${1#*=}
+ CPUFILE=//sys/devices/system/cpu/cpu${CPU}/online
+ if [[ $VAL -eq 0 ]]
+ then
+ OFFLINE_CPUS="$OFFLINE_CPUS $CPU"
+ else
+ [[ -n "$OFFLINE_CPUS" ]] && {
+ OFFLINE_CPUS=$(echo $CPU $CPU $OFFLINE_CPUS | fmt -1 |\
+ sort | uniq -u)
+ }
+ fi
+ echo $VAL > $CPUFILE
+ pause 0.05
+}
+
+#
+# Set controller state
+# $1 - cgroup directory
+# $2 - state
+# $3 - showerr
+#
+# The presence of ":" in state means transition from one to the next.
+#
+set_ctrl_state()
+{
+ TMPMSG=/tmp/.msg_$$
+ CGRP=$1
+ STATE=$2
+ SHOWERR=${3}
+ CTRL=${CTRL:=$CONTROLLER}
+ HASERR=0
+ REDIRECT="2> $TMPMSG"
+ [[ -z "$STATE" || "$STATE" = '.' ]] && return 0
+ [[ $VERBOSE -gt 0 ]] && SHOWERR=1
+
+ rm -f $TMPMSG
+ for CMD in $(echo $STATE | sed -e "s/:/ /g")
+ do
+ TFILE=$CGRP/cgroup.procs
+ SFILE=$CGRP/cgroup.subtree_control
+ PFILE=$CGRP/cpuset.cpus.partition
+ CFILE=$CGRP/cpuset.cpus
+ XFILE=$CGRP/cpuset.cpus.exclusive
+ S=$(expr substr $CMD 1 1)
+ if [[ $S = S ]]
+ then
+ PREFIX=${CMD#?}
+ COMM="echo ${PREFIX}${CTRL} > $SFILE"
+ eval $COMM $REDIRECT
+ elif [[ $S = X ]]
+ then
+ CPUS=${CMD#?}
+ COMM="echo $CPUS > $XFILE"
+ eval $COMM $REDIRECT
+ elif [[ $S = C ]]
+ then
+ CPUS=${CMD#?}
+ COMM="echo $CPUS > $CFILE"
+ eval $COMM $REDIRECT
+ elif [[ $S = P ]]
+ then
+ VAL=${CMD#?}
+ case $VAL in
+ 0) VAL=member
+ ;;
+ 1) VAL=root
+ ;;
+ 2) VAL=isolated
+ ;;
+ *)
+ echo "Invalid partition state - $VAL"
+ exit 1
+ ;;
+ esac
+ COMM="echo $VAL > $PFILE"
+ eval $COMM $REDIRECT
+ elif [[ $S = O ]]
+ then
+ VAL=${CMD#?}
+ write_cpu_online $VAL
+ elif [[ $S = T ]]
+ then
+ COMM="echo 0 > $TFILE"
+ eval $COMM $REDIRECT
+ fi
+ RET=$?
+ [[ $RET -ne 0 ]] && {
+ [[ -n "$SHOWERR" ]] && {
+ echo "$COMM"
+ cat $TMPMSG
+ }
+ HASERR=1
+ }
+ pause 0.01
+ rm -f $TMPMSG
+ done
+ return $HASERR
+}
+
+set_ctrl_state_noerr()
+{
+ CGRP=$1
+ STATE=$2
+ [[ -d $CGRP ]] || mkdir $CGRP
+ set_ctrl_state $CGRP $STATE 1
+ [[ $? -ne 0 ]] && {
+ echo "ERROR: Failed to set $2 to cgroup $1!"
+ exit 1
+ }
+}
+
+online_cpus()
+{
+ [[ -n "OFFLINE_CPUS" ]] && {
+ for C in $OFFLINE_CPUS
+ do
+ write_cpu_online ${C}=1
+ done
+ }
+}
+
+#
+# Return 1 if the list of effective cpus isn't the same as the initial list.
+#
+reset_cgroup_states()
+{
+ echo 0 > $CGROUP2/cgroup.procs
+ online_cpus
+ rmdir A1/A2/A3 A1/A2 A1 B1 > /dev/null 2>&1
+ pause 0.02
+ set_ctrl_state . R-
+ pause 0.01
+}
+
+dump_states()
+{
+ for DIR in . A1 A1/A2 A1/A2/A3 B1
+ do
+ CPUS=$DIR/cpuset.cpus
+ ECPUS=$DIR/cpuset.cpus.effective
+ XCPUS=$DIR/cpuset.cpus.exclusive
+ XECPUS=$DIR/cpuset.cpus.exclusive.effective
+ PRS=$DIR/cpuset.cpus.partition
+ PCPUS=$DIR/.__DEBUG__.cpuset.cpus.subpartitions
+ ISCPUS=$DIR/cpuset.cpus.isolated
+ [[ -e $CPUS ]] && echo "$CPUS: $(cat $CPUS)"
+ [[ -e $XCPUS ]] && echo "$XCPUS: $(cat $XCPUS)"
+ [[ -e $ECPUS ]] && echo "$ECPUS: $(cat $ECPUS)"
+ [[ -e $XECPUS ]] && echo "$XECPUS: $(cat $XECPUS)"
+ [[ -e $PRS ]] && echo "$PRS: $(cat $PRS)"
+ [[ -e $PCPUS ]] && echo "$PCPUS: $(cat $PCPUS)"
+ [[ -e $ISCPUS ]] && echo "$ISCPUS: $(cat $ISCPUS)"
+ done
+}
+
+#
+# Check effective cpus
+# $1 - check string, format: <cgroup>:<cpu-list>[,<cgroup>:<cpu-list>]*
+#
+check_effective_cpus()
+{
+ CHK_STR=$1
+ for CHK in $(echo $CHK_STR | sed -e "s/,/ /g")
+ do
+ set -- $(echo $CHK | sed -e "s/:/ /g")
+ CGRP=$1
+ CPUS=$2
+ if [[ $CGRP = X* ]]
+ then
+ CGRP=${CGRP#X}
+ FILE=cpuset.cpus.exclusive.effective
+ else
+ FILE=cpuset.cpus.effective
+ fi
+ [[ $CGRP = A2 ]] && CGRP=A1/A2
+ [[ $CGRP = A3 ]] && CGRP=A1/A2/A3
+ [[ -e $CGRP/$FILE ]] || return 1
+ [[ $CPUS = $(cat $CGRP/$FILE) ]] || return 1
+ done
+}
+
+#
+# Check cgroup states
+# $1 - check string, format: <cgroup>:<state>[,<cgroup>:<state>]*
+#
+check_cgroup_states()
+{
+ CHK_STR=$1
+ for CHK in $(echo $CHK_STR | sed -e "s/,/ /g")
+ do
+ set -- $(echo $CHK | sed -e "s/:/ /g")
+ CGRP=$1
+ STATE=$2
+ FILE=
+ EVAL=$(expr substr $STATE 2 2)
+ [[ $CGRP = A2 ]] && CGRP=A1/A2
+ [[ $CGRP = A3 ]] && CGRP=A1/A2/A3
+
+ case $STATE in
+ P*) FILE=$CGRP/cpuset.cpus.partition
+ ;;
+ *) echo "Unknown state: $STATE!"
+ exit 1
+ ;;
+ esac
+ VAL=$(cat $FILE)
+
+ case "$VAL" in
+ member) VAL=0
+ ;;
+ root) VAL=1
+ ;;
+ isolated)
+ VAL=2
+ ;;
+ "root invalid"*)
+ VAL=-1
+ ;;
+ "isolated invalid"*)
+ VAL=-2
+ ;;
+ esac
+ [[ $EVAL != $VAL ]] && return 1
+ done
+ return 0
+}
+
+#
+# Get isolated (including offline) CPUs by looking at
+# /sys/kernel/debug/sched/domains and cpuset.cpus.isolated control file,
+# if available, and compare that with the expected value.
+#
+# Note that isolated CPUs from the sched/domains context include offline
+# CPUs as well as CPUs in non-isolated 1-CPU partition. Those CPUs may
+# not be included in the cpuset.cpus.isolated control file which contains
+# only CPUs in isolated partitions.
+#
+# $1 - expected isolated cpu list(s) <isolcpus1>{,<isolcpus2>}
+# <isolcpus1> - expected sched/domains value
+# <isolcpus2> - cpuset.cpus.isolated value = <isolcpus1> if not defined
+#
+check_isolcpus()
+{
+ EXPECT_VAL=$1
+ ISOLCPUS=
+ LASTISOLCPU=
+ SCHED_DOMAINS=/sys/kernel/debug/sched/domains
+ ISCPUS=${CGROUP2}/cpuset.cpus.isolated
+ if [[ $EXPECT_VAL = . ]]
+ then
+ EXPECT_VAL=
+ EXPECT_VAL2=
+ elif [[ $(expr $EXPECT_VAL : ".*,.*") > 0 ]]
+ then
+ set -- $(echo $EXPECT_VAL | sed -e "s/,/ /g")
+ EXPECT_VAL=$1
+ EXPECT_VAL2=$2
+ else
+ EXPECT_VAL2=$EXPECT_VAL
+ fi
+
+ #
+ # Check the debug isolated cpumask, if present
+ #
+ [[ -f $ISCPUS ]] && {
+ ISOLCPUS=$(cat $ISCPUS)
+ [[ "$EXPECT_VAL2" != "$ISOLCPUS" ]] && {
+ # Take a 50ms pause and try again
+ pause 0.05
+ ISOLCPUS=$(cat $ISCPUS)
+ }
+ [[ "$EXPECT_VAL2" != "$ISOLCPUS" ]] && return 1
+ ISOLCPUS=
+ }
+
+ #
+ # Use the sched domain in debugfs to check isolated CPUs, if available
+ #
+ [[ -d $SCHED_DOMAINS ]] || return 0
+
+ for ((CPU=0; CPU < $NR_CPUS; CPU++))
+ do
+ [[ -n "$(ls ${SCHED_DOMAINS}/cpu$CPU)" ]] && continue
+
+ if [[ -z "$LASTISOLCPU" ]]
+ then
+ ISOLCPUS=$CPU
+ LASTISOLCPU=$CPU
+ elif [[ "$LASTISOLCPU" -eq $((CPU - 1)) ]]
+ then
+ echo $ISOLCPUS | grep -q "\<$LASTISOLCPU\$"
+ if [[ $? -eq 0 ]]
+ then
+ ISOLCPUS=${ISOLCPUS}-
+ fi
+ LASTISOLCPU=$CPU
+ else
+ if [[ $ISOLCPUS = *- ]]
+ then
+ ISOLCPUS=${ISOLCPUS}$LASTISOLCPU
+ fi
+ ISOLCPUS=${ISOLCPUS},$CPU
+ LASTISOLCPU=$CPU
+ fi
+ done
+ [[ "$ISOLCPUS" = *- ]] && ISOLCPUS=${ISOLCPUS}$LASTISOLCPU
+ [[ "$EXPECT_VAL" = "$ISOLCPUS" ]]
+}
+
+test_fail()
+{
+ TESTNUM=$1
+ TESTTYPE=$2
+ ADDINFO=$3
+ echo "Test $TEST[$TESTNUM] failed $TESTTYPE check!"
+ [[ -n "$ADDINFO" ]] && echo "*** $ADDINFO ***"
+ eval echo \${$TEST[$I]}
+ echo
+ dump_states
+ exit 1
+}
+
+#
+# Check to see if there are unexpected isolated CPUs left
+#
+null_isolcpus_check()
+{
+ [[ $VERBOSE -gt 0 ]] || return 0
+ # Retry a few times before printing error
+ RETRY=0
+ while [[ $RETRY -lt 5 ]]
+ do
+ pause 0.01
+ check_isolcpus "."
+ [[ $? -eq 0 ]] && return 0
+ ((RETRY++))
+ done
+ echo "Unexpected isolated CPUs: $ISOLCPUS"
+ dump_states
+ exit 1
+}
+
+#
+# Run cpuset state transition test
+# $1 - test matrix name
+#
+# This test is somewhat fragile as delays (sleep x) are added in various
+# places to make sure state changes are fully propagated before the next
+# action. These delays may need to be adjusted if running in a slower machine.
+#
+run_state_test()
+{
+ TEST=$1
+ CONTROLLER=cpuset
+ I=0
+ eval CNT="\${#$TEST[@]}"
+
+ reset_cgroup_states
+ console_msg "Running state transition test ..."
+
+ while [[ $I -lt $CNT ]]
+ do
+ echo "Running test $I ..." > /dev/console
+ [[ $VERBOSE -gt 1 ]] && {
+ echo ""
+ eval echo \${$TEST[$I]}
+ }
+ eval set -- "\${$TEST[$I]}"
+ OLD_A1=$1
+ OLD_A2=$2
+ OLD_A3=$3
+ OLD_B1=$4
+ NEW_A1=$5
+ NEW_A2=$6
+ NEW_A3=$7
+ NEW_B1=$8
+ RESULT=$9
+ ECPUS=${10}
+ STATES=${11}
+ ICPUS=${12}
+
+ set_ctrl_state_noerr B1 $OLD_B1
+ set_ctrl_state_noerr A1 $OLD_A1
+ set_ctrl_state_noerr A1/A2 $OLD_A2
+ set_ctrl_state_noerr A1/A2/A3 $OLD_A3
+ RETVAL=0
+ set_ctrl_state A1 $NEW_A1; ((RETVAL += $?))
+ set_ctrl_state A1/A2 $NEW_A2; ((RETVAL += $?))
+ set_ctrl_state A1/A2/A3 $NEW_A3; ((RETVAL += $?))
+ set_ctrl_state B1 $NEW_B1; ((RETVAL += $?))
+
+ [[ $RETVAL -ne $RESULT ]] && test_fail $I result
+
+ [[ -n "$ECPUS" && "$ECPUS" != . ]] && {
+ check_effective_cpus $ECPUS
+ [[ $? -ne 0 ]] && test_fail $I "effective CPU"
+ }
+
+ [[ -n "$STATES" && "$STATES" != . ]] && {
+ check_cgroup_states $STATES
+ [[ $? -ne 0 ]] && test_fail $I states
+ }
+
+ # Compare the expected isolated CPUs with the actual ones,
+ # if available
+ [[ -n "$ICPUS" ]] && {
+ check_isolcpus $ICPUS
+ [[ $? -ne 0 ]] && test_fail $I "isolated CPU" \
+ "Expect $ICPUS, get $ISOLCPUS instead"
+ }
+ reset_cgroup_states
+ #
+ # Check to see if effective cpu list changes
+ #
+ NEWLIST=$(cat cpuset.cpus.effective)
+ RETRY=0
+ while [[ $NEWLIST != $CPULIST && $RETRY -lt 8 ]]
+ do
+ # Wait a bit longer & recheck a few times
+ pause 0.01
+ ((RETRY++))
+ NEWLIST=$(cat cpuset.cpus.effective)
+ done
+ [[ $NEWLIST != $CPULIST ]] && {
+ echo "Effective cpus changed to $NEWLIST after test $I!"
+ exit 1
+ }
+ null_isolcpus_check
+ [[ $VERBOSE -gt 0 ]] && echo "Test $I done."
+ ((I++))
+ done
+ echo "All $I tests of $TEST PASSED."
+}
+
+#
+# Testing the new "isolated" partition root type
+#
+test_isolated()
+{
+ cd $CGROUP2/test
+ echo 2-3 > cpuset.cpus
+ TYPE=$(cat cpuset.cpus.partition)
+ [[ $TYPE = member ]] || echo member > cpuset.cpus.partition
+
+ console_msg "Change from member to root"
+ test_partition root
+
+ console_msg "Change from root to isolated"
+ test_partition isolated
+
+ console_msg "Change from isolated to member"
+ test_partition member
+
+ console_msg "Change from member to isolated"
+ test_partition isolated
+
+ console_msg "Change from isolated to root"
+ test_partition root
+
+ console_msg "Change from root to member"
+ test_partition member
+
+ #
+ # Testing partition root with no cpu
+ #
+ console_msg "Distribute all cpus to child partition"
+ echo +cpuset > cgroup.subtree_control
+ test_partition root
+
+ mkdir A1
+ cd A1
+ echo 2-3 > cpuset.cpus
+ test_partition root
+ test_effective_cpus 2-3
+ cd ..
+ test_effective_cpus ""
+
+ console_msg "Moving task to partition test"
+ test_add_proc "No space left"
+ cd A1
+ test_add_proc ""
+ cd ..
+
+ console_msg "Shrink and expand child partition"
+ cd A1
+ echo 2 > cpuset.cpus
+ cd ..
+ test_effective_cpus 3
+ cd A1
+ echo 2-3 > cpuset.cpus
+ cd ..
+ test_effective_cpus ""
+
+ # Cleaning up
+ console_msg "Cleaning up"
+ echo $$ > $CGROUP2/cgroup.procs
+ [[ -d A1 ]] && rmdir A1
+ null_isolcpus_check
+}
+
+#
+# Wait for inotify event for the given file and read it
+# $1: cgroup file to wait for
+# $2: file to store the read result
+#
+wait_inotify()
+{
+ CGROUP_FILE=$1
+ OUTPUT_FILE=$2
+
+ $WAIT_INOTIFY $CGROUP_FILE
+ cat $CGROUP_FILE > $OUTPUT_FILE
+}
+
+#
+# Test if inotify events are properly generated when going into and out of
+# invalid partition state.
+#
+test_inotify()
+{
+ ERR=0
+ PRS=/tmp/.prs_$$
+ cd $CGROUP2/test
+ [[ -f $WAIT_INOTIFY ]] || {
+ echo "wait_inotify not found, inotify test SKIPPED."
+ return
+ }
+
+ pause 0.01
+ echo 1 > cpuset.cpus
+ echo 0 > cgroup.procs
+ echo root > cpuset.cpus.partition
+ pause 0.01
+ rm -f $PRS
+ wait_inotify $PWD/cpuset.cpus.partition $PRS &
+ pause 0.01
+ set_ctrl_state . "O1=0"
+ pause 0.01
+ check_cgroup_states ".:P-1"
+ if [[ $? -ne 0 ]]
+ then
+ echo "FAILED: Inotify test - partition not invalid"
+ ERR=1
+ elif [[ ! -f $PRS ]]
+ then
+ echo "FAILED: Inotify test - event not generated"
+ ERR=1
+ kill %1
+ elif [[ $(cat $PRS) != "root invalid"* ]]
+ then
+ echo "FAILED: Inotify test - incorrect state"
+ cat $PRS
+ ERR=1
+ fi
+ online_cpus
+ echo member > cpuset.cpus.partition
+ echo 0 > ../cgroup.procs
+ if [[ $ERR -ne 0 ]]
+ then
+ exit 1
+ else
+ echo "Inotify test PASSED"
+ fi
+}
+
+trap cleanup 0 2 3 6
+run_state_test TEST_MATRIX
+test_isolated
+test_inotify
+echo "All tests PASSED."
diff --git a/tools/testing/selftests/cgroup/test_freezer.c b/tools/testing/selftests/cgroup/test_freezer.c
index 23d8fa4a3e4e..8845353aca53 100644
--- a/tools/testing/selftests/cgroup/test_freezer.c
+++ b/tools/testing/selftests/cgroup/test_freezer.c
@@ -7,9 +7,7 @@
#include <unistd.h>
#include <stdio.h>
#include <errno.h>
-#include <poll.h>
#include <stdlib.h>
-#include <sys/inotify.h>
#include <string.h>
#include <sys/wait.h>
@@ -55,61 +53,6 @@ static int cg_freeze_nowait(const char *cgroup, bool freeze)
}
/*
- * Prepare for waiting on cgroup.events file.
- */
-static int cg_prepare_for_wait(const char *cgroup)
-{
- int fd, ret = -1;
-
- fd = inotify_init1(0);
- if (fd == -1) {
- debug("Error: inotify_init1() failed\n");
- return fd;
- }
-
- ret = inotify_add_watch(fd, cg_control(cgroup, "cgroup.events"),
- IN_MODIFY);
- if (ret == -1) {
- debug("Error: inotify_add_watch() failed\n");
- close(fd);
- fd = -1;
- }
-
- return fd;
-}
-
-/*
- * Wait for an event. If there are no events for 10 seconds,
- * treat this an error.
- */
-static int cg_wait_for(int fd)
-{
- int ret = -1;
- struct pollfd fds = {
- .fd = fd,
- .events = POLLIN,
- };
-
- while (true) {
- ret = poll(&fds, 1, 10000);
-
- if (ret == -1) {
- if (errno == EINTR)
- continue;
- debug("Error: poll() failed\n");
- break;
- }
-
- if (ret > 0 && fds.revents & POLLIN) {
- ret = 0;
- break;
- }
- }
-
- return ret;
-}
-
-/*
* Attach a task to the given cgroup and wait for a cgroup frozen event.
* All transient events (e.g. populated) are ignored.
*/
@@ -797,7 +740,7 @@ static int test_cgfreezer_ptraced(const char *root)
/*
* cg_check_frozen(cgroup, true) will fail here,
- * because the task in in the TRACEd state.
+ * because the task is in the TRACEd state.
*/
if (cg_freeze_wait(cgroup, false))
goto cleanup;
diff --git a/tools/testing/selftests/cgroup/test_hugetlb_memcg.c b/tools/testing/selftests/cgroup/test_hugetlb_memcg.c
new file mode 100644
index 000000000000..f0fefeb4cc24
--- /dev/null
+++ b/tools/testing/selftests/cgroup/test_hugetlb_memcg.c
@@ -0,0 +1,234 @@
+// SPDX-License-Identifier: GPL-2.0
+#define _GNU_SOURCE
+
+#include <linux/limits.h>
+#include <sys/mman.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <fcntl.h>
+#include "../kselftest.h"
+#include "cgroup_util.h"
+
+#define ADDR ((void *)(0x0UL))
+#define FLAGS (MAP_PRIVATE | MAP_ANONYMOUS | MAP_HUGETLB)
+/* mapping 8 MBs == 4 hugepages */
+#define LENGTH (8UL*1024*1024)
+#define PROTECTION (PROT_READ | PROT_WRITE)
+
+/* borrowed from mm/hmm-tests.c */
+static long get_hugepage_size(void)
+{
+ int fd;
+ char buf[2048];
+ int len;
+ char *p, *q, *path = "/proc/meminfo", *tag = "Hugepagesize:";
+ long val;
+
+ fd = open(path, O_RDONLY);
+ if (fd < 0) {
+ /* Error opening the file */
+ return -1;
+ }
+
+ len = read(fd, buf, sizeof(buf));
+ close(fd);
+ if (len < 0) {
+ /* Error in reading the file */
+ return -1;
+ }
+ if (len == sizeof(buf)) {
+ /* Error file is too large */
+ return -1;
+ }
+ buf[len] = '\0';
+
+ /* Search for a tag if provided */
+ if (tag) {
+ p = strstr(buf, tag);
+ if (!p)
+ return -1; /* looks like the line we want isn't there */
+ p += strlen(tag);
+ } else
+ p = buf;
+
+ val = strtol(p, &q, 0);
+ if (*q != ' ') {
+ /* Error parsing the file */
+ return -1;
+ }
+
+ return val;
+}
+
+static int set_file(const char *path, long value)
+{
+ FILE *file;
+ int ret;
+
+ file = fopen(path, "w");
+ if (!file)
+ return -1;
+ ret = fprintf(file, "%ld\n", value);
+ fclose(file);
+ return ret;
+}
+
+static int set_nr_hugepages(long value)
+{
+ return set_file("/proc/sys/vm/nr_hugepages", value);
+}
+
+static unsigned int check_first(char *addr)
+{
+ return *(unsigned int *)addr;
+}
+
+static void write_data(char *addr)
+{
+ unsigned long i;
+
+ for (i = 0; i < LENGTH; i++)
+ *(addr + i) = (char)i;
+}
+
+static int hugetlb_test_program(const char *cgroup, void *arg)
+{
+ char *test_group = (char *)arg;
+ void *addr;
+ long old_current, expected_current, current;
+ int ret = EXIT_FAILURE;
+
+ old_current = cg_read_long(test_group, "memory.current");
+ set_nr_hugepages(20);
+ current = cg_read_long(test_group, "memory.current");
+ if (current - old_current >= MB(2)) {
+ ksft_print_msg(
+ "setting nr_hugepages should not increase hugepage usage.\n");
+ ksft_print_msg("before: %ld, after: %ld\n", old_current, current);
+ return EXIT_FAILURE;
+ }
+
+ addr = mmap(ADDR, LENGTH, PROTECTION, FLAGS, 0, 0);
+ if (addr == MAP_FAILED) {
+ ksft_print_msg("fail to mmap.\n");
+ return EXIT_FAILURE;
+ }
+ current = cg_read_long(test_group, "memory.current");
+ if (current - old_current >= MB(2)) {
+ ksft_print_msg("mmap should not increase hugepage usage.\n");
+ ksft_print_msg("before: %ld, after: %ld\n", old_current, current);
+ goto out_failed_munmap;
+ }
+ old_current = current;
+
+ /* read the first page */
+ check_first(addr);
+ expected_current = old_current + MB(2);
+ current = cg_read_long(test_group, "memory.current");
+ if (!values_close(expected_current, current, 5)) {
+ ksft_print_msg("memory usage should increase by around 2MB.\n");
+ ksft_print_msg(
+ "expected memory: %ld, actual memory: %ld\n",
+ expected_current, current);
+ goto out_failed_munmap;
+ }
+
+ /* write to the whole range */
+ write_data(addr);
+ current = cg_read_long(test_group, "memory.current");
+ expected_current = old_current + MB(8);
+ if (!values_close(expected_current, current, 5)) {
+ ksft_print_msg("memory usage should increase by around 8MB.\n");
+ ksft_print_msg(
+ "expected memory: %ld, actual memory: %ld\n",
+ expected_current, current);
+ goto out_failed_munmap;
+ }
+
+ /* unmap the whole range */
+ munmap(addr, LENGTH);
+ current = cg_read_long(test_group, "memory.current");
+ expected_current = old_current;
+ if (!values_close(expected_current, current, 5)) {
+ ksft_print_msg("memory usage should go back down.\n");
+ ksft_print_msg(
+ "expected memory: %ld, actual memory: %ld\n",
+ expected_current, current);
+ return ret;
+ }
+
+ ret = EXIT_SUCCESS;
+ return ret;
+
+out_failed_munmap:
+ munmap(addr, LENGTH);
+ return ret;
+}
+
+static int test_hugetlb_memcg(char *root)
+{
+ int ret = KSFT_FAIL;
+ char *test_group;
+
+ test_group = cg_name(root, "hugetlb_memcg_test");
+ if (!test_group || cg_create(test_group)) {
+ ksft_print_msg("fail to create cgroup.\n");
+ goto out;
+ }
+
+ if (cg_write(test_group, "memory.max", "100M")) {
+ ksft_print_msg("fail to set cgroup memory limit.\n");
+ goto out;
+ }
+
+ /* disable swap */
+ if (cg_write(test_group, "memory.swap.max", "0")) {
+ ksft_print_msg("fail to disable swap.\n");
+ goto out;
+ }
+
+ if (!cg_run(test_group, hugetlb_test_program, (void *)test_group))
+ ret = KSFT_PASS;
+out:
+ cg_destroy(test_group);
+ free(test_group);
+ return ret;
+}
+
+int main(int argc, char **argv)
+{
+ char root[PATH_MAX];
+ int ret = EXIT_SUCCESS, has_memory_hugetlb_acc;
+
+ has_memory_hugetlb_acc = proc_mount_contains("memory_hugetlb_accounting");
+ if (has_memory_hugetlb_acc < 0)
+ ksft_exit_skip("Failed to query cgroup mount option\n");
+ else if (!has_memory_hugetlb_acc)
+ ksft_exit_skip("memory hugetlb accounting is disabled\n");
+
+ /* Unit is kB! */
+ if (get_hugepage_size() != 2048) {
+ ksft_print_msg("test_hugetlb_memcg requires 2MB hugepages\n");
+ ksft_test_result_skip("test_hugetlb_memcg\n");
+ return ret;
+ }
+
+ if (cg_find_unified_root(root, sizeof(root)))
+ ksft_exit_skip("cgroup v2 isn't mounted\n");
+
+ switch (test_hugetlb_memcg(root)) {
+ case KSFT_PASS:
+ ksft_test_result_pass("test_hugetlb_memcg\n");
+ break;
+ case KSFT_SKIP:
+ ksft_test_result_skip("test_hugetlb_memcg\n");
+ break;
+ default:
+ ret = EXIT_FAILURE;
+ ksft_test_result_fail("test_hugetlb_memcg\n");
+ break;
+ }
+
+ return ret;
+}
diff --git a/tools/testing/selftests/cgroup/test_kill.c b/tools/testing/selftests/cgroup/test_kill.c
new file mode 100644
index 000000000000..6153690319c9
--- /dev/null
+++ b/tools/testing/selftests/cgroup/test_kill.c
@@ -0,0 +1,297 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+
+#include <errno.h>
+#include <linux/limits.h>
+#include <stdbool.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/types.h>
+#include <unistd.h>
+
+#include "../kselftest.h"
+#include "../pidfd/pidfd.h"
+#include "cgroup_util.h"
+
+/*
+ * Kill the given cgroup and wait for the inotify signal.
+ * If there are no events in 10 seconds, treat this as an error.
+ * Then check that the cgroup is in the desired state.
+ */
+static int cg_kill_wait(const char *cgroup)
+{
+ int fd, ret = -1;
+
+ fd = cg_prepare_for_wait(cgroup);
+ if (fd < 0)
+ return fd;
+
+ ret = cg_write(cgroup, "cgroup.kill", "1");
+ if (ret)
+ goto out;
+
+ ret = cg_wait_for(fd);
+ if (ret)
+ goto out;
+
+out:
+ close(fd);
+ return ret;
+}
+
+/*
+ * A simple process running in a sleep loop until being
+ * re-parented.
+ */
+static int child_fn(const char *cgroup, void *arg)
+{
+ int ppid = getppid();
+
+ while (getppid() == ppid)
+ usleep(1000);
+
+ return getppid() == ppid;
+}
+
+static int test_cgkill_simple(const char *root)
+{
+ pid_t pids[100];
+ int ret = KSFT_FAIL;
+ char *cgroup = NULL;
+ int i;
+
+ cgroup = cg_name(root, "cg_test_simple");
+ if (!cgroup)
+ goto cleanup;
+
+ if (cg_create(cgroup))
+ goto cleanup;
+
+ for (i = 0; i < 100; i++)
+ pids[i] = cg_run_nowait(cgroup, child_fn, NULL);
+
+ if (cg_wait_for_proc_count(cgroup, 100))
+ goto cleanup;
+
+ if (cg_read_strcmp(cgroup, "cgroup.events", "populated 1\n"))
+ goto cleanup;
+
+ if (cg_kill_wait(cgroup))
+ goto cleanup;
+
+ ret = KSFT_PASS;
+
+cleanup:
+ for (i = 0; i < 100; i++)
+ wait_for_pid(pids[i]);
+
+ if (ret == KSFT_PASS &&
+ cg_read_strcmp(cgroup, "cgroup.events", "populated 0\n"))
+ ret = KSFT_FAIL;
+
+ if (cgroup)
+ cg_destroy(cgroup);
+ free(cgroup);
+ return ret;
+}
+
+/*
+ * The test creates the following hierarchy:
+ * A
+ * / / \ \
+ * B E I K
+ * /\ |
+ * C D F
+ * |
+ * G
+ * |
+ * H
+ *
+ * with a process in C, H and 3 processes in K.
+ * Then it tries to kill the whole tree.
+ */
+static int test_cgkill_tree(const char *root)
+{
+ pid_t pids[5];
+ char *cgroup[10] = {0};
+ int ret = KSFT_FAIL;
+ int i;
+
+ cgroup[0] = cg_name(root, "cg_test_tree_A");
+ if (!cgroup[0])
+ goto cleanup;
+
+ cgroup[1] = cg_name(cgroup[0], "B");
+ if (!cgroup[1])
+ goto cleanup;
+
+ cgroup[2] = cg_name(cgroup[1], "C");
+ if (!cgroup[2])
+ goto cleanup;
+
+ cgroup[3] = cg_name(cgroup[1], "D");
+ if (!cgroup[3])
+ goto cleanup;
+
+ cgroup[4] = cg_name(cgroup[0], "E");
+ if (!cgroup[4])
+ goto cleanup;
+
+ cgroup[5] = cg_name(cgroup[4], "F");
+ if (!cgroup[5])
+ goto cleanup;
+
+ cgroup[6] = cg_name(cgroup[5], "G");
+ if (!cgroup[6])
+ goto cleanup;
+
+ cgroup[7] = cg_name(cgroup[6], "H");
+ if (!cgroup[7])
+ goto cleanup;
+
+ cgroup[8] = cg_name(cgroup[0], "I");
+ if (!cgroup[8])
+ goto cleanup;
+
+ cgroup[9] = cg_name(cgroup[0], "K");
+ if (!cgroup[9])
+ goto cleanup;
+
+ for (i = 0; i < 10; i++)
+ if (cg_create(cgroup[i]))
+ goto cleanup;
+
+ pids[0] = cg_run_nowait(cgroup[2], child_fn, NULL);
+ pids[1] = cg_run_nowait(cgroup[7], child_fn, NULL);
+ pids[2] = cg_run_nowait(cgroup[9], child_fn, NULL);
+ pids[3] = cg_run_nowait(cgroup[9], child_fn, NULL);
+ pids[4] = cg_run_nowait(cgroup[9], child_fn, NULL);
+
+ /*
+ * Wait until all child processes will enter
+ * corresponding cgroups.
+ */
+
+ if (cg_wait_for_proc_count(cgroup[2], 1) ||
+ cg_wait_for_proc_count(cgroup[7], 1) ||
+ cg_wait_for_proc_count(cgroup[9], 3))
+ goto cleanup;
+
+ /*
+ * Kill A and check that we get an empty notification.
+ */
+ if (cg_kill_wait(cgroup[0]))
+ goto cleanup;
+
+ ret = KSFT_PASS;
+
+cleanup:
+ for (i = 0; i < 5; i++)
+ wait_for_pid(pids[i]);
+
+ if (ret == KSFT_PASS &&
+ cg_read_strcmp(cgroup[0], "cgroup.events", "populated 0\n"))
+ ret = KSFT_FAIL;
+
+ for (i = 9; i >= 0 && cgroup[i]; i--) {
+ cg_destroy(cgroup[i]);
+ free(cgroup[i]);
+ }
+
+ return ret;
+}
+
+static int forkbomb_fn(const char *cgroup, void *arg)
+{
+ int ppid;
+
+ fork();
+ fork();
+
+ ppid = getppid();
+
+ while (getppid() == ppid)
+ usleep(1000);
+
+ return getppid() == ppid;
+}
+
+/*
+ * The test runs a fork bomb in a cgroup and tries to kill it.
+ */
+static int test_cgkill_forkbomb(const char *root)
+{
+ int ret = KSFT_FAIL;
+ char *cgroup = NULL;
+ pid_t pid = -ESRCH;
+
+ cgroup = cg_name(root, "cg_forkbomb_test");
+ if (!cgroup)
+ goto cleanup;
+
+ if (cg_create(cgroup))
+ goto cleanup;
+
+ pid = cg_run_nowait(cgroup, forkbomb_fn, NULL);
+ if (pid < 0)
+ goto cleanup;
+
+ usleep(100000);
+
+ if (cg_kill_wait(cgroup))
+ goto cleanup;
+
+ if (cg_wait_for_proc_count(cgroup, 0))
+ goto cleanup;
+
+ ret = KSFT_PASS;
+
+cleanup:
+ if (pid > 0)
+ wait_for_pid(pid);
+
+ if (ret == KSFT_PASS &&
+ cg_read_strcmp(cgroup, "cgroup.events", "populated 0\n"))
+ ret = KSFT_FAIL;
+
+ if (cgroup)
+ cg_destroy(cgroup);
+ free(cgroup);
+ return ret;
+}
+
+#define T(x) { x, #x }
+struct cgkill_test {
+ int (*fn)(const char *root);
+ const char *name;
+} tests[] = {
+ T(test_cgkill_simple),
+ T(test_cgkill_tree),
+ T(test_cgkill_forkbomb),
+};
+#undef T
+
+int main(int argc, char *argv[])
+{
+ char root[PATH_MAX];
+ int i, ret = EXIT_SUCCESS;
+
+ if (cg_find_unified_root(root, sizeof(root)))
+ ksft_exit_skip("cgroup v2 isn't mounted\n");
+ for (i = 0; i < ARRAY_SIZE(tests); i++) {
+ switch (tests[i].fn(root)) {
+ case KSFT_PASS:
+ ksft_test_result_pass("%s\n", tests[i].name);
+ break;
+ case KSFT_SKIP:
+ ksft_test_result_skip("%s\n", tests[i].name);
+ break;
+ default:
+ ret = EXIT_FAILURE;
+ ksft_test_result_fail("%s\n", tests[i].name);
+ break;
+ }
+ }
+
+ return ret;
+}
diff --git a/tools/testing/selftests/cgroup/test_kmem.c b/tools/testing/selftests/cgroup/test_kmem.c
new file mode 100644
index 000000000000..c82f974b85c9
--- /dev/null
+++ b/tools/testing/selftests/cgroup/test_kmem.c
@@ -0,0 +1,453 @@
+// SPDX-License-Identifier: GPL-2.0
+#define _GNU_SOURCE
+
+#include <linux/limits.h>
+#include <fcntl.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/stat.h>
+#include <sys/types.h>
+#include <unistd.h>
+#include <sys/wait.h>
+#include <errno.h>
+#include <sys/sysinfo.h>
+#include <pthread.h>
+
+#include "../kselftest.h"
+#include "cgroup_util.h"
+
+
+/*
+ * Memory cgroup charging is performed using percpu batches 64 pages
+ * big (look at MEMCG_CHARGE_BATCH), whereas memory.stat is exact. So
+ * the maximum discrepancy between charge and vmstat entries is number
+ * of cpus multiplied by 64 pages.
+ */
+#define MAX_VMSTAT_ERROR (4096 * 64 * get_nprocs())
+
+
+static int alloc_dcache(const char *cgroup, void *arg)
+{
+ unsigned long i;
+ struct stat st;
+ char buf[128];
+
+ for (i = 0; i < (unsigned long)arg; i++) {
+ snprintf(buf, sizeof(buf),
+ "/something-non-existent-with-a-long-name-%64lu-%d",
+ i, getpid());
+ stat(buf, &st);
+ }
+
+ return 0;
+}
+
+/*
+ * This test allocates 100000 of negative dentries with long names.
+ * Then it checks that "slab" in memory.stat is larger than 1M.
+ * Then it sets memory.high to 1M and checks that at least 1/2
+ * of slab memory has been reclaimed.
+ */
+static int test_kmem_basic(const char *root)
+{
+ int ret = KSFT_FAIL;
+ char *cg = NULL;
+ long slab0, slab1, current;
+
+ cg = cg_name(root, "kmem_basic_test");
+ if (!cg)
+ goto cleanup;
+
+ if (cg_create(cg))
+ goto cleanup;
+
+ if (cg_run(cg, alloc_dcache, (void *)100000))
+ goto cleanup;
+
+ slab0 = cg_read_key_long(cg, "memory.stat", "slab ");
+ if (slab0 < (1 << 20))
+ goto cleanup;
+
+ cg_write(cg, "memory.high", "1M");
+
+ /* wait for RCU freeing */
+ sleep(1);
+
+ slab1 = cg_read_key_long(cg, "memory.stat", "slab ");
+ if (slab1 < 0)
+ goto cleanup;
+
+ current = cg_read_long(cg, "memory.current");
+ if (current < 0)
+ goto cleanup;
+
+ if (slab1 < slab0 / 2 && current < slab0 / 2)
+ ret = KSFT_PASS;
+cleanup:
+ cg_destroy(cg);
+ free(cg);
+
+ return ret;
+}
+
+static void *alloc_kmem_fn(void *arg)
+{
+ alloc_dcache(NULL, (void *)100);
+ return NULL;
+}
+
+static int alloc_kmem_smp(const char *cgroup, void *arg)
+{
+ int nr_threads = 2 * get_nprocs();
+ pthread_t *tinfo;
+ unsigned long i;
+ int ret = -1;
+
+ tinfo = calloc(nr_threads, sizeof(pthread_t));
+ if (tinfo == NULL)
+ return -1;
+
+ for (i = 0; i < nr_threads; i++) {
+ if (pthread_create(&tinfo[i], NULL, &alloc_kmem_fn,
+ (void *)i)) {
+ free(tinfo);
+ return -1;
+ }
+ }
+
+ for (i = 0; i < nr_threads; i++) {
+ ret = pthread_join(tinfo[i], NULL);
+ if (ret)
+ break;
+ }
+
+ free(tinfo);
+ return ret;
+}
+
+static int cg_run_in_subcgroups(const char *parent,
+ int (*fn)(const char *cgroup, void *arg),
+ void *arg, int times)
+{
+ char *child;
+ int i;
+
+ for (i = 0; i < times; i++) {
+ child = cg_name_indexed(parent, "child", i);
+ if (!child)
+ return -1;
+
+ if (cg_create(child)) {
+ cg_destroy(child);
+ free(child);
+ return -1;
+ }
+
+ if (cg_run(child, fn, NULL)) {
+ cg_destroy(child);
+ free(child);
+ return -1;
+ }
+
+ cg_destroy(child);
+ free(child);
+ }
+
+ return 0;
+}
+
+/*
+ * The test creates and destroys a large number of cgroups. In each cgroup it
+ * allocates some slab memory (mostly negative dentries) using 2 * NR_CPUS
+ * threads. Then it checks the sanity of numbers on the parent level:
+ * the total size of the cgroups should be roughly equal to
+ * anon + file + kernel + sock.
+ */
+static int test_kmem_memcg_deletion(const char *root)
+{
+ long current, anon, file, kernel, sock, sum;
+ int ret = KSFT_FAIL;
+ char *parent;
+
+ parent = cg_name(root, "kmem_memcg_deletion_test");
+ if (!parent)
+ goto cleanup;
+
+ if (cg_create(parent))
+ goto cleanup;
+
+ if (cg_write(parent, "cgroup.subtree_control", "+memory"))
+ goto cleanup;
+
+ if (cg_run_in_subcgroups(parent, alloc_kmem_smp, NULL, 100))
+ goto cleanup;
+
+ current = cg_read_long(parent, "memory.current");
+ anon = cg_read_key_long(parent, "memory.stat", "anon ");
+ file = cg_read_key_long(parent, "memory.stat", "file ");
+ kernel = cg_read_key_long(parent, "memory.stat", "kernel ");
+ sock = cg_read_key_long(parent, "memory.stat", "sock ");
+ if (current < 0 || anon < 0 || file < 0 || kernel < 0 || sock < 0)
+ goto cleanup;
+
+ sum = anon + file + kernel + sock;
+ if (abs(sum - current) < MAX_VMSTAT_ERROR) {
+ ret = KSFT_PASS;
+ } else {
+ printf("memory.current = %ld\n", current);
+ printf("anon + file + kernel + sock = %ld\n", sum);
+ printf("anon = %ld\n", anon);
+ printf("file = %ld\n", file);
+ printf("kernel = %ld\n", kernel);
+ printf("sock = %ld\n", sock);
+ }
+
+cleanup:
+ cg_destroy(parent);
+ free(parent);
+
+ return ret;
+}
+
+/*
+ * The test reads the entire /proc/kpagecgroup. If the operation went
+ * successfully (and the kernel didn't panic), the test is treated as passed.
+ */
+static int test_kmem_proc_kpagecgroup(const char *root)
+{
+ unsigned long buf[128];
+ int ret = KSFT_FAIL;
+ ssize_t len;
+ int fd;
+
+ fd = open("/proc/kpagecgroup", O_RDONLY);
+ if (fd < 0)
+ return ret;
+
+ do {
+ len = read(fd, buf, sizeof(buf));
+ } while (len > 0);
+
+ if (len == 0)
+ ret = KSFT_PASS;
+
+ close(fd);
+ return ret;
+}
+
+static void *pthread_wait_fn(void *arg)
+{
+ sleep(100);
+ return NULL;
+}
+
+static int spawn_1000_threads(const char *cgroup, void *arg)
+{
+ int nr_threads = 1000;
+ pthread_t *tinfo;
+ unsigned long i;
+ long stack;
+ int ret = -1;
+
+ tinfo = calloc(nr_threads, sizeof(pthread_t));
+ if (tinfo == NULL)
+ return -1;
+
+ for (i = 0; i < nr_threads; i++) {
+ if (pthread_create(&tinfo[i], NULL, &pthread_wait_fn,
+ (void *)i)) {
+ free(tinfo);
+ return(-1);
+ }
+ }
+
+ stack = cg_read_key_long(cgroup, "memory.stat", "kernel_stack ");
+ if (stack >= 4096 * 1000)
+ ret = 0;
+
+ free(tinfo);
+ return ret;
+}
+
+/*
+ * The test spawns a process, which spawns 1000 threads. Then it checks
+ * that memory.stat's kernel_stack is at least 1000 pages large.
+ */
+static int test_kmem_kernel_stacks(const char *root)
+{
+ int ret = KSFT_FAIL;
+ char *cg = NULL;
+
+ cg = cg_name(root, "kmem_kernel_stacks_test");
+ if (!cg)
+ goto cleanup;
+
+ if (cg_create(cg))
+ goto cleanup;
+
+ if (cg_run(cg, spawn_1000_threads, NULL))
+ goto cleanup;
+
+ ret = KSFT_PASS;
+cleanup:
+ cg_destroy(cg);
+ free(cg);
+
+ return ret;
+}
+
+/*
+ * This test sequentionally creates 30 child cgroups, allocates some
+ * kernel memory in each of them, and deletes them. Then it checks
+ * that the number of dying cgroups on the parent level is 0.
+ */
+static int test_kmem_dead_cgroups(const char *root)
+{
+ int ret = KSFT_FAIL;
+ char *parent;
+ long dead;
+ int i;
+
+ parent = cg_name(root, "kmem_dead_cgroups_test");
+ if (!parent)
+ goto cleanup;
+
+ if (cg_create(parent))
+ goto cleanup;
+
+ if (cg_write(parent, "cgroup.subtree_control", "+memory"))
+ goto cleanup;
+
+ if (cg_run_in_subcgroups(parent, alloc_dcache, (void *)100, 30))
+ goto cleanup;
+
+ for (i = 0; i < 5; i++) {
+ dead = cg_read_key_long(parent, "cgroup.stat",
+ "nr_dying_descendants ");
+ if (dead == 0) {
+ ret = KSFT_PASS;
+ break;
+ }
+ /*
+ * Reclaiming cgroups might take some time,
+ * let's wait a bit and repeat.
+ */
+ sleep(1);
+ }
+
+cleanup:
+ cg_destroy(parent);
+ free(parent);
+
+ return ret;
+}
+
+/*
+ * This test creates a sub-tree with 1000 memory cgroups.
+ * Then it checks that the memory.current on the parent level
+ * is greater than 0 and approximates matches the percpu value
+ * from memory.stat.
+ */
+static int test_percpu_basic(const char *root)
+{
+ int ret = KSFT_FAIL;
+ char *parent, *child;
+ long current, percpu;
+ int i;
+
+ parent = cg_name(root, "percpu_basic_test");
+ if (!parent)
+ goto cleanup;
+
+ if (cg_create(parent))
+ goto cleanup;
+
+ if (cg_write(parent, "cgroup.subtree_control", "+memory"))
+ goto cleanup;
+
+ for (i = 0; i < 1000; i++) {
+ child = cg_name_indexed(parent, "child", i);
+ if (!child)
+ return -1;
+
+ if (cg_create(child))
+ goto cleanup_children;
+
+ free(child);
+ }
+
+ current = cg_read_long(parent, "memory.current");
+ percpu = cg_read_key_long(parent, "memory.stat", "percpu ");
+
+ if (current > 0 && percpu > 0 && abs(current - percpu) <
+ MAX_VMSTAT_ERROR)
+ ret = KSFT_PASS;
+ else
+ printf("memory.current %ld\npercpu %ld\n",
+ current, percpu);
+
+cleanup_children:
+ for (i = 0; i < 1000; i++) {
+ child = cg_name_indexed(parent, "child", i);
+ cg_destroy(child);
+ free(child);
+ }
+
+cleanup:
+ cg_destroy(parent);
+ free(parent);
+
+ return ret;
+}
+
+#define T(x) { x, #x }
+struct kmem_test {
+ int (*fn)(const char *root);
+ const char *name;
+} tests[] = {
+ T(test_kmem_basic),
+ T(test_kmem_memcg_deletion),
+ T(test_kmem_proc_kpagecgroup),
+ T(test_kmem_kernel_stacks),
+ T(test_kmem_dead_cgroups),
+ T(test_percpu_basic),
+};
+#undef T
+
+int main(int argc, char **argv)
+{
+ char root[PATH_MAX];
+ int i, ret = EXIT_SUCCESS;
+
+ if (cg_find_unified_root(root, sizeof(root)))
+ ksft_exit_skip("cgroup v2 isn't mounted\n");
+
+ /*
+ * Check that memory controller is available:
+ * memory is listed in cgroup.controllers
+ */
+ if (cg_read_strstr(root, "cgroup.controllers", "memory"))
+ ksft_exit_skip("memory controller isn't available\n");
+
+ if (cg_read_strstr(root, "cgroup.subtree_control", "memory"))
+ if (cg_write(root, "cgroup.subtree_control", "+memory"))
+ ksft_exit_skip("Failed to set memory controller\n");
+
+ for (i = 0; i < ARRAY_SIZE(tests); i++) {
+ switch (tests[i].fn(root)) {
+ case KSFT_PASS:
+ ksft_test_result_pass("%s\n", tests[i].name);
+ break;
+ case KSFT_SKIP:
+ ksft_test_result_skip("%s\n", tests[i].name);
+ break;
+ default:
+ ret = EXIT_FAILURE;
+ ksft_test_result_fail("%s\n", tests[i].name);
+ break;
+ }
+ }
+
+ return ret;
+}
diff --git a/tools/testing/selftests/cgroup/test_memcontrol.c b/tools/testing/selftests/cgroup/test_memcontrol.c
index c19a97dd02d4..c7c9572003a8 100644
--- a/tools/testing/selftests/cgroup/test_memcontrol.c
+++ b/tools/testing/selftests/cgroup/test_memcontrol.c
@@ -16,10 +16,14 @@
#include <netinet/in.h>
#include <netdb.h>
#include <errno.h>
+#include <sys/mman.h>
#include "../kselftest.h"
#include "cgroup_util.h"
+static bool has_localevents;
+static bool has_recursiveprot;
+
/*
* This test creates two nested cgroups with and without enabling
* the memory controller.
@@ -94,6 +98,11 @@ static int alloc_anon_50M_check(const char *cgroup, void *arg)
int ret = -1;
buf = malloc(size);
+ if (buf == NULL) {
+ fprintf(stderr, "malloc() failed\n");
+ return -1;
+ }
+
for (ptr = buf; ptr < buf + size; ptr += PAGE_SIZE)
*ptr = 0;
@@ -186,13 +195,6 @@ cleanup:
return ret;
}
-static int alloc_pagecache_50M(const char *cgroup, void *arg)
-{
- int fd = (long)arg;
-
- return alloc_pagecache(fd, MB(50));
-}
-
static int alloc_pagecache_50M_noexit(const char *cgroup, void *arg)
{
int fd = (long)arg;
@@ -210,13 +212,22 @@ static int alloc_pagecache_50M_noexit(const char *cgroup, void *arg)
static int alloc_anon_noexit(const char *cgroup, void *arg)
{
int ppid = getppid();
+ size_t size = (unsigned long)arg;
+ char *buf, *ptr;
- if (alloc_anon(cgroup, arg))
+ buf = malloc(size);
+ if (buf == NULL) {
+ fprintf(stderr, "malloc() failed\n");
return -1;
+ }
+
+ for (ptr = buf; ptr < buf + size; ptr += PAGE_SIZE)
+ *ptr = 0;
while (getppid() == ppid)
sleep(1);
+ free(buf);
return 0;
}
@@ -237,36 +248,51 @@ static int cg_test_proc_killed(const char *cgroup)
return -1;
}
+static bool reclaim_until(const char *memcg, long goal);
+
/*
* First, this test creates the following hierarchy:
- * A memory.min = 50M, memory.max = 200M
- * A/B memory.min = 50M, memory.current = 50M
+ * A memory.min = 0, memory.max = 200M
+ * A/B memory.min = 50M
* A/B/C memory.min = 75M, memory.current = 50M
* A/B/D memory.min = 25M, memory.current = 50M
- * A/B/E memory.min = 500M, memory.current = 0
- * A/B/F memory.min = 0, memory.current = 50M
+ * A/B/E memory.min = 0, memory.current = 50M
+ * A/B/F memory.min = 500M, memory.current = 0
*
- * Usages are pagecache, but the test keeps a running
+ * (or memory.low if we test soft protection)
+ *
+ * Usages are pagecache and the test keeps a running
* process in every leaf cgroup.
* Then it creates A/G and creates a significant
- * memory pressure in it.
+ * memory pressure in A.
*
+ * Then it checks actual memory usages and expects that:
* A/B memory.current ~= 50M
- * A/B/C memory.current ~= 33M
- * A/B/D memory.current ~= 17M
+ * A/B/C memory.current ~= 29M
+ * A/B/D memory.current ~= 21M
* A/B/E memory.current ~= 0
+ * A/B/F memory.current = 0
+ * (for origin of the numbers, see model in memcg_protection.m.)
*
* After that it tries to allocate more than there is
- * unprotected memory in A available, and checks
- * checks that memory.min protects pagecache even
- * in this case.
+ * unprotected memory in A available, and checks that:
+ * a) memory.min protects pagecache even in this case,
+ * b) memory.low allows reclaiming page cache with low events.
+ *
+ * Then we try to reclaim from A/B/C using memory.reclaim until its
+ * usage reaches 10M.
+ * This makes sure that:
+ * (a) We ignore the protection of the reclaim target memcg.
+ * (b) The previously calculated emin value (~29M) should be dismissed.
*/
-static int test_memcg_min(const char *root)
+static int test_memcg_protection(const char *root, bool min)
{
- int ret = KSFT_FAIL;
+ int ret = KSFT_FAIL, rc;
char *parent[3] = {NULL};
char *children[4] = {NULL};
+ const char *attribute = min ? "memory.min" : "memory.low";
long c[4];
+ long current;
int i, attempts;
int fd;
@@ -289,8 +315,10 @@ static int test_memcg_min(const char *root)
if (cg_create(parent[0]))
goto cleanup;
- if (cg_read_long(parent[0], "memory.min")) {
- ret = KSFT_SKIP;
+ if (cg_read_long(parent[0], attribute)) {
+ /* No memory.min on older kernels is fine */
+ if (min)
+ ret = KSFT_SKIP;
goto cleanup;
}
@@ -320,24 +348,22 @@ static int test_memcg_min(const char *root)
if (cg_create(children[i]))
goto cleanup;
- if (i == 2)
+ if (i > 2)
continue;
cg_run_nowait(children[i], alloc_pagecache_50M_noexit,
(void *)(long)fd);
}
- if (cg_write(parent[0], "memory.min", "50M"))
+ if (cg_write(parent[1], attribute, "50M"))
goto cleanup;
- if (cg_write(parent[1], "memory.min", "50M"))
+ if (cg_write(children[0], attribute, "75M"))
goto cleanup;
- if (cg_write(children[0], "memory.min", "75M"))
+ if (cg_write(children[1], attribute, "25M"))
goto cleanup;
- if (cg_write(children[1], "memory.min", "25M"))
+ if (cg_write(children[2], attribute, "0"))
goto cleanup;
- if (cg_write(children[2], "memory.min", "500M"))
- goto cleanup;
- if (cg_write(children[3], "memory.min", "0"))
+ if (cg_write(children[3], attribute, "500M"))
goto cleanup;
attempts = 0;
@@ -357,178 +383,50 @@ static int test_memcg_min(const char *root)
for (i = 0; i < ARRAY_SIZE(children); i++)
c[i] = cg_read_long(children[i], "memory.current");
- if (!values_close(c[0], MB(33), 10))
+ if (!values_close(c[0], MB(29), 10))
goto cleanup;
- if (!values_close(c[1], MB(17), 10))
+ if (!values_close(c[1], MB(21), 10))
goto cleanup;
- if (!values_close(c[2], 0, 1))
+ if (c[3] != 0)
goto cleanup;
- if (!cg_run(parent[2], alloc_anon, (void *)MB(170)))
+ rc = cg_run(parent[2], alloc_anon, (void *)MB(170));
+ if (min && !rc)
goto cleanup;
-
- if (!values_close(cg_read_long(parent[1], "memory.current"), MB(50), 3))
- goto cleanup;
-
- ret = KSFT_PASS;
-
-cleanup:
- for (i = ARRAY_SIZE(children) - 1; i >= 0; i--) {
- if (!children[i])
- continue;
-
- cg_destroy(children[i]);
- free(children[i]);
- }
-
- for (i = ARRAY_SIZE(parent) - 1; i >= 0; i--) {
- if (!parent[i])
- continue;
-
- cg_destroy(parent[i]);
- free(parent[i]);
- }
- close(fd);
- return ret;
-}
-
-/*
- * First, this test creates the following hierarchy:
- * A memory.low = 50M, memory.max = 200M
- * A/B memory.low = 50M, memory.current = 50M
- * A/B/C memory.low = 75M, memory.current = 50M
- * A/B/D memory.low = 25M, memory.current = 50M
- * A/B/E memory.low = 500M, memory.current = 0
- * A/B/F memory.low = 0, memory.current = 50M
- *
- * Usages are pagecache.
- * Then it creates A/G an creates a significant
- * memory pressure in it.
- *
- * Then it checks actual memory usages and expects that:
- * A/B memory.current ~= 50M
- * A/B/ memory.current ~= 33M
- * A/B/D memory.current ~= 17M
- * A/B/E memory.current ~= 0
- *
- * After that it tries to allocate more than there is
- * unprotected memory in A available,
- * and checks low and oom events in memory.events.
- */
-static int test_memcg_low(const char *root)
-{
- int ret = KSFT_FAIL;
- char *parent[3] = {NULL};
- char *children[4] = {NULL};
- long low, oom;
- long c[4];
- int i;
- int fd;
-
- fd = get_temp_fd();
- if (fd < 0)
- goto cleanup;
-
- parent[0] = cg_name(root, "memcg_test_0");
- if (!parent[0])
- goto cleanup;
-
- parent[1] = cg_name(parent[0], "memcg_test_1");
- if (!parent[1])
- goto cleanup;
-
- parent[2] = cg_name(parent[0], "memcg_test_2");
- if (!parent[2])
- goto cleanup;
-
- if (cg_create(parent[0]))
- goto cleanup;
-
- if (cg_read_long(parent[0], "memory.low"))
- goto cleanup;
-
- if (cg_write(parent[0], "cgroup.subtree_control", "+memory"))
- goto cleanup;
-
- if (cg_write(parent[0], "memory.max", "200M"))
- goto cleanup;
-
- if (cg_write(parent[0], "memory.swap.max", "0"))
- goto cleanup;
-
- if (cg_create(parent[1]))
- goto cleanup;
-
- if (cg_write(parent[1], "cgroup.subtree_control", "+memory"))
- goto cleanup;
-
- if (cg_create(parent[2]))
+ else if (!min && rc) {
+ fprintf(stderr,
+ "memory.low prevents from allocating anon memory\n");
goto cleanup;
-
- for (i = 0; i < ARRAY_SIZE(children); i++) {
- children[i] = cg_name_indexed(parent[1], "child_memcg", i);
- if (!children[i])
- goto cleanup;
-
- if (cg_create(children[i]))
- goto cleanup;
-
- if (i == 2)
- continue;
-
- if (cg_run(children[i], alloc_pagecache_50M, (void *)(long)fd))
- goto cleanup;
}
- if (cg_write(parent[0], "memory.low", "50M"))
- goto cleanup;
- if (cg_write(parent[1], "memory.low", "50M"))
- goto cleanup;
- if (cg_write(children[0], "memory.low", "75M"))
- goto cleanup;
- if (cg_write(children[1], "memory.low", "25M"))
- goto cleanup;
- if (cg_write(children[2], "memory.low", "500M"))
- goto cleanup;
- if (cg_write(children[3], "memory.low", "0"))
- goto cleanup;
-
- if (cg_run(parent[2], alloc_anon, (void *)MB(148)))
- goto cleanup;
-
- if (!values_close(cg_read_long(parent[1], "memory.current"), MB(50), 3))
- goto cleanup;
-
- for (i = 0; i < ARRAY_SIZE(children); i++)
- c[i] = cg_read_long(children[i], "memory.current");
-
- if (!values_close(c[0], MB(33), 10))
- goto cleanup;
-
- if (!values_close(c[1], MB(17), 10))
+ current = min ? MB(50) : MB(30);
+ if (!values_close(cg_read_long(parent[1], "memory.current"), current, 3))
goto cleanup;
- if (!values_close(c[2], 0, 1))
+ if (!reclaim_until(children[0], MB(10)))
goto cleanup;
- if (cg_run(parent[2], alloc_anon, (void *)MB(166))) {
- fprintf(stderr,
- "memory.low prevents from allocating anon memory\n");
+ if (min) {
+ ret = KSFT_PASS;
goto cleanup;
}
for (i = 0; i < ARRAY_SIZE(children); i++) {
+ int no_low_events_index = 1;
+ long low, oom;
+
oom = cg_read_key_long(children[i], "memory.events", "oom ");
low = cg_read_key_long(children[i], "memory.events", "low ");
if (oom)
goto cleanup;
- if (i < 2 && low <= 0)
+ if (i <= no_low_events_index && low <= 0)
goto cleanup;
- if (i >= 2 && low)
+ if (i > no_low_events_index && low)
goto cleanup;
+
}
ret = KSFT_PASS;
@@ -553,13 +451,28 @@ cleanup:
return ret;
}
+static int test_memcg_min(const char *root)
+{
+ return test_memcg_protection(root, true);
+}
+
+static int test_memcg_low(const char *root)
+{
+ return test_memcg_protection(root, false);
+}
+
static int alloc_pagecache_max_30M(const char *cgroup, void *arg)
{
size_t size = MB(50);
int ret = -1;
- long current;
+ long current, high, max;
int fd;
+ high = cg_read_long(cgroup, "memory.high");
+ max = cg_read_long(cgroup, "memory.max");
+ if (high != MB(30) && max != MB(30))
+ return -1;
+
fd = get_temp_fd();
if (fd < 0)
return -1;
@@ -568,7 +481,7 @@ static int alloc_pagecache_max_30M(const char *cgroup, void *arg)
goto cleanup;
current = cg_read_long(cgroup, "memory.current");
- if (current <= MB(29) || current > MB(30))
+ if (!values_close(current, MB(30), 5))
goto cleanup;
ret = 0;
@@ -606,7 +519,7 @@ static int test_memcg_high(const char *root)
if (cg_write(memcg, "memory.high", "30M"))
goto cleanup;
- if (cg_run(memcg, alloc_anon, (void *)MB(100)))
+ if (cg_run(memcg, alloc_anon, (void *)MB(31)))
goto cleanup;
if (!cg_run(memcg, alloc_pagecache_50M_check, NULL))
@@ -628,6 +541,82 @@ cleanup:
return ret;
}
+static int alloc_anon_mlock(const char *cgroup, void *arg)
+{
+ size_t size = (size_t)arg;
+ void *buf;
+
+ buf = mmap(NULL, size, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANON,
+ 0, 0);
+ if (buf == MAP_FAILED)
+ return -1;
+
+ mlock(buf, size);
+ munmap(buf, size);
+ return 0;
+}
+
+/*
+ * This test checks that memory.high is able to throttle big single shot
+ * allocation i.e. large allocation within one kernel entry.
+ */
+static int test_memcg_high_sync(const char *root)
+{
+ int ret = KSFT_FAIL, pid, fd = -1;
+ char *memcg;
+ long pre_high, pre_max;
+ long post_high, post_max;
+
+ memcg = cg_name(root, "memcg_test");
+ if (!memcg)
+ goto cleanup;
+
+ if (cg_create(memcg))
+ goto cleanup;
+
+ pre_high = cg_read_key_long(memcg, "memory.events", "high ");
+ pre_max = cg_read_key_long(memcg, "memory.events", "max ");
+ if (pre_high < 0 || pre_max < 0)
+ goto cleanup;
+
+ if (cg_write(memcg, "memory.swap.max", "0"))
+ goto cleanup;
+
+ if (cg_write(memcg, "memory.high", "30M"))
+ goto cleanup;
+
+ if (cg_write(memcg, "memory.max", "140M"))
+ goto cleanup;
+
+ fd = memcg_prepare_for_wait(memcg);
+ if (fd < 0)
+ goto cleanup;
+
+ pid = cg_run_nowait(memcg, alloc_anon_mlock, (void *)MB(200));
+ if (pid < 0)
+ goto cleanup;
+
+ cg_wait_for(fd);
+
+ post_high = cg_read_key_long(memcg, "memory.events", "high ");
+ post_max = cg_read_key_long(memcg, "memory.events", "max ");
+ if (post_high < 0 || post_max < 0)
+ goto cleanup;
+
+ if (pre_high == post_high || pre_max != post_max)
+ goto cleanup;
+
+ ret = KSFT_PASS;
+
+cleanup:
+ if (fd >= 0)
+ close(fd);
+ cg_destroy(memcg);
+ free(memcg);
+
+ return ret;
+}
+
/*
* This test checks that memory.max limits the amount of
* memory which can be consumed by either anonymous memory
@@ -679,6 +668,119 @@ cleanup:
return ret;
}
+/*
+ * Reclaim from @memcg until usage reaches @goal by writing to
+ * memory.reclaim.
+ *
+ * This function will return false if the usage is already below the
+ * goal.
+ *
+ * This function assumes that writing to memory.reclaim is the only
+ * source of change in memory.current (no concurrent allocations or
+ * reclaim).
+ *
+ * This function makes sure memory.reclaim is sane. It will return
+ * false if memory.reclaim's error codes do not make sense, even if
+ * the usage goal was satisfied.
+ */
+static bool reclaim_until(const char *memcg, long goal)
+{
+ char buf[64];
+ int retries, err;
+ long current, to_reclaim;
+ bool reclaimed = false;
+
+ for (retries = 5; retries > 0; retries--) {
+ current = cg_read_long(memcg, "memory.current");
+
+ if (current < goal || values_close(current, goal, 3))
+ break;
+ /* Did memory.reclaim return 0 incorrectly? */
+ else if (reclaimed)
+ return false;
+
+ to_reclaim = current - goal;
+ snprintf(buf, sizeof(buf), "%ld", to_reclaim);
+ err = cg_write(memcg, "memory.reclaim", buf);
+ if (!err)
+ reclaimed = true;
+ else if (err != -EAGAIN)
+ return false;
+ }
+ return reclaimed;
+}
+
+/*
+ * This test checks that memory.reclaim reclaims the given
+ * amount of memory (from both anon and file, if possible).
+ */
+static int test_memcg_reclaim(const char *root)
+{
+ int ret = KSFT_FAIL, fd, retries;
+ char *memcg;
+ long current, expected_usage;
+
+ memcg = cg_name(root, "memcg_test");
+ if (!memcg)
+ goto cleanup;
+
+ if (cg_create(memcg))
+ goto cleanup;
+
+ current = cg_read_long(memcg, "memory.current");
+ if (current != 0)
+ goto cleanup;
+
+ fd = get_temp_fd();
+ if (fd < 0)
+ goto cleanup;
+
+ cg_run_nowait(memcg, alloc_pagecache_50M_noexit, (void *)(long)fd);
+
+ /*
+ * If swap is enabled, try to reclaim from both anon and file, else try
+ * to reclaim from file only.
+ */
+ if (is_swap_enabled()) {
+ cg_run_nowait(memcg, alloc_anon_noexit, (void *) MB(50));
+ expected_usage = MB(100);
+ } else
+ expected_usage = MB(50);
+
+ /*
+ * Wait until current usage reaches the expected usage (or we run out of
+ * retries).
+ */
+ retries = 5;
+ while (!values_close(cg_read_long(memcg, "memory.current"),
+ expected_usage, 10)) {
+ if (retries--) {
+ sleep(1);
+ continue;
+ } else {
+ fprintf(stderr,
+ "failed to allocate %ld for memcg reclaim test\n",
+ expected_usage);
+ goto cleanup;
+ }
+ }
+
+ /*
+ * Reclaim until current reaches 30M, this makes sure we hit both anon
+ * and file if swap is enabled.
+ */
+ if (!reclaim_until(memcg, MB(30)))
+ goto cleanup;
+
+ ret = KSFT_PASS;
+cleanup:
+ cg_destroy(memcg);
+ free(memcg);
+ close(fd);
+
+ return ret;
+}
+
static int alloc_anon_50M_check_swap(const char *cgroup, void *arg)
{
long mem_max = (long)arg;
@@ -688,6 +790,11 @@ static int alloc_anon_50M_check_swap(const char *cgroup, void *arg)
int ret = -1;
buf = malloc(size);
+ if (buf == NULL) {
+ fprintf(stderr, "malloc() failed\n");
+ return -1;
+ }
+
for (ptr = buf; ptr < buf + size; ptr += PAGE_SIZE)
*ptr = 0;
@@ -882,7 +989,9 @@ static int tcp_client(const char *cgroup, unsigned short port)
char servport[6];
int retries = 0x10; /* nice round number */
int sk, ret;
+ long allocated;
+ allocated = cg_read_long(cgroup, "memory.current");
snprintf(servport, sizeof(servport), "%hd", port);
ret = getaddrinfo(server, servport, NULL, &ai);
if (ret)
@@ -910,10 +1019,8 @@ static int tcp_client(const char *cgroup, unsigned short port)
if (current < 0 || sock < 0)
goto close_sk;
- if (current < sock)
- goto close_sk;
-
- if (values_close(current, sock, 10)) {
+ /* exclude the memory not related to socket connection */
+ if (values_close(current - allocated, sock, 10)) {
ret = KSFT_PASS;
break;
}
@@ -1002,12 +1109,14 @@ cleanup:
/*
* This test disables swapping and tries to allocate anonymous memory
* up to OOM with memory.group.oom set. Then it checks that all
- * processes in the leaf (but not the parent) were killed.
+ * processes in the leaf were killed. It also checks that oom_events
+ * were propagated to the parent level.
*/
static int test_memcg_oom_group_leaf_events(const char *root)
{
int ret = KSFT_FAIL;
char *parent, *child;
+ long parent_oom_events;
parent = cg_name(root, "memcg_test_0");
child = cg_name(root, "memcg_test_0/memcg_test_1");
@@ -1045,7 +1154,16 @@ static int test_memcg_oom_group_leaf_events(const char *root)
if (cg_read_key_long(child, "memory.events", "oom_kill ") <= 0)
goto cleanup;
- if (cg_read_key_long(parent, "memory.events", "oom_kill ") != 0)
+ parent_oom_events = cg_read_key_long(
+ parent, "memory.events", "oom_kill ");
+ /*
+ * If memory_localevents is not enabled (the default), the parent should
+ * count OOM events in its children groups. Otherwise, it should not
+ * have observed any events.
+ */
+ if (has_localevents && parent_oom_events != 0)
+ goto cleanup;
+ else if (!has_localevents && parent_oom_events <= 0)
goto cleanup;
ret = KSFT_PASS;
@@ -1169,7 +1287,6 @@ cleanup:
return ret;
}
-
#define T(x) { x, #x }
struct memcg_test {
int (*fn)(const char *root);
@@ -1180,7 +1297,9 @@ struct memcg_test {
T(test_memcg_min),
T(test_memcg_low),
T(test_memcg_high),
+ T(test_memcg_high_sync),
T(test_memcg_max),
+ T(test_memcg_reclaim),
T(test_memcg_oom_events),
T(test_memcg_swap_max),
T(test_memcg_sock),
@@ -1193,7 +1312,7 @@ struct memcg_test {
int main(int argc, char **argv)
{
char root[PATH_MAX];
- int i, ret = EXIT_SUCCESS;
+ int i, proc_status, ret = EXIT_SUCCESS;
if (cg_find_unified_root(root, sizeof(root)))
ksft_exit_skip("cgroup v2 isn't mounted\n");
@@ -1209,6 +1328,16 @@ int main(int argc, char **argv)
if (cg_write(root, "cgroup.subtree_control", "+memory"))
ksft_exit_skip("Failed to set memory controller\n");
+ proc_status = proc_mount_contains("memory_recursiveprot");
+ if (proc_status < 0)
+ ksft_exit_skip("Failed to query cgroup mount option\n");
+ has_recursiveprot = proc_status;
+
+ proc_status = proc_mount_contains("memory_localevents");
+ if (proc_status < 0)
+ ksft_exit_skip("Failed to query cgroup mount option\n");
+ has_localevents = proc_status;
+
for (i = 0; i < ARRAY_SIZE(tests); i++) {
switch (tests[i].fn(root)) {
case KSFT_PASS:
diff --git a/tools/testing/selftests/cgroup/test_stress.sh b/tools/testing/selftests/cgroup/test_stress.sh
index 15d9d5896394..3c9c4554d5f6 100755
--- a/tools/testing/selftests/cgroup/test_stress.sh
+++ b/tools/testing/selftests/cgroup/test_stress.sh
@@ -1,4 +1,4 @@
#!/bin/bash
# SPDX-License-Identifier: GPL-2.0
-./with_stress.sh -s subsys -s fork ./test_core
+./with_stress.sh -s subsys -s fork ${OUTPUT:-.}/test_core
diff --git a/tools/testing/selftests/cgroup/test_zswap.c b/tools/testing/selftests/cgroup/test_zswap.c
new file mode 100644
index 000000000000..f0e488ed90d8
--- /dev/null
+++ b/tools/testing/selftests/cgroup/test_zswap.c
@@ -0,0 +1,476 @@
+// SPDX-License-Identifier: GPL-2.0
+#define _GNU_SOURCE
+
+#include <linux/limits.h>
+#include <unistd.h>
+#include <stdio.h>
+#include <signal.h>
+#include <sys/sysinfo.h>
+#include <string.h>
+#include <sys/wait.h>
+#include <sys/mman.h>
+
+#include "../kselftest.h"
+#include "cgroup_util.h"
+
+static int read_int(const char *path, size_t *value)
+{
+ FILE *file;
+ int ret = 0;
+
+ file = fopen(path, "r");
+ if (!file)
+ return -1;
+ if (fscanf(file, "%ld", value) != 1)
+ ret = -1;
+ fclose(file);
+ return ret;
+}
+
+static int set_min_free_kb(size_t value)
+{
+ FILE *file;
+ int ret;
+
+ file = fopen("/proc/sys/vm/min_free_kbytes", "w");
+ if (!file)
+ return -1;
+ ret = fprintf(file, "%ld\n", value);
+ fclose(file);
+ return ret;
+}
+
+static int read_min_free_kb(size_t *value)
+{
+ return read_int("/proc/sys/vm/min_free_kbytes", value);
+}
+
+static int get_zswap_stored_pages(size_t *value)
+{
+ return read_int("/sys/kernel/debug/zswap/stored_pages", value);
+}
+
+static int get_cg_wb_count(const char *cg)
+{
+ return cg_read_key_long(cg, "memory.stat", "zswpwb");
+}
+
+static long get_zswpout(const char *cgroup)
+{
+ return cg_read_key_long(cgroup, "memory.stat", "zswpout ");
+}
+
+static int allocate_and_read_bytes(const char *cgroup, void *arg)
+{
+ size_t size = (size_t)arg;
+ char *mem = (char *)malloc(size);
+ int ret = 0;
+
+ if (!mem)
+ return -1;
+ for (int i = 0; i < size; i += 4095)
+ mem[i] = 'a';
+
+ /* Go through the allocated memory to (z)swap in and out pages */
+ for (int i = 0; i < size; i += 4095) {
+ if (mem[i] != 'a')
+ ret = -1;
+ }
+
+ free(mem);
+ return ret;
+}
+
+static int allocate_bytes(const char *cgroup, void *arg)
+{
+ size_t size = (size_t)arg;
+ char *mem = (char *)malloc(size);
+
+ if (!mem)
+ return -1;
+ for (int i = 0; i < size; i += 4095)
+ mem[i] = 'a';
+ free(mem);
+ return 0;
+}
+
+static char *setup_test_group_1M(const char *root, const char *name)
+{
+ char *group_name = cg_name(root, name);
+
+ if (!group_name)
+ return NULL;
+ if (cg_create(group_name))
+ goto fail;
+ if (cg_write(group_name, "memory.max", "1M")) {
+ cg_destroy(group_name);
+ goto fail;
+ }
+ return group_name;
+fail:
+ free(group_name);
+ return NULL;
+}
+
+/*
+ * Sanity test to check that pages are written into zswap.
+ */
+static int test_zswap_usage(const char *root)
+{
+ long zswpout_before, zswpout_after;
+ int ret = KSFT_FAIL;
+ char *test_group;
+
+ test_group = cg_name(root, "no_shrink_test");
+ if (!test_group)
+ goto out;
+ if (cg_create(test_group))
+ goto out;
+ if (cg_write(test_group, "memory.max", "1M"))
+ goto out;
+
+ zswpout_before = get_zswpout(test_group);
+ if (zswpout_before < 0) {
+ ksft_print_msg("Failed to get zswpout\n");
+ goto out;
+ }
+
+ /* Allocate more than memory.max to push memory into zswap */
+ if (cg_run(test_group, allocate_bytes, (void *)MB(4)))
+ goto out;
+
+ /* Verify that pages come into zswap */
+ zswpout_after = get_zswpout(test_group);
+ if (zswpout_after <= zswpout_before) {
+ ksft_print_msg("zswpout does not increase after test program\n");
+ goto out;
+ }
+ ret = KSFT_PASS;
+
+out:
+ cg_destroy(test_group);
+ free(test_group);
+ return ret;
+}
+
+/*
+ * Check that when memory.zswap.max = 0, no pages can go to the zswap pool for
+ * the cgroup.
+ */
+static int test_swapin_nozswap(const char *root)
+{
+ int ret = KSFT_FAIL;
+ char *test_group;
+ long swap_peak, zswpout;
+
+ test_group = cg_name(root, "no_zswap_test");
+ if (!test_group)
+ goto out;
+ if (cg_create(test_group))
+ goto out;
+ if (cg_write(test_group, "memory.max", "8M"))
+ goto out;
+ if (cg_write(test_group, "memory.zswap.max", "0"))
+ goto out;
+
+ /* Allocate and read more than memory.max to trigger swapin */
+ if (cg_run(test_group, allocate_and_read_bytes, (void *)MB(32)))
+ goto out;
+
+ /* Verify that pages are swapped out, but no zswap happened */
+ swap_peak = cg_read_long(test_group, "memory.swap.peak");
+ if (swap_peak < 0) {
+ ksft_print_msg("failed to get cgroup's swap_peak\n");
+ goto out;
+ }
+
+ if (swap_peak < MB(24)) {
+ ksft_print_msg("at least 24MB of memory should be swapped out\n");
+ goto out;
+ }
+
+ zswpout = get_zswpout(test_group);
+ if (zswpout < 0) {
+ ksft_print_msg("failed to get zswpout\n");
+ goto out;
+ }
+
+ if (zswpout > 0) {
+ ksft_print_msg("zswapout > 0 when memory.zswap.max = 0\n");
+ goto out;
+ }
+
+ ret = KSFT_PASS;
+
+out:
+ cg_destroy(test_group);
+ free(test_group);
+ return ret;
+}
+
+/* Simple test to verify the (z)swapin code paths */
+static int test_zswapin(const char *root)
+{
+ int ret = KSFT_FAIL;
+ char *test_group;
+ long zswpin;
+
+ test_group = cg_name(root, "zswapin_test");
+ if (!test_group)
+ goto out;
+ if (cg_create(test_group))
+ goto out;
+ if (cg_write(test_group, "memory.max", "8M"))
+ goto out;
+ if (cg_write(test_group, "memory.zswap.max", "max"))
+ goto out;
+
+ /* Allocate and read more than memory.max to trigger (z)swap in */
+ if (cg_run(test_group, allocate_and_read_bytes, (void *)MB(32)))
+ goto out;
+
+ zswpin = cg_read_key_long(test_group, "memory.stat", "zswpin ");
+ if (zswpin < 0) {
+ ksft_print_msg("failed to get zswpin\n");
+ goto out;
+ }
+
+ if (zswpin < MB(24) / PAGE_SIZE) {
+ ksft_print_msg("at least 24MB should be brought back from zswap\n");
+ goto out;
+ }
+
+ ret = KSFT_PASS;
+
+out:
+ cg_destroy(test_group);
+ free(test_group);
+ return ret;
+}
+
+/*
+ * When trying to store a memcg page in zswap, if the memcg hits its memory
+ * limit in zswap, writeback should affect only the zswapped pages of that
+ * memcg.
+ */
+static int test_no_invasive_cgroup_shrink(const char *root)
+{
+ int ret = KSFT_FAIL;
+ size_t control_allocation_size = MB(10);
+ char *control_allocation, *wb_group = NULL, *control_group = NULL;
+
+ wb_group = setup_test_group_1M(root, "per_memcg_wb_test1");
+ if (!wb_group)
+ return KSFT_FAIL;
+ if (cg_write(wb_group, "memory.zswap.max", "10K"))
+ goto out;
+ control_group = setup_test_group_1M(root, "per_memcg_wb_test2");
+ if (!control_group)
+ goto out;
+
+ /* Push some test_group2 memory into zswap */
+ if (cg_enter_current(control_group))
+ goto out;
+ control_allocation = malloc(control_allocation_size);
+ for (int i = 0; i < control_allocation_size; i += 4095)
+ control_allocation[i] = 'a';
+ if (cg_read_key_long(control_group, "memory.stat", "zswapped") < 1)
+ goto out;
+
+ /* Allocate 10x memory.max to push wb_group memory into zswap and trigger wb */
+ if (cg_run(wb_group, allocate_bytes, (void *)MB(10)))
+ goto out;
+
+ /* Verify that only zswapped memory from gwb_group has been written back */
+ if (get_cg_wb_count(wb_group) > 0 && get_cg_wb_count(control_group) == 0)
+ ret = KSFT_PASS;
+out:
+ cg_enter_current(root);
+ if (control_group) {
+ cg_destroy(control_group);
+ free(control_group);
+ }
+ cg_destroy(wb_group);
+ free(wb_group);
+ if (control_allocation)
+ free(control_allocation);
+ return ret;
+}
+
+struct no_kmem_bypass_child_args {
+ size_t target_alloc_bytes;
+ size_t child_allocated;
+};
+
+static int no_kmem_bypass_child(const char *cgroup, void *arg)
+{
+ struct no_kmem_bypass_child_args *values = arg;
+ void *allocation;
+
+ allocation = malloc(values->target_alloc_bytes);
+ if (!allocation) {
+ values->child_allocated = true;
+ return -1;
+ }
+ for (long i = 0; i < values->target_alloc_bytes; i += 4095)
+ ((char *)allocation)[i] = 'a';
+ values->child_allocated = true;
+ pause();
+ free(allocation);
+ return 0;
+}
+
+/*
+ * When pages owned by a memcg are pushed to zswap by kswapd, they should be
+ * charged to that cgroup. This wasn't the case before commit
+ * cd08d80ecdac("mm: correctly charge compressed memory to its memcg").
+ *
+ * The test first allocates memory in a memcg, then raises min_free_kbytes to
+ * a very high value so that the allocation falls below low wm, then makes
+ * another allocation to trigger kswapd that should push the memcg-owned pages
+ * to zswap and verifies that the zswap pages are correctly charged.
+ *
+ * To be run on a VM with at most 4G of memory.
+ */
+static int test_no_kmem_bypass(const char *root)
+{
+ size_t min_free_kb_high, min_free_kb_low, min_free_kb_original;
+ struct no_kmem_bypass_child_args *values;
+ size_t trigger_allocation_size;
+ int wait_child_iteration = 0;
+ long stored_pages_threshold;
+ struct sysinfo sys_info;
+ int ret = KSFT_FAIL;
+ int child_status;
+ char *test_group;
+ pid_t child_pid;
+
+ /* Read sys info and compute test values accordingly */
+ if (sysinfo(&sys_info) != 0)
+ return KSFT_FAIL;
+ if (sys_info.totalram > 5000000000)
+ return KSFT_SKIP;
+ values = mmap(0, sizeof(struct no_kmem_bypass_child_args), PROT_READ |
+ PROT_WRITE, MAP_SHARED | MAP_ANONYMOUS, -1, 0);
+ if (values == MAP_FAILED)
+ return KSFT_FAIL;
+ if (read_min_free_kb(&min_free_kb_original))
+ return KSFT_FAIL;
+ min_free_kb_high = sys_info.totalram / 2000;
+ min_free_kb_low = sys_info.totalram / 500000;
+ values->target_alloc_bytes = (sys_info.totalram - min_free_kb_high * 1000) +
+ sys_info.totalram * 5 / 100;
+ stored_pages_threshold = sys_info.totalram / 5 / 4096;
+ trigger_allocation_size = sys_info.totalram / 20;
+
+ /* Set up test memcg */
+ if (cg_write(root, "cgroup.subtree_control", "+memory"))
+ goto out;
+ test_group = cg_name(root, "kmem_bypass_test");
+ if (!test_group)
+ goto out;
+
+ /* Spawn memcg child and wait for it to allocate */
+ set_min_free_kb(min_free_kb_low);
+ if (cg_create(test_group))
+ goto out;
+ values->child_allocated = false;
+ child_pid = cg_run_nowait(test_group, no_kmem_bypass_child, values);
+ if (child_pid < 0)
+ goto out;
+ while (!values->child_allocated && wait_child_iteration++ < 10000)
+ usleep(1000);
+
+ /* Try to wakeup kswapd and let it push child memory to zswap */
+ set_min_free_kb(min_free_kb_high);
+ for (int i = 0; i < 20; i++) {
+ size_t stored_pages;
+ char *trigger_allocation = malloc(trigger_allocation_size);
+
+ if (!trigger_allocation)
+ break;
+ for (int i = 0; i < trigger_allocation_size; i += 4095)
+ trigger_allocation[i] = 'b';
+ usleep(100000);
+ free(trigger_allocation);
+ if (get_zswap_stored_pages(&stored_pages))
+ break;
+ if (stored_pages < 0)
+ break;
+ /* If memory was pushed to zswap, verify it belongs to memcg */
+ if (stored_pages > stored_pages_threshold) {
+ int zswapped = cg_read_key_long(test_group, "memory.stat", "zswapped ");
+ int delta = stored_pages * 4096 - zswapped;
+ int result_ok = delta < stored_pages * 4096 / 4;
+
+ ret = result_ok ? KSFT_PASS : KSFT_FAIL;
+ break;
+ }
+ }
+
+ kill(child_pid, SIGTERM);
+ waitpid(child_pid, &child_status, 0);
+out:
+ set_min_free_kb(min_free_kb_original);
+ cg_destroy(test_group);
+ free(test_group);
+ return ret;
+}
+
+#define T(x) { x, #x }
+struct zswap_test {
+ int (*fn)(const char *root);
+ const char *name;
+} tests[] = {
+ T(test_zswap_usage),
+ T(test_swapin_nozswap),
+ T(test_zswapin),
+ T(test_no_kmem_bypass),
+ T(test_no_invasive_cgroup_shrink),
+};
+#undef T
+
+static bool zswap_configured(void)
+{
+ return access("/sys/module/zswap", F_OK) == 0;
+}
+
+int main(int argc, char **argv)
+{
+ char root[PATH_MAX];
+ int i, ret = EXIT_SUCCESS;
+
+ if (cg_find_unified_root(root, sizeof(root)))
+ ksft_exit_skip("cgroup v2 isn't mounted\n");
+
+ if (!zswap_configured())
+ ksft_exit_skip("zswap isn't configured\n");
+
+ /*
+ * Check that memory controller is available:
+ * memory is listed in cgroup.controllers
+ */
+ if (cg_read_strstr(root, "cgroup.controllers", "memory"))
+ ksft_exit_skip("memory controller isn't available\n");
+
+ if (cg_read_strstr(root, "cgroup.subtree_control", "memory"))
+ if (cg_write(root, "cgroup.subtree_control", "+memory"))
+ ksft_exit_skip("Failed to set memory controller\n");
+
+ for (i = 0; i < ARRAY_SIZE(tests); i++) {
+ switch (tests[i].fn(root)) {
+ case KSFT_PASS:
+ ksft_test_result_pass("%s\n", tests[i].name);
+ break;
+ case KSFT_SKIP:
+ ksft_test_result_skip("%s\n", tests[i].name);
+ break;
+ default:
+ ret = EXIT_FAILURE;
+ ksft_test_result_fail("%s\n", tests[i].name);
+ break;
+ }
+ }
+
+ return ret;
+}
diff --git a/tools/testing/selftests/cgroup/wait_inotify.c b/tools/testing/selftests/cgroup/wait_inotify.c
new file mode 100644
index 000000000000..e11b431e1b62
--- /dev/null
+++ b/tools/testing/selftests/cgroup/wait_inotify.c
@@ -0,0 +1,87 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Wait until an inotify event on the given cgroup file.
+ */
+#include <linux/limits.h>
+#include <sys/inotify.h>
+#include <sys/mman.h>
+#include <sys/ptrace.h>
+#include <sys/stat.h>
+#include <sys/types.h>
+#include <errno.h>
+#include <fcntl.h>
+#include <poll.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+
+static const char usage[] = "Usage: %s [-v] <cgroup_file>\n";
+static char *file;
+static int verbose;
+
+static inline void fail_message(char *msg)
+{
+ fprintf(stderr, msg, file);
+ exit(1);
+}
+
+int main(int argc, char *argv[])
+{
+ char *cmd = argv[0];
+ int c, fd;
+ struct pollfd fds = { .events = POLLIN, };
+
+ while ((c = getopt(argc, argv, "v")) != -1) {
+ switch (c) {
+ case 'v':
+ verbose++;
+ break;
+ }
+ argv++, argc--;
+ }
+
+ if (argc != 2) {
+ fprintf(stderr, usage, cmd);
+ return -1;
+ }
+ file = argv[1];
+ fd = open(file, O_RDONLY);
+ if (fd < 0)
+ fail_message("Cgroup file %s not found!\n");
+ close(fd);
+
+ fd = inotify_init();
+ if (fd < 0)
+ fail_message("inotify_init() fails on %s!\n");
+ if (inotify_add_watch(fd, file, IN_MODIFY) < 0)
+ fail_message("inotify_add_watch() fails on %s!\n");
+ fds.fd = fd;
+
+ /*
+ * poll waiting loop
+ */
+ for (;;) {
+ int ret = poll(&fds, 1, 10000);
+
+ if (ret < 0) {
+ if (errno == EINTR)
+ continue;
+ perror("poll");
+ exit(1);
+ }
+ if ((ret > 0) && (fds.revents & POLLIN))
+ break;
+ }
+ if (verbose) {
+ struct inotify_event events[10];
+ long len;
+
+ usleep(1000);
+ len = read(fd, events, sizeof(events));
+ printf("Number of events read = %ld\n",
+ len/sizeof(struct inotify_event));
+ }
+ close(fd);
+ return 0;
+}
diff --git a/tools/testing/selftests/clone3/.gitignore b/tools/testing/selftests/clone3/.gitignore
index a81085742d40..83c0f6246055 100644
--- a/tools/testing/selftests/clone3/.gitignore
+++ b/tools/testing/selftests/clone3/.gitignore
@@ -2,3 +2,4 @@
clone3
clone3_clear_sighand
clone3_set_tid
+clone3_cap_checkpoint_restore
diff --git a/tools/testing/selftests/clone3/Makefile b/tools/testing/selftests/clone3/Makefile
index cf976c732906..84832c369a2e 100644
--- a/tools/testing/selftests/clone3/Makefile
+++ b/tools/testing/selftests/clone3/Makefile
@@ -1,6 +1,8 @@
# SPDX-License-Identifier: GPL-2.0
-CFLAGS += -g -I../../../../usr/include/
+CFLAGS += -g -std=gnu99 $(KHDR_INCLUDES)
+LDLIBS += -lcap
-TEST_GEN_PROGS := clone3 clone3_clear_sighand clone3_set_tid
+TEST_GEN_PROGS := clone3 clone3_clear_sighand clone3_set_tid \
+ clone3_cap_checkpoint_restore
include ../lib.mk
diff --git a/tools/testing/selftests/clone3/clone3.c b/tools/testing/selftests/clone3/clone3.c
index f14c269a5a18..3c9bf0cd82a8 100644
--- a/tools/testing/selftests/clone3/clone3.c
+++ b/tools/testing/selftests/clone3/clone3.c
@@ -7,6 +7,7 @@
#include <inttypes.h>
#include <linux/types.h>
#include <linux/sched.h>
+#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
@@ -20,13 +21,6 @@
#include "../kselftest.h"
#include "clone3_selftests.h"
-/*
- * Different sizes of struct clone_args
- */
-#ifndef CLONE3_ARGS_SIZE_V0
-#define CLONE3_ARGS_SIZE_V0 64
-#endif
-
enum test_mode {
CLONE3_ARGS_NO_TEST,
CLONE3_ARGS_ALL_0,
@@ -38,13 +32,13 @@ enum test_mode {
static int call_clone3(uint64_t flags, size_t size, enum test_mode test_mode)
{
- struct clone_args args = {
+ struct __clone_args args = {
.flags = flags,
.exit_signal = SIGCHLD,
};
struct clone_args_extended {
- struct clone_args args;
+ struct __clone_args args;
__aligned_u64 excess_space[2];
} args_ext;
@@ -52,13 +46,19 @@ static int call_clone3(uint64_t flags, size_t size, enum test_mode test_mode)
int status;
memset(&args_ext, 0, sizeof(args_ext));
- if (size > sizeof(struct clone_args))
+ if (size > sizeof(struct __clone_args))
args_ext.excess_space[1] = 1;
if (size == 0)
- size = sizeof(struct clone_args);
+ size = sizeof(struct __clone_args);
switch (test_mode) {
+ case CLONE3_ARGS_NO_TEST:
+ /*
+ * Uses default 'flags' and 'SIGCHLD'
+ * assignment.
+ */
+ break;
case CLONE3_ARGS_ALL_0:
args.flags = 0;
args.exit_signal = 0;
@@ -77,9 +77,9 @@ static int call_clone3(uint64_t flags, size_t size, enum test_mode test_mode)
break;
}
- memcpy(&args_ext.args, &args, sizeof(struct clone_args));
+ memcpy(&args_ext.args, &args, sizeof(struct __clone_args));
- pid = sys_clone3((struct clone_args *)&args_ext, size);
+ pid = sys_clone3((struct __clone_args *)&args_ext, size);
if (pid < 0) {
ksft_print_msg("%s - Failed to create new process\n",
strerror(errno));
@@ -104,8 +104,8 @@ static int call_clone3(uint64_t flags, size_t size, enum test_mode test_mode)
return 0;
}
-static void test_clone3(uint64_t flags, size_t size, int expected,
- enum test_mode test_mode)
+static bool test_clone3(uint64_t flags, size_t size, int expected,
+ enum test_mode test_mode)
{
int ret;
@@ -115,88 +115,223 @@ static void test_clone3(uint64_t flags, size_t size, int expected,
ret = call_clone3(flags, size, test_mode);
ksft_print_msg("[%d] clone3() with flags says: %d expected %d\n",
getpid(), ret, expected);
- if (ret != expected)
- ksft_test_result_fail(
+ if (ret != expected) {
+ ksft_print_msg(
"[%d] Result (%d) is different than expected (%d)\n",
getpid(), ret, expected);
- else
- ksft_test_result_pass(
- "[%d] Result (%d) matches expectation (%d)\n",
- getpid(), ret, expected);
-}
-
-int main(int argc, char *argv[])
-{
- pid_t pid;
-
- uid_t uid = getuid();
-
- test_clone3_supported();
- ksft_print_header();
- ksft_set_plan(17);
-
- /* Just a simple clone3() should return 0.*/
- test_clone3(0, 0, 0, CLONE3_ARGS_NO_TEST);
+ return false;
+ }
- /* Do a clone3() in a new PID NS.*/
- if (uid == 0)
- test_clone3(CLONE_NEWPID, 0, 0, CLONE3_ARGS_NO_TEST);
- else
- ksft_test_result_skip("Skipping clone3() with CLONE_NEWPID\n");
+ return true;
+}
- /* Do a clone3() with CLONE3_ARGS_SIZE_V0. */
- test_clone3(0, CLONE3_ARGS_SIZE_V0, 0, CLONE3_ARGS_NO_TEST);
+typedef bool (*filter_function)(void);
+typedef size_t (*size_function)(void);
- /* Do a clone3() with CLONE3_ARGS_SIZE_V0 - 8 */
- test_clone3(0, CLONE3_ARGS_SIZE_V0 - 8, -EINVAL, CLONE3_ARGS_NO_TEST);
+static bool not_root(void)
+{
+ if (getuid() != 0) {
+ ksft_print_msg("Not running as root\n");
+ return true;
+ }
- /* Do a clone3() with sizeof(struct clone_args) + 8 */
- test_clone3(0, sizeof(struct clone_args) + 8, 0, CLONE3_ARGS_NO_TEST);
+ return false;
+}
- /* Do a clone3() with exit_signal having highest 32 bits non-zero */
- test_clone3(0, 0, -EINVAL, CLONE3_ARGS_INVAL_EXIT_SIGNAL_BIG);
+static bool no_timenamespace(void)
+{
+ if (not_root())
+ return true;
- /* Do a clone3() with negative 32-bit exit_signal */
- test_clone3(0, 0, -EINVAL, CLONE3_ARGS_INVAL_EXIT_SIGNAL_NEG);
+ if (!access("/proc/self/ns/time", F_OK))
+ return false;
- /* Do a clone3() with exit_signal not fitting into CSIGNAL mask */
- test_clone3(0, 0, -EINVAL, CLONE3_ARGS_INVAL_EXIT_SIGNAL_CSIG);
+ ksft_print_msg("Time namespaces are not supported\n");
+ return true;
+}
- /* Do a clone3() with NSIG < exit_signal < CSIG */
- test_clone3(0, 0, -EINVAL, CLONE3_ARGS_INVAL_EXIT_SIGNAL_NSIG);
+static size_t page_size_plus_8(void)
+{
+ return getpagesize() + 8;
+}
- test_clone3(0, sizeof(struct clone_args) + 8, 0, CLONE3_ARGS_ALL_0);
+struct test {
+ const char *name;
+ uint64_t flags;
+ size_t size;
+ size_function size_function;
+ int expected;
+ enum test_mode test_mode;
+ filter_function filter;
+};
- test_clone3(0, sizeof(struct clone_args) + 16, -E2BIG,
- CLONE3_ARGS_ALL_0);
+static const struct test tests[] = {
+ {
+ .name = "simple clone3()",
+ .flags = 0,
+ .size = 0,
+ .expected = 0,
+ .test_mode = CLONE3_ARGS_NO_TEST,
+ },
+ {
+ .name = "clone3() in a new PID_NS",
+ .flags = CLONE_NEWPID,
+ .size = 0,
+ .expected = 0,
+ .test_mode = CLONE3_ARGS_NO_TEST,
+ .filter = not_root,
+ },
+ {
+ .name = "CLONE_ARGS_SIZE_VER0",
+ .flags = 0,
+ .size = CLONE_ARGS_SIZE_VER0,
+ .expected = 0,
+ .test_mode = CLONE3_ARGS_NO_TEST,
+ },
+ {
+ .name = "CLONE_ARGS_SIZE_VER0 - 8",
+ .flags = 0,
+ .size = CLONE_ARGS_SIZE_VER0 - 8,
+ .expected = -EINVAL,
+ .test_mode = CLONE3_ARGS_NO_TEST,
+ },
+ {
+ .name = "sizeof(struct clone_args) + 8",
+ .flags = 0,
+ .size = sizeof(struct __clone_args) + 8,
+ .expected = 0,
+ .test_mode = CLONE3_ARGS_NO_TEST,
+ },
+ {
+ .name = "exit_signal with highest 32 bits non-zero",
+ .flags = 0,
+ .size = 0,
+ .expected = -EINVAL,
+ .test_mode = CLONE3_ARGS_INVAL_EXIT_SIGNAL_BIG,
+ },
+ {
+ .name = "negative 32-bit exit_signal",
+ .flags = 0,
+ .size = 0,
+ .expected = -EINVAL,
+ .test_mode = CLONE3_ARGS_INVAL_EXIT_SIGNAL_NEG,
+ },
+ {
+ .name = "exit_signal not fitting into CSIGNAL mask",
+ .flags = 0,
+ .size = 0,
+ .expected = -EINVAL,
+ .test_mode = CLONE3_ARGS_INVAL_EXIT_SIGNAL_CSIG,
+ },
+ {
+ .name = "NSIG < exit_signal < CSIG",
+ .flags = 0,
+ .size = 0,
+ .expected = -EINVAL,
+ .test_mode = CLONE3_ARGS_INVAL_EXIT_SIGNAL_NSIG,
+ },
+ {
+ .name = "Arguments sizeof(struct clone_args) + 8",
+ .flags = 0,
+ .size = sizeof(struct __clone_args) + 8,
+ .expected = 0,
+ .test_mode = CLONE3_ARGS_ALL_0,
+ },
+ {
+ .name = "Arguments sizeof(struct clone_args) + 16",
+ .flags = 0,
+ .size = sizeof(struct __clone_args) + 16,
+ .expected = -E2BIG,
+ .test_mode = CLONE3_ARGS_ALL_0,
+ },
+ {
+ .name = "Arguments sizeof(struct clone_arg) * 2",
+ .flags = 0,
+ .size = sizeof(struct __clone_args) + 16,
+ .expected = -E2BIG,
+ .test_mode = CLONE3_ARGS_ALL_0,
+ },
+ {
+ .name = "Arguments > page size",
+ .flags = 0,
+ .size_function = page_size_plus_8,
+ .expected = -E2BIG,
+ .test_mode = CLONE3_ARGS_NO_TEST,
+ },
+ {
+ .name = "CLONE_ARGS_SIZE_VER0 in a new PID NS",
+ .flags = CLONE_NEWPID,
+ .size = CLONE_ARGS_SIZE_VER0,
+ .expected = 0,
+ .test_mode = CLONE3_ARGS_NO_TEST,
+ .filter = not_root,
+ },
+ {
+ .name = "CLONE_ARGS_SIZE_VER0 - 8 in a new PID NS",
+ .flags = CLONE_NEWPID,
+ .size = CLONE_ARGS_SIZE_VER0 - 8,
+ .expected = -EINVAL,
+ .test_mode = CLONE3_ARGS_NO_TEST,
+ },
+ {
+ .name = "sizeof(struct clone_args) + 8 in a new PID NS",
+ .flags = CLONE_NEWPID,
+ .size = sizeof(struct __clone_args) + 8,
+ .expected = 0,
+ .test_mode = CLONE3_ARGS_NO_TEST,
+ .filter = not_root,
+ },
+ {
+ .name = "Arguments > page size in a new PID NS",
+ .flags = CLONE_NEWPID,
+ .size_function = page_size_plus_8,
+ .expected = -E2BIG,
+ .test_mode = CLONE3_ARGS_NO_TEST,
+ },
+ {
+ .name = "New time NS",
+ .flags = CLONE_NEWTIME,
+ .size = 0,
+ .expected = 0,
+ .test_mode = CLONE3_ARGS_NO_TEST,
+ .filter = no_timenamespace,
+ },
+ {
+ .name = "exit signal (SIGCHLD) in flags",
+ .flags = SIGCHLD,
+ .size = 0,
+ .expected = -EINVAL,
+ .test_mode = CLONE3_ARGS_NO_TEST,
+ },
+};
- test_clone3(0, sizeof(struct clone_args) * 2, -E2BIG,
- CLONE3_ARGS_ALL_0);
+int main(int argc, char *argv[])
+{
+ size_t size;
+ int i;
- /* Do a clone3() with > page size */
- test_clone3(0, getpagesize() + 8, -E2BIG, CLONE3_ARGS_NO_TEST);
+ ksft_print_header();
+ ksft_set_plan(ARRAY_SIZE(tests));
+ test_clone3_supported();
- /* Do a clone3() with CLONE3_ARGS_SIZE_V0 in a new PID NS. */
- if (uid == 0)
- test_clone3(CLONE_NEWPID, CLONE3_ARGS_SIZE_V0, 0,
- CLONE3_ARGS_NO_TEST);
- else
- ksft_test_result_skip("Skipping clone3() with CLONE_NEWPID\n");
+ for (i = 0; i < ARRAY_SIZE(tests); i++) {
+ if (tests[i].filter && tests[i].filter()) {
+ ksft_test_result_skip("%s\n", tests[i].name);
+ continue;
+ }
- /* Do a clone3() with CLONE3_ARGS_SIZE_V0 - 8 in a new PID NS */
- test_clone3(CLONE_NEWPID, CLONE3_ARGS_SIZE_V0 - 8, -EINVAL,
- CLONE3_ARGS_NO_TEST);
+ if (tests[i].size_function)
+ size = tests[i].size_function();
+ else
+ size = tests[i].size;
- /* Do a clone3() with sizeof(struct clone_args) + 8 in a new PID NS */
- if (uid == 0)
- test_clone3(CLONE_NEWPID, sizeof(struct clone_args) + 8, 0,
- CLONE3_ARGS_NO_TEST);
- else
- ksft_test_result_skip("Skipping clone3() with CLONE_NEWPID\n");
+ ksft_print_msg("Running test '%s'\n", tests[i].name);
- /* Do a clone3() with > page size in a new PID NS */
- test_clone3(CLONE_NEWPID, getpagesize() + 8, -E2BIG,
- CLONE3_ARGS_NO_TEST);
+ ksft_test_result(test_clone3(tests[i].flags, size,
+ tests[i].expected,
+ tests[i].test_mode),
+ "%s\n", tests[i].name);
+ }
- return !ksft_get_fail_cnt() ? ksft_exit_pass() : ksft_exit_fail();
+ ksft_finished();
}
diff --git a/tools/testing/selftests/clone3/clone3_cap_checkpoint_restore.c b/tools/testing/selftests/clone3/clone3_cap_checkpoint_restore.c
new file mode 100644
index 000000000000..31b56d625655
--- /dev/null
+++ b/tools/testing/selftests/clone3/clone3_cap_checkpoint_restore.c
@@ -0,0 +1,180 @@
+// SPDX-License-Identifier: GPL-2.0
+
+/*
+ * Based on Christian Brauner's clone3() example.
+ * These tests are assuming to be running in the host's
+ * PID namespace.
+ */
+
+/* capabilities related code based on selftests/bpf/test_verifier.c */
+
+#define _GNU_SOURCE
+#include <errno.h>
+#include <linux/types.h>
+#include <linux/sched.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <stdbool.h>
+#include <sys/capability.h>
+#include <sys/prctl.h>
+#include <sys/syscall.h>
+#include <sys/types.h>
+#include <sys/un.h>
+#include <sys/wait.h>
+#include <unistd.h>
+#include <sched.h>
+
+#include "../kselftest_harness.h"
+#include "clone3_selftests.h"
+
+#define MAX_PID_NS_LEVEL 32
+
+static void child_exit(int ret)
+{
+ fflush(stdout);
+ fflush(stderr);
+ _exit(ret);
+}
+
+static int call_clone3_set_tid(struct __test_metadata *_metadata,
+ pid_t *set_tid, size_t set_tid_size)
+{
+ int status;
+ pid_t pid = -1;
+
+ struct __clone_args args = {
+ .exit_signal = SIGCHLD,
+ .set_tid = ptr_to_u64(set_tid),
+ .set_tid_size = set_tid_size,
+ };
+
+ pid = sys_clone3(&args, sizeof(args));
+ if (pid < 0) {
+ TH_LOG("%s - Failed to create new process", strerror(errno));
+ return -errno;
+ }
+
+ if (pid == 0) {
+ int ret;
+ char tmp = 0;
+
+ TH_LOG("I am the child, my PID is %d (expected %d)", getpid(), set_tid[0]);
+
+ if (set_tid[0] != getpid())
+ child_exit(EXIT_FAILURE);
+ child_exit(EXIT_SUCCESS);
+ }
+
+ TH_LOG("I am the parent (%d). My child's pid is %d", getpid(), pid);
+
+ if (waitpid(pid, &status, 0) < 0) {
+ TH_LOG("Child returned %s", strerror(errno));
+ return -errno;
+ }
+
+ if (!WIFEXITED(status))
+ return -1;
+
+ return WEXITSTATUS(status);
+}
+
+static int test_clone3_set_tid(struct __test_metadata *_metadata,
+ pid_t *set_tid, size_t set_tid_size)
+{
+ int ret;
+
+ TH_LOG("[%d] Trying clone3() with CLONE_SET_TID to %d", getpid(), set_tid[0]);
+ ret = call_clone3_set_tid(_metadata, set_tid, set_tid_size);
+ TH_LOG("[%d] clone3() with CLONE_SET_TID %d says:%d", getpid(), set_tid[0], ret);
+ return ret;
+}
+
+struct libcap {
+ struct __user_cap_header_struct hdr;
+ struct __user_cap_data_struct data[2];
+};
+
+static int set_capability(void)
+{
+ cap_value_t cap_values[] = { CAP_SETUID, CAP_SETGID };
+ struct libcap *cap;
+ int ret = -1;
+ cap_t caps;
+
+ caps = cap_get_proc();
+ if (!caps) {
+ perror("cap_get_proc");
+ return -1;
+ }
+
+ /* Drop all capabilities */
+ if (cap_clear(caps)) {
+ perror("cap_clear");
+ goto out;
+ }
+
+ cap_set_flag(caps, CAP_EFFECTIVE, 2, cap_values, CAP_SET);
+ cap_set_flag(caps, CAP_PERMITTED, 2, cap_values, CAP_SET);
+
+ cap = (struct libcap *) caps;
+
+ /* 40 -> CAP_CHECKPOINT_RESTORE */
+ cap->data[1].effective |= 1 << (40 - 32);
+ cap->data[1].permitted |= 1 << (40 - 32);
+
+ if (cap_set_proc(caps)) {
+ perror("cap_set_proc");
+ goto out;
+ }
+ ret = 0;
+out:
+ if (cap_free(caps))
+ perror("cap_free");
+ return ret;
+}
+
+TEST(clone3_cap_checkpoint_restore)
+{
+ pid_t pid;
+ int status;
+ int ret = 0;
+ pid_t set_tid[1];
+
+ test_clone3_supported();
+
+ EXPECT_EQ(getuid(), 0)
+ SKIP(return, "Skipping all tests as non-root");
+
+ memset(&set_tid, 0, sizeof(set_tid));
+
+ /* Find the current active PID */
+ pid = fork();
+ if (pid == 0) {
+ TH_LOG("Child has PID %d", getpid());
+ child_exit(EXIT_SUCCESS);
+ }
+ ASSERT_GT(waitpid(pid, &status, 0), 0)
+ TH_LOG("Waiting for child %d failed", pid);
+
+ /* After the child has finished, its PID should be free. */
+ set_tid[0] = pid;
+
+ ASSERT_EQ(set_capability(), 0)
+ TH_LOG("Could not set CAP_CHECKPOINT_RESTORE");
+
+ ASSERT_EQ(prctl(PR_SET_KEEPCAPS, 1, 0, 0, 0), 0);
+
+ EXPECT_EQ(setgid(65534), 0)
+ TH_LOG("Failed to setgid(65534)");
+ ASSERT_EQ(setuid(65534), 0);
+
+ set_tid[0] = pid;
+ /* This would fail without CAP_CHECKPOINT_RESTORE */
+ ASSERT_EQ(test_clone3_set_tid(_metadata, set_tid, 1), -EPERM);
+ ASSERT_EQ(set_capability(), 0)
+ TH_LOG("Could not set CAP_CHECKPOINT_RESTORE");
+ /* This should work as we have CAP_CHECKPOINT_RESTORE as non-root */
+ ASSERT_EQ(test_clone3_set_tid(_metadata, set_tid, 1), 0);
+}
+
+TEST_HARNESS_MAIN
diff --git a/tools/testing/selftests/clone3/clone3_clear_sighand.c b/tools/testing/selftests/clone3/clone3_clear_sighand.c
index 9e1af8aa7698..54a8b2445be9 100644
--- a/tools/testing/selftests/clone3/clone3_clear_sighand.c
+++ b/tools/testing/selftests/clone3/clone3_clear_sighand.c
@@ -16,10 +16,6 @@
#include "../kselftest.h"
#include "clone3_selftests.h"
-#ifndef CLONE_CLEAR_SIGHAND
-#define CLONE_CLEAR_SIGHAND 0x100000000ULL
-#endif
-
static void nop_handler(int signo)
{
}
@@ -47,7 +43,7 @@ static void test_clone3_clear_sighand(void)
{
int ret;
pid_t pid;
- struct clone_args args = {};
+ struct __clone_args args = {};
struct sigaction act;
/*
@@ -119,9 +115,8 @@ static void test_clone3_clear_sighand(void)
int main(int argc, char **argv)
{
ksft_print_header();
- test_clone3_supported();
-
ksft_set_plan(1);
+ test_clone3_supported();
test_clone3_clear_sighand();
diff --git a/tools/testing/selftests/clone3/clone3_selftests.h b/tools/testing/selftests/clone3/clone3_selftests.h
index 91c1a78ddb39..3d2663fe50ba 100644
--- a/tools/testing/selftests/clone3/clone3_selftests.h
+++ b/tools/testing/selftests/clone3/clone3_selftests.h
@@ -15,17 +15,11 @@
#define ptr_to_u64(ptr) ((__u64)((uintptr_t)(ptr)))
-#ifndef CLONE_INTO_CGROUP
-#define CLONE_INTO_CGROUP 0x200000000ULL /* Clone into a specific cgroup given the right permissions. */
-#endif
-
-#ifndef CLONE_ARGS_SIZE_VER0
-#define CLONE_ARGS_SIZE_VER0 64
-#endif
-
#ifndef __NR_clone3
#define __NR_clone3 -1
-struct clone_args {
+#endif
+
+struct __clone_args {
__aligned_u64 flags;
__aligned_u64 pidfd;
__aligned_u64 child_tid;
@@ -34,15 +28,12 @@ struct clone_args {
__aligned_u64 stack;
__aligned_u64 stack_size;
__aligned_u64 tls;
-#define CLONE_ARGS_SIZE_VER1 80
__aligned_u64 set_tid;
__aligned_u64 set_tid_size;
-#define CLONE_ARGS_SIZE_VER2 88
__aligned_u64 cgroup;
};
-#endif /* __NR_clone3 */
-static pid_t sys_clone3(struct clone_args *args, size_t size)
+static pid_t sys_clone3(struct __clone_args *args, size_t size)
{
fflush(stdout);
fflush(stderr);
@@ -52,7 +43,7 @@ static pid_t sys_clone3(struct clone_args *args, size_t size)
static inline void test_clone3_supported(void)
{
pid_t pid;
- struct clone_args args = {};
+ struct __clone_args args = {};
if (__NR_clone3 < 0)
ksft_exit_skip("clone3() syscall is not supported\n");
diff --git a/tools/testing/selftests/clone3/clone3_set_tid.c b/tools/testing/selftests/clone3/clone3_set_tid.c
index 25beb22f35b5..ed785afb6077 100644
--- a/tools/testing/selftests/clone3/clone3_set_tid.c
+++ b/tools/testing/selftests/clone3/clone3_set_tid.c
@@ -23,9 +23,7 @@
#include "../kselftest.h"
#include "clone3_selftests.h"
-#ifndef MAX_PID_NS_LEVEL
#define MAX_PID_NS_LEVEL 32
-#endif
static int pipe_1[2];
static int pipe_2[2];
@@ -46,14 +44,14 @@ static int call_clone3_set_tid(pid_t *set_tid,
int status;
pid_t pid = -1;
- struct clone_args args = {
+ struct __clone_args args = {
.flags = flags,
.exit_signal = SIGCHLD,
.set_tid = ptr_to_u64(set_tid),
.set_tid_size = set_tid_size,
};
- pid = sys_clone3(&args, sizeof(struct clone_args));
+ pid = sys_clone3(&args, sizeof(args));
if (pid < 0) {
ksft_print_msg("%s - Failed to create new process\n",
strerror(errno));
@@ -157,8 +155,8 @@ int main(int argc, char *argv[])
pid_t set_tid[MAX_PID_NS_LEVEL * 2];
ksft_print_header();
- test_clone3_supported();
ksft_set_plan(29);
+ test_clone3_supported();
if (pipe(pipe_1) < 0 || pipe(pipe_2) < 0)
ksft_exit_fail_msg("pipe() failed\n");
diff --git a/tools/testing/selftests/connector/.gitignore b/tools/testing/selftests/connector/.gitignore
new file mode 100644
index 000000000000..c90098199a44
--- /dev/null
+++ b/tools/testing/selftests/connector/.gitignore
@@ -0,0 +1 @@
+proc_filter
diff --git a/tools/testing/selftests/connector/Makefile b/tools/testing/selftests/connector/Makefile
new file mode 100644
index 000000000000..92188b9bac5c
--- /dev/null
+++ b/tools/testing/selftests/connector/Makefile
@@ -0,0 +1,6 @@
+# SPDX-License-Identifier: GPL-2.0
+CFLAGS += -Wall $(KHDR_INCLUDES)
+
+TEST_GEN_PROGS = proc_filter
+
+include ../lib.mk
diff --git a/tools/testing/selftests/connector/proc_filter.c b/tools/testing/selftests/connector/proc_filter.c
new file mode 100644
index 000000000000..4a825b997666
--- /dev/null
+++ b/tools/testing/selftests/connector/proc_filter.c
@@ -0,0 +1,310 @@
+// SPDX-License-Identifier: GPL-2.0-only
+
+#include <sys/types.h>
+#include <sys/epoll.h>
+#include <sys/socket.h>
+#include <linux/netlink.h>
+#include <linux/connector.h>
+#include <linux/cn_proc.h>
+
+#include <stddef.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <unistd.h>
+#include <strings.h>
+#include <errno.h>
+#include <signal.h>
+#include <string.h>
+
+#include "../kselftest.h"
+
+#define NL_MESSAGE_SIZE (sizeof(struct nlmsghdr) + sizeof(struct cn_msg) + \
+ sizeof(struct proc_input))
+#define NL_MESSAGE_SIZE_NF (sizeof(struct nlmsghdr) + sizeof(struct cn_msg) + \
+ sizeof(int))
+
+#define MAX_EVENTS 1
+
+volatile static int interrupted;
+static int nl_sock, ret_errno, tcount;
+static struct epoll_event evn;
+
+static int filter;
+
+#ifdef ENABLE_PRINTS
+#define Printf printf
+#else
+#define Printf ksft_print_msg
+#endif
+
+int send_message(void *pinp)
+{
+ char buff[NL_MESSAGE_SIZE];
+ struct nlmsghdr *hdr;
+ struct cn_msg *msg;
+
+ hdr = (struct nlmsghdr *)buff;
+ if (filter)
+ hdr->nlmsg_len = NL_MESSAGE_SIZE;
+ else
+ hdr->nlmsg_len = NL_MESSAGE_SIZE_NF;
+ hdr->nlmsg_type = NLMSG_DONE;
+ hdr->nlmsg_flags = 0;
+ hdr->nlmsg_seq = 0;
+ hdr->nlmsg_pid = getpid();
+
+ msg = (struct cn_msg *)NLMSG_DATA(hdr);
+ msg->id.idx = CN_IDX_PROC;
+ msg->id.val = CN_VAL_PROC;
+ msg->seq = 0;
+ msg->ack = 0;
+ msg->flags = 0;
+
+ if (filter) {
+ msg->len = sizeof(struct proc_input);
+ ((struct proc_input *)msg->data)->mcast_op =
+ ((struct proc_input *)pinp)->mcast_op;
+ ((struct proc_input *)msg->data)->event_type =
+ ((struct proc_input *)pinp)->event_type;
+ } else {
+ msg->len = sizeof(int);
+ *(int *)msg->data = *(enum proc_cn_mcast_op *)pinp;
+ }
+
+ if (send(nl_sock, hdr, hdr->nlmsg_len, 0) == -1) {
+ ret_errno = errno;
+ perror("send failed");
+ return -3;
+ }
+ return 0;
+}
+
+int register_proc_netlink(int *efd, void *input)
+{
+ struct sockaddr_nl sa_nl;
+ int err = 0, epoll_fd;
+
+ nl_sock = socket(PF_NETLINK, SOCK_DGRAM, NETLINK_CONNECTOR);
+
+ if (nl_sock == -1) {
+ ret_errno = errno;
+ perror("socket failed");
+ return -1;
+ }
+
+ bzero(&sa_nl, sizeof(sa_nl));
+ sa_nl.nl_family = AF_NETLINK;
+ sa_nl.nl_groups = CN_IDX_PROC;
+ sa_nl.nl_pid = getpid();
+
+ if (bind(nl_sock, (struct sockaddr *)&sa_nl, sizeof(sa_nl)) == -1) {
+ ret_errno = errno;
+ perror("bind failed");
+ return -2;
+ }
+
+ epoll_fd = epoll_create1(EPOLL_CLOEXEC);
+ if (epoll_fd < 0) {
+ ret_errno = errno;
+ perror("epoll_create1 failed");
+ return -2;
+ }
+
+ err = send_message(input);
+
+ if (err < 0)
+ return err;
+
+ evn.events = EPOLLIN;
+ evn.data.fd = nl_sock;
+ if (epoll_ctl(epoll_fd, EPOLL_CTL_ADD, nl_sock, &evn) < 0) {
+ ret_errno = errno;
+ perror("epoll_ctl failed");
+ return -3;
+ }
+ *efd = epoll_fd;
+ return 0;
+}
+
+static void sigint(int sig)
+{
+ interrupted = 1;
+}
+
+int handle_packet(char *buff, int fd, struct proc_event *event)
+{
+ struct nlmsghdr *hdr;
+
+ hdr = (struct nlmsghdr *)buff;
+
+ if (hdr->nlmsg_type == NLMSG_ERROR) {
+ perror("NLMSG_ERROR error\n");
+ return -3;
+ } else if (hdr->nlmsg_type == NLMSG_DONE) {
+ event = (struct proc_event *)
+ ((struct cn_msg *)NLMSG_DATA(hdr))->data;
+ tcount++;
+ switch (event->what) {
+ case PROC_EVENT_EXIT:
+ Printf("Exit process %d (tgid %d) with code %d, signal %d\n",
+ event->event_data.exit.process_pid,
+ event->event_data.exit.process_tgid,
+ event->event_data.exit.exit_code,
+ event->event_data.exit.exit_signal);
+ break;
+ case PROC_EVENT_FORK:
+ Printf("Fork process %d (tgid %d), parent %d (tgid %d)\n",
+ event->event_data.fork.child_pid,
+ event->event_data.fork.child_tgid,
+ event->event_data.fork.parent_pid,
+ event->event_data.fork.parent_tgid);
+ break;
+ case PROC_EVENT_EXEC:
+ Printf("Exec process %d (tgid %d)\n",
+ event->event_data.exec.process_pid,
+ event->event_data.exec.process_tgid);
+ break;
+ case PROC_EVENT_UID:
+ Printf("UID process %d (tgid %d) uid %d euid %d\n",
+ event->event_data.id.process_pid,
+ event->event_data.id.process_tgid,
+ event->event_data.id.r.ruid,
+ event->event_data.id.e.euid);
+ break;
+ case PROC_EVENT_GID:
+ Printf("GID process %d (tgid %d) gid %d egid %d\n",
+ event->event_data.id.process_pid,
+ event->event_data.id.process_tgid,
+ event->event_data.id.r.rgid,
+ event->event_data.id.e.egid);
+ break;
+ case PROC_EVENT_SID:
+ Printf("SID process %d (tgid %d)\n",
+ event->event_data.sid.process_pid,
+ event->event_data.sid.process_tgid);
+ break;
+ case PROC_EVENT_PTRACE:
+ Printf("Ptrace process %d (tgid %d), Tracer %d (tgid %d)\n",
+ event->event_data.ptrace.process_pid,
+ event->event_data.ptrace.process_tgid,
+ event->event_data.ptrace.tracer_pid,
+ event->event_data.ptrace.tracer_tgid);
+ break;
+ case PROC_EVENT_COMM:
+ Printf("Comm process %d (tgid %d) comm %s\n",
+ event->event_data.comm.process_pid,
+ event->event_data.comm.process_tgid,
+ event->event_data.comm.comm);
+ break;
+ case PROC_EVENT_COREDUMP:
+ Printf("Coredump process %d (tgid %d) parent %d, (tgid %d)\n",
+ event->event_data.coredump.process_pid,
+ event->event_data.coredump.process_tgid,
+ event->event_data.coredump.parent_pid,
+ event->event_data.coredump.parent_tgid);
+ break;
+ default:
+ break;
+ }
+ }
+ return 0;
+}
+
+int handle_events(int epoll_fd, struct proc_event *pev)
+{
+ char buff[CONNECTOR_MAX_MSG_SIZE];
+ struct epoll_event ev[MAX_EVENTS];
+ int i, event_count = 0, err = 0;
+
+ event_count = epoll_wait(epoll_fd, ev, MAX_EVENTS, -1);
+ if (event_count < 0) {
+ ret_errno = errno;
+ if (ret_errno != EINTR)
+ perror("epoll_wait failed");
+ return -3;
+ }
+ for (i = 0; i < event_count; i++) {
+ if (!(ev[i].events & EPOLLIN))
+ continue;
+ if (recv(ev[i].data.fd, buff, sizeof(buff), 0) == -1) {
+ ret_errno = errno;
+ perror("recv failed");
+ return -3;
+ }
+ err = handle_packet(buff, ev[i].data.fd, pev);
+ if (err < 0)
+ return err;
+ }
+ return 0;
+}
+
+int main(int argc, char *argv[])
+{
+ int epoll_fd, err;
+ struct proc_event proc_ev;
+ struct proc_input input;
+
+ signal(SIGINT, sigint);
+
+ if (argc > 2) {
+ printf("Expected 0(assume no-filter) or 1 argument(-f)\n");
+ exit(KSFT_SKIP);
+ }
+
+ if (argc == 2) {
+ if (strcmp(argv[1], "-f") == 0) {
+ filter = 1;
+ } else {
+ printf("Valid option : -f (for filter feature)\n");
+ exit(KSFT_SKIP);
+ }
+ }
+
+ if (filter) {
+ input.event_type = PROC_EVENT_NONZERO_EXIT;
+ input.mcast_op = PROC_CN_MCAST_LISTEN;
+ err = register_proc_netlink(&epoll_fd, (void*)&input);
+ } else {
+ enum proc_cn_mcast_op op = PROC_CN_MCAST_LISTEN;
+ err = register_proc_netlink(&epoll_fd, (void*)&op);
+ }
+
+ if (err < 0) {
+ if (err == -2)
+ close(nl_sock);
+ if (err == -3) {
+ close(nl_sock);
+ close(epoll_fd);
+ }
+ exit(1);
+ }
+
+ while (!interrupted) {
+ err = handle_events(epoll_fd, &proc_ev);
+ if (err < 0) {
+ if (ret_errno == EINTR)
+ continue;
+ if (err == -2)
+ close(nl_sock);
+ if (err == -3) {
+ close(nl_sock);
+ close(epoll_fd);
+ }
+ exit(1);
+ }
+ }
+
+ if (filter) {
+ input.mcast_op = PROC_CN_MCAST_IGNORE;
+ send_message((void*)&input);
+ } else {
+ enum proc_cn_mcast_op op = PROC_CN_MCAST_IGNORE;
+ send_message((void*)&op);
+ }
+
+ close(epoll_fd);
+ close(nl_sock);
+
+ printf("Done total count: %d\n", tcount);
+ exit(0);
+}
diff --git a/tools/testing/selftests/core/.gitignore b/tools/testing/selftests/core/.gitignore
new file mode 100644
index 000000000000..6e6712ce5817
--- /dev/null
+++ b/tools/testing/selftests/core/.gitignore
@@ -0,0 +1 @@
+close_range_test
diff --git a/tools/testing/selftests/core/Makefile b/tools/testing/selftests/core/Makefile
new file mode 100644
index 000000000000..ce262d097269
--- /dev/null
+++ b/tools/testing/selftests/core/Makefile
@@ -0,0 +1,7 @@
+# SPDX-License-Identifier: GPL-2.0-only
+CFLAGS += -g $(KHDR_INCLUDES)
+
+TEST_GEN_PROGS := close_range_test
+
+include ../lib.mk
+
diff --git a/tools/testing/selftests/core/close_range_test.c b/tools/testing/selftests/core/close_range_test.c
new file mode 100644
index 000000000000..c59e4adb905d
--- /dev/null
+++ b/tools/testing/selftests/core/close_range_test.c
@@ -0,0 +1,539 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#define _GNU_SOURCE
+#include <errno.h>
+#include <fcntl.h>
+#include <linux/kernel.h>
+#include <limits.h>
+#include <stdbool.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <syscall.h>
+#include <unistd.h>
+#include <sys/resource.h>
+#include <linux/close_range.h>
+
+#include "../kselftest_harness.h"
+#include "../clone3/clone3_selftests.h"
+
+static inline int sys_close_range(unsigned int fd, unsigned int max_fd,
+ unsigned int flags)
+{
+ return syscall(__NR_close_range, fd, max_fd, flags);
+}
+
+TEST(core_close_range)
+{
+ int i, ret;
+ int open_fds[101];
+
+ for (i = 0; i < ARRAY_SIZE(open_fds); i++) {
+ int fd;
+
+ fd = open("/dev/null", O_RDONLY | O_CLOEXEC);
+ ASSERT_GE(fd, 0) {
+ if (errno == ENOENT)
+ SKIP(return, "Skipping test since /dev/null does not exist");
+ }
+
+ open_fds[i] = fd;
+ }
+
+ EXPECT_EQ(-1, sys_close_range(open_fds[0], open_fds[100], -1)) {
+ if (errno == ENOSYS)
+ SKIP(return, "close_range() syscall not supported");
+ }
+
+ EXPECT_EQ(0, sys_close_range(open_fds[0], open_fds[50], 0));
+
+ for (i = 0; i <= 50; i++)
+ EXPECT_EQ(-1, fcntl(open_fds[i], F_GETFL));
+
+ for (i = 51; i <= 100; i++)
+ EXPECT_GT(fcntl(open_fds[i], F_GETFL), -1);
+
+ /* create a couple of gaps */
+ close(57);
+ close(78);
+ close(81);
+ close(82);
+ close(84);
+ close(90);
+
+ EXPECT_EQ(0, sys_close_range(open_fds[51], open_fds[92], 0));
+
+ for (i = 51; i <= 92; i++)
+ EXPECT_EQ(-1, fcntl(open_fds[i], F_GETFL));
+
+ for (i = 93; i <= 100; i++)
+ EXPECT_GT(fcntl(open_fds[i], F_GETFL), -1);
+
+ /* test that the kernel caps and still closes all fds */
+ EXPECT_EQ(0, sys_close_range(open_fds[93], open_fds[99], 0));
+
+ for (i = 93; i <= 99; i++)
+ EXPECT_EQ(-1, fcntl(open_fds[i], F_GETFL));
+
+ EXPECT_GT(fcntl(open_fds[i], F_GETFL), -1);
+
+ EXPECT_EQ(0, sys_close_range(open_fds[100], open_fds[100], 0));
+
+ EXPECT_EQ(-1, fcntl(open_fds[100], F_GETFL));
+}
+
+TEST(close_range_unshare)
+{
+ int i, ret, status;
+ pid_t pid;
+ int open_fds[101];
+ struct __clone_args args = {
+ .flags = CLONE_FILES,
+ .exit_signal = SIGCHLD,
+ };
+
+ for (i = 0; i < ARRAY_SIZE(open_fds); i++) {
+ int fd;
+
+ fd = open("/dev/null", O_RDONLY | O_CLOEXEC);
+ ASSERT_GE(fd, 0) {
+ if (errno == ENOENT)
+ SKIP(return, "Skipping test since /dev/null does not exist");
+ }
+
+ open_fds[i] = fd;
+ }
+
+ pid = sys_clone3(&args, sizeof(args));
+ ASSERT_GE(pid, 0);
+
+ if (pid == 0) {
+ ret = sys_close_range(open_fds[0], open_fds[50],
+ CLOSE_RANGE_UNSHARE);
+ if (ret)
+ exit(EXIT_FAILURE);
+
+ for (i = 0; i <= 50; i++)
+ if (fcntl(open_fds[i], F_GETFL) != -1)
+ exit(EXIT_FAILURE);
+
+ for (i = 51; i <= 100; i++)
+ if (fcntl(open_fds[i], F_GETFL) == -1)
+ exit(EXIT_FAILURE);
+
+ /* create a couple of gaps */
+ close(57);
+ close(78);
+ close(81);
+ close(82);
+ close(84);
+ close(90);
+
+ ret = sys_close_range(open_fds[51], open_fds[92],
+ CLOSE_RANGE_UNSHARE);
+ if (ret)
+ exit(EXIT_FAILURE);
+
+ for (i = 51; i <= 92; i++)
+ if (fcntl(open_fds[i], F_GETFL) != -1)
+ exit(EXIT_FAILURE);
+
+ for (i = 93; i <= 100; i++)
+ if (fcntl(open_fds[i], F_GETFL) == -1)
+ exit(EXIT_FAILURE);
+
+ /* test that the kernel caps and still closes all fds */
+ ret = sys_close_range(open_fds[93], open_fds[99],
+ CLOSE_RANGE_UNSHARE);
+ if (ret)
+ exit(EXIT_FAILURE);
+
+ for (i = 93; i <= 99; i++)
+ if (fcntl(open_fds[i], F_GETFL) != -1)
+ exit(EXIT_FAILURE);
+
+ if (fcntl(open_fds[100], F_GETFL) == -1)
+ exit(EXIT_FAILURE);
+
+ ret = sys_close_range(open_fds[100], open_fds[100],
+ CLOSE_RANGE_UNSHARE);
+ if (ret)
+ exit(EXIT_FAILURE);
+
+ if (fcntl(open_fds[100], F_GETFL) != -1)
+ exit(EXIT_FAILURE);
+
+ exit(EXIT_SUCCESS);
+ }
+
+ EXPECT_EQ(waitpid(pid, &status, 0), pid);
+ EXPECT_EQ(true, WIFEXITED(status));
+ EXPECT_EQ(0, WEXITSTATUS(status));
+}
+
+TEST(close_range_unshare_capped)
+{
+ int i, ret, status;
+ pid_t pid;
+ int open_fds[101];
+ struct __clone_args args = {
+ .flags = CLONE_FILES,
+ .exit_signal = SIGCHLD,
+ };
+
+ for (i = 0; i < ARRAY_SIZE(open_fds); i++) {
+ int fd;
+
+ fd = open("/dev/null", O_RDONLY | O_CLOEXEC);
+ ASSERT_GE(fd, 0) {
+ if (errno == ENOENT)
+ SKIP(return, "Skipping test since /dev/null does not exist");
+ }
+
+ open_fds[i] = fd;
+ }
+
+ pid = sys_clone3(&args, sizeof(args));
+ ASSERT_GE(pid, 0);
+
+ if (pid == 0) {
+ ret = sys_close_range(open_fds[0], UINT_MAX,
+ CLOSE_RANGE_UNSHARE);
+ if (ret)
+ exit(EXIT_FAILURE);
+
+ for (i = 0; i <= 100; i++)
+ if (fcntl(open_fds[i], F_GETFL) != -1)
+ exit(EXIT_FAILURE);
+
+ exit(EXIT_SUCCESS);
+ }
+
+ EXPECT_EQ(waitpid(pid, &status, 0), pid);
+ EXPECT_EQ(true, WIFEXITED(status));
+ EXPECT_EQ(0, WEXITSTATUS(status));
+}
+
+TEST(close_range_cloexec)
+{
+ int i, ret;
+ int open_fds[101];
+ struct rlimit rlimit;
+
+ for (i = 0; i < ARRAY_SIZE(open_fds); i++) {
+ int fd;
+
+ fd = open("/dev/null", O_RDONLY);
+ ASSERT_GE(fd, 0) {
+ if (errno == ENOENT)
+ SKIP(return, "Skipping test since /dev/null does not exist");
+ }
+
+ open_fds[i] = fd;
+ }
+
+ ret = sys_close_range(1000, 1000, CLOSE_RANGE_CLOEXEC);
+ if (ret < 0) {
+ if (errno == ENOSYS)
+ SKIP(return, "close_range() syscall not supported");
+ if (errno == EINVAL)
+ SKIP(return, "close_range() doesn't support CLOSE_RANGE_CLOEXEC");
+ }
+
+ /* Ensure the FD_CLOEXEC bit is set also with a resource limit in place. */
+ ASSERT_EQ(0, getrlimit(RLIMIT_NOFILE, &rlimit));
+ rlimit.rlim_cur = 25;
+ ASSERT_EQ(0, setrlimit(RLIMIT_NOFILE, &rlimit));
+
+ /* Set close-on-exec for two ranges: [0-50] and [75-100]. */
+ ret = sys_close_range(open_fds[0], open_fds[50], CLOSE_RANGE_CLOEXEC);
+ ASSERT_EQ(0, ret);
+ ret = sys_close_range(open_fds[75], open_fds[100], CLOSE_RANGE_CLOEXEC);
+ ASSERT_EQ(0, ret);
+
+ for (i = 0; i <= 50; i++) {
+ int flags = fcntl(open_fds[i], F_GETFD);
+
+ EXPECT_GT(flags, -1);
+ EXPECT_EQ(flags & FD_CLOEXEC, FD_CLOEXEC);
+ }
+
+ for (i = 51; i <= 74; i++) {
+ int flags = fcntl(open_fds[i], F_GETFD);
+
+ EXPECT_GT(flags, -1);
+ EXPECT_EQ(flags & FD_CLOEXEC, 0);
+ }
+
+ for (i = 75; i <= 100; i++) {
+ int flags = fcntl(open_fds[i], F_GETFD);
+
+ EXPECT_GT(flags, -1);
+ EXPECT_EQ(flags & FD_CLOEXEC, FD_CLOEXEC);
+ }
+
+ /* Test a common pattern. */
+ ret = sys_close_range(3, UINT_MAX, CLOSE_RANGE_CLOEXEC);
+ for (i = 0; i <= 100; i++) {
+ int flags = fcntl(open_fds[i], F_GETFD);
+
+ EXPECT_GT(flags, -1);
+ EXPECT_EQ(flags & FD_CLOEXEC, FD_CLOEXEC);
+ }
+}
+
+TEST(close_range_cloexec_unshare)
+{
+ int i, ret;
+ int open_fds[101];
+ struct rlimit rlimit;
+
+ for (i = 0; i < ARRAY_SIZE(open_fds); i++) {
+ int fd;
+
+ fd = open("/dev/null", O_RDONLY);
+ ASSERT_GE(fd, 0) {
+ if (errno == ENOENT)
+ SKIP(return, "Skipping test since /dev/null does not exist");
+ }
+
+ open_fds[i] = fd;
+ }
+
+ ret = sys_close_range(1000, 1000, CLOSE_RANGE_CLOEXEC);
+ if (ret < 0) {
+ if (errno == ENOSYS)
+ SKIP(return, "close_range() syscall not supported");
+ if (errno == EINVAL)
+ SKIP(return, "close_range() doesn't support CLOSE_RANGE_CLOEXEC");
+ }
+
+ /* Ensure the FD_CLOEXEC bit is set also with a resource limit in place. */
+ ASSERT_EQ(0, getrlimit(RLIMIT_NOFILE, &rlimit));
+ rlimit.rlim_cur = 25;
+ ASSERT_EQ(0, setrlimit(RLIMIT_NOFILE, &rlimit));
+
+ /* Set close-on-exec for two ranges: [0-50] and [75-100]. */
+ ret = sys_close_range(open_fds[0], open_fds[50],
+ CLOSE_RANGE_CLOEXEC | CLOSE_RANGE_UNSHARE);
+ ASSERT_EQ(0, ret);
+ ret = sys_close_range(open_fds[75], open_fds[100],
+ CLOSE_RANGE_CLOEXEC | CLOSE_RANGE_UNSHARE);
+ ASSERT_EQ(0, ret);
+
+ for (i = 0; i <= 50; i++) {
+ int flags = fcntl(open_fds[i], F_GETFD);
+
+ EXPECT_GT(flags, -1);
+ EXPECT_EQ(flags & FD_CLOEXEC, FD_CLOEXEC);
+ }
+
+ for (i = 51; i <= 74; i++) {
+ int flags = fcntl(open_fds[i], F_GETFD);
+
+ EXPECT_GT(flags, -1);
+ EXPECT_EQ(flags & FD_CLOEXEC, 0);
+ }
+
+ for (i = 75; i <= 100; i++) {
+ int flags = fcntl(open_fds[i], F_GETFD);
+
+ EXPECT_GT(flags, -1);
+ EXPECT_EQ(flags & FD_CLOEXEC, FD_CLOEXEC);
+ }
+
+ /* Test a common pattern. */
+ ret = sys_close_range(3, UINT_MAX,
+ CLOSE_RANGE_CLOEXEC | CLOSE_RANGE_UNSHARE);
+ for (i = 0; i <= 100; i++) {
+ int flags = fcntl(open_fds[i], F_GETFD);
+
+ EXPECT_GT(flags, -1);
+ EXPECT_EQ(flags & FD_CLOEXEC, FD_CLOEXEC);
+ }
+}
+
+/*
+ * Regression test for syzbot+96cfd2b22b3213646a93@syzkaller.appspotmail.com
+ */
+TEST(close_range_cloexec_syzbot)
+{
+ int fd1, fd2, fd3, flags, ret, status;
+ pid_t pid;
+ struct __clone_args args = {
+ .flags = CLONE_FILES,
+ .exit_signal = SIGCHLD,
+ };
+
+ /* Create a huge gap in the fd table. */
+ fd1 = open("/dev/null", O_RDWR);
+ EXPECT_GT(fd1, 0);
+
+ fd2 = dup2(fd1, 1000);
+ EXPECT_GT(fd2, 0);
+
+ pid = sys_clone3(&args, sizeof(args));
+ ASSERT_GE(pid, 0);
+
+ if (pid == 0) {
+ ret = sys_close_range(3, ~0U, CLOSE_RANGE_CLOEXEC);
+ if (ret)
+ exit(EXIT_FAILURE);
+
+ /*
+ * We now have a private file descriptor table and all
+ * our open fds should still be open but made
+ * close-on-exec.
+ */
+ flags = fcntl(fd1, F_GETFD);
+ EXPECT_GT(flags, -1);
+ EXPECT_EQ(flags & FD_CLOEXEC, FD_CLOEXEC);
+
+ flags = fcntl(fd2, F_GETFD);
+ EXPECT_GT(flags, -1);
+ EXPECT_EQ(flags & FD_CLOEXEC, FD_CLOEXEC);
+
+ fd3 = dup2(fd1, 42);
+ EXPECT_GT(fd3, 0);
+
+ /*
+ * Duplicating the file descriptor must remove the
+ * FD_CLOEXEC flag.
+ */
+ flags = fcntl(fd3, F_GETFD);
+ EXPECT_GT(flags, -1);
+ EXPECT_EQ(flags & FD_CLOEXEC, 0);
+
+ exit(EXIT_SUCCESS);
+ }
+
+ EXPECT_EQ(waitpid(pid, &status, 0), pid);
+ EXPECT_EQ(true, WIFEXITED(status));
+ EXPECT_EQ(0, WEXITSTATUS(status));
+
+ /*
+ * We had a shared file descriptor table before along with requesting
+ * close-on-exec so the original fds must not be close-on-exec.
+ */
+ flags = fcntl(fd1, F_GETFD);
+ EXPECT_GT(flags, -1);
+ EXPECT_EQ(flags & FD_CLOEXEC, FD_CLOEXEC);
+
+ flags = fcntl(fd2, F_GETFD);
+ EXPECT_GT(flags, -1);
+ EXPECT_EQ(flags & FD_CLOEXEC, FD_CLOEXEC);
+
+ fd3 = dup2(fd1, 42);
+ EXPECT_GT(fd3, 0);
+
+ flags = fcntl(fd3, F_GETFD);
+ EXPECT_GT(flags, -1);
+ EXPECT_EQ(flags & FD_CLOEXEC, 0);
+
+ EXPECT_EQ(close(fd1), 0);
+ EXPECT_EQ(close(fd2), 0);
+ EXPECT_EQ(close(fd3), 0);
+}
+
+/*
+ * Regression test for syzbot+96cfd2b22b3213646a93@syzkaller.appspotmail.com
+ */
+TEST(close_range_cloexec_unshare_syzbot)
+{
+ int i, fd1, fd2, fd3, flags, ret, status;
+ pid_t pid;
+ struct __clone_args args = {
+ .flags = CLONE_FILES,
+ .exit_signal = SIGCHLD,
+ };
+
+ /*
+ * Create a huge gap in the fd table. When we now call
+ * CLOSE_RANGE_UNSHARE with a shared fd table and and with ~0U as upper
+ * bound the kernel will only copy up to fd1 file descriptors into the
+ * new fd table. If the kernel is buggy and doesn't handle
+ * CLOSE_RANGE_CLOEXEC correctly it will not have copied all file
+ * descriptors and we will oops!
+ *
+ * On a buggy kernel this should immediately oops. But let's loop just
+ * to be sure.
+ */
+ fd1 = open("/dev/null", O_RDWR);
+ EXPECT_GT(fd1, 0);
+
+ fd2 = dup2(fd1, 1000);
+ EXPECT_GT(fd2, 0);
+
+ for (i = 0; i < 100; i++) {
+
+ pid = sys_clone3(&args, sizeof(args));
+ ASSERT_GE(pid, 0);
+
+ if (pid == 0) {
+ ret = sys_close_range(3, ~0U, CLOSE_RANGE_UNSHARE |
+ CLOSE_RANGE_CLOEXEC);
+ if (ret)
+ exit(EXIT_FAILURE);
+
+ /*
+ * We now have a private file descriptor table and all
+ * our open fds should still be open but made
+ * close-on-exec.
+ */
+ flags = fcntl(fd1, F_GETFD);
+ EXPECT_GT(flags, -1);
+ EXPECT_EQ(flags & FD_CLOEXEC, FD_CLOEXEC);
+
+ flags = fcntl(fd2, F_GETFD);
+ EXPECT_GT(flags, -1);
+ EXPECT_EQ(flags & FD_CLOEXEC, FD_CLOEXEC);
+
+ fd3 = dup2(fd1, 42);
+ EXPECT_GT(fd3, 0);
+
+ /*
+ * Duplicating the file descriptor must remove the
+ * FD_CLOEXEC flag.
+ */
+ flags = fcntl(fd3, F_GETFD);
+ EXPECT_GT(flags, -1);
+ EXPECT_EQ(flags & FD_CLOEXEC, 0);
+
+ EXPECT_EQ(close(fd1), 0);
+ EXPECT_EQ(close(fd2), 0);
+ EXPECT_EQ(close(fd3), 0);
+
+ exit(EXIT_SUCCESS);
+ }
+
+ EXPECT_EQ(waitpid(pid, &status, 0), pid);
+ EXPECT_EQ(true, WIFEXITED(status));
+ EXPECT_EQ(0, WEXITSTATUS(status));
+ }
+
+ /*
+ * We created a private file descriptor table before along with
+ * requesting close-on-exec so the original fds must not be
+ * close-on-exec.
+ */
+ flags = fcntl(fd1, F_GETFD);
+ EXPECT_GT(flags, -1);
+ EXPECT_EQ(flags & FD_CLOEXEC, 0);
+
+ flags = fcntl(fd2, F_GETFD);
+ EXPECT_GT(flags, -1);
+ EXPECT_EQ(flags & FD_CLOEXEC, 0);
+
+ fd3 = dup2(fd1, 42);
+ EXPECT_GT(fd3, 0);
+
+ flags = fcntl(fd3, F_GETFD);
+ EXPECT_GT(flags, -1);
+ EXPECT_EQ(flags & FD_CLOEXEC, 0);
+
+ EXPECT_EQ(close(fd1), 0);
+ EXPECT_EQ(close(fd2), 0);
+ EXPECT_EQ(close(fd3), 0);
+}
+
+TEST_HARNESS_MAIN
diff --git a/tools/testing/selftests/cpu-hotplug/Makefile b/tools/testing/selftests/cpu-hotplug/Makefile
index d8be047ee5b6..8b66c4738344 100644
--- a/tools/testing/selftests/cpu-hotplug/Makefile
+++ b/tools/testing/selftests/cpu-hotplug/Makefile
@@ -6,6 +6,6 @@ TEST_PROGS := cpu-on-off-test.sh
include ../lib.mk
run_full_test:
- @/bin/bash ./cpu-on-off-test.sh -a || echo "cpu-hotplug selftests: [FAIL]"
+ @/bin/bash ./cpu-on-off-test.sh -a && echo "cpu-hotplug selftests: [PASS]" || echo "cpu-hotplug selftests: [FAIL]"
clean:
diff --git a/tools/testing/selftests/cpu-hotplug/config b/tools/testing/selftests/cpu-hotplug/config
deleted file mode 100644
index d4aca2ad5069..000000000000
--- a/tools/testing/selftests/cpu-hotplug/config
+++ /dev/null
@@ -1 +0,0 @@
-CONFIG_NOTIFIER_ERROR_INJECTION=y
diff --git a/tools/testing/selftests/cpu-hotplug/cpu-on-off-test.sh b/tools/testing/selftests/cpu-hotplug/cpu-on-off-test.sh
index 0d26b5e3f966..d5dc7e0dc726 100755
--- a/tools/testing/selftests/cpu-hotplug/cpu-on-off-test.sh
+++ b/tools/testing/selftests/cpu-hotplug/cpu-on-off-test.sh
@@ -4,6 +4,7 @@
SYSFS=
# Kselftest framework requirement - SKIP code is 4.
ksft_skip=4
+retval=0
prerequisite()
{
@@ -102,10 +103,10 @@ online_cpu_expect_success()
if ! online_cpu $cpu; then
echo $FUNCNAME $cpu: unexpected fail >&2
- exit 1
+ retval=1
elif ! cpu_is_online $cpu; then
echo $FUNCNAME $cpu: unexpected offline >&2
- exit 1
+ retval=1
fi
}
@@ -115,10 +116,10 @@ online_cpu_expect_fail()
if online_cpu $cpu 2> /dev/null; then
echo $FUNCNAME $cpu: unexpected success >&2
- exit 1
+ retval=1
elif ! cpu_is_offline $cpu; then
echo $FUNCNAME $cpu: unexpected online >&2
- exit 1
+ retval=1
fi
}
@@ -128,10 +129,10 @@ offline_cpu_expect_success()
if ! offline_cpu $cpu; then
echo $FUNCNAME $cpu: unexpected fail >&2
- exit 1
+ retval=1
elif ! cpu_is_offline $cpu; then
echo $FUNCNAME $cpu: unexpected offline >&2
- exit 1
+ retval=1
fi
}
@@ -141,16 +142,33 @@ offline_cpu_expect_fail()
if offline_cpu $cpu 2> /dev/null; then
echo $FUNCNAME $cpu: unexpected success >&2
- exit 1
+ retval=1
elif ! cpu_is_online $cpu; then
echo $FUNCNAME $cpu: unexpected offline >&2
- exit 1
+ retval=1
fi
}
-error=-12
+online_all_hot_pluggable_cpus()
+{
+ for cpu in `hotplaggable_offline_cpus`; do
+ online_cpu_expect_success $cpu
+ done
+}
+
+offline_all_hot_pluggable_cpus()
+{
+ local reserve_cpu=$online_max
+ for cpu in `hotpluggable_online_cpus`; do
+ # Reserve one cpu oneline at least.
+ if [ $cpu -eq $reserve_cpu ];then
+ continue
+ fi
+ offline_cpu_expect_success $cpu
+ done
+}
+
allcpus=0
-priority=0
online_cpus=0
online_max=0
offline_cpus=0
@@ -158,31 +176,20 @@ offline_max=0
present_cpus=0
present_max=0
-while getopts e:ahp: opt; do
+while getopts ah opt; do
case $opt in
- e)
- error=$OPTARG
- ;;
a)
allcpus=1
;;
h)
- echo "Usage $0 [ -a ] [ -e errno ] [ -p notifier-priority ]"
+ echo "Usage $0 [ -a ]"
echo -e "\t default offline one cpu"
echo -e "\t run with -a option to offline all cpus"
exit
;;
- p)
- priority=$OPTARG
- ;;
esac
done
-if ! [ "$error" -ge -4095 -a "$error" -lt 0 ]; then
- echo "error code must be -4095 <= errno < 0" >&2
- exit 1
-fi
-
prerequisite
#
@@ -196,12 +203,12 @@ if [ $allcpus -eq 0 ]; then
online_cpu_expect_success $online_max
if [[ $offline_cpus -gt 0 ]]; then
- echo -e "\t offline to online to offline: cpu $present_max"
+ echo -e "\t online to offline to online: cpu $present_max"
online_cpu_expect_success $present_max
offline_cpu_expect_success $present_max
online_cpu $present_max
fi
- exit 0
+ exit $retval
else
echo "Full scope test: all hotplug cpus"
echo -e "\t online all offline cpus"
@@ -209,85 +216,10 @@ else
echo -e "\t online all offline cpus"
fi
-#
-# Online all hot-pluggable CPUs
-#
-for cpu in `hotplaggable_offline_cpus`; do
- online_cpu_expect_success $cpu
-done
-
-#
-# Offline all hot-pluggable CPUs
-#
-for cpu in `hotpluggable_online_cpus`; do
- offline_cpu_expect_success $cpu
-done
-
-#
-# Online all hot-pluggable CPUs again
-#
-for cpu in `hotplaggable_offline_cpus`; do
- online_cpu_expect_success $cpu
-done
-
-#
-# Test with cpu notifier error injection
-#
+online_all_hot_pluggable_cpus
-DEBUGFS=`mount -t debugfs | head -1 | awk '{ print $3 }'`
-NOTIFIER_ERR_INJECT_DIR=$DEBUGFS/notifier-error-inject/cpu
+offline_all_hot_pluggable_cpus
-prerequisite_extra()
-{
- msg="skip extra tests:"
-
- /sbin/modprobe -q -r cpu-notifier-error-inject
- /sbin/modprobe -q cpu-notifier-error-inject priority=$priority
-
- if [ ! -d "$DEBUGFS" ]; then
- echo $msg debugfs is not mounted >&2
- exit $ksft_skip
- fi
-
- if [ ! -d $NOTIFIER_ERR_INJECT_DIR ]; then
- echo $msg cpu-notifier-error-inject module is not available >&2
- exit $ksft_skip
- fi
-}
-
-prerequisite_extra
-
-#
-# Offline all hot-pluggable CPUs
-#
-echo 0 > $NOTIFIER_ERR_INJECT_DIR/actions/CPU_DOWN_PREPARE/error
-for cpu in `hotpluggable_online_cpus`; do
- offline_cpu_expect_success $cpu
-done
-
-#
-# Test CPU hot-add error handling (offline => online)
-#
-echo $error > $NOTIFIER_ERR_INJECT_DIR/actions/CPU_UP_PREPARE/error
-for cpu in `hotplaggable_offline_cpus`; do
- online_cpu_expect_fail $cpu
-done
-
-#
-# Online all hot-pluggable CPUs
-#
-echo 0 > $NOTIFIER_ERR_INJECT_DIR/actions/CPU_UP_PREPARE/error
-for cpu in `hotplaggable_offline_cpus`; do
- online_cpu_expect_success $cpu
-done
-
-#
-# Test CPU hot-remove error handling (online => offline)
-#
-echo $error > $NOTIFIER_ERR_INJECT_DIR/actions/CPU_DOWN_PREPARE/error
-for cpu in `hotpluggable_online_cpus`; do
- offline_cpu_expect_fail $cpu
-done
+online_all_hot_pluggable_cpus
-echo 0 > $NOTIFIER_ERR_INJECT_DIR/actions/CPU_DOWN_PREPARE/error
-/sbin/modprobe -q -r cpu-notifier-error-inject
+exit $retval
diff --git a/tools/testing/selftests/cpufreq/config b/tools/testing/selftests/cpufreq/config
index 27ff72ebd0f5..ce5068f5a6a2 100644
--- a/tools/testing/selftests/cpufreq/config
+++ b/tools/testing/selftests/cpufreq/config
@@ -5,11 +5,3 @@ CONFIG_CPU_FREQ_GOV_USERSPACE=y
CONFIG_CPU_FREQ_GOV_ONDEMAND=y
CONFIG_CPU_FREQ_GOV_CONSERVATIVE=y
CONFIG_CPU_FREQ_GOV_SCHEDUTIL=y
-CONFIG_DEBUG_RT_MUTEXES=y
-CONFIG_DEBUG_PI_LIST=y
-CONFIG_DEBUG_SPINLOCK=y
-CONFIG_DEBUG_MUTEXES=y
-CONFIG_DEBUG_LOCK_ALLOC=y
-CONFIG_PROVE_LOCKING=y
-CONFIG_LOCKDEP=y
-CONFIG_DEBUG_ATOMIC_SLEEP=y
diff --git a/tools/testing/selftests/cpufreq/main.sh b/tools/testing/selftests/cpufreq/main.sh
index 31f8c9a76c5f..60ce18ed0666 100755
--- a/tools/testing/selftests/cpufreq/main.sh
+++ b/tools/testing/selftests/cpufreq/main.sh
@@ -194,5 +194,5 @@ prerequisite
# Run requested functions
clear_dumps $OUTFILE
-do_test >> $OUTFILE.txt
+do_test | tee -a $OUTFILE.txt
dmesg_dumps $OUTFILE
diff --git a/tools/testing/selftests/damon/.gitignore b/tools/testing/selftests/damon/.gitignore
new file mode 100644
index 000000000000..e65ef9d9cedc
--- /dev/null
+++ b/tools/testing/selftests/damon/.gitignore
@@ -0,0 +1,5 @@
+# SPDX-License-Identifier: GPL-2.0-only
+huge_count_read_write
+debugfs_target_ids_read_before_terminate_race
+debugfs_target_ids_pid_leak
+access_memory
diff --git a/tools/testing/selftests/damon/Makefile b/tools/testing/selftests/damon/Makefile
new file mode 100644
index 000000000000..789d6949c247
--- /dev/null
+++ b/tools/testing/selftests/damon/Makefile
@@ -0,0 +1,22 @@
+# SPDX-License-Identifier: GPL-2.0
+# Makefile for damon selftests
+
+TEST_GEN_FILES += huge_count_read_write
+TEST_GEN_FILES += debugfs_target_ids_read_before_terminate_race
+TEST_GEN_FILES += debugfs_target_ids_pid_leak
+TEST_GEN_FILES += access_memory
+
+TEST_FILES = _chk_dependency.sh _debugfs_common.sh
+TEST_PROGS = debugfs_attrs.sh debugfs_schemes.sh debugfs_target_ids.sh
+TEST_PROGS += debugfs_empty_targets.sh debugfs_huge_count_read_write.sh
+TEST_PROGS += debugfs_duplicate_context_creation.sh
+TEST_PROGS += debugfs_rm_non_contexts.sh
+TEST_PROGS += debugfs_target_ids_read_before_terminate_race.sh
+TEST_PROGS += debugfs_target_ids_pid_leak.sh
+TEST_PROGS += sysfs.sh sysfs_update_removed_scheme_dir.sh
+TEST_PROGS += sysfs_update_schemes_tried_regions_hang.py
+TEST_PROGS += sysfs_update_schemes_tried_regions_wss_estimation.py
+TEST_PROGS += damos_quota.py damos_apply_interval.py
+TEST_PROGS += reclaim.sh lru_sort.sh
+
+include ../lib.mk
diff --git a/tools/testing/selftests/damon/_chk_dependency.sh b/tools/testing/selftests/damon/_chk_dependency.sh
new file mode 100644
index 000000000000..dda3a87dc00a
--- /dev/null
+++ b/tools/testing/selftests/damon/_chk_dependency.sh
@@ -0,0 +1,52 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+# Kselftest framework requirement - SKIP code is 4.
+ksft_skip=4
+
+DBGFS=$(grep debugfs /proc/mounts --max-count 1 | awk '{print $2}')
+if [ "$DBGFS" = "" ]
+then
+ echo "debugfs not mounted"
+ exit $ksft_skip
+fi
+
+DBGFS+="/damon"
+
+if [ $EUID -ne 0 ];
+then
+ echo "Run as root"
+ exit $ksft_skip
+fi
+
+if [ ! -d "$DBGFS" ]
+then
+ echo "$DBGFS not found"
+ exit $ksft_skip
+fi
+
+if [ -f "$DBGFS/monitor_on_DEPRECATED" ]
+then
+ monitor_on_file="monitor_on_DEPRECATED"
+else
+ monitor_on_file="monitor_on"
+fi
+
+for f in attrs target_ids "$monitor_on_file"
+do
+ if [ ! -f "$DBGFS/$f" ]
+ then
+ echo "$f not found"
+ exit 1
+ fi
+done
+
+permission_error="Operation not permitted"
+for f in attrs target_ids "$monitor_on_file"
+do
+ status=$( cat "$DBGFS/$f" 2>&1 )
+ if [ "${status#*$permission_error}" != "$status" ]; then
+ echo "Permission for reading $DBGFS/$f denied; maybe secureboot enabled?"
+ exit $ksft_skip
+ fi
+done
diff --git a/tools/testing/selftests/damon/_damon_sysfs.py b/tools/testing/selftests/damon/_damon_sysfs.py
new file mode 100644
index 000000000000..d23d7398a27a
--- /dev/null
+++ b/tools/testing/selftests/damon/_damon_sysfs.py
@@ -0,0 +1,385 @@
+# SPDX-License-Identifier: GPL-2.0
+
+import os
+
+sysfs_root = '/sys/kernel/mm/damon/admin'
+
+def write_file(path, string):
+ "Returns error string if failed, or None otherwise"
+ string = '%s' % string
+ try:
+ with open(path, 'w') as f:
+ f.write(string)
+ except Exception as e:
+ return '%s' % e
+ return None
+
+def read_file(path):
+ '''Returns the read content and error string. The read content is None if
+ the reading failed'''
+ try:
+ with open(path, 'r') as f:
+ return f.read(), None
+ except Exception as e:
+ return None, '%s' % e
+
+class DamosAccessPattern:
+ size = None
+ nr_accesses = None
+ age = None
+ scheme = None
+
+ def __init__(self, size=None, nr_accesses=None, age=None):
+ self.size = size
+ self.nr_accesses = nr_accesses
+ self.age = age
+
+ if self.size == None:
+ self.size = [0, 2**64 - 1]
+ if self.nr_accesses == None:
+ self.nr_accesses = [0, 2**64 - 1]
+ if self.age == None:
+ self.age = [0, 2**64 - 1]
+
+ def sysfs_dir(self):
+ return os.path.join(self.scheme.sysfs_dir(), 'access_pattern')
+
+ def stage(self):
+ err = write_file(
+ os.path.join(self.sysfs_dir(), 'sz', 'min'), self.size[0])
+ if err != None:
+ return err
+ err = write_file(
+ os.path.join(self.sysfs_dir(), 'sz', 'max'), self.size[1])
+ if err != None:
+ return err
+ err = write_file(os.path.join(self.sysfs_dir(), 'nr_accesses', 'min'),
+ self.nr_accesses[0])
+ if err != None:
+ return err
+ err = write_file(os.path.join(self.sysfs_dir(), 'nr_accesses', 'max'),
+ self.nr_accesses[1])
+ if err != None:
+ return err
+ err = write_file(
+ os.path.join(self.sysfs_dir(), 'age', 'min'), self.age[0])
+ if err != None:
+ return err
+ err = write_file(
+ os.path.join(self.sysfs_dir(), 'age', 'max'), self.age[1])
+ if err != None:
+ return err
+
+class DamosQuota:
+ sz = None # size quota, in bytes
+ ms = None # time quota
+ reset_interval_ms = None # quota reset interval
+ scheme = None # owner scheme
+
+ def __init__(self, sz=0, ms=0, reset_interval_ms=0):
+ self.sz = sz
+ self.ms = ms
+ self.reset_interval_ms = reset_interval_ms
+
+ def sysfs_dir(self):
+ return os.path.join(self.scheme.sysfs_dir(), 'quotas')
+
+ def stage(self):
+ err = write_file(os.path.join(self.sysfs_dir(), 'bytes'), self.sz)
+ if err != None:
+ return err
+ err = write_file(os.path.join(self.sysfs_dir(), 'ms'), self.ms)
+ if err != None:
+ return err
+ err = write_file(os.path.join(self.sysfs_dir(), 'reset_interval_ms'),
+ self.reset_interval_ms)
+ if err != None:
+ return err
+
+class DamosStats:
+ nr_tried = None
+ sz_tried = None
+ nr_applied = None
+ sz_applied = None
+ qt_exceeds = None
+
+ def __init__(self, nr_tried, sz_tried, nr_applied, sz_applied, qt_exceeds):
+ self.nr_tried = nr_tried
+ self.sz_tried = sz_tried
+ self.nr_applied = nr_applied
+ self.sz_applied = sz_applied
+ self.qt_exceeds = qt_exceeds
+
+class Damos:
+ action = None
+ access_pattern = None
+ quota = None
+ apply_interval_us = None
+ # todo: Support watermarks, stats, tried_regions
+ idx = None
+ context = None
+ tried_bytes = None
+ stats = None
+
+ def __init__(self, action='stat', access_pattern=DamosAccessPattern(),
+ quota=DamosQuota(), apply_interval_us=0):
+ self.action = action
+ self.access_pattern = access_pattern
+ self.access_pattern.scheme = self
+ self.quota = quota
+ self.quota.scheme = self
+ self.apply_interval_us = apply_interval_us
+
+ def sysfs_dir(self):
+ return os.path.join(
+ self.context.sysfs_dir(), 'schemes', '%d' % self.idx)
+
+ def stage(self):
+ err = write_file(os.path.join(self.sysfs_dir(), 'action'), self.action)
+ if err != None:
+ return err
+ err = self.access_pattern.stage()
+ if err != None:
+ return err
+ err = write_file(os.path.join(self.sysfs_dir(), 'apply_interval_us'),
+ '%d' % self.apply_interval_us)
+ if err != None:
+ return err
+
+ err = self.quota.stage()
+ if err != None:
+ return err
+
+ # disable watermarks
+ err = write_file(
+ os.path.join(self.sysfs_dir(), 'watermarks', 'metric'), 'none')
+ if err != None:
+ return err
+
+ # disable filters
+ err = write_file(
+ os.path.join(self.sysfs_dir(), 'filters', 'nr_filters'), '0')
+ if err != None:
+ return err
+
+class DamonTarget:
+ pid = None
+ # todo: Support target regions if test is made
+ idx = None
+ context = None
+
+ def __init__(self, pid):
+ self.pid = pid
+
+ def sysfs_dir(self):
+ return os.path.join(
+ self.context.sysfs_dir(), 'targets', '%d' % self.idx)
+
+ def stage(self):
+ err = write_file(
+ os.path.join(self.sysfs_dir(), 'regions', 'nr_regions'), '0')
+ if err != None:
+ return err
+ return write_file(
+ os.path.join(self.sysfs_dir(), 'pid_target'), self.pid)
+
+class DamonAttrs:
+ sample_us = None
+ aggr_us = None
+ update_us = None
+ min_nr_regions = None
+ max_nr_regions = None
+ context = None
+
+ def __init__(self, sample_us=5000, aggr_us=100000, update_us=1000000,
+ min_nr_regions=10, max_nr_regions=1000):
+ self.sample_us = sample_us
+ self.aggr_us = aggr_us
+ self.update_us = update_us
+ self.min_nr_regions = min_nr_regions
+ self.max_nr_regions = max_nr_regions
+
+ def interval_sysfs_dir(self):
+ return os.path.join(self.context.sysfs_dir(), 'monitoring_attrs',
+ 'intervals')
+
+ def nr_regions_range_sysfs_dir(self):
+ return os.path.join(self.context.sysfs_dir(), 'monitoring_attrs',
+ 'nr_regions')
+
+ def stage(self):
+ err = write_file(os.path.join(self.interval_sysfs_dir(), 'sample_us'),
+ self.sample_us)
+ if err != None:
+ return err
+ err = write_file(os.path.join(self.interval_sysfs_dir(), 'aggr_us'),
+ self.aggr_us)
+ if err != None:
+ return err
+ err = write_file(os.path.join(self.interval_sysfs_dir(), 'update_us'),
+ self.update_us)
+ if err != None:
+ return err
+
+ err = write_file(
+ os.path.join(self.nr_regions_range_sysfs_dir(), 'min'),
+ self.min_nr_regions)
+ if err != None:
+ return err
+
+ err = write_file(
+ os.path.join(self.nr_regions_range_sysfs_dir(), 'max'),
+ self.max_nr_regions)
+ if err != None:
+ return err
+
+class DamonCtx:
+ ops = None
+ monitoring_attrs = None
+ targets = None
+ schemes = None
+ kdamond = None
+ idx = None
+
+ def __init__(self, ops='paddr', monitoring_attrs=DamonAttrs(), targets=[],
+ schemes=[]):
+ self.ops = ops
+ self.monitoring_attrs = monitoring_attrs
+ self.monitoring_attrs.context = self
+
+ self.targets = targets
+ for idx, target in enumerate(self.targets):
+ target.idx = idx
+ target.context = self
+
+ self.schemes = schemes
+ for idx, scheme in enumerate(self.schemes):
+ scheme.idx = idx
+ scheme.context = self
+
+ def sysfs_dir(self):
+ return os.path.join(self.kdamond.sysfs_dir(), 'contexts',
+ '%d' % self.idx)
+
+ def stage(self):
+ err = write_file(
+ os.path.join(self.sysfs_dir(), 'operations'), self.ops)
+ if err != None:
+ return err
+ err = self.monitoring_attrs.stage()
+ if err != None:
+ return err
+
+ nr_targets_file = os.path.join(
+ self.sysfs_dir(), 'targets', 'nr_targets')
+ content, err = read_file(nr_targets_file)
+ if err != None:
+ return err
+ if int(content) != len(self.targets):
+ err = write_file(nr_targets_file, '%d' % len(self.targets))
+ if err != None:
+ return err
+ for target in self.targets:
+ err = target.stage()
+ if err != None:
+ return err
+
+ nr_schemes_file = os.path.join(
+ self.sysfs_dir(), 'schemes', 'nr_schemes')
+ content, err = read_file(nr_schemes_file)
+ if int(content) != len(self.schemes):
+ err = write_file(nr_schemes_file, '%d' % len(self.schemes))
+ if err != None:
+ return err
+ for scheme in self.schemes:
+ err = scheme.stage()
+ if err != None:
+ return err
+ return None
+
+class Kdamond:
+ state = None
+ pid = None
+ contexts = None
+ idx = None # index of this kdamond between siblings
+ kdamonds = None # parent
+
+ def __init__(self, contexts=[]):
+ self.contexts = contexts
+ for idx, context in enumerate(self.contexts):
+ context.idx = idx
+ context.kdamond = self
+
+ def sysfs_dir(self):
+ return os.path.join(self.kdamonds.sysfs_dir(), '%d' % self.idx)
+
+ def start(self):
+ nr_contexts_file = os.path.join(self.sysfs_dir(),
+ 'contexts', 'nr_contexts')
+ content, err = read_file(nr_contexts_file)
+ if err != None:
+ return err
+ if int(content) != len(self.contexts):
+ err = write_file(nr_contexts_file, '%d' % len(self.contexts))
+ if err != None:
+ return err
+
+ for context in self.contexts:
+ err = context.stage()
+ if err != None:
+ return err
+ err = write_file(os.path.join(self.sysfs_dir(), 'state'), 'on')
+ return err
+
+ def update_schemes_tried_bytes(self):
+ err = write_file(os.path.join(self.sysfs_dir(), 'state'),
+ 'update_schemes_tried_bytes')
+ if err != None:
+ return err
+ for context in self.contexts:
+ for scheme in context.schemes:
+ content, err = read_file(os.path.join(scheme.sysfs_dir(),
+ 'tried_regions', 'total_bytes'))
+ if err != None:
+ return err
+ scheme.tried_bytes = int(content)
+
+ def update_schemes_stats(self):
+ err = write_file(os.path.join(self.sysfs_dir(), 'state'),
+ 'update_schemes_stats')
+ if err != None:
+ return err
+ for context in self.contexts:
+ for scheme in context.schemes:
+ stat_values = []
+ for stat in ['nr_tried', 'sz_tried', 'nr_applied',
+ 'sz_applied', 'qt_exceeds']:
+ content, err = read_file(
+ os.path.join(scheme.sysfs_dir(), 'stats', stat))
+ if err != None:
+ return err
+ stat_values.append(int(content))
+ scheme.stats = DamosStats(*stat_values)
+
+class Kdamonds:
+ kdamonds = []
+
+ def __init__(self, kdamonds=[]):
+ self.kdamonds = kdamonds
+ for idx, kdamond in enumerate(self.kdamonds):
+ kdamond.idx = idx
+ kdamond.kdamonds = self
+
+ def sysfs_dir(self):
+ return os.path.join(sysfs_root, 'kdamonds')
+
+ def start(self):
+ err = write_file(os.path.join(self.sysfs_dir(), 'nr_kdamonds'),
+ '%s' % len(self.kdamonds))
+ if err != None:
+ return err
+ for kdamond in self.kdamonds:
+ err = kdamond.start()
+ if err != None:
+ return err
+ return None
diff --git a/tools/testing/selftests/damon/_debugfs_common.sh b/tools/testing/selftests/damon/_debugfs_common.sh
new file mode 100644
index 000000000000..aa995516870b
--- /dev/null
+++ b/tools/testing/selftests/damon/_debugfs_common.sh
@@ -0,0 +1,59 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+test_write_result() {
+ file=$1
+ content=$2
+ orig_content=$3
+ expect_reason=$4
+ expected=$5
+
+ echo "$content" > "$file"
+ if [ $? -ne "$expected" ]
+ then
+ echo "writing $content to $file doesn't return $expected"
+ echo "expected because: $expect_reason"
+ echo "$orig_content" > "$file"
+ exit 1
+ fi
+}
+
+test_write_succ() {
+ test_write_result "$1" "$2" "$3" "$4" 0
+}
+
+test_write_fail() {
+ test_write_result "$1" "$2" "$3" "$4" 1
+}
+
+test_content() {
+ file=$1
+ orig_content=$2
+ expected=$3
+ expect_reason=$4
+
+ content=$(cat "$file")
+ if [ "$content" != "$expected" ]
+ then
+ echo "reading $file expected $expected but $content"
+ echo "expected because: $expect_reason"
+ echo "$orig_content" > "$file"
+ exit 1
+ fi
+}
+
+source ./_chk_dependency.sh
+
+damon_onoff="$DBGFS/monitor_on"
+if [ -f "$DBGFS/monitor_on_DEPRECATED" ]
+then
+ damon_onoff="$DBGFS/monitor_on_DEPRECATED"
+else
+ damon_onoff="$DBGFS/monitor_on"
+fi
+
+if [ $(cat "$damon_onoff") = "on" ]
+then
+ echo "monitoring is on"
+ exit $ksft_skip
+fi
diff --git a/tools/testing/selftests/damon/access_memory.c b/tools/testing/selftests/damon/access_memory.c
new file mode 100644
index 000000000000..585a2fa54329
--- /dev/null
+++ b/tools/testing/selftests/damon/access_memory.c
@@ -0,0 +1,41 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Artificial memory access program for testing DAMON.
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <time.h>
+
+int main(int argc, char *argv[])
+{
+ char **regions;
+ clock_t start_clock;
+ int nr_regions;
+ int sz_region;
+ int access_time_ms;
+ int i;
+
+ if (argc != 4) {
+ printf("Usage: %s <number> <size (bytes)> <time (ms)>\n",
+ argv[0]);
+ return -1;
+ }
+
+ nr_regions = atoi(argv[1]);
+ sz_region = atoi(argv[2]);
+ access_time_ms = atoi(argv[3]);
+
+ regions = malloc(sizeof(*regions) * nr_regions);
+ for (i = 0; i < nr_regions; i++)
+ regions[i] = malloc(sz_region);
+
+ for (i = 0; i < nr_regions; i++) {
+ start_clock = clock();
+ while ((clock() - start_clock) * 1000 / CLOCKS_PER_SEC <
+ access_time_ms)
+ memset(regions[i], i, 1024 * 1024 * 10);
+ }
+ return 0;
+}
diff --git a/tools/testing/selftests/damon/config b/tools/testing/selftests/damon/config
new file mode 100644
index 000000000000..0daf38974eb0
--- /dev/null
+++ b/tools/testing/selftests/damon/config
@@ -0,0 +1,7 @@
+CONFIG_DAMON=y
+CONFIG_DAMON_SYSFS=y
+CONFIG_DAMON_DBGFS=y
+CONFIG_DAMON_PADDR=y
+CONFIG_DAMON_VADDR=y
+CONFIG_DAMON_RECLAIM=y
+CONFIG_DAMON_LRU_SORT=y
diff --git a/tools/testing/selftests/damon/damos_apply_interval.py b/tools/testing/selftests/damon/damos_apply_interval.py
new file mode 100644
index 000000000000..f04d43702481
--- /dev/null
+++ b/tools/testing/selftests/damon/damos_apply_interval.py
@@ -0,0 +1,67 @@
+#!/usr/bin/env python3
+# SPDX-License-Identifier: GPL-2.0
+
+import subprocess
+import time
+
+import _damon_sysfs
+
+def main():
+ # access two 10 MiB memory regions, 2 second per each
+ sz_region = 10 * 1024 * 1024
+ proc = subprocess.Popen(['./access_memory', '2', '%d' % sz_region, '2000'])
+
+ # Set quota up to 1 MiB per 100 ms
+ kdamonds = _damon_sysfs.Kdamonds([_damon_sysfs.Kdamond(
+ contexts=[_damon_sysfs.DamonCtx(
+ ops='vaddr',
+ targets=[_damon_sysfs.DamonTarget(pid=proc.pid)],
+ schemes=[
+ _damon_sysfs.Damos(
+ access_pattern=_damon_sysfs.DamosAccessPattern(
+ # >= 25% access rate, >= 200ms age
+ nr_accesses=[5, 20], age=[2, 2**64 - 1]),
+ # aggregation interval (100 ms) is used
+ apply_interval_us=0),
+ # use 10ms apply interval
+ _damon_sysfs.Damos(
+ access_pattern=_damon_sysfs.DamosAccessPattern(
+ # >= 25% access rate, >= 200ms age
+ nr_accesses=[5, 20], age=[2, 2**64 - 1]),
+ # explicitly set 10 ms apply interval
+ apply_interval_us=10 * 1000)
+ ] # schemes
+ )] # contexts
+ )]) # kdamonds
+
+ err = kdamonds.start()
+ if err != None:
+ print('kdamond start failed: %s' % err)
+ exit(1)
+
+ wss_collected = []
+ nr_quota_exceeds = 0
+ while proc.poll() == None:
+ time.sleep(0.1)
+ err = kdamonds.kdamonds[0].update_schemes_stats()
+ if err != None:
+ print('stats update failed: %s' % err)
+ exit(1)
+ schemes = kdamonds.kdamonds[0].contexts[0].schemes
+ nr_tried_stats = [s.stats.nr_tried for s in schemes]
+ if nr_tried_stats[0] == 0 or nr_tried_stats[1] == 0:
+ print('scheme(s) are not tried')
+ exit(1)
+
+ # Because the second scheme was having the apply interval that is ten times
+ # lower than that of the first scheme, the second scheme should be tried
+ # about ten times more frequently than the first scheme. For possible
+ # timing errors, check if it was at least nine times more freuqnetly tried.
+ ratio = nr_tried_stats[1] / nr_tried_stats[0]
+ if ratio < 9:
+ print('%d / %d = %f (< 9)' %
+ (nr_tried_stats[1], nr_tried_stats[0], ratio))
+ exit(1)
+
+if __name__ == '__main__':
+ main()
diff --git a/tools/testing/selftests/damon/damos_quota.py b/tools/testing/selftests/damon/damos_quota.py
new file mode 100644
index 000000000000..7d4c6bb2e3cd
--- /dev/null
+++ b/tools/testing/selftests/damon/damos_quota.py
@@ -0,0 +1,67 @@
+#!/usr/bin/env python3
+# SPDX-License-Identifier: GPL-2.0
+
+import subprocess
+import time
+
+import _damon_sysfs
+
+def main():
+ # access two 10 MiB memory regions, 2 second per each
+ sz_region = 10 * 1024 * 1024
+ proc = subprocess.Popen(['./access_memory', '2', '%d' % sz_region, '2000'])
+
+ # Set quota up to 1 MiB per 100 ms
+ sz_quota = 1024 * 1024 # 1 MiB
+ quota_reset_interval = 100 # 100 ms
+ kdamonds = _damon_sysfs.Kdamonds([_damon_sysfs.Kdamond(
+ contexts=[_damon_sysfs.DamonCtx(
+ ops='vaddr',
+ targets=[_damon_sysfs.DamonTarget(pid=proc.pid)],
+ schemes=[_damon_sysfs.Damos(
+ access_pattern=_damon_sysfs.DamosAccessPattern(
+ # >= 25% access rate, >= 200ms age
+ nr_accesses=[5, 20], age=[2, 2**64 - 1]),
+ quota=_damon_sysfs.DamosQuota(
+ sz=sz_quota, reset_interval_ms=quota_reset_interval)
+ )] # schemes
+ )] # contexts
+ )]) # kdamonds
+
+ err = kdamonds.start()
+ if err != None:
+ print('kdamond start failed: %s' % err)
+ exit(1)
+
+ wss_collected = []
+ nr_quota_exceeds = 0
+ while proc.poll() == None:
+ time.sleep(0.1)
+ err = kdamonds.kdamonds[0].update_schemes_tried_bytes()
+ if err != None:
+ print('tried bytes update failed: %s' % err)
+ exit(1)
+ err = kdamonds.kdamonds[0].update_schemes_stats()
+ if err != None:
+ print('stats update failed: %s' % err)
+ exit(1)
+
+ scheme = kdamonds.kdamonds[0].contexts[0].schemes[0]
+ wss_collected.append(scheme.tried_bytes)
+ nr_quota_exceeds = scheme.stats.qt_exceeds
+
+ wss_collected.sort()
+ for wss in wss_collected:
+ if wss > sz_quota:
+ print('quota is not kept: %s > %s' % (wss, sz_quota))
+ print('collected samples are as below')
+ print('\n'.join(['%d' % wss for wss in wss_collected]))
+ exit(1)
+
+ if nr_quota_exceeds < len(wss_collected):
+ print('quota is not always exceeded: %d > %d' %
+ (len(wss_collected), nr_quota_exceeds))
+ exit(1)
+
+if __name__ == '__main__':
+ main()
diff --git a/tools/testing/selftests/damon/debugfs_attrs.sh b/tools/testing/selftests/damon/debugfs_attrs.sh
new file mode 100755
index 000000000000..902e312bca89
--- /dev/null
+++ b/tools/testing/selftests/damon/debugfs_attrs.sh
@@ -0,0 +1,17 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+source _debugfs_common.sh
+
+# Test attrs file
+# ===============
+
+file="$DBGFS/attrs"
+orig_content=$(cat "$file")
+
+test_write_succ "$file" "1 2 3 4 5" "$orig_content" "valid input"
+test_write_fail "$file" "1 2 3 4" "$orig_content" "no enough fields"
+test_write_fail "$file" "1 2 3 5 4" "$orig_content" \
+ "min_nr_regions > max_nr_regions"
+test_content "$file" "$orig_content" "1 2 3 4 5" "successfully written"
+echo "$orig_content" > "$file"
diff --git a/tools/testing/selftests/damon/debugfs_duplicate_context_creation.sh b/tools/testing/selftests/damon/debugfs_duplicate_context_creation.sh
new file mode 100755
index 000000000000..4a76e37ef16b
--- /dev/null
+++ b/tools/testing/selftests/damon/debugfs_duplicate_context_creation.sh
@@ -0,0 +1,27 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+source _debugfs_common.sh
+
+# Test duplicated context creation
+# ================================
+
+if ! echo foo > "$DBGFS/mk_contexts"
+then
+ echo "context creation failed"
+ exit 1
+fi
+
+if echo foo > "$DBGFS/mk_contexts"
+then
+ echo "duplicate context creation success"
+ exit 1
+fi
+
+if ! echo foo > "$DBGFS/rm_contexts"
+then
+ echo "context deletion failed"
+ exit 1
+fi
+
+exit 0
diff --git a/tools/testing/selftests/damon/debugfs_empty_targets.sh b/tools/testing/selftests/damon/debugfs_empty_targets.sh
new file mode 100755
index 000000000000..effbea33dc16
--- /dev/null
+++ b/tools/testing/selftests/damon/debugfs_empty_targets.sh
@@ -0,0 +1,21 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+source _debugfs_common.sh
+
+# Test empty targets case
+# =======================
+
+orig_target_ids=$(cat "$DBGFS/target_ids")
+echo "" > "$DBGFS/target_ids"
+
+if [ -f "$DBGFS/monitor_on_DEPRECATED" ]
+then
+ monitor_on_file="$DBGFS/monitor_on_DEPRECATED"
+else
+ monitor_on_file="$DBGFS/monitor_on"
+fi
+
+orig_monitor_on=$(cat "$monitor_on_file")
+test_write_fail "$monitor_on_file" "on" "orig_monitor_on" "empty target ids"
+echo "$orig_target_ids" > "$DBGFS/target_ids"
diff --git a/tools/testing/selftests/damon/debugfs_huge_count_read_write.sh b/tools/testing/selftests/damon/debugfs_huge_count_read_write.sh
new file mode 100755
index 000000000000..922cadac2950
--- /dev/null
+++ b/tools/testing/selftests/damon/debugfs_huge_count_read_write.sh
@@ -0,0 +1,22 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+source _debugfs_common.sh
+
+# Test huge count read write
+# ==========================
+
+dmesg -C
+
+for file in "$DBGFS/"*
+do
+ ./huge_count_read_write "$file"
+done
+
+if dmesg | grep -q WARNING
+then
+ dmesg
+ exit 1
+else
+ exit 0
+fi
diff --git a/tools/testing/selftests/damon/debugfs_rm_non_contexts.sh b/tools/testing/selftests/damon/debugfs_rm_non_contexts.sh
new file mode 100755
index 000000000000..f3ffeb1343cf
--- /dev/null
+++ b/tools/testing/selftests/damon/debugfs_rm_non_contexts.sh
@@ -0,0 +1,19 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+source _debugfs_common.sh
+
+# Test putting non-ctx files/dirs to rm_contexts file
+# ===================================================
+
+dmesg -C
+
+for file in "$DBGFS/"*
+do
+ (echo "$(basename "$f")" > "$DBGFS/rm_contexts") &> /dev/null
+ if dmesg | grep -q BUG
+ then
+ dmesg
+ exit 1
+ fi
+done
diff --git a/tools/testing/selftests/damon/debugfs_schemes.sh b/tools/testing/selftests/damon/debugfs_schemes.sh
new file mode 100755
index 000000000000..5b39ab44731c
--- /dev/null
+++ b/tools/testing/selftests/damon/debugfs_schemes.sh
@@ -0,0 +1,19 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+source _debugfs_common.sh
+
+# Test schemes file
+# =================
+
+file="$DBGFS/schemes"
+orig_content=$(cat "$file")
+
+test_write_succ "$file" "1 2 3 4 5 6 4 0 0 0 1 2 3 1 100 3 2 1" \
+ "$orig_content" "valid input"
+test_write_fail "$file" "1 2
+3 4 5 6 3 0 0 0 1 2 3 1 100 3 2 1" "$orig_content" "multi lines"
+test_write_succ "$file" "" "$orig_content" "disabling"
+test_write_fail "$file" "2 1 2 1 10 1 3 10 1 1 1 1 1 1 1 1 2 3" \
+ "$orig_content" "wrong condition ranges"
+echo "$orig_content" > "$file"
diff --git a/tools/testing/selftests/damon/debugfs_target_ids.sh b/tools/testing/selftests/damon/debugfs_target_ids.sh
new file mode 100755
index 000000000000..49aeabdb0aae
--- /dev/null
+++ b/tools/testing/selftests/damon/debugfs_target_ids.sh
@@ -0,0 +1,19 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+source _debugfs_common.sh
+
+# Test target_ids file
+# ====================
+
+file="$DBGFS/target_ids"
+orig_content=$(cat "$file")
+
+test_write_succ "$file" "1 2 3 4" "$orig_content" "valid input"
+test_write_succ "$file" "1 2 abc 4" "$orig_content" "still valid input"
+test_content "$file" "$orig_content" "1 2" "non-integer was there"
+test_write_succ "$file" "abc 2 3" "$orig_content" "the file allows wrong input"
+test_content "$file" "$orig_content" "" "wrong input written"
+test_write_succ "$file" "" "$orig_content" "empty input"
+test_content "$file" "$orig_content" "" "empty input written"
+echo "$orig_content" > "$file"
diff --git a/tools/testing/selftests/damon/debugfs_target_ids_pid_leak.c b/tools/testing/selftests/damon/debugfs_target_ids_pid_leak.c
new file mode 100644
index 000000000000..0cc2eef7d142
--- /dev/null
+++ b/tools/testing/selftests/damon/debugfs_target_ids_pid_leak.c
@@ -0,0 +1,68 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Author: SeongJae Park <sj@kernel.org>
+ */
+
+#define _GNU_SOURCE
+
+#include <fcntl.h>
+#include <stdbool.h>
+#include <stdint.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <sys/types.h>
+#include <sys/wait.h>
+#include <sys/time.h>
+#include <unistd.h>
+
+#define DBGFS_TARGET_IDS "/sys/kernel/debug/damon/target_ids"
+
+static void write_targetid_exit(void)
+{
+ int target_ids_fd = open(DBGFS_TARGET_IDS, O_RDWR);
+ char pid_str[128];
+
+ snprintf(pid_str, sizeof(pid_str), "%d", getpid());
+ write(target_ids_fd, pid_str, sizeof(pid_str));
+ close(target_ids_fd);
+ exit(0);
+}
+
+unsigned long msec_timestamp(void)
+{
+ struct timeval tv;
+
+ gettimeofday(&tv, NULL);
+ return tv.tv_sec * 1000UL + tv.tv_usec / 1000;
+}
+
+int main(int argc, char *argv[])
+{
+ unsigned long start_ms;
+ int time_to_run, nr_forks = 0;
+
+ if (argc != 2) {
+ fprintf(stderr, "Usage: %s <msecs to run>\n", argv[0]);
+ exit(1);
+ }
+ time_to_run = atoi(argv[1]);
+
+ start_ms = msec_timestamp();
+ while (true) {
+ int pid = fork();
+
+ if (pid < 0) {
+ fprintf(stderr, "fork() failed\n");
+ exit(1);
+ }
+ if (pid == 0)
+ write_targetid_exit();
+ wait(NULL);
+ nr_forks++;
+
+ if (msec_timestamp() - start_ms > time_to_run)
+ break;
+ }
+ printf("%d\n", nr_forks);
+ return 0;
+}
diff --git a/tools/testing/selftests/damon/debugfs_target_ids_pid_leak.sh b/tools/testing/selftests/damon/debugfs_target_ids_pid_leak.sh
new file mode 100644
index 000000000000..31fe33c2b032
--- /dev/null
+++ b/tools/testing/selftests/damon/debugfs_target_ids_pid_leak.sh
@@ -0,0 +1,22 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+before=$(grep "^pid " /proc/slabinfo | awk '{print $2}')
+
+nr_leaks=$(./debugfs_target_ids_pid_leak 1000)
+expected_after_max=$((before + nr_leaks / 2))
+
+after=$(grep "^pid " /proc/slabinfo | awk '{print $2}')
+
+echo > /sys/kernel/debug/damon/target_ids
+
+echo "tried $nr_leaks pid leak"
+echo "number of active pid slabs: $before -> $after"
+echo "(up to $expected_after_max expected)"
+if [ $after -gt $expected_after_max ]
+then
+ echo "maybe pids are leaking"
+ exit 1
+else
+ exit 0
+fi
diff --git a/tools/testing/selftests/damon/debugfs_target_ids_read_before_terminate_race.c b/tools/testing/selftests/damon/debugfs_target_ids_read_before_terminate_race.c
new file mode 100644
index 000000000000..b06f52a8ce2d
--- /dev/null
+++ b/tools/testing/selftests/damon/debugfs_target_ids_read_before_terminate_race.c
@@ -0,0 +1,80 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Author: SeongJae Park <sj@kernel.org>
+ */
+#define _GNU_SOURCE
+
+#include <fcntl.h>
+#include <stdbool.h>
+#include <stdint.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <sys/types.h>
+#include <sys/wait.h>
+#include <time.h>
+#include <unistd.h>
+
+#define DBGFS_MONITOR_ON "/sys/kernel/debug/damon/monitor_on_DEPRECATED"
+#define DBGFS_TARGET_IDS "/sys/kernel/debug/damon/target_ids"
+
+static void turn_damon_on_exit(void)
+{
+ int target_ids_fd = open(DBGFS_TARGET_IDS, O_RDWR);
+ int monitor_on_fd = open(DBGFS_MONITOR_ON, O_RDWR);
+ char pid_str[128];
+
+ snprintf(pid_str, sizeof(pid_str), "%d", getpid());
+ write(target_ids_fd, pid_str, sizeof(pid_str));
+ write(monitor_on_fd, "on\n", 3);
+ close(target_ids_fd);
+ close(monitor_on_fd);
+ usleep(1000);
+ exit(0);
+}
+
+static void try_race(void)
+{
+ int target_ids_fd = open(DBGFS_TARGET_IDS, O_RDWR);
+ int pid = fork();
+ int buf[256];
+
+ if (pid < 0) {
+ fprintf(stderr, "fork() failed\n");
+ exit(1);
+ }
+ if (pid == 0)
+ turn_damon_on_exit();
+ while (true) {
+ int status;
+
+ read(target_ids_fd, buf, sizeof(buf));
+ if (waitpid(-1, &status, WNOHANG) == pid)
+ break;
+ }
+ close(target_ids_fd);
+}
+
+static inline uint64_t ts_to_ms(struct timespec *ts)
+{
+ return (uint64_t)ts->tv_sec * 1000 + (uint64_t)ts->tv_nsec / 1000000;
+}
+
+int main(int argc, char *argv[])
+{
+ struct timespec start_time, now;
+ int runtime_ms;
+
+ if (argc != 2) {
+ fprintf(stderr, "Usage: %s <runtime in ms>\n", argv[0]);
+ exit(1);
+ }
+ runtime_ms = atoi(argv[1]);
+ clock_gettime(CLOCK_MONOTONIC, &start_time);
+ while (true) {
+ try_race();
+ clock_gettime(CLOCK_MONOTONIC, &now);
+ if (ts_to_ms(&now) - ts_to_ms(&start_time) > runtime_ms)
+ break;
+ }
+ return 0;
+}
diff --git a/tools/testing/selftests/damon/debugfs_target_ids_read_before_terminate_race.sh b/tools/testing/selftests/damon/debugfs_target_ids_read_before_terminate_race.sh
new file mode 100644
index 000000000000..fc793c4c9aea
--- /dev/null
+++ b/tools/testing/selftests/damon/debugfs_target_ids_read_before_terminate_race.sh
@@ -0,0 +1,14 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+dmesg -C
+
+./debugfs_target_ids_read_before_terminate_race 5000
+
+if dmesg | grep -q dbgfs_target_ids_read
+then
+ dmesg
+ exit 1
+else
+ exit 0
+fi
diff --git a/tools/testing/selftests/damon/huge_count_read_write.c b/tools/testing/selftests/damon/huge_count_read_write.c
new file mode 100644
index 000000000000..a6fe0689f88d
--- /dev/null
+++ b/tools/testing/selftests/damon/huge_count_read_write.c
@@ -0,0 +1,48 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Author: SeongJae Park <sj@kernel.org>
+ */
+
+#include <fcntl.h>
+#include <stdlib.h>
+#include <unistd.h>
+#include <stdio.h>
+
+#pragma GCC diagnostic push
+#if __GNUC__ >= 11 && __GNUC_MINOR__ >= 1
+/* Ignore read(2) overflow and write(2) overread compile warnings */
+#pragma GCC diagnostic ignored "-Wstringop-overread"
+#pragma GCC diagnostic ignored "-Wstringop-overflow"
+#endif
+
+void write_read_with_huge_count(char *file)
+{
+ int filedesc = open(file, O_RDWR);
+ char buf[25];
+ int ret;
+
+ printf("%s %s\n", __func__, file);
+ if (filedesc < 0) {
+ fprintf(stderr, "failed opening %s\n", file);
+ exit(1);
+ }
+
+ write(filedesc, "", 0xfffffffful);
+ perror("after write: ");
+ ret = read(filedesc, buf, 0xfffffffful);
+ perror("after read: ");
+ close(filedesc);
+}
+
+#pragma GCC diagnostic pop
+
+int main(int argc, char *argv[])
+{
+ if (argc != 2) {
+ fprintf(stderr, "Usage: %s <file>\n", argv[0]);
+ exit(1);
+ }
+ write_read_with_huge_count(argv[1]);
+
+ return 0;
+}
diff --git a/tools/testing/selftests/damon/lru_sort.sh b/tools/testing/selftests/damon/lru_sort.sh
new file mode 100755
index 000000000000..61b80197c896
--- /dev/null
+++ b/tools/testing/selftests/damon/lru_sort.sh
@@ -0,0 +1,41 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+# Kselftest framework requirement - SKIP code is 4.
+ksft_skip=4
+
+if [ $EUID -ne 0 ]
+then
+ echo "Run as root"
+ exit $ksft_skip
+fi
+
+damon_lru_sort_enabled="/sys/module/damon_lru_sort/parameters/enabled"
+if [ ! -f "$damon_lru_sort_enabled" ]
+then
+ echo "No 'enabled' file. Maybe DAMON_LRU_SORT not built"
+ exit $ksft_skip
+fi
+
+nr_kdamonds=$(pgrep kdamond | wc -l)
+if [ "$nr_kdamonds" -ne 0 ]
+then
+ echo "Another kdamond is running"
+ exit $ksft_skip
+fi
+
+echo Y > "$damon_lru_sort_enabled"
+nr_kdamonds=$(pgrep kdamond | wc -l)
+if [ "$nr_kdamonds" -ne 1 ]
+then
+ echo "kdamond is not turned on"
+ exit 1
+fi
+
+echo N > "$damon_lru_sort_enabled"
+nr_kdamonds=$(pgrep kdamond | wc -l)
+if [ "$nr_kdamonds" -ne 0 ]
+then
+ echo "kdamond is not turned off"
+ exit 1
+fi
diff --git a/tools/testing/selftests/damon/reclaim.sh b/tools/testing/selftests/damon/reclaim.sh
new file mode 100755
index 000000000000..78dbc2334cbe
--- /dev/null
+++ b/tools/testing/selftests/damon/reclaim.sh
@@ -0,0 +1,42 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+# Kselftest framework requirement - SKIP code is 4.
+ksft_skip=4
+
+if [ $EUID -ne 0 ]
+then
+ echo "Run as root"
+ exit $ksft_skip
+fi
+
+damon_reclaim_enabled="/sys/module/damon_reclaim/parameters/enabled"
+if [ ! -f "$damon_reclaim_enabled" ]
+then
+ echo "No 'enabled' file. Maybe DAMON_RECLAIM not built"
+ exit $ksft_skip
+fi
+
+nr_kdamonds=$(pgrep kdamond | wc -l)
+if [ "$nr_kdamonds" -ne 0 ]
+then
+ echo "Another kdamond is running"
+ exit $ksft_skip
+fi
+
+echo Y > "$damon_reclaim_enabled"
+
+nr_kdamonds=$(pgrep kdamond | wc -l)
+if [ "$nr_kdamonds" -ne 1 ]
+then
+ echo "kdamond is not turned on"
+ exit 1
+fi
+
+echo N > "$damon_reclaim_enabled"
+nr_kdamonds=$(pgrep kdamond | wc -l)
+if [ "$nr_kdamonds" -ne 0 ]
+then
+ echo "kdamond is not turned off"
+ exit 1
+fi
diff --git a/tools/testing/selftests/damon/sysfs.sh b/tools/testing/selftests/damon/sysfs.sh
new file mode 100755
index 000000000000..e9a976d296e2
--- /dev/null
+++ b/tools/testing/selftests/damon/sysfs.sh
@@ -0,0 +1,377 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+# Kselftest frmework requirement - SKIP code is 4.
+ksft_skip=4
+
+ensure_write_succ()
+{
+ file=$1
+ content=$2
+ reason=$3
+
+ if ! echo "$content" > "$file"
+ then
+ echo "writing $content to $file failed"
+ echo "expected success because $reason"
+ exit 1
+ fi
+}
+
+ensure_write_fail()
+{
+ file=$1
+ content=$2
+ reason=$3
+
+ if (echo "$content" > "$file") 2> /dev/null
+ then
+ echo "writing $content to $file succeed ($fail_reason)"
+ echo "expected failure because $reason"
+ exit 1
+ fi
+}
+
+ensure_dir()
+{
+ dir=$1
+ to_ensure=$2
+ if [ "$to_ensure" = "exist" ] && [ ! -d "$dir" ]
+ then
+ echo "$dir dir is expected but not found"
+ exit 1
+ elif [ "$to_ensure" = "not_exist" ] && [ -d "$dir" ]
+ then
+ echo "$dir dir is not expected but found"
+ exit 1
+ fi
+}
+
+ensure_file()
+{
+ file=$1
+ to_ensure=$2
+ permission=$3
+ if [ "$to_ensure" = "exist" ]
+ then
+ if [ ! -f "$file" ]
+ then
+ echo "$file is expected but not found"
+ exit 1
+ fi
+ perm=$(stat -c "%a" "$file")
+ if [ ! "$perm" = "$permission" ]
+ then
+ echo "$file permission: expected $permission but $perm"
+ exit 1
+ fi
+ elif [ "$to_ensure" = "not_exist" ] && [ -f "$dir" ]
+ then
+ echo "$file is not expected but found"
+ exit 1
+ fi
+}
+
+test_range()
+{
+ range_dir=$1
+ ensure_dir "$range_dir" "exist"
+ ensure_file "$range_dir/min" "exist" 600
+ ensure_file "$range_dir/max" "exist" 600
+}
+
+test_tried_regions()
+{
+ tried_regions_dir=$1
+ ensure_dir "$tried_regions_dir" "exist"
+ ensure_file "$tried_regions_dir/total_bytes" "exist" "400"
+}
+
+test_stats()
+{
+ stats_dir=$1
+ ensure_dir "$stats_dir" "exist"
+ for f in nr_tried sz_tried nr_applied sz_applied qt_exceeds
+ do
+ ensure_file "$stats_dir/$f" "exist" "400"
+ done
+}
+
+test_filter()
+{
+ filter_dir=$1
+ ensure_file "$filter_dir/type" "exist" "600"
+ ensure_write_succ "$filter_dir/type" "anon" "valid input"
+ ensure_write_succ "$filter_dir/type" "memcg" "valid input"
+ ensure_write_succ "$filter_dir/type" "addr" "valid input"
+ ensure_write_succ "$filter_dir/type" "target" "valid input"
+ ensure_write_fail "$filter_dir/type" "foo" "invalid input"
+ ensure_file "$filter_dir/matching" "exist" "600"
+ ensure_file "$filter_dir/memcg_path" "exist" "600"
+ ensure_file "$filter_dir/addr_start" "exist" "600"
+ ensure_file "$filter_dir/addr_end" "exist" "600"
+ ensure_file "$filter_dir/damon_target_idx" "exist" "600"
+}
+
+test_filters()
+{
+ filters_dir=$1
+ ensure_dir "$filters_dir" "exist"
+ ensure_file "$filters_dir/nr_filters" "exist" "600"
+ ensure_write_succ "$filters_dir/nr_filters" "1" "valid input"
+ test_filter "$filters_dir/0"
+
+ ensure_write_succ "$filters_dir/nr_filters" "2" "valid input"
+ test_filter "$filters_dir/0"
+ test_filter "$filters_dir/1"
+
+ ensure_write_succ "$filters_dir/nr_filters" "0" "valid input"
+ ensure_dir "$filters_dir/0" "not_exist"
+ ensure_dir "$filters_dir/1" "not_exist"
+}
+
+test_watermarks()
+{
+ watermarks_dir=$1
+ ensure_dir "$watermarks_dir" "exist"
+ ensure_file "$watermarks_dir/metric" "exist" "600"
+ ensure_file "$watermarks_dir/interval_us" "exist" "600"
+ ensure_file "$watermarks_dir/high" "exist" "600"
+ ensure_file "$watermarks_dir/mid" "exist" "600"
+ ensure_file "$watermarks_dir/low" "exist" "600"
+}
+
+test_weights()
+{
+ weights_dir=$1
+ ensure_dir "$weights_dir" "exist"
+ ensure_file "$weights_dir/sz_permil" "exist" "600"
+ ensure_file "$weights_dir/nr_accesses_permil" "exist" "600"
+ ensure_file "$weights_dir/age_permil" "exist" "600"
+}
+
+test_goal()
+{
+ goal_dir=$1
+ ensure_dir "$goal_dir" "exist"
+ ensure_file "$goal_dir/target_value" "exist" "600"
+ ensure_file "$goal_dir/current_value" "exist" "600"
+}
+
+test_goals()
+{
+ goals_dir=$1
+ ensure_dir "$goals_dir" "exist"
+ ensure_file "$goals_dir/nr_goals" "exist" "600"
+
+ ensure_write_succ "$goals_dir/nr_goals" "1" "valid input"
+ test_goal "$goals_dir/0"
+
+ ensure_write_succ "$goals_dir/nr_goals" "2" "valid input"
+ test_goal "$goals_dir/0"
+ test_goal "$goals_dir/1"
+
+ ensure_write_succ "$goals_dir/nr_goals" "0" "valid input"
+ ensure_dir "$goals_dir/0" "not_exist"
+ ensure_dir "$goals_dir/1" "not_exist"
+}
+
+test_quotas()
+{
+ quotas_dir=$1
+ ensure_dir "$quotas_dir" "exist"
+ ensure_file "$quotas_dir/ms" "exist" 600
+ ensure_file "$quotas_dir/bytes" "exist" 600
+ ensure_file "$quotas_dir/reset_interval_ms" "exist" 600
+ test_weights "$quotas_dir/weights"
+ test_goals "$quotas_dir/goals"
+}
+
+test_access_pattern()
+{
+ access_pattern_dir=$1
+ ensure_dir "$access_pattern_dir" "exist"
+ test_range "$access_pattern_dir/age"
+ test_range "$access_pattern_dir/nr_accesses"
+ test_range "$access_pattern_dir/sz"
+}
+
+test_scheme()
+{
+ scheme_dir=$1
+ ensure_dir "$scheme_dir" "exist"
+ ensure_file "$scheme_dir/action" "exist" "600"
+ test_access_pattern "$scheme_dir/access_pattern"
+ ensure_file "$scheme_dir/apply_interval_us" "exist" "600"
+ test_quotas "$scheme_dir/quotas"
+ test_watermarks "$scheme_dir/watermarks"
+ test_filters "$scheme_dir/filters"
+ test_stats "$scheme_dir/stats"
+ test_tried_regions "$scheme_dir/tried_regions"
+}
+
+test_schemes()
+{
+ schemes_dir=$1
+ ensure_dir "$schemes_dir" "exist"
+ ensure_file "$schemes_dir/nr_schemes" "exist" 600
+
+ ensure_write_succ "$schemes_dir/nr_schemes" "1" "valid input"
+ test_scheme "$schemes_dir/0"
+
+ ensure_write_succ "$schemes_dir/nr_schemes" "2" "valid input"
+ test_scheme "$schemes_dir/0"
+ test_scheme "$schemes_dir/1"
+
+ ensure_write_succ "$schemes_dir/nr_schemes" "0" "valid input"
+ ensure_dir "$schemes_dir/0" "not_exist"
+ ensure_dir "$schemes_dir/1" "not_exist"
+}
+
+test_region()
+{
+ region_dir=$1
+ ensure_dir "$region_dir" "exist"
+ ensure_file "$region_dir/start" "exist" 600
+ ensure_file "$region_dir/end" "exist" 600
+}
+
+test_regions()
+{
+ regions_dir=$1
+ ensure_dir "$regions_dir" "exist"
+ ensure_file "$regions_dir/nr_regions" "exist" 600
+
+ ensure_write_succ "$regions_dir/nr_regions" "1" "valid input"
+ test_region "$regions_dir/0"
+
+ ensure_write_succ "$regions_dir/nr_regions" "2" "valid input"
+ test_region "$regions_dir/0"
+ test_region "$regions_dir/1"
+
+ ensure_write_succ "$regions_dir/nr_regions" "0" "valid input"
+ ensure_dir "$regions_dir/0" "not_exist"
+ ensure_dir "$regions_dir/1" "not_exist"
+}
+
+test_target()
+{
+ target_dir=$1
+ ensure_dir "$target_dir" "exist"
+ ensure_file "$target_dir/pid_target" "exist" "600"
+ test_regions "$target_dir/regions"
+}
+
+test_targets()
+{
+ targets_dir=$1
+ ensure_dir "$targets_dir" "exist"
+ ensure_file "$targets_dir/nr_targets" "exist" 600
+
+ ensure_write_succ "$targets_dir/nr_targets" "1" "valid input"
+ test_target "$targets_dir/0"
+
+ ensure_write_succ "$targets_dir/nr_targets" "2" "valid input"
+ test_target "$targets_dir/0"
+ test_target "$targets_dir/1"
+
+ ensure_write_succ "$targets_dir/nr_targets" "0" "valid input"
+ ensure_dir "$targets_dir/0" "not_exist"
+ ensure_dir "$targets_dir/1" "not_exist"
+}
+
+test_intervals()
+{
+ intervals_dir=$1
+ ensure_dir "$intervals_dir" "exist"
+ ensure_file "$intervals_dir/aggr_us" "exist" "600"
+ ensure_file "$intervals_dir/sample_us" "exist" "600"
+ ensure_file "$intervals_dir/update_us" "exist" "600"
+}
+
+test_monitoring_attrs()
+{
+ monitoring_attrs_dir=$1
+ ensure_dir "$monitoring_attrs_dir" "exist"
+ test_intervals "$monitoring_attrs_dir/intervals"
+ test_range "$monitoring_attrs_dir/nr_regions"
+}
+
+test_context()
+{
+ context_dir=$1
+ ensure_dir "$context_dir" "exist"
+ ensure_file "$context_dir/avail_operations" "exit" 400
+ ensure_file "$context_dir/operations" "exist" 600
+ test_monitoring_attrs "$context_dir/monitoring_attrs"
+ test_targets "$context_dir/targets"
+ test_schemes "$context_dir/schemes"
+}
+
+test_contexts()
+{
+ contexts_dir=$1
+ ensure_dir "$contexts_dir" "exist"
+ ensure_file "$contexts_dir/nr_contexts" "exist" 600
+
+ ensure_write_succ "$contexts_dir/nr_contexts" "1" "valid input"
+ test_context "$contexts_dir/0"
+
+ ensure_write_fail "$contexts_dir/nr_contexts" "2" "only 0/1 are supported"
+ test_context "$contexts_dir/0"
+
+ ensure_write_succ "$contexts_dir/nr_contexts" "0" "valid input"
+ ensure_dir "$contexts_dir/0" "not_exist"
+}
+
+test_kdamond()
+{
+ kdamond_dir=$1
+ ensure_dir "$kdamond_dir" "exist"
+ ensure_file "$kdamond_dir/state" "exist" "600"
+ ensure_file "$kdamond_dir/pid" "exist" 400
+ test_contexts "$kdamond_dir/contexts"
+}
+
+test_kdamonds()
+{
+ kdamonds_dir=$1
+ ensure_dir "$kdamonds_dir" "exist"
+
+ ensure_file "$kdamonds_dir/nr_kdamonds" "exist" "600"
+
+ ensure_write_succ "$kdamonds_dir/nr_kdamonds" "1" "valid input"
+ test_kdamond "$kdamonds_dir/0"
+
+ ensure_write_succ "$kdamonds_dir/nr_kdamonds" "2" "valid input"
+ test_kdamond "$kdamonds_dir/0"
+ test_kdamond "$kdamonds_dir/1"
+
+ ensure_write_succ "$kdamonds_dir/nr_kdamonds" "0" "valid input"
+ ensure_dir "$kdamonds_dir/0" "not_exist"
+ ensure_dir "$kdamonds_dir/1" "not_exist"
+}
+
+test_damon_sysfs()
+{
+ damon_sysfs=$1
+ if [ ! -d "$damon_sysfs" ]
+ then
+ echo "$damon_sysfs not found"
+ exit $ksft_skip
+ fi
+
+ test_kdamonds "$damon_sysfs/kdamonds"
+}
+
+check_dependencies()
+{
+ if [ $EUID -ne 0 ]
+ then
+ echo "Run as root"
+ exit $ksft_skip
+ fi
+}
+
+check_dependencies
+test_damon_sysfs "/sys/kernel/mm/damon/admin"
diff --git a/tools/testing/selftests/damon/sysfs_update_removed_scheme_dir.sh b/tools/testing/selftests/damon/sysfs_update_removed_scheme_dir.sh
new file mode 100755
index 000000000000..ade35576e748
--- /dev/null
+++ b/tools/testing/selftests/damon/sysfs_update_removed_scheme_dir.sh
@@ -0,0 +1,58 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+# Kselftest framework requirement - SKIP code is 4.
+ksft_skip=4
+
+if [ $EUID -ne 0 ]
+then
+ echo "Run as root"
+ exit $ksft_skip
+fi
+
+damon_sysfs="/sys/kernel/mm/damon/admin"
+if [ ! -d "$damon_sysfs" ]
+then
+ echo "damon sysfs not found"
+ exit $ksft_skip
+fi
+
+# clear log
+dmesg -C
+
+# start DAMON with a scheme
+echo 1 > "$damon_sysfs/kdamonds/nr_kdamonds"
+echo 1 > "$damon_sysfs/kdamonds/0/contexts/nr_contexts"
+echo "vaddr" > "$damon_sysfs/kdamonds/0/contexts/0/operations"
+echo 1 > "$damon_sysfs/kdamonds/0/contexts/0/targets/nr_targets"
+echo $$ > "$damon_sysfs/kdamonds/0/contexts/0/targets/0/pid_target"
+echo 1 > "$damon_sysfs/kdamonds/0/contexts/0/schemes/nr_schemes"
+scheme_dir="$damon_sysfs/kdamonds/0/contexts/0/schemes/0"
+echo 4096000 > "$scheme_dir/access_pattern/sz/max"
+echo 20 > "$scheme_dir/access_pattern/nr_accesses/max"
+echo 1024 > "$scheme_dir/access_pattern/age/max"
+echo "on" > "$damon_sysfs/kdamonds/0/state"
+sleep 0.3
+
+# remove scheme sysfs dir
+echo 0 > "$damon_sysfs/kdamonds/0/contexts/0/schemes/nr_schemes"
+
+# try to update stat of already removed scheme sysfs dir
+echo "update_schemes_stats" > "$damon_sysfs/kdamonds/0/state"
+if dmesg | grep -q BUG
+then
+ echo "update_schemes_stats triggers a kernel bug"
+ dmesg
+ exit 1
+fi
+
+# try to update tried regions of already removed scheme sysfs dir
+echo "update_schemes_tried_regions" > "$damon_sysfs/kdamonds/0/state"
+if dmesg | grep -q BUG
+then
+ echo "update_schemes_tried_regions triggers a kernel bug"
+ dmesg
+ exit 1
+fi
+
+echo "off" > "$damon_sysfs/kdamonds/0/state"
diff --git a/tools/testing/selftests/damon/sysfs_update_schemes_tried_regions_hang.py b/tools/testing/selftests/damon/sysfs_update_schemes_tried_regions_hang.py
new file mode 100644
index 000000000000..28c887a0108f
--- /dev/null
+++ b/tools/testing/selftests/damon/sysfs_update_schemes_tried_regions_hang.py
@@ -0,0 +1,33 @@
+#!/usr/bin/env python3
+# SPDX-License-Identifier: GPL-2.0
+
+import subprocess
+import time
+
+import _damon_sysfs
+
+def main():
+ proc = subprocess.Popen(['sleep', '2'])
+ kdamonds = _damon_sysfs.Kdamonds([_damon_sysfs.Kdamond(
+ contexts=[_damon_sysfs.DamonCtx(
+ ops='vaddr',
+ targets=[_damon_sysfs.DamonTarget(pid=proc.pid)],
+ schemes=[_damon_sysfs.Damos(
+ access_pattern=_damon_sysfs.DamosAccessPattern(
+ nr_accesses=[200, 200]))] # schemes
+ )] # contexts
+ )]) # kdamonds
+
+ err = kdamonds.start()
+ if err != None:
+ print('kdamond start failed: %s' % err)
+ exit(1)
+
+ while proc.poll() == None:
+ err = kdamonds.kdamonds[0].update_schemes_tried_bytes()
+ if err != None:
+ print('tried bytes update failed: %s' % err)
+ exit(1)
+
+if __name__ == '__main__':
+ main()
diff --git a/tools/testing/selftests/damon/sysfs_update_schemes_tried_regions_wss_estimation.py b/tools/testing/selftests/damon/sysfs_update_schemes_tried_regions_wss_estimation.py
new file mode 100644
index 000000000000..90ad7409a7a6
--- /dev/null
+++ b/tools/testing/selftests/damon/sysfs_update_schemes_tried_regions_wss_estimation.py
@@ -0,0 +1,55 @@
+#!/usr/bin/env python3
+# SPDX-License-Identifier: GPL-2.0
+
+import subprocess
+import time
+
+import _damon_sysfs
+
+def main():
+ # access two 10 MiB memory regions, 2 second per each
+ sz_region = 10 * 1024 * 1024
+ proc = subprocess.Popen(['./access_memory', '2', '%d' % sz_region, '2000'])
+ kdamonds = _damon_sysfs.Kdamonds([_damon_sysfs.Kdamond(
+ contexts=[_damon_sysfs.DamonCtx(
+ ops='vaddr',
+ targets=[_damon_sysfs.DamonTarget(pid=proc.pid)],
+ schemes=[_damon_sysfs.Damos(
+ access_pattern=_damon_sysfs.DamosAccessPattern(
+ # >= 25% access rate, >= 200ms age
+ nr_accesses=[5, 20], age=[2, 2**64 - 1]))] # schemes
+ )] # contexts
+ )]) # kdamonds
+
+ err = kdamonds.start()
+ if err != None:
+ print('kdamond start failed: %s' % err)
+ exit(1)
+
+ wss_collected = []
+ while proc.poll() == None:
+ time.sleep(0.1)
+ err = kdamonds.kdamonds[0].update_schemes_tried_bytes()
+ if err != None:
+ print('tried bytes update failed: %s' % err)
+ exit(1)
+
+ wss_collected.append(
+ kdamonds.kdamonds[0].contexts[0].schemes[0].tried_bytes)
+
+ wss_collected.sort()
+ acceptable_error_rate = 0.2
+ for percentile in [50, 75]:
+ sample = wss_collected[int(len(wss_collected) * percentile / 100)]
+ error_rate = abs(sample - sz_region) / sz_region
+ print('%d-th percentile (%d) error %f' %
+ (percentile, sample, error_rate))
+ if error_rate > acceptable_error_rate:
+ print('the error rate is not acceptable (> %f)' %
+ acceptable_error_rate)
+ print('samples are as below')
+ print('\n'.join(['%d' % wss for wss in wss_collected]))
+ exit(1)
+
+if __name__ == '__main__':
+ main()
diff --git a/tools/testing/selftests/devices/Makefile b/tools/testing/selftests/devices/Makefile
new file mode 100644
index 000000000000..ca29249b30c3
--- /dev/null
+++ b/tools/testing/selftests/devices/Makefile
@@ -0,0 +1,4 @@
+TEST_PROGS := test_discoverable_devices.py
+TEST_FILES := boards ksft.py
+
+include ../lib.mk
diff --git a/tools/testing/selftests/devices/boards/Dell Inc.,XPS 13 9300.yaml b/tools/testing/selftests/devices/boards/Dell Inc.,XPS 13 9300.yaml
new file mode 100644
index 000000000000..ff932eb19f0b
--- /dev/null
+++ b/tools/testing/selftests/devices/boards/Dell Inc.,XPS 13 9300.yaml
@@ -0,0 +1,40 @@
+# SPDX-License-Identifier: GPL-2.0
+#
+# This is the device definition for the XPS 13 9300.
+# The filename "Dell Inc.,XPS 13 9300" was chosen following the format
+# "Vendor,Product", where Vendor comes from
+# /sys/devices/virtual/dmi/id/sys_vendor, and Product comes from
+# /sys/devices/virtual/dmi/id/product_name.
+#
+# See google,spherion.yaml for more information.
+#
+- type: pci-controller
+ # This machine has a single PCI host controller so it's valid to not have any
+ # key to identify the controller. If it had more than one controller, the UID
+ # of the controller from ACPI could be used to distinguish as follows:
+ #acpi-uid: 0
+ devices:
+ - path: 14.0
+ type: usb-controller
+ usb-version: 2
+ devices:
+ - path: 9
+ name: camera
+ interfaces: [0, 1, 2, 3]
+ - path: 10
+ name: bluetooth
+ interfaces: [0, 1]
+ - path: 2.0
+ name: gpu
+ - path: 4.0
+ name: thermal
+ - path: 12.0
+ name: sensors
+ - path: 14.3
+ name: wifi
+ - path: 1d.0/0.0
+ name: ssd
+ - path: 1d.7/0.0
+ name: sdcard-reader
+ - path: 1f.3
+ name: audio
diff --git a/tools/testing/selftests/devices/boards/google,spherion.yaml b/tools/testing/selftests/devices/boards/google,spherion.yaml
new file mode 100644
index 000000000000..17157ecd8c14
--- /dev/null
+++ b/tools/testing/selftests/devices/boards/google,spherion.yaml
@@ -0,0 +1,50 @@
+# SPDX-License-Identifier: GPL-2.0
+#
+# This is the device definition for the Google Spherion Chromebook.
+# The filename "google,spherion" comes from the Devicetree compatible, so this
+# file will be automatically used when the test is run on that machine.
+#
+# The top-level is a list of controllers, either for USB or PCI(e).
+# Every controller needs to have a 'type' key set to either 'usb-controller' or
+# 'pci-controller'.
+# Every controller needs to be uniquely identified on the platform. To achieve
+# this, several optional keys can be used:
+# - dt-mmio: identify the MMIO address of the controller as defined in the
+# Devicetree.
+# - usb-version: for USB controllers to differentiate between USB3 and USB2
+# buses sharing the same controller.
+# - acpi-uid: _UID property of the controller as supplied by the ACPI. Useful to
+# distinguish between multiple PCI host controllers.
+#
+# The 'devices' key defines a list of devices that are accessible under that
+# controller. A device might be a leaf device or another controller (see
+# 'Dell Inc.,XPS 13 9300.yaml').
+#
+# The 'path' key is needed for every child device (that is, not top-level) to
+# define how to reach this device from the parent controller. For USB devices it
+# follows the format \d(.\d)* and denotes the port in the hub at each level in
+# the USB topology. For PCI devices it follows the format \d.\d(/\d.\d)*
+# denoting the device (identified by device-function pair) at each level in the
+# PCI topology.
+#
+# The 'name' key is used in the leaf devices to name the device for clarity in
+# the test output.
+#
+# For USB leaf devices, the 'interfaces' key should contain a list of the
+# interfaces in that device that should be bound to a driver.
+#
+- type: usb-controller
+ dt-mmio: 11200000
+ usb-version: 2
+ devices:
+ - path: 1.4.1
+ interfaces: [0, 1]
+ name: camera
+ - path: 1.4.2
+ interfaces: [0, 1]
+ name: bluetooth
+- type: pci-controller
+ dt-mmio: 11230000
+ devices:
+ - path: 0.0/0.0
+ name: wifi
diff --git a/tools/testing/selftests/devices/ksft.py b/tools/testing/selftests/devices/ksft.py
new file mode 100644
index 000000000000..cd89fb2bc10e
--- /dev/null
+++ b/tools/testing/selftests/devices/ksft.py
@@ -0,0 +1,90 @@
+# SPDX-License-Identifier: GPL-2.0
+#
+# Copyright (c) 2023 Collabora Ltd
+#
+# Kselftest helpers for outputting in KTAP format. Based on kselftest.h.
+#
+
+import sys
+
+ksft_cnt = {"pass": 0, "fail": 0, "skip": 0}
+ksft_num_tests = 0
+ksft_test_number = 1
+
+KSFT_PASS = 0
+KSFT_FAIL = 1
+KSFT_SKIP = 4
+
+
+def print_header():
+ print("TAP version 13")
+
+
+def set_plan(num_tests):
+ global ksft_num_tests
+ ksft_num_tests = num_tests
+ print("1..{}".format(num_tests))
+
+
+def print_cnts():
+ print(
+ f"# Totals: pass:{ksft_cnt['pass']} fail:{ksft_cnt['fail']} xfail:0 xpass:0 skip:{ksft_cnt['skip']} error:0"
+ )
+
+
+def print_msg(msg):
+ print(f"# {msg}")
+
+
+def _test_print(result, description, directive=None):
+ if directive:
+ directive_str = f"# {directive}"
+ else:
+ directive_str = ""
+
+ global ksft_test_number
+ print(f"{result} {ksft_test_number} {description} {directive_str}")
+ ksft_test_number += 1
+
+
+def test_result_pass(description):
+ _test_print("ok", description)
+ ksft_cnt["pass"] += 1
+
+
+def test_result_fail(description):
+ _test_print("not ok", description)
+ ksft_cnt["fail"] += 1
+
+
+def test_result_skip(description):
+ _test_print("ok", description, "SKIP")
+ ksft_cnt["skip"] += 1
+
+
+def test_result(condition, description=""):
+ if condition:
+ test_result_pass(description)
+ else:
+ test_result_fail(description)
+
+
+def finished():
+ if ksft_cnt["pass"] == ksft_num_tests:
+ exit_code = KSFT_PASS
+ else:
+ exit_code = KSFT_FAIL
+
+ print_cnts()
+
+ sys.exit(exit_code)
+
+
+def exit_fail():
+ print_cnts()
+ sys.exit(KSFT_FAIL)
+
+
+def exit_pass():
+ print_cnts()
+ sys.exit(KSFT_PASS)
diff --git a/tools/testing/selftests/devices/test_discoverable_devices.py b/tools/testing/selftests/devices/test_discoverable_devices.py
new file mode 100755
index 000000000000..fbae8deb593d
--- /dev/null
+++ b/tools/testing/selftests/devices/test_discoverable_devices.py
@@ -0,0 +1,318 @@
+#!/usr/bin/python3
+# SPDX-License-Identifier: GPL-2.0
+#
+# Copyright (c) 2023 Collabora Ltd
+#
+# This script tests for presence and driver binding of devices from discoverable
+# buses (ie USB, PCI).
+#
+# The per-platform YAML file defining the devices to be tested is stored inside
+# the boards/ directory and chosen based on DT compatible or DMI IDs (sys_vendor
+# and product_name).
+#
+# See boards/google,spherion.yaml and boards/'Dell Inc.,XPS 13 9300.yaml' for
+# the description and examples of the file structure and vocabulary.
+#
+
+import glob
+import ksft
+import os
+import re
+import sys
+import yaml
+
+pci_controllers = []
+usb_controllers = []
+
+sysfs_usb_devices = "/sys/bus/usb/devices/"
+
+
+def find_pci_controller_dirs():
+ sysfs_devices = "/sys/devices"
+ pci_controller_sysfs_dir = "pci[0-9a-f]{4}:[0-9a-f]{2}"
+
+ dir_regex = re.compile(pci_controller_sysfs_dir)
+ for path, dirs, _ in os.walk(sysfs_devices):
+ for d in dirs:
+ if dir_regex.match(d):
+ pci_controllers.append(os.path.join(path, d))
+
+
+def find_usb_controller_dirs():
+ usb_controller_sysfs_dir = "usb[\d]+"
+
+ dir_regex = re.compile(usb_controller_sysfs_dir)
+ for d in os.scandir(sysfs_usb_devices):
+ if dir_regex.match(d.name):
+ usb_controllers.append(os.path.realpath(d.path))
+
+
+def get_dt_mmio(sysfs_dev_dir):
+ re_dt_mmio = re.compile("OF_FULLNAME=.*@([0-9a-f]+)")
+ dt_mmio = None
+
+ # PCI controllers' sysfs don't have an of_node, so have to read it from the
+ # parent
+ while not dt_mmio:
+ try:
+ with open(os.path.join(sysfs_dev_dir, "uevent")) as f:
+ dt_mmio = re_dt_mmio.search(f.read()).group(1)
+ return dt_mmio
+ except:
+ pass
+ sysfs_dev_dir = os.path.dirname(sysfs_dev_dir)
+
+
+def get_acpi_uid(sysfs_dev_dir):
+ with open(os.path.join(sysfs_dev_dir, "firmware_node", "uid")) as f:
+ return f.read()
+
+
+def get_usb_version(sysfs_dev_dir):
+ re_usb_version = re.compile("PRODUCT=.*/(\d)/.*")
+ with open(os.path.join(sysfs_dev_dir, "uevent")) as f:
+ return int(re_usb_version.search(f.read()).group(1))
+
+
+def get_usb_busnum(sysfs_dev_dir):
+ re_busnum = re.compile("BUSNUM=(.*)")
+ with open(os.path.join(sysfs_dev_dir, "uevent")) as f:
+ return int(re_busnum.search(f.read()).group(1))
+
+
+def find_controller_in_sysfs(controller, parent_sysfs=None):
+ if controller["type"] == "pci-controller":
+ controllers = pci_controllers
+ elif controller["type"] == "usb-controller":
+ controllers = usb_controllers
+
+ result_controllers = []
+
+ for c in controllers:
+ if parent_sysfs and parent_sysfs not in c:
+ continue
+
+ if controller.get("dt-mmio"):
+ if str(controller["dt-mmio"]) != get_dt_mmio(c):
+ continue
+
+ if controller.get("usb-version"):
+ if controller["usb-version"] != get_usb_version(c):
+ continue
+
+ if controller.get("acpi-uid"):
+ if controller["acpi-uid"] != get_acpi_uid(c):
+ continue
+
+ result_controllers.append(c)
+
+ return result_controllers
+
+
+def is_controller(device):
+ return device.get("type") and "controller" in device.get("type")
+
+
+def path_to_dir(parent_sysfs, dev_type, path):
+ if dev_type == "usb-device":
+ usb_dev_sysfs_fmt = "{}-{}"
+ busnum = get_usb_busnum(parent_sysfs)
+ dirname = os.path.join(
+ sysfs_usb_devices, usb_dev_sysfs_fmt.format(busnum, path)
+ )
+ return [os.path.realpath(dirname)]
+ else:
+ pci_dev_sysfs_fmt = "????:??:{}"
+ path_glob = ""
+ for dev_func in path.split("/"):
+ dev_func = dev_func.zfill(4)
+ path_glob = os.path.join(path_glob, pci_dev_sysfs_fmt.format(dev_func))
+
+ dir_list = glob.glob(os.path.join(parent_sysfs, path_glob))
+
+ return dir_list
+
+
+def find_in_sysfs(device, parent_sysfs=None):
+ if parent_sysfs and device.get("path"):
+ pathdirs = path_to_dir(
+ parent_sysfs, device["meta"]["type"], str(device["path"])
+ )
+ if len(pathdirs) != 1:
+ # Early return to report error
+ return pathdirs
+ pathdir = pathdirs[0]
+ sysfs_path = os.path.join(parent_sysfs, pathdir)
+ else:
+ sysfs_path = parent_sysfs
+
+ if is_controller(device):
+ return find_controller_in_sysfs(device, sysfs_path)
+ else:
+ return [sysfs_path]
+
+
+def check_driver_presence(sysfs_dir, current_node):
+ if current_node["meta"]["type"] == "usb-device":
+ usb_intf_fmt = "*-*:*.{}"
+
+ interfaces = []
+ for i in current_node["interfaces"]:
+ interfaces.append((i, usb_intf_fmt.format(i)))
+
+ for intf_num, intf_dir_fmt in interfaces:
+ test_name = f"{current_node['meta']['pathname']}.{intf_num}.driver"
+
+ intf_dirs = glob.glob(os.path.join(sysfs_dir, intf_dir_fmt))
+ if len(intf_dirs) != 1:
+ ksft.test_result_fail(test_name)
+ continue
+ intf_dir = intf_dirs[0]
+
+ driver_link = os.path.join(sysfs_dir, intf_dir, "driver")
+ ksft.test_result(os.path.isdir(driver_link), test_name)
+ else:
+ driver_link = os.path.join(sysfs_dir, "driver")
+ test_name = current_node["meta"]["pathname"] + ".driver"
+ ksft.test_result(os.path.isdir(driver_link), test_name)
+
+
+def generate_pathname(device):
+ pathname = ""
+
+ if device.get("path"):
+ pathname = str(device["path"])
+
+ if device.get("type"):
+ dev_type = device["type"]
+ if device.get("usb-version"):
+ dev_type = dev_type.replace("usb", "usb" + str(device["usb-version"]))
+ if device.get("acpi-uid") is not None:
+ dev_type = dev_type.replace("pci", "pci" + str(device["acpi-uid"]))
+ pathname = pathname + "/" + dev_type
+
+ if device.get("dt-mmio"):
+ pathname += "@" + str(device["dt-mmio"])
+
+ if device.get("name"):
+ pathname = pathname + "/" + device["name"]
+
+ return pathname
+
+
+def fill_meta_keys(child, parent=None):
+ child["meta"] = {}
+
+ if parent:
+ child["meta"]["type"] = parent["type"].replace("controller", "device")
+
+ pathname = generate_pathname(child)
+ if parent:
+ pathname = parent["meta"]["pathname"] + "/" + pathname
+ child["meta"]["pathname"] = pathname
+
+
+def parse_device_tree_node(current_node, parent_sysfs=None):
+ if not parent_sysfs:
+ fill_meta_keys(current_node)
+
+ sysfs_dirs = find_in_sysfs(current_node, parent_sysfs)
+ if len(sysfs_dirs) != 1:
+ if len(sysfs_dirs) == 0:
+ ksft.test_result_fail(
+ f"Couldn't find in sysfs: {current_node['meta']['pathname']}"
+ )
+ else:
+ ksft.test_result_fail(
+ f"Found multiple sysfs entries for {current_node['meta']['pathname']}: {sysfs_dirs}"
+ )
+ return
+ sysfs_dir = sysfs_dirs[0]
+
+ if not is_controller(current_node):
+ ksft.test_result(
+ os.path.exists(sysfs_dir), current_node["meta"]["pathname"] + ".device"
+ )
+ check_driver_presence(sysfs_dir, current_node)
+ else:
+ for child_device in current_node["devices"]:
+ fill_meta_keys(child_device, current_node)
+ parse_device_tree_node(child_device, sysfs_dir)
+
+
+def count_tests(device_trees):
+ test_count = 0
+
+ def parse_node(device):
+ nonlocal test_count
+ if device.get("devices"):
+ for child in device["devices"]:
+ parse_node(child)
+ else:
+ if device.get("interfaces"):
+ test_count += len(device["interfaces"])
+ else:
+ test_count += 1
+ test_count += 1
+
+ for device_tree in device_trees:
+ parse_node(device_tree)
+
+ return test_count
+
+
+def get_board_filenames():
+ filenames = []
+
+ platform_compatible_file = "/proc/device-tree/compatible"
+ if os.path.exists(platform_compatible_file):
+ with open(platform_compatible_file) as f:
+ for line in f:
+ filenames.extend(line.split("\0"))
+ else:
+ dmi_id_dir = "/sys/devices/virtual/dmi/id"
+ vendor_dmi_file = os.path.join(dmi_id_dir, "sys_vendor")
+ product_dmi_file = os.path.join(dmi_id_dir, "product_name")
+
+ with open(vendor_dmi_file) as f:
+ vendor = f.read().replace("\n", "")
+ with open(product_dmi_file) as f:
+ product = f.read().replace("\n", "")
+
+ filenames = [vendor + "," + product]
+
+ return filenames
+
+
+def run_test(yaml_file):
+ ksft.print_msg(f"Using board file: {yaml_file}")
+
+ with open(yaml_file) as f:
+ device_trees = yaml.safe_load(f)
+
+ ksft.set_plan(count_tests(device_trees))
+
+ for device_tree in device_trees:
+ parse_device_tree_node(device_tree)
+
+
+find_pci_controller_dirs()
+find_usb_controller_dirs()
+
+ksft.print_header()
+
+board_file = ""
+for board_filename in get_board_filenames():
+ full_board_filename = os.path.join("boards", board_filename + ".yaml")
+
+ if os.path.exists(full_board_filename):
+ board_file = full_board_filename
+ break
+
+if not board_file:
+ ksft.print_msg("No matching board file found")
+ ksft.exit_fail()
+
+run_test(board_file)
+
+ksft.finished()
diff --git a/tools/testing/selftests/dma/Makefile b/tools/testing/selftests/dma/Makefile
new file mode 100644
index 000000000000..cd8c5ece1cba
--- /dev/null
+++ b/tools/testing/selftests/dma/Makefile
@@ -0,0 +1,7 @@
+# SPDX-License-Identifier: GPL-2.0
+CFLAGS += -I../../../../usr/include/
+CFLAGS += -I../../../../include/
+
+TEST_GEN_PROGS := dma_map_benchmark
+
+include ../lib.mk
diff --git a/tools/testing/selftests/dma/config b/tools/testing/selftests/dma/config
new file mode 100644
index 000000000000..6102ee3c43cd
--- /dev/null
+++ b/tools/testing/selftests/dma/config
@@ -0,0 +1 @@
+CONFIG_DMA_MAP_BENCHMARK=y
diff --git a/tools/testing/selftests/dma/dma_map_benchmark.c b/tools/testing/selftests/dma/dma_map_benchmark.c
new file mode 100644
index 000000000000..5c997f17fcbd
--- /dev/null
+++ b/tools/testing/selftests/dma/dma_map_benchmark.c
@@ -0,0 +1,129 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright (C) 2020 HiSilicon Limited.
+ */
+
+#include <fcntl.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+#include <sys/ioctl.h>
+#include <sys/mman.h>
+#include <linux/types.h>
+#include <linux/map_benchmark.h>
+
+#define NSEC_PER_MSEC 1000000L
+
+static char *directions[] = {
+ "BIDIRECTIONAL",
+ "TO_DEVICE",
+ "FROM_DEVICE",
+};
+
+int main(int argc, char **argv)
+{
+ struct map_benchmark map;
+ int fd, opt;
+ /* default single thread, run 20 seconds on NUMA_NO_NODE */
+ int threads = 1, seconds = 20, node = -1;
+ /* default dma mask 32bit, bidirectional DMA */
+ int bits = 32, xdelay = 0, dir = DMA_MAP_BIDIRECTIONAL;
+ /* default granule 1 PAGESIZE */
+ int granule = 1;
+
+ int cmd = DMA_MAP_BENCHMARK;
+ char *p;
+
+ while ((opt = getopt(argc, argv, "t:s:n:b:d:x:g:")) != -1) {
+ switch (opt) {
+ case 't':
+ threads = atoi(optarg);
+ break;
+ case 's':
+ seconds = atoi(optarg);
+ break;
+ case 'n':
+ node = atoi(optarg);
+ break;
+ case 'b':
+ bits = atoi(optarg);
+ break;
+ case 'd':
+ dir = atoi(optarg);
+ break;
+ case 'x':
+ xdelay = atoi(optarg);
+ break;
+ case 'g':
+ granule = atoi(optarg);
+ break;
+ default:
+ return -1;
+ }
+ }
+
+ if (threads <= 0 || threads > DMA_MAP_MAX_THREADS) {
+ fprintf(stderr, "invalid number of threads, must be in 1-%d\n",
+ DMA_MAP_MAX_THREADS);
+ exit(1);
+ }
+
+ if (seconds <= 0 || seconds > DMA_MAP_MAX_SECONDS) {
+ fprintf(stderr, "invalid number of seconds, must be in 1-%d\n",
+ DMA_MAP_MAX_SECONDS);
+ exit(1);
+ }
+
+ if (xdelay < 0 || xdelay > DMA_MAP_MAX_TRANS_DELAY) {
+ fprintf(stderr, "invalid transmit delay, must be in 0-%ld\n",
+ DMA_MAP_MAX_TRANS_DELAY);
+ exit(1);
+ }
+
+ /* suppose the mininum DMA zone is 1MB in the world */
+ if (bits < 20 || bits > 64) {
+ fprintf(stderr, "invalid dma mask bit, must be in 20-64\n");
+ exit(1);
+ }
+
+ if (dir != DMA_MAP_BIDIRECTIONAL && dir != DMA_MAP_TO_DEVICE &&
+ dir != DMA_MAP_FROM_DEVICE) {
+ fprintf(stderr, "invalid dma direction\n");
+ exit(1);
+ }
+
+ if (granule < 1 || granule > 1024) {
+ fprintf(stderr, "invalid granule size\n");
+ exit(1);
+ }
+
+ fd = open("/sys/kernel/debug/dma_map_benchmark", O_RDWR);
+ if (fd == -1) {
+ perror("open");
+ exit(1);
+ }
+
+ memset(&map, 0, sizeof(map));
+ map.seconds = seconds;
+ map.threads = threads;
+ map.node = node;
+ map.dma_bits = bits;
+ map.dma_dir = dir;
+ map.dma_trans_ns = xdelay;
+ map.granule = granule;
+
+ if (ioctl(fd, cmd, &map)) {
+ perror("ioctl");
+ exit(1);
+ }
+
+ printf("dma mapping benchmark: threads:%d seconds:%d node:%d dir:%s granule: %d\n",
+ threads, seconds, node, dir[directions], granule);
+ printf("average map latency(us):%.1f standard deviation:%.1f\n",
+ map.avg_map_100ns/10.0, map.map_stddev/10.0);
+ printf("average unmap latency(us):%.1f standard deviation:%.1f\n",
+ map.avg_unmap_100ns/10.0, map.unmap_stddev/10.0);
+
+ return 0;
+}
diff --git a/tools/testing/selftests/dmabuf-heaps/.gitignore b/tools/testing/selftests/dmabuf-heaps/.gitignore
new file mode 100644
index 000000000000..b500e76b9045
--- /dev/null
+++ b/tools/testing/selftests/dmabuf-heaps/.gitignore
@@ -0,0 +1 @@
+dmabuf-heap
diff --git a/tools/testing/selftests/dmabuf-heaps/Makefile b/tools/testing/selftests/dmabuf-heaps/Makefile
index 607c2acd2082..9e7e158d5fa3 100644
--- a/tools/testing/selftests/dmabuf-heaps/Makefile
+++ b/tools/testing/selftests/dmabuf-heaps/Makefile
@@ -1,5 +1,5 @@
# SPDX-License-Identifier: GPL-2.0
-CFLAGS += -static -O3 -Wl,-no-as-needed -Wall -I../../../../usr/include
+CFLAGS += -static -O3 -Wl,-no-as-needed -Wall $(KHDR_INCLUDES)
TEST_GEN_PROGS = dmabuf-heap
diff --git a/tools/testing/selftests/dmabuf-heaps/dmabuf-heap.c b/tools/testing/selftests/dmabuf-heaps/dmabuf-heap.c
index 909da9cdda97..890a8236a8ba 100644
--- a/tools/testing/selftests/dmabuf-heaps/dmabuf-heap.c
+++ b/tools/testing/selftests/dmabuf-heaps/dmabuf-heap.c
@@ -13,10 +13,9 @@
#include <sys/types.h>
#include <linux/dma-buf.h>
+#include <linux/dma-heap.h>
#include <drm/drm.h>
-#include "../../../../include/uapi/linux/dma-heap.h"
-
#define DEVPATH "/dev/dma_heap"
static int check_vgem(int fd)
@@ -130,16 +129,13 @@ static int dmabuf_heap_alloc(int fd, size_t len, unsigned int flags,
dmabuf_fd);
}
-static void dmabuf_sync(int fd, int start_stop)
+static int dmabuf_sync(int fd, int start_stop)
{
struct dma_buf_sync sync = {
.flags = start_stop | DMA_BUF_SYNC_RW,
};
- int ret;
- ret = ioctl(fd, DMA_BUF_IOCTL_SYNC, &sync);
- if (ret)
- printf("sync failed %d\n", errno);
+ return ioctl(fd, DMA_BUF_IOCTL_SYNC, &sync);
}
#define ONE_MEG (1024 * 1024)
@@ -151,16 +147,14 @@ static int test_alloc_and_import(char *heap_name)
void *p = NULL;
int ret;
- printf("Testing heap: %s\n", heap_name);
-
heap_fd = dmabuf_heap_open(heap_name);
if (heap_fd < 0)
return -1;
- printf("Allocating 1 MEG\n");
+ printf(" Testing allocation and importing: ");
ret = dmabuf_heap_alloc(heap_fd, ONE_MEG, 0, &dmabuf_fd);
if (ret) {
- printf("Allocation Failed!\n");
+ printf("FAIL (Allocation Failed!)\n");
ret = -1;
goto out;
}
@@ -172,11 +166,10 @@ static int test_alloc_and_import(char *heap_name)
dmabuf_fd,
0);
if (p == MAP_FAILED) {
- printf("mmap() failed: %m\n");
+ printf("FAIL (mmap() failed)\n");
ret = -1;
goto out;
}
- printf("mmap passed\n");
dmabuf_sync(dmabuf_fd, DMA_BUF_SYNC_START);
memset(p, 1, ONE_MEG / 2);
@@ -186,25 +179,31 @@ static int test_alloc_and_import(char *heap_name)
importer_fd = open_vgem();
if (importer_fd < 0) {
ret = importer_fd;
- printf("Failed to open vgem\n");
- goto out;
+ printf("(Could not open vgem - skipping): ");
+ } else {
+ ret = import_vgem_fd(importer_fd, dmabuf_fd, &handle);
+ if (ret < 0) {
+ printf("FAIL (Failed to import buffer)\n");
+ goto out;
+ }
}
- ret = import_vgem_fd(importer_fd, dmabuf_fd, &handle);
+ ret = dmabuf_sync(dmabuf_fd, DMA_BUF_SYNC_START);
if (ret < 0) {
- printf("Failed to import buffer\n");
+ printf("FAIL (DMA_BUF_SYNC_START failed!)\n");
goto out;
}
- printf("import passed\n");
- dmabuf_sync(dmabuf_fd, DMA_BUF_SYNC_START);
memset(p, 0xff, ONE_MEG);
- dmabuf_sync(dmabuf_fd, DMA_BUF_SYNC_END);
- printf("syncs passed\n");
+ ret = dmabuf_sync(dmabuf_fd, DMA_BUF_SYNC_END);
+ if (ret < 0) {
+ printf("FAIL (DMA_BUF_SYNC_END failed!)\n");
+ goto out;
+ }
close_handle(importer_fd, handle);
ret = 0;
-
+ printf(" OK\n");
out:
if (p)
munmap(p, ONE_MEG);
@@ -218,6 +217,84 @@ out:
return ret;
}
+static int test_alloc_zeroed(char *heap_name, size_t size)
+{
+ int heap_fd = -1, dmabuf_fd[32];
+ int i, j, ret;
+ void *p = NULL;
+ char *c;
+
+ printf(" Testing alloced %ldk buffers are zeroed: ", size / 1024);
+ heap_fd = dmabuf_heap_open(heap_name);
+ if (heap_fd < 0)
+ return -1;
+
+ /* Allocate and fill a bunch of buffers */
+ for (i = 0; i < 32; i++) {
+ ret = dmabuf_heap_alloc(heap_fd, size, 0, &dmabuf_fd[i]);
+ if (ret < 0) {
+ printf("FAIL (Allocation (%i) failed)\n", i);
+ goto out;
+ }
+ /* mmap and fill with simple pattern */
+ p = mmap(NULL, size, PROT_READ | PROT_WRITE, MAP_SHARED, dmabuf_fd[i], 0);
+ if (p == MAP_FAILED) {
+ printf("FAIL (mmap() failed!)\n");
+ ret = -1;
+ goto out;
+ }
+ dmabuf_sync(dmabuf_fd[i], DMA_BUF_SYNC_START);
+ memset(p, 0xff, size);
+ dmabuf_sync(dmabuf_fd[i], DMA_BUF_SYNC_END);
+ munmap(p, size);
+ }
+ /* close them all */
+ for (i = 0; i < 32; i++)
+ close(dmabuf_fd[i]);
+
+ /* Allocate and validate all buffers are zeroed */
+ for (i = 0; i < 32; i++) {
+ ret = dmabuf_heap_alloc(heap_fd, size, 0, &dmabuf_fd[i]);
+ if (ret < 0) {
+ printf("FAIL (Allocation (%i) failed)\n", i);
+ goto out;
+ }
+
+ /* mmap and validate everything is zero */
+ p = mmap(NULL, size, PROT_READ | PROT_WRITE, MAP_SHARED, dmabuf_fd[i], 0);
+ if (p == MAP_FAILED) {
+ printf("FAIL (mmap() failed!)\n");
+ ret = -1;
+ goto out;
+ }
+ dmabuf_sync(dmabuf_fd[i], DMA_BUF_SYNC_START);
+ c = (char *)p;
+ for (j = 0; j < size; j++) {
+ if (c[j] != 0) {
+ printf("FAIL (Allocated buffer not zeroed @ %i)\n", j);
+ break;
+ }
+ }
+ dmabuf_sync(dmabuf_fd[i], DMA_BUF_SYNC_END);
+ munmap(p, size);
+ }
+ /* close them all */
+ for (i = 0; i < 32; i++)
+ close(dmabuf_fd[i]);
+
+ close(heap_fd);
+ printf("OK\n");
+ return 0;
+
+out:
+ while (i > 0) {
+ close(dmabuf_fd[i]);
+ i--;
+ }
+ close(heap_fd);
+ return ret;
+}
+
/* Test the ioctl version compatibility w/ a smaller structure then expected */
static int dmabuf_heap_alloc_older(int fd, size_t len, unsigned int flags,
int *dmabuf_fd)
@@ -292,23 +369,24 @@ static int test_alloc_compat(char *heap_name)
if (heap_fd < 0)
return -1;
- printf("Testing (theoretical)older alloc compat\n");
+ printf(" Testing (theoretical)older alloc compat: ");
ret = dmabuf_heap_alloc_older(heap_fd, ONE_MEG, 0, &dmabuf_fd);
if (ret) {
- printf("Older compat allocation failed!\n");
+ printf("FAIL (Older compat allocation failed!)\n");
ret = -1;
goto out;
}
close(dmabuf_fd);
+ printf("OK\n");
- printf("Testing (theoretical)newer alloc compat\n");
+ printf(" Testing (theoretical)newer alloc compat: ");
ret = dmabuf_heap_alloc_newer(heap_fd, ONE_MEG, 0, &dmabuf_fd);
if (ret) {
- printf("Newer compat allocation failed!\n");
+ printf("FAIL (Newer compat allocation failed!)\n");
ret = -1;
goto out;
}
- printf("Ioctl compatibility tests passed\n");
+ printf("OK\n");
out:
if (dmabuf_fd >= 0)
close(dmabuf_fd);
@@ -327,17 +405,17 @@ static int test_alloc_errors(char *heap_name)
if (heap_fd < 0)
return -1;
- printf("Testing expected error cases\n");
+ printf(" Testing expected error cases: ");
ret = dmabuf_heap_alloc(0, ONE_MEG, 0x111111, &dmabuf_fd);
if (!ret) {
- printf("Did not see expected error (invalid fd)!\n");
+ printf("FAIL (Did not see expected error (invalid fd)!)\n");
ret = -1;
goto out;
}
ret = dmabuf_heap_alloc(heap_fd, ONE_MEG, 0x111111, &dmabuf_fd);
if (!ret) {
- printf("Did not see expected error (invalid heap flags)!\n");
+ printf("FAIL (Did not see expected error (invalid heap flags)!)\n");
ret = -1;
goto out;
}
@@ -345,12 +423,12 @@ static int test_alloc_errors(char *heap_name)
ret = dmabuf_heap_alloc_fdflags(heap_fd, ONE_MEG,
~(O_RDWR | O_CLOEXEC), 0, &dmabuf_fd);
if (!ret) {
- printf("Did not see expected error (invalid fd flags)!\n");
+ printf("FAIL (Did not see expected error (invalid fd flags)!)\n");
ret = -1;
goto out;
}
- printf("Expected error checking passed\n");
+ printf("OK\n");
ret = 0;
out:
if (dmabuf_fd >= 0)
@@ -379,10 +457,20 @@ int main(void)
if (!strncmp(dir->d_name, "..", 3))
continue;
+ printf("Testing heap: %s\n", dir->d_name);
+ printf("=======================================\n");
ret = test_alloc_and_import(dir->d_name);
if (ret)
break;
+ ret = test_alloc_zeroed(dir->d_name, 4 * 1024);
+ if (ret)
+ break;
+
+ ret = test_alloc_zeroed(dir->d_name, ONE_MEG);
+ if (ret)
+ break;
+
ret = test_alloc_compat(dir->d_name);
if (ret)
break;
diff --git a/tools/testing/selftests/drivers/.gitignore b/tools/testing/selftests/drivers/.gitignore
index ca74f2e1c719..09e23b5afa96 100644
--- a/tools/testing/selftests/drivers/.gitignore
+++ b/tools/testing/selftests/drivers/.gitignore
@@ -1,2 +1,3 @@
# SPDX-License-Identifier: GPL-2.0-only
/dma-buf/udmabuf
+/s390x/uvdevice/test_uvdevice
diff --git a/tools/testing/selftests/drivers/dma-buf/Makefile b/tools/testing/selftests/drivers/dma-buf/Makefile
index 79cb16b4e01a..441407bb0e80 100644
--- a/tools/testing/selftests/drivers/dma-buf/Makefile
+++ b/tools/testing/selftests/drivers/dma-buf/Makefile
@@ -1,5 +1,5 @@
# SPDX-License-Identifier: GPL-2.0-only
-CFLAGS += -I../../../../../usr/include/
+CFLAGS += $(KHDR_INCLUDES)
TEST_GEN_PROGS := udmabuf
diff --git a/tools/testing/selftests/drivers/dma-buf/udmabuf.c b/tools/testing/selftests/drivers/dma-buf/udmabuf.c
index 4de902ea14d8..c812080e304e 100644
--- a/tools/testing/selftests/drivers/dma-buf/udmabuf.c
+++ b/tools/testing/selftests/drivers/dma-buf/udmabuf.c
@@ -1,10 +1,13 @@
// SPDX-License-Identifier: GPL-2.0
+#define _GNU_SOURCE
+#define __EXPORTED_HEADERS__
+
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>
#include <string.h>
#include <errno.h>
-#include <linux/fcntl.h>
+#include <fcntl.h>
#include <malloc.h>
#include <sys/ioctl.h>
@@ -29,7 +32,8 @@ int main(int argc, char *argv[])
devfd = open("/dev/udmabuf", O_RDWR);
if (devfd < 0) {
- printf("%s: [skip,no-udmabuf]\n", TEST_PREFIX);
+ printf("%s: [skip,no-udmabuf: Unable to access DMA buffer device file]\n",
+ TEST_PREFIX);
exit(77);
}
diff --git a/tools/testing/selftests/drivers/gpu/drm_mm.sh b/tools/testing/selftests/drivers/gpu/drm_mm.sh
index b789dc8257e6..09c76cd7661d 100755
--- a/tools/testing/selftests/drivers/gpu/drm_mm.sh
+++ b/tools/testing/selftests/drivers/gpu/drm_mm.sh
@@ -3,7 +3,7 @@
# Runs API tests for struct drm_mm (DRM range manager)
if ! /sbin/modprobe -n -q test-drm_mm; then
- echo "drivers/gpu/drm_mm: [skip]"
+ echo "drivers/gpu/drm_mm: module test-drm_mm is not found in /lib/modules/`uname -r` [skip]"
exit 77
fi
@@ -11,6 +11,6 @@ if /sbin/modprobe -q test-drm_mm; then
/sbin/modprobe -q -r test-drm_mm
echo "drivers/gpu/drm_mm: ok"
else
- echo "drivers/gpu/drm_mm: [FAIL]"
+ echo "drivers/gpu/drm_mm: module test-drm_mm could not be removed [FAIL]"
exit 1
fi
diff --git a/tools/testing/selftests/drivers/net/bonding/Makefile b/tools/testing/selftests/drivers/net/bonding/Makefile
new file mode 100644
index 000000000000..03a089165d3f
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/bonding/Makefile
@@ -0,0 +1,24 @@
+# SPDX-License-Identifier: GPL-2.0
+# Makefile for net selftests
+
+TEST_PROGS := \
+ bond-arp-interval-causes-panic.sh \
+ bond-break-lacpdu-tx.sh \
+ bond-lladdr-target.sh \
+ dev_addr_lists.sh \
+ mode-1-recovery-updelay.sh \
+ mode-2-recovery-updelay.sh \
+ bond_options.sh \
+ bond-eth-type-change.sh \
+ bond_macvlan.sh
+
+TEST_FILES := \
+ lag_lib.sh \
+ bond_topo_2d1c.sh \
+ bond_topo_3d1c.sh
+
+TEST_INCLUDES := \
+ ../../../net/forwarding/lib.sh \
+ ../../../net/lib.sh
+
+include ../../../lib.mk
diff --git a/tools/testing/selftests/drivers/net/bonding/bond-arp-interval-causes-panic.sh b/tools/testing/selftests/drivers/net/bonding/bond-arp-interval-causes-panic.sh
new file mode 100755
index 000000000000..5667febee328
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/bonding/bond-arp-interval-causes-panic.sh
@@ -0,0 +1,46 @@
+#!/bin/sh
+# SPDX-License-Identifier: GPL-2.0
+#
+# cause kernel oops in bond_rr_gen_slave_id
+DEBUG=${DEBUG:-0}
+
+set -e
+test ${DEBUG} -ne 0 && set -x
+
+finish()
+{
+ ip netns delete server || true
+ ip netns delete client || true
+}
+
+trap finish EXIT
+
+client_ip4=192.168.1.198
+server_ip4=192.168.1.254
+
+# setup kernel so it reboots after causing the panic
+echo 180 >/proc/sys/kernel/panic
+
+# build namespaces
+ip netns add "server"
+ip netns add "client"
+ip -n client link add eth0 type veth peer name eth0 netns server
+ip netns exec server ip link set dev eth0 up
+ip netns exec server ip addr add ${server_ip4}/24 dev eth0
+
+ip netns exec client ip link add dev bond0 down type bond mode 1 \
+ miimon 100 all_slaves_active 1
+ip netns exec client ip link set dev eth0 master bond0
+ip netns exec client ip link set dev bond0 up
+ip netns exec client ip addr add ${client_ip4}/24 dev bond0
+ip netns exec client ping -c 5 $server_ip4 >/dev/null
+
+ip netns exec client ip link set dev eth0 nomaster
+ip netns exec client ip link set dev bond0 down
+ip netns exec client ip link set dev bond0 type bond mode 0 \
+ arp_interval 1000 arp_ip_target "+${server_ip4}"
+ip netns exec client ip link set dev eth0 master bond0
+ip netns exec client ip link set dev bond0 up
+ip netns exec client ping -c 5 $server_ip4 >/dev/null
+
+exit 0
diff --git a/tools/testing/selftests/drivers/net/bonding/bond-break-lacpdu-tx.sh b/tools/testing/selftests/drivers/net/bonding/bond-break-lacpdu-tx.sh
new file mode 100755
index 000000000000..1ec7f59db7f4
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/bonding/bond-break-lacpdu-tx.sh
@@ -0,0 +1,80 @@
+#!/bin/sh
+# SPDX-License-Identifier: GPL-2.0
+
+# Regression Test:
+# Verify LACPDUs get transmitted after setting the MAC address of
+# the bond.
+#
+# https://bugzilla.redhat.com/show_bug.cgi?id=2020773
+#
+# +---------+
+# | fab-br0 |
+# +---------+
+# |
+# +---------+
+# | fbond |
+# +---------+
+# | |
+# +------+ +------+
+# |veth1 | |veth2 |
+# +------+ +------+
+#
+# We use veths instead of physical interfaces
+REQUIRE_MZ=no
+NUM_NETIFS=0
+lib_dir=$(dirname "$0")
+source "$lib_dir"/../../../net/forwarding/lib.sh
+
+set -e
+cleanup() {
+ ip link del fab-br0 >/dev/null 2>&1 || :
+ ip link del fbond >/dev/null 2>&1 || :
+ ip link del veth1-bond >/dev/null 2>&1 || :
+ ip link del veth2-bond >/dev/null 2>&1 || :
+}
+
+trap cleanup 0 1 2
+cleanup
+
+# create the bridge
+ip link add fab-br0 address 52:54:00:3B:7C:A6 mtu 1500 type bridge \
+ forward_delay 15
+
+# create the bond
+ip link add fbond type bond mode 4 miimon 200 xmit_hash_policy 1 \
+ ad_actor_sys_prio 65535 lacp_rate fast
+
+# set bond address
+ip link set fbond address 52:54:00:3B:7C:A6
+ip link set fbond up
+
+# set again bond sysfs parameters
+ip link set fbond type bond ad_actor_sys_prio 65535
+
+# create veths
+ip link add name veth1-bond type veth peer name veth1-end
+ip link add name veth2-bond type veth peer name veth2-end
+
+# add ports
+ip link set fbond master fab-br0
+ip link set veth1-bond master fbond
+ip link set veth2-bond master fbond
+
+# bring up
+ip link set veth1-end up
+ip link set veth2-end up
+ip link set fab-br0 up
+ip link set fbond up
+ip addr add dev fab-br0 10.0.0.3
+
+rc=0
+tc qdisc add dev veth1-end clsact
+tc filter add dev veth1-end ingress protocol 0x8809 pref 1 handle 101 flower skip_hw action pass
+if slowwait_for_counter 15 2 \
+ tc_rule_handle_stats_get "dev veth1-end ingress" 101 ".packets" "" &> /dev/null; then
+ echo "PASS, captured 2"
+else
+ echo "FAIL"
+ rc=1
+fi
+exit $rc
diff --git a/tools/testing/selftests/drivers/net/bonding/bond-eth-type-change.sh b/tools/testing/selftests/drivers/net/bonding/bond-eth-type-change.sh
new file mode 100755
index 000000000000..8293dbc7c18f
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/bonding/bond-eth-type-change.sh
@@ -0,0 +1,84 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+#
+# Test bond device ether type changing
+#
+
+ALL_TESTS="
+ bond_test_unsuccessful_enslave_type_change
+ bond_test_successful_enslave_type_change
+"
+REQUIRE_MZ=no
+NUM_NETIFS=0
+lib_dir=$(dirname "$0")
+source "$lib_dir"/../../../net/forwarding/lib.sh
+
+bond_check_flags()
+{
+ local bonddev=$1
+
+ ip -d l sh dev "$bonddev" | grep -q "MASTER"
+ check_err $? "MASTER flag is missing from the bond device"
+
+ ip -d l sh dev "$bonddev" | grep -q "SLAVE"
+ check_err $? "SLAVE flag is missing from the bond device"
+}
+
+# test enslaved bond dev type change from ARPHRD_ETHER and back
+# this allows us to test both MASTER and SLAVE flags at once
+bond_test_enslave_type_change()
+{
+ local test_success=$1
+ local devbond0="test-bond0"
+ local devbond1="test-bond1"
+ local devbond2="test-bond2"
+ local nonethdev="test-noneth0"
+
+ # create a non-ARPHRD_ETHER device for testing (e.g. nlmon type)
+ ip link add name "$nonethdev" type nlmon
+ check_err $? "could not create a non-ARPHRD_ETHER device (nlmon)"
+ ip link add name "$devbond0" type bond
+ if [ $test_success -eq 1 ]; then
+ # we need devbond0 in active-backup mode to successfully enslave nonethdev
+ ip link set dev "$devbond0" type bond mode active-backup
+ check_err $? "could not change bond mode to active-backup"
+ fi
+ ip link add name "$devbond1" type bond
+ ip link add name "$devbond2" type bond
+ ip link set dev "$devbond0" master "$devbond1"
+ check_err $? "could not enslave $devbond0 to $devbond1"
+ # change bond type to non-ARPHRD_ETHER
+ ip link set dev "$nonethdev" master "$devbond0" 1>/dev/null 2>/dev/null
+ ip link set dev "$nonethdev" nomaster 1>/dev/null 2>/dev/null
+ # restore ARPHRD_ETHER type by enslaving such device
+ ip link set dev "$devbond2" master "$devbond0"
+ check_err $? "could not enslave $devbond2 to $devbond0"
+
+ bond_check_flags "$devbond0"
+
+ # clean up
+ ip link del dev "$devbond0"
+ ip link del dev "$devbond1"
+ ip link del dev "$devbond2"
+ ip link del dev "$nonethdev"
+}
+
+bond_test_unsuccessful_enslave_type_change()
+{
+ RET=0
+
+ bond_test_enslave_type_change 0
+ log_test "Change ether type of an enslaved bond device with unsuccessful enslave"
+}
+
+bond_test_successful_enslave_type_change()
+{
+ RET=0
+
+ bond_test_enslave_type_change 1
+ log_test "Change ether type of an enslaved bond device with successful enslave"
+}
+
+tests_run
+
+exit "$EXIT_STATUS"
diff --git a/tools/testing/selftests/drivers/net/bonding/bond-lladdr-target.sh b/tools/testing/selftests/drivers/net/bonding/bond-lladdr-target.sh
new file mode 100755
index 000000000000..78d3e0fe6604
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/bonding/bond-lladdr-target.sh
@@ -0,0 +1,80 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+# Regression Test:
+# Verify bond interface could up when set IPv6 link local address target.
+#
+# +----------------+
+# | br0 |
+# | | | sw
+# | veth0 veth1 |
+# +---+-------+----+
+# | |
+# +---+-------+----+
+# | veth0 veth1 |
+# | | | host
+# | bond0 |
+# +----------------+
+#
+# We use veths instead of physical interfaces
+REQUIRE_MZ=no
+NUM_NETIFS=0
+lib_dir=$(dirname "$0")
+source "$lib_dir"/../../../net/forwarding/lib.sh
+
+sw="sw-$(mktemp -u XXXXXX)"
+host="ns-$(mktemp -u XXXXXX)"
+
+cleanup()
+{
+ ip netns del $sw
+ ip netns del $host
+}
+
+wait_lladdr_dad()
+{
+ $@ | grep fe80 | grep -qv tentative
+}
+
+wait_bond_up()
+{
+ $@ | grep -q 'state UP'
+}
+
+trap cleanup 0 1 2
+
+ip netns add $sw
+ip netns add $host
+
+ip -n $host link add veth0 type veth peer name veth0 netns $sw
+ip -n $host link add veth1 type veth peer name veth1 netns $sw
+
+ip -n $sw link add br0 type bridge
+ip -n $sw link set br0 up
+sw_lladdr=$(ip -n $sw addr show br0 | awk '/fe80/{print $2}' | cut -d'/' -f1)
+# wait some time to make sure bridge lladdr pass DAD
+slowwait 2 wait_lladdr_dad ip -n $sw addr show br0
+
+ip -n $host link add bond0 type bond mode 1 ns_ip6_target ${sw_lladdr} \
+ arp_validate 3 arp_interval 1000
+# add a lladdr for bond to make sure there is a route to target
+ip -n $host addr add fe80::beef/64 dev bond0
+ip -n $host link set bond0 up
+ip -n $host link set veth0 master bond0
+ip -n $host link set veth1 master bond0
+
+ip -n $sw link set veth0 master br0
+ip -n $sw link set veth1 master br0
+ip -n $sw link set veth0 up
+ip -n $sw link set veth1 up
+
+slowwait 5 wait_bond_up ip -n $host link show bond0
+
+rc=0
+if ip -n $host link show bond0 | grep -q LOWER_UP; then
+ echo "PASS"
+else
+ echo "FAIL"
+ rc=1
+fi
+exit $rc
diff --git a/tools/testing/selftests/drivers/net/bonding/bond_macvlan.sh b/tools/testing/selftests/drivers/net/bonding/bond_macvlan.sh
new file mode 100755
index 000000000000..b609fb6231f4
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/bonding/bond_macvlan.sh
@@ -0,0 +1,99 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+#
+# Test macvlan over balance-alb
+
+lib_dir=$(dirname "$0")
+source ${lib_dir}/bond_topo_2d1c.sh
+
+m1_ns="m1-$(mktemp -u XXXXXX)"
+m2_ns="m1-$(mktemp -u XXXXXX)"
+m1_ip4="192.0.2.11"
+m1_ip6="2001:db8::11"
+m2_ip4="192.0.2.12"
+m2_ip6="2001:db8::12"
+
+cleanup()
+{
+ ip -n ${m1_ns} link del macv0
+ ip netns del ${m1_ns}
+ ip -n ${m2_ns} link del macv0
+ ip netns del ${m2_ns}
+
+ client_destroy
+ server_destroy
+ gateway_destroy
+}
+
+check_connection()
+{
+ local ns=${1}
+ local target=${2}
+ local message=${3:-"macvlan_over_bond"}
+ RET=0
+
+
+ ip netns exec ${ns} ping ${target} -c 4 -i 0.1 &>/dev/null
+ check_err $? "ping failed"
+ log_test "$mode: $message"
+}
+
+macvlan_over_bond()
+{
+ local param="$1"
+ RET=0
+
+ # setup new bond mode
+ bond_reset "${param}"
+
+ ip -n ${s_ns} link add link bond0 name macv0 type macvlan mode bridge
+ ip -n ${s_ns} link set macv0 netns ${m1_ns}
+ ip -n ${m1_ns} link set dev macv0 up
+ ip -n ${m1_ns} addr add ${m1_ip4}/24 dev macv0
+ ip -n ${m1_ns} addr add ${m1_ip6}/24 dev macv0
+
+ ip -n ${s_ns} link add link bond0 name macv0 type macvlan mode bridge
+ ip -n ${s_ns} link set macv0 netns ${m2_ns}
+ ip -n ${m2_ns} link set dev macv0 up
+ ip -n ${m2_ns} addr add ${m2_ip4}/24 dev macv0
+ ip -n ${m2_ns} addr add ${m2_ip6}/24 dev macv0
+
+ sleep 2
+
+ check_connection "${c_ns}" "${s_ip4}" "IPv4: client->server"
+ check_connection "${c_ns}" "${s_ip6}" "IPv6: client->server"
+ check_connection "${c_ns}" "${m1_ip4}" "IPv4: client->macvlan_1"
+ check_connection "${c_ns}" "${m1_ip6}" "IPv6: client->macvlan_1"
+ check_connection "${c_ns}" "${m2_ip4}" "IPv4: client->macvlan_2"
+ check_connection "${c_ns}" "${m2_ip6}" "IPv6: client->macvlan_2"
+ check_connection "${m1_ns}" "${m2_ip4}" "IPv4: macvlan_1->macvlan_2"
+ check_connection "${m1_ns}" "${m2_ip6}" "IPv6: macvlan_1->macvlan_2"
+
+
+ sleep 5
+
+ check_connection "${s_ns}" "${c_ip4}" "IPv4: server->client"
+ check_connection "${s_ns}" "${c_ip6}" "IPv6: server->client"
+ check_connection "${m1_ns}" "${c_ip4}" "IPv4: macvlan_1->client"
+ check_connection "${m1_ns}" "${c_ip6}" "IPv6: macvlan_1->client"
+ check_connection "${m2_ns}" "${c_ip4}" "IPv4: macvlan_2->client"
+ check_connection "${m2_ns}" "${c_ip6}" "IPv6: macvlan_2->client"
+ check_connection "${m2_ns}" "${m1_ip4}" "IPv4: macvlan_2->macvlan_2"
+ check_connection "${m2_ns}" "${m1_ip6}" "IPv6: macvlan_2->macvlan_2"
+
+ ip -n ${c_ns} neigh flush dev eth0
+}
+
+trap cleanup EXIT
+
+setup_prepare
+ip netns add ${m1_ns}
+ip netns add ${m2_ns}
+
+modes="active-backup balance-tlb balance-alb"
+
+for mode in $modes; do
+ macvlan_over_bond "mode $mode"
+done
+
+exit $EXIT_STATUS
diff --git a/tools/testing/selftests/drivers/net/bonding/bond_options.sh b/tools/testing/selftests/drivers/net/bonding/bond_options.sh
new file mode 100755
index 000000000000..41d0859feb7d
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/bonding/bond_options.sh
@@ -0,0 +1,333 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+#
+# Test bonding options with mode 1,5,6
+
+ALL_TESTS="
+ prio
+ arp_validate
+ num_grat_arp
+"
+
+lib_dir=$(dirname "$0")
+source ${lib_dir}/bond_topo_3d1c.sh
+
+skip_prio()
+{
+ local skip=1
+
+ # check if iproute support prio option
+ ip -n ${s_ns} link set eth0 type bond_slave prio 10
+ [[ $? -ne 0 ]] && skip=0
+
+ # check if kernel support prio option
+ ip -n ${s_ns} -d link show eth0 | grep -q "prio 10"
+ [[ $? -ne 0 ]] && skip=0
+
+ return $skip
+}
+
+skip_ns()
+{
+ local skip=1
+
+ # check if iproute support ns_ip6_target option
+ ip -n ${s_ns} link add bond1 type bond ns_ip6_target ${g_ip6}
+ [[ $? -ne 0 ]] && skip=0
+
+ # check if kernel support ns_ip6_target option
+ ip -n ${s_ns} -d link show bond1 | grep -q "ns_ip6_target ${g_ip6}"
+ [[ $? -ne 0 ]] && skip=0
+
+ ip -n ${s_ns} link del bond1
+
+ return $skip
+}
+
+active_slave=""
+active_slave_changed()
+{
+ local old_active_slave=$1
+ local new_active_slave=$(cmd_jq "ip -n ${s_ns} -d -j link show bond0" \
+ ".[].linkinfo.info_data.active_slave")
+ [ "$new_active_slave" != "$old_active_slave" -a "$new_active_slave" != "null" ]
+}
+
+check_active_slave()
+{
+ local target_active_slave=$1
+ slowwait 5 active_slave_changed $active_slave
+ active_slave=$(cmd_jq "ip -n ${s_ns} -d -j link show bond0" ".[].linkinfo.info_data.active_slave")
+ test "$active_slave" = "$target_active_slave"
+ check_err $? "Current active slave is $active_slave but not $target_active_slave"
+}
+
+# Test bonding prio option
+prio_test()
+{
+ local param="$1"
+ RET=0
+
+ # create bond
+ bond_reset "${param}"
+ # set active_slave to primary eth1 specifically
+ ip -n ${s_ns} link set bond0 type bond active_slave eth1
+
+ # check bonding member prio value
+ ip -n ${s_ns} link set eth0 type bond_slave prio 0
+ ip -n ${s_ns} link set eth1 type bond_slave prio 10
+ ip -n ${s_ns} link set eth2 type bond_slave prio 11
+ cmd_jq "ip -n ${s_ns} -d -j link show eth0" \
+ ".[].linkinfo.info_slave_data | select (.prio == 0)" "-e" &> /dev/null
+ check_err $? "eth0 prio is not 0"
+ cmd_jq "ip -n ${s_ns} -d -j link show eth1" \
+ ".[].linkinfo.info_slave_data | select (.prio == 10)" "-e" &> /dev/null
+ check_err $? "eth1 prio is not 10"
+ cmd_jq "ip -n ${s_ns} -d -j link show eth2" \
+ ".[].linkinfo.info_slave_data | select (.prio == 11)" "-e" &> /dev/null
+ check_err $? "eth2 prio is not 11"
+
+ bond_check_connection "setup"
+
+ # active slave should be the primary slave
+ check_active_slave eth1
+
+ # active slave should be the higher prio slave
+ ip -n ${s_ns} link set $active_slave down
+ check_active_slave eth2
+ bond_check_connection "fail over"
+
+ # when only 1 slave is up
+ ip -n ${s_ns} link set $active_slave down
+ check_active_slave eth0
+ bond_check_connection "only 1 slave up"
+
+ # when a higher prio slave change to up
+ ip -n ${s_ns} link set eth2 up
+ bond_check_connection "higher prio slave up"
+ case $primary_reselect in
+ "0")
+ check_active_slave "eth2"
+ ;;
+ "1")
+ check_active_slave "eth0"
+ ;;
+ "2")
+ check_active_slave "eth0"
+ ;;
+ esac
+ local pre_active_slave=$active_slave
+
+ # when the primary slave change to up
+ ip -n ${s_ns} link set eth1 up
+ bond_check_connection "primary slave up"
+ case $primary_reselect in
+ "0")
+ check_active_slave "eth1"
+ ;;
+ "1")
+ check_active_slave "$pre_active_slave"
+ ;;
+ "2")
+ check_active_slave "$pre_active_slave"
+ ip -n ${s_ns} link set $active_slave down
+ bond_check_connection "pre_active slave down"
+ check_active_slave "eth1"
+ ;;
+ esac
+
+ # Test changing bond slave prio
+ if [[ "$primary_reselect" == "0" ]];then
+ ip -n ${s_ns} link set eth0 type bond_slave prio 1000000
+ ip -n ${s_ns} link set eth1 type bond_slave prio 0
+ ip -n ${s_ns} link set eth2 type bond_slave prio -50
+ ip -n ${s_ns} -d link show eth0 | grep -q 'prio 1000000'
+ check_err $? "eth0 prio is not 1000000"
+ ip -n ${s_ns} -d link show eth1 | grep -q 'prio 0'
+ check_err $? "eth1 prio is not 0"
+ ip -n ${s_ns} -d link show eth2 | grep -q 'prio -50'
+ check_err $? "eth3 prio is not -50"
+ check_active_slave "eth1"
+
+ ip -n ${s_ns} link set $active_slave down
+ check_active_slave "eth0"
+ bond_check_connection "change slave prio"
+ fi
+}
+
+prio_miimon()
+{
+ local primary_reselect
+ local mode=$1
+
+ for primary_reselect in 0 1 2; do
+ prio_test "mode $mode miimon 100 primary eth1 primary_reselect $primary_reselect"
+ log_test "prio" "$mode miimon primary_reselect $primary_reselect"
+ done
+}
+
+prio_arp()
+{
+ local primary_reselect
+ local mode=$1
+
+ for primary_reselect in 0 1 2; do
+ prio_test "mode $mode arp_interval 100 arp_ip_target ${g_ip4} primary eth1 primary_reselect $primary_reselect"
+ log_test "prio" "$mode arp_ip_target primary_reselect $primary_reselect"
+ done
+}
+
+prio_ns()
+{
+ local primary_reselect
+ local mode=$1
+
+ if skip_ns; then
+ log_test_skip "prio ns" "Current iproute or kernel doesn't support bond option 'ns_ip6_target'."
+ return 0
+ fi
+
+ for primary_reselect in 0 1 2; do
+ prio_test "mode $mode arp_interval 100 ns_ip6_target ${g_ip6} primary eth1 primary_reselect $primary_reselect"
+ log_test "prio" "$mode ns_ip6_target primary_reselect $primary_reselect"
+ done
+}
+
+prio()
+{
+ local mode modes="active-backup balance-tlb balance-alb"
+
+ if skip_prio; then
+ log_test_skip "prio" "Current iproute or kernel doesn't support bond option 'prio'."
+ return 0
+ fi
+
+ for mode in $modes; do
+ prio_miimon $mode
+ done
+ prio_arp "active-backup"
+ prio_ns "active-backup"
+}
+
+wait_mii_up()
+{
+ for i in $(seq 0 2); do
+ mii_status=$(cmd_jq "ip -n ${s_ns} -j -d link show eth$i" ".[].linkinfo.info_slave_data.mii_status")
+ [ ${mii_status} != "UP" ] && return 1
+ done
+ return 0
+}
+
+arp_validate_test()
+{
+ local param="$1"
+ RET=0
+
+ # create bond
+ bond_reset "${param}"
+
+ bond_check_connection
+ [ $RET -ne 0 ] && log_test "arp_validate" "$retmsg"
+
+ # wait for a while to make sure the mii status stable
+ slowwait 5 wait_mii_up
+ for i in $(seq 0 2); do
+ mii_status=$(cmd_jq "ip -n ${s_ns} -j -d link show eth$i" ".[].linkinfo.info_slave_data.mii_status")
+ if [ ${mii_status} != "UP" ]; then
+ RET=1
+ log_test "arp_validate" "interface eth$i mii_status $mii_status"
+ fi
+ done
+}
+
+arp_validate_arp()
+{
+ local mode=$1
+ local val
+ for val in $(seq 0 6); do
+ arp_validate_test "mode $mode arp_interval 100 arp_ip_target ${g_ip4} arp_validate $val"
+ log_test "arp_validate" "$mode arp_ip_target arp_validate $val"
+ done
+}
+
+arp_validate_ns()
+{
+ local mode=$1
+ local val
+
+ if skip_ns; then
+ log_test_skip "arp_validate ns" "Current iproute or kernel doesn't support bond option 'ns_ip6_target'."
+ return 0
+ fi
+
+ for val in $(seq 0 6); do
+ arp_validate_test "mode $mode arp_interval 100 ns_ip6_target ${g_ip6} arp_validate $val"
+ log_test "arp_validate" "$mode ns_ip6_target arp_validate $val"
+ done
+}
+
+arp_validate()
+{
+ arp_validate_arp "active-backup"
+ arp_validate_ns "active-backup"
+}
+
+garp_test()
+{
+ local param="$1"
+ local active_slave exp_num real_num i
+ RET=0
+
+ # create bond
+ bond_reset "${param}"
+
+ bond_check_connection
+ [ $RET -ne 0 ] && log_test "num_grat_arp" "$retmsg"
+
+
+ # Add tc rules to count GARP number
+ for i in $(seq 0 2); do
+ tc -n ${g_ns} filter add dev s$i ingress protocol arp pref 1 handle 101 \
+ flower skip_hw arp_op request arp_sip ${s_ip4} arp_tip ${s_ip4} action pass
+ done
+
+ # Do failover
+ active_slave=$(cmd_jq "ip -n ${s_ns} -d -j link show bond0" ".[].linkinfo.info_data.active_slave")
+ ip -n ${s_ns} link set ${active_slave} down
+
+ # wait for active link change
+ slowwait 2 active_slave_changed $active_slave
+
+ exp_num=$(echo "${param}" | cut -f6 -d ' ')
+ active_slave=$(cmd_jq "ip -n ${s_ns} -d -j link show bond0" ".[].linkinfo.info_data.active_slave")
+ slowwait_for_counter $((exp_num + 5)) $exp_num \
+ tc_rule_handle_stats_get "dev s${active_slave#eth} ingress" 101 ".packets" "-n ${g_ns}"
+
+ # check result
+ real_num=$(tc_rule_handle_stats_get "dev s${active_slave#eth} ingress" 101 ".packets" "-n ${g_ns}")
+ if [ "${real_num}" -ne "${exp_num}" ]; then
+ echo "$real_num garp packets sent on active slave ${active_slave}"
+ RET=1
+ fi
+
+ for i in $(seq 0 2); do
+ tc -n ${g_ns} filter del dev s$i ingress
+ done
+}
+
+num_grat_arp()
+{
+ local val
+ for val in 10 20 30; do
+ garp_test "mode active-backup miimon 10 num_grat_arp $val peer_notify_delay 100"
+ log_test "num_grat_arp" "active-backup miimon num_grat_arp $val"
+ done
+}
+
+trap cleanup EXIT
+
+setup_prepare
+setup_wait
+tests_run
+
+exit $EXIT_STATUS
diff --git a/tools/testing/selftests/drivers/net/bonding/bond_topo_2d1c.sh b/tools/testing/selftests/drivers/net/bonding/bond_topo_2d1c.sh
new file mode 100644
index 000000000000..195ef83cfbf1
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/bonding/bond_topo_2d1c.sh
@@ -0,0 +1,158 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+#
+# Topology for Bond mode 1,5,6 testing
+#
+# +-------------------------+
+# | bond0 | Server
+# | + | 192.0.2.1/24
+# | eth0 | eth1 | 2001:db8::1/24
+# | +---+---+ |
+# | | | |
+# +-------------------------+
+# | |
+# +-------------------------+
+# | | | |
+# | +---+-------+---+ | Gateway
+# | | br0 | | 192.0.2.254/24
+# | +-------+-------+ | 2001:db8::254/24
+# | | |
+# +-------------------------+
+# |
+# +-------------------------+
+# | | | Client
+# | + | 192.0.2.10/24
+# | eth0 | 2001:db8::10/24
+# +-------------------------+
+
+REQUIRE_MZ=no
+NUM_NETIFS=0
+lib_dir=$(dirname "$0")
+source "$lib_dir"/../../../net/forwarding/lib.sh
+
+s_ns="s-$(mktemp -u XXXXXX)"
+c_ns="c-$(mktemp -u XXXXXX)"
+g_ns="g-$(mktemp -u XXXXXX)"
+s_ip4="192.0.2.1"
+c_ip4="192.0.2.10"
+g_ip4="192.0.2.254"
+s_ip6="2001:db8::1"
+c_ip6="2001:db8::10"
+g_ip6="2001:db8::254"
+
+gateway_create()
+{
+ ip netns add ${g_ns}
+ ip -n ${g_ns} link add br0 type bridge
+ ip -n ${g_ns} link set br0 up
+ ip -n ${g_ns} addr add ${g_ip4}/24 dev br0
+ ip -n ${g_ns} addr add ${g_ip6}/24 dev br0
+}
+
+gateway_destroy()
+{
+ ip -n ${g_ns} link del br0
+ ip netns del ${g_ns}
+}
+
+server_create()
+{
+ ip netns add ${s_ns}
+ ip -n ${s_ns} link add bond0 type bond mode active-backup miimon 100
+
+ for i in $(seq 0 1); do
+ ip -n ${s_ns} link add eth${i} type veth peer name s${i} netns ${g_ns}
+
+ ip -n ${g_ns} link set s${i} up
+ ip -n ${g_ns} link set s${i} master br0
+ ip -n ${s_ns} link set eth${i} master bond0
+
+ tc -n ${g_ns} qdisc add dev s${i} clsact
+ done
+
+ ip -n ${s_ns} link set bond0 up
+ ip -n ${s_ns} addr add ${s_ip4}/24 dev bond0
+ ip -n ${s_ns} addr add ${s_ip6}/24 dev bond0
+}
+
+# Reset bond with new mode and options
+bond_reset()
+{
+ # Count the eth link number in real-time as this function
+ # maybe called from other topologies.
+ local link_num=$(ip -n ${s_ns} -br link show | grep -c "^eth")
+ local param="$1"
+ link_num=$((link_num -1))
+
+ ip -n ${s_ns} link set bond0 down
+ ip -n ${s_ns} link del bond0
+
+ ip -n ${s_ns} link add bond0 type bond $param
+ for i in $(seq 0 ${link_num}); do
+ ip -n ${s_ns} link set eth$i master bond0
+ done
+
+ ip -n ${s_ns} link set bond0 up
+ ip -n ${s_ns} addr add ${s_ip4}/24 dev bond0
+ ip -n ${s_ns} addr add ${s_ip6}/24 dev bond0
+ # Wait for IPv6 address ready as it needs DAD
+ slowwait 2 ip netns exec ${s_ns} ping6 ${c_ip6} -c 1 -W 0.1 &> /dev/null
+}
+
+server_destroy()
+{
+ # Count the eth link number in real-time as this function
+ # maybe called from other topologies.
+ local link_num=$(ip -n ${s_ns} -br link show | grep -c "^eth")
+ link_num=$((link_num -1))
+ for i in $(seq 0 ${link_num}); do
+ ip -n ${s_ns} link del eth${i}
+ done
+ ip netns del ${s_ns}
+}
+
+client_create()
+{
+ ip netns add ${c_ns}
+ ip -n ${c_ns} link add eth0 type veth peer name c0 netns ${g_ns}
+
+ ip -n ${g_ns} link set c0 up
+ ip -n ${g_ns} link set c0 master br0
+
+ ip -n ${c_ns} link set eth0 up
+ ip -n ${c_ns} addr add ${c_ip4}/24 dev eth0
+ ip -n ${c_ns} addr add ${c_ip6}/24 dev eth0
+}
+
+client_destroy()
+{
+ ip -n ${c_ns} link del eth0
+ ip netns del ${c_ns}
+}
+
+setup_prepare()
+{
+ gateway_create
+ server_create
+ client_create
+}
+
+cleanup()
+{
+ pre_cleanup
+
+ client_destroy
+ server_destroy
+ gateway_destroy
+}
+
+bond_check_connection()
+{
+ local msg=${1:-"check connection"}
+
+ slowwait 2 ip netns exec ${s_ns} ping ${c_ip4} -c 1 -W 0.1 &> /dev/null
+ ip netns exec ${s_ns} ping ${c_ip4} -c5 -i 0.1 &>/dev/null
+ check_err $? "${msg}: ping failed"
+ ip netns exec ${s_ns} ping6 ${c_ip6} -c5 -i 0.1 &>/dev/null
+ check_err $? "${msg}: ping6 failed"
+}
diff --git a/tools/testing/selftests/drivers/net/bonding/bond_topo_3d1c.sh b/tools/testing/selftests/drivers/net/bonding/bond_topo_3d1c.sh
new file mode 100644
index 000000000000..3a1333d9a85b
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/bonding/bond_topo_3d1c.sh
@@ -0,0 +1,43 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+#
+# Topology for Bond mode 1,5,6 testing
+#
+# +-------------------------------------+
+# | bond0 |
+# | + | Server
+# | eth0 | eth1 eth2 | 192.0.2.1/24
+# | +-------------------+ | 2001:db8::1/24
+# | | | | |
+# +-------------------------------------+
+# | | |
+# +-------------------------------------+
+# | | | | |
+# | +---+---------+---------+---+ | Gateway
+# | | br0 | | 192.0.2.254/24
+# | +-------------+-------------+ | 2001:db8::254/24
+# | | |
+# +-------------------------------------+
+# |
+# +-------------------------------------+
+# | | | Client
+# | + | 192.0.2.10/24
+# | eth0 | 2001:db8::10/24
+# +-------------------------------------+
+
+source bond_topo_2d1c.sh
+
+setup_prepare()
+{
+ gateway_create
+ server_create
+ client_create
+
+ # Add the extra device as we use 3 down links for bond0
+ local i=2
+ ip -n ${s_ns} link add eth${i} type veth peer name s${i} netns ${g_ns}
+ ip -n ${g_ns} link set s${i} up
+ ip -n ${g_ns} link set s${i} master br0
+ ip -n ${s_ns} link set eth${i} master bond0
+ tc -n ${g_ns} qdisc add dev s${i} clsact
+}
diff --git a/tools/testing/selftests/drivers/net/bonding/config b/tools/testing/selftests/drivers/net/bonding/config
new file mode 100644
index 000000000000..899d7fb6ea8e
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/bonding/config
@@ -0,0 +1,10 @@
+CONFIG_BONDING=y
+CONFIG_BRIDGE=y
+CONFIG_DUMMY=y
+CONFIG_IPV6=y
+CONFIG_MACVLAN=y
+CONFIG_NET_ACT_GACT=y
+CONFIG_NET_CLS_FLOWER=y
+CONFIG_NET_SCH_INGRESS=y
+CONFIG_NLMON=y
+CONFIG_VETH=y
diff --git a/tools/testing/selftests/drivers/net/bonding/dev_addr_lists.sh b/tools/testing/selftests/drivers/net/bonding/dev_addr_lists.sh
new file mode 100755
index 000000000000..e6fa24eded5b
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/bonding/dev_addr_lists.sh
@@ -0,0 +1,109 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+#
+# Test bond device handling of addr lists (dev->uc, mc)
+#
+
+ALL_TESTS="
+ bond_cleanup_mode1
+ bond_cleanup_mode4
+ bond_listen_lacpdu_multicast_case_down
+ bond_listen_lacpdu_multicast_case_up
+"
+
+REQUIRE_MZ=no
+NUM_NETIFS=0
+lib_dir=$(dirname "$0")
+source "$lib_dir"/../../../net/forwarding/lib.sh
+
+source "$lib_dir"/lag_lib.sh
+
+
+destroy()
+{
+ local ifnames=(dummy1 dummy2 bond1 mv0)
+ local ifname
+
+ for ifname in "${ifnames[@]}"; do
+ ip link del "$ifname" &>/dev/null
+ done
+}
+
+cleanup()
+{
+ pre_cleanup
+
+ destroy
+}
+
+
+# bond driver control paths vary between modes that have a primary slave
+# (bond_uses_primary()) and others. Test both kinds of modes.
+
+bond_cleanup_mode1()
+{
+ RET=0
+
+ test_LAG_cleanup "bonding" "active-backup"
+}
+
+bond_cleanup_mode4() {
+ RET=0
+
+ test_LAG_cleanup "bonding" "802.3ad"
+}
+
+bond_listen_lacpdu_multicast()
+{
+ # Initial state of bond device, up | down
+ local init_state=$1
+ local lacpdu_mc="01:80:c2:00:00:02"
+
+ ip link add dummy1 type dummy
+ ip link add bond1 "$init_state" type bond mode 802.3ad
+ ip link set dev dummy1 master bond1
+ if [ "$init_state" = "down" ]; then
+ ip link set dev bond1 up
+ fi
+
+ grep_bridge_fdb "$lacpdu_mc" bridge fdb show brport dummy1 >/dev/null
+ check_err $? "LACPDU multicast address not present on slave (1)"
+
+ ip link set dev bond1 down
+
+ not grep_bridge_fdb "$lacpdu_mc" bridge fdb show brport dummy1 >/dev/null
+ check_err $? "LACPDU multicast address still present on slave"
+
+ ip link set dev bond1 up
+
+ grep_bridge_fdb "$lacpdu_mc" bridge fdb show brport dummy1 >/dev/null
+ check_err $? "LACPDU multicast address not present on slave (2)"
+
+ cleanup
+
+ log_test "bonding LACPDU multicast address to slave (from bond $init_state)"
+}
+
+# The LACPDU mc addr is added by different paths depending on the initial state
+# of the bond when enslaving a device. Test both cases.
+
+bond_listen_lacpdu_multicast_case_down()
+{
+ RET=0
+
+ bond_listen_lacpdu_multicast "down"
+}
+
+bond_listen_lacpdu_multicast_case_up()
+{
+ RET=0
+
+ bond_listen_lacpdu_multicast "up"
+}
+
+
+trap cleanup EXIT
+
+tests_run
+
+exit "$EXIT_STATUS"
diff --git a/tools/testing/selftests/drivers/net/bonding/lag_lib.sh b/tools/testing/selftests/drivers/net/bonding/lag_lib.sh
new file mode 100644
index 000000000000..bf9bcd1b5ec0
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/bonding/lag_lib.sh
@@ -0,0 +1,177 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+NAMESPACES=""
+
+# Test that a link aggregation device (bonding, team) removes the hardware
+# addresses that it adds on its underlying devices.
+test_LAG_cleanup()
+{
+ local driver=$1
+ local mode=$2
+ local ucaddr="02:00:00:12:34:56"
+ local addr6="fe80::78:9abc/64"
+ local mcaddr="33:33:ff:78:9a:bc"
+ local name
+
+ ip link add dummy1 type dummy
+ ip link add dummy2 type dummy
+ if [ "$driver" = "bonding" ]; then
+ name="bond1"
+ ip link add "$name" up type bond mode "$mode"
+ ip link set dev dummy1 master "$name"
+ ip link set dev dummy2 master "$name"
+ elif [ "$driver" = "team" ]; then
+ name="team0"
+ teamd -d -c '
+ {
+ "device": "'"$name"'",
+ "runner": {
+ "name": "'"$mode"'"
+ },
+ "ports": {
+ "dummy1":
+ {},
+ "dummy2":
+ {}
+ }
+ }
+ '
+ ip link set dev "$name" up
+ else
+ check_err 1
+ log_test test_LAG_cleanup ": unknown driver \"$driver\""
+ return
+ fi
+
+ # Used to test dev->uc handling
+ ip link add mv0 link "$name" up address "$ucaddr" type macvlan
+ # Used to test dev->mc handling
+ ip address add "$addr6" dev "$name"
+
+ # Check that addresses were added as expected
+ (grep_bridge_fdb "$ucaddr" bridge fdb show dev dummy1 ||
+ grep_bridge_fdb "$ucaddr" bridge fdb show dev dummy2) >/dev/null
+ check_err $? "macvlan unicast address not found on a slave"
+
+ # mcaddr is added asynchronously by addrconf_dad_work(), use busywait
+ (busywait 10000 grep_bridge_fdb "$mcaddr" bridge fdb show dev dummy1 ||
+ grep_bridge_fdb "$mcaddr" bridge fdb show dev dummy2) >/dev/null
+ check_err $? "IPv6 solicited-node multicast mac address not found on a slave"
+
+ ip link set dev "$name" down
+ ip link del "$name"
+
+ not grep_bridge_fdb "$ucaddr" bridge fdb show >/dev/null
+ check_err $? "macvlan unicast address still present on a slave"
+
+ not grep_bridge_fdb "$mcaddr" bridge fdb show >/dev/null
+ check_err $? "IPv6 solicited-node multicast mac address still present on a slave"
+
+ cleanup
+
+ log_test "$driver cleanup mode $mode"
+}
+
+# Build a generic 2 node net namespace with 2 connections
+# between the namespaces
+#
+# +-----------+ +-----------+
+# | node1 | | node2 |
+# | | | |
+# | | | |
+# | eth0 +-------+ eth0 |
+# | | | |
+# | eth1 +-------+ eth1 |
+# | | | |
+# +-----------+ +-----------+
+lag_setup2x2()
+{
+ local state=${1:-down}
+ local namespaces="lag_node1 lag_node2"
+
+ # create namespaces
+ for n in ${namespaces}; do
+ ip netns add ${n}
+ done
+
+ # wire up namespaces
+ ip link add name lag1 type veth peer name lag1-end
+ ip link set dev lag1 netns lag_node1 $state name eth0
+ ip link set dev lag1-end netns lag_node2 $state name eth0
+
+ ip link add name lag1 type veth peer name lag1-end
+ ip link set dev lag1 netns lag_node1 $state name eth1
+ ip link set dev lag1-end netns lag_node2 $state name eth1
+
+ NAMESPACES="${namespaces}"
+}
+
+# cleanup all lag related namespaces
+lag_cleanup()
+{
+ for n in ${NAMESPACES}; do
+ ip netns delete ${n} >/dev/null 2>&1 || true
+ done
+}
+
+SWITCH="lag_node1"
+CLIENT="lag_node2"
+CLIENTIP="172.20.2.1"
+SWITCHIP="172.20.2.2"
+
+lag_setup_network()
+{
+ lag_setup2x2 "down"
+
+ # create switch
+ ip netns exec ${SWITCH} ip link add br0 up type bridge
+ ip netns exec ${SWITCH} ip link set eth0 master br0 up
+ ip netns exec ${SWITCH} ip link set eth1 master br0 up
+ ip netns exec ${SWITCH} ip addr add ${SWITCHIP}/24 dev br0
+}
+
+lag_reset_network()
+{
+ ip netns exec ${CLIENT} ip link del bond0
+ ip netns exec ${SWITCH} ip link set eth0 up
+ ip netns exec ${SWITCH} ip link set eth1 up
+}
+
+create_bond()
+{
+ # create client
+ ip netns exec ${CLIENT} ip link set eth0 down
+ ip netns exec ${CLIENT} ip link set eth1 down
+
+ ip netns exec ${CLIENT} ip link add bond0 type bond $@
+ ip netns exec ${CLIENT} ip link set eth0 master bond0
+ ip netns exec ${CLIENT} ip link set eth1 master bond0
+ ip netns exec ${CLIENT} ip link set bond0 up
+ ip netns exec ${CLIENT} ip addr add ${CLIENTIP}/24 dev bond0
+}
+
+test_bond_recovery()
+{
+ RET=0
+
+ create_bond $@
+
+ # verify connectivity
+ slowwait 2 ip netns exec ${CLIENT} ping ${SWITCHIP} -c 2 -W 0.1 &> /dev/null
+ check_err $? "No connectivity"
+
+ # force the links of the bond down
+ ip netns exec ${SWITCH} ip link set eth0 down
+ sleep 2
+ ip netns exec ${SWITCH} ip link set eth0 up
+ ip netns exec ${SWITCH} ip link set eth1 down
+
+ # re-verify connectivity
+ slowwait 2 ip netns exec ${CLIENT} ping ${SWITCHIP} -c 2 -W 0.1 &> /dev/null
+
+ local rc=$?
+ check_err $rc "Bond failed to recover"
+ log_test "$1 ($2) bond recovery"
+ lag_reset_network
+}
diff --git a/tools/testing/selftests/drivers/net/bonding/mode-1-recovery-updelay.sh b/tools/testing/selftests/drivers/net/bonding/mode-1-recovery-updelay.sh
new file mode 100755
index 000000000000..9d26ab4cad0b
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/bonding/mode-1-recovery-updelay.sh
@@ -0,0 +1,45 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+# Regression Test:
+# When the bond is configured with down/updelay and the link state of
+# slave members flaps if there are no remaining members up the bond
+# should immediately select a member to bring up. (from bonding.txt
+# section 13.1 paragraph 4)
+#
+# +-------------+ +-----------+
+# | client | | switch |
+# | | | |
+# | +--------| link1 |-----+ |
+# | | +-------+ | |
+# | | | | | |
+# | | +-------+ | |
+# | | bond | link2 | Br0 | |
+# +-------------+ +-----------+
+# 172.20.2.1 172.20.2.2
+
+
+REQUIRE_MZ=no
+REQUIRE_JQ=no
+NUM_NETIFS=0
+lib_dir=$(dirname "$0")
+source "$lib_dir"/../../../net/forwarding/lib.sh
+source "$lib_dir"/lag_lib.sh
+
+cleanup()
+{
+ lag_cleanup
+}
+
+trap cleanup 0 1 2
+
+lag_setup_network
+test_bond_recovery mode 1 miimon 100 updelay 0
+test_bond_recovery mode 1 miimon 100 updelay 200
+test_bond_recovery mode 1 miimon 100 updelay 500
+test_bond_recovery mode 1 miimon 100 updelay 1000
+test_bond_recovery mode 1 miimon 100 updelay 2000
+test_bond_recovery mode 1 miimon 100 updelay 5000
+test_bond_recovery mode 1 miimon 100 updelay 10000
+
+exit "$EXIT_STATUS"
diff --git a/tools/testing/selftests/drivers/net/bonding/mode-2-recovery-updelay.sh b/tools/testing/selftests/drivers/net/bonding/mode-2-recovery-updelay.sh
new file mode 100755
index 000000000000..2d275b3e47dd
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/bonding/mode-2-recovery-updelay.sh
@@ -0,0 +1,45 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+# Regression Test:
+# When the bond is configured with down/updelay and the link state of
+# slave members flaps if there are no remaining members up the bond
+# should immediately select a member to bring up. (from bonding.txt
+# section 13.1 paragraph 4)
+#
+# +-------------+ +-----------+
+# | client | | switch |
+# | | | |
+# | +--------| link1 |-----+ |
+# | | +-------+ | |
+# | | | | | |
+# | | +-------+ | |
+# | | bond | link2 | Br0 | |
+# +-------------+ +-----------+
+# 172.20.2.1 172.20.2.2
+
+
+REQUIRE_MZ=no
+REQUIRE_JQ=no
+NUM_NETIFS=0
+lib_dir=$(dirname "$0")
+source "$lib_dir"/../../../net/forwarding/lib.sh
+source "$lib_dir"/lag_lib.sh
+
+cleanup()
+{
+ lag_cleanup
+}
+
+trap cleanup 0 1 2
+
+lag_setup_network
+test_bond_recovery mode 2 miimon 100 updelay 0
+test_bond_recovery mode 2 miimon 100 updelay 200
+test_bond_recovery mode 2 miimon 100 updelay 500
+test_bond_recovery mode 2 miimon 100 updelay 1000
+test_bond_recovery mode 2 miimon 100 updelay 2000
+test_bond_recovery mode 2 miimon 100 updelay 5000
+test_bond_recovery mode 2 miimon 100 updelay 10000
+
+exit "$EXIT_STATUS"
diff --git a/tools/testing/selftests/drivers/net/bonding/settings b/tools/testing/selftests/drivers/net/bonding/settings
new file mode 100644
index 000000000000..79b65bdf05db
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/bonding/settings
@@ -0,0 +1 @@
+timeout=1200
diff --git a/tools/testing/selftests/drivers/net/dsa/Makefile b/tools/testing/selftests/drivers/net/dsa/Makefile
new file mode 100644
index 000000000000..cd6817fe5be6
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/dsa/Makefile
@@ -0,0 +1,32 @@
+# SPDX-License-Identifier: GPL-2.0+ OR MIT
+
+TEST_PROGS = bridge_locked_port.sh \
+ bridge_mdb.sh \
+ bridge_mld.sh \
+ bridge_vlan_aware.sh \
+ bridge_vlan_mcast.sh \
+ bridge_vlan_unaware.sh \
+ local_termination.sh \
+ no_forwarding.sh \
+ tc_actions.sh \
+ test_bridge_fdb_stress.sh
+
+TEST_FILES := \
+ run_net_forwarding_test.sh \
+ forwarding.config
+
+TEST_INCLUDES := \
+ ../../../net/forwarding/bridge_locked_port.sh \
+ ../../../net/forwarding/bridge_mdb.sh \
+ ../../../net/forwarding/bridge_mld.sh \
+ ../../../net/forwarding/bridge_vlan_aware.sh \
+ ../../../net/forwarding/bridge_vlan_mcast.sh \
+ ../../../net/forwarding/bridge_vlan_unaware.sh \
+ ../../../net/forwarding/lib.sh \
+ ../../../net/forwarding/local_termination.sh \
+ ../../../net/forwarding/no_forwarding.sh \
+ ../../../net/forwarding/tc_actions.sh \
+ ../../../net/forwarding/tc_common.sh \
+ ../../../net/lib.sh
+
+include ../../../lib.mk
diff --git a/tools/testing/selftests/drivers/net/dsa/bridge_locked_port.sh b/tools/testing/selftests/drivers/net/dsa/bridge_locked_port.sh
new file mode 120000
index 000000000000..d16a65e7595d
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/dsa/bridge_locked_port.sh
@@ -0,0 +1 @@
+run_net_forwarding_test.sh \ No newline at end of file
diff --git a/tools/testing/selftests/drivers/net/dsa/bridge_mdb.sh b/tools/testing/selftests/drivers/net/dsa/bridge_mdb.sh
new file mode 120000
index 000000000000..d16a65e7595d
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/dsa/bridge_mdb.sh
@@ -0,0 +1 @@
+run_net_forwarding_test.sh \ No newline at end of file
diff --git a/tools/testing/selftests/drivers/net/dsa/bridge_mld.sh b/tools/testing/selftests/drivers/net/dsa/bridge_mld.sh
new file mode 120000
index 000000000000..d16a65e7595d
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/dsa/bridge_mld.sh
@@ -0,0 +1 @@
+run_net_forwarding_test.sh \ No newline at end of file
diff --git a/tools/testing/selftests/drivers/net/dsa/bridge_vlan_aware.sh b/tools/testing/selftests/drivers/net/dsa/bridge_vlan_aware.sh
new file mode 120000
index 000000000000..d16a65e7595d
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/dsa/bridge_vlan_aware.sh
@@ -0,0 +1 @@
+run_net_forwarding_test.sh \ No newline at end of file
diff --git a/tools/testing/selftests/drivers/net/dsa/bridge_vlan_mcast.sh b/tools/testing/selftests/drivers/net/dsa/bridge_vlan_mcast.sh
new file mode 120000
index 000000000000..d16a65e7595d
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/dsa/bridge_vlan_mcast.sh
@@ -0,0 +1 @@
+run_net_forwarding_test.sh \ No newline at end of file
diff --git a/tools/testing/selftests/drivers/net/dsa/bridge_vlan_unaware.sh b/tools/testing/selftests/drivers/net/dsa/bridge_vlan_unaware.sh
new file mode 120000
index 000000000000..d16a65e7595d
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/dsa/bridge_vlan_unaware.sh
@@ -0,0 +1 @@
+run_net_forwarding_test.sh \ No newline at end of file
diff --git a/tools/testing/selftests/drivers/net/dsa/forwarding.config b/tools/testing/selftests/drivers/net/dsa/forwarding.config
new file mode 100644
index 000000000000..7adc1396fae0
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/dsa/forwarding.config
@@ -0,0 +1,2 @@
+NETIF_CREATE=no
+STABLE_MAC_ADDRS=yes
diff --git a/tools/testing/selftests/drivers/net/dsa/local_termination.sh b/tools/testing/selftests/drivers/net/dsa/local_termination.sh
new file mode 120000
index 000000000000..d16a65e7595d
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/dsa/local_termination.sh
@@ -0,0 +1 @@
+run_net_forwarding_test.sh \ No newline at end of file
diff --git a/tools/testing/selftests/drivers/net/dsa/no_forwarding.sh b/tools/testing/selftests/drivers/net/dsa/no_forwarding.sh
new file mode 120000
index 000000000000..d16a65e7595d
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/dsa/no_forwarding.sh
@@ -0,0 +1 @@
+run_net_forwarding_test.sh \ No newline at end of file
diff --git a/tools/testing/selftests/drivers/net/dsa/run_net_forwarding_test.sh b/tools/testing/selftests/drivers/net/dsa/run_net_forwarding_test.sh
new file mode 100755
index 000000000000..4106c0a102ea
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/dsa/run_net_forwarding_test.sh
@@ -0,0 +1,9 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+libdir=$(dirname "$(readlink -f "${BASH_SOURCE[0]}")")
+testname=$(basename "${BASH_SOURCE[0]}")
+
+source "$libdir"/forwarding.config
+cd "$libdir"/../../../net/forwarding/ || exit 1
+source "./$testname" "$@"
diff --git a/tools/testing/selftests/drivers/net/dsa/tc_actions.sh b/tools/testing/selftests/drivers/net/dsa/tc_actions.sh
new file mode 120000
index 000000000000..d16a65e7595d
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/dsa/tc_actions.sh
@@ -0,0 +1 @@
+run_net_forwarding_test.sh \ No newline at end of file
diff --git a/tools/testing/selftests/drivers/net/dsa/test_bridge_fdb_stress.sh b/tools/testing/selftests/drivers/net/dsa/test_bridge_fdb_stress.sh
new file mode 100755
index 000000000000..74682151d04d
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/dsa/test_bridge_fdb_stress.sh
@@ -0,0 +1,47 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+# Bridge FDB entries can be offloaded to DSA switches without holding the
+# rtnl_mutex. Traditionally this mutex has conferred drivers implicit
+# serialization, which means their code paths are not well tested in the
+# presence of concurrency.
+# This test creates a background task that stresses the FDB by adding and
+# deleting an entry many times in a row without the rtnl_mutex held.
+# It then tests the driver resistance to concurrency by calling .ndo_fdb_dump
+# (with rtnl_mutex held) from a foreground task.
+# Since either the FDB dump or the additions/removals can fail, but the
+# additions and removals are performed in deferred as opposed to process
+# context, we cannot simply check for user space error codes.
+
+WAIT_TIME=1
+NUM_NETIFS=1
+REQUIRE_JQ="no"
+REQUIRE_MZ="no"
+NETIF_CREATE="no"
+lib_dir=$(dirname "$0")
+source "$lib_dir"/../../../net/forwarding/lib.sh
+
+cleanup() {
+ echo "Cleaning up"
+ kill $pid && wait $pid &> /dev/null
+ ip link del br0
+ echo "Please check kernel log for errors"
+}
+trap 'cleanup' EXIT
+
+eth=${NETIFS[p1]}
+
+ip link del br0 2>&1 >/dev/null || :
+ip link add br0 type bridge && ip link set $eth master br0
+
+(while :; do
+ bridge fdb add 00:01:02:03:04:05 dev $eth master static
+ bridge fdb del 00:01:02:03:04:05 dev $eth master static
+done) &
+pid=$!
+
+for i in $(seq 1 50); do
+ bridge fdb show > /dev/null
+ sleep 3
+ echo "$((${i} * 2))% complete..."
+done
diff --git a/tools/testing/selftests/drivers/net/mlxsw/devlink_linecard.sh b/tools/testing/selftests/drivers/net/mlxsw/devlink_linecard.sh
new file mode 100755
index 000000000000..224ca3695c89
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/mlxsw/devlink_linecard.sh
@@ -0,0 +1,334 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+#
+# In addition to the common variables, user might use:
+# LC_SLOT - If not set, all probed line cards are going to be tested,
+# with an exception of the "activation_16x100G_test".
+# It set, only the selected line card is going to be used
+# for tests, including "activation_16x100G_test".
+
+lib_dir=$(dirname $0)/../../../net/forwarding
+
+ALL_TESTS="
+ unprovision_test
+ provision_test
+ activation_16x100G_test
+"
+
+NUM_NETIFS=0
+
+source $lib_dir/lib.sh
+source $lib_dir/devlink_lib.sh
+
+until_lc_state_is()
+{
+ local state=$1; shift
+ local current=$("$@")
+
+ echo "$current"
+ [ "$current" == "$state" ]
+}
+
+until_lc_state_is_not()
+{
+ ! until_lc_state_is "$@"
+}
+
+lc_state_get()
+{
+ local lc=$1
+
+ devlink lc show $DEVLINK_DEV lc $lc -j | jq -e -r ".[][][].state"
+}
+
+lc_wait_until_state_changes()
+{
+ local lc=$1
+ local state=$2
+ local timeout=$3 # ms
+
+ busywait "$timeout" until_lc_state_is_not "$state" lc_state_get "$lc"
+}
+
+lc_wait_until_state_becomes()
+{
+ local lc=$1
+ local state=$2
+ local timeout=$3 # ms
+
+ busywait "$timeout" until_lc_state_is "$state" lc_state_get "$lc"
+}
+
+until_lc_port_count_is()
+{
+ local port_count=$1; shift
+ local current=$("$@")
+
+ echo "$current"
+ [ $current == $port_count ]
+}
+
+lc_port_count_get()
+{
+ local lc=$1
+
+ devlink port -j | jq -e -r ".[][] | select(.lc==$lc) | .port" | wc -l
+}
+
+lc_wait_until_port_count_is()
+{
+ local lc=$1
+ local port_count=$2
+ local timeout=$3 # ms
+
+ busywait "$timeout" until_lc_port_count_is "$port_count" lc_port_count_get "$lc"
+}
+
+lc_nested_devlink_dev_get()
+{
+ local lc=$1
+
+ devlink lc show $DEVLINK_DEV lc $lc -j | jq -e -r ".[][][].nested_devlink"
+}
+
+PROV_UNPROV_TIMEOUT=8000 # ms
+POST_PROV_ACT_TIMEOUT=2000 # ms
+PROV_PORTS_INSTANTIATION_TIMEOUT=15000 # ms
+
+unprovision_one()
+{
+ local lc=$1
+ local state
+
+ state=$(lc_state_get $lc)
+ check_err $? "Failed to get state of linecard $lc"
+ if [[ "$state" == "unprovisioned" ]]; then
+ return
+ fi
+
+ log_info "Unprovisioning linecard $lc"
+
+ devlink lc set $DEVLINK_DEV lc $lc notype
+ check_err $? "Failed to trigger linecard $lc unprovisioning"
+
+ state=$(lc_wait_until_state_changes $lc "unprovisioning" \
+ $PROV_UNPROV_TIMEOUT)
+ check_err $? "Failed to unprovision linecard $lc (timeout)"
+
+ [ "$state" == "unprovisioned" ]
+ check_err $? "Failed to unprovision linecard $lc (state=$state)"
+}
+
+provision_one()
+{
+ local lc=$1
+ local type=$2
+ local state
+
+ log_info "Provisioning linecard $lc"
+
+ devlink lc set $DEVLINK_DEV lc $lc type $type
+ check_err $? "Failed trigger linecard $lc provisioning"
+
+ state=$(lc_wait_until_state_changes $lc "provisioning" \
+ $PROV_UNPROV_TIMEOUT)
+ check_err $? "Failed to provision linecard $lc (timeout)"
+
+ [ "$state" == "provisioned" ] || [ "$state" == "active" ]
+ check_err $? "Failed to provision linecard $lc (state=$state)"
+
+ provisioned_type=$(devlink lc show $DEVLINK_DEV lc $lc -j | jq -e -r ".[][][].type")
+ [ "$provisioned_type" == "$type" ]
+ check_err $? "Wrong provision type returned for linecard $lc (got \"$provisioned_type\", expected \"$type\")"
+
+ # Wait for possible activation to make sure the state
+ # won't change after return from this function.
+ state=$(lc_wait_until_state_becomes $lc "active" \
+ $POST_PROV_ACT_TIMEOUT)
+}
+
+unprovision_test()
+{
+ RET=0
+ local lc
+
+ lc=$LC_SLOT
+ unprovision_one $lc
+ log_test "Unprovision"
+}
+
+LC_16X100G_TYPE="16x100G"
+LC_16X100G_PORT_COUNT=16
+
+supported_types_check()
+{
+ local lc=$1
+ local supported_types_count
+ local type_index
+ local lc_16x100_found=false
+
+ supported_types_count=$(devlink lc show $DEVLINK_DEV lc $lc -j | \
+ jq -e -r ".[][][].supported_types | length")
+ [ $supported_types_count != 0 ]
+ check_err $? "No supported types found for linecard $lc"
+ for (( type_index=0; type_index<$supported_types_count; type_index++ ))
+ do
+ type=$(devlink lc show $DEVLINK_DEV lc $lc -j | \
+ jq -e -r ".[][][].supported_types[$type_index]")
+ if [[ "$type" == "$LC_16X100G_TYPE" ]]; then
+ lc_16x100_found=true
+ break
+ fi
+ done
+ [ $lc_16x100_found = true ]
+ check_err $? "16X100G not found between supported types of linecard $lc"
+}
+
+ports_check()
+{
+ local lc=$1
+ local expected_port_count=$2
+ local port_count
+
+ port_count=$(lc_wait_until_port_count_is $lc $expected_port_count \
+ $PROV_PORTS_INSTANTIATION_TIMEOUT)
+ [ $port_count != 0 ]
+ check_err $? "No port associated with linecard $lc"
+ [ $port_count == $expected_port_count ]
+ check_err $? "Unexpected port count linecard $lc (got $port_count, expected $expected_port_count)"
+}
+
+lc_dev_info_provisioned_check()
+{
+ local lc=$1
+ local nested_devlink_dev=$2
+ local fixed_hw_revision
+ local running_ini_version
+
+ fixed_hw_revision=$(devlink dev info $nested_devlink_dev -j | \
+ jq -e -r '.[][].versions.fixed."hw.revision"')
+ check_err $? "Failed to get linecard $lc fixed.hw.revision"
+ log_info "Linecard $lc fixed.hw.revision: \"$fixed_hw_revision\""
+ running_ini_version=$(devlink dev info $nested_devlink_dev -j | \
+ jq -e -r '.[][].versions.running."ini.version"')
+ check_err $? "Failed to get linecard $lc running.ini.version"
+ log_info "Linecard $lc running.ini.version: \"$running_ini_version\""
+}
+
+provision_test()
+{
+ RET=0
+ local lc
+ local type
+ local state
+ local nested_devlink_dev
+
+ lc=$LC_SLOT
+ supported_types_check $lc
+ state=$(lc_state_get $lc)
+ check_err $? "Failed to get state of linecard $lc"
+ if [[ "$state" != "unprovisioned" ]]; then
+ unprovision_one $lc
+ fi
+ provision_one $lc $LC_16X100G_TYPE
+ ports_check $lc $LC_16X100G_PORT_COUNT
+
+ nested_devlink_dev=$(lc_nested_devlink_dev_get $lc)
+ check_err $? "Failed to get nested devlink handle of linecard $lc"
+ lc_dev_info_provisioned_check $lc $nested_devlink_dev
+
+ log_test "Provision"
+}
+
+ACTIVATION_TIMEOUT=20000 # ms
+
+interface_check()
+{
+ ip link set $h1 up
+ ip link set $h2 up
+ ifaces_upped=true
+ setup_wait
+}
+
+lc_dev_info_active_check()
+{
+ local lc=$1
+ local nested_devlink_dev=$2
+ local fixed_device_fw_psid
+ local running_device_fw
+
+ fixed_device_fw_psid=$(devlink dev info $nested_devlink_dev -j | \
+ jq -e -r ".[][].versions.fixed" | \
+ jq -e -r '."fw.psid"')
+ check_err $? "Failed to get linecard $lc fixed fw PSID"
+ log_info "Linecard $lc fixed.fw.psid: \"$fixed_device_fw_psid\""
+
+ running_device_fw=$(devlink dev info $nested_devlink_dev -j | \
+ jq -e -r ".[][].versions.running.fw")
+ check_err $? "Failed to get linecard $lc running.fw.version"
+ log_info "Linecard $lc running.fw: \"$running_device_fw\""
+}
+
+activation_16x100G_test()
+{
+ RET=0
+ local lc
+ local type
+ local state
+ local nested_devlink_dev
+
+ lc=$LC_SLOT
+ type=$LC_16X100G_TYPE
+
+ unprovision_one $lc
+ provision_one $lc $type
+ state=$(lc_wait_until_state_becomes $lc "active" \
+ $ACTIVATION_TIMEOUT)
+ check_err $? "Failed to get linecard $lc activated (timeout)"
+
+ interface_check
+
+ nested_devlink_dev=$(lc_nested_devlink_dev_get $lc)
+ check_err $? "Failed to get nested devlink handle of linecard $lc"
+ lc_dev_info_active_check $lc $nested_devlink_dev
+
+ log_test "Activation 16x100G"
+}
+
+setup_prepare()
+{
+ local lc_num=$(devlink lc show -j | jq -e -r ".[][\"$DEVLINK_DEV\"] |length")
+ if [[ $? -ne 0 ]] || [[ $lc_num -eq 0 ]]; then
+ echo "SKIP: No linecard support found"
+ exit $ksft_skip
+ fi
+
+ if [ -z "$LC_SLOT" ]; then
+ echo "SKIP: \"LC_SLOT\" variable not provided"
+ exit $ksft_skip
+ fi
+
+ # Interfaces are not present during the script start,
+ # that's why we define NUM_NETIFS here so dummy
+ # implicit veth pairs are not created.
+ NUM_NETIFS=2
+ h1=${NETIFS[p1]}
+ h2=${NETIFS[p2]}
+ ifaces_upped=false
+}
+
+cleanup()
+{
+ if [ "$ifaces_upped" = true ] ; then
+ ip link set $h1 down
+ ip link set $h2 down
+ fi
+}
+
+trap cleanup EXIT
+
+setup_prepare
+
+tests_run
+
+exit $EXIT_STATUS
diff --git a/tools/testing/selftests/drivers/net/mlxsw/devlink_trap_control.sh b/tools/testing/selftests/drivers/net/mlxsw/devlink_trap_control.sh
index a37273473c1b..64153bbf95df 100755
--- a/tools/testing/selftests/drivers/net/mlxsw/devlink_trap_control.sh
+++ b/tools/testing/selftests/drivers/net/mlxsw/devlink_trap_control.sh
@@ -83,10 +83,12 @@ ALL_TESTS="
ptp_general_test
flow_action_sample_test
flow_action_trap_test
+ eapol_test
"
NUM_NETIFS=4
source $lib_dir/lib.sh
source $lib_dir/devlink_lib.sh
+source mlxsw_lib.sh
h1_create()
{
@@ -626,8 +628,7 @@ ipv6_redirect_test()
ptp_event_test()
{
- # PTP is only supported on Spectrum-1, for now.
- [[ "$DEVLINK_VIDDID" != "15b3:cb84" ]] && return
+ mlxsw_only_on_spectrum 1 || return
# PTP Sync (0)
devlink_trap_stats_test "PTP Time-Critical Event Message" "ptp_event" \
@@ -638,8 +639,7 @@ ptp_event_test()
ptp_general_test()
{
- # PTP is only supported on Spectrum-1, for now.
- [[ "$DEVLINK_VIDDID" != "15b3:cb84" ]] && return
+ mlxsw_only_on_spectrum 1 || return
# PTP Announce (b)
devlink_trap_stats_test "PTP General Message" "ptp_general" \
@@ -678,6 +678,27 @@ flow_action_trap_test()
tc qdisc del dev $rp1 clsact
}
+eapol_payload_get()
+{
+ local source_mac=$1; shift
+ local p
+
+ p=$(:
+ )"01:80:C2:00:00:03:"$( : ETH daddr
+ )"$source_mac:"$( : ETH saddr
+ )"88:8E:"$( : ETH type
+ )
+ echo $p
+}
+
+eapol_test()
+{
+ local h1mac=$(mac_get $h1)
+
+ devlink_trap_stats_test "EAPOL" "eapol" $MZ $h1 -c 1 \
+ $(eapol_payload_get $h1mac) -p 100 -q
+}
+
trap cleanup EXIT
setup_prepare
diff --git a/tools/testing/selftests/drivers/net/mlxsw/devlink_trap_l2_drops.sh b/tools/testing/selftests/drivers/net/mlxsw/devlink_trap_l2_drops.sh
index a4c2812e9807..8d4b2c6265b3 100755
--- a/tools/testing/selftests/drivers/net/mlxsw/devlink_trap_l2_drops.sh
+++ b/tools/testing/selftests/drivers/net/mlxsw/devlink_trap_l2_drops.sh
@@ -14,6 +14,7 @@ ALL_TESTS="
ingress_stp_filter_test
port_list_is_empty_test
port_loopback_filter_test
+ locked_port_test
"
NUM_NETIFS=4
source $lib_dir/tc_common.sh
@@ -420,6 +421,110 @@ port_loopback_filter_test()
port_loopback_filter_uc_test
}
+locked_port_miss_test()
+{
+ local trap_name="locked_port"
+ local smac=00:11:22:33:44:55
+
+ bridge link set dev $swp1 learning off
+ bridge link set dev $swp1 locked on
+
+ RET=0
+
+ devlink_trap_stats_check $trap_name $MZ $h1 -c 1 \
+ -a $smac -b $(mac_get $h2) -A 192.0.2.1 -B 192.0.2.2 -p 100 -q
+ check_fail $? "Trap stats increased before setting action to \"trap\""
+
+ devlink_trap_action_set $trap_name "trap"
+
+ devlink_trap_stats_check $trap_name $MZ $h1 -c 1 \
+ -a $smac -b $(mac_get $h2) -A 192.0.2.1 -B 192.0.2.2 -p 100 -q
+ check_err $? "Trap stats did not increase when should"
+
+ devlink_trap_action_set $trap_name "drop"
+
+ devlink_trap_stats_check $trap_name $MZ $h1 -c 1 \
+ -a $smac -b $(mac_get $h2) -A 192.0.2.1 -B 192.0.2.2 -p 100 -q
+ check_fail $? "Trap stats increased after setting action to \"drop\""
+
+ devlink_trap_action_set $trap_name "trap"
+
+ bridge fdb replace $smac dev $swp1 master static vlan 1
+
+ devlink_trap_stats_check $trap_name $MZ $h1 -c 1 \
+ -a $smac -b $(mac_get $h2) -A 192.0.2.1 -B 192.0.2.2 -p 100 -q
+ check_fail $? "Trap stats increased after adding an FDB entry"
+
+ bridge fdb del $smac dev $swp1 master static vlan 1
+ bridge link set dev $swp1 locked off
+
+ devlink_trap_stats_check $trap_name $MZ $h1 -c 1 \
+ -a $smac -b $(mac_get $h2) -A 192.0.2.1 -B 192.0.2.2 -p 100 -q
+ check_fail $? "Trap stats increased after unlocking port"
+
+ log_test "Locked port - FDB miss"
+
+ devlink_trap_action_set $trap_name "drop"
+ bridge link set dev $swp1 learning on
+}
+
+locked_port_mismatch_test()
+{
+ local trap_name="locked_port"
+ local smac=00:11:22:33:44:55
+
+ bridge link set dev $swp1 learning off
+ bridge link set dev $swp1 locked on
+
+ RET=0
+
+ bridge fdb replace $smac dev $swp2 master static vlan 1
+
+ devlink_trap_stats_check $trap_name $MZ $h1 -c 1 \
+ -a $smac -b $(mac_get $h2) -A 192.0.2.1 -B 192.0.2.2 -p 100 -q
+ check_fail $? "Trap stats increased before setting action to \"trap\""
+
+ devlink_trap_action_set $trap_name "trap"
+
+ devlink_trap_stats_check $trap_name $MZ $h1 -c 1 \
+ -a $smac -b $(mac_get $h2) -A 192.0.2.1 -B 192.0.2.2 -p 100 -q
+ check_err $? "Trap stats did not increase when should"
+
+ devlink_trap_action_set $trap_name "drop"
+
+ devlink_trap_stats_check $trap_name $MZ $h1 -c 1 \
+ -a $smac -b $(mac_get $h2) -A 192.0.2.1 -B 192.0.2.2 -p 100 -q
+ check_fail $? "Trap stats increased after setting action to \"drop\""
+
+ devlink_trap_action_set $trap_name "trap"
+ bridge link set dev $swp1 locked off
+
+ devlink_trap_stats_check $trap_name $MZ $h1 -c 1 \
+ -a $smac -b $(mac_get $h2) -A 192.0.2.1 -B 192.0.2.2 -p 100 -q
+ check_fail $? "Trap stats increased after unlocking port"
+
+ bridge link set dev $swp1 locked on
+ bridge fdb replace $smac dev $swp1 master static vlan 1
+
+ devlink_trap_stats_check $trap_name $MZ $h1 -c 1 \
+ -a $smac -b $(mac_get $h2) -A 192.0.2.1 -B 192.0.2.2 -p 100 -q
+ check_fail $? "Trap stats increased after replacing an FDB entry"
+
+ bridge fdb del $smac dev $swp1 master static vlan 1
+ devlink_trap_action_set $trap_name "drop"
+
+ log_test "Locked port - FDB mismatch"
+
+ bridge link set dev $swp1 locked off
+ bridge link set dev $swp1 learning on
+}
+
+locked_port_test()
+{
+ locked_port_miss_test
+ locked_port_mismatch_test
+}
+
trap cleanup EXIT
setup_prepare
diff --git a/tools/testing/selftests/drivers/net/mlxsw/devlink_trap_l3_drops.sh b/tools/testing/selftests/drivers/net/mlxsw/devlink_trap_l3_drops.sh
index f5abb1ebd392..160891dcb4bc 100755
--- a/tools/testing/selftests/drivers/net/mlxsw/devlink_trap_l3_drops.sh
+++ b/tools/testing/selftests/drivers/net/mlxsw/devlink_trap_l3_drops.sh
@@ -52,6 +52,7 @@ ALL_TESTS="
blackhole_route_test
irif_disabled_test
erif_disabled_test
+ blackhole_nexthop_test
"
NUM_NETIFS=4
@@ -108,6 +109,9 @@ router_destroy()
__addr_add_del $rp1 del 192.0.2.2/24 2001:db8:1::2/64
tc qdisc del dev $rp2 clsact
+
+ ip link set dev $rp2 down
+ ip link set dev $rp1 down
}
setup_prepare()
@@ -647,6 +651,41 @@ erif_disabled_test()
devlink_trap_action_set $trap_name "drop"
}
+__blackhole_nexthop_test()
+{
+ local flags=$1; shift
+ local subnet=$1; shift
+ local proto=$1; shift
+ local dip=$1; shift
+ local trap_name="blackhole_nexthop"
+ local mz_pid
+
+ RET=0
+
+ ip -$flags nexthop add id 1 blackhole
+ ip -$flags route add $subnet nhid 1
+ tc filter add dev $rp2 egress protocol $proto pref 1 handle 101 \
+ flower skip_hw dst_ip $dip ip_proto udp action drop
+
+ # Generate packets to the blackhole nexthop
+ $MZ $h1 -$flags -t udp "sp=54321,dp=12345" -c 0 -p 100 -b $rp1mac \
+ -B $dip -d 1msec -q &
+ mz_pid=$!
+
+ devlink_trap_drop_test $trap_name $rp2 101
+ log_test "Blackhole nexthop: IPv$flags"
+
+ devlink_trap_drop_cleanup $mz_pid $rp2 $proto 1 101
+ ip -$flags route del $subnet
+ ip -$flags nexthop del id 1
+}
+
+blackhole_nexthop_test()
+{
+ __blackhole_nexthop_test "4" "198.51.100.0/30" "ip" $h2_ipv4
+ __blackhole_nexthop_test "6" "2001:db8:2::/120" "ipv6" $h2_ipv6
+}
+
trap cleanup EXIT
setup_prepare
diff --git a/tools/testing/selftests/drivers/net/mlxsw/devlink_trap_l3_exceptions.sh b/tools/testing/selftests/drivers/net/mlxsw/devlink_trap_l3_exceptions.sh
index 1fedfc9da434..190c1b6b5365 100755
--- a/tools/testing/selftests/drivers/net/mlxsw/devlink_trap_l3_exceptions.sh
+++ b/tools/testing/selftests/drivers/net/mlxsw/devlink_trap_l3_exceptions.sh
@@ -111,6 +111,9 @@ router_destroy()
__addr_add_del $rp1 del 192.0.2.2/24 2001:db8:1::2/64
tc qdisc del dev $rp2 clsact
+
+ ip link set dev $rp2 down
+ ip link set dev $rp1 down
}
setup_prepare()
@@ -446,6 +449,35 @@ __invalid_nexthop_test()
log_test "Unresolved neigh: nexthop does not exist: $desc"
}
+__invalid_nexthop_bucket_test()
+{
+ local desc=$1; shift
+ local dip=$1; shift
+ local via_add=$1; shift
+ local trap_name="unresolved_neigh"
+
+ RET=0
+
+ # Check that route to nexthop that does not exist triggers
+ # unresolved_neigh
+ ip nexthop add id 1 via $via_add dev $rp2
+ ip nexthop add id 10 group 1 type resilient buckets 32
+ ip route add $dip nhid 10
+
+ t0_packets=$(devlink_trap_rx_packets_get $trap_name)
+ ping_do $h1 $dip
+ t1_packets=$(devlink_trap_rx_packets_get $trap_name)
+
+ if [[ $t0_packets -eq $t1_packets ]]; then
+ check_err 1 "Trap counter did not increase"
+ fi
+
+ ip route del $dip nhid 10
+ ip nexthop del id 10
+ ip nexthop del id 1
+ log_test "Unresolved neigh: nexthop bucket does not exist: $desc"
+}
+
unresolved_neigh_test()
{
__host_miss_test "IPv4" 198.51.100.1
@@ -453,6 +485,8 @@ unresolved_neigh_test()
__invalid_nexthop_test "IPv4" 198.51.100.1 198.51.100.3 24 198.51.100.4
__invalid_nexthop_test "IPv6" 2001:db8:2::1 2001:db8:2::3 64 \
2001:db8:2::4
+ __invalid_nexthop_bucket_test "IPv4" 198.51.100.1 198.51.100.4
+ __invalid_nexthop_bucket_test "IPv6" 2001:db8:2::1 2001:db8:2::4
}
vrf_without_routes_create()
diff --git a/tools/testing/selftests/drivers/net/mlxsw/devlink_trap_policer.sh b/tools/testing/selftests/drivers/net/mlxsw/devlink_trap_policer.sh
index 47edf099a17e..0bd5ffc218ac 100755
--- a/tools/testing/selftests/drivers/net/mlxsw/devlink_trap_policer.sh
+++ b/tools/testing/selftests/drivers/net/mlxsw/devlink_trap_policer.sh
@@ -207,7 +207,7 @@ __rate_test()
RET=0
- devlink trap policer set $DEVLINK_DEV policer $id rate 1000 burst 16
+ devlink trap policer set $DEVLINK_DEV policer $id rate 1000 burst 512
devlink trap group set $DEVLINK_DEV group l3_drops policer $id
# Send packets at highest possible rate and make sure they are dropped
@@ -220,8 +220,8 @@ __rate_test()
rate=$(trap_rate_get)
pct=$((100 * (rate - 1000) / 1000))
- ((-5 <= pct && pct <= 5))
- check_err $? "Expected rate 1000 pps, got $rate pps, which is $pct% off. Required accuracy is +-5%"
+ ((-10 <= pct && pct <= 10))
+ check_err $? "Expected rate 1000 pps, got $rate pps, which is $pct% off. Required accuracy is +-10%"
log_info "Expected rate 1000 pps, measured rate $rate pps"
drop_rate=$(policer_drop_rate_get $id)
@@ -272,13 +272,17 @@ __rate_test()
rate_test()
{
- local id
+ local last_policer=$(devlink -j -p trap policer show |
+ jq '[.[]["'$DEVLINK_DEV'"][].policer] | max')
- for id in $(devlink_trap_policer_ids_get); do
- echo
- log_info "Running rate test for policer $id"
- __rate_test $id
- done
+ log_info "Running rate test for policer 1"
+ __rate_test 1
+
+ log_info "Running rate test for policer $((last_policer / 2))"
+ __rate_test $((last_policer / 2))
+
+ log_info "Running rate test for policer $last_policer"
+ __rate_test $last_policer
}
__burst_test()
@@ -288,35 +292,12 @@ __burst_test()
RET=0
- devlink trap policer set $DEVLINK_DEV policer $id rate 1000 burst 32
+ devlink trap policer set $DEVLINK_DEV policer $id rate 1000 burst 512
devlink trap group set $DEVLINK_DEV group l3_drops policer $id
- # Send a burst of 64 packets and make sure that about 32 are received
- # and the rest are dropped by the policer
- log_info "=== Tx burst size: 64, Policer burst size: 32 pps ==="
-
- t0_rx=$(devlink_trap_rx_packets_get blackhole_route)
- t0_drop=$(devlink_trap_policer_rx_dropped_get $id)
-
- start_traffic $h1 192.0.2.1 198.51.100.100 $rp1_mac -c 64
-
- t1_rx=$(devlink_trap_rx_packets_get blackhole_route)
- t1_drop=$(devlink_trap_policer_rx_dropped_get $id)
-
- rx=$((t1_rx - t0_rx))
- pct=$((100 * (rx - 32) / 32))
- ((-20 <= pct && pct <= 20))
- check_err $? "Expected burst size of 32 packets, got $rx packets, which is $pct% off. Required accuracy is +-20%"
- log_info "Expected burst size of 32 packets, measured burst size of $rx packets"
-
- drop=$((t1_drop - t0_drop))
- (( drop > 0 ))
- check_err $? "Expected non-zero policer drops, got 0"
- log_info "Measured policer drops of $drop packets"
-
# Send a burst of 16 packets and make sure that 16 are received
# and that none are dropped by the policer
- log_info "=== Tx burst size: 16, Policer burst size: 32 pps ==="
+ log_info "=== Tx burst size: 16, Policer burst size: 512 ==="
t0_rx=$(devlink_trap_rx_packets_get blackhole_route)
t0_drop=$(devlink_trap_policer_rx_dropped_get $id)
@@ -365,13 +346,17 @@ __burst_test()
burst_test()
{
- local id
+ local last_policer=$(devlink -j -p trap policer show |
+ jq '[.[]["'$DEVLINK_DEV'"][].policer] | max')
+
+ log_info "Running burst test for policer 1"
+ __burst_test 1
+
+ log_info "Running burst test for policer $((last_policer / 2))"
+ __burst_test $((last_policer / 2))
- for id in $(devlink_trap_policer_ids_get); do
- echo
- log_info "Running burst size test for policer $id"
- __burst_test $id
- done
+ log_info "Running burst test for policer $last_policer"
+ __burst_test $last_policer
}
trap cleanup EXIT
diff --git a/tools/testing/selftests/drivers/net/mlxsw/devlink_trap_tunnel_ipip.sh b/tools/testing/selftests/drivers/net/mlxsw/devlink_trap_tunnel_ipip.sh
index 8817851da7a9..e9a82cae8c9a 100755
--- a/tools/testing/selftests/drivers/net/mlxsw/devlink_trap_tunnel_ipip.sh
+++ b/tools/testing/selftests/drivers/net/mlxsw/devlink_trap_tunnel_ipip.sh
@@ -13,7 +13,7 @@
# |
# +-------------------|-----+
# | SW1 | |
-# | $swp1 + |
+# | $swp1 + |
# | 192.0.2.2/28 |
# | |
# | + g1a (gre) |
@@ -27,8 +27,8 @@
# |
# +--|----------------------+
# | | VRF2 |
-# | + $rp2 |
-# | 198.51.100.2/28 |
+# | + $rp2 |
+# | 198.51.100.2/28 |
# +-------------------------+
lib_dir=$(dirname $0)/../../../net/forwarding
@@ -116,12 +116,16 @@ cleanup()
forwarding_restore
}
-ecn_payload_get()
+ipip_payload_get()
{
+ local flags=$1; shift
+ local key=$1; shift
+
p=$(:
- )"0"$( : GRE flags
+ )"$flags"$( : GRE flags
)"0:00:"$( : Reserved + version
)"08:00:"$( : ETH protocol type
+ )"$key"$( : Key
)"4"$( : IP version
)"5:"$( : IHL
)"00:"$( : IP TOS
@@ -137,6 +141,11 @@ ecn_payload_get()
echo $p
}
+ecn_payload_get()
+{
+ echo $(ipip_payload_get "0")
+}
+
ecn_decap_test()
{
local trap_name="decap_error"
@@ -171,31 +180,6 @@ ecn_decap_test()
tc filter del dev $swp1 egress protocol ip pref 1 handle 101 flower
}
-ipip_payload_get()
-{
- local flags=$1; shift
- local key=$1; shift
-
- p=$(:
- )"$flags"$( : GRE flags
- )"0:00:"$( : Reserved + version
- )"08:00:"$( : ETH protocol type
- )"$key"$( : Key
- )"4"$( : IP version
- )"5:"$( : IHL
- )"00:"$( : IP TOS
- )"00:14:"$( : IP total length
- )"00:00:"$( : IP identification
- )"20:00:"$( : IP flags + frag off
- )"30:"$( : IP TTL
- )"01:"$( : IP proto
- )"E7:E6:"$( : IP header csum
- )"C0:00:01:01:"$( : IP saddr : 192.0.1.1
- )"C0:00:02:01:"$( : IP daddr : 192.0.2.1
- )
- echo $p
-}
-
no_matching_tunnel_test()
{
local trap_name="decap_error"
@@ -239,7 +223,8 @@ decap_error_test()
no_matching_tunnel_test "Decap error: Source IP check failed" \
192.0.2.68 "0"
no_matching_tunnel_test \
- "Decap error: Key exists but was not expected" $sip "2" ":E9:"
+ "Decap error: Key exists but was not expected" $sip "2" \
+ "00:00:00:E9:"
# Destroy the tunnel and create new one with key
__addr_add_del g1 del 192.0.2.65/32
@@ -251,7 +236,8 @@ decap_error_test()
no_matching_tunnel_test \
"Decap error: Key does not exist but was expected" $sip "0"
no_matching_tunnel_test \
- "Decap error: Packet has a wrong key field" $sip "2" "E8:"
+ "Decap error: Packet has a wrong key field" $sip "2" \
+ "00:00:00:E8:"
}
trap cleanup EXIT
diff --git a/tools/testing/selftests/drivers/net/mlxsw/devlink_trap_tunnel_ipip6.sh b/tools/testing/selftests/drivers/net/mlxsw/devlink_trap_tunnel_ipip6.sh
new file mode 100755
index 000000000000..878125041fc3
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/mlxsw/devlink_trap_tunnel_ipip6.sh
@@ -0,0 +1,250 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+#
+# Test devlink-trap tunnel exceptions functionality over mlxsw.
+# Check all exception traps to make sure they are triggered under the right
+# conditions.
+
+# +-------------------------+
+# | H1 |
+# | $h1 + |
+# | 2001:db8:1::1/64 | |
+# +-------------------|-----+
+# |
+# +-------------------|-----+
+# | SW1 | |
+# | $swp1 + |
+# | 2001:db8:1::2/64 |
+# | |
+# | + g1 (ip6gre) |
+# | loc=2001:db8:3::1 |
+# | rem=2001:db8:3::2 |
+# | tos=inherit |
+# | |
+# | + $rp1 |
+# | | 2001:db8:10::1/64 |
+# +--|----------------------+
+# |
+# +--|----------------------+
+# | | VRF2 |
+# | + $rp2 |
+# | 2001:db8:10::2/64 |
+# +-------------------------+
+
+lib_dir=$(dirname $0)/../../../net/forwarding
+
+ALL_TESTS="
+ decap_error_test
+"
+
+NUM_NETIFS=4
+source $lib_dir/lib.sh
+source $lib_dir/tc_common.sh
+source $lib_dir/devlink_lib.sh
+
+h1_create()
+{
+ simple_if_init $h1 2001:db8:1::1/64
+}
+
+h1_destroy()
+{
+ simple_if_fini $h1 2001:db8:1::1/64
+}
+
+vrf2_create()
+{
+ simple_if_init $rp2 2001:db8:10::2/64
+}
+
+vrf2_destroy()
+{
+ simple_if_fini $rp2 2001:db8:10::2/64
+}
+
+switch_create()
+{
+ ip link set dev $swp1 up
+ __addr_add_del $swp1 add 2001:db8:1::2/64
+ tc qdisc add dev $swp1 clsact
+
+ tunnel_create g1 ip6gre 2001:db8:3::1 2001:db8:3::2 tos inherit \
+ ttl inherit
+ ip link set dev g1 up
+ __addr_add_del g1 add 2001:db8:3::1/128
+
+ ip link set dev $rp1 up
+ __addr_add_del $rp1 add 2001:db8:10::1/64
+}
+
+switch_destroy()
+{
+ __addr_add_del $rp1 del 2001:db8:10::1/64
+ ip link set dev $rp1 down
+
+ __addr_add_del g1 del 2001:db8:3::1/128
+ ip link set dev g1 down
+ tunnel_destroy g1
+
+ tc qdisc del dev $swp1 clsact
+ __addr_add_del $swp1 del 2001:db8:1::2/64
+ ip link set dev $swp1 down
+}
+
+setup_prepare()
+{
+ h1=${NETIFS[p1]}
+ swp1=${NETIFS[p2]}
+
+ rp1=${NETIFS[p3]}
+ rp2=${NETIFS[p4]}
+
+ forwarding_enable
+ vrf_prepare
+ h1_create
+ switch_create
+ vrf2_create
+}
+
+cleanup()
+{
+ pre_cleanup
+
+ vrf2_destroy
+ switch_destroy
+ h1_destroy
+ vrf_cleanup
+ forwarding_restore
+}
+
+ipip_payload_get()
+{
+ local saddr="20:01:0d:b8:00:02:00:00:00:00:00:00:00:00:00:01"
+ local daddr="20:01:0d:b8:00:01:00:00:00:00:00:00:00:00:00:01"
+ local flags=$1; shift
+ local key=$1; shift
+
+ p=$(:
+ )"$flags"$( : GRE flags
+ )"0:00:"$( : Reserved + version
+ )"86:dd:"$( : ETH protocol type
+ )"$key"$( : Key
+ )"6"$( : IP version
+ )"0:0"$( : Traffic class
+ )"0:00:00:"$( : Flow label
+ )"00:00:"$( : Payload length
+ )"3a:"$( : Next header
+ )"04:"$( : Hop limit
+ )"$saddr:"$( : IP saddr
+ )"$daddr:"$( : IP daddr
+ )
+ echo $p
+}
+
+ecn_payload_get()
+{
+ echo $(ipip_payload_get "0")
+}
+
+ecn_decap_test()
+{
+ local trap_name="decap_error"
+ local desc=$1; shift
+ local ecn_desc=$1; shift
+ local outer_tos=$1; shift
+ local mz_pid
+
+ RET=0
+
+ tc filter add dev $swp1 egress protocol ipv6 pref 1 handle 101 \
+ flower src_ip 2001:db8:2::1 dst_ip 2001:db8:1::1 skip_sw \
+ action pass
+
+ rp1_mac=$(mac_get $rp1)
+ rp2_mac=$(mac_get $rp2)
+ payload=$(ecn_payload_get)
+
+ ip vrf exec v$rp2 $MZ -6 $rp2 -c 0 -d 1msec -a $rp2_mac -b $rp1_mac \
+ -A 2001:db8:3::2 -B 2001:db8:3::1 -t ip \
+ tos=$outer_tos,next=47,p=$payload -q &
+ mz_pid=$!
+
+ devlink_trap_exception_test $trap_name
+
+ tc_check_packets "dev $swp1 egress" 101 0
+ check_err $? "Packets were not dropped"
+
+ log_test "$desc: Inner ECN is not ECT and outer is $ecn_desc"
+
+ kill $mz_pid && wait $mz_pid &> /dev/null
+ tc filter del dev $swp1 egress protocol ipv6 pref 1 handle 101 flower
+}
+
+no_matching_tunnel_test()
+{
+ local trap_name="decap_error"
+ local desc=$1; shift
+ local sip=$1; shift
+ local mz_pid
+
+ RET=0
+
+ tc filter add dev $swp1 egress protocol ipv6 pref 1 handle 101 \
+ flower src_ip 2001:db8:2::1 dst_ip 2001:db8:1::1 action pass
+
+ rp1_mac=$(mac_get $rp1)
+ rp2_mac=$(mac_get $rp2)
+ payload=$(ipip_payload_get "$@")
+
+ ip vrf exec v$rp2 $MZ -6 $rp2 -c 0 -d 1msec -a $rp2_mac -b $rp1_mac \
+ -A $sip -B 2001:db8:3::1 -t ip next=47,p=$payload -q &
+ mz_pid=$!
+
+ devlink_trap_exception_test $trap_name
+
+ tc_check_packets "dev $swp1 egress" 101 0
+ check_err $? "Packets were not dropped"
+
+ log_test "$desc"
+
+ kill $mz_pid && wait $mz_pid &> /dev/null
+ tc filter del dev $swp1 egress protocol ipv6 pref 1 handle 101 flower
+}
+
+decap_error_test()
+{
+ # Correct source IP - the remote address
+ local sip=2001:db8:3::2
+
+ ecn_decap_test "Decap error" "ECT(1)" 01
+ ecn_decap_test "Decap error" "ECT(0)" 02
+ ecn_decap_test "Decap error" "CE" 03
+
+ no_matching_tunnel_test "Decap error: Source IP check failed" \
+ 2001:db8:4::2 "0"
+ no_matching_tunnel_test \
+ "Decap error: Key exists but was not expected" $sip "2" \
+ "00:00:00:E9:"
+
+ # Destroy the tunnel and create new one with key
+ __addr_add_del g1 del 2001:db8:3::1/128
+ tunnel_destroy g1
+
+ tunnel_create g1 ip6gre 2001:db8:3::1 2001:db8:3::2 tos inherit \
+ ttl inherit key 233
+ __addr_add_del g1 add 2001:db8:3::1/128
+
+ no_matching_tunnel_test \
+ "Decap error: Key does not exist but was expected" $sip "0"
+ no_matching_tunnel_test \
+ "Decap error: Packet has a wrong key field" $sip "2" \
+ "00:00:00:E8:"
+}
+
+trap cleanup EXIT
+
+setup_prepare
+setup_wait
+tests_run
+
+exit $EXIT_STATUS
diff --git a/tools/testing/selftests/drivers/net/mlxsw/devlink_trap_tunnel_vxlan.sh b/tools/testing/selftests/drivers/net/mlxsw/devlink_trap_tunnel_vxlan.sh
index 10e0f3dbc930..5f6eb965cfd1 100755
--- a/tools/testing/selftests/drivers/net/mlxsw/devlink_trap_tunnel_vxlan.sh
+++ b/tools/testing/selftests/drivers/net/mlxsw/devlink_trap_tunnel_vxlan.sh
@@ -217,9 +217,11 @@ short_payload_get()
dest_mac=$(mac_get $h1)
p=$(:
)"08:"$( : VXLAN flags
- )"01:00:00:"$( : VXLAN reserved
+ )"00:00:00:"$( : VXLAN reserved
)"00:03:e8:"$( : VXLAN VNI : 1000
)"00:"$( : VXLAN reserved
+ )"$dest_mac:"$( : ETH daddr
+ )"00:00:00:00:00:00:"$( : ETH saddr
)
echo $p
}
@@ -263,7 +265,8 @@ decap_error_test()
corrupted_packet_test "Decap error: Reserved bits in use" \
"reserved_bits_payload_get"
- corrupted_packet_test "Decap error: No L2 header" "short_payload_get"
+ corrupted_packet_test "Decap error: Too short inner packet" \
+ "short_payload_get"
}
mc_smac_payload_get()
diff --git a/tools/testing/selftests/drivers/net/mlxsw/devlink_trap_tunnel_vxlan_ipv6.sh b/tools/testing/selftests/drivers/net/mlxsw/devlink_trap_tunnel_vxlan_ipv6.sh
new file mode 100755
index 000000000000..f6c16cbb6cf7
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/mlxsw/devlink_trap_tunnel_vxlan_ipv6.sh
@@ -0,0 +1,342 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+#
+# Test devlink-trap tunnel drops and exceptions functionality over mlxsw.
+# Check all traps to make sure they are triggered under the right
+# conditions.
+
+# +------------------------+
+# | H1 (vrf) |
+# | + $h1 |
+# | | 2001:db8:1::1/64 |
+# +----|-------------------+
+# |
+# +----|----------------------------------------------------------------------+
+# | SW | |
+# | +--|--------------------------------------------------------------------+ |
+# | | + $swp1 BR1 (802.1d) | |
+# | | | |
+# | | + vx1 (vxlan) | |
+# | | local 2001:db8:3::1 | |
+# | | id 1000 dstport $VXPORT | |
+# | +-----------------------------------------------------------------------+ |
+# | |
+# | + $rp1 |
+# | | 2001:db8:3::1/64 |
+# +----|----------------------------------------------------------------------+
+# |
+# +----|--------------------------------------------------------+
+# | | VRF2 |
+# | + $rp2 |
+# | 2001:db8:3::2/64 |
+# | |
+# +-------------------------------------------------------------+
+
+lib_dir=$(dirname $0)/../../../net/forwarding
+
+ALL_TESTS="
+ decap_error_test
+ overlay_smac_is_mc_test
+"
+
+NUM_NETIFS=4
+source $lib_dir/lib.sh
+source $lib_dir/tc_common.sh
+source $lib_dir/devlink_lib.sh
+
+: ${VXPORT:=4789}
+export VXPORT
+
+h1_create()
+{
+ simple_if_init $h1 2001:db8:1::1/64
+}
+
+h1_destroy()
+{
+ simple_if_fini $h1 2001:db8:1::1/64
+}
+
+switch_create()
+{
+ ip link add name br1 type bridge vlan_filtering 0 mcast_snooping 0
+ # Make sure the bridge uses the MAC address of the local port and not
+ # that of the VxLAN's device.
+ ip link set dev br1 address $(mac_get $swp1)
+ ip link set dev br1 up
+
+ tc qdisc add dev $swp1 clsact
+ ip link set dev $swp1 master br1
+ ip link set dev $swp1 up
+
+ ip link add name vx1 type vxlan id 1000 local 2001:db8:3::1 \
+ dstport "$VXPORT" nolearning udp6zerocsumrx udp6zerocsumtx \
+ tos inherit ttl 100
+ ip link set dev vx1 master br1
+ ip link set dev vx1 up
+
+ ip link set dev $rp1 up
+ ip address add dev $rp1 2001:db8:3::1/64
+}
+
+switch_destroy()
+{
+ ip address del dev $rp1 2001:db8:3::1/64
+ ip link set dev $rp1 down
+
+ ip link set dev vx1 down
+ ip link set dev vx1 nomaster
+ ip link del dev vx1
+
+ ip link set dev $swp1 down
+ ip link set dev $swp1 nomaster
+ tc qdisc del dev $swp1 clsact
+
+ ip link set dev br1 down
+ ip link del dev br1
+}
+
+vrf2_create()
+{
+ simple_if_init $rp2 2001:db8:3::2/64
+}
+
+vrf2_destroy()
+{
+ simple_if_fini $rp2 2001:db8:3::2/64
+}
+
+setup_prepare()
+{
+ h1=${NETIFS[p1]}
+ swp1=${NETIFS[p2]}
+
+ rp1=${NETIFS[p3]}
+ rp2=${NETIFS[p4]}
+
+ vrf_prepare
+ forwarding_enable
+ h1_create
+ switch_create
+ vrf2_create
+}
+
+cleanup()
+{
+ pre_cleanup
+
+ vrf2_destroy
+ switch_destroy
+ h1_destroy
+ forwarding_restore
+ vrf_cleanup
+}
+
+ecn_payload_get()
+{
+ local dest_mac=$(mac_get $h1)
+ local saddr="20:01:0d:b8:00:01:00:00:00:00:00:00:00:00:00:03"
+ local daddr="20:01:0d:b8:00:01:00:00:00:00:00:00:00:00:00:01"
+ p=$(:
+ )"08:"$( : VXLAN flags
+ )"00:00:00:"$( : VXLAN reserved
+ )"00:03:e8:"$( : VXLAN VNI : 1000
+ )"00:"$( : VXLAN reserved
+ )"$dest_mac:"$( : ETH daddr
+ )"00:00:00:00:00:00:"$( : ETH saddr
+ )"86:dd:"$( : ETH type
+ )"6"$( : IP version
+ )"0:0"$( : Traffic class
+ )"0:00:00:"$( : Flow label
+ )"00:08:"$( : Payload length
+ )"3a:"$( : Next header
+ )"04:"$( : Hop limit
+ )"$saddr:"$( : IP saddr
+ )"$daddr:"$( : IP daddr
+ )"80:"$( : ICMPv6.type
+ )"00:"$( : ICMPv6.code
+ )"00:"$( : ICMPv6.checksum
+ )
+ echo $p
+}
+
+ecn_decap_test()
+{
+ local trap_name="decap_error"
+ local desc=$1; shift
+ local ecn_desc=$1; shift
+ local outer_tos=$1; shift
+ local mz_pid
+
+ RET=0
+
+ tc filter add dev $swp1 egress protocol ipv6 pref 1 handle 101 \
+ flower src_ip 2001:db8:1::3 dst_ip 2001:db8:1::1 action pass
+
+ rp1_mac=$(mac_get $rp1)
+ payload=$(ecn_payload_get)
+
+ ip vrf exec v$rp2 $MZ -6 $rp2 -c 0 -d 1msec -b $rp1_mac \
+ -B 2001:db8:3::1 -t udp \
+ sp=12345,dp=$VXPORT,tos=$outer_tos,p=$payload -q &
+ mz_pid=$!
+
+ devlink_trap_exception_test $trap_name
+
+ tc_check_packets "dev $swp1 egress" 101 0
+ check_err $? "Packets were not dropped"
+
+ log_test "$desc: Inner ECN is not ECT and outer is $ecn_desc"
+
+ kill $mz_pid && wait $mz_pid &> /dev/null
+ tc filter del dev $swp1 egress protocol ipv6 pref 1 handle 101 flower
+}
+
+reserved_bits_payload_get()
+{
+ local dest_mac=$(mac_get $h1)
+ local saddr="20:01:0d:b8:00:01:00:00:00:00:00:00:00:00:00:03"
+ local daddr="20:01:0d:b8:00:01:00:00:00:00:00:00:00:00:00:01"
+ p=$(:
+ )"08:"$( : VXLAN flags
+ )"01:00:00:"$( : VXLAN reserved
+ )"00:03:e8:"$( : VXLAN VNI : 1000
+ )"00:"$( : VXLAN reserved
+ )"$dest_mac:"$( : ETH daddr
+ )"00:00:00:00:00:00:"$( : ETH saddr
+ )"86:dd:"$( : ETH type
+ )"6"$( : IP version
+ )"0:0"$( : Traffic class
+ )"0:00:00:"$( : Flow label
+ )"00:08:"$( : Payload length
+ )"3a:"$( : Next header
+ )"04:"$( : Hop limit
+ )"$saddr:"$( : IP saddr
+ )"$daddr:"$( : IP daddr
+ )"80:"$( : ICMPv6.type
+ )"00:"$( : ICMPv6.code
+ )"00:"$( : ICMPv6.checksum
+ )
+ echo $p
+}
+
+short_payload_get()
+{
+ dest_mac=$(mac_get $h1)
+ p=$(:
+ )"08:"$( : VXLAN flags
+ )"00:00:00:"$( : VXLAN reserved
+ )"00:03:e8:"$( : VXLAN VNI : 1000
+ )"00:"$( : VXLAN reserved
+ )"$dest_mac:"$( : ETH daddr
+ )"00:00:00:00:00:00:"$( : ETH saddr
+ )
+ echo $p
+}
+
+corrupted_packet_test()
+{
+ local trap_name="decap_error"
+ local desc=$1; shift
+ local payload_get=$1; shift
+ local mz_pid
+
+ RET=0
+
+ # In case of too short packet, there is no any inner packet,
+ # so the matching will always succeed
+ tc filter add dev $swp1 egress protocol ipv6 pref 1 handle 101 \
+ flower skip_hw src_ip 2001:db8:3::1 dst_ip 2001:db8:1::1 \
+ action pass
+
+ rp1_mac=$(mac_get $rp1)
+ payload=$($payload_get)
+ ip vrf exec v$rp2 $MZ -6 $rp2 -c 0 -d 1msec -b $rp1_mac \
+ -B 2001:db8:3::1 -t udp sp=12345,dp=$VXPORT,p=$payload -q &
+ mz_pid=$!
+
+ devlink_trap_exception_test $trap_name
+
+ tc_check_packets "dev $swp1 egress" 101 0
+ check_err $? "Packets were not dropped"
+
+ log_test "$desc"
+
+ kill $mz_pid && wait $mz_pid &> /dev/null
+ tc filter del dev $swp1 egress protocol ipv6 pref 1 handle 101 flower
+}
+
+decap_error_test()
+{
+ ecn_decap_test "Decap error" "ECT(1)" 01
+ ecn_decap_test "Decap error" "ECT(0)" 02
+ ecn_decap_test "Decap error" "CE" 03
+
+ corrupted_packet_test "Decap error: Reserved bits in use" \
+ "reserved_bits_payload_get"
+ corrupted_packet_test "Decap error: Too short inner packet" \
+ "short_payload_get"
+}
+
+mc_smac_payload_get()
+{
+ local dest_mac=$(mac_get $h1)
+ local source_mac="01:02:03:04:05:06"
+ local saddr="20:01:0d:b8:00:01:00:00:00:00:00:00:00:00:00:03"
+ local daddr="20:01:0d:b8:00:01:00:00:00:00:00:00:00:00:00:01"
+ p=$(:
+ )"08:"$( : VXLAN flags
+ )"00:00:00:"$( : VXLAN reserved
+ )"00:03:e8:"$( : VXLAN VNI : 1000
+ )"00:"$( : VXLAN reserved
+ )"$dest_mac:"$( : ETH daddr
+ )"$source_mac:"$( : ETH saddr
+ )"86:dd:"$( : ETH type
+ )"6"$( : IP version
+ )"0:0"$( : Traffic class
+ )"0:00:00:"$( : Flow label
+ )"00:08:"$( : Payload length
+ )"3a:"$( : Next header
+ )"04:"$( : Hop limit
+ )"$saddr:"$( : IP saddr
+ )"$daddr:"$( : IP daddr
+ )"80:"$( : ICMPv6.type
+ )"00:"$( : ICMPv6.code
+ )"00:"$( : ICMPv6.checksum
+ )
+ echo $p
+}
+
+overlay_smac_is_mc_test()
+{
+ local trap_name="overlay_smac_is_mc"
+ local mz_pid
+
+ RET=0
+
+ # The matching will be checked on devlink_trap_drop_test()
+ # and the filter will be removed on devlink_trap_drop_cleanup()
+ tc filter add dev $swp1 egress protocol ipv6 pref 1 handle 101 \
+ flower src_mac 01:02:03:04:05:06 action pass
+
+ rp1_mac=$(mac_get $rp1)
+ payload=$(mc_smac_payload_get)
+
+ ip vrf exec v$rp2 $MZ -6 $rp2 -c 0 -d 1msec -b $rp1_mac \
+ -B 2001:db8:3::1 -t udp sp=12345,dp=$VXPORT,p=$payload -q &
+ mz_pid=$!
+
+ devlink_trap_drop_test $trap_name $swp1 101
+
+ log_test "Overlay source MAC is multicast"
+
+ devlink_trap_drop_cleanup $mz_pid $swp1 "ipv6" 1 101
+}
+
+trap cleanup EXIT
+
+setup_prepare
+setup_wait
+tests_run
+
+exit $EXIT_STATUS
diff --git a/tools/testing/selftests/drivers/net/mlxsw/egress_vid_classification.sh b/tools/testing/selftests/drivers/net/mlxsw/egress_vid_classification.sh
new file mode 100755
index 000000000000..a5c2aec52898
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/mlxsw/egress_vid_classification.sh
@@ -0,0 +1,272 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+# Test VLAN classification after routing and verify that the order of
+# configuration does not impact switch behavior. Verify that {RIF, Port}->VID
+# mapping is added correctly for existing {Port, VID}->FID mapping and that
+# {RIF, Port}->VID mapping is added correctly for new {Port, VID}->FID mapping.
+
+# +-------------------+ +--------------------+
+# | H1 | | H2 |
+# | | | |
+# | $h1.10 + | | + $h2.10 |
+# | 192.0.2.1/28 | | | | 192.0.2.3/28 |
+# | | | | | |
+# | $h1 + | | + $h2 |
+# +----------------|--+ +--|-----------------+
+# | |
+# +----------------|-------------------------|-----------------+
+# | SW $swp1 + + $swp2 |
+# | | | |
+# | +--------------|-------------------------|---------------+ |
+# | | $swp1.10 + + $swp2.10 | |
+# | | | |
+# | | br0 | |
+# | | 192.0.2.2/28 | |
+# | +--------------------------------------------------------+ |
+# | |
+# | $swp3.20 + |
+# | 192.0.2.17/28 | |
+# | | |
+# | $swp3 + |
+# +---------------|--------------------------------------------+
+# |
+# +---------------|--+
+# | $h3 + |
+# | | |
+# | $h3.20 + |
+# | 192.0.2.18/28 |
+# | |
+# | H3 |
+# +------------------+
+
+lib_dir=$(dirname $0)/../../../net/forwarding
+
+ALL_TESTS="
+ port_vid_map_rif
+ rif_port_vid_map
+"
+
+NUM_NETIFS=6
+source $lib_dir/lib.sh
+source $lib_dir/tc_common.sh
+source $lib_dir/devlink_lib.sh
+
+h1_create()
+{
+ simple_if_init $h1
+ vlan_create $h1 10 v$h1 192.0.2.1/28
+
+ ip route add 192.0.2.16/28 vrf v$h1 nexthop via 192.0.2.2
+}
+
+h1_destroy()
+{
+ ip route del 192.0.2.16/28 vrf v$h1 nexthop via 192.0.2.2
+
+ vlan_destroy $h1 10
+ simple_if_fini $h1
+}
+
+h2_create()
+{
+ simple_if_init $h2
+ vlan_create $h2 10 v$h2 192.0.2.3/28
+}
+
+h2_destroy()
+{
+ vlan_destroy $h2 10
+ simple_if_fini $h2
+}
+
+h3_create()
+{
+ simple_if_init $h3
+ vlan_create $h3 20 v$h3 192.0.2.18/28
+
+ ip route add 192.0.2.0/28 vrf v$h3 nexthop via 192.0.2.17
+}
+
+h3_destroy()
+{
+ ip route del 192.0.2.0/28 vrf v$h3 nexthop via 192.0.2.17
+
+ vlan_destroy $h3 20
+ simple_if_fini $h3
+}
+
+switch_create()
+{
+ ip link set dev $swp1 up
+ tc qdisc add dev $swp1 clsact
+
+ ip link add dev br0 type bridge mcast_snooping 0
+
+ # By default, a link-local address is generated when netdevice becomes
+ # up. Adding an address to the bridge will cause creating a RIF for it.
+ # Prevent generating link-local address to be able to control when the
+ # RIF is added.
+ sysctl_set net.ipv6.conf.br0.addr_gen_mode 1
+ ip link set dev br0 up
+
+ ip link set dev $swp2 up
+ vlan_create $swp2 10
+ ip link set dev $swp2.10 master br0
+
+ ip link set dev $swp3 up
+ vlan_create $swp3 20 "" 192.0.2.17/28
+
+ # Replace neighbor to avoid 1 packet which is forwarded in software due
+ # to "unresolved neigh".
+ ip neigh replace dev $swp3.20 192.0.2.18 lladdr $(mac_get $h3.20)
+}
+
+switch_destroy()
+{
+ vlan_destroy $swp3 20
+ ip link set dev $swp3 down
+
+ ip link set dev $swp2.10 nomaster
+ vlan_destroy $swp2 10
+ ip link set dev $swp2 down
+
+ ip link set dev br0 down
+ sysctl_restore net.ipv6.conf.br0.addr_gen_mode
+ ip link del dev br0
+
+ tc qdisc del dev $swp1 clsact
+ ip link set dev $swp1 down
+}
+
+setup_prepare()
+{
+ h1=${NETIFS[p1]}
+ swp1=${NETIFS[p2]}
+
+ swp2=${NETIFS[p3]}
+ h2=${NETIFS[p4]}
+
+ swp3=${NETIFS[p5]}
+ h3=${NETIFS[p6]}
+
+ vrf_prepare
+ forwarding_enable
+
+ h1_create
+ h2_create
+ h3_create
+
+ switch_create
+}
+
+cleanup()
+{
+ pre_cleanup
+
+ switch_destroy
+
+ h3_destroy
+ h2_destroy
+ h1_destroy
+
+ forwarding_restore
+ vrf_cleanup
+}
+
+bridge_rif_add()
+{
+ rifs_occ_t0=$(devlink_resource_occ_get rifs)
+ __addr_add_del br0 add 192.0.2.2/28
+ rifs_occ_t1=$(devlink_resource_occ_get rifs)
+
+ expected_rifs=$((rifs_occ_t0 + 1))
+
+ [[ $expected_rifs -eq $rifs_occ_t1 ]]
+ check_err $? "Expected $expected_rifs RIFs, $rifs_occ_t1 are used"
+
+ sleep 1
+}
+
+bridge_rif_del()
+{
+ __addr_add_del br0 del 192.0.2.2/28
+}
+
+port_vid_map_rif()
+{
+ RET=0
+
+ # First add {port, VID}->FID for swp1.10, then add a RIF and verify that
+ # packets get the correct VID after routing.
+ vlan_create $swp1 10
+ ip link set dev $swp1.10 master br0
+ bridge_rif_add
+
+ # Replace neighbor to avoid 1 packet which is forwarded in software due
+ # to "unresolved neigh".
+ ip neigh replace dev br0 192.0.2.1 lladdr $(mac_get $h1.10)
+
+ # The hardware matches on the first ethertype which is not VLAN,
+ # so the protocol should be IP.
+ tc filter add dev $swp1 egress protocol ip pref 1 handle 101 \
+ flower skip_sw dst_ip 192.0.2.1 action pass
+
+ ping_do $h1.10 192.0.2.18
+ check_err $? "Ping failed"
+
+ tc_check_at_least_x_packets "dev $swp1 egress" 101 10
+ check_err $? "Packets were not routed in hardware"
+
+ log_test "Add RIF for existing {port, VID}->FID mapping"
+
+ tc filter del dev $swp1 egress
+
+ bridge_rif_del
+ ip link set dev $swp1.10 nomaster
+ vlan_destroy $swp1 10
+}
+
+rif_port_vid_map()
+{
+ RET=0
+
+ # First add an address to the bridge, which will create a RIF on top of
+ # it, then add a new {port, VID}->FID mapping and verify that packets
+ # get the correct VID after routing.
+ bridge_rif_add
+ vlan_create $swp1 10
+ ip link set dev $swp1.10 master br0
+
+ # Replace neighbor to avoid 1 packet which is forwarded in software due
+ # to "unresolved neigh".
+ ip neigh replace dev br0 192.0.2.1 lladdr $(mac_get $h1.10)
+
+ # The hardware matches on the first ethertype which is not VLAN,
+ # so the protocol should be IP.
+ tc filter add dev $swp1 egress protocol ip pref 1 handle 101 \
+ flower skip_sw dst_ip 192.0.2.1 action pass
+
+ ping_do $h1.10 192.0.2.18
+ check_err $? "Ping failed"
+
+ tc_check_at_least_x_packets "dev $swp1 egress" 101 10
+ check_err $? "Packets were not routed in hardware"
+
+ log_test "Add {port, VID}->FID mapping for FID with a RIF"
+
+ tc filter del dev $swp1 egress
+
+ ip link set dev $swp1.10 nomaster
+ vlan_destroy $swp1 10
+ bridge_rif_del
+}
+
+trap cleanup EXIT
+
+setup_prepare
+setup_wait
+
+tests_run
+
+exit $EXIT_STATUS
diff --git a/tools/testing/selftests/drivers/net/mlxsw/ethtool_lanes.sh b/tools/testing/selftests/drivers/net/mlxsw/ethtool_lanes.sh
new file mode 100755
index 000000000000..91891b9418d7
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/mlxsw/ethtool_lanes.sh
@@ -0,0 +1,187 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+lib_dir=$(dirname $0)/../../../net/forwarding
+
+ALL_TESTS="
+ autoneg
+ autoneg_force_mode
+"
+
+NUM_NETIFS=2
+: ${TIMEOUT:=30000} # ms
+source $lib_dir/lib.sh
+source $lib_dir/ethtool_lib.sh
+
+setup_prepare()
+{
+ swp1=${NETIFS[p1]}
+ swp2=${NETIFS[p2]}
+
+ ip link set dev $swp1 up
+ ip link set dev $swp2 up
+
+ busywait "$TIMEOUT" wait_for_port_up ethtool $swp2
+ check_err $? "ports did not come up"
+
+ local lanes_exist=$(ethtool $swp1 | grep 'Lanes:')
+ if [[ -z $lanes_exist ]]; then
+ log_test "SKIP: driver does not support lanes setting"
+ exit 1
+ fi
+
+ ip link set dev $swp2 down
+ ip link set dev $swp1 down
+}
+
+check_lanes()
+{
+ local dev=$1; shift
+ local lanes=$1; shift
+ local max_speed=$1; shift
+ local chosen_lanes
+
+ chosen_lanes=$(ethtool $dev | grep 'Lanes:')
+ chosen_lanes=${chosen_lanes#*"Lanes: "}
+
+ ((chosen_lanes == lanes))
+ check_err $? "swp1 advertise $max_speed and $lanes, devs sync to $chosen_lanes"
+}
+
+check_unsupported_lanes()
+{
+ local dev=$1; shift
+ local max_speed=$1; shift
+ local max_lanes=$1; shift
+ local autoneg=$1; shift
+ local autoneg_str=""
+
+ local unsupported_lanes=$((max_lanes *= 2))
+
+ if [[ $autoneg -eq 0 ]]; then
+ autoneg_str="autoneg off"
+ fi
+
+ ethtool -s $swp1 speed $max_speed lanes $unsupported_lanes $autoneg_str &> /dev/null
+ check_fail $? "Unsuccessful $unsupported_lanes lanes setting was expected"
+}
+
+max_speed_and_lanes_get()
+{
+ local dev=$1; shift
+ local arr=("$@")
+ local max_lanes
+ local max_speed
+ local -a lanes_arr
+ local -a speeds_arr
+ local -a max_values
+
+ for ((i=0; i<${#arr[@]}; i+=2)); do
+ speeds_arr+=("${arr[$i]}")
+ lanes_arr+=("${arr[i+1]}")
+ done
+
+ max_values+=($(get_max "${speeds_arr[@]}"))
+ max_values+=($(get_max "${lanes_arr[@]}"))
+
+ echo ${max_values[@]}
+}
+
+search_linkmode()
+{
+ local speed=$1; shift
+ local lanes=$1; shift
+ local arr=("$@")
+
+ for ((i=0; i<${#arr[@]}; i+=2)); do
+ if [[ $speed -eq ${arr[$i]} && $lanes -eq ${arr[i+1]} ]]; then
+ return 1
+ fi
+ done
+ return 0
+}
+
+autoneg()
+{
+ RET=0
+
+ local lanes
+ local max_speed
+ local max_lanes
+
+ local -a linkmodes_params=($(dev_linkmodes_params_get $swp1 1))
+ local -a max_values=($(max_speed_and_lanes_get $swp1 "${linkmodes_params[@]}"))
+ max_speed=${max_values[0]}
+ max_lanes=${max_values[1]}
+
+ lanes=$max_lanes
+
+ while [[ $lanes -ge 1 ]]; do
+ search_linkmode $max_speed $lanes "${linkmodes_params[@]}"
+ if [[ $? -eq 1 ]]; then
+ ethtool_set $swp1 speed $max_speed lanes $lanes
+ ip link set dev $swp1 up
+ ip link set dev $swp2 up
+ busywait "$TIMEOUT" wait_for_port_up ethtool $swp2
+ check_err $? "ports did not come up"
+
+ check_lanes $swp1 $lanes $max_speed
+ log_test "$lanes lanes is autonegotiated"
+ fi
+ let $((lanes /= 2))
+ done
+
+ check_unsupported_lanes $swp1 $max_speed $max_lanes 1
+ log_test "Lanes number larger than max width is not set"
+
+ ip link set dev $swp2 down
+ ip link set dev $swp1 down
+}
+
+autoneg_force_mode()
+{
+ RET=0
+
+ local lanes
+ local max_speed
+ local max_lanes
+
+ local -a linkmodes_params=($(dev_linkmodes_params_get $swp1 1))
+ local -a max_values=($(max_speed_and_lanes_get $swp1 "${linkmodes_params[@]}"))
+ max_speed=${max_values[0]}
+ max_lanes=${max_values[1]}
+
+ lanes=$max_lanes
+
+ while [[ $lanes -ge 1 ]]; do
+ search_linkmode $max_speed $lanes "${linkmodes_params[@]}"
+ if [[ $? -eq 1 ]]; then
+ ethtool_set $swp1 speed $max_speed lanes $lanes autoneg off
+ ethtool_set $swp2 speed $max_speed lanes $lanes autoneg off
+ ip link set dev $swp1 up
+ ip link set dev $swp2 up
+ busywait "$TIMEOUT" wait_for_port_up ethtool $swp2
+ check_err $? "ports did not come up"
+
+ check_lanes $swp1 $lanes $max_speed
+ log_test "Autoneg off, $lanes lanes detected during force mode"
+ fi
+ let $((lanes /= 2))
+ done
+
+ check_unsupported_lanes $swp1 $max_speed $max_lanes 0
+ log_test "Lanes number larger than max width is not set"
+
+ ip link set dev $swp2 down
+ ip link set dev $swp1 down
+
+ ethtool -s $swp2 autoneg on
+ ethtool -s $swp1 autoneg on
+}
+
+check_ethtool_lanes_support
+setup_prepare
+
+tests_run
+
+exit $EXIT_STATUS
diff --git a/tools/testing/selftests/drivers/net/mlxsw/extack.sh b/tools/testing/selftests/drivers/net/mlxsw/extack.sh
index 7a0a99c1d22f..6fd422d38fe8 100755
--- a/tools/testing/selftests/drivers/net/mlxsw/extack.sh
+++ b/tools/testing/selftests/drivers/net/mlxsw/extack.sh
@@ -35,7 +35,9 @@ netdev_pre_up_test()
{
RET=0
- ip link add name br1 up type bridge vlan_filtering 0 mcast_snooping 0
+ ip link add name br1 type bridge vlan_filtering 0 mcast_snooping 0
+ ip link set dev br1 addrgenmode none
+ ip link set dev br1 up
ip link add name vx1 up type vxlan id 1000 \
local 192.0.2.17 remote 192.0.2.18 \
dstport 4789 nolearning noudpcsum tos inherit ttl 100
@@ -46,7 +48,9 @@ netdev_pre_up_test()
ip link set dev $swp1 master br1
check_err $?
- ip link add name br2 up type bridge vlan_filtering 0 mcast_snooping 0
+ ip link add name br2 type bridge vlan_filtering 0 mcast_snooping 0
+ ip link set dev br2 addrgenmode none
+ ip link set dev br2 up
ip link add name vx2 up type vxlan id 2000 \
local 192.0.2.17 remote 192.0.2.18 \
dstport 4789 nolearning noudpcsum tos inherit ttl 100
@@ -81,7 +85,9 @@ vxlan_vlan_add_test()
{
RET=0
- ip link add name br1 up type bridge vlan_filtering 1 mcast_snooping 0
+ ip link add name br1 type bridge vlan_filtering 1 mcast_snooping 0
+ ip link set dev br1 addrgenmode none
+ ip link set dev br1 up
# Unsupported configuration: mlxsw demands VXLAN with "noudpcsum".
ip link add name vx1 up type vxlan id 1000 \
@@ -117,7 +123,9 @@ vxlan_bridge_create_test()
dstport 4789 tos inherit ttl 100
# Test with VLAN-aware bridge.
- ip link add name br1 up type bridge vlan_filtering 1 mcast_snooping 0
+ ip link add name br1 type bridge vlan_filtering 1 mcast_snooping 0
+ ip link set dev br1 addrgenmode none
+ ip link set dev br1 up
ip link set dev vx1 master br1
@@ -142,8 +150,12 @@ bridge_create_test()
{
RET=0
- ip link add name br1 up type bridge vlan_filtering 1
- ip link add name br2 up type bridge vlan_filtering 1
+ ip link add name br1 type bridge vlan_filtering 1
+ ip link set dev br1 addrgenmode none
+ ip link set dev br1 up
+ ip link add name br2 type bridge vlan_filtering 1
+ ip link set dev br2 addrgenmode none
+ ip link set dev br2 up
ip link set dev $swp1 master br1
check_err $?
diff --git a/tools/testing/selftests/drivers/net/mlxsw/fib.sh b/tools/testing/selftests/drivers/net/mlxsw/fib.sh
index eab79b9e58cd..dcbf32b99bb6 100755
--- a/tools/testing/selftests/drivers/net/mlxsw/fib.sh
+++ b/tools/testing/selftests/drivers/net/mlxsw/fib.sh
@@ -225,6 +225,16 @@ ipv6_local_replace()
ip -n $ns link del dev dummy1
}
+fib_notify_on_flag_change_set()
+{
+ local notify=$1; shift
+
+ ip netns exec testns1 sysctl -qw net.ipv4.fib_notify_on_flag_change=$notify
+ ip netns exec testns1 sysctl -qw net.ipv6.fib_notify_on_flag_change=$notify
+
+ log_info "Set fib_notify_on_flag_change to $notify"
+}
+
setup_prepare()
{
ip netns add testns1
@@ -251,6 +261,10 @@ trap cleanup EXIT
setup_prepare
+fib_notify_on_flag_change_set 1
+tests_run
+
+fib_notify_on_flag_change_set 0
tests_run
exit $EXIT_STATUS
diff --git a/tools/testing/selftests/drivers/net/mlxsw/hw_stats_l3.sh b/tools/testing/selftests/drivers/net/mlxsw/hw_stats_l3.sh
new file mode 100755
index 000000000000..941ba4c485c9
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/mlxsw/hw_stats_l3.sh
@@ -0,0 +1,31 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+lib_dir=$(dirname $0)/../../../net/forwarding
+
+ALL_TESTS="
+ l3_monitor_test
+"
+NUM_NETIFS=0
+source $lib_dir/lib.sh
+
+swp=$NETIF_NO_CABLE
+
+cleanup()
+{
+ pre_cleanup
+}
+
+l3_monitor_test()
+{
+ hw_stats_monitor_test $swp l3 \
+ "ip addr add dev $swp 192.0.2.1/28" \
+ "ip addr del dev $swp 192.0.2.1/28"
+}
+
+trap cleanup EXIT
+
+setup_wait
+tests_run
+
+exit $EXIT_STATUS
diff --git a/tools/testing/selftests/drivers/net/mlxsw/ingress_rif_conf_1d.sh b/tools/testing/selftests/drivers/net/mlxsw/ingress_rif_conf_1d.sh
new file mode 100755
index 000000000000..7d7f862c809c
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/mlxsw/ingress_rif_conf_1d.sh
@@ -0,0 +1,263 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+# Test routing over bridge and verify that the order of configuration does not
+# impact switch behavior. Verify that RIF is added correctly for existing
+# mappings and that new mappings use the correct RIF.
+
+# +-------------------+ +--------------------+
+# | H1 | | H2 |
+# | | | |
+# | $h1.10 + | | + $h2.10 |
+# | 192.0.2.1/28 | | | | 192.0.2.3/28 |
+# | | | | | |
+# | $h1 + | | + $h2 |
+# +----------------|--+ +--|-----------------+
+# | |
+# +----------------|-------------------------|-----------------+
+# | SW $swp1 + + $swp2 |
+# | | | |
+# | +--------------|-------------------------|---------------+ |
+# | | $swp1.10 + + $swp2.10 | |
+# | | | |
+# | | br0 | |
+# | | 192.0.2.2/28 | |
+# | +--------------------------------------------------------+ |
+# | |
+# | $swp3.10 + |
+# | 192.0.2.17/28 | |
+# | | |
+# | $swp3 + |
+# +---------------|--------------------------------------------+
+# |
+# +---------------|--+
+# | $h3 + |
+# | | |
+# | $h3.10 + |
+# | 192.0.2.18/28 |
+# | |
+# | H3 |
+# +------------------+
+
+lib_dir=$(dirname $0)/../../../net/forwarding
+
+ALL_TESTS="
+ port_vid_map_rif
+ rif_port_vid_map
+"
+
+NUM_NETIFS=6
+source $lib_dir/lib.sh
+source $lib_dir/tc_common.sh
+source $lib_dir/devlink_lib.sh
+
+h1_create()
+{
+ simple_if_init $h1
+ vlan_create $h1 10 v$h1 192.0.2.1/28
+
+ ip route add 192.0.2.16/28 vrf v$h1 nexthop via 192.0.2.2
+}
+
+h1_destroy()
+{
+ ip route del 192.0.2.16/28 vrf v$h1 nexthop via 192.0.2.2
+
+ vlan_destroy $h1 10
+ simple_if_fini $h1
+}
+
+h2_create()
+{
+ simple_if_init $h2
+ vlan_create $h2 10 v$h2 192.0.2.3/28
+}
+
+h2_destroy()
+{
+ vlan_destroy $h2 10
+ simple_if_fini $h2
+}
+
+h3_create()
+{
+ simple_if_init $h3
+ vlan_create $h3 10 v$h3 192.0.2.18/28
+
+ ip route add 192.0.2.0/28 vrf v$h3 nexthop via 192.0.2.17
+}
+
+h3_destroy()
+{
+ ip route del 192.0.2.0/28 vrf v$h3 nexthop via 192.0.2.17
+
+ vlan_destroy $h3 10
+ simple_if_fini $h3
+}
+
+switch_create()
+{
+ ip link set dev $swp1 up
+
+ ip link add dev br0 type bridge mcast_snooping 0
+
+ # By default, a link-local address is generated when netdevice becomes
+ # up. Adding an address to the bridge will cause creating a RIF for it.
+ # Prevent generating link-local address to be able to control when the
+ # RIF is added.
+ sysctl_set net.ipv6.conf.br0.addr_gen_mode 1
+ ip link set dev br0 up
+
+ ip link set dev $swp2 up
+ vlan_create $swp2 10
+ ip link set dev $swp2.10 master br0
+
+ ip link set dev $swp3 up
+ vlan_create $swp3 10 "" 192.0.2.17/28
+ tc qdisc add dev $swp3 clsact
+
+ # Replace neighbor to avoid 1 packet which is forwarded in software due
+ # to "unresolved neigh".
+ ip neigh replace dev $swp3.10 192.0.2.18 lladdr $(mac_get $h3.10)
+}
+
+switch_destroy()
+{
+ tc qdisc del dev $swp3 clsact
+ vlan_destroy $swp3 10
+ ip link set dev $swp3 down
+
+ ip link set dev $swp2.10 nomaster
+ vlan_destroy $swp2 10
+ ip link set dev $swp2 down
+
+ ip link set dev br0 down
+ sysctl_restore net.ipv6.conf.br0.addr_gen_mode
+ ip link del dev br0
+
+ ip link set dev $swp1 down
+}
+
+setup_prepare()
+{
+ h1=${NETIFS[p1]}
+ swp1=${NETIFS[p2]}
+
+ swp2=${NETIFS[p3]}
+ h2=${NETIFS[p4]}
+
+ swp3=${NETIFS[p5]}
+ h3=${NETIFS[p6]}
+
+ vrf_prepare
+ forwarding_enable
+
+ h1_create
+ h2_create
+ h3_create
+
+ switch_create
+}
+
+cleanup()
+{
+ pre_cleanup
+
+ switch_destroy
+
+ h3_destroy
+ h2_destroy
+ h1_destroy
+
+ forwarding_restore
+ vrf_cleanup
+}
+
+bridge_rif_add()
+{
+ rifs_occ_t0=$(devlink_resource_occ_get rifs)
+ __addr_add_del br0 add 192.0.2.2/28
+ rifs_occ_t1=$(devlink_resource_occ_get rifs)
+
+ expected_rifs=$((rifs_occ_t0 + 1))
+
+ [[ $expected_rifs -eq $rifs_occ_t1 ]]
+ check_err $? "Expected $expected_rifs RIFs, $rifs_occ_t1 are used"
+
+ sleep 1
+}
+
+bridge_rif_del()
+{
+ __addr_add_del br0 del 192.0.2.2/28
+}
+
+port_vid_map_rif()
+{
+ RET=0
+
+ # First add {port, VID}->FID for $swp1.10, then add a RIF and verify
+ # that packets can be routed via the existing mapping.
+ vlan_create $swp1 10
+ ip link set dev $swp1.10 master br0
+ bridge_rif_add
+
+ # The hardware matches on the first ethertype which is not VLAN,
+ # so the protocol should be IP.
+ tc filter add dev $swp3 egress protocol ip pref 1 handle 101 \
+ flower skip_sw dst_ip 192.0.2.18 action pass
+
+ ping_do $h1.10 192.0.2.18
+ check_err $? "Ping failed"
+
+ tc_check_at_least_x_packets "dev $swp3 egress" 101 10
+ check_err $? "Packets were not routed in hardware"
+
+ log_test "Add RIF for existing {port, VID}->FID mapping"
+
+ tc filter del dev $swp3 egress
+
+ bridge_rif_del
+ ip link set dev $swp1.10 nomaster
+ vlan_destroy $swp1 10
+}
+
+rif_port_vid_map()
+{
+ RET=0
+
+ # First add an address to the bridge, which will create a RIF on top of
+ # it, then add a new {port, VID}->FID mapping and verify that packets
+ # can be routed via the new mapping.
+ bridge_rif_add
+ vlan_create $swp1 10
+ ip link set dev $swp1.10 master br0
+
+ # The hardware matches on the first ethertype which is not VLAN,
+ # so the protocol should be IP.
+ tc filter add dev $swp3 egress protocol ip pref 1 handle 101 \
+ flower skip_sw dst_ip 192.0.2.18 action pass
+
+ ping_do $h1.10 192.0.2.18
+ check_err $? "Ping failed"
+
+ tc_check_at_least_x_packets "dev $swp3 egress" 101 10
+ check_err $? "Packets were not routed in hardware"
+
+ log_test "Add {port, VID}->FID mapping for FID with a RIF"
+
+ tc filter del dev $swp3 egress
+
+ ip link set dev $swp1.10 nomaster
+ vlan_destroy $swp1 10
+ bridge_rif_del
+}
+
+trap cleanup EXIT
+
+setup_prepare
+setup_wait
+
+tests_run
+
+exit $EXIT_STATUS
diff --git a/tools/testing/selftests/drivers/net/mlxsw/ingress_rif_conf_1q.sh b/tools/testing/selftests/drivers/net/mlxsw/ingress_rif_conf_1q.sh
new file mode 100755
index 000000000000..577293bab88b
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/mlxsw/ingress_rif_conf_1q.sh
@@ -0,0 +1,264 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+# Test routing over bridge and verify that the order of configuration does not
+# impact switch behavior. Verify that RIF is added correctly for existing
+# mapping and that packets can be routed via port which is added after the FID
+# already has a RIF.
+
+# +-------------------+ +--------------------+
+# | H1 | | H2 |
+# | | | |
+# | $h1.10 + | | + $h2.10 |
+# | 192.0.2.1/28 | | | | 192.0.2.3/28 |
+# | | | | | |
+# | $h1 + | | + $h2 |
+# +----------------|--+ +--|-----------------+
+# | |
+# +----------------|-------------------------|-----------------+
+# | SW | | |
+# | +--------------|-------------------------|---------------+ |
+# | | $swp1 + + $swp2 | |
+# | | | |
+# | | br0 | |
+# | +--------------------------------------------------------+ |
+# | | |
+# | br0.10 |
+# | 192.0.2.2/28 |
+# | |
+# | |
+# | $swp3 + |
+# | 192.0.2.17/28 | |
+# +----------------|-------------------------------------------+
+# |
+# +----------------|--+
+# | $h3 + |
+# | 192.0.2.18/28 |
+# | |
+# | H3 |
+# +-------------------+
+
+lib_dir=$(dirname $0)/../../../net/forwarding
+
+ALL_TESTS="
+ vid_map_rif
+ rif_vid_map
+"
+
+NUM_NETIFS=6
+source $lib_dir/lib.sh
+source $lib_dir/tc_common.sh
+source $lib_dir/devlink_lib.sh
+
+h1_create()
+{
+ simple_if_init $h1
+ vlan_create $h1 10 v$h1 192.0.2.1/28
+
+ ip route add 192.0.2.16/28 vrf v$h1 nexthop via 192.0.2.2
+}
+
+h1_destroy()
+{
+ ip route del 192.0.2.16/28 vrf v$h1 nexthop via 192.0.2.2
+
+ vlan_destroy $h1 10
+ simple_if_fini $h1
+}
+
+h2_create()
+{
+ simple_if_init $h2
+ vlan_create $h2 10 v$h2 192.0.2.3/28
+}
+
+h2_destroy()
+{
+ vlan_destroy $h2 10
+ simple_if_fini $h2
+}
+
+h3_create()
+{
+ simple_if_init $h3 192.0.2.18/28
+ ip route add 192.0.2.0/28 vrf v$h3 nexthop via 192.0.2.17
+}
+
+h3_destroy()
+{
+ ip route del 192.0.2.0/28 vrf v$h3 nexthop via 192.0.2.17
+ simple_if_fini $h3 192.0.2.18/28
+}
+
+switch_create()
+{
+ ip link set dev $swp1 up
+
+ ip link add dev br0 type bridge vlan_filtering 1 mcast_snooping 0
+
+ # By default, a link-local address is generated when netdevice becomes
+ # up. Adding an address to the bridge will cause creating a RIF for it.
+ # Prevent generating link-local address to be able to control when the
+ # RIF is added.
+ sysctl_set net.ipv6.conf.br0.addr_gen_mode 1
+ ip link set dev br0 up
+
+ ip link set dev $swp2 up
+ ip link set dev $swp2 master br0
+ bridge vlan add vid 10 dev $swp2
+
+ ip link set dev $swp3 up
+ __addr_add_del $swp3 add 192.0.2.17/28
+ tc qdisc add dev $swp3 clsact
+
+ # Replace neighbor to avoid 1 packet which is forwarded in software due
+ # to "unresolved neigh".
+ ip neigh replace dev $swp3 192.0.2.18 lladdr $(mac_get $h3)
+}
+
+switch_destroy()
+{
+ tc qdisc del dev $swp3 clsact
+ __addr_add_del $swp3 del 192.0.2.17/28
+ ip link set dev $swp3 down
+
+ bridge vlan del vid 10 dev $swp2
+ ip link set dev $swp2 nomaster
+ ip link set dev $swp2 down
+
+ ip link set dev br0 down
+ sysctl_restore net.ipv6.conf.br0.addr_gen_mode
+ ip link del dev br0
+
+ ip link set dev $swp1 down
+}
+
+setup_prepare()
+{
+ h1=${NETIFS[p1]}
+ swp1=${NETIFS[p2]}
+
+ swp2=${NETIFS[p3]}
+ h2=${NETIFS[p4]}
+
+ swp3=${NETIFS[p5]}
+ h3=${NETIFS[p6]}
+
+ vrf_prepare
+ forwarding_enable
+
+ h1_create
+ h2_create
+ h3_create
+
+ switch_create
+}
+
+cleanup()
+{
+ pre_cleanup
+
+ switch_destroy
+
+ h3_destroy
+ h2_destroy
+ h1_destroy
+
+ forwarding_restore
+ vrf_cleanup
+}
+
+bridge_rif_add()
+{
+ rifs_occ_t0=$(devlink_resource_occ_get rifs)
+ vlan_create br0 10 "" 192.0.2.2/28
+ rifs_occ_t1=$(devlink_resource_occ_get rifs)
+
+ expected_rifs=$((rifs_occ_t0 + 1))
+
+ [[ $expected_rifs -eq $rifs_occ_t1 ]]
+ check_err $? "Expected $expected_rifs RIFs, $rifs_occ_t1 are used"
+
+ sleep 1
+}
+
+bridge_rif_del()
+{
+ vlan_destroy br0 10
+}
+
+vid_map_rif()
+{
+ RET=0
+
+ # First add VID->FID for vlan 10, then add a RIF and verify that
+ # packets can be routed via the existing mapping.
+ bridge vlan add vid 10 dev br0 self
+ ip link set dev $swp1 master br0
+ bridge vlan add vid 10 dev $swp1
+
+ bridge_rif_add
+
+ tc filter add dev $swp3 egress protocol ip pref 1 handle 101 \
+ flower skip_sw dst_ip 192.0.2.18 action pass
+
+ ping_do $h1.10 192.0.2.18
+ check_err $? "Ping failed"
+
+ tc_check_at_least_x_packets "dev $swp3 egress" 101 10
+ check_err $? "Packets were not routed in hardware"
+
+ log_test "Add RIF for existing VID->FID mapping"
+
+ tc filter del dev $swp3 egress
+
+ bridge_rif_del
+
+ bridge vlan del vid 10 dev $swp1
+ ip link set dev $swp1 nomaster
+ bridge vlan del vid 10 dev br0 self
+}
+
+rif_vid_map()
+{
+ RET=0
+
+ # Using 802.1Q, there is only one VID->FID map for each VID. That means
+ # that we cannot really check adding a new map for existing FID with a
+ # RIF. Verify that packets can be routed via port which is added after
+ # the FID already has a RIF, although in practice there is no new
+ # mapping in the hardware.
+ bridge vlan add vid 10 dev br0 self
+ bridge_rif_add
+
+ ip link set dev $swp1 master br0
+ bridge vlan add vid 10 dev $swp1
+
+ tc filter add dev $swp3 egress protocol ip pref 1 handle 101 \
+ flower skip_sw dst_ip 192.0.2.18 action pass
+
+ ping_do $h1.10 192.0.2.18
+ check_err $? "Ping failed"
+
+ tc_check_at_least_x_packets "dev $swp3 egress" 101 10
+ check_err $? "Packets were not routed in hardware"
+
+ log_test "Add port to VID->FID mapping for FID with a RIF"
+
+ tc filter del dev $swp3 egress
+
+ bridge vlan del vid 10 dev $swp1
+ ip link set dev $swp1 nomaster
+
+ bridge_rif_del
+ bridge vlan del vid 10 dev br0 self
+}
+
+trap cleanup EXIT
+
+setup_prepare
+setup_wait
+
+tests_run
+
+exit $EXIT_STATUS
diff --git a/tools/testing/selftests/drivers/net/mlxsw/ingress_rif_conf_vxlan.sh b/tools/testing/selftests/drivers/net/mlxsw/ingress_rif_conf_vxlan.sh
new file mode 100755
index 000000000000..90450216a10d
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/mlxsw/ingress_rif_conf_vxlan.sh
@@ -0,0 +1,311 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+# Test routing after VXLAN decapsulation and verify that the order of
+# configuration does not impact switch behavior. Verify that RIF is added
+# correctly for existing mapping and that new mapping uses the correct RIF.
+
+# +---------------------------+
+# | H1 |
+# | + $h1 |
+# | | 192.0.2.1/28 |
+# +----|----------------------+
+# |
+# +----|----------------------------------------------------------------------+
+# | SW | |
+# | +--|--------------------------------------------------------------------+ |
+# | | + $swp1 br1 | |
+# | | vid 10 pvid untagged | |
+# | | | |
+# | | | |
+# | | + vx4001 | |
+# | | local 192.0.2.17 | |
+# | | remote 192.0.2.18 | |
+# | | id 104001 | |
+# | | dstport $VXPORT | |
+# | | vid 4001 pvid untagged | |
+# | | | |
+# | +----------------------------------+------------------------------------+ |
+# | | |
+# | +----------------------------------|------------------------------------+ |
+# | | | | |
+# | | +-------------------------------+---------------------------------+ | |
+# | | | | | |
+# | | + vlan10 vlan4001 + | |
+# | | 192.0.2.2/28 | |
+# | | | |
+# | | vrf-green | |
+# | +-----------------------------------------------------------------------+ |
+# | |
+# | + $rp1 +lo |
+# | | 198.51.100.1/24 192.0.2.17/32 |
+# +----|----------------------------------------------------------------------+
+# |
+# +----|--------------------------------------------------------+
+# | | v$rp2 |
+# | + $rp2 |
+# | 198.51.100.2/24 |
+# | |
+# +-------------------------------------------------------------+
+
+lib_dir=$(dirname $0)/../../../net/forwarding
+
+ALL_TESTS="
+ vni_fid_map_rif
+ rif_vni_fid_map
+"
+
+NUM_NETIFS=4
+source $lib_dir/lib.sh
+source $lib_dir/tc_common.sh
+source $lib_dir/devlink_lib.sh
+
+: ${VXPORT:=4789}
+export VXPORT
+
+h1_create()
+{
+ simple_if_init $h1 192.0.2.1/28
+}
+
+h1_destroy()
+{
+ simple_if_fini $h1 192.0.2.1/28
+}
+
+switch_create()
+{
+ ip link add name br1 type bridge vlan_filtering 1 vlan_default_pvid 0 \
+ mcast_snooping 0
+ # Make sure the bridge uses the MAC address of the local port and not
+ # that of the VxLAN's device.
+ ip link set dev br1 address $(mac_get $swp1)
+ ip link set dev br1 up
+
+ ip link set dev $rp1 up
+ ip address add dev $rp1 198.51.100.1/24
+
+ ip link set dev $swp1 master br1
+ ip link set dev $swp1 up
+ bridge vlan add vid 10 dev $swp1 pvid untagged
+
+ tc qdisc add dev $swp1 clsact
+
+ ip link add name vx4001 type vxlan id 104001 \
+ local 192.0.2.17 dstport $VXPORT \
+ nolearning noudpcsum tos inherit ttl 100
+ ip link set dev vx4001 up
+
+ ip link set dev vx4001 master br1
+
+ ip address add 192.0.2.17/32 dev lo
+
+ # Create SVIs.
+ vrf_create "vrf-green"
+ ip link set dev vrf-green up
+
+ ip link add link br1 name vlan10 up master vrf-green type vlan id 10
+
+ # Replace neighbor to avoid 1 packet which is forwarded in software due
+ # to "unresolved neigh".
+ ip neigh replace dev vlan10 192.0.2.1 lladdr $(mac_get $h1)
+
+ ip address add 192.0.2.2/28 dev vlan10
+
+ bridge vlan add vid 10 dev br1 self
+ bridge vlan add vid 4001 dev br1 self
+
+ sysctl_set net.ipv4.conf.all.rp_filter 0
+}
+
+switch_destroy()
+{
+ sysctl_restore net.ipv4.conf.all.rp_filter
+
+ bridge vlan del vid 4001 dev br1 self
+ bridge vlan del vid 10 dev br1 self
+
+ ip link del dev vlan10
+
+ vrf_destroy "vrf-green"
+
+ ip address del 192.0.2.17/32 dev lo
+
+ tc qdisc del dev $swp1 clsact
+
+ bridge vlan del vid 10 dev $swp1
+ ip link set dev $swp1 down
+ ip link set dev $swp1 nomaster
+
+ ip link set dev vx4001 nomaster
+
+ ip link set dev vx4001 down
+ ip link del dev vx4001
+
+ ip address del dev $rp1 198.51.100.1/24
+ ip link set dev $rp1 down
+
+ ip link set dev br1 down
+ ip link del dev br1
+}
+
+vrp2_create()
+{
+ simple_if_init $rp2 198.51.100.2/24
+
+ ip route add 192.0.2.17/32 vrf v$rp2 nexthop via 198.51.100.1
+}
+
+vrp2_destroy()
+{
+ ip route del 192.0.2.17/32 vrf v$rp2 nexthop via 198.51.100.1
+
+ simple_if_fini $rp2 198.51.100.2/24
+}
+
+setup_prepare()
+{
+ h1=${NETIFS[p1]}
+ swp1=${NETIFS[p2]}
+
+ rp1=${NETIFS[p3]}
+ rp2=${NETIFS[p4]}
+
+ vrf_prepare
+ forwarding_enable
+
+ h1_create
+ switch_create
+
+ vrp2_create
+}
+
+cleanup()
+{
+ pre_cleanup
+
+ vrp2_destroy
+
+ switch_destroy
+ h1_destroy
+
+ forwarding_restore
+ vrf_cleanup
+}
+
+payload_get()
+{
+ local dest_mac=$(mac_get vlan4001)
+ local src_mac=$(mac_get $rp1)
+
+ p=$(:
+ )"08:"$( : VXLAN flags
+ )"00:00:00:"$( : VXLAN reserved
+ )"01:96:41:"$( : VXLAN VNI : 104001
+ )"00:"$( : VXLAN reserved
+ )"$dest_mac:"$( : ETH daddr
+ )"$src_mac:"$( : ETH saddr
+ )"08:00:"$( : ETH type
+ )"45:"$( : IP version + IHL
+ )"00:"$( : IP TOS
+ )"00:54:"$( : IP total length
+ )"3f:49:"$( : IP identification
+ )"00:00:"$( : IP flags + frag off
+ )"3f:"$( : IP TTL
+ )"01:"$( : IP proto
+ )"50:21:"$( : IP header csum
+ )"c6:33:64:0a:"$( : IP saddr: 198.51.100.10
+ )"c0:00:02:01:"$( : IP daddr: 192.0.2.1
+ )
+ echo $p
+}
+
+vlan_rif_add()
+{
+ rifs_occ_t0=$(devlink_resource_occ_get rifs)
+
+ ip link add link br1 name vlan4001 up master vrf-green \
+ type vlan id 4001
+
+ rifs_occ_t1=$(devlink_resource_occ_get rifs)
+ expected_rifs=$((rifs_occ_t0 + 1))
+
+ [[ $expected_rifs -eq $rifs_occ_t1 ]]
+ check_err $? "Expected $expected_rifs RIFs, $rifs_occ_t1 are used"
+}
+
+vlan_rif_del()
+{
+ ip link del dev vlan4001
+}
+
+vni_fid_map_rif()
+{
+ local rp1_mac=$(mac_get $rp1)
+
+ RET=0
+
+ # First add VNI->FID mapping to the FID of VLAN 4001
+ bridge vlan add vid 4001 dev vx4001 pvid untagged
+
+ # Add a RIF to the FID with VNI->FID mapping
+ vlan_rif_add
+
+ tc filter add dev $swp1 egress protocol ip pref 1 handle 101 \
+ flower skip_sw dst_ip 192.0.2.1 action pass
+
+ payload=$(payload_get)
+ ip vrf exec v$rp2 $MZ $rp2 -c 10 -d 1msec -b $rp1_mac \
+ -B 192.0.2.17 -A 192.0.2.18 \
+ -t udp sp=12345,dp=$VXPORT,p=$payload -q
+
+ tc_check_at_least_x_packets "dev $swp1 egress" 101 10
+ check_err $? "Packets were not routed in hardware"
+
+ log_test "Add RIF for existing VNI->FID mapping"
+
+ tc filter del dev $swp1 egress
+
+ bridge vlan del vid 4001 dev vx4001 pvid untagged
+ vlan_rif_del
+}
+
+rif_vni_fid_map()
+{
+ local rp1_mac=$(mac_get $rp1)
+
+ RET=0
+
+ # First add a RIF to the FID of VLAN 4001
+ vlan_rif_add
+
+ # Add VNI->FID mapping to FID with a RIF
+ bridge vlan add vid 4001 dev vx4001 pvid untagged
+
+ tc filter add dev $swp1 egress protocol ip pref 1 handle 101 \
+ flower skip_sw dst_ip 192.0.2.1 action pass
+
+ payload=$(payload_get)
+ ip vrf exec v$rp2 $MZ $rp2 -c 10 -d 1msec -b $rp1_mac \
+ -B 192.0.2.17 -A 192.0.2.18 \
+ -t udp sp=12345,dp=$VXPORT,p=$payload -q
+
+ tc_check_at_least_x_packets "dev $swp1 egress" 101 10
+ check_err $? "Packets were not routed in hardware"
+
+ log_test "Add VNI->FID mapping for FID with a RIF"
+
+ tc filter del dev $swp1 egress
+
+ bridge vlan del vid 4001 dev vx4001 pvid untagged
+ vlan_rif_del
+}
+
+trap cleanup EXIT
+
+setup_prepare
+setup_wait
+
+tests_run
+
+exit $EXIT_STATUS
diff --git a/tools/testing/selftests/drivers/net/mlxsw/mirror_gre_scale.sh b/tools/testing/selftests/drivers/net/mlxsw/mirror_gre_scale.sh
index 6f3a70df63bc..e5589e2fca85 100644
--- a/tools/testing/selftests/drivers/net/mlxsw/mirror_gre_scale.sh
+++ b/tools/testing/selftests/drivers/net/mlxsw/mirror_gre_scale.sh
@@ -120,12 +120,13 @@ __mirror_gre_test()
sleep 5
for ((i = 0; i < count; ++i)); do
+ local sip=$(mirror_gre_ipv6_addr 1 $i)::1
local dip=$(mirror_gre_ipv6_addr 1 $i)::2
local htun=h3-gt6-$i
local message
icmp6_capture_install $htun
- mirror_test v$h1 "" $dip $htun 100 10
+ mirror_test v$h1 $sip $dip $htun 100 10
icmp6_capture_uninstall $htun
done
}
@@ -164,6 +165,7 @@ mirror_gre_setup_prepare()
simple_if_init $h3
ip link add name br1 type bridge vlan_filtering 1
+ ip link set dev br1 addrgenmode none
ip link set dev br1 up
ip link set dev $swp1 master br1
diff --git a/tools/testing/selftests/drivers/net/mlxsw/mlxsw_lib.sh b/tools/testing/selftests/drivers/net/mlxsw/mlxsw_lib.sh
index cbe50f260a40..6369927e9c37 100644
--- a/tools/testing/selftests/drivers/net/mlxsw/mlxsw_lib.sh
+++ b/tools/testing/selftests/drivers/net/mlxsw/mlxsw_lib.sh
@@ -11,3 +11,67 @@ if [[ ! -v MLXSW_CHIP ]]; then
exit 1
fi
fi
+
+MLXSW_SPECTRUM_REV=$(case $MLXSW_CHIP in
+ mlxsw_spectrum)
+ echo 1 ;;
+ mlxsw_spectrum*)
+ echo ${MLXSW_CHIP#mlxsw_spectrum} ;;
+ *)
+ echo "Couldn't determine Spectrum chip revision." \
+ > /dev/stderr ;;
+ esac)
+
+mlxsw_on_spectrum()
+{
+ local rev=$1; shift
+ local op="=="
+ local rev2=${rev%+}
+
+ if [[ $rev2 != $rev ]]; then
+ op=">="
+ fi
+
+ ((MLXSW_SPECTRUM_REV $op rev2))
+}
+
+__mlxsw_only_on_spectrum()
+{
+ local rev=$1; shift
+ local caller=$1; shift
+ local src=$1; shift
+
+ if ! mlxsw_on_spectrum "$rev"; then
+ log_test_skip $src:$caller "(Spectrum-$rev only)"
+ return 1
+ fi
+}
+
+mlxsw_only_on_spectrum()
+{
+ local caller=${FUNCNAME[1]}
+ local src=${BASH_SOURCE[1]}
+ local rev
+
+ for rev in "$@"; do
+ if __mlxsw_only_on_spectrum "$rev" "$caller" "$src"; then
+ return 0
+ fi
+ done
+
+ return 1
+}
+
+mlxsw_max_descriptors_get()
+{
+ local spectrum_rev=$MLXSW_SPECTRUM_REV
+
+ case $spectrum_rev in
+ 1) echo 81920 ;;
+ 2) echo 136960 ;;
+ 3) echo 204800 ;;
+ 4) echo 220000 ;;
+ *) echo "Unknown max descriptors for chip revision." > /dev/stderr
+ return 1 ;;
+ esac
+}
diff --git a/tools/testing/selftests/drivers/net/mlxsw/one_armed_router.sh b/tools/testing/selftests/drivers/net/mlxsw/one_armed_router.sh
index f02d83e94576..fca0e1e642c6 100755
--- a/tools/testing/selftests/drivers/net/mlxsw/one_armed_router.sh
+++ b/tools/testing/selftests/drivers/net/mlxsw/one_armed_router.sh
@@ -83,7 +83,8 @@ h2_destroy()
switch_create()
{
- ip link add name br0 type bridge mcast_snooping 0
+ ip link add name br0 address $(mac_get $swp1) \
+ type bridge mcast_snooping 0
ip link set dev br0 up
ip link set dev $swp1 master br0
diff --git a/tools/testing/selftests/drivers/net/mlxsw/pci_reset.sh b/tools/testing/selftests/drivers/net/mlxsw/pci_reset.sh
new file mode 100755
index 000000000000..fe0343b95e6c
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/mlxsw/pci_reset.sh
@@ -0,0 +1,58 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+#
+# Test that PCI reset works correctly by verifying that only the expected reset
+# methods are supported and that after issuing the reset the ifindex of the
+# port changes.
+
+lib_dir=$(dirname $0)/../../../net/forwarding
+
+ALL_TESTS="
+ pci_reset_test
+"
+NUM_NETIFS=1
+source $lib_dir/lib.sh
+source $lib_dir/devlink_lib.sh
+
+pci_reset_test()
+{
+ RET=0
+
+ local bus=$(echo $DEVLINK_DEV | cut -d '/' -f 1)
+ local bdf=$(echo $DEVLINK_DEV | cut -d '/' -f 2)
+
+ if [ $bus != "pci" ]; then
+ check_err 1 "devlink device is not a PCI device"
+ log_test "pci reset"
+ return
+ fi
+
+ if [ ! -f /sys/bus/pci/devices/$bdf/reset_method ]; then
+ check_err 1 "reset is not supported"
+ log_test "pci reset"
+ return
+ fi
+
+ [[ $(cat /sys/bus/pci/devices/$bdf/reset_method) == "bus" ]]
+ check_err $? "only \"bus\" reset method should be supported"
+
+ local ifindex_pre=$(ip -j link show dev $swp1 | jq '.[]["ifindex"]')
+
+ echo 1 > /sys/bus/pci/devices/$bdf/reset
+ check_err $? "reset failed"
+
+ # Wait for udev to rename newly created netdev.
+ udevadm settle
+
+ local ifindex_post=$(ip -j link show dev $swp1 | jq '.[]["ifindex"]')
+
+ [[ $ifindex_pre != $ifindex_post ]]
+ check_err $? "reset not performed"
+
+ log_test "pci reset"
+}
+
+swp1=${NETIFS[p1]}
+tests_run
+
+exit $EXIT_STATUS
diff --git a/tools/testing/selftests/drivers/net/mlxsw/port_range_occ.sh b/tools/testing/selftests/drivers/net/mlxsw/port_range_occ.sh
new file mode 100755
index 000000000000..b1f0781f6b25
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/mlxsw/port_range_occ.sh
@@ -0,0 +1,111 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+#
+# Test that filters that match on the same port range, but with different
+# combination of IPv4/IPv6 and TCP/UDP all use the same port range register by
+# observing port range registers' occupancy via devlink-resource.
+
+lib_dir=$(dirname $0)/../../../net/forwarding
+
+ALL_TESTS="
+ port_range_occ_test
+"
+NUM_NETIFS=2
+source $lib_dir/lib.sh
+source $lib_dir/devlink_lib.sh
+
+h1_create()
+{
+ simple_if_init $h1
+}
+
+h1_destroy()
+{
+ simple_if_fini $h1
+}
+
+switch_create()
+{
+ simple_if_init $swp1
+ tc qdisc add dev $swp1 clsact
+}
+
+switch_destroy()
+{
+ tc qdisc del dev $swp1 clsact
+ simple_if_fini $swp1
+}
+
+setup_prepare()
+{
+ h1=${NETIFS[p1]}
+ swp1=${NETIFS[p2]}
+
+ vrf_prepare
+
+ h1_create
+ switch_create
+}
+
+cleanup()
+{
+ pre_cleanup
+
+ switch_destroy
+ h1_destroy
+
+ vrf_cleanup
+}
+
+port_range_occ_get()
+{
+ devlink_resource_occ_get port_range_registers
+}
+
+port_range_occ_test()
+{
+ RET=0
+
+ local occ=$(port_range_occ_get)
+
+ # Two port range registers are used, for source and destination port
+ # ranges.
+ tc filter add dev $swp1 ingress pref 1 handle 101 proto ip \
+ flower skip_sw ip_proto udp src_port 1-100 dst_port 1-100 \
+ action pass
+ (( occ + 2 == $(port_range_occ_get) ))
+ check_err $? "Got occupancy $(port_range_occ_get), expected $((occ + 2))"
+
+ tc filter add dev $swp1 ingress pref 1 handle 102 proto ip \
+ flower skip_sw ip_proto tcp src_port 1-100 dst_port 1-100 \
+ action pass
+ tc filter add dev $swp1 ingress pref 2 handle 103 proto ipv6 \
+ flower skip_sw ip_proto udp src_port 1-100 dst_port 1-100 \
+ action pass
+ tc filter add dev $swp1 ingress pref 2 handle 104 proto ipv6 \
+ flower skip_sw ip_proto tcp src_port 1-100 dst_port 1-100 \
+ action pass
+ (( occ + 2 == $(port_range_occ_get) ))
+ check_err $? "Got occupancy $(port_range_occ_get), expected $((occ + 2))"
+
+ tc filter del dev $swp1 ingress pref 2 handle 104 flower
+ tc filter del dev $swp1 ingress pref 2 handle 103 flower
+ tc filter del dev $swp1 ingress pref 1 handle 102 flower
+ (( occ + 2 == $(port_range_occ_get) ))
+ check_err $? "Got occupancy $(port_range_occ_get), expected $((occ + 2))"
+
+ tc filter del dev $swp1 ingress pref 1 handle 101 flower
+ (( occ == $(port_range_occ_get) ))
+ check_err $? "Got occupancy $(port_range_occ_get), expected $occ"
+
+ log_test "port range occupancy"
+}
+
+trap cleanup EXIT
+
+setup_prepare
+setup_wait
+
+tests_run
+
+exit $EXIT_STATUS
diff --git a/tools/testing/selftests/drivers/net/mlxsw/port_range_scale.sh b/tools/testing/selftests/drivers/net/mlxsw/port_range_scale.sh
new file mode 100644
index 000000000000..2a70840ff14b
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/mlxsw/port_range_scale.sh
@@ -0,0 +1,95 @@
+# SPDX-License-Identifier: GPL-2.0
+
+PORT_RANGE_NUM_NETIFS=2
+
+port_range_h1_create()
+{
+ simple_if_init $h1
+}
+
+port_range_h1_destroy()
+{
+ simple_if_fini $h1
+}
+
+port_range_switch_create()
+{
+ simple_if_init $swp1
+ tc qdisc add dev $swp1 clsact
+}
+
+port_range_switch_destroy()
+{
+ tc qdisc del dev $swp1 clsact
+ simple_if_fini $swp1
+}
+
+port_range_rules_create()
+{
+ local count=$1; shift
+ local should_fail=$1; shift
+ local batch_file="$(mktemp)"
+
+ for ((i = 0; i < count; ++i)); do
+ cat >> $batch_file <<-EOF
+ filter add dev $swp1 ingress \
+ prot ipv4 \
+ pref 1000 \
+ flower skip_sw \
+ ip_proto udp dst_port 1-$((100 + i)) \
+ action pass
+ EOF
+ done
+
+ tc -b $batch_file
+ check_err_fail $should_fail $? "Rule insertion"
+
+ rm -f $batch_file
+}
+
+__port_range_test()
+{
+ local count=$1; shift
+ local should_fail=$1; shift
+
+ port_range_rules_create $count $should_fail
+
+ offload_count=$(tc -j filter show dev $swp1 ingress |
+ jq "[.[] | select(.options.in_hw == true)] | length")
+ ((offload_count == count))
+ check_err_fail $should_fail $? "port range offload count"
+}
+
+port_range_test()
+{
+ local count=$1; shift
+ local should_fail=$1; shift
+
+ if ! tc_offload_check $PORT_RANGE_NUM_NETIFS; then
+ check_err 1 "Could not test offloaded functionality"
+ return
+ fi
+
+ __port_range_test $count $should_fail
+}
+
+port_range_setup_prepare()
+{
+ h1=${NETIFS[p1]}
+ swp1=${NETIFS[p2]}
+
+ vrf_prepare
+
+ port_range_h1_create
+ port_range_switch_create
+}
+
+port_range_cleanup()
+{
+ pre_cleanup
+
+ port_range_switch_destroy
+ port_range_h1_destroy
+
+ vrf_cleanup
+}
diff --git a/tools/testing/selftests/drivers/net/mlxsw/port_scale.sh b/tools/testing/selftests/drivers/net/mlxsw/port_scale.sh
new file mode 100644
index 000000000000..1e9a4aff76a2
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/mlxsw/port_scale.sh
@@ -0,0 +1,62 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+# Test for physical ports resource. The test splits each splittable port
+# to its width and checks that eventually the number of physical ports equals
+# the maximum number of physical ports.
+
+PORT_NUM_NETIFS=0
+
+declare -a unsplit
+
+port_setup_prepare()
+{
+ :
+}
+
+port_cleanup()
+{
+ pre_cleanup
+
+ for port in "${unsplit[@]}"; do
+ devlink port unsplit $port
+ check_err $? "Did not unsplit $netdev"
+ done
+ unsplit=()
+}
+
+split_all_ports()
+{
+ local should_fail=$1; shift
+
+ # Loop over the splittable netdevs and create tuples of netdev along
+ # with its width. For example:
+ # '$netdev1 $count1 $netdev2 $count2...', when:
+ # $netdev1-2 are splittable netdevs in the device, and
+ # $count1-2 are the netdevs width respectively.
+ while read netdev count <<<$(
+ devlink -j port show |
+ jq -r '.[][] | select(.splittable==true) | "\(.netdev) \(.lanes)"'
+ )
+ [[ ! -z $netdev ]]
+ do
+ devlink port split $netdev count $count
+ check_err $? "Did not split $netdev into $count"
+ unsplit+=( "${netdev}s0" )
+ done
+}
+
+port_test()
+{
+ local max_ports=$1; shift
+ local should_fail=$1; shift
+
+ split_all_ports $should_fail
+
+ occ=$(devlink -j resource show $DEVLINK_DEV \
+ | jq '.[][][] | select(.name=="physical_ports") |.["occ"]')
+
+ [[ $occ -eq $max_ports ]]
+ check_err_fail $should_fail $? "Attempt to create $max_ports ports (actual result $occ)"
+
+}
diff --git a/tools/testing/selftests/drivers/net/mlxsw/q_in_q_veto.sh b/tools/testing/selftests/drivers/net/mlxsw/q_in_q_veto.sh
new file mode 100755
index 000000000000..00d55b0e98c1
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/mlxsw/q_in_q_veto.sh
@@ -0,0 +1,304 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+lib_dir=$(dirname $0)/../../../net/forwarding
+
+ALL_TESTS="
+ create_8021ad_vlan_upper_on_top_front_panel_port
+ create_8021ad_vlan_upper_on_top_bridge_port
+ create_8021ad_vlan_upper_on_top_lag
+ create_8021ad_vlan_upper_on_top_bridge
+ create_8021ad_vlan_upper_on_top_8021ad_bridge
+ create_vlan_upper_on_top_8021ad_bridge
+ create_vlan_upper_on_top_front_panel_enslaved_to_8021ad_bridge
+ create_vlan_upper_on_top_lag_enslaved_to_8021ad_bridge
+ enslave_front_panel_with_vlan_upper_to_8021ad_bridge
+ enslave_lag_with_vlan_upper_to_8021ad_bridge
+ add_ip_address_to_8021ad_bridge
+ switch_bridge_protocol_from_8021q_to_8021ad
+"
+NUM_NETIFS=2
+source $lib_dir/lib.sh
+
+setup_prepare()
+{
+ swp1=${NETIFS[p1]}
+ swp2=${NETIFS[p2]}
+
+ ip link set dev $swp1 up
+ ip link set dev $swp2 up
+
+ sleep 10
+}
+
+cleanup()
+{
+ pre_cleanup
+
+ ip link set dev $swp2 down
+ ip link set dev $swp1 down
+}
+
+create_vlan_upper_on_top_of_bridge()
+{
+ RET=0
+
+ local bridge_proto=$1; shift
+ local netdev_proto=$1; shift
+
+ ip link add dev br0 type bridge vlan_filtering 1 \
+ vlan_protocol $bridge_proto vlan_default_pvid 0 mcast_snooping 0
+ ip link set dev br0 addrgenmode none
+
+ ip link set dev br0 up
+ ip link set dev $swp1 master br0
+
+ ip link add name br0.100 link br0 type vlan \
+ protocol $netdev_proto id 100 2>/dev/null
+ check_fail $? "$netdev_proto vlan upper creation on top of an $bridge_proto bridge not rejected"
+
+ ip link add name br0.100 link br0 type vlan \
+ protocol $netdev_proto id 100 2>&1 >/dev/null \
+ | grep -q mlxsw_spectrum
+ check_err $? "$netdev_proto vlan upper creation on top of an $bridge_proto bridge rejected without extack"
+
+ log_test "create $netdev_proto vlan upper on top $bridge_proto bridge"
+
+ ip link del dev br0
+}
+
+create_8021ad_vlan_upper_on_top_front_panel_port()
+{
+ RET=0
+
+ ip link add name $swp1.100 link $swp1 type vlan \
+ protocol 802.1ad id 100 2>/dev/null
+ check_fail $? "802.1ad vlan upper creation on top of a front panel not rejected"
+
+ ip link add name $swp1.100 link $swp1 type vlan \
+ protocol 802.1ad id 100 2>&1 >/dev/null \
+ | grep -q mlxsw_spectrum
+ check_err $? "802.1ad vlan upper creation on top of a front panel rejected without extack"
+
+ log_test "create 802.1ad vlan upper on top of a front panel"
+}
+
+create_8021ad_vlan_upper_on_top_bridge_port()
+{
+ RET=0
+
+ ip link add dev br0 type bridge vlan_filtering 1 \
+ vlan_default_pvid 0 mcast_snooping 0
+ ip link set dev br0 addrgenmode none
+
+ ip link set dev $swp1 master br0
+ ip link set dev br0 up
+
+ ip link add name $swp1.100 link $swp1 type vlan \
+ protocol 802.1ad id 100 2>/dev/null
+ check_fail $? "802.1ad vlan upper creation on top of a bridge port not rejected"
+
+ ip link add name $swp1.100 link $swp1 type vlan \
+ protocol 802.1ad id 100 2>&1 >/dev/null \
+ | grep -q mlxsw_spectrum
+ check_err $? "802.1ad vlan upper creation on top of a bridge port rejected without extack"
+
+ log_test "create 802.1ad vlan upper on top of a bridge port"
+
+ ip link del dev br0
+}
+
+create_8021ad_vlan_upper_on_top_lag()
+{
+ RET=0
+
+ ip link add name bond1 type bond mode 802.3ad
+ ip link set dev $swp1 down
+ ip link set dev $swp1 master bond1
+
+ ip link add name bond1.100 link bond1 type vlan \
+ protocol 802.1ad id 100 2>/dev/null
+ check_fail $? "802.1ad vlan upper creation on top of a lag not rejected"
+
+ ip link add name bond1.100 link bond1 type vlan \
+ protocol 802.1ad id 100 2>&1 >/dev/null \
+ | grep -q mlxsw_spectrum
+ check_err $? "802.1ad vlan upper creation on top of a lag rejected without extack"
+
+ log_test "create 802.1ad vlan upper on top of a lag"
+
+ ip link del dev bond1
+}
+
+create_8021ad_vlan_upper_on_top_bridge()
+{
+ RET=0
+
+ create_vlan_upper_on_top_of_bridge "802.1q" "802.1ad"
+}
+
+create_8021ad_vlan_upper_on_top_8021ad_bridge()
+{
+ RET=0
+
+ create_vlan_upper_on_top_of_bridge "802.1ad" "802.1ad"
+}
+
+create_vlan_upper_on_top_8021ad_bridge()
+{
+ RET=0
+
+ create_vlan_upper_on_top_of_bridge "802.1ad" "802.1q"
+}
+
+create_vlan_upper_on_top_front_panel_enslaved_to_8021ad_bridge()
+{
+ RET=0
+
+ ip link add dev br0 type bridge vlan_filtering 1 \
+ vlan_protocol 802.1ad vlan_default_pvid 0 mcast_snooping 0
+ ip link set dev br0 addrgenmode none
+ ip link set dev br0 up
+
+ ip link set dev $swp1 master br0
+
+ ip link add name $swp1.100 link $swp1 type vlan id 100 2>/dev/null
+ check_fail $? "vlan upper creation on top of front panel enslaved to 802.1ad bridge not rejected"
+
+ ip link add name $swp1.100 link $swp1 type vlan id 100 2>&1 >/dev/null \
+ | grep -q mlxsw_spectrum
+ check_err $? "vlan upper creation on top of front panel enslaved to 802.1ad bridge rejected without extack"
+
+ log_test "create vlan upper on top of front panel enslaved to 802.1ad bridge"
+
+ ip link del dev br0
+}
+
+create_vlan_upper_on_top_lag_enslaved_to_8021ad_bridge()
+{
+ RET=0
+
+ ip link add dev br0 type bridge vlan_filtering 1 \
+ vlan_protocol 802.1ad vlan_default_pvid 0 mcast_snooping 0
+ ip link set dev br0 addrgenmode none
+ ip link set dev br0 up
+
+ ip link add name bond1 type bond mode 802.3ad
+ ip link set dev $swp1 down
+ ip link set dev $swp1 master bond1
+ ip link set dev bond1 master br0
+
+ ip link add name bond1.100 link bond1 type vlan id 100 2>/dev/null
+ check_fail $? "vlan upper creation on top of lag enslaved to 802.1ad bridge not rejected"
+
+ ip link add name bond1.100 link bond1 type vlan id 100 2>&1 >/dev/null \
+ | grep -q mlxsw_spectrum
+ check_err $? "vlan upper creation on top of lag enslaved to 802.1ad bridge rejected without extack"
+
+ log_test "create vlan upper on top of lag enslaved to 802.1ad bridge"
+
+ ip link del dev bond1
+ ip link del dev br0
+}
+
+enslave_front_panel_with_vlan_upper_to_8021ad_bridge()
+{
+ RET=0
+
+ ip link add dev br0 type bridge vlan_filtering 1 \
+ vlan_protocol 802.1ad vlan_default_pvid 0 mcast_snooping 0
+ ip link set dev br0 addrgenmode none
+ ip link set dev br0 up
+
+ ip link add name $swp1.100 link $swp1 type vlan id 100
+
+ ip link set dev $swp1 master br0 2>/dev/null
+ check_fail $? "front panel with vlan upper enslavemnt to 802.1ad bridge not rejected"
+
+ ip link set dev $swp1 master br0 2>&1 >/dev/null | grep -q mlxsw_spectrum
+ check_err $? "front panel with vlan upper enslavemnt to 802.1ad bridge rejected without extack"
+
+ log_test "enslave front panel with vlan upper to 802.1ad bridge"
+
+ ip link del dev $swp1.100
+ ip link del dev br0
+}
+
+enslave_lag_with_vlan_upper_to_8021ad_bridge()
+{
+ RET=0
+
+ ip link add dev br0 type bridge vlan_filtering 1 \
+ vlan_protocol 802.1ad vlan_default_pvid 0 mcast_snooping 0
+ ip link set dev br0 addrgenmode none
+ ip link set dev br0 up
+
+ ip link add name bond1 type bond mode 802.3ad
+ ip link set dev $swp1 down
+ ip link set dev $swp1 master bond1
+ ip link add name bond1.100 link bond1 type vlan id 100
+
+ ip link set dev bond1 master br0 2>/dev/null
+ check_fail $? "lag with vlan upper enslavemnt to 802.1ad bridge not rejected"
+
+ ip link set dev bond1 master br0 2>&1 >/dev/null \
+ | grep -q mlxsw_spectrum
+ check_err $? "lag with vlan upper enslavemnt to 802.1ad bridge rejected without extack"
+
+ log_test "enslave lag with vlan upper to 802.1ad bridge"
+
+ ip link del dev bond1
+ ip link del dev br0
+}
+
+
+add_ip_address_to_8021ad_bridge()
+{
+ RET=0
+
+ ip link add dev br0 type bridge vlan_filtering 1 \
+ vlan_protocol 802.1ad vlan_default_pvid 0 mcast_snooping 0
+ ip link set dev br0 addrgenmode none
+
+ ip link set dev br0 up
+ ip link set dev $swp1 master br0
+
+ ip addr add dev br0 192.0.2.17/28 2>/dev/null
+ check_fail $? "IP address addition to 802.1ad bridge not rejected"
+
+ ip addr add dev br0 192.0.2.17/28 2>&1 >/dev/null | grep -q mlxsw_spectrum
+ check_err $? "IP address addition to 802.1ad bridge rejected without extack"
+
+ log_test "IP address addition to 802.1ad bridge"
+
+ ip link del dev br0
+}
+
+switch_bridge_protocol_from_8021q_to_8021ad()
+{
+ RET=0
+
+ ip link add dev br0 type bridge vlan_filtering 1 \
+ vlan_protocol 802.1ad vlan_default_pvid 0 mcast_snooping 0
+ ip link set dev br0 addrgenmode none
+
+ ip link set dev br0 up
+ ip link set dev $swp1 master br0
+
+ ip link set dev br0 type bridge vlan_protocol 802.1q 2>/dev/null
+ check_fail $? "switching bridge protocol from 802.1q to 802.1ad not rejected"
+
+ log_test "switch bridge protocol"
+
+ ip link del dev br0
+}
+
+
+trap cleanup EXIT
+
+setup_prepare
+setup_wait
+
+tests_run
+
+exit $EXIT_STATUS
diff --git a/tools/testing/selftests/drivers/net/mlxsw/qos_defprio.sh b/tools/testing/selftests/drivers/net/mlxsw/qos_defprio.sh
index 71066bc4b886..5492fa5550d7 100755
--- a/tools/testing/selftests/drivers/net/mlxsw/qos_defprio.sh
+++ b/tools/testing/selftests/drivers/net/mlxsw/qos_defprio.sh
@@ -5,18 +5,18 @@
# prioritized according to the default priority specified at the port.
# rx_octets_prio_* counters are used to verify the prioritization.
#
-# +-----------------------+
-# | H1 |
-# | + $h1 |
-# | | 192.0.2.1/28 |
-# +----|------------------+
+# +----------------------------------+
+# | H1 |
+# | + $h1 |
+# | | 192.0.2.1/28 |
+# +----|-----------------------------+
# |
-# +----|------------------+
-# | SW | |
-# | + $swp1 |
-# | 192.0.2.2/28 |
-# | APP=<prio>,1,0 |
-# +-----------------------+
+# +----|-----------------------------+
+# | SW | |
+# | + $swp1 |
+# | 192.0.2.2/28 |
+# | dcb app default-prio <prio> |
+# +----------------------------------+
ALL_TESTS="
ping_ipv4
@@ -29,42 +29,6 @@ NUM_NETIFS=2
: ${HIT_TIMEOUT:=1000} # ms
source $lib_dir/lib.sh
-declare -a APP
-
-defprio_install()
-{
- local dev=$1; shift
- local prio=$1; shift
- local app="app=$prio,1,0"
-
- lldptool -T -i $dev -V APP $app >/dev/null
- lldpad_app_wait_set $dev
- APP[$prio]=$app
-}
-
-defprio_uninstall()
-{
- local dev=$1; shift
- local prio=$1; shift
- local app=${APP[$prio]}
-
- lldptool -T -i $dev -V APP -d $app >/dev/null
- lldpad_app_wait_del
- unset APP[$prio]
-}
-
-defprio_flush()
-{
- local dev=$1; shift
- local prio
-
- if ((${#APP[@]})); then
- lldptool -T -i $dev -V APP -d ${APP[@]} >/dev/null
- fi
- lldpad_app_wait_del
- APP=()
-}
-
h1_create()
{
simple_if_init $h1 192.0.2.1/28
@@ -83,7 +47,7 @@ switch_create()
switch_destroy()
{
- defprio_flush $swp1
+ dcb app flush dev $swp1 default-prio
ip addr del dev $swp1 192.0.2.2/28
ip link set dev $swp1 down
}
@@ -124,7 +88,7 @@ __test_defprio()
RET=0
- defprio_install $swp1 $prio_install
+ dcb app add dev $swp1 default-prio $prio_install
local t0=$(ethtool_stats_get $swp1 rx_frames_prio_$prio_observe)
mausezahn -q $h1 -d 100m -c 10 -t arp reply
@@ -134,7 +98,7 @@ __test_defprio()
check_err $? "Default priority $prio_install/$prio_observe: Expected to capture 10 packets, got $((t1 - t0))."
log_test "Default priority $prio_install/$prio_observe"
- defprio_uninstall $swp1 $prio_install
+ dcb app del dev $swp1 default-prio $prio_install
}
test_defprio()
@@ -145,7 +109,7 @@ test_defprio()
__test_defprio $prio $prio
done
- defprio_install $swp1 3
+ dcb app add dev $swp1 default-prio 3
__test_defprio 0 3
__test_defprio 1 3
__test_defprio 2 3
@@ -153,7 +117,7 @@ test_defprio()
__test_defprio 5 5
__test_defprio 6 6
__test_defprio 7 7
- defprio_uninstall $swp1 3
+ dcb app del dev $swp1 default-prio 3
}
trap cleanup EXIT
diff --git a/tools/testing/selftests/drivers/net/mlxsw/qos_dscp_bridge.sh b/tools/testing/selftests/drivers/net/mlxsw/qos_dscp_bridge.sh
index 5cbff8038f84..914c63d6318a 100755
--- a/tools/testing/selftests/drivers/net/mlxsw/qos_dscp_bridge.sh
+++ b/tools/testing/selftests/drivers/net/mlxsw/qos_dscp_bridge.sh
@@ -20,7 +20,7 @@
# | SW | | |
# | +-|----------------------------------------------------------------|-+ |
# | | + $swp1 BR $swp2 + | |
-# | | APP=0,5,10 .. 7,5,17 APP=0,5,20 .. 7,5,27 | |
+# | | dcb dscp-prio 10:0...17:7 dcb dscp-prio 20:0...27:7 | |
# | +--------------------------------------------------------------------+ |
# +---------------------------------------------------------------------------+
@@ -62,38 +62,28 @@ h2_destroy()
simple_if_fini $h2 192.0.2.2/28
}
-dscp_map()
-{
- local base=$1; shift
- local prio
-
- for prio in {0..7}; do
- echo app=$prio,5,$((base + prio))
- done
-}
-
switch_create()
{
ip link add name br1 type bridge vlan_filtering 1
+ ip link set dev br1 addrgenmode none
ip link set dev br1 up
ip link set dev $swp1 master br1
ip link set dev $swp1 up
ip link set dev $swp2 master br1
ip link set dev $swp2 up
- lldptool -T -i $swp1 -V APP $(dscp_map 10) >/dev/null
- lldptool -T -i $swp2 -V APP $(dscp_map 20) >/dev/null
- lldpad_app_wait_set $swp1
- lldpad_app_wait_set $swp2
+ dcb app add dev $swp1 dscp-prio 10:0 11:1 12:2 13:3 14:4 15:5 16:6 17:7
+ dcb app add dev $swp2 dscp-prio 20:0 21:1 22:2 23:3 24:4 25:5 26:6 27:7
}
switch_destroy()
{
- lldptool -T -i $swp2 -V APP -d $(dscp_map 20) >/dev/null
- lldptool -T -i $swp1 -V APP -d $(dscp_map 10) >/dev/null
- lldpad_app_wait_del
+ dcb app del dev $swp2 dscp-prio 20:0 21:1 22:2 23:3 24:4 25:5 26:6 27:7
+ dcb app del dev $swp1 dscp-prio 10:0 11:1 12:2 13:3 14:4 15:5 16:6 17:7
+ ip link set dev $swp2 down
ip link set dev $swp2 nomaster
+ ip link set dev $swp1 down
ip link set dev $swp1 nomaster
ip link del dev br1
}
diff --git a/tools/testing/selftests/drivers/net/mlxsw/qos_dscp_router.sh b/tools/testing/selftests/drivers/net/mlxsw/qos_dscp_router.sh
index 4cb2aa65278a..f6c23f84423e 100755
--- a/tools/testing/selftests/drivers/net/mlxsw/qos_dscp_router.sh
+++ b/tools/testing/selftests/drivers/net/mlxsw/qos_dscp_router.sh
@@ -94,16 +94,6 @@ h2_destroy()
simple_if_fini $h2 192.0.2.18/28
}
-dscp_map()
-{
- local base=$1; shift
- local prio
-
- for prio in {0..7}; do
- echo app=$prio,5,$((base + prio))
- done
-}
-
switch_create()
{
simple_if_init $swp1 192.0.2.2/28
@@ -112,17 +102,14 @@ switch_create()
tc qdisc add dev $swp1 clsact
tc qdisc add dev $swp2 clsact
- lldptool -T -i $swp1 -V APP $(dscp_map 0) >/dev/null
- lldptool -T -i $swp2 -V APP $(dscp_map 0) >/dev/null
- lldpad_app_wait_set $swp1
- lldpad_app_wait_set $swp2
+ dcb app add dev $swp1 dscp-prio 0:0 1:1 2:2 3:3 4:4 5:5 6:6 7:7
+ dcb app add dev $swp2 dscp-prio 0:0 1:1 2:2 3:3 4:4 5:5 6:6 7:7
}
switch_destroy()
{
- lldptool -T -i $swp2 -V APP -d $(dscp_map 0) >/dev/null
- lldptool -T -i $swp1 -V APP -d $(dscp_map 0) >/dev/null
- lldpad_app_wait_del
+ dcb app del dev $swp2 dscp-prio 0:0 1:1 2:2 3:3 4:4 5:5 6:6 7:7
+ dcb app del dev $swp1 dscp-prio 0:0 1:1 2:2 3:3 4:4 5:5 6:6 7:7
tc qdisc del dev $swp2 clsact
tc qdisc del dev $swp1 clsact
@@ -265,13 +252,11 @@ test_dscp_leftover()
{
echo "Test that last removed DSCP rule is deconfigured correctly"
- lldptool -T -i $swp2 -V APP -d $(dscp_map 0) >/dev/null
- lldpad_app_wait_del
+ dcb app del dev $swp2 dscp-prio 0:0 1:1 2:2 3:3 4:4 5:5 6:6 7:7
__test_update 0 zero
- lldptool -T -i $swp2 -V APP $(dscp_map 0) >/dev/null
- lldpad_app_wait_set $swp2
+ dcb app add dev $swp2 dscp-prio 0:0 1:1 2:2 3:3 4:4 5:5 6:6 7:7
}
trap cleanup EXIT
diff --git a/tools/testing/selftests/drivers/net/mlxsw/qos_ets_strict.sh b/tools/testing/selftests/drivers/net/mlxsw/qos_ets_strict.sh
index 6d1790b5de7a..fee74f215cec 100755
--- a/tools/testing/selftests/drivers/net/mlxsw/qos_ets_strict.sh
+++ b/tools/testing/selftests/drivers/net/mlxsw/qos_ets_strict.sh
@@ -130,34 +130,48 @@ switch_create()
ip link set dev $swp3 up
mtu_set $swp3 10000
- ethtool -s $swp3 speed 1000 autoneg off
+ tc qdisc replace dev $swp3 root handle 101: tbf rate 1gbit \
+ burst 128K limit 1G
vlan_create $swp1 111
vlan_create $swp2 222
vlan_create $swp3 111
vlan_create $swp3 222
- ip link add name br111 up type bridge vlan_filtering 0
+ ip link add name br111 type bridge vlan_filtering 0
+ ip link set dev br111 addrgenmode none
+ ip link set dev br111 up
ip link set dev $swp1.111 master br111
ip link set dev $swp3.111 master br111
- ip link add name br222 up type bridge vlan_filtering 0
+ ip link add name br222 type bridge vlan_filtering 0
+ ip link set dev br222 addrgenmode none
+ ip link set dev br222 up
ip link set dev $swp2.222 master br222
ip link set dev $swp3.222 master br222
# Make sure that ingress quotas are smaller than egress so that there is
# room for both streams of traffic to be admitted to shared buffer.
+ devlink_pool_size_thtype_save 0
devlink_pool_size_thtype_set 0 dynamic 10000000
+ devlink_pool_size_thtype_save 4
devlink_pool_size_thtype_set 4 dynamic 10000000
+ devlink_port_pool_th_save $swp1 0
devlink_port_pool_th_set $swp1 0 6
+ devlink_tc_bind_pool_th_save $swp1 1 ingress
devlink_tc_bind_pool_th_set $swp1 1 ingress 0 6
+ devlink_port_pool_th_save $swp2 0
devlink_port_pool_th_set $swp2 0 6
+ devlink_tc_bind_pool_th_save $swp2 2 ingress
devlink_tc_bind_pool_th_set $swp2 2 ingress 0 6
+ devlink_tc_bind_pool_th_save $swp3 1 egress
devlink_tc_bind_pool_th_set $swp3 1 egress 4 7
+ devlink_tc_bind_pool_th_save $swp3 2 egress
devlink_tc_bind_pool_th_set $swp3 2 egress 4 7
+ devlink_port_pool_th_save $swp3 4
devlink_port_pool_th_set $swp3 4 7
}
@@ -184,7 +198,7 @@ switch_destroy()
vlan_destroy $swp2 222
vlan_destroy $swp1 111
- ethtool -s $swp3 autoneg on
+ tc qdisc del dev $swp3 root handle 101:
mtu_restore $swp3
ip link set dev $swp3 down
lldptool -T -i $swp3 -V ETS-CFG up2tc=0:0,1:0,2:0,3:0,4:0,5:0,6:0,7:0
diff --git a/tools/testing/selftests/drivers/net/mlxsw/qos_headroom.sh b/tools/testing/selftests/drivers/net/mlxsw/qos_headroom.sh
new file mode 100755
index 000000000000..88162b4027c0
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/mlxsw/qos_headroom.sh
@@ -0,0 +1,379 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+ALL_TESTS="
+ test_defaults
+ test_dcb_ets
+ test_mtu
+ test_pfc
+ test_int_buf
+ test_tc_priomap
+ test_tc_mtu
+ test_tc_sizes
+ test_tc_int_buf
+"
+
+lib_dir=$(dirname $0)/../../../net/forwarding
+
+NUM_NETIFS=0
+source $lib_dir/lib.sh
+source $lib_dir/devlink_lib.sh
+
+swp=$NETIF_NO_CABLE
+
+cleanup()
+{
+ pre_cleanup
+}
+
+get_prio_pg()
+{
+ # Produces a string of numbers "<B0> <B1> ... <B7> ", where BX is number
+ # of buffer that priority X is mapped to.
+ dcb -j buffer show dev $swp |
+ jq -r '[.prio_buffer | .[] | tostring + " "] | add'
+}
+
+get_prio_pfc()
+{
+ # Produces a string of numbers "<P0> <P1> ... <P7> ", where PX denotes
+ # whether priority X has PFC enabled (the value is 1) or disabled (0).
+ dcb -j pfc show dev $swp |
+ jq -r '[.prio_pfc | .[] | if . then "1 " else "0 " end] | add'
+}
+
+get_prio_tc()
+{
+ # Produces a string of numbers "<T0> <T1> ... <T7> ", where TC is number
+ # of TC that priority X is mapped to.
+ dcb -j ets show dev $swp |
+ jq -r '[.prio_tc | .[] | tostring + " "] | add'
+}
+
+get_buf_size()
+{
+ local idx=$1; shift
+
+ dcb -j buffer show dev $swp | jq ".buffer_size[$idx]"
+}
+
+get_tot_size()
+{
+ dcb -j buffer show dev $swp | jq '.total_size'
+}
+
+check_prio_pg()
+{
+ local expect=$1; shift
+
+ local current=$(get_prio_pg)
+ test "$current" = "$expect"
+ check_err $? "prio2buffer is '$current', expected '$expect'"
+}
+
+check_prio_pfc()
+{
+ local expect=$1; shift
+
+ local current=$(get_prio_pfc)
+ test "$current" = "$expect"
+ check_err $? "prio PFC is '$current', expected '$expect'"
+}
+
+check_prio_tc()
+{
+ local expect=$1; shift
+
+ local current=$(get_prio_tc)
+ test "$current" = "$expect"
+ check_err $? "prio_tc is '$current', expected '$expect'"
+}
+
+__check_buf_size()
+{
+ local idx=$1; shift
+ local expr=$1; shift
+ local what=$1; shift
+
+ local current=$(get_buf_size $idx)
+ ((current $expr))
+ check_err $? "${what}buffer $idx size is '$current', expected '$expr'"
+ echo $current
+}
+
+check_buf_size()
+{
+ __check_buf_size "$@" > /dev/null
+}
+
+test_defaults()
+{
+ RET=0
+
+ check_prio_pg "0 0 0 0 0 0 0 0 "
+ check_prio_tc "0 0 0 0 0 0 0 0 "
+ check_prio_pfc "0 0 0 0 0 0 0 0 "
+
+ log_test "Default headroom configuration"
+}
+
+test_dcb_ets()
+{
+ RET=0
+
+ dcb ets set dev $swp prio-tc 0:0 1:2 2:4 3:6 4:1 5:3 6:5 7:7
+
+ check_prio_pg "0 2 4 6 1 3 5 7 "
+ check_prio_tc "0 2 4 6 1 3 5 7 "
+ check_prio_pfc "0 0 0 0 0 0 0 0 "
+
+ dcb ets set dev $swp prio-tc all:0
+
+ check_prio_pg "0 0 0 0 0 0 0 0 "
+ check_prio_tc "0 0 0 0 0 0 0 0 "
+
+ dcb buffer set dev $swp prio-buffer 0:1 1:3 2:5 3:7 4:0 5:2 6:4 7:6 2>/dev/null
+ check_fail $? "prio2buffer accepted in DCB mode"
+
+ log_test "Configuring headroom through ETS"
+}
+
+test_mtu()
+{
+ local what=$1; shift
+ local buf0size_2
+ local buf0size
+
+ RET=0
+ buf0size=$(__check_buf_size 0 "> 0")
+
+ mtu_set $swp 3000
+ buf0size_2=$(__check_buf_size 0 "> $buf0size" "MTU 3000: ")
+ mtu_restore $swp
+
+ mtu_set $swp 6000
+ check_buf_size 0 "> $buf0size_2" "MTU 6000: "
+ mtu_restore $swp
+
+ check_buf_size 0 "== $buf0size"
+
+ log_test "${what}MTU impacts buffer size"
+}
+
+test_tc_mtu()
+{
+ # In TC mode, MTU still impacts the threshold below which a buffer is
+ # not permitted to go.
+
+ tc qdisc replace dev $swp root handle 1: bfifo limit 1.5M
+ test_mtu "TC: "
+ tc qdisc delete dev $swp root
+}
+
+test_pfc()
+{
+ RET=0
+
+ dcb ets set dev $swp prio-tc all:0 5:1 6:2 7:3
+
+ local buf0size=$(get_buf_size 0)
+ local buf1size=$(get_buf_size 1)
+ local buf2size=$(get_buf_size 2)
+ local buf3size=$(get_buf_size 3)
+ check_buf_size 0 "> 0"
+ check_buf_size 1 "> 0"
+ check_buf_size 2 "> 0"
+ check_buf_size 3 "> 0"
+ check_buf_size 4 "== 0"
+ check_buf_size 5 "== 0"
+ check_buf_size 6 "== 0"
+ check_buf_size 7 "== 0"
+
+ log_test "Buffer size sans PFC"
+
+ RET=0
+
+ dcb pfc set dev $swp prio-pfc all:off 5:on 6:on 7:on delay 0
+
+ check_prio_pg "0 0 0 0 0 1 2 3 "
+ check_prio_pfc "0 0 0 0 0 1 1 1 "
+ check_buf_size 0 "== $buf0size"
+ check_buf_size 1 "> $buf1size"
+ check_buf_size 2 "> $buf2size"
+ check_buf_size 3 "> $buf3size"
+
+ local buf1size=$(get_buf_size 1)
+ check_buf_size 2 "== $buf1size"
+ check_buf_size 3 "== $buf1size"
+
+ log_test "PFC: Cable length 0"
+
+ RET=0
+
+ dcb pfc set dev $swp delay 1000
+
+ check_buf_size 0 "== $buf0size"
+ check_buf_size 1 "> $buf1size"
+ check_buf_size 2 "> $buf1size"
+ check_buf_size 3 "> $buf1size"
+
+ log_test "PFC: Cable length 1000"
+
+ RET=0
+
+ dcb pfc set dev $swp prio-pfc all:off delay 0
+ dcb ets set dev $swp prio-tc all:0
+
+ check_prio_pg "0 0 0 0 0 0 0 0 "
+ check_prio_tc "0 0 0 0 0 0 0 0 "
+ check_buf_size 0 "> 0"
+ check_buf_size 1 "== 0"
+ check_buf_size 2 "== 0"
+ check_buf_size 3 "== 0"
+ check_buf_size 4 "== 0"
+ check_buf_size 5 "== 0"
+ check_buf_size 6 "== 0"
+ check_buf_size 7 "== 0"
+
+ log_test "PFC: Restore defaults"
+}
+
+test_tc_priomap()
+{
+ RET=0
+
+ dcb ets set dev $swp prio-tc 0:0 1:1 2:2 3:3 4:4 5:5 6:6 7:7
+ check_prio_pg "0 1 2 3 4 5 6 7 "
+
+ tc qdisc replace dev $swp root handle 1: bfifo limit 1.5M
+ check_prio_pg "0 0 0 0 0 0 0 0 "
+
+ dcb buffer set dev $swp prio-buffer 0:1 1:3 2:5 3:7 4:0 5:2 6:4 7:6
+ check_prio_pg "1 3 5 7 0 2 4 6 "
+
+ tc qdisc delete dev $swp root
+ check_prio_pg "0 1 2 3 4 5 6 7 "
+
+ # Clean up.
+ tc qdisc replace dev $swp root handle 1: bfifo limit 1.5M
+ dcb buffer set dev $swp prio-buffer all:0
+ tc qdisc delete dev $swp root
+ dcb ets set dev $swp prio-tc all:0
+
+ log_test "TC: priomap"
+}
+
+test_tc_sizes()
+{
+ local cell_size=$(devlink_cell_size_get)
+ local size=$((cell_size * 1000))
+
+ RET=0
+
+ dcb buffer set dev $swp buffer-size all:0 0:$size 2>/dev/null
+ check_fail $? "buffer_size should fail before qdisc is added"
+
+ tc qdisc replace dev $swp root handle 1: bfifo limit 1.5M
+
+ dcb buffer set dev $swp buffer-size all:0 0:$size
+ check_err $? "buffer_size should pass after qdisc is added"
+ check_buf_size 0 "== $size" "set size: "
+
+ mtu_set $swp 6000
+ check_buf_size 0 "== $size" "set MTU: "
+ mtu_restore $swp
+
+ dcb buffer set dev $swp buffer-size all:0
+
+ # After replacing the qdisc for the same kind, buffer_size still has to
+ # work.
+ tc qdisc replace dev $swp root handle 1: bfifo limit 1M
+
+ dcb buffer set dev $swp buffer-size all:0 0:$size
+ check_buf_size 0 "== $size" "post replace, set size: "
+
+ dcb buffer set dev $swp buffer-size all:0
+
+ # Likewise after replacing for a different kind.
+ tc qdisc replace dev $swp root handle 2: prio bands 8
+
+ dcb buffer set dev $swp buffer-size all:0 0:$size
+ check_buf_size 0 "== $size" "post replace different kind, set size: "
+
+ tc qdisc delete dev $swp root
+
+ dcb buffer set dev $swp buffer-size all:0 0:$size 2>/dev/null
+ check_fail $? "buffer_size should fail after qdisc is deleted"
+
+ log_test "TC: buffer size"
+}
+
+test_int_buf()
+{
+ local what=$1; shift
+
+ RET=0
+
+ local buf0size=$(get_buf_size 0)
+ local tot_size=$(get_tot_size)
+
+ # Size of internal buffer and buffer 9.
+ local dsize=$((tot_size - buf0size))
+
+ tc qdisc add dev $swp clsact
+ tc filter add dev $swp egress matchall skip_sw action mirred egress mirror dev $swp
+
+ local buf0size_2=$(get_buf_size 0)
+ local tot_size_2=$(get_tot_size)
+ local dsize_2=$((tot_size_2 - buf0size_2))
+
+ # Egress SPAN should have added to the "invisible" buffer configuration.
+ ((dsize_2 > dsize))
+ check_err $? "Invisible buffers account for '$dsize_2', expected '> $dsize'"
+
+ mtu_set $swp 3000
+
+ local buf0size_3=$(get_buf_size 0)
+ local tot_size_3=$(get_tot_size)
+ local dsize_3=$((tot_size_3 - buf0size_3))
+
+ # MTU change might change buffer 0, which will show at total, but the
+ # hidden buffers should stay the same size.
+ ((dsize_3 == dsize_2))
+ check_err $? "MTU change: Invisible buffers account for '$dsize_3', expected '== $dsize_2'"
+
+ mtu_restore $swp
+ tc qdisc del dev $swp clsact
+
+ # After SPAN removal, hidden buffers should be back to the original sizes.
+ local buf0size_4=$(get_buf_size 0)
+ local tot_size_4=$(get_tot_size)
+ local dsize_4=$((tot_size_4 - buf0size_4))
+ ((dsize_4 == dsize))
+ check_err $? "SPAN removed: Invisible buffers account for '$dsize_4', expected '== $dsize'"
+
+ log_test "${what}internal buffer size"
+}
+
+test_tc_int_buf()
+{
+ local cell_size=$(devlink_cell_size_get)
+ local size=$((cell_size * 1000))
+
+ tc qdisc replace dev $swp root handle 1: bfifo limit 1.5M
+ test_int_buf "TC: "
+
+ dcb buffer set dev $swp buffer-size all:0 0:$size
+ test_int_buf "TC+buffsize: "
+
+ dcb buffer set dev $swp buffer-size all:0
+ tc qdisc delete dev $swp root
+}
+
+bail_on_lldpad "configure DCB" "configure Qdiscs"
+
+trap cleanup EXIT
+setup_wait
+tests_run
+
+exit $EXIT_STATUS
diff --git a/tools/testing/selftests/drivers/net/mlxsw/qos_lib.sh b/tools/testing/selftests/drivers/net/mlxsw/qos_lib.sh
index faa51012cdac..5ad092b9bf10 100644
--- a/tools/testing/selftests/drivers/net/mlxsw/qos_lib.sh
+++ b/tools/testing/selftests/drivers/net/mlxsw/qos_lib.sh
@@ -54,31 +54,3 @@ measure_rate()
echo $ir $er
return $ret
}
-
-bail_on_lldpad()
-{
- if systemctl is-active --quiet lldpad; then
-
- cat >/dev/stderr <<-EOF
- WARNING: lldpad is running
-
- lldpad will likely configure DCB, and this test will
- configure Qdiscs. mlxsw does not support both at the
- same time, one of them is arbitrarily going to overwrite
- the other. That will cause spurious failures (or,
- unlikely, passes) of this test.
- EOF
-
- if [[ -z $ALLOW_LLDPAD ]]; then
- cat >/dev/stderr <<-EOF
-
- If you want to run the test anyway, please set
- an environment variable ALLOW_LLDPAD to a
- non-empty string.
- EOF
- exit 1
- else
- return
- fi
- fi
-}
diff --git a/tools/testing/selftests/drivers/net/mlxsw/qos_max_descriptors.sh b/tools/testing/selftests/drivers/net/mlxsw/qos_max_descriptors.sh
new file mode 100755
index 000000000000..5ac4f795e333
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/mlxsw/qos_max_descriptors.sh
@@ -0,0 +1,282 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+#
+# This test sends many small packets (size is less than cell size) through the
+# switch. A shaper is used in $swp2, so the traffic is limited there. Packets
+# are queued till they will be sent.
+#
+# The idea is to verify that the switch can handle at least 85% of maximum
+# supported descrpitors by hardware. Then, we verify that the driver configures
+# firmware to allow infinite size of egress descriptor pool, and does not use a
+# lower limitation. Increase the size of the relevant pools such that the pool's
+# size does not limit the traffic.
+
+# +-----------------------+
+# | H1 |
+# | + $h1.111 |
+# | | 192.0.2.33/28 |
+# | | |
+# | + $h1 |
+# +---|-------------------+
+# |
+# +---|-----------------------------+
+# | + $swp1 |
+# | | iPOOL1 |
+# | | |
+# | +-|------------------------+ |
+# | | + $swp1.111 | |
+# | | | |
+# | | BR1 | |
+# | | | |
+# | | + $swp2.111 | |
+# | +-|------------------------+ |
+# | | |
+# | + $swp2 |
+# | | ePOOL6 |
+# | | 1mbit |
+# +---+-----------------------------+
+# |
+# +---|-------------------+
+# | + $h2 H2 |
+# | | |
+# | + $h2.111 |
+# | 192.0.2.34/28 |
+# +-----------------------+
+#
+
+ALL_TESTS="
+ ping_ipv4
+ max_descriptors
+"
+
+lib_dir=$(dirname $0)/../../../net/forwarding
+
+NUM_NETIFS=4
+source $lib_dir/lib.sh
+source $lib_dir/devlink_lib.sh
+source mlxsw_lib.sh
+
+MAX_POOL_SIZE=$(devlink_pool_size_get)
+SHAPER_RATE=1mbit
+
+# The current TBF qdisc interface does not allow us to configure the shaper to
+# flat zero. The ASIC shaper is guaranteed to work with a granularity of
+# 200Mbps. On Spectrum-2, writing a value close to zero instead of zero works
+# well, but the performance on Spectrum-1 is unpredictable. Thus, do not run the
+# test on Spectrum-1.
+mlxsw_only_on_spectrum 2+ || exit
+
+h1_create()
+{
+ simple_if_init $h1
+
+ vlan_create $h1 111 v$h1 192.0.2.33/28
+ ip link set dev $h1.111 type vlan egress-qos-map 0:1
+}
+
+h1_destroy()
+{
+ vlan_destroy $h1 111
+
+ simple_if_fini $h1
+}
+
+h2_create()
+{
+ simple_if_init $h2
+
+ vlan_create $h2 111 v$h2 192.0.2.34/28
+}
+
+h2_destroy()
+{
+ vlan_destroy $h2 111
+
+ simple_if_fini $h2
+}
+
+switch_create()
+{
+ # pools
+ # -----
+
+ devlink_pool_size_thtype_save 1
+ devlink_pool_size_thtype_save 6
+
+ devlink_port_pool_th_save $swp1 1
+ devlink_port_pool_th_save $swp2 6
+
+ devlink_tc_bind_pool_th_save $swp1 1 ingress
+ devlink_tc_bind_pool_th_save $swp2 1 egress
+
+ devlink_pool_size_thtype_set 1 dynamic $MAX_POOL_SIZE
+ devlink_pool_size_thtype_set 6 static $MAX_POOL_SIZE
+
+ # $swp1
+ # -----
+
+ ip link set dev $swp1 up
+ vlan_create $swp1 111
+ ip link set dev $swp1.111 type vlan ingress-qos-map 0:0 1:1
+
+ devlink_port_pool_th_set $swp1 1 16
+ devlink_tc_bind_pool_th_set $swp1 1 ingress 1 16
+
+ tc qdisc replace dev $swp1 root handle 1: \
+ ets bands 8 strict 8 priomap 7 6
+ dcb buffer set dev $swp1 prio-buffer all:0 1:1
+
+ # $swp2
+ # -----
+
+ ip link set dev $swp2 up
+ vlan_create $swp2 111
+ ip link set dev $swp2.111 type vlan egress-qos-map 0:0 1:1
+
+ devlink_port_pool_th_set $swp2 6 $MAX_POOL_SIZE
+ devlink_tc_bind_pool_th_set $swp2 1 egress 6 $MAX_POOL_SIZE
+
+ tc qdisc replace dev $swp2 root handle 1: tbf rate $SHAPER_RATE \
+ burst 128K limit 500M
+ tc qdisc replace dev $swp2 parent 1:1 handle 11: \
+ ets bands 8 strict 8 priomap 7 6
+
+ # bridge
+ # ------
+
+ ip link add name br1 type bridge vlan_filtering 0
+ ip link set dev $swp1.111 master br1
+ ip link set dev br1 up
+
+ ip link set dev $swp2.111 master br1
+}
+
+switch_destroy()
+{
+ # Do this first so that we can reset the limits to values that are only
+ # valid for the original static / dynamic setting.
+ devlink_pool_size_thtype_restore 6
+ devlink_pool_size_thtype_restore 1
+
+ # bridge
+ # ------
+
+ ip link set dev $swp2.111 nomaster
+
+ ip link set dev br1 down
+ ip link set dev $swp1.111 nomaster
+ ip link del dev br1
+
+ # $swp2
+ # -----
+
+ tc qdisc del dev $swp2 parent 1:1 handle 11:
+ tc qdisc del dev $swp2 root
+
+ devlink_tc_bind_pool_th_restore $swp2 1 egress
+ devlink_port_pool_th_restore $swp2 6
+
+ vlan_destroy $swp2 111
+ ip link set dev $swp2 down
+
+ # $swp1
+ # -----
+
+ dcb buffer set dev $swp1 prio-buffer all:0
+ tc qdisc del dev $swp1 root
+
+ devlink_tc_bind_pool_th_restore $swp1 1 ingress
+ devlink_port_pool_th_restore $swp1 1
+
+ vlan_destroy $swp1 111
+ ip link set dev $swp1 down
+}
+
+setup_prepare()
+{
+ h1=${NETIFS[p1]}
+ swp1=${NETIFS[p2]}
+
+ swp2=${NETIFS[p3]}
+ h2=${NETIFS[p4]}
+
+ h2mac=$(mac_get $h2)
+
+ vrf_prepare
+
+ h1_create
+ h2_create
+ switch_create
+}
+
+cleanup()
+{
+ pre_cleanup
+
+ switch_destroy
+ h2_destroy
+ h1_destroy
+
+ vrf_cleanup
+}
+
+ping_ipv4()
+{
+ ping_test $h1 192.0.2.34 " h1->h2"
+}
+
+percentage_used()
+{
+ local num_packets=$1; shift
+ local max_packets=$1; shift
+
+ bc <<< "
+ scale=2
+ 100 * $num_packets / $max_packets
+ "
+}
+
+max_descriptors()
+{
+ local cell_size=$(devlink_cell_size_get)
+ local exp_perc_used=85
+ local max_descriptors
+ local pktsize=30
+
+ RET=0
+
+ max_descriptors=$(mlxsw_max_descriptors_get) || exit 1
+
+ local d0=$(ethtool_stats_get $swp2 tc_no_buffer_discard_uc_tc_1)
+
+ log_info "Send many small packets, packet size = $pktsize bytes"
+ start_traffic_pktsize $pktsize $h1.111 192.0.2.33 192.0.2.34 $h2mac
+
+ # Sleep to wait for congestion.
+ sleep 5
+
+ local d1=$(ethtool_stats_get $swp2 tc_no_buffer_discard_uc_tc_1)
+ ((d1 == d0))
+ check_err $? "Drops seen on egress port: $d0 -> $d1 ($((d1 - d0)))"
+
+ # Check how many packets the switch can handle, the limitation is
+ # maximum descriptors.
+ local pkts_bytes=$(ethtool_stats_get $swp2 tc_transmit_queue_tc_1)
+ local pkts_num=$((pkts_bytes / cell_size))
+ local perc_used=$(percentage_used $pkts_num $max_descriptors)
+
+ check_err $(bc <<< "$perc_used < $exp_perc_used") \
+ "Expected > $exp_perc_used% of descriptors, handle $perc_used%"
+
+ stop_traffic
+ sleep 1
+
+ log_test "Maximum descriptors usage. The percentage used is $perc_used%"
+}
+
+trap cleanup EXIT
+setup_prepare
+setup_wait
+tests_run
+
+exit $EXIT_STATUS
diff --git a/tools/testing/selftests/drivers/net/mlxsw/qos_mc_aware.sh b/tools/testing/selftests/drivers/net/mlxsw/qos_mc_aware.sh
index b025daea062d..6d892de43fa8 100755
--- a/tools/testing/selftests/drivers/net/mlxsw/qos_mc_aware.sh
+++ b/tools/testing/selftests/drivers/net/mlxsw/qos_mc_aware.sh
@@ -129,28 +129,36 @@ switch_create()
vlan_create $swp2 111
vlan_create $swp3 111
- ethtool -s $swp3 speed 1000 autoneg off
- tc qdisc replace dev $swp3 root handle 3: \
- prio bands 8 priomap 7 7 7 7 7 7 7 7
+ tc qdisc replace dev $swp3 root handle 3: tbf rate 1gbit \
+ burst 128K limit 1G
+ tc qdisc replace dev $swp3 parent 3:3 handle 33: \
+ prio bands 8 priomap 7 7 7 7 7 7 7 7
ip link add name br1 type bridge vlan_filtering 0
+ ip link set dev br1 addrgenmode none
ip link set dev br1 up
ip link set dev $swp1 master br1
ip link set dev $swp3 master br1
ip link add name br111 type bridge vlan_filtering 0
+ ip link set dev br111 addrgenmode none
ip link set dev br111 up
ip link set dev $swp2.111 master br111
ip link set dev $swp3.111 master br111
# Make sure that ingress quotas are smaller than egress so that there is
# room for both streams of traffic to be admitted to shared buffer.
+ devlink_port_pool_th_save $swp1 0
devlink_port_pool_th_set $swp1 0 5
+ devlink_tc_bind_pool_th_save $swp1 0 ingress
devlink_tc_bind_pool_th_set $swp1 0 ingress 0 5
+ devlink_port_pool_th_save $swp2 0
devlink_port_pool_th_set $swp2 0 5
+ devlink_tc_bind_pool_th_save $swp2 1 ingress
devlink_tc_bind_pool_th_set $swp2 1 ingress 0 5
+ devlink_port_pool_th_save $swp3 4
devlink_port_pool_th_set $swp3 4 12
}
@@ -167,8 +175,8 @@ switch_destroy()
ip link del dev br111
ip link del dev br1
+ tc qdisc del dev $swp3 parent 3:3 handle 33:
tc qdisc del dev $swp3 root handle 3:
- ethtool -s $swp3 autoneg on
vlan_destroy $swp3 111
vlan_destroy $swp2 111
diff --git a/tools/testing/selftests/drivers/net/mlxsw/qos_pfc.sh b/tools/testing/selftests/drivers/net/mlxsw/qos_pfc.sh
new file mode 100755
index 000000000000..0f0f4f05807c
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/mlxsw/qos_pfc.sh
@@ -0,0 +1,417 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+#
+# This test injects a 10-MB burst of traffic with VLAN tag and 802.1p priority
+# of 1. This stream is consistently prioritized as priority 1, is put to PG
+# buffer 1, and scheduled at TC 1.
+#
+# - the stream first ingresses through $swp1, where it is forwarded to $swp3
+#
+# - then it ingresses through $swp4. Here it is put to a lossless buffer and put
+# to a small pool ("PFC pool"). The traffic is forwarded to $swp2, which is
+# shaped, and thus the PFC pool eventually fills, therefore the headroom
+# fills, and $swp3 is paused.
+#
+# - since $swp3 now can't send traffic, the traffic ingressing $swp1 is kept at
+# a pool ("overflow pool"). The overflow pool needs to be large enough to
+# contain the whole burst.
+#
+# - eventually the PFC pool gets some traffic out, headroom therefore gets some
+# traffic to the pool, and $swp3 is unpaused again. This way the traffic is
+# gradually forwarded from the overflow pool, through the PFC pool, out of
+# $swp2, and eventually to $h2.
+#
+# - if PFC works, all lossless flow packets that ingress through $swp1 should
+# also be seen ingressing $h2. If it doesn't, there will be drops due to
+# discrepancy between the speeds of $swp1 and $h2.
+#
+# - it should all play out relatively quickly, so that SLL and HLL will not
+# cause drops.
+#
+# +-----------------------+
+# | H1 |
+# | + $h1.111 |
+# | | 192.0.2.33/28 |
+# | | |
+# | + $h1 |
+# +---|-------------------+ +--------------------+
+# | | |
+# +---|----------------------|--------------------|---------------------------+
+# | + $swp1 $swp3 + + $swp4 |
+# | | iPOOL1 iPOOL0 | | iPOOL2 |
+# | | ePOOL4 ePOOL5 | | ePOOL4 |
+# | | PFC:enabled=1 | | PFC:enabled=1 |
+# | +-|----------------------|-+ +-|------------------------+ |
+# | | + $swp1.111 $swp3.111 + | | + $swp4.111 | |
+# | | | | | |
+# | | BR1 | | BR2 | |
+# | | | | | |
+# | | | | + $swp2.111 | |
+# | +--------------------------+ +---------|----------------+ |
+# | | |
+# | iPOOL0: 500KB dynamic | |
+# | iPOOL1: 10MB static | |
+# | iPOOL2: 1MB static + $swp2 |
+# | ePOOL4: 500KB dynamic | iPOOL0 |
+# | ePOOL5: 10MB static | ePOOL6 |
+# | ePOOL6: "infinite" static | 200Mbps shaper |
+# +-------------------------------------------------------|-------------------+
+# |
+# +---|-------------------+
+# | + $h2 H2 |
+# | | |
+# | + $h2.111 |
+# | 192.0.2.34/28 |
+# +-----------------------+
+#
+# iPOOL0+ePOOL4 is a helper pool for control traffic etc.
+# iPOOL1+ePOOL5 are overflow pools.
+# iPOOL2+ePOOL6 are PFC pools.
+
+ALL_TESTS="
+ ping_ipv4
+ test_qos_pfc
+"
+
+lib_dir=$(dirname $0)/../../../net/forwarding
+
+NUM_NETIFS=6
+source $lib_dir/lib.sh
+source $lib_dir/devlink_lib.sh
+
+_1KB=1000
+_100KB=$((100 * _1KB))
+_500KB=$((500 * _1KB))
+_1MB=$((1000 * _1KB))
+_10MB=$((10 * _1MB))
+
+h1_create()
+{
+ simple_if_init $h1
+ mtu_set $h1 10000
+
+ vlan_create $h1 111 v$h1 192.0.2.33/28
+}
+
+h1_destroy()
+{
+ vlan_destroy $h1 111
+
+ mtu_restore $h1
+ simple_if_fini $h1
+}
+
+h2_create()
+{
+ simple_if_init $h2
+ mtu_set $h2 10000
+
+ vlan_create $h2 111 v$h2 192.0.2.34/28
+}
+
+h2_destroy()
+{
+ vlan_destroy $h2 111
+
+ mtu_restore $h2
+ simple_if_fini $h2
+}
+
+switch_create()
+{
+ local lanes_swp4
+ local pg1_size
+
+ # pools
+ # -----
+
+ devlink_pool_size_thtype_save 0
+ devlink_pool_size_thtype_save 4
+ devlink_pool_size_thtype_save 1
+ devlink_pool_size_thtype_save 5
+ devlink_pool_size_thtype_save 2
+ devlink_pool_size_thtype_save 6
+
+ devlink_port_pool_th_save $swp1 1
+ devlink_port_pool_th_save $swp2 6
+ devlink_port_pool_th_save $swp3 5
+ devlink_port_pool_th_save $swp4 2
+
+ devlink_tc_bind_pool_th_save $swp1 1 ingress
+ devlink_tc_bind_pool_th_save $swp2 1 egress
+ devlink_tc_bind_pool_th_save $swp3 1 egress
+ devlink_tc_bind_pool_th_save $swp4 1 ingress
+
+ # Control traffic pools. Just reduce the size. Keep them dynamic so that
+ # we don't need to change all the uninteresting quotas.
+ devlink_pool_size_thtype_set 0 dynamic $_500KB
+ devlink_pool_size_thtype_set 4 dynamic $_500KB
+
+ # Overflow pools.
+ devlink_pool_size_thtype_set 1 static $_10MB
+ devlink_pool_size_thtype_set 5 static $_10MB
+
+ # PFC pools. As per the writ, the size of egress PFC pool should be
+ # infinice, but actually it just needs to be large enough to not matter
+ # in practice, so reuse the 10MB limit.
+ devlink_pool_size_thtype_set 2 static $_1MB
+ devlink_pool_size_thtype_set 6 static $_10MB
+
+ # $swp1
+ # -----
+
+ ip link set dev $swp1 up
+ mtu_set $swp1 10000
+ vlan_create $swp1 111
+ ip link set dev $swp1.111 type vlan ingress-qos-map 0:0 1:1
+
+ devlink_port_pool_th_set $swp1 1 $_10MB
+ devlink_tc_bind_pool_th_set $swp1 1 ingress 1 $_10MB
+
+ # Configure qdisc so that we can configure PG and therefore pool
+ # assignment.
+ tc qdisc replace dev $swp1 root handle 1: \
+ ets bands 8 strict 8 priomap 7 6
+ dcb buffer set dev $swp1 prio-buffer all:0 1:1
+
+ # $swp2
+ # -----
+
+ ip link set dev $swp2 up
+ mtu_set $swp2 10000
+ vlan_create $swp2 111
+ ip link set dev $swp2.111 type vlan egress-qos-map 0:0 1:1
+
+ devlink_port_pool_th_set $swp2 6 $_10MB
+ devlink_tc_bind_pool_th_set $swp2 1 egress 6 $_10MB
+
+ # prio 0->TC0 (band 7), 1->TC1 (band 6). TC1 is shaped.
+ tc qdisc replace dev $swp2 root handle 1: \
+ ets bands 8 strict 8 priomap 7 6
+ tc qdisc replace dev $swp2 parent 1:7 handle 17: \
+ tbf rate 200Mbit burst 131072 limit 1M
+
+ # $swp3
+ # -----
+
+ ip link set dev $swp3 up
+ mtu_set $swp3 10000
+ vlan_create $swp3 111
+ ip link set dev $swp3.111 type vlan egress-qos-map 0:0 1:1
+
+ devlink_port_pool_th_set $swp3 5 $_10MB
+ devlink_tc_bind_pool_th_set $swp3 1 egress 5 $_10MB
+
+ # prio 0->TC0 (band 7), 1->TC1 (band 6)
+ tc qdisc replace dev $swp3 root handle 1: \
+ ets bands 8 strict 8 priomap 7 6
+
+ # Need to enable PFC so that PAUSE takes effect. Therefore need to put
+ # the lossless prio into a buffer of its own. Don't bother with buffer
+ # sizes though, there is not going to be any pressure in the "backward"
+ # direction.
+ dcb buffer set dev $swp3 prio-buffer all:0 1:1
+ dcb pfc set dev $swp3 prio-pfc all:off 1:on
+
+ # $swp4
+ # -----
+
+ ip link set dev $swp4 up
+ mtu_set $swp4 10000
+ vlan_create $swp4 111
+ ip link set dev $swp4.111 type vlan ingress-qos-map 0:0 1:1
+
+ devlink_port_pool_th_set $swp4 2 $_1MB
+ devlink_tc_bind_pool_th_set $swp4 1 ingress 2 $_1MB
+
+ # Configure qdisc so that we can hand-tune headroom.
+ tc qdisc replace dev $swp4 root handle 1: \
+ ets bands 8 strict 8 priomap 7 6
+ dcb buffer set dev $swp4 prio-buffer all:0 1:1
+ dcb pfc set dev $swp4 prio-pfc all:off 1:on
+ # PG0 will get autoconfigured to Xoff, give PG1 arbitrarily 100K, which
+ # is (-2*MTU) about 80K of delay provision.
+ pg1_size=$_100KB
+
+ setup_wait_dev_with_timeout $swp4
+
+ lanes_swp4=$(ethtool $swp4 | grep 'Lanes:')
+ lanes_swp4=${lanes_swp4#*"Lanes: "}
+
+ # 8-lane ports use two buffers among which the configured buffer
+ # is split, so double the size to get twice (20K + 80K).
+ if [[ $lanes_swp4 -eq 8 ]]; then
+ pg1_size=$((pg1_size * 2))
+ fi
+
+ dcb buffer set dev $swp4 buffer-size all:0 1:$pg1_size
+
+ # bridges
+ # -------
+
+ ip link add name br1 type bridge vlan_filtering 0
+ ip link set dev $swp1.111 master br1
+ ip link set dev $swp3.111 master br1
+ ip link set dev br1 up
+
+ ip link add name br2 type bridge vlan_filtering 0
+ ip link set dev $swp2.111 master br2
+ ip link set dev $swp4.111 master br2
+ ip link set dev br2 up
+}
+
+switch_destroy()
+{
+ # Do this first so that we can reset the limits to values that are only
+ # valid for the original static / dynamic setting.
+ devlink_pool_size_thtype_restore 6
+ devlink_pool_size_thtype_restore 5
+ devlink_pool_size_thtype_restore 4
+ devlink_pool_size_thtype_restore 2
+ devlink_pool_size_thtype_restore 1
+ devlink_pool_size_thtype_restore 0
+
+ # bridges
+ # -------
+
+ ip link set dev br2 down
+ ip link set dev $swp4.111 nomaster
+ ip link set dev $swp2.111 nomaster
+ ip link del dev br2
+
+ ip link set dev br1 down
+ ip link set dev $swp3.111 nomaster
+ ip link set dev $swp1.111 nomaster
+ ip link del dev br1
+
+ # $swp4
+ # -----
+
+ dcb buffer set dev $swp4 buffer-size all:0
+ dcb pfc set dev $swp4 prio-pfc all:off
+ dcb buffer set dev $swp4 prio-buffer all:0
+ tc qdisc del dev $swp4 root
+
+ devlink_tc_bind_pool_th_restore $swp4 1 ingress
+ devlink_port_pool_th_restore $swp4 2
+
+ vlan_destroy $swp4 111
+ mtu_restore $swp4
+ ip link set dev $swp4 down
+
+ # $swp3
+ # -----
+
+ dcb pfc set dev $swp3 prio-pfc all:off
+ dcb buffer set dev $swp3 prio-buffer all:0
+ tc qdisc del dev $swp3 root
+
+ devlink_tc_bind_pool_th_restore $swp3 1 egress
+ devlink_port_pool_th_restore $swp3 5
+
+ vlan_destroy $swp3 111
+ mtu_restore $swp3
+ ip link set dev $swp3 down
+
+ # $swp2
+ # -----
+
+ tc qdisc del dev $swp2 parent 1:7
+ tc qdisc del dev $swp2 root
+
+ devlink_tc_bind_pool_th_restore $swp2 1 egress
+ devlink_port_pool_th_restore $swp2 6
+
+ vlan_destroy $swp2 111
+ mtu_restore $swp2
+ ip link set dev $swp2 down
+
+ # $swp1
+ # -----
+
+ dcb buffer set dev $swp1 prio-buffer all:0
+ tc qdisc del dev $swp1 root
+
+ devlink_tc_bind_pool_th_restore $swp1 1 ingress
+ devlink_port_pool_th_restore $swp1 1
+
+ vlan_destroy $swp1 111
+ mtu_restore $swp1
+ ip link set dev $swp1 down
+}
+
+setup_prepare()
+{
+ h1=${NETIFS[p1]}
+ swp1=${NETIFS[p2]}
+
+ swp2=${NETIFS[p3]}
+ h2=${NETIFS[p4]}
+
+ swp3=${NETIFS[p5]}
+ swp4=${NETIFS[p6]}
+
+ h2mac=$(mac_get $h2)
+
+ vrf_prepare
+
+ h1_create
+ h2_create
+ switch_create
+}
+
+cleanup()
+{
+ pre_cleanup
+
+ switch_destroy
+ h2_destroy
+ h1_destroy
+
+ vrf_cleanup
+}
+
+ping_ipv4()
+{
+ ping_test $h1 192.0.2.34
+}
+
+test_qos_pfc()
+{
+ RET=0
+
+ # 10M pool, each packet is 8K of payload + headers
+ local pkts=$((_10MB / 8050))
+ local size=$((pkts * 8050))
+ local in0=$(ethtool_stats_get $swp1 rx_octets_prio_1)
+ local out0=$(ethtool_stats_get $swp2 tx_octets_prio_1)
+
+ $MZ $h1 -p 8000 -Q 1:111 -A 192.0.2.33 -B 192.0.2.34 \
+ -a own -b $h2mac -c $pkts -t udp -q
+ sleep 2
+
+ local in1=$(ethtool_stats_get $swp1 rx_octets_prio_1)
+ local out1=$(ethtool_stats_get $swp2 tx_octets_prio_1)
+
+ local din=$((in1 - in0))
+ local dout=$((out1 - out0))
+
+ local pct_in=$((din * 100 / size))
+
+ ((pct_in > 95 && pct_in < 105))
+ check_err $? "Relative ingress out of expected bounds, $pct_in% should be 100%"
+
+ ((dout == din))
+ check_err $? "$((din - dout)) bytes out of $din ingressed got lost"
+
+ log_test "PFC"
+}
+
+bail_on_lldpad "configure DCB" "configure Qdiscs"
+
+trap cleanup EXIT
+setup_prepare
+setup_wait
+tests_run
+
+exit $EXIT_STATUS
diff --git a/tools/testing/selftests/drivers/net/mlxsw/rif_bridge.sh b/tools/testing/selftests/drivers/net/mlxsw/rif_bridge.sh
new file mode 100755
index 000000000000..b79542a4dcc7
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/mlxsw/rif_bridge.sh
@@ -0,0 +1,183 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+lib_dir=$(dirname $0)/../../../net/forwarding
+
+ALL_TESTS="
+ bridge_rif_add
+ bridge_rif_nomaster
+ bridge_rif_remaster
+ bridge_rif_nomaster_addr
+ bridge_rif_nomaster_port
+ bridge_rif_remaster_port
+"
+
+NUM_NETIFS=2
+source $lib_dir/lib.sh
+source $lib_dir/devlink_lib.sh
+
+setup_prepare()
+{
+ swp1=${NETIFS[p1]}
+ swp2=${NETIFS[p2]}
+
+ team_create lag1 lacp
+ ip link set dev lag1 addrgenmode none
+ ip link set dev lag1 address $(mac_get $swp1)
+
+ team_create lag2 lacp
+ ip link set dev lag2 addrgenmode none
+ ip link set dev lag2 address $(mac_get $swp2)
+
+ ip link add name br1 type bridge vlan_filtering 1
+ ip link set dev br1 addrgenmode none
+ ip link set dev br1 address $(mac_get lag1)
+ ip link set dev br1 up
+
+ ip link set dev lag1 master br1
+
+ ip link set dev $swp1 master lag1
+ ip link set dev $swp1 up
+
+ ip link set dev $swp2 master lag2
+ ip link set dev $swp2 up
+}
+
+cleanup()
+{
+ pre_cleanup
+
+ ip link set dev $swp2 nomaster
+ ip link set dev $swp2 down
+
+ ip link set dev $swp1 nomaster
+ ip link set dev $swp1 down
+
+ ip link del dev lag2
+ ip link set dev lag1 nomaster
+ ip link del dev lag1
+
+ ip link del dev br1
+}
+
+bridge_rif_add()
+{
+ RET=0
+
+ local rifs_occ_t0=$(devlink_resource_occ_get rifs)
+ __addr_add_del br1 add 192.0.2.2/28
+ sleep 1
+ local rifs_occ_t1=$(devlink_resource_occ_get rifs)
+ local expected_rifs=$((rifs_occ_t0 + 1))
+
+ ((expected_rifs == rifs_occ_t1))
+ check_err $? "Expected $expected_rifs RIFs, $rifs_occ_t1 are used"
+
+ log_test "Add RIF for bridge on address addition"
+}
+
+bridge_rif_nomaster()
+{
+ RET=0
+
+ local rifs_occ_t0=$(devlink_resource_occ_get rifs)
+ ip link set dev lag1 nomaster
+ sleep 1
+ local rifs_occ_t1=$(devlink_resource_occ_get rifs)
+ local expected_rifs=$((rifs_occ_t0 - 1))
+
+ ((expected_rifs == rifs_occ_t1))
+ check_err $? "Expected $expected_rifs RIFs, $rifs_occ_t1 are used"
+
+ log_test "Drop RIF for bridge on LAG deslavement"
+}
+
+bridge_rif_remaster()
+{
+ RET=0
+
+ local rifs_occ_t0=$(devlink_resource_occ_get rifs)
+ ip link set dev lag1 master br1
+ sleep 1
+ local rifs_occ_t1=$(devlink_resource_occ_get rifs)
+ local expected_rifs=$((rifs_occ_t0 + 1))
+
+ ((expected_rifs == rifs_occ_t1))
+ check_err $? "Expected $expected_rifs RIFs, $rifs_occ_t1 are used"
+
+ log_test "Add RIF for bridge on LAG reenslavement"
+}
+
+bridge_rif_nomaster_addr()
+{
+ local rifs_occ_t0=$(devlink_resource_occ_get rifs)
+
+ # Adding an address while the LAG is enslaved shouldn't generate a RIF.
+ __addr_add_del lag1 add 192.0.2.65/28
+ sleep 1
+ local rifs_occ_t1=$(devlink_resource_occ_get rifs)
+ local expected_rifs=$((rifs_occ_t0))
+
+ ((expected_rifs == rifs_occ_t1))
+ check_err $? "After adding IP: Expected $expected_rifs RIFs, $rifs_occ_t1 are used"
+
+ # Removing the LAG from the bridge should drop RIF for the bridge (as
+ # tested in bridge_rif_lag_nomaster), but since the LAG now has an
+ # address, it should gain a RIF.
+ ip link set dev lag1 nomaster
+ sleep 1
+ local rifs_occ_t2=$(devlink_resource_occ_get rifs)
+ local expected_rifs=$((rifs_occ_t0))
+
+ ((expected_rifs == rifs_occ_t2))
+ check_err $? "After deslaving: Expected $expected_rifs RIFs, $rifs_occ_t2 are used"
+
+ log_test "Add RIF for LAG on deslavement from bridge"
+
+ __addr_add_del lag1 del 192.0.2.65/28
+ ip link set dev lag1 master br1
+ sleep 1
+}
+
+bridge_rif_nomaster_port()
+{
+ RET=0
+
+ local rifs_occ_t0=$(devlink_resource_occ_get rifs)
+ ip link set dev $swp1 nomaster
+ sleep 1
+ local rifs_occ_t1=$(devlink_resource_occ_get rifs)
+ local expected_rifs=$((rifs_occ_t0 - 1))
+
+ ((expected_rifs == rifs_occ_t1))
+ check_err $? "Expected $expected_rifs RIFs, $rifs_occ_t1 are used"
+
+ log_test "Drop RIF for bridge on deslavement of port from LAG"
+}
+
+bridge_rif_remaster_port()
+{
+ RET=0
+
+ local rifs_occ_t0=$(devlink_resource_occ_get rifs)
+ ip link set dev $swp1 down
+ ip link set dev $swp1 master lag1
+ ip link set dev $swp1 up
+ setup_wait_dev $swp1
+ local rifs_occ_t1=$(devlink_resource_occ_get rifs)
+ local expected_rifs=$((rifs_occ_t0 + 1))
+
+ ((expected_rifs == rifs_occ_t1))
+ check_err $? "Expected $expected_rifs RIFs, $rifs_occ_t1 are used"
+
+ log_test "Add RIF for bridge on reenslavement of port to LAG"
+}
+
+trap cleanup EXIT
+
+setup_prepare
+setup_wait
+
+tests_run
+
+exit $EXIT_STATUS
diff --git a/tools/testing/selftests/drivers/net/mlxsw/rif_counter_scale.sh b/tools/testing/selftests/drivers/net/mlxsw/rif_counter_scale.sh
new file mode 100644
index 000000000000..a43a9926e690
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/mlxsw/rif_counter_scale.sh
@@ -0,0 +1,107 @@
+# SPDX-License-Identifier: GPL-2.0
+
+RIF_COUNTER_NUM_NETIFS=2
+
+rif_counter_addr4()
+{
+ local i=$1; shift
+ local p=$1; shift
+
+ printf 192.0.%d.%d $((i / 64)) $(((4 * i % 256) + p))
+}
+
+rif_counter_addr4pfx()
+{
+ rif_counter_addr4 $@
+ printf /30
+}
+
+rif_counter_h1_create()
+{
+ simple_if_init $h1
+}
+
+rif_counter_h1_destroy()
+{
+ simple_if_fini $h1
+}
+
+rif_counter_h2_create()
+{
+ simple_if_init $h2
+}
+
+rif_counter_h2_destroy()
+{
+ simple_if_fini $h2
+}
+
+rif_counter_setup_prepare()
+{
+ h1=${NETIFS[p1]}
+ h2=${NETIFS[p2]}
+
+ vrf_prepare
+
+ rif_counter_h1_create
+ rif_counter_h2_create
+}
+
+rif_counter_cleanup()
+{
+ local count=$1; shift
+
+ pre_cleanup
+
+ for ((i = 1; i <= count; i++)); do
+ vlan_destroy $h2 $i
+ done
+
+ rif_counter_h2_destroy
+ rif_counter_h1_destroy
+
+ vrf_cleanup
+
+ if [[ -v RIF_COUNTER_BATCH_FILE ]]; then
+ rm -f $RIF_COUNTER_BATCH_FILE
+ fi
+}
+
+
+rif_counter_test()
+{
+ local count=$1; shift
+ local should_fail=$1; shift
+
+ RIF_COUNTER_BATCH_FILE="$(mktemp)"
+
+ for ((i = 1; i <= count; i++)); do
+ vlan_create $h2 $i v$h2 $(rif_counter_addr4pfx $i 2)
+ done
+ for ((i = 1; i <= count; i++)); do
+ cat >> $RIF_COUNTER_BATCH_FILE <<-EOF
+ stats set dev $h2.$i l3_stats on
+ EOF
+ done
+
+ ip -b $RIF_COUNTER_BATCH_FILE
+ check_err_fail $should_fail $? "RIF counter enablement"
+}
+
+rif_counter_traffic_test()
+{
+ local count=$1; shift
+ local i;
+
+ for ((i = count; i > 0; i /= 2)); do
+ $MZ $h1 -Q $i -c 1 -d 20msec -p 100 -a own -b $(mac_get $h2) \
+ -A $(rif_counter_addr4 $i 1) \
+ -B $(rif_counter_addr4 $i 2) \
+ -q -t udp sp=54321,dp=12345
+ done
+ for ((i = count; i > 0; i /= 2)); do
+ busywait "$TC_HIT_TIMEOUT" until_counter_is "== 1" \
+ hw_stats_get l3_stats $h2.$i rx packets > /dev/null
+ check_err $? "Traffic not seen at RIF $h2.$i"
+ done
+}
diff --git a/tools/testing/selftests/drivers/net/mlxsw/rif_lag.sh b/tools/testing/selftests/drivers/net/mlxsw/rif_lag.sh
new file mode 100755
index 000000000000..e28f978104f3
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/mlxsw/rif_lag.sh
@@ -0,0 +1,136 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+lib_dir=$(dirname $0)/../../../net/forwarding
+
+ALL_TESTS="
+ lag_rif_add
+ lag_rif_nomaster
+ lag_rif_remaster
+ lag_rif_nomaster_addr
+"
+
+NUM_NETIFS=2
+source $lib_dir/lib.sh
+source $lib_dir/devlink_lib.sh
+
+setup_prepare()
+{
+ swp1=${NETIFS[p1]}
+ swp2=${NETIFS[p2]}
+
+ team_create lag1 lacp
+ ip link set dev lag1 addrgenmode none
+ ip link set dev lag1 address $(mac_get $swp1)
+
+ team_create lag2 lacp
+ ip link set dev lag2 addrgenmode none
+ ip link set dev lag2 address $(mac_get $swp2)
+
+ ip link set dev $swp1 master lag1
+ ip link set dev $swp1 up
+
+ ip link set dev $swp2 master lag2
+ ip link set dev $swp2 up
+}
+
+cleanup()
+{
+ pre_cleanup
+
+ ip link set dev $swp2 nomaster
+ ip link set dev $swp2 down
+
+ ip link set dev $swp1 nomaster
+ ip link set dev $swp1 down
+
+ ip link del dev lag2
+ ip link del dev lag1
+}
+
+lag_rif_add()
+{
+ RET=0
+
+ local rifs_occ_t0=$(devlink_resource_occ_get rifs)
+ __addr_add_del lag1 add 192.0.2.2/28
+ sleep 1
+ local rifs_occ_t1=$(devlink_resource_occ_get rifs)
+ local expected_rifs=$((rifs_occ_t0 + 1))
+
+ ((expected_rifs == rifs_occ_t1))
+ check_err $? "Expected $expected_rifs RIFs, $rifs_occ_t1 are used"
+
+ log_test "Add RIF for LAG on address addition"
+}
+
+lag_rif_nomaster()
+{
+ RET=0
+
+ local rifs_occ_t0=$(devlink_resource_occ_get rifs)
+ ip link set dev $swp1 nomaster
+ sleep 1
+ local rifs_occ_t1=$(devlink_resource_occ_get rifs)
+ local expected_rifs=$((rifs_occ_t0 - 1))
+
+ ((expected_rifs == rifs_occ_t1))
+ check_err $? "Expected $expected_rifs RIFs, $rifs_occ_t1 are used"
+
+ log_test "Drop RIF for LAG on port deslavement"
+}
+
+lag_rif_remaster()
+{
+ RET=0
+
+ local rifs_occ_t0=$(devlink_resource_occ_get rifs)
+ ip link set dev $swp1 down
+ ip link set dev $swp1 master lag1
+ ip link set dev $swp1 up
+ setup_wait_dev $swp1
+ local rifs_occ_t1=$(devlink_resource_occ_get rifs)
+ local expected_rifs=$((rifs_occ_t0 + 1))
+
+ ((expected_rifs == rifs_occ_t1))
+ check_err $? "Expected $expected_rifs RIFs, $rifs_occ_t1 are used"
+
+ log_test "Add RIF for LAG on port reenslavement"
+}
+
+lag_rif_nomaster_addr()
+{
+ local rifs_occ_t0=$(devlink_resource_occ_get rifs)
+
+ # Adding an address while the port is LAG'd shouldn't generate a RIF.
+ __addr_add_del $swp1 add 192.0.2.65/28
+ sleep 1
+ local rifs_occ_t1=$(devlink_resource_occ_get rifs)
+ local expected_rifs=$((rifs_occ_t0))
+
+ ((expected_rifs == rifs_occ_t1))
+ check_err $? "After adding IP: Expected $expected_rifs RIFs, $rifs_occ_t1 are used"
+
+ # Removing the port from LAG should drop RIF for the LAG (as tested in
+ # lag_rif_nomaster), but since the port now has an address, it should
+ # gain a RIF.
+ ip link set dev $swp1 nomaster
+ sleep 1
+ local rifs_occ_t2=$(devlink_resource_occ_get rifs)
+ local expected_rifs=$((rifs_occ_t0))
+
+ ((expected_rifs == rifs_occ_t2))
+ check_err $? "After deslaving: Expected $expected_rifs RIFs, $rifs_occ_t2 are used"
+
+ __addr_add_del $swp1 del 192.0.2.65/28
+ log_test "Add RIF for port on deslavement from LAG"
+}
+
+trap cleanup EXIT
+
+setup_prepare
+setup_wait
+
+tests_run
+
+exit $EXIT_STATUS
diff --git a/tools/testing/selftests/drivers/net/mlxsw/rif_lag_vlan.sh b/tools/testing/selftests/drivers/net/mlxsw/rif_lag_vlan.sh
new file mode 100755
index 000000000000..6318cfa6434c
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/mlxsw/rif_lag_vlan.sh
@@ -0,0 +1,146 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+lib_dir=$(dirname $0)/../../../net/forwarding
+
+ALL_TESTS="
+ lag_rif_add
+ lag_rif_nomaster
+ lag_rif_remaster
+ lag_rif_nomaster_addr
+"
+
+NUM_NETIFS=2
+source $lib_dir/lib.sh
+source $lib_dir/devlink_lib.sh
+
+setup_prepare()
+{
+ swp1=${NETIFS[p1]}
+ swp2=${NETIFS[p2]}
+
+ team_create lag1 lacp
+ ip link set dev lag1 addrgenmode none
+ ip link set dev lag1 address $(mac_get $swp1)
+
+ team_create lag2 lacp
+ ip link set dev lag2 addrgenmode none
+ ip link set dev lag2 address $(mac_get $swp2)
+
+ ip link set dev $swp1 master lag1
+ ip link set dev $swp1 up
+
+ ip link set dev $swp2 master lag2
+ ip link set dev $swp2 up
+
+ vlan_create lag1 100
+ ip link set dev lag1.100 addrgenmode none
+
+ vlan_create lag1 200
+ ip link set dev lag1.200 addrgenmode none
+}
+
+cleanup()
+{
+ pre_cleanup
+
+ ip link del dev lag1.200
+ ip link del dev lag1.100
+
+ ip link set dev $swp2 nomaster
+ ip link set dev $swp2 down
+
+ ip link set dev $swp1 nomaster
+ ip link set dev $swp1 down
+
+ ip link del dev lag2
+ ip link del dev lag1
+}
+
+lag_rif_add()
+{
+ RET=0
+
+ local rifs_occ_t0=$(devlink_resource_occ_get rifs)
+ __addr_add_del lag1.100 add 192.0.2.2/28
+ __addr_add_del lag1.200 add 192.0.2.18/28
+ sleep 1
+ local rifs_occ_t1=$(devlink_resource_occ_get rifs)
+ local expected_rifs=$((rifs_occ_t0 + 2))
+
+ ((expected_rifs == rifs_occ_t1))
+ check_err $? "Expected $expected_rifs RIFs, $rifs_occ_t1 are used"
+
+ log_test "Add RIFs for LAG VLANs on address addition"
+}
+
+lag_rif_nomaster()
+{
+ RET=0
+
+ local rifs_occ_t0=$(devlink_resource_occ_get rifs)
+ ip link set dev $swp1 nomaster
+ sleep 1
+ local rifs_occ_t1=$(devlink_resource_occ_get rifs)
+ local expected_rifs=$((rifs_occ_t0 - 2))
+
+ ((expected_rifs == rifs_occ_t1))
+ check_err $? "Expected $expected_rifs RIFs, $rifs_occ_t1 are used"
+
+ log_test "Drop RIFs for LAG VLANs on port deslavement"
+}
+
+lag_rif_remaster()
+{
+ RET=0
+
+ local rifs_occ_t0=$(devlink_resource_occ_get rifs)
+ ip link set dev $swp1 down
+ ip link set dev $swp1 master lag1
+ ip link set dev $swp1 up
+ setup_wait_dev $swp1
+ local rifs_occ_t1=$(devlink_resource_occ_get rifs)
+ local expected_rifs=$((rifs_occ_t0 + 2))
+
+ ((expected_rifs == rifs_occ_t1))
+ check_err $? "Expected $expected_rifs RIFs, $rifs_occ_t1 are used"
+
+ log_test "Add RIFs for LAG VLANs on port reenslavement"
+}
+
+lag_rif_nomaster_addr()
+{
+ local rifs_occ_t0=$(devlink_resource_occ_get rifs)
+
+ # Adding an address while the port is LAG'd shouldn't generate a RIF.
+ __addr_add_del $swp1 add 192.0.2.65/28
+ sleep 1
+ local rifs_occ_t1=$(devlink_resource_occ_get rifs)
+ local expected_rifs=$((rifs_occ_t0))
+
+ ((expected_rifs == rifs_occ_t1))
+ check_err $? "After adding IP: Expected $expected_rifs RIFs, $rifs_occ_t1 are used"
+
+ # Removing the port from LAG should drop two RIFs for the LAG VLANs (as
+ # tested in lag_rif_nomaster), but since the port now has an address, it
+ # should gain a RIF.
+ ip link set dev $swp1 nomaster
+ sleep 1
+ local rifs_occ_t2=$(devlink_resource_occ_get rifs)
+ local expected_rifs=$((rifs_occ_t0 - 1))
+
+ ((expected_rifs == rifs_occ_t2))
+ check_err $? "After deslaving: Expected $expected_rifs RIFs, $rifs_occ_t2 are used"
+
+ __addr_add_del $swp1 del 192.0.2.65/28
+ log_test "Add RIF for port on deslavement from LAG"
+}
+
+trap cleanup EXIT
+
+setup_prepare
+setup_wait
+
+tests_run
+
+exit $EXIT_STATUS
diff --git a/tools/testing/selftests/drivers/net/mlxsw/rif_mac_profile_scale.sh b/tools/testing/selftests/drivers/net/mlxsw/rif_mac_profile_scale.sh
new file mode 100644
index 000000000000..71e7681f15f6
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/mlxsw/rif_mac_profile_scale.sh
@@ -0,0 +1,72 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+# Test for RIF MAC profiles resource. The test adds VLAN netdevices according to
+# the maximum number of RIF MAC profiles, sets each of them with a random
+# MAC address, and checks that eventually the number of occupied RIF MAC
+# profiles equals the maximum number of RIF MAC profiles.
+
+
+RIF_MAC_PROFILE_NUM_NETIFS=2
+
+rif_mac_profiles_create()
+{
+ local count=$1; shift
+ local should_fail=$1; shift
+ local batch_file="$(mktemp)"
+
+ for ((i = 1; i <= count; i++)); do
+ vlan=$(( i*10 ))
+ m=$(( i*11 ))
+
+ cat >> $batch_file <<-EOF
+ link add link $h1 name $h1.$vlan \
+ address 00:$m:$m:$m:$m:$m type vlan id $vlan
+ address add 192.0.$m.1/24 dev $h1.$vlan
+ EOF
+ done
+
+ ip -b $batch_file &> /dev/null
+ check_err_fail $should_fail $? "RIF creation"
+
+ rm -f $batch_file
+}
+
+rif_mac_profile_test()
+{
+ local count=$1; shift
+ local should_fail=$1; shift
+
+ rif_mac_profiles_create $count $should_fail
+
+ occ=$(devlink -j resource show $DEVLINK_DEV \
+ | jq '.[][][] | select(.name=="rif_mac_profiles") |.["occ"]')
+
+ [[ $occ -eq $count ]]
+ check_err_fail $should_fail $? "Attempt to use $count profiles (actual result $occ)"
+}
+
+rif_mac_profile_setup_prepare()
+{
+ h1=${NETIFS[p1]}
+ h2=${NETIFS[p2]}
+
+ # Disable IPv6 on the two interfaces to avoid IPv6 link-local addresses
+ # being generated and RIFs being created.
+ sysctl_set net.ipv6.conf.$h1.disable_ipv6 1
+ sysctl_set net.ipv6.conf.$h2.disable_ipv6 1
+
+ ip link set $h1 up
+ ip link set $h2 up
+}
+
+rif_mac_profile_cleanup()
+{
+ pre_cleanup
+
+ ip link set $h2 down
+ ip link set $h1 down
+
+ sysctl_restore net.ipv6.conf.$h2.disable_ipv6
+ sysctl_restore net.ipv6.conf.$h1.disable_ipv6
+}
diff --git a/tools/testing/selftests/drivers/net/mlxsw/rif_mac_profiles.sh b/tools/testing/selftests/drivers/net/mlxsw/rif_mac_profiles.sh
new file mode 100755
index 000000000000..c18340cee55d
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/mlxsw/rif_mac_profiles.sh
@@ -0,0 +1,213 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+lib_dir=$(dirname $0)/../../../net/forwarding
+
+ALL_TESTS="
+ mac_profile_test
+"
+NUM_NETIFS=4
+source $lib_dir/lib.sh
+source $lib_dir/tc_common.sh
+source $lib_dir/devlink_lib.sh
+
+h1_create()
+{
+ simple_if_init $h1 192.0.2.1/24
+ ip route add 198.51.100.0/24 vrf v$h1 nexthop via 192.0.2.2
+
+ tc qdisc add dev $h1 ingress
+}
+
+h1_destroy()
+{
+ tc qdisc del dev $h1 ingress
+
+ ip route del 198.51.100.0/24 vrf v$h1
+ simple_if_fini $h1 192.0.2.1/24
+}
+
+h2_create()
+{
+ simple_if_init $h2 198.51.100.1/24
+ ip route add 192.0.2.0/24 vrf v$h2 nexthop via 198.51.100.2
+
+ tc qdisc add dev $h2 ingress
+}
+
+h2_destroy()
+{
+ tc qdisc del dev $h2 ingress
+
+ ip route del 192.0.2.0/24 vrf v$h2
+ simple_if_fini $h2 198.51.100.1/24
+}
+
+router_create()
+{
+ ip link set dev $rp1 up
+ ip link set dev $rp2 up
+
+ tc qdisc add dev $rp1 clsact
+ tc qdisc add dev $rp2 clsact
+ ip address add 192.0.2.2/24 dev $rp1
+ ip address add 198.51.100.2/24 dev $rp2
+}
+
+router_destroy()
+{
+ ip address del 198.51.100.2/24 dev $rp2
+ ip address del 192.0.2.2/24 dev $rp1
+ tc qdisc del dev $rp2 clsact
+ tc qdisc del dev $rp1 clsact
+
+ ip link set dev $rp2 down
+ ip link set dev $rp1 down
+}
+
+setup_prepare()
+{
+ h1=${NETIFS[p1]}
+ rp1=${NETIFS[p2]}
+
+ rp2=${NETIFS[p3]}
+ h2=${NETIFS[p4]}
+
+ vrf_prepare
+
+ h1_create
+ h2_create
+
+ router_create
+
+ forwarding_enable
+}
+
+cleanup()
+{
+ pre_cleanup
+
+ forwarding_restore
+
+ router_destroy
+
+ h2_destroy
+ h1_destroy
+
+ vrf_cleanup
+}
+
+h1_to_h2()
+{
+ local test_name=$@; shift
+ local smac=$(mac_get $rp2)
+
+ RET=0
+
+ # Replace neighbour to avoid first packet being forwarded in software
+ ip neigh replace dev $rp2 198.51.100.1 lladdr $(mac_get $h2)
+
+ # Add a filter to ensure that packets are forwarded in hardware. Cannot
+ # match on source MAC because it is not set in eACL after routing
+ tc filter add dev $rp2 egress proto ip pref 1 handle 101 \
+ flower skip_sw ip_proto udp src_port 12345 dst_port 54321 \
+ action pass
+
+ # Add a filter to ensure that packets are received with the correct
+ # source MAC
+ tc filter add dev $h2 ingress proto ip pref 1 handle 101 \
+ flower skip_sw src_mac $smac ip_proto udp src_port 12345 \
+ dst_port 54321 action pass
+
+ $MZ $h1 -a own -b $(mac_get $rp1) -t udp "sp=12345,dp=54321" \
+ -A 192.0.2.1 -B 198.51.100.1 -c 10 -p 100 -d 1msec -q
+
+ tc_check_packets "dev $rp2 egress" 101 10
+ check_err $? "packets not forwarded in hardware"
+
+ tc_check_packets "dev $h2 ingress" 101 10
+ check_err $? "packets not forwarded with correct source mac"
+
+ log_test "h1->h2: $test_name"
+
+ tc filter del dev $h2 ingress protocol ip pref 1 handle 101 flower
+ tc filter del dev $rp2 egress protocol ip pref 1 handle 101 flower
+ ip neigh del dev $rp2 198.51.100.1 lladdr $(mac_get $h2)
+}
+
+h2_to_h1()
+{
+ local test_name=$@; shift
+ local rp1_mac=$(mac_get $rp1)
+
+ RET=0
+
+ ip neigh replace dev $rp1 192.0.2.1 lladdr $(mac_get $h1)
+
+ tc filter add dev $rp1 egress proto ip pref 1 handle 101 \
+ flower skip_sw ip_proto udp src_port 54321 dst_port 12345 \
+ action pass
+
+ tc filter add dev $h1 ingress proto ip pref 1 handle 101 \
+ flower skip_sw src_mac $rp1_mac ip_proto udp src_port 54321 \
+ dst_port 12345 action pass
+
+ $MZ $h2 -a own -b $(mac_get $rp2) -t udp "sp=54321,dp=12345" \
+ -A 198.51.100.1 -B 192.0.2.1 -c 10 -p 100 -d 1msec -q
+
+ tc_check_packets "dev $rp1 egress" 101 10
+ check_err $? "packets not forwarded in hardware"
+
+ tc_check_packets "dev $h1 ingress" 101 10
+ check_err $? "packets not forwarded with correct source mac"
+
+ log_test "h2->h1: $test_name"
+
+ tc filter del dev $h1 ingress protocol ip pref 1 handle 101 flower
+ tc filter del dev $rp1 egress protocol ip pref 1 handle 101 flower
+ ip neigh del dev $rp1 192.0.2.1 lladdr $(mac_get $h1)
+}
+
+smac_test()
+{
+ local test_name=$@; shift
+
+ # Test that packets forwarded to $h2 via $rp2 are forwarded with the
+ # current source MAC of $rp2
+ h1_to_h2 $test_name
+
+ # Test that packets forwarded to $h1 via $rp1 are forwarded with the
+ # current source MAC of $rp1. This MAC is never changed during the test,
+ # but given the shared nature of MAC profile, the point is to see that
+ # changes to the MAC of $rp2 do not affect that of $rp1
+ h2_to_h1 $test_name
+}
+
+mac_profile_test()
+{
+ local rp2_mac=$(mac_get $rp2)
+
+ # Test behavior when the RIF backing $rp2 is transitioned to use
+ # a new MAC profile
+ ip link set dev $rp2 addr 00:11:22:33:44:55
+ smac_test "new mac profile"
+
+ # Test behavior when the MAC profile used by the RIF is edited
+ ip link set dev $rp2 address 00:22:22:22:22:22
+ smac_test "edit mac profile"
+
+ # Restore original MAC
+ ip link set dev $rp2 addr $rp2_mac
+}
+
+trap cleanup EXIT
+
+setup_prepare
+setup_wait
+
+mac_profiles=$(devlink_resource_size_get rif_mac_profiles)
+if [[ $mac_profiles -ne 1 ]]; then
+ tests_run
+fi
+
+exit $EXIT_STATUS
diff --git a/tools/testing/selftests/drivers/net/mlxsw/rif_mac_profiles_occ.sh b/tools/testing/selftests/drivers/net/mlxsw/rif_mac_profiles_occ.sh
new file mode 100755
index 000000000000..026a126f584d
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/mlxsw/rif_mac_profiles_occ.sh
@@ -0,0 +1,147 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+lib_dir=$(dirname $0)/../../../net/forwarding
+
+ALL_TESTS="
+ rif_mac_profile_edit_test
+"
+NUM_NETIFS=2
+source $lib_dir/lib.sh
+source $lib_dir/devlink_lib.sh
+
+setup_prepare()
+{
+ h1=${NETIFS[p1]}
+ h2=${NETIFS[p2]}
+
+ # Disable IPv6 on the two interfaces to avoid IPv6 link-local addresses
+ # being generated and RIFs being created
+ sysctl_set net.ipv6.conf.$h1.disable_ipv6 1
+ sysctl_set net.ipv6.conf.$h2.disable_ipv6 1
+
+ ip link set $h1 up
+ ip link set $h2 up
+}
+
+cleanup()
+{
+ pre_cleanup
+
+ ip link set $h2 down
+ ip link set $h1 down
+
+ sysctl_restore net.ipv6.conf.$h2.disable_ipv6
+ sysctl_restore net.ipv6.conf.$h1.disable_ipv6
+
+ # Reload in order to clean all the RIFs and RIF MAC profiles created
+ devlink_reload
+}
+
+create_max_rif_mac_profiles()
+{
+ local count=$1; shift
+ local batch_file="$(mktemp)"
+
+ for ((i = 1; i <= count; i++)); do
+ vlan=$(( i*10 ))
+ m=$(( i*11 ))
+
+ cat >> $batch_file <<-EOF
+ link add link $h1 name $h1.$vlan \
+ address 00:$m:$m:$m:$m:$m type vlan id $vlan
+ address add 192.0.$m.1/24 dev $h1.$vlan
+ EOF
+ done
+
+ ip -b $batch_file &> /dev/null
+ rm -f $batch_file
+}
+
+rif_mac_profile_replacement_test()
+{
+ local h1_10_mac=$(mac_get $h1.10)
+
+ RET=0
+
+ ip link set $h1.10 address 00:12:34:56:78:99
+ check_err $?
+
+ log_test "RIF MAC profile replacement"
+
+ ip link set $h1.10 address $h1_10_mac
+}
+
+rif_mac_profile_consolidation_test()
+{
+ local count=$1; shift
+ local h1_20_mac
+
+ RET=0
+
+ if [[ $count -eq 1 ]]; then
+ return
+ fi
+
+ h1_20_mac=$(mac_get $h1.20)
+
+ # Set the MAC of $h1.20 to that of $h1.10 and confirm that they are
+ # using the same MAC profile.
+ ip link set $h1.20 address 00:11:11:11:11:11
+ check_err $?
+
+ occ=$(devlink -j resource show $DEVLINK_DEV \
+ | jq '.[][][] | select(.name=="rif_mac_profiles") |.["occ"]')
+
+ [[ $occ -eq $((count - 1)) ]]
+ check_err $? "MAC profile occupancy did not decrease"
+
+ log_test "RIF MAC profile consolidation"
+
+ ip link set $h1.20 address $h1_20_mac
+}
+
+rif_mac_profile_shared_replacement_test()
+{
+ local count=$1; shift
+ local i=$((count + 1))
+ local vlan=$(( i*10 ))
+ local m=11
+
+ RET=0
+
+ # Create a VLAN netdevice that has the same MAC as the first one.
+ ip link add link $h1 name $h1.$vlan address 00:$m:$m:$m:$m:$m \
+ type vlan id $vlan
+ ip address add 192.0.$m.1/24 dev $h1.$vlan
+
+ # MAC replacement should fail because all the MAC profiles are in use
+ # and the profile is shared between multiple RIFs
+ m=$(( i*11 ))
+ ip link set $h1.$vlan address 00:$m:$m:$m:$m:$m &> /dev/null
+ check_fail $?
+
+ log_test "RIF MAC profile shared replacement"
+
+ ip link del dev $h1.$vlan
+}
+
+rif_mac_profile_edit_test()
+{
+ local count=$(devlink_resource_size_get rif_mac_profiles)
+
+ create_max_rif_mac_profiles $count
+
+ rif_mac_profile_replacement_test
+ rif_mac_profile_consolidation_test $count
+ rif_mac_profile_shared_replacement_test $count
+}
+
+trap cleanup EXIT
+
+setup_prepare
+setup_wait
+
+tests_run
+
+exit $EXIT_STATUS
diff --git a/tools/testing/selftests/drivers/net/mlxsw/router_bridge_lag.sh b/tools/testing/selftests/drivers/net/mlxsw/router_bridge_lag.sh
new file mode 100755
index 000000000000..6ce317cfaf9b
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/mlxsw/router_bridge_lag.sh
@@ -0,0 +1,50 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+# Test enslavement to LAG with a clean slate.
+# See $lib_dir/router_bridge_lag.sh for further details.
+
+ALL_TESTS="
+ config_devlink_reload
+ config_enslave_h1
+ config_enslave_h2
+ config_enslave_h3
+ config_enslave_h4
+ config_enslave_swp1
+ config_enslave_swp2
+ config_enslave_swp3
+ config_enslave_swp4
+ config_wait
+ ping_ipv4
+ ping_ipv6
+"
+
+config_devlink_reload()
+{
+ log_info "Devlink reload"
+ devlink_reload
+}
+
+config_enslave_h1()
+{
+ config_enslave $h1 lag1
+}
+
+config_enslave_h2()
+{
+ config_enslave $h2 lag4
+}
+
+config_enslave_h3()
+{
+ config_enslave $h3 lag4
+}
+
+config_enslave_h4()
+{
+ config_enslave $h4 lag1
+}
+
+lib_dir=$(dirname $0)/../../../net/forwarding
+EXTRA_SOURCE="source $lib_dir/devlink_lib.sh"
+source $lib_dir/router_bridge_lag.sh
diff --git a/tools/testing/selftests/drivers/net/mlxsw/router_scale.sh b/tools/testing/selftests/drivers/net/mlxsw/router_scale.sh
index e93878d42596..683759d29199 100644
--- a/tools/testing/selftests/drivers/net/mlxsw/router_scale.sh
+++ b/tools/testing/selftests/drivers/net/mlxsw/router_scale.sh
@@ -68,7 +68,7 @@ wait_for_routes()
local t0=$1; shift
local route_count=$1; shift
- local t1=$(ip route | grep -o 'offload' | wc -l)
+ local t1=$(ip route | grep 'offload' | grep -v 'offload_failed' | wc -l)
local delta=$((t1 - t0))
echo $delta
[[ $delta -ge $route_count ]]
diff --git a/tools/testing/selftests/drivers/net/mlxsw/rtnetlink.sh b/tools/testing/selftests/drivers/net/mlxsw/rtnetlink.sh
index f4031002d5e9..893a693ad805 100755
--- a/tools/testing/selftests/drivers/net/mlxsw/rtnetlink.sh
+++ b/tools/testing/selftests/drivers/net/mlxsw/rtnetlink.sh
@@ -10,18 +10,16 @@
lib_dir=$(dirname $0)/../../../net/forwarding
ALL_TESTS="
- rif_set_addr_test
rif_vrf_set_addr_test
- rif_inherit_bridge_addr_test
rif_non_inherit_bridge_addr_test
vlan_interface_deletion_test
bridge_deletion_test
bridge_vlan_flags_test
vlan_1_test
- lag_bridge_upper_test
duplicate_vlans_test
vlan_rif_refcount_test
subport_rif_refcount_test
+ subport_rif_lag_join_test
vlan_dev_deletion_test
lag_unlink_slaves_test
lag_dev_deletion_test
@@ -29,6 +27,13 @@ ALL_TESTS="
bridge_extern_learn_test
neigh_offload_test
nexthop_offload_test
+ nexthop_obj_invalid_test
+ nexthop_obj_offload_test
+ nexthop_obj_group_offload_test
+ nexthop_obj_bucket_offload_test
+ nexthop_obj_blackhole_offload_test
+ nexthop_obj_route_offload_test
+ bridge_locked_port_test
devlink_reload_test
"
NUM_NETIFS=2
@@ -53,55 +58,6 @@ cleanup()
ip link set dev $swp1 down
}
-rif_set_addr_test()
-{
- local swp1_mac=$(mac_get $swp1)
- local swp2_mac=$(mac_get $swp2)
-
- RET=0
-
- # $swp1 and $swp2 likely got their IPv6 local addresses already, but
- # here we need to test the transition to RIF.
- ip addr flush dev $swp1
- ip addr flush dev $swp2
- sleep .1
-
- ip addr add dev $swp1 192.0.2.1/28
- check_err $?
-
- ip link set dev $swp1 addr 00:11:22:33:44:55
- check_err $?
-
- # IP address enablement should be rejected if the MAC address prefix
- # doesn't match other RIFs.
- ip addr add dev $swp2 192.0.2.2/28 &>/dev/null
- check_fail $? "IP address addition passed for a device with a wrong MAC"
- ip addr add dev $swp2 192.0.2.2/28 2>&1 >/dev/null \
- | grep -q mlxsw_spectrum
- check_err $? "no extack for IP address addition"
-
- ip link set dev $swp2 addr 00:11:22:33:44:66
- check_err $?
- ip addr add dev $swp2 192.0.2.2/28 &>/dev/null
- check_err $?
-
- # Change of MAC address of a RIF should be forbidden if the new MAC
- # doesn't share the prefix with other MAC addresses.
- ip link set dev $swp2 addr 00:11:22:33:00:66 &>/dev/null
- check_fail $? "change of MAC address passed for a wrong MAC"
- ip link set dev $swp2 addr 00:11:22:33:00:66 2>&1 >/dev/null \
- | grep -q mlxsw_spectrum
- check_err $? "no extack for MAC address change"
-
- log_test "RIF - bad MAC change"
-
- ip addr del dev $swp2 192.0.2.2/28
- ip addr del dev $swp1 192.0.2.1/28
-
- ip link set dev $swp2 addr $swp2_mac
- ip link set dev $swp1 addr $swp1_mac
-}
-
rif_vrf_set_addr_test()
{
# Test that it is possible to set an IP address on a VRF upper despite
@@ -121,45 +77,6 @@ rif_vrf_set_addr_test()
ip link del dev vrf-test
}
-rif_inherit_bridge_addr_test()
-{
- RET=0
-
- # Create first RIF
- ip addr add dev $swp1 192.0.2.1/28
- check_err $?
-
- # Create a FID RIF
- ip link add name br1 up type bridge vlan_filtering 0
- ip link set dev $swp2 master br1
- ip addr add dev br1 192.0.2.17/28
- check_err $?
-
- # Prepare a device with a low MAC address
- ip link add name d up type dummy
- ip link set dev d addr 00:11:22:33:44:55
-
- # Attach the device to br1. That prompts bridge address change, which
- # should be vetoed, thus preventing the attachment.
- ip link set dev d master br1 &>/dev/null
- check_fail $? "Device with low MAC was permitted to attach a bridge with RIF"
- ip link set dev d master br1 2>&1 >/dev/null \
- | grep -q mlxsw_spectrum
- check_err $? "no extack for bridge attach rejection"
-
- ip link set dev $swp2 addr 00:11:22:33:44:55 &>/dev/null
- check_fail $? "Changing swp2's MAC address permitted"
- ip link set dev $swp2 addr 00:11:22:33:44:55 2>&1 >/dev/null \
- | grep -q mlxsw_spectrum
- check_err $? "no extack for bridge port MAC address change rejection"
-
- log_test "RIF - attach port with bad MAC to bridge"
-
- ip link del dev d
- ip link del dev br1
- ip addr del dev $swp1 192.0.2.1/28
-}
-
rif_non_inherit_bridge_addr_test()
{
local swp2_mac=$(mac_get $swp2)
@@ -293,33 +210,6 @@ vlan_1_test()
ip link del dev $swp1.1
}
-lag_bridge_upper_test()
-{
- # Test that ports cannot be enslaved to LAG devices that have uppers
- # and that failure is handled gracefully. See commit b3529af6bb0d
- # ("spectrum: Reference count VLAN entries") for more details
- RET=0
-
- ip link add name bond1 type bond mode 802.3ad
-
- ip link add name br0 type bridge vlan_filtering 1
- ip link set dev bond1 master br0
-
- ip link set dev $swp1 down
- ip link set dev $swp1 master bond1 &> /dev/null
- check_fail $? "managed to enslave port to lag when should not"
-
- # This might generate a trace, if we did not handle the failure
- # correctly
- ip -6 address add 2001:db8:1::1/64 dev $swp1
- ip -6 address del 2001:db8:1::1/64 dev $swp1
-
- log_test "lag with bridge upper"
-
- ip link del dev br0
- ip link del dev bond1
-}
-
duplicate_vlans_test()
{
# Test that on a given port a VLAN is only used once. Either as VLAN
@@ -435,6 +325,48 @@ subport_rif_refcount_test()
ip link del dev bond1
}
+subport_rif_lag_join_test()
+{
+ # Test that the reference count of a RIF configured for a LAG is
+ # incremented / decremented when ports join / leave the LAG. We use the
+ # offload indication on routes configured on the RIF to understand if
+ # it was created / destroyed
+ RET=0
+
+ ip link add name bond1 type bond mode 802.3ad
+ ip link set dev $swp1 down
+ ip link set dev $swp2 down
+ ip link set dev $swp1 master bond1
+ ip link set dev $swp2 master bond1
+
+ ip link set dev bond1 up
+ ip -6 address add 2001:db8:1::1/64 dev bond1
+
+ busywait "$TIMEOUT" wait_for_offload \
+ ip -6 route get fibmatch 2001:db8:1::2 dev bond1
+ check_err $? "subport rif was not created on lag device"
+
+ ip link set dev $swp1 nomaster
+ busywait "$TIMEOUT" wait_for_offload \
+ ip -6 route get fibmatch 2001:db8:1::2 dev bond1
+ check_err $? "subport rif of lag device was destroyed after removing one port"
+
+ ip link set dev $swp1 master bond1
+ ip link set dev $swp2 nomaster
+ busywait "$TIMEOUT" wait_for_offload \
+ ip -6 route get fibmatch 2001:db8:1::2 dev bond1
+ check_err $? "subport rif of lag device was destroyed after re-adding a port and removing another"
+
+ ip link set dev $swp1 nomaster
+ busywait "$TIMEOUT" not wait_for_offload \
+ ip -6 route get fibmatch 2001:db8:1::2 dev bond1
+ check_err $? "subport rif of lag device was not destroyed when should"
+
+ log_test "subport rif lag join"
+
+ ip link del dev bond1
+}
+
vlan_dev_deletion_test()
{
# Test that VLAN devices are correctly deleted / unlinked when enslaved
@@ -550,9 +482,6 @@ vlan_interface_uppers_test()
ip link set dev $swp1 master br0
ip link add link br0 name br0.10 type vlan id 10
- ip link add link br0.10 name macvlan0 \
- type macvlan mode private &> /dev/null
- check_fail $? "managed to create a macvlan when should not"
ip -6 address add 2001:db8:1::1/64 dev br0.10
ip link add link br0.10 name macvlan0 type macvlan mode private
@@ -674,6 +603,320 @@ nexthop_offload_test()
sysctl_restore net.ipv6.conf.$swp2.keep_addr_on_down
}
+nexthop_obj_invalid_test()
+{
+ # Test that invalid nexthop object configurations are rejected
+ RET=0
+
+ simple_if_init $swp1 192.0.2.1/24 2001:db8:1::1/64
+ simple_if_init $swp2 192.0.2.2/24 2001:db8:1::2/64
+ setup_wait
+
+ ip nexthop add id 1 via 192.0.2.3 fdb
+ check_fail $? "managed to configure an FDB nexthop when should not"
+
+ ip nexthop add id 1 encap mpls 200/300 via 192.0.2.3 dev $swp1
+ check_fail $? "managed to configure a nexthop with MPLS encap when should not"
+
+ ip nexthop add id 1 dev $swp1
+ ip nexthop add id 2 dev $swp1
+ ip nexthop add id 3 via 192.0.2.3 dev $swp1
+ ip nexthop add id 10 group 1/2
+ check_fail $? "managed to configure a nexthop group with device-only nexthops when should not"
+
+ ip nexthop add id 10 group 3 type resilient buckets 7
+ check_fail $? "managed to configure a too small resilient nexthop group when should not"
+
+ ip nexthop add id 10 group 3 type resilient buckets 129
+ check_fail $? "managed to configure a resilient nexthop group with invalid number of buckets when should not"
+
+ ip nexthop add id 10 group 1/2 type resilient buckets 32
+ check_fail $? "managed to configure a resilient nexthop group with device-only nexthops when should not"
+
+ ip nexthop add id 10 group 3 type resilient buckets 32
+ check_err $? "failed to configure a valid resilient nexthop group"
+ ip nexthop replace id 3 dev $swp1
+ check_fail $? "managed to populate a nexthop bucket with a device-only nexthop when should not"
+
+ log_test "nexthop objects - invalid configurations"
+
+ ip nexthop del id 10
+ ip nexthop del id 3
+ ip nexthop del id 2
+ ip nexthop del id 1
+
+ simple_if_fini $swp2 192.0.2.2/24 2001:db8:1::2/64
+ simple_if_fini $swp1 192.0.2.1/24 2001:db8:1::1/64
+}
+
+nexthop_obj_offload_test()
+{
+ # Test offload indication of nexthop objects
+ RET=0
+
+ simple_if_init $swp1 192.0.2.1/24 2001:db8:1::1/64
+ simple_if_init $swp2
+ setup_wait
+
+ ip nexthop add id 1 via 192.0.2.2 dev $swp1
+ ip neigh replace 192.0.2.2 lladdr 00:11:22:33:44:55 nud perm \
+ dev $swp1
+
+ busywait "$TIMEOUT" wait_for_offload \
+ ip nexthop show id 1
+ check_err $? "nexthop not marked as offloaded when should"
+
+ ip neigh replace 192.0.2.2 nud failed dev $swp1
+ busywait "$TIMEOUT" not wait_for_offload \
+ ip nexthop show id 1
+ check_err $? "nexthop marked as offloaded after setting neigh to failed state"
+
+ ip neigh replace 192.0.2.2 lladdr 00:11:22:33:44:55 nud perm \
+ dev $swp1
+ busywait "$TIMEOUT" wait_for_offload \
+ ip nexthop show id 1
+ check_err $? "nexthop not marked as offloaded after neigh replace"
+
+ ip nexthop replace id 1 via 192.0.2.3 dev $swp1
+ busywait "$TIMEOUT" not wait_for_offload \
+ ip nexthop show id 1
+ check_err $? "nexthop marked as offloaded after replacing to use an invalid address"
+
+ ip nexthop replace id 1 via 192.0.2.2 dev $swp1
+ busywait "$TIMEOUT" wait_for_offload \
+ ip nexthop show id 1
+ check_err $? "nexthop not marked as offloaded after replacing to use a valid address"
+
+ log_test "nexthop objects offload indication"
+
+ ip neigh del 192.0.2.2 dev $swp1
+ ip nexthop del id 1
+
+ simple_if_fini $swp2
+ simple_if_fini $swp1 192.0.2.1/24 2001:db8:1::1/64
+}
+
+nexthop_obj_group_offload_test()
+{
+ # Test offload indication of nexthop group objects
+ RET=0
+
+ simple_if_init $swp1 192.0.2.1/24 2001:db8:1::1/64
+ simple_if_init $swp2
+ setup_wait
+
+ ip nexthop add id 1 via 192.0.2.2 dev $swp1
+ ip nexthop add id 2 via 2001:db8:1::2 dev $swp1
+ ip nexthop add id 10 group 1/2
+ ip neigh replace 192.0.2.2 lladdr 00:11:22:33:44:55 nud perm \
+ dev $swp1
+ ip neigh replace 192.0.2.3 lladdr 00:11:22:33:44:55 nud perm \
+ dev $swp1
+ ip neigh replace 2001:db8:1::2 lladdr 00:11:22:33:44:55 nud perm \
+ dev $swp1
+
+ busywait "$TIMEOUT" wait_for_offload \
+ ip nexthop show id 1
+ check_err $? "IPv4 nexthop not marked as offloaded when should"
+ busywait "$TIMEOUT" wait_for_offload \
+ ip nexthop show id 2
+ check_err $? "IPv6 nexthop not marked as offloaded when should"
+ busywait "$TIMEOUT" wait_for_offload \
+ ip nexthop show id 10
+ check_err $? "nexthop group not marked as offloaded when should"
+
+ # Invalidate nexthop id 1
+ ip neigh replace 192.0.2.2 nud failed dev $swp1
+ busywait "$TIMEOUT" not wait_for_offload \
+ ip nexthop show id 10
+ check_fail $? "nexthop group not marked as offloaded with one valid nexthop"
+
+ # Invalidate nexthop id 2
+ ip neigh replace 2001:db8:1::2 nud failed dev $swp1
+ busywait "$TIMEOUT" not wait_for_offload \
+ ip nexthop show id 10
+ check_err $? "nexthop group marked as offloaded when should not"
+
+ # Revalidate nexthop id 1
+ ip nexthop replace id 1 via 192.0.2.3 dev $swp1
+ busywait "$TIMEOUT" wait_for_offload \
+ ip nexthop show id 10
+ check_err $? "nexthop group not marked as offloaded after revalidating nexthop"
+
+ log_test "nexthop group objects offload indication"
+
+ ip neigh del 2001:db8:1::2 dev $swp1
+ ip neigh del 192.0.2.3 dev $swp1
+ ip neigh del 192.0.2.2 dev $swp1
+ ip nexthop del id 10
+ ip nexthop del id 2
+ ip nexthop del id 1
+
+ simple_if_fini $swp2
+ simple_if_fini $swp1 192.0.2.1/24 2001:db8:1::1/64
+}
+
+nexthop_obj_bucket_offload_test()
+{
+ # Test offload indication of nexthop buckets
+ RET=0
+
+ simple_if_init $swp1 192.0.2.1/24 2001:db8:1::1/64
+ simple_if_init $swp2
+ setup_wait
+
+ ip nexthop add id 1 via 192.0.2.2 dev $swp1
+ ip nexthop add id 2 via 2001:db8:1::2 dev $swp1
+ ip nexthop add id 10 group 1/2 type resilient buckets 32 idle_timer 0
+ ip neigh replace 192.0.2.2 lladdr 00:11:22:33:44:55 nud perm \
+ dev $swp1
+ ip neigh replace 192.0.2.3 lladdr 00:11:22:33:44:55 nud perm \
+ dev $swp1
+ ip neigh replace 2001:db8:1::2 lladdr 00:11:22:33:44:55 nud perm \
+ dev $swp1
+
+ busywait "$TIMEOUT" wait_for_offload \
+ ip nexthop bucket show nhid 1
+ check_err $? "IPv4 nexthop buckets not marked as offloaded when should"
+ busywait "$TIMEOUT" wait_for_offload \
+ ip nexthop bucket show nhid 2
+ check_err $? "IPv6 nexthop buckets not marked as offloaded when should"
+
+ # Invalidate nexthop id 1
+ ip neigh replace 192.0.2.2 nud failed dev $swp1
+ busywait "$TIMEOUT" wait_for_trap \
+ ip nexthop bucket show nhid 1
+ check_err $? "IPv4 nexthop buckets not marked with trap when should"
+
+ # Invalidate nexthop id 2
+ ip neigh replace 2001:db8:1::2 nud failed dev $swp1
+ busywait "$TIMEOUT" wait_for_trap \
+ ip nexthop bucket show nhid 2
+ check_err $? "IPv6 nexthop buckets not marked with trap when should"
+
+ # Revalidate nexthop id 1 by changing its configuration
+ ip nexthop replace id 1 via 192.0.2.3 dev $swp1
+ busywait "$TIMEOUT" wait_for_offload \
+ ip nexthop bucket show nhid 1
+ check_err $? "nexthop bucket not marked as offloaded after revalidating nexthop"
+
+ # Revalidate nexthop id 2 by changing its neighbour
+ ip neigh replace 2001:db8:1::2 lladdr 00:11:22:33:44:55 nud perm \
+ dev $swp1
+ busywait "$TIMEOUT" wait_for_offload \
+ ip nexthop bucket show nhid 2
+ check_err $? "nexthop bucket not marked as offloaded after revalidating neighbour"
+
+ log_test "nexthop bucket offload indication"
+
+ ip neigh del 2001:db8:1::2 dev $swp1
+ ip neigh del 192.0.2.3 dev $swp1
+ ip neigh del 192.0.2.2 dev $swp1
+ ip nexthop del id 10
+ ip nexthop del id 2
+ ip nexthop del id 1
+
+ simple_if_fini $swp2
+ simple_if_fini $swp1 192.0.2.1/24 2001:db8:1::1/64
+}
+
+nexthop_obj_blackhole_offload_test()
+{
+ # Test offload indication of blackhole nexthop objects
+ RET=0
+
+ ip nexthop add id 1 blackhole
+ busywait "$TIMEOUT" wait_for_offload \
+ ip nexthop show id 1
+ check_err $? "Blackhole nexthop not marked as offloaded when should"
+
+ ip nexthop add id 10 group 1
+ busywait "$TIMEOUT" wait_for_offload \
+ ip nexthop show id 10
+ check_err $? "Nexthop group not marked as offloaded when should"
+
+ log_test "blackhole nexthop objects offload indication"
+
+ ip nexthop del id 10
+ ip nexthop del id 1
+}
+
+nexthop_obj_route_offload_test()
+{
+ # Test offload indication of routes using nexthop objects
+ RET=0
+
+ simple_if_init $swp1 192.0.2.1/24 2001:db8:1::1/64
+ simple_if_init $swp2
+ setup_wait
+
+ ip nexthop add id 1 via 192.0.2.2 dev $swp1
+ ip neigh replace 192.0.2.2 lladdr 00:11:22:33:44:55 nud perm \
+ dev $swp1
+ ip neigh replace 192.0.2.3 lladdr 00:11:22:33:44:55 nud perm \
+ dev $swp1
+
+ ip route replace 198.51.100.0/24 nhid 1
+ busywait "$TIMEOUT" wait_for_offload \
+ ip route show 198.51.100.0/24
+ check_err $? "route not marked as offloaded when using valid nexthop"
+
+ ip nexthop replace id 1 via 192.0.2.3 dev $swp1
+ busywait "$TIMEOUT" wait_for_offload \
+ ip route show 198.51.100.0/24
+ check_err $? "route not marked as offloaded after replacing valid nexthop with a valid one"
+
+ ip nexthop replace id 1 via 192.0.2.4 dev $swp1
+ busywait "$TIMEOUT" not wait_for_offload \
+ ip route show 198.51.100.0/24
+ check_err $? "route marked as offloaded after replacing valid nexthop with an invalid one"
+
+ ip nexthop replace id 1 via 192.0.2.2 dev $swp1
+ busywait "$TIMEOUT" wait_for_offload \
+ ip route show 198.51.100.0/24
+ check_err $? "route not marked as offloaded after replacing invalid nexthop with a valid one"
+
+ log_test "routes using nexthop objects offload indication"
+
+ ip route del 198.51.100.0/24
+ ip neigh del 192.0.2.3 dev $swp1
+ ip neigh del 192.0.2.2 dev $swp1
+ ip nexthop del id 1
+
+ simple_if_fini $swp2
+ simple_if_fini $swp1 192.0.2.1/24 2001:db8:1::1/64
+}
+
+bridge_locked_port_test()
+{
+ RET=0
+
+ ip link add name br1 up type bridge vlan_filtering 0
+
+ ip link add link $swp1 name $swp1.10 type vlan id 10
+ ip link set dev $swp1.10 master br1
+
+ bridge link set dev $swp1.10 locked on
+ check_fail $? "managed to set locked flag on a VLAN upper"
+
+ ip link set dev $swp1.10 nomaster
+ ip link set dev $swp1 master br1
+
+ bridge link set dev $swp1 locked on
+ check_fail $? "managed to set locked flag on a bridge port that has a VLAN upper"
+
+ ip link del dev $swp1.10
+ bridge link set dev $swp1 locked on
+
+ ip link add link $swp1 name $swp1.10 type vlan id 10
+ check_fail $? "managed to configure a VLAN upper on a locked port"
+
+ log_test "bridge locked port"
+
+ ip link del dev $swp1.10 &> /dev/null
+ ip link del dev br1
+}
+
devlink_reload_test()
{
# Test that after executing all the above configuration tests, a
diff --git a/tools/testing/selftests/drivers/net/mlxsw/sch_ets.sh b/tools/testing/selftests/drivers/net/mlxsw/sch_ets.sh
index 94c37124a840..139175fd03e7 100755
--- a/tools/testing/selftests/drivers/net/mlxsw/sch_ets.sh
+++ b/tools/testing/selftests/drivers/net/mlxsw/sch_ets.sh
@@ -5,7 +5,6 @@
lib_dir=$(dirname $0)/../../../net/forwarding
source $lib_dir/sch_ets_core.sh
source $lib_dir/devlink_lib.sh
-source qos_lib.sh
ALL_TESTS="
ping_ipv4
@@ -15,23 +14,31 @@ ALL_TESTS="
ets_test_dwrr
"
+PARENT="parent 3:3"
+
switch_create()
{
- ets_switch_create
-
# Create a bottleneck so that the DWRR process can kick in.
- ethtool -s $h2 speed 1000 autoneg off
- ethtool -s $swp2 speed 1000 autoneg off
+ tc qdisc replace dev $swp2 root handle 3: tbf rate 1gbit \
+ burst 128K limit 1G
+
+ ets_switch_create
# Set the ingress quota high and use the three egress TCs to limit the
# amount of traffic that is admitted to the shared buffers. This makes
# sure that there is always enough traffic of all types to select from
# for the DWRR process.
+ devlink_port_pool_th_save $swp1 0
devlink_port_pool_th_set $swp1 0 12
+ devlink_tc_bind_pool_th_save $swp1 0 ingress
devlink_tc_bind_pool_th_set $swp1 0 ingress 0 12
+ devlink_port_pool_th_save $swp2 4
devlink_port_pool_th_set $swp2 4 12
+ devlink_tc_bind_pool_th_save $swp2 7 egress
devlink_tc_bind_pool_th_set $swp2 7 egress 4 5
+ devlink_tc_bind_pool_th_save $swp2 6 egress
devlink_tc_bind_pool_th_set $swp2 6 egress 4 5
+ devlink_tc_bind_pool_th_save $swp2 5 egress
devlink_tc_bind_pool_th_set $swp2 5 egress 4 5
# Note: sch_ets_core.sh uses VLAN ingress-qos-map to assign packet
@@ -49,10 +56,9 @@ switch_destroy()
devlink_tc_bind_pool_th_restore $swp1 0 ingress
devlink_port_pool_th_restore $swp1 0
- ethtool -s $swp2 autoneg on
- ethtool -s $h2 autoneg on
-
ets_switch_destroy
+
+ tc qdisc del dev $swp2 root handle 3:
}
# Callback from sch_ets_tests.sh
@@ -71,5 +77,5 @@ collect_stats()
done
}
-bail_on_lldpad
+bail_on_lldpad "configure DCB" "configure Qdiscs"
ets_run
diff --git a/tools/testing/selftests/drivers/net/mlxsw/sch_offload.sh b/tools/testing/selftests/drivers/net/mlxsw/sch_offload.sh
new file mode 100755
index 000000000000..071a33d10c20
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/mlxsw/sch_offload.sh
@@ -0,0 +1,290 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+#
+# Test qdisc offload indication
+
+
+ALL_TESTS="
+ test_root
+ test_port_tbf
+ test_etsprio
+ test_etsprio_port_tbf
+"
+NUM_NETIFS=1
+lib_dir=$(dirname $0)/../../../net/forwarding
+source $lib_dir/lib.sh
+
+check_not_offloaded()
+{
+ local handle=$1; shift
+ local h
+ local offloaded
+
+ h=$(qdisc_stats_get $h1 "$handle" .handle)
+ [[ $h == '"'$handle'"' ]]
+ check_err $? "Qdisc with handle $handle does not exist"
+
+ offloaded=$(qdisc_stats_get $h1 "$handle" .offloaded)
+ [[ $offloaded == true ]]
+ check_fail $? "Qdisc with handle $handle offloaded, but should not be"
+}
+
+check_all_offloaded()
+{
+ local handle=$1; shift
+
+ if [[ ! -z $handle ]]; then
+ local offloaded=$(qdisc_stats_get $h1 "$handle" .offloaded)
+ [[ $offloaded == true ]]
+ check_err $? "Qdisc with handle $handle not offloaded"
+ fi
+
+ local unoffloaded=$(tc q sh dev $h1 invisible |
+ grep -v offloaded |
+ sed s/root/parent\ root/ |
+ cut -d' ' -f 5)
+ [[ -z $unoffloaded ]]
+ check_err $? "Qdiscs with following parents not offloaded: $unoffloaded"
+
+ pre_cleanup
+}
+
+with_ets()
+{
+ local handle=$1; shift
+ local locus=$1; shift
+
+ tc qdisc add dev $h1 $locus handle $handle \
+ ets bands 8 priomap 7 6 5 4 3 2 1 0
+ "$@"
+ tc qdisc del dev $h1 $locus
+}
+
+with_prio()
+{
+ local handle=$1; shift
+ local locus=$1; shift
+
+ tc qdisc add dev $h1 $locus handle $handle \
+ prio bands 8 priomap 7 6 5 4 3 2 1 0
+ "$@"
+ tc qdisc del dev $h1 $locus
+}
+
+with_red()
+{
+ local handle=$1; shift
+ local locus=$1; shift
+
+ tc qdisc add dev $h1 $locus handle $handle \
+ red limit 1000000 min 200000 max 300000 probability 0.5 avpkt 1500
+ "$@"
+ tc qdisc del dev $h1 $locus
+}
+
+with_tbf()
+{
+ local handle=$1; shift
+ local locus=$1; shift
+
+ tc qdisc add dev $h1 $locus handle $handle \
+ tbf rate 400Mbit burst 128K limit 1M
+ "$@"
+ tc qdisc del dev $h1 $locus
+}
+
+with_pfifo()
+{
+ local handle=$1; shift
+ local locus=$1; shift
+
+ tc qdisc add dev $h1 $locus handle $handle pfifo limit 100K
+ "$@"
+ tc qdisc del dev $h1 $locus
+}
+
+with_bfifo()
+{
+ local handle=$1; shift
+ local locus=$1; shift
+
+ tc qdisc add dev $h1 $locus handle $handle bfifo limit 100K
+ "$@"
+ tc qdisc del dev $h1 $locus
+}
+
+with_drr()
+{
+ local handle=$1; shift
+ local locus=$1; shift
+
+ tc qdisc add dev $h1 $locus handle $handle drr
+ "$@"
+ tc qdisc del dev $h1 $locus
+}
+
+with_qdiscs()
+{
+ local handle=$1; shift
+ local parent=$1; shift
+ local kind=$1; shift
+ local next_handle=$((handle * 2))
+ local locus;
+
+ if [[ $kind == "--" ]]; then
+ local cmd=$1; shift
+ $cmd $(printf %x: $parent) "$@"
+ else
+ if ((parent == 0)); then
+ locus=root
+ else
+ locus=$(printf "parent %x:1" $parent)
+ fi
+
+ with_$kind $(printf %x: $handle) "$locus" \
+ with_qdiscs $next_handle $handle "$@"
+ fi
+}
+
+get_name()
+{
+ local parent=$1; shift
+ local name=$(echo "" "${@^^}" | tr ' ' -)
+
+ if ((parent != 0)); then
+ kind=$(qdisc_stats_get $h1 $parent: .kind)
+ kind=${kind%\"}
+ kind=${kind#\"}
+ name="-${kind^^}$name"
+ fi
+
+ echo root$name
+}
+
+do_test_offloaded()
+{
+ local handle=$1; shift
+ local parent=$1; shift
+
+ RET=0
+ with_qdiscs $handle $parent "$@" -- check_all_offloaded
+ log_test $(get_name $parent "$@")" offloaded"
+}
+
+do_test_nooffload()
+{
+ local handle=$1; shift
+ local parent=$1; shift
+
+ local name=$(echo "${@^^}" | tr ' ' -)
+ local kind
+
+ RET=0
+ with_qdiscs $handle $parent "$@" -- check_not_offloaded
+ log_test $(get_name $parent "$@")" not offloaded"
+}
+
+do_test_combinations()
+{
+ local handle=$1; shift
+ local parent=$1; shift
+
+ local cont
+ local leaf
+ local fifo
+
+ for cont in "" ets prio; do
+ for leaf in "" red tbf "red tbf" "tbf red"; do
+ for fifo in "" pfifo bfifo; do
+ if [[ -z "$cont$leaf$fifo" ]]; then
+ continue
+ fi
+ do_test_offloaded $handle $parent \
+ $cont $leaf $fifo
+ done
+ done
+ done
+
+ for cont in ets prio; do
+ for leaf in red tbf; do
+ do_test_nooffload $handle $parent $cont red tbf $leaf
+ do_test_nooffload $handle $parent $cont tbf red $leaf
+ done
+ for leaf in "red red" "tbf tbf"; do
+ do_test_nooffload $handle $parent $cont $leaf
+ done
+ done
+
+ do_test_nooffload $handle $parent drr
+}
+
+test_root()
+{
+ do_test_combinations 1 0
+}
+
+test_port_tbf()
+{
+ with_tbf 1: root \
+ do_test_combinations 8 1
+}
+
+do_test_etsprio()
+{
+ local parent=$1; shift
+ local tbfpfx=$1; shift
+ local cont
+
+ for cont in ets prio; do
+ RET=0
+ with_$cont 8: "$parent" \
+ with_red 11: "parent 8:1" \
+ with_red 12: "parent 8:2" \
+ with_tbf 13: "parent 8:3" \
+ with_tbf 14: "parent 8:4" \
+ check_all_offloaded
+ log_test "root$tbfpfx-ETS-{RED,TBF} offloaded"
+
+ RET=0
+ with_$cont 8: "$parent" \
+ with_red 81: "parent 8:1" \
+ with_tbf 811: "parent 81:1" \
+ with_tbf 84: "parent 8:4" \
+ with_red 841: "parent 84:1" \
+ check_all_offloaded
+ log_test "root$tbfpfx-ETS-{RED-TBF,TBF-RED} offloaded"
+
+ RET=0
+ with_$cont 8: "$parent" \
+ with_red 81: "parent 8:1" \
+ with_tbf 811: "parent 81:1" \
+ with_bfifo 8111: "parent 811:1" \
+ with_tbf 82: "parent 8:2" \
+ with_red 821: "parent 82:1" \
+ with_bfifo 8211: "parent 821:1" \
+ check_all_offloaded
+ log_test "root$tbfpfx-ETS-{RED-TBF-bFIFO,TBF-RED-bFIFO} offloaded"
+ done
+}
+
+test_etsprio()
+{
+ do_test_etsprio root ""
+}
+
+test_etsprio_port_tbf()
+{
+ with_tbf 1: root \
+ do_test_etsprio "parent 1:1" "-TBF"
+}
+
+cleanup()
+{
+ tc qdisc del dev $h1 root &>/dev/null
+}
+
+trap cleanup EXIT
+h1=${NETIFS[p1]}
+tests_run
+
+exit $EXIT_STATUS
diff --git a/tools/testing/selftests/drivers/net/mlxsw/sch_red_core.sh b/tools/testing/selftests/drivers/net/mlxsw/sch_red_core.sh
index 0d347d48c112..299e06a5808c 100644
--- a/tools/testing/selftests/drivers/net/mlxsw/sch_red_core.sh
+++ b/tools/testing/selftests/drivers/net/mlxsw/sch_red_core.sh
@@ -73,7 +73,7 @@ CHECK_TC="yes"
lib_dir=$(dirname $0)/../../../net/forwarding
source $lib_dir/lib.sh
source $lib_dir/devlink_lib.sh
-source qos_lib.sh
+source mlxsw_lib.sh
ipaddr()
{
@@ -121,6 +121,7 @@ h1_destroy()
h2_create()
{
host_create $h2 2
+ tc qdisc add dev $h2 clsact
# Some of the tests in this suite use multicast traffic. As this traffic
# enters BR2_10 resp. BR2_11, it is flooded to all other ports. Thus
@@ -133,26 +134,27 @@ h2_create()
# cause packets to fail to queue up at $swp3 due to shared buffer
# quotas, and the test to spuriously fail.
#
- # Prevent this by setting the speed of $h2 to 1Gbps.
+ # Prevent this by adding a shaper which limits the traffic in $h2 to
+ # 1Gbps.
- ethtool -s $h2 speed 1000 autoneg off
+ tc qdisc replace dev $h2 root handle 10: tbf rate 1gbit \
+ burst 128K limit 1G
}
h2_destroy()
{
- ethtool -s $h2 autoneg on
+ tc qdisc del dev $h2 root handle 10:
+ tc qdisc del dev $h2 clsact
host_destroy $h2
}
h3_create()
{
host_create $h3 3
- ethtool -s $h3 speed 1000 autoneg off
}
h3_destroy()
{
- ethtool -s $h3 autoneg on
host_destroy $h3
}
@@ -196,8 +198,9 @@ switch_create()
done
done
- for intf in $swp2 $swp3 $swp4 $swp5; do
- ethtool -s $intf speed 1000 autoneg off
+ for intf in $swp3 $swp4; do
+ tc qdisc replace dev $intf root handle 1: tbf rate 1gbit \
+ burst 128K limit 1G
done
ip link set dev br1_10 up
@@ -206,6 +209,7 @@ switch_create()
ip link set dev br2_11 up
local size=$(devlink_pool_size_thtype 0 | cut -d' ' -f 1)
+ devlink_port_pool_th_save $swp3 8
devlink_port_pool_th_set $swp3 8 $size
}
@@ -216,15 +220,13 @@ switch_destroy()
devlink_port_pool_th_restore $swp3 8
- tc qdisc del dev $swp3 root 2>/dev/null
-
ip link set dev br2_11 down
ip link set dev br2_10 down
ip link set dev br1_11 down
ip link set dev br1_10 down
- for intf in $swp5 $swp4 $swp3 $swp2; do
- ethtool -s $intf autoneg on
+ for intf in $swp4 $swp3; do
+ tc qdisc del dev $intf root handle 1:
done
for intf in $swp5 $swp3 $swp2 $swp4 $swp1; do
@@ -328,6 +330,14 @@ get_nmarked()
ethtool_stats_get $swp3 ecn_marked
}
+get_qdisc_nmarked()
+{
+ local vlan=$1; shift
+
+ busywait_for_counter 1100 +1 \
+ qdisc_stats_get $swp3 $(get_qdisc_handle $vlan) .marked
+}
+
get_qdisc_npackets()
{
local vlan=$1; shift
@@ -336,6 +346,17 @@ get_qdisc_npackets()
qdisc_stats_get $swp3 $(get_qdisc_handle $vlan) .packets
}
+send_packets()
+{
+ local vlan=$1; shift
+ local proto=$1; shift
+ local pkts=$1; shift
+
+ $MZ $h2.$vlan -p 8000 -a own -b $h3_mac \
+ -A $(ipaddr 2 $vlan) -B $(ipaddr 3 $vlan) \
+ -t $proto -q -c $pkts "$@"
+}
+
# This sends traffic in an attempt to build a backlog of $size. Returns 0 on
# success. After 10 failed attempts it bails out and returns 1. It dumps the
# backlog size to stdout.
@@ -364,22 +385,21 @@ build_backlog()
return 1
fi
- $MZ $h2.$vlan -p 8000 -a own -b $h3_mac \
- -A $(ipaddr 2 $vlan) -B $(ipaddr 3 $vlan) \
- -t $proto -q -c $pkts "$@"
+ send_packets $vlan $proto $pkts "$@"
done
}
check_marking()
{
+ local get_nmarked=$1; shift
local vlan=$1; shift
local cond=$1; shift
local npackets_0=$(get_qdisc_npackets $vlan)
- local nmarked_0=$(get_nmarked $vlan)
+ local nmarked_0=$($get_nmarked $vlan)
sleep 5
local npackets_1=$(get_qdisc_npackets $vlan)
- local nmarked_1=$(get_nmarked $vlan)
+ local nmarked_1=$($get_nmarked $vlan)
local nmarked_d=$((nmarked_1 - nmarked_0))
local npackets_d=$((npackets_1 - npackets_0))
@@ -392,6 +412,7 @@ check_marking()
ecn_test_common()
{
local name=$1; shift
+ local get_nmarked=$1; shift
local vlan=$1; shift
local limit=$1; shift
local backlog
@@ -404,7 +425,7 @@ ecn_test_common()
RET=0
backlog=$(build_backlog $vlan $((2 * limit / 3)) udp)
check_err $? "Could not build the requested backlog"
- pct=$(check_marking $vlan "== 0")
+ pct=$(check_marking "$get_nmarked" $vlan "== 0")
check_err $? "backlog $backlog / $limit Got $pct% marked packets, expected == 0."
log_test "TC $((vlan - 10)): $name backlog < limit"
@@ -414,22 +435,23 @@ ecn_test_common()
RET=0
backlog=$(build_backlog $vlan $((3 * limit / 2)) tcp tos=0x01)
check_err $? "Could not build the requested backlog"
- pct=$(check_marking $vlan ">= 95")
+ pct=$(check_marking "$get_nmarked" $vlan ">= 95")
check_err $? "backlog $backlog / $limit Got $pct% marked packets, expected >= 95."
log_test "TC $((vlan - 10)): $name backlog > limit"
}
-do_ecn_test()
+__do_ecn_test()
{
+ local get_nmarked=$1; shift
local vlan=$1; shift
local limit=$1; shift
- local name=ECN
+ local name=${1-ECN}; shift
start_tcp_traffic $h1.$vlan $(ipaddr 1 $vlan) $(ipaddr 3 $vlan) \
$h3_mac tos=0x01
sleep 1
- ecn_test_common "$name" $vlan $limit
+ ecn_test_common "$name" "$get_nmarked" $vlan $limit
# Up there we saw that UDP gets accepted when backlog is below the
# limit. Now that it is above, it should all get dropped, and backlog
@@ -443,6 +465,23 @@ do_ecn_test()
sleep 1
}
+do_ecn_test()
+{
+ local vlan=$1; shift
+ local limit=$1; shift
+
+ __do_ecn_test get_nmarked "$vlan" "$limit"
+}
+
+do_ecn_test_perband()
+{
+ local vlan=$1; shift
+ local limit=$1; shift
+
+ mlxsw_only_on_spectrum 3+ || return
+ __do_ecn_test get_qdisc_nmarked "$vlan" "$limit" "per-band ECN"
+}
+
do_ecn_nodrop_test()
{
local vlan=$1; shift
@@ -453,7 +492,7 @@ do_ecn_nodrop_test()
$h3_mac tos=0x01
sleep 1
- ecn_test_common "$name" $vlan $limit
+ ecn_test_common "$name" get_nmarked $vlan $limit
# Up there we saw that UDP gets accepted when backlog is below the
# limit. Now that it is above, in nodrop mode, make sure it goes to
@@ -483,7 +522,7 @@ do_red_test()
RET=0
backlog=$(build_backlog $vlan $((2 * limit / 3)) tcp tos=0x01)
check_err $? "Could not build the requested backlog"
- pct=$(check_marking $vlan "== 0")
+ pct=$(check_marking get_nmarked $vlan "== 0")
check_err $? "backlog $backlog / $limit Got $pct% marked packets, expected == 0."
log_test "TC $((vlan - 10)): RED backlog < limit"
@@ -491,12 +530,12 @@ do_red_test()
RET=0
backlog=$(build_backlog $vlan $((3 * limit / 2)) tcp tos=0x01)
check_fail $? "Traffic went into backlog instead of being early-dropped"
- pct=$(check_marking $vlan "== 0")
+ pct=$(check_marking get_nmarked $vlan "== 0")
check_err $? "backlog $backlog / $limit Got $pct% marked packets, expected == 0."
local diff=$((limit - backlog))
pct=$((100 * diff / limit))
- ((0 <= pct && pct <= 5))
- check_err $? "backlog $backlog / $limit expected <= 5% distance"
+ ((-10 <= pct && pct <= 10))
+ check_err $? "backlog $backlog / $limit expected <= 10% distance"
log_test "TC $((vlan - 10)): RED backlog > limit"
stop_traffic
@@ -531,3 +570,191 @@ do_mc_backlog_test()
log_test "TC $((vlan - 10)): Qdisc reports MC backlog"
}
+
+do_mark_test()
+{
+ local vlan=$1; shift
+ local limit=$1; shift
+ local subtest=$1; shift
+ local fetch_counter=$1; shift
+ local should_fail=$1; shift
+ local base
+
+ mlxsw_only_on_spectrum 2+ || return
+
+ RET=0
+
+ start_tcp_traffic $h1.$vlan $(ipaddr 1 $vlan) $(ipaddr 3 $vlan) \
+ $h3_mac tos=0x01
+
+ # Create a bit of a backlog and observe no mirroring due to marks.
+ qevent_rule_install_$subtest
+
+ build_backlog $vlan $((2 * limit / 3)) tcp tos=0x01 >/dev/null
+
+ base=$($fetch_counter)
+ count=$(busywait 1100 until_counter_is ">= $((base + 1))" \
+ $fetch_counter)
+ check_fail $? "Spurious packets ($base -> $count) observed without buffer pressure"
+
+ # Above limit, everything should be mirrored, we should see lots of
+ # packets.
+ build_backlog $vlan $((3 * limit / 2)) tcp tos=0x01 >/dev/null
+ busywait_for_counter 1100 +10000 \
+ $fetch_counter > /dev/null
+ check_err_fail "$should_fail" $? "ECN-marked packets $subtest'd"
+
+ # When the rule is uninstalled, there should be no mirroring.
+ qevent_rule_uninstall_$subtest
+ busywait_for_counter 1100 +10 \
+ $fetch_counter > /dev/null
+ check_fail $? "Spurious packets observed after uninstall"
+
+ if ((should_fail)); then
+ log_test "TC $((vlan - 10)): marked packets not $subtest'd"
+ else
+ log_test "TC $((vlan - 10)): marked packets $subtest'd"
+ fi
+
+ stop_traffic
+ sleep 1
+}
+
+do_drop_test()
+{
+ local vlan=$1; shift
+ local limit=$1; shift
+ local trigger=$1; shift
+ local subtest=$1; shift
+ local fetch_counter=$1; shift
+ local base
+ local now
+
+ mlxsw_only_on_spectrum 2+ || return
+
+ RET=0
+
+ start_traffic $h1.$vlan $(ipaddr 1 $vlan) $(ipaddr 3 $vlan) $h3_mac
+
+ # Create a bit of a backlog and observe no mirroring due to drops.
+ qevent_rule_install_$subtest
+ base=$($fetch_counter)
+
+ build_backlog $vlan $((2 * limit / 3)) udp >/dev/null
+
+ busywait 1100 until_counter_is ">= $((base + 1))" $fetch_counter >/dev/null
+ check_fail $? "Spurious packets observed without buffer pressure"
+
+ # Push to the queue until it's at the limit. The configured limit is
+ # rounded by the qdisc and then by the driver, so this is the best we
+ # can do to get to the real limit of the system.
+ build_backlog $vlan $((3 * limit / 2)) udp >/dev/null
+
+ base=$($fetch_counter)
+ send_packets $vlan udp 11
+
+ now=$(busywait 1100 until_counter_is ">= $((base + 10))" $fetch_counter)
+ check_err $? "Dropped packets not observed: 11 expected, $((now - base)) seen"
+
+ # When no extra traffic is injected, there should be no mirroring.
+ busywait 1100 until_counter_is ">= $((base + 20))" $fetch_counter >/dev/null
+ check_fail $? "Spurious packets observed"
+
+ # When the rule is uninstalled, there should be no mirroring.
+ qevent_rule_uninstall_$subtest
+ send_packets $vlan udp 11
+ busywait 1100 until_counter_is ">= $((base + 20))" $fetch_counter >/dev/null
+ check_fail $? "Spurious packets observed after uninstall"
+
+ log_test "TC $((vlan - 10)): ${trigger}ped packets $subtest'd"
+
+ stop_traffic
+ sleep 1
+}
+
+qevent_rule_install_mirror()
+{
+ tc filter add block 10 pref 1234 handle 102 matchall skip_sw \
+ action mirred egress mirror dev $swp2 hw_stats disabled
+}
+
+qevent_rule_uninstall_mirror()
+{
+ tc filter del block 10 pref 1234 handle 102 matchall
+}
+
+qevent_counter_fetch_mirror()
+{
+ tc_rule_handle_stats_get "dev $h2 ingress" 101
+}
+
+do_drop_mirror_test()
+{
+ local vlan=$1; shift
+ local limit=$1; shift
+ local qevent_name=$1; shift
+
+ tc filter add dev $h2 ingress pref 1 handle 101 prot ip \
+ flower skip_sw ip_proto udp \
+ action drop
+
+ do_drop_test "$vlan" "$limit" "$qevent_name" mirror \
+ qevent_counter_fetch_mirror
+
+ tc filter del dev $h2 ingress pref 1 handle 101 flower
+}
+
+do_mark_mirror_test()
+{
+ local vlan=$1; shift
+ local limit=$1; shift
+
+ tc filter add dev $h2 ingress pref 1 handle 101 prot ip \
+ flower skip_sw ip_proto tcp \
+ action drop
+
+ do_mark_test "$vlan" "$limit" mirror \
+ qevent_counter_fetch_mirror \
+ $(: should_fail=)0
+
+ tc filter del dev $h2 ingress pref 1 handle 101 flower
+}
+
+qevent_rule_install_trap()
+{
+ tc filter add block 10 pref 1234 handle 102 matchall skip_sw \
+ action trap hw_stats disabled
+}
+
+qevent_rule_uninstall_trap()
+{
+ tc filter del block 10 pref 1234 handle 102 matchall
+}
+
+qevent_counter_fetch_trap()
+{
+ local trap_name=$1; shift
+
+ devlink_trap_rx_packets_get "$trap_name"
+}
+
+do_drop_trap_test()
+{
+ local vlan=$1; shift
+ local limit=$1; shift
+ local trap_name=$1; shift
+
+ do_drop_test "$vlan" "$limit" "$trap_name" trap \
+ "qevent_counter_fetch_trap $trap_name"
+}
+
+qevent_rule_install_trap_fwd()
+{
+ tc filter add block 10 pref 1234 handle 102 matchall skip_sw \
+ action trap_fwd hw_stats disabled
+}
+
+qevent_rule_uninstall_trap_fwd()
+{
+ tc filter del block 10 pref 1234 handle 102 matchall
+}
diff --git a/tools/testing/selftests/drivers/net/mlxsw/sch_red_ets.sh b/tools/testing/selftests/drivers/net/mlxsw/sch_red_ets.sh
index 1c36c576613b..8ecddafa79b3 100755
--- a/tools/testing/selftests/drivers/net/mlxsw/sch_red_ets.sh
+++ b/tools/testing/selftests/drivers/net/mlxsw/sch_red_ets.sh
@@ -4,9 +4,13 @@
ALL_TESTS="
ping_ipv4
ecn_test
+ ecn_test_perband
ecn_nodrop_test
red_test
mc_backlog_test
+ red_mirror_test
+ red_trap_test
+ ecn_mirror_test
"
: ${QDISC:=ets}
source sch_red_core.sh
@@ -19,26 +23,58 @@ source sch_red_core.sh
BACKLOG1=200000
BACKLOG2=500000
-install_qdisc()
+install_root_qdisc()
+{
+ tc qdisc add dev $swp3 parent 1: handle 10: $QDISC \
+ bands 8 priomap 7 6 5 4 3 2 1 0
+}
+
+install_qdisc_tc0()
{
local -a args=("$@")
- tc qdisc add dev $swp3 root handle 10: $QDISC \
- bands 8 priomap 7 6 5 4 3 2 1 0
tc qdisc add dev $swp3 parent 10:8 handle 108: red \
limit 1000000 min $BACKLOG1 max $((BACKLOG1 + 1)) \
probability 1.0 avpkt 8000 burst 38 "${args[@]}"
+}
+
+install_qdisc_tc1()
+{
+ local -a args=("$@")
+
tc qdisc add dev $swp3 parent 10:7 handle 107: red \
limit 1000000 min $BACKLOG2 max $((BACKLOG2 + 1)) \
probability 1.0 avpkt 8000 burst 63 "${args[@]}"
+}
+
+install_qdisc()
+{
+ install_root_qdisc
+ install_qdisc_tc0 "$@"
+ install_qdisc_tc1 "$@"
sleep 1
}
-uninstall_qdisc()
+uninstall_qdisc_tc0()
{
- tc qdisc del dev $swp3 parent 10:7
tc qdisc del dev $swp3 parent 10:8
- tc qdisc del dev $swp3 root
+}
+
+uninstall_qdisc_tc1()
+{
+ tc qdisc del dev $swp3 parent 10:7
+}
+
+uninstall_root_qdisc()
+{
+ tc qdisc del dev $swp3 parent 1:
+}
+
+uninstall_qdisc()
+{
+ uninstall_qdisc_tc0
+ uninstall_qdisc_tc1
+ uninstall_root_qdisc
}
ecn_test()
@@ -51,6 +87,16 @@ ecn_test()
uninstall_qdisc
}
+ecn_test_perband()
+{
+ install_qdisc ecn
+
+ do_ecn_test_perband 10 $BACKLOG1
+ do_ecn_test_perband 11 $BACKLOG2
+
+ uninstall_qdisc
+}
+
ecn_nodrop_test()
{
install_qdisc ecn nodrop
@@ -65,6 +111,13 @@ red_test()
{
install_qdisc
+ # Make sure that we get the non-zero value if there is any.
+ local cur=$(busywait 1100 until_counter_is "> 0" \
+ qdisc_stats_get $swp3 10: .backlog)
+ (( cur == 0 ))
+ check_err $? "backlog of $cur observed on non-busy qdisc"
+ log_test "$QDISC backlog properly cleaned"
+
do_red_test 10 $BACKLOG1
do_red_test 11 $BACKLOG2
@@ -83,12 +136,41 @@ mc_backlog_test()
uninstall_qdisc
}
-trap cleanup EXIT
+red_mirror_test()
+{
+ install_qdisc qevent early_drop block 10
+
+ do_drop_mirror_test 10 $BACKLOG1 early_drop
+ do_drop_mirror_test 11 $BACKLOG2 early_drop
+
+ uninstall_qdisc
+}
+
+red_trap_test()
+{
+ install_qdisc qevent early_drop block 10
+
+ do_drop_trap_test 10 $BACKLOG1 early_drop
+ do_drop_trap_test 11 $BACKLOG2 early_drop
+
+ uninstall_qdisc
+}
+
+ecn_mirror_test()
+{
+ install_qdisc ecn qevent mark block 10
+ do_mark_mirror_test 10 $BACKLOG1
+ do_mark_mirror_test 11 $BACKLOG2
+
+ uninstall_qdisc
+}
+
+bail_on_lldpad "configure DCB" "configure Qdiscs"
+
+trap cleanup EXIT
setup_prepare
setup_wait
-
-bail_on_lldpad
tests_run
exit $EXIT_STATUS
diff --git a/tools/testing/selftests/drivers/net/mlxsw/sch_red_root.sh b/tools/testing/selftests/drivers/net/mlxsw/sch_red_root.sh
index 558667ea11ec..159108d02895 100755
--- a/tools/testing/selftests/drivers/net/mlxsw/sch_red_root.sh
+++ b/tools/testing/selftests/drivers/net/mlxsw/sch_red_root.sh
@@ -4,9 +4,11 @@
ALL_TESTS="
ping_ipv4
ecn_test
+ ecn_test_perband
ecn_nodrop_test
red_test
mc_backlog_test
+ red_mirror_test
"
source sch_red_core.sh
@@ -16,7 +18,7 @@ install_qdisc()
{
local -a args=("$@")
- tc qdisc add dev $swp3 root handle 108: red \
+ tc qdisc add dev $swp3 parent 1: handle 108: red \
limit 1000000 min $BACKLOG max $((BACKLOG + 1)) \
probability 1.0 avpkt 8000 burst 38 "${args[@]}"
sleep 1
@@ -24,7 +26,7 @@ install_qdisc()
uninstall_qdisc()
{
- tc qdisc del dev $swp3 root
+ tc qdisc del dev $swp3 parent 1:
}
ecn_test()
@@ -34,6 +36,13 @@ ecn_test()
uninstall_qdisc
}
+ecn_test_perband()
+{
+ install_qdisc ecn
+ do_ecn_test_perband 10 $BACKLOG
+ uninstall_qdisc
+}
+
ecn_nodrop_test()
{
install_qdisc ecn nodrop
@@ -57,12 +66,18 @@ mc_backlog_test()
uninstall_qdisc
}
-trap cleanup EXIT
+red_mirror_test()
+{
+ install_qdisc qevent early_drop block 10
+ do_drop_mirror_test 10 $BACKLOG
+ uninstall_qdisc
+}
+bail_on_lldpad "configure DCB" "configure Qdiscs"
+
+trap cleanup EXIT
setup_prepare
setup_wait
-
-bail_on_lldpad
tests_run
exit $EXIT_STATUS
diff --git a/tools/testing/selftests/drivers/net/mlxsw/sch_tbf_ets.sh b/tools/testing/selftests/drivers/net/mlxsw/sch_tbf_ets.sh
index c6ce0b448bf3..ecc3664376b3 100755
--- a/tools/testing/selftests/drivers/net/mlxsw/sch_tbf_ets.sh
+++ b/tools/testing/selftests/drivers/net/mlxsw/sch_tbf_ets.sh
@@ -1,8 +1,10 @@
#!/bin/bash
# SPDX-License-Identifier: GPL-2.0
-source qos_lib.sh
-bail_on_lldpad
+sch_tbf_pre_hook()
+{
+ bail_on_lldpad "configure DCB" "configure Qdiscs"
+}
lib_dir=$(dirname $0)/../../../net/forwarding
TCFLAGS=skip_sw
diff --git a/tools/testing/selftests/drivers/net/mlxsw/sch_tbf_prio.sh b/tools/testing/selftests/drivers/net/mlxsw/sch_tbf_prio.sh
index 8d245f331619..2e0a4efb1703 100755
--- a/tools/testing/selftests/drivers/net/mlxsw/sch_tbf_prio.sh
+++ b/tools/testing/selftests/drivers/net/mlxsw/sch_tbf_prio.sh
@@ -1,8 +1,10 @@
#!/bin/bash
# SPDX-License-Identifier: GPL-2.0
-source qos_lib.sh
-bail_on_lldpad
+sch_tbf_pre_hook()
+{
+ bail_on_lldpad "configure DCB" "configure Qdiscs"
+}
lib_dir=$(dirname $0)/../../../net/forwarding
TCFLAGS=skip_sw
diff --git a/tools/testing/selftests/drivers/net/mlxsw/sch_tbf_root.sh b/tools/testing/selftests/drivers/net/mlxsw/sch_tbf_root.sh
index 013886061f15..6679a338dfc4 100755
--- a/tools/testing/selftests/drivers/net/mlxsw/sch_tbf_root.sh
+++ b/tools/testing/selftests/drivers/net/mlxsw/sch_tbf_root.sh
@@ -1,8 +1,10 @@
#!/bin/bash
# SPDX-License-Identifier: GPL-2.0
-source qos_lib.sh
-bail_on_lldpad
+sch_tbf_pre_hook()
+{
+ bail_on_lldpad "configure DCB" "configure Qdiscs"
+}
lib_dir=$(dirname $0)/../../../net/forwarding
TCFLAGS=skip_sw
diff --git a/tools/testing/selftests/drivers/net/mlxsw/sharedbuffer.sh b/tools/testing/selftests/drivers/net/mlxsw/sharedbuffer.sh
index 7d9e73a43a49..0c47faff9274 100755
--- a/tools/testing/selftests/drivers/net/mlxsw/sharedbuffer.sh
+++ b/tools/testing/selftests/drivers/net/mlxsw/sharedbuffer.sh
@@ -98,12 +98,12 @@ sb_occ_etc_check()
port_pool_test()
{
- local exp_max_occ=288
+ local exp_max_occ=$(devlink_cell_size_get)
local max_occ
devlink sb occupancy clearmax $DEVLINK_DEV
- $MZ $h1 -c 1 -p 160 -a $h1mac -b $h2mac -A 192.0.1.1 -B 192.0.1.2 \
+ $MZ $h1 -c 1 -p 10 -a $h1mac -b $h2mac -A 192.0.1.1 -B 192.0.1.2 \
-t ip -q
devlink sb occupancy snapshot $DEVLINK_DEV
@@ -126,12 +126,12 @@ port_pool_test()
port_tc_ip_test()
{
- local exp_max_occ=288
+ local exp_max_occ=$(devlink_cell_size_get)
local max_occ
devlink sb occupancy clearmax $DEVLINK_DEV
- $MZ $h1 -c 1 -p 160 -a $h1mac -b $h2mac -A 192.0.1.1 -B 192.0.1.2 \
+ $MZ $h1 -c 1 -p 10 -a $h1mac -b $h2mac -A 192.0.1.1 -B 192.0.1.2 \
-t ip -q
devlink sb occupancy snapshot $DEVLINK_DEV
@@ -154,16 +154,12 @@ port_tc_ip_test()
port_tc_arp_test()
{
- local exp_max_occ=96
+ local exp_max_occ=$(devlink_cell_size_get)
local max_occ
- if [[ $MLXSW_CHIP != "mlxsw_spectrum" ]]; then
- exp_max_occ=144
- fi
-
devlink sb occupancy clearmax $DEVLINK_DEV
- $MZ $h1 -c 1 -p 160 -a $h1mac -A 192.0.1.1 -t arp -q
+ $MZ $h1 -c 1 -p 10 -a $h1mac -A 192.0.1.1 -t arp -q
devlink sb occupancy snapshot $DEVLINK_DEV
diff --git a/tools/testing/selftests/drivers/net/mlxsw/sharedbuffer_configuration.py b/tools/testing/selftests/drivers/net/mlxsw/sharedbuffer_configuration.py
index 0d4b9327c9b3..2223337eed0c 100755
--- a/tools/testing/selftests/drivers/net/mlxsw/sharedbuffer_configuration.py
+++ b/tools/testing/selftests/drivers/net/mlxsw/sharedbuffer_configuration.py
@@ -1,4 +1,4 @@
-#!/usr/bin/python
+#!/usr/bin/env python
# SPDX-License-Identifier: GPL-2.0
import subprocess
diff --git a/tools/testing/selftests/drivers/net/mlxsw/spectrum-2/port_range_scale.sh b/tools/testing/selftests/drivers/net/mlxsw/spectrum-2/port_range_scale.sh
new file mode 120000
index 000000000000..bd670d9dc4e5
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/mlxsw/spectrum-2/port_range_scale.sh
@@ -0,0 +1 @@
+../spectrum/port_range_scale.sh \ No newline at end of file
diff --git a/tools/testing/selftests/drivers/net/mlxsw/spectrum-2/port_scale.sh b/tools/testing/selftests/drivers/net/mlxsw/spectrum-2/port_scale.sh
new file mode 100644
index 000000000000..0b71dfbbb447
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/mlxsw/spectrum-2/port_scale.sh
@@ -0,0 +1,16 @@
+# SPDX-License-Identifier: GPL-2.0
+source ../port_scale.sh
+
+port_get_target()
+{
+ local should_fail=$1
+ local target
+
+ target=$(devlink_resource_size_get physical_ports)
+
+ if ((! should_fail)); then
+ echo $target
+ else
+ echo $((target + 1))
+ fi
+}
diff --git a/tools/testing/selftests/drivers/net/mlxsw/spectrum-2/resource_scale.sh b/tools/testing/selftests/drivers/net/mlxsw/spectrum-2/resource_scale.sh
index fd583a171db7..a88d8a8c85f2 100755
--- a/tools/testing/selftests/drivers/net/mlxsw/spectrum-2/resource_scale.sh
+++ b/tools/testing/selftests/drivers/net/mlxsw/spectrum-2/resource_scale.sh
@@ -7,12 +7,9 @@ NUM_NETIFS=6
source $lib_dir/lib.sh
source $lib_dir/tc_common.sh
source $lib_dir/devlink_lib.sh
+source ../mlxsw_lib.sh
-if [[ "$DEVLINK_VIDDID" != "15b3:cf6c" && \
- "$DEVLINK_VIDDID" != "15b3:cf70" ]]; then
- echo "SKIP: test is tailored for Mellanox Spectrum-2 and Spectrum-3"
- exit 1
-fi
+mlxsw_only_on_spectrum 2+ || exit 1
current_test=""
@@ -28,8 +25,19 @@ cleanup()
trap cleanup EXIT
-ALL_TESTS="router tc_flower mirror_gre"
+ALL_TESTS="
+ router
+ tc_flower
+ mirror_gre
+ tc_police
+ port
+ rif_mac_profile
+ rif_counter
+ port_range
+"
+
for current_test in ${TESTS:-$ALL_TESTS}; do
+ RET_FIN=0
source ${current_test}_scale.sh
num_netifs_var=${current_test^^}_NUM_NETIFS
@@ -38,18 +46,35 @@ for current_test in ${TESTS:-$ALL_TESTS}; do
for should_fail in 0 1; do
RET=0
target=$(${current_test}_get_target "$should_fail")
+ if ((target == 0)); then
+ log_test_skip "'$current_test' should_fail=$should_fail test"
+ continue
+ fi
+
${current_test}_setup_prepare
setup_wait $num_netifs
+ # Update target in case occupancy of a certain resource changed
+ # following the test setup.
+ target=$(${current_test}_get_target "$should_fail")
${current_test}_test "$target" "$should_fail"
- ${current_test}_cleanup
- devlink_reload
if [[ "$should_fail" -eq 0 ]]; then
log_test "'$current_test' $target"
+
+ if ((!RET)); then
+ tt=${current_test}_traffic_test
+ if [[ $(type -t $tt) == "function" ]]; then
+ $tt "$target"
+ log_test "'$current_test' $target traffic test"
+ fi
+ fi
else
log_test "'$current_test' overflow $target"
fi
+ ${current_test}_cleanup $target
+ devlink_reload
+ RET_FIN=$(( RET_FIN || RET ))
done
done
current_test=""
-exit "$RET"
+exit "$RET_FIN"
diff --git a/tools/testing/selftests/drivers/net/mlxsw/spectrum-2/rif_counter_scale.sh b/tools/testing/selftests/drivers/net/mlxsw/spectrum-2/rif_counter_scale.sh
new file mode 120000
index 000000000000..1f5752e8ffc0
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/mlxsw/spectrum-2/rif_counter_scale.sh
@@ -0,0 +1 @@
+../spectrum/rif_counter_scale.sh \ No newline at end of file
diff --git a/tools/testing/selftests/drivers/net/mlxsw/spectrum-2/rif_mac_profile_scale.sh b/tools/testing/selftests/drivers/net/mlxsw/spectrum-2/rif_mac_profile_scale.sh
new file mode 100644
index 000000000000..303d7cbe3c45
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/mlxsw/spectrum-2/rif_mac_profile_scale.sh
@@ -0,0 +1,16 @@
+# SPDX-License-Identifier: GPL-2.0
+source ../rif_mac_profile_scale.sh
+
+rif_mac_profile_get_target()
+{
+ local should_fail=$1
+ local target
+
+ target=$(devlink_resource_size_get rif_mac_profiles)
+
+ if ((! should_fail)); then
+ echo $target
+ else
+ echo $((target + 1))
+ fi
+}
diff --git a/tools/testing/selftests/drivers/net/mlxsw/spectrum-2/tc_flower.sh b/tools/testing/selftests/drivers/net/mlxsw/spectrum-2/tc_flower.sh
index fb850e0ec837..31252bc8775e 100755
--- a/tools/testing/selftests/drivers/net/mlxsw/spectrum-2/tc_flower.sh
+++ b/tools/testing/selftests/drivers/net/mlxsw/spectrum-2/tc_flower.sh
@@ -10,7 +10,8 @@ lib_dir=$(dirname $0)/../../../../net/forwarding
ALL_TESTS="single_mask_test identical_filters_test two_masks_test \
multiple_masks_test ctcam_edge_cases_test delta_simple_test \
delta_two_masks_one_key_test delta_simple_rehash_test \
- bloom_simple_test bloom_complex_test bloom_delta_test"
+ bloom_simple_test bloom_complex_test bloom_delta_test \
+ max_erp_entries_test max_group_size_test"
NUM_NETIFS=2
source $lib_dir/lib.sh
source $lib_dir/tc_common.sh
@@ -868,7 +869,7 @@ bloom_simple_test()
bloom_complex_test()
{
# Bloom filter index computation is affected from region ID, eRP
- # ID and from the region key size. In order to excercise those parts
+ # ID and from the region key size. In order to exercise those parts
# of the Bloom filter code, use a series of regions, each with a
# different key size and send packet that should hit all of them.
local index
@@ -983,6 +984,109 @@ bloom_delta_test()
log_test "bloom delta test ($tcflags)"
}
+max_erp_entries_test()
+{
+ # The number of eRP entries is limited. Once the maximum number of eRPs
+ # has been reached, filters cannot be added. This test verifies that
+ # when this limit is reached, inserstion fails without crashing.
+
+ RET=0
+
+ local num_masks=32
+ local num_regions=15
+ local chain_failed
+ local mask_failed
+ local ret
+
+ if [[ "$tcflags" != "skip_sw" ]]; then
+ return 0;
+ fi
+
+ for ((i=1; i < $num_regions; i++)); do
+ for ((j=$num_masks; j >= 0; j--)); do
+ tc filter add dev $h2 ingress chain $i protocol ip \
+ pref $i handle $j flower $tcflags \
+ dst_ip 192.1.0.0/$j &> /dev/null
+ ret=$?
+
+ if [ $ret -ne 0 ]; then
+ chain_failed=$i
+ mask_failed=$j
+ break 2
+ fi
+ done
+ done
+
+ # We expect to exceed the maximum number of eRP entries, so that
+ # insertion eventually fails. Otherwise, the test should be adjusted to
+ # add more filters.
+ check_fail $ret "expected to exceed number of eRP entries"
+
+ for ((; i >= 1; i--)); do
+ for ((j=0; j <= $num_masks; j++)); do
+ tc filter del dev $h2 ingress chain $i protocol ip \
+ pref $i handle $j flower &> /dev/null
+ done
+ done
+
+ log_test "max eRP entries test ($tcflags). " \
+ "max chain $chain_failed, mask $mask_failed"
+}
+
+max_group_size_test()
+{
+ # The number of ACLs in an ACL group is limited. Once the maximum
+ # number of ACLs has been reached, filters cannot be added. This test
+ # verifies that when this limit is reached, insertion fails without
+ # crashing.
+
+ RET=0
+
+ local num_acls=32
+ local max_size
+ local ret
+
+ if [[ "$tcflags" != "skip_sw" ]]; then
+ return 0;
+ fi
+
+ for ((i=1; i < $num_acls; i++)); do
+ if [[ $(( i % 2 )) == 1 ]]; then
+ tc filter add dev $h2 ingress pref $i proto ipv4 \
+ flower $tcflags dst_ip 198.51.100.1/32 \
+ ip_proto tcp tcp_flags 0x01/0x01 \
+ action drop &> /dev/null
+ else
+ tc filter add dev $h2 ingress pref $i proto ipv6 \
+ flower $tcflags dst_ip 2001:db8:1::1/128 \
+ action drop &> /dev/null
+ fi
+
+ ret=$?
+ [[ $ret -ne 0 ]] && max_size=$((i - 1)) && break
+ done
+
+ # We expect to exceed the maximum number of ACLs in a group, so that
+ # insertion eventually fails. Otherwise, the test should be adjusted to
+ # add more filters.
+ check_fail $ret "expected to exceed number of ACLs in a group"
+
+ for ((; i >= 1; i--)); do
+ if [[ $(( i % 2 )) == 1 ]]; then
+ tc filter del dev $h2 ingress pref $i proto ipv4 \
+ flower $tcflags dst_ip 198.51.100.1/32 \
+ ip_proto tcp tcp_flags 0x01/0x01 \
+ action drop &> /dev/null
+ else
+ tc filter del dev $h2 ingress pref $i proto ipv6 \
+ flower $tcflags dst_ip 2001:db8:1::1/128 \
+ action drop &> /dev/null
+ fi
+ done
+
+ log_test "max ACL group size test ($tcflags). max size $max_size"
+}
+
setup_prepare()
{
h1=${NETIFS[p1]}
diff --git a/tools/testing/selftests/drivers/net/mlxsw/spectrum-2/tc_flower_scale.sh b/tools/testing/selftests/drivers/net/mlxsw/spectrum-2/tc_flower_scale.sh
index efd798a85931..4444bbace1a9 100644
--- a/tools/testing/selftests/drivers/net/mlxsw/spectrum-2/tc_flower_scale.sh
+++ b/tools/testing/selftests/drivers/net/mlxsw/spectrum-2/tc_flower_scale.sh
@@ -4,17 +4,22 @@ source ../tc_flower_scale.sh
tc_flower_get_target()
{
local should_fail=$1; shift
+ local max_cnts
# The driver associates a counter with each tc filter, which means the
# number of supported filters is bounded by the number of available
# counters.
- # Currently, the driver supports 30K (30,720) flow counters and six of
- # these are used for multicast routing.
- local target=30714
+ max_cnts=$(devlink_resource_size_get counters flow)
+
+ # Remove already allocated counters.
+ ((max_cnts -= $(devlink_resource_occ_get counters flow)))
+
+ # Each rule uses two counters, for packets and bytes.
+ ((max_cnts /= 2))
if ((! should_fail)); then
- echo $target
+ echo $max_cnts
else
- echo $((target + 1))
+ echo $((max_cnts + 1))
fi
}
diff --git a/tools/testing/selftests/drivers/net/mlxsw/spectrum-2/tc_police_scale.sh b/tools/testing/selftests/drivers/net/mlxsw/spectrum-2/tc_police_scale.sh
new file mode 100644
index 000000000000..e79ac0dad1f4
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/mlxsw/spectrum-2/tc_police_scale.sh
@@ -0,0 +1,16 @@
+# SPDX-License-Identifier: GPL-2.0
+source ../tc_police_scale.sh
+
+tc_police_get_target()
+{
+ local should_fail=$1; shift
+ local target
+
+ target=$(devlink_resource_size_get global_policers single_rate_policers)
+
+ if ((! should_fail)); then
+ echo $target
+ else
+ echo $((target + 1))
+ fi
+}
diff --git a/tools/testing/selftests/drivers/net/mlxsw/spectrum-2/vxlan_flooding_ipv6.sh b/tools/testing/selftests/drivers/net/mlxsw/spectrum-2/vxlan_flooding_ipv6.sh
new file mode 100755
index 000000000000..fd23c80eba31
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/mlxsw/spectrum-2/vxlan_flooding_ipv6.sh
@@ -0,0 +1,339 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+#
+# Test VxLAN flooding. The device stores flood records in a singly linked list
+# where each record stores up to four IPv6 addresses of remote VTEPs. The test
+# verifies that packets are correctly flooded in various cases such as deletion
+# of a record in the middle of the list.
+#
+# +-----------------------+
+# | H1 (vrf) |
+# | + $h1 |
+# | | 2001:db8:1::1/64 |
+# +----|------------------+
+# |
+# +----|----------------------------------------------------------------------+
+# | SW | |
+# | +--|--------------------------------------------------------------------+ |
+# | | + $swp1 BR0 (802.1d) | |
+# | | | |
+# | | + vxlan0 (vxlan) | |
+# | | local 2001:db8:2::1 | |
+# | | remote 2001:db8:2::{2..17} | |
+# | | id 10 dstport 4789 | |
+# | +-----------------------------------------------------------------------+ |
+# | |
+# | 2001:db8:2::0/64 via 2001:db8:3::2 |
+# | |
+# | + $rp1 |
+# | | 2001:db8:3::1/64 |
+# +----|----------------------------------------------------------------------+
+# |
+# +----|--------------------------------------------------------+
+# | | R2 (vrf) |
+# | + $rp2 |
+# | 2001:db8:3::2/64 |
+# | |
+# +-------------------------------------------------------------+
+
+lib_dir=$(dirname $0)/../../../../net/forwarding
+
+ALL_TESTS="flooding_test"
+NUM_NETIFS=4
+source $lib_dir/tc_common.sh
+source $lib_dir/lib.sh
+
+h1_create()
+{
+ simple_if_init $h1 2001:db8:1::1/64
+}
+
+h1_destroy()
+{
+ simple_if_fini $h1 2001:db8:1::1/64
+}
+
+switch_create()
+{
+ # Make sure the bridge uses the MAC address of the local port and
+ # not that of the VxLAN's device
+ ip link add dev br0 type bridge mcast_snooping 0
+ ip link set dev br0 address $(mac_get $swp1)
+
+ ip link add name vxlan0 type vxlan id 10 nolearning \
+ udp6zerocsumrx udp6zerocsumtx ttl 20 tos inherit \
+ local 2001:db8:2::1 dstport 4789
+
+ ip address add 2001:db8:2::1/128 dev lo
+
+ ip link set dev $swp1 master br0
+ ip link set dev vxlan0 master br0
+
+ ip link set dev br0 up
+ ip link set dev $swp1 up
+ ip link set dev vxlan0 up
+}
+
+switch_destroy()
+{
+ ip link set dev vxlan0 down
+ ip link set dev $swp1 down
+ ip link set dev br0 down
+
+ ip link set dev vxlan0 nomaster
+ ip link set dev $swp1 nomaster
+
+ ip address del 2001:db8:2::1/128 dev lo
+
+ ip link del dev vxlan0
+
+ ip link del dev br0
+}
+
+router1_create()
+{
+ # This router is in the default VRF, where the VxLAN device is
+ # performing the L3 lookup
+ ip link set dev $rp1 up
+ ip address add 2001:db8:3::1/64 dev $rp1
+ ip route add 2001:db8:2::0/64 via 2001:db8:3::2
+}
+
+router1_destroy()
+{
+ ip route del 2001:db8:2::0/64 via 2001:db8:3::2
+ ip address del 2001:db8:3::1/64 dev $rp1
+ ip link set dev $rp1 down
+}
+
+router2_create()
+{
+ # This router is not in the default VRF, so use simple_if_init()
+ simple_if_init $rp2 2001:db8:3::2/64
+}
+
+router2_destroy()
+{
+ simple_if_fini $rp2 2001:db8:3::2/64
+}
+
+setup_prepare()
+{
+ h1=${NETIFS[p1]}
+ swp1=${NETIFS[p2]}
+
+ rp1=${NETIFS[p3]}
+ rp2=${NETIFS[p4]}
+
+ vrf_prepare
+
+ h1_create
+
+ switch_create
+
+ router1_create
+ router2_create
+
+ forwarding_enable
+}
+
+cleanup()
+{
+ pre_cleanup
+
+ forwarding_restore
+
+ router2_destroy
+ router1_destroy
+
+ switch_destroy
+
+ h1_destroy
+
+ vrf_cleanup
+}
+
+flooding_remotes_add()
+{
+ local num_remotes=$1
+ local lsb
+ local i
+
+ # Prevent unwanted packets from entering the bridge and interfering
+ # with the test.
+ tc qdisc add dev br0 clsact
+ tc filter add dev br0 egress protocol all pref 1 handle 1 \
+ matchall skip_hw action drop
+ tc qdisc add dev $h1 clsact
+ tc filter add dev $h1 egress protocol all pref 1 handle 1 \
+ flower skip_hw dst_mac de:ad:be:ef:13:37 action pass
+ tc filter add dev $h1 egress protocol all pref 2 handle 2 \
+ matchall skip_hw action drop
+
+ for i in $(eval echo {1..$num_remotes}); do
+ lsb=$((i + 1))
+
+ bridge fdb append 00:00:00:00:00:00 dev vxlan0 self \
+ dst 2001:db8:2::$lsb
+ done
+}
+
+flooding_filters_add()
+{
+ local num_remotes=$1
+ local lsb
+ local i
+
+ tc qdisc add dev $rp2 clsact
+
+ for i in $(eval echo {1..$num_remotes}); do
+ lsb=$((i + 1))
+
+ tc filter add dev $rp2 ingress protocol ipv6 pref $i handle $i \
+ flower ip_proto udp dst_ip 2001:db8:2::$lsb \
+ dst_port 4789 skip_sw action drop
+ done
+}
+
+flooding_filters_del()
+{
+ local num_remotes=$1
+ local i
+
+ for i in $(eval echo {1..$num_remotes}); do
+ tc filter del dev $rp2 ingress protocol ipv6 pref $i \
+ handle $i flower
+ done
+
+ tc qdisc del dev $rp2 clsact
+
+ tc filter del dev $h1 egress protocol all pref 2 handle 2 matchall
+ tc filter del dev $h1 egress protocol all pref 1 handle 1 flower
+ tc qdisc del dev $h1 clsact
+ tc filter del dev br0 egress protocol all pref 1 handle 1 matchall
+ tc qdisc del dev br0 clsact
+}
+
+flooding_check_packets()
+{
+ local packets=("$@")
+ local num_remotes=${#packets[@]}
+ local i
+
+ for i in $(eval echo {1..$num_remotes}); do
+ tc_check_packets "dev $rp2 ingress" $i ${packets[i - 1]}
+ check_err $? "remote $i - did not get expected number of packets"
+ done
+}
+
+flooding_test()
+{
+ # Use 16 remote VTEPs that will be stored in 4 records. The array
+ # 'packets' will store how many packets are expected to be received
+ # by each remote VTEP at each stage of the test
+ declare -a packets=(1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1)
+ local num_remotes=16
+
+ RET=0
+
+ # Add FDB entries for remote VTEPs and corresponding tc filters on the
+ # ingress of the nexthop router. These filters will count how many
+ # packets were flooded to each remote VTEP
+ flooding_remotes_add $num_remotes
+ flooding_filters_add $num_remotes
+
+ # Send one packet and make sure it is flooded to all the remote VTEPs
+ $MZ $h1 -q -p 64 -b de:ad:be:ef:13:37 -t ip -c 1
+ flooding_check_packets "${packets[@]}"
+ log_test "flood after 1 packet"
+
+ # Delete the third record which corresponds to VTEPs with LSB 10..13
+ # and check that packet is flooded correctly when we remove a record
+ # from the middle of the list
+ RET=0
+
+ packets=(2 2 2 2 2 2 2 2 1 1 1 1 2 2 2 2)
+ bridge fdb del 00:00:00:00:00:00 dev vxlan0 self dst 2001:db8:2::10
+ bridge fdb del 00:00:00:00:00:00 dev vxlan0 self dst 2001:db8:2::11
+ bridge fdb del 00:00:00:00:00:00 dev vxlan0 self dst 2001:db8:2::12
+ bridge fdb del 00:00:00:00:00:00 dev vxlan0 self dst 2001:db8:2::13
+
+ $MZ $h1 -q -p 64 -b de:ad:be:ef:13:37 -t ip -c 1
+ flooding_check_packets "${packets[@]}"
+ log_test "flood after 2 packets"
+
+ # Delete the first record and make sure the packet is flooded correctly
+ RET=0
+
+ packets=(2 2 2 2 3 3 3 3 1 1 1 1 3 3 3 3)
+ bridge fdb del 00:00:00:00:00:00 dev vxlan0 self dst 2001:db8:2::2
+ bridge fdb del 00:00:00:00:00:00 dev vxlan0 self dst 2001:db8:2::3
+ bridge fdb del 00:00:00:00:00:00 dev vxlan0 self dst 2001:db8:2::4
+ bridge fdb del 00:00:00:00:00:00 dev vxlan0 self dst 2001:db8:2::5
+
+ $MZ $h1 -q -p 64 -b de:ad:be:ef:13:37 -t ip -c 1
+ flooding_check_packets "${packets[@]}"
+ log_test "flood after 3 packets"
+
+ # Delete the last record and make sure the packet is flooded correctly
+ RET=0
+
+ packets=(2 2 2 2 4 4 4 4 1 1 1 1 3 3 3 3)
+ bridge fdb del 00:00:00:00:00:00 dev vxlan0 self dst 2001:db8:2::14
+ bridge fdb del 00:00:00:00:00:00 dev vxlan0 self dst 2001:db8:2::15
+ bridge fdb del 00:00:00:00:00:00 dev vxlan0 self dst 2001:db8:2::16
+ bridge fdb del 00:00:00:00:00:00 dev vxlan0 self dst 2001:db8:2::17
+
+ $MZ -6 $h1 -q -p 64 -b de:ad:be:ef:13:37 -t ip -c 1
+ flooding_check_packets "${packets[@]}"
+ log_test "flood after 4 packets"
+
+ # Delete the last record, one entry at a time and make sure single
+ # entries are correctly removed
+ RET=0
+
+ packets=(2 2 2 2 4 5 5 5 1 1 1 1 3 3 3 3)
+ bridge fdb del 00:00:00:00:00:00 dev vxlan0 self dst 2001:db8:2::6
+
+ $MZ -6 $h1 -q -p 64 -b de:ad:be:ef:13:37 -t ip -c 1
+ flooding_check_packets "${packets[@]}"
+ log_test "flood after 5 packets"
+
+ RET=0
+
+ packets=(2 2 2 2 4 5 6 6 1 1 1 1 3 3 3 3)
+ bridge fdb del 00:00:00:00:00:00 dev vxlan0 self dst 2001:db8:2::7
+
+ $MZ -6 $h1 -q -p 64 -b de:ad:be:ef:13:37 -t ip -c 1
+ flooding_check_packets "${packets[@]}"
+ log_test "flood after 6 packets"
+
+ RET=0
+
+ packets=(2 2 2 2 4 5 6 7 1 1 1 1 3 3 3 3)
+ bridge fdb del 00:00:00:00:00:00 dev vxlan0 self dst 2001:db8:2::8
+
+ $MZ -6 $h1 -q -p 64 -b de:ad:be:ef:13:37 -t ip -c 1
+ flooding_check_packets "${packets[@]}"
+ log_test "flood after 7 packets"
+
+ RET=0
+
+ packets=(2 2 2 2 4 5 6 7 1 1 1 1 3 3 3 3)
+ bridge fdb del 00:00:00:00:00:00 dev vxlan0 self dst 2001:db8:2::9
+
+ $MZ -6 $h1 -q -p 64 -b de:ad:be:ef:13:37 -t ip -c 1
+ flooding_check_packets "${packets[@]}"
+ log_test "flood after 8 packets"
+
+ flooding_filters_del $num_remotes
+}
+
+trap cleanup EXIT
+
+setup_prepare
+setup_wait
+
+tests_run
+
+exit $EXIT_STATUS
diff --git a/tools/testing/selftests/drivers/net/mlxsw/spectrum/devlink_lib_spectrum.sh b/tools/testing/selftests/drivers/net/mlxsw/spectrum/devlink_lib_spectrum.sh
index 73035e25085d..06a80f40daa4 100644
--- a/tools/testing/selftests/drivers/net/mlxsw/spectrum/devlink_lib_spectrum.sh
+++ b/tools/testing/selftests/drivers/net/mlxsw/spectrum/devlink_lib_spectrum.sh
@@ -2,11 +2,9 @@
# SPDX-License-Identifier: GPL-2.0
source "../../../../net/forwarding/devlink_lib.sh"
+source ../mlxsw_lib.sh
-if [ "$DEVLINK_VIDDID" != "15b3:cb84" ]; then
- echo "SKIP: test is tailored for Mellanox Spectrum"
- exit 1
-fi
+mlxsw_only_on_spectrum 1 || exit 1
# Needed for returning to default
declare -A KVD_DEFAULTS
diff --git a/tools/testing/selftests/drivers/net/mlxsw/spectrum/port_range_scale.sh b/tools/testing/selftests/drivers/net/mlxsw/spectrum/port_range_scale.sh
new file mode 100644
index 000000000000..d0847e8ea270
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/mlxsw/spectrum/port_range_scale.sh
@@ -0,0 +1,16 @@
+# SPDX-License-Identifier: GPL-2.0
+source ../port_range_scale.sh
+
+port_range_get_target()
+{
+ local should_fail=$1; shift
+ local target
+
+ target=$(devlink_resource_size_get port_range_registers)
+
+ if ((! should_fail)); then
+ echo $target
+ else
+ echo $((target + 1))
+ fi
+}
diff --git a/tools/testing/selftests/drivers/net/mlxsw/spectrum/port_scale.sh b/tools/testing/selftests/drivers/net/mlxsw/spectrum/port_scale.sh
new file mode 100644
index 000000000000..0b71dfbbb447
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/mlxsw/spectrum/port_scale.sh
@@ -0,0 +1,16 @@
+# SPDX-License-Identifier: GPL-2.0
+source ../port_scale.sh
+
+port_get_target()
+{
+ local should_fail=$1
+ local target
+
+ target=$(devlink_resource_size_get physical_ports)
+
+ if ((! should_fail)); then
+ echo $target
+ else
+ echo $((target + 1))
+ fi
+}
diff --git a/tools/testing/selftests/drivers/net/mlxsw/spectrum/q_in_vni_veto.sh b/tools/testing/selftests/drivers/net/mlxsw/spectrum/q_in_vni_veto.sh
new file mode 100755
index 000000000000..60753d46a2d4
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/mlxsw/spectrum/q_in_vni_veto.sh
@@ -0,0 +1,67 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+lib_dir=$(dirname $0)/../../../../net/forwarding
+
+VXPORT=4789
+
+ALL_TESTS="
+ create_vxlan_on_top_of_8021ad_bridge
+"
+NUM_NETIFS=2
+source $lib_dir/lib.sh
+
+setup_prepare()
+{
+ swp1=${NETIFS[p1]}
+ swp2=${NETIFS[p2]}
+
+ ip link set dev $swp1 up
+ ip link set dev $swp2 up
+}
+
+cleanup()
+{
+ pre_cleanup
+
+ ip link set dev $swp2 down
+ ip link set dev $swp1 down
+}
+
+create_vxlan_on_top_of_8021ad_bridge()
+{
+ RET=0
+
+ ip link add dev br0 type bridge vlan_filtering 1 vlan_protocol 802.1ad \
+ vlan_default_pvid 0 mcast_snooping 0
+ ip link set dev br0 addrgenmode none
+ ip link set dev br0 up
+
+ ip link add name vx100 type vxlan id 1000 local 192.0.2.17 dstport \
+ "$VXPORT" nolearning noudpcsum tos inherit ttl 100
+ ip link set dev vx100 up
+
+ ip link set dev $swp1 master br0
+ ip link set dev vx100 master br0
+
+ bridge vlan add vid 100 dev vx100 pvid untagged 2>/dev/null
+ check_fail $? "802.1ad bridge with VxLAN in Spectrum-1 not rejected"
+
+ bridge vlan add vid 100 dev vx100 pvid untagged 2>&1 >/dev/null \
+ | grep -q mlxsw_spectrum
+ check_err $? "802.1ad bridge with VxLAN in Spectrum-1 rejected without extack"
+
+ log_test "create VxLAN on top of 802.1ad bridge"
+
+ ip link del dev vx100
+ ip link del dev br0
+}
+
+trap cleanup EXIT
+
+setup_prepare
+setup_wait
+
+tests_run
+
+exit $EXIT_STATUS
diff --git a/tools/testing/selftests/drivers/net/mlxsw/spectrum/resource_scale.sh b/tools/testing/selftests/drivers/net/mlxsw/spectrum/resource_scale.sh
index 43ba1b438f6d..f981c957f097 100755
--- a/tools/testing/selftests/drivers/net/mlxsw/spectrum/resource_scale.sh
+++ b/tools/testing/selftests/drivers/net/mlxsw/spectrum/resource_scale.sh
@@ -22,8 +22,19 @@ cleanup()
devlink_sp_read_kvd_defaults
trap cleanup EXIT
-ALL_TESTS="router tc_flower mirror_gre"
+ALL_TESTS="
+ router
+ tc_flower
+ mirror_gre
+ tc_police
+ port
+ rif_mac_profile
+ rif_counter
+ port_range
+"
+
for current_test in ${TESTS:-$ALL_TESTS}; do
+ RET_FIN=0
source ${current_test}_scale.sh
num_netifs_var=${current_test^^}_NUM_NETIFS
@@ -40,18 +51,35 @@ for current_test in ${TESTS:-$ALL_TESTS}; do
for should_fail in 0 1; do
RET=0
target=$(${current_test}_get_target "$should_fail")
+ if ((target == 0)); then
+ log_test_skip "'$current_test' [$profile] should_fail=$should_fail test"
+ continue
+ fi
${current_test}_setup_prepare
setup_wait $num_netifs
+ # Update target in case occupancy of a certain resource
+ # changed following the test setup.
+ target=$(${current_test}_get_target "$should_fail")
${current_test}_test "$target" "$should_fail"
- ${current_test}_cleanup
if [[ "$should_fail" -eq 0 ]]; then
log_test "'$current_test' [$profile] $target"
+
+ if ((!RET)); then
+ tt=${current_test}_traffic_test
+ if [[ $(type -t $tt) == "function" ]]
+ then
+ $tt "$target"
+ log_test "'$current_test' [$profile] $target traffic test"
+ fi
+ fi
else
log_test "'$current_test' [$profile] overflow $target"
fi
+ ${current_test}_cleanup $target
+ RET_FIN=$(( RET_FIN || RET ))
done
done
done
current_test=""
-exit "$RET"
+exit "$RET_FIN"
diff --git a/tools/testing/selftests/drivers/net/mlxsw/spectrum/rif_counter_scale.sh b/tools/testing/selftests/drivers/net/mlxsw/spectrum/rif_counter_scale.sh
new file mode 100644
index 000000000000..d44536276e8a
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/mlxsw/spectrum/rif_counter_scale.sh
@@ -0,0 +1,34 @@
+# SPDX-License-Identifier: GPL-2.0
+source ../rif_counter_scale.sh
+
+rif_counter_get_target()
+{
+ local should_fail=$1; shift
+ local max_cnts
+ local max_rifs
+ local target
+
+ max_rifs=$(devlink_resource_size_get rifs)
+ max_cnts=$(devlink_resource_size_get counters rif)
+
+ # Remove already allocated RIFs.
+ ((max_rifs -= $(devlink_resource_occ_get rifs)))
+
+ # 10 KVD slots per counter, ingress+egress counters per RIF
+ ((max_cnts /= 20))
+
+ # Pointless to run the overflow test if we don't have enough RIFs to
+ # host all the counters.
+ if ((max_cnts > max_rifs && should_fail)); then
+ echo 0
+ return
+ fi
+
+ target=$((max_rifs < max_cnts ? max_rifs : max_cnts))
+
+ if ((! should_fail)); then
+ echo $target
+ else
+ echo $((target + 1))
+ fi
+}
diff --git a/tools/testing/selftests/drivers/net/mlxsw/spectrum/rif_mac_profile_scale.sh b/tools/testing/selftests/drivers/net/mlxsw/spectrum/rif_mac_profile_scale.sh
new file mode 100644
index 000000000000..303d7cbe3c45
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/mlxsw/spectrum/rif_mac_profile_scale.sh
@@ -0,0 +1,16 @@
+# SPDX-License-Identifier: GPL-2.0
+source ../rif_mac_profile_scale.sh
+
+rif_mac_profile_get_target()
+{
+ local should_fail=$1
+ local target
+
+ target=$(devlink_resource_size_get rif_mac_profiles)
+
+ if ((! should_fail)); then
+ echo $target
+ else
+ echo $((target + 1))
+ fi
+}
diff --git a/tools/testing/selftests/drivers/net/mlxsw/spectrum/tc_police_scale.sh b/tools/testing/selftests/drivers/net/mlxsw/spectrum/tc_police_scale.sh
new file mode 100644
index 000000000000..e79ac0dad1f4
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/mlxsw/spectrum/tc_police_scale.sh
@@ -0,0 +1,16 @@
+# SPDX-License-Identifier: GPL-2.0
+source ../tc_police_scale.sh
+
+tc_police_get_target()
+{
+ local should_fail=$1; shift
+ local target
+
+ target=$(devlink_resource_size_get global_policers single_rate_policers)
+
+ if ((! should_fail)); then
+ echo $target
+ else
+ echo $((target + 1))
+ fi
+}
diff --git a/tools/testing/selftests/drivers/net/mlxsw/spectrum/vxlan_flooding_ipv6.sh b/tools/testing/selftests/drivers/net/mlxsw/spectrum/vxlan_flooding_ipv6.sh
new file mode 100755
index 000000000000..d8fd875ad527
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/mlxsw/spectrum/vxlan_flooding_ipv6.sh
@@ -0,0 +1,334 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+#
+# Test VxLAN flooding. The device stores flood records in a singly linked list
+# where each record stores up to five IPv6 addresses of remote VTEPs. The test
+# verifies that packets are correctly flooded in various cases such as deletion
+# of a record in the middle of the list.
+#
+# +-----------------------+
+# | H1 (vrf) |
+# | + $h1 |
+# | | 2001:db8:1::1/64 |
+# +----|------------------+
+# |
+# +----|----------------------------------------------------------------------+
+# | SW | |
+# | +--|--------------------------------------------------------------------+ |
+# | | + $swp1 BR0 (802.1d) | |
+# | | | |
+# | | + vxlan0 (vxlan) | |
+# | | local 2001:db8:2::1 | |
+# | | remote 2001:db8:2::{2..21} | |
+# | | id 10 dstport 4789 | |
+# | +-----------------------------------------------------------------------+ |
+# | |
+# | 2001:db8:2::0/64 via 2001:db8:3::2 |
+# | |
+# | + $rp1 |
+# | | 2001:db8:3::1/64 |
+# +----|----------------------------------------------------------------------+
+# |
+# +----|--------------------------------------------------------+
+# | | R2 (vrf) |
+# | + $rp2 |
+# | 2001:db8:3::2/64 |
+# | |
+# +-------------------------------------------------------------+
+
+lib_dir=$(dirname $0)/../../../../net/forwarding
+
+ALL_TESTS="flooding_test"
+NUM_NETIFS=4
+source $lib_dir/tc_common.sh
+source $lib_dir/lib.sh
+
+h1_create()
+{
+ simple_if_init $h1 2001:db8:1::1/64
+}
+
+h1_destroy()
+{
+ simple_if_fini $h1 2001:db8:1::1/64
+}
+
+switch_create()
+{
+ # Make sure the bridge uses the MAC address of the local port and
+ # not that of the VxLAN's device
+ ip link add dev br0 type bridge mcast_snooping 0
+ ip link set dev br0 address $(mac_get $swp1)
+
+ ip link add name vxlan0 type vxlan id 10 nolearning \
+ udp6zerocsumrx udp6zerocsumtx ttl 20 tos inherit \
+ local 2001:db8:2::1 dstport 4789
+
+ ip address add 2001:db8:2::1/128 dev lo
+
+ ip link set dev $swp1 master br0
+ ip link set dev vxlan0 master br0
+
+ ip link set dev br0 up
+ ip link set dev $swp1 up
+ ip link set dev vxlan0 up
+}
+
+switch_destroy()
+{
+ ip link set dev vxlan0 down
+ ip link set dev $swp1 down
+ ip link set dev br0 down
+
+ ip link set dev vxlan0 nomaster
+ ip link set dev $swp1 nomaster
+
+ ip address del 2001:db8:2::1/128 dev lo
+
+ ip link del dev vxlan0
+
+ ip link del dev br0
+}
+
+router1_create()
+{
+ # This router is in the default VRF, where the VxLAN device is
+ # performing the L3 lookup
+ ip link set dev $rp1 up
+ ip address add 2001:db8:3::1/64 dev $rp1
+ ip route add 2001:db8:2::0/64 via 2001:db8:3::2
+}
+
+router1_destroy()
+{
+ ip route del 2001:db8:2::0/64 via 2001:db8:3::2
+ ip address del 2001:db8:3::1/64 dev $rp1
+ ip link set dev $rp1 down
+}
+
+router2_create()
+{
+ # This router is not in the default VRF, so use simple_if_init()
+ simple_if_init $rp2 2001:db8:3::2/64
+}
+
+router2_destroy()
+{
+ simple_if_fini $rp2 2001:db8:3::2/64
+}
+
+setup_prepare()
+{
+ h1=${NETIFS[p1]}
+ swp1=${NETIFS[p2]}
+
+ rp1=${NETIFS[p3]}
+ rp2=${NETIFS[p4]}
+
+ vrf_prepare
+
+ h1_create
+
+ switch_create
+
+ router1_create
+ router2_create
+
+ forwarding_enable
+}
+
+cleanup()
+{
+ pre_cleanup
+
+ forwarding_restore
+
+ router2_destroy
+ router1_destroy
+
+ switch_destroy
+
+ h1_destroy
+
+ vrf_cleanup
+}
+
+flooding_remotes_add()
+{
+ local num_remotes=$1
+ local lsb
+ local i
+
+ for i in $(eval echo {1..$num_remotes}); do
+ lsb=$((i + 1))
+
+ bridge fdb append 00:00:00:00:00:00 dev vxlan0 self \
+ dst 2001:db8:2::$lsb
+ done
+}
+
+flooding_filters_add()
+{
+ local num_remotes=$1
+ local lsb
+ local i
+
+ tc qdisc add dev $rp2 clsact
+
+ for i in $(eval echo {1..$num_remotes}); do
+ lsb=$((i + 1))
+
+ tc filter add dev $rp2 ingress protocol ipv6 pref $i handle $i \
+ flower ip_proto udp dst_ip 2001:db8:2::$lsb \
+ dst_port 4789 skip_sw action drop
+ done
+}
+
+flooding_filters_del()
+{
+ local num_remotes=$1
+ local i
+
+ for i in $(eval echo {1..$num_remotes}); do
+ tc filter del dev $rp2 ingress protocol ipv6 pref $i \
+ handle $i flower
+ done
+
+ tc qdisc del dev $rp2 clsact
+}
+
+flooding_check_packets()
+{
+ local packets=("$@")
+ local num_remotes=${#packets[@]}
+ local i
+
+ for i in $(eval echo {1..$num_remotes}); do
+ tc_check_packets "dev $rp2 ingress" $i ${packets[i - 1]}
+ check_err $? "remote $i - did not get expected number of packets"
+ done
+}
+
+flooding_test()
+{
+ # Use 20 remote VTEPs that will be stored in 4 records. The array
+ # 'packets' will store how many packets are expected to be received
+ # by each remote VTEP at each stage of the test
+ declare -a packets=(1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1)
+ local num_remotes=20
+
+ RET=0
+
+ # Add FDB entries for remote VTEPs and corresponding tc filters on the
+ # ingress of the nexthop router. These filters will count how many
+ # packets were flooded to each remote VTEP
+ flooding_remotes_add $num_remotes
+ flooding_filters_add $num_remotes
+
+ # Send one packet and make sure it is flooded to all the remote VTEPs
+ $MZ -6 $h1 -q -p 64 -b de:ad:be:ef:13:37 -t ip -c 1
+ flooding_check_packets "${packets[@]}"
+ log_test "flood after 1 packet"
+
+ # Delete the third record which corresponds to VTEPs with LSB 12..16
+ # and check that packet is flooded correctly when we remove a record
+ # from the middle of the list
+ RET=0
+
+ packets=(2 2 2 2 2 2 2 2 2 2 1 1 1 1 1 2 2 2 2 2)
+ bridge fdb del 00:00:00:00:00:00 dev vxlan0 self dst 2001:db8:2::12
+ bridge fdb del 00:00:00:00:00:00 dev vxlan0 self dst 2001:db8:2::13
+ bridge fdb del 00:00:00:00:00:00 dev vxlan0 self dst 2001:db8:2::14
+ bridge fdb del 00:00:00:00:00:00 dev vxlan0 self dst 2001:db8:2::15
+ bridge fdb del 00:00:00:00:00:00 dev vxlan0 self dst 2001:db8:2::16
+
+ $MZ -6 $h1 -q -p 64 -b de:ad:be:ef:13:37 -t ip -c 1
+ flooding_check_packets "${packets[@]}"
+ log_test "flood after 2 packets"
+
+ # Delete the first record and make sure the packet is flooded correctly
+ RET=0
+
+ packets=(2 2 2 2 2 3 3 3 3 3 1 1 1 1 1 3 3 3 3 3)
+ bridge fdb del 00:00:00:00:00:00 dev vxlan0 self dst 2001:db8:2::2
+ bridge fdb del 00:00:00:00:00:00 dev vxlan0 self dst 2001:db8:2::3
+ bridge fdb del 00:00:00:00:00:00 dev vxlan0 self dst 2001:db8:2::4
+ bridge fdb del 00:00:00:00:00:00 dev vxlan0 self dst 2001:db8:2::5
+ bridge fdb del 00:00:00:00:00:00 dev vxlan0 self dst 2001:db8:2::6
+
+ $MZ -6 $h1 -q -p 64 -b de:ad:be:ef:13:37 -t ip -c 1
+ flooding_check_packets "${packets[@]}"
+ log_test "flood after 3 packets"
+
+ # Delete the last record and make sure the packet is flooded correctly
+ RET=0
+
+ packets=(2 2 2 2 2 4 4 4 4 4 1 1 1 1 1 3 3 3 3 3)
+ bridge fdb del 00:00:00:00:00:00 dev vxlan0 self dst 2001:db8:2::17
+ bridge fdb del 00:00:00:00:00:00 dev vxlan0 self dst 2001:db8:2::18
+ bridge fdb del 00:00:00:00:00:00 dev vxlan0 self dst 2001:db8:2::19
+ bridge fdb del 00:00:00:00:00:00 dev vxlan0 self dst 2001:db8:2::20
+ bridge fdb del 00:00:00:00:00:00 dev vxlan0 self dst 2001:db8:2::21
+
+ $MZ -6 $h1 -q -p 64 -b de:ad:be:ef:13:37 -t ip -c 1
+ flooding_check_packets "${packets[@]}"
+ log_test "flood after 4 packets"
+
+ # Delete the last record, one entry at a time and make sure single
+ # entries are correctly removed
+ RET=0
+
+ packets=(2 2 2 2 2 4 5 5 5 5 1 1 1 1 1 3 3 3 3 3)
+ bridge fdb del 00:00:00:00:00:00 dev vxlan0 self dst 2001:db8:2::7
+
+ $MZ -6 $h1 -q -p 64 -b de:ad:be:ef:13:37 -t ip -c 1
+ flooding_check_packets "${packets[@]}"
+ log_test "flood after 5 packets"
+
+ RET=0
+
+ packets=(2 2 2 2 2 4 5 6 6 6 1 1 1 1 1 3 3 3 3 3)
+ bridge fdb del 00:00:00:00:00:00 dev vxlan0 self dst 2001:db8:2::8
+
+ $MZ -6 $h1 -q -p 64 -b de:ad:be:ef:13:37 -t ip -c 1
+ flooding_check_packets "${packets[@]}"
+ log_test "flood after 6 packets"
+
+ RET=0
+
+ packets=(2 2 2 2 2 4 5 6 7 7 1 1 1 1 1 3 3 3 3 3)
+ bridge fdb del 00:00:00:00:00:00 dev vxlan0 self dst 2001:db8:2::9
+
+ $MZ -6 $h1 -q -p 64 -b de:ad:be:ef:13:37 -t ip -c 1
+ flooding_check_packets "${packets[@]}"
+ log_test "flood after 7 packets"
+
+ RET=0
+
+ packets=(2 2 2 2 2 4 5 6 7 8 1 1 1 1 1 3 3 3 3 3)
+ bridge fdb del 00:00:00:00:00:00 dev vxlan0 self dst 2001:db8:2::10
+
+ $MZ -6 $h1 -q -p 64 -b de:ad:be:ef:13:37 -t ip -c 1
+ flooding_check_packets "${packets[@]}"
+ log_test "flood after 8 packets"
+
+ RET=0
+
+ packets=(2 2 2 2 2 4 5 6 7 8 1 1 1 1 1 3 3 3 3 3)
+ bridge fdb del 00:00:00:00:00:00 dev vxlan0 self dst 2001:db8:2::11
+
+ $MZ -6 $h1 -q -p 64 -b de:ad:be:ef:13:37 -t ip -c 1
+ flooding_check_packets "${packets[@]}"
+ log_test "flood after 9 packets"
+
+ flooding_filters_del $num_remotes
+}
+
+trap cleanup EXIT
+
+setup_prepare
+setup_wait
+
+tests_run
+
+exit $EXIT_STATUS
diff --git a/tools/testing/selftests/drivers/net/mlxsw/tc_flower_scale.sh b/tools/testing/selftests/drivers/net/mlxsw/tc_flower_scale.sh
index cc0f07e72cf2..d3d9e60d6ddf 100644
--- a/tools/testing/selftests/drivers/net/mlxsw/tc_flower_scale.sh
+++ b/tools/testing/selftests/drivers/net/mlxsw/tc_flower_scale.sh
@@ -77,6 +77,7 @@ tc_flower_rules_create()
filter add dev $h2 ingress \
prot ipv6 \
pref 1000 \
+ handle 42$i \
flower $tcflags dst_ip $(tc_flower_addr $i) \
action drop
EOF
@@ -98,11 +99,7 @@ __tc_flower_test()
jq -r '[ .[] | select(.kind == "flower") |
.options | .in_hw ]' | jq .[] | wc -l)
[[ $((offload_count - 1)) -eq $count ]]
- if [[ $should_fail -eq 0 ]]; then
- check_err $? "Offload mismatch"
- else
- check_err_fail $should_fail $? "Offload more than expacted"
- fi
+ check_err_fail $should_fail $? "Attempt to offload $count rules (actual result $((offload_count - 1)))"
}
tc_flower_test()
@@ -125,3 +122,19 @@ tc_flower_test()
tcflags="skip_sw"
__tc_flower_test $count $should_fail
}
+
+tc_flower_traffic_test()
+{
+ local count=$1; shift
+ local i;
+
+ for ((i = count - 1; i > 0; i /= 2)); do
+ $MZ -6 $h1 -c 1 -d 20msec -p 100 -a own -b $(mac_get $h2) \
+ -A $(tc_flower_addr 0) -B $(tc_flower_addr $i) \
+ -q -t udp sp=54321,dp=12345
+ done
+ for ((i = count - 1; i > 0; i /= 2)); do
+ tc_check_packets "dev $h2 ingress" 42$i 1
+ check_err $? "Traffic not seen at rule #$i"
+ done
+}
diff --git a/tools/testing/selftests/drivers/net/mlxsw/tc_police_occ.sh b/tools/testing/selftests/drivers/net/mlxsw/tc_police_occ.sh
new file mode 100755
index 000000000000..448b75c1545a
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/mlxsw/tc_police_occ.sh
@@ -0,0 +1,108 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+#
+# Test that policers shared by different tc filters are correctly reference
+# counted by observing policers' occupancy via devlink-resource.
+
+lib_dir=$(dirname $0)/../../../net/forwarding
+
+ALL_TESTS="
+ tc_police_occ_test
+"
+NUM_NETIFS=2
+source $lib_dir/lib.sh
+source $lib_dir/devlink_lib.sh
+
+h1_create()
+{
+ simple_if_init $h1
+}
+
+h1_destroy()
+{
+ simple_if_fini $h1
+}
+
+switch_create()
+{
+ simple_if_init $swp1
+ tc qdisc add dev $swp1 clsact
+}
+
+switch_destroy()
+{
+ tc qdisc del dev $swp1 clsact
+ simple_if_fini $swp1
+}
+
+setup_prepare()
+{
+ h1=${NETIFS[p1]}
+ swp1=${NETIFS[p2]}
+
+ vrf_prepare
+
+ h1_create
+ switch_create
+}
+
+cleanup()
+{
+ pre_cleanup
+
+ switch_destroy
+ h1_destroy
+
+ vrf_cleanup
+}
+
+tc_police_occ_get()
+{
+ devlink_resource_occ_get global_policers single_rate_policers
+}
+
+tc_police_occ_test()
+{
+ RET=0
+
+ local occ=$(tc_police_occ_get)
+
+ tc filter add dev $swp1 ingress pref 1 handle 101 proto ip \
+ flower skip_sw \
+ action police rate 100mbit burst 100k conform-exceed drop/ok
+ (( occ + 1 == $(tc_police_occ_get) ))
+ check_err $? "Got occupancy $(tc_police_occ_get), expected $((occ + 1))"
+
+ tc filter del dev $swp1 ingress pref 1 handle 101 flower
+ (( occ == $(tc_police_occ_get) ))
+ check_err $? "Got occupancy $(tc_police_occ_get), expected $occ"
+
+ tc filter add dev $swp1 ingress pref 1 handle 101 proto ip \
+ flower skip_sw \
+ action police rate 100mbit burst 100k conform-exceed drop/ok \
+ index 10
+ tc filter add dev $swp1 ingress pref 2 handle 102 proto ip \
+ flower skip_sw action police index 10
+
+ (( occ + 1 == $(tc_police_occ_get) ))
+ check_err $? "Got occupancy $(tc_police_occ_get), expected $((occ + 1))"
+
+ tc filter del dev $swp1 ingress pref 2 handle 102 flower
+ (( occ + 1 == $(tc_police_occ_get) ))
+ check_err $? "Got occupancy $(tc_police_occ_get), expected $((occ + 1))"
+
+ tc filter del dev $swp1 ingress pref 1 handle 101 flower
+ (( occ == $(tc_police_occ_get) ))
+ check_err $? "Got occupancy $(tc_police_occ_get), expected $occ"
+
+ log_test "tc police occupancy"
+}
+
+trap cleanup EXIT
+
+setup_prepare
+setup_wait
+
+tests_run
+
+exit $EXIT_STATUS
diff --git a/tools/testing/selftests/drivers/net/mlxsw/tc_police_scale.sh b/tools/testing/selftests/drivers/net/mlxsw/tc_police_scale.sh
new file mode 100644
index 000000000000..86e787895f78
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/mlxsw/tc_police_scale.sh
@@ -0,0 +1,101 @@
+# SPDX-License-Identifier: GPL-2.0
+
+TC_POLICE_NUM_NETIFS=2
+
+tc_police_h1_create()
+{
+ simple_if_init $h1
+}
+
+tc_police_h1_destroy()
+{
+ simple_if_fini $h1
+}
+
+tc_police_switch_create()
+{
+ simple_if_init $swp1
+ tc qdisc add dev $swp1 clsact
+}
+
+tc_police_switch_destroy()
+{
+ tc qdisc del dev $swp1 clsact
+ simple_if_fini $swp1
+}
+
+tc_police_addr()
+{
+ local num=$1; shift
+
+ printf "2001:db8:1::%x" $num
+}
+
+tc_police_rules_create()
+{
+ local count=$1; shift
+ local should_fail=$1; shift
+
+ TC_POLICE_BATCH_FILE="$(mktemp)"
+
+ for ((i = 0; i < count; ++i)); do
+ cat >> $TC_POLICE_BATCH_FILE <<-EOF
+ filter add dev $swp1 ingress \
+ prot ipv6 \
+ pref 1000 \
+ flower skip_sw dst_ip $(tc_police_addr $i) \
+ action police rate 10mbit burst 100k \
+ conform-exceed drop/ok
+ EOF
+ done
+
+ tc -b $TC_POLICE_BATCH_FILE
+ check_err_fail $should_fail $? "Rule insertion"
+}
+
+__tc_police_test()
+{
+ local count=$1; shift
+ local should_fail=$1; shift
+
+ tc_police_rules_create $count $should_fail
+
+ offload_count=$(tc -j filter show dev $swp1 ingress |
+ jq "[.[] | select(.options.in_hw == true)] | length")
+ ((offload_count == count))
+ check_err_fail $should_fail $? "tc police offload count"
+}
+
+tc_police_test()
+{
+ local count=$1; shift
+ local should_fail=$1; shift
+
+ if ! tc_offload_check $TC_POLICE_NUM_NETIFS; then
+ check_err 1 "Could not test offloaded functionality"
+ return
+ fi
+
+ __tc_police_test $count $should_fail
+}
+
+tc_police_setup_prepare()
+{
+ h1=${NETIFS[p1]}
+ swp1=${NETIFS[p2]}
+
+ vrf_prepare
+
+ tc_police_h1_create
+ tc_police_switch_create
+}
+
+tc_police_cleanup()
+{
+ pre_cleanup
+
+ tc_police_switch_destroy
+ tc_police_h1_destroy
+
+ vrf_cleanup
+}
diff --git a/tools/testing/selftests/drivers/net/mlxsw/tc_restrictions.sh b/tools/testing/selftests/drivers/net/mlxsw/tc_restrictions.sh
index 9241250c5921..0441a18f098b 100755
--- a/tools/testing/selftests/drivers/net/mlxsw/tc_restrictions.sh
+++ b/tools/testing/selftests/drivers/net/mlxsw/tc_restrictions.sh
@@ -11,11 +11,16 @@ ALL_TESTS="
matchall_mirror_behind_flower_ingress_test
matchall_sample_behind_flower_ingress_test
matchall_mirror_behind_flower_egress_test
+ matchall_proto_match_test
+ police_limits_test
+ multi_police_test
"
NUM_NETIFS=2
source $lib_dir/tc_common.sh
source $lib_dir/lib.sh
+source $lib_dir/devlink_lib.sh
+source mlxsw_lib.sh
switch_create()
{
@@ -164,7 +169,8 @@ matchall_sample_egress_test()
RET=0
# It is forbidden in mlxsw driver to have matchall with sample action
- # bound on egress
+ # bound on egress. Spectrum-1 specific restriction
+ mlxsw_only_on_spectrum 1 || return
tc qdisc add dev $swp1 clsact
@@ -287,6 +293,96 @@ matchall_mirror_behind_flower_egress_test()
matchall_behind_flower_egress_test "mirror" "mirred egress mirror dev $swp2"
}
+matchall_proto_match_test()
+{
+ RET=0
+
+ tc qdisc add dev $swp1 clsact
+
+ tc filter add dev $swp1 ingress pref 1 proto ip handle 101 \
+ matchall skip_sw \
+ action sample group 1 rate 100
+ check_fail $? "Incorrect success to add matchall rule with protocol match"
+
+ tc qdisc del dev $swp1 clsact
+
+ log_test "matchall protocol match"
+}
+
+police_limits_test()
+{
+ RET=0
+
+ tc qdisc add dev $swp1 clsact
+
+ tc filter add dev $swp1 ingress pref 1 proto ip handle 101 \
+ flower skip_sw \
+ action police rate 0.5kbit burst 1m conform-exceed drop/ok
+ check_fail $? "Incorrect success to add police action with too low rate"
+
+ tc filter add dev $swp1 ingress pref 1 proto ip handle 101 \
+ flower skip_sw \
+ action police rate 2.5tbit burst 1g conform-exceed drop/ok
+ check_fail $? "Incorrect success to add police action with too high rate"
+
+ tc filter add dev $swp1 ingress pref 1 proto ip handle 101 \
+ flower skip_sw \
+ action police rate 1.5kbit burst 1m conform-exceed drop/ok
+ check_err $? "Failed to add police action with low rate"
+
+ tc filter del dev $swp1 ingress protocol ip pref 1 handle 101 flower
+
+ tc filter add dev $swp1 ingress pref 1 proto ip handle 101 \
+ flower skip_sw \
+ action police rate 1.9tbit burst 1g conform-exceed drop/ok
+ check_err $? "Failed to add police action with high rate"
+
+ tc filter del dev $swp1 ingress protocol ip pref 1 handle 101 flower
+
+ tc filter add dev $swp1 ingress pref 1 proto ip handle 101 \
+ flower skip_sw \
+ action police rate 1.5kbit burst 512b conform-exceed drop/ok
+ check_fail $? "Incorrect success to add police action with too low burst size"
+
+ tc filter add dev $swp1 ingress pref 1 proto ip handle 101 \
+ flower skip_sw \
+ action police rate 1.5kbit burst 2k conform-exceed drop/ok
+ check_err $? "Failed to add police action with low burst size"
+
+ tc filter del dev $swp1 ingress protocol ip pref 1 handle 101 flower
+
+ tc qdisc del dev $swp1 clsact
+
+ log_test "police rate and burst limits"
+}
+
+multi_police_test()
+{
+ RET=0
+
+ # It is forbidden in mlxsw driver to have multiple police
+ # actions in a single rule.
+
+ tc qdisc add dev $swp1 clsact
+
+ tc filter add dev $swp1 ingress protocol ip pref 1 handle 101 \
+ flower skip_sw \
+ action police rate 100mbit burst 100k conform-exceed drop/ok
+ check_err $? "Failed to add rule with single police action"
+
+ tc filter del dev $swp1 ingress protocol ip pref 1 handle 101 flower
+
+ tc filter add dev $swp1 ingress protocol ip pref 1 handle 101 \
+ flower skip_sw \
+ action police rate 100mbit burst 100k conform-exceed drop/pipe \
+ action police rate 200mbit burst 200k conform-exceed drop/ok
+ check_fail $? "Incorrect success to add rule with two police actions"
+
+ tc qdisc del dev $swp1 clsact
+
+ log_test "multi police"
+}
+
setup_prepare()
{
swp1=${NETIFS[p1]}
diff --git a/tools/testing/selftests/drivers/net/mlxsw/tc_sample.sh b/tools/testing/selftests/drivers/net/mlxsw/tc_sample.sh
new file mode 100755
index 000000000000..83a0210e7544
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/mlxsw/tc_sample.sh
@@ -0,0 +1,658 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+#
+# Test that packets are sampled when tc-sample is used and that reported
+# metadata is correct. Two sets of hosts (with and without LAG) are used, since
+# metadata extraction in mlxsw is a bit different when LAG is involved.
+#
+# +---------------------------------+ +---------------------------------+
+# | H1 (vrf) | | H3 (vrf) |
+# | + $h1 | | + $h3_lag |
+# | | 192.0.2.1/28 | | | 192.0.2.17/28 |
+# | | | | | |
+# | | default via 192.0.2.2 | | | default via 192.0.2.18 |
+# +----|----------------------------+ +----|----------------------------+
+# | |
+# +----|-----------------------------------------|----------------------------+
+# | | 192.0.2.2/28 | 192.0.2.18/28 |
+# | + $rp1 + $rp3_lag |
+# | |
+# | + $rp2 + $rp4_lag |
+# | | 198.51.100.2/28 | 198.51.100.18/28 |
+# +----|-----------------------------------------|----------------------------+
+# | |
+# +----|----------------------------+ +----|----------------------------+
+# | | default via 198.51.100.2 | | | default via 198.51.100.18 |
+# | | | | | |
+# | | 198.51.100.1/28 | | | 198.51.100.17/28 |
+# | + $h2 | | + $h4_lag |
+# | H2 (vrf) | | H4 (vrf) |
+# +---------------------------------+ +---------------------------------+
+
+lib_dir=$(dirname $0)/../../../net/forwarding
+
+ALL_TESTS="
+ tc_sample_rate_test
+ tc_sample_max_rate_test
+ tc_sample_conflict_test
+ tc_sample_group_conflict_test
+ tc_sample_md_iif_test
+ tc_sample_md_lag_iif_test
+ tc_sample_md_oif_test
+ tc_sample_md_lag_oif_test
+ tc_sample_md_out_tc_test
+ tc_sample_md_out_tc_occ_test
+ tc_sample_md_latency_test
+ tc_sample_acl_group_conflict_test
+ tc_sample_acl_rate_test
+ tc_sample_acl_max_rate_test
+"
+NUM_NETIFS=8
+CAPTURE_FILE=$(mktemp)
+source $lib_dir/lib.sh
+source $lib_dir/devlink_lib.sh
+source mlxsw_lib.sh
+
+# Available at https://github.com/Mellanox/libpsample
+require_command psample
+
+h1_create()
+{
+ simple_if_init $h1 192.0.2.1/28
+
+ ip -4 route add default vrf v$h1 nexthop via 192.0.2.2
+}
+
+h1_destroy()
+{
+ ip -4 route del default vrf v$h1 nexthop via 192.0.2.2
+
+ simple_if_fini $h1 192.0.2.1/28
+}
+
+h2_create()
+{
+ simple_if_init $h2 198.51.100.1/28
+
+ ip -4 route add default vrf v$h2 nexthop via 198.51.100.2
+}
+
+h2_destroy()
+{
+ ip -4 route del default vrf v$h2 nexthop via 198.51.100.2
+
+ simple_if_fini $h2 198.51.100.1/28
+}
+
+h3_create()
+{
+ ip link set dev $h3 down
+ ip link add name ${h3}_bond type bond mode 802.3ad
+ ip link set dev $h3 master ${h3}_bond
+
+ simple_if_init ${h3}_bond 192.0.2.17/28
+
+ ip -4 route add default vrf v${h3}_bond nexthop via 192.0.2.18
+}
+
+h3_destroy()
+{
+ ip -4 route del default vrf v${h3}_bond nexthop via 192.0.2.18
+
+ simple_if_fini ${h3}_bond 192.0.2.17/28
+
+ ip link set dev $h3 nomaster
+ ip link del dev ${h3}_bond
+}
+
+h4_create()
+{
+ ip link set dev $h4 down
+ ip link add name ${h4}_bond type bond mode 802.3ad
+ ip link set dev $h4 master ${h4}_bond
+
+ simple_if_init ${h4}_bond 198.51.100.17/28
+
+ ip -4 route add default vrf v${h4}_bond nexthop via 198.51.100.18
+}
+
+h4_destroy()
+{
+ ip -4 route del default vrf v${h4}_bond nexthop via 198.51.100.18
+
+ simple_if_fini ${h4}_bond 198.51.100.17/28
+
+ ip link set dev $h4 nomaster
+ ip link del dev ${h4}_bond
+}
+
+router_create()
+{
+ ip link set dev $rp1 up
+ __addr_add_del $rp1 add 192.0.2.2/28
+ tc qdisc add dev $rp1 clsact
+
+ ip link set dev $rp2 up
+ __addr_add_del $rp2 add 198.51.100.2/28
+ tc qdisc add dev $rp2 clsact
+
+ ip link add name ${rp3}_bond type bond mode 802.3ad
+ ip link set dev $rp3 master ${rp3}_bond
+ __addr_add_del ${rp3}_bond add 192.0.2.18/28
+ tc qdisc add dev $rp3 clsact
+ ip link set dev ${rp3}_bond up
+
+ ip link add name ${rp4}_bond type bond mode 802.3ad
+ ip link set dev $rp4 master ${rp4}_bond
+ __addr_add_del ${rp4}_bond add 198.51.100.18/28
+ tc qdisc add dev $rp4 clsact
+ ip link set dev ${rp4}_bond up
+}
+
+router_destroy()
+{
+ ip link set dev ${rp4}_bond down
+ tc qdisc del dev $rp4 clsact
+ __addr_add_del ${rp4}_bond del 198.51.100.18/28
+ ip link set dev $rp4 nomaster
+ ip link del dev ${rp4}_bond
+
+ ip link set dev ${rp3}_bond down
+ tc qdisc del dev $rp3 clsact
+ __addr_add_del ${rp3}_bond del 192.0.2.18/28
+ ip link set dev $rp3 nomaster
+ ip link del dev ${rp3}_bond
+
+ tc qdisc del dev $rp2 clsact
+ __addr_add_del $rp2 del 198.51.100.2/28
+ ip link set dev $rp2 down
+
+ tc qdisc del dev $rp1 clsact
+ __addr_add_del $rp1 del 192.0.2.2/28
+ ip link set dev $rp1 down
+}
+
+setup_prepare()
+{
+ h1=${NETIFS[p1]}
+ rp1=${NETIFS[p2]}
+ rp2=${NETIFS[p3]}
+ h2=${NETIFS[p4]}
+ h3=${NETIFS[p5]}
+ rp3=${NETIFS[p6]}
+ h4=${NETIFS[p7]}
+ rp4=${NETIFS[p8]}
+
+ vrf_prepare
+
+ h1_create
+ h2_create
+ h3_create
+ h4_create
+ router_create
+}
+
+cleanup()
+{
+ pre_cleanup
+
+ rm -f $CAPTURE_FILE
+
+ router_destroy
+ h4_destroy
+ h3_destroy
+ h2_destroy
+ h1_destroy
+
+ vrf_cleanup
+}
+
+psample_capture_start()
+{
+ rm -f $CAPTURE_FILE
+
+ psample &> $CAPTURE_FILE &
+
+ sleep 1
+}
+
+psample_capture_stop()
+{
+ { kill %% && wait %%; } 2>/dev/null
+}
+
+__tc_sample_rate_test()
+{
+ local desc=$1; shift
+ local dip=$1; shift
+ local pkts pct
+
+ RET=0
+
+ tc filter add dev $rp1 ingress protocol all pref 1 handle 101 matchall \
+ skip_sw action sample rate 32 group 1
+ check_err $? "Failed to configure sampling rule"
+
+ psample_capture_start
+
+ ip vrf exec v$h1 $MZ $h1 -c 320000 -d 100usec -p 64 -A 192.0.2.1 \
+ -B $dip -t udp dp=52768,sp=42768 -q
+
+ psample_capture_stop
+
+ pkts=$(grep -e "group 1 " $CAPTURE_FILE | wc -l)
+ pct=$((100 * (pkts - 10000) / 10000))
+ (( -25 <= pct && pct <= 25))
+ check_err $? "Expected 10000 packets, got $pkts packets, which is $pct% off. Required accuracy is +-25%"
+
+ log_test "tc sample rate ($desc)"
+
+ tc filter del dev $rp1 ingress protocol all pref 1 handle 101 matchall
+}
+
+tc_sample_rate_test()
+{
+ __tc_sample_rate_test "forward" 198.51.100.1
+ __tc_sample_rate_test "local receive" 192.0.2.2
+}
+
+tc_sample_max_rate_test()
+{
+ RET=0
+
+ tc filter add dev $rp1 ingress protocol all pref 1 handle 101 matchall \
+ skip_sw action sample rate $((35 * 10 ** 8)) group 1
+ check_err $? "Failed to configure sampling rule with max rate"
+
+ tc filter del dev $rp1 ingress protocol all pref 1 handle 101 matchall
+
+ tc filter add dev $rp1 ingress protocol all pref 1 handle 101 matchall \
+ skip_sw action sample rate $((35 * 10 ** 8 + 1)) \
+ group 1 &> /dev/null
+ check_fail $? "Managed to configure sampling rate above maximum"
+
+ log_test "tc sample maximum rate"
+}
+
+tc_sample_conflict_test()
+{
+ RET=0
+
+ # Test that two sampling rules cannot be configured on the same port,
+ # even when they share the same parameters.
+
+ tc filter add dev $rp1 ingress protocol all pref 1 handle 101 matchall \
+ skip_sw action sample rate 1024 group 1
+ check_err $? "Failed to configure sampling rule"
+
+ tc filter add dev $rp1 ingress protocol all pref 2 handle 102 matchall \
+ skip_sw action sample rate 1024 group 1 &> /dev/null
+ check_fail $? "Managed to configure second sampling rule"
+
+ # Delete the first rule and make sure the second rule can now be
+ # configured.
+
+ tc filter del dev $rp1 ingress protocol all pref 1 handle 101 matchall
+
+ tc filter add dev $rp1 ingress protocol all pref 2 handle 102 matchall \
+ skip_sw action sample rate 1024 group 1
+ check_err $? "Failed to configure sampling rule after deletion"
+
+ log_test "tc sample conflict test"
+
+ tc filter del dev $rp1 ingress protocol all pref 2 handle 102 matchall
+}
+
+tc_sample_group_conflict_test()
+{
+ RET=0
+
+ # Test that two sampling rules cannot be configured on the same port
+ # with different groups.
+
+ tc filter add dev $rp1 ingress protocol all pref 1 handle 101 matchall \
+ skip_sw action sample rate 1024 group 1
+ check_err $? "Failed to configure sampling rule"
+
+ tc filter add dev $rp1 ingress protocol all pref 2 handle 102 matchall \
+ skip_sw action sample rate 1024 group 2 &> /dev/null
+ check_fail $? "Managed to configure sampling rule with conflicting group"
+
+ log_test "tc sample group conflict test"
+
+ tc filter del dev $rp1 ingress protocol all pref 1 handle 101 matchall
+}
+
+tc_sample_md_iif_test()
+{
+ local rp1_ifindex
+
+ RET=0
+
+ tc filter add dev $rp1 ingress protocol all pref 1 handle 101 matchall \
+ skip_sw action sample rate 5 group 1
+ check_err $? "Failed to configure sampling rule"
+
+ psample_capture_start
+
+ ip vrf exec v$h1 $MZ $h1 -c 3200 -d 1msec -p 64 -A 192.0.2.1 \
+ -B 198.51.100.1 -t udp dp=52768,sp=42768 -q
+
+ psample_capture_stop
+
+ rp1_ifindex=$(ip -j -p link show dev $rp1 | jq '.[]["ifindex"]')
+ grep -q -e "in-ifindex $rp1_ifindex " $CAPTURE_FILE
+ check_err $? "Sampled packets do not have expected in-ifindex"
+
+ log_test "tc sample iif"
+
+ tc filter del dev $rp1 ingress protocol all pref 1 handle 101 matchall
+}
+
+tc_sample_md_lag_iif_test()
+{
+ local rp3_ifindex
+
+ RET=0
+
+ tc filter add dev $rp3 ingress protocol all pref 1 handle 101 matchall \
+ skip_sw action sample rate 5 group 1
+ check_err $? "Failed to configure sampling rule"
+
+ psample_capture_start
+
+ ip vrf exec v${h3}_bond $MZ ${h3}_bond -c 3200 -d 1msec -p 64 \
+ -A 192.0.2.17 -B 198.51.100.17 -t udp dp=52768,sp=42768 -q
+
+ psample_capture_stop
+
+ rp3_ifindex=$(ip -j -p link show dev $rp3 | jq '.[]["ifindex"]')
+ grep -q -e "in-ifindex $rp3_ifindex " $CAPTURE_FILE
+ check_err $? "Sampled packets do not have expected in-ifindex"
+
+ log_test "tc sample lag iif"
+
+ tc filter del dev $rp3 ingress protocol all pref 1 handle 101 matchall
+}
+
+tc_sample_md_oif_test()
+{
+ local rp2_ifindex
+
+ RET=0
+
+ tc filter add dev $rp1 ingress protocol all pref 1 handle 101 matchall \
+ skip_sw action sample rate 5 group 1
+ check_err $? "Failed to configure sampling rule"
+
+ psample_capture_start
+
+ ip vrf exec v$h1 $MZ $h1 -c 3200 -d 1msec -p 64 -A 192.0.2.1 \
+ -B 198.51.100.1 -t udp dp=52768,sp=42768 -q
+
+ psample_capture_stop
+
+ rp2_ifindex=$(ip -j -p link show dev $rp2 | jq '.[]["ifindex"]')
+ grep -q -e "out-ifindex $rp2_ifindex " $CAPTURE_FILE
+ check_err $? "Sampled packets do not have expected out-ifindex"
+
+ log_test "tc sample oif"
+
+ tc filter del dev $rp1 ingress protocol all pref 1 handle 101 matchall
+}
+
+tc_sample_md_lag_oif_test()
+{
+ local rp4_ifindex
+
+ RET=0
+
+ tc filter add dev $rp3 ingress protocol all pref 1 handle 101 matchall \
+ skip_sw action sample rate 5 group 1
+ check_err $? "Failed to configure sampling rule"
+
+ psample_capture_start
+
+ ip vrf exec v${h3}_bond $MZ ${h3}_bond -c 3200 -d 1msec -p 64 \
+ -A 192.0.2.17 -B 198.51.100.17 -t udp dp=52768,sp=42768 -q
+
+ psample_capture_stop
+
+ rp4_ifindex=$(ip -j -p link show dev $rp4 | jq '.[]["ifindex"]')
+ grep -q -e "out-ifindex $rp4_ifindex " $CAPTURE_FILE
+ check_err $? "Sampled packets do not have expected out-ifindex"
+
+ log_test "tc sample lag oif"
+
+ tc filter del dev $rp3 ingress protocol all pref 1 handle 101 matchall
+}
+
+tc_sample_md_out_tc_test()
+{
+ RET=0
+
+ # Output traffic class is not supported on Spectrum-1.
+ mlxsw_only_on_spectrum 2+ || return
+
+ tc filter add dev $rp1 ingress protocol all pref 1 handle 101 matchall \
+ skip_sw action sample rate 5 group 1
+ check_err $? "Failed to configure sampling rule"
+
+ # By default, all the packets should go to the same traffic class (0).
+
+ psample_capture_start
+
+ ip vrf exec v$h1 $MZ $h1 -c 3200 -d 1msec -p 64 -A 192.0.2.1 \
+ -B 198.51.100.1 -t udp dp=52768,sp=42768 -q
+
+ psample_capture_stop
+
+ grep -q -e "out-tc 0 " $CAPTURE_FILE
+ check_err $? "Sampled packets do not have expected out-tc (0)"
+
+ # Map all priorities to highest traffic class (7) and check reported
+ # out-tc.
+ tc qdisc replace dev $rp2 root handle 1: \
+ prio bands 3 priomap 0 0 0 0 0 0 0 0
+
+ psample_capture_start
+
+ ip vrf exec v$h1 $MZ $h1 -c 3200 -d 1msec -p 64 -A 192.0.2.1 \
+ -B 198.51.100.1 -t udp dp=52768,sp=42768 -q
+
+ psample_capture_stop
+
+ grep -q -e "out-tc 7 " $CAPTURE_FILE
+ check_err $? "Sampled packets do not have expected out-tc (7)"
+
+ log_test "tc sample out-tc"
+
+ tc qdisc del dev $rp2 root handle 1:
+ tc filter del dev $rp1 ingress protocol all pref 1 handle 101 matchall
+}
+
+tc_sample_md_out_tc_occ_test()
+{
+ local backlog pct occ
+
+ RET=0
+
+ # Output traffic class occupancy is not supported on Spectrum-1.
+ mlxsw_only_on_spectrum 2+ || return
+
+ tc filter add dev $rp1 ingress protocol all pref 1 handle 101 matchall \
+ skip_sw action sample rate 1024 group 1
+ check_err $? "Failed to configure sampling rule"
+
+ # Configure a shaper on egress to create congestion.
+ tc qdisc replace dev $rp2 root handle 1: \
+ tbf rate 1Mbit burst 256k limit 1M
+
+ psample_capture_start
+
+ ip vrf exec v$h1 $MZ $h1 -c 0 -d 1usec -p 1400 -A 192.0.2.1 \
+ -B 198.51.100.1 -t udp dp=52768,sp=42768 -q &
+
+ # Allow congestion to reach steady state.
+ sleep 10
+
+ backlog=$(tc -j -p -s qdisc show dev $rp2 | jq '.[0]["backlog"]')
+
+ # Kill mausezahn.
+ { kill %% && wait %%; } 2>/dev/null
+
+ psample_capture_stop
+
+ # Record last congestion sample.
+ occ=$(grep -e "out-tc-occ " $CAPTURE_FILE | tail -n 1 | \
+ cut -d ' ' -f 16)
+
+ pct=$((100 * (occ - backlog) / backlog))
+ (( -1 <= pct && pct <= 1))
+ check_err $? "Recorded a congestion of $backlog bytes, but sampled congestion is $occ bytes, which is $pct% off. Required accuracy is +-5%"
+
+ log_test "tc sample out-tc-occ"
+
+ tc qdisc del dev $rp2 root handle 1:
+ tc filter del dev $rp1 ingress protocol all pref 1 handle 101 matchall
+}
+
+tc_sample_md_latency_test()
+{
+ RET=0
+
+ # Egress sampling not supported on Spectrum-1.
+ mlxsw_only_on_spectrum 2+ || return
+
+ tc filter add dev $rp2 egress protocol all pref 1 handle 101 matchall \
+ skip_sw action sample rate 5 group 1
+ check_err $? "Failed to configure sampling rule"
+
+ psample_capture_start
+
+ ip vrf exec v$h1 $MZ $h1 -c 3200 -d 1msec -p 64 -A 192.0.2.1 \
+ -B 198.51.100.1 -t udp dp=52768,sp=42768 -q
+
+ psample_capture_stop
+
+ grep -q -e "latency " $CAPTURE_FILE
+ check_err $? "Sampled packets do not have latency attribute"
+
+ log_test "tc sample latency"
+
+ tc filter del dev $rp2 egress protocol all pref 1 handle 101 matchall
+}
+
+tc_sample_acl_group_conflict_test()
+{
+ RET=0
+
+ # Test that two flower sampling rules cannot be configured on the same
+ # port with different groups.
+
+ # Policy-based sampling is not supported on Spectrum-1.
+ mlxsw_only_on_spectrum 2+ || return
+
+ tc filter add dev $rp1 ingress protocol ip pref 1 handle 101 flower \
+ skip_sw action sample rate 1024 group 1
+ check_err $? "Failed to configure sampling rule"
+
+ tc filter add dev $rp1 ingress protocol ip pref 2 handle 102 flower \
+ skip_sw action sample rate 1024 group 1
+ check_err $? "Failed to configure sampling rule with same group"
+
+ tc filter add dev $rp1 ingress protocol ip pref 3 handle 103 flower \
+ skip_sw action sample rate 1024 group 2 &> /dev/null
+ check_fail $? "Managed to configure sampling rule with conflicting group"
+
+ log_test "tc sample (w/ flower) group conflict test"
+
+ tc filter del dev $rp1 ingress protocol ip pref 2 handle 102 flower
+ tc filter del dev $rp1 ingress protocol ip pref 1 handle 101 flower
+}
+
+__tc_sample_acl_rate_test()
+{
+ local bind=$1; shift
+ local port=$1; shift
+ local pkts pct
+
+ RET=0
+
+ # Policy-based sampling is not supported on Spectrum-1.
+ mlxsw_only_on_spectrum 2+ || return
+
+ tc filter add dev $port $bind protocol ip pref 1 handle 101 flower \
+ skip_sw dst_ip 198.51.100.1 action sample rate 32 group 1
+ check_err $? "Failed to configure sampling rule"
+
+ psample_capture_start
+
+ ip vrf exec v$h1 $MZ $h1 -c 320000 -d 100usec -p 64 -A 192.0.2.1 \
+ -B 198.51.100.1 -t udp dp=52768,sp=42768 -q
+
+ psample_capture_stop
+
+ pkts=$(grep -e "group 1 " $CAPTURE_FILE | wc -l)
+ pct=$((100 * (pkts - 10000) / 10000))
+ (( -25 <= pct && pct <= 25))
+ check_err $? "Expected 10000 packets, got $pkts packets, which is $pct% off. Required accuracy is +-25%"
+
+ # Setup a filter that should not match any packet and make sure packets
+ # are not sampled.
+ tc filter del dev $port $bind protocol ip pref 1 handle 101 flower
+
+ tc filter add dev $port $bind protocol ip pref 1 handle 101 flower \
+ skip_sw dst_ip 198.51.100.10 action sample rate 32 group 1
+ check_err $? "Failed to configure sampling rule"
+
+ psample_capture_start
+
+ ip vrf exec v$h1 $MZ $h1 -c 3200 -d 1msec -p 64 -A 192.0.2.1 \
+ -B 198.51.100.1 -t udp dp=52768,sp=42768 -q
+
+ psample_capture_stop
+
+ grep -q -e "group 1 " $CAPTURE_FILE
+ check_fail $? "Sampled packets when should not"
+
+ log_test "tc sample (w/ flower) rate ($bind)"
+
+ tc filter del dev $port $bind protocol ip pref 1 handle 101 flower
+}
+
+tc_sample_acl_rate_test()
+{
+ __tc_sample_acl_rate_test ingress $rp1
+ __tc_sample_acl_rate_test egress $rp2
+}
+
+tc_sample_acl_max_rate_test()
+{
+ RET=0
+
+ # Policy-based sampling is not supported on Spectrum-1.
+ mlxsw_only_on_spectrum 2+ || return
+
+ tc filter add dev $rp1 ingress protocol ip pref 1 handle 101 flower \
+ skip_sw action sample rate $((2 ** 24 - 1)) group 1
+ check_err $? "Failed to configure sampling rule with max rate"
+
+ tc filter del dev $rp1 ingress protocol ip pref 1 handle 101 flower
+
+ tc filter add dev $rp1 ingress protocol ip pref 1 handle 101 flower \
+ skip_sw action sample rate $((2 ** 24)) \
+ group 1 &> /dev/null
+ check_fail $? "Managed to configure sampling rate above maximum"
+
+ log_test "tc sample (w/ flower) maximum rate"
+}
+
+trap cleanup EXIT
+
+setup_prepare
+setup_wait
+
+tests_run
+
+exit $EXIT_STATUS
diff --git a/tools/testing/selftests/drivers/net/mlxsw/vxlan.sh b/tools/testing/selftests/drivers/net/mlxsw/vxlan.sh
index 729a86cc4ede..4687b0a7dffb 100755
--- a/tools/testing/selftests/drivers/net/mlxsw/vxlan.sh
+++ b/tools/testing/selftests/drivers/net/mlxsw/vxlan.sh
@@ -4,10 +4,35 @@
# Test various aspects of VxLAN offloading which are specific to mlxsw, such
# as sanitization of invalid configurations and offload indication.
-lib_dir=$(dirname $0)/../../../net/forwarding
+: ${ADDR_FAMILY:=ipv4}
+export ADDR_FAMILY
+
+: ${LOCAL_IP_1:=198.51.100.1}
+export LOCAL_IP_1
+
+: ${LOCAL_IP_2:=198.51.100.2}
+export LOCAL_IP_2
+
+: ${PREFIX_LEN:=32}
+export PREFIX_LEN
+
+: ${UDPCSUM_FLAFS:=noudpcsum}
+export UDPCSUM_FLAFS
+
+: ${MC_IP:=239.0.0.1}
+export MC_IP
-ALL_TESTS="sanitization_test offload_indication_test \
- sanitization_vlan_aware_test offload_indication_vlan_aware_test"
+: ${IP_FLAG:=""}
+export IP_FLAG
+
+: ${ALL_TESTS:="
+ sanitization_test
+ offload_indication_test
+ sanitization_vlan_aware_test
+ offload_indication_vlan_aware_test
+"}
+
+lib_dir=$(dirname $0)/../../../net/forwarding
NUM_NETIFS=2
: ${TIMEOUT:=20000} # ms
source $lib_dir/lib.sh
@@ -63,8 +88,8 @@ sanitization_single_dev_valid_test()
ip link add dev br0 type bridge mcast_snooping 0
- ip link add name vxlan0 up type vxlan id 10 nolearning noudpcsum \
- ttl 20 tos inherit local 198.51.100.1 dstport 4789
+ ip link add name vxlan0 up type vxlan id 10 nolearning $UDPCSUM_FLAFS \
+ ttl 20 tos inherit local $LOCAL_IP_1 dstport 4789
sanitization_single_dev_test_pass
@@ -80,8 +105,8 @@ sanitization_single_dev_vlan_aware_test()
ip link add dev br0 type bridge mcast_snooping 0 vlan_filtering 1
- ip link add name vxlan0 up type vxlan id 10 nolearning noudpcsum \
- ttl 20 tos inherit local 198.51.100.1 dstport 4789
+ ip link add name vxlan0 up type vxlan id 10 nolearning $UDPCSUM_FLAFS \
+ ttl 20 tos inherit local $LOCAL_IP_1 dstport 4789
sanitization_single_dev_test_pass
@@ -97,8 +122,8 @@ sanitization_single_dev_mcast_enabled_test()
ip link add dev br0 type bridge
- ip link add name vxlan0 up type vxlan id 10 nolearning noudpcsum \
- ttl 20 tos inherit local 198.51.100.1 dstport 4789
+ ip link add name vxlan0 up type vxlan id 10 nolearning $UDPCSUM_FLAFS \
+ ttl 20 tos inherit local $LOCAL_IP_1 dstport 4789
sanitization_single_dev_test_fail
@@ -115,9 +140,9 @@ sanitization_single_dev_mcast_group_test()
ip link add dev br0 type bridge mcast_snooping 0
ip link add name dummy1 up type dummy
- ip link add name vxlan0 up type vxlan id 10 nolearning noudpcsum \
- ttl 20 tos inherit local 198.51.100.1 dstport 4789 \
- dev dummy1 group 239.0.0.1
+ ip link add name vxlan0 up type vxlan id 10 nolearning $UDPCSUM_FLAFS \
+ ttl 20 tos inherit local $LOCAL_IP_1 dstport 4789 \
+ dev dummy1 group $MC_IP
sanitization_single_dev_test_fail
@@ -134,7 +159,7 @@ sanitization_single_dev_no_local_ip_test()
ip link add dev br0 type bridge mcast_snooping 0
- ip link add name vxlan0 up type vxlan id 10 nolearning noudpcsum \
+ ip link add name vxlan0 up type vxlan id 10 nolearning $UDPCSUM_FLAFS \
ttl 20 tos inherit dstport 4789
sanitization_single_dev_test_fail
@@ -145,31 +170,14 @@ sanitization_single_dev_no_local_ip_test()
log_test "vxlan device with no local ip"
}
-sanitization_single_dev_local_ipv6_test()
-{
- RET=0
-
- ip link add dev br0 type bridge mcast_snooping 0
-
- ip link add name vxlan0 up type vxlan id 10 nolearning noudpcsum \
- ttl 20 tos inherit local 2001:db8::1 dstport 4789
-
- sanitization_single_dev_test_fail
-
- ip link del dev vxlan0
- ip link del dev br0
-
- log_test "vxlan device with local ipv6 address"
-}
-
-sanitization_single_dev_learning_enabled_test()
+sanitization_single_dev_learning_enabled_ipv4_test()
{
RET=0
ip link add dev br0 type bridge mcast_snooping 0
- ip link add name vxlan0 up type vxlan id 10 learning noudpcsum \
- ttl 20 tos inherit local 198.51.100.1 dstport 4789
+ ip link add name vxlan0 up type vxlan id 10 learning $UDPCSUM_FLAFS \
+ ttl 20 tos inherit local $LOCAL_IP_1 dstport 4789
sanitization_single_dev_test_pass
@@ -186,8 +194,8 @@ sanitization_single_dev_local_interface_test()
ip link add dev br0 type bridge mcast_snooping 0
ip link add name dummy1 up type dummy
- ip link add name vxlan0 up type vxlan id 10 nolearning noudpcsum \
- ttl 20 tos inherit local 198.51.100.1 dstport 4789 dev dummy1
+ ip link add name vxlan0 up type vxlan id 10 nolearning $UDPCSUM_FLAFS \
+ ttl 20 tos inherit local $LOCAL_IP_1 dstport 4789 dev dummy1
sanitization_single_dev_test_fail
@@ -204,8 +212,8 @@ sanitization_single_dev_port_range_test()
ip link add dev br0 type bridge mcast_snooping 0
- ip link add name vxlan0 up type vxlan id 10 nolearning noudpcsum \
- ttl 20 tos inherit local 198.51.100.1 dstport 4789 \
+ ip link add name vxlan0 up type vxlan id 10 nolearning $UDPCSUM_FLAFS \
+ ttl 20 tos inherit local $LOCAL_IP_1 dstport 4789 \
srcport 4000 5000
sanitization_single_dev_test_fail
@@ -222,8 +230,8 @@ sanitization_single_dev_tos_static_test()
ip link add dev br0 type bridge mcast_snooping 0
- ip link add name vxlan0 up type vxlan id 10 nolearning noudpcsum \
- ttl 20 tos 20 local 198.51.100.1 dstport 4789
+ ip link add name vxlan0 up type vxlan id 10 nolearning $UDPCSUM_FLAFS \
+ ttl 20 tos 20 local $LOCAL_IP_1 dstport 4789
sanitization_single_dev_test_fail
@@ -239,8 +247,8 @@ sanitization_single_dev_ttl_inherit_test()
ip link add dev br0 type bridge mcast_snooping 0
- ip link add name vxlan0 up type vxlan id 10 nolearning noudpcsum \
- ttl inherit tos inherit local 198.51.100.1 dstport 4789
+ ip link add name vxlan0 up type vxlan id 10 nolearning $UDPCSUM_FLAFS \
+ ttl inherit tos inherit local $LOCAL_IP_1 dstport 4789
sanitization_single_dev_test_fail
@@ -250,14 +258,14 @@ sanitization_single_dev_ttl_inherit_test()
log_test "vxlan device with inherit ttl"
}
-sanitization_single_dev_udp_checksum_test()
+sanitization_single_dev_udp_checksum_ipv4_test()
{
RET=0
ip link add dev br0 type bridge mcast_snooping 0
ip link add name vxlan0 up type vxlan id 10 nolearning udpcsum \
- ttl 20 tos inherit local 198.51.100.1 dstport 4789
+ ttl 20 tos inherit local $LOCAL_IP_1 dstport 4789
sanitization_single_dev_test_fail
@@ -276,13 +284,12 @@ sanitization_single_dev_test()
sanitization_single_dev_mcast_enabled_test
sanitization_single_dev_mcast_group_test
sanitization_single_dev_no_local_ip_test
- sanitization_single_dev_local_ipv6_test
- sanitization_single_dev_learning_enabled_test
+ sanitization_single_dev_learning_enabled_"$ADDR_FAMILY"_test
sanitization_single_dev_local_interface_test
sanitization_single_dev_port_range_test
sanitization_single_dev_tos_static_test
sanitization_single_dev_ttl_inherit_test
- sanitization_single_dev_udp_checksum_test
+ sanitization_single_dev_udp_checksum_"$ADDR_FAMILY"_test
}
sanitization_multi_devs_test_pass()
@@ -334,10 +341,10 @@ sanitization_multi_devs_valid_test()
ip link add dev br0 type bridge mcast_snooping 0
ip link add dev br1 type bridge mcast_snooping 0
- ip link add name vxlan0 up type vxlan id 10 nolearning noudpcsum \
- ttl 20 tos inherit local 198.51.100.1 dstport 4789
- ip link add name vxlan1 up type vxlan id 20 nolearning noudpcsum \
- ttl 20 tos inherit local 198.51.100.1 dstport 4789
+ ip link add name vxlan0 up type vxlan id 10 nolearning $UDPCSUM_FLAFS \
+ ttl 20 tos inherit local $LOCAL_IP_1 dstport 4789
+ ip link add name vxlan1 up type vxlan id 20 nolearning $UDPCSUM_FLAFS \
+ ttl 20 tos inherit local $LOCAL_IP_1 dstport 4789
sanitization_multi_devs_test_pass
@@ -356,10 +363,10 @@ sanitization_multi_devs_ttl_test()
ip link add dev br0 type bridge mcast_snooping 0
ip link add dev br1 type bridge mcast_snooping 0
- ip link add name vxlan0 up type vxlan id 10 nolearning noudpcsum \
- ttl 20 tos inherit local 198.51.100.1 dstport 4789
- ip link add name vxlan1 up type vxlan id 20 nolearning noudpcsum \
- ttl 40 tos inherit local 198.51.100.1 dstport 4789
+ ip link add name vxlan0 up type vxlan id 10 nolearning $UDPCSUM_FLAFS \
+ ttl 20 tos inherit local $LOCAL_IP_1 dstport 4789
+ ip link add name vxlan1 up type vxlan id 20 nolearning $UDPCSUM_FLAFS \
+ ttl 40 tos inherit local $LOCAL_IP_1 dstport 4789
sanitization_multi_devs_test_fail
@@ -378,10 +385,10 @@ sanitization_multi_devs_udp_dstport_test()
ip link add dev br0 type bridge mcast_snooping 0
ip link add dev br1 type bridge mcast_snooping 0
- ip link add name vxlan0 up type vxlan id 10 nolearning noudpcsum \
- ttl 20 tos inherit local 198.51.100.1 dstport 4789
- ip link add name vxlan1 up type vxlan id 20 nolearning noudpcsum \
- ttl 20 tos inherit local 198.51.100.1 dstport 5789
+ ip link add name vxlan0 up type vxlan id 10 nolearning $UDPCSUM_FLAFS \
+ ttl 20 tos inherit local $LOCAL_IP_1 dstport 4789
+ ip link add name vxlan1 up type vxlan id 20 nolearning $UDPCSUM_FLAFS \
+ ttl 20 tos inherit local $LOCAL_IP_1 dstport 5789
sanitization_multi_devs_test_fail
@@ -400,10 +407,10 @@ sanitization_multi_devs_local_ip_test()
ip link add dev br0 type bridge mcast_snooping 0
ip link add dev br1 type bridge mcast_snooping 0
- ip link add name vxlan0 up type vxlan id 10 nolearning noudpcsum \
- ttl 20 tos inherit local 198.51.100.1 dstport 4789
- ip link add name vxlan1 up type vxlan id 20 nolearning noudpcsum \
- ttl 20 tos inherit local 198.51.100.2 dstport 4789
+ ip link add name vxlan0 up type vxlan id 10 nolearning $UDPCSUM_FLAFS \
+ ttl 20 tos inherit local $LOCAL_IP_1 dstport 4789
+ ip link add name vxlan1 up type vxlan id 20 nolearning $UDPCSUM_FLAFS \
+ ttl 20 tos inherit local $LOCAL_IP_2 dstport 4789
sanitization_multi_devs_test_fail
@@ -437,18 +444,22 @@ offload_indication_setup_create()
{
# Create a simple setup with two bridges, each with a VxLAN device
# and one local port
- ip link add name br0 up type bridge mcast_snooping 0
- ip link add name br1 up type bridge mcast_snooping 0
+ ip link add name br0 type bridge mcast_snooping 0
+ ip link set dev br0 addrgenmode none
+ ip link set dev br0 up
+ ip link add name br1 type bridge mcast_snooping 0
+ ip link set dev br1 addrgenmode none
+ ip link set dev br1 up
ip link set dev $swp1 master br0
ip link set dev $swp2 master br1
- ip address add 198.51.100.1/32 dev lo
+ ip address add $LOCAL_IP_1/$PREFIX_LEN dev lo
ip link add name vxlan0 up master br0 type vxlan id 10 nolearning \
- noudpcsum ttl 20 tos inherit local 198.51.100.1 dstport 4789
+ $UDPCSUM_FLAFS ttl 20 tos inherit local $LOCAL_IP_1 dstport 4789
ip link add name vxlan1 up master br1 type vxlan id 20 nolearning \
- noudpcsum ttl 20 tos inherit local 198.51.100.1 dstport 4789
+ $UDPCSUM_FLAFS ttl 20 tos inherit local $LOCAL_IP_1 dstport 4789
}
offload_indication_setup_destroy()
@@ -456,7 +467,7 @@ offload_indication_setup_destroy()
ip link del dev vxlan1
ip link del dev vxlan0
- ip address del 198.51.100.1/32 dev lo
+ ip address del $LOCAL_IP_1/$PREFIX_LEN dev lo
ip link set dev $swp2 nomaster
ip link set dev $swp1 nomaster
@@ -469,7 +480,7 @@ offload_indication_fdb_flood_test()
{
RET=0
- bridge fdb append 00:00:00:00:00:00 dev vxlan0 self dst 198.51.100.2
+ bridge fdb append 00:00:00:00:00:00 dev vxlan0 self dst $LOCAL_IP_2
busywait "$TIMEOUT" wait_for_offload grep_bridge_fdb 00:00:00:00:00:00 \
bridge fdb show brport vxlan0
@@ -485,7 +496,7 @@ offload_indication_fdb_bridge_test()
RET=0
bridge fdb add de:ad:be:ef:13:37 dev vxlan0 self master static \
- dst 198.51.100.2
+ dst $LOCAL_IP_2
busywait "$TIMEOUT" wait_for_offload grep_bridge_fdb \
de:ad:be:ef:13:37 self bridge fdb show brport vxlan0
@@ -536,7 +547,7 @@ offload_indication_fdb_bridge_test()
# marked as offloaded in both drivers
RET=0
- bridge fdb add de:ad:be:ef:13:37 dev vxlan0 self dst 198.51.100.2
+ bridge fdb add de:ad:be:ef:13:37 dev vxlan0 self dst $LOCAL_IP_2
busywait "$TIMEOUT" wait_for_offload grep_bridge_fdb \
de:ad:be:ef:13:37 self bridge fdb show brport vxlan0
check_err $?
@@ -560,17 +571,17 @@ offload_indication_decap_route_test()
RET=0
busywait "$TIMEOUT" wait_for_offload \
- ip route show table local 198.51.100.1
+ ip $IP_FLAG route show table local $LOCAL_IP_1
check_err $?
ip link set dev vxlan0 down
busywait "$TIMEOUT" wait_for_offload \
- ip route show table local 198.51.100.1
+ ip $IP_FLAG route show table local $LOCAL_IP_1
check_err $?
ip link set dev vxlan1 down
busywait "$TIMEOUT" not wait_for_offload \
- ip route show table local 198.51.100.1
+ ip $IP_FLAG route show table local $LOCAL_IP_1
check_err $?
log_test "vxlan decap route - vxlan device down"
@@ -579,26 +590,26 @@ offload_indication_decap_route_test()
ip link set dev vxlan1 up
busywait "$TIMEOUT" wait_for_offload \
- ip route show table local 198.51.100.1
+ ip $IP_FLAG route show table local $LOCAL_IP_1
check_err $?
ip link set dev vxlan0 up
busywait "$TIMEOUT" wait_for_offload \
- ip route show table local 198.51.100.1
+ ip $IP_FLAG route show table local $LOCAL_IP_1
check_err $?
log_test "vxlan decap route - vxlan device up"
RET=0
- ip address delete 198.51.100.1/32 dev lo
+ ip address delete $LOCAL_IP_1/$PREFIX_LEN dev lo
busywait "$TIMEOUT" not wait_for_offload \
- ip route show table local 198.51.100.1
+ ip $IP_FLAG route show table local $LOCAL_IP_1
check_err $?
- ip address add 198.51.100.1/32 dev lo
+ ip address add $LOCAL_IP_1/$PREFIX_LEN dev lo
busywait "$TIMEOUT" wait_for_offload \
- ip route show table local 198.51.100.1
+ ip $IP_FLAG route show table local $LOCAL_IP_1
check_err $?
log_test "vxlan decap route - add local route"
@@ -607,18 +618,18 @@ offload_indication_decap_route_test()
ip link set dev $swp1 nomaster
busywait "$TIMEOUT" wait_for_offload \
- ip route show table local 198.51.100.1
+ ip $IP_FLAG route show table local $LOCAL_IP_1
check_err $?
ip link set dev $swp2 nomaster
busywait "$TIMEOUT" not wait_for_offload \
- ip route show table local 198.51.100.1
+ ip $IP_FLAG route show table local $LOCAL_IP_1
check_err $?
ip link set dev $swp1 master br0
ip link set dev $swp2 master br1
busywait "$TIMEOUT" wait_for_offload \
- ip route show table local 198.51.100.1
+ ip $IP_FLAG route show table local $LOCAL_IP_1
check_err $?
log_test "vxlan decap route - local ports enslavement"
@@ -627,44 +638,48 @@ offload_indication_decap_route_test()
ip link del dev br0
busywait "$TIMEOUT" wait_for_offload \
- ip route show table local 198.51.100.1
+ ip $IP_FLAG route show table local $LOCAL_IP_1
check_err $?
ip link del dev br1
busywait "$TIMEOUT" not wait_for_offload \
- ip route show table local 198.51.100.1
+ ip $IP_FLAG route show table local $LOCAL_IP_1
check_err $?
log_test "vxlan decap route - bridge device deletion"
RET=0
- ip link add name br0 up type bridge mcast_snooping 0
- ip link add name br1 up type bridge mcast_snooping 0
+ ip link add name br0 type bridge mcast_snooping 0
+ ip link set dev br0 addrgenmode none
+ ip link set dev br0 up
+ ip link add name br1 type bridge mcast_snooping 0
+ ip link set dev br1 addrgenmode none
+ ip link set dev br1 up
ip link set dev $swp1 master br0
ip link set dev $swp2 master br1
ip link set dev vxlan0 master br0
ip link set dev vxlan1 master br1
busywait "$TIMEOUT" wait_for_offload \
- ip route show table local 198.51.100.1
+ ip $IP_FLAG route show table local $LOCAL_IP_1
check_err $?
ip link del dev vxlan0
busywait "$TIMEOUT" wait_for_offload \
- ip route show table local 198.51.100.1
+ ip $IP_FLAG route show table local $LOCAL_IP_1
check_err $?
ip link del dev vxlan1
busywait "$TIMEOUT" not wait_for_offload \
- ip route show table local 198.51.100.1
+ ip $IP_FLAG route show table local $LOCAL_IP_1
check_err $?
log_test "vxlan decap route - vxlan device deletion"
ip link add name vxlan0 up master br0 type vxlan id 10 nolearning \
- noudpcsum ttl 20 tos inherit local 198.51.100.1 dstport 4789
+ $UDPCSUM_FLAFS ttl 20 tos inherit local $LOCAL_IP_1 dstport 4789
ip link add name vxlan1 up master br1 type vxlan id 20 nolearning \
- noudpcsum ttl 20 tos inherit local 198.51.100.1 dstport 4789
+ $UDPCSUM_FLAFS ttl 20 tos inherit local $LOCAL_IP_1 dstport 4789
}
check_fdb_offloaded()
@@ -721,10 +736,10 @@ __offload_indication_join_vxlan_first()
local mac=00:11:22:33:44:55
local zmac=00:00:00:00:00:00
- bridge fdb append $zmac dev vxlan0 self dst 198.51.100.2
+ bridge fdb append $zmac dev vxlan0 self dst $LOCAL_IP_2
ip link set dev vxlan0 master br0
- bridge fdb add dev vxlan0 $mac self master static dst 198.51.100.2
+ bridge fdb add dev vxlan0 $mac self master static dst $LOCAL_IP_2
RET=0
check_vxlan_fdb_not_offloaded
@@ -773,9 +788,11 @@ __offload_indication_join_vxlan_first()
offload_indication_join_vxlan_first()
{
- ip link add dev br0 up type bridge mcast_snooping 0
- ip link add name vxlan0 up type vxlan id 10 nolearning noudpcsum \
- ttl 20 tos inherit local 198.51.100.1 dstport 4789
+ ip link add dev br0 type bridge mcast_snooping 0
+ ip link set dev br0 addrgenmode none
+ ip link set dev br0 up
+ ip link add name vxlan0 up type vxlan id 10 nolearning $UDPCSUM_FLAFS \
+ ttl 20 tos inherit local $LOCAL_IP_1 dstport 4789
__offload_indication_join_vxlan_first
@@ -789,7 +806,7 @@ __offload_indication_join_vxlan_last()
RET=0
- bridge fdb append $zmac dev vxlan0 self dst 198.51.100.2
+ bridge fdb append $zmac dev vxlan0 self dst $LOCAL_IP_2
ip link set dev $swp1 master br0
@@ -808,9 +825,11 @@ __offload_indication_join_vxlan_last()
offload_indication_join_vxlan_last()
{
- ip link add dev br0 up type bridge mcast_snooping 0
- ip link add name vxlan0 up type vxlan id 10 nolearning noudpcsum \
- ttl 20 tos inherit local 198.51.100.1 dstport 4789
+ ip link add dev br0 type bridge mcast_snooping 0
+ ip link set dev br0 addrgenmode none
+ ip link set dev br0 up
+ ip link add name vxlan0 up type vxlan id 10 nolearning $UDPCSUM_FLAFS \
+ ttl 20 tos inherit local $LOCAL_IP_1 dstport 4789
__offload_indication_join_vxlan_last
@@ -835,12 +854,13 @@ sanitization_vlan_aware_test()
RET=0
ip link add dev br0 type bridge mcast_snooping 0 vlan_filtering 1
+ ip link set dev br0 addrgenmode none
ip link add name vxlan10 up master br0 type vxlan id 10 nolearning \
- noudpcsum ttl 20 tos inherit local 198.51.100.1 dstport 4789
+ $UDPCSUM_FLAFS ttl 20 tos inherit local $LOCAL_IP_1 dstport 4789
ip link add name vxlan20 up master br0 type vxlan id 20 nolearning \
- noudpcsum ttl 20 tos inherit local 198.51.100.1 dstport 4789
+ $UDPCSUM_FLAFS ttl 20 tos inherit local $LOCAL_IP_1 dstport 4789
# Test that when each VNI is mapped to a different VLAN we can enslave
# a port to the bridge
@@ -884,20 +904,20 @@ sanitization_vlan_aware_test()
# Use the offload indication of the local route to ensure the VXLAN
# configuration was correctly rollbacked.
- ip address add 198.51.100.1/32 dev lo
+ ip address add $LOCAL_IP_1/$PREFIX_LEN dev lo
ip link set dev vxlan10 type vxlan ttl 10
ip link set dev $swp1 master br0 &> /dev/null
check_fail $?
busywait "$TIMEOUT" not wait_for_offload \
- ip route show table local 198.51.100.1
+ ip $IP_FLAG route show table local $LOCAL_IP_1
check_err $?
log_test "vlan-aware - failed enslavement to bridge due to conflict"
ip link set dev vxlan10 type vxlan ttl 20
- ip address del 198.51.100.1/32 dev lo
+ ip address del $LOCAL_IP_1/$PREFIX_LEN dev lo
ip link del dev vxlan20
ip link del dev vxlan10
@@ -908,20 +928,22 @@ offload_indication_vlan_aware_setup_create()
{
# Create a simple setup with two VxLAN devices and a single VLAN-aware
# bridge
- ip link add name br0 up type bridge mcast_snooping 0 vlan_filtering 1 \
+ ip link add name br0 type bridge mcast_snooping 0 vlan_filtering 1 \
vlan_default_pvid 0
+ ip link set dev br0 addrgenmode none
+ ip link set dev br0 up
ip link set dev $swp1 master br0
bridge vlan add vid 10 dev $swp1
bridge vlan add vid 20 dev $swp1
- ip address add 198.51.100.1/32 dev lo
+ ip address add $LOCAL_IP_1/$PREFIX_LEN dev lo
ip link add name vxlan10 up master br0 type vxlan id 10 nolearning \
- noudpcsum ttl 20 tos inherit local 198.51.100.1 dstport 4789
+ $UDPCSUM_FLAFS ttl 20 tos inherit local $LOCAL_IP_1 dstport 4789
ip link add name vxlan20 up master br0 type vxlan id 20 nolearning \
- noudpcsum ttl 20 tos inherit local 198.51.100.1 dstport 4789
+ $UDPCSUM_FLAFS ttl 20 tos inherit local $LOCAL_IP_1 dstport 4789
bridge vlan add vid 10 dev vxlan10 pvid untagged
bridge vlan add vid 20 dev vxlan20 pvid untagged
@@ -935,7 +957,7 @@ offload_indication_vlan_aware_setup_destroy()
ip link del dev vxlan20
ip link del dev vxlan10
- ip address del 198.51.100.1/32 dev lo
+ ip address del $LOCAL_IP_1/$PREFIX_LEN dev lo
bridge vlan del vid 20 dev $swp1
bridge vlan del vid 10 dev $swp1
@@ -952,7 +974,7 @@ offload_indication_vlan_aware_fdb_test()
log_info "vxlan entry offload indication - vlan-aware"
bridge fdb add de:ad:be:ef:13:37 dev vxlan10 self master static \
- dst 198.51.100.2 vlan 10
+ dst $LOCAL_IP_2 vlan 10
busywait "$TIMEOUT" wait_for_offload grep_bridge_fdb \
de:ad:be:ef:13:37 self bridge fdb show brport vxlan10
@@ -1003,7 +1025,7 @@ offload_indication_vlan_aware_fdb_test()
# marked as offloaded in both drivers
RET=0
- bridge fdb add de:ad:be:ef:13:37 dev vxlan10 self dst 198.51.100.2
+ bridge fdb add de:ad:be:ef:13:37 dev vxlan10 self dst $LOCAL_IP_2
busywait "$TIMEOUT" wait_for_offload grep_bridge_fdb \
de:ad:be:ef:13:37 self bridge fdb show brport vxlan10
check_err $?
@@ -1021,7 +1043,7 @@ offload_indication_vlan_aware_decap_route_test()
RET=0
busywait "$TIMEOUT" wait_for_offload \
- ip route show table local 198.51.100.1
+ ip $IP_FLAG route show table local $LOCAL_IP_1
check_err $?
# Toggle PVID flag on one VxLAN device and make sure route is still
@@ -1029,7 +1051,7 @@ offload_indication_vlan_aware_decap_route_test()
bridge vlan add vid 10 dev vxlan10 untagged
busywait "$TIMEOUT" wait_for_offload \
- ip route show table local 198.51.100.1
+ ip $IP_FLAG route show table local $LOCAL_IP_1
check_err $?
# Toggle PVID flag on second VxLAN device and make sure route is no
@@ -1037,14 +1059,15 @@ offload_indication_vlan_aware_decap_route_test()
bridge vlan add vid 20 dev vxlan20 untagged
busywait "$TIMEOUT" not wait_for_offload \
- ip route show table local 198.51.100.1
+ ip $IP_FLAG route show table local $LOCAL_IP_1
check_err $?
# Toggle PVID flag back and make sure route is marked as offloaded
bridge vlan add vid 10 dev vxlan10 pvid untagged
bridge vlan add vid 20 dev vxlan20 pvid untagged
- busywait "$TIMEOUT" wait_for_offload ip route show table local 198.51.100.1
+ busywait "$TIMEOUT" wait_for_offload ip $IP_FLAG route show table local \
+ $LOCAL_IP_1
check_err $?
log_test "vxlan decap route - vni map/unmap"
@@ -1052,10 +1075,12 @@ offload_indication_vlan_aware_decap_route_test()
offload_indication_vlan_aware_join_vxlan_first()
{
- ip link add dev br0 up type bridge mcast_snooping 0 \
+ ip link add dev br0 type bridge mcast_snooping 0 \
vlan_filtering 1 vlan_default_pvid 1
- ip link add name vxlan0 up type vxlan id 10 nolearning noudpcsum \
- ttl 20 tos inherit local 198.51.100.1 dstport 4789
+ ip link set dev br0 addrgenmode none
+ ip link set dev br0 up
+ ip link add name vxlan0 up type vxlan id 10 nolearning $UDPCSUM_FLAFS \
+ ttl 20 tos inherit local $LOCAL_IP_1 dstport 4789
__offload_indication_join_vxlan_first 1
@@ -1065,10 +1090,12 @@ offload_indication_vlan_aware_join_vxlan_first()
offload_indication_vlan_aware_join_vxlan_last()
{
- ip link add dev br0 up type bridge mcast_snooping 0 \
+ ip link add dev br0 type bridge mcast_snooping 0 \
vlan_filtering 1 vlan_default_pvid 1
- ip link add name vxlan0 up type vxlan id 10 nolearning noudpcsum \
- ttl 20 tos inherit local 198.51.100.1 dstport 4789
+ ip link set dev br0 addrgenmode none
+ ip link set dev br0 up
+ ip link add name vxlan0 up type vxlan id 10 nolearning $UDPCSUM_FLAFS \
+ ttl 20 tos inherit local $LOCAL_IP_1 dstport 4789
__offload_indication_join_vxlan_last
@@ -1083,16 +1110,18 @@ offload_indication_vlan_aware_l3vni_test()
RET=0
sysctl_set net.ipv6.conf.default.disable_ipv6 1
- ip link add dev br0 up type bridge mcast_snooping 0 \
+ ip link add dev br0 type bridge mcast_snooping 0 \
vlan_filtering 1 vlan_default_pvid 0
- ip link add name vxlan0 up type vxlan id 10 nolearning noudpcsum \
- ttl 20 tos inherit local 198.51.100.1 dstport 4789
+ ip link set dev br0 addrgenmode none
+ ip link set dev br0 up
+ ip link add name vxlan0 up type vxlan id 10 nolearning $UDPCSUM_FLAFS \
+ ttl 20 tos inherit local $LOCAL_IP_1 dstport 4789
ip link set dev $swp1 master br0
# The test will use the offload indication on the FDB entry to
# understand if the tunnel is offloaded or not
- bridge fdb append $zmac dev vxlan0 self dst 192.0.2.1
+ bridge fdb append $zmac dev vxlan0 self dst $LOCAL_IP_2
ip link set dev vxlan0 master br0
bridge vlan add dev vxlan0 vid 10 pvid untagged
diff --git a/tools/testing/selftests/drivers/net/mlxsw/vxlan_fdb_veto.sh b/tools/testing/selftests/drivers/net/mlxsw/vxlan_fdb_veto.sh
index 749ba3cfda1d..38148f51877a 100755
--- a/tools/testing/selftests/drivers/net/mlxsw/vxlan_fdb_veto.sh
+++ b/tools/testing/selftests/drivers/net/mlxsw/vxlan_fdb_veto.sh
@@ -4,6 +4,21 @@
# Test vetoing of FDB entries that mlxsw can not offload. This exercises several
# different veto vectors to test various rollback scenarios in the vxlan driver.
+: ${LOCAL_IP:=198.51.100.1}
+export LOCAL_IP
+
+: ${REMOTE_IP_1:=198.51.100.2}
+export REMOTE_IP_1
+
+: ${REMOTE_IP_2:=198.51.100.3}
+export REMOTE_IP_2
+
+: ${UDPCSUM_FLAFS:=noudpcsum}
+export UDPCSUM_FLAFS
+
+: ${MC_IP:=224.0.0.1}
+export MC_IP
+
lib_dir=$(dirname $0)/../../../net/forwarding
ALL_TESTS="
@@ -26,8 +41,8 @@ setup_prepare()
ip link set dev $swp1 master br0
ip link set dev $swp2 up
- ip link add name vxlan0 up type vxlan id 10 nolearning noudpcsum \
- ttl 20 tos inherit local 198.51.100.1 dstport 4789
+ ip link add name vxlan0 up type vxlan id 10 nolearning $UDPCSUM_FLAFS \
+ ttl 20 tos inherit local $LOCAL_IP dstport 4789
ip link set dev vxlan0 master br0
}
@@ -50,11 +65,11 @@ fdb_create_veto_test()
RET=0
bridge fdb add 01:02:03:04:05:06 dev vxlan0 self static \
- dst 198.51.100.2 2>/dev/null
+ dst $REMOTE_IP_1 2>/dev/null
check_fail $? "multicast MAC not rejected"
bridge fdb add 01:02:03:04:05:06 dev vxlan0 self static \
- dst 198.51.100.2 2>&1 >/dev/null | grep -q mlxsw_spectrum
+ dst $REMOTE_IP_1 2>&1 >/dev/null | grep -q mlxsw_spectrum
check_err $? "multicast MAC rejected without extack"
log_test "vxlan FDB veto - create"
@@ -65,15 +80,15 @@ fdb_replace_veto_test()
RET=0
bridge fdb add 00:01:02:03:04:05 dev vxlan0 self static \
- dst 198.51.100.2
+ dst $REMOTE_IP_1
check_err $? "valid FDB rejected"
bridge fdb replace 00:01:02:03:04:05 dev vxlan0 self static \
- dst 198.51.100.2 port 1234 2>/dev/null
+ dst $REMOTE_IP_1 port 1234 2>/dev/null
check_fail $? "FDB with an explicit port not rejected"
bridge fdb replace 00:01:02:03:04:05 dev vxlan0 self static \
- dst 198.51.100.2 port 1234 2>&1 >/dev/null \
+ dst $REMOTE_IP_1 port 1234 2>&1 >/dev/null \
| grep -q mlxsw_spectrum
check_err $? "FDB with an explicit port rejected without extack"
@@ -85,15 +100,15 @@ fdb_append_veto_test()
RET=0
bridge fdb add 00:00:00:00:00:00 dev vxlan0 self static \
- dst 198.51.100.2
+ dst $REMOTE_IP_1
check_err $? "valid FDB rejected"
bridge fdb append 00:00:00:00:00:00 dev vxlan0 self static \
- dst 198.51.100.3 port 1234 2>/dev/null
+ dst $REMOTE_IP_2 port 1234 2>/dev/null
check_fail $? "FDB with an explicit port not rejected"
bridge fdb append 00:00:00:00:00:00 dev vxlan0 self static \
- dst 198.51.100.3 port 1234 2>&1 >/dev/null \
+ dst $REMOTE_IP_2 port 1234 2>&1 >/dev/null \
| grep -q mlxsw_spectrum
check_err $? "FDB with an explicit port rejected without extack"
@@ -105,11 +120,11 @@ fdb_changelink_veto_test()
RET=0
ip link set dev vxlan0 type vxlan \
- group 224.0.0.1 dev lo 2>/dev/null
+ group $MC_IP dev lo 2>/dev/null
check_fail $? "FDB with a multicast IP not rejected"
ip link set dev vxlan0 type vxlan \
- group 224.0.0.1 dev lo 2>&1 >/dev/null \
+ group $MC_IP dev lo 2>&1 >/dev/null \
| grep -q mlxsw_spectrum
check_err $? "FDB with a multicast IP rejected without extack"
diff --git a/tools/testing/selftests/drivers/net/mlxsw/vxlan_fdb_veto_ipv6.sh b/tools/testing/selftests/drivers/net/mlxsw/vxlan_fdb_veto_ipv6.sh
new file mode 100755
index 000000000000..66c87aab86f6
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/mlxsw/vxlan_fdb_veto_ipv6.sh
@@ -0,0 +1,12 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+# A wrapper to run VXLAN test for IPv6.
+
+LOCAL_IP=2001:db8:1::1
+REMOTE_IP_1=2001:db8:2::1
+REMOTE_IP_2=2001:db8:3::1
+UDPCSUM_FLAFS="udp6zerocsumrx udp6zerocsumtx"
+MC_IP=FF02::2
+
+source vxlan_fdb_veto.sh
diff --git a/tools/testing/selftests/drivers/net/mlxsw/vxlan_flooding.sh b/tools/testing/selftests/drivers/net/mlxsw/vxlan_flooding.sh
index fedcb7b35af9..af5ea50ed5c0 100755
--- a/tools/testing/selftests/drivers/net/mlxsw/vxlan_flooding.sh
+++ b/tools/testing/selftests/drivers/net/mlxsw/vxlan_flooding.sh
@@ -172,6 +172,17 @@ flooding_filters_add()
local lsb
local i
+ # Prevent unwanted packets from entering the bridge and interfering
+ # with the test.
+ tc qdisc add dev br0 clsact
+ tc filter add dev br0 egress protocol all pref 1 handle 1 \
+ matchall skip_hw action drop
+ tc qdisc add dev $h1 clsact
+ tc filter add dev $h1 egress protocol all pref 1 handle 1 \
+ flower skip_hw dst_mac de:ad:be:ef:13:37 action pass
+ tc filter add dev $h1 egress protocol all pref 2 handle 2 \
+ matchall skip_hw action drop
+
tc qdisc add dev $rp2 clsact
for i in $(eval echo {1..$num_remotes}); do
@@ -194,6 +205,12 @@ flooding_filters_del()
done
tc qdisc del dev $rp2 clsact
+
+ tc filter del dev $h1 egress protocol all pref 2 handle 2 matchall
+ tc filter del dev $h1 egress protocol all pref 1 handle 1 flower
+ tc qdisc del dev $h1 clsact
+ tc filter del dev br0 egress protocol all pref 1 handle 1 matchall
+ tc qdisc del dev br0 clsact
}
flooding_check_packets()
diff --git a/tools/testing/selftests/drivers/net/mlxsw/vxlan_ipv6.sh b/tools/testing/selftests/drivers/net/mlxsw/vxlan_ipv6.sh
new file mode 100755
index 000000000000..f2ea0163ddea
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/mlxsw/vxlan_ipv6.sh
@@ -0,0 +1,65 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+# A wrapper to run VXLAN test for IPv6.
+
+ADDR_FAMILY=ipv6
+LOCAL_IP_1=2001:db8:1::1
+LOCAL_IP_2=2001:db8:1::2
+PREFIX_LEN=128
+UDPCSUM_FLAFS="udp6zerocsumrx udp6zerocsumtx"
+MC_IP=FF02::2
+IP_FLAG="-6"
+
+ALL_TESTS="
+ sanitization_test
+ offload_indication_test
+ sanitization_vlan_aware_test
+ offload_indication_vlan_aware_test
+"
+
+sanitization_single_dev_learning_enabled_ipv6_test()
+{
+ RET=0
+
+ ip link add dev br0 type bridge mcast_snooping 0
+
+ ip link add name vxlan0 up type vxlan id 10 learning $UDPCSUM_FLAFS \
+ ttl 20 tos inherit local $LOCAL_IP_1 dstport 4789
+
+ sanitization_single_dev_test_fail
+
+ ip link del dev vxlan0
+ ip link del dev br0
+
+ log_test "vxlan device with learning enabled"
+}
+
+sanitization_single_dev_udp_checksum_ipv6_test()
+{
+ RET=0
+
+ ip link add dev br0 type bridge mcast_snooping 0
+
+ ip link add name vxlan0 up type vxlan id 10 nolearning \
+ noudp6zerocsumrx udp6zerocsumtx ttl 20 tos inherit \
+ local $LOCAL_IP_1 dstport 4789
+
+ sanitization_single_dev_test_fail
+ log_test "vxlan device without zero udp checksum at RX"
+
+ ip link del dev vxlan0
+
+ ip link add name vxlan0 up type vxlan id 10 nolearning \
+ udp6zerocsumrx noudp6zerocsumtx ttl 20 tos inherit \
+ local $LOCAL_IP_1 dstport 4789
+
+ sanitization_single_dev_test_fail
+ log_test "vxlan device without zero udp checksum at TX"
+
+ ip link del dev vxlan0
+ ip link del dev br0
+
+}
+
+source vxlan.sh
diff --git a/tools/testing/selftests/drivers/net/netdevsim/Makefile b/tools/testing/selftests/drivers/net/netdevsim/Makefile
new file mode 100644
index 000000000000..5bace0b7fb57
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/netdevsim/Makefile
@@ -0,0 +1,18 @@
+# SPDX-License-Identifier: GPL-2.0+ OR MIT
+
+TEST_PROGS = devlink.sh \
+ devlink_in_netns.sh \
+ devlink_trap.sh \
+ ethtool-coalesce.sh \
+ ethtool-fec.sh \
+ ethtool-pause.sh \
+ ethtool-ring.sh \
+ fib.sh \
+ hw_stats_l3.sh \
+ nexthop.sh \
+ peer.sh \
+ psample.sh \
+ tc-mq-visibility.sh \
+ udp_tunnel_nic.sh \
+
+include ../../../lib.mk
diff --git a/tools/testing/selftests/drivers/net/netdevsim/config b/tools/testing/selftests/drivers/net/netdevsim/config
new file mode 100644
index 000000000000..adf45a3a78b4
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/netdevsim/config
@@ -0,0 +1,10 @@
+CONFIG_DUMMY=y
+CONFIG_GENEVE=m
+CONFIG_IPV6=y
+CONFIG_NETDEVSIM=m
+CONFIG_NET_SCH_MQPRIO=y
+CONFIG_NET_SCH_MULTIQ=y
+CONFIG_NET_SCH_PRIO=y
+CONFIG_PSAMPLE=y
+CONFIG_PTP_1588_CLOCK_MOCK=y
+CONFIG_VXLAN=m
diff --git a/tools/testing/selftests/drivers/net/netdevsim/devlink.sh b/tools/testing/selftests/drivers/net/netdevsim/devlink.sh
index de4b32fc4223..b5ea2526f23c 100755
--- a/tools/testing/selftests/drivers/net/netdevsim/devlink.sh
+++ b/tools/testing/selftests/drivers/net/netdevsim/devlink.sh
@@ -5,28 +5,69 @@ lib_dir=$(dirname $0)/../../../net/forwarding
ALL_TESTS="fw_flash_test params_test regions_test reload_test \
netns_reload_test resource_test dev_info_test \
- empty_reporter_test dummy_reporter_test"
+ empty_reporter_test dummy_reporter_test rate_test"
NUM_NETIFS=0
source $lib_dir/lib.sh
BUS_ADDR=10
PORT_COUNT=4
+VF_COUNT=4
DEV_NAME=netdevsim$BUS_ADDR
SYSFS_NET_DIR=/sys/bus/netdevsim/devices/$DEV_NAME/net/
DEBUGFS_DIR=/sys/kernel/debug/netdevsim/$DEV_NAME/
DL_HANDLE=netdevsim/$DEV_NAME
+wait_for_devlink()
+{
+ "$@" | grep -q $DL_HANDLE
+}
+
+devlink_wait()
+{
+ local timeout=$1
+
+ busywait "$timeout" wait_for_devlink devlink dev
+}
+
fw_flash_test()
{
+ DUMMYFILE=$(find /lib/firmware -type f -printf '%P\n' | head -1)
RET=0
- devlink dev flash $DL_HANDLE file dummy
+ if [ -z "$DUMMYFILE" ]
+ then
+ echo "SKIP: unable to find suitable dummy firmware file"
+ return
+ fi
+
+ devlink dev flash $DL_HANDLE file $DUMMYFILE
check_err $? "Failed to flash with status updates on"
+ devlink dev flash $DL_HANDLE file $DUMMYFILE component fw.mgmt
+ check_err $? "Failed to flash with component attribute"
+
+ devlink dev flash $DL_HANDLE file $DUMMYFILE overwrite settings
+ check_fail $? "Flash with overwrite settings should be rejected"
+
+ echo "1"> $DEBUGFS_DIR/fw_update_overwrite_mask
+ check_err $? "Failed to change allowed overwrite mask"
+
+ devlink dev flash $DL_HANDLE file $DUMMYFILE overwrite settings
+ check_err $? "Failed to flash with settings overwrite enabled"
+
+ devlink dev flash $DL_HANDLE file $DUMMYFILE overwrite identifiers
+ check_fail $? "Flash with overwrite settings should be identifiers"
+
+ echo "3"> $DEBUGFS_DIR/fw_update_overwrite_mask
+ check_err $? "Failed to change allowed overwrite mask"
+
+ devlink dev flash $DL_HANDLE file $DUMMYFILE overwrite identifiers overwrite settings
+ check_err $? "Failed to flash with settings and identifiers overwrite enabled"
+
echo "n"> $DEBUGFS_DIR/fw_update_status
check_err $? "Failed to disable status updates"
- devlink dev flash $DL_HANDLE file dummy
+ devlink dev flash $DL_HANDLE file $DUMMYFILE
check_err $? "Failed to flash with status updates off"
log_test "fw flash test"
@@ -234,6 +275,9 @@ netns_reload_test()
ip netns del testns2
ip netns del testns1
+ # Wait until netns async cleanup is done.
+ devlink_wait 2000
+
log_test "netns reload test"
}
@@ -326,6 +370,9 @@ resource_test()
ip netns del testns2
ip netns del testns1
+ # Wait until netns async cleanup is done.
+ devlink_wait 2000
+
log_test "resource test"
}
@@ -474,8 +521,8 @@ dummy_reporter_test()
check_reporter_info dummy healthy 3 3 10 true
- echo 8192> $DEBUGFS_DIR/health/binary_len
- check_fail $? "Failed set dummy reporter binary len to 8192"
+ echo 8192 > $DEBUGFS_DIR/health/binary_len
+ check_err $? "Failed set dummy reporter binary len to 8192"
local dump=$(devlink health dump show $DL_HANDLE reporter dummy -j)
check_err $? "Failed show dump of dummy reporter"
@@ -486,6 +533,170 @@ dummy_reporter_test()
log_test "dummy reporter test"
}
+rate_leafs_get()
+{
+ local handle=$1
+
+ cmd_jq "devlink port function rate show -j" \
+ '.[] | to_entries | .[] | select(.value.type == "leaf") | .key | select(contains("'$handle'"))'
+}
+
+rate_nodes_get()
+{
+ local handle=$1
+
+ cmd_jq "devlink port function rate show -j" \
+ '.[] | to_entries | .[] | select(.value.type == "node") | .key | select(contains("'$handle'"))'
+}
+
+rate_attr_set()
+{
+ local handle=$1
+ local name=$2
+ local value=$3
+ local units=$4
+
+ devlink port function rate set $handle $name $value$units
+}
+
+rate_attr_get()
+{
+ local handle=$1
+ local name=$2
+
+ cmd_jq "devlink port function rate show $handle -j" '.[][].'$name
+}
+
+rate_attr_tx_rate_check()
+{
+ local handle=$1
+ local name=$2
+ local rate=$3
+ local debug_file=$4
+
+ rate_attr_set $handle $name $rate mbit
+ check_err $? "Failed to set $name value"
+
+ local debug_value=$(cat $debug_file)
+ check_err $? "Failed to read $name value from debugfs"
+ [ "$debug_value" == "$rate" ]
+ check_err $? "Unexpected $name debug value $debug_value != $rate"
+
+ local api_value=$(( $(rate_attr_get $handle $name) * 8 / 1000000 ))
+ check_err $? "Failed to get $name attr value"
+ [ "$api_value" == "$rate" ]
+ check_err $? "Unexpected $name attr value $api_value != $rate"
+}
+
+rate_attr_parent_check()
+{
+ local handle=$1
+ local parent=$2
+ local debug_file=$3
+
+ rate_attr_set $handle parent $parent
+ check_err $? "Failed to set parent"
+
+ debug_value=$(cat $debug_file)
+ check_err $? "Failed to get parent debugfs value"
+ [ "$debug_value" == "$parent" ]
+ check_err $? "Unexpected parent debug value $debug_value != $parent"
+
+ api_value=$(rate_attr_get $r_obj parent)
+ check_err $? "Failed to get parent attr value"
+ [ "$api_value" == "$parent" ]
+ check_err $? "Unexpected parent attr value $api_value != $parent"
+}
+
+rate_node_add()
+{
+ local handle=$1
+
+ devlink port function rate add $handle
+}
+
+rate_node_del()
+{
+ local handle=$1
+
+ devlink port function rate del $handle
+}
+
+rate_test()
+{
+ RET=0
+
+ echo $VF_COUNT > /sys/bus/netdevsim/devices/$DEV_NAME/sriov_numvfs
+ devlink dev eswitch set $DL_HANDLE mode switchdev
+ local leafs=`rate_leafs_get $DL_HANDLE`
+ local num_leafs=`echo $leafs | wc -w`
+ [ "$num_leafs" == "$VF_COUNT" ]
+ check_err $? "Expected $VF_COUNT rate leafs but got $num_leafs"
+
+ rate=10
+ for r_obj in $leafs
+ do
+ rate_attr_tx_rate_check $r_obj tx_share $rate \
+ $DEBUGFS_DIR/ports/${r_obj##*/}/tx_share
+ rate=$(($rate+10))
+ done
+
+ rate=100
+ for r_obj in $leafs
+ do
+ rate_attr_tx_rate_check $r_obj tx_max $rate \
+ $DEBUGFS_DIR/ports/${r_obj##*/}/tx_max
+ rate=$(($rate+100))
+ done
+
+ local node1_name='group1'
+ local node1="$DL_HANDLE/$node1_name"
+ rate_node_add "$node1"
+ check_err $? "Failed to add node $node1"
+
+ local num_nodes=`rate_nodes_get $DL_HANDLE | wc -w`
+ [ $num_nodes == 1 ]
+ check_err $? "Expected 1 rate node in output but got $num_nodes"
+
+ local node_tx_share=10
+ rate_attr_tx_rate_check $node1 tx_share $node_tx_share \
+ $DEBUGFS_DIR/rate_nodes/${node1##*/}/tx_share
+
+ local node_tx_max=100
+ rate_attr_tx_rate_check $node1 tx_max $node_tx_max \
+ $DEBUGFS_DIR/rate_nodes/${node1##*/}/tx_max
+
+ rate_node_del "$node1"
+ check_err $? "Failed to delete node $node1"
+ local num_nodes=`rate_nodes_get $DL_HANDLE | wc -w`
+ [ $num_nodes == 0 ]
+ check_err $? "Expected 0 rate node but got $num_nodes"
+
+ local node1_name='group1'
+ local node1="$DL_HANDLE/$node1_name"
+ rate_node_add "$node1"
+ check_err $? "Failed to add node $node1"
+
+ rate_attr_parent_check $r_obj $node1_name \
+ $DEBUGFS_DIR/ports/${r_obj##*/}/rate_parent
+
+ local node2_name='group2'
+ local node2="$DL_HANDLE/$node2_name"
+ rate_node_add "$node2"
+ check_err $? "Failed to add node $node2"
+
+ rate_attr_parent_check $node2 $node1_name \
+ $DEBUGFS_DIR/rate_nodes/$node2_name/rate_parent
+ rate_node_del "$node2"
+ check_err $? "Failed to delete node $node2"
+ rate_attr_set "$r_obj" noparent
+ check_err $? "Failed to unset $r_obj parent node"
+ rate_node_del "$node1"
+ check_err $? "Failed to delete node $node1"
+
+ log_test "rate test"
+}
+
setup_prepare()
{
modprobe netdevsim
diff --git a/tools/testing/selftests/drivers/net/netdevsim/devlink_trap.sh b/tools/testing/selftests/drivers/net/netdevsim/devlink_trap.sh
index da49ad2761b5..b64d98ca0df7 100755
--- a/tools/testing/selftests/drivers/net/netdevsim/devlink_trap.sh
+++ b/tools/testing/selftests/drivers/net/netdevsim/devlink_trap.sh
@@ -24,13 +24,15 @@ ALL_TESTS="
NETDEVSIM_PATH=/sys/bus/netdevsim/
DEV_ADDR=1337
DEV=netdevsim${DEV_ADDR}
-DEVLINK_DEV=netdevsim/${DEV}
DEBUGFS_DIR=/sys/kernel/debug/netdevsim/$DEV/
SLEEP_TIME=1
NETDEV=""
NUM_NETIFS=0
source $lib_dir/lib.sh
+
+DEVLINK_DEV=
source $lib_dir/devlink_lib.sh
+DEVLINK_DEV=netdevsim/${DEV}
require_command udevadm
@@ -45,6 +47,17 @@ if [ -d "${NETDEVSIM_PATH}/devices/netdevsim${DEV_ADDR}" ]; then
exit 1
fi
+check_netdev_down()
+{
+ state=$(cat /sys/class/net/${NETDEV}/flags)
+
+ if [ $((state & 1)) -ne 0 ]; then
+ echo "WARNING: unexpected interface UP, disable NetworkManager?"
+
+ ip link set dev $NETDEV down
+ fi
+}
+
init_test()
{
RET=0
@@ -149,6 +162,7 @@ trap_stats_test()
RET=0
+ check_netdev_down
for trap_name in $(devlink_traps_get); do
devlink_trap_stats_idle_test $trap_name
check_err $? "Stats of trap $trap_name not idle when netdev down"
@@ -163,6 +177,16 @@ trap_stats_test()
devlink_trap_action_set $trap_name "drop"
devlink_trap_stats_idle_test $trap_name
check_err $? "Stats of trap $trap_name not idle when action is drop"
+
+ echo "y"> $DEBUGFS_DIR/fail_trap_drop_counter_get
+ devlink -s trap show $DEVLINK_DEV trap $trap_name &> /dev/null
+ check_fail $? "Managed to read trap (hard dropped) statistics when should not"
+ echo "n"> $DEBUGFS_DIR/fail_trap_drop_counter_get
+ devlink -s trap show $DEVLINK_DEV trap $trap_name &> /dev/null
+ check_err $? "Did not manage to read trap (hard dropped) statistics when should"
+
+ devlink_trap_drop_stats_idle_test $trap_name
+ check_fail $? "Drop stats of trap $trap_name idle when should not"
else
devlink_trap_stats_idle_test $trap_name
check_fail $? "Stats of non-drop trap $trap_name idle when should not"
@@ -242,6 +266,7 @@ trap_group_stats_test()
RET=0
+ check_netdev_down
for group_name in $(devlink_trap_groups_get); do
devlink_trap_group_stats_idle_test $group_name
check_err $? "Stats of trap group $group_name not idle when netdev down"
diff --git a/tools/testing/selftests/drivers/net/netdevsim/ethtool-coalesce.sh b/tools/testing/selftests/drivers/net/netdevsim/ethtool-coalesce.sh
new file mode 100755
index 000000000000..9adfba8f87e6
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/netdevsim/ethtool-coalesce.sh
@@ -0,0 +1,132 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0-only
+
+source ethtool-common.sh
+
+function get_value {
+ local query="${SETTINGS_MAP[$1]}"
+
+ echo $(ethtool -c $NSIM_NETDEV | \
+ awk -F':' -v pattern="$query:" '$0 ~ pattern {gsub(/[ \t]/, "", $2); print $2}')
+}
+
+function update_current_settings {
+ for key in ${!SETTINGS_MAP[@]}; do
+ CURRENT_SETTINGS[$key]=$(get_value $key)
+ done
+ echo ${CURRENT_SETTINGS[@]}
+}
+
+if ! ethtool -h | grep -q coalesce; then
+ echo "SKIP: No --coalesce support in ethtool"
+ exit 4
+fi
+
+NSIM_NETDEV=$(make_netdev)
+
+set -o pipefail
+
+declare -A SETTINGS_MAP=(
+ ["rx-frames-low"]="rx-frame-low"
+ ["tx-frames-low"]="tx-frame-low"
+ ["rx-frames-high"]="rx-frame-high"
+ ["tx-frames-high"]="tx-frame-high"
+ ["rx-usecs"]="rx-usecs"
+ ["rx-frames"]="rx-frames"
+ ["rx-usecs-irq"]="rx-usecs-irq"
+ ["rx-frames-irq"]="rx-frames-irq"
+ ["tx-usecs"]="tx-usecs"
+ ["tx-frames"]="tx-frames"
+ ["tx-usecs-irq"]="tx-usecs-irq"
+ ["tx-frames-irq"]="tx-frames-irq"
+ ["stats-block-usecs"]="stats-block-usecs"
+ ["pkt-rate-low"]="pkt-rate-low"
+ ["rx-usecs-low"]="rx-usecs-low"
+ ["tx-usecs-low"]="tx-usecs-low"
+ ["pkt-rate-high"]="pkt-rate-high"
+ ["rx-usecs-high"]="rx-usecs-high"
+ ["tx-usecs-high"]="tx-usecs-high"
+ ["sample-interval"]="sample-interval"
+)
+
+declare -A CURRENT_SETTINGS=(
+ ["rx-frames-low"]=""
+ ["tx-frames-low"]=""
+ ["rx-frames-high"]=""
+ ["tx-frames-high"]=""
+ ["rx-usecs"]=""
+ ["rx-frames"]=""
+ ["rx-usecs-irq"]=""
+ ["rx-frames-irq"]=""
+ ["tx-usecs"]=""
+ ["tx-frames"]=""
+ ["tx-usecs-irq"]=""
+ ["tx-frames-irq"]=""
+ ["stats-block-usecs"]=""
+ ["pkt-rate-low"]=""
+ ["rx-usecs-low"]=""
+ ["tx-usecs-low"]=""
+ ["pkt-rate-high"]=""
+ ["rx-usecs-high"]=""
+ ["tx-usecs-high"]=""
+ ["sample-interval"]=""
+)
+
+declare -A EXPECTED_SETTINGS=(
+ ["rx-frames-low"]=""
+ ["tx-frames-low"]=""
+ ["rx-frames-high"]=""
+ ["tx-frames-high"]=""
+ ["rx-usecs"]=""
+ ["rx-frames"]=""
+ ["rx-usecs-irq"]=""
+ ["rx-frames-irq"]=""
+ ["tx-usecs"]=""
+ ["tx-frames"]=""
+ ["tx-usecs-irq"]=""
+ ["tx-frames-irq"]=""
+ ["stats-block-usecs"]=""
+ ["pkt-rate-low"]=""
+ ["rx-usecs-low"]=""
+ ["tx-usecs-low"]=""
+ ["pkt-rate-high"]=""
+ ["rx-usecs-high"]=""
+ ["tx-usecs-high"]=""
+ ["sample-interval"]=""
+)
+
+# populate the expected settings map
+for key in ${!SETTINGS_MAP[@]}; do
+ EXPECTED_SETTINGS[$key]=$(get_value $key)
+done
+
+# test
+for key in ${!SETTINGS_MAP[@]}; do
+ value=$((RANDOM % $((2**32-1))))
+
+ ethtool -C $NSIM_NETDEV "$key" "$value"
+
+ EXPECTED_SETTINGS[$key]="$value"
+ expected=${EXPECTED_SETTINGS[@]}
+ current=$(update_current_settings)
+
+ check $? "$current" "$expected"
+ set +x
+done
+
+# bool settings which ethtool displays on the same line
+ethtool -C $NSIM_NETDEV adaptive-rx on
+s=$(ethtool -c $NSIM_NETDEV | grep -q "Adaptive RX: on TX: off")
+check $? "$s" ""
+
+ethtool -C $NSIM_NETDEV adaptive-tx on
+s=$(ethtool -c $NSIM_NETDEV | grep -q "Adaptive RX: on TX: on")
+check $? "$s" ""
+
+if [ $num_errors -eq 0 ]; then
+ echo "PASSED all $((num_passes)) checks"
+ exit 0
+else
+ echo "FAILED $num_errors/$((num_errors+num_passes)) checks"
+ exit 1
+fi
diff --git a/tools/testing/selftests/drivers/net/netdevsim/ethtool-common.sh b/tools/testing/selftests/drivers/net/netdevsim/ethtool-common.sh
new file mode 100644
index 000000000000..80160579e0cc
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/netdevsim/ethtool-common.sh
@@ -0,0 +1,57 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0-only
+
+NSIM_ID=$((RANDOM % 1024))
+NSIM_DEV_SYS=/sys/bus/netdevsim/devices/netdevsim$NSIM_ID
+NSIM_DEV_DFS=/sys/kernel/debug/netdevsim/netdevsim$NSIM_ID/ports/0
+NSIM_NETDEV=
+num_passes=0
+num_errors=0
+
+function cleanup_nsim {
+ if [ -e $NSIM_DEV_SYS ]; then
+ echo $NSIM_ID > /sys/bus/netdevsim/del_device
+ fi
+}
+
+function cleanup {
+ cleanup_nsim
+}
+
+trap cleanup EXIT
+
+function check {
+ local code=$1
+ local str=$2
+ local exp_str=$3
+ local exp_fail=$4
+
+ [ -z "$exp_fail" ] && cop="-ne" || cop="-eq"
+
+ if [ $code $cop 0 ]; then
+ ((num_errors++))
+ return
+ fi
+
+ if [ "$str" != "$exp_str" ]; then
+ echo -e "Expected: '$exp_str', got '$str'"
+ ((num_errors++))
+ return
+ fi
+
+ ((num_passes++))
+}
+
+function make_netdev {
+ # Make a netdevsim
+ old_netdevs=$(ls /sys/class/net)
+
+ if ! $(lsmod | grep -q netdevsim); then
+ modprobe netdevsim
+ fi
+
+ echo $NSIM_ID $@ > /sys/bus/netdevsim/new_device
+ udevadm settle
+ # get new device name
+ ls /sys/bus/netdevsim/devices/netdevsim${NSIM_ID}/net/
+}
diff --git a/tools/testing/selftests/drivers/net/netdevsim/ethtool-fec.sh b/tools/testing/selftests/drivers/net/netdevsim/ethtool-fec.sh
new file mode 100755
index 000000000000..6c52ce1b0450
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/netdevsim/ethtool-fec.sh
@@ -0,0 +1,114 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0-only
+
+source ethtool-common.sh
+
+NSIM_NETDEV=$(make_netdev)
+[ a$ETHTOOL == a ] && ETHTOOL=ethtool
+
+set -o pipefail
+
+# Since commit 2b3ddcb35357 ("ethtool: fec: Change the prompt ...")
+# in ethtool CLI the Configured lines start with Supported/Configured.
+configured=$($ETHTOOL --show-fec $NSIM_NETDEV | tail -2 | head -1 | cut -d' ' -f1)
+
+# netdevsim starts out with None/None
+s=$($ETHTOOL --show-fec $NSIM_NETDEV | tail -2)
+check $? "$s" "$configured FEC encodings: None
+Active FEC encoding: None"
+
+# Test Auto
+$ETHTOOL --set-fec $NSIM_NETDEV encoding auto
+check $?
+s=$($ETHTOOL --show-fec $NSIM_NETDEV | tail -2)
+check $? "$s" "$configured FEC encodings: Auto
+Active FEC encoding: Off"
+
+# Test case in-sensitivity
+for o in off Off OFF; do
+ $ETHTOOL --set-fec $NSIM_NETDEV encoding $o
+ check $?
+ s=$($ETHTOOL --show-fec $NSIM_NETDEV | tail -2)
+ check $? "$s" "$configured FEC encodings: Off
+Active FEC encoding: Off"
+done
+
+for o in BaseR baser BAser; do
+ $ETHTOOL --set-fec $NSIM_NETDEV encoding $o
+ check $?
+ s=$($ETHTOOL --show-fec $NSIM_NETDEV | tail -2)
+ check $? "$s" "$configured FEC encodings: BaseR
+Active FEC encoding: BaseR"
+done
+
+for o in llrs rs; do
+ $ETHTOOL --set-fec $NSIM_NETDEV encoding $o
+ check $?
+ s=$($ETHTOOL --show-fec $NSIM_NETDEV | tail -2)
+ check $? "$s" "$configured FEC encodings: ${o^^}
+Active FEC encoding: ${o^^}"
+done
+
+# Test multiple bits
+$ETHTOOL --set-fec $NSIM_NETDEV encoding rs llrs
+check $?
+s=$($ETHTOOL --show-fec $NSIM_NETDEV | tail -2)
+check $? "$s" "$configured FEC encodings: RS LLRS
+Active FEC encoding: LLRS"
+
+$ETHTOOL --set-fec $NSIM_NETDEV encoding rs off auto
+check $?
+s=$($ETHTOOL --show-fec $NSIM_NETDEV | tail -2)
+check $? "$s" "$configured FEC encodings: Auto Off RS
+Active FEC encoding: RS"
+
+# Make sure other link modes are rejected
+$ETHTOOL --set-fec $NSIM_NETDEV encoding FIBRE 2>/dev/null
+check $? '' '' 1
+
+$ETHTOOL --set-fec $NSIM_NETDEV encoding bla-bla-bla 2>/dev/null
+check $? '' '' 1
+
+# Try JSON
+$ETHTOOL --json --show-fec $NSIM_NETDEV | jq empty >>/dev/null 2>&1
+if [ $? -eq 0 ]; then
+ $ETHTOOL --set-fec $NSIM_NETDEV encoding auto
+ check $?
+
+ s=$($ETHTOOL --json --show-fec $NSIM_NETDEV | jq '.[].config[]')
+ check $? "$s" '"Auto"'
+ s=$($ETHTOOL --json --show-fec $NSIM_NETDEV | jq '.[].active[]')
+ check $? "$s" '"Off"'
+
+ $ETHTOOL --set-fec $NSIM_NETDEV encoding auto RS
+ check $?
+
+ s=$($ETHTOOL --json --show-fec $NSIM_NETDEV | jq '.[].config[]')
+ check $? "$s" '"Auto"
+"RS"'
+ s=$($ETHTOOL --json --show-fec $NSIM_NETDEV | jq '.[].active[]')
+ check $? "$s" '"RS"'
+fi
+
+# Test error injection
+echo 11 > $NSIM_DEV_DFS/ethtool/get_err
+
+$ETHTOOL --show-fec $NSIM_NETDEV >>/dev/null 2>&1
+check $? '' '' 1
+
+echo 0 > $NSIM_DEV_DFS/ethtool/get_err
+echo 11 > $NSIM_DEV_DFS/ethtool/set_err
+
+$ETHTOOL --show-fec $NSIM_NETDEV >>/dev/null 2>&1
+check $?
+
+$ETHTOOL --set-fec $NSIM_NETDEV encoding RS 2>/dev/null
+check $? '' '' 1
+
+if [ $num_errors -eq 0 ]; then
+ echo "PASSED all $((num_passes)) checks"
+ exit 0
+else
+ echo "FAILED $num_errors/$((num_errors+num_passes)) checks"
+ exit 1
+fi
diff --git a/tools/testing/selftests/drivers/net/netdevsim/ethtool-pause.sh b/tools/testing/selftests/drivers/net/netdevsim/ethtool-pause.sh
new file mode 100755
index 000000000000..b4a7abfe5454
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/netdevsim/ethtool-pause.sh
@@ -0,0 +1,49 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0-only
+
+source ethtool-common.sh
+
+# Bail if ethtool is too old
+if ! ethtool -h | grep include-stat 2>&1 >/dev/null; then
+ echo "SKIP: No --include-statistics support in ethtool"
+ exit 4
+fi
+
+NSIM_NETDEV=$(make_netdev)
+
+set -o pipefail
+
+echo n > $NSIM_DEV_DFS/ethtool/pause/report_stats_tx
+echo n > $NSIM_DEV_DFS/ethtool/pause/report_stats_rx
+
+s=$(ethtool --json -a $NSIM_NETDEV | jq '.[].statistics')
+check $? "$s" "null"
+
+s=$(ethtool -I --json -a $NSIM_NETDEV | jq '.[].statistics')
+check $? "$s" "{}"
+
+echo y > $NSIM_DEV_DFS/ethtool/pause/report_stats_tx
+
+s=$(ethtool -I --json -a $NSIM_NETDEV | jq '.[].statistics | length')
+check $? "$s" "1"
+
+s=$(ethtool -I --json -a $NSIM_NETDEV | jq '.[].statistics.tx_pause_frames')
+check $? "$s" "2"
+
+echo y > $NSIM_DEV_DFS/ethtool/pause/report_stats_rx
+
+s=$(ethtool -I --json -a $NSIM_NETDEV | jq '.[].statistics | length')
+check $? "$s" "2"
+
+s=$(ethtool -I --json -a $NSIM_NETDEV | jq '.[].statistics.rx_pause_frames')
+check $? "$s" "1"
+s=$(ethtool -I --json -a $NSIM_NETDEV | jq '.[].statistics.tx_pause_frames')
+check $? "$s" "2"
+
+if [ $num_errors -eq 0 ]; then
+ echo "PASSED all $((num_passes)) checks"
+ exit 0
+else
+ echo "FAILED $num_errors/$((num_errors+num_passes)) checks"
+ exit 1
+fi
diff --git a/tools/testing/selftests/drivers/net/netdevsim/ethtool-ring.sh b/tools/testing/selftests/drivers/net/netdevsim/ethtool-ring.sh
new file mode 100755
index 000000000000..c969559ffa7a
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/netdevsim/ethtool-ring.sh
@@ -0,0 +1,85 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0-only
+
+source ethtool-common.sh
+
+function get_value {
+ local query="${SETTINGS_MAP[$1]}"
+
+ echo $(ethtool -g $NSIM_NETDEV | \
+ tail -n +$CURR_SETT_LINE | \
+ awk -F':' -v pattern="$query:" '$0 ~ pattern {gsub(/[\t ]/, "", $2); print $2}')
+}
+
+function update_current_settings {
+ for key in ${!SETTINGS_MAP[@]}; do
+ CURRENT_SETTINGS[$key]=$(get_value $key)
+ done
+ echo ${CURRENT_SETTINGS[@]}
+}
+
+if ! ethtool -h | grep -q set-ring >/dev/null; then
+ echo "SKIP: No --set-ring support in ethtool"
+ exit 4
+fi
+
+NSIM_NETDEV=$(make_netdev)
+
+set -o pipefail
+
+declare -A SETTINGS_MAP=(
+ ["rx"]="RX"
+ ["rx-mini"]="RX Mini"
+ ["rx-jumbo"]="RX Jumbo"
+ ["tx"]="TX"
+)
+
+declare -A EXPECTED_SETTINGS=(
+ ["rx"]=""
+ ["rx-mini"]=""
+ ["rx-jumbo"]=""
+ ["tx"]=""
+)
+
+declare -A CURRENT_SETTINGS=(
+ ["rx"]=""
+ ["rx-mini"]=""
+ ["rx-jumbo"]=""
+ ["tx"]=""
+)
+
+MAX_VALUE=$((RANDOM % $((2**32-1))))
+RING_MAX_LIST=$(ls $NSIM_DEV_DFS/ethtool/ring/)
+
+for ring_max_entry in $RING_MAX_LIST; do
+ echo $MAX_VALUE > $NSIM_DEV_DFS/ethtool/ring/$ring_max_entry
+done
+
+CURR_SETT_LINE=$(ethtool -g $NSIM_NETDEV | grep -i -m1 -n 'Current hardware settings' | cut -f1 -d:)
+
+# populate the expected settings map
+for key in ${!SETTINGS_MAP[@]}; do
+ EXPECTED_SETTINGS[$key]=$(get_value $key)
+done
+
+# test
+for key in ${!SETTINGS_MAP[@]}; do
+ value=$((RANDOM % $MAX_VALUE))
+
+ ethtool -G $NSIM_NETDEV "$key" "$value"
+
+ EXPECTED_SETTINGS[$key]="$value"
+ expected=${EXPECTED_SETTINGS[@]}
+ current=$(update_current_settings)
+
+ check $? "$current" "$expected"
+ set +x
+done
+
+if [ $num_errors -eq 0 ]; then
+ echo "PASSED all $((num_passes)) checks"
+ exit 0
+else
+ echo "FAILED $num_errors/$((num_errors+num_passes)) checks"
+ exit 1
+fi
diff --git a/tools/testing/selftests/drivers/net/netdevsim/fib.sh b/tools/testing/selftests/drivers/net/netdevsim/fib.sh
index 2f87c3be76a9..6800de816e8b 100755
--- a/tools/testing/selftests/drivers/net/netdevsim/fib.sh
+++ b/tools/testing/selftests/drivers/net/netdevsim/fib.sh
@@ -16,6 +16,7 @@ ALL_TESTS="
ipv4_replay
ipv4_flush
ipv4_error_path
+ ipv4_delete_fail
ipv6_add
ipv6_metric
ipv6_append_single
@@ -29,17 +30,21 @@ ALL_TESTS="
ipv6_replay_single
ipv6_replay_multipath
ipv6_error_path
+ ipv6_delete_fail
"
NETDEVSIM_PATH=/sys/bus/netdevsim/
DEV_ADDR=1337
DEV=netdevsim${DEV_ADDR}
-DEVLINK_DEV=netdevsim/${DEV}
SYSFS_NET_DIR=/sys/bus/netdevsim/devices/$DEV/net/
+DEBUGFS_DIR=/sys/kernel/debug/netdevsim/$DEV/
NUM_NETIFS=0
source $lib_dir/lib.sh
-source $lib_dir/devlink_lib.sh
source $lib_dir/fib_offload_lib.sh
+DEVLINK_DEV=
+source $lib_dir/devlink_lib.sh
+DEVLINK_DEV=netdevsim/${DEV}
+
ipv4_identical_routes()
{
fib_ipv4_identical_routes_test "testns1"
@@ -155,6 +160,27 @@ ipv4_error_path()
ipv4_error_path_replay
}
+ipv4_delete_fail()
+{
+ RET=0
+
+ echo "y" > $DEBUGFS_DIR/fib/fail_route_delete
+
+ ip -n testns1 link add name dummy1 type dummy
+ ip -n testns1 link set dev dummy1 up
+
+ ip -n testns1 route add 192.0.2.0/24 dev dummy1
+ ip -n testns1 route del 192.0.2.0/24 dev dummy1 &> /dev/null
+
+ # We should not be able to delete the netdev if we are leaking a
+ # reference.
+ ip -n testns1 link del dev dummy1
+
+ log_test "IPv4 route delete failure"
+
+ echo "n" > $DEBUGFS_DIR/fib/fail_route_delete
+}
+
ipv6_add()
{
fib_ipv6_add_test "testns1"
@@ -302,6 +328,37 @@ ipv6_error_path()
ipv6_error_path_replay
}
+ipv6_delete_fail()
+{
+ RET=0
+
+ echo "y" > $DEBUGFS_DIR/fib/fail_route_delete
+
+ ip -n testns1 link add name dummy1 type dummy
+ ip -n testns1 link set dev dummy1 up
+
+ ip -n testns1 route add 2001:db8:1::/64 dev dummy1
+ ip -n testns1 route del 2001:db8:1::/64 dev dummy1 &> /dev/null
+
+ # We should not be able to delete the netdev if we are leaking a
+ # reference.
+ ip -n testns1 link del dev dummy1
+
+ log_test "IPv6 route delete failure"
+
+ echo "n" > $DEBUGFS_DIR/fib/fail_route_delete
+}
+
+fib_notify_on_flag_change_set()
+{
+ local notify=$1; shift
+
+ ip netns exec testns1 sysctl -qw net.ipv4.fib_notify_on_flag_change=$notify
+ ip netns exec testns1 sysctl -qw net.ipv6.fib_notify_on_flag_change=$notify
+
+ log_info "Set fib_notify_on_flag_change to $notify"
+}
+
setup_prepare()
{
local netdev
@@ -336,6 +393,10 @@ trap cleanup EXIT
setup_prepare
+fib_notify_on_flag_change_set 1
+tests_run
+
+fib_notify_on_flag_change_set 0
tests_run
exit $EXIT_STATUS
diff --git a/tools/testing/selftests/drivers/net/netdevsim/fib_notifications.sh b/tools/testing/selftests/drivers/net/netdevsim/fib_notifications.sh
new file mode 100755
index 000000000000..8d91191a098c
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/netdevsim/fib_notifications.sh
@@ -0,0 +1,430 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+lib_dir=$(dirname $0)/../../../net/forwarding
+
+ALL_TESTS="
+ ipv4_route_addition_test
+ ipv4_route_deletion_test
+ ipv4_route_replacement_test
+ ipv4_route_offload_failed_test
+ ipv6_route_addition_test
+ ipv6_route_deletion_test
+ ipv6_route_replacement_test
+ ipv6_route_offload_failed_test
+"
+
+NETDEVSIM_PATH=/sys/bus/netdevsim/
+DEV_ADDR=1337
+DEV=netdevsim${DEV_ADDR}
+DEVLINK_DEV=netdevsim/${DEV}
+SYSFS_NET_DIR=/sys/bus/netdevsim/devices/$DEV/net/
+DEBUGFS_DIR=/sys/kernel/debug/netdevsim/$DEV/
+NUM_NETIFS=0
+source $lib_dir/lib.sh
+
+check_rt_offload_failed()
+{
+ local outfile=$1; shift
+ local line
+
+ # Make sure that the first notification was emitted without
+ # RTM_F_OFFLOAD_FAILED flag and the second with RTM_F_OFFLOAD_FAILED
+ # flag
+ head -n 1 $outfile | grep -q "rt_offload_failed"
+ if [[ $? -eq 0 ]]; then
+ return 1
+ fi
+
+ head -n 2 $outfile | tail -n 1 | grep -q "rt_offload_failed"
+}
+
+check_rt_trap()
+{
+ local outfile=$1; shift
+ local line
+
+ # Make sure that the first notification was emitted without RTM_F_TRAP
+ # flag and the second with RTM_F_TRAP flag
+ head -n 1 $outfile | grep -q "rt_trap"
+ if [[ $? -eq 0 ]]; then
+ return 1
+ fi
+
+ head -n 2 $outfile | tail -n 1 | grep -q "rt_trap"
+}
+
+route_notify_check()
+{
+ local outfile=$1; shift
+ local expected_num_lines=$1; shift
+ local offload_failed=${1:-0}; shift
+
+ # check the monitor results
+ lines=`wc -l $outfile | cut "-d " -f1`
+ test $lines -eq $expected_num_lines
+ check_err $? "$expected_num_lines notifications were expected but $lines were received"
+
+ if [[ $expected_num_lines -eq 1 ]]; then
+ return
+ fi
+
+ if [[ $offload_failed -eq 0 ]]; then
+ check_rt_trap $outfile
+ check_err $? "Wrong RTM_F_TRAP flags in notifications"
+ else
+ check_rt_offload_failed $outfile
+ check_err $? "Wrong RTM_F_OFFLOAD_FAILED flags in notifications"
+ fi
+}
+
+route_addition_check()
+{
+ local ip=$1; shift
+ local notify=$1; shift
+ local route=$1; shift
+ local expected_num_notifications=$1; shift
+ local offload_failed=${1:-0}; shift
+
+ ip netns exec testns1 sysctl -qw net.$ip.fib_notify_on_flag_change=$notify
+
+ local outfile=$(mktemp)
+
+ $IP monitor route &> $outfile &
+ sleep 1
+ $IP route add $route dev dummy1
+ sleep 1
+ kill %% && wait %% &> /dev/null
+
+ route_notify_check $outfile $expected_num_notifications $offload_failed
+ rm -f $outfile
+
+ $IP route del $route dev dummy1
+}
+
+ipv4_route_addition_test()
+{
+ RET=0
+
+ local ip="ipv4"
+ local route=192.0.2.0/24
+
+ # Make sure a single notification will be emitted for the programmed
+ # route.
+ local notify=0
+ local expected_num_notifications=1
+ # route_addition_check will assign value to RET.
+ route_addition_check $ip $notify $route $expected_num_notifications
+
+ # Make sure two notifications will be emitted for the programmed route.
+ notify=1
+ expected_num_notifications=2
+ route_addition_check $ip $notify $route $expected_num_notifications
+
+ # notify=2 means emit notifications only for failed route installation,
+ # make sure a single notification will be emitted for the programmed
+ # route.
+ notify=2
+ expected_num_notifications=1
+ route_addition_check $ip $notify $route $expected_num_notifications
+
+ log_test "IPv4 route addition"
+}
+
+route_deletion_check()
+{
+ local ip=$1; shift
+ local notify=$1; shift
+ local route=$1; shift
+ local expected_num_notifications=$1; shift
+
+ ip netns exec testns1 sysctl -qw net.$ip.fib_notify_on_flag_change=$notify
+ $IP route add $route dev dummy1
+ sleep 1
+
+ local outfile=$(mktemp)
+
+ $IP monitor route &> $outfile &
+ sleep 1
+ $IP route del $route dev dummy1
+ sleep 1
+ kill %% && wait %% &> /dev/null
+
+ route_notify_check $outfile $expected_num_notifications
+ rm -f $outfile
+}
+
+ipv4_route_deletion_test()
+{
+ RET=0
+
+ local ip="ipv4"
+ local route=192.0.2.0/24
+ local expected_num_notifications=1
+
+ # Make sure a single notification will be emitted for the deleted route,
+ # regardless of fib_notify_on_flag_change value.
+ local notify=0
+ # route_deletion_check will assign value to RET.
+ route_deletion_check $ip $notify $route $expected_num_notifications
+
+ notify=1
+ route_deletion_check $ip $notify $route $expected_num_notifications
+
+ log_test "IPv4 route deletion"
+}
+
+route_replacement_check()
+{
+ local ip=$1; shift
+ local notify=$1; shift
+ local route=$1; shift
+ local expected_num_notifications=$1; shift
+
+ ip netns exec testns1 sysctl -qw net.$ip.fib_notify_on_flag_change=$notify
+ $IP route add $route dev dummy1
+ sleep 1
+
+ local outfile=$(mktemp)
+
+ $IP monitor route &> $outfile &
+ sleep 1
+ $IP route replace $route dev dummy2
+ sleep 1
+ kill %% && wait %% &> /dev/null
+
+ route_notify_check $outfile $expected_num_notifications
+ rm -f $outfile
+
+ $IP route del $route dev dummy2
+}
+
+ipv4_route_replacement_test()
+{
+ RET=0
+
+ local ip="ipv4"
+ local route=192.0.2.0/24
+
+ $IP link add name dummy2 type dummy
+ $IP link set dev dummy2 up
+
+ # Make sure a single notification will be emitted for the new route.
+ local notify=0
+ local expected_num_notifications=1
+ # route_replacement_check will assign value to RET.
+ route_replacement_check $ip $notify $route $expected_num_notifications
+
+ # Make sure two notifications will be emitted for the new route.
+ notify=1
+ expected_num_notifications=2
+ route_replacement_check $ip $notify $route $expected_num_notifications
+
+ # notify=2 means emit notifications only for failed route installation,
+ # make sure a single notification will be emitted for the new route.
+ notify=2
+ expected_num_notifications=1
+ route_replacement_check $ip $notify $route $expected_num_notifications
+
+ $IP link del name dummy2
+
+ log_test "IPv4 route replacement"
+}
+
+ipv4_route_offload_failed_test()
+{
+
+ RET=0
+
+ local ip="ipv4"
+ local route=192.0.2.0/24
+ local offload_failed=1
+
+ echo "y"> $DEBUGFS_DIR/fib/fail_route_offload
+ check_err $? "Failed to setup route offload to fail"
+
+ # Make sure a single notification will be emitted for the programmed
+ # route.
+ local notify=0
+ local expected_num_notifications=1
+ route_addition_check $ip $notify $route $expected_num_notifications \
+ $offload_failed
+
+ # Make sure two notifications will be emitted for the new route.
+ notify=1
+ expected_num_notifications=2
+ route_addition_check $ip $notify $route $expected_num_notifications \
+ $offload_failed
+
+ # notify=2 means emit notifications only for failed route installation,
+ # make sure two notifications will be emitted for the new route.
+ notify=2
+ expected_num_notifications=2
+ route_addition_check $ip $notify $route $expected_num_notifications \
+ $offload_failed
+
+ echo "n"> $DEBUGFS_DIR/fib/fail_route_offload
+ check_err $? "Failed to setup route offload not to fail"
+
+ log_test "IPv4 route offload failed"
+}
+
+ipv6_route_addition_test()
+{
+ RET=0
+
+ local ip="ipv6"
+ local route=2001:db8:1::/64
+
+ # Make sure a single notification will be emitted for the programmed
+ # route.
+ local notify=0
+ local expected_num_notifications=1
+ route_addition_check $ip $notify $route $expected_num_notifications
+
+ # Make sure two notifications will be emitted for the programmed route.
+ notify=1
+ expected_num_notifications=2
+ route_addition_check $ip $notify $route $expected_num_notifications
+
+ # notify=2 means emit notifications only for failed route installation,
+ # make sure a single notification will be emitted for the programmed
+ # route.
+ notify=2
+ expected_num_notifications=1
+ route_addition_check $ip $notify $route $expected_num_notifications
+
+ log_test "IPv6 route addition"
+}
+
+ipv6_route_deletion_test()
+{
+ RET=0
+
+ local ip="ipv6"
+ local route=2001:db8:1::/64
+ local expected_num_notifications=1
+
+ # Make sure a single notification will be emitted for the deleted route,
+ # regardless of fib_notify_on_flag_change value.
+ local notify=0
+ route_deletion_check $ip $notify $route $expected_num_notifications
+
+ notify=1
+ route_deletion_check $ip $notify $route $expected_num_notifications
+
+ log_test "IPv6 route deletion"
+}
+
+ipv6_route_replacement_test()
+{
+ RET=0
+
+ local ip="ipv6"
+ local route=2001:db8:1::/64
+
+ $IP link add name dummy2 type dummy
+ $IP link set dev dummy2 up
+
+ # Make sure a single notification will be emitted for the new route.
+ local notify=0
+ local expected_num_notifications=1
+ route_replacement_check $ip $notify $route $expected_num_notifications
+
+ # Make sure two notifications will be emitted for the new route.
+ notify=1
+ expected_num_notifications=2
+ route_replacement_check $ip $notify $route $expected_num_notifications
+
+ # notify=2 means emit notifications only for failed route installation,
+ # make sure a single notification will be emitted for the new route.
+ notify=2
+ expected_num_notifications=1
+ route_replacement_check $ip $notify $route $expected_num_notifications
+
+ $IP link del name dummy2
+
+ log_test "IPv6 route replacement"
+}
+
+ipv6_route_offload_failed_test()
+{
+
+ RET=0
+
+ local ip="ipv6"
+ local route=2001:db8:1::/64
+ local offload_failed=1
+
+ echo "y"> $DEBUGFS_DIR/fib/fail_route_offload
+ check_err $? "Failed to setup route offload to fail"
+
+ # Make sure a single notification will be emitted for the programmed
+ # route.
+ local notify=0
+ local expected_num_notifications=1
+ route_addition_check $ip $notify $route $expected_num_notifications \
+ $offload_failed
+
+ # Make sure two notifications will be emitted for the new route.
+ notify=1
+ expected_num_notifications=2
+ route_addition_check $ip $notify $route $expected_num_notifications \
+ $offload_failed
+
+ # notify=2 means emit notifications only for failed route installation,
+ # make sure two notifications will be emitted for the new route.
+ notify=2
+ expected_num_notifications=2
+ route_addition_check $ip $notify $route $expected_num_notifications \
+ $offload_failed
+
+ echo "n"> $DEBUGFS_DIR/fib/fail_route_offload
+ check_err $? "Failed to setup route offload not to fail"
+
+ log_test "IPv6 route offload failed"
+}
+
+setup_prepare()
+{
+ modprobe netdevsim &> /dev/null
+ echo "$DEV_ADDR 1" > ${NETDEVSIM_PATH}/new_device
+ while [ ! -d $SYSFS_NET_DIR ] ; do :; done
+
+ ip netns add testns1
+
+ if [ $? -ne 0 ]; then
+ echo "Failed to add netns \"testns1\""
+ exit 1
+ fi
+
+ devlink dev reload $DEVLINK_DEV netns testns1
+
+ if [ $? -ne 0 ]; then
+ echo "Failed to reload into netns \"testns1\""
+ exit 1
+ fi
+
+ IP="ip -n testns1"
+
+ $IP link add name dummy1 type dummy
+ $IP link set dev dummy1 up
+}
+
+cleanup()
+{
+ pre_cleanup
+
+ $IP link del name dummy1
+ ip netns del testns1
+ echo "$DEV_ADDR" > ${NETDEVSIM_PATH}/del_device
+ modprobe -r netdevsim &> /dev/null
+}
+
+trap cleanup EXIT
+
+setup_prepare
+
+tests_run
+
+exit $EXIT_STATUS
diff --git a/tools/testing/selftests/drivers/net/netdevsim/hw_stats_l3.sh b/tools/testing/selftests/drivers/net/netdevsim/hw_stats_l3.sh
new file mode 100755
index 000000000000..cba5ac08426b
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/netdevsim/hw_stats_l3.sh
@@ -0,0 +1,421 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+lib_dir=$(dirname $0)/../../../net/forwarding
+
+ALL_TESTS="
+ l3_reporting_test
+ l3_fail_next_test
+ l3_counter_test
+ l3_rollback_test
+ l3_monitor_test
+"
+
+NETDEVSIM_PATH=/sys/bus/netdevsim/
+DEV_ADDR_1=1337
+DEV_ADDR_2=1057
+DEV_ADDR_3=5417
+NUM_NETIFS=0
+source $lib_dir/lib.sh
+
+DUMMY_IFINDEX=
+
+DEV_ADDR()
+{
+ local n=$1; shift
+ local var=DEV_ADDR_$n
+
+ echo ${!var}
+}
+
+DEV()
+{
+ echo netdevsim$(DEV_ADDR $1)
+}
+
+DEVLINK_DEV()
+{
+ echo netdevsim/$(DEV $1)
+}
+
+SYSFS_NET_DIR()
+{
+ echo /sys/bus/netdevsim/devices/$(DEV $1)/net/
+}
+
+DEBUGFS_DIR()
+{
+ echo /sys/kernel/debug/netdevsim/$(DEV $1)/
+}
+
+nsim_add()
+{
+ local n=$1; shift
+
+ echo "$(DEV_ADDR $n) 1" > ${NETDEVSIM_PATH}/new_device
+ while [ ! -d $(SYSFS_NET_DIR $n) ] ; do :; done
+}
+
+nsim_reload()
+{
+ local n=$1; shift
+ local ns=$1; shift
+
+ devlink dev reload $(DEVLINK_DEV $n) netns $ns
+
+ if [ $? -ne 0 ]; then
+ echo "Failed to reload $(DEV $n) into netns \"testns1\""
+ exit 1
+ fi
+
+}
+
+nsim_del()
+{
+ local n=$1; shift
+
+ echo "$(DEV_ADDR $n)" > ${NETDEVSIM_PATH}/del_device
+}
+
+nsim_hwstats_toggle()
+{
+ local action=$1; shift
+ local instance=$1; shift
+ local netdev=$1; shift
+ local type=$1; shift
+
+ local ifindex=$($IP -j link show dev $netdev | jq '.[].ifindex')
+
+ echo $ifindex > $(DEBUGFS_DIR $instance)/hwstats/$type/$action
+}
+
+nsim_hwstats_enable()
+{
+ nsim_hwstats_toggle enable_ifindex "$@"
+}
+
+nsim_hwstats_disable()
+{
+ nsim_hwstats_toggle disable_ifindex "$@"
+}
+
+nsim_hwstats_fail_next_enable()
+{
+ nsim_hwstats_toggle fail_next_enable "$@"
+}
+
+setup_prepare()
+{
+ modprobe netdevsim &> /dev/null
+ nsim_add 1
+ nsim_add 2
+ nsim_add 3
+
+ ip netns add testns1
+
+ if [ $? -ne 0 ]; then
+ echo "Failed to add netns \"testns1\""
+ exit 1
+ fi
+
+ nsim_reload 1 testns1
+ nsim_reload 2 testns1
+ nsim_reload 3 testns1
+
+ IP="ip -n testns1"
+
+ $IP link add name dummy1 type dummy
+ $IP link set dev dummy1 up
+ DUMMY_IFINDEX=$($IP -j link show dev dummy1 | jq '.[].ifindex')
+}
+
+cleanup()
+{
+ pre_cleanup
+
+ $IP link del name dummy1
+ ip netns del testns1
+ nsim_del 3
+ nsim_del 2
+ nsim_del 1
+ modprobe -r netdevsim &> /dev/null
+}
+
+netdev_hwstats_used()
+{
+ local netdev=$1; shift
+ local type=$1; shift
+
+ $IP -j stats show dev "$netdev" group offload subgroup hw_stats_info |
+ jq '.[].info.l3_stats.used'
+}
+
+netdev_check_used()
+{
+ local netdev=$1; shift
+ local type=$1; shift
+
+ [[ $(netdev_hwstats_used $netdev $type) == "true" ]]
+}
+
+netdev_check_unused()
+{
+ local netdev=$1; shift
+ local type=$1; shift
+
+ [[ $(netdev_hwstats_used $netdev $type) == "false" ]]
+}
+
+netdev_hwstats_request()
+{
+ local netdev=$1; shift
+ local type=$1; shift
+
+ $IP -j stats show dev "$netdev" group offload subgroup hw_stats_info |
+ jq ".[].info.${type}_stats.request"
+}
+
+netdev_check_requested()
+{
+ local netdev=$1; shift
+ local type=$1; shift
+
+ [[ $(netdev_hwstats_request $netdev $type) == "true" ]]
+}
+
+netdev_check_unrequested()
+{
+ local netdev=$1; shift
+ local type=$1; shift
+
+ [[ $(netdev_hwstats_request $netdev $type) == "false" ]]
+}
+
+reporting_test()
+{
+ local type=$1; shift
+ local instance=1
+
+ RET=0
+
+ [[ -n $(netdev_hwstats_used dummy1 $type) ]]
+ check_err $? "$type stats not reported"
+
+ netdev_check_unused dummy1 $type
+ check_err $? "$type stats reported as used before either device or netdevsim request"
+
+ nsim_hwstats_enable $instance dummy1 $type
+ netdev_check_unused dummy1 $type
+ check_err $? "$type stats reported as used before device request"
+ netdev_check_unrequested dummy1 $type
+ check_err $? "$type stats reported as requested before device request"
+
+ $IP stats set dev dummy1 ${type}_stats on
+ netdev_check_used dummy1 $type
+ check_err $? "$type stats reported as not used after both device and netdevsim request"
+ netdev_check_requested dummy1 $type
+ check_err $? "$type stats reported as not requested after device request"
+
+ nsim_hwstats_disable $instance dummy1 $type
+ netdev_check_unused dummy1 $type
+ check_err $? "$type stats reported as used after netdevsim request withdrawn"
+
+ nsim_hwstats_enable $instance dummy1 $type
+ netdev_check_used dummy1 $type
+ check_err $? "$type stats reported as not used after netdevsim request reenabled"
+
+ $IP stats set dev dummy1 ${type}_stats off
+ netdev_check_unused dummy1 $type
+ check_err $? "$type stats reported as used after device request withdrawn"
+ netdev_check_unrequested dummy1 $type
+ check_err $? "$type stats reported as requested after device request withdrawn"
+
+ nsim_hwstats_disable $instance dummy1 $type
+ netdev_check_unused dummy1 $type
+ check_err $? "$type stats reported as used after both requests withdrawn"
+
+ log_test "Reporting of $type stats usage"
+}
+
+l3_reporting_test()
+{
+ reporting_test l3
+}
+
+__fail_next_test()
+{
+ local instance=$1; shift
+ local type=$1; shift
+
+ RET=0
+
+ netdev_check_unused dummy1 $type
+ check_err $? "$type stats reported as used before either device or netdevsim request"
+
+ nsim_hwstats_enable $instance dummy1 $type
+ nsim_hwstats_fail_next_enable $instance dummy1 $type
+ netdev_check_unused dummy1 $type
+ check_err $? "$type stats reported as used before device request"
+ netdev_check_unrequested dummy1 $type
+ check_err $? "$type stats reported as requested before device request"
+
+ $IP stats set dev dummy1 ${type}_stats on 2>/dev/null
+ check_fail $? "$type stats request not bounced as it should have been"
+ netdev_check_unused dummy1 $type
+ check_err $? "$type stats reported as used after bounce"
+ netdev_check_unrequested dummy1 $type
+ check_err $? "$type stats reported as requested after bounce"
+
+ $IP stats set dev dummy1 ${type}_stats on
+ check_err $? "$type stats request failed when it shouldn't have"
+ netdev_check_used dummy1 $type
+ check_err $? "$type stats reported as not used after both device and netdevsim request"
+ netdev_check_requested dummy1 $type
+ check_err $? "$type stats reported as not requested after device request"
+
+ $IP stats set dev dummy1 ${type}_stats off
+ nsim_hwstats_disable $instance dummy1 $type
+
+ log_test "Injected failure of $type stats enablement (netdevsim #$instance)"
+}
+
+fail_next_test()
+{
+ __fail_next_test 1 "$@"
+ __fail_next_test 2 "$@"
+ __fail_next_test 3 "$@"
+}
+
+l3_fail_next_test()
+{
+ fail_next_test l3
+}
+
+get_hwstat()
+{
+ local netdev=$1; shift
+ local type=$1; shift
+ local selector=$1; shift
+
+ $IP -j stats show dev $netdev group offload subgroup ${type}_stats |
+ jq ".[0].stats64.${selector}"
+}
+
+counter_test()
+{
+ local type=$1; shift
+ local instance=1
+
+ RET=0
+
+ nsim_hwstats_enable $instance dummy1 $type
+ $IP stats set dev dummy1 ${type}_stats on
+ netdev_check_used dummy1 $type
+ check_err $? "$type stats reported as not used after both device and netdevsim request"
+
+ # Netdevsim counts 10pps on ingress. We should see maybe a couple
+ # packets, unless things take a reeealy long time.
+ local pkts=$(get_hwstat dummy1 l3 rx.packets)
+ ((pkts < 10))
+ check_err $? "$type stats show >= 10 packets after first enablement"
+
+ sleep 2.5
+
+ local pkts=$(get_hwstat dummy1 l3 rx.packets)
+ ((pkts >= 20))
+ check_err $? "$type stats show < 20 packets after 2.5s passed"
+
+ $IP stats set dev dummy1 ${type}_stats off
+
+ sleep 2
+
+ $IP stats set dev dummy1 ${type}_stats on
+ local pkts=$(get_hwstat dummy1 l3 rx.packets)
+ ((pkts < 10))
+ check_err $? "$type stats show >= 10 packets after second enablement"
+
+ $IP stats set dev dummy1 ${type}_stats off
+ nsim_hwstats_fail_next_enable $instance dummy1 $type
+ $IP stats set dev dummy1 ${type}_stats on 2>/dev/null
+ check_fail $? "$type stats request not bounced as it should have been"
+
+ sleep 2
+
+ $IP stats set dev dummy1 ${type}_stats on
+ local pkts=$(get_hwstat dummy1 l3 rx.packets)
+ ((pkts < 10))
+ check_err $? "$type stats show >= 10 packets after post-fail enablement"
+
+ $IP stats set dev dummy1 ${type}_stats off
+
+ log_test "Counter values in $type stats"
+}
+
+l3_counter_test()
+{
+ counter_test l3
+}
+
+rollback_test()
+{
+ local type=$1; shift
+
+ RET=0
+
+ nsim_hwstats_enable 1 dummy1 l3
+ nsim_hwstats_enable 2 dummy1 l3
+ nsim_hwstats_enable 3 dummy1 l3
+
+ # The three netdevsim instances are registered in order of their number
+ # one after another. It is reasonable to expect that whatever
+ # notifications take place hit no. 2 in between hitting nos. 1 and 3,
+ # whatever the actual order. This allows us to test that a fail caused
+ # by no. 2 does not leave the system in a partial state, and rolls
+ # everything back.
+
+ nsim_hwstats_fail_next_enable 2 dummy1 l3
+ $IP stats set dev dummy1 ${type}_stats on 2>/dev/null
+ check_fail $? "$type stats request not bounced as it should have been"
+
+ netdev_check_unused dummy1 $type
+ check_err $? "$type stats reported as used after bounce"
+ netdev_check_unrequested dummy1 $type
+ check_err $? "$type stats reported as requested after bounce"
+
+ sleep 2
+
+ $IP stats set dev dummy1 ${type}_stats on
+ check_err $? "$type stats request not upheld as it should have been"
+
+ local pkts=$(get_hwstat dummy1 l3 rx.packets)
+ ((pkts < 10))
+ check_err $? "$type stats show $pkts packets after post-fail enablement"
+
+ $IP stats set dev dummy1 ${type}_stats off
+
+ nsim_hwstats_disable 3 dummy1 l3
+ nsim_hwstats_disable 2 dummy1 l3
+ nsim_hwstats_disable 1 dummy1 l3
+
+ log_test "Failure in $type stats enablement rolled back"
+}
+
+l3_rollback_test()
+{
+ rollback_test l3
+}
+
+l3_monitor_test()
+{
+ hw_stats_monitor_test dummy1 l3 \
+ "nsim_hwstats_enable 1 dummy1 l3" \
+ "nsim_hwstats_disable 1 dummy1 l3" \
+ "$IP"
+}
+
+trap cleanup EXIT
+
+setup_prepare
+tests_run
+
+exit $EXIT_STATUS
diff --git a/tools/testing/selftests/drivers/net/netdevsim/nexthop.sh b/tools/testing/selftests/drivers/net/netdevsim/nexthop.sh
new file mode 100755
index 000000000000..e8e0dc088d6a
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/netdevsim/nexthop.sh
@@ -0,0 +1,1058 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+#
+# This test is for checking the nexthop offload API. It makes use of netdevsim
+# which registers a listener to the nexthop notification chain.
+
+lib_dir=$(dirname $0)/../../../net/forwarding
+
+ALL_TESTS="
+ nexthop_single_add_test
+ nexthop_single_add_err_test
+ nexthop_group_add_test
+ nexthop_group_add_err_test
+ nexthop_res_group_add_test
+ nexthop_res_group_add_err_test
+ nexthop_group_replace_test
+ nexthop_group_replace_err_test
+ nexthop_res_group_replace_test
+ nexthop_res_group_replace_err_test
+ nexthop_res_group_idle_timer_test
+ nexthop_res_group_idle_timer_del_test
+ nexthop_res_group_increase_idle_timer_test
+ nexthop_res_group_decrease_idle_timer_test
+ nexthop_res_group_unbalanced_timer_test
+ nexthop_res_group_unbalanced_timer_del_test
+ nexthop_res_group_no_unbalanced_timer_test
+ nexthop_res_group_short_unbalanced_timer_test
+ nexthop_res_group_increase_unbalanced_timer_test
+ nexthop_res_group_decrease_unbalanced_timer_test
+ nexthop_res_group_force_migrate_busy_test
+ nexthop_single_replace_test
+ nexthop_single_replace_err_test
+ nexthop_single_in_group_replace_test
+ nexthop_single_in_group_replace_err_test
+ nexthop_single_in_res_group_replace_test
+ nexthop_single_in_res_group_replace_err_test
+ nexthop_single_in_group_delete_test
+ nexthop_single_in_group_delete_err_test
+ nexthop_single_in_res_group_delete_test
+ nexthop_single_in_res_group_delete_err_test
+ nexthop_replay_test
+ nexthop_replay_err_test
+"
+NETDEVSIM_PATH=/sys/bus/netdevsim/
+DEV_ADDR=1337
+DEV=netdevsim${DEV_ADDR}
+SYSFS_NET_DIR=/sys/bus/netdevsim/devices/$DEV/net/
+DEBUGFS_NET_DIR=/sys/kernel/debug/netdevsim/$DEV/
+NUM_NETIFS=0
+source $lib_dir/lib.sh
+
+DEVLINK_DEV=
+source $lib_dir/devlink_lib.sh
+DEVLINK_DEV=netdevsim/${DEV}
+
+nexthop_check()
+{
+ local nharg="$1"; shift
+ local expected="$1"; shift
+
+ out=$($IP nexthop show ${nharg} | sed -e 's/ *$//')
+ if [[ "$out" != "$expected" ]]; then
+ return 1
+ fi
+
+ return 0
+}
+
+nexthop_bucket_nhid_count_check()
+{
+ local group_id=$1; shift
+ local expected
+ local count
+ local nhid
+ local ret
+
+ while (($# > 0)); do
+ nhid=$1; shift
+ expected=$1; shift
+
+ count=$($IP nexthop bucket show id $group_id nhid $nhid |
+ grep "trap" | wc -l)
+ if ((expected != count)); then
+ return 1
+ fi
+ done
+
+ return 0
+}
+
+nexthop_resource_check()
+{
+ local expected_occ=$1; shift
+
+ occ=$($DEVLINK -jp resource show $DEVLINK_DEV \
+ | jq '.[][][] | select(.name=="nexthops") | .["occ"]')
+
+ if [ $expected_occ -ne $occ ]; then
+ return 1
+ fi
+
+ return 0
+}
+
+nexthop_resource_set()
+{
+ local size=$1; shift
+
+ $DEVLINK resource set $DEVLINK_DEV path nexthops size $size
+ $DEVLINK dev reload $DEVLINK_DEV
+}
+
+nexthop_single_add_test()
+{
+ RET=0
+
+ $IP nexthop add id 1 via 192.0.2.2 dev dummy1
+ nexthop_check "id 1" "id 1 via 192.0.2.2 dev dummy1 scope link trap"
+ check_err $? "Unexpected nexthop entry"
+
+ nexthop_resource_check 1
+ check_err $? "Wrong nexthop occupancy"
+
+ $IP nexthop del id 1
+ nexthop_resource_check 0
+ check_err $? "Wrong nexthop occupancy after delete"
+
+ log_test "Single nexthop add and delete"
+}
+
+nexthop_single_add_err_test()
+{
+ RET=0
+
+ nexthop_resource_set 1
+
+ $IP nexthop add id 1 via 192.0.2.2 dev dummy1
+
+ $IP nexthop add id 2 via 192.0.2.3 dev dummy1 &> /dev/null
+ check_fail $? "Nexthop addition succeeded when should fail"
+
+ nexthop_resource_check 1
+ check_err $? "Wrong nexthop occupancy"
+
+ log_test "Single nexthop add failure"
+
+ $IP nexthop flush &> /dev/null
+ nexthop_resource_set 9999
+}
+
+nexthop_group_add_test()
+{
+ RET=0
+
+ $IP nexthop add id 1 via 192.0.2.2 dev dummy1
+ $IP nexthop add id 2 via 192.0.2.3 dev dummy1
+
+ $IP nexthop add id 10 group 1/2
+ nexthop_check "id 10" "id 10 group 1/2 trap"
+ check_err $? "Unexpected nexthop group entry"
+
+ nexthop_resource_check 4
+ check_err $? "Wrong nexthop occupancy"
+
+ $IP nexthop del id 10
+ nexthop_resource_check 2
+ check_err $? "Wrong nexthop occupancy after delete"
+
+ $IP nexthop add id 10 group 1,20/2,39
+ nexthop_check "id 10" "id 10 group 1,20/2,39 trap"
+ check_err $? "Unexpected weighted nexthop group entry"
+
+ nexthop_resource_check 61
+ check_err $? "Wrong weighted nexthop occupancy"
+
+ $IP nexthop del id 10
+ nexthop_resource_check 2
+ check_err $? "Wrong nexthop occupancy after delete"
+
+ log_test "Nexthop group add and delete"
+
+ $IP nexthop flush &> /dev/null
+}
+
+nexthop_group_add_err_test()
+{
+ RET=0
+
+ nexthop_resource_set 2
+
+ $IP nexthop add id 1 via 192.0.2.2 dev dummy1
+ $IP nexthop add id 2 via 192.0.2.3 dev dummy1
+
+ $IP nexthop add id 10 group 1/2 &> /dev/null
+ check_fail $? "Nexthop group addition succeeded when should fail"
+
+ nexthop_resource_check 2
+ check_err $? "Wrong nexthop occupancy"
+
+ log_test "Nexthop group add failure"
+
+ $IP nexthop flush &> /dev/null
+ nexthop_resource_set 9999
+}
+
+nexthop_res_group_add_test()
+{
+ RET=0
+
+ $IP nexthop add id 1 via 192.0.2.2 dev dummy1
+ $IP nexthop add id 2 via 192.0.2.3 dev dummy1
+
+ $IP nexthop add id 10 group 1/2 type resilient buckets 4
+ nexthop_check "id 10" "id 10 group 1/2 type resilient buckets 4 idle_timer 120 unbalanced_timer 0 unbalanced_time 0 trap"
+ check_err $? "Unexpected nexthop group entry"
+
+ nexthop_bucket_nhid_count_check 10 1 2
+ check_err $? "Wrong nexthop buckets count"
+ nexthop_bucket_nhid_count_check 10 2 2
+ check_err $? "Wrong nexthop buckets count"
+
+ nexthop_resource_check 6
+ check_err $? "Wrong nexthop occupancy"
+
+ $IP nexthop del id 10
+ nexthop_resource_check 2
+ check_err $? "Wrong nexthop occupancy after delete"
+
+ $IP nexthop add id 10 group 1,3/2,2 type resilient buckets 5
+ nexthop_check "id 10" "id 10 group 1,3/2,2 type resilient buckets 5 idle_timer 120 unbalanced_timer 0 unbalanced_time 0 trap"
+ check_err $? "Unexpected weighted nexthop group entry"
+
+ nexthop_bucket_nhid_count_check 10 1 3
+ check_err $? "Wrong nexthop buckets count"
+ nexthop_bucket_nhid_count_check 10 2 2
+ check_err $? "Wrong nexthop buckets count"
+
+ nexthop_resource_check 7
+ check_err $? "Wrong weighted nexthop occupancy"
+
+ $IP nexthop del id 10
+ nexthop_resource_check 2
+ check_err $? "Wrong nexthop occupancy after delete"
+
+ log_test "Resilient nexthop group add and delete"
+
+ $IP nexthop flush &> /dev/null
+}
+
+nexthop_res_group_add_err_test()
+{
+ RET=0
+
+ nexthop_resource_set 2
+
+ $IP nexthop add id 1 via 192.0.2.2 dev dummy1
+ $IP nexthop add id 2 via 192.0.2.3 dev dummy1
+
+ $IP nexthop add id 10 group 1/2 type resilient buckets 4 &> /dev/null
+ check_fail $? "Nexthop group addition succeeded when should fail"
+
+ nexthop_resource_check 2
+ check_err $? "Wrong nexthop occupancy"
+
+ log_test "Resilient nexthop group add failure"
+
+ $IP nexthop flush &> /dev/null
+ nexthop_resource_set 9999
+}
+
+nexthop_group_replace_test()
+{
+ RET=0
+
+ $IP nexthop add id 1 via 192.0.2.2 dev dummy1
+ $IP nexthop add id 2 via 192.0.2.3 dev dummy1
+ $IP nexthop add id 3 via 192.0.2.4 dev dummy1
+ $IP nexthop add id 10 group 1/2
+
+ $IP nexthop replace id 10 group 1/2/3
+ nexthop_check "id 10" "id 10 group 1/2/3 trap"
+ check_err $? "Unexpected nexthop group entry"
+
+ nexthop_resource_check 6
+ check_err $? "Wrong nexthop occupancy"
+
+ log_test "Nexthop group replace"
+
+ $IP nexthop flush &> /dev/null
+}
+
+nexthop_group_replace_err_test()
+{
+ RET=0
+
+ nexthop_resource_set 5
+
+ $IP nexthop add id 1 via 192.0.2.2 dev dummy1
+ $IP nexthop add id 2 via 192.0.2.3 dev dummy1
+ $IP nexthop add id 3 via 192.0.2.4 dev dummy1
+ $IP nexthop add id 10 group 1/2
+
+ $IP nexthop replace id 10 group 1/2/3 &> /dev/null
+ check_fail $? "Nexthop group replacement succeeded when should fail"
+
+ nexthop_check "id 10" "id 10 group 1/2 trap"
+ check_err $? "Unexpected nexthop group entry after failure"
+
+ nexthop_resource_check 5
+ check_err $? "Wrong nexthop occupancy after failure"
+
+ log_test "Nexthop group replace failure"
+
+ $IP nexthop flush &> /dev/null
+ nexthop_resource_set 9999
+}
+
+nexthop_res_group_replace_test()
+{
+ RET=0
+
+ $IP nexthop add id 1 via 192.0.2.2 dev dummy1
+ $IP nexthop add id 2 via 192.0.2.3 dev dummy1
+ $IP nexthop add id 3 via 192.0.2.4 dev dummy1
+ $IP nexthop add id 10 group 1/2 type resilient buckets 6
+
+ $IP nexthop replace id 10 group 1/2/3 type resilient
+ nexthop_check "id 10" "id 10 group 1/2/3 type resilient buckets 6 idle_timer 120 unbalanced_timer 0 unbalanced_time 0 trap"
+ check_err $? "Unexpected nexthop group entry"
+
+ nexthop_bucket_nhid_count_check 10 1 2
+ check_err $? "Wrong nexthop buckets count"
+ nexthop_bucket_nhid_count_check 10 2 2
+ check_err $? "Wrong nexthop buckets count"
+ nexthop_bucket_nhid_count_check 10 3 2
+ check_err $? "Wrong nexthop buckets count"
+
+ nexthop_resource_check 9
+ check_err $? "Wrong nexthop occupancy"
+
+ log_test "Resilient nexthop group replace"
+
+ $IP nexthop flush &> /dev/null
+}
+
+nexthop_res_group_replace_err_test()
+{
+ RET=0
+
+ $IP nexthop add id 1 via 192.0.2.2 dev dummy1
+ $IP nexthop add id 2 via 192.0.2.3 dev dummy1
+ $IP nexthop add id 3 via 192.0.2.4 dev dummy1
+ $IP nexthop add id 10 group 1/2 type resilient buckets 6
+
+ ip netns exec testns1 \
+ echo 1 > $DEBUGFS_NET_DIR/fib/fail_res_nexthop_group_replace
+ $IP nexthop replace id 10 group 1/2/3 type resilient &> /dev/null
+ check_fail $? "Nexthop group replacement succeeded when should fail"
+
+ nexthop_check "id 10" "id 10 group 1/2 type resilient buckets 6 idle_timer 120 unbalanced_timer 0 unbalanced_time 0 trap"
+ check_err $? "Unexpected nexthop group entry after failure"
+
+ nexthop_bucket_nhid_count_check 10 1 3
+ check_err $? "Wrong nexthop buckets count"
+ nexthop_bucket_nhid_count_check 10 2 3
+ check_err $? "Wrong nexthop buckets count"
+
+ nexthop_resource_check 9
+ check_err $? "Wrong nexthop occupancy after failure"
+
+ log_test "Resilient nexthop group replace failure"
+
+ $IP nexthop flush &> /dev/null
+ ip netns exec testns1 \
+ echo 0 > $DEBUGFS_NET_DIR/fib/fail_res_nexthop_group_replace
+}
+
+nexthop_res_mark_buckets_busy()
+{
+ local group_id=$1; shift
+ local nhid=$1; shift
+ local count=$1; shift
+ local index
+
+ for index in $($IP -j nexthop bucket show id $group_id nhid $nhid |
+ jq '.[].bucket.index' | head -n ${count:--0})
+ do
+ echo $group_id $index \
+ > $DEBUGFS_NET_DIR/fib/nexthop_bucket_activity
+ done
+}
+
+nexthop_res_num_nhid_buckets()
+{
+ local group_id=$1; shift
+ local nhid=$1; shift
+
+ $IP -j nexthop bucket show id $group_id nhid $nhid | jq length
+}
+
+nexthop_res_group_idle_timer_test()
+{
+ $IP nexthop add id 1 via 192.0.2.2 dev dummy1
+ $IP nexthop add id 2 via 192.0.2.3 dev dummy1
+
+ RET=0
+
+ $IP nexthop add id 10 group 1/2 type resilient buckets 8 idle_timer 4
+ nexthop_res_mark_buckets_busy 10 1
+ $IP nexthop replace id 10 group 1/2,3 type resilient
+
+ nexthop_bucket_nhid_count_check 10 1 4 2 4
+ check_err $? "Group expected to be unbalanced"
+
+ sleep 6
+
+ nexthop_bucket_nhid_count_check 10 1 2 2 6
+ check_err $? "Group expected to be balanced"
+
+ log_test "Bucket migration after idle timer"
+
+ $IP nexthop flush &> /dev/null
+}
+
+nexthop_res_group_idle_timer_del_test()
+{
+ $IP nexthop add id 1 via 192.0.2.2 dev dummy1
+ $IP nexthop add id 2 via 192.0.2.3 dev dummy1
+ $IP nexthop add id 3 via 192.0.2.3 dev dummy1
+
+ RET=0
+
+ $IP nexthop add id 10 group 1,50/2,50/3,1 \
+ type resilient buckets 8 idle_timer 6
+ nexthop_res_mark_buckets_busy 10 1
+ $IP nexthop replace id 10 group 1,50/2,150/3,1 type resilient
+
+ nexthop_bucket_nhid_count_check 10 1 4 2 4 3 0
+ check_err $? "Group expected to be unbalanced"
+
+ sleep 4
+
+ # Deletion prompts group replacement. Check that the bucket timers
+ # are kept.
+ $IP nexthop delete id 3
+
+ nexthop_bucket_nhid_count_check 10 1 4 2 4
+ check_err $? "Group expected to still be unbalanced"
+
+ sleep 4
+
+ nexthop_bucket_nhid_count_check 10 1 2 2 6
+ check_err $? "Group expected to be balanced"
+
+ log_test "Bucket migration after idle timer (with delete)"
+
+ $IP nexthop flush &> /dev/null
+}
+
+__nexthop_res_group_increase_timer_test()
+{
+ local timer=$1; shift
+
+ $IP nexthop add id 1 via 192.0.2.2 dev dummy1
+ $IP nexthop add id 2 via 192.0.2.3 dev dummy1
+
+ RET=0
+
+ $IP nexthop add id 10 group 1/2 type resilient buckets 8 $timer 4
+ nexthop_res_mark_buckets_busy 10 1
+ $IP nexthop replace id 10 group 1/2,3 type resilient
+
+ nexthop_bucket_nhid_count_check 10 2 6
+ check_fail $? "Group expected to be unbalanced"
+
+ sleep 2
+ $IP nexthop replace id 10 group 1/2,3 type resilient $timer 8
+ sleep 4
+
+ # 6 seconds, past the original timer.
+ nexthop_bucket_nhid_count_check 10 2 6
+ check_fail $? "Group still expected to be unbalanced"
+
+ sleep 4
+
+ # 10 seconds, past the new timer.
+ nexthop_bucket_nhid_count_check 10 2 6
+ check_err $? "Group expected to be balanced"
+
+ log_test "Bucket migration after $timer increase"
+
+ $IP nexthop flush &> /dev/null
+}
+
+__nexthop_res_group_decrease_timer_test()
+{
+ local timer=$1; shift
+
+ $IP nexthop add id 1 via 192.0.2.2 dev dummy1
+ $IP nexthop add id 2 via 192.0.2.3 dev dummy1
+
+ RET=0
+
+ $IP nexthop add id 10 group 1/2 type resilient buckets 8 $timer 8
+ nexthop_res_mark_buckets_busy 10 1
+ $IP nexthop replace id 10 group 1/2,3 type resilient
+
+ nexthop_bucket_nhid_count_check 10 2 6
+ check_fail $? "Group expected to be unbalanced"
+
+ sleep 2
+ $IP nexthop replace id 10 group 1/2,3 type resilient $timer 4
+ sleep 4
+
+ # 6 seconds, past the new timer, before the old timer.
+ nexthop_bucket_nhid_count_check 10 2 6
+ check_err $? "Group expected to be balanced"
+
+ log_test "Bucket migration after $timer decrease"
+
+ $IP nexthop flush &> /dev/null
+}
+
+__nexthop_res_group_increase_timer_del_test()
+{
+ local timer=$1; shift
+
+ $IP nexthop add id 1 via 192.0.2.2 dev dummy1
+ $IP nexthop add id 2 via 192.0.2.3 dev dummy1
+ $IP nexthop add id 3 via 192.0.2.3 dev dummy1
+
+ RET=0
+
+ $IP nexthop add id 10 group 1,100/2,100/3,1 \
+ type resilient buckets 8 $timer 4
+ nexthop_res_mark_buckets_busy 10 1
+ $IP nexthop replace id 10 group 1,100/2,300/3,1 type resilient
+
+ nexthop_bucket_nhid_count_check 10 2 6
+ check_fail $? "Group expected to be unbalanced"
+
+ sleep 2
+ $IP nexthop replace id 10 group 1/2,3 type resilient $timer 8
+ sleep 4
+
+ # 6 seconds, past the original timer.
+ nexthop_bucket_nhid_count_check 10 2 6
+ check_fail $? "Group still expected to be unbalanced"
+
+ sleep 4
+
+ # 10 seconds, past the new timer.
+ nexthop_bucket_nhid_count_check 10 2 6
+ check_err $? "Group expected to be balanced"
+
+ log_test "Bucket migration after $timer increase"
+
+ $IP nexthop flush &> /dev/null
+}
+
+nexthop_res_group_increase_idle_timer_test()
+{
+ __nexthop_res_group_increase_timer_test idle_timer
+}
+
+nexthop_res_group_decrease_idle_timer_test()
+{
+ __nexthop_res_group_decrease_timer_test idle_timer
+}
+
+nexthop_res_group_unbalanced_timer_test()
+{
+ local i
+
+ $IP nexthop add id 1 via 192.0.2.2 dev dummy1
+ $IP nexthop add id 2 via 192.0.2.3 dev dummy1
+
+ RET=0
+
+ $IP nexthop add id 10 group 1/2 type resilient \
+ buckets 8 idle_timer 6 unbalanced_timer 10
+ nexthop_res_mark_buckets_busy 10 1
+ $IP nexthop replace id 10 group 1/2,3 type resilient
+
+ for i in 1 2; do
+ sleep 4
+ nexthop_bucket_nhid_count_check 10 1 4 2 4
+ check_err $? "$i: Group expected to be unbalanced"
+ nexthop_res_mark_buckets_busy 10 1
+ done
+
+ # 3 x sleep 4 > unbalanced timer 10
+ sleep 4
+ nexthop_bucket_nhid_count_check 10 1 2 2 6
+ check_err $? "Group expected to be balanced"
+
+ log_test "Bucket migration after unbalanced timer"
+
+ $IP nexthop flush &> /dev/null
+}
+
+nexthop_res_group_unbalanced_timer_del_test()
+{
+ local i
+
+ $IP nexthop add id 1 via 192.0.2.2 dev dummy1
+ $IP nexthop add id 2 via 192.0.2.3 dev dummy1
+ $IP nexthop add id 3 via 192.0.2.3 dev dummy1
+
+ RET=0
+
+ $IP nexthop add id 10 group 1,50/2,50/3,1 type resilient \
+ buckets 8 idle_timer 6 unbalanced_timer 10
+ nexthop_res_mark_buckets_busy 10 1
+ $IP nexthop replace id 10 group 1,50/2,150/3,1 type resilient
+
+ # Check that NH delete does not reset unbalanced time.
+ sleep 4
+ $IP nexthop delete id 3
+ nexthop_bucket_nhid_count_check 10 1 4 2 4
+ check_err $? "1: Group expected to be unbalanced"
+ nexthop_res_mark_buckets_busy 10 1
+
+ sleep 4
+ nexthop_bucket_nhid_count_check 10 1 4 2 4
+ check_err $? "2: Group expected to be unbalanced"
+ nexthop_res_mark_buckets_busy 10 1
+
+ # 3 x sleep 4 > unbalanced timer 10
+ sleep 4
+ nexthop_bucket_nhid_count_check 10 1 2 2 6
+ check_err $? "Group expected to be balanced"
+
+ log_test "Bucket migration after unbalanced timer (with delete)"
+
+ $IP nexthop flush &> /dev/null
+}
+
+nexthop_res_group_no_unbalanced_timer_test()
+{
+ local i
+
+ $IP nexthop add id 1 via 192.0.2.2 dev dummy1
+ $IP nexthop add id 2 via 192.0.2.3 dev dummy1
+
+ RET=0
+
+ $IP nexthop add id 10 group 1/2 type resilient buckets 8
+ nexthop_res_mark_buckets_busy 10 1
+ $IP nexthop replace id 10 group 1/2,3 type resilient
+
+ for i in $(seq 3); do
+ sleep 60
+ nexthop_bucket_nhid_count_check 10 2 6
+ check_fail $? "$i: Group expected to be unbalanced"
+ nexthop_res_mark_buckets_busy 10 1
+ done
+
+ log_test "Buckets never force-migrated without unbalanced timer"
+
+ $IP nexthop flush &> /dev/null
+}
+
+nexthop_res_group_short_unbalanced_timer_test()
+{
+ $IP nexthop add id 1 via 192.0.2.2 dev dummy1
+ $IP nexthop add id 2 via 192.0.2.3 dev dummy1
+
+ RET=0
+
+ $IP nexthop add id 10 group 1/2 type resilient \
+ buckets 8 idle_timer 120 unbalanced_timer 4
+ nexthop_res_mark_buckets_busy 10 1
+ $IP nexthop replace id 10 group 1/2,3 type resilient
+
+ nexthop_bucket_nhid_count_check 10 2 6
+ check_fail $? "Group expected to be unbalanced"
+
+ sleep 5
+
+ nexthop_bucket_nhid_count_check 10 2 6
+ check_err $? "Group expected to be balanced"
+
+ log_test "Bucket migration after unbalanced < idle timer"
+
+ $IP nexthop flush &> /dev/null
+}
+
+nexthop_res_group_increase_unbalanced_timer_test()
+{
+ __nexthop_res_group_increase_timer_test unbalanced_timer
+}
+
+nexthop_res_group_decrease_unbalanced_timer_test()
+{
+ __nexthop_res_group_decrease_timer_test unbalanced_timer
+}
+
+nexthop_res_group_force_migrate_busy_test()
+{
+ $IP nexthop add id 1 via 192.0.2.2 dev dummy1
+ $IP nexthop add id 2 via 192.0.2.3 dev dummy1
+
+ RET=0
+
+ $IP nexthop add id 10 group 1/2 type resilient \
+ buckets 8 idle_timer 120
+ nexthop_res_mark_buckets_busy 10 1
+ $IP nexthop replace id 10 group 1/2,3 type resilient
+
+ nexthop_bucket_nhid_count_check 10 2 6
+ check_fail $? "Group expected to be unbalanced"
+
+ $IP nexthop replace id 10 group 2 type resilient
+ nexthop_bucket_nhid_count_check 10 2 8
+ check_err $? "All buckets expected to have migrated"
+
+ log_test "Busy buckets force-migrated when NH removed"
+
+ $IP nexthop flush &> /dev/null
+}
+
+nexthop_single_replace_test()
+{
+ RET=0
+
+ $IP nexthop add id 1 via 192.0.2.2 dev dummy1
+
+ $IP nexthop replace id 1 via 192.0.2.3 dev dummy1
+ nexthop_check "id 1" "id 1 via 192.0.2.3 dev dummy1 scope link trap"
+ check_err $? "Unexpected nexthop entry"
+
+ nexthop_resource_check 1
+ check_err $? "Wrong nexthop occupancy"
+
+ log_test "Single nexthop replace"
+
+ $IP nexthop flush &> /dev/null
+}
+
+nexthop_single_replace_err_test()
+{
+ RET=0
+
+ # This is supposed to cause the replace to fail because the new nexthop
+ # is programmed before deleting the replaced one.
+ nexthop_resource_set 1
+
+ $IP nexthop add id 1 via 192.0.2.2 dev dummy1
+
+ $IP nexthop replace id 1 via 192.0.2.3 dev dummy1 &> /dev/null
+ check_fail $? "Nexthop replace succeeded when should fail"
+
+ nexthop_check "id 1" "id 1 via 192.0.2.2 dev dummy1 scope link trap"
+ check_err $? "Unexpected nexthop entry after failure"
+
+ nexthop_resource_check 1
+ check_err $? "Wrong nexthop occupancy after failure"
+
+ log_test "Single nexthop replace failure"
+
+ $IP nexthop flush &> /dev/null
+ nexthop_resource_set 9999
+}
+
+nexthop_single_in_group_replace_test()
+{
+ RET=0
+
+ $IP nexthop add id 1 via 192.0.2.2 dev dummy1
+ $IP nexthop add id 2 via 192.0.2.3 dev dummy1
+ $IP nexthop add id 10 group 1/2
+
+ $IP nexthop replace id 1 via 192.0.2.4 dev dummy1
+ check_err $? "Failed to replace nexthop when should not"
+
+ nexthop_check "id 10" "id 10 group 1/2 trap"
+ check_err $? "Unexpected nexthop group entry"
+
+ nexthop_resource_check 4
+ check_err $? "Wrong nexthop occupancy"
+
+ log_test "Single nexthop replace while in group"
+
+ $IP nexthop flush &> /dev/null
+}
+
+nexthop_single_in_group_replace_err_test()
+{
+ RET=0
+
+ nexthop_resource_set 5
+
+ $IP nexthop add id 1 via 192.0.2.2 dev dummy1
+ $IP nexthop add id 2 via 192.0.2.3 dev dummy1
+ $IP nexthop add id 10 group 1/2
+
+ $IP nexthop replace id 1 via 192.0.2.4 dev dummy1 &> /dev/null
+ check_fail $? "Nexthop replacement succeeded when should fail"
+
+ nexthop_check "id 1" "id 1 via 192.0.2.2 dev dummy1 scope link trap"
+ check_err $? "Unexpected nexthop entry after failure"
+
+ nexthop_check "id 10" "id 10 group 1/2 trap"
+ check_err $? "Unexpected nexthop group entry after failure"
+
+ nexthop_resource_check 4
+ check_err $? "Wrong nexthop occupancy"
+
+ log_test "Single nexthop replace while in group failure"
+
+ $IP nexthop flush &> /dev/null
+ nexthop_resource_set 9999
+}
+
+nexthop_single_in_res_group_replace_test()
+{
+ RET=0
+
+ $IP nexthop add id 1 via 192.0.2.2 dev dummy1
+ $IP nexthop add id 2 via 192.0.2.3 dev dummy1
+ $IP nexthop add id 10 group 1/2 type resilient buckets 4
+
+ $IP nexthop replace id 1 via 192.0.2.4 dev dummy1
+ check_err $? "Failed to replace nexthop when should not"
+
+ nexthop_check "id 10" "id 10 group 1/2 type resilient buckets 4 idle_timer 120 unbalanced_timer 0 unbalanced_time 0 trap"
+ check_err $? "Unexpected nexthop group entry"
+
+ nexthop_bucket_nhid_count_check 10 1 2 2 2
+ check_err $? "Wrong nexthop buckets count"
+
+ nexthop_resource_check 6
+ check_err $? "Wrong nexthop occupancy"
+
+ log_test "Single nexthop replace while in resilient group"
+
+ $IP nexthop flush &> /dev/null
+}
+
+nexthop_single_in_res_group_replace_err_test()
+{
+ RET=0
+
+ $IP nexthop add id 1 via 192.0.2.2 dev dummy1
+ $IP nexthop add id 2 via 192.0.2.3 dev dummy1
+ $IP nexthop add id 10 group 1/2 type resilient buckets 4
+
+ ip netns exec testns1 \
+ echo 1 > $DEBUGFS_NET_DIR/fib/fail_nexthop_bucket_replace
+ $IP nexthop replace id 1 via 192.0.2.4 dev dummy1 &> /dev/null
+ check_fail $? "Nexthop replacement succeeded when should fail"
+
+ nexthop_check "id 1" "id 1 via 192.0.2.2 dev dummy1 scope link trap"
+ check_err $? "Unexpected nexthop entry after failure"
+
+ nexthop_check "id 10" "id 10 group 1/2 type resilient buckets 4 idle_timer 120 unbalanced_timer 0 unbalanced_time 0 trap"
+ check_err $? "Unexpected nexthop group entry after failure"
+
+ nexthop_bucket_nhid_count_check 10 1 2 2 2
+ check_err $? "Wrong nexthop buckets count"
+
+ nexthop_resource_check 6
+ check_err $? "Wrong nexthop occupancy"
+
+ log_test "Single nexthop replace while in resilient group failure"
+
+ $IP nexthop flush &> /dev/null
+ ip netns exec testns1 \
+ echo 0 > $DEBUGFS_NET_DIR/fib/fail_nexthop_bucket_replace
+}
+
+nexthop_single_in_group_delete_test()
+{
+ RET=0
+
+ $IP nexthop add id 1 via 192.0.2.2 dev dummy1
+ $IP nexthop add id 2 via 192.0.2.3 dev dummy1
+ $IP nexthop add id 10 group 1/2
+
+ $IP nexthop del id 1
+ nexthop_check "id 10" "id 10 group 2 trap"
+ check_err $? "Unexpected nexthop group entry"
+
+ nexthop_resource_check 2
+ check_err $? "Wrong nexthop occupancy"
+
+ log_test "Single nexthop delete while in group"
+
+ $IP nexthop flush &> /dev/null
+}
+
+nexthop_single_in_group_delete_err_test()
+{
+ RET=0
+
+ # First, nexthop 1 will be deleted, which will reduce the occupancy to
+ # 5. Afterwards, a replace notification will be sent for nexthop group
+ # 10 with only two nexthops. Since the new group is allocated before
+ # the old is deleted, the replacement will fail as it will result in an
+ # occupancy of 7.
+ nexthop_resource_set 6
+
+ $IP nexthop add id 1 via 192.0.2.2 dev dummy1
+ $IP nexthop add id 2 via 192.0.2.3 dev dummy1
+ $IP nexthop add id 3 via 192.0.2.4 dev dummy1
+ $IP nexthop add id 10 group 1/2/3
+
+ $IP nexthop del id 1
+
+ nexthop_resource_check 5
+ check_err $? "Wrong nexthop occupancy"
+
+ log_test "Single nexthop delete while in group failure"
+
+ $IP nexthop flush &> /dev/null
+ nexthop_resource_set 9999
+}
+
+nexthop_single_in_res_group_delete_test()
+{
+ RET=0
+
+ $IP nexthop add id 1 via 192.0.2.2 dev dummy1
+ $IP nexthop add id 2 via 192.0.2.3 dev dummy1
+ $IP nexthop add id 10 group 1/2 type resilient buckets 4
+
+ $IP nexthop del id 1
+ nexthop_check "id 10" "id 10 group 2 type resilient buckets 4 idle_timer 120 unbalanced_timer 0 unbalanced_time 0 trap"
+ check_err $? "Unexpected nexthop group entry"
+
+ nexthop_bucket_nhid_count_check 10 2 4
+ check_err $? "Wrong nexthop buckets count"
+
+ nexthop_resource_check 5
+ check_err $? "Wrong nexthop occupancy"
+
+ log_test "Single nexthop delete while in resilient group"
+
+ $IP nexthop flush &> /dev/null
+}
+
+nexthop_single_in_res_group_delete_err_test()
+{
+ RET=0
+
+ $IP nexthop add id 1 via 192.0.2.2 dev dummy1
+ $IP nexthop add id 2 via 192.0.2.3 dev dummy1
+ $IP nexthop add id 3 via 192.0.2.4 dev dummy1
+ $IP nexthop add id 10 group 1/2/3 type resilient buckets 6
+
+ ip netns exec testns1 \
+ echo 1 > $DEBUGFS_NET_DIR/fib/fail_nexthop_bucket_replace
+ $IP nexthop del id 1
+
+ # We failed to replace the two nexthop buckets that were originally
+ # assigned to nhid 1.
+ nexthop_bucket_nhid_count_check 10 2 2 3 2
+ check_err $? "Wrong nexthop buckets count"
+
+ nexthop_resource_check 8
+ check_err $? "Wrong nexthop occupancy"
+
+ log_test "Single nexthop delete while in resilient group failure"
+
+ $IP nexthop flush &> /dev/null
+ ip netns exec testns1 \
+ echo 0 > $DEBUGFS_NET_DIR/fib/fail_nexthop_bucket_replace
+}
+
+nexthop_replay_test()
+{
+ RET=0
+
+ $IP nexthop add id 1 via 192.0.2.2 dev dummy1
+ $IP nexthop add id 2 via 192.0.2.3 dev dummy1
+ $IP nexthop add id 10 group 1/2
+
+ $DEVLINK dev reload $DEVLINK_DEV
+ check_err $? "Failed to reload when should not"
+
+ nexthop_check "id 1" "id 1 via 192.0.2.2 dev dummy1 scope link trap"
+ check_err $? "Unexpected nexthop entry after reload"
+
+ nexthop_check "id 2" "id 2 via 192.0.2.3 dev dummy1 scope link trap"
+ check_err $? "Unexpected nexthop entry after reload"
+
+ nexthop_check "id 10" "id 10 group 1/2 trap"
+ check_err $? "Unexpected nexthop group entry after reload"
+
+ nexthop_resource_check 4
+ check_err $? "Wrong nexthop occupancy"
+
+ log_test "Nexthop replay"
+
+ $IP nexthop flush &> /dev/null
+}
+
+nexthop_replay_err_test()
+{
+ RET=0
+
+ $IP nexthop add id 1 via 192.0.2.2 dev dummy1
+ $IP nexthop add id 2 via 192.0.2.3 dev dummy1
+ $IP nexthop add id 10 group 1/2
+
+ # Reduce size of nexthop resource so that reload will fail.
+ $DEVLINK resource set $DEVLINK_DEV path nexthops size 3
+ $DEVLINK dev reload $DEVLINK_DEV &> /dev/null
+ check_fail $? "Reload succeeded when should fail"
+
+ $DEVLINK resource set $DEVLINK_DEV path nexthops size 9999
+ $DEVLINK dev reload $DEVLINK_DEV
+ check_err $? "Failed to reload when should not"
+
+ log_test "Nexthop replay failure"
+
+ $IP nexthop flush &> /dev/null
+}
+
+setup_prepare()
+{
+ local netdev
+
+ modprobe netdevsim &> /dev/null
+
+ echo "$DEV_ADDR 1" > ${NETDEVSIM_PATH}/new_device
+ while [ ! -d $SYSFS_NET_DIR ] ; do :; done
+
+ set -e
+
+ ip netns add testns1
+ devlink dev reload $DEVLINK_DEV netns testns1
+
+ IP="ip -netns testns1"
+ DEVLINK="devlink -N testns1"
+
+ $IP link add name dummy1 up type dummy
+ $IP address add 192.0.2.1/24 dev dummy1
+
+ set +e
+}
+
+cleanup()
+{
+ pre_cleanup
+ ip netns del testns1
+ echo "$DEV_ADDR" > ${NETDEVSIM_PATH}/del_device
+ modprobe -r netdevsim &> /dev/null
+}
+
+trap cleanup EXIT
+
+setup_prepare
+
+tests_run
+
+exit $EXIT_STATUS
diff --git a/tools/testing/selftests/drivers/net/netdevsim/peer.sh b/tools/testing/selftests/drivers/net/netdevsim/peer.sh
new file mode 100755
index 000000000000..aed62d9e6c0a
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/netdevsim/peer.sh
@@ -0,0 +1,143 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0-only
+
+source ../../../net/net_helper.sh
+
+NSIM_DEV_1_ID=$((256 + RANDOM % 256))
+NSIM_DEV_1_SYS=/sys/bus/netdevsim/devices/netdevsim$NSIM_DEV_1_ID
+NSIM_DEV_2_ID=$((512 + RANDOM % 256))
+NSIM_DEV_2_SYS=/sys/bus/netdevsim/devices/netdevsim$NSIM_DEV_2_ID
+
+NSIM_DEV_SYS_NEW=/sys/bus/netdevsim/new_device
+NSIM_DEV_SYS_DEL=/sys/bus/netdevsim/del_device
+NSIM_DEV_SYS_LINK=/sys/bus/netdevsim/link_device
+NSIM_DEV_SYS_UNLINK=/sys/bus/netdevsim/unlink_device
+
+socat_check()
+{
+ if [ ! -x "$(command -v socat)" ]; then
+ echo "socat command not found. Skipping test"
+ return 1
+ fi
+
+ return 0
+}
+
+setup_ns()
+{
+ set -e
+ ip netns add nssv
+ ip netns add nscl
+
+ NSIM_DEV_1_NAME=$(find $NSIM_DEV_1_SYS/net -maxdepth 1 -type d ! \
+ -path $NSIM_DEV_1_SYS/net -exec basename {} \;)
+ NSIM_DEV_2_NAME=$(find $NSIM_DEV_2_SYS/net -maxdepth 1 -type d ! \
+ -path $NSIM_DEV_2_SYS/net -exec basename {} \;)
+
+ ip link set $NSIM_DEV_1_NAME netns nssv
+ ip link set $NSIM_DEV_2_NAME netns nscl
+
+ ip netns exec nssv ip addr add '192.168.1.1/24' dev $NSIM_DEV_1_NAME
+ ip netns exec nscl ip addr add '192.168.1.2/24' dev $NSIM_DEV_2_NAME
+
+ ip netns exec nssv ip link set dev $NSIM_DEV_1_NAME up
+ ip netns exec nscl ip link set dev $NSIM_DEV_2_NAME up
+ set +e
+}
+
+cleanup_ns()
+{
+ ip netns del nscl
+ ip netns del nssv
+}
+
+###
+### Code start
+###
+
+socat_check || exit 4
+
+modprobe netdevsim
+
+# linking
+
+echo $NSIM_DEV_1_ID > $NSIM_DEV_SYS_NEW
+echo $NSIM_DEV_2_ID > $NSIM_DEV_SYS_NEW
+udevadm settle
+
+setup_ns
+
+NSIM_DEV_1_FD=$((256 + RANDOM % 256))
+exec {NSIM_DEV_1_FD}</var/run/netns/nssv
+NSIM_DEV_1_IFIDX=$(ip netns exec nssv cat /sys/class/net/$NSIM_DEV_1_NAME/ifindex)
+
+NSIM_DEV_2_FD=$((256 + RANDOM % 256))
+exec {NSIM_DEV_2_FD}</var/run/netns/nscl
+NSIM_DEV_2_IFIDX=$(ip netns exec nscl cat /sys/class/net/$NSIM_DEV_2_NAME/ifindex)
+
+echo "$NSIM_DEV_1_FD:$NSIM_DEV_1_IFIDX $NSIM_DEV_2_FD:2000" > $NSIM_DEV_SYS_LINK 2>/dev/null
+if [ $? -eq 0 ]; then
+ echo "linking with non-existent netdevsim should fail"
+ cleanup_ns
+ exit 1
+fi
+
+echo "$NSIM_DEV_1_FD:$NSIM_DEV_1_IFIDX 2000:$NSIM_DEV_2_IFIDX" > $NSIM_DEV_SYS_LINK 2>/dev/null
+if [ $? -eq 0 ]; then
+ echo "linking with non-existent netnsid should fail"
+ cleanup_ns
+ exit 1
+fi
+
+echo "$NSIM_DEV_1_FD:$NSIM_DEV_1_IFIDX $NSIM_DEV_1_FD:$NSIM_DEV_1_IFIDX" > $NSIM_DEV_SYS_LINK 2>/dev/null
+if [ $? -eq 0 ]; then
+ echo "linking with self should fail"
+ cleanup_ns
+ exit 1
+fi
+
+echo "$NSIM_DEV_1_FD:$NSIM_DEV_1_IFIDX $NSIM_DEV_2_FD:$NSIM_DEV_2_IFIDX" > $NSIM_DEV_SYS_LINK
+if [ $? -ne 0 ]; then
+ echo "linking netdevsim1 with netdevsim2 should succeed"
+ cleanup_ns
+ exit 1
+fi
+
+# argument error checking
+
+echo "$NSIM_DEV_1_FD:$NSIM_DEV_1_IFIDX $NSIM_DEV_2_FD:a" > $NSIM_DEV_SYS_LINK 2>/dev/null
+if [ $? -eq 0 ]; then
+ echo "invalid arg should fail"
+ cleanup_ns
+ exit 1
+fi
+
+# send/recv packets
+
+tmp_file=$(mktemp)
+ip netns exec nssv socat TCP-LISTEN:1234,fork $tmp_file &
+pid=$!
+res=0
+
+wait_local_port_listen nssv 1234 tcp
+
+echo "HI" | ip netns exec nscl socat STDIN TCP:192.168.1.1:1234
+
+count=$(cat $tmp_file | wc -c)
+if [[ $count -ne 3 ]]; then
+ echo "expected 3 bytes, got $count"
+ res=1
+fi
+
+echo "$NSIM_DEV_1_FD:$NSIM_DEV_1_IFIDX" > $NSIM_DEV_SYS_UNLINK
+
+echo $NSIM_DEV_2_ID > $NSIM_DEV_SYS_DEL
+
+kill $pid
+echo $NSIM_DEV_1_ID > $NSIM_DEV_SYS_DEL
+
+cleanup_ns
+
+modprobe -r netdevsim
+
+exit $res
diff --git a/tools/testing/selftests/drivers/net/netdevsim/psample.sh b/tools/testing/selftests/drivers/net/netdevsim/psample.sh
new file mode 100755
index 000000000000..e689ff7a0b12
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/netdevsim/psample.sh
@@ -0,0 +1,183 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+#
+# This test is for checking the psample module. It makes use of netdevsim
+# which periodically generates "sampled" packets.
+
+lib_dir=$(dirname $0)/../../../net/forwarding
+
+ALL_TESTS="
+ psample_enable_test
+ psample_group_num_test
+ psample_md_test
+"
+NETDEVSIM_PATH=/sys/bus/netdevsim/
+DEV_ADDR=1337
+DEV=netdevsim${DEV_ADDR}
+SYSFS_NET_DIR=/sys/bus/netdevsim/devices/$DEV/net/
+PSAMPLE_DIR=/sys/kernel/debug/netdevsim/$DEV/psample/
+CAPTURE_FILE=$(mktemp)
+NUM_NETIFS=0
+source $lib_dir/lib.sh
+
+DEVLINK_DEV=
+source $lib_dir/devlink_lib.sh
+DEVLINK_DEV=netdevsim/${DEV}
+
+# Available at https://github.com/Mellanox/libpsample
+require_command psample
+
+psample_capture()
+{
+ rm -f $CAPTURE_FILE
+
+ timeout 2 ip netns exec testns1 psample &> $CAPTURE_FILE
+}
+
+psample_enable_test()
+{
+ RET=0
+
+ echo 1 > $PSAMPLE_DIR/enable
+ check_err $? "Failed to enable sampling when should not"
+
+ echo 1 > $PSAMPLE_DIR/enable 2>/dev/null
+ check_fail $? "Sampling enablement succeeded when should fail"
+
+ psample_capture
+ if [ $(cat $CAPTURE_FILE | wc -l) -eq 0 ]; then
+ check_err 1 "Failed to capture sampled packets"
+ fi
+
+ echo 0 > $PSAMPLE_DIR/enable
+ check_err $? "Failed to disable sampling when should not"
+
+ echo 0 > $PSAMPLE_DIR/enable 2>/dev/null
+ check_fail $? "Sampling disablement succeeded when should fail"
+
+ psample_capture
+ if [ $(cat $CAPTURE_FILE | wc -l) -ne 0 ]; then
+ check_err 1 "Captured sampled packets when should not"
+ fi
+
+ log_test "psample enable / disable"
+}
+
+psample_group_num_test()
+{
+ RET=0
+
+ echo 1234 > $PSAMPLE_DIR/group_num
+ echo 1 > $PSAMPLE_DIR/enable
+
+ psample_capture
+ grep -q -e "group 1234" $CAPTURE_FILE
+ check_err $? "Sampled packets reported with wrong group number"
+
+ # New group number should only be used after disable / enable.
+ echo 4321 > $PSAMPLE_DIR/group_num
+
+ psample_capture
+ grep -q -e "group 4321" $CAPTURE_FILE
+ check_fail $? "Group number changed while sampling is active"
+
+ echo 0 > $PSAMPLE_DIR/enable && echo 1 > $PSAMPLE_DIR/enable
+
+ psample_capture
+ grep -q -e "group 4321" $CAPTURE_FILE
+ check_err $? "Group number did not change after restarting sampling"
+
+ log_test "psample group number"
+
+ echo 0 > $PSAMPLE_DIR/enable
+}
+
+psample_md_test()
+{
+ RET=0
+
+ echo 1 > $PSAMPLE_DIR/enable
+
+ echo 1234 > $PSAMPLE_DIR/in_ifindex
+ echo 4321 > $PSAMPLE_DIR/out_ifindex
+ psample_capture
+
+ grep -q -e "in-ifindex 1234" $CAPTURE_FILE
+ check_err $? "Sampled packets reported with wrong in-ifindex"
+
+ grep -q -e "out-ifindex 4321" $CAPTURE_FILE
+ check_err $? "Sampled packets reported with wrong out-ifindex"
+
+ echo 5 > $PSAMPLE_DIR/out_tc
+ psample_capture
+
+ grep -q -e "out-tc 5" $CAPTURE_FILE
+ check_err $? "Sampled packets reported with wrong out-tc"
+
+ echo $((2**16 - 1)) > $PSAMPLE_DIR/out_tc
+ psample_capture
+
+ grep -q -e "out-tc " $CAPTURE_FILE
+ check_fail $? "Sampled packets reported with out-tc when should not"
+
+ echo 1 > $PSAMPLE_DIR/out_tc
+ echo 10000 > $PSAMPLE_DIR/out_tc_occ_max
+ psample_capture
+
+ grep -q -e "out-tc-occ " $CAPTURE_FILE
+ check_err $? "Sampled packets not reported with out-tc-occ when should"
+
+ echo 0 > $PSAMPLE_DIR/out_tc_occ_max
+ psample_capture
+
+ grep -q -e "out-tc-occ " $CAPTURE_FILE
+ check_fail $? "Sampled packets reported with out-tc-occ when should not"
+
+ echo 10000 > $PSAMPLE_DIR/latency_max
+ psample_capture
+
+ grep -q -e "latency " $CAPTURE_FILE
+ check_err $? "Sampled packets not reported with latency when should"
+
+ echo 0 > $PSAMPLE_DIR/latency_max
+ psample_capture
+
+ grep -q -e "latency " $CAPTURE_FILE
+ check_fail $? "Sampled packets reported with latency when should not"
+
+ log_test "psample metadata"
+
+ echo 0 > $PSAMPLE_DIR/enable
+}
+
+setup_prepare()
+{
+ modprobe netdevsim &> /dev/null
+
+ echo "$DEV_ADDR 1" > ${NETDEVSIM_PATH}/new_device
+ while [ ! -d $SYSFS_NET_DIR ] ; do :; done
+
+ set -e
+
+ ip netns add testns1
+ devlink dev reload $DEVLINK_DEV netns testns1
+
+ set +e
+}
+
+cleanup()
+{
+ pre_cleanup
+ rm -f $CAPTURE_FILE
+ ip netns del testns1
+ echo "$DEV_ADDR" > ${NETDEVSIM_PATH}/del_device
+ modprobe -r netdevsim &> /dev/null
+}
+
+trap cleanup EXIT
+
+setup_prepare
+
+tests_run
+
+exit $EXIT_STATUS
diff --git a/tools/testing/selftests/drivers/net/netdevsim/tc-mq-visibility.sh b/tools/testing/selftests/drivers/net/netdevsim/tc-mq-visibility.sh
new file mode 100755
index 000000000000..fd13c8cfb7a8
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/netdevsim/tc-mq-visibility.sh
@@ -0,0 +1,77 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0-only
+
+source ethtool-common.sh
+
+set -o pipefail
+
+n_children() {
+ n=$(tc qdisc show dev $NDEV | grep '^qdisc' | wc -l)
+ echo $((n - 1))
+}
+
+tcq() {
+ tc qdisc $1 dev $NDEV ${@:2}
+}
+
+n_child_assert() {
+ n=$(n_children)
+ if [ $n -ne $1 ]; then
+ echo "ERROR ($root): ${@:2}, expected $1 have $n"
+ ((num_errors++))
+ else
+ ((num_passes++))
+ fi
+}
+
+
+for root in mq mqprio; do
+ NDEV=$(make_netdev 1 4)
+
+ opts=
+ [ $root == "mqprio" ] && opts='hw 0 num_tc 1 map 0 0 0 0 queues 1@0'
+
+ tcq add root handle 100: $root $opts
+ n_child_assert 4 'Init'
+
+ # All defaults
+
+ for n in 3 2 1 2 3 4 1 4; do
+ ethtool -L $NDEV combined $n
+ n_child_assert $n "Change queues to $n while down"
+ done
+
+ ip link set dev $NDEV up
+
+ for n in 3 2 1 2 3 4 1 4; do
+ ethtool -L $NDEV combined $n
+ n_child_assert $n "Change queues to $n while up"
+ done
+
+ # One real one
+ tcq replace parent 100:4 handle 204: pfifo_fast
+ n_child_assert 4 "One real queue"
+
+ ethtool -L $NDEV combined 1
+ n_child_assert 2 "One real queue, one default"
+
+ ethtool -L $NDEV combined 4
+ n_child_assert 4 "One real queue, rest default"
+
+ # Graft some
+ tcq replace parent 100:1 handle 204:
+ n_child_assert 3 "Grafted"
+
+ ethtool -L $NDEV combined 1
+ n_child_assert 1 "Grafted, one"
+
+ cleanup_nsim
+done
+
+if [ $num_errors -eq 0 ]; then
+ echo "PASSED all $((num_passes)) checks"
+ exit 0
+else
+ echo "FAILED $num_errors/$((num_errors+num_passes)) checks"
+ exit 1
+fi
diff --git a/tools/testing/selftests/drivers/net/netdevsim/udp_tunnel_nic.sh b/tools/testing/selftests/drivers/net/netdevsim/udp_tunnel_nic.sh
new file mode 100755
index 000000000000..384cfa3d38a6
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/netdevsim/udp_tunnel_nic.sh
@@ -0,0 +1,963 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0-only
+
+VNI_GEN=$RANDOM
+NSIM_ID=$((RANDOM % 1024))
+NSIM_DEV_SYS=/sys/bus/netdevsim/devices/netdevsim$NSIM_ID
+NSIM_DEV_DFS=/sys/kernel/debug/netdevsim/netdevsim$NSIM_ID
+NSIM_NETDEV=
+HAS_ETHTOOL=
+STATIC_ENTRIES=
+EXIT_STATUS=0
+num_cases=0
+num_errors=0
+
+clean_up_devs=( )
+
+function err_cnt {
+ echo "ERROR:" $@
+ EXIT_STATUS=1
+ ((num_errors++))
+ ((num_cases++))
+}
+
+function pass_cnt {
+ ((num_cases++))
+}
+
+function cleanup_tuns {
+ for dev in "${clean_up_devs[@]}"; do
+ [ -e /sys/class/net/$dev ] && ip link del dev $dev
+ done
+ clean_up_devs=( )
+}
+
+function cleanup_nsim {
+ if [ -e $NSIM_DEV_SYS ]; then
+ echo $NSIM_ID > /sys/bus/netdevsim/del_device
+ fi
+}
+
+function cleanup {
+ cleanup_tuns
+ cleanup_nsim
+}
+
+trap cleanup EXIT
+
+function new_vxlan {
+ local dev=$1
+ local dstport=$2
+ local lower=$3
+ local ipver=$4
+ local flags=$5
+
+ local group ipfl
+
+ [ "$ipver" != '6' ] && group=239.1.1.1 || group=fff1::1
+ [ "$ipver" != '6' ] || ipfl="-6"
+
+ [[ ! "$flags" =~ "external" ]] && flags="$flags id $((VNI_GEN++))"
+
+ ip $ipfl link add $dev type vxlan \
+ group $group \
+ dev $lower \
+ dstport $dstport \
+ $flags
+
+ ip link set dev $dev up
+
+ clean_up_devs=("${clean_up_devs[@]}" $dev)
+
+ check_tables
+}
+
+function new_geneve {
+ local dev=$1
+ local dstport=$2
+ local ipver=$3
+ local flags=$4
+
+ local group ipfl
+
+ [ "$ipver" != '6' ] && remote=1.1.1.2 || group=::2
+ [ "$ipver" != '6' ] || ipfl="-6"
+
+ [[ ! "$flags" =~ "external" ]] && flags="$flags vni $((VNI_GEN++))"
+
+ ip $ipfl link add $dev type geneve \
+ remote $remote \
+ dstport $dstport \
+ $flags
+
+ ip link set dev $dev up
+
+ clean_up_devs=("${clean_up_devs[@]}" $dev)
+
+ check_tables
+}
+
+function del_dev {
+ local dev=$1
+
+ ip link del dev $dev
+ check_tables
+}
+
+# Helpers for netdevsim port/type encoding
+function mke {
+ local port=$1
+ local type=$2
+
+ echo $((port << 16 | type))
+}
+
+function pre {
+ local val=$1
+
+ echo -e "port: $((val >> 16))\ttype: $((val & 0xffff))"
+}
+
+function pre_ethtool {
+ local val=$1
+ local port=$((val >> 16))
+ local type=$((val & 0xffff))
+
+ case $type in
+ 1)
+ type_name="vxlan"
+ ;;
+ 2)
+ type_name="geneve"
+ ;;
+ 4)
+ type_name="vxlan-gpe"
+ ;;
+ *)
+ type_name="bit X"
+ ;;
+ esac
+
+ echo "port $port, $type_name"
+}
+
+function check_table {
+ local path=$NSIM_DEV_DFS/ports/$port/udp_ports_table$1
+ local -n expected=$2
+ local last=$3
+
+ read -a have < $path
+
+ if [ ${#expected[@]} -ne ${#have[@]} ]; then
+ echo "check_table: BAD NUMBER OF ITEMS"
+ return 0
+ fi
+
+ for i in "${!expected[@]}"; do
+ if [ -n "$HAS_ETHTOOL" -a ${expected[i]} -ne 0 ]; then
+ pp_expected=`pre_ethtool ${expected[i]}`
+ ethtool --show-tunnels $NSIM_NETDEV | grep "$pp_expected" >/dev/null
+ if [ $? -ne 0 -a $last -ne 0 ]; then
+ err_cnt "ethtool table $1 on port $port: $pfx - $msg"
+ echo " check_table: ethtool does not contain '$pp_expected'"
+ ethtool --show-tunnels $NSIM_NETDEV
+ return 0
+
+ fi
+ fi
+
+ if [ ${expected[i]} != ${have[i]} ]; then
+ if [ $last -ne 0 ]; then
+ err_cnt "table $1 on port $port: $pfx - $msg"
+ echo " check_table: wrong entry $i"
+ echo " expected: `pre ${expected[i]}`"
+ echo " have: `pre ${have[i]}`"
+ return 0
+ fi
+ return 1
+ fi
+ done
+
+ pass_cnt
+ return 0
+}
+
+function check_tables {
+ # Need retries in case we have workqueue making the changes
+ local retries=10
+
+ while ! check_table 0 exp0 $((retries == 0)); do
+ sleep 0.02
+ ((retries--))
+ done
+ while ! check_table 1 exp1 $((retries == 0)); do
+ sleep 0.02
+ ((retries--))
+ done
+
+ if [ -n "$HAS_ETHTOOL" -a -n "${STATIC_ENTRIES[0]}" ]; then
+ fail=0
+ for i in "${!STATIC_ENTRIES[@]}"; do
+ pp_expected=`pre_ethtool ${STATIC_ENTRIES[i]}`
+ cnt=$(ethtool --show-tunnels $NSIM_NETDEV | grep -c "$pp_expected")
+ if [ $cnt -ne 1 ]; then
+ err_cnt "ethtool static entry: $pfx - $msg"
+ echo " check_table: ethtool does not contain '$pp_expected'"
+ ethtool --show-tunnels $NSIM_NETDEV
+ fail=1
+ fi
+ done
+ [ $fail == 0 ] && pass_cnt
+ fi
+}
+
+function print_table {
+ local path=$NSIM_DEV_DFS/ports/$port/udp_ports_table$1
+ read -a have < $path
+
+ tree $NSIM_DEV_DFS/
+
+ echo "Port $port table $1:"
+
+ for i in "${!have[@]}"; do
+ echo " `pre ${have[i]}`"
+ done
+
+}
+
+function print_tables {
+ print_table 0
+ print_table 1
+}
+
+function get_netdev_name {
+ local -n old=$1
+
+ udevadm settle
+ new=$(ls /sys/class/net)
+
+ for netdev in $new; do
+ for check in $old; do
+ [ $netdev == $check ] && break
+ done
+
+ if [ $netdev != $check ]; then
+ echo $netdev
+ break
+ fi
+ done
+}
+
+###
+### Code start
+###
+
+# Probe ethtool support
+ethtool -h | grep show-tunnels 2>&1 >/dev/null && HAS_ETHTOOL=y
+
+modprobe netdevsim
+
+# Basic test
+pfx="basic"
+
+for port in 0 1; do
+ old_netdevs=$(ls /sys/class/net)
+ if [ $port -eq 0 ]; then
+ echo $NSIM_ID > /sys/bus/netdevsim/new_device
+ else
+ echo 1 > $NSIM_DEV_DFS/udp_ports_open_only
+ echo 1 > $NSIM_DEV_DFS/udp_ports_sleep
+ echo 1 > $NSIM_DEV_SYS/new_port
+ fi
+ NSIM_NETDEV=`get_netdev_name old_netdevs`
+ ip link set dev $NSIM_NETDEV up
+
+ msg="new NIC device created"
+ exp0=( 0 0 0 0 )
+ exp1=( 0 0 0 0 )
+ check_tables
+
+ msg="VxLAN v4 devices"
+ exp0=( `mke 4789 1` 0 0 0 )
+ new_vxlan vxlan0 4789 $NSIM_NETDEV
+ new_vxlan vxlan1 4789 $NSIM_NETDEV
+
+ msg="VxLAN v4 devices go down"
+ exp0=( 0 0 0 0 )
+ ip link set dev vxlan1 down
+ ip link set dev vxlan0 down
+ check_tables
+
+ msg="VxLAN v6 devices"
+ exp0=( `mke 4789 1` 0 0 0 )
+ new_vxlan vxlanA 4789 $NSIM_NETDEV 6
+
+ for ifc in vxlan0 vxlan1; do
+ ip link set dev $ifc up
+ done
+
+ new_vxlan vxlanB 4789 $NSIM_NETDEV 6
+
+ msg="another VxLAN v6 devices"
+ exp0=( `mke 4789 1` `mke 4790 1` 0 0 )
+ new_vxlan vxlanC 4790 $NSIM_NETDEV 6
+
+ msg="Geneve device"
+ exp1=( `mke 6081 2` 0 0 0 )
+ new_geneve gnv0 6081
+
+ msg="NIC device goes down"
+ ip link set dev $NSIM_NETDEV down
+ if [ $port -eq 1 ]; then
+ exp0=( 0 0 0 0 )
+ exp1=( 0 0 0 0 )
+ fi
+ check_tables
+ msg="NIC device goes up again"
+ ip link set dev $NSIM_NETDEV up
+ exp0=( `mke 4789 1` `mke 4790 1` 0 0 )
+ exp1=( `mke 6081 2` 0 0 0 )
+ check_tables
+
+ cleanup_tuns
+
+ msg="tunnels destroyed"
+ exp0=( 0 0 0 0 )
+ exp1=( 0 0 0 0 )
+ check_tables
+
+ modprobe -r geneve
+ modprobe -r vxlan
+ modprobe -r udp_tunnel
+
+ check_tables
+done
+
+modprobe -r netdevsim
+
+# Module tests
+pfx="module tests"
+
+if modinfo netdevsim | grep udp_tunnel >/dev/null; then
+ err_cnt "netdevsim depends on udp_tunnel"
+else
+ pass_cnt
+fi
+
+modprobe netdevsim
+
+old_netdevs=$(ls /sys/class/net)
+port=0
+echo $NSIM_ID > /sys/bus/netdevsim/new_device
+echo 0 > $NSIM_DEV_SYS/del_port
+echo 1000 > $NSIM_DEV_DFS/udp_ports_sleep
+echo 0 > $NSIM_DEV_SYS/new_port
+NSIM_NETDEV=`get_netdev_name old_netdevs`
+
+msg="create VxLANs"
+exp0=( 0 0 0 0 ) # sleep is longer than out wait
+new_vxlan vxlan0 10000 $NSIM_NETDEV
+
+modprobe -r vxlan
+modprobe -r udp_tunnel
+
+msg="remove tunnels"
+exp0=( 0 0 0 0 )
+check_tables
+
+msg="create VxLANs"
+exp0=( 0 0 0 0 ) # sleep is longer than out wait
+new_vxlan vxlan0 10000 $NSIM_NETDEV
+
+exp0=( 0 0 0 0 )
+
+modprobe -r netdevsim
+modprobe netdevsim
+
+# Overflow the table
+
+function overflow_table0 {
+ local pfx=$1
+
+ msg="create VxLANs 1/5"
+ exp0=( `mke 10000 1` 0 0 0 )
+ new_vxlan vxlan0 10000 $NSIM_NETDEV
+
+ msg="create VxLANs 2/5"
+ exp0=( `mke 10000 1` `mke 10001 1` 0 0 )
+ new_vxlan vxlan1 10001 $NSIM_NETDEV
+
+ msg="create VxLANs 3/5"
+ exp0=( `mke 10000 1` `mke 10001 1` `mke 10002 1` 0 )
+ new_vxlan vxlan2 10002 $NSIM_NETDEV
+
+ msg="create VxLANs 4/5"
+ exp0=( `mke 10000 1` `mke 10001 1` `mke 10002 1` `mke 10003 1` )
+ new_vxlan vxlan3 10003 $NSIM_NETDEV
+
+ msg="create VxLANs 5/5"
+ new_vxlan vxlan4 10004 $NSIM_NETDEV
+}
+
+function overflow_table1 {
+ local pfx=$1
+
+ msg="create GENEVE 1/5"
+ exp1=( `mke 20000 2` 0 0 0 )
+ new_geneve gnv0 20000
+
+ msg="create GENEVE 2/5"
+ exp1=( `mke 20000 2` `mke 20001 2` 0 0 )
+ new_geneve gnv1 20001
+
+ msg="create GENEVE 3/5"
+ exp1=( `mke 20000 2` `mke 20001 2` `mke 20002 2` 0 )
+ new_geneve gnv2 20002
+
+ msg="create GENEVE 4/5"
+ exp1=( `mke 20000 2` `mke 20001 2` `mke 20002 2` `mke 20003 2` )
+ new_geneve gnv3 20003
+
+ msg="create GENEVE 5/5"
+ new_geneve gnv4 20004
+}
+
+echo $NSIM_ID > /sys/bus/netdevsim/new_device
+echo 0 > $NSIM_DEV_SYS/del_port
+
+for port in 0 1; do
+ if [ $port -ne 0 ]; then
+ echo 1 > $NSIM_DEV_DFS/udp_ports_open_only
+ echo 1 > $NSIM_DEV_DFS/udp_ports_sleep
+ fi
+
+ echo $port > $NSIM_DEV_SYS/new_port
+ NSIM_NETDEV=`get_netdev_name old_netdevs`
+ ip link set dev $NSIM_NETDEV up
+
+ overflow_table0 "overflow NIC table"
+ overflow_table1 "overflow NIC table"
+
+ msg="replace VxLAN in overflow table"
+ exp0=( `mke 10000 1` `mke 10004 1` `mke 10002 1` `mke 10003 1` )
+ del_dev vxlan1
+
+ msg="vacate VxLAN in overflow table"
+ exp0=( `mke 10000 1` `mke 10004 1` 0 `mke 10003 1` )
+ del_dev vxlan2
+
+ msg="replace GENEVE in overflow table"
+ exp1=( `mke 20000 2` `mke 20004 2` `mke 20002 2` `mke 20003 2` )
+ del_dev gnv1
+
+ msg="vacate GENEVE in overflow table"
+ exp1=( `mke 20000 2` `mke 20004 2` 0 `mke 20003 2` )
+ del_dev gnv2
+
+ msg="table sharing - share"
+ exp1=( `mke 20000 2` `mke 20004 2` `mke 30001 4` `mke 20003 2` )
+ new_vxlan vxlanG0 30001 $NSIM_NETDEV 4 "gpe external"
+
+ msg="table sharing - overflow"
+ new_vxlan vxlanG1 30002 $NSIM_NETDEV 4 "gpe external"
+ msg="table sharing - overflow v6"
+ new_vxlan vxlanG2 30002 $NSIM_NETDEV 6 "gpe external"
+
+ exp1=( `mke 20000 2` `mke 30002 4` `mke 30001 4` `mke 20003 2` )
+ del_dev gnv4
+
+ msg="destroy NIC"
+ echo $port > $NSIM_DEV_SYS/del_port
+
+ cleanup_tuns
+ exp0=( 0 0 0 0 )
+ exp1=( 0 0 0 0 )
+done
+
+cleanup_nsim
+
+# Sync all
+pfx="sync all"
+
+echo $NSIM_ID > /sys/bus/netdevsim/new_device
+echo 0 > $NSIM_DEV_SYS/del_port
+echo 1 > $NSIM_DEV_DFS/udp_ports_sync_all
+
+for port in 0 1; do
+ if [ $port -ne 0 ]; then
+ echo 1 > $NSIM_DEV_DFS/udp_ports_open_only
+ echo 1 > $NSIM_DEV_DFS/udp_ports_sleep
+ fi
+
+ echo $port > $NSIM_DEV_SYS/new_port
+ NSIM_NETDEV=`get_netdev_name old_netdevs`
+ ip link set dev $NSIM_NETDEV up
+
+ overflow_table0 "overflow NIC table"
+ overflow_table1 "overflow NIC table"
+
+ msg="replace VxLAN in overflow table"
+ exp0=( `mke 10000 1` `mke 10004 1` `mke 10002 1` `mke 10003 1` )
+ del_dev vxlan1
+
+ msg="vacate VxLAN in overflow table"
+ exp0=( `mke 10000 1` `mke 10004 1` 0 `mke 10003 1` )
+ del_dev vxlan2
+
+ msg="replace GENEVE in overflow table"
+ exp1=( `mke 20000 2` `mke 20004 2` `mke 20002 2` `mke 20003 2` )
+ del_dev gnv1
+
+ msg="vacate GENEVE in overflow table"
+ exp1=( `mke 20000 2` `mke 20004 2` 0 `mke 20003 2` )
+ del_dev gnv2
+
+ msg="table sharing - share"
+ exp1=( `mke 20000 2` `mke 20004 2` `mke 30001 4` `mke 20003 2` )
+ new_vxlan vxlanG0 30001 $NSIM_NETDEV 4 "gpe external"
+
+ msg="table sharing - overflow"
+ new_vxlan vxlanG1 30002 $NSIM_NETDEV 4 "gpe external"
+ msg="table sharing - overflow v6"
+ new_vxlan vxlanG2 30002 $NSIM_NETDEV 6 "gpe external"
+
+ exp1=( `mke 20000 2` `mke 30002 4` `mke 30001 4` `mke 20003 2` )
+ del_dev gnv4
+
+ msg="destroy NIC"
+ echo $port > $NSIM_DEV_SYS/del_port
+
+ cleanup_tuns
+ exp0=( 0 0 0 0 )
+ exp1=( 0 0 0 0 )
+done
+
+cleanup_nsim
+
+# Destroy full NIC
+pfx="destroy full"
+
+echo $NSIM_ID > /sys/bus/netdevsim/new_device
+echo 0 > $NSIM_DEV_SYS/del_port
+
+for port in 0 1; do
+ if [ $port -ne 0 ]; then
+ echo 1 > $NSIM_DEV_DFS/udp_ports_open_only
+ echo 1 > $NSIM_DEV_DFS/udp_ports_sleep
+ fi
+
+ echo $port > $NSIM_DEV_SYS/new_port
+ NSIM_NETDEV=`get_netdev_name old_netdevs`
+ ip link set dev $NSIM_NETDEV up
+
+ overflow_table0 "destroy NIC"
+ overflow_table1 "destroy NIC"
+
+ msg="destroy NIC"
+ echo $port > $NSIM_DEV_SYS/del_port
+
+ cleanup_tuns
+ exp0=( 0 0 0 0 )
+ exp1=( 0 0 0 0 )
+done
+
+cleanup_nsim
+
+# IPv4 only
+pfx="IPv4 only"
+
+echo $NSIM_ID > /sys/bus/netdevsim/new_device
+echo 0 > $NSIM_DEV_SYS/del_port
+echo 1 > $NSIM_DEV_DFS/udp_ports_ipv4_only
+
+for port in 0 1; do
+ if [ $port -ne 0 ]; then
+ echo 1 > $NSIM_DEV_DFS/udp_ports_open_only
+ echo 1 > $NSIM_DEV_DFS/udp_ports_sleep
+ fi
+
+ echo $port > $NSIM_DEV_SYS/new_port
+ NSIM_NETDEV=`get_netdev_name old_netdevs`
+ ip link set dev $NSIM_NETDEV up
+
+ msg="create VxLANs v6"
+ new_vxlan vxlanA0 10000 $NSIM_NETDEV 6
+
+ msg="create VxLANs v6"
+ new_vxlan vxlanA1 10000 $NSIM_NETDEV 6
+
+ ip link set dev vxlanA0 down
+ ip link set dev vxlanA0 up
+ check_tables
+
+ msg="create VxLANs v4"
+ exp0=( `mke 10000 1` 0 0 0 )
+ new_vxlan vxlan0 10000 $NSIM_NETDEV
+
+ msg="down VxLANs v4"
+ exp0=( 0 0 0 0 )
+ ip link set dev vxlan0 down
+ check_tables
+
+ msg="up VxLANs v4"
+ exp0=( `mke 10000 1` 0 0 0 )
+ ip link set dev vxlan0 up
+ check_tables
+
+ msg="destroy VxLANs v4"
+ exp0=( 0 0 0 0 )
+ del_dev vxlan0
+
+ msg="recreate VxLANs v4"
+ exp0=( `mke 10000 1` 0 0 0 )
+ new_vxlan vxlan0 10000 $NSIM_NETDEV
+
+ del_dev vxlanA0
+ del_dev vxlanA1
+
+ msg="destroy NIC"
+ echo $port > $NSIM_DEV_SYS/del_port
+
+ cleanup_tuns
+ exp0=( 0 0 0 0 )
+ exp1=( 0 0 0 0 )
+done
+
+cleanup_nsim
+
+# Failures
+pfx="error injection"
+
+echo $NSIM_ID > /sys/bus/netdevsim/new_device
+echo 0 > $NSIM_DEV_SYS/del_port
+
+for port in 0 1; do
+ if [ $port -ne 0 ]; then
+ echo 1 > $NSIM_DEV_DFS/udp_ports_open_only
+ echo 1 > $NSIM_DEV_DFS/udp_ports_sleep
+ fi
+
+ echo $port > $NSIM_DEV_SYS/new_port
+ NSIM_NETDEV=`get_netdev_name old_netdevs`
+ ip link set dev $NSIM_NETDEV up
+
+ echo 110 > $NSIM_DEV_DFS/ports/$port/udp_ports_inject_error
+
+ msg="1 - create VxLANs v6"
+ exp0=( 0 0 0 0 )
+ new_vxlan vxlanA0 10000 $NSIM_NETDEV 6
+
+ msg="1 - create VxLANs v4"
+ exp0=( `mke 10000 1` 0 0 0 )
+ new_vxlan vxlan0 10000 $NSIM_NETDEV
+
+ msg="1 - remove VxLANs v4"
+ del_dev vxlan0
+
+ msg="1 - remove VxLANs v6"
+ exp0=( 0 0 0 0 )
+ del_dev vxlanA0
+
+ msg="2 - create GENEVE"
+ exp1=( `mke 20000 2` 0 0 0 )
+ new_geneve gnv0 20000
+
+ msg="2 - destroy GENEVE"
+ echo 2 > $NSIM_DEV_DFS/ports/$port/udp_ports_inject_error
+ exp1=( `mke 20000 2` 0 0 0 )
+ del_dev gnv0
+
+ msg="2 - create second GENEVE"
+ exp1=( 0 `mke 20001 2` 0 0 )
+ new_geneve gnv0 20001
+
+ msg="destroy NIC"
+ echo $port > $NSIM_DEV_SYS/del_port
+
+ cleanup_tuns
+ exp0=( 0 0 0 0 )
+ exp1=( 0 0 0 0 )
+done
+
+cleanup_nsim
+
+# netdev flags
+pfx="netdev flags"
+
+echo $NSIM_ID > /sys/bus/netdevsim/new_device
+echo 0 > $NSIM_DEV_SYS/del_port
+
+for port in 0 1; do
+ if [ $port -ne 0 ]; then
+ echo 1 > $NSIM_DEV_DFS/udp_ports_open_only
+ echo 1 > $NSIM_DEV_DFS/udp_ports_sleep
+ fi
+
+ echo $port > $NSIM_DEV_SYS/new_port
+ NSIM_NETDEV=`get_netdev_name old_netdevs`
+ ip link set dev $NSIM_NETDEV up
+
+ msg="create VxLANs v6"
+ exp0=( `mke 10000 1` 0 0 0 )
+ new_vxlan vxlanA0 10000 $NSIM_NETDEV 6
+
+ msg="create VxLANs v4"
+ new_vxlan vxlan0 10000 $NSIM_NETDEV
+
+ msg="turn off"
+ exp0=( 0 0 0 0 )
+ ethtool -K $NSIM_NETDEV rx-udp_tunnel-port-offload off
+ check_tables
+
+ msg="turn on"
+ exp0=( `mke 10000 1` 0 0 0 )
+ ethtool -K $NSIM_NETDEV rx-udp_tunnel-port-offload on
+ check_tables
+
+ msg="remove both"
+ del_dev vxlanA0
+ exp0=( 0 0 0 0 )
+ del_dev vxlan0
+ check_tables
+
+ ethtool -K $NSIM_NETDEV rx-udp_tunnel-port-offload off
+
+ msg="create VxLANs v4 - off"
+ exp0=( 0 0 0 0 )
+ new_vxlan vxlan0 10000 $NSIM_NETDEV
+
+ msg="created off - turn on"
+ exp0=( `mke 10000 1` 0 0 0 )
+ ethtool -K $NSIM_NETDEV rx-udp_tunnel-port-offload on
+ check_tables
+
+ msg="destroy NIC"
+ echo $port > $NSIM_DEV_SYS/del_port
+
+ cleanup_tuns
+ exp0=( 0 0 0 0 )
+ exp1=( 0 0 0 0 )
+done
+
+cleanup_nsim
+
+# device initiated reset
+pfx="reset notification"
+
+echo $NSIM_ID > /sys/bus/netdevsim/new_device
+echo 0 > $NSIM_DEV_SYS/del_port
+
+for port in 0 1; do
+ if [ $port -ne 0 ]; then
+ echo 1 > $NSIM_DEV_DFS/udp_ports_open_only
+ echo 1 > $NSIM_DEV_DFS/udp_ports_sleep
+ fi
+
+ echo $port > $NSIM_DEV_SYS/new_port
+ NSIM_NETDEV=`get_netdev_name old_netdevs`
+ ip link set dev $NSIM_NETDEV up
+
+ msg="create VxLANs v6"
+ exp0=( `mke 10000 1` 0 0 0 )
+ new_vxlan vxlanA0 10000 $NSIM_NETDEV 6
+
+ msg="create VxLANs v4"
+ new_vxlan vxlan0 10000 $NSIM_NETDEV
+
+ echo 1 > $NSIM_DEV_DFS/ports/$port/udp_ports_reset
+ check_tables
+
+ msg="NIC device goes down"
+ ip link set dev $NSIM_NETDEV down
+ if [ $port -eq 1 ]; then
+ exp0=( 0 0 0 0 )
+ exp1=( 0 0 0 0 )
+ fi
+ check_tables
+
+ echo 1 > $NSIM_DEV_DFS/ports/$port/udp_ports_reset
+ check_tables
+
+ msg="NIC device goes up again"
+ ip link set dev $NSIM_NETDEV up
+ exp0=( `mke 10000 1` 0 0 0 )
+ check_tables
+
+ msg="remove both"
+ del_dev vxlanA0
+ exp0=( 0 0 0 0 )
+ del_dev vxlan0
+ check_tables
+
+ echo 1 > $NSIM_DEV_DFS/ports/$port/udp_ports_reset
+ check_tables
+
+ msg="destroy NIC"
+ echo $port > $NSIM_DEV_SYS/del_port
+
+ cleanup_tuns
+ exp0=( 0 0 0 0 )
+ exp1=( 0 0 0 0 )
+done
+
+cleanup_nsim
+
+# shared port tables
+pfx="table sharing"
+
+echo $NSIM_ID > /sys/bus/netdevsim/new_device
+echo 0 > $NSIM_DEV_SYS/del_port
+
+echo 0 > $NSIM_DEV_DFS/udp_ports_open_only
+echo 1 > $NSIM_DEV_DFS/udp_ports_sleep
+echo 1 > $NSIM_DEV_DFS/udp_ports_shared
+
+old_netdevs=$(ls /sys/class/net)
+echo 1 > $NSIM_DEV_SYS/new_port
+NSIM_NETDEV=`get_netdev_name old_netdevs`
+old_netdevs=$(ls /sys/class/net)
+echo 2 > $NSIM_DEV_SYS/new_port
+NSIM_NETDEV2=`get_netdev_name old_netdevs`
+
+msg="VxLAN v4 devices"
+exp0=( `mke 4789 1` 0 0 0 )
+exp1=( 0 0 0 0 )
+new_vxlan vxlan0 4789 $NSIM_NETDEV
+new_vxlan vxlan1 4789 $NSIM_NETDEV2
+
+msg="VxLAN v4 devices go down"
+exp0=( 0 0 0 0 )
+ip link set dev vxlan1 down
+ip link set dev vxlan0 down
+check_tables
+
+for ifc in vxlan0 vxlan1; do
+ ip link set dev $ifc up
+done
+
+msg="VxLAN v6 device"
+exp0=( `mke 4789 1` `mke 4790 1` 0 0 )
+new_vxlan vxlanC 4790 $NSIM_NETDEV 6
+
+msg="Geneve device"
+exp1=( `mke 6081 2` 0 0 0 )
+new_geneve gnv0 6081
+
+msg="NIC device goes down"
+ip link set dev $NSIM_NETDEV down
+check_tables
+
+msg="NIC device goes up again"
+ip link set dev $NSIM_NETDEV up
+check_tables
+
+for i in `seq 2`; do
+ msg="turn feature off - 1, rep $i"
+ ethtool -K $NSIM_NETDEV rx-udp_tunnel-port-offload off
+ check_tables
+
+ msg="turn feature off - 2, rep $i"
+ exp0=( 0 0 0 0 )
+ exp1=( 0 0 0 0 )
+ ethtool -K $NSIM_NETDEV2 rx-udp_tunnel-port-offload off
+ check_tables
+
+ msg="turn feature on - 1, rep $i"
+ exp0=( `mke 4789 1` `mke 4790 1` 0 0 )
+ exp1=( `mke 6081 2` 0 0 0 )
+ ethtool -K $NSIM_NETDEV rx-udp_tunnel-port-offload on
+ check_tables
+
+ msg="turn feature on - 2, rep $i"
+ ethtool -K $NSIM_NETDEV2 rx-udp_tunnel-port-offload on
+ check_tables
+done
+
+msg="tunnels destroyed 1"
+cleanup_tuns
+exp0=( 0 0 0 0 )
+exp1=( 0 0 0 0 )
+check_tables
+
+overflow_table0 "overflow NIC table"
+
+msg="re-add a port"
+
+echo 2 > $NSIM_DEV_SYS/del_port
+echo 2 > $NSIM_DEV_SYS/new_port
+NSIM_NETDEV=`get_netdev_name old_netdevs`
+check_tables
+
+msg="replace VxLAN in overflow table"
+exp0=( `mke 10000 1` `mke 10004 1` `mke 10002 1` `mke 10003 1` )
+del_dev vxlan1
+
+msg="vacate VxLAN in overflow table"
+exp0=( `mke 10000 1` `mke 10004 1` 0 `mke 10003 1` )
+del_dev vxlan2
+
+echo 1 > $NSIM_DEV_DFS/ports/$port/udp_ports_reset
+check_tables
+
+msg="tunnels destroyed 2"
+cleanup_tuns
+exp0=( 0 0 0 0 )
+exp1=( 0 0 0 0 )
+check_tables
+
+echo 1 > $NSIM_DEV_SYS/del_port
+echo 2 > $NSIM_DEV_SYS/del_port
+
+cleanup_nsim
+
+# Static IANA port
+pfx="static IANA vxlan"
+
+echo $NSIM_ID > /sys/bus/netdevsim/new_device
+echo 0 > $NSIM_DEV_SYS/del_port
+
+echo 1 > $NSIM_DEV_DFS/udp_ports_static_iana_vxlan
+STATIC_ENTRIES=( `mke 4789 1` )
+
+port=1
+old_netdevs=$(ls /sys/class/net)
+echo $port > $NSIM_DEV_SYS/new_port
+NSIM_NETDEV=`get_netdev_name old_netdevs`
+
+msg="check empty"
+exp0=( 0 0 0 0 )
+exp1=( 0 0 0 0 )
+check_tables
+
+msg="add on static port"
+new_vxlan vxlan0 4789 $NSIM_NETDEV
+new_vxlan vxlan1 4789 $NSIM_NETDEV
+
+msg="add on different port"
+exp0=( `mke 4790 1` 0 0 0 )
+new_vxlan vxlan2 4790 $NSIM_NETDEV
+
+cleanup_tuns
+
+msg="tunnels destroyed"
+exp0=( 0 0 0 0 )
+exp1=( 0 0 0 0 )
+check_tables
+
+msg="different type"
+new_geneve gnv0 4789
+
+cleanup_tuns
+cleanup_nsim
+
+# END
+
+modprobe -r netdevsim
+
+if [ $num_errors -eq 0 ]; then
+ echo "PASSED all $num_cases checks"
+else
+ echo "FAILED $num_errors/$num_cases checks"
+fi
+
+exit $EXIT_STATUS
diff --git a/tools/testing/selftests/drivers/net/ocelot/basic_qos.sh b/tools/testing/selftests/drivers/net/ocelot/basic_qos.sh
new file mode 100755
index 000000000000..c51c83421c61
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/ocelot/basic_qos.sh
@@ -0,0 +1,253 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+# Copyright 2022 NXP
+
+# The script is mostly generic, with the exception of the
+# ethtool per-TC counter names ("rx_green_prio_${tc}")
+
+WAIT_TIME=1
+NUM_NETIFS=4
+STABLE_MAC_ADDRS=yes
+NETIF_CREATE=no
+lib_dir=$(dirname $0)/../../../net/forwarding
+source $lib_dir/tc_common.sh
+source $lib_dir/lib.sh
+
+require_command dcb
+
+h1=${NETIFS[p1]}
+swp1=${NETIFS[p2]}
+swp2=${NETIFS[p3]}
+h2=${NETIFS[p4]}
+
+H1_IPV4="192.0.2.1"
+H2_IPV4="192.0.2.2"
+H1_IPV6="2001:db8:1::1"
+H2_IPV6="2001:db8:1::2"
+
+h1_create()
+{
+ simple_if_init $h1 $H1_IPV4/24 $H1_IPV6/64
+}
+
+h1_destroy()
+{
+ simple_if_fini $h1 $H1_IPV4/24 $H1_IPV6/64
+}
+
+h2_create()
+{
+ simple_if_init $h2 $H2_IPV4/24 $H2_IPV6/64
+}
+
+h2_destroy()
+{
+ simple_if_fini $h2 $H2_IPV4/24 $H2_IPV6/64
+}
+
+h1_vlan_create()
+{
+ local vid=$1
+
+ vlan_create $h1 $vid
+ simple_if_init $h1.$vid $H1_IPV4/24 $H1_IPV6/64
+ ip link set $h1.$vid type vlan \
+ egress-qos-map 0:0 1:1 2:2 3:3 4:4 5:5 6:6 7:7 \
+ ingress-qos-map 0:0 1:1 2:2 3:3 4:4 5:5 6:6 7:7
+}
+
+h1_vlan_destroy()
+{
+ local vid=$1
+
+ simple_if_fini $h1.$vid $H1_IPV4/24 $H1_IPV6/64
+ vlan_destroy $h1 $vid
+}
+
+h2_vlan_create()
+{
+ local vid=$1
+
+ vlan_create $h2 $vid
+ simple_if_init $h2.$vid $H2_IPV4/24 $H2_IPV6/64
+ ip link set $h2.$vid type vlan \
+ egress-qos-map 0:0 1:1 2:2 3:3 4:4 5:5 6:6 7:7 \
+ ingress-qos-map 0:0 1:1 2:2 3:3 4:4 5:5 6:6 7:7
+}
+
+h2_vlan_destroy()
+{
+ local vid=$1
+
+ simple_if_fini $h2.$vid $H2_IPV4/24 $H2_IPV6/64
+ vlan_destroy $h2 $vid
+}
+
+vlans_prepare()
+{
+ h1_vlan_create 100
+ h2_vlan_create 100
+
+ tc qdisc add dev ${h1}.100 clsact
+ tc filter add dev ${h1}.100 egress protocol ipv4 \
+ flower ip_proto icmp action skbedit priority 3
+ tc filter add dev ${h1}.100 egress protocol ipv6 \
+ flower ip_proto icmpv6 action skbedit priority 3
+}
+
+vlans_destroy()
+{
+ tc qdisc del dev ${h1}.100 clsact
+
+ h1_vlan_destroy 100
+ h2_vlan_destroy 100
+}
+
+switch_create()
+{
+ ip link set ${swp1} up
+ ip link set ${swp2} up
+
+ # Ports should trust VLAN PCP even with vlan_filtering=0
+ ip link add br0 type bridge
+ ip link set ${swp1} master br0
+ ip link set ${swp2} master br0
+ ip link set br0 up
+}
+
+switch_destroy()
+{
+ ip link del br0
+}
+
+setup_prepare()
+{
+ vrf_prepare
+
+ h1_create
+ h2_create
+ switch_create
+}
+
+cleanup()
+{
+ pre_cleanup
+
+ h2_destroy
+ h1_destroy
+ switch_destroy
+
+ vrf_cleanup
+}
+
+dscp_cs_to_tos()
+{
+ local dscp_cs=$1
+
+ # https://datatracker.ietf.org/doc/html/rfc2474
+ # 4.2.2.1 The Class Selector Codepoints
+ echo $((${dscp_cs} << 5))
+}
+
+run_test()
+{
+ local test_name=$1; shift
+ local if_name=$1; shift
+ local tc=$1; shift
+ local tos=$1; shift
+ local counter_name="rx_green_prio_${tc}"
+ local ipv4_before
+ local ipv4_after
+ local ipv6_before
+ local ipv6_after
+
+ ipv4_before=$(ethtool_stats_get ${swp1} "${counter_name}")
+ ping_do ${if_name} $H2_IPV4 "-Q ${tos}"
+ ipv4_after=$(ethtool_stats_get ${swp1} "${counter_name}")
+
+ if [ $((${ipv4_after} - ${ipv4_before})) -lt ${PING_COUNT} ]; then
+ RET=1
+ else
+ RET=0
+ fi
+ log_test "IPv4 ${test_name}"
+
+ ipv6_before=$(ethtool_stats_get ${swp1} "${counter_name}")
+ ping_do ${if_name} $H2_IPV6 "-Q ${tos}"
+ ipv6_after=$(ethtool_stats_get ${swp1} "${counter_name}")
+
+ if [ $((${ipv6_after} - ${ipv6_before})) -lt ${PING_COUNT} ]; then
+ RET=1
+ else
+ RET=0
+ fi
+ log_test "IPv6 ${test_name}"
+}
+
+port_default_prio_get()
+{
+ local if_name=$1
+ local prio
+
+ prio="$(dcb -j app show dev ${if_name} default-prio | \
+ jq '.default_prio[]')"
+ if [ -z "${prio}" ]; then
+ prio=0
+ fi
+
+ echo ${prio}
+}
+
+test_port_default()
+{
+ local orig=$(port_default_prio_get ${swp1})
+ local dmac=$(mac_get ${h2})
+
+ dcb app replace dev ${swp1} default-prio 5
+
+ run_test "Port-default QoS classification" ${h1} 5 0
+
+ dcb app replace dev ${swp1} default-prio ${orig}
+}
+
+test_vlan_pcp()
+{
+ vlans_prepare
+
+ run_test "Trusted VLAN PCP QoS classification" ${h1}.100 3 0
+
+ vlans_destroy
+}
+
+test_ip_dscp()
+{
+ local port_default=$(port_default_prio_get ${swp1})
+ local tos=$(dscp_cs_to_tos 4)
+
+ dcb app add dev ${swp1} dscp-prio CS4:4
+ run_test "Trusted DSCP QoS classification" ${h1} 4 ${tos}
+ dcb app del dev ${swp1} dscp-prio CS4:4
+
+ vlans_prepare
+ run_test "Untrusted DSCP QoS classification follows VLAN PCP" \
+ ${h1}.100 3 ${tos}
+ vlans_destroy
+
+ run_test "Untrusted DSCP QoS classification follows port default" \
+ ${h1} ${port_default} ${tos}
+}
+
+trap cleanup EXIT
+
+ALL_TESTS="
+ test_port_default
+ test_vlan_pcp
+ test_ip_dscp
+"
+
+setup_prepare
+setup_wait
+
+tests_run
+
+exit $EXIT_STATUS
diff --git a/tools/testing/selftests/drivers/net/ocelot/psfp.sh b/tools/testing/selftests/drivers/net/ocelot/psfp.sh
new file mode 100755
index 000000000000..bed748dde4b0
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/ocelot/psfp.sh
@@ -0,0 +1,327 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+# Copyright 2021-2022 NXP
+
+# Note: On LS1028A, in lack of enough user ports, this setup requires patching
+# the device tree to use the second CPU port as a user port
+
+WAIT_TIME=1
+NUM_NETIFS=4
+STABLE_MAC_ADDRS=yes
+NETIF_CREATE=no
+lib_dir=$(dirname $0)/../../../net/forwarding
+source $lib_dir/tc_common.sh
+source $lib_dir/lib.sh
+source $lib_dir/tsn_lib.sh
+
+UDS_ADDRESS_H1="/var/run/ptp4l_h1"
+UDS_ADDRESS_SWP1="/var/run/ptp4l_swp1"
+
+# Tunables
+NUM_PKTS=1000
+STREAM_VID=100
+STREAM_PRIO=6
+# Use a conservative cycle of 10 ms to allow the test to still pass when the
+# kernel has some extra overhead like lockdep etc
+CYCLE_TIME_NS=10000000
+# Create two Gate Control List entries, one OPEN and one CLOSE, of equal
+# durations
+GATE_DURATION_NS=$((${CYCLE_TIME_NS} / 2))
+# Give 2/3 of the cycle time to user space and 1/3 to the kernel
+FUDGE_FACTOR=$((${CYCLE_TIME_NS} / 3))
+# Shift the isochron base time by half the gate time, so that packets are
+# always received by swp1 close to the middle of the time slot, to minimize
+# inaccuracies due to network sync
+SHIFT_TIME_NS=$((${GATE_DURATION_NS} / 2))
+
+h1=${NETIFS[p1]}
+swp1=${NETIFS[p2]}
+swp2=${NETIFS[p3]}
+h2=${NETIFS[p4]}
+
+H1_IPV4="192.0.2.1"
+H2_IPV4="192.0.2.2"
+H1_IPV6="2001:db8:1::1"
+H2_IPV6="2001:db8:1::2"
+
+# Chain number exported by the ocelot driver for
+# Per-Stream Filtering and Policing filters
+PSFP()
+{
+ echo 30000
+}
+
+psfp_chain_create()
+{
+ local if_name=$1
+
+ tc qdisc add dev $if_name clsact
+
+ tc filter add dev $if_name ingress chain 0 pref 49152 flower \
+ skip_sw action goto chain $(PSFP)
+}
+
+psfp_chain_destroy()
+{
+ local if_name=$1
+
+ tc qdisc del dev $if_name clsact
+}
+
+psfp_filter_check()
+{
+ local expected=$1
+ local packets=""
+ local drops=""
+ local stats=""
+
+ stats=$(tc -j -s filter show dev ${swp1} ingress chain $(PSFP) pref 1)
+ packets=$(echo ${stats} | jq ".[1].options.actions[].stats.packets")
+ drops=$(echo ${stats} | jq ".[1].options.actions[].stats.drops")
+
+ if ! [ "${packets}" = "${expected}" ]; then
+ printf "Expected filter to match on %d packets but matched on %d instead\n" \
+ "${expected}" "${packets}"
+ fi
+
+ echo "Hardware filter reports ${drops} drops"
+}
+
+h1_create()
+{
+ simple_if_init $h1 $H1_IPV4/24 $H1_IPV6/64
+}
+
+h1_destroy()
+{
+ simple_if_fini $h1 $H1_IPV4/24 $H1_IPV6/64
+}
+
+h2_create()
+{
+ simple_if_init $h2 $H2_IPV4/24 $H2_IPV6/64
+}
+
+h2_destroy()
+{
+ simple_if_fini $h2 $H2_IPV4/24 $H2_IPV6/64
+}
+
+switch_create()
+{
+ local h2_mac_addr=$(mac_get $h2)
+
+ ip link set ${swp1} up
+ ip link set ${swp2} up
+
+ ip link add br0 type bridge vlan_filtering 1
+ ip link set ${swp1} master br0
+ ip link set ${swp2} master br0
+ ip link set br0 up
+
+ bridge vlan add dev ${swp2} vid ${STREAM_VID}
+ bridge vlan add dev ${swp1} vid ${STREAM_VID}
+ # PSFP on Ocelot requires the filter to also be added to the bridge
+ # FDB, and not be removed
+ bridge fdb add dev ${swp2} \
+ ${h2_mac_addr} vlan ${STREAM_VID} static master
+
+ psfp_chain_create ${swp1}
+
+ tc filter add dev ${swp1} ingress chain $(PSFP) pref 1 \
+ protocol 802.1Q flower skip_sw \
+ dst_mac ${h2_mac_addr} vlan_id ${STREAM_VID} \
+ action gate base-time 0.000000000 \
+ sched-entry OPEN ${GATE_DURATION_NS} -1 -1 \
+ sched-entry CLOSE ${GATE_DURATION_NS} -1 -1
+}
+
+switch_destroy()
+{
+ psfp_chain_destroy ${swp1}
+ ip link del br0
+}
+
+txtime_setup()
+{
+ local if_name=$1
+
+ tc qdisc add dev ${if_name} clsact
+ # Classify PTP on TC 7 and isochron on TC 6
+ tc filter add dev ${if_name} egress protocol 0x88f7 \
+ flower action skbedit priority 7
+ tc filter add dev ${if_name} egress protocol 802.1Q \
+ flower vlan_ethtype 0xdead action skbedit priority 6
+ tc qdisc add dev ${if_name} handle 100: parent root mqprio num_tc 8 \
+ queues 1@0 1@1 1@2 1@3 1@4 1@5 1@6 1@7 \
+ map 0 1 2 3 4 5 6 7 \
+ hw 1
+ # Set up TC 6 for SO_TXTIME. tc-mqprio queues count from 1.
+ tc qdisc replace dev ${if_name} parent 100:$((${STREAM_PRIO} + 1)) etf \
+ clockid CLOCK_TAI offload delta ${FUDGE_FACTOR}
+}
+
+txtime_cleanup()
+{
+ local if_name=$1
+
+ tc qdisc del dev ${if_name} root
+ tc qdisc del dev ${if_name} clsact
+}
+
+setup_prepare()
+{
+ vrf_prepare
+
+ h1_create
+ h2_create
+ switch_create
+
+ txtime_setup ${h1}
+
+ # Set up swp1 as a master PHC for h1, synchronized to the local
+ # CLOCK_REALTIME.
+ phc2sys_start ${UDS_ADDRESS_SWP1}
+
+ # Assumption true for LS1028A: h1 and h2 use the same PHC. So by
+ # synchronizing h1 to swp1 via PTP, h2 is also implicitly synchronized
+ # to swp1 (and both to CLOCK_REALTIME).
+ ptp4l_start ${h1} true ${UDS_ADDRESS_H1}
+ ptp4l_start ${swp1} false ${UDS_ADDRESS_SWP1}
+
+ # Make sure there are no filter matches at the beginning of the test
+ psfp_filter_check 0
+}
+
+cleanup()
+{
+ pre_cleanup
+
+ ptp4l_stop ${swp1}
+ ptp4l_stop ${h1}
+ phc2sys_stop
+ isochron_recv_stop
+
+ txtime_cleanup ${h1}
+
+ h2_destroy
+ h1_destroy
+ switch_destroy
+
+ vrf_cleanup
+}
+
+debug_incorrectly_dropped_packets()
+{
+ local isochron_dat=$1
+ local dropped_seqids
+ local seqid
+
+ echo "Packets incorrectly dropped:"
+
+ dropped_seqids=$(isochron report \
+ --input-file "${isochron_dat}" \
+ --printf-format "%u RX hw %T\n" \
+ --printf-args "qR" | \
+ grep 'RX hw 0.000000000' | \
+ awk '{print $1}')
+
+ for seqid in ${dropped_seqids}; do
+ isochron report \
+ --input-file "${isochron_dat}" \
+ --start ${seqid} --stop ${seqid} \
+ --printf-format "seqid %u scheduled for %T, HW TX timestamp %T\n" \
+ --printf-args "qST"
+ done
+}
+
+debug_incorrectly_received_packets()
+{
+ local isochron_dat=$1
+
+ echo "Packets incorrectly received:"
+
+ isochron report \
+ --input-file "${isochron_dat}" \
+ --printf-format "seqid %u scheduled for %T, HW TX timestamp %T, HW RX timestamp %T\n" \
+ --printf-args "qSTR" |
+ grep -v 'HW RX timestamp 0.000000000'
+}
+
+run_test()
+{
+ local base_time=$1
+ local expected=$2
+ local test_name=$3
+ local debug=$4
+ local isochron_dat="$(mktemp)"
+ local extra_args=""
+ local received
+
+ isochron_do \
+ "${h1}" \
+ "${h2}" \
+ "${UDS_ADDRESS_H1}" \
+ "" \
+ "${base_time}" \
+ "${CYCLE_TIME_NS}" \
+ "${SHIFT_TIME_NS}" \
+ "${NUM_PKTS}" \
+ "${STREAM_VID}" \
+ "${STREAM_PRIO}" \
+ "" \
+ "${isochron_dat}"
+
+ # Count all received packets by looking at the non-zero RX timestamps
+ received=$(isochron report \
+ --input-file "${isochron_dat}" \
+ --printf-format "%u\n" --printf-args "R" | \
+ grep -w -v '0' | wc -l)
+
+ if [ "${received}" = "${expected}" ]; then
+ RET=0
+ else
+ RET=1
+ echo "Expected isochron to receive ${expected} packets but received ${received}"
+ fi
+
+ log_test "${test_name}"
+
+ if [ "$RET" = "1" ]; then
+ ${debug} "${isochron_dat}"
+ fi
+
+ rm ${isochron_dat} 2> /dev/null
+}
+
+test_gate_in_band()
+{
+ # Send packets in-band with the OPEN gate entry
+ run_test 0.000000000 ${NUM_PKTS} "In band" \
+ debug_incorrectly_dropped_packets
+
+ psfp_filter_check ${NUM_PKTS}
+}
+
+test_gate_out_of_band()
+{
+ # Send packets in-band with the CLOSE gate entry
+ run_test 0.005000000 0 "Out of band" \
+ debug_incorrectly_received_packets
+
+ psfp_filter_check $((2 * ${NUM_PKTS}))
+}
+
+trap cleanup EXIT
+
+ALL_TESTS="
+ test_gate_in_band
+ test_gate_out_of_band
+"
+
+setup_prepare
+setup_wait
+
+tests_run
+
+exit $EXIT_STATUS
diff --git a/tools/testing/selftests/drivers/net/ocelot/tc_flower_chains.sh b/tools/testing/selftests/drivers/net/ocelot/tc_flower_chains.sh
new file mode 100755
index 000000000000..aff0a59f92d9
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/ocelot/tc_flower_chains.sh
@@ -0,0 +1,352 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+# Copyright 2020 NXP
+
+WAIT_TIME=1
+NUM_NETIFS=4
+STABLE_MAC_ADDRS=yes
+lib_dir=$(dirname $0)/../../../net/forwarding
+source $lib_dir/tc_common.sh
+source $lib_dir/lib.sh
+
+require_command tcpdump
+
+h1=${NETIFS[p1]}
+swp1=${NETIFS[p2]}
+swp2=${NETIFS[p3]}
+h2=${NETIFS[p4]}
+
+# Helpers to map a VCAP IS1 and VCAP IS2 lookup and policy to a chain number
+# used by the kernel driver. The numbers are:
+# VCAP IS1 lookup 0: 10000
+# VCAP IS1 lookup 1: 11000
+# VCAP IS1 lookup 2: 12000
+# VCAP IS2 lookup 0 policy 0: 20000
+# VCAP IS2 lookup 0 policy 1: 20001
+# VCAP IS2 lookup 0 policy 255: 20255
+# VCAP IS2 lookup 1 policy 0: 21000
+# VCAP IS2 lookup 1 policy 1: 21001
+# VCAP IS2 lookup 1 policy 255: 21255
+IS1()
+{
+ local lookup=$1
+
+ echo $((10000 + 1000 * lookup))
+}
+
+IS2()
+{
+ local lookup=$1
+ local pag=$2
+
+ echo $((20000 + 1000 * lookup + pag))
+}
+
+ES0()
+{
+ echo 0
+}
+
+# The Ocelot switches have a fixed ingress pipeline composed of:
+#
+# +----------------------------------------------+ +-----------------------------------------+
+# | VCAP IS1 | | VCAP IS2 |
+# | | | |
+# | +----------+ +----------+ +----------+ | | +----------+ +----------+ |
+# | | Lookup 0 | | Lookup 1 | | Lookup 2 | | --+------> PAG 0: | Lookup 0 | -> | Lookup 1 | |
+# | +----------+ -> +----------+ -> +----------+ | | | +----------+ +----------+ |
+# | |key&action| |key&action| |key&action| | | | |key&action| |key&action| |
+# | |key&action| |key&action| |key&action| | | | | .. | | .. | |
+# | | .. | | .. | | .. | | | | +----------+ +----------+ |
+# | +----------+ +----------+ +----------+ | | | |
+# | selects PAG | | | +----------+ +----------+ |
+# +----------------------------------------------+ +------> PAG 1: | Lookup 0 | -> | Lookup 1 | |
+# | | +----------+ +----------+ |
+# | | |key&action| |key&action| |
+# | | | .. | | .. | |
+# | | +----------+ +----------+ |
+# | | ... |
+# | | |
+# | | +----------+ +----------+ |
+# +----> PAG 254: | Lookup 0 | -> | Lookup 1 | |
+# | | +----------+ +----------+ |
+# | | |key&action| |key&action| |
+# | | | .. | | .. | |
+# | | +----------+ +----------+ |
+# | | |
+# | | +----------+ +----------+ |
+# +----> PAG 255: | Lookup 0 | -> | Lookup 1 | |
+# | +----------+ +----------+ |
+# | |key&action| |key&action| |
+# | | .. | | .. | |
+# | +----------+ +----------+ |
+# +-----------------------------------------+
+#
+# Both the VCAP IS1 (Ingress Stage 1) and IS2 (Ingress Stage 2) are indexed
+# (looked up) multiple times: IS1 3 times, and IS2 2 times. Each filter
+# (key and action pair) can be configured to only match during the first, or
+# second, etc, lookup.
+#
+# During one TCAM lookup, the filter processing stops at the first entry that
+# matches, then the pipeline jumps to the next lookup.
+# The driver maps each individual lookup of each individual ingress TCAM to a
+# separate chain number. For correct rule offloading, it is mandatory that each
+# filter installed in one TCAM is terminated by a non-optional GOTO action to
+# the next lookup from the fixed pipeline.
+#
+# A chain can only be used if there is a GOTO action correctly set up from the
+# prior lookup in the processing pipeline. Setting up all chains is not
+# mandatory.
+
+# NOTE: VCAP IS1 currently uses only S1_NORMAL half keys and VCAP IS2
+# dynamically chooses between MAC_ETYPE, ARP, IP4_TCP_UDP, IP4_OTHER, which are
+# all half keys as well.
+
+create_tcam_skeleton()
+{
+ local eth=$1
+
+ tc qdisc add dev $eth clsact
+
+ # VCAP IS1 is the Ingress Classification TCAM and can offload the
+ # following actions:
+ # - skbedit priority
+ # - vlan pop
+ # - vlan modify
+ # - goto (only in lookup 2, the last IS1 lookup)
+ tc filter add dev $eth ingress chain 0 pref 49152 flower \
+ skip_sw action goto chain $(IS1 0)
+ tc filter add dev $eth ingress chain $(IS1 0) pref 49152 \
+ flower skip_sw action goto chain $(IS1 1)
+ tc filter add dev $eth ingress chain $(IS1 1) pref 49152 \
+ flower skip_sw action goto chain $(IS1 2)
+ tc filter add dev $eth ingress chain $(IS1 2) pref 49152 \
+ flower skip_sw action goto chain $(IS2 0 0)
+
+ # VCAP IS2 is the Security Enforcement ingress TCAM and can offload the
+ # following actions:
+ # - trap
+ # - drop
+ # - police
+ # The two VCAP IS2 lookups can be segmented into up to 256 groups of
+ # rules, called Policies. A Policy is selected through the Policy
+ # Association Group (PAG) action of VCAP IS1 (which is the
+ # GOTO offload).
+ tc filter add dev $eth ingress chain $(IS2 0 0) pref 49152 \
+ flower skip_sw action goto chain $(IS2 1 0)
+}
+
+setup_prepare()
+{
+ ip link set $swp1 up
+ ip link set $swp2 up
+ ip link set $h2 up
+ ip link set $h1 up
+
+ create_tcam_skeleton $swp1
+
+ ip link add br0 type bridge
+ ip link set $swp1 master br0
+ ip link set $swp2 master br0
+ ip link set br0 up
+
+ ip link add link $h1 name $h1.100 type vlan id 100
+ ip link set $h1.100 up
+
+ ip link add link $h1 name $h1.200 type vlan id 200
+ ip link set $h1.200 up
+
+ tc filter add dev $swp1 ingress chain $(IS1 1) pref 1 \
+ protocol 802.1Q flower skip_sw vlan_id 100 \
+ action vlan pop \
+ action goto chain $(IS1 2)
+
+ tc filter add dev $swp1 egress chain $(ES0) pref 1 \
+ flower skip_sw indev $swp2 \
+ action vlan push protocol 802.1Q id 100
+
+ tc filter add dev $swp1 ingress chain $(IS1 0) pref 2 \
+ protocol ipv4 flower skip_sw src_ip 10.1.1.2 \
+ action skbedit priority 7 \
+ action goto chain $(IS1 1)
+
+ tc filter add dev $swp1 ingress chain $(IS2 0 0) pref 1 \
+ protocol ipv4 flower skip_sw ip_proto udp dst_port 5201 \
+ action police rate 50mbit burst 64k conform-exceed drop/pipe \
+ action goto chain $(IS2 1 0)
+}
+
+cleanup()
+{
+ ip link del $h1.200
+ ip link del $h1.100
+ tc qdisc del dev $swp1 clsact
+ ip link del br0
+}
+
+test_vlan_pop()
+{
+ local h1_mac=$(mac_get $h1)
+ local h2_mac=$(mac_get $h2)
+
+ RET=0
+
+ tcpdump_start $h2
+
+ # Work around Mausezahn VLAN builder bug
+ # (https://github.com/netsniff-ng/netsniff-ng/issues/225) by using
+ # an 8021q upper
+ $MZ $h1.100 -q -c 1 -p 64 -a $h1_mac -b $h2_mac -t ip
+
+ sleep 1
+
+ tcpdump_stop $h2
+
+ tcpdump_show $h2 | grep -q "$h1_mac > $h2_mac, ethertype IPv4"
+ check_err "$?" "untagged reception"
+
+ tcpdump_cleanup $h2
+
+ log_test "VLAN pop"
+}
+
+test_vlan_push()
+{
+ local h1_mac=$(mac_get $h1)
+ local h2_mac=$(mac_get $h2)
+
+ RET=0
+
+ tcpdump_start $h1.100
+
+ $MZ $h2 -q -c 1 -p 64 -a $h2_mac -b $h1_mac -t ip
+
+ sleep 1
+
+ tcpdump_stop $h1.100
+
+ tcpdump_show $h1.100 | grep -q "$h2_mac > $h1_mac"
+ check_err "$?" "tagged reception"
+
+ tcpdump_cleanup $h1.100
+
+ log_test "VLAN push"
+}
+
+test_vlan_ingress_modify()
+{
+ local h1_mac=$(mac_get $h1)
+ local h2_mac=$(mac_get $h2)
+
+ RET=0
+
+ ip link set br0 type bridge vlan_filtering 1
+ bridge vlan add dev $swp1 vid 200
+ bridge vlan add dev $swp1 vid 300
+ bridge vlan add dev $swp2 vid 300
+
+ tc filter add dev $swp1 ingress chain $(IS1 2) pref 3 \
+ protocol 802.1Q flower skip_sw vlan_id 200 src_mac $h1_mac \
+ action vlan modify id 300 \
+ action goto chain $(IS2 0 0)
+
+ tcpdump_start $h2
+
+ $MZ $h1.200 -q -c 1 -p 64 -a $h1_mac -b $h2_mac -t ip
+
+ sleep 1
+
+ tcpdump_stop $h2
+
+ tcpdump_show $h2 | grep -q "$h1_mac > $h2_mac, .* vlan 300"
+ check_err "$?" "tagged reception"
+
+ tcpdump_cleanup $h2
+
+ tc filter del dev $swp1 ingress chain $(IS1 2) pref 3
+
+ bridge vlan del dev $swp1 vid 200
+ bridge vlan del dev $swp1 vid 300
+ bridge vlan del dev $swp2 vid 300
+ ip link set br0 type bridge vlan_filtering 0
+
+ log_test "Ingress VLAN modification"
+}
+
+test_vlan_egress_modify()
+{
+ local h1_mac=$(mac_get $h1)
+ local h2_mac=$(mac_get $h2)
+
+ RET=0
+
+ tc qdisc add dev $swp2 clsact
+
+ ip link set br0 type bridge vlan_filtering 1
+ bridge vlan add dev $swp1 vid 200
+ bridge vlan add dev $swp2 vid 200
+
+ tc filter add dev $swp2 egress chain $(ES0) pref 3 \
+ protocol 802.1Q flower skip_sw vlan_id 200 vlan_prio 0 \
+ action vlan modify id 300 priority 7
+
+ tcpdump_start $h2
+
+ $MZ $h1.200 -q -c 1 -p 64 -a $h1_mac -b $h2_mac -t ip
+
+ sleep 1
+
+ tcpdump_stop $h2
+
+ tcpdump_show $h2 | grep -q "$h1_mac > $h2_mac, .* vlan 300"
+ check_err "$?" "tagged reception"
+
+ tcpdump_cleanup $h2
+
+ tc filter del dev $swp2 egress chain $(ES0) pref 3
+ tc qdisc del dev $swp2 clsact
+
+ bridge vlan del dev $swp1 vid 200
+ bridge vlan del dev $swp2 vid 200
+ ip link set br0 type bridge vlan_filtering 0
+
+ log_test "Egress VLAN modification"
+}
+
+test_skbedit_priority()
+{
+ local h1_mac=$(mac_get $h1)
+ local h2_mac=$(mac_get $h2)
+ local num_pkts=100
+
+ before=$(ethtool_stats_get $swp1 'rx_green_prio_7')
+
+ $MZ $h1 -q -c $num_pkts -p 64 -a $h1_mac -b $h2_mac -t ip -A 10.1.1.2
+
+ after=$(ethtool_stats_get $swp1 'rx_green_prio_7')
+
+ if [ $((after - before)) = $num_pkts ]; then
+ RET=0
+ else
+ RET=1
+ fi
+
+ log_test "Frame prioritization"
+}
+
+trap cleanup EXIT
+
+ALL_TESTS="
+ test_vlan_pop
+ test_vlan_push
+ test_vlan_ingress_modify
+ test_vlan_egress_modify
+ test_skbedit_priority
+"
+
+setup_prepare
+setup_wait
+
+tests_run
+
+exit $EXIT_STATUS
diff --git a/tools/testing/selftests/drivers/net/team/Makefile b/tools/testing/selftests/drivers/net/team/Makefile
new file mode 100644
index 000000000000..2d5a76d99181
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/team/Makefile
@@ -0,0 +1,11 @@
+# SPDX-License-Identifier: GPL-2.0
+# Makefile for net selftests
+
+TEST_PROGS := dev_addr_lists.sh
+
+TEST_INCLUDES := \
+ ../bonding/lag_lib.sh \
+ ../../../net/forwarding/lib.sh \
+ ../../../net/lib.sh
+
+include ../../../lib.mk
diff --git a/tools/testing/selftests/drivers/net/team/config b/tools/testing/selftests/drivers/net/team/config
new file mode 100644
index 000000000000..b5e3a3aad4bf
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/team/config
@@ -0,0 +1,5 @@
+CONFIG_DUMMY=y
+CONFIG_IPV6=y
+CONFIG_MACVLAN=y
+CONFIG_NET_TEAM=y
+CONFIG_NET_TEAM_MODE_LOADBALANCE=y
diff --git a/tools/testing/selftests/drivers/net/team/dev_addr_lists.sh b/tools/testing/selftests/drivers/net/team/dev_addr_lists.sh
new file mode 100755
index 000000000000..b1ec7755b783
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/team/dev_addr_lists.sh
@@ -0,0 +1,51 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+#
+# Test team device handling of addr lists (dev->uc, mc)
+#
+
+ALL_TESTS="
+ team_cleanup
+"
+
+REQUIRE_MZ=no
+NUM_NETIFS=0
+lib_dir=$(dirname "$0")
+source "$lib_dir"/../../../net/forwarding/lib.sh
+
+source "$lib_dir"/../bonding/lag_lib.sh
+
+
+destroy()
+{
+ local ifnames=(dummy1 dummy2 team0 mv0)
+ local ifname
+
+ for ifname in "${ifnames[@]}"; do
+ ip link del "$ifname" &>/dev/null
+ done
+}
+
+cleanup()
+{
+ pre_cleanup
+
+ destroy
+}
+
+
+team_cleanup()
+{
+ RET=0
+
+ test_LAG_cleanup "team" "lacp"
+}
+
+
+require_command teamd
+
+trap cleanup EXIT
+
+tests_run
+
+exit "$EXIT_STATUS"
diff --git a/tools/testing/selftests/drivers/s390x/uvdevice/Makefile b/tools/testing/selftests/drivers/s390x/uvdevice/Makefile
new file mode 100644
index 000000000000..755d164384c4
--- /dev/null
+++ b/tools/testing/selftests/drivers/s390x/uvdevice/Makefile
@@ -0,0 +1,20 @@
+include ../../../../../build/Build.include
+
+UNAME_M := $(shell uname -m)
+
+ifneq ($(UNAME_M),s390x)
+nothing:
+.PHONY: all clean run_tests install
+.SILENT:
+else
+
+TEST_GEN_PROGS := test_uvdevice
+
+top_srcdir ?= ../../../../../..
+LINUX_TOOL_ARCH_INCLUDE = $(top_srcdir)/tools/arch/$(ARCH)/include
+
+CFLAGS += -Wall -Werror -static $(KHDR_INCLUDES) -I$(LINUX_TOOL_ARCH_INCLUDE)
+
+include ../../../lib.mk
+
+endif
diff --git a/tools/testing/selftests/drivers/s390x/uvdevice/config b/tools/testing/selftests/drivers/s390x/uvdevice/config
new file mode 100644
index 000000000000..f28a04b99eff
--- /dev/null
+++ b/tools/testing/selftests/drivers/s390x/uvdevice/config
@@ -0,0 +1 @@
+CONFIG_S390_UV_UAPI=y
diff --git a/tools/testing/selftests/drivers/s390x/uvdevice/test_uvdevice.c b/tools/testing/selftests/drivers/s390x/uvdevice/test_uvdevice.c
new file mode 100644
index 000000000000..ea0cdc37b44f
--- /dev/null
+++ b/tools/testing/selftests/drivers/s390x/uvdevice/test_uvdevice.c
@@ -0,0 +1,276 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * selftest for the Ultravisor UAPI device
+ *
+ * Copyright IBM Corp. 2022
+ * Author(s): Steffen Eiden <seiden@linux.ibm.com>
+ */
+
+#include <stdint.h>
+#include <fcntl.h>
+#include <errno.h>
+#include <sys/ioctl.h>
+#include <sys/mman.h>
+
+#include <asm/uvdevice.h>
+
+#include "../../../kselftest_harness.h"
+
+#define UV_PATH "/dev/uv"
+#define BUFFER_SIZE 0x200
+FIXTURE(uvio_fixture) {
+ int uv_fd;
+ struct uvio_ioctl_cb uvio_ioctl;
+ uint8_t buffer[BUFFER_SIZE];
+ __u64 fault_page;
+};
+
+FIXTURE_VARIANT(uvio_fixture) {
+ unsigned long ioctl_cmd;
+ uint32_t arg_size;
+};
+
+FIXTURE_VARIANT_ADD(uvio_fixture, att) {
+ .ioctl_cmd = UVIO_IOCTL_ATT,
+ .arg_size = sizeof(struct uvio_attest),
+};
+
+FIXTURE_SETUP(uvio_fixture)
+{
+ self->uv_fd = open(UV_PATH, O_ACCMODE);
+
+ self->uvio_ioctl.argument_addr = (__u64)self->buffer;
+ self->uvio_ioctl.argument_len = variant->arg_size;
+ self->fault_page =
+ (__u64)mmap(NULL, (size_t)getpagesize(), PROT_NONE, MAP_ANONYMOUS, -1, 0);
+}
+
+FIXTURE_TEARDOWN(uvio_fixture)
+{
+ if (self->uv_fd)
+ close(self->uv_fd);
+ munmap((void *)self->fault_page, (size_t)getpagesize());
+}
+
+TEST_F(uvio_fixture, fault_ioctl_arg)
+{
+ int rc, errno_cache;
+
+ rc = ioctl(self->uv_fd, variant->ioctl_cmd, NULL);
+ errno_cache = errno;
+ ASSERT_EQ(rc, -1);
+ ASSERT_EQ(errno_cache, EFAULT);
+
+ rc = ioctl(self->uv_fd, variant->ioctl_cmd, self->fault_page);
+ errno_cache = errno;
+ ASSERT_EQ(rc, -1);
+ ASSERT_EQ(errno_cache, EFAULT);
+}
+
+TEST_F(uvio_fixture, fault_uvio_arg)
+{
+ int rc, errno_cache;
+
+ self->uvio_ioctl.argument_addr = 0;
+ rc = ioctl(self->uv_fd, variant->ioctl_cmd, &self->uvio_ioctl);
+ errno_cache = errno;
+ ASSERT_EQ(rc, -1);
+ ASSERT_EQ(errno_cache, EFAULT);
+
+ self->uvio_ioctl.argument_addr = self->fault_page;
+ rc = ioctl(self->uv_fd, variant->ioctl_cmd, &self->uvio_ioctl);
+ errno_cache = errno;
+ ASSERT_EQ(rc, -1);
+ ASSERT_EQ(errno_cache, EFAULT);
+}
+
+/*
+ * Test to verify that IOCTLs with invalid values in the ioctl_control block
+ * are rejected.
+ */
+TEST_F(uvio_fixture, inval_ioctl_cb)
+{
+ int rc, errno_cache;
+
+ self->uvio_ioctl.argument_len = 0;
+ rc = ioctl(self->uv_fd, variant->ioctl_cmd, &self->uvio_ioctl);
+ errno_cache = errno;
+ ASSERT_EQ(rc, -1);
+ ASSERT_EQ(errno_cache, EINVAL);
+
+ self->uvio_ioctl.argument_len = (uint32_t)-1;
+ rc = ioctl(self->uv_fd, variant->ioctl_cmd, &self->uvio_ioctl);
+ errno_cache = errno;
+ ASSERT_EQ(rc, -1);
+ ASSERT_EQ(errno_cache, EINVAL);
+ self->uvio_ioctl.argument_len = variant->arg_size;
+
+ self->uvio_ioctl.flags = (uint32_t)-1;
+ rc = ioctl(self->uv_fd, variant->ioctl_cmd, &self->uvio_ioctl);
+ errno_cache = errno;
+ ASSERT_EQ(rc, -1);
+ ASSERT_EQ(errno_cache, EINVAL);
+ self->uvio_ioctl.flags = 0;
+
+ memset(self->uvio_ioctl.reserved14, 0xff, sizeof(self->uvio_ioctl.reserved14));
+ rc = ioctl(self->uv_fd, variant->ioctl_cmd, &self->uvio_ioctl);
+ errno_cache = errno;
+ ASSERT_EQ(rc, -1);
+ ASSERT_EQ(errno_cache, EINVAL);
+
+ memset(&self->uvio_ioctl, 0x11, sizeof(self->uvio_ioctl));
+ rc = ioctl(self->uv_fd, variant->ioctl_cmd, &self->uvio_ioctl);
+ ASSERT_EQ(rc, -1);
+}
+
+TEST_F(uvio_fixture, inval_ioctl_cmd)
+{
+ int rc, errno_cache;
+ uint8_t nr = _IOC_NR(variant->ioctl_cmd);
+ unsigned long cmds[] = {
+ _IOWR('a', nr, struct uvio_ioctl_cb),
+ _IOWR(UVIO_TYPE_UVC, nr, int),
+ _IO(UVIO_TYPE_UVC, nr),
+ _IOR(UVIO_TYPE_UVC, nr, struct uvio_ioctl_cb),
+ _IOW(UVIO_TYPE_UVC, nr, struct uvio_ioctl_cb),
+ };
+
+ for (size_t i = 0; i < ARRAY_SIZE(cmds); i++) {
+ rc = ioctl(self->uv_fd, cmds[i], &self->uvio_ioctl);
+ errno_cache = errno;
+ ASSERT_EQ(rc, -1);
+ ASSERT_EQ(errno_cache, ENOTTY);
+ }
+}
+
+struct test_attest_buffer {
+ uint8_t arcb[0x180];
+ uint8_t meas[64];
+ uint8_t add[32];
+};
+
+FIXTURE(attest_fixture) {
+ int uv_fd;
+ struct uvio_ioctl_cb uvio_ioctl;
+ struct uvio_attest uvio_attest;
+ struct test_attest_buffer attest_buffer;
+ __u64 fault_page;
+};
+
+FIXTURE_SETUP(attest_fixture)
+{
+ self->uv_fd = open(UV_PATH, O_ACCMODE);
+
+ self->uvio_ioctl.argument_addr = (__u64)&self->uvio_attest;
+ self->uvio_ioctl.argument_len = sizeof(self->uvio_attest);
+
+ self->uvio_attest.arcb_addr = (__u64)&self->attest_buffer.arcb;
+ self->uvio_attest.arcb_len = sizeof(self->attest_buffer.arcb);
+
+ self->uvio_attest.meas_addr = (__u64)&self->attest_buffer.meas;
+ self->uvio_attest.meas_len = sizeof(self->attest_buffer.meas);
+
+ self->uvio_attest.add_data_addr = (__u64)&self->attest_buffer.add;
+ self->uvio_attest.add_data_len = sizeof(self->attest_buffer.add);
+ self->fault_page =
+ (__u64)mmap(NULL, (size_t)getpagesize(), PROT_NONE, MAP_ANONYMOUS, -1, 0);
+}
+
+FIXTURE_TEARDOWN(attest_fixture)
+{
+ if (self->uv_fd)
+ close(self->uv_fd);
+ munmap((void *)self->fault_page, (size_t)getpagesize());
+}
+
+static void att_inval_sizes_test(uint32_t *size, uint32_t max_size, bool test_zero,
+ struct __test_metadata *_metadata,
+ FIXTURE_DATA(attest_fixture) *self)
+{
+ int rc, errno_cache;
+ uint32_t tmp = *size;
+
+ if (test_zero) {
+ *size = 0;
+ rc = ioctl(self->uv_fd, UVIO_IOCTL_ATT, &self->uvio_ioctl);
+ errno_cache = errno;
+ ASSERT_EQ(rc, -1);
+ ASSERT_EQ(errno_cache, EINVAL);
+ }
+ *size = max_size + 1;
+ rc = ioctl(self->uv_fd, UVIO_IOCTL_ATT, &self->uvio_ioctl);
+ errno_cache = errno;
+ ASSERT_EQ(rc, -1);
+ ASSERT_EQ(errno_cache, EINVAL);
+ *size = tmp;
+}
+
+/*
+ * Test to verify that attestation IOCTLs with invalid values in the UVIO
+ * attestation control block are rejected.
+ */
+TEST_F(attest_fixture, att_inval_request)
+{
+ int rc, errno_cache;
+
+ att_inval_sizes_test(&self->uvio_attest.add_data_len, UVIO_ATT_ADDITIONAL_MAX_LEN,
+ false, _metadata, self);
+ att_inval_sizes_test(&self->uvio_attest.meas_len, UVIO_ATT_MEASUREMENT_MAX_LEN,
+ true, _metadata, self);
+ att_inval_sizes_test(&self->uvio_attest.arcb_len, UVIO_ATT_ARCB_MAX_LEN,
+ true, _metadata, self);
+
+ self->uvio_attest.reserved136 = (uint16_t)-1;
+ rc = ioctl(self->uv_fd, UVIO_IOCTL_ATT, &self->uvio_ioctl);
+ errno_cache = errno;
+ ASSERT_EQ(rc, -1);
+ ASSERT_EQ(errno_cache, EINVAL);
+
+ memset(&self->uvio_attest, 0x11, sizeof(self->uvio_attest));
+ rc = ioctl(self->uv_fd, UVIO_IOCTL_ATT, &self->uvio_ioctl);
+ ASSERT_EQ(rc, -1);
+}
+
+static void att_inval_addr_test(__u64 *addr, struct __test_metadata *_metadata,
+ FIXTURE_DATA(attest_fixture) *self)
+{
+ int rc, errno_cache;
+ __u64 tmp = *addr;
+
+ *addr = 0;
+ rc = ioctl(self->uv_fd, UVIO_IOCTL_ATT, &self->uvio_ioctl);
+ errno_cache = errno;
+ ASSERT_EQ(rc, -1);
+ ASSERT_EQ(errno_cache, EFAULT);
+ *addr = self->fault_page;
+ rc = ioctl(self->uv_fd, UVIO_IOCTL_ATT, &self->uvio_ioctl);
+ errno_cache = errno;
+ ASSERT_EQ(rc, -1);
+ ASSERT_EQ(errno_cache, EFAULT);
+ *addr = tmp;
+}
+
+TEST_F(attest_fixture, att_inval_addr)
+{
+ att_inval_addr_test(&self->uvio_attest.arcb_addr, _metadata, self);
+ att_inval_addr_test(&self->uvio_attest.add_data_addr, _metadata, self);
+ att_inval_addr_test(&self->uvio_attest.meas_addr, _metadata, self);
+}
+
+static void __attribute__((constructor)) __constructor_order_last(void)
+{
+ if (!__constructor_order)
+ __constructor_order = _CONSTRUCTOR_ORDER_BACKWARD;
+}
+
+int main(int argc, char **argv)
+{
+ int fd = open(UV_PATH, O_ACCMODE);
+
+ if (fd < 0)
+ ksft_exit_skip("No uv-device or cannot access " UV_PATH "\n"
+ "Enable CONFIG_S390_UV_UAPI and check the access rights on "
+ UV_PATH ".\n");
+ close(fd);
+ return test_harness_run(argc, argv);
+}
diff --git a/tools/testing/selftests/drivers/sdsi/sdsi.sh b/tools/testing/selftests/drivers/sdsi/sdsi.sh
new file mode 100755
index 000000000000..9b84b9b82b49
--- /dev/null
+++ b/tools/testing/selftests/drivers/sdsi/sdsi.sh
@@ -0,0 +1,25 @@
+#!/bin/sh
+# SPDX-License-Identifier: GPL-2.0
+# Runs tests for the intel_sdsi driver
+
+if ! command -v python3 > /dev/null 2>&1; then
+ echo "drivers/sdsi: [SKIP] python3 not installed"
+ exit 77
+fi
+
+if ! python3 -c "import pytest" > /dev/null 2>&1; then
+ echo "drivers/sdsi: [SKIP] pytest module not installed"
+ exit 77
+fi
+
+if ! /sbin/modprobe -q -r intel_sdsi; then
+ echo "drivers/sdsi: [SKIP]"
+ exit 77
+fi
+
+if /sbin/modprobe -q intel_sdsi && python3 -m pytest sdsi_test.py; then
+ echo "drivers/sdsi: [OK]"
+else
+ echo "drivers/sdsi: [FAIL]"
+ exit 1
+fi
diff --git a/tools/testing/selftests/drivers/sdsi/sdsi_test.py b/tools/testing/selftests/drivers/sdsi/sdsi_test.py
new file mode 100644
index 000000000000..5efb29feee70
--- /dev/null
+++ b/tools/testing/selftests/drivers/sdsi/sdsi_test.py
@@ -0,0 +1,226 @@
+#!/usr/bin/env python3
+# SPDX-License-Identifier: GPL-2.0
+
+from struct import pack
+from time import sleep
+
+import errno
+import glob
+import os
+import subprocess
+
+try:
+ import pytest
+except ImportError:
+ print("Unable to import pytest python module.")
+ print("\nIf not already installed, you may do so with:")
+ print("\t\tpip3 install pytest")
+ exit(1)
+
+SOCKETS = glob.glob('/sys/bus/auxiliary/devices/intel_vsec.sdsi.*')
+NUM_SOCKETS = len(SOCKETS)
+
+MODULE_NAME = 'intel_sdsi'
+DEV_PREFIX = 'intel_vsec.sdsi'
+CLASS_DIR = '/sys/bus/auxiliary/devices'
+GUID = "0x6dd191"
+
+def read_bin_file(file):
+ with open(file, mode='rb') as f:
+ content = f.read()
+ return content
+
+def get_dev_file_path(socket, file):
+ return CLASS_DIR + '/' + DEV_PREFIX + '.' + str(socket) + '/' + file
+
+def kmemleak_enabled():
+ kmemleak = "/sys/kernel/debug/kmemleak"
+ return os.path.isfile(kmemleak)
+
+class TestSDSiDriver:
+ def test_driver_loaded(self):
+ lsmod_p = subprocess.Popen(('lsmod'), stdout=subprocess.PIPE)
+ result = subprocess.check_output(('grep', '-q', MODULE_NAME), stdin=lsmod_p.stdout)
+
+@pytest.mark.parametrize('socket', range(0, NUM_SOCKETS))
+class TestSDSiFilesClass:
+
+ def read_value(self, file):
+ f = open(file, "r")
+ value = f.read().strip("\n")
+ return value
+
+ def get_dev_folder(self, socket):
+ return CLASS_DIR + '/' + DEV_PREFIX + '.' + str(socket) + '/'
+
+ def test_sysfs_files_exist(self, socket):
+ folder = self.get_dev_folder(socket)
+ print (folder)
+ assert os.path.isfile(folder + "guid") == True
+ assert os.path.isfile(folder + "provision_akc") == True
+ assert os.path.isfile(folder + "provision_cap") == True
+ assert os.path.isfile(folder + "state_certificate") == True
+ assert os.path.isfile(folder + "registers") == True
+
+ def test_sysfs_file_permissions(self, socket):
+ folder = self.get_dev_folder(socket)
+ mode = os.stat(folder + "guid").st_mode & 0o777
+ assert mode == 0o444 # Read all
+ mode = os.stat(folder + "registers").st_mode & 0o777
+ assert mode == 0o400 # Read owner
+ mode = os.stat(folder + "provision_akc").st_mode & 0o777
+ assert mode == 0o200 # Read owner
+ mode = os.stat(folder + "provision_cap").st_mode & 0o777
+ assert mode == 0o200 # Read owner
+ mode = os.stat(folder + "state_certificate").st_mode & 0o777
+ assert mode == 0o400 # Read owner
+
+ def test_sysfs_file_ownership(self, socket):
+ folder = self.get_dev_folder(socket)
+
+ st = os.stat(folder + "guid")
+ assert st.st_uid == 0
+ assert st.st_gid == 0
+
+ st = os.stat(folder + "registers")
+ assert st.st_uid == 0
+ assert st.st_gid == 0
+
+ st = os.stat(folder + "provision_akc")
+ assert st.st_uid == 0
+ assert st.st_gid == 0
+
+ st = os.stat(folder + "provision_cap")
+ assert st.st_uid == 0
+ assert st.st_gid == 0
+
+ st = os.stat(folder + "state_certificate")
+ assert st.st_uid == 0
+ assert st.st_gid == 0
+
+ def test_sysfs_file_sizes(self, socket):
+ folder = self.get_dev_folder(socket)
+
+ if self.read_value(folder + "guid") == GUID:
+ st = os.stat(folder + "registers")
+ assert st.st_size == 72
+
+ st = os.stat(folder + "provision_akc")
+ assert st.st_size == 1024
+
+ st = os.stat(folder + "provision_cap")
+ assert st.st_size == 1024
+
+ st = os.stat(folder + "state_certificate")
+ assert st.st_size == 4096
+
+ def test_no_seek_allowed(self, socket):
+ folder = self.get_dev_folder(socket)
+ rand_file = bytes(os.urandom(8))
+
+ f = open(folder + "provision_cap", "wb", 0)
+ f.seek(1)
+ with pytest.raises(OSError) as error:
+ f.write(rand_file)
+ assert error.value.errno == errno.ESPIPE
+ f.close()
+
+ f = open(folder + "provision_akc", "wb", 0)
+ f.seek(1)
+ with pytest.raises(OSError) as error:
+ f.write(rand_file)
+ assert error.value.errno == errno.ESPIPE
+ f.close()
+
+ def test_registers_seek(self, socket):
+ folder = self.get_dev_folder(socket)
+
+ # Check that the value read from an offset of the entire
+ # file is none-zero and the same as the value read
+ # from seeking to the same location
+ f = open(folder + "registers", "rb")
+ data = f.read()
+ f.seek(64)
+ id = f.read()
+ assert id != bytes(0)
+ assert data[64:] == id
+ f.close()
+
+@pytest.mark.parametrize('socket', range(0, NUM_SOCKETS))
+class TestSDSiMailboxCmdsClass:
+ def test_provision_akc_eoverflow_1017_bytes(self, socket):
+
+ # The buffer for writes is 1k, of with 8 bytes must be
+ # reserved for the command, leaving 1016 bytes max.
+ # Check that we get an overflow error for 1017 bytes.
+ node = get_dev_file_path(socket, "provision_akc")
+ rand_file = bytes(os.urandom(1017))
+
+ f = open(node, 'wb', 0)
+ with pytest.raises(OSError) as error:
+ f.write(rand_file)
+ assert error.value.errno == errno.EOVERFLOW
+ f.close()
+
+@pytest.mark.parametrize('socket', range(0, NUM_SOCKETS))
+class TestSdsiDriverLocksClass:
+ def test_enodev_when_pci_device_removed(self, socket):
+ node = get_dev_file_path(socket, "provision_akc")
+ dev_name = DEV_PREFIX + '.' + str(socket)
+ driver_dir = CLASS_DIR + '/' + dev_name + "/driver/"
+ rand_file = bytes(os.urandom(8))
+
+ f = open(node, 'wb', 0)
+ g = open(node, 'wb', 0)
+
+ with open(driver_dir + 'unbind', 'w') as k:
+ print(dev_name, file = k)
+
+ with pytest.raises(OSError) as error:
+ f.write(rand_file)
+ assert error.value.errno == errno.ENODEV
+
+ with pytest.raises(OSError) as error:
+ g.write(rand_file)
+ assert error.value.errno == errno.ENODEV
+
+ f.close()
+ g.close()
+
+ # Short wait needed to allow file to close before pulling driver
+ sleep(1)
+
+ p = subprocess.Popen(('modprobe', '-r', 'intel_sdsi'))
+ p.wait()
+ p = subprocess.Popen(('modprobe', '-r', 'intel_vsec'))
+ p.wait()
+ p = subprocess.Popen(('modprobe', 'intel_vsec'))
+ p.wait()
+
+ # Short wait needed to allow driver time to get inserted
+ # before continuing tests
+ sleep(1)
+
+ def test_memory_leak(self, socket):
+ if not kmemleak_enabled():
+ pytest.skip("kmemleak not enabled in kernel")
+
+ dev_name = DEV_PREFIX + '.' + str(socket)
+ driver_dir = CLASS_DIR + '/' + dev_name + "/driver/"
+
+ with open(driver_dir + 'unbind', 'w') as k:
+ print(dev_name, file = k)
+
+ sleep(1)
+
+ subprocess.check_output(('modprobe', '-r', 'intel_sdsi'))
+ subprocess.check_output(('modprobe', '-r', 'intel_vsec'))
+
+ with open('/sys/kernel/debug/kmemleak', 'w') as f:
+ print('scan', file = f)
+ sleep(5)
+
+ assert os.stat('/sys/kernel/debug/kmemleak').st_size == 0
+
+ subprocess.check_output(('modprobe', 'intel_vsec'))
+ sleep(1)
diff --git a/tools/testing/selftests/dt/.gitignore b/tools/testing/selftests/dt/.gitignore
new file mode 100644
index 000000000000..f6476c9f2884
--- /dev/null
+++ b/tools/testing/selftests/dt/.gitignore
@@ -0,0 +1 @@
+compatible_list
diff --git a/tools/testing/selftests/dt/Makefile b/tools/testing/selftests/dt/Makefile
new file mode 100644
index 000000000000..2d33ee9e9b71
--- /dev/null
+++ b/tools/testing/selftests/dt/Makefile
@@ -0,0 +1,21 @@
+PY3 = $(shell which python3 2>/dev/null)
+
+ifneq ($(PY3),)
+
+TEST_PROGS := test_unprobed_devices.sh
+TEST_GEN_FILES := compatible_list
+TEST_FILES := compatible_ignore_list
+
+include ../lib.mk
+
+$(OUTPUT)/compatible_list:
+ $(top_srcdir)/scripts/dtc/dt-extract-compatibles -d $(top_srcdir) > $@
+
+else
+
+all: no_py3_warning
+
+no_py3_warning:
+ @echo "Missing python3. This test will be skipped."
+
+endif
diff --git a/tools/testing/selftests/dt/compatible_ignore_list b/tools/testing/selftests/dt/compatible_ignore_list
new file mode 100644
index 000000000000..1323903feca9
--- /dev/null
+++ b/tools/testing/selftests/dt/compatible_ignore_list
@@ -0,0 +1 @@
+simple-mfd
diff --git a/tools/testing/selftests/dt/test_unprobed_devices.sh b/tools/testing/selftests/dt/test_unprobed_devices.sh
new file mode 100755
index 000000000000..2d7e70c5ad2d
--- /dev/null
+++ b/tools/testing/selftests/dt/test_unprobed_devices.sh
@@ -0,0 +1,80 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+#
+# Copyright (c) 2023 Collabora Ltd
+#
+# Based on Frank Rowand's dt_stat script.
+#
+# This script tests for devices that were declared on the Devicetree and are
+# expected to bind to a driver, but didn't.
+#
+# To achieve this, two lists are used:
+# * a list of the compatibles that can be matched by a Devicetree node
+# * a list of compatibles that should be ignored
+#
+
+DIR="$(dirname $(readlink -f "$0"))"
+
+source "${DIR}"/../kselftest/ktap_helpers.sh
+
+PDT=/proc/device-tree/
+COMPAT_LIST="${DIR}"/compatible_list
+IGNORE_LIST="${DIR}"/compatible_ignore_list
+
+ktap_print_header
+
+if [[ ! -d "${PDT}" ]]; then
+ ktap_skip_all "${PDT} doesn't exist."
+ exit "${KSFT_SKIP}"
+fi
+
+nodes_compatible=$(
+ for node in $(find ${PDT} -type d); do
+ [ ! -f "${node}"/compatible ] && continue
+ # Check if node is available
+ if [[ -e "${node}"/status ]]; then
+ status=$(tr -d '\000' < "${node}"/status)
+ [[ "${status}" != "okay" && "${status}" != "ok" ]] && continue
+ fi
+ echo "${node}" | sed -e 's|\/proc\/device-tree||'
+ done | sort
+ )
+
+nodes_dev_bound=$(
+ IFS=$'\n'
+ for dev_dir in $(find /sys/devices -type d); do
+ [ ! -f "${dev_dir}"/uevent ] && continue
+ [ ! -d "${dev_dir}"/driver ] && continue
+
+ grep '^OF_FULLNAME=' "${dev_dir}"/uevent | sed -e 's|OF_FULLNAME=||'
+ done
+ )
+
+num_tests=$(echo ${nodes_compatible} | wc -w)
+ktap_set_plan "${num_tests}"
+
+retval="${KSFT_PASS}"
+for node in ${nodes_compatible}; do
+ if ! echo "${nodes_dev_bound}" | grep -E -q "(^| )${node}( |\$)"; then
+ compatibles=$(tr '\000' '\n' < "${PDT}"/"${node}"/compatible)
+
+ for compatible in ${compatibles}; do
+ if grep -x -q "${compatible}" "${IGNORE_LIST}"; then
+ continue
+ fi
+
+ if grep -x -q "${compatible}" "${COMPAT_LIST}"; then
+ ktap_test_fail "${node}"
+ retval="${KSFT_FAIL}"
+ continue 2
+ fi
+ done
+ ktap_test_skip "${node}"
+ else
+ ktap_test_pass "${node}"
+ fi
+
+done
+
+ktap_print_totals
+exit "${retval}"
diff --git a/tools/testing/selftests/efivarfs/create-read.c b/tools/testing/selftests/efivarfs/create-read.c
index 9674a19396a3..7bc7af4eb2c1 100644
--- a/tools/testing/selftests/efivarfs/create-read.c
+++ b/tools/testing/selftests/efivarfs/create-read.c
@@ -32,8 +32,10 @@ int main(int argc, char **argv)
rc = read(fd, buf, sizeof(buf));
if (rc != 0) {
fprintf(stderr, "Reading a new var should return EOF\n");
+ close(fd);
return EXIT_FAILURE;
}
+ close(fd);
return EXIT_SUCCESS;
}
diff --git a/tools/testing/selftests/efivarfs/efivarfs.sh b/tools/testing/selftests/efivarfs/efivarfs.sh
index a90f394f9aa9..d374878cc0ba 100755
--- a/tools/testing/selftests/efivarfs/efivarfs.sh
+++ b/tools/testing/selftests/efivarfs/efivarfs.sh
@@ -87,6 +87,11 @@ test_create_read()
{
local file=$efivarfs_mount/$FUNCNAME-$test_guid
./create-read $file
+ if [ $? -ne 0 ]; then
+ echo "create and read $file failed"
+ file_cleanup $file
+ exit 1
+ fi
file_cleanup $file
}
diff --git a/tools/testing/selftests/exec/.gitignore b/tools/testing/selftests/exec/.gitignore
index 94b02a18f230..90c238ba6a4b 100644
--- a/tools/testing/selftests/exec/.gitignore
+++ b/tools/testing/selftests/exec/.gitignore
@@ -7,6 +7,10 @@ execveat.moved
execveat.path.ephemeral
execveat.ephemeral
execveat.denatured
+non-regular
+null-argv
+/load_address_*
/recursion-depth
xxxxxxxx*
pipe
+S_I*.test
diff --git a/tools/testing/selftests/exec/Makefile b/tools/testing/selftests/exec/Makefile
index 4453b8f8def3..a0b8688b0836 100644
--- a/tools/testing/selftests/exec/Makefile
+++ b/tools/testing/selftests/exec/Makefile
@@ -3,15 +3,17 @@ CFLAGS = -Wall
CFLAGS += -Wno-nonnull
CFLAGS += -D_GNU_SOURCE
-TEST_PROGS := binfmt_script
-TEST_GEN_PROGS := execveat
-TEST_GEN_FILES := execveat.symlink execveat.denatured script subdir pipe
+TEST_PROGS := binfmt_script.py
+TEST_GEN_PROGS := execveat load_address_4096 load_address_2097152 load_address_16777216 non-regular
+TEST_GEN_FILES := execveat.symlink execveat.denatured script subdir
# Makefile is a run-time dependency, since it's accessed by the execveat test
TEST_FILES := Makefile
TEST_GEN_PROGS += recursion-depth
+TEST_GEN_PROGS += null-argv
-EXTRA_CLEAN := $(OUTPUT)/subdir.moved $(OUTPUT)/execveat.moved $(OUTPUT)/xxxxx*
+EXTRA_CLEAN := $(OUTPUT)/subdir.moved $(OUTPUT)/execveat.moved $(OUTPUT)/xxxxx* \
+ $(OUTPUT)/S_I*.test
include ../lib.mk
@@ -26,4 +28,9 @@ $(OUTPUT)/execveat.symlink: $(OUTPUT)/execveat
$(OUTPUT)/execveat.denatured: $(OUTPUT)/execveat
cp $< $@
chmod -x $@
-
+$(OUTPUT)/load_address_4096: load_address.c
+ $(CC) $(CFLAGS) $(LDFLAGS) -Wl,-z,max-page-size=0x1000 -pie -static $< -o $@
+$(OUTPUT)/load_address_2097152: load_address.c
+ $(CC) $(CFLAGS) $(LDFLAGS) -Wl,-z,max-page-size=0x200000 -pie -static $< -o $@
+$(OUTPUT)/load_address_16777216: load_address.c
+ $(CC) $(CFLAGS) $(LDFLAGS) -Wl,-z,max-page-size=0x1000000 -pie -static $< -o $@
diff --git a/tools/testing/selftests/exec/binfmt_script b/tools/testing/selftests/exec/binfmt_script.py
index 05f94a741c7a..05f94a741c7a 100755
--- a/tools/testing/selftests/exec/binfmt_script
+++ b/tools/testing/selftests/exec/binfmt_script.py
diff --git a/tools/testing/selftests/exec/execveat.c b/tools/testing/selftests/exec/execveat.c
index 67bf7254a48f..0546ca24f2b2 100644
--- a/tools/testing/selftests/exec/execveat.c
+++ b/tools/testing/selftests/exec/execveat.c
@@ -23,6 +23,9 @@
#include "../kselftest.h"
+#define TESTS_EXPECTED 51
+#define TEST_NAME_LEN (PATH_MAX * 4)
+
static char longpath[2 * PATH_MAX] = "";
static char *envp[] = { "IN_TEST=yes", NULL, NULL };
static char *argv[] = { "execveat", "99", NULL };
@@ -43,71 +46,85 @@ static int execveat_(int fd, const char *path, char **argv, char **envp,
static int _check_execveat_fail(int fd, const char *path, int flags,
int expected_errno, const char *errno_str)
{
+ char test_name[TEST_NAME_LEN];
int rc;
errno = 0;
- printf("Check failure of execveat(%d, '%s', %d) with %s... ",
- fd, path?:"(null)", flags, errno_str);
+ snprintf(test_name, sizeof(test_name),
+ "Check failure of execveat(%d, '%s', %d) with %s",
+ fd, path?:"(null)", flags, errno_str);
rc = execveat_(fd, path, argv, envp, flags);
if (rc > 0) {
- printf("[FAIL] (unexpected success from execveat(2))\n");
+ ksft_print_msg("unexpected success from execveat(2)\n");
+ ksft_test_result_fail("%s\n", test_name);
return 1;
}
if (errno != expected_errno) {
- printf("[FAIL] (expected errno %d (%s) not %d (%s)\n",
- expected_errno, strerror(expected_errno),
- errno, strerror(errno));
+ ksft_print_msg("expected errno %d (%s) not %d (%s)\n",
+ expected_errno, strerror(expected_errno),
+ errno, strerror(errno));
+ ksft_test_result_fail("%s\n", test_name);
return 1;
}
- printf("[OK]\n");
+ ksft_test_result_pass("%s\n", test_name);
return 0;
}
static int check_execveat_invoked_rc(int fd, const char *path, int flags,
int expected_rc, int expected_rc2)
{
+ char test_name[TEST_NAME_LEN];
int status;
int rc;
pid_t child;
int pathlen = path ? strlen(path) : 0;
if (pathlen > 40)
- printf("Check success of execveat(%d, '%.20s...%s', %d)... ",
- fd, path, (path + pathlen - 20), flags);
+ snprintf(test_name, sizeof(test_name),
+ "Check success of execveat(%d, '%.20s...%s', %d)... ",
+ fd, path, (path + pathlen - 20), flags);
else
- printf("Check success of execveat(%d, '%s', %d)... ",
- fd, path?:"(null)", flags);
+ snprintf(test_name, sizeof(test_name),
+ "Check success of execveat(%d, '%s', %d)... ",
+ fd, path?:"(null)", flags);
+
child = fork();
if (child < 0) {
- printf("[FAIL] (fork() failed)\n");
+ ksft_perror("fork() failed");
+ ksft_test_result_fail("%s\n", test_name);
return 1;
}
if (child == 0) {
/* Child: do execveat(). */
rc = execveat_(fd, path, argv, envp, flags);
- printf("[FAIL]: execveat() failed, rc=%d errno=%d (%s)\n",
- rc, errno, strerror(errno));
+ ksft_print_msg("execveat() failed, rc=%d errno=%d (%s)\n",
+ rc, errno, strerror(errno));
+ ksft_test_result_fail("%s\n", test_name);
exit(1); /* should not reach here */
}
/* Parent: wait for & check child's exit status. */
rc = waitpid(child, &status, 0);
if (rc != child) {
- printf("[FAIL] (waitpid(%d,...) returned %d)\n", child, rc);
+ ksft_print_msg("waitpid(%d,...) returned %d\n", child, rc);
+ ksft_test_result_fail("%s\n", test_name);
return 1;
}
if (!WIFEXITED(status)) {
- printf("[FAIL] (child %d did not exit cleanly, status=%08x)\n",
- child, status);
+ ksft_print_msg("child %d did not exit cleanly, status=%08x\n",
+ child, status);
+ ksft_test_result_fail("%s\n", test_name);
return 1;
}
if ((WEXITSTATUS(status) != expected_rc) &&
(WEXITSTATUS(status) != expected_rc2)) {
- printf("[FAIL] (child %d exited with %d not %d nor %d)\n",
- child, WEXITSTATUS(status), expected_rc, expected_rc2);
+ ksft_print_msg("child %d exited with %d not %d nor %d\n",
+ child, WEXITSTATUS(status), expected_rc,
+ expected_rc2);
+ ksft_test_result_fail("%s\n", test_name);
return 1;
}
- printf("[OK]\n");
+ ksft_test_result_pass("%s\n", test_name);
return 0;
}
@@ -129,11 +146,9 @@ static int open_or_die(const char *filename, int flags)
{
int fd = open(filename, flags);
- if (fd < 0) {
- printf("Failed to open '%s'; "
+ if (fd < 0)
+ ksft_exit_fail_msg("Failed to open '%s'; "
"check prerequisites are available\n", filename);
- exit(1);
- }
return fd;
}
@@ -162,8 +177,7 @@ static int check_execveat_pathmax(int root_dfd, const char *src, int is_script)
char *cwd = getcwd(NULL, 0);
if (!cwd) {
- printf("Failed to getcwd(), errno=%d (%s)\n",
- errno, strerror(errno));
+ ksft_perror("Failed to getcwd()");
return 2;
}
strcpy(longpath, cwd);
@@ -193,12 +207,12 @@ static int check_execveat_pathmax(int root_dfd, const char *src, int is_script)
*/
fd = open(longpath, O_RDONLY);
if (fd > 0) {
- printf("Invoke copy of '%s' via filename of length %zu:\n",
- src, strlen(longpath));
+ ksft_print_msg("Invoke copy of '%s' via filename of length %zu:\n",
+ src, strlen(longpath));
fail += check_execveat(fd, "", AT_EMPTY_PATH);
} else {
- printf("Failed to open length %zu filename, errno=%d (%s)\n",
- strlen(longpath), errno, strerror(errno));
+ ksft_print_msg("Failed to open length %zu filename, errno=%d (%s)\n",
+ strlen(longpath), errno, strerror(errno));
fail++;
}
@@ -379,7 +393,7 @@ static int run_tests(void)
static void prerequisites(void)
{
int fd;
- const char *script = "#!/bin/sh\nexit $*\n";
+ const char *script = "#!/bin/bash\nexit $*\n";
/* Create ephemeral copies of files */
exe_cp("execveat", "execveat.ephemeral");
@@ -405,28 +419,31 @@ int main(int argc, char **argv)
const char *in_test = getenv("IN_TEST");
if (verbose) {
- printf(" invoked with:");
+ ksft_print_msg("invoked with:\n");
for (ii = 0; ii < argc; ii++)
- printf(" [%d]='%s'", ii, argv[ii]);
- printf("\n");
+ ksft_print_msg("\t[%d]='%s\n'", ii, argv[ii]);
}
/* Check expected environment transferred. */
if (!in_test || strcmp(in_test, "yes") != 0) {
- printf("[FAIL] (no IN_TEST=yes in env)\n");
+ ksft_print_msg("no IN_TEST=yes in env\n");
return 1;
}
/* Use the final argument as an exit code. */
rc = atoi(argv[argc - 1]);
- fflush(stdout);
+ exit(rc);
} else {
+ ksft_print_header();
+ ksft_set_plan(TESTS_EXPECTED);
prerequisites();
if (verbose)
envp[1] = "VERBOSE=1";
rc = run_tests();
if (rc > 0)
printf("%d tests failed\n", rc);
+ ksft_finished();
}
+
return rc;
}
diff --git a/tools/testing/selftests/exec/load_address.c b/tools/testing/selftests/exec/load_address.c
new file mode 100644
index 000000000000..d487c2f6a615
--- /dev/null
+++ b/tools/testing/selftests/exec/load_address.c
@@ -0,0 +1,68 @@
+// SPDX-License-Identifier: GPL-2.0-only
+#ifndef _GNU_SOURCE
+#define _GNU_SOURCE
+#endif
+#include <link.h>
+#include <stdio.h>
+#include <stdlib.h>
+
+struct Statistics {
+ unsigned long long load_address;
+ unsigned long long alignment;
+};
+
+int ExtractStatistics(struct dl_phdr_info *info, size_t size, void *data)
+{
+ struct Statistics *stats = (struct Statistics *) data;
+ int i;
+
+ if (info->dlpi_name != NULL && info->dlpi_name[0] != '\0') {
+ // Ignore headers from other than the executable.
+ return 2;
+ }
+
+ stats->load_address = (unsigned long long) info->dlpi_addr;
+ stats->alignment = 0;
+
+ for (i = 0; i < info->dlpi_phnum; i++) {
+ if (info->dlpi_phdr[i].p_type != PT_LOAD)
+ continue;
+
+ if (info->dlpi_phdr[i].p_align > stats->alignment)
+ stats->alignment = info->dlpi_phdr[i].p_align;
+ }
+
+ return 1; // Terminate dl_iterate_phdr.
+}
+
+int main(int argc, char **argv)
+{
+ struct Statistics extracted;
+ unsigned long long misalign;
+ int ret;
+
+ ret = dl_iterate_phdr(ExtractStatistics, &extracted);
+ if (ret != 1) {
+ fprintf(stderr, "FAILED\n");
+ return 1;
+ }
+
+ if (extracted.alignment == 0) {
+ fprintf(stderr, "No alignment found\n");
+ return 1;
+ } else if (extracted.alignment & (extracted.alignment - 1)) {
+ fprintf(stderr, "Alignment is not a power of 2\n");
+ return 1;
+ }
+
+ misalign = extracted.load_address & (extracted.alignment - 1);
+ if (misalign) {
+ printf("alignment = %llu, load_address = %llu\n",
+ extracted.alignment, extracted.load_address);
+ fprintf(stderr, "FAILED\n");
+ return 1;
+ }
+
+ fprintf(stderr, "PASS\n");
+ return 0;
+}
diff --git a/tools/testing/selftests/exec/non-regular.c b/tools/testing/selftests/exec/non-regular.c
new file mode 100644
index 000000000000..cd3a34aca93e
--- /dev/null
+++ b/tools/testing/selftests/exec/non-regular.c
@@ -0,0 +1,196 @@
+// SPDX-License-Identifier: GPL-2.0+
+#include <errno.h>
+#include <fcntl.h>
+#include <stdio.h>
+#include <string.h>
+#include <unistd.h>
+#include <sys/socket.h>
+#include <sys/stat.h>
+#include <sys/sysmacros.h>
+#include <sys/types.h>
+
+#include "../kselftest_harness.h"
+
+/* Remove a file, ignoring the result if it didn't exist. */
+void rm(struct __test_metadata *_metadata, const char *pathname,
+ int is_dir)
+{
+ int rc;
+
+ if (is_dir)
+ rc = rmdir(pathname);
+ else
+ rc = unlink(pathname);
+
+ if (rc < 0) {
+ ASSERT_EQ(errno, ENOENT) {
+ TH_LOG("Not ENOENT: %s", pathname);
+ }
+ } else {
+ ASSERT_EQ(rc, 0) {
+ TH_LOG("Failed to remove: %s", pathname);
+ }
+ }
+}
+
+FIXTURE(file) {
+ char *pathname;
+ int is_dir;
+};
+
+FIXTURE_VARIANT(file)
+{
+ const char *name;
+ int expected;
+ int is_dir;
+ void (*setup)(struct __test_metadata *_metadata,
+ FIXTURE_DATA(file) *self,
+ const FIXTURE_VARIANT(file) *variant);
+ int major, minor, mode; /* for mknod() */
+};
+
+void setup_link(struct __test_metadata *_metadata,
+ FIXTURE_DATA(file) *self,
+ const FIXTURE_VARIANT(file) *variant)
+{
+ const char * const paths[] = {
+ "/bin/true",
+ "/usr/bin/true",
+ };
+ int i;
+
+ for (i = 0; i < ARRAY_SIZE(paths); i++) {
+ if (access(paths[i], X_OK) == 0) {
+ ASSERT_EQ(symlink(paths[i], self->pathname), 0);
+ return;
+ }
+ }
+ ASSERT_EQ(1, 0) {
+ TH_LOG("Could not find viable 'true' binary");
+ }
+}
+
+FIXTURE_VARIANT_ADD(file, S_IFLNK)
+{
+ .name = "S_IFLNK",
+ .expected = ELOOP,
+ .setup = setup_link,
+};
+
+void setup_dir(struct __test_metadata *_metadata,
+ FIXTURE_DATA(file) *self,
+ const FIXTURE_VARIANT(file) *variant)
+{
+ ASSERT_EQ(mkdir(self->pathname, 0755), 0);
+}
+
+FIXTURE_VARIANT_ADD(file, S_IFDIR)
+{
+ .name = "S_IFDIR",
+ .is_dir = 1,
+ .expected = EACCES,
+ .setup = setup_dir,
+};
+
+void setup_node(struct __test_metadata *_metadata,
+ FIXTURE_DATA(file) *self,
+ const FIXTURE_VARIANT(file) *variant)
+{
+ dev_t dev;
+ int rc;
+
+ dev = makedev(variant->major, variant->minor);
+ rc = mknod(self->pathname, 0755 | variant->mode, dev);
+ ASSERT_EQ(rc, 0) {
+ if (errno == EPERM)
+ SKIP(return, "Please run as root; cannot mknod(%s)",
+ variant->name);
+ }
+}
+
+FIXTURE_VARIANT_ADD(file, S_IFBLK)
+{
+ .name = "S_IFBLK",
+ .expected = EACCES,
+ .setup = setup_node,
+ /* /dev/loop0 */
+ .major = 7,
+ .minor = 0,
+ .mode = S_IFBLK,
+};
+
+FIXTURE_VARIANT_ADD(file, S_IFCHR)
+{
+ .name = "S_IFCHR",
+ .expected = EACCES,
+ .setup = setup_node,
+ /* /dev/zero */
+ .major = 1,
+ .minor = 5,
+ .mode = S_IFCHR,
+};
+
+void setup_fifo(struct __test_metadata *_metadata,
+ FIXTURE_DATA(file) *self,
+ const FIXTURE_VARIANT(file) *variant)
+{
+ ASSERT_EQ(mkfifo(self->pathname, 0755), 0);
+}
+
+FIXTURE_VARIANT_ADD(file, S_IFIFO)
+{
+ .name = "S_IFIFO",
+ .expected = EACCES,
+ .setup = setup_fifo,
+};
+
+FIXTURE_SETUP(file)
+{
+ ASSERT_GT(asprintf(&self->pathname, "%s.test", variant->name), 6);
+ self->is_dir = variant->is_dir;
+
+ rm(_metadata, self->pathname, variant->is_dir);
+ variant->setup(_metadata, self, variant);
+}
+
+FIXTURE_TEARDOWN(file)
+{
+ rm(_metadata, self->pathname, self->is_dir);
+}
+
+TEST_F(file, exec_errno)
+{
+ char * const argv[2] = { (char * const)self->pathname, NULL };
+
+ EXPECT_LT(execv(argv[0], argv), 0);
+ EXPECT_EQ(errno, variant->expected);
+}
+
+/* S_IFSOCK */
+FIXTURE(sock)
+{
+ int fd;
+};
+
+FIXTURE_SETUP(sock)
+{
+ self->fd = socket(AF_INET, SOCK_STREAM, 0);
+ ASSERT_GE(self->fd, 0);
+}
+
+FIXTURE_TEARDOWN(sock)
+{
+ if (self->fd >= 0)
+ ASSERT_EQ(close(self->fd), 0);
+}
+
+TEST_F(sock, exec_errno)
+{
+ char * const argv[2] = { " magic socket ", NULL };
+ char * const envp[1] = { NULL };
+
+ EXPECT_LT(fexecve(self->fd, argv, envp), 0);
+ EXPECT_EQ(errno, EACCES);
+}
+
+TEST_HARNESS_MAIN
diff --git a/tools/testing/selftests/exec/null-argv.c b/tools/testing/selftests/exec/null-argv.c
new file mode 100644
index 000000000000..c19726e710d1
--- /dev/null
+++ b/tools/testing/selftests/exec/null-argv.c
@@ -0,0 +1,78 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/* Test that empty argvs are swapped out for a single empty string. */
+#include <stdio.h>
+#include <unistd.h>
+#include <sys/types.h>
+#include <sys/wait.h>
+
+#include "../kselftest.h"
+
+#define FORK(exec) \
+do { \
+ pid = fork(); \
+ if (pid == 0) { \
+ /* Child */ \
+ exec; /* Some kind of exec */ \
+ perror("# " #exec); \
+ return 1; \
+ } \
+ check_result(pid, #exec); \
+} while (0)
+
+void check_result(pid_t pid, const char *msg)
+{
+ int wstatus;
+
+ if (pid == (pid_t)-1) {
+ perror("# fork");
+ ksft_test_result_fail("fork failed: %s\n", msg);
+ return;
+ }
+ if (waitpid(pid, &wstatus, 0) < 0) {
+ perror("# waitpid");
+ ksft_test_result_fail("waitpid failed: %s\n", msg);
+ return;
+ }
+ if (!WIFEXITED(wstatus)) {
+ ksft_test_result_fail("child did not exit: %s\n", msg);
+ return;
+ }
+ if (WEXITSTATUS(wstatus) != 0) {
+ ksft_test_result_fail("non-zero exit: %s\n", msg);
+ return;
+ }
+ ksft_test_result_pass("%s\n", msg);
+}
+
+int main(int argc, char *argv[], char *envp[])
+{
+ pid_t pid;
+ static char * const args[] = { NULL };
+ static char * const str[] = { "", NULL };
+
+ /* argc counting checks */
+ if (argc < 1) {
+ fprintf(stderr, "# FAIL: saw argc == 0 (old kernel?)\n");
+ return 1;
+ }
+ if (argc != 1) {
+ fprintf(stderr, "# FAIL: unknown argc (%d)\n", argc);
+ return 1;
+ }
+ if (argv[0][0] == '\0') {
+ /* Good, we found a NULL terminated string at argv[0]! */
+ return 0;
+ }
+
+ /* Test runner. */
+ ksft_print_header();
+ ksft_set_plan(5);
+
+ FORK(execve(argv[0], str, NULL));
+ FORK(execve(argv[0], NULL, NULL));
+ FORK(execve(argv[0], NULL, envp));
+ FORK(execve(argv[0], args, NULL));
+ FORK(execve(argv[0], args, envp));
+
+ ksft_exit(ksft_cnt.ksft_pass == ksft_plan);
+}
diff --git a/tools/testing/selftests/rcutorture/formal/srcu-cbmc/.gitignore b/tools/testing/selftests/fchmodat2/.gitignore
index 24e27957efcc..82a4846cbc4b 100644
--- a/tools/testing/selftests/rcutorture/formal/srcu-cbmc/.gitignore
+++ b/tools/testing/selftests/fchmodat2/.gitignore
@@ -1,2 +1,2 @@
# SPDX-License-Identifier: GPL-2.0-only
-srcu.c
+/*_test
diff --git a/tools/testing/selftests/fchmodat2/Makefile b/tools/testing/selftests/fchmodat2/Makefile
new file mode 100644
index 000000000000..71ec34bf1501
--- /dev/null
+++ b/tools/testing/selftests/fchmodat2/Makefile
@@ -0,0 +1,6 @@
+# SPDX-License-Identifier: GPL-2.0-or-later
+
+CFLAGS += -Wall -O2 -g -fsanitize=address -fsanitize=undefined -static-libasan $(KHDR_INCLUDES)
+TEST_GEN_PROGS := fchmodat2_test
+
+include ../lib.mk
diff --git a/tools/testing/selftests/fchmodat2/fchmodat2_test.c b/tools/testing/selftests/fchmodat2/fchmodat2_test.c
new file mode 100644
index 000000000000..e0319417124d
--- /dev/null
+++ b/tools/testing/selftests/fchmodat2/fchmodat2_test.c
@@ -0,0 +1,142 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+
+#define _GNU_SOURCE
+#include <fcntl.h>
+#include <sys/stat.h>
+#include <sys/types.h>
+#include <syscall.h>
+#include <unistd.h>
+
+#include "../kselftest.h"
+
+int sys_fchmodat2(int dfd, const char *filename, mode_t mode, int flags)
+{
+ int ret = syscall(__NR_fchmodat2, dfd, filename, mode, flags);
+
+ return ret >= 0 ? ret : -errno;
+}
+
+int setup_testdir(void)
+{
+ int dfd, ret;
+ char dirname[] = "/tmp/ksft-fchmodat2.XXXXXX";
+
+ /* Make the top-level directory. */
+ if (!mkdtemp(dirname))
+ ksft_exit_fail_msg("%s: failed to create tmpdir\n", __func__);
+
+ dfd = open(dirname, O_PATH | O_DIRECTORY);
+ if (dfd < 0)
+ ksft_exit_fail_msg("%s: failed to open tmpdir\n", __func__);
+
+ ret = openat(dfd, "regfile", O_CREAT | O_WRONLY | O_TRUNC, 0644);
+ if (ret < 0)
+ ksft_exit_fail_msg("%s: failed to create file in tmpdir\n",
+ __func__);
+ close(ret);
+
+ ret = symlinkat("regfile", dfd, "symlink");
+ if (ret < 0)
+ ksft_exit_fail_msg("%s: failed to create symlink in tmpdir\n",
+ __func__);
+
+ return dfd;
+}
+
+int expect_mode(int dfd, const char *filename, mode_t expect_mode)
+{
+ struct stat st;
+ int ret = fstatat(dfd, filename, &st, AT_SYMLINK_NOFOLLOW);
+
+ if (ret)
+ ksft_exit_fail_msg("%s: %s: fstatat failed\n",
+ __func__, filename);
+
+ return (st.st_mode == expect_mode);
+}
+
+void test_regfile(void)
+{
+ int dfd, ret;
+
+ dfd = setup_testdir();
+
+ ret = sys_fchmodat2(dfd, "regfile", 0640, 0);
+
+ if (ret < 0)
+ ksft_exit_fail_msg("%s: fchmodat2(noflag) failed\n", __func__);
+
+ if (!expect_mode(dfd, "regfile", 0100640))
+ ksft_exit_fail_msg("%s: wrong file mode bits after fchmodat2\n",
+ __func__);
+
+ ret = sys_fchmodat2(dfd, "regfile", 0600, AT_SYMLINK_NOFOLLOW);
+
+ if (ret < 0)
+ ksft_exit_fail_msg("%s: fchmodat2(AT_SYMLINK_NOFOLLOW) failed\n",
+ __func__);
+
+ if (!expect_mode(dfd, "regfile", 0100600))
+ ksft_exit_fail_msg("%s: wrong file mode bits after fchmodat2 with nofollow\n",
+ __func__);
+
+ ksft_test_result_pass("fchmodat2(regfile)\n");
+}
+
+void test_symlink(void)
+{
+ int dfd, ret;
+
+ dfd = setup_testdir();
+
+ ret = sys_fchmodat2(dfd, "symlink", 0640, 0);
+
+ if (ret < 0)
+ ksft_exit_fail_msg("%s: fchmodat2(noflag) failed\n", __func__);
+
+ if (!expect_mode(dfd, "regfile", 0100640))
+ ksft_exit_fail_msg("%s: wrong file mode bits after fchmodat2\n",
+ __func__);
+
+ if (!expect_mode(dfd, "symlink", 0120777))
+ ksft_exit_fail_msg("%s: wrong symlink mode bits after fchmodat2\n",
+ __func__);
+
+ ret = sys_fchmodat2(dfd, "symlink", 0600, AT_SYMLINK_NOFOLLOW);
+
+ /*
+ * On certain filesystems (xfs or btrfs), chmod operation fails. So we
+ * first check the symlink target but if the operation fails we mark the
+ * test as skipped.
+ *
+ * https://sourceware.org/legacy-ml/libc-alpha/2020-02/msg00467.html
+ */
+ if (ret == 0 && !expect_mode(dfd, "symlink", 0120600))
+ ksft_exit_fail_msg("%s: wrong symlink mode bits after fchmodat2 with nofollow\n",
+ __func__);
+
+ if (!expect_mode(dfd, "regfile", 0100640))
+ ksft_exit_fail_msg("%s: wrong file mode bits after fchmodat2 with nofollow\n",
+ __func__);
+
+ if (ret != 0)
+ ksft_test_result_skip("fchmodat2(symlink)\n");
+ else
+ ksft_test_result_pass("fchmodat2(symlink)\n");
+}
+
+#define NUM_TESTS 2
+
+int main(int argc, char **argv)
+{
+ ksft_print_header();
+ ksft_set_plan(NUM_TESTS);
+
+ test_regfile();
+ test_symlink();
+
+ if (ksft_get_fail_cnt() + ksft_get_error_cnt() > 0)
+ ksft_exit_fail();
+ else
+ ksft_exit_pass();
+}
diff --git a/tools/testing/selftests/filelock/Makefile b/tools/testing/selftests/filelock/Makefile
new file mode 100644
index 000000000000..478e82f8b464
--- /dev/null
+++ b/tools/testing/selftests/filelock/Makefile
@@ -0,0 +1,5 @@
+# SPDX-License-Identifier: GPL-2.0
+
+TEST_GEN_PROGS := ofdlocks
+
+include ../lib.mk
diff --git a/tools/testing/selftests/filelock/ofdlocks.c b/tools/testing/selftests/filelock/ofdlocks.c
new file mode 100644
index 000000000000..a55b79810ab2
--- /dev/null
+++ b/tools/testing/selftests/filelock/ofdlocks.c
@@ -0,0 +1,132 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#define _GNU_SOURCE
+#include <fcntl.h>
+#include <assert.h>
+#include <stdio.h>
+#include <unistd.h>
+#include <string.h>
+#include "../kselftest.h"
+
+static int lock_set(int fd, struct flock *fl)
+{
+ int ret;
+
+ fl->l_pid = 0; // needed for OFD locks
+ fl->l_whence = SEEK_SET;
+ ret = fcntl(fd, F_OFD_SETLK, fl);
+ if (ret)
+ perror("fcntl()");
+ return ret;
+}
+
+static int lock_get(int fd, struct flock *fl)
+{
+ int ret;
+
+ fl->l_pid = 0; // needed for OFD locks
+ fl->l_whence = SEEK_SET;
+ ret = fcntl(fd, F_OFD_GETLK, fl);
+ if (ret)
+ perror("fcntl()");
+ return ret;
+}
+
+int main(void)
+{
+ int rc;
+ struct flock fl, fl2;
+ int fd = open("/tmp/aa", O_RDWR | O_CREAT | O_EXCL, 0600);
+ int fd2 = open("/tmp/aa", O_RDONLY);
+
+ unlink("/tmp/aa");
+ assert(fd != -1);
+ assert(fd2 != -1);
+ ksft_print_msg("[INFO] opened fds %i %i\n", fd, fd2);
+
+ /* Set some read lock */
+ fl.l_type = F_RDLCK;
+ fl.l_start = 5;
+ fl.l_len = 3;
+ rc = lock_set(fd, &fl);
+ if (rc == 0) {
+ ksft_print_msg
+ ("[SUCCESS] set OFD read lock on first fd\n");
+ } else {
+ ksft_print_msg("[FAIL] to set OFD read lock on first fd\n");
+ return -1;
+ }
+ /* Make sure read locks do not conflict on different fds. */
+ fl.l_type = F_RDLCK;
+ fl.l_start = 5;
+ fl.l_len = 1;
+ rc = lock_get(fd2, &fl);
+ if (rc != 0)
+ return -1;
+ if (fl.l_type != F_UNLCK) {
+ ksft_print_msg("[FAIL] read locks conflicted\n");
+ return -1;
+ }
+ /* Make sure read/write locks do conflict on different fds. */
+ fl.l_type = F_WRLCK;
+ fl.l_start = 5;
+ fl.l_len = 1;
+ rc = lock_get(fd2, &fl);
+ if (rc != 0)
+ return -1;
+ if (fl.l_type != F_UNLCK) {
+ ksft_print_msg
+ ("[SUCCESS] read and write locks conflicted\n");
+ } else {
+ ksft_print_msg
+ ("[SUCCESS] read and write locks not conflicted\n");
+ return -1;
+ }
+ /* Get info about the lock on first fd. */
+ fl.l_type = F_UNLCK;
+ fl.l_start = 5;
+ fl.l_len = 1;
+ rc = lock_get(fd, &fl);
+ if (rc != 0) {
+ ksft_print_msg
+ ("[FAIL] F_OFD_GETLK with F_UNLCK not supported\n");
+ return -1;
+ }
+ if (fl.l_type != F_UNLCK) {
+ ksft_print_msg
+ ("[SUCCESS] F_UNLCK test returns: locked, type %i pid %i len %zi\n",
+ fl.l_type, fl.l_pid, fl.l_len);
+ } else {
+ ksft_print_msg
+ ("[FAIL] F_OFD_GETLK with F_UNLCK did not return lock info\n");
+ return -1;
+ }
+ /* Try the same but by locking everything by len==0. */
+ fl2.l_type = F_UNLCK;
+ fl2.l_start = 0;
+ fl2.l_len = 0;
+ rc = lock_get(fd, &fl2);
+ if (rc != 0) {
+ ksft_print_msg
+ ("[FAIL] F_OFD_GETLK with F_UNLCK not supported\n");
+ return -1;
+ }
+ if (memcmp(&fl, &fl2, sizeof(fl))) {
+ ksft_print_msg
+ ("[FAIL] F_UNLCK test returns: locked, type %i pid %i len %zi\n",
+ fl.l_type, fl.l_pid, fl.l_len);
+ return -1;
+ }
+ ksft_print_msg("[SUCCESS] F_UNLCK with len==0 returned the same\n");
+ /* Get info about the lock on second fd - no locks on it. */
+ fl.l_type = F_UNLCK;
+ fl.l_start = 0;
+ fl.l_len = 0;
+ lock_get(fd2, &fl);
+ if (fl.l_type != F_UNLCK) {
+ ksft_print_msg
+ ("[FAIL] F_OFD_GETLK with F_UNLCK return lock info from another fd\n");
+ return -1;
+ }
+ return 0;
+}
diff --git a/tools/testing/selftests/filesystems/Makefile b/tools/testing/selftests/filesystems/Makefile
index 129880fb42d3..c647fd6a0446 100644
--- a/tools/testing/selftests/filesystems/Makefile
+++ b/tools/testing/selftests/filesystems/Makefile
@@ -1,6 +1,6 @@
# SPDX-License-Identifier: GPL-2.0
-CFLAGS += -I../../../../usr/include/
+CFLAGS += $(KHDR_INCLUDES)
TEST_GEN_PROGS := devpts_pts
TEST_GEN_PROGS_EXTENDED := dnotify_test
diff --git a/tools/testing/selftests/filesystems/binderfs/Makefile b/tools/testing/selftests/filesystems/binderfs/Makefile
index 8af25ae96049..c2f7cef919c0 100644
--- a/tools/testing/selftests/filesystems/binderfs/Makefile
+++ b/tools/testing/selftests/filesystems/binderfs/Makefile
@@ -1,6 +1,6 @@
# SPDX-License-Identifier: GPL-2.0
-CFLAGS += -I../../../../../usr/include/ -pthread
+CFLAGS += $(KHDR_INCLUDES) -pthread
TEST_GEN_PROGS := binderfs_test
binderfs_test: binderfs_test.c ../../kselftest.h ../../kselftest_harness.h
diff --git a/tools/testing/selftests/filesystems/binderfs/binderfs_test.c b/tools/testing/selftests/filesystems/binderfs/binderfs_test.c
index 8a6b507e34a8..5f362c0fd890 100644
--- a/tools/testing/selftests/filesystems/binderfs/binderfs_test.c
+++ b/tools/testing/selftests/filesystems/binderfs/binderfs_test.c
@@ -21,7 +21,6 @@
#include <linux/android/binder.h>
#include <linux/android/binderfs.h>
-#include "../../kselftest.h"
#include "../../kselftest_harness.h"
#define DEFAULT_THREADS 4
@@ -37,151 +36,152 @@
fd = -EBADF; \
}
-#define log_exit(format, ...) \
- ({ \
- fprintf(stderr, format "\n", ##__VA_ARGS__); \
- exit(EXIT_FAILURE); \
- })
-
-static void change_mountns(void)
+static void change_mountns(struct __test_metadata *_metadata)
{
int ret;
ret = unshare(CLONE_NEWNS);
- if (ret < 0)
- ksft_exit_fail_msg("%s - Failed to unshare mount namespace\n",
- strerror(errno));
+ ASSERT_EQ(ret, 0) {
+ TH_LOG("%s - Failed to unshare mount namespace",
+ strerror(errno));
+ }
ret = mount(NULL, "/", NULL, MS_REC | MS_PRIVATE, 0);
- if (ret < 0)
- ksft_exit_fail_msg("%s - Failed to mount / as private\n",
- strerror(errno));
-}
-
-static void rmdir_protect_errno(const char *dir)
-{
- int saved_errno = errno;
- (void)rmdir(dir);
- errno = saved_errno;
+ ASSERT_EQ(ret, 0) {
+ TH_LOG("%s - Failed to mount / as private",
+ strerror(errno));
+ }
}
-static int __do_binderfs_test(void)
+static int __do_binderfs_test(struct __test_metadata *_metadata)
{
- int fd, ret, saved_errno;
+ int fd, ret, saved_errno, result = 1;
size_t len;
ssize_t wret;
struct binderfs_device device = { 0 };
struct binder_version version = { 0 };
char binderfs_mntpt[] = P_tmpdir "/binderfs_XXXXXX",
device_path[sizeof(P_tmpdir "/binderfs_XXXXXX/") + BINDERFS_MAX_NAME];
+ static const char * const binder_features[] = {
+ "oneway_spam_detection",
+ "extended_error",
+ };
- change_mountns();
+ change_mountns(_metadata);
- if (!mkdtemp(binderfs_mntpt))
- ksft_exit_fail_msg(
- "%s - Failed to create binderfs mountpoint\n",
+ EXPECT_NE(mkdtemp(binderfs_mntpt), NULL) {
+ TH_LOG("%s - Failed to create binderfs mountpoint",
strerror(errno));
+ goto out;
+ }
ret = mount(NULL, binderfs_mntpt, "binder", 0, 0);
- if (ret < 0) {
- if (errno != ENODEV)
- ksft_exit_fail_msg("%s - Failed to mount binderfs\n",
- strerror(errno));
-
- rmdir_protect_errno(binderfs_mntpt);
- return 1;
+ EXPECT_EQ(ret, 0) {
+ if (errno == ENODEV)
+ SKIP(goto out, "binderfs missing");
+ TH_LOG("%s - Failed to mount binderfs", strerror(errno));
+ goto rmdir;
}
- /* binderfs mount test passed */
- ksft_inc_pass_cnt();
+ /* success: binderfs mounted */
memcpy(device.name, "my-binder", strlen("my-binder"));
snprintf(device_path, sizeof(device_path), "%s/binder-control", binderfs_mntpt);
fd = open(device_path, O_RDONLY | O_CLOEXEC);
- if (fd < 0)
- ksft_exit_fail_msg(
- "%s - Failed to open binder-control device\n",
+ EXPECT_GE(fd, 0) {
+ TH_LOG("%s - Failed to open binder-control device",
strerror(errno));
+ goto umount;
+ }
ret = ioctl(fd, BINDER_CTL_ADD, &device);
saved_errno = errno;
close(fd);
errno = saved_errno;
- if (ret < 0) {
- rmdir_protect_errno(binderfs_mntpt);
- ksft_exit_fail_msg(
- "%s - Failed to allocate new binder device\n",
+ EXPECT_GE(ret, 0) {
+ TH_LOG("%s - Failed to allocate new binder device",
strerror(errno));
+ goto umount;
}
- ksft_print_msg(
- "Allocated new binder device with major %d, minor %d, and name %s\n",
+ TH_LOG("Allocated new binder device with major %d, minor %d, and name %s",
device.major, device.minor, device.name);
- /* binder device allocation test passed */
- ksft_inc_pass_cnt();
+ /* success: binder device allocation */
snprintf(device_path, sizeof(device_path), "%s/my-binder", binderfs_mntpt);
fd = open(device_path, O_CLOEXEC | O_RDONLY);
- if (fd < 0) {
- rmdir_protect_errno(binderfs_mntpt);
- ksft_exit_fail_msg("%s - Failed to open my-binder device\n",
- strerror(errno));
+ EXPECT_GE(fd, 0) {
+ TH_LOG("%s - Failed to open my-binder device",
+ strerror(errno));
+ goto umount;
}
ret = ioctl(fd, BINDER_VERSION, &version);
saved_errno = errno;
close(fd);
errno = saved_errno;
- if (ret < 0) {
- rmdir_protect_errno(binderfs_mntpt);
- ksft_exit_fail_msg(
- "%s - Failed to open perform BINDER_VERSION request\n",
+ EXPECT_GE(ret, 0) {
+ TH_LOG("%s - Failed to open perform BINDER_VERSION request",
strerror(errno));
+ goto umount;
}
- ksft_print_msg("Detected binder version: %d\n",
- version.protocol_version);
+ TH_LOG("Detected binder version: %d", version.protocol_version);
- /* binder transaction with binderfs binder device passed */
- ksft_inc_pass_cnt();
+ /* success: binder transaction with binderfs binder device */
ret = unlink(device_path);
- if (ret < 0) {
- rmdir_protect_errno(binderfs_mntpt);
- ksft_exit_fail_msg("%s - Failed to delete binder device\n",
- strerror(errno));
+ EXPECT_EQ(ret, 0) {
+ TH_LOG("%s - Failed to delete binder device",
+ strerror(errno));
+ goto umount;
}
- /* binder device removal passed */
- ksft_inc_pass_cnt();
+ /* success: binder device removal */
snprintf(device_path, sizeof(device_path), "%s/binder-control", binderfs_mntpt);
ret = unlink(device_path);
- if (!ret) {
- rmdir_protect_errno(binderfs_mntpt);
- ksft_exit_fail_msg("Managed to delete binder-control device\n");
- } else if (errno != EPERM) {
- rmdir_protect_errno(binderfs_mntpt);
- ksft_exit_fail_msg(
- "%s - Failed to delete binder-control device but exited with unexpected error code\n",
+ EXPECT_NE(ret, 0) {
+ TH_LOG("Managed to delete binder-control device");
+ goto umount;
+ }
+ EXPECT_EQ(errno, EPERM) {
+ TH_LOG("%s - Failed to delete binder-control device but exited with unexpected error code",
strerror(errno));
+ goto umount;
}
- /* binder-control device removal failed as expected */
- ksft_inc_xfail_cnt();
+ /* success: binder-control device removal failed as expected */
-on_error:
- ret = umount2(binderfs_mntpt, MNT_DETACH);
- rmdir_protect_errno(binderfs_mntpt);
- if (ret < 0)
- ksft_exit_fail_msg("%s - Failed to unmount binderfs\n",
- strerror(errno));
+ for (int i = 0; i < ARRAY_SIZE(binder_features); i++) {
+ snprintf(device_path, sizeof(device_path), "%s/features/%s",
+ binderfs_mntpt, binder_features[i]);
+ fd = open(device_path, O_CLOEXEC | O_RDONLY);
+ EXPECT_GE(fd, 0) {
+ TH_LOG("%s - Failed to open binder feature: %s",
+ strerror(errno), binder_features[i]);
+ goto umount;
+ }
+ close(fd);
+ }
- /* binderfs unmount test passed */
- ksft_inc_pass_cnt();
- return 0;
+ /* success: binder feature files found */
+ result = 0;
+
+umount:
+ ret = umount2(binderfs_mntpt, MNT_DETACH);
+ EXPECT_EQ(ret, 0) {
+ TH_LOG("%s - Failed to unmount binderfs", strerror(errno));
+ }
+rmdir:
+ ret = rmdir(binderfs_mntpt);
+ EXPECT_EQ(ret, 0) {
+ TH_LOG("%s - Failed to rmdir binderfs mount", strerror(errno));
+ }
+out:
+ return result;
}
static int wait_for_pid(pid_t pid)
@@ -291,7 +291,7 @@ static int write_id_mapping(enum idmap_type type, pid_t pid, const char *buf,
return 0;
}
-static void change_userns(int syncfds[2])
+static void change_userns(struct __test_metadata *_metadata, int syncfds[2])
{
int ret;
char buf;
@@ -299,25 +299,29 @@ static void change_userns(int syncfds[2])
close_prot_errno_disarm(syncfds[1]);
ret = unshare(CLONE_NEWUSER);
- if (ret < 0)
- ksft_exit_fail_msg("%s - Failed to unshare user namespace\n",
- strerror(errno));
+ ASSERT_EQ(ret, 0) {
+ TH_LOG("%s - Failed to unshare user namespace",
+ strerror(errno));
+ }
ret = write_nointr(syncfds[0], "1", 1);
- if (ret != 1)
- ksft_exit_fail_msg("write_nointr() failed\n");
+ ASSERT_EQ(ret, 1) {
+ TH_LOG("write_nointr() failed");
+ }
ret = read_nointr(syncfds[0], &buf, 1);
- if (ret != 1)
- ksft_exit_fail_msg("read_nointr() failed\n");
+ ASSERT_EQ(ret, 1) {
+ TH_LOG("read_nointr() failed");
+ }
close_prot_errno_disarm(syncfds[0]);
- if (setid_userns_root())
- ksft_exit_fail_msg("setid_userns_root() failed");
+ ASSERT_EQ(setid_userns_root(), 0) {
+ TH_LOG("setid_userns_root() failed");
+ }
}
-static void change_idmaps(int syncfds[2], pid_t pid)
+static void change_idmaps(struct __test_metadata *_metadata, int syncfds[2], pid_t pid)
{
int ret;
char buf;
@@ -326,35 +330,42 @@ static void change_idmaps(int syncfds[2], pid_t pid)
close_prot_errno_disarm(syncfds[0]);
ret = read_nointr(syncfds[1], &buf, 1);
- if (ret != 1)
- ksft_exit_fail_msg("read_nointr() failed\n");
+ ASSERT_EQ(ret, 1) {
+ TH_LOG("read_nointr() failed");
+ }
snprintf(id_map, sizeof(id_map), "0 %d 1\n", getuid());
ret = write_id_mapping(UID_MAP, pid, id_map, strlen(id_map));
- if (ret)
- ksft_exit_fail_msg("write_id_mapping(UID_MAP) failed");
+ ASSERT_EQ(ret, 0) {
+ TH_LOG("write_id_mapping(UID_MAP) failed");
+ }
snprintf(id_map, sizeof(id_map), "0 %d 1\n", getgid());
ret = write_id_mapping(GID_MAP, pid, id_map, strlen(id_map));
- if (ret)
- ksft_exit_fail_msg("write_id_mapping(GID_MAP) failed");
+ ASSERT_EQ(ret, 0) {
+ TH_LOG("write_id_mapping(GID_MAP) failed");
+ }
ret = write_nointr(syncfds[1], "1", 1);
- if (ret != 1)
- ksft_exit_fail_msg("write_nointr() failed");
+ ASSERT_EQ(ret, 1) {
+ TH_LOG("write_nointr() failed");
+ }
close_prot_errno_disarm(syncfds[1]);
}
+struct __test_metadata *_thread_metadata;
static void *binder_version_thread(void *data)
{
+ struct __test_metadata *_metadata = _thread_metadata;
int fd = PTR_TO_INT(data);
struct binder_version version = { 0 };
int ret;
ret = ioctl(fd, BINDER_VERSION, &version);
if (ret < 0)
- ksft_print_msg("%s - Failed to open perform BINDER_VERSION request\n", strerror(errno));
+ TH_LOG("%s - Failed to open perform BINDER_VERSION request\n",
+ strerror(errno));
pthread_exit(data);
}
@@ -377,68 +388,80 @@ TEST(binderfs_stress)
device_path[sizeof(P_tmpdir "/binderfs_XXXXXX/") + BINDERFS_MAX_NAME];
ret = socketpair(PF_LOCAL, SOCK_STREAM | SOCK_CLOEXEC, 0, syncfds);
- if (ret < 0)
- ksft_exit_fail_msg("%s - Failed to create socket pair", strerror(errno));
+ ASSERT_EQ(ret, 0) {
+ TH_LOG("%s - Failed to create socket pair", strerror(errno));
+ }
pid = fork();
- if (pid < 0) {
+ ASSERT_GE(pid, 0) {
+ TH_LOG("%s - Failed to fork", strerror(errno));
close_prot_errno_disarm(syncfds[0]);
close_prot_errno_disarm(syncfds[1]);
- ksft_exit_fail_msg("%s - Failed to fork", strerror(errno));
}
if (pid == 0) {
int i, j, k, nthreads;
pthread_attr_t attr;
pthread_t threads[DEFAULT_THREADS];
- change_userns(syncfds);
- change_mountns();
+ change_userns(_metadata, syncfds);
+ change_mountns(_metadata);
- if (!mkdtemp(binderfs_mntpt))
- log_exit("%s - Failed to create binderfs mountpoint\n",
- strerror(errno));
+ ASSERT_NE(mkdtemp(binderfs_mntpt), NULL) {
+ TH_LOG("%s - Failed to create binderfs mountpoint",
+ strerror(errno));
+ }
ret = mount(NULL, binderfs_mntpt, "binder", 0, 0);
- if (ret < 0)
- log_exit("%s - Failed to mount binderfs\n", strerror(errno));
+ ASSERT_EQ(ret, 0) {
+ TH_LOG("%s - Failed to mount binderfs, check if CONFIG_ANDROID_BINDERFS is enabled in the running kernel",
+ strerror(errno));
+ }
for (int i = 0; i < ARRAY_SIZE(fds); i++) {
snprintf(device_path, sizeof(device_path),
"%s/binder-control", binderfs_mntpt);
fd = open(device_path, O_RDONLY | O_CLOEXEC);
- if (fd < 0)
- log_exit("%s - Failed to open binder-control device\n", strerror(errno));
+ ASSERT_GE(fd, 0) {
+ TH_LOG("%s - Failed to open binder-control device",
+ strerror(errno));
+ }
memset(&device, 0, sizeof(device));
snprintf(device.name, sizeof(device.name), "%d", i);
ret = ioctl(fd, BINDER_CTL_ADD, &device);
close_prot_errno_disarm(fd);
- if (ret < 0)
- log_exit("%s - Failed to allocate new binder device\n", strerror(errno));
+ ASSERT_EQ(ret, 0) {
+ TH_LOG("%s - Failed to allocate new binder device",
+ strerror(errno));
+ }
snprintf(device_path, sizeof(device_path), "%s/%d",
binderfs_mntpt, i);
fds[i] = open(device_path, O_RDONLY | O_CLOEXEC);
- if (fds[i] < 0)
- log_exit("%s - Failed to open binder device\n", strerror(errno));
+ ASSERT_GE(fds[i], 0) {
+ TH_LOG("%s - Failed to open binder device", strerror(errno));
+ }
}
ret = umount2(binderfs_mntpt, MNT_DETACH);
- rmdir_protect_errno(binderfs_mntpt);
- if (ret < 0)
- log_exit("%s - Failed to unmount binderfs\n", strerror(errno));
+ ASSERT_EQ(ret, 0) {
+ TH_LOG("%s - Failed to unmount binderfs", strerror(errno));
+ rmdir(binderfs_mntpt);
+ }
nthreads = get_nprocs_conf();
if (nthreads > DEFAULT_THREADS)
nthreads = DEFAULT_THREADS;
+ _thread_metadata = _metadata;
pthread_attr_init(&attr);
for (k = 0; k < ARRAY_SIZE(fds); k++) {
for (i = 0; i < nthreads; i++) {
ret = pthread_create(&threads[i], &attr, binder_version_thread, INT_TO_PTR(fds[k]));
if (ret) {
- ksft_print_msg("%s - Failed to create thread %d\n", strerror(errno), i);
+ TH_LOG("%s - Failed to create thread %d",
+ strerror(errno), i);
break;
}
}
@@ -448,7 +471,8 @@ TEST(binderfs_stress)
ret = pthread_join(threads[j], &fdptr);
if (ret)
- ksft_print_msg("%s - Failed to join thread %d for fd %d\n", strerror(errno), j, PTR_TO_INT(fdptr));
+ TH_LOG("%s - Failed to join thread %d for fd %d",
+ strerror(errno), j, PTR_TO_INT(fdptr));
}
}
pthread_attr_destroy(&attr);
@@ -459,20 +483,21 @@ TEST(binderfs_stress)
exit(EXIT_SUCCESS);
}
- change_idmaps(syncfds, pid);
+ change_idmaps(_metadata, syncfds, pid);
ret = wait_for_pid(pid);
- if (ret)
- ksft_exit_fail_msg("wait_for_pid() failed");
+ ASSERT_EQ(ret, 0) {
+ TH_LOG("wait_for_pid() failed");
+ }
}
TEST(binderfs_test_privileged)
{
if (geteuid() != 0)
- XFAIL(return, "Tests are not run as root. Skipping privileged tests");
+ SKIP(return, "Tests are not run as root. Skipping privileged tests");
- if (__do_binderfs_test() == 1)
- XFAIL(return, "The Android binderfs filesystem is not available");
+ if (__do_binderfs_test(_metadata))
+ SKIP(return, "The Android binderfs filesystem is not available");
}
TEST(binderfs_test_unprivileged)
@@ -482,31 +507,33 @@ TEST(binderfs_test_unprivileged)
pid_t pid;
ret = socketpair(PF_LOCAL, SOCK_STREAM | SOCK_CLOEXEC, 0, syncfds);
- if (ret < 0)
- ksft_exit_fail_msg("%s - Failed to create socket pair", strerror(errno));
+ ASSERT_EQ(ret, 0) {
+ TH_LOG("%s - Failed to create socket pair", strerror(errno));
+ }
pid = fork();
- if (pid < 0) {
+ ASSERT_GE(pid, 0) {
close_prot_errno_disarm(syncfds[0]);
close_prot_errno_disarm(syncfds[1]);
- ksft_exit_fail_msg("%s - Failed to fork", strerror(errno));
+ TH_LOG("%s - Failed to fork", strerror(errno));
}
if (pid == 0) {
- change_userns(syncfds);
- if (__do_binderfs_test() == 1)
+ change_userns(_metadata, syncfds);
+ if (__do_binderfs_test(_metadata))
exit(2);
exit(EXIT_SUCCESS);
}
- change_idmaps(syncfds, pid);
+ change_idmaps(_metadata, syncfds, pid);
ret = wait_for_pid(pid);
if (ret) {
if (ret == 2)
- XFAIL(return, "The Android binderfs filesystem is not available");
- else
- ksft_exit_fail_msg("wait_for_pid() failed");
+ SKIP(return, "The Android binderfs filesystem is not available");
+ ASSERT_EQ(ret, 0) {
+ TH_LOG("wait_for_pid() failed");
+ }
}
}
diff --git a/tools/testing/selftests/filesystems/binderfs/config b/tools/testing/selftests/filesystems/binderfs/config
index 02dd6cc9cf99..7b4fc6ee6205 100644
--- a/tools/testing/selftests/filesystems/binderfs/config
+++ b/tools/testing/selftests/filesystems/binderfs/config
@@ -1,3 +1,2 @@
-CONFIG_ANDROID=y
CONFIG_ANDROID_BINDERFS=y
CONFIG_ANDROID_BINDER_IPC=y
diff --git a/tools/testing/selftests/filesystems/epoll/Makefile b/tools/testing/selftests/filesystems/epoll/Makefile
index 78ae4aaf7141..0788a7dc8004 100644
--- a/tools/testing/selftests/filesystems/epoll/Makefile
+++ b/tools/testing/selftests/filesystems/epoll/Makefile
@@ -1,6 +1,6 @@
# SPDX-License-Identifier: GPL-2.0
-CFLAGS += -I../../../../../usr/include/
+CFLAGS += $(KHDR_INCLUDES)
LDLIBS += -lpthread
TEST_GEN_PROGS := epoll_wakeup_test
diff --git a/tools/testing/selftests/filesystems/epoll/epoll_wakeup_test.c b/tools/testing/selftests/filesystems/epoll/epoll_wakeup_test.c
index d979ff14775a..65ede506305c 100644
--- a/tools/testing/selftests/filesystems/epoll/epoll_wakeup_test.c
+++ b/tools/testing/selftests/filesystems/epoll/epoll_wakeup_test.c
@@ -1,6 +1,8 @@
// SPDX-License-Identifier: GPL-2.0
#define _GNU_SOURCE
+#include <asm/unistd.h>
+#include <linux/time_types.h>
#include <poll.h>
#include <unistd.h>
#include <assert.h>
@@ -21,6 +23,19 @@ struct epoll_mtcontext
pthread_t waiter;
};
+#ifndef __NR_epoll_pwait2
+#define __NR_epoll_pwait2 -1
+#endif
+
+static inline int sys_epoll_pwait2(int fd, struct epoll_event *events,
+ int maxevents,
+ const struct __kernel_timespec *timeout,
+ const sigset_t *sigset, size_t sigsetsize)
+{
+ return syscall(__NR_epoll_pwait2, fd, events, maxevents, timeout,
+ sigset, sigsetsize);
+}
+
static void signal_handler(int signum)
{
}
@@ -3282,4 +3297,200 @@ TEST(epoll60)
close(ctx.epfd);
}
+struct epoll61_ctx {
+ int epfd;
+ int evfd;
+};
+
+static void *epoll61_write_eventfd(void *ctx_)
+{
+ struct epoll61_ctx *ctx = ctx_;
+ int64_t l = 1;
+
+ usleep(10950);
+ write(ctx->evfd, &l, sizeof(l));
+ return NULL;
+}
+
+static void *epoll61_epoll_with_timeout(void *ctx_)
+{
+ struct epoll61_ctx *ctx = ctx_;
+ struct epoll_event events[1];
+ int n;
+
+ n = epoll_wait(ctx->epfd, events, 1, 11);
+ /*
+ * If epoll returned the eventfd, write on the eventfd to wake up the
+ * blocking poller.
+ */
+ if (n == 1) {
+ int64_t l = 1;
+
+ write(ctx->evfd, &l, sizeof(l));
+ }
+ return NULL;
+}
+
+static void *epoll61_blocking_epoll(void *ctx_)
+{
+ struct epoll61_ctx *ctx = ctx_;
+ struct epoll_event events[1];
+
+ epoll_wait(ctx->epfd, events, 1, -1);
+ return NULL;
+}
+
+TEST(epoll61)
+{
+ struct epoll61_ctx ctx;
+ struct epoll_event ev;
+ int i, r;
+
+ ctx.epfd = epoll_create1(0);
+ ASSERT_GE(ctx.epfd, 0);
+ ctx.evfd = eventfd(0, EFD_NONBLOCK);
+ ASSERT_GE(ctx.evfd, 0);
+
+ ev.events = EPOLLIN | EPOLLET | EPOLLERR | EPOLLHUP;
+ ev.data.ptr = NULL;
+ r = epoll_ctl(ctx.epfd, EPOLL_CTL_ADD, ctx.evfd, &ev);
+ ASSERT_EQ(r, 0);
+
+ /*
+ * We are testing a race. Repeat the test case 1000 times to make it
+ * more likely to fail in case of a bug.
+ */
+ for (i = 0; i < 1000; i++) {
+ pthread_t threads[3];
+ int n;
+
+ /*
+ * Start 3 threads:
+ * Thread 1 sleeps for 10.9ms and writes to the evenfd.
+ * Thread 2 calls epoll with a timeout of 11ms.
+ * Thread 3 calls epoll with a timeout of -1.
+ *
+ * The eventfd write by Thread 1 should either wakeup Thread 2
+ * or Thread 3. If it wakes up Thread 2, Thread 2 writes on the
+ * eventfd to wake up Thread 3.
+ *
+ * If no events are missed, all three threads should eventually
+ * be joinable.
+ */
+ ASSERT_EQ(pthread_create(&threads[0], NULL,
+ epoll61_write_eventfd, &ctx), 0);
+ ASSERT_EQ(pthread_create(&threads[1], NULL,
+ epoll61_epoll_with_timeout, &ctx), 0);
+ ASSERT_EQ(pthread_create(&threads[2], NULL,
+ epoll61_blocking_epoll, &ctx), 0);
+
+ for (n = 0; n < ARRAY_SIZE(threads); ++n)
+ ASSERT_EQ(pthread_join(threads[n], NULL), 0);
+ }
+
+ close(ctx.epfd);
+ close(ctx.evfd);
+}
+
+/* Equivalent to basic test epoll1, but exercising epoll_pwait2. */
+TEST(epoll62)
+{
+ int efd;
+ int sfd[2];
+ struct epoll_event e;
+
+ ASSERT_EQ(socketpair(AF_UNIX, SOCK_STREAM, 0, sfd), 0);
+
+ efd = epoll_create(1);
+ ASSERT_GE(efd, 0);
+
+ e.events = EPOLLIN;
+ ASSERT_EQ(epoll_ctl(efd, EPOLL_CTL_ADD, sfd[0], &e), 0);
+
+ ASSERT_EQ(write(sfd[1], "w", 1), 1);
+
+ EXPECT_EQ(sys_epoll_pwait2(efd, &e, 1, NULL, NULL, 0), 1);
+ EXPECT_EQ(sys_epoll_pwait2(efd, &e, 1, NULL, NULL, 0), 1);
+
+ close(efd);
+ close(sfd[0]);
+ close(sfd[1]);
+}
+
+/* Epoll_pwait2 basic timeout test. */
+TEST(epoll63)
+{
+ const int cfg_delay_ms = 10;
+ unsigned long long tdiff;
+ struct __kernel_timespec ts;
+ int efd;
+ int sfd[2];
+ struct epoll_event e;
+
+ ASSERT_EQ(socketpair(AF_UNIX, SOCK_STREAM, 0, sfd), 0);
+
+ efd = epoll_create(1);
+ ASSERT_GE(efd, 0);
+
+ e.events = EPOLLIN;
+ ASSERT_EQ(epoll_ctl(efd, EPOLL_CTL_ADD, sfd[0], &e), 0);
+
+ ts.tv_sec = 0;
+ ts.tv_nsec = cfg_delay_ms * 1000 * 1000;
+
+ tdiff = msecs();
+ EXPECT_EQ(sys_epoll_pwait2(efd, &e, 1, &ts, NULL, 0), 0);
+ tdiff = msecs() - tdiff;
+
+ EXPECT_GE(tdiff, cfg_delay_ms);
+
+ close(efd);
+ close(sfd[0]);
+ close(sfd[1]);
+}
+
+/*
+ * t0 t1
+ * (ew) \ / (ew)
+ * e0
+ * | (lt)
+ * s0
+ */
+TEST(epoll64)
+{
+ pthread_t waiter[2];
+ struct epoll_event e;
+ struct epoll_mtcontext ctx = { 0 };
+
+ signal(SIGUSR1, signal_handler);
+
+ ASSERT_EQ(socketpair(AF_UNIX, SOCK_STREAM, 0, ctx.sfd), 0);
+
+ ctx.efd[0] = epoll_create(1);
+ ASSERT_GE(ctx.efd[0], 0);
+
+ e.events = EPOLLIN;
+ ASSERT_EQ(epoll_ctl(ctx.efd[0], EPOLL_CTL_ADD, ctx.sfd[0], &e), 0);
+
+ /*
+ * main will act as the emitter once both waiter threads are
+ * blocked and expects to both be awoken upon the ready event.
+ */
+ ctx.main = pthread_self();
+ ASSERT_EQ(pthread_create(&waiter[0], NULL, waiter_entry1a, &ctx), 0);
+ ASSERT_EQ(pthread_create(&waiter[1], NULL, waiter_entry1a, &ctx), 0);
+
+ usleep(100000);
+ ASSERT_EQ(write(ctx.sfd[1], "w", 1), 1);
+
+ ASSERT_EQ(pthread_join(waiter[0], NULL), 0);
+ ASSERT_EQ(pthread_join(waiter[1], NULL), 0);
+
+ EXPECT_EQ(ctx.count, 2);
+
+ close(ctx.efd[0]);
+ close(ctx.sfd[0]);
+ close(ctx.sfd[1]);
+}
+
TEST_HARNESS_MAIN
diff --git a/tools/testing/selftests/filesystems/eventfd/.gitignore b/tools/testing/selftests/filesystems/eventfd/.gitignore
new file mode 100644
index 000000000000..483faf59fe4a
--- /dev/null
+++ b/tools/testing/selftests/filesystems/eventfd/.gitignore
@@ -0,0 +1,2 @@
+# SPDX-License-Identifier: GPL-2.0-only
+eventfd_test
diff --git a/tools/testing/selftests/filesystems/eventfd/Makefile b/tools/testing/selftests/filesystems/eventfd/Makefile
new file mode 100644
index 000000000000..0a8e3910df15
--- /dev/null
+++ b/tools/testing/selftests/filesystems/eventfd/Makefile
@@ -0,0 +1,7 @@
+# SPDX-License-Identifier: GPL-2.0
+
+CFLAGS += $(KHDR_INCLUDES)
+LDLIBS += -lpthread
+TEST_GEN_PROGS := eventfd_test
+
+include ../../lib.mk
diff --git a/tools/testing/selftests/filesystems/eventfd/eventfd_test.c b/tools/testing/selftests/filesystems/eventfd/eventfd_test.c
new file mode 100644
index 000000000000..f142a137526c
--- /dev/null
+++ b/tools/testing/selftests/filesystems/eventfd/eventfd_test.c
@@ -0,0 +1,186 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#define _GNU_SOURCE
+#include <errno.h>
+#include <fcntl.h>
+#include <asm/unistd.h>
+#include <linux/time_types.h>
+#include <unistd.h>
+#include <assert.h>
+#include <signal.h>
+#include <pthread.h>
+#include <sys/epoll.h>
+#include <sys/eventfd.h>
+#include "../../kselftest_harness.h"
+
+struct error {
+ int code;
+ char msg[512];
+};
+
+static int error_set(struct error *err, int code, const char *fmt, ...)
+{
+ va_list args;
+ int r;
+
+ if (code == 0 || !err || err->code != 0)
+ return code;
+
+ err->code = code;
+ va_start(args, fmt);
+ r = vsnprintf(err->msg, sizeof(err->msg), fmt, args);
+ assert((size_t)r < sizeof(err->msg));
+ va_end(args);
+
+ return code;
+}
+
+static inline int sys_eventfd2(unsigned int count, int flags)
+{
+ return syscall(__NR_eventfd2, count, flags);
+}
+
+TEST(eventfd01)
+{
+ int fd, flags;
+
+ fd = sys_eventfd2(0, 0);
+ ASSERT_GE(fd, 0);
+
+ flags = fcntl(fd, F_GETFL);
+ // since the kernel automatically added O_RDWR.
+ EXPECT_EQ(flags, O_RDWR);
+
+ close(fd);
+}
+
+TEST(eventfd02)
+{
+ int fd, flags;
+
+ fd = sys_eventfd2(0, EFD_CLOEXEC);
+ ASSERT_GE(fd, 0);
+
+ flags = fcntl(fd, F_GETFD);
+ ASSERT_GT(flags, -1);
+ EXPECT_EQ(flags, FD_CLOEXEC);
+
+ close(fd);
+}
+
+TEST(eventfd03)
+{
+ int fd, flags;
+
+ fd = sys_eventfd2(0, EFD_NONBLOCK);
+ ASSERT_GE(fd, 0);
+
+ flags = fcntl(fd, F_GETFL);
+ ASSERT_GT(flags, -1);
+ EXPECT_EQ(flags & EFD_NONBLOCK, EFD_NONBLOCK);
+ EXPECT_EQ(flags & O_RDWR, O_RDWR);
+
+ close(fd);
+}
+
+TEST(eventfd04)
+{
+ int fd, flags;
+
+ fd = sys_eventfd2(0, EFD_CLOEXEC|EFD_NONBLOCK);
+ ASSERT_GE(fd, 0);
+
+ flags = fcntl(fd, F_GETFL);
+ ASSERT_GT(flags, -1);
+ EXPECT_EQ(flags & EFD_NONBLOCK, EFD_NONBLOCK);
+ EXPECT_EQ(flags & O_RDWR, O_RDWR);
+
+ flags = fcntl(fd, F_GETFD);
+ ASSERT_GT(flags, -1);
+ EXPECT_EQ(flags, FD_CLOEXEC);
+
+ close(fd);
+}
+
+static inline void trim_newline(char *str)
+{
+ char *pos = strrchr(str, '\n');
+
+ if (pos)
+ *pos = '\0';
+}
+
+static int verify_fdinfo(int fd, struct error *err, const char *prefix,
+ size_t prefix_len, const char *expect, ...)
+{
+ char buffer[512] = {0, };
+ char path[512] = {0, };
+ va_list args;
+ FILE *f;
+ char *line = NULL;
+ size_t n = 0;
+ int found = 0;
+ int r;
+
+ va_start(args, expect);
+ r = vsnprintf(buffer, sizeof(buffer), expect, args);
+ assert((size_t)r < sizeof(buffer));
+ va_end(args);
+
+ snprintf(path, sizeof(path), "/proc/self/fdinfo/%d", fd);
+ f = fopen(path, "re");
+ if (!f)
+ return error_set(err, -1, "fdinfo open failed for %d", fd);
+
+ while (getline(&line, &n, f) != -1) {
+ char *val;
+
+ if (strncmp(line, prefix, prefix_len))
+ continue;
+
+ found = 1;
+
+ val = line + prefix_len;
+ r = strcmp(val, buffer);
+ if (r != 0) {
+ trim_newline(line);
+ trim_newline(buffer);
+ error_set(err, -1, "%s '%s' != '%s'",
+ prefix, val, buffer);
+ }
+ break;
+ }
+
+ free(line);
+ fclose(f);
+
+ if (found == 0)
+ return error_set(err, -1, "%s not found for fd %d",
+ prefix, fd);
+
+ return 0;
+}
+
+TEST(eventfd05)
+{
+ struct error err = {0};
+ int fd, ret;
+
+ fd = sys_eventfd2(0, EFD_SEMAPHORE);
+ ASSERT_GE(fd, 0);
+
+ ret = fcntl(fd, F_GETFL);
+ ASSERT_GT(ret, -1);
+ EXPECT_EQ(ret & O_RDWR, O_RDWR);
+
+ // The semaphore could only be obtained from fdinfo.
+ ret = verify_fdinfo(fd, &err, "eventfd-semaphore: ", 19, "1\n");
+ if (ret != 0)
+ ksft_print_msg("eventfd-semaphore check failed, msg: %s\n",
+ err.msg);
+ EXPECT_EQ(ret, 0);
+
+ close(fd);
+}
+
+TEST_HARNESS_MAIN
diff --git a/tools/testing/selftests/filesystems/fat/.gitignore b/tools/testing/selftests/filesystems/fat/.gitignore
new file mode 100644
index 000000000000..b89920ed841c
--- /dev/null
+++ b/tools/testing/selftests/filesystems/fat/.gitignore
@@ -0,0 +1,2 @@
+# SPDX-License-Identifier: GPL-2.0-only
+rename_exchange
diff --git a/tools/testing/selftests/filesystems/fat/Makefile b/tools/testing/selftests/filesystems/fat/Makefile
new file mode 100644
index 000000000000..902033f6ef09
--- /dev/null
+++ b/tools/testing/selftests/filesystems/fat/Makefile
@@ -0,0 +1,7 @@
+# SPDX-License-Identifier: GPL-2.0
+
+TEST_PROGS := run_fat_tests.sh
+TEST_GEN_PROGS_EXTENDED := rename_exchange
+CFLAGS += -O2 -g -Wall $(KHDR_INCLUDES)
+
+include ../../lib.mk
diff --git a/tools/testing/selftests/filesystems/fat/config b/tools/testing/selftests/filesystems/fat/config
new file mode 100644
index 000000000000..6cf95e787a17
--- /dev/null
+++ b/tools/testing/selftests/filesystems/fat/config
@@ -0,0 +1,2 @@
+CONFIG_BLK_DEV_LOOP=y
+CONFIG_VFAT_FS=y
diff --git a/tools/testing/selftests/filesystems/fat/rename_exchange.c b/tools/testing/selftests/filesystems/fat/rename_exchange.c
new file mode 100644
index 000000000000..e488ad354fce
--- /dev/null
+++ b/tools/testing/selftests/filesystems/fat/rename_exchange.c
@@ -0,0 +1,37 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * Program that atomically exchanges two paths using
+ * the renameat2() system call RENAME_EXCHANGE flag.
+ *
+ * Copyright 2022 Red Hat Inc.
+ * Author: Javier Martinez Canillas <javierm@redhat.com>
+ */
+
+#define _GNU_SOURCE
+#include <fcntl.h>
+#include <stdio.h>
+#include <stdlib.h>
+
+void print_usage(const char *program)
+{
+ printf("Usage: %s [oldpath] [newpath]\n", program);
+ printf("Atomically exchange oldpath and newpath\n");
+}
+
+int main(int argc, char *argv[])
+{
+ int ret;
+
+ if (argc != 3) {
+ print_usage(argv[0]);
+ exit(EXIT_FAILURE);
+ }
+
+ ret = renameat2(AT_FDCWD, argv[1], AT_FDCWD, argv[2], RENAME_EXCHANGE);
+ if (ret) {
+ perror("rename exchange failed");
+ exit(EXIT_FAILURE);
+ }
+
+ exit(EXIT_SUCCESS);
+}
diff --git a/tools/testing/selftests/filesystems/fat/run_fat_tests.sh b/tools/testing/selftests/filesystems/fat/run_fat_tests.sh
new file mode 100755
index 000000000000..d61264d4795d
--- /dev/null
+++ b/tools/testing/selftests/filesystems/fat/run_fat_tests.sh
@@ -0,0 +1,82 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+#
+# Run filesystem operations tests on an 1 MiB disk image that is formatted with
+# a vfat filesystem and mounted in a temporary directory using a loop device.
+#
+# Copyright 2022 Red Hat Inc.
+# Author: Javier Martinez Canillas <javierm@redhat.com>
+
+set -e
+set -u
+set -o pipefail
+
+BASE_DIR="$(dirname $0)"
+TMP_DIR="$(mktemp -d /tmp/fat_tests_tmp.XXXXXX)"
+IMG_PATH="${TMP_DIR}/fat.img"
+MNT_PATH="${TMP_DIR}/mnt"
+
+cleanup()
+{
+ mountpoint -q "${MNT_PATH}" && unmount_image
+ rm -rf "${TMP_DIR}"
+}
+trap cleanup SIGINT SIGTERM EXIT
+
+create_loopback()
+{
+ touch "${IMG_PATH}"
+ chattr +C "${IMG_PATH}" >/dev/null 2>&1 || true
+
+ truncate -s 1M "${IMG_PATH}"
+ mkfs.vfat "${IMG_PATH}" >/dev/null 2>&1
+}
+
+mount_image()
+{
+ mkdir -p "${MNT_PATH}"
+ sudo mount -o loop "${IMG_PATH}" "${MNT_PATH}"
+}
+
+rename_exchange_test()
+{
+ local rename_exchange="${BASE_DIR}/rename_exchange"
+ local old_path="${MNT_PATH}/old_file"
+ local new_path="${MNT_PATH}/new_file"
+
+ echo old | sudo tee "${old_path}" >/dev/null 2>&1
+ echo new | sudo tee "${new_path}" >/dev/null 2>&1
+ sudo "${rename_exchange}" "${old_path}" "${new_path}" >/dev/null 2>&1
+ sudo sync -f "${MNT_PATH}"
+ grep new "${old_path}" >/dev/null 2>&1
+ grep old "${new_path}" >/dev/null 2>&1
+}
+
+rename_exchange_subdir_test()
+{
+ local rename_exchange="${BASE_DIR}/rename_exchange"
+ local dir_path="${MNT_PATH}/subdir"
+ local old_path="${MNT_PATH}/old_file"
+ local new_path="${dir_path}/new_file"
+
+ sudo mkdir -p "${dir_path}"
+ echo old | sudo tee "${old_path}" >/dev/null 2>&1
+ echo new | sudo tee "${new_path}" >/dev/null 2>&1
+ sudo "${rename_exchange}" "${old_path}" "${new_path}" >/dev/null 2>&1
+ sudo sync -f "${MNT_PATH}"
+ grep new "${old_path}" >/dev/null 2>&1
+ grep old "${new_path}" >/dev/null 2>&1
+}
+
+unmount_image()
+{
+ sudo umount "${MNT_PATH}" &> /dev/null
+}
+
+create_loopback
+mount_image
+rename_exchange_test
+rename_exchange_subdir_test
+unmount_image
+
+exit 0
diff --git a/tools/testing/selftests/filesystems/overlayfs/.gitignore b/tools/testing/selftests/filesystems/overlayfs/.gitignore
new file mode 100644
index 000000000000..52ae618fdd98
--- /dev/null
+++ b/tools/testing/selftests/filesystems/overlayfs/.gitignore
@@ -0,0 +1,2 @@
+# SPDX-License-Identifier: GPL-2.0-only
+dev_in_maps
diff --git a/tools/testing/selftests/filesystems/overlayfs/Makefile b/tools/testing/selftests/filesystems/overlayfs/Makefile
new file mode 100644
index 000000000000..56b2b48a765b
--- /dev/null
+++ b/tools/testing/selftests/filesystems/overlayfs/Makefile
@@ -0,0 +1,7 @@
+# SPDX-License-Identifier: GPL-2.0
+
+TEST_GEN_PROGS := dev_in_maps
+
+CFLAGS := -Wall -Werror
+
+include ../../lib.mk
diff --git a/tools/testing/selftests/filesystems/overlayfs/dev_in_maps.c b/tools/testing/selftests/filesystems/overlayfs/dev_in_maps.c
new file mode 100644
index 000000000000..759f86e7d263
--- /dev/null
+++ b/tools/testing/selftests/filesystems/overlayfs/dev_in_maps.c
@@ -0,0 +1,184 @@
+// SPDX-License-Identifier: GPL-2.0
+#define _GNU_SOURCE
+
+#include <inttypes.h>
+#include <unistd.h>
+#include <stdio.h>
+
+#include <linux/unistd.h>
+#include <linux/types.h>
+#include <linux/mount.h>
+#include <sys/syscall.h>
+#include <sys/stat.h>
+#include <sys/mman.h>
+#include <sched.h>
+#include <fcntl.h>
+
+#include "../../kselftest.h"
+#include "log.h"
+
+static int sys_fsopen(const char *fsname, unsigned int flags)
+{
+ return syscall(__NR_fsopen, fsname, flags);
+}
+
+static int sys_fsconfig(int fd, unsigned int cmd, const char *key, const char *value, int aux)
+{
+ return syscall(__NR_fsconfig, fd, cmd, key, value, aux);
+}
+
+static int sys_fsmount(int fd, unsigned int flags, unsigned int attr_flags)
+{
+ return syscall(__NR_fsmount, fd, flags, attr_flags);
+}
+static int sys_mount(const char *src, const char *tgt, const char *fst,
+ unsigned long flags, const void *data)
+{
+ return syscall(__NR_mount, src, tgt, fst, flags, data);
+}
+static int sys_move_mount(int from_dfd, const char *from_pathname,
+ int to_dfd, const char *to_pathname,
+ unsigned int flags)
+{
+ return syscall(__NR_move_mount, from_dfd, from_pathname, to_dfd, to_pathname, flags);
+}
+
+static long get_file_dev_and_inode(void *addr, struct statx *stx)
+{
+ char buf[4096];
+ FILE *mapf;
+
+ mapf = fopen("/proc/self/maps", "r");
+ if (mapf == NULL)
+ return pr_perror("fopen(/proc/self/maps)");
+
+ while (fgets(buf, sizeof(buf), mapf)) {
+ unsigned long start, end;
+ uint32_t maj, min;
+ __u64 ino;
+
+ if (sscanf(buf, "%lx-%lx %*s %*s %x:%x %llu",
+ &start, &end, &maj, &min, &ino) != 5)
+ return pr_perror("unable to parse: %s", buf);
+ if (start == (unsigned long)addr) {
+ stx->stx_dev_major = maj;
+ stx->stx_dev_minor = min;
+ stx->stx_ino = ino;
+ return 0;
+ }
+ }
+
+ return pr_err("unable to find the mapping");
+}
+
+static int ovl_mount(void)
+{
+ int tmpfs, fsfd, ovl;
+
+ fsfd = sys_fsopen("tmpfs", 0);
+ if (fsfd == -1)
+ return pr_perror("fsopen(tmpfs)");
+
+ if (sys_fsconfig(fsfd, FSCONFIG_CMD_CREATE, NULL, NULL, 0) == -1)
+ return pr_perror("FSCONFIG_CMD_CREATE");
+
+ tmpfs = sys_fsmount(fsfd, 0, 0);
+ if (tmpfs == -1)
+ return pr_perror("fsmount");
+
+ close(fsfd);
+
+ /* overlayfs can't be constructed on top of a detached mount. */
+ if (sys_move_mount(tmpfs, "", AT_FDCWD, "/tmp", MOVE_MOUNT_F_EMPTY_PATH))
+ return pr_perror("move_mount");
+ close(tmpfs);
+
+ if (mkdir("/tmp/w", 0755) == -1 ||
+ mkdir("/tmp/u", 0755) == -1 ||
+ mkdir("/tmp/l", 0755) == -1)
+ return pr_perror("mkdir");
+
+ fsfd = sys_fsopen("overlay", 0);
+ if (fsfd == -1)
+ return pr_perror("fsopen(overlay)");
+ if (sys_fsconfig(fsfd, FSCONFIG_SET_STRING, "source", "test", 0) == -1 ||
+ sys_fsconfig(fsfd, FSCONFIG_SET_STRING, "lowerdir", "/tmp/l", 0) == -1 ||
+ sys_fsconfig(fsfd, FSCONFIG_SET_STRING, "upperdir", "/tmp/u", 0) == -1 ||
+ sys_fsconfig(fsfd, FSCONFIG_SET_STRING, "workdir", "/tmp/w", 0) == -1)
+ return pr_perror("fsconfig");
+ if (sys_fsconfig(fsfd, FSCONFIG_CMD_CREATE, NULL, NULL, 0) == -1)
+ return pr_perror("fsconfig");
+ ovl = sys_fsmount(fsfd, 0, 0);
+ if (ovl == -1)
+ return pr_perror("fsmount");
+
+ return ovl;
+}
+
+/*
+ * Check that the file device and inode shown in /proc/pid/maps match values
+ * returned by stat(2).
+ */
+static int test(void)
+{
+ struct statx stx, mstx;
+ int ovl, fd;
+ void *addr;
+
+ ovl = ovl_mount();
+ if (ovl == -1)
+ return -1;
+
+ fd = openat(ovl, "test", O_RDWR | O_CREAT, 0644);
+ if (fd == -1)
+ return pr_perror("openat");
+
+ addr = mmap(NULL, 4096, PROT_READ | PROT_WRITE, MAP_FILE | MAP_SHARED, fd, 0);
+ if (addr == MAP_FAILED)
+ return pr_perror("mmap");
+
+ if (get_file_dev_and_inode(addr, &mstx))
+ return -1;
+ if (statx(fd, "", AT_EMPTY_PATH | AT_STATX_SYNC_AS_STAT, STATX_INO, &stx))
+ return pr_perror("statx");
+
+ if (stx.stx_dev_major != mstx.stx_dev_major ||
+ stx.stx_dev_minor != mstx.stx_dev_minor ||
+ stx.stx_ino != mstx.stx_ino)
+ return pr_fail("unmatched dev:ino %x:%x:%llx (expected %x:%x:%llx)\n",
+ mstx.stx_dev_major, mstx.stx_dev_minor, mstx.stx_ino,
+ stx.stx_dev_major, stx.stx_dev_minor, stx.stx_ino);
+
+ ksft_test_result_pass("devices are matched\n");
+ return 0;
+}
+
+int main(int argc, char **argv)
+{
+ int fsfd;
+
+ fsfd = sys_fsopen("overlay", 0);
+ if (fsfd == -1) {
+ ksft_test_result_skip("unable to create overlay mount\n");
+ return 1;
+ }
+ close(fsfd);
+
+ /* Create a new mount namespace to not care about cleaning test mounts. */
+ if (unshare(CLONE_NEWNS) == -1) {
+ ksft_test_result_skip("unable to create a new mount namespace\n");
+ return 1;
+ }
+ if (sys_mount(NULL, "/", NULL, MS_SLAVE | MS_REC, NULL) == -1) {
+ pr_perror("mount");
+ return 1;
+ }
+
+ ksft_set_plan(1);
+
+ if (test())
+ return 1;
+
+ ksft_exit_pass();
+ return 0;
+}
diff --git a/tools/testing/selftests/filesystems/overlayfs/log.h b/tools/testing/selftests/filesystems/overlayfs/log.h
new file mode 100644
index 000000000000..db64df2a8483
--- /dev/null
+++ b/tools/testing/selftests/filesystems/overlayfs/log.h
@@ -0,0 +1,26 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+
+#ifndef __SELFTEST_TIMENS_LOG_H__
+#define __SELFTEST_TIMENS_LOG_H__
+
+#define pr_msg(fmt, lvl, ...) \
+ ksft_print_msg("[%s] (%s:%d)\t" fmt "\n", \
+ lvl, __FILE__, __LINE__, ##__VA_ARGS__)
+
+#define pr_p(func, fmt, ...) func(fmt ": %m", ##__VA_ARGS__)
+
+#define pr_err(fmt, ...) \
+ ({ \
+ ksft_test_result_error(fmt "\n", ##__VA_ARGS__); \
+ -1; \
+ })
+
+#define pr_fail(fmt, ...) \
+ ({ \
+ ksft_test_result_fail(fmt, ##__VA_ARGS__); \
+ -1; \
+ })
+
+#define pr_perror(fmt, ...) pr_p(pr_err, fmt, ##__VA_ARGS__)
+
+#endif
diff --git a/tools/testing/selftests/rcutorture/formal/srcu-cbmc/include/linux/.gitignore b/tools/testing/selftests/filesystems/statmount/.gitignore
index 57d296341304..82a4846cbc4b 100644
--- a/tools/testing/selftests/rcutorture/formal/srcu-cbmc/include/linux/.gitignore
+++ b/tools/testing/selftests/filesystems/statmount/.gitignore
@@ -1,2 +1,2 @@
# SPDX-License-Identifier: GPL-2.0-only
-srcu.h
+/*_test
diff --git a/tools/testing/selftests/filesystems/statmount/Makefile b/tools/testing/selftests/filesystems/statmount/Makefile
new file mode 100644
index 000000000000..07a0d5b545ca
--- /dev/null
+++ b/tools/testing/selftests/filesystems/statmount/Makefile
@@ -0,0 +1,6 @@
+# SPDX-License-Identifier: GPL-2.0-or-later
+
+CFLAGS += -Wall -O2 -g $(KHDR_INCLUDES)
+TEST_GEN_PROGS := statmount_test
+
+include ../../lib.mk
diff --git a/tools/testing/selftests/filesystems/statmount/statmount_test.c b/tools/testing/selftests/filesystems/statmount/statmount_test.c
new file mode 100644
index 000000000000..3eafd7da58e2
--- /dev/null
+++ b/tools/testing/selftests/filesystems/statmount/statmount_test.c
@@ -0,0 +1,612 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+
+#define _GNU_SOURCE
+
+#include <assert.h>
+#include <stdint.h>
+#include <sched.h>
+#include <fcntl.h>
+#include <sys/param.h>
+#include <sys/mount.h>
+#include <sys/stat.h>
+#include <sys/statfs.h>
+#include <linux/mount.h>
+#include <linux/stat.h>
+#include <asm/unistd.h>
+
+#include "../../kselftest.h"
+
+static const char *const known_fs[] = {
+ "9p", "adfs", "affs", "afs", "aio", "anon_inodefs", "apparmorfs",
+ "autofs", "bcachefs", "bdev", "befs", "bfs", "binder", "binfmt_misc",
+ "bpf", "btrfs", "btrfs_test_fs", "ceph", "cgroup", "cgroup2", "cifs",
+ "coda", "configfs", "cpuset", "cramfs", "cxl", "dax", "debugfs",
+ "devpts", "devtmpfs", "dmabuf", "drm", "ecryptfs", "efivarfs", "efs",
+ "erofs", "exfat", "ext2", "ext3", "ext4", "f2fs", "functionfs",
+ "fuse", "fuseblk", "fusectl", "gadgetfs", "gfs2", "gfs2meta", "hfs",
+ "hfsplus", "hostfs", "hpfs", "hugetlbfs", "ibmasmfs", "iomem",
+ "ipathfs", "iso9660", "jffs2", "jfs", "minix", "mqueue", "msdos",
+ "nfs", "nfs4", "nfsd", "nilfs2", "nsfs", "ntfs", "ntfs3", "ocfs2",
+ "ocfs2_dlmfs", "ocxlflash", "omfs", "openpromfs", "overlay", "pipefs",
+ "proc", "pstore", "pvfs2", "qnx4", "qnx6", "ramfs", "reiserfs",
+ "resctrl", "romfs", "rootfs", "rpc_pipefs", "s390_hypfs", "secretmem",
+ "securityfs", "selinuxfs", "smackfs", "smb3", "sockfs", "spufs",
+ "squashfs", "sysfs", "sysv", "tmpfs", "tracefs", "ubifs", "udf",
+ "ufs", "v7", "vboxsf", "vfat", "virtiofs", "vxfs", "xenfs", "xfs",
+ "zonefs", NULL };
+
+static int statmount(uint64_t mnt_id, uint64_t mask, struct statmount *buf,
+ size_t bufsize, unsigned int flags)
+{
+ struct mnt_id_req req = {
+ .size = MNT_ID_REQ_SIZE_VER0,
+ .mnt_id = mnt_id,
+ .param = mask,
+ };
+
+ return syscall(__NR_statmount, &req, buf, bufsize, flags);
+}
+
+static struct statmount *statmount_alloc(uint64_t mnt_id, uint64_t mask, unsigned int flags)
+{
+ size_t bufsize = 1 << 15;
+ struct statmount *buf = NULL, *tmp = alloca(bufsize);
+ int tofree = 0;
+ int ret;
+
+ for (;;) {
+ ret = statmount(mnt_id, mask, tmp, bufsize, flags);
+ if (ret != -1)
+ break;
+ if (tofree)
+ free(tmp);
+ if (errno != EOVERFLOW)
+ return NULL;
+ bufsize <<= 1;
+ tofree = 1;
+ tmp = malloc(bufsize);
+ if (!tmp)
+ return NULL;
+ }
+ buf = malloc(tmp->size);
+ if (buf)
+ memcpy(buf, tmp, tmp->size);
+ if (tofree)
+ free(tmp);
+
+ return buf;
+}
+
+static void write_file(const char *path, const char *val)
+{
+ int fd = open(path, O_WRONLY);
+ size_t len = strlen(val);
+ int ret;
+
+ if (fd == -1)
+ ksft_exit_fail_msg("opening %s for write: %s\n", path, strerror(errno));
+
+ ret = write(fd, val, len);
+ if (ret == -1)
+ ksft_exit_fail_msg("writing to %s: %s\n", path, strerror(errno));
+ if (ret != len)
+ ksft_exit_fail_msg("short write to %s\n", path);
+
+ ret = close(fd);
+ if (ret == -1)
+ ksft_exit_fail_msg("closing %s\n", path);
+}
+
+static uint64_t get_mnt_id(const char *name, const char *path, uint64_t mask)
+{
+ struct statx sx;
+ int ret;
+
+ ret = statx(AT_FDCWD, path, 0, mask, &sx);
+ if (ret == -1)
+ ksft_exit_fail_msg("retrieving %s mount ID for %s: %s\n",
+ mask & STATX_MNT_ID_UNIQUE ? "unique" : "old",
+ name, strerror(errno));
+ if (!(sx.stx_mask & mask))
+ ksft_exit_fail_msg("no %s mount ID available for %s\n",
+ mask & STATX_MNT_ID_UNIQUE ? "unique" : "old",
+ name);
+
+ return sx.stx_mnt_id;
+}
+
+
+static char root_mntpoint[] = "/tmp/statmount_test_root.XXXXXX";
+static int orig_root;
+static uint64_t root_id, parent_id;
+static uint32_t old_root_id, old_parent_id;
+
+
+static void cleanup_namespace(void)
+{
+ fchdir(orig_root);
+ chroot(".");
+ umount2(root_mntpoint, MNT_DETACH);
+ rmdir(root_mntpoint);
+}
+
+static void setup_namespace(void)
+{
+ int ret;
+ char buf[32];
+ uid_t uid = getuid();
+ gid_t gid = getgid();
+
+ ret = unshare(CLONE_NEWNS|CLONE_NEWUSER);
+ if (ret == -1)
+ ksft_exit_fail_msg("unsharing mountns and userns: %s\n",
+ strerror(errno));
+
+ sprintf(buf, "0 %d 1", uid);
+ write_file("/proc/self/uid_map", buf);
+ write_file("/proc/self/setgroups", "deny");
+ sprintf(buf, "0 %d 1", gid);
+ write_file("/proc/self/gid_map", buf);
+
+ ret = mount("", "/", NULL, MS_REC|MS_PRIVATE, NULL);
+ if (ret == -1)
+ ksft_exit_fail_msg("making mount tree private: %s\n",
+ strerror(errno));
+
+ if (!mkdtemp(root_mntpoint))
+ ksft_exit_fail_msg("creating temporary directory %s: %s\n",
+ root_mntpoint, strerror(errno));
+
+ old_parent_id = get_mnt_id("parent", root_mntpoint, STATX_MNT_ID);
+ parent_id = get_mnt_id("parent", root_mntpoint, STATX_MNT_ID_UNIQUE);
+
+ orig_root = open("/", O_PATH);
+ if (orig_root == -1)
+ ksft_exit_fail_msg("opening root directory: %s",
+ strerror(errno));
+
+ atexit(cleanup_namespace);
+
+ ret = mount(root_mntpoint, root_mntpoint, NULL, MS_BIND, NULL);
+ if (ret == -1)
+ ksft_exit_fail_msg("mounting temp root %s: %s\n",
+ root_mntpoint, strerror(errno));
+
+ ret = chroot(root_mntpoint);
+ if (ret == -1)
+ ksft_exit_fail_msg("chroot to temp root %s: %s\n",
+ root_mntpoint, strerror(errno));
+
+ ret = chdir("/");
+ if (ret == -1)
+ ksft_exit_fail_msg("chdir to root: %s\n", strerror(errno));
+
+ old_root_id = get_mnt_id("root", "/", STATX_MNT_ID);
+ root_id = get_mnt_id("root", "/", STATX_MNT_ID_UNIQUE);
+}
+
+static int setup_mount_tree(int log2_num)
+{
+ int ret, i;
+
+ ret = mount("", "/", NULL, MS_REC|MS_SHARED, NULL);
+ if (ret == -1) {
+ ksft_test_result_fail("making mount tree shared: %s\n",
+ strerror(errno));
+ return -1;
+ }
+
+ for (i = 0; i < log2_num; i++) {
+ ret = mount("/", "/", NULL, MS_BIND, NULL);
+ if (ret == -1) {
+ ksft_test_result_fail("mounting submount %s: %s\n",
+ root_mntpoint, strerror(errno));
+ return -1;
+ }
+ }
+ return 0;
+}
+
+static ssize_t listmount(uint64_t mnt_id, uint64_t last_mnt_id,
+ uint64_t list[], size_t num, unsigned int flags)
+{
+ struct mnt_id_req req = {
+ .size = MNT_ID_REQ_SIZE_VER0,
+ .mnt_id = mnt_id,
+ .param = last_mnt_id,
+ };
+
+ return syscall(__NR_listmount, &req, list, num, flags);
+}
+
+static void test_listmount_empty_root(void)
+{
+ ssize_t res;
+ const unsigned int size = 32;
+ uint64_t list[size];
+
+ res = listmount(LSMT_ROOT, 0, list, size, 0);
+ if (res == -1) {
+ ksft_test_result_fail("listmount: %s\n", strerror(errno));
+ return;
+ }
+ if (res != 1) {
+ ksft_test_result_fail("listmount result is %zi != 1\n", res);
+ return;
+ }
+
+ if (list[0] != root_id) {
+ ksft_test_result_fail("listmount ID doesn't match 0x%llx != 0x%llx\n",
+ (unsigned long long) list[0],
+ (unsigned long long) root_id);
+ return;
+ }
+
+ ksft_test_result_pass("listmount empty root\n");
+}
+
+static void test_statmount_zero_mask(void)
+{
+ struct statmount sm;
+ int ret;
+
+ ret = statmount(root_id, 0, &sm, sizeof(sm), 0);
+ if (ret == -1) {
+ ksft_test_result_fail("statmount zero mask: %s\n",
+ strerror(errno));
+ return;
+ }
+ if (sm.size != sizeof(sm)) {
+ ksft_test_result_fail("unexpected size: %u != %u\n",
+ sm.size, (uint32_t) sizeof(sm));
+ return;
+ }
+ if (sm.mask != 0) {
+ ksft_test_result_fail("unexpected mask: 0x%llx != 0x0\n",
+ (unsigned long long) sm.mask);
+ return;
+ }
+
+ ksft_test_result_pass("statmount zero mask\n");
+}
+
+static void test_statmount_mnt_basic(void)
+{
+ struct statmount sm;
+ int ret;
+ uint64_t mask = STATMOUNT_MNT_BASIC;
+
+ ret = statmount(root_id, mask, &sm, sizeof(sm), 0);
+ if (ret == -1) {
+ ksft_test_result_fail("statmount mnt basic: %s\n",
+ strerror(errno));
+ return;
+ }
+ if (sm.size != sizeof(sm)) {
+ ksft_test_result_fail("unexpected size: %u != %u\n",
+ sm.size, (uint32_t) sizeof(sm));
+ return;
+ }
+ if (sm.mask != mask) {
+ ksft_test_result_skip("statmount mnt basic unavailable\n");
+ return;
+ }
+
+ if (sm.mnt_id != root_id) {
+ ksft_test_result_fail("unexpected root ID: 0x%llx != 0x%llx\n",
+ (unsigned long long) sm.mnt_id,
+ (unsigned long long) root_id);
+ return;
+ }
+
+ if (sm.mnt_id_old != old_root_id) {
+ ksft_test_result_fail("unexpected old root ID: %u != %u\n",
+ sm.mnt_id_old, old_root_id);
+ return;
+ }
+
+ if (sm.mnt_parent_id != parent_id) {
+ ksft_test_result_fail("unexpected parent ID: 0x%llx != 0x%llx\n",
+ (unsigned long long) sm.mnt_parent_id,
+ (unsigned long long) parent_id);
+ return;
+ }
+
+ if (sm.mnt_parent_id_old != old_parent_id) {
+ ksft_test_result_fail("unexpected old parent ID: %u != %u\n",
+ sm.mnt_parent_id_old, old_parent_id);
+ return;
+ }
+
+ if (sm.mnt_propagation != MS_PRIVATE) {
+ ksft_test_result_fail("unexpected propagation: 0x%llx\n",
+ (unsigned long long) sm.mnt_propagation);
+ return;
+ }
+
+ ksft_test_result_pass("statmount mnt basic\n");
+}
+
+
+static void test_statmount_sb_basic(void)
+{
+ struct statmount sm;
+ int ret;
+ uint64_t mask = STATMOUNT_SB_BASIC;
+ struct statx sx;
+ struct statfs sf;
+
+ ret = statmount(root_id, mask, &sm, sizeof(sm), 0);
+ if (ret == -1) {
+ ksft_test_result_fail("statmount sb basic: %s\n",
+ strerror(errno));
+ return;
+ }
+ if (sm.size != sizeof(sm)) {
+ ksft_test_result_fail("unexpected size: %u != %u\n",
+ sm.size, (uint32_t) sizeof(sm));
+ return;
+ }
+ if (sm.mask != mask) {
+ ksft_test_result_skip("statmount sb basic unavailable\n");
+ return;
+ }
+
+ ret = statx(AT_FDCWD, "/", 0, 0, &sx);
+ if (ret == -1) {
+ ksft_test_result_fail("stat root failed: %s\n",
+ strerror(errno));
+ return;
+ }
+
+ if (sm.sb_dev_major != sx.stx_dev_major ||
+ sm.sb_dev_minor != sx.stx_dev_minor) {
+ ksft_test_result_fail("unexpected sb dev %u:%u != %u:%u\n",
+ sm.sb_dev_major, sm.sb_dev_minor,
+ sx.stx_dev_major, sx.stx_dev_minor);
+ return;
+ }
+
+ ret = statfs("/", &sf);
+ if (ret == -1) {
+ ksft_test_result_fail("statfs root failed: %s\n",
+ strerror(errno));
+ return;
+ }
+
+ if (sm.sb_magic != sf.f_type) {
+ ksft_test_result_fail("unexpected sb magic: 0x%llx != 0x%lx\n",
+ (unsigned long long) sm.sb_magic,
+ sf.f_type);
+ return;
+ }
+
+ ksft_test_result_pass("statmount sb basic\n");
+}
+
+static void test_statmount_mnt_point(void)
+{
+ struct statmount *sm;
+
+ sm = statmount_alloc(root_id, STATMOUNT_MNT_POINT, 0);
+ if (!sm) {
+ ksft_test_result_fail("statmount mount point: %s\n",
+ strerror(errno));
+ return;
+ }
+
+ if (strcmp(sm->str + sm->mnt_point, "/") != 0) {
+ ksft_test_result_fail("unexpected mount point: '%s' != '/'\n",
+ sm->str + sm->mnt_point);
+ goto out;
+ }
+ ksft_test_result_pass("statmount mount point\n");
+out:
+ free(sm);
+}
+
+static void test_statmount_mnt_root(void)
+{
+ struct statmount *sm;
+ const char *mnt_root, *last_dir, *last_root;
+
+ last_dir = strrchr(root_mntpoint, '/');
+ assert(last_dir);
+ last_dir++;
+
+ sm = statmount_alloc(root_id, STATMOUNT_MNT_ROOT, 0);
+ if (!sm) {
+ ksft_test_result_fail("statmount mount root: %s\n",
+ strerror(errno));
+ return;
+ }
+ mnt_root = sm->str + sm->mnt_root;
+ last_root = strrchr(mnt_root, '/');
+ if (last_root)
+ last_root++;
+ else
+ last_root = mnt_root;
+
+ if (strcmp(last_dir, last_root) != 0) {
+ ksft_test_result_fail("unexpected mount root last component: '%s' != '%s'\n",
+ last_root, last_dir);
+ goto out;
+ }
+ ksft_test_result_pass("statmount mount root\n");
+out:
+ free(sm);
+}
+
+static void test_statmount_fs_type(void)
+{
+ struct statmount *sm;
+ const char *fs_type;
+ const char *const *s;
+
+ sm = statmount_alloc(root_id, STATMOUNT_FS_TYPE, 0);
+ if (!sm) {
+ ksft_test_result_fail("statmount fs type: %s\n",
+ strerror(errno));
+ return;
+ }
+ fs_type = sm->str + sm->fs_type;
+ for (s = known_fs; s != NULL; s++) {
+ if (strcmp(fs_type, *s) == 0)
+ break;
+ }
+ if (!s)
+ ksft_print_msg("unknown filesystem type: %s\n", fs_type);
+
+ ksft_test_result_pass("statmount fs type\n");
+ free(sm);
+}
+
+static void test_statmount_string(uint64_t mask, size_t off, const char *name)
+{
+ struct statmount *sm;
+ size_t len, shortsize, exactsize;
+ uint32_t start, i;
+ int ret;
+
+ sm = statmount_alloc(root_id, mask, 0);
+ if (!sm) {
+ ksft_test_result_fail("statmount %s: %s\n", name,
+ strerror(errno));
+ goto out;
+ }
+ if (sm->size < sizeof(*sm)) {
+ ksft_test_result_fail("unexpected size: %u < %u\n",
+ sm->size, (uint32_t) sizeof(*sm));
+ goto out;
+ }
+ if (sm->mask != mask) {
+ ksft_test_result_skip("statmount %s unavailable\n", name);
+ goto out;
+ }
+ len = sm->size - sizeof(*sm);
+ start = ((uint32_t *) sm)[off];
+
+ for (i = start;; i++) {
+ if (i >= len) {
+ ksft_test_result_fail("string out of bounds\n");
+ goto out;
+ }
+ if (!sm->str[i])
+ break;
+ }
+ exactsize = sm->size;
+ shortsize = sizeof(*sm) + i;
+
+ ret = statmount(root_id, mask, sm, exactsize, 0);
+ if (ret == -1) {
+ ksft_test_result_fail("statmount exact size: %s\n",
+ strerror(errno));
+ goto out;
+ }
+ errno = 0;
+ ret = statmount(root_id, mask, sm, shortsize, 0);
+ if (ret != -1 || errno != EOVERFLOW) {
+ ksft_test_result_fail("should have failed with EOVERFLOW: %s\n",
+ strerror(errno));
+ goto out;
+ }
+
+ ksft_test_result_pass("statmount string %s\n", name);
+out:
+ free(sm);
+}
+
+static void test_listmount_tree(void)
+{
+ ssize_t res;
+ const unsigned int log2_num = 4;
+ const unsigned int step = 3;
+ const unsigned int size = (1 << log2_num) + step + 1;
+ size_t num, expect = 1 << log2_num;
+ uint64_t list[size];
+ uint64_t list2[size];
+ size_t i;
+
+
+ res = setup_mount_tree(log2_num);
+ if (res == -1)
+ return;
+
+ num = res = listmount(LSMT_ROOT, 0, list, size, 0);
+ if (res == -1) {
+ ksft_test_result_fail("listmount: %s\n", strerror(errno));
+ return;
+ }
+ if (num != expect) {
+ ksft_test_result_fail("listmount result is %zi != %zi\n",
+ res, expect);
+ return;
+ }
+
+ for (i = 0; i < size - step;) {
+ res = listmount(LSMT_ROOT, i ? list2[i - 1] : 0, list2 + i, step, 0);
+ if (res == -1)
+ ksft_test_result_fail("short listmount: %s\n",
+ strerror(errno));
+ i += res;
+ if (res < step)
+ break;
+ }
+ if (i != num) {
+ ksft_test_result_fail("different number of entries: %zu != %zu\n",
+ i, num);
+ return;
+ }
+ for (i = 0; i < num; i++) {
+ if (list2[i] != list[i]) {
+ ksft_test_result_fail("different value for entry %zu: 0x%llx != 0x%llx\n",
+ i,
+ (unsigned long long) list2[i],
+ (unsigned long long) list[i]);
+ }
+ }
+
+ ksft_test_result_pass("listmount tree\n");
+}
+
+#define str_off(memb) (offsetof(struct statmount, memb) / sizeof(uint32_t))
+
+int main(void)
+{
+ int ret;
+ uint64_t all_mask = STATMOUNT_SB_BASIC | STATMOUNT_MNT_BASIC |
+ STATMOUNT_PROPAGATE_FROM | STATMOUNT_MNT_ROOT |
+ STATMOUNT_MNT_POINT | STATMOUNT_FS_TYPE;
+
+ ksft_print_header();
+
+ ret = statmount(0, 0, NULL, 0, 0);
+ assert(ret == -1);
+ if (errno == ENOSYS)
+ ksft_exit_skip("statmount() syscall not supported\n");
+
+ setup_namespace();
+
+ ksft_set_plan(14);
+ test_listmount_empty_root();
+ test_statmount_zero_mask();
+ test_statmount_mnt_basic();
+ test_statmount_sb_basic();
+ test_statmount_mnt_root();
+ test_statmount_mnt_point();
+ test_statmount_fs_type();
+ test_statmount_string(STATMOUNT_MNT_ROOT, str_off(mnt_root), "mount root");
+ test_statmount_string(STATMOUNT_MNT_POINT, str_off(mnt_point), "mount point");
+ test_statmount_string(STATMOUNT_FS_TYPE, str_off(fs_type), "fs type");
+ test_statmount_string(all_mask, str_off(mnt_root), "mount root & all");
+ test_statmount_string(all_mask, str_off(mnt_point), "mount point & all");
+ test_statmount_string(all_mask, str_off(fs_type), "fs type & all");
+
+ test_listmount_tree();
+
+
+ if (ksft_get_fail_cnt() + ksft_get_error_cnt() > 0)
+ ksft_exit_fail();
+ else
+ ksft_exit_pass();
+}
diff --git a/tools/testing/selftests/firmware/.gitignore b/tools/testing/selftests/firmware/.gitignore
new file mode 100644
index 000000000000..62abc92a94c4
--- /dev/null
+++ b/tools/testing/selftests/firmware/.gitignore
@@ -0,0 +1,2 @@
+# SPDX-License-Identifier: GPL-2.0-only
+fw_namespace
diff --git a/tools/testing/selftests/firmware/Makefile b/tools/testing/selftests/firmware/Makefile
index 40211cd8f0e6..7992969deaa2 100644
--- a/tools/testing/selftests/firmware/Makefile
+++ b/tools/testing/selftests/firmware/Makefile
@@ -4,7 +4,7 @@ CFLAGS = -Wall \
-O2
TEST_PROGS := fw_run_tests.sh
-TEST_FILES := fw_fallback.sh fw_filesystem.sh fw_lib.sh
+TEST_FILES := fw_fallback.sh fw_filesystem.sh fw_upload.sh fw_lib.sh
TEST_GEN_FILES := fw_namespace
include ../lib.mk
diff --git a/tools/testing/selftests/firmware/config b/tools/testing/selftests/firmware/config
index bf634dda0720..6e402519b117 100644
--- a/tools/testing/selftests/firmware/config
+++ b/tools/testing/selftests/firmware/config
@@ -3,3 +3,4 @@ CONFIG_FW_LOADER=y
CONFIG_FW_LOADER_USER_HELPER=y
CONFIG_IKCONFIG=y
CONFIG_IKCONFIG_PROC=y
+CONFIG_FW_UPLOAD=y
diff --git a/tools/testing/selftests/firmware/fw_filesystem.sh b/tools/testing/selftests/firmware/fw_filesystem.sh
index fcc281373b4d..1a99aea0549e 100755
--- a/tools/testing/selftests/firmware/fw_filesystem.sh
+++ b/tools/testing/selftests/firmware/fw_filesystem.sh
@@ -11,6 +11,9 @@ TEST_REQS_FW_SET_CUSTOM_PATH="yes"
TEST_DIR=$(dirname $0)
source $TEST_DIR/fw_lib.sh
+RUN_XZ="xz -C crc32 --lzma2=dict=2MiB"
+RUN_ZSTD="zstd -q"
+
check_mods
check_setup
verify_reqs
@@ -149,6 +152,26 @@ config_unset_into_buf()
echo 0 > $DIR/config_into_buf
}
+config_set_buf_size()
+{
+ echo $1 > $DIR/config_buf_size
+}
+
+config_set_file_offset()
+{
+ echo $1 > $DIR/config_file_offset
+}
+
+config_set_partial()
+{
+ echo 1 > $DIR/config_partial
+}
+
+config_unset_partial()
+{
+ echo 0 > $DIR/config_partial
+}
+
config_set_sync_direct()
{
echo 1 > $DIR/config_sync_direct
@@ -191,7 +214,7 @@ read_firmwares()
else
fwfile="$FW"
fi
- if [ "$1" = "xzonly" ]; then
+ if [ "$1" = "componly" ]; then
fwfile="${fwfile}-orig"
fi
for i in $(seq 0 3); do
@@ -207,6 +230,35 @@ read_firmwares()
done
}
+read_partial_firmwares()
+{
+ if [ "$(cat $DIR/config_into_buf)" == "1" ]; then
+ fwfile="${FW_INTO_BUF}"
+ else
+ fwfile="${FW}"
+ fi
+
+ if [ "$1" = "componly" ]; then
+ fwfile="${fwfile}-orig"
+ fi
+
+ # Strip fwfile down to match partial offset and length
+ partial_data="$(cat $fwfile)"
+ partial_data="${partial_data:$2:$3}"
+
+ for i in $(seq 0 3); do
+ config_set_read_fw_idx $i
+
+ read_firmware="$(cat $DIR/read_firmware)"
+
+ # Verify the contents are what we expect.
+ if [ $read_firmware != $partial_data ]; then
+ echo "request #$i: partial firmware was not loaded" >&2
+ exit 1
+ fi
+ done
+}
+
read_firmwares_expect_nofile()
{
for i in $(seq 0 3); do
@@ -242,6 +294,21 @@ test_batched_request_firmware_into_buf_nofile()
echo "OK"
}
+test_request_partial_firmware_into_buf_nofile()
+{
+ echo -n "Test request_partial_firmware_into_buf() off=$1 size=$2 nofile: "
+ config_reset
+ config_set_name nope-test-firmware.bin
+ config_set_into_buf
+ config_set_partial
+ config_set_buf_size $2
+ config_set_file_offset $1
+ config_trigger_sync
+ read_firmwares_expect_nofile
+ release_all_firmware
+ echo "OK"
+}
+
test_batched_request_firmware_direct_nofile()
{
echo -n "Batched request_firmware_direct() nofile try #$1: "
@@ -345,10 +412,8 @@ test_request_firmware_nowait_custom()
config_unset_uevent
RANDOM_FILE_PATH=$(setup_random_file)
RANDOM_FILE="$(basename $RANDOM_FILE_PATH)"
- if [ "$2" = "both" ]; then
- xz -9 -C crc32 -k $RANDOM_FILE_PATH
- elif [ "$2" = "xzonly" ]; then
- xz -9 -C crc32 $RANDOM_FILE_PATH
+ if [ -n "$2" -a "$2" != "normal" ]; then
+ compress_"$2"_"$COMPRESS_FORMAT" $RANDOM_FILE_PATH
fi
config_set_name $RANDOM_FILE
config_trigger_async
@@ -356,6 +421,47 @@ test_request_firmware_nowait_custom()
echo "OK"
}
+test_request_partial_firmware_into_buf()
+{
+ echo -n "Test request_partial_firmware_into_buf() off=$1 size=$2: "
+ config_reset
+ config_set_name $TEST_FIRMWARE_INTO_BUF_FILENAME
+ config_set_into_buf
+ config_set_partial
+ config_set_buf_size $2
+ config_set_file_offset $1
+ config_trigger_sync
+ read_partial_firmwares normal $1 $2
+ release_all_firmware
+ echo "OK"
+}
+
+do_tests ()
+{
+ mode="$1"
+ suffix="$2"
+
+ for i in $(seq 1 5); do
+ test_batched_request_firmware$suffix $i $mode
+ done
+
+ for i in $(seq 1 5); do
+ test_batched_request_firmware_into_buf$suffix $i $mode
+ done
+
+ for i in $(seq 1 5); do
+ test_batched_request_firmware_direct$suffix $i $mode
+ done
+
+ for i in $(seq 1 5); do
+ test_request_firmware_nowait_uevent$suffix $i $mode
+ done
+
+ for i in $(seq 1 5); do
+ test_request_firmware_nowait_custom$suffix $i $mode
+ done
+}
+
# Only continue if batched request triggers are present on the
# test-firmware driver
test_config_present
@@ -363,99 +469,78 @@ test_config_present
# test with the file present
echo
echo "Testing with the file present..."
-for i in $(seq 1 5); do
- test_batched_request_firmware $i normal
-done
-
-for i in $(seq 1 5); do
- test_batched_request_firmware_into_buf $i normal
-done
-
-for i in $(seq 1 5); do
- test_batched_request_firmware_direct $i normal
-done
+do_tests normal
-for i in $(seq 1 5); do
- test_request_firmware_nowait_uevent $i normal
-done
-
-for i in $(seq 1 5); do
- test_request_firmware_nowait_custom $i normal
-done
+# Partial loads cannot use fallback, so do not repeat tests.
+test_request_partial_firmware_into_buf 0 10
+test_request_partial_firmware_into_buf 0 5
+test_request_partial_firmware_into_buf 1 6
+test_request_partial_firmware_into_buf 2 10
# Test for file not found, errors are expected, the failure would be
# a hung task, which would require a hard reset.
echo
echo "Testing with the file missing..."
-for i in $(seq 1 5); do
- test_batched_request_firmware_nofile $i
-done
+do_tests nofile _nofile
+
+# Partial loads cannot use fallback, so do not repeat tests.
+test_request_partial_firmware_into_buf_nofile 0 10
+test_request_partial_firmware_into_buf_nofile 0 5
+test_request_partial_firmware_into_buf_nofile 1 6
+test_request_partial_firmware_into_buf_nofile 2 10
-for i in $(seq 1 5); do
- test_batched_request_firmware_into_buf_nofile $i
-done
+test_request_firmware_compressed ()
+{
+ export COMPRESS_FORMAT="$1"
-for i in $(seq 1 5); do
- test_batched_request_firmware_direct_nofile $i
-done
+ # test with both files present
+ compress_both_"$COMPRESS_FORMAT" $FW
+ compress_both_"$COMPRESS_FORMAT" $FW_INTO_BUF
-for i in $(seq 1 5); do
- test_request_firmware_nowait_uevent_nofile $i
-done
+ config_set_name $NAME
+ echo
+ echo "Testing with both plain and $COMPRESS_FORMAT files present..."
+ do_tests both
-for i in $(seq 1 5); do
- test_request_firmware_nowait_custom_nofile $i
-done
+ # test with only compressed file present
+ mv "$FW" "${FW}-orig"
+ mv "$FW_INTO_BUF" "${FW_INTO_BUF}-orig"
-test "$HAS_FW_LOADER_COMPRESS" != "yes" && exit 0
+ config_set_name $NAME
+ echo
+ echo "Testing with only $COMPRESS_FORMAT file present..."
+ do_tests componly
-# test with both files present
-xz -9 -C crc32 -k $FW
-config_set_name $NAME
-echo
-echo "Testing with both plain and xz files present..."
-for i in $(seq 1 5); do
- test_batched_request_firmware $i both
-done
+ mv "${FW}-orig" "$FW"
+ mv "${FW_INTO_BUF}-orig" "$FW_INTO_BUF"
+}
+
+compress_both_XZ ()
+{
+ $RUN_XZ -k "$@"
+}
-for i in $(seq 1 5); do
- test_batched_request_firmware_into_buf $i both
-done
+compress_componly_XZ ()
+{
+ $RUN_XZ "$@"
+}
-for i in $(seq 1 5); do
- test_batched_request_firmware_direct $i both
-done
+compress_both_ZSTD ()
+{
+ $RUN_ZSTD -k "$@"
+}
-for i in $(seq 1 5); do
- test_request_firmware_nowait_uevent $i both
-done
+compress_componly_ZSTD ()
+{
+ $RUN_ZSTD --rm "$@"
+}
-for i in $(seq 1 5); do
- test_request_firmware_nowait_custom $i both
-done
+if test "$HAS_FW_LOADER_COMPRESS_XZ" = "yes"; then
+ test_request_firmware_compressed XZ
+fi
-# test with only xz file present
-mv "$FW" "${FW}-orig"
-echo
-echo "Testing with only xz file present..."
-for i in $(seq 1 5); do
- test_batched_request_firmware $i xzonly
-done
-
-for i in $(seq 1 5); do
- test_batched_request_firmware_into_buf $i xzonly
-done
-
-for i in $(seq 1 5); do
- test_batched_request_firmware_direct $i xzonly
-done
-
-for i in $(seq 1 5); do
- test_request_firmware_nowait_uevent $i xzonly
-done
-
-for i in $(seq 1 5); do
- test_request_firmware_nowait_custom $i xzonly
-done
+if test "$HAS_FW_LOADER_COMPRESS_ZSTD" = "yes"; then
+ test_request_firmware_compressed ZSTD
+fi
exit 0
diff --git a/tools/testing/selftests/firmware/fw_lib.sh b/tools/testing/selftests/firmware/fw_lib.sh
index 5b8c0fedee76..7bffd67800bf 100755
--- a/tools/testing/selftests/firmware/fw_lib.sh
+++ b/tools/testing/selftests/firmware/fw_lib.sh
@@ -62,7 +62,9 @@ check_setup()
{
HAS_FW_LOADER_USER_HELPER="$(kconfig_has CONFIG_FW_LOADER_USER_HELPER=y)"
HAS_FW_LOADER_USER_HELPER_FALLBACK="$(kconfig_has CONFIG_FW_LOADER_USER_HELPER_FALLBACK=y)"
- HAS_FW_LOADER_COMPRESS="$(kconfig_has CONFIG_FW_LOADER_COMPRESS=y)"
+ HAS_FW_LOADER_COMPRESS_XZ="$(kconfig_has CONFIG_FW_LOADER_COMPRESS_XZ=y)"
+ HAS_FW_LOADER_COMPRESS_ZSTD="$(kconfig_has CONFIG_FW_LOADER_COMPRESS_ZSTD=y)"
+ HAS_FW_UPLOAD="$(kconfig_has CONFIG_FW_UPLOAD=y)"
PROC_FW_IGNORE_SYSFS_FALLBACK="0"
PROC_FW_FORCE_SYSFS_FALLBACK="0"
@@ -98,9 +100,14 @@ check_setup()
OLD_FWPATH="$(cat /sys/module/firmware_class/parameters/path)"
- if [ "$HAS_FW_LOADER_COMPRESS" = "yes" ]; then
+ if [ "$HAS_FW_LOADER_COMPRESS_XZ" = "yes" ]; then
if ! which xz 2> /dev/null > /dev/null; then
- HAS_FW_LOADER_COMPRESS=""
+ HAS_FW_LOADER_COMPRESS_XZ=""
+ fi
+ fi
+ if [ "$HAS_FW_LOADER_COMPRESS_ZSTD" = "yes" ]; then
+ if ! which zstd 2> /dev/null > /dev/null; then
+ HAS_FW_LOADER_COMPRESS_ZSTD=""
fi
fi
}
@@ -113,6 +120,12 @@ verify_reqs()
exit 0
fi
fi
+ if [ "$TEST_REQS_FW_UPLOAD" = "yes" ]; then
+ if [ ! "$HAS_FW_UPLOAD" = "yes" ]; then
+ echo "firmware upload disabled so ignoring test"
+ exit 0
+ fi
+ fi
}
setup_tmp_file()
diff --git a/tools/testing/selftests/firmware/fw_namespace.c b/tools/testing/selftests/firmware/fw_namespace.c
index 5ebc1aec7923..04757dc7e546 100644
--- a/tools/testing/selftests/firmware/fw_namespace.c
+++ b/tools/testing/selftests/firmware/fw_namespace.c
@@ -17,10 +17,6 @@
#include <sys/wait.h>
#include <unistd.h>
-#ifndef CLONE_NEWNS
-# define CLONE_NEWNS 0x00020000
-#endif
-
static char *fw_path = NULL;
static void die(char *fmt, ...)
@@ -95,7 +91,7 @@ static bool test_fw_in_ns(const char *fw_name, const char *sys_path, bool block_
}
if (block_fw_in_parent_ns)
umount("/lib/firmware");
- return WEXITSTATUS(status) == EXIT_SUCCESS ? true : false;
+ return WEXITSTATUS(status) == EXIT_SUCCESS;
}
if (unshare(CLONE_NEWNS) != 0) {
@@ -129,7 +125,8 @@ int main(int argc, char **argv)
die("mounting tmpfs to /lib/firmware failed\n");
sys_path = argv[1];
- asprintf(&fw_path, "/lib/firmware/%s", fw_name);
+ if (asprintf(&fw_path, "/lib/firmware/%s", fw_name) < 0)
+ die("error: failed to build full fw_path\n");
setup_fw(fw_path);
diff --git a/tools/testing/selftests/firmware/fw_run_tests.sh b/tools/testing/selftests/firmware/fw_run_tests.sh
index 777377078d5e..f6d95a2d5124 100755
--- a/tools/testing/selftests/firmware/fw_run_tests.sh
+++ b/tools/testing/selftests/firmware/fw_run_tests.sh
@@ -22,6 +22,10 @@ run_tests()
proc_set_force_sysfs_fallback $1
proc_set_ignore_sysfs_fallback $2
$TEST_DIR/fw_fallback.sh
+
+ proc_set_force_sysfs_fallback $1
+ proc_set_ignore_sysfs_fallback $2
+ $TEST_DIR/fw_upload.sh
}
run_test_config_0001()
diff --git a/tools/testing/selftests/firmware/fw_upload.sh b/tools/testing/selftests/firmware/fw_upload.sh
new file mode 100755
index 000000000000..c7a6f06c9adb
--- /dev/null
+++ b/tools/testing/selftests/firmware/fw_upload.sh
@@ -0,0 +1,214 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+# This validates the user-initiated fw upload mechanism of the firmware
+# loader. It verifies that one or more firmware devices can be created
+# for a device driver. It also verifies the data transfer, the
+# cancellation support, and the error flows.
+set -e
+
+TEST_REQS_FW_UPLOAD="yes"
+TEST_DIR=$(dirname $0)
+
+progress_states="preparing transferring programming"
+errors="hw-error
+ timeout
+ device-busy
+ invalid-file-size
+ read-write-error
+ flash-wearout"
+error_abort="user-abort"
+fwname1=fw1
+fwname2=fw2
+fwname3=fw3
+
+source $TEST_DIR/fw_lib.sh
+
+check_mods
+check_setup
+verify_reqs
+
+trap "upload_finish" EXIT
+
+upload_finish() {
+ local fwdevs="$fwname1 $fwname2 $fwname3"
+
+ for name in $fwdevs; do
+ if [ -e "$DIR/$name" ]; then
+ echo -n "$name" > "$DIR"/upload_unregister
+ fi
+ done
+}
+
+upload_fw() {
+ local name="$1"
+ local file="$2"
+
+ echo 1 > "$DIR"/"$name"/loading
+ cat "$file" > "$DIR"/"$name"/data
+ echo 0 > "$DIR"/"$name"/loading
+}
+
+verify_fw() {
+ local name="$1"
+ local file="$2"
+
+ echo -n "$name" > "$DIR"/config_upload_name
+ if ! cmp "$file" "$DIR"/upload_read > /dev/null 2>&1; then
+ echo "$0: firmware compare for $name did not match" >&2
+ exit 1
+ fi
+
+ echo "$0: firmware upload for $name works" >&2
+ return 0
+}
+
+inject_error() {
+ local name="$1"
+ local status="$2"
+ local error="$3"
+
+ echo 1 > "$DIR"/"$name"/loading
+ echo -n "inject":"$status":"$error" > "$DIR"/"$name"/data
+ echo 0 > "$DIR"/"$name"/loading
+}
+
+await_status() {
+ local name="$1"
+ local expected="$2"
+ local status
+ local i
+
+ let i=0
+ while [ $i -lt 50 ]; do
+ status=$(cat "$DIR"/"$name"/status)
+ if [ "$status" = "$expected" ]; then
+ return 0;
+ fi
+ sleep 1e-03
+ let i=$i+1
+ done
+
+ echo "$0: Invalid status: Expected $expected, Actual $status" >&2
+ return 1;
+}
+
+await_idle() {
+ local name="$1"
+
+ await_status "$name" "idle"
+ return $?
+}
+
+expect_error() {
+ local name="$1"
+ local expected="$2"
+ local error=$(cat "$DIR"/"$name"/error)
+
+ if [ "$error" != "$expected" ]; then
+ echo "Invalid error: Expected $expected, Actual $error" >&2
+ return 1
+ fi
+
+ return 0
+}
+
+random_firmware() {
+ local bs="$1"
+ local count="$2"
+ local file=$(mktemp -p /tmp uploadfwXXX.bin)
+
+ dd if=/dev/urandom of="$file" bs="$bs" count="$count" > /dev/null 2>&1
+ echo "$file"
+}
+
+test_upload_cancel() {
+ local name="$1"
+ local status
+
+ for status in $progress_states; do
+ inject_error $name $status $error_abort
+ if ! await_status $name $status; then
+ exit 1
+ fi
+
+ echo 1 > "$DIR"/"$name"/cancel
+
+ if ! await_idle $name; then
+ exit 1
+ fi
+
+ if ! expect_error $name "$status":"$error_abort"; then
+ exit 1
+ fi
+ done
+
+ echo "$0: firmware upload cancellation works"
+ return 0
+}
+
+test_error_handling() {
+ local name=$1
+ local status
+ local error
+
+ for status in $progress_states; do
+ for error in $errors; do
+ inject_error $name $status $error
+
+ if ! await_idle $name; then
+ exit 1
+ fi
+
+ if ! expect_error $name "$status":"$error"; then
+ exit 1
+ fi
+
+ done
+ done
+ echo "$0: firmware upload error handling works"
+}
+
+test_fw_too_big() {
+ local name=$1
+ local fw_too_big=`random_firmware 512 5`
+ local expected="preparing:invalid-file-size"
+
+ upload_fw $name $fw_too_big
+ rm -f $fw_too_big
+
+ if ! await_idle $name; then
+ exit 1
+ fi
+
+ if ! expect_error $name $expected; then
+ exit 1
+ fi
+
+ echo "$0: oversized firmware error handling works"
+}
+
+echo -n "$fwname1" > "$DIR"/upload_register
+echo -n "$fwname2" > "$DIR"/upload_register
+echo -n "$fwname3" > "$DIR"/upload_register
+
+test_upload_cancel $fwname1
+test_error_handling $fwname1
+test_fw_too_big $fwname1
+
+fw_file1=`random_firmware 512 4`
+fw_file2=`random_firmware 512 3`
+fw_file3=`random_firmware 512 2`
+
+upload_fw $fwname1 $fw_file1
+upload_fw $fwname2 $fw_file2
+upload_fw $fwname3 $fw_file3
+
+verify_fw ${fwname1} ${fw_file1}
+verify_fw ${fwname2} ${fw_file2}
+verify_fw ${fwname3} ${fw_file3}
+
+echo -n "$fwname1" > "$DIR"/upload_unregister
+echo -n "$fwname2" > "$DIR"/upload_unregister
+echo -n "$fwname3" > "$DIR"/upload_unregister
+
+exit 0
diff --git a/tools/testing/selftests/firmware/settings b/tools/testing/selftests/firmware/settings
new file mode 100644
index 000000000000..085e664ee093
--- /dev/null
+++ b/tools/testing/selftests/firmware/settings
@@ -0,0 +1,8 @@
+# The async firmware timeout is set to 1 second (but ends up being effectively
+# 2 seconds). There are 3 test configs, each done with and without firmware
+# present, each with 2 "nowait" functions tested 5 times. Expected time for a
+# normal execution should be 2 * 3 * 2 * 2 * 5 = 120 seconds for those alone.
+# Additionally, fw_fallback may take 5 seconds for internal timeouts in each
+# of the 3 configs, so at least another 15 seconds are needed. Add another
+# 10 seconds for each testing config: 120 + 15 + 30
+timeout=165
diff --git a/tools/testing/selftests/fpu/.gitignore b/tools/testing/selftests/fpu/.gitignore
new file mode 100644
index 000000000000..d6d12ac1d9c3
--- /dev/null
+++ b/tools/testing/selftests/fpu/.gitignore
@@ -0,0 +1,2 @@
+# SPDX-License-Identifier: GPL-2.0+
+test_fpu
diff --git a/tools/testing/selftests/fpu/Makefile b/tools/testing/selftests/fpu/Makefile
new file mode 100644
index 000000000000..ea62c176ede7
--- /dev/null
+++ b/tools/testing/selftests/fpu/Makefile
@@ -0,0 +1,9 @@
+# SPDX-License-Identifier: GPL-2.0+
+
+LDLIBS := -lm
+
+TEST_GEN_PROGS := test_fpu
+
+TEST_PROGS := run_test_fpu.sh
+
+include ../lib.mk
diff --git a/tools/testing/selftests/fpu/run_test_fpu.sh b/tools/testing/selftests/fpu/run_test_fpu.sh
new file mode 100755
index 000000000000..d77be93ec139
--- /dev/null
+++ b/tools/testing/selftests/fpu/run_test_fpu.sh
@@ -0,0 +1,46 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+#
+# Load kernel module for FPU tests
+
+uid=$(id -u)
+if [ $uid -ne 0 ]; then
+ echo "$0: Must be run as root"
+ exit 1
+fi
+
+if ! which modprobe > /dev/null 2>&1; then
+ echo "$0: You need modprobe installed"
+ exit 4
+fi
+
+if ! modinfo test_fpu > /dev/null 2>&1; then
+ echo "$0: You must have the following enabled in your kernel:"
+ echo "CONFIG_TEST_FPU=m"
+ exit 4
+fi
+
+NR_CPUS=$(getconf _NPROCESSORS_ONLN)
+if [ ! $NR_CPUS ]; then
+ NR_CPUS=1
+fi
+
+modprobe test_fpu
+
+if [ ! -e /sys/kernel/debug/selftest_helpers/test_fpu ]; then
+ mount -t debugfs none /sys/kernel/debug
+
+ if [ ! -e /sys/kernel/debug/selftest_helpers/test_fpu ]; then
+ echo "$0: Error mounting debugfs"
+ exit 4
+ fi
+fi
+
+echo "Running 1000 iterations on all CPUs... "
+for i in $(seq 1 1000); do
+ for c in $(seq 1 $NR_CPUS); do
+ ./test_fpu &
+ done
+done
+
+rmmod test_fpu
diff --git a/tools/testing/selftests/fpu/test_fpu.c b/tools/testing/selftests/fpu/test_fpu.c
new file mode 100644
index 000000000000..200238522a9d
--- /dev/null
+++ b/tools/testing/selftests/fpu/test_fpu.c
@@ -0,0 +1,61 @@
+// SPDX-License-Identifier: GPL-2.0+
+/* This testcase operates with the test_fpu kernel driver.
+ * It modifies the FPU control register in user mode and calls the kernel
+ * module to perform floating point operations in the kernel. The control
+ * register value should be independent between kernel and user mode.
+ */
+
+#define _GNU_SOURCE
+#include <stdio.h>
+#include <errno.h>
+#include <string.h>
+#include <fenv.h>
+#include <unistd.h>
+#include <fcntl.h>
+
+const char *test_fpu_path = "/sys/kernel/debug/selftest_helpers/test_fpu";
+
+int main(void)
+{
+ char dummy[1];
+ int fd = open(test_fpu_path, O_RDONLY);
+
+ if (fd < 0) {
+ printf("[SKIP]\tcan't access %s: %s\n",
+ test_fpu_path, strerror(errno));
+ return 0;
+ }
+
+ if (read(fd, dummy, 1) < 0) {
+ printf("[FAIL]\taccess with default rounding mode failed\n");
+ return 1;
+ }
+
+ fesetround(FE_DOWNWARD);
+ if (read(fd, dummy, 1) < 0) {
+ printf("[FAIL]\taccess with downward rounding mode failed\n");
+ return 2;
+ }
+ if (fegetround() != FE_DOWNWARD) {
+ printf("[FAIL]\tusermode rounding mode clobbered\n");
+ return 3;
+ }
+
+ /* Note: the tests up to this point are quite safe and will only return
+ * an error. But the exception mask setting can cause misbehaving kernel
+ * to crash.
+ */
+ feclearexcept(FE_ALL_EXCEPT);
+ feenableexcept(FE_ALL_EXCEPT);
+ if (read(fd, dummy, 1) < 0) {
+ printf("[FAIL]\taccess with fpu exceptions unmasked failed\n");
+ return 4;
+ }
+ if (fegetexcept() != FE_ALL_EXCEPT) {
+ printf("[FAIL]\tusermode fpu exception mask clobbered\n");
+ return 5;
+ }
+
+ printf("[OK]\ttest_fpu\n");
+ return 0;
+}
diff --git a/tools/testing/selftests/ftrace/Makefile b/tools/testing/selftests/ftrace/Makefile
index d6e106fbce11..a1e955d2de4c 100644
--- a/tools/testing/selftests/ftrace/Makefile
+++ b/tools/testing/selftests/ftrace/Makefile
@@ -1,7 +1,8 @@
# SPDX-License-Identifier: GPL-2.0
all:
-TEST_PROGS := ftracetest
+TEST_PROGS_EXTENDED := ftracetest
+TEST_PROGS := ftracetest-ktap
TEST_FILES := test.d settings
EXTRA_CLEAN := $(OUTPUT)/logs/*
diff --git a/tools/testing/selftests/ftrace/ftracetest b/tools/testing/selftests/ftrace/ftracetest
index 8ec1922e974e..25d4e0fca385 100755
--- a/tools/testing/selftests/ftrace/ftracetest
+++ b/tools/testing/selftests/ftrace/ftracetest
@@ -13,6 +13,7 @@ echo "Usage: ftracetest [options] [testcase(s)] [testcase-directory(s)]"
echo " Options:"
echo " -h|--help Show help message"
echo " -k|--keep Keep passed test logs"
+echo " -K|--ktap Output in KTAP format"
echo " -v|--verbose Increase verbosity of test messages"
echo " -vv Alias of -v -v (Show all results in stdout)"
echo " -vvv Alias of -v -v -v (Show all commands immediately)"
@@ -30,6 +31,9 @@ err_ret=1
# kselftest skip code is 4
err_skip=4
+# umount required
+UMOUNT_DIR=""
+
# cgroup RT scheduling prevents chrt commands from succeeding, which
# induces failures in test wakeup tests. Disable for the duration of
# the tests.
@@ -44,6 +48,9 @@ setup() {
cleanup() {
echo $sched_rt_runtime_orig > $sched_rt_runtime
+ if [ -n "${UMOUNT_DIR}" ]; then
+ umount ${UMOUNT_DIR} ||:
+ fi
}
errexit() { # message
@@ -85,6 +92,10 @@ parse_opts() { # opts
KEEP_LOG=1
shift 1
;;
+ --ktap|-K)
+ KTAP=1
+ shift 1
+ ;;
--verbose|-v|-vv|-vvv)
if [ $VERBOSE -eq -1 ]; then
usage "--console can not use with --verbose"
@@ -119,6 +130,7 @@ parse_opts() { # opts
;;
--logdir|-l)
LOG_DIR=$2
+ LINK_PTR=
shift 2
;;
*.tc)
@@ -155,11 +167,13 @@ if [ -z "$TRACING_DIR" ]; then
mount -t tracefs nodev /sys/kernel/tracing ||
errexit "Failed to mount /sys/kernel/tracing"
TRACING_DIR="/sys/kernel/tracing"
+ UMOUNT_DIR=${TRACING_DIR}
# If debugfs exists, then so does /sys/kernel/debug
elif [ -d "/sys/kernel/debug" ]; then
mount -t debugfs nodev /sys/kernel/debug ||
errexit "Failed to mount /sys/kernel/debug"
TRACING_DIR="/sys/kernel/debug/tracing"
+ UMOUNT_DIR=${TRACING_DIR}
else
err_ret=$err_skip
errexit "debugfs and tracefs are not configured in this kernel"
@@ -176,8 +190,12 @@ fi
TOP_DIR=`absdir $0`
TEST_DIR=$TOP_DIR/test.d
TEST_CASES=`find_testcases $TEST_DIR`
-LOG_DIR=$TOP_DIR/logs/`date +%Y%m%d-%H%M%S`/
+LOG_TOP_DIR=$TOP_DIR/logs
+LOG_DATE=`date +%Y%m%d-%H%M%S`
+LOG_DIR=$LOG_TOP_DIR/$LOG_DATE/
+LINK_PTR=$LOG_TOP_DIR/latest
KEEP_LOG=0
+KTAP=0
DEBUG=0
VERBOSE=0
UNSUPPORTED_RESULT=0
@@ -201,6 +219,10 @@ else
LOG_FILE=$LOG_DIR/ftracetest.log
mkdir -p $LOG_DIR || errexit "Failed to make a log directory: $LOG_DIR"
date > $LOG_FILE
+ if [ "x-$LINK_PTR" != "x-" ]; then
+ unlink $LINK_PTR
+ ln -fs $LOG_DATE $LINK_PTR
+ fi
fi
# Define text colors
@@ -229,7 +251,7 @@ prlog() { # messages
newline=
shift
fi
- printf "$*$newline"
+ [ "$KTAP" != "1" ] && printf "$*$newline"
[ "$LOG_FILE" ] && printf "$*$newline" | strip_esc >> $LOG_FILE
}
catlog() { #file
@@ -260,11 +282,11 @@ TOTAL_RESULT=0
INSTANCE=
CASENO=0
+CASENAME=
testcase() { # testfile
CASENO=$((CASENO+1))
- desc=`grep "^#[ \t]*description:" $1 | cut -f2- -d:`
- prlog -n "[$CASENO]$INSTANCE$desc"
+ CASENAME=`grep "^#[ \t]*description:" $1 | cut -f2- -d:`
}
checkreq() { # testfile
@@ -277,40 +299,68 @@ test_on_instance() { # testfile
grep -q "^#[ \t]*flags:.*instance" $1
}
+ktaptest() { # result comment
+ if [ "$KTAP" != "1" ]; then
+ return
+ fi
+
+ local result=
+ if [ "$1" = "1" ]; then
+ result="ok"
+ else
+ result="not ok"
+ fi
+ shift
+
+ local comment=$*
+ if [ "$comment" != "" ]; then
+ comment="# $comment"
+ fi
+
+ echo $result $CASENO $INSTANCE$CASENAME $comment
+}
+
eval_result() { # sigval
case $1 in
$PASS)
prlog " [${color_green}PASS${color_reset}]"
+ ktaptest 1
PASSED_CASES="$PASSED_CASES $CASENO"
return 0
;;
$FAIL)
prlog " [${color_red}FAIL${color_reset}]"
+ ktaptest 0
FAILED_CASES="$FAILED_CASES $CASENO"
return 1 # this is a bug.
;;
$UNRESOLVED)
prlog " [${color_blue}UNRESOLVED${color_reset}]"
+ ktaptest 0 UNRESOLVED
UNRESOLVED_CASES="$UNRESOLVED_CASES $CASENO"
return $UNRESOLVED_RESULT # depends on use case
;;
$UNTESTED)
prlog " [${color_blue}UNTESTED${color_reset}]"
+ ktaptest 1 SKIP
UNTESTED_CASES="$UNTESTED_CASES $CASENO"
return 0
;;
$UNSUPPORTED)
prlog " [${color_blue}UNSUPPORTED${color_reset}]"
+ ktaptest 1 SKIP
UNSUPPORTED_CASES="$UNSUPPORTED_CASES $CASENO"
return $UNSUPPORTED_RESULT # depends on use case
;;
$XFAIL)
prlog " [${color_green}XFAIL${color_reset}]"
+ ktaptest 1 XFAIL
XFAILED_CASES="$XFAILED_CASES $CASENO"
return 0
;;
*)
prlog " [${color_blue}UNDEFINED${color_reset}]"
+ ktaptest 0 error
UNDEFINED_CASES="$UNDEFINED_CASES $CASENO"
return 1 # this must be a test bug
;;
@@ -371,6 +421,7 @@ __run_test() { # testfile
run_test() { # testfile
local testname=`basename $1`
testcase $1
+ prlog -n "[$CASENO]$INSTANCE$CASENAME"
if [ ! -z "$LOG_FILE" ] ; then
local testlog=`mktemp $LOG_DIR/${CASENO}-${testname}-log.XXXXXX`
else
@@ -405,6 +456,17 @@ run_test() { # testfile
# load in the helper functions
. $TEST_DIR/functions
+if [ "$KTAP" = "1" ]; then
+ echo "TAP version 13"
+
+ casecount=`echo $TEST_CASES | wc -w`
+ for t in $TEST_CASES; do
+ test_on_instance $t || continue
+ casecount=$((casecount+1))
+ done
+ echo "1..${casecount}"
+fi
+
# Main loop
for t in $TEST_CASES; do
run_test $t
@@ -428,7 +490,7 @@ for t in $TEST_CASES; do
exit 1
fi
done
-(cd $TRACING_DIR; initialize_ftrace) # for cleanup
+(cd $TRACING_DIR; finish_ftrace) # for cleanup
prlog ""
prlog "# of passed: " `echo $PASSED_CASES | wc -w`
@@ -439,6 +501,17 @@ prlog "# of unsupported: " `echo $UNSUPPORTED_CASES | wc -w`
prlog "# of xfailed: " `echo $XFAILED_CASES | wc -w`
prlog "# of undefined(test bug): " `echo $UNDEFINED_CASES | wc -w`
+if [ "$KTAP" = "1" ]; then
+ echo -n "# Totals:"
+ echo -n " pass:"`echo $PASSED_CASES | wc -w`
+ echo -n " fail:"`echo $FAILED_CASES | wc -w`
+ echo -n " xfail:"`echo $XFAILED_CASES | wc -w`
+ echo -n " xpass:0"
+ echo -n " skip:"`echo $UNTESTED_CASES $UNSUPPORTED_CASES | wc -w`
+ echo -n " error:"`echo $UNRESOLVED_CASES $UNDEFINED_CASES | wc -w`
+ echo
+fi
+
cleanup
# if no error, return 0
diff --git a/tools/testing/selftests/ftrace/ftracetest-ktap b/tools/testing/selftests/ftrace/ftracetest-ktap
new file mode 100755
index 000000000000..b3284679ef3a
--- /dev/null
+++ b/tools/testing/selftests/ftrace/ftracetest-ktap
@@ -0,0 +1,8 @@
+#!/bin/sh -e
+# SPDX-License-Identifier: GPL-2.0-only
+#
+# ftracetest-ktap: Wrapper to integrate ftracetest with the kselftest runner
+#
+# Copyright (C) Arm Ltd., 2023
+
+./ftracetest -K
diff --git a/tools/testing/selftests/ftrace/test.d/00basic/ringbuffer_subbuf_size.tc b/tools/testing/selftests/ftrace/test.d/00basic/ringbuffer_subbuf_size.tc
new file mode 100644
index 000000000000..d44d09a33a74
--- /dev/null
+++ b/tools/testing/selftests/ftrace/test.d/00basic/ringbuffer_subbuf_size.tc
@@ -0,0 +1,95 @@
+#!/bin/sh
+# SPDX-License-Identifier: GPL-2.0
+# description: Change the ringbuffer sub-buffer size
+# requires: buffer_subbuf_size_kb
+# flags: instance
+
+get_buffer_data_size() {
+ sed -ne 's/^.*data.*size:\([0-9][0-9]*\).*/\1/p' events/header_page
+}
+
+get_buffer_data_offset() {
+ sed -ne 's/^.*data.*offset:\([0-9][0-9]*\).*/\1/p' events/header_page
+}
+
+get_event_header_size() {
+ type_len=`sed -ne 's/^.*type_len.*:[^0-9]*\([0-9][0-9]*\).*/\1/p' events/header_event`
+ time_len=`sed -ne 's/^.*time_delta.*:[^0-9]*\([0-9][0-9]*\).*/\1/p' events/header_event`
+ array_len=`sed -ne 's/^.*array.*:[^0-9]*\([0-9][0-9]*\).*/\1/p' events/header_event`
+ total_bits=$((type_len+time_len+array_len))
+ total_bits=$((total_bits+7))
+ echo $((total_bits/8))
+}
+
+get_print_event_buf_offset() {
+ sed -ne 's/^.*buf.*offset:\([0-9][0-9]*\).*/\1/p' events/ftrace/print/format
+}
+
+event_header_size=`get_event_header_size`
+print_header_size=`get_print_event_buf_offset`
+
+data_offset=`get_buffer_data_offset`
+
+marker_meta=$((event_header_size+print_header_size))
+
+make_str() {
+ cnt=$1
+ printf -- 'X%.0s' $(seq $cnt)
+}
+
+write_buffer() {
+ size=$1
+
+ str=`make_str $size`
+
+ # clear the buffer
+ echo > trace
+
+ # write the string into the marker
+ echo $str > trace_marker
+
+ echo $str
+}
+
+test_buffer() {
+ size_kb=$1
+ page_size=$((size_kb*1024))
+
+ size=`get_buffer_data_size`
+
+ # the size must be greater than or equal to page_size - data_offset
+ page_size=$((page_size-data_offset))
+ if [ $size -lt $page_size ]; then
+ exit fail
+ fi
+
+ # Now add a little more the meta data overhead will overflow
+
+ str=`write_buffer $size`
+
+ # Make sure the line was broken
+ new_str=`awk ' /tracing_mark_write:/ { sub(/^.*tracing_mark_write: /,"");printf "%s", $0; exit}' trace`
+
+ if [ "$new_str" = "$str" ]; then
+ exit fail;
+ fi
+
+ # Make sure the entire line can be found
+ new_str=`awk ' /tracing_mark_write:/ { sub(/^.*tracing_mark_write: /,"");printf "%s", $0; }' trace`
+
+ if [ "$new_str" != "$str" ]; then
+ exit fail;
+ fi
+}
+
+ORIG=`cat buffer_subbuf_size_kb`
+
+# Could test bigger sizes than 32K, but then creating the string
+# to write into the ring buffer takes too long
+for a in 4 8 16 32 ; do
+ echo $a > buffer_subbuf_size_kb
+ test_buffer $a
+done
+
+echo $ORIG > buffer_subbuf_size_kb
+
diff --git a/tools/testing/selftests/ftrace/test.d/00basic/snapshot1.tc b/tools/testing/selftests/ftrace/test.d/00basic/snapshot1.tc
new file mode 100644
index 000000000000..63b76cf2a360
--- /dev/null
+++ b/tools/testing/selftests/ftrace/test.d/00basic/snapshot1.tc
@@ -0,0 +1,31 @@
+#!/bin/sh
+# SPDX-License-Identifier: GPL-2.0
+# description: Snapshot and tracing_cpumask
+# requires: trace_marker tracing_cpumask snapshot
+# flags: instance
+
+# This testcase is constrived to reproduce a problem that the cpu buffers
+# become unavailable which is due to 'record_disabled' of array_buffer and
+# max_buffer being messed up.
+
+# Store origin cpumask
+ORIG_CPUMASK=`cat tracing_cpumask`
+
+# Stop tracing all cpu
+echo 0 > tracing_cpumask
+
+# Take a snapshot of the main buffer
+echo 1 > snapshot
+
+# Restore origin cpumask, note that there should be some cpus being traced
+echo ${ORIG_CPUMASK} > tracing_cpumask
+
+# Set tracing on
+echo 1 > tracing_on
+
+# Write a log into buffer
+echo "test input 1" > trace_marker
+
+# Ensure the log writed so that cpu buffers are still available
+grep -q "test input 1" trace
+exit 0
diff --git a/tools/testing/selftests/ftrace/test.d/00basic/test_ownership.tc b/tools/testing/selftests/ftrace/test.d/00basic/test_ownership.tc
new file mode 100644
index 000000000000..c45094d1e1d2
--- /dev/null
+++ b/tools/testing/selftests/ftrace/test.d/00basic/test_ownership.tc
@@ -0,0 +1,114 @@
+#!/bin/sh
+# SPDX-License-Identifier: GPL-2.0
+# description: Test file and directory ownership changes for eventfs
+
+original_group=`stat -c "%g" .`
+original_owner=`stat -c "%u" .`
+
+mount_point=`stat -c '%m' .`
+mount_options=`mount | grep "$mount_point" | sed -e 's/.*(\(.*\)).*/\1/'`
+
+# find another owner and group that is not the original
+other_group=`tac /etc/group | grep -v ":$original_group:" | head -1 | cut -d: -f3`
+other_owner=`tac /etc/passwd | grep -v ":$original_owner:" | head -1 | cut -d: -f3`
+
+# Remove any group ownership already
+new_options=`echo "$mount_options" | sed -e "s/gid=[0-9]*/gid=$other_group/"`
+
+if [ "$new_options" = "$mount_options" ]; then
+ new_options="$mount_options,gid=$other_group"
+ mount_options="$mount_options,gid=$original_group"
+fi
+
+canary="events/timer events/timer/timer_cancel events/timer/timer_cancel/format"
+
+test() {
+ file=$1
+ test_group=$2
+
+ owner=`stat -c "%u" $file`
+ group=`stat -c "%g" $file`
+
+ echo "testing $file $owner=$original_owner and $group=$test_group"
+ if [ $owner -ne $original_owner ]; then
+ exit_fail
+ fi
+ if [ $group -ne $test_group ]; then
+ exit_fail
+ fi
+
+ # Note, the remount does not update ownership so test going to and from owner
+ echo "test owner $file to $other_owner"
+ chown $other_owner $file
+ owner=`stat -c "%u" $file`
+ if [ $owner -ne $other_owner ]; then
+ exit_fail
+ fi
+
+ chown $original_owner $file
+ owner=`stat -c "%u" $file`
+ if [ $owner -ne $original_owner ]; then
+ exit_fail
+ fi
+
+}
+
+run_tests() {
+ for d in "." "events" "events/sched" "events/sched/sched_switch" "events/sched/sched_switch/enable" $canary; do
+ test "$d" $other_group
+ done
+
+ chgrp $original_group events
+ test "events" $original_group
+ for d in "." "events/sched" "events/sched/sched_switch" "events/sched/sched_switch/enable" $canary; do
+ test "$d" $other_group
+ done
+
+ chgrp $original_group events/sched
+ test "events/sched" $original_group
+ for d in "." "events/sched/sched_switch" "events/sched/sched_switch/enable" $canary; do
+ test "$d" $other_group
+ done
+
+ chgrp $original_group events/sched/sched_switch
+ test "events/sched/sched_switch" $original_group
+ for d in "." "events/sched/sched_switch/enable" $canary; do
+ test "$d" $other_group
+ done
+
+ chgrp $original_group events/sched/sched_switch/enable
+ test "events/sched/sched_switch/enable" $original_group
+ for d in "." $canary; do
+ test "$d" $other_group
+ done
+}
+
+mount -o remount,"$new_options" .
+
+run_tests
+
+mount -o remount,"$mount_options" .
+
+for d in "." "events" "events/sched" "events/sched/sched_switch" "events/sched/sched_switch/enable" $canary; do
+ test "$d" $original_group
+done
+
+# check instances as well
+
+chgrp $other_group instances
+
+instance="$(mktemp -u test-XXXXXX)"
+
+mkdir instances/$instance
+
+cd instances/$instance
+
+run_tests
+
+cd ../..
+
+rmdir instances/$instance
+
+chgrp $original_group instances
+
+exit 0
diff --git a/tools/testing/selftests/ftrace/test.d/00basic/trace_marker.tc b/tools/testing/selftests/ftrace/test.d/00basic/trace_marker.tc
new file mode 100644
index 000000000000..9aa0db2b84fc
--- /dev/null
+++ b/tools/testing/selftests/ftrace/test.d/00basic/trace_marker.tc
@@ -0,0 +1,82 @@
+#!/bin/sh
+# SPDX-License-Identifier: GPL-2.0
+# description: Basic tests on writing to trace_marker
+# requires: trace_marker
+# flags: instance
+
+get_buffer_data_size() {
+ sed -ne 's/^.*data.*size:\([0-9][0-9]*\).*/\1/p' events/header_page
+}
+
+get_buffer_data_offset() {
+ sed -ne 's/^.*data.*offset:\([0-9][0-9]*\).*/\1/p' events/header_page
+}
+
+get_event_header_size() {
+ type_len=`sed -ne 's/^.*type_len.*:[^0-9]*\([0-9][0-9]*\).*/\1/p' events/header_event`
+ time_len=`sed -ne 's/^.*time_delta.*:[^0-9]*\([0-9][0-9]*\).*/\1/p' events/header_event`
+ array_len=`sed -ne 's/^.*array.*:[^0-9]*\([0-9][0-9]*\).*/\1/p' events/header_event`
+ total_bits=$((type_len+time_len+array_len))
+ total_bits=$((total_bits+7))
+ echo $((total_bits/8))
+}
+
+get_print_event_buf_offset() {
+ sed -ne 's/^.*buf.*offset:\([0-9][0-9]*\).*/\1/p' events/ftrace/print/format
+}
+
+event_header_size=`get_event_header_size`
+print_header_size=`get_print_event_buf_offset`
+
+data_offset=`get_buffer_data_offset`
+
+marker_meta=$((event_header_size+print_header_size))
+
+make_str() {
+ cnt=$1
+ # subtract two for \n\0 as marker adds these
+ cnt=$((cnt-2))
+ printf -- 'X%.0s' $(seq $cnt)
+}
+
+write_buffer() {
+ size=$1
+
+ str=`make_str $size`
+
+ # clear the buffer
+ echo > trace
+
+ # write the string into the marker
+ echo -n $str > trace_marker
+
+ echo $str
+}
+
+test_buffer() {
+
+ size=`get_buffer_data_size`
+ oneline_size=$((size-marker_meta))
+ echo size = $size
+ echo meta size = $marker_meta
+
+ # Now add a little more the meta data overhead will overflow
+
+ str=`write_buffer $size`
+
+ # Make sure the line was broken
+ new_str=`awk ' /tracing_mark_write:/ { sub(/^.*tracing_mark_write: /,"");printf "%s", $0; exit}' trace`
+
+ if [ "$new_str" = "$str" ]; then
+ exit fail;
+ fi
+
+ # Make sure the entire line can be found
+ new_str=`awk ' /tracing_mark_write:/ { sub(/^.*tracing_mark_write: /,"");printf "%s", $0; }' trace`
+
+ if [ "$new_str" != "$str" ]; then
+ exit fail;
+ fi
+}
+
+test_buffer
diff --git a/tools/testing/selftests/ftrace/test.d/dynevent/add_remove_btfarg.tc b/tools/testing/selftests/ftrace/test.d/dynevent/add_remove_btfarg.tc
new file mode 100644
index 000000000000..b9c21a81d248
--- /dev/null
+++ b/tools/testing/selftests/ftrace/test.d/dynevent/add_remove_btfarg.tc
@@ -0,0 +1,78 @@
+#!/bin/sh
+# SPDX-License-Identifier: GPL-2.0
+# description: Generic dynamic event - add/remove probes with BTF arguments
+# requires: dynamic_events "<argname>":README
+
+KPROBES=
+FPROBES=
+FIELDS=
+
+if grep -qF "p[:[<group>/][<event>]] <place> [<args>]" README ; then
+ KPROBES=yes
+fi
+if grep -qF "f[:[<group>/][<event>]] <func-name>[%return] [<args>]" README ; then
+ FPROBES=yes
+fi
+if grep -qF "<argname>[->field[->field|.field...]]" README ; then
+ FIELDS=yes
+fi
+
+if [ -z "$KPROBES" -a -z "$FPROBES" ] ; then
+ exit_unsupported
+fi
+
+echo 0 > events/enable
+echo > dynamic_events
+
+TP=kfree
+TP2=kmem_cache_alloc
+TP3=getname_flags
+TP4=sched_wakeup
+
+if [ "$FPROBES" ] ; then
+echo "f:fpevent $TP object" >> dynamic_events
+echo "t:tpevent $TP ptr" >> dynamic_events
+
+grep -q "fpevent.*object=object" dynamic_events
+grep -q "tpevent.*ptr=ptr" dynamic_events
+
+echo > dynamic_events
+
+echo "f:fpevent $TP "'$arg1' >> dynamic_events
+grep -q "fpevent.*object=object" dynamic_events
+
+echo > dynamic_events
+
+echo "f:fpevent $TP "'$arg*' >> dynamic_events
+echo "t:tpevent $TP "'$arg*' >> dynamic_events
+
+grep -q "fpevent.*object=object" dynamic_events
+grep -q "tpevent.*ptr=ptr" dynamic_events
+! grep -q "tpevent.*_data" dynamic_events
+fi
+
+echo > dynamic_events
+
+if [ "$FIELDS" ] ; then
+echo "t:tpevent ${TP2} obj_size=s->object_size" >> dynamic_events
+echo "f:fpevent ${TP3}%return path=\$retval->name:string" >> dynamic_events
+echo "t:tpevent2 ${TP4} p->se.group_node.next->prev" >> dynamic_events
+
+grep -q "tpevent .*obj_size=s->object_size" dynamic_events
+grep -q "fpevent.*path=\$retval->name:string" dynamic_events
+grep -q 'tpevent2 .*p->se.group_node.next->prev' dynamic_events
+
+echo > dynamic_events
+fi
+
+if [ "$KPROBES" ] ; then
+echo "p:kpevent $TP object" >> dynamic_events
+grep -q "kpevent.*object=object" dynamic_events
+
+echo > dynamic_events
+
+echo "p:kpevent $TP "'$arg*' >> dynamic_events
+grep -q "kpevent.*object=object" dynamic_events
+fi
+
+clear_trace
diff --git a/tools/testing/selftests/ftrace/test.d/dynevent/add_remove_eprobe.tc b/tools/testing/selftests/ftrace/test.d/dynevent/add_remove_eprobe.tc
new file mode 100644
index 000000000000..c300eb020262
--- /dev/null
+++ b/tools/testing/selftests/ftrace/test.d/dynevent/add_remove_eprobe.tc
@@ -0,0 +1,97 @@
+#!/bin/sh
+# SPDX-License-Identifier: GPL-2.0
+# description: Generic dynamic event - add/remove eprobe events
+# requires: dynamic_events events/syscalls/sys_enter_openat "<attached-group>.<attached-event> [<args>]":README
+
+echo 0 > events/enable
+
+clear_dynamic_events
+
+SYSTEM="syscalls"
+EVENT="sys_enter_openat"
+FIELD="filename"
+EPROBE="eprobe_open"
+OPTIONS="file=+0(\$filename):ustring"
+echo "e:$EPROBE $SYSTEM/$EVENT $OPTIONS" >> dynamic_events
+
+grep -q "$EPROBE" dynamic_events
+test -d events/eprobes/$EPROBE
+
+echo 1 > events/eprobes/$EPROBE/enable
+ls
+echo 0 > events/eprobes/$EPROBE/enable
+
+content=`grep '^ *ls-' trace | grep 'file='`
+nocontent=`grep '^ *ls-' trace | grep 'file=' | grep -v -e '"/' -e '"."' -e '(fault)' ` || true
+
+if [ -z "$content" ]; then
+ exit_fail
+fi
+
+if [ ! -z "$nocontent" ]; then
+ exit_fail
+fi
+
+echo "-:$EPROBE" >> dynamic_events
+
+! grep -q "$EPROBE" dynamic_events
+! test -d events/eprobes/$EPROBE
+
+# test various ways to remove the probe (already tested with just event name)
+
+# With group name
+echo "e:$EPROBE $SYSTEM/$EVENT $OPTIONS" >> dynamic_events
+grep -q "$EPROBE" dynamic_events
+test -d events/eprobes/$EPROBE
+echo "-:eprobes/$EPROBE" >> dynamic_events
+! grep -q "$EPROBE" dynamic_events
+! test -d events/eprobes/$EPROBE
+
+# With group name and system/event
+echo "e:$EPROBE $SYSTEM/$EVENT $OPTIONS" >> dynamic_events
+grep -q "$EPROBE" dynamic_events
+test -d events/eprobes/$EPROBE
+echo "-:eprobes/$EPROBE $SYSTEM/$EVENT" >> dynamic_events
+! grep -q "$EPROBE" dynamic_events
+! test -d events/eprobes/$EPROBE
+
+# With just event name and system/event
+echo "e:$EPROBE $SYSTEM/$EVENT $OPTIONS" >> dynamic_events
+grep -q "$EPROBE" dynamic_events
+test -d events/eprobes/$EPROBE
+echo "-:$EPROBE $SYSTEM/$EVENT" >> dynamic_events
+! grep -q "$EPROBE" dynamic_events
+! test -d events/eprobes/$EPROBE
+
+# With just event name and system/event and options
+echo "e:$EPROBE $SYSTEM/$EVENT $OPTIONS" >> dynamic_events
+grep -q "$EPROBE" dynamic_events
+test -d events/eprobes/$EPROBE
+echo "-:$EPROBE $SYSTEM/$EVENT $OPTIONS" >> dynamic_events
+! grep -q "$EPROBE" dynamic_events
+! test -d events/eprobes/$EPROBE
+
+# With group name and system/event and options
+echo "e:$EPROBE $SYSTEM/$EVENT $OPTIONS" >> dynamic_events
+grep -q "$EPROBE" dynamic_events
+test -d events/eprobes/$EPROBE
+echo "-:eprobes/$EPROBE $SYSTEM/$EVENT $OPTIONS" >> dynamic_events
+! grep -q "$EPROBE" dynamic_events
+! test -d events/eprobes/$EPROBE
+
+# Finally make sure what is in the dynamic_events file clears it too
+echo "e:$EPROBE $SYSTEM/$EVENT $OPTIONS" >> dynamic_events
+LINE=`sed -e '/$EPROBE/s/^e/-/' < dynamic_events`
+test -d events/eprobes/$EPROBE
+echo "-:eprobes/$EPROBE $SYSTEM/$EVENT $OPTIONS" >> dynamic_events
+! grep -q "$EPROBE" dynamic_events
+! test -d events/eprobes/$EPROBE
+
+if grep -q "e\[:\[<group>/]\[<event>]]" README; then
+ echo "e:mygroup/ $SYSTEM/$EVENT $OPTIONS" >> dynamic_events
+ test -d events/mygroup
+ echo "-:mygroup/" >> dynamic_events
+ ! test -d events/mygroup
+fi
+
+clear_trace
diff --git a/tools/testing/selftests/ftrace/test.d/dynevent/add_remove_fprobe.tc b/tools/testing/selftests/ftrace/test.d/dynevent/add_remove_fprobe.tc
new file mode 100644
index 000000000000..dc25bcf4f9e2
--- /dev/null
+++ b/tools/testing/selftests/ftrace/test.d/dynevent/add_remove_fprobe.tc
@@ -0,0 +1,26 @@
+#!/bin/sh
+# SPDX-License-Identifier: GPL-2.0
+# description: Generic dynamic event - add/remove fprobe events
+# requires: dynamic_events "f[:[<group>/][<event>]] <func-name>[%return] [<args>]":README
+
+echo 0 > events/enable
+echo > dynamic_events
+
+PLACE=$FUNCTION_FORK
+
+echo "f:myevent1 $PLACE" >> dynamic_events
+echo "f:myevent2 $PLACE%return" >> dynamic_events
+
+grep -q myevent1 dynamic_events
+grep -q myevent2 dynamic_events
+test -d events/fprobes/myevent1
+test -d events/fprobes/myevent2
+
+echo "-:myevent2" >> dynamic_events
+
+grep -q myevent1 dynamic_events
+! grep -q myevent2 dynamic_events
+
+echo > dynamic_events
+
+clear_trace
diff --git a/tools/testing/selftests/ftrace/test.d/dynevent/add_remove_kprobe.tc b/tools/testing/selftests/ftrace/test.d/dynevent/add_remove_kprobe.tc
index 68550f97d3c3..13d43f40a6fc 100644
--- a/tools/testing/selftests/ftrace/test.d/dynevent/add_remove_kprobe.tc
+++ b/tools/testing/selftests/ftrace/test.d/dynevent/add_remove_kprobe.tc
@@ -6,7 +6,7 @@
echo 0 > events/enable
echo > dynamic_events
-PLACE=_do_fork
+PLACE=$FUNCTION_FORK
echo "p:myevent1 $PLACE" >> dynamic_events
echo "r:myevent2 $PLACE" >> dynamic_events
@@ -23,4 +23,11 @@ grep -q myevent1 dynamic_events
echo > dynamic_events
+if grep -q "p\[:\[<group>/]\[<event>]]" README; then
+ echo "p:mygroup/ $PLACE" >> dynamic_events
+ test -d events/mygroup
+ echo "-:mygroup/" >> dynamic_events
+ ! test -d events/mygroup
+fi
+
clear_trace
diff --git a/tools/testing/selftests/ftrace/test.d/dynevent/add_remove_tprobe.tc b/tools/testing/selftests/ftrace/test.d/dynevent/add_remove_tprobe.tc
new file mode 100644
index 000000000000..155792eaeee5
--- /dev/null
+++ b/tools/testing/selftests/ftrace/test.d/dynevent/add_remove_tprobe.tc
@@ -0,0 +1,27 @@
+#!/bin/sh
+# SPDX-License-Identifier: GPL-2.0
+# description: Generic dynamic event - add/remove tracepoint probe events
+# requires: dynamic_events "t[:[<group>/][<event>]] <tracepoint> [<args>]":README
+
+echo 0 > events/enable
+echo > dynamic_events
+
+TRACEPOINT1=kmem_cache_alloc
+TRACEPOINT2=kmem_cache_free
+
+echo "t:myevent1 $TRACEPOINT1" >> dynamic_events
+echo "t:myevent2 $TRACEPOINT2" >> dynamic_events
+
+grep -q myevent1 dynamic_events
+grep -q myevent2 dynamic_events
+test -d events/tracepoints/myevent1
+test -d events/tracepoints/myevent2
+
+echo "-:myevent2" >> dynamic_events
+
+grep -q myevent1 dynamic_events
+! grep -q myevent2 dynamic_events
+
+echo > dynamic_events
+
+clear_trace
diff --git a/tools/testing/selftests/ftrace/test.d/dynevent/clear_select_events.tc b/tools/testing/selftests/ftrace/test.d/dynevent/clear_select_events.tc
index c969be9eb7de..3a0e2885fff5 100644
--- a/tools/testing/selftests/ftrace/test.d/dynevent/clear_select_events.tc
+++ b/tools/testing/selftests/ftrace/test.d/dynevent/clear_select_events.tc
@@ -6,7 +6,7 @@
echo 0 > events/enable
echo > dynamic_events
-PLACE=_do_fork
+PLACE=$FUNCTION_FORK
setup_events() {
echo "p:myevent1 $PLACE" >> dynamic_events
diff --git a/tools/testing/selftests/ftrace/test.d/dynevent/eprobes_syntax_errors.tc b/tools/testing/selftests/ftrace/test.d/dynevent/eprobes_syntax_errors.tc
new file mode 100644
index 000000000000..4f5e8c665156
--- /dev/null
+++ b/tools/testing/selftests/ftrace/test.d/dynevent/eprobes_syntax_errors.tc
@@ -0,0 +1,29 @@
+#!/bin/sh
+# SPDX-License-Identifier: GPL-2.0
+# description: Event probe event parser error log check
+# requires: dynamic_events events/syscalls/sys_enter_openat "<attached-group>.<attached-event> [<args>]":README error_log
+
+check_error() { # command-with-error-pos-by-^
+ ftrace_errlog_check 'event_probe' "$1" 'dynamic_events'
+}
+
+check_error 'e ^a.' # NO_EVENT_INFO
+check_error 'e ^.b' # NO_EVENT_INFO
+check_error 'e ^a.b' # BAD_ATTACH_EVENT
+check_error 'e syscalls/sys_enter_openat ^foo' # BAD_ATTACH_ARG
+check_error 'e:^/bar syscalls/sys_enter_openat' # NO_GROUP_NAME
+check_error 'e:^12345678901234567890123456789012345678901234567890123456789012345/bar syscalls/sys_enter_openat' # GROUP_TOO_LONG
+
+check_error 'e:^foo.1/bar syscalls/sys_enter_openat' # BAD_GROUP_NAME
+check_error 'e:^ syscalls/sys_enter_openat' # NO_EVENT_NAME
+check_error 'e:foo/^12345678901234567890123456789012345678901234567890123456789012345 syscalls/sys_enter_openat' # EVENT_TOO_LONG
+check_error 'e:foo/^bar.1 syscalls/sys_enter_openat' # BAD_EVENT_NAME
+
+check_error 'e:foo/bar syscalls/sys_enter_openat arg=^dfd' # BAD_FETCH_ARG
+check_error 'e:foo/bar syscalls/sys_enter_openat ^arg=$foo' # BAD_ATTACH_ARG
+
+if grep -q '<attached-group>\.<attached-event>.*\[if <filter>\]' README; then
+ check_error 'e:foo/bar syscalls/sys_enter_openat if ^' # NO_EP_FILTER
+fi
+
+exit 0
diff --git a/tools/testing/selftests/ftrace/test.d/dynevent/fprobe_entry_arg.tc b/tools/testing/selftests/ftrace/test.d/dynevent/fprobe_entry_arg.tc
new file mode 100644
index 000000000000..d183b8a8ecf8
--- /dev/null
+++ b/tools/testing/selftests/ftrace/test.d/dynevent/fprobe_entry_arg.tc
@@ -0,0 +1,18 @@
+#!/bin/sh
+# SPDX-License-Identifier: GPL-2.0
+# description: Function return probe entry argument access
+# requires: dynamic_events 'f[:[<group>/][<event>]] <func-name>':README 'kernel return probes support:':README
+
+echo 'f:tests/myevent1 vfs_open arg=$arg1' >> dynamic_events
+echo 'f:tests/myevent2 vfs_open%return arg=$arg1' >> dynamic_events
+
+echo 1 > events/tests/enable
+
+echo > trace
+cat trace > /dev/null
+
+function streq() {
+ test $1 = $2
+}
+
+streq `grep -A 1 -m 1 myevent1 trace | sed -r 's/^.*(arg=.*)/\1/' `
diff --git a/tools/testing/selftests/ftrace/test.d/dynevent/fprobe_syntax_errors.tc b/tools/testing/selftests/ftrace/test.d/dynevent/fprobe_syntax_errors.tc
new file mode 100644
index 000000000000..61877d166451
--- /dev/null
+++ b/tools/testing/selftests/ftrace/test.d/dynevent/fprobe_syntax_errors.tc
@@ -0,0 +1,123 @@
+#!/bin/sh
+# SPDX-License-Identifier: GPL-2.0
+# description: Fprobe event parser error log check
+# requires: dynamic_events "f[:[<group>/][<event>]] <func-name>[%return] [<args>]":README
+
+check_error() { # command-with-error-pos-by-^
+ ftrace_errlog_check 'trace_fprobe' "$1" 'dynamic_events'
+}
+
+case `uname -m` in
+x86_64|i[3456]86)
+ REG=%ax ;;
+aarch64)
+ REG=%x0 ;;
+*)
+ REG=%r0 ;;
+esac
+
+check_error 'f^100 vfs_read' # MAXACT_NO_KPROBE
+check_error 'f^1a111 vfs_read' # BAD_MAXACT
+check_error 'f^100000 vfs_read' # MAXACT_TOO_BIG
+
+check_error 'f ^non_exist_func' # BAD_PROBE_ADDR (enoent)
+check_error 'f ^vfs_read+10' # BAD_PROBE_ADDR
+check_error 'f:^/bar vfs_read' # NO_GROUP_NAME
+check_error 'f:^12345678901234567890123456789012345678901234567890123456789012345/bar vfs_read' # GROUP_TOO_LONG
+
+check_error 'f:^foo.1/bar vfs_read' # BAD_GROUP_NAME
+check_error 'f:^ vfs_read' # NO_EVENT_NAME
+check_error 'f:foo/^12345678901234567890123456789012345678901234567890123456789012345 vfs_read' # EVENT_TOO_LONG
+check_error 'f:foo/^bar.1 vfs_read' # BAD_EVENT_NAME
+
+check_error 'f vfs_read ^$stack10000' # BAD_STACK_NUM
+
+check_error 'f vfs_read ^$arg10000' # BAD_ARG_NUM
+
+if !grep -q 'kernel return probes support:' README; then
+check_error 'f vfs_read $retval ^$arg1' # BAD_VAR
+fi
+check_error 'f vfs_read ^$none_var' # BAD_VAR
+check_error 'f vfs_read ^'$REG # BAD_VAR
+
+check_error 'f vfs_read ^@12345678abcde' # BAD_MEM_ADDR
+check_error 'f vfs_read ^@+10' # FILE_ON_KPROBE
+
+grep -q "imm-value" README && \
+check_error 'f vfs_read arg1=\^x' # BAD_IMM
+grep -q "imm-string" README && \
+check_error 'f vfs_read arg1=\"abcd^' # IMMSTR_NO_CLOSE
+
+check_error 'f vfs_read ^+0@0)' # DEREF_NEED_BRACE
+check_error 'f vfs_read ^+0ab1(@0)' # BAD_DEREF_OFFS
+check_error 'f vfs_read +0(+0(@0^)' # DEREF_OPEN_BRACE
+
+if grep -A1 "fetcharg:" README | grep -q '\$comm' ; then
+check_error 'f vfs_read +0(^$comm)' # COMM_CANT_DEREF
+fi
+
+check_error 'f vfs_read ^&1' # BAD_FETCH_ARG
+
+
+# We've introduced this limitation with array support
+if grep -q ' <type>\\\[<array-size>\\\]' README; then
+check_error 'f vfs_read +0(^+0(+0(+0(+0(+0(+0(+0(+0(+0(+0(+0(+0(+0(@0))))))))))))))' # TOO_MANY_OPS?
+check_error 'f vfs_read +0(@11):u8[10^' # ARRAY_NO_CLOSE
+check_error 'f vfs_read +0(@11):u8[10]^a' # BAD_ARRAY_SUFFIX
+check_error 'f vfs_read +0(@11):u8[^10a]' # BAD_ARRAY_NUM
+check_error 'f vfs_read +0(@11):u8[^256]' # ARRAY_TOO_BIG
+fi
+
+check_error 'f vfs_read @11:^unknown_type' # BAD_TYPE
+check_error 'f vfs_read $stack0:^string' # BAD_STRING
+check_error 'f vfs_read @11:^b10@a/16' # BAD_BITFIELD
+
+check_error 'f vfs_read ^arg123456789012345678901234567890=@11' # ARG_NAME_TOO_LOG
+check_error 'f vfs_read ^=@11' # NO_ARG_NAME
+check_error 'f vfs_read ^var.1=@11' # BAD_ARG_NAME
+check_error 'f vfs_read var1=@11 ^var1=@12' # USED_ARG_NAME
+check_error 'f vfs_read ^+1234567(+1234567(+1234567(+1234567(+1234567(+1234567(@1234))))))' # ARG_TOO_LONG
+check_error 'f vfs_read arg1=^' # NO_ARG_BODY
+
+
+# multiprobe errors
+if grep -q "Create/append/" README && grep -q "imm-value" README; then
+echo "f:fprobes/testevent $FUNCTION_FORK" > dynamic_events
+check_error '^f:fprobes/testevent do_exit%return' # DIFF_PROBE_TYPE
+
+# Explicitly use printf "%s" to not interpret \1
+printf "%s" "f:fprobes/testevent $FUNCTION_FORK abcd=\\1" > dynamic_events
+check_error "f:fprobes/testevent $FUNCTION_FORK ^bcd=\\1" # DIFF_ARG_TYPE
+check_error "f:fprobes/testevent $FUNCTION_FORK ^abcd=\\1:u8" # DIFF_ARG_TYPE
+check_error "f:fprobes/testevent $FUNCTION_FORK ^abcd=\\\"foo\"" # DIFF_ARG_TYPE
+check_error "^f:fprobes/testevent $FUNCTION_FORK abcd=\\1" # SAME_PROBE
+fi
+
+# %return suffix errors
+check_error 'f vfs_read^%hoge' # BAD_ADDR_SUFFIX
+
+# BTF arguments errors
+if grep -q "<argname>" README; then
+check_error 'f vfs_read args=^$arg*' # BAD_VAR_ARGS
+check_error 'f vfs_read +0(^$arg*)' # BAD_VAR_ARGS
+check_error 'f vfs_read $arg* ^$arg*' # DOUBLE_ARGS
+if !grep -q 'kernel return probes support:' README; then
+check_error 'f vfs_read%return ^$arg*' # NOFENTRY_ARGS
+fi
+check_error 'f vfs_read ^hoge' # NO_BTFARG
+check_error 'f kfree ^$arg10' # NO_BTFARG (exceed the number of parameters)
+check_error 'f kfree%return ^$retval' # NO_RETVAL
+
+if grep -qF "<argname>[->field[->field|.field...]]" README ; then
+check_error 'f vfs_read%return $retval->^foo' # NO_PTR_STRCT
+check_error 'f vfs_read file->^foo' # NO_BTF_FIELD
+check_error 'f vfs_read file^-.foo' # BAD_HYPHEN
+check_error 'f vfs_read ^file:string' # BAD_TYPE4STR
+fi
+
+else
+check_error 'f vfs_read ^$arg*' # NOSUP_BTFARG
+check_error 't kfree ^$arg*' # NOSUP_BTFARG
+fi
+
+exit 0
diff --git a/tools/testing/selftests/ftrace/test.d/dynevent/generic_clear_event.tc b/tools/testing/selftests/ftrace/test.d/dynevent/generic_clear_event.tc
index 16d543eaac88..d3e138e8377f 100644
--- a/tools/testing/selftests/ftrace/test.d/dynevent/generic_clear_event.tc
+++ b/tools/testing/selftests/ftrace/test.d/dynevent/generic_clear_event.tc
@@ -6,7 +6,7 @@
echo 0 > events/enable
echo > dynamic_events
-PLACE=_do_fork
+PLACE=$FUNCTION_FORK
setup_events() {
echo "p:myevent1 $PLACE" >> dynamic_events
diff --git a/tools/testing/selftests/ftrace/test.d/dynevent/test_duplicates.tc b/tools/testing/selftests/ftrace/test.d/dynevent/test_duplicates.tc
new file mode 100644
index 000000000000..d3a79da215c8
--- /dev/null
+++ b/tools/testing/selftests/ftrace/test.d/dynevent/test_duplicates.tc
@@ -0,0 +1,38 @@
+#!/bin/sh
+# SPDX-License-Identifier: GPL-2.0
+# description: Generic dynamic event - check if duplicate events are caught
+# requires: dynamic_events "e[:[<group>/][<event>]] <attached-group>.<attached-event> [<args>]":README
+
+echo 0 > events/enable
+
+HAVE_KPROBES=0
+
+if [ -f kprobe_events ]; then
+ HAVE_KPROBES=1
+fi
+
+clear_dynamic_events
+
+# first create dynamic events for eprobes and kprobes.
+
+echo 'e:egroup/eevent syscalls/sys_enter_openat file=+0($filename):ustring' >> dynamic_events
+
+# Test eprobe for same eprobe, existing kprobe and existing event
+! echo 'e:egroup/eevent syscalls/sys_enter_openat file=+0($filename):ustring' >> dynamic_events
+! echo 'e:syscalls/sys_enter_open syscalls/sys_enter_openat file=+0($filename):ustring' >> dynamic_events
+
+if [ $HAVE_KPROBES -eq 1 ]; then
+ echo 'p:kgroup/kevent vfs_open file=+0($arg2)' >> dynamic_events
+ ! echo 'e:kgroup/kevent syscalls/sys_enter_openat file=+0($filename):ustring' >> dynamic_events
+
+# Test kprobe for same kprobe, existing eprobe and existing event
+ ! echo 'p:kgroup/kevent vfs_open file=+0($arg2)' >> dynamic_events
+ ! echo 'p:egroup/eevent vfs_open file=+0($arg2)' >> dynamic_events
+ ! echo 'p:syscalls/sys_enter_open vfs_open file=+0($arg2)' >> dynamic_events
+
+ echo '-:kgroup/kevent' >> dynamic_events
+fi
+
+echo '-:egroup/eevent' >> dynamic_events
+
+clear_trace
diff --git a/tools/testing/selftests/ftrace/test.d/dynevent/tprobe_syntax_errors.tc b/tools/testing/selftests/ftrace/test.d/dynevent/tprobe_syntax_errors.tc
new file mode 100644
index 000000000000..da117b8f1d12
--- /dev/null
+++ b/tools/testing/selftests/ftrace/test.d/dynevent/tprobe_syntax_errors.tc
@@ -0,0 +1,82 @@
+#!/bin/sh
+# SPDX-License-Identifier: GPL-2.0
+# description: Tracepoint probe event parser error log check
+# requires: dynamic_events "t[:[<group>/][<event>]] <tracepoint> [<args>]":README
+
+check_error() { # command-with-error-pos-by-^
+ ftrace_errlog_check 'trace_fprobe' "$1" 'dynamic_events'
+}
+
+check_error 't^100 kfree' # BAD_MAXACT_TYPE
+
+check_error 't ^non_exist_tracepoint' # NO_TRACEPOINT
+check_error 't:^/bar kfree' # NO_GROUP_NAME
+check_error 't:^12345678901234567890123456789012345678901234567890123456789012345/bar kfree' # GROUP_TOO_LONG
+
+check_error 't:^foo.1/bar kfree' # BAD_GROUP_NAME
+check_error 't:^ kfree' # NO_EVENT_NAME
+check_error 't:foo/^12345678901234567890123456789012345678901234567890123456789012345 kfree' # EVENT_TOO_LONG
+check_error 't:foo/^bar.1 kfree' # BAD_EVENT_NAME
+
+check_error 't kfree ^$retval' # RETVAL_ON_PROBE
+check_error 't kfree ^$stack10000' # BAD_STACK_NUM
+
+check_error 't kfree ^$arg10000' # BAD_ARG_NUM
+
+check_error 't kfree ^$none_var' # BAD_VAR
+check_error 't kfree ^%rax' # BAD_VAR
+
+check_error 't kfree ^@12345678abcde' # BAD_MEM_ADDR
+check_error 't kfree ^@+10' # FILE_ON_KPROBE
+
+grep -q "imm-value" README && \
+check_error 't kfree arg1=\^x' # BAD_IMM
+grep -q "imm-string" README && \
+check_error 't kfree arg1=\"abcd^' # IMMSTR_NO_CLOSE
+
+check_error 't kfree ^+0@0)' # DEREF_NEED_BRACE
+check_error 't kfree ^+0ab1(@0)' # BAD_DEREF_OFFS
+check_error 't kfree +0(+0(@0^)' # DEREF_OPEN_BRACE
+
+if grep -A1 "fetcharg:" README | grep -q '\$comm' ; then
+check_error 't kfree +0(^$comm)' # COMM_CANT_DEREF
+fi
+
+check_error 't kfree ^&1' # BAD_FETCH_ARG
+
+
+# We've introduced this limitation with array support
+if grep -q ' <type>\\\[<array-size>\\\]' README; then
+check_error 't kfree +0(^+0(+0(+0(+0(+0(+0(+0(+0(+0(+0(+0(+0(+0(@0))))))))))))))' # TOO_MANY_OPS?
+check_error 't kfree +0(@11):u8[10^' # ARRAY_NO_CLOSE
+check_error 't kfree +0(@11):u8[10]^a' # BAD_ARRAY_SUFFIX
+check_error 't kfree +0(@11):u8[^10a]' # BAD_ARRAY_NUM
+check_error 't kfree +0(@11):u8[^256]' # ARRAY_TOO_BIG
+fi
+
+check_error 't kfree @11:^unknown_type' # BAD_TYPE
+check_error 't kfree $stack0:^string' # BAD_STRING
+check_error 't kfree @11:^b10@a/16' # BAD_BITFIELD
+
+check_error 't kfree ^arg123456789012345678901234567890=@11' # ARG_NAME_TOO_LOG
+check_error 't kfree ^=@11' # NO_ARG_NAME
+check_error 't kfree ^var.1=@11' # BAD_ARG_NAME
+check_error 't kfree var1=@11 ^var1=@12' # USED_ARG_NAME
+check_error 't kfree ^+1234567(+1234567(+1234567(+1234567(+1234567(+1234567(@1234))))))' # ARG_TOO_LONG
+check_error 't kfree arg1=^' # NO_ARG_BODY
+
+
+# multiprobe errors
+if grep -q "Create/append/" README && grep -q "imm-value" README; then
+echo "t:tracepoint/testevent kfree" > dynamic_events
+check_error '^f:tracepoint/testevent kfree' # DIFF_PROBE_TYPE
+
+# Explicitly use printf "%s" to not interpret \1
+printf "%s" "t:tracepoints/testevent kfree abcd=\\1" > dynamic_events
+check_error "t:tracepoints/testevent kfree ^bcd=\\1" # DIFF_ARG_TYPE
+check_error "t:tracepoints/testevent kfree ^abcd=\\1:u8" # DIFF_ARG_TYPE
+check_error "t:tracepoints/testevent kfree ^abcd=\\\"foo\"" # DIFF_ARG_TYPE
+check_error "^t:tracepoints/testevent kfree abcd=\\1" # SAME_PROBE
+fi
+
+exit 0
diff --git a/tools/testing/selftests/ftrace/test.d/event/event-no-pid.tc b/tools/testing/selftests/ftrace/test.d/event/event-no-pid.tc
index e6eb78f0b954..9933ed24f901 100644
--- a/tools/testing/selftests/ftrace/test.d/event/event-no-pid.tc
+++ b/tools/testing/selftests/ftrace/test.d/event/event-no-pid.tc
@@ -57,6 +57,10 @@ enable_events() {
echo 1 > tracing_on
}
+other_task() {
+ sleep .001 || usleep 1 || sleep 1
+}
+
echo 0 > options/event-fork
do_reset
@@ -94,6 +98,9 @@ child=$!
echo "child = $child"
wait $child
+# Be sure some other events will happen for small systems (e.g. 1 core)
+other_task
+
echo 0 > tracing_on
cnt=`count_pid $mypid`
diff --git a/tools/testing/selftests/ftrace/test.d/filter/event-filter-function.tc b/tools/testing/selftests/ftrace/test.d/filter/event-filter-function.tc
new file mode 100644
index 000000000000..2de7c61d1ae3
--- /dev/null
+++ b/tools/testing/selftests/ftrace/test.d/filter/event-filter-function.tc
@@ -0,0 +1,67 @@
+#!/bin/sh
+# SPDX-License-Identifier: GPL-2.0
+# description: event filter function - test event filtering on functions
+# requires: set_event events/kmem/kmem_cache_free/filter
+# flags: instance
+
+fail() { #msg
+ echo $1
+ exit_fail
+}
+
+sample_events() {
+ echo > trace
+ echo 1 > events/kmem/kmem_cache_free/enable
+ echo 1 > tracing_on
+ ls > /dev/null
+ echo 0 > tracing_on
+ echo 0 > events/kmem/kmem_cache_free/enable
+}
+
+echo 0 > tracing_on
+echo 0 > events/enable
+
+echo "Get the most frequently calling function"
+sample_events
+
+target_func=`cut -d: -f3 trace | sed 's/call_site=\([^+]*\)+0x.*/\1/' | sort | uniq -c | sort | tail -n 1 | sed 's/^[ 0-9]*//'`
+if [ -z "$target_func" ]; then
+ exit_fail
+fi
+echo > trace
+
+echo "Test event filter function name"
+echo "call_site.function == $target_func" > events/kmem/kmem_cache_free/filter
+sample_events
+
+hitcnt=`grep kmem_cache_free trace| grep $target_func | wc -l`
+misscnt=`grep kmem_cache_free trace| grep -v $target_func | wc -l`
+
+if [ $hitcnt -eq 0 ]; then
+ exit_fail
+fi
+
+if [ $misscnt -gt 0 ]; then
+ exit_fail
+fi
+
+address=`grep " ${target_func}\$" /proc/kallsyms | cut -d' ' -f1`
+
+echo "Test event filter function address"
+echo "call_site.function == 0x$address" > events/kmem/kmem_cache_free/filter
+sample_events
+
+hitcnt=`grep kmem_cache_free trace| grep $target_func | wc -l`
+misscnt=`grep kmem_cache_free trace| grep -v $target_func | wc -l`
+
+if [ $hitcnt -eq 0 ]; then
+ exit_fail
+fi
+
+if [ $misscnt -gt 0 ]; then
+ exit_fail
+fi
+
+reset_events_filter
+
+exit 0
diff --git a/tools/testing/selftests/ftrace/test.d/ftrace/fgraph-retval.tc b/tools/testing/selftests/ftrace/test.d/ftrace/fgraph-retval.tc
new file mode 100644
index 000000000000..e34c0bdef3ed
--- /dev/null
+++ b/tools/testing/selftests/ftrace/test.d/ftrace/fgraph-retval.tc
@@ -0,0 +1,44 @@
+#!/bin/sh
+# SPDX-License-Identifier: GPL-2.0
+# description: ftrace - function graph print function return value
+# requires: options/funcgraph-retval options/funcgraph-retval-hex function_graph:tracer
+
+# Make sure that funcgraph-retval works
+
+fail() { # msg
+ echo $1
+ exit_fail
+}
+
+disable_tracing
+clear_trace
+
+# get self PID, can not use $$, because it is PPID
+read PID _ < /proc/self/stat
+
+[ -f set_ftrace_filter ] && echo proc_reg_write > set_ftrace_filter
+[ -f set_ftrace_pid ] && echo ${PID} > set_ftrace_pid
+echo function_graph > current_tracer
+echo 1 > options/funcgraph-retval
+
+set +e
+enable_tracing
+echo > /proc/interrupts
+disable_tracing
+set -e
+
+: "Test printing the error code in signed decimal format"
+echo 0 > options/funcgraph-retval-hex
+count=`cat trace | grep 'proc_reg_write' | grep '= -5' | wc -l`
+if [ $count -eq 0 ]; then
+ fail "Return value can not be printed in signed decimal format"
+fi
+
+: "Test printing the error code in hexadecimal format"
+echo 1 > options/funcgraph-retval-hex
+count=`cat trace | grep 'proc_reg_write' | grep 'fffffffb' | wc -l`
+if [ $count -eq 0 ]; then
+ fail "Return value can not be printed in hexadecimal format"
+fi
+
+exit 0
diff --git a/tools/testing/selftests/ftrace/test.d/ftrace/func-filter-notrace-pid.tc b/tools/testing/selftests/ftrace/test.d/ftrace/func-filter-notrace-pid.tc
index acb17ce543d2..80541964b927 100644
--- a/tools/testing/selftests/ftrace/test.d/ftrace/func-filter-notrace-pid.tc
+++ b/tools/testing/selftests/ftrace/test.d/ftrace/func-filter-notrace-pid.tc
@@ -39,7 +39,7 @@ do_test() {
disable_tracing
echo do_execve* > set_ftrace_filter
- echo *do_fork >> set_ftrace_filter
+ echo $FUNCTION_FORK >> set_ftrace_filter
echo $PID > set_ftrace_notrace_pid
echo function > current_tracer
diff --git a/tools/testing/selftests/ftrace/test.d/ftrace/func-filter-pid.tc b/tools/testing/selftests/ftrace/test.d/ftrace/func-filter-pid.tc
index 9f0a9687c773..2f7211254529 100644
--- a/tools/testing/selftests/ftrace/test.d/ftrace/func-filter-pid.tc
+++ b/tools/testing/selftests/ftrace/test.d/ftrace/func-filter-pid.tc
@@ -39,7 +39,7 @@ do_test() {
disable_tracing
echo do_execve* > set_ftrace_filter
- echo *do_fork >> set_ftrace_filter
+ echo $FUNCTION_FORK >> set_ftrace_filter
echo $PID > set_ftrace_pid
echo function > current_tracer
diff --git a/tools/testing/selftests/ftrace/test.d/ftrace/func-filter-stacktrace.tc b/tools/testing/selftests/ftrace/test.d/ftrace/func-filter-stacktrace.tc
index 0f41e441c203..191d116b7883 100644
--- a/tools/testing/selftests/ftrace/test.d/ftrace/func-filter-stacktrace.tc
+++ b/tools/testing/selftests/ftrace/test.d/ftrace/func-filter-stacktrace.tc
@@ -4,9 +4,9 @@
# requires: set_ftrace_filter
# flags: instance
-echo _do_fork:stacktrace >> set_ftrace_filter
+echo $FUNCTION_FORK:stacktrace >> set_ftrace_filter
-grep -q "_do_fork:stacktrace:unlimited" set_ftrace_filter
+grep -q "$FUNCTION_FORK:stacktrace:unlimited" set_ftrace_filter
(echo "forked"; sleep 1)
diff --git a/tools/testing/selftests/ftrace/test.d/ftrace/func_event_triggers.tc b/tools/testing/selftests/ftrace/test.d/ftrace/func_event_triggers.tc
index 3145b0f1835c..2ad7d4b501cc 100644
--- a/tools/testing/selftests/ftrace/test.d/ftrace/func_event_triggers.tc
+++ b/tools/testing/selftests/ftrace/test.d/ftrace/func_event_triggers.tc
@@ -38,11 +38,18 @@ cnt_trace() {
test_event_enabled() {
val=$1
+ check_times=10 # wait for 10 * SLEEP_TIME at most
- e=`cat $EVENT_ENABLE`
- if [ "$e" != $val ]; then
- fail "Expected $val but found $e"
- fi
+ while [ $check_times -ne 0 ]; do
+ e=`cat $EVENT_ENABLE`
+ if [ "$e" = $val ]; then
+ return 0
+ fi
+ sleep $SLEEP_TIME
+ check_times=$((check_times - 1))
+ done
+
+ fail "Expected $val but found $e"
}
run_enable_disable() {
@@ -85,7 +92,7 @@ run_enable_disable() {
echo $check_disable > $EVENT_ENABLE
done
sleep $SLEEP_TIME
- echo " make sure it's still works"
+ echo " make sure it still works"
test_event_enabled $check_enable_star
reset_ftrace_filter
diff --git a/tools/testing/selftests/ftrace/test.d/ftrace/func_hotplug.tc b/tools/testing/selftests/ftrace/test.d/ftrace/func_hotplug.tc
new file mode 100644
index 000000000000..ccfbfde3d942
--- /dev/null
+++ b/tools/testing/selftests/ftrace/test.d/ftrace/func_hotplug.tc
@@ -0,0 +1,42 @@
+#!/bin/sh
+# SPDX-License-Identifier: GPL-2.0-or-later
+# description: ftrace - function trace across cpu hotplug
+# requires: function:tracer
+
+if ! which nproc ; then
+ nproc() {
+ ls -d /sys/devices/system/cpu/cpu[0-9]* | wc -l
+ }
+fi
+
+NP=`nproc`
+
+if [ $NP -eq 1 ] ;then
+ echo "We cannot test cpu hotplug in UP environment"
+ exit_unresolved
+fi
+
+# Find online cpu
+for i in /sys/devices/system/cpu/cpu[1-9]*; do
+ if [ -f $i/online ] && [ "$(cat $i/online)" = "1" ]; then
+ cpu=$i
+ break
+ fi
+done
+
+if [ -z "$cpu" ]; then
+ echo "We cannot test cpu hotplug with a single cpu online"
+ exit_unresolved
+fi
+
+echo 0 > tracing_on
+echo > trace
+
+: "Set $(basename $cpu) offline/online with function tracer enabled"
+echo function > current_tracer
+echo 1 > tracing_on
+(echo 0 > $cpu/online)
+(echo "forked"; sleep 1)
+(echo 1 > $cpu/online)
+echo 0 > tracing_on
+echo nop > current_tracer
diff --git a/tools/testing/selftests/ftrace/test.d/ftrace/func_set_ftrace_file.tc b/tools/testing/selftests/ftrace/test.d/ftrace/func_set_ftrace_file.tc
index e96e279e0533..25432b8cd5bd 100644
--- a/tools/testing/selftests/ftrace/test.d/ftrace/func_set_ftrace_file.tc
+++ b/tools/testing/selftests/ftrace/test.d/ftrace/func_set_ftrace_file.tc
@@ -19,7 +19,7 @@ fail() { # mesg
FILTER=set_ftrace_filter
FUNC1="schedule"
-FUNC2="do_softirq"
+FUNC2="scheduler_tick"
ALL_FUNCS="#### all functions enabled ####"
diff --git a/tools/testing/selftests/ftrace/test.d/functions b/tools/testing/selftests/ftrace/test.d/functions
index c5dec55b7d95..779f3e62ec90 100644
--- a/tools/testing/selftests/ftrace/test.d/functions
+++ b/tools/testing/selftests/ftrace/test.d/functions
@@ -83,6 +83,27 @@ clear_synthetic_events() { # reset all current synthetic events
done
}
+clear_dynamic_events() { # reset all current dynamic events
+ again=1
+ stop=1
+ # loop mulitple times as some events require other to be removed first
+ while [ $again -eq 1 ]; do
+ stop=$((stop+1))
+ # Prevent infinite loops
+ if [ $stop -gt 10 ]; then
+ break;
+ fi
+ again=2
+ grep -v '^#' dynamic_events|
+ while read line; do
+ del=`echo $line | sed -e 's/^.\([^ ]*\).*/-\1/'`
+ if ! echo "$del" >> dynamic_events; then
+ again=1
+ fi
+ done
+ done
+}
+
initialize_ftrace() { # Reset ftrace to initial-state
# As the initial state, ftrace will be set to nop tracer,
# no events, no triggers, no filters, no function filters,
@@ -93,6 +114,7 @@ initialize_ftrace() { # Reset ftrace to initial-state
reset_events_filter
reset_ftrace_filter
disable_events
+ clear_dynamic_events
[ -f set_event_pid ] && echo > set_event_pid
[ -f set_ftrace_pid ] && echo > set_ftrace_pid
[ -f set_ftrace_notrace ] && echo > set_ftrace_notrace
@@ -102,20 +124,38 @@ initialize_ftrace() { # Reset ftrace to initial-state
[ -f uprobe_events ] && echo > uprobe_events
[ -f synthetic_events ] && echo > synthetic_events
[ -f snapshot ] && echo 0 > snapshot
+
+# Stop tracing while reading the trace file by default, to prevent
+# the test results while checking it and to avoid taking a long time
+# to check the result.
+ [ -f options/pause-on-trace ] && echo 1 > options/pause-on-trace
+
clear_trace
enable_tracing
}
+finish_ftrace() {
+ initialize_ftrace
+# And recover it to default.
+ [ -f options/pause-on-trace ] && echo 0 > options/pause-on-trace
+}
+
check_requires() { # Check required files and tracers
for i in "$@" ; do
+ p=${i%:program}
r=${i%:README}
t=${i%:tracer}
- if [ $t != $i ]; then
+ if [ $p != $i ]; then
+ if ! which $p ; then
+ echo "Required program $p is not found."
+ exit_unresolved
+ fi
+ elif [ $t != $i ]; then
if ! grep -wq $t available_tracers ; then
echo "Required tracer $t is not configured."
exit_unsupported
fi
- elif [ $r != $i ]; then
+ elif [ "$r" != "$i" ]; then
if ! grep -Fq "$r" README ; then
echo "Required feature pattern \"$r\" is not in README."
exit_unsupported
@@ -133,6 +173,13 @@ yield() {
ping $LOCALHOST -c 1 || sleep .001 || usleep 1 || sleep 1
}
+# The fork function in the kernel was renamed from "_do_fork" to
+# "kernel_fork". As older tests should still work with older kernels
+# as well as newer kernels, check which version of fork is used on this
+# kernel so that the tests can use the fork function for the running kernel.
+FUNCTION_FORK=`(if grep '\bkernel_clone\b' /proc/kallsyms > /dev/null; then
+ echo kernel_clone; else echo '_do_fork'; fi)`
+
# Since probe event command may include backslash, explicitly use printf "%s"
# to NOT interpret it.
ftrace_errlog_check() { # err-prefix command-with-error-pos-by-^ command-file
diff --git a/tools/testing/selftests/ftrace/test.d/instances/instance-event.tc b/tools/testing/selftests/ftrace/test.d/instances/instance-event.tc
index 0eb47fbb3f44..42422e425107 100644
--- a/tools/testing/selftests/ftrace/test.d/instances/instance-event.tc
+++ b/tools/testing/selftests/ftrace/test.d/instances/instance-event.tc
@@ -39,7 +39,7 @@ instance_read() {
instance_set() {
while :; do
- echo 1 > foo/events/sched/sched_switch
+ echo 1 > foo/events/sched/sched_switch/enable
done 2> /dev/null
}
diff --git a/tools/testing/selftests/ftrace/test.d/kprobe/add_and_remove.tc b/tools/testing/selftests/ftrace/test.d/kprobe/add_and_remove.tc
index eba858c21815..2428a3ed78c9 100644
--- a/tools/testing/selftests/ftrace/test.d/kprobe/add_and_remove.tc
+++ b/tools/testing/selftests/ftrace/test.d/kprobe/add_and_remove.tc
@@ -3,7 +3,7 @@
# description: Kprobe dynamic event - adding and removing
# requires: kprobe_events
-echo p:myevent _do_fork > kprobe_events
+echo p:myevent $FUNCTION_FORK > kprobe_events
grep myevent kprobe_events
test -d events/kprobes/myevent
echo > kprobe_events
diff --git a/tools/testing/selftests/ftrace/test.d/kprobe/busy_check.tc b/tools/testing/selftests/ftrace/test.d/kprobe/busy_check.tc
index d10bf4f05bc8..010a8b1d6c1d 100644
--- a/tools/testing/selftests/ftrace/test.d/kprobe/busy_check.tc
+++ b/tools/testing/selftests/ftrace/test.d/kprobe/busy_check.tc
@@ -3,7 +3,7 @@
# description: Kprobe dynamic event - busy event check
# requires: kprobe_events
-echo p:myevent _do_fork > kprobe_events
+echo p:myevent $FUNCTION_FORK > kprobe_events
test -d events/kprobes/myevent
echo 1 > events/kprobes/myevent/enable
echo > kprobe_events && exit_fail # this must fail
diff --git a/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_args.tc b/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_args.tc
index 61f2ac441aec..a96a1dc7014f 100644
--- a/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_args.tc
+++ b/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_args.tc
@@ -3,13 +3,13 @@
# description: Kprobe dynamic event with arguments
# requires: kprobe_events
-echo 'p:testprobe _do_fork $stack $stack0 +0($stack)' > kprobe_events
+echo "p:testprobe $FUNCTION_FORK \$stack \$stack0 +0(\$stack)" > kprobe_events
grep testprobe kprobe_events | grep -q 'arg1=\$stack arg2=\$stack0 arg3=+0(\$stack)'
test -d events/kprobes/testprobe
echo 1 > events/kprobes/testprobe/enable
( echo "forked")
-grep testprobe trace | grep '_do_fork' | \
+grep testprobe trace | grep "$FUNCTION_FORK" | \
grep -q 'arg1=0x[[:xdigit:]]* arg2=0x[[:xdigit:]]* arg3=0x[[:xdigit:]]*$'
echo 0 > events/kprobes/testprobe/enable
diff --git a/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_args_char.tc b/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_args_char.tc
new file mode 100644
index 000000000000..e21c9c27ece4
--- /dev/null
+++ b/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_args_char.tc
@@ -0,0 +1,60 @@
+#!/bin/sh
+# SPDX-License-Identifier: GPL-2.0
+# description: Kprobe event char type argument
+# requires: kprobe_events
+
+case `uname -m` in
+x86_64)
+ ARG1=%di
+;;
+i[3456]86)
+ ARG1=%ax
+;;
+aarch64)
+ ARG1=%x0
+;;
+arm*)
+ ARG1=%r0
+;;
+ppc64*)
+ ARG1=%r3
+;;
+ppc*)
+ ARG1=%r3
+;;
+s390*)
+ ARG1=%r2
+;;
+mips*)
+ ARG1=%r4
+;;
+loongarch*)
+ ARG1=%r4
+;;
+riscv*)
+ ARG1=%a0
+;;
+*)
+ echo "Please implement other architecture here"
+ exit_untested
+esac
+
+: "Test get argument (1)"
+if grep -q eventfs_create_dir available_filter_functions; then
+ DIR_NAME="eventfs_create_dir"
+elif grep -q eventfs_add_dir available_filter_functions; then
+ DIR_NAME="eventfs_add_dir"
+else
+ DIR_NAME="tracefs_create_dir"
+fi
+echo "p:testprobe ${DIR_NAME} arg1=+0(${ARG1}):char" > kprobe_events
+echo 1 > events/kprobes/testprobe/enable
+echo "p:test $FUNCTION_FORK" >> kprobe_events
+grep -qe "testprobe.* arg1='t'" trace
+
+echo 0 > events/kprobes/testprobe/enable
+: "Test get argument (2)"
+echo "p:testprobe ${DIR_NAME} arg1=+0(${ARG1}):char arg2=+0(${ARG1}):char[4]" > kprobe_events
+echo 1 > events/kprobes/testprobe/enable
+echo "p:test $FUNCTION_FORK" >> kprobe_events
+grep -qe "testprobe.* arg1='t' arg2={'t','e','s','t'}" trace
diff --git a/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_args_comm.tc b/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_args_comm.tc
index 05aaeed6987f..a053ee2e7d77 100644
--- a/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_args_comm.tc
+++ b/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_args_comm.tc
@@ -5,7 +5,7 @@
grep -A1 "fetcharg:" README | grep -q "\$comm" || exit_unsupported # this is too old
-echo 'p:testprobe _do_fork comm=$comm ' > kprobe_events
+echo "p:testprobe $FUNCTION_FORK comm=\$comm " > kprobe_events
grep testprobe kprobe_events | grep -q 'comm=$comm'
test -d events/kprobes/testprobe
diff --git a/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_args_string.tc b/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_args_string.tc
index b5fa05443b39..93217d459556 100644
--- a/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_args_string.tc
+++ b/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_args_string.tc
@@ -22,21 +22,40 @@ ppc64*)
ppc*)
ARG1=%r3
;;
+s390*)
+ ARG1=%r2
+;;
+mips*)
+ ARG1=%r4
+;;
+loongarch*)
+ ARG1=%r4
+;;
+riscv*)
+ ARG1=%a0
+;;
*)
echo "Please implement other architecture here"
exit_untested
esac
: "Test get argument (1)"
-echo "p:testprobe tracefs_create_dir arg1=+0(${ARG1}):string" > kprobe_events
+if grep -q eventfs_create_dir available_filter_functions; then
+ DIR_NAME="eventfs_create_dir"
+elif grep -q eventfs_add_dir available_filter_functions; then
+ DIR_NAME="eventfs_add_dir"
+else
+ DIR_NAME="tracefs_create_dir"
+fi
+echo "p:testprobe ${DIR_NAME} arg1=+0(${ARG1}):string" > kprobe_events
echo 1 > events/kprobes/testprobe/enable
-echo "p:test _do_fork" >> kprobe_events
+echo "p:test $FUNCTION_FORK" >> kprobe_events
grep -qe "testprobe.* arg1=\"test\"" trace
echo 0 > events/kprobes/testprobe/enable
: "Test get argument (2)"
-echo "p:testprobe tracefs_create_dir arg1=+0(${ARG1}):string arg2=+0(${ARG1}):string" > kprobe_events
+echo "p:testprobe ${DIR_NAME} arg1=+0(${ARG1}):string arg2=+0(${ARG1}):string" > kprobe_events
echo 1 > events/kprobes/testprobe/enable
-echo "p:test _do_fork" >> kprobe_events
+echo "p:test $FUNCTION_FORK" >> kprobe_events
grep -qe "testprobe.* arg1=\"test\" arg2=\"test\"" trace
diff --git a/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_args_symbol.tc b/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_args_symbol.tc
index b8c75a3d003c..717130ed4feb 100644
--- a/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_args_symbol.tc
+++ b/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_args_symbol.tc
@@ -14,12 +14,12 @@ elif ! grep "$SYMBOL\$" /proc/kallsyms; then
fi
: "Test get basic types symbol argument"
-echo "p:testprobe_u _do_fork arg1=@linux_proc_banner:u64 arg2=@linux_proc_banner:u32 arg3=@linux_proc_banner:u16 arg4=@linux_proc_banner:u8" > kprobe_events
-echo "p:testprobe_s _do_fork arg1=@linux_proc_banner:s64 arg2=@linux_proc_banner:s32 arg3=@linux_proc_banner:s16 arg4=@linux_proc_banner:s8" >> kprobe_events
+echo "p:testprobe_u $FUNCTION_FORK arg1=@linux_proc_banner:u64 arg2=@linux_proc_banner:u32 arg3=@linux_proc_banner:u16 arg4=@linux_proc_banner:u8" > kprobe_events
+echo "p:testprobe_s $FUNCTION_FORK arg1=@linux_proc_banner:s64 arg2=@linux_proc_banner:s32 arg3=@linux_proc_banner:s16 arg4=@linux_proc_banner:s8" >> kprobe_events
if grep -q "x8/16/32/64" README; then
- echo "p:testprobe_x _do_fork arg1=@linux_proc_banner:x64 arg2=@linux_proc_banner:x32 arg3=@linux_proc_banner:x16 arg4=@linux_proc_banner:x8" >> kprobe_events
+ echo "p:testprobe_x $FUNCTION_FORK arg1=@linux_proc_banner:x64 arg2=@linux_proc_banner:x32 arg3=@linux_proc_banner:x16 arg4=@linux_proc_banner:x8" >> kprobe_events
fi
-echo "p:testprobe_bf _do_fork arg1=@linux_proc_banner:b8@4/32" >> kprobe_events
+echo "p:testprobe_bf $FUNCTION_FORK arg1=@linux_proc_banner:b8@4/32" >> kprobe_events
echo 1 > events/kprobes/enable
(echo "forked")
echo 0 > events/kprobes/enable
@@ -27,7 +27,7 @@ grep "testprobe_[usx]:.* arg1=.* arg2=.* arg3=.* arg4=.*" trace
grep "testprobe_bf:.* arg1=.*" trace
: "Test get string symbol argument"
-echo "p:testprobe_str _do_fork arg1=@linux_proc_banner:string" > kprobe_events
+echo "p:testprobe_str $FUNCTION_FORK arg1=@linux_proc_banner:string" > kprobe_events
echo 1 > events/kprobes/enable
(echo "forked")
echo 0 > events/kprobes/enable
diff --git a/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_args_syntax.tc b/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_args_syntax.tc
index 474ca1a9a088..8f1292ad80ff 100644
--- a/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_args_syntax.tc
+++ b/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_args_syntax.tc
@@ -32,6 +32,22 @@ ppc*)
GOODREG=%r3
BADREG=%msr
;;
+s390*)
+ GOODREG=%r2
+ BADREG=%s2
+;;
+mips*)
+ GOODREG=%r4
+ BADREG=%r12
+;;
+loongarch*)
+ GOODREG=%r4
+ BADREG=%r12
+;;
+riscv*)
+ GOODREG=%a0
+ BADREG=%a8
+;;
*)
echo "Please implement other architecture here"
exit_untested
diff --git a/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_args_type.tc b/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_args_type.tc
index 0610e0b5587c..25b7708eb559 100644
--- a/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_args_type.tc
+++ b/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_args_type.tc
@@ -4,7 +4,7 @@
# requires: kprobe_events "x8/16/32/64":README
gen_event() { # Bitsize
- echo "p:testprobe _do_fork \$stack0:s$1 \$stack0:u$1 \$stack0:x$1 \$stack0:b4@4/$1"
+ echo "p:testprobe $FUNCTION_FORK \$stack0:s$1 \$stack0:u$1 \$stack0:x$1 \$stack0:b4@4/$1"
}
check_types() { # s-type u-type x-type bf-type width
diff --git a/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_args_user.tc b/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_args_user.tc
index a30a9c07290d..d25d01a19778 100644
--- a/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_args_user.tc
+++ b/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_args_user.tc
@@ -9,12 +9,16 @@ grep -A10 "fetcharg:" README | grep -q '\[u\]<offset>' || exit_unsupported
:;: "user-memory access syntax and ustring working on user memory";:
echo 'p:myevent do_sys_open path=+0($arg2):ustring path2=+u0($arg2):string' \
> kprobe_events
+echo 'p:myevent2 do_sys_openat2 path=+0($arg2):ustring path2=+u0($arg2):string' \
+ >> kprobe_events
grep myevent kprobe_events | \
grep -q 'path=+0($arg2):ustring path2=+u0($arg2):string'
echo 1 > events/kprobes/myevent/enable
+echo 1 > events/kprobes/myevent2/enable
echo > /dev/null
echo 0 > events/kprobes/myevent/enable
+echo 0 > events/kprobes/myevent2/enable
grep myevent trace | grep -q 'path="/dev/null" path2="/dev/null"'
diff --git a/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_ftrace.tc b/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_ftrace.tc
index 81d8b58c03bc..5556292601a4 100644
--- a/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_ftrace.tc
+++ b/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_ftrace.tc
@@ -5,29 +5,29 @@
# prepare
echo nop > current_tracer
-echo _do_fork > set_ftrace_filter
-echo 'p:testprobe _do_fork' > kprobe_events
+echo $FUNCTION_FORK > set_ftrace_filter
+echo "p:testprobe $FUNCTION_FORK" > kprobe_events
# kprobe on / ftrace off
echo 1 > events/kprobes/testprobe/enable
echo > trace
( echo "forked")
grep testprobe trace
-! grep '_do_fork <-' trace
+! grep "$FUNCTION_FORK <-" trace
# kprobe on / ftrace on
echo function > current_tracer
echo > trace
( echo "forked")
grep testprobe trace
-grep '_do_fork <-' trace
+grep "$FUNCTION_FORK <-" trace
# kprobe off / ftrace on
echo 0 > events/kprobes/testprobe/enable
echo > trace
( echo "forked")
! grep testprobe trace
-grep '_do_fork <-' trace
+grep "$FUNCTION_FORK <-" trace
# kprobe on / ftrace on
echo 1 > events/kprobes/testprobe/enable
@@ -35,11 +35,11 @@ echo function > current_tracer
echo > trace
( echo "forked")
grep testprobe trace
-grep '_do_fork <-' trace
+grep "$FUNCTION_FORK <-" trace
# kprobe on / ftrace off
echo nop > current_tracer
echo > trace
( echo "forked")
grep testprobe trace
-! grep '_do_fork <-' trace
+! grep "$FUNCTION_FORK <-" trace
diff --git a/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_insn_boundary.tc b/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_insn_boundary.tc
new file mode 100644
index 000000000000..4f7cc318f331
--- /dev/null
+++ b/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_insn_boundary.tc
@@ -0,0 +1,19 @@
+#!/bin/sh
+# SPDX-License-Identifier: GPL-2.0-or-later
+# Copyright (C) 2023 Akanksha J N, IBM corporation
+# description: Register multiple kprobe events in a function
+# requires: kprobe_events
+
+for i in `seq 0 255`; do
+ echo p $FUNCTION_FORK+${i} >> kprobe_events || continue
+done
+
+cat kprobe_events >> $testlog
+
+echo 1 > events/kprobes/enable
+( echo "forked" )
+echo 0 > events/kprobes/enable
+echo > kprobe_events
+echo "Waiting for unoptimizing & freeing"
+sleep 5
+echo "Done"
diff --git a/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_multiprobe.tc b/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_multiprobe.tc
index 366b7e1b6718..f0d5b7777ed7 100644
--- a/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_multiprobe.tc
+++ b/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_multiprobe.tc
@@ -4,7 +4,7 @@
# requires: kprobe_events "Create/append/":README
# Choose 2 symbols for target
-SYM1=_do_fork
+SYM1=$FUNCTION_FORK
SYM2=do_exit
EVENT_NAME=kprobes/testevent
diff --git a/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_non_uniq_symbol.tc b/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_non_uniq_symbol.tc
new file mode 100644
index 000000000000..bc9514428dba
--- /dev/null
+++ b/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_non_uniq_symbol.tc
@@ -0,0 +1,13 @@
+#!/bin/sh
+# SPDX-License-Identifier: GPL-2.0
+# description: Test failure of registering kprobe on non unique symbol
+# requires: kprobe_events
+
+SYMBOL='name_show'
+
+# We skip this test on kernel where SYMBOL is unique or does not exist.
+if [ "$(grep -c -E "[[:alnum:]]+ t ${SYMBOL}" /proc/kallsyms)" -le '1' ]; then
+ exit_unsupported
+fi
+
+! echo "p:test_non_unique ${SYMBOL}" > kprobe_events
diff --git a/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_opt_types.tc b/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_opt_types.tc
new file mode 100644
index 000000000000..9f5d99328086
--- /dev/null
+++ b/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_opt_types.tc
@@ -0,0 +1,34 @@
+#!/bin/sh
+# SPDX-License-Identifier: GPL-2.0-or-later
+# Copyright (C) 2023 Akanksha J N, IBM corporation
+# description: Register/unregister optimized probe
+# requires: kprobe_events
+
+case `uname -m` in
+x86_64)
+;;
+arm*)
+;;
+ppc*)
+;;
+*)
+ echo "Please implement other architecture here"
+ exit_unsupported
+esac
+
+DEFAULT=$(cat /proc/sys/debug/kprobes-optimization)
+echo 1 > /proc/sys/debug/kprobes-optimization
+for i in `seq 0 255`; do
+ echo "p:testprobe $FUNCTION_FORK+${i}" > kprobe_events || continue
+ echo 1 > events/kprobes/enable || continue
+ (echo "forked")
+ PROBE=$(grep $FUNCTION_FORK /sys/kernel/debug/kprobes/list)
+ echo 0 > events/kprobes/enable
+ echo > kprobe_events
+ if echo $PROBE | grep -q OPTIMIZED; then
+ echo "$DEFAULT" > /proc/sys/debug/kprobes-optimization
+ exit_pass
+ fi
+done
+echo "$DEFAULT" > /proc/sys/debug/kprobes-optimization
+exit_unresolved
diff --git a/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_syntax_errors.tc b/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_syntax_errors.tc
index b4d834675e59..a16c6a6f6055 100644
--- a/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_syntax_errors.tc
+++ b/tools/testing/selftests/ftrace/test.d/kprobe/kprobe_syntax_errors.tc
@@ -8,7 +8,7 @@ check_error() { # command-with-error-pos-by-^
}
if grep -q 'r\[maxactive\]' README; then
-check_error 'p^100 vfs_read' # MAXACT_NO_KPROBE
+check_error 'p^100 vfs_read' # BAD_MAXACT_TYPE
check_error 'r^1a111 vfs_read' # BAD_MAXACT
check_error 'r^100000 vfs_read' # MAXACT_TOO_BIG
fi
@@ -21,7 +21,7 @@ check_error 'p:^/bar vfs_read' # NO_GROUP_NAME
check_error 'p:^12345678901234567890123456789012345678901234567890123456789012345/bar vfs_read' # GROUP_TOO_LONG
check_error 'p:^foo.1/bar vfs_read' # BAD_GROUP_NAME
-check_error 'p:foo/^ vfs_read' # NO_EVENT_NAME
+check_error 'p:^ vfs_read' # NO_EVENT_NAME
check_error 'p:foo/^12345678901234567890123456789012345678901234567890123456789012345 vfs_read' # EVENT_TOO_LONG
check_error 'p:foo/^bar.1 vfs_read' # BAD_EVENT_NAME
@@ -86,15 +86,37 @@ esac
# multiprobe errors
if grep -q "Create/append/" README && grep -q "imm-value" README; then
-echo 'p:kprobes/testevent _do_fork' > kprobe_events
+echo "p:kprobes/testevent $FUNCTION_FORK" > kprobe_events
check_error '^r:kprobes/testevent do_exit' # DIFF_PROBE_TYPE
# Explicitly use printf "%s" to not interpret \1
-printf "%s" 'p:kprobes/testevent _do_fork abcd=\1' > kprobe_events
-check_error 'p:kprobes/testevent _do_fork ^bcd=\1' # DIFF_ARG_TYPE
-check_error 'p:kprobes/testevent _do_fork ^abcd=\1:u8' # DIFF_ARG_TYPE
-check_error 'p:kprobes/testevent _do_fork ^abcd=\"foo"' # DIFF_ARG_TYPE
-check_error '^p:kprobes/testevent _do_fork abcd=\1' # SAME_PROBE
+printf "%s" "p:kprobes/testevent $FUNCTION_FORK abcd=\\1" > kprobe_events
+check_error "p:kprobes/testevent $FUNCTION_FORK ^bcd=\\1" # DIFF_ARG_TYPE
+check_error "p:kprobes/testevent $FUNCTION_FORK ^abcd=\\1:u8" # DIFF_ARG_TYPE
+check_error "p:kprobes/testevent $FUNCTION_FORK ^abcd=\\\"foo\"" # DIFF_ARG_TYPE
+check_error "^p:kprobes/testevent $FUNCTION_FORK abcd=\\1" # SAME_PROBE
+fi
+
+# %return suffix errors
+if grep -q "place (kretprobe): .*%return.*" README; then
+check_error 'p vfs_read^%hoge' # BAD_ADDR_SUFFIX
+check_error 'p ^vfs_read+10%return' # BAD_RETPROBE
+fi
+
+# BTF arguments errors
+if grep -q "<argname>" README; then
+check_error 'p vfs_read args=^$arg*' # BAD_VAR_ARGS
+check_error 'p vfs_read +0(^$arg*)' # BAD_VAR_ARGS
+check_error 'p vfs_read $arg* ^$arg*' # DOUBLE_ARGS
+if !grep -q 'kernel return probes support:' README; then
+check_error 'r vfs_read ^$arg*' # NOFENTRY_ARGS
+fi
+check_error 'p vfs_read+8 ^$arg*' # NOFENTRY_ARGS
+check_error 'p vfs_read ^hoge' # NO_BTFARG
+check_error 'p kfree ^$arg10' # NO_BTFARG (exceed the number of parameters)
+check_error 'r kfree ^$retval' # NO_RETVAL
+else
+check_error 'p vfs_read ^$arg*' # NOSUP_BTFARG
fi
exit 0
diff --git a/tools/testing/selftests/ftrace/test.d/kprobe/kretprobe_args.tc b/tools/testing/selftests/ftrace/test.d/kprobe/kretprobe_args.tc
index 523fde6d1aa5..197cc2afd404 100644
--- a/tools/testing/selftests/ftrace/test.d/kprobe/kretprobe_args.tc
+++ b/tools/testing/selftests/ftrace/test.d/kprobe/kretprobe_args.tc
@@ -4,14 +4,14 @@
# requires: kprobe_events
# Add new kretprobe event
-echo 'r:testprobe2 _do_fork $retval' > kprobe_events
+echo "r:testprobe2 $FUNCTION_FORK \$retval" > kprobe_events
grep testprobe2 kprobe_events | grep -q 'arg1=\$retval'
test -d events/kprobes/testprobe2
echo 1 > events/kprobes/testprobe2/enable
( echo "forked")
-cat trace | grep testprobe2 | grep -q '<- _do_fork'
+cat trace | grep testprobe2 | grep -q "<- $FUNCTION_FORK"
echo 0 > events/kprobes/testprobe2/enable
echo '-:testprobe2' >> kprobe_events
diff --git a/tools/testing/selftests/ftrace/test.d/kprobe/kretprobe_entry_arg.tc b/tools/testing/selftests/ftrace/test.d/kprobe/kretprobe_entry_arg.tc
new file mode 100644
index 000000000000..53b82f36a1d0
--- /dev/null
+++ b/tools/testing/selftests/ftrace/test.d/kprobe/kretprobe_entry_arg.tc
@@ -0,0 +1,18 @@
+#!/bin/sh
+# SPDX-License-Identifier: GPL-2.0
+# description: Kretprobe entry argument access
+# requires: kprobe_events 'kernel return probes support:':README
+
+echo 'p:myevent1 vfs_open arg=$arg1' >> kprobe_events
+echo 'r:myevent2 vfs_open arg=$arg1' >> kprobe_events
+
+echo 1 > events/kprobes/enable
+
+echo > trace
+cat trace > /dev/null
+
+function streq() {
+ test $1 = $2
+}
+
+streq `grep -A 1 -m 1 myevent1 trace | sed -r 's/^.*(arg=.*)/\1/' `
diff --git a/tools/testing/selftests/ftrace/test.d/kprobe/kretprobe_return_suffix.tc b/tools/testing/selftests/ftrace/test.d/kprobe/kretprobe_return_suffix.tc
new file mode 100644
index 000000000000..f07bd15cc033
--- /dev/null
+++ b/tools/testing/selftests/ftrace/test.d/kprobe/kretprobe_return_suffix.tc
@@ -0,0 +1,21 @@
+#!/bin/sh
+# SPDX-License-Identifier: GPL-2.0
+# description: Kretprobe %%return suffix test
+# requires: kprobe_events '<symbol>[+<offset>]%return':README
+
+# Test for kretprobe by "r"
+echo 'r:myprobeaccept vfs_read' > kprobe_events
+RESULT1=`cat kprobe_events`
+
+# Test for kretprobe by "%return"
+echo 'p:myprobeaccept vfs_read%return' > kprobe_events
+RESULT2=`cat kprobe_events`
+
+if [ "$RESULT1" != "$RESULT2" ]; then
+ echo "Error: %return suffix didn't make a return probe."
+ echo "r-command: $RESULT1"
+ echo "%return: $RESULT2"
+ exit_fail
+fi
+
+echo > kprobe_events
diff --git a/tools/testing/selftests/ftrace/test.d/kprobe/multiple_kprobes.tc b/tools/testing/selftests/ftrace/test.d/kprobe/multiple_kprobes.tc
index 312d23780096..be754f5bcf79 100644
--- a/tools/testing/selftests/ftrace/test.d/kprobe/multiple_kprobes.tc
+++ b/tools/testing/selftests/ftrace/test.d/kprobe/multiple_kprobes.tc
@@ -25,6 +25,8 @@ if [ $L -ne 256 ]; then
exit_fail
fi
+cat kprobe_events >> $testlog
+
echo 1 > events/kprobes/enable
echo 0 > events/kprobes/enable
echo > kprobe_events
diff --git a/tools/testing/selftests/ftrace/test.d/kprobe/probepoint.tc b/tools/testing/selftests/ftrace/test.d/kprobe/probepoint.tc
index 624269c8d534..68425987a5dd 100644
--- a/tools/testing/selftests/ftrace/test.d/kprobe/probepoint.tc
+++ b/tools/testing/selftests/ftrace/test.d/kprobe/probepoint.tc
@@ -21,7 +21,7 @@ set_offs() { # prev target next
# We have to decode symbol addresses to get correct offsets.
# If the offset is not an instruction boundary, it cause -EILSEQ.
-set_offs `grep -A1 -B1 ${TARGET_FUNC} /proc/kallsyms | cut -f 1 -d " " | xargs`
+set_offs `grep -v __pfx_ /proc/kallsyms | grep -A1 -B1 ${TARGET_FUNC} | cut -f 1 -d " " | xargs`
UINT_TEST=no
# printf "%x" -1 returns (unsigned long)-1.
diff --git a/tools/testing/selftests/ftrace/test.d/kprobe/profile.tc b/tools/testing/selftests/ftrace/test.d/kprobe/profile.tc
index ff6c44adc8a0..34fb89b0c61f 100644
--- a/tools/testing/selftests/ftrace/test.d/kprobe/profile.tc
+++ b/tools/testing/selftests/ftrace/test.d/kprobe/profile.tc
@@ -1,10 +1,10 @@
#!/bin/sh
# SPDX-License-Identifier: GPL-2.0
-# description: Kprobe dynamic event - adding and removing
+# description: Kprobe profile
# requires: kprobe_events
! grep -q 'myevent' kprobe_profile
-echo p:myevent _do_fork > kprobe_events
+echo "p:myevent $FUNCTION_FORK" > kprobe_events
grep -q 'myevent[[:space:]]*0[[:space:]]*0$' kprobe_profile
echo 1 > events/kprobes/myevent/enable
( echo "forked" )
diff --git a/tools/testing/selftests/ftrace/test.d/kprobe/uprobe_syntax_errors.tc b/tools/testing/selftests/ftrace/test.d/kprobe/uprobe_syntax_errors.tc
index 7b5b60c3c5a2..c817158b99db 100644
--- a/tools/testing/selftests/ftrace/test.d/kprobe/uprobe_syntax_errors.tc
+++ b/tools/testing/selftests/ftrace/test.d/kprobe/uprobe_syntax_errors.tc
@@ -17,4 +17,15 @@ check_error 'p /bin/sh:10(10)^a' # BAD_REFCNT_SUFFIX
check_error 'p /bin/sh:10 ^@+ab' # BAD_FILE_OFFS
check_error 'p /bin/sh:10 ^@symbol' # SYM_ON_UPROBE
+# %return suffix error
+if grep -q "place (uprobe): .*%return.*" README; then
+check_error 'p /bin/sh:10^%hoge' # BAD_ADDR_SUFFIX
+check_error 'p /bin/sh:10(10)^%return' # BAD_REFCNT_SUFFIX
+fi
+
+# symstr is not supported by uprobe
+if grep -q ".*symstr.*" README; then
+check_error 'p /bin/sh:10 $stack0:^symstr' # BAD_TYPE
+fi
+
exit 0
diff --git a/tools/testing/selftests/ftrace/test.d/preemptirq/irqsoff_tracer.tc b/tools/testing/selftests/ftrace/test.d/preemptirq/irqsoff_tracer.tc
index 22bff122b933..ba1038953873 100644
--- a/tools/testing/selftests/ftrace/test.d/preemptirq/irqsoff_tracer.tc
+++ b/tools/testing/selftests/ftrace/test.d/preemptirq/irqsoff_tracer.tc
@@ -46,10 +46,10 @@ cat trace
grep -q "tracer: preemptoff" trace || fail
# Check the end of the section
-egrep -q "5.....us : <stack trace>" trace || fail
+grep -E -q "5.....us : <stack trace>" trace || fail
# Check for 500ms of latency
-egrep -q "latency: 5..... us" trace || fail
+grep -E -q "latency: 5..... us" trace || fail
reset_tracer
@@ -69,10 +69,10 @@ cat trace
grep -q "tracer: irqsoff" trace || fail
# Check the end of the section
-egrep -q "5.....us : <stack trace>" trace || fail
+grep -E -q "5.....us : <stack trace>" trace || fail
# Check for 500ms of latency
-egrep -q "latency: 5..... us" trace || fail
+grep -E -q "latency: 5..... us" trace || fail
reset_tracer
exit 0
diff --git a/tools/testing/selftests/ftrace/test.d/tracer/wakeup.tc b/tools/testing/selftests/ftrace/test.d/tracer/wakeup.tc
index 11be10e1bf96..e8f0fac9a110 100644
--- a/tools/testing/selftests/ftrace/test.d/tracer/wakeup.tc
+++ b/tools/testing/selftests/ftrace/test.d/tracer/wakeup.tc
@@ -1,12 +1,7 @@
#!/bin/sh
# SPDX-License-Identifier: GPL-2.0
# description: Test wakeup tracer
-# requires: wakeup:tracer
-
-if ! which chrt ; then
- echo "chrt is not found. This test requires nice command."
- exit_unresolved
-fi
+# requires: wakeup:tracer chrt:program
echo wakeup > current_tracer
echo 1 > tracing_on
diff --git a/tools/testing/selftests/ftrace/test.d/tracer/wakeup_rt.tc b/tools/testing/selftests/ftrace/test.d/tracer/wakeup_rt.tc
index 3a77198b3c69..79807656785b 100644
--- a/tools/testing/selftests/ftrace/test.d/tracer/wakeup_rt.tc
+++ b/tools/testing/selftests/ftrace/test.d/tracer/wakeup_rt.tc
@@ -1,12 +1,7 @@
#!/bin/sh
# SPDX-License-Identifier: GPL-2.0
# description: Test wakeup RT tracer
-# requires: wakeup_rt:tracer
-
-if ! which chrt ; then
- echo "chrt is not found. This test requires chrt command."
- exit_unresolved
-fi
+# requires: wakeup_rt:tracer chrt:program
echo wakeup_rt > current_tracer
echo 1 > tracing_on
diff --git a/tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-field-variable-support.tc b/tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-field-variable-support.tc
index 41119e0440e9..04c5dd7d0acc 100644
--- a/tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-field-variable-support.tc
+++ b/tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-field-variable-support.tc
@@ -1,7 +1,7 @@
#!/bin/sh
# SPDX-License-Identifier: GPL-2.0
# description: event trigger - test field variable support
-# requires: set_event synthetic_events events/sched/sched_process_fork/hist
+# requires: set_event synthetic_events events/sched/sched_process_fork/hist ping:program
fail() { #msg
echo $1
diff --git a/tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-inter-event-combined-hist.tc b/tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-inter-event-combined-hist.tc
index 7449a4b8f1f9..f7447d800899 100644
--- a/tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-inter-event-combined-hist.tc
+++ b/tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-inter-event-combined-hist.tc
@@ -1,7 +1,7 @@
#!/bin/sh
# SPDX-License-Identifier: GPL-2.0
# description: event trigger - test inter-event combined histogram trigger
-# requires: set_event synthetic_events events/sched/sched_process_fork/hist
+# requires: set_event synthetic_events events/sched/sched_process_fork/hist ping:program
fail() { #msg
echo $1
@@ -25,12 +25,12 @@ echo 'wakeup_latency u64 lat pid_t pid' >> synthetic_events
echo 'hist:keys=pid:ts1=common_timestamp.usecs if comm=="ping"' >> events/sched/sched_wakeup/trigger
echo 'hist:keys=next_pid:wakeup_lat=common_timestamp.usecs-$ts1:onmatch(sched.sched_wakeup).wakeup_latency($wakeup_lat,next_pid) if next_comm=="ping"' > events/sched/sched_switch/trigger
-echo 'waking+wakeup_latency u64 lat; pid_t pid' >> synthetic_events
-echo 'hist:keys=pid,lat:sort=pid,lat:ww_lat=$waking_lat+$wakeup_lat:onmatch(synthetic.wakeup_latency).waking+wakeup_latency($ww_lat,pid)' >> events/synthetic/wakeup_latency/trigger
-echo 'hist:keys=pid,lat:sort=pid,lat' >> events/synthetic/waking+wakeup_latency/trigger
+echo 'waking_plus_wakeup_latency u64 lat; pid_t pid' >> synthetic_events
+echo 'hist:keys=pid,lat:sort=pid,lat:ww_lat=$waking_lat+$wakeup_lat:onmatch(synthetic.wakeup_latency).waking_plus_wakeup_latency($ww_lat,pid)' >> events/synthetic/wakeup_latency/trigger
+echo 'hist:keys=pid,lat:sort=pid,lat' >> events/synthetic/waking_plus_wakeup_latency/trigger
ping $LOCALHOST -c 3
-if ! grep -q "pid:" events/synthetic/waking+wakeup_latency/hist; then
+if ! grep -q "pid:" events/synthetic/waking_plus_wakeup_latency/hist; then
fail "Failed to create combined histogram"
fi
diff --git a/tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-onchange-action-hist.tc b/tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-onchange-action-hist.tc
index adaabb873ed4..91339c130832 100644
--- a/tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-onchange-action-hist.tc
+++ b/tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-onchange-action-hist.tc
@@ -1,7 +1,7 @@
#!/bin/sh
# SPDX-License-Identifier: GPL-2.0
# description: event trigger - test inter-event histogram trigger onchange action
-# requires: set_event "onchange(var)":README
+# requires: set_event "onchange(var)":README ping:program
fail() { #msg
echo $1
diff --git a/tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-onmatch-action-hist.tc b/tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-onmatch-action-hist.tc
index 20e39471052e..d645abcf11c4 100644
--- a/tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-onmatch-action-hist.tc
+++ b/tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-onmatch-action-hist.tc
@@ -1,7 +1,7 @@
#!/bin/sh
# SPDX-License-Identifier: GPL-2.0
# description: event trigger - test inter-event histogram trigger onmatch action
-# requires: set_event synthetic_events events/sched/sched_process_fork/hist
+# requires: set_event synthetic_events events/sched/sched_process_fork/hist ping:program
fail() { #msg
echo $1
diff --git a/tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-onmatch-onmax-action-hist.tc b/tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-onmatch-onmax-action-hist.tc
index f4b03ab7c287..c369247efb35 100644
--- a/tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-onmatch-onmax-action-hist.tc
+++ b/tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-onmatch-onmax-action-hist.tc
@@ -1,7 +1,7 @@
#!/bin/sh
# SPDX-License-Identifier: GPL-2.0
# description: event trigger - test inter-event histogram trigger onmatch-onmax action
-# requires: set_event synthetic_events events/sched/sched_process_fork/hist
+# requires: set_event synthetic_events events/sched/sched_process_fork/hist ping:program
fail() { #msg
echo $1
diff --git a/tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-onmax-action-hist.tc b/tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-onmax-action-hist.tc
index 71c9b5911c70..e28dc5f11b2b 100644
--- a/tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-onmax-action-hist.tc
+++ b/tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-onmax-action-hist.tc
@@ -1,7 +1,7 @@
#!/bin/sh
# SPDX-License-Identifier: GPL-2.0
# description: event trigger - test inter-event histogram trigger onmax action
-# requires: set_event synthetic_events events/sched/sched_process_fork/hist
+# requires: set_event synthetic_events events/sched/sched_process_fork/hist ping:program
fail() { #msg
echo $1
diff --git a/tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-snapshot-action-hist.tc b/tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-snapshot-action-hist.tc
index 67fa328b830f..147967e86584 100644
--- a/tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-snapshot-action-hist.tc
+++ b/tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-snapshot-action-hist.tc
@@ -1,7 +1,7 @@
#!/bin/sh
# SPDX-License-Identifier: GPL-2.0
# description: event trigger - test inter-event histogram trigger snapshot action
-# requires: set_event snapshot events/sched/sched_process_fork/hist "onchange(var)":README "snapshot()":README
+# requires: set_event snapshot events/sched/sched_process_fork/hist "onchange(var)":README "snapshot()":README ping:program
fail() { #msg
echo $1
diff --git a/tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-synthetic-eprobe.tc b/tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-synthetic-eprobe.tc
new file mode 100644
index 000000000000..c2a8ab01e13b
--- /dev/null
+++ b/tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-synthetic-eprobe.tc
@@ -0,0 +1,53 @@
+#!/bin/sh
+# SPDX-License-Identifier: GPL-2.0
+# description: event trigger - test inter-event histogram trigger eprobe on synthetic event
+# requires: dynamic_events synthetic_events events/syscalls/sys_enter_openat/hist "e[:[<group>/][<event>]] <attached-group>.<attached-event> [<args>]":README
+
+echo 0 > events/enable
+
+clear_dynamic_events
+
+SYSTEM="syscalls"
+START="sys_enter_openat"
+END="sys_exit_openat"
+FIELD="filename"
+SYNTH="synth_open"
+EPROBE="eprobe_open"
+
+echo "$SYNTH unsigned long filename; long ret;" > synthetic_events
+echo "hist:keys=common_pid:__arg__1=$FIELD" > events/$SYSTEM/$START/trigger
+echo "hist:keys=common_pid:filename=\$__arg__1,ret=ret:onmatch($SYSTEM.$START).trace($SYNTH,\$filename,\$ret)" > events/$SYSTEM/$END/trigger
+
+echo "e:$EPROBE synthetic/$SYNTH file=+0(\$filename):ustring ret=\$ret:s64" >> dynamic_events
+
+grep -q "$SYNTH" dynamic_events
+grep -q "$EPROBE" dynamic_events
+test -d events/synthetic/$SYNTH
+test -d events/eprobes/$EPROBE
+
+echo 1 > events/eprobes/$EPROBE/enable
+ls
+echo 0 > events/eprobes/$EPROBE/enable
+
+content=`grep '^ *ls-' trace | grep 'file='`
+nocontent=`grep '^ *ls-' trace | grep 'file=' | grep -v -e '"/' -e '"."'` || true
+
+if [ -z "$content" ]; then
+ exit_fail
+fi
+
+if [ ! -z "$nocontent" ]; then
+ exit_fail
+fi
+
+echo "-:$EPROBE" >> dynamic_events
+echo '!'"hist:keys=common_pid:filename=\$__arg__1,ret=ret:onmatch($SYSTEM.$START).trace($SYNTH,\$filename,\$ret)" > events/$SYSTEM/$END/trigger
+echo '!'"hist:keys=common_pid:__arg__1=$FIELD" > events/$SYSTEM/$START/trigger
+echo '!'"$SYNTH u64 filename; s64 ret;" >> synthetic_events
+
+! grep -q "$SYNTH" dynamic_events
+! grep -q "$EPROBE" dynamic_events
+! test -d events/synthetic/$SYNTH
+! test -d events/eprobes/$EPROBE
+
+clear_trace
diff --git a/tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-synthetic-event-dynstring.tc b/tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-synthetic-event-dynstring.tc
new file mode 100644
index 000000000000..174376ddbc6c
--- /dev/null
+++ b/tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-synthetic-event-dynstring.tc
@@ -0,0 +1,31 @@
+#!/bin/sh
+# SPDX-License-Identifier: GPL-2.0
+# description: event trigger - test inter-event histogram trigger trace action with dynamic string param
+# requires: set_event synthetic_events events/sched/sched_process_exec/hist "' >> synthetic_events":README ping:program
+
+fail() { #msg
+ echo $1
+ exit_fail
+}
+
+echo "Test create synthetic event"
+
+echo 'ping_test_latency u64 lat; char filename[]' > synthetic_events
+if [ ! -d events/synthetic/ping_test_latency ]; then
+ fail "Failed to create ping_test_latency synthetic event"
+fi
+
+echo "Test create histogram for synthetic event using trace action and dynamic strings"
+echo "Test histogram dynamic string variables,simple expression support and trace action"
+
+echo 'hist:key=pid:filenamevar=filename:ts0=common_timestamp.usecs' > events/sched/sched_process_exec/trigger
+echo 'hist:key=pid:lat=common_timestamp.usecs-$ts0:onmatch(sched.sched_process_exec).ping_test_latency($lat,$filenamevar) if comm == "ping"' > events/sched/sched_process_exit/trigger
+echo 'hist:keys=filename,lat:sort=filename,lat' > events/synthetic/ping_test_latency/trigger
+
+ping $LOCALHOST -c 5
+
+if ! grep -q "ping" events/synthetic/ping_test_latency/hist; then
+ fail "Failed to create dynamic string trace action inter-event histogram"
+fi
+
+exit 0
diff --git a/tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-synthetic-event-stack-legacy.tc b/tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-synthetic-event-stack-legacy.tc
new file mode 100644
index 000000000000..d0cd91a93069
--- /dev/null
+++ b/tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-synthetic-event-stack-legacy.tc
@@ -0,0 +1,24 @@
+#!/bin/sh
+# SPDX-License-Identifier: GPL-2.0
+# description: event trigger - test inter-event histogram trigger trace action with dynamic string param (legacy stack)
+# requires: set_event synthetic_events events/sched/sched_process_exec/hist "long[] stack' >> synthetic_events":README
+
+fail() { #msg
+ echo $1
+ exit_fail
+}
+
+echo "Test create synthetic event with stack"
+
+# Test the old stacktrace keyword (for backward compatibility)
+echo 's:wake_lat pid_t pid; u64 delta; unsigned long[] stack;' > dynamic_events
+echo 'hist:keys=next_pid:ts=common_timestamp.usecs,st=stacktrace if prev_state == 1||prev_state == 2' >> events/sched/sched_switch/trigger
+echo 'hist:keys=prev_pid:delta=common_timestamp.usecs-$ts,s=$st:onmax($delta).trace(wake_lat,prev_pid,$delta,$s)' >> events/sched/sched_switch/trigger
+echo 1 > events/synthetic/wake_lat/enable
+sleep 1
+
+if ! grep -q "=>.*sched" trace; then
+ fail "Failed to create synthetic event with stack"
+fi
+
+exit 0
diff --git a/tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-synthetic-event-stack.tc b/tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-synthetic-event-stack.tc
new file mode 100644
index 000000000000..8f1cc9a86a06
--- /dev/null
+++ b/tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-synthetic-event-stack.tc
@@ -0,0 +1,23 @@
+#!/bin/sh
+# SPDX-License-Identifier: GPL-2.0
+# description: event trigger - test inter-event histogram trigger trace action with dynamic string param
+# requires: set_event synthetic_events events/sched/sched_process_exec/hist "can be any field, or the special string 'common_stacktrace'":README
+
+fail() { #msg
+ echo $1
+ exit_fail
+}
+
+echo "Test create synthetic event with stack"
+
+echo 's:wake_lat pid_t pid; u64 delta; unsigned long[] stack;' > dynamic_events
+echo 'hist:keys=next_pid:ts=common_timestamp.usecs,st=common_stacktrace if prev_state == 1||prev_state == 2' >> events/sched/sched_switch/trigger
+echo 'hist:keys=prev_pid:delta=common_timestamp.usecs-$ts,s=$st:onmax($delta).trace(wake_lat,prev_pid,$delta,$s)' >> events/sched/sched_switch/trigger
+echo 1 > events/synthetic/wake_lat/enable
+sleep 1
+
+if ! grep -q "=>.*sched" trace; then
+ fail "Failed to create synthetic event with stack"
+fi
+
+exit 0
diff --git a/tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-synthetic-event-syntax.tc b/tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-synthetic-event-syntax.tc
index 59216f3cfb12..366f1f3ad906 100644
--- a/tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-synthetic-event-syntax.tc
+++ b/tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-synthetic-event-syntax.tc
@@ -32,6 +32,10 @@ grep "myevent[[:space:]]u64 var1" synthetic_events
# it is not possible to add same name event
! echo "myevent u64 var2" >> synthetic_events
+# make sure !synthetic event doesn't require a field
+echo "!myevent" >> synthetic_events
+echo "myevent u64 var1" >> synthetic_events
+
# Non-append open will cleanup all events and add new one
echo "myevent u64 var2" > synthetic_events
@@ -66,6 +70,12 @@ grep "myevent[[:space:]]unsigned long var" synthetic_events
echo "myevent char var[10]" > synthetic_events
grep "myevent[[:space:]]char\[10\] var" synthetic_events
+if grep -q 'long\[\]' README; then
+ # test stacktrace type
+ echo "myevent unsigned long[] var" > synthetic_events
+ grep "myevent[[:space:]]unsigned long\[\] var" synthetic_events
+fi
+
do_reset
exit 0
diff --git a/tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-synthetic_event_syntax_errors.tc b/tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-synthetic_event_syntax_errors.tc
new file mode 100644
index 000000000000..b927ee54c02d
--- /dev/null
+++ b/tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-synthetic_event_syntax_errors.tc
@@ -0,0 +1,38 @@
+#!/bin/sh
+# SPDX-License-Identifier: GPL-2.0
+# description: event trigger - test synthetic_events syntax parser errors
+# requires: synthetic_events error_log "' >> synthetic_events":README
+
+check_error() { # command-with-error-pos-by-^
+ ftrace_errlog_check 'synthetic_events' "$1" 'synthetic_events'
+}
+
+check_dyn_error() { # command-with-error-pos-by-^
+ ftrace_errlog_check 'synthetic_events' "$1" 'dynamic_events'
+}
+
+check_error 'myevent ^chr arg' # INVALID_TYPE
+check_error 'myevent ^unsigned arg' # INCOMPLETE_TYPE
+
+check_error 'myevent char ^str]; int v' # BAD_NAME
+check_error '^mye-vent char str[]' # BAD_NAME
+check_error 'myevent char ^st-r[]' # BAD_NAME
+
+check_error 'myevent char str;^[]' # INVALID_FIELD
+check_error 'myevent char str; ^int' # INVALID_FIELD
+
+check_error 'myevent char ^str[; int v' # INVALID_ARRAY_SPEC
+check_error 'myevent char ^str[kdjdk]' # INVALID_ARRAY_SPEC
+check_error 'myevent char ^str[257]' # INVALID_ARRAY_SPEC
+
+check_error '^mye;vent char str[]' # INVALID_CMD
+check_error '^myevent ; char str[]' # INVALID_CMD
+check_error '^myevent; char str[]' # INVALID_CMD
+check_error '^myevent ;char str[]' # INVALID_CMD
+check_error '^; char str[]' # INVALID_CMD
+check_error '^;myevent char str[]' # INVALID_CMD
+check_error '^myevent' # INVALID_CMD
+
+check_dyn_error '^s:junk/myevent char str[' # INVALID_DYN_CMD
+
+exit 0
diff --git a/tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-trace-action-hist.tc b/tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-trace-action-hist.tc
index c126d2350a6d..d7312047ce28 100644
--- a/tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-trace-action-hist.tc
+++ b/tools/testing/selftests/ftrace/test.d/trigger/inter-event/trigger-trace-action-hist.tc
@@ -1,7 +1,7 @@
#!/bin/sh
# SPDX-License-Identifier: GPL-2.0
# description: event trigger - test inter-event histogram trigger trace action
-# requires: set_event synthetic_events events/sched/sched_process_fork/hist "trace(<synthetic_event>":README
+# requires: set_event synthetic_events events/sched/sched_process_fork/hist "trace(<synthetic_event>":README ping:program
fail() { #msg
echo $1
diff --git a/tools/testing/selftests/ftrace/test.d/trigger/trigger-hist-expressions.tc b/tools/testing/selftests/ftrace/test.d/trigger/trigger-hist-expressions.tc
new file mode 100644
index 000000000000..05ffba299dbf
--- /dev/null
+++ b/tools/testing/selftests/ftrace/test.d/trigger/trigger-hist-expressions.tc
@@ -0,0 +1,63 @@
+#!/bin/sh
+# SPDX-License-Identifier: GPL-2.0
+# description: event trigger - test histogram expression parsing
+# requires: set_event events/sched/sched_process_fork/trigger events/sched/sched_process_fork/hist error_log "<var1>=<field|var_ref|numeric_literal>":README
+
+
+fail() { #msg
+ echo $1
+ exit_fail
+}
+
+test_hist_expr() { # test_name expression expected_val
+ trigger="events/sched/sched_process_fork/trigger"
+
+ reset_trigger_file $trigger
+
+ echo "Test hist trigger expressions - $1"
+
+ echo "hist:keys=common_pid:x=$2" > $trigger
+
+ for i in `seq 1 10` ; do ( echo "forked" > /dev/null); done
+
+ actual=`grep -o 'x=[[:digit:]]*' $trigger | awk -F= '{ print $2 }'`
+
+ if [ $actual != $3 ]; then
+ fail "Failed hist trigger expression evaluation: Expression: $2 Expected: $3, Actual: $actual"
+ fi
+
+ reset_trigger_file $trigger
+}
+
+check_error() { # test_name command-with-error-pos-by-^
+ trigger="events/sched/sched_process_fork/trigger"
+
+ echo "Test hist trigger expressions - $1"
+ ftrace_errlog_check 'hist:sched:sched_process_fork' "$2" $trigger
+}
+
+test_hist_expr "Variable assignment" "123" "123"
+
+test_hist_expr "Subtraction not associative" "16-8-4-2" "2"
+
+test_hist_expr "Division not associative" "64/8/4/2" "1"
+
+test_hist_expr "Same precedence operators (+,-) evaluated left to right" "16-8+4+2" "14"
+
+test_hist_expr "Same precedence operators (*,/) evaluated left to right" "4*3/2*2" "12"
+
+test_hist_expr "Multiplication evaluated before addition/subtraction" "4+3*2-2" "8"
+
+test_hist_expr "Division evaluated before addition/subtraction" "4+6/2-2" "5"
+
+# err pos for "too many subexpressions" is dependent on where
+# the last subexpression was detected. This can vary depending
+# on how the expression tree was generated.
+check_error "Too many subexpressions" 'hist:keys=common_pid:x=32+^10*3/20-4'
+check_error "Too many subexpressions" 'hist:keys=common_pid:x=^1+2+3+4+5'
+
+check_error "Unary minus not supported in subexpression" 'hist:keys=common_pid:x=-(^1)+2'
+
+check_error "Division by zero" 'hist:keys=common_pid:x=3/^0'
+
+exit 0
diff --git a/tools/testing/selftests/ftrace/test.d/trigger/trigger-hist-mod.tc b/tools/testing/selftests/ftrace/test.d/trigger/trigger-hist-mod.tc
index 4562e13cb26b..717898894ef7 100644
--- a/tools/testing/selftests/ftrace/test.d/trigger/trigger-hist-mod.tc
+++ b/tools/testing/selftests/ftrace/test.d/trigger/trigger-hist-mod.tc
@@ -40,7 +40,7 @@ grep "id: \(unknown_\|sys_\)" events/raw_syscalls/sys_exit/hist > /dev/null || \
reset_trigger
-echo "Test histgram with log2 modifier"
+echo "Test histogram with log2 modifier"
echo 'hist:keys=bytes_req.log2' > events/kmem/kmalloc/trigger
for i in `seq 1 10` ; do ( echo "forked" > /dev/null); done
diff --git a/tools/testing/selftests/ftrace/test.d/trigger/trigger-hist.tc b/tools/testing/selftests/ftrace/test.d/trigger/trigger-hist.tc
index 2950bfbc6fce..adae72665500 100644
--- a/tools/testing/selftests/ftrace/test.d/trigger/trigger-hist.tc
+++ b/tools/testing/selftests/ftrace/test.d/trigger/trigger-hist.tc
@@ -39,6 +39,24 @@ grep "parent_comm: $COMM" events/sched/sched_process_fork/hist > /dev/null || \
reset_trigger
+echo "Test histogram with sym modifier"
+
+echo 'hist:keys=call_site.sym' > events/kmem/kmalloc/trigger
+for i in `seq 1 10` ; do ( echo "forked" > /dev/null); done
+grep '{ call_site: \[[0-9a-f][0-9a-f]*\] [_a-zA-Z][_a-zA-Z]* *}' events/kmem/kmalloc/hist > /dev/null || \
+ fail "sym modifier on kmalloc call_site did not work"
+
+reset_trigger
+
+echo "Test histogram with sym-offset modifier"
+
+echo 'hist:keys=call_site.sym-offset' > events/kmem/kmalloc/trigger
+for i in `seq 1 10` ; do ( echo "forked" > /dev/null); done
+grep '{ call_site: \[[0-9a-f][0-9a-f]*\] [_a-zA-Z][_a-zA-Z]*+0x[0-9a-f][0-9a-f]*' events/kmem/kmalloc/hist > /dev/null || \
+ fail "sym-offset modifier on kmalloc call_site did not work"
+
+reset_trigger
+
echo "Test histogram with sort key"
echo 'hist:keys=parent_pid,child_pid:sort=child_pid.ascending' > events/sched/sched_process_fork/trigger
diff --git a/tools/testing/selftests/futex/Makefile b/tools/testing/selftests/futex/Makefile
index 12631f0076a1..11e157d7533b 100644
--- a/tools/testing/selftests/futex/Makefile
+++ b/tools/testing/selftests/futex/Makefile
@@ -11,7 +11,7 @@ all:
@for DIR in $(SUBDIRS); do \
BUILD_TARGET=$(OUTPUT)/$$DIR; \
mkdir $$BUILD_TARGET -p; \
- make OUTPUT=$$BUILD_TARGET -C $$DIR $@;\
+ $(MAKE) OUTPUT=$$BUILD_TARGET -C $$DIR $@;\
if [ -e $$DIR/$(TEST_PROGS) ]; then \
rsync -a $$DIR/$(TEST_PROGS) $$BUILD_TARGET/; \
fi \
@@ -32,6 +32,6 @@ override define CLEAN
@for DIR in $(SUBDIRS); do \
BUILD_TARGET=$(OUTPUT)/$$DIR; \
mkdir $$BUILD_TARGET -p; \
- make OUTPUT=$$BUILD_TARGET -C $$DIR $@;\
+ $(MAKE) OUTPUT=$$BUILD_TARGET -C $$DIR $@;\
done
endef
diff --git a/tools/testing/selftests/futex/functional/.gitignore b/tools/testing/selftests/futex/functional/.gitignore
index 0efcd494daab..fbcbdb6963b3 100644
--- a/tools/testing/selftests/futex/functional/.gitignore
+++ b/tools/testing/selftests/futex/functional/.gitignore
@@ -6,3 +6,6 @@ futex_wait_private_mapped_file
futex_wait_timeout
futex_wait_uninitialized_heap
futex_wait_wouldblock
+futex_wait
+futex_requeue
+futex_waitv
diff --git a/tools/testing/selftests/futex/functional/Makefile b/tools/testing/selftests/futex/functional/Makefile
index 23207829ec75..a392d0917b4e 100644
--- a/tools/testing/selftests/futex/functional/Makefile
+++ b/tools/testing/selftests/futex/functional/Makefile
@@ -1,25 +1,26 @@
# SPDX-License-Identifier: GPL-2.0
-INCLUDES := -I../include -I../../
-CFLAGS := $(CFLAGS) -g -O2 -Wall -D_GNU_SOURCE -pthread $(INCLUDES)
+INCLUDES := -I../include -I../../ $(KHDR_INCLUDES)
+CFLAGS := $(CFLAGS) -g -O2 -Wall -D_GNU_SOURCE -pthread $(INCLUDES) $(KHDR_INCLUDES)
LDLIBS := -lpthread -lrt
-HEADERS := \
+LOCAL_HDRS := \
../include/futextest.h \
../include/atomic.h \
../include/logging.h
-TEST_GEN_FILES := \
+TEST_GEN_PROGS := \
futex_wait_timeout \
futex_wait_wouldblock \
futex_requeue_pi \
futex_requeue_pi_signal_restart \
futex_requeue_pi_mismatched_ops \
futex_wait_uninitialized_heap \
- futex_wait_private_mapped_file
+ futex_wait_private_mapped_file \
+ futex_wait \
+ futex_requeue \
+ futex_waitv
TEST_PROGS := run.sh
top_srcdir = ../../../../..
-KSFT_KHDR_INSTALL := 1
+DEFAULT_INSTALL_HDR_PATH := 1
include ../../lib.mk
-
-$(TEST_GEN_FILES): $(HEADERS)
diff --git a/tools/testing/selftests/futex/functional/futex_requeue.c b/tools/testing/selftests/futex/functional/futex_requeue.c
new file mode 100644
index 000000000000..51485be6eb2f
--- /dev/null
+++ b/tools/testing/selftests/futex/functional/futex_requeue.c
@@ -0,0 +1,136 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * Copyright Collabora Ltd., 2021
+ *
+ * futex cmp requeue test by André Almeida <andrealmeid@collabora.com>
+ */
+
+#include <pthread.h>
+#include <limits.h>
+#include "logging.h"
+#include "futextest.h"
+
+#define TEST_NAME "futex-requeue"
+#define timeout_ns 30000000
+#define WAKE_WAIT_US 10000
+
+volatile futex_t *f1;
+
+void usage(char *prog)
+{
+ printf("Usage: %s\n", prog);
+ printf(" -c Use color\n");
+ printf(" -h Display this help message\n");
+ printf(" -v L Verbosity level: %d=QUIET %d=CRITICAL %d=INFO\n",
+ VQUIET, VCRITICAL, VINFO);
+}
+
+void *waiterfn(void *arg)
+{
+ struct timespec to;
+
+ to.tv_sec = 0;
+ to.tv_nsec = timeout_ns;
+
+ if (futex_wait(f1, *f1, &to, 0))
+ printf("waiter failed errno %d\n", errno);
+
+ return NULL;
+}
+
+int main(int argc, char *argv[])
+{
+ pthread_t waiter[10];
+ int res, ret = RET_PASS;
+ int c, i;
+ volatile futex_t _f1 = 0;
+ volatile futex_t f2 = 0;
+
+ f1 = &_f1;
+
+ while ((c = getopt(argc, argv, "cht:v:")) != -1) {
+ switch (c) {
+ case 'c':
+ log_color(1);
+ break;
+ case 'h':
+ usage(basename(argv[0]));
+ exit(0);
+ case 'v':
+ log_verbosity(atoi(optarg));
+ break;
+ default:
+ usage(basename(argv[0]));
+ exit(1);
+ }
+ }
+
+ ksft_print_header();
+ ksft_set_plan(2);
+ ksft_print_msg("%s: Test futex_requeue\n",
+ basename(argv[0]));
+
+ /*
+ * Requeue a waiter from f1 to f2, and wake f2.
+ */
+ if (pthread_create(&waiter[0], NULL, waiterfn, NULL))
+ error("pthread_create failed\n", errno);
+
+ usleep(WAKE_WAIT_US);
+
+ info("Requeuing 1 futex from f1 to f2\n");
+ res = futex_cmp_requeue(f1, 0, &f2, 0, 1, 0);
+ if (res != 1) {
+ ksft_test_result_fail("futex_requeue simple returned: %d %s\n",
+ res ? errno : res,
+ res ? strerror(errno) : "");
+ ret = RET_FAIL;
+ }
+
+
+ info("Waking 1 futex at f2\n");
+ res = futex_wake(&f2, 1, 0);
+ if (res != 1) {
+ ksft_test_result_fail("futex_requeue simple returned: %d %s\n",
+ res ? errno : res,
+ res ? strerror(errno) : "");
+ ret = RET_FAIL;
+ } else {
+ ksft_test_result_pass("futex_requeue simple succeeds\n");
+ }
+
+
+ /*
+ * Create 10 waiters at f1. At futex_requeue, wake 3 and requeue 7.
+ * At futex_wake, wake INT_MAX (should be exactly 7).
+ */
+ for (i = 0; i < 10; i++) {
+ if (pthread_create(&waiter[i], NULL, waiterfn, NULL))
+ error("pthread_create failed\n", errno);
+ }
+
+ usleep(WAKE_WAIT_US);
+
+ info("Waking 3 futexes at f1 and requeuing 7 futexes from f1 to f2\n");
+ res = futex_cmp_requeue(f1, 0, &f2, 3, 7, 0);
+ if (res != 10) {
+ ksft_test_result_fail("futex_requeue many returned: %d %s\n",
+ res ? errno : res,
+ res ? strerror(errno) : "");
+ ret = RET_FAIL;
+ }
+
+ info("Waking INT_MAX futexes at f2\n");
+ res = futex_wake(&f2, INT_MAX, 0);
+ if (res != 7) {
+ ksft_test_result_fail("futex_requeue many returned: %d %s\n",
+ res ? errno : res,
+ res ? strerror(errno) : "");
+ ret = RET_FAIL;
+ } else {
+ ksft_test_result_pass("futex_requeue many succeeds\n");
+ }
+
+ ksft_print_cnts();
+ return ret;
+}
diff --git a/tools/testing/selftests/futex/functional/futex_requeue_pi.c b/tools/testing/selftests/futex/functional/futex_requeue_pi.c
index 1ee5518ee6b7..7f3ca5c78df1 100644
--- a/tools/testing/selftests/futex/functional/futex_requeue_pi.c
+++ b/tools/testing/selftests/futex/functional/futex_requeue_pi.c
@@ -17,6 +17,8 @@
*
*****************************************************************************/
+#define _GNU_SOURCE
+
#include <errno.h>
#include <limits.h>
#include <pthread.h>
@@ -358,6 +360,7 @@ out:
int main(int argc, char *argv[])
{
+ const char *test_name;
int c, ret;
while ((c = getopt(argc, argv, "bchlot:v:")) != -1) {
@@ -397,6 +400,14 @@ int main(int argc, char *argv[])
"\tArguments: broadcast=%d locked=%d owner=%d timeout=%ldns\n",
broadcast, locked, owner, timeout_ns);
+ ret = asprintf(&test_name,
+ "%s broadcast=%d locked=%d owner=%d timeout=%ldns",
+ TEST_NAME, broadcast, locked, owner, timeout_ns);
+ if (ret < 0) {
+ ksft_print_msg("Failed to generate test name\n");
+ test_name = TEST_NAME;
+ }
+
/*
* FIXME: unit_test is obsolete now that we parse options and the
* various style of runs are done by run.sh - simplify the code and move
@@ -404,6 +415,6 @@ int main(int argc, char *argv[])
*/
ret = unit_test(broadcast, locked, owner, timeout_ns);
- print_result(TEST_NAME, ret);
+ print_result(test_name, ret);
return ret;
}
diff --git a/tools/testing/selftests/futex/functional/futex_requeue_pi_signal_restart.c b/tools/testing/selftests/futex/functional/futex_requeue_pi_signal_restart.c
index f8c43ce8fe66..c6b8f32990c8 100644
--- a/tools/testing/selftests/futex/functional/futex_requeue_pi_signal_restart.c
+++ b/tools/testing/selftests/futex/functional/futex_requeue_pi_signal_restart.c
@@ -184,7 +184,7 @@ int main(int argc, char *argv[])
/*
* If res is non-zero, we either requeued the waiter or hit an
* error, break out and handle it. If it is zero, then the
- * signal may have hit before the the waiter was blocked on f1.
+ * signal may have hit before the waiter was blocked on f1.
* Try again.
*/
if (res > 0) {
diff --git a/tools/testing/selftests/futex/functional/futex_wait.c b/tools/testing/selftests/futex/functional/futex_wait.c
new file mode 100644
index 000000000000..685140d9b93d
--- /dev/null
+++ b/tools/testing/selftests/futex/functional/futex_wait.c
@@ -0,0 +1,171 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * Copyright Collabora Ltd., 2021
+ *
+ * futex cmp requeue test by André Almeida <andrealmeid@collabora.com>
+ */
+
+#include <pthread.h>
+#include <sys/shm.h>
+#include <sys/mman.h>
+#include <fcntl.h>
+#include "logging.h"
+#include "futextest.h"
+
+#define TEST_NAME "futex-wait"
+#define timeout_ns 30000000
+#define WAKE_WAIT_US 10000
+#define SHM_PATH "futex_shm_file"
+
+void *futex;
+
+void usage(char *prog)
+{
+ printf("Usage: %s\n", prog);
+ printf(" -c Use color\n");
+ printf(" -h Display this help message\n");
+ printf(" -v L Verbosity level: %d=QUIET %d=CRITICAL %d=INFO\n",
+ VQUIET, VCRITICAL, VINFO);
+}
+
+static void *waiterfn(void *arg)
+{
+ struct timespec to;
+ unsigned int flags = 0;
+
+ if (arg)
+ flags = *((unsigned int *) arg);
+
+ to.tv_sec = 0;
+ to.tv_nsec = timeout_ns;
+
+ if (futex_wait(futex, 0, &to, flags))
+ printf("waiter failed errno %d\n", errno);
+
+ return NULL;
+}
+
+int main(int argc, char *argv[])
+{
+ int res, ret = RET_PASS, fd, c, shm_id;
+ u_int32_t f_private = 0, *shared_data;
+ unsigned int flags = FUTEX_PRIVATE_FLAG;
+ pthread_t waiter;
+ void *shm;
+
+ futex = &f_private;
+
+ while ((c = getopt(argc, argv, "cht:v:")) != -1) {
+ switch (c) {
+ case 'c':
+ log_color(1);
+ break;
+ case 'h':
+ usage(basename(argv[0]));
+ exit(0);
+ case 'v':
+ log_verbosity(atoi(optarg));
+ break;
+ default:
+ usage(basename(argv[0]));
+ exit(1);
+ }
+ }
+
+ ksft_print_header();
+ ksft_set_plan(3);
+ ksft_print_msg("%s: Test futex_wait\n", basename(argv[0]));
+
+ /* Testing a private futex */
+ info("Calling private futex_wait on futex: %p\n", futex);
+ if (pthread_create(&waiter, NULL, waiterfn, (void *) &flags))
+ error("pthread_create failed\n", errno);
+
+ usleep(WAKE_WAIT_US);
+
+ info("Calling private futex_wake on futex: %p\n", futex);
+ res = futex_wake(futex, 1, FUTEX_PRIVATE_FLAG);
+ if (res != 1) {
+ ksft_test_result_fail("futex_wake private returned: %d %s\n",
+ errno, strerror(errno));
+ ret = RET_FAIL;
+ } else {
+ ksft_test_result_pass("futex_wake private succeeds\n");
+ }
+
+ /* Testing an anon page shared memory */
+ shm_id = shmget(IPC_PRIVATE, 4096, IPC_CREAT | 0666);
+ if (shm_id < 0) {
+ perror("shmget");
+ exit(1);
+ }
+
+ shared_data = shmat(shm_id, NULL, 0);
+
+ *shared_data = 0;
+ futex = shared_data;
+
+ info("Calling shared (page anon) futex_wait on futex: %p\n", futex);
+ if (pthread_create(&waiter, NULL, waiterfn, NULL))
+ error("pthread_create failed\n", errno);
+
+ usleep(WAKE_WAIT_US);
+
+ info("Calling shared (page anon) futex_wake on futex: %p\n", futex);
+ res = futex_wake(futex, 1, 0);
+ if (res != 1) {
+ ksft_test_result_fail("futex_wake shared (page anon) returned: %d %s\n",
+ errno, strerror(errno));
+ ret = RET_FAIL;
+ } else {
+ ksft_test_result_pass("futex_wake shared (page anon) succeeds\n");
+ }
+
+
+ /* Testing a file backed shared memory */
+ fd = open(SHM_PATH, O_RDWR | O_CREAT, S_IRUSR | S_IWUSR);
+ if (fd < 0) {
+ perror("open");
+ exit(1);
+ }
+
+ if (ftruncate(fd, sizeof(f_private))) {
+ perror("ftruncate");
+ exit(1);
+ }
+
+ shm = mmap(NULL, sizeof(f_private), PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0);
+ if (shm == MAP_FAILED) {
+ perror("mmap");
+ exit(1);
+ }
+
+ memcpy(shm, &f_private, sizeof(f_private));
+
+ futex = shm;
+
+ info("Calling shared (file backed) futex_wait on futex: %p\n", futex);
+ if (pthread_create(&waiter, NULL, waiterfn, NULL))
+ error("pthread_create failed\n", errno);
+
+ usleep(WAKE_WAIT_US);
+
+ info("Calling shared (file backed) futex_wake on futex: %p\n", futex);
+ res = futex_wake(shm, 1, 0);
+ if (res != 1) {
+ ksft_test_result_fail("futex_wake shared (file backed) returned: %d %s\n",
+ errno, strerror(errno));
+ ret = RET_FAIL;
+ } else {
+ ksft_test_result_pass("futex_wake shared (file backed) succeeds\n");
+ }
+
+ /* Freeing resources */
+ shmdt(shared_data);
+ munmap(shm, sizeof(f_private));
+ remove(SHM_PATH);
+ close(fd);
+
+ ksft_print_cnts();
+ return ret;
+}
diff --git a/tools/testing/selftests/futex/functional/futex_wait_timeout.c b/tools/testing/selftests/futex/functional/futex_wait_timeout.c
index ee55e6d389a3..d183f878360b 100644
--- a/tools/testing/selftests/futex/functional/futex_wait_timeout.c
+++ b/tools/testing/selftests/futex/functional/futex_wait_timeout.c
@@ -11,21 +11,20 @@
*
* HISTORY
* 2009-Nov-6: Initial version by Darren Hart <dvhart@linux.intel.com>
+ * 2021-Apr-26: More test cases by André Almeida <andrealmeid@collabora.com>
*
*****************************************************************************/
-#include <errno.h>
-#include <getopt.h>
-#include <stdio.h>
-#include <stdlib.h>
-#include <string.h>
-#include <time.h>
+#include <pthread.h>
#include "futextest.h"
+#include "futex2test.h"
#include "logging.h"
#define TEST_NAME "futex-wait-timeout"
static long timeout_ns = 100000; /* 100us default timeout */
+static futex_t futex_pi;
+static pthread_barrier_t barrier;
void usage(char *prog)
{
@@ -37,12 +36,76 @@ void usage(char *prog)
VQUIET, VCRITICAL, VINFO);
}
+/*
+ * Get a PI lock and hold it forever, so the main thread lock_pi will block
+ * and we can test the timeout
+ */
+void *get_pi_lock(void *arg)
+{
+ int ret;
+ volatile futex_t lock = 0;
+
+ ret = futex_lock_pi(&futex_pi, NULL, 0, 0);
+ if (ret != 0)
+ error("futex_lock_pi failed\n", ret);
+
+ pthread_barrier_wait(&barrier);
+
+ /* Blocks forever */
+ ret = futex_wait(&lock, 0, NULL, 0);
+ error("futex_wait failed\n", ret);
+
+ return NULL;
+}
+
+/*
+ * Check if the function returned the expected error
+ */
+static void test_timeout(int res, int *ret, char *test_name, int err)
+{
+ if (!res || errno != err) {
+ ksft_test_result_fail("%s returned %d\n", test_name,
+ res < 0 ? errno : res);
+ *ret = RET_FAIL;
+ } else {
+ ksft_test_result_pass("%s succeeds\n", test_name);
+ }
+}
+
+/*
+ * Calculate absolute timeout and correct overflow
+ */
+static int futex_get_abs_timeout(clockid_t clockid, struct timespec *to,
+ long timeout_ns)
+{
+ if (clock_gettime(clockid, to)) {
+ error("clock_gettime failed\n", errno);
+ return errno;
+ }
+
+ to->tv_nsec += timeout_ns;
+
+ if (to->tv_nsec >= 1000000000) {
+ to->tv_sec++;
+ to->tv_nsec -= 1000000000;
+ }
+
+ return 0;
+}
+
int main(int argc, char *argv[])
{
futex_t f1 = FUTEX_INITIALIZER;
- struct timespec to;
int res, ret = RET_PASS;
+ struct timespec to;
+ pthread_t thread;
int c;
+ struct futex_waitv waitv = {
+ .uaddr = (uintptr_t)&f1,
+ .val = f1,
+ .flags = FUTEX_32,
+ .__reserved = 0
+ };
while ((c = getopt(argc, argv, "cht:v:")) != -1) {
switch (c) {
@@ -65,22 +128,79 @@ int main(int argc, char *argv[])
}
ksft_print_header();
- ksft_set_plan(1);
+ ksft_set_plan(9);
ksft_print_msg("%s: Block on a futex and wait for timeout\n",
basename(argv[0]));
ksft_print_msg("\tArguments: timeout=%ldns\n", timeout_ns);
- /* initialize timeout */
+ pthread_barrier_init(&barrier, NULL, 2);
+ pthread_create(&thread, NULL, get_pi_lock, NULL);
+
+ /* initialize relative timeout */
to.tv_sec = 0;
to.tv_nsec = timeout_ns;
- info("Calling futex_wait on f1: %u @ %p\n", f1, &f1);
- res = futex_wait(&f1, f1, &to, FUTEX_PRIVATE_FLAG);
- if (!res || errno != ETIMEDOUT) {
- fail("futex_wait returned %d\n", ret < 0 ? errno : ret);
- ret = RET_FAIL;
- }
+ res = futex_wait(&f1, f1, &to, 0);
+ test_timeout(res, &ret, "futex_wait relative", ETIMEDOUT);
+
+ /* FUTEX_WAIT_BITSET with CLOCK_REALTIME */
+ if (futex_get_abs_timeout(CLOCK_REALTIME, &to, timeout_ns))
+ return RET_FAIL;
+ res = futex_wait_bitset(&f1, f1, &to, 1, FUTEX_CLOCK_REALTIME);
+ test_timeout(res, &ret, "futex_wait_bitset realtime", ETIMEDOUT);
+
+ /* FUTEX_WAIT_BITSET with CLOCK_MONOTONIC */
+ if (futex_get_abs_timeout(CLOCK_MONOTONIC, &to, timeout_ns))
+ return RET_FAIL;
+ res = futex_wait_bitset(&f1, f1, &to, 1, 0);
+ test_timeout(res, &ret, "futex_wait_bitset monotonic", ETIMEDOUT);
+
+ /* FUTEX_WAIT_REQUEUE_PI with CLOCK_REALTIME */
+ if (futex_get_abs_timeout(CLOCK_REALTIME, &to, timeout_ns))
+ return RET_FAIL;
+ res = futex_wait_requeue_pi(&f1, f1, &futex_pi, &to, FUTEX_CLOCK_REALTIME);
+ test_timeout(res, &ret, "futex_wait_requeue_pi realtime", ETIMEDOUT);
+
+ /* FUTEX_WAIT_REQUEUE_PI with CLOCK_MONOTONIC */
+ if (futex_get_abs_timeout(CLOCK_MONOTONIC, &to, timeout_ns))
+ return RET_FAIL;
+ res = futex_wait_requeue_pi(&f1, f1, &futex_pi, &to, 0);
+ test_timeout(res, &ret, "futex_wait_requeue_pi monotonic", ETIMEDOUT);
+
+ /* Wait until the other thread calls futex_lock_pi() */
+ pthread_barrier_wait(&barrier);
+ pthread_barrier_destroy(&barrier);
+ /*
+ * FUTEX_LOCK_PI with CLOCK_REALTIME
+ * Due to historical reasons, FUTEX_LOCK_PI supports only realtime
+ * clock, but requires the caller to not set CLOCK_REALTIME flag.
+ *
+ * If you call FUTEX_LOCK_PI with a monotonic clock, it'll be
+ * interpreted as a realtime clock, and (unless you mess your machine's
+ * time or your time machine) the monotonic clock value is always
+ * smaller than realtime and the syscall will timeout immediately.
+ */
+ if (futex_get_abs_timeout(CLOCK_REALTIME, &to, timeout_ns))
+ return RET_FAIL;
+ res = futex_lock_pi(&futex_pi, &to, 0, 0);
+ test_timeout(res, &ret, "futex_lock_pi realtime", ETIMEDOUT);
+
+ /* Test operations that don't support FUTEX_CLOCK_REALTIME */
+ res = futex_lock_pi(&futex_pi, NULL, 0, FUTEX_CLOCK_REALTIME);
+ test_timeout(res, &ret, "futex_lock_pi invalid timeout flag", ENOSYS);
+
+ /* futex_waitv with CLOCK_MONOTONIC */
+ if (futex_get_abs_timeout(CLOCK_MONOTONIC, &to, timeout_ns))
+ return RET_FAIL;
+ res = futex_waitv(&waitv, 1, 0, &to, CLOCK_MONOTONIC);
+ test_timeout(res, &ret, "futex_waitv monotonic", ETIMEDOUT);
+
+ /* futex_waitv with CLOCK_REALTIME */
+ if (futex_get_abs_timeout(CLOCK_REALTIME, &to, timeout_ns))
+ return RET_FAIL;
+ res = futex_waitv(&waitv, 1, 0, &to, CLOCK_REALTIME);
+ test_timeout(res, &ret, "futex_waitv realtime", ETIMEDOUT);
- print_result(TEST_NAME, ret);
+ ksft_print_cnts();
return ret;
}
diff --git a/tools/testing/selftests/futex/functional/futex_wait_wouldblock.c b/tools/testing/selftests/futex/functional/futex_wait_wouldblock.c
index 0ae390ff8164..7d7a6a06cdb7 100644
--- a/tools/testing/selftests/futex/functional/futex_wait_wouldblock.c
+++ b/tools/testing/selftests/futex/functional/futex_wait_wouldblock.c
@@ -22,6 +22,7 @@
#include <string.h>
#include <time.h>
#include "futextest.h"
+#include "futex2test.h"
#include "logging.h"
#define TEST_NAME "futex-wait-wouldblock"
@@ -42,6 +43,12 @@ int main(int argc, char *argv[])
futex_t f1 = FUTEX_INITIALIZER;
int res, ret = RET_PASS;
int c;
+ struct futex_waitv waitv = {
+ .uaddr = (uintptr_t)&f1,
+ .val = f1+1,
+ .flags = FUTEX_32,
+ .__reserved = 0
+ };
while ((c = getopt(argc, argv, "cht:v:")) != -1) {
switch (c) {
@@ -61,18 +68,44 @@ int main(int argc, char *argv[])
}
ksft_print_header();
- ksft_set_plan(1);
+ ksft_set_plan(2);
ksft_print_msg("%s: Test the unexpected futex value in FUTEX_WAIT\n",
basename(argv[0]));
info("Calling futex_wait on f1: %u @ %p with val=%u\n", f1, &f1, f1+1);
res = futex_wait(&f1, f1+1, &to, FUTEX_PRIVATE_FLAG);
if (!res || errno != EWOULDBLOCK) {
- fail("futex_wait returned: %d %s\n",
- res ? errno : res, res ? strerror(errno) : "");
+ ksft_test_result_fail("futex_wait returned: %d %s\n",
+ res ? errno : res,
+ res ? strerror(errno) : "");
ret = RET_FAIL;
+ } else {
+ ksft_test_result_pass("futex_wait\n");
}
- print_result(TEST_NAME, ret);
+ if (clock_gettime(CLOCK_MONOTONIC, &to)) {
+ error("clock_gettime failed\n", errno);
+ return errno;
+ }
+
+ to.tv_nsec += timeout_ns;
+
+ if (to.tv_nsec >= 1000000000) {
+ to.tv_sec++;
+ to.tv_nsec -= 1000000000;
+ }
+
+ info("Calling futex_waitv on f1: %u @ %p with val=%u\n", f1, &f1, f1+1);
+ res = futex_waitv(&waitv, 1, 0, &to, CLOCK_MONOTONIC);
+ if (!res || errno != EWOULDBLOCK) {
+ ksft_test_result_pass("futex_waitv returned: %d %s\n",
+ res ? errno : res,
+ res ? strerror(errno) : "");
+ ret = RET_FAIL;
+ } else {
+ ksft_test_result_pass("futex_waitv\n");
+ }
+
+ ksft_print_cnts();
return ret;
}
diff --git a/tools/testing/selftests/futex/functional/futex_waitv.c b/tools/testing/selftests/futex/functional/futex_waitv.c
new file mode 100644
index 000000000000..a94337f677e1
--- /dev/null
+++ b/tools/testing/selftests/futex/functional/futex_waitv.c
@@ -0,0 +1,237 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * futex_waitv() test by André Almeida <andrealmeid@collabora.com>
+ *
+ * Copyright 2021 Collabora Ltd.
+ */
+
+#include <errno.h>
+#include <error.h>
+#include <getopt.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <time.h>
+#include <pthread.h>
+#include <stdint.h>
+#include <sys/shm.h>
+#include "futextest.h"
+#include "futex2test.h"
+#include "logging.h"
+
+#define TEST_NAME "futex-wait"
+#define WAKE_WAIT_US 10000
+#define NR_FUTEXES 30
+static struct futex_waitv waitv[NR_FUTEXES];
+u_int32_t futexes[NR_FUTEXES] = {0};
+
+void usage(char *prog)
+{
+ printf("Usage: %s\n", prog);
+ printf(" -c Use color\n");
+ printf(" -h Display this help message\n");
+ printf(" -v L Verbosity level: %d=QUIET %d=CRITICAL %d=INFO\n",
+ VQUIET, VCRITICAL, VINFO);
+}
+
+void *waiterfn(void *arg)
+{
+ struct timespec to;
+ int res;
+
+ /* setting absolute timeout for futex2 */
+ if (clock_gettime(CLOCK_MONOTONIC, &to))
+ error("gettime64 failed\n", errno);
+
+ to.tv_sec++;
+
+ res = futex_waitv(waitv, NR_FUTEXES, 0, &to, CLOCK_MONOTONIC);
+ if (res < 0) {
+ ksft_test_result_fail("futex_waitv returned: %d %s\n",
+ errno, strerror(errno));
+ } else if (res != NR_FUTEXES - 1) {
+ ksft_test_result_fail("futex_waitv returned: %d, expecting %d\n",
+ res, NR_FUTEXES - 1);
+ }
+
+ return NULL;
+}
+
+int main(int argc, char *argv[])
+{
+ pthread_t waiter;
+ int res, ret = RET_PASS;
+ struct timespec to;
+ int c, i;
+
+ while ((c = getopt(argc, argv, "cht:v:")) != -1) {
+ switch (c) {
+ case 'c':
+ log_color(1);
+ break;
+ case 'h':
+ usage(basename(argv[0]));
+ exit(0);
+ case 'v':
+ log_verbosity(atoi(optarg));
+ break;
+ default:
+ usage(basename(argv[0]));
+ exit(1);
+ }
+ }
+
+ ksft_print_header();
+ ksft_set_plan(7);
+ ksft_print_msg("%s: Test FUTEX_WAITV\n",
+ basename(argv[0]));
+
+ for (i = 0; i < NR_FUTEXES; i++) {
+ waitv[i].uaddr = (uintptr_t)&futexes[i];
+ waitv[i].flags = FUTEX_32 | FUTEX_PRIVATE_FLAG;
+ waitv[i].val = 0;
+ waitv[i].__reserved = 0;
+ }
+
+ /* Private waitv */
+ if (pthread_create(&waiter, NULL, waiterfn, NULL))
+ error("pthread_create failed\n", errno);
+
+ usleep(WAKE_WAIT_US);
+
+ res = futex_wake(u64_to_ptr(waitv[NR_FUTEXES - 1].uaddr), 1, FUTEX_PRIVATE_FLAG);
+ if (res != 1) {
+ ksft_test_result_fail("futex_wake private returned: %d %s\n",
+ res ? errno : res,
+ res ? strerror(errno) : "");
+ ret = RET_FAIL;
+ } else {
+ ksft_test_result_pass("futex_waitv private\n");
+ }
+
+ /* Shared waitv */
+ for (i = 0; i < NR_FUTEXES; i++) {
+ int shm_id = shmget(IPC_PRIVATE, 4096, IPC_CREAT | 0666);
+
+ if (shm_id < 0) {
+ perror("shmget");
+ exit(1);
+ }
+
+ unsigned int *shared_data = shmat(shm_id, NULL, 0);
+
+ *shared_data = 0;
+ waitv[i].uaddr = (uintptr_t)shared_data;
+ waitv[i].flags = FUTEX_32;
+ waitv[i].val = 0;
+ waitv[i].__reserved = 0;
+ }
+
+ if (pthread_create(&waiter, NULL, waiterfn, NULL))
+ error("pthread_create failed\n", errno);
+
+ usleep(WAKE_WAIT_US);
+
+ res = futex_wake(u64_to_ptr(waitv[NR_FUTEXES - 1].uaddr), 1, 0);
+ if (res != 1) {
+ ksft_test_result_fail("futex_wake shared returned: %d %s\n",
+ res ? errno : res,
+ res ? strerror(errno) : "");
+ ret = RET_FAIL;
+ } else {
+ ksft_test_result_pass("futex_waitv shared\n");
+ }
+
+ for (i = 0; i < NR_FUTEXES; i++)
+ shmdt(u64_to_ptr(waitv[i].uaddr));
+
+ /* Testing a waiter without FUTEX_32 flag */
+ waitv[0].flags = FUTEX_PRIVATE_FLAG;
+
+ if (clock_gettime(CLOCK_MONOTONIC, &to))
+ error("gettime64 failed\n", errno);
+
+ to.tv_sec++;
+
+ res = futex_waitv(waitv, NR_FUTEXES, 0, &to, CLOCK_MONOTONIC);
+ if (res == EINVAL) {
+ ksft_test_result_fail("futex_waitv private returned: %d %s\n",
+ res ? errno : res,
+ res ? strerror(errno) : "");
+ ret = RET_FAIL;
+ } else {
+ ksft_test_result_pass("futex_waitv without FUTEX_32\n");
+ }
+
+ /* Testing a waiter with an unaligned address */
+ waitv[0].flags = FUTEX_PRIVATE_FLAG | FUTEX_32;
+ waitv[0].uaddr = 1;
+
+ if (clock_gettime(CLOCK_MONOTONIC, &to))
+ error("gettime64 failed\n", errno);
+
+ to.tv_sec++;
+
+ res = futex_waitv(waitv, NR_FUTEXES, 0, &to, CLOCK_MONOTONIC);
+ if (res == EINVAL) {
+ ksft_test_result_fail("futex_wake private returned: %d %s\n",
+ res ? errno : res,
+ res ? strerror(errno) : "");
+ ret = RET_FAIL;
+ } else {
+ ksft_test_result_pass("futex_waitv with an unaligned address\n");
+ }
+
+ /* Testing a NULL address for waiters.uaddr */
+ waitv[0].uaddr = 0x00000000;
+
+ if (clock_gettime(CLOCK_MONOTONIC, &to))
+ error("gettime64 failed\n", errno);
+
+ to.tv_sec++;
+
+ res = futex_waitv(waitv, NR_FUTEXES, 0, &to, CLOCK_MONOTONIC);
+ if (res == EINVAL) {
+ ksft_test_result_fail("futex_waitv private returned: %d %s\n",
+ res ? errno : res,
+ res ? strerror(errno) : "");
+ ret = RET_FAIL;
+ } else {
+ ksft_test_result_pass("futex_waitv NULL address in waitv.uaddr\n");
+ }
+
+ /* Testing a NULL address for *waiters */
+ if (clock_gettime(CLOCK_MONOTONIC, &to))
+ error("gettime64 failed\n", errno);
+
+ to.tv_sec++;
+
+ res = futex_waitv(NULL, NR_FUTEXES, 0, &to, CLOCK_MONOTONIC);
+ if (res == EINVAL) {
+ ksft_test_result_fail("futex_waitv private returned: %d %s\n",
+ res ? errno : res,
+ res ? strerror(errno) : "");
+ ret = RET_FAIL;
+ } else {
+ ksft_test_result_pass("futex_waitv NULL address in *waiters\n");
+ }
+
+ /* Testing an invalid clockid */
+ if (clock_gettime(CLOCK_MONOTONIC, &to))
+ error("gettime64 failed\n", errno);
+
+ to.tv_sec++;
+
+ res = futex_waitv(NULL, NR_FUTEXES, 0, &to, CLOCK_TAI);
+ if (res == EINVAL) {
+ ksft_test_result_fail("futex_waitv private returned: %d %s\n",
+ res ? errno : res,
+ res ? strerror(errno) : "");
+ ret = RET_FAIL;
+ } else {
+ ksft_test_result_pass("futex_waitv invalid clockid\n");
+ }
+
+ ksft_print_cnts();
+ return ret;
+}
diff --git a/tools/testing/selftests/futex/functional/run.sh b/tools/testing/selftests/futex/functional/run.sh
index 1acb6ace1680..5ccd599da6c3 100755
--- a/tools/testing/selftests/futex/functional/run.sh
+++ b/tools/testing/selftests/futex/functional/run.sh
@@ -73,3 +73,12 @@ echo
echo
./futex_wait_uninitialized_heap $COLOR
./futex_wait_private_mapped_file $COLOR
+
+echo
+./futex_wait $COLOR
+
+echo
+./futex_requeue $COLOR
+
+echo
+./futex_waitv $COLOR
diff --git a/tools/testing/selftests/futex/include/futex2test.h b/tools/testing/selftests/futex/include/futex2test.h
new file mode 100644
index 000000000000..9d305520e849
--- /dev/null
+++ b/tools/testing/selftests/futex/include/futex2test.h
@@ -0,0 +1,22 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+/*
+ * Futex2 library addons for futex tests
+ *
+ * Copyright 2021 Collabora Ltd.
+ */
+#include <stdint.h>
+
+#define u64_to_ptr(x) ((void *)(uintptr_t)(x))
+
+/**
+ * futex_waitv - Wait at multiple futexes, wake on any
+ * @waiters: Array of waiters
+ * @nr_waiters: Length of waiters array
+ * @flags: Operation flags
+ * @timo: Optional timeout for operation
+ */
+static inline int futex_waitv(volatile struct futex_waitv *waiters, unsigned long nr_waiters,
+ unsigned long flags, struct timespec *timo, clockid_t clockid)
+{
+ return syscall(__NR_futex_waitv, waiters, nr_waiters, flags, timo, clockid);
+}
diff --git a/tools/testing/selftests/gpio/.gitignore b/tools/testing/selftests/gpio/.gitignore
index 4c69408f3e84..ededb077a3a6 100644
--- a/tools/testing/selftests/gpio/.gitignore
+++ b/tools/testing/selftests/gpio/.gitignore
@@ -1,2 +1,4 @@
# SPDX-License-Identifier: GPL-2.0-only
-gpio-mockup-chardev
+gpio-mockup-cdev
+gpio-chip-info
+gpio-line-name
diff --git a/tools/testing/selftests/gpio/Makefile b/tools/testing/selftests/gpio/Makefile
index 32bdc978a711..e0884390447d 100644
--- a/tools/testing/selftests/gpio/Makefile
+++ b/tools/testing/selftests/gpio/Makefile
@@ -1,32 +1,8 @@
# SPDX-License-Identifier: GPL-2.0
-VAR_CFLAGS := $(shell pkg-config --cflags mount 2>/dev/null)
-VAR_LDLIBS := $(shell pkg-config --libs mount 2>/dev/null)
-ifeq ($(VAR_LDLIBS),)
-VAR_LDLIBS := -lmount -I/usr/include/libmount
-endif
-
-CFLAGS += -O2 -g -std=gnu99 -Wall -I../../../../usr/include/ $(VAR_CFLAGS)
-LDLIBS += $(VAR_LDLIBS)
-
-TEST_PROGS := gpio-mockup.sh
+TEST_PROGS := gpio-mockup.sh gpio-sim.sh
TEST_FILES := gpio-mockup-sysfs.sh
-TEST_PROGS_EXTENDED := gpio-mockup-chardev
-
-GPIODIR := $(realpath ../../../gpio)
-GPIOOBJ := gpio-utils.o
-
-all: $(TEST_PROGS_EXTENDED)
+TEST_GEN_PROGS_EXTENDED := gpio-mockup-cdev gpio-chip-info gpio-line-name
+CFLAGS += -O2 -g -Wall $(KHDR_INCLUDES)
-override define CLEAN
- $(RM) $(TEST_PROGS_EXTENDED)
- $(MAKE) -C $(GPIODIR) OUTPUT=$(GPIODIR)/ clean
-endef
-
-KSFT_KHDR_INSTALL := 1
include ../lib.mk
-
-$(TEST_PROGS_EXTENDED): $(GPIODIR)/$(GPIOOBJ)
-
-$(GPIODIR)/$(GPIOOBJ):
- $(MAKE) OUTPUT=$(GPIODIR)/ -C $(GPIODIR)
diff --git a/tools/testing/selftests/gpio/config b/tools/testing/selftests/gpio/config
index abaa6902b7b6..409a8532facc 100644
--- a/tools/testing/selftests/gpio/config
+++ b/tools/testing/selftests/gpio/config
@@ -1,2 +1,4 @@
CONFIG_GPIOLIB=y
+CONFIG_GPIO_CDEV=y
CONFIG_GPIO_MOCKUP=m
+CONFIG_GPIO_SIM=m
diff --git a/tools/testing/selftests/gpio/gpio-chip-info.c b/tools/testing/selftests/gpio/gpio-chip-info.c
new file mode 100644
index 000000000000..fdc07e742fba
--- /dev/null
+++ b/tools/testing/selftests/gpio/gpio-chip-info.c
@@ -0,0 +1,57 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * GPIO character device helper for reading chip information.
+ *
+ * Copyright (C) 2021 Bartosz Golaszewski <brgl@bgdev.pl>
+ */
+
+#include <fcntl.h>
+#include <linux/gpio.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/ioctl.h>
+#include <sys/types.h>
+
+static void print_usage(void)
+{
+ printf("usage:\n");
+ printf(" gpio-chip-info <chip path> [name|label|num-lines]\n");
+}
+
+int main(int argc, char **argv)
+{
+ struct gpiochip_info info;
+ int fd, ret;
+
+ if (argc != 3) {
+ print_usage();
+ return EXIT_FAILURE;
+ }
+
+ fd = open(argv[1], O_RDWR);
+ if (fd < 0) {
+ perror("unable to open the GPIO chip");
+ return EXIT_FAILURE;
+ }
+
+ memset(&info, 0, sizeof(info));
+ ret = ioctl(fd, GPIO_GET_CHIPINFO_IOCTL, &info);
+ if (ret) {
+ perror("chip info ioctl failed");
+ return EXIT_FAILURE;
+ }
+
+ if (strcmp(argv[2], "name") == 0) {
+ printf("%s\n", info.name);
+ } else if (strcmp(argv[2], "label") == 0) {
+ printf("%s\n", info.label);
+ } else if (strcmp(argv[2], "num-lines") == 0) {
+ printf("%u\n", info.lines);
+ } else {
+ fprintf(stderr, "unknown command: %s\n", argv[2]);
+ return EXIT_FAILURE;
+ }
+
+ return EXIT_SUCCESS;
+}
diff --git a/tools/testing/selftests/gpio/gpio-line-name.c b/tools/testing/selftests/gpio/gpio-line-name.c
new file mode 100644
index 000000000000..e635cfadbded
--- /dev/null
+++ b/tools/testing/selftests/gpio/gpio-line-name.c
@@ -0,0 +1,55 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * GPIO character device helper for reading line names.
+ *
+ * Copyright (C) 2021 Bartosz Golaszewski <brgl@bgdev.pl>
+ */
+
+#include <fcntl.h>
+#include <linux/gpio.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/ioctl.h>
+#include <sys/types.h>
+
+static void print_usage(void)
+{
+ printf("usage:\n");
+ printf(" gpio-line-name <chip path> <line offset>\n");
+}
+
+int main(int argc, char **argv)
+{
+ struct gpio_v2_line_info info;
+ int fd, ret;
+ char *endp;
+
+ if (argc != 3) {
+ print_usage();
+ return EXIT_FAILURE;
+ }
+
+ fd = open(argv[1], O_RDWR);
+ if (fd < 0) {
+ perror("unable to open the GPIO chip");
+ return EXIT_FAILURE;
+ }
+
+ memset(&info, 0, sizeof(info));
+ info.offset = strtoul(argv[2], &endp, 10);
+ if (*endp != '\0') {
+ print_usage();
+ return EXIT_FAILURE;
+ }
+
+ ret = ioctl(fd, GPIO_V2_GET_LINEINFO_IOCTL, &info);
+ if (ret) {
+ perror("line info ioctl failed");
+ return EXIT_FAILURE;
+ }
+
+ printf("%s\n", info.name);
+
+ return EXIT_SUCCESS;
+}
diff --git a/tools/testing/selftests/gpio/gpio-mockup-cdev.c b/tools/testing/selftests/gpio/gpio-mockup-cdev.c
new file mode 100644
index 000000000000..d1640f44f8ac
--- /dev/null
+++ b/tools/testing/selftests/gpio/gpio-mockup-cdev.c
@@ -0,0 +1,198 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * GPIO mockup cdev test helper
+ *
+ * Copyright (C) 2020 Kent Gibson
+ */
+
+#include <errno.h>
+#include <fcntl.h>
+#include <signal.h>
+#include <stdint.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+#include <sys/ioctl.h>
+#include <linux/gpio.h>
+
+#define CONSUMER "gpio-mockup-cdev"
+
+static int request_line_v2(int cfd, unsigned int offset,
+ uint64_t flags, unsigned int val)
+{
+ struct gpio_v2_line_request req;
+ int ret;
+
+ memset(&req, 0, sizeof(req));
+ req.num_lines = 1;
+ req.offsets[0] = offset;
+ req.config.flags = flags;
+ strcpy(req.consumer, CONSUMER);
+ if (flags & GPIO_V2_LINE_FLAG_OUTPUT) {
+ req.config.num_attrs = 1;
+ req.config.attrs[0].mask = 1;
+ req.config.attrs[0].attr.id = GPIO_V2_LINE_ATTR_ID_OUTPUT_VALUES;
+ if (val)
+ req.config.attrs[0].attr.values = 1;
+ }
+ ret = ioctl(cfd, GPIO_V2_GET_LINE_IOCTL, &req);
+ if (ret == -1)
+ return -errno;
+ return req.fd;
+}
+
+
+static int get_value_v2(int lfd)
+{
+ struct gpio_v2_line_values vals;
+ int ret;
+
+ memset(&vals, 0, sizeof(vals));
+ vals.mask = 1;
+ ret = ioctl(lfd, GPIO_V2_LINE_GET_VALUES_IOCTL, &vals);
+ if (ret == -1)
+ return -errno;
+ return vals.bits & 0x1;
+}
+
+static int request_line_v1(int cfd, unsigned int offset,
+ uint32_t flags, unsigned int val)
+{
+ struct gpiohandle_request req;
+ int ret;
+
+ memset(&req, 0, sizeof(req));
+ req.lines = 1;
+ req.lineoffsets[0] = offset;
+ req.flags = flags;
+ strcpy(req.consumer_label, CONSUMER);
+ if (flags & GPIOHANDLE_REQUEST_OUTPUT)
+ req.default_values[0] = val;
+
+ ret = ioctl(cfd, GPIO_GET_LINEHANDLE_IOCTL, &req);
+ if (ret == -1)
+ return -errno;
+ return req.fd;
+}
+
+static int get_value_v1(int lfd)
+{
+ struct gpiohandle_data vals;
+ int ret;
+
+ memset(&vals, 0, sizeof(vals));
+ ret = ioctl(lfd, GPIOHANDLE_GET_LINE_VALUES_IOCTL, &vals);
+ if (ret == -1)
+ return -errno;
+ return vals.values[0];
+}
+
+static void usage(char *prog)
+{
+ printf("Usage: %s [-l] [-b <bias>] [-s <value>] [-u <uAPI>] <gpiochip> <offset>\n", prog);
+ printf(" -b: set line bias to one of pull-down, pull-up, disabled\n");
+ printf(" (default is to leave bias unchanged):\n");
+ printf(" -l: set line active low (default is active high)\n");
+ printf(" -s: set line value (default is to get line value)\n");
+ printf(" -u: uAPI version to use (default is 2)\n");
+ exit(-1);
+}
+
+static int wait_signal(void)
+{
+ int sig;
+ sigset_t wset;
+
+ sigemptyset(&wset);
+ sigaddset(&wset, SIGHUP);
+ sigaddset(&wset, SIGINT);
+ sigaddset(&wset, SIGTERM);
+ sigwait(&wset, &sig);
+
+ return sig;
+}
+
+int main(int argc, char *argv[])
+{
+ char *chip;
+ int opt, ret, cfd, lfd;
+ unsigned int offset, val = 0, abiv;
+ uint32_t flags_v1;
+ uint64_t flags_v2;
+
+ abiv = 2;
+ ret = 0;
+ flags_v1 = GPIOHANDLE_REQUEST_INPUT;
+ flags_v2 = GPIO_V2_LINE_FLAG_INPUT;
+
+ while ((opt = getopt(argc, argv, "lb:s:u:")) != -1) {
+ switch (opt) {
+ case 'l':
+ flags_v1 |= GPIOHANDLE_REQUEST_ACTIVE_LOW;
+ flags_v2 |= GPIO_V2_LINE_FLAG_ACTIVE_LOW;
+ break;
+ case 'b':
+ if (strcmp("pull-up", optarg) == 0) {
+ flags_v1 |= GPIOHANDLE_REQUEST_BIAS_PULL_UP;
+ flags_v2 |= GPIO_V2_LINE_FLAG_BIAS_PULL_UP;
+ } else if (strcmp("pull-down", optarg) == 0) {
+ flags_v1 |= GPIOHANDLE_REQUEST_BIAS_PULL_DOWN;
+ flags_v2 |= GPIO_V2_LINE_FLAG_BIAS_PULL_DOWN;
+ } else if (strcmp("disabled", optarg) == 0) {
+ flags_v1 |= GPIOHANDLE_REQUEST_BIAS_DISABLE;
+ flags_v2 |= GPIO_V2_LINE_FLAG_BIAS_DISABLED;
+ }
+ break;
+ case 's':
+ val = atoi(optarg);
+ flags_v1 &= ~GPIOHANDLE_REQUEST_INPUT;
+ flags_v1 |= GPIOHANDLE_REQUEST_OUTPUT;
+ flags_v2 &= ~GPIO_V2_LINE_FLAG_INPUT;
+ flags_v2 |= GPIO_V2_LINE_FLAG_OUTPUT;
+ break;
+ case 'u':
+ abiv = atoi(optarg);
+ break;
+ default:
+ usage(argv[0]);
+ }
+ }
+
+ if (argc < optind + 2)
+ usage(argv[0]);
+
+ chip = argv[optind];
+ offset = atoi(argv[optind + 1]);
+
+ cfd = open(chip, 0);
+ if (cfd == -1) {
+ fprintf(stderr, "Failed to open %s: %s\n", chip, strerror(errno));
+ return -errno;
+ }
+
+ if (abiv == 1)
+ lfd = request_line_v1(cfd, offset, flags_v1, val);
+ else
+ lfd = request_line_v2(cfd, offset, flags_v2, val);
+
+ close(cfd);
+
+ if (lfd < 0) {
+ fprintf(stderr, "Failed to request %s:%d: %s\n", chip, offset, strerror(-lfd));
+ return lfd;
+ }
+
+ if (flags_v2 & GPIO_V2_LINE_FLAG_OUTPUT) {
+ wait_signal();
+ } else {
+ if (abiv == 1)
+ ret = get_value_v1(lfd);
+ else
+ ret = get_value_v2(lfd);
+ }
+
+ close(lfd);
+
+ return ret;
+}
diff --git a/tools/testing/selftests/gpio/gpio-mockup-chardev.c b/tools/testing/selftests/gpio/gpio-mockup-chardev.c
deleted file mode 100644
index 73ead8828d3a..000000000000
--- a/tools/testing/selftests/gpio/gpio-mockup-chardev.c
+++ /dev/null
@@ -1,323 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-only
-/*
- * GPIO chardev test helper
- *
- * Copyright (C) 2016 Bamvor Jian Zhang
- */
-
-#define _GNU_SOURCE
-#include <unistd.h>
-#include <stdio.h>
-#include <stdlib.h>
-#include <errno.h>
-#include <string.h>
-#include <fcntl.h>
-#include <getopt.h>
-#include <sys/ioctl.h>
-#include <libmount.h>
-#include <err.h>
-#include <dirent.h>
-#include <linux/gpio.h>
-#include "../../../gpio/gpio-utils.h"
-
-#define CONSUMER "gpio-selftest"
-#define GC_NUM 10
-enum direction {
- OUT,
- IN
-};
-
-static int get_debugfs(char **path)
-{
- struct libmnt_context *cxt;
- struct libmnt_table *tb;
- struct libmnt_iter *itr = NULL;
- struct libmnt_fs *fs;
- int found = 0, ret;
-
- cxt = mnt_new_context();
- if (!cxt)
- err(EXIT_FAILURE, "libmount context allocation failed");
-
- itr = mnt_new_iter(MNT_ITER_FORWARD);
- if (!itr)
- err(EXIT_FAILURE, "failed to initialize libmount iterator");
-
- if (mnt_context_get_mtab(cxt, &tb))
- err(EXIT_FAILURE, "failed to read mtab");
-
- while (mnt_table_next_fs(tb, itr, &fs) == 0) {
- const char *type = mnt_fs_get_fstype(fs);
-
- if (!strcmp(type, "debugfs")) {
- found = 1;
- break;
- }
- }
- if (found) {
- ret = asprintf(path, "%s/gpio", mnt_fs_get_target(fs));
- if (ret < 0)
- err(EXIT_FAILURE, "failed to format string");
- }
-
- mnt_free_iter(itr);
- mnt_free_context(cxt);
-
- if (!found)
- return -1;
-
- return 0;
-}
-
-static int gpio_debugfs_get(const char *consumer, int *dir, int *value)
-{
- char *debugfs;
- FILE *f;
- char *line = NULL;
- size_t len = 0;
- char *cur;
- int found = 0;
-
- if (get_debugfs(&debugfs) != 0)
- err(EXIT_FAILURE, "debugfs is not mounted");
-
- f = fopen(debugfs, "r");
- if (!f)
- err(EXIT_FAILURE, "read from gpio debugfs failed");
-
- /*
- * gpio-2 ( |gpio-selftest ) in lo
- */
- while (getline(&line, &len, f) != -1) {
- cur = strstr(line, consumer);
- if (cur == NULL)
- continue;
-
- cur = strchr(line, ')');
- if (!cur)
- continue;
-
- cur += 2;
- if (!strncmp(cur, "out", 3)) {
- *dir = OUT;
- cur += 4;
- } else if (!strncmp(cur, "in", 2)) {
- *dir = IN;
- cur += 4;
- }
-
- if (!strncmp(cur, "hi", 2))
- *value = 1;
- else if (!strncmp(cur, "lo", 2))
- *value = 0;
-
- found = 1;
- break;
- }
- free(debugfs);
- fclose(f);
- free(line);
-
- if (!found)
- return -1;
-
- return 0;
-}
-
-static struct gpiochip_info *list_gpiochip(const char *gpiochip_name, int *ret)
-{
- struct gpiochip_info *cinfo;
- struct gpiochip_info *current;
- const struct dirent *ent;
- DIR *dp;
- char *chrdev_name;
- int fd;
- int i = 0;
-
- cinfo = calloc(sizeof(struct gpiochip_info) * 4, GC_NUM + 1);
- if (!cinfo)
- err(EXIT_FAILURE, "gpiochip_info allocation failed");
-
- current = cinfo;
- dp = opendir("/dev");
- if (!dp) {
- *ret = -errno;
- goto error_out;
- } else {
- *ret = 0;
- }
-
- while (ent = readdir(dp), ent) {
- if (check_prefix(ent->d_name, "gpiochip")) {
- *ret = asprintf(&chrdev_name, "/dev/%s", ent->d_name);
- if (*ret < 0)
- goto error_out;
-
- fd = open(chrdev_name, 0);
- if (fd == -1) {
- *ret = -errno;
- fprintf(stderr, "Failed to open %s\n",
- chrdev_name);
- goto error_close_dir;
- }
- *ret = ioctl(fd, GPIO_GET_CHIPINFO_IOCTL, current);
- if (*ret == -1) {
- perror("Failed to issue CHIPINFO IOCTL\n");
- goto error_close_dir;
- }
- close(fd);
- if (strcmp(current->label, gpiochip_name) == 0
- || check_prefix(current->label, gpiochip_name)) {
- *ret = 0;
- current++;
- i++;
- }
- }
- }
-
- if ((!*ret && i == 0) || *ret < 0) {
- free(cinfo);
- cinfo = NULL;
- }
- if (!*ret && i > 0) {
- cinfo = realloc(cinfo, sizeof(struct gpiochip_info) * 4 * i);
- *ret = i;
- }
-
-error_close_dir:
- closedir(dp);
-error_out:
- if (*ret < 0)
- err(EXIT_FAILURE, "list gpiochip failed: %s", strerror(*ret));
-
- return cinfo;
-}
-
-int gpio_pin_test(struct gpiochip_info *cinfo, int line, int flag, int value)
-{
- struct gpiohandle_data data;
- unsigned int lines[] = {line};
- int fd;
- int debugfs_dir = IN;
- int debugfs_value = 0;
- int ret;
-
- data.values[0] = value;
- ret = gpiotools_request_linehandle(cinfo->name, lines, 1, flag, &data,
- CONSUMER);
- if (ret < 0)
- goto fail_out;
- else
- fd = ret;
-
- ret = gpio_debugfs_get(CONSUMER, &debugfs_dir, &debugfs_value);
- if (ret) {
- ret = -EINVAL;
- goto fail_out;
- }
- if (flag & GPIOHANDLE_REQUEST_INPUT) {
- if (debugfs_dir != IN) {
- errno = -EINVAL;
- ret = -errno;
- }
- } else if (flag & GPIOHANDLE_REQUEST_OUTPUT) {
- if (flag & GPIOHANDLE_REQUEST_ACTIVE_LOW)
- debugfs_value = !debugfs_value;
-
- if (!(debugfs_dir == OUT && value == debugfs_value)) {
- errno = -EINVAL;
- ret = -errno;
- }
- }
- gpiotools_release_linehandle(fd);
-
-fail_out:
- if (ret)
- err(EXIT_FAILURE, "gpio<%s> line<%d> test flag<0x%x> value<%d>",
- cinfo->name, line, flag, value);
-
- return ret;
-}
-
-void gpio_pin_tests(struct gpiochip_info *cinfo, unsigned int line)
-{
- printf("line<%d>", line);
- gpio_pin_test(cinfo, line, GPIOHANDLE_REQUEST_OUTPUT, 0);
- printf(".");
- gpio_pin_test(cinfo, line, GPIOHANDLE_REQUEST_OUTPUT, 1);
- printf(".");
- gpio_pin_test(cinfo, line,
- GPIOHANDLE_REQUEST_OUTPUT | GPIOHANDLE_REQUEST_ACTIVE_LOW,
- 0);
- printf(".");
- gpio_pin_test(cinfo, line,
- GPIOHANDLE_REQUEST_OUTPUT | GPIOHANDLE_REQUEST_ACTIVE_LOW,
- 1);
- printf(".");
- gpio_pin_test(cinfo, line, GPIOHANDLE_REQUEST_INPUT, 0);
- printf(".");
-}
-
-/*
- * ./gpio-mockup-chardev gpio_chip_name_prefix is_valid_gpio_chip
- * Return 0 if successful or exit with EXIT_FAILURE if test failed.
- * gpio_chip_name_prefix: The prefix of gpiochip you want to test. E.g.
- * gpio-mockup
- * is_valid_gpio_chip: Whether the gpio_chip is valid. 1 means valid,
- * 0 means invalid which could not be found by
- * list_gpiochip.
- */
-int main(int argc, char *argv[])
-{
- char *prefix;
- int valid;
- struct gpiochip_info *cinfo;
- struct gpiochip_info *current;
- int i;
- int ret;
-
- if (argc < 3) {
- printf("Usage: %s prefix is_valid", argv[0]);
- exit(EXIT_FAILURE);
- }
-
- prefix = argv[1];
- valid = strcmp(argv[2], "true") == 0 ? 1 : 0;
-
- printf("Test gpiochip %s: ", prefix);
- cinfo = list_gpiochip(prefix, &ret);
- if (!cinfo) {
- if (!valid && ret == 0) {
- printf("Invalid test successful\n");
- ret = 0;
- goto out;
- } else {
- ret = -EINVAL;
- goto out;
- }
- } else if (cinfo && !valid) {
- ret = -EINVAL;
- goto out;
- }
- current = cinfo;
- for (i = 0; i < ret; i++) {
- gpio_pin_tests(current, 0);
- gpio_pin_tests(current, current->lines - 1);
- gpio_pin_tests(current, random() % current->lines);
- current++;
- }
- ret = 0;
- printf("successful\n");
-
-out:
- if (ret)
- fprintf(stderr, "gpio<%s> test failed\n", prefix);
-
- if (cinfo)
- free(cinfo);
-
- if (ret)
- exit(EXIT_FAILURE);
-
- return ret;
-}
diff --git a/tools/testing/selftests/gpio/gpio-mockup-sysfs.sh b/tools/testing/selftests/gpio/gpio-mockup-sysfs.sh
index dd269d877562..2d2e5d8763b6 100755
--- a/tools/testing/selftests/gpio/gpio-mockup-sysfs.sh
+++ b/tools/testing/selftests/gpio/gpio-mockup-sysfs.sh
@@ -1,135 +1,77 @@
# SPDX-License-Identifier: GPL-2.0
-is_consistent()
-{
- val=
-
- active_low_sysfs=`cat $GPIO_SYSFS/gpio$nr/active_low`
- val_sysfs=`cat $GPIO_SYSFS/gpio$nr/value`
- dir_sysfs=`cat $GPIO_SYSFS/gpio$nr/direction`
- gpio_this_debugfs=`cat $GPIO_DEBUGFS |grep "gpio-$nr" | sed "s/(.*)//g"`
- dir_debugfs=`echo $gpio_this_debugfs | awk '{print $2}'`
- val_debugfs=`echo $gpio_this_debugfs | awk '{print $3}'`
- if [ $val_debugfs = "lo" ]; then
- val=0
- elif [ $val_debugfs = "hi" ]; then
- val=1
- fi
+# Overrides functions in gpio-mockup.sh to test using the GPIO SYSFS uAPI
- if [ $active_low_sysfs = "1" ]; then
- if [ $val = "0" ]; then
- val="1"
- else
- val="0"
- fi
- fi
+SYSFS=`grep -w sysfs /proc/mounts | cut -f2 -d' '`
+[ -d "$SYSFS" ] || skip "sysfs is not mounted"
- if [ $val_sysfs = $val ] && [ $dir_sysfs = $dir_debugfs ]; then
- echo -n "."
- else
- echo "test fail, exit"
- die
- fi
-}
+GPIO_SYSFS="${SYSFS}/class/gpio"
+[ -d "$GPIO_SYSFS" ] || skip "CONFIG_GPIO_SYSFS is not selected"
-test_pin_logic()
-{
- nr=$1
- direction=$2
- active_low=$3
- value=$4
+PLATFORM_SYSFS=$SYSFS/devices/platform
- echo $direction > $GPIO_SYSFS/gpio$nr/direction
- echo $active_low > $GPIO_SYSFS/gpio$nr/active_low
- if [ $direction = "out" ]; then
- echo $value > $GPIO_SYSFS/gpio$nr/value
- fi
- is_consistent $nr
-}
+sysfs_nr=
+sysfs_ldir=
-test_one_pin()
+# determine the sysfs GPIO number given the $chip and $offset
+# e.g. gpiochip1:32
+find_sysfs_nr()
{
- nr=$1
-
- echo -n "test pin<$nr>"
-
- echo $nr > $GPIO_SYSFS/export 2>/dev/null
-
- if [ X$? != X0 ]; then
- echo "test GPIO pin $nr failed"
- die
- fi
-
- #"Checking if the sysfs is consistent with debugfs: "
- is_consistent $nr
-
- #"Checking the logic of active_low: "
- test_pin_logic $nr out 1 1
- test_pin_logic $nr out 1 0
- test_pin_logic $nr out 0 1
- test_pin_logic $nr out 0 0
-
- #"Checking the logic of direction: "
- test_pin_logic $nr in 1 1
- test_pin_logic $nr out 1 0
- test_pin_logic $nr low 0 1
- test_pin_logic $nr high 0 0
-
- echo $nr > $GPIO_SYSFS/unexport
-
- echo "successful"
+ # e.g. /sys/devices/platform/gpio-mockup.1/gpiochip1
+ local platform=$(find $PLATFORM_SYSFS -mindepth 2 -maxdepth 2 -type d -name $chip)
+ [ "$platform" ] || fail "can't find platform of $chip"
+ # e.g. /sys/devices/platform/gpio-mockup.1/gpio/gpiochip508/base
+ local base=$(find ${platform%/*}/gpio/ -mindepth 2 -maxdepth 2 -type f -name base)
+ [ "$base" ] || fail "can't find base of $chip"
+ sysfs_nr=$(($(< "$base") + $offset))
+ sysfs_ldir="$GPIO_SYSFS/gpio$sysfs_nr"
}
-test_one_pin_fail()
+acquire_line()
{
- nr=$1
-
- echo $nr > $GPIO_SYSFS/export 2>/dev/null
-
- if [ X$? != X0 ]; then
- echo "test invalid pin $nr successful"
- else
- echo "test invalid pin $nr failed"
- echo $nr > $GPIO_SYSFS/unexport 2>/dev/null
- die
- fi
+ [ "$sysfs_nr" ] && return
+ find_sysfs_nr
+ echo "$sysfs_nr" > "$GPIO_SYSFS/export"
}
-list_chip()
+# The helpers being overridden...
+get_line()
{
- echo `ls -d $GPIO_DRV_SYSFS/gpiochip* 2>/dev/null`
+ [ -e "$sysfs_ldir/value" ] && echo $(< "$sysfs_ldir/value")
}
-test_chip()
+set_line()
{
- chip=$1
- name=`basename $chip`
- base=`cat $chip/base`
- ngpio=`cat $chip/ngpio`
- printf "%-10s %-5s %-5s\n" $name $base $ngpio
- if [ $ngpio = "0" ]; then
- echo "number of gpio is zero is not allowed".
- fi
- test_one_pin $base
- test_one_pin $(($base + $ngpio - 1))
- test_one_pin $((( RANDOM % $ngpio ) + $base ))
+ acquire_line
+
+ for option in $*; do
+ case $option in
+ active-high)
+ echo 0 > "$sysfs_ldir/active_low"
+ ;;
+ active-low)
+ echo 1 > "$sysfs_ldir/active_low"
+ ;;
+ input)
+ echo "in" > "$sysfs_ldir/direction"
+ ;;
+ 0)
+ echo "out" > "$sysfs_ldir/direction"
+ echo 0 > "$sysfs_ldir/value"
+ ;;
+ 1)
+ echo "out" > "$sysfs_ldir/direction"
+ echo 1 > "$sysfs_ldir/value"
+ ;;
+ esac
+ done
}
-test_chips_sysfs()
+release_line()
{
- gpiochip=`list_chip $module`
- if [ X"$gpiochip" = X ]; then
- if [ X"$valid" = Xfalse ]; then
- echo "successful"
- else
- echo "fail"
- die
- fi
- else
- for chip in $gpiochip; do
- test_chip $chip
- done
- fi
+ [ "$sysfs_nr" ] || return 0
+ echo "$sysfs_nr" > "$GPIO_SYSFS/unexport"
+ sysfs_nr=
+ sysfs_ldir=
}
-
diff --git a/tools/testing/selftests/gpio/gpio-mockup.sh b/tools/testing/selftests/gpio/gpio-mockup.sh
index 7f35b9880485..fc2dd4c24d06 100755
--- a/tools/testing/selftests/gpio/gpio-mockup.sh
+++ b/tools/testing/selftests/gpio/gpio-mockup.sh
@@ -1,72 +1,57 @@
-#!/bin/bash
+#!/bin/bash -efu
# SPDX-License-Identifier: GPL-2.0
#exit status
-#1: Internal error
-#2: sysfs/debugfs not mount
-#3: insert module fail when gpio-mockup is a module.
-#4: Skip test including run as non-root user.
-#5: other reason.
-
-SYSFS=
-GPIO_SYSFS=
-GPIO_DRV_SYSFS=
+#0: success
+#1: fail
+#4: skip test - including run as non-root user
+
+BASE=${0%/*}
DEBUGFS=
GPIO_DEBUGFS=
-dev_type=
-module=
+dev_type="cdev"
+module="gpio-mockup"
+verbose=
+full_test=
+random=
+uapi_opt=
+active_opt=
+bias_opt=
+line_set_pid=
-# Kselftest framework requirement - SKIP code is 4.
+# Kselftest return codes
+ksft_fail=1
ksft_skip=4
usage()
{
echo "Usage:"
- echo "$0 [-f] [-m name] [-t type]"
- echo "-f: full test. It maybe conflict with existence gpio device."
- echo "-m: module name, default name is gpio-mockup. It could also test"
- echo " other gpio device."
- echo "-t: interface type: chardev(char device) and sysfs(being"
- echo " deprecated). The first one is default"
- echo ""
- echo "$0 -h"
- echo "This usage"
+ echo "$0 [-frv] [-t type]"
+ echo "-f: full test (minimal set run by default)"
+ echo "-r: test random lines as well as fence posts"
+ echo "-t: interface type:"
+ echo " cdev (character device ABI) - default"
+ echo " cdev_v1 (deprecated character device ABI)"
+ echo " sysfs (deprecated SYSFS ABI)"
+ echo "-v: verbose progress reporting"
+ exit $ksft_fail
}
-prerequisite()
+skip()
{
- msg="skip all tests:"
- if [ $UID != 0 ]; then
- echo $msg must be run as root >&2
- exit $ksft_skip
- fi
- SYSFS=`mount -t sysfs | head -1 | awk '{ print $3 }'`
- if [ ! -d "$SYSFS" ]; then
- echo $msg sysfs is not mounted >&2
- exit 2
- fi
- GPIO_SYSFS=`echo $SYSFS/class/gpio`
- GPIO_DRV_SYSFS=`echo $SYSFS/devices/platform/$module/gpio`
- DEBUGFS=`mount -t debugfs | head -1 | awk '{ print $3 }'`
- if [ ! -d "$DEBUGFS" ]; then
- echo $msg debugfs is not mounted >&2
- exit 2
- fi
- GPIO_DEBUGFS=`echo $DEBUGFS/gpio`
- source gpio-mockup-sysfs.sh
+ echo "$*" >&2
+ echo "GPIO $module test SKIP"
+ exit $ksft_skip
}
-try_insert_module()
+prerequisite()
{
- if [ -d "$GPIO_DRV_SYSFS" ]; then
- echo "$GPIO_DRV_SYSFS exist. Skip insert module"
- else
- modprobe -q $module $1
- if [ X$? != X0 ]; then
- echo $msg insmod $module failed >&2
- exit 3
- fi
- fi
+ [ $(id -u) -eq 0 ] || skip "must be run as root"
+
+ DEBUGFS=$(grep -w debugfs /proc/mounts | cut -f2 -d' ')
+ [ -d "$DEBUGFS" ] || skip "debugfs is not mounted"
+
+ GPIO_DEBUGFS=$DEBUGFS/$module
}
remove_module()
@@ -74,133 +59,342 @@ remove_module()
modprobe -r -q $module
}
-die()
+cleanup()
{
+ set +e
+ release_line
remove_module
- exit 5
+ jobs -p | xargs -r kill > /dev/null 2>&1
}
-test_chips()
+fail()
{
- if [ X$dev_type = Xsysfs ]; then
- echo "WARNING: sysfs ABI of gpio is going to deprecated."
- test_chips_sysfs $*
- else
- $BASE/gpio-mockup-chardev $*
- fi
+ echo "test failed: $*" >&2
+ echo "GPIO $module test FAIL"
+ exit $ksft_fail
+}
+
+try_insert_module()
+{
+ modprobe -q $module "$1" || fail "insert $module failed with error $?"
+}
+
+log()
+{
+ [ -z "$verbose" ] || echo "$*"
}
-gpio_test()
+# The following line helpers, release_Line, get_line and set_line, all
+# make use of the global $chip and $offset variables.
+#
+# This implementation drives the GPIO character device (cdev) uAPI.
+# Other implementations may override these to test different uAPIs.
+
+# Release any resources related to the line
+release_line()
{
- param=$1
- valid=$2
+ [ "$line_set_pid" ] && kill $line_set_pid && wait $line_set_pid || true
+ line_set_pid=
+}
- if [ X"$param" = X ]; then
- die
+# Read the current value of the line
+get_line()
+{
+ release_line
+
+ local cdev_opts=${uapi_opt}${active_opt}
+ $BASE/gpio-mockup-cdev $cdev_opts /dev/$chip $offset
+ echo $?
+}
+
+# Set the state of the line
+#
+# Changes to line configuration are provided as parameters.
+# The line is assumed to be an output if the line value 0 or 1 is
+# specified, else an input.
+set_line()
+{
+ local val=
+
+ release_line
+
+ # parse config options...
+ for option in $*; do
+ case $option in
+ active-low)
+ active_opt="-l "
+ ;;
+ active-high)
+ active_opt=
+ ;;
+ bias-none)
+ bias_opt=
+ ;;
+ pull-down)
+ bias_opt="-bpull-down "
+ ;;
+ pull-up)
+ bias_opt="-bpull-up "
+ ;;
+ 0)
+ val=0
+ ;;
+ 1)
+ val=1
+ ;;
+ esac
+ done
+
+ local cdev_opts=${uapi_opt}${active_opt}
+ if [ "$val" ]; then
+ $BASE/gpio-mockup-cdev $cdev_opts -s$val /dev/$chip $offset &
+ # failure to set is detected by reading mockup and toggling values
+ line_set_pid=$!
+ # allow for gpio-mockup-cdev to launch and request line
+ # (there is limited value in checking if line has been requested)
+ sleep 0.01
+ elif [ "$bias_opt" ]; then
+ cdev_opts=${cdev_opts}${bias_opt}
+ $BASE/gpio-mockup-cdev $cdev_opts /dev/$chip $offset || true
fi
- try_insert_module "gpio_mockup_ranges=$param"
- echo -n "GPIO $module test with ranges: <"
- echo "$param>: "
- printf "%-10s %s\n" $param
- test_chips $module $valid
- remove_module
}
-BASE=`dirname $0`
+assert_line()
+{
+ local val
+ # don't need any retry here as set_mock allows for propagation
+ val=$(get_line)
+ [ "$val" = "$1" ] || fail "line value is ${val:-empty} when $1 was expected"
+}
+
+# The following mockup helpers all make use of the $mock_line
+assert_mock()
+{
+ local backoff_wait=10
+ local retry=0
+ local val
+ # retry allows for set propagation from uAPI to mockup
+ while true; do
+ val=$(< $mock_line)
+ [ "$val" = "$1" ] && break
+ retry=$((retry + 1))
+ [ $retry -lt 5 ] || fail "mockup $mock_line value ${val:-empty} when $1 expected"
+ sleep $(printf "%0.2f" $((backoff_wait))e-3)
+ backoff_wait=$((backoff_wait * 2))
+ done
+}
+
+set_mock()
+{
+ echo "$1" > $mock_line
+ # allow for set propagation - so we won't be in a race with set_line
+ assert_mock "$1"
+}
-dev_type=
-TEMP=`getopt -o fhm:t: -n '$0' -- "$@"`
+# test the functionality of a line
+#
+# The line is set from the mockup side and is read from the userspace side
+# (input), and is set from the userspace side and is read from the mockup side
+# (output).
+#
+# Setting the mockup pull using the userspace interface bias settings is
+# tested where supported by the userspace interface (cdev).
+test_line()
+{
+ chip=$1
+ offset=$2
+ log "test_line $chip $offset"
+ mock_line=$GPIO_DEBUGFS/$chip/$offset
+ [ -e "$mock_line" ] || fail "missing line $chip:$offset"
-if [ "$?" != "0" ]; then
- echo "Parameter process failed, Terminating..." >&2
- exit 1
-fi
+ # test input active-high
+ set_mock 1
+ set_line input active-high
+ assert_line 1
+ set_mock 0
+ assert_line 0
+ set_mock 1
+ assert_line 1
+
+ if [ "$full_test" ]; then
+ if [ "$dev_type" != "sysfs" ]; then
+ # test pulls
+ set_mock 0
+ set_line input pull-up
+ assert_line 1
+ set_mock 0
+ assert_line 0
+
+ set_mock 1
+ set_line input pull-down
+ assert_line 0
+ set_mock 1
+ assert_line 1
+
+ set_line bias-none
+ fi
+
+ # test input active-low
+ set_mock 0
+ set_line active-low
+ assert_line 1
+ set_mock 1
+ assert_line 0
+ set_mock 0
+ assert_line 1
+
+ # test output active-high
+ set_mock 1
+ set_line active-high 0
+ assert_mock 0
+ set_line 1
+ assert_mock 1
+ set_line 0
+ assert_mock 0
+ fi
+
+ # test output active-low
+ set_mock 0
+ set_line active-low 0
+ assert_mock 1
+ set_line 1
+ assert_mock 0
+ set_line 0
+ assert_mock 1
+
+ release_line
+}
+
+test_no_line()
+{
+ log test_no_line "$*"
+ [ ! -e "$GPIO_DEBUGFS/$1/$2" ] || fail "unexpected line $1:$2"
+}
-# Note the quotes around `$TEMP': they are essential!
-eval set -- "$TEMP"
+# Load the module and check that the expected number of gpiochips, with the
+# expected number of lines, are created and are functional.
+#
+# $1 is the gpio_mockup_ranges parameter for the module
+# The remaining parameters are the number of lines, n, expected for each of
+# the gpiochips expected to be created.
+#
+# For each gpiochip the fence post lines, 0 and n-1, are tested, and the
+# line on the far side of the fence post, n, is tested to not exist.
+#
+# If the $random flag is set then a random line in the middle of the
+# gpiochip is tested as well.
+insmod_test()
+{
+ local ranges=
+ local gc=
+ local width=
-while true; do
- case $1 in
- -f)
+ [ "${1:-}" ] || fail "missing ranges"
+ ranges=$1 ; shift
+ try_insert_module "gpio_mockup_ranges=$ranges"
+ log "GPIO $module test with ranges: <$ranges>:"
+ # e.g. /sys/kernel/debug/gpio-mockup/gpiochip1
+ gpiochip=$(find "$DEBUGFS/$module/" -name gpiochip* -type d | sort)
+ for chip in $gpiochip; do
+ gc=${chip##*/}
+ [ "${1:-}" ] || fail "unexpected chip - $gc"
+ width=$1 ; shift
+ test_line $gc 0
+ if [ "$random" -a $width -gt 2 ]; then
+ test_line $gc $((RANDOM % ($width - 2) + 1))
+ fi
+ test_line $gc $(($width - 1))
+ test_no_line $gc $width
+ done
+ [ "${1:-}" ] && fail "missing expected chip of width $1"
+ remove_module || fail "failed to remove module with error $?"
+}
+
+while getopts ":frvt:" opt; do
+ case $opt in
+ f)
full_test=true
- shift
- ;;
- -h)
- usage
- exit
;;
- -m)
- module=$2
- shift 2
+ r)
+ random=true
;;
- -t)
- dev_type=$2
- shift 2
+ t)
+ dev_type=$OPTARG
;;
- --)
- shift
- break
+ v)
+ verbose=true
;;
*)
- echo "Internal error!"
- exit 1
+ usage
;;
esac
done
+shift $((OPTIND - 1))
-if [ X"$module" = X ]; then
- module="gpio-mockup"
-fi
-
-if [ X$dev_type != Xsysfs ]; then
- dev_type="chardev"
-fi
+[ "${1:-}" ] && fail "unknown argument '$1'"
prerequisite
-echo "1. Test dynamic allocation of gpio successful means insert gpiochip and"
-echo " manipulate gpio pin successful"
-gpio_test "-1,32" true
-gpio_test "-1,32,-1,32" true
-gpio_test "-1,32,-1,32,-1,32" true
-if [ X$full_test = Xtrue ]; then
- gpio_test "-1,32,32,64" true
- gpio_test "-1,32,40,64,-1,5" true
- gpio_test "-1,32,32,64,-1,32" true
- gpio_test "0,32,32,64,-1,32,-1,32" true
- gpio_test "-1,32,-1,32,0,32,32,64" true
- echo "2. Do basic test: successful means insert gpiochip and"
- echo " manipulate gpio pin successful"
- gpio_test "0,32" true
- gpio_test "0,32,32,64" true
- gpio_test "0,32,40,64,64,96" true
+trap 'exit $ksft_fail' SIGTERM SIGINT
+trap cleanup EXIT
+
+case "$dev_type" in
+sysfs)
+ source $BASE/gpio-mockup-sysfs.sh
+ echo "WARNING: gpio sysfs ABI is deprecated."
+ ;;
+cdev_v1)
+ echo "WARNING: gpio cdev ABI v1 is deprecated."
+ uapi_opt="-u1 "
+ ;;
+cdev)
+ ;;
+*)
+ fail "unknown interface type: $dev_type"
+ ;;
+esac
+
+remove_module || fail "can't remove existing $module module"
+
+# manual gpio allocation tests fail if a physical chip already exists
+[ "$full_test" -a -e "/dev/gpiochip0" ] && skip "full tests conflict with gpiochip0"
+
+echo "1. Module load tests"
+echo "1.1. dynamic allocation of gpio"
+insmod_test "-1,32" 32
+insmod_test "-1,23,-1,32" 23 32
+insmod_test "-1,23,-1,26,-1,32" 23 26 32
+if [ "$full_test" ]; then
+ echo "1.2. manual allocation of gpio"
+ insmod_test "0,32" 32
+ insmod_test "0,32,32,60" 32 28
+ insmod_test "0,32,40,64,64,96" 32 24 32
+ echo "1.3. dynamic and manual allocation of gpio"
+ insmod_test "-1,32,32,62" 32 30
+ insmod_test "-1,22,-1,23,0,24,32,64" 22 23 24 32
+ insmod_test "-1,32,32,60,-1,29" 32 28 29
+ insmod_test "-1,32,40,64,-1,5" 32 24 5
+ insmod_test "0,32,32,44,-1,22,-1,31" 32 12 22 31
fi
-echo "3. Error test: successful means insert gpiochip failed"
-echo "3.1 Test number of gpio overflow"
-#Currently: The max number of gpio(1024) is defined in arm architecture.
-gpio_test "-1,32,-1,1024" false
-if [ X$full_test = Xtrue ]; then
- echo "3.2 Test zero line of gpio"
- gpio_test "0,0" false
- echo "3.3 Test range overlap"
- echo "3.3.1 Test corner case"
- gpio_test "0,32,0,1" false
- gpio_test "0,32,32,64,32,40" false
- gpio_test "0,32,35,64,35,45" false
- gpio_test "0,32,31,32" false
- gpio_test "0,32,32,64,36,37" false
- gpio_test "0,32,35,64,34,36" false
- echo "3.3.2 Test inserting invalid second gpiochip"
- gpio_test "0,32,30,35" false
- gpio_test "0,32,1,5" false
- gpio_test "10,32,9,14" false
- gpio_test "10,32,30,35" false
- echo "3.3.3 Test others"
- gpio_test "0,32,40,56,39,45" false
- gpio_test "0,32,40,56,30,33" false
- gpio_test "0,32,40,56,30,41" false
- gpio_test "0,32,40,56,20,21" false
+echo "2. Module load error tests"
+echo "2.1 no lines defined"
+insmod_test "0,0"
+if [ "$full_test" ]; then
+ echo "2.2 ignore range overlap"
+ insmod_test "0,32,0,1" 32
+ insmod_test "0,32,1,5" 32
+ insmod_test "0,32,30,35" 32
+ insmod_test "0,32,31,32" 32
+ insmod_test "10,32,30,35" 22
+ insmod_test "10,32,9,14" 22
+ insmod_test "0,32,20,21,40,56" 32 16
+ insmod_test "0,32,32,64,32,40" 32 32
+ insmod_test "0,32,32,64,36,37" 32 32
+ insmod_test "0,32,35,64,34,36" 32 29
+ insmod_test "0,30,35,64,35,45" 30 29
+ insmod_test "0,32,40,56,30,33" 32 16
+ insmod_test "0,32,40,56,30,41" 32 16
+ insmod_test "0,32,40,56,39,45" 32 16
fi
-echo GPIO test PASS
-
+echo "GPIO $module test PASS"
diff --git a/tools/testing/selftests/gpio/gpio-sim.sh b/tools/testing/selftests/gpio/gpio-sim.sh
new file mode 100755
index 000000000000..6fb66a687f17
--- /dev/null
+++ b/tools/testing/selftests/gpio/gpio-sim.sh
@@ -0,0 +1,399 @@
+#!/bin/sh
+# SPDX-License-Identifier: GPL-2.0
+# Copyright (C) 2021 Bartosz Golaszewski <brgl@bgdev.pl>
+
+BASE_DIR=`dirname $0`
+CONFIGFS_DIR="/sys/kernel/config/gpio-sim"
+MODULE="gpio-sim"
+
+fail() {
+ echo "$*" >&2
+ echo "GPIO $MODULE test FAIL"
+ exit 1
+}
+
+skip() {
+ echo "$*" >&2
+ echo "GPIO $MODULE test SKIP"
+ exit 4
+}
+
+remove_chip() {
+ local CHIP=$1
+
+ for FILE in $CONFIGFS_DIR/$CHIP/*; do
+ BANK=`basename $FILE`
+ if [ "$BANK" = "live" -o "$BANK" = "dev_name" ]; then
+ continue
+ fi
+
+ LINES=`ls $CONFIGFS_DIR/$CHIP/$BANK/ | grep -E ^line`
+ if [ "$?" = 0 ]; then
+ for LINE in $LINES; do
+ if [ -e $CONFIGFS_DIR/$CHIP/$BANK/$LINE/hog ]; then
+ rmdir $CONFIGFS_DIR/$CHIP/$BANK/$LINE/hog || \
+ fail "Unable to remove the hog"
+ fi
+
+ rmdir $CONFIGFS_DIR/$CHIP/$BANK/$LINE || \
+ fail "Unable to remove the line"
+ done
+ fi
+
+ rmdir $CONFIGFS_DIR/$CHIP/$BANK
+ done
+
+ rmdir $CONFIGFS_DIR/$CHIP || fail "Unable to remove the chip"
+}
+
+configfs_cleanup() {
+ for CHIP in `ls $CONFIGFS_DIR/`; do
+ remove_chip $CHIP
+ done
+}
+
+create_chip() {
+ local CHIP=$1
+
+ mkdir $CONFIGFS_DIR/$CHIP
+}
+
+create_bank() {
+ local CHIP=$1
+ local BANK=$2
+
+ mkdir $CONFIGFS_DIR/$CHIP/$BANK
+}
+
+set_label() {
+ local CHIP=$1
+ local BANK=$2
+ local LABEL=$3
+
+ echo $LABEL > $CONFIGFS_DIR/$CHIP/$BANK/label || fail "Unable to set the chip label"
+}
+
+set_num_lines() {
+ local CHIP=$1
+ local BANK=$2
+ local NUM_LINES=$3
+
+ echo $NUM_LINES > $CONFIGFS_DIR/$CHIP/$BANK/num_lines || \
+ fail "Unable to set the number of lines"
+}
+
+set_line_name() {
+ local CHIP=$1
+ local BANK=$2
+ local OFFSET=$3
+ local NAME=$4
+ local LINE_DIR=$CONFIGFS_DIR/$CHIP/$BANK/line$OFFSET
+
+ test -d $LINE_DIR || mkdir $LINE_DIR
+ echo $NAME > $LINE_DIR/name || fail "Unable to set the line name"
+}
+
+enable_chip() {
+ local CHIP=$1
+
+ echo 1 > $CONFIGFS_DIR/$CHIP/live || fail "Unable to enable the chip"
+}
+
+disable_chip() {
+ local CHIP=$1
+
+ echo 0 > $CONFIGFS_DIR/$CHIP/live || fail "Unable to disable the chip"
+}
+
+configfs_chip_name() {
+ local CHIP=$1
+ local BANK=$2
+
+ cat $CONFIGFS_DIR/$CHIP/$BANK/chip_name 2> /dev/null || \
+ fail "unable to read the chip name from configfs"
+}
+
+configfs_dev_name() {
+ local CHIP=$1
+
+ cat $CONFIGFS_DIR/$CHIP/dev_name 2> /dev/null || \
+ fail "unable to read the device name from configfs"
+}
+
+get_chip_num_lines() {
+ local CHIP=$1
+ local BANK=$2
+
+ $BASE_DIR/gpio-chip-info /dev/`configfs_chip_name $CHIP $BANK` num-lines || \
+ fail "unable to read the number of lines from the character device"
+}
+
+get_chip_label() {
+ local CHIP=$1
+ local BANK=$2
+
+ $BASE_DIR/gpio-chip-info /dev/`configfs_chip_name $CHIP $BANK` label || \
+ fail "unable to read the chip label from the character device"
+}
+
+get_line_name() {
+ local CHIP=$1
+ local BANK=$2
+ local OFFSET=$3
+
+ $BASE_DIR/gpio-line-name /dev/`configfs_chip_name $CHIP $BANK` $OFFSET || \
+ fail "unable to read the line name from the character device"
+}
+
+sysfs_set_pull() {
+ local DEV=$1
+ local BANK=$2
+ local OFFSET=$3
+ local PULL=$4
+ local DEVNAME=`configfs_dev_name $DEV`
+ local CHIPNAME=`configfs_chip_name $DEV $BANK`
+ local SYSFS_PATH="/sys/devices/platform/$DEVNAME/$CHIPNAME/sim_gpio$OFFSET/pull"
+
+ echo $PULL > $SYSFS_PATH || fail "Unable to set line pull in sysfs"
+}
+
+# Load the gpio-sim module. This will pull in configfs if needed too.
+modprobe gpio-sim || skip "unable to load the gpio-sim module"
+# Make sure configfs is mounted at /sys/kernel/config. Wait a bit if needed.
+for IDX in `seq 5`; do
+ if [ "$IDX" -eq "5" ]; then
+ skip "configfs not mounted at /sys/kernel/config"
+ fi
+
+ mountpoint -q /sys/kernel/config && break
+ sleep 0.1
+done
+# If the module was already loaded: remove all previous chips
+configfs_cleanup
+
+trap "exit 1" SIGTERM SIGINT
+trap configfs_cleanup EXIT
+
+echo "1. chip_name and dev_name attributes"
+
+echo "1.1. Chip name is communicated to user"
+create_chip chip
+create_bank chip bank
+enable_chip chip
+test -n `cat $CONFIGFS_DIR/chip/bank/chip_name` || fail "chip_name doesn't work"
+remove_chip chip
+
+echo "1.2. chip_name returns 'none' if the chip is still pending"
+create_chip chip
+create_bank chip bank
+test "`cat $CONFIGFS_DIR/chip/bank/chip_name`" = "none" || \
+ fail "chip_name doesn't return 'none' for a pending chip"
+remove_chip chip
+
+echo "1.3. Device name is communicated to user"
+create_chip chip
+create_bank chip bank
+enable_chip chip
+test -n `cat $CONFIGFS_DIR/chip/dev_name` || fail "dev_name doesn't work"
+remove_chip chip
+
+echo "2. Creating and configuring simulated chips"
+
+echo "2.1. Default number of lines is 1"
+create_chip chip
+create_bank chip bank
+enable_chip chip
+test "`get_chip_num_lines chip bank`" = "1" || fail "default number of lines is not 1"
+remove_chip chip
+
+echo "2.2. Number of lines can be specified"
+create_chip chip
+create_bank chip bank
+set_num_lines chip bank 16
+enable_chip chip
+test "`get_chip_num_lines chip bank`" = "16" || fail "number of lines is not 16"
+remove_chip chip
+
+echo "2.3. Label can be set"
+create_chip chip
+create_bank chip bank
+set_label chip bank foobar
+enable_chip chip
+test "`get_chip_label chip bank`" = "foobar" || fail "label is incorrect"
+remove_chip chip
+
+echo "2.4. Label can be left empty"
+create_chip chip
+create_bank chip bank
+enable_chip chip
+test -z "`cat $CONFIGFS_DIR/chip/bank/label`" || fail "label is not empty"
+remove_chip chip
+
+echo "2.5. Line names can be configured"
+create_chip chip
+create_bank chip bank
+set_num_lines chip bank 16
+set_line_name chip bank 0 foo
+set_line_name chip bank 2 bar
+enable_chip chip
+test "`get_line_name chip bank 0`" = "foo" || fail "line name is incorrect"
+test "`get_line_name chip bank 2`" = "bar" || fail "line name is incorrect"
+remove_chip chip
+
+echo "2.6. Line config can remain unused if offset is greater than number of lines"
+create_chip chip
+create_bank chip bank
+set_num_lines chip bank 2
+set_line_name chip bank 5 foobar
+enable_chip chip
+test "`get_line_name chip bank 0`" = "" || fail "line name is incorrect"
+test "`get_line_name chip bank 1`" = "" || fail "line name is incorrect"
+remove_chip chip
+
+echo "2.7. Line configfs directory names are sanitized"
+create_chip chip
+create_bank chip bank
+mkdir $CONFIGFS_DIR/chip/bank/line12foobar 2> /dev/null && \
+ fail "invalid configfs line name accepted"
+mkdir $CONFIGFS_DIR/chip/bank/line_no_offset 2> /dev/null && \
+ fail "invalid configfs line name accepted"
+remove_chip chip
+
+echo "2.8. Multiple chips can be created"
+CHIPS="chip0 chip1 chip2"
+for CHIP in $CHIPS; do
+ create_chip $CHIP
+ create_bank $CHIP bank
+ enable_chip $CHIP
+done
+for CHIP in $CHIPS; do
+ remove_chip $CHIP
+done
+
+echo "2.9. Can't modify settings when chip is live"
+create_chip chip
+create_bank chip bank
+enable_chip chip
+echo foobar > $CONFIGFS_DIR/chip/bank/label 2> /dev/null && \
+ fail "Setting label of a live chip should fail"
+echo 8 > $CONFIGFS_DIR/chip/bank/num_lines 2> /dev/null && \
+ fail "Setting number of lines of a live chip should fail"
+remove_chip chip
+
+echo "2.10. Can't create line items when chip is live"
+create_chip chip
+create_bank chip bank
+enable_chip chip
+mkdir $CONFIGFS_DIR/chip/bank/line0 2> /dev/null && fail "Creating line item should fail"
+remove_chip chip
+
+echo "2.11. Probe errors are propagated to user-space"
+create_chip chip
+create_bank chip bank
+set_num_lines chip bank 99999
+echo 1 > $CONFIGFS_DIR/chip/live 2> /dev/null && fail "Probe error was not propagated"
+remove_chip chip
+
+echo "2.12. Cannot enable a chip without any GPIO banks"
+create_chip chip
+echo 1 > $CONFIGFS_DIR/chip/live 2> /dev/null && fail "Chip enabled without any GPIO banks"
+remove_chip chip
+
+echo "2.13. Duplicate chip labels are not allowed"
+create_chip chip
+create_bank chip bank0
+set_label chip bank0 foobar
+create_bank chip bank1
+set_label chip bank1 foobar
+echo 1 > $CONFIGFS_DIR/chip/live 2> /dev/null && fail "Duplicate chip labels were not rejected"
+remove_chip chip
+
+echo "2.14. Lines can be hogged"
+create_chip chip
+create_bank chip bank
+set_num_lines chip bank 8
+mkdir -p $CONFIGFS_DIR/chip/bank/line4/hog
+enable_chip chip
+$BASE_DIR/gpio-mockup-cdev -s 1 /dev/`configfs_chip_name chip bank` 4 2> /dev/null && \
+ fail "Setting the value of a hogged line shouldn't succeed"
+remove_chip chip
+
+echo "3. Controlling simulated chips"
+
+echo "3.1. Pull can be set over sysfs"
+create_chip chip
+create_bank chip bank
+set_num_lines chip bank 8
+enable_chip chip
+sysfs_set_pull chip bank 0 pull-up
+$BASE_DIR/gpio-mockup-cdev /dev/`configfs_chip_name chip bank` 0
+test "$?" = "1" || fail "pull set incorrectly"
+sysfs_set_pull chip bank 0 pull-down
+$BASE_DIR/gpio-mockup-cdev /dev/`configfs_chip_name chip bank` 1
+test "$?" = "0" || fail "pull set incorrectly"
+remove_chip chip
+
+echo "3.2. Pull can be read from sysfs"
+create_chip chip
+create_bank chip bank
+set_num_lines chip bank 8
+enable_chip chip
+DEVNAME=`configfs_dev_name chip`
+CHIPNAME=`configfs_chip_name chip bank`
+SYSFS_PATH=/sys/devices/platform/$DEVNAME/$CHIPNAME/sim_gpio0/pull
+test `cat $SYSFS_PATH` = "pull-down" || fail "reading the pull failed"
+sysfs_set_pull chip bank 0 pull-up
+test `cat $SYSFS_PATH` = "pull-up" || fail "reading the pull failed"
+remove_chip chip
+
+echo "3.3. Incorrect input in sysfs is rejected"
+create_chip chip
+create_bank chip bank
+set_num_lines chip bank 8
+enable_chip chip
+DEVNAME=`configfs_dev_name chip`
+CHIPNAME=`configfs_chip_name chip bank`
+SYSFS_PATH="/sys/devices/platform/$DEVNAME/$CHIPNAME/sim_gpio0/pull"
+echo foobar > $SYSFS_PATH 2> /dev/null && fail "invalid input not detected"
+remove_chip chip
+
+echo "3.4. Can't write to value"
+create_chip chip
+create_bank chip bank
+enable_chip chip
+DEVNAME=`configfs_dev_name chip`
+CHIPNAME=`configfs_chip_name chip bank`
+SYSFS_PATH="/sys/devices/platform/$DEVNAME/$CHIPNAME/sim_gpio0/value"
+echo 1 > $SYSFS_PATH 2> /dev/null && fail "writing to 'value' succeeded unexpectedly"
+remove_chip chip
+
+echo "4. Simulated GPIO chips are functional"
+
+echo "4.1. Values can be read from sysfs"
+create_chip chip
+create_bank chip bank
+set_num_lines chip bank 8
+enable_chip chip
+DEVNAME=`configfs_dev_name chip`
+CHIPNAME=`configfs_chip_name chip bank`
+SYSFS_PATH="/sys/devices/platform/$DEVNAME/$CHIPNAME/sim_gpio0/value"
+test `cat $SYSFS_PATH` = "0" || fail "incorrect value read from sysfs"
+$BASE_DIR/gpio-mockup-cdev -s 1 /dev/`configfs_chip_name chip bank` 0 &
+sleep 0.1 # FIXME Any better way?
+test `cat $SYSFS_PATH` = "1" || fail "incorrect value read from sysfs"
+kill $!
+remove_chip chip
+
+echo "4.2. Bias settings work correctly"
+create_chip chip
+create_bank chip bank
+set_num_lines chip bank 8
+enable_chip chip
+DEVNAME=`configfs_dev_name chip`
+CHIPNAME=`configfs_chip_name chip bank`
+SYSFS_PATH="/sys/devices/platform/$DEVNAME/$CHIPNAME/sim_gpio0/value"
+$BASE_DIR/gpio-mockup-cdev -b pull-up /dev/`configfs_chip_name chip bank` 0
+test `cat $SYSFS_PATH` = "1" || fail "bias setting does not work"
+remove_chip chip
+
+echo "GPIO $MODULE test PASS"
diff --git a/tools/testing/selftests/hid/.gitignore b/tools/testing/selftests/hid/.gitignore
new file mode 100644
index 000000000000..995af0670f69
--- /dev/null
+++ b/tools/testing/selftests/hid/.gitignore
@@ -0,0 +1,5 @@
+bpftool
+*.skel.h
+/tools
+hid_bpf
+results
diff --git a/tools/testing/selftests/hid/Makefile b/tools/testing/selftests/hid/Makefile
new file mode 100644
index 000000000000..2b5ea18bde38
--- /dev/null
+++ b/tools/testing/selftests/hid/Makefile
@@ -0,0 +1,241 @@
+# SPDX-License-Identifier: GPL-2.0
+
+# based on tools/testing/selftest/bpf/Makefile
+include ../../../build/Build.include
+include ../../../scripts/Makefile.arch
+include ../../../scripts/Makefile.include
+
+TEST_PROGS := hid-core.sh
+TEST_PROGS += hid-apple.sh
+TEST_PROGS += hid-gamepad.sh
+TEST_PROGS += hid-ite.sh
+TEST_PROGS += hid-keyboard.sh
+TEST_PROGS += hid-mouse.sh
+TEST_PROGS += hid-multitouch.sh
+TEST_PROGS += hid-sony.sh
+TEST_PROGS += hid-tablet.sh
+TEST_PROGS += hid-usb_crash.sh
+TEST_PROGS += hid-wacom.sh
+
+CXX ?= $(CROSS_COMPILE)g++
+
+HOSTPKG_CONFIG := pkg-config
+
+CFLAGS += -g -O0 -rdynamic -Wall -Werror -I$(OUTPUT)
+CFLAGS += -I$(OUTPUT)/tools/include
+
+LDLIBS += -lelf -lz -lrt -lpthread
+
+# Silence some warnings when compiled with clang
+ifneq ($(LLVM),)
+CFLAGS += -Wno-unused-command-line-argument
+endif
+
+# Order correspond to 'make run_tests' order
+TEST_GEN_PROGS = hid_bpf
+
+# Emit succinct information message describing current building step
+# $1 - generic step name (e.g., CC, LINK, etc);
+# $2 - optional "flavor" specifier; if provided, will be emitted as [flavor];
+# $3 - target (assumed to be file); only file name will be emitted;
+# $4 - optional extra arg, emitted as-is, if provided.
+ifeq ($(V),1)
+Q =
+msg =
+else
+Q = @
+msg = @printf ' %-8s%s %s%s\n' "$(1)" "$(if $(2), [$(2)])" "$(notdir $(3))" "$(if $(4), $(4))";
+MAKEFLAGS += --no-print-directory
+submake_extras := feature_display=0
+endif
+
+# override lib.mk's default rules
+OVERRIDE_TARGETS := 1
+override define CLEAN
+ $(call msg,CLEAN)
+ $(Q)$(RM) -r $(TEST_GEN_PROGS)
+ $(Q)$(RM) -r $(EXTRA_CLEAN)
+endef
+
+include ../lib.mk
+
+TOOLSDIR := $(top_srcdir)/tools
+LIBDIR := $(TOOLSDIR)/lib
+BPFDIR := $(LIBDIR)/bpf
+TOOLSINCDIR := $(TOOLSDIR)/include
+BPFTOOLDIR := $(TOOLSDIR)/bpf/bpftool
+SCRATCH_DIR := $(OUTPUT)/tools
+BUILD_DIR := $(SCRATCH_DIR)/build
+INCLUDE_DIR := $(SCRATCH_DIR)/include
+BPFOBJ := $(BUILD_DIR)/libbpf/libbpf.a
+ifneq ($(CROSS_COMPILE),)
+HOST_BUILD_DIR := $(BUILD_DIR)/host
+HOST_SCRATCH_DIR := $(OUTPUT)/host-tools
+HOST_INCLUDE_DIR := $(HOST_SCRATCH_DIR)/include
+else
+HOST_BUILD_DIR := $(BUILD_DIR)
+HOST_SCRATCH_DIR := $(SCRATCH_DIR)
+HOST_INCLUDE_DIR := $(INCLUDE_DIR)
+endif
+HOST_BPFOBJ := $(HOST_BUILD_DIR)/libbpf/libbpf.a
+RESOLVE_BTFIDS := $(HOST_BUILD_DIR)/resolve_btfids/resolve_btfids
+
+VMLINUX_BTF_PATHS ?= $(if $(O),$(O)/vmlinux) \
+ $(if $(KBUILD_OUTPUT),$(KBUILD_OUTPUT)/vmlinux) \
+ ../../../../vmlinux \
+ /sys/kernel/btf/vmlinux \
+ /boot/vmlinux-$(shell uname -r)
+VMLINUX_BTF ?= $(abspath $(firstword $(wildcard $(VMLINUX_BTF_PATHS))))
+ifeq ($(VMLINUX_BTF),)
+$(error Cannot find a vmlinux for VMLINUX_BTF at any of "$(VMLINUX_BTF_PATHS)")
+endif
+
+# Define simple and short `make test_progs`, `make test_sysctl`, etc targets
+# to build individual tests.
+# NOTE: Semicolon at the end is critical to override lib.mk's default static
+# rule for binaries.
+$(notdir $(TEST_GEN_PROGS)): %: $(OUTPUT)/% ;
+
+# sort removes libbpf duplicates when not cross-building
+MAKE_DIRS := $(sort $(BUILD_DIR)/libbpf $(HOST_BUILD_DIR)/libbpf \
+ $(HOST_BUILD_DIR)/bpftool $(HOST_BUILD_DIR)/resolve_btfids \
+ $(INCLUDE_DIR))
+$(MAKE_DIRS):
+ $(call msg,MKDIR,,$@)
+ $(Q)mkdir -p $@
+
+# LLVM's ld.lld doesn't support all the architectures, so use it only on x86
+ifeq ($(SRCARCH),x86)
+LLD := lld
+else
+LLD := ld
+endif
+
+DEFAULT_BPFTOOL := $(HOST_SCRATCH_DIR)/sbin/bpftool
+
+TEST_GEN_PROGS_EXTENDED += $(DEFAULT_BPFTOOL)
+
+$(TEST_GEN_PROGS) $(TEST_GEN_PROGS_EXTENDED): $(BPFOBJ)
+
+BPFTOOL ?= $(DEFAULT_BPFTOOL)
+$(DEFAULT_BPFTOOL): $(wildcard $(BPFTOOLDIR)/*.[ch] $(BPFTOOLDIR)/Makefile) \
+ $(HOST_BPFOBJ) | $(HOST_BUILD_DIR)/bpftool
+ $(Q)$(MAKE) $(submake_extras) -C $(BPFTOOLDIR) \
+ ARCH= CROSS_COMPILE= CC=$(HOSTCC) LD=$(HOSTLD) \
+ EXTRA_CFLAGS='-g -O0' \
+ OUTPUT=$(HOST_BUILD_DIR)/bpftool/ \
+ LIBBPF_OUTPUT=$(HOST_BUILD_DIR)/libbpf/ \
+ LIBBPF_DESTDIR=$(HOST_SCRATCH_DIR)/ \
+ prefix= DESTDIR=$(HOST_SCRATCH_DIR)/ install-bin
+
+$(BPFOBJ): $(wildcard $(BPFDIR)/*.[ch] $(BPFDIR)/Makefile) \
+ | $(BUILD_DIR)/libbpf
+ $(Q)$(MAKE) $(submake_extras) -C $(BPFDIR) OUTPUT=$(BUILD_DIR)/libbpf/ \
+ EXTRA_CFLAGS='-g -O0' \
+ DESTDIR=$(SCRATCH_DIR) prefix= all install_headers
+
+ifneq ($(BPFOBJ),$(HOST_BPFOBJ))
+$(HOST_BPFOBJ): $(wildcard $(BPFDIR)/*.[ch] $(BPFDIR)/Makefile) \
+ | $(HOST_BUILD_DIR)/libbpf
+ $(Q)$(MAKE) $(submake_extras) -C $(BPFDIR) \
+ EXTRA_CFLAGS='-g -O0' ARCH= CROSS_COMPILE= \
+ OUTPUT=$(HOST_BUILD_DIR)/libbpf/ CC=$(HOSTCC) LD=$(HOSTLD) \
+ DESTDIR=$(HOST_SCRATCH_DIR)/ prefix= all install_headers
+endif
+
+$(INCLUDE_DIR)/vmlinux.h: $(VMLINUX_BTF) $(BPFTOOL) | $(INCLUDE_DIR)
+ifeq ($(VMLINUX_H),)
+ $(call msg,GEN,,$@)
+ $(Q)$(BPFTOOL) btf dump file $(VMLINUX_BTF) format c > $@
+else
+ $(call msg,CP,,$@)
+ $(Q)cp "$(VMLINUX_H)" $@
+endif
+
+$(RESOLVE_BTFIDS): $(HOST_BPFOBJ) | $(HOST_BUILD_DIR)/resolve_btfids \
+ $(TOOLSDIR)/bpf/resolve_btfids/main.c \
+ $(TOOLSDIR)/lib/rbtree.c \
+ $(TOOLSDIR)/lib/zalloc.c \
+ $(TOOLSDIR)/lib/string.c \
+ $(TOOLSDIR)/lib/ctype.c \
+ $(TOOLSDIR)/lib/str_error_r.c
+ $(Q)$(MAKE) $(submake_extras) -C $(TOOLSDIR)/bpf/resolve_btfids \
+ CC=$(HOSTCC) LD=$(HOSTLD) AR=$(HOSTAR) \
+ LIBBPF_INCLUDE=$(HOST_INCLUDE_DIR) \
+ OUTPUT=$(HOST_BUILD_DIR)/resolve_btfids/ BPFOBJ=$(HOST_BPFOBJ)
+
+# Get Clang's default includes on this system, as opposed to those seen by
+# '--target=bpf'. This fixes "missing" files on some architectures/distros,
+# such as asm/byteorder.h, asm/socket.h, asm/sockios.h, sys/cdefs.h etc.
+#
+# Use '-idirafter': Don't interfere with include mechanics except where the
+# build would have failed anyways.
+define get_sys_includes
+$(shell $(1) -v -E - </dev/null 2>&1 \
+ | sed -n '/<...> search starts here:/,/End of search list./{ s| \(/.*\)|-idirafter \1|p }') \
+$(shell $(1) -dM -E - </dev/null | grep '__riscv_xlen ' | awk '{printf("-D__riscv_xlen=%d -D__BITS_PER_LONG=%d", $$3, $$3)}')
+endef
+
+# Determine target endianness.
+IS_LITTLE_ENDIAN = $(shell $(CC) -dM -E - </dev/null | \
+ grep 'define __BYTE_ORDER__ __ORDER_LITTLE_ENDIAN__')
+MENDIAN=$(if $(IS_LITTLE_ENDIAN),-mlittle-endian,-mbig-endian)
+
+CLANG_SYS_INCLUDES = $(call get_sys_includes,$(CLANG))
+BPF_CFLAGS = -g -Werror -D__TARGET_ARCH_$(SRCARCH) $(MENDIAN) \
+ -I$(INCLUDE_DIR)
+
+CLANG_CFLAGS = $(CLANG_SYS_INCLUDES) \
+ -Wno-compare-distinct-pointer-types
+
+# Build BPF object using Clang
+# $1 - input .c file
+# $2 - output .o file
+# $3 - CFLAGS
+define CLANG_BPF_BUILD_RULE
+ $(call msg,CLNG-BPF,$(TRUNNER_BINARY),$2)
+ $(Q)$(CLANG) $3 -O2 --target=bpf -c $1 -mcpu=v3 -o $2
+endef
+# Similar to CLANG_BPF_BUILD_RULE, but with disabled alu32
+define CLANG_NOALU32_BPF_BUILD_RULE
+ $(call msg,CLNG-BPF,$(TRUNNER_BINARY),$2)
+ $(Q)$(CLANG) $3 -O2 --target=bpf -c $1 -mcpu=v2 -o $2
+endef
+# Build BPF object using GCC
+define GCC_BPF_BUILD_RULE
+ $(call msg,GCC-BPF,$(TRUNNER_BINARY),$2)
+ $(Q)$(BPF_GCC) $3 -O2 -c $1 -o $2
+endef
+
+BPF_PROGS_DIR := progs
+BPF_BUILD_RULE := CLANG_BPF_BUILD_RULE
+BPF_SRCS := $(notdir $(wildcard $(BPF_PROGS_DIR)/*.c))
+BPF_OBJS := $(patsubst %.c,$(OUTPUT)/%.bpf.o, $(BPF_SRCS))
+BPF_SKELS := $(patsubst %.c,$(OUTPUT)/%.skel.h, $(BPF_SRCS))
+TEST_GEN_FILES += $(BPF_OBJS)
+
+$(BPF_PROGS_DIR)-bpfobjs := y
+$(BPF_OBJS): $(OUTPUT)/%.bpf.o: \
+ $(BPF_PROGS_DIR)/%.c \
+ $(wildcard $(BPF_PROGS_DIR)/*.h) \
+ $(INCLUDE_DIR)/vmlinux.h \
+ $(wildcard $(BPFDIR)/hid_bpf_*.h) \
+ $(wildcard $(BPFDIR)/*.bpf.h) \
+ | $(OUTPUT) $(BPFOBJ)
+ $(call $(BPF_BUILD_RULE),$<,$@, $(BPF_CFLAGS))
+
+$(BPF_SKELS): %.skel.h: %.bpf.o $(BPFTOOL) | $(OUTPUT)
+ $(call msg,GEN-SKEL,$(BINARY),$@)
+ $(Q)$(BPFTOOL) gen object $(<:.o=.linked1.o) $<
+ $(Q)$(BPFTOOL) gen skeleton $(<:.o=.linked1.o) name $(notdir $(<:.bpf.o=)) > $@
+
+$(OUTPUT)/%.o: %.c $(BPF_SKELS)
+ $(call msg,CC,,$@)
+ $(Q)$(CC) $(CFLAGS) -c $(filter %.c,$^) $(LDLIBS) -o $@
+
+$(OUTPUT)/%: $(OUTPUT)/%.o
+ $(call msg,BINARY,,$@)
+ $(Q)$(LINK.c) $^ $(LDLIBS) -o $@
+
+EXTRA_CLEAN := $(SCRATCH_DIR) $(HOST_SCRATCH_DIR) feature bpftool \
+ $(addprefix $(OUTPUT)/,*.o *.skel.h no_alu32)
diff --git a/tools/testing/selftests/hid/config b/tools/testing/selftests/hid/config
new file mode 100644
index 000000000000..1758b055f295
--- /dev/null
+++ b/tools/testing/selftests/hid/config
@@ -0,0 +1,32 @@
+CONFIG_BPF_EVENTS=y
+CONFIG_BPF_JIT_ALWAYS_ON=y
+CONFIG_BPF_JIT=y
+CONFIG_BPF_KPROBE_OVERRIDE=y
+CONFIG_BPF_LSM=y
+CONFIG_BPF_PRELOAD_UMD=y
+CONFIG_BPF_PRELOAD=y
+CONFIG_BPF_STREAM_PARSER=y
+CONFIG_BPF_SYSCALL=y
+CONFIG_BPF=y
+CONFIG_CGROUP_BPF=y
+CONFIG_DEBUG_INFO_BTF=y
+CONFIG_DYNAMIC_FTRACE_WITH_DIRECT_CALLS=y
+CONFIG_FPROBE=y
+CONFIG_FTRACE_SYSCALLS=y
+CONFIG_FUNCTION_TRACER=y
+CONFIG_HIDRAW=y
+CONFIG_HID=y
+CONFIG_HID_BPF=y
+CONFIG_INPUT_EVDEV=y
+CONFIG_UHID=y
+CONFIG_LEDS_CLASS_MULTICOLOR=y
+CONFIG_USB=y
+CONFIG_USB_HID=y
+CONFIG_HID_APPLE=y
+CONFIG_HID_ITE=y
+CONFIG_HID_MULTITOUCH=y
+CONFIG_HID_PLAYSTATION=y
+CONFIG_PLAYSTATION_FF=y
+CONFIG_HID_SONY=y
+CONFIG_SONY_FF=y
+CONFIG_HID_WACOM=y
diff --git a/tools/testing/selftests/hid/config.common b/tools/testing/selftests/hid/config.common
new file mode 100644
index 000000000000..0f456dbab62f
--- /dev/null
+++ b/tools/testing/selftests/hid/config.common
@@ -0,0 +1,240 @@
+CONFIG_9P_FS_POSIX_ACL=y
+CONFIG_9P_FS_SECURITY=y
+CONFIG_9P_FS=y
+CONFIG_AUDIT=y
+CONFIG_BINFMT_MISC=y
+CONFIG_BLK_CGROUP_IOLATENCY=y
+CONFIG_BLK_CGROUP=y
+CONFIG_BLK_DEV_BSGLIB=y
+CONFIG_BLK_DEV_IO_TRACE=y
+CONFIG_BLK_DEV_RAM_SIZE=16384
+CONFIG_BLK_DEV_RAM=y
+CONFIG_BLK_DEV_THROTTLING=y
+CONFIG_BONDING=y
+CONFIG_BOOTPARAM_HARDLOCKUP_PANIC=y
+CONFIG_BOOTTIME_TRACING=y
+CONFIG_BSD_DISKLABEL=y
+CONFIG_BSD_PROCESS_ACCT=y
+CONFIG_CFS_BANDWIDTH=y
+CONFIG_CGROUP_CPUACCT=y
+CONFIG_CGROUP_DEBUG=y
+CONFIG_CGROUP_DEVICE=y
+CONFIG_CGROUP_FREEZER=y
+CONFIG_CGROUP_HUGETLB=y
+CONFIG_CGROUP_NET_CLASSID=y
+CONFIG_CGROUP_NET_PRIO=y
+CONFIG_CGROUP_PERF=y
+CONFIG_CGROUP_PIDS=y
+CONFIG_CGROUP_RDMA=y
+CONFIG_CGROUP_SCHED=y
+CONFIG_CGROUPS=y
+CONFIG_CGROUP_WRITEBACK=y
+CONFIG_CMA_AREAS=7
+CONFIG_CMA=y
+CONFIG_COMPAT_32BIT_TIME=y
+CONFIG_CPU_FREQ_DEFAULT_GOV_PERFORMANCE=y
+CONFIG_CPU_FREQ_GOV_CONSERVATIVE=y
+CONFIG_CPU_FREQ_GOV_ONDEMAND=y
+CONFIG_CPU_FREQ_GOV_USERSPACE=y
+CONFIG_CPU_FREQ_STAT=y
+CONFIG_CPU_IDLE_GOV_LADDER=y
+CONFIG_CPUSETS=y
+CONFIG_CRC_T10DIF=y
+CONFIG_CRYPTO_BLAKE2B=y
+CONFIG_CRYPTO_DEV_VIRTIO=y
+CONFIG_CRYPTO_SEQIV=y
+CONFIG_CRYPTO_XXHASH=y
+CONFIG_DCB=y
+CONFIG_DEBUG_ATOMIC_SLEEP=y
+CONFIG_DEBUG_INFO_DWARF_TOOLCHAIN_DEFAULT=y
+CONFIG_DEBUG_MEMORY_INIT=y
+CONFIG_DEFAULT_FQ_CODEL=y
+CONFIG_DEFAULT_RENO=y
+CONFIG_DEFAULT_SECURITY_DAC=y
+CONFIG_DEVTMPFS_MOUNT=y
+CONFIG_DEVTMPFS=y
+CONFIG_DMA_CMA=y
+CONFIG_DNS_RESOLVER=y
+CONFIG_EFI_STUB=y
+CONFIG_EFI=y
+CONFIG_EXPERT=y
+CONFIG_EXT4_FS_POSIX_ACL=y
+CONFIG_EXT4_FS_SECURITY=y
+CONFIG_EXT4_FS=y
+CONFIG_FAIL_FUNCTION=y
+CONFIG_FAULT_INJECTION_DEBUG_FS=y
+CONFIG_FAULT_INJECTION=y
+CONFIG_FB_MODE_HELPERS=y
+CONFIG_FB_TILEBLITTING=y
+CONFIG_FB_VESA=y
+CONFIG_FB=y
+CONFIG_FONT_8x16=y
+CONFIG_FONT_MINI_4x6=y
+CONFIG_FONTS=y
+CONFIG_FRAMEBUFFER_CONSOLE_DETECT_PRIMARY=y
+CONFIG_FRAMEBUFFER_CONSOLE_ROTATION=y
+CONFIG_FRAMEBUFFER_CONSOLE=y
+CONFIG_FUSE_FS=y
+CONFIG_FW_LOADER_USER_HELPER=y
+CONFIG_GART_IOMMU=y
+CONFIG_GENERIC_PHY=y
+CONFIG_HARDLOCKUP_DETECTOR=y
+CONFIG_HIGH_RES_TIMERS=y
+CONFIG_HPET=y
+CONFIG_HUGETLBFS=y
+CONFIG_HUGETLB_PAGE=y
+CONFIG_HWPOISON_INJECT=y
+CONFIG_HZ_1000=y
+CONFIG_INET=y
+CONFIG_INTEL_POWERCLAMP=y
+CONFIG_IP6_NF_FILTER=y
+CONFIG_IP6_NF_IPTABLES=y
+CONFIG_IP6_NF_NAT=y
+CONFIG_IP6_NF_TARGET_MASQUERADE=y
+CONFIG_IP_ADVANCED_ROUTER=y
+CONFIG_IP_MROUTE=y
+CONFIG_IP_MULTICAST=y
+CONFIG_IP_MULTIPLE_TABLES=y
+CONFIG_IP_NF_FILTER=y
+CONFIG_IP_NF_IPTABLES=y
+CONFIG_IP_NF_NAT=y
+CONFIG_IP_NF_TARGET_MASQUERADE=y
+CONFIG_IP_PIMSM_V1=y
+CONFIG_IP_PIMSM_V2=y
+CONFIG_IP_ROUTE_MULTIPATH=y
+CONFIG_IP_ROUTE_VERBOSE=y
+CONFIG_IPV6_MIP6=y
+CONFIG_IPV6_ROUTE_INFO=y
+CONFIG_IPV6_ROUTER_PREF=y
+CONFIG_IPV6_SEG6_LWTUNNEL=y
+CONFIG_IPV6_SUBTREES=y
+CONFIG_IRQ_POLL=y
+CONFIG_JUMP_LABEL=y
+CONFIG_KARMA_PARTITION=y
+CONFIG_KEXEC=y
+CONFIG_KPROBES=y
+CONFIG_KSM=y
+CONFIG_LEGACY_VSYSCALL_NONE=y
+CONFIG_LOG_BUF_SHIFT=21
+CONFIG_LOG_CPU_MAX_BUF_SHIFT=0
+CONFIG_LOGO=y
+CONFIG_LSM="selinux,bpf,integrity"
+CONFIG_MAC_PARTITION=y
+CONFIG_MAGIC_SYSRQ=y
+CONFIG_MCORE2=y
+CONFIG_MEMCG=y
+CONFIG_MEMORY_FAILURE=y
+CONFIG_MINIX_SUBPARTITION=y
+CONFIG_MODULES=y
+CONFIG_NAMESPACES=y
+CONFIG_NET_9P_VIRTIO=y
+CONFIG_NET_9P=y
+CONFIG_NET_ACT_BPF=y
+CONFIG_NET_CLS_CGROUP=y
+CONFIG_NETDEVICES=y
+CONFIG_NET_EMATCH=y
+CONFIG_NETFILTER_NETLINK_LOG=y
+CONFIG_NETFILTER_NETLINK_QUEUE=y
+CONFIG_NETFILTER_XTABLES=y
+CONFIG_NETFILTER_XT_MATCH_ADDRTYPE=y
+CONFIG_NETFILTER_XT_MATCH_BPF=y
+CONFIG_NETFILTER_XT_MATCH_COMMENT=y
+CONFIG_NETFILTER_XT_MATCH_CONNTRACK=y
+CONFIG_NETFILTER_XT_MATCH_MARK=y
+CONFIG_NETFILTER_XT_MATCH_MULTIPORT=y
+CONFIG_NETFILTER_XT_MATCH_STATISTIC=y
+CONFIG_NETFILTER_XT_NAT=y
+CONFIG_NETFILTER_XT_TARGET_MASQUERADE=y
+CONFIG_NET_IPGRE_BROADCAST=y
+CONFIG_NET_L3_MASTER_DEV=y
+CONFIG_NETLABEL=y
+CONFIG_NET_SCH_DEFAULT=y
+CONFIG_NET_SCHED=y
+CONFIG_NET_SCH_FQ_CODEL=y
+CONFIG_NET_TC_SKB_EXT=y
+CONFIG_NET_VRF=y
+CONFIG_NET=y
+CONFIG_NF_CONNTRACK=y
+CONFIG_NF_NAT_MASQUERADE=y
+CONFIG_NF_NAT=y
+CONFIG_NLS_ASCII=y
+CONFIG_NLS_CODEPAGE_437=y
+CONFIG_NLS_DEFAULT="utf8"
+CONFIG_NO_HZ=y
+CONFIG_NR_CPUS=128
+CONFIG_NUMA_BALANCING=y
+CONFIG_NUMA=y
+CONFIG_NVMEM=y
+CONFIG_OSF_PARTITION=y
+CONFIG_OVERLAY_FS_INDEX=y
+CONFIG_OVERLAY_FS_METACOPY=y
+CONFIG_OVERLAY_FS_XINO_AUTO=y
+CONFIG_OVERLAY_FS=y
+CONFIG_PACKET=y
+CONFIG_PANIC_ON_OOPS=y
+CONFIG_PARTITION_ADVANCED=y
+CONFIG_PCIEPORTBUS=y
+CONFIG_PCI_IOV=y
+CONFIG_PCI_MSI=y
+CONFIG_PCI=y
+CONFIG_PHYSICAL_ALIGN=0x1000000
+CONFIG_POSIX_MQUEUE=y
+CONFIG_POWER_SUPPLY=y
+CONFIG_PREEMPT=y
+CONFIG_PRINTK_TIME=y
+CONFIG_PROC_KCORE=y
+CONFIG_PROFILING=y
+CONFIG_PROVE_LOCKING=y
+CONFIG_PTP_1588_CLOCK=y
+CONFIG_RC_DEVICES=y
+CONFIG_RC_LOOPBACK=y
+CONFIG_RCU_CPU_STALL_TIMEOUT=60
+CONFIG_SCHED_STACK_END_CHECK=y
+CONFIG_SCHEDSTATS=y
+CONFIG_SECURITY_NETWORK=y
+CONFIG_SECURITY_SELINUX=y
+CONFIG_SERIAL_8250_CONSOLE=y
+CONFIG_SERIAL_8250_DETECT_IRQ=y
+CONFIG_SERIAL_8250_EXTENDED=y
+CONFIG_SERIAL_8250_MANY_PORTS=y
+CONFIG_SERIAL_8250_NR_UARTS=32
+CONFIG_SERIAL_8250_RSA=y
+CONFIG_SERIAL_8250_SHARE_IRQ=y
+CONFIG_SERIAL_8250=y
+CONFIG_SERIAL_NONSTANDARD=y
+CONFIG_SERIO_LIBPS2=y
+CONFIG_SGI_PARTITION=y
+CONFIG_SMP=y
+CONFIG_SOCK_CGROUP_DATA=y
+CONFIG_SOLARIS_X86_PARTITION=y
+CONFIG_SUN_PARTITION=y
+CONFIG_SYNC_FILE=y
+CONFIG_SYSVIPC=y
+CONFIG_TASK_DELAY_ACCT=y
+CONFIG_TASK_IO_ACCOUNTING=y
+CONFIG_TASKSTATS=y
+CONFIG_TASK_XACCT=y
+CONFIG_TCP_CONG_ADVANCED=y
+CONFIG_TCP_MD5SIG=y
+CONFIG_TLS=y
+CONFIG_TMPFS_POSIX_ACL=y
+CONFIG_TMPFS=y
+CONFIG_TRANSPARENT_HUGEPAGE_MADVISE=y
+CONFIG_TRANSPARENT_HUGEPAGE=y
+CONFIG_TUN=y
+CONFIG_UNIXWARE_DISKLABEL=y
+CONFIG_UNIX=y
+CONFIG_USER_NS=y
+CONFIG_VALIDATE_FS_PARSER=y
+CONFIG_VETH=y
+CONFIG_VIRT_DRIVERS=y
+CONFIG_VIRTIO_BALLOON=y
+CONFIG_VIRTIO_BLK=y
+CONFIG_VIRTIO_CONSOLE=y
+CONFIG_VIRTIO_FS=y
+CONFIG_VIRTIO_NET=y
+CONFIG_VIRTIO_PCI=y
+CONFIG_VLAN_8021Q=y
+CONFIG_XFRM_SUB_POLICY=y
+CONFIG_XFRM_USER=y
+CONFIG_ZEROPLUS_FF=y
diff --git a/tools/testing/selftests/hid/config.x86_64 b/tools/testing/selftests/hid/config.x86_64
new file mode 100644
index 000000000000..a8721f403c21
--- /dev/null
+++ b/tools/testing/selftests/hid/config.x86_64
@@ -0,0 +1,4 @@
+CONFIG_X86_ACPI_CPUFREQ=y
+CONFIG_X86_CPUID=y
+CONFIG_X86_MSR=y
+CONFIG_X86_POWERNOW_K8=y
diff --git a/tools/testing/selftests/hid/hid-apple.sh b/tools/testing/selftests/hid/hid-apple.sh
new file mode 100755
index 000000000000..656f2d5ae5a9
--- /dev/null
+++ b/tools/testing/selftests/hid/hid-apple.sh
@@ -0,0 +1,7 @@
+#!/bin/sh
+# SPDX-License-Identifier: GPL-2.0
+# Runs tests for the HID subsystem
+
+export TARGET=test_apple_keyboard.py
+
+bash ./run-hid-tools-tests.sh
diff --git a/tools/testing/selftests/hid/hid-core.sh b/tools/testing/selftests/hid/hid-core.sh
new file mode 100755
index 000000000000..5bbabc12c34f
--- /dev/null
+++ b/tools/testing/selftests/hid/hid-core.sh
@@ -0,0 +1,7 @@
+#!/bin/sh
+# SPDX-License-Identifier: GPL-2.0
+# Runs tests for the HID subsystem
+
+export TARGET=test_hid_core.py
+
+bash ./run-hid-tools-tests.sh
diff --git a/tools/testing/selftests/hid/hid-gamepad.sh b/tools/testing/selftests/hid/hid-gamepad.sh
new file mode 100755
index 000000000000..1ba00c0ca95f
--- /dev/null
+++ b/tools/testing/selftests/hid/hid-gamepad.sh
@@ -0,0 +1,7 @@
+#!/bin/sh
+# SPDX-License-Identifier: GPL-2.0
+# Runs tests for the HID subsystem
+
+export TARGET=test_gamepad.py
+
+bash ./run-hid-tools-tests.sh
diff --git a/tools/testing/selftests/hid/hid-ite.sh b/tools/testing/selftests/hid/hid-ite.sh
new file mode 100755
index 000000000000..52c5ccf42292
--- /dev/null
+++ b/tools/testing/selftests/hid/hid-ite.sh
@@ -0,0 +1,7 @@
+#!/bin/sh
+# SPDX-License-Identifier: GPL-2.0
+# Runs tests for the HID subsystem
+
+export TARGET=test_ite_keyboard.py
+
+bash ./run-hid-tools-tests.sh
diff --git a/tools/testing/selftests/hid/hid-keyboard.sh b/tools/testing/selftests/hid/hid-keyboard.sh
new file mode 100755
index 000000000000..55368f17d1d5
--- /dev/null
+++ b/tools/testing/selftests/hid/hid-keyboard.sh
@@ -0,0 +1,7 @@
+#!/bin/sh
+# SPDX-License-Identifier: GPL-2.0
+# Runs tests for the HID subsystem
+
+export TARGET=test_keyboard.py
+
+bash ./run-hid-tools-tests.sh
diff --git a/tools/testing/selftests/hid/hid-mouse.sh b/tools/testing/selftests/hid/hid-mouse.sh
new file mode 100755
index 000000000000..7b4ad4f646f7
--- /dev/null
+++ b/tools/testing/selftests/hid/hid-mouse.sh
@@ -0,0 +1,7 @@
+#!/bin/sh
+# SPDX-License-Identifier: GPL-2.0
+# Runs tests for the HID subsystem
+
+export TARGET=test_mouse.py
+
+bash ./run-hid-tools-tests.sh
diff --git a/tools/testing/selftests/hid/hid-multitouch.sh b/tools/testing/selftests/hid/hid-multitouch.sh
new file mode 100755
index 000000000000..d03a1ddbfb1f
--- /dev/null
+++ b/tools/testing/selftests/hid/hid-multitouch.sh
@@ -0,0 +1,7 @@
+#!/bin/sh
+# SPDX-License-Identifier: GPL-2.0
+# Runs tests for the HID subsystem
+
+export TARGET=test_multitouch.py
+
+bash ./run-hid-tools-tests.sh
diff --git a/tools/testing/selftests/hid/hid-sony.sh b/tools/testing/selftests/hid/hid-sony.sh
new file mode 100755
index 000000000000..c863c442686e
--- /dev/null
+++ b/tools/testing/selftests/hid/hid-sony.sh
@@ -0,0 +1,7 @@
+#!/bin/sh
+# SPDX-License-Identifier: GPL-2.0
+# Runs tests for the HID subsystem
+
+export TARGET=test_sony.py
+
+bash ./run-hid-tools-tests.sh
diff --git a/tools/testing/selftests/hid/hid-tablet.sh b/tools/testing/selftests/hid/hid-tablet.sh
new file mode 100755
index 000000000000..e86b3fedafd9
--- /dev/null
+++ b/tools/testing/selftests/hid/hid-tablet.sh
@@ -0,0 +1,7 @@
+#!/bin/sh
+# SPDX-License-Identifier: GPL-2.0
+# Runs tests for the HID subsystem
+
+export TARGET=test_tablet.py
+
+bash ./run-hid-tools-tests.sh
diff --git a/tools/testing/selftests/hid/hid-usb_crash.sh b/tools/testing/selftests/hid/hid-usb_crash.sh
new file mode 100755
index 000000000000..3f0debe7e8fd
--- /dev/null
+++ b/tools/testing/selftests/hid/hid-usb_crash.sh
@@ -0,0 +1,7 @@
+#!/bin/sh
+# SPDX-License-Identifier: GPL-2.0
+# Runs tests for the HID subsystem
+
+export TARGET=test_usb_crash.py
+
+bash ./run-hid-tools-tests.sh
diff --git a/tools/testing/selftests/hid/hid-wacom.sh b/tools/testing/selftests/hid/hid-wacom.sh
new file mode 100755
index 000000000000..1630c22726d2
--- /dev/null
+++ b/tools/testing/selftests/hid/hid-wacom.sh
@@ -0,0 +1,7 @@
+#!/bin/sh
+# SPDX-License-Identifier: GPL-2.0
+# Runs tests for the HID subsystem
+
+export TARGET=test_wacom_generic.py
+
+bash ./run-hid-tools-tests.sh
diff --git a/tools/testing/selftests/hid/hid_bpf.c b/tools/testing/selftests/hid/hid_bpf.c
new file mode 100644
index 000000000000..2cf96f818f25
--- /dev/null
+++ b/tools/testing/selftests/hid/hid_bpf.c
@@ -0,0 +1,869 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2022 Red Hat */
+#include "hid.skel.h"
+
+#include "../kselftest_harness.h"
+
+#include <bpf/bpf.h>
+#include <fcntl.h>
+#include <fnmatch.h>
+#include <dirent.h>
+#include <poll.h>
+#include <pthread.h>
+#include <stdbool.h>
+#include <linux/hidraw.h>
+#include <linux/uhid.h>
+
+#define SHOW_UHID_DEBUG 0
+
+static unsigned char rdesc[] = {
+ 0x06, 0x00, 0xff, /* Usage Page (Vendor Defined Page 1) */
+ 0x09, 0x21, /* Usage (Vendor Usage 0x21) */
+ 0xa1, 0x01, /* COLLECTION (Application) */
+ 0x09, 0x01, /* Usage (Vendor Usage 0x01) */
+ 0xa1, 0x00, /* COLLECTION (Physical) */
+ 0x85, 0x02, /* REPORT_ID (2) */
+ 0x19, 0x01, /* USAGE_MINIMUM (1) */
+ 0x29, 0x08, /* USAGE_MAXIMUM (3) */
+ 0x15, 0x00, /* LOGICAL_MINIMUM (0) */
+ 0x25, 0xff, /* LOGICAL_MAXIMUM (255) */
+ 0x95, 0x08, /* REPORT_COUNT (8) */
+ 0x75, 0x08, /* REPORT_SIZE (8) */
+ 0x81, 0x02, /* INPUT (Data,Var,Abs) */
+ 0xc0, /* END_COLLECTION */
+ 0x09, 0x01, /* Usage (Vendor Usage 0x01) */
+ 0xa1, 0x00, /* COLLECTION (Physical) */
+ 0x85, 0x01, /* REPORT_ID (1) */
+ 0x06, 0x00, 0xff, /* Usage Page (Vendor Defined Page 1) */
+ 0x19, 0x01, /* USAGE_MINIMUM (1) */
+ 0x29, 0x03, /* USAGE_MAXIMUM (3) */
+ 0x15, 0x00, /* LOGICAL_MINIMUM (0) */
+ 0x25, 0x01, /* LOGICAL_MAXIMUM (1) */
+ 0x95, 0x03, /* REPORT_COUNT (3) */
+ 0x75, 0x01, /* REPORT_SIZE (1) */
+ 0x81, 0x02, /* INPUT (Data,Var,Abs) */
+ 0x95, 0x01, /* REPORT_COUNT (1) */
+ 0x75, 0x05, /* REPORT_SIZE (5) */
+ 0x81, 0x01, /* INPUT (Cnst,Var,Abs) */
+ 0x05, 0x01, /* USAGE_PAGE (Generic Desktop) */
+ 0x09, 0x30, /* USAGE (X) */
+ 0x09, 0x31, /* USAGE (Y) */
+ 0x15, 0x81, /* LOGICAL_MINIMUM (-127) */
+ 0x25, 0x7f, /* LOGICAL_MAXIMUM (127) */
+ 0x75, 0x10, /* REPORT_SIZE (16) */
+ 0x95, 0x02, /* REPORT_COUNT (2) */
+ 0x81, 0x06, /* INPUT (Data,Var,Rel) */
+
+ 0x06, 0x00, 0xff, /* Usage Page (Vendor Defined Page 1) */
+ 0x19, 0x01, /* USAGE_MINIMUM (1) */
+ 0x29, 0x03, /* USAGE_MAXIMUM (3) */
+ 0x15, 0x00, /* LOGICAL_MINIMUM (0) */
+ 0x25, 0x01, /* LOGICAL_MAXIMUM (1) */
+ 0x95, 0x03, /* REPORT_COUNT (3) */
+ 0x75, 0x01, /* REPORT_SIZE (1) */
+ 0x91, 0x02, /* Output (Data,Var,Abs) */
+ 0x95, 0x01, /* REPORT_COUNT (1) */
+ 0x75, 0x05, /* REPORT_SIZE (5) */
+ 0x91, 0x01, /* Output (Cnst,Var,Abs) */
+
+ 0x06, 0x00, 0xff, /* Usage Page (Vendor Defined Page 1) */
+ 0x19, 0x06, /* USAGE_MINIMUM (6) */
+ 0x29, 0x08, /* USAGE_MAXIMUM (8) */
+ 0x15, 0x00, /* LOGICAL_MINIMUM (0) */
+ 0x25, 0x01, /* LOGICAL_MAXIMUM (1) */
+ 0x95, 0x03, /* REPORT_COUNT (3) */
+ 0x75, 0x01, /* REPORT_SIZE (1) */
+ 0xb1, 0x02, /* Feature (Data,Var,Abs) */
+ 0x95, 0x01, /* REPORT_COUNT (1) */
+ 0x75, 0x05, /* REPORT_SIZE (5) */
+ 0x91, 0x01, /* Output (Cnst,Var,Abs) */
+
+ 0xc0, /* END_COLLECTION */
+ 0xc0, /* END_COLLECTION */
+};
+
+static __u8 feature_data[] = { 1, 2 };
+
+struct attach_prog_args {
+ int prog_fd;
+ unsigned int hid;
+ int retval;
+ int insert_head;
+};
+
+struct hid_hw_request_syscall_args {
+ __u8 data[10];
+ unsigned int hid;
+ int retval;
+ size_t size;
+ enum hid_report_type type;
+ __u8 request_type;
+};
+
+#define ASSERT_OK(data) ASSERT_FALSE(data)
+#define ASSERT_OK_PTR(ptr) ASSERT_NE(NULL, ptr)
+
+#define UHID_LOG(fmt, ...) do { \
+ if (SHOW_UHID_DEBUG) \
+ TH_LOG(fmt, ##__VA_ARGS__); \
+} while (0)
+
+static pthread_mutex_t uhid_started_mtx = PTHREAD_MUTEX_INITIALIZER;
+static pthread_cond_t uhid_started = PTHREAD_COND_INITIALIZER;
+
+/* no need to protect uhid_stopped, only one thread accesses it */
+static bool uhid_stopped;
+
+static int uhid_write(struct __test_metadata *_metadata, int fd, const struct uhid_event *ev)
+{
+ ssize_t ret;
+
+ ret = write(fd, ev, sizeof(*ev));
+ if (ret < 0) {
+ TH_LOG("Cannot write to uhid: %m");
+ return -errno;
+ } else if (ret != sizeof(*ev)) {
+ TH_LOG("Wrong size written to uhid: %zd != %zu",
+ ret, sizeof(ev));
+ return -EFAULT;
+ } else {
+ return 0;
+ }
+}
+
+static int uhid_create(struct __test_metadata *_metadata, int fd, int rand_nb)
+{
+ struct uhid_event ev;
+ char buf[25];
+
+ sprintf(buf, "test-uhid-device-%d", rand_nb);
+
+ memset(&ev, 0, sizeof(ev));
+ ev.type = UHID_CREATE;
+ strcpy((char *)ev.u.create.name, buf);
+ ev.u.create.rd_data = rdesc;
+ ev.u.create.rd_size = sizeof(rdesc);
+ ev.u.create.bus = BUS_USB;
+ ev.u.create.vendor = 0x0001;
+ ev.u.create.product = 0x0a37;
+ ev.u.create.version = 0;
+ ev.u.create.country = 0;
+
+ sprintf(buf, "%d", rand_nb);
+ strcpy((char *)ev.u.create.phys, buf);
+
+ return uhid_write(_metadata, fd, &ev);
+}
+
+static void uhid_destroy(struct __test_metadata *_metadata, int fd)
+{
+ struct uhid_event ev;
+
+ memset(&ev, 0, sizeof(ev));
+ ev.type = UHID_DESTROY;
+
+ uhid_write(_metadata, fd, &ev);
+}
+
+static int uhid_event(struct __test_metadata *_metadata, int fd)
+{
+ struct uhid_event ev, answer;
+ ssize_t ret;
+
+ memset(&ev, 0, sizeof(ev));
+ ret = read(fd, &ev, sizeof(ev));
+ if (ret == 0) {
+ UHID_LOG("Read HUP on uhid-cdev");
+ return -EFAULT;
+ } else if (ret < 0) {
+ UHID_LOG("Cannot read uhid-cdev: %m");
+ return -errno;
+ } else if (ret != sizeof(ev)) {
+ UHID_LOG("Invalid size read from uhid-dev: %zd != %zu",
+ ret, sizeof(ev));
+ return -EFAULT;
+ }
+
+ switch (ev.type) {
+ case UHID_START:
+ pthread_mutex_lock(&uhid_started_mtx);
+ pthread_cond_signal(&uhid_started);
+ pthread_mutex_unlock(&uhid_started_mtx);
+
+ UHID_LOG("UHID_START from uhid-dev");
+ break;
+ case UHID_STOP:
+ uhid_stopped = true;
+
+ UHID_LOG("UHID_STOP from uhid-dev");
+ break;
+ case UHID_OPEN:
+ UHID_LOG("UHID_OPEN from uhid-dev");
+ break;
+ case UHID_CLOSE:
+ UHID_LOG("UHID_CLOSE from uhid-dev");
+ break;
+ case UHID_OUTPUT:
+ UHID_LOG("UHID_OUTPUT from uhid-dev");
+ break;
+ case UHID_GET_REPORT:
+ UHID_LOG("UHID_GET_REPORT from uhid-dev");
+
+ answer.type = UHID_GET_REPORT_REPLY;
+ answer.u.get_report_reply.id = ev.u.get_report.id;
+ answer.u.get_report_reply.err = ev.u.get_report.rnum == 1 ? 0 : -EIO;
+ answer.u.get_report_reply.size = sizeof(feature_data);
+ memcpy(answer.u.get_report_reply.data, feature_data, sizeof(feature_data));
+
+ uhid_write(_metadata, fd, &answer);
+
+ break;
+ case UHID_SET_REPORT:
+ UHID_LOG("UHID_SET_REPORT from uhid-dev");
+ break;
+ default:
+ TH_LOG("Invalid event from uhid-dev: %u", ev.type);
+ }
+
+ return 0;
+}
+
+struct uhid_thread_args {
+ int fd;
+ struct __test_metadata *_metadata;
+};
+static void *uhid_read_events_thread(void *arg)
+{
+ struct uhid_thread_args *args = (struct uhid_thread_args *)arg;
+ struct __test_metadata *_metadata = args->_metadata;
+ struct pollfd pfds[1];
+ int fd = args->fd;
+ int ret = 0;
+
+ pfds[0].fd = fd;
+ pfds[0].events = POLLIN;
+
+ uhid_stopped = false;
+
+ while (!uhid_stopped) {
+ ret = poll(pfds, 1, 100);
+ if (ret < 0) {
+ TH_LOG("Cannot poll for fds: %m");
+ break;
+ }
+ if (pfds[0].revents & POLLIN) {
+ ret = uhid_event(_metadata, fd);
+ if (ret)
+ break;
+ }
+ }
+
+ return (void *)(long)ret;
+}
+
+static int uhid_start_listener(struct __test_metadata *_metadata, pthread_t *tid, int uhid_fd)
+{
+ struct uhid_thread_args args = {
+ .fd = uhid_fd,
+ ._metadata = _metadata,
+ };
+ int err;
+
+ pthread_mutex_lock(&uhid_started_mtx);
+ err = pthread_create(tid, NULL, uhid_read_events_thread, (void *)&args);
+ ASSERT_EQ(0, err) {
+ TH_LOG("Could not start the uhid thread: %d", err);
+ pthread_mutex_unlock(&uhid_started_mtx);
+ close(uhid_fd);
+ return -EIO;
+ }
+ pthread_cond_wait(&uhid_started, &uhid_started_mtx);
+ pthread_mutex_unlock(&uhid_started_mtx);
+
+ return 0;
+}
+
+static int uhid_send_event(struct __test_metadata *_metadata, int fd, __u8 *buf, size_t size)
+{
+ struct uhid_event ev;
+
+ if (size > sizeof(ev.u.input.data))
+ return -E2BIG;
+
+ memset(&ev, 0, sizeof(ev));
+ ev.type = UHID_INPUT2;
+ ev.u.input2.size = size;
+
+ memcpy(ev.u.input2.data, buf, size);
+
+ return uhid_write(_metadata, fd, &ev);
+}
+
+static int setup_uhid(struct __test_metadata *_metadata, int rand_nb)
+{
+ int fd;
+ const char *path = "/dev/uhid";
+ int ret;
+
+ fd = open(path, O_RDWR | O_CLOEXEC);
+ ASSERT_GE(fd, 0) TH_LOG("open uhid-cdev failed; %d", fd);
+
+ ret = uhid_create(_metadata, fd, rand_nb);
+ ASSERT_EQ(0, ret) {
+ TH_LOG("create uhid device failed: %d", ret);
+ close(fd);
+ }
+
+ return fd;
+}
+
+static bool match_sysfs_device(int dev_id, const char *workdir, struct dirent *dir)
+{
+ const char *target = "0003:0001:0A37.*";
+ char phys[512];
+ char uevent[1024];
+ char temp[512];
+ int fd, nread;
+ bool found = false;
+
+ if (fnmatch(target, dir->d_name, 0))
+ return false;
+
+ /* we found the correct VID/PID, now check for phys */
+ sprintf(uevent, "%s/%s/uevent", workdir, dir->d_name);
+
+ fd = open(uevent, O_RDONLY | O_NONBLOCK);
+ if (fd < 0)
+ return false;
+
+ sprintf(phys, "PHYS=%d", dev_id);
+
+ nread = read(fd, temp, ARRAY_SIZE(temp));
+ if (nread > 0 && (strstr(temp, phys)) != NULL)
+ found = true;
+
+ close(fd);
+
+ return found;
+}
+
+static int get_hid_id(int dev_id)
+{
+ const char *workdir = "/sys/devices/virtual/misc/uhid";
+ const char *str_id;
+ DIR *d;
+ struct dirent *dir;
+ int found = -1, attempts = 3;
+
+ /* it would be nice to be able to use nftw, but the no_alu32 target doesn't support it */
+
+ while (found < 0 && attempts > 0) {
+ attempts--;
+ d = opendir(workdir);
+ if (d) {
+ while ((dir = readdir(d)) != NULL) {
+ if (!match_sysfs_device(dev_id, workdir, dir))
+ continue;
+
+ str_id = dir->d_name + sizeof("0003:0001:0A37.");
+ found = (int)strtol(str_id, NULL, 16);
+
+ break;
+ }
+ closedir(d);
+ }
+ if (found < 0)
+ usleep(100000);
+ }
+
+ return found;
+}
+
+static int get_hidraw(int dev_id)
+{
+ const char *workdir = "/sys/devices/virtual/misc/uhid";
+ char sysfs[1024];
+ DIR *d, *subd;
+ struct dirent *dir, *subdir;
+ int i, found = -1;
+
+ /* retry 5 times in case the system is loaded */
+ for (i = 5; i > 0; i--) {
+ usleep(10);
+ d = opendir(workdir);
+
+ if (!d)
+ continue;
+
+ while ((dir = readdir(d)) != NULL) {
+ if (!match_sysfs_device(dev_id, workdir, dir))
+ continue;
+
+ sprintf(sysfs, "%s/%s/hidraw", workdir, dir->d_name);
+
+ subd = opendir(sysfs);
+ if (!subd)
+ continue;
+
+ while ((subdir = readdir(subd)) != NULL) {
+ if (fnmatch("hidraw*", subdir->d_name, 0))
+ continue;
+
+ found = atoi(subdir->d_name + strlen("hidraw"));
+ }
+
+ closedir(subd);
+
+ if (found > 0)
+ break;
+ }
+ closedir(d);
+ }
+
+ return found;
+}
+
+static int open_hidraw(int dev_id)
+{
+ int hidraw_number;
+ char hidraw_path[64] = { 0 };
+
+ hidraw_number = get_hidraw(dev_id);
+ if (hidraw_number < 0)
+ return hidraw_number;
+
+ /* open hidraw node to check the other side of the pipe */
+ sprintf(hidraw_path, "/dev/hidraw%d", hidraw_number);
+ return open(hidraw_path, O_RDWR | O_NONBLOCK);
+}
+
+FIXTURE(hid_bpf) {
+ int dev_id;
+ int uhid_fd;
+ int hidraw_fd;
+ int hid_id;
+ pthread_t tid;
+ struct hid *skel;
+ int hid_links[3]; /* max number of programs loaded in a single test */
+};
+static void detach_bpf(FIXTURE_DATA(hid_bpf) * self)
+{
+ int i;
+
+ if (self->hidraw_fd)
+ close(self->hidraw_fd);
+ self->hidraw_fd = 0;
+
+ for (i = 0; i < ARRAY_SIZE(self->hid_links); i++) {
+ if (self->hid_links[i])
+ close(self->hid_links[i]);
+ }
+
+ hid__destroy(self->skel);
+ self->skel = NULL;
+}
+
+FIXTURE_TEARDOWN(hid_bpf) {
+ void *uhid_err;
+
+ uhid_destroy(_metadata, self->uhid_fd);
+
+ detach_bpf(self);
+ pthread_join(self->tid, &uhid_err);
+}
+#define TEARDOWN_LOG(fmt, ...) do { \
+ TH_LOG(fmt, ##__VA_ARGS__); \
+ hid_bpf_teardown(_metadata, self, variant); \
+} while (0)
+
+FIXTURE_SETUP(hid_bpf)
+{
+ time_t t;
+ int err;
+
+ /* initialize random number generator */
+ srand((unsigned int)time(&t));
+
+ self->dev_id = rand() % 1024;
+
+ self->uhid_fd = setup_uhid(_metadata, self->dev_id);
+
+ /* locate the uev, self, variant);ent file of the created device */
+ self->hid_id = get_hid_id(self->dev_id);
+ ASSERT_GT(self->hid_id, 0)
+ TEARDOWN_LOG("Could not locate uhid device id: %d", self->hid_id);
+
+ err = uhid_start_listener(_metadata, &self->tid, self->uhid_fd);
+ ASSERT_EQ(0, err) TEARDOWN_LOG("could not start udev listener: %d", err);
+}
+
+struct test_program {
+ const char *name;
+ int insert_head;
+};
+#define LOAD_PROGRAMS(progs) \
+ load_programs(progs, ARRAY_SIZE(progs), _metadata, self, variant)
+#define LOAD_BPF \
+ load_programs(NULL, 0, _metadata, self, variant)
+static void load_programs(const struct test_program programs[],
+ const size_t progs_count,
+ struct __test_metadata *_metadata,
+ FIXTURE_DATA(hid_bpf) * self,
+ const FIXTURE_VARIANT(hid_bpf) * variant)
+{
+ int attach_fd, err = -EINVAL;
+ struct attach_prog_args args = {
+ .retval = -1,
+ };
+ DECLARE_LIBBPF_OPTS(bpf_test_run_opts, tattr,
+ .ctx_in = &args,
+ .ctx_size_in = sizeof(args),
+ );
+
+ ASSERT_LE(progs_count, ARRAY_SIZE(self->hid_links))
+ TH_LOG("too many programs are to be loaded");
+
+ /* open the bpf file */
+ self->skel = hid__open();
+ ASSERT_OK_PTR(self->skel) TEARDOWN_LOG("Error while calling hid__open");
+
+ for (int i = 0; i < progs_count; i++) {
+ struct bpf_program *prog;
+
+ prog = bpf_object__find_program_by_name(*self->skel->skeleton->obj,
+ programs[i].name);
+ ASSERT_OK_PTR(prog) TH_LOG("can not find program by name '%s'", programs[i].name);
+
+ bpf_program__set_autoload(prog, true);
+ }
+
+ err = hid__load(self->skel);
+ ASSERT_OK(err) TH_LOG("hid_skel_load failed: %d", err);
+
+ attach_fd = bpf_program__fd(self->skel->progs.attach_prog);
+ ASSERT_GE(attach_fd, 0) TH_LOG("locate attach_prog: %d", attach_fd);
+
+ for (int i = 0; i < progs_count; i++) {
+ struct bpf_program *prog;
+
+ prog = bpf_object__find_program_by_name(*self->skel->skeleton->obj,
+ programs[i].name);
+ ASSERT_OK_PTR(prog) TH_LOG("can not find program by name '%s'", programs[i].name);
+
+ args.prog_fd = bpf_program__fd(prog);
+ args.hid = self->hid_id;
+ args.insert_head = programs[i].insert_head;
+ err = bpf_prog_test_run_opts(attach_fd, &tattr);
+ ASSERT_GE(args.retval, 0)
+ TH_LOG("attach_hid(%s): %d", programs[i].name, args.retval);
+
+ self->hid_links[i] = args.retval;
+ }
+
+ self->hidraw_fd = open_hidraw(self->dev_id);
+ ASSERT_GE(self->hidraw_fd, 0) TH_LOG("open_hidraw");
+}
+
+/*
+ * A simple test to see if the fixture is working fine.
+ * If this fails, none of the other tests will pass.
+ */
+TEST_F(hid_bpf, test_create_uhid)
+{
+}
+
+/*
+ * Attach hid_first_event to the given uhid device,
+ * retrieve and open the matching hidraw node,
+ * inject one event in the uhid device,
+ * check that the program sees it and can change the data
+ */
+TEST_F(hid_bpf, raw_event)
+{
+ const struct test_program progs[] = {
+ { .name = "hid_first_event" },
+ };
+ __u8 buf[10] = {0};
+ int err;
+
+ LOAD_PROGRAMS(progs);
+
+ /* check that the program is correctly loaded */
+ ASSERT_EQ(self->skel->data->callback_check, 52) TH_LOG("callback_check1");
+ ASSERT_EQ(self->skel->data->callback2_check, 52) TH_LOG("callback2_check1");
+
+ /* inject one event */
+ buf[0] = 1;
+ buf[1] = 42;
+ uhid_send_event(_metadata, self->uhid_fd, buf, 6);
+
+ /* check that hid_first_event() was executed */
+ ASSERT_EQ(self->skel->data->callback_check, 42) TH_LOG("callback_check1");
+
+ /* read the data from hidraw */
+ memset(buf, 0, sizeof(buf));
+ err = read(self->hidraw_fd, buf, sizeof(buf));
+ ASSERT_EQ(err, 6) TH_LOG("read_hidraw");
+ ASSERT_EQ(buf[0], 1);
+ ASSERT_EQ(buf[2], 47);
+
+ /* inject another event */
+ memset(buf, 0, sizeof(buf));
+ buf[0] = 1;
+ buf[1] = 47;
+ uhid_send_event(_metadata, self->uhid_fd, buf, 6);
+
+ /* check that hid_first_event() was executed */
+ ASSERT_EQ(self->skel->data->callback_check, 47) TH_LOG("callback_check1");
+
+ /* read the data from hidraw */
+ memset(buf, 0, sizeof(buf));
+ err = read(self->hidraw_fd, buf, sizeof(buf));
+ ASSERT_EQ(err, 6) TH_LOG("read_hidraw");
+ ASSERT_EQ(buf[2], 52);
+}
+
+/*
+ * Ensures that we can attach/detach programs
+ */
+TEST_F(hid_bpf, test_attach_detach)
+{
+ const struct test_program progs[] = {
+ { .name = "hid_first_event" },
+ { .name = "hid_second_event" },
+ };
+ __u8 buf[10] = {0};
+ int err, link;
+
+ LOAD_PROGRAMS(progs);
+
+ link = self->hid_links[0];
+ ASSERT_GT(link, 0) TH_LOG("HID-BPF link not created");
+
+ /* inject one event */
+ buf[0] = 1;
+ buf[1] = 42;
+ uhid_send_event(_metadata, self->uhid_fd, buf, 6);
+
+ /* read the data from hidraw */
+ memset(buf, 0, sizeof(buf));
+ err = read(self->hidraw_fd, buf, sizeof(buf));
+ ASSERT_EQ(err, 6) TH_LOG("read_hidraw");
+ ASSERT_EQ(buf[0], 1);
+ ASSERT_EQ(buf[2], 47);
+
+ /* make sure both programs are run */
+ ASSERT_EQ(buf[3], 52);
+
+ /* pin the first program and immediately unpin it */
+#define PIN_PATH "/sys/fs/bpf/hid_first_event"
+ err = bpf_obj_pin(link, PIN_PATH);
+ ASSERT_OK(err) TH_LOG("error while calling bpf_obj_pin");
+ remove(PIN_PATH);
+#undef PIN_PATH
+ usleep(100000);
+
+ /* detach the program */
+ detach_bpf(self);
+
+ self->hidraw_fd = open_hidraw(self->dev_id);
+ ASSERT_GE(self->hidraw_fd, 0) TH_LOG("open_hidraw");
+
+ /* inject another event */
+ memset(buf, 0, sizeof(buf));
+ buf[0] = 1;
+ buf[1] = 47;
+ uhid_send_event(_metadata, self->uhid_fd, buf, 6);
+
+ /* read the data from hidraw */
+ memset(buf, 0, sizeof(buf));
+ err = read(self->hidraw_fd, buf, sizeof(buf));
+ ASSERT_EQ(err, 6) TH_LOG("read_hidraw_no_bpf");
+ ASSERT_EQ(buf[0], 1);
+ ASSERT_EQ(buf[1], 47);
+ ASSERT_EQ(buf[2], 0);
+ ASSERT_EQ(buf[3], 0);
+
+ /* re-attach our program */
+
+ LOAD_PROGRAMS(progs);
+
+ /* inject one event */
+ memset(buf, 0, sizeof(buf));
+ buf[0] = 1;
+ buf[1] = 42;
+ uhid_send_event(_metadata, self->uhid_fd, buf, 6);
+
+ /* read the data from hidraw */
+ memset(buf, 0, sizeof(buf));
+ err = read(self->hidraw_fd, buf, sizeof(buf));
+ ASSERT_EQ(err, 6) TH_LOG("read_hidraw");
+ ASSERT_EQ(buf[0], 1);
+ ASSERT_EQ(buf[2], 47);
+ ASSERT_EQ(buf[3], 52);
+}
+
+/*
+ * Attach hid_change_report_id to the given uhid device,
+ * retrieve and open the matching hidraw node,
+ * inject one event in the uhid device,
+ * check that the program sees it and can change the data
+ */
+TEST_F(hid_bpf, test_hid_change_report)
+{
+ const struct test_program progs[] = {
+ { .name = "hid_change_report_id" },
+ };
+ __u8 buf[10] = {0};
+ int err;
+
+ LOAD_PROGRAMS(progs);
+
+ /* inject one event */
+ buf[0] = 1;
+ buf[1] = 42;
+ uhid_send_event(_metadata, self->uhid_fd, buf, 6);
+
+ /* read the data from hidraw */
+ memset(buf, 0, sizeof(buf));
+ err = read(self->hidraw_fd, buf, sizeof(buf));
+ ASSERT_EQ(err, 9) TH_LOG("read_hidraw");
+ ASSERT_EQ(buf[0], 2);
+ ASSERT_EQ(buf[1], 42);
+ ASSERT_EQ(buf[2], 0) TH_LOG("leftovers_from_previous_test");
+}
+
+/*
+ * Attach hid_user_raw_request to the given uhid device,
+ * call the bpf program from userspace
+ * check that the program is called and does the expected.
+ */
+TEST_F(hid_bpf, test_hid_user_raw_request_call)
+{
+ struct hid_hw_request_syscall_args args = {
+ .retval = -1,
+ .type = HID_FEATURE_REPORT,
+ .request_type = HID_REQ_GET_REPORT,
+ .size = 10,
+ };
+ DECLARE_LIBBPF_OPTS(bpf_test_run_opts, tattrs,
+ .ctx_in = &args,
+ .ctx_size_in = sizeof(args),
+ );
+ int err, prog_fd;
+
+ LOAD_BPF;
+
+ args.hid = self->hid_id;
+ args.data[0] = 1; /* report ID */
+
+ prog_fd = bpf_program__fd(self->skel->progs.hid_user_raw_request);
+
+ err = bpf_prog_test_run_opts(prog_fd, &tattrs);
+ ASSERT_OK(err) TH_LOG("error while calling bpf_prog_test_run_opts");
+
+ ASSERT_EQ(args.retval, 2);
+
+ ASSERT_EQ(args.data[1], 2);
+}
+
+/*
+ * Attach hid_insert{0,1,2} to the given uhid device,
+ * retrieve and open the matching hidraw node,
+ * inject one event in the uhid device,
+ * check that the programs have been inserted in the correct order.
+ */
+TEST_F(hid_bpf, test_hid_attach_flags)
+{
+ const struct test_program progs[] = {
+ {
+ .name = "hid_test_insert2",
+ .insert_head = 0,
+ },
+ {
+ .name = "hid_test_insert1",
+ .insert_head = 1,
+ },
+ {
+ .name = "hid_test_insert3",
+ .insert_head = 0,
+ },
+ };
+ __u8 buf[10] = {0};
+ int err;
+
+ LOAD_PROGRAMS(progs);
+
+ /* inject one event */
+ buf[0] = 1;
+ uhid_send_event(_metadata, self->uhid_fd, buf, 6);
+
+ /* read the data from hidraw */
+ memset(buf, 0, sizeof(buf));
+ err = read(self->hidraw_fd, buf, sizeof(buf));
+ ASSERT_EQ(err, 6) TH_LOG("read_hidraw");
+ ASSERT_EQ(buf[1], 1);
+ ASSERT_EQ(buf[2], 2);
+ ASSERT_EQ(buf[3], 3);
+}
+
+/*
+ * Attach hid_rdesc_fixup to the given uhid device,
+ * retrieve and open the matching hidraw node,
+ * check that the hidraw report descriptor has been updated.
+ */
+TEST_F(hid_bpf, test_rdesc_fixup)
+{
+ struct hidraw_report_descriptor rpt_desc = {0};
+ const struct test_program progs[] = {
+ { .name = "hid_rdesc_fixup" },
+ };
+ int err, desc_size;
+
+ LOAD_PROGRAMS(progs);
+
+ /* check that hid_rdesc_fixup() was executed */
+ ASSERT_EQ(self->skel->data->callback2_check, 0x21);
+
+ /* read the exposed report descriptor from hidraw */
+ err = ioctl(self->hidraw_fd, HIDIOCGRDESCSIZE, &desc_size);
+ ASSERT_GE(err, 0) TH_LOG("error while reading HIDIOCGRDESCSIZE: %d", err);
+
+ /* ensure the new size of the rdesc is bigger than the old one */
+ ASSERT_GT(desc_size, sizeof(rdesc));
+
+ rpt_desc.size = desc_size;
+ err = ioctl(self->hidraw_fd, HIDIOCGRDESC, &rpt_desc);
+ ASSERT_GE(err, 0) TH_LOG("error while reading HIDIOCGRDESC: %d", err);
+
+ ASSERT_EQ(rpt_desc.value[4], 0x42);
+}
+
+static int libbpf_print_fn(enum libbpf_print_level level,
+ const char *format, va_list args)
+{
+ char buf[1024];
+
+ if (level == LIBBPF_DEBUG)
+ return 0;
+
+ snprintf(buf, sizeof(buf), "# %s", format);
+
+ vfprintf(stdout, buf, args);
+ return 0;
+}
+
+static void __attribute__((constructor)) __constructor_order_last(void)
+{
+ if (!__constructor_order)
+ __constructor_order = _CONSTRUCTOR_ORDER_BACKWARD;
+}
+
+int main(int argc, char **argv)
+{
+ /* Use libbpf 1.0 API mode */
+ libbpf_set_strict_mode(LIBBPF_STRICT_ALL);
+ libbpf_set_print(libbpf_print_fn);
+
+ return test_harness_run(argc, argv);
+}
diff --git a/tools/testing/selftests/hid/progs/hid.c b/tools/testing/selftests/hid/progs/hid.c
new file mode 100644
index 000000000000..1e558826b809
--- /dev/null
+++ b/tools/testing/selftests/hid/progs/hid.c
@@ -0,0 +1,206 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2022 Red hat */
+#include "hid_bpf_helpers.h"
+
+char _license[] SEC("license") = "GPL";
+
+struct attach_prog_args {
+ int prog_fd;
+ unsigned int hid;
+ int retval;
+ int insert_head;
+};
+
+__u64 callback_check = 52;
+__u64 callback2_check = 52;
+
+SEC("?fmod_ret/hid_bpf_device_event")
+int BPF_PROG(hid_first_event, struct hid_bpf_ctx *hid_ctx)
+{
+ __u8 *rw_data = hid_bpf_get_data(hid_ctx, 0 /* offset */, 3 /* size */);
+
+ if (!rw_data)
+ return 0; /* EPERM check */
+
+ callback_check = rw_data[1];
+
+ rw_data[2] = rw_data[1] + 5;
+
+ return hid_ctx->size;
+}
+
+SEC("?fmod_ret/hid_bpf_device_event")
+int BPF_PROG(hid_second_event, struct hid_bpf_ctx *hid_ctx)
+{
+ __u8 *rw_data = hid_bpf_get_data(hid_ctx, 0 /* offset */, 4 /* size */);
+
+ if (!rw_data)
+ return 0; /* EPERM check */
+
+ rw_data[3] = rw_data[2] + 5;
+
+ return hid_ctx->size;
+}
+
+SEC("?fmod_ret/hid_bpf_device_event")
+int BPF_PROG(hid_change_report_id, struct hid_bpf_ctx *hid_ctx)
+{
+ __u8 *rw_data = hid_bpf_get_data(hid_ctx, 0 /* offset */, 3 /* size */);
+
+ if (!rw_data)
+ return 0; /* EPERM check */
+
+ rw_data[0] = 2;
+
+ return 9;
+}
+
+SEC("syscall")
+int attach_prog(struct attach_prog_args *ctx)
+{
+ ctx->retval = hid_bpf_attach_prog(ctx->hid,
+ ctx->prog_fd,
+ ctx->insert_head ? HID_BPF_FLAG_INSERT_HEAD :
+ HID_BPF_FLAG_NONE);
+ return 0;
+}
+
+struct hid_hw_request_syscall_args {
+ /* data needs to come at offset 0 so we can use it in calls */
+ __u8 data[10];
+ unsigned int hid;
+ int retval;
+ size_t size;
+ enum hid_report_type type;
+ __u8 request_type;
+};
+
+SEC("syscall")
+int hid_user_raw_request(struct hid_hw_request_syscall_args *args)
+{
+ struct hid_bpf_ctx *ctx;
+ const size_t size = args->size;
+ int i, ret = 0;
+
+ if (size > sizeof(args->data))
+ return -7; /* -E2BIG */
+
+ ctx = hid_bpf_allocate_context(args->hid);
+ if (!ctx)
+ return -1; /* EPERM check */
+
+ ret = hid_bpf_hw_request(ctx,
+ args->data,
+ size,
+ args->type,
+ args->request_type);
+ args->retval = ret;
+
+ hid_bpf_release_context(ctx);
+
+ return 0;
+}
+
+static const __u8 rdesc[] = {
+ 0x05, 0x01, /* USAGE_PAGE (Generic Desktop) */
+ 0x09, 0x32, /* USAGE (Z) */
+ 0x95, 0x01, /* REPORT_COUNT (1) */
+ 0x81, 0x06, /* INPUT (Data,Var,Rel) */
+
+ 0x06, 0x00, 0xff, /* Usage Page (Vendor Defined Page 1) */
+ 0x19, 0x01, /* USAGE_MINIMUM (1) */
+ 0x29, 0x03, /* USAGE_MAXIMUM (3) */
+ 0x15, 0x00, /* LOGICAL_MINIMUM (0) */
+ 0x25, 0x01, /* LOGICAL_MAXIMUM (1) */
+ 0x95, 0x03, /* REPORT_COUNT (3) */
+ 0x75, 0x01, /* REPORT_SIZE (1) */
+ 0x91, 0x02, /* Output (Data,Var,Abs) */
+ 0x95, 0x01, /* REPORT_COUNT (1) */
+ 0x75, 0x05, /* REPORT_SIZE (5) */
+ 0x91, 0x01, /* Output (Cnst,Var,Abs) */
+
+ 0x06, 0x00, 0xff, /* Usage Page (Vendor Defined Page 1) */
+ 0x19, 0x06, /* USAGE_MINIMUM (6) */
+ 0x29, 0x08, /* USAGE_MAXIMUM (8) */
+ 0x15, 0x00, /* LOGICAL_MINIMUM (0) */
+ 0x25, 0x01, /* LOGICAL_MAXIMUM (1) */
+ 0x95, 0x03, /* REPORT_COUNT (3) */
+ 0x75, 0x01, /* REPORT_SIZE (1) */
+ 0xb1, 0x02, /* Feature (Data,Var,Abs) */
+ 0x95, 0x01, /* REPORT_COUNT (1) */
+ 0x75, 0x05, /* REPORT_SIZE (5) */
+ 0x91, 0x01, /* Output (Cnst,Var,Abs) */
+
+ 0xc0, /* END_COLLECTION */
+ 0xc0, /* END_COLLECTION */
+};
+
+SEC("?fmod_ret/hid_bpf_rdesc_fixup")
+int BPF_PROG(hid_rdesc_fixup, struct hid_bpf_ctx *hid_ctx)
+{
+ __u8 *data = hid_bpf_get_data(hid_ctx, 0 /* offset */, 4096 /* size */);
+
+ if (!data)
+ return 0; /* EPERM check */
+
+ callback2_check = data[4];
+
+ /* insert rdesc at offset 73 */
+ __builtin_memcpy(&data[73], rdesc, sizeof(rdesc));
+
+ /* Change Usage Vendor globally */
+ data[4] = 0x42;
+
+ return sizeof(rdesc) + 73;
+}
+
+SEC("?fmod_ret/hid_bpf_device_event")
+int BPF_PROG(hid_test_insert1, struct hid_bpf_ctx *hid_ctx)
+{
+ __u8 *data = hid_bpf_get_data(hid_ctx, 0 /* offset */, 4 /* size */);
+
+ if (!data)
+ return 0; /* EPERM check */
+
+ /* we need to be run first */
+ if (data[2] || data[3])
+ return -1;
+
+ data[1] = 1;
+
+ return 0;
+}
+
+SEC("?fmod_ret/hid_bpf_device_event")
+int BPF_PROG(hid_test_insert2, struct hid_bpf_ctx *hid_ctx)
+{
+ __u8 *data = hid_bpf_get_data(hid_ctx, 0 /* offset */, 4 /* size */);
+
+ if (!data)
+ return 0; /* EPERM check */
+
+ /* after insert0 and before insert2 */
+ if (!data[1] || data[3])
+ return -1;
+
+ data[2] = 2;
+
+ return 0;
+}
+
+SEC("?fmod_ret/hid_bpf_device_event")
+int BPF_PROG(hid_test_insert3, struct hid_bpf_ctx *hid_ctx)
+{
+ __u8 *data = hid_bpf_get_data(hid_ctx, 0 /* offset */, 4 /* size */);
+
+ if (!data)
+ return 0; /* EPERM check */
+
+ /* at the end */
+ if (!data[1] || !data[2])
+ return -1;
+
+ data[3] = 3;
+
+ return 0;
+}
diff --git a/tools/testing/selftests/hid/progs/hid_bpf_helpers.h b/tools/testing/selftests/hid/progs/hid_bpf_helpers.h
new file mode 100644
index 000000000000..65e657ac1198
--- /dev/null
+++ b/tools/testing/selftests/hid/progs/hid_bpf_helpers.h
@@ -0,0 +1,98 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/* Copyright (c) 2022 Benjamin Tissoires
+ */
+
+#ifndef __HID_BPF_HELPERS_H
+#define __HID_BPF_HELPERS_H
+
+/* "undefine" structs and enums in vmlinux.h, because we "override" them below */
+#define hid_bpf_ctx hid_bpf_ctx___not_used
+#define hid_report_type hid_report_type___not_used
+#define hid_class_request hid_class_request___not_used
+#define hid_bpf_attach_flags hid_bpf_attach_flags___not_used
+#define HID_INPUT_REPORT HID_INPUT_REPORT___not_used
+#define HID_OUTPUT_REPORT HID_OUTPUT_REPORT___not_used
+#define HID_FEATURE_REPORT HID_FEATURE_REPORT___not_used
+#define HID_REPORT_TYPES HID_REPORT_TYPES___not_used
+#define HID_REQ_GET_REPORT HID_REQ_GET_REPORT___not_used
+#define HID_REQ_GET_IDLE HID_REQ_GET_IDLE___not_used
+#define HID_REQ_GET_PROTOCOL HID_REQ_GET_PROTOCOL___not_used
+#define HID_REQ_SET_REPORT HID_REQ_SET_REPORT___not_used
+#define HID_REQ_SET_IDLE HID_REQ_SET_IDLE___not_used
+#define HID_REQ_SET_PROTOCOL HID_REQ_SET_PROTOCOL___not_used
+#define HID_BPF_FLAG_NONE HID_BPF_FLAG_NONE___not_used
+#define HID_BPF_FLAG_INSERT_HEAD HID_BPF_FLAG_INSERT_HEAD___not_used
+#define HID_BPF_FLAG_MAX HID_BPF_FLAG_MAX___not_used
+
+#include "vmlinux.h"
+
+#undef hid_bpf_ctx
+#undef hid_report_type
+#undef hid_class_request
+#undef hid_bpf_attach_flags
+#undef HID_INPUT_REPORT
+#undef HID_OUTPUT_REPORT
+#undef HID_FEATURE_REPORT
+#undef HID_REPORT_TYPES
+#undef HID_REQ_GET_REPORT
+#undef HID_REQ_GET_IDLE
+#undef HID_REQ_GET_PROTOCOL
+#undef HID_REQ_SET_REPORT
+#undef HID_REQ_SET_IDLE
+#undef HID_REQ_SET_PROTOCOL
+#undef HID_BPF_FLAG_NONE
+#undef HID_BPF_FLAG_INSERT_HEAD
+#undef HID_BPF_FLAG_MAX
+
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
+#include <linux/const.h>
+
+enum hid_report_type {
+ HID_INPUT_REPORT = 0,
+ HID_OUTPUT_REPORT = 1,
+ HID_FEATURE_REPORT = 2,
+
+ HID_REPORT_TYPES,
+};
+
+struct hid_bpf_ctx {
+ __u32 index;
+ const struct hid_device *hid;
+ __u32 allocated_size;
+ enum hid_report_type report_type;
+ union {
+ __s32 retval;
+ __s32 size;
+ };
+} __attribute__((preserve_access_index));
+
+enum hid_class_request {
+ HID_REQ_GET_REPORT = 0x01,
+ HID_REQ_GET_IDLE = 0x02,
+ HID_REQ_GET_PROTOCOL = 0x03,
+ HID_REQ_SET_REPORT = 0x09,
+ HID_REQ_SET_IDLE = 0x0A,
+ HID_REQ_SET_PROTOCOL = 0x0B,
+};
+
+enum hid_bpf_attach_flags {
+ HID_BPF_FLAG_NONE = 0,
+ HID_BPF_FLAG_INSERT_HEAD = _BITUL(0),
+ HID_BPF_FLAG_MAX,
+};
+
+/* following are kfuncs exported by HID for HID-BPF */
+extern __u8 *hid_bpf_get_data(struct hid_bpf_ctx *ctx,
+ unsigned int offset,
+ const size_t __sz) __ksym;
+extern int hid_bpf_attach_prog(unsigned int hid_id, int prog_fd, u32 flags) __ksym;
+extern struct hid_bpf_ctx *hid_bpf_allocate_context(unsigned int hid_id) __ksym;
+extern void hid_bpf_release_context(struct hid_bpf_ctx *ctx) __ksym;
+extern int hid_bpf_hw_request(struct hid_bpf_ctx *ctx,
+ __u8 *data,
+ size_t buf__sz,
+ enum hid_report_type type,
+ enum hid_class_request reqtype) __ksym;
+
+#endif /* __HID_BPF_HELPERS_H */
diff --git a/tools/testing/selftests/hid/run-hid-tools-tests.sh b/tools/testing/selftests/hid/run-hid-tools-tests.sh
new file mode 100755
index 000000000000..bdae8464da86
--- /dev/null
+++ b/tools/testing/selftests/hid/run-hid-tools-tests.sh
@@ -0,0 +1,28 @@
+#!/bin/sh
+# SPDX-License-Identifier: GPL-2.0
+# Runs tests for the HID subsystem
+
+if ! command -v python3 > /dev/null 2>&1; then
+ echo "hid-tools: [SKIP] python3 not installed"
+ exit 77
+fi
+
+if ! python3 -c "import pytest" > /dev/null 2>&1; then
+ echo "hid: [SKIP/ pytest module not installed"
+ exit 77
+fi
+
+if ! python3 -c "import pytest_tap" > /dev/null 2>&1; then
+ echo "hid: [SKIP/ pytest_tap module not installed"
+ exit 77
+fi
+
+if ! python3 -c "import hidtools" > /dev/null 2>&1; then
+ echo "hid: [SKIP/ hid-tools module not installed"
+ exit 77
+fi
+
+TARGET=${TARGET:=.}
+
+echo TAP version 13
+python3 -u -m pytest $PYTEST_XDIST ./tests/$TARGET --tap-stream --udevd
diff --git a/tools/testing/selftests/hid/settings b/tools/testing/selftests/hid/settings
new file mode 100644
index 000000000000..b3cbfc521b10
--- /dev/null
+++ b/tools/testing/selftests/hid/settings
@@ -0,0 +1,3 @@
+# HID tests can be long, so give a little bit more time
+# to them
+timeout=200
diff --git a/tools/testing/selftests/hid/tests/__init__.py b/tools/testing/selftests/hid/tests/__init__.py
new file mode 100644
index 000000000000..c940e9275252
--- /dev/null
+++ b/tools/testing/selftests/hid/tests/__init__.py
@@ -0,0 +1,2 @@
+# SPDX-License-Identifier: GPL-2.0
+# Just to make sphinx-apidoc document this directory
diff --git a/tools/testing/selftests/hid/tests/base.py b/tools/testing/selftests/hid/tests/base.py
new file mode 100644
index 000000000000..51433063b227
--- /dev/null
+++ b/tools/testing/selftests/hid/tests/base.py
@@ -0,0 +1,344 @@
+#!/bin/env python3
+# SPDX-License-Identifier: GPL-2.0
+# -*- coding: utf-8 -*-
+#
+# Copyright (c) 2017 Benjamin Tissoires <benjamin.tissoires@gmail.com>
+# Copyright (c) 2017 Red Hat, Inc.
+
+import libevdev
+import os
+import pytest
+import time
+
+import logging
+
+from hidtools.device.base_device import BaseDevice, EvdevMatch, SysfsFile
+from pathlib import Path
+from typing import Final, List, Tuple
+
+logger = logging.getLogger("hidtools.test.base")
+
+# application to matches
+application_matches: Final = {
+ # pyright: ignore
+ "Accelerometer": EvdevMatch(
+ req_properties=[
+ libevdev.INPUT_PROP_ACCELEROMETER,
+ ]
+ ),
+ "Game Pad": EvdevMatch( # in systemd, this is a lot more complex, but that will do
+ requires=[
+ libevdev.EV_ABS.ABS_X,
+ libevdev.EV_ABS.ABS_Y,
+ libevdev.EV_ABS.ABS_RX,
+ libevdev.EV_ABS.ABS_RY,
+ libevdev.EV_KEY.BTN_START,
+ ],
+ excl_properties=[
+ libevdev.INPUT_PROP_ACCELEROMETER,
+ ],
+ ),
+ "Joystick": EvdevMatch( # in systemd, this is a lot more complex, but that will do
+ requires=[
+ libevdev.EV_ABS.ABS_RX,
+ libevdev.EV_ABS.ABS_RY,
+ libevdev.EV_KEY.BTN_START,
+ ],
+ excl_properties=[
+ libevdev.INPUT_PROP_ACCELEROMETER,
+ ],
+ ),
+ "Key": EvdevMatch(
+ requires=[
+ libevdev.EV_KEY.KEY_A,
+ ],
+ excl_properties=[
+ libevdev.INPUT_PROP_ACCELEROMETER,
+ libevdev.INPUT_PROP_DIRECT,
+ libevdev.INPUT_PROP_POINTER,
+ ],
+ ),
+ "Mouse": EvdevMatch(
+ requires=[
+ libevdev.EV_REL.REL_X,
+ libevdev.EV_REL.REL_Y,
+ libevdev.EV_KEY.BTN_LEFT,
+ ],
+ excl_properties=[
+ libevdev.INPUT_PROP_ACCELEROMETER,
+ ],
+ ),
+ "Pad": EvdevMatch(
+ requires=[
+ libevdev.EV_KEY.BTN_0,
+ ],
+ excludes=[
+ libevdev.EV_KEY.BTN_TOOL_PEN,
+ libevdev.EV_KEY.BTN_TOUCH,
+ libevdev.EV_ABS.ABS_DISTANCE,
+ ],
+ excl_properties=[
+ libevdev.INPUT_PROP_ACCELEROMETER,
+ ],
+ ),
+ "Pen": EvdevMatch(
+ requires=[
+ libevdev.EV_KEY.BTN_STYLUS,
+ libevdev.EV_ABS.ABS_X,
+ libevdev.EV_ABS.ABS_Y,
+ ],
+ excl_properties=[
+ libevdev.INPUT_PROP_ACCELEROMETER,
+ ],
+ ),
+ "Stylus": EvdevMatch(
+ requires=[
+ libevdev.EV_KEY.BTN_STYLUS,
+ libevdev.EV_ABS.ABS_X,
+ libevdev.EV_ABS.ABS_Y,
+ ],
+ excl_properties=[
+ libevdev.INPUT_PROP_ACCELEROMETER,
+ ],
+ ),
+ "Touch Pad": EvdevMatch(
+ requires=[
+ libevdev.EV_KEY.BTN_LEFT,
+ libevdev.EV_ABS.ABS_X,
+ libevdev.EV_ABS.ABS_Y,
+ ],
+ excludes=[libevdev.EV_KEY.BTN_TOOL_PEN, libevdev.EV_KEY.BTN_STYLUS],
+ req_properties=[
+ libevdev.INPUT_PROP_POINTER,
+ ],
+ excl_properties=[
+ libevdev.INPUT_PROP_ACCELEROMETER,
+ ],
+ ),
+ "Touch Screen": EvdevMatch(
+ requires=[
+ libevdev.EV_KEY.BTN_TOUCH,
+ libevdev.EV_ABS.ABS_X,
+ libevdev.EV_ABS.ABS_Y,
+ ],
+ excludes=[libevdev.EV_KEY.BTN_TOOL_PEN, libevdev.EV_KEY.BTN_STYLUS],
+ req_properties=[
+ libevdev.INPUT_PROP_DIRECT,
+ ],
+ excl_properties=[
+ libevdev.INPUT_PROP_ACCELEROMETER,
+ ],
+ ),
+}
+
+
+class UHIDTestDevice(BaseDevice):
+ def __init__(self, name, application, rdesc_str=None, rdesc=None, input_info=None):
+ super().__init__(name, application, rdesc_str, rdesc, input_info)
+ self.application_matches = application_matches
+ if name is None:
+ name = f"uhid test {self.__class__.__name__}"
+ if not name.startswith("uhid test "):
+ name = "uhid test " + self.name
+ self.name = name
+
+
+class BaseTestCase:
+ class TestUhid(object):
+ syn_event = libevdev.InputEvent(libevdev.EV_SYN.SYN_REPORT) # type: ignore
+ key_event = libevdev.InputEvent(libevdev.EV_KEY) # type: ignore
+ abs_event = libevdev.InputEvent(libevdev.EV_ABS) # type: ignore
+ rel_event = libevdev.InputEvent(libevdev.EV_REL) # type: ignore
+ msc_event = libevdev.InputEvent(libevdev.EV_MSC.MSC_SCAN) # type: ignore
+
+ # List of kernel modules to load before starting the test
+ # if any module is not available (not compiled), the test will skip.
+ # Each element is a tuple '(kernel driver name, kernel module)',
+ # for example ("playstation", "hid-playstation")
+ kernel_modules: List[Tuple[str, str]] = []
+
+ def assertInputEventsIn(self, expected_events, effective_events):
+ effective_events = effective_events.copy()
+ for ev in expected_events:
+ assert ev in effective_events
+ effective_events.remove(ev)
+ return effective_events
+
+ def assertInputEvents(self, expected_events, effective_events):
+ remaining = self.assertInputEventsIn(expected_events, effective_events)
+ assert remaining == []
+
+ @classmethod
+ def debug_reports(cls, reports, uhdev=None, events=None):
+ data = [" ".join([f"{v:02x}" for v in r]) for r in reports]
+
+ if uhdev is not None:
+ human_data = [
+ uhdev.parsed_rdesc.format_report(r, split_lines=True)
+ for r in reports
+ ]
+ try:
+ human_data = [
+ f'\n\t {" " * h.index("/")}'.join(h.split("\n"))
+ for h in human_data
+ ]
+ except ValueError:
+ # '/' not found: not a numbered report
+ human_data = ["\n\t ".join(h.split("\n")) for h in human_data]
+ data = [f"{d}\n\t ====> {h}" for d, h in zip(data, human_data)]
+
+ reports = data
+
+ if len(reports) == 1:
+ print("sending 1 report:")
+ else:
+ print(f"sending {len(reports)} reports:")
+ for report in reports:
+ print("\t", report)
+
+ if events is not None:
+ print("events received:", events)
+
+ def create_device(self):
+ raise Exception("please reimplement me in subclasses")
+
+ def _load_kernel_module(self, kernel_driver, kernel_module):
+ sysfs_path = Path("/sys/bus/hid/drivers")
+ if kernel_driver is not None:
+ sysfs_path /= kernel_driver
+ else:
+ # special case for when testing all available modules:
+ # we don't know beforehand the name of the module from modinfo
+ sysfs_path = Path("/sys/module") / kernel_module.replace("-", "_")
+ if not sysfs_path.exists():
+ import subprocess
+
+ ret = subprocess.run(["/usr/sbin/modprobe", kernel_module])
+ if ret.returncode != 0:
+ pytest.skip(
+ f"module {kernel_module} could not be loaded, skipping the test"
+ )
+
+ @pytest.fixture()
+ def load_kernel_module(self):
+ for kernel_driver, kernel_module in self.kernel_modules:
+ self._load_kernel_module(kernel_driver, kernel_module)
+ yield
+
+ @pytest.fixture()
+ def new_uhdev(self, load_kernel_module):
+ return self.create_device()
+
+ def assertName(self, uhdev):
+ evdev = uhdev.get_evdev()
+ assert uhdev.name in evdev.name
+
+ @pytest.fixture(autouse=True)
+ def context(self, new_uhdev, request):
+ try:
+ with HIDTestUdevRule.instance():
+ with new_uhdev as self.uhdev:
+ for skip_cond in request.node.iter_markers("skip_if_uhdev"):
+ test, message, *rest = skip_cond.args
+
+ if test(self.uhdev):
+ pytest.skip(message)
+
+ self.uhdev.create_kernel_device()
+ now = time.time()
+ while not self.uhdev.is_ready() and time.time() - now < 5:
+ self.uhdev.dispatch(1)
+ if self.uhdev.get_evdev() is None:
+ logger.warning(
+ f"available list of input nodes: (default application is '{self.uhdev.application}')"
+ )
+ logger.warning(self.uhdev.input_nodes)
+ yield
+ self.uhdev = None
+ except PermissionError:
+ pytest.skip("Insufficient permissions, run me as root")
+
+ @pytest.fixture(autouse=True)
+ def check_taint(self):
+ # we are abusing SysfsFile here, it's in /proc, but meh
+ taint_file = SysfsFile("/proc/sys/kernel/tainted")
+ taint = taint_file.int_value
+
+ yield
+
+ assert taint_file.int_value == taint
+
+ def test_creation(self):
+ """Make sure the device gets processed by the kernel and creates
+ the expected application input node.
+
+ If this fail, there is something wrong in the device report
+ descriptors."""
+ uhdev = self.uhdev
+ assert uhdev is not None
+ assert uhdev.get_evdev() is not None
+ self.assertName(uhdev)
+ assert len(uhdev.next_sync_events()) == 0
+ assert uhdev.get_evdev() is not None
+
+
+class HIDTestUdevRule(object):
+ _instance = None
+ """
+ A context-manager compatible class that sets up our udev rules file and
+ deletes it on context exit.
+
+ This class is tailored to our test setup: it only sets up the udev rule
+ on the **second** context and it cleans it up again on the last context
+ removed. This matches the expected pytest setup: we enter a context for
+ the session once, then once for each test (the first of which will
+ trigger the udev rule) and once the last test exited and the session
+ exited, we clean up after ourselves.
+ """
+
+ def __init__(self):
+ self.refs = 0
+ self.rulesfile = None
+
+ def __enter__(self):
+ self.refs += 1
+ if self.refs == 2 and self.rulesfile is None:
+ self.create_udev_rule()
+ self.reload_udev_rules()
+
+ def __exit__(self, exc_type, exc_value, traceback):
+ self.refs -= 1
+ if self.refs == 0 and self.rulesfile:
+ os.remove(self.rulesfile.name)
+ self.reload_udev_rules()
+
+ def reload_udev_rules(self):
+ import subprocess
+
+ subprocess.run("udevadm control --reload-rules".split())
+ subprocess.run("systemd-hwdb update".split())
+
+ def create_udev_rule(self):
+ import tempfile
+
+ os.makedirs("/run/udev/rules.d", exist_ok=True)
+ with tempfile.NamedTemporaryFile(
+ prefix="91-uhid-test-device-REMOVEME-",
+ suffix=".rules",
+ mode="w+",
+ dir="/run/udev/rules.d",
+ delete=False,
+ ) as f:
+ f.write(
+ 'KERNELS=="*input*", ATTRS{name}=="*uhid test *", ENV{LIBINPUT_IGNORE_DEVICE}="1"\n'
+ )
+ f.write(
+ 'KERNELS=="*input*", ATTRS{name}=="*uhid test * System Multi Axis", ENV{ID_INPUT_TOUCHSCREEN}="", ENV{ID_INPUT_SYSTEM_MULTIAXIS}="1"\n'
+ )
+ self.rulesfile = f
+
+ @classmethod
+ def instance(cls):
+ if not cls._instance:
+ cls._instance = HIDTestUdevRule()
+ return cls._instance
diff --git a/tools/testing/selftests/hid/tests/conftest.py b/tools/testing/selftests/hid/tests/conftest.py
new file mode 100644
index 000000000000..1361ec981db6
--- /dev/null
+++ b/tools/testing/selftests/hid/tests/conftest.py
@@ -0,0 +1,81 @@
+#!/bin/env python3
+# SPDX-License-Identifier: GPL-2.0
+# -*- coding: utf-8 -*-
+#
+# Copyright (c) 2017 Benjamin Tissoires <benjamin.tissoires@gmail.com>
+# Copyright (c) 2017 Red Hat, Inc.
+
+import platform
+import pytest
+import re
+import resource
+import subprocess
+from .base import HIDTestUdevRule
+from pathlib import Path
+
+
+# See the comment in HIDTestUdevRule, this doesn't set up but it will clean
+# up once the last test exited.
+@pytest.fixture(autouse=True, scope="session")
+def udev_rules_session_setup():
+ with HIDTestUdevRule.instance():
+ yield
+
+
+@pytest.fixture(autouse=True, scope="session")
+def setup_rlimit():
+ resource.setrlimit(resource.RLIMIT_CORE, (0, 0))
+
+
+@pytest.fixture(autouse=True, scope="session")
+def start_udevd(pytestconfig):
+ if pytestconfig.getoption("udevd"):
+ import subprocess
+
+ with subprocess.Popen("/usr/lib/systemd/systemd-udevd") as proc:
+ yield
+ proc.kill()
+ else:
+ yield
+
+
+def pytest_configure(config):
+ config.addinivalue_line(
+ "markers",
+ "skip_if_uhdev(condition, message): mark test to skip if the condition on the uhdev device is met",
+ )
+
+
+# Generate the list of modules and modaliases
+# for the tests that need to be parametrized with those
+def pytest_generate_tests(metafunc):
+ if "usbVidPid" in metafunc.fixturenames:
+ modules = (
+ Path("/lib/modules/")
+ / platform.uname().release
+ / "kernel"
+ / "drivers"
+ / "hid"
+ )
+
+ modalias_re = re.compile(r"alias:\s+hid:b0003g.*v([0-9a-fA-F]+)p([0-9a-fA-F]+)")
+
+ params = []
+ ids = []
+ for module in modules.glob("*.ko"):
+ p = subprocess.run(
+ ["modinfo", module], capture_output=True, check=True, encoding="utf-8"
+ )
+ for line in p.stdout.split("\n"):
+ m = modalias_re.match(line)
+ if m is not None:
+ vid, pid = m.groups()
+ vid = int(vid, 16)
+ pid = int(pid, 16)
+ params.append([module.name.replace(".ko", ""), vid, pid])
+ ids.append(f"{module.name} {vid:04x}:{pid:04x}")
+ metafunc.parametrize("usbVidPid", params, ids=ids)
+
+
+def pytest_addoption(parser):
+ parser.addoption("--udevd", action="store_true", default=False)
diff --git a/tools/testing/selftests/hid/tests/descriptors_wacom.py b/tools/testing/selftests/hid/tests/descriptors_wacom.py
new file mode 100644
index 000000000000..91c16e005c12
--- /dev/null
+++ b/tools/testing/selftests/hid/tests/descriptors_wacom.py
@@ -0,0 +1,1360 @@
+# SPDX-License-Identifier: GPL-2.0
+
+# fmt: off
+wacom_pth660_v145 = [
+ 0x05, 0x01, # . Usage Page (Desktop),
+ 0x09, 0x02, # . Usage (Mouse),
+ 0xA1, 0x01, # . Collection (Application),
+ 0x85, 0x01, # . Report ID (1),
+ 0x09, 0x01, # . Usage (Pointer),
+ 0xA1, 0x00, # . Collection (Physical),
+ 0x05, 0x09, # . Usage Page (Button),
+ 0x19, 0x01, # . Usage Minimum (01h),
+ 0x29, 0x03, # . Usage Maximum (03h),
+ 0x15, 0x00, # . Logical Minimum (0),
+ 0x25, 0x01, # . Logical Maximum (1),
+ 0x75, 0x01, # . Report Size (1),
+ 0x95, 0x03, # . Report Count (3),
+ 0x81, 0x02, # . Input (Variable),
+ 0x75, 0x01, # . Report Size (1),
+ 0x95, 0x05, # . Report Count (5),
+ 0x81, 0x03, # . Input (Constant, Variable),
+ 0x05, 0x01, # . Usage Page (Desktop),
+ 0x09, 0x30, # . Usage (X),
+ 0x09, 0x31, # . Usage (Y),
+ 0x15, 0x81, # . Logical Minimum (-127),
+ 0x25, 0x7F, # . Logical Maximum (127),
+ 0x75, 0x08, # . Report Size (8),
+ 0x95, 0x02, # . Report Count (2),
+ 0x81, 0x06, # . Input (Variable, Relative),
+ 0xC0, # . End Collection,
+ 0xC0, # . End Collection,
+ 0x06, 0x0D, 0xFF, # . Usage Page (FF0Dh),
+ 0x09, 0x01, # . Usage (01h),
+ 0xA1, 0x01, # . Collection (Application),
+ 0x85, 0x10, # . Report ID (16),
+ 0x09, 0x20, # . Usage (20h),
+ 0xA1, 0x00, # . Collection (Physical),
+ 0x09, 0x42, # . Usage (42h),
+ 0x09, 0x44, # . Usage (44h),
+ 0x09, 0x5A, # . Usage (5Ah),
+ 0x09, 0x45, # . Usage (45h),
+ 0x09, 0x3C, # . Usage (3Ch),
+ 0x09, 0x32, # . Usage (32h),
+ 0x09, 0x36, # . Usage (36h),
+ 0x15, 0x00, # . Logical Minimum (0),
+ 0x25, 0x01, # . Logical Maximum (1),
+ 0x75, 0x01, # . Report Size (1),
+ 0x95, 0x07, # . Report Count (7),
+ 0x81, 0x02, # . Input (Variable),
+ 0x95, 0x01, # . Report Count (1),
+ 0x81, 0x03, # . Input (Constant, Variable),
+ 0x0A, 0x30, 0x01, # . Usage (0130h),
+ 0x65, 0x11, # . Unit (Centimeter),
+ 0x55, 0x0D, # . Unit Exponent (13),
+ 0x35, 0x00, # . Physical Minimum (0),
+ 0x47, 0x80, 0x57, 0x00, 0x00, # . Physical Maximum (22400),
+ 0x15, 0x00, # . Logical Minimum (0),
+ 0x27, 0x00, 0xAF, 0x00, 0x00, # . Logical Maximum (44800),
+ 0x75, 0x18, # . Report Size (24),
+ 0x95, 0x01, # . Report Count (1),
+ 0x81, 0x02, # . Input (Variable),
+ 0x0A, 0x31, 0x01, # . Usage (0131h),
+ 0x47, 0xD0, 0x39, 0x00, 0x00, # . Physical Maximum (14800),
+ 0x27, 0xA0, 0x73, 0x00, 0x00, # . Logical Maximum (29600),
+ 0x81, 0x02, # . Input (Variable),
+ 0x09, 0x30, # . Usage (30h),
+ 0x55, 0x00, # . Unit Exponent (0),
+ 0x65, 0x00, # . Unit,
+ 0x15, 0x00, # . Logical Minimum (0),
+ 0x26, 0xFF, 0x1F, # . Logical Maximum (8191), # !!! Errata: Missing Physical Max = 0
+ 0x75, 0x10, # . Report Size (16),
+ 0x81, 0x02, # . Input (Variable),
+ 0x09, 0x3D, # . Usage (3Dh),
+ 0x09, 0x3E, # . Usage (3Eh),
+ 0x65, 0x14, # . Unit (Degrees),
+ 0x55, 0x00, # . Unit Exponent (0),
+ 0x35, 0xC0, # . Physical Minimum (-64),
+ 0x45, 0x3F, # . Physical Maximum (63),
+ 0x15, 0xC0, # . Logical Minimum (-64),
+ 0x25, 0x3F, # . Logical Maximum (63),
+ 0x75, 0x08, # . Report Size (8),
+ 0x95, 0x02, # . Report Count (2),
+ 0x81, 0x02, # . Input (Variable),
+ 0x09, 0x41, # . Usage (41h),
+ 0x65, 0x14, # . Unit (Degrees),
+ 0x55, 0x00, # . Unit Exponent (0),
+ 0x36, 0x4C, 0xFF, # . Physical Minimum (-180),
+ 0x46, 0xB3, 0x00, # . Physical Maximum (179),
+ 0x16, 0x7C, 0xFC, # . Logical Minimum (-900),
+ 0x26, 0x83, 0x03, # . Logical Maximum (899),
+ 0x75, 0x10, # . Report Size (16),
+ 0x95, 0x01, # . Report Count (1),
+ 0x81, 0x0A, # . Input (Variable, Wrap),
+ 0x0A, 0x03, 0x0D, # . Usage (0D03h),
+ 0x65, 0x00, # . Unit,
+ 0x55, 0x00, # . Unit Exponent (0),
+ 0x15, 0x00, # . Logical Minimum (0),
+ 0x26, 0xFF, 0x07, # . Logical Maximum (2047), # !!! Errata: Missing Physical Min/Max = 0
+ 0x75, 0x10, # . Report Size (16),
+ 0x95, 0x01, # . Report Count (1),
+ 0x81, 0x02, # . Input (Variable),
+ 0x0A, 0x32, 0x01, # . Usage (0132h),
+ 0x15, 0x00, # . Logical Minimum (0),
+ 0x25, 0x3F, # . Logical Maximum (63),
+ 0x75, 0x08, # . Report Size (8),
+ 0x95, 0x01, # . Report Count (1),
+ 0x81, 0x02, # . Input (Variable),
+ 0x09, 0x5B, # . Usage (5Bh),
+ 0x09, 0x5C, # . Usage (5Ch),
+ 0x17, 0x00, 0x00, 0x00, 0x80, # . Logical Minimum (-2147483648),
+ 0x27, 0xFF, 0xFF, 0xFF, 0x7F, # . Logical Maximum (2147483647),
+ 0x75, 0x20, # . Report Size (32),
+ 0x95, 0x02, # . Report Count (2),
+ 0x81, 0x02, # . Input (Variable),
+ 0x09, 0x77, # . Usage (77h),
+ 0x15, 0x00, # . Logical Minimum (0),
+ 0x26, 0xFF, 0x0F, # . Logical Maximum (4095),
+ 0x75, 0x10, # . Report Size (16),
+ 0x95, 0x01, # . Report Count (1),
+ 0x81, 0x02, # . Input (Variable),
+ 0xC0, # . End Collection,
+ 0x85, 0x11, # . Report ID (17),
+ 0x09, 0x39, # . Usage (39h),
+ 0xA1, 0x00, # . Collection (Physical),
+ 0x1A, 0x10, 0x09, # . Usage Minimum (0910h),
+ 0x2A, 0x17, 0x09, # . Usage Maximum (0917h),
+ 0x15, 0x00, # . Logical Minimum (0),
+ 0x25, 0x01, # . Logical Maximum (1),
+ 0x75, 0x01, # . Report Size (1),
+ 0x95, 0x08, # . Report Count (8),
+ 0x81, 0x02, # . Input (Variable),
+ 0x1A, 0x40, 0x09, # . Usage Minimum (0940h),
+ 0x2A, 0x47, 0x09, # . Usage Maximum (0947h),
+ 0x81, 0x02, # . Input (Variable),
+ 0x0A, 0x95, 0x09, # . Usage (0995h),
+ 0x95, 0x01, # . Report Count (1),
+ 0x81, 0x02, # . Input (Variable),
+ 0x95, 0x07, # . Report Count (7),
+ 0x81, 0x03, # . Input (Constant, Variable),
+ 0x0A, 0x38, 0x01, # . Usage (0138h),
+ 0x65, 0x14, # . Unit (Degrees),
+ 0x55, 0x00, # . Unit Exponent (0),
+ 0x35, 0x00, # . Physical Minimum (0),
+ 0x46, 0x67, 0x01, # . Physical Maximum (359),
+ 0x15, 0x00, # . Logical Minimum (0),
+ 0x25, 0x47, # . Logical Maximum (71),
+ 0x75, 0x07, # . Report Size (7),
+ 0x95, 0x01, # . Report Count (1),
+ 0x81, 0x0A, # . Input (Variable, Wrap),
+ 0x0A, 0x39, 0x01, # . Usage (0139h),
+ 0x65, 0x00, # . Unit,
+ 0x55, 0x00, # . Unit Exponent (0),
+ 0x25, 0x01, # . Logical Maximum (1), # !!! Errata: Missing Physical Max = 0
+ 0x75, 0x01, # . Report Size (1),
+ 0x95, 0x01, # . Report Count (1),
+ 0x81, 0x02, # . Input (Variable),
+ 0x75, 0x08, # . Report Size (8),
+ 0x95, 0x04, # . Report Count (4),
+ 0x81, 0x03, # . Input (Constant, Variable),
+ 0xC0, # . End Collection,
+ 0x85, 0x13, # . Report ID (19),
+ 0x0A, 0x13, 0x10, # . Usage (1013h),
+ 0xA1, 0x00, # . Collection (Physical),
+ 0x0A, 0x3B, 0x04, # . Usage (043Bh),
+ 0x15, 0x00, # . Logical Minimum (0),
+ 0x25, 0x64, # . Logical Maximum (100),
+ 0x75, 0x07, # . Report Size (7),
+ 0x95, 0x01, # . Report Count (1),
+ 0x81, 0x02, # . Input (Variable),
+ 0x0A, 0x04, 0x04, # . Usage (0404h),
+ 0x25, 0x01, # . Logical Maximum (1),
+ 0x75, 0x01, # . Report Size (1),
+ 0x81, 0x02, # . Input (Variable),
+ 0x0A, 0x52, 0x04, # . Usage (0452h),
+ 0x15, 0x00, # . Logical Minimum (0),
+ 0x25, 0x01, # . Logical Maximum (1),
+ 0x75, 0x01, # . Report Size (1),
+ 0x95, 0x01, # . Report Count (1),
+ 0x81, 0x02, # . Input (Variable),
+ 0x95, 0x06, # . Report Count (6),
+ 0x81, 0x03, # . Input (Constant, Variable),
+ 0x0A, 0x54, 0x04, # . Usage (0454h),
+ 0x95, 0x01, # . Report Count (1),
+ 0x81, 0x02, # . Input (Variable),
+ 0x75, 0x08, # . Report Size (8),
+ 0x95, 0x06, # . Report Count (6),
+ 0x81, 0x03, # . Input (Constant, Variable),
+ 0xC0, # . End Collection,
+ 0x09, 0x0E, # . Usage (0Eh),
+ 0xA1, 0x02, # . Collection (Logical),
+ 0x15, 0x00, # . Logical Minimum (0),
+ 0x85, 0x02, # . Report ID (2),
+ 0x09, 0x01, # . Usage (01h),
+ 0x75, 0x08, # . Report Size (8),
+ 0x25, 0x01, # . Logical Maximum (1),
+ 0x95, 0x01, # . Report Count (1),
+ 0xB1, 0x02, # . Feature (Variable),
+ 0x85, 0x03, # . Report ID (3),
+ 0x0A, 0x03, 0x10, # . Usage (1003h),
+ 0x26, 0xFF, 0x00, # . Logical Maximum (255),
+ 0x95, 0x01, # . Report Count (1),
+ 0xB1, 0x02, # . Feature (Variable),
+ 0x85, 0x04, # . Report ID (4),
+ 0x0A, 0x04, 0x10, # . Usage (1004h),
+ 0x25, 0x01, # . Logical Maximum (1),
+ 0x95, 0x01, # . Report Count (1),
+ 0xB1, 0x02, # . Feature (Variable),
+ 0x85, 0x07, # . Report ID (7),
+ 0x0A, 0x09, 0x10, # . Usage (1009h),
+ 0x25, 0x02, # . Logical Maximum (2),
+ 0x95, 0x01, # . Report Count (1),
+ 0xB1, 0x02, # . Feature (Variable),
+ 0x95, 0x01, # . Report Count (1),
+ 0xB1, 0x03, # . Feature (Constant, Variable),
+ 0x0A, 0x07, 0x10, # . Usage (1007h),
+ 0x09, 0x00, # . Usage (00h),
+ 0x0A, 0x08, 0x10, # . Usage (1008h),
+ 0x09, 0x00, # . Usage (00h),
+ 0x09, 0x00, # . Usage (00h),
+ 0x09, 0x00, # . Usage (00h),
+ 0x27, 0xFF, 0xFF, 0x00, 0x00, # . Logical Maximum (65535),
+ 0x75, 0x10, # . Report Size (16),
+ 0x95, 0x06, # . Report Count (6),
+ 0xB1, 0x02, # . Feature (Variable),
+ 0x75, 0x08, # . Report Size (8),
+ 0x95, 0x01, # . Report Count (1),
+ 0xB1, 0x03, # . Feature (Constant, Variable),
+ 0x85, 0x0C, # . Report ID (12),
+ 0x0A, 0x30, 0x0D, # . Usage (0D30h),
+ 0x0A, 0x31, 0x0D, # . Usage (0D31h),
+ 0x0A, 0x32, 0x0D, # . Usage (0D32h),
+ 0x0A, 0x33, 0x0D, # . Usage (0D33h), # !!! Errata: Missing Non-zero Physical Max
+ 0x65, 0x11, # . Unit (Centimeter),
+ 0x55, 0x0D, # . Unit Exponent (13),
+ 0x75, 0x10, # . Report Size (16),
+ 0x95, 0x04, # . Report Count (4),
+ 0xB1, 0x03, # . Feature (Constant, Variable),
+ 0x85, 0x0D, # . Report ID (13),
+ 0x65, 0x00, # . Unit,
+ 0x55, 0x00, # . Unit Exponent (0),
+ 0x0A, 0x0D, 0x10, # . Usage (100Dh),
+ 0x15, 0x00, # . Logical Minimum (0),
+ 0x25, 0x01, # . Logical Maximum (1),
+ 0x75, 0x08, # . Report Size (8),
+ 0x95, 0x01, # . Report Count (1),
+ 0xB1, 0x02, # . Feature (Variable),
+ 0x85, 0x14, # . Report ID (20),
+ 0x0A, 0x14, 0x10, # . Usage (1014h),
+ 0x26, 0xFF, 0x00, # . Logical Maximum (255),
+ 0x95, 0x0D, # . Report Count (13),
+ 0xB1, 0x02, # . Feature (Variable),
+ 0x85, 0x31, # . Report ID (49),
+ 0x0A, 0x31, 0x10, # . Usage (1031h),
+ 0x25, 0x64, # . Logical Maximum (100),
+ 0x95, 0x05, # . Report Count (5),
+ 0xB1, 0x02, # . Feature (Variable),
+ 0x85, 0x32, # . Report ID (50),
+ 0x0A, 0x31, 0x10, # . Usage (1031h),
+ 0x25, 0x64, # . Logical Maximum (100),
+ 0x95, 0x01, # . Report Count (1),
+ 0xB1, 0x02, # . Feature (Variable),
+ 0x0A, 0x32, 0x10, # . Usage (1032h),
+ 0x25, 0x03, # . Logical Maximum (3),
+ 0xB1, 0x02, # . Feature (Variable),
+ 0x85, 0x34, # . Report ID (52),
+ 0x0A, 0x34, 0x10, # . Usage (1034h),
+ 0x25, 0x01, # . Logical Maximum (1),
+ 0x95, 0x04, # . Report Count (4),
+ 0xB1, 0x02, # . Feature (Variable),
+ 0x85, 0x35, # . Report ID (53),
+ 0x0A, 0x35, 0x10, # . Usage (1035h),
+ 0x26, 0xFF, 0x00, # . Logical Maximum (255),
+ 0x95, 0x0A, # . Report Count (10),
+ 0xB1, 0x02, # . Feature (Variable),
+ 0x85, 0x36, # . Report ID (54),
+ 0x0A, 0x35, 0x10, # . Usage (1035h),
+ 0x26, 0xFF, 0x00, # . Logical Maximum (255),
+ 0x96, 0x01, 0x01, # . Report Count (257),
+ 0xB1, 0x02, # . Feature (Variable),
+ 0x85, 0xCC, # . Report ID (204),
+ 0x0A, 0xCC, 0x10, # . Usage (10CCh),
+ 0x26, 0xFF, 0x00, # . Logical Maximum (255),
+ 0x95, 0x02, # . Report Count (2),
+ 0xB1, 0x02, # . Feature (Variable),
+ 0xC0, # . End Collection,
+ 0x0A, 0xAC, 0x10, # . Usage (10ACh),
+ 0xA1, 0x02, # . Collection (Logical),
+ 0x15, 0x00, # . Logical Minimum (0),
+ 0x26, 0xFF, 0x00, # . Logical Maximum (255),
+ 0x75, 0x08, # . Report Size (8),
+ 0x85, 0xAC, # . Report ID (172),
+ 0x09, 0x00, # . Usage (00h),
+ 0x95, 0xBF, # . Report Count (191),
+ 0x81, 0x02, # . Input (Variable),
+ 0x85, 0x33, # . Report ID (51),
+ 0x09, 0x00, # . Usage (00h),
+ 0x95, 0x12, # . Report Count (18),
+ 0xB1, 0x02, # . Feature (Variable),
+ 0x85, 0x64, # . Report ID (100),
+ 0x09, 0x00, # . Usage (00h),
+ 0x95, 0x0C, # . Report Count (12),
+ 0xB1, 0x02, # . Feature (Variable),
+ 0x85, 0x15, # . Report ID (21),
+ 0x09, 0x00, # . Usage (00h),
+ 0x95, 0x0E, # . Report Count (14),
+ 0xB1, 0x02, # . Feature (Variable),
+ 0x85, 0x12, # . Report ID (18),
+ 0x09, 0x00, # . Usage (00h),
+ 0x95, 0x04, # . Report Count (4),
+ 0xB1, 0x02, # . Feature (Variable),
+ 0x85, 0x16, # . Report ID (22),
+ 0x09, 0x00, # . Usage (00h),
+ 0x95, 0x0E, # . Report Count (14),
+ 0xB1, 0x02, # . Feature (Variable),
+ 0x85, 0x40, # . Report ID (64),
+ 0x09, 0x00, # . Usage (00h),
+ 0x95, 0x01, # . Report Count (1),
+ 0xB1, 0x02, # . Feature (Variable),
+ 0x85, 0x41, # . Report ID (65),
+ 0x09, 0x00, # . Usage (00h),
+ 0x95, 0x01, # . Report Count (1),
+ 0xB1, 0x02, # . Feature (Variable),
+ 0x85, 0x42, # . Report ID (66),
+ 0x09, 0x00, # . Usage (00h),
+ 0x95, 0x04, # . Report Count (4),
+ 0xB1, 0x02, # . Feature (Variable),
+ 0x85, 0x43, # . Report ID (67),
+ 0x09, 0x00, # . Usage (00h),
+ 0x95, 0x0D, # . Report Count (13),
+ 0xB1, 0x02, # . Feature (Variable),
+ 0x85, 0x44, # . Report ID (68),
+ 0x09, 0x00, # . Usage (00h),
+ 0x95, 0x3F, # . Report Count (63),
+ 0xB1, 0x02, # . Feature (Variable),
+ 0x85, 0x45, # . Report ID (69),
+ 0x09, 0x00, # . Usage (00h),
+ 0x95, 0x20, # . Report Count (32),
+ 0xB1, 0x02, # . Feature (Variable),
+ 0x85, 0x60, # . Report ID (96),
+ 0x09, 0x00, # . Usage (00h),
+ 0x95, 0x3F, # . Report Count (63),
+ 0xB1, 0x02, # . Feature (Variable),
+ 0x85, 0x61, # . Report ID (97),
+ 0x09, 0x00, # . Usage (00h),
+ 0x95, 0x3E, # . Report Count (62),
+ 0xB1, 0x02, # . Feature (Variable),
+ 0x85, 0x62, # . Report ID (98),
+ 0x09, 0x00, # . Usage (00h),
+ 0x95, 0x3E, # . Report Count (62),
+ 0xB1, 0x02, # . Feature (Variable),
+ 0xC0, # . End Collection,
+ 0x85, 0xD0, # . Report ID (208),
+ 0x09, 0x01, # . Usage (01h),
+ 0x96, 0x08, 0x00, # . Report Count (8),
+ 0xB1, 0x02, # . Feature (Variable),
+ 0x85, 0xD1, # . Report ID (209),
+ 0x09, 0x01, # . Usage (01h),
+ 0x96, 0x04, 0x01, # . Report Count (260),
+ 0xB1, 0x02, # . Feature (Variable),
+ 0x85, 0xD2, # . Report ID (210),
+ 0x09, 0x01, # . Usage (01h),
+ 0x96, 0x04, 0x01, # . Report Count (260),
+ 0xB1, 0x02, # . Feature (Variable),
+ 0x85, 0xD3, # . Report ID (211),
+ 0x09, 0x01, # . Usage (01h),
+ 0x96, 0x04, 0x00, # . Report Count (4),
+ 0xB1, 0x02, # . Feature (Variable),
+ 0x85, 0xD4, # . Report ID (212),
+ 0x09, 0x01, # . Usage (01h),
+ 0x96, 0x04, 0x00, # . Report Count (4),
+ 0xB1, 0x02, # . Feature (Variable),
+ 0x85, 0xD5, # . Report ID (213),
+ 0x09, 0x01, # . Usage (01h),
+ 0x96, 0x04, 0x00, # . Report Count (4),
+ 0xB1, 0x02, # . Feature (Variable),
+ 0x85, 0xD6, # . Report ID (214),
+ 0x09, 0x01, # . Usage (01h),
+ 0x96, 0x04, 0x00, # . Report Count (4),
+ 0xB1, 0x02, # . Feature (Variable),
+ 0x85, 0xD7, # . Report ID (215),
+ 0x09, 0x01, # . Usage (01h),
+ 0x96, 0x08, 0x00, # . Report Count (8),
+ 0xB1, 0x02, # . Feature (Variable),
+ 0x85, 0xD8, # . Report ID (216),
+ 0x09, 0x01, # . Usage (01h),
+ 0x96, 0x0C, 0x00, # . Report Count (12),
+ 0xB1, 0x02, # . Feature (Variable),
+ 0x85, 0xD9, # . Report ID (217),
+ 0x09, 0x01, # . Usage (01h),
+ 0x96, 0x00, 0x0A, # . Report Count (2560),
+ 0xB1, 0x02, # . Feature (Variable),
+ 0x85, 0xDA, # . Report ID (218),
+ 0x09, 0x01, # . Usage (01h),
+ 0x96, 0x04, 0x04, # . Report Count (1028),
+ 0xB1, 0x02, # . Feature (Variable),
+ 0x85, 0xDB, # . Report ID (219),
+ 0x09, 0x01, # . Usage (01h),
+ 0x96, 0x06, 0x00, # . Report Count (6),
+ 0xB1, 0x02, # . Feature (Variable),
+ 0x85, 0xDC, # . Report ID (220),
+ 0x09, 0x01, # . Usage (01h),
+ 0x96, 0x02, 0x00, # . Report Count (2),
+ 0xB1, 0x02, # . Feature (Variable),
+ 0x85, 0xDD, # . Report ID (221),
+ 0x09, 0x01, # . Usage (01h),
+ 0x96, 0x04, 0x00, # . Report Count (4),
+ 0xB1, 0x02, # . Feature (Variable),
+ 0x85, 0xDE, # . Report ID (222),
+ 0x09, 0x01, # . Usage (01h),
+ 0x96, 0x04, 0x00, # . Report Count (4),
+ 0xB1, 0x02, # . Feature (Variable),
+ 0x85, 0xDF, # . Report ID (223),
+ 0x09, 0x01, # . Usage (01h),
+ 0x96, 0x22, 0x00, # . Report Count (34),
+ 0xB1, 0x02, # . Feature (Variable),
+ 0x85, 0xE0, # . Report ID (224),
+ 0x09, 0x01, # . Usage (01h),
+ 0x96, 0x01, 0x00, # . Report Count (1),
+ 0xB1, 0x02, # . Feature (Variable),
+ 0x85, 0xE1, # . Report ID (225),
+ 0x09, 0x01, # . Usage (01h),
+ 0x96, 0x02, 0x00, # . Report Count (2),
+ 0xB1, 0x02, # . Feature (Variable),
+ 0x85, 0xE2, # . Report ID (226),
+ 0x09, 0x01, # . Usage (01h),
+ 0x96, 0x02, 0x00, # . Report Count (2),
+ 0xB1, 0x02, # . Feature (Variable),
+ 0x85, 0xE3, # . Report ID (227),
+ 0x09, 0x01, # . Usage (01h),
+ 0x96, 0x02, 0x00, # . Report Count (2),
+ 0xB1, 0x02, # . Feature (Variable),
+ 0x85, 0xE4, # . Report ID (228),
+ 0x09, 0x01, # . Usage (01h),
+ 0x96, 0xFF, 0x01, # . Report Count (511),
+ 0xB1, 0x02, # . Feature (Variable),
+ 0xC0 # . End Collection
+]
+# fmt: on
+
+# Report ID (20), Usage (1014h), Report Count (13) -> 15
+wacom_pth660_v150 = wacom_pth660_v145.copy()
+wacom_pth660_v150[0x2CB] = 0x0F
+
+# fmt: off
+wacom_pth860_v145 = [
+ 0x05, 0x01, # . Usage Page (Desktop),
+ 0x09, 0x02, # . Usage (Mouse),
+ 0xA1, 0x01, # . Collection (Application),
+ 0x85, 0x01, # . Report ID (1),
+ 0x09, 0x01, # . Usage (Pointer),
+ 0xA1, 0x00, # . Collection (Physical),
+ 0x05, 0x09, # . Usage Page (Button),
+ 0x19, 0x01, # . Usage Minimum (01h),
+ 0x29, 0x03, # . Usage Maximum (03h),
+ 0x15, 0x00, # . Logical Minimum (0),
+ 0x25, 0x01, # . Logical Maximum (1),
+ 0x75, 0x01, # . Report Size (1),
+ 0x95, 0x03, # . Report Count (3),
+ 0x81, 0x02, # . Input (Variable),
+ 0x95, 0x05, # . Report Count (5),
+ 0x81, 0x03, # . Input (Constant, Variable),
+ 0x05, 0x01, # . Usage Page (Desktop),
+ 0x09, 0x30, # . Usage (X),
+ 0x09, 0x31, # . Usage (Y),
+ 0x15, 0x80, # . Logical Minimum (-128),
+ 0x25, 0x7F, # . Logical Maximum (127),
+ 0x75, 0x08, # . Report Size (8),
+ 0x95, 0x02, # . Report Count (2),
+ 0x81, 0x06, # . Input (Variable, Relative),
+ 0xC0, # . End Collection,
+ 0xC0, # . End Collection,
+ 0x06, 0x0D, 0xFF, # . Usage Page (FF0Dh),
+ 0x09, 0x01, # . Usage (01h),
+ 0xA1, 0x01, # . Collection (Application),
+ 0x85, 0x10, # . Report ID (16),
+ 0x09, 0x20, # . Usage (20h),
+ 0xA1, 0x00, # . Collection (Physical),
+ 0x09, 0x42, # . Usage (42h),
+ 0x09, 0x44, # . Usage (44h),
+ 0x09, 0x5A, # . Usage (5Ah),
+ 0x09, 0x45, # . Usage (45h),
+ 0x09, 0x3C, # . Usage (3Ch),
+ 0x09, 0x32, # . Usage (32h),
+ 0x09, 0x36, # . Usage (36h),
+ 0x15, 0x00, # . Logical Minimum (0),
+ 0x25, 0x01, # . Logical Maximum (1),
+ 0x75, 0x01, # . Report Size (1),
+ 0x95, 0x07, # . Report Count (7),
+ 0x81, 0x02, # . Input (Variable),
+ 0x95, 0x01, # . Report Count (1),
+ 0x81, 0x03, # . Input (Constant, Variable),
+ 0x0A, 0x30, 0x01, # . Usage (0130h),
+ 0x65, 0x11, # . Unit (Centimeter),
+ 0x55, 0x0D, # . Unit Exponent (13),
+ 0x35, 0x00, # . Physical Minimum (0),
+ 0x47, 0x7C, 0x79, 0x00, 0x00, # . Physical Maximum (31100),
+ 0x15, 0x00, # . Logical Minimum (0),
+ 0x27, 0xF8, 0xF2, 0x00, 0x00, # . Logical Maximum (62200),
+ 0x75, 0x18, # . Report Size (24),
+ 0x95, 0x01, # . Report Count (1),
+ 0x81, 0x02, # . Input (Variable),
+ 0x0A, 0x31, 0x01, # . Usage (0131h),
+ 0x47, 0x60, 0x54, 0x00, 0x00, # . Physical Maximum (21600),
+ 0x27, 0xC0, 0xA8, 0x00, 0x00, # . Logical Maximum (43200),
+ 0x81, 0x02, # . Input (Variable),
+ 0x09, 0x30, # . Usage (30h), # !!! Errata: Missing Physical Max = 0
+ 0x55, 0x00, # . Unit Exponent (0),
+ 0x65, 0x00, # . Unit,
+ 0x15, 0x00, # . Logical Minimum (0),
+ 0x26, 0xFF, 0x1F, # . Logical Maximum (8191),
+ 0x75, 0x10, # . Report Size (16),
+ 0x81, 0x02, # . Input (Variable),
+ 0x09, 0x3D, # . Usage (3Dh),
+ 0x09, 0x3E, # . Usage (3Eh),
+ 0x65, 0x14, # . Unit (Degrees),
+ 0x55, 0x00, # . Unit Exponent (0),
+ 0x35, 0xC0, # . Physical Minimum (-64),
+ 0x45, 0x3F, # . Physical Maximum (63),
+ 0x15, 0xC0, # . Logical Minimum (-64),
+ 0x25, 0x3F, # . Logical Maximum (63),
+ 0x75, 0x08, # . Report Size (8),
+ 0x95, 0x02, # . Report Count (2),
+ 0x81, 0x02, # . Input (Variable),
+ 0x09, 0x41, # . Usage (41h),
+ 0x65, 0x14, # . Unit (Degrees),
+ 0x55, 0x00, # . Unit Exponent (0),
+ 0x36, 0x4C, 0xFF, # . Physical Minimum (-180),
+ 0x46, 0xB3, 0x00, # . Physical Maximum (179),
+ 0x16, 0x7C, 0xFC, # . Logical Minimum (-900),
+ 0x26, 0x83, 0x03, # . Logical Maximum (899),
+ 0x75, 0x10, # . Report Size (16),
+ 0x95, 0x01, # . Report Count (1),
+ 0x81, 0x0A, # . Input (Variable, Wrap),
+ 0x0A, 0x03, 0x0D, # . Usage (0D03h),
+ 0x65, 0x00, # . Unit,
+ 0x55, 0x00, # . Unit Exponent (0),
+ 0x15, 0x00, # . Logical Minimum (0),
+ 0x26, 0xFF, 0x07, # . Logical Maximum (2047), # !!! Errata: Missing Physical Min/Max = 0
+ 0x75, 0x10, # . Report Size (16),
+ 0x95, 0x01, # . Report Count (1),
+ 0x81, 0x02, # . Input (Variable),
+ 0x0A, 0x32, 0x01, # . Usage (0132h),
+ 0x15, 0x00, # . Logical Minimum (0),
+ 0x25, 0x3F, # . Logical Maximum (63),
+ 0x75, 0x08, # . Report Size (8),
+ 0x95, 0x01, # . Report Count (1),
+ 0x81, 0x02, # . Input (Variable),
+ 0x09, 0x5B, # . Usage (5Bh),
+ 0x09, 0x5C, # . Usage (5Ch),
+ 0x17, 0x00, 0x00, 0x00, 0x80, # . Logical Minimum (-2147483648),
+ 0x27, 0xFF, 0xFF, 0xFF, 0x7F, # . Logical Maximum (2147483647),
+ 0x75, 0x20, # . Report Size (32),
+ 0x95, 0x02, # . Report Count (2),
+ 0x81, 0x02, # . Input (Variable),
+ 0x09, 0x77, # . Usage (77h),
+ 0x16, 0x00, 0x00, # . Logical Minimum (0),
+ 0x26, 0xFF, 0x0F, # . Logical Maximum (4095),
+ 0x75, 0x10, # . Report Size (16),
+ 0x95, 0x01, # . Report Count (1),
+ 0x81, 0x02, # . Input (Variable),
+ 0xC0, # . End Collection,
+ 0x85, 0x11, # . Report ID (17),
+ 0x09, 0x39, # . Usage (39h),
+ 0xA1, 0x00, # . Collection (Physical),
+ 0x1A, 0x10, 0x09, # . Usage Minimum (0910h),
+ 0x2A, 0x17, 0x09, # . Usage Maximum (0917h),
+ 0x15, 0x00, # . Logical Minimum (0),
+ 0x25, 0x01, # . Logical Maximum (1),
+ 0x75, 0x01, # . Report Size (1),
+ 0x95, 0x08, # . Report Count (8),
+ 0x81, 0x02, # . Input (Variable),
+ 0x1A, 0x40, 0x09, # . Usage Minimum (0940h),
+ 0x2A, 0x47, 0x09, # . Usage Maximum (0947h),
+ 0x81, 0x02, # . Input (Variable),
+ 0x0A, 0x95, 0x09, # . Usage (0995h),
+ 0x95, 0x01, # . Report Count (1),
+ 0x81, 0x02, # . Input (Variable),
+ 0x95, 0x07, # . Report Count (7),
+ 0x81, 0x03, # . Input (Constant, Variable),
+ 0x0A, 0x38, 0x01, # . Usage (0138h),
+ 0x65, 0x14, # . Unit (Degrees),
+ 0x55, 0x00, # . Unit Exponent (0),
+ 0x35, 0x00, # . Physical Minimum (0),
+ 0x46, 0x67, 0x01, # . Physical Maximum (359),
+ 0x15, 0x00, # . Logical Minimum (0),
+ 0x25, 0x47, # . Logical Maximum (71),
+ 0x75, 0x07, # . Report Size (7),
+ 0x95, 0x01, # . Report Count (1),
+ 0x81, 0x0A, # . Input (Variable, Wrap),
+ 0x0A, 0x39, 0x01, # . Usage (0139h),
+ 0x65, 0x00, # . Unit,
+ 0x55, 0x00, # . Unit Exponent (0),
+ 0x25, 0x01, # . Logical Maximum (1), # !!! Errata: Missing Physical Max = 0
+ 0x75, 0x01, # . Report Size (1),
+ 0x95, 0x01, # . Report Count (1),
+ 0x81, 0x02, # . Input (Variable),
+ 0x75, 0x08, # . Report Size (8),
+ 0x95, 0x04, # . Report Count (4),
+ 0x81, 0x03, # . Input (Constant, Variable),
+ 0xC0, # . End Collection,
+ 0x85, 0x13, # . Report ID (19),
+ 0x0A, 0x13, 0x10, # . Usage (1013h),
+ 0xA1, 0x00, # . Collection (Physical),
+ 0x0A, 0x3B, 0x04, # . Usage (043Bh),
+ 0x15, 0x00, # . Logical Minimum (0),
+ 0x25, 0x64, # . Logical Maximum (100),
+ 0x75, 0x07, # . Report Size (7),
+ 0x95, 0x01, # . Report Count (1),
+ 0x81, 0x02, # . Input (Variable),
+ 0x0A, 0x04, 0x04, # . Usage (0404h),
+ 0x25, 0x01, # . Logical Maximum (1),
+ 0x75, 0x01, # . Report Size (1),
+ 0x81, 0x02, # . Input (Variable),
+ 0x0A, 0x52, 0x04, # . Usage (0452h),
+ 0x15, 0x00, # . Logical Minimum (0),
+ 0x25, 0x01, # . Logical Maximum (1),
+ 0x75, 0x01, # . Report Size (1),
+ 0x95, 0x01, # . Report Count (1),
+ 0x81, 0x02, # . Input (Variable),
+ 0x95, 0x06, # . Report Count (6),
+ 0x81, 0x03, # . Input (Constant, Variable),
+ 0x0A, 0x54, 0x04, # . Usage (0454h),
+ 0x95, 0x01, # . Report Count (1),
+ 0x81, 0x02, # . Input (Variable),
+ 0x75, 0x08, # . Report Size (8),
+ 0x95, 0x06, # . Report Count (6),
+ 0x81, 0x03, # . Input (Constant, Variable),
+ 0xC0, # . End Collection,
+ 0x09, 0x0E, # . Usage (0Eh),
+ 0xA1, 0x02, # . Collection (Logical),
+ 0x15, 0x00, # . Logical Minimum (0),
+ 0x85, 0x02, # . Report ID (2),
+ 0x09, 0x01, # . Usage (01h),
+ 0x75, 0x08, # . Report Size (8),
+ 0x25, 0x01, # . Logical Maximum (1),
+ 0x95, 0x01, # . Report Count (1),
+ 0xB1, 0x02, # . Feature (Variable),
+ 0x85, 0x03, # . Report ID (3),
+ 0x0A, 0x03, 0x10, # . Usage (1003h),
+ 0x26, 0xFF, 0x00, # . Logical Maximum (255),
+ 0x95, 0x01, # . Report Count (1),
+ 0xB1, 0x02, # . Feature (Variable),
+ 0x85, 0x04, # . Report ID (4),
+ 0x0A, 0x04, 0x10, # . Usage (1004h),
+ 0x25, 0x01, # . Logical Maximum (1),
+ 0x95, 0x01, # . Report Count (1),
+ 0xB1, 0x02, # . Feature (Variable),
+ 0x85, 0x07, # . Report ID (7),
+ 0x0A, 0x09, 0x10, # . Usage (1009h),
+ 0x25, 0x02, # . Logical Maximum (2),
+ 0x95, 0x01, # . Report Count (1),
+ 0xB1, 0x02, # . Feature (Variable),
+ 0x95, 0x01, # . Report Count (1),
+ 0xB1, 0x03, # . Feature (Constant, Variable),
+ 0x0A, 0x07, 0x10, # . Usage (1007h),
+ 0x09, 0x00, # . Usage (00h),
+ 0x0A, 0x08, 0x10, # . Usage (1008h),
+ 0x09, 0x00, # . Usage (00h),
+ 0x09, 0x00, # . Usage (00h),
+ 0x09, 0x00, # . Usage (00h),
+ 0x27, 0xFF, 0xFF, 0x00, 0x00, # . Logical Maximum (65535),
+ 0x75, 0x10, # . Report Size (16),
+ 0x95, 0x06, # . Report Count (6),
+ 0xB1, 0x02, # . Feature (Variable),
+ 0x75, 0x08, # . Report Size (8),
+ 0x95, 0x01, # . Report Count (1),
+ 0xB1, 0x03, # . Feature (Constant, Variable),
+ 0x85, 0x0C, # . Report ID (12),
+ 0x0A, 0x30, 0x0D, # . Usage (0D30h),
+ 0x0A, 0x31, 0x0D, # . Usage (0D31h),
+ 0x0A, 0x32, 0x0D, # . Usage (0D32h),
+ 0x0A, 0x33, 0x0D, # . Usage (0D33h), # !!! Errata: Missing Non-zero Physical Max
+ 0x65, 0x11, # . Unit (Centimeter),
+ 0x55, 0x0D, # . Unit Exponent (13),
+ 0x75, 0x10, # . Report Size (16),
+ 0x95, 0x04, # . Report Count (4),
+ 0xB1, 0x03, # . Feature (Constant, Variable),
+ 0x85, 0x0D, # . Report ID (13),
+ 0x65, 0x00, # . Unit,
+ 0x55, 0x00, # . Unit Exponent (0),
+ 0x0A, 0x0D, 0x10, # . Usage (100Dh),
+ 0x15, 0x00, # . Logical Minimum (0),
+ 0x25, 0x01, # . Logical Maximum (1),
+ 0x75, 0x08, # . Report Size (8),
+ 0x95, 0x01, # . Report Count (1),
+ 0xB1, 0x02, # . Feature (Variable),
+ 0x85, 0x14, # . Report ID (20),
+ 0x0A, 0x14, 0x10, # . Usage (1014h),
+ 0x26, 0xFF, 0x00, # . Logical Maximum (255),
+ 0x95, 0x0D, # . Report Count (13),
+ 0xB1, 0x02, # . Feature (Variable),
+ 0x85, 0x31, # . Report ID (49),
+ 0x0A, 0x31, 0x10, # . Usage (1031h),
+ 0x25, 0x64, # . Logical Maximum (100),
+ 0x95, 0x05, # . Report Count (5),
+ 0xB1, 0x02, # . Feature (Variable),
+ 0x85, 0x32, # . Report ID (50),
+ 0x0A, 0x31, 0x10, # . Usage (1031h),
+ 0x25, 0x64, # . Logical Maximum (100),
+ 0x95, 0x01, # . Report Count (1),
+ 0xB1, 0x02, # . Feature (Variable),
+ 0x0A, 0x32, 0x10, # . Usage (1032h),
+ 0x25, 0x03, # . Logical Maximum (3),
+ 0xB1, 0x02, # . Feature (Variable),
+ 0x85, 0x34, # . Report ID (52),
+ 0x0A, 0x34, 0x10, # . Usage (1034h),
+ 0x25, 0x01, # . Logical Maximum (1),
+ 0x95, 0x04, # . Report Count (4),
+ 0xB1, 0x02, # . Feature (Variable),
+ 0x85, 0x35, # . Report ID (53),
+ 0x0A, 0x35, 0x10, # . Usage (1035h),
+ 0x26, 0xFF, 0x00, # . Logical Maximum (255),
+ 0x95, 0x0A, # . Report Count (10),
+ 0xB1, 0x02, # . Feature (Variable),
+ 0x85, 0x36, # . Report ID (54),
+ 0x0A, 0x35, 0x10, # . Usage (1035h),
+ 0x26, 0xFF, 0x00, # . Logical Maximum (255),
+ 0x96, 0x01, 0x01, # . Report Count (257),
+ 0xB1, 0x02, # . Feature (Variable),
+ 0x85, 0xCC, # . Report ID (204),
+ 0x0A, 0xCC, 0x10, # . Usage (10CCh),
+ 0x26, 0xFF, 0x00, # . Logical Maximum (255),
+ 0x95, 0x02, # . Report Count (2),
+ 0xB1, 0x02, # . Feature (Variable),
+ 0xC0, # . End Collection,
+ 0x0A, 0xAC, 0x10, # . Usage (10ACh),
+ 0xA1, 0x02, # . Collection (Logical),
+ 0x15, 0x00, # . Logical Minimum (0),
+ 0x26, 0xFF, 0x00, # . Logical Maximum (255),
+ 0x75, 0x08, # . Report Size (8),
+ 0x85, 0xAC, # . Report ID (172),
+ 0x09, 0x00, # . Usage (00h),
+ 0x95, 0xBF, # . Report Count (191),
+ 0x81, 0x02, # . Input (Variable),
+ 0x85, 0x33, # . Report ID (51),
+ 0x09, 0x00, # . Usage (00h),
+ 0x95, 0x12, # . Report Count (18),
+ 0xB1, 0x02, # . Feature (Variable),
+ 0x85, 0x64, # . Report ID (100),
+ 0x09, 0x00, # . Usage (00h),
+ 0x95, 0x0C, # . Report Count (12),
+ 0xB1, 0x02, # . Feature (Variable),
+ 0x85, 0x15, # . Report ID (21),
+ 0x09, 0x00, # . Usage (00h),
+ 0x95, 0x0E, # . Report Count (14),
+ 0xB1, 0x02, # . Feature (Variable),
+ 0x85, 0x12, # . Report ID (18),
+ 0x09, 0x00, # . Usage (00h),
+ 0x95, 0x04, # . Report Count (4),
+ 0xB1, 0x02, # . Feature (Variable),
+ 0x85, 0x16, # . Report ID (22),
+ 0x09, 0x00, # . Usage (00h),
+ 0x95, 0x0E, # . Report Count (14),
+ 0xB1, 0x02, # . Feature (Variable),
+ 0x85, 0x40, # . Report ID (64),
+ 0x09, 0x00, # . Usage (00h),
+ 0x95, 0x01, # . Report Count (1),
+ 0xB1, 0x02, # . Feature (Variable),
+ 0x85, 0x41, # . Report ID (65),
+ 0x09, 0x00, # . Usage (00h),
+ 0x95, 0x01, # . Report Count (1),
+ 0xB1, 0x02, # . Feature (Variable),
+ 0x85, 0x42, # . Report ID (66),
+ 0x09, 0x00, # . Usage (00h),
+ 0x95, 0x04, # . Report Count (4),
+ 0xB1, 0x02, # . Feature (Variable),
+ 0x85, 0x43, # . Report ID (67),
+ 0x09, 0x00, # . Usage (00h),
+ 0x95, 0x0D, # . Report Count (13),
+ 0xB1, 0x02, # . Feature (Variable),
+ 0x85, 0x44, # . Report ID (68),
+ 0x09, 0x00, # . Usage (00h),
+ 0x95, 0x3F, # . Report Count (63),
+ 0xB1, 0x02, # . Feature (Variable),
+ 0x85, 0x45, # . Report ID (69),
+ 0x09, 0x00, # . Usage (00h),
+ 0x95, 0x20, # . Report Count (32),
+ 0xB1, 0x02, # . Feature (Variable),
+ 0x85, 0x60, # . Report ID (96),
+ 0x09, 0x00, # . Usage (00h),
+ 0x95, 0x3F, # . Report Count (63),
+ 0xB1, 0x02, # . Feature (Variable),
+ 0x85, 0x61, # . Report ID (97),
+ 0x09, 0x00, # . Usage (00h),
+ 0x95, 0x3E, # . Report Count (62),
+ 0xB1, 0x02, # . Feature (Variable),
+ 0x85, 0x62, # . Report ID (98),
+ 0x09, 0x00, # . Usage (00h),
+ 0x95, 0x3E, # . Report Count (62),
+ 0xB1, 0x02, # . Feature (Variable),
+ 0xC0, # . End Collection,
+ 0x85, 0xD0, # . Report ID (208),
+ 0x09, 0x01, # . Usage (01h),
+ 0x96, 0x08, 0x00, # . Report Count (8),
+ 0xB1, 0x02, # . Feature (Variable),
+ 0x85, 0xD1, # . Report ID (209),
+ 0x09, 0x01, # . Usage (01h),
+ 0x96, 0x04, 0x01, # . Report Count (260),
+ 0xB1, 0x02, # . Feature (Variable),
+ 0x85, 0xD2, # . Report ID (210),
+ 0x09, 0x01, # . Usage (01h),
+ 0x96, 0x04, 0x01, # . Report Count (260),
+ 0xB1, 0x02, # . Feature (Variable),
+ 0x85, 0xD3, # . Report ID (211),
+ 0x09, 0x01, # . Usage (01h),
+ 0x96, 0x04, 0x00, # . Report Count (4),
+ 0xB1, 0x02, # . Feature (Variable),
+ 0x85, 0xD4, # . Report ID (212),
+ 0x09, 0x01, # . Usage (01h),
+ 0x96, 0x04, 0x00, # . Report Count (4),
+ 0xB1, 0x02, # . Feature (Variable),
+ 0x85, 0xD5, # . Report ID (213),
+ 0x09, 0x01, # . Usage (01h),
+ 0x96, 0x04, 0x00, # . Report Count (4),
+ 0xB1, 0x02, # . Feature (Variable),
+ 0x85, 0xD6, # . Report ID (214),
+ 0x09, 0x01, # . Usage (01h),
+ 0x96, 0x04, 0x00, # . Report Count (4),
+ 0xB1, 0x02, # . Feature (Variable),
+ 0x85, 0xD7, # . Report ID (215),
+ 0x09, 0x01, # . Usage (01h),
+ 0x96, 0x08, 0x00, # . Report Count (8),
+ 0xB1, 0x02, # . Feature (Variable),
+ 0x85, 0xD8, # . Report ID (216),
+ 0x09, 0x01, # . Usage (01h),
+ 0x96, 0x0C, 0x00, # . Report Count (12),
+ 0xB1, 0x02, # . Feature (Variable),
+ 0x85, 0xD9, # . Report ID (217),
+ 0x09, 0x01, # . Usage (01h),
+ 0x96, 0x00, 0x0A, # . Report Count (2560),
+ 0xB1, 0x02, # . Feature (Variable),
+ 0x85, 0xDA, # . Report ID (218),
+ 0x09, 0x01, # . Usage (01h),
+ 0x96, 0x04, 0x04, # . Report Count (1028),
+ 0xB1, 0x02, # . Feature (Variable),
+ 0x85, 0xDB, # . Report ID (219),
+ 0x09, 0x01, # . Usage (01h),
+ 0x96, 0x06, 0x00, # . Report Count (6),
+ 0xB1, 0x02, # . Feature (Variable),
+ 0x85, 0xDC, # . Report ID (220),
+ 0x09, 0x01, # . Usage (01h),
+ 0x96, 0x02, 0x00, # . Report Count (2),
+ 0xB1, 0x02, # . Feature (Variable),
+ 0x85, 0xDD, # . Report ID (221),
+ 0x09, 0x01, # . Usage (01h),
+ 0x96, 0x04, 0x00, # . Report Count (4),
+ 0xB1, 0x02, # . Feature (Variable),
+ 0x85, 0xDE, # . Report ID (222),
+ 0x09, 0x01, # . Usage (01h),
+ 0x96, 0x04, 0x00, # . Report Count (4),
+ 0xB1, 0x02, # . Feature (Variable),
+ 0x85, 0xDF, # . Report ID (223),
+ 0x09, 0x01, # . Usage (01h),
+ 0x96, 0x22, 0x00, # . Report Count (34),
+ 0xB1, 0x02, # . Feature (Variable),
+ 0x85, 0xE0, # . Report ID (224),
+ 0x09, 0x01, # . Usage (01h),
+ 0x96, 0x01, 0x00, # . Report Count (1),
+ 0xB1, 0x02, # . Feature (Variable),
+ 0x85, 0xE1, # . Report ID (225),
+ 0x09, 0x01, # . Usage (01h),
+ 0x96, 0x02, 0x00, # . Report Count (2),
+ 0xB1, 0x02, # . Feature (Variable),
+ 0x85, 0xE2, # . Report ID (226),
+ 0x09, 0x01, # . Usage (01h),
+ 0x96, 0x02, 0x00, # . Report Count (2),
+ 0xB1, 0x02, # . Feature (Variable),
+ 0x85, 0xE3, # . Report ID (227),
+ 0x09, 0x01, # . Usage (01h),
+ 0x96, 0x02, 0x00, # . Report Count (2),
+ 0xB1, 0x02, # . Feature (Variable),
+ 0x85, 0xE4, # . Report ID (228),
+ 0x09, 0x01, # . Usage (01h),
+ 0x96, 0xFF, 0x01, # . Report Count (511),
+ 0xB1, 0x02, # . Feature (Variable),
+ 0xC0 # . End Collection
+]
+# fmt: on
+
+# Report ID (20), Usage (1014h), Report Count (13) -> 15
+wacom_pth860_v150 = wacom_pth860_v145.copy()
+wacom_pth860_v150[0x2CA] = 0x0F
+
+# fmt: off
+wacom_pth460_v105 = [
+ 0x06, 0x0D, 0xFF, # . Usage Page (FF0Dh),
+ 0x09, 0x01, # . Usage (01h),
+ 0xA1, 0x01, # . Collection (Application),
+ 0x85, 0x10, # . Report ID (16),
+ 0x09, 0x20, # . Usage (20h),
+ 0x35, 0x00, # . Physical Minimum (0),
+ 0x45, 0x00, # . Physical Maximum (0),
+ 0x15, 0x00, # . Logical Minimum (0),
+ 0x25, 0x01, # . Logical Maximum (1),
+ 0xA1, 0x00, # . Collection (Physical),
+ 0x09, 0x42, # . Usage (42h),
+ 0x09, 0x44, # . Usage (44h),
+ 0x09, 0x5A, # . Usage (5Ah),
+ 0x09, 0x45, # . Usage (45h),
+ 0x09, 0x3C, # . Usage (3Ch),
+ 0x09, 0x32, # . Usage (32h),
+ 0x09, 0x36, # . Usage (36h),
+ 0x25, 0x01, # . Logical Maximum (1),
+ 0x75, 0x01, # . Report Size (1),
+ 0x95, 0x07, # . Report Count (7),
+ 0x81, 0x02, # . Input (Variable),
+ 0x95, 0x01, # . Report Count (1),
+ 0x81, 0x03, # . Input (Constant, Variable),
+ 0x0A, 0x30, 0x01, # . Usage (0130h),
+ 0x65, 0x11, # . Unit (Centimeter),
+ 0x55, 0x0D, # . Unit Exponent (13),
+ 0x47, 0x58, 0x3E, 0x00, 0x00, # . Physical Maximum (15960),
+ 0x27, 0xB0, 0x7C, 0x00, 0x00, # . Logical Maximum (31920),
+ 0x75, 0x18, # . Report Size (24),
+ 0x95, 0x01, # . Report Count (1),
+ 0x81, 0x02, # . Input (Variable),
+ 0x0A, 0x31, 0x01, # . Usage (0131h),
+ 0x47, 0xF7, 0x26, 0x00, 0x00, # . Physical Maximum (9975),
+ 0x27, 0xEE, 0x4D, 0x00, 0x00, # . Logical Maximum (19950),
+ 0x81, 0x02, # . Input (Variable),
+ 0x09, 0x30, # . Usage (30h),
+ 0x55, 0x00, # . Unit Exponent (0),
+ 0x65, 0x00, # . Unit,
+ 0x26, 0xFF, 0x1F, # . Logical Maximum (8191), # !!! Errata: Missing Physical Max = 0
+ 0x75, 0x10, # . Report Size (16),
+ 0x81, 0x02, # . Input (Variable),
+ 0x09, 0x3D, # . Usage (3Dh),
+ 0x09, 0x3E, # . Usage (3Eh),
+ 0x65, 0x14, # . Unit (Degrees),
+ 0x55, 0x00, # . Unit Exponent (0),
+ 0x35, 0xC0, # . Physical Minimum (-64),
+ 0x45, 0x3F, # . Physical Maximum (63),
+ 0x15, 0xC0, # . Logical Minimum (-64),
+ 0x25, 0x3F, # . Logical Maximum (63),
+ 0x75, 0x08, # . Report Size (8),
+ 0x95, 0x02, # . Report Count (2),
+ 0x81, 0x02, # . Input (Variable),
+ 0x09, 0x41, # . Usage (41h),
+ 0x65, 0x14, # . Unit (Degrees),
+ 0x55, 0x00, # . Unit Exponent (0),
+ 0x36, 0x4C, 0xFF, # . Physical Minimum (-180),
+ 0x46, 0xB3, 0x00, # . Physical Maximum (179),
+ 0x16, 0x7C, 0xFC, # . Logical Minimum (-900),
+ 0x26, 0x83, 0x03, # . Logical Maximum (899),
+ 0x75, 0x10, # . Report Size (16),
+ 0x95, 0x01, # . Report Count (1),
+ 0x81, 0x0A, # . Input (Variable, Wrap),
+ 0x0A, 0x03, 0x0D, # . Usage (0D03h),
+ 0x65, 0x00, # . Unit,
+ 0x55, 0x00, # . Unit Exponent (0),
+ 0x35, 0x00, # . Physical Minimum (0),
+ 0x45, 0x00, # . Physical Maximum (0),
+ 0x15, 0x00, # . Logical Minimum (0),
+ 0x26, 0xFF, 0x07, # . Logical Maximum (2047),
+ 0x81, 0x02, # . Input (Variable),
+ 0x0A, 0x32, 0x01, # . Usage (0132h),
+ 0x25, 0x3F, # . Logical Maximum (63),
+ 0x75, 0x08, # . Report Size (8),
+ 0x95, 0x01, # . Report Count (1),
+ 0x81, 0x02, # . Input (Variable),
+ 0x09, 0x5B, # . Usage (5Bh),
+ 0x09, 0x5C, # . Usage (5Ch),
+ 0x17, 0x00, 0x00, 0x00, 0x80, # . Logical Minimum (-2147483648),
+ 0x27, 0xFF, 0xFF, 0xFF, 0x7F, # . Logical Maximum (2147483647),
+ 0x75, 0x20, # . Report Size (32),
+ 0x95, 0x02, # . Report Count (2),
+ 0x81, 0x02, # . Input (Variable),
+ 0x09, 0x77, # . Usage (77h),
+ 0x15, 0x00, # . Logical Minimum (0),
+ 0x26, 0xFF, 0x0F, # . Logical Maximum (4095),
+ 0x75, 0x10, # . Report Size (16),
+ 0x95, 0x01, # . Report Count (1),
+ 0x81, 0x02, # . Input (Variable),
+ 0xC0, # . End Collection,
+ 0x85, 0x11, # . Report ID (17),
+ 0x65, 0x00, # . Unit,
+ 0x55, 0x00, # . Unit Exponent (0),
+ 0x35, 0x00, # . Physical Minimum (0),
+ 0x45, 0x00, # . Physical Maximum (0),
+ 0x09, 0x39, # . Usage (39h),
+ 0xA1, 0x00, # . Collection (Physical),
+ 0x09, 0x39, # . Usage (39h),
+ 0xA1, 0x00, # . Collection (Physical),
+ 0x35, 0x00, # . Physical Minimum (0),
+ 0x45, 0x00, # . Physical Maximum (0),
+ 0x15, 0x00, # . Logical Minimum (0),
+ 0x1A, 0x10, 0x09, # . Usage Minimum (0910h),
+ 0x2A, 0x15, 0x09, # . Usage Maximum (0915h),
+ 0x15, 0x00, # . Logical Minimum (0),
+ 0x25, 0x01, # . Logical Maximum (1),
+ 0x75, 0x01, # . Report Size (1),
+ 0x95, 0x06, # . Report Count (6),
+ 0x81, 0x02, # . Input (Variable),
+ 0x95, 0x02, # . Report Count (2),
+ 0x81, 0x03, # . Input (Constant, Variable),
+ 0xC0, # . End Collection,
+ 0x75, 0x08, # . Report Size (8),
+ 0x95, 0x01, # . Report Count (1),
+ 0x81, 0x03, # . Input (Constant, Variable),
+ 0x09, 0x39, # . Usage (39h),
+ 0xA1, 0x00, # . Collection (Physical),
+ 0x35, 0x00, # . Physical Minimum (0),
+ 0x45, 0x00, # . Physical Maximum (0),
+ 0x0A, 0x95, 0x09, # . Usage (0995h),
+ 0x15, 0x00, # . Logical Minimum (0),
+ 0x25, 0x01, # . Logical Maximum (1),
+ 0x75, 0x01, # . Report Size (1),
+ 0x95, 0x01, # . Report Count (1),
+ 0x81, 0x02, # . Input (Variable),
+ 0x95, 0x07, # . Report Count (7),
+ 0x81, 0x03, # . Input (Constant, Variable),
+ 0xC0, # . End Collection,
+ 0x09, 0x39, # . Usage (39h),
+ 0xA1, 0x00, # . Collection (Physical),
+ 0x35, 0x00, # . Physical Minimum (0),
+ 0x15, 0x00, # . Logical Minimum (0),
+ 0x0A, 0x38, 0x01, # . Usage (0138h),
+ 0x65, 0x14, # . Unit (Degrees),
+ 0x55, 0x00, # . Unit Exponent (0),
+ 0x35, 0x00, # . Physical Minimum (0),
+ 0x46, 0x67, 0x01, # . Physical Maximum (359),
+ 0x15, 0x00, # . Logical Minimum (0),
+ 0x25, 0x47, # . Logical Maximum (71),
+ 0x75, 0x07, # . Report Size (7),
+ 0x95, 0x01, # . Report Count (1),
+ 0x81, 0x4A, # . Input (Variable, Wrap, Null State),
+ 0x0A, 0x39, 0x01, # . Usage (0139h),
+ 0x65, 0x00, # . Unit,
+ 0x55, 0x00, # . Unit Exponent (0),
+ 0x45, 0x00, # . Physical Maximum (0),
+ 0x25, 0x01, # . Logical Maximum (1),
+ 0x75, 0x01, # . Report Size (1),
+ 0x95, 0x01, # . Report Count (1),
+ 0x81, 0x02, # . Input (Variable),
+ 0xC0, # . End Collection,
+ 0x75, 0x08, # . Report Size (8),
+ 0x95, 0x04, # . Report Count (4),
+ 0x81, 0x03, # . Input (Constant, Variable),
+ 0xC0, # . End Collection,
+ 0x85, 0x13, # . Report ID (19),
+ 0x65, 0x00, # . Unit,
+ 0x55, 0x00, # . Unit Exponent (0),
+ 0x35, 0x00, # . Physical Minimum (0),
+ 0x45, 0x00, # . Physical Maximum (0),
+ 0x0A, 0x13, 0x10, # . Usage (1013h),
+ 0xA1, 0x00, # . Collection (Physical),
+ 0x0A, 0x13, 0x10, # . Usage (1013h),
+ 0xA1, 0x00, # . Collection (Physical),
+ 0x35, 0x00, # . Physical Minimum (0),
+ 0x45, 0x00, # . Physical Maximum (0),
+ 0x15, 0x00, # . Logical Minimum (0),
+ 0x0A, 0x3B, 0x04, # . Usage (043Bh),
+ 0x15, 0x00, # . Logical Minimum (0),
+ 0x25, 0x64, # . Logical Maximum (100),
+ 0x75, 0x07, # . Report Size (7),
+ 0x95, 0x01, # . Report Count (1),
+ 0x81, 0x02, # . Input (Variable),
+ 0x0A, 0x04, 0x04, # . Usage (0404h),
+ 0x25, 0x01, # . Logical Maximum (1),
+ 0x75, 0x01, # . Report Size (1),
+ 0x81, 0x02, # . Input (Variable),
+ 0xC0, # . End Collection,
+ 0x0A, 0x13, 0x10, # . Usage (1013h),
+ 0xA1, 0x00, # . Collection (Physical),
+ 0x35, 0x00, # . Physical Minimum (0),
+ 0x45, 0x00, # . Physical Maximum (0),
+ 0x0A, 0x52, 0x04, # . Usage (0452h),
+ 0x15, 0x00, # . Logical Minimum (0),
+ 0x25, 0x01, # . Logical Maximum (1),
+ 0x75, 0x01, # . Report Size (1),
+ 0x95, 0x01, # . Report Count (1),
+ 0x81, 0x02, # . Input (Variable),
+ 0x0A, 0x41, 0x04, # . Usage (0441h),
+ 0x15, 0x00, # . Logical Minimum (0),
+ 0x25, 0x07, # . Logical Maximum (7),
+ 0x75, 0x03, # . Report Size (3),
+ 0x95, 0x02, # . Report Count (2),
+ 0x81, 0x02, # . Input (Variable),
+ 0x0A, 0x54, 0x04, # . Usage (0454h),
+ 0x15, 0x00, # . Logical Minimum (0),
+ 0x25, 0x01, # . Logical Maximum (1),
+ 0x75, 0x01, # . Report Size (1),
+ 0x95, 0x01, # . Report Count (1),
+ 0x81, 0x02, # . Input (Variable),
+ 0xC0, # . End Collection,
+ 0x0A, 0x13, 0x10, # . Usage (1013h),
+ 0xA1, 0x00, # . Collection (Physical),
+ 0x35, 0x00, # . Physical Minimum (0),
+ 0x45, 0x00, # . Physical Maximum (0),
+ 0x15, 0x00, # . Logical Minimum (0),
+ 0x0A, 0x3C, 0x04, # . Usage (043Ch),
+ 0x55, 0x00, # . Unit Exponent (0),
+ 0x65, 0x00, # . Unit,
+ 0x15, 0xFB, # . Logical Minimum (-5),
+ 0x25, 0x32, # . Logical Maximum (50),
+ 0x75, 0x08, # . Report Size (8),
+ 0x95, 0x01, # . Report Count (1),
+ 0x81, 0x02, # . Input (Variable),
+ 0xC0, # . End Collection,
+ 0x0A, 0x13, 0x10, # . Usage (1013h),
+ 0xA1, 0x00, # . Collection (Physical),
+ 0x35, 0x00, # . Physical Minimum (0),
+ 0x45, 0x00, # . Physical Maximum (0),
+ 0x15, 0x00, # . Logical Minimum (0),
+ 0x0A, 0x3D, 0x04, # . Usage (043Dh),
+ 0x55, 0x00, # . Unit Exponent (0),
+ 0x65, 0x00, # . Unit,
+ 0x15, 0x00, # . Logical Minimum (0),
+ 0x26, 0xFF, 0x0F, # . Logical Maximum (4095),
+ 0x75, 0x10, # . Report Size (16),
+ 0x95, 0x01, # . Report Count (1),
+ 0x81, 0x02, # . Input (Variable),
+ 0xC0, # . End Collection,
+ 0x75, 0x08, # . Report Size (8),
+ 0x95, 0x03, # . Report Count (3),
+ 0x81, 0x03, # . Input (Constant, Variable),
+ 0xC0, # . End Collection,
+ 0x09, 0x0E, # . Usage (0Eh),
+ 0xA1, 0x02, # . Collection (Logical),
+ 0x85, 0x02, # . Report ID (2),
+ 0x0A, 0x02, 0x10, # . Usage (1002h),
+ 0x15, 0x02, # . Logical Minimum (2),
+ 0x25, 0x02, # . Logical Maximum (2),
+ 0x75, 0x08, # . Report Size (8),
+ 0x95, 0x01, # . Report Count (1),
+ 0xB1, 0x02, # . Feature (Variable),
+ 0x85, 0x03, # . Report ID (3),
+ 0x0A, 0x03, 0x10, # . Usage (1003h),
+ 0x15, 0x00, # . Logical Minimum (0),
+ 0x26, 0xFF, 0x00, # . Logical Maximum (255),
+ 0x95, 0x01, # . Report Count (1),
+ 0xB1, 0x02, # . Feature (Variable),
+ 0x85, 0x04, # . Report ID (4),
+ 0x0A, 0x04, 0x10, # . Usage (1004h),
+ 0x15, 0x00, # . Logical Minimum (0),
+ 0x25, 0x01, # . Logical Maximum (1),
+ 0x95, 0x01, # . Report Count (1),
+ 0xB1, 0x02, # . Feature (Variable),
+ 0x85, 0x07, # . Report ID (7),
+ 0x0A, 0x09, 0x10, # . Usage (1009h),
+ 0x15, 0x00, # . Logical Minimum (0),
+ 0x26, 0xFF, 0x00, # . Logical Maximum (255),
+ 0x95, 0x01, # . Report Count (1),
+ 0xB1, 0x02, # . Feature (Variable),
+ 0xB1, 0x03, # . Feature (Constant, Variable),
+ 0x0A, 0x07, 0x10, # . Usage (1007h),
+ 0x09, 0x00, # . Usage (00h),
+ 0x0A, 0x08, 0x10, # . Usage (1008h),
+ 0x09, 0x00, # . Usage (00h),
+ 0x09, 0x00, # . Usage (00h),
+ 0x09, 0x00, # . Usage (00h),
+ 0x27, 0xFF, 0xFF, 0x00, 0x00, # . Logical Maximum (65535),
+ 0x75, 0x10, # . Report Size (16),
+ 0x95, 0x06, # . Report Count (6),
+ 0xB1, 0x02, # . Feature (Variable),
+ 0x09, 0x00, # . Usage (00h),
+ 0x25, 0x00, # . Logical Maximum (0),
+ 0x75, 0x08, # . Report Size (8),
+ 0x95, 0x01, # . Report Count (1),
+ 0xB1, 0x03, # . Feature (Constant, Variable),
+ 0x85, 0x0C, # . Report ID (12),
+ 0x0A, 0x30, 0x0D, # . Usage (0D30h),
+ 0x0A, 0x31, 0x0D, # . Usage (0D31h),
+ 0x0A, 0x32, 0x0D, # . Usage (0D32h),
+ 0x0A, 0x33, 0x0D, # . Usage (0D33h),
+ 0x65, 0x11, # . Unit (Centimeter),
+ 0x55, 0x0D, # . Unit Exponent (13),
+ 0x35, 0x00, # . Physical Minimum (0),
+ 0x46, 0xC8, 0x00, # . Physical Maximum (200),
+ 0x15, 0x00, # . Logical Minimum (0),
+ 0x26, 0x90, 0x01, # . Logical Maximum (400),
+ 0x75, 0x10, # . Report Size (16),
+ 0x95, 0x04, # . Report Count (4),
+ 0xB1, 0x02, # . Feature (Variable),
+ 0x85, 0x0D, # . Report ID (13),
+ 0x0A, 0x0D, 0x10, # . Usage (100Dh),
+ 0x65, 0x00, # . Unit,
+ 0x55, 0x00, # . Unit Exponent (0),
+ 0x45, 0x00, # . Physical Maximum (0),
+ 0x25, 0x01, # . Logical Maximum (1),
+ 0x75, 0x08, # . Report Size (8),
+ 0x95, 0x01, # . Report Count (1),
+ 0xB1, 0x02, # . Feature (Variable),
+ 0x85, 0x14, # . Report ID (20),
+ 0x0A, 0x14, 0x10, # . Usage (1014h),
+ 0x26, 0xFF, 0x00, # . Logical Maximum (255),
+ 0x95, 0x0D, # . Report Count (13),
+ 0xB1, 0x02, # . Feature (Variable),
+ 0x85, 0xCC, # . Report ID (204),
+ 0x0A, 0xCC, 0x10, # . Usage (10CCh),
+ 0x95, 0x02, # . Report Count (2),
+ 0xB1, 0x02, # . Feature (Variable),
+ 0xC0, # . End Collection,
+ 0x09, 0x0E, # . Usage (0Eh),
+ 0xA1, 0x02, # . Collection (Logical),
+ 0x85, 0x31, # . Report ID (49),
+ 0x0A, 0x31, 0x10, # . Usage (1031h),
+ 0x25, 0x64, # . Logical Maximum (100),
+ 0x95, 0x03, # . Report Count (3),
+ 0xB1, 0x02, # . Feature (Variable),
+ 0x95, 0x02, # . Report Count (2),
+ 0xB1, 0x03, # . Feature (Constant, Variable),
+ 0xC0, # . End Collection,
+ 0x0A, 0xAC, 0x10, # . Usage (10ACh),
+ 0xA1, 0x02, # . Collection (Logical),
+ 0x15, 0x00, # . Logical Minimum (0),
+ 0x26, 0xFF, 0x00, # . Logical Maximum (255),
+ 0x75, 0x08, # . Report Size (8),
+ 0x85, 0xAC, # . Report ID (172),
+ 0x09, 0x00, # . Usage (00h),
+ 0x96, 0xBF, 0x00, # . Report Count (191),
+ 0x81, 0x02, # . Input (Variable),
+ 0x85, 0x15, # . Report ID (21),
+ 0x09, 0x00, # . Usage (00h),
+ 0x95, 0x0E, # . Report Count (14),
+ 0xB1, 0x02, # . Feature (Variable),
+ 0x85, 0x33, # . Report ID (51),
+ 0x09, 0x00, # . Usage (00h),
+ 0x95, 0x12, # . Report Count (18),
+ 0xB1, 0x02, # . Feature (Variable),
+ 0x85, 0x44, # . Report ID (68),
+ 0x09, 0x00, # . Usage (00h),
+ 0x95, 0x04, # . Report Count (4),
+ 0xB1, 0x02, # . Feature (Variable),
+ 0x85, 0x45, # . Report ID (69),
+ 0x09, 0x00, # . Usage (00h),
+ 0x95, 0x20, # . Report Count (32),
+ 0xB1, 0x02, # . Feature (Variable),
+ 0x85, 0x60, # . Report ID (96),
+ 0x09, 0x00, # . Usage (00h),
+ 0x95, 0x3F, # . Report Count (63),
+ 0xB1, 0x02, # . Feature (Variable),
+ 0x85, 0x61, # . Report ID (97),
+ 0x09, 0x00, # . Usage (00h),
+ 0x95, 0x3E, # . Report Count (62),
+ 0xB1, 0x02, # . Feature (Variable),
+ 0x85, 0x62, # . Report ID (98),
+ 0x09, 0x00, # . Usage (00h),
+ 0x95, 0x3E, # . Report Count (62),
+ 0xB1, 0x02, # . Feature (Variable),
+ 0x85, 0x65, # . Report ID (101),
+ 0x09, 0x00, # . Usage (00h),
+ 0x95, 0x04, # . Report Count (4),
+ 0xB1, 0x02, # . Feature (Variable),
+ 0x85, 0x66, # . Report ID (102),
+ 0x09, 0x00, # . Usage (00h),
+ 0x95, 0x04, # . Report Count (4),
+ 0xB1, 0x02, # . Feature (Variable),
+ 0x85, 0x67, # . Report ID (103),
+ 0x09, 0x00, # . Usage (00h),
+ 0x95, 0x04, # . Report Count (4),
+ 0xB1, 0x02, # . Feature (Variable),
+ 0x85, 0x68, # . Report ID (104),
+ 0x09, 0x00, # . Usage (00h),
+ 0x95, 0x11, # . Report Count (17),
+ 0xB1, 0x02, # . Feature (Variable),
+ 0x85, 0x6F, # . Report ID (111),
+ 0x09, 0x00, # . Usage (00h),
+ 0x95, 0x3E, # . Report Count (62),
+ 0xB1, 0x02, # . Feature (Variable),
+ 0x85, 0xCD, # . Report ID (205),
+ 0x09, 0x00, # . Usage (00h),
+ 0x95, 0x02, # . Report Count (2),
+ 0xB1, 0x02, # . Feature (Variable),
+ 0x85, 0x16, # . Report ID (22),
+ 0x09, 0x00, # . Usage (00h),
+ 0x95, 0x0E, # . Report Count (14),
+ 0xB1, 0x02, # . Feature (Variable),
+ 0x85, 0x35, # . Report ID (53),
+ 0x09, 0x00, # . Usage (00h),
+ 0x95, 0x0A, # . Report Count (10),
+ 0xB1, 0x02, # . Feature (Variable),
+ 0xC0, # . End Collection,
+ 0x85, 0xD1, # . Report ID (209),
+ 0x09, 0x01, # . Usage (01h),
+ 0x96, 0x04, 0x01, # . Report Count (260),
+ 0xB1, 0x02, # . Feature (Variable),
+ 0x85, 0xD2, # . Report ID (210),
+ 0x09, 0x01, # . Usage (01h),
+ 0x96, 0x04, 0x01, # . Report Count (260),
+ 0xB1, 0x02, # . Feature (Variable),
+ 0x85, 0xD3, # . Report ID (211),
+ 0x09, 0x01, # . Usage (01h),
+ 0x96, 0x04, 0x00, # . Report Count (4),
+ 0xB1, 0x02, # . Feature (Variable),
+ 0x85, 0xD4, # . Report ID (212),
+ 0x09, 0x01, # . Usage (01h),
+ 0x96, 0x04, 0x00, # . Report Count (4),
+ 0xB1, 0x02, # . Feature (Variable),
+ 0x85, 0xD5, # . Report ID (213),
+ 0x09, 0x01, # . Usage (01h),
+ 0x96, 0x04, 0x00, # . Report Count (4),
+ 0xB1, 0x02, # . Feature (Variable),
+ 0x85, 0xD6, # . Report ID (214),
+ 0x09, 0x01, # . Usage (01h),
+ 0x96, 0x04, 0x00, # . Report Count (4),
+ 0xB1, 0x02, # . Feature (Variable),
+ 0x85, 0xD7, # . Report ID (215),
+ 0x09, 0x01, # . Usage (01h),
+ 0x96, 0x08, 0x00, # . Report Count (8),
+ 0xB1, 0x02, # . Feature (Variable),
+ 0x85, 0xD8, # . Report ID (216),
+ 0x09, 0x01, # . Usage (01h),
+ 0x96, 0x0C, 0x00, # . Report Count (12),
+ 0xB1, 0x02, # . Feature (Variable),
+ 0x85, 0xD9, # . Report ID (217),
+ 0x09, 0x01, # . Usage (01h),
+ 0x96, 0x00, 0x0A, # . Report Count (2560),
+ 0xB1, 0x02, # . Feature (Variable),
+ 0x85, 0xDA, # . Report ID (218),
+ 0x09, 0x01, # . Usage (01h),
+ 0x96, 0x04, 0x04, # . Report Count (1028),
+ 0xB1, 0x02, # . Feature (Variable),
+ 0x85, 0xDB, # . Report ID (219),
+ 0x09, 0x01, # . Usage (01h),
+ 0x96, 0x06, 0x00, # . Report Count (6),
+ 0xB1, 0x02, # . Feature (Variable),
+ 0x85, 0xDC, # . Report ID (220),
+ 0x09, 0x01, # . Usage (01h),
+ 0x96, 0x02, 0x00, # . Report Count (2),
+ 0xB1, 0x02, # . Feature (Variable),
+ 0x85, 0xDD, # . Report ID (221),
+ 0x09, 0x01, # . Usage (01h),
+ 0x96, 0x04, 0x00, # . Report Count (4),
+ 0xB1, 0x02, # . Feature (Variable),
+ 0x85, 0xDE, # . Report ID (222),
+ 0x09, 0x01, # . Usage (01h),
+ 0x96, 0x04, 0x00, # . Report Count (4),
+ 0xB1, 0x02, # . Feature (Variable),
+ 0x85, 0xDF, # . Report ID (223),
+ 0x09, 0x01, # . Usage (01h),
+ 0x96, 0x22, 0x00, # . Report Count (34),
+ 0xB1, 0x02, # . Feature (Variable),
+ 0x85, 0xE0, # . Report ID (224),
+ 0x09, 0x01, # . Usage (01h),
+ 0x96, 0x01, 0x00, # . Report Count (1),
+ 0xB1, 0x02, # . Feature (Variable),
+ 0x85, 0xE1, # . Report ID (225),
+ 0x09, 0x01, # . Usage (01h),
+ 0x96, 0x02, 0x00, # . Report Count (2),
+ 0xB1, 0x02, # . Feature (Variable),
+ 0x85, 0xE2, # . Report ID (226),
+ 0x09, 0x01, # . Usage (01h),
+ 0x96, 0x02, 0x00, # . Report Count (2),
+ 0xB1, 0x02, # . Feature (Variable),
+ 0x85, 0xE3, # . Report ID (227),
+ 0x09, 0x01, # . Usage (01h),
+ 0x96, 0x02, 0x00, # . Report Count (2),
+ 0xB1, 0x02, # . Feature (Variable),
+ 0x85, 0xE4, # . Report ID (228),
+ 0x09, 0x01, # . Usage (01h),
+ 0x96, 0xFF, 0x01, # . Report Count (511),
+ 0xB1, 0x02, # . Feature (Variable),
+ 0x85, 0xCB, # . Report ID (203),
+ 0x09, 0x01, # . Usage (01h),
+ 0x96, 0x1F, 0x00, # . Report Count (31),
+ 0xB1, 0x02, # . Feature (Variable),
+ 0xC0 # . End Collection
+]
+# fmt: on
diff --git a/tools/testing/selftests/hid/tests/test_apple_keyboard.py b/tools/testing/selftests/hid/tests/test_apple_keyboard.py
new file mode 100644
index 000000000000..f81071d46166
--- /dev/null
+++ b/tools/testing/selftests/hid/tests/test_apple_keyboard.py
@@ -0,0 +1,440 @@
+#!/bin/env python3
+# SPDX-License-Identifier: GPL-2.0
+# -*- coding: utf-8 -*-
+#
+# Copyright (c) 2019 Benjamin Tissoires <benjamin.tissoires@gmail.com>
+# Copyright (c) 2019 Red Hat, Inc.
+#
+
+from .test_keyboard import ArrayKeyboard, TestArrayKeyboard
+from hidtools.util import BusType
+
+import libevdev
+import logging
+
+logger = logging.getLogger("hidtools.test.apple-keyboard")
+
+KERNEL_MODULE = ("apple", "hid-apple")
+
+
+class KbdData(object):
+ pass
+
+
+class AppleKeyboard(ArrayKeyboard):
+ # fmt: off
+ report_descriptor = [
+ 0x05, 0x01, # Usage Page (Generic Desktop)
+ 0x09, 0x06, # Usage (Keyboard)
+ 0xa1, 0x01, # Collection (Application)
+ 0x85, 0x01, # .Report ID (1)
+ 0x05, 0x07, # .Usage Page (Keyboard)
+ 0x19, 0xe0, # .Usage Minimum (224)
+ 0x29, 0xe7, # .Usage Maximum (231)
+ 0x15, 0x00, # .Logical Minimum (0)
+ 0x25, 0x01, # .Logical Maximum (1)
+ 0x75, 0x01, # .Report Size (1)
+ 0x95, 0x08, # .Report Count (8)
+ 0x81, 0x02, # .Input (Data,Var,Abs)
+ 0x75, 0x08, # .Report Size (8)
+ 0x95, 0x01, # .Report Count (1)
+ 0x81, 0x01, # .Input (Cnst,Arr,Abs)
+ 0x75, 0x01, # .Report Size (1)
+ 0x95, 0x05, # .Report Count (5)
+ 0x05, 0x08, # .Usage Page (LEDs)
+ 0x19, 0x01, # .Usage Minimum (1)
+ 0x29, 0x05, # .Usage Maximum (5)
+ 0x91, 0x02, # .Output (Data,Var,Abs)
+ 0x75, 0x03, # .Report Size (3)
+ 0x95, 0x01, # .Report Count (1)
+ 0x91, 0x01, # .Output (Cnst,Arr,Abs)
+ 0x75, 0x08, # .Report Size (8)
+ 0x95, 0x06, # .Report Count (6)
+ 0x15, 0x00, # .Logical Minimum (0)
+ 0x26, 0xff, 0x00, # .Logical Maximum (255)
+ 0x05, 0x07, # .Usage Page (Keyboard)
+ 0x19, 0x00, # .Usage Minimum (0)
+ 0x2a, 0xff, 0x00, # .Usage Maximum (255)
+ 0x81, 0x00, # .Input (Data,Arr,Abs)
+ 0xc0, # End Collection
+ 0x05, 0x0c, # Usage Page (Consumer Devices)
+ 0x09, 0x01, # Usage (Consumer Control)
+ 0xa1, 0x01, # Collection (Application)
+ 0x85, 0x47, # .Report ID (71)
+ 0x05, 0x01, # .Usage Page (Generic Desktop)
+ 0x09, 0x06, # .Usage (Keyboard)
+ 0xa1, 0x02, # .Collection (Logical)
+ 0x05, 0x06, # ..Usage Page (Generic Device Controls)
+ 0x09, 0x20, # ..Usage (Battery Strength)
+ 0x15, 0x00, # ..Logical Minimum (0)
+ 0x26, 0xff, 0x00, # ..Logical Maximum (255)
+ 0x75, 0x08, # ..Report Size (8)
+ 0x95, 0x01, # ..Report Count (1)
+ 0x81, 0x02, # ..Input (Data,Var,Abs)
+ 0xc0, # .End Collection
+ 0xc0, # End Collection
+ 0x05, 0x0c, # Usage Page (Consumer Devices)
+ 0x09, 0x01, # Usage (Consumer Control)
+ 0xa1, 0x01, # Collection (Application)
+ 0x85, 0x11, # .Report ID (17)
+ 0x15, 0x00, # .Logical Minimum (0)
+ 0x25, 0x01, # .Logical Maximum (1)
+ 0x75, 0x01, # .Report Size (1)
+ 0x95, 0x03, # .Report Count (3)
+ 0x81, 0x01, # .Input (Cnst,Arr,Abs)
+ 0x75, 0x01, # .Report Size (1)
+ 0x95, 0x01, # .Report Count (1)
+ 0x05, 0x0c, # .Usage Page (Consumer Devices)
+ 0x09, 0xb8, # .Usage (Eject)
+ 0x81, 0x02, # .Input (Data,Var,Abs)
+ 0x06, 0xff, 0x00, # .Usage Page (Vendor Usage Page 0xff)
+ 0x09, 0x03, # .Usage (Vendor Usage 0x03)
+ 0x81, 0x02, # .Input (Data,Var,Abs)
+ 0x75, 0x01, # .Report Size (1)
+ 0x95, 0x03, # .Report Count (3)
+ 0x81, 0x01, # .Input (Cnst,Arr,Abs)
+ 0x05, 0x0c, # .Usage Page (Consumer Devices)
+ 0x85, 0x12, # .Report ID (18)
+ 0x15, 0x00, # .Logical Minimum (0)
+ 0x25, 0x01, # .Logical Maximum (1)
+ 0x75, 0x01, # .Report Size (1)
+ 0x95, 0x01, # .Report Count (1)
+ 0x09, 0xcd, # .Usage (Play/Pause)
+ 0x81, 0x02, # .Input (Data,Var,Abs)
+ 0x09, 0xb3, # .Usage (Fast Forward)
+ 0x81, 0x02, # .Input (Data,Var,Abs)
+ 0x09, 0xb4, # .Usage (Rewind)
+ 0x81, 0x02, # .Input (Data,Var,Abs)
+ 0x09, 0xb5, # .Usage (Scan Next Track)
+ 0x81, 0x02, # .Input (Data,Var,Abs)
+ 0x09, 0xb6, # .Usage (Scan Previous Track)
+ 0x81, 0x02, # .Input (Data,Var,Abs)
+ 0x81, 0x01, # .Input (Cnst,Arr,Abs)
+ 0x81, 0x01, # .Input (Cnst,Arr,Abs)
+ 0x81, 0x01, # .Input (Cnst,Arr,Abs)
+ 0x85, 0x13, # .Report ID (19)
+ 0x15, 0x00, # .Logical Minimum (0)
+ 0x25, 0x01, # .Logical Maximum (1)
+ 0x75, 0x01, # .Report Size (1)
+ 0x95, 0x01, # .Report Count (1)
+ 0x06, 0x01, 0xff, # .Usage Page (Vendor Usage Page 0xff01)
+ 0x09, 0x0a, # .Usage (Vendor Usage 0x0a)
+ 0x81, 0x02, # .Input (Data,Var,Abs)
+ 0x06, 0x01, 0xff, # .Usage Page (Vendor Usage Page 0xff01)
+ 0x09, 0x0c, # .Usage (Vendor Usage 0x0c)
+ 0x81, 0x22, # .Input (Data,Var,Abs,NoPref)
+ 0x75, 0x01, # .Report Size (1)
+ 0x95, 0x06, # .Report Count (6)
+ 0x81, 0x01, # .Input (Cnst,Arr,Abs)
+ 0x85, 0x09, # .Report ID (9)
+ 0x09, 0x0b, # .Usage (Vendor Usage 0x0b)
+ 0x75, 0x08, # .Report Size (8)
+ 0x95, 0x01, # .Report Count (1)
+ 0xb1, 0x02, # .Feature (Data,Var,Abs)
+ 0x75, 0x08, # .Report Size (8)
+ 0x95, 0x02, # .Report Count (2)
+ 0xb1, 0x01, # .Feature (Cnst,Arr,Abs)
+ 0xc0, # End Collection
+ ]
+ # fmt: on
+
+ def __init__(
+ self,
+ rdesc=report_descriptor,
+ name="Apple Wireless Keyboard",
+ input_info=(BusType.BLUETOOTH, 0x05AC, 0x0256),
+ ):
+ super().__init__(rdesc, name, input_info)
+ self.default_reportID = 1
+
+ def send_fn_state(self, state):
+ data = KbdData()
+ setattr(data, "0xff0003", state)
+ r = self.create_report(data, reportID=17)
+ self.call_input_event(r)
+ return [r]
+
+
+class TestAppleKeyboard(TestArrayKeyboard):
+ kernel_modules = [KERNEL_MODULE]
+
+ def create_device(self):
+ return AppleKeyboard()
+
+ def test_single_function_key(self):
+ """check for function key reliability."""
+ uhdev = self.uhdev
+ evdev = uhdev.get_evdev()
+ syn_event = self.syn_event
+
+ r = uhdev.event(["F4"])
+ expected = [syn_event]
+ expected.append(libevdev.InputEvent(libevdev.EV_KEY.KEY_ALL_APPLICATIONS, 1))
+ events = uhdev.next_sync_events()
+ self.debug_reports(r, uhdev, events)
+ self.assertInputEventsIn(expected, events)
+ assert evdev.value[libevdev.EV_KEY.KEY_ALL_APPLICATIONS] == 1
+ assert evdev.value[libevdev.EV_KEY.KEY_FN] == 0
+
+ r = uhdev.event([])
+ expected = [syn_event]
+ expected.append(libevdev.InputEvent(libevdev.EV_KEY.KEY_ALL_APPLICATIONS, 0))
+ events = uhdev.next_sync_events()
+ self.debug_reports(r, uhdev, events)
+ self.assertInputEventsIn(expected, events)
+ assert evdev.value[libevdev.EV_KEY.KEY_ALL_APPLICATIONS] == 0
+
+ def test_single_fn_function_key(self):
+ """check for function key reliability with the fn key."""
+ uhdev = self.uhdev
+ evdev = uhdev.get_evdev()
+ syn_event = self.syn_event
+
+ r = uhdev.send_fn_state(1)
+ r.extend(uhdev.event(["F4"]))
+ expected = [syn_event]
+ expected.append(libevdev.InputEvent(libevdev.EV_KEY.KEY_F4, 1))
+ expected.append(libevdev.InputEvent(libevdev.EV_KEY.KEY_FN, 1))
+ events = uhdev.next_sync_events()
+ self.debug_reports(r, uhdev, events)
+ self.assertInputEventsIn(expected, events)
+ assert evdev.value[libevdev.EV_KEY.KEY_F4] == 1
+
+ r = uhdev.event([])
+ expected = [syn_event]
+ expected.append(libevdev.InputEvent(libevdev.EV_KEY.KEY_F4, 0))
+ events = uhdev.next_sync_events()
+ self.debug_reports(r, uhdev, events)
+ self.assertInputEventsIn(expected, events)
+ assert evdev.value[libevdev.EV_KEY.KEY_F4] == 0
+ assert evdev.value[libevdev.EV_KEY.KEY_FN] == 1
+
+ r = uhdev.send_fn_state(0)
+ expected = [syn_event]
+ expected.append(libevdev.InputEvent(libevdev.EV_KEY.KEY_FN, 0))
+ events = uhdev.next_sync_events()
+ self.debug_reports(r, uhdev, events)
+ self.assertInputEventsIn(expected, events)
+
+ def test_single_fn_function_key_release_first(self):
+ """check for function key reliability with the fn key."""
+ uhdev = self.uhdev
+ evdev = uhdev.get_evdev()
+ syn_event = self.syn_event
+
+ r = uhdev.send_fn_state(1)
+ r.extend(uhdev.event(["F4"]))
+ expected = [syn_event]
+ expected.append(libevdev.InputEvent(libevdev.EV_KEY.KEY_F4, 1))
+ expected.append(libevdev.InputEvent(libevdev.EV_KEY.KEY_FN, 1))
+ events = uhdev.next_sync_events()
+ self.debug_reports(r, uhdev, events)
+ self.assertInputEventsIn(expected, events)
+ assert evdev.value[libevdev.EV_KEY.KEY_F4] == 1
+
+ r = uhdev.send_fn_state(0)
+ expected = [syn_event]
+ expected.append(libevdev.InputEvent(libevdev.EV_KEY.KEY_FN, 0))
+ events = uhdev.next_sync_events()
+ self.debug_reports(r, uhdev, events)
+ self.assertInputEventsIn(expected, events)
+
+ r = uhdev.event([])
+ expected = [syn_event]
+ expected.append(libevdev.InputEvent(libevdev.EV_KEY.KEY_F4, 0))
+ events = uhdev.next_sync_events()
+ self.debug_reports(r, uhdev, events)
+ self.assertInputEventsIn(expected, events)
+ assert evdev.value[libevdev.EV_KEY.KEY_F4] == 0
+
+ def test_single_fn_function_key_inverted(self):
+ """check for function key reliability with the fn key."""
+ uhdev = self.uhdev
+ evdev = uhdev.get_evdev()
+ syn_event = self.syn_event
+
+ r = uhdev.event(["F4"])
+ r.extend(uhdev.send_fn_state(1))
+ expected = [syn_event]
+ expected.append(libevdev.InputEvent(libevdev.EV_KEY.KEY_ALL_APPLICATIONS, 1))
+ expected.append(libevdev.InputEvent(libevdev.EV_KEY.KEY_FN, 1))
+ events = uhdev.next_sync_events()
+ self.debug_reports(r, uhdev, events)
+ self.assertInputEventsIn(expected, events)
+ assert evdev.value[libevdev.EV_KEY.KEY_ALL_APPLICATIONS] == 1
+
+ r = uhdev.event([])
+ expected = [syn_event]
+ expected.append(libevdev.InputEvent(libevdev.EV_KEY.KEY_ALL_APPLICATIONS, 0))
+ events = uhdev.next_sync_events()
+ self.debug_reports(r, uhdev, events)
+ self.assertInputEventsIn(expected, events)
+ assert evdev.value[libevdev.EV_KEY.KEY_ALL_APPLICATIONS] == 0
+ assert evdev.value[libevdev.EV_KEY.KEY_FN] == 1
+
+ r = uhdev.send_fn_state(0)
+ expected = [syn_event]
+ expected.append(libevdev.InputEvent(libevdev.EV_KEY.KEY_FN, 0))
+ events = uhdev.next_sync_events()
+ self.debug_reports(r, uhdev, events)
+ self.assertInputEventsIn(expected, events)
+
+ def test_multiple_fn_function_key_release_first(self):
+ """check for function key reliability with the fn key."""
+ uhdev = self.uhdev
+ evdev = uhdev.get_evdev()
+ syn_event = self.syn_event
+
+ r = uhdev.send_fn_state(1)
+ r.extend(uhdev.event(["F4"]))
+ r.extend(uhdev.event(["F4", "F6"]))
+ expected = [syn_event]
+ expected.append(libevdev.InputEvent(libevdev.EV_KEY.KEY_F4, 1))
+ expected.append(libevdev.InputEvent(libevdev.EV_KEY.KEY_F6, 1))
+ expected.append(libevdev.InputEvent(libevdev.EV_KEY.KEY_FN, 1))
+ events = uhdev.next_sync_events()
+ self.debug_reports(r, uhdev, events)
+ self.assertInputEventsIn(expected, events)
+ assert evdev.value[libevdev.EV_KEY.KEY_F4] == 1
+ assert evdev.value[libevdev.EV_KEY.KEY_F6] == 1
+ assert evdev.value[libevdev.EV_KEY.KEY_FN] == 1
+
+ r = uhdev.event(["F6"])
+ expected = [syn_event]
+ expected.append(libevdev.InputEvent(libevdev.EV_KEY.KEY_F4, 0))
+ events = uhdev.next_sync_events()
+ self.debug_reports(r, uhdev, events)
+ self.assertInputEventsIn(expected, events)
+ assert evdev.value[libevdev.EV_KEY.KEY_F4] == 0
+ assert evdev.value[libevdev.EV_KEY.KEY_F6] == 1
+ assert evdev.value[libevdev.EV_KEY.KEY_FN] == 1
+
+ r = uhdev.send_fn_state(0)
+ expected = [syn_event]
+ expected.append(libevdev.InputEvent(libevdev.EV_KEY.KEY_FN, 0))
+ events = uhdev.next_sync_events()
+ self.debug_reports(r, uhdev, events)
+ self.assertInputEventsIn(expected, events)
+ assert evdev.value[libevdev.EV_KEY.KEY_F4] == 0
+ assert evdev.value[libevdev.EV_KEY.KEY_F6] == 1
+ assert evdev.value[libevdev.EV_KEY.KEY_FN] == 0
+
+ r = uhdev.event([])
+ expected = [syn_event]
+ expected.append(libevdev.InputEvent(libevdev.EV_KEY.KEY_F6, 0))
+ events = uhdev.next_sync_events()
+ self.debug_reports(r, uhdev, events)
+ self.assertInputEventsIn(expected, events)
+ assert evdev.value[libevdev.EV_KEY.KEY_F4] == 0
+ assert evdev.value[libevdev.EV_KEY.KEY_F6] == 0
+ assert evdev.value[libevdev.EV_KEY.KEY_FN] == 0
+
+ def test_multiple_fn_function_key_release_between(self):
+ """check for function key reliability with the fn key."""
+ uhdev = self.uhdev
+ evdev = uhdev.get_evdev()
+ syn_event = self.syn_event
+
+ # press F4
+ r = uhdev.event(["F4"])
+ expected = [syn_event]
+ expected.append(libevdev.InputEvent(libevdev.EV_KEY.KEY_ALL_APPLICATIONS, 1))
+ events = uhdev.next_sync_events()
+ self.debug_reports(r, uhdev, events)
+ self.assertInputEventsIn(expected, events)
+ assert evdev.value[libevdev.EV_KEY.KEY_F4] == 0
+ assert evdev.value[libevdev.EV_KEY.KEY_ALL_APPLICATIONS] == 1
+ assert evdev.value[libevdev.EV_KEY.KEY_F6] == 0
+ assert evdev.value[libevdev.EV_KEY.KEY_KBDILLUMUP] == 0
+ assert evdev.value[libevdev.EV_KEY.KEY_FN] == 0
+
+ # press Fn key
+ r = uhdev.send_fn_state(1)
+ expected = [syn_event]
+ expected.append(libevdev.InputEvent(libevdev.EV_KEY.KEY_FN, 1))
+ events = uhdev.next_sync_events()
+ self.debug_reports(r, uhdev, events)
+ self.assertInputEventsIn(expected, events)
+ assert evdev.value[libevdev.EV_KEY.KEY_F4] == 0
+ assert evdev.value[libevdev.EV_KEY.KEY_ALL_APPLICATIONS] == 1
+ assert evdev.value[libevdev.EV_KEY.KEY_F6] == 0
+ assert evdev.value[libevdev.EV_KEY.KEY_KBDILLUMUP] == 0
+ assert evdev.value[libevdev.EV_KEY.KEY_FN] == 1
+
+ # keep F4 and press F6
+ r = uhdev.event(["F4", "F6"])
+ expected = [syn_event]
+ expected.append(libevdev.InputEvent(libevdev.EV_KEY.KEY_F6, 1))
+ events = uhdev.next_sync_events()
+ self.debug_reports(r, uhdev, events)
+ self.assertInputEventsIn(expected, events)
+ assert evdev.value[libevdev.EV_KEY.KEY_F4] == 0
+ assert evdev.value[libevdev.EV_KEY.KEY_ALL_APPLICATIONS] == 1
+ assert evdev.value[libevdev.EV_KEY.KEY_F6] == 1
+ assert evdev.value[libevdev.EV_KEY.KEY_KBDILLUMUP] == 0
+ assert evdev.value[libevdev.EV_KEY.KEY_FN] == 1
+
+ # keep F4 and F6
+ r = uhdev.event(["F4", "F6"])
+ expected = []
+ events = uhdev.next_sync_events()
+ self.debug_reports(r, uhdev, events)
+ self.assertInputEventsIn(expected, events)
+ assert evdev.value[libevdev.EV_KEY.KEY_F4] == 0
+ assert evdev.value[libevdev.EV_KEY.KEY_ALL_APPLICATIONS] == 1
+ assert evdev.value[libevdev.EV_KEY.KEY_F6] == 1
+ assert evdev.value[libevdev.EV_KEY.KEY_KBDILLUMUP] == 0
+ assert evdev.value[libevdev.EV_KEY.KEY_FN] == 1
+
+ # release Fn key and all keys
+ r = uhdev.send_fn_state(0)
+ r.extend(uhdev.event([]))
+ expected = [syn_event]
+ expected.append(libevdev.InputEvent(libevdev.EV_KEY.KEY_ALL_APPLICATIONS, 0))
+ expected.append(libevdev.InputEvent(libevdev.EV_KEY.KEY_F6, 0))
+ events = uhdev.next_sync_events()
+ self.debug_reports(r, uhdev, events)
+ self.assertInputEventsIn(expected, events)
+ assert evdev.value[libevdev.EV_KEY.KEY_F4] == 0
+ assert evdev.value[libevdev.EV_KEY.KEY_ALL_APPLICATIONS] == 0
+ assert evdev.value[libevdev.EV_KEY.KEY_F6] == 0
+ assert evdev.value[libevdev.EV_KEY.KEY_KBDILLUMUP] == 0
+ assert evdev.value[libevdev.EV_KEY.KEY_FN] == 0
+
+ def test_single_pageup_key_release_first(self):
+ """check for function key reliability with the [page] up key."""
+ uhdev = self.uhdev
+ evdev = uhdev.get_evdev()
+ syn_event = self.syn_event
+
+ r = uhdev.send_fn_state(1)
+ r.extend(uhdev.event(["UpArrow"]))
+ expected = [syn_event]
+ expected.append(libevdev.InputEvent(libevdev.EV_KEY.KEY_PAGEUP, 1))
+ expected.append(libevdev.InputEvent(libevdev.EV_KEY.KEY_FN, 1))
+ events = uhdev.next_sync_events()
+ self.debug_reports(r, uhdev, events)
+ self.assertInputEventsIn(expected, events)
+ assert evdev.value[libevdev.EV_KEY.KEY_PAGEUP] == 1
+ assert evdev.value[libevdev.EV_KEY.KEY_UP] == 0
+ assert evdev.value[libevdev.EV_KEY.KEY_FN] == 1
+
+ r = uhdev.send_fn_state(0)
+ expected = [syn_event]
+ expected.append(libevdev.InputEvent(libevdev.EV_KEY.KEY_FN, 0))
+ events = uhdev.next_sync_events()
+ self.debug_reports(r, uhdev, events)
+ self.assertInputEventsIn(expected, events)
+ assert evdev.value[libevdev.EV_KEY.KEY_PAGEUP] == 1
+ assert evdev.value[libevdev.EV_KEY.KEY_UP] == 0
+ assert evdev.value[libevdev.EV_KEY.KEY_FN] == 0
+
+ r = uhdev.event([])
+ expected = [syn_event]
+ expected.append(libevdev.InputEvent(libevdev.EV_KEY.KEY_PAGEUP, 0))
+ events = uhdev.next_sync_events()
+ self.debug_reports(r, uhdev, events)
+ self.assertInputEventsIn(expected, events)
+ assert evdev.value[libevdev.EV_KEY.KEY_PAGEUP] == 0
+ assert evdev.value[libevdev.EV_KEY.KEY_UP] == 0
+ assert evdev.value[libevdev.EV_KEY.KEY_FN] == 0
diff --git a/tools/testing/selftests/hid/tests/test_gamepad.py b/tools/testing/selftests/hid/tests/test_gamepad.py
new file mode 100644
index 000000000000..26c74040b796
--- /dev/null
+++ b/tools/testing/selftests/hid/tests/test_gamepad.py
@@ -0,0 +1,209 @@
+#!/bin/env python3
+# SPDX-License-Identifier: GPL-2.0
+# -*- coding: utf-8 -*-
+#
+# Copyright (c) 2019 Benjamin Tissoires <benjamin.tissoires@gmail.com>
+# Copyright (c) 2019 Red Hat, Inc.
+#
+
+from . import base
+import libevdev
+import pytest
+
+from hidtools.device.base_gamepad import AsusGamepad, SaitekGamepad
+
+import logging
+
+logger = logging.getLogger("hidtools.test.gamepad")
+
+
+class BaseTest:
+ class TestGamepad(base.BaseTestCase.TestUhid):
+ @pytest.fixture(autouse=True)
+ def send_initial_state(self):
+ """send an empty report to initialize the axes"""
+ uhdev = self.uhdev
+
+ r = uhdev.event()
+ events = uhdev.next_sync_events()
+ self.debug_reports(r, uhdev, events)
+
+ def assert_button(self, button):
+ uhdev = self.uhdev
+ evdev = uhdev.get_evdev()
+ syn_event = self.syn_event
+
+ buttons = {}
+ key = libevdev.evbit(uhdev.buttons_map[button])
+
+ buttons[button] = True
+ r = uhdev.event(buttons=buttons)
+ expected_event = libevdev.InputEvent(key, 1)
+ events = uhdev.next_sync_events()
+ self.debug_reports(r, uhdev, events)
+ self.assertInputEventsIn((syn_event, expected_event), events)
+ assert evdev.value[key] == 1
+
+ buttons[button] = False
+ r = uhdev.event(buttons=buttons)
+ expected_event = libevdev.InputEvent(key, 0)
+ events = uhdev.next_sync_events()
+ self.debug_reports(r, uhdev, events)
+ self.assertInputEventsIn((syn_event, expected_event), events)
+ assert evdev.value[key] == 0
+
+ def test_buttons(self):
+ """check for button reliability."""
+ uhdev = self.uhdev
+
+ for b in uhdev.buttons:
+ self.assert_button(b)
+
+ def test_dual_buttons(self):
+ """check for button reliability when pressing 2 buttons"""
+ uhdev = self.uhdev
+ evdev = uhdev.get_evdev()
+ syn_event = self.syn_event
+
+ # can change intended b1 b2 values
+ b1 = uhdev.buttons[0]
+ key1 = libevdev.evbit(uhdev.buttons_map[b1])
+ b2 = uhdev.buttons[1]
+ key2 = libevdev.evbit(uhdev.buttons_map[b2])
+
+ buttons = {b1: True, b2: True}
+ r = uhdev.event(buttons=buttons)
+ expected_event0 = libevdev.InputEvent(key1, 1)
+ expected_event1 = libevdev.InputEvent(key2, 1)
+ events = uhdev.next_sync_events()
+ self.debug_reports(r, uhdev, events)
+ self.assertInputEventsIn(
+ (syn_event, expected_event0, expected_event1), events
+ )
+ assert evdev.value[key1] == 1
+ assert evdev.value[key2] == 1
+
+ buttons = {b1: False, b2: None}
+ r = uhdev.event(buttons=buttons)
+ expected_event = libevdev.InputEvent(key1, 0)
+ events = uhdev.next_sync_events()
+ self.debug_reports(r, uhdev, events)
+ self.assertInputEventsIn((syn_event, expected_event), events)
+ assert evdev.value[key1] == 0
+ assert evdev.value[key2] == 1
+
+ buttons = {b1: None, b2: False}
+ r = uhdev.event(buttons=buttons)
+ expected_event = libevdev.InputEvent(key2, 0)
+ events = uhdev.next_sync_events()
+ self.debug_reports(r, uhdev, events)
+ self.assertInputEventsIn((syn_event, expected_event), events)
+ assert evdev.value[key1] == 0
+ assert evdev.value[key2] == 0
+
+ def _get_libevdev_abs_events(self, which):
+ """Returns which ABS_* evdev axes are expected for the given stick"""
+ abs_map = self.uhdev.axes_map[which]
+
+ x = abs_map["x"].evdev
+ y = abs_map["y"].evdev
+
+ assert x
+ assert y
+
+ return x, y
+
+ def _test_joystick_press(self, which, data):
+ uhdev = self.uhdev
+
+ libevdev_axes = self._get_libevdev_abs_events(which)
+
+ r = None
+ if which == "left_stick":
+ r = uhdev.event(left=data)
+ else:
+ r = uhdev.event(right=data)
+ events = uhdev.next_sync_events()
+ self.debug_reports(r, uhdev, events)
+
+ for i, d in enumerate(data):
+ if d is not None and d != 127:
+ assert libevdev.InputEvent(libevdev_axes[i], d) in events
+ else:
+ assert libevdev.InputEvent(libevdev_axes[i]) not in events
+
+ def test_left_joystick_press_left(self):
+ """check for the left joystick reliability"""
+ self._test_joystick_press("left_stick", (63, None))
+ self._test_joystick_press("left_stick", (0, 127))
+
+ def test_left_joystick_press_right(self):
+ """check for the left joystick reliability"""
+ self._test_joystick_press("left_stick", (191, 127))
+ self._test_joystick_press("left_stick", (255, None))
+
+ def test_left_joystick_press_up(self):
+ """check for the left joystick reliability"""
+ self._test_joystick_press("left_stick", (None, 63))
+ self._test_joystick_press("left_stick", (127, 0))
+
+ def test_left_joystick_press_down(self):
+ """check for the left joystick reliability"""
+ self._test_joystick_press("left_stick", (127, 191))
+ self._test_joystick_press("left_stick", (None, 255))
+
+ def test_right_joystick_press_left(self):
+ """check for the right joystick reliability"""
+ self._test_joystick_press("right_stick", (63, None))
+ self._test_joystick_press("right_stick", (0, 127))
+
+ def test_right_joystick_press_right(self):
+ """check for the right joystick reliability"""
+ self._test_joystick_press("right_stick", (191, 127))
+ self._test_joystick_press("right_stick", (255, None))
+
+ def test_right_joystick_press_up(self):
+ """check for the right joystick reliability"""
+ self._test_joystick_press("right_stick", (None, 63))
+ self._test_joystick_press("right_stick", (127, 0))
+
+ def test_right_joystick_press_down(self):
+ """check for the right joystick reliability"""
+ self._test_joystick_press("right_stick", (127, 191))
+ self._test_joystick_press("right_stick", (None, 255))
+
+ @pytest.mark.skip_if_uhdev(
+ lambda uhdev: "Hat switch" not in uhdev.fields,
+ "Device not compatible, missing Hat switch usage",
+ )
+ @pytest.mark.parametrize(
+ "hat_value,expected_evdev,evdev_value",
+ [
+ (0, "ABS_HAT0Y", -1),
+ (2, "ABS_HAT0X", 1),
+ (4, "ABS_HAT0Y", 1),
+ (6, "ABS_HAT0X", -1),
+ ],
+ )
+ def test_hat_switch(self, hat_value, expected_evdev, evdev_value):
+ uhdev = self.uhdev
+
+ r = uhdev.event(hat_switch=hat_value)
+ events = uhdev.next_sync_events()
+ self.debug_reports(r, uhdev, events)
+ assert (
+ libevdev.InputEvent(
+ libevdev.evbit("EV_ABS", expected_evdev), evdev_value
+ )
+ in events
+ )
+
+
+class TestSaitekGamepad(BaseTest.TestGamepad):
+ def create_device(self):
+ return SaitekGamepad()
+
+
+class TestAsusGamepad(BaseTest.TestGamepad):
+ def create_device(self):
+ return AsusGamepad()
diff --git a/tools/testing/selftests/hid/tests/test_hid_core.py b/tools/testing/selftests/hid/tests/test_hid_core.py
new file mode 100644
index 000000000000..9a7fe40020d2
--- /dev/null
+++ b/tools/testing/selftests/hid/tests/test_hid_core.py
@@ -0,0 +1,154 @@
+#!/bin/env python3
+# SPDX-License-Identifier: GPL-2.0
+# -*- coding: utf-8 -*-
+#
+# Copyright (c) 2017 Benjamin Tissoires <benjamin.tissoires@gmail.com>
+# Copyright (c) 2017 Red Hat, Inc.
+#
+# This program is free software: you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program. If not, see <http://www.gnu.org/licenses/>.
+#
+
+# This is for generic devices
+
+from . import base
+import logging
+
+logger = logging.getLogger("hidtools.test.hid")
+
+
+class TestCollectionOverflow(base.BaseTestCase.TestUhid):
+ """
+ Test class to test re-allocation of the HID collection stack in
+ hid-core.c.
+ """
+
+ def create_device(self):
+ # fmt: off
+ report_descriptor = [
+ 0x05, 0x01, # .Usage Page (Generic Desktop)
+ 0x09, 0x02, # .Usage (Mouse)
+ 0xa1, 0x01, # .Collection (Application)
+ 0x09, 0x02, # ..Usage (Mouse)
+ 0xa1, 0x02, # ..Collection (Logical)
+ 0x09, 0x01, # ...Usage (Pointer)
+ 0xa1, 0x00, # ...Collection (Physical)
+ 0x05, 0x09, # ....Usage Page (Button)
+ 0x19, 0x01, # ....Usage Minimum (1)
+ 0x29, 0x03, # ....Usage Maximum (3)
+ 0x15, 0x00, # ....Logical Minimum (0)
+ 0x25, 0x01, # ....Logical Maximum (1)
+ 0x75, 0x01, # ....Report Size (1)
+ 0x95, 0x03, # ....Report Count (3)
+ 0x81, 0x02, # ....Input (Data,Var,Abs)
+ 0x75, 0x05, # ....Report Size (5)
+ 0x95, 0x01, # ....Report Count (1)
+ 0x81, 0x03, # ....Input (Cnst,Var,Abs)
+ 0xa1, 0x02, # ....Collection (Logical)
+ 0x09, 0x01, # .....Usage (Pointer)
+ 0xa1, 0x02, # ....Collection (Logical)
+ 0x09, 0x01, # .....Usage (Pointer)
+ 0xa1, 0x02, # ....Collection (Logical)
+ 0x09, 0x01, # .....Usage (Pointer)
+ 0xa1, 0x02, # ....Collection (Logical)
+ 0x09, 0x01, # .....Usage (Pointer)
+ 0xa1, 0x02, # ....Collection (Logical)
+ 0x09, 0x01, # .....Usage (Pointer)
+ 0xa1, 0x02, # ....Collection (Logical)
+ 0x09, 0x01, # .....Usage (Pointer)
+ 0xa1, 0x02, # ....Collection (Logical)
+ 0x09, 0x01, # .....Usage (Pointer)
+ 0xa1, 0x02, # ....Collection (Logical)
+ 0x09, 0x01, # .....Usage (Pointer)
+ 0xa1, 0x02, # ....Collection (Logical)
+ 0x09, 0x01, # .....Usage (Pointer)
+ 0xa1, 0x02, # ....Collection (Logical)
+ 0x09, 0x01, # .....Usage (Pointer)
+ 0xa1, 0x02, # ....Collection (Logical)
+ 0x09, 0x01, # .....Usage (Pointer)
+ 0xa1, 0x02, # ....Collection (Logical)
+ 0x09, 0x01, # .....Usage (Pointer)
+ 0xa1, 0x02, # ....Collection (Logical)
+ 0x09, 0x01, # .....Usage (Pointer)
+ 0xa1, 0x02, # ....Collection (Logical)
+ 0x09, 0x01, # .....Usage (Pointer)
+ 0xa1, 0x02, # ....Collection (Logical)
+ 0x09, 0x01, # .....Usage (Pointer)
+ 0xa1, 0x02, # ....Collection (Logical)
+ 0x09, 0x01, # .....Usage (Pointer)
+ 0xa1, 0x02, # ....Collection (Logical)
+ 0x09, 0x01, # .....Usage (Pointer)
+ 0x05, 0x01, # .....Usage Page (Generic Desktop)
+ 0x09, 0x30, # .....Usage (X)
+ 0x09, 0x31, # .....Usage (Y)
+ 0x15, 0x81, # .....Logical Minimum (-127)
+ 0x25, 0x7f, # .....Logical Maximum (127)
+ 0x75, 0x08, # .....Report Size (8)
+ 0x95, 0x02, # .....Report Count (2)
+ 0x81, 0x06, # .....Input (Data,Var,Rel)
+ 0xa1, 0x02, # ...Collection (Logical)
+ 0x85, 0x12, # ....Report ID (18)
+ 0x09, 0x48, # ....Usage (Resolution Multiplier)
+ 0x95, 0x01, # ....Report Count (1)
+ 0x75, 0x02, # ....Report Size (2)
+ 0x15, 0x00, # ....Logical Minimum (0)
+ 0x25, 0x01, # ....Logical Maximum (1)
+ 0x35, 0x01, # ....Physical Minimum (1)
+ 0x45, 0x0c, # ....Physical Maximum (12)
+ 0xb1, 0x02, # ....Feature (Data,Var,Abs)
+ 0x85, 0x1a, # ....Report ID (26)
+ 0x09, 0x38, # ....Usage (Wheel)
+ 0x35, 0x00, # ....Physical Minimum (0)
+ 0x45, 0x00, # ....Physical Maximum (0)
+ 0x95, 0x01, # ....Report Count (1)
+ 0x75, 0x10, # ....Report Size (16)
+ 0x16, 0x01, 0x80, # ....Logical Minimum (-32767)
+ 0x26, 0xff, 0x7f, # ....Logical Maximum (32767)
+ 0x81, 0x06, # ....Input (Data,Var,Rel)
+ 0xc0, # ...End Collection
+ 0xc0, # ...End Collection
+ 0xc0, # ...End Collection
+ 0xc0, # ...End Collection
+ 0xc0, # ...End Collection
+ 0xc0, # ...End Collection
+ 0xc0, # ...End Collection
+ 0xc0, # ...End Collection
+ 0xc0, # ...End Collection
+ 0xc0, # ...End Collection
+ 0xc0, # ...End Collection
+ 0xc0, # ...End Collection
+ 0xc0, # ...End Collection
+ 0xc0, # ...End Collection
+ 0xc0, # ...End Collection
+ 0xc0, # ...End Collection
+ 0xc0, # ...End Collection
+ 0xc0, # ...End Collection
+ 0xc0, # ...End Collection
+ 0xc0, # ..End Collection
+ 0xc0, # .End Collection
+ ]
+ # fmt: on
+ return base.UHIDTestDevice(
+ name=None, rdesc=report_descriptor, application="Mouse"
+ )
+
+ def test_rdesc(self):
+ """
+ This test can only check for negatives. If the kernel crashes, you
+ know why. If this test passes, either the bug isn't present or just
+ didn't get triggered. No way to know.
+
+ For an explanation, see kernel patch
+ HID: core: replace the collection tree pointers with indices
+ """
+ pass
diff --git a/tools/testing/selftests/hid/tests/test_ite_keyboard.py b/tools/testing/selftests/hid/tests/test_ite_keyboard.py
new file mode 100644
index 000000000000..38550c167bae
--- /dev/null
+++ b/tools/testing/selftests/hid/tests/test_ite_keyboard.py
@@ -0,0 +1,166 @@
+#!/bin/env python3
+# SPDX-License-Identifier: GPL-2.0
+# -*- coding: utf-8 -*-
+#
+# Copyright (c) 2020 Benjamin Tissoires <benjamin.tissoires@gmail.com>
+# Copyright (c) 2020 Red Hat, Inc.
+#
+
+from .test_keyboard import ArrayKeyboard, TestArrayKeyboard
+from hidtools.util import BusType
+
+import libevdev
+import logging
+
+logger = logging.getLogger("hidtools.test.ite-keyboard")
+
+KERNEL_MODULE = ("itetech", "hid_ite")
+
+
+class KbdData(object):
+ pass
+
+
+# The ITE keyboards have an issue regarding the Wifi key:
+# nothing comes in when pressing the key, but we get a null
+# event on the key release.
+# This test covers this case.
+class ITEKeyboard(ArrayKeyboard):
+ # fmt: off
+ report_descriptor = [
+ 0x06, 0x85, 0xff, # Usage Page (Vendor Usage Page 0xff85)
+ 0x09, 0x95, # Usage (Vendor Usage 0x95) 3
+ 0xa1, 0x01, # Collection (Application) 5
+ 0x85, 0x5a, # .Report ID (90) 7
+ 0x09, 0x01, # .Usage (Vendor Usage 0x01) 9
+ 0x15, 0x00, # .Logical Minimum (0) 11
+ 0x26, 0xff, 0x00, # .Logical Maximum (255) 13
+ 0x75, 0x08, # .Report Size (8) 16
+ 0x95, 0x10, # .Report Count (16) 18
+ 0xb1, 0x00, # .Feature (Data,Arr,Abs) 20
+ 0xc0, # End Collection 22
+ 0x05, 0x01, # Usage Page (Generic Desktop) 23
+ 0x09, 0x06, # Usage (Keyboard) 25
+ 0xa1, 0x01, # Collection (Application) 27
+ 0x85, 0x01, # .Report ID (1) 29
+ 0x75, 0x01, # .Report Size (1) 31
+ 0x95, 0x08, # .Report Count (8) 33
+ 0x05, 0x07, # .Usage Page (Keyboard) 35
+ 0x19, 0xe0, # .Usage Minimum (224) 37
+ 0x29, 0xe7, # .Usage Maximum (231) 39
+ 0x15, 0x00, # .Logical Minimum (0) 41
+ 0x25, 0x01, # .Logical Maximum (1) 43
+ 0x81, 0x02, # .Input (Data,Var,Abs) 45
+ 0x95, 0x01, # .Report Count (1) 47
+ 0x75, 0x08, # .Report Size (8) 49
+ 0x81, 0x03, # .Input (Cnst,Var,Abs) 51
+ 0x95, 0x05, # .Report Count (5) 53
+ 0x75, 0x01, # .Report Size (1) 55
+ 0x05, 0x08, # .Usage Page (LEDs) 57
+ 0x19, 0x01, # .Usage Minimum (1) 59
+ 0x29, 0x05, # .Usage Maximum (5) 61
+ 0x91, 0x02, # .Output (Data,Var,Abs) 63
+ 0x95, 0x01, # .Report Count (1) 65
+ 0x75, 0x03, # .Report Size (3) 67
+ 0x91, 0x03, # .Output (Cnst,Var,Abs) 69
+ 0x95, 0x06, # .Report Count (6) 71
+ 0x75, 0x08, # .Report Size (8) 73
+ 0x15, 0x00, # .Logical Minimum (0) 75
+ 0x26, 0xff, 0x00, # .Logical Maximum (255) 77
+ 0x05, 0x07, # .Usage Page (Keyboard) 80
+ 0x19, 0x00, # .Usage Minimum (0) 82
+ 0x2a, 0xff, 0x00, # .Usage Maximum (255) 84
+ 0x81, 0x00, # .Input (Data,Arr,Abs) 87
+ 0xc0, # End Collection 89
+ 0x05, 0x0c, # Usage Page (Consumer Devices) 90
+ 0x09, 0x01, # Usage (Consumer Control) 92
+ 0xa1, 0x01, # Collection (Application) 94
+ 0x85, 0x02, # .Report ID (2) 96
+ 0x19, 0x00, # .Usage Minimum (0) 98
+ 0x2a, 0x3c, 0x02, # .Usage Maximum (572) 100
+ 0x15, 0x00, # .Logical Minimum (0) 103
+ 0x26, 0x3c, 0x02, # .Logical Maximum (572) 105
+ 0x75, 0x10, # .Report Size (16) 108
+ 0x95, 0x01, # .Report Count (1) 110
+ 0x81, 0x00, # .Input (Data,Arr,Abs) 112
+ 0xc0, # End Collection 114
+ 0x05, 0x01, # Usage Page (Generic Desktop) 115
+ 0x09, 0x0c, # Usage (Wireless Radio Controls) 117
+ 0xa1, 0x01, # Collection (Application) 119
+ 0x85, 0x03, # .Report ID (3) 121
+ 0x15, 0x00, # .Logical Minimum (0) 123
+ 0x25, 0x01, # .Logical Maximum (1) 125
+ 0x09, 0xc6, # .Usage (Wireless Radio Button) 127
+ 0x95, 0x01, # .Report Count (1) 129
+ 0x75, 0x01, # .Report Size (1) 131
+ 0x81, 0x06, # .Input (Data,Var,Rel) 133
+ 0x75, 0x07, # .Report Size (7) 135
+ 0x81, 0x03, # .Input (Cnst,Var,Abs) 137
+ 0xc0, # End Collection 139
+ 0x05, 0x88, # Usage Page (Vendor Usage Page 0x88) 140
+ 0x09, 0x01, # Usage (Vendor Usage 0x01) 142
+ 0xa1, 0x01, # Collection (Application) 144
+ 0x85, 0x04, # .Report ID (4) 146
+ 0x19, 0x00, # .Usage Minimum (0) 148
+ 0x2a, 0xff, 0xff, # .Usage Maximum (65535) 150
+ 0x15, 0x00, # .Logical Minimum (0) 153
+ 0x26, 0xff, 0xff, # .Logical Maximum (65535) 155
+ 0x75, 0x08, # .Report Size (8) 158
+ 0x95, 0x02, # .Report Count (2) 160
+ 0x81, 0x02, # .Input (Data,Var,Abs) 162
+ 0xc0, # End Collection 164
+ 0x05, 0x01, # Usage Page (Generic Desktop) 165
+ 0x09, 0x80, # Usage (System Control) 167
+ 0xa1, 0x01, # Collection (Application) 169
+ 0x85, 0x05, # .Report ID (5) 171
+ 0x19, 0x81, # .Usage Minimum (129) 173
+ 0x29, 0x83, # .Usage Maximum (131) 175
+ 0x15, 0x00, # .Logical Minimum (0) 177
+ 0x25, 0x01, # .Logical Maximum (1) 179
+ 0x95, 0x08, # .Report Count (8) 181
+ 0x75, 0x01, # .Report Size (1) 183
+ 0x81, 0x02, # .Input (Data,Var,Abs) 185
+ 0xc0, # End Collection 187
+ ]
+ # fmt: on
+
+ def __init__(
+ self,
+ rdesc=report_descriptor,
+ name=None,
+ input_info=(BusType.USB, 0x06CB, 0x2968),
+ ):
+ super().__init__(rdesc, name, input_info)
+
+ def event(self, keys, reportID=None, application=None):
+ application = application or "Keyboard"
+ return super().event(keys, reportID, application)
+
+
+class TestITEKeyboard(TestArrayKeyboard):
+ kernel_modules = [KERNEL_MODULE]
+
+ def create_device(self):
+ return ITEKeyboard()
+
+ def test_wifi_key(self):
+ uhdev = self.uhdev
+ syn_event = self.syn_event
+
+ # the following sends a 'release' event on the Wifi key.
+ # the kernel is supposed to translate this into Wifi key
+ # down and up
+ r = [0x03, 0x00]
+ uhdev.call_input_event(r)
+ expected = [syn_event]
+ expected.append(libevdev.InputEvent(libevdev.EV_KEY.KEY_RFKILL, 1))
+ events = uhdev.next_sync_events()
+ self.debug_reports([r], uhdev, events)
+ self.assertInputEventsIn(expected, events)
+
+ expected = [syn_event]
+ expected.append(libevdev.InputEvent(libevdev.EV_KEY.KEY_RFKILL, 0))
+ # the kernel sends the two down/up key events in a batch, no need to
+ # call events = uhdev.next_sync_events()
+ self.debug_reports([], uhdev, events)
+ self.assertInputEventsIn(expected, events)
diff --git a/tools/testing/selftests/hid/tests/test_keyboard.py b/tools/testing/selftests/hid/tests/test_keyboard.py
new file mode 100644
index 000000000000..b3b2bdbf63b7
--- /dev/null
+++ b/tools/testing/selftests/hid/tests/test_keyboard.py
@@ -0,0 +1,485 @@
+#!/bin/env python3
+# SPDX-License-Identifier: GPL-2.0
+# -*- coding: utf-8 -*-
+#
+# Copyright (c) 2018 Benjamin Tissoires <benjamin.tissoires@gmail.com>
+# Copyright (c) 2018 Red Hat, Inc.
+#
+
+from . import base
+import hidtools.hid
+import libevdev
+import logging
+
+logger = logging.getLogger("hidtools.test.keyboard")
+
+
+class InvalidHIDCommunication(Exception):
+ pass
+
+
+class KeyboardData(object):
+ pass
+
+
+class BaseKeyboard(base.UHIDTestDevice):
+ def __init__(self, rdesc, name=None, input_info=None):
+ assert rdesc is not None
+ super().__init__(name, "Key", input_info=input_info, rdesc=rdesc)
+ self.keystates = {}
+
+ def _update_key_state(self, keys):
+ """
+ Update the internal state of keys with the new state given.
+
+ :param key: a tuple of chars for the currently pressed keys.
+ """
+ # First remove the already released keys
+ unused_keys = [k for k, v in self.keystates.items() if not v]
+ for key in unused_keys:
+ del self.keystates[key]
+
+ # self.keystates contains now the list of currently pressed keys,
+ # release them...
+ for key in self.keystates.keys():
+ self.keystates[key] = False
+
+ # ...and press those that are in parameter
+ for key in keys:
+ self.keystates[key] = True
+
+ def _create_report_data(self):
+ keyboard = KeyboardData()
+ for key, value in self.keystates.items():
+ key = key.replace(" ", "").lower()
+ setattr(keyboard, key, value)
+ return keyboard
+
+ def create_array_report(self, keys, reportID=None, application=None):
+ """
+ Return an input report for this device.
+
+ :param keys: a tuple of chars for the pressed keys. The class maintains
+ the list of currently pressed keys, so to release a key, the caller
+ needs to call again this function without the key in this tuple.
+ :param reportID: the numeric report ID for this report, if needed
+ """
+ self._update_key_state(keys)
+ reportID = reportID or self.default_reportID
+
+ keyboard = self._create_report_data()
+ return self.create_report(keyboard, reportID=reportID, application=application)
+
+ def event(self, keys, reportID=None, application=None):
+ """
+ Send an input event on the default report ID.
+
+ :param keys: a tuple of chars for the pressed keys. The class maintains
+ the list of currently pressed keys, so to release a key, the caller
+ needs to call again this function without the key in this tuple.
+ """
+ r = self.create_array_report(keys, reportID, application)
+ self.call_input_event(r)
+ return [r]
+
+
+class PlainKeyboard(BaseKeyboard):
+ # fmt: off
+ report_descriptor = [
+ 0x05, 0x01, # Usage Page (Generic Desktop)
+ 0x09, 0x06, # Usage (Keyboard)
+ 0xa1, 0x01, # Collection (Application)
+ 0x85, 0x01, # .Report ID (1)
+ 0x05, 0x07, # .Usage Page (Keyboard)
+ 0x19, 0xe0, # .Usage Minimum (224)
+ 0x29, 0xe7, # .Usage Maximum (231)
+ 0x15, 0x00, # .Logical Minimum (0)
+ 0x25, 0x01, # .Logical Maximum (1)
+ 0x75, 0x01, # .Report Size (1)
+ 0x95, 0x08, # .Report Count (8)
+ 0x81, 0x02, # .Input (Data,Var,Abs)
+ 0x19, 0x00, # .Usage Minimum (0)
+ 0x29, 0x97, # .Usage Maximum (151)
+ 0x15, 0x00, # .Logical Minimum (0)
+ 0x25, 0x01, # .Logical Maximum (1)
+ 0x75, 0x01, # .Report Size (1)
+ 0x95, 0x98, # .Report Count (152)
+ 0x81, 0x02, # .Input (Data,Var,Abs)
+ 0xc0, # End Collection
+ ]
+ # fmt: on
+
+ def __init__(self, rdesc=report_descriptor, name=None, input_info=None):
+ super().__init__(rdesc, name, input_info)
+ self.default_reportID = 1
+
+
+class ArrayKeyboard(BaseKeyboard):
+ # fmt: off
+ report_descriptor = [
+ 0x05, 0x01, # Usage Page (Generic Desktop)
+ 0x09, 0x06, # Usage (Keyboard)
+ 0xa1, 0x01, # Collection (Application)
+ 0x05, 0x07, # .Usage Page (Keyboard)
+ 0x19, 0xe0, # .Usage Minimum (224)
+ 0x29, 0xe7, # .Usage Maximum (231)
+ 0x15, 0x00, # .Logical Minimum (0)
+ 0x25, 0x01, # .Logical Maximum (1)
+ 0x75, 0x01, # .Report Size (1)
+ 0x95, 0x08, # .Report Count (8)
+ 0x81, 0x02, # .Input (Data,Var,Abs)
+ 0x95, 0x06, # .Report Count (6)
+ 0x75, 0x08, # .Report Size (8)
+ 0x15, 0x00, # .Logical Minimum (0)
+ 0x26, 0xa4, 0x00, # .Logical Maximum (164)
+ 0x05, 0x07, # .Usage Page (Keyboard)
+ 0x19, 0x00, # .Usage Minimum (0)
+ 0x29, 0xa4, # .Usage Maximum (164)
+ 0x81, 0x00, # .Input (Data,Arr,Abs)
+ 0xc0, # End Collection
+ ]
+ # fmt: on
+
+ def __init__(self, rdesc=report_descriptor, name=None, input_info=None):
+ super().__init__(rdesc, name, input_info)
+
+ def _create_report_data(self):
+ data = KeyboardData()
+ array = []
+
+ hut = hidtools.hut.HUT
+
+ # strip modifiers from the array
+ for k, v in self.keystates.items():
+ # we ignore depressed keys
+ if not v:
+ continue
+
+ usage = hut[0x07].from_name[k].usage
+ if usage >= 224 and usage <= 231:
+ # modifier
+ setattr(data, k.lower(), 1)
+ else:
+ array.append(k)
+
+ # if array length is bigger than 6, report ErrorRollOver
+ if len(array) > 6:
+ array = ["ErrorRollOver"] * 6
+
+ data.keyboard = array
+ return data
+
+
+class LEDKeyboard(ArrayKeyboard):
+ # fmt: off
+ report_descriptor = [
+ 0x05, 0x01, # Usage Page (Generic Desktop)
+ 0x09, 0x06, # Usage (Keyboard)
+ 0xa1, 0x01, # Collection (Application)
+ 0x05, 0x07, # .Usage Page (Keyboard)
+ 0x19, 0xe0, # .Usage Minimum (224)
+ 0x29, 0xe7, # .Usage Maximum (231)
+ 0x15, 0x00, # .Logical Minimum (0)
+ 0x25, 0x01, # .Logical Maximum (1)
+ 0x75, 0x01, # .Report Size (1)
+ 0x95, 0x08, # .Report Count (8)
+ 0x81, 0x02, # .Input (Data,Var,Abs)
+ 0x95, 0x01, # .Report Count (1)
+ 0x75, 0x08, # .Report Size (8)
+ 0x81, 0x01, # .Input (Cnst,Arr,Abs)
+ 0x95, 0x05, # .Report Count (5)
+ 0x75, 0x01, # .Report Size (1)
+ 0x05, 0x08, # .Usage Page (LEDs)
+ 0x19, 0x01, # .Usage Minimum (1)
+ 0x29, 0x05, # .Usage Maximum (5)
+ 0x91, 0x02, # .Output (Data,Var,Abs)
+ 0x95, 0x01, # .Report Count (1)
+ 0x75, 0x03, # .Report Size (3)
+ 0x91, 0x01, # .Output (Cnst,Arr,Abs)
+ 0x95, 0x06, # .Report Count (6)
+ 0x75, 0x08, # .Report Size (8)
+ 0x15, 0x00, # .Logical Minimum (0)
+ 0x26, 0xa4, 0x00, # .Logical Maximum (164)
+ 0x05, 0x07, # .Usage Page (Keyboard)
+ 0x19, 0x00, # .Usage Minimum (0)
+ 0x29, 0xa4, # .Usage Maximum (164)
+ 0x81, 0x00, # .Input (Data,Arr,Abs)
+ 0xc0, # End Collection
+ ]
+ # fmt: on
+
+ def __init__(self, rdesc=report_descriptor, name=None, input_info=None):
+ super().__init__(rdesc, name, input_info)
+
+
+# Some Primax manufactured keyboards set the Usage Page after having defined
+# some local Usages. It relies on the fact that the specification states that
+# Usages are to be concatenated with Usage Pages upon finding a Main item (see
+# 6.2.2.8). This test covers this case.
+class PrimaxKeyboard(ArrayKeyboard):
+ # fmt: off
+ report_descriptor = [
+ 0x05, 0x01, # Usage Page (Generic Desktop)
+ 0x09, 0x06, # Usage (Keyboard)
+ 0xA1, 0x01, # Collection (Application)
+ 0x05, 0x07, # .Usage Page (Keyboard)
+ 0x19, 0xE0, # .Usage Minimum (224)
+ 0x29, 0xE7, # .Usage Maximum (231)
+ 0x15, 0x00, # .Logical Minimum (0)
+ 0x25, 0x01, # .Logical Maximum (1)
+ 0x75, 0x01, # .Report Size (1)
+ 0x95, 0x08, # .Report Count (8)
+ 0x81, 0x02, # .Input (Data,Var,Abs)
+ 0x75, 0x08, # .Report Size (8)
+ 0x95, 0x01, # .Report Count (1)
+ 0x81, 0x01, # .Input (Data,Var,Abs)
+ 0x05, 0x08, # .Usage Page (LEDs)
+ 0x19, 0x01, # .Usage Minimum (1)
+ 0x29, 0x03, # .Usage Maximum (3)
+ 0x75, 0x01, # .Report Size (1)
+ 0x95, 0x03, # .Report Count (3)
+ 0x91, 0x02, # .Output (Data,Var,Abs)
+ 0x95, 0x01, # .Report Count (1)
+ 0x75, 0x05, # .Report Size (5)
+ 0x91, 0x01, # .Output (Constant)
+ 0x15, 0x00, # .Logical Minimum (0)
+ 0x26, 0xFF, 0x00, # .Logical Maximum (255)
+ 0x19, 0x00, # .Usage Minimum (0)
+ 0x2A, 0xFF, 0x00, # .Usage Maximum (255)
+ 0x05, 0x07, # .Usage Page (Keyboard)
+ 0x75, 0x08, # .Report Size (8)
+ 0x95, 0x06, # .Report Count (6)
+ 0x81, 0x00, # .Input (Data,Arr,Abs)
+ 0xC0, # End Collection
+ ]
+ # fmt: on
+
+ def __init__(self, rdesc=report_descriptor, name=None, input_info=None):
+ super().__init__(rdesc, name, input_info)
+
+
+class BaseTest:
+ class TestKeyboard(base.BaseTestCase.TestUhid):
+ def test_single_key(self):
+ """check for key reliability."""
+ uhdev = self.uhdev
+ evdev = uhdev.get_evdev()
+ syn_event = self.syn_event
+
+ r = uhdev.event(["a and A"])
+ expected = [syn_event]
+ expected.append(libevdev.InputEvent(libevdev.EV_KEY.KEY_A, 1))
+ events = uhdev.next_sync_events()
+ self.debug_reports(r, uhdev, events)
+ self.assertInputEventsIn(expected, events)
+ assert evdev.value[libevdev.EV_KEY.KEY_A] == 1
+
+ r = uhdev.event([])
+ expected = [syn_event]
+ expected.append(libevdev.InputEvent(libevdev.EV_KEY.KEY_A, 0))
+ events = uhdev.next_sync_events()
+ self.debug_reports(r, uhdev, events)
+ self.assertInputEventsIn(expected, events)
+ assert evdev.value[libevdev.EV_KEY.KEY_A] == 0
+
+ def test_two_keys(self):
+ uhdev = self.uhdev
+ evdev = uhdev.get_evdev()
+ syn_event = self.syn_event
+
+ r = uhdev.event(["a and A", "q and Q"])
+ expected = [syn_event]
+ expected.append(libevdev.InputEvent(libevdev.EV_KEY.KEY_A, 1))
+ expected.append(libevdev.InputEvent(libevdev.EV_KEY.KEY_Q, 1))
+ events = uhdev.next_sync_events()
+ self.debug_reports(r, uhdev, events)
+ self.assertInputEventsIn(expected, events)
+ assert evdev.value[libevdev.EV_KEY.KEY_A] == 1
+
+ r = uhdev.event([])
+ expected = [syn_event]
+ expected.append(libevdev.InputEvent(libevdev.EV_KEY.KEY_A, 0))
+ expected.append(libevdev.InputEvent(libevdev.EV_KEY.KEY_Q, 0))
+ events = uhdev.next_sync_events()
+ self.debug_reports(r, uhdev, events)
+ self.assertInputEventsIn(expected, events)
+ assert evdev.value[libevdev.EV_KEY.KEY_A] == 0
+ assert evdev.value[libevdev.EV_KEY.KEY_Q] == 0
+
+ r = uhdev.event(["c and C"])
+ expected = [syn_event]
+ expected.append(libevdev.InputEvent(libevdev.EV_KEY.KEY_C, 1))
+ events = uhdev.next_sync_events()
+ self.debug_reports(r, uhdev, events)
+ self.assertInputEventsIn(expected, events)
+ assert evdev.value[libevdev.EV_KEY.KEY_C] == 1
+
+ r = uhdev.event(["c and C", "Spacebar"])
+ expected = [syn_event]
+ expected.append(libevdev.InputEvent(libevdev.EV_KEY.KEY_SPACE, 1))
+ events = uhdev.next_sync_events()
+ self.debug_reports(r, uhdev, events)
+ assert libevdev.InputEvent(libevdev.EV_KEY.KEY_C) not in events
+ self.assertInputEventsIn(expected, events)
+ assert evdev.value[libevdev.EV_KEY.KEY_C] == 1
+ assert evdev.value[libevdev.EV_KEY.KEY_SPACE] == 1
+
+ r = uhdev.event(["Spacebar"])
+ expected = [syn_event]
+ expected.append(libevdev.InputEvent(libevdev.EV_KEY.KEY_C, 0))
+ events = uhdev.next_sync_events()
+ self.debug_reports(r, uhdev, events)
+ assert libevdev.InputEvent(libevdev.EV_KEY.KEY_SPACE) not in events
+ self.assertInputEventsIn(expected, events)
+ assert evdev.value[libevdev.EV_KEY.KEY_C] == 0
+ assert evdev.value[libevdev.EV_KEY.KEY_SPACE] == 1
+
+ r = uhdev.event([])
+ expected = [syn_event]
+ expected.append(libevdev.InputEvent(libevdev.EV_KEY.KEY_SPACE, 0))
+ events = uhdev.next_sync_events()
+ self.debug_reports(r, uhdev, events)
+ self.assertInputEventsIn(expected, events)
+ assert evdev.value[libevdev.EV_KEY.KEY_SPACE] == 0
+
+ def test_modifiers(self):
+ # ctrl-alt-del would be very nice :)
+ uhdev = self.uhdev
+ syn_event = self.syn_event
+
+ r = uhdev.event(["LeftControl", "LeftShift", "= and +"])
+ expected = [syn_event]
+ expected.append(libevdev.InputEvent(libevdev.EV_KEY.KEY_LEFTCTRL, 1))
+ expected.append(libevdev.InputEvent(libevdev.EV_KEY.KEY_LEFTSHIFT, 1))
+ expected.append(libevdev.InputEvent(libevdev.EV_KEY.KEY_EQUAL, 1))
+ events = uhdev.next_sync_events()
+ self.debug_reports(r, uhdev, events)
+ self.assertInputEventsIn(expected, events)
+
+
+class TestPlainKeyboard(BaseTest.TestKeyboard):
+ def create_device(self):
+ return PlainKeyboard()
+
+ def test_10_keys(self):
+ uhdev = self.uhdev
+ syn_event = self.syn_event
+
+ r = uhdev.event(
+ [
+ "1 and !",
+ "2 and @",
+ "3 and #",
+ "4 and $",
+ "5 and %",
+ "6 and ^",
+ "7 and &",
+ "8 and *",
+ "9 and (",
+ "0 and )",
+ ]
+ )
+ expected = [syn_event]
+ expected.append(libevdev.InputEvent(libevdev.EV_KEY.KEY_0, 1))
+ expected.append(libevdev.InputEvent(libevdev.EV_KEY.KEY_1, 1))
+ expected.append(libevdev.InputEvent(libevdev.EV_KEY.KEY_2, 1))
+ expected.append(libevdev.InputEvent(libevdev.EV_KEY.KEY_3, 1))
+ expected.append(libevdev.InputEvent(libevdev.EV_KEY.KEY_4, 1))
+ expected.append(libevdev.InputEvent(libevdev.EV_KEY.KEY_5, 1))
+ expected.append(libevdev.InputEvent(libevdev.EV_KEY.KEY_6, 1))
+ expected.append(libevdev.InputEvent(libevdev.EV_KEY.KEY_7, 1))
+ expected.append(libevdev.InputEvent(libevdev.EV_KEY.KEY_8, 1))
+ expected.append(libevdev.InputEvent(libevdev.EV_KEY.KEY_9, 1))
+ events = uhdev.next_sync_events()
+ self.debug_reports(r, uhdev, events)
+ self.assertInputEventsIn(expected, events)
+
+ r = uhdev.event([])
+ expected = [syn_event]
+ expected.append(libevdev.InputEvent(libevdev.EV_KEY.KEY_0, 0))
+ expected.append(libevdev.InputEvent(libevdev.EV_KEY.KEY_1, 0))
+ expected.append(libevdev.InputEvent(libevdev.EV_KEY.KEY_2, 0))
+ expected.append(libevdev.InputEvent(libevdev.EV_KEY.KEY_3, 0))
+ expected.append(libevdev.InputEvent(libevdev.EV_KEY.KEY_4, 0))
+ expected.append(libevdev.InputEvent(libevdev.EV_KEY.KEY_5, 0))
+ expected.append(libevdev.InputEvent(libevdev.EV_KEY.KEY_6, 0))
+ expected.append(libevdev.InputEvent(libevdev.EV_KEY.KEY_7, 0))
+ expected.append(libevdev.InputEvent(libevdev.EV_KEY.KEY_8, 0))
+ expected.append(libevdev.InputEvent(libevdev.EV_KEY.KEY_9, 0))
+ events = uhdev.next_sync_events()
+ self.debug_reports(r, uhdev, events)
+ self.assertInputEventsIn(expected, events)
+
+
+class TestArrayKeyboard(BaseTest.TestKeyboard):
+ def create_device(self):
+ return ArrayKeyboard()
+
+ def test_10_keys(self):
+ uhdev = self.uhdev
+ syn_event = self.syn_event
+
+ r = uhdev.event(
+ [
+ "1 and !",
+ "2 and @",
+ "3 and #",
+ "4 and $",
+ "5 and %",
+ "6 and ^",
+ ]
+ )
+ expected = [syn_event]
+ expected.append(libevdev.InputEvent(libevdev.EV_KEY.KEY_1, 1))
+ expected.append(libevdev.InputEvent(libevdev.EV_KEY.KEY_2, 1))
+ expected.append(libevdev.InputEvent(libevdev.EV_KEY.KEY_3, 1))
+ expected.append(libevdev.InputEvent(libevdev.EV_KEY.KEY_4, 1))
+ expected.append(libevdev.InputEvent(libevdev.EV_KEY.KEY_5, 1))
+ expected.append(libevdev.InputEvent(libevdev.EV_KEY.KEY_6, 1))
+ events = uhdev.next_sync_events()
+
+ self.debug_reports(r, uhdev, events)
+ self.assertInputEventsIn(expected, events)
+
+ # ErrRollOver
+ r = uhdev.event(
+ [
+ "1 and !",
+ "2 and @",
+ "3 and #",
+ "4 and $",
+ "5 and %",
+ "6 and ^",
+ "7 and &",
+ "8 and *",
+ "9 and (",
+ "0 and )",
+ ]
+ )
+ events = uhdev.next_sync_events()
+
+ self.debug_reports(r, uhdev, events)
+
+ assert len(events) == 0
+
+ r = uhdev.event([])
+ expected = [syn_event]
+ expected.append(libevdev.InputEvent(libevdev.EV_KEY.KEY_1, 0))
+ expected.append(libevdev.InputEvent(libevdev.EV_KEY.KEY_2, 0))
+ expected.append(libevdev.InputEvent(libevdev.EV_KEY.KEY_3, 0))
+ expected.append(libevdev.InputEvent(libevdev.EV_KEY.KEY_4, 0))
+ expected.append(libevdev.InputEvent(libevdev.EV_KEY.KEY_5, 0))
+ expected.append(libevdev.InputEvent(libevdev.EV_KEY.KEY_6, 0))
+ events = uhdev.next_sync_events()
+ self.debug_reports(r, uhdev, events)
+ self.assertInputEventsIn(expected, events)
+
+
+class TestLEDKeyboard(BaseTest.TestKeyboard):
+ def create_device(self):
+ return LEDKeyboard()
+
+
+class TestPrimaxKeyboard(BaseTest.TestKeyboard):
+ def create_device(self):
+ return PrimaxKeyboard()
diff --git a/tools/testing/selftests/hid/tests/test_mouse.py b/tools/testing/selftests/hid/tests/test_mouse.py
new file mode 100644
index 000000000000..66daf7e5975c
--- /dev/null
+++ b/tools/testing/selftests/hid/tests/test_mouse.py
@@ -0,0 +1,977 @@
+#!/bin/env python3
+# SPDX-License-Identifier: GPL-2.0
+# -*- coding: utf-8 -*-
+#
+# Copyright (c) 2017 Benjamin Tissoires <benjamin.tissoires@gmail.com>
+# Copyright (c) 2017 Red Hat, Inc.
+#
+
+from . import base
+import hidtools.hid
+from hidtools.util import BusType
+import libevdev
+import logging
+import pytest
+
+logger = logging.getLogger("hidtools.test.mouse")
+
+# workaround https://gitlab.freedesktop.org/libevdev/python-libevdev/issues/6
+try:
+ libevdev.EV_REL.REL_WHEEL_HI_RES
+except AttributeError:
+ libevdev.EV_REL.REL_WHEEL_HI_RES = libevdev.EV_REL.REL_0B
+ libevdev.EV_REL.REL_HWHEEL_HI_RES = libevdev.EV_REL.REL_0C
+
+
+class InvalidHIDCommunication(Exception):
+ pass
+
+
+class MouseData(object):
+ pass
+
+
+class BaseMouse(base.UHIDTestDevice):
+ def __init__(self, rdesc, name=None, input_info=None):
+ assert rdesc is not None
+ super().__init__(name, "Mouse", input_info=input_info, rdesc=rdesc)
+ self.left = False
+ self.right = False
+ self.middle = False
+
+ def create_report(self, x, y, buttons=None, wheels=None, reportID=None):
+ """
+ Return an input report for this device.
+
+ :param x: relative x
+ :param y: relative y
+ :param buttons: a (l, r, m) tuple of bools for the button states,
+ where ``None`` is "leave unchanged"
+ :param wheels: a single value for the vertical wheel or a (vertical, horizontal) tuple for
+ the two wheels
+ :param reportID: the numeric report ID for this report, if needed
+ """
+ if buttons is not None:
+ left, right, middle = buttons
+ if left is not None:
+ self.left = left
+ if right is not None:
+ self.right = right
+ if middle is not None:
+ self.middle = middle
+ left = self.left
+ right = self.right
+ middle = self.middle
+ # Note: the BaseMouse doesn't actually have a wheel but the
+ # create_report magic only fills in those fields exist, so let's
+ # make this generic here.
+ wheel, acpan = 0, 0
+ if wheels is not None:
+ if isinstance(wheels, tuple):
+ wheel = wheels[0]
+ acpan = wheels[1]
+ else:
+ wheel = wheels
+
+ reportID = reportID or self.default_reportID
+
+ mouse = MouseData()
+ mouse.b1 = int(left)
+ mouse.b2 = int(right)
+ mouse.b3 = int(middle)
+ mouse.x = x
+ mouse.y = y
+ mouse.wheel = wheel
+ mouse.acpan = acpan
+ return super().create_report(mouse, reportID=reportID)
+
+ def event(self, x, y, buttons=None, wheels=None):
+ """
+ Send an input event on the default report ID.
+
+ :param x: relative x
+ :param y: relative y
+ :param buttons: a (l, r, m) tuple of bools for the button states,
+ where ``None`` is "leave unchanged"
+ :param wheels: a single value for the vertical wheel or a (vertical, horizontal) tuple for
+ the two wheels
+ """
+ r = self.create_report(x, y, buttons, wheels)
+ self.call_input_event(r)
+ return [r]
+
+
+class ButtonMouse(BaseMouse):
+ # fmt: off
+ report_descriptor = [
+ 0x05, 0x01, # .Usage Page (Generic Desktop) 0
+ 0x09, 0x02, # .Usage (Mouse) 2
+ 0xa1, 0x01, # .Collection (Application) 4
+ 0x09, 0x02, # ..Usage (Mouse) 6
+ 0xa1, 0x02, # ..Collection (Logical) 8
+ 0x09, 0x01, # ...Usage (Pointer) 10
+ 0xa1, 0x00, # ...Collection (Physical) 12
+ 0x05, 0x09, # ....Usage Page (Button) 14
+ 0x19, 0x01, # ....Usage Minimum (1) 16
+ 0x29, 0x03, # ....Usage Maximum (3) 18
+ 0x15, 0x00, # ....Logical Minimum (0) 20
+ 0x25, 0x01, # ....Logical Maximum (1) 22
+ 0x75, 0x01, # ....Report Size (1) 24
+ 0x95, 0x03, # ....Report Count (3) 26
+ 0x81, 0x02, # ....Input (Data,Var,Abs) 28
+ 0x75, 0x05, # ....Report Size (5) 30
+ 0x95, 0x01, # ....Report Count (1) 32
+ 0x81, 0x03, # ....Input (Cnst,Var,Abs) 34
+ 0x05, 0x01, # ....Usage Page (Generic Desktop) 36
+ 0x09, 0x30, # ....Usage (X) 38
+ 0x09, 0x31, # ....Usage (Y) 40
+ 0x15, 0x81, # ....Logical Minimum (-127) 42
+ 0x25, 0x7f, # ....Logical Maximum (127) 44
+ 0x75, 0x08, # ....Report Size (8) 46
+ 0x95, 0x02, # ....Report Count (2) 48
+ 0x81, 0x06, # ....Input (Data,Var,Rel) 50
+ 0xc0, # ...End Collection 52
+ 0xc0, # ..End Collection 53
+ 0xc0, # .End Collection 54
+ ]
+ # fmt: on
+
+ def __init__(self, rdesc=report_descriptor, name=None, input_info=None):
+ super().__init__(rdesc, name, input_info)
+
+ def fake_report(self, x, y, buttons):
+ if buttons is not None:
+ left, right, middle = buttons
+ if left is None:
+ left = self.left
+ if right is None:
+ right = self.right
+ if middle is None:
+ middle = self.middle
+ else:
+ left = self.left
+ right = self.right
+ middle = self.middle
+
+ button_mask = sum(1 << i for i, b in enumerate([left, right, middle]) if b)
+ x = max(-127, min(127, x))
+ y = max(-127, min(127, y))
+ x = hidtools.util.to_twos_comp(x, 8)
+ y = hidtools.util.to_twos_comp(y, 8)
+ return [button_mask, x, y]
+
+
+class WheelMouse(ButtonMouse):
+ # fmt: off
+ report_descriptor = [
+ 0x05, 0x01, # Usage Page (Generic Desktop) 0
+ 0x09, 0x02, # Usage (Mouse) 2
+ 0xa1, 0x01, # Collection (Application) 4
+ 0x05, 0x09, # .Usage Page (Button) 6
+ 0x19, 0x01, # .Usage Minimum (1) 8
+ 0x29, 0x03, # .Usage Maximum (3) 10
+ 0x15, 0x00, # .Logical Minimum (0) 12
+ 0x25, 0x01, # .Logical Maximum (1) 14
+ 0x95, 0x03, # .Report Count (3) 16
+ 0x75, 0x01, # .Report Size (1) 18
+ 0x81, 0x02, # .Input (Data,Var,Abs) 20
+ 0x95, 0x01, # .Report Count (1) 22
+ 0x75, 0x05, # .Report Size (5) 24
+ 0x81, 0x03, # .Input (Cnst,Var,Abs) 26
+ 0x05, 0x01, # .Usage Page (Generic Desktop) 28
+ 0x09, 0x01, # .Usage (Pointer) 30
+ 0xa1, 0x00, # .Collection (Physical) 32
+ 0x09, 0x30, # ..Usage (X) 34
+ 0x09, 0x31, # ..Usage (Y) 36
+ 0x15, 0x81, # ..Logical Minimum (-127) 38
+ 0x25, 0x7f, # ..Logical Maximum (127) 40
+ 0x75, 0x08, # ..Report Size (8) 42
+ 0x95, 0x02, # ..Report Count (2) 44
+ 0x81, 0x06, # ..Input (Data,Var,Rel) 46
+ 0xc0, # .End Collection 48
+ 0x09, 0x38, # .Usage (Wheel) 49
+ 0x15, 0x81, # .Logical Minimum (-127) 51
+ 0x25, 0x7f, # .Logical Maximum (127) 53
+ 0x75, 0x08, # .Report Size (8) 55
+ 0x95, 0x01, # .Report Count (1) 57
+ 0x81, 0x06, # .Input (Data,Var,Rel) 59
+ 0xc0, # End Collection 61
+ ]
+ # fmt: on
+
+ def __init__(self, rdesc=report_descriptor, name=None, input_info=None):
+ super().__init__(rdesc, name, input_info)
+ self.wheel_multiplier = 1
+
+
+class TwoWheelMouse(WheelMouse):
+ # fmt: off
+ report_descriptor = [
+ 0x05, 0x01, # Usage Page (Generic Desktop) 0
+ 0x09, 0x02, # Usage (Mouse) 2
+ 0xa1, 0x01, # Collection (Application) 4
+ 0x09, 0x01, # .Usage (Pointer) 6
+ 0xa1, 0x00, # .Collection (Physical) 8
+ 0x05, 0x09, # ..Usage Page (Button) 10
+ 0x19, 0x01, # ..Usage Minimum (1) 12
+ 0x29, 0x10, # ..Usage Maximum (16) 14
+ 0x15, 0x00, # ..Logical Minimum (0) 16
+ 0x25, 0x01, # ..Logical Maximum (1) 18
+ 0x95, 0x10, # ..Report Count (16) 20
+ 0x75, 0x01, # ..Report Size (1) 22
+ 0x81, 0x02, # ..Input (Data,Var,Abs) 24
+ 0x05, 0x01, # ..Usage Page (Generic Desktop) 26
+ 0x16, 0x01, 0x80, # ..Logical Minimum (-32767) 28
+ 0x26, 0xff, 0x7f, # ..Logical Maximum (32767) 31
+ 0x75, 0x10, # ..Report Size (16) 34
+ 0x95, 0x02, # ..Report Count (2) 36
+ 0x09, 0x30, # ..Usage (X) 38
+ 0x09, 0x31, # ..Usage (Y) 40
+ 0x81, 0x06, # ..Input (Data,Var,Rel) 42
+ 0x15, 0x81, # ..Logical Minimum (-127) 44
+ 0x25, 0x7f, # ..Logical Maximum (127) 46
+ 0x75, 0x08, # ..Report Size (8) 48
+ 0x95, 0x01, # ..Report Count (1) 50
+ 0x09, 0x38, # ..Usage (Wheel) 52
+ 0x81, 0x06, # ..Input (Data,Var,Rel) 54
+ 0x05, 0x0c, # ..Usage Page (Consumer Devices) 56
+ 0x0a, 0x38, 0x02, # ..Usage (AC Pan) 58
+ 0x95, 0x01, # ..Report Count (1) 61
+ 0x81, 0x06, # ..Input (Data,Var,Rel) 63
+ 0xc0, # .End Collection 65
+ 0xc0, # End Collection 66
+ ]
+ # fmt: on
+
+ def __init__(self, rdesc=report_descriptor, name=None, input_info=None):
+ super().__init__(rdesc, name, input_info)
+ self.hwheel_multiplier = 1
+
+
+class MIDongleMIWirelessMouse(TwoWheelMouse):
+ # fmt: off
+ report_descriptor = [
+ 0x05, 0x01, # Usage Page (Generic Desktop)
+ 0x09, 0x02, # Usage (Mouse)
+ 0xa1, 0x01, # Collection (Application)
+ 0x85, 0x01, # .Report ID (1)
+ 0x09, 0x01, # .Usage (Pointer)
+ 0xa1, 0x00, # .Collection (Physical)
+ 0x95, 0x05, # ..Report Count (5)
+ 0x75, 0x01, # ..Report Size (1)
+ 0x05, 0x09, # ..Usage Page (Button)
+ 0x19, 0x01, # ..Usage Minimum (1)
+ 0x29, 0x05, # ..Usage Maximum (5)
+ 0x15, 0x00, # ..Logical Minimum (0)
+ 0x25, 0x01, # ..Logical Maximum (1)
+ 0x81, 0x02, # ..Input (Data,Var,Abs)
+ 0x95, 0x01, # ..Report Count (1)
+ 0x75, 0x03, # ..Report Size (3)
+ 0x81, 0x01, # ..Input (Cnst,Arr,Abs)
+ 0x75, 0x08, # ..Report Size (8)
+ 0x95, 0x01, # ..Report Count (1)
+ 0x05, 0x01, # ..Usage Page (Generic Desktop)
+ 0x09, 0x38, # ..Usage (Wheel)
+ 0x15, 0x81, # ..Logical Minimum (-127)
+ 0x25, 0x7f, # ..Logical Maximum (127)
+ 0x81, 0x06, # ..Input (Data,Var,Rel)
+ 0x05, 0x0c, # ..Usage Page (Consumer Devices)
+ 0x0a, 0x38, 0x02, # ..Usage (AC Pan)
+ 0x95, 0x01, # ..Report Count (1)
+ 0x81, 0x06, # ..Input (Data,Var,Rel)
+ 0xc0, # .End Collection
+ 0x85, 0x02, # .Report ID (2)
+ 0x09, 0x01, # .Usage (Consumer Control)
+ 0xa1, 0x00, # .Collection (Physical)
+ 0x75, 0x0c, # ..Report Size (12)
+ 0x95, 0x02, # ..Report Count (2)
+ 0x05, 0x01, # ..Usage Page (Generic Desktop)
+ 0x09, 0x30, # ..Usage (X)
+ 0x09, 0x31, # ..Usage (Y)
+ 0x16, 0x01, 0xf8, # ..Logical Minimum (-2047)
+ 0x26, 0xff, 0x07, # ..Logical Maximum (2047)
+ 0x81, 0x06, # ..Input (Data,Var,Rel)
+ 0xc0, # .End Collection
+ 0xc0, # End Collection
+ 0x05, 0x0c, # Usage Page (Consumer Devices)
+ 0x09, 0x01, # Usage (Consumer Control)
+ 0xa1, 0x01, # Collection (Application)
+ 0x85, 0x03, # .Report ID (3)
+ 0x15, 0x00, # .Logical Minimum (0)
+ 0x25, 0x01, # .Logical Maximum (1)
+ 0x75, 0x01, # .Report Size (1)
+ 0x95, 0x01, # .Report Count (1)
+ 0x09, 0xcd, # .Usage (Play/Pause)
+ 0x81, 0x06, # .Input (Data,Var,Rel)
+ 0x0a, 0x83, 0x01, # .Usage (AL Consumer Control Config)
+ 0x81, 0x06, # .Input (Data,Var,Rel)
+ 0x09, 0xb5, # .Usage (Scan Next Track)
+ 0x81, 0x06, # .Input (Data,Var,Rel)
+ 0x09, 0xb6, # .Usage (Scan Previous Track)
+ 0x81, 0x06, # .Input (Data,Var,Rel)
+ 0x09, 0xea, # .Usage (Volume Down)
+ 0x81, 0x06, # .Input (Data,Var,Rel)
+ 0x09, 0xe9, # .Usage (Volume Up)
+ 0x81, 0x06, # .Input (Data,Var,Rel)
+ 0x0a, 0x25, 0x02, # .Usage (AC Forward)
+ 0x81, 0x06, # .Input (Data,Var,Rel)
+ 0x0a, 0x24, 0x02, # .Usage (AC Back)
+ 0x81, 0x06, # .Input (Data,Var,Rel)
+ 0xc0, # End Collection
+ ]
+ # fmt: on
+ device_input_info = (BusType.USB, 0x2717, 0x003B)
+ device_name = "uhid test MI Dongle MI Wireless Mouse"
+
+ def __init__(
+ self, rdesc=report_descriptor, name=device_name, input_info=device_input_info
+ ):
+ super().__init__(rdesc, name, input_info)
+
+ def event(self, x, y, buttons=None, wheels=None):
+ # this mouse spreads the relative pointer and the mouse buttons
+ # onto 2 distinct reports
+ rs = []
+ r = self.create_report(x, y, buttons, wheels, reportID=1)
+ self.call_input_event(r)
+ rs.append(r)
+ r = self.create_report(x, y, buttons, reportID=2)
+ self.call_input_event(r)
+ rs.append(r)
+ return rs
+
+
+class ResolutionMultiplierMouse(TwoWheelMouse):
+ # fmt: off
+ report_descriptor = [
+ 0x05, 0x01, # Usage Page (Generic Desktop) 83
+ 0x09, 0x02, # Usage (Mouse) 85
+ 0xa1, 0x01, # Collection (Application) 87
+ 0x05, 0x01, # .Usage Page (Generic Desktop) 89
+ 0x09, 0x02, # .Usage (Mouse) 91
+ 0xa1, 0x02, # .Collection (Logical) 93
+ 0x85, 0x11, # ..Report ID (17) 95
+ 0x09, 0x01, # ..Usage (Pointer) 97
+ 0xa1, 0x00, # ..Collection (Physical) 99
+ 0x05, 0x09, # ...Usage Page (Button) 101
+ 0x19, 0x01, # ...Usage Minimum (1) 103
+ 0x29, 0x03, # ...Usage Maximum (3) 105
+ 0x95, 0x03, # ...Report Count (3) 107
+ 0x75, 0x01, # ...Report Size (1) 109
+ 0x25, 0x01, # ...Logical Maximum (1) 111
+ 0x81, 0x02, # ...Input (Data,Var,Abs) 113
+ 0x95, 0x01, # ...Report Count (1) 115
+ 0x81, 0x01, # ...Input (Cnst,Arr,Abs) 117
+ 0x09, 0x05, # ...Usage (Vendor Usage 0x05) 119
+ 0x81, 0x02, # ...Input (Data,Var,Abs) 121
+ 0x95, 0x03, # ...Report Count (3) 123
+ 0x81, 0x01, # ...Input (Cnst,Arr,Abs) 125
+ 0x05, 0x01, # ...Usage Page (Generic Desktop) 127
+ 0x09, 0x30, # ...Usage (X) 129
+ 0x09, 0x31, # ...Usage (Y) 131
+ 0x95, 0x02, # ...Report Count (2) 133
+ 0x75, 0x08, # ...Report Size (8) 135
+ 0x15, 0x81, # ...Logical Minimum (-127) 137
+ 0x25, 0x7f, # ...Logical Maximum (127) 139
+ 0x81, 0x06, # ...Input (Data,Var,Rel) 141
+ 0xa1, 0x02, # ...Collection (Logical) 143
+ 0x85, 0x12, # ....Report ID (18) 145
+ 0x09, 0x48, # ....Usage (Resolution Multiplier) 147
+ 0x95, 0x01, # ....Report Count (1) 149
+ 0x75, 0x02, # ....Report Size (2) 151
+ 0x15, 0x00, # ....Logical Minimum (0) 153
+ 0x25, 0x01, # ....Logical Maximum (1) 155
+ 0x35, 0x01, # ....Physical Minimum (1) 157
+ 0x45, 0x04, # ....Physical Maximum (4) 159
+ 0xb1, 0x02, # ....Feature (Data,Var,Abs) 161
+ 0x35, 0x00, # ....Physical Minimum (0) 163
+ 0x45, 0x00, # ....Physical Maximum (0) 165
+ 0x75, 0x06, # ....Report Size (6) 167
+ 0xb1, 0x01, # ....Feature (Cnst,Arr,Abs) 169
+ 0x85, 0x11, # ....Report ID (17) 171
+ 0x09, 0x38, # ....Usage (Wheel) 173
+ 0x15, 0x81, # ....Logical Minimum (-127) 175
+ 0x25, 0x7f, # ....Logical Maximum (127) 177
+ 0x75, 0x08, # ....Report Size (8) 179
+ 0x81, 0x06, # ....Input (Data,Var,Rel) 181
+ 0xc0, # ...End Collection 183
+ 0x05, 0x0c, # ...Usage Page (Consumer Devices) 184
+ 0x75, 0x08, # ...Report Size (8) 186
+ 0x0a, 0x38, 0x02, # ...Usage (AC Pan) 188
+ 0x81, 0x06, # ...Input (Data,Var,Rel) 191
+ 0xc0, # ..End Collection 193
+ 0xc0, # .End Collection 194
+ 0xc0, # End Collection 195
+ ]
+ # fmt: on
+
+ def __init__(self, rdesc=report_descriptor, name=None, input_info=None):
+ super().__init__(rdesc, name, input_info)
+ self.default_reportID = 0x11
+
+ # Feature Report 12, multiplier Feature value must be set to 0b01,
+ # i.e. 1. We should extract that from the descriptor instead
+ # of hardcoding it here, but meanwhile this will do.
+ self.set_feature_report = [0x12, 0x1]
+
+ def set_report(self, req, rnum, rtype, data):
+ if rtype != self.UHID_FEATURE_REPORT:
+ raise InvalidHIDCommunication(f"Unexpected report type: {rtype}")
+ if rnum != 0x12:
+ raise InvalidHIDCommunication(f"Unexpected report number: {rnum}")
+
+ if data != self.set_feature_report:
+ raise InvalidHIDCommunication(
+ f"Unexpected data: {data}, expected {self.set_feature_report}"
+ )
+
+ self.wheel_multiplier = 4
+
+ return 0
+
+
+class BadResolutionMultiplierMouse(ResolutionMultiplierMouse):
+ def set_report(self, req, rnum, rtype, data):
+ super().set_report(req, rnum, rtype, data)
+
+ self.wheel_multiplier = 1
+ self.hwheel_multiplier = 1
+ return 32 # EPIPE
+
+
+class ResolutionMultiplierHWheelMouse(TwoWheelMouse):
+ # fmt: off
+ report_descriptor = [
+ 0x05, 0x01, # Usage Page (Generic Desktop) 0
+ 0x09, 0x02, # Usage (Mouse) 2
+ 0xa1, 0x01, # Collection (Application) 4
+ 0x05, 0x01, # .Usage Page (Generic Desktop) 6
+ 0x09, 0x02, # .Usage (Mouse) 8
+ 0xa1, 0x02, # .Collection (Logical) 10
+ 0x85, 0x1a, # ..Report ID (26) 12
+ 0x09, 0x01, # ..Usage (Pointer) 14
+ 0xa1, 0x00, # ..Collection (Physical) 16
+ 0x05, 0x09, # ...Usage Page (Button) 18
+ 0x19, 0x01, # ...Usage Minimum (1) 20
+ 0x29, 0x05, # ...Usage Maximum (5) 22
+ 0x95, 0x05, # ...Report Count (5) 24
+ 0x75, 0x01, # ...Report Size (1) 26
+ 0x15, 0x00, # ...Logical Minimum (0) 28
+ 0x25, 0x01, # ...Logical Maximum (1) 30
+ 0x81, 0x02, # ...Input (Data,Var,Abs) 32
+ 0x75, 0x03, # ...Report Size (3) 34
+ 0x95, 0x01, # ...Report Count (1) 36
+ 0x81, 0x01, # ...Input (Cnst,Arr,Abs) 38
+ 0x05, 0x01, # ...Usage Page (Generic Desktop) 40
+ 0x09, 0x30, # ...Usage (X) 42
+ 0x09, 0x31, # ...Usage (Y) 44
+ 0x95, 0x02, # ...Report Count (2) 46
+ 0x75, 0x10, # ...Report Size (16) 48
+ 0x16, 0x01, 0x80, # ...Logical Minimum (-32767) 50
+ 0x26, 0xff, 0x7f, # ...Logical Maximum (32767) 53
+ 0x81, 0x06, # ...Input (Data,Var,Rel) 56
+ 0xa1, 0x02, # ...Collection (Logical) 58
+ 0x85, 0x12, # ....Report ID (18) 60
+ 0x09, 0x48, # ....Usage (Resolution Multiplier) 62
+ 0x95, 0x01, # ....Report Count (1) 64
+ 0x75, 0x02, # ....Report Size (2) 66
+ 0x15, 0x00, # ....Logical Minimum (0) 68
+ 0x25, 0x01, # ....Logical Maximum (1) 70
+ 0x35, 0x01, # ....Physical Minimum (1) 72
+ 0x45, 0x0c, # ....Physical Maximum (12) 74
+ 0xb1, 0x02, # ....Feature (Data,Var,Abs) 76
+ 0x85, 0x1a, # ....Report ID (26) 78
+ 0x09, 0x38, # ....Usage (Wheel) 80
+ 0x35, 0x00, # ....Physical Minimum (0) 82
+ 0x45, 0x00, # ....Physical Maximum (0) 84
+ 0x95, 0x01, # ....Report Count (1) 86
+ 0x75, 0x10, # ....Report Size (16) 88
+ 0x16, 0x01, 0x80, # ....Logical Minimum (-32767) 90
+ 0x26, 0xff, 0x7f, # ....Logical Maximum (32767) 93
+ 0x81, 0x06, # ....Input (Data,Var,Rel) 96
+ 0xc0, # ...End Collection 98
+ 0xa1, 0x02, # ...Collection (Logical) 99
+ 0x85, 0x12, # ....Report ID (18) 101
+ 0x09, 0x48, # ....Usage (Resolution Multiplier) 103
+ 0x75, 0x02, # ....Report Size (2) 105
+ 0x15, 0x00, # ....Logical Minimum (0) 107
+ 0x25, 0x01, # ....Logical Maximum (1) 109
+ 0x35, 0x01, # ....Physical Minimum (1) 111
+ 0x45, 0x0c, # ....Physical Maximum (12) 113
+ 0xb1, 0x02, # ....Feature (Data,Var,Abs) 115
+ 0x35, 0x00, # ....Physical Minimum (0) 117
+ 0x45, 0x00, # ....Physical Maximum (0) 119
+ 0x75, 0x04, # ....Report Size (4) 121
+ 0xb1, 0x01, # ....Feature (Cnst,Arr,Abs) 123
+ 0x85, 0x1a, # ....Report ID (26) 125
+ 0x05, 0x0c, # ....Usage Page (Consumer Devices) 127
+ 0x95, 0x01, # ....Report Count (1) 129
+ 0x75, 0x10, # ....Report Size (16) 131
+ 0x16, 0x01, 0x80, # ....Logical Minimum (-32767) 133
+ 0x26, 0xff, 0x7f, # ....Logical Maximum (32767) 136
+ 0x0a, 0x38, 0x02, # ....Usage (AC Pan) 139
+ 0x81, 0x06, # ....Input (Data,Var,Rel) 142
+ 0xc0, # ...End Collection 144
+ 0xc0, # ..End Collection 145
+ 0xc0, # .End Collection 146
+ 0xc0, # End Collection 147
+ ]
+ # fmt: on
+
+ def __init__(self, rdesc=report_descriptor, name=None, input_info=None):
+ super().__init__(rdesc, name, input_info)
+ self.default_reportID = 0x1A
+
+ # Feature Report 12, multiplier Feature value must be set to 0b0101,
+ # i.e. 5. We should extract that from the descriptor instead
+ # of hardcoding it here, but meanwhile this will do.
+ self.set_feature_report = [0x12, 0x5]
+
+ def set_report(self, req, rnum, rtype, data):
+ super().set_report(req, rnum, rtype, data)
+
+ self.wheel_multiplier = 12
+ self.hwheel_multiplier = 12
+
+ return 0
+
+
+class BaseTest:
+ class TestMouse(base.BaseTestCase.TestUhid):
+ def test_buttons(self):
+ """check for button reliability."""
+ uhdev = self.uhdev
+ evdev = uhdev.get_evdev()
+ syn_event = self.syn_event
+
+ r = uhdev.event(0, 0, (None, True, None))
+ expected_event = libevdev.InputEvent(libevdev.EV_KEY.BTN_RIGHT, 1)
+ events = uhdev.next_sync_events()
+ self.debug_reports(r, uhdev, events)
+ self.assertInputEventsIn((syn_event, expected_event), events)
+ assert evdev.value[libevdev.EV_KEY.BTN_RIGHT] == 1
+
+ r = uhdev.event(0, 0, (None, False, None))
+ expected_event = libevdev.InputEvent(libevdev.EV_KEY.BTN_RIGHT, 0)
+ events = uhdev.next_sync_events()
+ self.debug_reports(r, uhdev, events)
+ self.assertInputEventsIn((syn_event, expected_event), events)
+ assert evdev.value[libevdev.EV_KEY.BTN_RIGHT] == 0
+
+ r = uhdev.event(0, 0, (None, None, True))
+ expected_event = libevdev.InputEvent(libevdev.EV_KEY.BTN_MIDDLE, 1)
+ events = uhdev.next_sync_events()
+ self.debug_reports(r, uhdev, events)
+ self.assertInputEventsIn((syn_event, expected_event), events)
+ assert evdev.value[libevdev.EV_KEY.BTN_MIDDLE] == 1
+
+ r = uhdev.event(0, 0, (None, None, False))
+ expected_event = libevdev.InputEvent(libevdev.EV_KEY.BTN_MIDDLE, 0)
+ events = uhdev.next_sync_events()
+ self.debug_reports(r, uhdev, events)
+ self.assertInputEventsIn((syn_event, expected_event), events)
+ assert evdev.value[libevdev.EV_KEY.BTN_MIDDLE] == 0
+
+ r = uhdev.event(0, 0, (True, None, None))
+ expected_event = libevdev.InputEvent(libevdev.EV_KEY.BTN_LEFT, 1)
+ events = uhdev.next_sync_events()
+ self.debug_reports(r, uhdev, events)
+ self.assertInputEventsIn((syn_event, expected_event), events)
+ assert evdev.value[libevdev.EV_KEY.BTN_LEFT] == 1
+
+ r = uhdev.event(0, 0, (False, None, None))
+ expected_event = libevdev.InputEvent(libevdev.EV_KEY.BTN_LEFT, 0)
+ events = uhdev.next_sync_events()
+ self.debug_reports(r, uhdev, events)
+ self.assertInputEventsIn((syn_event, expected_event), events)
+ assert evdev.value[libevdev.EV_KEY.BTN_LEFT] == 0
+
+ r = uhdev.event(0, 0, (True, True, None))
+ expected_event0 = libevdev.InputEvent(libevdev.EV_KEY.BTN_LEFT, 1)
+ expected_event1 = libevdev.InputEvent(libevdev.EV_KEY.BTN_RIGHT, 1)
+ events = uhdev.next_sync_events()
+ self.debug_reports(r, uhdev, events)
+ self.assertInputEventsIn(
+ (syn_event, expected_event0, expected_event1), events
+ )
+ assert evdev.value[libevdev.EV_KEY.BTN_RIGHT] == 1
+ assert evdev.value[libevdev.EV_KEY.BTN_LEFT] == 1
+
+ r = uhdev.event(0, 0, (False, None, None))
+ expected_event = libevdev.InputEvent(libevdev.EV_KEY.BTN_LEFT, 0)
+ events = uhdev.next_sync_events()
+ self.debug_reports(r, uhdev, events)
+ self.assertInputEventsIn((syn_event, expected_event), events)
+ assert evdev.value[libevdev.EV_KEY.BTN_RIGHT] == 1
+ assert evdev.value[libevdev.EV_KEY.BTN_LEFT] == 0
+
+ r = uhdev.event(0, 0, (None, False, None))
+ expected_event = libevdev.InputEvent(libevdev.EV_KEY.BTN_RIGHT, 0)
+ events = uhdev.next_sync_events()
+ self.debug_reports(r, uhdev, events)
+ self.assertInputEventsIn((syn_event, expected_event), events)
+ assert evdev.value[libevdev.EV_KEY.BTN_RIGHT] == 0
+ assert evdev.value[libevdev.EV_KEY.BTN_LEFT] == 0
+
+ def test_relative(self):
+ """Check for relative events."""
+ uhdev = self.uhdev
+
+ syn_event = self.syn_event
+
+ r = uhdev.event(0, -1)
+ expected_event = libevdev.InputEvent(libevdev.EV_REL.REL_Y, -1)
+ events = uhdev.next_sync_events()
+ self.debug_reports(r, uhdev, events)
+ self.assertInputEvents((syn_event, expected_event), events)
+
+ r = uhdev.event(1, 0)
+ expected_event = libevdev.InputEvent(libevdev.EV_REL.REL_X, 1)
+ events = uhdev.next_sync_events()
+ self.debug_reports(r, uhdev, events)
+ self.assertInputEvents((syn_event, expected_event), events)
+
+ r = uhdev.event(-1, 2)
+ expected_event0 = libevdev.InputEvent(libevdev.EV_REL.REL_X, -1)
+ expected_event1 = libevdev.InputEvent(libevdev.EV_REL.REL_Y, 2)
+ events = uhdev.next_sync_events()
+ self.debug_reports(r, uhdev, events)
+ self.assertInputEvents(
+ (syn_event, expected_event0, expected_event1), events
+ )
+
+
+class TestSimpleMouse(BaseTest.TestMouse):
+ def create_device(self):
+ return ButtonMouse()
+
+ def test_rdesc(self):
+ """Check that the testsuite actually manages to format the
+ reports according to the report descriptors.
+ No kernel device is used here"""
+ uhdev = self.uhdev
+
+ event = (0, 0, (None, None, None))
+ assert uhdev.fake_report(*event) == uhdev.create_report(*event)
+
+ event = (0, 0, (None, True, None))
+ assert uhdev.fake_report(*event) == uhdev.create_report(*event)
+
+ event = (0, 0, (True, True, None))
+ assert uhdev.fake_report(*event) == uhdev.create_report(*event)
+
+ event = (0, 0, (False, False, False))
+ assert uhdev.fake_report(*event) == uhdev.create_report(*event)
+
+ event = (1, 0, (True, False, True))
+ assert uhdev.fake_report(*event) == uhdev.create_report(*event)
+
+ event = (-1, 0, (True, False, True))
+ assert uhdev.fake_report(*event) == uhdev.create_report(*event)
+
+ event = (-5, 5, (True, False, True))
+ assert uhdev.fake_report(*event) == uhdev.create_report(*event)
+
+ event = (-127, 127, (True, False, True))
+ assert uhdev.fake_report(*event) == uhdev.create_report(*event)
+
+ event = (0, -128, (True, False, True))
+ with pytest.raises(hidtools.hid.RangeError):
+ uhdev.create_report(*event)
+
+
+class TestWheelMouse(BaseTest.TestMouse):
+ def create_device(self):
+ return WheelMouse()
+
+ def is_wheel_highres(self, uhdev):
+ evdev = uhdev.get_evdev()
+ assert evdev.has(libevdev.EV_REL.REL_WHEEL)
+ return evdev.has(libevdev.EV_REL.REL_WHEEL_HI_RES)
+
+ def test_wheel(self):
+ uhdev = self.uhdev
+
+ # check if the kernel is high res wheel compatible
+ high_res_wheel = self.is_wheel_highres(uhdev)
+
+ syn_event = self.syn_event
+ # The Resolution Multiplier is applied to the HID reports, so we
+ # need to pre-multiply too.
+ mult = uhdev.wheel_multiplier
+
+ r = uhdev.event(0, 0, wheels=1 * mult)
+ expected = [syn_event]
+ expected.append(libevdev.InputEvent(libevdev.EV_REL.REL_WHEEL, 1))
+ if high_res_wheel:
+ expected.append(libevdev.InputEvent(libevdev.EV_REL.REL_WHEEL_HI_RES, 120))
+ events = uhdev.next_sync_events()
+ self.debug_reports(r, uhdev, events)
+ self.assertInputEvents(expected, events)
+
+ r = uhdev.event(0, 0, wheels=-1 * mult)
+ expected = [syn_event]
+ expected.append(libevdev.InputEvent(libevdev.EV_REL.REL_WHEEL, -1))
+ if high_res_wheel:
+ expected.append(libevdev.InputEvent(libevdev.EV_REL.REL_WHEEL_HI_RES, -120))
+ events = uhdev.next_sync_events()
+ self.debug_reports(r, uhdev, events)
+ self.assertInputEvents(expected, events)
+
+ r = uhdev.event(-1, 2, wheels=3 * mult)
+ expected = [syn_event]
+ expected.append(libevdev.InputEvent(libevdev.EV_REL.REL_X, -1))
+ expected.append(libevdev.InputEvent(libevdev.EV_REL.REL_Y, 2))
+ expected.append(libevdev.InputEvent(libevdev.EV_REL.REL_WHEEL, 3))
+ if high_res_wheel:
+ expected.append(libevdev.InputEvent(libevdev.EV_REL.REL_WHEEL_HI_RES, 360))
+ events = uhdev.next_sync_events()
+ self.debug_reports(r, uhdev, events)
+ self.assertInputEvents(expected, events)
+
+
+class TestTwoWheelMouse(TestWheelMouse):
+ def create_device(self):
+ return TwoWheelMouse()
+
+ def is_hwheel_highres(self, uhdev):
+ evdev = uhdev.get_evdev()
+ assert evdev.has(libevdev.EV_REL.REL_HWHEEL)
+ return evdev.has(libevdev.EV_REL.REL_HWHEEL_HI_RES)
+
+ def test_ac_pan(self):
+ uhdev = self.uhdev
+
+ # check if the kernel is high res wheel compatible
+ high_res_wheel = self.is_wheel_highres(uhdev)
+ high_res_hwheel = self.is_hwheel_highres(uhdev)
+ assert high_res_wheel == high_res_hwheel
+
+ syn_event = self.syn_event
+ # The Resolution Multiplier is applied to the HID reports, so we
+ # need to pre-multiply too.
+ hmult = uhdev.hwheel_multiplier
+ vmult = uhdev.wheel_multiplier
+
+ r = uhdev.event(0, 0, wheels=(0, 1 * hmult))
+ expected = [syn_event]
+ expected.append(libevdev.InputEvent(libevdev.EV_REL.REL_HWHEEL, 1))
+ if high_res_hwheel:
+ expected.append(libevdev.InputEvent(libevdev.EV_REL.REL_HWHEEL_HI_RES, 120))
+ events = uhdev.next_sync_events()
+ self.debug_reports(r, uhdev, events)
+ self.assertInputEvents(expected, events)
+
+ r = uhdev.event(0, 0, wheels=(0, -1 * hmult))
+ expected = [syn_event]
+ expected.append(libevdev.InputEvent(libevdev.EV_REL.REL_HWHEEL, -1))
+ if high_res_hwheel:
+ expected.append(
+ libevdev.InputEvent(libevdev.EV_REL.REL_HWHEEL_HI_RES, -120)
+ )
+ events = uhdev.next_sync_events()
+ self.debug_reports(r, uhdev, events)
+ self.assertInputEvents(expected, events)
+
+ r = uhdev.event(-1, 2, wheels=(0, 3 * hmult))
+ expected = [syn_event]
+ expected.append(libevdev.InputEvent(libevdev.EV_REL.REL_X, -1))
+ expected.append(libevdev.InputEvent(libevdev.EV_REL.REL_Y, 2))
+ expected.append(libevdev.InputEvent(libevdev.EV_REL.REL_HWHEEL, 3))
+ if high_res_hwheel:
+ expected.append(libevdev.InputEvent(libevdev.EV_REL.REL_HWHEEL_HI_RES, 360))
+ events = uhdev.next_sync_events()
+ self.debug_reports(r, uhdev, events)
+ self.assertInputEvents(expected, events)
+
+ r = uhdev.event(-1, 2, wheels=(-3 * vmult, 4 * hmult))
+ expected = [syn_event]
+ expected.append(libevdev.InputEvent(libevdev.EV_REL.REL_X, -1))
+ expected.append(libevdev.InputEvent(libevdev.EV_REL.REL_Y, 2))
+ expected.append(libevdev.InputEvent(libevdev.EV_REL.REL_WHEEL, -3))
+ if high_res_wheel:
+ expected.append(libevdev.InputEvent(libevdev.EV_REL.REL_WHEEL_HI_RES, -360))
+ expected.append(libevdev.InputEvent(libevdev.EV_REL.REL_HWHEEL, 4))
+ if high_res_wheel:
+ expected.append(libevdev.InputEvent(libevdev.EV_REL.REL_HWHEEL_HI_RES, 480))
+ events = uhdev.next_sync_events()
+ self.debug_reports(r, uhdev, events)
+ self.assertInputEvents(expected, events)
+
+
+class TestResolutionMultiplierMouse(TestTwoWheelMouse):
+ def create_device(self):
+ return ResolutionMultiplierMouse()
+
+ def is_wheel_highres(self, uhdev):
+ high_res = super().is_wheel_highres(uhdev)
+
+ if not high_res:
+ # the kernel doesn't seem to support the high res wheel mice,
+ # make sure we haven't triggered the feature
+ assert uhdev.wheel_multiplier == 1
+
+ return high_res
+
+ def test_resolution_multiplier_wheel(self):
+ uhdev = self.uhdev
+
+ if not self.is_wheel_highres(uhdev):
+ pytest.skip("Kernel not compatible, we can not trigger the conditions")
+
+ assert uhdev.wheel_multiplier > 1
+ assert 120 % uhdev.wheel_multiplier == 0
+
+ def test_wheel_with_multiplier(self):
+ uhdev = self.uhdev
+
+ if not self.is_wheel_highres(uhdev):
+ pytest.skip("Kernel not compatible, we can not trigger the conditions")
+
+ assert uhdev.wheel_multiplier > 1
+
+ syn_event = self.syn_event
+ mult = uhdev.wheel_multiplier
+
+ r = uhdev.event(0, 0, wheels=1)
+ expected = [syn_event]
+ expected.append(
+ libevdev.InputEvent(libevdev.EV_REL.REL_WHEEL_HI_RES, 120 / mult)
+ )
+ events = uhdev.next_sync_events()
+ self.debug_reports(r, uhdev, events)
+ self.assertInputEvents(expected, events)
+
+ r = uhdev.event(0, 0, wheels=-1)
+ expected = [syn_event]
+ expected.append(
+ libevdev.InputEvent(libevdev.EV_REL.REL_WHEEL_HI_RES, -120 / mult)
+ )
+ events = uhdev.next_sync_events()
+ self.debug_reports(r, uhdev, events)
+ self.assertInputEvents(expected, events)
+
+ expected = [syn_event]
+ expected.append(libevdev.InputEvent(libevdev.EV_REL.REL_X, 1))
+ expected.append(libevdev.InputEvent(libevdev.EV_REL.REL_Y, -2))
+ expected.append(
+ libevdev.InputEvent(libevdev.EV_REL.REL_WHEEL_HI_RES, 120 / mult)
+ )
+
+ for _ in range(mult - 1):
+ r = uhdev.event(1, -2, wheels=1)
+ events = uhdev.next_sync_events()
+ self.debug_reports(r, uhdev, events)
+ self.assertInputEvents(expected, events)
+
+ r = uhdev.event(1, -2, wheels=1)
+ expected.append(libevdev.InputEvent(libevdev.EV_REL.REL_WHEEL, 1))
+ events = uhdev.next_sync_events()
+ self.debug_reports(r, uhdev, events)
+ self.assertInputEvents(expected, events)
+
+
+class TestBadResolutionMultiplierMouse(TestTwoWheelMouse):
+ def create_device(self):
+ return BadResolutionMultiplierMouse()
+
+ def is_wheel_highres(self, uhdev):
+ high_res = super().is_wheel_highres(uhdev)
+
+ assert uhdev.wheel_multiplier == 1
+
+ return high_res
+
+ def test_resolution_multiplier_wheel(self):
+ uhdev = self.uhdev
+
+ assert uhdev.wheel_multiplier == 1
+
+
+class TestResolutionMultiplierHWheelMouse(TestResolutionMultiplierMouse):
+ def create_device(self):
+ return ResolutionMultiplierHWheelMouse()
+
+ def is_hwheel_highres(self, uhdev):
+ high_res = super().is_hwheel_highres(uhdev)
+
+ if not high_res:
+ # the kernel doesn't seem to support the high res wheel mice,
+ # make sure we haven't triggered the feature
+ assert uhdev.hwheel_multiplier == 1
+
+ return high_res
+
+ def test_resolution_multiplier_ac_pan(self):
+ uhdev = self.uhdev
+
+ if not self.is_hwheel_highres(uhdev):
+ pytest.skip("Kernel not compatible, we can not trigger the conditions")
+
+ assert uhdev.hwheel_multiplier > 1
+ assert 120 % uhdev.hwheel_multiplier == 0
+
+ def test_ac_pan_with_multiplier(self):
+ uhdev = self.uhdev
+
+ if not self.is_hwheel_highres(uhdev):
+ pytest.skip("Kernel not compatible, we can not trigger the conditions")
+
+ assert uhdev.hwheel_multiplier > 1
+
+ syn_event = self.syn_event
+ hmult = uhdev.hwheel_multiplier
+
+ r = uhdev.event(0, 0, wheels=(0, 1))
+ expected = [syn_event]
+ expected.append(
+ libevdev.InputEvent(libevdev.EV_REL.REL_HWHEEL_HI_RES, 120 / hmult)
+ )
+ events = uhdev.next_sync_events()
+ self.debug_reports(r, uhdev, events)
+ self.assertInputEvents(expected, events)
+
+ r = uhdev.event(0, 0, wheels=(0, -1))
+ expected = [syn_event]
+ expected.append(
+ libevdev.InputEvent(libevdev.EV_REL.REL_HWHEEL_HI_RES, -120 / hmult)
+ )
+ events = uhdev.next_sync_events()
+ self.debug_reports(r, uhdev, events)
+ self.assertInputEvents(expected, events)
+
+ expected = [syn_event]
+ expected.append(libevdev.InputEvent(libevdev.EV_REL.REL_X, 1))
+ expected.append(libevdev.InputEvent(libevdev.EV_REL.REL_Y, -2))
+ expected.append(
+ libevdev.InputEvent(libevdev.EV_REL.REL_HWHEEL_HI_RES, 120 / hmult)
+ )
+
+ for _ in range(hmult - 1):
+ r = uhdev.event(1, -2, wheels=(0, 1))
+ events = uhdev.next_sync_events()
+ self.debug_reports(r, uhdev, events)
+ self.assertInputEvents(expected, events)
+
+ r = uhdev.event(1, -2, wheels=(0, 1))
+ expected.append(libevdev.InputEvent(libevdev.EV_REL.REL_HWHEEL, 1))
+ events = uhdev.next_sync_events()
+ self.debug_reports(r, uhdev, events)
+ self.assertInputEvents(expected, events)
+
+
+class TestMiMouse(TestWheelMouse):
+ def create_device(self):
+ return MIDongleMIWirelessMouse()
+
+ def assertInputEvents(self, expected_events, effective_events):
+ # Buttons and x/y are spread over two HID reports, so we can get two
+ # event frames for this device.
+ remaining = self.assertInputEventsIn(expected_events, effective_events)
+ try:
+ remaining.remove(libevdev.InputEvent(libevdev.EV_SYN.SYN_REPORT, 0))
+ except ValueError:
+ # If there's no SYN_REPORT in the list, continue and let the
+ # assert below print out the real error
+ pass
+ assert remaining == []
diff --git a/tools/testing/selftests/hid/tests/test_multitouch.py b/tools/testing/selftests/hid/tests/test_multitouch.py
new file mode 100644
index 000000000000..4265012231c6
--- /dev/null
+++ b/tools/testing/selftests/hid/tests/test_multitouch.py
@@ -0,0 +1,2088 @@
+#!/bin/env python3
+# SPDX-License-Identifier: GPL-2.0
+# -*- coding: utf-8 -*-
+#
+# Copyright (c) 2017 Benjamin Tissoires <benjamin.tissoires@gmail.com>
+# Copyright (c) 2017 Red Hat, Inc.
+#
+
+from . import base
+from hidtools.hut import HUT
+from hidtools.util import BusType
+import libevdev
+import logging
+import pytest
+import sys
+import time
+
+logger = logging.getLogger("hidtools.test.multitouch")
+
+KERNEL_MODULE = ("hid-multitouch", "hid_multitouch")
+
+
+def BIT(x):
+ return 1 << x
+
+
+mt_quirks = {
+ "NOT_SEEN_MEANS_UP": BIT(0),
+ "SLOT_IS_CONTACTID": BIT(1),
+ "CYPRESS": BIT(2),
+ "SLOT_IS_CONTACTNUMBER": BIT(3),
+ "ALWAYS_VALID": BIT(4),
+ "VALID_IS_INRANGE": BIT(5),
+ "VALID_IS_CONFIDENCE": BIT(6),
+ "CONFIDENCE": BIT(7),
+ "SLOT_IS_CONTACTID_MINUS_ONE": BIT(8),
+ "NO_AREA": BIT(9),
+ "IGNORE_DUPLICATES": BIT(10),
+ "HOVERING": BIT(11),
+ "CONTACT_CNT_ACCURATE": BIT(12),
+ "FORCE_GET_FEATURE": BIT(13),
+ "FIX_CONST_CONTACT_ID": BIT(14),
+ "TOUCH_SIZE_SCALING": BIT(15),
+ "STICKY_FINGERS": BIT(16),
+ "ASUS_CUSTOM_UP": BIT(17),
+ "WIN8_PTP_BUTTONS": BIT(18),
+ "SEPARATE_APP_REPORT": BIT(19),
+ "MT_QUIRK_FORCE_MULTI_INPUT": BIT(20),
+}
+
+
+class Data(object):
+ pass
+
+
+class Touch(object):
+ def __init__(self, id, x, y):
+ self.contactid = id
+ self.x = x
+ self.y = y
+ self.cx = x
+ self.cy = y
+ self.tipswitch = True
+ self.confidence = True
+ self.tippressure = 15
+ self.azimuth = 0
+ self.inrange = True
+ self.width = 10
+ self.height = 10
+
+
+class Pen(Touch):
+ def __init__(self, x, y):
+ super().__init__(0, x, y)
+ self.barrel = False
+ self.invert = False
+ self.eraser = False
+ self.x_tilt = False
+ self.y_tilt = False
+ self.twist = 0
+
+
+class Digitizer(base.UHIDTestDevice):
+ @classmethod
+ def msCertificationBlob(cls, reportID):
+ return f"""
+ Usage Page (Digitizers)
+ Usage (Touch Screen)
+ Collection (Application)
+ Report ID ({reportID})
+ Usage Page (0xff00)
+ Usage (0xc5)
+ Logical Minimum (0)
+ Logical Maximum (255)
+ Report Size (8)
+ Report Count (256)
+ Feature (Data,Var,Abs)
+ End Collection
+ """
+
+ def __init__(
+ self,
+ name,
+ rdesc_str=None,
+ rdesc=None,
+ application="Touch Screen",
+ physical="Finger",
+ max_contacts=None,
+ input_info=(BusType.USB, 1, 2),
+ quirks=None,
+ ):
+ super().__init__(name, application, rdesc_str, rdesc, input_info)
+ self.scantime = 0
+ self.quirks = quirks
+ if max_contacts is None:
+ self.max_contacts = sys.maxsize
+ for features in self.parsed_rdesc.feature_reports.values():
+ for feature in features:
+ if feature.usage_name in ["Contact Max"]:
+ self.max_contacts = feature.logical_max
+ for inputs in self.parsed_rdesc.input_reports.values():
+ for i in inputs:
+ if (
+ i.usage_name in ["Contact Count"]
+ and i.logical_max > 0
+ and self.max_contacts > i.logical_max
+ ):
+ self.max_contacts = i.logical_max
+ if self.max_contacts == sys.maxsize:
+ self.max_contacts = 1
+ else:
+ self.max_contacts = max_contacts
+ self.physical = physical
+ self.cur_application = application
+
+ for features in self.parsed_rdesc.feature_reports.values():
+ for feature in features:
+ if feature.usage_name == "Inputmode":
+ self.cur_application = "Mouse"
+
+ self.fields = []
+ for r in self.parsed_rdesc.input_reports.values():
+ if r.application_name == self.application:
+ physicals = [f.physical_name for f in r]
+ if self.physical not in physicals and None not in physicals:
+ continue
+ self.fields = [f.usage_name for f in r]
+
+ @property
+ def touches_in_a_report(self):
+ return self.fields.count("Contact Id")
+
+ def event(self, slots, global_data=None, contact_count=None, incr_scantime=True):
+ if incr_scantime:
+ self.scantime += 1
+ rs = []
+ # make sure we have only the required number of available slots
+ slots = slots[: self.max_contacts]
+
+ if global_data is None:
+ global_data = Data()
+ if contact_count is None:
+ global_data.contactcount = len(slots)
+ else:
+ global_data.contactcount = contact_count
+ global_data.scantime = self.scantime
+
+ while len(slots):
+ r = self.create_report(
+ application=self.cur_application, data=slots, global_data=global_data
+ )
+ self.call_input_event(r)
+ rs.append(r)
+ global_data.contactcount = 0
+ return rs
+
+ def get_report(self, req, rnum, rtype):
+ if rtype != self.UHID_FEATURE_REPORT:
+ return (1, [])
+
+ rdesc = None
+ for v in self.parsed_rdesc.feature_reports.values():
+ if v.report_ID == rnum:
+ rdesc = v
+
+ if rdesc is None:
+ return (1, [])
+
+ if "Contact Max" not in [f.usage_name for f in rdesc]:
+ return (1, [])
+
+ self.contactmax = self.max_contacts
+ r = rdesc.create_report([self], None)
+ return (0, r)
+
+ def set_report(self, req, rnum, rtype, data):
+ if rtype != self.UHID_FEATURE_REPORT:
+ return 1
+
+ rdesc = None
+ for v in self.parsed_rdesc.feature_reports.values():
+ if v.report_ID == rnum:
+ rdesc = v
+
+ if rdesc is None:
+ return 1
+
+ if "Inputmode" not in [f.usage_name for f in rdesc]:
+ return 0
+
+ Inputmode_seen = False
+ for f in rdesc:
+ if "Inputmode" == f.usage_name:
+ values = f.get_values(data)
+ assert len(values) == 1
+ value = values[0]
+
+ if not Inputmode_seen:
+ Inputmode_seen = True
+ if value == 0:
+ self.cur_application = "Mouse"
+ elif value == 2:
+ self.cur_application = "Touch Screen"
+ elif value == 3:
+ self.cur_application = "Touch Pad"
+ else:
+ if value != 0:
+ # Elan bug where the device doesn't work properly
+ # if we set twice an Input Mode in the same Feature
+ self.cur_application = "Mouse"
+
+ return 0
+
+
+class PTP(Digitizer):
+ def __init__(
+ self,
+ name,
+ type="Click Pad",
+ rdesc_str=None,
+ rdesc=None,
+ application="Touch Pad",
+ physical="Pointer",
+ max_contacts=None,
+ input_info=None,
+ ):
+ self.type = type.lower().replace(" ", "")
+ if self.type == "clickpad":
+ self.buttontype = 0
+ else: # pressurepad
+ self.buttontype = 1
+ self.clickpad_state = False
+ self.left_state = False
+ self.right_state = False
+ super().__init__(
+ name, rdesc_str, rdesc, application, physical, max_contacts, input_info
+ )
+
+ def event(
+ self,
+ slots=None,
+ click=None,
+ left=None,
+ right=None,
+ contact_count=None,
+ incr_scantime=True,
+ ):
+ # update our internal state
+ if click is not None:
+ self.clickpad_state = click
+ if left is not None:
+ self.left_state = left
+ if right is not None:
+ self.right_state = right
+
+ # now create the global data
+ global_data = Data()
+ global_data.b1 = 1 if self.clickpad_state else 0
+ global_data.b2 = 1 if self.left_state else 0
+ global_data.b3 = 1 if self.right_state else 0
+
+ if slots is None:
+ slots = [Data()]
+
+ return super().event(slots, global_data, contact_count, incr_scantime)
+
+
+class MinWin8TSParallel(Digitizer):
+ def __init__(self, max_slots):
+ self.max_slots = max_slots
+ self.phys_max = 120, 90
+ rdesc_finger_str = f"""
+ Usage Page (Digitizers)
+ Usage (Finger)
+ Collection (Logical)
+ Report Size (1)
+ Report Count (1)
+ Logical Minimum (0)
+ Logical Maximum (1)
+ Usage (Tip Switch)
+ Input (Data,Var,Abs)
+ Report Size (7)
+ Logical Maximum (127)
+ Input (Cnst,Var,Abs)
+ Report Size (8)
+ Logical Maximum (255)
+ Usage (Contact Id)
+ Input (Data,Var,Abs)
+ Report Size (16)
+ Unit Exponent (-1)
+ Unit (SILinear: cm)
+ Logical Maximum (4095)
+ Physical Minimum (0)
+ Physical Maximum ({self.phys_max[0]})
+ Usage Page (Generic Desktop)
+ Usage (X)
+ Input (Data,Var,Abs)
+ Physical Maximum ({self.phys_max[1]})
+ Usage (Y)
+ Input (Data,Var,Abs)
+ Usage Page (Digitizers)
+ Usage (Azimuth)
+ Logical Maximum (360)
+ Unit (SILinear: deg)
+ Report Size (16)
+ Input (Data,Var,Abs)
+ End Collection
+"""
+ rdesc_str = f"""
+ Usage Page (Digitizers)
+ Usage (Touch Screen)
+ Collection (Application)
+ Report ID (1)
+ {rdesc_finger_str * self.max_slots}
+ Unit Exponent (-4)
+ Unit (SILinear: s)
+ Logical Maximum (65535)
+ Physical Maximum (65535)
+ Usage Page (Digitizers)
+ Usage (Scan Time)
+ Input (Data,Var,Abs)
+ Report Size (8)
+ Logical Maximum (255)
+ Usage (Contact Count)
+ Input (Data,Var,Abs)
+ Report ID (2)
+ Logical Maximum ({self.max_slots})
+ Usage (Contact Max)
+ Feature (Data,Var,Abs)
+ End Collection
+ {Digitizer.msCertificationBlob(68)}
+"""
+ super().__init__(f"uhid test parallel {self.max_slots}", rdesc_str)
+
+
+class MinWin8TSHybrid(Digitizer):
+ def __init__(self):
+ self.max_slots = 10
+ self.phys_max = 120, 90
+ rdesc_finger_str = f"""
+ Usage Page (Digitizers)
+ Usage (Finger)
+ Collection (Logical)
+ Report Size (1)
+ Report Count (1)
+ Logical Minimum (0)
+ Logical Maximum (1)
+ Usage (Tip Switch)
+ Input (Data,Var,Abs)
+ Report Size (7)
+ Logical Maximum (127)
+ Input (Cnst,Var,Abs)
+ Report Size (8)
+ Logical Maximum (255)
+ Usage (Contact Id)
+ Input (Data,Var,Abs)
+ Report Size (16)
+ Unit Exponent (-1)
+ Unit (SILinear: cm)
+ Logical Maximum (4095)
+ Physical Minimum (0)
+ Physical Maximum ({self.phys_max[0]})
+ Usage Page (Generic Desktop)
+ Usage (X)
+ Input (Data,Var,Abs)
+ Physical Maximum ({self.phys_max[1]})
+ Usage (Y)
+ Input (Data,Var,Abs)
+ End Collection
+"""
+ rdesc_str = f"""
+ Usage Page (Digitizers)
+ Usage (Touch Screen)
+ Collection (Application)
+ Report ID (1)
+ {rdesc_finger_str * 2}
+ Unit Exponent (-4)
+ Unit (SILinear: s)
+ Logical Maximum (65535)
+ Physical Maximum (65535)
+ Usage Page (Digitizers)
+ Usage (Scan Time)
+ Input (Data,Var,Abs)
+ Report Size (8)
+ Logical Maximum (255)
+ Usage (Contact Count)
+ Input (Data,Var,Abs)
+ Report ID (2)
+ Logical Maximum ({self.max_slots})
+ Usage (Contact Max)
+ Feature (Data,Var,Abs)
+ End Collection
+ {Digitizer.msCertificationBlob(68)}
+"""
+ super().__init__("uhid test hybrid", rdesc_str)
+
+
+class Win8TSConfidence(Digitizer):
+ def __init__(self, max_slots):
+ self.max_slots = max_slots
+ self.phys_max = 120, 90
+ rdesc_finger_str = f"""
+ Usage Page (Digitizers)
+ Usage (Finger)
+ Collection (Logical)
+ Report Size (1)
+ Report Count (1)
+ Logical Minimum (0)
+ Logical Maximum (1)
+ Usage (Tip Switch)
+ Input (Data,Var,Abs)
+ Usage (Confidence)
+ Input (Data,Var,Abs)
+ Report Size (6)
+ Logical Maximum (127)
+ Input (Cnst,Var,Abs)
+ Report Size (8)
+ Logical Maximum (255)
+ Usage (Contact Id)
+ Input (Data,Var,Abs)
+ Report Size (16)
+ Unit Exponent (-1)
+ Unit (SILinear: cm)
+ Logical Maximum (4095)
+ Physical Minimum (0)
+ Physical Maximum ({self.phys_max[0]})
+ Usage Page (Generic Desktop)
+ Usage (X)
+ Input (Data,Var,Abs)
+ Physical Maximum ({self.phys_max[1]})
+ Usage (Y)
+ Input (Data,Var,Abs)
+ Usage Page (Digitizers)
+ Usage (Azimuth)
+ Logical Maximum (360)
+ Unit (SILinear: deg)
+ Report Size (16)
+ Input (Data,Var,Abs)
+ End Collection
+"""
+ rdesc_str = f"""
+ Usage Page (Digitizers)
+ Usage (Touch Screen)
+ Collection (Application)
+ Report ID (1)
+ {rdesc_finger_str * self.max_slots}
+ Unit Exponent (-4)
+ Unit (SILinear: s)
+ Logical Maximum (65535)
+ Physical Maximum (65535)
+ Usage Page (Digitizers)
+ Usage (Scan Time)
+ Input (Data,Var,Abs)
+ Report Size (8)
+ Logical Maximum (255)
+ Usage (Contact Count)
+ Input (Data,Var,Abs)
+ Report ID (2)
+ Logical Maximum ({self.max_slots})
+ Usage (Contact Max)
+ Feature (Data,Var,Abs)
+ End Collection
+ {Digitizer.msCertificationBlob(68)}
+"""
+ super().__init__(f"uhid test confidence {self.max_slots}", rdesc_str)
+
+
+class SmartTechDigitizer(Digitizer):
+ def __init__(self, name, input_info):
+ super().__init__(
+ name,
+ rdesc="05 01 09 02 a1 01 85 01 09 01 a1 00 05 09 19 01 29 03 15 00 25 01 95 03 75 01 81 02 95 05 81 03 05 01 15 00 26 ff 0f 55 0e 65 11 75 10 95 01 35 00 46 c8 37 09 30 81 02 46 68 1f 09 31 81 02 45 00 c0 c0 05 0d 09 06 15 00 26 ff 00 a1 01 85 02 75 08 95 3f 09 00 82 02 01 95 3f 09 00 92 02 01 c0 05 0d 09 04 a1 01 85 05 05 0d 09 20 a1 00 25 01 75 01 95 02 09 42 09 45 81 02 75 06 95 01 09 30 81 02 26 ff 00 75 08 09 51 81 02 75 10 09 38 81 02 95 02 26 ff 0f 09 48 09 49 81 02 05 01 09 30 09 31 81 02 c0 05 0d 09 20 a1 00 25 01 75 01 95 02 09 42 09 45 81 02 75 06 95 01 09 30 81 02 26 ff 00 75 08 09 51 81 02 75 10 09 38 81 02 95 02 26 ff 0f 09 48 09 49 81 02 05 01 09 30 09 31 81 02 c0 05 0d 09 20 a1 00 25 01 75 01 95 02 09 42 09 45 81 02 75 06 95 01 09 30 81 02 26 ff 00 75 08 09 51 81 02 75 10 09 38 81 02 95 02 26 ff 0f 09 48 09 49 81 02 05 01 09 30 09 31 81 02 c0 05 0d 09 20 a1 00 25 01 75 01 95 02 09 42 09 45 81 02 75 06 95 01 09 30 81 02 26 ff 00 75 08 09 51 81 02 75 10 09 38 81 02 95 02 26 ff 0f 09 48 09 49 81 02 05 01 09 30 09 31 81 02 c0 05 0d 75 08 95 01 15 00 25 0a 09 54 81 02 09 55 b1 02 c0 05 0d 09 0e a1 01 85 04 09 23 a1 02 15 00 25 02 75 08 95 02 09 52 09 53 b1 02 c0 c0 05 0d 09 04 a1 01 85 03 05 0d 09 22 a1 02 15 00 25 01 75 01 95 02 09 42 09 47 81 02 95 02 81 03 75 04 95 01 25 0f 09 30 81 02 26 ff 00 75 08 95 01 09 51 81 02 75 10 27 a0 8c 00 00 55 0e 65 14 47 a0 8c 00 00 09 3f 81 02 65 11 26 ff 0f 46 c8 37 09 48 81 02 46 68 1f 09 49 81 02 05 01 46 c8 37 09 30 81 02 46 68 1f 09 31 81 02 45 00 c0 05 0d 09 22 a1 02 15 00 25 01 75 01 95 02 09 42 09 47 81 02 95 02 81 03 75 04 95 01 25 0f 09 30 81 02 26 ff 00 75 08 95 01 09 51 81 02 75 10 27 a0 8c 00 00 55 0e 65 14 47 a0 8c 00 00 09 3f 81 02 65 11 26 ff 0f 46 c8 37 09 48 81 02 46 68 1f 09 49 81 02 05 01 46 c8 37 09 30 81 02 46 68 1f 09 31 81 02 45 00 c0 05 0d 09 22 a1 02 15 00 25 01 75 01 95 02 09 42 09 47 81 02 95 02 81 03 75 04 95 01 25 0f 09 30 81 02 26 ff 00 75 08 95 01 09 51 81 02 75 10 27 a0 8c 00 00 55 0e 65 14 47 a0 8c 00 00 09 3f 81 02 65 11 26 ff 0f 46 c8 37 09 48 81 02 46 68 1f 09 49 81 02 05 01 46 c8 37 09 30 81 02 46 68 1f 09 31 81 02 45 00 c0 05 0d 09 22 a1 02 15 00 25 01 75 01 95 02 09 42 09 47 81 02 95 02 81 03 75 04 95 01 25 0f 09 30 81 02 26 ff 00 75 08 95 01 09 51 81 02 75 10 27 a0 8c 00 00 55 0e 65 14 47 a0 8c 00 00 09 3f 81 02 65 11 26 ff 0f 46 c8 37 09 48 81 02 46 68 1f 09 49 81 02 05 01 46 c8 37 09 30 81 02 46 68 1f 09 31 81 02 45 00 c0 05 0d 75 08 95 01 15 00 25 0a 09 54 81 02 09 55 b1 02 c0 05 0d 09 04 a1 01 85 06 09 22 a1 02 15 00 25 01 75 01 95 02 09 42 09 47 81 02 95 06 81 03 95 01 75 10 65 11 55 0e 26 ff 0f 46 c8 37 09 48 81 02 46 68 1f 09 49 81 02 05 01 46 c8 37 09 30 81 02 46 68 1f 09 31 81 02 45 00 c0 c0 05 0d 09 02 a1 01 85 07 09 20 a1 02 25 01 75 01 95 04 09 42 09 44 09 3c 09 45 81 02 75 04 95 01 25 0f 09 30 81 02 26 ff 00 75 08 09 38 81 02 75 10 27 a0 8c 00 00 55 0e 65 14 47 a0 8c 00 00 09 3f 81 02 65 11 26 ff 0f 46 c8 37 09 48 81 02 46 68 1f 09 49 81 02 05 01 46 c8 37 09 30 81 02 46 68 1f 09 31 81 02 45 00 c0 c0 05 0d 09 02 a1 01 85 08 09 20 a1 02 25 01 75 01 95 04 09 42 09 44 09 3c 09 45 81 02 75 04 95 01 25 0f 09 30 81 02 26 ff 00 75 08 09 38 81 02 75 10 27 a0 8c 00 00 55 0e 65 14 47 a0 8c 00 00 09 3f 81 02 65 11 26 ff 0f 46 c8 37 09 48 81 02 46 68 1f 09 49 81 02 05 01 46 c8 37 09 30 81 02 46 68 1f 09 31 81 02 45 00 c0 c0 05 0d 09 02 a1 01 85 09 09 20 a1 02 25 01 75 01 95 04 09 42 09 44 09 3c 09 45 81 02 75 04 95 01 25 0f 09 30 81 02 26 ff 00 75 08 09 38 81 02 75 10 27 a0 8c 00 00 55 0e 65 14 47 a0 8c 00 00 09 3f 81 02 65 11 26 ff 0f 46 c8 37 09 48 81 02 46 68 1f 09 49 81 02 05 01 46 c8 37 09 30 81 02 46 68 1f 09 31 81 02 45 00 c0 c0 05 0d 09 02 a1 01 85 0a 09 20 a1 02 25 01 75 01 95 04 09 42 09 44 09 3c 09 45 81 02 75 04 95 01 25 0f 09 30 81 02 26 ff 00 75 08 09 38 81 02 75 10 27 a0 8c 00 00 55 0e 65 14 47 a0 8c 00 00 09 3f 81 02 65 11 26 ff 0f 46 c8 37 09 48 81 02 46 68 1f 09 49 81 02 05 01 46 c8 37 09 30 81 02 46 68 1f 09 31 81 02 45 00 c0 c0",
+ input_info=input_info,
+ )
+
+ def create_report(self, data, global_data=None, reportID=None, application=None):
+ # this device has *a lot* of different reports, and most of them
+ # have the Touch Screen application. But the first one is a stylus
+ # report (as stated in the physical type), so we simply override
+ # the report ID to use what the device sends
+ return super().create_report(data, global_data=global_data, reportID=3)
+
+ def match_evdev_rule(self, application, evdev):
+ # we need to select the correct evdev node, as the device has multiple
+ # Touch Screen application collections
+ if application != "Touch Screen":
+ return True
+ absinfo = evdev.absinfo[libevdev.EV_ABS.ABS_MT_POSITION_X]
+ return absinfo is not None and absinfo.resolution == 3
+
+
+class BaseTest:
+ class TestMultitouch(base.BaseTestCase.TestUhid):
+ kernel_modules = [KERNEL_MODULE]
+
+ def create_device(self):
+ raise Exception("please reimplement me in subclasses")
+
+ def get_slot(self, uhdev, t, default):
+ if uhdev.quirks is None:
+ return default
+
+ if "SLOT_IS_CONTACTID" in uhdev.quirks:
+ return t.contactid
+
+ if "SLOT_IS_CONTACTID_MINUS_ONE" in uhdev.quirks:
+ return t.contactid - 1
+
+ return default
+
+ def test_creation(self):
+ """Make sure the device gets processed by the kernel and creates
+ the expected application input node.
+
+ If this fail, there is something wrong in the device report
+ descriptors."""
+ super().test_creation()
+
+ uhdev = self.uhdev
+ evdev = uhdev.get_evdev()
+
+ # some sanity checking for the quirks
+ if uhdev.quirks is not None:
+ for q in uhdev.quirks:
+ assert q in mt_quirks
+
+ assert evdev.num_slots == uhdev.max_contacts
+
+ if uhdev.max_contacts > 1:
+ assert evdev.slots[0][libevdev.EV_ABS.ABS_MT_TRACKING_ID] == -1
+ assert evdev.slots[1][libevdev.EV_ABS.ABS_MT_TRACKING_ID] == -1
+ if uhdev.max_contacts > 2:
+ assert evdev.slots[2][libevdev.EV_ABS.ABS_MT_TRACKING_ID] == -1
+
+ def test_required_usages(self):
+ """Make sure the device exports the correct required features and
+ inputs."""
+ uhdev = self.uhdev
+ rdesc = uhdev.parsed_rdesc
+ for feature in rdesc.feature_reports.values():
+ for field in feature:
+ page_id = field.usage >> 16
+ value = field.usage & 0xFF
+ try:
+ if HUT[page_id][value] == "Contact Max":
+ assert HUT[page_id][field.application] in [
+ "Touch Screen",
+ "Touch Pad",
+ "System Multi-Axis Controller",
+ ]
+ except KeyError:
+ pass
+
+ try:
+ if HUT[page_id][value] == "Inputmode":
+ assert HUT[page_id][field.application] in [
+ "Touch Screen",
+ "Touch Pad",
+ "Device Configuration",
+ ]
+ except KeyError:
+ pass
+
+ def test_mt_single_touch(self):
+ """send a single touch in the first slot of the device,
+ and release it."""
+ uhdev = self.uhdev
+ evdev = uhdev.get_evdev()
+
+ t0 = Touch(1, 50, 100)
+ r = uhdev.event([t0])
+ events = uhdev.next_sync_events()
+ self.debug_reports(r, uhdev, events)
+
+ slot = self.get_slot(uhdev, t0, 0)
+
+ assert libevdev.InputEvent(libevdev.EV_KEY.BTN_TOUCH, 1) in events
+ assert evdev.slots[slot][libevdev.EV_ABS.ABS_MT_TRACKING_ID] == 0
+ assert evdev.slots[slot][libevdev.EV_ABS.ABS_MT_POSITION_X] == 50
+ assert evdev.slots[slot][libevdev.EV_ABS.ABS_MT_POSITION_Y] == 100
+
+ t0.tipswitch = False
+ if uhdev.quirks is None or "VALID_IS_INRANGE" not in uhdev.quirks:
+ t0.inrange = False
+ r = uhdev.event([t0])
+ events = uhdev.next_sync_events()
+ self.debug_reports(r, uhdev, events)
+ assert libevdev.InputEvent(libevdev.EV_KEY.BTN_TOUCH, 0) in events
+ assert evdev.slots[slot][libevdev.EV_ABS.ABS_MT_TRACKING_ID] == -1
+
+ def test_mt_dual_touch(self):
+ """Send 2 touches in the first 2 slots.
+ Make sure the kernel sees this as a dual touch.
+ Release and check
+
+ Note: PTP will send here BTN_DOUBLETAP emulation"""
+ uhdev = self.uhdev
+ evdev = uhdev.get_evdev()
+
+ t0 = Touch(1, 50, 100)
+ t1 = Touch(2, 150, 200)
+
+ if uhdev.quirks is not None and (
+ "SLOT_IS_CONTACTID" in uhdev.quirks
+ or "SLOT_IS_CONTACTNUMBER" in uhdev.quirks
+ ):
+ t1.contactid = 0
+
+ slot0 = self.get_slot(uhdev, t0, 0)
+ slot1 = self.get_slot(uhdev, t1, 1)
+
+ r = uhdev.event([t0])
+ events = uhdev.next_sync_events()
+ self.debug_reports(r, uhdev, events)
+
+ assert libevdev.InputEvent(libevdev.EV_KEY.BTN_TOUCH, 1) in events
+ assert evdev.value[libevdev.EV_KEY.BTN_TOUCH] == 1
+ assert evdev.slots[slot0][libevdev.EV_ABS.ABS_MT_TRACKING_ID] == 0
+ assert evdev.slots[slot0][libevdev.EV_ABS.ABS_MT_POSITION_X] == 50
+ assert evdev.slots[slot0][libevdev.EV_ABS.ABS_MT_POSITION_Y] == 100
+ assert evdev.slots[slot1][libevdev.EV_ABS.ABS_MT_TRACKING_ID] == -1
+
+ r = uhdev.event([t0, t1])
+ events = uhdev.next_sync_events()
+ self.debug_reports(r, uhdev, events)
+ assert libevdev.InputEvent(libevdev.EV_KEY.BTN_TOUCH) not in events
+ assert evdev.value[libevdev.EV_KEY.BTN_TOUCH] == 1
+ assert (
+ libevdev.InputEvent(libevdev.EV_ABS.ABS_MT_POSITION_X, 5) not in events
+ )
+ assert (
+ libevdev.InputEvent(libevdev.EV_ABS.ABS_MT_POSITION_Y, 10) not in events
+ )
+ assert evdev.slots[slot0][libevdev.EV_ABS.ABS_MT_TRACKING_ID] == 0
+ assert evdev.slots[slot0][libevdev.EV_ABS.ABS_MT_POSITION_X] == 50
+ assert evdev.slots[slot0][libevdev.EV_ABS.ABS_MT_POSITION_Y] == 100
+ assert evdev.slots[slot1][libevdev.EV_ABS.ABS_MT_TRACKING_ID] == 1
+ assert evdev.slots[slot1][libevdev.EV_ABS.ABS_MT_POSITION_X] == 150
+ assert evdev.slots[slot1][libevdev.EV_ABS.ABS_MT_POSITION_Y] == 200
+
+ t0.tipswitch = False
+ if uhdev.quirks is None or "VALID_IS_INRANGE" not in uhdev.quirks:
+ t0.inrange = False
+ r = uhdev.event([t0, t1])
+ events = uhdev.next_sync_events()
+ self.debug_reports(r, uhdev, events)
+ assert evdev.slots[slot0][libevdev.EV_ABS.ABS_MT_TRACKING_ID] == -1
+ assert evdev.slots[slot1][libevdev.EV_ABS.ABS_MT_TRACKING_ID] == 1
+ assert libevdev.InputEvent(libevdev.EV_ABS.ABS_MT_POSITION_X) not in events
+ assert libevdev.InputEvent(libevdev.EV_ABS.ABS_MT_POSITION_Y) not in events
+
+ t1.tipswitch = False
+ if uhdev.quirks is None or "VALID_IS_INRANGE" not in uhdev.quirks:
+ t1.inrange = False
+
+ if uhdev.quirks is not None and "SLOT_IS_CONTACTNUMBER" in uhdev.quirks:
+ r = uhdev.event([t0, t1])
+ else:
+ r = uhdev.event([t1])
+
+ events = uhdev.next_sync_events()
+ self.debug_reports(r, uhdev, events)
+ assert evdev.slots[slot0][libevdev.EV_ABS.ABS_MT_TRACKING_ID] == -1
+ assert evdev.slots[slot1][libevdev.EV_ABS.ABS_MT_TRACKING_ID] == -1
+
+ @pytest.mark.skip_if_uhdev(
+ lambda uhdev: uhdev.max_contacts <= 2, "Device not compatible"
+ )
+ def test_mt_triple_tap(self):
+ """Send 3 touches in the first 3 slots.
+ Make sure the kernel sees this as a triple touch.
+ Release and check
+
+ Note: PTP will send here BTN_TRIPLETAP emulation"""
+ uhdev = self.uhdev
+ evdev = uhdev.get_evdev()
+
+ t0 = Touch(1, 50, 100)
+ t1 = Touch(2, 150, 200)
+ t2 = Touch(3, 250, 300)
+ r = uhdev.event([t0, t1, t2])
+ events = uhdev.next_sync_events()
+ self.debug_reports(r, uhdev, events)
+
+ slot0 = self.get_slot(uhdev, t0, 0)
+ slot1 = self.get_slot(uhdev, t1, 1)
+ slot2 = self.get_slot(uhdev, t2, 2)
+
+ assert evdev.slots[slot0][libevdev.EV_ABS.ABS_MT_TRACKING_ID] == 0
+ assert evdev.slots[slot0][libevdev.EV_ABS.ABS_MT_POSITION_X] == 50
+ assert evdev.slots[slot0][libevdev.EV_ABS.ABS_MT_POSITION_Y] == 100
+ assert evdev.slots[slot1][libevdev.EV_ABS.ABS_MT_TRACKING_ID] == 1
+ assert evdev.slots[slot1][libevdev.EV_ABS.ABS_MT_POSITION_X] == 150
+ assert evdev.slots[slot1][libevdev.EV_ABS.ABS_MT_POSITION_Y] == 200
+ assert evdev.slots[slot2][libevdev.EV_ABS.ABS_MT_TRACKING_ID] == 2
+ assert evdev.slots[slot2][libevdev.EV_ABS.ABS_MT_POSITION_X] == 250
+ assert evdev.slots[slot2][libevdev.EV_ABS.ABS_MT_POSITION_Y] == 300
+
+ t0.tipswitch = False
+ t1.tipswitch = False
+ t2.tipswitch = False
+ if uhdev.quirks is None or "VALID_IS_INRANGE" not in uhdev.quirks:
+ t0.inrange = False
+ t1.inrange = False
+ t2.inrange = False
+ r = uhdev.event([t0, t1, t2])
+ events = uhdev.next_sync_events()
+ self.debug_reports(r, uhdev, events)
+
+ assert evdev.slots[slot0][libevdev.EV_ABS.ABS_MT_TRACKING_ID] == -1
+ assert evdev.slots[slot1][libevdev.EV_ABS.ABS_MT_TRACKING_ID] == -1
+ assert evdev.slots[slot2][libevdev.EV_ABS.ABS_MT_TRACKING_ID] == -1
+
+ @pytest.mark.skip_if_uhdev(
+ lambda uhdev: uhdev.max_contacts <= 2, "Device not compatible"
+ )
+ def test_mt_max_contact(self):
+ """send the maximum number of contact as reported by the device.
+ Make sure all contacts are forwarded and that there is no miss.
+ Release and check."""
+ uhdev = self.uhdev
+ evdev = uhdev.get_evdev()
+
+ touches = [
+ Touch(i, (i + 3) * 20, (i + 3) * 20 + 5)
+ for i in range(uhdev.max_contacts)
+ ]
+ if (
+ uhdev.quirks is not None
+ and "SLOT_IS_CONTACTID_MINUS_ONE" in uhdev.quirks
+ ):
+ for t in touches:
+ t.contactid += 1
+ r = uhdev.event(touches)
+ events = uhdev.next_sync_events()
+ self.debug_reports(r, uhdev, events)
+ for i, t in enumerate(touches):
+ slot = self.get_slot(uhdev, t, i)
+
+ assert evdev.slots[slot][libevdev.EV_ABS.ABS_MT_TRACKING_ID] == i
+ assert evdev.slots[slot][libevdev.EV_ABS.ABS_MT_POSITION_X] == t.x
+ assert evdev.slots[slot][libevdev.EV_ABS.ABS_MT_POSITION_Y] == t.y
+
+ for t in touches:
+ t.tipswitch = False
+ if uhdev.quirks is None or "VALID_IS_INRANGE" not in uhdev.quirks:
+ t.inrange = False
+
+ r = uhdev.event(touches)
+ events = uhdev.next_sync_events()
+ self.debug_reports(r, uhdev, events)
+ for i, t in enumerate(touches):
+ slot = self.get_slot(uhdev, t, i)
+
+ assert evdev.slots[slot][libevdev.EV_ABS.ABS_MT_TRACKING_ID] == -1
+
+ @pytest.mark.skip_if_uhdev(
+ lambda uhdev: (
+ uhdev.touches_in_a_report == 1
+ or uhdev.quirks is not None
+ and "CONTACT_CNT_ACCURATE" not in uhdev.quirks
+ ),
+ "Device not compatible, we can not trigger the conditions",
+ )
+ def test_mt_contact_count_accurate(self):
+ """Test the MT_QUIRK_CONTACT_CNT_ACCURATE from the kernel.
+ A report should forward an accurate contact count and the kernel
+ should ignore any data provided after we have reached this
+ contact count."""
+ uhdev = self.uhdev
+ evdev = uhdev.get_evdev()
+
+ t0 = Touch(1, 50, 100)
+ t1 = Touch(2, 150, 200)
+
+ slot0 = self.get_slot(uhdev, t0, 0)
+ slot1 = self.get_slot(uhdev, t1, 1)
+
+ r = uhdev.event([t0, t1], contact_count=1)
+ events = uhdev.next_sync_events()
+ self.debug_reports(r, uhdev, events)
+ assert libevdev.InputEvent(libevdev.EV_KEY.BTN_TOUCH, 1) in events
+ assert evdev.value[libevdev.EV_KEY.BTN_TOUCH] == 1
+ assert libevdev.InputEvent(libevdev.EV_ABS.ABS_MT_TRACKING_ID, 0) in events
+ assert evdev.slots[slot0][libevdev.EV_ABS.ABS_MT_TRACKING_ID] == 0
+ assert evdev.slots[slot0][libevdev.EV_ABS.ABS_MT_POSITION_X] == 50
+ assert evdev.slots[slot0][libevdev.EV_ABS.ABS_MT_POSITION_Y] == 100
+ assert evdev.slots[slot1][libevdev.EV_ABS.ABS_MT_TRACKING_ID] == -1
+
+ class TestWin8Multitouch(TestMultitouch):
+ def test_required_usages8(self):
+ """Make sure the device exports the correct required features and
+ inputs."""
+ uhdev = self.uhdev
+ rdesc = uhdev.parsed_rdesc
+ for feature in rdesc.feature_reports.values():
+ for field in feature:
+ page_id = field.usage >> 16
+ value = field.usage & 0xFF
+ try:
+ if HUT[page_id][value] == "Inputmode":
+ assert HUT[field.application] not in ["Touch Screen"]
+ except KeyError:
+ pass
+
+ @pytest.mark.skip_if_uhdev(
+ lambda uhdev: uhdev.fields.count("X") == uhdev.touches_in_a_report,
+ "Device not compatible, we can not trigger the conditions",
+ )
+ def test_mt_tx_cx(self):
+ """send a single touch in the first slot of the device, with
+ different values of Tx and Cx. Make sure the kernel reports Tx."""
+ uhdev = self.uhdev
+ evdev = uhdev.get_evdev()
+
+ t0 = Touch(1, 5, 10)
+ t0.cx = 50
+ t0.cy = 100
+ r = uhdev.event([t0])
+ events = uhdev.next_sync_events()
+ self.debug_reports(r, uhdev, events)
+ assert libevdev.InputEvent(libevdev.EV_KEY.BTN_TOUCH, 1) in events
+ assert evdev.slots[0][libevdev.EV_ABS.ABS_MT_TRACKING_ID] == 0
+ assert evdev.slots[0][libevdev.EV_ABS.ABS_MT_POSITION_X] == 5
+ assert evdev.slots[0][libevdev.EV_ABS.ABS_MT_TOOL_X] == 50
+ assert evdev.slots[0][libevdev.EV_ABS.ABS_MT_POSITION_Y] == 10
+ assert evdev.slots[0][libevdev.EV_ABS.ABS_MT_TOOL_Y] == 100
+
+ @pytest.mark.skip_if_uhdev(
+ lambda uhdev: "In Range" not in uhdev.fields,
+ "Device not compatible, missing In Range usage",
+ )
+ def test_mt_inrange(self):
+ """Send one contact that has the InRange bit set before/after
+ tipswitch.
+ Kernel is supposed to mark the contact with a distance > 0
+ when inrange is set but not tipswitch.
+
+ This tests the hovering capability of devices (MT_QUIRK_HOVERING).
+
+ Make sure the contact is only released from the kernel POV
+ when the inrange bit is set to 0."""
+ uhdev = self.uhdev
+ evdev = uhdev.get_evdev()
+
+ t0 = Touch(1, 150, 200)
+ t0.tipswitch = False
+ r = uhdev.event([t0])
+ events = uhdev.next_sync_events()
+ self.debug_reports(r, uhdev, events)
+ assert libevdev.InputEvent(libevdev.EV_KEY.BTN_TOUCH, 1) in events
+ assert evdev.value[libevdev.EV_KEY.BTN_TOUCH] == 1
+ assert libevdev.InputEvent(libevdev.EV_ABS.ABS_MT_TRACKING_ID, 0) in events
+ assert libevdev.InputEvent(libevdev.EV_ABS.ABS_MT_DISTANCE) in events
+ assert evdev.slots[0][libevdev.EV_ABS.ABS_MT_DISTANCE] > 0
+ assert evdev.slots[0][libevdev.EV_ABS.ABS_MT_TRACKING_ID] == 0
+ assert evdev.slots[0][libevdev.EV_ABS.ABS_MT_POSITION_X] == 150
+ assert evdev.slots[0][libevdev.EV_ABS.ABS_MT_POSITION_Y] == 200
+ assert evdev.slots[1][libevdev.EV_ABS.ABS_MT_TRACKING_ID] == -1
+
+ t0.tipswitch = True
+ r = uhdev.event([t0])
+ events = uhdev.next_sync_events()
+ self.debug_reports(r, uhdev, events)
+ assert libevdev.InputEvent(libevdev.EV_ABS.ABS_MT_DISTANCE, 0) in events
+ assert evdev.slots[0][libevdev.EV_ABS.ABS_MT_DISTANCE] == 0
+
+ t0.tipswitch = False
+ r = uhdev.event([t0])
+ events = uhdev.next_sync_events()
+ self.debug_reports(r, uhdev, events)
+ assert libevdev.InputEvent(libevdev.EV_ABS.ABS_MT_DISTANCE) in events
+ assert evdev.slots[0][libevdev.EV_ABS.ABS_MT_DISTANCE] > 0
+
+ t0.inrange = False
+ r = uhdev.event([t0])
+ events = uhdev.next_sync_events()
+ self.debug_reports(r, uhdev, events)
+ assert libevdev.InputEvent(libevdev.EV_KEY.BTN_TOUCH, 0) in events
+ assert evdev.slots[0][libevdev.EV_ABS.ABS_MT_TRACKING_ID] == -1
+
+ def test_mt_duplicates(self):
+ """Test the MT_QUIRK_IGNORE_DUPLICATES from the kernel.
+ If a touch is reported more than once with the same Contact ID,
+ we should only handle the first touch.
+
+ Note: this is not in MS spec, but the current kernel behaves
+ like that"""
+ uhdev = self.uhdev
+ evdev = uhdev.get_evdev()
+
+ t0 = Touch(1, 5, 10)
+ t1 = Touch(1, 15, 20)
+ t2 = Touch(2, 50, 100)
+
+ r = uhdev.event([t0, t1, t2], contact_count=2)
+ events = uhdev.next_sync_events()
+ self.debug_reports(r, uhdev, events)
+ assert libevdev.InputEvent(libevdev.EV_KEY.BTN_TOUCH, 1) in events
+ assert evdev.value[libevdev.EV_KEY.BTN_TOUCH] == 1
+ assert libevdev.InputEvent(libevdev.EV_ABS.ABS_MT_TRACKING_ID, 0) in events
+ assert evdev.slots[0][libevdev.EV_ABS.ABS_MT_TRACKING_ID] == 0
+ assert evdev.slots[0][libevdev.EV_ABS.ABS_MT_POSITION_X] == 5
+ assert evdev.slots[0][libevdev.EV_ABS.ABS_MT_POSITION_Y] == 10
+ assert evdev.slots[1][libevdev.EV_ABS.ABS_MT_TRACKING_ID] == 1
+ assert evdev.slots[1][libevdev.EV_ABS.ABS_MT_POSITION_X] == 50
+ assert evdev.slots[1][libevdev.EV_ABS.ABS_MT_POSITION_Y] == 100
+
+ def test_mt_release_miss(self):
+ """send a single touch in the first slot of the device, and
+ forget to release it. The kernel is supposed to release by itself
+ the touch in 100ms.
+ Make sure that we are dealing with a new touch by resending the
+ same touch after the timeout expired, and check that the kernel
+ considers it as a separate touch (different tracking ID)"""
+ uhdev = self.uhdev
+ evdev = uhdev.get_evdev()
+
+ t0 = Touch(1, 5, 10)
+ r = uhdev.event([t0])
+ events = uhdev.next_sync_events()
+ self.debug_reports(r, uhdev, events)
+ assert evdev.slots[0][libevdev.EV_ABS.ABS_MT_TRACKING_ID] == 0
+
+ time.sleep(0.2)
+ events = uhdev.next_sync_events()
+ self.debug_reports(r, uhdev, events)
+ assert libevdev.InputEvent(libevdev.EV_KEY.BTN_TOUCH, 0) in events
+ assert evdev.slots[0][libevdev.EV_ABS.ABS_MT_TRACKING_ID] == -1
+
+ r = uhdev.event([t0])
+ events = uhdev.next_sync_events()
+ self.debug_reports(r, uhdev, events)
+ assert evdev.slots[0][libevdev.EV_ABS.ABS_MT_TRACKING_ID] == 1
+
+ @pytest.mark.skip_if_uhdev(
+ lambda uhdev: "Azimuth" not in uhdev.fields,
+ "Device not compatible, missing Azimuth usage",
+ )
+ def test_mt_azimuth(self):
+ """Check for the azimtuh information bit.
+ When azimuth is presented by the device, it should be exported
+ as ABS_MT_ORIENTATION and the exported value should report a quarter
+ of circle."""
+ uhdev = self.uhdev
+
+ t0 = Touch(1, 5, 10)
+ t0.azimuth = 270
+
+ r = uhdev.event([t0])
+ events = uhdev.next_sync_events()
+ self.debug_reports(r, uhdev, events)
+
+ # orientation is clockwise, while Azimuth is counter clockwise
+ assert libevdev.InputEvent(libevdev.EV_ABS.ABS_MT_ORIENTATION, 90) in events
+
+ class TestPTP(TestWin8Multitouch):
+ def test_ptp_buttons(self):
+ """check for button reliability.
+ There are 2 types of touchpads: the click pads and the pressure pads.
+ Each should reliably report the BTN_LEFT events.
+ """
+ uhdev = self.uhdev
+ evdev = uhdev.get_evdev()
+
+ if uhdev.type == "clickpad":
+ r = uhdev.event(click=True)
+ events = uhdev.next_sync_events()
+ self.debug_reports(r, uhdev, events)
+ assert libevdev.InputEvent(libevdev.EV_KEY.BTN_LEFT, 1) in events
+ assert evdev.value[libevdev.EV_KEY.BTN_LEFT] == 1
+
+ r = uhdev.event(click=False)
+ events = uhdev.next_sync_events()
+ self.debug_reports(r, uhdev, events)
+ assert libevdev.InputEvent(libevdev.EV_KEY.BTN_LEFT, 0) in events
+ assert evdev.value[libevdev.EV_KEY.BTN_LEFT] == 0
+ else:
+ r = uhdev.event(left=True)
+ events = uhdev.next_sync_events()
+ self.debug_reports(r, uhdev, events)
+ assert libevdev.InputEvent(libevdev.EV_KEY.BTN_LEFT, 1) in events
+ assert evdev.value[libevdev.EV_KEY.BTN_LEFT] == 1
+
+ r = uhdev.event(left=False)
+ events = uhdev.next_sync_events()
+ self.debug_reports(r, uhdev, events)
+ assert libevdev.InputEvent(libevdev.EV_KEY.BTN_LEFT, 0) in events
+ assert evdev.value[libevdev.EV_KEY.BTN_LEFT] == 0
+
+ r = uhdev.event(right=True)
+ events = uhdev.next_sync_events()
+ self.debug_reports(r, uhdev, events)
+ assert libevdev.InputEvent(libevdev.EV_KEY.BTN_RIGHT, 1) in events
+ assert evdev.value[libevdev.EV_KEY.BTN_RIGHT] == 1
+
+ r = uhdev.event(right=False)
+ events = uhdev.next_sync_events()
+ self.debug_reports(r, uhdev, events)
+ assert libevdev.InputEvent(libevdev.EV_KEY.BTN_RIGHT, 0) in events
+ assert evdev.value[libevdev.EV_KEY.BTN_RIGHT] == 0
+
+ @pytest.mark.skip_if_uhdev(
+ lambda uhdev: "Confidence" not in uhdev.fields,
+ "Device not compatible, missing Confidence usage",
+ )
+ def test_ptp_confidence(self):
+ """Check for the validity of the confidence bit.
+ When a contact is marked as not confident, it should be detected
+ as a palm from the kernel POV and released.
+
+ Note: if the kernel exports ABS_MT_TOOL_TYPE, it shouldn't release
+ the touch but instead convert it to ABS_MT_TOOL_PALM."""
+ uhdev = self.uhdev
+ evdev = uhdev.get_evdev()
+
+ t0 = Touch(1, 150, 200)
+ r = uhdev.event([t0])
+ events = uhdev.next_sync_events()
+ self.debug_reports(r, uhdev, events)
+
+ t0.confidence = False
+ r = uhdev.event([t0])
+ events = uhdev.next_sync_events()
+ self.debug_reports(r, uhdev, events)
+
+ if evdev.absinfo[libevdev.EV_ABS.ABS_MT_TOOL_TYPE] is not None:
+ # the kernel exports MT_TOOL_PALM
+ assert (
+ libevdev.InputEvent(libevdev.EV_ABS.ABS_MT_TOOL_TYPE, 2) in events
+ )
+ assert evdev.slots[0][libevdev.EV_ABS.ABS_MT_TRACKING_ID] != -1
+
+ t0.tipswitch = False
+ r = uhdev.event([t0])
+ events = uhdev.next_sync_events()
+ self.debug_reports(r, uhdev, events)
+
+ assert libevdev.InputEvent(libevdev.EV_KEY.BTN_TOUCH, 0) in events
+ assert evdev.slots[0][libevdev.EV_ABS.ABS_MT_TRACKING_ID] == -1
+
+ @pytest.mark.skip_if_uhdev(
+ lambda uhdev: uhdev.touches_in_a_report >= uhdev.max_contacts,
+ "Device not compatible, we can not trigger the conditions",
+ )
+ def test_ptp_non_touch_data(self):
+ """Some single finger hybrid touchpads might not provide the
+ button information in subsequent reports (only in the first report).
+
+ Emulate this and make sure we do not release the buttons in the
+ middle of the event."""
+ uhdev = self.uhdev
+ evdev = uhdev.get_evdev()
+
+ touches = [Touch(i, i * 10, i * 10 + 5) for i in range(uhdev.max_contacts)]
+ contact_count = uhdev.max_contacts
+ incr_scantime = True
+ btn_state = True
+ events = None
+ while touches:
+ t = touches[: uhdev.touches_in_a_report]
+ touches = touches[uhdev.touches_in_a_report :]
+ r = uhdev.event(
+ t,
+ click=btn_state,
+ left=btn_state,
+ contact_count=contact_count,
+ incr_scantime=incr_scantime,
+ )
+ contact_count = 0
+ incr_scantime = False
+ btn_state = False
+ events = uhdev.next_sync_events()
+ self.debug_reports(r, uhdev, events)
+ if touches:
+ assert len(events) == 0
+
+ assert libevdev.InputEvent(libevdev.EV_KEY.BTN_LEFT, 1) in events
+ assert libevdev.InputEvent(libevdev.EV_KEY.BTN_LEFT, 0) not in events
+ assert evdev.value[libevdev.EV_KEY.BTN_LEFT] == 1
+
+
+################################################################################
+#
+# Windows 7 compatible devices
+#
+################################################################################
+class Test3m_0596_0500(BaseTest.TestMultitouch):
+ def create_device(self):
+ return Digitizer(
+ "uhid test 3m_0596_0500",
+ rdesc="05 01 09 01 a1 01 85 01 09 01 a1 00 05 09 09 01 95 01 75 01 15 00 25 01 81 02 95 07 75 01 81 03 95 01 75 08 81 03 05 01 09 30 09 31 15 00 26 ff 7f 35 00 46 00 00 95 02 75 10 81 02 c0 a1 02 15 00 26 ff 00 09 01 95 39 75 08 81 01 c0 c0 05 0d 09 0e a1 01 85 11 09 23 a1 02 09 52 09 53 15 00 25 0a 75 08 95 02 b1 02 c0 c0 09 04 a1 01 85 10 09 22 a1 02 09 42 15 00 25 01 75 01 95 01 81 02 09 32 81 02 09 47 81 02 95 05 81 03 75 08 09 51 95 01 81 02 05 01 26 ff 7f 75 10 55 0e 65 33 09 30 35 00 46 3a 06 81 02 09 31 46 e8 03 81 02 c0 05 0d a1 02 09 42 15 00 25 01 75 01 95 01 81 02 09 32 81 02 09 47 81 02 95 05 81 03 75 08 09 51 95 01 81 02 05 01 26 ff 7f 75 10 55 0e 65 33 09 30 35 00 46 3a 06 81 02 09 31 46 e8 03 81 02 c0 05 0d a1 02 09 42 15 00 25 01 75 01 95 01 81 02 09 32 81 02 09 47 81 02 95 05 81 03 75 08 09 51 95 01 81 02 05 01 26 ff 7f 75 10 55 0e 65 33 09 30 35 00 46 3a 06 81 02 09 31 46 e8 03 81 02 c0 05 0d a1 02 09 42 15 00 25 01 75 01 95 01 81 02 09 32 81 02 09 47 81 02 95 05 81 03 75 08 09 51 95 01 81 02 05 01 26 ff 7f 75 10 55 0e 65 33 09 30 35 00 46 3a 06 81 02 09 31 46 e8 03 81 02 c0 05 0d a1 02 09 42 15 00 25 01 75 01 95 01 81 02 09 32 81 02 09 47 81 02 95 05 81 03 75 08 09 51 95 01 81 02 05 01 26 ff 7f 75 10 55 0e 65 33 09 30 35 00 46 3a 06 81 02 09 31 46 e8 03 81 02 c0 05 0d a1 02 09 42 15 00 25 01 75 01 95 01 81 02 09 32 81 02 09 47 81 02 95 05 81 03 75 08 09 51 95 01 81 02 05 01 26 ff 7f 75 10 55 0e 65 33 09 30 35 00 46 3a 06 81 02 09 31 46 e8 03 81 02 c0 05 0d a1 02 09 42 15 00 25 01 75 01 95 01 81 02 09 32 81 02 09 47 81 02 95 05 81 03 75 08 09 51 95 01 81 02 05 01 26 ff 7f 75 10 55 0e 65 33 09 30 35 00 46 3a 06 81 02 09 31 46 e8 03 81 02 c0 05 0d a1 02 09 42 15 00 25 01 75 01 95 01 81 02 09 32 81 02 09 47 81 02 95 05 81 03 75 08 09 51 95 01 81 02 05 01 26 ff 7f 75 10 55 0e 65 33 09 30 35 00 46 3a 06 81 02 09 31 46 e8 03 81 02 c0 05 0d a1 02 09 42 15 00 25 01 75 01 95 01 81 02 09 32 81 02 09 47 81 02 95 05 81 03 75 08 09 51 95 01 81 02 05 01 26 ff 7f 75 10 55 0e 65 33 09 30 35 00 46 3a 06 81 02 09 31 46 e8 03 81 02 c0 05 0d a1 02 09 42 15 00 25 01 75 01 95 01 81 02 09 32 81 02 09 47 81 02 95 05 81 03 75 08 09 51 95 01 81 02 05 01 26 ff 7f 75 10 55 0e 65 33 09 30 35 00 46 3a 06 81 02 09 31 46 e8 03 81 02 c0 05 0d 09 54 95 01 75 08 15 00 25 0a 81 02 85 12 09 55 95 01 75 08 15 00 25 0a b1 02 06 00 ff 15 00 26 ff 00 85 03 09 01 75 08 95 07 b1 02 85 04 09 01 75 08 95 17 b1 02 85 05 09 01 75 08 95 47 b1 02 85 06 09 01 75 08 95 07 b1 02 85 07 09 01 75 08 95 07 b1 02 85 08 09 01 75 08 95 07 b1 02 85 09 09 01 75 08 95 3f b1 02 c0",
+ input_info=(BusType.USB, 0x0596, 0x0500),
+ max_contacts=60,
+ quirks=("VALID_IS_CONFIDENCE", "SLOT_IS_CONTACTID", "TOUCH_SIZE_SCALING"),
+ )
+
+
+class Test3m_0596_0506(BaseTest.TestMultitouch):
+ def create_device(self):
+ return Digitizer(
+ "uhid test 3m_0596_0506",
+ rdesc="05 01 09 01 a1 01 85 01 09 01 a1 00 05 09 09 01 95 01 75 01 15 00 25 01 81 02 95 07 75 01 81 03 95 01 75 08 81 03 05 01 09 30 09 31 15 00 26 ff 7f 35 00 46 00 00 95 02 75 10 81 02 c0 a1 02 15 00 26 ff 00 09 01 95 39 75 08 81 03 c0 c0 05 0d 09 0e a1 01 85 11 09 23 a1 02 09 52 09 53 15 00 25 0a 75 08 95 02 b1 02 c0 c0 09 04 a1 01 85 13 05 0d 09 22 a1 02 09 42 15 00 25 01 75 01 95 01 81 02 09 32 81 02 09 47 81 02 95 05 81 03 75 08 09 51 95 01 81 02 05 01 26 ff 7f 75 10 55 0e 65 33 09 30 35 00 46 d6 0a 81 02 09 31 46 22 06 81 02 05 0d 75 10 95 01 09 48 81 02 09 49 81 02 c0 05 0d 09 22 a1 02 09 42 15 00 25 01 75 01 95 01 81 02 09 32 81 02 09 47 81 02 95 05 81 03 75 08 09 51 95 01 81 02 05 01 26 ff 7f 75 10 55 0e 65 33 09 30 35 00 46 d6 0a 81 02 09 31 46 22 06 81 02 05 0d 75 10 95 01 09 48 81 02 09 49 81 02 c0 05 0d 09 22 a1 02 09 42 15 00 25 01 75 01 95 01 81 02 09 32 81 02 09 47 81 02 95 05 81 03 75 08 09 51 95 01 81 02 05 01 26 ff 7f 75 10 55 0e 65 33 09 30 35 00 46 d6 0a 81 02 09 31 46 22 06 81 02 05 0d 75 10 95 01 09 48 81 02 09 49 81 02 c0 05 0d 09 22 a1 02 09 42 15 00 25 01 75 01 95 01 81 02 09 32 81 02 09 47 81 02 95 05 81 03 75 08 09 51 95 01 81 02 05 01 26 ff 7f 75 10 55 0e 65 33 09 30 35 00 46 d6 0a 81 02 09 31 46 22 06 81 02 05 0d 75 10 95 01 09 48 81 02 09 49 81 02 c0 05 0d 09 22 a1 02 09 42 15 00 25 01 75 01 95 01 81 02 09 32 81 02 09 47 81 02 95 05 81 03 75 08 09 51 95 01 81 02 05 01 26 ff 7f 75 10 55 0e 65 33 09 30 35 00 46 d6 0a 81 02 09 31 46 22 06 81 02 05 0d 75 10 95 01 09 48 81 02 09 49 81 02 c0 05 0d 09 22 a1 02 09 42 15 00 25 01 75 01 95 01 81 02 09 32 81 02 09 47 81 02 95 05 81 03 75 08 09 51 95 01 81 02 05 01 26 ff 7f 75 10 55 0e 65 33 09 30 35 00 46 d6 0a 81 02 09 31 46 22 06 81 02 05 0d 75 10 95 01 09 48 81 02 09 49 81 02 c0 05 0d 09 54 95 01 75 08 15 00 25 3c 81 02 06 00 ff 09 01 15 00 26 ff 00 75 08 95 02 81 03 05 0d 85 12 09 55 95 01 75 08 15 00 25 3c b1 02 06 00 ff 15 00 26 ff 00 85 03 09 01 75 08 95 07 b1 02 85 04 09 01 75 08 95 17 b1 02 85 05 09 01 75 08 95 47 b1 02 85 06 09 01 75 08 95 07 b1 02 85 73 09 01 75 08 95 07 b1 02 85 08 09 01 75 08 95 07 b1 02 85 09 09 01 75 08 95 3f b1 02 85 0f 09 01 75 08 96 07 02 b1 02 c0",
+ input_info=(BusType.USB, 0x0596, 0x0506),
+ max_contacts=60,
+ quirks=("VALID_IS_CONFIDENCE", "SLOT_IS_CONTACTID", "TOUCH_SIZE_SCALING"),
+ )
+
+
+class TestActionStar_2101_1011(BaseTest.TestMultitouch):
+ def create_device(self):
+ return Digitizer(
+ "uhid test ActionStar_2101_1011",
+ rdesc="05 0d 09 04 a1 01 85 01 09 22 a1 02 09 42 15 00 25 01 75 01 95 01 81 02 09 32 81 02 09 47 81 02 95 05 81 03 09 51 75 08 95 01 81 02 05 01 35 00 55 0e 65 33 75 10 95 01 09 30 26 ff 4d 46 70 03 81 02 09 31 26 ff 2b 46 f1 01 81 02 46 00 00 c0 a1 02 05 0d 09 42 15 00 25 01 75 01 95 01 81 02 09 32 81 02 09 47 81 02 95 05 81 03 09 51 75 08 95 01 81 02 05 01 35 00 55 0e 65 33 75 10 95 01 09 30 26 ff 4d 46 70 03 81 02 09 31 26 ff 2b 46 f1 01 81 02 46 00 00 c0 05 0d 09 54 75 08 95 01 81 02 05 0d 85 02 09 55 25 02 75 08 95 01 b1 02 c0",
+ input_info=(BusType.USB, 0x2101, 0x1011),
+ )
+
+ def test_mt_actionstar_inrange(self):
+ """Special sequence that might not be handled properly"""
+ uhdev = self.uhdev
+ evdev = uhdev.get_evdev()
+
+ # fmt: off
+ sequence = [
+ # t0 = Touch(1, 6999, 2441) | t1 = Touch(2, 15227, 2026)
+ '01 ff 01 57 1b 89 09 ff 02 7b 3b ea 07 02',
+ # t0.xy = (6996, 2450) | t1.y = 2028
+ '01 ff 01 54 1b 92 09 ff 02 7b 3b ec 07 02',
+ # t1.xy = (15233, 2040) | t0.tipswitch = False
+ '01 ff 02 81 3b f8 07 fe 01 54 1b 92 09 02',
+ # t1 | t0.inrange = False
+ '01 ff 02 81 3b f8 07 fc 01 54 1b 92 09 02',
+ ]
+ # fmt: on
+
+ for num, r_str in enumerate(sequence):
+ r = [int(i, 16) for i in r_str.split()]
+ uhdev.call_input_event(r)
+ events = uhdev.next_sync_events()
+ self.debug_reports([r], uhdev)
+ for e in events:
+ print(e)
+ if num == 2:
+ assert evdev.slots[0][libevdev.EV_ABS.ABS_MT_TRACKING_ID] == -1
+
+
+class TestAsus_computers_0486_0185(BaseTest.TestMultitouch):
+ def create_device(self):
+ return Digitizer(
+ "uhid test asus-computers_0486_0185",
+ rdesc="05 0d 09 04 a1 01 85 01 09 22 a1 02 09 42 15 00 25 01 95 01 75 01 81 02 09 32 81 02 09 47 81 02 75 05 81 03 09 30 26 ff 00 75 08 81 02 09 51 25 02 81 02 26 96 0d 05 01 75 10 55 0d 65 33 09 30 35 00 46 fd 1d 81 02 09 31 46 60 11 81 02 c0 09 22 a1 02 05 0d 35 00 45 00 55 00 65 00 09 42 25 01 75 01 81 02 09 32 81 02 09 47 81 02 75 05 81 03 09 30 26 ff 00 75 08 81 02 09 51 25 02 81 02 26 96 0d 05 01 75 10 55 0d 65 33 09 30 46 fd 1d 81 02 09 31 46 60 11 81 02 c0 35 00 45 00 55 00 65 00 05 0d 09 54 75 08 25 02 81 02 85 08 09 55 b1 02 c0 09 0e a1 01 85 07 09 22 a1 00 09 52 25 0a b1 02 c0 05 0c 09 01 a1 01 85 06 09 01 26 ff 00 95 08 b1 02 c0 c0 05 01 09 02 a1 01 85 03 09 01 a1 00 05 09 19 01 29 02 25 01 75 01 95 02 81 02 95 06 81 03 26 96 0d 05 01 75 10 95 01 55 0d 65 33 09 30 46 fd 1d 81 02 09 31 46 60 11 81 02 c0 c0 06 ff 01 09 01 a1 01 26 ff 00 35 00 45 00 55 00 65 00 85 05 75 08 95 3f 09 00 81 02 c0",
+ input_info=(BusType.USB, 0x0486, 0x0185),
+ quirks=("VALID_IS_CONFIDENCE", "SLOT_IS_CONTACTID_MINUS_ONE"),
+ )
+
+
+class TestAtmel_03eb_201c(BaseTest.TestMultitouch):
+ def create_device(self):
+ return Digitizer(
+ "uhid test atmel_03eb_201c",
+ rdesc="05 0d 09 04 a1 01 85 01 09 22 a1 02 09 42 15 00 25 01 75 01 95 01 81 02 09 32 81 02 09 47 81 02 95 05 81 03 09 51 75 08 95 01 81 02 05 01 35 00 55 0e 65 33 75 10 95 01 09 30 26 ff 4b 46 70 03 81 02 09 31 26 ff 2b 46 f1 01 81 02 46 00 00 c0 a1 02 05 0d 09 42 15 00 25 01 75 01 95 01 81 02 09 32 81 02 09 47 81 02 95 05 81 03 09 51 75 08 95 01 81 02 05 01 35 00 55 0e 65 33 75 10 95 01 09 30 26 ff 4b 46 70 03 81 02 09 31 26 ff 2b 46 f1 01 81 02 46 00 00 c0 05 0d 09 54 75 08 95 01 81 02 05 0d 85 02 09 55 25 02 75 08 95 01 b1 02 c0",
+ input_info=(BusType.USB, 0x03EB, 0x201C),
+ )
+
+
+class TestAtmel_03eb_211c(BaseTest.TestMultitouch):
+ def create_device(self):
+ return Digitizer(
+ "uhid test atmel_03eb_211c",
+ rdesc="05 0d 09 04 a1 01 85 01 09 22 a1 00 09 42 15 00 25 01 75 01 95 01 81 02 09 32 81 02 09 37 81 02 25 1f 75 05 09 51 81 02 05 01 55 0e 65 11 35 00 75 10 46 56 0a 26 ff 0f 09 30 81 02 46 b2 05 26 ff 0f 09 31 81 02 05 0d 75 08 85 02 09 55 25 10 b1 02 c0 c0",
+ input_info=(BusType.USB, 0x03EB, 0x211C),
+ )
+
+
+class TestCando_2087_0a02(BaseTest.TestMultitouch):
+ def create_device(self):
+ return Digitizer(
+ "uhid test cando_2087_0a02",
+ rdesc="05 0d 09 04 a1 01 85 01 09 22 a1 02 09 42 15 00 25 01 75 01 95 01 81 02 09 32 81 02 95 06 81 03 75 08 09 51 95 01 81 02 05 01 26 ff 0f 75 10 55 0e 65 33 09 30 35 00 46 6d 03 81 02 46 ec 01 09 31 81 02 c0 a1 02 05 0d 09 42 15 00 25 01 75 01 95 01 81 02 09 32 81 02 95 06 81 03 75 08 09 51 95 01 81 02 05 01 26 ff 0f 75 10 55 0e 65 33 09 30 35 00 46 6d 03 81 02 46 ec 01 09 31 81 02 c0 05 0d 09 54 95 01 75 08 15 00 25 02 81 02 85 02 09 55 b1 02 c0 06 00 ff 09 01 a1 01 85 a6 95 22 75 08 26 ff 00 15 00 09 01 81 02 85 a5 95 06 75 08 26 ff 00 15 00 09 01 91 02 c0",
+ input_info=(BusType.USB, 0x2087, 0x0A02),
+ )
+
+
+class TestCando_2087_0b03(BaseTest.TestMultitouch):
+ def create_device(self):
+ return Digitizer(
+ "uhid test cando_2087_0b03",
+ rdesc="05 0d 09 04 a1 01 85 01 09 22 a1 02 09 42 15 00 25 01 75 01 95 01 81 02 09 32 81 02 09 47 81 02 95 05 81 03 09 51 75 08 95 01 81 02 05 01 35 00 55 0e 65 33 75 10 95 01 09 30 26 ff 49 46 f2 03 81 02 09 31 26 ff 29 46 39 02 81 02 46 00 00 c0 a1 02 05 0d 09 42 15 00 25 01 75 01 95 01 81 02 09 32 81 02 09 47 81 02 95 05 81 03 09 51 75 08 95 01 81 02 05 01 35 00 55 0e 65 33 75 10 95 01 09 30 26 ff 49 46 f2 03 81 02 09 31 26 ff 29 46 39 02 81 02 46 00 00 c0 05 0d 09 54 75 08 95 01 81 02 05 0d 85 02 09 55 25 02 75 08 95 01 b1 02 c0",
+ input_info=(BusType.USB, 0x2087, 0x0B03),
+ )
+
+
+class TestCVTouch_1ff7_0013(BaseTest.TestMultitouch):
+ def create_device(self):
+ return Digitizer(
+ "uhid test cvtouch_1ff7_0013",
+ rdesc="06 00 ff 09 00 a1 01 85 fd 06 00 ff 09 01 09 02 09 03 09 04 09 05 09 06 15 00 26 ff 00 75 08 95 06 81 02 85 fe 06 00 ff 09 01 09 02 09 03 09 04 15 00 26 ff 00 75 08 95 04 b1 02 c0 05 01 09 02 a1 01 09 01 a1 00 85 01 05 09 19 01 29 03 15 00 25 01 95 03 75 01 81 02 95 01 75 05 81 03 05 01 09 30 09 31 15 00 26 ff 7f 35 00 46 ff 7f 75 10 95 02 81 02 05 0d 09 33 15 00 26 ff 00 35 00 46 ff 00 75 08 95 01 81 02 05 01 09 38 15 81 25 7f 35 81 45 7f 95 01 81 06 c0 c0 06 00 ff 09 00 a1 01 85 fc 15 00 26 ff 00 19 01 29 3f 75 08 95 3f 81 02 19 01 29 3f 91 02 c0 06 00 ff 09 00 a1 01 85 fb 15 00 26 ff 00 19 01 29 3f 75 08 95 3f 81 02 19 01 29 3f 91 02 c0 05 0d 09 04 a1 01 85 02 09 22 a1 02 09 42 15 00 25 01 75 01 95 01 81 02 09 32 81 02 95 06 81 03 75 08 09 51 95 01 81 02 05 01 15 00 26 ff 7f 75 10 55 00 65 00 09 30 35 00 46 00 00 81 02 09 31 81 02 c0 a1 02 05 0d 09 42 15 00 25 01 75 01 95 01 81 02 09 32 81 02 95 06 81 03 75 08 09 51 95 01 81 02 05 01 15 00 26 ff 7f 75 10 55 00 65 00 09 30 35 00 46 00 00 81 02 09 31 81 02 c0 a1 02 05 0d 09 42 15 00 25 01 75 01 95 01 81 02 09 32 81 02 95 06 81 03 75 08 09 51 95 01 81 02 05 01 15 00 26 ff 7f 75 10 55 00 65 00 09 30 35 00 46 00 00 81 02 09 31 81 02 c0 a1 02 05 0d 09 42 15 00 25 01 75 01 95 01 81 02 09 32 81 02 95 06 81 03 75 08 09 51 95 01 81 02 05 01 15 00 26 ff 7f 75 10 55 00 65 00 09 30 35 00 46 00 00 81 02 09 31 81 02 c0 a1 02 05 0d 09 42 15 00 25 01 75 01 95 01 81 02 09 32 81 02 95 06 81 03 75 08 09 51 95 01 81 02 05 01 15 00 26 ff 7f 75 10 55 00 65 00 09 30 35 00 46 00 00 81 02 09 31 81 02 c0 a1 02 05 0d 09 42 15 00 25 01 75 01 95 01 81 02 09 32 81 02 95 06 81 03 75 08 09 51 95 01 81 02 05 01 15 00 26 ff 7f 75 10 55 00 65 00 09 30 35 00 46 00 00 81 02 09 31 81 02 c0 05 0d 09 54 15 00 26 ff 00 95 01 75 08 81 02 85 03 09 55 15 00 25 02 b1 02 c0 09 0e a1 01 85 04 09 23 a1 02 09 52 09 53 15 00 25 0a 75 08 95 02 b1 02 c0 c0",
+ input_info=(BusType.USB, 0x1FF7, 0x0013),
+ quirks=("NOT_SEEN_MEANS_UP",),
+ )
+
+
+class TestCvtouch_1ff7_0017(BaseTest.TestMultitouch):
+ def create_device(self):
+ return Digitizer(
+ "uhid test cvtouch_1ff7_0017",
+ rdesc="06 00 ff 09 00 a1 01 85 fd 06 00 ff 09 01 09 02 09 03 09 04 09 05 09 06 15 00 26 ff 00 75 08 95 06 81 02 85 fe 06 00 ff 09 01 09 02 09 03 09 04 15 00 26 ff 00 75 08 95 04 b1 02 c0 05 01 09 02 a1 01 09 01 a1 00 85 01 05 09 19 01 29 03 15 00 25 01 95 03 75 01 81 02 95 01 75 05 81 03 05 01 09 30 09 31 15 00 26 ff 0f 35 00 46 ff 0f 75 10 95 02 81 02 09 00 15 00 25 ff 35 00 45 ff 75 08 95 01 81 02 09 38 15 81 25 7f 95 01 81 06 c0 c0 06 00 ff 09 00 a1 01 85 fc 15 00 25 ff 19 01 29 3f 75 08 95 3f 81 02 19 01 29 3f 91 02 c0 06 00 ff 09 00 a1 01 85 fb 15 00 25 ff 19 01 29 3f 75 08 95 3f 81 02 19 01 29 3f 91 02 c0 05 0d 09 04 a1 01 85 02 09 22 a1 02 09 42 15 00 25 01 75 01 95 01 81 02 09 32 81 02 95 06 81 03 75 08 09 51 95 01 81 02 05 01 15 00 26 ff 0f 75 10 55 00 65 00 09 30 35 00 46 ff 0f 81 02 09 31 81 02 c0 a1 02 05 0d 09 42 15 00 25 01 75 01 95 01 81 02 09 32 81 02 95 06 81 03 75 08 09 51 95 01 81 02 05 01 15 00 26 ff 0f 75 10 55 00 65 00 09 30 35 00 46 ff 0f 81 02 09 31 81 02 c0 a1 02 05 0d 09 42 15 00 25 01 75 01 95 01 81 02 09 32 81 02 95 06 81 03 75 08 09 51 95 01 81 02 05 01 15 00 26 ff 0f 75 10 55 00 65 00 09 30 35 00 46 ff 0f 81 02 09 31 81 02 c0 a1 02 05 0d 09 42 15 00 25 01 75 01 95 01 81 02 09 32 81 02 95 06 81 03 75 08 09 51 95 01 81 02 05 01 15 00 26 ff 0f 75 10 55 00 65 00 09 30 35 00 46 ff 0f 81 02 09 31 81 02 c0 05 0d 09 54 95 01 75 08 81 02 85 03 09 55 25 02 b1 02 c0 09 0e a1 01 85 04 09 23 a1 02 09 52 09 53 15 00 25 0a 75 08 95 02 b1 02 c0 c0",
+ input_info=(BusType.USB, 0x1FF7, 0x0017),
+ )
+
+
+class TestCypress_04b4_c001(BaseTest.TestMultitouch):
+ def create_device(self):
+ return Digitizer(
+ "uhid test cypress_04b4_c001",
+ rdesc="05 01 09 02 a1 01 85 01 09 01 a1 00 05 09 19 01 29 03 15 00 25 01 95 03 75 01 81 02 95 01 75 05 81 01 05 01 09 30 09 31 15 81 25 7f 75 08 95 02 81 06 c0 c0 05 0d 09 04 a1 01 85 02 09 22 09 53 95 01 75 08 81 02 a1 02 05 0d 09 42 15 00 25 01 75 01 95 01 81 02 09 32 81 02 09 47 81 02 95 05 81 03 75 08 09 51 95 01 81 02 15 00 25 20 09 48 81 02 09 49 81 02 05 01 15 00 26 d0 07 75 10 55 00 65 00 09 30 15 00 26 d0 07 35 00 45 00 81 02 09 31 45 00 81 02 c0 a1 02 05 0d 09 42 15 00 25 01 75 01 95 01 81 02 09 32 81 02 09 47 81 02 95 05 81 03 75 08 09 51 95 01 81 02 15 00 25 20 09 48 81 02 09 49 81 02 05 01 15 00 26 d0 07 75 10 55 00 65 00 09 30 15 00 26 d0 07 35 00 45 00 81 02 09 31 45 00 81 02 c0 a1 02 05 0d 09 42 15 00 25 01 75 01 95 01 81 02 09 32 81 02 09 47 81 02 95 05 81 03 75 08 09 51 95 01 81 02 15 00 25 20 09 48 81 02 09 49 81 02 05 01 15 00 26 d0 07 75 10 55 00 65 00 09 30 15 00 26 d0 07 35 00 45 00 81 02 09 31 45 00 81 02 c0 a1 02 05 0d 09 42 15 00 25 01 75 01 95 01 81 02 09 32 81 02 09 47 81 02 95 05 81 03 75 08 09 51 95 01 81 02 15 00 25 20 09 48 81 02 09 49 81 02 05 01 15 00 26 d0 07 75 10 55 00 65 00 09 30 15 00 26 d0 07 35 00 45 00 81 02 09 31 45 00 81 02 c0 a1 02 05 0d 09 42 15 00 25 01 75 01 95 01 81 02 09 32 81 02 09 47 81 02 95 05 81 03 75 08 09 51 95 01 81 02 15 00 25 20 09 48 81 02 09 49 81 02 05 01 15 00 26 d0 07 75 10 55 00 65 00 09 30 15 00 26 d0 07 35 00 45 00 81 02 09 31 45 00 81 02 c0 a1 02 05 0d 09 42 15 00 25 01 75 01 95 01 81 02 09 32 81 02 09 47 81 02 95 05 81 03 75 08 09 51 95 01 81 02 15 00 25 20 09 48 81 02 09 49 81 02 05 01 15 00 26 d0 07 75 10 55 00 65 00 09 30 15 00 26 d0 07 35 00 45 00 81 02 09 31 45 00 81 02 c0 05 0d 09 54 95 01 75 08 15 00 25 0a 81 02 09 55 b1 02 c0 09 0e a1 01 85 03 09 23 a1 02 09 52 09 53 15 00 25 0a 75 08 95 02 b1 02 c0 c0",
+ input_info=(BusType.USB, 0x04B4, 0xC001),
+ )
+
+
+class TestData_modul_7374_1232(BaseTest.TestMultitouch):
+ def create_device(self):
+ return Digitizer(
+ "uhid test data-modul_7374_1232",
+ rdesc="05 0d 09 04 a1 01 85 01 09 22 a1 00 09 42 15 00 25 01 75 01 95 01 81 02 09 32 81 02 09 37 81 02 25 1f 75 05 09 51 81 02 05 01 55 0e 65 11 35 00 75 10 46 d0 07 26 ff 0f 09 30 81 02 46 40 06 09 31 81 02 05 0d 75 08 85 02 09 55 25 10 b1 02 c0 c0",
+ input_info=(BusType.USB, 0x7374, 0x1232),
+ )
+
+
+class TestData_modul_7374_1252(BaseTest.TestMultitouch):
+ def create_device(self):
+ return Digitizer(
+ "uhid test data-modul_7374_1252",
+ rdesc="05 0d 09 04 a1 01 85 01 09 22 a1 00 09 42 15 00 25 01 75 01 95 01 81 02 09 32 81 02 09 37 81 02 25 1f 75 05 09 51 81 02 05 01 55 0e 65 11 35 00 75 10 46 d0 07 26 ff 0f 09 30 81 02 46 40 06 09 31 81 02 05 0d 75 08 85 02 09 55 25 10 b1 02 c0 c0",
+ input_info=(BusType.USB, 0x7374, 0x1252),
+ )
+
+
+class TestE4_2219_044c(BaseTest.TestMultitouch):
+ def create_device(self):
+ return Digitizer(
+ "uhid test e4_2219_044c",
+ rdesc="05 0d 09 04 a1 01 85 01 09 22 a1 02 09 42 15 00 25 01 75 01 95 01 81 02 09 32 81 02 09 47 81 02 95 05 81 03 75 08 09 51 95 01 81 02 05 01 26 ff 7f 75 10 55 00 65 00 09 30 35 00 46 00 00 81 02 09 31 46 00 00 81 02 c0 a1 02 05 0d 09 42 15 00 25 01 75 01 95 01 81 02 09 32 81 02 09 47 81 02 95 05 81 03 75 08 09 51 95 01 81 02 05 01 26 ff 7f 75 10 55 00 65 00 09 30 35 00 46 00 00 81 02 09 31 46 00 00 81 02 c0 05 0d 09 54 95 01 75 08 15 00 25 08 81 02 09 55 b1 02 c0 09 0e a1 01 85 02 09 23 a1 02 09 52 09 53 15 00 25 0a 75 08 95 02 b1 02 c0 c0 05 01 09 02 a1 01 85 03 09 01 a1 00 05 09 19 01 29 03 15 00 25 01 95 03 75 01 81 02 95 01 75 05 81 01 05 01 09 30 09 31 15 00 26 ff 7f 75 10 95 02 81 02 05 01 09 38 15 81 25 7f 75 08 95 01 81 06 c0 c0",
+ input_info=(BusType.USB, 0x2219, 0x044C),
+ )
+
+
+class TestEgalax_capacitive_0eef_7224(BaseTest.TestMultitouch):
+ def create_device(self):
+ return Digitizer(
+ "uhid test egalax-capacitive_0eef_7224",
+ rdesc="05 0d 09 04 a1 01 85 04 09 22 a1 00 09 42 15 00 25 01 75 01 95 01 81 02 09 32 15 00 25 01 81 02 09 51 75 05 95 01 16 00 00 26 10 00 81 02 09 47 75 01 95 01 15 00 25 01 81 02 05 01 09 30 75 10 95 01 55 0d 65 33 35 00 46 34 49 26 ff 7f 81 02 09 31 75 10 95 01 55 0d 65 33 35 00 46 37 29 26 ff 7f 81 02 05 0d 09 55 25 08 75 08 95 01 b1 02 c0 c0 05 01 09 01 a1 01 85 01 09 01 a1 00 05 09 19 01 29 02 15 00 25 01 95 02 75 01 81 02 95 01 75 06 81 01 05 01 09 30 09 31 16 00 00 26 ff 0f 36 00 00 46 ff 0f 66 00 00 75 10 95 02 81 02 c0 c0 06 00 ff 09 01 a1 01 09 01 15 00 26 ff 00 85 03 75 08 95 3f 81 02 06 00 ff 09 01 15 00 26 ff 00 75 08 95 3f 91 02 c0 05 0d 09 04 a1 01 85 02 09 20 a1 00 09 42 09 32 15 00 25 01 95 02 75 01 81 02 95 06 75 01 81 03 05 01 09 30 75 10 95 01 a4 55 0d 65 33 36 00 00 46 34 49 16 00 00 26 ff 0f 81 02 09 31 16 00 00 26 ff 0f 36 00 00 46 37 29 81 02 b4 c0 c0 05 0d 09 0e a1 01 85 05 09 22 a1 00 09 52 09 53 15 00 25 0a 75 08 95 02 b1 02 c0 c0",
+ input_info=(BusType.USB, 0x0EEF, 0x7224),
+ quirks=("SLOT_IS_CONTACTID", "ALWAYS_VALID"),
+ )
+
+
+class TestEgalax_capacitive_0eef_72fa(BaseTest.TestMultitouch):
+ def create_device(self):
+ return Digitizer(
+ "uhid test egalax-capacitive_0eef_72fa",
+ rdesc="05 0d 09 04 a1 01 85 04 09 22 a1 00 09 42 15 00 25 01 75 01 95 01 81 02 09 32 15 00 25 01 81 02 09 51 75 05 95 01 16 00 00 26 10 00 81 02 09 47 75 01 95 01 15 00 25 01 81 02 05 01 09 30 75 10 95 01 55 0d 65 33 35 00 46 72 22 26 ff 7f 81 02 09 31 75 10 95 01 55 0d 65 33 35 00 46 87 13 26 ff 7f 81 02 05 0d 09 55 25 08 75 08 95 01 b1 02 c0 c0 05 01 09 01 a1 01 85 01 09 01 a1 00 05 09 19 01 29 02 15 00 25 01 95 02 75 01 81 02 95 01 75 06 81 01 05 01 09 30 09 31 16 00 00 26 ff 0f 36 00 00 46 ff 0f 66 00 00 75 10 95 02 81 02 c0 c0 06 00 ff 09 01 a1 01 09 01 15 00 26 ff 00 85 03 75 08 95 3f 81 02 06 00 ff 09 01 15 00 26 ff 00 75 08 95 3f 91 02 c0 05 0d 09 04 a1 01 85 02 09 20 a1 00 09 42 09 32 15 00 25 01 95 02 75 01 81 02 95 06 75 01 81 03 05 01 09 30 75 10 95 01 a4 55 0d 65 33 36 00 00 46 72 22 16 00 00 26 ff 0f 81 02 09 31 16 00 00 26 ff 0f 36 00 00 46 87 13 81 02 b4 c0 c0 05 0d 09 0e a1 01 85 05 09 22 a1 00 09 52 09 53 15 00 25 0a 75 08 95 02 b1 02 c0 c0",
+ input_info=(BusType.USB, 0x0EEF, 0x72FA),
+ quirks=("SLOT_IS_CONTACTID", "VALID_IS_INRANGE"),
+ )
+
+
+class TestEgalax_capacitive_0eef_7336(BaseTest.TestMultitouch):
+ def create_device(self):
+ return Digitizer(
+ "uhid test egalax-capacitive_0eef_7336",
+ rdesc="05 0d 09 04 a1 01 85 04 09 22 a1 00 09 42 15 00 25 01 75 01 95 01 81 02 09 32 15 00 25 01 81 02 09 51 75 05 95 01 16 00 00 26 10 00 81 02 09 47 75 01 95 01 15 00 25 01 81 02 05 01 09 30 75 10 95 01 55 0d 65 33 35 00 46 c1 20 26 ff 7f 81 02 09 31 75 10 95 01 55 0d 65 33 35 00 46 c2 18 26 ff 7f 81 02 05 0d 09 55 25 08 75 08 95 01 b1 02 c0 c0 05 01 09 01 a1 01 85 01 09 01 a1 00 05 09 19 01 29 02 15 00 25 01 95 02 75 01 81 02 95 01 75 06 81 01 05 01 09 30 09 31 16 00 00 26 ff 0f 36 00 00 46 ff 0f 66 00 00 75 10 95 02 81 02 c0 c0 06 00 ff 09 01 a1 01 09 01 15 00 26 ff 00 85 03 75 08 95 3f 81 02 06 00 ff 09 01 15 00 26 ff 00 75 08 95 3f 91 02 c0 05 0d 09 04 a1 01 85 02 09 20 a1 00 09 42 09 32 15 00 25 01 95 02 75 01 81 02 95 06 75 01 81 03 05 01 09 30 75 10 95 01 a4 55 0d 65 33 36 00 00 46 c1 20 16 00 00 26 ff 0f 81 02 09 31 16 00 00 26 ff 0f 36 00 00 46 c2 18 81 02 b4 c0 c0 05 0d 09 0e a1 01 85 05 09 22 a1 00 09 52 09 53 15 00 25 0a 75 08 95 02 b1 02 c0 c0",
+ input_info=(BusType.USB, 0x0EEF, 0x7336),
+ )
+
+
+class TestEgalax_capacitive_0eef_7337(BaseTest.TestMultitouch):
+ def create_device(self):
+ return Digitizer(
+ "uhid test egalax-capacitive_0eef_7337",
+ rdesc="05 0d 09 04 a1 01 85 04 09 22 a1 00 09 42 15 00 25 01 75 01 95 01 81 02 09 32 15 00 25 01 81 02 09 51 75 05 95 01 16 00 00 26 10 00 81 02 09 47 75 01 95 01 15 00 25 01 81 02 05 01 09 30 75 10 95 01 55 0d 65 33 35 00 46 ae 17 26 ff 7f 81 02 09 31 75 10 95 01 55 0d 65 33 35 00 46 c3 0e 26 ff 7f 81 02 05 0d 09 55 25 08 75 08 95 01 b1 02 c0 c0 05 01 09 01 a1 01 85 01 09 01 a1 00 05 09 19 01 29 02 15 00 25 01 95 02 75 01 81 02 95 01 75 06 81 01 05 01 09 30 09 31 16 00 00 26 ff 0f 36 00 00 46 ff 0f 66 00 00 75 10 95 02 81 02 c0 c0 06 00 ff 09 01 a1 01 09 01 15 00 26 ff 00 85 03 75 08 95 3f 81 02 06 00 ff 09 01 15 00 26 ff 00 75 08 95 3f 91 02 c0 05 0d 09 04 a1 01 85 02 09 20 a1 00 09 42 09 32 15 00 25 01 95 02 75 01 81 02 95 06 75 01 81 03 05 01 09 30 75 10 95 01 a4 55 0d 65 33 36 00 00 46 ae 17 16 00 00 26 ff 0f 81 02 09 31 16 00 00 26 ff 0f 36 00 00 46 c3 0e 81 02 b4 c0 c0 05 0d 09 0e a1 01 85 05 09 22 a1 00 09 52 09 53 15 00 25 0a 75 08 95 02 b1 02 c0 c0",
+ input_info=(BusType.USB, 0x0EEF, 0x7337),
+ )
+
+
+class TestEgalax_capacitive_0eef_7349(BaseTest.TestMultitouch):
+ def create_device(self):
+ return Digitizer(
+ "uhid test egalax-capacitive_0eef_7349",
+ rdesc="05 0d 09 04 a1 01 85 04 09 22 a1 00 09 42 15 00 25 01 75 01 95 01 81 02 09 32 15 00 25 01 81 02 09 51 75 05 95 01 16 00 00 26 10 00 81 02 09 47 75 01 95 01 15 00 25 01 81 02 05 01 09 30 75 10 95 01 55 0d 65 33 35 00 46 34 49 26 ff 7f 81 02 09 31 75 10 95 01 55 0d 65 33 35 00 46 37 29 26 ff 7f 81 02 05 0d 09 55 25 08 75 08 95 01 b1 02 c0 c0 05 01 09 01 a1 01 85 01 09 01 a1 00 05 09 19 01 29 02 15 00 25 01 95 02 75 01 81 02 95 01 75 06 81 01 05 01 09 30 09 31 16 00 00 26 ff 0f 36 00 00 46 ff 0f 66 00 00 75 10 95 02 81 02 c0 c0 06 00 ff 09 01 a1 01 09 01 15 00 26 ff 00 85 03 75 08 95 3f 81 02 06 00 ff 09 01 15 00 26 ff 00 75 08 95 3f 91 02 c0 05 0d 09 04 a1 01 85 02 09 20 a1 00 09 42 09 32 15 00 25 01 95 02 75 01 81 02 95 06 75 01 81 03 05 01 09 30 75 10 95 01 a4 55 0d 65 33 36 00 00 46 34 49 16 00 00 26 ff 0f 81 02 09 31 16 00 00 26 ff 0f 36 00 00 46 37 29 81 02 b4 c0 c0 05 0d 09 0e a1 01 85 05 09 22 a1 00 09 52 09 53 15 00 25 0a 75 08 95 02 b1 02 c0 c0",
+ input_info=(BusType.USB, 0x0EEF, 0x7349),
+ quirks=("SLOT_IS_CONTACTID", "ALWAYS_VALID"),
+ )
+
+
+class TestEgalax_capacitive_0eef_73f4(BaseTest.TestMultitouch):
+ def create_device(self):
+ return Digitizer(
+ "uhid test egalax-capacitive_0eef_73f4",
+ rdesc="05 0d 09 04 a1 01 85 04 09 22 a1 00 09 42 15 00 25 01 75 01 95 01 81 02 09 32 15 00 25 01 81 02 09 51 75 05 95 01 16 00 00 26 10 00 81 02 09 47 75 01 95 01 15 00 25 01 81 02 05 01 09 30 75 10 95 01 55 0d 65 33 35 00 46 96 4e 26 ff 7f 81 02 09 31 75 10 95 01 55 0d 65 33 35 00 46 23 2c 26 ff 7f 81 02 05 0d 09 55 25 08 75 08 95 01 b1 02 c0 c0 05 01 09 01 a1 01 85 01 09 01 a1 00 05 09 19 01 29 02 15 00 25 01 95 02 75 01 81 02 95 01 75 06 81 01 05 01 09 30 09 31 16 00 00 26 ff 0f 36 00 00 46 ff 0f 66 00 00 75 10 95 02 81 02 c0 c0 06 00 ff 09 01 a1 01 09 01 15 00 26 ff 00 85 03 75 08 95 3f 81 02 06 00 ff 09 01 15 00 26 ff 00 75 08 95 3f 91 02 c0 05 0d 09 04 a1 01 85 02 09 20 a1 00 09 42 09 32 15 00 25 01 95 02 75 01 81 02 95 06 75 01 81 03 05 01 09 30 75 10 95 01 a4 55 0d 65 33 36 00 00 46 96 4e 16 00 00 26 ff 0f 81 02 09 31 16 00 00 26 ff 0f 36 00 00 46 23 2c 81 02 b4 c0 c0 05 0d 09 0e a1 01 85 05 09 22 a1 00 09 52 09 53 15 00 25 0a 75 08 95 02 b1 02 c0 c0",
+ input_info=(BusType.USB, 0x0EEF, 0x73F4),
+ )
+
+
+class TestEgalax_capacitive_0eef_a001(BaseTest.TestMultitouch):
+ def create_device(self):
+ return Digitizer(
+ "uhid test egalax-capacitive_0eef_a001",
+ rdesc="05 0d 09 04 a1 01 85 04 09 22 a1 00 09 42 15 00 25 01 75 01 95 01 81 02 09 32 15 00 25 01 81 02 09 51 75 05 95 01 16 00 00 26 10 00 81 02 09 47 75 01 95 01 15 00 25 01 81 02 05 01 09 30 75 10 95 01 55 0d 65 33 35 00 46 23 28 26 ff 7f 81 02 09 31 75 10 95 01 55 0d 65 33 35 00 46 11 19 26 ff 7f 81 02 05 0d 09 55 25 08 75 08 95 01 b1 02 c0 c0 05 01 09 01 a1 01 85 01 09 01 a1 00 05 09 19 01 29 02 15 00 25 01 95 02 75 01 81 02 95 01 75 06 81 01 05 01 09 30 09 31 16 00 00 26 ff 0f 36 00 00 46 ff 0f 66 00 00 75 10 95 02 81 02 c0 c0 06 00 ff 09 01 a1 01 09 01 15 00 26 ff 00 85 03 75 08 95 3f 81 02 06 00 ff 09 01 15 00 26 ff 00 75 08 95 3f 91 02 c0 05 0d 09 0e a1 01 85 05 09 22 a1 00 09 52 09 53 15 00 25 0a 75 08 95 02 b1 02 c0 c0",
+ input_info=(BusType.USB, 0x0EEF, 0xA001),
+ quirks=("SLOT_IS_CONTACTID", "VALID_IS_INRANGE"),
+ )
+
+
+class TestElo_touchsystems_04e7_0022(BaseTest.TestMultitouch):
+ def create_device(self):
+ return Digitizer(
+ "uhid test elo-touchsystems_04e7_0022",
+ rdesc="05 0d 09 04 a1 01 85 01 09 22 a1 02 09 42 15 00 25 01 75 01 95 01 81 02 09 32 81 02 95 06 81 03 75 08 09 51 95 01 81 02 05 01 26 ff 0f 75 10 55 0e 65 33 09 30 35 00 46 ff 0f 81 02 46 ff 0f 09 31 81 02 c0 a1 02 05 0d 09 42 15 00 25 01 75 01 95 01 81 02 09 32 81 02 95 06 81 03 75 08 09 51 95 01 81 02 05 01 26 ff 0f 75 10 55 00 65 00 09 30 35 00 46 ff 0f 81 02 46 ff 0f 09 31 81 02 c0 05 0d 09 54 25 10 95 01 75 08 81 02 85 08 09 55 25 02 b1 02 c0 09 0e a1 01 85 07 09 22 a1 00 09 52 09 53 15 00 25 0a 75 08 95 02 b1 02 c0 06 00 ff 09 55 85 80 15 00 26 ff 00 75 08 95 01 b1 82 c0 05 01 09 02 a1 01 85 54 09 01 a1 00 05 09 19 01 29 02 15 00 25 01 75 01 95 02 81 02 95 06 81 03 05 01 09 30 15 00 26 ff 0f 75 10 95 01 81 02 09 31 75 10 95 01 81 02 09 3b 16 00 00 26 00 01 36 00 00 46 00 01 66 00 00 75 10 95 01 81 62 c0 c0",
+ input_info=(BusType.USB, 0x04E7, 0x0022),
+ )
+
+
+class TestElo_touchsystems_04e7_0080(BaseTest.TestMultitouch):
+ def create_device(self):
+ return Digitizer(
+ "uhid test elo-touchsystems_04e7_0080",
+ rdesc="05 0d 09 04 a1 01 09 22 a1 02 09 42 15 00 25 01 75 01 95 01 81 02 95 03 81 03 09 32 09 47 95 02 81 02 95 02 81 03 09 51 75 08 95 01 81 02 05 01 26 ff 7f 65 11 55 0e 46 7c 24 75 10 95 01 09 30 81 02 09 31 46 96 14 81 02 c0 a1 02 05 0d 09 42 15 00 25 01 75 01 95 01 81 02 95 03 81 03 09 32 09 47 95 02 81 02 95 02 81 03 09 51 75 08 95 01 81 02 05 01 26 ff 7f 65 11 55 0e 46 7c 24 75 10 95 01 09 30 81 02 09 31 46 96 14 81 02 c0 a1 02 05 0d 09 42 15 00 25 01 75 01 95 01 81 02 95 03 81 03 09 32 09 47 95 02 81 02 95 02 81 03 09 51 75 08 95 01 81 02 05 01 26 ff 7f 65 11 55 0e 46 7c 24 75 10 95 01 09 30 81 02 09 31 46 96 14 81 02 c0 a1 02 05 0d 09 42 15 00 25 01 75 01 95 01 81 02 95 03 81 03 09 32 09 47 95 02 81 02 95 02 81 03 09 51 75 08 95 01 81 02 05 01 26 ff 7f 65 11 55 0e 46 7c 24 75 10 95 01 09 30 81 02 09 31 46 96 14 81 02 c0 05 0d 09 54 75 08 95 01 15 00 25 08 81 02 09 55 b1 02 c0",
+ input_info=(BusType.USB, 0x04E7, 0x0080),
+ )
+
+
+class TestFlatfrog_25b5_0002(BaseTest.TestMultitouch):
+ def create_device(self):
+ return Digitizer(
+ "uhid test flatfrog_25b5_0002",
+ rdesc="05 0d 09 04 a1 01 85 05 09 22 a1 02 05 0d 15 00 25 01 75 01 95 01 09 42 81 02 09 32 81 02 95 06 81 03 75 08 95 01 25 7f 09 51 81 02 05 01 65 11 55 0e 75 10 35 00 26 a6 2b 46 48 1b 09 30 81 02 26 90 18 46 59 0f 09 31 81 02 05 0d 65 11 55 0f 75 08 25 7f 45 7f 09 48 81 02 09 49 81 02 65 00 55 00 75 10 26 00 04 46 00 04 09 30 81 02 c0 05 0d 09 22 65 00 55 00 a1 02 05 0d 15 00 25 01 75 01 95 01 09 42 81 02 09 32 81 02 95 06 81 03 75 08 95 01 25 7f 09 51 81 02 05 01 65 11 55 0e 75 10 35 00 26 a6 2b 46 48 1b 09 30 81 02 26 90 18 46 59 0f 09 31 81 02 05 0d 65 11 55 0f 75 08 25 7f 45 7f 09 48 81 02 09 49 81 02 65 00 55 00 75 10 26 00 04 46 00 04 09 30 81 02 c0 05 0d 09 22 65 00 55 00 a1 02 05 0d 15 00 25 01 75 01 95 01 09 42 81 02 09 32 81 02 95 06 81 03 75 08 95 01 25 7f 09 51 81 02 05 01 65 11 55 0e 75 10 35 00 26 a6 2b 46 48 1b 09 30 81 02 26 90 18 46 59 0f 09 31 81 02 05 0d 65 11 55 0f 75 08 25 7f 45 7f 09 48 81 02 09 49 81 02 65 00 55 00 75 10 26 00 04 46 00 04 09 30 81 02 c0 05 0d 09 22 65 00 55 00 a1 02 05 0d 15 00 25 01 75 01 95 01 09 42 81 02 09 32 81 02 95 06 81 03 75 08 95 01 25 7f 09 51 81 02 05 01 65 11 55 0e 75 10 35 00 26 a6 2b 46 48 1b 09 30 81 02 26 90 18 46 59 0f 09 31 81 02 05 0d 65 11 55 0f 75 08 25 7f 45 7f 09 48 81 02 09 49 81 02 65 00 55 00 75 10 26 00 04 46 00 04 09 30 81 02 c0 05 0d 09 22 65 00 55 00 a1 02 05 0d 15 00 25 01 75 01 95 01 09 42 81 02 09 32 81 02 95 06 81 03 75 08 95 01 25 7f 09 51 81 02 05 01 65 11 55 0e 75 10 35 00 26 a6 2b 46 48 1b 09 30 81 02 26 90 18 46 59 0f 09 31 81 02 05 0d 65 11 55 0f 75 08 25 7f 45 7f 09 48 81 02 09 49 81 02 65 00 55 00 75 10 26 00 04 46 00 04 09 30 81 02 c0 05 0d 09 22 65 00 55 00 a1 02 05 0d 15 00 25 01 75 01 95 01 09 42 81 02 09 32 81 02 95 06 81 03 75 08 95 01 25 7f 09 51 81 02 05 01 65 11 55 0e 75 10 35 00 26 a6 2b 46 48 1b 09 30 81 02 26 90 18 46 59 0f 09 31 81 02 05 0d 65 11 55 0f 75 08 25 7f 45 7f 09 48 81 02 09 49 81 02 65 00 55 00 75 10 26 00 04 46 00 04 09 30 81 02 c0 05 0d 09 22 65 00 55 00 a1 02 05 0d 15 00 25 01 75 01 95 01 09 42 81 02 09 32 81 02 95 06 81 03 75 08 95 01 25 7f 09 51 81 02 05 01 65 11 55 0e 75 10 35 00 26 a6 2b 46 48 1b 09 30 81 02 26 90 18 46 59 0f 09 31 81 02 05 0d 65 11 55 0f 75 08 25 7f 45 7f 09 48 81 02 09 49 81 02 65 00 55 00 75 10 26 00 04 46 00 04 09 30 81 02 c0 05 0d 09 22 65 00 55 00 a1 02 05 0d 15 00 25 01 75 01 95 01 09 42 81 02 09 32 81 02 95 06 81 03 75 08 95 01 25 7f 09 51 81 02 05 01 65 11 55 0e 75 10 35 00 26 a6 2b 46 48 1b 09 30 81 02 26 90 18 46 59 0f 09 31 81 02 05 0d 65 11 55 0f 75 08 25 7f 45 7f 09 48 81 02 09 49 81 02 65 00 55 00 75 10 26 00 04 46 00 04 09 30 81 02 c0 05 0d 09 22 65 00 55 00 a1 02 05 0d 15 00 25 01 75 01 95 01 09 42 81 02 09 32 81 02 95 06 81 03 75 08 95 01 25 7f 09 51 81 02 05 01 65 11 55 0e 75 10 35 00 26 a6 2b 46 48 1b 09 30 81 02 26 90 18 46 59 0f 09 31 81 02 05 0d 65 11 55 0f 75 08 25 7f 45 7f 09 48 81 02 09 49 81 02 65 00 55 00 75 10 26 00 04 46 00 04 09 30 81 02 c0 05 0d 09 22 65 00 55 00 a1 02 05 0d 15 00 25 01 75 01 95 01 09 42 81 02 09 32 81 02 95 06 81 03 75 08 95 01 25 7f 09 51 81 02 05 01 65 11 55 0e 75 10 35 00 26 a6 2b 46 48 1b 09 30 81 02 26 90 18 46 59 0f 09 31 81 02 05 0d 65 11 55 0f 75 08 25 7f 45 7f 09 48 81 02 09 49 81 02 65 00 55 00 75 10 26 00 04 46 00 04 09 30 81 02 c0 05 0d 09 22 65 00 55 00 a1 02 05 0d 15 00 25 01 75 01 95 01 09 42 81 02 09 32 81 02 95 06 81 03 75 08 95 01 25 7f 09 51 81 02 05 01 65 11 55 0e 75 10 35 00 26 a6 2b 46 48 1b 09 30 81 02 26 90 18 46 59 0f 09 31 81 02 05 0d 65 11 55 0f 75 08 25 7f 45 7f 09 48 81 02 09 49 81 02 65 00 55 00 75 10 26 00 04 46 00 04 09 30 81 02 c0 05 0d 09 22 65 00 55 00 a1 02 05 0d 15 00 25 01 75 01 95 01 09 42 81 02 09 32 81 02 95 06 81 03 75 08 95 01 25 7f 09 51 81 02 05 01 65 11 55 0e 75 10 35 00 26 a6 2b 46 48 1b 09 30 81 02 26 90 18 46 59 0f 09 31 81 02 05 0d 65 11 55 0f 75 08 25 7f 45 7f 09 48 81 02 09 49 81 02 65 00 55 00 75 10 26 00 04 46 00 04 09 30 81 02 c0 05 0d 09 22 65 00 55 00 a1 02 05 0d 15 00 25 01 75 01 95 01 09 42 81 02 09 32 81 02 95 06 81 03 75 08 95 01 25 7f 09 51 81 02 05 01 65 11 55 0e 75 10 35 00 26 a6 2b 46 48 1b 09 30 81 02 26 90 18 46 59 0f 09 31 81 02 05 0d 65 11 55 0f 75 08 25 7f 45 7f 09 48 81 02 09 49 81 02 65 00 55 00 75 10 26 00 04 46 00 04 09 30 81 02 c0 05 0d 09 22 65 00 55 00 a1 02 05 0d 15 00 25 01 75 01 95 01 09 42 81 02 09 32 81 02 95 06 81 03 75 08 95 01 25 7f 09 51 81 02 05 01 65 11 55 0e 75 10 35 00 26 a6 2b 46 48 1b 09 30 81 02 26 90 18 46 59 0f 09 31 81 02 05 0d 65 11 55 0f 75 08 25 7f 45 7f 09 48 81 02 09 49 81 02 65 00 55 00 75 10 26 00 04 46 00 04 09 30 81 02 c0 05 0d 09 22 65 00 55 00 a1 02 05 0d 15 00 25 01 75 01 95 01 09 42 81 02 09 32 81 02 95 06 81 03 75 08 95 01 25 7f 09 51 81 02 05 01 65 11 55 0e 75 10 35 00 26 a6 2b 46 48 1b 09 30 81 02 26 90 18 46 59 0f 09 31 81 02 05 0d 65 11 55 0f 75 08 25 7f 45 7f 09 48 81 02 09 49 81 02 65 00 55 00 75 10 26 00 04 46 00 04 09 30 81 02 c0 05 0d 09 22 65 00 55 00 a1 02 05 0d 15 00 25 01 75 01 95 01 09 42 81 02 09 32 81 02 95 06 81 03 75 08 95 01 25 7f 09 51 81 02 05 01 65 11 55 0e 75 10 35 00 26 a6 2b 46 48 1b 09 30 81 02 26 90 18 46 59 0f 09 31 81 02 05 0d 65 11 55 0f 75 08 25 7f 45 7f 09 48 81 02 09 49 81 02 65 00 55 00 75 10 26 00 04 46 00 04 09 30 81 02 c0 05 0d 09 22 65 00 55 00 a1 02 05 0d 15 00 25 01 75 01 95 01 09 42 81 02 09 32 81 02 95 06 81 03 75 08 95 01 25 7f 09 51 81 02 05 01 65 11 55 0e 75 10 35 00 26 a6 2b 46 48 1b 09 30 81 02 26 90 18 46 59 0f 09 31 81 02 05 0d 65 11 55 0f 75 08 25 7f 45 7f 09 48 81 02 09 49 81 02 65 00 55 00 75 10 26 00 04 46 00 04 09 30 81 02 c0 05 0d 09 22 65 00 55 00 a1 02 05 0d 15 00 25 01 75 01 95 01 09 42 81 02 09 32 81 02 95 06 81 03 75 08 95 01 25 7f 09 51 81 02 05 01 65 11 55 0e 75 10 35 00 26 a6 2b 46 48 1b 09 30 81 02 26 90 18 46 59 0f 09 31 81 02 05 0d 65 11 55 0f 75 08 25 7f 45 7f 09 48 81 02 09 49 81 02 65 00 55 00 75 10 26 00 04 46 00 04 09 30 81 02 c0 05 0d 09 22 65 00 55 00 a1 02 05 0d 15 00 25 01 75 01 95 01 09 42 81 02 09 32 81 02 95 06 81 03 75 08 95 01 25 7f 09 51 81 02 05 01 65 11 55 0e 75 10 35 00 26 a6 2b 46 48 1b 09 30 81 02 26 90 18 46 59 0f 09 31 81 02 05 0d 65 11 55 0f 75 08 25 7f 45 7f 09 48 81 02 09 49 81 02 65 00 55 00 75 10 26 00 04 46 00 04 09 30 81 02 c0 05 0d 09 22 65 00 55 00 a1 02 05 0d 15 00 25 01 75 01 95 01 09 42 81 02 09 32 81 02 95 06 81 03 75 08 95 01 25 7f 09 51 81 02 05 01 65 11 55 0e 75 10 35 00 26 a6 2b 46 48 1b 09 30 81 02 26 90 18 46 59 0f 09 31 81 02 05 0d 65 11 55 0f 75 08 25 7f 45 7f 09 48 81 02 09 49 81 02 65 00 55 00 75 10 26 00 04 46 00 04 09 30 81 02 c0 65 00 55 00 05 0d 55 0c 66 01 10 75 20 95 01 27 ff ff ff 7f 45 00 09 56 81 02 75 08 95 01 15 00 25 28 09 54 81 02 09 55 85 06 25 28 b1 02 c0 65 00 55 00 45 00 09 0e a1 01 85 03 09 23 a1 02 09 52 15 02 25 02 75 08 95 01 b1 02 09 53 15 00 25 0a 75 08 95 01 b1 02 c0 c0",
+ input_info=(BusType.USB, 0x25B5, 0x0002),
+ quirks=("NOT_SEEN_MEANS_UP", "NO_AREA"),
+ max_contacts=40,
+ )
+
+
+class TestFocaltech_10c4_81b9(BaseTest.TestMultitouch):
+ def create_device(self):
+ return Digitizer(
+ "uhid test focaltech_10c4_81b9",
+ rdesc="05 0d 09 04 a1 01 85 01 09 22 a1 02 09 42 15 00 25 01 75 01 95 01 81 02 09 32 81 02 95 06 81 03 75 08 09 51 95 01 81 02 05 01 26 00 04 75 10 55 00 65 00 09 30 35 00 46 00 00 81 02 26 58 02 09 31 46 00 00 81 02 c0 a1 02 05 0d 09 42 15 00 25 01 75 01 95 01 81 02 09 32 81 02 95 06 81 03 75 08 09 51 95 01 81 02 05 01 26 00 04 75 10 55 00 65 00 09 30 35 00 46 00 00 81 02 26 58 02 09 31 46 00 00 81 02 c0 a1 02 05 0d 09 42 15 00 25 01 75 01 95 01 81 02 09 32 81 02 95 06 81 03 75 08 09 51 95 01 81 02 05 01 26 00 04 75 10 55 00 65 00 09 30 35 00 46 00 00 81 02 26 58 02 09 31 46 00 00 81 02 c0 a1 02 05 0d 09 42 15 00 25 01 75 01 95 01 81 02 09 32 81 02 95 06 81 03 75 08 09 51 95 01 81 02 05 01 26 00 04 75 10 55 00 65 00 09 30 35 00 46 00 00 81 02 26 58 02 09 31 46 00 00 81 02 c0 a1 02 05 0d 09 42 15 00 25 01 75 01 95 01 81 02 09 32 81 02 95 06 81 03 75 08 09 51 95 01 81 02 05 01 26 00 04 75 10 55 00 65 00 09 30 35 00 46 00 00 81 02 26 58 02 09 31 46 00 00 81 02 c0 05 0d 09 54 95 01 75 08 15 00 25 08 81 02 85 02 09 55 75 08 95 01 b1 02 c0",
+ input_info=(BusType.USB, 0x10C4, 0x81B9),
+ quirks=("ALWAYS_VALID",),
+ max_contacts=5,
+ )
+
+
+class TestHanvon_20b3_0a18(BaseTest.TestMultitouch):
+ def create_device(self):
+ return Digitizer(
+ "uhid test hanvon_20b3_0a18",
+ rdesc="05 0d 09 04 a1 01 85 01 09 22 a1 02 09 42 15 00 25 01 75 01 95 01 81 02 09 32 81 02 09 47 81 02 95 05 81 03 09 51 75 08 95 01 81 02 05 01 35 00 55 0e 65 33 75 10 95 01 09 30 26 ff 4b 46 70 03 81 02 09 31 26 ff 2b 46 f1 01 81 02 46 00 00 c0 a1 02 05 0d 09 42 15 00 25 01 75 01 95 01 81 02 09 32 81 02 09 47 81 02 95 05 81 03 09 51 75 08 95 01 81 02 05 01 35 00 55 0e 65 33 75 10 95 01 09 30 26 ff 4b 46 70 03 81 02 09 31 26 ff 2b 46 f1 01 81 02 46 00 00 c0 05 0d 09 54 75 08 95 01 81 02 05 0d 85 02 09 55 25 02 75 08 95 01 b1 02 c0",
+ input_info=(BusType.USB, 0x20B3, 0x0A18),
+ )
+
+
+class TestHuitoo_03f7_0003(BaseTest.TestMultitouch):
+ def create_device(self):
+ return Digitizer(
+ "uhid test huitoo_03f7_0003",
+ rdesc="05 0d 09 04 a1 01 85 01 09 22 a1 02 09 42 15 00 25 01 75 01 95 01 81 02 09 32 81 02 09 47 81 02 95 05 81 03 75 08 09 51 95 01 81 02 05 01 75 10 55 00 65 00 35 00 46 ff 0f 09 30 26 ff 0f 81 02 09 31 26 ff 0f 81 02 05 0d 09 48 26 ff 0f 81 02 09 49 26 ff 0f 81 02 c0 a1 02 09 42 15 00 25 01 75 01 95 01 81 02 09 32 81 02 09 47 81 02 95 05 81 03 75 08 09 51 95 01 81 02 05 01 75 10 55 00 65 00 35 00 46 ff 0f 09 30 26 ff 0f 81 02 09 31 26 ff 0f 81 02 05 0d 09 48 26 ff 0f 81 02 09 49 26 ff 0f 81 02 c0 a1 02 09 42 15 00 25 01 75 01 95 01 81 02 09 32 81 02 09 47 81 02 95 05 81 03 75 08 09 51 95 01 81 02 05 01 75 10 55 00 65 00 35 00 46 ff 0f 09 30 26 ff 0f 81 02 09 31 26 ff 0f 81 02 05 0d 09 48 26 ff 0f 81 02 09 49 26 ff 0f 81 02 c0 a1 02 09 42 15 00 25 01 75 01 95 01 81 02 09 32 81 02 09 47 81 02 95 05 81 03 75 08 09 51 95 01 81 02 05 01 75 10 55 00 65 00 35 00 46 ff 0f 09 30 26 ff 0f 81 02 09 31 26 ff 0f 81 02 05 0d 09 48 26 ff 0f 81 02 09 49 26 ff 0f 81 02 c0 a1 02 09 42 15 00 25 01 75 01 95 01 81 02 09 32 81 02 09 47 81 02 95 05 81 03 75 08 09 51 95 01 81 02 05 01 75 10 55 00 65 00 35 00 46 ff 0f 09 30 26 ff 0f 81 02 09 31 26 ff 0f 81 02 05 0d 09 48 26 ff 0f 81 02 09 49 26 ff 0f 81 02 c0 a1 02 09 42 15 00 25 01 75 01 95 01 81 02 09 32 81 02 09 47 81 02 95 05 81 03 75 08 09 51 95 01 81 02 05 01 75 10 55 00 65 00 35 00 46 ff 0f 09 30 26 ff 0f 81 02 09 31 26 ff 0f 81 02 05 0d 09 48 26 ff 0f 81 02 09 49 26 ff 0f 81 02 c0 05 0d 09 54 95 01 75 08 15 00 25 08 81 02 09 55 b1 02 c0 09 0e a1 01 85 02 09 23 a1 02 09 52 09 53 15 00 25 10 75 08 95 02 b1 02 c0 c0 05 01 09 02 a1 01 85 03 09 01 a1 00 05 09 19 01 29 03 15 00 25 01 95 03 75 01 81 02 95 01 75 05 81 01 05 01 09 30 09 31 15 00 26 ff 0f 35 00 46 ff 0f 75 10 95 02 81 02 c0 c0 06 00 ff 09 01 a1 01 85 04 15 00 26 ff 00 75 08 95 3f 09 02 81 02 95 3f 09 02 91 02 c0",
+ input_info=(BusType.USB, 0x03F7, 0x0003),
+ )
+
+
+class TestIdeacom_1cb6_6650(BaseTest.TestMultitouch):
+ def create_device(self):
+ return Digitizer(
+ "uhid test ideacom_1cb6_6650",
+ rdesc="05 0d 09 04 a1 01 85 0a 09 22 a1 00 09 42 09 32 15 00 25 01 95 02 75 01 81 02 95 06 81 03 05 01 26 ff 1f 75 10 95 01 55 0d 65 33 09 31 35 00 46 61 13 81 02 09 30 46 73 22 81 02 05 0d 75 08 95 01 09 30 26 ff 00 81 02 09 51 81 02 85 0c 09 55 25 02 95 01 b1 02 c0 06 00 ff 85 02 09 01 75 08 95 07 b1 02 85 03 09 02 75 08 95 07 b1 02 85 04 09 03 75 08 95 07 b1 02 85 05 09 04 75 08 95 07 b1 02 85 06 09 05 75 08 96 27 00 b1 02 85 07 09 06 75 08 96 27 00 b1 02 85 08 09 07 75 08 95 07 b1 02 85 09 09 08 75 08 95 07 b1 02 85 0b 09 09 75 08 96 07 00 b1 02 85 0d 09 0a 75 08 96 27 00 b1 02 c0 09 0e a1 01 85 0e 09 52 09 53 95 07 b1 02 c0 05 01 09 02 a1 01 85 01 09 01 a1 00 05 09 19 01 29 02 15 00 25 01 75 01 95 02 81 02 75 06 95 01 81 01 05 01 09 31 09 30 15 00 27 ff 1f 00 00 75 10 95 02 81 02 c0 09 01 a1 02 15 00 26 ff 00 95 02 75 08 81 03 c0 c0",
+ input_info=(BusType.USB, 0x1CB6, 0x6650),
+ )
+
+
+class TestIdeacom_1cb6_6651(BaseTest.TestMultitouch):
+ def create_device(self):
+ return Digitizer(
+ "uhid test ideacom_1cb6_6651",
+ rdesc="05 0d 09 04 a1 01 85 0a 09 22 a1 02 09 42 09 32 15 00 25 01 95 02 75 01 81 02 95 06 81 03 05 01 26 ff 1f 75 10 95 01 55 0d 65 33 09 31 35 00 46 39 13 81 02 09 30 46 24 22 81 02 05 0d 75 08 95 01 09 30 26 ff 00 81 02 09 51 81 02 85 0c 09 55 25 02 95 01 b1 02 c0 06 00 ff 85 02 09 01 75 08 95 07 b1 02 85 03 09 02 75 08 95 07 b1 02 85 04 09 03 75 08 95 07 b1 02 85 05 09 04 75 08 95 07 b1 02 85 06 09 05 75 08 95 1f b1 02 85 07 09 06 75 08 96 1f 00 b1 02 85 08 09 07 75 08 95 07 b1 02 85 09 09 08 75 08 95 07 b1 02 85 0b 09 09 75 08 95 07 b1 02 85 0d 09 0a 75 08 96 1f 00 b1 02 c0 09 0e a1 01 85 0e 09 52 09 53 95 07 b1 02 c0 05 01 09 02 a1 01 85 01 09 01 a1 00 05 09 19 01 29 02 15 00 25 01 75 01 95 02 81 02 75 06 95 01 81 01 05 01 09 31 09 30 15 00 27 ff 1f 00 00 75 10 95 02 81 02 c0 09 01 a1 02 15 00 26 ff 00 95 02 75 08 81 03 c0 c0",
+ input_info=(BusType.USB, 0x1CB6, 0x6651),
+ )
+
+
+class TestIkaist_2793_0001(BaseTest.TestMultitouch):
+ def create_device(self):
+ return Digitizer(
+ "uhid test ikaist_2793_0001",
+ rdesc="05 01 09 01 a1 01 85 01 09 01 a1 00 05 09 09 01 95 01 75 01 15 00 25 01 81 02 95 07 75 01 81 03 95 01 75 08 81 03 05 01 09 30 09 31 15 00 26 ff 7f 35 00 46 00 00 95 02 75 10 81 02 c0 a1 02 15 00 26 ff 00 09 01 95 39 75 08 81 03 c0 c0 05 0d 09 0e a1 01 85 11 09 23 a1 02 09 52 09 53 15 00 25 0a 75 08 95 02 b1 02 c0 c0 09 04 a1 01 85 13 05 0d 09 22 a1 02 09 42 15 00 25 01 75 01 95 01 81 02 09 32 81 02 09 47 81 02 95 05 81 03 75 08 09 51 95 01 81 02 05 01 26 ff 7f 75 10 55 0e 65 33 09 30 35 00 46 51 07 81 02 09 31 46 96 04 81 02 c0 05 0d 09 22 a1 02 09 42 15 00 25 01 75 01 95 01 81 02 09 32 81 02 09 47 81 02 95 05 81 03 75 08 09 51 95 01 81 02 05 01 26 ff 7f 75 10 55 0e 65 33 09 30 35 00 46 51 07 81 02 09 31 46 96 04 81 02 c0 05 0d 09 22 a1 02 09 42 15 00 25 01 75 01 95 01 81 02 09 32 81 02 09 47 81 02 95 05 81 03 75 08 09 51 95 01 81 02 05 01 26 ff 7f 75 10 55 0e 65 33 09 30 35 00 46 51 07 81 02 09 31 46 96 04 81 02 c0 05 0d 09 22 a1 02 09 42 15 00 25 01 75 01 95 01 81 02 09 32 81 02 09 47 81 02 95 05 81 03 75 08 09 51 95 01 81 02 05 01 26 ff 7f 75 10 55 0e 65 33 09 30 35 00 46 51 07 81 02 09 31 46 96 04 81 02 c0 05 0d 09 22 a1 02 09 42 15 00 25 01 75 01 95 01 81 02 09 32 81 02 09 47 81 02 95 05 81 03 75 08 09 51 95 01 81 02 05 01 26 ff 7f 75 10 55 0e 65 33 09 30 35 00 46 51 07 81 02 09 31 46 96 04 81 02 c0 05 0d 09 22 a1 02 09 42 15 00 25 01 75 01 95 01 81 02 09 32 81 02 09 47 81 02 95 05 81 03 75 08 09 51 95 01 81 02 05 01 26 ff 7f 75 10 55 0e 65 33 09 30 35 00 46 51 07 81 02 09 31 46 96 04 81 02 c0 05 0d 09 22 a1 02 09 42 15 00 25 01 75 01 95 01 81 02 09 32 81 02 09 47 81 02 95 05 81 03 75 08 09 51 95 01 81 02 05 01 26 ff 7f 75 10 55 0e 65 33 09 30 35 00 46 51 07 81 02 09 31 46 96 04 81 02 c0 05 0d 09 22 a1 02 09 42 15 00 25 01 75 01 95 01 81 02 09 32 81 02 09 47 81 02 95 05 81 03 75 08 09 51 95 01 81 02 05 01 26 ff 7f 75 10 55 0e 65 33 09 30 35 00 46 51 07 81 02 09 31 46 96 04 81 02 c0 05 0d 09 22 a1 02 09 42 15 00 25 01 75 01 95 01 81 02 09 32 81 02 09 47 81 02 95 05 81 03 75 08 09 51 95 01 81 02 05 01 26 ff 7f 75 10 55 0e 65 33 09 30 35 00 46 51 07 81 02 09 31 46 96 04 81 02 c0 05 0d 09 22 a1 02 09 42 15 00 25 01 75 01 95 01 81 02 09 32 81 02 09 47 81 02 95 05 81 03 75 08 09 51 95 01 81 02 05 01 26 ff 7f 75 10 55 0e 65 33 09 30 35 00 46 51 07 81 02 09 31 46 96 04 81 02 c0 05 0d 09 54 95 01 75 08 15 00 25 3c 81 02 06 00 ff 09 01 15 00 26 ff 00 75 08 95 02 81 03 05 0d 85 12 09 55 95 01 75 08 15 00 25 3c b1 02 06 00 ff 15 00 26 ff 00 85 1e 09 01 75 08 95 80 b1 02 85 1f 09 01 75 08 96 3f 01 b1 02 c0",
+ input_info=(BusType.USB, 0x2793, 0x0001),
+ )
+
+
+class TestIrmtouch_23c9_5666(BaseTest.TestMultitouch):
+ def create_device(self):
+ return Digitizer(
+ "uhid test irmtouch_23c9_5666",
+ rdesc="05 0d 09 04 a1 01 85 0a 09 22 a1 02 05 0d 09 42 15 00 25 01 75 01 95 01 81 02 09 32 81 02 09 47 81 02 95 05 81 03 75 08 09 51 95 01 81 02 05 01 15 00 26 ff 7f 75 10 09 30 81 02 09 31 81 02 05 0d 09 48 09 49 95 02 81 02 c0 a1 02 05 0d 09 42 15 00 25 01 75 01 95 01 81 02 09 32 81 02 09 47 81 02 95 05 81 03 75 08 09 51 95 01 81 02 05 01 15 00 26 ff 7f 75 10 09 30 81 02 09 31 81 02 05 0d 09 48 09 49 95 02 81 02 c0 a1 02 05 0d 09 42 15 00 25 01 75 01 95 01 81 02 09 32 81 02 09 47 81 02 95 05 81 03 75 08 09 51 95 01 81 02 05 01 15 00 26 ff 7f 75 10 09 30 81 02 09 31 81 02 05 0d 09 48 09 49 95 02 81 02 c0 a1 02 05 0d 09 42 15 00 25 01 75 01 95 01 81 02 09 32 81 02 09 47 81 02 95 05 81 03 75 08 09 51 95 01 81 02 05 01 15 00 26 ff 7f 75 10 09 30 81 02 09 31 81 02 05 0d 09 48 09 49 95 02 81 02 c0 a1 02 05 0d 09 42 15 00 25 01 75 01 95 01 81 02 09 32 81 02 09 47 81 02 95 05 81 03 75 08 09 51 95 01 81 02 05 01 15 00 26 ff 7f 75 10 09 30 81 02 09 31 81 02 05 0d 09 48 09 49 95 02 81 02 c0 a1 02 05 0d 09 42 15 00 25 01 75 01 95 01 81 02 09 32 81 02 09 47 81 02 95 05 81 03 75 08 09 51 95 01 81 02 05 01 15 00 26 ff 7f 75 10 09 30 81 02 09 31 81 02 05 0d 09 48 09 49 95 02 81 02 c0 05 0d 09 54 95 01 75 08 81 02 09 55 25 06 b1 02 c0 09 0e a1 01 85 0c 09 23 a1 02 09 52 15 00 25 06 75 08 95 01 b1 02 c0 c0",
+ input_info=(BusType.USB, 0x23C9, 0x5666),
+ )
+
+
+class TestIrtouch_6615_0070(BaseTest.TestMultitouch):
+ def create_device(self):
+ return Digitizer(
+ "uhid test irtouch_6615_0070",
+ rdesc="05 01 09 02 a1 01 85 10 09 01 a1 00 05 09 19 01 29 02 15 00 25 01 95 02 75 01 81 02 95 06 81 03 05 01 09 30 09 31 15 00 26 ff 7f 75 10 95 02 81 02 c0 c0 05 0d 09 04 a1 01 85 30 09 22 a1 02 09 42 15 00 25 01 75 01 95 01 81 02 09 32 81 02 09 47 81 02 95 05 81 03 09 51 75 08 95 01 81 02 05 01 09 30 26 ff 7f 55 0f 65 11 35 00 46 51 02 75 10 95 01 81 02 09 31 35 00 46 73 01 81 02 c0 a1 02 05 0d 09 42 15 00 25 01 75 01 95 01 81 02 09 32 81 02 09 47 81 02 95 05 81 03 09 51 75 08 95 01 81 02 05 01 09 30 26 ff 7f 55 0f 65 11 35 00 46 51 02 75 10 95 01 81 02 09 31 35 00 46 73 01 81 02 c0 05 0d 09 54 15 00 26 02 00 75 08 95 01 81 02 85 03 09 55 15 00 26 ff 00 75 08 95 01 b1 02 c0 05 0d 09 0e a1 01 85 02 09 52 09 53 15 00 26 ff 00 75 08 95 02 b1 02 c0 05 0d 09 02 a1 01 85 20 09 20 a1 00 09 42 15 00 25 01 75 01 95 01 81 02 95 07 81 03 05 01 09 30 26 ff 7f 55 0f 65 11 35 00 46 51 02 75 10 95 01 81 02 09 31 35 00 46 73 01 81 02 85 01 06 00 ff 09 01 75 08 95 01 b1 02 c0 c0",
+ input_info=(BusType.USB, 0x6615, 0x0070),
+ )
+
+
+class TestIrtouch_6615_0081(BaseTest.TestMultitouch):
+ def create_device(self):
+ return Digitizer(
+ "uhid test irtouch_6615_0081",
+ rdesc="05 0d 09 04 a1 01 85 30 09 22 09 00 15 00 26 ff 00 75 08 95 05 81 02 a1 00 05 0d 09 51 15 00 26 ff 00 75 08 95 01 81 02 05 01 09 30 26 ff 7f 55 0e 65 13 35 00 46 b5 04 75 10 95 01 81 02 09 31 35 00 46 8a 03 81 02 09 32 35 00 46 8a 03 81 02 09 00 15 00 26 ff 7f 75 10 95 01 81 02 09 00 15 00 26 ff 7f 75 10 95 01 81 02 05 0d 09 42 15 00 25 01 75 01 95 01 81 02 09 32 81 02 09 47 81 02 95 05 81 03 09 00 15 00 26 ff 00 75 08 95 01 81 02 c0 a1 00 05 0d 09 51 15 00 26 ff 00 75 08 95 01 81 02 05 01 09 30 26 ff 7f 55 0e 65 13 35 00 46 b5 04 75 10 95 01 81 02 09 31 35 00 46 8a 03 81 02 09 32 35 00 46 8a 03 81 02 09 00 15 00 26 ff 7f 75 10 95 01 81 02 09 00 15 00 26 ff 7f 75 10 95 01 81 02 05 0d 09 42 15 00 25 01 75 01 95 01 81 02 09 32 81 02 09 47 81 02 95 05 81 03 09 00 15 00 26 ff 00 75 08 95 01 81 02 c0 a1 00 05 0d 09 51 15 00 26 ff 00 75 08 95 01 81 02 05 01 09 30 26 ff 7f 55 0e 65 13 35 00 46 b5 04 75 10 95 01 81 02 09 31 35 00 46 8a 03 81 02 09 32 35 00 46 8a 03 81 02 09 00 15 00 26 ff 7f 75 10 95 01 81 02 09 00 15 00 26 ff 7f 75 10 95 01 81 02 05 0d 09 42 15 00 25 01 75 01 95 01 81 02 09 32 81 02 09 47 81 02 95 05 81 03 09 00 15 00 26 ff 00 75 08 95 01 81 02 c0 a1 00 05 0d 09 54 15 00 25 1f 75 05 95 01 81 02 09 00 15 00 25 07 75 03 95 01 81 02 09 00 15 00 26 ff 00 75 08 95 01 81 02 c0 09 55 85 03 15 00 26 ff 00 75 08 95 01 b1 02 c0 05 0d 09 0e a1 01 85 02 09 52 09 53 15 00 26 ff 00 75 08 95 02 b1 02 c0 06 00 ff 09 00 a1 01 09 02 a1 00 85 aa 09 06 15 00 26 ff 00 35 00 46 ff 00 75 08 95 3f b1 02 c0 c0 05 01 09 02 a1 01 85 10 09 01 a1 00 05 01 09 00 15 00 26 ff 00 75 08 95 05 81 02 09 30 09 31 09 32 15 00 26 ff 7f 75 10 95 03 81 02 05 09 19 01 29 08 15 00 25 01 95 08 75 01 81 02 09 00 15 00 26 ff 00 75 08 95 02 81 02 c0 c0 06 00 ff 09 00 a1 01 85 40 09 00 15 00 26 ff 00 75 08 95 2e 81 02 c0",
+ input_info=(BusType.USB, 0x6615, 0x0081),
+ )
+
+
+class TestLG_043e_9aa1(BaseTest.TestMultitouch):
+ def create_device(self):
+ return Digitizer(
+ "uhid test lg_043e_9aa1",
+ rdesc="05 0d 09 04 a1 01 85 01 09 22 a1 02 09 42 15 00 25 01 75 01 95 01 81 02 09 32 81 02 09 47 81 02 95 05 81 03 75 08 09 51 95 01 81 02 05 01 75 10 55 0e 65 11 09 30 35 00 46 98 12 26 80 07 81 02 09 31 46 78 0a 26 38 04 81 02 c0 a1 02 05 0d 09 42 15 00 25 01 75 01 95 01 81 02 09 32 81 02 09 47 81 02 95 05 81 03 75 08 09 51 95 01 81 02 05 01 75 10 55 0e 65 11 09 30 35 00 46 98 12 26 80 07 81 02 46 78 0a 26 38 04 09 31 81 02 c0 a1 02 05 0d 09 42 15 00 25 01 75 01 95 01 81 02 09 32 81 02 09 47 81 02 95 05 81 03 75 08 09 51 95 01 81 02 05 01 75 10 55 0e 65 11 09 30 35 00 46 98 12 26 80 07 81 02 46 78 0a 26 38 04 09 31 81 02 c0 a1 02 05 0d 09 42 15 00 25 01 75 01 95 01 81 02 09 32 81 02 09 47 81 02 95 05 81 03 75 08 09 51 95 01 81 02 05 01 75 10 55 0e 65 11 09 30 35 00 46 98 12 26 80 07 81 02 46 78 0a 26 38 04 09 31 81 02 c0 a1 02 05 0d 09 42 15 00 25 01 75 01 95 01 81 02 09 32 81 02 09 47 81 02 95 05 81 03 75 08 09 51 95 01 81 02 05 01 75 10 55 0e 65 11 09 30 35 00 46 98 12 26 80 07 81 02 46 78 0a 26 38 04 09 31 81 02 c0 a1 02 05 0d 09 42 15 00 25 01 75 01 95 01 81 02 09 32 81 02 09 47 81 02 95 05 81 03 75 08 09 51 95 01 81 02 05 01 75 10 55 0e 65 11 09 30 35 00 46 98 12 26 80 07 81 02 46 78 0a 26 38 04 09 31 81 02 c0 a1 02 05 0d 09 42 15 00 25 01 75 01 95 01 81 02 09 32 81 02 09 47 81 02 95 05 81 03 75 08 09 51 95 01 81 02 05 01 75 10 55 0e 65 11 09 30 35 00 46 98 12 26 80 07 81 02 46 78 0a 26 38 04 09 31 81 02 c0 a1 02 05 0d 09 42 15 00 25 01 75 01 95 01 81 02 09 32 81 02 09 47 81 02 95 05 81 03 75 08 09 51 95 01 81 02 05 01 75 10 55 0e 65 11 09 30 35 00 46 98 12 26 80 07 81 02 46 78 0a 26 38 04 09 31 81 02 c0 a1 02 05 0d 09 42 15 00 25 01 75 01 95 01 81 02 09 32 81 02 09 47 81 02 95 05 81 03 75 08 09 51 95 01 81 02 05 01 75 10 55 0e 65 11 09 30 35 00 46 98 12 26 80 07 81 02 46 78 0a 26 38 04 09 31 81 02 c0 a1 02 05 0d 09 42 15 00 25 01 75 01 95 01 81 02 09 32 81 02 09 47 81 02 95 05 81 03 75 08 09 51 95 01 81 02 05 01 75 10 55 0e 65 11 09 30 35 00 46 98 12 26 80 07 81 02 46 78 0a 26 38 04 09 31 81 02 c0 05 0d 09 54 95 01 75 08 15 00 25 0a 81 02 25 0a 09 55 b1 02 c0 09 0e a1 01 85 03 09 22 a1 00 09 52 09 53 15 00 25 0a 75 08 95 02 b1 02 c0 c0 05 01 09 02 a1 01 85 04 09 01 a1 00 05 09 19 01 29 02 15 00 25 01 75 01 95 02 81 02 95 06 81 03 05 01 09 30 75 10 95 01 15 00 26 7f 07 81 02 09 31 26 37 04 81 02 c0 c0 06 00 ff 09 01 a1 01 85 05 15 00 26 ff 00 75 08 95 19 09 01 b1 02 c0 05 14 09 2b a1 02 85 07 09 2b 15 00 25 0a 75 08 95 40 b1 02 09 4b 15 00 25 0a 75 08 95 02 91 02 c0 05 14 09 2c a1 02 85 08 09 2b 15 00 25 0a 75 08 95 05 81 02 09 4b 15 00 25 0a 75 08 95 47 91 02 c0",
+ input_info=(BusType.USB, 0x043E, 0x9AA1),
+ )
+
+
+class TestLG_043e_9aa3(BaseTest.TestMultitouch):
+ def create_device(self):
+ return Digitizer(
+ "uhid test lg_043e_9aa3",
+ rdesc="05 0d 09 04 a1 01 85 01 09 22 a1 02 09 42 15 00 25 01 75 01 95 01 81 02 09 32 81 02 09 47 81 02 95 05 81 03 75 08 09 51 95 01 81 02 05 01 75 10 55 0e 65 11 09 30 35 00 46 98 12 26 80 07 81 02 09 31 46 78 0a 26 38 04 81 02 c0 a1 02 05 0d 09 42 15 00 25 01 75 01 95 01 81 02 09 32 81 02 09 47 81 02 95 05 81 03 75 08 09 51 95 01 81 02 05 01 75 10 55 0e 65 11 09 30 35 00 46 98 12 26 80 07 81 02 46 78 0a 26 38 04 09 31 81 02 c0 a1 02 05 0d 09 42 15 00 25 01 75 01 95 01 81 02 09 32 81 02 09 47 81 02 95 05 81 03 75 08 09 51 95 01 81 02 05 01 75 10 55 0e 65 11 09 30 35 00 46 98 12 26 80 07 81 02 46 78 0a 26 38 04 09 31 81 02 c0 a1 02 05 0d 09 42 15 00 25 01 75 01 95 01 81 02 09 32 81 02 09 47 81 02 95 05 81 03 75 08 09 51 95 01 81 02 05 01 75 10 55 0e 65 11 09 30 35 00 46 98 12 26 80 07 81 02 46 78 0a 26 38 04 09 31 81 02 c0 a1 02 05 0d 09 42 15 00 25 01 75 01 95 01 81 02 09 32 81 02 09 47 81 02 95 05 81 03 75 08 09 51 95 01 81 02 05 01 75 10 55 0e 65 11 09 30 35 00 46 98 12 26 80 07 81 02 46 78 0a 26 38 04 09 31 81 02 c0 a1 02 05 0d 09 42 15 00 25 01 75 01 95 01 81 02 09 32 81 02 09 47 81 02 95 05 81 03 75 08 09 51 95 01 81 02 05 01 75 10 55 0e 65 11 09 30 35 00 46 98 12 26 80 07 81 02 46 78 0a 26 38 04 09 31 81 02 c0 a1 02 05 0d 09 42 15 00 25 01 75 01 95 01 81 02 09 32 81 02 09 47 81 02 95 05 81 03 75 08 09 51 95 01 81 02 05 01 75 10 55 0e 65 11 09 30 35 00 46 98 12 26 80 07 81 02 46 78 0a 26 38 04 09 31 81 02 c0 a1 02 05 0d 09 42 15 00 25 01 75 01 95 01 81 02 09 32 81 02 09 47 81 02 95 05 81 03 75 08 09 51 95 01 81 02 05 01 75 10 55 0e 65 11 09 30 35 00 46 98 12 26 80 07 81 02 46 78 0a 26 38 04 09 31 81 02 c0 a1 02 05 0d 09 42 15 00 25 01 75 01 95 01 81 02 09 32 81 02 09 47 81 02 95 05 81 03 75 08 09 51 95 01 81 02 05 01 75 10 55 0e 65 11 09 30 35 00 46 98 12 26 80 07 81 02 46 78 0a 26 38 04 09 31 81 02 c0 a1 02 05 0d 09 42 15 00 25 01 75 01 95 01 81 02 09 32 81 02 09 47 81 02 95 05 81 03 75 08 09 51 95 01 81 02 05 01 75 10 55 0e 65 11 09 30 35 00 46 98 12 26 80 07 81 02 46 78 0a 26 38 04 09 31 81 02 c0 05 0d 09 54 95 01 75 08 15 00 25 0a 81 02 25 0a 09 55 b1 02 c0 09 0e a1 01 85 03 09 22 a1 00 09 52 09 53 15 00 25 0a 75 08 95 02 b1 02 c0 c0 05 01 09 02 a1 01 85 04 09 01 a1 00 05 09 19 01 29 02 15 00 25 01 75 01 95 02 81 02 95 06 81 03 05 01 09 30 75 10 95 01 15 00 26 7f 07 81 02 09 31 26 37 04 81 02 c0 c0 06 00 ff 09 01 a1 01 85 05 15 00 26 ff 00 75 08 95 19 09 01 b1 02 c0 05 14 09 2b a1 02 85 07 09 2b 15 00 25 0a 75 08 95 40 b1 02 09 4b 15 00 25 0a 75 08 95 02 91 02 c0 05 14 09 2c a1 02 85 08 09 2b 15 00 25 0a 75 08 95 05 81 02 09 4b 15 00 25 0a 75 08 95 47 91 02 c0",
+ input_info=(BusType.USB, 0x043E, 0x9AA3),
+ )
+
+
+class TestLG_1fd2_0064(BaseTest.TestMultitouch):
+ def create_device(self):
+ return Digitizer(
+ "uhid test lg_1fd2_0064",
+ rdesc="05 0d 09 04 a1 01 85 01 09 22 a1 02 09 42 15 00 25 01 75 01 95 01 81 02 09 32 81 02 95 06 81 03 75 08 09 51 95 01 81 02 a1 00 05 01 26 80 07 75 10 55 0e 65 33 09 30 35 00 46 53 07 81 02 26 38 04 46 20 04 09 31 81 02 45 00 c0 c0 05 0d 09 22 a1 02 09 42 15 00 25 01 75 01 95 01 81 02 09 32 81 02 95 06 81 03 75 08 09 51 95 01 81 02 a1 00 05 01 26 80 07 75 10 55 0e 65 33 09 30 35 00 46 53 07 81 02 26 38 04 46 20 04 09 31 81 02 45 00 c0 c0 05 0d 09 54 95 01 75 08 81 02 85 08 09 55 95 01 25 02 b1 02 c0 09 0e a1 01 85 07 09 23 a1 02 09 52 09 53 15 00 25 0a 75 08 95 02 b1 02 c0 c0 05 01 09 02 a1 01 85 03 09 01 a1 00 05 09 19 01 29 02 15 00 25 01 75 01 95 02 81 02 95 06 81 03 05 01 09 30 09 31 75 10 95 02 15 00 26 ff 7f 81 02 c0 c0",
+ input_info=(BusType.USB, 0x1FD2, 0x0064),
+ )
+
+
+class TestLumio_202e_0006(BaseTest.TestMultitouch):
+ def create_device(self):
+ return Digitizer(
+ "uhid test lumio_202e_0006",
+ rdesc="05 0d 09 04 a1 01 09 22 a1 02 09 42 15 00 25 01 75 01 95 01 81 02 95 03 81 03 09 32 09 47 95 02 81 02 95 02 81 03 09 51 75 08 95 01 81 02 05 01 26 ff 7f 65 11 55 0e 46 b0 0e 75 10 95 01 09 30 81 02 09 31 46 c2 0b 81 02 c0 a1 02 05 0d 09 42 15 00 25 01 75 01 95 01 81 02 95 03 81 03 09 32 09 47 95 02 81 02 95 02 81 03 09 51 75 08 95 01 81 02 05 01 26 ff 7f 65 11 55 0e 46 b0 0e 75 10 95 01 09 30 81 02 09 31 46 c2 0b 81 02 c0 a1 02 05 0d 09 42 15 00 25 01 75 01 95 01 81 02 95 03 81 03 09 32 09 47 95 02 81 02 95 02 81 03 09 51 75 08 95 01 81 02 05 01 26 ff 7f 65 11 55 0e 46 b0 0e 75 10 95 01 09 30 81 02 09 31 46 c2 0b 81 02 c0 a1 02 05 0d 09 42 15 00 25 01 75 01 95 01 81 02 95 03 81 03 09 32 09 47 95 02 81 02 95 02 81 03 09 51 75 08 95 01 81 02 05 01 26 ff 7f 65 11 55 0e 46 b0 0e 75 10 95 01 09 30 81 02 09 31 46 c2 0b 81 02 c0 05 0d 09 54 75 08 95 01 15 00 25 08 81 02 09 55 b1 02 c0",
+ input_info=(BusType.USB, 0x202E, 0x0006),
+ quirks=("VALID_IS_CONFIDENCE", "SLOT_IS_CONTACTID_MINUS_ONE"),
+ )
+
+
+class TestLumio_202e_0007(BaseTest.TestMultitouch):
+ def create_device(self):
+ return Digitizer(
+ "uhid test lumio_202e_0007",
+ rdesc="05 0d 09 04 a1 01 09 22 a1 00 09 42 15 00 25 01 75 01 95 01 81 02 95 03 81 03 09 32 09 47 95 02 81 02 95 0a 81 03 05 01 26 ff 7f 65 11 55 0e 46 ba 0e 75 10 95 01 09 30 81 02 09 31 46 ea 0b 81 02 05 0d 09 51 75 10 95 01 81 02 09 55 15 00 25 08 75 08 95 01 b1 02 c0 c0",
+ input_info=(BusType.USB, 0x202E, 0x0007),
+ quirks=("VALID_IS_CONFIDENCE", "SLOT_IS_CONTACTID_MINUS_ONE"),
+ )
+
+
+class TestNexio_1870_0100(BaseTest.TestMultitouch):
+ def create_device(self):
+ return Digitizer(
+ "uhid test nexio_1870_0100",
+ rdesc="05 0d 09 04 a1 01 85 01 09 22 a1 02 09 42 15 00 25 01 75 01 95 01 81 02 09 32 81 02 95 06 81 03 75 08 09 51 95 01 81 02 05 01 26 ff 3f 75 10 55 0e 65 11 09 30 35 00 46 1e 19 81 02 26 ff 3f 09 31 35 00 46 be 0f 81 02 26 ff 3f c0 a1 02 05 0d 09 42 15 00 25 01 75 01 95 01 81 02 09 32 81 02 95 06 81 03 75 08 09 51 95 01 81 02 05 01 26 ff 3f 75 10 55 0e 65 11 09 30 35 00 46 1e 19 81 02 26 ff 3f 09 31 35 00 46 be 0f 81 02 26 ff 3f c0 05 0d 09 54 95 01 75 08 25 02 81 02 85 02 09 55 25 02 b1 02 c0 09 0e a1 01 85 03 09 23 a1 02 09 52 09 53 15 00 25 0a 75 08 95 02 b1 02 c0 c0 05 01 09 02 a1 01 09 01 a1 00 85 04 05 09 95 03 75 01 19 01 29 03 15 00 25 01 81 02 95 01 75 05 81 01 05 01 75 10 95 02 09 30 09 31 15 00 26 ff 7f 81 02 c0 c0 05 0d 09 02 a1 01 85 05 09 20 a1 00 09 42 09 32 15 00 25 01 75 01 95 02 81 02 95 0e 81 03 05 01 26 ff 3f 75 10 95 01 55 0e 65 11 09 30 35 00 46 1e 19 81 02 26 ff 3f 09 31 35 00 46 be 0f 81 02 26 ff 3f c0 c0 06 00 ff 09 01 a1 01 85 06 19 01 29 40 15 00 26 ff 00 75 08 95 40 81 00 19 01 29 40 91 00 c0",
+ input_info=(BusType.USB, 0x1870, 0x0100),
+ )
+
+
+class TestNexio_1870_010d(BaseTest.TestMultitouch):
+ def create_device(self):
+ return Digitizer(
+ "uhid test nexio_1870_010d",
+ rdesc="05 0d 09 04 a1 01 85 01 09 22 a1 02 09 42 15 00 25 01 75 01 95 01 81 02 09 32 81 02 95 06 81 03 75 08 09 51 95 01 81 02 05 01 26 ff 3f 75 10 55 0d 65 00 09 30 35 00 46 00 00 81 02 26 ff 3f 09 31 35 00 46 00 00 81 02 26 ff 3f 05 0d 09 48 35 00 26 ff 3f 81 02 09 49 35 00 26 ff 3f 81 02 c0 a1 02 05 0d 09 42 15 00 25 01 75 01 95 01 81 02 09 32 81 02 95 06 81 03 75 08 09 51 95 01 81 02 05 01 26 ff 3f 75 10 55 0d 65 00 09 30 35 00 46 00 00 81 02 26 ff 3f 09 31 35 00 46 00 00 81 02 26 ff 3f 05 0d 09 48 35 00 26 ff 3f 81 02 09 49 35 00 26 ff 3f 81 02 c0 a1 02 05 0d 09 42 15 00 25 01 75 01 95 01 81 02 09 32 81 02 95 06 81 03 75 08 09 51 95 01 81 02 05 01 26 ff 3f 75 10 55 0d 65 00 09 30 35 00 46 00 00 81 02 26 ff 3f 09 31 35 00 46 00 00 81 02 26 ff 3f 05 0d 09 48 35 00 26 ff 3f 81 02 09 49 35 00 26 ff 3f 81 02 c0 a1 02 05 0d 09 42 15 00 25 01 75 01 95 01 81 02 09 32 81 02 95 06 81 03 75 08 09 51 95 01 81 02 05 01 26 ff 3f 75 10 55 0d 65 00 09 30 35 00 46 00 00 81 02 26 ff 3f 09 31 35 00 46 00 00 81 02 26 ff 3f 05 0d 09 48 35 00 26 ff 3f 81 02 09 49 35 00 26 ff 3f 81 02 c0 a1 02 05 0d 09 42 15 00 25 01 75 01 95 01 81 02 09 32 81 02 95 06 81 03 75 08 09 51 95 01 81 02 05 01 26 ff 3f 75 10 55 0d 65 00 09 30 35 00 46 00 00 81 02 26 ff 3f 09 31 35 00 46 00 00 81 02 26 ff 3f 05 0d 09 48 35 00 26 ff 3f 81 02 09 49 35 00 26 ff 3f 81 02 c0 a1 02 05 0d 09 42 15 00 25 01 75 01 95 01 81 02 09 32 81 02 95 06 81 03 75 08 09 51 95 01 81 02 05 01 26 ff 3f 75 10 55 0d 65 00 09 30 35 00 46 00 00 81 02 26 ff 3f 09 31 35 00 46 00 00 81 02 26 ff 3f 05 0d 09 48 35 00 26 ff 3f 81 02 09 49 35 00 26 ff 3f 81 02 c0 05 0d 09 54 95 01 75 08 25 02 81 02 85 02 09 55 25 06 b1 02 c0 09 0e a1 01 85 03 09 23 a1 02 09 52 09 53 15 00 25 0a 75 08 95 02 b1 02 c0 c0 05 01 09 02 a1 01 09 01 a1 00 85 04 05 09 95 03 75 01 19 01 29 03 15 00 25 01 81 02 95 01 75 05 81 01 05 01 75 10 95 02 09 30 09 31 15 00 26 ff 7f 81 02 c0 c0 05 0d 09 02 a1 01 85 05 09 20 a1 00 09 42 09 32 15 00 25 01 75 01 95 02 81 02 95 0e 81 03 05 01 26 ff 3f 75 10 95 01 55 0e 65 11 09 30 35 00 46 1e 19 81 02 26 ff 3f 09 31 35 00 46 be 0f 81 02 26 ff 3f c0 c0 06 00 ff 09 01 a1 01 85 06 19 01 29 40 15 00 26 ff 00 75 08 95 3e 81 00 19 01 29 40 91 00 c0",
+ input_info=(BusType.USB, 0x1870, 0x010D),
+ )
+
+
+class TestNexio_1870_0119(BaseTest.TestMultitouch):
+ def create_device(self):
+ return Digitizer(
+ "uhid test nexio_1870_0119",
+ rdesc="05 0d 09 04 a1 01 85 01 09 22 a1 02 09 42 15 00 25 01 75 01 95 01 81 02 09 32 81 02 95 06 81 03 75 08 09 51 95 01 81 02 05 01 26 ff 3f 75 10 55 0d 65 00 09 30 35 00 46 00 00 81 02 26 ff 3f 09 31 35 00 46 00 00 81 02 26 ff 3f 05 0d 09 48 35 00 26 ff 3f 81 02 09 49 35 00 26 ff 3f 81 02 c0 a1 02 05 0d 09 42 15 00 25 01 75 01 95 01 81 02 09 32 81 02 95 06 81 03 75 08 09 51 95 01 81 02 05 01 26 ff 3f 75 10 55 0d 65 00 09 30 35 00 46 00 00 81 02 26 ff 3f 09 31 35 00 46 00 00 81 02 26 ff 3f 05 0d 09 48 35 00 26 ff 3f 81 02 09 49 35 00 26 ff 3f 81 02 c0 a1 02 05 0d 09 42 15 00 25 01 75 01 95 01 81 02 09 32 81 02 95 06 81 03 75 08 09 51 95 01 81 02 05 01 26 ff 3f 75 10 55 0d 65 00 09 30 35 00 46 00 00 81 02 26 ff 3f 09 31 35 00 46 00 00 81 02 26 ff 3f 05 0d 09 48 35 00 26 ff 3f 81 02 09 49 35 00 26 ff 3f 81 02 c0 a1 02 05 0d 09 42 15 00 25 01 75 01 95 01 81 02 09 32 81 02 95 06 81 03 75 08 09 51 95 01 81 02 05 01 26 ff 3f 75 10 55 0d 65 00 09 30 35 00 46 00 00 81 02 26 ff 3f 09 31 35 00 46 00 00 81 02 26 ff 3f 05 0d 09 48 35 00 26 ff 3f 81 02 09 49 35 00 26 ff 3f 81 02 c0 a1 02 05 0d 09 42 15 00 25 01 75 01 95 01 81 02 09 32 81 02 95 06 81 03 75 08 09 51 95 01 81 02 05 01 26 ff 3f 75 10 55 0d 65 00 09 30 35 00 46 00 00 81 02 26 ff 3f 09 31 35 00 46 00 00 81 02 26 ff 3f 05 0d 09 48 35 00 26 ff 3f 81 02 09 49 35 00 26 ff 3f 81 02 c0 a1 02 05 0d 09 42 15 00 25 01 75 01 95 01 81 02 09 32 81 02 95 06 81 03 75 08 09 51 95 01 81 02 05 01 26 ff 3f 75 10 55 0d 65 00 09 30 35 00 46 00 00 81 02 26 ff 3f 09 31 35 00 46 00 00 81 02 26 ff 3f 05 0d 09 48 35 00 26 ff 3f 81 02 09 49 35 00 26 ff 3f 81 02 c0 05 0d 09 54 95 01 75 08 25 02 81 02 85 02 09 55 25 06 b1 02 c0 09 0e a1 01 85 03 09 23 a1 02 09 52 09 53 15 00 25 0a 75 08 95 02 b1 02 c0 c0 05 01 09 02 a1 01 09 01 a1 00 85 04 05 09 95 03 75 01 19 01 29 03 15 00 25 01 81 02 95 01 75 05 81 01 05 01 75 10 95 02 09 30 09 31 15 00 26 ff 7f 81 02 c0 c0 05 0d 09 02 a1 01 85 05 09 20 a1 00 09 42 09 32 15 00 25 01 75 01 95 02 81 02 95 0e 81 03 05 01 26 ff 3f 75 10 95 01 55 0e 65 11 09 30 35 00 46 1e 19 81 02 26 ff 3f 09 31 35 00 46 be 0f 81 02 26 ff 3f c0 c0 06 00 ff 09 01 a1 01 85 06 19 01 29 40 15 00 26 ff 00 75 08 95 3e 81 00 19 01 29 40 91 00 c0",
+ input_info=(BusType.USB, 0x1870, 0x0119),
+ )
+
+
+class TestPenmount_14e1_3500(BaseTest.TestMultitouch):
+ def create_device(self):
+ return Digitizer(
+ "uhid test penmount_14e1_3500",
+ rdesc="05 0d 09 04 a1 01 09 22 a1 00 09 51 15 00 25 0f 75 04 95 01 81 02 09 42 15 00 25 01 75 01 95 01 81 02 09 32 81 02 09 47 81 02 81 01 05 01 75 10 95 01 09 30 26 ff 07 81 02 09 31 26 ff 07 81 02 05 0d 09 55 75 08 95 05 b1 02 c0 c0",
+ input_info=(BusType.USB, 0x14E1, 0x3500),
+ quirks=("VALID_IS_CONFIDENCE",),
+ max_contacts=10,
+ )
+
+
+class TestPixart_093a_8002(BaseTest.TestMultitouch):
+ def create_device(self):
+ return Digitizer(
+ "uhid test pixart_093a_8002",
+ rdesc="05 01 09 02 a1 01 85 0d 09 01 a1 00 05 09 19 01 29 02 15 00 25 01 95 02 75 01 81 02 95 01 75 06 81 03 05 01 55 0e 65 11 75 10 95 01 35 00 46 5a 14 26 ff 7f 09 30 81 22 46 72 0b 26 ff 7f 09 31 81 22 95 08 75 08 81 03 c0 c0 05 0d 09 04 a1 01 85 01 09 22 a1 02 09 42 15 00 25 01 75 01 95 01 81 02 09 32 81 02 09 47 81 02 95 05 81 03 75 08 09 51 95 01 81 02 05 01 75 10 55 0e 65 11 09 30 35 00 46 5a 14 26 ff 7f 81 02 09 31 46 72 0b 26 ff 7f 81 02 c0 a1 02 05 0d 09 42 15 00 25 01 75 01 95 01 81 02 09 32 81 02 09 47 81 02 95 05 81 03 75 08 09 51 95 01 81 02 05 01 75 10 55 0e 65 11 09 30 35 00 46 5a 14 26 ff 7f 81 02 46 72 0b 26 ff 7f 09 31 81 02 c0 05 0d 09 54 15 00 26 ff 00 95 01 75 08 81 02 09 55 25 02 95 01 85 02 b1 02 c0 05 0d 09 0e a1 01 06 00 ff 09 01 26 ff 00 75 08 95 47 85 03 b1 02 09 01 96 ff 03 85 04 b1 02 09 01 95 0b 85 05 b1 02 09 01 96 ff 03 85 06 b1 02 09 01 95 0f 85 07 b1 02 09 01 96 ff 03 85 08 b1 02 09 01 96 ff 03 85 09 b1 02 09 01 95 3f 85 0a b1 02 09 01 96 ff 03 85 0b b1 02 09 01 96 c3 03 85 0e b1 02 09 01 96 ff 03 85 0f b1 02 09 01 96 83 03 85 10 b1 02 09 01 96 93 00 85 11 b1 02 09 01 96 ff 03 85 12 b1 02 05 0d 09 23 a1 02 09 52 09 53 15 00 25 0a 75 08 95 02 85 0c b1 02 c0 c0",
+ input_info=(BusType.USB, 0x093A, 0x8002),
+ quirks=("VALID_IS_INRANGE", "SLOT_IS_CONTACTNUMBER"),
+ )
+
+
+class TestPqlabs_1ef1_0001(BaseTest.TestMultitouch):
+ def create_device(self):
+ return Digitizer(
+ "uhid test pqlabs_1ef1_0001",
+ rdesc="05 0d 09 04 a1 01 85 01 09 22 a1 02 09 42 15 00 25 01 75 01 95 01 81 02 09 32 81 02 95 06 81 03 75 08 09 51 95 01 81 02 05 01 26 ff 3f 75 10 55 0e 65 11 09 30 35 00 46 1e 19 81 02 26 ff 3f 09 31 35 00 46 be 0f 81 02 26 ff 3f c0 a1 02 05 0d 09 42 15 00 25 01 75 01 95 01 81 02 09 32 81 02 95 06 81 03 75 08 09 51 95 01 81 02 05 01 26 ff 3f 75 10 55 0e 65 11 09 30 35 00 46 1e 19 81 02 26 ff 3f 09 31 35 00 46 be 0f 81 02 26 ff 3f c0 a1 02 05 0d 09 42 15 00 25 01 75 01 95 01 81 02 09 32 81 02 95 06 81 03 75 08 09 51 95 01 81 02 05 01 26 ff 3f 75 10 55 0e 65 11 09 30 35 00 46 1e 19 81 02 26 ff 3f 09 31 35 00 46 be 0f 81 02 26 ff 3f c0 a1 02 05 0d 09 42 15 00 25 01 75 01 95 01 81 02 09 32 81 02 95 06 81 03 75 08 09 51 95 01 81 02 05 01 26 ff 3f 75 10 55 0e 65 11 09 30 35 00 46 1e 19 81 02 26 ff 3f 09 31 35 00 46 be 0f 81 02 26 ff 3f c0 05 0d 09 54 95 01 75 08 25 02 81 02 85 02 09 55 25 02 b1 02 c0 09 0e a1 01 85 03 09 23 a1 02 09 52 09 53 15 00 25 0a 75 08 95 02 b1 02 c0 c0 05 01 09 02 a1 01 09 01 a1 00 85 04 05 09 95 03 75 01 19 01 29 03 15 00 25 01 81 02 95 01 75 05 81 01 05 01 75 10 95 02 09 30 09 31 15 00 26 ff 3f 81 02 c0 c0 05 8c 09 07 a1 01 85 11 09 02 15 00 26 ff 00 75 08 95 3f 81 02 85 10 09 10 91 02 c0",
+ input_info=(BusType.USB, 0x1EF1, 0x0001),
+ )
+
+
+class TestQuanta_0408_3000(BaseTest.TestMultitouch):
+ def create_device(self):
+ return Digitizer(
+ "uhid test quanta_0408_3000",
+ rdesc="05 0d 09 04 a1 01 85 01 09 22 a1 02 09 42 15 00 25 01 75 01 95 01 81 02 09 32 81 02 09 47 81 02 95 05 81 03 75 08 09 51 95 01 81 02 05 01 75 10 55 0e 65 11 09 30 35 00 46 e3 13 26 7f 07 81 02 09 31 46 2f 0b 26 37 04 81 02 c0 a1 02 05 0d 09 42 15 00 25 01 75 01 95 01 81 02 09 32 81 02 09 47 81 02 95 05 81 03 75 08 09 51 95 01 81 02 05 01 75 10 55 0e 65 11 09 30 35 00 46 e3 13 26 7f 07 81 02 46 2f 0b 26 37 04 09 31 81 02 c0 05 0d 09 54 15 00 26 ff 00 95 01 75 08 81 02 09 55 25 02 95 01 85 02 b1 02 06 00 ff 09 01 26 ff 00 75 08 95 2f 85 03 b1 02 09 01 96 ff 03 85 04 b1 02 09 01 95 0b 85 05 b1 02 09 01 96 ff 03 85 06 b1 02 c0",
+ input_info=(BusType.USB, 0x0408, 0x3000),
+ )
+
+
+class TestQuanta_0408_3001(BaseTest.TestMultitouch):
+ def create_device(self):
+ return Digitizer(
+ "uhid test quanta_0408_3001",
+ rdesc="05 0d 09 04 a1 01 85 01 09 22 a1 02 09 42 15 00 25 01 75 01 95 01 81 02 09 32 81 02 09 47 81 02 95 05 81 03 75 08 09 51 95 01 81 02 05 01 75 10 55 0e 65 11 09 30 35 00 46 98 12 26 80 07 81 02 09 31 46 78 0a 26 38 04 81 02 c0 a1 02 05 0d 09 42 15 00 25 01 75 01 95 01 81 02 09 32 81 02 09 47 81 02 95 05 81 03 75 08 09 51 95 01 81 02 05 01 75 10 55 0e 65 11 09 30 35 00 46 98 12 26 80 07 81 02 46 78 0a 26 38 04 09 31 81 02 c0 05 0d 09 54 15 00 26 ff 00 95 01 75 08 81 02 09 55 25 02 95 01 85 02 b1 02 06 00 ff 09 01 26 ff 00 75 08 95 47 85 03 b1 02 09 01 96 ff 03 85 04 b1 02 09 01 95 0b 85 05 b1 02 09 01 96 ff 03 85 06 b1 02 09 01 95 0f 85 07 b1 02 09 01 96 ff 03 85 08 b1 02 09 01 96 ff 03 85 09 b1 02 09 01 95 0f 85 0a b1 02 09 01 96 ff 03 85 0b b1 02 c0",
+ input_info=(BusType.USB, 0x0408, 0x3001),
+ quirks=("VALID_IS_CONFIDENCE", "SLOT_IS_CONTACTID"),
+ )
+
+
+class TestQuanta_0408_3008_1(BaseTest.TestMultitouch):
+ def create_device(self):
+ return Digitizer(
+ "uhid test quanta_0408_3008_1",
+ rdesc="05 01 09 02 a1 01 85 0d 09 01 a1 00 05 09 19 01 29 02 15 00 25 01 95 02 75 01 81 02 95 01 75 06 81 03 05 01 55 0e 65 11 75 10 95 01 35 00 46 4c 11 26 7f 07 09 30 81 22 46 bb 09 26 37 04 09 31 81 22 95 08 75 08 81 03 c0 c0 05 0d 09 04 a1 01 85 01 09 22 a1 02 09 42 15 00 25 01 75 01 95 01 81 02 09 32 81 02 09 47 81 02 95 05 81 03 75 08 09 51 95 01 81 02 05 01 75 10 55 0e 65 11 09 30 35 00 46 4c 11 26 7f 07 81 02 09 31 46 bb 09 26 37 04 81 02 c0 a1 02 05 0d 09 42 15 00 25 01 75 01 95 01 81 02 09 32 81 02 09 47 81 02 95 05 81 03 75 08 09 51 95 01 81 02 05 01 75 10 55 0e 65 11 09 30 35 00 46 4c 11 26 7f 07 81 02 46 bb 09 26 37 04 09 31 81 02 c0 05 0d 09 54 15 00 26 ff 00 95 01 75 08 81 02 09 55 25 02 95 01 85 02 b1 02 c0 05 0d 09 0e a1 01 06 00 ff 09 01 26 ff 00 75 08 95 47 85 03 b1 02 09 01 96 ff 03 85 04 b1 02 09 01 95 0b 85 05 b1 02 09 01 96 ff 03 85 06 b1 02 09 01 95 0f 85 07 b1 02 09 01 96 ff 03 85 08 b1 02 09 01 96 ff 03 85 09 b1 02 09 01 95 3f 85 0a b1 02 09 01 96 ff 03 85 0b b1 02 09 01 96 c3 03 85 0e b1 02 09 01 96 ff 03 85 0f b1 02 09 01 96 83 03 85 10 b1 02 09 01 96 93 00 85 11 b1 02 09 01 96 ff 03 85 12 b1 02 05 0d 09 23 a1 02 09 52 09 53 15 00 25 0a 75 08 95 02 85 0c b1 02 c0 c0",
+ input_info=(BusType.USB, 0x0408, 0x3008),
+ )
+
+
+class TestQuanta_0408_3008(BaseTest.TestMultitouch):
+ def create_device(self):
+ return Digitizer(
+ "uhid test quanta_0408_3008",
+ rdesc="05 01 09 02 a1 01 85 0d 09 01 a1 00 05 09 19 01 29 02 15 00 25 01 95 02 75 01 81 02 95 01 75 06 81 03 05 01 55 0e 65 11 75 10 95 01 35 00 46 98 12 26 7f 07 09 30 81 22 46 78 0a 26 37 04 09 31 81 22 c0 c0 05 0d 09 04 a1 01 85 01 09 22 a1 02 09 42 15 00 25 01 75 01 95 01 81 02 09 32 81 02 09 47 81 02 95 05 81 03 75 08 09 51 95 01 81 02 05 01 75 10 55 0e 65 11 09 30 35 00 46 98 12 26 7f 07 81 02 09 31 46 78 0a 26 37 04 81 02 c0 a1 02 05 0d 09 42 15 00 25 01 75 01 95 01 81 02 09 32 81 02 09 47 81 02 95 05 81 03 75 08 09 51 95 01 81 02 05 01 75 10 55 0e 65 11 09 30 35 00 46 98 12 26 7f 07 81 02 46 78 0a 26 37 04 09 31 81 02 c0 05 0d 09 54 15 00 26 ff 00 95 01 75 08 81 02 09 55 25 02 95 01 85 02 b1 02 c0 05 0d 09 0e a1 01 06 00 ff 09 01 26 ff 00 75 08 95 47 85 03 b1 02 09 01 96 ff 03 85 04 b1 02 09 01 95 0b 85 05 b1 02 09 01 96 ff 03 85 06 b1 02 09 01 95 0f 85 07 b1 02 09 01 96 ff 03 85 08 b1 02 09 01 96 ff 03 85 09 b1 02 09 01 95 3f 85 0a b1 02 09 01 96 ff 03 85 0b b1 02 09 01 96 c3 03 85 0e b1 02 09 01 96 ff 03 85 0f b1 02 09 01 96 83 03 85 10 b1 02 09 01 96 93 00 85 11 b1 02 09 01 96 ff 03 85 12 b1 02 05 0d 09 23 a1 02 09 52 09 53 15 00 25 0a 75 08 95 02 85 0c b1 02 c0 c0",
+ input_info=(BusType.USB, 0x0408, 0x3008),
+ )
+
+
+class TestRafi_05bd_0107(BaseTest.TestMultitouch):
+ def create_device(self):
+ return Digitizer(
+ "uhid test rafi_05bd_0107",
+ rdesc="05 0d 09 04 a1 01 85 01 09 22 65 00 55 00 a1 02 05 0d 09 42 15 00 25 01 75 01 95 01 81 02 09 32 81 02 09 47 81 02 95 05 81 03 75 08 09 51 25 09 95 01 81 02 05 01 46 9c 01 26 ff 03 35 00 75 10 09 30 81 02 46 e7 00 26 ff 03 09 31 81 02 c0 a1 02 05 0d 09 42 15 00 25 01 75 01 95 01 81 02 09 32 81 02 09 47 81 02 95 05 81 03 75 08 09 51 25 09 95 01 81 02 05 01 46 9c 01 26 ff 03 35 00 75 10 09 30 81 02 46 e7 00 26 ff 03 09 31 81 02 c0 a1 02 05 0d 09 42 15 00 25 01 75 01 95 01 81 02 09 32 81 02 09 47 81 02 95 05 81 03 75 08 09 51 25 09 95 01 81 02 05 01 46 9c 01 26 ff 03 35 00 75 10 09 30 81 02 46 e7 00 26 ff 03 09 31 81 02 c0 a1 02 05 0d 09 42 15 00 25 01 75 01 95 01 81 02 09 32 81 02 09 47 81 02 95 05 81 03 75 08 09 51 25 09 95 01 81 02 05 01 46 9c 01 26 ff 03 35 00 75 10 09 30 81 02 46 e7 00 26 ff 03 09 31 81 02 c0 a1 02 05 0d 09 42 15 00 25 01 75 01 95 01 81 02 09 32 81 02 09 47 81 02 95 05 81 03 75 08 09 51 25 09 95 01 81 02 05 01 46 9c 01 26 ff 03 35 00 75 10 09 30 81 02 46 e7 00 26 ff 03 09 31 81 02 c0 a1 02 05 0d 09 42 15 00 25 01 75 01 95 01 81 02 09 32 81 02 09 47 81 02 95 05 81 03 75 08 09 51 25 09 95 01 81 02 05 01 46 9c 01 26 ff 03 35 00 75 10 09 30 81 02 46 e7 00 26 ff 03 09 31 81 02 c0 a1 02 05 0d 09 42 15 00 25 01 75 01 95 01 81 02 09 32 81 02 09 47 81 02 95 05 81 03 75 08 09 51 25 09 95 01 81 02 05 01 46 9c 01 26 ff 03 35 00 75 10 09 30 81 02 46 e7 00 26 ff 03 09 31 81 02 c0 a1 02 05 0d 09 42 15 00 25 01 75 01 95 01 81 02 09 32 81 02 09 47 81 02 95 05 81 03 75 08 09 51 25 09 95 01 81 02 05 01 46 9c 01 26 ff 03 35 00 75 10 09 30 81 02 46 e7 00 26 ff 03 09 31 81 02 c0 a1 02 05 0d 09 42 15 00 25 01 75 01 95 01 81 02 09 32 81 02 09 47 81 02 95 05 81 03 75 08 09 51 25 09 95 01 81 02 05 01 46 9c 01 26 ff 03 35 00 75 10 09 30 81 02 46 e7 00 26 ff 03 09 31 81 02 c0 a1 02 05 0d 09 42 15 00 25 01 75 01 95 01 81 02 09 32 81 02 09 47 81 02 95 05 81 03 75 08 09 51 25 09 95 01 81 02 05 01 46 9c 01 26 ff 03 35 00 75 10 09 30 81 02 46 e7 00 26 ff 03 09 31 81 02 c0 05 0d 09 54 95 01 75 08 15 00 25 09 81 02 05 0d 85 02 95 01 75 08 09 55 25 0a b1 02 c0 09 0e a1 01 85 03 09 23 a1 02 09 52 09 53 15 00 25 0a 75 08 95 02 b1 02 c0 c0 05 01 09 02 a1 01 09 01 a1 00 85 05 05 09 19 01 29 02 15 00 25 01 95 02 75 01 81 02 95 06 81 03 05 01 65 11 55 0f 09 30 26 ff 03 35 00 46 9c 01 75 10 95 01 81 02 09 31 26 ff 03 35 00 46 e7 00 81 02 c0 c0",
+ input_info=(BusType.USB, 0x05BD, 0x0107),
+ )
+
+
+class TestRndplus_2512_5003(BaseTest.TestMultitouch):
+ def create_device(self):
+ return Digitizer(
+ "uhid test rndplus_2512_5003",
+ rdesc="05 0d 09 04 a1 01 85 02 09 22 a1 02 09 42 15 00 25 01 75 01 95 01 81 02 09 32 81 02 09 47 81 02 95 05 81 03 75 08 09 51 95 01 81 02 05 01 26 ff 3f 75 10 55 00 65 00 09 30 35 00 46 00 00 81 02 09 31 46 00 00 81 02 05 0d 09 48 09 49 75 10 95 02 81 02 c0 a1 02 05 0d 09 42 15 00 25 01 75 01 95 01 81 02 09 32 81 02 09 47 81 02 95 05 81 03 75 08 09 51 95 01 81 02 05 01 26 ff 3f 75 10 55 00 65 00 09 30 35 00 46 00 00 81 02 09 31 46 00 00 81 02 05 0d 09 48 09 49 75 10 95 02 81 02 c0 a1 02 05 0d 09 42 15 00 25 01 75 01 95 01 81 02 09 32 81 02 09 47 81 02 95 05 81 03 75 08 09 51 95 01 81 02 05 01 26 ff 3f 75 10 55 00 65 00 09 30 35 00 46 00 00 81 02 09 31 46 00 00 81 02 05 0d 09 48 09 49 75 10 95 02 81 02 c0 a1 02 05 0d 09 42 15 00 25 01 75 01 95 01 81 02 09 32 81 02 09 47 81 02 95 05 81 03 75 08 09 51 95 01 81 02 05 01 26 ff 3f 75 10 55 00 65 00 09 30 35 00 46 00 00 81 02 09 31 46 00 00 81 02 05 0d 09 48 09 49 75 10 95 02 81 02 c0 a1 02 05 0d 09 42 15 00 25 01 75 01 95 01 81 02 09 32 81 02 09 47 81 02 95 05 81 03 75 08 09 51 95 01 81 02 05 01 26 ff 3f 75 10 55 00 65 00 09 30 35 00 46 00 00 81 02 09 31 46 00 00 81 02 05 0d 09 48 09 49 75 10 95 02 81 02 c0 a1 02 05 0d 09 42 15 00 25 01 75 01 95 01 81 02 09 32 81 02 09 47 81 02 95 05 81 03 75 08 09 51 95 01 81 02 05 01 26 ff 3f 75 10 55 00 65 00 09 30 35 00 46 00 00 81 02 09 31 46 00 00 81 02 05 0d 09 48 09 49 75 10 95 02 81 02 c0 05 0d 09 54 95 01 75 08 15 00 25 08 81 02 85 08 09 55 b1 02 c0 09 0e a1 01 85 07 09 23 a1 02 09 52 09 53 15 00 25 0a 75 08 95 02 b1 02 c0 c0 05 01 09 02 a1 01 85 03 09 01 a1 00 05 09 19 01 29 03 15 00 25 01 95 03 75 01 81 02 95 01 75 05 81 01 05 01 09 30 09 31 16 00 00 26 ff 3f 36 00 00 46 ff 3f 66 00 00 75 10 95 02 81 62 c0 c0",
+ input_info=(BusType.USB, 0x2512, 0x5003),
+ )
+
+
+class TestRndplus_2512_5004(BaseTest.TestMultitouch):
+ def create_device(self):
+ return Digitizer(
+ "uhid test rndplus_2512_5004",
+ rdesc="05 0d 09 04 a1 01 85 04 09 22 a1 02 09 42 15 00 25 01 75 01 95 01 81 02 09 32 81 02 09 47 81 02 95 05 81 03 75 08 09 51 95 01 81 02 05 01 26 ff 3f 75 10 55 00 65 00 09 30 35 00 46 00 00 81 02 09 31 46 00 00 81 02 05 0d 09 48 09 49 75 10 95 02 81 02 c0 a1 02 05 0d 09 42 15 00 25 01 75 01 95 01 81 02 09 32 81 02 09 47 81 02 95 05 81 03 75 08 09 51 95 01 81 02 05 01 26 ff 3f 75 10 55 00 65 00 09 30 35 00 46 00 00 81 02 09 31 46 00 00 81 02 05 0d 09 48 09 49 75 10 95 02 81 02 c0 a1 02 05 0d 09 42 15 00 25 01 75 01 95 01 81 02 09 32 81 02 09 47 81 02 95 05 81 03 75 08 09 51 95 01 81 02 05 01 26 ff 3f 75 10 55 00 65 00 09 30 35 00 46 00 00 81 02 09 31 46 00 00 81 02 05 0d 09 48 09 49 75 10 95 02 81 02 c0 a1 02 05 0d 09 42 15 00 25 01 75 01 95 01 81 02 09 32 81 02 09 47 81 02 95 05 81 03 75 08 09 51 95 01 81 02 05 01 26 ff 3f 75 10 55 00 65 00 09 30 35 00 46 00 00 81 02 09 31 46 00 00 81 02 05 0d 09 48 09 49 75 10 95 02 81 02 c0 a1 02 05 0d 09 42 15 00 25 01 75 01 95 01 81 02 09 32 81 02 09 47 81 02 95 05 81 03 75 08 09 51 95 01 81 02 05 01 26 ff 3f 75 10 55 00 65 00 09 30 35 00 46 00 00 81 02 09 31 46 00 00 81 02 05 0d 09 48 09 49 75 10 95 02 81 02 c0 a1 02 05 0d 09 42 15 00 25 01 75 01 95 01 81 02 09 32 81 02 09 47 81 02 95 05 81 03 75 08 09 51 95 01 81 02 05 01 26 ff 3f 75 10 55 00 65 00 09 30 35 00 46 00 00 81 02 09 31 46 00 00 81 02 05 0d 09 48 09 49 75 10 95 02 81 02 c0 05 0d 09 54 95 01 75 08 15 00 25 08 81 02 85 05 09 55 b1 02 c0 09 0e a1 01 85 06 09 23 a1 02 09 52 09 53 15 00 25 0a 75 08 95 02 b1 02 c0 c0 05 01 09 02 a1 01 85 03 09 01 a1 00 05 09 19 01 29 03 15 00 25 01 95 03 75 01 81 02 95 01 75 05 81 01 05 01 09 30 09 31 16 00 00 26 ff 3f 36 00 00 46 ff 3f 66 00 00 75 10 95 02 81 62 c0 c0 06 00 ff 09 01 a1 01 85 01 09 01 15 00 26 ff 00 75 08 95 3f 82 00 01 85 02 09 01 15 00 26 ff 00 75 08 95 3f 92 00 01 c0",
+ input_info=(BusType.USB, 0x2512, 0x5004),
+ )
+
+
+class TestSitronix_1403_5001(BaseTest.TestMultitouch):
+ def create_device(self):
+ return Digitizer(
+ "uhid test sitronix_1403_5001",
+ rdesc="05 0d 09 04 a1 01 85 01 09 54 95 01 75 08 81 02 09 22 a1 02 09 51 75 06 95 01 81 02 09 42 09 32 15 00 25 01 75 01 95 02 81 02 05 01 26 90 04 75 0c 95 01 55 0f 65 11 a4 09 30 46 e1 00 81 02 26 50 03 09 31 45 7d 81 02 05 0d 75 08 95 02 09 48 09 49 81 02 c0 a1 02 09 51 75 06 95 01 81 02 09 42 09 32 15 00 25 01 75 01 95 02 81 02 b4 a4 09 30 46 e1 00 81 02 26 50 03 09 31 45 7d 81 02 05 0d 75 08 95 02 09 48 09 49 81 02 c0 a1 02 09 51 75 06 95 01 81 02 09 42 09 32 15 00 25 01 75 01 95 02 81 02 b4 a4 09 30 46 e1 00 81 02 26 50 03 09 31 45 7d 81 02 05 0d 75 08 95 02 09 48 09 49 81 02 c0 a1 02 09 51 75 06 95 01 81 02 09 42 09 32 15 00 25 01 75 01 95 02 81 02 b4 a4 09 30 46 e1 00 81 02 26 50 03 09 31 45 7d 81 02 05 0d 75 08 95 02 09 48 09 49 81 02 c0 a1 02 09 51 75 06 95 01 81 02 09 42 09 32 15 00 25 01 75 01 95 02 81 02 b4 a4 09 30 46 e1 00 81 02 26 50 03 09 31 45 7d 81 02 05 0d 75 08 95 02 09 48 09 49 81 02 c0 a1 02 09 51 75 06 95 01 81 02 09 42 09 32 15 00 25 01 75 01 95 02 81 02 b4 a4 09 30 46 e1 00 81 02 26 50 03 09 31 45 7d 81 02 05 0d 75 08 95 02 09 48 09 49 81 02 c0 a1 02 09 51 75 06 95 01 81 02 09 42 09 32 15 00 25 01 75 01 95 02 81 02 b4 a4 09 30 46 e1 00 81 02 26 50 03 09 31 45 7d 81 02 05 0d 75 08 95 02 09 48 09 49 81 02 c0 a1 02 09 51 75 06 95 01 81 02 09 42 09 32 15 00 25 01 75 01 95 02 81 02 b4 a4 09 30 46 e1 00 81 02 26 50 03 09 31 45 7d 81 02 05 0d 75 08 95 02 09 48 09 49 81 02 c0 a1 02 09 51 75 06 95 01 81 02 09 42 09 32 15 00 25 01 75 01 95 02 81 02 b4 a4 09 30 46 e1 00 81 02 26 50 03 09 31 45 7d 81 02 05 0d 75 08 95 02 09 48 09 49 81 02 c0 a1 02 09 51 75 06 95 01 81 02 09 42 09 32 15 00 25 01 75 01 95 02 81 02 b4 09 30 46 e1 00 81 02 26 50 03 09 31 45 7d 81 02 05 0d 75 08 95 04 09 48 09 49 81 02 c0 85 02 09 55 26 ff 00 75 08 95 01 b1 02 09 04 15 00 25 ff 75 08 95 07 91 02 c0 09 0e a1 01 85 03 09 23 a1 00 09 52 09 53 15 00 25 0a 75 08 95 02 b1 02 c0 c0",
+ input_info=(BusType.USB, 0x1403, 0x5001),
+ max_contacts=10,
+ )
+
+
+class TestSmart_0b8c_0092(BaseTest.TestMultitouch):
+ def create_device(self):
+ return SmartTechDigitizer(
+ "uhid test smart_0b8c_0092", input_info=(BusType.USB, 0x0B8C, 0x0092)
+ )
+
+
+class TestStantum_1f87_0002(BaseTest.TestMultitouch):
+ def create_device(self):
+ return Digitizer(
+ "uhid test stantum_1f87_0002",
+ rdesc="05 0d 09 04 a1 01 85 03 05 0d 09 54 95 01 75 08 81 02 06 00 ff 75 02 09 01 81 01 75 0e 09 02 81 02 05 0d 09 22 a1 02 05 01 16 00 00 26 ff 07 75 0b 55 00 65 00 09 30 81 02 05 0d 25 1f 75 05 09 48 81 02 05 01 16 00 00 26 ff 07 75 0b 55 00 65 00 09 31 81 02 05 0d 25 1f 75 05 09 49 81 02 75 08 09 51 95 01 81 02 09 30 75 05 81 02 09 42 15 00 25 01 75 01 95 01 81 02 09 47 81 02 09 32 81 02 c0 a1 02 05 01 16 00 00 26 ff 07 75 0b 55 00 65 00 09 30 81 02 05 0d 25 1f 75 05 09 48 81 02 05 01 16 00 00 26 ff 07 75 0b 55 00 65 00 09 31 81 02 05 0d 25 1f 75 05 09 49 81 02 75 08 09 51 95 01 81 02 09 30 75 05 81 02 09 42 15 00 25 01 75 01 95 01 81 02 09 47 81 02 09 32 81 02 c0 a1 02 05 01 16 00 00 26 ff 07 75 0b 55 00 65 00 09 30 81 02 05 0d 25 1f 75 05 09 48 81 02 05 01 16 00 00 26 ff 07 75 0b 55 00 65 00 09 31 81 02 05 0d 25 1f 75 05 09 49 81 02 75 08 09 51 95 01 81 02 09 30 75 05 81 02 09 42 15 00 25 01 75 01 95 01 81 02 09 47 81 02 09 32 81 02 c0 a1 02 05 01 16 00 00 26 ff 07 75 0b 55 00 65 00 09 30 81 02 05 0d 25 1f 75 05 09 48 81 02 05 01 16 00 00 26 ff 07 75 0b 55 00 65 00 09 31 81 02 05 0d 25 1f 75 05 09 49 81 02 75 08 09 51 95 01 81 02 09 30 75 05 81 02 09 42 15 00 25 01 75 01 95 01 81 02 09 47 81 02 09 32 81 02 c0 a1 02 05 01 16 00 00 26 ff 07 75 0b 55 00 65 00 09 30 81 02 05 0d 25 1f 75 05 09 48 81 02 05 01 16 00 00 26 ff 07 75 0b 55 00 65 00 09 31 81 02 05 0d 25 1f 75 05 09 49 81 02 75 08 09 51 95 01 81 02 09 30 75 05 81 02 09 42 15 00 25 01 75 01 95 01 81 02 09 47 81 02 09 32 81 02 c0 a1 02 05 01 16 00 00 26 ff 07 75 0b 55 00 65 00 09 30 81 02 05 0d 25 1f 75 05 09 48 81 02 05 01 16 00 00 26 ff 07 75 0b 55 00 65 00 09 31 81 02 05 0d 25 1f 75 05 09 49 81 02 75 08 09 51 95 01 81 02 09 30 75 05 81 02 09 42 15 00 25 01 75 01 95 01 81 02 09 47 81 02 09 32 81 02 c0 a1 02 05 01 16 00 00 26 ff 07 75 0b 55 00 65 00 09 30 81 02 05 0d 25 1f 75 05 09 48 81 02 05 01 16 00 00 26 ff 07 75 0b 55 00 65 00 09 31 81 02 05 0d 25 1f 75 05 09 49 81 02 75 08 09 51 95 01 81 02 09 30 75 05 81 02 09 42 15 00 25 01 75 01 95 01 81 02 09 47 81 02 09 32 81 02 c0 a1 02 05 01 16 00 00 26 ff 07 75 0b 55 00 65 00 09 30 81 02 05 0d 25 1f 75 05 09 48 81 02 05 01 16 00 00 26 ff 07 75 0b 55 00 65 00 09 31 81 02 05 0d 25 1f 75 05 09 49 81 02 75 08 09 51 95 01 81 02 09 30 75 05 81 02 09 42 15 00 25 01 75 01 95 01 81 02 09 47 81 02 09 32 81 02 c0 a1 02 05 01 16 00 00 26 ff 07 75 0b 55 00 65 00 09 30 81 02 05 0d 25 1f 75 05 09 48 81 02 05 01 16 00 00 26 ff 07 75 0b 55 00 65 00 09 31 81 02 05 0d 25 1f 75 05 09 49 81 02 75 08 09 51 95 01 81 02 09 30 75 05 81 02 09 42 15 00 25 01 75 01 95 01 81 02 09 47 81 02 09 32 81 02 c0 a1 02 05 01 16 00 00 26 ff 07 75 0b 55 00 65 00 09 30 81 02 05 0d 25 1f 75 05 09 48 81 02 05 01 16 00 00 26 ff 07 75 0b 55 00 65 00 09 31 81 02 05 0d 25 1f 75 05 09 49 81 02 75 08 09 51 95 01 81 02 09 30 75 05 81 02 09 42 15 00 25 01 75 01 95 01 81 02 09 47 81 02 09 32 81 02 c0 85 08 05 0d 09 55 95 01 75 08 25 0a b1 02 c0",
+ input_info=(BusType.USB, 0x1F87, 0x0002),
+ )
+
+
+class TestTopseed_1784_0016(BaseTest.TestMultitouch):
+ def create_device(self):
+ return Digitizer(
+ "uhid test topseed_1784_0016",
+ rdesc="05 0d 09 04 a1 01 85 04 09 22 a1 02 09 42 15 00 25 01 75 01 95 01 81 02 09 32 81 02 95 06 81 03 75 08 09 51 95 01 81 02 05 01 26 ff 04 75 10 55 00 65 00 09 30 35 00 46 ff 04 81 02 09 31 46 ff 04 81 02 c0 05 0d 09 54 95 01 75 08 15 00 25 0a 81 02 09 55 b1 02 c0 05 0c 09 01 a1 01 85 03 a1 02 09 b5 15 00 25 01 75 01 95 01 81 02 09 b6 81 02 09 b7 81 02 09 cd 81 02 09 e2 81 02 09 e9 81 02 09 ea 81 02 05 01 09 82 81 02 c0 c0",
+ input_info=(BusType.USB, 0x1784, 0x0016),
+ max_contacts=2,
+ )
+
+
+class TestTpv_25aa_8883(BaseTest.TestMultitouch):
+ def create_device(self):
+ return Digitizer(
+ "uhid test tpv_25aa_8883",
+ rdesc="05 01 09 02 a1 01 85 0d 09 01 a1 00 05 09 19 01 29 02 15 00 25 01 95 02 75 01 81 02 05 0d 09 32 95 01 75 01 81 02 95 01 75 05 81 03 05 01 55 0e 65 11 75 10 95 01 35 00 46 98 12 26 7f 07 09 30 81 22 46 78 0a 26 37 04 09 31 81 22 35 00 45 00 15 81 25 7f 75 08 95 01 09 38 81 06 09 00 75 08 95 07 81 03 c0 c0 05 0d 09 04 a1 01 85 01 09 22 a1 02 09 42 15 00 25 01 75 01 95 01 81 02 09 32 81 02 09 47 81 02 95 05 81 03 75 08 09 51 95 01 81 02 05 01 75 10 55 0e 65 11 09 30 35 00 46 98 12 26 7f 07 81 02 09 31 46 78 0a 26 37 04 81 02 c0 a1 02 05 0d 09 42 15 00 25 01 75 01 95 01 81 02 09 32 81 02 09 47 81 02 95 05 81 03 75 08 09 51 95 01 81 02 05 01 75 10 55 0e 65 11 09 30 35 00 46 98 12 26 7f 07 81 02 46 78 0a 26 37 04 09 31 81 02 c0 05 0d 09 54 15 00 26 ff 00 95 01 75 08 81 02 09 55 25 02 95 01 85 02 b1 02 c0 05 0d 09 0e a1 01 06 00 ff 09 01 26 ff 00 75 08 95 47 85 03 b1 02 09 01 96 ff 03 85 04 b1 02 09 01 95 0b 85 05 b1 02 09 01 96 ff 03 85 06 b1 02 09 01 95 0f 85 07 b1 02 09 01 96 ff 03 85 08 b1 02 09 01 96 ff 03 85 09 b1 02 09 01 95 3f 85 0a b1 02 09 01 96 ff 03 85 0b b1 02 09 01 96 c3 03 85 0e b1 02 09 01 96 ff 03 85 0f b1 02 09 01 96 83 03 85 10 b1 02 09 01 96 93 00 85 11 b1 02 09 01 96 ff 03 85 12 b1 02 05 0d 09 23 a1 02 09 52 09 53 15 00 25 0a 75 08 95 02 85 0c b1 02 c0 c0",
+ input_info=(BusType.USB, 0x25AA, 0x8883),
+ )
+
+
+class TestTrs_star_238f_0001(BaseTest.TestMultitouch):
+ def create_device(self):
+ return Digitizer(
+ "uhid test trs-star_238f_0001",
+ rdesc="05 0d 09 04 a1 01 85 01 09 22 a1 00 09 42 15 00 25 01 75 01 95 01 81 02 09 32 95 01 81 03 09 37 95 01 81 03 95 01 81 03 15 00 25 0f 75 04 09 51 95 01 81 02 09 54 95 01 81 02 09 55 95 01 81 02 05 01 26 ff 03 15 00 75 10 65 00 09 30 95 01 81 02 09 31 81 02 c0 05 0d 09 0e 85 02 09 23 a1 02 15 00 25 0a 09 52 75 08 95 01 b1 02 09 53 95 01 b1 02 09 55 95 01 b1 02 c0 c0",
+ input_info=(BusType.USB, 0x238F, 0x0001),
+ )
+
+
+class TestUnitec_227d_0103(BaseTest.TestMultitouch):
+ def create_device(self):
+ return Digitizer(
+ "uhid test unitec_227d_0103",
+ rdesc="05 0d 09 04 a1 01 85 01 09 22 a1 02 09 42 15 00 25 01 75 01 95 01 81 02 09 32 81 02 09 47 81 02 95 05 81 03 09 51 75 08 95 01 81 02 05 01 35 00 55 0e 65 33 75 10 95 01 09 30 16 00 00 26 ff 4f 36 00 00 46 6c 03 81 02 09 31 16 00 00 26 ff 3b 36 00 00 46 ed 01 81 02 26 00 00 46 00 00 c0 a1 02 05 0d 09 42 15 00 25 01 75 01 95 01 81 02 09 32 81 02 09 47 81 02 95 05 81 03 09 51 75 08 95 01 81 02 05 01 35 00 55 0e 65 33 75 10 95 01 09 30 16 00 00 26 ff 4f 36 00 00 46 6c 03 81 02 09 31 16 00 00 26 ff 3b 36 00 00 46 ed 01 81 02 26 00 00 46 00 00 c0 a1 02 05 0d 09 42 15 00 25 01 75 01 95 01 81 02 09 32 81 02 09 47 81 02 95 05 81 03 09 51 75 08 95 01 81 02 05 01 35 00 55 0e 65 33 75 10 95 01 09 30 16 00 00 26 ff 4f 36 00 00 46 6c 03 81 02 09 31 16 00 00 26 ff 3b 36 00 00 46 ed 01 81 02 26 00 00 46 00 00 c0 a1 02 05 0d 09 42 15 00 25 01 75 01 95 01 81 02 09 32 81 02 09 47 81 02 95 05 81 03 09 51 75 08 95 01 81 02 05 01 35 00 55 0e 65 33 75 10 95 01 09 30 16 00 00 26 ff 4f 36 00 00 46 6c 03 81 02 09 31 16 00 00 26 ff 3b 36 00 00 46 ed 01 81 02 26 00 00 46 00 00 c0 a1 02 05 0d 09 42 15 00 25 01 75 01 95 01 81 02 09 32 81 02 09 47 81 02 95 05 81 03 09 51 75 08 95 01 81 02 05 01 35 00 55 0e 65 33 75 10 95 01 09 30 16 00 00 26 ff 4f 36 00 00 46 6c 03 81 02 09 31 16 00 00 26 ff 3b 36 00 00 46 ed 01 81 02 26 00 00 46 00 00 c0 05 0d 09 54 75 08 95 01 81 02 05 0d 85 03 09 55 25 05 75 08 95 01 b1 02 c0 05 0d 09 0e a1 01 85 04 09 53 15 00 25 05 75 08 95 01 b1 02 c0",
+ input_info=(BusType.USB, 0x227D, 0x0103),
+ )
+
+
+class TestZytronic_14c8_0005(BaseTest.TestMultitouch):
+ def create_device(self):
+ return Digitizer(
+ "uhid test zytronic_14c8_0005",
+ rdesc="05 0d 09 04 a1 01 85 01 09 22 a1 02 09 42 15 00 25 01 75 01 95 01 81 02 09 32 95 01 81 02 95 06 81 01 05 01 26 00 10 75 10 95 01 65 00 09 30 81 02 09 31 46 00 10 81 02 05 0d 09 51 26 ff 00 75 08 95 01 81 02 c0 85 02 09 55 15 00 25 08 75 08 95 01 b1 02 c0 05 0d 09 0e a1 01 85 03 a1 02 09 23 09 52 09 53 15 00 25 08 75 08 95 02 b1 02 c0 c0 05 01 09 02 a1 01 09 01 a1 00 85 04 05 09 19 01 29 02 15 00 25 01 95 02 75 01 81 02 95 01 75 06 81 01 05 01 09 30 09 31 15 00 26 00 10 35 00 46 00 10 65 00 75 10 95 02 81 62 c0 c0 06 00 ff 09 01 a1 01 85 05 09 00 15 00 26 ff 00 75 08 95 3f b1 02 c0 06 00 ff 09 01 a1 01 85 06 09 00 15 00 26 ff 00 75 08 95 3f 81 02 c0",
+ input_info=(BusType.USB, 0x14C8, 0x0005),
+ )
+
+
+class TestZytronic_14c8_0006(BaseTest.TestMultitouch):
+ def create_device(self):
+ return Digitizer(
+ "uhid test zytronic_14c8_0006",
+ rdesc="05 0d 09 04 a1 01 85 01 05 0d 09 22 a1 02 09 42 15 00 25 01 75 01 95 01 81 02 09 32 81 02 95 06 81 03 75 08 09 51 95 01 81 02 05 01 26 00 10 75 10 09 30 81 02 09 31 81 02 05 0d c0 05 0d 09 22 a1 02 09 42 15 00 25 01 75 01 95 01 81 02 09 32 81 02 95 06 81 03 75 08 09 51 95 01 81 02 05 01 26 00 10 75 10 09 30 81 02 09 31 81 02 05 0d c0 05 0d 09 22 a1 02 09 42 15 00 25 01 75 01 95 01 81 02 09 32 81 02 95 06 81 03 75 08 09 51 95 01 81 02 05 01 26 00 10 75 10 09 30 81 02 09 31 81 02 05 0d c0 05 0d 09 22 a1 02 09 42 15 00 25 01 75 01 95 01 81 02 09 32 81 02 95 06 81 03 75 08 09 51 95 01 81 02 05 01 26 00 10 75 10 09 30 81 02 09 31 81 02 05 0d c0 05 0d 09 22 a1 02 09 42 15 00 25 01 75 01 95 01 81 02 09 32 81 02 95 06 81 03 75 08 09 51 95 01 81 02 05 01 26 00 10 75 10 09 30 81 02 09 31 81 02 05 0d c0 05 0d 09 22 a1 02 09 42 15 00 25 01 75 01 95 01 81 02 09 32 81 02 95 06 81 03 75 08 09 51 95 01 81 02 05 01 26 00 10 75 10 09 30 81 02 09 31 81 02 05 0d c0 05 0d 09 22 a1 02 09 42 15 00 25 01 75 01 95 01 81 02 09 32 81 02 95 06 81 03 75 08 09 51 95 01 81 02 05 01 26 00 10 75 10 09 30 81 02 09 31 81 02 05 0d c0 05 0d 09 22 a1 02 09 42 15 00 25 01 75 01 95 01 81 02 09 32 81 02 95 06 81 03 75 08 09 51 95 01 81 02 05 01 26 00 10 75 10 09 30 81 02 09 31 81 02 05 0d c0 05 0d 09 22 a1 02 09 42 15 00 25 01 75 01 95 01 81 02 09 32 81 02 95 06 81 03 75 08 09 51 95 01 81 02 05 01 26 00 10 75 10 09 30 81 02 09 31 81 02 05 0d c0 05 0d 09 22 a1 02 09 42 15 00 25 01 75 01 95 01 81 02 09 32 81 02 95 06 81 03 75 08 09 51 95 01 81 02 05 01 26 00 10 75 10 09 30 81 02 09 31 81 02 05 0d c0 05 0d 09 54 95 01 75 08 15 00 25 3c 81 02 05 0d 85 02 09 55 95 01 75 08 15 00 25 3c b1 02 c0 09 0e a1 01 85 03 09 23 a1 02 09 52 09 53 15 00 25 0a 75 08 95 02 b1 02 c0 c0 05 01 09 02 a1 01 09 01 a1 00 85 04 05 09 19 01 29 02 15 00 25 01 95 02 75 01 81 02 95 01 75 06 81 01 05 01 09 30 09 31 15 00 26 00 10 35 00 46 00 10 65 00 75 10 95 02 81 62 c0 c0 06 00 ff 09 01 a1 01 85 05 09 00 15 00 26 ff 00 75 08 95 3f b1 02 c0 06 00 ff 09 01 a1 01 85 06 09 00 15 00 26 ff 00 75 08 95 3f 81 02 c0",
+ input_info=(BusType.USB, 0x14C8, 0x0006),
+ )
+
+
+################################################################################
+#
+# Windows 8 compatible devices
+#
+################################################################################
+
+
+class TestMinWin8TSParallelTriple(BaseTest.TestWin8Multitouch):
+ def create_device(self):
+ return MinWin8TSParallel(3)
+
+
+class TestMinWin8TSParallel(BaseTest.TestWin8Multitouch):
+ def create_device(self):
+ return MinWin8TSParallel(10)
+
+
+class TestMinWin8TSHybrid(BaseTest.TestWin8Multitouch):
+ def create_device(self):
+ return MinWin8TSHybrid()
+
+
+class TestWin8TSConfidence(BaseTest.TestWin8Multitouch):
+ def create_device(self):
+ return Win8TSConfidence(5)
+
+ @pytest.mark.skip_if_uhdev(
+ lambda uhdev: "Confidence" not in uhdev.fields,
+ "Device not compatible, missing Confidence usage",
+ )
+ def test_mt_confidence_bad_release(self):
+ """Check for the validity of the confidence bit.
+ When a contact is marked as not confident, it should be detected
+ as a palm from the kernel POV and released.
+
+ Note: if the kernel exports ABS_MT_TOOL_TYPE, it shouldn't release
+ the touch but instead convert it to ABS_MT_TOOL_PALM."""
+ uhdev = self.uhdev
+ evdev = uhdev.get_evdev()
+
+ t0 = Touch(1, 150, 200)
+ r = uhdev.event([t0])
+ events = uhdev.next_sync_events()
+ self.debug_reports(r, uhdev, events)
+
+ t0.confidence = False
+ t0.tipswitch = False
+ r = uhdev.event([t0])
+ events = uhdev.next_sync_events()
+ self.debug_reports(r, uhdev, events)
+
+ if evdev.absinfo[libevdev.EV_ABS.ABS_MT_TOOL_TYPE] is not None:
+ # the kernel exports MT_TOOL_PALM
+ assert libevdev.InputEvent(libevdev.EV_ABS.ABS_MT_TOOL_TYPE, 2) in events
+
+ assert libevdev.InputEvent(libevdev.EV_KEY.BTN_TOUCH, 0) in events
+ assert evdev.slots[0][libevdev.EV_ABS.ABS_MT_TRACKING_ID] == -1
+
+
+class TestElanXPS9360(BaseTest.TestWin8Multitouch):
+ def create_device(self):
+ return Digitizer(
+ "uhid test ElanXPS9360",
+ rdesc="05 0d 09 04 a1 01 85 01 09 22 a1 02 09 42 15 00 25 01 75 01 95 01 81 02 75 01 81 03 75 06 09 51 25 3f 81 02 26 ff 00 75 08 09 48 81 02 09 49 81 02 95 01 05 01 a4 26 20 0d 75 10 55 0f 65 11 09 30 35 00 46 26 01 95 02 81 02 26 50 07 46 a6 00 09 31 81 02 b4 c0 05 0d 09 22 a1 02 05 0d 09 42 15 00 25 01 75 01 95 01 81 02 75 01 81 03 75 06 09 51 25 3f 81 02 26 ff 00 75 08 09 48 81 02 09 49 81 02 95 01 05 01 a4 26 20 0d 75 10 55 0f 65 11 09 30 35 00 46 26 01 95 02 81 02 26 50 07 46 a6 00 09 31 81 02 b4 c0 05 0d 09 22 a1 02 05 0d 09 42 15 00 25 01 75 01 95 01 81 02 75 01 81 03 75 06 09 51 25 3f 81 02 26 ff 00 75 08 09 48 81 02 09 49 81 02 95 01 05 01 a4 26 20 0d 75 10 55 0f 65 11 09 30 35 00 46 26 01 95 02 81 02 26 50 07 46 a6 00 09 31 81 02 b4 c0 05 0d 09 22 a1 02 05 0d 09 42 15 00 25 01 75 01 95 01 81 02 75 01 81 03 75 06 09 51 25 3f 81 02 26 ff 00 75 08 09 48 81 02 09 49 81 02 95 01 05 01 a4 26 20 0d 75 10 55 0f 65 11 09 30 35 00 46 26 01 95 02 81 02 26 50 07 46 a6 00 09 31 81 02 b4 c0 05 0d 09 22 a1 02 05 0d 09 42 15 00 25 01 75 01 95 01 81 02 75 01 81 03 75 06 09 51 25 3f 81 02 26 ff 00 75 08 09 48 81 02 09 49 81 02 95 01 05 01 a4 26 20 0d 75 10 55 0f 65 11 09 30 35 00 46 26 01 95 02 81 02 26 50 07 46 a6 00 09 31 81 02 b4 c0 05 0d 09 22 a1 02 05 0d 09 42 15 00 25 01 75 01 95 01 81 02 75 01 81 03 75 06 09 51 25 3f 81 02 26 ff 00 75 08 09 48 81 02 09 49 81 02 95 01 05 01 a4 26 20 0d 75 10 55 0f 65 11 09 30 35 00 46 26 01 95 02 81 02 26 50 07 46 a6 00 09 31 81 02 b4 c0 05 0d 09 22 a1 02 05 0d 09 42 15 00 25 01 75 01 95 01 81 02 75 01 81 03 75 06 09 51 25 3f 81 02 26 ff 00 75 08 09 48 81 02 09 49 81 02 95 01 05 01 a4 26 20 0d 75 10 55 0f 65 11 09 30 35 00 46 26 01 95 02 81 02 26 50 07 46 a6 00 09 31 81 02 b4 c0 05 0d 09 22 a1 02 05 0d 09 42 15 00 25 01 75 01 95 01 81 02 75 01 81 03 75 06 09 51 25 3f 81 02 26 ff 00 75 08 09 48 81 02 09 49 81 02 95 01 05 01 a4 26 20 0d 75 10 55 0f 65 11 09 30 35 00 46 26 01 95 02 81 02 26 50 07 46 a6 00 09 31 81 02 b4 c0 05 0d 09 22 a1 02 05 0d 09 42 15 00 25 01 75 01 95 01 81 02 75 01 81 03 75 06 09 51 25 3f 81 02 26 ff 00 75 08 09 48 81 02 09 49 81 02 95 01 05 01 a4 26 20 0d 75 10 55 0f 65 11 09 30 35 00 46 26 01 95 02 81 02 26 50 07 46 a6 00 09 31 81 02 b4 c0 05 0d 09 22 a1 02 05 0d 09 42 15 00 25 01 75 01 95 01 81 02 75 01 81 03 75 06 09 51 25 3f 81 02 26 ff 00 75 08 09 48 81 02 09 49 81 02 95 01 05 01 a4 26 20 0d 75 10 55 0f 65 11 09 30 35 00 46 26 01 95 02 81 02 26 50 07 46 a6 00 09 31 81 02 b4 c0 05 0d 09 56 55 00 65 00 27 ff ff ff 7f 95 01 75 20 81 02 09 54 25 7f 95 01 75 08 81 02 85 0a 09 55 25 0a b1 02 85 44 06 00 ff 09 c5 15 00 26 ff 00 75 08 96 00 01 b1 02 c0 06 ff 01 09 01 a1 01 85 02 15 00 26 ff 00 75 08 95 40 09 00 81 02 c0 06 00 ff 09 01 a1 01 85 03 75 08 95 1f 09 01 91 02 c0 06 01 ff 09 01 a1 01 85 04 15 00 26 ff 00 75 08 95 13 09 00 81 02 c0",
+ )
+
+
+class TestTouchpadXPS9360(BaseTest.TestPTP):
+ def create_device(self):
+ return PTP(
+ "uhid test TouchpadXPS9360",
+ max_contacts=5,
+ rdesc="05 01 09 02 a1 01 85 02 09 01 a1 00 05 09 19 01 29 02 15 00 25 01 75 01 95 02 81 02 95 06 81 01 05 01 09 30 09 31 15 81 25 7f 75 08 95 02 81 06 c0 c0 05 0d 09 05 a1 01 85 03 05 0d 09 22 a1 02 15 00 25 01 09 47 09 42 95 02 75 01 81 02 95 01 75 03 25 05 09 51 81 02 75 01 95 03 81 03 05 01 15 00 26 c0 04 75 10 55 0e 65 11 09 30 35 00 46 f5 03 95 01 81 02 46 36 02 26 a8 02 09 31 81 02 c0 05 0d 09 22 a1 02 15 00 25 01 09 47 09 42 95 02 75 01 81 02 95 01 75 03 25 05 09 51 81 02 75 01 95 03 81 03 05 01 15 00 26 c0 04 75 10 55 0e 65 11 09 30 35 00 46 f5 03 95 01 81 02 46 36 02 26 a8 02 09 31 81 02 c0 05 0d 09 22 a1 02 15 00 25 01 09 47 09 42 95 02 75 01 81 02 95 01 75 03 25 05 09 51 81 02 75 01 95 03 81 03 05 01 15 00 26 c0 04 75 10 55 0e 65 11 09 30 35 00 46 f5 03 95 01 81 02 46 36 02 26 a8 02 09 31 81 02 c0 05 0d 09 22 a1 02 15 00 25 01 09 47 09 42 95 02 75 01 81 02 95 01 75 03 25 05 09 51 81 02 75 01 95 03 81 03 05 01 15 00 26 c0 04 75 10 55 0e 65 11 09 30 35 00 46 f5 03 95 01 81 02 46 36 02 26 a8 02 09 31 81 02 c0 05 0d 09 22 a1 02 15 00 25 01 09 47 09 42 95 02 75 01 81 02 95 01 75 03 25 05 09 51 81 02 75 01 95 03 81 03 05 01 15 00 26 c0 04 75 10 55 0e 65 11 09 30 35 00 46 f5 03 95 01 81 02 46 36 02 26 a8 02 09 31 81 02 c0 05 0d 55 0c 66 01 10 47 ff ff 00 00 27 ff ff 00 00 75 10 95 01 09 56 81 02 09 54 25 7f 95 01 75 08 81 02 05 09 09 01 25 01 75 01 95 01 81 02 95 07 81 03 05 0d 85 08 09 55 09 59 75 04 95 02 25 0f b1 02 85 0d 09 60 75 01 95 01 15 00 25 01 b1 02 95 07 b1 03 85 07 06 00 ff 09 c5 15 00 26 ff 00 75 08 96 00 01 b1 02 c0 05 0d 09 0e a1 01 85 04 09 22 a1 02 09 52 15 00 25 0a 75 08 95 01 b1 02 c0 09 22 a1 00 85 06 09 57 09 58 75 01 95 02 25 01 b1 02 95 06 b1 03 c0 c0 06 00 ff 09 01 a1 01 85 09 09 02 15 00 26 ff 00 75 08 95 14 91 02 85 0a 09 03 15 00 26 ff 00 75 08 95 14 91 02 85 0b 09 04 15 00 26 ff 00 75 08 95 3d 81 02 85 0c 09 05 15 00 26 ff 00 75 08 95 3d 81 02 85 0f 09 06 15 00 26 ff 00 75 08 95 03 b1 02 85 0e 09 07 15 00 26 ff 00 75 08 95 01 b1 02 c0",
+ )
+
+
+class TestSurfaceBook2(BaseTest.TestPTP):
+ def create_device(self):
+ return PTP(
+ "uhid test SurfaceBook2",
+ max_contacts=5,
+ rdesc="05 01 09 06 A1 01 85 01 14 25 01 75 01 95 08 05 07 19 E0 29 E7 81 02 75 08 95 0A 18 29 91 26 FF 00 80 05 0C 0A C0 02 A1 02 1A C1 02 2A C6 02 95 06 B1 03 C0 05 08 19 01 29 03 75 01 95 03 25 01 91 02 95 05 91 01 C0 05 01 09 02 A1 01 85 02 05 09 19 01 29 05 81 02 95 01 75 03 81 03 15 81 25 7F 75 08 95 02 05 01 09 30 09 31 81 06 A1 02 09 48 14 25 01 35 01 45 10 75 02 95 01 A4 B1 02 09 38 15 81 25 7F 34 44 75 08 81 06 C0 A1 02 09 48 B4 B1 02 34 44 75 04 B1 03 05 0C 0A 38 02 15 81 25 7F 75 08 81 06 C0 C0 05 0C 09 01 A1 01 85 03 75 10 14 26 FF 03 18 2A FF 03 80 C0 06 05 FF 09 01 A1 01 85 0D 25 FF 95 02 75 08 09 20 81 02 09 22 91 02 15 81 25 7F 95 20 75 08 09 21 81 02 09 23 91 02 C0 09 02 A1 01 85 0C 14 25 FF 95 01 08 91 02 C0 05 0D 09 05 A1 01 85 04 09 22 A1 02 25 01 09 47 09 42 95 02 75 01 81 02 95 01 75 03 25 03 09 51 81 02 75 01 95 03 81 03 05 01 26 E4 07 75 10 55 0E 65 11 09 30 46 F2 03 95 01 81 02 46 94 02 26 29 05 09 31 81 02 44 54 64 C0 05 0D 09 22 A1 02 25 01 09 47 09 42 95 02 75 01 81 02 95 01 75 03 25 03 09 51 81 02 75 01 95 03 81 03 05 01 26 E4 07 75 10 55 0E 65 11 09 30 46 F2 03 95 01 81 02 46 94 02 26 29 05 09 31 81 02 44 54 64 C0 05 0D 09 22 A1 02 25 01 09 47 09 42 95 02 75 01 81 02 95 01 75 03 25 03 09 51 81 02 75 01 95 03 81 03 05 01 26 E4 07 75 10 55 0E 65 11 09 30 46 F2 03 95 01 81 02 46 94 02 26 29 05 09 31 81 02 C0 05 0D 09 22 A1 02 25 01 09 47 09 42 95 02 75 01 81 02 95 01 75 03 25 03 09 51 81 02 75 01 95 03 81 03 05 01 26 E4 07 75 10 55 0E 65 11 09 30 46 F2 03 95 01 81 02 46 94 02 26 29 05 09 31 81 02 44 54 64 C0 05 0D 09 22 A1 02 25 01 09 47 09 42 95 02 75 01 81 02 95 01 75 03 25 03 09 51 81 02 75 01 95 03 81 03 05 01 26 E4 07 75 10 55 0E 65 11 09 30 46 F2 03 95 01 81 02 46 94 02 26 29 05 09 31 81 02 C0 05 0D 55 0C 66 01 10 47 FF FF 00 00 27 FF FF 00 00 09 56 81 02 09 54 25 7F 75 08 81 02 05 09 09 01 25 01 75 01 81 02 95 07 81 03 05 0D 85 04 09 55 09 59 75 04 95 02 25 0F B1 02 06 00 FF 09 C6 85 05 14 25 08 75 08 95 01 B1 02 09 C7 26 FF 00 75 08 95 20 B1 02 C0 05 0D 09 0E A1 01 85 07 09 22 A1 02 09 52 14 25 0A 75 08 95 01 B1 02 C0 09 22 A0 85 08 09 57 09 58 75 01 95 02 25 01 B1 02 95 06 B1 03 C0 C0 06 07 FF 09 01 A1 01 85 0A 09 02 26 FF 00 75 08 95 14 91 02 85 09 09 03 91 02 85 0A 09 04 95 26 81 02 85 09 09 05 81 02 85 09 09 06 95 01 B1 02 85 0B 09 07 B1 02 C0 06 05 FF 09 04 A1 01 85 0E 09 31 91 02 09 31 81 03 09 30 91 02 09 30 81 02 95 39 09 32 92 02 01 09 32 82 02 01 C0 06 05 FF 09 50 A1 01 85 20 14 25 FF 75 08 95 3C 09 60 82 02 01 09 61 92 02 01 09 62 B2 02 01 85 21 09 63 82 02 01 09 64 92 02 01 09 65 B2 02 01 85 22 25 FF 75 20 95 04 19 66 29 69 81 02 19 6A 29 6D 91 02 19 6E 29 71 B1 02 85 23 19 72 29 75 81 02 19 76 29 79 91 02 19 7A 29 7D B1 02 85 24 19 7E 29 81 81 02 19 82 29 85 91 02 19 86 29 89 B1 02 85 25 19 8A 29 8D 81 02 19 8E 29 91 91 02 19 92 29 95 B1 02 85 26 19 96 29 99 81 02 19 9A 29 9D 91 02 19 9E 29 A1 B1 02 85 27 19 A2 29 A5 81 02 19 A6 29 A9 91 02 19 AA 29 AD B1 02 85 28 19 AE 29 B1 81 02 19 B2 29 B5 91 02 19 B6 29 B9 B1 02 85 29 19 BA 29 BD 81 02 19 BE 29 C1 91 02 19 C2 29 C5 B1 02 C0 06 00 FF 0A 00 F9 A1 01 85 32 75 10 95 02 14 27 FF FF 00 00 0A 01 F9 B1 02 75 20 95 01 25 FF 0A 02 F9 B1 02 75 08 95 08 26 FF 00 0A 03 F9 B2 02 01 95 10 0A 04 F9 B2 02 01 0A 05 F9 B2 02 01 95 01 75 10 27 FF FF 00 00 0A 06 F9 81 02 C0",
+ )
+
+
+class Test3m_0596_051c(BaseTest.TestWin8Multitouch):
+ def create_device(self):
+ return Digitizer(
+ "uhid test 3m_0596_051c",
+ rdesc="05 01 09 01 a1 01 85 01 09 01 a1 00 05 09 09 01 95 01 75 01 15 00 25 01 81 02 95 07 75 01 81 03 95 01 75 08 81 03 05 01 09 30 09 31 15 00 26 ff 7f 35 00 46 ff 7f 95 02 75 10 81 02 c0 a1 02 15 00 26 ff 00 09 01 95 39 75 08 81 03 c0 c0 05 0d 09 0e a1 01 85 11 09 23 a1 02 09 52 09 53 15 00 25 0a 75 08 95 02 b1 02 c0 c0 09 04 a1 01 85 13 05 0d 09 22 a1 02 09 42 15 00 25 01 75 01 95 01 81 02 81 03 09 47 81 02 95 05 81 03 75 08 09 51 95 01 81 02 05 01 26 ff 7f 75 10 55 0e 65 11 09 30 35 00 46 d1 12 81 02 09 31 46 b2 0b 81 02 06 00 ff 75 10 95 02 09 01 81 02 c0 05 0d 09 22 a1 02 09 42 15 00 25 01 75 01 95 01 81 02 81 03 09 47 81 02 95 05 81 03 75 08 09 51 95 01 81 02 05 01 26 ff 7f 75 10 55 0e 65 11 09 30 35 00 46 d1 12 81 02 09 31 46 b2 0b 81 02 06 00 ff 75 10 95 02 09 01 81 02 c0 05 0d 09 22 a1 02 09 42 15 00 25 01 75 01 95 01 81 02 81 03 09 47 81 02 95 05 81 03 75 08 09 51 95 01 81 02 05 01 26 ff 7f 75 10 55 0e 65 11 09 30 35 00 46 d1 12 81 02 09 31 46 b2 0b 81 02 06 00 ff 75 10 95 02 09 01 81 02 c0 05 0d 09 22 a1 02 09 42 15 00 25 01 75 01 95 01 81 02 81 03 09 47 81 02 95 05 81 03 75 08 09 51 95 01 81 02 05 01 26 ff 7f 75 10 55 0e 65 11 09 30 35 00 46 d1 12 81 02 09 31 46 b2 0b 81 02 06 00 ff 75 10 95 02 09 01 81 02 c0 05 0d 09 22 a1 02 09 42 15 00 25 01 75 01 95 01 81 02 81 03 09 47 81 02 95 05 81 03 75 08 09 51 95 01 81 02 05 01 26 ff 7f 75 10 55 0e 65 11 09 30 35 00 46 d1 12 81 02 09 31 46 b2 0b 81 02 06 00 ff 75 10 95 02 09 01 81 02 c0 05 0d 09 22 a1 02 09 42 15 00 25 01 75 01 95 01 81 02 81 03 09 47 81 02 95 05 81 03 75 08 09 51 95 01 81 02 05 01 26 ff 7f 75 10 55 0e 65 11 09 30 35 00 46 d1 12 81 02 09 31 46 b2 0b 81 02 06 00 ff 75 10 95 02 09 01 81 02 c0 05 0d 09 54 95 01 75 08 15 00 25 14 81 02 05 0d 55 0c 66 01 10 35 00 47 ff ff 00 00 27 ff ff 00 00 75 10 95 01 09 56 81 02 05 0d 09 55 85 12 15 00 25 14 75 08 95 01 b1 02 85 44 06 00 ff 09 c5 15 00 26 ff 00 75 08 96 00 01 b1 02 06 00 ff 15 00 26 ff 00 85 03 09 01 75 08 95 07 b1 02 85 04 09 01 75 08 95 17 b1 02 85 05 09 01 75 08 95 47 b1 02 85 06 09 01 75 08 95 07 b1 02 85 73 09 01 75 08 95 07 b1 02 85 08 09 01 75 08 95 07 b1 02 85 09 09 01 75 08 95 3f b1 02 85 0f 09 01 75 08 96 07 02 b1 02 c0",
+ )
+
+
+class Testadvanced_silicon_04e8_2084(BaseTest.TestWin8Multitouch):
+ def create_device(self):
+ return Digitizer(
+ "uhid test advanced_silicon_04e8_2084",
+ rdesc="05 0d 09 04 a1 01 85 01 09 22 a1 02 09 42 15 00 25 01 75 01 95 01 81 02 95 02 81 03 09 51 25 1f 75 05 95 01 81 02 05 01 26 ff 7f 75 10 55 0e 65 11 09 30 35 00 46 c0 14 81 02 46 ae 0b 09 31 81 02 45 00 c0 05 0d 09 22 a1 02 09 42 15 00 25 01 75 01 95 01 81 02 95 02 81 03 09 51 25 1f 75 05 95 01 81 02 05 01 26 ff 7f 75 10 55 0e 65 11 09 30 35 00 46 c0 14 81 02 46 ae 0b 09 31 81 02 45 00 c0 05 0d 09 22 a1 02 09 42 15 00 25 01 75 01 95 01 81 02 95 02 81 03 09 51 25 1f 75 05 95 01 81 02 05 01 26 ff 7f 75 10 55 0e 65 11 09 30 35 00 46 c0 14 81 02 46 ae 0b 09 31 81 02 45 00 c0 05 0d 09 22 a1 02 09 42 15 00 25 01 75 01 95 01 81 02 95 02 81 03 09 51 25 1f 75 05 95 01 81 02 05 01 26 ff 7f 75 10 55 0e 65 11 09 30 35 00 46 c0 14 81 02 46 ae 0b 09 31 81 02 45 00 c0 05 0d 09 22 a1 02 09 42 15 00 25 01 75 01 95 01 81 02 95 02 81 03 09 51 25 1f 75 05 95 01 81 02 05 01 26 ff 7f 75 10 55 0e 65 11 09 30 35 00 46 c0 14 81 02 46 ae 0b 09 31 81 02 45 00 c0 05 0d 09 22 a1 02 09 42 15 00 25 01 75 01 95 01 81 02 95 02 81 03 09 51 25 1f 75 05 95 01 81 02 05 01 26 ff 7f 75 10 55 0e 65 11 09 30 35 00 46 c0 14 81 02 46 ae 0b 09 31 81 02 45 00 c0 05 0d 09 22 a1 02 09 42 15 00 25 01 75 01 95 01 81 02 95 02 81 03 09 51 25 1f 75 05 95 01 81 02 05 01 26 ff 7f 75 10 55 0e 65 11 09 30 35 00 46 c0 14 81 02 46 ae 0b 09 31 81 02 45 00 c0 05 0d 09 22 a1 02 09 42 15 00 25 01 75 01 95 01 81 02 95 02 81 03 09 51 25 1f 75 05 95 01 81 02 05 01 26 ff 7f 75 10 55 0e 65 11 09 30 35 00 46 c0 14 81 02 46 ae 0b 09 31 81 02 45 00 c0 05 0d 09 22 a1 02 09 42 15 00 25 01 75 01 95 01 81 02 95 02 81 03 09 51 25 1f 75 05 95 01 81 02 05 01 26 ff 7f 75 10 55 0e 65 11 09 30 35 00 46 c0 14 81 02 46 ae 0b 09 31 81 02 45 00 c0 05 0d 09 22 a1 02 09 42 15 00 25 01 75 01 95 01 81 02 95 02 81 03 09 51 25 1f 75 05 95 01 81 02 05 01 26 ff 7f 75 10 55 0e 65 11 09 30 35 00 46 c0 14 81 02 46 ae 0b 09 31 81 02 45 00 c0 05 0d 15 00 27 ff ff 00 00 75 10 95 01 09 56 81 02 25 0a 75 08 09 54 81 02 85 44 09 55 b1 02 85 44 06 00 ff 09 c5 26 ff 00 96 00 01 b1 02 85 f0 09 01 95 04 b1 02 85 f2 09 03 b1 02 09 04 b1 02 09 05 b1 02 95 01 09 06 b1 02 09 07 b1 02 85 f1 09 02 95 07 91 02 85 f3 09 08 95 3d b1 02 c0",
+ )
+
+
+class Testadvanced_silicon_2149_2306(BaseTest.TestWin8Multitouch):
+ def create_device(self):
+ return Digitizer(
+ "uhid test advanced_silicon_2149_2306",
+ rdesc="05 0d 09 04 a1 01 85 01 09 22 a1 02 09 42 15 00 25 01 75 01 95 01 81 02 95 02 81 03 09 51 25 1f 75 05 95 01 81 02 05 01 26 ff 7f 75 10 55 0e 65 11 09 30 35 00 46 f6 13 81 02 46 40 0b 09 31 81 02 45 00 c0 05 0d 09 22 a1 02 09 42 15 00 25 01 75 01 95 01 81 02 95 02 81 03 09 51 25 1f 75 05 95 01 81 02 05 01 26 ff 7f 75 10 55 0e 65 11 09 30 35 00 46 f6 13 81 02 46 40 0b 09 31 81 02 45 00 c0 05 0d 09 22 a1 02 09 42 15 00 25 01 75 01 95 01 81 02 95 02 81 03 09 51 25 1f 75 05 95 01 81 02 05 01 26 ff 7f 75 10 55 0e 65 11 09 30 35 00 46 f6 13 81 02 46 40 0b 09 31 81 02 45 00 c0 05 0d 09 22 a1 02 09 42 15 00 25 01 75 01 95 01 81 02 95 02 81 03 09 51 25 1f 75 05 95 01 81 02 05 01 26 ff 7f 75 10 55 0e 65 11 09 30 35 00 46 f6 13 81 02 46 40 0b 09 31 81 02 45 00 c0 05 0d 09 22 a1 02 09 42 15 00 25 01 75 01 95 01 81 02 95 02 81 03 09 51 25 1f 75 05 95 01 81 02 05 01 26 ff 7f 75 10 55 0e 65 11 09 30 35 00 46 f6 13 81 02 46 40 0b 09 31 81 02 45 00 c0 05 0d 15 00 27 ff ff 00 00 75 10 95 01 09 56 81 02 25 0a 75 08 09 54 81 02 85 44 09 55 b1 02 85 44 06 00 ff 09 c5 26 ff 00 96 00 01 b1 02 85 f0 09 01 95 04 81 02 85 f2 09 03 b1 02 09 04 b1 02 09 05 b1 02 95 01 09 06 b1 02 09 07 b1 02 85 f1 09 02 95 07 91 02 85 f3 09 08 95 3d b1 02 c0",
+ )
+
+
+class Testadvanced_silicon_2149_230a(BaseTest.TestWin8Multitouch):
+ def create_device(self):
+ return Digitizer(
+ "uhid test advanced_silicon_2149_230a",
+ rdesc="05 0d 09 04 a1 01 85 01 09 22 a1 02 09 42 15 00 25 01 75 01 95 01 81 02 95 02 81 03 09 51 25 1f 75 05 95 01 81 02 05 01 26 ff 7f 75 10 55 0e 65 11 09 30 35 00 46 f6 13 81 02 46 40 0b 09 31 81 02 45 00 c0 05 0d 09 22 a1 02 09 42 15 00 25 01 75 01 95 01 81 02 95 02 81 03 09 51 25 1f 75 05 95 01 81 02 05 01 26 ff 7f 75 10 55 0e 65 11 09 30 35 00 46 f6 13 81 02 46 40 0b 09 31 81 02 45 00 c0 05 0d 09 22 a1 02 09 42 15 00 25 01 75 01 95 01 81 02 95 02 81 03 09 51 25 1f 75 05 95 01 81 02 05 01 26 ff 7f 75 10 55 0e 65 11 09 30 35 00 46 f6 13 81 02 46 40 0b 09 31 81 02 45 00 c0 05 0d 09 22 a1 02 09 42 15 00 25 01 75 01 95 01 81 02 95 02 81 03 09 51 25 1f 75 05 95 01 81 02 05 01 26 ff 7f 75 10 55 0e 65 11 09 30 35 00 46 f6 13 81 02 46 40 0b 09 31 81 02 45 00 c0 05 0d 09 22 a1 02 09 42 15 00 25 01 75 01 95 01 81 02 95 02 81 03 09 51 25 1f 75 05 95 01 81 02 05 01 26 ff 7f 75 10 55 0e 65 11 09 30 35 00 46 f6 13 81 02 46 40 0b 09 31 81 02 45 00 c0 05 0d 15 00 27 ff ff 00 00 75 10 95 01 09 56 81 02 25 0a 75 08 09 54 81 02 85 44 09 55 b1 02 85 44 06 00 ff 09 c5 26 ff 00 96 00 01 b1 02 85 f0 09 01 95 04 81 02 85 f2 09 03 b1 02 09 04 b1 02 09 05 b1 02 95 01 09 06 b1 02 09 07 b1 02 85 f1 09 02 95 07 91 02 85 f3 09 08 95 3d b1 02 c0",
+ )
+
+
+class Testadvanced_silicon_2149_231c(BaseTest.TestWin8Multitouch):
+ def create_device(self):
+ return Digitizer(
+ "uhid test advanced_silicon_2149_231c",
+ rdesc="05 0d 09 04 a1 01 85 01 09 22 a1 02 09 42 15 00 25 01 75 01 95 01 81 02 95 02 81 03 09 51 25 1f 75 05 95 01 81 02 05 01 26 ff 7f 75 10 55 0e 65 11 09 30 35 00 46 e2 13 81 02 46 32 0b 09 31 81 02 45 00 c0 05 0d 09 22 a1 02 09 42 15 00 25 01 75 01 95 01 81 02 95 02 81 03 09 51 25 1f 75 05 95 01 81 02 05 01 26 ff 7f 75 10 55 0e 65 11 09 30 35 00 46 e2 13 81 02 46 32 0b 09 31 81 02 45 00 c0 05 0d 09 22 a1 02 09 42 15 00 25 01 75 01 95 01 81 02 95 02 81 03 09 51 25 1f 75 05 95 01 81 02 05 01 26 ff 7f 75 10 55 0e 65 11 09 30 35 00 46 e2 13 81 02 46 32 0b 09 31 81 02 45 00 c0 05 0d 09 22 a1 02 09 42 15 00 25 01 75 01 95 01 81 02 95 02 81 03 09 51 25 1f 75 05 95 01 81 02 05 01 26 ff 7f 75 10 55 0e 65 11 09 30 35 00 46 e2 13 81 02 46 32 0b 09 31 81 02 45 00 c0 05 0d 09 22 a1 02 09 42 15 00 25 01 75 01 95 01 81 02 95 02 81 03 09 51 25 1f 75 05 95 01 81 02 05 01 26 ff 7f 75 10 55 0e 65 11 09 30 35 00 46 e2 13 81 02 46 32 0b 09 31 81 02 45 00 c0 05 0d 15 00 27 ff ff 00 00 75 10 95 01 09 56 81 02 25 0a 75 08 09 54 81 02 85 44 09 55 b1 02 85 44 06 00 ff 09 c5 26 ff 00 96 00 01 b1 02 85 f0 09 01 95 04 b1 02 85 f2 09 03 b1 02 09 04 b1 02 09 05 b1 02 95 01 09 06 b1 02 09 07 b1 02 85 f1 09 02 95 07 91 02 85 f3 09 08 95 3d b1 02 c0",
+ )
+
+
+class Testadvanced_silicon_2149_2703(BaseTest.TestWin8Multitouch):
+ def create_device(self):
+ return Digitizer(
+ "uhid test advanced_silicon_2149_2703",
+ rdesc="05 0d 09 04 a1 01 85 01 09 22 a1 02 09 42 15 00 25 01 75 01 95 01 81 02 95 02 81 03 09 51 25 1f 75 05 95 01 81 02 05 01 26 ff 7f 75 10 55 0e 65 11 09 30 35 00 46 66 17 81 02 46 34 0d 09 31 81 02 45 00 c0 05 0d 09 22 a1 02 09 42 15 00 25 01 75 01 95 01 81 02 95 02 81 03 09 51 25 1f 75 05 95 01 81 02 05 01 26 ff 7f 75 10 55 0e 65 11 09 30 35 00 46 66 17 81 02 46 34 0d 09 31 81 02 45 00 c0 05 0d 09 22 a1 02 09 42 15 00 25 01 75 01 95 01 81 02 95 02 81 03 09 51 25 1f 75 05 95 01 81 02 05 01 26 ff 7f 75 10 55 0e 65 11 09 30 35 00 46 66 17 81 02 46 34 0d 09 31 81 02 45 00 c0 05 0d 09 22 a1 02 09 42 15 00 25 01 75 01 95 01 81 02 95 02 81 03 09 51 25 1f 75 05 95 01 81 02 05 01 26 ff 7f 75 10 55 0e 65 11 09 30 35 00 46 66 17 81 02 46 34 0d 09 31 81 02 45 00 c0 05 0d 09 22 a1 02 09 42 15 00 25 01 75 01 95 01 81 02 95 02 81 03 09 51 25 1f 75 05 95 01 81 02 05 01 26 ff 7f 75 10 55 0e 65 11 09 30 35 00 46 66 17 81 02 46 34 0d 09 31 81 02 45 00 c0 05 0d 15 00 27 ff ff 00 00 75 10 95 01 09 56 81 02 25 0a 75 08 09 54 81 02 85 44 09 55 b1 02 85 44 06 00 ff 09 c5 26 ff 00 96 00 01 b1 02 85 f0 09 01 95 04 81 02 85 f2 09 03 b1 02 09 04 b1 02 09 05 b1 02 95 01 09 06 b1 02 09 07 b1 02 85 f1 09 02 95 07 91 02 85 f3 09 08 95 3d b1 02 c0",
+ )
+
+
+class Testadvanced_silicon_2149_270b(BaseTest.TestWin8Multitouch):
+ def create_device(self):
+ return Digitizer(
+ "uhid test advanced_silicon_2149_270b",
+ rdesc="05 0d 09 04 a1 01 85 01 09 22 a1 02 09 42 15 00 25 01 75 01 95 01 81 02 95 02 81 03 09 51 25 1f 75 05 95 01 81 02 05 01 26 ff 7f 75 10 55 0e 65 11 09 30 35 00 46 52 17 81 02 46 20 0d 09 31 81 02 45 00 c0 05 0d 09 22 a1 02 09 42 15 00 25 01 75 01 95 01 81 02 95 02 81 03 09 51 25 1f 75 05 95 01 81 02 05 01 26 ff 7f 75 10 55 0e 65 11 09 30 35 00 46 52 17 81 02 46 20 0d 09 31 81 02 45 00 c0 05 0d 09 22 a1 02 09 42 15 00 25 01 75 01 95 01 81 02 95 02 81 03 09 51 25 1f 75 05 95 01 81 02 05 01 26 ff 7f 75 10 55 0e 65 11 09 30 35 00 46 52 17 81 02 46 20 0d 09 31 81 02 45 00 c0 05 0d 09 22 a1 02 09 42 15 00 25 01 75 01 95 01 81 02 95 02 81 03 09 51 25 1f 75 05 95 01 81 02 05 01 26 ff 7f 75 10 55 0e 65 11 09 30 35 00 46 52 17 81 02 46 20 0d 09 31 81 02 45 00 c0 05 0d 09 22 a1 02 09 42 15 00 25 01 75 01 95 01 81 02 95 02 81 03 09 51 25 1f 75 05 95 01 81 02 05 01 26 ff 7f 75 10 55 0e 65 11 09 30 35 00 46 52 17 81 02 46 20 0d 09 31 81 02 45 00 c0 05 0d 15 00 27 ff ff 00 00 75 10 95 01 09 56 81 02 25 0a 75 08 09 54 81 02 85 44 09 55 b1 02 85 44 06 00 ff 09 c5 26 ff 00 96 00 01 b1 02 85 f0 09 01 95 04 b1 02 85 f2 09 03 b1 02 09 04 b1 02 09 05 b1 02 95 01 09 06 b1 02 09 07 b1 02 85 f1 09 02 95 07 91 02 85 f3 09 08 95 3d b1 02 c0",
+ )
+
+
+class Testadvanced_silicon_2575_0204(BaseTest.TestWin8Multitouch):
+ """found on the Dell Canvas 27"""
+
+ def create_device(self):
+ return Digitizer(
+ "uhid test advanced_silicon_2575_0204",
+ rdesc="05 0d 09 04 a1 01 85 01 05 0d 09 22 a1 02 09 42 15 00 25 01 75 01 95 01 81 02 25 7f 09 51 75 07 95 01 81 02 05 01 26 ff 7f 75 10 55 0e 65 11 09 30 35 00 46 4f 17 81 02 46 1d 0d 09 31 81 02 45 00 c0 05 0d 09 22 a1 02 09 42 15 00 25 01 75 01 95 01 81 02 25 7f 09 51 75 07 95 01 81 02 05 01 26 ff 7f 75 10 55 0e 65 11 09 30 35 00 46 4f 17 81 02 46 1d 0d 09 31 81 02 45 00 c0 05 0d 09 22 a1 02 09 42 15 00 25 01 75 01 95 01 81 02 25 7f 09 51 75 07 95 01 81 02 05 01 26 ff 7f 75 10 55 0e 65 11 09 30 35 00 46 4f 17 81 02 46 1d 0d 09 31 81 02 45 00 c0 05 0d 09 22 a1 02 09 42 15 00 25 01 75 01 95 01 81 02 25 7f 09 51 75 07 95 01 81 02 05 01 26 ff 7f 75 10 55 0e 65 11 09 30 35 00 46 4f 17 81 02 46 1d 0d 09 31 81 02 45 00 c0 05 0d 09 22 a1 02 09 42 15 00 25 01 75 01 95 01 81 02 25 7f 09 51 75 07 95 01 81 02 05 01 26 ff 7f 75 10 55 0e 65 11 09 30 35 00 46 4f 17 81 02 46 1d 0d 09 31 81 02 45 00 c0 05 0d 09 22 a1 02 09 42 15 00 25 01 75 01 95 01 81 02 25 7f 09 51 75 07 95 01 81 02 05 01 26 ff 7f 75 10 55 0e 65 11 09 30 35 00 46 4f 17 81 02 46 1d 0d 09 31 81 02 45 00 c0 05 0d 09 22 a1 02 09 42 15 00 25 01 75 01 95 01 81 02 25 7f 09 51 75 07 95 01 81 02 05 01 26 ff 7f 75 10 55 0e 65 11 09 30 35 00 46 4f 17 81 02 46 1d 0d 09 31 81 02 45 00 c0 05 0d 09 22 a1 02 09 42 15 00 25 01 75 01 95 01 81 02 25 7f 09 51 75 07 95 01 81 02 05 01 26 ff 7f 75 10 55 0e 65 11 09 30 35 00 46 4f 17 81 02 46 1d 0d 09 31 81 02 45 00 c0 05 0d 09 22 a1 02 09 42 15 00 25 01 75 01 95 01 81 02 25 7f 09 51 75 07 95 01 81 02 05 01 26 ff 7f 75 10 55 0e 65 11 09 30 35 00 46 4f 17 81 02 46 1d 0d 09 31 81 02 45 00 c0 05 0d 09 22 a1 02 09 42 15 00 25 01 75 01 95 01 81 02 25 7f 09 51 75 07 95 01 81 02 05 01 26 ff 7f 75 10 55 0e 65 11 09 30 35 00 46 4f 17 81 02 46 1d 0d 09 31 81 02 45 00 c0 05 0d 15 00 27 ff ff 00 00 75 10 95 01 09 56 81 02 25 0a 75 08 09 54 81 02 85 42 09 55 25 0a b1 02 85 44 06 00 ff 09 c5 26 ff 00 96 00 01 b1 02 c0 05 01 09 0e a1 01 85 05 05 01 09 08 a1 00 09 30 55 0e 65 11 15 00 26 ff 7f 35 00 46 4f 17 75 10 95 01 81 42 09 31 46 1d 0d 81 42 06 00 ff 09 01 75 20 81 03 05 01 09 37 55 00 65 14 16 98 fe 26 68 01 36 98 fe 46 68 01 75 0f 81 06 05 09 09 01 65 00 15 00 25 01 35 00 45 00 75 01 81 02 05 0d 09 42 81 02 09 51 75 07 25 7f 81 02 05 0d 09 48 55 0e 65 11 15 00 26 ff 7f 35 00 46 ff 7f 75 10 81 02 09 49 81 02 09 3f 55 00 65 14 15 00 26 67 01 35 00 46 67 01 81 0a c0 65 00 35 00 45 00 05 0d 15 00 27 ff ff 00 00 75 10 95 01 09 56 81 02 25 05 75 08 09 54 81 02 85 47 09 55 25 05 b1 02 c0 06 00 ff 09 04 a1 01 85 f0 09 01 75 08 95 04 b1 02 85 f2 09 03 b1 02 09 04 b1 02 09 05 b1 02 85 c0 09 01 95 03 b1 02 85 c2 09 01 95 0f b1 02 85 c4 09 01 95 3e b1 02 85 c5 09 01 95 7e b1 02 85 c6 09 01 95 fe b1 02 85 c8 09 01 96 fe 03 b1 02 85 0a 09 01 95 3f b1 02 c0",
+ )
+
+
+class Testadvanced_silicon_2619_5610(BaseTest.TestWin8Multitouch):
+ def create_device(self):
+ return Digitizer(
+ "uhid test advanced_silicon_2619_5610",
+ rdesc="05 0d 09 04 a1 01 85 01 09 22 a1 02 09 42 15 00 25 01 75 01 95 01 81 02 95 02 81 03 09 51 25 1f 75 05 95 01 81 02 a1 00 05 01 26 ff 7f 75 10 55 0e 65 11 09 30 35 00 46 f9 15 81 02 46 73 0c 09 31 81 02 45 00 c0 c0 05 0d 09 22 a1 02 09 42 15 00 25 01 75 01 95 01 81 02 95 02 81 03 09 51 25 1f 75 05 95 01 81 02 a1 00 05 01 26 ff 7f 75 10 55 0e 65 11 09 30 35 00 46 f9 15 81 02 46 73 0c 09 31 81 02 45 00 c0 c0 05 0d 09 22 a1 02 09 42 15 00 25 01 75 01 95 01 81 02 95 02 81 03 09 51 25 1f 75 05 95 01 81 02 a1 00 05 01 26 ff 7f 75 10 55 0e 65 11 09 30 35 00 46 f9 15 81 02 46 73 0c 09 31 81 02 45 00 c0 c0 05 0d 09 22 a1 02 09 42 15 00 25 01 75 01 95 01 81 02 95 02 81 03 09 51 25 1f 75 05 95 01 81 02 a1 00 05 01 26 ff 7f 75 10 55 0e 65 11 09 30 35 00 46 f9 15 81 02 46 73 0c 09 31 81 02 45 00 c0 c0 05 0d 09 22 a1 02 09 42 15 00 25 01 75 01 95 01 81 02 95 02 81 03 09 51 25 1f 75 05 95 01 81 02 a1 00 05 01 26 ff 7f 75 10 55 0e 65 11 09 30 35 00 46 f9 15 81 02 46 73 0c 09 31 81 02 45 00 c0 c0 05 0d 09 22 a1 02 09 42 15 00 25 01 75 01 95 01 81 02 95 02 81 03 09 51 25 1f 75 05 95 01 81 02 a1 00 05 01 26 ff 7f 75 10 55 0e 65 11 09 30 35 00 46 f9 15 81 02 46 73 0c 09 31 81 02 45 00 c0 c0 05 0d 09 22 a1 02 09 42 15 00 25 01 75 01 95 01 81 02 95 02 81 03 09 51 25 1f 75 05 95 01 81 02 a1 00 05 01 26 ff 7f 75 10 55 0e 65 11 09 30 35 00 46 f9 15 81 02 46 73 0c 09 31 81 02 45 00 c0 c0 05 0d 09 22 a1 02 09 42 15 00 25 01 75 01 95 01 81 02 95 02 81 03 09 51 25 1f 75 05 95 01 81 02 a1 00 05 01 26 ff 7f 75 10 55 0e 65 11 09 30 35 00 46 f9 15 81 02 46 73 0c 09 31 81 02 45 00 c0 c0 05 0d 09 22 a1 02 09 42 15 00 25 01 75 01 95 01 81 02 95 02 81 03 09 51 25 1f 75 05 95 01 81 02 a1 00 05 01 26 ff 7f 75 10 55 0e 65 11 09 30 35 00 46 f9 15 81 02 46 73 0c 09 31 81 02 45 00 c0 c0 05 0d 09 22 a1 02 09 42 15 00 25 01 75 01 95 01 81 02 95 02 81 03 09 51 25 1f 75 05 95 01 81 02 a1 00 05 01 26 ff 7f 75 10 55 0e 65 11 09 30 35 00 46 f9 15 81 02 46 73 0c 09 31 81 02 45 00 c0 c0 05 0d 15 00 27 ff ff 00 00 75 10 95 01 09 56 81 02 25 0a 75 08 09 54 81 02 85 44 09 55 b1 02 85 44 06 00 ff 09 c5 26 ff 00 96 00 01 b1 02 85 f0 09 01 95 04 81 02 85 f2 09 03 b1 02 09 04 b1 02 09 05 b1 02 95 01 09 06 b1 02 09 07 b1 02 85 f1 09 02 95 07 91 02 c0",
+ )
+
+
+class Testatmel_03eb_8409(BaseTest.TestWin8Multitouch):
+ def create_device(self):
+ return Digitizer(
+ "uhid test atmel_03eb_8409",
+ rdesc="05 0d 09 04 a1 01 85 01 09 22 a1 02 09 42 15 00 25 01 75 01 95 01 81 02 95 01 81 03 95 01 81 03 25 1f 75 05 09 51 81 02 05 01 55 0e 65 11 35 00 75 10 95 02 46 c8 0a 26 6f 08 09 30 81 02 35 00 35 00 46 18 06 26 77 0f 09 31 81 02 35 00 35 00 05 0d 95 01 75 08 15 00 26 ff 00 46 ff 00 09 48 81 02 09 49 81 02 c0 09 22 a1 02 09 42 15 00 25 01 75 01 95 01 81 02 95 01 81 03 95 01 81 03 25 1f 75 05 09 51 81 02 05 01 55 0e 65 11 35 00 75 10 95 02 46 c8 0a 26 6f 08 09 30 81 02 35 00 35 00 46 18 06 26 77 0f 09 31 81 02 35 00 35 00 05 0d 95 01 75 08 15 00 26 ff 00 46 ff 00 09 48 81 02 09 49 81 02 c0 09 22 a1 02 09 42 15 00 25 01 75 01 95 01 81 02 95 01 81 03 95 01 81 03 25 1f 75 05 09 51 81 02 05 01 55 0e 65 11 35 00 75 10 95 02 46 c8 0a 26 6f 08 09 30 81 02 35 00 35 00 46 18 06 26 77 0f 09 31 81 02 35 00 35 00 05 0d 95 01 75 08 15 00 26 ff 00 46 ff 00 09 48 81 02 09 49 81 02 c0 09 22 a1 02 09 42 15 00 25 01 75 01 95 01 81 02 95 01 81 03 95 01 81 03 25 1f 75 05 09 51 81 02 05 01 55 0e 65 11 35 00 75 10 95 02 46 c8 0a 26 6f 08 09 30 81 02 35 00 35 00 46 18 06 26 77 0f 09 31 81 02 35 00 35 00 05 0d 95 01 75 08 15 00 26 ff 00 46 ff 00 09 48 81 02 09 49 81 02 c0 09 22 a1 02 09 42 15 00 25 01 75 01 95 01 81 02 95 01 81 03 95 01 81 03 25 1f 75 05 09 51 81 02 05 01 55 0e 65 11 35 00 75 10 95 02 46 c8 0a 26 6f 08 09 30 81 02 35 00 35 00 46 18 06 26 77 0f 09 31 81 02 35 00 35 00 05 0d 95 01 75 08 15 00 26 ff 00 46 ff 00 09 48 81 02 09 49 81 02 c0 05 0d 27 ff ff 00 00 75 10 95 01 09 56 81 02 15 00 25 1f 75 05 09 54 95 01 81 02 75 03 25 01 95 01 81 03 75 08 85 02 09 55 25 10 b1 02 06 00 ff 85 05 09 c5 15 00 26 ff 00 75 08 96 00 01 b1 02 c0 05 0d 09 00 a1 01 85 03 09 20 a1 00 15 00 25 01 75 01 95 01 09 42 81 02 09 44 81 02 09 45 81 02 81 03 09 32 81 02 95 03 81 03 05 01 55 0e 65 11 35 00 75 10 95 02 46 c8 0a 26 6f 08 09 30 81 02 46 18 06 26 77 0f 09 31 81 02 05 0d 09 30 15 01 26 ff 00 75 08 95 01 81 02 c0 c0",
+ )
+
+
+class Testatmel_03eb_840b(BaseTest.TestWin8Multitouch):
+ def create_device(self):
+ return Digitizer(
+ "uhid test atmel_03eb_840b",
+ rdesc="05 0d 09 04 a1 01 85 01 09 22 a1 02 09 42 15 00 25 01 75 01 95 01 81 02 95 01 81 03 95 01 81 03 25 1f 75 05 09 51 81 02 05 01 55 0e 65 11 35 00 75 10 95 01 46 00 0a 26 ff 0f 09 30 81 02 09 00 81 03 46 a0 05 26 ff 0f 09 31 81 02 09 00 81 03 05 0d 95 01 75 08 15 00 26 ff 00 46 ff 00 09 00 81 03 09 00 81 03 c0 09 22 a1 02 09 42 15 00 25 01 75 01 95 01 81 02 95 01 81 03 95 01 81 03 25 1f 75 05 09 51 81 02 05 01 55 0e 65 11 35 00 75 10 95 01 46 00 0a 26 ff 0f 09 30 81 02 09 00 81 03 46 a0 05 26 ff 0f 09 31 81 02 09 00 81 03 05 0d 95 01 75 08 15 00 26 ff 00 46 ff 00 09 00 81 03 09 00 81 03 c0 09 22 a1 02 09 42 15 00 25 01 75 01 95 01 81 02 95 01 81 03 95 01 81 03 25 1f 75 05 09 51 81 02 05 01 55 0e 65 11 35 00 75 10 95 01 46 00 0a 26 ff 0f 09 30 81 02 09 00 81 03 46 a0 05 26 ff 0f 09 31 81 02 09 00 81 03 05 0d 95 01 75 08 15 00 26 ff 00 46 ff 00 09 00 81 03 09 00 81 03 c0 09 22 a1 02 09 42 15 00 25 01 75 01 95 01 81 02 95 01 81 03 95 01 81 03 25 1f 75 05 09 51 81 02 05 01 55 0e 65 11 35 00 75 10 95 01 46 00 0a 26 ff 0f 09 30 81 02 09 00 81 03 46 a0 05 26 ff 0f 09 31 81 02 09 00 81 03 05 0d 95 01 75 08 15 00 26 ff 00 46 ff 00 09 00 81 03 09 00 81 03 c0 09 22 a1 02 09 42 15 00 25 01 75 01 95 01 81 02 95 01 81 03 95 01 81 03 25 1f 75 05 09 51 81 02 05 01 55 0e 65 11 35 00 75 10 95 01 46 00 0a 26 ff 0f 09 30 81 02 09 00 81 03 46 a0 05 26 ff 0f 09 31 81 02 09 00 81 03 05 0d 95 01 75 08 15 00 26 ff 00 46 ff 00 09 00 81 03 09 00 81 03 c0 05 0d 27 ff ff 00 00 75 10 95 01 09 56 81 02 15 00 25 1f 75 05 09 54 95 01 81 02 75 03 25 01 95 01 81 03 75 08 85 02 09 55 25 10 b1 02 06 00 ff 85 05 09 c5 15 00 26 ff 00 75 08 96 00 01 b1 02 c0 05 0d 09 02 a1 01 85 03 09 20 a1 00 15 00 25 01 75 01 95 01 09 42 81 02 09 44 81 02 09 45 81 02 81 03 09 32 81 02 95 03 81 03 05 01 55 0e 65 11 35 00 75 10 95 02 46 00 0a 26 ff 0f 09 30 81 02 46 a0 05 26 ff 0f 09 31 81 02 05 0d 09 30 15 01 26 ff 00 75 08 95 01 81 02 c0 c0",
+ )
+
+
+class Testdell_044e_1220(BaseTest.TestPTP):
+ def create_device(self):
+ return PTP(
+ "uhid test dell_044e_1220",
+ type="pressurepad",
+ rdesc="05 01 09 02 a1 01 85 01 09 01 a1 00 05 09 19 01 29 03 15 00 25 01 75 01 95 03 81 02 95 05 81 01 05 01 09 30 09 31 15 81 25 7f 75 08 95 02 81 06 09 38 95 01 81 06 05 0c 0a 38 02 81 06 c0 c0 05 0d 09 05 a1 01 85 08 09 22 a1 02 15 00 25 01 09 47 09 42 95 02 75 01 81 02 95 01 75 03 25 05 09 51 81 02 75 01 95 03 81 03 05 01 15 00 26 af 04 75 10 55 0e 65 11 09 30 35 00 46 e8 03 95 01 81 02 26 7b 02 46 12 02 09 31 81 02 c0 55 0c 66 01 10 47 ff ff 00 00 27 ff ff 00 00 75 10 95 01 05 0d 09 56 81 02 09 54 25 05 95 01 75 08 81 02 05 09 19 01 29 03 25 01 75 01 95 03 81 02 95 05 81 03 05 0d 85 09 09 55 75 08 95 01 25 05 b1 02 06 00 ff 85 0a 09 c5 15 00 26 ff 00 75 08 96 00 01 b1 02 c0 06 01 ff 09 01 a1 01 85 03 09 01 15 00 26 ff 00 95 1b 81 02 85 04 09 02 95 50 81 02 85 05 09 03 95 07 b1 02 85 06 09 04 81 02 c0 06 02 ff 09 01 a1 01 85 07 09 02 95 86 75 08 b1 02 c0 05 0d 09 0e a1 01 85 0b 09 22 a1 02 09 52 15 00 25 0a 75 08 95 01 b1 02 c0 09 22 a1 00 85 0c 09 57 09 58 75 01 95 02 25 01 b1 02 95 06 b1 03 c0 c0",
+ )
+
+
+class Testdell_06cb_75db(BaseTest.TestPTP):
+ def create_device(self):
+ return PTP(
+ "uhid test dell_06cb_75db",
+ max_contacts=3,
+ rdesc="05 01 09 02 a1 01 85 02 09 01 a1 00 05 09 19 01 29 02 15 00 25 01 75 01 95 02 81 02 95 06 81 01 05 01 09 30 09 31 15 81 25 7f 75 08 95 02 81 06 c0 c0 05 0d 09 05 a1 01 85 03 09 22 a1 02 15 00 25 01 09 47 09 42 95 02 75 01 81 02 95 01 75 03 25 05 09 51 81 02 75 01 95 03 81 03 05 01 15 00 26 c8 04 75 10 55 0e 65 11 09 30 35 00 46 fb 03 95 01 81 02 46 6c 02 26 e8 02 09 31 81 02 c0 05 0d 09 22 a1 02 15 00 25 01 09 47 09 42 95 02 75 01 81 02 95 01 75 03 25 05 09 51 81 02 75 01 95 03 81 03 05 01 15 00 26 c8 04 75 10 55 0e 65 11 09 30 35 00 46 fb 03 95 01 81 02 46 6c 02 26 e8 02 09 31 81 02 c0 05 0d 09 22 a1 02 15 00 25 01 09 47 09 42 95 02 75 01 81 02 95 01 75 03 25 05 09 51 81 02 75 01 95 03 81 03 05 01 15 00 26 c8 04 75 10 55 0e 65 11 09 30 35 00 46 fb 03 95 01 81 02 46 6c 02 26 e8 02 09 31 81 02 05 0d c0 55 0c 66 01 10 47 ff ff 00 00 27 ff ff 00 00 75 10 95 01 09 56 81 02 09 54 25 7f 95 01 75 08 81 02 05 09 09 01 25 01 75 01 95 01 81 02 95 07 81 03 05 0d 85 08 09 55 09 59 75 04 95 02 25 0f b1 02 85 0d 09 60 75 01 95 01 15 00 25 01 b1 02 95 07 b1 03 85 07 06 00 ff 09 c5 15 00 26 ff 00 75 08 96 00 01 b1 02 c0 05 0d 09 0e a1 01 85 04 09 22 a1 02 09 52 15 00 25 0a 75 08 95 01 b1 02 c0 09 22 a1 00 85 06 09 57 09 58 75 01 95 02 25 01 b1 02 95 06 b1 03 c0 c0 06 00 ff 09 01 a1 01 85 09 09 02 15 00 26 ff 00 75 08 95 14 91 02 85 0a 09 03 15 00 26 ff 00 75 08 95 14 91 02 85 0b 09 04 15 00 26 ff 00 75 08 95 1a 81 02 85 0c 09 05 15 00 26 ff 00 75 08 95 1a 81 02 85 0f 09 06 15 00 26 ff 00 75 08 95 01 b1 02 85 0e 09 07 15 00 26 ff 00 75 08 95 01 b1 02 c0",
+ )
+
+
+class Testegalax_capacitive_0eef_790a(BaseTest.TestWin8Multitouch):
+ def create_device(self):
+ return Digitizer(
+ "uhid test egalax_capacitive_0eef_790a",
+ max_contacts=10,
+ rdesc="05 0d 09 04 a1 01 85 06 05 0d 09 54 75 08 15 00 25 0c 95 01 81 02 09 22 a1 02 09 42 15 00 25 01 75 01 95 01 81 02 95 07 81 03 75 08 09 51 95 01 15 00 25 20 81 02 05 01 26 ff 0f 75 10 55 0e 65 11 09 30 35 00 46 13 0c 81 02 46 cb 06 09 31 81 02 75 08 95 02 81 03 81 03 c0 05 0d 09 22 a1 02 09 42 15 00 25 01 75 01 95 01 81 02 95 07 81 03 75 08 09 51 95 01 15 00 25 20 81 02 05 01 26 ff 0f 75 10 55 0e 65 11 09 30 35 00 46 13 0c 81 02 46 cb 06 09 31 81 02 75 08 95 02 81 03 81 03 c0 05 0d 09 22 a1 02 09 42 15 00 25 01 75 01 95 01 81 02 95 07 81 03 75 08 09 51 95 01 15 00 25 20 81 02 05 01 26 ff 0f 75 10 55 0e 65 11 09 30 35 00 46 13 0c 81 02 46 cb 06 09 31 81 02 75 08 95 02 81 03 81 03 c0 05 0d 09 22 a1 02 09 42 15 00 25 01 75 01 95 01 81 02 95 07 81 03 75 08 09 51 95 01 15 00 25 20 81 02 05 01 26 ff 0f 75 10 55 0e 65 11 09 30 35 00 46 13 0c 81 02 46 cb 06 09 31 81 02 75 08 95 02 81 03 81 03 c0 05 0d 09 22 a1 02 09 42 15 00 25 01 75 01 95 01 81 02 95 07 81 03 75 08 09 51 95 01 15 00 25 20 81 02 05 01 26 ff 0f 75 10 55 0e 65 11 09 30 35 00 46 13 0c 81 02 46 cb 06 09 31 81 02 75 08 95 02 81 03 81 03 c0 05 0d 17 00 00 00 00 27 ff ff ff 7f 75 20 95 01 55 00 65 00 09 56 81 02 09 55 09 53 75 08 95 02 26 ff 00 b1 02 06 00 ff 09 c5 85 07 15 00 26 ff 00 75 08 96 00 01 b1 02 c0 05 01 09 01 a1 01 85 01 09 01 a1 02 05 09 19 01 29 02 15 00 25 01 95 02 75 01 81 02 95 01 75 06 81 01 05 01 09 30 09 31 16 00 00 26 ff 0f 36 00 00 46 ff 0f 66 00 00 75 10 95 02 81 02 c0 c0 06 00 ff 09 01 a1 01 09 01 15 00 26 ff 00 85 03 75 08 95 3f 81 02 06 00 ff 09 01 15 00 26 ff 00 75 08 95 3f 91 02 c0 05 0d 09 0e a1 01 85 05 09 23 a1 02 09 52 09 53 15 00 25 0a 75 08 95 02 b1 02 c0 c0",
+ )
+
+
+class Testelan_04f3_000a(BaseTest.TestWin8Multitouch):
+ def create_device(self):
+ return Digitizer(
+ "uhid test elan_04f3_000a",
+ rdesc="05 0d 09 04 a1 01 85 01 09 22 a1 02 09 42 15 00 25 01 75 01 95 01 81 02 75 01 81 03 75 06 09 51 25 3f 81 02 26 ff 00 75 08 09 48 81 02 09 49 81 02 95 01 05 01 26 c0 0e 75 10 55 0f 65 11 09 30 35 00 46 26 01 95 02 81 02 26 00 08 46 a6 00 09 31 81 02 c0 05 0d 09 22 a1 02 05 0d 09 42 15 00 25 01 75 01 95 01 81 02 75 01 81 03 75 06 09 51 25 3f 81 02 26 ff 00 75 08 09 48 81 02 09 49 81 02 95 01 05 01 26 c0 0e 75 10 55 0f 65 11 09 30 35 00 46 26 01 95 02 81 02 26 00 08 46 a6 00 09 31 81 02 c0 05 0d 09 22 a1 02 05 0d 09 42 15 00 25 01 75 01 95 01 81 02 75 01 81 03 75 06 09 51 25 3f 81 02 26 ff 00 75 08 09 48 81 02 09 49 81 02 95 01 05 01 26 c0 0e 75 10 55 0f 65 11 09 30 35 00 46 26 01 95 02 81 02 26 00 08 46 a6 00 09 31 81 02 c0 05 0d 09 22 a1 02 05 0d 09 42 15 00 25 01 75 01 95 01 81 02 75 01 81 03 75 06 09 51 25 3f 81 02 26 ff 00 75 08 09 48 81 02 09 49 81 02 95 01 05 01 26 c0 0e 75 10 55 0f 65 11 09 30 35 00 46 26 01 95 02 81 02 26 00 08 46 a6 00 09 31 81 02 c0 05 0d 09 22 a1 02 05 0d 09 42 15 00 25 01 75 01 95 01 81 02 75 01 81 03 75 06 09 51 25 3f 81 02 26 ff 00 75 08 09 48 81 02 09 49 81 02 95 01 05 01 26 c0 0e 75 10 55 0f 65 11 09 30 35 00 46 26 01 95 02 81 02 26 00 08 46 a6 00 09 31 81 02 c0 05 0d 09 22 a1 02 05 0d 09 42 15 00 25 01 75 01 95 01 81 02 75 01 81 03 75 06 09 51 25 3f 81 02 26 ff 00 75 08 09 48 81 02 09 49 81 02 95 01 05 01 26 c0 0e 75 10 55 0f 65 11 09 30 35 00 46 26 01 95 02 81 02 26 00 08 46 a6 00 09 31 81 02 c0 05 0d 09 22 a1 02 05 0d 09 42 15 00 25 01 75 01 95 01 81 02 75 01 81 03 75 06 09 51 25 3f 81 02 26 ff 00 75 08 09 48 81 02 09 49 81 02 95 01 05 01 26 c0 0e 75 10 55 0f 65 11 09 30 35 00 46 26 01 95 02 81 02 26 00 08 46 a6 00 09 31 81 02 c0 05 0d 09 22 a1 02 05 0d 09 42 15 00 25 01 75 01 95 01 81 02 75 01 81 03 75 06 09 51 25 3f 81 02 26 ff 00 75 08 09 48 81 02 09 49 81 02 95 01 05 01 26 c0 0e 75 10 55 0f 65 11 09 30 35 00 46 26 01 95 02 81 02 26 00 08 46 a6 00 09 31 81 02 c0 05 0d 09 22 a1 02 05 0d 09 42 15 00 25 01 75 01 95 01 81 02 75 01 81 03 75 06 09 51 25 3f 81 02 26 ff 00 75 08 09 48 81 02 09 49 81 02 95 01 05 01 26 c0 0e 75 10 55 0f 65 11 09 30 35 00 46 26 01 95 02 81 02 26 00 08 46 a6 00 09 31 81 02 c0 05 0d 09 22 a1 02 05 0d 09 42 15 00 25 01 75 01 95 01 81 02 75 01 81 03 75 06 09 51 25 3f 81 02 26 ff 00 75 08 09 48 81 02 09 49 81 02 95 01 05 01 26 c0 0e 75 10 55 0f 65 11 09 30 35 00 46 26 01 95 02 81 02 26 00 08 46 a6 00 09 31 81 02 c0 05 0d 09 56 55 00 65 00 27 ff ff ff 7f 95 01 75 20 81 02 09 54 25 7f 95 01 75 08 81 02 85 0a 09 55 25 0a b1 02 85 44 06 00 ff 09 c5 15 00 26 ff 00 75 08 96 00 01 b1 02 c0 06 ff 01 09 01 a1 01 85 02 15 00 26 ff 00 75 08 95 40 09 00 81 02 c0 06 00 ff 09 01 a1 01 85 03 75 08 95 1f 09 01 91 02 c0 06 01 ff 09 01 a1 01 85 04 15 00 26 ff 00 75 08 95 13 09 00 81 02 c0",
+ )
+
+
+class Testelan_04f3_000c(BaseTest.TestWin8Multitouch):
+ def create_device(self):
+ return Digitizer(
+ "uhid test elan_04f3_000c",
+ rdesc="05 0d 09 04 a1 01 85 01 09 22 a1 02 09 42 15 00 25 01 75 01 95 01 81 02 75 01 81 03 75 06 09 51 25 3f 81 02 26 ff 00 75 08 09 48 81 02 09 49 81 02 95 01 05 01 26 40 0e 75 10 55 0f 65 11 09 30 35 00 46 01 01 95 02 81 02 26 00 08 46 91 00 09 31 81 02 c0 05 0d 09 22 a1 02 05 0d 09 42 15 00 25 01 75 01 95 01 81 02 75 01 81 03 75 06 09 51 25 3f 81 02 26 ff 00 75 08 09 48 81 02 09 49 81 02 95 01 05 01 26 40 0e 75 10 55 0f 65 11 09 30 35 00 46 01 01 95 02 81 02 26 00 08 46 91 00 09 31 81 02 c0 05 0d 09 22 a1 02 05 0d 09 42 15 00 25 01 75 01 95 01 81 02 75 01 81 03 75 06 09 51 25 3f 81 02 26 ff 00 75 08 09 48 81 02 09 49 81 02 95 01 05 01 26 40 0e 75 10 55 0f 65 11 09 30 35 00 46 01 01 95 02 81 02 26 00 08 46 91 00 09 31 81 02 c0 05 0d 09 22 a1 02 05 0d 09 42 15 00 25 01 75 01 95 01 81 02 75 01 81 03 75 06 09 51 25 3f 81 02 26 ff 00 75 08 09 48 81 02 09 49 81 02 95 01 05 01 26 40 0e 75 10 55 0f 65 11 09 30 35 00 46 01 01 95 02 81 02 26 00 08 46 91 00 09 31 81 02 c0 05 0d 09 22 a1 02 05 0d 09 42 15 00 25 01 75 01 95 01 81 02 75 01 81 03 75 06 09 51 25 3f 81 02 26 ff 00 75 08 09 48 81 02 09 49 81 02 95 01 05 01 26 40 0e 75 10 55 0f 65 11 09 30 35 00 46 01 01 95 02 81 02 26 00 08 46 91 00 09 31 81 02 c0 05 0d 09 22 a1 02 05 0d 09 42 15 00 25 01 75 01 95 01 81 02 75 01 81 03 75 06 09 51 25 3f 81 02 26 ff 00 75 08 09 48 81 02 09 49 81 02 95 01 05 01 26 40 0e 75 10 55 0f 65 11 09 30 35 00 46 01 01 95 02 81 02 26 00 08 46 91 00 09 31 81 02 c0 05 0d 09 22 a1 02 05 0d 09 42 15 00 25 01 75 01 95 01 81 02 75 01 81 03 75 06 09 51 25 3f 81 02 26 ff 00 75 08 09 48 81 02 09 49 81 02 95 01 05 01 26 40 0e 75 10 55 0f 65 11 09 30 35 00 46 01 01 95 02 81 02 26 00 08 46 91 00 09 31 81 02 c0 05 0d 09 22 a1 02 05 0d 09 42 15 00 25 01 75 01 95 01 81 02 75 01 81 03 75 06 09 51 25 3f 81 02 26 ff 00 75 08 09 48 81 02 09 49 81 02 95 01 05 01 26 40 0e 75 10 55 0f 65 11 09 30 35 00 46 01 01 95 02 81 02 26 00 08 46 91 00 09 31 81 02 c0 05 0d 09 22 a1 02 05 0d 09 42 15 00 25 01 75 01 95 01 81 02 75 01 81 03 75 06 09 51 25 3f 81 02 26 ff 00 75 08 09 48 81 02 09 49 81 02 95 01 05 01 26 40 0e 75 10 55 0f 65 11 09 30 35 00 46 01 01 95 02 81 02 26 00 08 46 91 00 09 31 81 02 c0 05 0d 09 22 a1 02 05 0d 09 42 15 00 25 01 75 01 95 01 81 02 75 01 81 03 75 06 09 51 25 3f 81 02 26 ff 00 75 08 09 48 81 02 09 49 81 02 95 01 05 01 26 40 0e 75 10 55 0f 65 11 09 30 35 00 46 01 01 95 02 81 02 26 00 08 46 91 00 09 31 81 02 c0 05 0d 09 56 55 00 65 00 27 ff ff ff 7f 95 01 75 20 81 02 09 54 25 7f 95 01 75 08 81 02 85 0a 09 55 25 0a b1 02 85 44 06 00 ff 09 c5 15 00 26 ff 00 75 08 96 00 01 b1 02 c0 06 ff 01 09 01 a1 01 85 02 15 00 26 ff 00 75 08 95 40 09 00 81 02 c0 06 00 ff 09 01 a1 01 85 03 75 08 95 1f 09 01 91 02 c0 06 01 ff 09 01 a1 01 85 04 15 00 26 ff 00 75 08 95 13 09 00 81 02 c0",
+ )
+
+
+class Testelan_04f3_010c(BaseTest.TestWin8Multitouch):
+ def create_device(self):
+ return Digitizer(
+ "uhid test elan_04f3_010c",
+ rdesc="05 0d 09 04 a1 01 85 01 09 22 a1 02 09 42 15 00 25 01 75 01 95 01 81 02 75 01 81 03 75 06 09 51 25 3f 81 02 26 ff 00 75 08 09 48 81 02 09 49 81 02 95 01 05 01 26 f0 0c 75 10 55 0f 65 11 09 30 35 00 46 58 01 95 02 81 02 26 50 07 46 c2 00 09 31 81 02 c0 05 0d 09 22 a1 02 05 0d 09 42 15 00 25 01 75 01 95 01 81 02 75 01 81 03 75 06 09 51 25 3f 81 02 26 ff 00 75 08 09 48 81 02 09 49 81 02 95 01 05 01 26 f0 0c 75 10 55 0f 65 11 09 30 35 00 46 58 01 95 02 81 02 26 50 07 46 c2 00 09 31 81 02 c0 05 0d 09 22 a1 02 05 0d 09 42 15 00 25 01 75 01 95 01 81 02 75 01 81 03 75 06 09 51 25 3f 81 02 26 ff 00 75 08 09 48 81 02 09 49 81 02 95 01 05 01 26 f0 0c 75 10 55 0f 65 11 09 30 35 00 46 58 01 95 02 81 02 26 50 07 46 c2 00 09 31 81 02 c0 05 0d 09 22 a1 02 05 0d 09 42 15 00 25 01 75 01 95 01 81 02 75 01 81 03 75 06 09 51 25 3f 81 02 26 ff 00 75 08 09 48 81 02 09 49 81 02 95 01 05 01 26 f0 0c 75 10 55 0f 65 11 09 30 35 00 46 58 01 95 02 81 02 26 50 07 46 c2 00 09 31 81 02 c0 05 0d 09 22 a1 02 05 0d 09 42 15 00 25 01 75 01 95 01 81 02 75 01 81 03 75 06 09 51 25 3f 81 02 26 ff 00 75 08 09 48 81 02 09 49 81 02 95 01 05 01 26 f0 0c 75 10 55 0f 65 11 09 30 35 00 46 58 01 95 02 81 02 26 50 07 46 c2 00 09 31 81 02 c0 05 0d 09 22 a1 02 05 0d 09 42 15 00 25 01 75 01 95 01 81 02 75 01 81 03 75 06 09 51 25 3f 81 02 26 ff 00 75 08 09 48 81 02 09 49 81 02 95 01 05 01 26 f0 0c 75 10 55 0f 65 11 09 30 35 00 46 58 01 95 02 81 02 26 50 07 46 c2 00 09 31 81 02 c0 05 0d 09 22 a1 02 05 0d 09 42 15 00 25 01 75 01 95 01 81 02 75 01 81 03 75 06 09 51 25 3f 81 02 26 ff 00 75 08 09 48 81 02 09 49 81 02 95 01 05 01 26 f0 0c 75 10 55 0f 65 11 09 30 35 00 46 58 01 95 02 81 02 26 50 07 46 c2 00 09 31 81 02 c0 05 0d 09 22 a1 02 05 0d 09 42 15 00 25 01 75 01 95 01 81 02 75 01 81 03 75 06 09 51 25 3f 81 02 26 ff 00 75 08 09 48 81 02 09 49 81 02 95 01 05 01 26 f0 0c 75 10 55 0f 65 11 09 30 35 00 46 58 01 95 02 81 02 26 50 07 46 c2 00 09 31 81 02 c0 05 0d 09 22 a1 02 05 0d 09 42 15 00 25 01 75 01 95 01 81 02 75 01 81 03 75 06 09 51 25 3f 81 02 26 ff 00 75 08 09 48 81 02 09 49 81 02 95 01 05 01 26 f0 0c 75 10 55 0f 65 11 09 30 35 00 46 58 01 95 02 81 02 26 50 07 46 c2 00 09 31 81 02 c0 05 0d 09 22 a1 02 05 0d 09 42 15 00 25 01 75 01 95 01 81 02 75 01 81 03 75 06 09 51 25 3f 81 02 26 ff 00 75 08 09 48 81 02 09 49 81 02 95 01 05 01 26 f0 0c 75 10 55 0f 65 11 09 30 35 00 46 58 01 95 02 81 02 26 50 07 46 c2 00 09 31 81 02 c0 05 0d 09 56 55 00 65 00 27 ff ff ff 7f 95 01 75 20 81 02 09 54 25 7f 95 01 75 08 81 02 85 0a 09 55 25 0a b1 02 85 44 06 00 ff 09 c5 15 00 26 ff 00 75 08 96 00 01 b1 02 c0 06 ff 01 09 01 a1 01 85 02 15 00 26 ff 00 75 08 95 40 09 00 81 02 c0 06 00 ff 09 01 a1 01 85 03 75 08 95 1f 09 01 91 02 c0 06 01 ff 09 01 a1 01 85 04 15 00 26 ff 00 75 08 95 13 09 00 81 02 c0",
+ )
+
+
+class Testelan_04f3_0125(BaseTest.TestWin8Multitouch):
+ def create_device(self):
+ return Digitizer(
+ "uhid test elan_04f3_0125",
+ rdesc="05 0d 09 04 a1 01 85 01 09 22 a1 02 09 42 15 00 25 01 75 01 95 01 81 02 75 01 81 03 75 06 09 51 25 3f 81 02 26 ff 00 75 08 09 48 81 02 09 49 81 02 95 01 05 01 26 f0 0c 75 10 55 0f 65 11 09 30 35 00 46 58 01 95 02 81 02 26 50 07 46 c1 00 09 31 81 02 c0 05 0d 09 22 a1 02 05 0d 09 42 15 00 25 01 75 01 95 01 81 02 75 01 81 03 75 06 09 51 25 3f 81 02 26 ff 00 75 08 09 48 81 02 09 49 81 02 95 01 05 01 26 f0 0c 75 10 55 0f 65 11 09 30 35 00 46 58 01 95 02 81 02 26 50 07 46 c1 00 09 31 81 02 c0 05 0d 09 22 a1 02 05 0d 09 42 15 00 25 01 75 01 95 01 81 02 75 01 81 03 75 06 09 51 25 3f 81 02 26 ff 00 75 08 09 48 81 02 09 49 81 02 95 01 05 01 26 f0 0c 75 10 55 0f 65 11 09 30 35 00 46 58 01 95 02 81 02 26 50 07 46 c1 00 09 31 81 02 c0 05 0d 09 22 a1 02 05 0d 09 42 15 00 25 01 75 01 95 01 81 02 75 01 81 03 75 06 09 51 25 3f 81 02 26 ff 00 75 08 09 48 81 02 09 49 81 02 95 01 05 01 26 f0 0c 75 10 55 0f 65 11 09 30 35 00 46 58 01 95 02 81 02 26 50 07 46 c1 00 09 31 81 02 c0 05 0d 09 22 a1 02 05 0d 09 42 15 00 25 01 75 01 95 01 81 02 75 01 81 03 75 06 09 51 25 3f 81 02 26 ff 00 75 08 09 48 81 02 09 49 81 02 95 01 05 01 26 f0 0c 75 10 55 0f 65 11 09 30 35 00 46 58 01 95 02 81 02 26 50 07 46 c1 00 09 31 81 02 c0 05 0d 09 22 a1 02 05 0d 09 42 15 00 25 01 75 01 95 01 81 02 75 01 81 03 75 06 09 51 25 3f 81 02 26 ff 00 75 08 09 48 81 02 09 49 81 02 95 01 05 01 26 f0 0c 75 10 55 0f 65 11 09 30 35 00 46 58 01 95 02 81 02 26 50 07 46 c1 00 09 31 81 02 c0 05 0d 09 22 a1 02 05 0d 09 42 15 00 25 01 75 01 95 01 81 02 75 01 81 03 75 06 09 51 25 3f 81 02 26 ff 00 75 08 09 48 81 02 09 49 81 02 95 01 05 01 26 f0 0c 75 10 55 0f 65 11 09 30 35 00 46 58 01 95 02 81 02 26 50 07 46 c1 00 09 31 81 02 c0 05 0d 09 22 a1 02 05 0d 09 42 15 00 25 01 75 01 95 01 81 02 75 01 81 03 75 06 09 51 25 3f 81 02 26 ff 00 75 08 09 48 81 02 09 49 81 02 95 01 05 01 26 f0 0c 75 10 55 0f 65 11 09 30 35 00 46 58 01 95 02 81 02 26 50 07 46 c1 00 09 31 81 02 c0 05 0d 09 22 a1 02 05 0d 09 42 15 00 25 01 75 01 95 01 81 02 75 01 81 03 75 06 09 51 25 3f 81 02 26 ff 00 75 08 09 48 81 02 09 49 81 02 95 01 05 01 26 f0 0c 75 10 55 0f 65 11 09 30 35 00 46 58 01 95 02 81 02 26 50 07 46 c1 00 09 31 81 02 c0 05 0d 09 22 a1 02 05 0d 09 42 15 00 25 01 75 01 95 01 81 02 75 01 81 03 75 06 09 51 25 3f 81 02 26 ff 00 75 08 09 48 81 02 09 49 81 02 95 01 05 01 26 f0 0c 75 10 55 0f 65 11 09 30 35 00 46 58 01 95 02 81 02 26 50 07 46 c1 00 09 31 81 02 c0 05 0d 09 56 55 00 65 00 27 ff ff ff 7f 95 01 75 20 81 02 09 54 25 7f 95 01 75 08 81 02 85 0a 09 55 25 0a b1 02 85 44 06 00 ff 09 c5 15 00 26 ff 00 75 08 96 00 01 b1 02 c0 06 ff 01 09 01 a1 01 85 02 15 00 26 ff 00 75 08 95 40 09 00 81 02 c0 06 00 ff 09 01 a1 01 85 03 75 08 95 1f 09 01 91 02 c0 06 01 ff 09 01 a1 01 85 04 15 00 26 ff 00 75 08 95 13 09 00 81 02 c0",
+ )
+
+
+class Testelan_04f3_016f(BaseTest.TestWin8Multitouch):
+ def create_device(self):
+ return Digitizer(
+ "uhid test elan_04f3_016f",
+ rdesc="05 0d 09 04 a1 01 85 01 09 22 a1 02 09 42 15 00 25 01 75 01 95 01 81 02 75 01 81 03 75 06 09 51 25 3f 81 02 26 ff 00 75 08 09 48 81 02 09 49 81 02 95 01 05 01 26 c0 0e 75 10 55 0f 65 11 09 30 35 00 46 26 01 95 02 81 02 26 40 08 46 a6 00 09 31 81 02 c0 05 0d 09 22 a1 02 05 0d 09 42 15 00 25 01 75 01 95 01 81 02 75 01 81 03 75 06 09 51 25 3f 81 02 26 ff 00 75 08 09 48 81 02 09 49 81 02 95 01 05 01 26 c0 0e 75 10 55 0f 65 11 09 30 35 00 46 26 01 95 02 81 02 26 40 08 46 a6 00 09 31 81 02 c0 05 0d 09 22 a1 02 05 0d 09 42 15 00 25 01 75 01 95 01 81 02 75 01 81 03 75 06 09 51 25 3f 81 02 26 ff 00 75 08 09 48 81 02 09 49 81 02 95 01 05 01 26 c0 0e 75 10 55 0f 65 11 09 30 35 00 46 26 01 95 02 81 02 26 40 08 46 a6 00 09 31 81 02 c0 05 0d 09 22 a1 02 05 0d 09 42 15 00 25 01 75 01 95 01 81 02 75 01 81 03 75 06 09 51 25 3f 81 02 26 ff 00 75 08 09 48 81 02 09 49 81 02 95 01 05 01 26 c0 0e 75 10 55 0f 65 11 09 30 35 00 46 26 01 95 02 81 02 26 40 08 46 a6 00 09 31 81 02 c0 05 0d 09 22 a1 02 05 0d 09 42 15 00 25 01 75 01 95 01 81 02 75 01 81 03 75 06 09 51 25 3f 81 02 26 ff 00 75 08 09 48 81 02 09 49 81 02 95 01 05 01 26 c0 0e 75 10 55 0f 65 11 09 30 35 00 46 26 01 95 02 81 02 26 40 08 46 a6 00 09 31 81 02 c0 05 0d 09 22 a1 02 05 0d 09 42 15 00 25 01 75 01 95 01 81 02 75 01 81 03 75 06 09 51 25 3f 81 02 26 ff 00 75 08 09 48 81 02 09 49 81 02 95 01 05 01 26 c0 0e 75 10 55 0f 65 11 09 30 35 00 46 26 01 95 02 81 02 26 40 08 46 a6 00 09 31 81 02 c0 05 0d 09 22 a1 02 05 0d 09 42 15 00 25 01 75 01 95 01 81 02 75 01 81 03 75 06 09 51 25 3f 81 02 26 ff 00 75 08 09 48 81 02 09 49 81 02 95 01 05 01 26 c0 0e 75 10 55 0f 65 11 09 30 35 00 46 26 01 95 02 81 02 26 40 08 46 a6 00 09 31 81 02 c0 05 0d 09 22 a1 02 05 0d 09 42 15 00 25 01 75 01 95 01 81 02 75 01 81 03 75 06 09 51 25 3f 81 02 26 ff 00 75 08 09 48 81 02 09 49 81 02 95 01 05 01 26 c0 0e 75 10 55 0f 65 11 09 30 35 00 46 26 01 95 02 81 02 26 40 08 46 a6 00 09 31 81 02 c0 05 0d 09 22 a1 02 05 0d 09 42 15 00 25 01 75 01 95 01 81 02 75 01 81 03 75 06 09 51 25 3f 81 02 26 ff 00 75 08 09 48 81 02 09 49 81 02 95 01 05 01 26 c0 0e 75 10 55 0f 65 11 09 30 35 00 46 26 01 95 02 81 02 26 40 08 46 a6 00 09 31 81 02 c0 05 0d 09 22 a1 02 05 0d 09 42 15 00 25 01 75 01 95 01 81 02 75 01 81 03 75 06 09 51 25 3f 81 02 26 ff 00 75 08 09 48 81 02 09 49 81 02 95 01 05 01 26 c0 0e 75 10 55 0f 65 11 09 30 35 00 46 26 01 95 02 81 02 26 40 08 46 a6 00 09 31 81 02 c0 05 0d 09 56 55 00 65 00 27 ff ff ff 7f 95 01 75 20 81 02 09 54 25 7f 95 01 75 08 81 02 85 0a 09 55 25 0a b1 02 85 44 06 00 ff 09 c5 15 00 26 ff 00 75 08 96 00 01 b1 02 c0 06 ff 01 09 01 a1 01 85 02 15 00 26 ff 00 75 08 95 40 09 00 81 02 c0 06 00 ff 09 01 a1 01 85 03 75 08 95 1f 09 01 91 02 c0 06 01 ff 09 01 a1 01 85 04 15 00 26 ff 00 75 08 95 13 09 00 81 02 c0",
+ )
+
+
+class Testelan_04f3_0732(BaseTest.TestWin8Multitouch):
+ def create_device(self):
+ return Digitizer(
+ "uhid test elan_04f3_0732",
+ rdesc="05 0d 09 04 a1 01 85 01 09 22 a1 02 09 42 15 00 25 01 75 01 95 01 81 02 09 32 81 02 75 06 09 51 25 3f 81 02 26 ff 00 75 08 09 48 81 02 09 49 81 02 95 01 05 01 26 c0 0b 75 10 55 0f 65 11 09 30 35 00 46 ff 00 95 02 81 02 26 40 07 46 85 00 09 31 81 02 c0 09 22 a1 02 05 0d 09 42 15 00 25 01 75 01 95 01 81 02 09 32 81 02 75 06 09 51 25 3f 81 02 26 ff 00 75 08 09 48 81 02 09 49 81 02 95 01 05 01 26 c0 0b 75 10 55 0f 65 11 09 30 35 00 46 ff 00 95 02 81 02 26 40 07 46 85 00 09 31 81 02 c0 09 22 a1 02 05 0d 09 42 15 00 25 01 75 01 95 01 81 02 09 32 81 02 75 06 09 51 25 3f 81 02 26 ff 00 75 08 09 48 81 02 09 49 81 02 95 01 05 01 26 c0 0b 75 10 55 0f 65 11 09 30 35 00 46 ff 00 95 02 81 02 26 40 07 46 85 00 09 31 81 02 c0 09 22 a1 02 05 0d 09 42 15 00 25 01 75 01 95 01 81 02 09 32 81 02 75 06 09 51 25 3f 81 02 26 ff 00 75 08 09 48 81 02 09 49 81 02 95 01 05 01 26 c0 0b 75 10 55 0f 65 11 09 30 35 00 46 ff 00 95 02 81 02 26 40 07 46 85 00 09 31 81 02 c0 09 22 a1 02 05 0d 09 42 15 00 25 01 75 01 95 01 81 02 09 32 81 02 75 06 09 51 25 3f 81 02 26 ff 00 75 08 09 48 81 02 09 49 81 02 95 01 05 01 26 c0 0b 75 10 55 0f 65 11 09 30 35 00 46 ff 00 95 02 81 02 26 40 07 46 85 00 09 31 81 02 c0 09 22 a1 02 05 0d 09 42 15 00 25 01 75 01 95 01 81 02 09 32 81 02 75 06 09 51 25 3f 81 02 26 ff 00 75 08 09 48 81 02 09 49 81 02 95 01 05 01 26 c0 0b 75 10 55 0f 65 11 09 30 35 00 46 ff 00 95 02 81 02 26 40 07 46 85 00 09 31 81 02 c0 09 22 a1 02 05 0d 09 42 15 00 25 01 75 01 95 01 81 02 09 32 81 02 75 06 09 51 25 3f 81 02 26 ff 00 75 08 09 48 81 02 09 49 81 02 95 01 05 01 26 c0 0b 75 10 55 0f 65 11 09 30 35 00 46 ff 00 95 02 81 02 26 40 07 46 85 00 09 31 81 02 c0 09 22 a1 02 05 0d 09 42 15 00 25 01 75 01 95 01 81 02 09 32 81 02 75 06 09 51 25 3f 81 02 26 ff 00 75 08 09 48 81 02 09 49 81 02 95 01 05 01 26 c0 0b 75 10 55 0f 65 11 09 30 35 00 46 ff 00 95 02 81 02 26 40 07 46 85 00 09 31 81 02 c0 09 22 a1 02 05 0d 09 42 15 00 25 01 75 01 95 01 81 02 09 32 81 02 75 06 09 51 25 3f 81 02 26 ff 00 75 08 09 48 81 02 09 49 81 02 95 01 05 01 26 c0 0b 75 10 55 0f 65 11 09 30 35 00 46 ff 00 95 02 81 02 26 40 07 46 85 00 09 31 81 02 c0 09 22 a1 02 05 0d 09 42 15 00 25 01 75 01 95 01 81 02 09 32 81 02 75 06 09 51 25 3f 81 02 26 ff 00 75 08 09 48 81 02 09 49 81 02 95 01 05 01 26 c0 0b 75 10 55 0f 65 11 09 30 35 00 46 ff 00 95 02 81 02 26 40 07 46 85 00 09 31 81 02 c0 05 0d 09 56 55 00 65 00 27 ff ff 00 00 95 01 75 20 81 02 09 54 25 7f 95 01 75 08 81 02 85 0a 09 55 25 0a b1 02 85 44 06 00 ff 09 c5 15 00 26 ff 00 75 08 96 00 01 b1 02 c0 06 ff 01 09 01 a1 01 85 02 15 00 25 ff 75 08 95 40 09 00 81 02 c0 06 00 ff 09 01 a1 01 85 03 75 08 95 1f 09 01 91 02 c0",
+ )
+
+
+class Testelan_04f3_200a(BaseTest.TestWin8Multitouch):
+ def create_device(self):
+ return Digitizer(
+ "uhid test elan_04f3_200a",
+ rdesc="05 0d 09 04 a1 01 85 01 09 22 a1 02 05 0d 09 42 15 00 25 01 75 01 95 01 81 02 09 32 81 02 75 06 09 51 25 3f 81 02 26 ff 00 75 08 09 48 81 02 09 49 81 02 95 01 05 01 26 c0 0e 75 10 55 0f 65 11 09 30 35 00 46 26 01 95 02 81 02 26 40 08 46 a6 00 09 31 81 02 c0 a1 02 05 0d 09 42 15 00 25 01 75 01 95 01 81 02 09 32 81 02 75 06 09 51 25 3f 81 02 26 ff 00 75 08 09 48 81 02 09 49 81 02 95 01 05 01 26 c0 0e 75 10 55 0f 65 11 09 30 35 00 46 26 01 95 02 81 02 26 40 08 46 a6 00 09 31 81 02 c0 05 0d 09 56 55 00 65 00 27 ff ff 00 00 95 01 75 20 81 02 09 54 25 7f 95 01 75 08 81 02 85 0a 09 55 25 0a b1 02 85 0e 06 00 ff 09 c5 15 00 26 ff 00 75 08 96 00 01 b1 02 c0",
+ )
+
+
+class Testelan_04f3_300b(BaseTest.TestPTP):
+ def create_device(self):
+ return PTP(
+ "uhid test elan_04f3_300b",
+ max_contacts=3,
+ rdesc="05 01 09 02 a1 01 85 01 09 01 a1 00 05 09 19 01 29 02 15 00 25 01 75 01 95 02 81 02 95 06 81 03 05 01 09 30 09 31 09 38 15 81 25 7f 75 08 95 03 81 06 05 0c 0a 38 02 95 01 81 06 75 08 95 03 81 03 c0 06 00 ff 85 0d 09 c5 15 00 26 ff 00 75 08 95 04 b1 02 85 0c 09 c6 96 76 02 75 08 b1 02 85 0b 09 c7 95 42 75 08 b1 02 09 01 85 5d 95 1f 75 08 81 06 c0 05 0d 09 05 a1 01 85 04 09 22 a1 02 15 00 25 01 09 47 09 42 95 02 75 01 81 02 95 01 75 02 25 02 09 51 81 02 75 01 95 04 81 03 05 01 15 00 26 a7 0c 75 10 55 0e 65 13 09 30 35 00 46 9d 01 95 01 81 02 46 25 01 26 2b 09 26 2b 09 09 31 81 02 05 0d 15 00 25 64 95 03 c0 55 0c 66 01 10 47 ff ff 00 00 27 ff ff 00 00 75 10 95 01 09 56 81 02 09 54 25 7f 95 01 75 08 81 02 05 09 09 01 25 01 75 01 95 01 81 02 95 07 81 03 05 0d 85 02 09 55 09 59 75 04 95 02 25 0f b1 02 85 07 09 60 75 01 95 01 15 00 25 01 b1 02 95 0f b1 03 06 00 ff 06 00 ff 85 06 09 c5 15 00 26 ff 00 75 08 96 00 01 b1 02 c0 05 0d 09 0e a1 01 85 03 09 22 a1 00 09 52 15 00 25 0a 75 08 95 02 b1 02 c0 09 22 a1 00 85 05 09 57 09 58 15 00 75 01 95 02 25 03 b1 02 95 0e b1 03 c0 c0",
+ )
+
+
+class Testelan_04f3_3045(BaseTest.TestPTP):
+ def create_device(self):
+ return PTP(
+ "uhid test elan_04f3_3045",
+ rdesc="05 01 09 02 a1 01 85 01 09 01 a1 00 05 09 19 01 29 02 15 00 25 01 75 01 95 02 81 02 95 06 81 03 05 01 09 30 09 31 09 38 15 81 25 7f 75 08 95 03 81 06 05 0c 0a 38 02 95 01 81 06 75 08 95 03 81 03 c0 c0 05 0d 09 05 a1 01 85 04 09 22 a1 02 15 00 25 01 09 47 09 42 95 02 75 01 81 02 75 01 95 02 81 03 95 01 75 04 25 0f 09 51 81 02 05 01 15 00 26 80 0c 75 10 55 0e 65 13 09 30 35 00 46 90 01 95 01 81 02 46 13 01 26 96 08 26 96 08 09 31 81 02 05 0d 15 00 25 64 95 03 c0 55 0c 66 01 10 47 ff ff 00 00 27 ff ff 00 00 75 10 95 01 09 56 81 02 09 54 25 7f 95 01 75 08 81 02 05 09 09 01 25 01 75 01 95 01 81 02 95 07 81 03 09 c5 75 08 95 04 81 03 05 0d 85 02 09 55 09 59 75 04 95 02 25 0f b1 02 85 07 09 60 75 01 95 01 15 00 25 01 b1 02 95 0f b1 03 06 00 ff 06 00 ff 85 06 09 c5 15 00 26 ff 00 75 08 96 00 01 b1 02 85 0d 09 c5 15 00 26 ff 00 75 08 95 04 b1 02 85 0c 09 c6 96 8a 02 75 08 b1 02 85 0b 09 c7 95 80 75 08 b1 02 c0 05 0d 09 0e a1 01 85 03 09 22 a1 00 09 52 15 00 25 0a 75 08 95 02 b1 02 c0 09 22 a1 00 85 05 09 57 09 58 15 00 75 01 95 02 25 03 b1 02 95 0e b1 03 c0 c0",
+ )
+
+
+class Testelan_04f3_313a(BaseTest.TestPTP):
+ def create_device(self):
+ return PTP(
+ "uhid test elan_04f3_313a",
+ type="touchpad",
+ input_info=(BusType.I2C, 0x04F3, 0x313A),
+ rdesc="05 01 09 02 a1 01 85 01 09 01 a1 00 05 09 19 01 29 03 15 00 25 01 75 01 95 03 81 02 95 05 81 03 05 01 09 30 09 31 15 81 25 7f 75 08 95 02 81 06 75 08 95 05 81 03 c0 06 00 ff 09 01 85 0e 09 c5 15 00 26 ff 00 75 08 95 04 b1 02 85 0a 09 c6 15 00 26 ff 00 75 08 95 04 b1 02 c0 06 00 ff 09 01 a1 01 85 5c 09 01 95 0b 75 08 81 06 85 0d 09 c5 15 00 26 ff 00 75 08 95 04 b1 02 85 0c 09 c6 96 80 03 75 08 b1 02 85 0b 09 c7 95 82 75 08 b1 02 c0 05 0d 09 05 a1 01 85 04 09 22 a1 02 15 00 25 01 09 47 09 42 95 02 75 01 81 02 05 09 09 02 09 03 15 00 25 01 75 01 95 02 81 02 05 0d 95 01 75 04 25 0f 09 51 81 02 05 01 15 00 26 d7 0e 75 10 55 0d 65 11 09 30 35 00 46 44 2f 95 01 81 02 46 12 16 26 eb 06 26 eb 06 09 31 81 02 05 0d 15 00 25 64 95 03 c0 55 0c 66 01 10 47 ff ff 00 00 27 ff ff 00 00 75 10 95 01 09 56 81 02 09 54 25 7f 95 01 75 08 81 02 25 01 75 01 95 08 81 03 09 c5 75 08 95 02 81 03 05 0d 85 02 09 55 09 59 75 04 95 02 25 0f b1 02 85 07 09 60 75 01 95 01 15 00 25 01 b1 02 95 0f b1 03 06 00 ff 06 00 ff 85 06 09 c5 15 00 26 ff 00 75 08 96 00 01 b1 02 c0 05 0d 09 0e a1 01 85 03 09 22 a1 00 09 52 15 00 25 0a 75 10 95 01 b1 02 c0 09 22 a1 00 85 05 09 57 09 58 75 01 95 02 25 01 b1 02 95 0e b1 03 c0 c0 05 01 09 02 a1 01 85 2a 09 01 a1 00 05 09 19 01 29 03 15 00 25 01 75 01 95 03 81 02 95 05 81 03 05 01 09 30 09 31 15 81 25 7f 35 81 45 7f 55 00 65 13 75 08 95 02 81 06 75 08 95 05 81 03 c0 c0",
+ )
+
+
+class Testelo_04e7_0080(BaseTest.TestWin8Multitouch):
+ def create_device(self):
+ return Digitizer(
+ "uhid test elo_04e7_0080",
+ rdesc="05 0d 09 04 a1 01 05 0d 09 22 a1 02 09 42 15 00 25 01 75 01 95 01 81 02 95 03 81 03 09 32 95 02 81 02 95 02 81 03 09 51 75 08 95 01 81 02 05 01 26 ff 7f 65 11 55 0e 75 10 09 30 46 7c 24 81 02 09 31 46 96 14 81 02 c0 05 0d 09 22 a1 02 09 42 15 00 25 01 75 01 95 01 81 02 95 03 81 03 09 32 95 02 81 02 95 02 81 03 09 51 75 08 95 01 81 02 05 01 26 ff 7f 65 11 55 0e 75 10 09 30 46 7c 24 81 02 09 31 46 96 14 81 02 c0 05 0d 09 22 a1 02 09 42 15 00 25 01 75 01 95 01 81 02 95 03 81 03 09 32 95 02 81 02 95 02 81 03 09 51 75 08 95 01 81 02 05 01 26 ff 7f 65 11 55 0e 75 10 09 30 46 7c 24 81 02 09 31 46 96 14 81 02 c0 05 0d 09 22 a1 02 09 42 15 00 25 01 75 01 95 01 81 02 95 03 81 03 09 32 95 02 81 02 95 02 81 03 09 51 75 08 95 01 81 02 05 01 26 ff 7f 65 11 55 0e 75 10 09 30 46 7c 24 81 02 09 31 46 96 14 81 02 c0 05 0d 55 0c 66 01 10 47 ff ff 00 00 27 ff ff 00 00 75 10 95 01 09 56 81 02 09 54 75 08 95 01 15 00 25 08 81 02 09 55 b1 02 06 00 ff 09 c5 15 00 26 ff 00 75 08 96 00 01 b1 02 c0",
+ )
+
+
+class Testilitek_222a_0015(BaseTest.TestWin8Multitouch):
+ def create_device(self):
+ return Digitizer(
+ "uhid test ilitek_222a_0015",
+ rdesc="05 0d 09 04 a1 01 85 04 09 22 a1 02 05 0d 95 01 75 06 09 51 15 00 25 3f 81 02 09 42 25 01 75 01 95 01 81 02 75 01 95 01 81 03 05 01 75 10 55 0e 65 11 09 30 26 c2 16 35 00 46 b3 08 81 42 09 31 26 c2 0c 46 e4 04 81 42 c0 05 0d 09 22 a1 02 05 0d 95 01 75 06 09 51 15 00 25 3f 81 02 09 42 25 01 75 01 95 01 81 02 75 01 95 01 81 03 05 01 75 10 55 0e 65 11 09 30 26 c2 16 35 00 46 b3 08 81 42 09 31 26 c2 0c 46 e4 04 81 42 c0 05 0d 09 22 a1 02 05 0d 95 01 75 06 09 51 15 00 25 3f 81 02 09 42 25 01 75 01 95 01 81 02 75 01 95 01 81 03 05 01 75 10 55 0e 65 11 09 30 26 c2 16 35 00 46 b3 08 81 42 09 31 26 c2 0c 46 e4 04 81 42 c0 05 0d 09 22 a1 02 05 0d 95 01 75 06 09 51 15 00 25 3f 81 02 09 42 25 01 75 01 95 01 81 02 75 01 95 01 81 03 05 01 75 10 55 0e 65 11 09 30 26 c2 16 35 00 46 b3 08 81 42 09 31 26 c2 0c 46 e4 04 81 42 c0 05 0d 09 22 a1 02 05 0d 95 01 75 06 09 51 15 00 25 3f 81 02 09 42 25 01 75 01 95 01 81 02 75 01 95 01 81 03 05 01 75 10 55 0e 65 11 09 30 26 c2 16 35 00 46 b3 08 81 42 09 31 26 c2 0c 46 e4 04 81 42 c0 05 0d 09 22 a1 02 05 0d 95 01 75 06 09 51 15 00 25 3f 81 02 09 42 25 01 75 01 95 01 81 02 75 01 95 01 81 03 05 01 75 10 55 0e 65 11 09 30 26 c2 16 35 00 46 b3 08 81 42 09 31 26 c2 0c 46 e4 04 81 42 c0 05 0d 09 22 a1 02 05 0d 95 01 75 06 09 51 15 00 25 3f 81 02 09 42 25 01 75 01 95 01 81 02 75 01 95 01 81 03 05 01 75 10 55 0e 65 11 09 30 26 c2 16 35 00 46 b3 08 81 42 09 31 26 c2 0c 46 e4 04 81 42 c0 05 0d 09 22 a1 02 05 0d 95 01 75 06 09 51 15 00 25 3f 81 02 09 42 25 01 75 01 95 01 81 02 75 01 95 01 81 03 05 01 75 10 55 0e 65 11 09 30 26 c2 16 35 00 46 b3 08 81 42 09 31 26 c2 0c 46 e4 04 81 42 c0 05 0d 09 22 a1 02 05 0d 95 01 75 06 09 51 15 00 25 3f 81 02 09 42 25 01 75 01 95 01 81 02 75 01 95 01 81 03 05 01 75 10 55 0e 65 11 09 30 26 c2 16 35 00 46 b3 08 81 42 09 31 26 c2 0c 46 e4 04 81 42 c0 05 0d 09 22 a1 02 05 0d 95 01 75 06 09 51 15 00 25 3f 81 02 09 42 25 01 75 01 95 01 81 02 75 01 95 01 81 03 05 01 75 10 55 0e 65 11 09 30 26 c2 16 35 00 46 b3 08 81 42 09 31 26 c2 0c 46 e4 04 81 42 c0 05 0d 09 56 55 00 65 00 27 ff ff ff 7f 95 01 75 20 81 02 09 54 25 7f 95 01 75 08 81 02 85 02 09 55 25 0a b1 02 06 00 ff 09 c5 85 06 15 00 26 ff 00 75 08 96 00 01 b1 02 c0 06 00 ff 09 01 a1 01 09 01 85 03 15 00 26 ff 00 75 08 95 3f 81 02 06 00 ff 09 01 15 00 26 ff 00 75 08 95 3f 91 02 c0",
+ )
+
+
+class Testilitek_222a_001c(BaseTest.TestWin8Multitouch):
+ def create_device(self):
+ return Digitizer(
+ "uhid test ilitek_222a_001c",
+ rdesc="05 0d 09 04 a1 01 85 04 09 22 a1 02 05 0d 95 01 75 06 09 51 15 00 25 3f 81 02 09 42 25 01 75 01 95 01 81 02 75 01 95 01 81 03 05 01 75 10 55 0e 65 11 09 30 26 74 1d 35 00 46 70 0d 81 42 09 31 26 74 10 46 8f 07 81 42 c0 05 0d 09 22 a1 02 05 0d 95 01 75 06 09 51 15 00 25 3f 81 02 09 42 25 01 75 01 95 01 81 02 75 01 95 01 81 03 05 01 75 10 55 0e 65 11 09 30 26 74 1d 35 00 46 70 0d 81 42 09 31 26 74 10 46 8f 07 81 42 c0 05 0d 09 22 a1 02 05 0d 95 01 75 06 09 51 15 00 25 3f 81 02 09 42 25 01 75 01 95 01 81 02 75 01 95 01 81 03 05 01 75 10 55 0e 65 11 09 30 26 74 1d 35 00 46 70 0d 81 42 09 31 26 74 10 46 8f 07 81 42 c0 05 0d 09 22 a1 02 05 0d 95 01 75 06 09 51 15 00 25 3f 81 02 09 42 25 01 75 01 95 01 81 02 75 01 95 01 81 03 05 01 75 10 55 0e 65 11 09 30 26 74 1d 35 00 46 70 0d 81 42 09 31 26 74 10 46 8f 07 81 42 c0 05 0d 09 22 a1 02 05 0d 95 01 75 06 09 51 15 00 25 3f 81 02 09 42 25 01 75 01 95 01 81 02 75 01 95 01 81 03 05 01 75 10 55 0e 65 11 09 30 26 74 1d 35 00 46 70 0d 81 42 09 31 26 74 10 46 8f 07 81 42 c0 05 0d 09 22 a1 02 05 0d 95 01 75 06 09 51 15 00 25 3f 81 02 09 42 25 01 75 01 95 01 81 02 75 01 95 01 81 03 05 01 75 10 55 0e 65 11 09 30 26 74 1d 35 00 46 70 0d 81 42 09 31 26 74 10 46 8f 07 81 42 c0 05 0d 09 22 a1 02 05 0d 95 01 75 06 09 51 15 00 25 3f 81 02 09 42 25 01 75 01 95 01 81 02 75 01 95 01 81 03 05 01 75 10 55 0e 65 11 09 30 26 74 1d 35 00 46 70 0d 81 42 09 31 26 74 10 46 8f 07 81 42 c0 05 0d 09 22 a1 02 05 0d 95 01 75 06 09 51 15 00 25 3f 81 02 09 42 25 01 75 01 95 01 81 02 75 01 95 01 81 03 05 01 75 10 55 0e 65 11 09 30 26 74 1d 35 00 46 70 0d 81 42 09 31 26 74 10 46 8f 07 81 42 c0 05 0d 09 22 a1 02 05 0d 95 01 75 06 09 51 15 00 25 3f 81 02 09 42 25 01 75 01 95 01 81 02 75 01 95 01 81 03 05 01 75 10 55 0e 65 11 09 30 26 74 1d 35 00 46 70 0d 81 42 09 31 26 74 10 46 8f 07 81 42 c0 05 0d 09 22 a1 02 05 0d 95 01 75 06 09 51 15 00 25 3f 81 02 09 42 25 01 75 01 95 01 81 02 75 01 95 01 81 03 05 01 75 10 55 0e 65 11 09 30 26 74 1d 35 00 46 70 0d 81 42 09 31 26 74 10 46 8f 07 81 42 c0 05 0d 09 56 55 00 65 00 27 ff ff ff 7f 95 01 75 20 81 02 09 54 25 7f 95 01 75 08 81 02 85 02 09 55 25 0a b1 02 06 00 ff 09 c5 85 06 15 00 26 ff 00 75 08 96 00 01 b1 02 c0 06 00 ff 09 01 a1 01 09 01 85 03 15 00 26 ff 00 75 08 95 3f 81 02 06 00 ff 09 01 15 00 26 ff 00 75 08 95 3f 91 02 c0",
+ )
+
+
+class Testite_06cb_2968(BaseTest.TestPTP):
+ def create_device(self):
+ return PTP(
+ "uhid test ite_06cb_2968",
+ rdesc="05 01 09 02 a1 01 85 02 09 01 a1 00 05 09 19 01 29 02 15 00 25 01 75 01 95 02 81 02 95 06 81 01 05 01 09 30 09 31 15 81 25 7f 75 08 95 02 81 06 c0 c0 05 0d 09 05 a1 01 85 03 09 22 a1 02 15 00 25 01 09 47 09 42 95 02 75 01 81 02 95 01 75 03 25 05 09 51 81 02 75 01 95 03 81 03 05 01 15 00 26 1b 04 75 10 55 0e 65 11 09 30 35 00 46 6c 03 95 01 81 02 46 db 01 26 3b 02 09 31 81 02 05 0d c0 55 0c 66 01 10 47 ff ff 00 00 27 ff ff 00 00 75 10 95 01 09 56 81 02 09 54 25 7f 95 01 75 08 81 02 05 09 09 01 25 01 75 01 95 01 81 02 95 07 81 03 05 0d 85 08 09 55 09 59 75 04 95 02 25 0f b1 02 85 0d 09 60 75 01 95 01 15 00 25 01 b1 02 95 07 b1 03 85 07 06 00 ff 09 c5 15 00 26 ff 00 75 08 96 00 01 b1 02 c0 05 0d 09 0e a1 01 85 04 09 22 a1 02 09 52 15 00 25 0a 75 08 95 01 b1 02 c0 09 22 a1 00 85 06 09 57 09 58 75 01 95 02 25 01 b1 02 95 06 b1 03 c0 c0 06 00 ff 09 01 a1 01 85 09 09 02 15 00 26 ff 00 75 08 95 14 91 02 85 0a 09 03 15 00 26 ff 00 75 08 95 14 91 02 85 0b 09 04 15 00 26 ff 00 75 08 95 1a 81 02 85 0c 09 05 15 00 26 ff 00 75 08 95 1a 81 02 85 0f 09 06 15 00 26 ff 00 75 08 95 01 b1 02 85 0e 09 07 15 00 26 ff 00 75 08 95 01 b1 02 c0",
+ max_contacts=5,
+ input_info=(0x3, 0x06CB, 0x2968),
+ )
+
+
+class Testn_trig_1b96_0c01(BaseTest.TestWin8Multitouch):
+ def create_device(self):
+ return Digitizer(
+ "uhid test n_trig_1b96_0c01",
+ rdesc="75 08 15 00 26 ff 00 06 0b ff 09 0b a1 01 95 0f 09 29 85 29 b1 02 95 1f 09 2a 85 2a b1 02 95 3e 09 2b 85 2b b1 02 95 fe 09 2c 85 2c b1 02 96 fe 01 09 2d 85 2d b1 02 95 02 09 48 85 48 b1 02 95 0f 09 2e 85 2e 81 02 95 1f 09 2f 85 2f 81 02 95 3e 09 30 85 30 81 02 95 fe 09 31 85 31 81 02 96 fe 01 09 32 85 32 81 02 75 08 96 fe 0f 09 35 85 35 81 02 c0 05 0d 09 02 a1 01 85 01 09 20 35 00 a1 00 09 32 09 42 09 44 09 3c 09 45 15 00 25 01 75 01 95 05 81 02 95 03 81 03 05 01 09 30 75 10 95 01 a4 55 0e 65 11 46 15 0a 26 80 25 81 02 09 31 46 b4 05 26 20 1c 81 02 b4 05 0d 09 30 26 00 01 81 02 06 00 ff 09 01 81 02 c0 85 0c 06 00 ff 09 0c 75 08 95 06 26 ff 00 b1 02 85 0b 09 0b 95 02 b1 02 85 11 09 11 b1 02 85 15 09 15 95 05 b1 02 85 18 09 18 95 0c b1 02 c0 05 0d 09 04 a1 01 85 03 06 00 ff 09 01 75 10 95 01 15 00 27 ff ff 00 00 81 02 05 0d 09 22 a1 02 09 42 15 00 25 01 75 01 95 01 81 02 09 32 81 02 09 47 81 02 95 05 81 03 75 10 09 51 27 ff ff 00 00 95 01 81 02 05 01 09 30 75 10 95 02 a4 55 0e 65 11 46 15 0a 26 80 25 81 02 09 31 46 b4 05 26 20 1c 81 02 05 0d 09 48 95 01 26 80 25 81 02 09 49 26 20 1c 81 02 b4 06 00 ff 09 02 75 08 95 04 15 00 26 ff 00 81 02 c0 05 0d 09 22 a1 02 09 42 15 00 25 01 75 01 95 01 81 02 09 32 81 02 09 47 81 02 95 05 81 03 75 10 09 51 27 ff ff 00 00 95 01 81 02 05 01 09 30 75 10 95 02 a4 55 0e 65 11 46 15 0a 26 80 25 81 02 09 31 46 b4 05 26 20 1c 81 02 05 0d 09 48 95 01 26 80 25 81 02 09 49 26 20 1c 81 02 b4 06 00 ff 09 02 75 08 95 04 15 00 26 ff 00 81 02 c0 05 0d 09 22 a1 02 09 42 15 00 25 01 75 01 95 01 81 02 09 32 81 02 09 47 81 02 95 05 81 03 75 10 09 51 27 ff ff 00 00 95 01 81 02 05 01 09 30 75 10 95 02 a4 55 0e 65 11 46 15 0a 26 80 25 81 02 09 31 46 b4 05 26 20 1c 81 02 05 0d 09 48 95 01 26 80 25 81 02 09 49 26 20 1c 81 02 b4 06 00 ff 09 02 75 08 95 04 15 00 26 ff 00 81 02 c0 05 0d 09 22 a1 02 09 42 15 00 25 01 75 01 95 01 81 02 09 32 81 02 09 47 81 02 95 05 81 03 75 10 09 51 27 ff ff 00 00 95 01 81 02 05 01 09 30 75 10 95 02 a4 55 0e 65 11 46 15 0a 26 80 25 81 02 09 31 46 b4 05 26 20 1c 81 02 05 0d 09 48 95 01 26 80 25 81 02 09 49 26 20 1c 81 02 b4 06 00 ff 09 02 75 08 95 04 15 00 26 ff 00 81 02 c0 05 0d 09 22 a1 02 09 42 15 00 25 01 75 01 95 01 81 02 09 32 81 02 09 47 81 02 95 05 81 03 75 10 09 51 27 ff ff 00 00 95 01 81 02 05 01 09 30 75 10 95 02 a4 55 0e 65 11 46 15 0a 26 80 25 81 02 09 31 46 b4 05 26 20 1c 81 02 05 0d 09 48 95 01 26 80 25 81 02 09 49 26 20 1c 81 02 b4 06 00 ff 09 02 75 08 95 04 15 00 26 ff 00 81 02 c0 05 0d 09 22 a1 02 09 42 15 00 25 01 75 01 95 01 81 02 09 32 81 02 09 47 81 02 95 05 81 03 75 10 09 51 27 ff ff 00 00 95 01 81 02 05 01 09 30 75 10 95 02 a4 55 0e 65 11 46 15 0a 26 80 25 81 02 09 31 46 b4 05 26 20 1c 81 02 05 0d 09 48 95 01 26 80 25 81 02 09 49 26 20 1c 81 02 b4 06 00 ff 09 02 75 08 95 04 15 00 26 ff 00 81 02 c0 05 0d 09 22 a1 02 09 42 15 00 25 01 75 01 95 01 81 02 09 32 81 02 09 47 81 02 95 05 81 03 75 10 09 51 27 ff ff 00 00 95 01 81 02 05 01 09 30 75 10 95 02 a4 55 0e 65 11 46 15 0a 26 80 25 81 02 09 31 46 b4 05 26 20 1c 81 02 05 0d 09 48 95 01 26 80 25 81 02 09 49 26 20 1c 81 02 b4 06 00 ff 09 02 75 08 95 04 15 00 26 ff 00 81 02 c0 05 0d 09 22 a1 02 09 42 15 00 25 01 75 01 95 01 81 02 09 32 81 02 09 47 81 02 95 05 81 03 75 10 09 51 27 ff ff 00 00 95 01 81 02 05 01 09 30 75 10 95 02 a4 55 0e 65 11 46 15 0a 26 80 25 81 02 09 31 46 b4 05 26 20 1c 81 02 05 0d 09 48 95 01 26 80 25 81 02 09 49 26 20 1c 81 02 b4 06 00 ff 09 02 75 08 95 04 15 00 26 ff 00 81 02 c0 05 0d 09 22 a1 02 09 42 15 00 25 01 75 01 95 01 81 02 09 32 81 02 09 47 81 02 95 05 81 03 75 10 09 51 27 ff ff 00 00 95 01 81 02 05 01 09 30 75 10 95 02 a4 55 0e 65 11 46 15 0a 26 80 25 81 02 09 31 46 b4 05 26 20 1c 81 02 05 0d 09 48 95 01 26 80 25 81 02 09 49 26 20 1c 81 02 b4 06 00 ff 09 02 75 08 95 04 15 00 26 ff 00 81 02 c0 05 0d 09 22 a1 02 09 42 15 00 25 01 75 01 95 01 81 02 09 32 81 02 09 47 81 02 95 05 81 03 75 10 09 51 27 ff ff 00 00 95 01 81 02 05 01 09 30 75 10 95 02 a4 55 0e 65 11 46 15 0a 26 80 25 81 02 09 31 46 b4 05 26 20 1c 81 02 05 0d 09 48 95 01 26 80 25 81 02 09 49 26 20 1c 81 02 b4 06 00 ff 09 02 75 08 95 04 15 00 26 ff 00 81 02 c0 05 0d 09 54 95 01 75 08 81 02 09 56 75 20 95 01 27 ff ff ff 0f 81 02 85 04 09 55 75 08 95 01 25 0b b1 02 85 0a 06 00 ff 09 03 15 00 b1 02 85 1b 06 00 ff 09 c5 15 00 26 ff 00 75 08 96 00 01 b1 02 c0 05 01 09 02 a1 01 85 02 09 01 a1 00 05 09 19 01 29 02 15 00 25 01 75 01 95 02 81 02 95 06 81 03 05 01 09 30 09 31 15 81 25 7f 75 08 95 02 81 06 c0 c0",
+ )
+
+
+class Testn_trig_1b96_0c03(BaseTest.TestWin8Multitouch):
+ def create_device(self):
+ return Digitizer(
+ "uhid test n_trig_1b96_0c03",
+ rdesc="75 08 15 00 26 ff 00 06 0b ff 09 0b a1 01 95 0f 09 29 85 29 b1 02 95 1f 09 2a 85 2a b1 02 95 3e 09 2b 85 2b b1 02 95 fe 09 2c 85 2c b1 02 96 fe 01 09 2d 85 2d b1 02 95 02 09 48 85 48 b1 02 95 0f 09 2e 85 2e 81 02 95 1f 09 2f 85 2f 81 02 95 3e 09 30 85 30 81 02 95 fe 09 31 85 31 81 02 96 fe 01 09 32 85 32 81 02 75 08 96 fe 0f 09 35 85 35 81 02 c0 05 0d 09 02 a1 01 85 01 09 20 35 00 a1 00 09 32 09 42 09 44 09 3c 09 45 15 00 25 01 75 01 95 05 81 02 95 03 81 03 05 01 09 30 75 10 95 01 a4 55 0e 65 11 46 15 0a 26 80 25 81 02 09 31 46 b4 05 26 20 1c 81 02 b4 05 0d 09 30 26 00 01 81 02 06 00 ff 09 01 81 02 c0 85 0c 06 00 ff 09 0c 75 08 95 06 26 ff 00 b1 02 85 0b 09 0b 95 02 b1 02 85 11 09 11 b1 02 85 15 09 15 95 05 b1 02 85 18 09 18 95 0c b1 02 c0 05 0d 09 04 a1 01 85 03 06 00 ff 09 01 75 10 95 01 15 00 27 ff ff 00 00 81 02 05 0d 09 22 a1 02 09 42 15 00 25 01 75 01 95 01 81 02 95 01 81 03 09 47 81 02 95 05 81 03 75 10 09 51 27 ff ff 00 00 95 01 81 02 05 01 09 30 75 10 95 02 a4 55 0e 65 11 46 15 0a 26 80 25 81 02 09 31 46 b4 05 26 20 1c 81 02 05 0d 09 48 95 01 26 80 25 81 02 09 49 26 20 1c 81 02 b4 06 00 ff 09 02 75 08 95 04 15 00 26 ff 00 81 02 c0 05 0d 09 22 a1 02 09 42 15 00 25 01 75 01 95 01 81 02 95 01 81 03 09 47 81 02 95 05 81 03 75 10 09 51 27 ff ff 00 00 95 01 81 02 05 01 09 30 75 10 95 02 a4 55 0e 65 11 46 15 0a 26 80 25 81 02 09 31 46 b4 05 26 20 1c 81 02 05 0d 09 48 95 01 26 80 25 81 02 09 49 26 20 1c 81 02 b4 06 00 ff 09 02 75 08 95 04 15 00 26 ff 00 81 02 c0 05 0d 09 54 95 01 75 08 81 02 09 56 75 20 95 01 27 ff ff ff 0f 81 02 85 04 09 55 75 08 95 01 25 0b b1 02 85 0a 06 00 ff 09 03 15 00 b1 02 85 1b 06 00 ff 09 c5 15 00 26 ff 00 75 08 96 00 01 b1 02 c0 05 01 09 02 a1 01 85 02 09 01 a1 00 05 09 19 01 29 02 15 00 25 01 75 01 95 02 81 02 95 06 81 03 05 01 09 30 09 31 15 81 25 7f 75 08 95 02 81 06 c0 c0",
+ )
+
+
+class Testn_trig_1b96_0f00(BaseTest.TestWin8Multitouch):
+ def create_device(self):
+ return Digitizer(
+ "uhid test n_trig_1b96_0f00",
+ rdesc="75 08 15 00 26 ff 00 06 0b ff 09 0b a1 01 95 0f 09 29 85 29 b1 02 95 1f 09 2a 85 2a b1 02 95 3e 09 2b 85 2b b1 02 95 fe 09 2c 85 2c b1 02 96 fe 01 09 2d 85 2d b1 02 95 02 09 48 85 48 b1 02 95 0f 09 2e 85 2e 81 02 95 1f 09 2f 85 2f 81 02 95 3e 09 30 85 30 81 02 95 fe 09 31 85 31 81 02 96 fe 01 09 32 85 32 81 02 75 08 96 fe 0f 09 35 85 35 81 02 c0 05 0d 09 02 a1 01 85 01 09 20 35 00 a1 00 09 32 09 42 09 44 09 3c 09 45 15 00 25 01 75 01 95 05 81 02 95 03 81 03 05 01 09 30 75 10 95 01 a4 55 0e 65 11 46 03 0a 26 80 25 81 02 09 31 46 a1 05 26 20 1c 81 02 b4 05 0d 09 30 26 00 01 81 02 06 00 ff 09 01 81 02 c0 85 0c 06 00 ff 09 0c 75 08 95 06 26 ff 00 b1 02 85 0b 09 0b 95 02 b1 02 85 11 09 11 b1 02 85 15 09 15 95 05 b1 02 85 18 09 18 95 0c b1 02 c0 05 0d 09 04 a1 01 85 03 06 00 ff 09 01 75 10 95 01 15 00 27 ff ff 00 00 81 02 05 0d 09 22 a1 02 09 42 15 00 25 01 75 01 95 01 81 02 95 01 81 03 09 47 81 02 95 05 81 03 75 10 09 51 27 ff ff 00 00 95 01 81 02 05 01 09 30 75 10 95 02 a4 55 0e 65 11 46 03 0a 26 80 25 81 02 09 31 46 a1 05 26 20 1c 81 02 05 0d 09 48 95 01 26 80 25 81 02 09 49 26 20 1c 81 02 b4 06 00 ff 09 02 75 08 95 04 15 00 26 ff 00 81 02 c0 05 0d 09 22 a1 02 09 42 15 00 25 01 75 01 95 01 81 02 95 01 81 03 09 47 81 02 95 05 81 03 75 10 09 51 27 ff ff 00 00 95 01 81 02 05 01 09 30 75 10 95 02 a4 55 0e 65 11 46 03 0a 26 80 25 81 02 09 31 46 a1 05 26 20 1c 81 02 05 0d 09 48 95 01 26 80 25 81 02 09 49 26 20 1c 81 02 b4 06 00 ff 09 02 75 08 95 04 15 00 26 ff 00 81 02 c0 05 0d 09 54 95 01 75 08 81 02 09 56 75 20 95 01 27 ff ff ff 0f 81 02 85 04 09 55 75 08 95 01 25 0b b1 02 85 0a 06 00 ff 09 03 15 00 b1 02 85 1b 06 00 ff 09 c5 15 00 26 ff 00 75 08 96 00 01 b1 02 c0 05 01 09 02 a1 01 85 02 09 01 a1 00 05 09 19 01 29 02 15 00 25 01 75 01 95 02 81 02 95 06 81 03 05 01 09 30 09 31 15 81 25 7f 75 08 95 02 81 06 c0 c0",
+ )
+
+
+class Testn_trig_1b96_0f04(BaseTest.TestWin8Multitouch):
+ def create_device(self):
+ return Digitizer(
+ "uhid test n_trig_1b96_0f04",
+ rdesc="75 08 15 00 26 ff 00 06 0b ff 09 0b a1 01 95 0f 09 29 85 29 b1 02 95 1f 09 2a 85 2a b1 02 95 3e 09 2b 85 2b b1 02 95 fe 09 2c 85 2c b1 02 96 fe 01 09 2d 85 2d b1 02 95 02 09 48 85 48 b1 02 95 0f 09 2e 85 2e 81 02 95 1f 09 2f 85 2f 81 02 95 3e 09 30 85 30 81 02 95 fe 09 31 85 31 81 02 96 fe 01 09 32 85 32 81 02 75 08 96 fe 0f 09 35 85 35 81 02 c0 05 0d 09 02 a1 01 85 01 09 20 35 00 a1 00 09 32 09 42 09 44 09 3c 09 45 15 00 25 01 75 01 95 05 81 02 95 03 81 03 05 01 09 30 75 10 95 01 a4 55 0e 65 11 46 7f 0b 26 80 25 81 02 09 31 46 78 06 26 20 1c 81 02 b4 05 0d 09 30 26 00 01 81 02 06 00 ff 09 01 81 02 c0 85 0c 06 00 ff 09 0c 75 08 95 06 26 ff 00 b1 02 85 0b 09 0b 95 02 b1 02 85 11 09 11 b1 02 85 15 09 15 95 05 b1 02 85 18 09 18 95 0c b1 02 c0 05 0d 09 04 a1 01 85 03 06 00 ff 09 01 75 10 95 01 15 00 27 ff ff 00 00 81 02 05 0d 09 22 a1 02 09 42 15 00 25 01 75 01 95 01 81 02 95 01 81 03 09 47 81 02 95 05 81 03 75 10 09 51 27 ff ff 00 00 95 01 81 02 05 01 09 30 75 10 95 02 a4 55 0e 65 11 46 7f 0b 26 80 25 81 02 09 31 46 78 06 26 20 1c 81 02 05 0d 09 48 95 01 26 80 25 81 02 09 49 26 20 1c 81 02 b4 06 00 ff 09 02 75 08 95 04 15 00 26 ff 00 81 02 c0 05 0d 09 22 a1 02 09 42 15 00 25 01 75 01 95 01 81 02 95 01 81 03 09 47 81 02 95 05 81 03 75 10 09 51 27 ff ff 00 00 95 01 81 02 05 01 09 30 75 10 95 02 a4 55 0e 65 11 46 7f 0b 26 80 25 81 02 09 31 46 78 06 26 20 1c 81 02 05 0d 09 48 95 01 26 80 25 81 02 09 49 26 20 1c 81 02 b4 06 00 ff 09 02 75 08 95 04 15 00 26 ff 00 81 02 c0 05 0d 09 54 95 01 75 08 81 02 09 56 75 20 95 01 27 ff ff ff 0f 81 02 85 04 09 55 75 08 95 01 25 0b b1 02 85 0a 06 00 ff 09 03 15 00 b1 02 85 1b 06 00 ff 09 c5 15 00 26 ff 00 75 08 96 00 01 b1 02 c0 05 01 09 02 a1 01 85 02 09 01 a1 00 05 09 19 01 29 02 15 00 25 01 75 01 95 02 81 02 95 06 81 03 05 01 09 30 09 31 15 81 25 7f 75 08 95 02 81 06 c0 c0",
+ )
+
+
+class Testn_trig_1b96_1000(BaseTest.TestWin8Multitouch):
+ def create_device(self):
+ return Digitizer(
+ "uhid test n_trig_1b96_1000",
+ rdesc="75 08 15 00 26 ff 00 06 0b ff 09 0b a1 01 95 0f 09 29 85 29 b1 02 95 1f 09 2a 85 2a b1 02 95 3e 09 2b 85 2b b1 02 95 fe 09 2c 85 2c b1 02 96 fe 01 09 2d 85 2d b1 02 95 02 09 48 85 48 b1 02 95 0f 09 2e 85 2e 81 02 95 1f 09 2f 85 2f 81 02 95 3e 09 30 85 30 81 02 95 fe 09 31 85 31 81 02 96 fe 01 09 32 85 32 81 02 75 08 96 fe 0f 09 35 85 35 81 02 c0 05 0d 09 02 a1 01 85 01 09 20 35 00 a1 00 09 32 09 42 09 44 09 3c 09 45 15 00 25 01 75 01 95 05 81 02 95 03 81 03 05 01 09 30 75 10 95 01 a4 55 0e 65 11 46 03 0a 26 80 25 81 02 09 31 46 a1 05 26 20 1c 81 02 b4 05 0d 09 30 26 00 01 81 02 06 00 ff 09 01 81 02 c0 85 0c 06 00 ff 09 0c 75 08 95 06 26 ff 00 b1 02 85 0b 09 0b 95 02 b1 02 85 11 09 11 b1 02 85 15 09 15 95 05 b1 02 85 18 09 18 95 0c b1 02 c0 05 0d 09 04 a1 01 85 03 06 00 ff 09 01 75 10 95 01 15 00 27 ff ff 00 00 81 02 05 0d 09 22 a1 02 09 42 15 00 25 01 75 01 95 01 81 02 95 01 81 03 09 47 81 02 95 05 81 03 75 10 09 51 27 ff ff 00 00 95 01 81 02 05 01 09 30 75 10 95 02 a4 55 0e 65 11 46 03 0a 26 80 25 81 02 09 31 46 a1 05 26 20 1c 81 02 05 0d 09 48 95 01 26 80 25 81 02 09 49 26 20 1c 81 02 b4 06 00 ff 09 02 75 08 95 04 15 00 26 ff 00 81 02 c0 05 0d 09 22 a1 02 09 42 15 00 25 01 75 01 95 01 81 02 95 01 81 03 09 47 81 02 95 05 81 03 75 10 09 51 27 ff ff 00 00 95 01 81 02 05 01 09 30 75 10 95 02 a4 55 0e 65 11 46 03 0a 26 80 25 81 02 09 31 46 a1 05 26 20 1c 81 02 05 0d 09 48 95 01 26 80 25 81 02 09 49 26 20 1c 81 02 b4 06 00 ff 09 02 75 08 95 04 15 00 26 ff 00 81 02 c0 05 0d 09 54 95 01 75 08 81 02 09 56 75 20 95 01 27 ff ff ff 0f 81 02 85 04 09 55 75 08 95 01 25 0b b1 02 85 0a 06 00 ff 09 03 15 00 b1 02 85 1b 06 00 ff 09 c5 15 00 26 ff 00 75 08 96 00 01 b1 02 c0 05 01 09 02 a1 01 85 02 09 01 a1 00 05 09 19 01 29 02 15 00 25 01 75 01 95 02 81 02 95 06 81 03 05 01 09 30 09 31 15 81 25 7f 75 08 95 02 81 06 c0 c0",
+ )
+
+
+class Testsharp_04dd_9681(BaseTest.TestWin8Multitouch):
+ def create_device(self):
+ return Digitizer(
+ "uhid test sharp_04dd_9681",
+ rdesc="06 00 ff 09 01 a1 01 75 08 26 ff 00 15 00 85 06 95 3f 09 01 91 02 85 05 95 3f 09 01 81 02 c0 05 0d 09 04 a1 01 85 81 09 22 a1 02 09 42 15 00 25 01 75 01 95 01 81 02 95 07 81 03 75 08 09 51 95 01 81 02 05 01 65 11 55 0f 35 00 46 b0 01 26 80 07 75 10 09 30 81 02 46 f3 00 26 38 04 09 31 81 02 05 0d 09 48 09 49 26 ff 00 95 02 75 08 81 02 c0 05 0d 09 22 a1 02 09 42 15 00 25 01 75 01 95 01 81 02 95 07 81 03 75 08 09 51 95 01 81 02 05 01 65 11 55 0f 35 00 46 b0 01 26 80 07 75 10 09 30 81 02 46 f3 00 26 38 04 09 31 81 02 05 0d 09 48 09 49 26 ff 00 95 02 75 08 81 02 c0 05 0d 09 22 a1 02 09 42 15 00 25 01 75 01 95 01 81 02 95 07 81 03 75 08 09 51 95 01 81 02 05 01 65 11 55 0f 35 00 46 b0 01 26 80 07 75 10 09 30 81 02 46 f3 00 26 38 04 09 31 81 02 05 0d 09 48 09 49 26 ff 00 95 02 75 08 81 02 c0 05 0d 09 22 a1 02 09 42 15 00 25 01 75 01 95 01 81 02 95 07 81 03 75 08 09 51 95 01 81 02 05 01 65 11 55 0f 35 00 46 b0 01 26 80 07 75 10 09 30 81 02 46 f3 00 26 38 04 09 31 81 02 05 0d 09 48 09 49 26 ff 00 95 02 75 08 81 02 c0 05 0d 09 22 a1 02 09 42 15 00 25 01 75 01 95 01 81 02 95 07 81 03 75 08 09 51 95 01 81 02 05 01 65 11 55 0f 35 00 46 b0 01 26 80 07 75 10 09 30 81 02 46 f3 00 26 38 04 09 31 81 02 05 0d 09 48 09 49 26 ff 00 95 02 75 08 81 02 c0 05 0d 09 56 55 0c 66 01 10 47 ff ff 00 00 27 ff ff 00 00 75 10 95 01 81 02 09 54 95 01 75 08 15 00 25 0a 81 02 85 84 09 55 b1 02 85 87 06 00 ff 09 c5 15 00 26 ff 00 75 08 96 00 01 b1 02 c0 09 0e a1 01 85 83 09 23 a1 02 09 52 09 53 15 00 25 0a 75 08 95 02 b1 02 c0 c0 05 01 09 02 a1 01 09 01 a1 00 85 80 05 09 19 01 29 01 15 00 25 01 95 01 75 01 81 02 95 01 75 07 81 01 05 01 65 11 55 0f 09 30 26 80 07 35 00 46 66 00 75 10 95 01 81 02 09 31 26 38 04 35 00 46 4d 00 81 02 c0 c0",
+ )
+
+
+class Testsipodev_0603_0002(BaseTest.TestPTP):
+ def create_device(self):
+ return PTP(
+ "uhid test sipodev_0603_0002",
+ type="clickpad",
+ rdesc="05 01 09 02 a1 01 85 03 09 01 a1 00 05 09 19 01 29 02 25 01 75 01 95 02 81 02 95 06 81 03 05 01 09 30 09 31 15 80 25 7f 75 08 95 02 81 06 c0 c0 05 0d 09 05 a1 01 85 04 09 22 a1 02 15 00 25 01 09 47 09 42 95 02 75 01 81 02 75 01 95 02 81 03 95 01 75 04 25 05 09 51 81 02 05 01 15 00 26 44 0a 75 0c 55 0e 65 11 09 30 35 00 46 ac 03 95 01 81 02 46 fe 01 26 34 05 75 0c 09 31 81 02 05 0d c0 55 0c 66 01 10 47 ff ff 00 00 27 ff ff 00 00 75 10 95 01 09 56 81 02 09 54 25 0a 95 01 75 04 81 02 75 01 95 03 81 03 05 09 09 01 25 01 75 01 95 01 81 02 05 0d 85 0a 09 55 09 59 75 04 95 02 25 0f b1 02 85 0b 09 60 75 01 95 01 15 00 25 01 b1 02 95 07 b1 03 85 09 06 00 ff 09 c5 15 00 26 ff 00 75 08 96 00 01 b1 02 c0 05 0d 09 0e a1 01 85 06 09 22 a1 02 09 52 15 00 25 0a 75 08 95 01 b1 02 c0 09 22 a1 00 85 07 09 57 09 58 75 01 95 02 25 01 b1 02 95 06 b1 03 c0 c0 05 01 09 0c a1 01 85 08 15 00 25 01 09 c6 75 01 95 01 81 06 75 07 81 03 c0 05 01 09 80 a1 01 85 01 15 00 25 01 75 01 0a 81 00 0a 82 00 0a 83 00 95 03 81 06 95 05 81 01 c0 06 0c 00 09 01 a1 01 85 02 25 01 15 00 75 01 0a b5 00 0a b6 00 0a b7 00 0a cd 00 0a e2 00 0a a2 00 0a e9 00 0a ea 00 95 08 81 02 0a 83 01 0a 6f 00 0a 70 00 0a 88 01 0a 8a 01 0a 92 01 0a a8 02 0a 24 02 95 08 81 02 0a 21 02 0a 23 02 0a 96 01 0a 25 02 0a 26 02 0a 27 02 0a 23 02 0a b1 02 95 08 81 02 c0 06 00 ff 09 01 a1 01 85 05 15 00 26 ff 00 19 01 29 02 75 08 95 05 b1 02 c0",
+ )
+
+
+class Testsynaptics_06cb_1d10(BaseTest.TestWin8Multitouch):
+ def create_device(self):
+ return Digitizer(
+ "uhid test synaptics_06cb_1d10",
+ rdesc="05 01 09 02 a1 01 85 02 09 01 a1 00 05 09 19 01 29 02 15 00 25 01 75 01 95 02 81 02 95 06 81 03 05 01 09 30 09 31 75 08 95 02 15 81 25 7f 35 81 45 7f 55 0e 65 11 81 06 c0 c0 05 0d 09 04 a1 01 85 01 09 22 a1 02 09 42 15 00 25 01 75 01 95 01 81 02 95 07 81 03 75 08 09 51 15 01 26 ff 00 95 01 81 42 05 01 15 00 26 3c 0c 75 10 55 0e 65 11 09 30 35 12 46 2a 0c 81 02 09 31 15 00 26 f1 06 35 12 46 df 06 81 02 c0 05 0d 09 22 a1 02 09 42 15 00 25 01 75 01 95 01 81 02 95 07 81 03 75 08 09 51 15 01 26 ff 00 95 01 81 42 05 01 15 00 26 3c 0c 75 10 55 0e 65 11 09 30 35 12 46 2a 0c 81 02 09 31 15 00 26 f1 06 35 12 46 df 06 81 02 c0 05 0d 09 22 a1 02 09 42 15 00 25 01 75 01 95 01 81 02 95 07 81 03 75 08 09 51 15 01 26 ff 00 95 01 81 42 05 01 15 00 26 3c 0c 75 10 55 0e 65 11 09 30 35 12 46 2a 0c 81 02 09 31 15 00 26 f1 06 35 12 46 df 06 81 02 c0 05 0d 09 22 a1 02 09 42 15 00 25 01 75 01 95 01 81 02 95 07 81 03 75 08 09 51 15 01 26 ff 00 95 01 81 42 05 01 15 00 26 3c 0c 75 10 55 0e 65 11 09 30 35 12 46 2a 0c 81 02 09 31 15 00 26 f1 06 35 12 46 df 06 81 02 c0 05 0d 09 22 a1 02 09 42 15 00 25 01 75 01 95 01 81 02 95 07 81 03 75 08 09 51 15 01 26 ff 00 95 01 81 42 05 01 15 00 26 3c 0c 75 10 55 0e 65 11 09 30 35 12 46 2a 0c 81 02 09 31 15 00 26 f1 06 35 12 46 df 06 81 02 c0 05 0d 05 0d 55 0c 66 01 10 47 ff ff 00 00 27 ff ff 00 00 75 10 95 01 09 56 81 02 09 54 95 01 75 08 15 00 25 0f 81 02 85 08 09 55 b1 03 85 07 06 00 ff 09 c5 15 00 26 ff 00 75 08 96 00 01 b1 02 c0 06 00 ff 09 01 a1 01 85 09 09 02 15 00 26 ff 00 75 08 95 3f 91 02 85 0a 09 03 15 00 26 ff 00 75 08 95 05 91 02 85 0b 09 04 15 00 26 ff 00 75 08 95 3d 81 02 85 0c 09 05 15 00 26 ff 00 75 08 95 01 81 02 85 0f 09 06 15 00 26 ff 00 75 08 95 01 b1 02 c0",
+ )
+
+
+class Testsynaptics_06cb_ce08(BaseTest.TestPTP):
+ def create_device(self):
+ return PTP(
+ "uhid test synaptics_06cb_ce08",
+ max_contacts=5,
+ physical="Vendor Usage 1",
+ input_info=(BusType.I2C, 0x06CB, 0xCE08),
+ rdesc="05 01 09 02 a1 01 85 02 09 01 a1 00 05 09 19 01 29 02 15 00 25 01 75 01 95 02 81 02 95 06 81 01 05 01 09 30 09 31 15 81 25 7f 75 08 95 02 81 06 c0 c0 05 01 09 02 a1 01 85 18 09 01 a1 00 05 09 19 01 29 03 46 00 00 15 00 25 01 75 01 95 03 81 02 95 05 81 01 05 01 09 30 09 31 15 81 25 7f 75 08 95 02 81 06 c0 c0 06 00 ff 09 02 a1 01 85 20 09 01 a1 00 09 03 15 00 26 ff 00 35 00 46 ff 00 75 08 95 05 81 02 c0 c0 05 0d 09 05 a1 01 85 03 05 0d 09 22 a1 02 15 00 25 01 09 47 09 42 95 02 75 01 81 02 95 01 75 03 25 05 09 51 81 02 75 01 95 03 81 03 05 01 15 00 26 f8 04 75 10 55 0e 65 11 09 30 35 00 46 24 04 95 01 81 02 46 30 02 26 a0 02 09 31 81 02 c0 05 0d 09 22 a1 02 15 00 25 01 09 47 09 42 95 02 75 01 81 02 95 01 75 03 25 05 09 51 81 02 75 01 95 03 81 03 05 01 15 00 26 f8 04 75 10 55 0e 65 11 09 30 35 00 46 24 04 95 01 81 02 46 30 02 26 a0 02 09 31 81 02 c0 05 0d 09 22 a1 02 15 00 25 01 09 47 09 42 95 02 75 01 81 02 95 01 75 03 25 05 09 51 81 02 75 01 95 03 81 03 05 01 15 00 26 f8 04 75 10 55 0e 65 11 09 30 35 00 46 24 04 95 01 81 02 46 30 02 26 a0 02 09 31 81 02 c0 05 0d 09 22 a1 02 15 00 25 01 09 47 09 42 95 02 75 01 81 02 95 01 75 03 25 05 09 51 81 02 75 01 95 03 81 03 05 01 15 00 26 f8 04 75 10 55 0e 65 11 09 30 35 00 46 24 04 95 01 81 02 46 30 02 26 a0 02 09 31 81 02 c0 05 0d 09 22 a1 02 15 00 25 01 09 47 09 42 95 02 75 01 81 02 95 01 75 03 25 05 09 51 81 02 75 01 95 03 81 03 05 01 15 00 26 f8 04 75 10 55 0e 65 11 09 30 35 00 46 24 04 95 01 81 02 46 30 02 26 a0 02 09 31 81 02 c0 05 0d 55 0c 66 01 10 47 ff ff 00 00 27 ff ff 00 00 75 10 95 01 09 56 81 02 09 54 25 7f 95 01 75 08 81 02 05 09 09 01 25 01 75 01 95 01 81 02 95 07 81 03 05 0d 85 08 09 55 09 59 75 04 95 02 25 0f b1 02 85 0d 09 60 75 01 95 01 15 00 25 01 b1 02 95 07 b1 03 85 07 06 00 ff 09 c5 15 00 26 ff 00 75 08 96 00 01 b1 02 c0 05 0d 09 0e a1 01 85 04 09 22 a1 02 09 52 15 00 25 0a 75 08 95 01 b1 02 c0 09 22 a1 00 85 06 09 57 09 58 75 01 95 02 25 01 b1 02 95 06 b1 03 c0 c0 06 00 ff 09 01 a1 01 85 09 09 02 15 00 26 ff 00 75 08 95 14 91 02 85 0a 09 03 15 00 26 ff 00 75 08 95 14 91 02 85 0b 09 04 15 00 26 ff 00 75 08 95 45 81 02 85 0c 09 05 15 00 26 ff 00 75 08 95 45 81 02 85 0f 09 06 15 00 26 ff 00 75 08 95 03 b1 02 85 0e 09 07 15 00 26 ff 00 75 08 95 01 b1 02 c0",
+ )
diff --git a/tools/testing/selftests/hid/tests/test_sony.py b/tools/testing/selftests/hid/tests/test_sony.py
new file mode 100644
index 000000000000..7e52c28e59c5
--- /dev/null
+++ b/tools/testing/selftests/hid/tests/test_sony.py
@@ -0,0 +1,342 @@
+#!/bin/env python3
+# SPDX-License-Identifier: GPL-2.0
+# -*- coding: utf-8 -*-
+#
+# Copyright (c) 2020 Benjamin Tissoires <benjamin.tissoires@gmail.com>
+# Copyright (c) 2020 Red Hat, Inc.
+#
+
+from .base import application_matches
+from .test_gamepad import BaseTest
+from hidtools.device.sony_gamepad import (
+ PS3Controller,
+ PS4ControllerBluetooth,
+ PS4ControllerUSB,
+ PS5ControllerBluetooth,
+ PS5ControllerUSB,
+ PSTouchPoint,
+)
+from hidtools.util import BusType
+
+import libevdev
+import logging
+import pytest
+
+logger = logging.getLogger("hidtools.test.sony")
+
+PS3_MODULE = ("sony", "hid_sony")
+PS4_MODULE = ("playstation", "hid_playstation")
+PS5_MODULE = ("playstation", "hid_playstation")
+
+
+class SonyBaseTest:
+ class SonyTest(BaseTest.TestGamepad):
+ pass
+
+ class SonyPS4ControllerTest(SonyTest):
+ kernel_modules = [PS4_MODULE]
+
+ def test_accelerometer(self):
+ uhdev = self.uhdev
+ evdev = uhdev.get_evdev("Accelerometer")
+
+ for x in range(-32000, 32000, 4000):
+ r = uhdev.event(accel=(x, None, None))
+ events = uhdev.next_sync_events("Accelerometer")
+ self.debug_reports(r, uhdev, events)
+
+ assert libevdev.InputEvent(libevdev.EV_ABS.ABS_X) in events
+ value = evdev.value[libevdev.EV_ABS.ABS_X]
+ # Check against range due to small loss in precision due
+ # to inverse calibration, followed by calibration by hid-sony.
+ assert x - 1 <= value <= x + 1
+
+ for y in range(-32000, 32000, 4000):
+ r = uhdev.event(accel=(None, y, None))
+ events = uhdev.next_sync_events("Accelerometer")
+ self.debug_reports(r, uhdev, events)
+
+ assert libevdev.InputEvent(libevdev.EV_ABS.ABS_Y) in events
+ value = evdev.value[libevdev.EV_ABS.ABS_Y]
+ assert y - 1 <= value <= y + 1
+
+ for z in range(-32000, 32000, 4000):
+ r = uhdev.event(accel=(None, None, z))
+ events = uhdev.next_sync_events("Accelerometer")
+ self.debug_reports(r, uhdev, events)
+
+ assert libevdev.InputEvent(libevdev.EV_ABS.ABS_Z) in events
+ value = evdev.value[libevdev.EV_ABS.ABS_Z]
+ assert z - 1 <= value <= z + 1
+
+ def test_gyroscope(self):
+ uhdev = self.uhdev
+ evdev = uhdev.get_evdev("Accelerometer")
+
+ for rx in range(-2000000, 2000000, 200000):
+ r = uhdev.event(gyro=(rx, None, None))
+ events = uhdev.next_sync_events("Accelerometer")
+ self.debug_reports(r, uhdev, events)
+
+ assert libevdev.InputEvent(libevdev.EV_ABS.ABS_RX) in events
+ value = evdev.value[libevdev.EV_ABS.ABS_RX]
+ # Sensor internal value is 16-bit, but calibrated is 22-bit, so
+ # 6-bit (64) difference, so allow a range of +/- 64.
+ assert rx - 64 <= value <= rx + 64
+
+ for ry in range(-2000000, 2000000, 200000):
+ r = uhdev.event(gyro=(None, ry, None))
+ events = uhdev.next_sync_events("Accelerometer")
+ self.debug_reports(r, uhdev, events)
+
+ assert libevdev.InputEvent(libevdev.EV_ABS.ABS_RY) in events
+ value = evdev.value[libevdev.EV_ABS.ABS_RY]
+ assert ry - 64 <= value <= ry + 64
+
+ for rz in range(-2000000, 2000000, 200000):
+ r = uhdev.event(gyro=(None, None, rz))
+ events = uhdev.next_sync_events("Accelerometer")
+ self.debug_reports(r, uhdev, events)
+
+ assert libevdev.InputEvent(libevdev.EV_ABS.ABS_RZ) in events
+ value = evdev.value[libevdev.EV_ABS.ABS_RZ]
+ assert rz - 64 <= value <= rz + 64
+
+ def test_battery(self):
+ uhdev = self.uhdev
+
+ assert uhdev.power_supply_class is not None
+
+ # DS4 capacity levels are in increments of 10.
+ # Battery is never below 5%.
+ for i in range(5, 105, 10):
+ uhdev.battery.capacity = i
+ uhdev.event()
+ assert uhdev.power_supply_class.capacity == i
+
+ # Discharging tests only make sense for BlueTooth.
+ if uhdev.bus == BusType.BLUETOOTH:
+ uhdev.battery.cable_connected = False
+ uhdev.battery.capacity = 45
+ uhdev.event()
+ assert uhdev.power_supply_class.status == "Discharging"
+
+ uhdev.battery.cable_connected = True
+ uhdev.battery.capacity = 5
+ uhdev.event()
+ assert uhdev.power_supply_class.status == "Charging"
+
+ uhdev.battery.capacity = 100
+ uhdev.event()
+ assert uhdev.power_supply_class.status == "Charging"
+
+ uhdev.battery.full = True
+ uhdev.event()
+ assert uhdev.power_supply_class.status == "Full"
+
+ def test_mt_single_touch(self):
+ """send a single touch in the first slot of the device,
+ and release it."""
+ uhdev = self.uhdev
+ evdev = uhdev.get_evdev("Touch Pad")
+
+ t0 = PSTouchPoint(1, 50, 100)
+ r = uhdev.event(touch=[t0])
+ events = uhdev.next_sync_events("Touch Pad")
+ self.debug_reports(r, uhdev, events)
+
+ assert libevdev.InputEvent(libevdev.EV_KEY.BTN_TOUCH, 1) in events
+ assert evdev.slots[0][libevdev.EV_ABS.ABS_MT_TRACKING_ID] == 0
+ assert evdev.slots[0][libevdev.EV_ABS.ABS_MT_POSITION_X] == 50
+ assert evdev.slots[0][libevdev.EV_ABS.ABS_MT_POSITION_Y] == 100
+
+ t0.tipswitch = False
+ r = uhdev.event(touch=[t0])
+ events = uhdev.next_sync_events("Touch Pad")
+ self.debug_reports(r, uhdev, events)
+ assert libevdev.InputEvent(libevdev.EV_KEY.BTN_TOUCH, 0) in events
+ assert evdev.slots[0][libevdev.EV_ABS.ABS_MT_TRACKING_ID] == -1
+
+ def test_mt_dual_touch(self):
+ """Send 2 touches in the first 2 slots.
+ Make sure the kernel sees this as a dual touch.
+ Release and check
+
+ Note: PTP will send here BTN_DOUBLETAP emulation"""
+ uhdev = self.uhdev
+ evdev = uhdev.get_evdev("Touch Pad")
+
+ t0 = PSTouchPoint(1, 50, 100)
+ t1 = PSTouchPoint(2, 150, 200)
+
+ r = uhdev.event(touch=[t0])
+ events = uhdev.next_sync_events("Touch Pad")
+ self.debug_reports(r, uhdev, events)
+
+ assert libevdev.InputEvent(libevdev.EV_KEY.BTN_TOUCH, 1) in events
+ assert evdev.value[libevdev.EV_KEY.BTN_TOUCH] == 1
+ assert evdev.slots[0][libevdev.EV_ABS.ABS_MT_TRACKING_ID] == 0
+ assert evdev.slots[0][libevdev.EV_ABS.ABS_MT_POSITION_X] == 50
+ assert evdev.slots[0][libevdev.EV_ABS.ABS_MT_POSITION_Y] == 100
+ assert evdev.slots[1][libevdev.EV_ABS.ABS_MT_TRACKING_ID] == -1
+
+ r = uhdev.event(touch=[t0, t1])
+ events = uhdev.next_sync_events("Touch Pad")
+ self.debug_reports(r, uhdev, events)
+ assert libevdev.InputEvent(libevdev.EV_KEY.BTN_TOUCH) not in events
+ assert evdev.value[libevdev.EV_KEY.BTN_TOUCH] == 1
+ assert (
+ libevdev.InputEvent(libevdev.EV_ABS.ABS_MT_POSITION_X, 5) not in events
+ )
+ assert (
+ libevdev.InputEvent(libevdev.EV_ABS.ABS_MT_POSITION_Y, 10) not in events
+ )
+ assert evdev.slots[0][libevdev.EV_ABS.ABS_MT_TRACKING_ID] == 0
+ assert evdev.slots[0][libevdev.EV_ABS.ABS_MT_POSITION_X] == 50
+ assert evdev.slots[0][libevdev.EV_ABS.ABS_MT_POSITION_Y] == 100
+ assert evdev.slots[1][libevdev.EV_ABS.ABS_MT_TRACKING_ID] == 1
+ assert evdev.slots[1][libevdev.EV_ABS.ABS_MT_POSITION_X] == 150
+ assert evdev.slots[1][libevdev.EV_ABS.ABS_MT_POSITION_Y] == 200
+
+ t0.tipswitch = False
+ r = uhdev.event(touch=[t0, t1])
+ events = uhdev.next_sync_events("Touch Pad")
+ self.debug_reports(r, uhdev, events)
+ assert evdev.slots[0][libevdev.EV_ABS.ABS_MT_TRACKING_ID] == -1
+ assert evdev.slots[1][libevdev.EV_ABS.ABS_MT_TRACKING_ID] == 1
+ assert libevdev.InputEvent(libevdev.EV_ABS.ABS_MT_POSITION_X) not in events
+ assert libevdev.InputEvent(libevdev.EV_ABS.ABS_MT_POSITION_Y) not in events
+
+ t1.tipswitch = False
+ r = uhdev.event(touch=[t1])
+
+ events = uhdev.next_sync_events("Touch Pad")
+ self.debug_reports(r, uhdev, events)
+ assert evdev.slots[0][libevdev.EV_ABS.ABS_MT_TRACKING_ID] == -1
+ assert evdev.slots[1][libevdev.EV_ABS.ABS_MT_TRACKING_ID] == -1
+
+
+class TestPS3Controller(SonyBaseTest.SonyTest):
+ kernel_modules = [PS3_MODULE]
+
+ def create_device(self):
+ controller = PS3Controller()
+ controller.application_matches = application_matches
+ return controller
+
+ @pytest.fixture(autouse=True)
+ def start_controller(self):
+ # emulate a 'PS' button press to tell the kernel we are ready to accept events
+ self.assert_button(17)
+
+ # drain any remaining udev events
+ while self.uhdev.dispatch(10):
+ pass
+
+ def test_led(self):
+ for k, v in self.uhdev.led_classes.items():
+ # the kernel might have set a LED for us
+ logger.info(f"{k}: {v.brightness}")
+
+ idx = int(k[-1]) - 1
+ assert self.uhdev.hw_leds.get_led(idx)[0] == bool(v.brightness)
+
+ v.brightness = 0
+ self.uhdev.dispatch(10)
+ assert self.uhdev.hw_leds.get_led(idx)[0] is False
+
+ v.brightness = v.max_brightness
+ self.uhdev.dispatch(10)
+ assert self.uhdev.hw_leds.get_led(idx)[0]
+
+
+class CalibratedPS4Controller(object):
+ # DS4 reports uncalibrated sensor data. Calibration coefficients
+ # can be retrieved using a feature report (0x2 USB / 0x5 BT).
+ # The values below are the processed calibration values for the
+ # DS4s matching the feature reports of PS4ControllerBluetooth/USB
+ # as dumped from hid-sony 'ds4_get_calibration_data'.
+ #
+ # Note we duplicate those values here in case the kernel changes them
+ # so we can have tests passing even if hid-tools doesn't have the
+ # correct values.
+ accelerometer_calibration_data = {
+ "x": {"bias": -73, "numer": 16384, "denom": 16472},
+ "y": {"bias": -352, "numer": 16384, "denom": 16344},
+ "z": {"bias": 81, "numer": 16384, "denom": 16319},
+ }
+ gyroscope_calibration_data = {
+ "x": {"bias": 0, "numer": 1105920, "denom": 17827},
+ "y": {"bias": 0, "numer": 1105920, "denom": 17777},
+ "z": {"bias": 0, "numer": 1105920, "denom": 17748},
+ }
+
+
+class CalibratedPS4ControllerBluetooth(CalibratedPS4Controller, PS4ControllerBluetooth):
+ pass
+
+
+class TestPS4ControllerBluetooth(SonyBaseTest.SonyPS4ControllerTest):
+ def create_device(self):
+ controller = CalibratedPS4ControllerBluetooth()
+ controller.application_matches = application_matches
+ return controller
+
+
+class CalibratedPS4ControllerUSB(CalibratedPS4Controller, PS4ControllerUSB):
+ pass
+
+
+class TestPS4ControllerUSB(SonyBaseTest.SonyPS4ControllerTest):
+ def create_device(self):
+ controller = CalibratedPS4ControllerUSB()
+ controller.application_matches = application_matches
+ return controller
+
+
+class CalibratedPS5Controller(object):
+ # DualSense reports uncalibrated sensor data. Calibration coefficients
+ # can be retrieved using feature report 0x09.
+ # The values below are the processed calibration values for the
+ # DualSene matching the feature reports of PS5ControllerBluetooth/USB
+ # as dumped from hid-playstation 'dualsense_get_calibration_data'.
+ #
+ # Note we duplicate those values here in case the kernel changes them
+ # so we can have tests passing even if hid-tools doesn't have the
+ # correct values.
+ accelerometer_calibration_data = {
+ "x": {"bias": 0, "numer": 16384, "denom": 16374},
+ "y": {"bias": -114, "numer": 16384, "denom": 16362},
+ "z": {"bias": 2, "numer": 16384, "denom": 16395},
+ }
+ gyroscope_calibration_data = {
+ "x": {"bias": 0, "numer": 1105920, "denom": 17727},
+ "y": {"bias": 0, "numer": 1105920, "denom": 17728},
+ "z": {"bias": 0, "numer": 1105920, "denom": 17769},
+ }
+
+
+class CalibratedPS5ControllerBluetooth(CalibratedPS5Controller, PS5ControllerBluetooth):
+ pass
+
+
+class TestPS5ControllerBluetooth(SonyBaseTest.SonyPS4ControllerTest):
+ kernel_modules = [PS5_MODULE]
+
+ def create_device(self):
+ controller = CalibratedPS5ControllerBluetooth()
+ controller.application_matches = application_matches
+ return controller
+
+
+class CalibratedPS5ControllerUSB(CalibratedPS5Controller, PS5ControllerUSB):
+ pass
+
+
+class TestPS5ControllerUSB(SonyBaseTest.SonyPS4ControllerTest):
+ kernel_modules = [PS5_MODULE]
+
+ def create_device(self):
+ controller = CalibratedPS5ControllerUSB()
+ controller.application_matches = application_matches
+ return controller
diff --git a/tools/testing/selftests/hid/tests/test_tablet.py b/tools/testing/selftests/hid/tests/test_tablet.py
new file mode 100644
index 000000000000..903f19f7cbe9
--- /dev/null
+++ b/tools/testing/selftests/hid/tests/test_tablet.py
@@ -0,0 +1,1164 @@
+#!/bin/env python3
+# SPDX-License-Identifier: GPL-2.0
+# -*- coding: utf-8 -*-
+#
+# Copyright (c) 2021 Benjamin Tissoires <benjamin.tissoires@gmail.com>
+# Copyright (c) 2021 Red Hat, Inc.
+#
+
+from . import base
+import copy
+from enum import Enum
+from hidtools.util import BusType
+import libevdev
+import logging
+import pytest
+from typing import Dict, List, Optional, Tuple
+
+logger = logging.getLogger("hidtools.test.tablet")
+
+
+class BtnTouch(Enum):
+ """Represents whether the BTN_TOUCH event is set to True or False"""
+
+ DOWN = True
+ UP = False
+
+
+class ToolType(Enum):
+ PEN = libevdev.EV_KEY.BTN_TOOL_PEN
+ RUBBER = libevdev.EV_KEY.BTN_TOOL_RUBBER
+
+
+class BtnPressed(Enum):
+ """Represents whether a button is pressed on the stylus"""
+
+ PRIMARY_PRESSED = libevdev.EV_KEY.BTN_STYLUS
+ SECONDARY_PRESSED = libevdev.EV_KEY.BTN_STYLUS2
+
+
+class PenState(Enum):
+ """Pen states according to Microsoft reference:
+ https://docs.microsoft.com/en-us/windows-hardware/design/component-guidelines/windows-pen-states
+
+ We extend it with the various buttons when we need to check them.
+ """
+
+ PEN_IS_OUT_OF_RANGE = BtnTouch.UP, None, None
+ PEN_IS_IN_RANGE = BtnTouch.UP, ToolType.PEN, None
+ PEN_IS_IN_RANGE_WITH_BUTTON = BtnTouch.UP, ToolType.PEN, BtnPressed.PRIMARY_PRESSED
+ PEN_IS_IN_RANGE_WITH_SECOND_BUTTON = (
+ BtnTouch.UP,
+ ToolType.PEN,
+ BtnPressed.SECONDARY_PRESSED,
+ )
+ PEN_IS_IN_CONTACT = BtnTouch.DOWN, ToolType.PEN, None
+ PEN_IS_IN_CONTACT_WITH_BUTTON = (
+ BtnTouch.DOWN,
+ ToolType.PEN,
+ BtnPressed.PRIMARY_PRESSED,
+ )
+ PEN_IS_IN_CONTACT_WITH_SECOND_BUTTON = (
+ BtnTouch.DOWN,
+ ToolType.PEN,
+ BtnPressed.SECONDARY_PRESSED,
+ )
+ PEN_IS_IN_RANGE_WITH_ERASING_INTENT = BtnTouch.UP, ToolType.RUBBER, None
+ PEN_IS_IN_RANGE_WITH_ERASING_INTENT_WITH_BUTTON = (
+ BtnTouch.UP,
+ ToolType.RUBBER,
+ BtnPressed.PRIMARY_PRESSED,
+ )
+ PEN_IS_IN_RANGE_WITH_ERASING_INTENT_WITH_SECOND_BUTTON = (
+ BtnTouch.UP,
+ ToolType.RUBBER,
+ BtnPressed.SECONDARY_PRESSED,
+ )
+ PEN_IS_ERASING = BtnTouch.DOWN, ToolType.RUBBER, None
+ PEN_IS_ERASING_WITH_BUTTON = (
+ BtnTouch.DOWN,
+ ToolType.RUBBER,
+ BtnPressed.PRIMARY_PRESSED,
+ )
+ PEN_IS_ERASING_WITH_SECOND_BUTTON = (
+ BtnTouch.DOWN,
+ ToolType.RUBBER,
+ BtnPressed.SECONDARY_PRESSED,
+ )
+
+ def __init__(self, touch: BtnTouch, tool: Optional[ToolType], button: Optional[BtnPressed]):
+ self.touch = touch # type: ignore
+ self.tool = tool # type: ignore
+ self.button = button # type: ignore
+
+ @classmethod
+ def from_evdev(cls, evdev) -> "PenState":
+ touch = BtnTouch(evdev.value[libevdev.EV_KEY.BTN_TOUCH])
+ tool = None
+ button = None
+ if (
+ evdev.value[libevdev.EV_KEY.BTN_TOOL_RUBBER]
+ and not evdev.value[libevdev.EV_KEY.BTN_TOOL_PEN]
+ ):
+ tool = ToolType(libevdev.EV_KEY.BTN_TOOL_RUBBER)
+ elif (
+ evdev.value[libevdev.EV_KEY.BTN_TOOL_PEN]
+ and not evdev.value[libevdev.EV_KEY.BTN_TOOL_RUBBER]
+ ):
+ tool = ToolType(libevdev.EV_KEY.BTN_TOOL_PEN)
+ elif (
+ evdev.value[libevdev.EV_KEY.BTN_TOOL_PEN]
+ or evdev.value[libevdev.EV_KEY.BTN_TOOL_RUBBER]
+ ):
+ raise ValueError("2 tools are not allowed")
+
+ # we take only the highest button in account
+ for b in [libevdev.EV_KEY.BTN_STYLUS, libevdev.EV_KEY.BTN_STYLUS2]:
+ if bool(evdev.value[b]):
+ button = BtnPressed(b)
+
+ # the kernel tends to insert an EV_SYN once removing the tool, so
+ # the button will be released after
+ if tool is None:
+ button = None
+
+ return cls((touch, tool, button)) # type: ignore
+
+ def apply(self, events: List[libevdev.InputEvent], strict: bool) -> "PenState":
+ if libevdev.EV_SYN.SYN_REPORT in events:
+ raise ValueError("EV_SYN is in the event sequence")
+ touch = self.touch
+ touch_found = False
+ tool = self.tool
+ tool_found = False
+ button = self.button
+ button_found = False
+
+ for ev in events:
+ if ev == libevdev.InputEvent(libevdev.EV_KEY.BTN_TOUCH):
+ if touch_found:
+ raise ValueError(f"duplicated BTN_TOUCH in {events}")
+ touch_found = True
+ touch = BtnTouch(ev.value)
+ elif ev in (
+ libevdev.InputEvent(libevdev.EV_KEY.BTN_TOOL_PEN),
+ libevdev.InputEvent(libevdev.EV_KEY.BTN_TOOL_RUBBER),
+ ):
+ if tool_found:
+ raise ValueError(f"duplicated BTN_TOOL_* in {events}")
+ tool_found = True
+ tool = ToolType(ev.code) if ev.value else None
+ elif ev in (
+ libevdev.InputEvent(libevdev.EV_KEY.BTN_STYLUS),
+ libevdev.InputEvent(libevdev.EV_KEY.BTN_STYLUS2),
+ ):
+ if button_found:
+ raise ValueError(f"duplicated BTN_STYLUS* in {events}")
+ button_found = True
+ button = BtnPressed(ev.code) if ev.value else None
+
+ # the kernel tends to insert an EV_SYN once removing the tool, so
+ # the button will be released after
+ if tool is None:
+ button = None
+
+ new_state = PenState((touch, tool, button)) # type: ignore
+ if strict:
+ assert (
+ new_state in self.valid_transitions()
+ ), f"moving from {self} to {new_state} is forbidden"
+ else:
+ assert (
+ new_state in self.historically_tolerated_transitions()
+ ), f"moving from {self} to {new_state} is forbidden"
+
+ return new_state
+
+ def valid_transitions(self) -> Tuple["PenState", ...]:
+ """Following the state machine in the URL above.
+
+ Note that those transitions are from the evdev point of view, not HID"""
+ if self == PenState.PEN_IS_OUT_OF_RANGE:
+ return (
+ PenState.PEN_IS_OUT_OF_RANGE,
+ PenState.PEN_IS_IN_RANGE,
+ PenState.PEN_IS_IN_RANGE_WITH_BUTTON,
+ PenState.PEN_IS_IN_RANGE_WITH_SECOND_BUTTON,
+ PenState.PEN_IS_IN_RANGE_WITH_ERASING_INTENT,
+ PenState.PEN_IS_IN_CONTACT,
+ PenState.PEN_IS_IN_CONTACT_WITH_BUTTON,
+ PenState.PEN_IS_IN_CONTACT_WITH_SECOND_BUTTON,
+ PenState.PEN_IS_ERASING,
+ )
+
+ if self == PenState.PEN_IS_IN_RANGE:
+ return (
+ PenState.PEN_IS_IN_RANGE,
+ PenState.PEN_IS_IN_RANGE_WITH_BUTTON,
+ PenState.PEN_IS_IN_RANGE_WITH_SECOND_BUTTON,
+ PenState.PEN_IS_OUT_OF_RANGE,
+ PenState.PEN_IS_IN_CONTACT,
+ )
+
+ if self == PenState.PEN_IS_IN_CONTACT:
+ return (
+ PenState.PEN_IS_IN_CONTACT,
+ PenState.PEN_IS_IN_CONTACT_WITH_BUTTON,
+ PenState.PEN_IS_IN_CONTACT_WITH_SECOND_BUTTON,
+ PenState.PEN_IS_IN_RANGE,
+ )
+
+ if self == PenState.PEN_IS_IN_RANGE_WITH_ERASING_INTENT:
+ return (
+ PenState.PEN_IS_IN_RANGE_WITH_ERASING_INTENT,
+ PenState.PEN_IS_OUT_OF_RANGE,
+ PenState.PEN_IS_ERASING,
+ )
+
+ if self == PenState.PEN_IS_ERASING:
+ return (
+ PenState.PEN_IS_ERASING,
+ PenState.PEN_IS_IN_RANGE_WITH_ERASING_INTENT,
+ )
+
+ if self == PenState.PEN_IS_IN_RANGE_WITH_BUTTON:
+ return (
+ PenState.PEN_IS_IN_RANGE_WITH_BUTTON,
+ PenState.PEN_IS_IN_RANGE,
+ PenState.PEN_IS_OUT_OF_RANGE,
+ PenState.PEN_IS_IN_CONTACT_WITH_BUTTON,
+ )
+
+ if self == PenState.PEN_IS_IN_CONTACT_WITH_BUTTON:
+ return (
+ PenState.PEN_IS_IN_CONTACT_WITH_BUTTON,
+ PenState.PEN_IS_IN_CONTACT,
+ PenState.PEN_IS_IN_RANGE_WITH_BUTTON,
+ )
+
+ if self == PenState.PEN_IS_IN_RANGE_WITH_SECOND_BUTTON:
+ return (
+ PenState.PEN_IS_IN_RANGE_WITH_SECOND_BUTTON,
+ PenState.PEN_IS_IN_RANGE,
+ PenState.PEN_IS_OUT_OF_RANGE,
+ PenState.PEN_IS_IN_CONTACT_WITH_SECOND_BUTTON,
+ )
+
+ if self == PenState.PEN_IS_IN_CONTACT_WITH_SECOND_BUTTON:
+ return (
+ PenState.PEN_IS_IN_CONTACT_WITH_SECOND_BUTTON,
+ PenState.PEN_IS_IN_CONTACT,
+ PenState.PEN_IS_IN_RANGE_WITH_SECOND_BUTTON,
+ )
+
+ return tuple()
+
+ def historically_tolerated_transitions(self) -> Tuple["PenState", ...]:
+ """Following the state machine in the URL above, with a couple of addition
+ for skipping the in-range state, due to historical reasons.
+
+ Note that those transitions are from the evdev point of view, not HID"""
+ if self == PenState.PEN_IS_OUT_OF_RANGE:
+ return (
+ PenState.PEN_IS_OUT_OF_RANGE,
+ PenState.PEN_IS_IN_RANGE,
+ PenState.PEN_IS_IN_RANGE_WITH_BUTTON,
+ PenState.PEN_IS_IN_RANGE_WITH_SECOND_BUTTON,
+ PenState.PEN_IS_IN_RANGE_WITH_ERASING_INTENT,
+ PenState.PEN_IS_IN_CONTACT,
+ PenState.PEN_IS_IN_CONTACT_WITH_BUTTON,
+ PenState.PEN_IS_IN_CONTACT_WITH_SECOND_BUTTON,
+ PenState.PEN_IS_ERASING,
+ )
+
+ if self == PenState.PEN_IS_IN_RANGE:
+ return (
+ PenState.PEN_IS_IN_RANGE,
+ PenState.PEN_IS_IN_RANGE_WITH_BUTTON,
+ PenState.PEN_IS_IN_RANGE_WITH_SECOND_BUTTON,
+ PenState.PEN_IS_OUT_OF_RANGE,
+ PenState.PEN_IS_IN_CONTACT,
+ )
+
+ if self == PenState.PEN_IS_IN_CONTACT:
+ return (
+ PenState.PEN_IS_IN_CONTACT,
+ PenState.PEN_IS_IN_CONTACT_WITH_BUTTON,
+ PenState.PEN_IS_IN_CONTACT_WITH_SECOND_BUTTON,
+ PenState.PEN_IS_IN_RANGE,
+ PenState.PEN_IS_OUT_OF_RANGE,
+ )
+
+ if self == PenState.PEN_IS_IN_RANGE_WITH_ERASING_INTENT:
+ return (
+ PenState.PEN_IS_IN_RANGE_WITH_ERASING_INTENT,
+ PenState.PEN_IS_OUT_OF_RANGE,
+ PenState.PEN_IS_ERASING,
+ )
+
+ if self == PenState.PEN_IS_ERASING:
+ return (
+ PenState.PEN_IS_ERASING,
+ PenState.PEN_IS_IN_RANGE_WITH_ERASING_INTENT,
+ PenState.PEN_IS_OUT_OF_RANGE,
+ )
+
+ if self == PenState.PEN_IS_IN_RANGE_WITH_BUTTON:
+ return (
+ PenState.PEN_IS_IN_RANGE_WITH_BUTTON,
+ PenState.PEN_IS_IN_RANGE,
+ PenState.PEN_IS_OUT_OF_RANGE,
+ PenState.PEN_IS_IN_CONTACT_WITH_BUTTON,
+ )
+
+ if self == PenState.PEN_IS_IN_CONTACT_WITH_BUTTON:
+ return (
+ PenState.PEN_IS_IN_CONTACT_WITH_BUTTON,
+ PenState.PEN_IS_IN_CONTACT,
+ PenState.PEN_IS_IN_RANGE_WITH_BUTTON,
+ PenState.PEN_IS_OUT_OF_RANGE,
+ )
+
+ if self == PenState.PEN_IS_IN_RANGE_WITH_SECOND_BUTTON:
+ return (
+ PenState.PEN_IS_IN_RANGE_WITH_SECOND_BUTTON,
+ PenState.PEN_IS_IN_RANGE,
+ PenState.PEN_IS_OUT_OF_RANGE,
+ PenState.PEN_IS_IN_CONTACT_WITH_SECOND_BUTTON,
+ )
+
+ if self == PenState.PEN_IS_IN_CONTACT_WITH_SECOND_BUTTON:
+ return (
+ PenState.PEN_IS_IN_CONTACT_WITH_SECOND_BUTTON,
+ PenState.PEN_IS_IN_CONTACT,
+ PenState.PEN_IS_IN_RANGE_WITH_SECOND_BUTTON,
+ PenState.PEN_IS_OUT_OF_RANGE,
+ )
+
+ return tuple()
+
+ @staticmethod
+ def legal_transitions() -> Dict[str, Tuple["PenState", ...]]:
+ """This is the first half of the Windows Pen Implementation state machine:
+ we don't have Invert nor Erase bits, so just move in/out-of-range or proximity.
+ https://docs.microsoft.com/en-us/windows-hardware/design/component-guidelines/windows-pen-states
+ """
+ return {
+ "in-range": (PenState.PEN_IS_IN_RANGE,),
+ "in-range -> out-of-range": (
+ PenState.PEN_IS_IN_RANGE,
+ PenState.PEN_IS_OUT_OF_RANGE,
+ ),
+ "in-range -> touch": (PenState.PEN_IS_IN_RANGE, PenState.PEN_IS_IN_CONTACT),
+ "in-range -> touch -> release": (
+ PenState.PEN_IS_IN_RANGE,
+ PenState.PEN_IS_IN_CONTACT,
+ PenState.PEN_IS_IN_RANGE,
+ ),
+ "in-range -> touch -> release -> out-of-range": (
+ PenState.PEN_IS_IN_RANGE,
+ PenState.PEN_IS_IN_CONTACT,
+ PenState.PEN_IS_IN_RANGE,
+ PenState.PEN_IS_OUT_OF_RANGE,
+ ),
+ }
+
+ @staticmethod
+ def legal_transitions_with_invert() -> Dict[str, Tuple["PenState", ...]]:
+ """This is the second half of the Windows Pen Implementation state machine:
+ we now have Invert and Erase bits, so move in/out or proximity with the intend
+ to erase.
+ https://docs.microsoft.com/en-us/windows-hardware/design/component-guidelines/windows-pen-states
+ """
+ return {
+ "hover-erasing": (PenState.PEN_IS_IN_RANGE_WITH_ERASING_INTENT,),
+ "hover-erasing -> out-of-range": (
+ PenState.PEN_IS_IN_RANGE_WITH_ERASING_INTENT,
+ PenState.PEN_IS_OUT_OF_RANGE,
+ ),
+ "hover-erasing -> erase": (
+ PenState.PEN_IS_IN_RANGE_WITH_ERASING_INTENT,
+ PenState.PEN_IS_ERASING,
+ ),
+ "hover-erasing -> erase -> release": (
+ PenState.PEN_IS_IN_RANGE_WITH_ERASING_INTENT,
+ PenState.PEN_IS_ERASING,
+ PenState.PEN_IS_IN_RANGE_WITH_ERASING_INTENT,
+ ),
+ "hover-erasing -> erase -> release -> out-of-range": (
+ PenState.PEN_IS_IN_RANGE_WITH_ERASING_INTENT,
+ PenState.PEN_IS_ERASING,
+ PenState.PEN_IS_IN_RANGE_WITH_ERASING_INTENT,
+ PenState.PEN_IS_OUT_OF_RANGE,
+ ),
+ "hover-erasing -> in-range": (
+ PenState.PEN_IS_IN_RANGE_WITH_ERASING_INTENT,
+ PenState.PEN_IS_IN_RANGE,
+ ),
+ "in-range -> hover-erasing": (
+ PenState.PEN_IS_IN_RANGE,
+ PenState.PEN_IS_IN_RANGE_WITH_ERASING_INTENT,
+ ),
+ }
+
+ @staticmethod
+ def legal_transitions_with_primary_button() -> Dict[str, Tuple["PenState", ...]]:
+ """We revisit the Windows Pen Implementation state machine:
+ we now have a primary button.
+ """
+ return {
+ "hover-button": (PenState.PEN_IS_IN_RANGE_WITH_BUTTON,),
+ "hover-button -> out-of-range": (
+ PenState.PEN_IS_IN_RANGE_WITH_BUTTON,
+ PenState.PEN_IS_OUT_OF_RANGE,
+ ),
+ "in-range -> button-press": (
+ PenState.PEN_IS_IN_RANGE,
+ PenState.PEN_IS_IN_RANGE_WITH_BUTTON,
+ ),
+ "in-range -> button-press -> button-release": (
+ PenState.PEN_IS_IN_RANGE,
+ PenState.PEN_IS_IN_RANGE_WITH_BUTTON,
+ PenState.PEN_IS_IN_RANGE,
+ ),
+ "in-range -> touch -> button-press -> button-release": (
+ PenState.PEN_IS_IN_RANGE,
+ PenState.PEN_IS_IN_CONTACT,
+ PenState.PEN_IS_IN_CONTACT_WITH_BUTTON,
+ PenState.PEN_IS_IN_CONTACT,
+ ),
+ "in-range -> touch -> button-press -> release -> button-release": (
+ PenState.PEN_IS_IN_RANGE,
+ PenState.PEN_IS_IN_CONTACT,
+ PenState.PEN_IS_IN_CONTACT_WITH_BUTTON,
+ PenState.PEN_IS_IN_RANGE_WITH_BUTTON,
+ PenState.PEN_IS_IN_RANGE,
+ ),
+ "in-range -> button-press -> touch -> release -> button-release": (
+ PenState.PEN_IS_IN_RANGE,
+ PenState.PEN_IS_IN_RANGE_WITH_BUTTON,
+ PenState.PEN_IS_IN_CONTACT_WITH_BUTTON,
+ PenState.PEN_IS_IN_RANGE_WITH_BUTTON,
+ PenState.PEN_IS_IN_RANGE,
+ ),
+ "in-range -> button-press -> touch -> button-release -> release": (
+ PenState.PEN_IS_IN_RANGE,
+ PenState.PEN_IS_IN_RANGE_WITH_BUTTON,
+ PenState.PEN_IS_IN_CONTACT_WITH_BUTTON,
+ PenState.PEN_IS_IN_CONTACT,
+ PenState.PEN_IS_IN_RANGE,
+ ),
+ }
+
+ @staticmethod
+ def legal_transitions_with_secondary_button() -> Dict[str, Tuple["PenState", ...]]:
+ """We revisit the Windows Pen Implementation state machine:
+ we now have a secondary button.
+ Note: we don't looks for 2 buttons interactions.
+ """
+ return {
+ "hover-button": (PenState.PEN_IS_IN_RANGE_WITH_SECOND_BUTTON,),
+ "hover-button -> out-of-range": (
+ PenState.PEN_IS_IN_RANGE_WITH_SECOND_BUTTON,
+ PenState.PEN_IS_OUT_OF_RANGE,
+ ),
+ "in-range -> button-press": (
+ PenState.PEN_IS_IN_RANGE,
+ PenState.PEN_IS_IN_RANGE_WITH_SECOND_BUTTON,
+ ),
+ "in-range -> button-press -> button-release": (
+ PenState.PEN_IS_IN_RANGE,
+ PenState.PEN_IS_IN_RANGE_WITH_SECOND_BUTTON,
+ PenState.PEN_IS_IN_RANGE,
+ ),
+ "in-range -> touch -> button-press -> button-release": (
+ PenState.PEN_IS_IN_RANGE,
+ PenState.PEN_IS_IN_CONTACT,
+ PenState.PEN_IS_IN_CONTACT_WITH_SECOND_BUTTON,
+ PenState.PEN_IS_IN_CONTACT,
+ ),
+ "in-range -> touch -> button-press -> release -> button-release": (
+ PenState.PEN_IS_IN_RANGE,
+ PenState.PEN_IS_IN_CONTACT,
+ PenState.PEN_IS_IN_CONTACT_WITH_SECOND_BUTTON,
+ PenState.PEN_IS_IN_RANGE_WITH_SECOND_BUTTON,
+ PenState.PEN_IS_IN_RANGE,
+ ),
+ "in-range -> button-press -> touch -> release -> button-release": (
+ PenState.PEN_IS_IN_RANGE,
+ PenState.PEN_IS_IN_RANGE_WITH_SECOND_BUTTON,
+ PenState.PEN_IS_IN_CONTACT_WITH_SECOND_BUTTON,
+ PenState.PEN_IS_IN_RANGE_WITH_SECOND_BUTTON,
+ PenState.PEN_IS_IN_RANGE,
+ ),
+ "in-range -> button-press -> touch -> button-release -> release": (
+ PenState.PEN_IS_IN_RANGE,
+ PenState.PEN_IS_IN_RANGE_WITH_SECOND_BUTTON,
+ PenState.PEN_IS_IN_CONTACT_WITH_SECOND_BUTTON,
+ PenState.PEN_IS_IN_CONTACT,
+ PenState.PEN_IS_IN_RANGE,
+ ),
+ }
+
+ @staticmethod
+ def tolerated_transitions() -> Dict[str, Tuple["PenState", ...]]:
+ """This is not adhering to the Windows Pen Implementation state machine
+ but we should expect the kernel to behave properly, mostly for historical
+ reasons."""
+ return {
+ "direct-in-contact": (PenState.PEN_IS_IN_CONTACT,),
+ "direct-in-contact -> out-of-range": (
+ PenState.PEN_IS_IN_CONTACT,
+ PenState.PEN_IS_OUT_OF_RANGE,
+ ),
+ }
+
+ @staticmethod
+ def tolerated_transitions_with_invert() -> Dict[str, Tuple["PenState", ...]]:
+ """This is the second half of the Windows Pen Implementation state machine:
+ we now have Invert and Erase bits, so move in/out or proximity with the intend
+ to erase.
+ https://docs.microsoft.com/en-us/windows-hardware/design/component-guidelines/windows-pen-states
+ """
+ return {
+ "direct-erase": (PenState.PEN_IS_ERASING,),
+ "direct-erase -> out-of-range": (
+ PenState.PEN_IS_ERASING,
+ PenState.PEN_IS_OUT_OF_RANGE,
+ ),
+ }
+
+ @staticmethod
+ def broken_transitions() -> Dict[str, Tuple["PenState", ...]]:
+ """Those tests are definitely not part of the Windows specification.
+ However, a half broken device might export those transitions.
+ For example, a pen that has the eraser button might wobble between
+ touching and erasing if the tablet doesn't enforce the Windows
+ state machine."""
+ return {
+ "in-range -> touch -> erase -> hover-erase": (
+ PenState.PEN_IS_IN_RANGE,
+ PenState.PEN_IS_IN_CONTACT,
+ PenState.PEN_IS_ERASING,
+ PenState.PEN_IS_IN_RANGE_WITH_ERASING_INTENT,
+ ),
+ "in-range -> erase -> hover-erase": (
+ PenState.PEN_IS_IN_RANGE,
+ PenState.PEN_IS_ERASING,
+ PenState.PEN_IS_IN_RANGE_WITH_ERASING_INTENT,
+ ),
+ "hover-erase -> erase -> touch -> in-range": (
+ PenState.PEN_IS_IN_RANGE_WITH_ERASING_INTENT,
+ PenState.PEN_IS_ERASING,
+ PenState.PEN_IS_IN_CONTACT,
+ PenState.PEN_IS_IN_RANGE,
+ ),
+ "hover-erase -> touch -> in-range": (
+ PenState.PEN_IS_IN_RANGE_WITH_ERASING_INTENT,
+ PenState.PEN_IS_IN_CONTACT,
+ PenState.PEN_IS_IN_RANGE,
+ ),
+ "touch -> erase -> touch -> erase": (
+ PenState.PEN_IS_IN_CONTACT,
+ PenState.PEN_IS_ERASING,
+ PenState.PEN_IS_IN_CONTACT,
+ PenState.PEN_IS_ERASING,
+ ),
+ }
+
+
+class Pen(object):
+ def __init__(self, x, y):
+ self.x = x
+ self.y = y
+ self.tipswitch = False
+ self.tippressure = 15
+ self.azimuth = 0
+ self.inrange = False
+ self.width = 10
+ self.height = 10
+ self.barrelswitch = False
+ self.secondarybarrelswitch = False
+ self.invert = False
+ self.eraser = False
+ self.xtilt = 1
+ self.ytilt = 1
+ self.twist = 1
+ self._old_values = None
+ self.current_state = None
+
+ def restore(self):
+ if self._old_values is not None:
+ for i in [
+ "x",
+ "y",
+ "tippressure",
+ "azimuth",
+ "width",
+ "height",
+ "twist",
+ "xtilt",
+ "ytilt",
+ ]:
+ setattr(self, i, getattr(self._old_values, i))
+
+ def backup(self):
+ self._old_values = copy.copy(self)
+
+ def __assert_axis(self, evdev, axis, value):
+ if (
+ axis == libevdev.EV_KEY.BTN_TOOL_RUBBER
+ and evdev.value[libevdev.EV_KEY.BTN_TOOL_RUBBER] is None
+ ):
+ return
+
+ assert (
+ evdev.value[axis] == value
+ ), f"assert evdev.value[{axis}] ({evdev.value[axis]}) != {value}"
+
+ def assert_expected_input_events(self, evdev):
+ assert evdev.value[libevdev.EV_ABS.ABS_X] == self.x
+ assert evdev.value[libevdev.EV_ABS.ABS_Y] == self.y
+ assert self.current_state == PenState.from_evdev(evdev)
+
+
+class PenDigitizer(base.UHIDTestDevice):
+ def __init__(
+ self,
+ name,
+ rdesc_str=None,
+ rdesc=None,
+ application="Pen",
+ physical="Stylus",
+ input_info=(BusType.USB, 1, 2),
+ evdev_name_suffix=None,
+ ):
+ super().__init__(name, application, rdesc_str, rdesc, input_info)
+ self.physical = physical
+ self.cur_application = application
+ if evdev_name_suffix is not None:
+ self.name += evdev_name_suffix
+
+ self.fields = []
+ for r in self.parsed_rdesc.input_reports.values():
+ if r.application_name == self.application:
+ physicals = [f.physical_name for f in r]
+ if self.physical not in physicals and None not in physicals:
+ continue
+ self.fields = [f.usage_name for f in r]
+
+ def move_to(self, pen, state):
+ # fill in the previous values
+ if pen.current_state == PenState.PEN_IS_OUT_OF_RANGE:
+ pen.restore()
+
+ print(f"\n *** pen is moving to {state} ***")
+
+ if state == PenState.PEN_IS_OUT_OF_RANGE:
+ pen.backup()
+ pen.x = 0
+ pen.y = 0
+ pen.tipswitch = False
+ pen.tippressure = 0
+ pen.azimuth = 0
+ pen.inrange = False
+ pen.width = 0
+ pen.height = 0
+ pen.invert = False
+ pen.eraser = False
+ pen.xtilt = 0
+ pen.ytilt = 0
+ pen.twist = 0
+ pen.barrelswitch = False
+ pen.secondarybarrelswitch = False
+ elif state == PenState.PEN_IS_IN_RANGE:
+ pen.tipswitch = False
+ pen.inrange = True
+ pen.invert = False
+ pen.eraser = False
+ pen.barrelswitch = False
+ pen.secondarybarrelswitch = False
+ elif state == PenState.PEN_IS_IN_CONTACT:
+ pen.tipswitch = True
+ pen.inrange = True
+ pen.invert = False
+ pen.eraser = False
+ pen.barrelswitch = False
+ pen.secondarybarrelswitch = False
+ elif state == PenState.PEN_IS_IN_RANGE_WITH_BUTTON:
+ pen.tipswitch = False
+ pen.inrange = True
+ pen.invert = False
+ pen.eraser = False
+ pen.barrelswitch = True
+ pen.secondarybarrelswitch = False
+ elif state == PenState.PEN_IS_IN_CONTACT_WITH_BUTTON:
+ pen.tipswitch = True
+ pen.inrange = True
+ pen.invert = False
+ pen.eraser = False
+ pen.barrelswitch = True
+ pen.secondarybarrelswitch = False
+ elif state == PenState.PEN_IS_IN_RANGE_WITH_SECOND_BUTTON:
+ pen.tipswitch = False
+ pen.inrange = True
+ pen.invert = False
+ pen.eraser = False
+ pen.barrelswitch = False
+ pen.secondarybarrelswitch = True
+ elif state == PenState.PEN_IS_IN_CONTACT_WITH_SECOND_BUTTON:
+ pen.tipswitch = True
+ pen.inrange = True
+ pen.invert = False
+ pen.eraser = False
+ pen.barrelswitch = False
+ pen.secondarybarrelswitch = True
+ elif state == PenState.PEN_IS_IN_RANGE_WITH_ERASING_INTENT:
+ pen.tipswitch = False
+ pen.inrange = True
+ pen.invert = True
+ pen.eraser = False
+ pen.barrelswitch = False
+ pen.secondarybarrelswitch = False
+ elif state == PenState.PEN_IS_ERASING:
+ pen.tipswitch = False
+ pen.inrange = True
+ pen.invert = False
+ pen.eraser = True
+ pen.barrelswitch = False
+ pen.secondarybarrelswitch = False
+
+ pen.current_state = state
+
+ def event(self, pen):
+ rs = []
+ r = self.create_report(application=self.cur_application, data=pen)
+ self.call_input_event(r)
+ rs.append(r)
+ return rs
+
+ def get_report(self, req, rnum, rtype):
+ if rtype != self.UHID_FEATURE_REPORT:
+ return (1, [])
+
+ rdesc = None
+ for v in self.parsed_rdesc.feature_reports.values():
+ if v.report_ID == rnum:
+ rdesc = v
+
+ if rdesc is None:
+ return (1, [])
+
+ return (1, [])
+
+ def set_report(self, req, rnum, rtype, data):
+ if rtype != self.UHID_FEATURE_REPORT:
+ return 1
+
+ rdesc = None
+ for v in self.parsed_rdesc.feature_reports.values():
+ if v.report_ID == rnum:
+ rdesc = v
+
+ if rdesc is None:
+ return 1
+
+ return 1
+
+
+class BaseTest:
+ class TestTablet(base.BaseTestCase.TestUhid):
+ def create_device(self):
+ raise Exception("please reimplement me in subclasses")
+
+ def post(self, uhdev, pen):
+ r = uhdev.event(pen)
+ events = uhdev.next_sync_events()
+ self.debug_reports(r, uhdev, events)
+ return events
+
+ def validate_transitions(
+ self, from_state, pen, evdev, events, allow_intermediate_states
+ ):
+ # check that the final state is correct
+ pen.assert_expected_input_events(evdev)
+
+ state = from_state
+
+ # check that the transitions are valid
+ sync_events = []
+ while libevdev.InputEvent(libevdev.EV_SYN.SYN_REPORT) in events:
+ # split the first EV_SYN from the list
+ idx = events.index(libevdev.InputEvent(libevdev.EV_SYN.SYN_REPORT))
+ sync_events = events[:idx]
+ events = events[idx + 1 :]
+
+ # now check for a valid transition
+ state = state.apply(sync_events, not allow_intermediate_states)
+
+ if events:
+ state = state.apply(sync_events, not allow_intermediate_states)
+
+ def _test_states(self, state_list, scribble, allow_intermediate_states):
+ """Internal method to test against a list of
+ transition between states.
+ state_list is a list of PenState objects
+ scribble is a boolean which tells if we need
+ to wobble a little the X,Y coordinates of the pen
+ between each state transition."""
+ uhdev = self.uhdev
+ evdev = uhdev.get_evdev()
+
+ cur_state = PenState.PEN_IS_OUT_OF_RANGE
+
+ p = Pen(50, 60)
+ uhdev.move_to(p, PenState.PEN_IS_OUT_OF_RANGE)
+ events = self.post(uhdev, p)
+ self.validate_transitions(
+ cur_state, p, evdev, events, allow_intermediate_states
+ )
+
+ cur_state = p.current_state
+
+ for state in state_list:
+ if scribble and cur_state != PenState.PEN_IS_OUT_OF_RANGE:
+ p.x += 1
+ p.y -= 1
+ events = self.post(uhdev, p)
+ self.validate_transitions(
+ cur_state, p, evdev, events, allow_intermediate_states
+ )
+ assert len(events) >= 3 # X, Y, SYN
+ uhdev.move_to(p, state)
+ if scribble and state != PenState.PEN_IS_OUT_OF_RANGE:
+ p.x += 1
+ p.y -= 1
+ events = self.post(uhdev, p)
+ self.validate_transitions(
+ cur_state, p, evdev, events, allow_intermediate_states
+ )
+ cur_state = p.current_state
+
+ @pytest.mark.parametrize("scribble", [True, False], ids=["scribble", "static"])
+ @pytest.mark.parametrize(
+ "state_list",
+ [pytest.param(v, id=k) for k, v in PenState.legal_transitions().items()],
+ )
+ def test_valid_pen_states(self, state_list, scribble):
+ """This is the first half of the Windows Pen Implementation state machine:
+ we don't have Invert nor Erase bits, so just move in/out-of-range or proximity.
+ https://docs.microsoft.com/en-us/windows-hardware/design/component-guidelines/windows-pen-states
+ """
+ self._test_states(state_list, scribble, allow_intermediate_states=False)
+
+ @pytest.mark.parametrize("scribble", [True, False], ids=["scribble", "static"])
+ @pytest.mark.parametrize(
+ "state_list",
+ [
+ pytest.param(v, id=k)
+ for k, v in PenState.tolerated_transitions().items()
+ ],
+ )
+ def test_tolerated_pen_states(self, state_list, scribble):
+ """This is not adhering to the Windows Pen Implementation state machine
+ but we should expect the kernel to behave properly, mostly for historical
+ reasons."""
+ self._test_states(state_list, scribble, allow_intermediate_states=True)
+
+ @pytest.mark.skip_if_uhdev(
+ lambda uhdev: "Barrel Switch" not in uhdev.fields,
+ "Device not compatible, missing Barrel Switch usage",
+ )
+ @pytest.mark.parametrize("scribble", [True, False], ids=["scribble", "static"])
+ @pytest.mark.parametrize(
+ "state_list",
+ [
+ pytest.param(v, id=k)
+ for k, v in PenState.legal_transitions_with_primary_button().items()
+ ],
+ )
+ def test_valid_primary_button_pen_states(self, state_list, scribble):
+ """Rework the transition state machine by adding the primary button."""
+ self._test_states(state_list, scribble, allow_intermediate_states=False)
+
+ @pytest.mark.skip_if_uhdev(
+ lambda uhdev: "Secondary Barrel Switch" not in uhdev.fields,
+ "Device not compatible, missing Secondary Barrel Switch usage",
+ )
+ @pytest.mark.parametrize("scribble", [True, False], ids=["scribble", "static"])
+ @pytest.mark.parametrize(
+ "state_list",
+ [
+ pytest.param(v, id=k)
+ for k, v in PenState.legal_transitions_with_secondary_button().items()
+ ],
+ )
+ def test_valid_secondary_button_pen_states(self, state_list, scribble):
+ """Rework the transition state machine by adding the secondary button."""
+ self._test_states(state_list, scribble, allow_intermediate_states=False)
+
+ @pytest.mark.skip_if_uhdev(
+ lambda uhdev: "Invert" not in uhdev.fields,
+ "Device not compatible, missing Invert usage",
+ )
+ @pytest.mark.parametrize("scribble", [True, False], ids=["scribble", "static"])
+ @pytest.mark.parametrize(
+ "state_list",
+ [
+ pytest.param(v, id=k)
+ for k, v in PenState.legal_transitions_with_invert().items()
+ ],
+ )
+ def test_valid_invert_pen_states(self, state_list, scribble):
+ """This is the second half of the Windows Pen Implementation state machine:
+ we now have Invert and Erase bits, so move in/out or proximity with the intend
+ to erase.
+ https://docs.microsoft.com/en-us/windows-hardware/design/component-guidelines/windows-pen-states
+ """
+ self._test_states(state_list, scribble, allow_intermediate_states=False)
+
+ @pytest.mark.skip_if_uhdev(
+ lambda uhdev: "Invert" not in uhdev.fields,
+ "Device not compatible, missing Invert usage",
+ )
+ @pytest.mark.parametrize("scribble", [True, False], ids=["scribble", "static"])
+ @pytest.mark.parametrize(
+ "state_list",
+ [
+ pytest.param(v, id=k)
+ for k, v in PenState.tolerated_transitions_with_invert().items()
+ ],
+ )
+ def test_tolerated_invert_pen_states(self, state_list, scribble):
+ """This is the second half of the Windows Pen Implementation state machine:
+ we now have Invert and Erase bits, so move in/out or proximity with the intend
+ to erase.
+ https://docs.microsoft.com/en-us/windows-hardware/design/component-guidelines/windows-pen-states
+ """
+ self._test_states(state_list, scribble, allow_intermediate_states=True)
+
+ @pytest.mark.skip_if_uhdev(
+ lambda uhdev: "Invert" not in uhdev.fields,
+ "Device not compatible, missing Invert usage",
+ )
+ @pytest.mark.parametrize("scribble", [True, False], ids=["scribble", "static"])
+ @pytest.mark.parametrize(
+ "state_list",
+ [pytest.param(v, id=k) for k, v in PenState.broken_transitions().items()],
+ )
+ def test_tolerated_broken_pen_states(self, state_list, scribble):
+ """Those tests are definitely not part of the Windows specification.
+ However, a half broken device might export those transitions.
+ For example, a pen that has the eraser button might wobble between
+ touching and erasing if the tablet doesn't enforce the Windows
+ state machine."""
+ self._test_states(state_list, scribble, allow_intermediate_states=True)
+
+
+class GXTP_pen(PenDigitizer):
+ def event(self, pen):
+ if not hasattr(self, "prev_tip_state"):
+ self.prev_tip_state = False
+
+ internal_pen = copy.copy(pen)
+
+ # bug in the controller: when the pen touches the
+ # surface, in-range stays to 1, but when
+ # the pen moves in-range gets reverted to 0
+ if pen.tipswitch and self.prev_tip_state:
+ internal_pen.inrange = False
+
+ self.prev_tip_state = pen.tipswitch
+
+ # another bug in the controller: when the pen is
+ # inverted, invert is set to 1, but as soon as
+ # the pen touches the surface, eraser is correctly
+ # set to 1 but invert is released
+ if pen.eraser:
+ internal_pen.invert = False
+
+ return super().event(internal_pen)
+
+
+class USIPen(PenDigitizer):
+ pass
+
+
+################################################################################
+#
+# Windows 7 compatible devices
+#
+################################################################################
+# class TestEgalax_capacitive_0eef_7224(BaseTest.TestTablet):
+# def create_device(self):
+# return PenDigitizer('uhid test egalax-capacitive_0eef_7224',
+# rdesc='05 0d 09 04 a1 01 85 04 09 22 a1 00 09 42 15 00 25 01 75 01 95 01 81 02 09 32 15 00 25 01 81 02 09 51 75 05 95 01 16 00 00 26 10 00 81 02 09 47 75 01 95 01 15 00 25 01 81 02 05 01 09 30 75 10 95 01 55 0d 65 33 35 00 46 34 49 26 ff 7f 81 02 09 31 75 10 95 01 55 0d 65 33 35 00 46 37 29 26 ff 7f 81 02 05 0d 09 55 25 08 75 08 95 01 b1 02 c0 c0 05 01 09 01 a1 01 85 01 09 01 a1 00 05 09 19 01 29 02 15 00 25 01 95 02 75 01 81 02 95 01 75 06 81 01 05 01 09 30 09 31 16 00 00 26 ff 0f 36 00 00 46 ff 0f 66 00 00 75 10 95 02 81 02 c0 c0 06 00 ff 09 01 a1 01 09 01 15 00 26 ff 00 85 03 75 08 95 3f 81 02 06 00 ff 09 01 15 00 26 ff 00 75 08 95 3f 91 02 c0 05 0d 09 04 a1 01 85 02 09 20 a1 00 09 42 09 32 15 00 25 01 95 02 75 01 81 02 95 06 75 01 81 03 05 01 09 30 75 10 95 01 a4 55 0d 65 33 36 00 00 46 34 49 16 00 00 26 ff 0f 81 02 09 31 16 00 00 26 ff 0f 36 00 00 46 37 29 81 02 b4 c0 c0 05 0d 09 0e a1 01 85 05 09 22 a1 00 09 52 09 53 15 00 25 0a 75 08 95 02 b1 02 c0 c0',
+# input_info=(BusType.USB, 0x0eef, 0x7224),
+# evdev_name_suffix=' Touchscreen')
+#
+#
+# class TestEgalax_capacitive_0eef_72fa(BaseTest.TestTablet):
+# def create_device(self):
+# return PenDigitizer('uhid test egalax-capacitive_0eef_72fa',
+# rdesc='05 0d 09 04 a1 01 85 04 09 22 a1 00 09 42 15 00 25 01 75 01 95 01 81 02 09 32 15 00 25 01 81 02 09 51 75 05 95 01 16 00 00 26 10 00 81 02 09 47 75 01 95 01 15 00 25 01 81 02 05 01 09 30 75 10 95 01 55 0d 65 33 35 00 46 72 22 26 ff 7f 81 02 09 31 75 10 95 01 55 0d 65 33 35 00 46 87 13 26 ff 7f 81 02 05 0d 09 55 25 08 75 08 95 01 b1 02 c0 c0 05 01 09 01 a1 01 85 01 09 01 a1 00 05 09 19 01 29 02 15 00 25 01 95 02 75 01 81 02 95 01 75 06 81 01 05 01 09 30 09 31 16 00 00 26 ff 0f 36 00 00 46 ff 0f 66 00 00 75 10 95 02 81 02 c0 c0 06 00 ff 09 01 a1 01 09 01 15 00 26 ff 00 85 03 75 08 95 3f 81 02 06 00 ff 09 01 15 00 26 ff 00 75 08 95 3f 91 02 c0 05 0d 09 04 a1 01 85 02 09 20 a1 00 09 42 09 32 15 00 25 01 95 02 75 01 81 02 95 06 75 01 81 03 05 01 09 30 75 10 95 01 a4 55 0d 65 33 36 00 00 46 72 22 16 00 00 26 ff 0f 81 02 09 31 16 00 00 26 ff 0f 36 00 00 46 87 13 81 02 b4 c0 c0 05 0d 09 0e a1 01 85 05 09 22 a1 00 09 52 09 53 15 00 25 0a 75 08 95 02 b1 02 c0 c0',
+# input_info=(BusType.USB, 0x0eef, 0x72fa),
+# evdev_name_suffix=' Touchscreen')
+#
+#
+# class TestEgalax_capacitive_0eef_7336(BaseTest.TestTablet):
+# def create_device(self):
+# return PenDigitizer('uhid test egalax-capacitive_0eef_7336',
+# rdesc='05 0d 09 04 a1 01 85 04 09 22 a1 00 09 42 15 00 25 01 75 01 95 01 81 02 09 32 15 00 25 01 81 02 09 51 75 05 95 01 16 00 00 26 10 00 81 02 09 47 75 01 95 01 15 00 25 01 81 02 05 01 09 30 75 10 95 01 55 0d 65 33 35 00 46 c1 20 26 ff 7f 81 02 09 31 75 10 95 01 55 0d 65 33 35 00 46 c2 18 26 ff 7f 81 02 05 0d 09 55 25 08 75 08 95 01 b1 02 c0 c0 05 01 09 01 a1 01 85 01 09 01 a1 00 05 09 19 01 29 02 15 00 25 01 95 02 75 01 81 02 95 01 75 06 81 01 05 01 09 30 09 31 16 00 00 26 ff 0f 36 00 00 46 ff 0f 66 00 00 75 10 95 02 81 02 c0 c0 06 00 ff 09 01 a1 01 09 01 15 00 26 ff 00 85 03 75 08 95 3f 81 02 06 00 ff 09 01 15 00 26 ff 00 75 08 95 3f 91 02 c0 05 0d 09 04 a1 01 85 02 09 20 a1 00 09 42 09 32 15 00 25 01 95 02 75 01 81 02 95 06 75 01 81 03 05 01 09 30 75 10 95 01 a4 55 0d 65 33 36 00 00 46 c1 20 16 00 00 26 ff 0f 81 02 09 31 16 00 00 26 ff 0f 36 00 00 46 c2 18 81 02 b4 c0 c0 05 0d 09 0e a1 01 85 05 09 22 a1 00 09 52 09 53 15 00 25 0a 75 08 95 02 b1 02 c0 c0',
+# input_info=(BusType.USB, 0x0eef, 0x7336),
+# evdev_name_suffix=' Touchscreen')
+#
+#
+# class TestEgalax_capacitive_0eef_7337(BaseTest.TestTablet):
+# def create_device(self):
+# return PenDigitizer('uhid test egalax-capacitive_0eef_7337',
+# rdesc='05 0d 09 04 a1 01 85 04 09 22 a1 00 09 42 15 00 25 01 75 01 95 01 81 02 09 32 15 00 25 01 81 02 09 51 75 05 95 01 16 00 00 26 10 00 81 02 09 47 75 01 95 01 15 00 25 01 81 02 05 01 09 30 75 10 95 01 55 0d 65 33 35 00 46 ae 17 26 ff 7f 81 02 09 31 75 10 95 01 55 0d 65 33 35 00 46 c3 0e 26 ff 7f 81 02 05 0d 09 55 25 08 75 08 95 01 b1 02 c0 c0 05 01 09 01 a1 01 85 01 09 01 a1 00 05 09 19 01 29 02 15 00 25 01 95 02 75 01 81 02 95 01 75 06 81 01 05 01 09 30 09 31 16 00 00 26 ff 0f 36 00 00 46 ff 0f 66 00 00 75 10 95 02 81 02 c0 c0 06 00 ff 09 01 a1 01 09 01 15 00 26 ff 00 85 03 75 08 95 3f 81 02 06 00 ff 09 01 15 00 26 ff 00 75 08 95 3f 91 02 c0 05 0d 09 04 a1 01 85 02 09 20 a1 00 09 42 09 32 15 00 25 01 95 02 75 01 81 02 95 06 75 01 81 03 05 01 09 30 75 10 95 01 a4 55 0d 65 33 36 00 00 46 ae 17 16 00 00 26 ff 0f 81 02 09 31 16 00 00 26 ff 0f 36 00 00 46 c3 0e 81 02 b4 c0 c0 05 0d 09 0e a1 01 85 05 09 22 a1 00 09 52 09 53 15 00 25 0a 75 08 95 02 b1 02 c0 c0',
+# input_info=(BusType.USB, 0x0eef, 0x7337),
+# evdev_name_suffix=' Touchscreen')
+#
+#
+# class TestEgalax_capacitive_0eef_7349(BaseTest.TestTablet):
+# def create_device(self):
+# return PenDigitizer('uhid test egalax-capacitive_0eef_7349',
+# rdesc='05 0d 09 04 a1 01 85 04 09 22 a1 00 09 42 15 00 25 01 75 01 95 01 81 02 09 32 15 00 25 01 81 02 09 51 75 05 95 01 16 00 00 26 10 00 81 02 09 47 75 01 95 01 15 00 25 01 81 02 05 01 09 30 75 10 95 01 55 0d 65 33 35 00 46 34 49 26 ff 7f 81 02 09 31 75 10 95 01 55 0d 65 33 35 00 46 37 29 26 ff 7f 81 02 05 0d 09 55 25 08 75 08 95 01 b1 02 c0 c0 05 01 09 01 a1 01 85 01 09 01 a1 00 05 09 19 01 29 02 15 00 25 01 95 02 75 01 81 02 95 01 75 06 81 01 05 01 09 30 09 31 16 00 00 26 ff 0f 36 00 00 46 ff 0f 66 00 00 75 10 95 02 81 02 c0 c0 06 00 ff 09 01 a1 01 09 01 15 00 26 ff 00 85 03 75 08 95 3f 81 02 06 00 ff 09 01 15 00 26 ff 00 75 08 95 3f 91 02 c0 05 0d 09 04 a1 01 85 02 09 20 a1 00 09 42 09 32 15 00 25 01 95 02 75 01 81 02 95 06 75 01 81 03 05 01 09 30 75 10 95 01 a4 55 0d 65 33 36 00 00 46 34 49 16 00 00 26 ff 0f 81 02 09 31 16 00 00 26 ff 0f 36 00 00 46 37 29 81 02 b4 c0 c0 05 0d 09 0e a1 01 85 05 09 22 a1 00 09 52 09 53 15 00 25 0a 75 08 95 02 b1 02 c0 c0',
+# input_info=(BusType.USB, 0x0eef, 0x7349),
+# evdev_name_suffix=' Touchscreen')
+#
+#
+# class TestEgalax_capacitive_0eef_73f4(BaseTest.TestTablet):
+# def create_device(self):
+# return PenDigitizer('uhid test egalax-capacitive_0eef_73f4',
+# rdesc='05 0d 09 04 a1 01 85 04 09 22 a1 00 09 42 15 00 25 01 75 01 95 01 81 02 09 32 15 00 25 01 81 02 09 51 75 05 95 01 16 00 00 26 10 00 81 02 09 47 75 01 95 01 15 00 25 01 81 02 05 01 09 30 75 10 95 01 55 0d 65 33 35 00 46 96 4e 26 ff 7f 81 02 09 31 75 10 95 01 55 0d 65 33 35 00 46 23 2c 26 ff 7f 81 02 05 0d 09 55 25 08 75 08 95 01 b1 02 c0 c0 05 01 09 01 a1 01 85 01 09 01 a1 00 05 09 19 01 29 02 15 00 25 01 95 02 75 01 81 02 95 01 75 06 81 01 05 01 09 30 09 31 16 00 00 26 ff 0f 36 00 00 46 ff 0f 66 00 00 75 10 95 02 81 02 c0 c0 06 00 ff 09 01 a1 01 09 01 15 00 26 ff 00 85 03 75 08 95 3f 81 02 06 00 ff 09 01 15 00 26 ff 00 75 08 95 3f 91 02 c0 05 0d 09 04 a1 01 85 02 09 20 a1 00 09 42 09 32 15 00 25 01 95 02 75 01 81 02 95 06 75 01 81 03 05 01 09 30 75 10 95 01 a4 55 0d 65 33 36 00 00 46 96 4e 16 00 00 26 ff 0f 81 02 09 31 16 00 00 26 ff 0f 36 00 00 46 23 2c 81 02 b4 c0 c0 05 0d 09 0e a1 01 85 05 09 22 a1 00 09 52 09 53 15 00 25 0a 75 08 95 02 b1 02 c0 c0',
+# input_info=(BusType.USB, 0x0eef, 0x73f4),
+# evdev_name_suffix=' Touchscreen')
+#
+# bogus: BTN_TOOL_PEN is not emitted
+# class TestIrtouch_6615_0070(BaseTest.TestTablet):
+# def create_device(self):
+# return PenDigitizer('uhid test irtouch_6615_0070',
+# rdesc='05 01 09 02 a1 01 85 10 09 01 a1 00 05 09 19 01 29 02 15 00 25 01 95 02 75 01 81 02 95 06 81 03 05 01 09 30 09 31 15 00 26 ff 7f 75 10 95 02 81 02 c0 c0 05 0d 09 04 a1 01 85 30 09 22 a1 02 09 42 15 00 25 01 75 01 95 01 81 02 09 32 81 02 09 47 81 02 95 05 81 03 09 51 75 08 95 01 81 02 05 01 09 30 26 ff 7f 55 0f 65 11 35 00 46 51 02 75 10 95 01 81 02 09 31 35 00 46 73 01 81 02 c0 a1 02 05 0d 09 42 15 00 25 01 75 01 95 01 81 02 09 32 81 02 09 47 81 02 95 05 81 03 09 51 75 08 95 01 81 02 05 01 09 30 26 ff 7f 55 0f 65 11 35 00 46 51 02 75 10 95 01 81 02 09 31 35 00 46 73 01 81 02 c0 05 0d 09 54 15 00 26 02 00 75 08 95 01 81 02 85 03 09 55 15 00 26 ff 00 75 08 95 01 b1 02 c0 05 0d 09 0e a1 01 85 02 09 52 09 53 15 00 26 ff 00 75 08 95 02 b1 02 c0 05 0d 09 02 a1 01 85 20 09 20 a1 00 09 42 15 00 25 01 75 01 95 01 81 02 95 07 81 03 05 01 09 30 26 ff 7f 55 0f 65 11 35 00 46 51 02 75 10 95 01 81 02 09 31 35 00 46 73 01 81 02 85 01 06 00 ff 09 01 75 08 95 01 b1 02 c0 c0',
+# input_info=(BusType.USB, 0x6615, 0x0070))
+
+
+class TestNexio_1870_0100(BaseTest.TestTablet):
+ def create_device(self):
+ return PenDigitizer(
+ "uhid test nexio_1870_0100",
+ rdesc="05 0d 09 04 a1 01 85 01 09 22 a1 02 09 42 15 00 25 01 75 01 95 01 81 02 09 32 81 02 95 06 81 03 75 08 09 51 95 01 81 02 05 01 26 ff 3f 75 10 55 0e 65 11 09 30 35 00 46 1e 19 81 02 26 ff 3f 09 31 35 00 46 be 0f 81 02 26 ff 3f c0 a1 02 05 0d 09 42 15 00 25 01 75 01 95 01 81 02 09 32 81 02 95 06 81 03 75 08 09 51 95 01 81 02 05 01 26 ff 3f 75 10 55 0e 65 11 09 30 35 00 46 1e 19 81 02 26 ff 3f 09 31 35 00 46 be 0f 81 02 26 ff 3f c0 05 0d 09 54 95 01 75 08 25 02 81 02 85 02 09 55 25 02 b1 02 c0 09 0e a1 01 85 03 09 23 a1 02 09 52 09 53 15 00 25 0a 75 08 95 02 b1 02 c0 c0 05 01 09 02 a1 01 09 01 a1 00 85 04 05 09 95 03 75 01 19 01 29 03 15 00 25 01 81 02 95 01 75 05 81 01 05 01 75 10 95 02 09 30 09 31 15 00 26 ff 7f 81 02 c0 c0 05 0d 09 02 a1 01 85 05 09 20 a1 00 09 42 09 32 15 00 25 01 75 01 95 02 81 02 95 0e 81 03 05 01 26 ff 3f 75 10 95 01 55 0e 65 11 09 30 35 00 46 1e 19 81 02 26 ff 3f 09 31 35 00 46 be 0f 81 02 26 ff 3f c0 c0 06 00 ff 09 01 a1 01 85 06 19 01 29 40 15 00 26 ff 00 75 08 95 40 81 00 19 01 29 40 91 00 c0",
+ input_info=(BusType.USB, 0x1870, 0x0100),
+ )
+
+
+class TestNexio_1870_010d(BaseTest.TestTablet):
+ def create_device(self):
+ return PenDigitizer(
+ "uhid test nexio_1870_010d",
+ rdesc="05 0d 09 04 a1 01 85 01 09 22 a1 02 09 42 15 00 25 01 75 01 95 01 81 02 09 32 81 02 95 06 81 03 75 08 09 51 95 01 81 02 05 01 26 ff 3f 75 10 55 0d 65 00 09 30 35 00 46 00 00 81 02 26 ff 3f 09 31 35 00 46 00 00 81 02 26 ff 3f 05 0d 09 48 35 00 26 ff 3f 81 02 09 49 35 00 26 ff 3f 81 02 c0 a1 02 05 0d 09 42 15 00 25 01 75 01 95 01 81 02 09 32 81 02 95 06 81 03 75 08 09 51 95 01 81 02 05 01 26 ff 3f 75 10 55 0d 65 00 09 30 35 00 46 00 00 81 02 26 ff 3f 09 31 35 00 46 00 00 81 02 26 ff 3f 05 0d 09 48 35 00 26 ff 3f 81 02 09 49 35 00 26 ff 3f 81 02 c0 a1 02 05 0d 09 42 15 00 25 01 75 01 95 01 81 02 09 32 81 02 95 06 81 03 75 08 09 51 95 01 81 02 05 01 26 ff 3f 75 10 55 0d 65 00 09 30 35 00 46 00 00 81 02 26 ff 3f 09 31 35 00 46 00 00 81 02 26 ff 3f 05 0d 09 48 35 00 26 ff 3f 81 02 09 49 35 00 26 ff 3f 81 02 c0 a1 02 05 0d 09 42 15 00 25 01 75 01 95 01 81 02 09 32 81 02 95 06 81 03 75 08 09 51 95 01 81 02 05 01 26 ff 3f 75 10 55 0d 65 00 09 30 35 00 46 00 00 81 02 26 ff 3f 09 31 35 00 46 00 00 81 02 26 ff 3f 05 0d 09 48 35 00 26 ff 3f 81 02 09 49 35 00 26 ff 3f 81 02 c0 a1 02 05 0d 09 42 15 00 25 01 75 01 95 01 81 02 09 32 81 02 95 06 81 03 75 08 09 51 95 01 81 02 05 01 26 ff 3f 75 10 55 0d 65 00 09 30 35 00 46 00 00 81 02 26 ff 3f 09 31 35 00 46 00 00 81 02 26 ff 3f 05 0d 09 48 35 00 26 ff 3f 81 02 09 49 35 00 26 ff 3f 81 02 c0 a1 02 05 0d 09 42 15 00 25 01 75 01 95 01 81 02 09 32 81 02 95 06 81 03 75 08 09 51 95 01 81 02 05 01 26 ff 3f 75 10 55 0d 65 00 09 30 35 00 46 00 00 81 02 26 ff 3f 09 31 35 00 46 00 00 81 02 26 ff 3f 05 0d 09 48 35 00 26 ff 3f 81 02 09 49 35 00 26 ff 3f 81 02 c0 05 0d 09 54 95 01 75 08 25 02 81 02 85 02 09 55 25 06 b1 02 c0 09 0e a1 01 85 03 09 23 a1 02 09 52 09 53 15 00 25 0a 75 08 95 02 b1 02 c0 c0 05 01 09 02 a1 01 09 01 a1 00 85 04 05 09 95 03 75 01 19 01 29 03 15 00 25 01 81 02 95 01 75 05 81 01 05 01 75 10 95 02 09 30 09 31 15 00 26 ff 7f 81 02 c0 c0 05 0d 09 02 a1 01 85 05 09 20 a1 00 09 42 09 32 15 00 25 01 75 01 95 02 81 02 95 0e 81 03 05 01 26 ff 3f 75 10 95 01 55 0e 65 11 09 30 35 00 46 1e 19 81 02 26 ff 3f 09 31 35 00 46 be 0f 81 02 26 ff 3f c0 c0 06 00 ff 09 01 a1 01 85 06 19 01 29 40 15 00 26 ff 00 75 08 95 3e 81 00 19 01 29 40 91 00 c0",
+ input_info=(BusType.USB, 0x1870, 0x010D),
+ )
+
+
+class TestNexio_1870_0119(BaseTest.TestTablet):
+ def create_device(self):
+ return PenDigitizer(
+ "uhid test nexio_1870_0119",
+ rdesc="05 0d 09 04 a1 01 85 01 09 22 a1 02 09 42 15 00 25 01 75 01 95 01 81 02 09 32 81 02 95 06 81 03 75 08 09 51 95 01 81 02 05 01 26 ff 3f 75 10 55 0d 65 00 09 30 35 00 46 00 00 81 02 26 ff 3f 09 31 35 00 46 00 00 81 02 26 ff 3f 05 0d 09 48 35 00 26 ff 3f 81 02 09 49 35 00 26 ff 3f 81 02 c0 a1 02 05 0d 09 42 15 00 25 01 75 01 95 01 81 02 09 32 81 02 95 06 81 03 75 08 09 51 95 01 81 02 05 01 26 ff 3f 75 10 55 0d 65 00 09 30 35 00 46 00 00 81 02 26 ff 3f 09 31 35 00 46 00 00 81 02 26 ff 3f 05 0d 09 48 35 00 26 ff 3f 81 02 09 49 35 00 26 ff 3f 81 02 c0 a1 02 05 0d 09 42 15 00 25 01 75 01 95 01 81 02 09 32 81 02 95 06 81 03 75 08 09 51 95 01 81 02 05 01 26 ff 3f 75 10 55 0d 65 00 09 30 35 00 46 00 00 81 02 26 ff 3f 09 31 35 00 46 00 00 81 02 26 ff 3f 05 0d 09 48 35 00 26 ff 3f 81 02 09 49 35 00 26 ff 3f 81 02 c0 a1 02 05 0d 09 42 15 00 25 01 75 01 95 01 81 02 09 32 81 02 95 06 81 03 75 08 09 51 95 01 81 02 05 01 26 ff 3f 75 10 55 0d 65 00 09 30 35 00 46 00 00 81 02 26 ff 3f 09 31 35 00 46 00 00 81 02 26 ff 3f 05 0d 09 48 35 00 26 ff 3f 81 02 09 49 35 00 26 ff 3f 81 02 c0 a1 02 05 0d 09 42 15 00 25 01 75 01 95 01 81 02 09 32 81 02 95 06 81 03 75 08 09 51 95 01 81 02 05 01 26 ff 3f 75 10 55 0d 65 00 09 30 35 00 46 00 00 81 02 26 ff 3f 09 31 35 00 46 00 00 81 02 26 ff 3f 05 0d 09 48 35 00 26 ff 3f 81 02 09 49 35 00 26 ff 3f 81 02 c0 a1 02 05 0d 09 42 15 00 25 01 75 01 95 01 81 02 09 32 81 02 95 06 81 03 75 08 09 51 95 01 81 02 05 01 26 ff 3f 75 10 55 0d 65 00 09 30 35 00 46 00 00 81 02 26 ff 3f 09 31 35 00 46 00 00 81 02 26 ff 3f 05 0d 09 48 35 00 26 ff 3f 81 02 09 49 35 00 26 ff 3f 81 02 c0 05 0d 09 54 95 01 75 08 25 02 81 02 85 02 09 55 25 06 b1 02 c0 09 0e a1 01 85 03 09 23 a1 02 09 52 09 53 15 00 25 0a 75 08 95 02 b1 02 c0 c0 05 01 09 02 a1 01 09 01 a1 00 85 04 05 09 95 03 75 01 19 01 29 03 15 00 25 01 81 02 95 01 75 05 81 01 05 01 75 10 95 02 09 30 09 31 15 00 26 ff 7f 81 02 c0 c0 05 0d 09 02 a1 01 85 05 09 20 a1 00 09 42 09 32 15 00 25 01 75 01 95 02 81 02 95 0e 81 03 05 01 26 ff 3f 75 10 95 01 55 0e 65 11 09 30 35 00 46 1e 19 81 02 26 ff 3f 09 31 35 00 46 be 0f 81 02 26 ff 3f c0 c0 06 00 ff 09 01 a1 01 85 06 19 01 29 40 15 00 26 ff 00 75 08 95 3e 81 00 19 01 29 40 91 00 c0",
+ input_info=(BusType.USB, 0x1870, 0x0119),
+ )
+
+
+################################################################################
+#
+# Windows 8 compatible devices
+#
+################################################################################
+
+# bogus: application is 'undefined'
+# class Testatmel_03eb_8409(BaseTest.TestTablet):
+# def create_device(self):
+# return PenDigitizer('uhid test atmel_03eb_8409', rdesc='05 0d 09 04 a1 01 85 01 09 22 a1 02 09 42 15 00 25 01 75 01 95 01 81 02 95 01 81 03 95 01 81 03 25 1f 75 05 09 51 81 02 05 01 55 0e 65 11 35 00 75 10 95 02 46 c8 0a 26 6f 08 09 30 81 02 35 00 35 00 46 18 06 26 77 0f 09 31 81 02 35 00 35 00 05 0d 95 01 75 08 15 00 26 ff 00 46 ff 00 09 48 81 02 09 49 81 02 c0 09 22 a1 02 09 42 15 00 25 01 75 01 95 01 81 02 95 01 81 03 95 01 81 03 25 1f 75 05 09 51 81 02 05 01 55 0e 65 11 35 00 75 10 95 02 46 c8 0a 26 6f 08 09 30 81 02 35 00 35 00 46 18 06 26 77 0f 09 31 81 02 35 00 35 00 05 0d 95 01 75 08 15 00 26 ff 00 46 ff 00 09 48 81 02 09 49 81 02 c0 09 22 a1 02 09 42 15 00 25 01 75 01 95 01 81 02 95 01 81 03 95 01 81 03 25 1f 75 05 09 51 81 02 05 01 55 0e 65 11 35 00 75 10 95 02 46 c8 0a 26 6f 08 09 30 81 02 35 00 35 00 46 18 06 26 77 0f 09 31 81 02 35 00 35 00 05 0d 95 01 75 08 15 00 26 ff 00 46 ff 00 09 48 81 02 09 49 81 02 c0 09 22 a1 02 09 42 15 00 25 01 75 01 95 01 81 02 95 01 81 03 95 01 81 03 25 1f 75 05 09 51 81 02 05 01 55 0e 65 11 35 00 75 10 95 02 46 c8 0a 26 6f 08 09 30 81 02 35 00 35 00 46 18 06 26 77 0f 09 31 81 02 35 00 35 00 05 0d 95 01 75 08 15 00 26 ff 00 46 ff 00 09 48 81 02 09 49 81 02 c0 09 22 a1 02 09 42 15 00 25 01 75 01 95 01 81 02 95 01 81 03 95 01 81 03 25 1f 75 05 09 51 81 02 05 01 55 0e 65 11 35 00 75 10 95 02 46 c8 0a 26 6f 08 09 30 81 02 35 00 35 00 46 18 06 26 77 0f 09 31 81 02 35 00 35 00 05 0d 95 01 75 08 15 00 26 ff 00 46 ff 00 09 48 81 02 09 49 81 02 c0 05 0d 27 ff ff 00 00 75 10 95 01 09 56 81 02 15 00 25 1f 75 05 09 54 95 01 81 02 75 03 25 01 95 01 81 03 75 08 85 02 09 55 25 10 b1 02 06 00 ff 85 05 09 c5 15 00 26 ff 00 75 08 96 00 01 b1 02 c0 05 0d 09 00 a1 01 85 03 09 20 a1 00 15 00 25 01 75 01 95 01 09 42 81 02 09 44 81 02 09 45 81 02 81 03 09 32 81 02 95 03 81 03 05 01 55 0e 65 11 35 00 75 10 95 02 46 c8 0a 26 6f 08 09 30 81 02 46 18 06 26 77 0f 09 31 81 02 05 0d 09 30 15 01 26 ff 00 75 08 95 01 81 02 c0 c0')
+
+
+class Testatmel_03eb_840b(BaseTest.TestTablet):
+ def create_device(self):
+ return PenDigitizer(
+ "uhid test atmel_03eb_840b",
+ rdesc="05 0d 09 04 a1 01 85 01 09 22 a1 02 09 42 15 00 25 01 75 01 95 01 81 02 95 01 81 03 95 01 81 03 25 1f 75 05 09 51 81 02 05 01 55 0e 65 11 35 00 75 10 95 01 46 00 0a 26 ff 0f 09 30 81 02 09 00 81 03 46 a0 05 26 ff 0f 09 31 81 02 09 00 81 03 05 0d 95 01 75 08 15 00 26 ff 00 46 ff 00 09 00 81 03 09 00 81 03 c0 09 22 a1 02 09 42 15 00 25 01 75 01 95 01 81 02 95 01 81 03 95 01 81 03 25 1f 75 05 09 51 81 02 05 01 55 0e 65 11 35 00 75 10 95 01 46 00 0a 26 ff 0f 09 30 81 02 09 00 81 03 46 a0 05 26 ff 0f 09 31 81 02 09 00 81 03 05 0d 95 01 75 08 15 00 26 ff 00 46 ff 00 09 00 81 03 09 00 81 03 c0 09 22 a1 02 09 42 15 00 25 01 75 01 95 01 81 02 95 01 81 03 95 01 81 03 25 1f 75 05 09 51 81 02 05 01 55 0e 65 11 35 00 75 10 95 01 46 00 0a 26 ff 0f 09 30 81 02 09 00 81 03 46 a0 05 26 ff 0f 09 31 81 02 09 00 81 03 05 0d 95 01 75 08 15 00 26 ff 00 46 ff 00 09 00 81 03 09 00 81 03 c0 09 22 a1 02 09 42 15 00 25 01 75 01 95 01 81 02 95 01 81 03 95 01 81 03 25 1f 75 05 09 51 81 02 05 01 55 0e 65 11 35 00 75 10 95 01 46 00 0a 26 ff 0f 09 30 81 02 09 00 81 03 46 a0 05 26 ff 0f 09 31 81 02 09 00 81 03 05 0d 95 01 75 08 15 00 26 ff 00 46 ff 00 09 00 81 03 09 00 81 03 c0 09 22 a1 02 09 42 15 00 25 01 75 01 95 01 81 02 95 01 81 03 95 01 81 03 25 1f 75 05 09 51 81 02 05 01 55 0e 65 11 35 00 75 10 95 01 46 00 0a 26 ff 0f 09 30 81 02 09 00 81 03 46 a0 05 26 ff 0f 09 31 81 02 09 00 81 03 05 0d 95 01 75 08 15 00 26 ff 00 46 ff 00 09 00 81 03 09 00 81 03 c0 05 0d 27 ff ff 00 00 75 10 95 01 09 56 81 02 15 00 25 1f 75 05 09 54 95 01 81 02 75 03 25 01 95 01 81 03 75 08 85 02 09 55 25 10 b1 02 06 00 ff 85 05 09 c5 15 00 26 ff 00 75 08 96 00 01 b1 02 c0 05 0d 09 02 a1 01 85 03 09 20 a1 00 15 00 25 01 75 01 95 01 09 42 81 02 09 44 81 02 09 45 81 02 81 03 09 32 81 02 95 03 81 03 05 01 55 0e 65 11 35 00 75 10 95 02 46 00 0a 26 ff 0f 09 30 81 02 46 a0 05 26 ff 0f 09 31 81 02 05 0d 09 30 15 01 26 ff 00 75 08 95 01 81 02 c0 c0",
+ )
+
+
+class Testn_trig_1b96_0c01(BaseTest.TestTablet):
+ def create_device(self):
+ return PenDigitizer(
+ "uhid test n_trig_1b96_0c01",
+ rdesc="75 08 15 00 26 ff 00 06 0b ff 09 0b a1 01 95 0f 09 29 85 29 b1 02 95 1f 09 2a 85 2a b1 02 95 3e 09 2b 85 2b b1 02 95 fe 09 2c 85 2c b1 02 96 fe 01 09 2d 85 2d b1 02 95 02 09 48 85 48 b1 02 95 0f 09 2e 85 2e 81 02 95 1f 09 2f 85 2f 81 02 95 3e 09 30 85 30 81 02 95 fe 09 31 85 31 81 02 96 fe 01 09 32 85 32 81 02 75 08 96 fe 0f 09 35 85 35 81 02 c0 05 0d 09 02 a1 01 85 01 09 20 35 00 a1 00 09 32 09 42 09 44 09 3c 09 45 15 00 25 01 75 01 95 05 81 02 95 03 81 03 05 01 09 30 75 10 95 01 a4 55 0e 65 11 46 15 0a 26 80 25 81 02 09 31 46 b4 05 26 20 1c 81 02 b4 05 0d 09 30 26 00 01 81 02 06 00 ff 09 01 81 02 c0 85 0c 06 00 ff 09 0c 75 08 95 06 26 ff 00 b1 02 85 0b 09 0b 95 02 b1 02 85 11 09 11 b1 02 85 15 09 15 95 05 b1 02 85 18 09 18 95 0c b1 02 c0 05 0d 09 04 a1 01 85 03 06 00 ff 09 01 75 10 95 01 15 00 27 ff ff 00 00 81 02 05 0d 09 22 a1 02 09 42 15 00 25 01 75 01 95 01 81 02 09 32 81 02 09 47 81 02 95 05 81 03 75 10 09 51 27 ff ff 00 00 95 01 81 02 05 01 09 30 75 10 95 02 a4 55 0e 65 11 46 15 0a 26 80 25 81 02 09 31 46 b4 05 26 20 1c 81 02 05 0d 09 48 95 01 26 80 25 81 02 09 49 26 20 1c 81 02 b4 06 00 ff 09 02 75 08 95 04 15 00 26 ff 00 81 02 c0 05 0d 09 22 a1 02 09 42 15 00 25 01 75 01 95 01 81 02 09 32 81 02 09 47 81 02 95 05 81 03 75 10 09 51 27 ff ff 00 00 95 01 81 02 05 01 09 30 75 10 95 02 a4 55 0e 65 11 46 15 0a 26 80 25 81 02 09 31 46 b4 05 26 20 1c 81 02 05 0d 09 48 95 01 26 80 25 81 02 09 49 26 20 1c 81 02 b4 06 00 ff 09 02 75 08 95 04 15 00 26 ff 00 81 02 c0 05 0d 09 22 a1 02 09 42 15 00 25 01 75 01 95 01 81 02 09 32 81 02 09 47 81 02 95 05 81 03 75 10 09 51 27 ff ff 00 00 95 01 81 02 05 01 09 30 75 10 95 02 a4 55 0e 65 11 46 15 0a 26 80 25 81 02 09 31 46 b4 05 26 20 1c 81 02 05 0d 09 48 95 01 26 80 25 81 02 09 49 26 20 1c 81 02 b4 06 00 ff 09 02 75 08 95 04 15 00 26 ff 00 81 02 c0 05 0d 09 22 a1 02 09 42 15 00 25 01 75 01 95 01 81 02 09 32 81 02 09 47 81 02 95 05 81 03 75 10 09 51 27 ff ff 00 00 95 01 81 02 05 01 09 30 75 10 95 02 a4 55 0e 65 11 46 15 0a 26 80 25 81 02 09 31 46 b4 05 26 20 1c 81 02 05 0d 09 48 95 01 26 80 25 81 02 09 49 26 20 1c 81 02 b4 06 00 ff 09 02 75 08 95 04 15 00 26 ff 00 81 02 c0 05 0d 09 22 a1 02 09 42 15 00 25 01 75 01 95 01 81 02 09 32 81 02 09 47 81 02 95 05 81 03 75 10 09 51 27 ff ff 00 00 95 01 81 02 05 01 09 30 75 10 95 02 a4 55 0e 65 11 46 15 0a 26 80 25 81 02 09 31 46 b4 05 26 20 1c 81 02 05 0d 09 48 95 01 26 80 25 81 02 09 49 26 20 1c 81 02 b4 06 00 ff 09 02 75 08 95 04 15 00 26 ff 00 81 02 c0 05 0d 09 22 a1 02 09 42 15 00 25 01 75 01 95 01 81 02 09 32 81 02 09 47 81 02 95 05 81 03 75 10 09 51 27 ff ff 00 00 95 01 81 02 05 01 09 30 75 10 95 02 a4 55 0e 65 11 46 15 0a 26 80 25 81 02 09 31 46 b4 05 26 20 1c 81 02 05 0d 09 48 95 01 26 80 25 81 02 09 49 26 20 1c 81 02 b4 06 00 ff 09 02 75 08 95 04 15 00 26 ff 00 81 02 c0 05 0d 09 22 a1 02 09 42 15 00 25 01 75 01 95 01 81 02 09 32 81 02 09 47 81 02 95 05 81 03 75 10 09 51 27 ff ff 00 00 95 01 81 02 05 01 09 30 75 10 95 02 a4 55 0e 65 11 46 15 0a 26 80 25 81 02 09 31 46 b4 05 26 20 1c 81 02 05 0d 09 48 95 01 26 80 25 81 02 09 49 26 20 1c 81 02 b4 06 00 ff 09 02 75 08 95 04 15 00 26 ff 00 81 02 c0 05 0d 09 22 a1 02 09 42 15 00 25 01 75 01 95 01 81 02 09 32 81 02 09 47 81 02 95 05 81 03 75 10 09 51 27 ff ff 00 00 95 01 81 02 05 01 09 30 75 10 95 02 a4 55 0e 65 11 46 15 0a 26 80 25 81 02 09 31 46 b4 05 26 20 1c 81 02 05 0d 09 48 95 01 26 80 25 81 02 09 49 26 20 1c 81 02 b4 06 00 ff 09 02 75 08 95 04 15 00 26 ff 00 81 02 c0 05 0d 09 22 a1 02 09 42 15 00 25 01 75 01 95 01 81 02 09 32 81 02 09 47 81 02 95 05 81 03 75 10 09 51 27 ff ff 00 00 95 01 81 02 05 01 09 30 75 10 95 02 a4 55 0e 65 11 46 15 0a 26 80 25 81 02 09 31 46 b4 05 26 20 1c 81 02 05 0d 09 48 95 01 26 80 25 81 02 09 49 26 20 1c 81 02 b4 06 00 ff 09 02 75 08 95 04 15 00 26 ff 00 81 02 c0 05 0d 09 22 a1 02 09 42 15 00 25 01 75 01 95 01 81 02 09 32 81 02 09 47 81 02 95 05 81 03 75 10 09 51 27 ff ff 00 00 95 01 81 02 05 01 09 30 75 10 95 02 a4 55 0e 65 11 46 15 0a 26 80 25 81 02 09 31 46 b4 05 26 20 1c 81 02 05 0d 09 48 95 01 26 80 25 81 02 09 49 26 20 1c 81 02 b4 06 00 ff 09 02 75 08 95 04 15 00 26 ff 00 81 02 c0 05 0d 09 54 95 01 75 08 81 02 09 56 75 20 95 01 27 ff ff ff 0f 81 02 85 04 09 55 75 08 95 01 25 0b b1 02 85 0a 06 00 ff 09 03 15 00 b1 02 85 1b 06 00 ff 09 c5 15 00 26 ff 00 75 08 96 00 01 b1 02 c0 05 01 09 02 a1 01 85 02 09 01 a1 00 05 09 19 01 29 02 15 00 25 01 75 01 95 02 81 02 95 06 81 03 05 01 09 30 09 31 15 81 25 7f 75 08 95 02 81 06 c0 c0",
+ )
+
+
+class Testn_trig_1b96_0c03(BaseTest.TestTablet):
+ def create_device(self):
+ return PenDigitizer(
+ "uhid test n_trig_1b96_0c03",
+ rdesc="75 08 15 00 26 ff 00 06 0b ff 09 0b a1 01 95 0f 09 29 85 29 b1 02 95 1f 09 2a 85 2a b1 02 95 3e 09 2b 85 2b b1 02 95 fe 09 2c 85 2c b1 02 96 fe 01 09 2d 85 2d b1 02 95 02 09 48 85 48 b1 02 95 0f 09 2e 85 2e 81 02 95 1f 09 2f 85 2f 81 02 95 3e 09 30 85 30 81 02 95 fe 09 31 85 31 81 02 96 fe 01 09 32 85 32 81 02 75 08 96 fe 0f 09 35 85 35 81 02 c0 05 0d 09 02 a1 01 85 01 09 20 35 00 a1 00 09 32 09 42 09 44 09 3c 09 45 15 00 25 01 75 01 95 05 81 02 95 03 81 03 05 01 09 30 75 10 95 01 a4 55 0e 65 11 46 15 0a 26 80 25 81 02 09 31 46 b4 05 26 20 1c 81 02 b4 05 0d 09 30 26 00 01 81 02 06 00 ff 09 01 81 02 c0 85 0c 06 00 ff 09 0c 75 08 95 06 26 ff 00 b1 02 85 0b 09 0b 95 02 b1 02 85 11 09 11 b1 02 85 15 09 15 95 05 b1 02 85 18 09 18 95 0c b1 02 c0 05 0d 09 04 a1 01 85 03 06 00 ff 09 01 75 10 95 01 15 00 27 ff ff 00 00 81 02 05 0d 09 22 a1 02 09 42 15 00 25 01 75 01 95 01 81 02 95 01 81 03 09 47 81 02 95 05 81 03 75 10 09 51 27 ff ff 00 00 95 01 81 02 05 01 09 30 75 10 95 02 a4 55 0e 65 11 46 15 0a 26 80 25 81 02 09 31 46 b4 05 26 20 1c 81 02 05 0d 09 48 95 01 26 80 25 81 02 09 49 26 20 1c 81 02 b4 06 00 ff 09 02 75 08 95 04 15 00 26 ff 00 81 02 c0 05 0d 09 22 a1 02 09 42 15 00 25 01 75 01 95 01 81 02 95 01 81 03 09 47 81 02 95 05 81 03 75 10 09 51 27 ff ff 00 00 95 01 81 02 05 01 09 30 75 10 95 02 a4 55 0e 65 11 46 15 0a 26 80 25 81 02 09 31 46 b4 05 26 20 1c 81 02 05 0d 09 48 95 01 26 80 25 81 02 09 49 26 20 1c 81 02 b4 06 00 ff 09 02 75 08 95 04 15 00 26 ff 00 81 02 c0 05 0d 09 54 95 01 75 08 81 02 09 56 75 20 95 01 27 ff ff ff 0f 81 02 85 04 09 55 75 08 95 01 25 0b b1 02 85 0a 06 00 ff 09 03 15 00 b1 02 85 1b 06 00 ff 09 c5 15 00 26 ff 00 75 08 96 00 01 b1 02 c0 05 01 09 02 a1 01 85 02 09 01 a1 00 05 09 19 01 29 02 15 00 25 01 75 01 95 02 81 02 95 06 81 03 05 01 09 30 09 31 15 81 25 7f 75 08 95 02 81 06 c0 c0",
+ )
+
+
+class Testn_trig_1b96_0f00(BaseTest.TestTablet):
+ def create_device(self):
+ return PenDigitizer(
+ "uhid test n_trig_1b96_0f00",
+ rdesc="75 08 15 00 26 ff 00 06 0b ff 09 0b a1 01 95 0f 09 29 85 29 b1 02 95 1f 09 2a 85 2a b1 02 95 3e 09 2b 85 2b b1 02 95 fe 09 2c 85 2c b1 02 96 fe 01 09 2d 85 2d b1 02 95 02 09 48 85 48 b1 02 95 0f 09 2e 85 2e 81 02 95 1f 09 2f 85 2f 81 02 95 3e 09 30 85 30 81 02 95 fe 09 31 85 31 81 02 96 fe 01 09 32 85 32 81 02 75 08 96 fe 0f 09 35 85 35 81 02 c0 05 0d 09 02 a1 01 85 01 09 20 35 00 a1 00 09 32 09 42 09 44 09 3c 09 45 15 00 25 01 75 01 95 05 81 02 95 03 81 03 05 01 09 30 75 10 95 01 a4 55 0e 65 11 46 03 0a 26 80 25 81 02 09 31 46 a1 05 26 20 1c 81 02 b4 05 0d 09 30 26 00 01 81 02 06 00 ff 09 01 81 02 c0 85 0c 06 00 ff 09 0c 75 08 95 06 26 ff 00 b1 02 85 0b 09 0b 95 02 b1 02 85 11 09 11 b1 02 85 15 09 15 95 05 b1 02 85 18 09 18 95 0c b1 02 c0 05 0d 09 04 a1 01 85 03 06 00 ff 09 01 75 10 95 01 15 00 27 ff ff 00 00 81 02 05 0d 09 22 a1 02 09 42 15 00 25 01 75 01 95 01 81 02 95 01 81 03 09 47 81 02 95 05 81 03 75 10 09 51 27 ff ff 00 00 95 01 81 02 05 01 09 30 75 10 95 02 a4 55 0e 65 11 46 03 0a 26 80 25 81 02 09 31 46 a1 05 26 20 1c 81 02 05 0d 09 48 95 01 26 80 25 81 02 09 49 26 20 1c 81 02 b4 06 00 ff 09 02 75 08 95 04 15 00 26 ff 00 81 02 c0 05 0d 09 22 a1 02 09 42 15 00 25 01 75 01 95 01 81 02 95 01 81 03 09 47 81 02 95 05 81 03 75 10 09 51 27 ff ff 00 00 95 01 81 02 05 01 09 30 75 10 95 02 a4 55 0e 65 11 46 03 0a 26 80 25 81 02 09 31 46 a1 05 26 20 1c 81 02 05 0d 09 48 95 01 26 80 25 81 02 09 49 26 20 1c 81 02 b4 06 00 ff 09 02 75 08 95 04 15 00 26 ff 00 81 02 c0 05 0d 09 54 95 01 75 08 81 02 09 56 75 20 95 01 27 ff ff ff 0f 81 02 85 04 09 55 75 08 95 01 25 0b b1 02 85 0a 06 00 ff 09 03 15 00 b1 02 85 1b 06 00 ff 09 c5 15 00 26 ff 00 75 08 96 00 01 b1 02 c0 05 01 09 02 a1 01 85 02 09 01 a1 00 05 09 19 01 29 02 15 00 25 01 75 01 95 02 81 02 95 06 81 03 05 01 09 30 09 31 15 81 25 7f 75 08 95 02 81 06 c0 c0",
+ )
+
+
+class Testn_trig_1b96_0f04(BaseTest.TestTablet):
+ def create_device(self):
+ return PenDigitizer(
+ "uhid test n_trig_1b96_0f04",
+ rdesc="75 08 15 00 26 ff 00 06 0b ff 09 0b a1 01 95 0f 09 29 85 29 b1 02 95 1f 09 2a 85 2a b1 02 95 3e 09 2b 85 2b b1 02 95 fe 09 2c 85 2c b1 02 96 fe 01 09 2d 85 2d b1 02 95 02 09 48 85 48 b1 02 95 0f 09 2e 85 2e 81 02 95 1f 09 2f 85 2f 81 02 95 3e 09 30 85 30 81 02 95 fe 09 31 85 31 81 02 96 fe 01 09 32 85 32 81 02 75 08 96 fe 0f 09 35 85 35 81 02 c0 05 0d 09 02 a1 01 85 01 09 20 35 00 a1 00 09 32 09 42 09 44 09 3c 09 45 15 00 25 01 75 01 95 05 81 02 95 03 81 03 05 01 09 30 75 10 95 01 a4 55 0e 65 11 46 7f 0b 26 80 25 81 02 09 31 46 78 06 26 20 1c 81 02 b4 05 0d 09 30 26 00 01 81 02 06 00 ff 09 01 81 02 c0 85 0c 06 00 ff 09 0c 75 08 95 06 26 ff 00 b1 02 85 0b 09 0b 95 02 b1 02 85 11 09 11 b1 02 85 15 09 15 95 05 b1 02 85 18 09 18 95 0c b1 02 c0 05 0d 09 04 a1 01 85 03 06 00 ff 09 01 75 10 95 01 15 00 27 ff ff 00 00 81 02 05 0d 09 22 a1 02 09 42 15 00 25 01 75 01 95 01 81 02 95 01 81 03 09 47 81 02 95 05 81 03 75 10 09 51 27 ff ff 00 00 95 01 81 02 05 01 09 30 75 10 95 02 a4 55 0e 65 11 46 7f 0b 26 80 25 81 02 09 31 46 78 06 26 20 1c 81 02 05 0d 09 48 95 01 26 80 25 81 02 09 49 26 20 1c 81 02 b4 06 00 ff 09 02 75 08 95 04 15 00 26 ff 00 81 02 c0 05 0d 09 22 a1 02 09 42 15 00 25 01 75 01 95 01 81 02 95 01 81 03 09 47 81 02 95 05 81 03 75 10 09 51 27 ff ff 00 00 95 01 81 02 05 01 09 30 75 10 95 02 a4 55 0e 65 11 46 7f 0b 26 80 25 81 02 09 31 46 78 06 26 20 1c 81 02 05 0d 09 48 95 01 26 80 25 81 02 09 49 26 20 1c 81 02 b4 06 00 ff 09 02 75 08 95 04 15 00 26 ff 00 81 02 c0 05 0d 09 54 95 01 75 08 81 02 09 56 75 20 95 01 27 ff ff ff 0f 81 02 85 04 09 55 75 08 95 01 25 0b b1 02 85 0a 06 00 ff 09 03 15 00 b1 02 85 1b 06 00 ff 09 c5 15 00 26 ff 00 75 08 96 00 01 b1 02 c0 05 01 09 02 a1 01 85 02 09 01 a1 00 05 09 19 01 29 02 15 00 25 01 75 01 95 02 81 02 95 06 81 03 05 01 09 30 09 31 15 81 25 7f 75 08 95 02 81 06 c0 c0",
+ )
+
+
+class Testn_trig_1b96_1000(BaseTest.TestTablet):
+ def create_device(self):
+ return PenDigitizer(
+ "uhid test n_trig_1b96_1000",
+ rdesc="75 08 15 00 26 ff 00 06 0b ff 09 0b a1 01 95 0f 09 29 85 29 b1 02 95 1f 09 2a 85 2a b1 02 95 3e 09 2b 85 2b b1 02 95 fe 09 2c 85 2c b1 02 96 fe 01 09 2d 85 2d b1 02 95 02 09 48 85 48 b1 02 95 0f 09 2e 85 2e 81 02 95 1f 09 2f 85 2f 81 02 95 3e 09 30 85 30 81 02 95 fe 09 31 85 31 81 02 96 fe 01 09 32 85 32 81 02 75 08 96 fe 0f 09 35 85 35 81 02 c0 05 0d 09 02 a1 01 85 01 09 20 35 00 a1 00 09 32 09 42 09 44 09 3c 09 45 15 00 25 01 75 01 95 05 81 02 95 03 81 03 05 01 09 30 75 10 95 01 a4 55 0e 65 11 46 03 0a 26 80 25 81 02 09 31 46 a1 05 26 20 1c 81 02 b4 05 0d 09 30 26 00 01 81 02 06 00 ff 09 01 81 02 c0 85 0c 06 00 ff 09 0c 75 08 95 06 26 ff 00 b1 02 85 0b 09 0b 95 02 b1 02 85 11 09 11 b1 02 85 15 09 15 95 05 b1 02 85 18 09 18 95 0c b1 02 c0 05 0d 09 04 a1 01 85 03 06 00 ff 09 01 75 10 95 01 15 00 27 ff ff 00 00 81 02 05 0d 09 22 a1 02 09 42 15 00 25 01 75 01 95 01 81 02 95 01 81 03 09 47 81 02 95 05 81 03 75 10 09 51 27 ff ff 00 00 95 01 81 02 05 01 09 30 75 10 95 02 a4 55 0e 65 11 46 03 0a 26 80 25 81 02 09 31 46 a1 05 26 20 1c 81 02 05 0d 09 48 95 01 26 80 25 81 02 09 49 26 20 1c 81 02 b4 06 00 ff 09 02 75 08 95 04 15 00 26 ff 00 81 02 c0 05 0d 09 22 a1 02 09 42 15 00 25 01 75 01 95 01 81 02 95 01 81 03 09 47 81 02 95 05 81 03 75 10 09 51 27 ff ff 00 00 95 01 81 02 05 01 09 30 75 10 95 02 a4 55 0e 65 11 46 03 0a 26 80 25 81 02 09 31 46 a1 05 26 20 1c 81 02 05 0d 09 48 95 01 26 80 25 81 02 09 49 26 20 1c 81 02 b4 06 00 ff 09 02 75 08 95 04 15 00 26 ff 00 81 02 c0 05 0d 09 54 95 01 75 08 81 02 09 56 75 20 95 01 27 ff ff ff 0f 81 02 85 04 09 55 75 08 95 01 25 0b b1 02 85 0a 06 00 ff 09 03 15 00 b1 02 85 1b 06 00 ff 09 c5 15 00 26 ff 00 75 08 96 00 01 b1 02 c0 05 01 09 02 a1 01 85 02 09 01 a1 00 05 09 19 01 29 02 15 00 25 01 75 01 95 02 81 02 95 06 81 03 05 01 09 30 09 31 15 81 25 7f 75 08 95 02 81 06 c0 c0",
+ )
+
+
+class TestGXTP_27c6_0113(BaseTest.TestTablet):
+ def create_device(self):
+ return GXTP_pen(
+ "uhid test GXTP_27c6_0113",
+ rdesc="05 0d 09 04 a1 01 85 01 09 22 a1 02 55 0e 65 11 35 00 15 00 09 42 25 01 75 01 95 01 81 02 95 07 81 01 95 01 75 08 09 51 81 02 75 10 05 01 26 00 14 46 1f 07 09 30 81 02 26 80 0c 46 77 04 09 31 81 02 05 0d c0 09 22 a1 02 09 42 25 01 75 01 95 01 81 02 95 07 81 01 95 01 75 08 09 51 81 02 75 10 05 01 26 00 14 46 1f 07 09 30 81 02 26 80 0c 46 77 04 09 31 81 02 05 0d c0 09 22 a1 02 09 42 25 01 75 01 95 01 81 02 95 07 81 01 95 01 75 08 09 51 81 02 75 10 05 01 26 00 14 46 1f 07 09 30 81 02 26 80 0c 46 77 04 09 31 81 02 05 0d c0 09 22 a1 02 09 42 15 00 25 01 75 01 95 01 81 02 95 07 81 01 75 08 09 51 95 01 81 02 05 01 26 00 14 75 10 55 0e 65 11 09 30 35 00 46 1f 07 81 02 26 80 0c 46 77 04 09 31 81 02 05 0d c0 09 22 a1 02 09 42 15 00 25 01 75 01 95 01 81 02 95 07 81 01 75 08 09 51 95 01 81 02 05 01 26 00 14 75 10 55 0e 65 11 09 30 35 00 46 1f 07 81 02 26 80 0c 46 77 04 09 31 81 02 05 0d c0 09 54 15 00 25 7f 75 08 95 01 81 02 85 02 09 55 95 01 25 0a b1 02 85 03 06 00 ff 09 c5 15 00 26 ff 00 75 08 96 00 01 b1 02 c0 05 0d 09 02 a1 01 85 08 09 20 a1 00 09 42 09 44 09 3c 09 45 15 00 25 01 75 01 95 04 81 02 95 01 81 03 09 32 81 02 95 02 81 03 95 01 75 08 09 51 81 02 05 01 09 30 75 10 95 01 a4 55 0e 65 11 35 00 26 00 14 46 1f 07 81 42 09 31 26 80 0c 46 77 04 81 42 b4 05 0d 09 30 26 ff 0f 81 02 09 3d 65 14 55 0e 36 d8 dc 46 28 23 16 d8 dc 26 28 23 81 02 09 3e 81 02 c0 c0 06 f0 ff 09 01 a1 01 85 0e 09 01 15 00 25 ff 75 08 95 40 91 02 09 01 15 00 25 ff 75 08 95 40 81 02 c0 05 01 09 06 a1 01 85 04 05 07 09 e3 15 00 25 01 75 01 95 01 81 02 95 07 81 03 c0",
+ )
+
+
+################################################################################
+#
+# Windows 8 compatible devices with USI Pen
+#
+################################################################################
+
+
+class TestElan_04f3_2A49(BaseTest.TestTablet):
+ def create_device(self):
+ return USIPen(
+ "uhid test Elan_04f3_2A49",
+ rdesc="05 0d 09 04 a1 01 85 01 09 22 a1 02 09 42 15 00 25 01 75 01 95 01 81 02 75 01 81 03 75 06 09 51 25 3f 81 02 26 ff 00 75 08 55 0f 65 11 35 00 45 ff 09 48 81 02 09 49 81 02 09 30 81 02 95 01 05 01 a4 26 cf 0f 75 10 55 0f 65 11 09 30 35 00 46 26 01 95 01 81 02 26 77 0a 46 a6 00 09 31 81 02 b4 c0 05 0d 09 22 a1 02 05 0d 09 42 15 00 25 01 75 01 95 01 81 02 75 01 81 03 75 06 09 51 25 3f 81 02 26 ff 00 75 08 55 0f 65 11 35 00 45 ff 09 48 81 02 09 49 81 02 09 30 81 02 95 01 05 01 a4 26 cf 0f 75 10 55 0f 65 11 09 30 35 00 46 26 01 95 01 81 02 26 77 0a 46 a6 00 09 31 81 02 b4 c0 05 0d 09 22 a1 02 05 0d 09 42 15 00 25 01 75 01 95 01 81 02 75 01 81 03 75 06 09 51 25 3f 81 02 26 ff 00 75 08 55 0f 65 11 35 00 45 ff 09 48 81 02 09 49 81 02 09 30 81 02 95 01 05 01 a4 26 cf 0f 75 10 55 0f 65 11 09 30 35 00 46 26 01 95 01 81 02 26 77 0a 46 a6 00 09 31 81 02 b4 c0 05 0d 09 22 a1 02 05 0d 09 42 15 00 25 01 75 01 95 01 81 02 75 01 81 03 75 06 09 51 25 3f 81 02 26 ff 00 75 08 55 0f 65 11 35 00 45 ff 09 48 81 02 09 49 81 02 09 30 81 02 95 01 05 01 a4 26 cf 0f 75 10 55 0f 65 11 09 30 35 00 46 26 01 95 01 81 02 26 77 0a 46 a6 00 09 31 81 02 b4 c0 05 0d 09 22 a1 02 05 0d 09 42 15 00 25 01 75 01 95 01 81 02 75 01 81 03 75 06 09 51 25 3f 81 02 26 ff 00 75 08 55 0f 65 11 35 00 45 ff 09 48 81 02 09 49 81 02 09 30 81 02 95 01 05 01 a4 26 cf 0f 75 10 55 0f 65 11 09 30 35 00 46 26 01 95 01 81 02 26 77 0a 46 a6 00 09 31 81 02 b4 c0 05 0d 09 54 25 7f 96 01 00 75 08 81 02 85 0a 09 55 25 0a b1 02 85 44 06 00 ff 09 c5 16 00 00 26 ff 00 75 08 96 00 01 b1 02 c0 06 ff 01 09 01 a1 01 85 02 16 00 00 26 ff 00 75 08 95 40 09 00 81 02 c0 06 00 ff 09 01 a1 01 85 03 75 08 95 20 09 01 91 02 c0 06 00 ff 09 01 a1 01 85 06 09 03 75 08 95 12 91 02 09 04 75 08 95 03 b1 02 c0 06 01 ff 09 01 a1 01 85 04 15 00 26 ff 00 75 08 95 13 09 00 81 02 c0 05 0d 09 02 a1 01 85 07 35 00 09 20 a1 00 09 32 09 42 09 44 09 3c 09 45 15 00 25 01 75 01 95 05 81 02 95 03 81 03 05 01 09 30 75 10 95 01 a4 55 0f 65 11 46 26 01 26 1c 48 81 42 09 31 46 a6 00 26 bc 2f 81 42 b4 05 0d 09 30 26 00 10 81 02 75 08 95 01 09 3b 25 64 81 42 09 38 15 00 25 02 81 02 09 5c 26 ff 00 81 02 09 5e 81 02 09 70 a1 02 15 01 25 06 09 72 09 73 09 74 09 75 09 76 09 77 81 20 09 5b 25 ff 75 40 81 02 c0 06 00 ff 75 08 95 02 09 01 81 02 c0 05 0d 85 60 09 81 a1 02 09 38 75 08 95 01 15 00 25 02 81 02 09 81 15 01 25 04 09 82 09 83 09 84 09 85 81 20 c0 85 61 09 5c a1 02 15 00 26 ff 00 75 08 95 01 09 38 b1 02 09 5c 26 ff 00 b1 02 09 5d 75 01 95 01 25 01 b1 02 95 07 b1 03 c0 85 62 09 5e a1 02 09 38 15 00 25 02 75 08 95 01 b1 02 09 5e 26 ff 00 b1 02 09 5f 75 01 25 01 b1 02 75 07 b1 03 c0 85 63 09 70 a1 02 75 08 95 01 15 00 25 02 09 38 b1 02 09 70 a1 02 25 06 09 72 09 73 09 74 09 75 09 76 09 77 b1 20 c0 09 71 75 01 25 01 b1 02 75 07 b1 03 c0 85 64 09 80 15 00 25 ff 75 40 95 01 b1 02 85 65 09 44 a1 02 09 38 75 08 95 01 25 02 b1 02 15 01 25 03 09 44 a1 02 09 a4 09 44 09 5a 09 45 09 a3 b1 20 c0 09 5a a1 02 09 a4 09 44 09 5a 09 45 09 a3 b1 20 c0 09 45 a1 02 09 a4 09 44 09 5a 09 45 09 a3 b1 20 c0 c0 85 66 75 08 95 01 05 0d 09 90 a1 02 09 38 25 02 b1 02 09 91 75 10 26 ff 0f b1 02 09 92 75 40 25 ff b1 02 05 06 09 2a 75 08 26 ff 00 a1 02 09 2d b1 02 09 2e b1 02 c0 c0 85 67 05 06 09 2b a1 02 05 0d 25 02 09 38 b1 02 05 06 09 2b a1 02 09 2d 26 ff 00 b1 02 09 2e b1 02 c0 c0 85 68 06 00 ff 09 01 a1 02 05 0d 09 38 75 08 95 01 25 02 b1 02 06 00 ff 09 01 75 10 27 ff ff 00 00 b1 02 c0 85 69 05 0d 09 38 75 08 95 01 15 00 25 02 b1 02 c0 06 00 ff 09 81 a1 01 85 17 75 08 95 1f 09 05 81 02 c0",
+ input_info=(BusType.I2C, 0x04F3, 0x2A49),
+ )
+
+
+class TestGoodix_27c6_0e00(BaseTest.TestTablet):
+ def create_device(self):
+ return USIPen(
+ "uhid test Elan_04f3_2A49",
+ rdesc="05 0d 09 04 a1 01 85 01 09 22 a1 02 55 0e 65 11 35 00 15 00 09 42 25 01 75 01 95 01 81 02 25 7f 09 30 75 07 81 42 95 01 75 08 09 51 81 02 75 10 05 01 26 04 20 46 e6 09 09 30 81 02 26 60 15 46 9a 06 09 31 81 02 05 0d 55 0f 75 08 25 ff 45 ff 09 48 81 42 09 49 81 42 55 0e c0 09 22 a1 02 09 42 25 01 75 01 95 01 81 02 25 7f 09 30 75 07 81 42 95 01 75 08 09 51 81 02 75 10 05 01 26 04 20 46 e6 09 09 30 81 02 26 60 15 46 9a 06 09 31 81 02 05 0d 55 0f 75 08 25 ff 45 ff 09 48 81 42 09 49 81 42 55 0e c0 09 22 a1 02 09 42 25 01 75 01 95 01 81 02 25 7f 09 30 75 07 81 42 95 01 75 08 09 51 81 02 75 10 05 01 26 04 20 46 e6 09 09 30 81 02 26 60 15 46 9a 06 09 31 81 02 05 0d 55 0f 75 08 25 ff 45 ff 09 48 81 42 09 49 81 42 55 0e c0 09 22 a1 02 09 42 15 00 25 01 75 01 95 01 81 02 25 7f 09 30 75 07 81 42 75 08 09 51 95 01 81 02 05 01 26 04 20 75 10 55 0e 65 11 09 30 35 00 46 e6 09 81 02 26 60 15 46 9a 06 09 31 81 02 05 0d 55 0f 75 08 25 ff 45 ff 09 48 81 42 09 49 81 42 55 0e c0 09 22 a1 02 09 42 15 00 25 01 75 01 95 01 81 02 25 7f 09 30 75 07 81 42 75 08 09 51 95 01 81 02 05 01 26 04 20 75 10 55 0e 65 11 09 30 35 00 46 e6 09 81 02 26 60 15 46 9a 06 09 31 81 02 05 0d 55 0f 75 08 25 ff 45 ff 09 48 81 42 09 49 81 42 55 0e c0 09 54 15 00 25 7f 75 08 95 01 81 02 85 02 09 55 95 01 25 0a b1 02 85 03 06 00 ff 09 c5 15 00 26 ff 00 75 08 96 00 01 b1 02 c0 05 0d 09 02 a1 01 09 20 a1 00 85 08 05 01 a4 09 30 35 00 46 e6 09 15 00 26 04 20 55 0d 65 13 75 10 95 01 81 02 09 31 46 9a 06 26 60 15 81 02 b4 05 0d 09 38 95 01 75 08 15 00 25 01 81 02 09 30 75 10 26 ff 0f 81 02 09 31 81 02 09 42 09 44 09 5a 09 3c 09 45 09 32 75 01 95 06 25 01 81 02 95 02 81 03 09 3d 55 0e 65 14 36 d8 dc 46 28 23 16 d8 dc 26 28 23 95 01 75 10 81 02 09 3e 81 02 09 41 15 00 27 a0 8c 00 00 35 00 47 a0 8c 00 00 81 02 05 20 0a 53 04 65 00 16 01 f8 26 ff 07 75 10 95 01 81 02 0a 54 04 81 02 0a 55 04 81 02 0a 57 04 81 02 0a 58 04 81 02 0a 59 04 81 02 0a 72 04 81 02 0a 73 04 81 02 0a 74 04 81 02 05 0d 09 3b 15 00 25 64 75 08 81 02 09 5b 25 ff 75 40 81 02 06 00 ff 09 5b 75 20 81 02 05 0d 09 5c 26 ff 00 75 08 81 02 09 5e 81 02 09 70 a1 02 15 01 25 06 09 72 09 73 09 74 09 75 09 76 09 77 81 20 c0 06 00 ff 09 01 15 00 27 ff ff 00 00 75 10 95 01 81 02 85 09 09 81 a1 02 09 81 15 01 25 04 09 82 09 83 09 84 09 85 81 20 c0 85 10 09 5c a1 02 15 00 25 01 75 08 95 01 09 38 b1 02 09 5c 26 ff 00 b1 02 09 5d 75 01 95 01 25 01 b1 02 95 07 b1 03 c0 85 11 09 5e a1 02 09 38 15 00 25 01 75 08 95 01 b1 02 09 5e 26 ff 00 b1 02 09 5f 75 01 25 01 b1 02 75 07 b1 03 c0 85 12 09 70 a1 02 75 08 95 01 15 00 25 01 09 38 b1 02 09 70 a1 02 25 06 09 72 09 73 09 74 09 75 09 76 09 77 b1 20 c0 09 71 75 01 25 01 b1 02 75 07 b1 03 c0 85 13 09 80 15 00 25 ff 75 40 95 01 b1 02 85 14 09 44 a1 02 09 38 75 08 95 01 25 01 b1 02 15 01 25 03 09 44 a1 02 09 a4 09 44 09 5a 09 45 09 a3 b1 20 c0 09 5a a1 02 09 a4 09 44 09 5a 09 45 09 a3 b1 20 c0 09 45 a1 02 09 a4 09 44 09 5a 09 45 09 a3 b1 20 c0 c0 85 15 75 08 95 01 05 0d 09 90 a1 02 09 38 25 01 b1 02 09 91 75 10 26 ff 0f b1 02 09 92 75 40 25 ff b1 02 05 06 09 2a 75 08 26 ff 00 a1 02 09 2d b1 02 09 2e b1 02 c0 c0 85 16 05 06 09 2b a1 02 05 0d 25 01 09 38 b1 02 05 06 09 2b a1 02 09 2d 26 ff 00 b1 02 09 2e b1 02 c0 c0 85 17 06 00 ff 09 01 a1 02 05 0d 09 38 75 08 95 01 25 01 b1 02 06 00 ff 09 01 75 10 27 ff ff 00 00 b1 02 c0 85 18 05 0d 09 38 75 08 95 01 15 00 25 01 b1 02 c0 c0 06 f0 ff 09 01 a1 01 85 0e 09 01 15 00 25 ff 75 08 95 40 91 02 09 01 15 00 25 ff 75 08 95 40 81 02 c0",
+ input_info=(BusType.I2C, 0x27C6, 0x0E00),
+ )
diff --git a/tools/testing/selftests/hid/tests/test_usb_crash.py b/tools/testing/selftests/hid/tests/test_usb_crash.py
new file mode 100644
index 000000000000..e98bff9197c7
--- /dev/null
+++ b/tools/testing/selftests/hid/tests/test_usb_crash.py
@@ -0,0 +1,103 @@
+#!/bin/env python3
+# SPDX-License-Identifier: GPL-2.0
+# -*- coding: utf-8 -*-
+#
+# Copyright (c) 2021 Benjamin Tissoires <benjamin.tissoires@gmail.com>
+# Copyright (c) 2021 Red Hat, Inc.
+#
+
+# This is to ensure we don't crash when emulating USB devices
+
+from . import base
+import pytest
+import logging
+
+logger = logging.getLogger("hidtools.test.usb")
+
+
+class USBDev(base.UHIDTestDevice):
+ # fmt: off
+ report_descriptor = [
+ 0x05, 0x01, # .Usage Page (Generic Desktop) 0
+ 0x09, 0x02, # .Usage (Mouse) 2
+ 0xa1, 0x01, # .Collection (Application) 4
+ 0x09, 0x02, # ..Usage (Mouse) 6
+ 0xa1, 0x02, # ..Collection (Logical) 8
+ 0x09, 0x01, # ...Usage (Pointer) 10
+ 0xa1, 0x00, # ...Collection (Physical) 12
+ 0x05, 0x09, # ....Usage Page (Button) 14
+ 0x19, 0x01, # ....Usage Minimum (1) 16
+ 0x29, 0x03, # ....Usage Maximum (3) 18
+ 0x15, 0x00, # ....Logical Minimum (0) 20
+ 0x25, 0x01, # ....Logical Maximum (1) 22
+ 0x75, 0x01, # ....Report Size (1) 24
+ 0x95, 0x03, # ....Report Count (3) 26
+ 0x81, 0x02, # ....Input (Data,Var,Abs) 28
+ 0x75, 0x05, # ....Report Size (5) 30
+ 0x95, 0x01, # ....Report Count (1) 32
+ 0x81, 0x03, # ....Input (Cnst,Var,Abs) 34
+ 0x05, 0x01, # ....Usage Page (Generic Desktop) 36
+ 0x09, 0x30, # ....Usage (X) 38
+ 0x09, 0x31, # ....Usage (Y) 40
+ 0x15, 0x81, # ....Logical Minimum (-127) 42
+ 0x25, 0x7f, # ....Logical Maximum (127) 44
+ 0x75, 0x08, # ....Report Size (8) 46
+ 0x95, 0x02, # ....Report Count (2) 48
+ 0x81, 0x06, # ....Input (Data,Var,Rel) 50
+ 0xc0, # ...End Collection 52
+ 0xc0, # ..End Collection 53
+ 0xc0, # .End Collection 54
+ ]
+ # fmt: on
+
+ def __init__(self, name=None, input_info=None):
+ super().__init__(
+ name, "Mouse", input_info=input_info, rdesc=USBDev.report_descriptor
+ )
+
+ # skip witing for udev events, it's likely that the report
+ # descriptor is wrong
+ def is_ready(self):
+ return True
+
+ # we don't have an evdev node here, so paper over
+ # the checks
+ def get_evdev(self, application=None):
+ return "OK"
+
+
+class TestUSBDevice(base.BaseTestCase.TestUhid):
+ """
+ Test class to test if an emulated USB device crashes
+ the kernel.
+ """
+
+ # conftest.py is generating the following fixture:
+ #
+ # @pytest.fixture(params=[('modulename', 1, 2)])
+ # def usbVidPid(self, request):
+ # return request.param
+
+ @pytest.fixture()
+ def new_uhdev(self, usbVidPid, request):
+ self.module, self.vid, self.pid = usbVidPid
+ self._load_kernel_module(None, self.module)
+ return USBDev(input_info=(3, self.vid, self.pid))
+
+ def test_creation(self):
+ """
+ inject the USB dev through uhid and immediately see if there is a crash:
+
+ uhid can create a USB device with the BUS_USB bus, and some
+ drivers assume that they can then access USB related structures
+ when they are actually provided a uhid device. This leads to
+ a crash because those access result in a segmentation fault.
+
+ The kernel should not crash on any (random) user space correct
+ use of its API. So run through all available modules and declared
+ devices to see if we can generate a uhid device without a crash.
+
+ The test is empty as the fixture `check_taint` is doing the job (and
+ honestly, when the kernel crashes, the whole machine freezes).
+ """
+ assert True
diff --git a/tools/testing/selftests/hid/tests/test_wacom_generic.py b/tools/testing/selftests/hid/tests/test_wacom_generic.py
new file mode 100644
index 000000000000..b62c7dba6777
--- /dev/null
+++ b/tools/testing/selftests/hid/tests/test_wacom_generic.py
@@ -0,0 +1,1198 @@
+#!/bin/env python3
+# SPDX-License-Identifier: GPL-2.0
+# -*- coding: utf-8 -*-
+#
+# Copyright (c) 2017 Benjamin Tissoires <benjamin.tissoires@gmail.com>
+# Copyright (c) 2017 Red Hat, Inc.
+# Copyright (c) 2020 Wacom Technology Corp.
+#
+# Authors:
+# Jason Gerecke <jason.gerecke@wacom.com>
+
+"""
+Tests for the Wacom driver generic codepath.
+
+This module tests the function of the Wacom driver's generic codepath.
+The generic codepath is used by devices which are not explicitly listed
+in the driver's device table. It uses the device's HID descriptor to
+decode reports sent by the device.
+"""
+
+from .descriptors_wacom import (
+ wacom_pth660_v145,
+ wacom_pth660_v150,
+ wacom_pth860_v145,
+ wacom_pth860_v150,
+ wacom_pth460_v105,
+)
+
+import attr
+from collections import namedtuple
+from enum import Enum
+from hidtools.hut import HUT
+from hidtools.hid import HidUnit
+from . import base
+from . import test_multitouch
+import libevdev
+import pytest
+
+import logging
+
+logger = logging.getLogger("hidtools.test.wacom")
+
+KERNEL_MODULE = ("wacom", "wacom")
+
+
+class ProximityState(Enum):
+ """
+ Enumeration of allowed proximity states.
+ """
+
+ # Tool is not able to be sensed by the device
+ OUT = 0
+
+ # Tool is close enough to be sensed, but some data may be invalid
+ # or inaccurate
+ IN_PROXIMITY = 1
+
+ # Tool is close enough to be sensed with high accuracy. All data
+ # valid.
+ IN_RANGE = 2
+
+ def fill(self, reportdata):
+ """Fill a report with approrpiate HID properties/values."""
+ reportdata.inrange = self in [ProximityState.IN_RANGE]
+ reportdata.wacomsense = self in [
+ ProximityState.IN_PROXIMITY,
+ ProximityState.IN_RANGE,
+ ]
+
+
+class ReportData:
+ """
+ Placeholder for HID report values.
+ """
+
+ pass
+
+
+@attr.s
+class Buttons:
+ """
+ Stylus button state.
+
+ Describes the state of each of the buttons / "side switches" that
+ may be present on a stylus. Buttons set to 'None' indicate the
+ state is "unchanged" since the previous event.
+ """
+
+ primary = attr.ib(default=None)
+ secondary = attr.ib(default=None)
+ tertiary = attr.ib(default=None)
+
+ @staticmethod
+ def clear():
+ """Button object with all states cleared."""
+ return Buttons(False, False, False)
+
+ def fill(self, reportdata):
+ """Fill a report with approrpiate HID properties/values."""
+ reportdata.barrelswitch = int(self.primary or 0)
+ reportdata.secondarybarrelswitch = int(self.secondary or 0)
+ reportdata.b3 = int(self.tertiary or 0)
+
+
+@attr.s
+class ToolID:
+ """
+ Stylus tool identifiers.
+
+ Contains values used to identify a specific stylus, e.g. its serial
+ number and tool-type identifier. Values of ``0`` may sometimes be
+ used for the out-of-range condition.
+ """
+
+ serial = attr.ib()
+ tooltype = attr.ib()
+
+ @staticmethod
+ def clear():
+ """ToolID object with all fields cleared."""
+ return ToolID(0, 0)
+
+ def fill(self, reportdata):
+ """Fill a report with approrpiate HID properties/values."""
+ reportdata.transducerserialnumber = self.serial & 0xFFFFFFFF
+ reportdata.serialhi = (self.serial >> 32) & 0xFFFFFFFF
+ reportdata.tooltype = self.tooltype
+
+
+@attr.s
+class PhysRange:
+ """
+ Range of HID physical values, with units.
+ """
+
+ unit = attr.ib()
+ min_size = attr.ib()
+ max_size = attr.ib()
+
+ CENTIMETER = HidUnit.from_string("SILinear: cm")
+ DEGREE = HidUnit.from_string("EnglishRotation: deg")
+
+ def contains(self, field):
+ """
+ Check if the physical size of the provided field is in range.
+
+ Compare the physical size described by the provided HID field
+ against the range of sizes described by this object. This is
+ an exclusive range comparison (e.g. 0 cm is not within the
+ range 0 cm - 5 cm) and exact unit comparison (e.g. 1 inch is
+ not within the range 0 cm - 5 cm).
+ """
+ phys_size = (field.physical_max - field.physical_min) * 10 ** (field.unit_exp)
+ return (
+ field.unit == self.unit.value
+ and phys_size > self.min_size
+ and phys_size < self.max_size
+ )
+
+
+class BaseTablet(base.UHIDTestDevice):
+ """
+ Skeleton object for all kinds of tablet devices.
+ """
+
+ def __init__(self, rdesc, name=None, info=None):
+ assert rdesc is not None
+ super().__init__(name, "Pen", input_info=info, rdesc=rdesc)
+ self.buttons = Buttons.clear()
+ self.toolid = ToolID.clear()
+ self.proximity = ProximityState.OUT
+ self.offset = 0
+ self.ring = -1
+ self.ek0 = False
+
+ def match_evdev_rule(self, application, evdev):
+ """
+ Filter out evdev nodes based on the requested application.
+
+ The Wacom driver may create several device nodes for each USB
+ interface device. It is crucial that we run tests with the
+ expected device node or things will obviously go off the rails.
+ Use the Wacom driver's usual naming conventions to apply a
+ sensible default filter.
+ """
+ if application in ["Pen", "Pad"]:
+ return evdev.name.endswith(application)
+ else:
+ return True
+
+ def create_report(
+ self, x, y, pressure, buttons=None, toolid=None, proximity=None, reportID=None
+ ):
+ """
+ Return an input report for this device.
+
+ :param x: absolute x
+ :param y: absolute y
+ :param pressure: pressure
+ :param buttons: stylus button state. Use ``None`` for unchanged.
+ :param toolid: tool identifiers. Use ``None`` for unchanged.
+ :param proximity: a ProximityState indicating the sensor's ability
+ to detect and report attributes of this tool. Use ``None``
+ for unchanged.
+ :param reportID: the numeric report ID for this report, if needed
+ """
+ if buttons is not None:
+ self.buttons = buttons
+ buttons = self.buttons
+
+ if toolid is not None:
+ self.toolid = toolid
+ toolid = self.toolid
+
+ if proximity is not None:
+ self.proximity = proximity
+ proximity = self.proximity
+
+ reportID = reportID or self.default_reportID
+
+ report = ReportData()
+ report.x = x
+ report.y = y
+ report.tippressure = pressure
+ report.tipswitch = pressure > 0
+ buttons.fill(report)
+ proximity.fill(report)
+ toolid.fill(report)
+
+ return super().create_report(report, reportID=reportID)
+
+ def create_report_heartbeat(self, reportID):
+ """
+ Return a heartbeat input report for this device.
+
+ Heartbeat reports generally contain battery status information,
+ among other things.
+ """
+ report = ReportData()
+ report.wacombatterycharging = 1
+ return super().create_report(report, reportID=reportID)
+
+ def create_report_pad(self, reportID, ring, ek0):
+ report = ReportData()
+
+ if ring is not None:
+ self.ring = ring
+ ring = self.ring
+
+ if ek0 is not None:
+ self.ek0 = ek0
+ ek0 = self.ek0
+
+ if ring >= 0:
+ report.wacomtouchring = ring
+ report.wacomtouchringstatus = 1
+ else:
+ report.wacomtouchring = 0x7F
+ report.wacomtouchringstatus = 0
+
+ report.wacomexpresskey00 = ek0
+ return super().create_report(report, reportID=reportID)
+
+ def event(self, x, y, pressure, buttons=None, toolid=None, proximity=None):
+ """
+ Send an input event on the default report ID.
+
+ :param x: absolute x
+ :param y: absolute y
+ :param buttons: stylus button state. Use ``None`` for unchanged.
+ :param toolid: tool identifiers. Use ``None`` for unchanged.
+ :param proximity: a ProximityState indicating the sensor's ability
+ to detect and report attributes of this tool. Use ``None``
+ for unchanged.
+ """
+ r = self.create_report(x, y, pressure, buttons, toolid, proximity)
+ self.call_input_event(r)
+ return [r]
+
+ def event_heartbeat(self, reportID):
+ """
+ Send a heartbeat event on the requested report ID.
+ """
+ r = self.create_report_heartbeat(reportID)
+ self.call_input_event(r)
+ return [r]
+
+ def event_pad(self, reportID, ring=None, ek0=None):
+ """
+ Send a pad event on the requested report ID.
+ """
+ r = self.create_report_pad(reportID, ring, ek0)
+ self.call_input_event(r)
+ return [r]
+
+ def get_report(self, req, rnum, rtype):
+ if rtype != self.UHID_FEATURE_REPORT:
+ return (1, [])
+
+ rdesc = None
+ for v in self.parsed_rdesc.feature_reports.values():
+ if v.report_ID == rnum:
+ rdesc = v
+
+ if rdesc is None:
+ return (1, [])
+
+ result = (1, [])
+ result = self.create_report_offset(rdesc) or result
+ return result
+
+ def create_report_offset(self, rdesc):
+ require = [
+ "Wacom Offset Left",
+ "Wacom Offset Top",
+ "Wacom Offset Right",
+ "Wacom Offset Bottom",
+ ]
+ if not set(require).issubset(set([f.usage_name for f in rdesc])):
+ return None
+
+ report = ReportData()
+ report.wacomoffsetleft = self.offset
+ report.wacomoffsettop = self.offset
+ report.wacomoffsetright = self.offset
+ report.wacomoffsetbottom = self.offset
+ r = rdesc.create_report([report], None)
+ return (0, r)
+
+
+class OpaqueTablet(BaseTablet):
+ """
+ Bare-bones opaque tablet with a minimum of features.
+
+ A tablet stripped down to its absolute core. It is capable of
+ reporting X/Y position and if the pen is in contact. No pressure,
+ no barrel switches, no eraser. Notably it *does* report an "In
+ Range" flag, but this is only because the Wacom driver expects
+ one to function properly. The device uses only standard HID usages,
+ not any of Wacom's vendor-defined pages.
+ """
+
+ # fmt: off
+ report_descriptor = [
+ 0x05, 0x0D, # . Usage Page (Digitizer),
+ 0x09, 0x01, # . Usage (Digitizer),
+ 0xA1, 0x01, # . Collection (Application),
+ 0x85, 0x01, # . Report ID (1),
+ 0x09, 0x20, # . Usage (Stylus),
+ 0xA1, 0x00, # . Collection (Physical),
+ 0x09, 0x42, # . Usage (Tip Switch),
+ 0x09, 0x32, # . Usage (In Range),
+ 0x15, 0x00, # . Logical Minimum (0),
+ 0x25, 0x01, # . Logical Maximum (1),
+ 0x75, 0x01, # . Report Size (1),
+ 0x95, 0x02, # . Report Count (2),
+ 0x81, 0x02, # . Input (Variable),
+ 0x95, 0x06, # . Report Count (6),
+ 0x81, 0x03, # . Input (Constant, Variable),
+ 0x05, 0x01, # . Usage Page (Desktop),
+ 0x09, 0x30, # . Usage (X),
+ 0x27, 0x80, 0x3E, 0x00, 0x00, # . Logical Maximum (16000),
+ 0x47, 0x80, 0x3E, 0x00, 0x00, # . Physical Maximum (16000),
+ 0x65, 0x11, # . Unit (Centimeter),
+ 0x55, 0x0D, # . Unit Exponent (13),
+ 0x75, 0x10, # . Report Size (16),
+ 0x95, 0x01, # . Report Count (1),
+ 0x81, 0x02, # . Input (Variable),
+ 0x09, 0x31, # . Usage (Y),
+ 0x27, 0x28, 0x23, 0x00, 0x00, # . Logical Maximum (9000),
+ 0x47, 0x28, 0x23, 0x00, 0x00, # . Physical Maximum (9000),
+ 0x81, 0x02, # . Input (Variable),
+ 0xC0, # . End Collection,
+ 0xC0, # . End Collection,
+ ]
+ # fmt: on
+
+ def __init__(self, rdesc=report_descriptor, name=None, info=(0x3, 0x056A, 0x9999)):
+ super().__init__(rdesc, name, info)
+ self.default_reportID = 1
+
+
+class OpaqueCTLTablet(BaseTablet):
+ """
+ Opaque tablet similar to something in the CTL product line.
+
+ A pen-only tablet with most basic features you would expect from
+ an actual device. Position, eraser, pressure, barrel buttons.
+ Uses the Wacom vendor-defined usage page.
+ """
+
+ # fmt: off
+ report_descriptor = [
+ 0x06, 0x0D, 0xFF, # . Usage Page (Vnd Wacom Emr),
+ 0x09, 0x01, # . Usage (Digitizer),
+ 0xA1, 0x01, # . Collection (Application),
+ 0x85, 0x10, # . Report ID (16),
+ 0x09, 0x20, # . Usage (Stylus),
+ 0x35, 0x00, # . Physical Minimum (0),
+ 0x45, 0x00, # . Physical Maximum (0),
+ 0x15, 0x00, # . Logical Minimum (0),
+ 0x25, 0x01, # . Logical Maximum (1),
+ 0xA1, 0x00, # . Collection (Physical),
+ 0x09, 0x42, # . Usage (Tip Switch),
+ 0x09, 0x44, # . Usage (Barrel Switch),
+ 0x09, 0x5A, # . Usage (Secondary Barrel Switch),
+ 0x09, 0x45, # . Usage (Eraser),
+ 0x09, 0x3C, # . Usage (Invert),
+ 0x09, 0x32, # . Usage (In Range),
+ 0x09, 0x36, # . Usage (In Proximity),
+ 0x25, 0x01, # . Logical Maximum (1),
+ 0x75, 0x01, # . Report Size (1),
+ 0x95, 0x07, # . Report Count (7),
+ 0x81, 0x02, # . Input (Variable),
+ 0x95, 0x01, # . Report Count (1),
+ 0x81, 0x03, # . Input (Constant, Variable),
+ 0x0A, 0x30, 0x01, # . Usage (X),
+ 0x65, 0x11, # . Unit (Centimeter),
+ 0x55, 0x0D, # . Unit Exponent (13),
+ 0x47, 0x80, 0x3E, 0x00, 0x00, # . Physical Maximum (16000),
+ 0x27, 0x80, 0x3E, 0x00, 0x00, # . Logical Maximum (16000),
+ 0x75, 0x18, # . Report Size (24),
+ 0x95, 0x01, # . Report Count (1),
+ 0x81, 0x02, # . Input (Variable),
+ 0x0A, 0x31, 0x01, # . Usage (Y),
+ 0x47, 0x28, 0x23, 0x00, 0x00, # . Physical Maximum (9000),
+ 0x27, 0x28, 0x23, 0x00, 0x00, # . Logical Maximum (9000),
+ 0x81, 0x02, # . Input (Variable),
+ 0x09, 0x30, # . Usage (Tip Pressure),
+ 0x55, 0x00, # . Unit Exponent (0),
+ 0x65, 0x00, # . Unit,
+ 0x47, 0x00, 0x00, 0x00, 0x00, # . Physical Maximum (0),
+ 0x26, 0xFF, 0x0F, # . Logical Maximum (4095),
+ 0x75, 0x10, # . Report Size (16),
+ 0x81, 0x02, # . Input (Variable),
+ 0x75, 0x08, # . Report Size (8),
+ 0x95, 0x06, # . Report Count (6),
+ 0x81, 0x03, # . Input (Constant, Variable),
+ 0x0A, 0x32, 0x01, # . Usage (Z),
+ 0x25, 0x3F, # . Logical Maximum (63),
+ 0x75, 0x08, # . Report Size (8),
+ 0x95, 0x01, # . Report Count (1),
+ 0x81, 0x02, # . Input (Variable),
+ 0x09, 0x5B, # . Usage (Transducer Serial Number),
+ 0x09, 0x5C, # . Usage (Transducer Serial Number Hi),
+ 0x17, 0x00, 0x00, 0x00, 0x80, # . Logical Minimum (-2147483648),
+ 0x27, 0xFF, 0xFF, 0xFF, 0x7F, # . Logical Maximum (2147483647),
+ 0x75, 0x20, # . Report Size (32),
+ 0x95, 0x02, # . Report Count (2),
+ 0x81, 0x02, # . Input (Variable),
+ 0x09, 0x77, # . Usage (Tool Type),
+ 0x15, 0x00, # . Logical Minimum (0),
+ 0x26, 0xFF, 0x0F, # . Logical Maximum (4095),
+ 0x75, 0x10, # . Report Size (16),
+ 0x95, 0x01, # . Report Count (1),
+ 0x81, 0x02, # . Input (Variable),
+ 0xC0, # . End Collection,
+ 0xC0 # . End Collection
+ ]
+ # fmt: on
+
+ def __init__(self, rdesc=report_descriptor, name=None, info=(0x3, 0x056A, 0x9999)):
+ super().__init__(rdesc, name, info)
+ self.default_reportID = 16
+
+
+class PTHX60_Pen(BaseTablet):
+ """
+ Pen interface of a PTH-660 / PTH-860 / PTH-460 tablet.
+
+ This generation of devices are nearly identical to each other, though
+ the PTH-460 uses a slightly different descriptor construction (splits
+ the pad among several physical collections)
+ """
+
+ def __init__(self, rdesc=None, name=None, info=None):
+ super().__init__(rdesc, name, info)
+ self.default_reportID = 16
+
+
+class BaseTest:
+ class TestTablet(base.BaseTestCase.TestUhid):
+ kernel_modules = [KERNEL_MODULE]
+
+ def sync_and_assert_events(
+ self, report, expected_events, auto_syn=True, strict=False
+ ):
+ """
+ Assert we see the expected events in response to a report.
+ """
+ uhdev = self.uhdev
+ syn_event = self.syn_event
+ if auto_syn:
+ expected_events.append(syn_event)
+ actual_events = uhdev.next_sync_events()
+ self.debug_reports(report, uhdev, actual_events)
+ if strict:
+ self.assertInputEvents(expected_events, actual_events)
+ else:
+ self.assertInputEventsIn(expected_events, actual_events)
+
+ def get_usages(self, uhdev):
+ def get_report_usages(report):
+ application = report.application
+ for field in report.fields:
+ if field.usages is not None:
+ for usage in field.usages:
+ yield (field, usage, application)
+ else:
+ yield (field, field.usage, application)
+
+ desc = uhdev.parsed_rdesc
+ reports = [
+ *desc.input_reports.values(),
+ *desc.feature_reports.values(),
+ *desc.output_reports.values(),
+ ]
+ for report in reports:
+ for usage in get_report_usages(report):
+ yield usage
+
+ def assertName(self, uhdev, type):
+ """
+ Assert that the name is as we expect.
+
+ The Wacom driver applies a number of decorations to the name
+ provided by the hardware. We cannot rely on the definition of
+ this assertion from the base class to work properly.
+ """
+ evdev = uhdev.get_evdev()
+ expected_name = uhdev.name + type
+ if "wacom" not in expected_name.lower():
+ expected_name = "Wacom " + expected_name
+ assert evdev.name == expected_name
+
+ def test_descriptor_physicals(self):
+ """
+ Verify that all HID usages which should have a physical range
+ actually do, and those which shouldn't don't. Also verify that
+ the associated unit is correct and within a sensible range.
+ """
+
+ def usage_id(page_name, usage_name):
+ page = HUT.usage_page_from_name(page_name)
+ return (page.page_id << 16) | page[usage_name].usage
+
+ required = {
+ usage_id("Generic Desktop", "X"): PhysRange(
+ PhysRange.CENTIMETER, 5, 150
+ ),
+ usage_id("Generic Desktop", "Y"): PhysRange(
+ PhysRange.CENTIMETER, 5, 150
+ ),
+ usage_id("Digitizers", "Width"): PhysRange(
+ PhysRange.CENTIMETER, 5, 150
+ ),
+ usage_id("Digitizers", "Height"): PhysRange(
+ PhysRange.CENTIMETER, 5, 150
+ ),
+ usage_id("Digitizers", "X Tilt"): PhysRange(PhysRange.DEGREE, 90, 180),
+ usage_id("Digitizers", "Y Tilt"): PhysRange(PhysRange.DEGREE, 90, 180),
+ usage_id("Digitizers", "Twist"): PhysRange(PhysRange.DEGREE, 358, 360),
+ usage_id("Wacom", "X Tilt"): PhysRange(PhysRange.DEGREE, 90, 180),
+ usage_id("Wacom", "Y Tilt"): PhysRange(PhysRange.DEGREE, 90, 180),
+ usage_id("Wacom", "Twist"): PhysRange(PhysRange.DEGREE, 358, 360),
+ usage_id("Wacom", "X"): PhysRange(PhysRange.CENTIMETER, 5, 150),
+ usage_id("Wacom", "Y"): PhysRange(PhysRange.CENTIMETER, 5, 150),
+ usage_id("Wacom", "Wacom TouchRing"): PhysRange(
+ PhysRange.DEGREE, 358, 360
+ ),
+ usage_id("Wacom", "Wacom Offset Left"): PhysRange(
+ PhysRange.CENTIMETER, 0, 0.5
+ ),
+ usage_id("Wacom", "Wacom Offset Top"): PhysRange(
+ PhysRange.CENTIMETER, 0, 0.5
+ ),
+ usage_id("Wacom", "Wacom Offset Right"): PhysRange(
+ PhysRange.CENTIMETER, 0, 0.5
+ ),
+ usage_id("Wacom", "Wacom Offset Bottom"): PhysRange(
+ PhysRange.CENTIMETER, 0, 0.5
+ ),
+ }
+ for field, usage, application in self.get_usages(self.uhdev):
+ if application == usage_id("Generic Desktop", "Mouse"):
+ # Ignore the vestigial Mouse collection which exists
+ # on Wacom tablets only for backwards compatibility.
+ continue
+
+ expect_physical = usage in required
+
+ phys_set = field.physical_min != 0 or field.physical_max != 0
+ assert phys_set == expect_physical
+
+ unit_set = field.unit != 0
+ assert unit_set == expect_physical
+
+ if unit_set:
+ assert required[usage].contains(field)
+
+ def test_prop_direct(self):
+ """
+ Todo: Verify that INPUT_PROP_DIRECT is set on display devices.
+ """
+ pass
+
+ def test_prop_pointer(self):
+ """
+ Todo: Verify that INPUT_PROP_POINTER is set on opaque devices.
+ """
+ pass
+
+
+class PenTabletTest(BaseTest.TestTablet):
+ def assertName(self, uhdev):
+ super().assertName(uhdev, " Pen")
+
+
+class TouchTabletTest(BaseTest.TestTablet):
+ def assertName(self, uhdev):
+ super().assertName(uhdev, " Finger")
+
+
+class TestOpaqueTablet(PenTabletTest):
+ def create_device(self):
+ return OpaqueTablet()
+
+ def test_sanity(self):
+ """
+ Bring a pen into contact with the tablet, then remove it.
+
+ Ensure that we get the basic tool/touch/motion events that should
+ be sent by the driver.
+ """
+ uhdev = self.uhdev
+
+ self.sync_and_assert_events(
+ uhdev.event(
+ 100,
+ 200,
+ pressure=300,
+ buttons=Buttons.clear(),
+ toolid=ToolID(serial=1, tooltype=1),
+ proximity=ProximityState.IN_RANGE,
+ ),
+ [
+ libevdev.InputEvent(libevdev.EV_KEY.BTN_TOOL_PEN, 1),
+ libevdev.InputEvent(libevdev.EV_ABS.ABS_X, 100),
+ libevdev.InputEvent(libevdev.EV_ABS.ABS_Y, 200),
+ libevdev.InputEvent(libevdev.EV_KEY.BTN_TOUCH, 1),
+ ],
+ )
+
+ self.sync_and_assert_events(
+ uhdev.event(110, 220, pressure=0),
+ [
+ libevdev.InputEvent(libevdev.EV_ABS.ABS_X, 110),
+ libevdev.InputEvent(libevdev.EV_ABS.ABS_Y, 220),
+ libevdev.InputEvent(libevdev.EV_KEY.BTN_TOUCH, 0),
+ ],
+ )
+
+ self.sync_and_assert_events(
+ uhdev.event(
+ 120,
+ 230,
+ pressure=0,
+ toolid=ToolID.clear(),
+ proximity=ProximityState.OUT,
+ ),
+ [
+ libevdev.InputEvent(libevdev.EV_KEY.BTN_TOOL_PEN, 0),
+ ],
+ )
+
+ self.sync_and_assert_events(
+ uhdev.event(130, 240, pressure=0), [], auto_syn=False, strict=True
+ )
+
+
+class TestOpaqueCTLTablet(TestOpaqueTablet):
+ def create_device(self):
+ return OpaqueCTLTablet()
+
+ def test_buttons(self):
+ """
+ Test that the barrel buttons (side switches) work as expected.
+
+ Press and release each button individually to verify that we get
+ the expected events.
+ """
+ uhdev = self.uhdev
+
+ self.sync_and_assert_events(
+ uhdev.event(
+ 100,
+ 200,
+ pressure=0,
+ buttons=Buttons.clear(),
+ toolid=ToolID(serial=1, tooltype=1),
+ proximity=ProximityState.IN_RANGE,
+ ),
+ [
+ libevdev.InputEvent(libevdev.EV_KEY.BTN_TOOL_PEN, 1),
+ libevdev.InputEvent(libevdev.EV_ABS.ABS_X, 100),
+ libevdev.InputEvent(libevdev.EV_ABS.ABS_Y, 200),
+ libevdev.InputEvent(libevdev.EV_MSC.MSC_SERIAL, 1),
+ ],
+ )
+
+ self.sync_and_assert_events(
+ uhdev.event(100, 200, pressure=0, buttons=Buttons(primary=True)),
+ [
+ libevdev.InputEvent(libevdev.EV_KEY.BTN_STYLUS, 1),
+ libevdev.InputEvent(libevdev.EV_MSC.MSC_SERIAL, 1),
+ ],
+ )
+
+ self.sync_and_assert_events(
+ uhdev.event(100, 200, pressure=0, buttons=Buttons(primary=False)),
+ [
+ libevdev.InputEvent(libevdev.EV_KEY.BTN_STYLUS, 0),
+ libevdev.InputEvent(libevdev.EV_MSC.MSC_SERIAL, 1),
+ ],
+ )
+
+ self.sync_and_assert_events(
+ uhdev.event(100, 200, pressure=0, buttons=Buttons(secondary=True)),
+ [
+ libevdev.InputEvent(libevdev.EV_KEY.BTN_STYLUS2, 1),
+ libevdev.InputEvent(libevdev.EV_MSC.MSC_SERIAL, 1),
+ ],
+ )
+
+ self.sync_and_assert_events(
+ uhdev.event(100, 200, pressure=0, buttons=Buttons(secondary=False)),
+ [
+ libevdev.InputEvent(libevdev.EV_KEY.BTN_STYLUS2, 0),
+ libevdev.InputEvent(libevdev.EV_MSC.MSC_SERIAL, 1),
+ ],
+ )
+
+
+PTHX60_Devices = [
+ {"rdesc": wacom_pth660_v145, "info": (0x3, 0x056A, 0x0357)},
+ {"rdesc": wacom_pth660_v150, "info": (0x3, 0x056A, 0x0357)},
+ {"rdesc": wacom_pth860_v145, "info": (0x3, 0x056A, 0x0358)},
+ {"rdesc": wacom_pth860_v150, "info": (0x3, 0x056A, 0x0358)},
+ {"rdesc": wacom_pth460_v105, "info": (0x3, 0x056A, 0x0392)},
+]
+
+PTHX60_Names = [
+ "PTH-660/v145",
+ "PTH-660/v150",
+ "PTH-860/v145",
+ "PTH-860/v150",
+ "PTH-460/v105",
+]
+
+
+class TestPTHX60_Pen(TestOpaqueCTLTablet):
+ @pytest.fixture(
+ autouse=True, scope="class", params=PTHX60_Devices, ids=PTHX60_Names
+ )
+ def set_device_params(self, request):
+ request.cls.device_params = request.param
+
+ def create_device(self):
+ return PTHX60_Pen(**self.device_params)
+
+ @pytest.mark.xfail
+ def test_descriptor_physicals(self):
+ # XFAIL: Various documented errata
+ super().test_descriptor_physicals()
+
+ def test_heartbeat_spurious(self):
+ """
+ Test that the heartbeat report does not send spurious events.
+ """
+ uhdev = self.uhdev
+
+ self.sync_and_assert_events(
+ uhdev.event(
+ 100,
+ 200,
+ pressure=300,
+ buttons=Buttons.clear(),
+ toolid=ToolID(serial=1, tooltype=0x822),
+ proximity=ProximityState.IN_RANGE,
+ ),
+ [
+ libevdev.InputEvent(libevdev.EV_KEY.BTN_TOOL_PEN, 1),
+ libevdev.InputEvent(libevdev.EV_ABS.ABS_X, 100),
+ libevdev.InputEvent(libevdev.EV_ABS.ABS_Y, 200),
+ libevdev.InputEvent(libevdev.EV_KEY.BTN_TOUCH, 1),
+ ],
+ )
+
+ # Exactly zero events: not even a SYN
+ self.sync_and_assert_events(
+ uhdev.event_heartbeat(19), [], auto_syn=False, strict=True
+ )
+
+ self.sync_and_assert_events(
+ uhdev.event(110, 200, pressure=300),
+ [
+ libevdev.InputEvent(libevdev.EV_ABS.ABS_X, 110),
+ ],
+ )
+
+ def test_empty_pad_sync(self):
+ self.empty_pad_sync(num=3, denom=16, reverse=True)
+
+ def empty_pad_sync(self, num, denom, reverse):
+ """
+ Test that multiple pad collections do not trigger empty syncs.
+ """
+
+ def offset_rotation(value):
+ """
+ Offset touchring rotation values by the same factor as the
+ Linux kernel. Tablets historically don't use the same origin
+ as HID, and it sometimes changes from tablet to tablet...
+ """
+ evdev = self.uhdev.get_evdev()
+ info = evdev.absinfo[libevdev.EV_ABS.ABS_WHEEL]
+ delta = info.maximum - info.minimum + 1
+ if reverse:
+ value = info.maximum - value
+ value += num * delta // denom
+ if value > info.maximum:
+ value -= delta
+ elif value < info.minimum:
+ value += delta
+ return value
+
+ uhdev = self.uhdev
+ uhdev.application = "Pad"
+ evdev = uhdev.get_evdev()
+
+ print(evdev.name)
+ self.sync_and_assert_events(
+ uhdev.event_pad(reportID=17, ring=0, ek0=1),
+ [
+ libevdev.InputEvent(libevdev.EV_KEY.BTN_0, 1),
+ libevdev.InputEvent(libevdev.EV_ABS.ABS_WHEEL, offset_rotation(0)),
+ libevdev.InputEvent(libevdev.EV_ABS.ABS_MISC, 15),
+ ],
+ )
+
+ self.sync_and_assert_events(
+ uhdev.event_pad(reportID=17, ring=1, ek0=1),
+ [libevdev.InputEvent(libevdev.EV_ABS.ABS_WHEEL, offset_rotation(1))],
+ )
+
+ self.sync_and_assert_events(
+ uhdev.event_pad(reportID=17, ring=2, ek0=0),
+ [
+ libevdev.InputEvent(libevdev.EV_ABS.ABS_WHEEL, offset_rotation(2)),
+ libevdev.InputEvent(libevdev.EV_KEY.BTN_0, 0),
+ ],
+ )
+
+
+class TestDTH2452Tablet(test_multitouch.BaseTest.TestMultitouch, TouchTabletTest):
+ ContactIds = namedtuple("ContactIds", "contact_id, tracking_id, slot_num")
+
+ def create_device(self):
+ return test_multitouch.Digitizer(
+ "DTH 2452",
+ rdesc="05 0d 09 04 a1 01 85 0c 95 01 75 08 15 00 26 ff 00 81 03 09 54 81 02 09 22 a1 02 05 0d 95 01 75 01 25 01 09 42 81 02 81 03 09 47 81 02 95 05 81 03 09 51 26 ff 00 75 10 95 01 81 02 35 00 65 11 55 0e 05 01 09 30 26 a0 44 46 96 14 81 42 09 31 26 9a 26 46 95 0b 81 42 05 0d 75 08 95 01 15 00 09 48 26 5f 00 46 7c 14 81 02 09 49 25 35 46 7d 0b 81 02 45 00 65 00 55 00 c0 05 0d 09 22 a1 02 05 0d 95 01 75 01 25 01 09 42 81 02 81 03 09 47 81 02 95 05 81 03 09 51 26 ff 00 75 10 95 01 81 02 35 00 65 11 55 0e 05 01 09 30 26 a0 44 46 96 14 81 42 09 31 26 9a 26 46 95 0b 81 42 05 0d 75 08 95 01 15 00 09 48 26 5f 00 46 7c 14 81 02 09 49 25 35 46 7d 0b 81 02 45 00 65 00 55 00 c0 05 0d 09 22 a1 02 05 0d 95 01 75 01 25 01 09 42 81 02 81 03 09 47 81 02 95 05 81 03 09 51 26 ff 00 75 10 95 01 81 02 35 00 65 11 55 0e 05 01 09 30 26 a0 44 46 96 14 81 42 09 31 26 9a 26 46 95 0b 81 42 05 0d 75 08 95 01 15 00 09 48 26 5f 00 46 7c 14 81 02 09 49 25 35 46 7d 0b 81 02 45 00 65 00 55 00 c0 05 0d 09 22 a1 02 05 0d 95 01 75 01 25 01 09 42 81 02 81 03 09 47 81 02 95 05 81 03 09 51 26 ff 00 75 10 95 01 81 02 35 00 65 11 55 0e 05 01 09 30 26 a0 44 46 96 14 81 42 09 31 26 9a 26 46 95 0b 81 42 05 0d 75 08 95 01 15 00 09 48 26 5f 00 46 7c 14 81 02 09 49 25 35 46 7d 0b 81 02 45 00 65 00 55 00 c0 05 0d 09 22 a1 02 05 0d 95 01 75 01 25 01 09 42 81 02 81 03 09 47 81 02 95 05 81 03 09 51 26 ff 00 75 10 95 01 81 02 35 00 65 11 55 0e 05 01 09 30 26 a0 44 46 96 14 81 42 09 31 26 9a 26 46 95 0b 81 42 05 0d 75 08 95 01 15 00 09 48 26 5f 00 46 7c 14 81 02 09 49 25 35 46 7d 0b 81 02 45 00 65 00 55 00 c0 05 0d 27 ff ff 00 00 75 10 95 01 09 56 81 02 75 08 95 0e 81 03 09 55 26 ff 00 75 08 b1 02 85 0a 06 00 ff 09 c5 96 00 01 b1 02 c0 06 00 ff 09 01 a1 01 09 01 85 13 15 00 26 ff 00 75 08 95 3f 81 02 06 00 ff 09 01 15 00 26 ff 00 75 08 95 3f 91 02 c0",
+ input_info=(0x3, 0x056A, 0x0383),
+ )
+
+ def make_contact(self, contact_id=0, t=0):
+ """
+ Make a single touch contact that can move over time.
+
+ Creates a touch object that has a well-known position in space that
+ does not overlap with other contacts. The value of `t` may be
+ incremented over time to move the point along a linear path.
+ """
+ x = 50 + 10 * contact_id + t * 11
+ y = 100 + 100 * contact_id + t * 11
+ return test_multitouch.Touch(contact_id, x, y)
+
+ def make_contacts(self, n, t=0):
+ """
+ Make multiple touch contacts that can move over time.
+
+ Returns a list of `n` touch objects that are positioned at well-known
+ locations. The value of `t` may be incremented over time to move the
+ points along a linear path.
+ """
+ return [ self.make_contact(id, t) for id in range(0, n) ]
+
+ def assert_contact(self, uhdev, evdev, contact_ids, t=0):
+ """
+ Assert properties of a contact generated by make_contact.
+ """
+ contact_id = contact_ids.contact_id
+ tracking_id = contact_ids.tracking_id
+ slot_num = contact_ids.slot_num
+
+ x = 50 + 10 * contact_id + t * 11
+ y = 100 + 100 * contact_id + t * 11
+
+ # If the data isn't supposed to be stored in any slots, there is
+ # nothing we can check for in the evdev stream.
+ if slot_num is None:
+ assert tracking_id == -1
+ return
+
+ assert evdev.slots[slot_num][libevdev.EV_ABS.ABS_MT_TRACKING_ID] == tracking_id
+ if tracking_id != -1:
+ assert evdev.slots[slot_num][libevdev.EV_ABS.ABS_MT_POSITION_X] == x
+ assert evdev.slots[slot_num][libevdev.EV_ABS.ABS_MT_POSITION_Y] == y
+
+ def assert_contacts(self, uhdev, evdev, data, t=0):
+ """
+ Assert properties of a list of contacts generated by make_contacts.
+ """
+ for contact_ids in data:
+ self.assert_contact(uhdev, evdev, contact_ids, t)
+
+ def test_contact_id_0(self):
+ """
+ Bring a finger in contact with the tablet, then hold it down and remove it.
+
+ Ensure that even with contact ID = 0 which is usually given as an invalid
+ touch event by most tablets with the exception of a few, that given the
+ confidence bit is set to 1 it should process it as a valid touch to cover
+ the few tablets using contact ID = 0 as a valid touch value.
+ """
+ uhdev = self.uhdev
+ evdev = uhdev.get_evdev()
+
+ t0 = test_multitouch.Touch(0, 50, 100)
+ r = uhdev.event([t0])
+ events = uhdev.next_sync_events()
+ self.debug_reports(r, uhdev, events)
+
+ slot = self.get_slot(uhdev, t0, 0)
+
+ assert libevdev.InputEvent(libevdev.EV_KEY.BTN_TOUCH, 1) in events
+ assert evdev.slots[slot][libevdev.EV_ABS.ABS_MT_TRACKING_ID] == 0
+ assert evdev.slots[slot][libevdev.EV_ABS.ABS_MT_POSITION_X] == 50
+ assert evdev.slots[slot][libevdev.EV_ABS.ABS_MT_POSITION_Y] == 100
+
+ t0.tipswitch = False
+ if uhdev.quirks is None or "VALID_IS_INRANGE" not in uhdev.quirks:
+ t0.inrange = False
+ r = uhdev.event([t0])
+ events = uhdev.next_sync_events()
+ self.debug_reports(r, uhdev, events)
+ assert libevdev.InputEvent(libevdev.EV_KEY.BTN_TOUCH, 0) in events
+ assert evdev.slots[slot][libevdev.EV_ABS.ABS_MT_TRACKING_ID] == -1
+
+ def test_confidence_false(self):
+ """
+ Bring a finger in contact with the tablet with confidence set to false.
+
+ Ensure that the confidence bit being set to false should not result in a touch event.
+ """
+ uhdev = self.uhdev
+ _evdev = uhdev.get_evdev()
+
+ t0 = test_multitouch.Touch(1, 50, 100)
+ t0.confidence = False
+ r = uhdev.event([t0])
+ events = uhdev.next_sync_events()
+ self.debug_reports(r, uhdev, events)
+
+ _slot = self.get_slot(uhdev, t0, 0)
+
+ assert not events
+
+ def test_confidence_multitouch(self):
+ """
+ Bring multiple fingers in contact with the tablet, some with the
+ confidence bit set, and some without.
+
+ Ensure that all confident touches are reported and that all non-
+ confident touches are ignored.
+ """
+ uhdev = self.uhdev
+ evdev = uhdev.get_evdev()
+
+ touches = self.make_contacts(5)
+ touches[0].confidence = False
+ touches[2].confidence = False
+ touches[4].confidence = False
+
+ r = uhdev.event(touches)
+ events = uhdev.next_sync_events()
+ self.debug_reports(r, uhdev, events)
+
+ assert libevdev.InputEvent(libevdev.EV_KEY.BTN_TOUCH, 1) in events
+
+ self.assert_contacts(uhdev, evdev,
+ [ self.ContactIds(contact_id = 0, tracking_id = -1, slot_num = None),
+ self.ContactIds(contact_id = 1, tracking_id = 0, slot_num = 0),
+ self.ContactIds(contact_id = 2, tracking_id = -1, slot_num = None),
+ self.ContactIds(contact_id = 3, tracking_id = 1, slot_num = 1),
+ self.ContactIds(contact_id = 4, tracking_id = -1, slot_num = None) ])
+
+ def confidence_change_assert_playback(self, uhdev, evdev, timeline):
+ """
+ Assert proper behavior of contacts that move and change tipswitch /
+ confidence status over time.
+
+ Given a `timeline` list of touch states to iterate over, verify
+ that the contacts move and are reported as up/down as expected
+ by the state of the tipswitch and confidence bits.
+ """
+ t = 0
+
+ for state in timeline:
+ touches = self.make_contacts(len(state), t)
+
+ for item in zip(touches, state):
+ item[0].tipswitch = item[1][1]
+ item[0].confidence = item[1][2]
+
+ r = uhdev.event(touches)
+ events = uhdev.next_sync_events()
+ self.debug_reports(r, uhdev, events)
+
+ ids = [ x[0] for x in state ]
+ self.assert_contacts(uhdev, evdev, ids, t)
+
+ t += 1
+
+ def test_confidence_loss_a(self):
+ """
+ Transition a confident contact to a non-confident contact by
+ first clearing the tipswitch.
+
+ Ensure that the driver reports the transitioned contact as
+ being removed and that other contacts continue to report
+ normally. This mode of confidence loss is used by the
+ DTH-2452.
+ """
+ uhdev = self.uhdev
+ evdev = uhdev.get_evdev()
+
+ self.confidence_change_assert_playback(uhdev, evdev, [
+ # t=0: Contact 0 == Down + confident; Contact 1 == Down + confident
+ # Both fingers confidently in contact
+ [(self.ContactIds(contact_id = 0, tracking_id = 0, slot_num = 0), True, True),
+ (self.ContactIds(contact_id = 1, tracking_id = 1, slot_num = 1), True, True)],
+
+ # t=1: Contact 0 == !Down + confident; Contact 1 == Down + confident
+ # First finger looses confidence and clears only the tipswitch flag
+ [(self.ContactIds(contact_id = 0, tracking_id = -1, slot_num = 0), False, True),
+ (self.ContactIds(contact_id = 1, tracking_id = 1, slot_num = 1), True, True)],
+
+ # t=2: Contact 0 == !Down + !confident; Contact 1 == Down + confident
+ # First finger has lost confidence and has both flags cleared
+ [(self.ContactIds(contact_id = 0, tracking_id = -1, slot_num = 0), False, False),
+ (self.ContactIds(contact_id = 1, tracking_id = 1, slot_num = 1), True, True)],
+
+ # t=3: Contact 0 == !Down + !confident; Contact 1 == Down + confident
+ # First finger has lost confidence and has both flags cleared
+ [(self.ContactIds(contact_id = 0, tracking_id = -1, slot_num = 0), False, False),
+ (self.ContactIds(contact_id = 1, tracking_id = 1, slot_num = 1), True, True)]
+ ])
+
+ def test_confidence_loss_b(self):
+ """
+ Transition a confident contact to a non-confident contact by
+ cleraing both tipswitch and confidence bits simultaneously.
+
+ Ensure that the driver reports the transitioned contact as
+ being removed and that other contacts continue to report
+ normally. This mode of confidence loss is used by some
+ AES devices.
+ """
+ uhdev = self.uhdev
+ evdev = uhdev.get_evdev()
+
+ self.confidence_change_assert_playback(uhdev, evdev, [
+ # t=0: Contact 0 == Down + confident; Contact 1 == Down + confident
+ # Both fingers confidently in contact
+ [(self.ContactIds(contact_id = 0, tracking_id = 0, slot_num = 0), True, True),
+ (self.ContactIds(contact_id = 1, tracking_id = 1, slot_num = 1), True, True)],
+
+ # t=1: Contact 0 == !Down + !confident; Contact 1 == Down + confident
+ # First finger looses confidence and has both flags cleared simultaneously
+ [(self.ContactIds(contact_id = 0, tracking_id = -1, slot_num = 0), False, False),
+ (self.ContactIds(contact_id = 1, tracking_id = 1, slot_num = 1), True, True)],
+
+ # t=2: Contact 0 == !Down + !confident; Contact 1 == Down + confident
+ # First finger has lost confidence and has both flags cleared
+ [(self.ContactIds(contact_id = 0, tracking_id = -1, slot_num = 0), False, False),
+ (self.ContactIds(contact_id = 1, tracking_id = 1, slot_num = 1), True, True)],
+
+ # t=3: Contact 0 == !Down + !confident; Contact 1 == Down + confident
+ # First finger has lost confidence and has both flags cleared
+ [(self.ContactIds(contact_id = 0, tracking_id = -1, slot_num = 0), False, False),
+ (self.ContactIds(contact_id = 1, tracking_id = 1, slot_num = 1), True, True)]
+ ])
+
+ def test_confidence_loss_c(self):
+ """
+ Transition a confident contact to a non-confident contact by
+ clearing only the confidence bit.
+
+ Ensure that the driver reports the transitioned contact as
+ being removed and that other contacts continue to report
+ normally.
+ """
+ uhdev = self.uhdev
+ evdev = uhdev.get_evdev()
+
+ self.confidence_change_assert_playback(uhdev, evdev, [
+ # t=0: Contact 0 == Down + confident; Contact 1 == Down + confident
+ # Both fingers confidently in contact
+ [(self.ContactIds(contact_id = 0, tracking_id = 0, slot_num = 0), True, True),
+ (self.ContactIds(contact_id = 1, tracking_id = 1, slot_num = 1), True, True)],
+
+ # t=1: Contact 0 == Down + !confident; Contact 1 == Down + confident
+ # First finger looses confidence and clears only the confidence flag
+ [(self.ContactIds(contact_id = 0, tracking_id = -1, slot_num = 0), True, False),
+ (self.ContactIds(contact_id = 1, tracking_id = 1, slot_num = 1), True, True)],
+
+ # t=2: Contact 0 == !Down + !confident; Contact 1 == Down + confident
+ # First finger has lost confidence and has both flags cleared
+ [(self.ContactIds(contact_id = 0, tracking_id = -1, slot_num = 0), False, False),
+ (self.ContactIds(contact_id = 1, tracking_id = 1, slot_num = 1), True, True)],
+
+ # t=3: Contact 0 == !Down + !confident; Contact 1 == Down + confident
+ # First finger has lost confidence and has both flags cleared
+ [(self.ContactIds(contact_id = 0, tracking_id = -1, slot_num = 0), False, False),
+ (self.ContactIds(contact_id = 1, tracking_id = 1, slot_num = 1), True, True)]
+ ])
+
+ def test_confidence_gain_a(self):
+ """
+ Transition a contact that was always non-confident to confident.
+
+ Ensure that the confident contact is reported normally.
+ """
+ uhdev = self.uhdev
+ evdev = uhdev.get_evdev()
+
+ self.confidence_change_assert_playback(uhdev, evdev, [
+ # t=0: Contact 0 == Down + !confident; Contact 1 == Down + confident
+ # Only second finger is confidently in contact
+ [(self.ContactIds(contact_id = 0, tracking_id = -1, slot_num = None), True, False),
+ (self.ContactIds(contact_id = 1, tracking_id = 0, slot_num = 0), True, True)],
+
+ # t=1: Contact 0 == Down + !confident; Contact 1 == Down + confident
+ # First finger gains confidence
+ [(self.ContactIds(contact_id = 0, tracking_id = -1, slot_num = None), True, False),
+ (self.ContactIds(contact_id = 1, tracking_id = 0, slot_num = 0), True, True)],
+
+ # t=2: Contact 0 == Down + confident; Contact 1 == Down + confident
+ # First finger remains confident
+ [(self.ContactIds(contact_id = 0, tracking_id = 1, slot_num = 1), True, True),
+ (self.ContactIds(contact_id = 1, tracking_id = 0, slot_num = 0), True, True)],
+
+ # t=3: Contact 0 == Down + confident; Contact 1 == Down + confident
+ # First finger remains confident
+ [(self.ContactIds(contact_id = 0, tracking_id = 1, slot_num = 1), True, True),
+ (self.ContactIds(contact_id = 1, tracking_id = 0, slot_num = 0), True, True)]
+ ])
+
+ def test_confidence_gain_b(self):
+ """
+ Transition a contact from non-confident to confident.
+
+ Ensure that the confident contact is reported normally.
+ """
+ uhdev = self.uhdev
+ evdev = uhdev.get_evdev()
+
+ self.confidence_change_assert_playback(uhdev, evdev, [
+ # t=0: Contact 0 == Down + confident; Contact 1 == Down + confident
+ # First and second finger confidently in contact
+ [(self.ContactIds(contact_id = 0, tracking_id = 0, slot_num = 0), True, True),
+ (self.ContactIds(contact_id = 1, tracking_id = 1, slot_num = 1), True, True)],
+
+ # t=1: Contact 0 == Down + !confident; Contact 1 == Down + confident
+ # Firtst finger looses confidence
+ [(self.ContactIds(contact_id = 0, tracking_id = -1, slot_num = 0), True, False),
+ (self.ContactIds(contact_id = 1, tracking_id = 1, slot_num = 1), True, True)],
+
+ # t=2: Contact 0 == Down + confident; Contact 1 == Down + confident
+ # First finger gains confidence
+ [(self.ContactIds(contact_id = 0, tracking_id = 2, slot_num = 0), True, True),
+ (self.ContactIds(contact_id = 1, tracking_id = 1, slot_num = 1), True, True)],
+
+ # t=3: Contact 0 == !Down + confident; Contact 1 == Down + confident
+ # First finger goes up
+ [(self.ContactIds(contact_id = 0, tracking_id = -1, slot_num = 0), False, True),
+ (self.ContactIds(contact_id = 1, tracking_id = 1, slot_num = 1), True, True)]
+ ])
diff --git a/tools/testing/selftests/hid/vmtest.sh b/tools/testing/selftests/hid/vmtest.sh
new file mode 100755
index 000000000000..db534e9099a8
--- /dev/null
+++ b/tools/testing/selftests/hid/vmtest.sh
@@ -0,0 +1,296 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+set -u
+set -e
+
+# This script currently only works for x86_64
+ARCH="$(uname -m)"
+case "${ARCH}" in
+x86_64)
+ QEMU_BINARY=qemu-system-x86_64
+ BZIMAGE="arch/x86/boot/bzImage"
+ ;;
+*)
+ echo "Unsupported architecture"
+ exit 1
+ ;;
+esac
+SCRIPT_DIR="$(dirname $(realpath $0))"
+OUTPUT_DIR="$SCRIPT_DIR/results"
+KCONFIG_REL_PATHS=("${SCRIPT_DIR}/config" "${SCRIPT_DIR}/config.common" "${SCRIPT_DIR}/config.${ARCH}")
+B2C_URL="https://gitlab.freedesktop.org/gfx-ci/boot2container/-/raw/main/vm2c.py"
+NUM_COMPILE_JOBS="$(nproc)"
+LOG_FILE_BASE="$(date +"hid_selftests.%Y-%m-%d_%H-%M-%S")"
+LOG_FILE="${LOG_FILE_BASE}.log"
+EXIT_STATUS_FILE="${LOG_FILE_BASE}.exit_status"
+CONTAINER_IMAGE="registry.freedesktop.org/bentiss/hid/fedora/39:2023-11-22.1"
+
+TARGETS="${TARGETS:=$(basename ${SCRIPT_DIR})}"
+DEFAULT_COMMAND="pip3 install hid-tools; make -C tools/testing/selftests TARGETS=${TARGETS} run_tests"
+
+usage()
+{
+ cat <<EOF
+Usage: $0 [-j N] [-s] [-b] [-d <output_dir>] -- [<command>]
+
+<command> is the command you would normally run when you are in
+the source kernel direcory. e.g:
+
+ $0 -- ./tools/testing/selftests/hid/hid_bpf
+
+If no command is specified and a debug shell (-s) is not requested,
+"${DEFAULT_COMMAND}" will be run by default.
+
+If you build your kernel using KBUILD_OUTPUT= or O= options, these
+can be passed as environment variables to the script:
+
+ O=<kernel_build_path> $0 -- ./tools/testing/selftests/hid/hid_bpf
+
+or
+
+ KBUILD_OUTPUT=<kernel_build_path> $0 -- ./tools/testing/selftests/hid/hid_bpf
+
+Options:
+
+ -u) Update the boot2container script to a newer version.
+ -d) Update the output directory (default: ${OUTPUT_DIR})
+ -b) Run only the build steps for the kernel and the selftests
+ -j) Number of jobs for compilation, similar to -j in make
+ (default: ${NUM_COMPILE_JOBS})
+ -s) Instead of powering off the VM, start an interactive
+ shell. If <command> is specified, the shell runs after
+ the command finishes executing
+EOF
+}
+
+download()
+{
+ local file="$1"
+
+ echo "Downloading $file..." >&2
+ curl -Lsf "$file" -o "${@:2}"
+}
+
+recompile_kernel()
+{
+ local kernel_checkout="$1"
+ local make_command="$2"
+
+ cd "${kernel_checkout}"
+
+ ${make_command} olddefconfig
+ ${make_command} headers
+ ${make_command}
+}
+
+update_selftests()
+{
+ local kernel_checkout="$1"
+ local selftests_dir="${kernel_checkout}/tools/testing/selftests/hid"
+
+ cd "${selftests_dir}"
+ ${make_command}
+}
+
+run_vm()
+{
+ local run_dir="$1"
+ local b2c="$2"
+ local kernel_bzimage="$3"
+ local command="$4"
+ local post_command=""
+
+ cd "${run_dir}"
+
+ if ! which "${QEMU_BINARY}" &> /dev/null; then
+ cat <<EOF
+Could not find ${QEMU_BINARY}
+Please install qemu or set the QEMU_BINARY environment variable.
+EOF
+ exit 1
+ fi
+
+ # alpine (used in post-container requires the PATH to have /bin
+ export PATH=$PATH:/bin
+
+ if [[ "${debug_shell}" != "yes" ]]
+ then
+ touch ${OUTPUT_DIR}/${LOG_FILE}
+ command="mount bpffs -t bpf /sys/fs/bpf/; set -o pipefail ; ${command} 2>&1 | tee ${OUTPUT_DIR}/${LOG_FILE}"
+ post_command="cat ${OUTPUT_DIR}/${LOG_FILE}"
+ else
+ command="mount bpffs -t bpf /sys/fs/bpf/; ${command}"
+ fi
+
+ set +e
+ $b2c --command "${command}" \
+ --kernel ${kernel_bzimage} \
+ --workdir ${OUTPUT_DIR} \
+ --image ${CONTAINER_IMAGE}
+
+ echo $? > ${OUTPUT_DIR}/${EXIT_STATUS_FILE}
+
+ set -e
+
+ ${post_command}
+}
+
+is_rel_path()
+{
+ local path="$1"
+
+ [[ ${path:0:1} != "/" ]]
+}
+
+do_update_kconfig()
+{
+ local kernel_checkout="$1"
+ local kconfig_file="$2"
+
+ rm -f "$kconfig_file" 2> /dev/null
+
+ for config in "${KCONFIG_REL_PATHS[@]}"; do
+ local kconfig_src="${config}"
+ cat "$kconfig_src" >> "$kconfig_file"
+ done
+}
+
+update_kconfig()
+{
+ local kernel_checkout="$1"
+ local kconfig_file="$2"
+
+ if [[ -f "${kconfig_file}" ]]; then
+ local local_modified="$(stat -c %Y "${kconfig_file}")"
+
+ for config in "${KCONFIG_REL_PATHS[@]}"; do
+ local kconfig_src="${config}"
+ local src_modified="$(stat -c %Y "${kconfig_src}")"
+ # Only update the config if it has been updated after the
+ # previously cached config was created. This avoids
+ # unnecessarily compiling the kernel and selftests.
+ if [[ "${src_modified}" -gt "${local_modified}" ]]; then
+ do_update_kconfig "$kernel_checkout" "$kconfig_file"
+ # Once we have found one outdated configuration
+ # there is no need to check other ones.
+ break
+ fi
+ done
+ else
+ do_update_kconfig "$kernel_checkout" "$kconfig_file"
+ fi
+}
+
+main()
+{
+ local script_dir="$(cd -P -- "$(dirname -- "${BASH_SOURCE[0]}")" && pwd -P)"
+ local kernel_checkout=$(realpath "${script_dir}"/../../../../)
+ # By default the script searches for the kernel in the checkout directory but
+ # it also obeys environment variables O= and KBUILD_OUTPUT=
+ local kernel_bzimage="${kernel_checkout}/${BZIMAGE}"
+ local command="${DEFAULT_COMMAND}"
+ local update_b2c="no"
+ local debug_shell="no"
+ local build_only="no"
+
+ while getopts ':hsud:j:b' opt; do
+ case ${opt} in
+ u)
+ update_b2c="yes"
+ ;;
+ d)
+ OUTPUT_DIR="$OPTARG"
+ ;;
+ j)
+ NUM_COMPILE_JOBS="$OPTARG"
+ ;;
+ s)
+ command="/bin/sh"
+ debug_shell="yes"
+ ;;
+ b)
+ build_only="yes"
+ ;;
+ h)
+ usage
+ exit 0
+ ;;
+ \? )
+ echo "Invalid Option: -$OPTARG"
+ usage
+ exit 1
+ ;;
+ : )
+ echo "Invalid Option: -$OPTARG requires an argument"
+ usage
+ exit 1
+ ;;
+ esac
+ done
+ shift $((OPTIND -1))
+
+ # trap 'catch "$?"' EXIT
+ if [[ "${build_only}" == "no" && "${debug_shell}" == "no" ]]; then
+ if [[ $# -eq 0 ]]; then
+ echo "No command specified, will run ${DEFAULT_COMMAND} in the vm"
+ else
+ command="$@"
+
+ if [[ "${command}" == "/bin/bash" || "${command}" == "bash" ]]
+ then
+ debug_shell="yes"
+ fi
+ fi
+ fi
+
+ local kconfig_file="${OUTPUT_DIR}/latest.config"
+ local make_command="make -j ${NUM_COMPILE_JOBS} KCONFIG_CONFIG=${kconfig_file}"
+
+ # Figure out where the kernel is being built.
+ # O takes precedence over KBUILD_OUTPUT.
+ if [[ "${O:=""}" != "" ]]; then
+ if is_rel_path "${O}"; then
+ O="$(realpath "${PWD}/${O}")"
+ fi
+ kernel_bzimage="${O}/${BZIMAGE}"
+ make_command="${make_command} O=${O}"
+ elif [[ "${KBUILD_OUTPUT:=""}" != "" ]]; then
+ if is_rel_path "${KBUILD_OUTPUT}"; then
+ KBUILD_OUTPUT="$(realpath "${PWD}/${KBUILD_OUTPUT}")"
+ fi
+ kernel_bzimage="${KBUILD_OUTPUT}/${BZIMAGE}"
+ make_command="${make_command} KBUILD_OUTPUT=${KBUILD_OUTPUT}"
+ fi
+
+ local b2c="${OUTPUT_DIR}/vm2c.py"
+
+ echo "Output directory: ${OUTPUT_DIR}"
+
+ mkdir -p "${OUTPUT_DIR}"
+ update_kconfig "${kernel_checkout}" "${kconfig_file}"
+
+ recompile_kernel "${kernel_checkout}" "${make_command}"
+ update_selftests "${kernel_checkout}" "${make_command}"
+
+ if [[ "${build_only}" == "no" ]]; then
+ if [[ "${update_b2c}" == "no" && ! -f "${b2c}" ]]; then
+ echo "vm2c script not found in ${b2c}"
+ update_b2c="yes"
+ fi
+
+ if [[ "${update_b2c}" == "yes" ]]; then
+ download $B2C_URL $b2c
+ chmod +x $b2c
+ fi
+
+ run_vm "${kernel_checkout}" $b2c "${kernel_bzimage}" "${command}"
+ if [[ "${debug_shell}" != "yes" ]]; then
+ echo "Logs saved in ${OUTPUT_DIR}/${LOG_FILE}"
+ fi
+
+ exit $(cat ${OUTPUT_DIR}/${EXIT_STATUS_FILE})
+ fi
+}
+
+main "$@"
diff --git a/tools/testing/selftests/intel_pstate/Makefile b/tools/testing/selftests/intel_pstate/Makefile
index 39f0fa2a8fd6..05d66ef50c97 100644
--- a/tools/testing/selftests/intel_pstate/Makefile
+++ b/tools/testing/selftests/intel_pstate/Makefile
@@ -2,10 +2,10 @@
CFLAGS := $(CFLAGS) -Wall -D_GNU_SOURCE
LDLIBS += -lm
-uname_M := $(shell uname -m 2>/dev/null || echo not)
-ARCH ?= $(shell echo $(uname_M) | sed -e s/i.86/x86/ -e s/x86_64/x86/)
+ARCH ?= $(shell uname -m 2>/dev/null || echo not)
+ARCH_PROCESSED := $(shell echo $(ARCH) | sed -e s/i.86/x86/ -e s/x86_64/x86/)
-ifeq (x86,$(ARCH))
+ifeq (x86,$(ARCH_PROCESSED))
TEST_GEN_FILES := msr aperf
endif
diff --git a/tools/testing/selftests/intel_pstate/aperf.c b/tools/testing/selftests/intel_pstate/aperf.c
index f6cd03a87493..a8acf3996973 100644
--- a/tools/testing/selftests/intel_pstate/aperf.c
+++ b/tools/testing/selftests/intel_pstate/aperf.c
@@ -10,8 +10,12 @@
#include <sched.h>
#include <errno.h>
#include <string.h>
+#include <time.h>
#include "../kselftest.h"
+#define MSEC_PER_SEC 1000L
+#define NSEC_PER_MSEC 1000000L
+
void usage(char *name) {
printf ("Usage: %s cpunum\n", name);
}
@@ -22,7 +26,7 @@ int main(int argc, char **argv) {
long long tsc, old_tsc, new_tsc;
long long aperf, old_aperf, new_aperf;
long long mperf, old_mperf, new_mperf;
- struct timeb before, after;
+ struct timespec before, after;
long long int start, finish, total;
cpu_set_t cpuset;
@@ -55,7 +59,10 @@ int main(int argc, char **argv) {
return 1;
}
- ftime(&before);
+ if (clock_gettime(CLOCK_MONOTONIC, &before) < 0) {
+ perror("clock_gettime");
+ return 1;
+ }
pread(fd, &old_tsc, sizeof(old_tsc), 0x10);
pread(fd, &old_aperf, sizeof(old_mperf), 0xe7);
pread(fd, &old_mperf, sizeof(old_aperf), 0xe8);
@@ -64,7 +71,10 @@ int main(int argc, char **argv) {
sqrt(i);
}
- ftime(&after);
+ if (clock_gettime(CLOCK_MONOTONIC, &after) < 0) {
+ perror("clock_gettime");
+ return 1;
+ }
pread(fd, &new_tsc, sizeof(new_tsc), 0x10);
pread(fd, &new_aperf, sizeof(new_mperf), 0xe7);
pread(fd, &new_mperf, sizeof(new_aperf), 0xe8);
@@ -73,11 +83,11 @@ int main(int argc, char **argv) {
aperf = new_aperf-old_aperf;
mperf = new_mperf-old_mperf;
- start = before.time*1000 + before.millitm;
- finish = after.time*1000 + after.millitm;
+ start = before.tv_sec*MSEC_PER_SEC + before.tv_nsec/NSEC_PER_MSEC;
+ finish = after.tv_sec*MSEC_PER_SEC + after.tv_nsec/NSEC_PER_MSEC;
total = finish - start;
- printf("runTime: %4.2f\n", 1.0*total/1000);
+ printf("runTime: %4.2f\n", 1.0*total/MSEC_PER_SEC);
printf("freq: %7.0f\n", tsc / (1.0*aperf / (1.0 * mperf)) / total);
return 0;
}
diff --git a/tools/testing/selftests/iommu/.gitignore b/tools/testing/selftests/iommu/.gitignore
new file mode 100644
index 000000000000..7d0703049eba
--- /dev/null
+++ b/tools/testing/selftests/iommu/.gitignore
@@ -0,0 +1,3 @@
+# SPDX-License-Identifier: GPL-2.0-only
+/iommufd
+/iommufd_fail_nth
diff --git a/tools/testing/selftests/iommu/Makefile b/tools/testing/selftests/iommu/Makefile
new file mode 100644
index 000000000000..32c5fdfd0eef
--- /dev/null
+++ b/tools/testing/selftests/iommu/Makefile
@@ -0,0 +1,11 @@
+# SPDX-License-Identifier: GPL-2.0-only
+CFLAGS += -Wall -O2 -Wno-unused-function
+CFLAGS += $(KHDR_INCLUDES)
+
+CFLAGS += -D_GNU_SOURCE
+
+TEST_GEN_PROGS :=
+TEST_GEN_PROGS += iommufd
+TEST_GEN_PROGS += iommufd_fail_nth
+
+include ../lib.mk
diff --git a/tools/testing/selftests/iommu/config b/tools/testing/selftests/iommu/config
new file mode 100644
index 000000000000..110d73917615
--- /dev/null
+++ b/tools/testing/selftests/iommu/config
@@ -0,0 +1,3 @@
+CONFIG_IOMMUFD=y
+CONFIG_FAULT_INJECTION=y
+CONFIG_IOMMUFD_TEST=y
diff --git a/tools/testing/selftests/iommu/iommufd.c b/tools/testing/selftests/iommu/iommufd.c
new file mode 100644
index 000000000000..edf1c99c9936
--- /dev/null
+++ b/tools/testing/selftests/iommu/iommufd.c
@@ -0,0 +1,2349 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/* Copyright (c) 2021-2022, NVIDIA CORPORATION & AFFILIATES */
+#include <stdlib.h>
+#include <sys/mman.h>
+#include <sys/eventfd.h>
+
+#define __EXPORTED_HEADERS__
+#include <linux/vfio.h>
+
+#include "iommufd_utils.h"
+
+static unsigned long HUGEPAGE_SIZE;
+
+#define MOCK_PAGE_SIZE (PAGE_SIZE / 2)
+#define MOCK_HUGE_PAGE_SIZE (512 * MOCK_PAGE_SIZE)
+
+static unsigned long get_huge_page_size(void)
+{
+ char buf[80];
+ int ret;
+ int fd;
+
+ fd = open("/sys/kernel/mm/transparent_hugepage/hpage_pmd_size",
+ O_RDONLY);
+ if (fd < 0)
+ return 2 * 1024 * 1024;
+
+ ret = read(fd, buf, sizeof(buf));
+ close(fd);
+ if (ret <= 0 || ret == sizeof(buf))
+ return 2 * 1024 * 1024;
+ buf[ret] = 0;
+ return strtoul(buf, NULL, 10);
+}
+
+static __attribute__((constructor)) void setup_sizes(void)
+{
+ void *vrc;
+ int rc;
+
+ PAGE_SIZE = sysconf(_SC_PAGE_SIZE);
+ HUGEPAGE_SIZE = get_huge_page_size();
+
+ BUFFER_SIZE = PAGE_SIZE * 16;
+ rc = posix_memalign(&buffer, HUGEPAGE_SIZE, BUFFER_SIZE);
+ assert(!rc);
+ assert(buffer);
+ assert((uintptr_t)buffer % HUGEPAGE_SIZE == 0);
+ vrc = mmap(buffer, BUFFER_SIZE, PROT_READ | PROT_WRITE,
+ MAP_SHARED | MAP_ANONYMOUS | MAP_FIXED, -1, 0);
+ assert(vrc == buffer);
+}
+
+FIXTURE(iommufd)
+{
+ int fd;
+};
+
+FIXTURE_SETUP(iommufd)
+{
+ self->fd = open("/dev/iommu", O_RDWR);
+ ASSERT_NE(-1, self->fd);
+}
+
+FIXTURE_TEARDOWN(iommufd)
+{
+ teardown_iommufd(self->fd, _metadata);
+}
+
+TEST_F(iommufd, simple_close)
+{
+}
+
+TEST_F(iommufd, cmd_fail)
+{
+ struct iommu_destroy cmd = { .size = sizeof(cmd), .id = 0 };
+
+ /* object id is invalid */
+ EXPECT_ERRNO(ENOENT, _test_ioctl_destroy(self->fd, 0));
+ /* Bad pointer */
+ EXPECT_ERRNO(EFAULT, ioctl(self->fd, IOMMU_DESTROY, NULL));
+ /* Unknown ioctl */
+ EXPECT_ERRNO(ENOTTY,
+ ioctl(self->fd, _IO(IOMMUFD_TYPE, IOMMUFD_CMD_BASE - 1),
+ &cmd));
+}
+
+TEST_F(iommufd, cmd_length)
+{
+#define TEST_LENGTH(_struct, _ioctl, _last) \
+ { \
+ size_t min_size = offsetofend(struct _struct, _last); \
+ struct { \
+ struct _struct cmd; \
+ uint8_t extra; \
+ } cmd = { .cmd = { .size = min_size - 1 }, \
+ .extra = UINT8_MAX }; \
+ int old_errno; \
+ int rc; \
+ \
+ EXPECT_ERRNO(EINVAL, ioctl(self->fd, _ioctl, &cmd)); \
+ cmd.cmd.size = sizeof(struct _struct) + 1; \
+ EXPECT_ERRNO(E2BIG, ioctl(self->fd, _ioctl, &cmd)); \
+ cmd.cmd.size = sizeof(struct _struct); \
+ rc = ioctl(self->fd, _ioctl, &cmd); \
+ old_errno = errno; \
+ cmd.cmd.size = sizeof(struct _struct) + 1; \
+ cmd.extra = 0; \
+ if (rc) { \
+ EXPECT_ERRNO(old_errno, \
+ ioctl(self->fd, _ioctl, &cmd)); \
+ } else { \
+ ASSERT_EQ(0, ioctl(self->fd, _ioctl, &cmd)); \
+ } \
+ }
+
+ TEST_LENGTH(iommu_destroy, IOMMU_DESTROY, id);
+ TEST_LENGTH(iommu_hw_info, IOMMU_GET_HW_INFO, __reserved);
+ TEST_LENGTH(iommu_hwpt_alloc, IOMMU_HWPT_ALLOC, __reserved);
+ TEST_LENGTH(iommu_hwpt_invalidate, IOMMU_HWPT_INVALIDATE, __reserved);
+ TEST_LENGTH(iommu_ioas_alloc, IOMMU_IOAS_ALLOC, out_ioas_id);
+ TEST_LENGTH(iommu_ioas_iova_ranges, IOMMU_IOAS_IOVA_RANGES,
+ out_iova_alignment);
+ TEST_LENGTH(iommu_ioas_allow_iovas, IOMMU_IOAS_ALLOW_IOVAS,
+ allowed_iovas);
+ TEST_LENGTH(iommu_ioas_map, IOMMU_IOAS_MAP, iova);
+ TEST_LENGTH(iommu_ioas_copy, IOMMU_IOAS_COPY, src_iova);
+ TEST_LENGTH(iommu_ioas_unmap, IOMMU_IOAS_UNMAP, length);
+ TEST_LENGTH(iommu_option, IOMMU_OPTION, val64);
+ TEST_LENGTH(iommu_vfio_ioas, IOMMU_VFIO_IOAS, __reserved);
+#undef TEST_LENGTH
+}
+
+TEST_F(iommufd, cmd_ex_fail)
+{
+ struct {
+ struct iommu_destroy cmd;
+ __u64 future;
+ } cmd = { .cmd = { .size = sizeof(cmd), .id = 0 } };
+
+ /* object id is invalid and command is longer */
+ EXPECT_ERRNO(ENOENT, ioctl(self->fd, IOMMU_DESTROY, &cmd));
+ /* future area is non-zero */
+ cmd.future = 1;
+ EXPECT_ERRNO(E2BIG, ioctl(self->fd, IOMMU_DESTROY, &cmd));
+ /* Original command "works" */
+ cmd.cmd.size = sizeof(cmd.cmd);
+ EXPECT_ERRNO(ENOENT, ioctl(self->fd, IOMMU_DESTROY, &cmd));
+ /* Short command fails */
+ cmd.cmd.size = sizeof(cmd.cmd) - 1;
+ EXPECT_ERRNO(EINVAL, ioctl(self->fd, IOMMU_DESTROY, &cmd));
+}
+
+TEST_F(iommufd, global_options)
+{
+ struct iommu_option cmd = {
+ .size = sizeof(cmd),
+ .option_id = IOMMU_OPTION_RLIMIT_MODE,
+ .op = IOMMU_OPTION_OP_GET,
+ .val64 = 1,
+ };
+
+ cmd.option_id = IOMMU_OPTION_RLIMIT_MODE;
+ ASSERT_EQ(0, ioctl(self->fd, IOMMU_OPTION, &cmd));
+ ASSERT_EQ(0, cmd.val64);
+
+ /* This requires root */
+ cmd.op = IOMMU_OPTION_OP_SET;
+ cmd.val64 = 1;
+ ASSERT_EQ(0, ioctl(self->fd, IOMMU_OPTION, &cmd));
+ cmd.val64 = 2;
+ EXPECT_ERRNO(EINVAL, ioctl(self->fd, IOMMU_OPTION, &cmd));
+
+ cmd.op = IOMMU_OPTION_OP_GET;
+ ASSERT_EQ(0, ioctl(self->fd, IOMMU_OPTION, &cmd));
+ ASSERT_EQ(1, cmd.val64);
+
+ cmd.op = IOMMU_OPTION_OP_SET;
+ cmd.val64 = 0;
+ ASSERT_EQ(0, ioctl(self->fd, IOMMU_OPTION, &cmd));
+
+ cmd.op = IOMMU_OPTION_OP_GET;
+ cmd.option_id = IOMMU_OPTION_HUGE_PAGES;
+ EXPECT_ERRNO(ENOENT, ioctl(self->fd, IOMMU_OPTION, &cmd));
+ cmd.op = IOMMU_OPTION_OP_SET;
+ EXPECT_ERRNO(ENOENT, ioctl(self->fd, IOMMU_OPTION, &cmd));
+}
+
+FIXTURE(iommufd_ioas)
+{
+ int fd;
+ uint32_t ioas_id;
+ uint32_t stdev_id;
+ uint32_t hwpt_id;
+ uint32_t device_id;
+ uint64_t base_iova;
+};
+
+FIXTURE_VARIANT(iommufd_ioas)
+{
+ unsigned int mock_domains;
+ unsigned int memory_limit;
+};
+
+FIXTURE_SETUP(iommufd_ioas)
+{
+ unsigned int i;
+
+
+ self->fd = open("/dev/iommu", O_RDWR);
+ ASSERT_NE(-1, self->fd);
+ test_ioctl_ioas_alloc(&self->ioas_id);
+
+ if (!variant->memory_limit) {
+ test_ioctl_set_default_memory_limit();
+ } else {
+ test_ioctl_set_temp_memory_limit(variant->memory_limit);
+ }
+
+ for (i = 0; i != variant->mock_domains; i++) {
+ test_cmd_mock_domain(self->ioas_id, &self->stdev_id,
+ &self->hwpt_id, &self->device_id);
+ self->base_iova = MOCK_APERTURE_START;
+ }
+}
+
+FIXTURE_TEARDOWN(iommufd_ioas)
+{
+ test_ioctl_set_default_memory_limit();
+ teardown_iommufd(self->fd, _metadata);
+}
+
+FIXTURE_VARIANT_ADD(iommufd_ioas, no_domain)
+{
+};
+
+FIXTURE_VARIANT_ADD(iommufd_ioas, mock_domain)
+{
+ .mock_domains = 1,
+};
+
+FIXTURE_VARIANT_ADD(iommufd_ioas, two_mock_domain)
+{
+ .mock_domains = 2,
+};
+
+FIXTURE_VARIANT_ADD(iommufd_ioas, mock_domain_limit)
+{
+ .mock_domains = 1,
+ .memory_limit = 16,
+};
+
+TEST_F(iommufd_ioas, ioas_auto_destroy)
+{
+}
+
+TEST_F(iommufd_ioas, ioas_destroy)
+{
+ if (self->stdev_id) {
+ /* IOAS cannot be freed while a device has a HWPT using it */
+ EXPECT_ERRNO(EBUSY,
+ _test_ioctl_destroy(self->fd, self->ioas_id));
+ } else {
+ /* Can allocate and manually free an IOAS table */
+ test_ioctl_destroy(self->ioas_id);
+ }
+}
+
+TEST_F(iommufd_ioas, alloc_hwpt_nested)
+{
+ const uint32_t min_data_len =
+ offsetofend(struct iommu_hwpt_selftest, iotlb);
+ struct iommu_hwpt_selftest data = {
+ .iotlb = IOMMU_TEST_IOTLB_DEFAULT,
+ };
+ struct iommu_hwpt_invalidate_selftest inv_reqs[2] = {};
+ uint32_t nested_hwpt_id[2] = {};
+ uint32_t num_inv;
+ uint32_t parent_hwpt_id = 0;
+ uint32_t parent_hwpt_id_not_work = 0;
+ uint32_t test_hwpt_id = 0;
+
+ if (self->device_id) {
+ /* Negative tests */
+ test_err_hwpt_alloc(ENOENT, self->ioas_id, self->device_id, 0,
+ &test_hwpt_id);
+ test_err_hwpt_alloc(EINVAL, self->device_id, self->device_id, 0,
+ &test_hwpt_id);
+
+ test_cmd_hwpt_alloc(self->device_id, self->ioas_id,
+ IOMMU_HWPT_ALLOC_NEST_PARENT,
+ &parent_hwpt_id);
+
+ test_cmd_hwpt_alloc(self->device_id, self->ioas_id, 0,
+ &parent_hwpt_id_not_work);
+
+ /* Negative nested tests */
+ test_err_hwpt_alloc_nested(EINVAL, self->device_id,
+ parent_hwpt_id, 0,
+ &nested_hwpt_id[0],
+ IOMMU_HWPT_DATA_NONE, &data,
+ sizeof(data));
+ test_err_hwpt_alloc_nested(EOPNOTSUPP, self->device_id,
+ parent_hwpt_id, 0,
+ &nested_hwpt_id[0],
+ IOMMU_HWPT_DATA_SELFTEST + 1, &data,
+ sizeof(data));
+ test_err_hwpt_alloc_nested(EINVAL, self->device_id,
+ parent_hwpt_id, 0,
+ &nested_hwpt_id[0],
+ IOMMU_HWPT_DATA_SELFTEST, &data,
+ min_data_len - 1);
+ test_err_hwpt_alloc_nested(EFAULT, self->device_id,
+ parent_hwpt_id, 0,
+ &nested_hwpt_id[0],
+ IOMMU_HWPT_DATA_SELFTEST, NULL,
+ sizeof(data));
+ test_err_hwpt_alloc_nested(
+ EOPNOTSUPP, self->device_id, parent_hwpt_id,
+ IOMMU_HWPT_ALLOC_NEST_PARENT, &nested_hwpt_id[0],
+ IOMMU_HWPT_DATA_SELFTEST, &data, sizeof(data));
+ test_err_hwpt_alloc_nested(EINVAL, self->device_id,
+ parent_hwpt_id_not_work, 0,
+ &nested_hwpt_id[0],
+ IOMMU_HWPT_DATA_SELFTEST, &data,
+ sizeof(data));
+
+ /* Allocate two nested hwpts sharing one common parent hwpt */
+ test_cmd_hwpt_alloc_nested(self->device_id, parent_hwpt_id, 0,
+ &nested_hwpt_id[0],
+ IOMMU_HWPT_DATA_SELFTEST, &data,
+ sizeof(data));
+ test_cmd_hwpt_alloc_nested(self->device_id, parent_hwpt_id, 0,
+ &nested_hwpt_id[1],
+ IOMMU_HWPT_DATA_SELFTEST, &data,
+ sizeof(data));
+ test_cmd_hwpt_check_iotlb_all(nested_hwpt_id[0],
+ IOMMU_TEST_IOTLB_DEFAULT);
+ test_cmd_hwpt_check_iotlb_all(nested_hwpt_id[1],
+ IOMMU_TEST_IOTLB_DEFAULT);
+
+ /* Negative test: a nested hwpt on top of a nested hwpt */
+ test_err_hwpt_alloc_nested(EINVAL, self->device_id,
+ nested_hwpt_id[0], 0, &test_hwpt_id,
+ IOMMU_HWPT_DATA_SELFTEST, &data,
+ sizeof(data));
+ /* Negative test: parent hwpt now cannot be freed */
+ EXPECT_ERRNO(EBUSY,
+ _test_ioctl_destroy(self->fd, parent_hwpt_id));
+
+ /* hwpt_invalidate only supports a user-managed hwpt (nested) */
+ num_inv = 1;
+ test_err_hwpt_invalidate(ENOENT, parent_hwpt_id, inv_reqs,
+ IOMMU_HWPT_INVALIDATE_DATA_SELFTEST,
+ sizeof(*inv_reqs), &num_inv);
+ assert(!num_inv);
+
+ /* Check data_type by passing zero-length array */
+ num_inv = 0;
+ test_cmd_hwpt_invalidate(nested_hwpt_id[0], inv_reqs,
+ IOMMU_HWPT_INVALIDATE_DATA_SELFTEST,
+ sizeof(*inv_reqs), &num_inv);
+ assert(!num_inv);
+
+ /* Negative test: Invalid data_type */
+ num_inv = 1;
+ test_err_hwpt_invalidate(EINVAL, nested_hwpt_id[0], inv_reqs,
+ IOMMU_HWPT_INVALIDATE_DATA_SELFTEST_INVALID,
+ sizeof(*inv_reqs), &num_inv);
+ assert(!num_inv);
+
+ /* Negative test: structure size sanity */
+ num_inv = 1;
+ test_err_hwpt_invalidate(EINVAL, nested_hwpt_id[0], inv_reqs,
+ IOMMU_HWPT_INVALIDATE_DATA_SELFTEST,
+ sizeof(*inv_reqs) + 1, &num_inv);
+ assert(!num_inv);
+
+ num_inv = 1;
+ test_err_hwpt_invalidate(EINVAL, nested_hwpt_id[0], inv_reqs,
+ IOMMU_HWPT_INVALIDATE_DATA_SELFTEST,
+ 1, &num_inv);
+ assert(!num_inv);
+
+ /* Negative test: invalid flag is passed */
+ num_inv = 1;
+ inv_reqs[0].flags = 0xffffffff;
+ test_err_hwpt_invalidate(EOPNOTSUPP, nested_hwpt_id[0], inv_reqs,
+ IOMMU_HWPT_INVALIDATE_DATA_SELFTEST,
+ sizeof(*inv_reqs), &num_inv);
+ assert(!num_inv);
+
+ /* Negative test: invalid data_uptr when array is not empty */
+ num_inv = 1;
+ inv_reqs[0].flags = 0;
+ test_err_hwpt_invalidate(EINVAL, nested_hwpt_id[0], NULL,
+ IOMMU_HWPT_INVALIDATE_DATA_SELFTEST,
+ sizeof(*inv_reqs), &num_inv);
+ assert(!num_inv);
+
+ /* Negative test: invalid entry_len when array is not empty */
+ num_inv = 1;
+ inv_reqs[0].flags = 0;
+ test_err_hwpt_invalidate(EINVAL, nested_hwpt_id[0], inv_reqs,
+ IOMMU_HWPT_INVALIDATE_DATA_SELFTEST,
+ 0, &num_inv);
+ assert(!num_inv);
+
+ /* Negative test: invalid iotlb_id */
+ num_inv = 1;
+ inv_reqs[0].flags = 0;
+ inv_reqs[0].iotlb_id = MOCK_NESTED_DOMAIN_IOTLB_ID_MAX + 1;
+ test_err_hwpt_invalidate(EINVAL, nested_hwpt_id[0], inv_reqs,
+ IOMMU_HWPT_INVALIDATE_DATA_SELFTEST,
+ sizeof(*inv_reqs), &num_inv);
+ assert(!num_inv);
+
+ /*
+ * Invalidate the 1st iotlb entry but fail the 2nd request
+ * due to invalid flags configuration in the 2nd request.
+ */
+ num_inv = 2;
+ inv_reqs[0].flags = 0;
+ inv_reqs[0].iotlb_id = 0;
+ inv_reqs[1].flags = 0xffffffff;
+ inv_reqs[1].iotlb_id = 1;
+ test_err_hwpt_invalidate(EOPNOTSUPP, nested_hwpt_id[0], inv_reqs,
+ IOMMU_HWPT_INVALIDATE_DATA_SELFTEST,
+ sizeof(*inv_reqs), &num_inv);
+ assert(num_inv == 1);
+ test_cmd_hwpt_check_iotlb(nested_hwpt_id[0], 0, 0);
+ test_cmd_hwpt_check_iotlb(nested_hwpt_id[0], 1,
+ IOMMU_TEST_IOTLB_DEFAULT);
+ test_cmd_hwpt_check_iotlb(nested_hwpt_id[0], 2,
+ IOMMU_TEST_IOTLB_DEFAULT);
+ test_cmd_hwpt_check_iotlb(nested_hwpt_id[0], 3,
+ IOMMU_TEST_IOTLB_DEFAULT);
+
+ /*
+ * Invalidate the 1st iotlb entry but fail the 2nd request
+ * due to invalid iotlb_id configuration in the 2nd request.
+ */
+ num_inv = 2;
+ inv_reqs[0].flags = 0;
+ inv_reqs[0].iotlb_id = 0;
+ inv_reqs[1].flags = 0;
+ inv_reqs[1].iotlb_id = MOCK_NESTED_DOMAIN_IOTLB_ID_MAX + 1;
+ test_err_hwpt_invalidate(EINVAL, nested_hwpt_id[0], inv_reqs,
+ IOMMU_HWPT_INVALIDATE_DATA_SELFTEST,
+ sizeof(*inv_reqs), &num_inv);
+ assert(num_inv == 1);
+ test_cmd_hwpt_check_iotlb(nested_hwpt_id[0], 0, 0);
+ test_cmd_hwpt_check_iotlb(nested_hwpt_id[0], 1,
+ IOMMU_TEST_IOTLB_DEFAULT);
+ test_cmd_hwpt_check_iotlb(nested_hwpt_id[0], 2,
+ IOMMU_TEST_IOTLB_DEFAULT);
+ test_cmd_hwpt_check_iotlb(nested_hwpt_id[0], 3,
+ IOMMU_TEST_IOTLB_DEFAULT);
+
+ /* Invalidate the 2nd iotlb entry and verify */
+ num_inv = 1;
+ inv_reqs[0].flags = 0;
+ inv_reqs[0].iotlb_id = 1;
+ test_cmd_hwpt_invalidate(nested_hwpt_id[0], inv_reqs,
+ IOMMU_HWPT_INVALIDATE_DATA_SELFTEST,
+ sizeof(*inv_reqs), &num_inv);
+ assert(num_inv == 1);
+ test_cmd_hwpt_check_iotlb(nested_hwpt_id[0], 0, 0);
+ test_cmd_hwpt_check_iotlb(nested_hwpt_id[0], 1, 0);
+ test_cmd_hwpt_check_iotlb(nested_hwpt_id[0], 2,
+ IOMMU_TEST_IOTLB_DEFAULT);
+ test_cmd_hwpt_check_iotlb(nested_hwpt_id[0], 3,
+ IOMMU_TEST_IOTLB_DEFAULT);
+
+ /* Invalidate the 3rd and 4th iotlb entries and verify */
+ num_inv = 2;
+ inv_reqs[0].flags = 0;
+ inv_reqs[0].iotlb_id = 2;
+ inv_reqs[1].flags = 0;
+ inv_reqs[1].iotlb_id = 3;
+ test_cmd_hwpt_invalidate(nested_hwpt_id[0], inv_reqs,
+ IOMMU_HWPT_INVALIDATE_DATA_SELFTEST,
+ sizeof(*inv_reqs), &num_inv);
+ assert(num_inv == 2);
+ test_cmd_hwpt_check_iotlb_all(nested_hwpt_id[0], 0);
+
+ /* Invalidate all iotlb entries for nested_hwpt_id[1] and verify */
+ num_inv = 1;
+ inv_reqs[0].flags = IOMMU_TEST_INVALIDATE_FLAG_ALL;
+ test_cmd_hwpt_invalidate(nested_hwpt_id[1], inv_reqs,
+ IOMMU_HWPT_INVALIDATE_DATA_SELFTEST,
+ sizeof(*inv_reqs), &num_inv);
+ assert(num_inv == 1);
+ test_cmd_hwpt_check_iotlb_all(nested_hwpt_id[1], 0);
+
+ /* Attach device to nested_hwpt_id[0] that then will be busy */
+ test_cmd_mock_domain_replace(self->stdev_id, nested_hwpt_id[0]);
+ EXPECT_ERRNO(EBUSY,
+ _test_ioctl_destroy(self->fd, nested_hwpt_id[0]));
+
+ /* Switch from nested_hwpt_id[0] to nested_hwpt_id[1] */
+ test_cmd_mock_domain_replace(self->stdev_id, nested_hwpt_id[1]);
+ EXPECT_ERRNO(EBUSY,
+ _test_ioctl_destroy(self->fd, nested_hwpt_id[1]));
+ test_ioctl_destroy(nested_hwpt_id[0]);
+
+ /* Detach from nested_hwpt_id[1] and destroy it */
+ test_cmd_mock_domain_replace(self->stdev_id, parent_hwpt_id);
+ test_ioctl_destroy(nested_hwpt_id[1]);
+
+ /* Detach from the parent hw_pagetable and destroy it */
+ test_cmd_mock_domain_replace(self->stdev_id, self->ioas_id);
+ test_ioctl_destroy(parent_hwpt_id);
+ test_ioctl_destroy(parent_hwpt_id_not_work);
+ } else {
+ test_err_hwpt_alloc(ENOENT, self->device_id, self->ioas_id, 0,
+ &parent_hwpt_id);
+ test_err_hwpt_alloc_nested(ENOENT, self->device_id,
+ parent_hwpt_id, 0,
+ &nested_hwpt_id[0],
+ IOMMU_HWPT_DATA_SELFTEST, &data,
+ sizeof(data));
+ test_err_hwpt_alloc_nested(ENOENT, self->device_id,
+ parent_hwpt_id, 0,
+ &nested_hwpt_id[1],
+ IOMMU_HWPT_DATA_SELFTEST, &data,
+ sizeof(data));
+ test_err_mock_domain_replace(ENOENT, self->stdev_id,
+ nested_hwpt_id[0]);
+ test_err_mock_domain_replace(ENOENT, self->stdev_id,
+ nested_hwpt_id[1]);
+ }
+}
+
+TEST_F(iommufd_ioas, hwpt_attach)
+{
+ /* Create a device attached directly to a hwpt */
+ if (self->stdev_id) {
+ test_cmd_mock_domain(self->hwpt_id, NULL, NULL, NULL);
+ } else {
+ test_err_mock_domain(ENOENT, self->hwpt_id, NULL, NULL);
+ }
+}
+
+TEST_F(iommufd_ioas, ioas_area_destroy)
+{
+ /* Adding an area does not change ability to destroy */
+ test_ioctl_ioas_map_fixed(buffer, PAGE_SIZE, self->base_iova);
+ if (self->stdev_id)
+ EXPECT_ERRNO(EBUSY,
+ _test_ioctl_destroy(self->fd, self->ioas_id));
+ else
+ test_ioctl_destroy(self->ioas_id);
+}
+
+TEST_F(iommufd_ioas, ioas_area_auto_destroy)
+{
+ int i;
+
+ /* Can allocate and automatically free an IOAS table with many areas */
+ for (i = 0; i != 10; i++) {
+ test_ioctl_ioas_map_fixed(buffer, PAGE_SIZE,
+ self->base_iova + i * PAGE_SIZE);
+ }
+}
+
+TEST_F(iommufd_ioas, get_hw_info)
+{
+ struct iommu_test_hw_info buffer_exact;
+ struct iommu_test_hw_info_buffer_larger {
+ struct iommu_test_hw_info info;
+ uint64_t trailing_bytes;
+ } buffer_larger;
+ struct iommu_test_hw_info_buffer_smaller {
+ __u32 flags;
+ } buffer_smaller;
+
+ if (self->device_id) {
+ /* Provide a zero-size user_buffer */
+ test_cmd_get_hw_info(self->device_id, NULL, 0);
+ /* Provide a user_buffer with exact size */
+ test_cmd_get_hw_info(self->device_id, &buffer_exact, sizeof(buffer_exact));
+ /*
+ * Provide a user_buffer with size larger than the exact size to check if
+ * kernel zero the trailing bytes.
+ */
+ test_cmd_get_hw_info(self->device_id, &buffer_larger, sizeof(buffer_larger));
+ /*
+ * Provide a user_buffer with size smaller than the exact size to check if
+ * the fields within the size range still gets updated.
+ */
+ test_cmd_get_hw_info(self->device_id, &buffer_smaller, sizeof(buffer_smaller));
+ } else {
+ test_err_get_hw_info(ENOENT, self->device_id,
+ &buffer_exact, sizeof(buffer_exact));
+ test_err_get_hw_info(ENOENT, self->device_id,
+ &buffer_larger, sizeof(buffer_larger));
+ }
+}
+
+TEST_F(iommufd_ioas, area)
+{
+ int i;
+
+ /* Unmap fails if nothing is mapped */
+ for (i = 0; i != 10; i++)
+ test_err_ioctl_ioas_unmap(ENOENT, i * PAGE_SIZE, PAGE_SIZE);
+
+ /* Unmap works */
+ for (i = 0; i != 10; i++)
+ test_ioctl_ioas_map_fixed(buffer, PAGE_SIZE,
+ self->base_iova + i * PAGE_SIZE);
+ for (i = 0; i != 10; i++)
+ test_ioctl_ioas_unmap(self->base_iova + i * PAGE_SIZE,
+ PAGE_SIZE);
+
+ /* Split fails */
+ test_ioctl_ioas_map_fixed(buffer, PAGE_SIZE * 2,
+ self->base_iova + 16 * PAGE_SIZE);
+ test_err_ioctl_ioas_unmap(ENOENT, self->base_iova + 16 * PAGE_SIZE,
+ PAGE_SIZE);
+ test_err_ioctl_ioas_unmap(ENOENT, self->base_iova + 17 * PAGE_SIZE,
+ PAGE_SIZE);
+
+ /* Over map fails */
+ test_err_ioctl_ioas_map_fixed(EEXIST, buffer, PAGE_SIZE * 2,
+ self->base_iova + 16 * PAGE_SIZE);
+ test_err_ioctl_ioas_map_fixed(EEXIST, buffer, PAGE_SIZE,
+ self->base_iova + 16 * PAGE_SIZE);
+ test_err_ioctl_ioas_map_fixed(EEXIST, buffer, PAGE_SIZE,
+ self->base_iova + 17 * PAGE_SIZE);
+ test_err_ioctl_ioas_map_fixed(EEXIST, buffer, PAGE_SIZE * 2,
+ self->base_iova + 15 * PAGE_SIZE);
+ test_err_ioctl_ioas_map_fixed(EEXIST, buffer, PAGE_SIZE * 3,
+ self->base_iova + 15 * PAGE_SIZE);
+
+ /* unmap all works */
+ test_ioctl_ioas_unmap(0, UINT64_MAX);
+
+ /* Unmap all succeeds on an empty IOAS */
+ test_ioctl_ioas_unmap(0, UINT64_MAX);
+}
+
+TEST_F(iommufd_ioas, unmap_fully_contained_areas)
+{
+ uint64_t unmap_len;
+ int i;
+
+ /* Give no_domain some space to rewind base_iova */
+ self->base_iova += 4 * PAGE_SIZE;
+
+ for (i = 0; i != 4; i++)
+ test_ioctl_ioas_map_fixed(buffer, 8 * PAGE_SIZE,
+ self->base_iova + i * 16 * PAGE_SIZE);
+
+ /* Unmap not fully contained area doesn't work */
+ test_err_ioctl_ioas_unmap(ENOENT, self->base_iova - 4 * PAGE_SIZE,
+ 8 * PAGE_SIZE);
+ test_err_ioctl_ioas_unmap(ENOENT,
+ self->base_iova + 3 * 16 * PAGE_SIZE +
+ 8 * PAGE_SIZE - 4 * PAGE_SIZE,
+ 8 * PAGE_SIZE);
+
+ /* Unmap fully contained areas works */
+ ASSERT_EQ(0, _test_ioctl_ioas_unmap(self->fd, self->ioas_id,
+ self->base_iova - 4 * PAGE_SIZE,
+ 3 * 16 * PAGE_SIZE + 8 * PAGE_SIZE +
+ 4 * PAGE_SIZE,
+ &unmap_len));
+ ASSERT_EQ(32 * PAGE_SIZE, unmap_len);
+}
+
+TEST_F(iommufd_ioas, area_auto_iova)
+{
+ struct iommu_test_cmd test_cmd = {
+ .size = sizeof(test_cmd),
+ .op = IOMMU_TEST_OP_ADD_RESERVED,
+ .id = self->ioas_id,
+ .add_reserved = { .start = PAGE_SIZE * 4,
+ .length = PAGE_SIZE * 100 },
+ };
+ struct iommu_iova_range ranges[1] = {};
+ struct iommu_ioas_allow_iovas allow_cmd = {
+ .size = sizeof(allow_cmd),
+ .ioas_id = self->ioas_id,
+ .num_iovas = 1,
+ .allowed_iovas = (uintptr_t)ranges,
+ };
+ __u64 iovas[10];
+ int i;
+
+ /* Simple 4k pages */
+ for (i = 0; i != 10; i++)
+ test_ioctl_ioas_map(buffer, PAGE_SIZE, &iovas[i]);
+ for (i = 0; i != 10; i++)
+ test_ioctl_ioas_unmap(iovas[i], PAGE_SIZE);
+
+ /* Kernel automatically aligns IOVAs properly */
+ for (i = 0; i != 10; i++) {
+ size_t length = PAGE_SIZE * (i + 1);
+
+ if (self->stdev_id) {
+ test_ioctl_ioas_map(buffer, length, &iovas[i]);
+ } else {
+ test_ioctl_ioas_map((void *)(1UL << 31), length,
+ &iovas[i]);
+ }
+ EXPECT_EQ(0, iovas[i] % (1UL << (ffs(length) - 1)));
+ }
+ for (i = 0; i != 10; i++)
+ test_ioctl_ioas_unmap(iovas[i], PAGE_SIZE * (i + 1));
+
+ /* Avoids a reserved region */
+ ASSERT_EQ(0,
+ ioctl(self->fd, _IOMMU_TEST_CMD(IOMMU_TEST_OP_ADD_RESERVED),
+ &test_cmd));
+ for (i = 0; i != 10; i++) {
+ size_t length = PAGE_SIZE * (i + 1);
+
+ test_ioctl_ioas_map(buffer, length, &iovas[i]);
+ EXPECT_EQ(0, iovas[i] % (1UL << (ffs(length) - 1)));
+ EXPECT_EQ(false,
+ iovas[i] > test_cmd.add_reserved.start &&
+ iovas[i] <
+ test_cmd.add_reserved.start +
+ test_cmd.add_reserved.length);
+ }
+ for (i = 0; i != 10; i++)
+ test_ioctl_ioas_unmap(iovas[i], PAGE_SIZE * (i + 1));
+
+ /* Allowed region intersects with a reserved region */
+ ranges[0].start = PAGE_SIZE;
+ ranges[0].last = PAGE_SIZE * 600;
+ EXPECT_ERRNO(EADDRINUSE,
+ ioctl(self->fd, IOMMU_IOAS_ALLOW_IOVAS, &allow_cmd));
+
+ /* Allocate from an allowed region */
+ if (self->stdev_id) {
+ ranges[0].start = MOCK_APERTURE_START + PAGE_SIZE;
+ ranges[0].last = MOCK_APERTURE_START + PAGE_SIZE * 600 - 1;
+ } else {
+ ranges[0].start = PAGE_SIZE * 200;
+ ranges[0].last = PAGE_SIZE * 600 - 1;
+ }
+ ASSERT_EQ(0, ioctl(self->fd, IOMMU_IOAS_ALLOW_IOVAS, &allow_cmd));
+ for (i = 0; i != 10; i++) {
+ size_t length = PAGE_SIZE * (i + 1);
+
+ test_ioctl_ioas_map(buffer, length, &iovas[i]);
+ EXPECT_EQ(0, iovas[i] % (1UL << (ffs(length) - 1)));
+ EXPECT_EQ(true, iovas[i] >= ranges[0].start);
+ EXPECT_EQ(true, iovas[i] <= ranges[0].last);
+ EXPECT_EQ(true, iovas[i] + length > ranges[0].start);
+ EXPECT_EQ(true, iovas[i] + length <= ranges[0].last + 1);
+ }
+ for (i = 0; i != 10; i++)
+ test_ioctl_ioas_unmap(iovas[i], PAGE_SIZE * (i + 1));
+}
+
+TEST_F(iommufd_ioas, area_allowed)
+{
+ struct iommu_test_cmd test_cmd = {
+ .size = sizeof(test_cmd),
+ .op = IOMMU_TEST_OP_ADD_RESERVED,
+ .id = self->ioas_id,
+ .add_reserved = { .start = PAGE_SIZE * 4,
+ .length = PAGE_SIZE * 100 },
+ };
+ struct iommu_iova_range ranges[1] = {};
+ struct iommu_ioas_allow_iovas allow_cmd = {
+ .size = sizeof(allow_cmd),
+ .ioas_id = self->ioas_id,
+ .num_iovas = 1,
+ .allowed_iovas = (uintptr_t)ranges,
+ };
+
+ /* Reserved intersects an allowed */
+ allow_cmd.num_iovas = 1;
+ ranges[0].start = self->base_iova;
+ ranges[0].last = ranges[0].start + PAGE_SIZE * 600;
+ ASSERT_EQ(0, ioctl(self->fd, IOMMU_IOAS_ALLOW_IOVAS, &allow_cmd));
+ test_cmd.add_reserved.start = ranges[0].start + PAGE_SIZE;
+ test_cmd.add_reserved.length = PAGE_SIZE;
+ EXPECT_ERRNO(EADDRINUSE,
+ ioctl(self->fd,
+ _IOMMU_TEST_CMD(IOMMU_TEST_OP_ADD_RESERVED),
+ &test_cmd));
+ allow_cmd.num_iovas = 0;
+ ASSERT_EQ(0, ioctl(self->fd, IOMMU_IOAS_ALLOW_IOVAS, &allow_cmd));
+
+ /* Allowed intersects a reserved */
+ ASSERT_EQ(0,
+ ioctl(self->fd, _IOMMU_TEST_CMD(IOMMU_TEST_OP_ADD_RESERVED),
+ &test_cmd));
+ allow_cmd.num_iovas = 1;
+ ranges[0].start = self->base_iova;
+ ranges[0].last = ranges[0].start + PAGE_SIZE * 600;
+ EXPECT_ERRNO(EADDRINUSE,
+ ioctl(self->fd, IOMMU_IOAS_ALLOW_IOVAS, &allow_cmd));
+}
+
+TEST_F(iommufd_ioas, copy_area)
+{
+ struct iommu_ioas_copy copy_cmd = {
+ .size = sizeof(copy_cmd),
+ .flags = IOMMU_IOAS_MAP_FIXED_IOVA,
+ .dst_ioas_id = self->ioas_id,
+ .src_ioas_id = self->ioas_id,
+ .length = PAGE_SIZE,
+ };
+
+ test_ioctl_ioas_map_fixed(buffer, PAGE_SIZE, self->base_iova);
+
+ /* Copy inside a single IOAS */
+ copy_cmd.src_iova = self->base_iova;
+ copy_cmd.dst_iova = self->base_iova + PAGE_SIZE;
+ ASSERT_EQ(0, ioctl(self->fd, IOMMU_IOAS_COPY, &copy_cmd));
+
+ /* Copy between IOAS's */
+ copy_cmd.src_iova = self->base_iova;
+ copy_cmd.dst_iova = 0;
+ test_ioctl_ioas_alloc(&copy_cmd.dst_ioas_id);
+ ASSERT_EQ(0, ioctl(self->fd, IOMMU_IOAS_COPY, &copy_cmd));
+}
+
+TEST_F(iommufd_ioas, iova_ranges)
+{
+ struct iommu_test_cmd test_cmd = {
+ .size = sizeof(test_cmd),
+ .op = IOMMU_TEST_OP_ADD_RESERVED,
+ .id = self->ioas_id,
+ .add_reserved = { .start = PAGE_SIZE, .length = PAGE_SIZE },
+ };
+ struct iommu_iova_range *ranges = buffer;
+ struct iommu_ioas_iova_ranges ranges_cmd = {
+ .size = sizeof(ranges_cmd),
+ .ioas_id = self->ioas_id,
+ .num_iovas = BUFFER_SIZE / sizeof(*ranges),
+ .allowed_iovas = (uintptr_t)ranges,
+ };
+
+ /* Range can be read */
+ ASSERT_EQ(0, ioctl(self->fd, IOMMU_IOAS_IOVA_RANGES, &ranges_cmd));
+ EXPECT_EQ(1, ranges_cmd.num_iovas);
+ if (!self->stdev_id) {
+ EXPECT_EQ(0, ranges[0].start);
+ EXPECT_EQ(SIZE_MAX, ranges[0].last);
+ EXPECT_EQ(1, ranges_cmd.out_iova_alignment);
+ } else {
+ EXPECT_EQ(MOCK_APERTURE_START, ranges[0].start);
+ EXPECT_EQ(MOCK_APERTURE_LAST, ranges[0].last);
+ EXPECT_EQ(MOCK_PAGE_SIZE, ranges_cmd.out_iova_alignment);
+ }
+
+ /* Buffer too small */
+ memset(ranges, 0, BUFFER_SIZE);
+ ranges_cmd.num_iovas = 0;
+ EXPECT_ERRNO(EMSGSIZE,
+ ioctl(self->fd, IOMMU_IOAS_IOVA_RANGES, &ranges_cmd));
+ EXPECT_EQ(1, ranges_cmd.num_iovas);
+ EXPECT_EQ(0, ranges[0].start);
+ EXPECT_EQ(0, ranges[0].last);
+
+ /* 2 ranges */
+ ASSERT_EQ(0,
+ ioctl(self->fd, _IOMMU_TEST_CMD(IOMMU_TEST_OP_ADD_RESERVED),
+ &test_cmd));
+ ranges_cmd.num_iovas = BUFFER_SIZE / sizeof(*ranges);
+ ASSERT_EQ(0, ioctl(self->fd, IOMMU_IOAS_IOVA_RANGES, &ranges_cmd));
+ if (!self->stdev_id) {
+ EXPECT_EQ(2, ranges_cmd.num_iovas);
+ EXPECT_EQ(0, ranges[0].start);
+ EXPECT_EQ(PAGE_SIZE - 1, ranges[0].last);
+ EXPECT_EQ(PAGE_SIZE * 2, ranges[1].start);
+ EXPECT_EQ(SIZE_MAX, ranges[1].last);
+ } else {
+ EXPECT_EQ(1, ranges_cmd.num_iovas);
+ EXPECT_EQ(MOCK_APERTURE_START, ranges[0].start);
+ EXPECT_EQ(MOCK_APERTURE_LAST, ranges[0].last);
+ }
+
+ /* Buffer too small */
+ memset(ranges, 0, BUFFER_SIZE);
+ ranges_cmd.num_iovas = 1;
+ if (!self->stdev_id) {
+ EXPECT_ERRNO(EMSGSIZE, ioctl(self->fd, IOMMU_IOAS_IOVA_RANGES,
+ &ranges_cmd));
+ EXPECT_EQ(2, ranges_cmd.num_iovas);
+ EXPECT_EQ(0, ranges[0].start);
+ EXPECT_EQ(PAGE_SIZE - 1, ranges[0].last);
+ } else {
+ ASSERT_EQ(0,
+ ioctl(self->fd, IOMMU_IOAS_IOVA_RANGES, &ranges_cmd));
+ EXPECT_EQ(1, ranges_cmd.num_iovas);
+ EXPECT_EQ(MOCK_APERTURE_START, ranges[0].start);
+ EXPECT_EQ(MOCK_APERTURE_LAST, ranges[0].last);
+ }
+ EXPECT_EQ(0, ranges[1].start);
+ EXPECT_EQ(0, ranges[1].last);
+}
+
+TEST_F(iommufd_ioas, access_domain_destory)
+{
+ struct iommu_test_cmd access_cmd = {
+ .size = sizeof(access_cmd),
+ .op = IOMMU_TEST_OP_ACCESS_PAGES,
+ .access_pages = { .iova = self->base_iova + PAGE_SIZE,
+ .length = PAGE_SIZE},
+ };
+ size_t buf_size = 2 * HUGEPAGE_SIZE;
+ uint8_t *buf;
+
+ buf = mmap(0, buf_size, PROT_READ | PROT_WRITE,
+ MAP_SHARED | MAP_ANONYMOUS | MAP_HUGETLB | MAP_POPULATE, -1,
+ 0);
+ ASSERT_NE(MAP_FAILED, buf);
+ test_ioctl_ioas_map_fixed(buf, buf_size, self->base_iova);
+
+ test_cmd_create_access(self->ioas_id, &access_cmd.id,
+ MOCK_FLAGS_ACCESS_CREATE_NEEDS_PIN_PAGES);
+ access_cmd.access_pages.uptr = (uintptr_t)buf + PAGE_SIZE;
+ ASSERT_EQ(0,
+ ioctl(self->fd, _IOMMU_TEST_CMD(IOMMU_TEST_OP_ACCESS_PAGES),
+ &access_cmd));
+
+ /* Causes a complicated unpin across a huge page boundary */
+ if (self->stdev_id)
+ test_ioctl_destroy(self->stdev_id);
+
+ test_cmd_destroy_access_pages(
+ access_cmd.id, access_cmd.access_pages.out_access_pages_id);
+ test_cmd_destroy_access(access_cmd.id);
+ ASSERT_EQ(0, munmap(buf, buf_size));
+}
+
+TEST_F(iommufd_ioas, access_pin)
+{
+ struct iommu_test_cmd access_cmd = {
+ .size = sizeof(access_cmd),
+ .op = IOMMU_TEST_OP_ACCESS_PAGES,
+ .access_pages = { .iova = MOCK_APERTURE_START,
+ .length = BUFFER_SIZE,
+ .uptr = (uintptr_t)buffer },
+ };
+ struct iommu_test_cmd check_map_cmd = {
+ .size = sizeof(check_map_cmd),
+ .op = IOMMU_TEST_OP_MD_CHECK_MAP,
+ .check_map = { .iova = MOCK_APERTURE_START,
+ .length = BUFFER_SIZE,
+ .uptr = (uintptr_t)buffer },
+ };
+ uint32_t access_pages_id;
+ unsigned int npages;
+
+ test_cmd_create_access(self->ioas_id, &access_cmd.id,
+ MOCK_FLAGS_ACCESS_CREATE_NEEDS_PIN_PAGES);
+
+ for (npages = 1; npages < BUFFER_SIZE / PAGE_SIZE; npages++) {
+ uint32_t mock_stdev_id;
+ uint32_t mock_hwpt_id;
+
+ access_cmd.access_pages.length = npages * PAGE_SIZE;
+
+ /* Single map/unmap */
+ test_ioctl_ioas_map_fixed(buffer, BUFFER_SIZE,
+ MOCK_APERTURE_START);
+ ASSERT_EQ(0, ioctl(self->fd,
+ _IOMMU_TEST_CMD(IOMMU_TEST_OP_ACCESS_PAGES),
+ &access_cmd));
+ test_cmd_destroy_access_pages(
+ access_cmd.id,
+ access_cmd.access_pages.out_access_pages_id);
+
+ /* Double user */
+ ASSERT_EQ(0, ioctl(self->fd,
+ _IOMMU_TEST_CMD(IOMMU_TEST_OP_ACCESS_PAGES),
+ &access_cmd));
+ access_pages_id = access_cmd.access_pages.out_access_pages_id;
+ ASSERT_EQ(0, ioctl(self->fd,
+ _IOMMU_TEST_CMD(IOMMU_TEST_OP_ACCESS_PAGES),
+ &access_cmd));
+ test_cmd_destroy_access_pages(
+ access_cmd.id,
+ access_cmd.access_pages.out_access_pages_id);
+ test_cmd_destroy_access_pages(access_cmd.id, access_pages_id);
+
+ /* Add/remove a domain with a user */
+ ASSERT_EQ(0, ioctl(self->fd,
+ _IOMMU_TEST_CMD(IOMMU_TEST_OP_ACCESS_PAGES),
+ &access_cmd));
+ test_cmd_mock_domain(self->ioas_id, &mock_stdev_id,
+ &mock_hwpt_id, NULL);
+ check_map_cmd.id = mock_hwpt_id;
+ ASSERT_EQ(0, ioctl(self->fd,
+ _IOMMU_TEST_CMD(IOMMU_TEST_OP_MD_CHECK_MAP),
+ &check_map_cmd));
+
+ test_ioctl_destroy(mock_stdev_id);
+ test_cmd_destroy_access_pages(
+ access_cmd.id,
+ access_cmd.access_pages.out_access_pages_id);
+
+ test_ioctl_ioas_unmap(MOCK_APERTURE_START, BUFFER_SIZE);
+ }
+ test_cmd_destroy_access(access_cmd.id);
+}
+
+TEST_F(iommufd_ioas, access_pin_unmap)
+{
+ struct iommu_test_cmd access_pages_cmd = {
+ .size = sizeof(access_pages_cmd),
+ .op = IOMMU_TEST_OP_ACCESS_PAGES,
+ .access_pages = { .iova = MOCK_APERTURE_START,
+ .length = BUFFER_SIZE,
+ .uptr = (uintptr_t)buffer },
+ };
+
+ test_cmd_create_access(self->ioas_id, &access_pages_cmd.id,
+ MOCK_FLAGS_ACCESS_CREATE_NEEDS_PIN_PAGES);
+ test_ioctl_ioas_map_fixed(buffer, BUFFER_SIZE, MOCK_APERTURE_START);
+ ASSERT_EQ(0,
+ ioctl(self->fd, _IOMMU_TEST_CMD(IOMMU_TEST_OP_ACCESS_PAGES),
+ &access_pages_cmd));
+
+ /* Trigger the unmap op */
+ test_ioctl_ioas_unmap(MOCK_APERTURE_START, BUFFER_SIZE);
+
+ /* kernel removed the item for us */
+ test_err_destroy_access_pages(
+ ENOENT, access_pages_cmd.id,
+ access_pages_cmd.access_pages.out_access_pages_id);
+}
+
+static void check_access_rw(struct __test_metadata *_metadata, int fd,
+ unsigned int access_id, uint64_t iova,
+ unsigned int def_flags)
+{
+ uint16_t tmp[32];
+ struct iommu_test_cmd access_cmd = {
+ .size = sizeof(access_cmd),
+ .op = IOMMU_TEST_OP_ACCESS_RW,
+ .id = access_id,
+ .access_rw = { .uptr = (uintptr_t)tmp },
+ };
+ uint16_t *buffer16 = buffer;
+ unsigned int i;
+ void *tmp2;
+
+ for (i = 0; i != BUFFER_SIZE / sizeof(*buffer16); i++)
+ buffer16[i] = rand();
+
+ for (access_cmd.access_rw.iova = iova + PAGE_SIZE - 50;
+ access_cmd.access_rw.iova < iova + PAGE_SIZE + 50;
+ access_cmd.access_rw.iova++) {
+ for (access_cmd.access_rw.length = 1;
+ access_cmd.access_rw.length < sizeof(tmp);
+ access_cmd.access_rw.length++) {
+ access_cmd.access_rw.flags = def_flags;
+ ASSERT_EQ(0, ioctl(fd,
+ _IOMMU_TEST_CMD(
+ IOMMU_TEST_OP_ACCESS_RW),
+ &access_cmd));
+ ASSERT_EQ(0,
+ memcmp(buffer + (access_cmd.access_rw.iova -
+ iova),
+ tmp, access_cmd.access_rw.length));
+
+ for (i = 0; i != ARRAY_SIZE(tmp); i++)
+ tmp[i] = rand();
+ access_cmd.access_rw.flags = def_flags |
+ MOCK_ACCESS_RW_WRITE;
+ ASSERT_EQ(0, ioctl(fd,
+ _IOMMU_TEST_CMD(
+ IOMMU_TEST_OP_ACCESS_RW),
+ &access_cmd));
+ ASSERT_EQ(0,
+ memcmp(buffer + (access_cmd.access_rw.iova -
+ iova),
+ tmp, access_cmd.access_rw.length));
+ }
+ }
+
+ /* Multi-page test */
+ tmp2 = malloc(BUFFER_SIZE);
+ ASSERT_NE(NULL, tmp2);
+ access_cmd.access_rw.iova = iova;
+ access_cmd.access_rw.length = BUFFER_SIZE;
+ access_cmd.access_rw.flags = def_flags;
+ access_cmd.access_rw.uptr = (uintptr_t)tmp2;
+ ASSERT_EQ(0, ioctl(fd, _IOMMU_TEST_CMD(IOMMU_TEST_OP_ACCESS_RW),
+ &access_cmd));
+ ASSERT_EQ(0, memcmp(buffer, tmp2, access_cmd.access_rw.length));
+ free(tmp2);
+}
+
+TEST_F(iommufd_ioas, access_rw)
+{
+ __u32 access_id;
+ __u64 iova;
+
+ test_cmd_create_access(self->ioas_id, &access_id, 0);
+ test_ioctl_ioas_map(buffer, BUFFER_SIZE, &iova);
+ check_access_rw(_metadata, self->fd, access_id, iova, 0);
+ check_access_rw(_metadata, self->fd, access_id, iova,
+ MOCK_ACCESS_RW_SLOW_PATH);
+ test_ioctl_ioas_unmap(iova, BUFFER_SIZE);
+ test_cmd_destroy_access(access_id);
+}
+
+TEST_F(iommufd_ioas, access_rw_unaligned)
+{
+ __u32 access_id;
+ __u64 iova;
+
+ test_cmd_create_access(self->ioas_id, &access_id, 0);
+
+ /* Unaligned pages */
+ iova = self->base_iova + MOCK_PAGE_SIZE;
+ test_ioctl_ioas_map_fixed(buffer, BUFFER_SIZE, iova);
+ check_access_rw(_metadata, self->fd, access_id, iova, 0);
+ test_ioctl_ioas_unmap(iova, BUFFER_SIZE);
+ test_cmd_destroy_access(access_id);
+}
+
+TEST_F(iommufd_ioas, fork_gone)
+{
+ __u32 access_id;
+ pid_t child;
+
+ test_cmd_create_access(self->ioas_id, &access_id, 0);
+
+ /* Create a mapping with a different mm */
+ child = fork();
+ if (!child) {
+ test_ioctl_ioas_map_fixed(buffer, BUFFER_SIZE,
+ MOCK_APERTURE_START);
+ exit(0);
+ }
+ ASSERT_NE(-1, child);
+ ASSERT_EQ(child, waitpid(child, NULL, 0));
+
+ if (self->stdev_id) {
+ /*
+ * If a domain already existed then everything was pinned within
+ * the fork, so this copies from one domain to another.
+ */
+ test_cmd_mock_domain(self->ioas_id, NULL, NULL, NULL);
+ check_access_rw(_metadata, self->fd, access_id,
+ MOCK_APERTURE_START, 0);
+
+ } else {
+ /*
+ * Otherwise we need to actually pin pages which can't happen
+ * since the fork is gone.
+ */
+ test_err_mock_domain(EFAULT, self->ioas_id, NULL, NULL);
+ }
+
+ test_cmd_destroy_access(access_id);
+}
+
+TEST_F(iommufd_ioas, fork_present)
+{
+ __u32 access_id;
+ int pipefds[2];
+ uint64_t tmp;
+ pid_t child;
+ int efd;
+
+ test_cmd_create_access(self->ioas_id, &access_id, 0);
+
+ ASSERT_EQ(0, pipe2(pipefds, O_CLOEXEC));
+ efd = eventfd(0, EFD_CLOEXEC);
+ ASSERT_NE(-1, efd);
+
+ /* Create a mapping with a different mm */
+ child = fork();
+ if (!child) {
+ __u64 iova;
+ uint64_t one = 1;
+
+ close(pipefds[1]);
+ test_ioctl_ioas_map_fixed(buffer, BUFFER_SIZE,
+ MOCK_APERTURE_START);
+ if (write(efd, &one, sizeof(one)) != sizeof(one))
+ exit(100);
+ if (read(pipefds[0], &iova, 1) != 1)
+ exit(100);
+ exit(0);
+ }
+ close(pipefds[0]);
+ ASSERT_NE(-1, child);
+ ASSERT_EQ(8, read(efd, &tmp, sizeof(tmp)));
+
+ /* Read pages from the remote process */
+ test_cmd_mock_domain(self->ioas_id, NULL, NULL, NULL);
+ check_access_rw(_metadata, self->fd, access_id, MOCK_APERTURE_START, 0);
+
+ ASSERT_EQ(0, close(pipefds[1]));
+ ASSERT_EQ(child, waitpid(child, NULL, 0));
+
+ test_cmd_destroy_access(access_id);
+}
+
+TEST_F(iommufd_ioas, ioas_option_huge_pages)
+{
+ struct iommu_option cmd = {
+ .size = sizeof(cmd),
+ .option_id = IOMMU_OPTION_HUGE_PAGES,
+ .op = IOMMU_OPTION_OP_GET,
+ .val64 = 3,
+ .object_id = self->ioas_id,
+ };
+
+ ASSERT_EQ(0, ioctl(self->fd, IOMMU_OPTION, &cmd));
+ ASSERT_EQ(1, cmd.val64);
+
+ cmd.op = IOMMU_OPTION_OP_SET;
+ cmd.val64 = 0;
+ ASSERT_EQ(0, ioctl(self->fd, IOMMU_OPTION, &cmd));
+
+ cmd.op = IOMMU_OPTION_OP_GET;
+ cmd.val64 = 3;
+ ASSERT_EQ(0, ioctl(self->fd, IOMMU_OPTION, &cmd));
+ ASSERT_EQ(0, cmd.val64);
+
+ cmd.op = IOMMU_OPTION_OP_SET;
+ cmd.val64 = 2;
+ EXPECT_ERRNO(EINVAL, ioctl(self->fd, IOMMU_OPTION, &cmd));
+
+ cmd.op = IOMMU_OPTION_OP_SET;
+ cmd.val64 = 1;
+ ASSERT_EQ(0, ioctl(self->fd, IOMMU_OPTION, &cmd));
+}
+
+TEST_F(iommufd_ioas, ioas_iova_alloc)
+{
+ unsigned int length;
+ __u64 iova;
+
+ for (length = 1; length != PAGE_SIZE * 2; length++) {
+ if (variant->mock_domains && (length % MOCK_PAGE_SIZE)) {
+ test_err_ioctl_ioas_map(EINVAL, buffer, length, &iova);
+ } else {
+ test_ioctl_ioas_map(buffer, length, &iova);
+ test_ioctl_ioas_unmap(iova, length);
+ }
+ }
+}
+
+TEST_F(iommufd_ioas, ioas_align_change)
+{
+ struct iommu_option cmd = {
+ .size = sizeof(cmd),
+ .option_id = IOMMU_OPTION_HUGE_PAGES,
+ .op = IOMMU_OPTION_OP_SET,
+ .object_id = self->ioas_id,
+ /* 0 means everything must be aligned to PAGE_SIZE */
+ .val64 = 0,
+ };
+
+ /*
+ * We cannot upgrade the alignment using OPTION_HUGE_PAGES when a domain
+ * and map are present.
+ */
+ if (variant->mock_domains)
+ return;
+
+ /*
+ * We can upgrade to PAGE_SIZE alignment when things are aligned right
+ */
+ test_ioctl_ioas_map_fixed(buffer, PAGE_SIZE, MOCK_APERTURE_START);
+ ASSERT_EQ(0, ioctl(self->fd, IOMMU_OPTION, &cmd));
+
+ /* Misalignment is rejected at map time */
+ test_err_ioctl_ioas_map_fixed(EINVAL, buffer + MOCK_PAGE_SIZE,
+ PAGE_SIZE,
+ MOCK_APERTURE_START + PAGE_SIZE);
+ ASSERT_EQ(0, ioctl(self->fd, IOMMU_OPTION, &cmd));
+
+ /* Reduce alignment */
+ cmd.val64 = 1;
+ ASSERT_EQ(0, ioctl(self->fd, IOMMU_OPTION, &cmd));
+
+ /* Confirm misalignment is rejected during alignment upgrade */
+ test_ioctl_ioas_map_fixed(buffer + MOCK_PAGE_SIZE, PAGE_SIZE,
+ MOCK_APERTURE_START + PAGE_SIZE);
+ cmd.val64 = 0;
+ EXPECT_ERRNO(EADDRINUSE, ioctl(self->fd, IOMMU_OPTION, &cmd));
+
+ test_ioctl_ioas_unmap(MOCK_APERTURE_START + PAGE_SIZE, PAGE_SIZE);
+ test_ioctl_ioas_unmap(MOCK_APERTURE_START, PAGE_SIZE);
+}
+
+TEST_F(iommufd_ioas, copy_sweep)
+{
+ struct iommu_ioas_copy copy_cmd = {
+ .size = sizeof(copy_cmd),
+ .flags = IOMMU_IOAS_MAP_FIXED_IOVA,
+ .src_ioas_id = self->ioas_id,
+ .dst_iova = MOCK_APERTURE_START,
+ .length = MOCK_PAGE_SIZE,
+ };
+ unsigned int dst_ioas_id;
+ uint64_t last_iova;
+ uint64_t iova;
+
+ test_ioctl_ioas_alloc(&dst_ioas_id);
+ copy_cmd.dst_ioas_id = dst_ioas_id;
+
+ if (variant->mock_domains)
+ last_iova = MOCK_APERTURE_START + BUFFER_SIZE - 1;
+ else
+ last_iova = MOCK_APERTURE_START + BUFFER_SIZE - 2;
+
+ test_ioctl_ioas_map_fixed(buffer, last_iova - MOCK_APERTURE_START + 1,
+ MOCK_APERTURE_START);
+
+ for (iova = MOCK_APERTURE_START - PAGE_SIZE; iova <= last_iova;
+ iova += 511) {
+ copy_cmd.src_iova = iova;
+ if (iova < MOCK_APERTURE_START ||
+ iova + copy_cmd.length - 1 > last_iova) {
+ EXPECT_ERRNO(ENOENT, ioctl(self->fd, IOMMU_IOAS_COPY,
+ &copy_cmd));
+ } else {
+ ASSERT_EQ(0,
+ ioctl(self->fd, IOMMU_IOAS_COPY, &copy_cmd));
+ test_ioctl_ioas_unmap_id(dst_ioas_id, copy_cmd.dst_iova,
+ copy_cmd.length);
+ }
+ }
+
+ test_ioctl_destroy(dst_ioas_id);
+}
+
+FIXTURE(iommufd_mock_domain)
+{
+ int fd;
+ uint32_t ioas_id;
+ uint32_t hwpt_id;
+ uint32_t hwpt_ids[2];
+ uint32_t stdev_ids[2];
+ uint32_t idev_ids[2];
+ int mmap_flags;
+ size_t mmap_buf_size;
+};
+
+FIXTURE_VARIANT(iommufd_mock_domain)
+{
+ unsigned int mock_domains;
+ bool hugepages;
+};
+
+FIXTURE_SETUP(iommufd_mock_domain)
+{
+ unsigned int i;
+
+ self->fd = open("/dev/iommu", O_RDWR);
+ ASSERT_NE(-1, self->fd);
+ test_ioctl_ioas_alloc(&self->ioas_id);
+
+ ASSERT_GE(ARRAY_SIZE(self->hwpt_ids), variant->mock_domains);
+
+ for (i = 0; i != variant->mock_domains; i++)
+ test_cmd_mock_domain(self->ioas_id, &self->stdev_ids[i],
+ &self->hwpt_ids[i], &self->idev_ids[i]);
+ self->hwpt_id = self->hwpt_ids[0];
+
+ self->mmap_flags = MAP_SHARED | MAP_ANONYMOUS;
+ self->mmap_buf_size = PAGE_SIZE * 8;
+ if (variant->hugepages) {
+ /*
+ * MAP_POPULATE will cause the kernel to fail mmap if THPs are
+ * not available.
+ */
+ self->mmap_flags |= MAP_HUGETLB | MAP_POPULATE;
+ self->mmap_buf_size = HUGEPAGE_SIZE * 2;
+ }
+}
+
+FIXTURE_TEARDOWN(iommufd_mock_domain)
+{
+ teardown_iommufd(self->fd, _metadata);
+}
+
+FIXTURE_VARIANT_ADD(iommufd_mock_domain, one_domain)
+{
+ .mock_domains = 1,
+ .hugepages = false,
+};
+
+FIXTURE_VARIANT_ADD(iommufd_mock_domain, two_domains)
+{
+ .mock_domains = 2,
+ .hugepages = false,
+};
+
+FIXTURE_VARIANT_ADD(iommufd_mock_domain, one_domain_hugepage)
+{
+ .mock_domains = 1,
+ .hugepages = true,
+};
+
+FIXTURE_VARIANT_ADD(iommufd_mock_domain, two_domains_hugepage)
+{
+ .mock_domains = 2,
+ .hugepages = true,
+};
+
+/* Have the kernel check that the user pages made it to the iommu_domain */
+#define check_mock_iova(_ptr, _iova, _length) \
+ ({ \
+ struct iommu_test_cmd check_map_cmd = { \
+ .size = sizeof(check_map_cmd), \
+ .op = IOMMU_TEST_OP_MD_CHECK_MAP, \
+ .id = self->hwpt_id, \
+ .check_map = { .iova = _iova, \
+ .length = _length, \
+ .uptr = (uintptr_t)(_ptr) }, \
+ }; \
+ ASSERT_EQ(0, \
+ ioctl(self->fd, \
+ _IOMMU_TEST_CMD(IOMMU_TEST_OP_MD_CHECK_MAP), \
+ &check_map_cmd)); \
+ if (self->hwpt_ids[1]) { \
+ check_map_cmd.id = self->hwpt_ids[1]; \
+ ASSERT_EQ(0, \
+ ioctl(self->fd, \
+ _IOMMU_TEST_CMD( \
+ IOMMU_TEST_OP_MD_CHECK_MAP), \
+ &check_map_cmd)); \
+ } \
+ })
+
+TEST_F(iommufd_mock_domain, basic)
+{
+ size_t buf_size = self->mmap_buf_size;
+ uint8_t *buf;
+ __u64 iova;
+
+ /* Simple one page map */
+ test_ioctl_ioas_map(buffer, PAGE_SIZE, &iova);
+ check_mock_iova(buffer, iova, PAGE_SIZE);
+
+ buf = mmap(0, buf_size, PROT_READ | PROT_WRITE, self->mmap_flags, -1,
+ 0);
+ ASSERT_NE(MAP_FAILED, buf);
+
+ /* EFAULT half way through mapping */
+ ASSERT_EQ(0, munmap(buf + buf_size / 2, buf_size / 2));
+ test_err_ioctl_ioas_map(EFAULT, buf, buf_size, &iova);
+
+ /* EFAULT on first page */
+ ASSERT_EQ(0, munmap(buf, buf_size / 2));
+ test_err_ioctl_ioas_map(EFAULT, buf, buf_size, &iova);
+}
+
+TEST_F(iommufd_mock_domain, ro_unshare)
+{
+ uint8_t *buf;
+ __u64 iova;
+ int fd;
+
+ fd = open("/proc/self/exe", O_RDONLY);
+ ASSERT_NE(-1, fd);
+
+ buf = mmap(0, PAGE_SIZE, PROT_READ | PROT_WRITE, MAP_PRIVATE, fd, 0);
+ ASSERT_NE(MAP_FAILED, buf);
+ close(fd);
+
+ /*
+ * There have been lots of changes to the "unshare" mechanism in
+ * get_user_pages(), make sure it works right. The write to the page
+ * after we map it for reading should not change the assigned PFN.
+ */
+ ASSERT_EQ(0,
+ _test_ioctl_ioas_map(self->fd, self->ioas_id, buf, PAGE_SIZE,
+ &iova, IOMMU_IOAS_MAP_READABLE));
+ check_mock_iova(buf, iova, PAGE_SIZE);
+ memset(buf, 1, PAGE_SIZE);
+ check_mock_iova(buf, iova, PAGE_SIZE);
+ ASSERT_EQ(0, munmap(buf, PAGE_SIZE));
+}
+
+TEST_F(iommufd_mock_domain, all_aligns)
+{
+ size_t test_step = variant->hugepages ? (self->mmap_buf_size / 16) :
+ MOCK_PAGE_SIZE;
+ size_t buf_size = self->mmap_buf_size;
+ unsigned int start;
+ unsigned int end;
+ uint8_t *buf;
+
+ buf = mmap(0, buf_size, PROT_READ | PROT_WRITE, self->mmap_flags, -1,
+ 0);
+ ASSERT_NE(MAP_FAILED, buf);
+ check_refs(buf, buf_size, 0);
+
+ /*
+ * Map every combination of page size and alignment within a big region,
+ * less for hugepage case as it takes so long to finish.
+ */
+ for (start = 0; start < buf_size; start += test_step) {
+ if (variant->hugepages)
+ end = buf_size;
+ else
+ end = start + MOCK_PAGE_SIZE;
+ for (; end < buf_size; end += MOCK_PAGE_SIZE) {
+ size_t length = end - start;
+ __u64 iova;
+
+ test_ioctl_ioas_map(buf + start, length, &iova);
+ check_mock_iova(buf + start, iova, length);
+ check_refs(buf + start / PAGE_SIZE * PAGE_SIZE,
+ end / PAGE_SIZE * PAGE_SIZE -
+ start / PAGE_SIZE * PAGE_SIZE,
+ 1);
+
+ test_ioctl_ioas_unmap(iova, length);
+ }
+ }
+ check_refs(buf, buf_size, 0);
+ ASSERT_EQ(0, munmap(buf, buf_size));
+}
+
+TEST_F(iommufd_mock_domain, all_aligns_copy)
+{
+ size_t test_step = variant->hugepages ? self->mmap_buf_size / 16 :
+ MOCK_PAGE_SIZE;
+ size_t buf_size = self->mmap_buf_size;
+ unsigned int start;
+ unsigned int end;
+ uint8_t *buf;
+
+ buf = mmap(0, buf_size, PROT_READ | PROT_WRITE, self->mmap_flags, -1,
+ 0);
+ ASSERT_NE(MAP_FAILED, buf);
+ check_refs(buf, buf_size, 0);
+
+ /*
+ * Map every combination of page size and alignment within a big region,
+ * less for hugepage case as it takes so long to finish.
+ */
+ for (start = 0; start < buf_size; start += test_step) {
+ if (variant->hugepages)
+ end = buf_size;
+ else
+ end = start + MOCK_PAGE_SIZE;
+ for (; end < buf_size; end += MOCK_PAGE_SIZE) {
+ size_t length = end - start;
+ unsigned int old_id;
+ uint32_t mock_stdev_id;
+ __u64 iova;
+
+ test_ioctl_ioas_map(buf + start, length, &iova);
+
+ /* Add and destroy a domain while the area exists */
+ old_id = self->hwpt_ids[1];
+ test_cmd_mock_domain(self->ioas_id, &mock_stdev_id,
+ &self->hwpt_ids[1], NULL);
+
+ check_mock_iova(buf + start, iova, length);
+ check_refs(buf + start / PAGE_SIZE * PAGE_SIZE,
+ end / PAGE_SIZE * PAGE_SIZE -
+ start / PAGE_SIZE * PAGE_SIZE,
+ 1);
+
+ test_ioctl_destroy(mock_stdev_id);
+ self->hwpt_ids[1] = old_id;
+
+ test_ioctl_ioas_unmap(iova, length);
+ }
+ }
+ check_refs(buf, buf_size, 0);
+ ASSERT_EQ(0, munmap(buf, buf_size));
+}
+
+TEST_F(iommufd_mock_domain, user_copy)
+{
+ struct iommu_test_cmd access_cmd = {
+ .size = sizeof(access_cmd),
+ .op = IOMMU_TEST_OP_ACCESS_PAGES,
+ .access_pages = { .length = BUFFER_SIZE,
+ .uptr = (uintptr_t)buffer },
+ };
+ struct iommu_ioas_copy copy_cmd = {
+ .size = sizeof(copy_cmd),
+ .flags = IOMMU_IOAS_MAP_FIXED_IOVA,
+ .dst_ioas_id = self->ioas_id,
+ .dst_iova = MOCK_APERTURE_START,
+ .length = BUFFER_SIZE,
+ };
+ struct iommu_ioas_unmap unmap_cmd = {
+ .size = sizeof(unmap_cmd),
+ .ioas_id = self->ioas_id,
+ .iova = MOCK_APERTURE_START,
+ .length = BUFFER_SIZE,
+ };
+ unsigned int new_ioas_id, ioas_id;
+
+ /* Pin the pages in an IOAS with no domains then copy to an IOAS with domains */
+ test_ioctl_ioas_alloc(&ioas_id);
+ test_ioctl_ioas_map_id(ioas_id, buffer, BUFFER_SIZE,
+ &copy_cmd.src_iova);
+
+ test_cmd_create_access(ioas_id, &access_cmd.id,
+ MOCK_FLAGS_ACCESS_CREATE_NEEDS_PIN_PAGES);
+
+ access_cmd.access_pages.iova = copy_cmd.src_iova;
+ ASSERT_EQ(0,
+ ioctl(self->fd, _IOMMU_TEST_CMD(IOMMU_TEST_OP_ACCESS_PAGES),
+ &access_cmd));
+ copy_cmd.src_ioas_id = ioas_id;
+ ASSERT_EQ(0, ioctl(self->fd, IOMMU_IOAS_COPY, &copy_cmd));
+ check_mock_iova(buffer, MOCK_APERTURE_START, BUFFER_SIZE);
+
+ /* Now replace the ioas with a new one */
+ test_ioctl_ioas_alloc(&new_ioas_id);
+ test_ioctl_ioas_map_id(new_ioas_id, buffer, BUFFER_SIZE,
+ &copy_cmd.src_iova);
+ test_cmd_access_replace_ioas(access_cmd.id, new_ioas_id);
+
+ /* Destroy the old ioas and cleanup copied mapping */
+ ASSERT_EQ(0, ioctl(self->fd, IOMMU_IOAS_UNMAP, &unmap_cmd));
+ test_ioctl_destroy(ioas_id);
+
+ /* Then run the same test again with the new ioas */
+ access_cmd.access_pages.iova = copy_cmd.src_iova;
+ ASSERT_EQ(0,
+ ioctl(self->fd, _IOMMU_TEST_CMD(IOMMU_TEST_OP_ACCESS_PAGES),
+ &access_cmd));
+ copy_cmd.src_ioas_id = new_ioas_id;
+ ASSERT_EQ(0, ioctl(self->fd, IOMMU_IOAS_COPY, &copy_cmd));
+ check_mock_iova(buffer, MOCK_APERTURE_START, BUFFER_SIZE);
+
+ test_cmd_destroy_access_pages(
+ access_cmd.id, access_cmd.access_pages.out_access_pages_id);
+ test_cmd_destroy_access(access_cmd.id);
+
+ test_ioctl_destroy(new_ioas_id);
+}
+
+TEST_F(iommufd_mock_domain, replace)
+{
+ uint32_t ioas_id;
+
+ test_ioctl_ioas_alloc(&ioas_id);
+
+ test_cmd_mock_domain_replace(self->stdev_ids[0], ioas_id);
+
+ /*
+ * Replacing the IOAS causes the prior HWPT to be deallocated, thus we
+ * should get enoent when we try to use it.
+ */
+ if (variant->mock_domains == 1)
+ test_err_mock_domain_replace(ENOENT, self->stdev_ids[0],
+ self->hwpt_ids[0]);
+
+ test_cmd_mock_domain_replace(self->stdev_ids[0], ioas_id);
+ if (variant->mock_domains >= 2) {
+ test_cmd_mock_domain_replace(self->stdev_ids[0],
+ self->hwpt_ids[1]);
+ test_cmd_mock_domain_replace(self->stdev_ids[0],
+ self->hwpt_ids[1]);
+ test_cmd_mock_domain_replace(self->stdev_ids[0],
+ self->hwpt_ids[0]);
+ }
+
+ test_cmd_mock_domain_replace(self->stdev_ids[0], self->ioas_id);
+ test_ioctl_destroy(ioas_id);
+}
+
+TEST_F(iommufd_mock_domain, alloc_hwpt)
+{
+ int i;
+
+ for (i = 0; i != variant->mock_domains; i++) {
+ uint32_t hwpt_id[2];
+ uint32_t stddev_id;
+
+ test_err_hwpt_alloc(EOPNOTSUPP,
+ self->idev_ids[i], self->ioas_id,
+ ~IOMMU_HWPT_ALLOC_NEST_PARENT, &hwpt_id[0]);
+ test_cmd_hwpt_alloc(self->idev_ids[i], self->ioas_id,
+ 0, &hwpt_id[0]);
+ test_cmd_hwpt_alloc(self->idev_ids[i], self->ioas_id,
+ IOMMU_HWPT_ALLOC_NEST_PARENT, &hwpt_id[1]);
+
+ /* Do a hw_pagetable rotation test */
+ test_cmd_mock_domain_replace(self->stdev_ids[i], hwpt_id[0]);
+ EXPECT_ERRNO(EBUSY, _test_ioctl_destroy(self->fd, hwpt_id[0]));
+ test_cmd_mock_domain_replace(self->stdev_ids[i], hwpt_id[1]);
+ EXPECT_ERRNO(EBUSY, _test_ioctl_destroy(self->fd, hwpt_id[1]));
+ test_cmd_mock_domain_replace(self->stdev_ids[i], self->ioas_id);
+ test_ioctl_destroy(hwpt_id[1]);
+
+ test_cmd_mock_domain(hwpt_id[0], &stddev_id, NULL, NULL);
+ test_ioctl_destroy(stddev_id);
+ test_ioctl_destroy(hwpt_id[0]);
+ }
+}
+
+FIXTURE(iommufd_dirty_tracking)
+{
+ int fd;
+ uint32_t ioas_id;
+ uint32_t hwpt_id;
+ uint32_t stdev_id;
+ uint32_t idev_id;
+ unsigned long page_size;
+ unsigned long bitmap_size;
+ void *bitmap;
+ void *buffer;
+};
+
+FIXTURE_VARIANT(iommufd_dirty_tracking)
+{
+ unsigned long buffer_size;
+ bool hugepages;
+};
+
+FIXTURE_SETUP(iommufd_dirty_tracking)
+{
+ int mmap_flags;
+ void *vrc;
+ int rc;
+
+ self->fd = open("/dev/iommu", O_RDWR);
+ ASSERT_NE(-1, self->fd);
+
+ rc = posix_memalign(&self->buffer, HUGEPAGE_SIZE, variant->buffer_size);
+ if (rc || !self->buffer) {
+ SKIP(return, "Skipping buffer_size=%lu due to errno=%d",
+ variant->buffer_size, rc);
+ }
+
+ mmap_flags = MAP_SHARED | MAP_ANONYMOUS | MAP_FIXED;
+ if (variant->hugepages) {
+ /*
+ * MAP_POPULATE will cause the kernel to fail mmap if THPs are
+ * not available.
+ */
+ mmap_flags |= MAP_HUGETLB | MAP_POPULATE;
+ }
+ assert((uintptr_t)self->buffer % HUGEPAGE_SIZE == 0);
+ vrc = mmap(self->buffer, variant->buffer_size, PROT_READ | PROT_WRITE,
+ mmap_flags, -1, 0);
+ assert(vrc == self->buffer);
+
+ self->page_size = MOCK_PAGE_SIZE;
+ self->bitmap_size =
+ variant->buffer_size / self->page_size / BITS_PER_BYTE;
+
+ /* Provision with an extra (PAGE_SIZE) for the unaligned case */
+ rc = posix_memalign(&self->bitmap, PAGE_SIZE,
+ self->bitmap_size + PAGE_SIZE);
+ assert(!rc);
+ assert(self->bitmap);
+ assert((uintptr_t)self->bitmap % PAGE_SIZE == 0);
+
+ test_ioctl_ioas_alloc(&self->ioas_id);
+ /* Enable 1M mock IOMMU hugepages */
+ if (variant->hugepages) {
+ test_cmd_mock_domain_flags(self->ioas_id,
+ MOCK_FLAGS_DEVICE_HUGE_IOVA,
+ &self->stdev_id, &self->hwpt_id,
+ &self->idev_id);
+ } else {
+ test_cmd_mock_domain(self->ioas_id, &self->stdev_id,
+ &self->hwpt_id, &self->idev_id);
+ }
+}
+
+FIXTURE_TEARDOWN(iommufd_dirty_tracking)
+{
+ munmap(self->buffer, variant->buffer_size);
+ munmap(self->bitmap, self->bitmap_size);
+ teardown_iommufd(self->fd, _metadata);
+}
+
+FIXTURE_VARIANT_ADD(iommufd_dirty_tracking, domain_dirty128k)
+{
+ /* one u32 index bitmap */
+ .buffer_size = 128UL * 1024UL,
+};
+
+FIXTURE_VARIANT_ADD(iommufd_dirty_tracking, domain_dirty256k)
+{
+ /* one u64 index bitmap */
+ .buffer_size = 256UL * 1024UL,
+};
+
+FIXTURE_VARIANT_ADD(iommufd_dirty_tracking, domain_dirty640k)
+{
+ /* two u64 index and trailing end bitmap */
+ .buffer_size = 640UL * 1024UL,
+};
+
+FIXTURE_VARIANT_ADD(iommufd_dirty_tracking, domain_dirty128M)
+{
+ /* 4K bitmap (128M IOVA range) */
+ .buffer_size = 128UL * 1024UL * 1024UL,
+};
+
+FIXTURE_VARIANT_ADD(iommufd_dirty_tracking, domain_dirty128M_huge)
+{
+ /* 4K bitmap (128M IOVA range) */
+ .buffer_size = 128UL * 1024UL * 1024UL,
+ .hugepages = true,
+};
+
+FIXTURE_VARIANT_ADD(iommufd_dirty_tracking, domain_dirty256M)
+{
+ /* 8K bitmap (256M IOVA range) */
+ .buffer_size = 256UL * 1024UL * 1024UL,
+};
+
+FIXTURE_VARIANT_ADD(iommufd_dirty_tracking, domain_dirty256M_huge)
+{
+ /* 8K bitmap (256M IOVA range) */
+ .buffer_size = 256UL * 1024UL * 1024UL,
+ .hugepages = true,
+};
+
+TEST_F(iommufd_dirty_tracking, enforce_dirty)
+{
+ uint32_t ioas_id, stddev_id, idev_id;
+ uint32_t hwpt_id, _hwpt_id;
+ uint32_t dev_flags;
+
+ /* Regular case */
+ dev_flags = MOCK_FLAGS_DEVICE_NO_DIRTY;
+ test_cmd_hwpt_alloc(self->idev_id, self->ioas_id,
+ IOMMU_HWPT_ALLOC_DIRTY_TRACKING, &hwpt_id);
+ test_cmd_mock_domain(hwpt_id, &stddev_id, NULL, NULL);
+ test_err_mock_domain_flags(EINVAL, hwpt_id, dev_flags, &stddev_id,
+ NULL);
+ test_ioctl_destroy(stddev_id);
+ test_ioctl_destroy(hwpt_id);
+
+ /* IOMMU device does not support dirty tracking */
+ test_ioctl_ioas_alloc(&ioas_id);
+ test_cmd_mock_domain_flags(ioas_id, dev_flags, &stddev_id, &_hwpt_id,
+ &idev_id);
+ test_err_hwpt_alloc(EOPNOTSUPP, idev_id, ioas_id,
+ IOMMU_HWPT_ALLOC_DIRTY_TRACKING, &hwpt_id);
+ test_ioctl_destroy(stddev_id);
+}
+
+TEST_F(iommufd_dirty_tracking, set_dirty_tracking)
+{
+ uint32_t stddev_id;
+ uint32_t hwpt_id;
+
+ test_cmd_hwpt_alloc(self->idev_id, self->ioas_id,
+ IOMMU_HWPT_ALLOC_DIRTY_TRACKING, &hwpt_id);
+ test_cmd_mock_domain(hwpt_id, &stddev_id, NULL, NULL);
+ test_cmd_set_dirty_tracking(hwpt_id, true);
+ test_cmd_set_dirty_tracking(hwpt_id, false);
+
+ test_ioctl_destroy(stddev_id);
+ test_ioctl_destroy(hwpt_id);
+}
+
+TEST_F(iommufd_dirty_tracking, device_dirty_capability)
+{
+ uint32_t caps = 0;
+ uint32_t stddev_id;
+ uint32_t hwpt_id;
+
+ test_cmd_hwpt_alloc(self->idev_id, self->ioas_id, 0, &hwpt_id);
+ test_cmd_mock_domain(hwpt_id, &stddev_id, NULL, NULL);
+ test_cmd_get_hw_capabilities(self->idev_id, caps,
+ IOMMU_HW_CAP_DIRTY_TRACKING);
+ ASSERT_EQ(IOMMU_HW_CAP_DIRTY_TRACKING,
+ caps & IOMMU_HW_CAP_DIRTY_TRACKING);
+
+ test_ioctl_destroy(stddev_id);
+ test_ioctl_destroy(hwpt_id);
+}
+
+TEST_F(iommufd_dirty_tracking, get_dirty_bitmap)
+{
+ uint32_t page_size = MOCK_PAGE_SIZE;
+ uint32_t hwpt_id;
+ uint32_t ioas_id;
+
+ if (variant->hugepages)
+ page_size = MOCK_HUGE_PAGE_SIZE;
+
+ test_ioctl_ioas_alloc(&ioas_id);
+ test_ioctl_ioas_map_fixed_id(ioas_id, self->buffer,
+ variant->buffer_size, MOCK_APERTURE_START);
+
+ test_cmd_hwpt_alloc(self->idev_id, ioas_id,
+ IOMMU_HWPT_ALLOC_DIRTY_TRACKING, &hwpt_id);
+
+ test_cmd_set_dirty_tracking(hwpt_id, true);
+
+ test_mock_dirty_bitmaps(hwpt_id, variant->buffer_size,
+ MOCK_APERTURE_START, self->page_size, page_size,
+ self->bitmap, self->bitmap_size, 0, _metadata);
+
+ /* PAGE_SIZE unaligned bitmap */
+ test_mock_dirty_bitmaps(hwpt_id, variant->buffer_size,
+ MOCK_APERTURE_START, self->page_size, page_size,
+ self->bitmap + MOCK_PAGE_SIZE,
+ self->bitmap_size, 0, _metadata);
+
+ /* u64 unaligned bitmap */
+ test_mock_dirty_bitmaps(hwpt_id, variant->buffer_size,
+ MOCK_APERTURE_START, self->page_size, page_size,
+ self->bitmap + 0xff1, self->bitmap_size, 0,
+ _metadata);
+
+ test_ioctl_destroy(hwpt_id);
+}
+
+TEST_F(iommufd_dirty_tracking, get_dirty_bitmap_no_clear)
+{
+ uint32_t page_size = MOCK_PAGE_SIZE;
+ uint32_t hwpt_id;
+ uint32_t ioas_id;
+
+ if (variant->hugepages)
+ page_size = MOCK_HUGE_PAGE_SIZE;
+
+ test_ioctl_ioas_alloc(&ioas_id);
+ test_ioctl_ioas_map_fixed_id(ioas_id, self->buffer,
+ variant->buffer_size, MOCK_APERTURE_START);
+
+ test_cmd_hwpt_alloc(self->idev_id, ioas_id,
+ IOMMU_HWPT_ALLOC_DIRTY_TRACKING, &hwpt_id);
+
+ test_cmd_set_dirty_tracking(hwpt_id, true);
+
+ test_mock_dirty_bitmaps(hwpt_id, variant->buffer_size,
+ MOCK_APERTURE_START, self->page_size, page_size,
+ self->bitmap, self->bitmap_size,
+ IOMMU_HWPT_GET_DIRTY_BITMAP_NO_CLEAR,
+ _metadata);
+
+ /* Unaligned bitmap */
+ test_mock_dirty_bitmaps(hwpt_id, variant->buffer_size,
+ MOCK_APERTURE_START, self->page_size, page_size,
+ self->bitmap + MOCK_PAGE_SIZE,
+ self->bitmap_size,
+ IOMMU_HWPT_GET_DIRTY_BITMAP_NO_CLEAR,
+ _metadata);
+
+ /* u64 unaligned bitmap */
+ test_mock_dirty_bitmaps(hwpt_id, variant->buffer_size,
+ MOCK_APERTURE_START, self->page_size, page_size,
+ self->bitmap + 0xff1, self->bitmap_size,
+ IOMMU_HWPT_GET_DIRTY_BITMAP_NO_CLEAR,
+ _metadata);
+
+ test_ioctl_destroy(hwpt_id);
+}
+
+/* VFIO compatibility IOCTLs */
+
+TEST_F(iommufd, simple_ioctls)
+{
+ ASSERT_EQ(VFIO_API_VERSION, ioctl(self->fd, VFIO_GET_API_VERSION));
+ ASSERT_EQ(1, ioctl(self->fd, VFIO_CHECK_EXTENSION, VFIO_TYPE1v2_IOMMU));
+}
+
+TEST_F(iommufd, unmap_cmd)
+{
+ struct vfio_iommu_type1_dma_unmap unmap_cmd = {
+ .iova = MOCK_APERTURE_START,
+ .size = PAGE_SIZE,
+ };
+
+ unmap_cmd.argsz = 1;
+ EXPECT_ERRNO(EINVAL, ioctl(self->fd, VFIO_IOMMU_UNMAP_DMA, &unmap_cmd));
+
+ unmap_cmd.argsz = sizeof(unmap_cmd);
+ unmap_cmd.flags = 1 << 31;
+ EXPECT_ERRNO(EINVAL, ioctl(self->fd, VFIO_IOMMU_UNMAP_DMA, &unmap_cmd));
+
+ unmap_cmd.flags = 0;
+ EXPECT_ERRNO(ENODEV, ioctl(self->fd, VFIO_IOMMU_UNMAP_DMA, &unmap_cmd));
+}
+
+TEST_F(iommufd, map_cmd)
+{
+ struct vfio_iommu_type1_dma_map map_cmd = {
+ .iova = MOCK_APERTURE_START,
+ .size = PAGE_SIZE,
+ .vaddr = (__u64)buffer,
+ };
+
+ map_cmd.argsz = 1;
+ EXPECT_ERRNO(EINVAL, ioctl(self->fd, VFIO_IOMMU_MAP_DMA, &map_cmd));
+
+ map_cmd.argsz = sizeof(map_cmd);
+ map_cmd.flags = 1 << 31;
+ EXPECT_ERRNO(EINVAL, ioctl(self->fd, VFIO_IOMMU_MAP_DMA, &map_cmd));
+
+ /* Requires a domain to be attached */
+ map_cmd.flags = VFIO_DMA_MAP_FLAG_READ | VFIO_DMA_MAP_FLAG_WRITE;
+ EXPECT_ERRNO(ENODEV, ioctl(self->fd, VFIO_IOMMU_MAP_DMA, &map_cmd));
+}
+
+TEST_F(iommufd, info_cmd)
+{
+ struct vfio_iommu_type1_info info_cmd = {};
+
+ /* Invalid argsz */
+ info_cmd.argsz = 1;
+ EXPECT_ERRNO(EINVAL, ioctl(self->fd, VFIO_IOMMU_GET_INFO, &info_cmd));
+
+ info_cmd.argsz = sizeof(info_cmd);
+ EXPECT_ERRNO(ENODEV, ioctl(self->fd, VFIO_IOMMU_GET_INFO, &info_cmd));
+}
+
+TEST_F(iommufd, set_iommu_cmd)
+{
+ /* Requires a domain to be attached */
+ EXPECT_ERRNO(ENODEV,
+ ioctl(self->fd, VFIO_SET_IOMMU, VFIO_TYPE1v2_IOMMU));
+ EXPECT_ERRNO(ENODEV, ioctl(self->fd, VFIO_SET_IOMMU, VFIO_TYPE1_IOMMU));
+}
+
+TEST_F(iommufd, vfio_ioas)
+{
+ struct iommu_vfio_ioas vfio_ioas_cmd = {
+ .size = sizeof(vfio_ioas_cmd),
+ .op = IOMMU_VFIO_IOAS_GET,
+ };
+ __u32 ioas_id;
+
+ /* ENODEV if there is no compat ioas */
+ EXPECT_ERRNO(ENODEV, ioctl(self->fd, IOMMU_VFIO_IOAS, &vfio_ioas_cmd));
+
+ /* Invalid id for set */
+ vfio_ioas_cmd.op = IOMMU_VFIO_IOAS_SET;
+ EXPECT_ERRNO(ENOENT, ioctl(self->fd, IOMMU_VFIO_IOAS, &vfio_ioas_cmd));
+
+ /* Valid id for set*/
+ test_ioctl_ioas_alloc(&ioas_id);
+ vfio_ioas_cmd.ioas_id = ioas_id;
+ ASSERT_EQ(0, ioctl(self->fd, IOMMU_VFIO_IOAS, &vfio_ioas_cmd));
+
+ /* Same id comes back from get */
+ vfio_ioas_cmd.op = IOMMU_VFIO_IOAS_GET;
+ ASSERT_EQ(0, ioctl(self->fd, IOMMU_VFIO_IOAS, &vfio_ioas_cmd));
+ ASSERT_EQ(ioas_id, vfio_ioas_cmd.ioas_id);
+
+ /* Clear works */
+ vfio_ioas_cmd.op = IOMMU_VFIO_IOAS_CLEAR;
+ ASSERT_EQ(0, ioctl(self->fd, IOMMU_VFIO_IOAS, &vfio_ioas_cmd));
+ vfio_ioas_cmd.op = IOMMU_VFIO_IOAS_GET;
+ EXPECT_ERRNO(ENODEV, ioctl(self->fd, IOMMU_VFIO_IOAS, &vfio_ioas_cmd));
+}
+
+FIXTURE(vfio_compat_mock_domain)
+{
+ int fd;
+ uint32_t ioas_id;
+};
+
+FIXTURE_VARIANT(vfio_compat_mock_domain)
+{
+ unsigned int version;
+};
+
+FIXTURE_SETUP(vfio_compat_mock_domain)
+{
+ struct iommu_vfio_ioas vfio_ioas_cmd = {
+ .size = sizeof(vfio_ioas_cmd),
+ .op = IOMMU_VFIO_IOAS_SET,
+ };
+
+ self->fd = open("/dev/iommu", O_RDWR);
+ ASSERT_NE(-1, self->fd);
+
+ /* Create what VFIO would consider a group */
+ test_ioctl_ioas_alloc(&self->ioas_id);
+ test_cmd_mock_domain(self->ioas_id, NULL, NULL, NULL);
+
+ /* Attach it to the vfio compat */
+ vfio_ioas_cmd.ioas_id = self->ioas_id;
+ ASSERT_EQ(0, ioctl(self->fd, IOMMU_VFIO_IOAS, &vfio_ioas_cmd));
+ ASSERT_EQ(0, ioctl(self->fd, VFIO_SET_IOMMU, variant->version));
+}
+
+FIXTURE_TEARDOWN(vfio_compat_mock_domain)
+{
+ teardown_iommufd(self->fd, _metadata);
+}
+
+FIXTURE_VARIANT_ADD(vfio_compat_mock_domain, Ver1v2)
+{
+ .version = VFIO_TYPE1v2_IOMMU,
+};
+
+FIXTURE_VARIANT_ADD(vfio_compat_mock_domain, Ver1v0)
+{
+ .version = VFIO_TYPE1_IOMMU,
+};
+
+TEST_F(vfio_compat_mock_domain, simple_close)
+{
+}
+
+TEST_F(vfio_compat_mock_domain, option_huge_pages)
+{
+ struct iommu_option cmd = {
+ .size = sizeof(cmd),
+ .option_id = IOMMU_OPTION_HUGE_PAGES,
+ .op = IOMMU_OPTION_OP_GET,
+ .val64 = 3,
+ .object_id = self->ioas_id,
+ };
+
+ ASSERT_EQ(0, ioctl(self->fd, IOMMU_OPTION, &cmd));
+ if (variant->version == VFIO_TYPE1_IOMMU) {
+ ASSERT_EQ(0, cmd.val64);
+ } else {
+ ASSERT_EQ(1, cmd.val64);
+ }
+}
+
+/*
+ * Execute an ioctl command stored in buffer and check that the result does not
+ * overflow memory.
+ */
+static bool is_filled(const void *buf, uint8_t c, size_t len)
+{
+ const uint8_t *cbuf = buf;
+
+ for (; len; cbuf++, len--)
+ if (*cbuf != c)
+ return false;
+ return true;
+}
+
+#define ioctl_check_buf(fd, cmd) \
+ ({ \
+ size_t _cmd_len = *(__u32 *)buffer; \
+ \
+ memset(buffer + _cmd_len, 0xAA, BUFFER_SIZE - _cmd_len); \
+ ASSERT_EQ(0, ioctl(fd, cmd, buffer)); \
+ ASSERT_EQ(true, is_filled(buffer + _cmd_len, 0xAA, \
+ BUFFER_SIZE - _cmd_len)); \
+ })
+
+static void check_vfio_info_cap_chain(struct __test_metadata *_metadata,
+ struct vfio_iommu_type1_info *info_cmd)
+{
+ const struct vfio_info_cap_header *cap;
+
+ ASSERT_GE(info_cmd->argsz, info_cmd->cap_offset + sizeof(*cap));
+ cap = buffer + info_cmd->cap_offset;
+ while (true) {
+ size_t cap_size;
+
+ if (cap->next)
+ cap_size = (buffer + cap->next) - (void *)cap;
+ else
+ cap_size = (buffer + info_cmd->argsz) - (void *)cap;
+
+ switch (cap->id) {
+ case VFIO_IOMMU_TYPE1_INFO_CAP_IOVA_RANGE: {
+ struct vfio_iommu_type1_info_cap_iova_range *data =
+ (void *)cap;
+
+ ASSERT_EQ(1, data->header.version);
+ ASSERT_EQ(1, data->nr_iovas);
+ EXPECT_EQ(MOCK_APERTURE_START,
+ data->iova_ranges[0].start);
+ EXPECT_EQ(MOCK_APERTURE_LAST, data->iova_ranges[0].end);
+ break;
+ }
+ case VFIO_IOMMU_TYPE1_INFO_DMA_AVAIL: {
+ struct vfio_iommu_type1_info_dma_avail *data =
+ (void *)cap;
+
+ ASSERT_EQ(1, data->header.version);
+ ASSERT_EQ(sizeof(*data), cap_size);
+ break;
+ }
+ default:
+ ASSERT_EQ(false, true);
+ break;
+ }
+ if (!cap->next)
+ break;
+
+ ASSERT_GE(info_cmd->argsz, cap->next + sizeof(*cap));
+ ASSERT_GE(buffer + cap->next, (void *)cap);
+ cap = buffer + cap->next;
+ }
+}
+
+TEST_F(vfio_compat_mock_domain, get_info)
+{
+ struct vfio_iommu_type1_info *info_cmd = buffer;
+ unsigned int i;
+ size_t caplen;
+
+ /* Pre-cap ABI */
+ *info_cmd = (struct vfio_iommu_type1_info){
+ .argsz = offsetof(struct vfio_iommu_type1_info, cap_offset),
+ };
+ ioctl_check_buf(self->fd, VFIO_IOMMU_GET_INFO);
+ ASSERT_NE(0, info_cmd->iova_pgsizes);
+ ASSERT_EQ(VFIO_IOMMU_INFO_PGSIZES | VFIO_IOMMU_INFO_CAPS,
+ info_cmd->flags);
+
+ /* Read the cap chain size */
+ *info_cmd = (struct vfio_iommu_type1_info){
+ .argsz = sizeof(*info_cmd),
+ };
+ ioctl_check_buf(self->fd, VFIO_IOMMU_GET_INFO);
+ ASSERT_NE(0, info_cmd->iova_pgsizes);
+ ASSERT_EQ(VFIO_IOMMU_INFO_PGSIZES | VFIO_IOMMU_INFO_CAPS,
+ info_cmd->flags);
+ ASSERT_EQ(0, info_cmd->cap_offset);
+ ASSERT_LT(sizeof(*info_cmd), info_cmd->argsz);
+
+ /* Read the caps, kernel should never create a corrupted caps */
+ caplen = info_cmd->argsz;
+ for (i = sizeof(*info_cmd); i < caplen; i++) {
+ *info_cmd = (struct vfio_iommu_type1_info){
+ .argsz = i,
+ };
+ ioctl_check_buf(self->fd, VFIO_IOMMU_GET_INFO);
+ ASSERT_EQ(VFIO_IOMMU_INFO_PGSIZES | VFIO_IOMMU_INFO_CAPS,
+ info_cmd->flags);
+ if (!info_cmd->cap_offset)
+ continue;
+ check_vfio_info_cap_chain(_metadata, info_cmd);
+ }
+}
+
+static void shuffle_array(unsigned long *array, size_t nelms)
+{
+ unsigned int i;
+
+ /* Shuffle */
+ for (i = 0; i != nelms; i++) {
+ unsigned long tmp = array[i];
+ unsigned int other = rand() % (nelms - i);
+
+ array[i] = array[other];
+ array[other] = tmp;
+ }
+}
+
+TEST_F(vfio_compat_mock_domain, map)
+{
+ struct vfio_iommu_type1_dma_map map_cmd = {
+ .argsz = sizeof(map_cmd),
+ .flags = VFIO_DMA_MAP_FLAG_READ | VFIO_DMA_MAP_FLAG_WRITE,
+ .vaddr = (uintptr_t)buffer,
+ .size = BUFFER_SIZE,
+ .iova = MOCK_APERTURE_START,
+ };
+ struct vfio_iommu_type1_dma_unmap unmap_cmd = {
+ .argsz = sizeof(unmap_cmd),
+ .size = BUFFER_SIZE,
+ .iova = MOCK_APERTURE_START,
+ };
+ unsigned long pages_iova[BUFFER_SIZE / PAGE_SIZE];
+ unsigned int i;
+
+ /* Simple map/unmap */
+ ASSERT_EQ(0, ioctl(self->fd, VFIO_IOMMU_MAP_DMA, &map_cmd));
+ ASSERT_EQ(0, ioctl(self->fd, VFIO_IOMMU_UNMAP_DMA, &unmap_cmd));
+ ASSERT_EQ(BUFFER_SIZE, unmap_cmd.size);
+
+ /* UNMAP_FLAG_ALL requires 0 iova/size */
+ ASSERT_EQ(0, ioctl(self->fd, VFIO_IOMMU_MAP_DMA, &map_cmd));
+ unmap_cmd.flags = VFIO_DMA_UNMAP_FLAG_ALL;
+ EXPECT_ERRNO(EINVAL, ioctl(self->fd, VFIO_IOMMU_UNMAP_DMA, &unmap_cmd));
+
+ unmap_cmd.iova = 0;
+ unmap_cmd.size = 0;
+ ASSERT_EQ(0, ioctl(self->fd, VFIO_IOMMU_UNMAP_DMA, &unmap_cmd));
+ ASSERT_EQ(BUFFER_SIZE, unmap_cmd.size);
+
+ /* Small pages */
+ for (i = 0; i != ARRAY_SIZE(pages_iova); i++) {
+ map_cmd.iova = pages_iova[i] =
+ MOCK_APERTURE_START + i * PAGE_SIZE;
+ map_cmd.vaddr = (uintptr_t)buffer + i * PAGE_SIZE;
+ map_cmd.size = PAGE_SIZE;
+ ASSERT_EQ(0, ioctl(self->fd, VFIO_IOMMU_MAP_DMA, &map_cmd));
+ }
+ shuffle_array(pages_iova, ARRAY_SIZE(pages_iova));
+
+ unmap_cmd.flags = 0;
+ unmap_cmd.size = PAGE_SIZE;
+ for (i = 0; i != ARRAY_SIZE(pages_iova); i++) {
+ unmap_cmd.iova = pages_iova[i];
+ ASSERT_EQ(0, ioctl(self->fd, VFIO_IOMMU_UNMAP_DMA, &unmap_cmd));
+ }
+}
+
+TEST_F(vfio_compat_mock_domain, huge_map)
+{
+ size_t buf_size = HUGEPAGE_SIZE * 2;
+ struct vfio_iommu_type1_dma_map map_cmd = {
+ .argsz = sizeof(map_cmd),
+ .flags = VFIO_DMA_MAP_FLAG_READ | VFIO_DMA_MAP_FLAG_WRITE,
+ .size = buf_size,
+ .iova = MOCK_APERTURE_START,
+ };
+ struct vfio_iommu_type1_dma_unmap unmap_cmd = {
+ .argsz = sizeof(unmap_cmd),
+ };
+ unsigned long pages_iova[16];
+ unsigned int i;
+ void *buf;
+
+ /* Test huge pages and splitting */
+ buf = mmap(0, buf_size, PROT_READ | PROT_WRITE,
+ MAP_SHARED | MAP_ANONYMOUS | MAP_HUGETLB | MAP_POPULATE, -1,
+ 0);
+ ASSERT_NE(MAP_FAILED, buf);
+ map_cmd.vaddr = (uintptr_t)buf;
+ ASSERT_EQ(0, ioctl(self->fd, VFIO_IOMMU_MAP_DMA, &map_cmd));
+
+ unmap_cmd.size = buf_size / ARRAY_SIZE(pages_iova);
+ for (i = 0; i != ARRAY_SIZE(pages_iova); i++)
+ pages_iova[i] = MOCK_APERTURE_START + (i * unmap_cmd.size);
+ shuffle_array(pages_iova, ARRAY_SIZE(pages_iova));
+
+ /* type1 mode can cut up larger mappings, type1v2 always fails */
+ for (i = 0; i != ARRAY_SIZE(pages_iova); i++) {
+ unmap_cmd.iova = pages_iova[i];
+ unmap_cmd.size = buf_size / ARRAY_SIZE(pages_iova);
+ if (variant->version == VFIO_TYPE1_IOMMU) {
+ ASSERT_EQ(0, ioctl(self->fd, VFIO_IOMMU_UNMAP_DMA,
+ &unmap_cmd));
+ } else {
+ EXPECT_ERRNO(ENOENT,
+ ioctl(self->fd, VFIO_IOMMU_UNMAP_DMA,
+ &unmap_cmd));
+ }
+ }
+}
+
+TEST_HARNESS_MAIN
diff --git a/tools/testing/selftests/iommu/iommufd_fail_nth.c b/tools/testing/selftests/iommu/iommufd_fail_nth.c
new file mode 100644
index 000000000000..f590417cd67a
--- /dev/null
+++ b/tools/testing/selftests/iommu/iommufd_fail_nth.c
@@ -0,0 +1,630 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/* Copyright (c) 2021-2022, NVIDIA CORPORATION & AFFILIATES
+ *
+ * These tests are "kernel integrity" tests. They are looking for kernel
+ * WARN/OOPS/kasn/etc splats triggered by kernel sanitizers & debugging
+ * features. It does not attempt to verify that the system calls are doing what
+ * they are supposed to do.
+ *
+ * The basic philosophy is to run a sequence of calls that will succeed and then
+ * sweep every failure injection point on that call chain to look for
+ * interesting things in error handling.
+ *
+ * This test is best run with:
+ * echo 1 > /proc/sys/kernel/panic_on_warn
+ * If something is actually going wrong.
+ */
+#include <fcntl.h>
+#include <dirent.h>
+
+#define __EXPORTED_HEADERS__
+#include <linux/vfio.h>
+
+#include "iommufd_utils.h"
+
+static bool have_fault_injection;
+
+static int writeat(int dfd, const char *fn, const char *val)
+{
+ size_t val_len = strlen(val);
+ ssize_t res;
+ int fd;
+
+ fd = openat(dfd, fn, O_WRONLY);
+ if (fd == -1)
+ return -1;
+ res = write(fd, val, val_len);
+ assert(res == val_len);
+ close(fd);
+ return 0;
+}
+
+static __attribute__((constructor)) void setup_buffer(void)
+{
+ PAGE_SIZE = sysconf(_SC_PAGE_SIZE);
+
+ BUFFER_SIZE = 2*1024*1024;
+
+ buffer = mmap(0, BUFFER_SIZE, PROT_READ | PROT_WRITE,
+ MAP_SHARED | MAP_ANONYMOUS, -1, 0);
+}
+
+/*
+ * This sets up fail_injection in a way that is useful for this test.
+ * It does not attempt to restore things back to how they were.
+ */
+static __attribute__((constructor)) void setup_fault_injection(void)
+{
+ DIR *debugfs = opendir("/sys/kernel/debug/");
+ struct dirent *dent;
+
+ if (!debugfs)
+ return;
+
+ /* Allow any allocation call to be fault injected */
+ if (writeat(dirfd(debugfs), "failslab/ignore-gfp-wait", "N"))
+ return;
+ writeat(dirfd(debugfs), "fail_page_alloc/ignore-gfp-wait", "N");
+ writeat(dirfd(debugfs), "fail_page_alloc/ignore-gfp-highmem", "N");
+
+ while ((dent = readdir(debugfs))) {
+ char fn[300];
+
+ if (strncmp(dent->d_name, "fail", 4) != 0)
+ continue;
+
+ /* We are looking for kernel splats, quiet down the log */
+ snprintf(fn, sizeof(fn), "%s/verbose", dent->d_name);
+ writeat(dirfd(debugfs), fn, "0");
+ }
+ closedir(debugfs);
+ have_fault_injection = true;
+}
+
+struct fail_nth_state {
+ int proc_fd;
+ unsigned int iteration;
+};
+
+static void fail_nth_first(struct __test_metadata *_metadata,
+ struct fail_nth_state *nth_state)
+{
+ char buf[300];
+
+ snprintf(buf, sizeof(buf), "/proc/self/task/%u/fail-nth", getpid());
+ nth_state->proc_fd = open(buf, O_RDWR);
+ ASSERT_NE(-1, nth_state->proc_fd);
+}
+
+static bool fail_nth_next(struct __test_metadata *_metadata,
+ struct fail_nth_state *nth_state,
+ int test_result)
+{
+ static const char disable_nth[] = "0";
+ char buf[300];
+
+ /*
+ * This is just an arbitrary limit based on the current kernel
+ * situation. Changes in the kernel can dramatically change the number of
+ * required fault injection sites, so if this hits it doesn't
+ * necessarily mean a test failure, just that the limit has to be made
+ * bigger.
+ */
+ ASSERT_GT(400, nth_state->iteration);
+ if (nth_state->iteration != 0) {
+ ssize_t res;
+ ssize_t res2;
+
+ buf[0] = 0;
+ /*
+ * Annoyingly disabling the nth can also fail. This means
+ * the test passed without triggering failure
+ */
+ res = pread(nth_state->proc_fd, buf, sizeof(buf), 0);
+ if (res == -1 && errno == EFAULT) {
+ buf[0] = '1';
+ buf[1] = '\n';
+ res = 2;
+ }
+
+ res2 = pwrite(nth_state->proc_fd, disable_nth,
+ ARRAY_SIZE(disable_nth) - 1, 0);
+ if (res2 == -1 && errno == EFAULT) {
+ res2 = pwrite(nth_state->proc_fd, disable_nth,
+ ARRAY_SIZE(disable_nth) - 1, 0);
+ buf[0] = '1';
+ buf[1] = '\n';
+ }
+ ASSERT_EQ(ARRAY_SIZE(disable_nth) - 1, res2);
+
+ /* printf(" nth %u result=%d nth=%u\n", nth_state->iteration,
+ test_result, atoi(buf)); */
+ fflush(stdout);
+ ASSERT_LT(1, res);
+ if (res != 2 || buf[0] != '0' || buf[1] != '\n')
+ return false;
+ } else {
+ /* printf(" nth %u result=%d\n", nth_state->iteration,
+ test_result); */
+ }
+ nth_state->iteration++;
+ return true;
+}
+
+/*
+ * This is called during the test to start failure injection. It allows the test
+ * to do some setup that has already been swept and thus reduce the required
+ * iterations.
+ */
+void __fail_nth_enable(struct __test_metadata *_metadata,
+ struct fail_nth_state *nth_state)
+{
+ char buf[300];
+ size_t len;
+
+ if (!nth_state->iteration)
+ return;
+
+ len = snprintf(buf, sizeof(buf), "%u", nth_state->iteration);
+ ASSERT_EQ(len, pwrite(nth_state->proc_fd, buf, len, 0));
+}
+#define fail_nth_enable() __fail_nth_enable(_metadata, _nth_state)
+
+#define TEST_FAIL_NTH(fixture_name, name) \
+ static int test_nth_##name(struct __test_metadata *_metadata, \
+ FIXTURE_DATA(fixture_name) *self, \
+ const FIXTURE_VARIANT(fixture_name) \
+ *variant, \
+ struct fail_nth_state *_nth_state); \
+ TEST_F(fixture_name, name) \
+ { \
+ struct fail_nth_state nth_state = {}; \
+ int test_result = 0; \
+ \
+ if (!have_fault_injection) \
+ SKIP(return, \
+ "fault injection is not enabled in the kernel"); \
+ fail_nth_first(_metadata, &nth_state); \
+ ASSERT_EQ(0, test_nth_##name(_metadata, self, variant, \
+ &nth_state)); \
+ while (fail_nth_next(_metadata, &nth_state, test_result)) { \
+ fixture_name##_teardown(_metadata, self, variant); \
+ fixture_name##_setup(_metadata, self, variant); \
+ test_result = test_nth_##name(_metadata, self, \
+ variant, &nth_state); \
+ }; \
+ ASSERT_EQ(0, test_result); \
+ } \
+ static int test_nth_##name( \
+ struct __test_metadata __attribute__((unused)) *_metadata, \
+ FIXTURE_DATA(fixture_name) __attribute__((unused)) *self, \
+ const FIXTURE_VARIANT(fixture_name) __attribute__((unused)) \
+ *variant, \
+ struct fail_nth_state *_nth_state)
+
+FIXTURE(basic_fail_nth)
+{
+ int fd;
+ uint32_t access_id;
+};
+
+FIXTURE_SETUP(basic_fail_nth)
+{
+ self->fd = -1;
+ self->access_id = 0;
+}
+
+FIXTURE_TEARDOWN(basic_fail_nth)
+{
+ int rc;
+
+ if (self->access_id) {
+ /* The access FD holds the iommufd open until it closes */
+ rc = _test_cmd_destroy_access(self->access_id);
+ assert(rc == 0);
+ }
+ teardown_iommufd(self->fd, _metadata);
+}
+
+/* Cover ioas.c */
+TEST_FAIL_NTH(basic_fail_nth, basic)
+{
+ struct iommu_iova_range ranges[10];
+ uint32_t ioas_id;
+ __u64 iova;
+
+ fail_nth_enable();
+
+ self->fd = open("/dev/iommu", O_RDWR);
+ if (self->fd == -1)
+ return -1;
+
+ if (_test_ioctl_ioas_alloc(self->fd, &ioas_id))
+ return -1;
+
+ {
+ struct iommu_ioas_iova_ranges ranges_cmd = {
+ .size = sizeof(ranges_cmd),
+ .num_iovas = ARRAY_SIZE(ranges),
+ .ioas_id = ioas_id,
+ .allowed_iovas = (uintptr_t)ranges,
+ };
+ if (ioctl(self->fd, IOMMU_IOAS_IOVA_RANGES, &ranges_cmd))
+ return -1;
+ }
+
+ {
+ struct iommu_ioas_allow_iovas allow_cmd = {
+ .size = sizeof(allow_cmd),
+ .ioas_id = ioas_id,
+ .num_iovas = 1,
+ .allowed_iovas = (uintptr_t)ranges,
+ };
+
+ ranges[0].start = 16*1024;
+ ranges[0].last = BUFFER_SIZE + 16 * 1024 * 600 - 1;
+ if (ioctl(self->fd, IOMMU_IOAS_ALLOW_IOVAS, &allow_cmd))
+ return -1;
+ }
+
+ if (_test_ioctl_ioas_map(self->fd, ioas_id, buffer, BUFFER_SIZE, &iova,
+ IOMMU_IOAS_MAP_WRITEABLE |
+ IOMMU_IOAS_MAP_READABLE))
+ return -1;
+
+ {
+ struct iommu_ioas_copy copy_cmd = {
+ .size = sizeof(copy_cmd),
+ .flags = IOMMU_IOAS_MAP_WRITEABLE |
+ IOMMU_IOAS_MAP_READABLE,
+ .dst_ioas_id = ioas_id,
+ .src_ioas_id = ioas_id,
+ .src_iova = iova,
+ .length = sizeof(ranges),
+ };
+
+ if (ioctl(self->fd, IOMMU_IOAS_COPY, &copy_cmd))
+ return -1;
+ }
+
+ if (_test_ioctl_ioas_unmap(self->fd, ioas_id, iova, BUFFER_SIZE,
+ NULL))
+ return -1;
+ /* Failure path of no IOVA to unmap */
+ _test_ioctl_ioas_unmap(self->fd, ioas_id, iova, BUFFER_SIZE, NULL);
+ return 0;
+}
+
+/* iopt_area_fill_domains() and iopt_area_fill_domain() */
+TEST_FAIL_NTH(basic_fail_nth, map_domain)
+{
+ uint32_t ioas_id;
+ __u32 stdev_id;
+ __u32 hwpt_id;
+ __u64 iova;
+
+ self->fd = open("/dev/iommu", O_RDWR);
+ if (self->fd == -1)
+ return -1;
+
+ if (_test_ioctl_ioas_alloc(self->fd, &ioas_id))
+ return -1;
+
+ if (_test_ioctl_set_temp_memory_limit(self->fd, 32))
+ return -1;
+
+ fail_nth_enable();
+
+ if (_test_cmd_mock_domain(self->fd, ioas_id, &stdev_id, &hwpt_id, NULL))
+ return -1;
+
+ if (_test_ioctl_ioas_map(self->fd, ioas_id, buffer, 262144, &iova,
+ IOMMU_IOAS_MAP_WRITEABLE |
+ IOMMU_IOAS_MAP_READABLE))
+ return -1;
+
+ if (_test_ioctl_destroy(self->fd, stdev_id))
+ return -1;
+
+ if (_test_cmd_mock_domain(self->fd, ioas_id, &stdev_id, &hwpt_id, NULL))
+ return -1;
+ return 0;
+}
+
+TEST_FAIL_NTH(basic_fail_nth, map_two_domains)
+{
+ uint32_t ioas_id;
+ __u32 stdev_id2;
+ __u32 stdev_id;
+ __u32 hwpt_id2;
+ __u32 hwpt_id;
+ __u64 iova;
+
+ self->fd = open("/dev/iommu", O_RDWR);
+ if (self->fd == -1)
+ return -1;
+
+ if (_test_ioctl_ioas_alloc(self->fd, &ioas_id))
+ return -1;
+
+ if (_test_ioctl_set_temp_memory_limit(self->fd, 32))
+ return -1;
+
+ if (_test_cmd_mock_domain(self->fd, ioas_id, &stdev_id, &hwpt_id, NULL))
+ return -1;
+
+ fail_nth_enable();
+
+ if (_test_cmd_mock_domain(self->fd, ioas_id, &stdev_id2, &hwpt_id2,
+ NULL))
+ return -1;
+
+ if (_test_ioctl_ioas_map(self->fd, ioas_id, buffer, 262144, &iova,
+ IOMMU_IOAS_MAP_WRITEABLE |
+ IOMMU_IOAS_MAP_READABLE))
+ return -1;
+
+ if (_test_ioctl_destroy(self->fd, stdev_id))
+ return -1;
+
+ if (_test_ioctl_destroy(self->fd, stdev_id2))
+ return -1;
+
+ if (_test_cmd_mock_domain(self->fd, ioas_id, &stdev_id, &hwpt_id, NULL))
+ return -1;
+ if (_test_cmd_mock_domain(self->fd, ioas_id, &stdev_id2, &hwpt_id2,
+ NULL))
+ return -1;
+ return 0;
+}
+
+TEST_FAIL_NTH(basic_fail_nth, access_rw)
+{
+ uint64_t tmp_big[4096];
+ uint32_t ioas_id;
+ uint16_t tmp[32];
+ __u64 iova;
+
+ self->fd = open("/dev/iommu", O_RDWR);
+ if (self->fd == -1)
+ return -1;
+
+ if (_test_ioctl_ioas_alloc(self->fd, &ioas_id))
+ return -1;
+
+ if (_test_ioctl_set_temp_memory_limit(self->fd, 32))
+ return -1;
+
+ if (_test_ioctl_ioas_map(self->fd, ioas_id, buffer, 262144, &iova,
+ IOMMU_IOAS_MAP_WRITEABLE |
+ IOMMU_IOAS_MAP_READABLE))
+ return -1;
+
+ fail_nth_enable();
+
+ if (_test_cmd_create_access(self->fd, ioas_id, &self->access_id, 0))
+ return -1;
+
+ {
+ struct iommu_test_cmd access_cmd = {
+ .size = sizeof(access_cmd),
+ .op = IOMMU_TEST_OP_ACCESS_RW,
+ .id = self->access_id,
+ .access_rw = { .iova = iova,
+ .length = sizeof(tmp),
+ .uptr = (uintptr_t)tmp },
+ };
+
+ // READ
+ if (ioctl(self->fd, _IOMMU_TEST_CMD(IOMMU_TEST_OP_ACCESS_RW),
+ &access_cmd))
+ return -1;
+
+ access_cmd.access_rw.flags = MOCK_ACCESS_RW_WRITE;
+ if (ioctl(self->fd, _IOMMU_TEST_CMD(IOMMU_TEST_OP_ACCESS_RW),
+ &access_cmd))
+ return -1;
+
+ access_cmd.access_rw.flags = MOCK_ACCESS_RW_SLOW_PATH;
+ if (ioctl(self->fd, _IOMMU_TEST_CMD(IOMMU_TEST_OP_ACCESS_RW),
+ &access_cmd))
+ return -1;
+ access_cmd.access_rw.flags = MOCK_ACCESS_RW_SLOW_PATH |
+ MOCK_ACCESS_RW_WRITE;
+ if (ioctl(self->fd, _IOMMU_TEST_CMD(IOMMU_TEST_OP_ACCESS_RW),
+ &access_cmd))
+ return -1;
+ }
+
+ {
+ struct iommu_test_cmd access_cmd = {
+ .size = sizeof(access_cmd),
+ .op = IOMMU_TEST_OP_ACCESS_RW,
+ .id = self->access_id,
+ .access_rw = { .iova = iova,
+ .flags = MOCK_ACCESS_RW_SLOW_PATH,
+ .length = sizeof(tmp_big),
+ .uptr = (uintptr_t)tmp_big },
+ };
+
+ if (ioctl(self->fd, _IOMMU_TEST_CMD(IOMMU_TEST_OP_ACCESS_RW),
+ &access_cmd))
+ return -1;
+ }
+ if (_test_cmd_destroy_access(self->access_id))
+ return -1;
+ self->access_id = 0;
+ return 0;
+}
+
+/* pages.c access functions */
+TEST_FAIL_NTH(basic_fail_nth, access_pin)
+{
+ uint32_t access_pages_id;
+ uint32_t ioas_id;
+ __u64 iova;
+
+ self->fd = open("/dev/iommu", O_RDWR);
+ if (self->fd == -1)
+ return -1;
+
+ if (_test_ioctl_ioas_alloc(self->fd, &ioas_id))
+ return -1;
+
+ if (_test_ioctl_set_temp_memory_limit(self->fd, 32))
+ return -1;
+
+ if (_test_ioctl_ioas_map(self->fd, ioas_id, buffer, BUFFER_SIZE, &iova,
+ IOMMU_IOAS_MAP_WRITEABLE |
+ IOMMU_IOAS_MAP_READABLE))
+ return -1;
+
+ if (_test_cmd_create_access(self->fd, ioas_id, &self->access_id,
+ MOCK_FLAGS_ACCESS_CREATE_NEEDS_PIN_PAGES))
+ return -1;
+
+ fail_nth_enable();
+
+ {
+ struct iommu_test_cmd access_cmd = {
+ .size = sizeof(access_cmd),
+ .op = IOMMU_TEST_OP_ACCESS_PAGES,
+ .id = self->access_id,
+ .access_pages = { .iova = iova,
+ .length = BUFFER_SIZE,
+ .uptr = (uintptr_t)buffer },
+ };
+
+ if (ioctl(self->fd, _IOMMU_TEST_CMD(IOMMU_TEST_OP_ACCESS_RW),
+ &access_cmd))
+ return -1;
+ access_pages_id = access_cmd.access_pages.out_access_pages_id;
+ }
+
+ if (_test_cmd_destroy_access_pages(self->fd, self->access_id,
+ access_pages_id))
+ return -1;
+
+ if (_test_cmd_destroy_access(self->access_id))
+ return -1;
+ self->access_id = 0;
+ return 0;
+}
+
+/* iopt_pages_fill_xarray() */
+TEST_FAIL_NTH(basic_fail_nth, access_pin_domain)
+{
+ uint32_t access_pages_id;
+ uint32_t ioas_id;
+ __u32 stdev_id;
+ __u32 hwpt_id;
+ __u64 iova;
+
+ self->fd = open("/dev/iommu", O_RDWR);
+ if (self->fd == -1)
+ return -1;
+
+ if (_test_ioctl_ioas_alloc(self->fd, &ioas_id))
+ return -1;
+
+ if (_test_ioctl_set_temp_memory_limit(self->fd, 32))
+ return -1;
+
+ if (_test_cmd_mock_domain(self->fd, ioas_id, &stdev_id, &hwpt_id, NULL))
+ return -1;
+
+ if (_test_ioctl_ioas_map(self->fd, ioas_id, buffer, BUFFER_SIZE, &iova,
+ IOMMU_IOAS_MAP_WRITEABLE |
+ IOMMU_IOAS_MAP_READABLE))
+ return -1;
+
+ if (_test_cmd_create_access(self->fd, ioas_id, &self->access_id,
+ MOCK_FLAGS_ACCESS_CREATE_NEEDS_PIN_PAGES))
+ return -1;
+
+ fail_nth_enable();
+
+ {
+ struct iommu_test_cmd access_cmd = {
+ .size = sizeof(access_cmd),
+ .op = IOMMU_TEST_OP_ACCESS_PAGES,
+ .id = self->access_id,
+ .access_pages = { .iova = iova,
+ .length = BUFFER_SIZE,
+ .uptr = (uintptr_t)buffer },
+ };
+
+ if (ioctl(self->fd, _IOMMU_TEST_CMD(IOMMU_TEST_OP_ACCESS_RW),
+ &access_cmd))
+ return -1;
+ access_pages_id = access_cmd.access_pages.out_access_pages_id;
+ }
+
+ if (_test_cmd_destroy_access_pages(self->fd, self->access_id,
+ access_pages_id))
+ return -1;
+
+ if (_test_cmd_destroy_access(self->access_id))
+ return -1;
+ self->access_id = 0;
+
+ if (_test_ioctl_destroy(self->fd, stdev_id))
+ return -1;
+ return 0;
+}
+
+/* device.c */
+TEST_FAIL_NTH(basic_fail_nth, device)
+{
+ struct iommu_test_hw_info info;
+ uint32_t ioas_id;
+ uint32_t ioas_id2;
+ uint32_t stdev_id;
+ uint32_t idev_id;
+ uint32_t hwpt_id;
+ __u64 iova;
+
+ self->fd = open("/dev/iommu", O_RDWR);
+ if (self->fd == -1)
+ return -1;
+
+ if (_test_ioctl_ioas_alloc(self->fd, &ioas_id))
+ return -1;
+
+ if (_test_ioctl_ioas_alloc(self->fd, &ioas_id2))
+ return -1;
+
+ iova = MOCK_APERTURE_START;
+ if (_test_ioctl_ioas_map(self->fd, ioas_id, buffer, PAGE_SIZE, &iova,
+ IOMMU_IOAS_MAP_FIXED_IOVA |
+ IOMMU_IOAS_MAP_WRITEABLE |
+ IOMMU_IOAS_MAP_READABLE))
+ return -1;
+ if (_test_ioctl_ioas_map(self->fd, ioas_id2, buffer, PAGE_SIZE, &iova,
+ IOMMU_IOAS_MAP_FIXED_IOVA |
+ IOMMU_IOAS_MAP_WRITEABLE |
+ IOMMU_IOAS_MAP_READABLE))
+ return -1;
+
+ fail_nth_enable();
+
+ if (_test_cmd_mock_domain(self->fd, ioas_id, &stdev_id, NULL,
+ &idev_id))
+ return -1;
+
+ if (_test_cmd_get_hw_info(self->fd, idev_id, &info, sizeof(info), NULL))
+ return -1;
+
+ if (_test_cmd_hwpt_alloc(self->fd, idev_id, ioas_id, 0, &hwpt_id,
+ IOMMU_HWPT_DATA_NONE, 0, 0))
+ return -1;
+
+ if (_test_cmd_mock_domain_replace(self->fd, stdev_id, ioas_id2, NULL))
+ return -1;
+
+ if (_test_cmd_mock_domain_replace(self->fd, stdev_id, hwpt_id, NULL))
+ return -1;
+ return 0;
+}
+
+TEST_HARNESS_MAIN
diff --git a/tools/testing/selftests/iommu/iommufd_utils.h b/tools/testing/selftests/iommu/iommufd_utils.h
new file mode 100644
index 000000000000..8d2b46b2114d
--- /dev/null
+++ b/tools/testing/selftests/iommu/iommufd_utils.h
@@ -0,0 +1,686 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/* Copyright (c) 2021-2022, NVIDIA CORPORATION & AFFILIATES */
+#ifndef __SELFTEST_IOMMUFD_UTILS
+#define __SELFTEST_IOMMUFD_UTILS
+
+#include <unistd.h>
+#include <stddef.h>
+#include <sys/fcntl.h>
+#include <sys/ioctl.h>
+#include <stdint.h>
+#include <assert.h>
+
+#include "../kselftest_harness.h"
+#include "../../../../drivers/iommu/iommufd/iommufd_test.h"
+
+/* Hack to make assertions more readable */
+#define _IOMMU_TEST_CMD(x) IOMMU_TEST_CMD
+
+/* Imported from include/asm-generic/bitops/generic-non-atomic.h */
+#define BITS_PER_BYTE 8
+#define BITS_PER_LONG __BITS_PER_LONG
+#define BIT_MASK(nr) (1UL << ((nr) % __BITS_PER_LONG))
+#define BIT_WORD(nr) ((nr) / __BITS_PER_LONG)
+
+static inline void set_bit(unsigned int nr, unsigned long *addr)
+{
+ unsigned long mask = BIT_MASK(nr);
+ unsigned long *p = ((unsigned long *)addr) + BIT_WORD(nr);
+
+ *p |= mask;
+}
+
+static inline bool test_bit(unsigned int nr, unsigned long *addr)
+{
+ return 1UL & (addr[BIT_WORD(nr)] >> (nr & (BITS_PER_LONG - 1)));
+}
+
+static void *buffer;
+static unsigned long BUFFER_SIZE;
+
+static unsigned long PAGE_SIZE;
+
+#define sizeof_field(TYPE, MEMBER) sizeof((((TYPE *)0)->MEMBER))
+#define offsetofend(TYPE, MEMBER) \
+ (offsetof(TYPE, MEMBER) + sizeof_field(TYPE, MEMBER))
+
+/*
+ * Have the kernel check the refcount on pages. I don't know why a freshly
+ * mmap'd anon non-compound page starts out with a ref of 3
+ */
+#define check_refs(_ptr, _length, _refs) \
+ ({ \
+ struct iommu_test_cmd test_cmd = { \
+ .size = sizeof(test_cmd), \
+ .op = IOMMU_TEST_OP_MD_CHECK_REFS, \
+ .check_refs = { .length = _length, \
+ .uptr = (uintptr_t)(_ptr), \
+ .refs = _refs }, \
+ }; \
+ ASSERT_EQ(0, \
+ ioctl(self->fd, \
+ _IOMMU_TEST_CMD(IOMMU_TEST_OP_MD_CHECK_REFS), \
+ &test_cmd)); \
+ })
+
+static int _test_cmd_mock_domain(int fd, unsigned int ioas_id, __u32 *stdev_id,
+ __u32 *hwpt_id, __u32 *idev_id)
+{
+ struct iommu_test_cmd cmd = {
+ .size = sizeof(cmd),
+ .op = IOMMU_TEST_OP_MOCK_DOMAIN,
+ .id = ioas_id,
+ .mock_domain = {},
+ };
+ int ret;
+
+ ret = ioctl(fd, IOMMU_TEST_CMD, &cmd);
+ if (ret)
+ return ret;
+ if (stdev_id)
+ *stdev_id = cmd.mock_domain.out_stdev_id;
+ assert(cmd.id != 0);
+ if (hwpt_id)
+ *hwpt_id = cmd.mock_domain.out_hwpt_id;
+ if (idev_id)
+ *idev_id = cmd.mock_domain.out_idev_id;
+ return 0;
+}
+#define test_cmd_mock_domain(ioas_id, stdev_id, hwpt_id, idev_id) \
+ ASSERT_EQ(0, _test_cmd_mock_domain(self->fd, ioas_id, stdev_id, \
+ hwpt_id, idev_id))
+#define test_err_mock_domain(_errno, ioas_id, stdev_id, hwpt_id) \
+ EXPECT_ERRNO(_errno, _test_cmd_mock_domain(self->fd, ioas_id, \
+ stdev_id, hwpt_id, NULL))
+
+static int _test_cmd_mock_domain_flags(int fd, unsigned int ioas_id,
+ __u32 stdev_flags, __u32 *stdev_id,
+ __u32 *hwpt_id, __u32 *idev_id)
+{
+ struct iommu_test_cmd cmd = {
+ .size = sizeof(cmd),
+ .op = IOMMU_TEST_OP_MOCK_DOMAIN_FLAGS,
+ .id = ioas_id,
+ .mock_domain_flags = { .dev_flags = stdev_flags },
+ };
+ int ret;
+
+ ret = ioctl(fd, IOMMU_TEST_CMD, &cmd);
+ if (ret)
+ return ret;
+ if (stdev_id)
+ *stdev_id = cmd.mock_domain_flags.out_stdev_id;
+ assert(cmd.id != 0);
+ if (hwpt_id)
+ *hwpt_id = cmd.mock_domain_flags.out_hwpt_id;
+ if (idev_id)
+ *idev_id = cmd.mock_domain_flags.out_idev_id;
+ return 0;
+}
+#define test_cmd_mock_domain_flags(ioas_id, flags, stdev_id, hwpt_id, idev_id) \
+ ASSERT_EQ(0, _test_cmd_mock_domain_flags(self->fd, ioas_id, flags, \
+ stdev_id, hwpt_id, idev_id))
+#define test_err_mock_domain_flags(_errno, ioas_id, flags, stdev_id, hwpt_id) \
+ EXPECT_ERRNO(_errno, \
+ _test_cmd_mock_domain_flags(self->fd, ioas_id, flags, \
+ stdev_id, hwpt_id, NULL))
+
+static int _test_cmd_mock_domain_replace(int fd, __u32 stdev_id, __u32 pt_id,
+ __u32 *hwpt_id)
+{
+ struct iommu_test_cmd cmd = {
+ .size = sizeof(cmd),
+ .op = IOMMU_TEST_OP_MOCK_DOMAIN_REPLACE,
+ .id = stdev_id,
+ .mock_domain_replace = {
+ .pt_id = pt_id,
+ },
+ };
+ int ret;
+
+ ret = ioctl(fd, IOMMU_TEST_CMD, &cmd);
+ if (ret)
+ return ret;
+ if (hwpt_id)
+ *hwpt_id = cmd.mock_domain_replace.pt_id;
+ return 0;
+}
+
+#define test_cmd_mock_domain_replace(stdev_id, pt_id) \
+ ASSERT_EQ(0, _test_cmd_mock_domain_replace(self->fd, stdev_id, pt_id, \
+ NULL))
+#define test_err_mock_domain_replace(_errno, stdev_id, pt_id) \
+ EXPECT_ERRNO(_errno, _test_cmd_mock_domain_replace(self->fd, stdev_id, \
+ pt_id, NULL))
+
+static int _test_cmd_hwpt_alloc(int fd, __u32 device_id, __u32 pt_id,
+ __u32 flags, __u32 *hwpt_id, __u32 data_type,
+ void *data, size_t data_len)
+{
+ struct iommu_hwpt_alloc cmd = {
+ .size = sizeof(cmd),
+ .flags = flags,
+ .dev_id = device_id,
+ .pt_id = pt_id,
+ .data_type = data_type,
+ .data_len = data_len,
+ .data_uptr = (uint64_t)data,
+ };
+ int ret;
+
+ ret = ioctl(fd, IOMMU_HWPT_ALLOC, &cmd);
+ if (ret)
+ return ret;
+ if (hwpt_id)
+ *hwpt_id = cmd.out_hwpt_id;
+ return 0;
+}
+
+#define test_cmd_hwpt_alloc(device_id, pt_id, flags, hwpt_id) \
+ ASSERT_EQ(0, _test_cmd_hwpt_alloc(self->fd, device_id, pt_id, flags, \
+ hwpt_id, IOMMU_HWPT_DATA_NONE, NULL, \
+ 0))
+#define test_err_hwpt_alloc(_errno, device_id, pt_id, flags, hwpt_id) \
+ EXPECT_ERRNO(_errno, _test_cmd_hwpt_alloc( \
+ self->fd, device_id, pt_id, flags, \
+ hwpt_id, IOMMU_HWPT_DATA_NONE, NULL, 0))
+
+#define test_cmd_hwpt_alloc_nested(device_id, pt_id, flags, hwpt_id, \
+ data_type, data, data_len) \
+ ASSERT_EQ(0, _test_cmd_hwpt_alloc(self->fd, device_id, pt_id, flags, \
+ hwpt_id, data_type, data, data_len))
+#define test_err_hwpt_alloc_nested(_errno, device_id, pt_id, flags, hwpt_id, \
+ data_type, data, data_len) \
+ EXPECT_ERRNO(_errno, \
+ _test_cmd_hwpt_alloc(self->fd, device_id, pt_id, flags, \
+ hwpt_id, data_type, data, data_len))
+
+#define test_cmd_hwpt_check_iotlb(hwpt_id, iotlb_id, expected) \
+ ({ \
+ struct iommu_test_cmd test_cmd = { \
+ .size = sizeof(test_cmd), \
+ .op = IOMMU_TEST_OP_MD_CHECK_IOTLB, \
+ .id = hwpt_id, \
+ .check_iotlb = { \
+ .id = iotlb_id, \
+ .iotlb = expected, \
+ }, \
+ }; \
+ ASSERT_EQ(0, \
+ ioctl(self->fd, \
+ _IOMMU_TEST_CMD(IOMMU_TEST_OP_MD_CHECK_IOTLB), \
+ &test_cmd)); \
+ })
+
+#define test_cmd_hwpt_check_iotlb_all(hwpt_id, expected) \
+ ({ \
+ int i; \
+ for (i = 0; i < MOCK_NESTED_DOMAIN_IOTLB_NUM; i++) \
+ test_cmd_hwpt_check_iotlb(hwpt_id, i, expected); \
+ })
+
+static int _test_cmd_hwpt_invalidate(int fd, __u32 hwpt_id, void *reqs,
+ uint32_t data_type, uint32_t lreq,
+ uint32_t *nreqs)
+{
+ struct iommu_hwpt_invalidate cmd = {
+ .size = sizeof(cmd),
+ .hwpt_id = hwpt_id,
+ .data_type = data_type,
+ .data_uptr = (uint64_t)reqs,
+ .entry_len = lreq,
+ .entry_num = *nreqs,
+ };
+ int rc = ioctl(fd, IOMMU_HWPT_INVALIDATE, &cmd);
+ *nreqs = cmd.entry_num;
+ return rc;
+}
+
+#define test_cmd_hwpt_invalidate(hwpt_id, reqs, data_type, lreq, nreqs) \
+ ({ \
+ ASSERT_EQ(0, \
+ _test_cmd_hwpt_invalidate(self->fd, hwpt_id, reqs, \
+ data_type, lreq, nreqs)); \
+ })
+#define test_err_hwpt_invalidate(_errno, hwpt_id, reqs, data_type, lreq, \
+ nreqs) \
+ ({ \
+ EXPECT_ERRNO(_errno, _test_cmd_hwpt_invalidate( \
+ self->fd, hwpt_id, reqs, \
+ data_type, lreq, nreqs)); \
+ })
+
+static int _test_cmd_access_replace_ioas(int fd, __u32 access_id,
+ unsigned int ioas_id)
+{
+ struct iommu_test_cmd cmd = {
+ .size = sizeof(cmd),
+ .op = IOMMU_TEST_OP_ACCESS_REPLACE_IOAS,
+ .id = access_id,
+ .access_replace_ioas = { .ioas_id = ioas_id },
+ };
+ int ret;
+
+ ret = ioctl(fd, IOMMU_TEST_CMD, &cmd);
+ if (ret)
+ return ret;
+ return 0;
+}
+#define test_cmd_access_replace_ioas(access_id, ioas_id) \
+ ASSERT_EQ(0, _test_cmd_access_replace_ioas(self->fd, access_id, ioas_id))
+
+static int _test_cmd_set_dirty_tracking(int fd, __u32 hwpt_id, bool enabled)
+{
+ struct iommu_hwpt_set_dirty_tracking cmd = {
+ .size = sizeof(cmd),
+ .flags = enabled ? IOMMU_HWPT_DIRTY_TRACKING_ENABLE : 0,
+ .hwpt_id = hwpt_id,
+ };
+ int ret;
+
+ ret = ioctl(fd, IOMMU_HWPT_SET_DIRTY_TRACKING, &cmd);
+ if (ret)
+ return -errno;
+ return 0;
+}
+#define test_cmd_set_dirty_tracking(hwpt_id, enabled) \
+ ASSERT_EQ(0, _test_cmd_set_dirty_tracking(self->fd, hwpt_id, enabled))
+
+static int _test_cmd_get_dirty_bitmap(int fd, __u32 hwpt_id, size_t length,
+ __u64 iova, size_t page_size,
+ __u64 *bitmap, __u32 flags)
+{
+ struct iommu_hwpt_get_dirty_bitmap cmd = {
+ .size = sizeof(cmd),
+ .hwpt_id = hwpt_id,
+ .flags = flags,
+ .iova = iova,
+ .length = length,
+ .page_size = page_size,
+ .data = (uintptr_t)bitmap,
+ };
+ int ret;
+
+ ret = ioctl(fd, IOMMU_HWPT_GET_DIRTY_BITMAP, &cmd);
+ if (ret)
+ return ret;
+ return 0;
+}
+
+#define test_cmd_get_dirty_bitmap(fd, hwpt_id, length, iova, page_size, \
+ bitmap, flags) \
+ ASSERT_EQ(0, _test_cmd_get_dirty_bitmap(fd, hwpt_id, length, iova, \
+ page_size, bitmap, flags))
+
+static int _test_cmd_mock_domain_set_dirty(int fd, __u32 hwpt_id, size_t length,
+ __u64 iova, size_t page_size,
+ __u64 *bitmap, __u64 *dirty)
+{
+ struct iommu_test_cmd cmd = {
+ .size = sizeof(cmd),
+ .op = IOMMU_TEST_OP_DIRTY,
+ .id = hwpt_id,
+ .dirty = {
+ .iova = iova,
+ .length = length,
+ .page_size = page_size,
+ .uptr = (uintptr_t)bitmap,
+ }
+ };
+ int ret;
+
+ ret = ioctl(fd, _IOMMU_TEST_CMD(IOMMU_TEST_OP_DIRTY), &cmd);
+ if (ret)
+ return -ret;
+ if (dirty)
+ *dirty = cmd.dirty.out_nr_dirty;
+ return 0;
+}
+
+#define test_cmd_mock_domain_set_dirty(fd, hwpt_id, length, iova, page_size, \
+ bitmap, nr) \
+ ASSERT_EQ(0, \
+ _test_cmd_mock_domain_set_dirty(fd, hwpt_id, length, iova, \
+ page_size, bitmap, nr))
+
+static int _test_mock_dirty_bitmaps(int fd, __u32 hwpt_id, size_t length,
+ __u64 iova, size_t page_size,
+ size_t pte_page_size, __u64 *bitmap,
+ __u64 bitmap_size, __u32 flags,
+ struct __test_metadata *_metadata)
+{
+ unsigned long npte = pte_page_size / page_size, pteset = 2 * npte;
+ unsigned long nbits = bitmap_size * BITS_PER_BYTE;
+ unsigned long j, i, nr = nbits / pteset ?: 1;
+ __u64 out_dirty = 0;
+
+ /* Mark all even bits as dirty in the mock domain */
+ memset(bitmap, 0, bitmap_size);
+ for (i = 0; i < nbits; i += pteset)
+ set_bit(i, (unsigned long *)bitmap);
+
+ test_cmd_mock_domain_set_dirty(fd, hwpt_id, length, iova, page_size,
+ bitmap, &out_dirty);
+ ASSERT_EQ(nr, out_dirty);
+
+ /* Expect all even bits as dirty in the user bitmap */
+ memset(bitmap, 0, bitmap_size);
+ test_cmd_get_dirty_bitmap(fd, hwpt_id, length, iova, page_size, bitmap,
+ flags);
+ /* Beware ASSERT_EQ() is two statements -- braces are not redundant! */
+ for (i = 0; i < nbits; i += pteset) {
+ for (j = 0; j < pteset; j++) {
+ ASSERT_EQ(j < npte,
+ test_bit(i + j, (unsigned long *)bitmap));
+ }
+ ASSERT_EQ(!(i % pteset), test_bit(i, (unsigned long *)bitmap));
+ }
+
+ memset(bitmap, 0, bitmap_size);
+ test_cmd_get_dirty_bitmap(fd, hwpt_id, length, iova, page_size, bitmap,
+ flags);
+
+ /* It as read already -- expect all zeroes */
+ for (i = 0; i < nbits; i += pteset) {
+ for (j = 0; j < pteset; j++) {
+ ASSERT_EQ(
+ (j < npte) &&
+ (flags &
+ IOMMU_HWPT_GET_DIRTY_BITMAP_NO_CLEAR),
+ test_bit(i + j, (unsigned long *)bitmap));
+ }
+ }
+
+ return 0;
+}
+#define test_mock_dirty_bitmaps(hwpt_id, length, iova, page_size, pte_size,\
+ bitmap, bitmap_size, flags, _metadata) \
+ ASSERT_EQ(0, _test_mock_dirty_bitmaps(self->fd, hwpt_id, length, iova, \
+ page_size, pte_size, bitmap, \
+ bitmap_size, flags, _metadata))
+
+static int _test_cmd_create_access(int fd, unsigned int ioas_id,
+ __u32 *access_id, unsigned int flags)
+{
+ struct iommu_test_cmd cmd = {
+ .size = sizeof(cmd),
+ .op = IOMMU_TEST_OP_CREATE_ACCESS,
+ .id = ioas_id,
+ .create_access = { .flags = flags },
+ };
+ int ret;
+
+ ret = ioctl(fd, IOMMU_TEST_CMD, &cmd);
+ if (ret)
+ return ret;
+ *access_id = cmd.create_access.out_access_fd;
+ return 0;
+}
+#define test_cmd_create_access(ioas_id, access_id, flags) \
+ ASSERT_EQ(0, _test_cmd_create_access(self->fd, ioas_id, access_id, \
+ flags))
+
+static int _test_cmd_destroy_access(unsigned int access_id)
+{
+ return close(access_id);
+}
+#define test_cmd_destroy_access(access_id) \
+ ASSERT_EQ(0, _test_cmd_destroy_access(access_id))
+
+static int _test_cmd_destroy_access_pages(int fd, unsigned int access_id,
+ unsigned int access_pages_id)
+{
+ struct iommu_test_cmd cmd = {
+ .size = sizeof(cmd),
+ .op = IOMMU_TEST_OP_DESTROY_ACCESS_PAGES,
+ .id = access_id,
+ .destroy_access_pages = { .access_pages_id = access_pages_id },
+ };
+ return ioctl(fd, IOMMU_TEST_CMD, &cmd);
+}
+#define test_cmd_destroy_access_pages(access_id, access_pages_id) \
+ ASSERT_EQ(0, _test_cmd_destroy_access_pages(self->fd, access_id, \
+ access_pages_id))
+#define test_err_destroy_access_pages(_errno, access_id, access_pages_id) \
+ EXPECT_ERRNO(_errno, _test_cmd_destroy_access_pages( \
+ self->fd, access_id, access_pages_id))
+
+static int _test_ioctl_destroy(int fd, unsigned int id)
+{
+ struct iommu_destroy cmd = {
+ .size = sizeof(cmd),
+ .id = id,
+ };
+ return ioctl(fd, IOMMU_DESTROY, &cmd);
+}
+#define test_ioctl_destroy(id) ASSERT_EQ(0, _test_ioctl_destroy(self->fd, id))
+
+static int _test_ioctl_ioas_alloc(int fd, __u32 *id)
+{
+ struct iommu_ioas_alloc cmd = {
+ .size = sizeof(cmd),
+ };
+ int ret;
+
+ ret = ioctl(fd, IOMMU_IOAS_ALLOC, &cmd);
+ if (ret)
+ return ret;
+ *id = cmd.out_ioas_id;
+ return 0;
+}
+#define test_ioctl_ioas_alloc(id) \
+ ({ \
+ ASSERT_EQ(0, _test_ioctl_ioas_alloc(self->fd, id)); \
+ ASSERT_NE(0, *(id)); \
+ })
+
+static int _test_ioctl_ioas_map(int fd, unsigned int ioas_id, void *buffer,
+ size_t length, __u64 *iova, unsigned int flags)
+{
+ struct iommu_ioas_map cmd = {
+ .size = sizeof(cmd),
+ .flags = flags,
+ .ioas_id = ioas_id,
+ .user_va = (uintptr_t)buffer,
+ .length = length,
+ };
+ int ret;
+
+ if (flags & IOMMU_IOAS_MAP_FIXED_IOVA)
+ cmd.iova = *iova;
+
+ ret = ioctl(fd, IOMMU_IOAS_MAP, &cmd);
+ *iova = cmd.iova;
+ return ret;
+}
+#define test_ioctl_ioas_map(buffer, length, iova_p) \
+ ASSERT_EQ(0, _test_ioctl_ioas_map(self->fd, self->ioas_id, buffer, \
+ length, iova_p, \
+ IOMMU_IOAS_MAP_WRITEABLE | \
+ IOMMU_IOAS_MAP_READABLE))
+
+#define test_err_ioctl_ioas_map(_errno, buffer, length, iova_p) \
+ EXPECT_ERRNO(_errno, \
+ _test_ioctl_ioas_map(self->fd, self->ioas_id, buffer, \
+ length, iova_p, \
+ IOMMU_IOAS_MAP_WRITEABLE | \
+ IOMMU_IOAS_MAP_READABLE))
+
+#define test_ioctl_ioas_map_id(ioas_id, buffer, length, iova_p) \
+ ASSERT_EQ(0, _test_ioctl_ioas_map(self->fd, ioas_id, buffer, length, \
+ iova_p, \
+ IOMMU_IOAS_MAP_WRITEABLE | \
+ IOMMU_IOAS_MAP_READABLE))
+
+#define test_ioctl_ioas_map_fixed(buffer, length, iova) \
+ ({ \
+ __u64 __iova = iova; \
+ ASSERT_EQ(0, _test_ioctl_ioas_map( \
+ self->fd, self->ioas_id, buffer, length, \
+ &__iova, \
+ IOMMU_IOAS_MAP_FIXED_IOVA | \
+ IOMMU_IOAS_MAP_WRITEABLE | \
+ IOMMU_IOAS_MAP_READABLE)); \
+ })
+
+#define test_ioctl_ioas_map_fixed_id(ioas_id, buffer, length, iova) \
+ ({ \
+ __u64 __iova = iova; \
+ ASSERT_EQ(0, \
+ _test_ioctl_ioas_map( \
+ self->fd, ioas_id, buffer, length, &__iova, \
+ IOMMU_IOAS_MAP_FIXED_IOVA | \
+ IOMMU_IOAS_MAP_WRITEABLE | \
+ IOMMU_IOAS_MAP_READABLE)); \
+ })
+
+#define test_err_ioctl_ioas_map_fixed(_errno, buffer, length, iova) \
+ ({ \
+ __u64 __iova = iova; \
+ EXPECT_ERRNO(_errno, \
+ _test_ioctl_ioas_map( \
+ self->fd, self->ioas_id, buffer, length, \
+ &__iova, \
+ IOMMU_IOAS_MAP_FIXED_IOVA | \
+ IOMMU_IOAS_MAP_WRITEABLE | \
+ IOMMU_IOAS_MAP_READABLE)); \
+ })
+
+static int _test_ioctl_ioas_unmap(int fd, unsigned int ioas_id, uint64_t iova,
+ size_t length, uint64_t *out_len)
+{
+ struct iommu_ioas_unmap cmd = {
+ .size = sizeof(cmd),
+ .ioas_id = ioas_id,
+ .iova = iova,
+ .length = length,
+ };
+ int ret;
+
+ ret = ioctl(fd, IOMMU_IOAS_UNMAP, &cmd);
+ if (out_len)
+ *out_len = cmd.length;
+ return ret;
+}
+#define test_ioctl_ioas_unmap(iova, length) \
+ ASSERT_EQ(0, _test_ioctl_ioas_unmap(self->fd, self->ioas_id, iova, \
+ length, NULL))
+
+#define test_ioctl_ioas_unmap_id(ioas_id, iova, length) \
+ ASSERT_EQ(0, _test_ioctl_ioas_unmap(self->fd, ioas_id, iova, length, \
+ NULL))
+
+#define test_err_ioctl_ioas_unmap(_errno, iova, length) \
+ EXPECT_ERRNO(_errno, _test_ioctl_ioas_unmap(self->fd, self->ioas_id, \
+ iova, length, NULL))
+
+static int _test_ioctl_set_temp_memory_limit(int fd, unsigned int limit)
+{
+ struct iommu_test_cmd memlimit_cmd = {
+ .size = sizeof(memlimit_cmd),
+ .op = IOMMU_TEST_OP_SET_TEMP_MEMORY_LIMIT,
+ .memory_limit = { .limit = limit },
+ };
+
+ return ioctl(fd, _IOMMU_TEST_CMD(IOMMU_TEST_OP_SET_TEMP_MEMORY_LIMIT),
+ &memlimit_cmd);
+}
+
+#define test_ioctl_set_temp_memory_limit(limit) \
+ ASSERT_EQ(0, _test_ioctl_set_temp_memory_limit(self->fd, limit))
+
+#define test_ioctl_set_default_memory_limit() \
+ test_ioctl_set_temp_memory_limit(65536)
+
+static void teardown_iommufd(int fd, struct __test_metadata *_metadata)
+{
+ struct iommu_test_cmd test_cmd = {
+ .size = sizeof(test_cmd),
+ .op = IOMMU_TEST_OP_MD_CHECK_REFS,
+ .check_refs = { .length = BUFFER_SIZE,
+ .uptr = (uintptr_t)buffer },
+ };
+
+ if (fd == -1)
+ return;
+
+ EXPECT_EQ(0, close(fd));
+
+ fd = open("/dev/iommu", O_RDWR);
+ EXPECT_NE(-1, fd);
+ EXPECT_EQ(0, ioctl(fd, _IOMMU_TEST_CMD(IOMMU_TEST_OP_MD_CHECK_REFS),
+ &test_cmd));
+ EXPECT_EQ(0, close(fd));
+}
+
+#define EXPECT_ERRNO(expected_errno, cmd) \
+ ({ \
+ ASSERT_EQ(-1, cmd); \
+ EXPECT_EQ(expected_errno, errno); \
+ })
+
+#endif
+
+/* @data can be NULL */
+static int _test_cmd_get_hw_info(int fd, __u32 device_id, void *data,
+ size_t data_len, uint32_t *capabilities)
+{
+ struct iommu_test_hw_info *info = (struct iommu_test_hw_info *)data;
+ struct iommu_hw_info cmd = {
+ .size = sizeof(cmd),
+ .dev_id = device_id,
+ .data_len = data_len,
+ .data_uptr = (uint64_t)data,
+ .out_capabilities = 0,
+ };
+ int ret;
+
+ ret = ioctl(fd, IOMMU_GET_HW_INFO, &cmd);
+ if (ret)
+ return ret;
+
+ assert(cmd.out_data_type == IOMMU_HW_INFO_TYPE_SELFTEST);
+
+ /*
+ * The struct iommu_test_hw_info should be the one defined
+ * by the current kernel.
+ */
+ assert(cmd.data_len == sizeof(struct iommu_test_hw_info));
+
+ /*
+ * Trailing bytes should be 0 if user buffer is larger than
+ * the data that kernel reports.
+ */
+ if (data_len > cmd.data_len) {
+ char *ptr = (char *)(data + cmd.data_len);
+ int idx = 0;
+
+ while (idx < data_len - cmd.data_len) {
+ assert(!*(ptr + idx));
+ idx++;
+ }
+ }
+
+ if (info) {
+ if (data_len >= offsetofend(struct iommu_test_hw_info, test_reg))
+ assert(info->test_reg == IOMMU_HW_INFO_SELFTEST_REGVAL);
+ if (data_len >= offsetofend(struct iommu_test_hw_info, flags))
+ assert(!info->flags);
+ }
+
+ if (capabilities)
+ *capabilities = cmd.out_capabilities;
+
+ return 0;
+}
+
+#define test_cmd_get_hw_info(device_id, data, data_len) \
+ ASSERT_EQ(0, _test_cmd_get_hw_info(self->fd, device_id, data, \
+ data_len, NULL))
+
+#define test_err_get_hw_info(_errno, device_id, data, data_len) \
+ EXPECT_ERRNO(_errno, _test_cmd_get_hw_info(self->fd, device_id, data, \
+ data_len, NULL))
+
+#define test_cmd_get_hw_capabilities(device_id, caps, mask) \
+ ASSERT_EQ(0, _test_cmd_get_hw_info(self->fd, device_id, NULL, 0, &caps))
diff --git a/tools/testing/selftests/ipc/Makefile b/tools/testing/selftests/ipc/Makefile
index 1c4448a843a4..50e9c299fc4a 100644
--- a/tools/testing/selftests/ipc/Makefile
+++ b/tools/testing/selftests/ipc/Makefile
@@ -10,7 +10,7 @@ ifeq ($(ARCH),x86_64)
CFLAGS := -DCONFIG_X86_64 -D__x86_64__
endif
-CFLAGS += -I../../../../usr/include/
+CFLAGS += $(KHDR_INCLUDES)
TEST_GEN_PROGS := msgque
diff --git a/tools/testing/selftests/ipc/msgque.c b/tools/testing/selftests/ipc/msgque.c
index 5ec4d9e18806..656c43c24044 100644
--- a/tools/testing/selftests/ipc/msgque.c
+++ b/tools/testing/selftests/ipc/msgque.c
@@ -69,7 +69,7 @@ int restore_queue(struct msgque_data *msgque)
printf("msgsnd failed (%m)\n");
ret = -errno;
goto destroy;
- };
+ }
}
return 0;
@@ -180,7 +180,7 @@ int fill_msgque(struct msgque_data *msgque)
IPC_NOWAIT) != 0) {
printf("First message send failed (%m)\n");
return -errno;
- };
+ }
msgbuf.mtype = ANOTHER_MSG_TYPE;
memcpy(msgbuf.mtext, ANOTHER_TEST_STRING, sizeof(ANOTHER_TEST_STRING));
@@ -188,7 +188,7 @@ int fill_msgque(struct msgque_data *msgque)
IPC_NOWAIT) != 0) {
printf("Second message send failed (%m)\n");
return -errno;
- };
+ }
return 0;
}
diff --git a/tools/testing/selftests/ir/ir_loopback.c b/tools/testing/selftests/ir/ir_loopback.c
index af7f9c7d59bc..f4a15cbdd5ea 100644
--- a/tools/testing/selftests/ir/ir_loopback.c
+++ b/tools/testing/selftests/ir/ir_loopback.c
@@ -26,10 +26,19 @@
#include "../kselftest.h"
#define TEST_SCANCODES 10
-#define ARRAY_SIZE(arr) (sizeof(arr) / sizeof((arr)[0]))
#define SYSFS_PATH_MAX 256
#define DNAME_PATH_MAX 256
+/*
+ * Support ancient lirc.h which does not have these values. Can be removed
+ * once RHEL 8 is no longer a relevant testing platform.
+ */
+#if RC_PROTO_MAX < 26
+#define RC_PROTO_RCMM12 24
+#define RC_PROTO_RCMM24 25
+#define RC_PROTO_RCMM32 26
+#endif
+
static const struct {
enum rc_proto proto;
const char *name;
diff --git a/tools/testing/selftests/ir/ir_loopback.sh b/tools/testing/selftests/ir/ir_loopback.sh
index b90dc9939f45..aff9299c9416 100755
--- a/tools/testing/selftests/ir/ir_loopback.sh
+++ b/tools/testing/selftests/ir/ir_loopback.sh
@@ -10,7 +10,7 @@ if [ $UID != 0 ]; then
fi
if ! /sbin/modprobe -q -n rc-loopback; then
- echo "ir_loopback: module rc-loopback is not found [SKIP]"
+ echo "ir_loopback: module rc-loopback is not found in /lib/modules/`uname -r` [SKIP]"
exit $ksft_skip
fi
diff --git a/tools/testing/selftests/kcmp/Makefile b/tools/testing/selftests/kcmp/Makefile
index b4d39f6b5124..59a1e5379018 100644
--- a/tools/testing/selftests/kcmp/Makefile
+++ b/tools/testing/selftests/kcmp/Makefile
@@ -1,5 +1,5 @@
# SPDX-License-Identifier: GPL-2.0-only
-CFLAGS += -I../../../../usr/include/
+CFLAGS += $(KHDR_INCLUDES)
TEST_GEN_PROGS := kcmp_test
diff --git a/tools/testing/selftests/kcmp/kcmp_test.c b/tools/testing/selftests/kcmp/kcmp_test.c
index 6ea7b9f37a41..25110c7c0b3e 100644
--- a/tools/testing/selftests/kcmp/kcmp_test.c
+++ b/tools/testing/selftests/kcmp/kcmp_test.c
@@ -88,6 +88,9 @@ int main(int argc, char **argv)
int pid2 = getpid();
int ret;
+ ksft_print_header();
+ ksft_set_plan(3);
+
fd2 = open(kpath, O_RDWR, 0644);
if (fd2 < 0) {
perror("Can't open file");
@@ -152,7 +155,6 @@ int main(int argc, char **argv)
ksft_inc_pass_cnt();
}
- ksft_print_cnts();
if (ret)
ksft_exit_fail();
@@ -162,5 +164,5 @@ int main(int argc, char **argv)
waitpid(pid2, &status, P_ALL);
- return ksft_exit_pass();
+ return 0;
}
diff --git a/tools/testing/selftests/kexec/Makefile b/tools/testing/selftests/kexec/Makefile
index aa91d2063249..67fe7a46cb62 100644
--- a/tools/testing/selftests/kexec/Makefile
+++ b/tools/testing/selftests/kexec/Makefile
@@ -1,10 +1,10 @@
# SPDX-License-Identifier: GPL-2.0-only
# Makefile for kexec tests
-uname_M := $(shell uname -m 2>/dev/null || echo not)
-ARCH ?= $(shell echo $(uname_M) | sed -e s/i.86/x86/ -e s/x86_64/x86/)
+ARCH ?= $(shell uname -m 2>/dev/null || echo not)
+ARCH_PROCESSED := $(shell echo $(ARCH) | sed -e s/i.86/x86/ -e s/x86_64/x86/)
-ifeq ($(ARCH),x86)
+ifeq ($(ARCH_PROCESSED),$(filter $(ARCH_PROCESSED),x86 ppc64le))
TEST_PROGS := test_kexec_load.sh test_kexec_file_load.sh
TEST_FILES := kexec_common_lib.sh
diff --git a/tools/testing/selftests/kexec/kexec_common_lib.sh b/tools/testing/selftests/kexec/kexec_common_lib.sh
index 43017cfe88f7..641ef05863b2 100755
--- a/tools/testing/selftests/kexec/kexec_common_lib.sh
+++ b/tools/testing/selftests/kexec/kexec_common_lib.sh
@@ -65,48 +65,42 @@ get_efivarfs_secureboot_mode()
return 0;
}
-get_efi_var_secureboot_mode()
+# On powerpc platform, check device-tree property
+# /proc/device-tree/ibm,secureboot/os-secureboot-enforcing
+# to detect secureboot state.
+get_ppc64_secureboot_mode()
{
- local efi_vars
- local secure_boot_file
- local setup_mode_file
- local secureboot_mode
- local setup_mode
-
- if [ ! -d "$efi_vars" ]; then
- log_skip "efi_vars is not enabled\n"
- fi
- secure_boot_file=$(find "$efi_vars" -name SecureBoot-* 2>/dev/null)
- setup_mode_file=$(find "$efi_vars" -name SetupMode-* 2>/dev/null)
- if [ -f "$secure_boot_file/data" ] && \
- [ -f "$setup_mode_file/data" ]; then
- secureboot_mode=`od -An -t u1 "$secure_boot_file/data"`
- setup_mode=`od -An -t u1 "$setup_mode_file/data"`
-
- if [ $secureboot_mode -eq 1 ] && [ $setup_mode -eq 0 ]; then
- log_info "secure boot mode enabled (CONFIG_EFI_VARS)"
- return 1;
- fi
+ local secure_boot_file="/proc/device-tree/ibm,secureboot/os-secureboot-enforcing"
+ # Check for secure boot file existence
+ if [ -f $secure_boot_file ]; then
+ log_info "Secureboot is enabled (Device tree)"
+ return 1;
fi
+ log_info "Secureboot is not enabled (Device tree)"
return 0;
}
+# Return the architecture of the system
+get_arch()
+{
+ echo $(arch)
+}
+
# Check efivar SecureBoot-$(the UUID) and SetupMode-$(the UUID).
-# The secure boot mode can be accessed either as the last integer
-# of "od -An -t u1 /sys/firmware/efi/efivars/SecureBoot-*" or from
-# "od -An -t u1 /sys/firmware/efi/vars/SecureBoot-*/data". The efi
+# The secure boot mode can be accessed as the last integer of
+# "od -An -t u1 /sys/firmware/efi/efivars/SecureBoot-*". The efi
# SetupMode can be similarly accessed.
# Return 1 for SecureBoot mode enabled and SetupMode mode disabled.
get_secureboot_mode()
{
local secureboot_mode=0
+ local system_arch=$(get_arch)
- get_efivarfs_secureboot_mode
- secureboot_mode=$?
-
- # fallback to using the efi_var files
- if [ $secureboot_mode -eq 0 ]; then
- get_efi_var_secureboot_mode
+ if [ "$system_arch" == "ppc64le" ]; then
+ get_ppc64_secureboot_mode
+ secureboot_mode=$?
+ else
+ get_efivarfs_secureboot_mode
secureboot_mode=$?
fi
@@ -138,15 +132,20 @@ kconfig_enabled()
return 0
}
-# Attempt to get the kernel config first via proc, and then by
-# extracting it from the kernel image or the configs.ko using
-# scripts/extract-ikconfig.
+# Attempt to get the kernel config first by checking the modules directory
+# then via proc, and finally by extracting it from the kernel image or the
+# configs.ko using scripts/extract-ikconfig.
# Return 1 for found.
get_kconfig()
{
local proc_config="/proc/config.gz"
local module_dir="/lib/modules/`uname -r`"
- local configs_module="$module_dir/kernel/kernel/configs.ko"
+ local configs_module="$module_dir/kernel/kernel/configs.ko*"
+
+ if [ -f $module_dir/config ]; then
+ IKCONFIG=$module_dir/config
+ return 1
+ fi
if [ ! -f $proc_config ]; then
modprobe configs > /dev/null 2>&1
diff --git a/tools/testing/selftests/kexec/test_kexec_file_load.sh b/tools/testing/selftests/kexec/test_kexec_file_load.sh
index 2ff600388c30..c9ccb3c93d72 100755
--- a/tools/testing/selftests/kexec/test_kexec_file_load.sh
+++ b/tools/testing/selftests/kexec/test_kexec_file_load.sh
@@ -97,10 +97,11 @@ check_for_imasig()
check_for_modsig()
{
local module_sig_string="~Module signature appended~"
- local sig="$(tail --bytes $((${#module_sig_string} + 1)) $KERNEL_IMAGE)"
local ret=0
- if [ "$sig" == "$module_sig_string" ]; then
+ tail --bytes $((${#module_sig_string} + 1)) $KERNEL_IMAGE | \
+ grep -q "$module_sig_string"
+ if [ $? -eq 0 ]; then
ret=1
log_info "kexec kernel image modsig signed"
else
@@ -225,8 +226,12 @@ get_secureboot_mode
secureboot=$?
# Are there pe and ima signatures
-check_for_pesig
-pe_signed=$?
+if [ "$(get_arch)" == 'ppc64le' ]; then
+ pe_signed=0
+else
+ check_for_pesig
+ pe_signed=$?
+fi
check_for_imasig
ima_signed=$?
diff --git a/tools/testing/selftests/kmod/kmod.sh b/tools/testing/selftests/kmod/kmod.sh
index 3702dbcc90a7..7189715d7960 100755
--- a/tools/testing/selftests/kmod/kmod.sh
+++ b/tools/testing/selftests/kmod/kmod.sh
@@ -1,18 +1,7 @@
#!/bin/bash
-#
+# SPDX-License-Identifier: GPL-2.0-or-later OR copyleft-next-0.3.1
# Copyright (C) 2017 Luis R. Rodriguez <mcgrof@kernel.org>
#
-# This program is free software; you can redistribute it and/or modify it
-# under the terms of the GNU General Public License as published by the Free
-# Software Foundation; either version 2 of the License, or at your option any
-# later version; or, when distributed separately from the Linux kernel or
-# when incorporated into other software packages, subject to the following
-# license:
-#
-# This program is free software; you can redistribute it and/or modify it
-# under the terms of copyleft-next (version 0.3.1 or later) as published
-# at http://copyleft-next.org/.
-
# This is a stress test script for kmod, the kernel module loader. It uses
# test_kmod which exposes a series of knobs for the API for us so we can
# tweak each test in userspace rather than in kernelspace.
@@ -63,6 +52,8 @@ ALL_TESTS="$ALL_TESTS 0008:150:1"
ALL_TESTS="$ALL_TESTS 0009:150:1"
ALL_TESTS="$ALL_TESTS 0010:1:1"
ALL_TESTS="$ALL_TESTS 0011:1:1"
+ALL_TESTS="$ALL_TESTS 0012:1:1"
+ALL_TESTS="$ALL_TESTS 0013:1:1"
# Kselftest framework requirement - SKIP code is 4.
ksft_skip=4
@@ -128,7 +119,7 @@ test_reqs()
if [[ $KMOD_VERSION -le 19 ]]; then
echo "$0: You need at least kmod 20" >&2
echo "kmod <= 19 is buggy, for details see:" >&2
- echo "http://git.kernel.org/cgit/utils/kernel/kmod/kmod.git/commit/libkmod/libkmod-module.c?id=fd44a98ae2eb5eb32161088954ab21e58e19dfc4" >&2
+ echo "https://git.kernel.org/cgit/utils/kernel/kmod/kmod.git/commit/libkmod/libkmod-module.c?id=fd44a98ae2eb5eb32161088954ab21e58e19dfc4" >&2
exit $ksft_skip
fi
@@ -341,7 +332,7 @@ kmod_test_0001_driver()
kmod_defaults_driver
config_num_threads 1
- printf '\000' >"$DIR"/config_test_driver
+ printf $NAME >"$DIR"/config_test_driver
config_trigger ${FUNCNAME[0]}
config_expect_result ${FUNCNAME[0]} MODULE_NOT_FOUND
}
@@ -352,7 +343,7 @@ kmod_test_0001_fs()
kmod_defaults_fs
config_num_threads 1
- printf '\000' >"$DIR"/config_test_fs
+ printf $NAME >"$DIR"/config_test_fs
config_trigger ${FUNCNAME[0]}
config_expect_result ${FUNCNAME[0]} -EINVAL
}
@@ -470,6 +461,38 @@ kmod_test_0011()
echo "$MODPROBE" > /proc/sys/kernel/modprobe
}
+kmod_check_visibility()
+{
+ local name="$1"
+ local cmd="$2"
+
+ modprobe $DEFAULT_KMOD_DRIVER
+
+ local priv=$(eval $cmd)
+ local unpriv=$(capsh --drop=CAP_SYSLOG -- -c "$cmd")
+
+ if [ "$priv" = "$unpriv" ] || \
+ [ "${priv:0:3}" = "0x0" ] || \
+ [ "${unpriv:0:3}" != "0x0" ] ; then
+ echo "${FUNCNAME[0]}: FAIL, $name visible to unpriv: '$priv' vs '$unpriv'" >&2
+ exit 1
+ else
+ echo "${FUNCNAME[0]}: OK!"
+ fi
+}
+
+kmod_test_0012()
+{
+ kmod_check_visibility /proc/modules \
+ "grep '^${DEFAULT_KMOD_DRIVER}\b' /proc/modules | awk '{print \$NF}'"
+}
+
+kmod_test_0013()
+{
+ kmod_check_visibility '/sys/module/*/sections/*' \
+ "cat /sys/module/${DEFAULT_KMOD_DRIVER}/sections/.*text | head -n1"
+}
+
list_tests()
{
echo "Test ID list:"
@@ -489,6 +512,8 @@ list_tests()
echo "0009 x $(get_test_count 0009) - multithreaded - push kmod_concurrent over max_modprobes for get_fs_type()"
echo "0010 x $(get_test_count 0010) - test nonexistent modprobe path"
echo "0011 x $(get_test_count 0011) - test completely disabling module autoloading"
+ echo "0012 x $(get_test_count 0012) - test /proc/modules address visibility under CAP_SYSLOG"
+ echo "0013 x $(get_test_count 0013) - test /sys/module/*/sections/* visibility under CAP_SYSLOG"
}
usage()
diff --git a/tools/testing/selftests/kselftest.h b/tools/testing/selftests/kselftest.h
index 0ac49d91a260..541bf192e30e 100644
--- a/tools/testing/selftests/kselftest.h
+++ b/tools/testing/selftests/kselftest.h
@@ -1,20 +1,76 @@
/* SPDX-License-Identifier: GPL-2.0 */
/*
- * kselftest.h: kselftest framework return codes to include from
- * selftests.
+ * kselftest.h: low-level kselftest framework to include from
+ * selftest programs. When possible, please use
+ * kselftest_harness.h instead.
*
* Copyright (c) 2014 Shuah Khan <shuahkh@osg.samsung.com>
* Copyright (c) 2014 Samsung Electronics Co., Ltd.
*
+ * Using this API consists of first counting how many tests your code
+ * has to run, and then starting up the reporting:
+ *
+ * ksft_print_header();
+ * ksft_set_plan(total_number_of_tests);
+ *
+ * For each test, report any progress, debugging, etc with:
+ *
+ * ksft_print_msg(fmt, ...);
+ *
+ * and finally report the pass/fail/skip/xfail state of the test with one of:
+ *
+ * ksft_test_result(condition, fmt, ...);
+ * ksft_test_result_pass(fmt, ...);
+ * ksft_test_result_fail(fmt, ...);
+ * ksft_test_result_skip(fmt, ...);
+ * ksft_test_result_xfail(fmt, ...);
+ * ksft_test_result_error(fmt, ...);
+ * ksft_test_result_code(exit_code, test_name, fmt, ...);
+ *
+ * When all tests are finished, clean up and exit the program with one of:
+ *
+ * ksft_finished();
+ * ksft_exit(condition);
+ * ksft_exit_pass();
+ * ksft_exit_fail();
+ *
+ * If the program wants to report details on why the entire program has
+ * failed, it can instead exit with a message (this is usually done when
+ * the program is aborting before finishing all tests):
+ *
+ * ksft_exit_fail_msg(fmt, ...);
+ *
*/
#ifndef __KSELFTEST_H
#define __KSELFTEST_H
+#ifndef NOLIBC
#include <errno.h>
#include <stdlib.h>
#include <unistd.h>
#include <stdarg.h>
+#include <string.h>
#include <stdio.h>
+#endif
+
+#ifndef ARRAY_SIZE
+#define ARRAY_SIZE(arr) (sizeof(arr) / sizeof((arr)[0]))
+#endif
+
+/*
+ * gcc cpuid.h provides __cpuid_count() since v4.4.
+ * Clang/LLVM cpuid.h provides __cpuid_count() since v3.4.0.
+ *
+ * Provide local define for tests needing __cpuid_count() because
+ * selftests need to work in older environments that do not yet
+ * have __cpuid_count().
+ */
+#ifndef __cpuid_count
+#define __cpuid_count(level, count, a, b, c, d) \
+ __asm__ __volatile__ ("cpuid\n\t" \
+ : "=a" (a), "=b" (b), "=c" (c), "=d" (d) \
+ : "0" (level), "2" (count))
+#endif
/* define kselftest exit codes */
#define KSFT_PASS 0
@@ -23,6 +79,8 @@
#define KSFT_XPASS 3
#define KSFT_SKIP 4
+#define __printf(a, b) __attribute__((format(printf, a, b)))
+
/* counters */
struct ksft_count {
unsigned int ksft_pass;
@@ -36,7 +94,7 @@ struct ksft_count {
static struct ksft_count ksft_cnt;
static unsigned int ksft_plan;
-static inline int ksft_test_num(void)
+static inline unsigned int ksft_test_num(void)
{
return ksft_cnt.ksft_pass + ksft_cnt.ksft_fail +
ksft_cnt.ksft_xfail + ksft_cnt.ksft_xpass +
@@ -59,6 +117,15 @@ static inline int ksft_get_error_cnt(void) { return ksft_cnt.ksft_error; }
static inline void ksft_print_header(void)
{
+ /*
+ * Force line buffering; If stdout is not connected to a terminal, it
+ * will otherwise default to fully buffered, which can cause output
+ * duplication if there is content in the buffer when fork()ing. If
+ * there is a crash, line buffering also means the most recent output
+ * line will be visible.
+ */
+ setvbuf(stdout, NULL, _IOLBF, 0);
+
if (!(getenv("KSFT_TAP_LEVEL")))
printf("TAP version 13\n");
}
@@ -66,7 +133,7 @@ static inline void ksft_print_header(void)
static inline void ksft_set_plan(unsigned int plan)
{
ksft_plan = plan;
- printf("1..%d\n", ksft_plan);
+ printf("1..%u\n", ksft_plan);
}
static inline void ksft_print_cnts(void)
@@ -74,13 +141,13 @@ static inline void ksft_print_cnts(void)
if (ksft_plan != ksft_test_num())
printf("# Planned tests != run tests (%u != %u)\n",
ksft_plan, ksft_test_num());
- printf("# Pass %d Fail %d Xfail %d Xpass %d Skip %d Error %d\n",
+ printf("# Totals: pass:%u fail:%u xfail:%u xpass:%u skip:%u error:%u\n",
ksft_cnt.ksft_pass, ksft_cnt.ksft_fail,
ksft_cnt.ksft_xfail, ksft_cnt.ksft_xpass,
ksft_cnt.ksft_xskip, ksft_cnt.ksft_error);
}
-static inline void ksft_print_msg(const char *msg, ...)
+static inline __printf(1, 2) void ksft_print_msg(const char *msg, ...)
{
int saved_errno = errno;
va_list args;
@@ -92,7 +159,20 @@ static inline void ksft_print_msg(const char *msg, ...)
va_end(args);
}
-static inline void ksft_test_result_pass(const char *msg, ...)
+static inline void ksft_perror(const char *msg)
+{
+#ifndef NOLIBC
+ ksft_print_msg("%s: %s (%d)\n", msg, strerror(errno), errno);
+#else
+ /*
+ * nolibc doesn't provide strerror() and it seems
+ * inappropriate to add one, just print the errno.
+ */
+ ksft_print_msg("%s: %d)\n", msg, errno);
+#endif
+}
+
+static inline __printf(1, 2) void ksft_test_result_pass(const char *msg, ...)
{
int saved_errno = errno;
va_list args;
@@ -100,13 +180,13 @@ static inline void ksft_test_result_pass(const char *msg, ...)
ksft_cnt.ksft_pass++;
va_start(args, msg);
- printf("ok %d ", ksft_test_num());
+ printf("ok %u ", ksft_test_num());
errno = saved_errno;
vprintf(msg, args);
va_end(args);
}
-static inline void ksft_test_result_fail(const char *msg, ...)
+static inline __printf(1, 2) void ksft_test_result_fail(const char *msg, ...)
{
int saved_errno = errno;
va_list args;
@@ -114,13 +194,39 @@ static inline void ksft_test_result_fail(const char *msg, ...)
ksft_cnt.ksft_fail++;
va_start(args, msg);
- printf("not ok %d ", ksft_test_num());
+ printf("not ok %u ", ksft_test_num());
errno = saved_errno;
vprintf(msg, args);
va_end(args);
}
-static inline void ksft_test_result_skip(const char *msg, ...)
+/**
+ * ksft_test_result() - Report test success based on truth of condition
+ *
+ * @condition: if true, report test success, otherwise failure.
+ */
+#define ksft_test_result(condition, fmt, ...) do { \
+ if (!!(condition)) \
+ ksft_test_result_pass(fmt, ##__VA_ARGS__);\
+ else \
+ ksft_test_result_fail(fmt, ##__VA_ARGS__);\
+ } while (0)
+
+static inline __printf(1, 2) void ksft_test_result_xfail(const char *msg, ...)
+{
+ int saved_errno = errno;
+ va_list args;
+
+ ksft_cnt.ksft_xfail++;
+
+ va_start(args, msg);
+ printf("ok %u # XFAIL ", ksft_test_num());
+ errno = saved_errno;
+ vprintf(msg, args);
+ va_end(args);
+}
+
+static inline __printf(1, 2) void ksft_test_result_skip(const char *msg, ...)
{
int saved_errno = errno;
va_list args;
@@ -128,13 +234,14 @@ static inline void ksft_test_result_skip(const char *msg, ...)
ksft_cnt.ksft_xskip++;
va_start(args, msg);
- printf("not ok %d # SKIP ", ksft_test_num());
+ printf("ok %u # SKIP ", ksft_test_num());
errno = saved_errno;
vprintf(msg, args);
va_end(args);
}
-static inline void ksft_test_result_error(const char *msg, ...)
+/* TODO: how does "error" differ from "fail" or "skip"? */
+static inline __printf(1, 2) void ksft_test_result_error(const char *msg, ...)
{
int saved_errno = errno;
va_list args;
@@ -142,9 +249,53 @@ static inline void ksft_test_result_error(const char *msg, ...)
ksft_cnt.ksft_error++;
va_start(args, msg);
- printf("not ok %d # error ", ksft_test_num());
+ printf("not ok %u # error ", ksft_test_num());
+ errno = saved_errno;
+ vprintf(msg, args);
+ va_end(args);
+}
+
+static inline __printf(3, 4)
+void ksft_test_result_code(int exit_code, const char *test_name,
+ const char *msg, ...)
+{
+ const char *tap_code = "ok";
+ const char *directive = "";
+ int saved_errno = errno;
+ va_list args;
+
+ switch (exit_code) {
+ case KSFT_PASS:
+ ksft_cnt.ksft_pass++;
+ break;
+ case KSFT_XFAIL:
+ directive = " # XFAIL ";
+ ksft_cnt.ksft_xfail++;
+ break;
+ case KSFT_XPASS:
+ directive = " # XPASS ";
+ ksft_cnt.ksft_xpass++;
+ break;
+ case KSFT_SKIP:
+ directive = " # SKIP ";
+ ksft_cnt.ksft_xskip++;
+ break;
+ case KSFT_FAIL:
+ default:
+ tap_code = "not ok";
+ ksft_cnt.ksft_fail++;
+ break;
+ }
+
+ /* Docs seem to call for double space if directive is absent */
+ if (!directive[0] && msg[0])
+ directive = " # ";
+
+ va_start(args, msg);
+ printf("%s %u %s%s", tap_code, ksft_test_num(), test_name, directive);
errno = saved_errno;
vprintf(msg, args);
+ printf("\n");
va_end(args);
}
@@ -156,12 +307,32 @@ static inline int ksft_exit_pass(void)
static inline int ksft_exit_fail(void)
{
- printf("Bail out!\n");
ksft_print_cnts();
exit(KSFT_FAIL);
}
-static inline int ksft_exit_fail_msg(const char *msg, ...)
+/**
+ * ksft_exit() - Exit selftest based on truth of condition
+ *
+ * @condition: if true, exit self test with success, otherwise fail.
+ */
+#define ksft_exit(condition) do { \
+ if (!!(condition)) \
+ ksft_exit_pass(); \
+ else \
+ ksft_exit_fail(); \
+ } while (0)
+
+/**
+ * ksft_finished() - Exit selftest with success if all tests passed
+ */
+#define ksft_finished() \
+ ksft_exit(ksft_plan == \
+ ksft_cnt.ksft_pass + \
+ ksft_cnt.ksft_xfail + \
+ ksft_cnt.ksft_xskip)
+
+static inline __printf(1, 2) int ksft_exit_fail_msg(const char *msg, ...)
{
int saved_errno = errno;
va_list args;
@@ -188,20 +359,32 @@ static inline int ksft_exit_xpass(void)
exit(KSFT_XPASS);
}
-static inline int ksft_exit_skip(const char *msg, ...)
+static inline __printf(1, 2) int ksft_exit_skip(const char *msg, ...)
{
- if (msg) {
- int saved_errno = errno;
- va_list args;
+ int saved_errno = errno;
+ va_list args;
- va_start(args, msg);
- printf("not ok %d # SKIP ", 1 + ksft_test_num());
+ va_start(args, msg);
+
+ /*
+ * FIXME: several tests misuse ksft_exit_skip so produce
+ * something sensible if some tests have already been run
+ * or a plan has been printed. Those tests should use
+ * ksft_test_result_skip or ksft_exit_fail_msg instead.
+ */
+ if (ksft_plan || ksft_test_num()) {
+ ksft_cnt.ksft_xskip++;
+ printf("ok %d # SKIP ", 1 + ksft_test_num());
+ } else {
+ printf("1..0 # SKIP ");
+ }
+ if (msg) {
errno = saved_errno;
vprintf(msg, args);
va_end(args);
- } else {
- ksft_print_cnts();
}
+ if (ksft_test_num())
+ ksft_print_cnts();
exit(KSFT_SKIP);
}
diff --git a/tools/testing/selftests/kselftest/ktap_helpers.sh b/tools/testing/selftests/kselftest/ktap_helpers.sh
new file mode 100644
index 000000000000..f2fbb914e058
--- /dev/null
+++ b/tools/testing/selftests/kselftest/ktap_helpers.sh
@@ -0,0 +1,111 @@
+# SPDX-License-Identifier: GPL-2.0
+#
+# Copyright (c) 2023 Collabora Ltd
+#
+# Helpers for outputting in KTAP format
+#
+KTAP_TESTNO=1
+KTAP_CNT_PASS=0
+KTAP_CNT_FAIL=0
+KTAP_CNT_SKIP=0
+
+KSFT_PASS=0
+KSFT_FAIL=1
+KSFT_XFAIL=2
+KSFT_XPASS=3
+KSFT_SKIP=4
+
+KSFT_NUM_TESTS=0
+
+ktap_print_header() {
+ echo "TAP version 13"
+}
+
+ktap_print_msg()
+{
+ echo "#" $@
+}
+
+ktap_set_plan() {
+ KSFT_NUM_TESTS="$1"
+
+ echo "1..$KSFT_NUM_TESTS"
+}
+
+ktap_skip_all() {
+ echo -n "1..0 # SKIP "
+ echo $@
+}
+
+__ktap_test() {
+ result="$1"
+ description="$2"
+ directive="$3" # optional
+
+ local directive_str=
+ [[ ! -z "$directive" ]] && directive_str="# $directive"
+
+ echo $result $KTAP_TESTNO $description $directive_str
+
+ KTAP_TESTNO=$((KTAP_TESTNO+1))
+}
+
+ktap_test_pass() {
+ description="$1"
+
+ result="ok"
+ __ktap_test "$result" "$description"
+
+ KTAP_CNT_PASS=$((KTAP_CNT_PASS+1))
+}
+
+ktap_test_skip() {
+ description="$1"
+
+ result="ok"
+ directive="SKIP"
+ __ktap_test "$result" "$description" "$directive"
+
+ KTAP_CNT_SKIP=$((KTAP_CNT_SKIP+1))
+}
+
+ktap_test_fail() {
+ description="$1"
+
+ result="not ok"
+ __ktap_test "$result" "$description"
+
+ KTAP_CNT_FAIL=$((KTAP_CNT_FAIL+1))
+}
+
+ktap_test_result() {
+ description="$1"
+ shift
+
+ if $@; then
+ ktap_test_pass "$description"
+ else
+ ktap_test_fail "$description"
+ fi
+}
+
+ktap_exit_fail_msg() {
+ echo "Bail out! " $@
+ ktap_print_totals
+
+ exit "$KSFT_FAIL"
+}
+
+ktap_finished() {
+ ktap_print_totals
+
+ if [ $(("$KTAP_CNT_PASS" + "$KTAP_CNT_SKIP")) -eq "$KSFT_NUM_TESTS" ]; then
+ exit "$KSFT_PASS"
+ else
+ exit "$KSFT_FAIL"
+ fi
+}
+
+ktap_print_totals() {
+ echo "# Totals: pass:$KTAP_CNT_PASS fail:$KTAP_CNT_FAIL xfail:0 xpass:0 skip:$KTAP_CNT_SKIP error:0"
+}
diff --git a/tools/testing/selftests/kselftest/prefix.pl b/tools/testing/selftests/kselftest/prefix.pl
index 31f7c2a0a8bd..12a7f4ca2684 100755
--- a/tools/testing/selftests/kselftest/prefix.pl
+++ b/tools/testing/selftests/kselftest/prefix.pl
@@ -1,4 +1,4 @@
-#!/usr/bin/perl
+#!/usr/bin/env perl
# SPDX-License-Identifier: GPL-2.0
# Prefix all lines with "# ", unbuffered. Command being piped in may need
# to have unbuffering forced with "stdbuf -i0 -o0 -e0 $cmd".
diff --git a/tools/testing/selftests/kselftest/runner.sh b/tools/testing/selftests/kselftest/runner.sh
index 676b3a8b114d..74954f6a8f94 100644
--- a/tools/testing/selftests/kselftest/runner.sh
+++ b/tools/testing/selftests/kselftest/runner.sh
@@ -6,9 +6,11 @@ export skip_rc=4
export timeout_rc=124
export logfile=/dev/stdout
export per_test_logging=
+export RUN_IN_NETNS=
# Defaults for "settings" file fields:
-# "timeout" how many seconds to let each test run before failing.
+# "timeout" how many seconds to let each test run before running
+# over our soft timeout limit.
export kselftest_default_timeout=45
# There isn't a shell-agnostic way to find the path of a sourced file,
@@ -18,6 +20,8 @@ if [ -z "$BASE_DIR" ]; then
exit 1
fi
+TR_CMD=$(command -v tr)
+
# If Perl is unavailable, we must fall back to line-at-a-time prefixing
# with sed instead of unbuffered output.
tap_prefix()
@@ -33,9 +37,10 @@ tap_timeout()
{
# Make sure tests will time out if utility is available.
if [ -x /usr/bin/timeout ] ; then
- /usr/bin/timeout --foreground "$kselftest_timeout" "$1"
+ /usr/bin/timeout --foreground "$kselftest_timeout" \
+ /usr/bin/timeout "$kselftest_timeout" $1
else
- "$1"
+ $1
fi
}
@@ -43,44 +48,93 @@ run_one()
{
DIR="$1"
TEST="$2"
- NUM="$3"
+ local test_num="$3"
BASENAME_TEST=$(basename $TEST)
# Reset any "settings"-file variables.
export kselftest_timeout="$kselftest_default_timeout"
+
+ # Safe default if tr not available
+ kselftest_cmd_args_ref="KSELFTEST_ARGS"
+
+ # Optional arguments for this command, possibly defined as an
+ # environment variable built using the test executable in all
+ # uppercase and sanitized substituting non acceptable shell
+ # variable name characters with "_" as in:
+ #
+ # KSELFTEST_<UPPERCASE_SANITIZED_TESTNAME>_ARGS="<options>"
+ #
+ # e.g.
+ #
+ # rtctest --> KSELFTEST_RTCTEST_ARGS="/dev/rtc1"
+ #
+ # cpu-on-off-test.sh --> KSELFTEST_CPU_ON_OFF_TEST_SH_ARGS="-a -p 10"
+ #
+ if [ -n "$TR_CMD" ]; then
+ BASENAME_SANITIZED=$(echo "$BASENAME_TEST" | \
+ $TR_CMD -d "[:blank:][:cntrl:]" | \
+ $TR_CMD -c "[:alnum:]_" "_" | \
+ $TR_CMD [:lower:] [:upper:])
+ kselftest_cmd_args_ref="KSELFTEST_${BASENAME_SANITIZED}_ARGS"
+ fi
+
# Load per-test-directory kselftest "settings" file.
settings="$BASE_DIR/$DIR/settings"
if [ -r "$settings" ] ; then
while read line ; do
+ # Skip comments.
+ if echo "$line" | grep -q '^#'; then
+ continue
+ fi
field=$(echo "$line" | cut -d= -f1)
value=$(echo "$line" | cut -d= -f2-)
eval "kselftest_$field"="$value"
done < "$settings"
fi
+ # Command line timeout overrides the settings file
+ if [ -n "$kselftest_override_timeout" ]; then
+ kselftest_timeout="$kselftest_override_timeout"
+ echo "# overriding timeout to $kselftest_timeout" >> "$logfile"
+ else
+ echo "# timeout set to $kselftest_timeout" >> "$logfile"
+ fi
+
TEST_HDR_MSG="selftests: $DIR: $BASENAME_TEST"
echo "# $TEST_HDR_MSG"
- if [ ! -x "$TEST" ]; then
- echo -n "# Warning: file $TEST is "
- if [ ! -e "$TEST" ]; then
- echo "missing!"
- else
- echo "not executable, correct this."
- fi
+ if [ ! -e "$TEST" ]; then
+ echo "# Warning: file $TEST is missing!"
echo "not ok $test_num $TEST_HDR_MSG"
else
+ if [ -x /usr/bin/stdbuf ]; then
+ stdbuf="/usr/bin/stdbuf --output=L "
+ fi
+ eval kselftest_cmd_args="\$${kselftest_cmd_args_ref:-}"
+ cmd="$stdbuf ./$BASENAME_TEST $kselftest_cmd_args"
+ if [ ! -x "$TEST" ]; then
+ echo "# Warning: file $TEST is not executable"
+
+ if [ $(head -n 1 "$TEST" | cut -c -2) = "#!" ]
+ then
+ interpreter=$(head -n 1 "$TEST" | cut -c 3-)
+ cmd="$stdbuf $interpreter ./$BASENAME_TEST"
+ else
+ echo "not ok $test_num $TEST_HDR_MSG"
+ return
+ fi
+ fi
cd `dirname $TEST` > /dev/null
- ((((( tap_timeout ./$BASENAME_TEST 2>&1; echo $? >&3) |
+ ((((( tap_timeout "$cmd" 2>&1; echo $? >&3) |
tap_prefix >&4) 3>&1) |
(read xs; exit $xs)) 4>>"$logfile" &&
echo "ok $test_num $TEST_HDR_MSG") ||
(rc=$?; \
if [ $rc -eq $skip_rc ]; then \
- echo "not ok $test_num $TEST_HDR_MSG # SKIP"
+ echo "ok $test_num $TEST_HDR_MSG # SKIP"
elif [ $rc -eq $timeout_rc ]; then \
echo "#"
- echo "not ok $test_num $TEST_HDR_MSG # TIMEOUT"
+ echo "not ok $test_num $TEST_HDR_MSG # TIMEOUT $kselftest_timeout seconds"
else
echo "not ok $test_num $TEST_HDR_MSG # exit=$rc"
fi)
@@ -88,6 +142,33 @@ run_one()
fi
}
+in_netns()
+{
+ local name=$1
+ ip netns exec $name bash <<-EOF
+ BASE_DIR=$BASE_DIR
+ source $BASE_DIR/kselftest/runner.sh
+ logfile=$logfile
+ run_one $DIR $TEST $test_num
+ EOF
+}
+
+run_in_netns()
+{
+ local netns=$(mktemp -u ${BASENAME_TEST}-XXXXXX)
+ local tmplog="/tmp/$(mktemp -u ${BASENAME_TEST}-XXXXXX)"
+ ip netns add $netns
+ if [ $? -ne 0 ]; then
+ echo "# Warning: Create namespace failed for $BASENAME_TEST"
+ echo "not ok $test_num selftests: $DIR: $BASENAME_TEST # Create NS failed"
+ fi
+ ip -n $netns link set lo up
+ in_netns $netns &> $tmplog
+ ip netns del $netns &> /dev/null
+ cat $tmplog
+ rm -f $tmplog
+}
+
run_many()
{
echo "TAP version 13"
@@ -102,6 +183,12 @@ run_many()
logfile="/tmp/$BASENAME_TEST"
cat /dev/null > "$logfile"
fi
- run_one "$DIR" "$TEST" "$test_num"
+ if [ -n "$RUN_IN_NETNS" ]; then
+ run_in_netns &
+ else
+ run_one "$DIR" "$TEST" "$test_num"
+ fi
done
+
+ wait
}
diff --git a/tools/testing/selftests/kselftest_deps.sh b/tools/testing/selftests/kselftest_deps.sh
index bbc04646346b..de59cc8f03c3 100755
--- a/tools/testing/selftests/kselftest_deps.sh
+++ b/tools/testing/selftests/kselftest_deps.sh
@@ -12,9 +12,9 @@ usage()
echo -e "Usage: $0 -[p] <compiler> [test_name]\n"
echo -e "\tkselftest_deps.sh [-p] gcc"
-echo -e "\tkselftest_deps.sh [-p] gcc vm"
+echo -e "\tkselftest_deps.sh [-p] gcc mm"
echo -e "\tkselftest_deps.sh [-p] aarch64-linux-gnu-gcc"
-echo -e "\tkselftest_deps.sh [-p] aarch64-linux-gnu-gcc vm\n"
+echo -e "\tkselftest_deps.sh [-p] aarch64-linux-gnu-gcc mm\n"
echo "- Should be run in selftests directory in the kernel repo."
echo "- Checks if Kselftests can be built/cross-built on a system."
echo "- Parses all test/sub-test Makefile to find library dependencies."
@@ -26,7 +26,7 @@ echo " main Makefile when optional -p is specified."
echo "- Prints pass/fail dependency check for each tests/sub-test."
echo "- Prints pass/fail targets and libraries."
echo "- Default: runs dependency checks on all tests."
-echo "- Optional test name can be specified to check dependencies for it."
+echo "- Optional: test name can be specified to check dependencies for it."
exit 1
}
@@ -46,11 +46,11 @@ fi
print_targets=0
while getopts "p" arg; do
- case $arg in
- p)
+ case $arg in
+ p)
print_targets=1
shift;;
- esac
+ esac
done
if [ $# -eq 0 ]
@@ -90,7 +90,11 @@ pass_libs=()
pass_cnt=0
# Get all TARGETS from selftests Makefile
-targets=$(egrep "^TARGETS +|^TARGETS =" Makefile | cut -d "=" -f2)
+targets=$(grep -E "^TARGETS +|^TARGETS =" Makefile | cut -d "=" -f2)
+
+# Initially, in LDLIBS related lines, the dep checker needs
+# to ignore lines containing the following strings:
+filter="\$(VAR_LDLIBS)\|pkg-config\|PKG_CONFIG\|IOURING_EXTRA_LIBS"
# Single test case
if [ $# -eq 2 ]
@@ -100,6 +104,8 @@ then
l1_test $test
l2_test $test
l3_test $test
+ l4_test $test
+ l5_test $test
print_results $1 $2
exit $?
@@ -113,38 +119,57 @@ fi
# Append space at the end of the list to append more tests.
l1_tests=$(grep -r --include=Makefile "^LDLIBS" | \
- grep -v "VAR_LDLIBS" | awk -F: '{print $1}')
+ grep -v "$filter" | awk -F: '{print $1}' | uniq)
# Level 2: LDLIBS set dynamically.
#
# Level 2
# Some tests have multiple valid LDLIBS lines for individual sub-tests
# that need dependency checks. Find them and append them to the tests
-# e.g: vm/Makefile:$(OUTPUT)/userfaultfd: LDLIBS += -lpthread
+# e.g: mm/Makefile:$(OUTPUT)/userfaultfd: LDLIBS += -lpthread
# Filter out VAR_LDLIBS to discard the following:
# memfd/Makefile:$(OUTPUT)/fuse_mnt: LDLIBS += $(VAR_LDLIBS)
# Append space at the end of the list to append more tests.
l2_tests=$(grep -r --include=Makefile ": LDLIBS" | \
- grep -v "VAR_LDLIBS" | awk -F: '{print $1}')
+ grep -v "$filter" | awk -F: '{print $1}' | uniq)
# Level 3
-# gpio, memfd and others use pkg-config to find mount and fuse libs
+# memfd and others use pkg-config to find mount and fuse libs
# respectively and save it in VAR_LDLIBS. If pkg-config doesn't find
# any, VAR_LDLIBS set to default.
# Use the default value and filter out pkg-config for dependency check.
# e.g:
-# gpio/Makefile
-# VAR_LDLIBS := $(shell pkg-config --libs mount) 2>/dev/null)
# memfd/Makefile
# VAR_LDLIBS := $(shell pkg-config fuse --libs 2>/dev/null)
l3_tests=$(grep -r --include=Makefile "^VAR_LDLIBS" | \
- grep -v "pkg-config" | awk -F: '{print $1}')
+ grep -v "pkg-config\|PKG_CONFIG" | awk -F: '{print $1}' | uniq)
-#echo $l1_tests
-#echo $l2_1_tests
-#echo $l3_tests
+# Level 4
+# some tests may fall back to default using `|| echo -l<libname>`
+# if pkg-config doesn't find the libs, instead of using VAR_LDLIBS
+# as per level 3 checks.
+# e.g:
+# netfilter/Makefile
+# LDLIBS += $(shell $(HOSTPKG_CONFIG) --libs libmnl 2>/dev/null || echo -lmnl)
+l4_tests=$(grep -r --include=Makefile "^LDLIBS" | \
+ grep "pkg-config\|PKG_CONFIG" | awk -F: '{print $1}' | uniq)
+
+# Level 5
+# some tests may use IOURING_EXTRA_LIBS to add extra libs to LDLIBS,
+# which in turn may be defined in a sub-Makefile
+# e.g.:
+# mm/Makefile
+# $(OUTPUT)/gup_longterm: LDLIBS += $(IOURING_EXTRA_LIBS)
+l5_tests=$(grep -r --include=Makefile "LDLIBS +=.*\$(IOURING_EXTRA_LIBS)" | \
+ awk -F: '{print $1}' | uniq)
+
+#echo l1_tests $l1_tests
+#echo l2_tests $l2_tests
+#echo l3_tests $l3_tests
+#echo l4_tests $l4_tests
+#echo l5_tests $l5_tests
all_tests
print_results $1 $2
@@ -166,24 +191,32 @@ all_tests()
for test in $l3_tests; do
l3_test $test
done
+
+ for test in $l4_tests; do
+ l4_test $test
+ done
+
+ for test in $l5_tests; do
+ l5_test $test
+ done
}
# Use same parsing used for l1_tests and pick libraries this time.
l1_test()
{
test_libs=$(grep --include=Makefile "^LDLIBS" $test | \
- grep -v "VAR_LDLIBS" | \
+ grep -v "$filter" | \
sed -e 's/\:/ /' | \
sed -e 's/+/ /' | cut -d "=" -f 2)
check_libs $test $test_libs
}
-# Use same parsing used for l2__tests and pick libraries this time.
+# Use same parsing used for l2_tests and pick libraries this time.
l2_test()
{
test_libs=$(grep --include=Makefile ": LDLIBS" $test | \
- grep -v "VAR_LDLIBS" | \
+ grep -v "$filter" | \
sed -e 's/\:/ /' | sed -e 's/+/ /' | \
cut -d "=" -f 2)
@@ -199,6 +232,24 @@ l3_test()
check_libs $test $test_libs
}
+l4_test()
+{
+ test_libs=$(grep --include=Makefile "^VAR_LDLIBS\|^LDLIBS" $test | \
+ grep "\(pkg-config\|PKG_CONFIG\).*|| echo " | \
+ sed -e 's/.*|| echo //' | sed -e 's/)$//')
+
+ check_libs $test $test_libs
+}
+
+l5_test()
+{
+ tests=$(find $(dirname "$test") -type f -name "*.mk")
+ test_libs=$(grep "^IOURING_EXTRA_LIBS +\?=" $tests | \
+ cut -d "=" -f 2)
+
+ check_libs $test $test_libs
+}
+
check_libs()
{
diff --git a/tools/testing/selftests/kselftest_harness.h b/tools/testing/selftests/kselftest_harness.h
index c9f03ef93338..4fd735e48ee7 100644
--- a/tools/testing/selftests/kselftest_harness.h
+++ b/tools/testing/selftests/kselftest_harness.h
@@ -50,17 +50,25 @@
#ifndef __KSELFTEST_HARNESS_H
#define __KSELFTEST_HARNESS_H
+#ifndef _GNU_SOURCE
#define _GNU_SOURCE
+#endif
#include <asm/types.h>
+#include <ctype.h>
#include <errno.h>
+#include <limits.h>
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
+#include <sys/mman.h>
#include <sys/types.h>
#include <sys/wait.h>
#include <unistd.h>
+#include <setjmp.h>
+
+#include "kselftest.h"
#define TEST_TIMEOUT_DEFAULT 30
@@ -74,7 +82,7 @@
#endif
/**
- * TH_LOG(fmt, ...)
+ * TH_LOG()
*
* @fmt: format string
* @...: optional arguments
@@ -88,14 +96,6 @@
* E.g., #define TH_LOG_ENABLED 1
*
* If no definition is provided, logging is enabled by default.
- *
- * If there is no way to print an error message for the process running the
- * test (e.g. not allowed to write to stderr), it is still possible to get the
- * ASSERT_* number for which the test failed. This behavior can be enabled by
- * writing `_metadata->no_print = true;` before the check sequence that is
- * unable to print. When an error occur, instead of printing an error message
- * and calling `abort(3)`, the test process call `_exit(2)` with the assert
- * number as argument, which is then printed by the parent process.
*/
#define TH_LOG(fmt, ...) do { \
if (TH_LOG_ENABLED) \
@@ -104,32 +104,37 @@
/* Unconditional logger for internal use. */
#define __TH_LOG(fmt, ...) \
- fprintf(TH_LOG_STREAM, "%s:%d:%s:" fmt "\n", \
+ fprintf(TH_LOG_STREAM, "# %s:%d:%s:" fmt "\n", \
__FILE__, __LINE__, _metadata->name, ##__VA_ARGS__)
/**
- * XFAIL(statement, fmt, ...)
+ * SKIP()
*
- * @statement: statement to run after reporting XFAIL
+ * @statement: statement to run after reporting SKIP
* @fmt: format string
* @...: optional arguments
*
- * This forces a "pass" after reporting a failure with an XFAIL prefix,
+ * .. code-block:: c
+ *
+ * SKIP(statement, fmt, ...);
+ *
+ * This forces a "pass" after reporting why something is being skipped
* and runs "statement", which is usually "return" or "goto skip".
*/
-#define XFAIL(statement, fmt, ...) do { \
+#define SKIP(statement, fmt, ...) do { \
+ snprintf(_metadata->results->reason, \
+ sizeof(_metadata->results->reason), fmt, ##__VA_ARGS__); \
if (TH_LOG_ENABLED) { \
- fprintf(TH_LOG_STREAM, "[ XFAIL! ] " fmt "\n", \
- ##__VA_ARGS__); \
+ fprintf(TH_LOG_STREAM, "# SKIP %s\n", \
+ _metadata->results->reason); \
} \
- /* TODO: find a way to pass xfail to test runner process. */ \
- _metadata->passed = 1; \
+ _metadata->exit_code = KSFT_SKIP; \
_metadata->trigger = 0; \
statement; \
} while (0)
/**
- * TEST(test_name) - Defines the test function and creates the registration
+ * TEST() - Defines the test function and creates the registration
* stub
*
* @test_name: test name
@@ -148,7 +153,7 @@
#define TEST(test_name) __TEST_IMPL(test_name, -1)
/**
- * TEST_SIGNAL(test_name, signal)
+ * TEST_SIGNAL()
*
* @test_name: test name
* @signal: signal number
@@ -172,7 +177,10 @@
struct __test_metadata *_metadata, \
struct __fixture_variant_metadata *variant) \
{ \
- test_name(_metadata); \
+ _metadata->setup_completed = true; \
+ if (setjmp(_metadata->env) == 0) \
+ test_name(_metadata); \
+ __test_check_assert(_metadata); \
} \
static struct __test_metadata _##test_name##_object = \
{ .name = #test_name, \
@@ -188,15 +196,16 @@
struct __test_metadata __attribute__((unused)) *_metadata)
/**
- * FIXTURE_DATA(datatype_name) - Wraps the struct name so we have one less
+ * FIXTURE_DATA() - Wraps the struct name so we have one less
* argument to pass around
*
* @datatype_name: datatype name
*
* .. code-block:: c
*
- * FIXTURE_DATA(datatype name)
+ * FIXTURE_DATA(datatype_name)
*
+ * Almost always, you want just FIXTURE() instead (see below).
* This call may be used when the type of the fixture data
* is needed. In general, this should not be needed unless
* the *self* is being passed to a helper directly.
@@ -204,14 +213,14 @@
#define FIXTURE_DATA(datatype_name) struct _test_data_##datatype_name
/**
- * FIXTURE(fixture_name) - Called once per fixture to setup the data and
+ * FIXTURE() - Called once per fixture to setup the data and
* register
*
* @fixture_name: fixture name
*
* .. code-block:: c
*
- * FIXTURE(datatype name) {
+ * FIXTURE(fixture_name) {
* type property1;
* ...
* };
@@ -231,14 +240,14 @@
FIXTURE_DATA(fixture_name)
/**
- * FIXTURE_SETUP(fixture_name) - Prepares the setup function for the fixture.
- * *_metadata* is included so that EXPECT_* and ASSERT_* work correctly.
+ * FIXTURE_SETUP() - Prepares the setup function for the fixture.
+ * *_metadata* is included so that EXPECT_*, ASSERT_* etc. work correctly.
*
* @fixture_name: fixture name
*
* .. code-block:: c
*
- * FIXTURE_SETUP(fixture name) { implementation }
+ * FIXTURE_SETUP(fixture_name) { implementation }
*
* Populates the required "setup" function for a fixture. An instance of the
* datatype defined with FIXTURE_DATA() will be exposed as *self* for the
@@ -257,14 +266,14 @@
__attribute__((unused)) *variant)
/**
- * FIXTURE_TEARDOWN(fixture_name)
- * *_metadata* is included so that EXPECT_* and ASSERT_* work correctly.
+ * FIXTURE_TEARDOWN()
+ * *_metadata* is included so that EXPECT_*, ASSERT_* etc. work correctly.
*
* @fixture_name: fixture name
*
* .. code-block:: c
*
- * FIXTURE_TEARDOWN(fixture name) { implementation }
+ * FIXTURE_TEARDOWN(fixture_name) { implementation }
*
* Populates the required "teardown" function for a fixture. An instance of the
* datatype defined with FIXTURE_DATA() will be exposed as *self* for the
@@ -275,29 +284,31 @@
#define FIXTURE_TEARDOWN(fixture_name) \
void fixture_name##_teardown( \
struct __test_metadata __attribute__((unused)) *_metadata, \
- FIXTURE_DATA(fixture_name) __attribute__((unused)) *self)
+ FIXTURE_DATA(fixture_name) __attribute__((unused)) *self, \
+ const FIXTURE_VARIANT(fixture_name) \
+ __attribute__((unused)) *variant)
/**
- * FIXTURE_VARIANT(fixture_name) - Optionally called once per fixture
+ * FIXTURE_VARIANT() - Optionally called once per fixture
* to declare fixture variant
*
* @fixture_name: fixture name
*
* .. code-block:: c
*
- * FIXTURE_VARIANT(datatype name) {
+ * FIXTURE_VARIANT(fixture_name) {
* type property1;
* ...
* };
*
- * Defines type of constant parameters provided to FIXTURE_SETUP() and TEST_F()
- * as *variant*. Variants allow the same tests to be run with different
- * arguments.
+ * Defines type of constant parameters provided to FIXTURE_SETUP(), TEST_F() and
+ * FIXTURE_TEARDOWN as *variant*. Variants allow the same tests to be run with
+ * different arguments.
*/
#define FIXTURE_VARIANT(fixture_name) struct _fixture_variant_##fixture_name
/**
- * FIXTURE_VARIANT_ADD(fixture_name, variant_name) - Called once per fixture
+ * FIXTURE_VARIANT_ADD() - Called once per fixture
* variant to setup and register the data
*
* @fixture_name: fixture name
@@ -305,8 +316,8 @@
*
* .. code-block:: c
*
- * FIXTURE_ADD(datatype name) {
- * .property1 = val1;
+ * FIXTURE_VARIANT_ADD(fixture_name, variant_name) {
+ * .property1 = val1,
* ...
* };
*
@@ -331,7 +342,7 @@
_##fixture_name##_##variant_name##_variant =
/**
- * TEST_F(fixture_name, test_name) - Emits test registration and helpers for
+ * TEST_F() - Emits test registration and helpers for
* fixture-based test cases
*
* @fixture_name: fixture name
@@ -345,9 +356,11 @@
* Very similar to TEST() except that *self* is the setup instance of fixture's
* datatype exposed for use by the implementation.
*
- * Warning: use of ASSERT_* here will skip TEARDOWN.
+ * The @test_name code is run in a separate process sharing the same memory
+ * (i.e. vfork), which means that the test process can update its privileges
+ * without impacting the related FIXTURE_TEARDOWN() (e.g. to remove files from
+ * a directory where write access was dropped).
*/
-/* TODO(wad) register fixtures on dedicated test lists. */
#define TEST_F(fixture_name, test_name) \
__TEST_F_IMPL(fixture_name, test_name, -1, TEST_TIMEOUT_DEFAULT)
@@ -368,13 +381,35 @@
{ \
/* fixture data is alloced, setup, and torn down per call. */ \
FIXTURE_DATA(fixture_name) self; \
+ pid_t child = 1; \
+ int status = 0; \
memset(&self, 0, sizeof(FIXTURE_DATA(fixture_name))); \
- fixture_name##_setup(_metadata, &self, variant->data); \
- /* Let setup failure terminate early. */ \
- if (!_metadata->passed) \
- return; \
- fixture_name##_##test_name(_metadata, &self, variant->data); \
- fixture_name##_teardown(_metadata, &self); \
+ if (setjmp(_metadata->env) == 0) { \
+ /* Use the same _metadata. */ \
+ child = vfork(); \
+ if (child == 0) { \
+ fixture_name##_setup(_metadata, &self, variant->data); \
+ /* Let setup failure terminate early. */ \
+ if (_metadata->exit_code) \
+ _exit(0); \
+ _metadata->setup_completed = true; \
+ fixture_name##_##test_name(_metadata, &self, variant->data); \
+ } else if (child < 0 || child != waitpid(child, &status, 0)) { \
+ ksft_print_msg("ERROR SPAWNING TEST GRANDCHILD\n"); \
+ _metadata->exit_code = KSFT_FAIL; \
+ } \
+ } \
+ if (child == 0) { \
+ if (_metadata->setup_completed && !_metadata->teardown_parent) \
+ fixture_name##_teardown(_metadata, &self, variant->data); \
+ _exit(0); \
+ } \
+ if (_metadata->setup_completed && _metadata->teardown_parent) \
+ fixture_name##_teardown(_metadata, &self, variant->data); \
+ if (!WIFEXITED(status) && WIFSIGNALED(status)) \
+ /* Forward signal to __wait_for_test(). */ \
+ kill(getpid(), WTERMSIG(status)); \
+ __test_check_assert(_metadata); \
} \
static struct __test_metadata \
_##fixture_name##_##test_name##_object = { \
@@ -383,6 +418,7 @@
.fixture = &_##fixture_name##_fixture_object, \
.termsig = signal, \
.timeout = tmout, \
+ .teardown_parent = false, \
}; \
static void __attribute__((constructor)) \
_register_##fixture_name##_##test_name(void) \
@@ -424,7 +460,7 @@
*/
/**
- * ASSERT_EQ(expected, seen)
+ * ASSERT_EQ()
*
* @expected: expected value
* @seen: measured value
@@ -435,7 +471,7 @@
__EXPECT(expected, #expected, seen, #seen, ==, 1)
/**
- * ASSERT_NE(expected, seen)
+ * ASSERT_NE()
*
* @expected: expected value
* @seen: measured value
@@ -446,7 +482,7 @@
__EXPECT(expected, #expected, seen, #seen, !=, 1)
/**
- * ASSERT_LT(expected, seen)
+ * ASSERT_LT()
*
* @expected: expected value
* @seen: measured value
@@ -457,7 +493,7 @@
__EXPECT(expected, #expected, seen, #seen, <, 1)
/**
- * ASSERT_LE(expected, seen)
+ * ASSERT_LE()
*
* @expected: expected value
* @seen: measured value
@@ -468,7 +504,7 @@
__EXPECT(expected, #expected, seen, #seen, <=, 1)
/**
- * ASSERT_GT(expected, seen)
+ * ASSERT_GT()
*
* @expected: expected value
* @seen: measured value
@@ -479,7 +515,7 @@
__EXPECT(expected, #expected, seen, #seen, >, 1)
/**
- * ASSERT_GE(expected, seen)
+ * ASSERT_GE()
*
* @expected: expected value
* @seen: measured value
@@ -490,7 +526,7 @@
__EXPECT(expected, #expected, seen, #seen, >=, 1)
/**
- * ASSERT_NULL(seen)
+ * ASSERT_NULL()
*
* @seen: measured value
*
@@ -500,7 +536,7 @@
__EXPECT(NULL, "NULL", seen, #seen, ==, 1)
/**
- * ASSERT_TRUE(seen)
+ * ASSERT_TRUE()
*
* @seen: measured value
*
@@ -510,7 +546,7 @@
__EXPECT(0, "0", seen, #seen, !=, 1)
/**
- * ASSERT_FALSE(seen)
+ * ASSERT_FALSE()
*
* @seen: measured value
*
@@ -520,7 +556,7 @@
__EXPECT(0, "0", seen, #seen, ==, 1)
/**
- * ASSERT_STREQ(expected, seen)
+ * ASSERT_STREQ()
*
* @expected: expected value
* @seen: measured value
@@ -531,7 +567,7 @@
__EXPECT_STR(expected, seen, ==, 1)
/**
- * ASSERT_STRNE(expected, seen)
+ * ASSERT_STRNE()
*
* @expected: expected value
* @seen: measured value
@@ -542,7 +578,7 @@
__EXPECT_STR(expected, seen, !=, 1)
/**
- * EXPECT_EQ(expected, seen)
+ * EXPECT_EQ()
*
* @expected: expected value
* @seen: measured value
@@ -553,7 +589,7 @@
__EXPECT(expected, #expected, seen, #seen, ==, 0)
/**
- * EXPECT_NE(expected, seen)
+ * EXPECT_NE()
*
* @expected: expected value
* @seen: measured value
@@ -564,7 +600,7 @@
__EXPECT(expected, #expected, seen, #seen, !=, 0)
/**
- * EXPECT_LT(expected, seen)
+ * EXPECT_LT()
*
* @expected: expected value
* @seen: measured value
@@ -575,7 +611,7 @@
__EXPECT(expected, #expected, seen, #seen, <, 0)
/**
- * EXPECT_LE(expected, seen)
+ * EXPECT_LE()
*
* @expected: expected value
* @seen: measured value
@@ -586,7 +622,7 @@
__EXPECT(expected, #expected, seen, #seen, <=, 0)
/**
- * EXPECT_GT(expected, seen)
+ * EXPECT_GT()
*
* @expected: expected value
* @seen: measured value
@@ -597,7 +633,7 @@
__EXPECT(expected, #expected, seen, #seen, >, 0)
/**
- * EXPECT_GE(expected, seen)
+ * EXPECT_GE()
*
* @expected: expected value
* @seen: measured value
@@ -608,7 +644,7 @@
__EXPECT(expected, #expected, seen, #seen, >=, 0)
/**
- * EXPECT_NULL(seen)
+ * EXPECT_NULL()
*
* @seen: measured value
*
@@ -618,7 +654,7 @@
__EXPECT(NULL, "NULL", seen, #seen, ==, 0)
/**
- * EXPECT_TRUE(seen)
+ * EXPECT_TRUE()
*
* @seen: measured value
*
@@ -628,7 +664,7 @@
__EXPECT(0, "0", seen, #seen, !=, 0)
/**
- * EXPECT_FALSE(seen)
+ * EXPECT_FALSE()
*
* @seen: measured value
*
@@ -638,7 +674,7 @@
__EXPECT(0, "0", seen, #seen, ==, 0)
/**
- * EXPECT_STREQ(expected, seen)
+ * EXPECT_STREQ()
*
* @expected: expected value
* @seen: measured value
@@ -649,7 +685,7 @@
__EXPECT_STR(expected, seen, ==, 0)
/**
- * EXPECT_STRNE(expected, seen)
+ * EXPECT_STRNE()
*
* @expected: expected value
* @seen: measured value
@@ -659,7 +695,9 @@
#define EXPECT_STRNE(expected, seen) \
__EXPECT_STR(expected, seen, !=, 0)
+#ifndef ARRAY_SIZE
#define ARRAY_SIZE(a) (sizeof(a) / sizeof(a[0]))
+#endif
/* Support an optional handler after and ASSERT_* or EXPECT_*. The approach is
* not thread-safe, but it should be fine in most sane test scenarios.
@@ -669,24 +707,51 @@
*/
#define OPTIONAL_HANDLER(_assert) \
for (; _metadata->trigger; _metadata->trigger = \
- __bail(_assert, _metadata->no_print, _metadata->step))
+ __bail(_assert, _metadata))
-#define __INC_STEP(_metadata) \
- if (_metadata->passed && _metadata->step < 255) \
- _metadata->step++;
+#define is_signed_type(var) (!!(((__typeof__(var))(-1)) < (__typeof__(var))1))
#define __EXPECT(_expected, _expected_str, _seen, _seen_str, _t, _assert) do { \
/* Avoid multiple evaluation of the cases */ \
__typeof__(_expected) __exp = (_expected); \
__typeof__(_seen) __seen = (_seen); \
- if (_assert) __INC_STEP(_metadata); \
if (!(__exp _t __seen)) { \
- unsigned long long __exp_print = (uintptr_t)__exp; \
- unsigned long long __seen_print = (uintptr_t)__seen; \
- __TH_LOG("Expected %s (%llu) %s %s (%llu)", \
- _expected_str, __exp_print, #_t, \
- _seen_str, __seen_print); \
- _metadata->passed = 0; \
+ /* Report with actual signedness to avoid weird output. */ \
+ switch (is_signed_type(__exp) * 2 + is_signed_type(__seen)) { \
+ case 0: { \
+ unsigned long long __exp_print = (uintptr_t)__exp; \
+ unsigned long long __seen_print = (uintptr_t)__seen; \
+ __TH_LOG("Expected %s (%llu) %s %s (%llu)", \
+ _expected_str, __exp_print, #_t, \
+ _seen_str, __seen_print); \
+ break; \
+ } \
+ case 1: { \
+ unsigned long long __exp_print = (uintptr_t)__exp; \
+ long long __seen_print = (intptr_t)__seen; \
+ __TH_LOG("Expected %s (%llu) %s %s (%lld)", \
+ _expected_str, __exp_print, #_t, \
+ _seen_str, __seen_print); \
+ break; \
+ } \
+ case 2: { \
+ long long __exp_print = (intptr_t)__exp; \
+ unsigned long long __seen_print = (uintptr_t)__seen; \
+ __TH_LOG("Expected %s (%lld) %s %s (%llu)", \
+ _expected_str, __exp_print, #_t, \
+ _seen_str, __seen_print); \
+ break; \
+ } \
+ case 3: { \
+ long long __exp_print = (intptr_t)__exp; \
+ long long __seen_print = (intptr_t)__seen; \
+ __TH_LOG("Expected %s (%lld) %s %s (%lld)", \
+ _expected_str, __exp_print, #_t, \
+ _seen_str, __seen_print); \
+ break; \
+ } \
+ } \
+ _metadata->exit_code = KSFT_FAIL; \
/* Ensure the optional handler is triggered */ \
_metadata->trigger = 1; \
} \
@@ -695,10 +760,9 @@
#define __EXPECT_STR(_expected, _seen, _t, _assert) do { \
const char *__exp = (_expected); \
const char *__seen = (_seen); \
- if (_assert) __INC_STEP(_metadata); \
if (!(strcmp(__exp, __seen) _t 0)) { \
__TH_LOG("Expected '%s' %s '%s'.", __exp, #_t, __seen); \
- _metadata->passed = 0; \
+ _metadata->exit_code = KSFT_FAIL; \
_metadata->trigger = 1; \
} \
} while (0); OPTIONAL_HANDLER(_assert)
@@ -726,6 +790,10 @@
} \
}
+struct __test_results {
+ char reason[1024]; /* Reason for test result */
+};
+
struct __test_metadata;
struct __fixture_variant_metadata;
@@ -740,6 +808,37 @@ struct __fixture_metadata {
.prev = &_fixture_global,
};
+struct __test_xfail {
+ struct __fixture_metadata *fixture;
+ struct __fixture_variant_metadata *variant;
+ struct __test_metadata *test;
+ struct __test_xfail *prev, *next;
+};
+
+/**
+ * XFAIL_ADD() - mark variant + test case combination as expected to fail
+ * @fixture_name: name of the fixture
+ * @variant_name: name of the variant
+ * @test_name: name of the test case
+ *
+ * Mark a combination of variant + test case for a given fixture as expected
+ * to fail. Tests marked this way will report XPASS / XFAIL return codes,
+ * instead of PASS / FAIL,and use respective counters.
+ */
+#define XFAIL_ADD(fixture_name, variant_name, test_name) \
+ static struct __test_xfail \
+ _##fixture_name##_##variant_name##_##test_name##_xfail = \
+ { \
+ .fixture = &_##fixture_name##_fixture_object, \
+ .variant = &_##fixture_name##_##variant_name##_object, \
+ .test = &_##fixture_name##_##test_name##_object, \
+ }; \
+ static void __attribute__((constructor)) \
+ _register_##fixture_name##_##variant_name##_##test_name##_xfail(void) \
+ { \
+ __register_xfail(&_##fixture_name##_##variant_name##_##test_name##_xfail); \
+ }
+
static struct __fixture_metadata *__fixture_list = &_fixture_global;
static int __constructor_order;
@@ -754,6 +853,7 @@ static inline void __register_fixture(struct __fixture_metadata *f)
struct __fixture_variant_metadata {
const char *name;
const void *data;
+ struct __test_xfail *xfails;
struct __fixture_variant_metadata *prev, *next;
};
@@ -772,15 +872,24 @@ struct __test_metadata {
pid_t pid; /* pid of test when being run */
struct __fixture_metadata *fixture;
int termsig;
- int passed;
+ int exit_code;
int trigger; /* extra handler after the evaluation */
int timeout; /* seconds to wait for test timeout */
bool timed_out; /* did this test timeout instead of exiting? */
- __u8 step;
- bool no_print; /* manual trigger when TH_LOG_STREAM is not available */
+ bool aborted; /* stopped test due to failed ASSERT */
+ bool setup_completed; /* did setup finish? */
+ bool teardown_parent; /* run teardown in a parent process */
+ jmp_buf env; /* for exiting out of test early */
+ struct __test_results *results;
struct __test_metadata *prev, *next;
};
+static inline bool __test_passed(struct __test_metadata *metadata)
+{
+ return metadata->exit_code != KSFT_FAIL &&
+ metadata->exit_code <= KSFT_SKIP;
+}
+
/*
* Since constructors are called in reverse order, reverse the test
* list so tests are run in source declaration order.
@@ -795,16 +904,28 @@ static inline void __register_test(struct __test_metadata *t)
__LIST_APPEND(t->fixture->tests, t);
}
-static inline int __bail(int for_realz, bool no_print, __u8 step)
+static inline void __register_xfail(struct __test_xfail *xf)
{
+ __LIST_APPEND(xf->variant->xfails, xf);
+}
+
+static inline int __bail(int for_realz, struct __test_metadata *t)
+{
+ /* if this is ASSERT, return immediately. */
if (for_realz) {
- if (no_print)
- _exit(step);
- abort();
+ t->aborted = true;
+ longjmp(t->env, 1);
}
+ /* otherwise, end the for loop and continue. */
return 0;
}
+static inline void __test_check_assert(struct __test_metadata *t)
+{
+ if (t->aborted)
+ abort();
+}
+
struct __test_metadata *__active_test;
static void __timeout_handler(int sig, siginfo_t *info, void *ucontext)
{
@@ -813,18 +934,19 @@ static void __timeout_handler(int sig, siginfo_t *info, void *ucontext)
/* Sanity check handler execution environment. */
if (!t) {
fprintf(TH_LOG_STREAM,
- "no active test in SIGALRM handler!?\n");
+ "# no active test in SIGALRM handler!?\n");
abort();
}
if (sig != SIGALRM || sig != info->si_signo) {
fprintf(TH_LOG_STREAM,
- "%s: SIGALRM handler caught signal %d!?\n",
+ "# %s: SIGALRM handler caught signal %d!?\n",
t->name, sig != SIGALRM ? sig : info->si_signo);
abort();
}
t->timed_out = true;
- kill(t->pid, SIGKILL);
+ // signal process group
+ kill(-(t->pid), SIGKILL);
}
void __wait_for_test(struct __test_metadata *t)
@@ -837,9 +959,9 @@ void __wait_for_test(struct __test_metadata *t)
int status;
if (sigaction(SIGALRM, &action, &saved_action)) {
- t->passed = 0;
+ t->exit_code = KSFT_FAIL;
fprintf(TH_LOG_STREAM,
- "%s: unable to install SIGALRM handler\n",
+ "# %s: unable to install SIGALRM handler\n",
t->name);
return;
}
@@ -849,120 +971,300 @@ void __wait_for_test(struct __test_metadata *t)
waitpid(t->pid, &status, 0);
alarm(0);
if (sigaction(SIGALRM, &saved_action, NULL)) {
- t->passed = 0;
+ t->exit_code = KSFT_FAIL;
fprintf(TH_LOG_STREAM,
- "%s: unable to uninstall SIGALRM handler\n",
+ "# %s: unable to uninstall SIGALRM handler\n",
t->name);
return;
}
__active_test = NULL;
if (t->timed_out) {
- t->passed = 0;
+ t->exit_code = KSFT_FAIL;
fprintf(TH_LOG_STREAM,
- "%s: Test terminated by timeout\n", t->name);
+ "# %s: Test terminated by timeout\n", t->name);
} else if (WIFEXITED(status)) {
- t->passed = t->termsig == -1 ? !WEXITSTATUS(status) : 0;
- if (t->termsig != -1) {
- fprintf(TH_LOG_STREAM,
- "%s: Test exited normally "
- "instead of by signal (code: %d)\n",
- t->name,
- WEXITSTATUS(status));
- } else if (!t->passed) {
+ if (WEXITSTATUS(status) == KSFT_SKIP ||
+ WEXITSTATUS(status) == KSFT_XPASS ||
+ WEXITSTATUS(status) == KSFT_XFAIL) {
+ t->exit_code = WEXITSTATUS(status);
+ } else if (t->termsig != -1) {
+ t->exit_code = KSFT_FAIL;
fprintf(TH_LOG_STREAM,
- "%s: Test failed at step #%d\n",
+ "# %s: Test exited normally instead of by signal (code: %d)\n",
t->name,
WEXITSTATUS(status));
+ } else {
+ switch (WEXITSTATUS(status)) {
+ /* Success */
+ case KSFT_PASS:
+ t->exit_code = KSFT_PASS;
+ break;
+ /* Failure */
+ default:
+ t->exit_code = KSFT_FAIL;
+ fprintf(TH_LOG_STREAM,
+ "# %s: Test failed\n",
+ t->name);
+ }
}
} else if (WIFSIGNALED(status)) {
- t->passed = 0;
+ t->exit_code = KSFT_FAIL;
if (WTERMSIG(status) == SIGABRT) {
fprintf(TH_LOG_STREAM,
- "%s: Test terminated by assertion\n",
+ "# %s: Test terminated by assertion\n",
t->name);
} else if (WTERMSIG(status) == t->termsig) {
- t->passed = 1;
+ t->exit_code = KSFT_PASS;
} else {
fprintf(TH_LOG_STREAM,
- "%s: Test terminated unexpectedly "
- "by signal %d\n",
+ "# %s: Test terminated unexpectedly by signal %d\n",
t->name,
WTERMSIG(status));
}
} else {
fprintf(TH_LOG_STREAM,
- "%s: Test ended in some other way [%u]\n",
+ "# %s: Test ended in some other way [%u]\n",
t->name,
status);
}
}
+static void test_harness_list_tests(void)
+{
+ struct __fixture_variant_metadata *v;
+ struct __fixture_metadata *f;
+ struct __test_metadata *t;
+
+ for (f = __fixture_list; f; f = f->next) {
+ v = f->variant;
+ t = f->tests;
+
+ if (f == __fixture_list)
+ fprintf(stderr, "%-20s %-25s %s\n",
+ "# FIXTURE", "VARIANT", "TEST");
+ else
+ fprintf(stderr, "--------------------------------------------------------------------------------\n");
+
+ do {
+ fprintf(stderr, "%-20s %-25s %s\n",
+ t == f->tests ? f->name : "",
+ v ? v->name : "",
+ t ? t->name : "");
+
+ v = v ? v->next : NULL;
+ t = t ? t->next : NULL;
+ } while (v || t);
+ }
+}
+
+static int test_harness_argv_check(int argc, char **argv)
+{
+ int opt;
+
+ while ((opt = getopt(argc, argv, "hlF:f:V:v:t:T:r:")) != -1) {
+ switch (opt) {
+ case 'f':
+ case 'F':
+ case 'v':
+ case 'V':
+ case 't':
+ case 'T':
+ case 'r':
+ break;
+ case 'l':
+ test_harness_list_tests();
+ return KSFT_SKIP;
+ case 'h':
+ default:
+ fprintf(stderr,
+ "Usage: %s [-h|-l] [-t|-T|-v|-V|-f|-F|-r name]\n"
+ "\t-h print help\n"
+ "\t-l list all tests\n"
+ "\n"
+ "\t-t name include test\n"
+ "\t-T name exclude test\n"
+ "\t-v name include variant\n"
+ "\t-V name exclude variant\n"
+ "\t-f name include fixture\n"
+ "\t-F name exclude fixture\n"
+ "\t-r name run specified test\n"
+ "\n"
+ "Test filter options can be specified "
+ "multiple times. The filtering stops\n"
+ "at the first match. For example to "
+ "include all tests from variant 'bla'\n"
+ "but not test 'foo' specify '-T foo -v bla'.\n"
+ "", argv[0]);
+ return opt == 'h' ? KSFT_SKIP : KSFT_FAIL;
+ }
+ }
+
+ return KSFT_PASS;
+}
+
+static bool test_enabled(int argc, char **argv,
+ struct __fixture_metadata *f,
+ struct __fixture_variant_metadata *v,
+ struct __test_metadata *t)
+{
+ unsigned int flen = 0, vlen = 0, tlen = 0;
+ bool has_positive = false;
+ int opt;
+
+ optind = 1;
+ while ((opt = getopt(argc, argv, "F:f:V:v:t:T:r:")) != -1) {
+ has_positive |= islower(opt);
+
+ switch (tolower(opt)) {
+ case 't':
+ if (!strcmp(t->name, optarg))
+ return islower(opt);
+ break;
+ case 'f':
+ if (!strcmp(f->name, optarg))
+ return islower(opt);
+ break;
+ case 'v':
+ if (!strcmp(v->name, optarg))
+ return islower(opt);
+ break;
+ case 'r':
+ if (!tlen) {
+ flen = strlen(f->name);
+ vlen = strlen(v->name);
+ tlen = strlen(t->name);
+ }
+ if (strlen(optarg) == flen + 1 + vlen + !!vlen + tlen &&
+ !strncmp(f->name, &optarg[0], flen) &&
+ !strncmp(v->name, &optarg[flen + 1], vlen) &&
+ !strncmp(t->name, &optarg[flen + 1 + vlen + !!vlen], tlen))
+ return true;
+ break;
+ }
+ }
+
+ /*
+ * If there are no positive tests then we assume user just wants
+ * exclusions and everything else is a pass.
+ */
+ return !has_positive;
+}
+
void __run_test(struct __fixture_metadata *f,
struct __fixture_variant_metadata *variant,
struct __test_metadata *t)
{
+ struct __test_xfail *xfail;
+ char test_name[LINE_MAX];
+ const char *diagnostic;
+
/* reset test struct */
- t->passed = 1;
+ t->exit_code = KSFT_PASS;
t->trigger = 0;
- t->step = 0;
- t->no_print = 0;
+ memset(t->results->reason, 0, sizeof(t->results->reason));
+
+ snprintf(test_name, sizeof(test_name), "%s%s%s.%s",
+ f->name, variant->name[0] ? "." : "", variant->name, t->name);
+
+ ksft_print_msg(" RUN %s ...\n", test_name);
+
+ /* Make sure output buffers are flushed before fork */
+ fflush(stdout);
+ fflush(stderr);
- printf("[ RUN ] %s%s%s.%s\n",
- f->name, variant->name[0] ? "." : "", variant->name, t->name);
t->pid = fork();
if (t->pid < 0) {
- printf("ERROR SPAWNING TEST CHILD\n");
- t->passed = 0;
+ ksft_print_msg("ERROR SPAWNING TEST CHILD\n");
+ t->exit_code = KSFT_FAIL;
} else if (t->pid == 0) {
+ setpgrp();
t->fn(t, variant);
- /* return the step that failed or 0 */
- _exit(t->passed ? 0 : t->step);
+ _exit(t->exit_code);
} else {
__wait_for_test(t);
}
- printf("[ %4s ] %s%s%s.%s\n", (t->passed ? "OK" : "FAIL"),
- f->name, variant->name[0] ? "." : "", variant->name, t->name);
+ ksft_print_msg(" %4s %s\n",
+ __test_passed(t) ? "OK" : "FAIL", test_name);
+
+ /* Check if we're expecting this test to fail */
+ for (xfail = variant->xfails; xfail; xfail = xfail->next)
+ if (xfail->test == t)
+ break;
+ if (xfail)
+ t->exit_code = __test_passed(t) ? KSFT_XPASS : KSFT_XFAIL;
+
+ if (t->results->reason[0])
+ diagnostic = t->results->reason;
+ else if (t->exit_code == KSFT_PASS || t->exit_code == KSFT_FAIL)
+ diagnostic = NULL;
+ else
+ diagnostic = "unknown";
+
+ ksft_test_result_code(t->exit_code, test_name,
+ diagnostic ? "%s" : "", diagnostic);
}
-static int test_harness_run(int __attribute__((unused)) argc,
- char __attribute__((unused)) **argv)
+static int test_harness_run(int argc, char **argv)
{
struct __fixture_variant_metadata no_variant = { .name = "", };
struct __fixture_variant_metadata *v;
struct __fixture_metadata *f;
+ struct __test_results *results;
struct __test_metadata *t;
- int ret = 0;
+ int ret;
unsigned int case_count = 0, test_count = 0;
unsigned int count = 0;
unsigned int pass_count = 0;
+ ret = test_harness_argv_check(argc, argv);
+ if (ret != KSFT_PASS)
+ return ret;
+
for (f = __fixture_list; f; f = f->next) {
for (v = f->variant ?: &no_variant; v; v = v->next) {
- case_count++;
+ unsigned int old_tests = test_count;
+
for (t = f->tests; t; t = t->next)
- test_count++;
+ if (test_enabled(argc, argv, f, v, t))
+ test_count++;
+
+ if (old_tests != test_count)
+ case_count++;
}
}
- /* TODO(wad) add optional arguments similar to gtest. */
- printf("[==========] Running %u tests from %u test cases.\n",
+ results = mmap(NULL, sizeof(*results), PROT_READ | PROT_WRITE,
+ MAP_SHARED | MAP_ANONYMOUS, -1, 0);
+
+ ksft_print_header();
+ ksft_set_plan(test_count);
+ ksft_print_msg("Starting %u tests from %u test cases.\n",
test_count, case_count);
for (f = __fixture_list; f; f = f->next) {
for (v = f->variant ?: &no_variant; v; v = v->next) {
for (t = f->tests; t; t = t->next) {
+ if (!test_enabled(argc, argv, f, v, t))
+ continue;
count++;
+ t->results = results;
__run_test(f, v, t);
- if (t->passed)
+ t->results = NULL;
+ if (__test_passed(t))
pass_count++;
else
ret = 1;
}
}
}
- printf("[==========] %u / %u tests passed.\n", pass_count, count);
- printf("[ %s ]\n", (ret ? "FAILED" : "PASSED"));
- return ret;
+ munmap(results, sizeof(*results));
+
+ ksft_print_msg("%s: %u / %u tests passed.\n", ret ? "FAILED" : "PASSED",
+ pass_count, count);
+ ksft_exit(ret == 0);
+
+ /* unreachable */
+ return KSFT_FAIL;
}
static void __attribute__((constructor)) __constructor_order_first(void)
diff --git a/tools/testing/selftests/kselftest_module.h b/tools/testing/selftests/kselftest_module.h
index e8eafaf0941a..63cd7487373f 100644
--- a/tools/testing/selftests/kselftest_module.h
+++ b/tools/testing/selftests/kselftest_module.h
@@ -3,6 +3,7 @@
#define __KSELFTEST_MODULE_H
#include <linux/module.h>
+#include <linux/panic.h>
/*
* Test framework for writing test modules to be loaded by kselftest.
@@ -11,7 +12,8 @@
#define KSTM_MODULE_GLOBALS() \
static unsigned int total_tests __initdata; \
-static unsigned int failed_tests __initdata
+static unsigned int failed_tests __initdata; \
+static unsigned int skipped_tests __initdata
#define KSTM_CHECK_ZERO(x) do { \
total_tests++; \
@@ -21,11 +23,16 @@ static unsigned int failed_tests __initdata
} \
} while (0)
-static inline int kstm_report(unsigned int total_tests, unsigned int failed_tests)
+static inline int kstm_report(unsigned int total_tests, unsigned int failed_tests,
+ unsigned int skipped_tests)
{
- if (failed_tests == 0)
- pr_info("all %u tests passed\n", total_tests);
- else
+ if (failed_tests == 0) {
+ if (skipped_tests) {
+ pr_info("skipped %u tests\n", skipped_tests);
+ pr_info("remaining %u tests passed\n", total_tests);
+ } else
+ pr_info("all %u tests passed\n", total_tests);
+ } else
pr_warn("failed %u out of %u tests\n", failed_tests, total_tests);
return failed_tests ? -EINVAL : 0;
@@ -35,8 +42,9 @@ static inline int kstm_report(unsigned int total_tests, unsigned int failed_test
static int __init __module##_init(void) \
{ \
pr_info("loaded.\n"); \
+ add_taint(TAINT_TEST, LOCKDEP_STILL_OK); \
selftest(); \
- return kstm_report(total_tests, failed_tests); \
+ return kstm_report(total_tests, failed_tests, skipped_tests); \
} \
static void __exit __module##_exit(void) \
{ \
@@ -45,4 +53,6 @@ static void __exit __module##_exit(void) \
module_init(__module##_init); \
module_exit(__module##_exit)
+MODULE_INFO(test, "Y");
+
#endif /* __KSELFTEST_MODULE_H */
diff --git a/tools/testing/selftests/kvm/.gitignore b/tools/testing/selftests/kvm/.gitignore
index 452787152748..6d9381d60172 100644
--- a/tools/testing/selftests/kvm/.gitignore
+++ b/tools/testing/selftests/kvm/.gitignore
@@ -1,27 +1,7 @@
# SPDX-License-Identifier: GPL-2.0-only
-/s390x/memop
-/s390x/resets
-/s390x/sync_regs_test
-/x86_64/cr4_cpuid_sync_test
-/x86_64/debug_regs
-/x86_64/evmcs_test
-/x86_64/hyperv_cpuid
-/x86_64/mmio_warning_test
-/x86_64/platform_info_test
-/x86_64/set_sregs_test
-/x86_64/smm_test
-/x86_64/state_test
-/x86_64/vmx_preemption_timer_test
-/x86_64/svm_vmcall_test
-/x86_64/sync_regs_test
-/x86_64/vmx_close_while_nested_test
-/x86_64/vmx_dirty_log_test
-/x86_64/vmx_set_nested_state_test
-/x86_64/vmx_tsc_adjust_test
-/x86_64/xss_msr_test
-/clear_dirty_log_test
-/demand_paging_test
-/dirty_log_test
-/kvm_create_max_vcpus
-/set_memory_region_test
-/steal_time
+*
+!/**/
+!*.c
+!*.h
+!*.S
+!*.sh
diff --git a/tools/testing/selftests/kvm/Makefile b/tools/testing/selftests/kvm/Makefile
index 4a166588d99f..741c7dc16afc 100644
--- a/tools/testing/selftests/kvm/Makefile
+++ b/tools/testing/selftests/kvm/Makefile
@@ -1,84 +1,219 @@
# SPDX-License-Identifier: GPL-2.0-only
-include ../../../../scripts/Kbuild.include
+include ../../../build/Build.include
all:
top_srcdir = ../../../..
-KSFT_KHDR_INSTALL := 1
-
-# For cross-builds to work, UNAME_M has to map to ARCH and arch specific
-# directories and targets in this Makefile. "uname -m" doesn't map to
-# arch specific sub-directory names.
-#
-# UNAME_M variable to used to run the compiles pointing to the right arch
-# directories and build the right targets for these supported architectures.
-#
-# TEST_GEN_PROGS and LIBKVM are set using UNAME_M variable.
-# LINUX_TOOL_ARCH_INCLUDE is set using ARCH variable.
-#
-# x86_64 targets are named to include x86_64 as a suffix and directories
-# for includes are in x86_64 sub-directory. s390x and aarch64 follow the
-# same convention. "uname -m" doesn't result in the correct mapping for
-# s390x and aarch64.
-#
-# No change necessary for x86_64
-UNAME_M := $(shell uname -m)
-
-# Set UNAME_M for arm64 compile/install to work
-ifeq ($(ARCH),arm64)
- UNAME_M := aarch64
-endif
-# Set UNAME_M s390x compile/install to work
-ifeq ($(ARCH),s390)
- UNAME_M := s390x
+include $(top_srcdir)/scripts/subarch.include
+ARCH ?= $(SUBARCH)
+
+ifeq ($(ARCH),x86)
+ ARCH_DIR := x86_64
+else ifeq ($(ARCH),arm64)
+ ARCH_DIR := aarch64
+else ifeq ($(ARCH),s390)
+ ARCH_DIR := s390x
+else
+ ARCH_DIR := $(ARCH)
endif
-LIBKVM = lib/assert.c lib/elf.c lib/io.c lib/kvm_util.c lib/sparsebit.c lib/test_util.c
-LIBKVM_x86_64 = lib/x86_64/processor.c lib/x86_64/vmx.c lib/x86_64/svm.c lib/x86_64/ucall.c
-LIBKVM_aarch64 = lib/aarch64/processor.c lib/aarch64/ucall.c
-LIBKVM_s390x = lib/s390x/processor.c lib/s390x/ucall.c
+LIBKVM += lib/assert.c
+LIBKVM += lib/elf.c
+LIBKVM += lib/guest_modes.c
+LIBKVM += lib/io.c
+LIBKVM += lib/kvm_util.c
+LIBKVM += lib/memstress.c
+LIBKVM += lib/guest_sprintf.c
+LIBKVM += lib/rbtree.c
+LIBKVM += lib/sparsebit.c
+LIBKVM += lib/test_util.c
+LIBKVM += lib/ucall_common.c
+LIBKVM += lib/userfaultfd_util.c
+
+LIBKVM_STRING += lib/string_override.c
+
+LIBKVM_x86_64 += lib/x86_64/apic.c
+LIBKVM_x86_64 += lib/x86_64/handlers.S
+LIBKVM_x86_64 += lib/x86_64/hyperv.c
+LIBKVM_x86_64 += lib/x86_64/memstress.c
+LIBKVM_x86_64 += lib/x86_64/pmu.c
+LIBKVM_x86_64 += lib/x86_64/processor.c
+LIBKVM_x86_64 += lib/x86_64/sev.c
+LIBKVM_x86_64 += lib/x86_64/svm.c
+LIBKVM_x86_64 += lib/x86_64/ucall.c
+LIBKVM_x86_64 += lib/x86_64/vmx.c
+
+LIBKVM_aarch64 += lib/aarch64/gic.c
+LIBKVM_aarch64 += lib/aarch64/gic_v3.c
+LIBKVM_aarch64 += lib/aarch64/handlers.S
+LIBKVM_aarch64 += lib/aarch64/processor.c
+LIBKVM_aarch64 += lib/aarch64/spinlock.c
+LIBKVM_aarch64 += lib/aarch64/ucall.c
+LIBKVM_aarch64 += lib/aarch64/vgic.c
+
+LIBKVM_s390x += lib/s390x/diag318_test_handler.c
+LIBKVM_s390x += lib/s390x/processor.c
+LIBKVM_s390x += lib/s390x/ucall.c
-TEST_GEN_PROGS_x86_64 = x86_64/cr4_cpuid_sync_test
-TEST_GEN_PROGS_x86_64 += x86_64/evmcs_test
+LIBKVM_riscv += lib/riscv/handlers.S
+LIBKVM_riscv += lib/riscv/processor.c
+LIBKVM_riscv += lib/riscv/ucall.c
+
+# Non-compiled test targets
+TEST_PROGS_x86_64 += x86_64/nx_huge_pages_test.sh
+
+# Compiled test targets
+TEST_GEN_PROGS_x86_64 = x86_64/cpuid_test
+TEST_GEN_PROGS_x86_64 += x86_64/cr4_cpuid_sync_test
+TEST_GEN_PROGS_x86_64 += x86_64/dirty_log_page_splitting_test
+TEST_GEN_PROGS_x86_64 += x86_64/get_msr_index_features
+TEST_GEN_PROGS_x86_64 += x86_64/exit_on_emulation_failure_test
+TEST_GEN_PROGS_x86_64 += x86_64/fix_hypercall_test
+TEST_GEN_PROGS_x86_64 += x86_64/hwcr_msr_test
+TEST_GEN_PROGS_x86_64 += x86_64/hyperv_clock
TEST_GEN_PROGS_x86_64 += x86_64/hyperv_cpuid
-TEST_GEN_PROGS_x86_64 += x86_64/mmio_warning_test
+TEST_GEN_PROGS_x86_64 += x86_64/hyperv_evmcs
+TEST_GEN_PROGS_x86_64 += x86_64/hyperv_extended_hypercalls
+TEST_GEN_PROGS_x86_64 += x86_64/hyperv_features
+TEST_GEN_PROGS_x86_64 += x86_64/hyperv_ipi
+TEST_GEN_PROGS_x86_64 += x86_64/hyperv_svm_test
+TEST_GEN_PROGS_x86_64 += x86_64/hyperv_tlb_flush
+TEST_GEN_PROGS_x86_64 += x86_64/kvm_clock_test
+TEST_GEN_PROGS_x86_64 += x86_64/kvm_pv_test
+TEST_GEN_PROGS_x86_64 += x86_64/monitor_mwait_test
+TEST_GEN_PROGS_x86_64 += x86_64/nested_exceptions_test
TEST_GEN_PROGS_x86_64 += x86_64/platform_info_test
+TEST_GEN_PROGS_x86_64 += x86_64/pmu_counters_test
+TEST_GEN_PROGS_x86_64 += x86_64/pmu_event_filter_test
+TEST_GEN_PROGS_x86_64 += x86_64/private_mem_conversions_test
+TEST_GEN_PROGS_x86_64 += x86_64/private_mem_kvm_exits_test
+TEST_GEN_PROGS_x86_64 += x86_64/set_boot_cpu_id
TEST_GEN_PROGS_x86_64 += x86_64/set_sregs_test
+TEST_GEN_PROGS_x86_64 += x86_64/smaller_maxphyaddr_emulation_test
TEST_GEN_PROGS_x86_64 += x86_64/smm_test
TEST_GEN_PROGS_x86_64 += x86_64/state_test
TEST_GEN_PROGS_x86_64 += x86_64/vmx_preemption_timer_test
TEST_GEN_PROGS_x86_64 += x86_64/svm_vmcall_test
+TEST_GEN_PROGS_x86_64 += x86_64/svm_int_ctl_test
+TEST_GEN_PROGS_x86_64 += x86_64/svm_nested_shutdown_test
+TEST_GEN_PROGS_x86_64 += x86_64/svm_nested_soft_inject_test
+TEST_GEN_PROGS_x86_64 += x86_64/tsc_scaling_sync
TEST_GEN_PROGS_x86_64 += x86_64/sync_regs_test
+TEST_GEN_PROGS_x86_64 += x86_64/ucna_injection_test
+TEST_GEN_PROGS_x86_64 += x86_64/userspace_io_test
+TEST_GEN_PROGS_x86_64 += x86_64/userspace_msr_exit_test
+TEST_GEN_PROGS_x86_64 += x86_64/vmx_apic_access_test
TEST_GEN_PROGS_x86_64 += x86_64/vmx_close_while_nested_test
TEST_GEN_PROGS_x86_64 += x86_64/vmx_dirty_log_test
+TEST_GEN_PROGS_x86_64 += x86_64/vmx_exception_with_invalid_guest_state
+TEST_GEN_PROGS_x86_64 += x86_64/vmx_msrs_test
+TEST_GEN_PROGS_x86_64 += x86_64/vmx_invalid_nested_guest_state
TEST_GEN_PROGS_x86_64 += x86_64/vmx_set_nested_state_test
TEST_GEN_PROGS_x86_64 += x86_64/vmx_tsc_adjust_test
+TEST_GEN_PROGS_x86_64 += x86_64/vmx_nested_tsc_scaling_test
+TEST_GEN_PROGS_x86_64 += x86_64/xapic_ipi_test
+TEST_GEN_PROGS_x86_64 += x86_64/xapic_state_test
+TEST_GEN_PROGS_x86_64 += x86_64/xcr0_cpuid_test
TEST_GEN_PROGS_x86_64 += x86_64/xss_msr_test
TEST_GEN_PROGS_x86_64 += x86_64/debug_regs
-TEST_GEN_PROGS_x86_64 += clear_dirty_log_test
+TEST_GEN_PROGS_x86_64 += x86_64/tsc_msrs_test
+TEST_GEN_PROGS_x86_64 += x86_64/vmx_pmu_caps_test
+TEST_GEN_PROGS_x86_64 += x86_64/xen_shinfo_test
+TEST_GEN_PROGS_x86_64 += x86_64/xen_vmcall_test
+TEST_GEN_PROGS_x86_64 += x86_64/sev_migrate_tests
+TEST_GEN_PROGS_x86_64 += x86_64/sev_smoke_test
+TEST_GEN_PROGS_x86_64 += x86_64/amx_test
+TEST_GEN_PROGS_x86_64 += x86_64/max_vcpuid_cap_test
+TEST_GEN_PROGS_x86_64 += x86_64/triple_fault_event_test
+TEST_GEN_PROGS_x86_64 += x86_64/recalc_apic_map_test
+TEST_GEN_PROGS_x86_64 += access_tracking_perf_test
TEST_GEN_PROGS_x86_64 += demand_paging_test
TEST_GEN_PROGS_x86_64 += dirty_log_test
+TEST_GEN_PROGS_x86_64 += dirty_log_perf_test
+TEST_GEN_PROGS_x86_64 += guest_memfd_test
+TEST_GEN_PROGS_x86_64 += guest_print_test
+TEST_GEN_PROGS_x86_64 += hardware_disable_test
TEST_GEN_PROGS_x86_64 += kvm_create_max_vcpus
+TEST_GEN_PROGS_x86_64 += kvm_page_table_test
+TEST_GEN_PROGS_x86_64 += max_guest_memory_test
+TEST_GEN_PROGS_x86_64 += memslot_modification_stress_test
+TEST_GEN_PROGS_x86_64 += memslot_perf_test
+TEST_GEN_PROGS_x86_64 += rseq_test
TEST_GEN_PROGS_x86_64 += set_memory_region_test
TEST_GEN_PROGS_x86_64 += steal_time
+TEST_GEN_PROGS_x86_64 += kvm_binary_stats_test
+TEST_GEN_PROGS_x86_64 += system_counter_offset_test
+
+# Compiled outputs used by test targets
+TEST_GEN_PROGS_EXTENDED_x86_64 += x86_64/nx_huge_pages_test
-TEST_GEN_PROGS_aarch64 += clear_dirty_log_test
+TEST_GEN_PROGS_aarch64 += aarch64/aarch32_id_regs
+TEST_GEN_PROGS_aarch64 += aarch64/debug-exceptions
+TEST_GEN_PROGS_aarch64 += aarch64/hypercalls
+TEST_GEN_PROGS_aarch64 += aarch64/page_fault_test
+TEST_GEN_PROGS_aarch64 += aarch64/psci_test
+TEST_GEN_PROGS_aarch64 += aarch64/set_id_regs
+TEST_GEN_PROGS_aarch64 += aarch64/smccc_filter
+TEST_GEN_PROGS_aarch64 += aarch64/vcpu_width_config
+TEST_GEN_PROGS_aarch64 += aarch64/vgic_init
+TEST_GEN_PROGS_aarch64 += aarch64/vgic_irq
+TEST_GEN_PROGS_aarch64 += aarch64/vpmu_counter_access
+TEST_GEN_PROGS_aarch64 += access_tracking_perf_test
+TEST_GEN_PROGS_aarch64 += arch_timer
TEST_GEN_PROGS_aarch64 += demand_paging_test
TEST_GEN_PROGS_aarch64 += dirty_log_test
+TEST_GEN_PROGS_aarch64 += dirty_log_perf_test
+TEST_GEN_PROGS_aarch64 += guest_print_test
+TEST_GEN_PROGS_aarch64 += get-reg-list
TEST_GEN_PROGS_aarch64 += kvm_create_max_vcpus
+TEST_GEN_PROGS_aarch64 += kvm_page_table_test
+TEST_GEN_PROGS_aarch64 += memslot_modification_stress_test
+TEST_GEN_PROGS_aarch64 += memslot_perf_test
+TEST_GEN_PROGS_aarch64 += rseq_test
TEST_GEN_PROGS_aarch64 += set_memory_region_test
TEST_GEN_PROGS_aarch64 += steal_time
+TEST_GEN_PROGS_aarch64 += kvm_binary_stats_test
TEST_GEN_PROGS_s390x = s390x/memop
TEST_GEN_PROGS_s390x += s390x/resets
TEST_GEN_PROGS_s390x += s390x/sync_regs_test
+TEST_GEN_PROGS_s390x += s390x/tprot
+TEST_GEN_PROGS_s390x += s390x/cmma_test
+TEST_GEN_PROGS_s390x += s390x/debug_test
TEST_GEN_PROGS_s390x += demand_paging_test
TEST_GEN_PROGS_s390x += dirty_log_test
+TEST_GEN_PROGS_s390x += guest_print_test
TEST_GEN_PROGS_s390x += kvm_create_max_vcpus
+TEST_GEN_PROGS_s390x += kvm_page_table_test
+TEST_GEN_PROGS_s390x += rseq_test
TEST_GEN_PROGS_s390x += set_memory_region_test
+TEST_GEN_PROGS_s390x += kvm_binary_stats_test
+
+TEST_GEN_PROGS_riscv += arch_timer
+TEST_GEN_PROGS_riscv += demand_paging_test
+TEST_GEN_PROGS_riscv += dirty_log_test
+TEST_GEN_PROGS_riscv += get-reg-list
+TEST_GEN_PROGS_riscv += guest_print_test
+TEST_GEN_PROGS_riscv += kvm_binary_stats_test
+TEST_GEN_PROGS_riscv += kvm_create_max_vcpus
+TEST_GEN_PROGS_riscv += kvm_page_table_test
+TEST_GEN_PROGS_riscv += set_memory_region_test
+TEST_GEN_PROGS_riscv += steal_time
+
+SPLIT_TESTS += arch_timer
+SPLIT_TESTS += get-reg-list
-TEST_GEN_PROGS += $(TEST_GEN_PROGS_$(UNAME_M))
-LIBKVM += $(LIBKVM_$(UNAME_M))
+TEST_PROGS += $(TEST_PROGS_$(ARCH_DIR))
+TEST_GEN_PROGS += $(TEST_GEN_PROGS_$(ARCH_DIR))
+TEST_GEN_PROGS_EXTENDED += $(TEST_GEN_PROGS_EXTENDED_$(ARCH_DIR))
+LIBKVM += $(LIBKVM_$(ARCH_DIR))
+
+OVERRIDE_TARGETS = 1
+
+# lib.mak defines $(OUTPUT), prepends $(OUTPUT)/ to $(TEST_GEN_PROGS), and most
+# importantly defines, i.e. overwrites, $(CC) (unless `make -e` or `make CC=`,
+# which causes the environment variable to override the makefile).
+include ../lib.mk
INSTALL_HDR_PATH = $(top_srcdir)/usr
LINUX_HDR_PATH = $(INSTALL_HDR_PATH)/include/
@@ -89,38 +224,96 @@ else
LINUX_TOOL_ARCH_INCLUDE = $(top_srcdir)/tools/arch/$(ARCH)/include
endif
CFLAGS += -Wall -Wstrict-prototypes -Wuninitialized -O2 -g -std=gnu99 \
+ -Wno-gnu-variable-sized-type-not-at-end -MD -MP -DCONFIG_64BIT \
+ -fno-builtin-memcmp -fno-builtin-memcpy -fno-builtin-memset \
+ -fno-builtin-strnlen \
-fno-stack-protector -fno-PIE -I$(LINUX_TOOL_INCLUDE) \
-I$(LINUX_TOOL_ARCH_INCLUDE) -I$(LINUX_HDR_PATH) -Iinclude \
- -I$(<D) -Iinclude/$(UNAME_M) -I..
+ -I$(<D) -Iinclude/$(ARCH_DIR) -I ../rseq -I.. $(EXTRA_CFLAGS) \
+ $(KHDR_INCLUDES)
+ifeq ($(ARCH),s390)
+ CFLAGS += -march=z10
+endif
+ifeq ($(ARCH),arm64)
+tools_dir := $(top_srcdir)/tools
+arm64_tools_dir := $(tools_dir)/arch/arm64/tools/
+
+ifneq ($(abs_objdir),)
+arm64_hdr_outdir := $(abs_objdir)/tools/
+else
+arm64_hdr_outdir := $(tools_dir)/
+endif
-no-pie-option := $(call try-run, echo 'int main() { return 0; }' | \
- $(CC) -Werror -no-pie -x c - -o "$$TMP", -no-pie)
+GEN_HDRS := $(arm64_hdr_outdir)arch/arm64/include/generated/
+CFLAGS += -I$(GEN_HDRS)
+
+$(GEN_HDRS): $(wildcard $(arm64_tools_dir)/*)
+ $(MAKE) -C $(arm64_tools_dir) OUTPUT=$(arm64_hdr_outdir)
+endif
+
+no-pie-option := $(call try-run, echo 'int main(void) { return 0; }' | \
+ $(CC) -Werror $(CFLAGS) -no-pie -x c - -o "$$TMP", -no-pie)
# On s390, build the testcases KVM-enabled
-pgste-option = $(call try-run, echo 'int main() { return 0; }' | \
+pgste-option = $(call try-run, echo 'int main(void) { return 0; }' | \
$(CC) -Werror -Wl$(comma)--s390-pgste -x c - -o "$$TMP",-Wl$(comma)--s390-pgste)
-
+LDLIBS += -ldl
LDFLAGS += -pthread $(no-pie-option) $(pgste-option)
-# After inclusion, $(OUTPUT) is defined and
-# $(TEST_GEN_PROGS) starts with $(OUTPUT)/
-include ../lib.mk
+LIBKVM_C := $(filter %.c,$(LIBKVM))
+LIBKVM_S := $(filter %.S,$(LIBKVM))
+LIBKVM_C_OBJ := $(patsubst %.c, $(OUTPUT)/%.o, $(LIBKVM_C))
+LIBKVM_S_OBJ := $(patsubst %.S, $(OUTPUT)/%.o, $(LIBKVM_S))
+LIBKVM_STRING_OBJ := $(patsubst %.c, $(OUTPUT)/%.o, $(LIBKVM_STRING))
+LIBKVM_OBJS = $(LIBKVM_C_OBJ) $(LIBKVM_S_OBJ) $(LIBKVM_STRING_OBJ)
+SPLIT_TEST_GEN_PROGS := $(patsubst %, $(OUTPUT)/%, $(SPLIT_TESTS))
+SPLIT_TEST_GEN_OBJ := $(patsubst %, $(OUTPUT)/$(ARCH_DIR)/%.o, $(SPLIT_TESTS))
+
+TEST_GEN_OBJ = $(patsubst %, %.o, $(TEST_GEN_PROGS))
+TEST_GEN_OBJ += $(patsubst %, %.o, $(TEST_GEN_PROGS_EXTENDED))
+TEST_DEP_FILES = $(patsubst %.o, %.d, $(TEST_GEN_OBJ))
+TEST_DEP_FILES += $(patsubst %.o, %.d, $(LIBKVM_OBJS))
+TEST_DEP_FILES += $(patsubst %.o, %.d, $(SPLIT_TEST_GEN_OBJ))
+-include $(TEST_DEP_FILES)
+
+$(shell mkdir -p $(sort $(OUTPUT)/$(ARCH_DIR) $(dir $(LIBKVM_C_OBJ) $(LIBKVM_S_OBJ))))
+
+$(filter-out $(SPLIT_TEST_GEN_PROGS), $(TEST_GEN_PROGS)) \
+$(TEST_GEN_PROGS_EXTENDED): %: %.o
+ $(CC) $(CFLAGS) $(CPPFLAGS) $(LDFLAGS) $(TARGET_ARCH) $< $(LIBKVM_OBJS) $(LDLIBS) -o $@
+$(TEST_GEN_OBJ): $(OUTPUT)/%.o: %.c
+ $(CC) $(CFLAGS) $(CPPFLAGS) $(TARGET_ARCH) -c $< -o $@
-STATIC_LIBS := $(OUTPUT)/libkvm.a
-LIBKVM_OBJ := $(patsubst %.c, $(OUTPUT)/%.o, $(LIBKVM))
-EXTRA_CLEAN += $(LIBKVM_OBJ) $(STATIC_LIBS) cscope.*
+$(SPLIT_TEST_GEN_PROGS): $(OUTPUT)/%: $(OUTPUT)/%.o $(OUTPUT)/$(ARCH_DIR)/%.o
+ $(CC) $(CFLAGS) $(CPPFLAGS) $(LDFLAGS) $(TARGET_ARCH) $^ $(LDLIBS) -o $@
+$(SPLIT_TEST_GEN_OBJ): $(OUTPUT)/$(ARCH_DIR)/%.o: $(ARCH_DIR)/%.c
+ $(CC) $(CFLAGS) $(CPPFLAGS) $(TARGET_ARCH) -c $< -o $@
+
+EXTRA_CLEAN += $(GEN_HDRS) \
+ $(LIBKVM_OBJS) \
+ $(SPLIT_TEST_GEN_OBJ) \
+ $(TEST_DEP_FILES) \
+ $(TEST_GEN_OBJ) \
+ cscope.*
+
+$(LIBKVM_C_OBJ): $(OUTPUT)/%.o: %.c $(GEN_HDRS)
+ $(CC) $(CFLAGS) $(CPPFLAGS) $(TARGET_ARCH) -c $< -o $@
-x := $(shell mkdir -p $(sort $(dir $(LIBKVM_OBJ))))
-$(LIBKVM_OBJ): $(OUTPUT)/%.o: %.c
+$(LIBKVM_S_OBJ): $(OUTPUT)/%.o: %.S $(GEN_HDRS)
$(CC) $(CFLAGS) $(CPPFLAGS) $(TARGET_ARCH) -c $< -o $@
-$(OUTPUT)/libkvm.a: $(LIBKVM_OBJ)
- $(AR) crs $@ $^
+# Compile the string overrides as freestanding to prevent the compiler from
+# generating self-referential code, e.g. without "freestanding" the compiler may
+# "optimize" memcmp() by invoking memcmp(), thus causing infinite recursion.
+$(LIBKVM_STRING_OBJ): $(OUTPUT)/%.o: %.c
+ $(CC) $(CFLAGS) $(CPPFLAGS) $(TARGET_ARCH) -c -ffreestanding $< -o $@
-x := $(shell mkdir -p $(sort $(dir $(TEST_GEN_PROGS))))
-all: $(STATIC_LIBS)
-$(TEST_GEN_PROGS): $(STATIC_LIBS)
+$(shell mkdir -p $(sort $(dir $(TEST_GEN_PROGS))))
+$(SPLIT_TEST_GEN_OBJ): $(GEN_HDRS)
+$(TEST_GEN_PROGS): $(LIBKVM_OBJS)
+$(TEST_GEN_PROGS_EXTENDED): $(LIBKVM_OBJS)
+$(TEST_GEN_OBJ): $(GEN_HDRS)
cscope: include_paths = $(LINUX_TOOL_INCLUDE) $(LINUX_HDR_PATH) include lib ..
cscope:
diff --git a/tools/testing/selftests/kvm/aarch64/aarch32_id_regs.c b/tools/testing/selftests/kvm/aarch64/aarch32_id_regs.c
new file mode 100644
index 000000000000..8e5bd07a3727
--- /dev/null
+++ b/tools/testing/selftests/kvm/aarch64/aarch32_id_regs.c
@@ -0,0 +1,167 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * aarch32_id_regs - Test for ID register behavior on AArch64-only systems
+ *
+ * Copyright (c) 2022 Google LLC.
+ *
+ * Test that KVM handles the AArch64 views of the AArch32 ID registers as RAZ
+ * and WI from userspace.
+ */
+
+#include <stdint.h>
+
+#include "kvm_util.h"
+#include "processor.h"
+#include "test_util.h"
+#include <linux/bitfield.h>
+
+#define BAD_ID_REG_VAL 0x1badc0deul
+
+#define GUEST_ASSERT_REG_RAZ(reg) GUEST_ASSERT_EQ(read_sysreg_s(reg), 0)
+
+static void guest_main(void)
+{
+ GUEST_ASSERT_REG_RAZ(SYS_ID_PFR0_EL1);
+ GUEST_ASSERT_REG_RAZ(SYS_ID_PFR1_EL1);
+ GUEST_ASSERT_REG_RAZ(SYS_ID_DFR0_EL1);
+ GUEST_ASSERT_REG_RAZ(SYS_ID_AFR0_EL1);
+ GUEST_ASSERT_REG_RAZ(SYS_ID_MMFR0_EL1);
+ GUEST_ASSERT_REG_RAZ(SYS_ID_MMFR1_EL1);
+ GUEST_ASSERT_REG_RAZ(SYS_ID_MMFR2_EL1);
+ GUEST_ASSERT_REG_RAZ(SYS_ID_MMFR3_EL1);
+ GUEST_ASSERT_REG_RAZ(SYS_ID_ISAR0_EL1);
+ GUEST_ASSERT_REG_RAZ(SYS_ID_ISAR1_EL1);
+ GUEST_ASSERT_REG_RAZ(SYS_ID_ISAR2_EL1);
+ GUEST_ASSERT_REG_RAZ(SYS_ID_ISAR3_EL1);
+ GUEST_ASSERT_REG_RAZ(SYS_ID_ISAR4_EL1);
+ GUEST_ASSERT_REG_RAZ(SYS_ID_ISAR5_EL1);
+ GUEST_ASSERT_REG_RAZ(SYS_ID_MMFR4_EL1);
+ GUEST_ASSERT_REG_RAZ(SYS_ID_ISAR6_EL1);
+ GUEST_ASSERT_REG_RAZ(SYS_MVFR0_EL1);
+ GUEST_ASSERT_REG_RAZ(SYS_MVFR1_EL1);
+ GUEST_ASSERT_REG_RAZ(SYS_MVFR2_EL1);
+ GUEST_ASSERT_REG_RAZ(sys_reg(3, 0, 0, 3, 3));
+ GUEST_ASSERT_REG_RAZ(SYS_ID_PFR2_EL1);
+ GUEST_ASSERT_REG_RAZ(SYS_ID_DFR1_EL1);
+ GUEST_ASSERT_REG_RAZ(SYS_ID_MMFR5_EL1);
+ GUEST_ASSERT_REG_RAZ(sys_reg(3, 0, 0, 3, 7));
+
+ GUEST_DONE();
+}
+
+static void test_guest_raz(struct kvm_vcpu *vcpu)
+{
+ struct ucall uc;
+
+ vcpu_run(vcpu);
+
+ switch (get_ucall(vcpu, &uc)) {
+ case UCALL_ABORT:
+ REPORT_GUEST_ASSERT(uc);
+ break;
+ case UCALL_DONE:
+ break;
+ default:
+ TEST_FAIL("Unexpected ucall: %lu", uc.cmd);
+ }
+}
+
+static uint64_t raz_wi_reg_ids[] = {
+ KVM_ARM64_SYS_REG(SYS_ID_PFR0_EL1),
+ KVM_ARM64_SYS_REG(SYS_ID_PFR1_EL1),
+ KVM_ARM64_SYS_REG(SYS_ID_DFR0_EL1),
+ KVM_ARM64_SYS_REG(SYS_ID_MMFR0_EL1),
+ KVM_ARM64_SYS_REG(SYS_ID_MMFR1_EL1),
+ KVM_ARM64_SYS_REG(SYS_ID_MMFR2_EL1),
+ KVM_ARM64_SYS_REG(SYS_ID_MMFR3_EL1),
+ KVM_ARM64_SYS_REG(SYS_ID_ISAR0_EL1),
+ KVM_ARM64_SYS_REG(SYS_ID_ISAR1_EL1),
+ KVM_ARM64_SYS_REG(SYS_ID_ISAR2_EL1),
+ KVM_ARM64_SYS_REG(SYS_ID_ISAR3_EL1),
+ KVM_ARM64_SYS_REG(SYS_ID_ISAR4_EL1),
+ KVM_ARM64_SYS_REG(SYS_ID_ISAR5_EL1),
+ KVM_ARM64_SYS_REG(SYS_ID_MMFR4_EL1),
+ KVM_ARM64_SYS_REG(SYS_ID_ISAR6_EL1),
+ KVM_ARM64_SYS_REG(SYS_MVFR0_EL1),
+ KVM_ARM64_SYS_REG(SYS_MVFR1_EL1),
+ KVM_ARM64_SYS_REG(SYS_MVFR2_EL1),
+ KVM_ARM64_SYS_REG(SYS_ID_PFR2_EL1),
+ KVM_ARM64_SYS_REG(SYS_ID_MMFR5_EL1),
+};
+
+static void test_user_raz_wi(struct kvm_vcpu *vcpu)
+{
+ int i;
+
+ for (i = 0; i < ARRAY_SIZE(raz_wi_reg_ids); i++) {
+ uint64_t reg_id = raz_wi_reg_ids[i];
+ uint64_t val;
+
+ vcpu_get_reg(vcpu, reg_id, &val);
+ TEST_ASSERT_EQ(val, 0);
+
+ /*
+ * Expect the ioctl to succeed with no effect on the register
+ * value.
+ */
+ vcpu_set_reg(vcpu, reg_id, BAD_ID_REG_VAL);
+
+ vcpu_get_reg(vcpu, reg_id, &val);
+ TEST_ASSERT_EQ(val, 0);
+ }
+}
+
+static uint64_t raz_invariant_reg_ids[] = {
+ KVM_ARM64_SYS_REG(SYS_ID_AFR0_EL1),
+ KVM_ARM64_SYS_REG(sys_reg(3, 0, 0, 3, 3)),
+ KVM_ARM64_SYS_REG(SYS_ID_DFR1_EL1),
+ KVM_ARM64_SYS_REG(sys_reg(3, 0, 0, 3, 7)),
+};
+
+static void test_user_raz_invariant(struct kvm_vcpu *vcpu)
+{
+ int i, r;
+
+ for (i = 0; i < ARRAY_SIZE(raz_invariant_reg_ids); i++) {
+ uint64_t reg_id = raz_invariant_reg_ids[i];
+ uint64_t val;
+
+ vcpu_get_reg(vcpu, reg_id, &val);
+ TEST_ASSERT_EQ(val, 0);
+
+ r = __vcpu_set_reg(vcpu, reg_id, BAD_ID_REG_VAL);
+ TEST_ASSERT(r < 0 && errno == EINVAL,
+ "unexpected KVM_SET_ONE_REG error: r=%d, errno=%d", r, errno);
+
+ vcpu_get_reg(vcpu, reg_id, &val);
+ TEST_ASSERT_EQ(val, 0);
+ }
+}
+
+
+
+static bool vcpu_aarch64_only(struct kvm_vcpu *vcpu)
+{
+ uint64_t val, el0;
+
+ vcpu_get_reg(vcpu, KVM_ARM64_SYS_REG(SYS_ID_AA64PFR0_EL1), &val);
+
+ el0 = FIELD_GET(ARM64_FEATURE_MASK(ID_AA64PFR0_EL1_EL0), val);
+ return el0 == ID_AA64PFR0_EL1_ELx_64BIT_ONLY;
+}
+
+int main(void)
+{
+ struct kvm_vcpu *vcpu;
+ struct kvm_vm *vm;
+
+ vm = vm_create_with_one_vcpu(&vcpu, guest_main);
+
+ TEST_REQUIRE(vcpu_aarch64_only(vcpu));
+
+ test_user_raz_wi(vcpu);
+ test_user_raz_invariant(vcpu);
+ test_guest_raz(vcpu);
+
+ kvm_vm_free(vm);
+}
diff --git a/tools/testing/selftests/kvm/aarch64/arch_timer.c b/tools/testing/selftests/kvm/aarch64/arch_timer.c
new file mode 100644
index 000000000000..ddba2c2fb5de
--- /dev/null
+++ b/tools/testing/selftests/kvm/aarch64/arch_timer.c
@@ -0,0 +1,225 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * The test validates both the virtual and physical timer IRQs using
+ * CVAL and TVAL registers.
+ *
+ * Copyright (c) 2021, Google LLC.
+ */
+#define _GNU_SOURCE
+
+#include "arch_timer.h"
+#include "delay.h"
+#include "gic.h"
+#include "processor.h"
+#include "timer_test.h"
+#include "vgic.h"
+
+#define GICD_BASE_GPA 0x8000000ULL
+#define GICR_BASE_GPA 0x80A0000ULL
+
+enum guest_stage {
+ GUEST_STAGE_VTIMER_CVAL = 1,
+ GUEST_STAGE_VTIMER_TVAL,
+ GUEST_STAGE_PTIMER_CVAL,
+ GUEST_STAGE_PTIMER_TVAL,
+ GUEST_STAGE_MAX,
+};
+
+static int vtimer_irq, ptimer_irq;
+
+static void
+guest_configure_timer_action(struct test_vcpu_shared_data *shared_data)
+{
+ switch (shared_data->guest_stage) {
+ case GUEST_STAGE_VTIMER_CVAL:
+ timer_set_next_cval_ms(VIRTUAL, test_args.timer_period_ms);
+ shared_data->xcnt = timer_get_cntct(VIRTUAL);
+ timer_set_ctl(VIRTUAL, CTL_ENABLE);
+ break;
+ case GUEST_STAGE_VTIMER_TVAL:
+ timer_set_next_tval_ms(VIRTUAL, test_args.timer_period_ms);
+ shared_data->xcnt = timer_get_cntct(VIRTUAL);
+ timer_set_ctl(VIRTUAL, CTL_ENABLE);
+ break;
+ case GUEST_STAGE_PTIMER_CVAL:
+ timer_set_next_cval_ms(PHYSICAL, test_args.timer_period_ms);
+ shared_data->xcnt = timer_get_cntct(PHYSICAL);
+ timer_set_ctl(PHYSICAL, CTL_ENABLE);
+ break;
+ case GUEST_STAGE_PTIMER_TVAL:
+ timer_set_next_tval_ms(PHYSICAL, test_args.timer_period_ms);
+ shared_data->xcnt = timer_get_cntct(PHYSICAL);
+ timer_set_ctl(PHYSICAL, CTL_ENABLE);
+ break;
+ default:
+ GUEST_ASSERT(0);
+ }
+}
+
+static void guest_validate_irq(unsigned int intid,
+ struct test_vcpu_shared_data *shared_data)
+{
+ enum guest_stage stage = shared_data->guest_stage;
+ uint64_t xcnt = 0, xcnt_diff_us, cval = 0;
+ unsigned long xctl = 0;
+ unsigned int timer_irq = 0;
+ unsigned int accessor;
+
+ if (intid == IAR_SPURIOUS)
+ return;
+
+ switch (stage) {
+ case GUEST_STAGE_VTIMER_CVAL:
+ case GUEST_STAGE_VTIMER_TVAL:
+ accessor = VIRTUAL;
+ timer_irq = vtimer_irq;
+ break;
+ case GUEST_STAGE_PTIMER_CVAL:
+ case GUEST_STAGE_PTIMER_TVAL:
+ accessor = PHYSICAL;
+ timer_irq = ptimer_irq;
+ break;
+ default:
+ GUEST_ASSERT(0);
+ return;
+ }
+
+ xctl = timer_get_ctl(accessor);
+ if ((xctl & CTL_IMASK) || !(xctl & CTL_ENABLE))
+ return;
+
+ timer_set_ctl(accessor, CTL_IMASK);
+ xcnt = timer_get_cntct(accessor);
+ cval = timer_get_cval(accessor);
+
+ xcnt_diff_us = cycles_to_usec(xcnt - shared_data->xcnt);
+
+ /* Make sure we are dealing with the correct timer IRQ */
+ GUEST_ASSERT_EQ(intid, timer_irq);
+
+ /* Basic 'timer condition met' check */
+ __GUEST_ASSERT(xcnt >= cval,
+ "xcnt = 0x%lx, cval = 0x%lx, xcnt_diff_us = 0x%lx",
+ xcnt, cval, xcnt_diff_us);
+ __GUEST_ASSERT(xctl & CTL_ISTATUS, "xctl = 0x%lx", xctl);
+
+ WRITE_ONCE(shared_data->nr_iter, shared_data->nr_iter + 1);
+}
+
+static void guest_irq_handler(struct ex_regs *regs)
+{
+ unsigned int intid = gic_get_and_ack_irq();
+ uint32_t cpu = guest_get_vcpuid();
+ struct test_vcpu_shared_data *shared_data = &vcpu_shared_data[cpu];
+
+ guest_validate_irq(intid, shared_data);
+
+ gic_set_eoi(intid);
+}
+
+static void guest_run_stage(struct test_vcpu_shared_data *shared_data,
+ enum guest_stage stage)
+{
+ uint32_t irq_iter, config_iter;
+
+ shared_data->guest_stage = stage;
+ shared_data->nr_iter = 0;
+
+ for (config_iter = 0; config_iter < test_args.nr_iter; config_iter++) {
+ /* Setup the next interrupt */
+ guest_configure_timer_action(shared_data);
+
+ /* Setup a timeout for the interrupt to arrive */
+ udelay(msecs_to_usecs(test_args.timer_period_ms) +
+ test_args.timer_err_margin_us);
+
+ irq_iter = READ_ONCE(shared_data->nr_iter);
+ __GUEST_ASSERT(config_iter + 1 == irq_iter,
+ "config_iter + 1 = 0x%lx, irq_iter = 0x%lx.\n"
+ " Guest timer interrupt was not trigged within the specified\n"
+ " interval, try to increase the error margin by [-e] option.\n",
+ config_iter + 1, irq_iter);
+ }
+}
+
+static void guest_code(void)
+{
+ uint32_t cpu = guest_get_vcpuid();
+ struct test_vcpu_shared_data *shared_data = &vcpu_shared_data[cpu];
+
+ local_irq_disable();
+
+ gic_init(GIC_V3, test_args.nr_vcpus,
+ (void *)GICD_BASE_GPA, (void *)GICR_BASE_GPA);
+
+ timer_set_ctl(VIRTUAL, CTL_IMASK);
+ timer_set_ctl(PHYSICAL, CTL_IMASK);
+
+ gic_irq_enable(vtimer_irq);
+ gic_irq_enable(ptimer_irq);
+ local_irq_enable();
+
+ guest_run_stage(shared_data, GUEST_STAGE_VTIMER_CVAL);
+ guest_run_stage(shared_data, GUEST_STAGE_VTIMER_TVAL);
+ guest_run_stage(shared_data, GUEST_STAGE_PTIMER_CVAL);
+ guest_run_stage(shared_data, GUEST_STAGE_PTIMER_TVAL);
+
+ GUEST_DONE();
+}
+
+static void test_init_timer_irq(struct kvm_vm *vm)
+{
+ /* Timer initid should be same for all the vCPUs, so query only vCPU-0 */
+ vcpu_device_attr_get(vcpus[0], KVM_ARM_VCPU_TIMER_CTRL,
+ KVM_ARM_VCPU_TIMER_IRQ_PTIMER, &ptimer_irq);
+ vcpu_device_attr_get(vcpus[0], KVM_ARM_VCPU_TIMER_CTRL,
+ KVM_ARM_VCPU_TIMER_IRQ_VTIMER, &vtimer_irq);
+
+ sync_global_to_guest(vm, ptimer_irq);
+ sync_global_to_guest(vm, vtimer_irq);
+
+ pr_debug("ptimer_irq: %d; vtimer_irq: %d\n", ptimer_irq, vtimer_irq);
+}
+
+static int gic_fd;
+
+struct kvm_vm *test_vm_create(void)
+{
+ struct kvm_vm *vm;
+ unsigned int i;
+ int nr_vcpus = test_args.nr_vcpus;
+
+ vm = vm_create_with_vcpus(nr_vcpus, guest_code, vcpus);
+
+ vm_init_descriptor_tables(vm);
+ vm_install_exception_handler(vm, VECTOR_IRQ_CURRENT, guest_irq_handler);
+
+ if (!test_args.reserved) {
+ if (kvm_has_cap(KVM_CAP_COUNTER_OFFSET)) {
+ struct kvm_arm_counter_offset offset = {
+ .counter_offset = test_args.counter_offset,
+ .reserved = 0,
+ };
+ vm_ioctl(vm, KVM_ARM_SET_COUNTER_OFFSET, &offset);
+ } else
+ TEST_FAIL("no support for global offset");
+ }
+
+ for (i = 0; i < nr_vcpus; i++)
+ vcpu_init_descriptor_tables(vcpus[i]);
+
+ test_init_timer_irq(vm);
+ gic_fd = vgic_v3_setup(vm, nr_vcpus, 64, GICD_BASE_GPA, GICR_BASE_GPA);
+ __TEST_REQUIRE(gic_fd >= 0, "Failed to create vgic-v3");
+
+ /* Make all the test's cmdline args visible to the guest */
+ sync_global_to_guest(vm, test_args);
+
+ return vm;
+}
+
+void test_vm_cleanup(struct kvm_vm *vm)
+{
+ close(gic_fd);
+ kvm_vm_free(vm);
+}
diff --git a/tools/testing/selftests/kvm/aarch64/debug-exceptions.c b/tools/testing/selftests/kvm/aarch64/debug-exceptions.c
new file mode 100644
index 000000000000..2582c49e525a
--- /dev/null
+++ b/tools/testing/selftests/kvm/aarch64/debug-exceptions.c
@@ -0,0 +1,607 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <test_util.h>
+#include <kvm_util.h>
+#include <processor.h>
+#include <linux/bitfield.h>
+
+#define MDSCR_KDE (1 << 13)
+#define MDSCR_MDE (1 << 15)
+#define MDSCR_SS (1 << 0)
+
+#define DBGBCR_LEN8 (0xff << 5)
+#define DBGBCR_EXEC (0x0 << 3)
+#define DBGBCR_EL1 (0x1 << 1)
+#define DBGBCR_E (0x1 << 0)
+#define DBGBCR_LBN_SHIFT 16
+#define DBGBCR_BT_SHIFT 20
+#define DBGBCR_BT_ADDR_LINK_CTX (0x1 << DBGBCR_BT_SHIFT)
+#define DBGBCR_BT_CTX_LINK (0x3 << DBGBCR_BT_SHIFT)
+
+#define DBGWCR_LEN8 (0xff << 5)
+#define DBGWCR_RD (0x1 << 3)
+#define DBGWCR_WR (0x2 << 3)
+#define DBGWCR_EL1 (0x1 << 1)
+#define DBGWCR_E (0x1 << 0)
+#define DBGWCR_LBN_SHIFT 16
+#define DBGWCR_WT_SHIFT 20
+#define DBGWCR_WT_LINK (0x1 << DBGWCR_WT_SHIFT)
+
+#define SPSR_D (1 << 9)
+#define SPSR_SS (1 << 21)
+
+extern unsigned char sw_bp, sw_bp2, hw_bp, hw_bp2, bp_svc, bp_brk, hw_wp, ss_start, hw_bp_ctx;
+extern unsigned char iter_ss_begin, iter_ss_end;
+static volatile uint64_t sw_bp_addr, hw_bp_addr;
+static volatile uint64_t wp_addr, wp_data_addr;
+static volatile uint64_t svc_addr;
+static volatile uint64_t ss_addr[4], ss_idx;
+#define PC(v) ((uint64_t)&(v))
+
+#define GEN_DEBUG_WRITE_REG(reg_name) \
+static void write_##reg_name(int num, uint64_t val) \
+{ \
+ switch (num) { \
+ case 0: \
+ write_sysreg(val, reg_name##0_el1); \
+ break; \
+ case 1: \
+ write_sysreg(val, reg_name##1_el1); \
+ break; \
+ case 2: \
+ write_sysreg(val, reg_name##2_el1); \
+ break; \
+ case 3: \
+ write_sysreg(val, reg_name##3_el1); \
+ break; \
+ case 4: \
+ write_sysreg(val, reg_name##4_el1); \
+ break; \
+ case 5: \
+ write_sysreg(val, reg_name##5_el1); \
+ break; \
+ case 6: \
+ write_sysreg(val, reg_name##6_el1); \
+ break; \
+ case 7: \
+ write_sysreg(val, reg_name##7_el1); \
+ break; \
+ case 8: \
+ write_sysreg(val, reg_name##8_el1); \
+ break; \
+ case 9: \
+ write_sysreg(val, reg_name##9_el1); \
+ break; \
+ case 10: \
+ write_sysreg(val, reg_name##10_el1); \
+ break; \
+ case 11: \
+ write_sysreg(val, reg_name##11_el1); \
+ break; \
+ case 12: \
+ write_sysreg(val, reg_name##12_el1); \
+ break; \
+ case 13: \
+ write_sysreg(val, reg_name##13_el1); \
+ break; \
+ case 14: \
+ write_sysreg(val, reg_name##14_el1); \
+ break; \
+ case 15: \
+ write_sysreg(val, reg_name##15_el1); \
+ break; \
+ default: \
+ GUEST_ASSERT(0); \
+ } \
+}
+
+/* Define write_dbgbcr()/write_dbgbvr()/write_dbgwcr()/write_dbgwvr() */
+GEN_DEBUG_WRITE_REG(dbgbcr)
+GEN_DEBUG_WRITE_REG(dbgbvr)
+GEN_DEBUG_WRITE_REG(dbgwcr)
+GEN_DEBUG_WRITE_REG(dbgwvr)
+
+static void reset_debug_state(void)
+{
+ uint8_t brps, wrps, i;
+ uint64_t dfr0;
+
+ asm volatile("msr daifset, #8");
+
+ write_sysreg(0, osdlr_el1);
+ write_sysreg(0, oslar_el1);
+ isb();
+
+ write_sysreg(0, mdscr_el1);
+ write_sysreg(0, contextidr_el1);
+
+ /* Reset all bcr/bvr/wcr/wvr registers */
+ dfr0 = read_sysreg(id_aa64dfr0_el1);
+ brps = FIELD_GET(ARM64_FEATURE_MASK(ID_AA64DFR0_EL1_BRPs), dfr0);
+ for (i = 0; i <= brps; i++) {
+ write_dbgbcr(i, 0);
+ write_dbgbvr(i, 0);
+ }
+ wrps = FIELD_GET(ARM64_FEATURE_MASK(ID_AA64DFR0_EL1_WRPs), dfr0);
+ for (i = 0; i <= wrps; i++) {
+ write_dbgwcr(i, 0);
+ write_dbgwvr(i, 0);
+ }
+
+ isb();
+}
+
+static void enable_os_lock(void)
+{
+ write_sysreg(1, oslar_el1);
+ isb();
+
+ GUEST_ASSERT(read_sysreg(oslsr_el1) & 2);
+}
+
+static void enable_monitor_debug_exceptions(void)
+{
+ uint32_t mdscr;
+
+ asm volatile("msr daifclr, #8");
+
+ mdscr = read_sysreg(mdscr_el1) | MDSCR_KDE | MDSCR_MDE;
+ write_sysreg(mdscr, mdscr_el1);
+ isb();
+}
+
+static void install_wp(uint8_t wpn, uint64_t addr)
+{
+ uint32_t wcr;
+
+ wcr = DBGWCR_LEN8 | DBGWCR_RD | DBGWCR_WR | DBGWCR_EL1 | DBGWCR_E;
+ write_dbgwcr(wpn, wcr);
+ write_dbgwvr(wpn, addr);
+
+ isb();
+
+ enable_monitor_debug_exceptions();
+}
+
+static void install_hw_bp(uint8_t bpn, uint64_t addr)
+{
+ uint32_t bcr;
+
+ bcr = DBGBCR_LEN8 | DBGBCR_EXEC | DBGBCR_EL1 | DBGBCR_E;
+ write_dbgbcr(bpn, bcr);
+ write_dbgbvr(bpn, addr);
+ isb();
+
+ enable_monitor_debug_exceptions();
+}
+
+static void install_wp_ctx(uint8_t addr_wp, uint8_t ctx_bp, uint64_t addr,
+ uint64_t ctx)
+{
+ uint32_t wcr;
+ uint64_t ctx_bcr;
+
+ /* Setup a context-aware breakpoint for Linked Context ID Match */
+ ctx_bcr = DBGBCR_LEN8 | DBGBCR_EXEC | DBGBCR_EL1 | DBGBCR_E |
+ DBGBCR_BT_CTX_LINK;
+ write_dbgbcr(ctx_bp, ctx_bcr);
+ write_dbgbvr(ctx_bp, ctx);
+
+ /* Setup a linked watchpoint (linked to the context-aware breakpoint) */
+ wcr = DBGWCR_LEN8 | DBGWCR_RD | DBGWCR_WR | DBGWCR_EL1 | DBGWCR_E |
+ DBGWCR_WT_LINK | ((uint32_t)ctx_bp << DBGWCR_LBN_SHIFT);
+ write_dbgwcr(addr_wp, wcr);
+ write_dbgwvr(addr_wp, addr);
+ isb();
+
+ enable_monitor_debug_exceptions();
+}
+
+void install_hw_bp_ctx(uint8_t addr_bp, uint8_t ctx_bp, uint64_t addr,
+ uint64_t ctx)
+{
+ uint32_t addr_bcr, ctx_bcr;
+
+ /* Setup a context-aware breakpoint for Linked Context ID Match */
+ ctx_bcr = DBGBCR_LEN8 | DBGBCR_EXEC | DBGBCR_EL1 | DBGBCR_E |
+ DBGBCR_BT_CTX_LINK;
+ write_dbgbcr(ctx_bp, ctx_bcr);
+ write_dbgbvr(ctx_bp, ctx);
+
+ /*
+ * Setup a normal breakpoint for Linked Address Match, and link it
+ * to the context-aware breakpoint.
+ */
+ addr_bcr = DBGBCR_LEN8 | DBGBCR_EXEC | DBGBCR_EL1 | DBGBCR_E |
+ DBGBCR_BT_ADDR_LINK_CTX |
+ ((uint32_t)ctx_bp << DBGBCR_LBN_SHIFT);
+ write_dbgbcr(addr_bp, addr_bcr);
+ write_dbgbvr(addr_bp, addr);
+ isb();
+
+ enable_monitor_debug_exceptions();
+}
+
+static void install_ss(void)
+{
+ uint32_t mdscr;
+
+ asm volatile("msr daifclr, #8");
+
+ mdscr = read_sysreg(mdscr_el1) | MDSCR_KDE | MDSCR_SS;
+ write_sysreg(mdscr, mdscr_el1);
+ isb();
+}
+
+static volatile char write_data;
+
+static void guest_code(uint8_t bpn, uint8_t wpn, uint8_t ctx_bpn)
+{
+ uint64_t ctx = 0xabcdef; /* a random context number */
+
+ /* Software-breakpoint */
+ reset_debug_state();
+ asm volatile("sw_bp: brk #0");
+ GUEST_ASSERT_EQ(sw_bp_addr, PC(sw_bp));
+
+ /* Hardware-breakpoint */
+ reset_debug_state();
+ install_hw_bp(bpn, PC(hw_bp));
+ asm volatile("hw_bp: nop");
+ GUEST_ASSERT_EQ(hw_bp_addr, PC(hw_bp));
+
+ /* Hardware-breakpoint + svc */
+ reset_debug_state();
+ install_hw_bp(bpn, PC(bp_svc));
+ asm volatile("bp_svc: svc #0");
+ GUEST_ASSERT_EQ(hw_bp_addr, PC(bp_svc));
+ GUEST_ASSERT_EQ(svc_addr, PC(bp_svc) + 4);
+
+ /* Hardware-breakpoint + software-breakpoint */
+ reset_debug_state();
+ install_hw_bp(bpn, PC(bp_brk));
+ asm volatile("bp_brk: brk #0");
+ GUEST_ASSERT_EQ(sw_bp_addr, PC(bp_brk));
+ GUEST_ASSERT_EQ(hw_bp_addr, PC(bp_brk));
+
+ /* Watchpoint */
+ reset_debug_state();
+ install_wp(wpn, PC(write_data));
+ write_data = 'x';
+ GUEST_ASSERT_EQ(write_data, 'x');
+ GUEST_ASSERT_EQ(wp_data_addr, PC(write_data));
+
+ /* Single-step */
+ reset_debug_state();
+ install_ss();
+ ss_idx = 0;
+ asm volatile("ss_start:\n"
+ "mrs x0, esr_el1\n"
+ "add x0, x0, #1\n"
+ "msr daifset, #8\n"
+ : : : "x0");
+ GUEST_ASSERT_EQ(ss_addr[0], PC(ss_start));
+ GUEST_ASSERT_EQ(ss_addr[1], PC(ss_start) + 4);
+ GUEST_ASSERT_EQ(ss_addr[2], PC(ss_start) + 8);
+
+ /* OS Lock does not block software-breakpoint */
+ reset_debug_state();
+ enable_os_lock();
+ sw_bp_addr = 0;
+ asm volatile("sw_bp2: brk #0");
+ GUEST_ASSERT_EQ(sw_bp_addr, PC(sw_bp2));
+
+ /* OS Lock blocking hardware-breakpoint */
+ reset_debug_state();
+ enable_os_lock();
+ install_hw_bp(bpn, PC(hw_bp2));
+ hw_bp_addr = 0;
+ asm volatile("hw_bp2: nop");
+ GUEST_ASSERT_EQ(hw_bp_addr, 0);
+
+ /* OS Lock blocking watchpoint */
+ reset_debug_state();
+ enable_os_lock();
+ write_data = '\0';
+ wp_data_addr = 0;
+ install_wp(wpn, PC(write_data));
+ write_data = 'x';
+ GUEST_ASSERT_EQ(write_data, 'x');
+ GUEST_ASSERT_EQ(wp_data_addr, 0);
+
+ /* OS Lock blocking single-step */
+ reset_debug_state();
+ enable_os_lock();
+ ss_addr[0] = 0;
+ install_ss();
+ ss_idx = 0;
+ asm volatile("mrs x0, esr_el1\n\t"
+ "add x0, x0, #1\n\t"
+ "msr daifset, #8\n\t"
+ : : : "x0");
+ GUEST_ASSERT_EQ(ss_addr[0], 0);
+
+ /* Linked hardware-breakpoint */
+ hw_bp_addr = 0;
+ reset_debug_state();
+ install_hw_bp_ctx(bpn, ctx_bpn, PC(hw_bp_ctx), ctx);
+ /* Set context id */
+ write_sysreg(ctx, contextidr_el1);
+ isb();
+ asm volatile("hw_bp_ctx: nop");
+ write_sysreg(0, contextidr_el1);
+ GUEST_ASSERT_EQ(hw_bp_addr, PC(hw_bp_ctx));
+
+ /* Linked watchpoint */
+ reset_debug_state();
+ install_wp_ctx(wpn, ctx_bpn, PC(write_data), ctx);
+ /* Set context id */
+ write_sysreg(ctx, contextidr_el1);
+ isb();
+ write_data = 'x';
+ GUEST_ASSERT_EQ(write_data, 'x');
+ GUEST_ASSERT_EQ(wp_data_addr, PC(write_data));
+
+ GUEST_DONE();
+}
+
+static void guest_sw_bp_handler(struct ex_regs *regs)
+{
+ sw_bp_addr = regs->pc;
+ regs->pc += 4;
+}
+
+static void guest_hw_bp_handler(struct ex_regs *regs)
+{
+ hw_bp_addr = regs->pc;
+ regs->pstate |= SPSR_D;
+}
+
+static void guest_wp_handler(struct ex_regs *regs)
+{
+ wp_data_addr = read_sysreg(far_el1);
+ wp_addr = regs->pc;
+ regs->pstate |= SPSR_D;
+}
+
+static void guest_ss_handler(struct ex_regs *regs)
+{
+ __GUEST_ASSERT(ss_idx < 4, "Expected index < 4, got '%lu'", ss_idx);
+ ss_addr[ss_idx++] = regs->pc;
+ regs->pstate |= SPSR_SS;
+}
+
+static void guest_svc_handler(struct ex_regs *regs)
+{
+ svc_addr = regs->pc;
+}
+
+static void guest_code_ss(int test_cnt)
+{
+ uint64_t i;
+ uint64_t bvr, wvr, w_bvr, w_wvr;
+
+ for (i = 0; i < test_cnt; i++) {
+ /* Bits [1:0] of dbg{b,w}vr are RES0 */
+ w_bvr = i << 2;
+ w_wvr = i << 2;
+
+ /*
+ * Enable Single Step execution. Note! This _must_ be a bare
+ * ucall as the ucall() path uses atomic operations to manage
+ * the ucall structures, and the built-in "atomics" are usually
+ * implemented via exclusive access instructions. The exlusive
+ * monitor is cleared on ERET, and so taking debug exceptions
+ * during a LDREX=>STREX sequence will prevent forward progress
+ * and hang the guest/test.
+ */
+ GUEST_UCALL_NONE();
+
+ /*
+ * The userspace will verify that the pc is as expected during
+ * single step execution between iter_ss_begin and iter_ss_end.
+ */
+ asm volatile("iter_ss_begin:nop\n");
+
+ write_sysreg(w_bvr, dbgbvr0_el1);
+ write_sysreg(w_wvr, dbgwvr0_el1);
+ bvr = read_sysreg(dbgbvr0_el1);
+ wvr = read_sysreg(dbgwvr0_el1);
+
+ /* Userspace disables Single Step when the end is nigh. */
+ asm volatile("iter_ss_end:\n");
+
+ GUEST_ASSERT_EQ(bvr, w_bvr);
+ GUEST_ASSERT_EQ(wvr, w_wvr);
+ }
+ GUEST_DONE();
+}
+
+static int debug_version(uint64_t id_aa64dfr0)
+{
+ return FIELD_GET(ARM64_FEATURE_MASK(ID_AA64DFR0_EL1_DebugVer), id_aa64dfr0);
+}
+
+static void test_guest_debug_exceptions(uint8_t bpn, uint8_t wpn, uint8_t ctx_bpn)
+{
+ struct kvm_vcpu *vcpu;
+ struct kvm_vm *vm;
+ struct ucall uc;
+
+ vm = vm_create_with_one_vcpu(&vcpu, guest_code);
+
+ vm_init_descriptor_tables(vm);
+ vcpu_init_descriptor_tables(vcpu);
+
+ vm_install_sync_handler(vm, VECTOR_SYNC_CURRENT,
+ ESR_EC_BRK_INS, guest_sw_bp_handler);
+ vm_install_sync_handler(vm, VECTOR_SYNC_CURRENT,
+ ESR_EC_HW_BP_CURRENT, guest_hw_bp_handler);
+ vm_install_sync_handler(vm, VECTOR_SYNC_CURRENT,
+ ESR_EC_WP_CURRENT, guest_wp_handler);
+ vm_install_sync_handler(vm, VECTOR_SYNC_CURRENT,
+ ESR_EC_SSTEP_CURRENT, guest_ss_handler);
+ vm_install_sync_handler(vm, VECTOR_SYNC_CURRENT,
+ ESR_EC_SVC64, guest_svc_handler);
+
+ /* Specify bpn/wpn/ctx_bpn to be tested */
+ vcpu_args_set(vcpu, 3, bpn, wpn, ctx_bpn);
+ pr_debug("Use bpn#%d, wpn#%d and ctx_bpn#%d\n", bpn, wpn, ctx_bpn);
+
+ vcpu_run(vcpu);
+ switch (get_ucall(vcpu, &uc)) {
+ case UCALL_ABORT:
+ REPORT_GUEST_ASSERT(uc);
+ break;
+ case UCALL_DONE:
+ goto done;
+ default:
+ TEST_FAIL("Unknown ucall %lu", uc.cmd);
+ }
+
+done:
+ kvm_vm_free(vm);
+}
+
+void test_single_step_from_userspace(int test_cnt)
+{
+ struct kvm_vcpu *vcpu;
+ struct kvm_vm *vm;
+ struct ucall uc;
+ struct kvm_run *run;
+ uint64_t pc, cmd;
+ uint64_t test_pc = 0;
+ bool ss_enable = false;
+ struct kvm_guest_debug debug = {};
+
+ vm = vm_create_with_one_vcpu(&vcpu, guest_code_ss);
+ run = vcpu->run;
+ vcpu_args_set(vcpu, 1, test_cnt);
+
+ while (1) {
+ vcpu_run(vcpu);
+ if (run->exit_reason != KVM_EXIT_DEBUG) {
+ cmd = get_ucall(vcpu, &uc);
+ if (cmd == UCALL_ABORT) {
+ REPORT_GUEST_ASSERT(uc);
+ /* NOT REACHED */
+ } else if (cmd == UCALL_DONE) {
+ break;
+ }
+
+ TEST_ASSERT(cmd == UCALL_NONE,
+ "Unexpected ucall cmd 0x%lx", cmd);
+
+ debug.control = KVM_GUESTDBG_ENABLE |
+ KVM_GUESTDBG_SINGLESTEP;
+ ss_enable = true;
+ vcpu_guest_debug_set(vcpu, &debug);
+ continue;
+ }
+
+ TEST_ASSERT(ss_enable, "Unexpected KVM_EXIT_DEBUG");
+
+ /* Check if the current pc is expected. */
+ vcpu_get_reg(vcpu, ARM64_CORE_REG(regs.pc), &pc);
+ TEST_ASSERT(!test_pc || pc == test_pc,
+ "Unexpected pc 0x%lx (expected 0x%lx)",
+ pc, test_pc);
+
+ if ((pc + 4) == (uint64_t)&iter_ss_end) {
+ test_pc = 0;
+ debug.control = KVM_GUESTDBG_ENABLE;
+ ss_enable = false;
+ vcpu_guest_debug_set(vcpu, &debug);
+ continue;
+ }
+
+ /*
+ * If the current pc is between iter_ss_bgin and
+ * iter_ss_end, the pc for the next KVM_EXIT_DEBUG should
+ * be the current pc + 4.
+ */
+ if ((pc >= (uint64_t)&iter_ss_begin) &&
+ (pc < (uint64_t)&iter_ss_end))
+ test_pc = pc + 4;
+ else
+ test_pc = 0;
+ }
+
+ kvm_vm_free(vm);
+}
+
+/*
+ * Run debug testing using the various breakpoint#, watchpoint# and
+ * context-aware breakpoint# with the given ID_AA64DFR0_EL1 configuration.
+ */
+void test_guest_debug_exceptions_all(uint64_t aa64dfr0)
+{
+ uint8_t brp_num, wrp_num, ctx_brp_num, normal_brp_num, ctx_brp_base;
+ int b, w, c;
+
+ /* Number of breakpoints */
+ brp_num = FIELD_GET(ARM64_FEATURE_MASK(ID_AA64DFR0_EL1_BRPs), aa64dfr0) + 1;
+ __TEST_REQUIRE(brp_num >= 2, "At least two breakpoints are required");
+
+ /* Number of watchpoints */
+ wrp_num = FIELD_GET(ARM64_FEATURE_MASK(ID_AA64DFR0_EL1_WRPs), aa64dfr0) + 1;
+
+ /* Number of context aware breakpoints */
+ ctx_brp_num = FIELD_GET(ARM64_FEATURE_MASK(ID_AA64DFR0_EL1_CTX_CMPs), aa64dfr0) + 1;
+
+ pr_debug("%s brp_num:%d, wrp_num:%d, ctx_brp_num:%d\n", __func__,
+ brp_num, wrp_num, ctx_brp_num);
+
+ /* Number of normal (non-context aware) breakpoints */
+ normal_brp_num = brp_num - ctx_brp_num;
+
+ /* Lowest context aware breakpoint number */
+ ctx_brp_base = normal_brp_num;
+
+ /* Run tests with all supported breakpoints/watchpoints */
+ for (c = ctx_brp_base; c < ctx_brp_base + ctx_brp_num; c++) {
+ for (b = 0; b < normal_brp_num; b++) {
+ for (w = 0; w < wrp_num; w++)
+ test_guest_debug_exceptions(b, w, c);
+ }
+ }
+}
+
+static void help(char *name)
+{
+ puts("");
+ printf("Usage: %s [-h] [-i iterations of the single step test]\n", name);
+ puts("");
+ exit(0);
+}
+
+int main(int argc, char *argv[])
+{
+ struct kvm_vcpu *vcpu;
+ struct kvm_vm *vm;
+ int opt;
+ int ss_iteration = 10000;
+ uint64_t aa64dfr0;
+
+ vm = vm_create_with_one_vcpu(&vcpu, guest_code);
+ vcpu_get_reg(vcpu, KVM_ARM64_SYS_REG(SYS_ID_AA64DFR0_EL1), &aa64dfr0);
+ __TEST_REQUIRE(debug_version(aa64dfr0) >= 6,
+ "Armv8 debug architecture not supported.");
+ kvm_vm_free(vm);
+
+ while ((opt = getopt(argc, argv, "i:")) != -1) {
+ switch (opt) {
+ case 'i':
+ ss_iteration = atoi_positive("Number of iterations", optarg);
+ break;
+ case 'h':
+ default:
+ help(argv[0]);
+ break;
+ }
+ }
+
+ test_guest_debug_exceptions_all(aa64dfr0);
+ test_single_step_from_userspace(ss_iteration);
+
+ return 0;
+}
diff --git a/tools/testing/selftests/kvm/aarch64/get-reg-list.c b/tools/testing/selftests/kvm/aarch64/get-reg-list.c
new file mode 100644
index 000000000000..709d7d721760
--- /dev/null
+++ b/tools/testing/selftests/kvm/aarch64/get-reg-list.c
@@ -0,0 +1,757 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Check for KVM_GET_REG_LIST regressions.
+ *
+ * Copyright (C) 2020, Red Hat, Inc.
+ *
+ * While the blessed list should be created from the oldest possible
+ * kernel, we can't go older than v5.2, though, because that's the first
+ * release which includes df205b5c6328 ("KVM: arm64: Filter out invalid
+ * core register IDs in KVM_GET_REG_LIST"). Without that commit the core
+ * registers won't match expectations.
+ */
+#include <stdio.h>
+#include "kvm_util.h"
+#include "test_util.h"
+#include "processor.h"
+
+struct feature_id_reg {
+ __u64 reg;
+ __u64 id_reg;
+ __u64 feat_shift;
+ __u64 feat_min;
+};
+
+static struct feature_id_reg feat_id_regs[] = {
+ {
+ ARM64_SYS_REG(3, 0, 2, 0, 3), /* TCR2_EL1 */
+ ARM64_SYS_REG(3, 0, 0, 7, 3), /* ID_AA64MMFR3_EL1 */
+ 0,
+ 1
+ },
+ {
+ ARM64_SYS_REG(3, 0, 10, 2, 2), /* PIRE0_EL1 */
+ ARM64_SYS_REG(3, 0, 0, 7, 3), /* ID_AA64MMFR3_EL1 */
+ 4,
+ 1
+ },
+ {
+ ARM64_SYS_REG(3, 0, 10, 2, 3), /* PIR_EL1 */
+ ARM64_SYS_REG(3, 0, 0, 7, 3), /* ID_AA64MMFR3_EL1 */
+ 4,
+ 1
+ }
+};
+
+bool filter_reg(__u64 reg)
+{
+ /*
+ * DEMUX register presence depends on the host's CLIDR_EL1.
+ * This means there's no set of them that we can bless.
+ */
+ if ((reg & KVM_REG_ARM_COPROC_MASK) == KVM_REG_ARM_DEMUX)
+ return true;
+
+ return false;
+}
+
+static bool check_supported_feat_reg(struct kvm_vcpu *vcpu, __u64 reg)
+{
+ int i, ret;
+ __u64 data, feat_val;
+
+ for (i = 0; i < ARRAY_SIZE(feat_id_regs); i++) {
+ if (feat_id_regs[i].reg == reg) {
+ ret = __vcpu_get_reg(vcpu, feat_id_regs[i].id_reg, &data);
+ if (ret < 0)
+ return false;
+
+ feat_val = ((data >> feat_id_regs[i].feat_shift) & 0xf);
+ return feat_val >= feat_id_regs[i].feat_min;
+ }
+ }
+
+ return true;
+}
+
+bool check_supported_reg(struct kvm_vcpu *vcpu, __u64 reg)
+{
+ return check_supported_feat_reg(vcpu, reg);
+}
+
+bool check_reject_set(int err)
+{
+ return err == EPERM;
+}
+
+void finalize_vcpu(struct kvm_vcpu *vcpu, struct vcpu_reg_list *c)
+{
+ struct vcpu_reg_sublist *s;
+ int feature;
+
+ for_each_sublist(c, s) {
+ if (s->finalize) {
+ feature = s->feature;
+ vcpu_ioctl(vcpu, KVM_ARM_VCPU_FINALIZE, &feature);
+ }
+ }
+}
+
+#define REG_MASK (KVM_REG_ARCH_MASK | KVM_REG_SIZE_MASK | KVM_REG_ARM_COPROC_MASK)
+
+#define CORE_REGS_XX_NR_WORDS 2
+#define CORE_SPSR_XX_NR_WORDS 2
+#define CORE_FPREGS_XX_NR_WORDS 4
+
+static const char *core_id_to_str(const char *prefix, __u64 id)
+{
+ __u64 core_off = id & ~REG_MASK, idx;
+
+ /*
+ * core_off is the offset into struct kvm_regs
+ */
+ switch (core_off) {
+ case KVM_REG_ARM_CORE_REG(regs.regs[0]) ...
+ KVM_REG_ARM_CORE_REG(regs.regs[30]):
+ idx = (core_off - KVM_REG_ARM_CORE_REG(regs.regs[0])) / CORE_REGS_XX_NR_WORDS;
+ TEST_ASSERT(idx < 31, "%s: Unexpected regs.regs index: %lld", prefix, idx);
+ return strdup_printf("KVM_REG_ARM_CORE_REG(regs.regs[%lld])", idx);
+ case KVM_REG_ARM_CORE_REG(regs.sp):
+ return "KVM_REG_ARM_CORE_REG(regs.sp)";
+ case KVM_REG_ARM_CORE_REG(regs.pc):
+ return "KVM_REG_ARM_CORE_REG(regs.pc)";
+ case KVM_REG_ARM_CORE_REG(regs.pstate):
+ return "KVM_REG_ARM_CORE_REG(regs.pstate)";
+ case KVM_REG_ARM_CORE_REG(sp_el1):
+ return "KVM_REG_ARM_CORE_REG(sp_el1)";
+ case KVM_REG_ARM_CORE_REG(elr_el1):
+ return "KVM_REG_ARM_CORE_REG(elr_el1)";
+ case KVM_REG_ARM_CORE_REG(spsr[0]) ...
+ KVM_REG_ARM_CORE_REG(spsr[KVM_NR_SPSR - 1]):
+ idx = (core_off - KVM_REG_ARM_CORE_REG(spsr[0])) / CORE_SPSR_XX_NR_WORDS;
+ TEST_ASSERT(idx < KVM_NR_SPSR, "%s: Unexpected spsr index: %lld", prefix, idx);
+ return strdup_printf("KVM_REG_ARM_CORE_REG(spsr[%lld])", idx);
+ case KVM_REG_ARM_CORE_REG(fp_regs.vregs[0]) ...
+ KVM_REG_ARM_CORE_REG(fp_regs.vregs[31]):
+ idx = (core_off - KVM_REG_ARM_CORE_REG(fp_regs.vregs[0])) / CORE_FPREGS_XX_NR_WORDS;
+ TEST_ASSERT(idx < 32, "%s: Unexpected fp_regs.vregs index: %lld", prefix, idx);
+ return strdup_printf("KVM_REG_ARM_CORE_REG(fp_regs.vregs[%lld])", idx);
+ case KVM_REG_ARM_CORE_REG(fp_regs.fpsr):
+ return "KVM_REG_ARM_CORE_REG(fp_regs.fpsr)";
+ case KVM_REG_ARM_CORE_REG(fp_regs.fpcr):
+ return "KVM_REG_ARM_CORE_REG(fp_regs.fpcr)";
+ }
+
+ TEST_FAIL("%s: Unknown core reg id: 0x%llx", prefix, id);
+ return NULL;
+}
+
+static const char *sve_id_to_str(const char *prefix, __u64 id)
+{
+ __u64 sve_off, n, i;
+
+ if (id == KVM_REG_ARM64_SVE_VLS)
+ return "KVM_REG_ARM64_SVE_VLS";
+
+ sve_off = id & ~(REG_MASK | ((1ULL << 5) - 1));
+ i = id & (KVM_ARM64_SVE_MAX_SLICES - 1);
+
+ TEST_ASSERT(i == 0, "%s: Currently we don't expect slice > 0, reg id 0x%llx", prefix, id);
+
+ switch (sve_off) {
+ case KVM_REG_ARM64_SVE_ZREG_BASE ...
+ KVM_REG_ARM64_SVE_ZREG_BASE + (1ULL << 5) * KVM_ARM64_SVE_NUM_ZREGS - 1:
+ n = (id >> 5) & (KVM_ARM64_SVE_NUM_ZREGS - 1);
+ TEST_ASSERT(id == KVM_REG_ARM64_SVE_ZREG(n, 0),
+ "%s: Unexpected bits set in SVE ZREG id: 0x%llx", prefix, id);
+ return strdup_printf("KVM_REG_ARM64_SVE_ZREG(%lld, 0)", n);
+ case KVM_REG_ARM64_SVE_PREG_BASE ...
+ KVM_REG_ARM64_SVE_PREG_BASE + (1ULL << 5) * KVM_ARM64_SVE_NUM_PREGS - 1:
+ n = (id >> 5) & (KVM_ARM64_SVE_NUM_PREGS - 1);
+ TEST_ASSERT(id == KVM_REG_ARM64_SVE_PREG(n, 0),
+ "%s: Unexpected bits set in SVE PREG id: 0x%llx", prefix, id);
+ return strdup_printf("KVM_REG_ARM64_SVE_PREG(%lld, 0)", n);
+ case KVM_REG_ARM64_SVE_FFR_BASE:
+ TEST_ASSERT(id == KVM_REG_ARM64_SVE_FFR(0),
+ "%s: Unexpected bits set in SVE FFR id: 0x%llx", prefix, id);
+ return "KVM_REG_ARM64_SVE_FFR(0)";
+ }
+
+ return NULL;
+}
+
+void print_reg(const char *prefix, __u64 id)
+{
+ unsigned op0, op1, crn, crm, op2;
+ const char *reg_size = NULL;
+
+ TEST_ASSERT((id & KVM_REG_ARCH_MASK) == KVM_REG_ARM64,
+ "%s: KVM_REG_ARM64 missing in reg id: 0x%llx", prefix, id);
+
+ switch (id & KVM_REG_SIZE_MASK) {
+ case KVM_REG_SIZE_U8:
+ reg_size = "KVM_REG_SIZE_U8";
+ break;
+ case KVM_REG_SIZE_U16:
+ reg_size = "KVM_REG_SIZE_U16";
+ break;
+ case KVM_REG_SIZE_U32:
+ reg_size = "KVM_REG_SIZE_U32";
+ break;
+ case KVM_REG_SIZE_U64:
+ reg_size = "KVM_REG_SIZE_U64";
+ break;
+ case KVM_REG_SIZE_U128:
+ reg_size = "KVM_REG_SIZE_U128";
+ break;
+ case KVM_REG_SIZE_U256:
+ reg_size = "KVM_REG_SIZE_U256";
+ break;
+ case KVM_REG_SIZE_U512:
+ reg_size = "KVM_REG_SIZE_U512";
+ break;
+ case KVM_REG_SIZE_U1024:
+ reg_size = "KVM_REG_SIZE_U1024";
+ break;
+ case KVM_REG_SIZE_U2048:
+ reg_size = "KVM_REG_SIZE_U2048";
+ break;
+ default:
+ TEST_FAIL("%s: Unexpected reg size: 0x%llx in reg id: 0x%llx",
+ prefix, (id & KVM_REG_SIZE_MASK) >> KVM_REG_SIZE_SHIFT, id);
+ }
+
+ switch (id & KVM_REG_ARM_COPROC_MASK) {
+ case KVM_REG_ARM_CORE:
+ printf("\tKVM_REG_ARM64 | %s | KVM_REG_ARM_CORE | %s,\n", reg_size, core_id_to_str(prefix, id));
+ break;
+ case KVM_REG_ARM_DEMUX:
+ TEST_ASSERT(!(id & ~(REG_MASK | KVM_REG_ARM_DEMUX_ID_MASK | KVM_REG_ARM_DEMUX_VAL_MASK)),
+ "%s: Unexpected bits set in DEMUX reg id: 0x%llx", prefix, id);
+ printf("\tKVM_REG_ARM64 | %s | KVM_REG_ARM_DEMUX | KVM_REG_ARM_DEMUX_ID_CCSIDR | %lld,\n",
+ reg_size, id & KVM_REG_ARM_DEMUX_VAL_MASK);
+ break;
+ case KVM_REG_ARM64_SYSREG:
+ op0 = (id & KVM_REG_ARM64_SYSREG_OP0_MASK) >> KVM_REG_ARM64_SYSREG_OP0_SHIFT;
+ op1 = (id & KVM_REG_ARM64_SYSREG_OP1_MASK) >> KVM_REG_ARM64_SYSREG_OP1_SHIFT;
+ crn = (id & KVM_REG_ARM64_SYSREG_CRN_MASK) >> KVM_REG_ARM64_SYSREG_CRN_SHIFT;
+ crm = (id & KVM_REG_ARM64_SYSREG_CRM_MASK) >> KVM_REG_ARM64_SYSREG_CRM_SHIFT;
+ op2 = (id & KVM_REG_ARM64_SYSREG_OP2_MASK) >> KVM_REG_ARM64_SYSREG_OP2_SHIFT;
+ TEST_ASSERT(id == ARM64_SYS_REG(op0, op1, crn, crm, op2),
+ "%s: Unexpected bits set in SYSREG reg id: 0x%llx", prefix, id);
+ printf("\tARM64_SYS_REG(%d, %d, %d, %d, %d),\n", op0, op1, crn, crm, op2);
+ break;
+ case KVM_REG_ARM_FW:
+ TEST_ASSERT(id == KVM_REG_ARM_FW_REG(id & 0xffff),
+ "%s: Unexpected bits set in FW reg id: 0x%llx", prefix, id);
+ printf("\tKVM_REG_ARM_FW_REG(%lld),\n", id & 0xffff);
+ break;
+ case KVM_REG_ARM_FW_FEAT_BMAP:
+ TEST_ASSERT(id == KVM_REG_ARM_FW_FEAT_BMAP_REG(id & 0xffff),
+ "%s: Unexpected bits set in the bitmap feature FW reg id: 0x%llx", prefix, id);
+ printf("\tKVM_REG_ARM_FW_FEAT_BMAP_REG(%lld),\n", id & 0xffff);
+ break;
+ case KVM_REG_ARM64_SVE:
+ printf("\t%s,\n", sve_id_to_str(prefix, id));
+ break;
+ default:
+ TEST_FAIL("%s: Unexpected coproc type: 0x%llx in reg id: 0x%llx",
+ prefix, (id & KVM_REG_ARM_COPROC_MASK) >> KVM_REG_ARM_COPROC_SHIFT, id);
+ }
+}
+
+/*
+ * The original blessed list was primed with the output of kernel version
+ * v4.15 with --core-reg-fixup and then later updated with new registers.
+ * (The --core-reg-fixup option and it's fixup function have been removed
+ * from the test, as it's unlikely to use this type of test on a kernel
+ * older than v5.2.)
+ *
+ * The blessed list is up to date with kernel version v6.4 (or so we hope)
+ */
+static __u64 base_regs[] = {
+ KVM_REG_ARM64 | KVM_REG_SIZE_U64 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(regs.regs[0]),
+ KVM_REG_ARM64 | KVM_REG_SIZE_U64 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(regs.regs[1]),
+ KVM_REG_ARM64 | KVM_REG_SIZE_U64 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(regs.regs[2]),
+ KVM_REG_ARM64 | KVM_REG_SIZE_U64 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(regs.regs[3]),
+ KVM_REG_ARM64 | KVM_REG_SIZE_U64 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(regs.regs[4]),
+ KVM_REG_ARM64 | KVM_REG_SIZE_U64 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(regs.regs[5]),
+ KVM_REG_ARM64 | KVM_REG_SIZE_U64 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(regs.regs[6]),
+ KVM_REG_ARM64 | KVM_REG_SIZE_U64 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(regs.regs[7]),
+ KVM_REG_ARM64 | KVM_REG_SIZE_U64 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(regs.regs[8]),
+ KVM_REG_ARM64 | KVM_REG_SIZE_U64 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(regs.regs[9]),
+ KVM_REG_ARM64 | KVM_REG_SIZE_U64 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(regs.regs[10]),
+ KVM_REG_ARM64 | KVM_REG_SIZE_U64 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(regs.regs[11]),
+ KVM_REG_ARM64 | KVM_REG_SIZE_U64 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(regs.regs[12]),
+ KVM_REG_ARM64 | KVM_REG_SIZE_U64 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(regs.regs[13]),
+ KVM_REG_ARM64 | KVM_REG_SIZE_U64 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(regs.regs[14]),
+ KVM_REG_ARM64 | KVM_REG_SIZE_U64 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(regs.regs[15]),
+ KVM_REG_ARM64 | KVM_REG_SIZE_U64 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(regs.regs[16]),
+ KVM_REG_ARM64 | KVM_REG_SIZE_U64 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(regs.regs[17]),
+ KVM_REG_ARM64 | KVM_REG_SIZE_U64 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(regs.regs[18]),
+ KVM_REG_ARM64 | KVM_REG_SIZE_U64 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(regs.regs[19]),
+ KVM_REG_ARM64 | KVM_REG_SIZE_U64 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(regs.regs[20]),
+ KVM_REG_ARM64 | KVM_REG_SIZE_U64 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(regs.regs[21]),
+ KVM_REG_ARM64 | KVM_REG_SIZE_U64 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(regs.regs[22]),
+ KVM_REG_ARM64 | KVM_REG_SIZE_U64 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(regs.regs[23]),
+ KVM_REG_ARM64 | KVM_REG_SIZE_U64 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(regs.regs[24]),
+ KVM_REG_ARM64 | KVM_REG_SIZE_U64 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(regs.regs[25]),
+ KVM_REG_ARM64 | KVM_REG_SIZE_U64 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(regs.regs[26]),
+ KVM_REG_ARM64 | KVM_REG_SIZE_U64 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(regs.regs[27]),
+ KVM_REG_ARM64 | KVM_REG_SIZE_U64 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(regs.regs[28]),
+ KVM_REG_ARM64 | KVM_REG_SIZE_U64 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(regs.regs[29]),
+ KVM_REG_ARM64 | KVM_REG_SIZE_U64 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(regs.regs[30]),
+ KVM_REG_ARM64 | KVM_REG_SIZE_U64 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(regs.sp),
+ KVM_REG_ARM64 | KVM_REG_SIZE_U64 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(regs.pc),
+ KVM_REG_ARM64 | KVM_REG_SIZE_U64 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(regs.pstate),
+ KVM_REG_ARM64 | KVM_REG_SIZE_U64 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(sp_el1),
+ KVM_REG_ARM64 | KVM_REG_SIZE_U64 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(elr_el1),
+ KVM_REG_ARM64 | KVM_REG_SIZE_U64 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(spsr[0]),
+ KVM_REG_ARM64 | KVM_REG_SIZE_U64 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(spsr[1]),
+ KVM_REG_ARM64 | KVM_REG_SIZE_U64 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(spsr[2]),
+ KVM_REG_ARM64 | KVM_REG_SIZE_U64 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(spsr[3]),
+ KVM_REG_ARM64 | KVM_REG_SIZE_U64 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(spsr[4]),
+ KVM_REG_ARM64 | KVM_REG_SIZE_U32 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(fp_regs.fpsr),
+ KVM_REG_ARM64 | KVM_REG_SIZE_U32 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(fp_regs.fpcr),
+ KVM_REG_ARM_FW_REG(0), /* KVM_REG_ARM_PSCI_VERSION */
+ KVM_REG_ARM_FW_REG(1), /* KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_1 */
+ KVM_REG_ARM_FW_REG(2), /* KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_2 */
+ KVM_REG_ARM_FW_REG(3), /* KVM_REG_ARM_SMCCC_ARCH_WORKAROUND_3 */
+ KVM_REG_ARM_FW_FEAT_BMAP_REG(0), /* KVM_REG_ARM_STD_BMAP */
+ KVM_REG_ARM_FW_FEAT_BMAP_REG(1), /* KVM_REG_ARM_STD_HYP_BMAP */
+ KVM_REG_ARM_FW_FEAT_BMAP_REG(2), /* KVM_REG_ARM_VENDOR_HYP_BMAP */
+ ARM64_SYS_REG(3, 3, 14, 3, 1), /* CNTV_CTL_EL0 */
+ ARM64_SYS_REG(3, 3, 14, 3, 2), /* CNTV_CVAL_EL0 */
+ ARM64_SYS_REG(3, 3, 14, 0, 2),
+ ARM64_SYS_REG(3, 0, 0, 0, 0), /* MIDR_EL1 */
+ ARM64_SYS_REG(3, 0, 0, 0, 6), /* REVIDR_EL1 */
+ ARM64_SYS_REG(3, 1, 0, 0, 1), /* CLIDR_EL1 */
+ ARM64_SYS_REG(3, 1, 0, 0, 7), /* AIDR_EL1 */
+ ARM64_SYS_REG(3, 3, 0, 0, 1), /* CTR_EL0 */
+ ARM64_SYS_REG(2, 0, 0, 0, 4),
+ ARM64_SYS_REG(2, 0, 0, 0, 5),
+ ARM64_SYS_REG(2, 0, 0, 0, 6),
+ ARM64_SYS_REG(2, 0, 0, 0, 7),
+ ARM64_SYS_REG(2, 0, 0, 1, 4),
+ ARM64_SYS_REG(2, 0, 0, 1, 5),
+ ARM64_SYS_REG(2, 0, 0, 1, 6),
+ ARM64_SYS_REG(2, 0, 0, 1, 7),
+ ARM64_SYS_REG(2, 0, 0, 2, 0), /* MDCCINT_EL1 */
+ ARM64_SYS_REG(2, 0, 0, 2, 2), /* MDSCR_EL1 */
+ ARM64_SYS_REG(2, 0, 0, 2, 4),
+ ARM64_SYS_REG(2, 0, 0, 2, 5),
+ ARM64_SYS_REG(2, 0, 0, 2, 6),
+ ARM64_SYS_REG(2, 0, 0, 2, 7),
+ ARM64_SYS_REG(2, 0, 0, 3, 4),
+ ARM64_SYS_REG(2, 0, 0, 3, 5),
+ ARM64_SYS_REG(2, 0, 0, 3, 6),
+ ARM64_SYS_REG(2, 0, 0, 3, 7),
+ ARM64_SYS_REG(2, 0, 0, 4, 4),
+ ARM64_SYS_REG(2, 0, 0, 4, 5),
+ ARM64_SYS_REG(2, 0, 0, 4, 6),
+ ARM64_SYS_REG(2, 0, 0, 4, 7),
+ ARM64_SYS_REG(2, 0, 0, 5, 4),
+ ARM64_SYS_REG(2, 0, 0, 5, 5),
+ ARM64_SYS_REG(2, 0, 0, 5, 6),
+ ARM64_SYS_REG(2, 0, 0, 5, 7),
+ ARM64_SYS_REG(2, 0, 0, 6, 4),
+ ARM64_SYS_REG(2, 0, 0, 6, 5),
+ ARM64_SYS_REG(2, 0, 0, 6, 6),
+ ARM64_SYS_REG(2, 0, 0, 6, 7),
+ ARM64_SYS_REG(2, 0, 0, 7, 4),
+ ARM64_SYS_REG(2, 0, 0, 7, 5),
+ ARM64_SYS_REG(2, 0, 0, 7, 6),
+ ARM64_SYS_REG(2, 0, 0, 7, 7),
+ ARM64_SYS_REG(2, 0, 0, 8, 4),
+ ARM64_SYS_REG(2, 0, 0, 8, 5),
+ ARM64_SYS_REG(2, 0, 0, 8, 6),
+ ARM64_SYS_REG(2, 0, 0, 8, 7),
+ ARM64_SYS_REG(2, 0, 0, 9, 4),
+ ARM64_SYS_REG(2, 0, 0, 9, 5),
+ ARM64_SYS_REG(2, 0, 0, 9, 6),
+ ARM64_SYS_REG(2, 0, 0, 9, 7),
+ ARM64_SYS_REG(2, 0, 0, 10, 4),
+ ARM64_SYS_REG(2, 0, 0, 10, 5),
+ ARM64_SYS_REG(2, 0, 0, 10, 6),
+ ARM64_SYS_REG(2, 0, 0, 10, 7),
+ ARM64_SYS_REG(2, 0, 0, 11, 4),
+ ARM64_SYS_REG(2, 0, 0, 11, 5),
+ ARM64_SYS_REG(2, 0, 0, 11, 6),
+ ARM64_SYS_REG(2, 0, 0, 11, 7),
+ ARM64_SYS_REG(2, 0, 0, 12, 4),
+ ARM64_SYS_REG(2, 0, 0, 12, 5),
+ ARM64_SYS_REG(2, 0, 0, 12, 6),
+ ARM64_SYS_REG(2, 0, 0, 12, 7),
+ ARM64_SYS_REG(2, 0, 0, 13, 4),
+ ARM64_SYS_REG(2, 0, 0, 13, 5),
+ ARM64_SYS_REG(2, 0, 0, 13, 6),
+ ARM64_SYS_REG(2, 0, 0, 13, 7),
+ ARM64_SYS_REG(2, 0, 0, 14, 4),
+ ARM64_SYS_REG(2, 0, 0, 14, 5),
+ ARM64_SYS_REG(2, 0, 0, 14, 6),
+ ARM64_SYS_REG(2, 0, 0, 14, 7),
+ ARM64_SYS_REG(2, 0, 0, 15, 4),
+ ARM64_SYS_REG(2, 0, 0, 15, 5),
+ ARM64_SYS_REG(2, 0, 0, 15, 6),
+ ARM64_SYS_REG(2, 0, 0, 15, 7),
+ ARM64_SYS_REG(2, 0, 1, 1, 4), /* OSLSR_EL1 */
+ ARM64_SYS_REG(2, 4, 0, 7, 0), /* DBGVCR32_EL2 */
+ ARM64_SYS_REG(3, 0, 0, 0, 5), /* MPIDR_EL1 */
+ ARM64_SYS_REG(3, 0, 0, 1, 0), /* ID_PFR0_EL1 */
+ ARM64_SYS_REG(3, 0, 0, 1, 1), /* ID_PFR1_EL1 */
+ ARM64_SYS_REG(3, 0, 0, 1, 2), /* ID_DFR0_EL1 */
+ ARM64_SYS_REG(3, 0, 0, 1, 3), /* ID_AFR0_EL1 */
+ ARM64_SYS_REG(3, 0, 0, 1, 4), /* ID_MMFR0_EL1 */
+ ARM64_SYS_REG(3, 0, 0, 1, 5), /* ID_MMFR1_EL1 */
+ ARM64_SYS_REG(3, 0, 0, 1, 6), /* ID_MMFR2_EL1 */
+ ARM64_SYS_REG(3, 0, 0, 1, 7), /* ID_MMFR3_EL1 */
+ ARM64_SYS_REG(3, 0, 0, 2, 0), /* ID_ISAR0_EL1 */
+ ARM64_SYS_REG(3, 0, 0, 2, 1), /* ID_ISAR1_EL1 */
+ ARM64_SYS_REG(3, 0, 0, 2, 2), /* ID_ISAR2_EL1 */
+ ARM64_SYS_REG(3, 0, 0, 2, 3), /* ID_ISAR3_EL1 */
+ ARM64_SYS_REG(3, 0, 0, 2, 4), /* ID_ISAR4_EL1 */
+ ARM64_SYS_REG(3, 0, 0, 2, 5), /* ID_ISAR5_EL1 */
+ ARM64_SYS_REG(3, 0, 0, 2, 6), /* ID_MMFR4_EL1 */
+ ARM64_SYS_REG(3, 0, 0, 2, 7), /* ID_ISAR6_EL1 */
+ ARM64_SYS_REG(3, 0, 0, 3, 0), /* MVFR0_EL1 */
+ ARM64_SYS_REG(3, 0, 0, 3, 1), /* MVFR1_EL1 */
+ ARM64_SYS_REG(3, 0, 0, 3, 2), /* MVFR2_EL1 */
+ ARM64_SYS_REG(3, 0, 0, 3, 3),
+ ARM64_SYS_REG(3, 0, 0, 3, 4), /* ID_PFR2_EL1 */
+ ARM64_SYS_REG(3, 0, 0, 3, 5), /* ID_DFR1_EL1 */
+ ARM64_SYS_REG(3, 0, 0, 3, 6), /* ID_MMFR5_EL1 */
+ ARM64_SYS_REG(3, 0, 0, 3, 7),
+ ARM64_SYS_REG(3, 0, 0, 4, 0), /* ID_AA64PFR0_EL1 */
+ ARM64_SYS_REG(3, 0, 0, 4, 1), /* ID_AA64PFR1_EL1 */
+ ARM64_SYS_REG(3, 0, 0, 4, 2), /* ID_AA64PFR2_EL1 */
+ ARM64_SYS_REG(3, 0, 0, 4, 3),
+ ARM64_SYS_REG(3, 0, 0, 4, 4), /* ID_AA64ZFR0_EL1 */
+ ARM64_SYS_REG(3, 0, 0, 4, 5), /* ID_AA64SMFR0_EL1 */
+ ARM64_SYS_REG(3, 0, 0, 4, 6),
+ ARM64_SYS_REG(3, 0, 0, 4, 7),
+ ARM64_SYS_REG(3, 0, 0, 5, 0), /* ID_AA64DFR0_EL1 */
+ ARM64_SYS_REG(3, 0, 0, 5, 1), /* ID_AA64DFR1_EL1 */
+ ARM64_SYS_REG(3, 0, 0, 5, 2),
+ ARM64_SYS_REG(3, 0, 0, 5, 3),
+ ARM64_SYS_REG(3, 0, 0, 5, 4), /* ID_AA64AFR0_EL1 */
+ ARM64_SYS_REG(3, 0, 0, 5, 5), /* ID_AA64AFR1_EL1 */
+ ARM64_SYS_REG(3, 0, 0, 5, 6),
+ ARM64_SYS_REG(3, 0, 0, 5, 7),
+ ARM64_SYS_REG(3, 0, 0, 6, 0), /* ID_AA64ISAR0_EL1 */
+ ARM64_SYS_REG(3, 0, 0, 6, 1), /* ID_AA64ISAR1_EL1 */
+ ARM64_SYS_REG(3, 0, 0, 6, 2), /* ID_AA64ISAR2_EL1 */
+ ARM64_SYS_REG(3, 0, 0, 6, 3),
+ ARM64_SYS_REG(3, 0, 0, 6, 4),
+ ARM64_SYS_REG(3, 0, 0, 6, 5),
+ ARM64_SYS_REG(3, 0, 0, 6, 6),
+ ARM64_SYS_REG(3, 0, 0, 6, 7),
+ ARM64_SYS_REG(3, 0, 0, 7, 0), /* ID_AA64MMFR0_EL1 */
+ ARM64_SYS_REG(3, 0, 0, 7, 1), /* ID_AA64MMFR1_EL1 */
+ ARM64_SYS_REG(3, 0, 0, 7, 2), /* ID_AA64MMFR2_EL1 */
+ ARM64_SYS_REG(3, 0, 0, 7, 3), /* ID_AA64MMFR3_EL1 */
+ ARM64_SYS_REG(3, 0, 0, 7, 4), /* ID_AA64MMFR4_EL1 */
+ ARM64_SYS_REG(3, 0, 0, 7, 5),
+ ARM64_SYS_REG(3, 0, 0, 7, 6),
+ ARM64_SYS_REG(3, 0, 0, 7, 7),
+ ARM64_SYS_REG(3, 0, 1, 0, 0), /* SCTLR_EL1 */
+ ARM64_SYS_REG(3, 0, 1, 0, 1), /* ACTLR_EL1 */
+ ARM64_SYS_REG(3, 0, 1, 0, 2), /* CPACR_EL1 */
+ ARM64_SYS_REG(3, 0, 2, 0, 0), /* TTBR0_EL1 */
+ ARM64_SYS_REG(3, 0, 2, 0, 1), /* TTBR1_EL1 */
+ ARM64_SYS_REG(3, 0, 2, 0, 2), /* TCR_EL1 */
+ ARM64_SYS_REG(3, 0, 2, 0, 3), /* TCR2_EL1 */
+ ARM64_SYS_REG(3, 0, 5, 1, 0), /* AFSR0_EL1 */
+ ARM64_SYS_REG(3, 0, 5, 1, 1), /* AFSR1_EL1 */
+ ARM64_SYS_REG(3, 0, 5, 2, 0), /* ESR_EL1 */
+ ARM64_SYS_REG(3, 0, 6, 0, 0), /* FAR_EL1 */
+ ARM64_SYS_REG(3, 0, 7, 4, 0), /* PAR_EL1 */
+ ARM64_SYS_REG(3, 0, 10, 2, 0), /* MAIR_EL1 */
+ ARM64_SYS_REG(3, 0, 10, 2, 2), /* PIRE0_EL1 */
+ ARM64_SYS_REG(3, 0, 10, 2, 3), /* PIR_EL1 */
+ ARM64_SYS_REG(3, 0, 10, 3, 0), /* AMAIR_EL1 */
+ ARM64_SYS_REG(3, 0, 12, 0, 0), /* VBAR_EL1 */
+ ARM64_SYS_REG(3, 0, 12, 1, 1), /* DISR_EL1 */
+ ARM64_SYS_REG(3, 0, 13, 0, 1), /* CONTEXTIDR_EL1 */
+ ARM64_SYS_REG(3, 0, 13, 0, 4), /* TPIDR_EL1 */
+ ARM64_SYS_REG(3, 0, 14, 1, 0), /* CNTKCTL_EL1 */
+ ARM64_SYS_REG(3, 2, 0, 0, 0), /* CSSELR_EL1 */
+ ARM64_SYS_REG(3, 3, 13, 0, 2), /* TPIDR_EL0 */
+ ARM64_SYS_REG(3, 3, 13, 0, 3), /* TPIDRRO_EL0 */
+ ARM64_SYS_REG(3, 3, 14, 0, 1), /* CNTPCT_EL0 */
+ ARM64_SYS_REG(3, 3, 14, 2, 1), /* CNTP_CTL_EL0 */
+ ARM64_SYS_REG(3, 3, 14, 2, 2), /* CNTP_CVAL_EL0 */
+ ARM64_SYS_REG(3, 4, 3, 0, 0), /* DACR32_EL2 */
+ ARM64_SYS_REG(3, 4, 5, 0, 1), /* IFSR32_EL2 */
+ ARM64_SYS_REG(3, 4, 5, 3, 0), /* FPEXC32_EL2 */
+};
+
+static __u64 pmu_regs[] = {
+ ARM64_SYS_REG(3, 0, 9, 14, 1), /* PMINTENSET_EL1 */
+ ARM64_SYS_REG(3, 0, 9, 14, 2), /* PMINTENCLR_EL1 */
+ ARM64_SYS_REG(3, 3, 9, 12, 0), /* PMCR_EL0 */
+ ARM64_SYS_REG(3, 3, 9, 12, 1), /* PMCNTENSET_EL0 */
+ ARM64_SYS_REG(3, 3, 9, 12, 2), /* PMCNTENCLR_EL0 */
+ ARM64_SYS_REG(3, 3, 9, 12, 3), /* PMOVSCLR_EL0 */
+ ARM64_SYS_REG(3, 3, 9, 12, 4), /* PMSWINC_EL0 */
+ ARM64_SYS_REG(3, 3, 9, 12, 5), /* PMSELR_EL0 */
+ ARM64_SYS_REG(3, 3, 9, 13, 0), /* PMCCNTR_EL0 */
+ ARM64_SYS_REG(3, 3, 9, 14, 0), /* PMUSERENR_EL0 */
+ ARM64_SYS_REG(3, 3, 9, 14, 3), /* PMOVSSET_EL0 */
+ ARM64_SYS_REG(3, 3, 14, 8, 0),
+ ARM64_SYS_REG(3, 3, 14, 8, 1),
+ ARM64_SYS_REG(3, 3, 14, 8, 2),
+ ARM64_SYS_REG(3, 3, 14, 8, 3),
+ ARM64_SYS_REG(3, 3, 14, 8, 4),
+ ARM64_SYS_REG(3, 3, 14, 8, 5),
+ ARM64_SYS_REG(3, 3, 14, 8, 6),
+ ARM64_SYS_REG(3, 3, 14, 8, 7),
+ ARM64_SYS_REG(3, 3, 14, 9, 0),
+ ARM64_SYS_REG(3, 3, 14, 9, 1),
+ ARM64_SYS_REG(3, 3, 14, 9, 2),
+ ARM64_SYS_REG(3, 3, 14, 9, 3),
+ ARM64_SYS_REG(3, 3, 14, 9, 4),
+ ARM64_SYS_REG(3, 3, 14, 9, 5),
+ ARM64_SYS_REG(3, 3, 14, 9, 6),
+ ARM64_SYS_REG(3, 3, 14, 9, 7),
+ ARM64_SYS_REG(3, 3, 14, 10, 0),
+ ARM64_SYS_REG(3, 3, 14, 10, 1),
+ ARM64_SYS_REG(3, 3, 14, 10, 2),
+ ARM64_SYS_REG(3, 3, 14, 10, 3),
+ ARM64_SYS_REG(3, 3, 14, 10, 4),
+ ARM64_SYS_REG(3, 3, 14, 10, 5),
+ ARM64_SYS_REG(3, 3, 14, 10, 6),
+ ARM64_SYS_REG(3, 3, 14, 10, 7),
+ ARM64_SYS_REG(3, 3, 14, 11, 0),
+ ARM64_SYS_REG(3, 3, 14, 11, 1),
+ ARM64_SYS_REG(3, 3, 14, 11, 2),
+ ARM64_SYS_REG(3, 3, 14, 11, 3),
+ ARM64_SYS_REG(3, 3, 14, 11, 4),
+ ARM64_SYS_REG(3, 3, 14, 11, 5),
+ ARM64_SYS_REG(3, 3, 14, 11, 6),
+ ARM64_SYS_REG(3, 3, 14, 12, 0),
+ ARM64_SYS_REG(3, 3, 14, 12, 1),
+ ARM64_SYS_REG(3, 3, 14, 12, 2),
+ ARM64_SYS_REG(3, 3, 14, 12, 3),
+ ARM64_SYS_REG(3, 3, 14, 12, 4),
+ ARM64_SYS_REG(3, 3, 14, 12, 5),
+ ARM64_SYS_REG(3, 3, 14, 12, 6),
+ ARM64_SYS_REG(3, 3, 14, 12, 7),
+ ARM64_SYS_REG(3, 3, 14, 13, 0),
+ ARM64_SYS_REG(3, 3, 14, 13, 1),
+ ARM64_SYS_REG(3, 3, 14, 13, 2),
+ ARM64_SYS_REG(3, 3, 14, 13, 3),
+ ARM64_SYS_REG(3, 3, 14, 13, 4),
+ ARM64_SYS_REG(3, 3, 14, 13, 5),
+ ARM64_SYS_REG(3, 3, 14, 13, 6),
+ ARM64_SYS_REG(3, 3, 14, 13, 7),
+ ARM64_SYS_REG(3, 3, 14, 14, 0),
+ ARM64_SYS_REG(3, 3, 14, 14, 1),
+ ARM64_SYS_REG(3, 3, 14, 14, 2),
+ ARM64_SYS_REG(3, 3, 14, 14, 3),
+ ARM64_SYS_REG(3, 3, 14, 14, 4),
+ ARM64_SYS_REG(3, 3, 14, 14, 5),
+ ARM64_SYS_REG(3, 3, 14, 14, 6),
+ ARM64_SYS_REG(3, 3, 14, 14, 7),
+ ARM64_SYS_REG(3, 3, 14, 15, 0),
+ ARM64_SYS_REG(3, 3, 14, 15, 1),
+ ARM64_SYS_REG(3, 3, 14, 15, 2),
+ ARM64_SYS_REG(3, 3, 14, 15, 3),
+ ARM64_SYS_REG(3, 3, 14, 15, 4),
+ ARM64_SYS_REG(3, 3, 14, 15, 5),
+ ARM64_SYS_REG(3, 3, 14, 15, 6),
+ ARM64_SYS_REG(3, 3, 14, 15, 7), /* PMCCFILTR_EL0 */
+};
+
+static __u64 vregs[] = {
+ KVM_REG_ARM64 | KVM_REG_SIZE_U128 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(fp_regs.vregs[0]),
+ KVM_REG_ARM64 | KVM_REG_SIZE_U128 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(fp_regs.vregs[1]),
+ KVM_REG_ARM64 | KVM_REG_SIZE_U128 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(fp_regs.vregs[2]),
+ KVM_REG_ARM64 | KVM_REG_SIZE_U128 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(fp_regs.vregs[3]),
+ KVM_REG_ARM64 | KVM_REG_SIZE_U128 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(fp_regs.vregs[4]),
+ KVM_REG_ARM64 | KVM_REG_SIZE_U128 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(fp_regs.vregs[5]),
+ KVM_REG_ARM64 | KVM_REG_SIZE_U128 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(fp_regs.vregs[6]),
+ KVM_REG_ARM64 | KVM_REG_SIZE_U128 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(fp_regs.vregs[7]),
+ KVM_REG_ARM64 | KVM_REG_SIZE_U128 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(fp_regs.vregs[8]),
+ KVM_REG_ARM64 | KVM_REG_SIZE_U128 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(fp_regs.vregs[9]),
+ KVM_REG_ARM64 | KVM_REG_SIZE_U128 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(fp_regs.vregs[10]),
+ KVM_REG_ARM64 | KVM_REG_SIZE_U128 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(fp_regs.vregs[11]),
+ KVM_REG_ARM64 | KVM_REG_SIZE_U128 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(fp_regs.vregs[12]),
+ KVM_REG_ARM64 | KVM_REG_SIZE_U128 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(fp_regs.vregs[13]),
+ KVM_REG_ARM64 | KVM_REG_SIZE_U128 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(fp_regs.vregs[14]),
+ KVM_REG_ARM64 | KVM_REG_SIZE_U128 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(fp_regs.vregs[15]),
+ KVM_REG_ARM64 | KVM_REG_SIZE_U128 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(fp_regs.vregs[16]),
+ KVM_REG_ARM64 | KVM_REG_SIZE_U128 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(fp_regs.vregs[17]),
+ KVM_REG_ARM64 | KVM_REG_SIZE_U128 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(fp_regs.vregs[18]),
+ KVM_REG_ARM64 | KVM_REG_SIZE_U128 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(fp_regs.vregs[19]),
+ KVM_REG_ARM64 | KVM_REG_SIZE_U128 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(fp_regs.vregs[20]),
+ KVM_REG_ARM64 | KVM_REG_SIZE_U128 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(fp_regs.vregs[21]),
+ KVM_REG_ARM64 | KVM_REG_SIZE_U128 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(fp_regs.vregs[22]),
+ KVM_REG_ARM64 | KVM_REG_SIZE_U128 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(fp_regs.vregs[23]),
+ KVM_REG_ARM64 | KVM_REG_SIZE_U128 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(fp_regs.vregs[24]),
+ KVM_REG_ARM64 | KVM_REG_SIZE_U128 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(fp_regs.vregs[25]),
+ KVM_REG_ARM64 | KVM_REG_SIZE_U128 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(fp_regs.vregs[26]),
+ KVM_REG_ARM64 | KVM_REG_SIZE_U128 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(fp_regs.vregs[27]),
+ KVM_REG_ARM64 | KVM_REG_SIZE_U128 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(fp_regs.vregs[28]),
+ KVM_REG_ARM64 | KVM_REG_SIZE_U128 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(fp_regs.vregs[29]),
+ KVM_REG_ARM64 | KVM_REG_SIZE_U128 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(fp_regs.vregs[30]),
+ KVM_REG_ARM64 | KVM_REG_SIZE_U128 | KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(fp_regs.vregs[31]),
+};
+
+static __u64 sve_regs[] = {
+ KVM_REG_ARM64_SVE_VLS,
+ KVM_REG_ARM64_SVE_ZREG(0, 0),
+ KVM_REG_ARM64_SVE_ZREG(1, 0),
+ KVM_REG_ARM64_SVE_ZREG(2, 0),
+ KVM_REG_ARM64_SVE_ZREG(3, 0),
+ KVM_REG_ARM64_SVE_ZREG(4, 0),
+ KVM_REG_ARM64_SVE_ZREG(5, 0),
+ KVM_REG_ARM64_SVE_ZREG(6, 0),
+ KVM_REG_ARM64_SVE_ZREG(7, 0),
+ KVM_REG_ARM64_SVE_ZREG(8, 0),
+ KVM_REG_ARM64_SVE_ZREG(9, 0),
+ KVM_REG_ARM64_SVE_ZREG(10, 0),
+ KVM_REG_ARM64_SVE_ZREG(11, 0),
+ KVM_REG_ARM64_SVE_ZREG(12, 0),
+ KVM_REG_ARM64_SVE_ZREG(13, 0),
+ KVM_REG_ARM64_SVE_ZREG(14, 0),
+ KVM_REG_ARM64_SVE_ZREG(15, 0),
+ KVM_REG_ARM64_SVE_ZREG(16, 0),
+ KVM_REG_ARM64_SVE_ZREG(17, 0),
+ KVM_REG_ARM64_SVE_ZREG(18, 0),
+ KVM_REG_ARM64_SVE_ZREG(19, 0),
+ KVM_REG_ARM64_SVE_ZREG(20, 0),
+ KVM_REG_ARM64_SVE_ZREG(21, 0),
+ KVM_REG_ARM64_SVE_ZREG(22, 0),
+ KVM_REG_ARM64_SVE_ZREG(23, 0),
+ KVM_REG_ARM64_SVE_ZREG(24, 0),
+ KVM_REG_ARM64_SVE_ZREG(25, 0),
+ KVM_REG_ARM64_SVE_ZREG(26, 0),
+ KVM_REG_ARM64_SVE_ZREG(27, 0),
+ KVM_REG_ARM64_SVE_ZREG(28, 0),
+ KVM_REG_ARM64_SVE_ZREG(29, 0),
+ KVM_REG_ARM64_SVE_ZREG(30, 0),
+ KVM_REG_ARM64_SVE_ZREG(31, 0),
+ KVM_REG_ARM64_SVE_PREG(0, 0),
+ KVM_REG_ARM64_SVE_PREG(1, 0),
+ KVM_REG_ARM64_SVE_PREG(2, 0),
+ KVM_REG_ARM64_SVE_PREG(3, 0),
+ KVM_REG_ARM64_SVE_PREG(4, 0),
+ KVM_REG_ARM64_SVE_PREG(5, 0),
+ KVM_REG_ARM64_SVE_PREG(6, 0),
+ KVM_REG_ARM64_SVE_PREG(7, 0),
+ KVM_REG_ARM64_SVE_PREG(8, 0),
+ KVM_REG_ARM64_SVE_PREG(9, 0),
+ KVM_REG_ARM64_SVE_PREG(10, 0),
+ KVM_REG_ARM64_SVE_PREG(11, 0),
+ KVM_REG_ARM64_SVE_PREG(12, 0),
+ KVM_REG_ARM64_SVE_PREG(13, 0),
+ KVM_REG_ARM64_SVE_PREG(14, 0),
+ KVM_REG_ARM64_SVE_PREG(15, 0),
+ KVM_REG_ARM64_SVE_FFR(0),
+ ARM64_SYS_REG(3, 0, 1, 2, 0), /* ZCR_EL1 */
+};
+
+static __u64 sve_rejects_set[] = {
+ KVM_REG_ARM64_SVE_VLS,
+};
+
+static __u64 pauth_addr_regs[] = {
+ ARM64_SYS_REG(3, 0, 2, 1, 0), /* APIAKEYLO_EL1 */
+ ARM64_SYS_REG(3, 0, 2, 1, 1), /* APIAKEYHI_EL1 */
+ ARM64_SYS_REG(3, 0, 2, 1, 2), /* APIBKEYLO_EL1 */
+ ARM64_SYS_REG(3, 0, 2, 1, 3), /* APIBKEYHI_EL1 */
+ ARM64_SYS_REG(3, 0, 2, 2, 0), /* APDAKEYLO_EL1 */
+ ARM64_SYS_REG(3, 0, 2, 2, 1), /* APDAKEYHI_EL1 */
+ ARM64_SYS_REG(3, 0, 2, 2, 2), /* APDBKEYLO_EL1 */
+ ARM64_SYS_REG(3, 0, 2, 2, 3) /* APDBKEYHI_EL1 */
+};
+
+static __u64 pauth_generic_regs[] = {
+ ARM64_SYS_REG(3, 0, 2, 3, 0), /* APGAKEYLO_EL1 */
+ ARM64_SYS_REG(3, 0, 2, 3, 1), /* APGAKEYHI_EL1 */
+};
+
+#define BASE_SUBLIST \
+ { "base", .regs = base_regs, .regs_n = ARRAY_SIZE(base_regs), }
+#define VREGS_SUBLIST \
+ { "vregs", .regs = vregs, .regs_n = ARRAY_SIZE(vregs), }
+#define PMU_SUBLIST \
+ { "pmu", .capability = KVM_CAP_ARM_PMU_V3, .feature = KVM_ARM_VCPU_PMU_V3, \
+ .regs = pmu_regs, .regs_n = ARRAY_SIZE(pmu_regs), }
+#define SVE_SUBLIST \
+ { "sve", .capability = KVM_CAP_ARM_SVE, .feature = KVM_ARM_VCPU_SVE, .finalize = true, \
+ .regs = sve_regs, .regs_n = ARRAY_SIZE(sve_regs), \
+ .rejects_set = sve_rejects_set, .rejects_set_n = ARRAY_SIZE(sve_rejects_set), }
+#define PAUTH_SUBLIST \
+ { \
+ .name = "pauth_address", \
+ .capability = KVM_CAP_ARM_PTRAUTH_ADDRESS, \
+ .feature = KVM_ARM_VCPU_PTRAUTH_ADDRESS, \
+ .regs = pauth_addr_regs, \
+ .regs_n = ARRAY_SIZE(pauth_addr_regs), \
+ }, \
+ { \
+ .name = "pauth_generic", \
+ .capability = KVM_CAP_ARM_PTRAUTH_GENERIC, \
+ .feature = KVM_ARM_VCPU_PTRAUTH_GENERIC, \
+ .regs = pauth_generic_regs, \
+ .regs_n = ARRAY_SIZE(pauth_generic_regs), \
+ }
+
+static struct vcpu_reg_list vregs_config = {
+ .sublists = {
+ BASE_SUBLIST,
+ VREGS_SUBLIST,
+ {0},
+ },
+};
+static struct vcpu_reg_list vregs_pmu_config = {
+ .sublists = {
+ BASE_SUBLIST,
+ VREGS_SUBLIST,
+ PMU_SUBLIST,
+ {0},
+ },
+};
+static struct vcpu_reg_list sve_config = {
+ .sublists = {
+ BASE_SUBLIST,
+ SVE_SUBLIST,
+ {0},
+ },
+};
+static struct vcpu_reg_list sve_pmu_config = {
+ .sublists = {
+ BASE_SUBLIST,
+ SVE_SUBLIST,
+ PMU_SUBLIST,
+ {0},
+ },
+};
+static struct vcpu_reg_list pauth_config = {
+ .sublists = {
+ BASE_SUBLIST,
+ VREGS_SUBLIST,
+ PAUTH_SUBLIST,
+ {0},
+ },
+};
+static struct vcpu_reg_list pauth_pmu_config = {
+ .sublists = {
+ BASE_SUBLIST,
+ VREGS_SUBLIST,
+ PAUTH_SUBLIST,
+ PMU_SUBLIST,
+ {0},
+ },
+};
+
+struct vcpu_reg_list *vcpu_configs[] = {
+ &vregs_config,
+ &vregs_pmu_config,
+ &sve_config,
+ &sve_pmu_config,
+ &pauth_config,
+ &pauth_pmu_config,
+};
+int vcpu_configs_n = ARRAY_SIZE(vcpu_configs);
diff --git a/tools/testing/selftests/kvm/aarch64/hypercalls.c b/tools/testing/selftests/kvm/aarch64/hypercalls.c
new file mode 100644
index 000000000000..9d192ce0078d
--- /dev/null
+++ b/tools/testing/selftests/kvm/aarch64/hypercalls.c
@@ -0,0 +1,308 @@
+// SPDX-License-Identifier: GPL-2.0-only
+
+/* hypercalls: Check the ARM64's psuedo-firmware bitmap register interface.
+ *
+ * The test validates the basic hypercall functionalities that are exposed
+ * via the psuedo-firmware bitmap register. This includes the registers'
+ * read/write behavior before and after the VM has started, and if the
+ * hypercalls are properly masked or unmasked to the guest when disabled or
+ * enabled from the KVM userspace, respectively.
+ */
+#include <errno.h>
+#include <linux/arm-smccc.h>
+#include <asm/kvm.h>
+#include <kvm_util.h>
+
+#include "processor.h"
+
+#define FW_REG_ULIMIT_VAL(max_feat_bit) (GENMASK(max_feat_bit, 0))
+
+/* Last valid bits of the bitmapped firmware registers */
+#define KVM_REG_ARM_STD_BMAP_BIT_MAX 0
+#define KVM_REG_ARM_STD_HYP_BMAP_BIT_MAX 0
+#define KVM_REG_ARM_VENDOR_HYP_BMAP_BIT_MAX 1
+
+struct kvm_fw_reg_info {
+ uint64_t reg; /* Register definition */
+ uint64_t max_feat_bit; /* Bit that represents the upper limit of the feature-map */
+};
+
+#define FW_REG_INFO(r) \
+ { \
+ .reg = r, \
+ .max_feat_bit = r##_BIT_MAX, \
+ }
+
+static const struct kvm_fw_reg_info fw_reg_info[] = {
+ FW_REG_INFO(KVM_REG_ARM_STD_BMAP),
+ FW_REG_INFO(KVM_REG_ARM_STD_HYP_BMAP),
+ FW_REG_INFO(KVM_REG_ARM_VENDOR_HYP_BMAP),
+};
+
+enum test_stage {
+ TEST_STAGE_REG_IFACE,
+ TEST_STAGE_HVC_IFACE_FEAT_DISABLED,
+ TEST_STAGE_HVC_IFACE_FEAT_ENABLED,
+ TEST_STAGE_HVC_IFACE_FALSE_INFO,
+ TEST_STAGE_END,
+};
+
+static int stage = TEST_STAGE_REG_IFACE;
+
+struct test_hvc_info {
+ uint32_t func_id;
+ uint64_t arg1;
+};
+
+#define TEST_HVC_INFO(f, a1) \
+ { \
+ .func_id = f, \
+ .arg1 = a1, \
+ }
+
+static const struct test_hvc_info hvc_info[] = {
+ /* KVM_REG_ARM_STD_BMAP */
+ TEST_HVC_INFO(ARM_SMCCC_TRNG_VERSION, 0),
+ TEST_HVC_INFO(ARM_SMCCC_TRNG_FEATURES, ARM_SMCCC_TRNG_RND64),
+ TEST_HVC_INFO(ARM_SMCCC_TRNG_GET_UUID, 0),
+ TEST_HVC_INFO(ARM_SMCCC_TRNG_RND32, 0),
+ TEST_HVC_INFO(ARM_SMCCC_TRNG_RND64, 0),
+
+ /* KVM_REG_ARM_STD_HYP_BMAP */
+ TEST_HVC_INFO(ARM_SMCCC_ARCH_FEATURES_FUNC_ID, ARM_SMCCC_HV_PV_TIME_FEATURES),
+ TEST_HVC_INFO(ARM_SMCCC_HV_PV_TIME_FEATURES, ARM_SMCCC_HV_PV_TIME_ST),
+ TEST_HVC_INFO(ARM_SMCCC_HV_PV_TIME_ST, 0),
+
+ /* KVM_REG_ARM_VENDOR_HYP_BMAP */
+ TEST_HVC_INFO(ARM_SMCCC_VENDOR_HYP_KVM_FEATURES_FUNC_ID,
+ ARM_SMCCC_VENDOR_HYP_KVM_PTP_FUNC_ID),
+ TEST_HVC_INFO(ARM_SMCCC_VENDOR_HYP_CALL_UID_FUNC_ID, 0),
+ TEST_HVC_INFO(ARM_SMCCC_VENDOR_HYP_KVM_PTP_FUNC_ID, KVM_PTP_VIRT_COUNTER),
+};
+
+/* Feed false hypercall info to test the KVM behavior */
+static const struct test_hvc_info false_hvc_info[] = {
+ /* Feature support check against a different family of hypercalls */
+ TEST_HVC_INFO(ARM_SMCCC_TRNG_FEATURES, ARM_SMCCC_VENDOR_HYP_KVM_PTP_FUNC_ID),
+ TEST_HVC_INFO(ARM_SMCCC_ARCH_FEATURES_FUNC_ID, ARM_SMCCC_TRNG_RND64),
+ TEST_HVC_INFO(ARM_SMCCC_HV_PV_TIME_FEATURES, ARM_SMCCC_TRNG_RND64),
+};
+
+static void guest_test_hvc(const struct test_hvc_info *hc_info)
+{
+ unsigned int i;
+ struct arm_smccc_res res;
+ unsigned int hvc_info_arr_sz;
+
+ hvc_info_arr_sz =
+ hc_info == hvc_info ? ARRAY_SIZE(hvc_info) : ARRAY_SIZE(false_hvc_info);
+
+ for (i = 0; i < hvc_info_arr_sz; i++, hc_info++) {
+ memset(&res, 0, sizeof(res));
+ smccc_hvc(hc_info->func_id, hc_info->arg1, 0, 0, 0, 0, 0, 0, &res);
+
+ switch (stage) {
+ case TEST_STAGE_HVC_IFACE_FEAT_DISABLED:
+ case TEST_STAGE_HVC_IFACE_FALSE_INFO:
+ __GUEST_ASSERT(res.a0 == SMCCC_RET_NOT_SUPPORTED,
+ "a0 = 0x%lx, func_id = 0x%x, arg1 = 0x%lx, stage = %u",
+ res.a0, hc_info->func_id, hc_info->arg1, stage);
+ break;
+ case TEST_STAGE_HVC_IFACE_FEAT_ENABLED:
+ __GUEST_ASSERT(res.a0 != SMCCC_RET_NOT_SUPPORTED,
+ "a0 = 0x%lx, func_id = 0x%x, arg1 = 0x%lx, stage = %u",
+ res.a0, hc_info->func_id, hc_info->arg1, stage);
+ break;
+ default:
+ GUEST_FAIL("Unexpected stage = %u", stage);
+ }
+ }
+}
+
+static void guest_code(void)
+{
+ while (stage != TEST_STAGE_END) {
+ switch (stage) {
+ case TEST_STAGE_REG_IFACE:
+ break;
+ case TEST_STAGE_HVC_IFACE_FEAT_DISABLED:
+ case TEST_STAGE_HVC_IFACE_FEAT_ENABLED:
+ guest_test_hvc(hvc_info);
+ break;
+ case TEST_STAGE_HVC_IFACE_FALSE_INFO:
+ guest_test_hvc(false_hvc_info);
+ break;
+ default:
+ GUEST_FAIL("Unexpected stage = %u", stage);
+ }
+
+ GUEST_SYNC(stage);
+ }
+
+ GUEST_DONE();
+}
+
+struct st_time {
+ uint32_t rev;
+ uint32_t attr;
+ uint64_t st_time;
+};
+
+#define STEAL_TIME_SIZE ((sizeof(struct st_time) + 63) & ~63)
+#define ST_GPA_BASE (1 << 30)
+
+static void steal_time_init(struct kvm_vcpu *vcpu)
+{
+ uint64_t st_ipa = (ulong)ST_GPA_BASE;
+ unsigned int gpages;
+
+ gpages = vm_calc_num_guest_pages(VM_MODE_DEFAULT, STEAL_TIME_SIZE);
+ vm_userspace_mem_region_add(vcpu->vm, VM_MEM_SRC_ANONYMOUS, ST_GPA_BASE, 1, gpages, 0);
+
+ vcpu_device_attr_set(vcpu, KVM_ARM_VCPU_PVTIME_CTRL,
+ KVM_ARM_VCPU_PVTIME_IPA, &st_ipa);
+}
+
+static void test_fw_regs_before_vm_start(struct kvm_vcpu *vcpu)
+{
+ uint64_t val;
+ unsigned int i;
+ int ret;
+
+ for (i = 0; i < ARRAY_SIZE(fw_reg_info); i++) {
+ const struct kvm_fw_reg_info *reg_info = &fw_reg_info[i];
+
+ /* First 'read' should be an upper limit of the features supported */
+ vcpu_get_reg(vcpu, reg_info->reg, &val);
+ TEST_ASSERT(val == FW_REG_ULIMIT_VAL(reg_info->max_feat_bit),
+ "Expected all the features to be set for reg: 0x%lx; expected: 0x%lx; read: 0x%lx",
+ reg_info->reg, FW_REG_ULIMIT_VAL(reg_info->max_feat_bit), val);
+
+ /* Test a 'write' by disabling all the features of the register map */
+ ret = __vcpu_set_reg(vcpu, reg_info->reg, 0);
+ TEST_ASSERT(ret == 0,
+ "Failed to clear all the features of reg: 0x%lx; ret: %d",
+ reg_info->reg, errno);
+
+ vcpu_get_reg(vcpu, reg_info->reg, &val);
+ TEST_ASSERT(val == 0,
+ "Expected all the features to be cleared for reg: 0x%lx", reg_info->reg);
+
+ /*
+ * Test enabling a feature that's not supported.
+ * Avoid this check if all the bits are occupied.
+ */
+ if (reg_info->max_feat_bit < 63) {
+ ret = __vcpu_set_reg(vcpu, reg_info->reg, BIT(reg_info->max_feat_bit + 1));
+ TEST_ASSERT(ret != 0 && errno == EINVAL,
+ "Unexpected behavior or return value (%d) while setting an unsupported feature for reg: 0x%lx",
+ errno, reg_info->reg);
+ }
+ }
+}
+
+static void test_fw_regs_after_vm_start(struct kvm_vcpu *vcpu)
+{
+ uint64_t val;
+ unsigned int i;
+ int ret;
+
+ for (i = 0; i < ARRAY_SIZE(fw_reg_info); i++) {
+ const struct kvm_fw_reg_info *reg_info = &fw_reg_info[i];
+
+ /*
+ * Before starting the VM, the test clears all the bits.
+ * Check if that's still the case.
+ */
+ vcpu_get_reg(vcpu, reg_info->reg, &val);
+ TEST_ASSERT(val == 0,
+ "Expected all the features to be cleared for reg: 0x%lx",
+ reg_info->reg);
+
+ /*
+ * Since the VM has run at least once, KVM shouldn't allow modification of
+ * the registers and should return EBUSY. Set the registers and check for
+ * the expected errno.
+ */
+ ret = __vcpu_set_reg(vcpu, reg_info->reg, FW_REG_ULIMIT_VAL(reg_info->max_feat_bit));
+ TEST_ASSERT(ret != 0 && errno == EBUSY,
+ "Unexpected behavior or return value (%d) while setting a feature while VM is running for reg: 0x%lx",
+ errno, reg_info->reg);
+ }
+}
+
+static struct kvm_vm *test_vm_create(struct kvm_vcpu **vcpu)
+{
+ struct kvm_vm *vm;
+
+ vm = vm_create_with_one_vcpu(vcpu, guest_code);
+
+ steal_time_init(*vcpu);
+
+ return vm;
+}
+
+static void test_guest_stage(struct kvm_vm **vm, struct kvm_vcpu **vcpu)
+{
+ int prev_stage = stage;
+
+ pr_debug("Stage: %d\n", prev_stage);
+
+ /* Sync the stage early, the VM might be freed below. */
+ stage++;
+ sync_global_to_guest(*vm, stage);
+
+ switch (prev_stage) {
+ case TEST_STAGE_REG_IFACE:
+ test_fw_regs_after_vm_start(*vcpu);
+ break;
+ case TEST_STAGE_HVC_IFACE_FEAT_DISABLED:
+ /* Start a new VM so that all the features are now enabled by default */
+ kvm_vm_free(*vm);
+ *vm = test_vm_create(vcpu);
+ break;
+ case TEST_STAGE_HVC_IFACE_FEAT_ENABLED:
+ case TEST_STAGE_HVC_IFACE_FALSE_INFO:
+ break;
+ default:
+ TEST_FAIL("Unknown test stage: %d", prev_stage);
+ }
+}
+
+static void test_run(void)
+{
+ struct kvm_vcpu *vcpu;
+ struct kvm_vm *vm;
+ struct ucall uc;
+ bool guest_done = false;
+
+ vm = test_vm_create(&vcpu);
+
+ test_fw_regs_before_vm_start(vcpu);
+
+ while (!guest_done) {
+ vcpu_run(vcpu);
+
+ switch (get_ucall(vcpu, &uc)) {
+ case UCALL_SYNC:
+ test_guest_stage(&vm, &vcpu);
+ break;
+ case UCALL_DONE:
+ guest_done = true;
+ break;
+ case UCALL_ABORT:
+ REPORT_GUEST_ASSERT(uc);
+ break;
+ default:
+ TEST_FAIL("Unexpected guest exit");
+ }
+ }
+
+ kvm_vm_free(vm);
+}
+
+int main(void)
+{
+ test_run();
+ return 0;
+}
diff --git a/tools/testing/selftests/kvm/aarch64/page_fault_test.c b/tools/testing/selftests/kvm/aarch64/page_fault_test.c
new file mode 100644
index 000000000000..5972905275cf
--- /dev/null
+++ b/tools/testing/selftests/kvm/aarch64/page_fault_test.c
@@ -0,0 +1,1136 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * page_fault_test.c - Test stage 2 faults.
+ *
+ * This test tries different combinations of guest accesses (e.g., write,
+ * S1PTW), backing source type (e.g., anon) and types of faults (e.g., read on
+ * hugetlbfs with a hole). It checks that the expected handling method is
+ * called (e.g., uffd faults with the right address and write/read flag).
+ */
+#define _GNU_SOURCE
+#include <linux/bitmap.h>
+#include <fcntl.h>
+#include <test_util.h>
+#include <kvm_util.h>
+#include <processor.h>
+#include <asm/sysreg.h>
+#include <linux/bitfield.h>
+#include "guest_modes.h"
+#include "userfaultfd_util.h"
+
+/* Guest virtual addresses that point to the test page and its PTE. */
+#define TEST_GVA 0xc0000000
+#define TEST_EXEC_GVA (TEST_GVA + 0x8)
+#define TEST_PTE_GVA 0xb0000000
+#define TEST_DATA 0x0123456789ABCDEF
+
+static uint64_t *guest_test_memory = (uint64_t *)TEST_GVA;
+
+#define CMD_NONE (0)
+#define CMD_SKIP_TEST (1ULL << 1)
+#define CMD_HOLE_PT (1ULL << 2)
+#define CMD_HOLE_DATA (1ULL << 3)
+#define CMD_CHECK_WRITE_IN_DIRTY_LOG (1ULL << 4)
+#define CMD_CHECK_S1PTW_WR_IN_DIRTY_LOG (1ULL << 5)
+#define CMD_CHECK_NO_WRITE_IN_DIRTY_LOG (1ULL << 6)
+#define CMD_CHECK_NO_S1PTW_WR_IN_DIRTY_LOG (1ULL << 7)
+#define CMD_SET_PTE_AF (1ULL << 8)
+
+#define PREPARE_FN_NR 10
+#define CHECK_FN_NR 10
+
+static struct event_cnt {
+ int mmio_exits;
+ int fail_vcpu_runs;
+ int uffd_faults;
+ /* uffd_faults is incremented from multiple threads. */
+ pthread_mutex_t uffd_faults_mutex;
+} events;
+
+struct test_desc {
+ const char *name;
+ uint64_t mem_mark_cmd;
+ /* Skip the test if any prepare function returns false */
+ bool (*guest_prepare[PREPARE_FN_NR])(void);
+ void (*guest_test)(void);
+ void (*guest_test_check[CHECK_FN_NR])(void);
+ uffd_handler_t uffd_pt_handler;
+ uffd_handler_t uffd_data_handler;
+ void (*dabt_handler)(struct ex_regs *regs);
+ void (*iabt_handler)(struct ex_regs *regs);
+ void (*mmio_handler)(struct kvm_vm *vm, struct kvm_run *run);
+ void (*fail_vcpu_run_handler)(int ret);
+ uint32_t pt_memslot_flags;
+ uint32_t data_memslot_flags;
+ bool skip;
+ struct event_cnt expected_events;
+};
+
+struct test_params {
+ enum vm_mem_backing_src_type src_type;
+ struct test_desc *test_desc;
+};
+
+static inline void flush_tlb_page(uint64_t vaddr)
+{
+ uint64_t page = vaddr >> 12;
+
+ dsb(ishst);
+ asm volatile("tlbi vaae1is, %0" :: "r" (page));
+ dsb(ish);
+ isb();
+}
+
+static void guest_write64(void)
+{
+ uint64_t val;
+
+ WRITE_ONCE(*guest_test_memory, TEST_DATA);
+ val = READ_ONCE(*guest_test_memory);
+ GUEST_ASSERT_EQ(val, TEST_DATA);
+}
+
+/* Check the system for atomic instructions. */
+static bool guest_check_lse(void)
+{
+ uint64_t isar0 = read_sysreg(id_aa64isar0_el1);
+ uint64_t atomic;
+
+ atomic = FIELD_GET(ARM64_FEATURE_MASK(ID_AA64ISAR0_EL1_ATOMIC), isar0);
+ return atomic >= 2;
+}
+
+static bool guest_check_dc_zva(void)
+{
+ uint64_t dczid = read_sysreg(dczid_el0);
+ uint64_t dzp = FIELD_GET(ARM64_FEATURE_MASK(DCZID_EL0_DZP), dczid);
+
+ return dzp == 0;
+}
+
+/* Compare and swap instruction. */
+static void guest_cas(void)
+{
+ uint64_t val;
+
+ GUEST_ASSERT(guest_check_lse());
+ asm volatile(".arch_extension lse\n"
+ "casal %0, %1, [%2]\n"
+ :: "r" (0ul), "r" (TEST_DATA), "r" (guest_test_memory));
+ val = READ_ONCE(*guest_test_memory);
+ GUEST_ASSERT_EQ(val, TEST_DATA);
+}
+
+static void guest_read64(void)
+{
+ uint64_t val;
+
+ val = READ_ONCE(*guest_test_memory);
+ GUEST_ASSERT_EQ(val, 0);
+}
+
+/* Address translation instruction */
+static void guest_at(void)
+{
+ uint64_t par;
+
+ asm volatile("at s1e1r, %0" :: "r" (guest_test_memory));
+ isb();
+ par = read_sysreg(par_el1);
+
+ /* Bit 1 indicates whether the AT was successful */
+ GUEST_ASSERT_EQ(par & 1, 0);
+}
+
+/*
+ * The size of the block written by "dc zva" is guaranteed to be between (2 <<
+ * 0) and (2 << 9), which is safe in our case as we need the write to happen
+ * for at least a word, and not more than a page.
+ */
+static void guest_dc_zva(void)
+{
+ uint16_t val;
+
+ asm volatile("dc zva, %0" :: "r" (guest_test_memory));
+ dsb(ish);
+ val = READ_ONCE(*guest_test_memory);
+ GUEST_ASSERT_EQ(val, 0);
+}
+
+/*
+ * Pre-indexing loads and stores don't have a valid syndrome (ESR_EL2.ISV==0).
+ * And that's special because KVM must take special care with those: they
+ * should still count as accesses for dirty logging or user-faulting, but
+ * should be handled differently on mmio.
+ */
+static void guest_ld_preidx(void)
+{
+ uint64_t val;
+ uint64_t addr = TEST_GVA - 8;
+
+ /*
+ * This ends up accessing "TEST_GVA + 8 - 8", where "TEST_GVA - 8" is
+ * in a gap between memslots not backing by anything.
+ */
+ asm volatile("ldr %0, [%1, #8]!"
+ : "=r" (val), "+r" (addr));
+ GUEST_ASSERT_EQ(val, 0);
+ GUEST_ASSERT_EQ(addr, TEST_GVA);
+}
+
+static void guest_st_preidx(void)
+{
+ uint64_t val = TEST_DATA;
+ uint64_t addr = TEST_GVA - 8;
+
+ asm volatile("str %0, [%1, #8]!"
+ : "+r" (val), "+r" (addr));
+
+ GUEST_ASSERT_EQ(addr, TEST_GVA);
+ val = READ_ONCE(*guest_test_memory);
+}
+
+static bool guest_set_ha(void)
+{
+ uint64_t mmfr1 = read_sysreg(id_aa64mmfr1_el1);
+ uint64_t hadbs, tcr;
+
+ /* Skip if HA is not supported. */
+ hadbs = FIELD_GET(ARM64_FEATURE_MASK(ID_AA64MMFR1_EL1_HAFDBS), mmfr1);
+ if (hadbs == 0)
+ return false;
+
+ tcr = read_sysreg(tcr_el1) | TCR_EL1_HA;
+ write_sysreg(tcr, tcr_el1);
+ isb();
+
+ return true;
+}
+
+static bool guest_clear_pte_af(void)
+{
+ *((uint64_t *)TEST_PTE_GVA) &= ~PTE_AF;
+ flush_tlb_page(TEST_GVA);
+
+ return true;
+}
+
+static void guest_check_pte_af(void)
+{
+ dsb(ish);
+ GUEST_ASSERT_EQ(*((uint64_t *)TEST_PTE_GVA) & PTE_AF, PTE_AF);
+}
+
+static void guest_check_write_in_dirty_log(void)
+{
+ GUEST_SYNC(CMD_CHECK_WRITE_IN_DIRTY_LOG);
+}
+
+static void guest_check_no_write_in_dirty_log(void)
+{
+ GUEST_SYNC(CMD_CHECK_NO_WRITE_IN_DIRTY_LOG);
+}
+
+static void guest_check_s1ptw_wr_in_dirty_log(void)
+{
+ GUEST_SYNC(CMD_CHECK_S1PTW_WR_IN_DIRTY_LOG);
+}
+
+static void guest_check_no_s1ptw_wr_in_dirty_log(void)
+{
+ GUEST_SYNC(CMD_CHECK_NO_S1PTW_WR_IN_DIRTY_LOG);
+}
+
+static void guest_exec(void)
+{
+ int (*code)(void) = (int (*)(void))TEST_EXEC_GVA;
+ int ret;
+
+ ret = code();
+ GUEST_ASSERT_EQ(ret, 0x77);
+}
+
+static bool guest_prepare(struct test_desc *test)
+{
+ bool (*prepare_fn)(void);
+ int i;
+
+ for (i = 0; i < PREPARE_FN_NR; i++) {
+ prepare_fn = test->guest_prepare[i];
+ if (prepare_fn && !prepare_fn())
+ return false;
+ }
+
+ return true;
+}
+
+static void guest_test_check(struct test_desc *test)
+{
+ void (*check_fn)(void);
+ int i;
+
+ for (i = 0; i < CHECK_FN_NR; i++) {
+ check_fn = test->guest_test_check[i];
+ if (check_fn)
+ check_fn();
+ }
+}
+
+static void guest_code(struct test_desc *test)
+{
+ if (!guest_prepare(test))
+ GUEST_SYNC(CMD_SKIP_TEST);
+
+ GUEST_SYNC(test->mem_mark_cmd);
+
+ if (test->guest_test)
+ test->guest_test();
+
+ guest_test_check(test);
+ GUEST_DONE();
+}
+
+static void no_dabt_handler(struct ex_regs *regs)
+{
+ GUEST_FAIL("Unexpected dabt, far_el1 = 0x%lx", read_sysreg(far_el1));
+}
+
+static void no_iabt_handler(struct ex_regs *regs)
+{
+ GUEST_FAIL("Unexpected iabt, pc = 0x%lx", regs->pc);
+}
+
+static struct uffd_args {
+ char *copy;
+ void *hva;
+ uint64_t paging_size;
+} pt_args, data_args;
+
+/* Returns true to continue the test, and false if it should be skipped. */
+static int uffd_generic_handler(int uffd_mode, int uffd, struct uffd_msg *msg,
+ struct uffd_args *args)
+{
+ uint64_t addr = msg->arg.pagefault.address;
+ uint64_t flags = msg->arg.pagefault.flags;
+ struct uffdio_copy copy;
+ int ret;
+
+ TEST_ASSERT(uffd_mode == UFFDIO_REGISTER_MODE_MISSING,
+ "The only expected UFFD mode is MISSING");
+ TEST_ASSERT_EQ(addr, (uint64_t)args->hva);
+
+ pr_debug("uffd fault: addr=%p write=%d\n",
+ (void *)addr, !!(flags & UFFD_PAGEFAULT_FLAG_WRITE));
+
+ copy.src = (uint64_t)args->copy;
+ copy.dst = addr;
+ copy.len = args->paging_size;
+ copy.mode = 0;
+
+ ret = ioctl(uffd, UFFDIO_COPY, &copy);
+ if (ret == -1) {
+ pr_info("Failed UFFDIO_COPY in 0x%lx with errno: %d\n",
+ addr, errno);
+ return ret;
+ }
+
+ pthread_mutex_lock(&events.uffd_faults_mutex);
+ events.uffd_faults += 1;
+ pthread_mutex_unlock(&events.uffd_faults_mutex);
+ return 0;
+}
+
+static int uffd_pt_handler(int mode, int uffd, struct uffd_msg *msg)
+{
+ return uffd_generic_handler(mode, uffd, msg, &pt_args);
+}
+
+static int uffd_data_handler(int mode, int uffd, struct uffd_msg *msg)
+{
+ return uffd_generic_handler(mode, uffd, msg, &data_args);
+}
+
+static void setup_uffd_args(struct userspace_mem_region *region,
+ struct uffd_args *args)
+{
+ args->hva = (void *)region->region.userspace_addr;
+ args->paging_size = region->region.memory_size;
+
+ args->copy = malloc(args->paging_size);
+ TEST_ASSERT(args->copy, "Failed to allocate data copy.");
+ memcpy(args->copy, args->hva, args->paging_size);
+}
+
+static void setup_uffd(struct kvm_vm *vm, struct test_params *p,
+ struct uffd_desc **pt_uffd, struct uffd_desc **data_uffd)
+{
+ struct test_desc *test = p->test_desc;
+ int uffd_mode = UFFDIO_REGISTER_MODE_MISSING;
+
+ setup_uffd_args(vm_get_mem_region(vm, MEM_REGION_PT), &pt_args);
+ setup_uffd_args(vm_get_mem_region(vm, MEM_REGION_TEST_DATA), &data_args);
+
+ *pt_uffd = NULL;
+ if (test->uffd_pt_handler)
+ *pt_uffd = uffd_setup_demand_paging(uffd_mode, 0,
+ pt_args.hva,
+ pt_args.paging_size,
+ test->uffd_pt_handler);
+
+ *data_uffd = NULL;
+ if (test->uffd_data_handler)
+ *data_uffd = uffd_setup_demand_paging(uffd_mode, 0,
+ data_args.hva,
+ data_args.paging_size,
+ test->uffd_data_handler);
+}
+
+static void free_uffd(struct test_desc *test, struct uffd_desc *pt_uffd,
+ struct uffd_desc *data_uffd)
+{
+ if (test->uffd_pt_handler)
+ uffd_stop_demand_paging(pt_uffd);
+ if (test->uffd_data_handler)
+ uffd_stop_demand_paging(data_uffd);
+
+ free(pt_args.copy);
+ free(data_args.copy);
+}
+
+static int uffd_no_handler(int mode, int uffd, struct uffd_msg *msg)
+{
+ TEST_FAIL("There was no UFFD fault expected.");
+ return -1;
+}
+
+/* Returns false if the test should be skipped. */
+static bool punch_hole_in_backing_store(struct kvm_vm *vm,
+ struct userspace_mem_region *region)
+{
+ void *hva = (void *)region->region.userspace_addr;
+ uint64_t paging_size = region->region.memory_size;
+ int ret, fd = region->fd;
+
+ if (fd != -1) {
+ ret = fallocate(fd, FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE,
+ 0, paging_size);
+ TEST_ASSERT(ret == 0, "fallocate failed");
+ } else {
+ ret = madvise(hva, paging_size, MADV_DONTNEED);
+ TEST_ASSERT(ret == 0, "madvise failed");
+ }
+
+ return true;
+}
+
+static void mmio_on_test_gpa_handler(struct kvm_vm *vm, struct kvm_run *run)
+{
+ struct userspace_mem_region *region;
+ void *hva;
+
+ region = vm_get_mem_region(vm, MEM_REGION_TEST_DATA);
+ hva = (void *)region->region.userspace_addr;
+
+ TEST_ASSERT_EQ(run->mmio.phys_addr, region->region.guest_phys_addr);
+
+ memcpy(hva, run->mmio.data, run->mmio.len);
+ events.mmio_exits += 1;
+}
+
+static void mmio_no_handler(struct kvm_vm *vm, struct kvm_run *run)
+{
+ uint64_t data;
+
+ memcpy(&data, run->mmio.data, sizeof(data));
+ pr_debug("addr=%lld len=%d w=%d data=%lx\n",
+ run->mmio.phys_addr, run->mmio.len,
+ run->mmio.is_write, data);
+ TEST_FAIL("There was no MMIO exit expected.");
+}
+
+static bool check_write_in_dirty_log(struct kvm_vm *vm,
+ struct userspace_mem_region *region,
+ uint64_t host_pg_nr)
+{
+ unsigned long *bmap;
+ bool first_page_dirty;
+ uint64_t size = region->region.memory_size;
+
+ /* getpage_size() is not always equal to vm->page_size */
+ bmap = bitmap_zalloc(size / getpagesize());
+ kvm_vm_get_dirty_log(vm, region->region.slot, bmap);
+ first_page_dirty = test_bit(host_pg_nr, bmap);
+ free(bmap);
+ return first_page_dirty;
+}
+
+/* Returns true to continue the test, and false if it should be skipped. */
+static bool handle_cmd(struct kvm_vm *vm, int cmd)
+{
+ struct userspace_mem_region *data_region, *pt_region;
+ bool continue_test = true;
+ uint64_t pte_gpa, pte_pg;
+
+ data_region = vm_get_mem_region(vm, MEM_REGION_TEST_DATA);
+ pt_region = vm_get_mem_region(vm, MEM_REGION_PT);
+ pte_gpa = addr_hva2gpa(vm, virt_get_pte_hva(vm, TEST_GVA));
+ pte_pg = (pte_gpa - pt_region->region.guest_phys_addr) / getpagesize();
+
+ if (cmd == CMD_SKIP_TEST)
+ continue_test = false;
+
+ if (cmd & CMD_HOLE_PT)
+ continue_test = punch_hole_in_backing_store(vm, pt_region);
+ if (cmd & CMD_HOLE_DATA)
+ continue_test = punch_hole_in_backing_store(vm, data_region);
+ if (cmd & CMD_CHECK_WRITE_IN_DIRTY_LOG)
+ TEST_ASSERT(check_write_in_dirty_log(vm, data_region, 0),
+ "Missing write in dirty log");
+ if (cmd & CMD_CHECK_S1PTW_WR_IN_DIRTY_LOG)
+ TEST_ASSERT(check_write_in_dirty_log(vm, pt_region, pte_pg),
+ "Missing s1ptw write in dirty log");
+ if (cmd & CMD_CHECK_NO_WRITE_IN_DIRTY_LOG)
+ TEST_ASSERT(!check_write_in_dirty_log(vm, data_region, 0),
+ "Unexpected write in dirty log");
+ if (cmd & CMD_CHECK_NO_S1PTW_WR_IN_DIRTY_LOG)
+ TEST_ASSERT(!check_write_in_dirty_log(vm, pt_region, pte_pg),
+ "Unexpected s1ptw write in dirty log");
+
+ return continue_test;
+}
+
+void fail_vcpu_run_no_handler(int ret)
+{
+ TEST_FAIL("Unexpected vcpu run failure");
+}
+
+void fail_vcpu_run_mmio_no_syndrome_handler(int ret)
+{
+ TEST_ASSERT(errno == ENOSYS,
+ "The mmio handler should have returned not implemented.");
+ events.fail_vcpu_runs += 1;
+}
+
+typedef uint32_t aarch64_insn_t;
+extern aarch64_insn_t __exec_test[2];
+
+noinline void __return_0x77(void)
+{
+ asm volatile("__exec_test: mov x0, #0x77\n"
+ "ret\n");
+}
+
+/*
+ * Note that this function runs on the host before the test VM starts: there's
+ * no need to sync the D$ and I$ caches.
+ */
+static void load_exec_code_for_test(struct kvm_vm *vm)
+{
+ uint64_t *code;
+ struct userspace_mem_region *region;
+ void *hva;
+
+ region = vm_get_mem_region(vm, MEM_REGION_TEST_DATA);
+ hva = (void *)region->region.userspace_addr;
+
+ assert(TEST_EXEC_GVA > TEST_GVA);
+ code = hva + TEST_EXEC_GVA - TEST_GVA;
+ memcpy(code, __exec_test, sizeof(__exec_test));
+}
+
+static void setup_abort_handlers(struct kvm_vm *vm, struct kvm_vcpu *vcpu,
+ struct test_desc *test)
+{
+ vm_init_descriptor_tables(vm);
+ vcpu_init_descriptor_tables(vcpu);
+
+ vm_install_sync_handler(vm, VECTOR_SYNC_CURRENT,
+ ESR_EC_DABT, no_dabt_handler);
+ vm_install_sync_handler(vm, VECTOR_SYNC_CURRENT,
+ ESR_EC_IABT, no_iabt_handler);
+}
+
+static void setup_gva_maps(struct kvm_vm *vm)
+{
+ struct userspace_mem_region *region;
+ uint64_t pte_gpa;
+
+ region = vm_get_mem_region(vm, MEM_REGION_TEST_DATA);
+ /* Map TEST_GVA first. This will install a new PTE. */
+ virt_pg_map(vm, TEST_GVA, region->region.guest_phys_addr);
+ /* Then map TEST_PTE_GVA to the above PTE. */
+ pte_gpa = addr_hva2gpa(vm, virt_get_pte_hva(vm, TEST_GVA));
+ virt_pg_map(vm, TEST_PTE_GVA, pte_gpa);
+}
+
+enum pf_test_memslots {
+ CODE_AND_DATA_MEMSLOT,
+ PAGE_TABLE_MEMSLOT,
+ TEST_DATA_MEMSLOT,
+};
+
+/*
+ * Create a memslot for code and data at pfn=0, and test-data and PT ones
+ * at max_gfn.
+ */
+static void setup_memslots(struct kvm_vm *vm, struct test_params *p)
+{
+ uint64_t backing_src_pagesz = get_backing_src_pagesz(p->src_type);
+ uint64_t guest_page_size = vm->page_size;
+ uint64_t max_gfn = vm_compute_max_gfn(vm);
+ /* Enough for 2M of code when using 4K guest pages. */
+ uint64_t code_npages = 512;
+ uint64_t pt_size, data_size, data_gpa;
+
+ /*
+ * This test requires 1 pgd, 2 pud, 4 pmd, and 6 pte pages when using
+ * VM_MODE_P48V48_4K. Note that the .text takes ~1.6MBs. That's 13
+ * pages. VM_MODE_P48V48_4K is the mode with most PT pages; let's use
+ * twice that just in case.
+ */
+ pt_size = 26 * guest_page_size;
+
+ /* memslot sizes and gpa's must be aligned to the backing page size */
+ pt_size = align_up(pt_size, backing_src_pagesz);
+ data_size = align_up(guest_page_size, backing_src_pagesz);
+ data_gpa = (max_gfn * guest_page_size) - data_size;
+ data_gpa = align_down(data_gpa, backing_src_pagesz);
+
+ vm_userspace_mem_region_add(vm, VM_MEM_SRC_ANONYMOUS, 0,
+ CODE_AND_DATA_MEMSLOT, code_npages, 0);
+ vm->memslots[MEM_REGION_CODE] = CODE_AND_DATA_MEMSLOT;
+ vm->memslots[MEM_REGION_DATA] = CODE_AND_DATA_MEMSLOT;
+
+ vm_userspace_mem_region_add(vm, p->src_type, data_gpa - pt_size,
+ PAGE_TABLE_MEMSLOT, pt_size / guest_page_size,
+ p->test_desc->pt_memslot_flags);
+ vm->memslots[MEM_REGION_PT] = PAGE_TABLE_MEMSLOT;
+
+ vm_userspace_mem_region_add(vm, p->src_type, data_gpa, TEST_DATA_MEMSLOT,
+ data_size / guest_page_size,
+ p->test_desc->data_memslot_flags);
+ vm->memslots[MEM_REGION_TEST_DATA] = TEST_DATA_MEMSLOT;
+}
+
+static void setup_ucall(struct kvm_vm *vm)
+{
+ struct userspace_mem_region *region = vm_get_mem_region(vm, MEM_REGION_TEST_DATA);
+
+ ucall_init(vm, region->region.guest_phys_addr + region->region.memory_size);
+}
+
+static void setup_default_handlers(struct test_desc *test)
+{
+ if (!test->mmio_handler)
+ test->mmio_handler = mmio_no_handler;
+
+ if (!test->fail_vcpu_run_handler)
+ test->fail_vcpu_run_handler = fail_vcpu_run_no_handler;
+}
+
+static void check_event_counts(struct test_desc *test)
+{
+ TEST_ASSERT_EQ(test->expected_events.uffd_faults, events.uffd_faults);
+ TEST_ASSERT_EQ(test->expected_events.mmio_exits, events.mmio_exits);
+ TEST_ASSERT_EQ(test->expected_events.fail_vcpu_runs, events.fail_vcpu_runs);
+}
+
+static void print_test_banner(enum vm_guest_mode mode, struct test_params *p)
+{
+ struct test_desc *test = p->test_desc;
+
+ pr_debug("Test: %s\n", test->name);
+ pr_debug("Testing guest mode: %s\n", vm_guest_mode_string(mode));
+ pr_debug("Testing memory backing src type: %s\n",
+ vm_mem_backing_src_alias(p->src_type)->name);
+}
+
+static void reset_event_counts(void)
+{
+ memset(&events, 0, sizeof(events));
+}
+
+/*
+ * This function either succeeds, skips the test (after setting test->skip), or
+ * fails with a TEST_FAIL that aborts all tests.
+ */
+static void vcpu_run_loop(struct kvm_vm *vm, struct kvm_vcpu *vcpu,
+ struct test_desc *test)
+{
+ struct kvm_run *run;
+ struct ucall uc;
+ int ret;
+
+ run = vcpu->run;
+
+ for (;;) {
+ ret = _vcpu_run(vcpu);
+ if (ret) {
+ test->fail_vcpu_run_handler(ret);
+ goto done;
+ }
+
+ switch (get_ucall(vcpu, &uc)) {
+ case UCALL_SYNC:
+ if (!handle_cmd(vm, uc.args[1])) {
+ test->skip = true;
+ goto done;
+ }
+ break;
+ case UCALL_ABORT:
+ REPORT_GUEST_ASSERT(uc);
+ break;
+ case UCALL_DONE:
+ goto done;
+ case UCALL_NONE:
+ if (run->exit_reason == KVM_EXIT_MMIO)
+ test->mmio_handler(vm, run);
+ break;
+ default:
+ TEST_FAIL("Unknown ucall %lu", uc.cmd);
+ }
+ }
+
+done:
+ pr_debug(test->skip ? "Skipped.\n" : "Done.\n");
+}
+
+static void run_test(enum vm_guest_mode mode, void *arg)
+{
+ struct test_params *p = (struct test_params *)arg;
+ struct test_desc *test = p->test_desc;
+ struct kvm_vm *vm;
+ struct kvm_vcpu *vcpu;
+ struct uffd_desc *pt_uffd, *data_uffd;
+
+ print_test_banner(mode, p);
+
+ vm = ____vm_create(VM_SHAPE(mode));
+ setup_memslots(vm, p);
+ kvm_vm_elf_load(vm, program_invocation_name);
+ setup_ucall(vm);
+ vcpu = vm_vcpu_add(vm, 0, guest_code);
+
+ setup_gva_maps(vm);
+
+ reset_event_counts();
+
+ /*
+ * Set some code in the data memslot for the guest to execute (only
+ * applicable to the EXEC tests). This has to be done before
+ * setup_uffd() as that function copies the memslot data for the uffd
+ * handler.
+ */
+ load_exec_code_for_test(vm);
+ setup_uffd(vm, p, &pt_uffd, &data_uffd);
+ setup_abort_handlers(vm, vcpu, test);
+ setup_default_handlers(test);
+ vcpu_args_set(vcpu, 1, test);
+
+ vcpu_run_loop(vm, vcpu, test);
+
+ kvm_vm_free(vm);
+ free_uffd(test, pt_uffd, data_uffd);
+
+ /*
+ * Make sure we check the events after the uffd threads have exited,
+ * which means they updated their respective event counters.
+ */
+ if (!test->skip)
+ check_event_counts(test);
+}
+
+static void help(char *name)
+{
+ puts("");
+ printf("usage: %s [-h] [-s mem-type]\n", name);
+ puts("");
+ guest_modes_help();
+ backing_src_help("-s");
+ puts("");
+}
+
+#define SNAME(s) #s
+#define SCAT2(a, b) SNAME(a ## _ ## b)
+#define SCAT3(a, b, c) SCAT2(a, SCAT2(b, c))
+#define SCAT4(a, b, c, d) SCAT2(a, SCAT3(b, c, d))
+
+#define _CHECK(_test) _CHECK_##_test
+#define _PREPARE(_test) _PREPARE_##_test
+#define _PREPARE_guest_read64 NULL
+#define _PREPARE_guest_ld_preidx NULL
+#define _PREPARE_guest_write64 NULL
+#define _PREPARE_guest_st_preidx NULL
+#define _PREPARE_guest_exec NULL
+#define _PREPARE_guest_at NULL
+#define _PREPARE_guest_dc_zva guest_check_dc_zva
+#define _PREPARE_guest_cas guest_check_lse
+
+/* With or without access flag checks */
+#define _PREPARE_with_af guest_set_ha, guest_clear_pte_af
+#define _PREPARE_no_af NULL
+#define _CHECK_with_af guest_check_pte_af
+#define _CHECK_no_af NULL
+
+/* Performs an access and checks that no faults were triggered. */
+#define TEST_ACCESS(_access, _with_af, _mark_cmd) \
+{ \
+ .name = SCAT3(_access, _with_af, #_mark_cmd), \
+ .guest_prepare = { _PREPARE(_with_af), \
+ _PREPARE(_access) }, \
+ .mem_mark_cmd = _mark_cmd, \
+ .guest_test = _access, \
+ .guest_test_check = { _CHECK(_with_af) }, \
+ .expected_events = { 0 }, \
+}
+
+#define TEST_UFFD(_access, _with_af, _mark_cmd, \
+ _uffd_data_handler, _uffd_pt_handler, _uffd_faults) \
+{ \
+ .name = SCAT4(uffd, _access, _with_af, #_mark_cmd), \
+ .guest_prepare = { _PREPARE(_with_af), \
+ _PREPARE(_access) }, \
+ .guest_test = _access, \
+ .mem_mark_cmd = _mark_cmd, \
+ .guest_test_check = { _CHECK(_with_af) }, \
+ .uffd_data_handler = _uffd_data_handler, \
+ .uffd_pt_handler = _uffd_pt_handler, \
+ .expected_events = { .uffd_faults = _uffd_faults, }, \
+}
+
+#define TEST_DIRTY_LOG(_access, _with_af, _test_check, _pt_check) \
+{ \
+ .name = SCAT3(dirty_log, _access, _with_af), \
+ .data_memslot_flags = KVM_MEM_LOG_DIRTY_PAGES, \
+ .pt_memslot_flags = KVM_MEM_LOG_DIRTY_PAGES, \
+ .guest_prepare = { _PREPARE(_with_af), \
+ _PREPARE(_access) }, \
+ .guest_test = _access, \
+ .guest_test_check = { _CHECK(_with_af), _test_check, _pt_check }, \
+ .expected_events = { 0 }, \
+}
+
+#define TEST_UFFD_AND_DIRTY_LOG(_access, _with_af, _uffd_data_handler, \
+ _uffd_faults, _test_check, _pt_check) \
+{ \
+ .name = SCAT3(uffd_and_dirty_log, _access, _with_af), \
+ .data_memslot_flags = KVM_MEM_LOG_DIRTY_PAGES, \
+ .pt_memslot_flags = KVM_MEM_LOG_DIRTY_PAGES, \
+ .guest_prepare = { _PREPARE(_with_af), \
+ _PREPARE(_access) }, \
+ .guest_test = _access, \
+ .mem_mark_cmd = CMD_HOLE_DATA | CMD_HOLE_PT, \
+ .guest_test_check = { _CHECK(_with_af), _test_check, _pt_check }, \
+ .uffd_data_handler = _uffd_data_handler, \
+ .uffd_pt_handler = uffd_pt_handler, \
+ .expected_events = { .uffd_faults = _uffd_faults, }, \
+}
+
+#define TEST_RO_MEMSLOT(_access, _mmio_handler, _mmio_exits) \
+{ \
+ .name = SCAT2(ro_memslot, _access), \
+ .data_memslot_flags = KVM_MEM_READONLY, \
+ .pt_memslot_flags = KVM_MEM_READONLY, \
+ .guest_prepare = { _PREPARE(_access) }, \
+ .guest_test = _access, \
+ .mmio_handler = _mmio_handler, \
+ .expected_events = { .mmio_exits = _mmio_exits }, \
+}
+
+#define TEST_RO_MEMSLOT_NO_SYNDROME(_access) \
+{ \
+ .name = SCAT2(ro_memslot_no_syndrome, _access), \
+ .data_memslot_flags = KVM_MEM_READONLY, \
+ .pt_memslot_flags = KVM_MEM_READONLY, \
+ .guest_prepare = { _PREPARE(_access) }, \
+ .guest_test = _access, \
+ .fail_vcpu_run_handler = fail_vcpu_run_mmio_no_syndrome_handler, \
+ .expected_events = { .fail_vcpu_runs = 1 }, \
+}
+
+#define TEST_RO_MEMSLOT_AND_DIRTY_LOG(_access, _mmio_handler, _mmio_exits, \
+ _test_check) \
+{ \
+ .name = SCAT2(ro_memslot, _access), \
+ .data_memslot_flags = KVM_MEM_READONLY | KVM_MEM_LOG_DIRTY_PAGES, \
+ .pt_memslot_flags = KVM_MEM_READONLY | KVM_MEM_LOG_DIRTY_PAGES, \
+ .guest_prepare = { _PREPARE(_access) }, \
+ .guest_test = _access, \
+ .guest_test_check = { _test_check }, \
+ .mmio_handler = _mmio_handler, \
+ .expected_events = { .mmio_exits = _mmio_exits}, \
+}
+
+#define TEST_RO_MEMSLOT_NO_SYNDROME_AND_DIRTY_LOG(_access, _test_check) \
+{ \
+ .name = SCAT2(ro_memslot_no_syn_and_dlog, _access), \
+ .data_memslot_flags = KVM_MEM_READONLY | KVM_MEM_LOG_DIRTY_PAGES, \
+ .pt_memslot_flags = KVM_MEM_READONLY | KVM_MEM_LOG_DIRTY_PAGES, \
+ .guest_prepare = { _PREPARE(_access) }, \
+ .guest_test = _access, \
+ .guest_test_check = { _test_check }, \
+ .fail_vcpu_run_handler = fail_vcpu_run_mmio_no_syndrome_handler, \
+ .expected_events = { .fail_vcpu_runs = 1 }, \
+}
+
+#define TEST_RO_MEMSLOT_AND_UFFD(_access, _mmio_handler, _mmio_exits, \
+ _uffd_data_handler, _uffd_faults) \
+{ \
+ .name = SCAT2(ro_memslot_uffd, _access), \
+ .data_memslot_flags = KVM_MEM_READONLY, \
+ .pt_memslot_flags = KVM_MEM_READONLY, \
+ .mem_mark_cmd = CMD_HOLE_DATA | CMD_HOLE_PT, \
+ .guest_prepare = { _PREPARE(_access) }, \
+ .guest_test = _access, \
+ .uffd_data_handler = _uffd_data_handler, \
+ .uffd_pt_handler = uffd_pt_handler, \
+ .mmio_handler = _mmio_handler, \
+ .expected_events = { .mmio_exits = _mmio_exits, \
+ .uffd_faults = _uffd_faults }, \
+}
+
+#define TEST_RO_MEMSLOT_NO_SYNDROME_AND_UFFD(_access, _uffd_data_handler, \
+ _uffd_faults) \
+{ \
+ .name = SCAT2(ro_memslot_no_syndrome, _access), \
+ .data_memslot_flags = KVM_MEM_READONLY, \
+ .pt_memslot_flags = KVM_MEM_READONLY, \
+ .mem_mark_cmd = CMD_HOLE_DATA | CMD_HOLE_PT, \
+ .guest_prepare = { _PREPARE(_access) }, \
+ .guest_test = _access, \
+ .uffd_data_handler = _uffd_data_handler, \
+ .uffd_pt_handler = uffd_pt_handler, \
+ .fail_vcpu_run_handler = fail_vcpu_run_mmio_no_syndrome_handler, \
+ .expected_events = { .fail_vcpu_runs = 1, \
+ .uffd_faults = _uffd_faults }, \
+}
+
+static struct test_desc tests[] = {
+
+ /* Check that HW is setting the Access Flag (AF) (sanity checks). */
+ TEST_ACCESS(guest_read64, with_af, CMD_NONE),
+ TEST_ACCESS(guest_ld_preidx, with_af, CMD_NONE),
+ TEST_ACCESS(guest_cas, with_af, CMD_NONE),
+ TEST_ACCESS(guest_write64, with_af, CMD_NONE),
+ TEST_ACCESS(guest_st_preidx, with_af, CMD_NONE),
+ TEST_ACCESS(guest_dc_zva, with_af, CMD_NONE),
+ TEST_ACCESS(guest_exec, with_af, CMD_NONE),
+
+ /*
+ * Punch a hole in the data backing store, and then try multiple
+ * accesses: reads should rturn zeroes, and writes should
+ * re-populate the page. Moreover, the test also check that no
+ * exception was generated in the guest. Note that this
+ * reading/writing behavior is the same as reading/writing a
+ * punched page (with fallocate(FALLOC_FL_PUNCH_HOLE)) from
+ * userspace.
+ */
+ TEST_ACCESS(guest_read64, no_af, CMD_HOLE_DATA),
+ TEST_ACCESS(guest_cas, no_af, CMD_HOLE_DATA),
+ TEST_ACCESS(guest_ld_preidx, no_af, CMD_HOLE_DATA),
+ TEST_ACCESS(guest_write64, no_af, CMD_HOLE_DATA),
+ TEST_ACCESS(guest_st_preidx, no_af, CMD_HOLE_DATA),
+ TEST_ACCESS(guest_at, no_af, CMD_HOLE_DATA),
+ TEST_ACCESS(guest_dc_zva, no_af, CMD_HOLE_DATA),
+
+ /*
+ * Punch holes in the data and PT backing stores and mark them for
+ * userfaultfd handling. This should result in 2 faults: the access
+ * on the data backing store, and its respective S1 page table walk
+ * (S1PTW).
+ */
+ TEST_UFFD(guest_read64, with_af, CMD_HOLE_DATA | CMD_HOLE_PT,
+ uffd_data_handler, uffd_pt_handler, 2),
+ TEST_UFFD(guest_read64, no_af, CMD_HOLE_DATA | CMD_HOLE_PT,
+ uffd_data_handler, uffd_pt_handler, 2),
+ TEST_UFFD(guest_cas, with_af, CMD_HOLE_DATA | CMD_HOLE_PT,
+ uffd_data_handler, uffd_pt_handler, 2),
+ /*
+ * Can't test guest_at with_af as it's IMPDEF whether the AF is set.
+ * The S1PTW fault should still be marked as a write.
+ */
+ TEST_UFFD(guest_at, no_af, CMD_HOLE_DATA | CMD_HOLE_PT,
+ uffd_no_handler, uffd_pt_handler, 1),
+ TEST_UFFD(guest_ld_preidx, with_af, CMD_HOLE_DATA | CMD_HOLE_PT,
+ uffd_data_handler, uffd_pt_handler, 2),
+ TEST_UFFD(guest_write64, with_af, CMD_HOLE_DATA | CMD_HOLE_PT,
+ uffd_data_handler, uffd_pt_handler, 2),
+ TEST_UFFD(guest_dc_zva, with_af, CMD_HOLE_DATA | CMD_HOLE_PT,
+ uffd_data_handler, uffd_pt_handler, 2),
+ TEST_UFFD(guest_st_preidx, with_af, CMD_HOLE_DATA | CMD_HOLE_PT,
+ uffd_data_handler, uffd_pt_handler, 2),
+ TEST_UFFD(guest_exec, with_af, CMD_HOLE_DATA | CMD_HOLE_PT,
+ uffd_data_handler, uffd_pt_handler, 2),
+
+ /*
+ * Try accesses when the data and PT memory regions are both
+ * tracked for dirty logging.
+ */
+ TEST_DIRTY_LOG(guest_read64, with_af, guest_check_no_write_in_dirty_log,
+ guest_check_s1ptw_wr_in_dirty_log),
+ TEST_DIRTY_LOG(guest_read64, no_af, guest_check_no_write_in_dirty_log,
+ guest_check_no_s1ptw_wr_in_dirty_log),
+ TEST_DIRTY_LOG(guest_ld_preidx, with_af,
+ guest_check_no_write_in_dirty_log,
+ guest_check_s1ptw_wr_in_dirty_log),
+ TEST_DIRTY_LOG(guest_at, no_af, guest_check_no_write_in_dirty_log,
+ guest_check_no_s1ptw_wr_in_dirty_log),
+ TEST_DIRTY_LOG(guest_exec, with_af, guest_check_no_write_in_dirty_log,
+ guest_check_s1ptw_wr_in_dirty_log),
+ TEST_DIRTY_LOG(guest_write64, with_af, guest_check_write_in_dirty_log,
+ guest_check_s1ptw_wr_in_dirty_log),
+ TEST_DIRTY_LOG(guest_cas, with_af, guest_check_write_in_dirty_log,
+ guest_check_s1ptw_wr_in_dirty_log),
+ TEST_DIRTY_LOG(guest_dc_zva, with_af, guest_check_write_in_dirty_log,
+ guest_check_s1ptw_wr_in_dirty_log),
+ TEST_DIRTY_LOG(guest_st_preidx, with_af, guest_check_write_in_dirty_log,
+ guest_check_s1ptw_wr_in_dirty_log),
+
+ /*
+ * Access when the data and PT memory regions are both marked for
+ * dirty logging and UFFD at the same time. The expected result is
+ * that writes should mark the dirty log and trigger a userfaultfd
+ * write fault. Reads/execs should result in a read userfaultfd
+ * fault, and nothing in the dirty log. Any S1PTW should result in
+ * a write in the dirty log and a userfaultfd write.
+ */
+ TEST_UFFD_AND_DIRTY_LOG(guest_read64, with_af,
+ uffd_data_handler, 2,
+ guest_check_no_write_in_dirty_log,
+ guest_check_s1ptw_wr_in_dirty_log),
+ TEST_UFFD_AND_DIRTY_LOG(guest_read64, no_af,
+ uffd_data_handler, 2,
+ guest_check_no_write_in_dirty_log,
+ guest_check_no_s1ptw_wr_in_dirty_log),
+ TEST_UFFD_AND_DIRTY_LOG(guest_ld_preidx, with_af,
+ uffd_data_handler,
+ 2, guest_check_no_write_in_dirty_log,
+ guest_check_s1ptw_wr_in_dirty_log),
+ TEST_UFFD_AND_DIRTY_LOG(guest_at, with_af, uffd_no_handler, 1,
+ guest_check_no_write_in_dirty_log,
+ guest_check_s1ptw_wr_in_dirty_log),
+ TEST_UFFD_AND_DIRTY_LOG(guest_exec, with_af,
+ uffd_data_handler, 2,
+ guest_check_no_write_in_dirty_log,
+ guest_check_s1ptw_wr_in_dirty_log),
+ TEST_UFFD_AND_DIRTY_LOG(guest_write64, with_af,
+ uffd_data_handler,
+ 2, guest_check_write_in_dirty_log,
+ guest_check_s1ptw_wr_in_dirty_log),
+ TEST_UFFD_AND_DIRTY_LOG(guest_cas, with_af,
+ uffd_data_handler, 2,
+ guest_check_write_in_dirty_log,
+ guest_check_s1ptw_wr_in_dirty_log),
+ TEST_UFFD_AND_DIRTY_LOG(guest_dc_zva, with_af,
+ uffd_data_handler,
+ 2, guest_check_write_in_dirty_log,
+ guest_check_s1ptw_wr_in_dirty_log),
+ TEST_UFFD_AND_DIRTY_LOG(guest_st_preidx, with_af,
+ uffd_data_handler, 2,
+ guest_check_write_in_dirty_log,
+ guest_check_s1ptw_wr_in_dirty_log),
+ /*
+ * Access when both the PT and data regions are marked read-only
+ * (with KVM_MEM_READONLY). Writes with a syndrome result in an
+ * MMIO exit, writes with no syndrome (e.g., CAS) result in a
+ * failed vcpu run, and reads/execs with and without syndroms do
+ * not fault.
+ */
+ TEST_RO_MEMSLOT(guest_read64, 0, 0),
+ TEST_RO_MEMSLOT(guest_ld_preidx, 0, 0),
+ TEST_RO_MEMSLOT(guest_at, 0, 0),
+ TEST_RO_MEMSLOT(guest_exec, 0, 0),
+ TEST_RO_MEMSLOT(guest_write64, mmio_on_test_gpa_handler, 1),
+ TEST_RO_MEMSLOT_NO_SYNDROME(guest_dc_zva),
+ TEST_RO_MEMSLOT_NO_SYNDROME(guest_cas),
+ TEST_RO_MEMSLOT_NO_SYNDROME(guest_st_preidx),
+
+ /*
+ * The PT and data regions are both read-only and marked
+ * for dirty logging at the same time. The expected result is that
+ * for writes there should be no write in the dirty log. The
+ * readonly handling is the same as if the memslot was not marked
+ * for dirty logging: writes with a syndrome result in an MMIO
+ * exit, and writes with no syndrome result in a failed vcpu run.
+ */
+ TEST_RO_MEMSLOT_AND_DIRTY_LOG(guest_read64, 0, 0,
+ guest_check_no_write_in_dirty_log),
+ TEST_RO_MEMSLOT_AND_DIRTY_LOG(guest_ld_preidx, 0, 0,
+ guest_check_no_write_in_dirty_log),
+ TEST_RO_MEMSLOT_AND_DIRTY_LOG(guest_at, 0, 0,
+ guest_check_no_write_in_dirty_log),
+ TEST_RO_MEMSLOT_AND_DIRTY_LOG(guest_exec, 0, 0,
+ guest_check_no_write_in_dirty_log),
+ TEST_RO_MEMSLOT_AND_DIRTY_LOG(guest_write64, mmio_on_test_gpa_handler,
+ 1, guest_check_no_write_in_dirty_log),
+ TEST_RO_MEMSLOT_NO_SYNDROME_AND_DIRTY_LOG(guest_dc_zva,
+ guest_check_no_write_in_dirty_log),
+ TEST_RO_MEMSLOT_NO_SYNDROME_AND_DIRTY_LOG(guest_cas,
+ guest_check_no_write_in_dirty_log),
+ TEST_RO_MEMSLOT_NO_SYNDROME_AND_DIRTY_LOG(guest_st_preidx,
+ guest_check_no_write_in_dirty_log),
+
+ /*
+ * The PT and data regions are both read-only and punched with
+ * holes tracked with userfaultfd. The expected result is the
+ * union of both userfaultfd and read-only behaviors. For example,
+ * write accesses result in a userfaultfd write fault and an MMIO
+ * exit. Writes with no syndrome result in a failed vcpu run and
+ * no userfaultfd write fault. Reads result in userfaultfd getting
+ * triggered.
+ */
+ TEST_RO_MEMSLOT_AND_UFFD(guest_read64, 0, 0, uffd_data_handler, 2),
+ TEST_RO_MEMSLOT_AND_UFFD(guest_ld_preidx, 0, 0, uffd_data_handler, 2),
+ TEST_RO_MEMSLOT_AND_UFFD(guest_at, 0, 0, uffd_no_handler, 1),
+ TEST_RO_MEMSLOT_AND_UFFD(guest_exec, 0, 0, uffd_data_handler, 2),
+ TEST_RO_MEMSLOT_AND_UFFD(guest_write64, mmio_on_test_gpa_handler, 1,
+ uffd_data_handler, 2),
+ TEST_RO_MEMSLOT_NO_SYNDROME_AND_UFFD(guest_cas, uffd_data_handler, 2),
+ TEST_RO_MEMSLOT_NO_SYNDROME_AND_UFFD(guest_dc_zva, uffd_no_handler, 1),
+ TEST_RO_MEMSLOT_NO_SYNDROME_AND_UFFD(guest_st_preidx, uffd_no_handler, 1),
+
+ { 0 }
+};
+
+static void for_each_test_and_guest_mode(enum vm_mem_backing_src_type src_type)
+{
+ struct test_desc *t;
+
+ for (t = &tests[0]; t->name; t++) {
+ if (t->skip)
+ continue;
+
+ struct test_params p = {
+ .src_type = src_type,
+ .test_desc = t,
+ };
+
+ for_each_guest_mode(run_test, &p);
+ }
+}
+
+int main(int argc, char *argv[])
+{
+ enum vm_mem_backing_src_type src_type;
+ int opt;
+
+ src_type = DEFAULT_VM_MEM_SRC;
+
+ while ((opt = getopt(argc, argv, "hm:s:")) != -1) {
+ switch (opt) {
+ case 'm':
+ guest_modes_cmdline(optarg);
+ break;
+ case 's':
+ src_type = parse_backing_src_type(optarg);
+ break;
+ case 'h':
+ default:
+ help(argv[0]);
+ exit(0);
+ }
+ }
+
+ for_each_test_and_guest_mode(src_type);
+ return 0;
+}
diff --git a/tools/testing/selftests/kvm/aarch64/psci_test.c b/tools/testing/selftests/kvm/aarch64/psci_test.c
new file mode 100644
index 000000000000..9b004905d1d3
--- /dev/null
+++ b/tools/testing/selftests/kvm/aarch64/psci_test.c
@@ -0,0 +1,198 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * psci_test - Tests relating to KVM's PSCI implementation.
+ *
+ * Copyright (c) 2021 Google LLC.
+ *
+ * This test includes:
+ * - A regression test for a race between KVM servicing the PSCI CPU_ON call
+ * and userspace reading the targeted vCPU's registers.
+ * - A test for KVM's handling of PSCI SYSTEM_SUSPEND and the associated
+ * KVM_SYSTEM_EVENT_SUSPEND UAPI.
+ */
+
+#define _GNU_SOURCE
+
+#include <linux/psci.h>
+
+#include "kvm_util.h"
+#include "processor.h"
+#include "test_util.h"
+
+#define CPU_ON_ENTRY_ADDR 0xfeedf00dul
+#define CPU_ON_CONTEXT_ID 0xdeadc0deul
+
+static uint64_t psci_cpu_on(uint64_t target_cpu, uint64_t entry_addr,
+ uint64_t context_id)
+{
+ struct arm_smccc_res res;
+
+ smccc_hvc(PSCI_0_2_FN64_CPU_ON, target_cpu, entry_addr, context_id,
+ 0, 0, 0, 0, &res);
+
+ return res.a0;
+}
+
+static uint64_t psci_affinity_info(uint64_t target_affinity,
+ uint64_t lowest_affinity_level)
+{
+ struct arm_smccc_res res;
+
+ smccc_hvc(PSCI_0_2_FN64_AFFINITY_INFO, target_affinity, lowest_affinity_level,
+ 0, 0, 0, 0, 0, &res);
+
+ return res.a0;
+}
+
+static uint64_t psci_system_suspend(uint64_t entry_addr, uint64_t context_id)
+{
+ struct arm_smccc_res res;
+
+ smccc_hvc(PSCI_1_0_FN64_SYSTEM_SUSPEND, entry_addr, context_id,
+ 0, 0, 0, 0, 0, &res);
+
+ return res.a0;
+}
+
+static uint64_t psci_features(uint32_t func_id)
+{
+ struct arm_smccc_res res;
+
+ smccc_hvc(PSCI_1_0_FN_PSCI_FEATURES, func_id, 0, 0, 0, 0, 0, 0, &res);
+
+ return res.a0;
+}
+
+static void vcpu_power_off(struct kvm_vcpu *vcpu)
+{
+ struct kvm_mp_state mp_state = {
+ .mp_state = KVM_MP_STATE_STOPPED,
+ };
+
+ vcpu_mp_state_set(vcpu, &mp_state);
+}
+
+static struct kvm_vm *setup_vm(void *guest_code, struct kvm_vcpu **source,
+ struct kvm_vcpu **target)
+{
+ struct kvm_vcpu_init init;
+ struct kvm_vm *vm;
+
+ vm = vm_create(2);
+
+ vm_ioctl(vm, KVM_ARM_PREFERRED_TARGET, &init);
+ init.features[0] |= (1 << KVM_ARM_VCPU_PSCI_0_2);
+
+ *source = aarch64_vcpu_add(vm, 0, &init, guest_code);
+ *target = aarch64_vcpu_add(vm, 1, &init, guest_code);
+
+ return vm;
+}
+
+static void enter_guest(struct kvm_vcpu *vcpu)
+{
+ struct ucall uc;
+
+ vcpu_run(vcpu);
+ if (get_ucall(vcpu, &uc) == UCALL_ABORT)
+ REPORT_GUEST_ASSERT(uc);
+}
+
+static void assert_vcpu_reset(struct kvm_vcpu *vcpu)
+{
+ uint64_t obs_pc, obs_x0;
+
+ vcpu_get_reg(vcpu, ARM64_CORE_REG(regs.pc), &obs_pc);
+ vcpu_get_reg(vcpu, ARM64_CORE_REG(regs.regs[0]), &obs_x0);
+
+ TEST_ASSERT(obs_pc == CPU_ON_ENTRY_ADDR,
+ "unexpected target cpu pc: %lx (expected: %lx)",
+ obs_pc, CPU_ON_ENTRY_ADDR);
+ TEST_ASSERT(obs_x0 == CPU_ON_CONTEXT_ID,
+ "unexpected target context id: %lx (expected: %lx)",
+ obs_x0, CPU_ON_CONTEXT_ID);
+}
+
+static void guest_test_cpu_on(uint64_t target_cpu)
+{
+ uint64_t target_state;
+
+ GUEST_ASSERT(!psci_cpu_on(target_cpu, CPU_ON_ENTRY_ADDR, CPU_ON_CONTEXT_ID));
+
+ do {
+ target_state = psci_affinity_info(target_cpu, 0);
+
+ GUEST_ASSERT((target_state == PSCI_0_2_AFFINITY_LEVEL_ON) ||
+ (target_state == PSCI_0_2_AFFINITY_LEVEL_OFF));
+ } while (target_state != PSCI_0_2_AFFINITY_LEVEL_ON);
+
+ GUEST_DONE();
+}
+
+static void host_test_cpu_on(void)
+{
+ struct kvm_vcpu *source, *target;
+ uint64_t target_mpidr;
+ struct kvm_vm *vm;
+ struct ucall uc;
+
+ vm = setup_vm(guest_test_cpu_on, &source, &target);
+
+ /*
+ * make sure the target is already off when executing the test.
+ */
+ vcpu_power_off(target);
+
+ vcpu_get_reg(target, KVM_ARM64_SYS_REG(SYS_MPIDR_EL1), &target_mpidr);
+ vcpu_args_set(source, 1, target_mpidr & MPIDR_HWID_BITMASK);
+ enter_guest(source);
+
+ if (get_ucall(source, &uc) != UCALL_DONE)
+ TEST_FAIL("Unhandled ucall: %lu", uc.cmd);
+
+ assert_vcpu_reset(target);
+ kvm_vm_free(vm);
+}
+
+static void guest_test_system_suspend(void)
+{
+ uint64_t ret;
+
+ /* assert that SYSTEM_SUSPEND is discoverable */
+ GUEST_ASSERT(!psci_features(PSCI_1_0_FN_SYSTEM_SUSPEND));
+ GUEST_ASSERT(!psci_features(PSCI_1_0_FN64_SYSTEM_SUSPEND));
+
+ ret = psci_system_suspend(CPU_ON_ENTRY_ADDR, CPU_ON_CONTEXT_ID);
+ GUEST_SYNC(ret);
+}
+
+static void host_test_system_suspend(void)
+{
+ struct kvm_vcpu *source, *target;
+ struct kvm_run *run;
+ struct kvm_vm *vm;
+
+ vm = setup_vm(guest_test_system_suspend, &source, &target);
+ vm_enable_cap(vm, KVM_CAP_ARM_SYSTEM_SUSPEND, 0);
+
+ vcpu_power_off(target);
+ run = source->run;
+
+ enter_guest(source);
+
+ TEST_ASSERT_KVM_EXIT_REASON(source, KVM_EXIT_SYSTEM_EVENT);
+ TEST_ASSERT(run->system_event.type == KVM_SYSTEM_EVENT_SUSPEND,
+ "Unhandled system event: %u (expected: %u)",
+ run->system_event.type, KVM_SYSTEM_EVENT_SUSPEND);
+
+ kvm_vm_free(vm);
+}
+
+int main(void)
+{
+ TEST_REQUIRE(kvm_has_cap(KVM_CAP_ARM_SYSTEM_SUSPEND));
+
+ host_test_cpu_on();
+ host_test_system_suspend();
+ return 0;
+}
diff --git a/tools/testing/selftests/kvm/aarch64/set_id_regs.c b/tools/testing/selftests/kvm/aarch64/set_id_regs.c
new file mode 100644
index 000000000000..16e2338686c1
--- /dev/null
+++ b/tools/testing/selftests/kvm/aarch64/set_id_regs.c
@@ -0,0 +1,485 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * set_id_regs - Test for setting ID register from usersapce.
+ *
+ * Copyright (c) 2023 Google LLC.
+ *
+ *
+ * Test that KVM supports setting ID registers from userspace and handles the
+ * feature set correctly.
+ */
+
+#include <stdint.h>
+#include "kvm_util.h"
+#include "processor.h"
+#include "test_util.h"
+#include <linux/bitfield.h>
+
+enum ftr_type {
+ FTR_EXACT, /* Use a predefined safe value */
+ FTR_LOWER_SAFE, /* Smaller value is safe */
+ FTR_HIGHER_SAFE, /* Bigger value is safe */
+ FTR_HIGHER_OR_ZERO_SAFE, /* Bigger value is safe, but 0 is biggest */
+ FTR_END, /* Mark the last ftr bits */
+};
+
+#define FTR_SIGNED true /* Value should be treated as signed */
+#define FTR_UNSIGNED false /* Value should be treated as unsigned */
+
+struct reg_ftr_bits {
+ char *name;
+ bool sign;
+ enum ftr_type type;
+ uint8_t shift;
+ uint64_t mask;
+ /*
+ * For FTR_EXACT, safe_val is used as the exact safe value.
+ * For FTR_LOWER_SAFE, safe_val is used as the minimal safe value.
+ */
+ int64_t safe_val;
+};
+
+struct test_feature_reg {
+ uint32_t reg;
+ const struct reg_ftr_bits *ftr_bits;
+};
+
+#define __REG_FTR_BITS(NAME, SIGNED, TYPE, SHIFT, MASK, SAFE_VAL) \
+ { \
+ .name = #NAME, \
+ .sign = SIGNED, \
+ .type = TYPE, \
+ .shift = SHIFT, \
+ .mask = MASK, \
+ .safe_val = SAFE_VAL, \
+ }
+
+#define REG_FTR_BITS(type, reg, field, safe_val) \
+ __REG_FTR_BITS(reg##_##field, FTR_UNSIGNED, type, reg##_##field##_SHIFT, \
+ reg##_##field##_MASK, safe_val)
+
+#define S_REG_FTR_BITS(type, reg, field, safe_val) \
+ __REG_FTR_BITS(reg##_##field, FTR_SIGNED, type, reg##_##field##_SHIFT, \
+ reg##_##field##_MASK, safe_val)
+
+#define REG_FTR_END \
+ { \
+ .type = FTR_END, \
+ }
+
+static const struct reg_ftr_bits ftr_id_aa64dfr0_el1[] = {
+ S_REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64DFR0_EL1, PMUVer, 0),
+ REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64DFR0_EL1, DebugVer, ID_AA64DFR0_EL1_DebugVer_IMP),
+ REG_FTR_END,
+};
+
+static const struct reg_ftr_bits ftr_id_dfr0_el1[] = {
+ S_REG_FTR_BITS(FTR_LOWER_SAFE, ID_DFR0_EL1, PerfMon, ID_DFR0_EL1_PerfMon_PMUv3),
+ REG_FTR_BITS(FTR_LOWER_SAFE, ID_DFR0_EL1, CopDbg, ID_DFR0_EL1_CopDbg_Armv8),
+ REG_FTR_END,
+};
+
+static const struct reg_ftr_bits ftr_id_aa64isar0_el1[] = {
+ REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64ISAR0_EL1, RNDR, 0),
+ REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64ISAR0_EL1, TLB, 0),
+ REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64ISAR0_EL1, TS, 0),
+ REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64ISAR0_EL1, FHM, 0),
+ REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64ISAR0_EL1, DP, 0),
+ REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64ISAR0_EL1, SM4, 0),
+ REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64ISAR0_EL1, SM3, 0),
+ REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64ISAR0_EL1, SHA3, 0),
+ REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64ISAR0_EL1, RDM, 0),
+ REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64ISAR0_EL1, TME, 0),
+ REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64ISAR0_EL1, ATOMIC, 0),
+ REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64ISAR0_EL1, CRC32, 0),
+ REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64ISAR0_EL1, SHA2, 0),
+ REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64ISAR0_EL1, SHA1, 0),
+ REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64ISAR0_EL1, AES, 0),
+ REG_FTR_END,
+};
+
+static const struct reg_ftr_bits ftr_id_aa64isar1_el1[] = {
+ REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64ISAR1_EL1, LS64, 0),
+ REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64ISAR1_EL1, XS, 0),
+ REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64ISAR1_EL1, I8MM, 0),
+ REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64ISAR1_EL1, DGH, 0),
+ REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64ISAR1_EL1, BF16, 0),
+ REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64ISAR1_EL1, SPECRES, 0),
+ REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64ISAR1_EL1, SB, 0),
+ REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64ISAR1_EL1, FRINTTS, 0),
+ REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64ISAR1_EL1, LRCPC, 0),
+ REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64ISAR1_EL1, FCMA, 0),
+ REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64ISAR1_EL1, JSCVT, 0),
+ REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64ISAR1_EL1, DPB, 0),
+ REG_FTR_END,
+};
+
+static const struct reg_ftr_bits ftr_id_aa64isar2_el1[] = {
+ REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64ISAR2_EL1, BC, 0),
+ REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64ISAR2_EL1, RPRES, 0),
+ REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64ISAR2_EL1, WFxT, 0),
+ REG_FTR_END,
+};
+
+static const struct reg_ftr_bits ftr_id_aa64pfr0_el1[] = {
+ REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64PFR0_EL1, CSV3, 0),
+ REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64PFR0_EL1, CSV2, 0),
+ REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64PFR0_EL1, DIT, 0),
+ REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64PFR0_EL1, SEL2, 0),
+ REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64PFR0_EL1, EL3, 0),
+ REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64PFR0_EL1, EL2, 0),
+ REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64PFR0_EL1, EL1, 0),
+ REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64PFR0_EL1, EL0, 0),
+ REG_FTR_END,
+};
+
+static const struct reg_ftr_bits ftr_id_aa64mmfr0_el1[] = {
+ REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64MMFR0_EL1, ECV, 0),
+ REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64MMFR0_EL1, EXS, 0),
+ S_REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64MMFR0_EL1, TGRAN4, 0),
+ S_REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64MMFR0_EL1, TGRAN64, 0),
+ REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64MMFR0_EL1, TGRAN16, 0),
+ REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64MMFR0_EL1, BIGENDEL0, 0),
+ REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64MMFR0_EL1, SNSMEM, 0),
+ REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64MMFR0_EL1, BIGEND, 0),
+ REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64MMFR0_EL1, ASIDBITS, 0),
+ REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64MMFR0_EL1, PARANGE, 0),
+ REG_FTR_END,
+};
+
+static const struct reg_ftr_bits ftr_id_aa64mmfr1_el1[] = {
+ REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64MMFR1_EL1, TIDCP1, 0),
+ REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64MMFR1_EL1, AFP, 0),
+ REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64MMFR1_EL1, ETS, 0),
+ REG_FTR_BITS(FTR_HIGHER_SAFE, ID_AA64MMFR1_EL1, SpecSEI, 0),
+ REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64MMFR1_EL1, PAN, 0),
+ REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64MMFR1_EL1, LO, 0),
+ REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64MMFR1_EL1, HPDS, 0),
+ REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64MMFR1_EL1, HAFDBS, 0),
+ REG_FTR_END,
+};
+
+static const struct reg_ftr_bits ftr_id_aa64mmfr2_el1[] = {
+ REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64MMFR2_EL1, E0PD, 0),
+ REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64MMFR2_EL1, BBM, 0),
+ REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64MMFR2_EL1, TTL, 0),
+ REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64MMFR2_EL1, AT, 0),
+ REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64MMFR2_EL1, ST, 0),
+ REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64MMFR2_EL1, VARange, 0),
+ REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64MMFR2_EL1, IESB, 0),
+ REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64MMFR2_EL1, LSM, 0),
+ REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64MMFR2_EL1, UAO, 0),
+ REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64MMFR2_EL1, CnP, 0),
+ REG_FTR_END,
+};
+
+static const struct reg_ftr_bits ftr_id_aa64zfr0_el1[] = {
+ REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64ZFR0_EL1, F64MM, 0),
+ REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64ZFR0_EL1, F32MM, 0),
+ REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64ZFR0_EL1, I8MM, 0),
+ REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64ZFR0_EL1, SM4, 0),
+ REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64ZFR0_EL1, SHA3, 0),
+ REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64ZFR0_EL1, BF16, 0),
+ REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64ZFR0_EL1, BitPerm, 0),
+ REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64ZFR0_EL1, AES, 0),
+ REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64ZFR0_EL1, SVEver, 0),
+ REG_FTR_END,
+};
+
+#define TEST_REG(id, table) \
+ { \
+ .reg = id, \
+ .ftr_bits = &((table)[0]), \
+ }
+
+static struct test_feature_reg test_regs[] = {
+ TEST_REG(SYS_ID_AA64DFR0_EL1, ftr_id_aa64dfr0_el1),
+ TEST_REG(SYS_ID_DFR0_EL1, ftr_id_dfr0_el1),
+ TEST_REG(SYS_ID_AA64ISAR0_EL1, ftr_id_aa64isar0_el1),
+ TEST_REG(SYS_ID_AA64ISAR1_EL1, ftr_id_aa64isar1_el1),
+ TEST_REG(SYS_ID_AA64ISAR2_EL1, ftr_id_aa64isar2_el1),
+ TEST_REG(SYS_ID_AA64PFR0_EL1, ftr_id_aa64pfr0_el1),
+ TEST_REG(SYS_ID_AA64MMFR0_EL1, ftr_id_aa64mmfr0_el1),
+ TEST_REG(SYS_ID_AA64MMFR1_EL1, ftr_id_aa64mmfr1_el1),
+ TEST_REG(SYS_ID_AA64MMFR2_EL1, ftr_id_aa64mmfr2_el1),
+ TEST_REG(SYS_ID_AA64ZFR0_EL1, ftr_id_aa64zfr0_el1),
+};
+
+#define GUEST_REG_SYNC(id) GUEST_SYNC_ARGS(0, id, read_sysreg_s(id), 0, 0);
+
+static void guest_code(void)
+{
+ GUEST_REG_SYNC(SYS_ID_AA64DFR0_EL1);
+ GUEST_REG_SYNC(SYS_ID_DFR0_EL1);
+ GUEST_REG_SYNC(SYS_ID_AA64ISAR0_EL1);
+ GUEST_REG_SYNC(SYS_ID_AA64ISAR1_EL1);
+ GUEST_REG_SYNC(SYS_ID_AA64ISAR2_EL1);
+ GUEST_REG_SYNC(SYS_ID_AA64PFR0_EL1);
+ GUEST_REG_SYNC(SYS_ID_AA64MMFR0_EL1);
+ GUEST_REG_SYNC(SYS_ID_AA64MMFR1_EL1);
+ GUEST_REG_SYNC(SYS_ID_AA64MMFR2_EL1);
+ GUEST_REG_SYNC(SYS_ID_AA64ZFR0_EL1);
+
+ GUEST_DONE();
+}
+
+/* Return a safe value to a given ftr_bits an ftr value */
+uint64_t get_safe_value(const struct reg_ftr_bits *ftr_bits, uint64_t ftr)
+{
+ uint64_t ftr_max = GENMASK_ULL(ARM64_FEATURE_FIELD_BITS - 1, 0);
+
+ if (ftr_bits->sign == FTR_UNSIGNED) {
+ switch (ftr_bits->type) {
+ case FTR_EXACT:
+ ftr = ftr_bits->safe_val;
+ break;
+ case FTR_LOWER_SAFE:
+ if (ftr > ftr_bits->safe_val)
+ ftr--;
+ break;
+ case FTR_HIGHER_SAFE:
+ if (ftr < ftr_max)
+ ftr++;
+ break;
+ case FTR_HIGHER_OR_ZERO_SAFE:
+ if (ftr == ftr_max)
+ ftr = 0;
+ else if (ftr != 0)
+ ftr++;
+ break;
+ default:
+ break;
+ }
+ } else if (ftr != ftr_max) {
+ switch (ftr_bits->type) {
+ case FTR_EXACT:
+ ftr = ftr_bits->safe_val;
+ break;
+ case FTR_LOWER_SAFE:
+ if (ftr > ftr_bits->safe_val)
+ ftr--;
+ break;
+ case FTR_HIGHER_SAFE:
+ if (ftr < ftr_max - 1)
+ ftr++;
+ break;
+ case FTR_HIGHER_OR_ZERO_SAFE:
+ if (ftr != 0 && ftr != ftr_max - 1)
+ ftr++;
+ break;
+ default:
+ break;
+ }
+ }
+
+ return ftr;
+}
+
+/* Return an invalid value to a given ftr_bits an ftr value */
+uint64_t get_invalid_value(const struct reg_ftr_bits *ftr_bits, uint64_t ftr)
+{
+ uint64_t ftr_max = GENMASK_ULL(ARM64_FEATURE_FIELD_BITS - 1, 0);
+
+ if (ftr_bits->sign == FTR_UNSIGNED) {
+ switch (ftr_bits->type) {
+ case FTR_EXACT:
+ ftr = max((uint64_t)ftr_bits->safe_val + 1, ftr + 1);
+ break;
+ case FTR_LOWER_SAFE:
+ ftr++;
+ break;
+ case FTR_HIGHER_SAFE:
+ ftr--;
+ break;
+ case FTR_HIGHER_OR_ZERO_SAFE:
+ if (ftr == 0)
+ ftr = ftr_max;
+ else
+ ftr--;
+ break;
+ default:
+ break;
+ }
+ } else if (ftr != ftr_max) {
+ switch (ftr_bits->type) {
+ case FTR_EXACT:
+ ftr = max((uint64_t)ftr_bits->safe_val + 1, ftr + 1);
+ break;
+ case FTR_LOWER_SAFE:
+ ftr++;
+ break;
+ case FTR_HIGHER_SAFE:
+ ftr--;
+ break;
+ case FTR_HIGHER_OR_ZERO_SAFE:
+ if (ftr == 0)
+ ftr = ftr_max - 1;
+ else
+ ftr--;
+ break;
+ default:
+ break;
+ }
+ } else {
+ ftr = 0;
+ }
+
+ return ftr;
+}
+
+static void test_reg_set_success(struct kvm_vcpu *vcpu, uint64_t reg,
+ const struct reg_ftr_bits *ftr_bits)
+{
+ uint8_t shift = ftr_bits->shift;
+ uint64_t mask = ftr_bits->mask;
+ uint64_t val, new_val, ftr;
+
+ vcpu_get_reg(vcpu, reg, &val);
+ ftr = (val & mask) >> shift;
+
+ ftr = get_safe_value(ftr_bits, ftr);
+
+ ftr <<= shift;
+ val &= ~mask;
+ val |= ftr;
+
+ vcpu_set_reg(vcpu, reg, val);
+ vcpu_get_reg(vcpu, reg, &new_val);
+ TEST_ASSERT_EQ(new_val, val);
+}
+
+static void test_reg_set_fail(struct kvm_vcpu *vcpu, uint64_t reg,
+ const struct reg_ftr_bits *ftr_bits)
+{
+ uint8_t shift = ftr_bits->shift;
+ uint64_t mask = ftr_bits->mask;
+ uint64_t val, old_val, ftr;
+ int r;
+
+ vcpu_get_reg(vcpu, reg, &val);
+ ftr = (val & mask) >> shift;
+
+ ftr = get_invalid_value(ftr_bits, ftr);
+
+ old_val = val;
+ ftr <<= shift;
+ val &= ~mask;
+ val |= ftr;
+
+ r = __vcpu_set_reg(vcpu, reg, val);
+ TEST_ASSERT(r < 0 && errno == EINVAL,
+ "Unexpected KVM_SET_ONE_REG error: r=%d, errno=%d", r, errno);
+
+ vcpu_get_reg(vcpu, reg, &val);
+ TEST_ASSERT_EQ(val, old_val);
+}
+
+static void test_user_set_reg(struct kvm_vcpu *vcpu, bool aarch64_only)
+{
+ uint64_t masks[KVM_ARM_FEATURE_ID_RANGE_SIZE];
+ struct reg_mask_range range = {
+ .addr = (__u64)masks,
+ };
+ int ret;
+
+ /* KVM should return error when reserved field is not zero */
+ range.reserved[0] = 1;
+ ret = __vm_ioctl(vcpu->vm, KVM_ARM_GET_REG_WRITABLE_MASKS, &range);
+ TEST_ASSERT(ret, "KVM doesn't check invalid parameters.");
+
+ /* Get writable masks for feature ID registers */
+ memset(range.reserved, 0, sizeof(range.reserved));
+ vm_ioctl(vcpu->vm, KVM_ARM_GET_REG_WRITABLE_MASKS, &range);
+
+ for (int i = 0; i < ARRAY_SIZE(test_regs); i++) {
+ const struct reg_ftr_bits *ftr_bits = test_regs[i].ftr_bits;
+ uint32_t reg_id = test_regs[i].reg;
+ uint64_t reg = KVM_ARM64_SYS_REG(reg_id);
+ int idx;
+
+ /* Get the index to masks array for the idreg */
+ idx = KVM_ARM_FEATURE_ID_RANGE_IDX(sys_reg_Op0(reg_id), sys_reg_Op1(reg_id),
+ sys_reg_CRn(reg_id), sys_reg_CRm(reg_id),
+ sys_reg_Op2(reg_id));
+
+ for (int j = 0; ftr_bits[j].type != FTR_END; j++) {
+ /* Skip aarch32 reg on aarch64 only system, since they are RAZ/WI. */
+ if (aarch64_only && sys_reg_CRm(reg_id) < 4) {
+ ksft_test_result_skip("%s on AARCH64 only system\n",
+ ftr_bits[j].name);
+ continue;
+ }
+
+ /* Make sure the feature field is writable */
+ TEST_ASSERT_EQ(masks[idx] & ftr_bits[j].mask, ftr_bits[j].mask);
+
+ test_reg_set_fail(vcpu, reg, &ftr_bits[j]);
+ test_reg_set_success(vcpu, reg, &ftr_bits[j]);
+
+ ksft_test_result_pass("%s\n", ftr_bits[j].name);
+ }
+ }
+}
+
+static void test_guest_reg_read(struct kvm_vcpu *vcpu)
+{
+ bool done = false;
+ struct ucall uc;
+ uint64_t val;
+
+ while (!done) {
+ vcpu_run(vcpu);
+
+ switch (get_ucall(vcpu, &uc)) {
+ case UCALL_ABORT:
+ REPORT_GUEST_ASSERT(uc);
+ break;
+ case UCALL_SYNC:
+ /* Make sure the written values are seen by guest */
+ vcpu_get_reg(vcpu, KVM_ARM64_SYS_REG(uc.args[2]), &val);
+ TEST_ASSERT_EQ(val, uc.args[3]);
+ break;
+ case UCALL_DONE:
+ done = true;
+ break;
+ default:
+ TEST_FAIL("Unexpected ucall: %lu", uc.cmd);
+ }
+ }
+}
+
+int main(void)
+{
+ struct kvm_vcpu *vcpu;
+ struct kvm_vm *vm;
+ bool aarch64_only;
+ uint64_t val, el0;
+ int ftr_cnt;
+
+ TEST_REQUIRE(kvm_has_cap(KVM_CAP_ARM_SUPPORTED_REG_MASK_RANGES));
+
+ vm = vm_create_with_one_vcpu(&vcpu, guest_code);
+
+ /* Check for AARCH64 only system */
+ vcpu_get_reg(vcpu, KVM_ARM64_SYS_REG(SYS_ID_AA64PFR0_EL1), &val);
+ el0 = FIELD_GET(ARM64_FEATURE_MASK(ID_AA64PFR0_EL1_EL0), val);
+ aarch64_only = (el0 == ID_AA64PFR0_EL1_ELx_64BIT_ONLY);
+
+ ksft_print_header();
+
+ ftr_cnt = ARRAY_SIZE(ftr_id_aa64dfr0_el1) + ARRAY_SIZE(ftr_id_dfr0_el1) +
+ ARRAY_SIZE(ftr_id_aa64isar0_el1) + ARRAY_SIZE(ftr_id_aa64isar1_el1) +
+ ARRAY_SIZE(ftr_id_aa64isar2_el1) + ARRAY_SIZE(ftr_id_aa64pfr0_el1) +
+ ARRAY_SIZE(ftr_id_aa64mmfr0_el1) + ARRAY_SIZE(ftr_id_aa64mmfr1_el1) +
+ ARRAY_SIZE(ftr_id_aa64mmfr2_el1) + ARRAY_SIZE(ftr_id_aa64zfr0_el1) -
+ ARRAY_SIZE(test_regs);
+
+ ksft_set_plan(ftr_cnt);
+
+ test_user_set_reg(vcpu, aarch64_only);
+ test_guest_reg_read(vcpu);
+
+ kvm_vm_free(vm);
+
+ ksft_finished();
+}
diff --git a/tools/testing/selftests/kvm/aarch64/smccc_filter.c b/tools/testing/selftests/kvm/aarch64/smccc_filter.c
new file mode 100644
index 000000000000..2d189f3da228
--- /dev/null
+++ b/tools/testing/selftests/kvm/aarch64/smccc_filter.c
@@ -0,0 +1,268 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * smccc_filter - Tests for the SMCCC filter UAPI.
+ *
+ * Copyright (c) 2023 Google LLC
+ *
+ * This test includes:
+ * - Tests that the UAPI constraints are upheld by KVM. For example, userspace
+ * is prevented from filtering the architecture range of SMCCC calls.
+ * - Test that the filter actions (DENIED, FWD_TO_USER) work as intended.
+ */
+
+#include <linux/arm-smccc.h>
+#include <linux/psci.h>
+#include <stdint.h>
+
+#include "processor.h"
+#include "test_util.h"
+
+enum smccc_conduit {
+ HVC_INSN,
+ SMC_INSN,
+};
+
+#define for_each_conduit(conduit) \
+ for (conduit = HVC_INSN; conduit <= SMC_INSN; conduit++)
+
+static void guest_main(uint32_t func_id, enum smccc_conduit conduit)
+{
+ struct arm_smccc_res res;
+
+ if (conduit == SMC_INSN)
+ smccc_smc(func_id, 0, 0, 0, 0, 0, 0, 0, &res);
+ else
+ smccc_hvc(func_id, 0, 0, 0, 0, 0, 0, 0, &res);
+
+ GUEST_SYNC(res.a0);
+}
+
+static int __set_smccc_filter(struct kvm_vm *vm, uint32_t start, uint32_t nr_functions,
+ enum kvm_smccc_filter_action action)
+{
+ struct kvm_smccc_filter filter = {
+ .base = start,
+ .nr_functions = nr_functions,
+ .action = action,
+ };
+
+ return __kvm_device_attr_set(vm->fd, KVM_ARM_VM_SMCCC_CTRL,
+ KVM_ARM_VM_SMCCC_FILTER, &filter);
+}
+
+static void set_smccc_filter(struct kvm_vm *vm, uint32_t start, uint32_t nr_functions,
+ enum kvm_smccc_filter_action action)
+{
+ int ret = __set_smccc_filter(vm, start, nr_functions, action);
+
+ TEST_ASSERT(!ret, "failed to configure SMCCC filter: %d", ret);
+}
+
+static struct kvm_vm *setup_vm(struct kvm_vcpu **vcpu)
+{
+ struct kvm_vcpu_init init;
+ struct kvm_vm *vm;
+
+ vm = vm_create(1);
+ vm_ioctl(vm, KVM_ARM_PREFERRED_TARGET, &init);
+
+ /*
+ * Enable in-kernel emulation of PSCI to ensure that calls are denied
+ * due to the SMCCC filter, not because of KVM.
+ */
+ init.features[0] |= (1 << KVM_ARM_VCPU_PSCI_0_2);
+
+ *vcpu = aarch64_vcpu_add(vm, 0, &init, guest_main);
+ return vm;
+}
+
+static void test_pad_must_be_zero(void)
+{
+ struct kvm_vcpu *vcpu;
+ struct kvm_vm *vm = setup_vm(&vcpu);
+ struct kvm_smccc_filter filter = {
+ .base = PSCI_0_2_FN_PSCI_VERSION,
+ .nr_functions = 1,
+ .action = KVM_SMCCC_FILTER_DENY,
+ .pad = { -1 },
+ };
+ int r;
+
+ r = __kvm_device_attr_set(vm->fd, KVM_ARM_VM_SMCCC_CTRL,
+ KVM_ARM_VM_SMCCC_FILTER, &filter);
+ TEST_ASSERT(r < 0 && errno == EINVAL,
+ "Setting filter with nonzero padding should return EINVAL");
+}
+
+/* Ensure that userspace cannot filter the Arm Architecture SMCCC range */
+static void test_filter_reserved_range(void)
+{
+ struct kvm_vcpu *vcpu;
+ struct kvm_vm *vm = setup_vm(&vcpu);
+ uint32_t smc64_fn;
+ int r;
+
+ r = __set_smccc_filter(vm, ARM_SMCCC_ARCH_WORKAROUND_1,
+ 1, KVM_SMCCC_FILTER_DENY);
+ TEST_ASSERT(r < 0 && errno == EEXIST,
+ "Attempt to filter reserved range should return EEXIST");
+
+ smc64_fn = ARM_SMCCC_CALL_VAL(ARM_SMCCC_FAST_CALL, ARM_SMCCC_SMC_64,
+ 0, 0);
+
+ r = __set_smccc_filter(vm, smc64_fn, 1, KVM_SMCCC_FILTER_DENY);
+ TEST_ASSERT(r < 0 && errno == EEXIST,
+ "Attempt to filter reserved range should return EEXIST");
+
+ kvm_vm_free(vm);
+}
+
+static void test_invalid_nr_functions(void)
+{
+ struct kvm_vcpu *vcpu;
+ struct kvm_vm *vm = setup_vm(&vcpu);
+ int r;
+
+ r = __set_smccc_filter(vm, PSCI_0_2_FN64_CPU_ON, 0, KVM_SMCCC_FILTER_DENY);
+ TEST_ASSERT(r < 0 && errno == EINVAL,
+ "Attempt to filter 0 functions should return EINVAL");
+
+ kvm_vm_free(vm);
+}
+
+static void test_overflow_nr_functions(void)
+{
+ struct kvm_vcpu *vcpu;
+ struct kvm_vm *vm = setup_vm(&vcpu);
+ int r;
+
+ r = __set_smccc_filter(vm, ~0, ~0, KVM_SMCCC_FILTER_DENY);
+ TEST_ASSERT(r < 0 && errno == EINVAL,
+ "Attempt to overflow filter range should return EINVAL");
+
+ kvm_vm_free(vm);
+}
+
+static void test_reserved_action(void)
+{
+ struct kvm_vcpu *vcpu;
+ struct kvm_vm *vm = setup_vm(&vcpu);
+ int r;
+
+ r = __set_smccc_filter(vm, PSCI_0_2_FN64_CPU_ON, 1, -1);
+ TEST_ASSERT(r < 0 && errno == EINVAL,
+ "Attempt to use reserved filter action should return EINVAL");
+
+ kvm_vm_free(vm);
+}
+
+
+/* Test that overlapping configurations of the SMCCC filter are rejected */
+static void test_filter_overlap(void)
+{
+ struct kvm_vcpu *vcpu;
+ struct kvm_vm *vm = setup_vm(&vcpu);
+ int r;
+
+ set_smccc_filter(vm, PSCI_0_2_FN64_CPU_ON, 1, KVM_SMCCC_FILTER_DENY);
+
+ r = __set_smccc_filter(vm, PSCI_0_2_FN64_CPU_ON, 1, KVM_SMCCC_FILTER_DENY);
+ TEST_ASSERT(r < 0 && errno == EEXIST,
+ "Attempt to filter already configured range should return EEXIST");
+
+ kvm_vm_free(vm);
+}
+
+static void expect_call_denied(struct kvm_vcpu *vcpu)
+{
+ struct ucall uc;
+
+ if (get_ucall(vcpu, &uc) != UCALL_SYNC)
+ TEST_FAIL("Unexpected ucall: %lu", uc.cmd);
+
+ TEST_ASSERT(uc.args[1] == SMCCC_RET_NOT_SUPPORTED,
+ "Unexpected SMCCC return code: %lu", uc.args[1]);
+}
+
+/* Denied SMCCC calls have a return code of SMCCC_RET_NOT_SUPPORTED */
+static void test_filter_denied(void)
+{
+ enum smccc_conduit conduit;
+ struct kvm_vcpu *vcpu;
+ struct kvm_vm *vm;
+
+ for_each_conduit(conduit) {
+ vm = setup_vm(&vcpu);
+
+ set_smccc_filter(vm, PSCI_0_2_FN_PSCI_VERSION, 1, KVM_SMCCC_FILTER_DENY);
+ vcpu_args_set(vcpu, 2, PSCI_0_2_FN_PSCI_VERSION, conduit);
+
+ vcpu_run(vcpu);
+ expect_call_denied(vcpu);
+
+ kvm_vm_free(vm);
+ }
+}
+
+static void expect_call_fwd_to_user(struct kvm_vcpu *vcpu, uint32_t func_id,
+ enum smccc_conduit conduit)
+{
+ struct kvm_run *run = vcpu->run;
+
+ TEST_ASSERT(run->exit_reason == KVM_EXIT_HYPERCALL,
+ "Unexpected exit reason: %u", run->exit_reason);
+ TEST_ASSERT(run->hypercall.nr == func_id,
+ "Unexpected SMCCC function: %llu", run->hypercall.nr);
+
+ if (conduit == SMC_INSN)
+ TEST_ASSERT(run->hypercall.flags & KVM_HYPERCALL_EXIT_SMC,
+ "KVM_HYPERCALL_EXIT_SMC is not set");
+ else
+ TEST_ASSERT(!(run->hypercall.flags & KVM_HYPERCALL_EXIT_SMC),
+ "KVM_HYPERCALL_EXIT_SMC is set");
+}
+
+/* SMCCC calls forwarded to userspace cause KVM_EXIT_HYPERCALL exits */
+static void test_filter_fwd_to_user(void)
+{
+ enum smccc_conduit conduit;
+ struct kvm_vcpu *vcpu;
+ struct kvm_vm *vm;
+
+ for_each_conduit(conduit) {
+ vm = setup_vm(&vcpu);
+
+ set_smccc_filter(vm, PSCI_0_2_FN_PSCI_VERSION, 1, KVM_SMCCC_FILTER_FWD_TO_USER);
+ vcpu_args_set(vcpu, 2, PSCI_0_2_FN_PSCI_VERSION, conduit);
+
+ vcpu_run(vcpu);
+ expect_call_fwd_to_user(vcpu, PSCI_0_2_FN_PSCI_VERSION, conduit);
+
+ kvm_vm_free(vm);
+ }
+}
+
+static bool kvm_supports_smccc_filter(void)
+{
+ struct kvm_vm *vm = vm_create_barebones();
+ int r;
+
+ r = __kvm_has_device_attr(vm->fd, KVM_ARM_VM_SMCCC_CTRL, KVM_ARM_VM_SMCCC_FILTER);
+
+ kvm_vm_free(vm);
+ return !r;
+}
+
+int main(void)
+{
+ TEST_REQUIRE(kvm_supports_smccc_filter());
+
+ test_pad_must_be_zero();
+ test_invalid_nr_functions();
+ test_overflow_nr_functions();
+ test_reserved_action();
+ test_filter_reserved_range();
+ test_filter_overlap();
+ test_filter_denied();
+ test_filter_fwd_to_user();
+}
diff --git a/tools/testing/selftests/kvm/aarch64/vcpu_width_config.c b/tools/testing/selftests/kvm/aarch64/vcpu_width_config.c
new file mode 100644
index 000000000000..80b74c6f152b
--- /dev/null
+++ b/tools/testing/selftests/kvm/aarch64/vcpu_width_config.c
@@ -0,0 +1,121 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * vcpu_width_config - Test KVM_ARM_VCPU_INIT() with KVM_ARM_VCPU_EL1_32BIT.
+ *
+ * Copyright (c) 2022 Google LLC.
+ *
+ * This is a test that ensures that non-mixed-width vCPUs (all 64bit vCPUs
+ * or all 32bit vcPUs) can be configured and mixed-width vCPUs cannot be
+ * configured.
+ */
+
+#include "kvm_util.h"
+#include "processor.h"
+#include "test_util.h"
+
+
+/*
+ * Add a vCPU, run KVM_ARM_VCPU_INIT with @init0, and then
+ * add another vCPU, and run KVM_ARM_VCPU_INIT with @init1.
+ */
+static int add_init_2vcpus(struct kvm_vcpu_init *init0,
+ struct kvm_vcpu_init *init1)
+{
+ struct kvm_vcpu *vcpu0, *vcpu1;
+ struct kvm_vm *vm;
+ int ret;
+
+ vm = vm_create_barebones();
+
+ vcpu0 = __vm_vcpu_add(vm, 0);
+ ret = __vcpu_ioctl(vcpu0, KVM_ARM_VCPU_INIT, init0);
+ if (ret)
+ goto free_exit;
+
+ vcpu1 = __vm_vcpu_add(vm, 1);
+ ret = __vcpu_ioctl(vcpu1, KVM_ARM_VCPU_INIT, init1);
+
+free_exit:
+ kvm_vm_free(vm);
+ return ret;
+}
+
+/*
+ * Add two vCPUs, then run KVM_ARM_VCPU_INIT for one vCPU with @init0,
+ * and run KVM_ARM_VCPU_INIT for another vCPU with @init1.
+ */
+static int add_2vcpus_init_2vcpus(struct kvm_vcpu_init *init0,
+ struct kvm_vcpu_init *init1)
+{
+ struct kvm_vcpu *vcpu0, *vcpu1;
+ struct kvm_vm *vm;
+ int ret;
+
+ vm = vm_create_barebones();
+
+ vcpu0 = __vm_vcpu_add(vm, 0);
+ vcpu1 = __vm_vcpu_add(vm, 1);
+
+ ret = __vcpu_ioctl(vcpu0, KVM_ARM_VCPU_INIT, init0);
+ if (ret)
+ goto free_exit;
+
+ ret = __vcpu_ioctl(vcpu1, KVM_ARM_VCPU_INIT, init1);
+
+free_exit:
+ kvm_vm_free(vm);
+ return ret;
+}
+
+/*
+ * Tests that two 64bit vCPUs can be configured, two 32bit vCPUs can be
+ * configured, and two mixed-width vCPUs cannot be configured.
+ * Each of those three cases, configure vCPUs in two different orders.
+ * The one is running KVM_CREATE_VCPU for 2 vCPUs, and then running
+ * KVM_ARM_VCPU_INIT for them.
+ * The other is running KVM_CREATE_VCPU and KVM_ARM_VCPU_INIT for a vCPU,
+ * and then run those commands for another vCPU.
+ */
+int main(void)
+{
+ struct kvm_vcpu_init init0, init1;
+ struct kvm_vm *vm;
+ int ret;
+
+ TEST_REQUIRE(kvm_has_cap(KVM_CAP_ARM_EL1_32BIT));
+
+ /* Get the preferred target type and copy that to init1 for later use */
+ vm = vm_create_barebones();
+ vm_ioctl(vm, KVM_ARM_PREFERRED_TARGET, &init0);
+ kvm_vm_free(vm);
+ init1 = init0;
+
+ /* Test with 64bit vCPUs */
+ ret = add_init_2vcpus(&init0, &init0);
+ TEST_ASSERT(ret == 0,
+ "Configuring 64bit EL1 vCPUs failed unexpectedly");
+ ret = add_2vcpus_init_2vcpus(&init0, &init0);
+ TEST_ASSERT(ret == 0,
+ "Configuring 64bit EL1 vCPUs failed unexpectedly");
+
+ /* Test with 32bit vCPUs */
+ init0.features[0] = (1 << KVM_ARM_VCPU_EL1_32BIT);
+ ret = add_init_2vcpus(&init0, &init0);
+ TEST_ASSERT(ret == 0,
+ "Configuring 32bit EL1 vCPUs failed unexpectedly");
+ ret = add_2vcpus_init_2vcpus(&init0, &init0);
+ TEST_ASSERT(ret == 0,
+ "Configuring 32bit EL1 vCPUs failed unexpectedly");
+
+ /* Test with mixed-width vCPUs */
+ init0.features[0] = 0;
+ init1.features[0] = (1 << KVM_ARM_VCPU_EL1_32BIT);
+ ret = add_init_2vcpus(&init0, &init1);
+ TEST_ASSERT(ret != 0,
+ "Configuring mixed-width vCPUs worked unexpectedly");
+ ret = add_2vcpus_init_2vcpus(&init0, &init1);
+ TEST_ASSERT(ret != 0,
+ "Configuring mixed-width vCPUs worked unexpectedly");
+
+ return 0;
+}
diff --git a/tools/testing/selftests/kvm/aarch64/vgic_init.c b/tools/testing/selftests/kvm/aarch64/vgic_init.c
new file mode 100644
index 000000000000..eef816b80993
--- /dev/null
+++ b/tools/testing/selftests/kvm/aarch64/vgic_init.c
@@ -0,0 +1,716 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * vgic init sequence tests
+ *
+ * Copyright (C) 2020, Red Hat, Inc.
+ */
+#define _GNU_SOURCE
+#include <linux/kernel.h>
+#include <sys/syscall.h>
+#include <asm/kvm.h>
+#include <asm/kvm_para.h>
+
+#include "test_util.h"
+#include "kvm_util.h"
+#include "processor.h"
+#include "vgic.h"
+
+#define NR_VCPUS 4
+
+#define REG_OFFSET(vcpu, offset) (((uint64_t)vcpu << 32) | offset)
+
+#define GICR_TYPER 0x8
+
+#define VGIC_DEV_IS_V2(_d) ((_d) == KVM_DEV_TYPE_ARM_VGIC_V2)
+#define VGIC_DEV_IS_V3(_d) ((_d) == KVM_DEV_TYPE_ARM_VGIC_V3)
+
+struct vm_gic {
+ struct kvm_vm *vm;
+ int gic_fd;
+ uint32_t gic_dev_type;
+};
+
+static uint64_t max_phys_size;
+
+/*
+ * Helpers to access a redistributor register and verify the ioctl() failed or
+ * succeeded as expected, and provided the correct value on success.
+ */
+static void v3_redist_reg_get_errno(int gicv3_fd, int vcpu, int offset,
+ int want, const char *msg)
+{
+ uint32_t ignored_val;
+ int ret = __kvm_device_attr_get(gicv3_fd, KVM_DEV_ARM_VGIC_GRP_REDIST_REGS,
+ REG_OFFSET(vcpu, offset), &ignored_val);
+
+ TEST_ASSERT(ret && errno == want, "%s; want errno = %d", msg, want);
+}
+
+static void v3_redist_reg_get(int gicv3_fd, int vcpu, int offset, uint32_t want,
+ const char *msg)
+{
+ uint32_t val;
+
+ kvm_device_attr_get(gicv3_fd, KVM_DEV_ARM_VGIC_GRP_REDIST_REGS,
+ REG_OFFSET(vcpu, offset), &val);
+ TEST_ASSERT(val == want, "%s; want '0x%x', got '0x%x'", msg, want, val);
+}
+
+/* dummy guest code */
+static void guest_code(void)
+{
+ GUEST_SYNC(0);
+ GUEST_SYNC(1);
+ GUEST_SYNC(2);
+ GUEST_DONE();
+}
+
+/* we don't want to assert on run execution, hence that helper */
+static int run_vcpu(struct kvm_vcpu *vcpu)
+{
+ return __vcpu_run(vcpu) ? -errno : 0;
+}
+
+static struct vm_gic vm_gic_create_with_vcpus(uint32_t gic_dev_type,
+ uint32_t nr_vcpus,
+ struct kvm_vcpu *vcpus[])
+{
+ struct vm_gic v;
+
+ v.gic_dev_type = gic_dev_type;
+ v.vm = vm_create_with_vcpus(nr_vcpus, guest_code, vcpus);
+ v.gic_fd = kvm_create_device(v.vm, gic_dev_type);
+
+ return v;
+}
+
+static void vm_gic_destroy(struct vm_gic *v)
+{
+ close(v->gic_fd);
+ kvm_vm_free(v->vm);
+}
+
+struct vgic_region_attr {
+ uint64_t attr;
+ uint64_t size;
+ uint64_t alignment;
+};
+
+struct vgic_region_attr gic_v3_dist_region = {
+ .attr = KVM_VGIC_V3_ADDR_TYPE_DIST,
+ .size = 0x10000,
+ .alignment = 0x10000,
+};
+
+struct vgic_region_attr gic_v3_redist_region = {
+ .attr = KVM_VGIC_V3_ADDR_TYPE_REDIST,
+ .size = NR_VCPUS * 0x20000,
+ .alignment = 0x10000,
+};
+
+struct vgic_region_attr gic_v2_dist_region = {
+ .attr = KVM_VGIC_V2_ADDR_TYPE_DIST,
+ .size = 0x1000,
+ .alignment = 0x1000,
+};
+
+struct vgic_region_attr gic_v2_cpu_region = {
+ .attr = KVM_VGIC_V2_ADDR_TYPE_CPU,
+ .size = 0x2000,
+ .alignment = 0x1000,
+};
+
+/**
+ * Helper routine that performs KVM device tests in general. Eventually the
+ * ARM_VGIC (GICv2 or GICv3) device gets created with an overlapping
+ * DIST/REDIST (or DIST/CPUIF for GICv2). Assumption is 4 vcpus are going to be
+ * used hence the overlap. In the case of GICv3, A RDIST region is set at @0x0
+ * and a DIST region is set @0x70000. The GICv2 case sets a CPUIF @0x0 and a
+ * DIST region @0x1000.
+ */
+static void subtest_dist_rdist(struct vm_gic *v)
+{
+ int ret;
+ uint64_t addr;
+ struct vgic_region_attr rdist; /* CPU interface in GICv2*/
+ struct vgic_region_attr dist;
+
+ rdist = VGIC_DEV_IS_V3(v->gic_dev_type) ? gic_v3_redist_region
+ : gic_v2_cpu_region;
+ dist = VGIC_DEV_IS_V3(v->gic_dev_type) ? gic_v3_dist_region
+ : gic_v2_dist_region;
+
+ /* Check existing group/attributes */
+ kvm_has_device_attr(v->gic_fd, KVM_DEV_ARM_VGIC_GRP_ADDR, dist.attr);
+
+ kvm_has_device_attr(v->gic_fd, KVM_DEV_ARM_VGIC_GRP_ADDR, rdist.attr);
+
+ /* check non existing attribute */
+ ret = __kvm_has_device_attr(v->gic_fd, KVM_DEV_ARM_VGIC_GRP_ADDR, -1);
+ TEST_ASSERT(ret && errno == ENXIO, "attribute not supported");
+
+ /* misaligned DIST and REDIST address settings */
+ addr = dist.alignment / 0x10;
+ ret = __kvm_device_attr_set(v->gic_fd, KVM_DEV_ARM_VGIC_GRP_ADDR,
+ dist.attr, &addr);
+ TEST_ASSERT(ret && errno == EINVAL, "GIC dist base not aligned");
+
+ addr = rdist.alignment / 0x10;
+ ret = __kvm_device_attr_set(v->gic_fd, KVM_DEV_ARM_VGIC_GRP_ADDR,
+ rdist.attr, &addr);
+ TEST_ASSERT(ret && errno == EINVAL, "GIC redist/cpu base not aligned");
+
+ /* out of range address */
+ addr = max_phys_size;
+ ret = __kvm_device_attr_set(v->gic_fd, KVM_DEV_ARM_VGIC_GRP_ADDR,
+ dist.attr, &addr);
+ TEST_ASSERT(ret && errno == E2BIG, "dist address beyond IPA limit");
+
+ ret = __kvm_device_attr_set(v->gic_fd, KVM_DEV_ARM_VGIC_GRP_ADDR,
+ rdist.attr, &addr);
+ TEST_ASSERT(ret && errno == E2BIG, "redist address beyond IPA limit");
+
+ /* Space for half a rdist (a rdist is: 2 * rdist.alignment). */
+ addr = max_phys_size - dist.alignment;
+ ret = __kvm_device_attr_set(v->gic_fd, KVM_DEV_ARM_VGIC_GRP_ADDR,
+ rdist.attr, &addr);
+ TEST_ASSERT(ret && errno == E2BIG,
+ "half of the redist is beyond IPA limit");
+
+ /* set REDIST base address @0x0*/
+ addr = 0x00000;
+ kvm_device_attr_set(v->gic_fd, KVM_DEV_ARM_VGIC_GRP_ADDR,
+ rdist.attr, &addr);
+
+ /* Attempt to create a second legacy redistributor region */
+ addr = 0xE0000;
+ ret = __kvm_device_attr_set(v->gic_fd, KVM_DEV_ARM_VGIC_GRP_ADDR,
+ rdist.attr, &addr);
+ TEST_ASSERT(ret && errno == EEXIST, "GIC redist base set again");
+
+ ret = __kvm_has_device_attr(v->gic_fd, KVM_DEV_ARM_VGIC_GRP_ADDR,
+ KVM_VGIC_V3_ADDR_TYPE_REDIST);
+ if (!ret) {
+ /* Attempt to mix legacy and new redistributor regions */
+ addr = REDIST_REGION_ATTR_ADDR(NR_VCPUS, 0x100000, 0, 0);
+ ret = __kvm_device_attr_set(v->gic_fd, KVM_DEV_ARM_VGIC_GRP_ADDR,
+ KVM_VGIC_V3_ADDR_TYPE_REDIST_REGION, &addr);
+ TEST_ASSERT(ret && errno == EINVAL,
+ "attempt to mix GICv3 REDIST and REDIST_REGION");
+ }
+
+ /*
+ * Set overlapping DIST / REDIST, cannot be detected here. Will be detected
+ * on first vcpu run instead.
+ */
+ addr = rdist.size - rdist.alignment;
+ kvm_device_attr_set(v->gic_fd, KVM_DEV_ARM_VGIC_GRP_ADDR,
+ dist.attr, &addr);
+}
+
+/* Test the new REDIST region API */
+static void subtest_v3_redist_regions(struct vm_gic *v)
+{
+ uint64_t addr, expected_addr;
+ int ret;
+
+ ret = __kvm_has_device_attr(v->gic_fd, KVM_DEV_ARM_VGIC_GRP_ADDR,
+ KVM_VGIC_V3_ADDR_TYPE_REDIST);
+ TEST_ASSERT(!ret, "Multiple redist regions advertised");
+
+ addr = REDIST_REGION_ATTR_ADDR(NR_VCPUS, 0x100000, 2, 0);
+ ret = __kvm_device_attr_set(v->gic_fd, KVM_DEV_ARM_VGIC_GRP_ADDR,
+ KVM_VGIC_V3_ADDR_TYPE_REDIST_REGION, &addr);
+ TEST_ASSERT(ret && errno == EINVAL, "redist region attr value with flags != 0");
+
+ addr = REDIST_REGION_ATTR_ADDR(0, 0x100000, 0, 0);
+ ret = __kvm_device_attr_set(v->gic_fd, KVM_DEV_ARM_VGIC_GRP_ADDR,
+ KVM_VGIC_V3_ADDR_TYPE_REDIST_REGION, &addr);
+ TEST_ASSERT(ret && errno == EINVAL, "redist region attr value with count== 0");
+
+ addr = REDIST_REGION_ATTR_ADDR(2, 0x200000, 0, 1);
+ ret = __kvm_device_attr_set(v->gic_fd, KVM_DEV_ARM_VGIC_GRP_ADDR,
+ KVM_VGIC_V3_ADDR_TYPE_REDIST_REGION, &addr);
+ TEST_ASSERT(ret && errno == EINVAL,
+ "attempt to register the first rdist region with index != 0");
+
+ addr = REDIST_REGION_ATTR_ADDR(2, 0x201000, 0, 1);
+ ret = __kvm_device_attr_set(v->gic_fd, KVM_DEV_ARM_VGIC_GRP_ADDR,
+ KVM_VGIC_V3_ADDR_TYPE_REDIST_REGION, &addr);
+ TEST_ASSERT(ret && errno == EINVAL, "rdist region with misaligned address");
+
+ addr = REDIST_REGION_ATTR_ADDR(2, 0x200000, 0, 0);
+ kvm_device_attr_set(v->gic_fd, KVM_DEV_ARM_VGIC_GRP_ADDR,
+ KVM_VGIC_V3_ADDR_TYPE_REDIST_REGION, &addr);
+
+ addr = REDIST_REGION_ATTR_ADDR(2, 0x200000, 0, 1);
+ ret = __kvm_device_attr_set(v->gic_fd, KVM_DEV_ARM_VGIC_GRP_ADDR,
+ KVM_VGIC_V3_ADDR_TYPE_REDIST_REGION, &addr);
+ TEST_ASSERT(ret && errno == EINVAL, "register an rdist region with already used index");
+
+ addr = REDIST_REGION_ATTR_ADDR(1, 0x210000, 0, 2);
+ ret = __kvm_device_attr_set(v->gic_fd, KVM_DEV_ARM_VGIC_GRP_ADDR,
+ KVM_VGIC_V3_ADDR_TYPE_REDIST_REGION, &addr);
+ TEST_ASSERT(ret && errno == EINVAL,
+ "register an rdist region overlapping with another one");
+
+ addr = REDIST_REGION_ATTR_ADDR(1, 0x240000, 0, 2);
+ ret = __kvm_device_attr_set(v->gic_fd, KVM_DEV_ARM_VGIC_GRP_ADDR,
+ KVM_VGIC_V3_ADDR_TYPE_REDIST_REGION, &addr);
+ TEST_ASSERT(ret && errno == EINVAL, "register redist region with index not +1");
+
+ addr = REDIST_REGION_ATTR_ADDR(1, 0x240000, 0, 1);
+ kvm_device_attr_set(v->gic_fd, KVM_DEV_ARM_VGIC_GRP_ADDR,
+ KVM_VGIC_V3_ADDR_TYPE_REDIST_REGION, &addr);
+
+ addr = REDIST_REGION_ATTR_ADDR(1, max_phys_size, 0, 2);
+ ret = __kvm_device_attr_set(v->gic_fd, KVM_DEV_ARM_VGIC_GRP_ADDR,
+ KVM_VGIC_V3_ADDR_TYPE_REDIST_REGION, &addr);
+ TEST_ASSERT(ret && errno == E2BIG,
+ "register redist region with base address beyond IPA range");
+
+ /* The last redist is above the pa range. */
+ addr = REDIST_REGION_ATTR_ADDR(2, max_phys_size - 0x30000, 0, 2);
+ ret = __kvm_device_attr_set(v->gic_fd, KVM_DEV_ARM_VGIC_GRP_ADDR,
+ KVM_VGIC_V3_ADDR_TYPE_REDIST_REGION, &addr);
+ TEST_ASSERT(ret && errno == E2BIG,
+ "register redist region with top address beyond IPA range");
+
+ addr = 0x260000;
+ ret = __kvm_device_attr_set(v->gic_fd, KVM_DEV_ARM_VGIC_GRP_ADDR,
+ KVM_VGIC_V3_ADDR_TYPE_REDIST, &addr);
+ TEST_ASSERT(ret && errno == EINVAL,
+ "Mix KVM_VGIC_V3_ADDR_TYPE_REDIST and REDIST_REGION");
+
+ /*
+ * Now there are 2 redist regions:
+ * region 0 @ 0x200000 2 redists
+ * region 1 @ 0x240000 1 redist
+ * Attempt to read their characteristics
+ */
+
+ addr = REDIST_REGION_ATTR_ADDR(0, 0, 0, 0);
+ expected_addr = REDIST_REGION_ATTR_ADDR(2, 0x200000, 0, 0);
+ ret = __kvm_device_attr_get(v->gic_fd, KVM_DEV_ARM_VGIC_GRP_ADDR,
+ KVM_VGIC_V3_ADDR_TYPE_REDIST_REGION, &addr);
+ TEST_ASSERT(!ret && addr == expected_addr, "read characteristics of region #0");
+
+ addr = REDIST_REGION_ATTR_ADDR(0, 0, 0, 1);
+ expected_addr = REDIST_REGION_ATTR_ADDR(1, 0x240000, 0, 1);
+ ret = __kvm_device_attr_get(v->gic_fd, KVM_DEV_ARM_VGIC_GRP_ADDR,
+ KVM_VGIC_V3_ADDR_TYPE_REDIST_REGION, &addr);
+ TEST_ASSERT(!ret && addr == expected_addr, "read characteristics of region #1");
+
+ addr = REDIST_REGION_ATTR_ADDR(0, 0, 0, 2);
+ ret = __kvm_device_attr_get(v->gic_fd, KVM_DEV_ARM_VGIC_GRP_ADDR,
+ KVM_VGIC_V3_ADDR_TYPE_REDIST_REGION, &addr);
+ TEST_ASSERT(ret && errno == ENOENT, "read characteristics of non existing region");
+
+ addr = 0x260000;
+ kvm_device_attr_set(v->gic_fd, KVM_DEV_ARM_VGIC_GRP_ADDR,
+ KVM_VGIC_V3_ADDR_TYPE_DIST, &addr);
+
+ addr = REDIST_REGION_ATTR_ADDR(1, 0x260000, 0, 2);
+ ret = __kvm_device_attr_set(v->gic_fd, KVM_DEV_ARM_VGIC_GRP_ADDR,
+ KVM_VGIC_V3_ADDR_TYPE_REDIST_REGION, &addr);
+ TEST_ASSERT(ret && errno == EINVAL, "register redist region colliding with dist");
+}
+
+/*
+ * VGIC KVM device is created and initialized before the secondary CPUs
+ * get created
+ */
+static void test_vgic_then_vcpus(uint32_t gic_dev_type)
+{
+ struct kvm_vcpu *vcpus[NR_VCPUS];
+ struct vm_gic v;
+ int ret, i;
+
+ v = vm_gic_create_with_vcpus(gic_dev_type, 1, vcpus);
+
+ subtest_dist_rdist(&v);
+
+ /* Add the rest of the VCPUs */
+ for (i = 1; i < NR_VCPUS; ++i)
+ vcpus[i] = vm_vcpu_add(v.vm, i, guest_code);
+
+ ret = run_vcpu(vcpus[3]);
+ TEST_ASSERT(ret == -EINVAL, "dist/rdist overlap detected on 1st vcpu run");
+
+ vm_gic_destroy(&v);
+}
+
+/* All the VCPUs are created before the VGIC KVM device gets initialized */
+static void test_vcpus_then_vgic(uint32_t gic_dev_type)
+{
+ struct kvm_vcpu *vcpus[NR_VCPUS];
+ struct vm_gic v;
+ int ret;
+
+ v = vm_gic_create_with_vcpus(gic_dev_type, NR_VCPUS, vcpus);
+
+ subtest_dist_rdist(&v);
+
+ ret = run_vcpu(vcpus[3]);
+ TEST_ASSERT(ret == -EINVAL, "dist/rdist overlap detected on 1st vcpu run");
+
+ vm_gic_destroy(&v);
+}
+
+static void test_v3_new_redist_regions(void)
+{
+ struct kvm_vcpu *vcpus[NR_VCPUS];
+ void *dummy = NULL;
+ struct vm_gic v;
+ uint64_t addr;
+ int ret;
+
+ v = vm_gic_create_with_vcpus(KVM_DEV_TYPE_ARM_VGIC_V3, NR_VCPUS, vcpus);
+ subtest_v3_redist_regions(&v);
+ kvm_device_attr_set(v.gic_fd, KVM_DEV_ARM_VGIC_GRP_CTRL,
+ KVM_DEV_ARM_VGIC_CTRL_INIT, NULL);
+
+ ret = run_vcpu(vcpus[3]);
+ TEST_ASSERT(ret == -ENXIO, "running without sufficient number of rdists");
+ vm_gic_destroy(&v);
+
+ /* step2 */
+
+ v = vm_gic_create_with_vcpus(KVM_DEV_TYPE_ARM_VGIC_V3, NR_VCPUS, vcpus);
+ subtest_v3_redist_regions(&v);
+
+ addr = REDIST_REGION_ATTR_ADDR(1, 0x280000, 0, 2);
+ kvm_device_attr_set(v.gic_fd, KVM_DEV_ARM_VGIC_GRP_ADDR,
+ KVM_VGIC_V3_ADDR_TYPE_REDIST_REGION, &addr);
+
+ ret = run_vcpu(vcpus[3]);
+ TEST_ASSERT(ret == -EBUSY, "running without vgic explicit init");
+
+ vm_gic_destroy(&v);
+
+ /* step 3 */
+
+ v = vm_gic_create_with_vcpus(KVM_DEV_TYPE_ARM_VGIC_V3, NR_VCPUS, vcpus);
+ subtest_v3_redist_regions(&v);
+
+ ret = __kvm_device_attr_set(v.gic_fd, KVM_DEV_ARM_VGIC_GRP_ADDR,
+ KVM_VGIC_V3_ADDR_TYPE_REDIST_REGION, dummy);
+ TEST_ASSERT(ret && errno == EFAULT,
+ "register a third region allowing to cover the 4 vcpus");
+
+ addr = REDIST_REGION_ATTR_ADDR(1, 0x280000, 0, 2);
+ kvm_device_attr_set(v.gic_fd, KVM_DEV_ARM_VGIC_GRP_ADDR,
+ KVM_VGIC_V3_ADDR_TYPE_REDIST_REGION, &addr);
+
+ kvm_device_attr_set(v.gic_fd, KVM_DEV_ARM_VGIC_GRP_CTRL,
+ KVM_DEV_ARM_VGIC_CTRL_INIT, NULL);
+
+ ret = run_vcpu(vcpus[3]);
+ TEST_ASSERT(!ret, "vcpu run");
+
+ vm_gic_destroy(&v);
+}
+
+static void test_v3_typer_accesses(void)
+{
+ struct vm_gic v;
+ uint64_t addr;
+ int ret, i;
+
+ v.vm = vm_create(NR_VCPUS);
+ (void)vm_vcpu_add(v.vm, 0, guest_code);
+
+ v.gic_fd = kvm_create_device(v.vm, KVM_DEV_TYPE_ARM_VGIC_V3);
+
+ (void)vm_vcpu_add(v.vm, 3, guest_code);
+
+ v3_redist_reg_get_errno(v.gic_fd, 1, GICR_TYPER, EINVAL,
+ "attempting to read GICR_TYPER of non created vcpu");
+
+ (void)vm_vcpu_add(v.vm, 1, guest_code);
+
+ v3_redist_reg_get_errno(v.gic_fd, 1, GICR_TYPER, EBUSY,
+ "read GICR_TYPER before GIC initialized");
+
+ (void)vm_vcpu_add(v.vm, 2, guest_code);
+
+ kvm_device_attr_set(v.gic_fd, KVM_DEV_ARM_VGIC_GRP_CTRL,
+ KVM_DEV_ARM_VGIC_CTRL_INIT, NULL);
+
+ for (i = 0; i < NR_VCPUS ; i++) {
+ v3_redist_reg_get(v.gic_fd, i, GICR_TYPER, i * 0x100,
+ "read GICR_TYPER before rdist region setting");
+ }
+
+ addr = REDIST_REGION_ATTR_ADDR(2, 0x200000, 0, 0);
+ kvm_device_attr_set(v.gic_fd, KVM_DEV_ARM_VGIC_GRP_ADDR,
+ KVM_VGIC_V3_ADDR_TYPE_REDIST_REGION, &addr);
+
+ /* The 2 first rdists should be put there (vcpu 0 and 3) */
+ v3_redist_reg_get(v.gic_fd, 0, GICR_TYPER, 0x0, "read typer of rdist #0");
+ v3_redist_reg_get(v.gic_fd, 3, GICR_TYPER, 0x310, "read typer of rdist #1");
+
+ addr = REDIST_REGION_ATTR_ADDR(10, 0x100000, 0, 1);
+ ret = __kvm_device_attr_set(v.gic_fd, KVM_DEV_ARM_VGIC_GRP_ADDR,
+ KVM_VGIC_V3_ADDR_TYPE_REDIST_REGION, &addr);
+ TEST_ASSERT(ret && errno == EINVAL, "collision with previous rdist region");
+
+ v3_redist_reg_get(v.gic_fd, 1, GICR_TYPER, 0x100,
+ "no redist region attached to vcpu #1 yet, last cannot be returned");
+ v3_redist_reg_get(v.gic_fd, 2, GICR_TYPER, 0x200,
+ "no redist region attached to vcpu #2, last cannot be returned");
+
+ addr = REDIST_REGION_ATTR_ADDR(10, 0x20000, 0, 1);
+ kvm_device_attr_set(v.gic_fd, KVM_DEV_ARM_VGIC_GRP_ADDR,
+ KVM_VGIC_V3_ADDR_TYPE_REDIST_REGION, &addr);
+
+ v3_redist_reg_get(v.gic_fd, 1, GICR_TYPER, 0x100, "read typer of rdist #1");
+ v3_redist_reg_get(v.gic_fd, 2, GICR_TYPER, 0x210,
+ "read typer of rdist #1, last properly returned");
+
+ vm_gic_destroy(&v);
+}
+
+static struct vm_gic vm_gic_v3_create_with_vcpuids(int nr_vcpus,
+ uint32_t vcpuids[])
+{
+ struct vm_gic v;
+ int i;
+
+ v.vm = vm_create(nr_vcpus);
+ for (i = 0; i < nr_vcpus; i++)
+ vm_vcpu_add(v.vm, vcpuids[i], guest_code);
+
+ v.gic_fd = kvm_create_device(v.vm, KVM_DEV_TYPE_ARM_VGIC_V3);
+
+ return v;
+}
+
+/**
+ * Test GICR_TYPER last bit with new redist regions
+ * rdist regions #1 and #2 are contiguous
+ * rdist region #0 @0x100000 2 rdist capacity
+ * rdists: 0, 3 (Last)
+ * rdist region #1 @0x240000 2 rdist capacity
+ * rdists: 5, 4 (Last)
+ * rdist region #2 @0x200000 2 rdist capacity
+ * rdists: 1, 2
+ */
+static void test_v3_last_bit_redist_regions(void)
+{
+ uint32_t vcpuids[] = { 0, 3, 5, 4, 1, 2 };
+ struct vm_gic v;
+ uint64_t addr;
+
+ v = vm_gic_v3_create_with_vcpuids(ARRAY_SIZE(vcpuids), vcpuids);
+
+ kvm_device_attr_set(v.gic_fd, KVM_DEV_ARM_VGIC_GRP_CTRL,
+ KVM_DEV_ARM_VGIC_CTRL_INIT, NULL);
+
+ addr = REDIST_REGION_ATTR_ADDR(2, 0x100000, 0, 0);
+ kvm_device_attr_set(v.gic_fd, KVM_DEV_ARM_VGIC_GRP_ADDR,
+ KVM_VGIC_V3_ADDR_TYPE_REDIST_REGION, &addr);
+
+ addr = REDIST_REGION_ATTR_ADDR(2, 0x240000, 0, 1);
+ kvm_device_attr_set(v.gic_fd, KVM_DEV_ARM_VGIC_GRP_ADDR,
+ KVM_VGIC_V3_ADDR_TYPE_REDIST_REGION, &addr);
+
+ addr = REDIST_REGION_ATTR_ADDR(2, 0x200000, 0, 2);
+ kvm_device_attr_set(v.gic_fd, KVM_DEV_ARM_VGIC_GRP_ADDR,
+ KVM_VGIC_V3_ADDR_TYPE_REDIST_REGION, &addr);
+
+ v3_redist_reg_get(v.gic_fd, 0, GICR_TYPER, 0x000, "read typer of rdist #0");
+ v3_redist_reg_get(v.gic_fd, 1, GICR_TYPER, 0x100, "read typer of rdist #1");
+ v3_redist_reg_get(v.gic_fd, 2, GICR_TYPER, 0x200, "read typer of rdist #2");
+ v3_redist_reg_get(v.gic_fd, 3, GICR_TYPER, 0x310, "read typer of rdist #3");
+ v3_redist_reg_get(v.gic_fd, 5, GICR_TYPER, 0x500, "read typer of rdist #5");
+ v3_redist_reg_get(v.gic_fd, 4, GICR_TYPER, 0x410, "read typer of rdist #4");
+
+ vm_gic_destroy(&v);
+}
+
+/* Test last bit with legacy region */
+static void test_v3_last_bit_single_rdist(void)
+{
+ uint32_t vcpuids[] = { 0, 3, 5, 4, 1, 2 };
+ struct vm_gic v;
+ uint64_t addr;
+
+ v = vm_gic_v3_create_with_vcpuids(ARRAY_SIZE(vcpuids), vcpuids);
+
+ kvm_device_attr_set(v.gic_fd, KVM_DEV_ARM_VGIC_GRP_CTRL,
+ KVM_DEV_ARM_VGIC_CTRL_INIT, NULL);
+
+ addr = 0x10000;
+ kvm_device_attr_set(v.gic_fd, KVM_DEV_ARM_VGIC_GRP_ADDR,
+ KVM_VGIC_V3_ADDR_TYPE_REDIST, &addr);
+
+ v3_redist_reg_get(v.gic_fd, 0, GICR_TYPER, 0x000, "read typer of rdist #0");
+ v3_redist_reg_get(v.gic_fd, 3, GICR_TYPER, 0x300, "read typer of rdist #1");
+ v3_redist_reg_get(v.gic_fd, 5, GICR_TYPER, 0x500, "read typer of rdist #2");
+ v3_redist_reg_get(v.gic_fd, 1, GICR_TYPER, 0x100, "read typer of rdist #3");
+ v3_redist_reg_get(v.gic_fd, 2, GICR_TYPER, 0x210, "read typer of rdist #3");
+
+ vm_gic_destroy(&v);
+}
+
+/* Uses the legacy REDIST region API. */
+static void test_v3_redist_ipa_range_check_at_vcpu_run(void)
+{
+ struct kvm_vcpu *vcpus[NR_VCPUS];
+ struct vm_gic v;
+ int ret, i;
+ uint64_t addr;
+
+ v = vm_gic_create_with_vcpus(KVM_DEV_TYPE_ARM_VGIC_V3, 1, vcpus);
+
+ /* Set space for 3 redists, we have 1 vcpu, so this succeeds. */
+ addr = max_phys_size - (3 * 2 * 0x10000);
+ kvm_device_attr_set(v.gic_fd, KVM_DEV_ARM_VGIC_GRP_ADDR,
+ KVM_VGIC_V3_ADDR_TYPE_REDIST, &addr);
+
+ addr = 0x00000;
+ kvm_device_attr_set(v.gic_fd, KVM_DEV_ARM_VGIC_GRP_ADDR,
+ KVM_VGIC_V3_ADDR_TYPE_DIST, &addr);
+
+ /* Add the rest of the VCPUs */
+ for (i = 1; i < NR_VCPUS; ++i)
+ vcpus[i] = vm_vcpu_add(v.vm, i, guest_code);
+
+ kvm_device_attr_set(v.gic_fd, KVM_DEV_ARM_VGIC_GRP_CTRL,
+ KVM_DEV_ARM_VGIC_CTRL_INIT, NULL);
+
+ /* Attempt to run a vcpu without enough redist space. */
+ ret = run_vcpu(vcpus[2]);
+ TEST_ASSERT(ret && errno == EINVAL,
+ "redist base+size above PA range detected on 1st vcpu run");
+
+ vm_gic_destroy(&v);
+}
+
+static void test_v3_its_region(void)
+{
+ struct kvm_vcpu *vcpus[NR_VCPUS];
+ struct vm_gic v;
+ uint64_t addr;
+ int its_fd, ret;
+
+ v = vm_gic_create_with_vcpus(KVM_DEV_TYPE_ARM_VGIC_V3, NR_VCPUS, vcpus);
+ its_fd = kvm_create_device(v.vm, KVM_DEV_TYPE_ARM_VGIC_ITS);
+
+ addr = 0x401000;
+ ret = __kvm_device_attr_set(its_fd, KVM_DEV_ARM_VGIC_GRP_ADDR,
+ KVM_VGIC_ITS_ADDR_TYPE, &addr);
+ TEST_ASSERT(ret && errno == EINVAL,
+ "ITS region with misaligned address");
+
+ addr = max_phys_size;
+ ret = __kvm_device_attr_set(its_fd, KVM_DEV_ARM_VGIC_GRP_ADDR,
+ KVM_VGIC_ITS_ADDR_TYPE, &addr);
+ TEST_ASSERT(ret && errno == E2BIG,
+ "register ITS region with base address beyond IPA range");
+
+ addr = max_phys_size - 0x10000;
+ ret = __kvm_device_attr_set(its_fd, KVM_DEV_ARM_VGIC_GRP_ADDR,
+ KVM_VGIC_ITS_ADDR_TYPE, &addr);
+ TEST_ASSERT(ret && errno == E2BIG,
+ "Half of ITS region is beyond IPA range");
+
+ /* This one succeeds setting the ITS base */
+ addr = 0x400000;
+ kvm_device_attr_set(its_fd, KVM_DEV_ARM_VGIC_GRP_ADDR,
+ KVM_VGIC_ITS_ADDR_TYPE, &addr);
+
+ addr = 0x300000;
+ ret = __kvm_device_attr_set(its_fd, KVM_DEV_ARM_VGIC_GRP_ADDR,
+ KVM_VGIC_ITS_ADDR_TYPE, &addr);
+ TEST_ASSERT(ret && errno == EEXIST, "ITS base set again");
+
+ close(its_fd);
+ vm_gic_destroy(&v);
+}
+
+/*
+ * Returns 0 if it's possible to create GIC device of a given type (V2 or V3).
+ */
+int test_kvm_device(uint32_t gic_dev_type)
+{
+ struct kvm_vcpu *vcpus[NR_VCPUS];
+ struct vm_gic v;
+ uint32_t other;
+ int ret;
+
+ v.vm = vm_create_with_vcpus(NR_VCPUS, guest_code, vcpus);
+
+ /* try to create a non existing KVM device */
+ ret = __kvm_test_create_device(v.vm, 0);
+ TEST_ASSERT(ret && errno == ENODEV, "unsupported device");
+
+ /* trial mode */
+ ret = __kvm_test_create_device(v.vm, gic_dev_type);
+ if (ret)
+ return ret;
+ v.gic_fd = kvm_create_device(v.vm, gic_dev_type);
+
+ ret = __kvm_create_device(v.vm, gic_dev_type);
+ TEST_ASSERT(ret < 0 && errno == EEXIST, "create GIC device twice");
+
+ /* try to create the other gic_dev_type */
+ other = VGIC_DEV_IS_V2(gic_dev_type) ? KVM_DEV_TYPE_ARM_VGIC_V3
+ : KVM_DEV_TYPE_ARM_VGIC_V2;
+
+ if (!__kvm_test_create_device(v.vm, other)) {
+ ret = __kvm_create_device(v.vm, other);
+ TEST_ASSERT(ret < 0 && (errno == EINVAL || errno == EEXIST),
+ "create GIC device while other version exists");
+ }
+
+ vm_gic_destroy(&v);
+
+ return 0;
+}
+
+void run_tests(uint32_t gic_dev_type)
+{
+ test_vcpus_then_vgic(gic_dev_type);
+ test_vgic_then_vcpus(gic_dev_type);
+
+ if (VGIC_DEV_IS_V3(gic_dev_type)) {
+ test_v3_new_redist_regions();
+ test_v3_typer_accesses();
+ test_v3_last_bit_redist_regions();
+ test_v3_last_bit_single_rdist();
+ test_v3_redist_ipa_range_check_at_vcpu_run();
+ test_v3_its_region();
+ }
+}
+
+int main(int ac, char **av)
+{
+ int ret;
+ int pa_bits;
+ int cnt_impl = 0;
+
+ pa_bits = vm_guest_mode_params[VM_MODE_DEFAULT].pa_bits;
+ max_phys_size = 1ULL << pa_bits;
+
+ ret = test_kvm_device(KVM_DEV_TYPE_ARM_VGIC_V3);
+ if (!ret) {
+ pr_info("Running GIC_v3 tests.\n");
+ run_tests(KVM_DEV_TYPE_ARM_VGIC_V3);
+ cnt_impl++;
+ }
+
+ ret = test_kvm_device(KVM_DEV_TYPE_ARM_VGIC_V2);
+ if (!ret) {
+ pr_info("Running GIC_v2 tests.\n");
+ run_tests(KVM_DEV_TYPE_ARM_VGIC_V2);
+ cnt_impl++;
+ }
+
+ if (!cnt_impl) {
+ print_skip("No GICv2 nor GICv3 support");
+ exit(KSFT_SKIP);
+ }
+ return 0;
+}
diff --git a/tools/testing/selftests/kvm/aarch64/vgic_irq.c b/tools/testing/selftests/kvm/aarch64/vgic_irq.c
new file mode 100644
index 000000000000..2e64b4856e38
--- /dev/null
+++ b/tools/testing/selftests/kvm/aarch64/vgic_irq.c
@@ -0,0 +1,855 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * vgic_irq.c - Test userspace injection of IRQs
+ *
+ * This test validates the injection of IRQs from userspace using various
+ * methods (e.g., KVM_IRQ_LINE) and modes (e.g., EOI). The guest "asks" the
+ * host to inject a specific intid via a GUEST_SYNC call, and then checks that
+ * it received it.
+ */
+#include <asm/kvm.h>
+#include <asm/kvm_para.h>
+#include <sys/eventfd.h>
+#include <linux/sizes.h>
+
+#include "processor.h"
+#include "test_util.h"
+#include "kvm_util.h"
+#include "gic.h"
+#include "gic_v3.h"
+#include "vgic.h"
+
+#define GICD_BASE_GPA 0x08000000ULL
+#define GICR_BASE_GPA 0x080A0000ULL
+
+/*
+ * Stores the user specified args; it's passed to the guest and to every test
+ * function.
+ */
+struct test_args {
+ uint32_t nr_irqs; /* number of KVM supported IRQs. */
+ bool eoi_split; /* 1 is eoir+dir, 0 is eoir only */
+ bool level_sensitive; /* 1 is level, 0 is edge */
+ int kvm_max_routes; /* output of KVM_CAP_IRQ_ROUTING */
+ bool kvm_supports_irqfd; /* output of KVM_CAP_IRQFD */
+};
+
+/*
+ * KVM implements 32 priority levels:
+ * 0x00 (highest priority) - 0xF8 (lowest priority), in steps of 8
+ *
+ * Note that these macros will still be correct in the case that KVM implements
+ * more priority levels. Also note that 32 is the minimum for GICv3 and GICv2.
+ */
+#define KVM_NUM_PRIOS 32
+#define KVM_PRIO_SHIFT 3 /* steps of 8 = 1 << 3 */
+#define KVM_PRIO_STEPS (1 << KVM_PRIO_SHIFT) /* 8 */
+#define LOWEST_PRIO (KVM_NUM_PRIOS - 1)
+#define CPU_PRIO_MASK (LOWEST_PRIO << KVM_PRIO_SHIFT) /* 0xf8 */
+#define IRQ_DEFAULT_PRIO (LOWEST_PRIO - 1)
+#define IRQ_DEFAULT_PRIO_REG (IRQ_DEFAULT_PRIO << KVM_PRIO_SHIFT) /* 0xf0 */
+
+static void *dist = (void *)GICD_BASE_GPA;
+static void *redist = (void *)GICR_BASE_GPA;
+
+/*
+ * The kvm_inject_* utilities are used by the guest to ask the host to inject
+ * interrupts (e.g., using the KVM_IRQ_LINE ioctl).
+ */
+
+typedef enum {
+ KVM_INJECT_EDGE_IRQ_LINE = 1,
+ KVM_SET_IRQ_LINE,
+ KVM_SET_IRQ_LINE_HIGH,
+ KVM_SET_LEVEL_INFO_HIGH,
+ KVM_INJECT_IRQFD,
+ KVM_WRITE_ISPENDR,
+ KVM_WRITE_ISACTIVER,
+} kvm_inject_cmd;
+
+struct kvm_inject_args {
+ kvm_inject_cmd cmd;
+ uint32_t first_intid;
+ uint32_t num;
+ int level;
+ bool expect_failure;
+};
+
+/* Used on the guest side to perform the hypercall. */
+static void kvm_inject_call(kvm_inject_cmd cmd, uint32_t first_intid,
+ uint32_t num, int level, bool expect_failure);
+
+/* Used on the host side to get the hypercall info. */
+static void kvm_inject_get_call(struct kvm_vm *vm, struct ucall *uc,
+ struct kvm_inject_args *args);
+
+#define _KVM_INJECT_MULTI(cmd, intid, num, expect_failure) \
+ kvm_inject_call(cmd, intid, num, -1 /* not used */, expect_failure)
+
+#define KVM_INJECT_MULTI(cmd, intid, num) \
+ _KVM_INJECT_MULTI(cmd, intid, num, false)
+
+#define _KVM_INJECT(cmd, intid, expect_failure) \
+ _KVM_INJECT_MULTI(cmd, intid, 1, expect_failure)
+
+#define KVM_INJECT(cmd, intid) \
+ _KVM_INJECT_MULTI(cmd, intid, 1, false)
+
+#define KVM_ACTIVATE(cmd, intid) \
+ kvm_inject_call(cmd, intid, 1, 1, false);
+
+struct kvm_inject_desc {
+ kvm_inject_cmd cmd;
+ /* can inject PPIs, PPIs, and/or SPIs. */
+ bool sgi, ppi, spi;
+};
+
+static struct kvm_inject_desc inject_edge_fns[] = {
+ /* sgi ppi spi */
+ { KVM_INJECT_EDGE_IRQ_LINE, false, false, true },
+ { KVM_INJECT_IRQFD, false, false, true },
+ { KVM_WRITE_ISPENDR, true, false, true },
+ { 0, },
+};
+
+static struct kvm_inject_desc inject_level_fns[] = {
+ /* sgi ppi spi */
+ { KVM_SET_IRQ_LINE_HIGH, false, true, true },
+ { KVM_SET_LEVEL_INFO_HIGH, false, true, true },
+ { KVM_INJECT_IRQFD, false, false, true },
+ { KVM_WRITE_ISPENDR, false, true, true },
+ { 0, },
+};
+
+static struct kvm_inject_desc set_active_fns[] = {
+ /* sgi ppi spi */
+ { KVM_WRITE_ISACTIVER, true, true, true },
+ { 0, },
+};
+
+#define for_each_inject_fn(t, f) \
+ for ((f) = (t); (f)->cmd; (f)++)
+
+#define for_each_supported_inject_fn(args, t, f) \
+ for_each_inject_fn(t, f) \
+ if ((args)->kvm_supports_irqfd || (f)->cmd != KVM_INJECT_IRQFD)
+
+#define for_each_supported_activate_fn(args, t, f) \
+ for_each_supported_inject_fn((args), (t), (f))
+
+/* Shared between the guest main thread and the IRQ handlers. */
+volatile uint64_t irq_handled;
+volatile uint32_t irqnr_received[MAX_SPI + 1];
+
+static void reset_stats(void)
+{
+ int i;
+
+ irq_handled = 0;
+ for (i = 0; i <= MAX_SPI; i++)
+ irqnr_received[i] = 0;
+}
+
+static uint64_t gic_read_ap1r0(void)
+{
+ uint64_t reg = read_sysreg_s(SYS_ICV_AP1R0_EL1);
+
+ dsb(sy);
+ return reg;
+}
+
+static void gic_write_ap1r0(uint64_t val)
+{
+ write_sysreg_s(val, SYS_ICV_AP1R0_EL1);
+ isb();
+}
+
+static void guest_set_irq_line(uint32_t intid, uint32_t level);
+
+static void guest_irq_generic_handler(bool eoi_split, bool level_sensitive)
+{
+ uint32_t intid = gic_get_and_ack_irq();
+
+ if (intid == IAR_SPURIOUS)
+ return;
+
+ GUEST_ASSERT(gic_irq_get_active(intid));
+
+ if (!level_sensitive)
+ GUEST_ASSERT(!gic_irq_get_pending(intid));
+
+ if (level_sensitive)
+ guest_set_irq_line(intid, 0);
+
+ GUEST_ASSERT(intid < MAX_SPI);
+ irqnr_received[intid] += 1;
+ irq_handled += 1;
+
+ gic_set_eoi(intid);
+ GUEST_ASSERT_EQ(gic_read_ap1r0(), 0);
+ if (eoi_split)
+ gic_set_dir(intid);
+
+ GUEST_ASSERT(!gic_irq_get_active(intid));
+ GUEST_ASSERT(!gic_irq_get_pending(intid));
+}
+
+static void kvm_inject_call(kvm_inject_cmd cmd, uint32_t first_intid,
+ uint32_t num, int level, bool expect_failure)
+{
+ struct kvm_inject_args args = {
+ .cmd = cmd,
+ .first_intid = first_intid,
+ .num = num,
+ .level = level,
+ .expect_failure = expect_failure,
+ };
+ GUEST_SYNC(&args);
+}
+
+#define GUEST_ASSERT_IAR_EMPTY() \
+do { \
+ uint32_t _intid; \
+ _intid = gic_get_and_ack_irq(); \
+ GUEST_ASSERT(_intid == 0 || _intid == IAR_SPURIOUS); \
+} while (0)
+
+#define CAT_HELPER(a, b) a ## b
+#define CAT(a, b) CAT_HELPER(a, b)
+#define PREFIX guest_irq_handler_
+#define GUEST_IRQ_HANDLER_NAME(split, lev) CAT(PREFIX, CAT(split, lev))
+#define GENERATE_GUEST_IRQ_HANDLER(split, lev) \
+static void CAT(PREFIX, CAT(split, lev))(struct ex_regs *regs) \
+{ \
+ guest_irq_generic_handler(split, lev); \
+}
+
+GENERATE_GUEST_IRQ_HANDLER(0, 0);
+GENERATE_GUEST_IRQ_HANDLER(0, 1);
+GENERATE_GUEST_IRQ_HANDLER(1, 0);
+GENERATE_GUEST_IRQ_HANDLER(1, 1);
+
+static void (*guest_irq_handlers[2][2])(struct ex_regs *) = {
+ {GUEST_IRQ_HANDLER_NAME(0, 0), GUEST_IRQ_HANDLER_NAME(0, 1),},
+ {GUEST_IRQ_HANDLER_NAME(1, 0), GUEST_IRQ_HANDLER_NAME(1, 1),},
+};
+
+static void reset_priorities(struct test_args *args)
+{
+ int i;
+
+ for (i = 0; i < args->nr_irqs; i++)
+ gic_set_priority(i, IRQ_DEFAULT_PRIO_REG);
+}
+
+static void guest_set_irq_line(uint32_t intid, uint32_t level)
+{
+ kvm_inject_call(KVM_SET_IRQ_LINE, intid, 1, level, false);
+}
+
+static void test_inject_fail(struct test_args *args,
+ uint32_t intid, kvm_inject_cmd cmd)
+{
+ reset_stats();
+
+ _KVM_INJECT(cmd, intid, true);
+ /* no IRQ to handle on entry */
+
+ GUEST_ASSERT_EQ(irq_handled, 0);
+ GUEST_ASSERT_IAR_EMPTY();
+}
+
+static void guest_inject(struct test_args *args,
+ uint32_t first_intid, uint32_t num,
+ kvm_inject_cmd cmd)
+{
+ uint32_t i;
+
+ reset_stats();
+
+ /* Cycle over all priorities to make things more interesting. */
+ for (i = first_intid; i < num + first_intid; i++)
+ gic_set_priority(i, (i % (KVM_NUM_PRIOS - 1)) << 3);
+
+ asm volatile("msr daifset, #2" : : : "memory");
+ KVM_INJECT_MULTI(cmd, first_intid, num);
+
+ while (irq_handled < num) {
+ asm volatile("wfi\n"
+ "msr daifclr, #2\n"
+ /* handle IRQ */
+ "msr daifset, #2\n"
+ : : : "memory");
+ }
+ asm volatile("msr daifclr, #2" : : : "memory");
+
+ GUEST_ASSERT_EQ(irq_handled, num);
+ for (i = first_intid; i < num + first_intid; i++)
+ GUEST_ASSERT_EQ(irqnr_received[i], 1);
+ GUEST_ASSERT_IAR_EMPTY();
+
+ reset_priorities(args);
+}
+
+/*
+ * Restore the active state of multiple concurrent IRQs (given by
+ * concurrent_irqs). This does what a live-migration would do on the
+ * destination side assuming there are some active IRQs that were not
+ * deactivated yet.
+ */
+static void guest_restore_active(struct test_args *args,
+ uint32_t first_intid, uint32_t num,
+ kvm_inject_cmd cmd)
+{
+ uint32_t prio, intid, ap1r;
+ int i;
+
+ /*
+ * Set the priorities of the first (KVM_NUM_PRIOS - 1) IRQs
+ * in descending order, so intid+1 can preempt intid.
+ */
+ for (i = 0, prio = (num - 1) * 8; i < num; i++, prio -= 8) {
+ GUEST_ASSERT(prio >= 0);
+ intid = i + first_intid;
+ gic_set_priority(intid, prio);
+ }
+
+ /*
+ * In a real migration, KVM would restore all GIC state before running
+ * guest code.
+ */
+ for (i = 0; i < num; i++) {
+ intid = i + first_intid;
+ KVM_ACTIVATE(cmd, intid);
+ ap1r = gic_read_ap1r0();
+ ap1r |= 1U << i;
+ gic_write_ap1r0(ap1r);
+ }
+
+ /* This is where the "migration" would occur. */
+
+ /* finish handling the IRQs starting with the highest priority one. */
+ for (i = 0; i < num; i++) {
+ intid = num - i - 1 + first_intid;
+ gic_set_eoi(intid);
+ if (args->eoi_split)
+ gic_set_dir(intid);
+ }
+
+ for (i = 0; i < num; i++)
+ GUEST_ASSERT(!gic_irq_get_active(i + first_intid));
+ GUEST_ASSERT_EQ(gic_read_ap1r0(), 0);
+ GUEST_ASSERT_IAR_EMPTY();
+}
+
+/*
+ * Polls the IAR until it's not a spurious interrupt.
+ *
+ * This function should only be used in test_inject_preemption (with IRQs
+ * masked).
+ */
+static uint32_t wait_for_and_activate_irq(void)
+{
+ uint32_t intid;
+
+ do {
+ asm volatile("wfi" : : : "memory");
+ intid = gic_get_and_ack_irq();
+ } while (intid == IAR_SPURIOUS);
+
+ return intid;
+}
+
+/*
+ * Inject multiple concurrent IRQs (num IRQs starting at first_intid) and
+ * handle them without handling the actual exceptions. This is done by masking
+ * interrupts for the whole test.
+ */
+static void test_inject_preemption(struct test_args *args,
+ uint32_t first_intid, int num,
+ kvm_inject_cmd cmd)
+{
+ uint32_t intid, prio, step = KVM_PRIO_STEPS;
+ int i;
+
+ /* Set the priorities of the first (KVM_NUM_PRIOS - 1) IRQs
+ * in descending order, so intid+1 can preempt intid.
+ */
+ for (i = 0, prio = (num - 1) * step; i < num; i++, prio -= step) {
+ GUEST_ASSERT(prio >= 0);
+ intid = i + first_intid;
+ gic_set_priority(intid, prio);
+ }
+
+ local_irq_disable();
+
+ for (i = 0; i < num; i++) {
+ uint32_t tmp;
+ intid = i + first_intid;
+ KVM_INJECT(cmd, intid);
+ /* Each successive IRQ will preempt the previous one. */
+ tmp = wait_for_and_activate_irq();
+ GUEST_ASSERT_EQ(tmp, intid);
+ if (args->level_sensitive)
+ guest_set_irq_line(intid, 0);
+ }
+
+ /* finish handling the IRQs starting with the highest priority one. */
+ for (i = 0; i < num; i++) {
+ intid = num - i - 1 + first_intid;
+ gic_set_eoi(intid);
+ if (args->eoi_split)
+ gic_set_dir(intid);
+ }
+
+ local_irq_enable();
+
+ for (i = 0; i < num; i++)
+ GUEST_ASSERT(!gic_irq_get_active(i + first_intid));
+ GUEST_ASSERT_EQ(gic_read_ap1r0(), 0);
+ GUEST_ASSERT_IAR_EMPTY();
+
+ reset_priorities(args);
+}
+
+static void test_injection(struct test_args *args, struct kvm_inject_desc *f)
+{
+ uint32_t nr_irqs = args->nr_irqs;
+
+ if (f->sgi) {
+ guest_inject(args, MIN_SGI, 1, f->cmd);
+ guest_inject(args, 0, 16, f->cmd);
+ }
+
+ if (f->ppi)
+ guest_inject(args, MIN_PPI, 1, f->cmd);
+
+ if (f->spi) {
+ guest_inject(args, MIN_SPI, 1, f->cmd);
+ guest_inject(args, nr_irqs - 1, 1, f->cmd);
+ guest_inject(args, MIN_SPI, nr_irqs - MIN_SPI, f->cmd);
+ }
+}
+
+static void test_injection_failure(struct test_args *args,
+ struct kvm_inject_desc *f)
+{
+ uint32_t bad_intid[] = { args->nr_irqs, 1020, 1024, 1120, 5120, ~0U, };
+ int i;
+
+ for (i = 0; i < ARRAY_SIZE(bad_intid); i++)
+ test_inject_fail(args, bad_intid[i], f->cmd);
+}
+
+static void test_preemption(struct test_args *args, struct kvm_inject_desc *f)
+{
+ /*
+ * Test up to 4 levels of preemption. The reason is that KVM doesn't
+ * currently implement the ability to have more than the number-of-LRs
+ * number of concurrently active IRQs. The number of LRs implemented is
+ * IMPLEMENTATION DEFINED, however, it seems that most implement 4.
+ */
+ if (f->sgi)
+ test_inject_preemption(args, MIN_SGI, 4, f->cmd);
+
+ if (f->ppi)
+ test_inject_preemption(args, MIN_PPI, 4, f->cmd);
+
+ if (f->spi)
+ test_inject_preemption(args, MIN_SPI, 4, f->cmd);
+}
+
+static void test_restore_active(struct test_args *args, struct kvm_inject_desc *f)
+{
+ /* Test up to 4 active IRQs. Same reason as in test_preemption. */
+ if (f->sgi)
+ guest_restore_active(args, MIN_SGI, 4, f->cmd);
+
+ if (f->ppi)
+ guest_restore_active(args, MIN_PPI, 4, f->cmd);
+
+ if (f->spi)
+ guest_restore_active(args, MIN_SPI, 4, f->cmd);
+}
+
+static void guest_code(struct test_args *args)
+{
+ uint32_t i, nr_irqs = args->nr_irqs;
+ bool level_sensitive = args->level_sensitive;
+ struct kvm_inject_desc *f, *inject_fns;
+
+ gic_init(GIC_V3, 1, dist, redist);
+
+ for (i = 0; i < nr_irqs; i++)
+ gic_irq_enable(i);
+
+ for (i = MIN_SPI; i < nr_irqs; i++)
+ gic_irq_set_config(i, !level_sensitive);
+
+ gic_set_eoi_split(args->eoi_split);
+
+ reset_priorities(args);
+ gic_set_priority_mask(CPU_PRIO_MASK);
+
+ inject_fns = level_sensitive ? inject_level_fns
+ : inject_edge_fns;
+
+ local_irq_enable();
+
+ /* Start the tests. */
+ for_each_supported_inject_fn(args, inject_fns, f) {
+ test_injection(args, f);
+ test_preemption(args, f);
+ test_injection_failure(args, f);
+ }
+
+ /*
+ * Restore the active state of IRQs. This would happen when live
+ * migrating IRQs in the middle of being handled.
+ */
+ for_each_supported_activate_fn(args, set_active_fns, f)
+ test_restore_active(args, f);
+
+ GUEST_DONE();
+}
+
+static void kvm_irq_line_check(struct kvm_vm *vm, uint32_t intid, int level,
+ struct test_args *test_args, bool expect_failure)
+{
+ int ret;
+
+ if (!expect_failure) {
+ kvm_arm_irq_line(vm, intid, level);
+ } else {
+ /* The interface doesn't allow larger intid's. */
+ if (intid > KVM_ARM_IRQ_NUM_MASK)
+ return;
+
+ ret = _kvm_arm_irq_line(vm, intid, level);
+ TEST_ASSERT(ret != 0 && errno == EINVAL,
+ "Bad intid %i did not cause KVM_IRQ_LINE "
+ "error: rc: %i errno: %i", intid, ret, errno);
+ }
+}
+
+void kvm_irq_set_level_info_check(int gic_fd, uint32_t intid, int level,
+ bool expect_failure)
+{
+ if (!expect_failure) {
+ kvm_irq_set_level_info(gic_fd, intid, level);
+ } else {
+ int ret = _kvm_irq_set_level_info(gic_fd, intid, level);
+ /*
+ * The kernel silently fails for invalid SPIs and SGIs (which
+ * are not level-sensitive). It only checks for intid to not
+ * spill over 1U << 10 (the max reserved SPI). Also, callers
+ * are supposed to mask the intid with 0x3ff (1023).
+ */
+ if (intid > VGIC_MAX_RESERVED)
+ TEST_ASSERT(ret != 0 && errno == EINVAL,
+ "Bad intid %i did not cause VGIC_GRP_LEVEL_INFO "
+ "error: rc: %i errno: %i", intid, ret, errno);
+ else
+ TEST_ASSERT(!ret, "KVM_DEV_ARM_VGIC_GRP_LEVEL_INFO "
+ "for intid %i failed, rc: %i errno: %i",
+ intid, ret, errno);
+ }
+}
+
+static void kvm_set_gsi_routing_irqchip_check(struct kvm_vm *vm,
+ uint32_t intid, uint32_t num, uint32_t kvm_max_routes,
+ bool expect_failure)
+{
+ struct kvm_irq_routing *routing;
+ int ret;
+ uint64_t i;
+
+ assert(num <= kvm_max_routes && kvm_max_routes <= KVM_MAX_IRQ_ROUTES);
+
+ routing = kvm_gsi_routing_create();
+ for (i = intid; i < (uint64_t)intid + num; i++)
+ kvm_gsi_routing_irqchip_add(routing, i - MIN_SPI, i - MIN_SPI);
+
+ if (!expect_failure) {
+ kvm_gsi_routing_write(vm, routing);
+ } else {
+ ret = _kvm_gsi_routing_write(vm, routing);
+ /* The kernel only checks e->irqchip.pin >= KVM_IRQCHIP_NUM_PINS */
+ if (((uint64_t)intid + num - 1 - MIN_SPI) >= KVM_IRQCHIP_NUM_PINS)
+ TEST_ASSERT(ret != 0 && errno == EINVAL,
+ "Bad intid %u did not cause KVM_SET_GSI_ROUTING "
+ "error: rc: %i errno: %i", intid, ret, errno);
+ else
+ TEST_ASSERT(ret == 0, "KVM_SET_GSI_ROUTING "
+ "for intid %i failed, rc: %i errno: %i",
+ intid, ret, errno);
+ }
+}
+
+static void kvm_irq_write_ispendr_check(int gic_fd, uint32_t intid,
+ struct kvm_vcpu *vcpu,
+ bool expect_failure)
+{
+ /*
+ * Ignore this when expecting failure as invalid intids will lead to
+ * either trying to inject SGIs when we configured the test to be
+ * level_sensitive (or the reverse), or inject large intids which
+ * will lead to writing above the ISPENDR register space (and we
+ * don't want to do that either).
+ */
+ if (!expect_failure)
+ kvm_irq_write_ispendr(gic_fd, intid, vcpu);
+}
+
+static void kvm_routing_and_irqfd_check(struct kvm_vm *vm,
+ uint32_t intid, uint32_t num, uint32_t kvm_max_routes,
+ bool expect_failure)
+{
+ int fd[MAX_SPI];
+ uint64_t val;
+ int ret, f;
+ uint64_t i;
+
+ /*
+ * There is no way to try injecting an SGI or PPI as the interface
+ * starts counting from the first SPI (above the private ones), so just
+ * exit.
+ */
+ if (INTID_IS_SGI(intid) || INTID_IS_PPI(intid))
+ return;
+
+ kvm_set_gsi_routing_irqchip_check(vm, intid, num,
+ kvm_max_routes, expect_failure);
+
+ /*
+ * If expect_failure, then just to inject anyway. These
+ * will silently fail. And in any case, the guest will check
+ * that no actual interrupt was injected for those cases.
+ */
+
+ for (f = 0, i = intid; i < (uint64_t)intid + num; i++, f++) {
+ fd[f] = eventfd(0, 0);
+ TEST_ASSERT(fd[f] != -1, __KVM_SYSCALL_ERROR("eventfd()", fd[f]));
+ }
+
+ for (f = 0, i = intid; i < (uint64_t)intid + num; i++, f++) {
+ struct kvm_irqfd irqfd = {
+ .fd = fd[f],
+ .gsi = i - MIN_SPI,
+ };
+ assert(i <= (uint64_t)UINT_MAX);
+ vm_ioctl(vm, KVM_IRQFD, &irqfd);
+ }
+
+ for (f = 0, i = intid; i < (uint64_t)intid + num; i++, f++) {
+ val = 1;
+ ret = write(fd[f], &val, sizeof(uint64_t));
+ TEST_ASSERT(ret == sizeof(uint64_t),
+ __KVM_SYSCALL_ERROR("write()", ret));
+ }
+
+ for (f = 0, i = intid; i < (uint64_t)intid + num; i++, f++)
+ close(fd[f]);
+}
+
+/* handles the valid case: intid=0xffffffff num=1 */
+#define for_each_intid(first, num, tmp, i) \
+ for ((tmp) = (i) = (first); \
+ (tmp) < (uint64_t)(first) + (uint64_t)(num); \
+ (tmp)++, (i)++)
+
+static void run_guest_cmd(struct kvm_vcpu *vcpu, int gic_fd,
+ struct kvm_inject_args *inject_args,
+ struct test_args *test_args)
+{
+ kvm_inject_cmd cmd = inject_args->cmd;
+ uint32_t intid = inject_args->first_intid;
+ uint32_t num = inject_args->num;
+ int level = inject_args->level;
+ bool expect_failure = inject_args->expect_failure;
+ struct kvm_vm *vm = vcpu->vm;
+ uint64_t tmp;
+ uint32_t i;
+
+ /* handles the valid case: intid=0xffffffff num=1 */
+ assert(intid < UINT_MAX - num || num == 1);
+
+ switch (cmd) {
+ case KVM_INJECT_EDGE_IRQ_LINE:
+ for_each_intid(intid, num, tmp, i)
+ kvm_irq_line_check(vm, i, 1, test_args,
+ expect_failure);
+ for_each_intid(intid, num, tmp, i)
+ kvm_irq_line_check(vm, i, 0, test_args,
+ expect_failure);
+ break;
+ case KVM_SET_IRQ_LINE:
+ for_each_intid(intid, num, tmp, i)
+ kvm_irq_line_check(vm, i, level, test_args,
+ expect_failure);
+ break;
+ case KVM_SET_IRQ_LINE_HIGH:
+ for_each_intid(intid, num, tmp, i)
+ kvm_irq_line_check(vm, i, 1, test_args,
+ expect_failure);
+ break;
+ case KVM_SET_LEVEL_INFO_HIGH:
+ for_each_intid(intid, num, tmp, i)
+ kvm_irq_set_level_info_check(gic_fd, i, 1,
+ expect_failure);
+ break;
+ case KVM_INJECT_IRQFD:
+ kvm_routing_and_irqfd_check(vm, intid, num,
+ test_args->kvm_max_routes,
+ expect_failure);
+ break;
+ case KVM_WRITE_ISPENDR:
+ for (i = intid; i < intid + num; i++)
+ kvm_irq_write_ispendr_check(gic_fd, i, vcpu,
+ expect_failure);
+ break;
+ case KVM_WRITE_ISACTIVER:
+ for (i = intid; i < intid + num; i++)
+ kvm_irq_write_isactiver(gic_fd, i, vcpu);
+ break;
+ default:
+ break;
+ }
+}
+
+static void kvm_inject_get_call(struct kvm_vm *vm, struct ucall *uc,
+ struct kvm_inject_args *args)
+{
+ struct kvm_inject_args *kvm_args_hva;
+ vm_vaddr_t kvm_args_gva;
+
+ kvm_args_gva = uc->args[1];
+ kvm_args_hva = (struct kvm_inject_args *)addr_gva2hva(vm, kvm_args_gva);
+ memcpy(args, kvm_args_hva, sizeof(struct kvm_inject_args));
+}
+
+static void print_args(struct test_args *args)
+{
+ printf("nr-irqs=%d level-sensitive=%d eoi-split=%d\n",
+ args->nr_irqs, args->level_sensitive,
+ args->eoi_split);
+}
+
+static void test_vgic(uint32_t nr_irqs, bool level_sensitive, bool eoi_split)
+{
+ struct ucall uc;
+ int gic_fd;
+ struct kvm_vcpu *vcpu;
+ struct kvm_vm *vm;
+ struct kvm_inject_args inject_args;
+ vm_vaddr_t args_gva;
+
+ struct test_args args = {
+ .nr_irqs = nr_irqs,
+ .level_sensitive = level_sensitive,
+ .eoi_split = eoi_split,
+ .kvm_max_routes = kvm_check_cap(KVM_CAP_IRQ_ROUTING),
+ .kvm_supports_irqfd = kvm_check_cap(KVM_CAP_IRQFD),
+ };
+
+ print_args(&args);
+
+ vm = vm_create_with_one_vcpu(&vcpu, guest_code);
+
+ vm_init_descriptor_tables(vm);
+ vcpu_init_descriptor_tables(vcpu);
+
+ /* Setup the guest args page (so it gets the args). */
+ args_gva = vm_vaddr_alloc_page(vm);
+ memcpy(addr_gva2hva(vm, args_gva), &args, sizeof(args));
+ vcpu_args_set(vcpu, 1, args_gva);
+
+ gic_fd = vgic_v3_setup(vm, 1, nr_irqs,
+ GICD_BASE_GPA, GICR_BASE_GPA);
+ __TEST_REQUIRE(gic_fd >= 0, "Failed to create vgic-v3, skipping");
+
+ vm_install_exception_handler(vm, VECTOR_IRQ_CURRENT,
+ guest_irq_handlers[args.eoi_split][args.level_sensitive]);
+
+ while (1) {
+ vcpu_run(vcpu);
+
+ switch (get_ucall(vcpu, &uc)) {
+ case UCALL_SYNC:
+ kvm_inject_get_call(vm, &uc, &inject_args);
+ run_guest_cmd(vcpu, gic_fd, &inject_args, &args);
+ break;
+ case UCALL_ABORT:
+ REPORT_GUEST_ASSERT(uc);
+ break;
+ case UCALL_DONE:
+ goto done;
+ default:
+ TEST_FAIL("Unknown ucall %lu", uc.cmd);
+ }
+ }
+
+done:
+ close(gic_fd);
+ kvm_vm_free(vm);
+}
+
+static void help(const char *name)
+{
+ printf(
+ "\n"
+ "usage: %s [-n num_irqs] [-e eoi_split] [-l level_sensitive]\n", name);
+ printf(" -n: specify number of IRQs to setup the vgic with. "
+ "It has to be a multiple of 32 and between 64 and 1024.\n");
+ printf(" -e: if 1 then EOI is split into a write to DIR on top "
+ "of writing EOI.\n");
+ printf(" -l: specify whether the IRQs are level-sensitive (1) or not (0).");
+ puts("");
+ exit(1);
+}
+
+int main(int argc, char **argv)
+{
+ uint32_t nr_irqs = 64;
+ bool default_args = true;
+ bool level_sensitive = false;
+ int opt;
+ bool eoi_split = false;
+
+ while ((opt = getopt(argc, argv, "hn:e:l:")) != -1) {
+ switch (opt) {
+ case 'n':
+ nr_irqs = atoi_non_negative("Number of IRQs", optarg);
+ if (nr_irqs > 1024 || nr_irqs % 32)
+ help(argv[0]);
+ break;
+ case 'e':
+ eoi_split = (bool)atoi_paranoid(optarg);
+ default_args = false;
+ break;
+ case 'l':
+ level_sensitive = (bool)atoi_paranoid(optarg);
+ default_args = false;
+ break;
+ case 'h':
+ default:
+ help(argv[0]);
+ break;
+ }
+ }
+
+ /*
+ * If the user just specified nr_irqs and/or gic_version, then run all
+ * combinations.
+ */
+ if (default_args) {
+ test_vgic(nr_irqs, false /* level */, false /* eoi_split */);
+ test_vgic(nr_irqs, false /* level */, true /* eoi_split */);
+ test_vgic(nr_irqs, true /* level */, false /* eoi_split */);
+ test_vgic(nr_irqs, true /* level */, true /* eoi_split */);
+ } else {
+ test_vgic(nr_irqs, level_sensitive, eoi_split);
+ }
+
+ return 0;
+}
diff --git a/tools/testing/selftests/kvm/aarch64/vpmu_counter_access.c b/tools/testing/selftests/kvm/aarch64/vpmu_counter_access.c
new file mode 100644
index 000000000000..f2fb0e3f14bc
--- /dev/null
+++ b/tools/testing/selftests/kvm/aarch64/vpmu_counter_access.c
@@ -0,0 +1,653 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * vpmu_counter_access - Test vPMU event counter access
+ *
+ * Copyright (c) 2023 Google LLC.
+ *
+ * This test checks if the guest can see the same number of the PMU event
+ * counters (PMCR_EL0.N) that userspace sets, if the guest can access
+ * those counters, and if the guest is prevented from accessing any
+ * other counters.
+ * It also checks if the userspace accesses to the PMU regsisters honor the
+ * PMCR.N value that's set for the guest.
+ * This test runs only when KVM_CAP_ARM_PMU_V3 is supported on the host.
+ */
+#include <kvm_util.h>
+#include <processor.h>
+#include <test_util.h>
+#include <vgic.h>
+#include <perf/arm_pmuv3.h>
+#include <linux/bitfield.h>
+
+/* The max number of the PMU event counters (excluding the cycle counter) */
+#define ARMV8_PMU_MAX_GENERAL_COUNTERS (ARMV8_PMU_MAX_COUNTERS - 1)
+
+/* The cycle counter bit position that's common among the PMU registers */
+#define ARMV8_PMU_CYCLE_IDX 31
+
+struct vpmu_vm {
+ struct kvm_vm *vm;
+ struct kvm_vcpu *vcpu;
+ int gic_fd;
+};
+
+static struct vpmu_vm vpmu_vm;
+
+struct pmreg_sets {
+ uint64_t set_reg_id;
+ uint64_t clr_reg_id;
+};
+
+#define PMREG_SET(set, clr) {.set_reg_id = set, .clr_reg_id = clr}
+
+static uint64_t get_pmcr_n(uint64_t pmcr)
+{
+ return FIELD_GET(ARMV8_PMU_PMCR_N, pmcr);
+}
+
+static void set_pmcr_n(uint64_t *pmcr, uint64_t pmcr_n)
+{
+ u64p_replace_bits((__u64 *) pmcr, pmcr_n, ARMV8_PMU_PMCR_N);
+}
+
+static uint64_t get_counters_mask(uint64_t n)
+{
+ uint64_t mask = BIT(ARMV8_PMU_CYCLE_IDX);
+
+ if (n)
+ mask |= GENMASK(n - 1, 0);
+ return mask;
+}
+
+/* Read PMEVTCNTR<n>_EL0 through PMXEVCNTR_EL0 */
+static inline unsigned long read_sel_evcntr(int sel)
+{
+ write_sysreg(sel, pmselr_el0);
+ isb();
+ return read_sysreg(pmxevcntr_el0);
+}
+
+/* Write PMEVTCNTR<n>_EL0 through PMXEVCNTR_EL0 */
+static inline void write_sel_evcntr(int sel, unsigned long val)
+{
+ write_sysreg(sel, pmselr_el0);
+ isb();
+ write_sysreg(val, pmxevcntr_el0);
+ isb();
+}
+
+/* Read PMEVTYPER<n>_EL0 through PMXEVTYPER_EL0 */
+static inline unsigned long read_sel_evtyper(int sel)
+{
+ write_sysreg(sel, pmselr_el0);
+ isb();
+ return read_sysreg(pmxevtyper_el0);
+}
+
+/* Write PMEVTYPER<n>_EL0 through PMXEVTYPER_EL0 */
+static inline void write_sel_evtyper(int sel, unsigned long val)
+{
+ write_sysreg(sel, pmselr_el0);
+ isb();
+ write_sysreg(val, pmxevtyper_el0);
+ isb();
+}
+
+static void pmu_disable_reset(void)
+{
+ uint64_t pmcr = read_sysreg(pmcr_el0);
+
+ /* Reset all counters, disabling them */
+ pmcr &= ~ARMV8_PMU_PMCR_E;
+ write_sysreg(pmcr | ARMV8_PMU_PMCR_P, pmcr_el0);
+ isb();
+}
+
+#define RETURN_READ_PMEVCNTRN(n) \
+ return read_sysreg(pmevcntr##n##_el0)
+static unsigned long read_pmevcntrn(int n)
+{
+ PMEVN_SWITCH(n, RETURN_READ_PMEVCNTRN);
+ return 0;
+}
+
+#define WRITE_PMEVCNTRN(n) \
+ write_sysreg(val, pmevcntr##n##_el0)
+static void write_pmevcntrn(int n, unsigned long val)
+{
+ PMEVN_SWITCH(n, WRITE_PMEVCNTRN);
+ isb();
+}
+
+#define READ_PMEVTYPERN(n) \
+ return read_sysreg(pmevtyper##n##_el0)
+static unsigned long read_pmevtypern(int n)
+{
+ PMEVN_SWITCH(n, READ_PMEVTYPERN);
+ return 0;
+}
+
+#define WRITE_PMEVTYPERN(n) \
+ write_sysreg(val, pmevtyper##n##_el0)
+static void write_pmevtypern(int n, unsigned long val)
+{
+ PMEVN_SWITCH(n, WRITE_PMEVTYPERN);
+ isb();
+}
+
+/*
+ * The pmc_accessor structure has pointers to PMEV{CNTR,TYPER}<n>_EL0
+ * accessors that test cases will use. Each of the accessors will
+ * either directly reads/writes PMEV{CNTR,TYPER}<n>_EL0
+ * (i.e. {read,write}_pmev{cnt,type}rn()), or reads/writes them through
+ * PMXEV{CNTR,TYPER}_EL0 (i.e. {read,write}_sel_ev{cnt,type}r()).
+ *
+ * This is used to test that combinations of those accessors provide
+ * the consistent behavior.
+ */
+struct pmc_accessor {
+ /* A function to be used to read PMEVTCNTR<n>_EL0 */
+ unsigned long (*read_cntr)(int idx);
+ /* A function to be used to write PMEVTCNTR<n>_EL0 */
+ void (*write_cntr)(int idx, unsigned long val);
+ /* A function to be used to read PMEVTYPER<n>_EL0 */
+ unsigned long (*read_typer)(int idx);
+ /* A function to be used to write PMEVTYPER<n>_EL0 */
+ void (*write_typer)(int idx, unsigned long val);
+};
+
+struct pmc_accessor pmc_accessors[] = {
+ /* test with all direct accesses */
+ { read_pmevcntrn, write_pmevcntrn, read_pmevtypern, write_pmevtypern },
+ /* test with all indirect accesses */
+ { read_sel_evcntr, write_sel_evcntr, read_sel_evtyper, write_sel_evtyper },
+ /* read with direct accesses, and write with indirect accesses */
+ { read_pmevcntrn, write_sel_evcntr, read_pmevtypern, write_sel_evtyper },
+ /* read with indirect accesses, and write with direct accesses */
+ { read_sel_evcntr, write_pmevcntrn, read_sel_evtyper, write_pmevtypern },
+};
+
+/*
+ * Convert a pointer of pmc_accessor to an index in pmc_accessors[],
+ * assuming that the pointer is one of the entries in pmc_accessors[].
+ */
+#define PMC_ACC_TO_IDX(acc) (acc - &pmc_accessors[0])
+
+#define GUEST_ASSERT_BITMAP_REG(regname, mask, set_expected) \
+{ \
+ uint64_t _tval = read_sysreg(regname); \
+ \
+ if (set_expected) \
+ __GUEST_ASSERT((_tval & mask), \
+ "tval: 0x%lx; mask: 0x%lx; set_expected: %u", \
+ _tval, mask, set_expected); \
+ else \
+ __GUEST_ASSERT(!(_tval & mask), \
+ "tval: 0x%lx; mask: 0x%lx; set_expected: %u", \
+ _tval, mask, set_expected); \
+}
+
+/*
+ * Check if @mask bits in {PMCNTEN,PMINTEN,PMOVS}{SET,CLR} registers
+ * are set or cleared as specified in @set_expected.
+ */
+static void check_bitmap_pmu_regs(uint64_t mask, bool set_expected)
+{
+ GUEST_ASSERT_BITMAP_REG(pmcntenset_el0, mask, set_expected);
+ GUEST_ASSERT_BITMAP_REG(pmcntenclr_el0, mask, set_expected);
+ GUEST_ASSERT_BITMAP_REG(pmintenset_el1, mask, set_expected);
+ GUEST_ASSERT_BITMAP_REG(pmintenclr_el1, mask, set_expected);
+ GUEST_ASSERT_BITMAP_REG(pmovsset_el0, mask, set_expected);
+ GUEST_ASSERT_BITMAP_REG(pmovsclr_el0, mask, set_expected);
+}
+
+/*
+ * Check if the bit in {PMCNTEN,PMINTEN,PMOVS}{SET,CLR} registers corresponding
+ * to the specified counter (@pmc_idx) can be read/written as expected.
+ * When @set_op is true, it tries to set the bit for the counter in
+ * those registers by writing the SET registers (the bit won't be set
+ * if the counter is not implemented though).
+ * Otherwise, it tries to clear the bits in the registers by writing
+ * the CLR registers.
+ * Then, it checks if the values indicated in the registers are as expected.
+ */
+static void test_bitmap_pmu_regs(int pmc_idx, bool set_op)
+{
+ uint64_t pmcr_n, test_bit = BIT(pmc_idx);
+ bool set_expected = false;
+
+ if (set_op) {
+ write_sysreg(test_bit, pmcntenset_el0);
+ write_sysreg(test_bit, pmintenset_el1);
+ write_sysreg(test_bit, pmovsset_el0);
+
+ /* The bit will be set only if the counter is implemented */
+ pmcr_n = get_pmcr_n(read_sysreg(pmcr_el0));
+ set_expected = (pmc_idx < pmcr_n) ? true : false;
+ } else {
+ write_sysreg(test_bit, pmcntenclr_el0);
+ write_sysreg(test_bit, pmintenclr_el1);
+ write_sysreg(test_bit, pmovsclr_el0);
+ }
+ check_bitmap_pmu_regs(test_bit, set_expected);
+}
+
+/*
+ * Tests for reading/writing registers for the (implemented) event counter
+ * specified by @pmc_idx.
+ */
+static void test_access_pmc_regs(struct pmc_accessor *acc, int pmc_idx)
+{
+ uint64_t write_data, read_data;
+
+ /* Disable all PMCs and reset all PMCs to zero. */
+ pmu_disable_reset();
+
+ /*
+ * Tests for reading/writing {PMCNTEN,PMINTEN,PMOVS}{SET,CLR}_EL1.
+ */
+
+ /* Make sure that the bit in those registers are set to 0 */
+ test_bitmap_pmu_regs(pmc_idx, false);
+ /* Test if setting the bit in those registers works */
+ test_bitmap_pmu_regs(pmc_idx, true);
+ /* Test if clearing the bit in those registers works */
+ test_bitmap_pmu_regs(pmc_idx, false);
+
+ /*
+ * Tests for reading/writing the event type register.
+ */
+
+ /*
+ * Set the event type register to an arbitrary value just for testing
+ * of reading/writing the register.
+ * Arm ARM says that for the event from 0x0000 to 0x003F,
+ * the value indicated in the PMEVTYPER<n>_EL0.evtCount field is
+ * the value written to the field even when the specified event
+ * is not supported.
+ */
+ write_data = (ARMV8_PMU_EXCLUDE_EL1 | ARMV8_PMUV3_PERFCTR_INST_RETIRED);
+ acc->write_typer(pmc_idx, write_data);
+ read_data = acc->read_typer(pmc_idx);
+ __GUEST_ASSERT(read_data == write_data,
+ "pmc_idx: 0x%x; acc_idx: 0x%lx; read_data: 0x%lx; write_data: 0x%lx",
+ pmc_idx, PMC_ACC_TO_IDX(acc), read_data, write_data);
+
+ /*
+ * Tests for reading/writing the event count register.
+ */
+
+ read_data = acc->read_cntr(pmc_idx);
+
+ /* The count value must be 0, as it is disabled and reset */
+ __GUEST_ASSERT(read_data == 0,
+ "pmc_idx: 0x%x; acc_idx: 0x%lx; read_data: 0x%lx",
+ pmc_idx, PMC_ACC_TO_IDX(acc), read_data);
+
+ write_data = read_data + pmc_idx + 0x12345;
+ acc->write_cntr(pmc_idx, write_data);
+ read_data = acc->read_cntr(pmc_idx);
+ __GUEST_ASSERT(read_data == write_data,
+ "pmc_idx: 0x%x; acc_idx: 0x%lx; read_data: 0x%lx; write_data: 0x%lx",
+ pmc_idx, PMC_ACC_TO_IDX(acc), read_data, write_data);
+}
+
+#define INVALID_EC (-1ul)
+uint64_t expected_ec = INVALID_EC;
+
+static void guest_sync_handler(struct ex_regs *regs)
+{
+ uint64_t esr, ec;
+
+ esr = read_sysreg(esr_el1);
+ ec = (esr >> ESR_EC_SHIFT) & ESR_EC_MASK;
+
+ __GUEST_ASSERT(expected_ec == ec,
+ "PC: 0x%lx; ESR: 0x%lx; EC: 0x%lx; EC expected: 0x%lx",
+ regs->pc, esr, ec, expected_ec);
+
+ /* skip the trapping instruction */
+ regs->pc += 4;
+
+ /* Use INVALID_EC to indicate an exception occurred */
+ expected_ec = INVALID_EC;
+}
+
+/*
+ * Run the given operation that should trigger an exception with the
+ * given exception class. The exception handler (guest_sync_handler)
+ * will reset op_end_addr to 0, expected_ec to INVALID_EC, and skip
+ * the instruction that trapped.
+ */
+#define TEST_EXCEPTION(ec, ops) \
+({ \
+ GUEST_ASSERT(ec != INVALID_EC); \
+ WRITE_ONCE(expected_ec, ec); \
+ dsb(ish); \
+ ops; \
+ GUEST_ASSERT(expected_ec == INVALID_EC); \
+})
+
+/*
+ * Tests for reading/writing registers for the unimplemented event counter
+ * specified by @pmc_idx (>= PMCR_EL0.N).
+ */
+static void test_access_invalid_pmc_regs(struct pmc_accessor *acc, int pmc_idx)
+{
+ /*
+ * Reading/writing the event count/type registers should cause
+ * an UNDEFINED exception.
+ */
+ TEST_EXCEPTION(ESR_EC_UNKNOWN, acc->read_cntr(pmc_idx));
+ TEST_EXCEPTION(ESR_EC_UNKNOWN, acc->write_cntr(pmc_idx, 0));
+ TEST_EXCEPTION(ESR_EC_UNKNOWN, acc->read_typer(pmc_idx));
+ TEST_EXCEPTION(ESR_EC_UNKNOWN, acc->write_typer(pmc_idx, 0));
+ /*
+ * The bit corresponding to the (unimplemented) counter in
+ * {PMCNTEN,PMINTEN,PMOVS}{SET,CLR} registers should be RAZ.
+ */
+ test_bitmap_pmu_regs(pmc_idx, 1);
+ test_bitmap_pmu_regs(pmc_idx, 0);
+}
+
+/*
+ * The guest is configured with PMUv3 with @expected_pmcr_n number of
+ * event counters.
+ * Check if @expected_pmcr_n is consistent with PMCR_EL0.N, and
+ * if reading/writing PMU registers for implemented or unimplemented
+ * counters works as expected.
+ */
+static void guest_code(uint64_t expected_pmcr_n)
+{
+ uint64_t pmcr, pmcr_n, unimp_mask;
+ int i, pmc;
+
+ __GUEST_ASSERT(expected_pmcr_n <= ARMV8_PMU_MAX_GENERAL_COUNTERS,
+ "Expected PMCR.N: 0x%lx; ARMv8 general counters: 0x%x",
+ expected_pmcr_n, ARMV8_PMU_MAX_GENERAL_COUNTERS);
+
+ pmcr = read_sysreg(pmcr_el0);
+ pmcr_n = get_pmcr_n(pmcr);
+
+ /* Make sure that PMCR_EL0.N indicates the value userspace set */
+ __GUEST_ASSERT(pmcr_n == expected_pmcr_n,
+ "Expected PMCR.N: 0x%lx, PMCR.N: 0x%lx",
+ expected_pmcr_n, pmcr_n);
+
+ /*
+ * Make sure that (RAZ) bits corresponding to unimplemented event
+ * counters in {PMCNTEN,PMINTEN,PMOVS}{SET,CLR} registers are reset
+ * to zero.
+ * (NOTE: bits for implemented event counters are reset to UNKNOWN)
+ */
+ unimp_mask = GENMASK_ULL(ARMV8_PMU_MAX_GENERAL_COUNTERS - 1, pmcr_n);
+ check_bitmap_pmu_regs(unimp_mask, false);
+
+ /*
+ * Tests for reading/writing PMU registers for implemented counters.
+ * Use each combination of PMEV{CNTR,TYPER}<n>_EL0 accessor functions.
+ */
+ for (i = 0; i < ARRAY_SIZE(pmc_accessors); i++) {
+ for (pmc = 0; pmc < pmcr_n; pmc++)
+ test_access_pmc_regs(&pmc_accessors[i], pmc);
+ }
+
+ /*
+ * Tests for reading/writing PMU registers for unimplemented counters.
+ * Use each combination of PMEV{CNTR,TYPER}<n>_EL0 accessor functions.
+ */
+ for (i = 0; i < ARRAY_SIZE(pmc_accessors); i++) {
+ for (pmc = pmcr_n; pmc < ARMV8_PMU_MAX_GENERAL_COUNTERS; pmc++)
+ test_access_invalid_pmc_regs(&pmc_accessors[i], pmc);
+ }
+
+ GUEST_DONE();
+}
+
+#define GICD_BASE_GPA 0x8000000ULL
+#define GICR_BASE_GPA 0x80A0000ULL
+
+/* Create a VM that has one vCPU with PMUv3 configured. */
+static void create_vpmu_vm(void *guest_code)
+{
+ struct kvm_vcpu_init init;
+ uint8_t pmuver, ec;
+ uint64_t dfr0, irq = 23;
+ struct kvm_device_attr irq_attr = {
+ .group = KVM_ARM_VCPU_PMU_V3_CTRL,
+ .attr = KVM_ARM_VCPU_PMU_V3_IRQ,
+ .addr = (uint64_t)&irq,
+ };
+ struct kvm_device_attr init_attr = {
+ .group = KVM_ARM_VCPU_PMU_V3_CTRL,
+ .attr = KVM_ARM_VCPU_PMU_V3_INIT,
+ };
+
+ /* The test creates the vpmu_vm multiple times. Ensure a clean state */
+ memset(&vpmu_vm, 0, sizeof(vpmu_vm));
+
+ vpmu_vm.vm = vm_create(1);
+ vm_init_descriptor_tables(vpmu_vm.vm);
+ for (ec = 0; ec < ESR_EC_NUM; ec++) {
+ vm_install_sync_handler(vpmu_vm.vm, VECTOR_SYNC_CURRENT, ec,
+ guest_sync_handler);
+ }
+
+ /* Create vCPU with PMUv3 */
+ vm_ioctl(vpmu_vm.vm, KVM_ARM_PREFERRED_TARGET, &init);
+ init.features[0] |= (1 << KVM_ARM_VCPU_PMU_V3);
+ vpmu_vm.vcpu = aarch64_vcpu_add(vpmu_vm.vm, 0, &init, guest_code);
+ vcpu_init_descriptor_tables(vpmu_vm.vcpu);
+ vpmu_vm.gic_fd = vgic_v3_setup(vpmu_vm.vm, 1, 64,
+ GICD_BASE_GPA, GICR_BASE_GPA);
+ __TEST_REQUIRE(vpmu_vm.gic_fd >= 0,
+ "Failed to create vgic-v3, skipping");
+
+ /* Make sure that PMUv3 support is indicated in the ID register */
+ vcpu_get_reg(vpmu_vm.vcpu,
+ KVM_ARM64_SYS_REG(SYS_ID_AA64DFR0_EL1), &dfr0);
+ pmuver = FIELD_GET(ARM64_FEATURE_MASK(ID_AA64DFR0_EL1_PMUVer), dfr0);
+ TEST_ASSERT(pmuver != ID_AA64DFR0_EL1_PMUVer_IMP_DEF &&
+ pmuver >= ID_AA64DFR0_EL1_PMUVer_IMP,
+ "Unexpected PMUVER (0x%x) on the vCPU with PMUv3", pmuver);
+
+ /* Initialize vPMU */
+ vcpu_ioctl(vpmu_vm.vcpu, KVM_SET_DEVICE_ATTR, &irq_attr);
+ vcpu_ioctl(vpmu_vm.vcpu, KVM_SET_DEVICE_ATTR, &init_attr);
+}
+
+static void destroy_vpmu_vm(void)
+{
+ close(vpmu_vm.gic_fd);
+ kvm_vm_free(vpmu_vm.vm);
+}
+
+static void run_vcpu(struct kvm_vcpu *vcpu, uint64_t pmcr_n)
+{
+ struct ucall uc;
+
+ vcpu_args_set(vcpu, 1, pmcr_n);
+ vcpu_run(vcpu);
+ switch (get_ucall(vcpu, &uc)) {
+ case UCALL_ABORT:
+ REPORT_GUEST_ASSERT(uc);
+ break;
+ case UCALL_DONE:
+ break;
+ default:
+ TEST_FAIL("Unknown ucall %lu", uc.cmd);
+ break;
+ }
+}
+
+static void test_create_vpmu_vm_with_pmcr_n(uint64_t pmcr_n, bool expect_fail)
+{
+ struct kvm_vcpu *vcpu;
+ uint64_t pmcr, pmcr_orig;
+
+ create_vpmu_vm(guest_code);
+ vcpu = vpmu_vm.vcpu;
+
+ vcpu_get_reg(vcpu, KVM_ARM64_SYS_REG(SYS_PMCR_EL0), &pmcr_orig);
+ pmcr = pmcr_orig;
+
+ /*
+ * Setting a larger value of PMCR.N should not modify the field, and
+ * return a success.
+ */
+ set_pmcr_n(&pmcr, pmcr_n);
+ vcpu_set_reg(vcpu, KVM_ARM64_SYS_REG(SYS_PMCR_EL0), pmcr);
+ vcpu_get_reg(vcpu, KVM_ARM64_SYS_REG(SYS_PMCR_EL0), &pmcr);
+
+ if (expect_fail)
+ TEST_ASSERT(pmcr_orig == pmcr,
+ "PMCR.N modified by KVM to a larger value (PMCR: 0x%lx) for pmcr_n: 0x%lx",
+ pmcr, pmcr_n);
+ else
+ TEST_ASSERT(pmcr_n == get_pmcr_n(pmcr),
+ "Failed to update PMCR.N to %lu (received: %lu)",
+ pmcr_n, get_pmcr_n(pmcr));
+}
+
+/*
+ * Create a guest with one vCPU, set the PMCR_EL0.N for the vCPU to @pmcr_n,
+ * and run the test.
+ */
+static void run_access_test(uint64_t pmcr_n)
+{
+ uint64_t sp;
+ struct kvm_vcpu *vcpu;
+ struct kvm_vcpu_init init;
+
+ pr_debug("Test with pmcr_n %lu\n", pmcr_n);
+
+ test_create_vpmu_vm_with_pmcr_n(pmcr_n, false);
+ vcpu = vpmu_vm.vcpu;
+
+ /* Save the initial sp to restore them later to run the guest again */
+ vcpu_get_reg(vcpu, ARM64_CORE_REG(sp_el1), &sp);
+
+ run_vcpu(vcpu, pmcr_n);
+
+ /*
+ * Reset and re-initialize the vCPU, and run the guest code again to
+ * check if PMCR_EL0.N is preserved.
+ */
+ vm_ioctl(vpmu_vm.vm, KVM_ARM_PREFERRED_TARGET, &init);
+ init.features[0] |= (1 << KVM_ARM_VCPU_PMU_V3);
+ aarch64_vcpu_setup(vcpu, &init);
+ vcpu_init_descriptor_tables(vcpu);
+ vcpu_set_reg(vcpu, ARM64_CORE_REG(sp_el1), sp);
+ vcpu_set_reg(vcpu, ARM64_CORE_REG(regs.pc), (uint64_t)guest_code);
+
+ run_vcpu(vcpu, pmcr_n);
+
+ destroy_vpmu_vm();
+}
+
+static struct pmreg_sets validity_check_reg_sets[] = {
+ PMREG_SET(SYS_PMCNTENSET_EL0, SYS_PMCNTENCLR_EL0),
+ PMREG_SET(SYS_PMINTENSET_EL1, SYS_PMINTENCLR_EL1),
+ PMREG_SET(SYS_PMOVSSET_EL0, SYS_PMOVSCLR_EL0),
+};
+
+/*
+ * Create a VM, and check if KVM handles the userspace accesses of
+ * the PMU register sets in @validity_check_reg_sets[] correctly.
+ */
+static void run_pmregs_validity_test(uint64_t pmcr_n)
+{
+ int i;
+ struct kvm_vcpu *vcpu;
+ uint64_t set_reg_id, clr_reg_id, reg_val;
+ uint64_t valid_counters_mask, max_counters_mask;
+
+ test_create_vpmu_vm_with_pmcr_n(pmcr_n, false);
+ vcpu = vpmu_vm.vcpu;
+
+ valid_counters_mask = get_counters_mask(pmcr_n);
+ max_counters_mask = get_counters_mask(ARMV8_PMU_MAX_COUNTERS);
+
+ for (i = 0; i < ARRAY_SIZE(validity_check_reg_sets); i++) {
+ set_reg_id = validity_check_reg_sets[i].set_reg_id;
+ clr_reg_id = validity_check_reg_sets[i].clr_reg_id;
+
+ /*
+ * Test if the 'set' and 'clr' variants of the registers
+ * are initialized based on the number of valid counters.
+ */
+ vcpu_get_reg(vcpu, KVM_ARM64_SYS_REG(set_reg_id), &reg_val);
+ TEST_ASSERT((reg_val & (~valid_counters_mask)) == 0,
+ "Initial read of set_reg: 0x%llx has unimplemented counters enabled: 0x%lx",
+ KVM_ARM64_SYS_REG(set_reg_id), reg_val);
+
+ vcpu_get_reg(vcpu, KVM_ARM64_SYS_REG(clr_reg_id), &reg_val);
+ TEST_ASSERT((reg_val & (~valid_counters_mask)) == 0,
+ "Initial read of clr_reg: 0x%llx has unimplemented counters enabled: 0x%lx",
+ KVM_ARM64_SYS_REG(clr_reg_id), reg_val);
+
+ /*
+ * Using the 'set' variant, force-set the register to the
+ * max number of possible counters and test if KVM discards
+ * the bits for unimplemented counters as it should.
+ */
+ vcpu_set_reg(vcpu, KVM_ARM64_SYS_REG(set_reg_id), max_counters_mask);
+
+ vcpu_get_reg(vcpu, KVM_ARM64_SYS_REG(set_reg_id), &reg_val);
+ TEST_ASSERT((reg_val & (~valid_counters_mask)) == 0,
+ "Read of set_reg: 0x%llx has unimplemented counters enabled: 0x%lx",
+ KVM_ARM64_SYS_REG(set_reg_id), reg_val);
+
+ vcpu_get_reg(vcpu, KVM_ARM64_SYS_REG(clr_reg_id), &reg_val);
+ TEST_ASSERT((reg_val & (~valid_counters_mask)) == 0,
+ "Read of clr_reg: 0x%llx has unimplemented counters enabled: 0x%lx",
+ KVM_ARM64_SYS_REG(clr_reg_id), reg_val);
+ }
+
+ destroy_vpmu_vm();
+}
+
+/*
+ * Create a guest with one vCPU, and attempt to set the PMCR_EL0.N for
+ * the vCPU to @pmcr_n, which is larger than the host value.
+ * The attempt should fail as @pmcr_n is too big to set for the vCPU.
+ */
+static void run_error_test(uint64_t pmcr_n)
+{
+ pr_debug("Error test with pmcr_n %lu (larger than the host)\n", pmcr_n);
+
+ test_create_vpmu_vm_with_pmcr_n(pmcr_n, true);
+ destroy_vpmu_vm();
+}
+
+/*
+ * Return the default number of implemented PMU event counters excluding
+ * the cycle counter (i.e. PMCR_EL0.N value) for the guest.
+ */
+static uint64_t get_pmcr_n_limit(void)
+{
+ uint64_t pmcr;
+
+ create_vpmu_vm(guest_code);
+ vcpu_get_reg(vpmu_vm.vcpu, KVM_ARM64_SYS_REG(SYS_PMCR_EL0), &pmcr);
+ destroy_vpmu_vm();
+ return get_pmcr_n(pmcr);
+}
+
+int main(void)
+{
+ uint64_t i, pmcr_n;
+
+ TEST_REQUIRE(kvm_has_cap(KVM_CAP_ARM_PMU_V3));
+
+ pmcr_n = get_pmcr_n_limit();
+ for (i = 0; i <= pmcr_n; i++) {
+ run_access_test(i);
+ run_pmregs_validity_test(i);
+ }
+
+ for (i = pmcr_n + 1; i < ARMV8_PMU_MAX_COUNTERS; i++)
+ run_error_test(i);
+
+ return 0;
+}
diff --git a/tools/testing/selftests/kvm/access_tracking_perf_test.c b/tools/testing/selftests/kvm/access_tracking_perf_test.c
new file mode 100644
index 000000000000..3c7defd34f56
--- /dev/null
+++ b/tools/testing/selftests/kvm/access_tracking_perf_test.c
@@ -0,0 +1,394 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * access_tracking_perf_test
+ *
+ * Copyright (C) 2021, Google, Inc.
+ *
+ * This test measures the performance effects of KVM's access tracking.
+ * Access tracking is driven by the MMU notifiers test_young, clear_young, and
+ * clear_flush_young. These notifiers do not have a direct userspace API,
+ * however the clear_young notifier can be triggered by marking a pages as idle
+ * in /sys/kernel/mm/page_idle/bitmap. This test leverages that mechanism to
+ * enable access tracking on guest memory.
+ *
+ * To measure performance this test runs a VM with a configurable number of
+ * vCPUs that each touch every page in disjoint regions of memory. Performance
+ * is measured in the time it takes all vCPUs to finish touching their
+ * predefined region.
+ *
+ * Note that a deterministic correctness test of access tracking is not possible
+ * by using page_idle as it exists today. This is for a few reasons:
+ *
+ * 1. page_idle only issues clear_young notifiers, which lack a TLB flush. This
+ * means subsequent guest accesses are not guaranteed to see page table
+ * updates made by KVM until some time in the future.
+ *
+ * 2. page_idle only operates on LRU pages. Newly allocated pages are not
+ * immediately allocated to LRU lists. Instead they are held in a "pagevec",
+ * which is drained to LRU lists some time in the future. There is no
+ * userspace API to force this drain to occur.
+ *
+ * These limitations are worked around in this test by using a large enough
+ * region of memory for each vCPU such that the number of translations cached in
+ * the TLB and the number of pages held in pagevecs are a small fraction of the
+ * overall workload. And if either of those conditions are not true (for example
+ * in nesting, where TLB size is unlimited) this test will print a warning
+ * rather than silently passing.
+ */
+#include <inttypes.h>
+#include <limits.h>
+#include <pthread.h>
+#include <sys/mman.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+
+#include "kvm_util.h"
+#include "test_util.h"
+#include "memstress.h"
+#include "guest_modes.h"
+#include "processor.h"
+
+/* Global variable used to synchronize all of the vCPU threads. */
+static int iteration;
+
+/* Defines what vCPU threads should do during a given iteration. */
+static enum {
+ /* Run the vCPU to access all its memory. */
+ ITERATION_ACCESS_MEMORY,
+ /* Mark the vCPU's memory idle in page_idle. */
+ ITERATION_MARK_IDLE,
+} iteration_work;
+
+/* The iteration that was last completed by each vCPU. */
+static int vcpu_last_completed_iteration[KVM_MAX_VCPUS];
+
+/* Whether to overlap the regions of memory vCPUs access. */
+static bool overlap_memory_access;
+
+struct test_params {
+ /* The backing source for the region of memory. */
+ enum vm_mem_backing_src_type backing_src;
+
+ /* The amount of memory to allocate for each vCPU. */
+ uint64_t vcpu_memory_bytes;
+
+ /* The number of vCPUs to create in the VM. */
+ int nr_vcpus;
+};
+
+static uint64_t pread_uint64(int fd, const char *filename, uint64_t index)
+{
+ uint64_t value;
+ off_t offset = index * sizeof(value);
+
+ TEST_ASSERT(pread(fd, &value, sizeof(value), offset) == sizeof(value),
+ "pread from %s offset 0x%" PRIx64 " failed!",
+ filename, offset);
+
+ return value;
+
+}
+
+#define PAGEMAP_PRESENT (1ULL << 63)
+#define PAGEMAP_PFN_MASK ((1ULL << 55) - 1)
+
+static uint64_t lookup_pfn(int pagemap_fd, struct kvm_vm *vm, uint64_t gva)
+{
+ uint64_t hva = (uint64_t) addr_gva2hva(vm, gva);
+ uint64_t entry;
+ uint64_t pfn;
+
+ entry = pread_uint64(pagemap_fd, "pagemap", hva / getpagesize());
+ if (!(entry & PAGEMAP_PRESENT))
+ return 0;
+
+ pfn = entry & PAGEMAP_PFN_MASK;
+ __TEST_REQUIRE(pfn, "Looking up PFNs requires CAP_SYS_ADMIN");
+
+ return pfn;
+}
+
+static bool is_page_idle(int page_idle_fd, uint64_t pfn)
+{
+ uint64_t bits = pread_uint64(page_idle_fd, "page_idle", pfn / 64);
+
+ return !!((bits >> (pfn % 64)) & 1);
+}
+
+static void mark_page_idle(int page_idle_fd, uint64_t pfn)
+{
+ uint64_t bits = 1ULL << (pfn % 64);
+
+ TEST_ASSERT(pwrite(page_idle_fd, &bits, 8, 8 * (pfn / 64)) == 8,
+ "Set page_idle bits for PFN 0x%" PRIx64, pfn);
+}
+
+static void mark_vcpu_memory_idle(struct kvm_vm *vm,
+ struct memstress_vcpu_args *vcpu_args)
+{
+ int vcpu_idx = vcpu_args->vcpu_idx;
+ uint64_t base_gva = vcpu_args->gva;
+ uint64_t pages = vcpu_args->pages;
+ uint64_t page;
+ uint64_t still_idle = 0;
+ uint64_t no_pfn = 0;
+ int page_idle_fd;
+ int pagemap_fd;
+
+ /* If vCPUs are using an overlapping region, let vCPU 0 mark it idle. */
+ if (overlap_memory_access && vcpu_idx)
+ return;
+
+ page_idle_fd = open("/sys/kernel/mm/page_idle/bitmap", O_RDWR);
+ TEST_ASSERT(page_idle_fd > 0, "Failed to open page_idle.");
+
+ pagemap_fd = open("/proc/self/pagemap", O_RDONLY);
+ TEST_ASSERT(pagemap_fd > 0, "Failed to open pagemap.");
+
+ for (page = 0; page < pages; page++) {
+ uint64_t gva = base_gva + page * memstress_args.guest_page_size;
+ uint64_t pfn = lookup_pfn(pagemap_fd, vm, gva);
+
+ if (!pfn) {
+ no_pfn++;
+ continue;
+ }
+
+ if (is_page_idle(page_idle_fd, pfn)) {
+ still_idle++;
+ continue;
+ }
+
+ mark_page_idle(page_idle_fd, pfn);
+ }
+
+ /*
+ * Assumption: Less than 1% of pages are going to be swapped out from
+ * under us during this test.
+ */
+ TEST_ASSERT(no_pfn < pages / 100,
+ "vCPU %d: No PFN for %" PRIu64 " out of %" PRIu64 " pages.",
+ vcpu_idx, no_pfn, pages);
+
+ /*
+ * Check that at least 90% of memory has been marked idle (the rest
+ * might not be marked idle because the pages have not yet made it to an
+ * LRU list or the translations are still cached in the TLB). 90% is
+ * arbitrary; high enough that we ensure most memory access went through
+ * access tracking but low enough as to not make the test too brittle
+ * over time and across architectures.
+ *
+ * When running the guest as a nested VM, "warn" instead of asserting
+ * as the TLB size is effectively unlimited and the KVM doesn't
+ * explicitly flush the TLB when aging SPTEs. As a result, more pages
+ * are cached and the guest won't see the "idle" bit cleared.
+ */
+ if (still_idle >= pages / 10) {
+#ifdef __x86_64__
+ TEST_ASSERT(this_cpu_has(X86_FEATURE_HYPERVISOR),
+ "vCPU%d: Too many pages still idle (%lu out of %lu)",
+ vcpu_idx, still_idle, pages);
+#endif
+ printf("WARNING: vCPU%d: Too many pages still idle (%lu out of %lu), "
+ "this will affect performance results.\n",
+ vcpu_idx, still_idle, pages);
+ }
+
+ close(page_idle_fd);
+ close(pagemap_fd);
+}
+
+static void assert_ucall(struct kvm_vcpu *vcpu, uint64_t expected_ucall)
+{
+ struct ucall uc;
+ uint64_t actual_ucall = get_ucall(vcpu, &uc);
+
+ TEST_ASSERT(expected_ucall == actual_ucall,
+ "Guest exited unexpectedly (expected ucall %" PRIu64
+ ", got %" PRIu64 ")",
+ expected_ucall, actual_ucall);
+}
+
+static bool spin_wait_for_next_iteration(int *current_iteration)
+{
+ int last_iteration = *current_iteration;
+
+ do {
+ if (READ_ONCE(memstress_args.stop_vcpus))
+ return false;
+
+ *current_iteration = READ_ONCE(iteration);
+ } while (last_iteration == *current_iteration);
+
+ return true;
+}
+
+static void vcpu_thread_main(struct memstress_vcpu_args *vcpu_args)
+{
+ struct kvm_vcpu *vcpu = vcpu_args->vcpu;
+ struct kvm_vm *vm = memstress_args.vm;
+ int vcpu_idx = vcpu_args->vcpu_idx;
+ int current_iteration = 0;
+
+ while (spin_wait_for_next_iteration(&current_iteration)) {
+ switch (READ_ONCE(iteration_work)) {
+ case ITERATION_ACCESS_MEMORY:
+ vcpu_run(vcpu);
+ assert_ucall(vcpu, UCALL_SYNC);
+ break;
+ case ITERATION_MARK_IDLE:
+ mark_vcpu_memory_idle(vm, vcpu_args);
+ break;
+ };
+
+ vcpu_last_completed_iteration[vcpu_idx] = current_iteration;
+ }
+}
+
+static void spin_wait_for_vcpu(int vcpu_idx, int target_iteration)
+{
+ while (READ_ONCE(vcpu_last_completed_iteration[vcpu_idx]) !=
+ target_iteration) {
+ continue;
+ }
+}
+
+/* The type of memory accesses to perform in the VM. */
+enum access_type {
+ ACCESS_READ,
+ ACCESS_WRITE,
+};
+
+static void run_iteration(struct kvm_vm *vm, int nr_vcpus, const char *description)
+{
+ struct timespec ts_start;
+ struct timespec ts_elapsed;
+ int next_iteration, i;
+
+ /* Kick off the vCPUs by incrementing iteration. */
+ next_iteration = ++iteration;
+
+ clock_gettime(CLOCK_MONOTONIC, &ts_start);
+
+ /* Wait for all vCPUs to finish the iteration. */
+ for (i = 0; i < nr_vcpus; i++)
+ spin_wait_for_vcpu(i, next_iteration);
+
+ ts_elapsed = timespec_elapsed(ts_start);
+ pr_info("%-30s: %ld.%09lds\n",
+ description, ts_elapsed.tv_sec, ts_elapsed.tv_nsec);
+}
+
+static void access_memory(struct kvm_vm *vm, int nr_vcpus,
+ enum access_type access, const char *description)
+{
+ memstress_set_write_percent(vm, (access == ACCESS_READ) ? 0 : 100);
+ iteration_work = ITERATION_ACCESS_MEMORY;
+ run_iteration(vm, nr_vcpus, description);
+}
+
+static void mark_memory_idle(struct kvm_vm *vm, int nr_vcpus)
+{
+ /*
+ * Even though this parallelizes the work across vCPUs, this is still a
+ * very slow operation because page_idle forces the test to mark one pfn
+ * at a time and the clear_young notifier serializes on the KVM MMU
+ * lock.
+ */
+ pr_debug("Marking VM memory idle (slow)...\n");
+ iteration_work = ITERATION_MARK_IDLE;
+ run_iteration(vm, nr_vcpus, "Mark memory idle");
+}
+
+static void run_test(enum vm_guest_mode mode, void *arg)
+{
+ struct test_params *params = arg;
+ struct kvm_vm *vm;
+ int nr_vcpus = params->nr_vcpus;
+
+ vm = memstress_create_vm(mode, nr_vcpus, params->vcpu_memory_bytes, 1,
+ params->backing_src, !overlap_memory_access);
+
+ memstress_start_vcpu_threads(nr_vcpus, vcpu_thread_main);
+
+ pr_info("\n");
+ access_memory(vm, nr_vcpus, ACCESS_WRITE, "Populating memory");
+
+ /* As a control, read and write to the populated memory first. */
+ access_memory(vm, nr_vcpus, ACCESS_WRITE, "Writing to populated memory");
+ access_memory(vm, nr_vcpus, ACCESS_READ, "Reading from populated memory");
+
+ /* Repeat on memory that has been marked as idle. */
+ mark_memory_idle(vm, nr_vcpus);
+ access_memory(vm, nr_vcpus, ACCESS_WRITE, "Writing to idle memory");
+ mark_memory_idle(vm, nr_vcpus);
+ access_memory(vm, nr_vcpus, ACCESS_READ, "Reading from idle memory");
+
+ memstress_join_vcpu_threads(nr_vcpus);
+ memstress_destroy_vm(vm);
+}
+
+static void help(char *name)
+{
+ puts("");
+ printf("usage: %s [-h] [-m mode] [-b vcpu_bytes] [-v vcpus] [-o] [-s mem_type]\n",
+ name);
+ puts("");
+ printf(" -h: Display this help message.");
+ guest_modes_help();
+ printf(" -b: specify the size of the memory region which should be\n"
+ " dirtied by each vCPU. e.g. 10M or 3G.\n"
+ " (default: 1G)\n");
+ printf(" -v: specify the number of vCPUs to run.\n");
+ printf(" -o: Overlap guest memory accesses instead of partitioning\n"
+ " them into a separate region of memory for each vCPU.\n");
+ backing_src_help("-s");
+ puts("");
+ exit(0);
+}
+
+int main(int argc, char *argv[])
+{
+ struct test_params params = {
+ .backing_src = DEFAULT_VM_MEM_SRC,
+ .vcpu_memory_bytes = DEFAULT_PER_VCPU_MEM_SIZE,
+ .nr_vcpus = 1,
+ };
+ int page_idle_fd;
+ int opt;
+
+ guest_modes_append_default();
+
+ while ((opt = getopt(argc, argv, "hm:b:v:os:")) != -1) {
+ switch (opt) {
+ case 'm':
+ guest_modes_cmdline(optarg);
+ break;
+ case 'b':
+ params.vcpu_memory_bytes = parse_size(optarg);
+ break;
+ case 'v':
+ params.nr_vcpus = atoi_positive("Number of vCPUs", optarg);
+ break;
+ case 'o':
+ overlap_memory_access = true;
+ break;
+ case 's':
+ params.backing_src = parse_backing_src_type(optarg);
+ break;
+ case 'h':
+ default:
+ help(argv[0]);
+ break;
+ }
+ }
+
+ page_idle_fd = open("/sys/kernel/mm/page_idle/bitmap", O_RDWR);
+ __TEST_REQUIRE(page_idle_fd >= 0,
+ "CONFIG_IDLE_PAGE_TRACKING is not enabled");
+ close(page_idle_fd);
+
+ for_each_guest_mode(run_test, &params);
+
+ return 0;
+}
diff --git a/tools/testing/selftests/kvm/arch_timer.c b/tools/testing/selftests/kvm/arch_timer.c
new file mode 100644
index 000000000000..ae1f1a6d8312
--- /dev/null
+++ b/tools/testing/selftests/kvm/arch_timer.c
@@ -0,0 +1,259 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * arch_timer.c - Tests the arch timer IRQ functionality
+ *
+ * The guest's main thread configures the timer interrupt and waits
+ * for it to fire, with a timeout equal to the timer period.
+ * It asserts that the timeout doesn't exceed the timer period plus
+ * a user configurable error margin(default to 100us)
+ *
+ * On the other hand, upon receipt of an interrupt, the guest's interrupt
+ * handler validates the interrupt by checking if the architectural state
+ * is in compliance with the specifications.
+ *
+ * The test provides command-line options to configure the timer's
+ * period (-p), number of vCPUs (-n), iterations per stage (-i) and timer
+ * interrupt arrival error margin (-e). To stress-test the timer stack
+ * even more, an option to migrate the vCPUs across pCPUs (-m), at a
+ * particular rate, is also provided.
+ *
+ * Copyright (c) 2021, Google LLC.
+ */
+
+#define _GNU_SOURCE
+
+#include <stdlib.h>
+#include <pthread.h>
+#include <linux/sizes.h>
+#include <linux/bitmap.h>
+#include <sys/sysinfo.h>
+
+#include "timer_test.h"
+
+struct test_args test_args = {
+ .nr_vcpus = NR_VCPUS_DEF,
+ .nr_iter = NR_TEST_ITERS_DEF,
+ .timer_period_ms = TIMER_TEST_PERIOD_MS_DEF,
+ .migration_freq_ms = TIMER_TEST_MIGRATION_FREQ_MS,
+ .timer_err_margin_us = TIMER_TEST_ERR_MARGIN_US,
+ .reserved = 1,
+};
+
+struct kvm_vcpu *vcpus[KVM_MAX_VCPUS];
+struct test_vcpu_shared_data vcpu_shared_data[KVM_MAX_VCPUS];
+
+static pthread_t pt_vcpu_run[KVM_MAX_VCPUS];
+static unsigned long *vcpu_done_map;
+static pthread_mutex_t vcpu_done_map_lock;
+
+static void *test_vcpu_run(void *arg)
+{
+ unsigned int vcpu_idx = (unsigned long)arg;
+ struct ucall uc;
+ struct kvm_vcpu *vcpu = vcpus[vcpu_idx];
+ struct kvm_vm *vm = vcpu->vm;
+ struct test_vcpu_shared_data *shared_data = &vcpu_shared_data[vcpu_idx];
+
+ vcpu_run(vcpu);
+
+ /* Currently, any exit from guest is an indication of completion */
+ pthread_mutex_lock(&vcpu_done_map_lock);
+ __set_bit(vcpu_idx, vcpu_done_map);
+ pthread_mutex_unlock(&vcpu_done_map_lock);
+
+ switch (get_ucall(vcpu, &uc)) {
+ case UCALL_SYNC:
+ case UCALL_DONE:
+ break;
+ case UCALL_ABORT:
+ sync_global_from_guest(vm, *shared_data);
+ fprintf(stderr, "Guest assert failed, vcpu %u; stage; %u; iter: %u\n",
+ vcpu_idx, shared_data->guest_stage, shared_data->nr_iter);
+ REPORT_GUEST_ASSERT(uc);
+ break;
+ default:
+ TEST_FAIL("Unexpected guest exit");
+ }
+
+ pr_info("PASS(vCPU-%d).\n", vcpu_idx);
+
+ return NULL;
+}
+
+static uint32_t test_get_pcpu(void)
+{
+ uint32_t pcpu;
+ unsigned int nproc_conf;
+ cpu_set_t online_cpuset;
+
+ nproc_conf = get_nprocs_conf();
+ sched_getaffinity(0, sizeof(cpu_set_t), &online_cpuset);
+
+ /* Randomly find an available pCPU to place a vCPU on */
+ do {
+ pcpu = rand() % nproc_conf;
+ } while (!CPU_ISSET(pcpu, &online_cpuset));
+
+ return pcpu;
+}
+
+static int test_migrate_vcpu(unsigned int vcpu_idx)
+{
+ int ret;
+ cpu_set_t cpuset;
+ uint32_t new_pcpu = test_get_pcpu();
+
+ CPU_ZERO(&cpuset);
+ CPU_SET(new_pcpu, &cpuset);
+
+ pr_debug("Migrating vCPU: %u to pCPU: %u\n", vcpu_idx, new_pcpu);
+
+ ret = pthread_setaffinity_np(pt_vcpu_run[vcpu_idx],
+ sizeof(cpuset), &cpuset);
+
+ /* Allow the error where the vCPU thread is already finished */
+ TEST_ASSERT(ret == 0 || ret == ESRCH,
+ "Failed to migrate the vCPU:%u to pCPU: %u; ret: %d",
+ vcpu_idx, new_pcpu, ret);
+
+ return ret;
+}
+
+static void *test_vcpu_migration(void *arg)
+{
+ unsigned int i, n_done;
+ bool vcpu_done;
+
+ do {
+ usleep(msecs_to_usecs(test_args.migration_freq_ms));
+
+ for (n_done = 0, i = 0; i < test_args.nr_vcpus; i++) {
+ pthread_mutex_lock(&vcpu_done_map_lock);
+ vcpu_done = test_bit(i, vcpu_done_map);
+ pthread_mutex_unlock(&vcpu_done_map_lock);
+
+ if (vcpu_done) {
+ n_done++;
+ continue;
+ }
+
+ test_migrate_vcpu(i);
+ }
+ } while (test_args.nr_vcpus != n_done);
+
+ return NULL;
+}
+
+static void test_run(struct kvm_vm *vm)
+{
+ pthread_t pt_vcpu_migration;
+ unsigned int i;
+ int ret;
+
+ pthread_mutex_init(&vcpu_done_map_lock, NULL);
+ vcpu_done_map = bitmap_zalloc(test_args.nr_vcpus);
+ TEST_ASSERT(vcpu_done_map, "Failed to allocate vcpu done bitmap");
+
+ for (i = 0; i < (unsigned long)test_args.nr_vcpus; i++) {
+ ret = pthread_create(&pt_vcpu_run[i], NULL, test_vcpu_run,
+ (void *)(unsigned long)i);
+ TEST_ASSERT(!ret, "Failed to create vCPU-%d pthread", i);
+ }
+
+ /* Spawn a thread to control the vCPU migrations */
+ if (test_args.migration_freq_ms) {
+ srand(time(NULL));
+
+ ret = pthread_create(&pt_vcpu_migration, NULL,
+ test_vcpu_migration, NULL);
+ TEST_ASSERT(!ret, "Failed to create the migration pthread");
+ }
+
+
+ for (i = 0; i < test_args.nr_vcpus; i++)
+ pthread_join(pt_vcpu_run[i], NULL);
+
+ if (test_args.migration_freq_ms)
+ pthread_join(pt_vcpu_migration, NULL);
+
+ bitmap_free(vcpu_done_map);
+}
+
+static void test_print_help(char *name)
+{
+ pr_info("Usage: %s [-h] [-n nr_vcpus] [-i iterations] [-p timer_period_ms]\n"
+ "\t\t [-m migration_freq_ms] [-o counter_offset]\n"
+ "\t\t [-e timer_err_margin_us]\n", name);
+ pr_info("\t-n: Number of vCPUs to configure (default: %u; max: %u)\n",
+ NR_VCPUS_DEF, KVM_MAX_VCPUS);
+ pr_info("\t-i: Number of iterations per stage (default: %u)\n",
+ NR_TEST_ITERS_DEF);
+ pr_info("\t-p: Periodicity (in ms) of the guest timer (default: %u)\n",
+ TIMER_TEST_PERIOD_MS_DEF);
+ pr_info("\t-m: Frequency (in ms) of vCPUs to migrate to different pCPU. 0 to turn off (default: %u)\n",
+ TIMER_TEST_MIGRATION_FREQ_MS);
+ pr_info("\t-o: Counter offset (in counter cycles, default: 0) [aarch64-only]\n");
+ pr_info("\t-e: Interrupt arrival error margin (in us) of the guest timer (default: %u)\n",
+ TIMER_TEST_ERR_MARGIN_US);
+ pr_info("\t-h: print this help screen\n");
+}
+
+static bool parse_args(int argc, char *argv[])
+{
+ int opt;
+
+ while ((opt = getopt(argc, argv, "hn:i:p:m:o:e:")) != -1) {
+ switch (opt) {
+ case 'n':
+ test_args.nr_vcpus = atoi_positive("Number of vCPUs", optarg);
+ if (test_args.nr_vcpus > KVM_MAX_VCPUS) {
+ pr_info("Max allowed vCPUs: %u\n",
+ KVM_MAX_VCPUS);
+ goto err;
+ }
+ break;
+ case 'i':
+ test_args.nr_iter = atoi_positive("Number of iterations", optarg);
+ break;
+ case 'p':
+ test_args.timer_period_ms = atoi_positive("Periodicity", optarg);
+ break;
+ case 'm':
+ test_args.migration_freq_ms = atoi_non_negative("Frequency", optarg);
+ break;
+ case 'e':
+ test_args.timer_err_margin_us = atoi_non_negative("Error Margin", optarg);
+ break;
+ case 'o':
+ test_args.counter_offset = strtol(optarg, NULL, 0);
+ test_args.reserved = 0;
+ break;
+ case 'h':
+ default:
+ goto err;
+ }
+ }
+
+ return true;
+
+err:
+ test_print_help(argv[0]);
+ return false;
+}
+
+int main(int argc, char *argv[])
+{
+ struct kvm_vm *vm;
+
+ if (!parse_args(argc, argv))
+ exit(KSFT_SKIP);
+
+ __TEST_REQUIRE(!test_args.migration_freq_ms || get_nprocs() >= 2,
+ "At least two physical CPUs needed for vCPU migration");
+
+ vm = test_vm_create();
+ test_run(vm);
+ test_vm_cleanup(vm);
+
+ return 0;
+}
diff --git a/tools/testing/selftests/kvm/clear_dirty_log_test.c b/tools/testing/selftests/kvm/clear_dirty_log_test.c
deleted file mode 100644
index 11672ec6f74e..000000000000
--- a/tools/testing/selftests/kvm/clear_dirty_log_test.c
+++ /dev/null
@@ -1,6 +0,0 @@
-#define USE_CLEAR_DIRTY_LOG
-#define KVM_DIRTY_LOG_MANUAL_PROTECT_ENABLE (1 << 0)
-#define KVM_DIRTY_LOG_INITIALLY_SET (1 << 1)
-#define KVM_DIRTY_LOG_MANUAL_CAPS (KVM_DIRTY_LOG_MANUAL_PROTECT_ENABLE | \
- KVM_DIRTY_LOG_INITIALLY_SET)
-#include "dirty_log_test.c"
diff --git a/tools/testing/selftests/kvm/config b/tools/testing/selftests/kvm/config
index 63ed533f73d6..8835fed09e9f 100644
--- a/tools/testing/selftests/kvm/config
+++ b/tools/testing/selftests/kvm/config
@@ -1,3 +1,5 @@
CONFIG_KVM=y
CONFIG_KVM_INTEL=y
CONFIG_KVM_AMD=y
+CONFIG_USERFAULTFD=y
+CONFIG_IDLE_PAGE_TRACKING=y
diff --git a/tools/testing/selftests/kvm/demand_paging_test.c b/tools/testing/selftests/kvm/demand_paging_test.c
index 360cd3ea4cd6..bf3609f71854 100644
--- a/tools/testing/selftests/kvm/demand_paging_test.c
+++ b/tools/testing/selftests/kvm/demand_paging_test.c
@@ -7,627 +7,270 @@
* Copyright (C) 2019, Google, Inc.
*/
-#define _GNU_SOURCE /* for program_invocation_name */
+#define _GNU_SOURCE /* for pipe2 */
+#include <inttypes.h>
#include <stdio.h>
#include <stdlib.h>
-#include <sys/syscall.h>
-#include <unistd.h>
-#include <asm/unistd.h>
#include <time.h>
#include <poll.h>
#include <pthread.h>
-#include <linux/bitmap.h>
-#include <linux/bitops.h>
#include <linux/userfaultfd.h>
+#include <sys/syscall.h>
-#include "test_util.h"
#include "kvm_util.h"
-#include "processor.h"
+#include "test_util.h"
+#include "memstress.h"
+#include "guest_modes.h"
+#include "userfaultfd_util.h"
#ifdef __NR_userfaultfd
-/* The memory slot index demand page */
-#define TEST_MEM_SLOT_INDEX 1
-
-/* Default guest test virtual memory offset */
-#define DEFAULT_GUEST_TEST_MEM 0xc0000000
-
-#define DEFAULT_GUEST_TEST_MEM_SIZE (1 << 30) /* 1G */
-
-#ifdef PRINT_PER_PAGE_UPDATES
-#define PER_PAGE_DEBUG(...) printf(__VA_ARGS__)
-#else
-#define PER_PAGE_DEBUG(...) _no_printf(__VA_ARGS__)
-#endif
-
-#ifdef PRINT_PER_VCPU_UPDATES
-#define PER_VCPU_DEBUG(...) printf(__VA_ARGS__)
-#else
-#define PER_VCPU_DEBUG(...) _no_printf(__VA_ARGS__)
-#endif
-
-#define MAX_VCPUS 512
-
-/*
- * Guest/Host shared variables. Ensure addr_gva2hva() and/or
- * sync_global_to/from_guest() are used when accessing from
- * the host. READ/WRITE_ONCE() should also be used with anything
- * that may change.
- */
-static uint64_t host_page_size;
-static uint64_t guest_page_size;
+static int nr_vcpus = 1;
+static uint64_t guest_percpu_mem_size = DEFAULT_PER_VCPU_MEM_SIZE;
+static size_t demand_paging_size;
static char *guest_data_prototype;
-/*
- * Guest physical memory offset of the testing memory slot.
- * This will be set to the topmost valid physical address minus
- * the test memory size.
- */
-static uint64_t guest_test_phys_mem;
-
-/*
- * Guest virtual memory offset of the testing memory slot.
- * Must not conflict with identity mapped test code.
- */
-static uint64_t guest_test_virt_mem = DEFAULT_GUEST_TEST_MEM;
-
-struct vcpu_args {
- uint64_t gva;
- uint64_t pages;
-
- /* Only used by the host userspace part of the vCPU thread */
- int vcpu_id;
- struct kvm_vm *vm;
-};
-
-static struct vcpu_args vcpu_args[MAX_VCPUS];
-
-/*
- * Continuously write to the first 8 bytes of each page in the demand paging
- * memory region.
- */
-static void guest_code(uint32_t vcpu_id)
-{
- uint64_t gva;
- uint64_t pages;
- int i;
-
- /* Make sure vCPU args data structure is not corrupt. */
- GUEST_ASSERT(vcpu_args[vcpu_id].vcpu_id == vcpu_id);
-
- gva = vcpu_args[vcpu_id].gva;
- pages = vcpu_args[vcpu_id].pages;
-
- for (i = 0; i < pages; i++) {
- uint64_t addr = gva + (i * guest_page_size);
-
- addr &= ~(host_page_size - 1);
- *(uint64_t *)addr = 0x0123456789ABCDEF;
- }
-
- GUEST_SYNC(1);
-}
-
-static void *vcpu_worker(void *data)
+static void vcpu_worker(struct memstress_vcpu_args *vcpu_args)
{
+ struct kvm_vcpu *vcpu = vcpu_args->vcpu;
+ int vcpu_idx = vcpu_args->vcpu_idx;
+ struct kvm_run *run = vcpu->run;
+ struct timespec start;
+ struct timespec ts_diff;
int ret;
- struct vcpu_args *args = (struct vcpu_args *)data;
- struct kvm_vm *vm = args->vm;
- int vcpu_id = args->vcpu_id;
- struct kvm_run *run;
- struct timespec start, end, ts_diff;
-
- vcpu_args_set(vm, vcpu_id, 1, vcpu_id);
- run = vcpu_state(vm, vcpu_id);
clock_gettime(CLOCK_MONOTONIC, &start);
/* Let the guest access its memory */
- ret = _vcpu_run(vm, vcpu_id);
- TEST_ASSERT(ret == 0, "vcpu_run failed: %d\n", ret);
- if (get_ucall(vm, vcpu_id, NULL) != UCALL_SYNC) {
+ ret = _vcpu_run(vcpu);
+ TEST_ASSERT(ret == 0, "vcpu_run failed: %d", ret);
+ if (get_ucall(vcpu, NULL) != UCALL_SYNC) {
TEST_ASSERT(false,
- "Invalid guest sync status: exit_reason=%s\n",
+ "Invalid guest sync status: exit_reason=%s",
exit_reason_str(run->exit_reason));
}
- clock_gettime(CLOCK_MONOTONIC, &end);
- ts_diff = timespec_sub(end, start);
- PER_VCPU_DEBUG("vCPU %d execution time: %ld.%.9lds\n", vcpu_id,
+ ts_diff = timespec_elapsed(start);
+ PER_VCPU_DEBUG("vCPU %d execution time: %ld.%.9lds\n", vcpu_idx,
ts_diff.tv_sec, ts_diff.tv_nsec);
-
- return NULL;
-}
-
-#define PAGE_SHIFT_4K 12
-#define PTES_PER_4K_PT 512
-
-static struct kvm_vm *create_vm(enum vm_guest_mode mode, int vcpus,
- uint64_t vcpu_memory_bytes)
-{
- struct kvm_vm *vm;
- uint64_t pages = DEFAULT_GUEST_PHY_PAGES;
-
- /* Account for a few pages per-vCPU for stacks */
- pages += DEFAULT_STACK_PGS * vcpus;
-
- /*
- * Reserve twice the ammount of memory needed to map the test region and
- * the page table / stacks region, at 4k, for page tables. Do the
- * calculation with 4K page size: the smallest of all archs. (e.g., 64K
- * page size guest will need even less memory for page tables).
- */
- pages += (2 * pages) / PTES_PER_4K_PT;
- pages += ((2 * vcpus * vcpu_memory_bytes) >> PAGE_SHIFT_4K) /
- PTES_PER_4K_PT;
- pages = vm_adjust_num_guest_pages(mode, pages);
-
- pr_info("Testing guest mode: %s\n", vm_guest_mode_string(mode));
-
- vm = _vm_create(mode, pages, O_RDWR);
- kvm_vm_elf_load(vm, program_invocation_name, 0, 0);
-#ifdef __x86_64__
- vm_create_irqchip(vm);
-#endif
- return vm;
}
-static int handle_uffd_page_request(int uffd, uint64_t addr)
+static int handle_uffd_page_request(int uffd_mode, int uffd,
+ struct uffd_msg *msg)
{
- pid_t tid;
+ pid_t tid = syscall(__NR_gettid);
+ uint64_t addr = msg->arg.pagefault.address;
struct timespec start;
- struct timespec end;
- struct uffdio_copy copy;
+ struct timespec ts_diff;
int r;
- tid = syscall(__NR_gettid);
-
- copy.src = (uint64_t)guest_data_prototype;
- copy.dst = addr;
- copy.len = host_page_size;
- copy.mode = 0;
-
clock_gettime(CLOCK_MONOTONIC, &start);
- r = ioctl(uffd, UFFDIO_COPY, &copy);
- if (r == -1) {
- pr_info("Failed Paged in 0x%lx from thread %d with errno: %d\n",
- addr, tid, errno);
- return r;
- }
-
- clock_gettime(CLOCK_MONOTONIC, &end);
-
- PER_PAGE_DEBUG("UFFDIO_COPY %d \t%ld ns\n", tid,
- timespec_to_ns(timespec_sub(end, start)));
- PER_PAGE_DEBUG("Paged in %ld bytes at 0x%lx from thread %d\n",
- host_page_size, addr, tid);
-
- return 0;
-}
-
-bool quit_uffd_thread;
-
-struct uffd_handler_args {
- int uffd;
- int pipefd;
- useconds_t delay;
-};
-
-static void *uffd_handler_thread_fn(void *arg)
-{
- struct uffd_handler_args *uffd_args = (struct uffd_handler_args *)arg;
- int uffd = uffd_args->uffd;
- int pipefd = uffd_args->pipefd;
- useconds_t delay = uffd_args->delay;
- int64_t pages = 0;
- struct timespec start, end, ts_diff;
-
- clock_gettime(CLOCK_MONOTONIC, &start);
- while (!quit_uffd_thread) {
- struct uffd_msg msg;
- struct pollfd pollfd[2];
- char tmp_chr;
- int r;
- uint64_t addr;
-
- pollfd[0].fd = uffd;
- pollfd[0].events = POLLIN;
- pollfd[1].fd = pipefd;
- pollfd[1].events = POLLIN;
-
- r = poll(pollfd, 2, -1);
- switch (r) {
- case -1:
- pr_info("poll err");
- continue;
- case 0:
- continue;
- case 1:
- break;
- default:
- pr_info("Polling uffd returned %d", r);
- return NULL;
- }
+ if (uffd_mode == UFFDIO_REGISTER_MODE_MISSING) {
+ struct uffdio_copy copy;
- if (pollfd[0].revents & POLLERR) {
- pr_info("uffd revents has POLLERR");
- return NULL;
- }
+ copy.src = (uint64_t)guest_data_prototype;
+ copy.dst = addr;
+ copy.len = demand_paging_size;
+ copy.mode = 0;
- if (pollfd[1].revents & POLLIN) {
- r = read(pollfd[1].fd, &tmp_chr, 1);
- TEST_ASSERT(r == 1,
- "Error reading pipefd in UFFD thread\n");
- return NULL;
+ r = ioctl(uffd, UFFDIO_COPY, &copy);
+ if (r == -1) {
+ pr_info("Failed UFFDIO_COPY in 0x%lx from thread %d with errno: %d\n",
+ addr, tid, errno);
+ return r;
}
+ } else if (uffd_mode == UFFDIO_REGISTER_MODE_MINOR) {
+ struct uffdio_continue cont = {0};
- if (!pollfd[0].revents & POLLIN)
- continue;
+ cont.range.start = addr;
+ cont.range.len = demand_paging_size;
- r = read(uffd, &msg, sizeof(msg));
+ r = ioctl(uffd, UFFDIO_CONTINUE, &cont);
if (r == -1) {
- if (errno == EAGAIN)
- continue;
- pr_info("Read of uffd gor errno %d", errno);
- return NULL;
+ pr_info("Failed UFFDIO_CONTINUE in 0x%lx from thread %d with errno: %d\n",
+ addr, tid, errno);
+ return r;
}
-
- if (r != sizeof(msg)) {
- pr_info("Read on uffd returned unexpected size: %d bytes", r);
- return NULL;
- }
-
- if (!(msg.event & UFFD_EVENT_PAGEFAULT))
- continue;
-
- if (delay)
- usleep(delay);
- addr = msg.arg.pagefault.address;
- r = handle_uffd_page_request(uffd, addr);
- if (r < 0)
- return NULL;
- pages++;
+ } else {
+ TEST_FAIL("Invalid uffd mode %d", uffd_mode);
}
- clock_gettime(CLOCK_MONOTONIC, &end);
- ts_diff = timespec_sub(end, start);
- PER_VCPU_DEBUG("userfaulted %ld pages over %ld.%.9lds. (%f/sec)\n",
- pages, ts_diff.tv_sec, ts_diff.tv_nsec,
- pages / ((double)ts_diff.tv_sec + (double)ts_diff.tv_nsec / 100000000.0));
+ ts_diff = timespec_elapsed(start);
- return NULL;
-}
+ PER_PAGE_DEBUG("UFFD page-in %d \t%ld ns\n", tid,
+ timespec_to_ns(ts_diff));
+ PER_PAGE_DEBUG("Paged in %ld bytes at 0x%lx from thread %d\n",
+ demand_paging_size, addr, tid);
-static int setup_demand_paging(struct kvm_vm *vm,
- pthread_t *uffd_handler_thread, int pipefd,
- useconds_t uffd_delay,
- struct uffd_handler_args *uffd_args,
- void *hva, uint64_t len)
-{
- int uffd;
- struct uffdio_api uffdio_api;
- struct uffdio_register uffdio_register;
-
- uffd = syscall(__NR_userfaultfd, O_CLOEXEC | O_NONBLOCK);
- if (uffd == -1) {
- pr_info("uffd creation failed\n");
- return -1;
- }
+ return 0;
+}
- uffdio_api.api = UFFD_API;
- uffdio_api.features = 0;
- if (ioctl(uffd, UFFDIO_API, &uffdio_api) == -1) {
- pr_info("ioctl uffdio_api failed\n");
- return -1;
- }
+struct test_params {
+ int uffd_mode;
+ useconds_t uffd_delay;
+ enum vm_mem_backing_src_type src_type;
+ bool partition_vcpu_memory_access;
+};
- uffdio_register.range.start = (uint64_t)hva;
- uffdio_register.range.len = len;
- uffdio_register.mode = UFFDIO_REGISTER_MODE_MISSING;
- if (ioctl(uffd, UFFDIO_REGISTER, &uffdio_register) == -1) {
- pr_info("ioctl uffdio_register failed\n");
- return -1;
- }
+static void prefault_mem(void *alias, uint64_t len)
+{
+ size_t p;
- if ((uffdio_register.ioctls & UFFD_API_RANGE_IOCTLS) !=
- UFFD_API_RANGE_IOCTLS) {
- pr_info("unexpected userfaultfd ioctl set\n");
- return -1;
+ TEST_ASSERT(alias != NULL, "Alias required for minor faults");
+ for (p = 0; p < (len / demand_paging_size); ++p) {
+ memcpy(alias + (p * demand_paging_size),
+ guest_data_prototype, demand_paging_size);
}
-
- uffd_args->uffd = uffd;
- uffd_args->pipefd = pipefd;
- uffd_args->delay = uffd_delay;
- pthread_create(uffd_handler_thread, NULL, uffd_handler_thread_fn,
- uffd_args);
-
- PER_VCPU_DEBUG("Created uffd thread for HVA range [%p, %p)\n",
- hva, hva + len);
-
- return 0;
}
-static void run_test(enum vm_guest_mode mode, bool use_uffd,
- useconds_t uffd_delay, int vcpus,
- uint64_t vcpu_memory_bytes)
+static void run_test(enum vm_guest_mode mode, void *arg)
{
- pthread_t *vcpu_threads;
- pthread_t *uffd_handler_threads = NULL;
- struct uffd_handler_args *uffd_args = NULL;
- struct timespec start, end, ts_diff;
- int *pipefds = NULL;
+ struct memstress_vcpu_args *vcpu_args;
+ struct test_params *p = arg;
+ struct uffd_desc **uffd_descs = NULL;
+ struct timespec start;
+ struct timespec ts_diff;
struct kvm_vm *vm;
- uint64_t guest_num_pages;
- int vcpu_id;
- int r;
-
- vm = create_vm(mode, vcpus, vcpu_memory_bytes);
-
- guest_page_size = vm_get_page_size(vm);
-
- TEST_ASSERT(vcpu_memory_bytes % guest_page_size == 0,
- "Guest memory size is not guest page size aligned.");
-
- guest_num_pages = (vcpus * vcpu_memory_bytes) / guest_page_size;
- guest_num_pages = vm_adjust_num_guest_pages(mode, guest_num_pages);
-
- /*
- * If there should be more memory in the guest test region than there
- * can be pages in the guest, it will definitely cause problems.
- */
- TEST_ASSERT(guest_num_pages < vm_get_max_gfn(vm),
- "Requested more guest memory than address space allows.\n"
- " guest pages: %lx max gfn: %x vcpus: %d wss: %lx]\n",
- guest_num_pages, vm_get_max_gfn(vm), vcpus,
- vcpu_memory_bytes);
-
- host_page_size = getpagesize();
- TEST_ASSERT(vcpu_memory_bytes % host_page_size == 0,
- "Guest memory size is not host page size aligned.");
-
- guest_test_phys_mem = (vm_get_max_gfn(vm) - guest_num_pages) *
- guest_page_size;
- guest_test_phys_mem &= ~(host_page_size - 1);
-
-#ifdef __s390x__
- /* Align to 1M (segment size) */
- guest_test_phys_mem &= ~((1 << 20) - 1);
-#endif
-
- pr_info("guest physical test memory offset: 0x%lx\n", guest_test_phys_mem);
-
- /* Add an extra memory slot for testing demand paging */
- vm_userspace_mem_region_add(vm, VM_MEM_SRC_ANONYMOUS,
- guest_test_phys_mem,
- TEST_MEM_SLOT_INDEX,
- guest_num_pages, 0);
+ int i;
- /* Do mapping for the demand paging memory slot */
- virt_map(vm, guest_test_virt_mem, guest_test_phys_mem, guest_num_pages, 0);
+ vm = memstress_create_vm(mode, nr_vcpus, guest_percpu_mem_size, 1,
+ p->src_type, p->partition_vcpu_memory_access);
- ucall_init(vm, NULL);
+ demand_paging_size = get_backing_src_pagesz(p->src_type);
- guest_data_prototype = malloc(host_page_size);
+ guest_data_prototype = malloc(demand_paging_size);
TEST_ASSERT(guest_data_prototype,
"Failed to allocate buffer for guest data pattern");
- memset(guest_data_prototype, 0xAB, host_page_size);
-
- vcpu_threads = malloc(vcpus * sizeof(*vcpu_threads));
- TEST_ASSERT(vcpu_threads, "Memory allocation failed");
-
- if (use_uffd) {
- uffd_handler_threads =
- malloc(vcpus * sizeof(*uffd_handler_threads));
- TEST_ASSERT(uffd_handler_threads, "Memory allocation failed");
+ memset(guest_data_prototype, 0xAB, demand_paging_size);
- uffd_args = malloc(vcpus * sizeof(*uffd_args));
- TEST_ASSERT(uffd_args, "Memory allocation failed");
-
- pipefds = malloc(sizeof(int) * vcpus * 2);
- TEST_ASSERT(pipefds, "Unable to allocate memory for pipefd");
+ if (p->uffd_mode == UFFDIO_REGISTER_MODE_MINOR) {
+ for (i = 0; i < nr_vcpus; i++) {
+ vcpu_args = &memstress_args.vcpu_args[i];
+ prefault_mem(addr_gpa2alias(vm, vcpu_args->gpa),
+ vcpu_args->pages * memstress_args.guest_page_size);
+ }
}
- for (vcpu_id = 0; vcpu_id < vcpus; vcpu_id++) {
- vm_paddr_t vcpu_gpa;
- void *vcpu_hva;
-
- vm_vcpu_add_default(vm, vcpu_id, guest_code);
-
- vcpu_gpa = guest_test_phys_mem + (vcpu_id * vcpu_memory_bytes);
- PER_VCPU_DEBUG("Added VCPU %d with test mem gpa [%lx, %lx)\n",
- vcpu_id, vcpu_gpa, vcpu_gpa + vcpu_memory_bytes);
+ if (p->uffd_mode) {
+ uffd_descs = malloc(nr_vcpus * sizeof(struct uffd_desc *));
+ TEST_ASSERT(uffd_descs, "Memory allocation failed");
+ for (i = 0; i < nr_vcpus; i++) {
+ void *vcpu_hva;
- /* Cache the HVA pointer of the region */
- vcpu_hva = addr_gpa2hva(vm, vcpu_gpa);
+ vcpu_args = &memstress_args.vcpu_args[i];
- if (use_uffd) {
+ /* Cache the host addresses of the region */
+ vcpu_hva = addr_gpa2hva(vm, vcpu_args->gpa);
/*
* Set up user fault fd to handle demand paging
* requests.
*/
- r = pipe2(&pipefds[vcpu_id * 2],
- O_CLOEXEC | O_NONBLOCK);
- TEST_ASSERT(!r, "Failed to set up pipefd");
-
- r = setup_demand_paging(vm,
- &uffd_handler_threads[vcpu_id],
- pipefds[vcpu_id * 2],
- uffd_delay, &uffd_args[vcpu_id],
- vcpu_hva, vcpu_memory_bytes);
- if (r < 0)
- exit(-r);
+ uffd_descs[i] = uffd_setup_demand_paging(
+ p->uffd_mode, p->uffd_delay, vcpu_hva,
+ vcpu_args->pages * memstress_args.guest_page_size,
+ &handle_uffd_page_request);
}
-
-#ifdef __x86_64__
- vcpu_set_cpuid(vm, vcpu_id, kvm_get_supported_cpuid());
-#endif
-
- vcpu_args[vcpu_id].vm = vm;
- vcpu_args[vcpu_id].vcpu_id = vcpu_id;
- vcpu_args[vcpu_id].gva = guest_test_virt_mem +
- (vcpu_id * vcpu_memory_bytes);
- vcpu_args[vcpu_id].pages = vcpu_memory_bytes / guest_page_size;
}
- /* Export the shared variables to the guest */
- sync_global_to_guest(vm, host_page_size);
- sync_global_to_guest(vm, guest_page_size);
- sync_global_to_guest(vm, vcpu_args);
-
pr_info("Finished creating vCPUs and starting uffd threads\n");
clock_gettime(CLOCK_MONOTONIC, &start);
-
- for (vcpu_id = 0; vcpu_id < vcpus; vcpu_id++) {
- pthread_create(&vcpu_threads[vcpu_id], NULL, vcpu_worker,
- &vcpu_args[vcpu_id]);
- }
-
+ memstress_start_vcpu_threads(nr_vcpus, vcpu_worker);
pr_info("Started all vCPUs\n");
- /* Wait for the vcpu threads to quit */
- for (vcpu_id = 0; vcpu_id < vcpus; vcpu_id++) {
- pthread_join(vcpu_threads[vcpu_id], NULL);
- PER_VCPU_DEBUG("Joined thread for vCPU %d\n", vcpu_id);
- }
-
+ memstress_join_vcpu_threads(nr_vcpus);
+ ts_diff = timespec_elapsed(start);
pr_info("All vCPU threads joined\n");
- clock_gettime(CLOCK_MONOTONIC, &end);
-
- if (use_uffd) {
- char c;
-
+ if (p->uffd_mode) {
/* Tell the user fault fd handler threads to quit */
- for (vcpu_id = 0; vcpu_id < vcpus; vcpu_id++) {
- r = write(pipefds[vcpu_id * 2 + 1], &c, 1);
- TEST_ASSERT(r == 1, "Unable to write to pipefd");
-
- pthread_join(uffd_handler_threads[vcpu_id], NULL);
- }
+ for (i = 0; i < nr_vcpus; i++)
+ uffd_stop_demand_paging(uffd_descs[i]);
}
- ts_diff = timespec_sub(end, start);
pr_info("Total guest execution time: %ld.%.9lds\n",
ts_diff.tv_sec, ts_diff.tv_nsec);
pr_info("Overall demand paging rate: %f pgs/sec\n",
- guest_num_pages / ((double)ts_diff.tv_sec + (double)ts_diff.tv_nsec / 100000000.0));
+ memstress_args.vcpu_args[0].pages * nr_vcpus /
+ ((double)ts_diff.tv_sec + (double)ts_diff.tv_nsec / NSEC_PER_SEC));
- ucall_uninit(vm);
- kvm_vm_free(vm);
+ memstress_destroy_vm(vm);
free(guest_data_prototype);
- free(vcpu_threads);
- if (use_uffd) {
- free(uffd_handler_threads);
- free(uffd_args);
- free(pipefds);
- }
+ if (p->uffd_mode)
+ free(uffd_descs);
}
-struct guest_mode {
- bool supported;
- bool enabled;
-};
-static struct guest_mode guest_modes[NUM_VM_MODES];
-
-#define guest_mode_init(mode, supported, enabled) ({ \
- guest_modes[mode] = (struct guest_mode){ supported, enabled }; \
-})
-
static void help(char *name)
{
- int i;
-
puts("");
- printf("usage: %s [-h] [-m mode] [-u] [-d uffd_delay_usec]\n"
- " [-b memory] [-v vcpus]\n", name);
- printf(" -m: specify the guest mode ID to test\n"
- " (default: test all supported modes)\n"
- " This option may be used multiple times.\n"
- " Guest mode IDs:\n");
- for (i = 0; i < NUM_VM_MODES; ++i) {
- printf(" %d: %s%s\n", i, vm_guest_mode_string(i),
- guest_modes[i].supported ? " (supported)" : "");
- }
- printf(" -u: use User Fault FD to handle vCPU page\n"
- " faults.\n");
+ printf("usage: %s [-h] [-m vm_mode] [-u uffd_mode] [-d uffd_delay_usec]\n"
+ " [-b memory] [-s type] [-v vcpus] [-c cpu_list] [-o]\n", name);
+ guest_modes_help();
+ printf(" -u: use userfaultfd to handle vCPU page faults. Mode is a\n"
+ " UFFD registration mode: 'MISSING' or 'MINOR'.\n");
+ kvm_print_vcpu_pinning_help();
printf(" -d: add a delay in usec to the User Fault\n"
" FD handler to simulate demand paging\n"
" overheads. Ignored without -u.\n");
printf(" -b: specify the size of the memory region which should be\n"
" demand paged by each vCPU. e.g. 10M or 3G.\n"
" Default: 1G\n");
+ backing_src_help("-s");
printf(" -v: specify the number of vCPUs to run.\n");
+ printf(" -o: Overlap guest memory accesses instead of partitioning\n"
+ " them into a separate region of memory for each vCPU.\n");
puts("");
exit(0);
}
int main(int argc, char *argv[])
{
- bool mode_selected = false;
- uint64_t vcpu_memory_bytes = DEFAULT_GUEST_TEST_MEM_SIZE;
- int vcpus = 1;
- unsigned int mode;
- int opt, i;
- bool use_uffd = false;
- useconds_t uffd_delay = 0;
-
-#ifdef __x86_64__
- guest_mode_init(VM_MODE_PXXV48_4K, true, true);
-#endif
-#ifdef __aarch64__
- guest_mode_init(VM_MODE_P40V48_4K, true, true);
- guest_mode_init(VM_MODE_P40V48_64K, true, true);
- {
- unsigned int limit = kvm_check_cap(KVM_CAP_ARM_VM_IPA_SIZE);
-
- if (limit >= 52)
- guest_mode_init(VM_MODE_P52V48_64K, true, true);
- if (limit >= 48) {
- guest_mode_init(VM_MODE_P48V48_4K, true, true);
- guest_mode_init(VM_MODE_P48V48_64K, true, true);
- }
- }
-#endif
-#ifdef __s390x__
- guest_mode_init(VM_MODE_P40V48_4K, true, true);
-#endif
+ int max_vcpus = kvm_check_cap(KVM_CAP_MAX_VCPUS);
+ const char *cpulist = NULL;
+ struct test_params p = {
+ .src_type = DEFAULT_VM_MEM_SRC,
+ .partition_vcpu_memory_access = true,
+ };
+ int opt;
+
+ guest_modes_append_default();
- while ((opt = getopt(argc, argv, "hm:ud:b:v:")) != -1) {
+ while ((opt = getopt(argc, argv, "hm:u:d:b:s:v:c:o")) != -1) {
switch (opt) {
case 'm':
- if (!mode_selected) {
- for (i = 0; i < NUM_VM_MODES; ++i)
- guest_modes[i].enabled = false;
- mode_selected = true;
- }
- mode = strtoul(optarg, NULL, 10);
- TEST_ASSERT(mode < NUM_VM_MODES,
- "Guest mode ID %d too big", mode);
- guest_modes[mode].enabled = true;
+ guest_modes_cmdline(optarg);
break;
case 'u':
- use_uffd = true;
+ if (!strcmp("MISSING", optarg))
+ p.uffd_mode = UFFDIO_REGISTER_MODE_MISSING;
+ else if (!strcmp("MINOR", optarg))
+ p.uffd_mode = UFFDIO_REGISTER_MODE_MINOR;
+ TEST_ASSERT(p.uffd_mode, "UFFD mode must be 'MISSING' or 'MINOR'.");
break;
case 'd':
- uffd_delay = strtoul(optarg, NULL, 0);
- TEST_ASSERT(uffd_delay >= 0,
- "A negative UFFD delay is not supported.");
+ p.uffd_delay = strtoul(optarg, NULL, 0);
+ TEST_ASSERT(p.uffd_delay >= 0, "A negative UFFD delay is not supported.");
break;
case 'b':
- vcpu_memory_bytes = parse_size(optarg);
+ guest_percpu_mem_size = parse_size(optarg);
+ break;
+ case 's':
+ p.src_type = parse_backing_src_type(optarg);
break;
case 'v':
- vcpus = atoi(optarg);
- TEST_ASSERT(vcpus > 0,
- "Must have a positive number of vCPUs");
- TEST_ASSERT(vcpus <= MAX_VCPUS,
- "This test does not currently support\n"
- "more than %d vCPUs.", MAX_VCPUS);
+ nr_vcpus = atoi_positive("Number of vCPUs", optarg);
+ TEST_ASSERT(nr_vcpus <= max_vcpus,
+ "Invalid number of vcpus, must be between 1 and %d", max_vcpus);
+ break;
+ case 'c':
+ cpulist = optarg;
+ break;
+ case 'o':
+ p.partition_vcpu_memory_access = false;
break;
case 'h':
default:
@@ -636,15 +279,19 @@ int main(int argc, char *argv[])
}
}
- for (i = 0; i < NUM_VM_MODES; ++i) {
- if (!guest_modes[i].enabled)
- continue;
- TEST_ASSERT(guest_modes[i].supported,
- "Guest mode ID %d (%s) not supported.",
- i, vm_guest_mode_string(i));
- run_test(i, use_uffd, uffd_delay, vcpus, vcpu_memory_bytes);
+ if (p.uffd_mode == UFFDIO_REGISTER_MODE_MINOR &&
+ !backing_src_is_shared(p.src_type)) {
+ TEST_FAIL("userfaultfd MINOR mode requires shared memory; pick a different -s");
+ }
+
+ if (cpulist) {
+ kvm_parse_vcpu_pinning(cpulist, memstress_args.vcpu_to_pcpu,
+ nr_vcpus);
+ memstress_args.pin_vcpus = true;
}
+ for_each_guest_mode(run_test, &p);
+
return 0;
}
diff --git a/tools/testing/selftests/kvm/dirty_log_perf_test.c b/tools/testing/selftests/kvm/dirty_log_perf_test.c
new file mode 100644
index 000000000000..504f6fe980e8
--- /dev/null
+++ b/tools/testing/selftests/kvm/dirty_log_perf_test.c
@@ -0,0 +1,435 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * KVM dirty page logging performance test
+ *
+ * Based on dirty_log_test.c
+ *
+ * Copyright (C) 2018, Red Hat, Inc.
+ * Copyright (C) 2020, Google, Inc.
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <time.h>
+#include <pthread.h>
+#include <linux/bitmap.h>
+
+#include "kvm_util.h"
+#include "test_util.h"
+#include "memstress.h"
+#include "guest_modes.h"
+
+#ifdef __aarch64__
+#include "aarch64/vgic.h"
+
+#define GICD_BASE_GPA 0x8000000ULL
+#define GICR_BASE_GPA 0x80A0000ULL
+
+static int gic_fd;
+
+static void arch_setup_vm(struct kvm_vm *vm, unsigned int nr_vcpus)
+{
+ /*
+ * The test can still run even if hardware does not support GICv3, as it
+ * is only an optimization to reduce guest exits.
+ */
+ gic_fd = vgic_v3_setup(vm, nr_vcpus, 64, GICD_BASE_GPA, GICR_BASE_GPA);
+}
+
+static void arch_cleanup_vm(struct kvm_vm *vm)
+{
+ if (gic_fd > 0)
+ close(gic_fd);
+}
+
+#else /* __aarch64__ */
+
+static void arch_setup_vm(struct kvm_vm *vm, unsigned int nr_vcpus)
+{
+}
+
+static void arch_cleanup_vm(struct kvm_vm *vm)
+{
+}
+
+#endif
+
+/* How many host loops to run by default (one KVM_GET_DIRTY_LOG for each loop)*/
+#define TEST_HOST_LOOP_N 2UL
+
+static int nr_vcpus = 1;
+static uint64_t guest_percpu_mem_size = DEFAULT_PER_VCPU_MEM_SIZE;
+static bool run_vcpus_while_disabling_dirty_logging;
+
+/* Host variables */
+static u64 dirty_log_manual_caps;
+static bool host_quit;
+static int iteration;
+static int vcpu_last_completed_iteration[KVM_MAX_VCPUS];
+
+static void vcpu_worker(struct memstress_vcpu_args *vcpu_args)
+{
+ struct kvm_vcpu *vcpu = vcpu_args->vcpu;
+ int vcpu_idx = vcpu_args->vcpu_idx;
+ uint64_t pages_count = 0;
+ struct kvm_run *run;
+ struct timespec start;
+ struct timespec ts_diff;
+ struct timespec total = (struct timespec){0};
+ struct timespec avg;
+ int ret;
+
+ run = vcpu->run;
+
+ while (!READ_ONCE(host_quit)) {
+ int current_iteration = READ_ONCE(iteration);
+
+ clock_gettime(CLOCK_MONOTONIC, &start);
+ ret = _vcpu_run(vcpu);
+ ts_diff = timespec_elapsed(start);
+
+ TEST_ASSERT(ret == 0, "vcpu_run failed: %d", ret);
+ TEST_ASSERT(get_ucall(vcpu, NULL) == UCALL_SYNC,
+ "Invalid guest sync status: exit_reason=%s",
+ exit_reason_str(run->exit_reason));
+
+ pr_debug("Got sync event from vCPU %d\n", vcpu_idx);
+ vcpu_last_completed_iteration[vcpu_idx] = current_iteration;
+ pr_debug("vCPU %d updated last completed iteration to %d\n",
+ vcpu_idx, vcpu_last_completed_iteration[vcpu_idx]);
+
+ if (current_iteration) {
+ pages_count += vcpu_args->pages;
+ total = timespec_add(total, ts_diff);
+ pr_debug("vCPU %d iteration %d dirty memory time: %ld.%.9lds\n",
+ vcpu_idx, current_iteration, ts_diff.tv_sec,
+ ts_diff.tv_nsec);
+ } else {
+ pr_debug("vCPU %d iteration %d populate memory time: %ld.%.9lds\n",
+ vcpu_idx, current_iteration, ts_diff.tv_sec,
+ ts_diff.tv_nsec);
+ }
+
+ /*
+ * Keep running the guest while dirty logging is being disabled
+ * (iteration is negative) so that vCPUs are accessing memory
+ * for the entire duration of zapping collapsible SPTEs.
+ */
+ while (current_iteration == READ_ONCE(iteration) &&
+ READ_ONCE(iteration) >= 0 && !READ_ONCE(host_quit)) {}
+ }
+
+ avg = timespec_div(total, vcpu_last_completed_iteration[vcpu_idx]);
+ pr_debug("\nvCPU %d dirtied 0x%lx pages over %d iterations in %ld.%.9lds. (Avg %ld.%.9lds/iteration)\n",
+ vcpu_idx, pages_count, vcpu_last_completed_iteration[vcpu_idx],
+ total.tv_sec, total.tv_nsec, avg.tv_sec, avg.tv_nsec);
+}
+
+struct test_params {
+ unsigned long iterations;
+ uint64_t phys_offset;
+ bool partition_vcpu_memory_access;
+ enum vm_mem_backing_src_type backing_src;
+ int slots;
+ uint32_t write_percent;
+ uint32_t random_seed;
+ bool random_access;
+};
+
+static void run_test(enum vm_guest_mode mode, void *arg)
+{
+ struct test_params *p = arg;
+ struct kvm_vm *vm;
+ unsigned long **bitmaps;
+ uint64_t guest_num_pages;
+ uint64_t host_num_pages;
+ uint64_t pages_per_slot;
+ struct timespec start;
+ struct timespec ts_diff;
+ struct timespec get_dirty_log_total = (struct timespec){0};
+ struct timespec vcpu_dirty_total = (struct timespec){0};
+ struct timespec avg;
+ struct timespec clear_dirty_log_total = (struct timespec){0};
+ int i;
+
+ vm = memstress_create_vm(mode, nr_vcpus, guest_percpu_mem_size,
+ p->slots, p->backing_src,
+ p->partition_vcpu_memory_access);
+
+ pr_info("Random seed: %u\n", p->random_seed);
+ memstress_set_random_seed(vm, p->random_seed);
+ memstress_set_write_percent(vm, p->write_percent);
+
+ guest_num_pages = (nr_vcpus * guest_percpu_mem_size) >> vm->page_shift;
+ guest_num_pages = vm_adjust_num_guest_pages(mode, guest_num_pages);
+ host_num_pages = vm_num_host_pages(mode, guest_num_pages);
+ pages_per_slot = host_num_pages / p->slots;
+
+ bitmaps = memstress_alloc_bitmaps(p->slots, pages_per_slot);
+
+ if (dirty_log_manual_caps)
+ vm_enable_cap(vm, KVM_CAP_MANUAL_DIRTY_LOG_PROTECT2,
+ dirty_log_manual_caps);
+
+ arch_setup_vm(vm, nr_vcpus);
+
+ /* Start the iterations */
+ iteration = 0;
+ host_quit = false;
+
+ clock_gettime(CLOCK_MONOTONIC, &start);
+ for (i = 0; i < nr_vcpus; i++)
+ vcpu_last_completed_iteration[i] = -1;
+
+ /*
+ * Use 100% writes during the population phase to ensure all
+ * memory is actually populated and not just mapped to the zero
+ * page. The prevents expensive copy-on-write faults from
+ * occurring during the dirty memory iterations below, which
+ * would pollute the performance results.
+ */
+ memstress_set_write_percent(vm, 100);
+ memstress_set_random_access(vm, false);
+ memstress_start_vcpu_threads(nr_vcpus, vcpu_worker);
+
+ /* Allow the vCPUs to populate memory */
+ pr_debug("Starting iteration %d - Populating\n", iteration);
+ for (i = 0; i < nr_vcpus; i++) {
+ while (READ_ONCE(vcpu_last_completed_iteration[i]) !=
+ iteration)
+ ;
+ }
+
+ ts_diff = timespec_elapsed(start);
+ pr_info("Populate memory time: %ld.%.9lds\n",
+ ts_diff.tv_sec, ts_diff.tv_nsec);
+
+ /* Enable dirty logging */
+ clock_gettime(CLOCK_MONOTONIC, &start);
+ memstress_enable_dirty_logging(vm, p->slots);
+ ts_diff = timespec_elapsed(start);
+ pr_info("Enabling dirty logging time: %ld.%.9lds\n\n",
+ ts_diff.tv_sec, ts_diff.tv_nsec);
+
+ memstress_set_write_percent(vm, p->write_percent);
+ memstress_set_random_access(vm, p->random_access);
+
+ while (iteration < p->iterations) {
+ /*
+ * Incrementing the iteration number will start the vCPUs
+ * dirtying memory again.
+ */
+ clock_gettime(CLOCK_MONOTONIC, &start);
+ iteration++;
+
+ pr_debug("Starting iteration %d\n", iteration);
+ for (i = 0; i < nr_vcpus; i++) {
+ while (READ_ONCE(vcpu_last_completed_iteration[i])
+ != iteration)
+ ;
+ }
+
+ ts_diff = timespec_elapsed(start);
+ vcpu_dirty_total = timespec_add(vcpu_dirty_total, ts_diff);
+ pr_info("Iteration %d dirty memory time: %ld.%.9lds\n",
+ iteration, ts_diff.tv_sec, ts_diff.tv_nsec);
+
+ clock_gettime(CLOCK_MONOTONIC, &start);
+ memstress_get_dirty_log(vm, bitmaps, p->slots);
+ ts_diff = timespec_elapsed(start);
+ get_dirty_log_total = timespec_add(get_dirty_log_total,
+ ts_diff);
+ pr_info("Iteration %d get dirty log time: %ld.%.9lds\n",
+ iteration, ts_diff.tv_sec, ts_diff.tv_nsec);
+
+ if (dirty_log_manual_caps) {
+ clock_gettime(CLOCK_MONOTONIC, &start);
+ memstress_clear_dirty_log(vm, bitmaps, p->slots,
+ pages_per_slot);
+ ts_diff = timespec_elapsed(start);
+ clear_dirty_log_total = timespec_add(clear_dirty_log_total,
+ ts_diff);
+ pr_info("Iteration %d clear dirty log time: %ld.%.9lds\n",
+ iteration, ts_diff.tv_sec, ts_diff.tv_nsec);
+ }
+ }
+
+ /*
+ * Run vCPUs while dirty logging is being disabled to stress disabling
+ * in terms of both performance and correctness. Opt-in via command
+ * line as this significantly increases time to disable dirty logging.
+ */
+ if (run_vcpus_while_disabling_dirty_logging)
+ WRITE_ONCE(iteration, -1);
+
+ /* Disable dirty logging */
+ clock_gettime(CLOCK_MONOTONIC, &start);
+ memstress_disable_dirty_logging(vm, p->slots);
+ ts_diff = timespec_elapsed(start);
+ pr_info("Disabling dirty logging time: %ld.%.9lds\n",
+ ts_diff.tv_sec, ts_diff.tv_nsec);
+
+ /*
+ * Tell the vCPU threads to quit. No need to manually check that vCPUs
+ * have stopped running after disabling dirty logging, the join will
+ * wait for them to exit.
+ */
+ host_quit = true;
+ memstress_join_vcpu_threads(nr_vcpus);
+
+ avg = timespec_div(get_dirty_log_total, p->iterations);
+ pr_info("Get dirty log over %lu iterations took %ld.%.9lds. (Avg %ld.%.9lds/iteration)\n",
+ p->iterations, get_dirty_log_total.tv_sec,
+ get_dirty_log_total.tv_nsec, avg.tv_sec, avg.tv_nsec);
+
+ if (dirty_log_manual_caps) {
+ avg = timespec_div(clear_dirty_log_total, p->iterations);
+ pr_info("Clear dirty log over %lu iterations took %ld.%.9lds. (Avg %ld.%.9lds/iteration)\n",
+ p->iterations, clear_dirty_log_total.tv_sec,
+ clear_dirty_log_total.tv_nsec, avg.tv_sec, avg.tv_nsec);
+ }
+
+ memstress_free_bitmaps(bitmaps, p->slots);
+ arch_cleanup_vm(vm);
+ memstress_destroy_vm(vm);
+}
+
+static void help(char *name)
+{
+ puts("");
+ printf("usage: %s [-h] [-a] [-i iterations] [-p offset] [-g] "
+ "[-m mode] [-n] [-b vcpu bytes] [-v vcpus] [-o] [-r random seed ] [-s mem type]"
+ "[-x memslots] [-w percentage] [-c physical cpus to run test on]\n", name);
+ puts("");
+ printf(" -a: access memory randomly rather than in order.\n");
+ printf(" -i: specify iteration counts (default: %"PRIu64")\n",
+ TEST_HOST_LOOP_N);
+ printf(" -g: Do not enable KVM_CAP_MANUAL_DIRTY_LOG_PROTECT2. This\n"
+ " makes KVM_GET_DIRTY_LOG clear the dirty log (i.e.\n"
+ " KVM_DIRTY_LOG_MANUAL_PROTECT_ENABLE is not enabled)\n"
+ " and writes will be tracked as soon as dirty logging is\n"
+ " enabled on the memslot (i.e. KVM_DIRTY_LOG_INITIALLY_SET\n"
+ " is not enabled).\n");
+ printf(" -p: specify guest physical test memory offset\n"
+ " Warning: a low offset can conflict with the loaded test code.\n");
+ guest_modes_help();
+ printf(" -n: Run the vCPUs in nested mode (L2)\n");
+ printf(" -e: Run vCPUs while dirty logging is being disabled. This\n"
+ " can significantly increase runtime, especially if there\n"
+ " isn't a dedicated pCPU for the main thread.\n");
+ printf(" -b: specify the size of the memory region which should be\n"
+ " dirtied by each vCPU. e.g. 10M or 3G.\n"
+ " (default: 1G)\n");
+ printf(" -v: specify the number of vCPUs to run.\n");
+ printf(" -o: Overlap guest memory accesses instead of partitioning\n"
+ " them into a separate region of memory for each vCPU.\n");
+ printf(" -r: specify the starting random seed.\n");
+ backing_src_help("-s");
+ printf(" -x: Split the memory region into this number of memslots.\n"
+ " (default: 1)\n");
+ printf(" -w: specify the percentage of pages which should be written to\n"
+ " as an integer from 0-100 inclusive. This is probabilistic,\n"
+ " so -w X means each page has an X%% chance of writing\n"
+ " and a (100-X)%% chance of reading.\n"
+ " (default: 100 i.e. all pages are written to.)\n");
+ kvm_print_vcpu_pinning_help();
+ puts("");
+ exit(0);
+}
+
+int main(int argc, char *argv[])
+{
+ int max_vcpus = kvm_check_cap(KVM_CAP_MAX_VCPUS);
+ const char *pcpu_list = NULL;
+ struct test_params p = {
+ .iterations = TEST_HOST_LOOP_N,
+ .partition_vcpu_memory_access = true,
+ .backing_src = DEFAULT_VM_MEM_SRC,
+ .slots = 1,
+ .random_seed = 1,
+ .write_percent = 100,
+ };
+ int opt;
+
+ dirty_log_manual_caps =
+ kvm_check_cap(KVM_CAP_MANUAL_DIRTY_LOG_PROTECT2);
+ dirty_log_manual_caps &= (KVM_DIRTY_LOG_MANUAL_PROTECT_ENABLE |
+ KVM_DIRTY_LOG_INITIALLY_SET);
+
+ guest_modes_append_default();
+
+ while ((opt = getopt(argc, argv, "ab:c:eghi:m:nop:r:s:v:x:w:")) != -1) {
+ switch (opt) {
+ case 'a':
+ p.random_access = true;
+ break;
+ case 'b':
+ guest_percpu_mem_size = parse_size(optarg);
+ break;
+ case 'c':
+ pcpu_list = optarg;
+ break;
+ case 'e':
+ /* 'e' is for evil. */
+ run_vcpus_while_disabling_dirty_logging = true;
+ break;
+ case 'g':
+ dirty_log_manual_caps = 0;
+ break;
+ case 'h':
+ help(argv[0]);
+ break;
+ case 'i':
+ p.iterations = atoi_positive("Number of iterations", optarg);
+ break;
+ case 'm':
+ guest_modes_cmdline(optarg);
+ break;
+ case 'n':
+ memstress_args.nested = true;
+ break;
+ case 'o':
+ p.partition_vcpu_memory_access = false;
+ break;
+ case 'p':
+ p.phys_offset = strtoull(optarg, NULL, 0);
+ break;
+ case 'r':
+ p.random_seed = atoi_positive("Random seed", optarg);
+ break;
+ case 's':
+ p.backing_src = parse_backing_src_type(optarg);
+ break;
+ case 'v':
+ nr_vcpus = atoi_positive("Number of vCPUs", optarg);
+ TEST_ASSERT(nr_vcpus <= max_vcpus,
+ "Invalid number of vcpus, must be between 1 and %d", max_vcpus);
+ break;
+ case 'w':
+ p.write_percent = atoi_non_negative("Write percentage", optarg);
+ TEST_ASSERT(p.write_percent <= 100,
+ "Write percentage must be between 0 and 100");
+ break;
+ case 'x':
+ p.slots = atoi_positive("Number of slots", optarg);
+ break;
+ default:
+ help(argv[0]);
+ break;
+ }
+ }
+
+ if (pcpu_list) {
+ kvm_parse_vcpu_pinning(pcpu_list, memstress_args.vcpu_to_pcpu,
+ nr_vcpus);
+ memstress_args.pin_vcpus = true;
+ }
+
+ TEST_ASSERT(p.iterations >= 2, "The test should have at least two iterations");
+
+ pr_info("Test iterations: %"PRIu64"\n", p.iterations);
+
+ for_each_guest_mode(run_test, &p);
+
+ return 0;
+}
diff --git a/tools/testing/selftests/kvm/dirty_log_test.c b/tools/testing/selftests/kvm/dirty_log_test.c
index 752ec158ac59..eaad5b20854c 100644
--- a/tools/testing/selftests/kvm/dirty_log_test.c
+++ b/tools/testing/selftests/kvm/dirty_log_test.c
@@ -9,17 +9,23 @@
#include <stdio.h>
#include <stdlib.h>
-#include <unistd.h>
-#include <time.h>
#include <pthread.h>
+#include <semaphore.h>
+#include <sys/types.h>
+#include <signal.h>
+#include <errno.h>
#include <linux/bitmap.h>
#include <linux/bitops.h>
+#include <linux/atomic.h>
+#include <asm/barrier.h>
-#include "test_util.h"
#include "kvm_util.h"
+#include "test_util.h"
+#include "guest_modes.h"
#include "processor.h"
-#define VCPU_ID 1
+#define DIRTY_MEM_BITS 30 /* 1G */
+#define PAGE_SHIFT_4K 12
/* The memory slot index to track dirty pages */
#define TEST_MEM_SLOT_INDEX 1
@@ -41,22 +47,26 @@
# define BITOP_LE_SWIZZLE ((BITS_PER_LONG-1) & ~0x7)
# define test_bit_le(nr, addr) \
test_bit((nr) ^ BITOP_LE_SWIZZLE, addr)
-# define set_bit_le(nr, addr) \
- set_bit((nr) ^ BITOP_LE_SWIZZLE, addr)
-# define clear_bit_le(nr, addr) \
- clear_bit((nr) ^ BITOP_LE_SWIZZLE, addr)
-# define test_and_set_bit_le(nr, addr) \
- test_and_set_bit((nr) ^ BITOP_LE_SWIZZLE, addr)
-# define test_and_clear_bit_le(nr, addr) \
- test_and_clear_bit((nr) ^ BITOP_LE_SWIZZLE, addr)
+# define __set_bit_le(nr, addr) \
+ __set_bit((nr) ^ BITOP_LE_SWIZZLE, addr)
+# define __clear_bit_le(nr, addr) \
+ __clear_bit((nr) ^ BITOP_LE_SWIZZLE, addr)
+# define __test_and_set_bit_le(nr, addr) \
+ __test_and_set_bit((nr) ^ BITOP_LE_SWIZZLE, addr)
+# define __test_and_clear_bit_le(nr, addr) \
+ __test_and_clear_bit((nr) ^ BITOP_LE_SWIZZLE, addr)
#else
-# define test_bit_le test_bit
-# define set_bit_le set_bit
-# define clear_bit_le clear_bit
-# define test_and_set_bit_le test_and_set_bit
-# define test_and_clear_bit_le test_and_clear_bit
+# define test_bit_le test_bit
+# define __set_bit_le __set_bit
+# define __clear_bit_le __clear_bit
+# define __test_and_set_bit_le __test_and_set_bit
+# define __test_and_clear_bit_le __test_and_clear_bit
#endif
+#define TEST_DIRTY_RING_COUNT 65536
+
+#define SIG_IPI SIGUSR1
+
/*
* Guest/Host shared variables. Ensure addr_gva2hva() and/or
* sync_global_to/from_guest() are used when accessing from
@@ -107,7 +117,7 @@ static void guest_code(void)
addr = guest_test_virt_mem;
addr += (READ_ONCE(random_array[i]) % guest_num_pages)
* guest_page_size;
- addr &= ~(host_page_size - 1);
+ addr = align_down(addr, host_page_size);
*(uint64_t *)addr = READ_ONCE(iteration);
}
@@ -128,6 +138,320 @@ static uint64_t host_dirty_count;
static uint64_t host_clear_count;
static uint64_t host_track_next_count;
+/* Whether dirty ring reset is requested, or finished */
+static sem_t sem_vcpu_stop;
+static sem_t sem_vcpu_cont;
+/*
+ * This is only set by main thread, and only cleared by vcpu thread. It is
+ * used to request vcpu thread to stop at the next GUEST_SYNC, since GUEST_SYNC
+ * is the only place that we'll guarantee both "dirty bit" and "dirty data"
+ * will match. E.g., SIG_IPI won't guarantee that if the vcpu is interrupted
+ * after setting dirty bit but before the data is written.
+ */
+static atomic_t vcpu_sync_stop_requested;
+/*
+ * This is updated by the vcpu thread to tell the host whether it's a
+ * ring-full event. It should only be read until a sem_wait() of
+ * sem_vcpu_stop and before vcpu continues to run.
+ */
+static bool dirty_ring_vcpu_ring_full;
+/*
+ * This is only used for verifying the dirty pages. Dirty ring has a very
+ * tricky case when the ring just got full, kvm will do userspace exit due to
+ * ring full. When that happens, the very last PFN is set but actually the
+ * data is not changed (the guest WRITE is not really applied yet), because
+ * we found that the dirty ring is full, refused to continue the vcpu, and
+ * recorded the dirty gfn with the old contents.
+ *
+ * For this specific case, it's safe to skip checking this pfn for this
+ * bit, because it's a redundant bit, and when the write happens later the bit
+ * will be set again. We use this variable to always keep track of the latest
+ * dirty gfn we've collected, so that if a mismatch of data found later in the
+ * verifying process, we let it pass.
+ */
+static uint64_t dirty_ring_last_page;
+
+enum log_mode_t {
+ /* Only use KVM_GET_DIRTY_LOG for logging */
+ LOG_MODE_DIRTY_LOG = 0,
+
+ /* Use both KVM_[GET|CLEAR]_DIRTY_LOG for logging */
+ LOG_MODE_CLEAR_LOG = 1,
+
+ /* Use dirty ring for logging */
+ LOG_MODE_DIRTY_RING = 2,
+
+ LOG_MODE_NUM,
+
+ /* Run all supported modes */
+ LOG_MODE_ALL = LOG_MODE_NUM,
+};
+
+/* Mode of logging to test. Default is to run all supported modes */
+static enum log_mode_t host_log_mode_option = LOG_MODE_ALL;
+/* Logging mode for current run */
+static enum log_mode_t host_log_mode;
+static pthread_t vcpu_thread;
+static uint32_t test_dirty_ring_count = TEST_DIRTY_RING_COUNT;
+
+static void vcpu_kick(void)
+{
+ pthread_kill(vcpu_thread, SIG_IPI);
+}
+
+/*
+ * In our test we do signal tricks, let's use a better version of
+ * sem_wait to avoid signal interrupts
+ */
+static void sem_wait_until(sem_t *sem)
+{
+ int ret;
+
+ do
+ ret = sem_wait(sem);
+ while (ret == -1 && errno == EINTR);
+}
+
+static bool clear_log_supported(void)
+{
+ return kvm_has_cap(KVM_CAP_MANUAL_DIRTY_LOG_PROTECT2);
+}
+
+static void clear_log_create_vm_done(struct kvm_vm *vm)
+{
+ u64 manual_caps;
+
+ manual_caps = kvm_check_cap(KVM_CAP_MANUAL_DIRTY_LOG_PROTECT2);
+ TEST_ASSERT(manual_caps, "MANUAL_CAPS is zero!");
+ manual_caps &= (KVM_DIRTY_LOG_MANUAL_PROTECT_ENABLE |
+ KVM_DIRTY_LOG_INITIALLY_SET);
+ vm_enable_cap(vm, KVM_CAP_MANUAL_DIRTY_LOG_PROTECT2, manual_caps);
+}
+
+static void dirty_log_collect_dirty_pages(struct kvm_vcpu *vcpu, int slot,
+ void *bitmap, uint32_t num_pages,
+ uint32_t *unused)
+{
+ kvm_vm_get_dirty_log(vcpu->vm, slot, bitmap);
+}
+
+static void clear_log_collect_dirty_pages(struct kvm_vcpu *vcpu, int slot,
+ void *bitmap, uint32_t num_pages,
+ uint32_t *unused)
+{
+ kvm_vm_get_dirty_log(vcpu->vm, slot, bitmap);
+ kvm_vm_clear_dirty_log(vcpu->vm, slot, bitmap, 0, num_pages);
+}
+
+/* Should only be called after a GUEST_SYNC */
+static void vcpu_handle_sync_stop(void)
+{
+ if (atomic_read(&vcpu_sync_stop_requested)) {
+ /* It means main thread is sleeping waiting */
+ atomic_set(&vcpu_sync_stop_requested, false);
+ sem_post(&sem_vcpu_stop);
+ sem_wait_until(&sem_vcpu_cont);
+ }
+}
+
+static void default_after_vcpu_run(struct kvm_vcpu *vcpu, int ret, int err)
+{
+ struct kvm_run *run = vcpu->run;
+
+ TEST_ASSERT(ret == 0 || (ret == -1 && err == EINTR),
+ "vcpu run failed: errno=%d", err);
+
+ TEST_ASSERT(get_ucall(vcpu, NULL) == UCALL_SYNC,
+ "Invalid guest sync status: exit_reason=%s",
+ exit_reason_str(run->exit_reason));
+
+ vcpu_handle_sync_stop();
+}
+
+static bool dirty_ring_supported(void)
+{
+ return (kvm_has_cap(KVM_CAP_DIRTY_LOG_RING) ||
+ kvm_has_cap(KVM_CAP_DIRTY_LOG_RING_ACQ_REL));
+}
+
+static void dirty_ring_create_vm_done(struct kvm_vm *vm)
+{
+ uint64_t pages;
+ uint32_t limit;
+
+ /*
+ * We rely on vcpu exit due to full dirty ring state. Adjust
+ * the ring buffer size to ensure we're able to reach the
+ * full dirty ring state.
+ */
+ pages = (1ul << (DIRTY_MEM_BITS - vm->page_shift)) + 3;
+ pages = vm_adjust_num_guest_pages(vm->mode, pages);
+ if (vm->page_size < getpagesize())
+ pages = vm_num_host_pages(vm->mode, pages);
+
+ limit = 1 << (31 - __builtin_clz(pages));
+ test_dirty_ring_count = 1 << (31 - __builtin_clz(test_dirty_ring_count));
+ test_dirty_ring_count = min(limit, test_dirty_ring_count);
+ pr_info("dirty ring count: 0x%x\n", test_dirty_ring_count);
+
+ /*
+ * Switch to dirty ring mode after VM creation but before any
+ * of the vcpu creation.
+ */
+ vm_enable_dirty_ring(vm, test_dirty_ring_count *
+ sizeof(struct kvm_dirty_gfn));
+}
+
+static inline bool dirty_gfn_is_dirtied(struct kvm_dirty_gfn *gfn)
+{
+ return smp_load_acquire(&gfn->flags) == KVM_DIRTY_GFN_F_DIRTY;
+}
+
+static inline void dirty_gfn_set_collected(struct kvm_dirty_gfn *gfn)
+{
+ smp_store_release(&gfn->flags, KVM_DIRTY_GFN_F_RESET);
+}
+
+static uint32_t dirty_ring_collect_one(struct kvm_dirty_gfn *dirty_gfns,
+ int slot, void *bitmap,
+ uint32_t num_pages, uint32_t *fetch_index)
+{
+ struct kvm_dirty_gfn *cur;
+ uint32_t count = 0;
+
+ while (true) {
+ cur = &dirty_gfns[*fetch_index % test_dirty_ring_count];
+ if (!dirty_gfn_is_dirtied(cur))
+ break;
+ TEST_ASSERT(cur->slot == slot, "Slot number didn't match: "
+ "%u != %u", cur->slot, slot);
+ TEST_ASSERT(cur->offset < num_pages, "Offset overflow: "
+ "0x%llx >= 0x%x", cur->offset, num_pages);
+ //pr_info("fetch 0x%x page %llu\n", *fetch_index, cur->offset);
+ __set_bit_le(cur->offset, bitmap);
+ dirty_ring_last_page = cur->offset;
+ dirty_gfn_set_collected(cur);
+ (*fetch_index)++;
+ count++;
+ }
+
+ return count;
+}
+
+static void dirty_ring_wait_vcpu(void)
+{
+ /* This makes sure that hardware PML cache flushed */
+ vcpu_kick();
+ sem_wait_until(&sem_vcpu_stop);
+}
+
+static void dirty_ring_continue_vcpu(void)
+{
+ pr_info("Notifying vcpu to continue\n");
+ sem_post(&sem_vcpu_cont);
+}
+
+static void dirty_ring_collect_dirty_pages(struct kvm_vcpu *vcpu, int slot,
+ void *bitmap, uint32_t num_pages,
+ uint32_t *ring_buf_idx)
+{
+ uint32_t count = 0, cleared;
+ bool continued_vcpu = false;
+
+ dirty_ring_wait_vcpu();
+
+ if (!dirty_ring_vcpu_ring_full) {
+ /*
+ * This is not a ring-full event, it's safe to allow
+ * vcpu to continue
+ */
+ dirty_ring_continue_vcpu();
+ continued_vcpu = true;
+ }
+
+ /* Only have one vcpu */
+ count = dirty_ring_collect_one(vcpu_map_dirty_ring(vcpu),
+ slot, bitmap, num_pages,
+ ring_buf_idx);
+
+ cleared = kvm_vm_reset_dirty_ring(vcpu->vm);
+
+ /*
+ * Cleared pages should be the same as collected, as KVM is supposed to
+ * clear only the entries that have been harvested.
+ */
+ TEST_ASSERT(cleared == count, "Reset dirty pages (%u) mismatch "
+ "with collected (%u)", cleared, count);
+
+ if (!continued_vcpu) {
+ TEST_ASSERT(dirty_ring_vcpu_ring_full,
+ "Didn't continue vcpu even without ring full");
+ dirty_ring_continue_vcpu();
+ }
+
+ pr_info("Iteration %ld collected %u pages\n", iteration, count);
+}
+
+static void dirty_ring_after_vcpu_run(struct kvm_vcpu *vcpu, int ret, int err)
+{
+ struct kvm_run *run = vcpu->run;
+
+ /* A ucall-sync or ring-full event is allowed */
+ if (get_ucall(vcpu, NULL) == UCALL_SYNC) {
+ /* We should allow this to continue */
+ ;
+ } else if (run->exit_reason == KVM_EXIT_DIRTY_RING_FULL ||
+ (ret == -1 && err == EINTR)) {
+ /* Update the flag first before pause */
+ WRITE_ONCE(dirty_ring_vcpu_ring_full,
+ run->exit_reason == KVM_EXIT_DIRTY_RING_FULL);
+ sem_post(&sem_vcpu_stop);
+ pr_info("vcpu stops because %s...\n",
+ dirty_ring_vcpu_ring_full ?
+ "dirty ring is full" : "vcpu is kicked out");
+ sem_wait_until(&sem_vcpu_cont);
+ pr_info("vcpu continues now.\n");
+ } else {
+ TEST_ASSERT(false, "Invalid guest sync status: "
+ "exit_reason=%s",
+ exit_reason_str(run->exit_reason));
+ }
+}
+
+struct log_mode {
+ const char *name;
+ /* Return true if this mode is supported, otherwise false */
+ bool (*supported)(void);
+ /* Hook when the vm creation is done (before vcpu creation) */
+ void (*create_vm_done)(struct kvm_vm *vm);
+ /* Hook to collect the dirty pages into the bitmap provided */
+ void (*collect_dirty_pages) (struct kvm_vcpu *vcpu, int slot,
+ void *bitmap, uint32_t num_pages,
+ uint32_t *ring_buf_idx);
+ /* Hook to call when after each vcpu run */
+ void (*after_vcpu_run)(struct kvm_vcpu *vcpu, int ret, int err);
+} log_modes[LOG_MODE_NUM] = {
+ {
+ .name = "dirty-log",
+ .collect_dirty_pages = dirty_log_collect_dirty_pages,
+ .after_vcpu_run = default_after_vcpu_run,
+ },
+ {
+ .name = "clear-log",
+ .supported = clear_log_supported,
+ .create_vm_done = clear_log_create_vm_done,
+ .collect_dirty_pages = clear_log_collect_dirty_pages,
+ .after_vcpu_run = default_after_vcpu_run,
+ },
+ {
+ .name = "dirty-ring",
+ .supported = dirty_ring_supported,
+ .create_vm_done = dirty_ring_create_vm_done,
+ .collect_dirty_pages = dirty_ring_collect_dirty_pages,
+ .after_vcpu_run = dirty_ring_after_vcpu_run,
+ },
+};
+
/*
* We use this bitmap to track some pages that should have its dirty
* bit set in the _next_ iteration. For example, if we detected the
@@ -137,6 +461,53 @@ static uint64_t host_track_next_count;
*/
static unsigned long *host_bmap_track;
+static void log_modes_dump(void)
+{
+ int i;
+
+ printf("all");
+ for (i = 0; i < LOG_MODE_NUM; i++)
+ printf(", %s", log_modes[i].name);
+ printf("\n");
+}
+
+static bool log_mode_supported(void)
+{
+ struct log_mode *mode = &log_modes[host_log_mode];
+
+ if (mode->supported)
+ return mode->supported();
+
+ return true;
+}
+
+static void log_mode_create_vm_done(struct kvm_vm *vm)
+{
+ struct log_mode *mode = &log_modes[host_log_mode];
+
+ if (mode->create_vm_done)
+ mode->create_vm_done(vm);
+}
+
+static void log_mode_collect_dirty_pages(struct kvm_vcpu *vcpu, int slot,
+ void *bitmap, uint32_t num_pages,
+ uint32_t *ring_buf_idx)
+{
+ struct log_mode *mode = &log_modes[host_log_mode];
+
+ TEST_ASSERT(mode->collect_dirty_pages != NULL,
+ "collect_dirty_pages() is required for any log mode!");
+ mode->collect_dirty_pages(vcpu, slot, bitmap, num_pages, ring_buf_idx);
+}
+
+static void log_mode_after_vcpu_run(struct kvm_vcpu *vcpu, int ret, int err)
+{
+ struct log_mode *mode = &log_modes[host_log_mode];
+
+ if (mode->after_vcpu_run)
+ mode->after_vcpu_run(vcpu, ret, err);
+}
+
static void generate_random_array(uint64_t *guest_array, uint64_t size)
{
uint64_t i;
@@ -148,28 +519,41 @@ static void generate_random_array(uint64_t *guest_array, uint64_t size)
static void *vcpu_worker(void *data)
{
int ret;
- struct kvm_vm *vm = data;
+ struct kvm_vcpu *vcpu = data;
+ struct kvm_vm *vm = vcpu->vm;
uint64_t *guest_array;
uint64_t pages_count = 0;
- struct kvm_run *run;
+ struct kvm_signal_mask *sigmask = alloca(offsetof(struct kvm_signal_mask, sigset)
+ + sizeof(sigset_t));
+ sigset_t *sigset = (sigset_t *) &sigmask->sigset;
- run = vcpu_state(vm, VCPU_ID);
+ /*
+ * SIG_IPI is unblocked atomically while in KVM_RUN. It causes the
+ * ioctl to return with -EINTR, but it is still pending and we need
+ * to accept it with the sigwait.
+ */
+ sigmask->len = 8;
+ pthread_sigmask(0, NULL, sigset);
+ sigdelset(sigset, SIG_IPI);
+ vcpu_ioctl(vcpu, KVM_SET_SIGNAL_MASK, sigmask);
+
+ sigemptyset(sigset);
+ sigaddset(sigset, SIG_IPI);
guest_array = addr_gva2hva(vm, (vm_vaddr_t)random_array);
- generate_random_array(guest_array, TEST_PAGES_PER_LOOP);
while (!READ_ONCE(host_quit)) {
+ /* Clear any existing kick signals */
+ generate_random_array(guest_array, TEST_PAGES_PER_LOOP);
+ pages_count += TEST_PAGES_PER_LOOP;
/* Let the guest dirty the random pages */
- ret = _vcpu_run(vm, VCPU_ID);
- TEST_ASSERT(ret == 0, "vcpu_run failed: %d\n", ret);
- if (get_ucall(vm, VCPU_ID, NULL) == UCALL_SYNC) {
- pages_count += TEST_PAGES_PER_LOOP;
- generate_random_array(guest_array, TEST_PAGES_PER_LOOP);
- } else {
- TEST_FAIL("Invalid guest sync status: "
- "exit_reason=%s\n",
- exit_reason_str(run->exit_reason));
+ ret = __vcpu_run(vcpu);
+ if (ret == -1 && errno == EINTR) {
+ int sig = -1;
+ sigwait(sigset, &sig);
+ assert(sig == SIG_IPI);
}
+ log_mode_after_vcpu_run(vcpu, ret, errno);
}
pr_info("Dirtied %"PRIu64" pages\n", pages_count);
@@ -182,12 +566,13 @@ static void vm_dirty_log_verify(enum vm_guest_mode mode, unsigned long *bmap)
uint64_t step = vm_num_host_pages(mode, 1);
uint64_t page;
uint64_t *value_ptr;
+ uint64_t min_iter = 0;
for (page = 0; page < host_num_pages; page += step) {
value_ptr = host_test_mem + page * host_page_size;
/* If this is a special page that we were tracking... */
- if (test_and_clear_bit_le(page, host_bmap_track)) {
+ if (__test_and_clear_bit_le(page, host_bmap_track)) {
host_track_next_count++;
TEST_ASSERT(test_bit_le(page, bmap),
"Page %"PRIu64" should have its dirty bit "
@@ -195,15 +580,65 @@ static void vm_dirty_log_verify(enum vm_guest_mode mode, unsigned long *bmap)
page);
}
- if (test_bit_le(page, bmap)) {
+ if (__test_and_clear_bit_le(page, bmap)) {
+ bool matched;
+
host_dirty_count++;
+
/*
* If the bit is set, the value written onto
* the corresponding page should be either the
* previous iteration number or the current one.
*/
- TEST_ASSERT(*value_ptr == iteration ||
- *value_ptr == iteration - 1,
+ matched = (*value_ptr == iteration ||
+ *value_ptr == iteration - 1);
+
+ if (host_log_mode == LOG_MODE_DIRTY_RING && !matched) {
+ if (*value_ptr == iteration - 2 && min_iter <= iteration - 2) {
+ /*
+ * Short answer: this case is special
+ * only for dirty ring test where the
+ * page is the last page before a kvm
+ * dirty ring full in iteration N-2.
+ *
+ * Long answer: Assuming ring size R,
+ * one possible condition is:
+ *
+ * main thr vcpu thr
+ * -------- --------
+ * iter=1
+ * write 1 to page 0~(R-1)
+ * full, vmexit
+ * collect 0~(R-1)
+ * kick vcpu
+ * write 1 to (R-1)~(2R-2)
+ * full, vmexit
+ * iter=2
+ * collect (R-1)~(2R-2)
+ * kick vcpu
+ * write 1 to (2R-2)
+ * (NOTE!!! "1" cached in cpu reg)
+ * write 2 to (2R-1)~(3R-3)
+ * full, vmexit
+ * iter=3
+ * collect (2R-2)~(3R-3)
+ * (here if we read value on page
+ * "2R-2" is 1, while iter=3!!!)
+ *
+ * This however can only happen once per iteration.
+ */
+ min_iter = iteration - 1;
+ continue;
+ } else if (page == dirty_ring_last_page) {
+ /*
+ * Please refer to comments in
+ * dirty_ring_last_page.
+ */
+ continue;
+ }
+ }
+
+ TEST_ASSERT(matched,
"Set page %"PRIu64" value %"PRIu64
" incorrect (iteration=%"PRIu64")",
page, *value_ptr, iteration);
@@ -238,42 +673,46 @@ static void vm_dirty_log_verify(enum vm_guest_mode mode, unsigned long *bmap)
* should report its dirtyness in the
* next run
*/
- set_bit_le(page, host_bmap_track);
+ __set_bit_le(page, host_bmap_track);
}
}
}
}
-static struct kvm_vm *create_vm(enum vm_guest_mode mode, uint32_t vcpuid,
+static struct kvm_vm *create_vm(enum vm_guest_mode mode, struct kvm_vcpu **vcpu,
uint64_t extra_mem_pages, void *guest_code)
{
struct kvm_vm *vm;
- uint64_t extra_pg_pages = extra_mem_pages / 512 * 2;
pr_info("Testing guest mode: %s\n", vm_guest_mode_string(mode));
- vm = _vm_create(mode, DEFAULT_GUEST_PHY_PAGES + extra_pg_pages, O_RDWR);
- kvm_vm_elf_load(vm, program_invocation_name, 0, 0);
-#ifdef __x86_64__
- vm_create_irqchip(vm);
-#endif
- vm_vcpu_add_default(vm, vcpuid, guest_code);
+ vm = __vm_create(VM_SHAPE(mode), 1, extra_mem_pages);
+
+ log_mode_create_vm_done(vm);
+ *vcpu = vm_vcpu_add(vm, 0, guest_code);
return vm;
}
-#define DIRTY_MEM_BITS 30 /* 1G */
-#define PAGE_SHIFT_4K 12
-
-#ifdef USE_CLEAR_DIRTY_LOG
-static u64 dirty_log_manual_caps;
-#endif
+struct test_params {
+ unsigned long iterations;
+ unsigned long interval;
+ uint64_t phys_offset;
+};
-static void run_test(enum vm_guest_mode mode, unsigned long iterations,
- unsigned long interval, uint64_t phys_offset)
+static void run_test(enum vm_guest_mode mode, void *arg)
{
- pthread_t vcpu_thread;
+ struct test_params *p = arg;
+ struct kvm_vcpu *vcpu;
struct kvm_vm *vm;
unsigned long *bmap;
+ uint32_t ring_buf_idx = 0;
+ int sem_val;
+
+ if (!log_mode_supported()) {
+ print_skip("Log mode '%s' not supported",
+ log_modes[host_log_mode].name);
+ return;
+ }
/*
* We reserve page table for 2 times of extra dirty mem which
@@ -283,47 +722,37 @@ static void run_test(enum vm_guest_mode mode, unsigned long iterations,
* (e.g., 64K page size guest will need even less memory for
* page tables).
*/
- vm = create_vm(mode, VCPU_ID,
- 2ul << (DIRTY_MEM_BITS - PAGE_SHIFT_4K),
- guest_code);
+ vm = create_vm(mode, &vcpu,
+ 2ul << (DIRTY_MEM_BITS - PAGE_SHIFT_4K), guest_code);
- guest_page_size = vm_get_page_size(vm);
+ guest_page_size = vm->page_size;
/*
* A little more than 1G of guest page sized pages. Cover the
* case where the size is not aligned to 64 pages.
*/
- guest_num_pages = (1ul << (DIRTY_MEM_BITS -
- vm_get_page_shift(vm))) + 3;
+ guest_num_pages = (1ul << (DIRTY_MEM_BITS - vm->page_shift)) + 3;
guest_num_pages = vm_adjust_num_guest_pages(mode, guest_num_pages);
host_page_size = getpagesize();
host_num_pages = vm_num_host_pages(mode, guest_num_pages);
- if (!phys_offset) {
- guest_test_phys_mem = (vm_get_max_gfn(vm) -
- guest_num_pages) * guest_page_size;
- guest_test_phys_mem &= ~(host_page_size - 1);
+ if (!p->phys_offset) {
+ guest_test_phys_mem = (vm->max_gfn - guest_num_pages) *
+ guest_page_size;
+ guest_test_phys_mem = align_down(guest_test_phys_mem, host_page_size);
} else {
- guest_test_phys_mem = phys_offset;
+ guest_test_phys_mem = p->phys_offset;
}
#ifdef __s390x__
/* Align to 1M (segment size) */
- guest_test_phys_mem &= ~((1 << 20) - 1);
+ guest_test_phys_mem = align_down(guest_test_phys_mem, 1 << 20);
#endif
pr_info("guest physical test memory offset: 0x%lx\n", guest_test_phys_mem);
- bmap = bitmap_alloc(host_num_pages);
- host_bmap_track = bitmap_alloc(host_num_pages);
-
-#ifdef USE_CLEAR_DIRTY_LOG
- struct kvm_enable_cap cap = {};
-
- cap.cap = KVM_CAP_MANUAL_DIRTY_LOG_PROTECT2;
- cap.args[0] = dirty_log_manual_caps;
- vm_enable_cap(vm, &cap);
-#endif
+ bmap = bitmap_zalloc(host_num_pages);
+ host_bmap_track = bitmap_zalloc(host_num_pages);
/* Add an extra memory slot for testing dirty logging */
vm_userspace_mem_region_add(vm, VM_MEM_SRC_ANONYMOUS,
@@ -333,16 +762,11 @@ static void run_test(enum vm_guest_mode mode, unsigned long iterations,
KVM_MEM_LOG_DIRTY_PAGES);
/* Do mapping for the dirty track memory slot */
- virt_map(vm, guest_test_virt_mem, guest_test_phys_mem, guest_num_pages, 0);
+ virt_map(vm, guest_test_virt_mem, guest_test_phys_mem, guest_num_pages);
/* Cache the HVA pointer of the region */
host_test_mem = addr_gpa2hva(vm, (vm_paddr_t)guest_test_phys_mem);
-#ifdef __x86_64__
- vcpu_set_cpuid(vm, VCPU_ID, kvm_get_supported_cpuid());
-#endif
- ucall_init(vm, NULL);
-
/* Export the shared variables to the guest */
sync_global_to_guest(vm, host_page_size);
sync_global_to_guest(vm, guest_page_size);
@@ -352,28 +776,62 @@ static void run_test(enum vm_guest_mode mode, unsigned long iterations,
/* Start the iterations */
iteration = 1;
sync_global_to_guest(vm, iteration);
- host_quit = false;
+ WRITE_ONCE(host_quit, false);
host_dirty_count = 0;
host_clear_count = 0;
host_track_next_count = 0;
+ WRITE_ONCE(dirty_ring_vcpu_ring_full, false);
- pthread_create(&vcpu_thread, NULL, vcpu_worker, vm);
+ /*
+ * Ensure the previous iteration didn't leave a dangling semaphore, i.e.
+ * that the main task and vCPU worker were synchronized and completed
+ * verification of all iterations.
+ */
+ sem_getvalue(&sem_vcpu_stop, &sem_val);
+ TEST_ASSERT_EQ(sem_val, 0);
+ sem_getvalue(&sem_vcpu_cont, &sem_val);
+ TEST_ASSERT_EQ(sem_val, 0);
+
+ pthread_create(&vcpu_thread, NULL, vcpu_worker, vcpu);
- while (iteration < iterations) {
+ while (iteration < p->iterations) {
/* Give the vcpu thread some time to dirty some pages */
- usleep(interval * 1000);
- kvm_vm_get_dirty_log(vm, TEST_MEM_SLOT_INDEX, bmap);
-#ifdef USE_CLEAR_DIRTY_LOG
- kvm_vm_clear_dirty_log(vm, TEST_MEM_SLOT_INDEX, bmap, 0,
- host_num_pages);
-#endif
+ usleep(p->interval * 1000);
+ log_mode_collect_dirty_pages(vcpu, TEST_MEM_SLOT_INDEX,
+ bmap, host_num_pages,
+ &ring_buf_idx);
+
+ /*
+ * See vcpu_sync_stop_requested definition for details on why
+ * we need to stop vcpu when verify data.
+ */
+ atomic_set(&vcpu_sync_stop_requested, true);
+ sem_wait_until(&sem_vcpu_stop);
+ /*
+ * NOTE: for dirty ring, it's possible that we didn't stop at
+ * GUEST_SYNC but instead we stopped because ring is full;
+ * that's okay too because ring full means we're only missing
+ * the flush of the last page, and since we handle the last
+ * page specially verification will succeed anyway.
+ */
+ assert(host_log_mode == LOG_MODE_DIRTY_RING ||
+ atomic_read(&vcpu_sync_stop_requested) == false);
vm_dirty_log_verify(mode, bmap);
- iteration++;
+
+ /*
+ * Set host_quit before sem_vcpu_cont in the final iteration to
+ * ensure that the vCPU worker doesn't resume the guest. As
+ * above, the dirty ring test may stop and wait even when not
+ * explicitly request to do so, i.e. would hang waiting for a
+ * "continue" if it's allowed to resume the guest.
+ */
+ if (++iteration == p->iterations)
+ WRITE_ONCE(host_quit, true);
+
+ sem_post(&sem_vcpu_cont);
sync_global_to_guest(vm, iteration);
}
- /* Tell the vcpu thread to quit */
- host_quit = true;
pthread_join(vcpu_thread, NULL);
pr_info("Total bits checked: dirty (%"PRIu64"), clear (%"PRIu64"), "
@@ -382,109 +840,82 @@ static void run_test(enum vm_guest_mode mode, unsigned long iterations,
free(bmap);
free(host_bmap_track);
- ucall_uninit(vm);
kvm_vm_free(vm);
}
-struct guest_mode {
- bool supported;
- bool enabled;
-};
-static struct guest_mode guest_modes[NUM_VM_MODES];
-
-#define guest_mode_init(mode, supported, enabled) ({ \
- guest_modes[mode] = (struct guest_mode){ supported, enabled }; \
-})
-
static void help(char *name)
{
- int i;
-
puts("");
printf("usage: %s [-h] [-i iterations] [-I interval] "
"[-p offset] [-m mode]\n", name);
puts("");
+ printf(" -c: hint to dirty ring size, in number of entries\n");
+ printf(" (only useful for dirty-ring test; default: %"PRIu32")\n",
+ TEST_DIRTY_RING_COUNT);
printf(" -i: specify iteration counts (default: %"PRIu64")\n",
TEST_HOST_LOOP_N);
printf(" -I: specify interval in ms (default: %"PRIu64" ms)\n",
TEST_HOST_LOOP_INTERVAL);
printf(" -p: specify guest physical test memory offset\n"
" Warning: a low offset can conflict with the loaded test code.\n");
- printf(" -m: specify the guest mode ID to test "
- "(default: test all supported modes)\n"
- " This option may be used multiple times.\n"
- " Guest mode IDs:\n");
- for (i = 0; i < NUM_VM_MODES; ++i) {
- printf(" %d: %s%s\n", i, vm_guest_mode_string(i),
- guest_modes[i].supported ? " (supported)" : "");
- }
+ printf(" -M: specify the host logging mode "
+ "(default: run all log modes). Supported modes: \n\t");
+ log_modes_dump();
+ guest_modes_help();
puts("");
exit(0);
}
int main(int argc, char *argv[])
{
- unsigned long iterations = TEST_HOST_LOOP_N;
- unsigned long interval = TEST_HOST_LOOP_INTERVAL;
- bool mode_selected = false;
- uint64_t phys_offset = 0;
- unsigned int mode;
+ struct test_params p = {
+ .iterations = TEST_HOST_LOOP_N,
+ .interval = TEST_HOST_LOOP_INTERVAL,
+ };
int opt, i;
+ sigset_t sigset;
-#ifdef USE_CLEAR_DIRTY_LOG
- dirty_log_manual_caps =
- kvm_check_cap(KVM_CAP_MANUAL_DIRTY_LOG_PROTECT2);
- if (!dirty_log_manual_caps) {
- print_skip("KVM_CLEAR_DIRTY_LOG not available");
- exit(KSFT_SKIP);
- }
- dirty_log_manual_caps &= (KVM_DIRTY_LOG_MANUAL_PROTECT_ENABLE |
- KVM_DIRTY_LOG_INITIALLY_SET);
-#endif
-
-#ifdef __x86_64__
- guest_mode_init(VM_MODE_PXXV48_4K, true, true);
-#endif
-#ifdef __aarch64__
- guest_mode_init(VM_MODE_P40V48_4K, true, true);
- guest_mode_init(VM_MODE_P40V48_64K, true, true);
-
- {
- unsigned int limit = kvm_check_cap(KVM_CAP_ARM_VM_IPA_SIZE);
+ sem_init(&sem_vcpu_stop, 0, 0);
+ sem_init(&sem_vcpu_cont, 0, 0);
- if (limit >= 52)
- guest_mode_init(VM_MODE_P52V48_64K, true, true);
- if (limit >= 48) {
- guest_mode_init(VM_MODE_P48V48_4K, true, true);
- guest_mode_init(VM_MODE_P48V48_64K, true, true);
- }
- }
-#endif
-#ifdef __s390x__
- guest_mode_init(VM_MODE_P40V48_4K, true, true);
-#endif
+ guest_modes_append_default();
- while ((opt = getopt(argc, argv, "hi:I:p:m:")) != -1) {
+ while ((opt = getopt(argc, argv, "c:hi:I:p:m:M:")) != -1) {
switch (opt) {
+ case 'c':
+ test_dirty_ring_count = strtol(optarg, NULL, 10);
+ break;
case 'i':
- iterations = strtol(optarg, NULL, 10);
+ p.iterations = strtol(optarg, NULL, 10);
break;
case 'I':
- interval = strtol(optarg, NULL, 10);
+ p.interval = strtol(optarg, NULL, 10);
break;
case 'p':
- phys_offset = strtoull(optarg, NULL, 0);
+ p.phys_offset = strtoull(optarg, NULL, 0);
break;
case 'm':
- if (!mode_selected) {
- for (i = 0; i < NUM_VM_MODES; ++i)
- guest_modes[i].enabled = false;
- mode_selected = true;
+ guest_modes_cmdline(optarg);
+ break;
+ case 'M':
+ if (!strcmp(optarg, "all")) {
+ host_log_mode_option = LOG_MODE_ALL;
+ break;
+ }
+ for (i = 0; i < LOG_MODE_NUM; i++) {
+ if (!strcmp(optarg, log_modes[i].name)) {
+ pr_info("Setting log mode to: '%s'\n",
+ optarg);
+ host_log_mode_option = i;
+ break;
+ }
+ }
+ if (i == LOG_MODE_NUM) {
+ printf("Log mode '%s' invalid. Please choose "
+ "from: ", optarg);
+ log_modes_dump();
+ exit(1);
}
- mode = strtoul(optarg, NULL, 10);
- TEST_ASSERT(mode < NUM_VM_MODES,
- "Guest mode ID %d too big", mode);
- guest_modes[mode].enabled = true;
break;
case 'h':
default:
@@ -493,21 +924,29 @@ int main(int argc, char *argv[])
}
}
- TEST_ASSERT(iterations > 2, "Iterations must be greater than two");
- TEST_ASSERT(interval > 0, "Interval must be greater than zero");
+ TEST_ASSERT(p.iterations > 2, "Iterations must be greater than two");
+ TEST_ASSERT(p.interval > 0, "Interval must be greater than zero");
pr_info("Test iterations: %"PRIu64", interval: %"PRIu64" (ms)\n",
- iterations, interval);
+ p.iterations, p.interval);
srandom(time(0));
- for (i = 0; i < NUM_VM_MODES; ++i) {
- if (!guest_modes[i].enabled)
- continue;
- TEST_ASSERT(guest_modes[i].supported,
- "Guest mode ID %d (%s) not supported.",
- i, vm_guest_mode_string(i));
- run_test(i, iterations, interval, phys_offset);
+ /* Ensure that vCPU threads start with SIG_IPI blocked. */
+ sigemptyset(&sigset);
+ sigaddset(&sigset, SIG_IPI);
+ pthread_sigmask(SIG_BLOCK, &sigset, NULL);
+
+ if (host_log_mode_option == LOG_MODE_ALL) {
+ /* Run each log mode */
+ for (i = 0; i < LOG_MODE_NUM; i++) {
+ pr_info("Testing Log Mode '%s'\n", log_modes[i].name);
+ host_log_mode = i;
+ for_each_guest_mode(run_test, &p);
+ }
+ } else {
+ host_log_mode = host_log_mode_option;
+ for_each_guest_mode(run_test, &p);
}
return 0;
diff --git a/tools/testing/selftests/kvm/get-reg-list.c b/tools/testing/selftests/kvm/get-reg-list.c
new file mode 100644
index 000000000000..91f05f78e824
--- /dev/null
+++ b/tools/testing/selftests/kvm/get-reg-list.c
@@ -0,0 +1,402 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Check for KVM_GET_REG_LIST regressions.
+ *
+ * Copyright (C) 2020, Red Hat, Inc.
+ *
+ * When attempting to migrate from a host with an older kernel to a host
+ * with a newer kernel we allow the newer kernel on the destination to
+ * list new registers with get-reg-list. We assume they'll be unused, at
+ * least until the guest reboots, and so they're relatively harmless.
+ * However, if the destination host with the newer kernel is missing
+ * registers which the source host with the older kernel has, then that's
+ * a regression in get-reg-list. This test checks for that regression by
+ * checking the current list against a blessed list. We should never have
+ * missing registers, but if new ones appear then they can probably be
+ * added to the blessed list. A completely new blessed list can be created
+ * by running the test with the --list command line argument.
+ *
+ * The blessed list should be created from the oldest possible kernel.
+ */
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+#include <sys/types.h>
+#include <sys/wait.h>
+#include "kvm_util.h"
+#include "test_util.h"
+#include "processor.h"
+
+static struct kvm_reg_list *reg_list;
+static __u64 *blessed_reg, blessed_n;
+
+extern struct vcpu_reg_list *vcpu_configs[];
+extern int vcpu_configs_n;
+
+#define for_each_reg(i) \
+ for ((i) = 0; (i) < reg_list->n; ++(i))
+
+#define for_each_reg_filtered(i) \
+ for_each_reg(i) \
+ if (!filter_reg(reg_list->reg[i]))
+
+#define for_each_missing_reg(i) \
+ for ((i) = 0; (i) < blessed_n; ++(i)) \
+ if (!find_reg(reg_list->reg, reg_list->n, blessed_reg[i])) \
+ if (check_supported_reg(vcpu, blessed_reg[i]))
+
+#define for_each_new_reg(i) \
+ for_each_reg_filtered(i) \
+ if (!find_reg(blessed_reg, blessed_n, reg_list->reg[i]))
+
+#define for_each_present_blessed_reg(i) \
+ for_each_reg(i) \
+ if (find_reg(blessed_reg, blessed_n, reg_list->reg[i]))
+
+static const char *config_name(struct vcpu_reg_list *c)
+{
+ struct vcpu_reg_sublist *s;
+ int len = 0;
+
+ if (c->name)
+ return c->name;
+
+ for_each_sublist(c, s)
+ len += strlen(s->name) + 1;
+
+ c->name = malloc(len);
+
+ len = 0;
+ for_each_sublist(c, s) {
+ if (!strcmp(s->name, "base"))
+ continue;
+ if (len)
+ c->name[len++] = '+';
+ strcpy(c->name + len, s->name);
+ len += strlen(s->name);
+ }
+ c->name[len] = '\0';
+
+ return c->name;
+}
+
+bool __weak check_supported_reg(struct kvm_vcpu *vcpu, __u64 reg)
+{
+ return true;
+}
+
+bool __weak filter_reg(__u64 reg)
+{
+ return false;
+}
+
+static bool find_reg(__u64 regs[], __u64 nr_regs, __u64 reg)
+{
+ int i;
+
+ for (i = 0; i < nr_regs; ++i)
+ if (reg == regs[i])
+ return true;
+ return false;
+}
+
+void __weak print_reg(const char *prefix, __u64 id)
+{
+ printf("\t0x%llx,\n", id);
+}
+
+bool __weak check_reject_set(int err)
+{
+ return true;
+}
+
+void __weak finalize_vcpu(struct kvm_vcpu *vcpu, struct vcpu_reg_list *c)
+{
+}
+
+#ifdef __aarch64__
+static void prepare_vcpu_init(struct vcpu_reg_list *c, struct kvm_vcpu_init *init)
+{
+ struct vcpu_reg_sublist *s;
+
+ for_each_sublist(c, s)
+ if (s->capability)
+ init->features[s->feature / 32] |= 1 << (s->feature % 32);
+}
+
+static struct kvm_vcpu *vcpu_config_get_vcpu(struct vcpu_reg_list *c, struct kvm_vm *vm)
+{
+ struct kvm_vcpu_init init = { .target = -1, };
+ struct kvm_vcpu *vcpu;
+
+ prepare_vcpu_init(c, &init);
+ vcpu = __vm_vcpu_add(vm, 0);
+ aarch64_vcpu_setup(vcpu, &init);
+
+ return vcpu;
+}
+#else
+static struct kvm_vcpu *vcpu_config_get_vcpu(struct vcpu_reg_list *c, struct kvm_vm *vm)
+{
+ return __vm_vcpu_add(vm, 0);
+}
+#endif
+
+static void check_supported(struct vcpu_reg_list *c)
+{
+ struct vcpu_reg_sublist *s;
+
+ for_each_sublist(c, s) {
+ if (!s->capability)
+ continue;
+
+ __TEST_REQUIRE(kvm_has_cap(s->capability),
+ "%s: %s not available, skipping tests",
+ config_name(c), s->name);
+ }
+}
+
+static bool print_list;
+static bool print_filtered;
+
+static void run_test(struct vcpu_reg_list *c)
+{
+ int new_regs = 0, missing_regs = 0, i, n;
+ int failed_get = 0, failed_set = 0, failed_reject = 0;
+ int skipped_set = 0;
+ struct kvm_vcpu *vcpu;
+ struct kvm_vm *vm;
+ struct vcpu_reg_sublist *s;
+
+ check_supported(c);
+
+ vm = vm_create_barebones();
+ vcpu = vcpu_config_get_vcpu(c, vm);
+ finalize_vcpu(vcpu, c);
+
+ reg_list = vcpu_get_reg_list(vcpu);
+
+ if (print_list || print_filtered) {
+ putchar('\n');
+ for_each_reg(i) {
+ __u64 id = reg_list->reg[i];
+ if ((print_list && !filter_reg(id)) ||
+ (print_filtered && filter_reg(id)))
+ print_reg(config_name(c), id);
+ }
+ putchar('\n');
+ return;
+ }
+
+ for_each_sublist(c, s)
+ blessed_n += s->regs_n;
+ blessed_reg = calloc(blessed_n, sizeof(__u64));
+
+ n = 0;
+ for_each_sublist(c, s) {
+ for (i = 0; i < s->regs_n; ++i)
+ blessed_reg[n++] = s->regs[i];
+ }
+
+ /*
+ * We only test that we can get the register and then write back the
+ * same value. Some registers may allow other values to be written
+ * back, but others only allow some bits to be changed, and at least
+ * for ID registers set will fail if the value does not exactly match
+ * what was returned by get. If registers that allow other values to
+ * be written need to have the other values tested, then we should
+ * create a new set of tests for those in a new independent test
+ * executable.
+ *
+ * Only do the get/set tests on present, blessed list registers,
+ * since we don't know the capabilities of any new registers.
+ */
+ for_each_present_blessed_reg(i) {
+ uint8_t addr[2048 / 8];
+ struct kvm_one_reg reg = {
+ .id = reg_list->reg[i],
+ .addr = (__u64)&addr,
+ };
+ bool reject_reg = false, skip_reg = false;
+ int ret;
+
+ ret = __vcpu_get_reg(vcpu, reg_list->reg[i], &addr);
+ if (ret) {
+ printf("%s: Failed to get ", config_name(c));
+ print_reg(config_name(c), reg.id);
+ putchar('\n');
+ ++failed_get;
+ }
+
+ for_each_sublist(c, s) {
+ /* rejects_set registers are rejected for set operation */
+ if (s->rejects_set && find_reg(s->rejects_set, s->rejects_set_n, reg.id)) {
+ reject_reg = true;
+ ret = __vcpu_ioctl(vcpu, KVM_SET_ONE_REG, &reg);
+ if (ret != -1 || !check_reject_set(errno)) {
+ printf("%s: Failed to reject (ret=%d, errno=%d) ", config_name(c), ret, errno);
+ print_reg(config_name(c), reg.id);
+ putchar('\n');
+ ++failed_reject;
+ }
+ break;
+ }
+
+ /* skips_set registers are skipped for set operation */
+ if (s->skips_set && find_reg(s->skips_set, s->skips_set_n, reg.id)) {
+ skip_reg = true;
+ ++skipped_set;
+ break;
+ }
+ }
+
+ if (!reject_reg && !skip_reg) {
+ ret = __vcpu_ioctl(vcpu, KVM_SET_ONE_REG, &reg);
+ if (ret) {
+ printf("%s: Failed to set ", config_name(c));
+ print_reg(config_name(c), reg.id);
+ putchar('\n');
+ ++failed_set;
+ }
+ }
+ }
+
+ for_each_new_reg(i)
+ ++new_regs;
+
+ for_each_missing_reg(i)
+ ++missing_regs;
+
+ if (new_regs || missing_regs) {
+ n = 0;
+ for_each_reg_filtered(i)
+ ++n;
+
+ printf("%s: Number blessed registers: %5lld\n", config_name(c), blessed_n);
+ printf("%s: Number registers: %5lld (includes %lld filtered registers)\n",
+ config_name(c), reg_list->n, reg_list->n - n);
+ }
+
+ if (new_regs) {
+ printf("\n%s: There are %d new registers.\n"
+ "Consider adding them to the blessed reg "
+ "list with the following lines:\n\n", config_name(c), new_regs);
+ for_each_new_reg(i)
+ print_reg(config_name(c), reg_list->reg[i]);
+ putchar('\n');
+ }
+
+ if (missing_regs) {
+ printf("\n%s: There are %d missing registers.\n"
+ "The following lines are missing registers:\n\n", config_name(c), missing_regs);
+ for_each_missing_reg(i)
+ print_reg(config_name(c), blessed_reg[i]);
+ putchar('\n');
+ }
+
+ TEST_ASSERT(!missing_regs && !failed_get && !failed_set && !failed_reject,
+ "%s: There are %d missing registers; %d registers failed get; "
+ "%d registers failed set; %d registers failed reject; %d registers skipped set",
+ config_name(c), missing_regs, failed_get, failed_set, failed_reject, skipped_set);
+
+ pr_info("%s: PASS\n", config_name(c));
+ blessed_n = 0;
+ free(blessed_reg);
+ free(reg_list);
+ kvm_vm_free(vm);
+}
+
+static void help(void)
+{
+ struct vcpu_reg_list *c;
+ int i;
+
+ printf(
+ "\n"
+ "usage: get-reg-list [--config=<selection>] [--list] [--list-filtered]\n\n"
+ " --config=<selection> Used to select a specific vcpu configuration for the test/listing\n"
+ " '<selection>' may be\n");
+
+ for (i = 0; i < vcpu_configs_n; ++i) {
+ c = vcpu_configs[i];
+ printf(
+ " '%s'\n", config_name(c));
+ }
+
+ printf(
+ "\n"
+ " --list Print the register list rather than test it (requires --config)\n"
+ " --list-filtered Print registers that would normally be filtered out (requires --config)\n"
+ "\n"
+ );
+}
+
+static struct vcpu_reg_list *parse_config(const char *config)
+{
+ struct vcpu_reg_list *c = NULL;
+ int i;
+
+ if (config[8] != '=')
+ help(), exit(1);
+
+ for (i = 0; i < vcpu_configs_n; ++i) {
+ c = vcpu_configs[i];
+ if (strcmp(config_name(c), &config[9]) == 0)
+ break;
+ }
+
+ if (i == vcpu_configs_n)
+ help(), exit(1);
+
+ return c;
+}
+
+int main(int ac, char **av)
+{
+ struct vcpu_reg_list *c, *sel = NULL;
+ int i, ret = 0;
+ pid_t pid;
+
+ for (i = 1; i < ac; ++i) {
+ if (strncmp(av[i], "--config", 8) == 0)
+ sel = parse_config(av[i]);
+ else if (strcmp(av[i], "--list") == 0)
+ print_list = true;
+ else if (strcmp(av[i], "--list-filtered") == 0)
+ print_filtered = true;
+ else if (strcmp(av[i], "--help") == 0 || strcmp(av[1], "-h") == 0)
+ help(), exit(0);
+ else
+ help(), exit(1);
+ }
+
+ if (print_list || print_filtered) {
+ /*
+ * We only want to print the register list of a single config.
+ */
+ if (!sel)
+ help(), exit(1);
+ }
+
+ for (i = 0; i < vcpu_configs_n; ++i) {
+ c = vcpu_configs[i];
+ if (sel && c != sel)
+ continue;
+
+ pid = fork();
+
+ if (!pid) {
+ run_test(c);
+ exit(0);
+ } else {
+ int wstatus;
+ pid_t wpid = wait(&wstatus);
+ TEST_ASSERT(wpid == pid && WIFEXITED(wstatus), "wait: Unexpected return");
+ if (WEXITSTATUS(wstatus) && WEXITSTATUS(wstatus) != KSFT_SKIP)
+ ret = KSFT_FAIL;
+ }
+ }
+
+ return ret;
+}
diff --git a/tools/testing/selftests/kvm/guest_memfd_test.c b/tools/testing/selftests/kvm/guest_memfd_test.c
new file mode 100644
index 000000000000..92eae206baa6
--- /dev/null
+++ b/tools/testing/selftests/kvm/guest_memfd_test.c
@@ -0,0 +1,201 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright Intel Corporation, 2023
+ *
+ * Author: Chao Peng <chao.p.peng@linux.intel.com>
+ */
+
+#define _GNU_SOURCE
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+#include <errno.h>
+#include <stdio.h>
+#include <fcntl.h>
+
+#include <linux/bitmap.h>
+#include <linux/falloc.h>
+#include <sys/mman.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+
+#include "test_util.h"
+#include "kvm_util_base.h"
+
+static void test_file_read_write(int fd)
+{
+ char buf[64];
+
+ TEST_ASSERT(read(fd, buf, sizeof(buf)) < 0,
+ "read on a guest_mem fd should fail");
+ TEST_ASSERT(write(fd, buf, sizeof(buf)) < 0,
+ "write on a guest_mem fd should fail");
+ TEST_ASSERT(pread(fd, buf, sizeof(buf), 0) < 0,
+ "pread on a guest_mem fd should fail");
+ TEST_ASSERT(pwrite(fd, buf, sizeof(buf), 0) < 0,
+ "pwrite on a guest_mem fd should fail");
+}
+
+static void test_mmap(int fd, size_t page_size)
+{
+ char *mem;
+
+ mem = mmap(NULL, page_size, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0);
+ TEST_ASSERT_EQ(mem, MAP_FAILED);
+}
+
+static void test_file_size(int fd, size_t page_size, size_t total_size)
+{
+ struct stat sb;
+ int ret;
+
+ ret = fstat(fd, &sb);
+ TEST_ASSERT(!ret, "fstat should succeed");
+ TEST_ASSERT_EQ(sb.st_size, total_size);
+ TEST_ASSERT_EQ(sb.st_blksize, page_size);
+}
+
+static void test_fallocate(int fd, size_t page_size, size_t total_size)
+{
+ int ret;
+
+ ret = fallocate(fd, FALLOC_FL_KEEP_SIZE, 0, total_size);
+ TEST_ASSERT(!ret, "fallocate with aligned offset and size should succeed");
+
+ ret = fallocate(fd, FALLOC_FL_KEEP_SIZE | FALLOC_FL_PUNCH_HOLE,
+ page_size - 1, page_size);
+ TEST_ASSERT(ret, "fallocate with unaligned offset should fail");
+
+ ret = fallocate(fd, FALLOC_FL_KEEP_SIZE, total_size, page_size);
+ TEST_ASSERT(ret, "fallocate beginning at total_size should fail");
+
+ ret = fallocate(fd, FALLOC_FL_KEEP_SIZE, total_size + page_size, page_size);
+ TEST_ASSERT(ret, "fallocate beginning after total_size should fail");
+
+ ret = fallocate(fd, FALLOC_FL_KEEP_SIZE | FALLOC_FL_PUNCH_HOLE,
+ total_size, page_size);
+ TEST_ASSERT(!ret, "fallocate(PUNCH_HOLE) at total_size should succeed");
+
+ ret = fallocate(fd, FALLOC_FL_KEEP_SIZE | FALLOC_FL_PUNCH_HOLE,
+ total_size + page_size, page_size);
+ TEST_ASSERT(!ret, "fallocate(PUNCH_HOLE) after total_size should succeed");
+
+ ret = fallocate(fd, FALLOC_FL_KEEP_SIZE | FALLOC_FL_PUNCH_HOLE,
+ page_size, page_size - 1);
+ TEST_ASSERT(ret, "fallocate with unaligned size should fail");
+
+ ret = fallocate(fd, FALLOC_FL_KEEP_SIZE | FALLOC_FL_PUNCH_HOLE,
+ page_size, page_size);
+ TEST_ASSERT(!ret, "fallocate(PUNCH_HOLE) with aligned offset and size should succeed");
+
+ ret = fallocate(fd, FALLOC_FL_KEEP_SIZE, page_size, page_size);
+ TEST_ASSERT(!ret, "fallocate to restore punched hole should succeed");
+}
+
+static void test_invalid_punch_hole(int fd, size_t page_size, size_t total_size)
+{
+ struct {
+ off_t offset;
+ off_t len;
+ } testcases[] = {
+ {0, 1},
+ {0, page_size - 1},
+ {0, page_size + 1},
+
+ {1, 1},
+ {1, page_size - 1},
+ {1, page_size},
+ {1, page_size + 1},
+
+ {page_size, 1},
+ {page_size, page_size - 1},
+ {page_size, page_size + 1},
+ };
+ int ret, i;
+
+ for (i = 0; i < ARRAY_SIZE(testcases); i++) {
+ ret = fallocate(fd, FALLOC_FL_KEEP_SIZE | FALLOC_FL_PUNCH_HOLE,
+ testcases[i].offset, testcases[i].len);
+ TEST_ASSERT(ret == -1 && errno == EINVAL,
+ "PUNCH_HOLE with !PAGE_SIZE offset (%lx) and/or length (%lx) should fail",
+ testcases[i].offset, testcases[i].len);
+ }
+}
+
+static void test_create_guest_memfd_invalid(struct kvm_vm *vm)
+{
+ size_t page_size = getpagesize();
+ uint64_t flag;
+ size_t size;
+ int fd;
+
+ for (size = 1; size < page_size; size++) {
+ fd = __vm_create_guest_memfd(vm, size, 0);
+ TEST_ASSERT(fd == -1 && errno == EINVAL,
+ "guest_memfd() with non-page-aligned page size '0x%lx' should fail with EINVAL",
+ size);
+ }
+
+ for (flag = 0; flag; flag <<= 1) {
+ fd = __vm_create_guest_memfd(vm, page_size, flag);
+ TEST_ASSERT(fd == -1 && errno == EINVAL,
+ "guest_memfd() with flag '0x%lx' should fail with EINVAL",
+ flag);
+ }
+}
+
+static void test_create_guest_memfd_multiple(struct kvm_vm *vm)
+{
+ int fd1, fd2, ret;
+ struct stat st1, st2;
+
+ fd1 = __vm_create_guest_memfd(vm, 4096, 0);
+ TEST_ASSERT(fd1 != -1, "memfd creation should succeed");
+
+ ret = fstat(fd1, &st1);
+ TEST_ASSERT(ret != -1, "memfd fstat should succeed");
+ TEST_ASSERT(st1.st_size == 4096, "memfd st_size should match requested size");
+
+ fd2 = __vm_create_guest_memfd(vm, 8192, 0);
+ TEST_ASSERT(fd2 != -1, "memfd creation should succeed");
+
+ ret = fstat(fd2, &st2);
+ TEST_ASSERT(ret != -1, "memfd fstat should succeed");
+ TEST_ASSERT(st2.st_size == 8192, "second memfd st_size should match requested size");
+
+ ret = fstat(fd1, &st1);
+ TEST_ASSERT(ret != -1, "memfd fstat should succeed");
+ TEST_ASSERT(st1.st_size == 4096, "first memfd st_size should still match requested size");
+ TEST_ASSERT(st1.st_ino != st2.st_ino, "different memfd should have different inode numbers");
+
+ close(fd2);
+ close(fd1);
+}
+
+int main(int argc, char *argv[])
+{
+ size_t page_size;
+ size_t total_size;
+ int fd;
+ struct kvm_vm *vm;
+
+ TEST_REQUIRE(kvm_has_cap(KVM_CAP_GUEST_MEMFD));
+
+ page_size = getpagesize();
+ total_size = page_size * 4;
+
+ vm = vm_create_barebones();
+
+ test_create_guest_memfd_invalid(vm);
+ test_create_guest_memfd_multiple(vm);
+
+ fd = vm_create_guest_memfd(vm, total_size, 0);
+
+ test_file_read_write(fd);
+ test_mmap(fd, page_size);
+ test_file_size(fd, page_size, total_size);
+ test_fallocate(fd, page_size, total_size);
+ test_invalid_punch_hole(fd, page_size, total_size);
+
+ close(fd);
+}
diff --git a/tools/testing/selftests/kvm/guest_print_test.c b/tools/testing/selftests/kvm/guest_print_test.c
new file mode 100644
index 000000000000..3502caa3590c
--- /dev/null
+++ b/tools/testing/selftests/kvm/guest_print_test.c
@@ -0,0 +1,219 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * A test for GUEST_PRINTF
+ *
+ * Copyright 2022, Google, Inc. and/or its affiliates.
+ */
+#include <fcntl.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/ioctl.h>
+
+#include "test_util.h"
+#include "kvm_util.h"
+#include "processor.h"
+
+struct guest_vals {
+ uint64_t a;
+ uint64_t b;
+ uint64_t type;
+};
+
+static struct guest_vals vals;
+
+/* GUEST_PRINTF()/GUEST_ASSERT_FMT() does not support float or double. */
+#define TYPE_LIST \
+TYPE(test_type_i64, I64, "%ld", int64_t) \
+TYPE(test_type_u64, U64u, "%lu", uint64_t) \
+TYPE(test_type_x64, U64x, "0x%lx", uint64_t) \
+TYPE(test_type_X64, U64X, "0x%lX", uint64_t) \
+TYPE(test_type_u32, U32u, "%u", uint32_t) \
+TYPE(test_type_x32, U32x, "0x%x", uint32_t) \
+TYPE(test_type_X32, U32X, "0x%X", uint32_t) \
+TYPE(test_type_int, INT, "%d", int) \
+TYPE(test_type_char, CHAR, "%c", char) \
+TYPE(test_type_str, STR, "'%s'", const char *) \
+TYPE(test_type_ptr, PTR, "%p", uintptr_t)
+
+enum args_type {
+#define TYPE(fn, ext, fmt_t, T) TYPE_##ext,
+ TYPE_LIST
+#undef TYPE
+};
+
+static void run_test(struct kvm_vcpu *vcpu, const char *expected_printf,
+ const char *expected_assert);
+
+#define BUILD_TYPE_STRINGS_AND_HELPER(fn, ext, fmt_t, T) \
+const char *PRINTF_FMT_##ext = "Got params a = " fmt_t " and b = " fmt_t; \
+const char *ASSERT_FMT_##ext = "Expected " fmt_t ", got " fmt_t " instead"; \
+static void fn(struct kvm_vcpu *vcpu, T a, T b) \
+{ \
+ char expected_printf[UCALL_BUFFER_LEN]; \
+ char expected_assert[UCALL_BUFFER_LEN]; \
+ \
+ snprintf(expected_printf, UCALL_BUFFER_LEN, PRINTF_FMT_##ext, a, b); \
+ snprintf(expected_assert, UCALL_BUFFER_LEN, ASSERT_FMT_##ext, a, b); \
+ vals = (struct guest_vals){ (uint64_t)a, (uint64_t)b, TYPE_##ext }; \
+ sync_global_to_guest(vcpu->vm, vals); \
+ run_test(vcpu, expected_printf, expected_assert); \
+}
+
+#define TYPE(fn, ext, fmt_t, T) \
+ BUILD_TYPE_STRINGS_AND_HELPER(fn, ext, fmt_t, T)
+ TYPE_LIST
+#undef TYPE
+
+static void guest_code(void)
+{
+ while (1) {
+ switch (vals.type) {
+#define TYPE(fn, ext, fmt_t, T) \
+ case TYPE_##ext: \
+ GUEST_PRINTF(PRINTF_FMT_##ext, vals.a, vals.b); \
+ __GUEST_ASSERT(vals.a == vals.b, \
+ ASSERT_FMT_##ext, vals.a, vals.b); \
+ break;
+ TYPE_LIST
+#undef TYPE
+ default:
+ GUEST_SYNC(vals.type);
+ }
+
+ GUEST_DONE();
+ }
+}
+
+/*
+ * Unfortunately this gets a little messy because 'assert_msg' doesn't
+ * just contains the matching string, it also contains additional assert
+ * info. Fortunately the part that matches should be at the very end of
+ * 'assert_msg'.
+ */
+static void ucall_abort(const char *assert_msg, const char *expected_assert_msg)
+{
+ int len_str = strlen(assert_msg);
+ int len_substr = strlen(expected_assert_msg);
+ int offset = len_str - len_substr;
+
+ TEST_ASSERT(len_substr <= len_str,
+ "Expected '%s' to be a substring of '%s'",
+ assert_msg, expected_assert_msg);
+
+ TEST_ASSERT(strcmp(&assert_msg[offset], expected_assert_msg) == 0,
+ "Unexpected mismatch. Expected: '%s', got: '%s'",
+ expected_assert_msg, &assert_msg[offset]);
+}
+
+static void run_test(struct kvm_vcpu *vcpu, const char *expected_printf,
+ const char *expected_assert)
+{
+ struct kvm_run *run = vcpu->run;
+ struct ucall uc;
+
+ while (1) {
+ vcpu_run(vcpu);
+
+ TEST_ASSERT(run->exit_reason == UCALL_EXIT_REASON,
+ "Unexpected exit reason: %u (%s),",
+ run->exit_reason, exit_reason_str(run->exit_reason));
+
+ switch (get_ucall(vcpu, &uc)) {
+ case UCALL_SYNC:
+ TEST_FAIL("Unknown 'args_type' = %lu", uc.args[1]);
+ break;
+ case UCALL_PRINTF:
+ TEST_ASSERT(strcmp(uc.buffer, expected_printf) == 0,
+ "Unexpected mismatch. Expected: '%s', got: '%s'",
+ expected_printf, uc.buffer);
+ break;
+ case UCALL_ABORT:
+ ucall_abort(uc.buffer, expected_assert);
+ break;
+ case UCALL_DONE:
+ return;
+ default:
+ TEST_FAIL("Unknown ucall %lu", uc.cmd);
+ }
+ }
+}
+
+static void guest_code_limits(void)
+{
+ char test_str[UCALL_BUFFER_LEN + 10];
+
+ memset(test_str, 'a', sizeof(test_str));
+ test_str[sizeof(test_str) - 1] = 0;
+
+ GUEST_PRINTF("%s", test_str);
+}
+
+static void test_limits(void)
+{
+ struct kvm_vcpu *vcpu;
+ struct kvm_run *run;
+ struct kvm_vm *vm;
+ struct ucall uc;
+
+ vm = vm_create_with_one_vcpu(&vcpu, guest_code_limits);
+ run = vcpu->run;
+ vcpu_run(vcpu);
+
+ TEST_ASSERT(run->exit_reason == UCALL_EXIT_REASON,
+ "Unexpected exit reason: %u (%s),",
+ run->exit_reason, exit_reason_str(run->exit_reason));
+
+ TEST_ASSERT(get_ucall(vcpu, &uc) == UCALL_ABORT,
+ "Unexpected ucall command: %lu, Expected: %u (UCALL_ABORT)",
+ uc.cmd, UCALL_ABORT);
+
+ kvm_vm_free(vm);
+}
+
+int main(int argc, char *argv[])
+{
+ struct kvm_vcpu *vcpu;
+ struct kvm_vm *vm;
+
+ vm = vm_create_with_one_vcpu(&vcpu, guest_code);
+
+ test_type_i64(vcpu, -1, -1);
+ test_type_i64(vcpu, -1, 1);
+ test_type_i64(vcpu, 0x1234567890abcdef, 0x1234567890abcdef);
+ test_type_i64(vcpu, 0x1234567890abcdef, 0x1234567890abcdee);
+
+ test_type_u64(vcpu, 0x1234567890abcdef, 0x1234567890abcdef);
+ test_type_u64(vcpu, 0x1234567890abcdef, 0x1234567890abcdee);
+ test_type_x64(vcpu, 0x1234567890abcdef, 0x1234567890abcdef);
+ test_type_x64(vcpu, 0x1234567890abcdef, 0x1234567890abcdee);
+ test_type_X64(vcpu, 0x1234567890abcdef, 0x1234567890abcdef);
+ test_type_X64(vcpu, 0x1234567890abcdef, 0x1234567890abcdee);
+
+ test_type_u32(vcpu, 0x90abcdef, 0x90abcdef);
+ test_type_u32(vcpu, 0x90abcdef, 0x90abcdee);
+ test_type_x32(vcpu, 0x90abcdef, 0x90abcdef);
+ test_type_x32(vcpu, 0x90abcdef, 0x90abcdee);
+ test_type_X32(vcpu, 0x90abcdef, 0x90abcdef);
+ test_type_X32(vcpu, 0x90abcdef, 0x90abcdee);
+
+ test_type_int(vcpu, -1, -1);
+ test_type_int(vcpu, -1, 1);
+ test_type_int(vcpu, 1, 1);
+
+ test_type_char(vcpu, 'a', 'a');
+ test_type_char(vcpu, 'a', 'A');
+ test_type_char(vcpu, 'a', 'b');
+
+ test_type_str(vcpu, "foo", "foo");
+ test_type_str(vcpu, "foo", "bar");
+
+ test_type_ptr(vcpu, 0x1234567890abcdef, 0x1234567890abcdef);
+ test_type_ptr(vcpu, 0x1234567890abcdef, 0x1234567890abcdee);
+
+ kvm_vm_free(vm);
+
+ test_limits();
+
+ return 0;
+}
diff --git a/tools/testing/selftests/kvm/hardware_disable_test.c b/tools/testing/selftests/kvm/hardware_disable_test.c
new file mode 100644
index 000000000000..decc521fc760
--- /dev/null
+++ b/tools/testing/selftests/kvm/hardware_disable_test.c
@@ -0,0 +1,184 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * This test is intended to reproduce a crash that happens when
+ * kvm_arch_hardware_disable is called and it attempts to unregister the user
+ * return notifiers.
+ */
+
+#define _GNU_SOURCE
+
+#include <fcntl.h>
+#include <pthread.h>
+#include <semaphore.h>
+#include <stdint.h>
+#include <stdlib.h>
+#include <unistd.h>
+#include <sys/wait.h>
+
+#include <test_util.h>
+
+#include "kvm_util.h"
+
+#define VCPU_NUM 4
+#define SLEEPING_THREAD_NUM (1 << 4)
+#define FORK_NUM (1ULL << 9)
+#define DELAY_US_MAX 2000
+#define GUEST_CODE_PIO_PORT 4
+
+sem_t *sem;
+
+static void guest_code(void)
+{
+ for (;;)
+ ; /* Some busy work */
+ printf("Should not be reached.\n");
+}
+
+static void *run_vcpu(void *arg)
+{
+ struct kvm_vcpu *vcpu = arg;
+ struct kvm_run *run = vcpu->run;
+
+ vcpu_run(vcpu);
+
+ TEST_ASSERT(false, "%s: exited with reason %d: %s",
+ __func__, run->exit_reason,
+ exit_reason_str(run->exit_reason));
+ pthread_exit(NULL);
+}
+
+static void *sleeping_thread(void *arg)
+{
+ int fd;
+
+ while (true) {
+ fd = open("/dev/null", O_RDWR);
+ close(fd);
+ }
+ TEST_ASSERT(false, "%s: exited", __func__);
+ pthread_exit(NULL);
+}
+
+static inline void check_create_thread(pthread_t *thread, pthread_attr_t *attr,
+ void *(*f)(void *), void *arg)
+{
+ int r;
+
+ r = pthread_create(thread, attr, f, arg);
+ TEST_ASSERT(r == 0, "%s: failed to create thread", __func__);
+}
+
+static inline void check_set_affinity(pthread_t thread, cpu_set_t *cpu_set)
+{
+ int r;
+
+ r = pthread_setaffinity_np(thread, sizeof(cpu_set_t), cpu_set);
+ TEST_ASSERT(r == 0, "%s: failed set affinity", __func__);
+}
+
+static inline void check_join(pthread_t thread, void **retval)
+{
+ int r;
+
+ r = pthread_join(thread, retval);
+ TEST_ASSERT(r == 0, "%s: failed to join thread", __func__);
+}
+
+static void run_test(uint32_t run)
+{
+ struct kvm_vcpu *vcpu;
+ struct kvm_vm *vm;
+ cpu_set_t cpu_set;
+ pthread_t threads[VCPU_NUM];
+ pthread_t throw_away;
+ void *b;
+ uint32_t i, j;
+
+ CPU_ZERO(&cpu_set);
+ for (i = 0; i < VCPU_NUM; i++)
+ CPU_SET(i, &cpu_set);
+
+ vm = vm_create(VCPU_NUM);
+
+ pr_debug("%s: [%d] start vcpus\n", __func__, run);
+ for (i = 0; i < VCPU_NUM; ++i) {
+ vcpu = vm_vcpu_add(vm, i, guest_code);
+
+ check_create_thread(&threads[i], NULL, run_vcpu, vcpu);
+ check_set_affinity(threads[i], &cpu_set);
+
+ for (j = 0; j < SLEEPING_THREAD_NUM; ++j) {
+ check_create_thread(&throw_away, NULL, sleeping_thread,
+ (void *)NULL);
+ check_set_affinity(throw_away, &cpu_set);
+ }
+ }
+ pr_debug("%s: [%d] all threads launched\n", __func__, run);
+ sem_post(sem);
+ for (i = 0; i < VCPU_NUM; ++i)
+ check_join(threads[i], &b);
+ /* Should not be reached */
+ TEST_ASSERT(false, "%s: [%d] child escaped the ninja", __func__, run);
+}
+
+void wait_for_child_setup(pid_t pid)
+{
+ /*
+ * Wait for the child to post to the semaphore, but wake up periodically
+ * to check if the child exited prematurely.
+ */
+ for (;;) {
+ const struct timespec wait_period = { .tv_sec = 1 };
+ int status;
+
+ if (!sem_timedwait(sem, &wait_period))
+ return;
+
+ /* Child is still running, keep waiting. */
+ if (pid != waitpid(pid, &status, WNOHANG))
+ continue;
+
+ /*
+ * Child is no longer running, which is not expected.
+ *
+ * If it exited with a non-zero status, we explicitly forward
+ * the child's status in case it exited with KSFT_SKIP.
+ */
+ if (WIFEXITED(status))
+ exit(WEXITSTATUS(status));
+ else
+ TEST_ASSERT(false, "Child exited unexpectedly");
+ }
+}
+
+int main(int argc, char **argv)
+{
+ uint32_t i;
+ int s, r;
+ pid_t pid;
+
+ sem = sem_open("vm_sem", O_CREAT | O_EXCL, 0644, 0);
+ sem_unlink("vm_sem");
+
+ for (i = 0; i < FORK_NUM; ++i) {
+ pid = fork();
+ TEST_ASSERT(pid >= 0, "%s: unable to fork", __func__);
+ if (pid == 0)
+ run_test(i); /* This function always exits */
+
+ pr_debug("%s: [%d] waiting semaphore\n", __func__, i);
+ wait_for_child_setup(pid);
+ r = (rand() % DELAY_US_MAX) + 1;
+ pr_debug("%s: [%d] waiting %dus\n", __func__, i, r);
+ usleep(r);
+ r = waitpid(pid, &s, WNOHANG);
+ TEST_ASSERT(r != pid,
+ "%s: [%d] child exited unexpectedly status: [%d]",
+ __func__, i, s);
+ pr_debug("%s: [%d] killing child\n", __func__, i);
+ kill(pid, SIGKILL);
+ }
+
+ sem_destroy(sem);
+ exit(0);
+}
diff --git a/tools/testing/selftests/kvm/include/aarch64/arch_timer.h b/tools/testing/selftests/kvm/include/aarch64/arch_timer.h
new file mode 100644
index 000000000000..b3e97525cb55
--- /dev/null
+++ b/tools/testing/selftests/kvm/include/aarch64/arch_timer.h
@@ -0,0 +1,142 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * ARM Generic Timer specific interface
+ */
+
+#ifndef SELFTEST_KVM_ARCH_TIMER_H
+#define SELFTEST_KVM_ARCH_TIMER_H
+
+#include "processor.h"
+
+enum arch_timer {
+ VIRTUAL,
+ PHYSICAL,
+};
+
+#define CTL_ENABLE (1 << 0)
+#define CTL_IMASK (1 << 1)
+#define CTL_ISTATUS (1 << 2)
+
+#define msec_to_cycles(msec) \
+ (timer_get_cntfrq() * (uint64_t)(msec) / 1000)
+
+#define usec_to_cycles(usec) \
+ (timer_get_cntfrq() * (uint64_t)(usec) / 1000000)
+
+#define cycles_to_usec(cycles) \
+ ((uint64_t)(cycles) * 1000000 / timer_get_cntfrq())
+
+static inline uint32_t timer_get_cntfrq(void)
+{
+ return read_sysreg(cntfrq_el0);
+}
+
+static inline uint64_t timer_get_cntct(enum arch_timer timer)
+{
+ isb();
+
+ switch (timer) {
+ case VIRTUAL:
+ return read_sysreg(cntvct_el0);
+ case PHYSICAL:
+ return read_sysreg(cntpct_el0);
+ default:
+ GUEST_FAIL("Unexpected timer type = %u", timer);
+ }
+
+ /* We should not reach here */
+ return 0;
+}
+
+static inline void timer_set_cval(enum arch_timer timer, uint64_t cval)
+{
+ switch (timer) {
+ case VIRTUAL:
+ write_sysreg(cval, cntv_cval_el0);
+ break;
+ case PHYSICAL:
+ write_sysreg(cval, cntp_cval_el0);
+ break;
+ default:
+ GUEST_FAIL("Unexpected timer type = %u", timer);
+ }
+
+ isb();
+}
+
+static inline uint64_t timer_get_cval(enum arch_timer timer)
+{
+ switch (timer) {
+ case VIRTUAL:
+ return read_sysreg(cntv_cval_el0);
+ case PHYSICAL:
+ return read_sysreg(cntp_cval_el0);
+ default:
+ GUEST_FAIL("Unexpected timer type = %u", timer);
+ }
+
+ /* We should not reach here */
+ return 0;
+}
+
+static inline void timer_set_tval(enum arch_timer timer, uint32_t tval)
+{
+ switch (timer) {
+ case VIRTUAL:
+ write_sysreg(tval, cntv_tval_el0);
+ break;
+ case PHYSICAL:
+ write_sysreg(tval, cntp_tval_el0);
+ break;
+ default:
+ GUEST_FAIL("Unexpected timer type = %u", timer);
+ }
+
+ isb();
+}
+
+static inline void timer_set_ctl(enum arch_timer timer, uint32_t ctl)
+{
+ switch (timer) {
+ case VIRTUAL:
+ write_sysreg(ctl, cntv_ctl_el0);
+ break;
+ case PHYSICAL:
+ write_sysreg(ctl, cntp_ctl_el0);
+ break;
+ default:
+ GUEST_FAIL("Unexpected timer type = %u", timer);
+ }
+
+ isb();
+}
+
+static inline uint32_t timer_get_ctl(enum arch_timer timer)
+{
+ switch (timer) {
+ case VIRTUAL:
+ return read_sysreg(cntv_ctl_el0);
+ case PHYSICAL:
+ return read_sysreg(cntp_ctl_el0);
+ default:
+ GUEST_FAIL("Unexpected timer type = %u", timer);
+ }
+
+ /* We should not reach here */
+ return 0;
+}
+
+static inline void timer_set_next_cval_ms(enum arch_timer timer, uint32_t msec)
+{
+ uint64_t now_ct = timer_get_cntct(timer);
+ uint64_t next_ct = now_ct + msec_to_cycles(msec);
+
+ timer_set_cval(timer, next_ct);
+}
+
+static inline void timer_set_next_tval_ms(enum arch_timer timer, uint32_t msec)
+{
+ timer_set_tval(timer, msec_to_cycles(msec));
+}
+
+#endif /* SELFTEST_KVM_ARCH_TIMER_H */
diff --git a/tools/testing/selftests/kvm/include/aarch64/delay.h b/tools/testing/selftests/kvm/include/aarch64/delay.h
new file mode 100644
index 000000000000..329e4f5079ea
--- /dev/null
+++ b/tools/testing/selftests/kvm/include/aarch64/delay.h
@@ -0,0 +1,25 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * ARM simple delay routines
+ */
+
+#ifndef SELFTEST_KVM_ARM_DELAY_H
+#define SELFTEST_KVM_ARM_DELAY_H
+
+#include "arch_timer.h"
+
+static inline void __delay(uint64_t cycles)
+{
+ enum arch_timer timer = VIRTUAL;
+ uint64_t start = timer_get_cntct(timer);
+
+ while ((timer_get_cntct(timer) - start) < cycles)
+ cpu_relax();
+}
+
+static inline void udelay(unsigned long usec)
+{
+ __delay(usec_to_cycles(usec));
+}
+
+#endif /* SELFTEST_KVM_ARM_DELAY_H */
diff --git a/tools/testing/selftests/kvm/include/aarch64/gic.h b/tools/testing/selftests/kvm/include/aarch64/gic.h
new file mode 100644
index 000000000000..b217ea17cac5
--- /dev/null
+++ b/tools/testing/selftests/kvm/include/aarch64/gic.h
@@ -0,0 +1,47 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * ARM Generic Interrupt Controller (GIC) specific defines
+ */
+
+#ifndef SELFTEST_KVM_GIC_H
+#define SELFTEST_KVM_GIC_H
+
+enum gic_type {
+ GIC_V3,
+ GIC_TYPE_MAX,
+};
+
+#define MIN_SGI 0
+#define MIN_PPI 16
+#define MIN_SPI 32
+#define MAX_SPI 1019
+#define IAR_SPURIOUS 1023
+
+#define INTID_IS_SGI(intid) (0 <= (intid) && (intid) < MIN_PPI)
+#define INTID_IS_PPI(intid) (MIN_PPI <= (intid) && (intid) < MIN_SPI)
+#define INTID_IS_SPI(intid) (MIN_SPI <= (intid) && (intid) <= MAX_SPI)
+
+void gic_init(enum gic_type type, unsigned int nr_cpus,
+ void *dist_base, void *redist_base);
+void gic_irq_enable(unsigned int intid);
+void gic_irq_disable(unsigned int intid);
+unsigned int gic_get_and_ack_irq(void);
+void gic_set_eoi(unsigned int intid);
+void gic_set_dir(unsigned int intid);
+
+/*
+ * Sets the EOI mode. When split is false, EOI just drops the priority. When
+ * split is true, EOI drops the priority and deactivates the interrupt.
+ */
+void gic_set_eoi_split(bool split);
+void gic_set_priority_mask(uint64_t mask);
+void gic_set_priority(uint32_t intid, uint32_t prio);
+void gic_irq_set_active(unsigned int intid);
+void gic_irq_clear_active(unsigned int intid);
+bool gic_irq_get_active(unsigned int intid);
+void gic_irq_set_pending(unsigned int intid);
+void gic_irq_clear_pending(unsigned int intid);
+bool gic_irq_get_pending(unsigned int intid);
+void gic_irq_set_config(unsigned int intid, bool is_edge);
+
+#endif /* SELFTEST_KVM_GIC_H */
diff --git a/tools/testing/selftests/kvm/include/aarch64/gic_v3.h b/tools/testing/selftests/kvm/include/aarch64/gic_v3.h
new file mode 100644
index 000000000000..ba0886e8a2bb
--- /dev/null
+++ b/tools/testing/selftests/kvm/include/aarch64/gic_v3.h
@@ -0,0 +1,82 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * ARM Generic Interrupt Controller (GIC) v3 specific defines
+ */
+
+#ifndef SELFTEST_KVM_GICV3_H
+#define SELFTEST_KVM_GICV3_H
+
+#include <asm/sysreg.h>
+
+/*
+ * Distributor registers
+ */
+#define GICD_CTLR 0x0000
+#define GICD_TYPER 0x0004
+#define GICD_IGROUPR 0x0080
+#define GICD_ISENABLER 0x0100
+#define GICD_ICENABLER 0x0180
+#define GICD_ISPENDR 0x0200
+#define GICD_ICPENDR 0x0280
+#define GICD_ICACTIVER 0x0380
+#define GICD_ISACTIVER 0x0300
+#define GICD_IPRIORITYR 0x0400
+#define GICD_ICFGR 0x0C00
+
+/*
+ * The assumption is that the guest runs in a non-secure mode.
+ * The following bits of GICD_CTLR are defined accordingly.
+ */
+#define GICD_CTLR_RWP (1U << 31)
+#define GICD_CTLR_nASSGIreq (1U << 8)
+#define GICD_CTLR_ARE_NS (1U << 4)
+#define GICD_CTLR_ENABLE_G1A (1U << 1)
+#define GICD_CTLR_ENABLE_G1 (1U << 0)
+
+#define GICD_TYPER_SPIS(typer) ((((typer) & 0x1f) + 1) * 32)
+#define GICD_INT_DEF_PRI_X4 0xa0a0a0a0
+
+/*
+ * Redistributor registers
+ */
+#define GICR_CTLR 0x000
+#define GICR_WAKER 0x014
+
+#define GICR_CTLR_RWP (1U << 3)
+
+#define GICR_WAKER_ProcessorSleep (1U << 1)
+#define GICR_WAKER_ChildrenAsleep (1U << 2)
+
+/*
+ * Redistributor registers, offsets from SGI base
+ */
+#define GICR_IGROUPR0 GICD_IGROUPR
+#define GICR_ISENABLER0 GICD_ISENABLER
+#define GICR_ICENABLER0 GICD_ICENABLER
+#define GICR_ISPENDR0 GICD_ISPENDR
+#define GICR_ISACTIVER0 GICD_ISACTIVER
+#define GICR_ICACTIVER0 GICD_ICACTIVER
+#define GICR_ICENABLER GICD_ICENABLER
+#define GICR_ICACTIVER GICD_ICACTIVER
+#define GICR_IPRIORITYR0 GICD_IPRIORITYR
+
+/* CPU interface registers */
+#define SYS_ICC_PMR_EL1 sys_reg(3, 0, 4, 6, 0)
+#define SYS_ICC_IAR1_EL1 sys_reg(3, 0, 12, 12, 0)
+#define SYS_ICC_EOIR1_EL1 sys_reg(3, 0, 12, 12, 1)
+#define SYS_ICC_DIR_EL1 sys_reg(3, 0, 12, 11, 1)
+#define SYS_ICC_CTLR_EL1 sys_reg(3, 0, 12, 12, 4)
+#define SYS_ICC_SRE_EL1 sys_reg(3, 0, 12, 12, 5)
+#define SYS_ICC_GRPEN1_EL1 sys_reg(3, 0, 12, 12, 7)
+
+#define SYS_ICV_AP1R0_EL1 sys_reg(3, 0, 12, 9, 0)
+
+#define ICC_PMR_DEF_PRIO 0xf0
+
+#define ICC_SRE_EL1_SRE (1U << 0)
+
+#define ICC_IGRPEN1_EL1_ENABLE (1U << 0)
+
+#define GICV3_MAX_CPUS 512
+
+#endif /* SELFTEST_KVM_GICV3_H */
diff --git a/tools/testing/selftests/kvm/include/aarch64/kvm_util_arch.h b/tools/testing/selftests/kvm/include/aarch64/kvm_util_arch.h
new file mode 100644
index 000000000000..e43a57d99b56
--- /dev/null
+++ b/tools/testing/selftests/kvm/include/aarch64/kvm_util_arch.h
@@ -0,0 +1,7 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+#ifndef SELFTEST_KVM_UTIL_ARCH_H
+#define SELFTEST_KVM_UTIL_ARCH_H
+
+struct kvm_vm_arch {};
+
+#endif // SELFTEST_KVM_UTIL_ARCH_H
diff --git a/tools/testing/selftests/kvm/include/aarch64/processor.h b/tools/testing/selftests/kvm/include/aarch64/processor.h
index b7fa0c8551db..9e518b562827 100644
--- a/tools/testing/selftests/kvm/include/aarch64/processor.h
+++ b/tools/testing/selftests/kvm/include/aarch64/processor.h
@@ -8,16 +8,25 @@
#define SELFTEST_KVM_PROCESSOR_H
#include "kvm_util.h"
+#include <linux/stringify.h>
+#include <linux/types.h>
+#include <asm/sysreg.h>
#define ARM64_CORE_REG(x) (KVM_REG_ARM64 | KVM_REG_SIZE_U64 | \
KVM_REG_ARM_CORE | KVM_REG_ARM_CORE_REG(x))
-#define CPACR_EL1 3, 0, 1, 0, 2
-#define TCR_EL1 3, 0, 2, 0, 2
-#define MAIR_EL1 3, 0, 10, 2, 0
-#define TTBR0_EL1 3, 0, 2, 0, 0
-#define SCTLR_EL1 3, 0, 1, 0, 0
+/*
+ * KVM_ARM64_SYS_REG(sys_reg_id): Helper macro to convert
+ * SYS_* register definitions in asm/sysreg.h to use in KVM
+ * calls such as vcpu_get_reg() and vcpu_set_reg().
+ */
+#define KVM_ARM64_SYS_REG(sys_reg_id) \
+ ARM64_SYS_REG(sys_reg_Op0(sys_reg_id), \
+ sys_reg_Op1(sys_reg_id), \
+ sys_reg_CRn(sys_reg_id), \
+ sys_reg_CRm(sys_reg_id), \
+ sys_reg_Op2(sys_reg_id))
/*
* Default MAIR
@@ -29,31 +38,192 @@
* NORMAL 4 1111:1111
* NORMAL_WT 5 1011:1011
*/
-#define DEFAULT_MAIR_EL1 ((0x00ul << (0 * 8)) | \
- (0x04ul << (1 * 8)) | \
- (0x0cul << (2 * 8)) | \
- (0x44ul << (3 * 8)) | \
- (0xfful << (4 * 8)) | \
- (0xbbul << (5 * 8)))
-
-static inline void get_reg(struct kvm_vm *vm, uint32_t vcpuid, uint64_t id, uint64_t *addr)
+
+/* Linux doesn't use these memory types, so let's define them. */
+#define MAIR_ATTR_DEVICE_GRE UL(0x0c)
+#define MAIR_ATTR_NORMAL_WT UL(0xbb)
+
+#define MT_DEVICE_nGnRnE 0
+#define MT_DEVICE_nGnRE 1
+#define MT_DEVICE_GRE 2
+#define MT_NORMAL_NC 3
+#define MT_NORMAL 4
+#define MT_NORMAL_WT 5
+
+#define DEFAULT_MAIR_EL1 \
+ (MAIR_ATTRIDX(MAIR_ATTR_DEVICE_nGnRnE, MT_DEVICE_nGnRnE) | \
+ MAIR_ATTRIDX(MAIR_ATTR_DEVICE_nGnRE, MT_DEVICE_nGnRE) | \
+ MAIR_ATTRIDX(MAIR_ATTR_DEVICE_GRE, MT_DEVICE_GRE) | \
+ MAIR_ATTRIDX(MAIR_ATTR_NORMAL_NC, MT_NORMAL_NC) | \
+ MAIR_ATTRIDX(MAIR_ATTR_NORMAL, MT_NORMAL) | \
+ MAIR_ATTRIDX(MAIR_ATTR_NORMAL_WT, MT_NORMAL_WT))
+
+#define MPIDR_HWID_BITMASK (0xff00fffffful)
+
+void aarch64_vcpu_setup(struct kvm_vcpu *vcpu, struct kvm_vcpu_init *init);
+struct kvm_vcpu *aarch64_vcpu_add(struct kvm_vm *vm, uint32_t vcpu_id,
+ struct kvm_vcpu_init *init, void *guest_code);
+
+struct ex_regs {
+ u64 regs[31];
+ u64 sp;
+ u64 pc;
+ u64 pstate;
+};
+
+#define VECTOR_NUM 16
+
+enum {
+ VECTOR_SYNC_CURRENT_SP0,
+ VECTOR_IRQ_CURRENT_SP0,
+ VECTOR_FIQ_CURRENT_SP0,
+ VECTOR_ERROR_CURRENT_SP0,
+
+ VECTOR_SYNC_CURRENT,
+ VECTOR_IRQ_CURRENT,
+ VECTOR_FIQ_CURRENT,
+ VECTOR_ERROR_CURRENT,
+
+ VECTOR_SYNC_LOWER_64,
+ VECTOR_IRQ_LOWER_64,
+ VECTOR_FIQ_LOWER_64,
+ VECTOR_ERROR_LOWER_64,
+
+ VECTOR_SYNC_LOWER_32,
+ VECTOR_IRQ_LOWER_32,
+ VECTOR_FIQ_LOWER_32,
+ VECTOR_ERROR_LOWER_32,
+};
+
+#define VECTOR_IS_SYNC(v) ((v) == VECTOR_SYNC_CURRENT_SP0 || \
+ (v) == VECTOR_SYNC_CURRENT || \
+ (v) == VECTOR_SYNC_LOWER_64 || \
+ (v) == VECTOR_SYNC_LOWER_32)
+
+#define ESR_EC_NUM 64
+#define ESR_EC_SHIFT 26
+#define ESR_EC_MASK (ESR_EC_NUM - 1)
+
+#define ESR_EC_UNKNOWN 0x0
+#define ESR_EC_SVC64 0x15
+#define ESR_EC_IABT 0x21
+#define ESR_EC_DABT 0x25
+#define ESR_EC_HW_BP_CURRENT 0x31
+#define ESR_EC_SSTEP_CURRENT 0x33
+#define ESR_EC_WP_CURRENT 0x35
+#define ESR_EC_BRK_INS 0x3c
+
+/* Access flag */
+#define PTE_AF (1ULL << 10)
+
+/* Access flag update enable/disable */
+#define TCR_EL1_HA (1ULL << 39)
+
+void aarch64_get_supported_page_sizes(uint32_t ipa, uint32_t *ipa4k,
+ uint32_t *ipa16k, uint32_t *ipa64k);
+
+void vm_init_descriptor_tables(struct kvm_vm *vm);
+void vcpu_init_descriptor_tables(struct kvm_vcpu *vcpu);
+
+typedef void(*handler_fn)(struct ex_regs *);
+void vm_install_exception_handler(struct kvm_vm *vm,
+ int vector, handler_fn handler);
+void vm_install_sync_handler(struct kvm_vm *vm,
+ int vector, int ec, handler_fn handler);
+
+uint64_t *virt_get_pte_hva(struct kvm_vm *vm, vm_vaddr_t gva);
+
+static inline void cpu_relax(void)
{
- struct kvm_one_reg reg;
- reg.id = id;
- reg.addr = (uint64_t)addr;
- vcpu_ioctl(vm, vcpuid, KVM_GET_ONE_REG, &reg);
+ asm volatile("yield" ::: "memory");
}
-static inline void set_reg(struct kvm_vm *vm, uint32_t vcpuid, uint64_t id, uint64_t val)
+#define isb() asm volatile("isb" : : : "memory")
+#define dsb(opt) asm volatile("dsb " #opt : : : "memory")
+#define dmb(opt) asm volatile("dmb " #opt : : : "memory")
+
+#define dma_wmb() dmb(oshst)
+#define __iowmb() dma_wmb()
+
+#define dma_rmb() dmb(oshld)
+
+#define __iormb(v) \
+({ \
+ unsigned long tmp; \
+ \
+ dma_rmb(); \
+ \
+ /* \
+ * Courtesy of arch/arm64/include/asm/io.h: \
+ * Create a dummy control dependency from the IO read to any \
+ * later instructions. This ensures that a subsequent call \
+ * to udelay() will be ordered due to the ISB in __delay(). \
+ */ \
+ asm volatile("eor %0, %1, %1\n" \
+ "cbnz %0, ." \
+ : "=r" (tmp) : "r" ((unsigned long)(v)) \
+ : "memory"); \
+})
+
+static __always_inline void __raw_writel(u32 val, volatile void *addr)
+{
+ asm volatile("str %w0, [%1]" : : "rZ" (val), "r" (addr));
+}
+
+static __always_inline u32 __raw_readl(const volatile void *addr)
+{
+ u32 val;
+ asm volatile("ldr %w0, [%1]" : "=r" (val) : "r" (addr));
+ return val;
+}
+
+#define writel_relaxed(v,c) ((void)__raw_writel((__force u32)cpu_to_le32(v),(c)))
+#define readl_relaxed(c) ({ u32 __r = le32_to_cpu((__force __le32)__raw_readl(c)); __r; })
+
+#define writel(v,c) ({ __iowmb(); writel_relaxed((v),(c));})
+#define readl(c) ({ u32 __v = readl_relaxed(c); __iormb(__v); __v; })
+
+static inline void local_irq_enable(void)
+{
+ asm volatile("msr daifclr, #3" : : : "memory");
+}
+
+static inline void local_irq_disable(void)
{
- struct kvm_one_reg reg;
- reg.id = id;
- reg.addr = (uint64_t)&val;
- vcpu_ioctl(vm, vcpuid, KVM_SET_ONE_REG, &reg);
+ asm volatile("msr daifset, #3" : : : "memory");
}
-void aarch64_vcpu_setup(struct kvm_vm *vm, int vcpuid, struct kvm_vcpu_init *init);
-void aarch64_vcpu_add_default(struct kvm_vm *vm, uint32_t vcpuid,
- struct kvm_vcpu_init *init, void *guest_code);
+/**
+ * struct arm_smccc_res - Result from SMC/HVC call
+ * @a0-a3 result values from registers 0 to 3
+ */
+struct arm_smccc_res {
+ unsigned long a0;
+ unsigned long a1;
+ unsigned long a2;
+ unsigned long a3;
+};
+
+/**
+ * smccc_hvc - Invoke a SMCCC function using the hvc conduit
+ * @function_id: the SMCCC function to be called
+ * @arg0-arg6: SMCCC function arguments, corresponding to registers x1-x7
+ * @res: pointer to write the return values from registers x0-x3
+ *
+ */
+void smccc_hvc(uint32_t function_id, uint64_t arg0, uint64_t arg1,
+ uint64_t arg2, uint64_t arg3, uint64_t arg4, uint64_t arg5,
+ uint64_t arg6, struct arm_smccc_res *res);
+
+/**
+ * smccc_smc - Invoke a SMCCC function using the smc conduit
+ * @function_id: the SMCCC function to be called
+ * @arg0-arg6: SMCCC function arguments, corresponding to registers x1-x7
+ * @res: pointer to write the return values from registers x0-x3
+ *
+ */
+void smccc_smc(uint32_t function_id, uint64_t arg0, uint64_t arg1,
+ uint64_t arg2, uint64_t arg3, uint64_t arg4, uint64_t arg5,
+ uint64_t arg6, struct arm_smccc_res *res);
#endif /* SELFTEST_KVM_PROCESSOR_H */
diff --git a/tools/testing/selftests/kvm/include/aarch64/spinlock.h b/tools/testing/selftests/kvm/include/aarch64/spinlock.h
new file mode 100644
index 000000000000..cf0984106d14
--- /dev/null
+++ b/tools/testing/selftests/kvm/include/aarch64/spinlock.h
@@ -0,0 +1,13 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+
+#ifndef SELFTEST_KVM_ARM64_SPINLOCK_H
+#define SELFTEST_KVM_ARM64_SPINLOCK_H
+
+struct spinlock {
+ int v;
+};
+
+extern void spin_lock(struct spinlock *lock);
+extern void spin_unlock(struct spinlock *lock);
+
+#endif /* SELFTEST_KVM_ARM64_SPINLOCK_H */
diff --git a/tools/testing/selftests/kvm/include/aarch64/ucall.h b/tools/testing/selftests/kvm/include/aarch64/ucall.h
new file mode 100644
index 000000000000..4b68f37efd36
--- /dev/null
+++ b/tools/testing/selftests/kvm/include/aarch64/ucall.h
@@ -0,0 +1,20 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+#ifndef SELFTEST_KVM_UCALL_H
+#define SELFTEST_KVM_UCALL_H
+
+#include "kvm_util_base.h"
+
+#define UCALL_EXIT_REASON KVM_EXIT_MMIO
+
+/*
+ * ucall_exit_mmio_addr holds per-VM values (global data is duplicated by each
+ * VM), it must not be accessed from host code.
+ */
+extern vm_vaddr_t *ucall_exit_mmio_addr;
+
+static inline void ucall_arch_do_ucall(vm_vaddr_t uc)
+{
+ WRITE_ONCE(*ucall_exit_mmio_addr, uc);
+}
+
+#endif
diff --git a/tools/testing/selftests/kvm/include/aarch64/vgic.h b/tools/testing/selftests/kvm/include/aarch64/vgic.h
new file mode 100644
index 000000000000..0ac6f05c63f9
--- /dev/null
+++ b/tools/testing/selftests/kvm/include/aarch64/vgic.h
@@ -0,0 +1,36 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * ARM Generic Interrupt Controller (GIC) host specific defines
+ */
+
+#ifndef SELFTEST_KVM_VGIC_H
+#define SELFTEST_KVM_VGIC_H
+
+#include <linux/kvm.h>
+
+#include "kvm_util.h"
+
+#define REDIST_REGION_ATTR_ADDR(count, base, flags, index) \
+ (((uint64_t)(count) << 52) | \
+ ((uint64_t)((base) >> 16) << 16) | \
+ ((uint64_t)(flags) << 12) | \
+ index)
+
+int vgic_v3_setup(struct kvm_vm *vm, unsigned int nr_vcpus, uint32_t nr_irqs,
+ uint64_t gicd_base_gpa, uint64_t gicr_base_gpa);
+
+#define VGIC_MAX_RESERVED 1023
+
+void kvm_irq_set_level_info(int gic_fd, uint32_t intid, int level);
+int _kvm_irq_set_level_info(int gic_fd, uint32_t intid, int level);
+
+void kvm_arm_irq_line(struct kvm_vm *vm, uint32_t intid, int level);
+int _kvm_arm_irq_line(struct kvm_vm *vm, uint32_t intid, int level);
+
+/* The vcpu arg only applies to private interrupts. */
+void kvm_irq_write_ispendr(int gic_fd, uint32_t intid, struct kvm_vcpu *vcpu);
+void kvm_irq_write_isactiver(int gic_fd, uint32_t intid, struct kvm_vcpu *vcpu);
+
+#define KVM_IRQCHIP_NUM_PINS (1020 - 32)
+
+#endif // SELFTEST_KVM_VGIC_H
diff --git a/tools/testing/selftests/kvm/include/guest_modes.h b/tools/testing/selftests/kvm/include/guest_modes.h
new file mode 100644
index 000000000000..63f5167397cc
--- /dev/null
+++ b/tools/testing/selftests/kvm/include/guest_modes.h
@@ -0,0 +1,21 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Copyright (C) 2020, Red Hat, Inc.
+ */
+#include "kvm_util.h"
+
+struct guest_mode {
+ bool supported;
+ bool enabled;
+};
+
+extern struct guest_mode guest_modes[NUM_VM_MODES];
+
+#define guest_mode_append(mode, enabled) ({ \
+ guest_modes[mode] = (struct guest_mode){ (enabled), (enabled) }; \
+})
+
+void guest_modes_append_default(void);
+void for_each_guest_mode(void (*func)(enum vm_guest_mode, void *), void *arg);
+void guest_modes_help(void);
+void guest_modes_cmdline(const char *arg);
diff --git a/tools/testing/selftests/kvm/include/kvm_test_harness.h b/tools/testing/selftests/kvm/include/kvm_test_harness.h
new file mode 100644
index 000000000000..8f7c6858e8e2
--- /dev/null
+++ b/tools/testing/selftests/kvm/include/kvm_test_harness.h
@@ -0,0 +1,36 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * Macros for defining a KVM test
+ *
+ * Copyright (C) 2022, Google LLC.
+ */
+
+#ifndef SELFTEST_KVM_TEST_HARNESS_H
+#define SELFTEST_KVM_TEST_HARNESS_H
+
+#include "kselftest_harness.h"
+
+#define KVM_ONE_VCPU_TEST_SUITE(name) \
+ FIXTURE(name) { \
+ struct kvm_vcpu *vcpu; \
+ }; \
+ \
+ FIXTURE_SETUP(name) { \
+ (void)vm_create_with_one_vcpu(&self->vcpu, NULL); \
+ } \
+ \
+ FIXTURE_TEARDOWN(name) { \
+ kvm_vm_free(self->vcpu->vm); \
+ }
+
+#define KVM_ONE_VCPU_TEST(suite, test, guestcode) \
+static void __suite##_##test(struct kvm_vcpu *vcpu); \
+ \
+TEST_F(suite, test) \
+{ \
+ vcpu_arch_set_entry_point(self->vcpu, guestcode); \
+ __suite##_##test(self->vcpu); \
+} \
+static void __suite##_##test(struct kvm_vcpu *vcpu)
+
+#endif /* SELFTEST_KVM_TEST_HARNESS_H */
diff --git a/tools/testing/selftests/kvm/include/kvm_util.h b/tools/testing/selftests/kvm/include/kvm_util.h
index 919e161dd289..c9286811a4cb 100644
--- a/tools/testing/selftests/kvm/include/kvm_util.h
+++ b/tools/testing/selftests/kvm/include/kvm_util.h
@@ -7,337 +7,7 @@
#ifndef SELFTEST_KVM_UTIL_H
#define SELFTEST_KVM_UTIL_H
-#include "test_util.h"
-
-#include "asm/kvm.h"
-#include "linux/list.h"
-#include "linux/kvm.h"
-#include <sys/ioctl.h>
-
-#include "sparsebit.h"
-
-
-/*
- * Callers of kvm_util only have an incomplete/opaque description of the
- * structure kvm_util is using to maintain the state of a VM.
- */
-struct kvm_vm;
-
-typedef uint64_t vm_paddr_t; /* Virtual Machine (Guest) physical address */
-typedef uint64_t vm_vaddr_t; /* Virtual Machine (Guest) virtual address */
-
-/* Minimum allocated guest virtual and physical addresses */
-#define KVM_UTIL_MIN_VADDR 0x2000
-
-#define DEFAULT_GUEST_PHY_PAGES 512
-#define DEFAULT_GUEST_STACK_VADDR_MIN 0xab6000
-#define DEFAULT_STACK_PGS 5
-
-enum vm_guest_mode {
- VM_MODE_P52V48_4K,
- VM_MODE_P52V48_64K,
- VM_MODE_P48V48_4K,
- VM_MODE_P48V48_64K,
- VM_MODE_P40V48_4K,
- VM_MODE_P40V48_64K,
- VM_MODE_PXXV48_4K, /* For 48bits VA but ANY bits PA */
- NUM_VM_MODES,
-};
-
-#if defined(__aarch64__)
-#define VM_MODE_DEFAULT VM_MODE_P40V48_4K
-#elif defined(__x86_64__)
-#define VM_MODE_DEFAULT VM_MODE_PXXV48_4K
-#else
-#define VM_MODE_DEFAULT VM_MODE_P52V48_4K
-#endif
-
-#define vm_guest_mode_string(m) vm_guest_mode_string[m]
-extern const char * const vm_guest_mode_string[];
-
-enum vm_mem_backing_src_type {
- VM_MEM_SRC_ANONYMOUS,
- VM_MEM_SRC_ANONYMOUS_THP,
- VM_MEM_SRC_ANONYMOUS_HUGETLB,
-};
-
-int kvm_check_cap(long cap);
-int vm_enable_cap(struct kvm_vm *vm, struct kvm_enable_cap *cap);
-
-struct kvm_vm *vm_create(enum vm_guest_mode mode, uint64_t phy_pages, int perm);
-struct kvm_vm *_vm_create(enum vm_guest_mode mode, uint64_t phy_pages, int perm);
-void kvm_vm_free(struct kvm_vm *vmp);
-void kvm_vm_restart(struct kvm_vm *vmp, int perm);
-void kvm_vm_release(struct kvm_vm *vmp);
-void kvm_vm_get_dirty_log(struct kvm_vm *vm, int slot, void *log);
-void kvm_vm_clear_dirty_log(struct kvm_vm *vm, int slot, void *log,
- uint64_t first_page, uint32_t num_pages);
-
-int kvm_memcmp_hva_gva(void *hva, struct kvm_vm *vm, const vm_vaddr_t gva,
- size_t len);
-
-void kvm_vm_elf_load(struct kvm_vm *vm, const char *filename,
- uint32_t data_memslot, uint32_t pgd_memslot);
-
-void vm_dump(FILE *stream, struct kvm_vm *vm, uint8_t indent);
-
-/*
- * VM VCPU Dump
- *
- * Input Args:
- * stream - Output FILE stream
- * vm - Virtual Machine
- * vcpuid - VCPU ID
- * indent - Left margin indent amount
- *
- * Output Args: None
- *
- * Return: None
- *
- * Dumps the current state of the VCPU specified by @vcpuid, within the VM
- * given by @vm, to the FILE stream given by @stream.
- */
-void vcpu_dump(FILE *stream, struct kvm_vm *vm, uint32_t vcpuid,
- uint8_t indent);
-
-void vm_create_irqchip(struct kvm_vm *vm);
-
-void vm_userspace_mem_region_add(struct kvm_vm *vm,
- enum vm_mem_backing_src_type src_type,
- uint64_t guest_paddr, uint32_t slot, uint64_t npages,
- uint32_t flags);
-
-void vcpu_ioctl(struct kvm_vm *vm, uint32_t vcpuid, unsigned long ioctl,
- void *arg);
-int _vcpu_ioctl(struct kvm_vm *vm, uint32_t vcpuid, unsigned long ioctl,
- void *arg);
-void vm_ioctl(struct kvm_vm *vm, unsigned long ioctl, void *arg);
-void vm_mem_region_set_flags(struct kvm_vm *vm, uint32_t slot, uint32_t flags);
-void vm_mem_region_move(struct kvm_vm *vm, uint32_t slot, uint64_t new_gpa);
-void vm_mem_region_delete(struct kvm_vm *vm, uint32_t slot);
-void vm_vcpu_add(struct kvm_vm *vm, uint32_t vcpuid);
-vm_vaddr_t vm_vaddr_alloc(struct kvm_vm *vm, size_t sz, vm_vaddr_t vaddr_min,
- uint32_t data_memslot, uint32_t pgd_memslot);
-void virt_map(struct kvm_vm *vm, uint64_t vaddr, uint64_t paddr,
- unsigned int npages, uint32_t pgd_memslot);
-void *addr_gpa2hva(struct kvm_vm *vm, vm_paddr_t gpa);
-void *addr_gva2hva(struct kvm_vm *vm, vm_vaddr_t gva);
-vm_paddr_t addr_hva2gpa(struct kvm_vm *vm, void *hva);
-
-/*
- * Address Guest Virtual to Guest Physical
- *
- * Input Args:
- * vm - Virtual Machine
- * gva - VM virtual address
- *
- * Output Args: None
- *
- * Return:
- * Equivalent VM physical address
- *
- * Returns the VM physical address of the translated VM virtual
- * address given by @gva.
- */
-vm_paddr_t addr_gva2gpa(struct kvm_vm *vm, vm_vaddr_t gva);
-
-struct kvm_run *vcpu_state(struct kvm_vm *vm, uint32_t vcpuid);
-void vcpu_run(struct kvm_vm *vm, uint32_t vcpuid);
-int _vcpu_run(struct kvm_vm *vm, uint32_t vcpuid);
-void vcpu_run_complete_io(struct kvm_vm *vm, uint32_t vcpuid);
-void vcpu_set_guest_debug(struct kvm_vm *vm, uint32_t vcpuid,
- struct kvm_guest_debug *debug);
-void vcpu_set_mp_state(struct kvm_vm *vm, uint32_t vcpuid,
- struct kvm_mp_state *mp_state);
-void vcpu_regs_get(struct kvm_vm *vm, uint32_t vcpuid, struct kvm_regs *regs);
-void vcpu_regs_set(struct kvm_vm *vm, uint32_t vcpuid, struct kvm_regs *regs);
-
-/*
- * VM VCPU Args Set
- *
- * Input Args:
- * vm - Virtual Machine
- * vcpuid - VCPU ID
- * num - number of arguments
- * ... - arguments, each of type uint64_t
- *
- * Output Args: None
- *
- * Return: None
- *
- * Sets the first @num function input registers of the VCPU with @vcpuid,
- * per the C calling convention of the architecture, to the values given
- * as variable args. Each of the variable args is expected to be of type
- * uint64_t. The maximum @num can be is specific to the architecture.
- */
-void vcpu_args_set(struct kvm_vm *vm, uint32_t vcpuid, unsigned int num, ...);
-
-void vcpu_sregs_get(struct kvm_vm *vm, uint32_t vcpuid,
- struct kvm_sregs *sregs);
-void vcpu_sregs_set(struct kvm_vm *vm, uint32_t vcpuid,
- struct kvm_sregs *sregs);
-int _vcpu_sregs_set(struct kvm_vm *vm, uint32_t vcpuid,
- struct kvm_sregs *sregs);
-void vcpu_fpu_get(struct kvm_vm *vm, uint32_t vcpuid,
- struct kvm_fpu *fpu);
-void vcpu_fpu_set(struct kvm_vm *vm, uint32_t vcpuid,
- struct kvm_fpu *fpu);
-void vcpu_get_reg(struct kvm_vm *vm, uint32_t vcpuid, struct kvm_one_reg *reg);
-void vcpu_set_reg(struct kvm_vm *vm, uint32_t vcpuid, struct kvm_one_reg *reg);
-#ifdef __KVM_HAVE_VCPU_EVENTS
-void vcpu_events_get(struct kvm_vm *vm, uint32_t vcpuid,
- struct kvm_vcpu_events *events);
-void vcpu_events_set(struct kvm_vm *vm, uint32_t vcpuid,
- struct kvm_vcpu_events *events);
-#endif
-#ifdef __x86_64__
-void vcpu_nested_state_get(struct kvm_vm *vm, uint32_t vcpuid,
- struct kvm_nested_state *state);
-int vcpu_nested_state_set(struct kvm_vm *vm, uint32_t vcpuid,
- struct kvm_nested_state *state, bool ignore_error);
-#endif
-
-const char *exit_reason_str(unsigned int exit_reason);
-
-void virt_pgd_alloc(struct kvm_vm *vm, uint32_t pgd_memslot);
-
-/*
- * VM Virtual Page Map
- *
- * Input Args:
- * vm - Virtual Machine
- * vaddr - VM Virtual Address
- * paddr - VM Physical Address
- * memslot - Memory region slot for new virtual translation tables
- *
- * Output Args: None
- *
- * Return: None
- *
- * Within @vm, creates a virtual translation for the page starting
- * at @vaddr to the page starting at @paddr.
- */
-void virt_pg_map(struct kvm_vm *vm, uint64_t vaddr, uint64_t paddr,
- uint32_t memslot);
-
-vm_paddr_t vm_phy_page_alloc(struct kvm_vm *vm, vm_paddr_t paddr_min,
- uint32_t memslot);
-vm_paddr_t vm_phy_pages_alloc(struct kvm_vm *vm, size_t num,
- vm_paddr_t paddr_min, uint32_t memslot);
-
-/*
- * Create a VM with reasonable defaults
- *
- * Input Args:
- * vcpuid - The id of the single VCPU to add to the VM.
- * extra_mem_pages - The number of extra pages to add (this will
- * decide how much extra space we will need to
- * setup the page tables using memslot 0)
- * guest_code - The vCPU's entry point
- *
- * Output Args: None
- *
- * Return:
- * Pointer to opaque structure that describes the created VM.
- */
-struct kvm_vm *vm_create_default(uint32_t vcpuid, uint64_t extra_mem_pages,
- void *guest_code);
-
-/*
- * Adds a vCPU with reasonable defaults (e.g. a stack)
- *
- * Input Args:
- * vm - Virtual Machine
- * vcpuid - The id of the VCPU to add to the VM.
- * guest_code - The vCPU's entry point
- */
-void vm_vcpu_add_default(struct kvm_vm *vm, uint32_t vcpuid, void *guest_code);
-
-bool vm_is_unrestricted_guest(struct kvm_vm *vm);
-
-unsigned int vm_get_page_size(struct kvm_vm *vm);
-unsigned int vm_get_page_shift(struct kvm_vm *vm);
-unsigned int vm_get_max_gfn(struct kvm_vm *vm);
-int vm_get_fd(struct kvm_vm *vm);
-
-unsigned int vm_calc_num_guest_pages(enum vm_guest_mode mode, size_t size);
-unsigned int vm_num_host_pages(enum vm_guest_mode mode, unsigned int num_guest_pages);
-unsigned int vm_num_guest_pages(enum vm_guest_mode mode, unsigned int num_host_pages);
-static inline unsigned int
-vm_adjust_num_guest_pages(enum vm_guest_mode mode, unsigned int num_guest_pages)
-{
- unsigned int n;
- n = vm_num_guest_pages(mode, vm_num_host_pages(mode, num_guest_pages));
-#ifdef __s390x__
- /* s390 requires 1M aligned guest sizes */
- n = (n + 255) & ~255;
-#endif
- return n;
-}
-
-struct kvm_userspace_memory_region *
-kvm_userspace_memory_region_find(struct kvm_vm *vm, uint64_t start,
- uint64_t end);
-
-struct kvm_dirty_log *
-allocate_kvm_dirty_log(struct kvm_userspace_memory_region *region);
-
-int vm_create_device(struct kvm_vm *vm, struct kvm_create_device *cd);
-
-#define sync_global_to_guest(vm, g) ({ \
- typeof(g) *_p = addr_gva2hva(vm, (vm_vaddr_t)&(g)); \
- memcpy(_p, &(g), sizeof(g)); \
-})
-
-#define sync_global_from_guest(vm, g) ({ \
- typeof(g) *_p = addr_gva2hva(vm, (vm_vaddr_t)&(g)); \
- memcpy(&(g), _p, sizeof(g)); \
-})
-
-/* Common ucalls */
-enum {
- UCALL_NONE,
- UCALL_SYNC,
- UCALL_ABORT,
- UCALL_DONE,
-};
-
-#define UCALL_MAX_ARGS 6
-
-struct ucall {
- uint64_t cmd;
- uint64_t args[UCALL_MAX_ARGS];
-};
-
-void ucall_init(struct kvm_vm *vm, void *arg);
-void ucall_uninit(struct kvm_vm *vm);
-void ucall(uint64_t cmd, int nargs, ...);
-uint64_t get_ucall(struct kvm_vm *vm, uint32_t vcpu_id, struct ucall *uc);
-
-#define GUEST_SYNC_ARGS(stage, arg1, arg2, arg3, arg4) \
- ucall(UCALL_SYNC, 6, "hello", stage, arg1, arg2, arg3, arg4)
-#define GUEST_SYNC(stage) ucall(UCALL_SYNC, 2, "hello", stage)
-#define GUEST_DONE() ucall(UCALL_DONE, 0)
-#define __GUEST_ASSERT(_condition, _nargs, _args...) do { \
- if (!(_condition)) \
- ucall(UCALL_ABORT, 2 + _nargs, \
- "Failed guest assert: " \
- #_condition, __LINE__, _args); \
-} while (0)
-
-#define GUEST_ASSERT(_condition) \
- __GUEST_ASSERT((_condition), 0, 0)
-
-#define GUEST_ASSERT_1(_condition, arg1) \
- __GUEST_ASSERT((_condition), 1, (arg1))
-
-#define GUEST_ASSERT_2(_condition, arg1, arg2) \
- __GUEST_ASSERT((_condition), 2, (arg1), (arg2))
-
-#define GUEST_ASSERT_3(_condition, arg1, arg2, arg3) \
- __GUEST_ASSERT((_condition), 3, (arg1), (arg2), (arg3))
-
-#define GUEST_ASSERT_4(_condition, arg1, arg2, arg3, arg4) \
- __GUEST_ASSERT((_condition), 4, (arg1), (arg2), (arg3), (arg4))
+#include "kvm_util_base.h"
+#include "ucall_common.h"
#endif /* SELFTEST_KVM_UTIL_H */
diff --git a/tools/testing/selftests/kvm/include/kvm_util_base.h b/tools/testing/selftests/kvm/include/kvm_util_base.h
new file mode 100644
index 000000000000..3e0db283a46a
--- /dev/null
+++ b/tools/testing/selftests/kvm/include/kvm_util_base.h
@@ -0,0 +1,1135 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * tools/testing/selftests/kvm/include/kvm_util_base.h
+ *
+ * Copyright (C) 2018, Google LLC.
+ */
+#ifndef SELFTEST_KVM_UTIL_BASE_H
+#define SELFTEST_KVM_UTIL_BASE_H
+
+#include "test_util.h"
+
+#include <linux/compiler.h>
+#include "linux/hashtable.h"
+#include "linux/list.h"
+#include <linux/kernel.h>
+#include <linux/kvm.h>
+#include "linux/rbtree.h"
+#include <linux/types.h>
+
+#include <asm/atomic.h>
+#include <asm/kvm.h>
+
+#include <sys/ioctl.h>
+
+#include "kvm_util_arch.h"
+#include "sparsebit.h"
+
+/*
+ * Provide a version of static_assert() that is guaranteed to have an optional
+ * message param. If _ISOC11_SOURCE is defined, glibc (/usr/include/assert.h)
+ * #undefs and #defines static_assert() as a direct alias to _Static_assert(),
+ * i.e. effectively makes the message mandatory. Many KVM selftests #define
+ * _GNU_SOURCE for various reasons, and _GNU_SOURCE implies _ISOC11_SOURCE. As
+ * a result, static_assert() behavior is non-deterministic and may or may not
+ * require a message depending on #include order.
+ */
+#define __kvm_static_assert(expr, msg, ...) _Static_assert(expr, msg)
+#define kvm_static_assert(expr, ...) __kvm_static_assert(expr, ##__VA_ARGS__, #expr)
+
+#define KVM_DEV_PATH "/dev/kvm"
+#define KVM_MAX_VCPUS 512
+
+#define NSEC_PER_SEC 1000000000L
+
+typedef uint64_t vm_paddr_t; /* Virtual Machine (Guest) physical address */
+typedef uint64_t vm_vaddr_t; /* Virtual Machine (Guest) virtual address */
+
+struct userspace_mem_region {
+ struct kvm_userspace_memory_region2 region;
+ struct sparsebit *unused_phy_pages;
+ struct sparsebit *protected_phy_pages;
+ int fd;
+ off_t offset;
+ enum vm_mem_backing_src_type backing_src_type;
+ void *host_mem;
+ void *host_alias;
+ void *mmap_start;
+ void *mmap_alias;
+ size_t mmap_size;
+ struct rb_node gpa_node;
+ struct rb_node hva_node;
+ struct hlist_node slot_node;
+};
+
+struct kvm_vcpu {
+ struct list_head list;
+ uint32_t id;
+ int fd;
+ struct kvm_vm *vm;
+ struct kvm_run *run;
+#ifdef __x86_64__
+ struct kvm_cpuid2 *cpuid;
+#endif
+ struct kvm_dirty_gfn *dirty_gfns;
+ uint32_t fetch_index;
+ uint32_t dirty_gfns_count;
+};
+
+struct userspace_mem_regions {
+ struct rb_root gpa_tree;
+ struct rb_root hva_tree;
+ DECLARE_HASHTABLE(slot_hash, 9);
+};
+
+enum kvm_mem_region_type {
+ MEM_REGION_CODE,
+ MEM_REGION_DATA,
+ MEM_REGION_PT,
+ MEM_REGION_TEST_DATA,
+ NR_MEM_REGIONS,
+};
+
+struct kvm_vm {
+ int mode;
+ unsigned long type;
+ uint8_t subtype;
+ int kvm_fd;
+ int fd;
+ unsigned int pgtable_levels;
+ unsigned int page_size;
+ unsigned int page_shift;
+ unsigned int pa_bits;
+ unsigned int va_bits;
+ uint64_t max_gfn;
+ struct list_head vcpus;
+ struct userspace_mem_regions regions;
+ struct sparsebit *vpages_valid;
+ struct sparsebit *vpages_mapped;
+ bool has_irqchip;
+ bool pgd_created;
+ vm_paddr_t ucall_mmio_addr;
+ vm_paddr_t pgd;
+ vm_vaddr_t gdt;
+ vm_vaddr_t tss;
+ vm_vaddr_t idt;
+ vm_vaddr_t handlers;
+ uint32_t dirty_ring_size;
+ uint64_t gpa_tag_mask;
+
+ struct kvm_vm_arch arch;
+
+ /* Cache of information for binary stats interface */
+ int stats_fd;
+ struct kvm_stats_header stats_header;
+ struct kvm_stats_desc *stats_desc;
+
+ /*
+ * KVM region slots. These are the default memslots used by page
+ * allocators, e.g., lib/elf uses the memslots[MEM_REGION_CODE]
+ * memslot.
+ */
+ uint32_t memslots[NR_MEM_REGIONS];
+};
+
+struct vcpu_reg_sublist {
+ const char *name;
+ long capability;
+ int feature;
+ int feature_type;
+ bool finalize;
+ __u64 *regs;
+ __u64 regs_n;
+ __u64 *rejects_set;
+ __u64 rejects_set_n;
+ __u64 *skips_set;
+ __u64 skips_set_n;
+};
+
+struct vcpu_reg_list {
+ char *name;
+ struct vcpu_reg_sublist sublists[];
+};
+
+#define for_each_sublist(c, s) \
+ for ((s) = &(c)->sublists[0]; (s)->regs; ++(s))
+
+#define kvm_for_each_vcpu(vm, i, vcpu) \
+ for ((i) = 0; (i) <= (vm)->last_vcpu_id; (i)++) \
+ if (!((vcpu) = vm->vcpus[i])) \
+ continue; \
+ else
+
+struct userspace_mem_region *
+memslot2region(struct kvm_vm *vm, uint32_t memslot);
+
+static inline struct userspace_mem_region *vm_get_mem_region(struct kvm_vm *vm,
+ enum kvm_mem_region_type type)
+{
+ assert(type < NR_MEM_REGIONS);
+ return memslot2region(vm, vm->memslots[type]);
+}
+
+/* Minimum allocated guest virtual and physical addresses */
+#define KVM_UTIL_MIN_VADDR 0x2000
+#define KVM_GUEST_PAGE_TABLE_MIN_PADDR 0x180000
+
+#define DEFAULT_GUEST_STACK_VADDR_MIN 0xab6000
+#define DEFAULT_STACK_PGS 5
+
+enum vm_guest_mode {
+ VM_MODE_P52V48_4K,
+ VM_MODE_P52V48_16K,
+ VM_MODE_P52V48_64K,
+ VM_MODE_P48V48_4K,
+ VM_MODE_P48V48_16K,
+ VM_MODE_P48V48_64K,
+ VM_MODE_P40V48_4K,
+ VM_MODE_P40V48_16K,
+ VM_MODE_P40V48_64K,
+ VM_MODE_PXXV48_4K, /* For 48bits VA but ANY bits PA */
+ VM_MODE_P47V64_4K,
+ VM_MODE_P44V64_4K,
+ VM_MODE_P36V48_4K,
+ VM_MODE_P36V48_16K,
+ VM_MODE_P36V48_64K,
+ VM_MODE_P36V47_16K,
+ NUM_VM_MODES,
+};
+
+struct vm_shape {
+ uint32_t type;
+ uint8_t mode;
+ uint8_t subtype;
+ uint16_t padding;
+};
+
+kvm_static_assert(sizeof(struct vm_shape) == sizeof(uint64_t));
+
+#define VM_TYPE_DEFAULT 0
+
+#define VM_SHAPE(__mode) \
+({ \
+ struct vm_shape shape = { \
+ .mode = (__mode), \
+ .type = VM_TYPE_DEFAULT \
+ }; \
+ \
+ shape; \
+})
+
+#if defined(__aarch64__)
+
+extern enum vm_guest_mode vm_mode_default;
+
+#define VM_MODE_DEFAULT vm_mode_default
+#define MIN_PAGE_SHIFT 12U
+#define ptes_per_page(page_size) ((page_size) / 8)
+
+#elif defined(__x86_64__)
+
+#define VM_MODE_DEFAULT VM_MODE_PXXV48_4K
+#define MIN_PAGE_SHIFT 12U
+#define ptes_per_page(page_size) ((page_size) / 8)
+
+#elif defined(__s390x__)
+
+#define VM_MODE_DEFAULT VM_MODE_P44V64_4K
+#define MIN_PAGE_SHIFT 12U
+#define ptes_per_page(page_size) ((page_size) / 16)
+
+#elif defined(__riscv)
+
+#if __riscv_xlen == 32
+#error "RISC-V 32-bit kvm selftests not supported"
+#endif
+
+#define VM_MODE_DEFAULT VM_MODE_P40V48_4K
+#define MIN_PAGE_SHIFT 12U
+#define ptes_per_page(page_size) ((page_size) / 8)
+
+#endif
+
+#define VM_SHAPE_DEFAULT VM_SHAPE(VM_MODE_DEFAULT)
+
+#define MIN_PAGE_SIZE (1U << MIN_PAGE_SHIFT)
+#define PTES_PER_MIN_PAGE ptes_per_page(MIN_PAGE_SIZE)
+
+struct vm_guest_mode_params {
+ unsigned int pa_bits;
+ unsigned int va_bits;
+ unsigned int page_size;
+ unsigned int page_shift;
+};
+extern const struct vm_guest_mode_params vm_guest_mode_params[];
+
+int open_path_or_exit(const char *path, int flags);
+int open_kvm_dev_path_or_exit(void);
+
+bool get_kvm_param_bool(const char *param);
+bool get_kvm_intel_param_bool(const char *param);
+bool get_kvm_amd_param_bool(const char *param);
+
+int get_kvm_param_integer(const char *param);
+int get_kvm_intel_param_integer(const char *param);
+int get_kvm_amd_param_integer(const char *param);
+
+unsigned int kvm_check_cap(long cap);
+
+static inline bool kvm_has_cap(long cap)
+{
+ return kvm_check_cap(cap);
+}
+
+#define __KVM_SYSCALL_ERROR(_name, _ret) \
+ "%s failed, rc: %i errno: %i (%s)", (_name), (_ret), errno, strerror(errno)
+
+/*
+ * Use the "inner", double-underscore macro when reporting errors from within
+ * other macros so that the name of ioctl() and not its literal numeric value
+ * is printed on error. The "outer" macro is strongly preferred when reporting
+ * errors "directly", i.e. without an additional layer of macros, as it reduces
+ * the probability of passing in the wrong string.
+ */
+#define __KVM_IOCTL_ERROR(_name, _ret) __KVM_SYSCALL_ERROR(_name, _ret)
+#define KVM_IOCTL_ERROR(_ioctl, _ret) __KVM_IOCTL_ERROR(#_ioctl, _ret)
+
+#define kvm_do_ioctl(fd, cmd, arg) \
+({ \
+ kvm_static_assert(!_IOC_SIZE(cmd) || sizeof(*arg) == _IOC_SIZE(cmd)); \
+ ioctl(fd, cmd, arg); \
+})
+
+#define __kvm_ioctl(kvm_fd, cmd, arg) \
+ kvm_do_ioctl(kvm_fd, cmd, arg)
+
+#define kvm_ioctl(kvm_fd, cmd, arg) \
+({ \
+ int ret = __kvm_ioctl(kvm_fd, cmd, arg); \
+ \
+ TEST_ASSERT(!ret, __KVM_IOCTL_ERROR(#cmd, ret)); \
+})
+
+static __always_inline void static_assert_is_vm(struct kvm_vm *vm) { }
+
+#define __vm_ioctl(vm, cmd, arg) \
+({ \
+ static_assert_is_vm(vm); \
+ kvm_do_ioctl((vm)->fd, cmd, arg); \
+})
+
+/*
+ * Assert that a VM or vCPU ioctl() succeeded, with extra magic to detect if
+ * the ioctl() failed because KVM killed/bugged the VM. To detect a dead VM,
+ * probe KVM_CAP_USER_MEMORY, which (a) has been supported by KVM since before
+ * selftests existed and (b) should never outright fail, i.e. is supposed to
+ * return 0 or 1. If KVM kills a VM, KVM returns -EIO for all ioctl()s for the
+ * VM and its vCPUs, including KVM_CHECK_EXTENSION.
+ */
+#define __TEST_ASSERT_VM_VCPU_IOCTL(cond, name, ret, vm) \
+do { \
+ int __errno = errno; \
+ \
+ static_assert_is_vm(vm); \
+ \
+ if (cond) \
+ break; \
+ \
+ if (errno == EIO && \
+ __vm_ioctl(vm, KVM_CHECK_EXTENSION, (void *)KVM_CAP_USER_MEMORY) < 0) { \
+ TEST_ASSERT(errno == EIO, "KVM killed the VM, should return -EIO"); \
+ TEST_FAIL("KVM killed/bugged the VM, check the kernel log for clues"); \
+ } \
+ errno = __errno; \
+ TEST_ASSERT(cond, __KVM_IOCTL_ERROR(name, ret)); \
+} while (0)
+
+#define TEST_ASSERT_VM_VCPU_IOCTL(cond, cmd, ret, vm) \
+ __TEST_ASSERT_VM_VCPU_IOCTL(cond, #cmd, ret, vm)
+
+#define vm_ioctl(vm, cmd, arg) \
+({ \
+ int ret = __vm_ioctl(vm, cmd, arg); \
+ \
+ __TEST_ASSERT_VM_VCPU_IOCTL(!ret, #cmd, ret, vm); \
+})
+
+static __always_inline void static_assert_is_vcpu(struct kvm_vcpu *vcpu) { }
+
+#define __vcpu_ioctl(vcpu, cmd, arg) \
+({ \
+ static_assert_is_vcpu(vcpu); \
+ kvm_do_ioctl((vcpu)->fd, cmd, arg); \
+})
+
+#define vcpu_ioctl(vcpu, cmd, arg) \
+({ \
+ int ret = __vcpu_ioctl(vcpu, cmd, arg); \
+ \
+ __TEST_ASSERT_VM_VCPU_IOCTL(!ret, #cmd, ret, (vcpu)->vm); \
+})
+
+/*
+ * Looks up and returns the value corresponding to the capability
+ * (KVM_CAP_*) given by cap.
+ */
+static inline int vm_check_cap(struct kvm_vm *vm, long cap)
+{
+ int ret = __vm_ioctl(vm, KVM_CHECK_EXTENSION, (void *)cap);
+
+ TEST_ASSERT_VM_VCPU_IOCTL(ret >= 0, KVM_CHECK_EXTENSION, ret, vm);
+ return ret;
+}
+
+static inline int __vm_enable_cap(struct kvm_vm *vm, uint32_t cap, uint64_t arg0)
+{
+ struct kvm_enable_cap enable_cap = { .cap = cap, .args = { arg0 } };
+
+ return __vm_ioctl(vm, KVM_ENABLE_CAP, &enable_cap);
+}
+static inline void vm_enable_cap(struct kvm_vm *vm, uint32_t cap, uint64_t arg0)
+{
+ struct kvm_enable_cap enable_cap = { .cap = cap, .args = { arg0 } };
+
+ vm_ioctl(vm, KVM_ENABLE_CAP, &enable_cap);
+}
+
+static inline void vm_set_memory_attributes(struct kvm_vm *vm, uint64_t gpa,
+ uint64_t size, uint64_t attributes)
+{
+ struct kvm_memory_attributes attr = {
+ .attributes = attributes,
+ .address = gpa,
+ .size = size,
+ .flags = 0,
+ };
+
+ /*
+ * KVM_SET_MEMORY_ATTRIBUTES overwrites _all_ attributes. These flows
+ * need significant enhancements to support multiple attributes.
+ */
+ TEST_ASSERT(!attributes || attributes == KVM_MEMORY_ATTRIBUTE_PRIVATE,
+ "Update me to support multiple attributes!");
+
+ vm_ioctl(vm, KVM_SET_MEMORY_ATTRIBUTES, &attr);
+}
+
+
+static inline void vm_mem_set_private(struct kvm_vm *vm, uint64_t gpa,
+ uint64_t size)
+{
+ vm_set_memory_attributes(vm, gpa, size, KVM_MEMORY_ATTRIBUTE_PRIVATE);
+}
+
+static inline void vm_mem_set_shared(struct kvm_vm *vm, uint64_t gpa,
+ uint64_t size)
+{
+ vm_set_memory_attributes(vm, gpa, size, 0);
+}
+
+void vm_guest_mem_fallocate(struct kvm_vm *vm, uint64_t gpa, uint64_t size,
+ bool punch_hole);
+
+static inline void vm_guest_mem_punch_hole(struct kvm_vm *vm, uint64_t gpa,
+ uint64_t size)
+{
+ vm_guest_mem_fallocate(vm, gpa, size, true);
+}
+
+static inline void vm_guest_mem_allocate(struct kvm_vm *vm, uint64_t gpa,
+ uint64_t size)
+{
+ vm_guest_mem_fallocate(vm, gpa, size, false);
+}
+
+void vm_enable_dirty_ring(struct kvm_vm *vm, uint32_t ring_size);
+const char *vm_guest_mode_string(uint32_t i);
+
+void kvm_vm_free(struct kvm_vm *vmp);
+void kvm_vm_restart(struct kvm_vm *vmp);
+void kvm_vm_release(struct kvm_vm *vmp);
+int kvm_memcmp_hva_gva(void *hva, struct kvm_vm *vm, const vm_vaddr_t gva,
+ size_t len);
+void kvm_vm_elf_load(struct kvm_vm *vm, const char *filename);
+int kvm_memfd_alloc(size_t size, bool hugepages);
+
+void vm_dump(FILE *stream, struct kvm_vm *vm, uint8_t indent);
+
+static inline void kvm_vm_get_dirty_log(struct kvm_vm *vm, int slot, void *log)
+{
+ struct kvm_dirty_log args = { .dirty_bitmap = log, .slot = slot };
+
+ vm_ioctl(vm, KVM_GET_DIRTY_LOG, &args);
+}
+
+static inline void kvm_vm_clear_dirty_log(struct kvm_vm *vm, int slot, void *log,
+ uint64_t first_page, uint32_t num_pages)
+{
+ struct kvm_clear_dirty_log args = {
+ .dirty_bitmap = log,
+ .slot = slot,
+ .first_page = first_page,
+ .num_pages = num_pages
+ };
+
+ vm_ioctl(vm, KVM_CLEAR_DIRTY_LOG, &args);
+}
+
+static inline uint32_t kvm_vm_reset_dirty_ring(struct kvm_vm *vm)
+{
+ return __vm_ioctl(vm, KVM_RESET_DIRTY_RINGS, NULL);
+}
+
+static inline int vm_get_stats_fd(struct kvm_vm *vm)
+{
+ int fd = __vm_ioctl(vm, KVM_GET_STATS_FD, NULL);
+
+ TEST_ASSERT_VM_VCPU_IOCTL(fd >= 0, KVM_GET_STATS_FD, fd, vm);
+ return fd;
+}
+
+static inline void read_stats_header(int stats_fd, struct kvm_stats_header *header)
+{
+ ssize_t ret;
+
+ ret = pread(stats_fd, header, sizeof(*header), 0);
+ TEST_ASSERT(ret == sizeof(*header),
+ "Failed to read '%lu' header bytes, ret = '%ld'",
+ sizeof(*header), ret);
+}
+
+struct kvm_stats_desc *read_stats_descriptors(int stats_fd,
+ struct kvm_stats_header *header);
+
+static inline ssize_t get_stats_descriptor_size(struct kvm_stats_header *header)
+{
+ /*
+ * The base size of the descriptor is defined by KVM's ABI, but the
+ * size of the name field is variable, as far as KVM's ABI is
+ * concerned. For a given instance of KVM, the name field is the same
+ * size for all stats and is provided in the overall stats header.
+ */
+ return sizeof(struct kvm_stats_desc) + header->name_size;
+}
+
+static inline struct kvm_stats_desc *get_stats_descriptor(struct kvm_stats_desc *stats,
+ int index,
+ struct kvm_stats_header *header)
+{
+ /*
+ * Note, size_desc includes the size of the name field, which is
+ * variable. i.e. this is NOT equivalent to &stats_desc[i].
+ */
+ return (void *)stats + index * get_stats_descriptor_size(header);
+}
+
+void read_stat_data(int stats_fd, struct kvm_stats_header *header,
+ struct kvm_stats_desc *desc, uint64_t *data,
+ size_t max_elements);
+
+void __vm_get_stat(struct kvm_vm *vm, const char *stat_name, uint64_t *data,
+ size_t max_elements);
+
+static inline uint64_t vm_get_stat(struct kvm_vm *vm, const char *stat_name)
+{
+ uint64_t data;
+
+ __vm_get_stat(vm, stat_name, &data, 1);
+ return data;
+}
+
+void vm_create_irqchip(struct kvm_vm *vm);
+
+static inline int __vm_create_guest_memfd(struct kvm_vm *vm, uint64_t size,
+ uint64_t flags)
+{
+ struct kvm_create_guest_memfd guest_memfd = {
+ .size = size,
+ .flags = flags,
+ };
+
+ return __vm_ioctl(vm, KVM_CREATE_GUEST_MEMFD, &guest_memfd);
+}
+
+static inline int vm_create_guest_memfd(struct kvm_vm *vm, uint64_t size,
+ uint64_t flags)
+{
+ int fd = __vm_create_guest_memfd(vm, size, flags);
+
+ TEST_ASSERT(fd >= 0, KVM_IOCTL_ERROR(KVM_CREATE_GUEST_MEMFD, fd));
+ return fd;
+}
+
+void vm_set_user_memory_region(struct kvm_vm *vm, uint32_t slot, uint32_t flags,
+ uint64_t gpa, uint64_t size, void *hva);
+int __vm_set_user_memory_region(struct kvm_vm *vm, uint32_t slot, uint32_t flags,
+ uint64_t gpa, uint64_t size, void *hva);
+void vm_set_user_memory_region2(struct kvm_vm *vm, uint32_t slot, uint32_t flags,
+ uint64_t gpa, uint64_t size, void *hva,
+ uint32_t guest_memfd, uint64_t guest_memfd_offset);
+int __vm_set_user_memory_region2(struct kvm_vm *vm, uint32_t slot, uint32_t flags,
+ uint64_t gpa, uint64_t size, void *hva,
+ uint32_t guest_memfd, uint64_t guest_memfd_offset);
+
+void vm_userspace_mem_region_add(struct kvm_vm *vm,
+ enum vm_mem_backing_src_type src_type,
+ uint64_t guest_paddr, uint32_t slot, uint64_t npages,
+ uint32_t flags);
+void vm_mem_add(struct kvm_vm *vm, enum vm_mem_backing_src_type src_type,
+ uint64_t guest_paddr, uint32_t slot, uint64_t npages,
+ uint32_t flags, int guest_memfd_fd, uint64_t guest_memfd_offset);
+
+#ifndef vm_arch_has_protected_memory
+static inline bool vm_arch_has_protected_memory(struct kvm_vm *vm)
+{
+ return false;
+}
+#endif
+
+void vm_mem_region_set_flags(struct kvm_vm *vm, uint32_t slot, uint32_t flags);
+void vm_mem_region_move(struct kvm_vm *vm, uint32_t slot, uint64_t new_gpa);
+void vm_mem_region_delete(struct kvm_vm *vm, uint32_t slot);
+struct kvm_vcpu *__vm_vcpu_add(struct kvm_vm *vm, uint32_t vcpu_id);
+void vm_populate_vaddr_bitmap(struct kvm_vm *vm);
+vm_vaddr_t vm_vaddr_unused_gap(struct kvm_vm *vm, size_t sz, vm_vaddr_t vaddr_min);
+vm_vaddr_t vm_vaddr_alloc(struct kvm_vm *vm, size_t sz, vm_vaddr_t vaddr_min);
+vm_vaddr_t __vm_vaddr_alloc(struct kvm_vm *vm, size_t sz, vm_vaddr_t vaddr_min,
+ enum kvm_mem_region_type type);
+vm_vaddr_t vm_vaddr_alloc_shared(struct kvm_vm *vm, size_t sz,
+ vm_vaddr_t vaddr_min,
+ enum kvm_mem_region_type type);
+vm_vaddr_t vm_vaddr_alloc_pages(struct kvm_vm *vm, int nr_pages);
+vm_vaddr_t __vm_vaddr_alloc_page(struct kvm_vm *vm,
+ enum kvm_mem_region_type type);
+vm_vaddr_t vm_vaddr_alloc_page(struct kvm_vm *vm);
+
+void virt_map(struct kvm_vm *vm, uint64_t vaddr, uint64_t paddr,
+ unsigned int npages);
+void *addr_gpa2hva(struct kvm_vm *vm, vm_paddr_t gpa);
+void *addr_gva2hva(struct kvm_vm *vm, vm_vaddr_t gva);
+vm_paddr_t addr_hva2gpa(struct kvm_vm *vm, void *hva);
+void *addr_gpa2alias(struct kvm_vm *vm, vm_paddr_t gpa);
+
+
+static inline vm_paddr_t vm_untag_gpa(struct kvm_vm *vm, vm_paddr_t gpa)
+{
+ return gpa & ~vm->gpa_tag_mask;
+}
+
+void vcpu_run(struct kvm_vcpu *vcpu);
+int _vcpu_run(struct kvm_vcpu *vcpu);
+
+static inline int __vcpu_run(struct kvm_vcpu *vcpu)
+{
+ return __vcpu_ioctl(vcpu, KVM_RUN, NULL);
+}
+
+void vcpu_run_complete_io(struct kvm_vcpu *vcpu);
+struct kvm_reg_list *vcpu_get_reg_list(struct kvm_vcpu *vcpu);
+
+static inline void vcpu_enable_cap(struct kvm_vcpu *vcpu, uint32_t cap,
+ uint64_t arg0)
+{
+ struct kvm_enable_cap enable_cap = { .cap = cap, .args = { arg0 } };
+
+ vcpu_ioctl(vcpu, KVM_ENABLE_CAP, &enable_cap);
+}
+
+static inline void vcpu_guest_debug_set(struct kvm_vcpu *vcpu,
+ struct kvm_guest_debug *debug)
+{
+ vcpu_ioctl(vcpu, KVM_SET_GUEST_DEBUG, debug);
+}
+
+static inline void vcpu_mp_state_get(struct kvm_vcpu *vcpu,
+ struct kvm_mp_state *mp_state)
+{
+ vcpu_ioctl(vcpu, KVM_GET_MP_STATE, mp_state);
+}
+static inline void vcpu_mp_state_set(struct kvm_vcpu *vcpu,
+ struct kvm_mp_state *mp_state)
+{
+ vcpu_ioctl(vcpu, KVM_SET_MP_STATE, mp_state);
+}
+
+static inline void vcpu_regs_get(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
+{
+ vcpu_ioctl(vcpu, KVM_GET_REGS, regs);
+}
+
+static inline void vcpu_regs_set(struct kvm_vcpu *vcpu, struct kvm_regs *regs)
+{
+ vcpu_ioctl(vcpu, KVM_SET_REGS, regs);
+}
+static inline void vcpu_sregs_get(struct kvm_vcpu *vcpu, struct kvm_sregs *sregs)
+{
+ vcpu_ioctl(vcpu, KVM_GET_SREGS, sregs);
+
+}
+static inline void vcpu_sregs_set(struct kvm_vcpu *vcpu, struct kvm_sregs *sregs)
+{
+ vcpu_ioctl(vcpu, KVM_SET_SREGS, sregs);
+}
+static inline int _vcpu_sregs_set(struct kvm_vcpu *vcpu, struct kvm_sregs *sregs)
+{
+ return __vcpu_ioctl(vcpu, KVM_SET_SREGS, sregs);
+}
+static inline void vcpu_fpu_get(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
+{
+ vcpu_ioctl(vcpu, KVM_GET_FPU, fpu);
+}
+static inline void vcpu_fpu_set(struct kvm_vcpu *vcpu, struct kvm_fpu *fpu)
+{
+ vcpu_ioctl(vcpu, KVM_SET_FPU, fpu);
+}
+
+static inline int __vcpu_get_reg(struct kvm_vcpu *vcpu, uint64_t id, void *addr)
+{
+ struct kvm_one_reg reg = { .id = id, .addr = (uint64_t)addr };
+
+ return __vcpu_ioctl(vcpu, KVM_GET_ONE_REG, &reg);
+}
+static inline int __vcpu_set_reg(struct kvm_vcpu *vcpu, uint64_t id, uint64_t val)
+{
+ struct kvm_one_reg reg = { .id = id, .addr = (uint64_t)&val };
+
+ return __vcpu_ioctl(vcpu, KVM_SET_ONE_REG, &reg);
+}
+static inline void vcpu_get_reg(struct kvm_vcpu *vcpu, uint64_t id, void *addr)
+{
+ struct kvm_one_reg reg = { .id = id, .addr = (uint64_t)addr };
+
+ vcpu_ioctl(vcpu, KVM_GET_ONE_REG, &reg);
+}
+static inline void vcpu_set_reg(struct kvm_vcpu *vcpu, uint64_t id, uint64_t val)
+{
+ struct kvm_one_reg reg = { .id = id, .addr = (uint64_t)&val };
+
+ vcpu_ioctl(vcpu, KVM_SET_ONE_REG, &reg);
+}
+
+#ifdef __KVM_HAVE_VCPU_EVENTS
+static inline void vcpu_events_get(struct kvm_vcpu *vcpu,
+ struct kvm_vcpu_events *events)
+{
+ vcpu_ioctl(vcpu, KVM_GET_VCPU_EVENTS, events);
+}
+static inline void vcpu_events_set(struct kvm_vcpu *vcpu,
+ struct kvm_vcpu_events *events)
+{
+ vcpu_ioctl(vcpu, KVM_SET_VCPU_EVENTS, events);
+}
+#endif
+#ifdef __x86_64__
+static inline void vcpu_nested_state_get(struct kvm_vcpu *vcpu,
+ struct kvm_nested_state *state)
+{
+ vcpu_ioctl(vcpu, KVM_GET_NESTED_STATE, state);
+}
+static inline int __vcpu_nested_state_set(struct kvm_vcpu *vcpu,
+ struct kvm_nested_state *state)
+{
+ return __vcpu_ioctl(vcpu, KVM_SET_NESTED_STATE, state);
+}
+
+static inline void vcpu_nested_state_set(struct kvm_vcpu *vcpu,
+ struct kvm_nested_state *state)
+{
+ vcpu_ioctl(vcpu, KVM_SET_NESTED_STATE, state);
+}
+#endif
+static inline int vcpu_get_stats_fd(struct kvm_vcpu *vcpu)
+{
+ int fd = __vcpu_ioctl(vcpu, KVM_GET_STATS_FD, NULL);
+
+ TEST_ASSERT_VM_VCPU_IOCTL(fd >= 0, KVM_CHECK_EXTENSION, fd, vcpu->vm);
+ return fd;
+}
+
+int __kvm_has_device_attr(int dev_fd, uint32_t group, uint64_t attr);
+
+static inline void kvm_has_device_attr(int dev_fd, uint32_t group, uint64_t attr)
+{
+ int ret = __kvm_has_device_attr(dev_fd, group, attr);
+
+ TEST_ASSERT(!ret, "KVM_HAS_DEVICE_ATTR failed, rc: %i errno: %i", ret, errno);
+}
+
+int __kvm_device_attr_get(int dev_fd, uint32_t group, uint64_t attr, void *val);
+
+static inline void kvm_device_attr_get(int dev_fd, uint32_t group,
+ uint64_t attr, void *val)
+{
+ int ret = __kvm_device_attr_get(dev_fd, group, attr, val);
+
+ TEST_ASSERT(!ret, KVM_IOCTL_ERROR(KVM_GET_DEVICE_ATTR, ret));
+}
+
+int __kvm_device_attr_set(int dev_fd, uint32_t group, uint64_t attr, void *val);
+
+static inline void kvm_device_attr_set(int dev_fd, uint32_t group,
+ uint64_t attr, void *val)
+{
+ int ret = __kvm_device_attr_set(dev_fd, group, attr, val);
+
+ TEST_ASSERT(!ret, KVM_IOCTL_ERROR(KVM_SET_DEVICE_ATTR, ret));
+}
+
+static inline int __vcpu_has_device_attr(struct kvm_vcpu *vcpu, uint32_t group,
+ uint64_t attr)
+{
+ return __kvm_has_device_attr(vcpu->fd, group, attr);
+}
+
+static inline void vcpu_has_device_attr(struct kvm_vcpu *vcpu, uint32_t group,
+ uint64_t attr)
+{
+ kvm_has_device_attr(vcpu->fd, group, attr);
+}
+
+static inline int __vcpu_device_attr_get(struct kvm_vcpu *vcpu, uint32_t group,
+ uint64_t attr, void *val)
+{
+ return __kvm_device_attr_get(vcpu->fd, group, attr, val);
+}
+
+static inline void vcpu_device_attr_get(struct kvm_vcpu *vcpu, uint32_t group,
+ uint64_t attr, void *val)
+{
+ kvm_device_attr_get(vcpu->fd, group, attr, val);
+}
+
+static inline int __vcpu_device_attr_set(struct kvm_vcpu *vcpu, uint32_t group,
+ uint64_t attr, void *val)
+{
+ return __kvm_device_attr_set(vcpu->fd, group, attr, val);
+}
+
+static inline void vcpu_device_attr_set(struct kvm_vcpu *vcpu, uint32_t group,
+ uint64_t attr, void *val)
+{
+ kvm_device_attr_set(vcpu->fd, group, attr, val);
+}
+
+int __kvm_test_create_device(struct kvm_vm *vm, uint64_t type);
+int __kvm_create_device(struct kvm_vm *vm, uint64_t type);
+
+static inline int kvm_create_device(struct kvm_vm *vm, uint64_t type)
+{
+ int fd = __kvm_create_device(vm, type);
+
+ TEST_ASSERT(fd >= 0, KVM_IOCTL_ERROR(KVM_CREATE_DEVICE, fd));
+ return fd;
+}
+
+void *vcpu_map_dirty_ring(struct kvm_vcpu *vcpu);
+
+/*
+ * VM VCPU Args Set
+ *
+ * Input Args:
+ * vm - Virtual Machine
+ * num - number of arguments
+ * ... - arguments, each of type uint64_t
+ *
+ * Output Args: None
+ *
+ * Return: None
+ *
+ * Sets the first @num input parameters for the function at @vcpu's entry point,
+ * per the C calling convention of the architecture, to the values given as
+ * variable args. Each of the variable args is expected to be of type uint64_t.
+ * The maximum @num can be is specific to the architecture.
+ */
+void vcpu_args_set(struct kvm_vcpu *vcpu, unsigned int num, ...);
+
+void kvm_irq_line(struct kvm_vm *vm, uint32_t irq, int level);
+int _kvm_irq_line(struct kvm_vm *vm, uint32_t irq, int level);
+
+#define KVM_MAX_IRQ_ROUTES 4096
+
+struct kvm_irq_routing *kvm_gsi_routing_create(void);
+void kvm_gsi_routing_irqchip_add(struct kvm_irq_routing *routing,
+ uint32_t gsi, uint32_t pin);
+int _kvm_gsi_routing_write(struct kvm_vm *vm, struct kvm_irq_routing *routing);
+void kvm_gsi_routing_write(struct kvm_vm *vm, struct kvm_irq_routing *routing);
+
+const char *exit_reason_str(unsigned int exit_reason);
+
+vm_paddr_t vm_phy_page_alloc(struct kvm_vm *vm, vm_paddr_t paddr_min,
+ uint32_t memslot);
+vm_paddr_t __vm_phy_pages_alloc(struct kvm_vm *vm, size_t num,
+ vm_paddr_t paddr_min, uint32_t memslot,
+ bool protected);
+vm_paddr_t vm_alloc_page_table(struct kvm_vm *vm);
+
+static inline vm_paddr_t vm_phy_pages_alloc(struct kvm_vm *vm, size_t num,
+ vm_paddr_t paddr_min, uint32_t memslot)
+{
+ /*
+ * By default, allocate memory as protected for VMs that support
+ * protected memory, as the majority of memory for such VMs is
+ * protected, i.e. using shared memory is effectively opt-in.
+ */
+ return __vm_phy_pages_alloc(vm, num, paddr_min, memslot,
+ vm_arch_has_protected_memory(vm));
+}
+
+/*
+ * ____vm_create() does KVM_CREATE_VM and little else. __vm_create() also
+ * loads the test binary into guest memory and creates an IRQ chip (x86 only).
+ * __vm_create() does NOT create vCPUs, @nr_runnable_vcpus is used purely to
+ * calculate the amount of memory needed for per-vCPU data, e.g. stacks.
+ */
+struct kvm_vm *____vm_create(struct vm_shape shape);
+struct kvm_vm *__vm_create(struct vm_shape shape, uint32_t nr_runnable_vcpus,
+ uint64_t nr_extra_pages);
+
+static inline struct kvm_vm *vm_create_barebones(void)
+{
+ return ____vm_create(VM_SHAPE_DEFAULT);
+}
+
+#ifdef __x86_64__
+static inline struct kvm_vm *vm_create_barebones_protected_vm(void)
+{
+ const struct vm_shape shape = {
+ .mode = VM_MODE_DEFAULT,
+ .type = KVM_X86_SW_PROTECTED_VM,
+ };
+
+ return ____vm_create(shape);
+}
+#endif
+
+static inline struct kvm_vm *vm_create(uint32_t nr_runnable_vcpus)
+{
+ return __vm_create(VM_SHAPE_DEFAULT, nr_runnable_vcpus, 0);
+}
+
+struct kvm_vm *__vm_create_with_vcpus(struct vm_shape shape, uint32_t nr_vcpus,
+ uint64_t extra_mem_pages,
+ void *guest_code, struct kvm_vcpu *vcpus[]);
+
+static inline struct kvm_vm *vm_create_with_vcpus(uint32_t nr_vcpus,
+ void *guest_code,
+ struct kvm_vcpu *vcpus[])
+{
+ return __vm_create_with_vcpus(VM_SHAPE_DEFAULT, nr_vcpus, 0,
+ guest_code, vcpus);
+}
+
+
+struct kvm_vm *__vm_create_shape_with_one_vcpu(struct vm_shape shape,
+ struct kvm_vcpu **vcpu,
+ uint64_t extra_mem_pages,
+ void *guest_code);
+
+/*
+ * Create a VM with a single vCPU with reasonable defaults and @extra_mem_pages
+ * additional pages of guest memory. Returns the VM and vCPU (via out param).
+ */
+static inline struct kvm_vm *__vm_create_with_one_vcpu(struct kvm_vcpu **vcpu,
+ uint64_t extra_mem_pages,
+ void *guest_code)
+{
+ return __vm_create_shape_with_one_vcpu(VM_SHAPE_DEFAULT, vcpu,
+ extra_mem_pages, guest_code);
+}
+
+static inline struct kvm_vm *vm_create_with_one_vcpu(struct kvm_vcpu **vcpu,
+ void *guest_code)
+{
+ return __vm_create_with_one_vcpu(vcpu, 0, guest_code);
+}
+
+static inline struct kvm_vm *vm_create_shape_with_one_vcpu(struct vm_shape shape,
+ struct kvm_vcpu **vcpu,
+ void *guest_code)
+{
+ return __vm_create_shape_with_one_vcpu(shape, vcpu, 0, guest_code);
+}
+
+struct kvm_vcpu *vm_recreate_with_one_vcpu(struct kvm_vm *vm);
+
+void kvm_pin_this_task_to_pcpu(uint32_t pcpu);
+void kvm_print_vcpu_pinning_help(void);
+void kvm_parse_vcpu_pinning(const char *pcpus_string, uint32_t vcpu_to_pcpu[],
+ int nr_vcpus);
+
+unsigned long vm_compute_max_gfn(struct kvm_vm *vm);
+unsigned int vm_calc_num_guest_pages(enum vm_guest_mode mode, size_t size);
+unsigned int vm_num_host_pages(enum vm_guest_mode mode, unsigned int num_guest_pages);
+unsigned int vm_num_guest_pages(enum vm_guest_mode mode, unsigned int num_host_pages);
+static inline unsigned int
+vm_adjust_num_guest_pages(enum vm_guest_mode mode, unsigned int num_guest_pages)
+{
+ unsigned int n;
+ n = vm_num_guest_pages(mode, vm_num_host_pages(mode, num_guest_pages));
+#ifdef __s390x__
+ /* s390 requires 1M aligned guest sizes */
+ n = (n + 255) & ~255;
+#endif
+ return n;
+}
+
+#define sync_global_to_guest(vm, g) ({ \
+ typeof(g) *_p = addr_gva2hva(vm, (vm_vaddr_t)&(g)); \
+ memcpy(_p, &(g), sizeof(g)); \
+})
+
+#define sync_global_from_guest(vm, g) ({ \
+ typeof(g) *_p = addr_gva2hva(vm, (vm_vaddr_t)&(g)); \
+ memcpy(&(g), _p, sizeof(g)); \
+})
+
+/*
+ * Write a global value, but only in the VM's (guest's) domain. Primarily used
+ * for "globals" that hold per-VM values (VMs always duplicate code and global
+ * data into their own region of physical memory), but can be used anytime it's
+ * undesirable to change the host's copy of the global.
+ */
+#define write_guest_global(vm, g, val) ({ \
+ typeof(g) *_p = addr_gva2hva(vm, (vm_vaddr_t)&(g)); \
+ typeof(g) _val = val; \
+ \
+ memcpy(_p, &(_val), sizeof(g)); \
+})
+
+void assert_on_unhandled_exception(struct kvm_vcpu *vcpu);
+
+void vcpu_arch_dump(FILE *stream, struct kvm_vcpu *vcpu,
+ uint8_t indent);
+
+static inline void vcpu_dump(FILE *stream, struct kvm_vcpu *vcpu,
+ uint8_t indent)
+{
+ vcpu_arch_dump(stream, vcpu, indent);
+}
+
+/*
+ * Adds a vCPU with reasonable defaults (e.g. a stack)
+ *
+ * Input Args:
+ * vm - Virtual Machine
+ * vcpu_id - The id of the VCPU to add to the VM.
+ */
+struct kvm_vcpu *vm_arch_vcpu_add(struct kvm_vm *vm, uint32_t vcpu_id);
+void vcpu_arch_set_entry_point(struct kvm_vcpu *vcpu, void *guest_code);
+
+static inline struct kvm_vcpu *vm_vcpu_add(struct kvm_vm *vm, uint32_t vcpu_id,
+ void *guest_code)
+{
+ struct kvm_vcpu *vcpu = vm_arch_vcpu_add(vm, vcpu_id);
+
+ vcpu_arch_set_entry_point(vcpu, guest_code);
+
+ return vcpu;
+}
+
+/* Re-create a vCPU after restarting a VM, e.g. for state save/restore tests. */
+struct kvm_vcpu *vm_arch_vcpu_recreate(struct kvm_vm *vm, uint32_t vcpu_id);
+
+static inline struct kvm_vcpu *vm_vcpu_recreate(struct kvm_vm *vm,
+ uint32_t vcpu_id)
+{
+ return vm_arch_vcpu_recreate(vm, vcpu_id);
+}
+
+void vcpu_arch_free(struct kvm_vcpu *vcpu);
+
+void virt_arch_pgd_alloc(struct kvm_vm *vm);
+
+static inline void virt_pgd_alloc(struct kvm_vm *vm)
+{
+ virt_arch_pgd_alloc(vm);
+}
+
+/*
+ * VM Virtual Page Map
+ *
+ * Input Args:
+ * vm - Virtual Machine
+ * vaddr - VM Virtual Address
+ * paddr - VM Physical Address
+ * memslot - Memory region slot for new virtual translation tables
+ *
+ * Output Args: None
+ *
+ * Return: None
+ *
+ * Within @vm, creates a virtual translation for the page starting
+ * at @vaddr to the page starting at @paddr.
+ */
+void virt_arch_pg_map(struct kvm_vm *vm, uint64_t vaddr, uint64_t paddr);
+
+static inline void virt_pg_map(struct kvm_vm *vm, uint64_t vaddr, uint64_t paddr)
+{
+ virt_arch_pg_map(vm, vaddr, paddr);
+}
+
+
+/*
+ * Address Guest Virtual to Guest Physical
+ *
+ * Input Args:
+ * vm - Virtual Machine
+ * gva - VM virtual address
+ *
+ * Output Args: None
+ *
+ * Return:
+ * Equivalent VM physical address
+ *
+ * Returns the VM physical address of the translated VM virtual
+ * address given by @gva.
+ */
+vm_paddr_t addr_arch_gva2gpa(struct kvm_vm *vm, vm_vaddr_t gva);
+
+static inline vm_paddr_t addr_gva2gpa(struct kvm_vm *vm, vm_vaddr_t gva)
+{
+ return addr_arch_gva2gpa(vm, gva);
+}
+
+/*
+ * Virtual Translation Tables Dump
+ *
+ * Input Args:
+ * stream - Output FILE stream
+ * vm - Virtual Machine
+ * indent - Left margin indent amount
+ *
+ * Output Args: None
+ *
+ * Return: None
+ *
+ * Dumps to the FILE stream given by @stream, the contents of all the
+ * virtual translation tables for the VM given by @vm.
+ */
+void virt_arch_dump(FILE *stream, struct kvm_vm *vm, uint8_t indent);
+
+static inline void virt_dump(FILE *stream, struct kvm_vm *vm, uint8_t indent)
+{
+ virt_arch_dump(stream, vm, indent);
+}
+
+
+static inline int __vm_disable_nx_huge_pages(struct kvm_vm *vm)
+{
+ return __vm_enable_cap(vm, KVM_CAP_VM_DISABLE_NX_HUGE_PAGES, 0);
+}
+
+/*
+ * Arch hook that is invoked via a constructor, i.e. before exeucting main(),
+ * to allow for arch-specific setup that is common to all tests, e.g. computing
+ * the default guest "mode".
+ */
+void kvm_selftest_arch_init(void);
+
+void kvm_arch_vm_post_create(struct kvm_vm *vm);
+
+bool vm_is_gpa_protected(struct kvm_vm *vm, vm_paddr_t paddr);
+
+uint32_t guest_get_vcpuid(void);
+
+#endif /* SELFTEST_KVM_UTIL_BASE_H */
diff --git a/tools/testing/selftests/kvm/include/memstress.h b/tools/testing/selftests/kvm/include/memstress.h
new file mode 100644
index 000000000000..ce4e603050ea
--- /dev/null
+++ b/tools/testing/selftests/kvm/include/memstress.h
@@ -0,0 +1,83 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * tools/testing/selftests/kvm/include/memstress.h
+ *
+ * Copyright (C) 2020, Google LLC.
+ */
+
+#ifndef SELFTEST_KVM_MEMSTRESS_H
+#define SELFTEST_KVM_MEMSTRESS_H
+
+#include <pthread.h>
+
+#include "kvm_util.h"
+
+/* Default guest test virtual memory offset */
+#define DEFAULT_GUEST_TEST_MEM 0xc0000000
+
+#define DEFAULT_PER_VCPU_MEM_SIZE (1 << 30) /* 1G */
+
+#define MEMSTRESS_MEM_SLOT_INDEX 1
+
+struct memstress_vcpu_args {
+ uint64_t gpa;
+ uint64_t gva;
+ uint64_t pages;
+
+ /* Only used by the host userspace part of the vCPU thread */
+ struct kvm_vcpu *vcpu;
+ int vcpu_idx;
+};
+
+struct memstress_args {
+ struct kvm_vm *vm;
+ /* The starting address and size of the guest test region. */
+ uint64_t gpa;
+ uint64_t size;
+ uint64_t guest_page_size;
+ uint32_t random_seed;
+ uint32_t write_percent;
+
+ /* Run vCPUs in L2 instead of L1, if the architecture supports it. */
+ bool nested;
+ /* Randomize which pages are accessed by the guest. */
+ bool random_access;
+ /* True if all vCPUs are pinned to pCPUs */
+ bool pin_vcpus;
+ /* The vCPU=>pCPU pinning map. Only valid if pin_vcpus is true. */
+ uint32_t vcpu_to_pcpu[KVM_MAX_VCPUS];
+
+ /* Test is done, stop running vCPUs. */
+ bool stop_vcpus;
+
+ struct memstress_vcpu_args vcpu_args[KVM_MAX_VCPUS];
+};
+
+extern struct memstress_args memstress_args;
+
+struct kvm_vm *memstress_create_vm(enum vm_guest_mode mode, int nr_vcpus,
+ uint64_t vcpu_memory_bytes, int slots,
+ enum vm_mem_backing_src_type backing_src,
+ bool partition_vcpu_memory_access);
+void memstress_destroy_vm(struct kvm_vm *vm);
+
+void memstress_set_write_percent(struct kvm_vm *vm, uint32_t write_percent);
+void memstress_set_random_seed(struct kvm_vm *vm, uint32_t random_seed);
+void memstress_set_random_access(struct kvm_vm *vm, bool random_access);
+
+void memstress_start_vcpu_threads(int vcpus, void (*vcpu_fn)(struct memstress_vcpu_args *));
+void memstress_join_vcpu_threads(int vcpus);
+void memstress_guest_code(uint32_t vcpu_id);
+
+uint64_t memstress_nested_pages(int nr_vcpus);
+void memstress_setup_nested(struct kvm_vm *vm, int nr_vcpus, struct kvm_vcpu *vcpus[]);
+
+void memstress_enable_dirty_logging(struct kvm_vm *vm, int slots);
+void memstress_disable_dirty_logging(struct kvm_vm *vm, int slots);
+void memstress_get_dirty_log(struct kvm_vm *vm, unsigned long *bitmaps[], int slots);
+void memstress_clear_dirty_log(struct kvm_vm *vm, unsigned long *bitmaps[],
+ int slots, uint64_t pages_per_slot);
+unsigned long **memstress_alloc_bitmaps(int slots, uint64_t pages_per_slot);
+void memstress_free_bitmaps(unsigned long *bitmaps[], int slots);
+
+#endif /* SELFTEST_KVM_MEMSTRESS_H */
diff --git a/tools/testing/selftests/kvm/include/numaif.h b/tools/testing/selftests/kvm/include/numaif.h
new file mode 100644
index 000000000000..b020547403fd
--- /dev/null
+++ b/tools/testing/selftests/kvm/include/numaif.h
@@ -0,0 +1,55 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * tools/testing/selftests/kvm/include/numaif.h
+ *
+ * Copyright (C) 2020, Google LLC.
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2.
+ *
+ * Header file that provides access to NUMA API functions not explicitly
+ * exported to user space.
+ */
+
+#ifndef SELFTEST_KVM_NUMAIF_H
+#define SELFTEST_KVM_NUMAIF_H
+
+#define __NR_get_mempolicy 239
+#define __NR_migrate_pages 256
+
+/* System calls */
+long get_mempolicy(int *policy, const unsigned long *nmask,
+ unsigned long maxnode, void *addr, int flags)
+{
+ return syscall(__NR_get_mempolicy, policy, nmask,
+ maxnode, addr, flags);
+}
+
+long migrate_pages(int pid, unsigned long maxnode,
+ const unsigned long *frommask,
+ const unsigned long *tomask)
+{
+ return syscall(__NR_migrate_pages, pid, maxnode, frommask, tomask);
+}
+
+/* Policies */
+#define MPOL_DEFAULT 0
+#define MPOL_PREFERRED 1
+#define MPOL_BIND 2
+#define MPOL_INTERLEAVE 3
+
+#define MPOL_MAX MPOL_INTERLEAVE
+
+/* Flags for get_mem_policy */
+#define MPOL_F_NODE (1<<0) /* return next il node or node of address */
+ /* Warning: MPOL_F_NODE is unsupported and
+ * subject to change. Don't use.
+ */
+#define MPOL_F_ADDR (1<<1) /* look up vma using address */
+#define MPOL_F_MEMS_ALLOWED (1<<2) /* query nodes allowed in cpuset */
+
+/* Flags for mbind */
+#define MPOL_MF_STRICT (1<<0) /* Verify existing pages in the mapping */
+#define MPOL_MF_MOVE (1<<1) /* Move pages owned by this process to conform to mapping */
+#define MPOL_MF_MOVE_ALL (1<<2) /* Move every page to conform to mapping */
+
+#endif /* SELFTEST_KVM_NUMAIF_H */
diff --git a/tools/testing/selftests/kvm/include/riscv/arch_timer.h b/tools/testing/selftests/kvm/include/riscv/arch_timer.h
new file mode 100644
index 000000000000..225d81dad064
--- /dev/null
+++ b/tools/testing/selftests/kvm/include/riscv/arch_timer.h
@@ -0,0 +1,71 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * RISC-V Arch Timer(sstc) specific interface
+ *
+ * Copyright (c) 2024 Intel Corporation
+ */
+
+#ifndef SELFTEST_KVM_ARCH_TIMER_H
+#define SELFTEST_KVM_ARCH_TIMER_H
+
+#include <asm/csr.h>
+#include <asm/vdso/processor.h>
+
+static unsigned long timer_freq;
+
+#define msec_to_cycles(msec) \
+ ((timer_freq) * (uint64_t)(msec) / 1000)
+
+#define usec_to_cycles(usec) \
+ ((timer_freq) * (uint64_t)(usec) / 1000000)
+
+#define cycles_to_usec(cycles) \
+ ((uint64_t)(cycles) * 1000000 / (timer_freq))
+
+static inline uint64_t timer_get_cycles(void)
+{
+ return csr_read(CSR_TIME);
+}
+
+static inline void timer_set_cmp(uint64_t cval)
+{
+ csr_write(CSR_STIMECMP, cval);
+}
+
+static inline uint64_t timer_get_cmp(void)
+{
+ return csr_read(CSR_STIMECMP);
+}
+
+static inline void timer_irq_enable(void)
+{
+ csr_set(CSR_SIE, IE_TIE);
+}
+
+static inline void timer_irq_disable(void)
+{
+ csr_clear(CSR_SIE, IE_TIE);
+}
+
+static inline void timer_set_next_cmp_ms(uint32_t msec)
+{
+ uint64_t now_ct = timer_get_cycles();
+ uint64_t next_ct = now_ct + msec_to_cycles(msec);
+
+ timer_set_cmp(next_ct);
+}
+
+static inline void __delay(uint64_t cycles)
+{
+ uint64_t start = timer_get_cycles();
+
+ while ((timer_get_cycles() - start) < cycles)
+ cpu_relax();
+}
+
+static inline void udelay(unsigned long usec)
+{
+ __delay(usec_to_cycles(usec));
+}
+
+#endif /* SELFTEST_KVM_ARCH_TIMER_H */
diff --git a/tools/testing/selftests/kvm/include/riscv/kvm_util_arch.h b/tools/testing/selftests/kvm/include/riscv/kvm_util_arch.h
new file mode 100644
index 000000000000..e43a57d99b56
--- /dev/null
+++ b/tools/testing/selftests/kvm/include/riscv/kvm_util_arch.h
@@ -0,0 +1,7 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+#ifndef SELFTEST_KVM_UTIL_ARCH_H
+#define SELFTEST_KVM_UTIL_ARCH_H
+
+struct kvm_vm_arch {};
+
+#endif // SELFTEST_KVM_UTIL_ARCH_H
diff --git a/tools/testing/selftests/kvm/include/riscv/processor.h b/tools/testing/selftests/kvm/include/riscv/processor.h
new file mode 100644
index 000000000000..ce473fe251dd
--- /dev/null
+++ b/tools/testing/selftests/kvm/include/riscv/processor.h
@@ -0,0 +1,206 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * RISC-V processor specific defines
+ *
+ * Copyright (C) 2021 Western Digital Corporation or its affiliates.
+ */
+#ifndef SELFTEST_KVM_PROCESSOR_H
+#define SELFTEST_KVM_PROCESSOR_H
+
+#include <linux/stringify.h>
+#include <asm/csr.h>
+#include "kvm_util.h"
+
+static inline uint64_t __kvm_reg_id(uint64_t type, uint64_t subtype,
+ uint64_t idx, uint64_t size)
+{
+ return KVM_REG_RISCV | type | subtype | idx | size;
+}
+
+#if __riscv_xlen == 64
+#define KVM_REG_SIZE_ULONG KVM_REG_SIZE_U64
+#else
+#define KVM_REG_SIZE_ULONG KVM_REG_SIZE_U32
+#endif
+
+#define RISCV_CONFIG_REG(name) __kvm_reg_id(KVM_REG_RISCV_CONFIG, 0, \
+ KVM_REG_RISCV_CONFIG_REG(name), \
+ KVM_REG_SIZE_ULONG)
+
+#define RISCV_CORE_REG(name) __kvm_reg_id(KVM_REG_RISCV_CORE, 0, \
+ KVM_REG_RISCV_CORE_REG(name), \
+ KVM_REG_SIZE_ULONG)
+
+#define RISCV_GENERAL_CSR_REG(name) __kvm_reg_id(KVM_REG_RISCV_CSR, \
+ KVM_REG_RISCV_CSR_GENERAL, \
+ KVM_REG_RISCV_CSR_REG(name), \
+ KVM_REG_SIZE_ULONG)
+
+#define RISCV_TIMER_REG(name) __kvm_reg_id(KVM_REG_RISCV_TIMER, 0, \
+ KVM_REG_RISCV_TIMER_REG(name), \
+ KVM_REG_SIZE_U64)
+
+#define RISCV_ISA_EXT_REG(idx) __kvm_reg_id(KVM_REG_RISCV_ISA_EXT, \
+ KVM_REG_RISCV_ISA_SINGLE, \
+ idx, KVM_REG_SIZE_ULONG)
+
+#define RISCV_SBI_EXT_REG(idx) __kvm_reg_id(KVM_REG_RISCV_SBI_EXT, \
+ KVM_REG_RISCV_SBI_SINGLE, \
+ idx, KVM_REG_SIZE_ULONG)
+
+bool __vcpu_has_ext(struct kvm_vcpu *vcpu, uint64_t ext);
+
+struct ex_regs {
+ unsigned long ra;
+ unsigned long sp;
+ unsigned long gp;
+ unsigned long tp;
+ unsigned long t0;
+ unsigned long t1;
+ unsigned long t2;
+ unsigned long s0;
+ unsigned long s1;
+ unsigned long a0;
+ unsigned long a1;
+ unsigned long a2;
+ unsigned long a3;
+ unsigned long a4;
+ unsigned long a5;
+ unsigned long a6;
+ unsigned long a7;
+ unsigned long s2;
+ unsigned long s3;
+ unsigned long s4;
+ unsigned long s5;
+ unsigned long s6;
+ unsigned long s7;
+ unsigned long s8;
+ unsigned long s9;
+ unsigned long s10;
+ unsigned long s11;
+ unsigned long t3;
+ unsigned long t4;
+ unsigned long t5;
+ unsigned long t6;
+ unsigned long epc;
+ unsigned long status;
+ unsigned long cause;
+};
+
+#define NR_VECTORS 2
+#define NR_EXCEPTIONS 32
+#define EC_MASK (NR_EXCEPTIONS - 1)
+
+typedef void(*exception_handler_fn)(struct ex_regs *);
+
+void vm_init_vector_tables(struct kvm_vm *vm);
+void vcpu_init_vector_tables(struct kvm_vcpu *vcpu);
+
+void vm_install_exception_handler(struct kvm_vm *vm, int vector, exception_handler_fn handler);
+
+void vm_install_interrupt_handler(struct kvm_vm *vm, exception_handler_fn handler);
+
+/* L3 index Bit[47:39] */
+#define PGTBL_L3_INDEX_MASK 0x0000FF8000000000ULL
+#define PGTBL_L3_INDEX_SHIFT 39
+#define PGTBL_L3_BLOCK_SHIFT 39
+#define PGTBL_L3_BLOCK_SIZE 0x0000008000000000ULL
+#define PGTBL_L3_MAP_MASK (~(PGTBL_L3_BLOCK_SIZE - 1))
+/* L2 index Bit[38:30] */
+#define PGTBL_L2_INDEX_MASK 0x0000007FC0000000ULL
+#define PGTBL_L2_INDEX_SHIFT 30
+#define PGTBL_L2_BLOCK_SHIFT 30
+#define PGTBL_L2_BLOCK_SIZE 0x0000000040000000ULL
+#define PGTBL_L2_MAP_MASK (~(PGTBL_L2_BLOCK_SIZE - 1))
+/* L1 index Bit[29:21] */
+#define PGTBL_L1_INDEX_MASK 0x000000003FE00000ULL
+#define PGTBL_L1_INDEX_SHIFT 21
+#define PGTBL_L1_BLOCK_SHIFT 21
+#define PGTBL_L1_BLOCK_SIZE 0x0000000000200000ULL
+#define PGTBL_L1_MAP_MASK (~(PGTBL_L1_BLOCK_SIZE - 1))
+/* L0 index Bit[20:12] */
+#define PGTBL_L0_INDEX_MASK 0x00000000001FF000ULL
+#define PGTBL_L0_INDEX_SHIFT 12
+#define PGTBL_L0_BLOCK_SHIFT 12
+#define PGTBL_L0_BLOCK_SIZE 0x0000000000001000ULL
+#define PGTBL_L0_MAP_MASK (~(PGTBL_L0_BLOCK_SIZE - 1))
+
+#define PGTBL_PTE_ADDR_MASK 0x003FFFFFFFFFFC00ULL
+#define PGTBL_PTE_ADDR_SHIFT 10
+#define PGTBL_PTE_RSW_MASK 0x0000000000000300ULL
+#define PGTBL_PTE_RSW_SHIFT 8
+#define PGTBL_PTE_DIRTY_MASK 0x0000000000000080ULL
+#define PGTBL_PTE_DIRTY_SHIFT 7
+#define PGTBL_PTE_ACCESSED_MASK 0x0000000000000040ULL
+#define PGTBL_PTE_ACCESSED_SHIFT 6
+#define PGTBL_PTE_GLOBAL_MASK 0x0000000000000020ULL
+#define PGTBL_PTE_GLOBAL_SHIFT 5
+#define PGTBL_PTE_USER_MASK 0x0000000000000010ULL
+#define PGTBL_PTE_USER_SHIFT 4
+#define PGTBL_PTE_EXECUTE_MASK 0x0000000000000008ULL
+#define PGTBL_PTE_EXECUTE_SHIFT 3
+#define PGTBL_PTE_WRITE_MASK 0x0000000000000004ULL
+#define PGTBL_PTE_WRITE_SHIFT 2
+#define PGTBL_PTE_READ_MASK 0x0000000000000002ULL
+#define PGTBL_PTE_READ_SHIFT 1
+#define PGTBL_PTE_PERM_MASK (PGTBL_PTE_ACCESSED_MASK | \
+ PGTBL_PTE_DIRTY_MASK | \
+ PGTBL_PTE_EXECUTE_MASK | \
+ PGTBL_PTE_WRITE_MASK | \
+ PGTBL_PTE_READ_MASK)
+#define PGTBL_PTE_VALID_MASK 0x0000000000000001ULL
+#define PGTBL_PTE_VALID_SHIFT 0
+
+#define PGTBL_PAGE_SIZE PGTBL_L0_BLOCK_SIZE
+#define PGTBL_PAGE_SIZE_SHIFT PGTBL_L0_BLOCK_SHIFT
+
+/* SBI return error codes */
+#define SBI_SUCCESS 0
+#define SBI_ERR_FAILURE -1
+#define SBI_ERR_NOT_SUPPORTED -2
+#define SBI_ERR_INVALID_PARAM -3
+#define SBI_ERR_DENIED -4
+#define SBI_ERR_INVALID_ADDRESS -5
+#define SBI_ERR_ALREADY_AVAILABLE -6
+#define SBI_ERR_ALREADY_STARTED -7
+#define SBI_ERR_ALREADY_STOPPED -8
+
+#define SBI_EXT_EXPERIMENTAL_START 0x08000000
+#define SBI_EXT_EXPERIMENTAL_END 0x08FFFFFF
+
+#define KVM_RISCV_SELFTESTS_SBI_EXT SBI_EXT_EXPERIMENTAL_END
+#define KVM_RISCV_SELFTESTS_SBI_UCALL 0
+#define KVM_RISCV_SELFTESTS_SBI_UNEXP 1
+
+enum sbi_ext_id {
+ SBI_EXT_BASE = 0x10,
+ SBI_EXT_STA = 0x535441,
+};
+
+enum sbi_ext_base_fid {
+ SBI_EXT_BASE_PROBE_EXT = 3,
+};
+
+struct sbiret {
+ long error;
+ long value;
+};
+
+struct sbiret sbi_ecall(int ext, int fid, unsigned long arg0,
+ unsigned long arg1, unsigned long arg2,
+ unsigned long arg3, unsigned long arg4,
+ unsigned long arg5);
+
+bool guest_sbi_probe_extension(int extid, long *out_val);
+
+static inline void local_irq_enable(void)
+{
+ csr_set(CSR_SSTATUS, SR_SIE);
+}
+
+static inline void local_irq_disable(void)
+{
+ csr_clear(CSR_SSTATUS, SR_SIE);
+}
+
+#endif /* SELFTEST_KVM_PROCESSOR_H */
diff --git a/tools/testing/selftests/kvm/include/riscv/ucall.h b/tools/testing/selftests/kvm/include/riscv/ucall.h
new file mode 100644
index 000000000000..be46eb32ec27
--- /dev/null
+++ b/tools/testing/selftests/kvm/include/riscv/ucall.h
@@ -0,0 +1,20 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+#ifndef SELFTEST_KVM_UCALL_H
+#define SELFTEST_KVM_UCALL_H
+
+#include "processor.h"
+
+#define UCALL_EXIT_REASON KVM_EXIT_RISCV_SBI
+
+static inline void ucall_arch_init(struct kvm_vm *vm, vm_paddr_t mmio_gpa)
+{
+}
+
+static inline void ucall_arch_do_ucall(vm_vaddr_t uc)
+{
+ sbi_ecall(KVM_RISCV_SELFTESTS_SBI_EXT,
+ KVM_RISCV_SELFTESTS_SBI_UCALL,
+ uc, 0, 0, 0, 0, 0);
+}
+
+#endif
diff --git a/tools/testing/selftests/kvm/include/s390x/diag318_test_handler.h b/tools/testing/selftests/kvm/include/s390x/diag318_test_handler.h
new file mode 100644
index 000000000000..b0ed71302722
--- /dev/null
+++ b/tools/testing/selftests/kvm/include/s390x/diag318_test_handler.h
@@ -0,0 +1,13 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later
+ *
+ * Test handler for the s390x DIAGNOSE 0x0318 instruction.
+ *
+ * Copyright (C) 2020, IBM
+ */
+
+#ifndef SELFTEST_KVM_DIAG318_TEST_HANDLER
+#define SELFTEST_KVM_DIAG318_TEST_HANDLER
+
+uint64_t get_diag318_info(void);
+
+#endif
diff --git a/tools/testing/selftests/kvm/include/s390x/kvm_util_arch.h b/tools/testing/selftests/kvm/include/s390x/kvm_util_arch.h
new file mode 100644
index 000000000000..e43a57d99b56
--- /dev/null
+++ b/tools/testing/selftests/kvm/include/s390x/kvm_util_arch.h
@@ -0,0 +1,7 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+#ifndef SELFTEST_KVM_UTIL_ARCH_H
+#define SELFTEST_KVM_UTIL_ARCH_H
+
+struct kvm_vm_arch {};
+
+#endif // SELFTEST_KVM_UTIL_ARCH_H
diff --git a/tools/testing/selftests/kvm/include/s390x/processor.h b/tools/testing/selftests/kvm/include/s390x/processor.h
index e0e96a5f608c..255c9b990f4c 100644
--- a/tools/testing/selftests/kvm/include/s390x/processor.h
+++ b/tools/testing/selftests/kvm/include/s390x/processor.h
@@ -5,6 +5,8 @@
#ifndef SELFTEST_KVM_PROCESSOR_H
#define SELFTEST_KVM_PROCESSOR_H
+#include <linux/compiler.h>
+
/* Bits in the region/segment table entry */
#define REGION_ENTRY_ORIGIN ~0xfffUL /* region/segment table origin */
#define REGION_ENTRY_PROTECT 0x200 /* region protection bit */
@@ -19,4 +21,10 @@
#define PAGE_PROTECT 0x200 /* HW read-only bit */
#define PAGE_NOEXEC 0x100 /* HW no-execute bit */
+/* Is there a portable way to do this? */
+static inline void cpu_relax(void)
+{
+ barrier();
+}
+
#endif
diff --git a/tools/testing/selftests/kvm/include/s390x/ucall.h b/tools/testing/selftests/kvm/include/s390x/ucall.h
new file mode 100644
index 000000000000..b231bf2e49d6
--- /dev/null
+++ b/tools/testing/selftests/kvm/include/s390x/ucall.h
@@ -0,0 +1,19 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+#ifndef SELFTEST_KVM_UCALL_H
+#define SELFTEST_KVM_UCALL_H
+
+#include "kvm_util_base.h"
+
+#define UCALL_EXIT_REASON KVM_EXIT_S390_SIEIC
+
+static inline void ucall_arch_init(struct kvm_vm *vm, vm_paddr_t mmio_gpa)
+{
+}
+
+static inline void ucall_arch_do_ucall(vm_vaddr_t uc)
+{
+ /* Exit via DIAGNOSE 0x501 (normally used for breakpoints) */
+ asm volatile ("diag 0,%0,0x501" : : "a"(uc) : "memory");
+}
+
+#endif
diff --git a/tools/testing/selftests/kvm/include/sparsebit.h b/tools/testing/selftests/kvm/include/sparsebit.h
index 12a9a4b9cead..bc760761e1a3 100644
--- a/tools/testing/selftests/kvm/include/sparsebit.h
+++ b/tools/testing/selftests/kvm/include/sparsebit.h
@@ -30,26 +30,26 @@ typedef uint64_t sparsebit_num_t;
struct sparsebit *sparsebit_alloc(void);
void sparsebit_free(struct sparsebit **sbitp);
-void sparsebit_copy(struct sparsebit *dstp, struct sparsebit *src);
+void sparsebit_copy(struct sparsebit *dstp, const struct sparsebit *src);
-bool sparsebit_is_set(struct sparsebit *sbit, sparsebit_idx_t idx);
-bool sparsebit_is_set_num(struct sparsebit *sbit,
+bool sparsebit_is_set(const struct sparsebit *sbit, sparsebit_idx_t idx);
+bool sparsebit_is_set_num(const struct sparsebit *sbit,
sparsebit_idx_t idx, sparsebit_num_t num);
-bool sparsebit_is_clear(struct sparsebit *sbit, sparsebit_idx_t idx);
-bool sparsebit_is_clear_num(struct sparsebit *sbit,
+bool sparsebit_is_clear(const struct sparsebit *sbit, sparsebit_idx_t idx);
+bool sparsebit_is_clear_num(const struct sparsebit *sbit,
sparsebit_idx_t idx, sparsebit_num_t num);
-sparsebit_num_t sparsebit_num_set(struct sparsebit *sbit);
-bool sparsebit_any_set(struct sparsebit *sbit);
-bool sparsebit_any_clear(struct sparsebit *sbit);
-bool sparsebit_all_set(struct sparsebit *sbit);
-bool sparsebit_all_clear(struct sparsebit *sbit);
-sparsebit_idx_t sparsebit_first_set(struct sparsebit *sbit);
-sparsebit_idx_t sparsebit_first_clear(struct sparsebit *sbit);
-sparsebit_idx_t sparsebit_next_set(struct sparsebit *sbit, sparsebit_idx_t prev);
-sparsebit_idx_t sparsebit_next_clear(struct sparsebit *sbit, sparsebit_idx_t prev);
-sparsebit_idx_t sparsebit_next_set_num(struct sparsebit *sbit,
+sparsebit_num_t sparsebit_num_set(const struct sparsebit *sbit);
+bool sparsebit_any_set(const struct sparsebit *sbit);
+bool sparsebit_any_clear(const struct sparsebit *sbit);
+bool sparsebit_all_set(const struct sparsebit *sbit);
+bool sparsebit_all_clear(const struct sparsebit *sbit);
+sparsebit_idx_t sparsebit_first_set(const struct sparsebit *sbit);
+sparsebit_idx_t sparsebit_first_clear(const struct sparsebit *sbit);
+sparsebit_idx_t sparsebit_next_set(const struct sparsebit *sbit, sparsebit_idx_t prev);
+sparsebit_idx_t sparsebit_next_clear(const struct sparsebit *sbit, sparsebit_idx_t prev);
+sparsebit_idx_t sparsebit_next_set_num(const struct sparsebit *sbit,
sparsebit_idx_t start, sparsebit_num_t num);
-sparsebit_idx_t sparsebit_next_clear_num(struct sparsebit *sbit,
+sparsebit_idx_t sparsebit_next_clear_num(const struct sparsebit *sbit,
sparsebit_idx_t start, sparsebit_num_t num);
void sparsebit_set(struct sparsebit *sbitp, sparsebit_idx_t idx);
@@ -62,9 +62,29 @@ void sparsebit_clear_num(struct sparsebit *sbitp,
sparsebit_idx_t start, sparsebit_num_t num);
void sparsebit_clear_all(struct sparsebit *sbitp);
-void sparsebit_dump(FILE *stream, struct sparsebit *sbit,
+void sparsebit_dump(FILE *stream, const struct sparsebit *sbit,
unsigned int indent);
-void sparsebit_validate_internal(struct sparsebit *sbit);
+void sparsebit_validate_internal(const struct sparsebit *sbit);
+
+/*
+ * Iterate over an inclusive ranges within sparsebit @s. In each iteration,
+ * @range_begin and @range_end will take the beginning and end of the set
+ * range, which are of type sparsebit_idx_t.
+ *
+ * For example, if the range [3, 7] (inclusive) is set, within the
+ * iteration,@range_begin will take the value 3 and @range_end will take
+ * the value 7.
+ *
+ * Ensure that there is at least one bit set before using this macro with
+ * sparsebit_any_set(), because sparsebit_first_set() will abort if none
+ * are set.
+ */
+#define sparsebit_for_each_set_range(s, range_begin, range_end) \
+ for (range_begin = sparsebit_first_set(s), \
+ range_end = sparsebit_next_clear(s, range_begin) - 1; \
+ range_begin && range_end; \
+ range_begin = sparsebit_next_set(s, range_end), \
+ range_end = sparsebit_next_clear(s, range_begin) - 1)
#ifdef __cplusplus
}
diff --git a/tools/testing/selftests/kvm/include/test_util.h b/tools/testing/selftests/kvm/include/test_util.h
index 5eb01bf51b86..8a6e30612c86 100644
--- a/tools/testing/selftests/kvm/include/test_util.h
+++ b/tools/testing/selftests/kvm/include/test_util.h
@@ -17,8 +17,11 @@
#include <errno.h>
#include <unistd.h>
#include <fcntl.h>
+#include <sys/mman.h>
#include "kselftest.h"
+#define msecs_to_usecs(msec) ((msec) * 1000ULL)
+
static inline int _no_printf(const char *format, ...) { return 0; }
#ifdef DEBUG
@@ -32,31 +35,48 @@ static inline int _no_printf(const char *format, ...) { return 0; }
#define pr_info(...) _no_printf(__VA_ARGS__)
#endif
-void print_skip(const char *fmt, ...) __attribute__((format(printf, 1, 2)));
+void __printf(1, 2) print_skip(const char *fmt, ...);
+#define __TEST_REQUIRE(f, fmt, ...) \
+do { \
+ if (!(f)) \
+ ksft_exit_skip("- " fmt "\n", ##__VA_ARGS__); \
+} while (0)
+
+#define TEST_REQUIRE(f) __TEST_REQUIRE(f, "Requirement not met: %s", #f)
ssize_t test_write(int fd, const void *buf, size_t count);
ssize_t test_read(int fd, void *buf, size_t count);
int test_seq_read(const char *path, char **bufp, size_t *sizep);
-void test_assert(bool exp, const char *exp_str,
- const char *file, unsigned int line, const char *fmt, ...)
- __attribute__((format(printf, 5, 6)));
+void __printf(5, 6) test_assert(bool exp, const char *exp_str,
+ const char *file, unsigned int line,
+ const char *fmt, ...);
#define TEST_ASSERT(e, fmt, ...) \
test_assert((e), #e, __FILE__, __LINE__, fmt, ##__VA_ARGS__)
-#define ASSERT_EQ(a, b) do { \
- typeof(a) __a = (a); \
- typeof(b) __b = (b); \
- TEST_ASSERT(__a == __b, \
- "ASSERT_EQ(%s, %s) failed.\n" \
- "\t%s is %#lx\n" \
- "\t%s is %#lx", \
- #a, #b, #a, (unsigned long) __a, #b, (unsigned long) __b); \
+#define TEST_ASSERT_EQ(a, b) \
+do { \
+ typeof(a) __a = (a); \
+ typeof(b) __b = (b); \
+ test_assert(__a == __b, #a " == " #b, __FILE__, __LINE__, \
+ "%#lx != %#lx (%s != %s)", \
+ (unsigned long)(__a), (unsigned long)(__b), #a, #b);\
+} while (0)
+
+#define TEST_ASSERT_KVM_EXIT_REASON(vcpu, expected) do { \
+ __u32 exit_reason = (vcpu)->run->exit_reason; \
+ \
+ TEST_ASSERT(exit_reason == (expected), \
+ "Wanted KVM exit reason: %u (%s), got: %u (%s)", \
+ (expected), exit_reason_str((expected)), \
+ exit_reason, exit_reason_str(exit_reason)); \
} while (0)
-#define TEST_FAIL(fmt, ...) \
- TEST_ASSERT(false, fmt, ##__VA_ARGS__)
+#define TEST_FAIL(fmt, ...) do { \
+ TEST_ASSERT(false, fmt, ##__VA_ARGS__); \
+ __builtin_unreachable(); \
+} while (0)
size_t parse_size(const char *size);
@@ -64,5 +84,119 @@ int64_t timespec_to_ns(struct timespec ts);
struct timespec timespec_add_ns(struct timespec ts, int64_t ns);
struct timespec timespec_add(struct timespec ts1, struct timespec ts2);
struct timespec timespec_sub(struct timespec ts1, struct timespec ts2);
+struct timespec timespec_elapsed(struct timespec start);
+struct timespec timespec_div(struct timespec ts, int divisor);
+
+struct guest_random_state {
+ uint32_t seed;
+};
+
+struct guest_random_state new_guest_random_state(uint32_t seed);
+uint32_t guest_random_u32(struct guest_random_state *state);
+
+enum vm_mem_backing_src_type {
+ VM_MEM_SRC_ANONYMOUS,
+ VM_MEM_SRC_ANONYMOUS_THP,
+ VM_MEM_SRC_ANONYMOUS_HUGETLB,
+ VM_MEM_SRC_ANONYMOUS_HUGETLB_16KB,
+ VM_MEM_SRC_ANONYMOUS_HUGETLB_64KB,
+ VM_MEM_SRC_ANONYMOUS_HUGETLB_512KB,
+ VM_MEM_SRC_ANONYMOUS_HUGETLB_1MB,
+ VM_MEM_SRC_ANONYMOUS_HUGETLB_2MB,
+ VM_MEM_SRC_ANONYMOUS_HUGETLB_8MB,
+ VM_MEM_SRC_ANONYMOUS_HUGETLB_16MB,
+ VM_MEM_SRC_ANONYMOUS_HUGETLB_32MB,
+ VM_MEM_SRC_ANONYMOUS_HUGETLB_256MB,
+ VM_MEM_SRC_ANONYMOUS_HUGETLB_512MB,
+ VM_MEM_SRC_ANONYMOUS_HUGETLB_1GB,
+ VM_MEM_SRC_ANONYMOUS_HUGETLB_2GB,
+ VM_MEM_SRC_ANONYMOUS_HUGETLB_16GB,
+ VM_MEM_SRC_SHMEM,
+ VM_MEM_SRC_SHARED_HUGETLB,
+ NUM_SRC_TYPES,
+};
+
+#define DEFAULT_VM_MEM_SRC VM_MEM_SRC_ANONYMOUS
+
+struct vm_mem_backing_src_alias {
+ const char *name;
+ uint32_t flag;
+};
+
+#define MIN_RUN_DELAY_NS 200000UL
+
+bool thp_configured(void);
+size_t get_trans_hugepagesz(void);
+size_t get_def_hugetlb_pagesz(void);
+const struct vm_mem_backing_src_alias *vm_mem_backing_src_alias(uint32_t i);
+size_t get_backing_src_pagesz(uint32_t i);
+bool is_backing_src_hugetlb(uint32_t i);
+void backing_src_help(const char *flag);
+enum vm_mem_backing_src_type parse_backing_src_type(const char *type_name);
+long get_run_delay(void);
+
+/*
+ * Whether or not the given source type is shared memory (as opposed to
+ * anonymous).
+ */
+static inline bool backing_src_is_shared(enum vm_mem_backing_src_type t)
+{
+ return vm_mem_backing_src_alias(t)->flag & MAP_SHARED;
+}
+
+static inline bool backing_src_can_be_huge(enum vm_mem_backing_src_type t)
+{
+ return t != VM_MEM_SRC_ANONYMOUS && t != VM_MEM_SRC_SHMEM;
+}
+
+/* Aligns x up to the next multiple of size. Size must be a power of 2. */
+static inline uint64_t align_up(uint64_t x, uint64_t size)
+{
+ uint64_t mask = size - 1;
+
+ TEST_ASSERT(size != 0 && !(size & (size - 1)),
+ "size not a power of 2: %lu", size);
+ return ((x + mask) & ~mask);
+}
+
+static inline uint64_t align_down(uint64_t x, uint64_t size)
+{
+ uint64_t x_aligned_up = align_up(x, size);
+
+ if (x == x_aligned_up)
+ return x;
+ else
+ return x_aligned_up - size;
+}
+
+static inline void *align_ptr_up(void *x, size_t size)
+{
+ return (void *)align_up((unsigned long)x, size);
+}
+
+int atoi_paranoid(const char *num_str);
+
+static inline uint32_t atoi_positive(const char *name, const char *num_str)
+{
+ int num = atoi_paranoid(num_str);
+
+ TEST_ASSERT(num > 0, "%s must be greater than 0, got '%s'", name, num_str);
+ return num;
+}
+
+static inline uint32_t atoi_non_negative(const char *name, const char *num_str)
+{
+ int num = atoi_paranoid(num_str);
+
+ TEST_ASSERT(num >= 0, "%s must be non-negative, got '%s'", name, num_str);
+ return num;
+}
+
+int guest_vsnprintf(char *buf, int n, const char *fmt, va_list args);
+__printf(3, 4) int guest_snprintf(char *buf, int n, const char *fmt, ...);
+
+char *strdup_printf(const char *fmt, ...) __attribute__((format(printf, 1, 2), nonnull(1)));
+
+char *sys_get_cur_clocksource(void);
#endif /* SELFTEST_KVM_TEST_UTIL_H */
diff --git a/tools/testing/selftests/kvm/include/timer_test.h b/tools/testing/selftests/kvm/include/timer_test.h
new file mode 100644
index 000000000000..9b6edaafe6d4
--- /dev/null
+++ b/tools/testing/selftests/kvm/include/timer_test.h
@@ -0,0 +1,45 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * timer test specific header
+ *
+ * Copyright (C) 2018, Google LLC
+ */
+
+#ifndef SELFTEST_KVM_TIMER_TEST_H
+#define SELFTEST_KVM_TIMER_TEST_H
+
+#include "kvm_util.h"
+
+#define NR_VCPUS_DEF 4
+#define NR_TEST_ITERS_DEF 5
+#define TIMER_TEST_PERIOD_MS_DEF 10
+#define TIMER_TEST_ERR_MARGIN_US 100
+#define TIMER_TEST_MIGRATION_FREQ_MS 2
+
+/* Timer test cmdline parameters */
+struct test_args {
+ uint32_t nr_vcpus;
+ uint32_t nr_iter;
+ uint32_t timer_period_ms;
+ uint32_t migration_freq_ms;
+ uint32_t timer_err_margin_us;
+ /* Members of struct kvm_arm_counter_offset */
+ uint64_t counter_offset;
+ uint64_t reserved;
+};
+
+/* Shared variables between host and guest */
+struct test_vcpu_shared_data {
+ uint32_t nr_iter;
+ int guest_stage;
+ uint64_t xcnt;
+};
+
+extern struct test_args test_args;
+extern struct kvm_vcpu *vcpus[];
+extern struct test_vcpu_shared_data vcpu_shared_data[];
+
+struct kvm_vm *test_vm_create(void);
+void test_vm_cleanup(struct kvm_vm *vm);
+
+#endif /* SELFTEST_KVM_TIMER_TEST_H */
diff --git a/tools/testing/selftests/kvm/include/ucall_common.h b/tools/testing/selftests/kvm/include/ucall_common.h
new file mode 100644
index 000000000000..d9d6581b8d4f
--- /dev/null
+++ b/tools/testing/selftests/kvm/include/ucall_common.h
@@ -0,0 +1,116 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * Copyright (C) 2018, Google LLC.
+ */
+#ifndef SELFTEST_KVM_UCALL_COMMON_H
+#define SELFTEST_KVM_UCALL_COMMON_H
+#include "test_util.h"
+#include "ucall.h"
+
+/* Common ucalls */
+enum {
+ UCALL_NONE,
+ UCALL_SYNC,
+ UCALL_ABORT,
+ UCALL_PRINTF,
+ UCALL_DONE,
+ UCALL_UNHANDLED,
+};
+
+#define UCALL_MAX_ARGS 7
+#define UCALL_BUFFER_LEN 1024
+
+struct ucall {
+ uint64_t cmd;
+ uint64_t args[UCALL_MAX_ARGS];
+ char buffer[UCALL_BUFFER_LEN];
+
+ /* Host virtual address of this struct. */
+ struct ucall *hva;
+};
+
+void ucall_arch_init(struct kvm_vm *vm, vm_paddr_t mmio_gpa);
+void ucall_arch_do_ucall(vm_vaddr_t uc);
+void *ucall_arch_get_ucall(struct kvm_vcpu *vcpu);
+
+void ucall(uint64_t cmd, int nargs, ...);
+__printf(2, 3) void ucall_fmt(uint64_t cmd, const char *fmt, ...);
+__printf(5, 6) void ucall_assert(uint64_t cmd, const char *exp,
+ const char *file, unsigned int line,
+ const char *fmt, ...);
+uint64_t get_ucall(struct kvm_vcpu *vcpu, struct ucall *uc);
+void ucall_init(struct kvm_vm *vm, vm_paddr_t mmio_gpa);
+int ucall_nr_pages_required(uint64_t page_size);
+
+/*
+ * Perform userspace call without any associated data. This bare call avoids
+ * allocating a ucall struct, which can be useful if the atomic operations in
+ * the full ucall() are problematic and/or unwanted. Note, this will come out
+ * as UCALL_NONE on the backend.
+ */
+#define GUEST_UCALL_NONE() ucall_arch_do_ucall((vm_vaddr_t)NULL)
+
+#define GUEST_SYNC_ARGS(stage, arg1, arg2, arg3, arg4) \
+ ucall(UCALL_SYNC, 6, "hello", stage, arg1, arg2, arg3, arg4)
+#define GUEST_SYNC(stage) ucall(UCALL_SYNC, 2, "hello", stage)
+#define GUEST_SYNC1(arg0) ucall(UCALL_SYNC, 1, arg0)
+#define GUEST_SYNC2(arg0, arg1) ucall(UCALL_SYNC, 2, arg0, arg1)
+#define GUEST_SYNC3(arg0, arg1, arg2) \
+ ucall(UCALL_SYNC, 3, arg0, arg1, arg2)
+#define GUEST_SYNC4(arg0, arg1, arg2, arg3) \
+ ucall(UCALL_SYNC, 4, arg0, arg1, arg2, arg3)
+#define GUEST_SYNC5(arg0, arg1, arg2, arg3, arg4) \
+ ucall(UCALL_SYNC, 5, arg0, arg1, arg2, arg3, arg4)
+#define GUEST_SYNC6(arg0, arg1, arg2, arg3, arg4, arg5) \
+ ucall(UCALL_SYNC, 6, arg0, arg1, arg2, arg3, arg4, arg5)
+
+#define GUEST_PRINTF(_fmt, _args...) ucall_fmt(UCALL_PRINTF, _fmt, ##_args)
+#define GUEST_DONE() ucall(UCALL_DONE, 0)
+
+#define REPORT_GUEST_PRINTF(ucall) pr_info("%s", (ucall).buffer)
+
+enum guest_assert_builtin_args {
+ GUEST_ERROR_STRING,
+ GUEST_FILE,
+ GUEST_LINE,
+ GUEST_ASSERT_BUILTIN_NARGS
+};
+
+#define ____GUEST_ASSERT(_condition, _exp, _fmt, _args...) \
+do { \
+ if (!(_condition)) \
+ ucall_assert(UCALL_ABORT, _exp, __FILE__, __LINE__, _fmt, ##_args); \
+} while (0)
+
+#define __GUEST_ASSERT(_condition, _fmt, _args...) \
+ ____GUEST_ASSERT(_condition, #_condition, _fmt, ##_args)
+
+#define GUEST_ASSERT(_condition) \
+ __GUEST_ASSERT(_condition, #_condition)
+
+#define GUEST_FAIL(_fmt, _args...) \
+ ucall_assert(UCALL_ABORT, "Unconditional guest failure", \
+ __FILE__, __LINE__, _fmt, ##_args)
+
+#define GUEST_ASSERT_EQ(a, b) \
+do { \
+ typeof(a) __a = (a); \
+ typeof(b) __b = (b); \
+ ____GUEST_ASSERT(__a == __b, #a " == " #b, "%#lx != %#lx (%s != %s)", \
+ (unsigned long)(__a), (unsigned long)(__b), #a, #b); \
+} while (0)
+
+#define GUEST_ASSERT_NE(a, b) \
+do { \
+ typeof(a) __a = (a); \
+ typeof(b) __b = (b); \
+ ____GUEST_ASSERT(__a != __b, #a " != " #b, "%#lx == %#lx (%s == %s)", \
+ (unsigned long)(__a), (unsigned long)(__b), #a, #b); \
+} while (0)
+
+#define REPORT_GUEST_ASSERT(ucall) \
+ test_assert(false, (const char *)(ucall).args[GUEST_ERROR_STRING], \
+ (const char *)(ucall).args[GUEST_FILE], \
+ (ucall).args[GUEST_LINE], "%s", (ucall).buffer)
+
+#endif /* SELFTEST_KVM_UCALL_COMMON_H */
diff --git a/tools/testing/selftests/kvm/include/userfaultfd_util.h b/tools/testing/selftests/kvm/include/userfaultfd_util.h
new file mode 100644
index 000000000000..877449c34592
--- /dev/null
+++ b/tools/testing/selftests/kvm/include/userfaultfd_util.h
@@ -0,0 +1,45 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * KVM userfaultfd util
+ *
+ * Copyright (C) 2018, Red Hat, Inc.
+ * Copyright (C) 2019-2022 Google LLC
+ */
+
+#define _GNU_SOURCE /* for pipe2 */
+
+#include <inttypes.h>
+#include <time.h>
+#include <pthread.h>
+#include <linux/userfaultfd.h>
+
+#include "test_util.h"
+
+typedef int (*uffd_handler_t)(int uffd_mode, int uffd, struct uffd_msg *msg);
+
+struct uffd_desc {
+ int uffd_mode;
+ int uffd;
+ int pipefds[2];
+ useconds_t delay;
+ uffd_handler_t handler;
+ pthread_t thread;
+};
+
+struct uffd_desc *uffd_setup_demand_paging(int uffd_mode, useconds_t delay,
+ void *hva, uint64_t len,
+ uffd_handler_t handler);
+
+void uffd_stop_demand_paging(struct uffd_desc *uffd);
+
+#ifdef PRINT_PER_PAGE_UPDATES
+#define PER_PAGE_DEBUG(...) printf(__VA_ARGS__)
+#else
+#define PER_PAGE_DEBUG(...) _no_printf(__VA_ARGS__)
+#endif
+
+#ifdef PRINT_PER_VCPU_UPDATES
+#define PER_VCPU_DEBUG(...) printf(__VA_ARGS__)
+#else
+#define PER_VCPU_DEBUG(...) _no_printf(__VA_ARGS__)
+#endif
diff --git a/tools/testing/selftests/kvm/include/x86_64/apic.h b/tools/testing/selftests/kvm/include/x86_64/apic.h
new file mode 100644
index 000000000000..bed316fdecd5
--- /dev/null
+++ b/tools/testing/selftests/kvm/include/x86_64/apic.h
@@ -0,0 +1,93 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * tools/testing/selftests/kvm/include/x86_64/apic.h
+ *
+ * Copyright (C) 2021, Google LLC.
+ */
+
+#ifndef SELFTEST_KVM_APIC_H
+#define SELFTEST_KVM_APIC_H
+
+#include <stdint.h>
+
+#include "processor.h"
+
+#define APIC_DEFAULT_GPA 0xfee00000ULL
+
+/* APIC base address MSR and fields */
+#define MSR_IA32_APICBASE 0x0000001b
+#define MSR_IA32_APICBASE_BSP (1<<8)
+#define MSR_IA32_APICBASE_EXTD (1<<10)
+#define MSR_IA32_APICBASE_ENABLE (1<<11)
+#define MSR_IA32_APICBASE_BASE (0xfffff<<12)
+#define GET_APIC_BASE(x) (((x) >> 12) << 12)
+
+#define APIC_BASE_MSR 0x800
+#define X2APIC_ENABLE (1UL << 10)
+#define APIC_ID 0x20
+#define APIC_LVR 0x30
+#define GET_APIC_ID_FIELD(x) (((x) >> 24) & 0xFF)
+#define APIC_TASKPRI 0x80
+#define APIC_PROCPRI 0xA0
+#define APIC_EOI 0xB0
+#define APIC_SPIV 0xF0
+#define APIC_SPIV_FOCUS_DISABLED (1 << 9)
+#define APIC_SPIV_APIC_ENABLED (1 << 8)
+#define APIC_IRR 0x200
+#define APIC_ICR 0x300
+#define APIC_LVTCMCI 0x2f0
+#define APIC_DEST_SELF 0x40000
+#define APIC_DEST_ALLINC 0x80000
+#define APIC_DEST_ALLBUT 0xC0000
+#define APIC_ICR_RR_MASK 0x30000
+#define APIC_ICR_RR_INVALID 0x00000
+#define APIC_ICR_RR_INPROG 0x10000
+#define APIC_ICR_RR_VALID 0x20000
+#define APIC_INT_LEVELTRIG 0x08000
+#define APIC_INT_ASSERT 0x04000
+#define APIC_ICR_BUSY 0x01000
+#define APIC_DEST_LOGICAL 0x00800
+#define APIC_DEST_PHYSICAL 0x00000
+#define APIC_DM_FIXED 0x00000
+#define APIC_DM_FIXED_MASK 0x00700
+#define APIC_DM_LOWEST 0x00100
+#define APIC_DM_SMI 0x00200
+#define APIC_DM_REMRD 0x00300
+#define APIC_DM_NMI 0x00400
+#define APIC_DM_INIT 0x00500
+#define APIC_DM_STARTUP 0x00600
+#define APIC_DM_EXTINT 0x00700
+#define APIC_VECTOR_MASK 0x000FF
+#define APIC_ICR2 0x310
+#define SET_APIC_DEST_FIELD(x) ((x) << 24)
+
+void apic_disable(void);
+void xapic_enable(void);
+void x2apic_enable(void);
+
+static inline uint32_t get_bsp_flag(void)
+{
+ return rdmsr(MSR_IA32_APICBASE) & MSR_IA32_APICBASE_BSP;
+}
+
+static inline uint32_t xapic_read_reg(unsigned int reg)
+{
+ return ((volatile uint32_t *)APIC_DEFAULT_GPA)[reg >> 2];
+}
+
+static inline void xapic_write_reg(unsigned int reg, uint32_t val)
+{
+ ((volatile uint32_t *)APIC_DEFAULT_GPA)[reg >> 2] = val;
+}
+
+static inline uint64_t x2apic_read_reg(unsigned int reg)
+{
+ return rdmsr(APIC_BASE_MSR + (reg >> 4));
+}
+
+static inline void x2apic_write_reg(unsigned int reg, uint64_t value)
+{
+ wrmsr(APIC_BASE_MSR + (reg >> 4), value);
+}
+
+#endif /* SELFTEST_KVM_APIC_H */
diff --git a/tools/testing/selftests/kvm/include/evmcs.h b/tools/testing/selftests/kvm/include/x86_64/evmcs.h
index a034438b6266..901caf0e0939 100644
--- a/tools/testing/selftests/kvm/include/evmcs.h
+++ b/tools/testing/selftests/kvm/include/x86_64/evmcs.h
@@ -1,6 +1,6 @@
/* SPDX-License-Identifier: GPL-2.0 */
/*
- * tools/testing/selftests/kvm/include/vmx.h
+ * tools/testing/selftests/kvm/include/x86_64/evmcs.h
*
* Copyright (C) 2018, Red Hat, Inc.
*
@@ -10,6 +10,7 @@
#define SELFTEST_KVM_EVMCS_H
#include <stdint.h>
+#include "hyperv.h"
#include "vmx.h"
#define u16 uint16_t
@@ -20,15 +21,6 @@
extern bool enable_evmcs;
-struct hv_vp_assist_page {
- __u32 apic_assist;
- __u32 reserved;
- __u64 vtl_control[2];
- __u64 nested_enlightenments_control[2];
- __u32 enlighten_vmentry;
- __u64 current_nested_vmcs;
-};
-
struct hv_enlightened_vmcs {
u32 revision_id;
u32 abort;
@@ -41,6 +33,8 @@ struct hv_enlightened_vmcs {
u16 host_gs_selector;
u16 host_tr_selector;
+ u16 padding16_1;
+
u64 host_ia32_pat;
u64 host_ia32_efer;
@@ -159,7 +153,7 @@ struct hv_enlightened_vmcs {
u64 ept_pointer;
u16 virtual_processor_id;
- u16 padding16[3];
+ u16 padding16_2[3];
u64 padding64_2[5];
u64 guest_physical_address;
@@ -195,47 +189,63 @@ struct hv_enlightened_vmcs {
u64 guest_rip;
u32 hv_clean_fields;
- u32 hv_padding_32;
+ u32 padding32_1;
u32 hv_synthetic_controls;
struct {
u32 nested_flush_hypercall:1;
u32 msr_bitmap:1;
u32 reserved:30;
- } hv_enlightenments_control;
+ } __packed hv_enlightenments_control;
u32 hv_vp_id;
-
+ u32 padding32_2;
u64 hv_vm_id;
u64 partition_assist_page;
u64 padding64_4[4];
u64 guest_bndcfgs;
- u64 padding64_5[7];
+ u64 guest_ia32_perf_global_ctrl;
+ u64 guest_ia32_s_cet;
+ u64 guest_ssp;
+ u64 guest_ia32_int_ssp_table_addr;
+ u64 guest_ia32_lbr_ctl;
+ u64 padding64_5[2];
u64 xss_exit_bitmap;
- u64 padding64_6[7];
-};
+ u64 encls_exiting_bitmap;
+ u64 host_ia32_perf_global_ctrl;
+ u64 tsc_multiplier;
+ u64 host_ia32_s_cet;
+ u64 host_ssp;
+ u64 host_ia32_int_ssp_table_addr;
+ u64 padding64_6;
+} __packed;
-#define HV_X64_MSR_VP_ASSIST_PAGE 0x40000073
-#define HV_X64_MSR_VP_ASSIST_PAGE_ENABLE 0x00000001
-#define HV_X64_MSR_VP_ASSIST_PAGE_ADDRESS_SHIFT 12
-#define HV_X64_MSR_VP_ASSIST_PAGE_ADDRESS_MASK \
- (~((1ull << HV_X64_MSR_VP_ASSIST_PAGE_ADDRESS_SHIFT) - 1))
+#define HV_VMX_ENLIGHTENED_CLEAN_FIELD_NONE 0
+#define HV_VMX_ENLIGHTENED_CLEAN_FIELD_IO_BITMAP BIT(0)
+#define HV_VMX_ENLIGHTENED_CLEAN_FIELD_MSR_BITMAP BIT(1)
+#define HV_VMX_ENLIGHTENED_CLEAN_FIELD_CONTROL_GRP2 BIT(2)
+#define HV_VMX_ENLIGHTENED_CLEAN_FIELD_CONTROL_GRP1 BIT(3)
+#define HV_VMX_ENLIGHTENED_CLEAN_FIELD_CONTROL_PROC BIT(4)
+#define HV_VMX_ENLIGHTENED_CLEAN_FIELD_CONTROL_EVENT BIT(5)
+#define HV_VMX_ENLIGHTENED_CLEAN_FIELD_CONTROL_ENTRY BIT(6)
+#define HV_VMX_ENLIGHTENED_CLEAN_FIELD_CONTROL_EXCPN BIT(7)
+#define HV_VMX_ENLIGHTENED_CLEAN_FIELD_CRDR BIT(8)
+#define HV_VMX_ENLIGHTENED_CLEAN_FIELD_CONTROL_XLAT BIT(9)
+#define HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_BASIC BIT(10)
+#define HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP1 BIT(11)
+#define HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP2 BIT(12)
+#define HV_VMX_ENLIGHTENED_CLEAN_FIELD_HOST_POINTER BIT(13)
+#define HV_VMX_ENLIGHTENED_CLEAN_FIELD_HOST_GRP1 BIT(14)
+#define HV_VMX_ENLIGHTENED_CLEAN_FIELD_ENLIGHTENMENTSCONTROL BIT(15)
+#define HV_VMX_ENLIGHTENED_CLEAN_FIELD_ALL 0xFFFF
+
+#define HV_VMX_SYNTHETIC_EXIT_REASON_TRAP_AFTER_FLUSH 0x10000031
extern struct hv_enlightened_vmcs *current_evmcs;
-extern struct hv_vp_assist_page *current_vp_assist;
-int vcpu_enable_evmcs(struct kvm_vm *vm, int vcpu_id);
+int vcpu_enable_evmcs(struct kvm_vcpu *vcpu);
-static inline int enable_vp_assist(uint64_t vp_assist_pa, void *vp_assist)
+static inline void evmcs_enable(void)
{
- u64 val = (vp_assist_pa & HV_X64_MSR_VP_ASSIST_PAGE_ADDRESS_MASK) |
- HV_X64_MSR_VP_ASSIST_PAGE_ENABLE;
-
- wrmsr(HV_X64_MSR_VP_ASSIST_PAGE, val);
-
- current_vp_assist = vp_assist;
-
enable_evmcs = true;
-
- return 0;
}
static inline int evmcs_vmptrld(uint64_t vmcs_pa, void *vmcs)
@@ -248,6 +258,16 @@ static inline int evmcs_vmptrld(uint64_t vmcs_pa, void *vmcs)
return 0;
}
+static inline bool load_evmcs(struct hyperv_test_pages *hv)
+{
+ if (evmcs_vmptrld(hv->enlightened_vmcs_gpa, hv->enlightened_vmcs))
+ return false;
+
+ current_evmcs->revision_id = EVMCS_VERSION;
+
+ return true;
+}
+
static inline int evmcs_vmptrst(uint64_t *value)
{
*value = current_vp_assist->current_nested_vmcs &
@@ -637,6 +657,18 @@ static inline int evmcs_vmread(uint64_t encoding, uint64_t *value)
case VIRTUAL_PROCESSOR_ID:
*value = current_evmcs->virtual_processor_id;
break;
+ case HOST_IA32_PERF_GLOBAL_CTRL:
+ *value = current_evmcs->host_ia32_perf_global_ctrl;
+ break;
+ case GUEST_IA32_PERF_GLOBAL_CTRL:
+ *value = current_evmcs->guest_ia32_perf_global_ctrl;
+ break;
+ case ENCLS_EXITING_BITMAP:
+ *value = current_evmcs->encls_exiting_bitmap;
+ break;
+ case TSC_MULTIPLIER:
+ *value = current_evmcs->tsc_multiplier;
+ break;
default: return 1;
}
@@ -648,381 +680,523 @@ static inline int evmcs_vmwrite(uint64_t encoding, uint64_t value)
switch (encoding) {
case GUEST_RIP:
current_evmcs->guest_rip = value;
+ current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_NONE;
break;
case GUEST_RSP:
current_evmcs->guest_rsp = value;
+ current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_BASIC;
break;
case GUEST_RFLAGS:
current_evmcs->guest_rflags = value;
+ current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_BASIC;
break;
case HOST_IA32_PAT:
current_evmcs->host_ia32_pat = value;
+ current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_HOST_GRP1;
break;
case HOST_IA32_EFER:
current_evmcs->host_ia32_efer = value;
+ current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_HOST_GRP1;
break;
case HOST_CR0:
current_evmcs->host_cr0 = value;
+ current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_HOST_GRP1;
break;
case HOST_CR3:
current_evmcs->host_cr3 = value;
+ current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_HOST_GRP1;
break;
case HOST_CR4:
current_evmcs->host_cr4 = value;
+ current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_HOST_GRP1;
break;
case HOST_IA32_SYSENTER_ESP:
current_evmcs->host_ia32_sysenter_esp = value;
+ current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_HOST_GRP1;
break;
case HOST_IA32_SYSENTER_EIP:
current_evmcs->host_ia32_sysenter_eip = value;
+ current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_HOST_GRP1;
break;
case HOST_RIP:
current_evmcs->host_rip = value;
+ current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_HOST_GRP1;
break;
case IO_BITMAP_A:
current_evmcs->io_bitmap_a = value;
+ current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_IO_BITMAP;
break;
case IO_BITMAP_B:
current_evmcs->io_bitmap_b = value;
+ current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_IO_BITMAP;
break;
case MSR_BITMAP:
current_evmcs->msr_bitmap = value;
+ current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_MSR_BITMAP;
break;
case GUEST_ES_BASE:
current_evmcs->guest_es_base = value;
+ current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP2;
break;
case GUEST_CS_BASE:
current_evmcs->guest_cs_base = value;
+ current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP2;
break;
case GUEST_SS_BASE:
current_evmcs->guest_ss_base = value;
+ current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP2;
break;
case GUEST_DS_BASE:
current_evmcs->guest_ds_base = value;
+ current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP2;
break;
case GUEST_FS_BASE:
current_evmcs->guest_fs_base = value;
+ current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP2;
break;
case GUEST_GS_BASE:
current_evmcs->guest_gs_base = value;
+ current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP2;
break;
case GUEST_LDTR_BASE:
current_evmcs->guest_ldtr_base = value;
+ current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP2;
break;
case GUEST_TR_BASE:
current_evmcs->guest_tr_base = value;
+ current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP2;
break;
case GUEST_GDTR_BASE:
current_evmcs->guest_gdtr_base = value;
+ current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP2;
break;
case GUEST_IDTR_BASE:
current_evmcs->guest_idtr_base = value;
+ current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP2;
break;
case TSC_OFFSET:
current_evmcs->tsc_offset = value;
+ current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_CONTROL_GRP2;
break;
case VIRTUAL_APIC_PAGE_ADDR:
current_evmcs->virtual_apic_page_addr = value;
+ current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_CONTROL_GRP2;
break;
case VMCS_LINK_POINTER:
current_evmcs->vmcs_link_pointer = value;
+ current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP1;
break;
case GUEST_IA32_DEBUGCTL:
current_evmcs->guest_ia32_debugctl = value;
+ current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP1;
break;
case GUEST_IA32_PAT:
current_evmcs->guest_ia32_pat = value;
+ current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP1;
break;
case GUEST_IA32_EFER:
current_evmcs->guest_ia32_efer = value;
+ current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP1;
break;
case GUEST_PDPTR0:
current_evmcs->guest_pdptr0 = value;
+ current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP1;
break;
case GUEST_PDPTR1:
current_evmcs->guest_pdptr1 = value;
+ current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP1;
break;
case GUEST_PDPTR2:
current_evmcs->guest_pdptr2 = value;
+ current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP1;
break;
case GUEST_PDPTR3:
current_evmcs->guest_pdptr3 = value;
+ current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP1;
break;
case GUEST_PENDING_DBG_EXCEPTIONS:
current_evmcs->guest_pending_dbg_exceptions = value;
+ current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP1;
break;
case GUEST_SYSENTER_ESP:
current_evmcs->guest_sysenter_esp = value;
+ current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP1;
break;
case GUEST_SYSENTER_EIP:
current_evmcs->guest_sysenter_eip = value;
+ current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP1;
break;
case CR0_GUEST_HOST_MASK:
current_evmcs->cr0_guest_host_mask = value;
+ current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_CRDR;
break;
case CR4_GUEST_HOST_MASK:
current_evmcs->cr4_guest_host_mask = value;
+ current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_CRDR;
break;
case CR0_READ_SHADOW:
current_evmcs->cr0_read_shadow = value;
+ current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_CRDR;
break;
case CR4_READ_SHADOW:
current_evmcs->cr4_read_shadow = value;
+ current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_CRDR;
break;
case GUEST_CR0:
current_evmcs->guest_cr0 = value;
+ current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_CRDR;
break;
case GUEST_CR3:
current_evmcs->guest_cr3 = value;
+ current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_CRDR;
break;
case GUEST_CR4:
current_evmcs->guest_cr4 = value;
+ current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_CRDR;
break;
case GUEST_DR7:
current_evmcs->guest_dr7 = value;
+ current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_CRDR;
break;
case HOST_FS_BASE:
current_evmcs->host_fs_base = value;
+ current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_HOST_POINTER;
break;
case HOST_GS_BASE:
current_evmcs->host_gs_base = value;
+ current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_HOST_POINTER;
break;
case HOST_TR_BASE:
current_evmcs->host_tr_base = value;
+ current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_HOST_POINTER;
break;
case HOST_GDTR_BASE:
current_evmcs->host_gdtr_base = value;
+ current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_HOST_POINTER;
break;
case HOST_IDTR_BASE:
current_evmcs->host_idtr_base = value;
+ current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_HOST_POINTER;
break;
case HOST_RSP:
current_evmcs->host_rsp = value;
+ current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_HOST_POINTER;
break;
case EPT_POINTER:
current_evmcs->ept_pointer = value;
+ current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_CONTROL_XLAT;
break;
case GUEST_BNDCFGS:
current_evmcs->guest_bndcfgs = value;
+ current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP1;
break;
case XSS_EXIT_BITMAP:
current_evmcs->xss_exit_bitmap = value;
+ current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_CONTROL_GRP2;
break;
case GUEST_PHYSICAL_ADDRESS:
current_evmcs->guest_physical_address = value;
+ current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_NONE;
break;
case EXIT_QUALIFICATION:
current_evmcs->exit_qualification = value;
+ current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_NONE;
break;
case GUEST_LINEAR_ADDRESS:
current_evmcs->guest_linear_address = value;
+ current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_NONE;
break;
case VM_EXIT_MSR_STORE_ADDR:
current_evmcs->vm_exit_msr_store_addr = value;
+ current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_ALL;
break;
case VM_EXIT_MSR_LOAD_ADDR:
current_evmcs->vm_exit_msr_load_addr = value;
+ current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_ALL;
break;
case VM_ENTRY_MSR_LOAD_ADDR:
current_evmcs->vm_entry_msr_load_addr = value;
+ current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_ALL;
break;
case CR3_TARGET_VALUE0:
current_evmcs->cr3_target_value0 = value;
+ current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_ALL;
break;
case CR3_TARGET_VALUE1:
current_evmcs->cr3_target_value1 = value;
+ current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_ALL;
break;
case CR3_TARGET_VALUE2:
current_evmcs->cr3_target_value2 = value;
+ current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_ALL;
break;
case CR3_TARGET_VALUE3:
current_evmcs->cr3_target_value3 = value;
+ current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_ALL;
break;
case TPR_THRESHOLD:
current_evmcs->tpr_threshold = value;
+ current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_NONE;
break;
case GUEST_INTERRUPTIBILITY_INFO:
current_evmcs->guest_interruptibility_info = value;
+ current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_BASIC;
break;
case CPU_BASED_VM_EXEC_CONTROL:
current_evmcs->cpu_based_vm_exec_control = value;
+ current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_CONTROL_PROC;
break;
case EXCEPTION_BITMAP:
current_evmcs->exception_bitmap = value;
+ current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_CONTROL_EXCPN;
break;
case VM_ENTRY_CONTROLS:
current_evmcs->vm_entry_controls = value;
+ current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_CONTROL_ENTRY;
break;
case VM_ENTRY_INTR_INFO_FIELD:
current_evmcs->vm_entry_intr_info_field = value;
+ current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_CONTROL_EVENT;
break;
case VM_ENTRY_EXCEPTION_ERROR_CODE:
current_evmcs->vm_entry_exception_error_code = value;
+ current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_CONTROL_EVENT;
break;
case VM_ENTRY_INSTRUCTION_LEN:
current_evmcs->vm_entry_instruction_len = value;
+ current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_CONTROL_EVENT;
break;
case HOST_IA32_SYSENTER_CS:
current_evmcs->host_ia32_sysenter_cs = value;
+ current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_HOST_GRP1;
break;
case PIN_BASED_VM_EXEC_CONTROL:
current_evmcs->pin_based_vm_exec_control = value;
+ current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_CONTROL_GRP1;
break;
case VM_EXIT_CONTROLS:
current_evmcs->vm_exit_controls = value;
+ current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_CONTROL_GRP1;
break;
case SECONDARY_VM_EXEC_CONTROL:
current_evmcs->secondary_vm_exec_control = value;
+ current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_CONTROL_GRP1;
break;
case GUEST_ES_LIMIT:
current_evmcs->guest_es_limit = value;
+ current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP2;
break;
case GUEST_CS_LIMIT:
current_evmcs->guest_cs_limit = value;
+ current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP2;
break;
case GUEST_SS_LIMIT:
current_evmcs->guest_ss_limit = value;
+ current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP2;
break;
case GUEST_DS_LIMIT:
current_evmcs->guest_ds_limit = value;
+ current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP2;
break;
case GUEST_FS_LIMIT:
current_evmcs->guest_fs_limit = value;
+ current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP2;
break;
case GUEST_GS_LIMIT:
current_evmcs->guest_gs_limit = value;
+ current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP2;
break;
case GUEST_LDTR_LIMIT:
current_evmcs->guest_ldtr_limit = value;
+ current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP2;
break;
case GUEST_TR_LIMIT:
current_evmcs->guest_tr_limit = value;
+ current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP2;
break;
case GUEST_GDTR_LIMIT:
current_evmcs->guest_gdtr_limit = value;
+ current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP2;
break;
case GUEST_IDTR_LIMIT:
current_evmcs->guest_idtr_limit = value;
+ current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP2;
break;
case GUEST_ES_AR_BYTES:
current_evmcs->guest_es_ar_bytes = value;
+ current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP2;
break;
case GUEST_CS_AR_BYTES:
current_evmcs->guest_cs_ar_bytes = value;
+ current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP2;
break;
case GUEST_SS_AR_BYTES:
current_evmcs->guest_ss_ar_bytes = value;
+ current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP2;
break;
case GUEST_DS_AR_BYTES:
current_evmcs->guest_ds_ar_bytes = value;
+ current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP2;
break;
case GUEST_FS_AR_BYTES:
current_evmcs->guest_fs_ar_bytes = value;
+ current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP2;
break;
case GUEST_GS_AR_BYTES:
current_evmcs->guest_gs_ar_bytes = value;
+ current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP2;
break;
case GUEST_LDTR_AR_BYTES:
current_evmcs->guest_ldtr_ar_bytes = value;
+ current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP2;
break;
case GUEST_TR_AR_BYTES:
current_evmcs->guest_tr_ar_bytes = value;
+ current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP2;
break;
case GUEST_ACTIVITY_STATE:
current_evmcs->guest_activity_state = value;
+ current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP1;
break;
case GUEST_SYSENTER_CS:
current_evmcs->guest_sysenter_cs = value;
+ current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP1;
break;
case VM_INSTRUCTION_ERROR:
current_evmcs->vm_instruction_error = value;
+ current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_NONE;
break;
case VM_EXIT_REASON:
current_evmcs->vm_exit_reason = value;
+ current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_NONE;
break;
case VM_EXIT_INTR_INFO:
current_evmcs->vm_exit_intr_info = value;
+ current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_NONE;
break;
case VM_EXIT_INTR_ERROR_CODE:
current_evmcs->vm_exit_intr_error_code = value;
+ current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_NONE;
break;
case IDT_VECTORING_INFO_FIELD:
current_evmcs->idt_vectoring_info_field = value;
+ current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_NONE;
break;
case IDT_VECTORING_ERROR_CODE:
current_evmcs->idt_vectoring_error_code = value;
+ current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_NONE;
break;
case VM_EXIT_INSTRUCTION_LEN:
current_evmcs->vm_exit_instruction_len = value;
+ current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_NONE;
break;
case VMX_INSTRUCTION_INFO:
current_evmcs->vmx_instruction_info = value;
+ current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_NONE;
break;
case PAGE_FAULT_ERROR_CODE_MASK:
current_evmcs->page_fault_error_code_mask = value;
+ current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_ALL;
break;
case PAGE_FAULT_ERROR_CODE_MATCH:
current_evmcs->page_fault_error_code_match = value;
+ current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_ALL;
break;
case CR3_TARGET_COUNT:
current_evmcs->cr3_target_count = value;
+ current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_ALL;
break;
case VM_EXIT_MSR_STORE_COUNT:
current_evmcs->vm_exit_msr_store_count = value;
+ current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_ALL;
break;
case VM_EXIT_MSR_LOAD_COUNT:
current_evmcs->vm_exit_msr_load_count = value;
+ current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_ALL;
break;
case VM_ENTRY_MSR_LOAD_COUNT:
current_evmcs->vm_entry_msr_load_count = value;
+ current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_ALL;
break;
case HOST_ES_SELECTOR:
current_evmcs->host_es_selector = value;
+ current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_HOST_GRP1;
break;
case HOST_CS_SELECTOR:
current_evmcs->host_cs_selector = value;
+ current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_HOST_GRP1;
break;
case HOST_SS_SELECTOR:
current_evmcs->host_ss_selector = value;
+ current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_HOST_GRP1;
break;
case HOST_DS_SELECTOR:
current_evmcs->host_ds_selector = value;
+ current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_HOST_GRP1;
break;
case HOST_FS_SELECTOR:
current_evmcs->host_fs_selector = value;
+ current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_HOST_GRP1;
break;
case HOST_GS_SELECTOR:
current_evmcs->host_gs_selector = value;
+ current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_HOST_GRP1;
break;
case HOST_TR_SELECTOR:
current_evmcs->host_tr_selector = value;
+ current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_HOST_GRP1;
break;
case GUEST_ES_SELECTOR:
current_evmcs->guest_es_selector = value;
+ current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP2;
break;
case GUEST_CS_SELECTOR:
current_evmcs->guest_cs_selector = value;
+ current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP2;
break;
case GUEST_SS_SELECTOR:
current_evmcs->guest_ss_selector = value;
+ current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP2;
break;
case GUEST_DS_SELECTOR:
current_evmcs->guest_ds_selector = value;
+ current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP2;
break;
case GUEST_FS_SELECTOR:
current_evmcs->guest_fs_selector = value;
+ current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP2;
break;
case GUEST_GS_SELECTOR:
current_evmcs->guest_gs_selector = value;
+ current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP2;
break;
case GUEST_LDTR_SELECTOR:
current_evmcs->guest_ldtr_selector = value;
+ current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP2;
break;
case GUEST_TR_SELECTOR:
current_evmcs->guest_tr_selector = value;
+ current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP2;
break;
case VIRTUAL_PROCESSOR_ID:
current_evmcs->virtual_processor_id = value;
+ current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_CONTROL_XLAT;
+ break;
+ case HOST_IA32_PERF_GLOBAL_CTRL:
+ current_evmcs->host_ia32_perf_global_ctrl = value;
+ current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_HOST_GRP1;
+ break;
+ case GUEST_IA32_PERF_GLOBAL_CTRL:
+ current_evmcs->guest_ia32_perf_global_ctrl = value;
+ current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP1;
+ break;
+ case ENCLS_EXITING_BITMAP:
+ current_evmcs->encls_exiting_bitmap = value;
+ current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_CONTROL_GRP2;
+ break;
+ case TSC_MULTIPLIER:
+ current_evmcs->tsc_multiplier = value;
+ current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_CONTROL_GRP2;
break;
default: return 1;
}
@@ -1070,7 +1244,10 @@ static inline int evmcs_vmresume(void)
{
int ret;
- current_evmcs->hv_clean_fields = 0;
+ /* HOST_RIP */
+ current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_HOST_GRP1;
+ /* HOST_RSP */
+ current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_HOST_POINTER;
__asm__ __volatile__("push %%rbp;"
"push %%rcx;"
diff --git a/tools/testing/selftests/kvm/include/x86_64/hyperv.h b/tools/testing/selftests/kvm/include/x86_64/hyperv.h
new file mode 100644
index 000000000000..fa65b908b13e
--- /dev/null
+++ b/tools/testing/selftests/kvm/include/x86_64/hyperv.h
@@ -0,0 +1,346 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * tools/testing/selftests/kvm/include/x86_64/hyperv.h
+ *
+ * Copyright (C) 2021, Red Hat, Inc.
+ *
+ */
+
+#ifndef SELFTEST_KVM_HYPERV_H
+#define SELFTEST_KVM_HYPERV_H
+
+#include "processor.h"
+
+#define HYPERV_CPUID_VENDOR_AND_MAX_FUNCTIONS 0x40000000
+#define HYPERV_CPUID_INTERFACE 0x40000001
+#define HYPERV_CPUID_VERSION 0x40000002
+#define HYPERV_CPUID_FEATURES 0x40000003
+#define HYPERV_CPUID_ENLIGHTMENT_INFO 0x40000004
+#define HYPERV_CPUID_IMPLEMENT_LIMITS 0x40000005
+#define HYPERV_CPUID_CPU_MANAGEMENT_FEATURES 0x40000007
+#define HYPERV_CPUID_NESTED_FEATURES 0x4000000A
+#define HYPERV_CPUID_SYNDBG_VENDOR_AND_MAX_FUNCTIONS 0x40000080
+#define HYPERV_CPUID_SYNDBG_INTERFACE 0x40000081
+#define HYPERV_CPUID_SYNDBG_PLATFORM_CAPABILITIES 0x40000082
+
+#define HV_X64_MSR_GUEST_OS_ID 0x40000000
+#define HV_X64_MSR_HYPERCALL 0x40000001
+#define HV_X64_MSR_VP_INDEX 0x40000002
+#define HV_X64_MSR_RESET 0x40000003
+#define HV_X64_MSR_VP_RUNTIME 0x40000010
+#define HV_X64_MSR_TIME_REF_COUNT 0x40000020
+#define HV_X64_MSR_REFERENCE_TSC 0x40000021
+#define HV_X64_MSR_TSC_FREQUENCY 0x40000022
+#define HV_X64_MSR_APIC_FREQUENCY 0x40000023
+#define HV_X64_MSR_EOI 0x40000070
+#define HV_X64_MSR_ICR 0x40000071
+#define HV_X64_MSR_TPR 0x40000072
+#define HV_X64_MSR_VP_ASSIST_PAGE 0x40000073
+#define HV_X64_MSR_SCONTROL 0x40000080
+#define HV_X64_MSR_SVERSION 0x40000081
+#define HV_X64_MSR_SIEFP 0x40000082
+#define HV_X64_MSR_SIMP 0x40000083
+#define HV_X64_MSR_EOM 0x40000084
+#define HV_X64_MSR_SINT0 0x40000090
+#define HV_X64_MSR_SINT1 0x40000091
+#define HV_X64_MSR_SINT2 0x40000092
+#define HV_X64_MSR_SINT3 0x40000093
+#define HV_X64_MSR_SINT4 0x40000094
+#define HV_X64_MSR_SINT5 0x40000095
+#define HV_X64_MSR_SINT6 0x40000096
+#define HV_X64_MSR_SINT7 0x40000097
+#define HV_X64_MSR_SINT8 0x40000098
+#define HV_X64_MSR_SINT9 0x40000099
+#define HV_X64_MSR_SINT10 0x4000009A
+#define HV_X64_MSR_SINT11 0x4000009B
+#define HV_X64_MSR_SINT12 0x4000009C
+#define HV_X64_MSR_SINT13 0x4000009D
+#define HV_X64_MSR_SINT14 0x4000009E
+#define HV_X64_MSR_SINT15 0x4000009F
+#define HV_X64_MSR_STIMER0_CONFIG 0x400000B0
+#define HV_X64_MSR_STIMER0_COUNT 0x400000B1
+#define HV_X64_MSR_STIMER1_CONFIG 0x400000B2
+#define HV_X64_MSR_STIMER1_COUNT 0x400000B3
+#define HV_X64_MSR_STIMER2_CONFIG 0x400000B4
+#define HV_X64_MSR_STIMER2_COUNT 0x400000B5
+#define HV_X64_MSR_STIMER3_CONFIG 0x400000B6
+#define HV_X64_MSR_STIMER3_COUNT 0x400000B7
+#define HV_X64_MSR_GUEST_IDLE 0x400000F0
+#define HV_X64_MSR_CRASH_P0 0x40000100
+#define HV_X64_MSR_CRASH_P1 0x40000101
+#define HV_X64_MSR_CRASH_P2 0x40000102
+#define HV_X64_MSR_CRASH_P3 0x40000103
+#define HV_X64_MSR_CRASH_P4 0x40000104
+#define HV_X64_MSR_CRASH_CTL 0x40000105
+#define HV_X64_MSR_REENLIGHTENMENT_CONTROL 0x40000106
+#define HV_X64_MSR_TSC_EMULATION_CONTROL 0x40000107
+#define HV_X64_MSR_TSC_EMULATION_STATUS 0x40000108
+#define HV_X64_MSR_TSC_INVARIANT_CONTROL 0x40000118
+
+#define HV_X64_MSR_SYNDBG_CONTROL 0x400000F1
+#define HV_X64_MSR_SYNDBG_STATUS 0x400000F2
+#define HV_X64_MSR_SYNDBG_SEND_BUFFER 0x400000F3
+#define HV_X64_MSR_SYNDBG_RECV_BUFFER 0x400000F4
+#define HV_X64_MSR_SYNDBG_PENDING_BUFFER 0x400000F5
+#define HV_X64_MSR_SYNDBG_OPTIONS 0x400000FF
+
+/* HYPERV_CPUID_FEATURES.EAX */
+#define HV_MSR_VP_RUNTIME_AVAILABLE \
+ KVM_X86_CPU_FEATURE(HYPERV_CPUID_FEATURES, 0, EAX, 0)
+#define HV_MSR_TIME_REF_COUNT_AVAILABLE \
+ KVM_X86_CPU_FEATURE(HYPERV_CPUID_FEATURES, 0, EAX, 1)
+#define HV_MSR_SYNIC_AVAILABLE \
+ KVM_X86_CPU_FEATURE(HYPERV_CPUID_FEATURES, 0, EAX, 2)
+#define HV_MSR_SYNTIMER_AVAILABLE \
+ KVM_X86_CPU_FEATURE(HYPERV_CPUID_FEATURES, 0, EAX, 3)
+#define HV_MSR_APIC_ACCESS_AVAILABLE \
+ KVM_X86_CPU_FEATURE(HYPERV_CPUID_FEATURES, 0, EAX, 4)
+#define HV_MSR_HYPERCALL_AVAILABLE \
+ KVM_X86_CPU_FEATURE(HYPERV_CPUID_FEATURES, 0, EAX, 5)
+#define HV_MSR_VP_INDEX_AVAILABLE \
+ KVM_X86_CPU_FEATURE(HYPERV_CPUID_FEATURES, 0, EAX, 6)
+#define HV_MSR_RESET_AVAILABLE \
+ KVM_X86_CPU_FEATURE(HYPERV_CPUID_FEATURES, 0, EAX, 7)
+#define HV_MSR_STAT_PAGES_AVAILABLE \
+ KVM_X86_CPU_FEATURE(HYPERV_CPUID_FEATURES, 0, EAX, 8)
+#define HV_MSR_REFERENCE_TSC_AVAILABLE \
+ KVM_X86_CPU_FEATURE(HYPERV_CPUID_FEATURES, 0, EAX, 9)
+#define HV_MSR_GUEST_IDLE_AVAILABLE \
+ KVM_X86_CPU_FEATURE(HYPERV_CPUID_FEATURES, 0, EAX, 10)
+#define HV_ACCESS_FREQUENCY_MSRS \
+ KVM_X86_CPU_FEATURE(HYPERV_CPUID_FEATURES, 0, EAX, 11)
+#define HV_ACCESS_REENLIGHTENMENT \
+ KVM_X86_CPU_FEATURE(HYPERV_CPUID_FEATURES, 0, EAX, 13)
+#define HV_ACCESS_TSC_INVARIANT \
+ KVM_X86_CPU_FEATURE(HYPERV_CPUID_FEATURES, 0, EAX, 15)
+
+/* HYPERV_CPUID_FEATURES.EBX */
+#define HV_CREATE_PARTITIONS \
+ KVM_X86_CPU_FEATURE(HYPERV_CPUID_FEATURES, 0, EBX, 0)
+#define HV_ACCESS_PARTITION_ID \
+ KVM_X86_CPU_FEATURE(HYPERV_CPUID_FEATURES, 0, EBX, 1)
+#define HV_ACCESS_MEMORY_POOL \
+ KVM_X86_CPU_FEATURE(HYPERV_CPUID_FEATURES, 0, EBX, 2)
+#define HV_ADJUST_MESSAGE_BUFFERS \
+ KVM_X86_CPU_FEATURE(HYPERV_CPUID_FEATURES, 0, EBX, 3)
+#define HV_POST_MESSAGES \
+ KVM_X86_CPU_FEATURE(HYPERV_CPUID_FEATURES, 0, EBX, 4)
+#define HV_SIGNAL_EVENTS \
+ KVM_X86_CPU_FEATURE(HYPERV_CPUID_FEATURES, 0, EBX, 5)
+#define HV_CREATE_PORT \
+ KVM_X86_CPU_FEATURE(HYPERV_CPUID_FEATURES, 0, EBX, 6)
+#define HV_CONNECT_PORT \
+ KVM_X86_CPU_FEATURE(HYPERV_CPUID_FEATURES, 0, EBX, 7)
+#define HV_ACCESS_STATS \
+ KVM_X86_CPU_FEATURE(HYPERV_CPUID_FEATURES, 0, EBX, 8)
+#define HV_DEBUGGING \
+ KVM_X86_CPU_FEATURE(HYPERV_CPUID_FEATURES, 0, EBX, 11)
+#define HV_CPU_MANAGEMENT \
+ KVM_X86_CPU_FEATURE(HYPERV_CPUID_FEATURES, 0, EBX, 12)
+#define HV_ENABLE_EXTENDED_HYPERCALLS \
+ KVM_X86_CPU_FEATURE(HYPERV_CPUID_FEATURES, 0, EBX, 20)
+#define HV_ISOLATION \
+ KVM_X86_CPU_FEATURE(HYPERV_CPUID_FEATURES, 0, EBX, 22)
+
+/* HYPERV_CPUID_FEATURES.EDX */
+#define HV_X64_MWAIT_AVAILABLE \
+ KVM_X86_CPU_FEATURE(HYPERV_CPUID_FEATURES, 0, EDX, 0)
+#define HV_X64_GUEST_DEBUGGING_AVAILABLE \
+ KVM_X86_CPU_FEATURE(HYPERV_CPUID_FEATURES, 0, EDX, 1)
+#define HV_X64_PERF_MONITOR_AVAILABLE \
+ KVM_X86_CPU_FEATURE(HYPERV_CPUID_FEATURES, 0, EDX, 2)
+#define HV_X64_CPU_DYNAMIC_PARTITIONING_AVAILABLE \
+ KVM_X86_CPU_FEATURE(HYPERV_CPUID_FEATURES, 0, EDX, 3)
+#define HV_X64_HYPERCALL_XMM_INPUT_AVAILABLE \
+ KVM_X86_CPU_FEATURE(HYPERV_CPUID_FEATURES, 0, EDX, 4)
+#define HV_X64_GUEST_IDLE_STATE_AVAILABLE \
+ KVM_X86_CPU_FEATURE(HYPERV_CPUID_FEATURES, 0, EDX, 5)
+#define HV_FEATURE_FREQUENCY_MSRS_AVAILABLE \
+ KVM_X86_CPU_FEATURE(HYPERV_CPUID_FEATURES, 0, EDX, 8)
+#define HV_FEATURE_GUEST_CRASH_MSR_AVAILABLE \
+ KVM_X86_CPU_FEATURE(HYPERV_CPUID_FEATURES, 0, EDX, 10)
+#define HV_FEATURE_DEBUG_MSRS_AVAILABLE \
+ KVM_X86_CPU_FEATURE(HYPERV_CPUID_FEATURES, 0, EDX, 11)
+#define HV_STIMER_DIRECT_MODE_AVAILABLE \
+ KVM_X86_CPU_FEATURE(HYPERV_CPUID_FEATURES, 0, EDX, 19)
+
+/* HYPERV_CPUID_ENLIGHTMENT_INFO.EAX */
+#define HV_X64_AS_SWITCH_RECOMMENDED \
+ KVM_X86_CPU_FEATURE(HYPERV_CPUID_ENLIGHTMENT_INFO, 0, EAX, 0)
+#define HV_X64_LOCAL_TLB_FLUSH_RECOMMENDED \
+ KVM_X86_CPU_FEATURE(HYPERV_CPUID_ENLIGHTMENT_INFO, 0, EAX, 1)
+#define HV_X64_REMOTE_TLB_FLUSH_RECOMMENDED \
+ KVM_X86_CPU_FEATURE(HYPERV_CPUID_ENLIGHTMENT_INFO, 0, EAX, 2)
+#define HV_X64_APIC_ACCESS_RECOMMENDED \
+ KVM_X86_CPU_FEATURE(HYPERV_CPUID_ENLIGHTMENT_INFO, 0, EAX, 3)
+#define HV_X64_SYSTEM_RESET_RECOMMENDED \
+ KVM_X86_CPU_FEATURE(HYPERV_CPUID_ENLIGHTMENT_INFO, 0, EAX, 4)
+#define HV_X64_RELAXED_TIMING_RECOMMENDED \
+ KVM_X86_CPU_FEATURE(HYPERV_CPUID_ENLIGHTMENT_INFO, 0, EAX, 5)
+#define HV_DEPRECATING_AEOI_RECOMMENDED \
+ KVM_X86_CPU_FEATURE(HYPERV_CPUID_ENLIGHTMENT_INFO, 0, EAX, 9)
+#define HV_X64_CLUSTER_IPI_RECOMMENDED \
+ KVM_X86_CPU_FEATURE(HYPERV_CPUID_ENLIGHTMENT_INFO, 0, EAX, 10)
+#define HV_X64_EX_PROCESSOR_MASKS_RECOMMENDED \
+ KVM_X86_CPU_FEATURE(HYPERV_CPUID_ENLIGHTMENT_INFO, 0, EAX, 11)
+#define HV_X64_ENLIGHTENED_VMCS_RECOMMENDED \
+ KVM_X86_CPU_FEATURE(HYPERV_CPUID_ENLIGHTMENT_INFO, 0, EAX, 14)
+
+/* HYPERV_CPUID_SYNDBG_PLATFORM_CAPABILITIES.EAX */
+#define HV_X64_SYNDBG_CAP_ALLOW_KERNEL_DEBUGGING \
+ KVM_X86_CPU_FEATURE(HYPERV_CPUID_SYNDBG_PLATFORM_CAPABILITIES, 0, EAX, 1)
+
+/* Hypercalls */
+#define HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE 0x0002
+#define HVCALL_FLUSH_VIRTUAL_ADDRESS_LIST 0x0003
+#define HVCALL_NOTIFY_LONG_SPIN_WAIT 0x0008
+#define HVCALL_SEND_IPI 0x000b
+#define HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE_EX 0x0013
+#define HVCALL_FLUSH_VIRTUAL_ADDRESS_LIST_EX 0x0014
+#define HVCALL_SEND_IPI_EX 0x0015
+#define HVCALL_GET_PARTITION_ID 0x0046
+#define HVCALL_DEPOSIT_MEMORY 0x0048
+#define HVCALL_CREATE_VP 0x004e
+#define HVCALL_GET_VP_REGISTERS 0x0050
+#define HVCALL_SET_VP_REGISTERS 0x0051
+#define HVCALL_POST_MESSAGE 0x005c
+#define HVCALL_SIGNAL_EVENT 0x005d
+#define HVCALL_POST_DEBUG_DATA 0x0069
+#define HVCALL_RETRIEVE_DEBUG_DATA 0x006a
+#define HVCALL_RESET_DEBUG_SESSION 0x006b
+#define HVCALL_ADD_LOGICAL_PROCESSOR 0x0076
+#define HVCALL_MAP_DEVICE_INTERRUPT 0x007c
+#define HVCALL_UNMAP_DEVICE_INTERRUPT 0x007d
+#define HVCALL_RETARGET_INTERRUPT 0x007e
+#define HVCALL_FLUSH_GUEST_PHYSICAL_ADDRESS_SPACE 0x00af
+#define HVCALL_FLUSH_GUEST_PHYSICAL_ADDRESS_LIST 0x00b0
+
+/* Extended hypercalls */
+#define HV_EXT_CALL_QUERY_CAPABILITIES 0x8001
+
+#define HV_FLUSH_ALL_PROCESSORS BIT(0)
+#define HV_FLUSH_ALL_VIRTUAL_ADDRESS_SPACES BIT(1)
+#define HV_FLUSH_NON_GLOBAL_MAPPINGS_ONLY BIT(2)
+#define HV_FLUSH_USE_EXTENDED_RANGE_FORMAT BIT(3)
+
+/* hypercall status code */
+#define HV_STATUS_SUCCESS 0
+#define HV_STATUS_INVALID_HYPERCALL_CODE 2
+#define HV_STATUS_INVALID_HYPERCALL_INPUT 3
+#define HV_STATUS_INVALID_ALIGNMENT 4
+#define HV_STATUS_INVALID_PARAMETER 5
+#define HV_STATUS_ACCESS_DENIED 6
+#define HV_STATUS_OPERATION_DENIED 8
+#define HV_STATUS_INSUFFICIENT_MEMORY 11
+#define HV_STATUS_INVALID_PORT_ID 17
+#define HV_STATUS_INVALID_CONNECTION_ID 18
+#define HV_STATUS_INSUFFICIENT_BUFFERS 19
+
+/* hypercall options */
+#define HV_HYPERCALL_FAST_BIT BIT(16)
+#define HV_HYPERCALL_VARHEAD_OFFSET 17
+#define HV_HYPERCALL_REP_COMP_OFFSET 32
+
+/*
+ * Issue a Hyper-V hypercall. Returns exception vector raised or 0, 'hv_status'
+ * is set to the hypercall status (if no exception occurred).
+ */
+static inline uint8_t __hyperv_hypercall(u64 control, vm_vaddr_t input_address,
+ vm_vaddr_t output_address,
+ uint64_t *hv_status)
+{
+ uint64_t error_code;
+ uint8_t vector;
+
+ /* Note both the hypercall and the "asm safe" clobber r9-r11. */
+ asm volatile("mov %[output_address], %%r8\n\t"
+ KVM_ASM_SAFE("vmcall")
+ : "=a" (*hv_status),
+ "+c" (control), "+d" (input_address),
+ KVM_ASM_SAFE_OUTPUTS(vector, error_code)
+ : [output_address] "r"(output_address),
+ "a" (-EFAULT)
+ : "cc", "memory", "r8", KVM_ASM_SAFE_CLOBBERS);
+ return vector;
+}
+
+/* Issue a Hyper-V hypercall and assert that it succeeded. */
+static inline void hyperv_hypercall(u64 control, vm_vaddr_t input_address,
+ vm_vaddr_t output_address)
+{
+ uint64_t hv_status;
+ uint8_t vector;
+
+ vector = __hyperv_hypercall(control, input_address, output_address, &hv_status);
+
+ GUEST_ASSERT(!vector);
+ GUEST_ASSERT((hv_status & 0xffff) == 0);
+}
+
+/* Write 'Fast' hypercall input 'data' to the first 'n_sse_regs' SSE regs */
+static inline void hyperv_write_xmm_input(void *data, int n_sse_regs)
+{
+ int i;
+
+ for (i = 0; i < n_sse_regs; i++)
+ write_sse_reg(i, (sse128_t *)(data + sizeof(sse128_t) * i));
+}
+
+/* Proper HV_X64_MSR_GUEST_OS_ID value */
+#define HYPERV_LINUX_OS_ID ((u64)0x8100 << 48)
+
+#define HV_X64_MSR_VP_ASSIST_PAGE 0x40000073
+#define HV_X64_MSR_VP_ASSIST_PAGE_ENABLE 0x00000001
+#define HV_X64_MSR_VP_ASSIST_PAGE_ADDRESS_SHIFT 12
+#define HV_X64_MSR_VP_ASSIST_PAGE_ADDRESS_MASK \
+ (~((1ull << HV_X64_MSR_VP_ASSIST_PAGE_ADDRESS_SHIFT) - 1))
+
+struct hv_nested_enlightenments_control {
+ struct {
+ __u32 directhypercall:1;
+ __u32 reserved:31;
+ } features;
+ struct {
+ __u32 reserved;
+ } hypercallControls;
+} __packed;
+
+/* Define virtual processor assist page structure. */
+struct hv_vp_assist_page {
+ __u32 apic_assist;
+ __u32 reserved1;
+ __u64 vtl_control[3];
+ struct hv_nested_enlightenments_control nested_control;
+ __u8 enlighten_vmentry;
+ __u8 reserved2[7];
+ __u64 current_nested_vmcs;
+} __packed;
+
+extern struct hv_vp_assist_page *current_vp_assist;
+
+int enable_vp_assist(uint64_t vp_assist_pa, void *vp_assist);
+
+struct hyperv_test_pages {
+ /* VP assist page */
+ void *vp_assist_hva;
+ uint64_t vp_assist_gpa;
+ void *vp_assist;
+
+ /* Partition assist page */
+ void *partition_assist_hva;
+ uint64_t partition_assist_gpa;
+ void *partition_assist;
+
+ /* Enlightened VMCS */
+ void *enlightened_vmcs_hva;
+ uint64_t enlightened_vmcs_gpa;
+ void *enlightened_vmcs;
+};
+
+struct hyperv_test_pages *vcpu_alloc_hyperv_test_pages(struct kvm_vm *vm,
+ vm_vaddr_t *p_hv_pages_gva);
+
+/* HV_X64_MSR_TSC_INVARIANT_CONTROL bits */
+#define HV_INVARIANT_TSC_EXPOSED BIT_ULL(0)
+
+#endif /* !SELFTEST_KVM_HYPERV_H */
diff --git a/tools/testing/selftests/kvm/include/x86_64/kvm_util_arch.h b/tools/testing/selftests/kvm/include/x86_64/kvm_util_arch.h
new file mode 100644
index 000000000000..9f1725192aa2
--- /dev/null
+++ b/tools/testing/selftests/kvm/include/x86_64/kvm_util_arch.h
@@ -0,0 +1,23 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+#ifndef SELFTEST_KVM_UTIL_ARCH_H
+#define SELFTEST_KVM_UTIL_ARCH_H
+
+#include <stdbool.h>
+#include <stdint.h>
+
+struct kvm_vm_arch {
+ uint64_t c_bit;
+ uint64_t s_bit;
+ int sev_fd;
+ bool is_pt_protected;
+};
+
+static inline bool __vm_arch_has_protected_memory(struct kvm_vm_arch *arch)
+{
+ return arch->c_bit || arch->s_bit;
+}
+
+#define vm_arch_has_protected_memory(vm) \
+ __vm_arch_has_protected_memory(&(vm)->arch)
+
+#endif // SELFTEST_KVM_UTIL_ARCH_H
diff --git a/tools/testing/selftests/kvm/include/x86_64/mce.h b/tools/testing/selftests/kvm/include/x86_64/mce.h
new file mode 100644
index 000000000000..6119321f3f5d
--- /dev/null
+++ b/tools/testing/selftests/kvm/include/x86_64/mce.h
@@ -0,0 +1,25 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * tools/testing/selftests/kvm/include/x86_64/mce.h
+ *
+ * Copyright (C) 2022, Google LLC.
+ */
+
+#ifndef SELFTEST_KVM_MCE_H
+#define SELFTEST_KVM_MCE_H
+
+#define MCG_CTL_P BIT_ULL(8) /* MCG_CTL register available */
+#define MCG_SER_P BIT_ULL(24) /* MCA recovery/new status bits */
+#define MCG_LMCE_P BIT_ULL(27) /* Local machine check supported */
+#define MCG_CMCI_P BIT_ULL(10) /* CMCI supported */
+#define KVM_MAX_MCE_BANKS 32
+#define MCG_CAP_BANKS_MASK 0xff /* Bit 0-7 of the MCG_CAP register are #banks */
+#define MCI_STATUS_VAL (1ULL << 63) /* valid error */
+#define MCI_STATUS_UC (1ULL << 61) /* uncorrected error */
+#define MCI_STATUS_EN (1ULL << 60) /* error enabled */
+#define MCI_STATUS_MISCV (1ULL << 59) /* misc error reg. valid */
+#define MCI_STATUS_ADDRV (1ULL << 58) /* addr reg. valid */
+#define MCM_ADDR_PHYS 2 /* physical address */
+#define MCI_CTL2_CMCI_EN BIT_ULL(30)
+
+#endif /* SELFTEST_KVM_MCE_H */
diff --git a/tools/testing/selftests/kvm/include/x86_64/pmu.h b/tools/testing/selftests/kvm/include/x86_64/pmu.h
new file mode 100644
index 000000000000..3c10c4dc0ae8
--- /dev/null
+++ b/tools/testing/selftests/kvm/include/x86_64/pmu.h
@@ -0,0 +1,97 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * Copyright (C) 2023, Tencent, Inc.
+ */
+#ifndef SELFTEST_KVM_PMU_H
+#define SELFTEST_KVM_PMU_H
+
+#include <stdint.h>
+
+#define KVM_PMU_EVENT_FILTER_MAX_EVENTS 300
+
+/*
+ * Encode an eventsel+umask pair into event-select MSR format. Note, this is
+ * technically AMD's format, as Intel's format only supports 8 bits for the
+ * event selector, i.e. doesn't use bits 24:16 for the selector. But, OR-ing
+ * in '0' is a nop and won't clobber the CMASK.
+ */
+#define RAW_EVENT(eventsel, umask) (((eventsel & 0xf00UL) << 24) | \
+ ((eventsel) & 0xff) | \
+ ((umask) & 0xff) << 8)
+
+/*
+ * These are technically Intel's definitions, but except for CMASK (see above),
+ * AMD's layout is compatible with Intel's.
+ */
+#define ARCH_PERFMON_EVENTSEL_EVENT GENMASK_ULL(7, 0)
+#define ARCH_PERFMON_EVENTSEL_UMASK GENMASK_ULL(15, 8)
+#define ARCH_PERFMON_EVENTSEL_USR BIT_ULL(16)
+#define ARCH_PERFMON_EVENTSEL_OS BIT_ULL(17)
+#define ARCH_PERFMON_EVENTSEL_EDGE BIT_ULL(18)
+#define ARCH_PERFMON_EVENTSEL_PIN_CONTROL BIT_ULL(19)
+#define ARCH_PERFMON_EVENTSEL_INT BIT_ULL(20)
+#define ARCH_PERFMON_EVENTSEL_ANY BIT_ULL(21)
+#define ARCH_PERFMON_EVENTSEL_ENABLE BIT_ULL(22)
+#define ARCH_PERFMON_EVENTSEL_INV BIT_ULL(23)
+#define ARCH_PERFMON_EVENTSEL_CMASK GENMASK_ULL(31, 24)
+
+/* RDPMC control flags, Intel only. */
+#define INTEL_RDPMC_METRICS BIT_ULL(29)
+#define INTEL_RDPMC_FIXED BIT_ULL(30)
+#define INTEL_RDPMC_FAST BIT_ULL(31)
+
+/* Fixed PMC controls, Intel only. */
+#define FIXED_PMC_GLOBAL_CTRL_ENABLE(_idx) BIT_ULL((32 + (_idx)))
+
+#define FIXED_PMC_KERNEL BIT_ULL(0)
+#define FIXED_PMC_USER BIT_ULL(1)
+#define FIXED_PMC_ANYTHREAD BIT_ULL(2)
+#define FIXED_PMC_ENABLE_PMI BIT_ULL(3)
+#define FIXED_PMC_NR_BITS 4
+#define FIXED_PMC_CTRL(_idx, _val) ((_val) << ((_idx) * FIXED_PMC_NR_BITS))
+
+#define PMU_CAP_FW_WRITES BIT_ULL(13)
+#define PMU_CAP_LBR_FMT 0x3f
+
+#define INTEL_ARCH_CPU_CYCLES RAW_EVENT(0x3c, 0x00)
+#define INTEL_ARCH_INSTRUCTIONS_RETIRED RAW_EVENT(0xc0, 0x00)
+#define INTEL_ARCH_REFERENCE_CYCLES RAW_EVENT(0x3c, 0x01)
+#define INTEL_ARCH_LLC_REFERENCES RAW_EVENT(0x2e, 0x4f)
+#define INTEL_ARCH_LLC_MISSES RAW_EVENT(0x2e, 0x41)
+#define INTEL_ARCH_BRANCHES_RETIRED RAW_EVENT(0xc4, 0x00)
+#define INTEL_ARCH_BRANCHES_MISPREDICTED RAW_EVENT(0xc5, 0x00)
+#define INTEL_ARCH_TOPDOWN_SLOTS RAW_EVENT(0xa4, 0x01)
+
+#define AMD_ZEN_CORE_CYCLES RAW_EVENT(0x76, 0x00)
+#define AMD_ZEN_INSTRUCTIONS_RETIRED RAW_EVENT(0xc0, 0x00)
+#define AMD_ZEN_BRANCHES_RETIRED RAW_EVENT(0xc2, 0x00)
+#define AMD_ZEN_BRANCHES_MISPREDICTED RAW_EVENT(0xc3, 0x00)
+
+/*
+ * Note! The order and thus the index of the architectural events matters as
+ * support for each event is enumerated via CPUID using the index of the event.
+ */
+enum intel_pmu_architectural_events {
+ INTEL_ARCH_CPU_CYCLES_INDEX,
+ INTEL_ARCH_INSTRUCTIONS_RETIRED_INDEX,
+ INTEL_ARCH_REFERENCE_CYCLES_INDEX,
+ INTEL_ARCH_LLC_REFERENCES_INDEX,
+ INTEL_ARCH_LLC_MISSES_INDEX,
+ INTEL_ARCH_BRANCHES_RETIRED_INDEX,
+ INTEL_ARCH_BRANCHES_MISPREDICTED_INDEX,
+ INTEL_ARCH_TOPDOWN_SLOTS_INDEX,
+ NR_INTEL_ARCH_EVENTS,
+};
+
+enum amd_pmu_zen_events {
+ AMD_ZEN_CORE_CYCLES_INDEX,
+ AMD_ZEN_INSTRUCTIONS_INDEX,
+ AMD_ZEN_BRANCHES_INDEX,
+ AMD_ZEN_BRANCH_MISSES_INDEX,
+ NR_AMD_ZEN_EVENTS,
+};
+
+extern const uint64_t intel_pmu_arch_events[];
+extern const uint64_t amd_pmu_zen_events[];
+
+#endif /* SELFTEST_KVM_PMU_H */
diff --git a/tools/testing/selftests/kvm/include/x86_64/processor.h b/tools/testing/selftests/kvm/include/x86_64/processor.h
index 82b7fe16a824..3bd03b088dda 100644
--- a/tools/testing/selftests/kvm/include/x86_64/processor.h
+++ b/tools/testing/selftests/kvm/include/x86_64/processor.h
@@ -10,8 +10,29 @@
#include <assert.h>
#include <stdint.h>
+#include <syscall.h>
#include <asm/msr-index.h>
+#include <asm/prctl.h>
+
+#include <linux/kvm_para.h>
+#include <linux/stringify.h>
+
+#include "../kvm_util.h"
+
+extern bool host_cpu_is_intel;
+extern bool host_cpu_is_amd;
+
+enum vm_guest_x86_subtype {
+ VM_SUBTYPE_NONE = 0,
+ VM_SUBTYPE_SEV,
+ VM_SUBTYPE_SEV_ES,
+};
+
+/* Forced emulation prefix, used to invoke the emulator unconditionally. */
+#define KVM_FEP "ud2; .byte 'k', 'v', 'm';"
+
+#define NMI_VECTOR 0x02
#define X86_EFLAGS_FIXED (1u << 1)
@@ -27,6 +48,7 @@
#define X86_CR4_OSFXSR (1ul << 9)
#define X86_CR4_OSXMMEXCPT (1ul << 10)
#define X86_CR4_UMIP (1ul << 11)
+#define X86_CR4_LA57 (1ul << 12)
#define X86_CR4_VMXE (1ul << 13)
#define X86_CR4_SMXE (1ul << 14)
#define X86_CR4_FSGSBASE (1ul << 16)
@@ -36,6 +58,316 @@
#define X86_CR4_SMAP (1ul << 21)
#define X86_CR4_PKE (1ul << 22)
+struct xstate_header {
+ u64 xstate_bv;
+ u64 xcomp_bv;
+ u64 reserved[6];
+} __attribute__((packed));
+
+struct xstate {
+ u8 i387[512];
+ struct xstate_header header;
+ u8 extended_state_area[0];
+} __attribute__ ((packed, aligned (64)));
+
+#define XFEATURE_MASK_FP BIT_ULL(0)
+#define XFEATURE_MASK_SSE BIT_ULL(1)
+#define XFEATURE_MASK_YMM BIT_ULL(2)
+#define XFEATURE_MASK_BNDREGS BIT_ULL(3)
+#define XFEATURE_MASK_BNDCSR BIT_ULL(4)
+#define XFEATURE_MASK_OPMASK BIT_ULL(5)
+#define XFEATURE_MASK_ZMM_Hi256 BIT_ULL(6)
+#define XFEATURE_MASK_Hi16_ZMM BIT_ULL(7)
+#define XFEATURE_MASK_PT BIT_ULL(8)
+#define XFEATURE_MASK_PKRU BIT_ULL(9)
+#define XFEATURE_MASK_PASID BIT_ULL(10)
+#define XFEATURE_MASK_CET_USER BIT_ULL(11)
+#define XFEATURE_MASK_CET_KERNEL BIT_ULL(12)
+#define XFEATURE_MASK_LBR BIT_ULL(15)
+#define XFEATURE_MASK_XTILE_CFG BIT_ULL(17)
+#define XFEATURE_MASK_XTILE_DATA BIT_ULL(18)
+
+#define XFEATURE_MASK_AVX512 (XFEATURE_MASK_OPMASK | \
+ XFEATURE_MASK_ZMM_Hi256 | \
+ XFEATURE_MASK_Hi16_ZMM)
+#define XFEATURE_MASK_XTILE (XFEATURE_MASK_XTILE_DATA | \
+ XFEATURE_MASK_XTILE_CFG)
+
+/* Note, these are ordered alphabetically to match kvm_cpuid_entry2. Eww. */
+enum cpuid_output_regs {
+ KVM_CPUID_EAX,
+ KVM_CPUID_EBX,
+ KVM_CPUID_ECX,
+ KVM_CPUID_EDX
+};
+
+/*
+ * Pack the information into a 64-bit value so that each X86_FEATURE_XXX can be
+ * passed by value with no overhead.
+ */
+struct kvm_x86_cpu_feature {
+ u32 function;
+ u16 index;
+ u8 reg;
+ u8 bit;
+};
+#define KVM_X86_CPU_FEATURE(fn, idx, gpr, __bit) \
+({ \
+ struct kvm_x86_cpu_feature feature = { \
+ .function = fn, \
+ .index = idx, \
+ .reg = KVM_CPUID_##gpr, \
+ .bit = __bit, \
+ }; \
+ \
+ kvm_static_assert((fn & 0xc0000000) == 0 || \
+ (fn & 0xc0000000) == 0x40000000 || \
+ (fn & 0xc0000000) == 0x80000000 || \
+ (fn & 0xc0000000) == 0xc0000000); \
+ kvm_static_assert(idx < BIT(sizeof(feature.index) * BITS_PER_BYTE)); \
+ feature; \
+})
+
+/*
+ * Basic Leafs, a.k.a. Intel defined
+ */
+#define X86_FEATURE_MWAIT KVM_X86_CPU_FEATURE(0x1, 0, ECX, 3)
+#define X86_FEATURE_VMX KVM_X86_CPU_FEATURE(0x1, 0, ECX, 5)
+#define X86_FEATURE_SMX KVM_X86_CPU_FEATURE(0x1, 0, ECX, 6)
+#define X86_FEATURE_PDCM KVM_X86_CPU_FEATURE(0x1, 0, ECX, 15)
+#define X86_FEATURE_PCID KVM_X86_CPU_FEATURE(0x1, 0, ECX, 17)
+#define X86_FEATURE_X2APIC KVM_X86_CPU_FEATURE(0x1, 0, ECX, 21)
+#define X86_FEATURE_MOVBE KVM_X86_CPU_FEATURE(0x1, 0, ECX, 22)
+#define X86_FEATURE_TSC_DEADLINE_TIMER KVM_X86_CPU_FEATURE(0x1, 0, ECX, 24)
+#define X86_FEATURE_XSAVE KVM_X86_CPU_FEATURE(0x1, 0, ECX, 26)
+#define X86_FEATURE_OSXSAVE KVM_X86_CPU_FEATURE(0x1, 0, ECX, 27)
+#define X86_FEATURE_RDRAND KVM_X86_CPU_FEATURE(0x1, 0, ECX, 30)
+#define X86_FEATURE_HYPERVISOR KVM_X86_CPU_FEATURE(0x1, 0, ECX, 31)
+#define X86_FEATURE_PAE KVM_X86_CPU_FEATURE(0x1, 0, EDX, 6)
+#define X86_FEATURE_MCE KVM_X86_CPU_FEATURE(0x1, 0, EDX, 7)
+#define X86_FEATURE_APIC KVM_X86_CPU_FEATURE(0x1, 0, EDX, 9)
+#define X86_FEATURE_CLFLUSH KVM_X86_CPU_FEATURE(0x1, 0, EDX, 19)
+#define X86_FEATURE_XMM KVM_X86_CPU_FEATURE(0x1, 0, EDX, 25)
+#define X86_FEATURE_XMM2 KVM_X86_CPU_FEATURE(0x1, 0, EDX, 26)
+#define X86_FEATURE_FSGSBASE KVM_X86_CPU_FEATURE(0x7, 0, EBX, 0)
+#define X86_FEATURE_TSC_ADJUST KVM_X86_CPU_FEATURE(0x7, 0, EBX, 1)
+#define X86_FEATURE_SGX KVM_X86_CPU_FEATURE(0x7, 0, EBX, 2)
+#define X86_FEATURE_HLE KVM_X86_CPU_FEATURE(0x7, 0, EBX, 4)
+#define X86_FEATURE_SMEP KVM_X86_CPU_FEATURE(0x7, 0, EBX, 7)
+#define X86_FEATURE_INVPCID KVM_X86_CPU_FEATURE(0x7, 0, EBX, 10)
+#define X86_FEATURE_RTM KVM_X86_CPU_FEATURE(0x7, 0, EBX, 11)
+#define X86_FEATURE_MPX KVM_X86_CPU_FEATURE(0x7, 0, EBX, 14)
+#define X86_FEATURE_SMAP KVM_X86_CPU_FEATURE(0x7, 0, EBX, 20)
+#define X86_FEATURE_PCOMMIT KVM_X86_CPU_FEATURE(0x7, 0, EBX, 22)
+#define X86_FEATURE_CLFLUSHOPT KVM_X86_CPU_FEATURE(0x7, 0, EBX, 23)
+#define X86_FEATURE_CLWB KVM_X86_CPU_FEATURE(0x7, 0, EBX, 24)
+#define X86_FEATURE_UMIP KVM_X86_CPU_FEATURE(0x7, 0, ECX, 2)
+#define X86_FEATURE_PKU KVM_X86_CPU_FEATURE(0x7, 0, ECX, 3)
+#define X86_FEATURE_OSPKE KVM_X86_CPU_FEATURE(0x7, 0, ECX, 4)
+#define X86_FEATURE_LA57 KVM_X86_CPU_FEATURE(0x7, 0, ECX, 16)
+#define X86_FEATURE_RDPID KVM_X86_CPU_FEATURE(0x7, 0, ECX, 22)
+#define X86_FEATURE_SGX_LC KVM_X86_CPU_FEATURE(0x7, 0, ECX, 30)
+#define X86_FEATURE_SHSTK KVM_X86_CPU_FEATURE(0x7, 0, ECX, 7)
+#define X86_FEATURE_IBT KVM_X86_CPU_FEATURE(0x7, 0, EDX, 20)
+#define X86_FEATURE_AMX_TILE KVM_X86_CPU_FEATURE(0x7, 0, EDX, 24)
+#define X86_FEATURE_SPEC_CTRL KVM_X86_CPU_FEATURE(0x7, 0, EDX, 26)
+#define X86_FEATURE_ARCH_CAPABILITIES KVM_X86_CPU_FEATURE(0x7, 0, EDX, 29)
+#define X86_FEATURE_PKS KVM_X86_CPU_FEATURE(0x7, 0, ECX, 31)
+#define X86_FEATURE_XTILECFG KVM_X86_CPU_FEATURE(0xD, 0, EAX, 17)
+#define X86_FEATURE_XTILEDATA KVM_X86_CPU_FEATURE(0xD, 0, EAX, 18)
+#define X86_FEATURE_XSAVES KVM_X86_CPU_FEATURE(0xD, 1, EAX, 3)
+#define X86_FEATURE_XFD KVM_X86_CPU_FEATURE(0xD, 1, EAX, 4)
+#define X86_FEATURE_XTILEDATA_XFD KVM_X86_CPU_FEATURE(0xD, 18, ECX, 2)
+
+/*
+ * Extended Leafs, a.k.a. AMD defined
+ */
+#define X86_FEATURE_SVM KVM_X86_CPU_FEATURE(0x80000001, 0, ECX, 2)
+#define X86_FEATURE_NX KVM_X86_CPU_FEATURE(0x80000001, 0, EDX, 20)
+#define X86_FEATURE_GBPAGES KVM_X86_CPU_FEATURE(0x80000001, 0, EDX, 26)
+#define X86_FEATURE_RDTSCP KVM_X86_CPU_FEATURE(0x80000001, 0, EDX, 27)
+#define X86_FEATURE_LM KVM_X86_CPU_FEATURE(0x80000001, 0, EDX, 29)
+#define X86_FEATURE_INVTSC KVM_X86_CPU_FEATURE(0x80000007, 0, EDX, 8)
+#define X86_FEATURE_RDPRU KVM_X86_CPU_FEATURE(0x80000008, 0, EBX, 4)
+#define X86_FEATURE_AMD_IBPB KVM_X86_CPU_FEATURE(0x80000008, 0, EBX, 12)
+#define X86_FEATURE_NPT KVM_X86_CPU_FEATURE(0x8000000A, 0, EDX, 0)
+#define X86_FEATURE_LBRV KVM_X86_CPU_FEATURE(0x8000000A, 0, EDX, 1)
+#define X86_FEATURE_NRIPS KVM_X86_CPU_FEATURE(0x8000000A, 0, EDX, 3)
+#define X86_FEATURE_TSCRATEMSR KVM_X86_CPU_FEATURE(0x8000000A, 0, EDX, 4)
+#define X86_FEATURE_PAUSEFILTER KVM_X86_CPU_FEATURE(0x8000000A, 0, EDX, 10)
+#define X86_FEATURE_PFTHRESHOLD KVM_X86_CPU_FEATURE(0x8000000A, 0, EDX, 12)
+#define X86_FEATURE_VGIF KVM_X86_CPU_FEATURE(0x8000000A, 0, EDX, 16)
+#define X86_FEATURE_SEV KVM_X86_CPU_FEATURE(0x8000001F, 0, EAX, 1)
+#define X86_FEATURE_SEV_ES KVM_X86_CPU_FEATURE(0x8000001F, 0, EAX, 3)
+
+/*
+ * KVM defined paravirt features.
+ */
+#define X86_FEATURE_KVM_CLOCKSOURCE KVM_X86_CPU_FEATURE(0x40000001, 0, EAX, 0)
+#define X86_FEATURE_KVM_NOP_IO_DELAY KVM_X86_CPU_FEATURE(0x40000001, 0, EAX, 1)
+#define X86_FEATURE_KVM_MMU_OP KVM_X86_CPU_FEATURE(0x40000001, 0, EAX, 2)
+#define X86_FEATURE_KVM_CLOCKSOURCE2 KVM_X86_CPU_FEATURE(0x40000001, 0, EAX, 3)
+#define X86_FEATURE_KVM_ASYNC_PF KVM_X86_CPU_FEATURE(0x40000001, 0, EAX, 4)
+#define X86_FEATURE_KVM_STEAL_TIME KVM_X86_CPU_FEATURE(0x40000001, 0, EAX, 5)
+#define X86_FEATURE_KVM_PV_EOI KVM_X86_CPU_FEATURE(0x40000001, 0, EAX, 6)
+#define X86_FEATURE_KVM_PV_UNHALT KVM_X86_CPU_FEATURE(0x40000001, 0, EAX, 7)
+/* Bit 8 apparently isn't used?!?! */
+#define X86_FEATURE_KVM_PV_TLB_FLUSH KVM_X86_CPU_FEATURE(0x40000001, 0, EAX, 9)
+#define X86_FEATURE_KVM_ASYNC_PF_VMEXIT KVM_X86_CPU_FEATURE(0x40000001, 0, EAX, 10)
+#define X86_FEATURE_KVM_PV_SEND_IPI KVM_X86_CPU_FEATURE(0x40000001, 0, EAX, 11)
+#define X86_FEATURE_KVM_POLL_CONTROL KVM_X86_CPU_FEATURE(0x40000001, 0, EAX, 12)
+#define X86_FEATURE_KVM_PV_SCHED_YIELD KVM_X86_CPU_FEATURE(0x40000001, 0, EAX, 13)
+#define X86_FEATURE_KVM_ASYNC_PF_INT KVM_X86_CPU_FEATURE(0x40000001, 0, EAX, 14)
+#define X86_FEATURE_KVM_MSI_EXT_DEST_ID KVM_X86_CPU_FEATURE(0x40000001, 0, EAX, 15)
+#define X86_FEATURE_KVM_HC_MAP_GPA_RANGE KVM_X86_CPU_FEATURE(0x40000001, 0, EAX, 16)
+#define X86_FEATURE_KVM_MIGRATION_CONTROL KVM_X86_CPU_FEATURE(0x40000001, 0, EAX, 17)
+
+/*
+ * Same idea as X86_FEATURE_XXX, but X86_PROPERTY_XXX retrieves a multi-bit
+ * value/property as opposed to a single-bit feature. Again, pack the info
+ * into a 64-bit value to pass by value with no overhead.
+ */
+struct kvm_x86_cpu_property {
+ u32 function;
+ u8 index;
+ u8 reg;
+ u8 lo_bit;
+ u8 hi_bit;
+};
+#define KVM_X86_CPU_PROPERTY(fn, idx, gpr, low_bit, high_bit) \
+({ \
+ struct kvm_x86_cpu_property property = { \
+ .function = fn, \
+ .index = idx, \
+ .reg = KVM_CPUID_##gpr, \
+ .lo_bit = low_bit, \
+ .hi_bit = high_bit, \
+ }; \
+ \
+ kvm_static_assert(low_bit < high_bit); \
+ kvm_static_assert((fn & 0xc0000000) == 0 || \
+ (fn & 0xc0000000) == 0x40000000 || \
+ (fn & 0xc0000000) == 0x80000000 || \
+ (fn & 0xc0000000) == 0xc0000000); \
+ kvm_static_assert(idx < BIT(sizeof(property.index) * BITS_PER_BYTE)); \
+ property; \
+})
+
+#define X86_PROPERTY_MAX_BASIC_LEAF KVM_X86_CPU_PROPERTY(0, 0, EAX, 0, 31)
+#define X86_PROPERTY_PMU_VERSION KVM_X86_CPU_PROPERTY(0xa, 0, EAX, 0, 7)
+#define X86_PROPERTY_PMU_NR_GP_COUNTERS KVM_X86_CPU_PROPERTY(0xa, 0, EAX, 8, 15)
+#define X86_PROPERTY_PMU_GP_COUNTERS_BIT_WIDTH KVM_X86_CPU_PROPERTY(0xa, 0, EAX, 16, 23)
+#define X86_PROPERTY_PMU_EBX_BIT_VECTOR_LENGTH KVM_X86_CPU_PROPERTY(0xa, 0, EAX, 24, 31)
+#define X86_PROPERTY_PMU_EVENTS_MASK KVM_X86_CPU_PROPERTY(0xa, 0, EBX, 0, 7)
+#define X86_PROPERTY_PMU_FIXED_COUNTERS_BITMASK KVM_X86_CPU_PROPERTY(0xa, 0, ECX, 0, 31)
+#define X86_PROPERTY_PMU_NR_FIXED_COUNTERS KVM_X86_CPU_PROPERTY(0xa, 0, EDX, 0, 4)
+#define X86_PROPERTY_PMU_FIXED_COUNTERS_BIT_WIDTH KVM_X86_CPU_PROPERTY(0xa, 0, EDX, 5, 12)
+
+#define X86_PROPERTY_SUPPORTED_XCR0_LO KVM_X86_CPU_PROPERTY(0xd, 0, EAX, 0, 31)
+#define X86_PROPERTY_XSTATE_MAX_SIZE_XCR0 KVM_X86_CPU_PROPERTY(0xd, 0, EBX, 0, 31)
+#define X86_PROPERTY_XSTATE_MAX_SIZE KVM_X86_CPU_PROPERTY(0xd, 0, ECX, 0, 31)
+#define X86_PROPERTY_SUPPORTED_XCR0_HI KVM_X86_CPU_PROPERTY(0xd, 0, EDX, 0, 31)
+
+#define X86_PROPERTY_XSTATE_TILE_SIZE KVM_X86_CPU_PROPERTY(0xd, 18, EAX, 0, 31)
+#define X86_PROPERTY_XSTATE_TILE_OFFSET KVM_X86_CPU_PROPERTY(0xd, 18, EBX, 0, 31)
+#define X86_PROPERTY_AMX_MAX_PALETTE_TABLES KVM_X86_CPU_PROPERTY(0x1d, 0, EAX, 0, 31)
+#define X86_PROPERTY_AMX_TOTAL_TILE_BYTES KVM_X86_CPU_PROPERTY(0x1d, 1, EAX, 0, 15)
+#define X86_PROPERTY_AMX_BYTES_PER_TILE KVM_X86_CPU_PROPERTY(0x1d, 1, EAX, 16, 31)
+#define X86_PROPERTY_AMX_BYTES_PER_ROW KVM_X86_CPU_PROPERTY(0x1d, 1, EBX, 0, 15)
+#define X86_PROPERTY_AMX_NR_TILE_REGS KVM_X86_CPU_PROPERTY(0x1d, 1, EBX, 16, 31)
+#define X86_PROPERTY_AMX_MAX_ROWS KVM_X86_CPU_PROPERTY(0x1d, 1, ECX, 0, 15)
+
+#define X86_PROPERTY_MAX_KVM_LEAF KVM_X86_CPU_PROPERTY(0x40000000, 0, EAX, 0, 31)
+
+#define X86_PROPERTY_MAX_EXT_LEAF KVM_X86_CPU_PROPERTY(0x80000000, 0, EAX, 0, 31)
+#define X86_PROPERTY_MAX_PHY_ADDR KVM_X86_CPU_PROPERTY(0x80000008, 0, EAX, 0, 7)
+#define X86_PROPERTY_MAX_VIRT_ADDR KVM_X86_CPU_PROPERTY(0x80000008, 0, EAX, 8, 15)
+#define X86_PROPERTY_SEV_C_BIT KVM_X86_CPU_PROPERTY(0x8000001F, 0, EBX, 0, 5)
+#define X86_PROPERTY_PHYS_ADDR_REDUCTION KVM_X86_CPU_PROPERTY(0x8000001F, 0, EBX, 6, 11)
+
+#define X86_PROPERTY_MAX_CENTAUR_LEAF KVM_X86_CPU_PROPERTY(0xC0000000, 0, EAX, 0, 31)
+
+/*
+ * Intel's architectural PMU events are bizarre. They have a "feature" bit
+ * that indicates the feature is _not_ supported, and a property that states
+ * the length of the bit mask of unsupported features. A feature is supported
+ * if the size of the bit mask is larger than the "unavailable" bit, and said
+ * bit is not set. Fixed counters also bizarre enumeration, but inverted from
+ * arch events for general purpose counters. Fixed counters are supported if a
+ * feature flag is set **OR** the total number of fixed counters is greater
+ * than index of the counter.
+ *
+ * Wrap the events for general purpose and fixed counters to simplify checking
+ * whether or not a given architectural event is supported.
+ */
+struct kvm_x86_pmu_feature {
+ struct kvm_x86_cpu_feature f;
+};
+#define KVM_X86_PMU_FEATURE(__reg, __bit) \
+({ \
+ struct kvm_x86_pmu_feature feature = { \
+ .f = KVM_X86_CPU_FEATURE(0xa, 0, __reg, __bit), \
+ }; \
+ \
+ kvm_static_assert(KVM_CPUID_##__reg == KVM_CPUID_EBX || \
+ KVM_CPUID_##__reg == KVM_CPUID_ECX); \
+ feature; \
+})
+
+#define X86_PMU_FEATURE_CPU_CYCLES KVM_X86_PMU_FEATURE(EBX, 0)
+#define X86_PMU_FEATURE_INSNS_RETIRED KVM_X86_PMU_FEATURE(EBX, 1)
+#define X86_PMU_FEATURE_REFERENCE_CYCLES KVM_X86_PMU_FEATURE(EBX, 2)
+#define X86_PMU_FEATURE_LLC_REFERENCES KVM_X86_PMU_FEATURE(EBX, 3)
+#define X86_PMU_FEATURE_LLC_MISSES KVM_X86_PMU_FEATURE(EBX, 4)
+#define X86_PMU_FEATURE_BRANCH_INSNS_RETIRED KVM_X86_PMU_FEATURE(EBX, 5)
+#define X86_PMU_FEATURE_BRANCHES_MISPREDICTED KVM_X86_PMU_FEATURE(EBX, 6)
+#define X86_PMU_FEATURE_TOPDOWN_SLOTS KVM_X86_PMU_FEATURE(EBX, 7)
+
+#define X86_PMU_FEATURE_INSNS_RETIRED_FIXED KVM_X86_PMU_FEATURE(ECX, 0)
+#define X86_PMU_FEATURE_CPU_CYCLES_FIXED KVM_X86_PMU_FEATURE(ECX, 1)
+#define X86_PMU_FEATURE_REFERENCE_TSC_CYCLES_FIXED KVM_X86_PMU_FEATURE(ECX, 2)
+#define X86_PMU_FEATURE_TOPDOWN_SLOTS_FIXED KVM_X86_PMU_FEATURE(ECX, 3)
+
+static inline unsigned int x86_family(unsigned int eax)
+{
+ unsigned int x86;
+
+ x86 = (eax >> 8) & 0xf;
+
+ if (x86 == 0xf)
+ x86 += (eax >> 20) & 0xff;
+
+ return x86;
+}
+
+static inline unsigned int x86_model(unsigned int eax)
+{
+ return ((eax >> 12) & 0xf0) | ((eax >> 4) & 0x0f);
+}
+
+/* Page table bitfield declarations */
+#define PTE_PRESENT_MASK BIT_ULL(0)
+#define PTE_WRITABLE_MASK BIT_ULL(1)
+#define PTE_USER_MASK BIT_ULL(2)
+#define PTE_ACCESSED_MASK BIT_ULL(5)
+#define PTE_DIRTY_MASK BIT_ULL(6)
+#define PTE_LARGE_MASK BIT_ULL(7)
+#define PTE_GLOBAL_MASK BIT_ULL(8)
+#define PTE_NX_MASK BIT_ULL(63)
+
+#define PHYSICAL_PAGE_MASK GENMASK_ULL(51, 12)
+
+#define PAGE_SHIFT 12
+#define PAGE_SIZE (1ULL << PAGE_SHIFT)
+#define PAGE_MASK (~(PAGE_SIZE-1) & PHYSICAL_PAGE_MASK)
+
+#define HUGEPAGE_SHIFT(x) (PAGE_SHIFT + (((x) - 1) * 9))
+#define HUGEPAGE_SIZE(x) (1UL << HUGEPAGE_SHIFT(x))
+#define HUGEPAGE_MASK(x) (~(HUGEPAGE_SIZE(x) - 1) & PHYSICAL_PAGE_MASK)
+
+#define PTE_GET_PA(pte) ((pte) & PHYSICAL_PAGE_MASK)
+#define PTE_GET_PFN(pte) (PTE_GET_PA(pte) >> PAGE_SHIFT)
+
/* General Registers in 64-Bit Mode */
struct gpr64_regs {
u64 rax;
@@ -59,7 +391,7 @@ struct gpr64_regs {
struct desc64 {
uint16_t limit0;
uint16_t base0;
- unsigned base1:8, s:1, type:4, dpl:2, p:1;
+ unsigned base1:8, type:4, s:1, dpl:2, p:1;
unsigned limit1:4, avl:1, l:1, db:1, g:1, base2:8;
uint32_t base3;
uint32_t zero1;
@@ -70,6 +402,21 @@ struct desc_ptr {
uint64_t address;
} __attribute__((packed));
+struct kvm_x86_state {
+ struct kvm_xsave *xsave;
+ struct kvm_vcpu_events events;
+ struct kvm_mp_state mp_state;
+ struct kvm_regs regs;
+ struct kvm_xcrs xcrs;
+ struct kvm_sregs sregs;
+ struct kvm_debugregs debugregs;
+ union {
+ struct kvm_nested_state nested;
+ char nested_[16384];
+ };
+ struct kvm_msrs msrs;
+};
+
static inline uint64_t get_desc64_base(const struct desc64 *desc)
{
return ((uint64_t)desc->base3 << 32) |
@@ -223,6 +570,31 @@ static inline void set_cr4(uint64_t val)
__asm__ __volatile__("mov %0, %%cr4" : : "r" (val) : "memory");
}
+static inline u64 xgetbv(u32 index)
+{
+ u32 eax, edx;
+
+ __asm__ __volatile__("xgetbv;"
+ : "=a" (eax), "=d" (edx)
+ : "c" (index));
+ return eax | ((u64)edx << 32);
+}
+
+static inline void xsetbv(u32 index, u64 value)
+{
+ u32 eax = value;
+ u32 edx = value >> 32;
+
+ __asm__ __volatile__("xsetbv" :: "a" (eax), "d" (edx), "c" (index));
+}
+
+static inline void wrpkru(u32 pkru)
+{
+ /* Note, ECX and EDX are architecturally required to be '0'. */
+ asm volatile(".byte 0x0f,0x01,0xef\n\t"
+ : : "a" (pkru), "c"(0), "d"(0));
+}
+
static inline struct desc_ptr get_gdt(void)
{
struct desc_ptr gdt;
@@ -239,104 +611,720 @@ static inline struct desc_ptr get_idt(void)
return idt;
}
-#define SET_XMM(__var, __xmm) \
- asm volatile("movq %0, %%"#__xmm : : "r"(__var) : #__xmm)
+static inline void outl(uint16_t port, uint32_t value)
+{
+ __asm__ __volatile__("outl %%eax, %%dx" : : "d"(port), "a"(value));
+}
+
+static inline void __cpuid(uint32_t function, uint32_t index,
+ uint32_t *eax, uint32_t *ebx,
+ uint32_t *ecx, uint32_t *edx)
+{
+ *eax = function;
+ *ecx = index;
+
+ asm volatile("cpuid"
+ : "=a" (*eax),
+ "=b" (*ebx),
+ "=c" (*ecx),
+ "=d" (*edx)
+ : "0" (*eax), "2" (*ecx)
+ : "memory");
+}
+
+static inline void cpuid(uint32_t function,
+ uint32_t *eax, uint32_t *ebx,
+ uint32_t *ecx, uint32_t *edx)
+{
+ return __cpuid(function, 0, eax, ebx, ecx, edx);
+}
+
+static inline uint32_t this_cpu_fms(void)
+{
+ uint32_t eax, ebx, ecx, edx;
+
+ cpuid(1, &eax, &ebx, &ecx, &edx);
+ return eax;
+}
+
+static inline uint32_t this_cpu_family(void)
+{
+ return x86_family(this_cpu_fms());
+}
+
+static inline uint32_t this_cpu_model(void)
+{
+ return x86_model(this_cpu_fms());
+}
+
+static inline bool this_cpu_vendor_string_is(const char *vendor)
+{
+ const uint32_t *chunk = (const uint32_t *)vendor;
+ uint32_t eax, ebx, ecx, edx;
+
+ cpuid(0, &eax, &ebx, &ecx, &edx);
+ return (ebx == chunk[0] && edx == chunk[1] && ecx == chunk[2]);
+}
+
+static inline bool this_cpu_is_intel(void)
+{
+ return this_cpu_vendor_string_is("GenuineIntel");
+}
+
+/*
+ * Exclude early K5 samples with a vendor string of "AMDisbetter!"
+ */
+static inline bool this_cpu_is_amd(void)
+{
+ return this_cpu_vendor_string_is("AuthenticAMD");
+}
+
+static inline uint32_t __this_cpu_has(uint32_t function, uint32_t index,
+ uint8_t reg, uint8_t lo, uint8_t hi)
+{
+ uint32_t gprs[4];
+
+ __cpuid(function, index,
+ &gprs[KVM_CPUID_EAX], &gprs[KVM_CPUID_EBX],
+ &gprs[KVM_CPUID_ECX], &gprs[KVM_CPUID_EDX]);
+
+ return (gprs[reg] & GENMASK(hi, lo)) >> lo;
+}
+
+static inline bool this_cpu_has(struct kvm_x86_cpu_feature feature)
+{
+ return __this_cpu_has(feature.function, feature.index,
+ feature.reg, feature.bit, feature.bit);
+}
+
+static inline uint32_t this_cpu_property(struct kvm_x86_cpu_property property)
+{
+ return __this_cpu_has(property.function, property.index,
+ property.reg, property.lo_bit, property.hi_bit);
+}
-static inline void set_xmm(int n, unsigned long val)
+static __always_inline bool this_cpu_has_p(struct kvm_x86_cpu_property property)
{
- switch (n) {
+ uint32_t max_leaf;
+
+ switch (property.function & 0xc0000000) {
case 0:
- SET_XMM(val, xmm0);
+ max_leaf = this_cpu_property(X86_PROPERTY_MAX_BASIC_LEAF);
+ break;
+ case 0x40000000:
+ max_leaf = this_cpu_property(X86_PROPERTY_MAX_KVM_LEAF);
+ break;
+ case 0x80000000:
+ max_leaf = this_cpu_property(X86_PROPERTY_MAX_EXT_LEAF);
+ break;
+ case 0xc0000000:
+ max_leaf = this_cpu_property(X86_PROPERTY_MAX_CENTAUR_LEAF);
+ }
+ return max_leaf >= property.function;
+}
+
+static inline bool this_pmu_has(struct kvm_x86_pmu_feature feature)
+{
+ uint32_t nr_bits;
+
+ if (feature.f.reg == KVM_CPUID_EBX) {
+ nr_bits = this_cpu_property(X86_PROPERTY_PMU_EBX_BIT_VECTOR_LENGTH);
+ return nr_bits > feature.f.bit && !this_cpu_has(feature.f);
+ }
+
+ GUEST_ASSERT(feature.f.reg == KVM_CPUID_ECX);
+ nr_bits = this_cpu_property(X86_PROPERTY_PMU_NR_FIXED_COUNTERS);
+ return nr_bits > feature.f.bit || this_cpu_has(feature.f);
+}
+
+static __always_inline uint64_t this_cpu_supported_xcr0(void)
+{
+ if (!this_cpu_has_p(X86_PROPERTY_SUPPORTED_XCR0_LO))
+ return 0;
+
+ return this_cpu_property(X86_PROPERTY_SUPPORTED_XCR0_LO) |
+ ((uint64_t)this_cpu_property(X86_PROPERTY_SUPPORTED_XCR0_HI) << 32);
+}
+
+typedef u32 __attribute__((vector_size(16))) sse128_t;
+#define __sse128_u union { sse128_t vec; u64 as_u64[2]; u32 as_u32[4]; }
+#define sse128_lo(x) ({ __sse128_u t; t.vec = x; t.as_u64[0]; })
+#define sse128_hi(x) ({ __sse128_u t; t.vec = x; t.as_u64[1]; })
+
+static inline void read_sse_reg(int reg, sse128_t *data)
+{
+ switch (reg) {
+ case 0:
+ asm("movdqa %%xmm0, %0" : "=m"(*data));
break;
case 1:
- SET_XMM(val, xmm1);
+ asm("movdqa %%xmm1, %0" : "=m"(*data));
break;
case 2:
- SET_XMM(val, xmm2);
+ asm("movdqa %%xmm2, %0" : "=m"(*data));
break;
case 3:
- SET_XMM(val, xmm3);
+ asm("movdqa %%xmm3, %0" : "=m"(*data));
break;
case 4:
- SET_XMM(val, xmm4);
+ asm("movdqa %%xmm4, %0" : "=m"(*data));
break;
case 5:
- SET_XMM(val, xmm5);
+ asm("movdqa %%xmm5, %0" : "=m"(*data));
break;
case 6:
- SET_XMM(val, xmm6);
+ asm("movdqa %%xmm6, %0" : "=m"(*data));
break;
case 7:
- SET_XMM(val, xmm7);
+ asm("movdqa %%xmm7, %0" : "=m"(*data));
break;
+ default:
+ BUG();
}
}
-typedef unsigned long v1di __attribute__ ((vector_size (8)));
-static inline unsigned long get_xmm(int n)
+static inline void write_sse_reg(int reg, const sse128_t *data)
{
- assert(n >= 0 && n <= 7);
-
- register v1di xmm0 __asm__("%xmm0");
- register v1di xmm1 __asm__("%xmm1");
- register v1di xmm2 __asm__("%xmm2");
- register v1di xmm3 __asm__("%xmm3");
- register v1di xmm4 __asm__("%xmm4");
- register v1di xmm5 __asm__("%xmm5");
- register v1di xmm6 __asm__("%xmm6");
- register v1di xmm7 __asm__("%xmm7");
- switch (n) {
+ switch (reg) {
case 0:
- return (unsigned long)xmm0;
+ asm("movdqa %0, %%xmm0" : : "m"(*data));
+ break;
case 1:
- return (unsigned long)xmm1;
+ asm("movdqa %0, %%xmm1" : : "m"(*data));
+ break;
case 2:
- return (unsigned long)xmm2;
+ asm("movdqa %0, %%xmm2" : : "m"(*data));
+ break;
case 3:
- return (unsigned long)xmm3;
+ asm("movdqa %0, %%xmm3" : : "m"(*data));
+ break;
case 4:
- return (unsigned long)xmm4;
+ asm("movdqa %0, %%xmm4" : : "m"(*data));
+ break;
case 5:
- return (unsigned long)xmm5;
+ asm("movdqa %0, %%xmm5" : : "m"(*data));
+ break;
case 6:
- return (unsigned long)xmm6;
+ asm("movdqa %0, %%xmm6" : : "m"(*data));
+ break;
case 7:
- return (unsigned long)xmm7;
+ asm("movdqa %0, %%xmm7" : : "m"(*data));
+ break;
+ default:
+ BUG();
+ }
+}
+
+static inline void cpu_relax(void)
+{
+ asm volatile("rep; nop" ::: "memory");
+}
+
+#define ud2() \
+ __asm__ __volatile__( \
+ "ud2\n" \
+ )
+
+#define hlt() \
+ __asm__ __volatile__( \
+ "hlt\n" \
+ )
+
+struct kvm_x86_state *vcpu_save_state(struct kvm_vcpu *vcpu);
+void vcpu_load_state(struct kvm_vcpu *vcpu, struct kvm_x86_state *state);
+void kvm_x86_state_cleanup(struct kvm_x86_state *state);
+
+const struct kvm_msr_list *kvm_get_msr_index_list(void);
+const struct kvm_msr_list *kvm_get_feature_msr_index_list(void);
+bool kvm_msr_is_in_save_restore_list(uint32_t msr_index);
+uint64_t kvm_get_feature_msr(uint64_t msr_index);
+
+static inline void vcpu_msrs_get(struct kvm_vcpu *vcpu,
+ struct kvm_msrs *msrs)
+{
+ int r = __vcpu_ioctl(vcpu, KVM_GET_MSRS, msrs);
+
+ TEST_ASSERT(r == msrs->nmsrs,
+ "KVM_GET_MSRS failed, r: %i (failed on MSR %x)",
+ r, r < 0 || r >= msrs->nmsrs ? -1 : msrs->entries[r].index);
+}
+static inline void vcpu_msrs_set(struct kvm_vcpu *vcpu, struct kvm_msrs *msrs)
+{
+ int r = __vcpu_ioctl(vcpu, KVM_SET_MSRS, msrs);
+
+ TEST_ASSERT(r == msrs->nmsrs,
+ "KVM_SET_MSRS failed, r: %i (failed on MSR %x)",
+ r, r < 0 || r >= msrs->nmsrs ? -1 : msrs->entries[r].index);
+}
+static inline void vcpu_debugregs_get(struct kvm_vcpu *vcpu,
+ struct kvm_debugregs *debugregs)
+{
+ vcpu_ioctl(vcpu, KVM_GET_DEBUGREGS, debugregs);
+}
+static inline void vcpu_debugregs_set(struct kvm_vcpu *vcpu,
+ struct kvm_debugregs *debugregs)
+{
+ vcpu_ioctl(vcpu, KVM_SET_DEBUGREGS, debugregs);
+}
+static inline void vcpu_xsave_get(struct kvm_vcpu *vcpu,
+ struct kvm_xsave *xsave)
+{
+ vcpu_ioctl(vcpu, KVM_GET_XSAVE, xsave);
+}
+static inline void vcpu_xsave2_get(struct kvm_vcpu *vcpu,
+ struct kvm_xsave *xsave)
+{
+ vcpu_ioctl(vcpu, KVM_GET_XSAVE2, xsave);
+}
+static inline void vcpu_xsave_set(struct kvm_vcpu *vcpu,
+ struct kvm_xsave *xsave)
+{
+ vcpu_ioctl(vcpu, KVM_SET_XSAVE, xsave);
+}
+static inline void vcpu_xcrs_get(struct kvm_vcpu *vcpu,
+ struct kvm_xcrs *xcrs)
+{
+ vcpu_ioctl(vcpu, KVM_GET_XCRS, xcrs);
+}
+static inline void vcpu_xcrs_set(struct kvm_vcpu *vcpu, struct kvm_xcrs *xcrs)
+{
+ vcpu_ioctl(vcpu, KVM_SET_XCRS, xcrs);
+}
+
+const struct kvm_cpuid_entry2 *get_cpuid_entry(const struct kvm_cpuid2 *cpuid,
+ uint32_t function, uint32_t index);
+const struct kvm_cpuid2 *kvm_get_supported_cpuid(void);
+const struct kvm_cpuid2 *kvm_get_supported_hv_cpuid(void);
+const struct kvm_cpuid2 *vcpu_get_supported_hv_cpuid(struct kvm_vcpu *vcpu);
+
+static inline uint32_t kvm_cpu_fms(void)
+{
+ return get_cpuid_entry(kvm_get_supported_cpuid(), 0x1, 0)->eax;
+}
+
+static inline uint32_t kvm_cpu_family(void)
+{
+ return x86_family(kvm_cpu_fms());
+}
+
+static inline uint32_t kvm_cpu_model(void)
+{
+ return x86_model(kvm_cpu_fms());
+}
+
+bool kvm_cpuid_has(const struct kvm_cpuid2 *cpuid,
+ struct kvm_x86_cpu_feature feature);
+
+static inline bool kvm_cpu_has(struct kvm_x86_cpu_feature feature)
+{
+ return kvm_cpuid_has(kvm_get_supported_cpuid(), feature);
+}
+
+uint32_t kvm_cpuid_property(const struct kvm_cpuid2 *cpuid,
+ struct kvm_x86_cpu_property property);
+
+static inline uint32_t kvm_cpu_property(struct kvm_x86_cpu_property property)
+{
+ return kvm_cpuid_property(kvm_get_supported_cpuid(), property);
+}
+
+static __always_inline bool kvm_cpu_has_p(struct kvm_x86_cpu_property property)
+{
+ uint32_t max_leaf;
+
+ switch (property.function & 0xc0000000) {
+ case 0:
+ max_leaf = kvm_cpu_property(X86_PROPERTY_MAX_BASIC_LEAF);
+ break;
+ case 0x40000000:
+ max_leaf = kvm_cpu_property(X86_PROPERTY_MAX_KVM_LEAF);
+ break;
+ case 0x80000000:
+ max_leaf = kvm_cpu_property(X86_PROPERTY_MAX_EXT_LEAF);
+ break;
+ case 0xc0000000:
+ max_leaf = kvm_cpu_property(X86_PROPERTY_MAX_CENTAUR_LEAF);
}
+ return max_leaf >= property.function;
+}
+
+static inline bool kvm_pmu_has(struct kvm_x86_pmu_feature feature)
+{
+ uint32_t nr_bits;
+
+ if (feature.f.reg == KVM_CPUID_EBX) {
+ nr_bits = kvm_cpu_property(X86_PROPERTY_PMU_EBX_BIT_VECTOR_LENGTH);
+ return nr_bits > feature.f.bit && !kvm_cpu_has(feature.f);
+ }
+
+ TEST_ASSERT_EQ(feature.f.reg, KVM_CPUID_ECX);
+ nr_bits = kvm_cpu_property(X86_PROPERTY_PMU_NR_FIXED_COUNTERS);
+ return nr_bits > feature.f.bit || kvm_cpu_has(feature.f);
+}
+
+static __always_inline uint64_t kvm_cpu_supported_xcr0(void)
+{
+ if (!kvm_cpu_has_p(X86_PROPERTY_SUPPORTED_XCR0_LO))
+ return 0;
+
+ return kvm_cpu_property(X86_PROPERTY_SUPPORTED_XCR0_LO) |
+ ((uint64_t)kvm_cpu_property(X86_PROPERTY_SUPPORTED_XCR0_HI) << 32);
+}
+
+static inline size_t kvm_cpuid2_size(int nr_entries)
+{
+ return sizeof(struct kvm_cpuid2) +
+ sizeof(struct kvm_cpuid_entry2) * nr_entries;
+}
+
+/*
+ * Allocate a "struct kvm_cpuid2* instance, with the 0-length arrary of
+ * entries sized to hold @nr_entries. The caller is responsible for freeing
+ * the struct.
+ */
+static inline struct kvm_cpuid2 *allocate_kvm_cpuid2(int nr_entries)
+{
+ struct kvm_cpuid2 *cpuid;
+
+ cpuid = malloc(kvm_cpuid2_size(nr_entries));
+ TEST_ASSERT(cpuid, "-ENOMEM when allocating kvm_cpuid2");
+
+ cpuid->nent = nr_entries;
+
+ return cpuid;
+}
+
+void vcpu_init_cpuid(struct kvm_vcpu *vcpu, const struct kvm_cpuid2 *cpuid);
+void vcpu_set_hv_cpuid(struct kvm_vcpu *vcpu);
+
+static inline struct kvm_cpuid_entry2 *__vcpu_get_cpuid_entry(struct kvm_vcpu *vcpu,
+ uint32_t function,
+ uint32_t index)
+{
+ return (struct kvm_cpuid_entry2 *)get_cpuid_entry(vcpu->cpuid,
+ function, index);
+}
+
+static inline struct kvm_cpuid_entry2 *vcpu_get_cpuid_entry(struct kvm_vcpu *vcpu,
+ uint32_t function)
+{
+ return __vcpu_get_cpuid_entry(vcpu, function, 0);
+}
+
+static inline int __vcpu_set_cpuid(struct kvm_vcpu *vcpu)
+{
+ int r;
+
+ TEST_ASSERT(vcpu->cpuid, "Must do vcpu_init_cpuid() first");
+ r = __vcpu_ioctl(vcpu, KVM_SET_CPUID2, vcpu->cpuid);
+ if (r)
+ return r;
+
+ /* On success, refresh the cache to pick up adjustments made by KVM. */
+ vcpu_ioctl(vcpu, KVM_GET_CPUID2, vcpu->cpuid);
return 0;
}
-bool is_intel_cpu(void);
+static inline void vcpu_set_cpuid(struct kvm_vcpu *vcpu)
+{
+ TEST_ASSERT(vcpu->cpuid, "Must do vcpu_init_cpuid() first");
+ vcpu_ioctl(vcpu, KVM_SET_CPUID2, vcpu->cpuid);
-struct kvm_x86_state;
-struct kvm_x86_state *vcpu_save_state(struct kvm_vm *vm, uint32_t vcpuid);
-void vcpu_load_state(struct kvm_vm *vm, uint32_t vcpuid,
- struct kvm_x86_state *state);
+ /* Refresh the cache to pick up adjustments made by KVM. */
+ vcpu_ioctl(vcpu, KVM_GET_CPUID2, vcpu->cpuid);
+}
-struct kvm_msr_list *kvm_get_msr_index_list(void);
+void vcpu_set_cpuid_property(struct kvm_vcpu *vcpu,
+ struct kvm_x86_cpu_property property,
+ uint32_t value);
-struct kvm_cpuid2 *kvm_get_supported_cpuid(void);
-void vcpu_set_cpuid(struct kvm_vm *vm, uint32_t vcpuid,
- struct kvm_cpuid2 *cpuid);
+void vcpu_clear_cpuid_entry(struct kvm_vcpu *vcpu, uint32_t function);
+void vcpu_set_or_clear_cpuid_feature(struct kvm_vcpu *vcpu,
+ struct kvm_x86_cpu_feature feature,
+ bool set);
-struct kvm_cpuid_entry2 *
-kvm_get_supported_cpuid_index(uint32_t function, uint32_t index);
+static inline void vcpu_set_cpuid_feature(struct kvm_vcpu *vcpu,
+ struct kvm_x86_cpu_feature feature)
+{
+ vcpu_set_or_clear_cpuid_feature(vcpu, feature, true);
+
+}
-static inline struct kvm_cpuid_entry2 *
-kvm_get_supported_cpuid_entry(uint32_t function)
+static inline void vcpu_clear_cpuid_feature(struct kvm_vcpu *vcpu,
+ struct kvm_x86_cpu_feature feature)
+{
+ vcpu_set_or_clear_cpuid_feature(vcpu, feature, false);
+}
+
+uint64_t vcpu_get_msr(struct kvm_vcpu *vcpu, uint64_t msr_index);
+int _vcpu_set_msr(struct kvm_vcpu *vcpu, uint64_t msr_index, uint64_t msr_value);
+
+/*
+ * Assert on an MSR access(es) and pretty print the MSR name when possible.
+ * Note, the caller provides the stringified name so that the name of macro is
+ * printed, not the value the macro resolves to (due to macro expansion).
+ */
+#define TEST_ASSERT_MSR(cond, fmt, msr, str, args...) \
+do { \
+ if (__builtin_constant_p(msr)) { \
+ TEST_ASSERT(cond, fmt, str, args); \
+ } else if (!(cond)) { \
+ char buf[16]; \
+ \
+ snprintf(buf, sizeof(buf), "MSR 0x%x", msr); \
+ TEST_ASSERT(cond, fmt, buf, args); \
+ } \
+} while (0)
+
+/*
+ * Returns true if KVM should return the last written value when reading an MSR
+ * from userspace, e.g. the MSR isn't a command MSR, doesn't emulate state that
+ * is changing, etc. This is NOT an exhaustive list! The intent is to filter
+ * out MSRs that are not durable _and_ that a selftest wants to write.
+ */
+static inline bool is_durable_msr(uint32_t msr)
{
- return kvm_get_supported_cpuid_index(function, 0);
+ return msr != MSR_IA32_TSC;
}
-uint64_t vcpu_get_msr(struct kvm_vm *vm, uint32_t vcpuid, uint64_t msr_index);
-int _vcpu_set_msr(struct kvm_vm *vm, uint32_t vcpuid, uint64_t msr_index,
- uint64_t msr_value);
-void vcpu_set_msr(struct kvm_vm *vm, uint32_t vcpuid, uint64_t msr_index,
- uint64_t msr_value);
+#define vcpu_set_msr(vcpu, msr, val) \
+do { \
+ uint64_t r, v = val; \
+ \
+ TEST_ASSERT_MSR(_vcpu_set_msr(vcpu, msr, v) == 1, \
+ "KVM_SET_MSRS failed on %s, value = 0x%lx", msr, #msr, v); \
+ if (!is_durable_msr(msr)) \
+ break; \
+ r = vcpu_get_msr(vcpu, msr); \
+ TEST_ASSERT_MSR(r == v, "Set %s to '0x%lx', got back '0x%lx'", msr, #msr, v, r);\
+} while (0)
-uint32_t kvm_get_cpuid_max_basic(void);
-uint32_t kvm_get_cpuid_max_extended(void);
void kvm_get_cpu_address_width(unsigned int *pa_bits, unsigned int *va_bits);
+void kvm_init_vm_address_properties(struct kvm_vm *vm);
+bool vm_is_unrestricted_guest(struct kvm_vm *vm);
+
+struct ex_regs {
+ uint64_t rax, rcx, rdx, rbx;
+ uint64_t rbp, rsi, rdi;
+ uint64_t r8, r9, r10, r11;
+ uint64_t r12, r13, r14, r15;
+ uint64_t vector;
+ uint64_t error_code;
+ uint64_t rip;
+ uint64_t cs;
+ uint64_t rflags;
+};
+
+struct idt_entry {
+ uint16_t offset0;
+ uint16_t selector;
+ uint16_t ist : 3;
+ uint16_t : 5;
+ uint16_t type : 4;
+ uint16_t : 1;
+ uint16_t dpl : 2;
+ uint16_t p : 1;
+ uint16_t offset1;
+ uint32_t offset2; uint32_t reserved;
+};
+
+void vm_init_descriptor_tables(struct kvm_vm *vm);
+void vcpu_init_descriptor_tables(struct kvm_vcpu *vcpu);
+void vm_install_exception_handler(struct kvm_vm *vm, int vector,
+ void (*handler)(struct ex_regs *));
+
+/* If a toddler were to say "abracadabra". */
+#define KVM_EXCEPTION_MAGIC 0xabacadabaULL
+
+/*
+ * KVM selftest exception fixup uses registers to coordinate with the exception
+ * handler, versus the kernel's in-memory tables and KVM-Unit-Tests's in-memory
+ * per-CPU data. Using only registers avoids having to map memory into the
+ * guest, doesn't require a valid, stable GS.base, and reduces the risk of
+ * for recursive faults when accessing memory in the handler. The downside to
+ * using registers is that it restricts what registers can be used by the actual
+ * instruction. But, selftests are 64-bit only, making register* pressure a
+ * minor concern. Use r9-r11 as they are volatile, i.e. don't need to be saved
+ * by the callee, and except for r11 are not implicit parameters to any
+ * instructions. Ideally, fixup would use r8-r10 and thus avoid implicit
+ * parameters entirely, but Hyper-V's hypercall ABI uses r8 and testing Hyper-V
+ * is higher priority than testing non-faulting SYSCALL/SYSRET.
+ *
+ * Note, the fixup handler deliberately does not handle #DE, i.e. the vector
+ * is guaranteed to be non-zero on fault.
+ *
+ * REGISTER INPUTS:
+ * r9 = MAGIC
+ * r10 = RIP
+ * r11 = new RIP on fault
+ *
+ * REGISTER OUTPUTS:
+ * r9 = exception vector (non-zero)
+ * r10 = error code
+ */
+#define __KVM_ASM_SAFE(insn, fep) \
+ "mov $" __stringify(KVM_EXCEPTION_MAGIC) ", %%r9\n\t" \
+ "lea 1f(%%rip), %%r10\n\t" \
+ "lea 2f(%%rip), %%r11\n\t" \
+ fep "1: " insn "\n\t" \
+ "xor %%r9, %%r9\n\t" \
+ "2:\n\t" \
+ "mov %%r9b, %[vector]\n\t" \
+ "mov %%r10, %[error_code]\n\t"
+
+#define KVM_ASM_SAFE(insn) __KVM_ASM_SAFE(insn, "")
+#define KVM_ASM_SAFE_FEP(insn) __KVM_ASM_SAFE(insn, KVM_FEP)
+
+#define KVM_ASM_SAFE_OUTPUTS(v, ec) [vector] "=qm"(v), [error_code] "=rm"(ec)
+#define KVM_ASM_SAFE_CLOBBERS "r9", "r10", "r11"
+
+#define kvm_asm_safe(insn, inputs...) \
+({ \
+ uint64_t ign_error_code; \
+ uint8_t vector; \
+ \
+ asm volatile(KVM_ASM_SAFE(insn) \
+ : KVM_ASM_SAFE_OUTPUTS(vector, ign_error_code) \
+ : inputs \
+ : KVM_ASM_SAFE_CLOBBERS); \
+ vector; \
+})
+
+#define kvm_asm_safe_ec(insn, error_code, inputs...) \
+({ \
+ uint8_t vector; \
+ \
+ asm volatile(KVM_ASM_SAFE(insn) \
+ : KVM_ASM_SAFE_OUTPUTS(vector, error_code) \
+ : inputs \
+ : KVM_ASM_SAFE_CLOBBERS); \
+ vector; \
+})
+
+#define kvm_asm_safe_fep(insn, inputs...) \
+({ \
+ uint64_t ign_error_code; \
+ uint8_t vector; \
+ \
+ asm volatile(KVM_ASM_SAFE(insn) \
+ : KVM_ASM_SAFE_OUTPUTS(vector, ign_error_code) \
+ : inputs \
+ : KVM_ASM_SAFE_CLOBBERS); \
+ vector; \
+})
+
+#define kvm_asm_safe_ec_fep(insn, error_code, inputs...) \
+({ \
+ uint8_t vector; \
+ \
+ asm volatile(KVM_ASM_SAFE_FEP(insn) \
+ : KVM_ASM_SAFE_OUTPUTS(vector, error_code) \
+ : inputs \
+ : KVM_ASM_SAFE_CLOBBERS); \
+ vector; \
+})
+
+#define BUILD_READ_U64_SAFE_HELPER(insn, _fep, _FEP) \
+static inline uint8_t insn##_safe ##_fep(uint32_t idx, uint64_t *val) \
+{ \
+ uint64_t error_code; \
+ uint8_t vector; \
+ uint32_t a, d; \
+ \
+ asm volatile(KVM_ASM_SAFE##_FEP(#insn) \
+ : "=a"(a), "=d"(d), \
+ KVM_ASM_SAFE_OUTPUTS(vector, error_code) \
+ : "c"(idx) \
+ : KVM_ASM_SAFE_CLOBBERS); \
+ \
+ *val = (uint64_t)a | ((uint64_t)d << 32); \
+ return vector; \
+}
+
+/*
+ * Generate {insn}_safe() and {insn}_safe_fep() helpers for instructions that
+ * use ECX as in input index, and EDX:EAX as a 64-bit output.
+ */
+#define BUILD_READ_U64_SAFE_HELPERS(insn) \
+ BUILD_READ_U64_SAFE_HELPER(insn, , ) \
+ BUILD_READ_U64_SAFE_HELPER(insn, _fep, _FEP) \
+
+BUILD_READ_U64_SAFE_HELPERS(rdmsr)
+BUILD_READ_U64_SAFE_HELPERS(rdpmc)
+BUILD_READ_U64_SAFE_HELPERS(xgetbv)
+
+static inline uint8_t wrmsr_safe(uint32_t msr, uint64_t val)
+{
+ return kvm_asm_safe("wrmsr", "a"(val & -1u), "d"(val >> 32), "c"(msr));
+}
+
+static inline uint8_t xsetbv_safe(uint32_t index, uint64_t value)
+{
+ u32 eax = value;
+ u32 edx = value >> 32;
+
+ return kvm_asm_safe("xsetbv", "a" (eax), "d" (edx), "c" (index));
+}
+
+bool kvm_is_tdp_enabled(void);
+
+static inline bool kvm_is_pmu_enabled(void)
+{
+ return get_kvm_param_bool("enable_pmu");
+}
+
+static inline bool kvm_is_forced_emulation_enabled(void)
+{
+ return !!get_kvm_param_integer("force_emulation_prefix");
+}
+
+uint64_t *__vm_get_page_table_entry(struct kvm_vm *vm, uint64_t vaddr,
+ int *level);
+uint64_t *vm_get_page_table_entry(struct kvm_vm *vm, uint64_t vaddr);
+
+uint64_t kvm_hypercall(uint64_t nr, uint64_t a0, uint64_t a1, uint64_t a2,
+ uint64_t a3);
+uint64_t __xen_hypercall(uint64_t nr, uint64_t a0, void *a1);
+void xen_hypercall(uint64_t nr, uint64_t a0, void *a1);
+
+static inline uint64_t __kvm_hypercall_map_gpa_range(uint64_t gpa,
+ uint64_t size, uint64_t flags)
+{
+ return kvm_hypercall(KVM_HC_MAP_GPA_RANGE, gpa, size >> PAGE_SHIFT, flags, 0);
+}
+
+static inline void kvm_hypercall_map_gpa_range(uint64_t gpa, uint64_t size,
+ uint64_t flags)
+{
+ uint64_t ret = __kvm_hypercall_map_gpa_range(gpa, size, flags);
+
+ GUEST_ASSERT(!ret);
+}
+
+void __vm_xsave_require_permission(uint64_t xfeature, const char *name);
+
+#define vm_xsave_require_permission(xfeature) \
+ __vm_xsave_require_permission(xfeature, #xfeature)
+
+enum pg_level {
+ PG_LEVEL_NONE,
+ PG_LEVEL_4K,
+ PG_LEVEL_2M,
+ PG_LEVEL_1G,
+ PG_LEVEL_512G,
+ PG_LEVEL_NUM
+};
+
+#define PG_LEVEL_SHIFT(_level) ((_level - 1) * 9 + 12)
+#define PG_LEVEL_SIZE(_level) (1ull << PG_LEVEL_SHIFT(_level))
+
+#define PG_SIZE_4K PG_LEVEL_SIZE(PG_LEVEL_4K)
+#define PG_SIZE_2M PG_LEVEL_SIZE(PG_LEVEL_2M)
+#define PG_SIZE_1G PG_LEVEL_SIZE(PG_LEVEL_1G)
+
+void __virt_pg_map(struct kvm_vm *vm, uint64_t vaddr, uint64_t paddr, int level);
+void virt_map_level(struct kvm_vm *vm, uint64_t vaddr, uint64_t paddr,
+ uint64_t nr_bytes, int level);
/*
* Basic CPU control in CR0
@@ -353,34 +1341,28 @@ void kvm_get_cpu_address_width(unsigned int *pa_bits, unsigned int *va_bits);
#define X86_CR0_CD (1UL<<30) /* Cache Disable */
#define X86_CR0_PG (1UL<<31) /* Paging */
-#define APIC_BASE_MSR 0x800
-#define X2APIC_ENABLE (1UL << 10)
-#define APIC_ICR 0x300
-#define APIC_DEST_SELF 0x40000
-#define APIC_DEST_ALLINC 0x80000
-#define APIC_DEST_ALLBUT 0xC0000
-#define APIC_ICR_RR_MASK 0x30000
-#define APIC_ICR_RR_INVALID 0x00000
-#define APIC_ICR_RR_INPROG 0x10000
-#define APIC_ICR_RR_VALID 0x20000
-#define APIC_INT_LEVELTRIG 0x08000
-#define APIC_INT_ASSERT 0x04000
-#define APIC_ICR_BUSY 0x01000
-#define APIC_DEST_LOGICAL 0x00800
-#define APIC_DEST_PHYSICAL 0x00000
-#define APIC_DM_FIXED 0x00000
-#define APIC_DM_FIXED_MASK 0x00700
-#define APIC_DM_LOWEST 0x00100
-#define APIC_DM_SMI 0x00200
-#define APIC_DM_REMRD 0x00300
-#define APIC_DM_NMI 0x00400
-#define APIC_DM_INIT 0x00500
-#define APIC_DM_STARTUP 0x00600
-#define APIC_DM_EXTINT 0x00700
-#define APIC_VECTOR_MASK 0x000FF
-#define APIC_ICR2 0x310
-
-/* VMX_EPT_VPID_CAP bits */
-#define VMX_EPT_VPID_CAP_AD_BITS (1ULL << 21)
+#define PFERR_PRESENT_BIT 0
+#define PFERR_WRITE_BIT 1
+#define PFERR_USER_BIT 2
+#define PFERR_RSVD_BIT 3
+#define PFERR_FETCH_BIT 4
+#define PFERR_PK_BIT 5
+#define PFERR_SGX_BIT 15
+#define PFERR_GUEST_FINAL_BIT 32
+#define PFERR_GUEST_PAGE_BIT 33
+#define PFERR_IMPLICIT_ACCESS_BIT 48
+
+#define PFERR_PRESENT_MASK BIT(PFERR_PRESENT_BIT)
+#define PFERR_WRITE_MASK BIT(PFERR_WRITE_BIT)
+#define PFERR_USER_MASK BIT(PFERR_USER_BIT)
+#define PFERR_RSVD_MASK BIT(PFERR_RSVD_BIT)
+#define PFERR_FETCH_MASK BIT(PFERR_FETCH_BIT)
+#define PFERR_PK_MASK BIT(PFERR_PK_BIT)
+#define PFERR_SGX_MASK BIT(PFERR_SGX_BIT)
+#define PFERR_GUEST_FINAL_MASK BIT_ULL(PFERR_GUEST_FINAL_BIT)
+#define PFERR_GUEST_PAGE_MASK BIT_ULL(PFERR_GUEST_PAGE_BIT)
+#define PFERR_IMPLICIT_ACCESS BIT_ULL(PFERR_IMPLICIT_ACCESS_BIT)
+
+bool sys_clocksource_is_based_on_tsc(void);
#endif /* SELFTEST_KVM_PROCESSOR_H */
diff --git a/tools/testing/selftests/kvm/include/x86_64/sev.h b/tools/testing/selftests/kvm/include/x86_64/sev.h
new file mode 100644
index 000000000000..8a1bf88474c9
--- /dev/null
+++ b/tools/testing/selftests/kvm/include/x86_64/sev.h
@@ -0,0 +1,107 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * Helpers used for SEV guests
+ *
+ */
+#ifndef SELFTEST_KVM_SEV_H
+#define SELFTEST_KVM_SEV_H
+
+#include <stdint.h>
+#include <stdbool.h>
+
+#include "linux/psp-sev.h"
+
+#include "kvm_util.h"
+#include "svm_util.h"
+#include "processor.h"
+
+enum sev_guest_state {
+ SEV_GUEST_STATE_UNINITIALIZED = 0,
+ SEV_GUEST_STATE_LAUNCH_UPDATE,
+ SEV_GUEST_STATE_LAUNCH_SECRET,
+ SEV_GUEST_STATE_RUNNING,
+};
+
+#define SEV_POLICY_NO_DBG (1UL << 0)
+#define SEV_POLICY_ES (1UL << 2)
+
+#define GHCB_MSR_TERM_REQ 0x100
+
+void sev_vm_launch(struct kvm_vm *vm, uint32_t policy);
+void sev_vm_launch_measure(struct kvm_vm *vm, uint8_t *measurement);
+void sev_vm_launch_finish(struct kvm_vm *vm);
+
+struct kvm_vm *vm_sev_create_with_one_vcpu(uint32_t policy, void *guest_code,
+ struct kvm_vcpu **cpu);
+
+kvm_static_assert(SEV_RET_SUCCESS == 0);
+
+/*
+ * The KVM_MEMORY_ENCRYPT_OP uAPI is utter garbage and takes an "unsigned long"
+ * instead of a proper struct. The size of the parameter is embedded in the
+ * ioctl number, i.e. is ABI and thus immutable. Hack around the mess by
+ * creating an overlay to pass in an "unsigned long" without a cast (casting
+ * will make the compiler unhappy due to dereferencing an aliased pointer).
+ */
+#define __vm_sev_ioctl(vm, cmd, arg) \
+({ \
+ int r; \
+ \
+ union { \
+ struct kvm_sev_cmd c; \
+ unsigned long raw; \
+ } sev_cmd = { .c = { \
+ .id = (cmd), \
+ .data = (uint64_t)(arg), \
+ .sev_fd = (vm)->arch.sev_fd, \
+ } }; \
+ \
+ r = __vm_ioctl(vm, KVM_MEMORY_ENCRYPT_OP, &sev_cmd.raw); \
+ r ?: sev_cmd.c.error; \
+})
+
+#define vm_sev_ioctl(vm, cmd, arg) \
+({ \
+ int ret = __vm_sev_ioctl(vm, cmd, arg); \
+ \
+ __TEST_ASSERT_VM_VCPU_IOCTL(!ret, #cmd, ret, vm); \
+})
+
+static inline void sev_vm_init(struct kvm_vm *vm)
+{
+ vm->arch.sev_fd = open_sev_dev_path_or_exit();
+
+ vm_sev_ioctl(vm, KVM_SEV_INIT, NULL);
+}
+
+
+static inline void sev_es_vm_init(struct kvm_vm *vm)
+{
+ vm->arch.sev_fd = open_sev_dev_path_or_exit();
+
+ vm_sev_ioctl(vm, KVM_SEV_ES_INIT, NULL);
+}
+
+static inline void sev_register_encrypted_memory(struct kvm_vm *vm,
+ struct userspace_mem_region *region)
+{
+ struct kvm_enc_region range = {
+ .addr = region->region.userspace_addr,
+ .size = region->region.memory_size,
+ };
+
+ vm_ioctl(vm, KVM_MEMORY_ENCRYPT_REG_REGION, &range);
+}
+
+static inline void sev_launch_update_data(struct kvm_vm *vm, vm_paddr_t gpa,
+ uint64_t size)
+{
+ struct kvm_sev_launch_update_data update_data = {
+ .uaddr = (unsigned long)addr_gpa2hva(vm, gpa),
+ .len = size,
+ };
+
+ vm_sev_ioctl(vm, KVM_SEV_LAUNCH_UPDATE_DATA, &update_data);
+}
+
+#endif /* SELFTEST_KVM_SEV_H */
diff --git a/tools/testing/selftests/kvm/include/x86_64/svm.h b/tools/testing/selftests/kvm/include/x86_64/svm.h
index f4ea2355dbc2..4803e1056055 100644
--- a/tools/testing/selftests/kvm/include/x86_64/svm.h
+++ b/tools/testing/selftests/kvm/include/x86_64/svm.h
@@ -58,6 +58,27 @@ enum {
INTERCEPT_RDPRU,
};
+struct hv_vmcb_enlightenments {
+ struct __packed hv_enlightenments_control {
+ u32 nested_flush_hypercall:1;
+ u32 msr_bitmap:1;
+ u32 enlightened_npt_tlb: 1;
+ u32 reserved:29;
+ } __packed hv_enlightenments_control;
+ u32 hv_vp_id;
+ u64 hv_vm_id;
+ u64 partition_assist_page;
+ u64 reserved;
+} __packed;
+
+/*
+ * Hyper-V uses the software reserved clean bit in VMCB
+ */
+#define HV_VMCB_NESTED_ENLIGHTENMENTS (1U << 31)
+
+/* Synthetic VM-Exit */
+#define HV_SVM_EXITCODE_ENL 0xf0000000
+#define HV_SVM_ENL_EXITCODE_TRAP_AFTER_FLUSH (1)
struct __attribute__ ((__packed__)) vmcb_control_area {
u32 intercept_cr;
@@ -99,7 +120,17 @@ struct __attribute__ ((__packed__)) vmcb_control_area {
u8 reserved_6[8]; /* Offset 0xe8 */
u64 avic_logical_id; /* Offset 0xf0 */
u64 avic_physical_id; /* Offset 0xf8 */
- u8 reserved_7[768];
+ u8 reserved_7[8];
+ u64 vmsa_pa; /* Used for an SEV-ES guest */
+ u8 reserved_8[720];
+ /*
+ * Offset 0x3e0, 32 bytes reserved
+ * for use by hypervisor/software.
+ */
+ union {
+ struct hv_vmcb_enlightenments hv_enlightenments;
+ u8 reserved_sw[32];
+ };
};
@@ -211,8 +242,6 @@ struct __attribute__ ((__packed__)) vmcb {
struct vmcb_save_area save;
};
-#define SVM_CPUID_FUNC 0x8000000a
-
#define SVM_VM_CR_SVM_DISABLE 4
#define SVM_SELECTOR_S_SHIFT 4
diff --git a/tools/testing/selftests/kvm/include/x86_64/svm_util.h b/tools/testing/selftests/kvm/include/x86_64/svm_util.h
index b7531c83b8ae..044f0f872ba9 100644
--- a/tools/testing/selftests/kvm/include/x86_64/svm_util.h
+++ b/tools/testing/selftests/kvm/include/x86_64/svm_util.h
@@ -9,15 +9,12 @@
#ifndef SELFTEST_KVM_SVM_UTILS_H
#define SELFTEST_KVM_SVM_UTILS_H
+#include <asm/svm.h>
+
#include <stdint.h>
#include "svm.h"
#include "processor.h"
-#define CPUID_SVM_BIT 2
-#define CPUID_SVM BIT_ULL(CPUID_SVM_BIT)
-
-#define SVM_EXIT_VMMCALL 0x081
-
struct svm_test_data {
/* VMCB */
struct vmcb *vmcb; /* gva */
@@ -28,22 +25,41 @@ struct svm_test_data {
struct vmcb_save_area *save_area; /* gva */
void *save_area_hva;
uint64_t save_area_gpa;
+
+ /* MSR-Bitmap */
+ void *msr; /* gva */
+ void *msr_hva;
+ uint64_t msr_gpa;
};
+static inline void vmmcall(void)
+{
+ /*
+ * Stuff RAX and RCX with "safe" values to make sure L0 doesn't handle
+ * it as a valid hypercall (e.g. Hyper-V L2 TLB flush) as the intended
+ * use of this function is to exit to L1 from L2. Clobber all other
+ * GPRs as L1 doesn't correctly preserve them during vmexits.
+ */
+ __asm__ __volatile__("push %%rbp; vmmcall; pop %%rbp"
+ : : "a"(0xdeadbeef), "c"(0xbeefdead)
+ : "rbx", "rdx", "rsi", "rdi", "r8", "r9",
+ "r10", "r11", "r12", "r13", "r14", "r15");
+}
+
+#define stgi() \
+ __asm__ __volatile__( \
+ "stgi\n" \
+ )
+
+#define clgi() \
+ __asm__ __volatile__( \
+ "clgi\n" \
+ )
+
struct svm_test_data *vcpu_alloc_svm(struct kvm_vm *vm, vm_vaddr_t *p_svm_gva);
void generic_svm_setup(struct svm_test_data *svm, void *guest_rip, void *guest_rsp);
void run_guest(struct vmcb *vmcb, uint64_t vmcb_gpa);
-bool nested_svm_supported(void);
-void nested_svm_check_supported(void);
-
-static inline bool cpu_has_svm(void)
-{
- u32 eax = 0x80000001, ecx;
-
- asm("cpuid" :
- "=a" (eax), "=c" (ecx) : "0" (eax) : "ebx", "edx");
- return ecx & CPUID_SVM;
-}
+int open_sev_dev_path_or_exit(void);
#endif /* SELFTEST_KVM_SVM_UTILS_H */
diff --git a/tools/testing/selftests/kvm/include/x86_64/ucall.h b/tools/testing/selftests/kvm/include/x86_64/ucall.h
new file mode 100644
index 000000000000..06b244bd06ee
--- /dev/null
+++ b/tools/testing/selftests/kvm/include/x86_64/ucall.h
@@ -0,0 +1,13 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+#ifndef SELFTEST_KVM_UCALL_H
+#define SELFTEST_KVM_UCALL_H
+
+#include "kvm_util_base.h"
+
+#define UCALL_EXIT_REASON KVM_EXIT_IO
+
+static inline void ucall_arch_init(struct kvm_vm *vm, vm_paddr_t mmio_gpa)
+{
+}
+
+#endif
diff --git a/tools/testing/selftests/kvm/include/x86_64/vmx.h b/tools/testing/selftests/kvm/include/x86_64/vmx.h
index 16fa21ebb99c..5f0c0a29c556 100644
--- a/tools/testing/selftests/kvm/include/x86_64/vmx.h
+++ b/tools/testing/selftests/kvm/include/x86_64/vmx.h
@@ -8,12 +8,11 @@
#ifndef SELFTEST_KVM_VMX_H
#define SELFTEST_KVM_VMX_H
+#include <asm/vmx.h>
+
#include <stdint.h>
#include "processor.h"
-
-#define CPUID_VMX_BIT 5
-
-#define CPUID_VMX (1 << 5)
+#include "apic.h"
/*
* Definitions of Primary Processor-Based VM-Execution Controls.
@@ -48,7 +47,7 @@
#define SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES 0x00000001
#define SECONDARY_EXEC_ENABLE_EPT 0x00000002
#define SECONDARY_EXEC_DESC 0x00000004
-#define SECONDARY_EXEC_RDTSCP 0x00000008
+#define SECONDARY_EXEC_ENABLE_RDTSCP 0x00000008
#define SECONDARY_EXEC_VIRTUALIZE_X2APIC_MODE 0x00000010
#define SECONDARY_EXEC_ENABLE_VPID 0x00000020
#define SECONDARY_EXEC_WBINVD_EXITING 0x00000040
@@ -99,56 +98,10 @@
#define VMX_MISC_PREEMPTION_TIMER_RATE_MASK 0x0000001f
#define VMX_MISC_SAVE_EFER_LMA 0x00000020
+#define VMX_EPT_VPID_CAP_1G_PAGES 0x00020000
+#define VMX_EPT_VPID_CAP_AD_BITS 0x00200000
+
#define EXIT_REASON_FAILED_VMENTRY 0x80000000
-#define EXIT_REASON_EXCEPTION_NMI 0
-#define EXIT_REASON_EXTERNAL_INTERRUPT 1
-#define EXIT_REASON_TRIPLE_FAULT 2
-#define EXIT_REASON_INTERRUPT_WINDOW 7
-#define EXIT_REASON_NMI_WINDOW 8
-#define EXIT_REASON_TASK_SWITCH 9
-#define EXIT_REASON_CPUID 10
-#define EXIT_REASON_HLT 12
-#define EXIT_REASON_INVD 13
-#define EXIT_REASON_INVLPG 14
-#define EXIT_REASON_RDPMC 15
-#define EXIT_REASON_RDTSC 16
-#define EXIT_REASON_VMCALL 18
-#define EXIT_REASON_VMCLEAR 19
-#define EXIT_REASON_VMLAUNCH 20
-#define EXIT_REASON_VMPTRLD 21
-#define EXIT_REASON_VMPTRST 22
-#define EXIT_REASON_VMREAD 23
-#define EXIT_REASON_VMRESUME 24
-#define EXIT_REASON_VMWRITE 25
-#define EXIT_REASON_VMOFF 26
-#define EXIT_REASON_VMON 27
-#define EXIT_REASON_CR_ACCESS 28
-#define EXIT_REASON_DR_ACCESS 29
-#define EXIT_REASON_IO_INSTRUCTION 30
-#define EXIT_REASON_MSR_READ 31
-#define EXIT_REASON_MSR_WRITE 32
-#define EXIT_REASON_INVALID_STATE 33
-#define EXIT_REASON_MWAIT_INSTRUCTION 36
-#define EXIT_REASON_MONITOR_INSTRUCTION 39
-#define EXIT_REASON_PAUSE_INSTRUCTION 40
-#define EXIT_REASON_MCE_DURING_VMENTRY 41
-#define EXIT_REASON_TPR_BELOW_THRESHOLD 43
-#define EXIT_REASON_APIC_ACCESS 44
-#define EXIT_REASON_EOI_INDUCED 45
-#define EXIT_REASON_EPT_VIOLATION 48
-#define EXIT_REASON_EPT_MISCONFIG 49
-#define EXIT_REASON_INVEPT 50
-#define EXIT_REASON_RDTSCP 51
-#define EXIT_REASON_PREEMPTION_TIMER 52
-#define EXIT_REASON_INVVPID 53
-#define EXIT_REASON_WBINVD 54
-#define EXIT_REASON_XSETBV 55
-#define EXIT_REASON_APIC_WRITE 56
-#define EXIT_REASON_INVPCID 58
-#define EXIT_REASON_PML_FULL 62
-#define EXIT_REASON_XSAVES 63
-#define EXIT_REASON_XRSTORS 64
-#define LAST_EXIT_REASON 64
enum vmcs_field {
VIRTUAL_PROCESSOR_ID = 0x00000000,
@@ -208,6 +161,8 @@ enum vmcs_field {
VMWRITE_BITMAP_HIGH = 0x00002029,
XSS_EXIT_BITMAP = 0x0000202C,
XSS_EXIT_BITMAP_HIGH = 0x0000202D,
+ ENCLS_EXITING_BITMAP = 0x0000202E,
+ ENCLS_EXITING_BITMAP_HIGH = 0x0000202F,
TSC_MULTIPLIER = 0x00002032,
TSC_MULTIPLIER_HIGH = 0x00002033,
GUEST_PHYSICAL_ADDRESS = 0x00002400,
@@ -482,11 +437,16 @@ static inline int vmresume(void)
static inline void vmcall(void)
{
- /* Currently, L1 destroys our GPRs during vmexits. */
- __asm__ __volatile__("push %%rbp; vmcall; pop %%rbp" : : :
- "rax", "rbx", "rcx", "rdx",
- "rsi", "rdi", "r8", "r9", "r10", "r11", "r12",
- "r13", "r14", "r15");
+ /*
+ * Stuff RAX and RCX with "safe" values to make sure L0 doesn't handle
+ * it as a valid hypercall (e.g. Hyper-V L2 TLB flush) as the intended
+ * use of this function is to exit to L1 from L2. Clobber all other
+ * GPRs as L1 doesn't correctly preserve them during vmexits.
+ */
+ __asm__ __volatile__("push %%rbp; vmcall; pop %%rbp"
+ : : "a"(0xdeadbeef), "c"(0xbeefdead)
+ : "rbx", "rdx", "rsi", "rdi", "r8", "r9",
+ "r10", "r11", "r12", "r13", "r14", "r15");
}
static inline int vmread(uint64_t encoding, uint64_t *value)
@@ -562,17 +522,13 @@ struct vmx_pages {
uint64_t vmwrite_gpa;
void *vmwrite;
- void *vp_assist_hva;
- uint64_t vp_assist_gpa;
- void *vp_assist;
-
- void *enlightened_vmcs_hva;
- uint64_t enlightened_vmcs_gpa;
- void *enlightened_vmcs;
-
void *eptp_hva;
uint64_t eptp_gpa;
void *eptp;
+
+ void *apic_access_hva;
+ uint64_t apic_access_gpa;
+ void *apic_access;
};
union vmx_basic {
@@ -603,17 +559,19 @@ bool prepare_for_vmx_operation(struct vmx_pages *vmx);
void prepare_vmcs(struct vmx_pages *vmx, void *guest_rip, void *guest_rsp);
bool load_vmcs(struct vmx_pages *vmx);
-bool nested_vmx_supported(void);
-void nested_vmx_check_supported(void);
+bool ept_1g_pages_supported(void);
void nested_pg_map(struct vmx_pages *vmx, struct kvm_vm *vm,
- uint64_t nested_paddr, uint64_t paddr, uint32_t eptp_memslot);
+ uint64_t nested_paddr, uint64_t paddr);
void nested_map(struct vmx_pages *vmx, struct kvm_vm *vm,
- uint64_t nested_paddr, uint64_t paddr, uint64_t size,
- uint32_t eptp_memslot);
+ uint64_t nested_paddr, uint64_t paddr, uint64_t size);
void nested_map_memslot(struct vmx_pages *vmx, struct kvm_vm *vm,
- uint32_t memslot, uint32_t eptp_memslot);
+ uint32_t memslot);
+void nested_identity_map_1g(struct vmx_pages *vmx, struct kvm_vm *vm,
+ uint64_t addr, uint64_t size);
+bool kvm_cpu_has_ept(void);
void prepare_eptp(struct vmx_pages *vmx, struct kvm_vm *vm,
uint32_t eptp_memslot);
+void prepare_virtualize_apic_accesses(struct vmx_pages *vmx, struct kvm_vm *vm);
#endif /* SELFTEST_KVM_VMX_H */
diff --git a/tools/testing/selftests/kvm/kvm_binary_stats_test.c b/tools/testing/selftests/kvm/kvm_binary_stats_test.c
new file mode 100644
index 000000000000..698c1cfa3111
--- /dev/null
+++ b/tools/testing/selftests/kvm/kvm_binary_stats_test.c
@@ -0,0 +1,275 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * kvm_binary_stats_test
+ *
+ * Copyright (C) 2021, Google LLC.
+ *
+ * Test the fd-based interface for KVM statistics.
+ */
+
+#define _GNU_SOURCE /* for program_invocation_short_name */
+#include <fcntl.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <errno.h>
+
+#include "test_util.h"
+
+#include "kvm_util.h"
+#include "asm/kvm.h"
+#include "linux/kvm.h"
+#include "kselftest.h"
+
+static void stats_test(int stats_fd)
+{
+ ssize_t ret;
+ int i;
+ size_t size_desc;
+ size_t size_data = 0;
+ struct kvm_stats_header header;
+ char *id;
+ struct kvm_stats_desc *stats_desc;
+ u64 *stats_data;
+ struct kvm_stats_desc *pdesc;
+ u32 type, unit, base;
+
+ /* Read kvm stats header */
+ read_stats_header(stats_fd, &header);
+
+ size_desc = get_stats_descriptor_size(&header);
+
+ /* Read kvm stats id string */
+ id = malloc(header.name_size);
+ TEST_ASSERT(id, "Allocate memory for id string");
+
+ ret = pread(stats_fd, id, header.name_size, sizeof(header));
+ TEST_ASSERT(ret == header.name_size,
+ "Expected header size '%u', read '%lu' bytes",
+ header.name_size, ret);
+
+ /* Check id string, that should start with "kvm" */
+ TEST_ASSERT(!strncmp(id, "kvm", 3) && strlen(id) < header.name_size,
+ "Invalid KVM stats type, id: %s", id);
+
+ /* Sanity check for other fields in header */
+ if (header.num_desc == 0) {
+ ksft_print_msg("No KVM stats defined!\n");
+ return;
+ }
+ /*
+ * The descriptor and data offsets must be valid, they must not overlap
+ * the header, and the descriptor and data blocks must not overlap each
+ * other. Note, the data block is rechecked after its size is known.
+ */
+ TEST_ASSERT(header.desc_offset && header.desc_offset >= sizeof(header) &&
+ header.data_offset && header.data_offset >= sizeof(header),
+ "Invalid offset fields in header");
+
+ TEST_ASSERT(header.desc_offset > header.data_offset ||
+ (header.desc_offset + size_desc * header.num_desc <= header.data_offset),
+ "Descriptor block is overlapped with data block");
+
+ /* Read kvm stats descriptors */
+ stats_desc = read_stats_descriptors(stats_fd, &header);
+
+ /* Sanity check for fields in descriptors */
+ for (i = 0; i < header.num_desc; ++i) {
+ pdesc = get_stats_descriptor(stats_desc, i, &header);
+ type = pdesc->flags & KVM_STATS_TYPE_MASK;
+ unit = pdesc->flags & KVM_STATS_UNIT_MASK;
+ base = pdesc->flags & KVM_STATS_BASE_MASK;
+
+ /* Check name string */
+ TEST_ASSERT(strlen(pdesc->name) < header.name_size,
+ "KVM stats name (index: %d) too long", i);
+
+ /* Check type,unit,base boundaries */
+ TEST_ASSERT(type <= KVM_STATS_TYPE_MAX,
+ "Unknown KVM stats (%s) type: %u", pdesc->name, type);
+ TEST_ASSERT(unit <= KVM_STATS_UNIT_MAX,
+ "Unknown KVM stats (%s) unit: %u", pdesc->name, unit);
+ TEST_ASSERT(base <= KVM_STATS_BASE_MAX,
+ "Unknown KVM stats (%s) base: %u", pdesc->name, base);
+
+ /*
+ * Check exponent for stats unit
+ * Exponent for counter should be greater than or equal to 0
+ * Exponent for unit bytes should be greater than or equal to 0
+ * Exponent for unit seconds should be less than or equal to 0
+ * Exponent for unit clock cycles should be greater than or
+ * equal to 0
+ * Exponent for unit boolean should be 0
+ */
+ switch (pdesc->flags & KVM_STATS_UNIT_MASK) {
+ case KVM_STATS_UNIT_NONE:
+ case KVM_STATS_UNIT_BYTES:
+ case KVM_STATS_UNIT_CYCLES:
+ TEST_ASSERT(pdesc->exponent >= 0,
+ "Unsupported KVM stats (%s) exponent: %i",
+ pdesc->name, pdesc->exponent);
+ break;
+ case KVM_STATS_UNIT_SECONDS:
+ TEST_ASSERT(pdesc->exponent <= 0,
+ "Unsupported KVM stats (%s) exponent: %i",
+ pdesc->name, pdesc->exponent);
+ break;
+ case KVM_STATS_UNIT_BOOLEAN:
+ TEST_ASSERT(pdesc->exponent == 0,
+ "Unsupported KVM stats (%s) exponent: %d",
+ pdesc->name, pdesc->exponent);
+ break;
+ }
+
+ /* Check size field, which should not be zero */
+ TEST_ASSERT(pdesc->size,
+ "KVM descriptor(%s) with size of 0", pdesc->name);
+ /* Check bucket_size field */
+ switch (pdesc->flags & KVM_STATS_TYPE_MASK) {
+ case KVM_STATS_TYPE_LINEAR_HIST:
+ TEST_ASSERT(pdesc->bucket_size,
+ "Bucket size of Linear Histogram stats (%s) is zero",
+ pdesc->name);
+ break;
+ default:
+ TEST_ASSERT(!pdesc->bucket_size,
+ "Bucket size of stats (%s) is not zero",
+ pdesc->name);
+ }
+ size_data = max(size_data, pdesc->offset + pdesc->size * sizeof(*stats_data));
+ }
+
+ /*
+ * Now that the size of the data block is known, verify the data block
+ * doesn't overlap the descriptor block.
+ */
+ TEST_ASSERT(header.data_offset >= header.desc_offset ||
+ header.data_offset + size_data <= header.desc_offset,
+ "Data block is overlapped with Descriptor block");
+
+ /* Check validity of all stats data size */
+ TEST_ASSERT(size_data >= header.num_desc * sizeof(*stats_data),
+ "Data size is not correct");
+
+ /* Allocate memory for stats data */
+ stats_data = malloc(size_data);
+ TEST_ASSERT(stats_data, "Allocate memory for stats data");
+ /* Read kvm stats data as a bulk */
+ ret = pread(stats_fd, stats_data, size_data, header.data_offset);
+ TEST_ASSERT(ret == size_data, "Read KVM stats data");
+ /* Read kvm stats data one by one */
+ for (i = 0; i < header.num_desc; ++i) {
+ pdesc = get_stats_descriptor(stats_desc, i, &header);
+ read_stat_data(stats_fd, &header, pdesc, stats_data,
+ pdesc->size);
+ }
+
+ free(stats_data);
+ free(stats_desc);
+ free(id);
+
+ close(stats_fd);
+ TEST_ASSERT(fcntl(stats_fd, F_GETFD) == -1, "Stats fd not freed");
+}
+
+#define DEFAULT_NUM_VM 4
+#define DEFAULT_NUM_VCPU 4
+
+/*
+ * Usage: kvm_bin_form_stats [#vm] [#vcpu]
+ * The first parameter #vm set the number of VMs being created.
+ * The second parameter #vcpu set the number of VCPUs being created.
+ * By default, DEFAULT_NUM_VM VM and DEFAULT_NUM_VCPU VCPU for the VM would be
+ * created for testing.
+ */
+
+int main(int argc, char *argv[])
+{
+ int vm_stats_fds, *vcpu_stats_fds;
+ int i, j;
+ struct kvm_vcpu **vcpus;
+ struct kvm_vm **vms;
+ int max_vm = DEFAULT_NUM_VM;
+ int max_vcpu = DEFAULT_NUM_VCPU;
+
+ /* Get the number of VMs and VCPUs that would be created for testing. */
+ if (argc > 1) {
+ max_vm = strtol(argv[1], NULL, 0);
+ if (max_vm <= 0)
+ max_vm = DEFAULT_NUM_VM;
+ }
+ if (argc > 2) {
+ max_vcpu = strtol(argv[2], NULL, 0);
+ if (max_vcpu <= 0)
+ max_vcpu = DEFAULT_NUM_VCPU;
+ }
+
+ ksft_print_header();
+
+ /* Check the extension for binary stats */
+ TEST_REQUIRE(kvm_has_cap(KVM_CAP_BINARY_STATS_FD));
+
+ ksft_set_plan(max_vm);
+
+ /* Create VMs and VCPUs */
+ vms = malloc(sizeof(vms[0]) * max_vm);
+ TEST_ASSERT(vms, "Allocate memory for storing VM pointers");
+
+ vcpus = malloc(sizeof(struct kvm_vcpu *) * max_vm * max_vcpu);
+ TEST_ASSERT(vcpus, "Allocate memory for storing vCPU pointers");
+
+ /*
+ * Not per-VM as the array is populated, used, and invalidated within a
+ * single for-loop iteration.
+ */
+ vcpu_stats_fds = calloc(max_vm, sizeof(*vcpu_stats_fds));
+ TEST_ASSERT(vcpu_stats_fds, "Allocate memory for VM stats fds");
+
+ for (i = 0; i < max_vm; ++i) {
+ vms[i] = vm_create_barebones();
+ for (j = 0; j < max_vcpu; ++j)
+ vcpus[i * max_vcpu + j] = __vm_vcpu_add(vms[i], j);
+ }
+
+ /*
+ * Check stats read for every VM and vCPU, with a variety of flavors.
+ * Note, stats_test() closes the passed in stats fd.
+ */
+ for (i = 0; i < max_vm; ++i) {
+ /*
+ * Verify that creating multiple userspace references to a
+ * single stats file works and doesn't cause explosions.
+ */
+ vm_stats_fds = vm_get_stats_fd(vms[i]);
+ stats_test(dup(vm_stats_fds));
+
+ /* Verify userspace can instantiate multiple stats files. */
+ stats_test(vm_get_stats_fd(vms[i]));
+
+ for (j = 0; j < max_vcpu; ++j) {
+ vcpu_stats_fds[j] = vcpu_get_stats_fd(vcpus[i * max_vcpu + j]);
+ stats_test(dup(vcpu_stats_fds[j]));
+ stats_test(vcpu_get_stats_fd(vcpus[i * max_vcpu + j]));
+ }
+
+ /*
+ * Close the VM fd and redo the stats tests. KVM should gift a
+ * reference (to the VM) to each stats fd, i.e. stats should
+ * still be accessible even after userspace has put its last
+ * _direct_ reference to the VM.
+ */
+ kvm_vm_free(vms[i]);
+
+ stats_test(vm_stats_fds);
+ for (j = 0; j < max_vcpu; ++j)
+ stats_test(vcpu_stats_fds[j]);
+
+ ksft_test_result_pass("vm%i\n", i);
+ }
+
+ free(vms);
+ free(vcpus);
+ free(vcpu_stats_fds);
+
+ ksft_finished(); /* Print results and exit() accordingly */
+}
diff --git a/tools/testing/selftests/kvm/kvm_create_max_vcpus.c b/tools/testing/selftests/kvm/kvm_create_max_vcpus.c
index 0299cd81b8ba..b9e23265e4b3 100644
--- a/tools/testing/selftests/kvm/kvm_create_max_vcpus.c
+++ b/tools/testing/selftests/kvm/kvm_create_max_vcpus.c
@@ -12,6 +12,7 @@
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
+#include <sys/resource.h>
#include "test_util.h"
@@ -27,11 +28,11 @@ void test_vcpu_creation(int first_vcpu_id, int num_vcpus)
pr_info("Testing creating %d vCPUs, with IDs %d...%d.\n",
num_vcpus, first_vcpu_id, first_vcpu_id + num_vcpus - 1);
- vm = vm_create(VM_MODE_DEFAULT, DEFAULT_GUEST_PHY_PAGES, O_RDWR);
+ vm = vm_create_barebones();
for (i = first_vcpu_id; i < first_vcpu_id + num_vcpus; i++)
/* This asserts that the vCPU was created. */
- vm_vcpu_add(vm, i);
+ __vm_vcpu_add(vm, i);
kvm_vm_free(vm);
}
@@ -40,11 +41,38 @@ int main(int argc, char *argv[])
{
int kvm_max_vcpu_id = kvm_check_cap(KVM_CAP_MAX_VCPU_ID);
int kvm_max_vcpus = kvm_check_cap(KVM_CAP_MAX_VCPUS);
+ /*
+ * Number of file descriptors reqired, KVM_CAP_MAX_VCPUS for vCPU fds +
+ * an arbitrary number for everything else.
+ */
+ int nr_fds_wanted = kvm_max_vcpus + 100;
+ struct rlimit rl;
pr_info("KVM_CAP_MAX_VCPU_ID: %d\n", kvm_max_vcpu_id);
pr_info("KVM_CAP_MAX_VCPUS: %d\n", kvm_max_vcpus);
/*
+ * Check that we're allowed to open nr_fds_wanted file descriptors and
+ * try raising the limits if needed.
+ */
+ TEST_ASSERT(!getrlimit(RLIMIT_NOFILE, &rl), "getrlimit() failed!");
+
+ if (rl.rlim_cur < nr_fds_wanted) {
+ rl.rlim_cur = nr_fds_wanted;
+ if (rl.rlim_max < nr_fds_wanted) {
+ int old_rlim_max = rl.rlim_max;
+ rl.rlim_max = nr_fds_wanted;
+
+ int r = setrlimit(RLIMIT_NOFILE, &rl);
+ __TEST_REQUIRE(r >= 0,
+ "RLIMIT_NOFILE hard limit is too low (%d, wanted %d)",
+ old_rlim_max, nr_fds_wanted);
+ } else {
+ TEST_ASSERT(!setrlimit(RLIMIT_NOFILE, &rl), "setrlimit() failed!");
+ }
+ }
+
+ /*
* Upstream KVM prior to 4.8 does not support KVM_CAP_MAX_VCPU_ID.
* Userspace is supposed to use KVM_CAP_MAX_VCPUS as the maximum ID
* in this case.
@@ -53,7 +81,7 @@ int main(int argc, char *argv[])
kvm_max_vcpu_id = kvm_max_vcpus;
TEST_ASSERT(kvm_max_vcpu_id >= kvm_max_vcpus,
- "KVM_MAX_VCPU_ID (%d) must be at least as large as KVM_MAX_VCPUS (%d).",
+ "KVM_MAX_VCPU_IDS (%d) must be at least as large as KVM_MAX_VCPUS (%d).",
kvm_max_vcpu_id, kvm_max_vcpus);
test_vcpu_creation(0, kvm_max_vcpus);
diff --git a/tools/testing/selftests/kvm/kvm_page_table_test.c b/tools/testing/selftests/kvm/kvm_page_table_test.c
new file mode 100644
index 000000000000..e0ba97ac1c56
--- /dev/null
+++ b/tools/testing/selftests/kvm/kvm_page_table_test.c
@@ -0,0 +1,479 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * KVM page table test
+ *
+ * Copyright (C) 2021, Huawei, Inc.
+ *
+ * Make sure that THP has been enabled or enough HUGETLB pages with specific
+ * page size have been pre-allocated on your system, if you are planning to
+ * use hugepages to back the guest memory for testing.
+ */
+
+#define _GNU_SOURCE /* for program_invocation_name */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <time.h>
+#include <pthread.h>
+#include <semaphore.h>
+
+#include "test_util.h"
+#include "kvm_util.h"
+#include "processor.h"
+#include "guest_modes.h"
+
+#define TEST_MEM_SLOT_INDEX 1
+
+/* Default size(1GB) of the memory for testing */
+#define DEFAULT_TEST_MEM_SIZE (1 << 30)
+
+/* Default guest test virtual memory offset */
+#define DEFAULT_GUEST_TEST_MEM 0xc0000000
+
+/* Different guest memory accessing stages */
+enum test_stage {
+ KVM_BEFORE_MAPPINGS,
+ KVM_CREATE_MAPPINGS,
+ KVM_UPDATE_MAPPINGS,
+ KVM_ADJUST_MAPPINGS,
+ NUM_TEST_STAGES,
+};
+
+static const char * const test_stage_string[] = {
+ "KVM_BEFORE_MAPPINGS",
+ "KVM_CREATE_MAPPINGS",
+ "KVM_UPDATE_MAPPINGS",
+ "KVM_ADJUST_MAPPINGS",
+};
+
+struct test_args {
+ struct kvm_vm *vm;
+ uint64_t guest_test_virt_mem;
+ uint64_t host_page_size;
+ uint64_t host_num_pages;
+ uint64_t large_page_size;
+ uint64_t large_num_pages;
+ uint64_t host_pages_per_lpage;
+ enum vm_mem_backing_src_type src_type;
+ struct kvm_vcpu *vcpus[KVM_MAX_VCPUS];
+};
+
+/*
+ * Guest variables. Use addr_gva2hva() if these variables need
+ * to be changed in host.
+ */
+static enum test_stage guest_test_stage;
+
+/* Host variables */
+static uint32_t nr_vcpus = 1;
+static struct test_args test_args;
+static enum test_stage *current_stage;
+static bool host_quit;
+
+/* Whether the test stage is updated, or completed */
+static sem_t test_stage_updated;
+static sem_t test_stage_completed;
+
+/*
+ * Guest physical memory offset of the testing memory slot.
+ * This will be set to the topmost valid physical address minus
+ * the test memory size.
+ */
+static uint64_t guest_test_phys_mem;
+
+/*
+ * Guest virtual memory offset of the testing memory slot.
+ * Must not conflict with identity mapped test code.
+ */
+static uint64_t guest_test_virt_mem = DEFAULT_GUEST_TEST_MEM;
+
+static void guest_code(bool do_write)
+{
+ struct test_args *p = &test_args;
+ enum test_stage *current_stage = &guest_test_stage;
+ uint64_t addr;
+ int i, j;
+
+ while (true) {
+ addr = p->guest_test_virt_mem;
+
+ switch (READ_ONCE(*current_stage)) {
+ /*
+ * All vCPU threads will be started in this stage,
+ * where guest code of each vCPU will do nothing.
+ */
+ case KVM_BEFORE_MAPPINGS:
+ break;
+
+ /*
+ * Before dirty logging, vCPUs concurrently access the first
+ * 8 bytes of each page (host page/large page) within the same
+ * memory region with different accessing types (read/write).
+ * Then KVM will create normal page mappings or huge block
+ * mappings for them.
+ */
+ case KVM_CREATE_MAPPINGS:
+ for (i = 0; i < p->large_num_pages; i++) {
+ if (do_write)
+ *(uint64_t *)addr = 0x0123456789ABCDEF;
+ else
+ READ_ONCE(*(uint64_t *)addr);
+
+ addr += p->large_page_size;
+ }
+ break;
+
+ /*
+ * During dirty logging, KVM will only update attributes of the
+ * normal page mappings from RO to RW if memory backing src type
+ * is anonymous. In other cases, KVM will split the huge block
+ * mappings into normal page mappings if memory backing src type
+ * is THP or HUGETLB.
+ */
+ case KVM_UPDATE_MAPPINGS:
+ if (p->src_type == VM_MEM_SRC_ANONYMOUS) {
+ for (i = 0; i < p->host_num_pages; i++) {
+ *(uint64_t *)addr = 0x0123456789ABCDEF;
+ addr += p->host_page_size;
+ }
+ break;
+ }
+
+ for (i = 0; i < p->large_num_pages; i++) {
+ /*
+ * Write to the first host page in each large
+ * page region, and triger break of large pages.
+ */
+ *(uint64_t *)addr = 0x0123456789ABCDEF;
+
+ /*
+ * Access the middle host pages in each large
+ * page region. Since dirty logging is enabled,
+ * this will create new mappings at the smallest
+ * granularity.
+ */
+ addr += p->large_page_size / 2;
+ for (j = 0; j < p->host_pages_per_lpage / 2; j++) {
+ READ_ONCE(*(uint64_t *)addr);
+ addr += p->host_page_size;
+ }
+ }
+ break;
+
+ /*
+ * After dirty logging is stopped, vCPUs concurrently read
+ * from every single host page. Then KVM will coalesce the
+ * split page mappings back to block mappings. And a TLB
+ * conflict abort could occur here if TLB entries of the
+ * page mappings are not fully invalidated.
+ */
+ case KVM_ADJUST_MAPPINGS:
+ for (i = 0; i < p->host_num_pages; i++) {
+ READ_ONCE(*(uint64_t *)addr);
+ addr += p->host_page_size;
+ }
+ break;
+
+ default:
+ GUEST_ASSERT(0);
+ }
+
+ GUEST_SYNC(1);
+ }
+}
+
+static void *vcpu_worker(void *data)
+{
+ struct kvm_vcpu *vcpu = data;
+ bool do_write = !(vcpu->id % 2);
+ struct timespec start;
+ struct timespec ts_diff;
+ enum test_stage stage;
+ int ret;
+
+ vcpu_args_set(vcpu, 1, do_write);
+
+ while (!READ_ONCE(host_quit)) {
+ ret = sem_wait(&test_stage_updated);
+ TEST_ASSERT(ret == 0, "Error in sem_wait");
+
+ if (READ_ONCE(host_quit))
+ return NULL;
+
+ clock_gettime(CLOCK_MONOTONIC, &start);
+ ret = _vcpu_run(vcpu);
+ ts_diff = timespec_elapsed(start);
+
+ TEST_ASSERT(ret == 0, "vcpu_run failed: %d", ret);
+ TEST_ASSERT(get_ucall(vcpu, NULL) == UCALL_SYNC,
+ "Invalid guest sync status: exit_reason=%s",
+ exit_reason_str(vcpu->run->exit_reason));
+
+ pr_debug("Got sync event from vCPU %d\n", vcpu->id);
+ stage = READ_ONCE(*current_stage);
+
+ /*
+ * Here we can know the execution time of every
+ * single vcpu running in different test stages.
+ */
+ pr_debug("vCPU %d has completed stage %s\n"
+ "execution time is: %ld.%.9lds\n\n",
+ vcpu->id, test_stage_string[stage],
+ ts_diff.tv_sec, ts_diff.tv_nsec);
+
+ ret = sem_post(&test_stage_completed);
+ TEST_ASSERT(ret == 0, "Error in sem_post");
+ }
+
+ return NULL;
+}
+
+struct test_params {
+ uint64_t phys_offset;
+ uint64_t test_mem_size;
+ enum vm_mem_backing_src_type src_type;
+};
+
+static struct kvm_vm *pre_init_before_test(enum vm_guest_mode mode, void *arg)
+{
+ int ret;
+ struct test_params *p = arg;
+ enum vm_mem_backing_src_type src_type = p->src_type;
+ uint64_t large_page_size = get_backing_src_pagesz(src_type);
+ uint64_t guest_page_size = vm_guest_mode_params[mode].page_size;
+ uint64_t host_page_size = getpagesize();
+ uint64_t test_mem_size = p->test_mem_size;
+ uint64_t guest_num_pages;
+ uint64_t alignment;
+ void *host_test_mem;
+ struct kvm_vm *vm;
+
+ /* Align up the test memory size */
+ alignment = max(large_page_size, guest_page_size);
+ test_mem_size = (test_mem_size + alignment - 1) & ~(alignment - 1);
+
+ /* Create a VM with enough guest pages */
+ guest_num_pages = test_mem_size / guest_page_size;
+ vm = __vm_create_with_vcpus(VM_SHAPE(mode), nr_vcpus, guest_num_pages,
+ guest_code, test_args.vcpus);
+
+ /* Align down GPA of the testing memslot */
+ if (!p->phys_offset)
+ guest_test_phys_mem = (vm->max_gfn - guest_num_pages) *
+ guest_page_size;
+ else
+ guest_test_phys_mem = p->phys_offset;
+#ifdef __s390x__
+ alignment = max(0x100000UL, alignment);
+#endif
+ guest_test_phys_mem = align_down(guest_test_phys_mem, alignment);
+
+ /* Set up the shared data structure test_args */
+ test_args.vm = vm;
+ test_args.guest_test_virt_mem = guest_test_virt_mem;
+ test_args.host_page_size = host_page_size;
+ test_args.host_num_pages = test_mem_size / host_page_size;
+ test_args.large_page_size = large_page_size;
+ test_args.large_num_pages = test_mem_size / large_page_size;
+ test_args.host_pages_per_lpage = large_page_size / host_page_size;
+ test_args.src_type = src_type;
+
+ /* Add an extra memory slot with specified backing src type */
+ vm_userspace_mem_region_add(vm, src_type, guest_test_phys_mem,
+ TEST_MEM_SLOT_INDEX, guest_num_pages, 0);
+
+ /* Do mapping(GVA->GPA) for the testing memory slot */
+ virt_map(vm, guest_test_virt_mem, guest_test_phys_mem, guest_num_pages);
+
+ /* Cache the HVA pointer of the region */
+ host_test_mem = addr_gpa2hva(vm, (vm_paddr_t)guest_test_phys_mem);
+
+ /* Export shared structure test_args to guest */
+ sync_global_to_guest(vm, test_args);
+
+ ret = sem_init(&test_stage_updated, 0, 0);
+ TEST_ASSERT(ret == 0, "Error in sem_init");
+
+ ret = sem_init(&test_stage_completed, 0, 0);
+ TEST_ASSERT(ret == 0, "Error in sem_init");
+
+ current_stage = addr_gva2hva(vm, (vm_vaddr_t)(&guest_test_stage));
+ *current_stage = NUM_TEST_STAGES;
+
+ pr_info("Testing guest mode: %s\n", vm_guest_mode_string(mode));
+ pr_info("Testing memory backing src type: %s\n",
+ vm_mem_backing_src_alias(src_type)->name);
+ pr_info("Testing memory backing src granularity: 0x%lx\n",
+ large_page_size);
+ pr_info("Testing memory size(aligned): 0x%lx\n", test_mem_size);
+ pr_info("Guest physical test memory offset: 0x%lx\n",
+ guest_test_phys_mem);
+ pr_info("Host virtual test memory offset: 0x%lx\n",
+ (uint64_t)host_test_mem);
+ pr_info("Number of testing vCPUs: %d\n", nr_vcpus);
+
+ return vm;
+}
+
+static void vcpus_complete_new_stage(enum test_stage stage)
+{
+ int ret;
+ int vcpus;
+
+ /* Wake up all the vcpus to run new test stage */
+ for (vcpus = 0; vcpus < nr_vcpus; vcpus++) {
+ ret = sem_post(&test_stage_updated);
+ TEST_ASSERT(ret == 0, "Error in sem_post");
+ }
+ pr_debug("All vcpus have been notified to continue\n");
+
+ /* Wait for all the vcpus to complete new test stage */
+ for (vcpus = 0; vcpus < nr_vcpus; vcpus++) {
+ ret = sem_wait(&test_stage_completed);
+ TEST_ASSERT(ret == 0, "Error in sem_wait");
+
+ pr_debug("%d vcpus have completed stage %s\n",
+ vcpus + 1, test_stage_string[stage]);
+ }
+
+ pr_debug("All vcpus have completed stage %s\n",
+ test_stage_string[stage]);
+}
+
+static void run_test(enum vm_guest_mode mode, void *arg)
+{
+ pthread_t *vcpu_threads;
+ struct kvm_vm *vm;
+ struct timespec start;
+ struct timespec ts_diff;
+ int ret, i;
+
+ /* Create VM with vCPUs and make some pre-initialization */
+ vm = pre_init_before_test(mode, arg);
+
+ vcpu_threads = malloc(nr_vcpus * sizeof(*vcpu_threads));
+ TEST_ASSERT(vcpu_threads, "Memory allocation failed");
+
+ host_quit = false;
+ *current_stage = KVM_BEFORE_MAPPINGS;
+
+ for (i = 0; i < nr_vcpus; i++)
+ pthread_create(&vcpu_threads[i], NULL, vcpu_worker,
+ test_args.vcpus[i]);
+
+ vcpus_complete_new_stage(*current_stage);
+ pr_info("Started all vCPUs successfully\n");
+
+ /* Test the stage of KVM creating mappings */
+ *current_stage = KVM_CREATE_MAPPINGS;
+
+ clock_gettime(CLOCK_MONOTONIC, &start);
+ vcpus_complete_new_stage(*current_stage);
+ ts_diff = timespec_elapsed(start);
+
+ pr_info("KVM_CREATE_MAPPINGS: total execution time: %ld.%.9lds\n\n",
+ ts_diff.tv_sec, ts_diff.tv_nsec);
+
+ /* Test the stage of KVM updating mappings */
+ vm_mem_region_set_flags(vm, TEST_MEM_SLOT_INDEX,
+ KVM_MEM_LOG_DIRTY_PAGES);
+
+ *current_stage = KVM_UPDATE_MAPPINGS;
+
+ clock_gettime(CLOCK_MONOTONIC, &start);
+ vcpus_complete_new_stage(*current_stage);
+ ts_diff = timespec_elapsed(start);
+
+ pr_info("KVM_UPDATE_MAPPINGS: total execution time: %ld.%.9lds\n\n",
+ ts_diff.tv_sec, ts_diff.tv_nsec);
+
+ /* Test the stage of KVM adjusting mappings */
+ vm_mem_region_set_flags(vm, TEST_MEM_SLOT_INDEX, 0);
+
+ *current_stage = KVM_ADJUST_MAPPINGS;
+
+ clock_gettime(CLOCK_MONOTONIC, &start);
+ vcpus_complete_new_stage(*current_stage);
+ ts_diff = timespec_elapsed(start);
+
+ pr_info("KVM_ADJUST_MAPPINGS: total execution time: %ld.%.9lds\n\n",
+ ts_diff.tv_sec, ts_diff.tv_nsec);
+
+ /* Tell the vcpu thread to quit */
+ host_quit = true;
+ for (i = 0; i < nr_vcpus; i++) {
+ ret = sem_post(&test_stage_updated);
+ TEST_ASSERT(ret == 0, "Error in sem_post");
+ }
+
+ for (i = 0; i < nr_vcpus; i++)
+ pthread_join(vcpu_threads[i], NULL);
+
+ ret = sem_destroy(&test_stage_updated);
+ TEST_ASSERT(ret == 0, "Error in sem_destroy");
+
+ ret = sem_destroy(&test_stage_completed);
+ TEST_ASSERT(ret == 0, "Error in sem_destroy");
+
+ free(vcpu_threads);
+ kvm_vm_free(vm);
+}
+
+static void help(char *name)
+{
+ puts("");
+ printf("usage: %s [-h] [-p offset] [-m mode] "
+ "[-b mem-size] [-v vcpus] [-s mem-type]\n", name);
+ puts("");
+ printf(" -p: specify guest physical test memory offset\n"
+ " Warning: a low offset can conflict with the loaded test code.\n");
+ guest_modes_help();
+ printf(" -b: specify size of the memory region for testing. e.g. 10M or 3G.\n"
+ " (default: 1G)\n");
+ printf(" -v: specify the number of vCPUs to run\n"
+ " (default: 1)\n");
+ backing_src_help("-s");
+ puts("");
+}
+
+int main(int argc, char *argv[])
+{
+ int max_vcpus = kvm_check_cap(KVM_CAP_MAX_VCPUS);
+ struct test_params p = {
+ .test_mem_size = DEFAULT_TEST_MEM_SIZE,
+ .src_type = DEFAULT_VM_MEM_SRC,
+ };
+ int opt;
+
+ guest_modes_append_default();
+
+ while ((opt = getopt(argc, argv, "hp:m:b:v:s:")) != -1) {
+ switch (opt) {
+ case 'p':
+ p.phys_offset = strtoull(optarg, NULL, 0);
+ break;
+ case 'm':
+ guest_modes_cmdline(optarg);
+ break;
+ case 'b':
+ p.test_mem_size = parse_size(optarg);
+ break;
+ case 'v':
+ nr_vcpus = atoi_positive("Number of vCPUs", optarg);
+ TEST_ASSERT(nr_vcpus <= max_vcpus,
+ "Invalid number of vcpus, must be between 1 and %d", max_vcpus);
+ break;
+ case 's':
+ p.src_type = parse_backing_src_type(optarg);
+ break;
+ case 'h':
+ default:
+ help(argv[0]);
+ exit(0);
+ }
+ }
+
+ for_each_guest_mode(run_test, &p);
+
+ return 0;
+}
diff --git a/tools/testing/selftests/kvm/lib/aarch64/gic.c b/tools/testing/selftests/kvm/lib/aarch64/gic.c
new file mode 100644
index 000000000000..55668631d546
--- /dev/null
+++ b/tools/testing/selftests/kvm/lib/aarch64/gic.c
@@ -0,0 +1,161 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * ARM Generic Interrupt Controller (GIC) support
+ */
+
+#include <errno.h>
+#include <linux/bits.h>
+#include <linux/sizes.h>
+
+#include "kvm_util.h"
+
+#include <gic.h>
+#include "gic_private.h"
+#include "processor.h"
+#include "spinlock.h"
+
+static const struct gic_common_ops *gic_common_ops;
+static struct spinlock gic_lock;
+
+static void gic_cpu_init(unsigned int cpu, void *redist_base)
+{
+ gic_common_ops->gic_cpu_init(cpu, redist_base);
+}
+
+static void
+gic_dist_init(enum gic_type type, unsigned int nr_cpus, void *dist_base)
+{
+ const struct gic_common_ops *gic_ops = NULL;
+
+ spin_lock(&gic_lock);
+
+ /* Distributor initialization is needed only once per VM */
+ if (gic_common_ops) {
+ spin_unlock(&gic_lock);
+ return;
+ }
+
+ if (type == GIC_V3)
+ gic_ops = &gicv3_ops;
+
+ GUEST_ASSERT(gic_ops);
+
+ gic_ops->gic_init(nr_cpus, dist_base);
+ gic_common_ops = gic_ops;
+
+ /* Make sure that the initialized data is visible to all the vCPUs */
+ dsb(sy);
+
+ spin_unlock(&gic_lock);
+}
+
+void gic_init(enum gic_type type, unsigned int nr_cpus,
+ void *dist_base, void *redist_base)
+{
+ uint32_t cpu = guest_get_vcpuid();
+
+ GUEST_ASSERT(type < GIC_TYPE_MAX);
+ GUEST_ASSERT(dist_base);
+ GUEST_ASSERT(redist_base);
+ GUEST_ASSERT(nr_cpus);
+
+ gic_dist_init(type, nr_cpus, dist_base);
+ gic_cpu_init(cpu, redist_base);
+}
+
+void gic_irq_enable(unsigned int intid)
+{
+ GUEST_ASSERT(gic_common_ops);
+ gic_common_ops->gic_irq_enable(intid);
+}
+
+void gic_irq_disable(unsigned int intid)
+{
+ GUEST_ASSERT(gic_common_ops);
+ gic_common_ops->gic_irq_disable(intid);
+}
+
+unsigned int gic_get_and_ack_irq(void)
+{
+ uint64_t irqstat;
+ unsigned int intid;
+
+ GUEST_ASSERT(gic_common_ops);
+
+ irqstat = gic_common_ops->gic_read_iar();
+ intid = irqstat & GENMASK(23, 0);
+
+ return intid;
+}
+
+void gic_set_eoi(unsigned int intid)
+{
+ GUEST_ASSERT(gic_common_ops);
+ gic_common_ops->gic_write_eoir(intid);
+}
+
+void gic_set_dir(unsigned int intid)
+{
+ GUEST_ASSERT(gic_common_ops);
+ gic_common_ops->gic_write_dir(intid);
+}
+
+void gic_set_eoi_split(bool split)
+{
+ GUEST_ASSERT(gic_common_ops);
+ gic_common_ops->gic_set_eoi_split(split);
+}
+
+void gic_set_priority_mask(uint64_t pmr)
+{
+ GUEST_ASSERT(gic_common_ops);
+ gic_common_ops->gic_set_priority_mask(pmr);
+}
+
+void gic_set_priority(unsigned int intid, unsigned int prio)
+{
+ GUEST_ASSERT(gic_common_ops);
+ gic_common_ops->gic_set_priority(intid, prio);
+}
+
+void gic_irq_set_active(unsigned int intid)
+{
+ GUEST_ASSERT(gic_common_ops);
+ gic_common_ops->gic_irq_set_active(intid);
+}
+
+void gic_irq_clear_active(unsigned int intid)
+{
+ GUEST_ASSERT(gic_common_ops);
+ gic_common_ops->gic_irq_clear_active(intid);
+}
+
+bool gic_irq_get_active(unsigned int intid)
+{
+ GUEST_ASSERT(gic_common_ops);
+ return gic_common_ops->gic_irq_get_active(intid);
+}
+
+void gic_irq_set_pending(unsigned int intid)
+{
+ GUEST_ASSERT(gic_common_ops);
+ gic_common_ops->gic_irq_set_pending(intid);
+}
+
+void gic_irq_clear_pending(unsigned int intid)
+{
+ GUEST_ASSERT(gic_common_ops);
+ gic_common_ops->gic_irq_clear_pending(intid);
+}
+
+bool gic_irq_get_pending(unsigned int intid)
+{
+ GUEST_ASSERT(gic_common_ops);
+ return gic_common_ops->gic_irq_get_pending(intid);
+}
+
+void gic_irq_set_config(unsigned int intid, bool is_edge)
+{
+ GUEST_ASSERT(gic_common_ops);
+ gic_common_ops->gic_irq_set_config(intid, is_edge);
+}
diff --git a/tools/testing/selftests/kvm/lib/aarch64/gic_private.h b/tools/testing/selftests/kvm/lib/aarch64/gic_private.h
new file mode 100644
index 000000000000..75d07313c893
--- /dev/null
+++ b/tools/testing/selftests/kvm/lib/aarch64/gic_private.h
@@ -0,0 +1,32 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * ARM Generic Interrupt Controller (GIC) private defines that's only
+ * shared among the GIC library code.
+ */
+
+#ifndef SELFTEST_KVM_GIC_PRIVATE_H
+#define SELFTEST_KVM_GIC_PRIVATE_H
+
+struct gic_common_ops {
+ void (*gic_init)(unsigned int nr_cpus, void *dist_base);
+ void (*gic_cpu_init)(unsigned int cpu, void *redist_base);
+ void (*gic_irq_enable)(unsigned int intid);
+ void (*gic_irq_disable)(unsigned int intid);
+ uint64_t (*gic_read_iar)(void);
+ void (*gic_write_eoir)(uint32_t irq);
+ void (*gic_write_dir)(uint32_t irq);
+ void (*gic_set_eoi_split)(bool split);
+ void (*gic_set_priority_mask)(uint64_t mask);
+ void (*gic_set_priority)(uint32_t intid, uint32_t prio);
+ void (*gic_irq_set_active)(uint32_t intid);
+ void (*gic_irq_clear_active)(uint32_t intid);
+ bool (*gic_irq_get_active)(uint32_t intid);
+ void (*gic_irq_set_pending)(uint32_t intid);
+ void (*gic_irq_clear_pending)(uint32_t intid);
+ bool (*gic_irq_get_pending)(uint32_t intid);
+ void (*gic_irq_set_config)(uint32_t intid, bool is_edge);
+};
+
+extern const struct gic_common_ops gicv3_ops;
+
+#endif /* SELFTEST_KVM_GIC_PRIVATE_H */
diff --git a/tools/testing/selftests/kvm/lib/aarch64/gic_v3.c b/tools/testing/selftests/kvm/lib/aarch64/gic_v3.c
new file mode 100644
index 000000000000..263bf3ed8fd5
--- /dev/null
+++ b/tools/testing/selftests/kvm/lib/aarch64/gic_v3.c
@@ -0,0 +1,398 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * ARM Generic Interrupt Controller (GIC) v3 support
+ */
+
+#include <linux/sizes.h>
+
+#include "kvm_util.h"
+#include "processor.h"
+#include "delay.h"
+
+#include "gic_v3.h"
+#include "gic_private.h"
+
+struct gicv3_data {
+ void *dist_base;
+ void *redist_base[GICV3_MAX_CPUS];
+ unsigned int nr_cpus;
+ unsigned int nr_spis;
+};
+
+#define sgi_base_from_redist(redist_base) (redist_base + SZ_64K)
+#define DIST_BIT (1U << 31)
+
+enum gicv3_intid_range {
+ SGI_RANGE,
+ PPI_RANGE,
+ SPI_RANGE,
+ INVALID_RANGE,
+};
+
+static struct gicv3_data gicv3_data;
+
+static void gicv3_gicd_wait_for_rwp(void)
+{
+ unsigned int count = 100000; /* 1s */
+
+ while (readl(gicv3_data.dist_base + GICD_CTLR) & GICD_CTLR_RWP) {
+ GUEST_ASSERT(count--);
+ udelay(10);
+ }
+}
+
+static void gicv3_gicr_wait_for_rwp(void *redist_base)
+{
+ unsigned int count = 100000; /* 1s */
+
+ while (readl(redist_base + GICR_CTLR) & GICR_CTLR_RWP) {
+ GUEST_ASSERT(count--);
+ udelay(10);
+ }
+}
+
+static void gicv3_wait_for_rwp(uint32_t cpu_or_dist)
+{
+ if (cpu_or_dist & DIST_BIT)
+ gicv3_gicd_wait_for_rwp();
+ else
+ gicv3_gicr_wait_for_rwp(gicv3_data.redist_base[cpu_or_dist]);
+}
+
+static enum gicv3_intid_range get_intid_range(unsigned int intid)
+{
+ switch (intid) {
+ case 0 ... 15:
+ return SGI_RANGE;
+ case 16 ... 31:
+ return PPI_RANGE;
+ case 32 ... 1019:
+ return SPI_RANGE;
+ }
+
+ /* We should not be reaching here */
+ GUEST_ASSERT(0);
+
+ return INVALID_RANGE;
+}
+
+static uint64_t gicv3_read_iar(void)
+{
+ uint64_t irqstat = read_sysreg_s(SYS_ICC_IAR1_EL1);
+
+ dsb(sy);
+ return irqstat;
+}
+
+static void gicv3_write_eoir(uint32_t irq)
+{
+ write_sysreg_s(irq, SYS_ICC_EOIR1_EL1);
+ isb();
+}
+
+static void gicv3_write_dir(uint32_t irq)
+{
+ write_sysreg_s(irq, SYS_ICC_DIR_EL1);
+ isb();
+}
+
+static void gicv3_set_priority_mask(uint64_t mask)
+{
+ write_sysreg_s(mask, SYS_ICC_PMR_EL1);
+}
+
+static void gicv3_set_eoi_split(bool split)
+{
+ uint32_t val;
+
+ /*
+ * All other fields are read-only, so no need to read CTLR first. In
+ * fact, the kernel does the same.
+ */
+ val = split ? (1U << 1) : 0;
+ write_sysreg_s(val, SYS_ICC_CTLR_EL1);
+ isb();
+}
+
+uint32_t gicv3_reg_readl(uint32_t cpu_or_dist, uint64_t offset)
+{
+ void *base = cpu_or_dist & DIST_BIT ? gicv3_data.dist_base
+ : sgi_base_from_redist(gicv3_data.redist_base[cpu_or_dist]);
+ return readl(base + offset);
+}
+
+void gicv3_reg_writel(uint32_t cpu_or_dist, uint64_t offset, uint32_t reg_val)
+{
+ void *base = cpu_or_dist & DIST_BIT ? gicv3_data.dist_base
+ : sgi_base_from_redist(gicv3_data.redist_base[cpu_or_dist]);
+ writel(reg_val, base + offset);
+}
+
+uint32_t gicv3_getl_fields(uint32_t cpu_or_dist, uint64_t offset, uint32_t mask)
+{
+ return gicv3_reg_readl(cpu_or_dist, offset) & mask;
+}
+
+void gicv3_setl_fields(uint32_t cpu_or_dist, uint64_t offset,
+ uint32_t mask, uint32_t reg_val)
+{
+ uint32_t tmp = gicv3_reg_readl(cpu_or_dist, offset) & ~mask;
+
+ tmp |= (reg_val & mask);
+ gicv3_reg_writel(cpu_or_dist, offset, tmp);
+}
+
+/*
+ * We use a single offset for the distributor and redistributor maps as they
+ * have the same value in both. The only exceptions are registers that only
+ * exist in one and not the other, like GICR_WAKER that doesn't exist in the
+ * distributor map. Such registers are conveniently marked as reserved in the
+ * map that doesn't implement it; like GICR_WAKER's offset of 0x0014 being
+ * marked as "Reserved" in the Distributor map.
+ */
+static void gicv3_access_reg(uint32_t intid, uint64_t offset,
+ uint32_t reg_bits, uint32_t bits_per_field,
+ bool write, uint32_t *val)
+{
+ uint32_t cpu = guest_get_vcpuid();
+ enum gicv3_intid_range intid_range = get_intid_range(intid);
+ uint32_t fields_per_reg, index, mask, shift;
+ uint32_t cpu_or_dist;
+
+ GUEST_ASSERT(bits_per_field <= reg_bits);
+ GUEST_ASSERT(!write || *val < (1U << bits_per_field));
+ /*
+ * This function does not support 64 bit accesses. Just asserting here
+ * until we implement readq/writeq.
+ */
+ GUEST_ASSERT(reg_bits == 32);
+
+ fields_per_reg = reg_bits / bits_per_field;
+ index = intid % fields_per_reg;
+ shift = index * bits_per_field;
+ mask = ((1U << bits_per_field) - 1) << shift;
+
+ /* Set offset to the actual register holding intid's config. */
+ offset += (intid / fields_per_reg) * (reg_bits / 8);
+
+ cpu_or_dist = (intid_range == SPI_RANGE) ? DIST_BIT : cpu;
+
+ if (write)
+ gicv3_setl_fields(cpu_or_dist, offset, mask, *val << shift);
+ *val = gicv3_getl_fields(cpu_or_dist, offset, mask) >> shift;
+}
+
+static void gicv3_write_reg(uint32_t intid, uint64_t offset,
+ uint32_t reg_bits, uint32_t bits_per_field, uint32_t val)
+{
+ gicv3_access_reg(intid, offset, reg_bits,
+ bits_per_field, true, &val);
+}
+
+static uint32_t gicv3_read_reg(uint32_t intid, uint64_t offset,
+ uint32_t reg_bits, uint32_t bits_per_field)
+{
+ uint32_t val;
+
+ gicv3_access_reg(intid, offset, reg_bits,
+ bits_per_field, false, &val);
+ return val;
+}
+
+static void gicv3_set_priority(uint32_t intid, uint32_t prio)
+{
+ gicv3_write_reg(intid, GICD_IPRIORITYR, 32, 8, prio);
+}
+
+/* Sets the intid to be level-sensitive or edge-triggered. */
+static void gicv3_irq_set_config(uint32_t intid, bool is_edge)
+{
+ uint32_t val;
+
+ /* N/A for private interrupts. */
+ GUEST_ASSERT(get_intid_range(intid) == SPI_RANGE);
+ val = is_edge ? 2 : 0;
+ gicv3_write_reg(intid, GICD_ICFGR, 32, 2, val);
+}
+
+static void gicv3_irq_enable(uint32_t intid)
+{
+ bool is_spi = get_intid_range(intid) == SPI_RANGE;
+ uint32_t cpu = guest_get_vcpuid();
+
+ gicv3_write_reg(intid, GICD_ISENABLER, 32, 1, 1);
+ gicv3_wait_for_rwp(is_spi ? DIST_BIT : cpu);
+}
+
+static void gicv3_irq_disable(uint32_t intid)
+{
+ bool is_spi = get_intid_range(intid) == SPI_RANGE;
+ uint32_t cpu = guest_get_vcpuid();
+
+ gicv3_write_reg(intid, GICD_ICENABLER, 32, 1, 1);
+ gicv3_wait_for_rwp(is_spi ? DIST_BIT : cpu);
+}
+
+static void gicv3_irq_set_active(uint32_t intid)
+{
+ gicv3_write_reg(intid, GICD_ISACTIVER, 32, 1, 1);
+}
+
+static void gicv3_irq_clear_active(uint32_t intid)
+{
+ gicv3_write_reg(intid, GICD_ICACTIVER, 32, 1, 1);
+}
+
+static bool gicv3_irq_get_active(uint32_t intid)
+{
+ return gicv3_read_reg(intid, GICD_ISACTIVER, 32, 1);
+}
+
+static void gicv3_irq_set_pending(uint32_t intid)
+{
+ gicv3_write_reg(intid, GICD_ISPENDR, 32, 1, 1);
+}
+
+static void gicv3_irq_clear_pending(uint32_t intid)
+{
+ gicv3_write_reg(intid, GICD_ICPENDR, 32, 1, 1);
+}
+
+static bool gicv3_irq_get_pending(uint32_t intid)
+{
+ return gicv3_read_reg(intid, GICD_ISPENDR, 32, 1);
+}
+
+static void gicv3_enable_redist(void *redist_base)
+{
+ uint32_t val = readl(redist_base + GICR_WAKER);
+ unsigned int count = 100000; /* 1s */
+
+ val &= ~GICR_WAKER_ProcessorSleep;
+ writel(val, redist_base + GICR_WAKER);
+
+ /* Wait until the processor is 'active' */
+ while (readl(redist_base + GICR_WAKER) & GICR_WAKER_ChildrenAsleep) {
+ GUEST_ASSERT(count--);
+ udelay(10);
+ }
+}
+
+static inline void *gicr_base_cpu(void *redist_base, uint32_t cpu)
+{
+ /* Align all the redistributors sequentially */
+ return redist_base + cpu * SZ_64K * 2;
+}
+
+static void gicv3_cpu_init(unsigned int cpu, void *redist_base)
+{
+ void *sgi_base;
+ unsigned int i;
+ void *redist_base_cpu;
+
+ GUEST_ASSERT(cpu < gicv3_data.nr_cpus);
+
+ redist_base_cpu = gicr_base_cpu(redist_base, cpu);
+ sgi_base = sgi_base_from_redist(redist_base_cpu);
+
+ gicv3_enable_redist(redist_base_cpu);
+
+ /*
+ * Mark all the SGI and PPI interrupts as non-secure Group-1.
+ * Also, deactivate and disable them.
+ */
+ writel(~0, sgi_base + GICR_IGROUPR0);
+ writel(~0, sgi_base + GICR_ICACTIVER0);
+ writel(~0, sgi_base + GICR_ICENABLER0);
+
+ /* Set a default priority for all the SGIs and PPIs */
+ for (i = 0; i < 32; i += 4)
+ writel(GICD_INT_DEF_PRI_X4,
+ sgi_base + GICR_IPRIORITYR0 + i);
+
+ gicv3_gicr_wait_for_rwp(redist_base_cpu);
+
+ /* Enable the GIC system register (ICC_*) access */
+ write_sysreg_s(read_sysreg_s(SYS_ICC_SRE_EL1) | ICC_SRE_EL1_SRE,
+ SYS_ICC_SRE_EL1);
+
+ /* Set a default priority threshold */
+ write_sysreg_s(ICC_PMR_DEF_PRIO, SYS_ICC_PMR_EL1);
+
+ /* Enable non-secure Group-1 interrupts */
+ write_sysreg_s(ICC_IGRPEN1_EL1_ENABLE, SYS_ICC_GRPEN1_EL1);
+
+ gicv3_data.redist_base[cpu] = redist_base_cpu;
+}
+
+static void gicv3_dist_init(void)
+{
+ void *dist_base = gicv3_data.dist_base;
+ unsigned int i;
+
+ /* Disable the distributor until we set things up */
+ writel(0, dist_base + GICD_CTLR);
+ gicv3_gicd_wait_for_rwp();
+
+ /*
+ * Mark all the SPI interrupts as non-secure Group-1.
+ * Also, deactivate and disable them.
+ */
+ for (i = 32; i < gicv3_data.nr_spis; i += 32) {
+ writel(~0, dist_base + GICD_IGROUPR + i / 8);
+ writel(~0, dist_base + GICD_ICACTIVER + i / 8);
+ writel(~0, dist_base + GICD_ICENABLER + i / 8);
+ }
+
+ /* Set a default priority for all the SPIs */
+ for (i = 32; i < gicv3_data.nr_spis; i += 4)
+ writel(GICD_INT_DEF_PRI_X4,
+ dist_base + GICD_IPRIORITYR + i);
+
+ /* Wait for the settings to sync-in */
+ gicv3_gicd_wait_for_rwp();
+
+ /* Finally, enable the distributor globally with ARE */
+ writel(GICD_CTLR_ARE_NS | GICD_CTLR_ENABLE_G1A |
+ GICD_CTLR_ENABLE_G1, dist_base + GICD_CTLR);
+ gicv3_gicd_wait_for_rwp();
+}
+
+static void gicv3_init(unsigned int nr_cpus, void *dist_base)
+{
+ GUEST_ASSERT(nr_cpus <= GICV3_MAX_CPUS);
+
+ gicv3_data.nr_cpus = nr_cpus;
+ gicv3_data.dist_base = dist_base;
+ gicv3_data.nr_spis = GICD_TYPER_SPIS(
+ readl(gicv3_data.dist_base + GICD_TYPER));
+ if (gicv3_data.nr_spis > 1020)
+ gicv3_data.nr_spis = 1020;
+
+ /*
+ * Initialize only the distributor for now.
+ * The redistributor and CPU interfaces are initialized
+ * later for every PE.
+ */
+ gicv3_dist_init();
+}
+
+const struct gic_common_ops gicv3_ops = {
+ .gic_init = gicv3_init,
+ .gic_cpu_init = gicv3_cpu_init,
+ .gic_irq_enable = gicv3_irq_enable,
+ .gic_irq_disable = gicv3_irq_disable,
+ .gic_read_iar = gicv3_read_iar,
+ .gic_write_eoir = gicv3_write_eoir,
+ .gic_write_dir = gicv3_write_dir,
+ .gic_set_priority_mask = gicv3_set_priority_mask,
+ .gic_set_eoi_split = gicv3_set_eoi_split,
+ .gic_set_priority = gicv3_set_priority,
+ .gic_irq_set_active = gicv3_irq_set_active,
+ .gic_irq_clear_active = gicv3_irq_clear_active,
+ .gic_irq_get_active = gicv3_irq_get_active,
+ .gic_irq_set_pending = gicv3_irq_set_pending,
+ .gic_irq_clear_pending = gicv3_irq_clear_pending,
+ .gic_irq_get_pending = gicv3_irq_get_pending,
+ .gic_irq_set_config = gicv3_irq_set_config,
+};
diff --git a/tools/testing/selftests/kvm/lib/aarch64/handlers.S b/tools/testing/selftests/kvm/lib/aarch64/handlers.S
new file mode 100644
index 000000000000..0e443eadfac6
--- /dev/null
+++ b/tools/testing/selftests/kvm/lib/aarch64/handlers.S
@@ -0,0 +1,126 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+.macro save_registers
+ add sp, sp, #-16 * 17
+
+ stp x0, x1, [sp, #16 * 0]
+ stp x2, x3, [sp, #16 * 1]
+ stp x4, x5, [sp, #16 * 2]
+ stp x6, x7, [sp, #16 * 3]
+ stp x8, x9, [sp, #16 * 4]
+ stp x10, x11, [sp, #16 * 5]
+ stp x12, x13, [sp, #16 * 6]
+ stp x14, x15, [sp, #16 * 7]
+ stp x16, x17, [sp, #16 * 8]
+ stp x18, x19, [sp, #16 * 9]
+ stp x20, x21, [sp, #16 * 10]
+ stp x22, x23, [sp, #16 * 11]
+ stp x24, x25, [sp, #16 * 12]
+ stp x26, x27, [sp, #16 * 13]
+ stp x28, x29, [sp, #16 * 14]
+
+ /*
+ * This stores sp_el1 into ex_regs.sp so exception handlers can "look"
+ * at it. It will _not_ be used to restore the sp on return from the
+ * exception so handlers can not update it.
+ */
+ add x1, sp, #16 * 17
+ stp x30, x1, [sp, #16 * 15] /* x30, SP */
+
+ mrs x1, elr_el1
+ mrs x2, spsr_el1
+ stp x1, x2, [sp, #16 * 16] /* PC, PSTATE */
+.endm
+
+.macro restore_registers
+ ldp x1, x2, [sp, #16 * 16] /* PC, PSTATE */
+ msr elr_el1, x1
+ msr spsr_el1, x2
+
+ /* sp is not restored */
+ ldp x30, xzr, [sp, #16 * 15] /* x30, SP */
+
+ ldp x28, x29, [sp, #16 * 14]
+ ldp x26, x27, [sp, #16 * 13]
+ ldp x24, x25, [sp, #16 * 12]
+ ldp x22, x23, [sp, #16 * 11]
+ ldp x20, x21, [sp, #16 * 10]
+ ldp x18, x19, [sp, #16 * 9]
+ ldp x16, x17, [sp, #16 * 8]
+ ldp x14, x15, [sp, #16 * 7]
+ ldp x12, x13, [sp, #16 * 6]
+ ldp x10, x11, [sp, #16 * 5]
+ ldp x8, x9, [sp, #16 * 4]
+ ldp x6, x7, [sp, #16 * 3]
+ ldp x4, x5, [sp, #16 * 2]
+ ldp x2, x3, [sp, #16 * 1]
+ ldp x0, x1, [sp, #16 * 0]
+
+ add sp, sp, #16 * 17
+
+ eret
+.endm
+
+.pushsection ".entry.text", "ax"
+.balign 0x800
+.global vectors
+vectors:
+.popsection
+
+.set vector, 0
+
+/*
+ * Build an exception handler for vector and append a jump to it into
+ * vectors (while making sure that it's 0x80 aligned).
+ */
+.macro HANDLER, label
+handler_\label:
+ save_registers
+ mov x0, sp
+ mov x1, #vector
+ bl route_exception
+ restore_registers
+
+.pushsection ".entry.text", "ax"
+.balign 0x80
+ b handler_\label
+.popsection
+
+.set vector, vector + 1
+.endm
+
+.macro HANDLER_INVALID
+.pushsection ".entry.text", "ax"
+.balign 0x80
+/* This will abort so no need to save and restore registers. */
+ mov x0, #vector
+ mov x1, #0 /* ec */
+ mov x2, #0 /* valid_ec */
+ b kvm_exit_unexpected_exception
+.popsection
+
+.set vector, vector + 1
+.endm
+
+/*
+ * Caution: be sure to not add anything between the declaration of vectors
+ * above and these macro calls that will build the vectors table below it.
+ */
+ HANDLER_INVALID // Synchronous EL1t
+ HANDLER_INVALID // IRQ EL1t
+ HANDLER_INVALID // FIQ EL1t
+ HANDLER_INVALID // Error EL1t
+
+ HANDLER el1h_sync // Synchronous EL1h
+ HANDLER el1h_irq // IRQ EL1h
+ HANDLER el1h_fiq // FIQ EL1h
+ HANDLER el1h_error // Error EL1h
+
+ HANDLER el0_sync_64 // Synchronous 64-bit EL0
+ HANDLER el0_irq_64 // IRQ 64-bit EL0
+ HANDLER el0_fiq_64 // FIQ 64-bit EL0
+ HANDLER el0_error_64 // Error 64-bit EL0
+
+ HANDLER el0_sync_32 // Synchronous 32-bit EL0
+ HANDLER el0_irq_32 // IRQ 32-bit EL0
+ HANDLER el0_fiq_32 // FIQ 32-bit EL0
+ HANDLER el0_error_32 // Error 32-bit EL0
diff --git a/tools/testing/selftests/kvm/lib/aarch64/processor.c b/tools/testing/selftests/kvm/lib/aarch64/processor.c
index 2afa6618b396..a9eb17295be4 100644
--- a/tools/testing/selftests/kvm/lib/aarch64/processor.c
+++ b/tools/testing/selftests/kvm/lib/aarch64/processor.c
@@ -5,17 +5,19 @@
* Copyright (C) 2018, Red Hat, Inc.
*/
-#define _GNU_SOURCE /* for program_invocation_name */
-
#include <linux/compiler.h>
+#include <assert.h>
+#include "guest_modes.h"
#include "kvm_util.h"
-#include "../kvm_util_internal.h"
#include "processor.h"
+#include <linux/bitfield.h>
+#include <linux/sizes.h>
-#define KVM_GUEST_PAGE_TABLE_MIN_PADDR 0x180000
#define DEFAULT_ARM64_GUEST_STACK_VADDR_MIN 0xac0000
+static vm_vaddr_t exception_handlers;
+
static uint64_t page_align(struct kvm_vm *vm, uint64_t v)
{
return (v + vm->page_size) & ~(vm->page_size - 1);
@@ -57,10 +59,44 @@ static uint64_t pte_index(struct kvm_vm *vm, vm_vaddr_t gva)
return (gva >> vm->page_shift) & mask;
}
-static uint64_t pte_addr(struct kvm_vm *vm, uint64_t entry)
+static inline bool use_lpa2_pte_format(struct kvm_vm *vm)
+{
+ return (vm->page_size == SZ_4K || vm->page_size == SZ_16K) &&
+ (vm->pa_bits > 48 || vm->va_bits > 48);
+}
+
+static uint64_t addr_pte(struct kvm_vm *vm, uint64_t pa, uint64_t attrs)
+{
+ uint64_t pte;
+
+ if (use_lpa2_pte_format(vm)) {
+ pte = pa & GENMASK(49, vm->page_shift);
+ pte |= FIELD_GET(GENMASK(51, 50), pa) << 8;
+ attrs &= ~GENMASK(9, 8);
+ } else {
+ pte = pa & GENMASK(47, vm->page_shift);
+ if (vm->page_shift == 16)
+ pte |= FIELD_GET(GENMASK(51, 48), pa) << 12;
+ }
+ pte |= attrs;
+
+ return pte;
+}
+
+static uint64_t pte_addr(struct kvm_vm *vm, uint64_t pte)
{
- uint64_t mask = ((1UL << (vm->va_bits - vm->page_shift)) - 1) << vm->page_shift;
- return entry & mask;
+ uint64_t pa;
+
+ if (use_lpa2_pte_format(vm)) {
+ pa = pte & GENMASK(49, vm->page_shift);
+ pa |= FIELD_GET(GENMASK(9, 8), pte) << 50;
+ } else {
+ pa = pte & GENMASK(47, vm->page_shift);
+ if (vm->page_shift == 16)
+ pa |= FIELD_GET(GENMASK(15, 12), pte) << 48;
+ }
+
+ return pa;
}
static uint64_t ptrs_per_pgd(struct kvm_vm *vm)
@@ -74,19 +110,21 @@ static uint64_t __maybe_unused ptrs_per_pte(struct kvm_vm *vm)
return 1 << (vm->page_shift - 3);
}
-void virt_pgd_alloc(struct kvm_vm *vm, uint32_t pgd_memslot)
+void virt_arch_pgd_alloc(struct kvm_vm *vm)
{
- if (!vm->pgd_created) {
- vm_paddr_t paddr = vm_phy_pages_alloc(vm,
- page_align(vm, ptrs_per_pgd(vm) * 8) / vm->page_size,
- KVM_GUEST_PAGE_TABLE_MIN_PADDR, pgd_memslot);
- vm->pgd = paddr;
- vm->pgd_created = true;
- }
+ size_t nr_pages = page_align(vm, ptrs_per_pgd(vm) * 8) / vm->page_size;
+
+ if (vm->pgd_created)
+ return;
+
+ vm->pgd = vm_phy_pages_alloc(vm, nr_pages,
+ KVM_GUEST_PAGE_TABLE_MIN_PADDR,
+ vm->memslots[MEM_REGION_PT]);
+ vm->pgd_created = true;
}
-void _virt_pg_map(struct kvm_vm *vm, uint64_t vaddr, uint64_t paddr,
- uint32_t pgd_memslot, uint64_t flags)
+static void _virt_pg_map(struct kvm_vm *vm, uint64_t vaddr, uint64_t paddr,
+ uint64_t flags)
{
uint8_t attr_idx = flags & 7;
uint64_t *ptep;
@@ -106,25 +144,19 @@ void _virt_pg_map(struct kvm_vm *vm, uint64_t vaddr, uint64_t paddr,
paddr, vm->max_gfn, vm->page_size);
ptep = addr_gpa2hva(vm, vm->pgd) + pgd_index(vm, vaddr) * 8;
- if (!*ptep) {
- *ptep = vm_phy_page_alloc(vm, KVM_GUEST_PAGE_TABLE_MIN_PADDR, pgd_memslot);
- *ptep |= 3;
- }
+ if (!*ptep)
+ *ptep = addr_pte(vm, vm_alloc_page_table(vm), 3);
switch (vm->pgtable_levels) {
case 4:
ptep = addr_gpa2hva(vm, pte_addr(vm, *ptep)) + pud_index(vm, vaddr) * 8;
- if (!*ptep) {
- *ptep = vm_phy_page_alloc(vm, KVM_GUEST_PAGE_TABLE_MIN_PADDR, pgd_memslot);
- *ptep |= 3;
- }
+ if (!*ptep)
+ *ptep = addr_pte(vm, vm_alloc_page_table(vm), 3);
/* fall through */
case 3:
ptep = addr_gpa2hva(vm, pte_addr(vm, *ptep)) + pmd_index(vm, vaddr) * 8;
- if (!*ptep) {
- *ptep = vm_phy_page_alloc(vm, KVM_GUEST_PAGE_TABLE_MIN_PADDR, pgd_memslot);
- *ptep |= 3;
- }
+ if (!*ptep)
+ *ptep = addr_pte(vm, vm_alloc_page_table(vm), 3);
/* fall through */
case 2:
ptep = addr_gpa2hva(vm, pte_addr(vm, *ptep)) + pte_index(vm, vaddr) * 8;
@@ -133,19 +165,17 @@ void _virt_pg_map(struct kvm_vm *vm, uint64_t vaddr, uint64_t paddr,
TEST_FAIL("Page table levels must be 2, 3, or 4");
}
- *ptep = paddr | 3;
- *ptep |= (attr_idx << 2) | (1 << 10) /* Access Flag */;
+ *ptep = addr_pte(vm, paddr, (attr_idx << 2) | (1 << 10) | 3); /* AF */
}
-void virt_pg_map(struct kvm_vm *vm, uint64_t vaddr, uint64_t paddr,
- uint32_t pgd_memslot)
+void virt_arch_pg_map(struct kvm_vm *vm, uint64_t vaddr, uint64_t paddr)
{
- uint64_t attr_idx = 4; /* NORMAL (See DEFAULT_MAIR_EL1) */
+ uint64_t attr_idx = MT_NORMAL;
- _virt_pg_map(vm, vaddr, paddr, pgd_memslot, attr_idx);
+ _virt_pg_map(vm, vaddr, paddr, attr_idx);
}
-vm_paddr_t addr_gva2gpa(struct kvm_vm *vm, vm_vaddr_t gva)
+uint64_t *virt_get_pte_hva(struct kvm_vm *vm, vm_vaddr_t gva)
{
uint64_t *ptep;
@@ -176,11 +206,18 @@ vm_paddr_t addr_gva2gpa(struct kvm_vm *vm, vm_vaddr_t gva)
TEST_FAIL("Page table levels must be 2, 3, or 4");
}
- return pte_addr(vm, *ptep) + (gva & (vm->page_size - 1));
+ return ptep;
unmapped_gva:
TEST_FAIL("No mapping for vm virtual address, gva: 0x%lx", gva);
- exit(1);
+ exit(EXIT_FAILURE);
+}
+
+vm_paddr_t addr_arch_gva2gpa(struct kvm_vm *vm, vm_vaddr_t gva)
+{
+ uint64_t *ptep = virt_get_pte_hva(vm, gva);
+
+ return pte_addr(vm, *ptep) + (gva & (vm->page_size - 1));
}
static void pte_dump(FILE *stream, struct kvm_vm *vm, uint8_t indent, uint64_t page, int level)
@@ -202,7 +239,7 @@ static void pte_dump(FILE *stream, struct kvm_vm *vm, uint8_t indent, uint64_t p
#endif
}
-void virt_dump(FILE *stream, struct kvm_vm *vm, uint8_t indent)
+void virt_arch_dump(FILE *stream, struct kvm_vm *vm, uint8_t indent)
{
int level = 4 - (vm->pgtable_levels - 1);
uint64_t pgd, *ptep;
@@ -219,25 +256,11 @@ void virt_dump(FILE *stream, struct kvm_vm *vm, uint8_t indent)
}
}
-struct kvm_vm *vm_create_default(uint32_t vcpuid, uint64_t extra_mem_pages,
- void *guest_code)
-{
- uint64_t ptrs_per_4k_pte = 512;
- uint64_t extra_pg_pages = (extra_mem_pages / ptrs_per_4k_pte) * 2;
- struct kvm_vm *vm;
-
- vm = vm_create(VM_MODE_DEFAULT, DEFAULT_GUEST_PHY_PAGES + extra_pg_pages, O_RDWR);
-
- kvm_vm_elf_load(vm, program_invocation_name, 0, 0);
- vm_vcpu_add_default(vm, vcpuid, guest_code);
-
- return vm;
-}
-
-void aarch64_vcpu_setup(struct kvm_vm *vm, int vcpuid, struct kvm_vcpu_init *init)
+void aarch64_vcpu_setup(struct kvm_vcpu *vcpu, struct kvm_vcpu_init *init)
{
struct kvm_vcpu_init default_init = { .target = -1, };
- uint64_t sctlr_el1, tcr_el1;
+ struct kvm_vm *vm = vcpu->vm;
+ uint64_t sctlr_el1, tcr_el1, ttbr0_el1;
if (!init)
init = &default_init;
@@ -248,44 +271,71 @@ void aarch64_vcpu_setup(struct kvm_vm *vm, int vcpuid, struct kvm_vcpu_init *ini
init->target = preferred.target;
}
- vcpu_ioctl(vm, vcpuid, KVM_ARM_VCPU_INIT, init);
+ vcpu_ioctl(vcpu, KVM_ARM_VCPU_INIT, init);
/*
* Enable FP/ASIMD to avoid trapping when accessing Q0-Q15
* registers, which the variable argument list macros do.
*/
- set_reg(vm, vcpuid, ARM64_SYS_REG(CPACR_EL1), 3 << 20);
+ vcpu_set_reg(vcpu, KVM_ARM64_SYS_REG(SYS_CPACR_EL1), 3 << 20);
- get_reg(vm, vcpuid, ARM64_SYS_REG(SCTLR_EL1), &sctlr_el1);
- get_reg(vm, vcpuid, ARM64_SYS_REG(TCR_EL1), &tcr_el1);
+ vcpu_get_reg(vcpu, KVM_ARM64_SYS_REG(SYS_SCTLR_EL1), &sctlr_el1);
+ vcpu_get_reg(vcpu, KVM_ARM64_SYS_REG(SYS_TCR_EL1), &tcr_el1);
+ /* Configure base granule size */
switch (vm->mode) {
- case VM_MODE_P52V48_4K:
- TEST_FAIL("AArch64 does not support 4K sized pages "
- "with 52-bit physical address ranges");
case VM_MODE_PXXV48_4K:
TEST_FAIL("AArch64 does not support 4K sized pages "
"with ANY-bit physical address ranges");
case VM_MODE_P52V48_64K:
+ case VM_MODE_P48V48_64K:
+ case VM_MODE_P40V48_64K:
+ case VM_MODE_P36V48_64K:
tcr_el1 |= 1ul << 14; /* TG0 = 64KB */
- tcr_el1 |= 6ul << 32; /* IPS = 52 bits */
break;
+ case VM_MODE_P52V48_16K:
+ case VM_MODE_P48V48_16K:
+ case VM_MODE_P40V48_16K:
+ case VM_MODE_P36V48_16K:
+ case VM_MODE_P36V47_16K:
+ tcr_el1 |= 2ul << 14; /* TG0 = 16KB */
+ break;
+ case VM_MODE_P52V48_4K:
case VM_MODE_P48V48_4K:
+ case VM_MODE_P40V48_4K:
+ case VM_MODE_P36V48_4K:
tcr_el1 |= 0ul << 14; /* TG0 = 4KB */
- tcr_el1 |= 5ul << 32; /* IPS = 48 bits */
break;
+ default:
+ TEST_FAIL("Unknown guest mode, mode: 0x%x", vm->mode);
+ }
+
+ ttbr0_el1 = vm->pgd & GENMASK(47, vm->page_shift);
+
+ /* Configure output size */
+ switch (vm->mode) {
+ case VM_MODE_P52V48_4K:
+ case VM_MODE_P52V48_16K:
+ case VM_MODE_P52V48_64K:
+ tcr_el1 |= 6ul << 32; /* IPS = 52 bits */
+ ttbr0_el1 |= FIELD_GET(GENMASK(51, 48), vm->pgd) << 2;
+ break;
+ case VM_MODE_P48V48_4K:
+ case VM_MODE_P48V48_16K:
case VM_MODE_P48V48_64K:
- tcr_el1 |= 1ul << 14; /* TG0 = 64KB */
tcr_el1 |= 5ul << 32; /* IPS = 48 bits */
break;
case VM_MODE_P40V48_4K:
- tcr_el1 |= 0ul << 14; /* TG0 = 4KB */
- tcr_el1 |= 2ul << 32; /* IPS = 40 bits */
- break;
+ case VM_MODE_P40V48_16K:
case VM_MODE_P40V48_64K:
- tcr_el1 |= 1ul << 14; /* TG0 = 64KB */
tcr_el1 |= 2ul << 32; /* IPS = 40 bits */
break;
+ case VM_MODE_P36V48_4K:
+ case VM_MODE_P36V48_16K:
+ case VM_MODE_P36V48_64K:
+ case VM_MODE_P36V47_16K:
+ tcr_el1 |= 1ul << 32; /* IPS = 36 bits */
+ break;
default:
TEST_FAIL("Unknown guest mode, mode: 0x%x", vm->mode);
}
@@ -294,59 +344,296 @@ void aarch64_vcpu_setup(struct kvm_vm *vm, int vcpuid, struct kvm_vcpu_init *ini
/* TCR_EL1 |= IRGN0:WBWA | ORGN0:WBWA | SH0:Inner-Shareable */;
tcr_el1 |= (1 << 8) | (1 << 10) | (3 << 12);
tcr_el1 |= (64 - vm->va_bits) /* T0SZ */;
-
- set_reg(vm, vcpuid, ARM64_SYS_REG(SCTLR_EL1), sctlr_el1);
- set_reg(vm, vcpuid, ARM64_SYS_REG(TCR_EL1), tcr_el1);
- set_reg(vm, vcpuid, ARM64_SYS_REG(MAIR_EL1), DEFAULT_MAIR_EL1);
- set_reg(vm, vcpuid, ARM64_SYS_REG(TTBR0_EL1), vm->pgd);
+ if (use_lpa2_pte_format(vm))
+ tcr_el1 |= (1ul << 59) /* DS */;
+
+ vcpu_set_reg(vcpu, KVM_ARM64_SYS_REG(SYS_SCTLR_EL1), sctlr_el1);
+ vcpu_set_reg(vcpu, KVM_ARM64_SYS_REG(SYS_TCR_EL1), tcr_el1);
+ vcpu_set_reg(vcpu, KVM_ARM64_SYS_REG(SYS_MAIR_EL1), DEFAULT_MAIR_EL1);
+ vcpu_set_reg(vcpu, KVM_ARM64_SYS_REG(SYS_TTBR0_EL1), ttbr0_el1);
+ vcpu_set_reg(vcpu, KVM_ARM64_SYS_REG(SYS_TPIDR_EL1), vcpu->id);
}
-void vcpu_dump(FILE *stream, struct kvm_vm *vm, uint32_t vcpuid, uint8_t indent)
+void vcpu_arch_dump(FILE *stream, struct kvm_vcpu *vcpu, uint8_t indent)
{
uint64_t pstate, pc;
- get_reg(vm, vcpuid, ARM64_CORE_REG(regs.pstate), &pstate);
- get_reg(vm, vcpuid, ARM64_CORE_REG(regs.pc), &pc);
+ vcpu_get_reg(vcpu, ARM64_CORE_REG(regs.pstate), &pstate);
+ vcpu_get_reg(vcpu, ARM64_CORE_REG(regs.pc), &pc);
fprintf(stream, "%*spstate: 0x%.16lx pc: 0x%.16lx\n",
indent, "", pstate, pc);
}
-void aarch64_vcpu_add_default(struct kvm_vm *vm, uint32_t vcpuid,
- struct kvm_vcpu_init *init, void *guest_code)
+void vcpu_arch_set_entry_point(struct kvm_vcpu *vcpu, void *guest_code)
+{
+ vcpu_set_reg(vcpu, ARM64_CORE_REG(regs.pc), (uint64_t)guest_code);
+}
+
+static struct kvm_vcpu *__aarch64_vcpu_add(struct kvm_vm *vm, uint32_t vcpu_id,
+ struct kvm_vcpu_init *init)
+{
+ size_t stack_size;
+ uint64_t stack_vaddr;
+ struct kvm_vcpu *vcpu = __vm_vcpu_add(vm, vcpu_id);
+
+ stack_size = vm->page_size == 4096 ? DEFAULT_STACK_PGS * vm->page_size :
+ vm->page_size;
+ stack_vaddr = __vm_vaddr_alloc(vm, stack_size,
+ DEFAULT_ARM64_GUEST_STACK_VADDR_MIN,
+ MEM_REGION_DATA);
+
+ aarch64_vcpu_setup(vcpu, init);
+
+ vcpu_set_reg(vcpu, ARM64_CORE_REG(sp_el1), stack_vaddr + stack_size);
+ return vcpu;
+}
+
+struct kvm_vcpu *aarch64_vcpu_add(struct kvm_vm *vm, uint32_t vcpu_id,
+ struct kvm_vcpu_init *init, void *guest_code)
{
- size_t stack_size = vm->page_size == 4096 ?
- DEFAULT_STACK_PGS * vm->page_size :
- vm->page_size;
- uint64_t stack_vaddr = vm_vaddr_alloc(vm, stack_size,
- DEFAULT_ARM64_GUEST_STACK_VADDR_MIN, 0, 0);
+ struct kvm_vcpu *vcpu = __aarch64_vcpu_add(vm, vcpu_id, init);
- vm_vcpu_add(vm, vcpuid);
- aarch64_vcpu_setup(vm, vcpuid, init);
+ vcpu_arch_set_entry_point(vcpu, guest_code);
- set_reg(vm, vcpuid, ARM64_CORE_REG(sp_el1), stack_vaddr + stack_size);
- set_reg(vm, vcpuid, ARM64_CORE_REG(regs.pc), (uint64_t)guest_code);
+ return vcpu;
}
-void vm_vcpu_add_default(struct kvm_vm *vm, uint32_t vcpuid, void *guest_code)
+struct kvm_vcpu *vm_arch_vcpu_add(struct kvm_vm *vm, uint32_t vcpu_id)
{
- aarch64_vcpu_add_default(vm, vcpuid, NULL, guest_code);
+ return __aarch64_vcpu_add(vm, vcpu_id, NULL);
}
-void vcpu_args_set(struct kvm_vm *vm, uint32_t vcpuid, unsigned int num, ...)
+void vcpu_args_set(struct kvm_vcpu *vcpu, unsigned int num, ...)
{
va_list ap;
int i;
TEST_ASSERT(num >= 1 && num <= 8, "Unsupported number of args,\n"
- " num: %u\n", num);
+ " num: %u", num);
va_start(ap, num);
for (i = 0; i < num; i++) {
- set_reg(vm, vcpuid, ARM64_CORE_REG(regs.regs[i]),
- va_arg(ap, uint64_t));
+ vcpu_set_reg(vcpu, ARM64_CORE_REG(regs.regs[i]),
+ va_arg(ap, uint64_t));
}
va_end(ap);
}
+
+void kvm_exit_unexpected_exception(int vector, uint64_t ec, bool valid_ec)
+{
+ ucall(UCALL_UNHANDLED, 3, vector, ec, valid_ec);
+ while (1)
+ ;
+}
+
+void assert_on_unhandled_exception(struct kvm_vcpu *vcpu)
+{
+ struct ucall uc;
+
+ if (get_ucall(vcpu, &uc) != UCALL_UNHANDLED)
+ return;
+
+ if (uc.args[2]) /* valid_ec */ {
+ assert(VECTOR_IS_SYNC(uc.args[0]));
+ TEST_FAIL("Unexpected exception (vector:0x%lx, ec:0x%lx)",
+ uc.args[0], uc.args[1]);
+ } else {
+ assert(!VECTOR_IS_SYNC(uc.args[0]));
+ TEST_FAIL("Unexpected exception (vector:0x%lx)",
+ uc.args[0]);
+ }
+}
+
+struct handlers {
+ handler_fn exception_handlers[VECTOR_NUM][ESR_EC_NUM];
+};
+
+void vcpu_init_descriptor_tables(struct kvm_vcpu *vcpu)
+{
+ extern char vectors;
+
+ vcpu_set_reg(vcpu, KVM_ARM64_SYS_REG(SYS_VBAR_EL1), (uint64_t)&vectors);
+}
+
+void route_exception(struct ex_regs *regs, int vector)
+{
+ struct handlers *handlers = (struct handlers *)exception_handlers;
+ bool valid_ec;
+ int ec = 0;
+
+ switch (vector) {
+ case VECTOR_SYNC_CURRENT:
+ case VECTOR_SYNC_LOWER_64:
+ ec = (read_sysreg(esr_el1) >> ESR_EC_SHIFT) & ESR_EC_MASK;
+ valid_ec = true;
+ break;
+ case VECTOR_IRQ_CURRENT:
+ case VECTOR_IRQ_LOWER_64:
+ case VECTOR_FIQ_CURRENT:
+ case VECTOR_FIQ_LOWER_64:
+ case VECTOR_ERROR_CURRENT:
+ case VECTOR_ERROR_LOWER_64:
+ ec = 0;
+ valid_ec = false;
+ break;
+ default:
+ valid_ec = false;
+ goto unexpected_exception;
+ }
+
+ if (handlers && handlers->exception_handlers[vector][ec])
+ return handlers->exception_handlers[vector][ec](regs);
+
+unexpected_exception:
+ kvm_exit_unexpected_exception(vector, ec, valid_ec);
+}
+
+void vm_init_descriptor_tables(struct kvm_vm *vm)
+{
+ vm->handlers = __vm_vaddr_alloc(vm, sizeof(struct handlers),
+ vm->page_size, MEM_REGION_DATA);
+
+ *(vm_vaddr_t *)addr_gva2hva(vm, (vm_vaddr_t)(&exception_handlers)) = vm->handlers;
+}
+
+void vm_install_sync_handler(struct kvm_vm *vm, int vector, int ec,
+ void (*handler)(struct ex_regs *))
+{
+ struct handlers *handlers = addr_gva2hva(vm, vm->handlers);
+
+ assert(VECTOR_IS_SYNC(vector));
+ assert(vector < VECTOR_NUM);
+ assert(ec < ESR_EC_NUM);
+ handlers->exception_handlers[vector][ec] = handler;
+}
+
+void vm_install_exception_handler(struct kvm_vm *vm, int vector,
+ void (*handler)(struct ex_regs *))
+{
+ struct handlers *handlers = addr_gva2hva(vm, vm->handlers);
+
+ assert(!VECTOR_IS_SYNC(vector));
+ assert(vector < VECTOR_NUM);
+ handlers->exception_handlers[vector][0] = handler;
+}
+
+uint32_t guest_get_vcpuid(void)
+{
+ return read_sysreg(tpidr_el1);
+}
+
+static uint32_t max_ipa_for_page_size(uint32_t vm_ipa, uint32_t gran,
+ uint32_t not_sup_val, uint32_t ipa52_min_val)
+{
+ if (gran == not_sup_val)
+ return 0;
+ else if (gran >= ipa52_min_val && vm_ipa >= 52)
+ return 52;
+ else
+ return min(vm_ipa, 48U);
+}
+
+void aarch64_get_supported_page_sizes(uint32_t ipa, uint32_t *ipa4k,
+ uint32_t *ipa16k, uint32_t *ipa64k)
+{
+ struct kvm_vcpu_init preferred_init;
+ int kvm_fd, vm_fd, vcpu_fd, err;
+ uint64_t val;
+ uint32_t gran;
+ struct kvm_one_reg reg = {
+ .id = KVM_ARM64_SYS_REG(SYS_ID_AA64MMFR0_EL1),
+ .addr = (uint64_t)&val,
+ };
+
+ kvm_fd = open_kvm_dev_path_or_exit();
+ vm_fd = __kvm_ioctl(kvm_fd, KVM_CREATE_VM, (void *)(unsigned long)ipa);
+ TEST_ASSERT(vm_fd >= 0, KVM_IOCTL_ERROR(KVM_CREATE_VM, vm_fd));
+
+ vcpu_fd = ioctl(vm_fd, KVM_CREATE_VCPU, 0);
+ TEST_ASSERT(vcpu_fd >= 0, KVM_IOCTL_ERROR(KVM_CREATE_VCPU, vcpu_fd));
+
+ err = ioctl(vm_fd, KVM_ARM_PREFERRED_TARGET, &preferred_init);
+ TEST_ASSERT(err == 0, KVM_IOCTL_ERROR(KVM_ARM_PREFERRED_TARGET, err));
+ err = ioctl(vcpu_fd, KVM_ARM_VCPU_INIT, &preferred_init);
+ TEST_ASSERT(err == 0, KVM_IOCTL_ERROR(KVM_ARM_VCPU_INIT, err));
+
+ err = ioctl(vcpu_fd, KVM_GET_ONE_REG, &reg);
+ TEST_ASSERT(err == 0, KVM_IOCTL_ERROR(KVM_GET_ONE_REG, vcpu_fd));
+
+ gran = FIELD_GET(ARM64_FEATURE_MASK(ID_AA64MMFR0_EL1_TGRAN4), val);
+ *ipa4k = max_ipa_for_page_size(ipa, gran, ID_AA64MMFR0_EL1_TGRAN4_NI,
+ ID_AA64MMFR0_EL1_TGRAN4_52_BIT);
+
+ gran = FIELD_GET(ARM64_FEATURE_MASK(ID_AA64MMFR0_EL1_TGRAN64), val);
+ *ipa64k = max_ipa_for_page_size(ipa, gran, ID_AA64MMFR0_EL1_TGRAN64_NI,
+ ID_AA64MMFR0_EL1_TGRAN64_IMP);
+
+ gran = FIELD_GET(ARM64_FEATURE_MASK(ID_AA64MMFR0_EL1_TGRAN16), val);
+ *ipa16k = max_ipa_for_page_size(ipa, gran, ID_AA64MMFR0_EL1_TGRAN16_NI,
+ ID_AA64MMFR0_EL1_TGRAN16_52_BIT);
+
+ close(vcpu_fd);
+ close(vm_fd);
+ close(kvm_fd);
+}
+
+#define __smccc_call(insn, function_id, arg0, arg1, arg2, arg3, arg4, arg5, \
+ arg6, res) \
+ asm volatile("mov w0, %w[function_id]\n" \
+ "mov x1, %[arg0]\n" \
+ "mov x2, %[arg1]\n" \
+ "mov x3, %[arg2]\n" \
+ "mov x4, %[arg3]\n" \
+ "mov x5, %[arg4]\n" \
+ "mov x6, %[arg5]\n" \
+ "mov x7, %[arg6]\n" \
+ #insn "#0\n" \
+ "mov %[res0], x0\n" \
+ "mov %[res1], x1\n" \
+ "mov %[res2], x2\n" \
+ "mov %[res3], x3\n" \
+ : [res0] "=r"(res->a0), [res1] "=r"(res->a1), \
+ [res2] "=r"(res->a2), [res3] "=r"(res->a3) \
+ : [function_id] "r"(function_id), [arg0] "r"(arg0), \
+ [arg1] "r"(arg1), [arg2] "r"(arg2), [arg3] "r"(arg3), \
+ [arg4] "r"(arg4), [arg5] "r"(arg5), [arg6] "r"(arg6) \
+ : "x0", "x1", "x2", "x3", "x4", "x5", "x6", "x7")
+
+
+void smccc_hvc(uint32_t function_id, uint64_t arg0, uint64_t arg1,
+ uint64_t arg2, uint64_t arg3, uint64_t arg4, uint64_t arg5,
+ uint64_t arg6, struct arm_smccc_res *res)
+{
+ __smccc_call(hvc, function_id, arg0, arg1, arg2, arg3, arg4, arg5,
+ arg6, res);
+}
+
+void smccc_smc(uint32_t function_id, uint64_t arg0, uint64_t arg1,
+ uint64_t arg2, uint64_t arg3, uint64_t arg4, uint64_t arg5,
+ uint64_t arg6, struct arm_smccc_res *res)
+{
+ __smccc_call(smc, function_id, arg0, arg1, arg2, arg3, arg4, arg5,
+ arg6, res);
+}
+
+void kvm_selftest_arch_init(void)
+{
+ /*
+ * arm64 doesn't have a true default mode, so start by computing the
+ * available IPA space and page sizes early.
+ */
+ guest_modes_append_default();
+}
+
+void vm_vaddr_populate_bitmap(struct kvm_vm *vm)
+{
+ /*
+ * arm64 selftests use only TTBR0_EL1, meaning that the valid VA space
+ * is [0, 2^(64 - TCR_EL1.T0SZ)).
+ */
+ sparsebit_set_num(vm->vpages_valid, 0,
+ (1ULL << vm->va_bits) >> vm->page_shift);
+}
diff --git a/tools/testing/selftests/kvm/lib/aarch64/spinlock.c b/tools/testing/selftests/kvm/lib/aarch64/spinlock.c
new file mode 100644
index 000000000000..a076e780be5d
--- /dev/null
+++ b/tools/testing/selftests/kvm/lib/aarch64/spinlock.c
@@ -0,0 +1,27 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * ARM64 Spinlock support
+ */
+#include <stdint.h>
+
+#include "spinlock.h"
+
+void spin_lock(struct spinlock *lock)
+{
+ int val, res;
+
+ asm volatile(
+ "1: ldaxr %w0, [%2]\n"
+ " cbnz %w0, 1b\n"
+ " mov %w0, #1\n"
+ " stxr %w1, %w0, [%2]\n"
+ " cbnz %w1, 1b\n"
+ : "=&r" (val), "=&r" (res)
+ : "r" (&lock->v)
+ : "memory");
+}
+
+void spin_unlock(struct spinlock *lock)
+{
+ asm volatile("stlr wzr, [%0]\n" : : "r" (&lock->v) : "memory");
+}
diff --git a/tools/testing/selftests/kvm/lib/aarch64/ucall.c b/tools/testing/selftests/kvm/lib/aarch64/ucall.c
index c8e0ec20d3bf..ddab0ce89d4d 100644
--- a/tools/testing/selftests/kvm/lib/aarch64/ucall.c
+++ b/tools/testing/selftests/kvm/lib/aarch64/ucall.c
@@ -5,108 +5,30 @@
* Copyright (C) 2018, Red Hat, Inc.
*/
#include "kvm_util.h"
-#include "../kvm_util_internal.h"
-static vm_vaddr_t *ucall_exit_mmio_addr;
+vm_vaddr_t *ucall_exit_mmio_addr;
-static bool ucall_mmio_init(struct kvm_vm *vm, vm_paddr_t gpa)
+void ucall_arch_init(struct kvm_vm *vm, vm_paddr_t mmio_gpa)
{
- if (kvm_userspace_memory_region_find(vm, gpa, gpa + 1))
- return false;
+ vm_vaddr_t mmio_gva = vm_vaddr_unused_gap(vm, vm->page_size, KVM_UTIL_MIN_VADDR);
- virt_pg_map(vm, gpa, gpa, 0);
+ virt_map(vm, mmio_gva, mmio_gpa, 1);
- ucall_exit_mmio_addr = (vm_vaddr_t *)gpa;
- sync_global_to_guest(vm, ucall_exit_mmio_addr);
+ vm->ucall_mmio_addr = mmio_gpa;
- return true;
+ write_guest_global(vm, ucall_exit_mmio_addr, (vm_vaddr_t *)mmio_gva);
}
-void ucall_init(struct kvm_vm *vm, void *arg)
+void *ucall_arch_get_ucall(struct kvm_vcpu *vcpu)
{
- vm_paddr_t gpa, start, end, step, offset;
- unsigned int bits;
- bool ret;
-
- if (arg) {
- gpa = (vm_paddr_t)arg;
- ret = ucall_mmio_init(vm, gpa);
- TEST_ASSERT(ret, "Can't set ucall mmio address to %lx", gpa);
- return;
- }
-
- /*
- * Find an address within the allowed physical and virtual address
- * spaces, that does _not_ have a KVM memory region associated with
- * it. Identity mapping an address like this allows the guest to
- * access it, but as KVM doesn't know what to do with it, it
- * will assume it's something userspace handles and exit with
- * KVM_EXIT_MMIO. Well, at least that's how it works for AArch64.
- * Here we start with a guess that the addresses around 5/8th
- * of the allowed space are unmapped and then work both down and
- * up from there in 1/16th allowed space sized steps.
- *
- * Note, we need to use VA-bits - 1 when calculating the allowed
- * virtual address space for an identity mapping because the upper
- * half of the virtual address space is the two's complement of the
- * lower and won't match physical addresses.
- */
- bits = vm->va_bits - 1;
- bits = vm->pa_bits < bits ? vm->pa_bits : bits;
- end = 1ul << bits;
- start = end * 5 / 8;
- step = end / 16;
- for (offset = 0; offset < end - start; offset += step) {
- if (ucall_mmio_init(vm, start - offset))
- return;
- if (ucall_mmio_init(vm, start + offset))
- return;
- }
- TEST_FAIL("Can't find a ucall mmio address");
-}
-
-void ucall_uninit(struct kvm_vm *vm)
-{
- ucall_exit_mmio_addr = 0;
- sync_global_to_guest(vm, ucall_exit_mmio_addr);
-}
-
-void ucall(uint64_t cmd, int nargs, ...)
-{
- struct ucall uc = {
- .cmd = cmd,
- };
- va_list va;
- int i;
-
- nargs = nargs <= UCALL_MAX_ARGS ? nargs : UCALL_MAX_ARGS;
-
- va_start(va, nargs);
- for (i = 0; i < nargs; ++i)
- uc.args[i] = va_arg(va, uint64_t);
- va_end(va);
-
- *ucall_exit_mmio_addr = (vm_vaddr_t)&uc;
-}
-
-uint64_t get_ucall(struct kvm_vm *vm, uint32_t vcpu_id, struct ucall *uc)
-{
- struct kvm_run *run = vcpu_state(vm, vcpu_id);
- struct ucall ucall = {};
+ struct kvm_run *run = vcpu->run;
if (run->exit_reason == KVM_EXIT_MMIO &&
- run->mmio.phys_addr == (uint64_t)ucall_exit_mmio_addr) {
- vm_vaddr_t gva;
-
- TEST_ASSERT(run->mmio.is_write && run->mmio.len == 8,
+ run->mmio.phys_addr == vcpu->vm->ucall_mmio_addr) {
+ TEST_ASSERT(run->mmio.is_write && run->mmio.len == sizeof(uint64_t),
"Unexpected ucall exit mmio address access");
- memcpy(&gva, run->mmio.data, sizeof(gva));
- memcpy(&ucall, addr_gva2hva(vm, gva), sizeof(ucall));
-
- vcpu_run_complete_io(vm, vcpu_id);
- if (uc)
- memcpy(uc, &ucall, sizeof(ucall));
+ return (void *)(*((uint64_t *)run->mmio.data));
}
- return ucall.cmd;
+ return NULL;
}
diff --git a/tools/testing/selftests/kvm/lib/aarch64/vgic.c b/tools/testing/selftests/kvm/lib/aarch64/vgic.c
new file mode 100644
index 000000000000..184378d593e9
--- /dev/null
+++ b/tools/testing/selftests/kvm/lib/aarch64/vgic.c
@@ -0,0 +1,170 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * ARM Generic Interrupt Controller (GIC) v3 host support
+ */
+
+#include <linux/kvm.h>
+#include <linux/sizes.h>
+#include <asm/kvm_para.h>
+#include <asm/kvm.h>
+
+#include "kvm_util.h"
+#include "vgic.h"
+#include "gic.h"
+#include "gic_v3.h"
+
+/*
+ * vGIC-v3 default host setup
+ *
+ * Input args:
+ * vm - KVM VM
+ * nr_vcpus - Number of vCPUs supported by this VM
+ * gicd_base_gpa - Guest Physical Address of the Distributor region
+ * gicr_base_gpa - Guest Physical Address of the Redistributor region
+ *
+ * Output args: None
+ *
+ * Return: GIC file-descriptor or negative error code upon failure
+ *
+ * The function creates a vGIC-v3 device and maps the distributor and
+ * redistributor regions of the guest. Since it depends on the number of
+ * vCPUs for the VM, it must be called after all the vCPUs have been created.
+ */
+int vgic_v3_setup(struct kvm_vm *vm, unsigned int nr_vcpus, uint32_t nr_irqs,
+ uint64_t gicd_base_gpa, uint64_t gicr_base_gpa)
+{
+ int gic_fd;
+ uint64_t redist_attr;
+ struct list_head *iter;
+ unsigned int nr_gic_pages, nr_vcpus_created = 0;
+
+ TEST_ASSERT(nr_vcpus, "Number of vCPUs cannot be empty");
+
+ /*
+ * Make sure that the caller is infact calling this
+ * function after all the vCPUs are added.
+ */
+ list_for_each(iter, &vm->vcpus)
+ nr_vcpus_created++;
+ TEST_ASSERT(nr_vcpus == nr_vcpus_created,
+ "Number of vCPUs requested (%u) doesn't match with the ones created for the VM (%u)",
+ nr_vcpus, nr_vcpus_created);
+
+ /* Distributor setup */
+ gic_fd = __kvm_create_device(vm, KVM_DEV_TYPE_ARM_VGIC_V3);
+ if (gic_fd < 0)
+ return gic_fd;
+
+ kvm_device_attr_set(gic_fd, KVM_DEV_ARM_VGIC_GRP_NR_IRQS, 0, &nr_irqs);
+
+ kvm_device_attr_set(gic_fd, KVM_DEV_ARM_VGIC_GRP_CTRL,
+ KVM_DEV_ARM_VGIC_CTRL_INIT, NULL);
+
+ kvm_device_attr_set(gic_fd, KVM_DEV_ARM_VGIC_GRP_ADDR,
+ KVM_VGIC_V3_ADDR_TYPE_DIST, &gicd_base_gpa);
+ nr_gic_pages = vm_calc_num_guest_pages(vm->mode, KVM_VGIC_V3_DIST_SIZE);
+ virt_map(vm, gicd_base_gpa, gicd_base_gpa, nr_gic_pages);
+
+ /* Redistributor setup */
+ redist_attr = REDIST_REGION_ATTR_ADDR(nr_vcpus, gicr_base_gpa, 0, 0);
+ kvm_device_attr_set(gic_fd, KVM_DEV_ARM_VGIC_GRP_ADDR,
+ KVM_VGIC_V3_ADDR_TYPE_REDIST_REGION, &redist_attr);
+ nr_gic_pages = vm_calc_num_guest_pages(vm->mode,
+ KVM_VGIC_V3_REDIST_SIZE * nr_vcpus);
+ virt_map(vm, gicr_base_gpa, gicr_base_gpa, nr_gic_pages);
+
+ kvm_device_attr_set(gic_fd, KVM_DEV_ARM_VGIC_GRP_CTRL,
+ KVM_DEV_ARM_VGIC_CTRL_INIT, NULL);
+
+ return gic_fd;
+}
+
+/* should only work for level sensitive interrupts */
+int _kvm_irq_set_level_info(int gic_fd, uint32_t intid, int level)
+{
+ uint64_t attr = 32 * (intid / 32);
+ uint64_t index = intid % 32;
+ uint64_t val;
+ int ret;
+
+ ret = __kvm_device_attr_get(gic_fd, KVM_DEV_ARM_VGIC_GRP_LEVEL_INFO,
+ attr, &val);
+ if (ret != 0)
+ return ret;
+
+ val |= 1U << index;
+ ret = __kvm_device_attr_set(gic_fd, KVM_DEV_ARM_VGIC_GRP_LEVEL_INFO,
+ attr, &val);
+ return ret;
+}
+
+void kvm_irq_set_level_info(int gic_fd, uint32_t intid, int level)
+{
+ int ret = _kvm_irq_set_level_info(gic_fd, intid, level);
+
+ TEST_ASSERT(!ret, KVM_IOCTL_ERROR(KVM_DEV_ARM_VGIC_GRP_LEVEL_INFO, ret));
+}
+
+int _kvm_arm_irq_line(struct kvm_vm *vm, uint32_t intid, int level)
+{
+ uint32_t irq = intid & KVM_ARM_IRQ_NUM_MASK;
+
+ TEST_ASSERT(!INTID_IS_SGI(intid), "KVM_IRQ_LINE's interface itself "
+ "doesn't allow injecting SGIs. There's no mask for it.");
+
+ if (INTID_IS_PPI(intid))
+ irq |= KVM_ARM_IRQ_TYPE_PPI << KVM_ARM_IRQ_TYPE_SHIFT;
+ else
+ irq |= KVM_ARM_IRQ_TYPE_SPI << KVM_ARM_IRQ_TYPE_SHIFT;
+
+ return _kvm_irq_line(vm, irq, level);
+}
+
+void kvm_arm_irq_line(struct kvm_vm *vm, uint32_t intid, int level)
+{
+ int ret = _kvm_arm_irq_line(vm, intid, level);
+
+ TEST_ASSERT(!ret, KVM_IOCTL_ERROR(KVM_IRQ_LINE, ret));
+}
+
+static void vgic_poke_irq(int gic_fd, uint32_t intid, struct kvm_vcpu *vcpu,
+ uint64_t reg_off)
+{
+ uint64_t reg = intid / 32;
+ uint64_t index = intid % 32;
+ uint64_t attr = reg_off + reg * 4;
+ uint64_t val;
+ bool intid_is_private = INTID_IS_SGI(intid) || INTID_IS_PPI(intid);
+
+ uint32_t group = intid_is_private ? KVM_DEV_ARM_VGIC_GRP_REDIST_REGS
+ : KVM_DEV_ARM_VGIC_GRP_DIST_REGS;
+
+ if (intid_is_private) {
+ /* TODO: only vcpu 0 implemented for now. */
+ assert(vcpu->id == 0);
+ attr += SZ_64K;
+ }
+
+ /* Check that the addr part of the attr is within 32 bits. */
+ assert((attr & ~KVM_DEV_ARM_VGIC_OFFSET_MASK) == 0);
+
+ /*
+ * All calls will succeed, even with invalid intid's, as long as the
+ * addr part of the attr is within 32 bits (checked above). An invalid
+ * intid will just make the read/writes point to above the intended
+ * register space (i.e., ICPENDR after ISPENDR).
+ */
+ kvm_device_attr_get(gic_fd, group, attr, &val);
+ val |= 1ULL << index;
+ kvm_device_attr_set(gic_fd, group, attr, &val);
+}
+
+void kvm_irq_write_ispendr(int gic_fd, uint32_t intid, struct kvm_vcpu *vcpu)
+{
+ vgic_poke_irq(gic_fd, intid, vcpu, GICD_ISPENDR);
+}
+
+void kvm_irq_write_isactiver(int gic_fd, uint32_t intid, struct kvm_vcpu *vcpu)
+{
+ vgic_poke_irq(gic_fd, intid, vcpu, GICD_ISACTIVER);
+}
diff --git a/tools/testing/selftests/kvm/lib/assert.c b/tools/testing/selftests/kvm/lib/assert.c
index 5ebbd0d6b472..2bd25b191d15 100644
--- a/tools/testing/selftests/kvm/lib/assert.c
+++ b/tools/testing/selftests/kvm/lib/assert.c
@@ -22,7 +22,7 @@ static void test_dump_stack(void)
* Build and run this command:
*
* addr2line -s -e /proc/$PPID/exe -fpai {backtrace addresses} | \
- * grep -v test_dump_stack | cat -n 1>&2
+ * cat -n 1>&2
*
* Note that the spacing is different and there's no newline.
*/
@@ -36,18 +36,24 @@ static void test_dump_stack(void)
n * (((sizeof(void *)) * 2) + 1) +
/* Null terminator: */
1];
- char *c;
+ char *c = cmd;
n = backtrace(stack, n);
- c = &cmd[0];
- c += sprintf(c, "%s", addr2line);
/*
- * Skip the first 3 frames: backtrace, test_dump_stack, and
- * test_assert. We hope that backtrace isn't inlined and the other two
- * we've declared noinline.
+ * Skip the first 2 frames, which should be test_dump_stack() and
+ * test_assert(); both of which are declared noinline. Bail if the
+ * resulting stack trace would be empty. Otherwise, addr2line will block
+ * waiting for addresses to be passed in via stdin.
*/
+ if (n <= 2) {
+ fputs(" (stack trace empty)\n", stderr);
+ return;
+ }
+
+ c += sprintf(c, "%s", addr2line);
for (i = 2; i < n; i++)
c += sprintf(c, " %lx", ((unsigned long) stack[i]) - 1);
+
c += sprintf(c, "%s", pipeline);
#pragma GCC diagnostic push
#pragma GCC diagnostic ignored "-Wunused-result"
@@ -71,9 +77,9 @@ test_assert(bool exp, const char *exp_str,
fprintf(stderr, "==== Test Assertion Failure ====\n"
" %s:%u: %s\n"
- " pid=%d tid=%d - %s\n",
+ " pid=%d tid=%d errno=%d - %s\n",
file, line, exp_str, getpid(), _gettid(),
- strerror(errno));
+ errno, strerror(errno));
test_dump_stack();
if (fmt) {
fputs(" ", stderr);
diff --git a/tools/testing/selftests/kvm/lib/elf.c b/tools/testing/selftests/kvm/lib/elf.c
index bc75a91e00a6..f34d926d9735 100644
--- a/tools/testing/selftests/kvm/lib/elf.c
+++ b/tools/testing/selftests/kvm/lib/elf.c
@@ -11,7 +11,6 @@
#include <linux/elf.h>
#include "kvm_util.h"
-#include "kvm_util_internal.h"
static void elfhdr_get(const char *filename, Elf64_Ehdr *hdrp)
{
@@ -91,6 +90,7 @@ static void elfhdr_get(const char *filename, Elf64_Ehdr *hdrp)
" hdrp->e_shentsize: %x\n"
" expected: %zx",
hdrp->e_shentsize, sizeof(Elf64_Shdr));
+ close(fd);
}
/* VM ELF Load
@@ -111,8 +111,7 @@ static void elfhdr_get(const char *filename, Elf64_Ehdr *hdrp)
* by the image and it needs to have sufficient available physical pages, to
* back the virtual pages used to load the image.
*/
-void kvm_vm_elf_load(struct kvm_vm *vm, const char *filename,
- uint32_t data_memslot, uint32_t pgd_memslot)
+void kvm_vm_elf_load(struct kvm_vm *vm, const char *filename)
{
off_t offset, offset_rv;
Elf64_Ehdr hdr;
@@ -140,7 +139,7 @@ void kvm_vm_elf_load(struct kvm_vm *vm, const char *filename,
offset = hdr.e_phoff + (n1 * hdr.e_phentsize);
offset_rv = lseek(fd, offset, SEEK_SET);
TEST_ASSERT(offset_rv == offset,
- "Failed to seek to begining of program header %u,\n"
+ "Failed to seek to beginning of program header %u,\n"
" filename: %s\n"
" rv: %jd errno: %i",
n1, filename, (intmax_t) offset_rv, errno);
@@ -158,14 +157,13 @@ void kvm_vm_elf_load(struct kvm_vm *vm, const char *filename,
"memsize of 0,\n"
" phdr index: %u p_memsz: 0x%" PRIx64,
n1, (uint64_t) phdr.p_memsz);
- vm_vaddr_t seg_vstart = phdr.p_vaddr;
- seg_vstart &= ~(vm_vaddr_t)(vm->page_size - 1);
+ vm_vaddr_t seg_vstart = align_down(phdr.p_vaddr, vm->page_size);
vm_vaddr_t seg_vend = phdr.p_vaddr + phdr.p_memsz - 1;
seg_vend |= vm->page_size - 1;
size_t seg_size = seg_vend - seg_vstart + 1;
- vm_vaddr_t vaddr = vm_vaddr_alloc(vm, seg_size, seg_vstart,
- data_memslot, pgd_memslot);
+ vm_vaddr_t vaddr = __vm_vaddr_alloc(vm, seg_size, seg_vstart,
+ MEM_REGION_CODE);
TEST_ASSERT(vaddr == seg_vstart, "Unable to allocate "
"virtual memory for segment at requested min addr,\n"
" segment idx: %u\n"
@@ -186,11 +184,12 @@ void kvm_vm_elf_load(struct kvm_vm *vm, const char *filename,
"Seek to program segment offset failed,\n"
" program header idx: %u errno: %i\n"
" offset_rv: 0x%jx\n"
- " expected: 0x%jx\n",
+ " expected: 0x%jx",
n1, errno, (intmax_t) offset_rv,
(intmax_t) phdr.p_offset);
test_read(fd, addr_gva2hva(vm, phdr.p_vaddr),
phdr.p_filesz);
}
}
+ close(fd);
}
diff --git a/tools/testing/selftests/kvm/lib/guest_modes.c b/tools/testing/selftests/kvm/lib/guest_modes.c
new file mode 100644
index 000000000000..b04901e55138
--- /dev/null
+++ b/tools/testing/selftests/kvm/lib/guest_modes.c
@@ -0,0 +1,129 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (C) 2020, Red Hat, Inc.
+ */
+#include "guest_modes.h"
+
+#ifdef __aarch64__
+#include "processor.h"
+enum vm_guest_mode vm_mode_default;
+#endif
+
+struct guest_mode guest_modes[NUM_VM_MODES];
+
+void guest_modes_append_default(void)
+{
+#ifndef __aarch64__
+ guest_mode_append(VM_MODE_DEFAULT, true);
+#else
+ {
+ unsigned int limit = kvm_check_cap(KVM_CAP_ARM_VM_IPA_SIZE);
+ uint32_t ipa4k, ipa16k, ipa64k;
+ int i;
+
+ aarch64_get_supported_page_sizes(limit, &ipa4k, &ipa16k, &ipa64k);
+
+ guest_mode_append(VM_MODE_P52V48_4K, ipa4k >= 52);
+ guest_mode_append(VM_MODE_P52V48_16K, ipa16k >= 52);
+ guest_mode_append(VM_MODE_P52V48_64K, ipa64k >= 52);
+
+ guest_mode_append(VM_MODE_P48V48_4K, ipa4k >= 48);
+ guest_mode_append(VM_MODE_P48V48_16K, ipa16k >= 48);
+ guest_mode_append(VM_MODE_P48V48_64K, ipa64k >= 48);
+
+ guest_mode_append(VM_MODE_P40V48_4K, ipa4k >= 40);
+ guest_mode_append(VM_MODE_P40V48_16K, ipa16k >= 40);
+ guest_mode_append(VM_MODE_P40V48_64K, ipa64k >= 40);
+
+ guest_mode_append(VM_MODE_P36V48_4K, ipa4k >= 36);
+ guest_mode_append(VM_MODE_P36V48_16K, ipa16k >= 36);
+ guest_mode_append(VM_MODE_P36V48_64K, ipa64k >= 36);
+ guest_mode_append(VM_MODE_P36V47_16K, ipa16k >= 36);
+
+ vm_mode_default = ipa4k >= 40 ? VM_MODE_P40V48_4K : NUM_VM_MODES;
+
+ /*
+ * Pick the first supported IPA size if the default
+ * isn't available.
+ */
+ for (i = 0; vm_mode_default == NUM_VM_MODES && i < NUM_VM_MODES; i++) {
+ if (guest_modes[i].supported && guest_modes[i].enabled)
+ vm_mode_default = i;
+ }
+
+ TEST_ASSERT(vm_mode_default != NUM_VM_MODES,
+ "No supported mode!");
+ }
+#endif
+#ifdef __s390x__
+ {
+ int kvm_fd, vm_fd;
+ struct kvm_s390_vm_cpu_processor info;
+
+ kvm_fd = open_kvm_dev_path_or_exit();
+ vm_fd = __kvm_ioctl(kvm_fd, KVM_CREATE_VM, NULL);
+ kvm_device_attr_get(vm_fd, KVM_S390_VM_CPU_MODEL,
+ KVM_S390_VM_CPU_PROCESSOR, &info);
+ close(vm_fd);
+ close(kvm_fd);
+ /* Starting with z13 we have 47bits of physical address */
+ if (info.ibc >= 0x30)
+ guest_mode_append(VM_MODE_P47V64_4K, true);
+ }
+#endif
+#ifdef __riscv
+ {
+ unsigned int sz = kvm_check_cap(KVM_CAP_VM_GPA_BITS);
+
+ if (sz >= 52)
+ guest_mode_append(VM_MODE_P52V48_4K, true);
+ if (sz >= 48)
+ guest_mode_append(VM_MODE_P48V48_4K, true);
+ }
+#endif
+}
+
+void for_each_guest_mode(void (*func)(enum vm_guest_mode, void *), void *arg)
+{
+ int i;
+
+ for (i = 0; i < NUM_VM_MODES; ++i) {
+ if (!guest_modes[i].enabled)
+ continue;
+ TEST_ASSERT(guest_modes[i].supported,
+ "Guest mode ID %d (%s) not supported.",
+ i, vm_guest_mode_string(i));
+ func(i, arg);
+ }
+}
+
+void guest_modes_help(void)
+{
+ int i;
+
+ printf(" -m: specify the guest mode ID to test\n"
+ " (default: test all supported modes)\n"
+ " This option may be used multiple times.\n"
+ " Guest mode IDs:\n");
+ for (i = 0; i < NUM_VM_MODES; ++i) {
+ printf(" %d: %s%s\n", i, vm_guest_mode_string(i),
+ guest_modes[i].supported ? " (supported)" : "");
+ }
+}
+
+void guest_modes_cmdline(const char *arg)
+{
+ static bool mode_selected;
+ unsigned int mode;
+ int i;
+
+ if (!mode_selected) {
+ for (i = 0; i < NUM_VM_MODES; ++i)
+ guest_modes[i].enabled = false;
+ mode_selected = true;
+ }
+
+ mode = atoi_non_negative("Guest mode ID", arg);
+ TEST_ASSERT(mode < NUM_VM_MODES, "Guest mode ID %d too big", mode);
+ guest_modes[mode].enabled = true;
+}
diff --git a/tools/testing/selftests/kvm/lib/guest_sprintf.c b/tools/testing/selftests/kvm/lib/guest_sprintf.c
new file mode 100644
index 000000000000..74627514c4d4
--- /dev/null
+++ b/tools/testing/selftests/kvm/lib/guest_sprintf.c
@@ -0,0 +1,314 @@
+// SPDX-License-Identifier: GPL-2.0-only
+#include "test_util.h"
+#include "kvm_util.h"
+#include "ucall_common.h"
+
+#define APPEND_BUFFER_SAFE(str, end, v) \
+do { \
+ GUEST_ASSERT(str < end); \
+ *str++ = (v); \
+} while (0)
+
+static int isdigit(int ch)
+{
+ return (ch >= '0') && (ch <= '9');
+}
+
+static int skip_atoi(const char **s)
+{
+ int i = 0;
+
+ while (isdigit(**s))
+ i = i * 10 + *((*s)++) - '0';
+ return i;
+}
+
+#define ZEROPAD 1 /* pad with zero */
+#define SIGN 2 /* unsigned/signed long */
+#define PLUS 4 /* show plus */
+#define SPACE 8 /* space if plus */
+#define LEFT 16 /* left justified */
+#define SMALL 32 /* Must be 32 == 0x20 */
+#define SPECIAL 64 /* 0x */
+
+#define __do_div(n, base) \
+({ \
+ int __res; \
+ \
+ __res = ((uint64_t) n) % (uint32_t) base; \
+ n = ((uint64_t) n) / (uint32_t) base; \
+ __res; \
+})
+
+static char *number(char *str, const char *end, long num, int base, int size,
+ int precision, int type)
+{
+ /* we are called with base 8, 10 or 16, only, thus don't need "G..." */
+ static const char digits[16] = "0123456789ABCDEF"; /* "GHIJKLMNOPQRSTUVWXYZ"; */
+
+ char tmp[66];
+ char c, sign, locase;
+ int i;
+
+ /*
+ * locase = 0 or 0x20. ORing digits or letters with 'locase'
+ * produces same digits or (maybe lowercased) letters
+ */
+ locase = (type & SMALL);
+ if (type & LEFT)
+ type &= ~ZEROPAD;
+ if (base < 2 || base > 16)
+ return NULL;
+ c = (type & ZEROPAD) ? '0' : ' ';
+ sign = 0;
+ if (type & SIGN) {
+ if (num < 0) {
+ sign = '-';
+ num = -num;
+ size--;
+ } else if (type & PLUS) {
+ sign = '+';
+ size--;
+ } else if (type & SPACE) {
+ sign = ' ';
+ size--;
+ }
+ }
+ if (type & SPECIAL) {
+ if (base == 16)
+ size -= 2;
+ else if (base == 8)
+ size--;
+ }
+ i = 0;
+ if (num == 0)
+ tmp[i++] = '0';
+ else
+ while (num != 0)
+ tmp[i++] = (digits[__do_div(num, base)] | locase);
+ if (i > precision)
+ precision = i;
+ size -= precision;
+ if (!(type & (ZEROPAD + LEFT)))
+ while (size-- > 0)
+ APPEND_BUFFER_SAFE(str, end, ' ');
+ if (sign)
+ APPEND_BUFFER_SAFE(str, end, sign);
+ if (type & SPECIAL) {
+ if (base == 8)
+ APPEND_BUFFER_SAFE(str, end, '0');
+ else if (base == 16) {
+ APPEND_BUFFER_SAFE(str, end, '0');
+ APPEND_BUFFER_SAFE(str, end, 'x');
+ }
+ }
+ if (!(type & LEFT))
+ while (size-- > 0)
+ APPEND_BUFFER_SAFE(str, end, c);
+ while (i < precision--)
+ APPEND_BUFFER_SAFE(str, end, '0');
+ while (i-- > 0)
+ APPEND_BUFFER_SAFE(str, end, tmp[i]);
+ while (size-- > 0)
+ APPEND_BUFFER_SAFE(str, end, ' ');
+
+ return str;
+}
+
+int guest_vsnprintf(char *buf, int n, const char *fmt, va_list args)
+{
+ char *str, *end;
+ const char *s;
+ uint64_t num;
+ int i, base;
+ int len;
+
+ int flags; /* flags to number() */
+
+ int field_width; /* width of output field */
+ int precision; /*
+ * min. # of digits for integers; max
+ * number of chars for from string
+ */
+ int qualifier; /* 'h', 'l', or 'L' for integer fields */
+
+ end = buf + n;
+ GUEST_ASSERT(buf < end);
+ GUEST_ASSERT(n > 0);
+
+ for (str = buf; *fmt; ++fmt) {
+ if (*fmt != '%') {
+ APPEND_BUFFER_SAFE(str, end, *fmt);
+ continue;
+ }
+
+ /* process flags */
+ flags = 0;
+repeat:
+ ++fmt; /* this also skips first '%' */
+ switch (*fmt) {
+ case '-':
+ flags |= LEFT;
+ goto repeat;
+ case '+':
+ flags |= PLUS;
+ goto repeat;
+ case ' ':
+ flags |= SPACE;
+ goto repeat;
+ case '#':
+ flags |= SPECIAL;
+ goto repeat;
+ case '0':
+ flags |= ZEROPAD;
+ goto repeat;
+ }
+
+ /* get field width */
+ field_width = -1;
+ if (isdigit(*fmt))
+ field_width = skip_atoi(&fmt);
+ else if (*fmt == '*') {
+ ++fmt;
+ /* it's the next argument */
+ field_width = va_arg(args, int);
+ if (field_width < 0) {
+ field_width = -field_width;
+ flags |= LEFT;
+ }
+ }
+
+ /* get the precision */
+ precision = -1;
+ if (*fmt == '.') {
+ ++fmt;
+ if (isdigit(*fmt))
+ precision = skip_atoi(&fmt);
+ else if (*fmt == '*') {
+ ++fmt;
+ /* it's the next argument */
+ precision = va_arg(args, int);
+ }
+ if (precision < 0)
+ precision = 0;
+ }
+
+ /* get the conversion qualifier */
+ qualifier = -1;
+ if (*fmt == 'h' || *fmt == 'l' || *fmt == 'L') {
+ qualifier = *fmt;
+ ++fmt;
+ }
+
+ /*
+ * Play nice with %llu, %llx, etc. KVM selftests only support
+ * 64-bit builds, so just treat %ll* the same as %l*.
+ */
+ if (qualifier == 'l' && *fmt == 'l')
+ ++fmt;
+
+ /* default base */
+ base = 10;
+
+ switch (*fmt) {
+ case 'c':
+ if (!(flags & LEFT))
+ while (--field_width > 0)
+ APPEND_BUFFER_SAFE(str, end, ' ');
+ APPEND_BUFFER_SAFE(str, end,
+ (uint8_t)va_arg(args, int));
+ while (--field_width > 0)
+ APPEND_BUFFER_SAFE(str, end, ' ');
+ continue;
+
+ case 's':
+ s = va_arg(args, char *);
+ len = strnlen(s, precision);
+
+ if (!(flags & LEFT))
+ while (len < field_width--)
+ APPEND_BUFFER_SAFE(str, end, ' ');
+ for (i = 0; i < len; ++i)
+ APPEND_BUFFER_SAFE(str, end, *s++);
+ while (len < field_width--)
+ APPEND_BUFFER_SAFE(str, end, ' ');
+ continue;
+
+ case 'p':
+ if (field_width == -1) {
+ field_width = 2 * sizeof(void *);
+ flags |= SPECIAL | SMALL | ZEROPAD;
+ }
+ str = number(str, end,
+ (uint64_t)va_arg(args, void *), 16,
+ field_width, precision, flags);
+ continue;
+
+ case 'n':
+ if (qualifier == 'l') {
+ long *ip = va_arg(args, long *);
+ *ip = (str - buf);
+ } else {
+ int *ip = va_arg(args, int *);
+ *ip = (str - buf);
+ }
+ continue;
+
+ case '%':
+ APPEND_BUFFER_SAFE(str, end, '%');
+ continue;
+
+ /* integer number formats - set up the flags and "break" */
+ case 'o':
+ base = 8;
+ break;
+
+ case 'x':
+ flags |= SMALL;
+ case 'X':
+ base = 16;
+ break;
+
+ case 'd':
+ case 'i':
+ flags |= SIGN;
+ case 'u':
+ break;
+
+ default:
+ APPEND_BUFFER_SAFE(str, end, '%');
+ if (*fmt)
+ APPEND_BUFFER_SAFE(str, end, *fmt);
+ else
+ --fmt;
+ continue;
+ }
+ if (qualifier == 'l')
+ num = va_arg(args, uint64_t);
+ else if (qualifier == 'h') {
+ num = (uint16_t)va_arg(args, int);
+ if (flags & SIGN)
+ num = (int16_t)num;
+ } else if (flags & SIGN)
+ num = va_arg(args, int);
+ else
+ num = va_arg(args, uint32_t);
+ str = number(str, end, num, base, field_width, precision, flags);
+ }
+
+ GUEST_ASSERT(str < end);
+ *str = '\0';
+ return str - buf;
+}
+
+int guest_snprintf(char *buf, int n, const char *fmt, ...)
+{
+ va_list va;
+ int len;
+
+ va_start(va, fmt);
+ len = guest_vsnprintf(buf, n, fmt, va);
+ va_end(va);
+
+ return len;
+}
diff --git a/tools/testing/selftests/kvm/lib/kvm_util.c b/tools/testing/selftests/kvm/lib/kvm_util.c
index 74776ee228f2..b2262b5fad9e 100644
--- a/tools/testing/selftests/kvm/lib/kvm_util.c
+++ b/tools/testing/selftests/kvm/lib/kvm_util.c
@@ -5,172 +5,283 @@
* Copyright (C) 2018, Google LLC.
*/
+#define _GNU_SOURCE /* for program_invocation_name */
#include "test_util.h"
#include "kvm_util.h"
-#include "kvm_util_internal.h"
#include "processor.h"
#include <assert.h>
+#include <sched.h>
#include <sys/mman.h>
#include <sys/types.h>
#include <sys/stat.h>
+#include <unistd.h>
#include <linux/kernel.h>
-#define KVM_UTIL_PGS_PER_HUGEPG 512
#define KVM_UTIL_MIN_PFN 2
-/* Aligns x up to the next multiple of size. Size must be a power of 2. */
-static void *align(void *x, size_t size)
+static int vcpu_mmap_sz(void);
+
+int open_path_or_exit(const char *path, int flags)
{
- size_t mask = size - 1;
- TEST_ASSERT(size != 0 && !(size & (size - 1)),
- "size not a power of 2: %lu", size);
- return (void *) (((size_t) x + mask) & ~mask);
+ int fd;
+
+ fd = open(path, flags);
+ __TEST_REQUIRE(fd >= 0 || errno != ENOENT, "Cannot open %s: %s", path, strerror(errno));
+ TEST_ASSERT(fd >= 0, "Failed to open '%s'", path);
+
+ return fd;
}
/*
- * Capability
+ * Open KVM_DEV_PATH if available, otherwise exit the entire program.
*
* Input Args:
- * cap - Capability
- *
- * Output Args: None
+ * flags - The flags to pass when opening KVM_DEV_PATH.
*
* Return:
- * On success, the Value corresponding to the capability (KVM_CAP_*)
- * specified by the value of cap. On failure a TEST_ASSERT failure
- * is produced.
- *
- * Looks up and returns the value corresponding to the capability
- * (KVM_CAP_*) given by cap.
+ * The opened file descriptor of /dev/kvm.
*/
-int kvm_check_cap(long cap)
+static int _open_kvm_dev_path_or_exit(int flags)
{
- int ret;
- int kvm_fd;
+ return open_path_or_exit(KVM_DEV_PATH, flags);
+}
- kvm_fd = open(KVM_DEV_PATH, O_RDONLY);
- if (kvm_fd < 0)
- exit(KSFT_SKIP);
+int open_kvm_dev_path_or_exit(void)
+{
+ return _open_kvm_dev_path_or_exit(O_RDONLY);
+}
- ret = ioctl(kvm_fd, KVM_CHECK_EXTENSION, cap);
- TEST_ASSERT(ret != -1, "KVM_CHECK_EXTENSION IOCTL failed,\n"
- " rc: %i errno: %i", ret, errno);
+static ssize_t get_module_param(const char *module_name, const char *param,
+ void *buffer, size_t buffer_size)
+{
+ const int path_size = 128;
+ char path[path_size];
+ ssize_t bytes_read;
+ int fd, r;
- close(kvm_fd);
+ r = snprintf(path, path_size, "/sys/module/%s/parameters/%s",
+ module_name, param);
+ TEST_ASSERT(r < path_size,
+ "Failed to construct sysfs path in %d bytes.", path_size);
- return ret;
+ fd = open_path_or_exit(path, O_RDONLY);
+
+ bytes_read = read(fd, buffer, buffer_size);
+ TEST_ASSERT(bytes_read > 0, "read(%s) returned %ld, wanted %ld bytes",
+ path, bytes_read, buffer_size);
+
+ r = close(fd);
+ TEST_ASSERT(!r, "close(%s) failed", path);
+ return bytes_read;
}
-/* VM Enable Capability
- *
- * Input Args:
- * vm - Virtual Machine
- * cap - Capability
- *
- * Output Args: None
- *
- * Return: On success, 0. On failure a TEST_ASSERT failure is produced.
- *
- * Enables a capability (KVM_CAP_*) on the VM.
- */
-int vm_enable_cap(struct kvm_vm *vm, struct kvm_enable_cap *cap)
+static int get_module_param_integer(const char *module_name, const char *param)
{
- int ret;
+ /*
+ * 16 bytes to hold a 64-bit value (1 byte per char), 1 byte for the
+ * NUL char, and 1 byte because the kernel sucks and inserts a newline
+ * at the end.
+ */
+ char value[16 + 1 + 1];
+ ssize_t r;
- ret = ioctl(vm->fd, KVM_ENABLE_CAP, cap);
- TEST_ASSERT(ret == 0, "KVM_ENABLE_CAP IOCTL failed,\n"
- " rc: %i errno: %i", ret, errno);
+ memset(value, '\0', sizeof(value));
- return ret;
+ r = get_module_param(module_name, param, value, sizeof(value));
+ TEST_ASSERT(value[r - 1] == '\n',
+ "Expected trailing newline, got char '%c'", value[r - 1]);
+
+ /*
+ * Squash the newline, otherwise atoi_paranoid() will complain about
+ * trailing non-NUL characters in the string.
+ */
+ value[r - 1] = '\0';
+ return atoi_paranoid(value);
}
-static void vm_open(struct kvm_vm *vm, int perm)
+static bool get_module_param_bool(const char *module_name, const char *param)
{
- vm->kvm_fd = open(KVM_DEV_PATH, perm);
- if (vm->kvm_fd < 0)
- exit(KSFT_SKIP);
+ char value;
+ ssize_t r;
- if (!kvm_check_cap(KVM_CAP_IMMEDIATE_EXIT)) {
- print_skip("immediate_exit not available");
- exit(KSFT_SKIP);
- }
+ r = get_module_param(module_name, param, &value, sizeof(value));
+ TEST_ASSERT_EQ(r, 1);
- vm->fd = ioctl(vm->kvm_fd, KVM_CREATE_VM, vm->type);
- TEST_ASSERT(vm->fd >= 0, "KVM_CREATE_VM ioctl failed, "
- "rc: %i errno: %i", vm->fd, errno);
+ if (value == 'Y')
+ return true;
+ else if (value == 'N')
+ return false;
+
+ TEST_FAIL("Unrecognized value '%c' for boolean module param", value);
}
-const char * const vm_guest_mode_string[] = {
- "PA-bits:52, VA-bits:48, 4K pages",
- "PA-bits:52, VA-bits:48, 64K pages",
- "PA-bits:48, VA-bits:48, 4K pages",
- "PA-bits:48, VA-bits:48, 64K pages",
- "PA-bits:40, VA-bits:48, 4K pages",
- "PA-bits:40, VA-bits:48, 64K pages",
- "PA-bits:ANY, VA-bits:48, 4K pages",
-};
-_Static_assert(sizeof(vm_guest_mode_string)/sizeof(char *) == NUM_VM_MODES,
- "Missing new mode strings?");
-
-struct vm_guest_mode_params {
- unsigned int pa_bits;
- unsigned int va_bits;
- unsigned int page_size;
- unsigned int page_shift;
-};
+bool get_kvm_param_bool(const char *param)
+{
+ return get_module_param_bool("kvm", param);
+}
-static const struct vm_guest_mode_params vm_guest_mode_params[] = {
- { 52, 48, 0x1000, 12 },
- { 52, 48, 0x10000, 16 },
- { 48, 48, 0x1000, 12 },
- { 48, 48, 0x10000, 16 },
- { 40, 48, 0x1000, 12 },
- { 40, 48, 0x10000, 16 },
- { 0, 0, 0x1000, 12 },
-};
-_Static_assert(sizeof(vm_guest_mode_params)/sizeof(struct vm_guest_mode_params) == NUM_VM_MODES,
- "Missing new mode params?");
+bool get_kvm_intel_param_bool(const char *param)
+{
+ return get_module_param_bool("kvm_intel", param);
+}
+
+bool get_kvm_amd_param_bool(const char *param)
+{
+ return get_module_param_bool("kvm_amd", param);
+}
+
+int get_kvm_param_integer(const char *param)
+{
+ return get_module_param_integer("kvm", param);
+}
+
+int get_kvm_intel_param_integer(const char *param)
+{
+ return get_module_param_integer("kvm_intel", param);
+}
+
+int get_kvm_amd_param_integer(const char *param)
+{
+ return get_module_param_integer("kvm_amd", param);
+}
/*
- * VM Create
+ * Capability
*
* Input Args:
- * mode - VM Mode (e.g. VM_MODE_P52V48_4K)
- * phy_pages - Physical memory pages
- * perm - permission
+ * cap - Capability
*
* Output Args: None
*
* Return:
- * Pointer to opaque structure that describes the created VM.
+ * On success, the Value corresponding to the capability (KVM_CAP_*)
+ * specified by the value of cap. On failure a TEST_ASSERT failure
+ * is produced.
*
- * Creates a VM with the mode specified by mode (e.g. VM_MODE_P52V48_4K).
- * When phy_pages is non-zero, a memory region of phy_pages physical pages
- * is created and mapped starting at guest physical address 0. The file
- * descriptor to control the created VM is created with the permissions
- * given by perm (e.g. O_RDWR).
+ * Looks up and returns the value corresponding to the capability
+ * (KVM_CAP_*) given by cap.
*/
-struct kvm_vm *_vm_create(enum vm_guest_mode mode, uint64_t phy_pages, int perm)
+unsigned int kvm_check_cap(long cap)
{
- struct kvm_vm *vm;
+ int ret;
+ int kvm_fd;
+
+ kvm_fd = open_kvm_dev_path_or_exit();
+ ret = __kvm_ioctl(kvm_fd, KVM_CHECK_EXTENSION, (void *)cap);
+ TEST_ASSERT(ret >= 0, KVM_IOCTL_ERROR(KVM_CHECK_EXTENSION, ret));
+
+ close(kvm_fd);
- pr_debug("%s: mode='%s' pages='%ld' perm='%d'\n", __func__,
- vm_guest_mode_string(mode), phy_pages, perm);
+ return (unsigned int)ret;
+}
+
+void vm_enable_dirty_ring(struct kvm_vm *vm, uint32_t ring_size)
+{
+ if (vm_check_cap(vm, KVM_CAP_DIRTY_LOG_RING_ACQ_REL))
+ vm_enable_cap(vm, KVM_CAP_DIRTY_LOG_RING_ACQ_REL, ring_size);
+ else
+ vm_enable_cap(vm, KVM_CAP_DIRTY_LOG_RING, ring_size);
+ vm->dirty_ring_size = ring_size;
+}
+
+static void vm_open(struct kvm_vm *vm)
+{
+ vm->kvm_fd = _open_kvm_dev_path_or_exit(O_RDWR);
+
+ TEST_REQUIRE(kvm_has_cap(KVM_CAP_IMMEDIATE_EXIT));
+
+ vm->fd = __kvm_ioctl(vm->kvm_fd, KVM_CREATE_VM, (void *)vm->type);
+ TEST_ASSERT(vm->fd >= 0, KVM_IOCTL_ERROR(KVM_CREATE_VM, vm->fd));
+}
+
+const char *vm_guest_mode_string(uint32_t i)
+{
+ static const char * const strings[] = {
+ [VM_MODE_P52V48_4K] = "PA-bits:52, VA-bits:48, 4K pages",
+ [VM_MODE_P52V48_16K] = "PA-bits:52, VA-bits:48, 16K pages",
+ [VM_MODE_P52V48_64K] = "PA-bits:52, VA-bits:48, 64K pages",
+ [VM_MODE_P48V48_4K] = "PA-bits:48, VA-bits:48, 4K pages",
+ [VM_MODE_P48V48_16K] = "PA-bits:48, VA-bits:48, 16K pages",
+ [VM_MODE_P48V48_64K] = "PA-bits:48, VA-bits:48, 64K pages",
+ [VM_MODE_P40V48_4K] = "PA-bits:40, VA-bits:48, 4K pages",
+ [VM_MODE_P40V48_16K] = "PA-bits:40, VA-bits:48, 16K pages",
+ [VM_MODE_P40V48_64K] = "PA-bits:40, VA-bits:48, 64K pages",
+ [VM_MODE_PXXV48_4K] = "PA-bits:ANY, VA-bits:48, 4K pages",
+ [VM_MODE_P47V64_4K] = "PA-bits:47, VA-bits:64, 4K pages",
+ [VM_MODE_P44V64_4K] = "PA-bits:44, VA-bits:64, 4K pages",
+ [VM_MODE_P36V48_4K] = "PA-bits:36, VA-bits:48, 4K pages",
+ [VM_MODE_P36V48_16K] = "PA-bits:36, VA-bits:48, 16K pages",
+ [VM_MODE_P36V48_64K] = "PA-bits:36, VA-bits:48, 64K pages",
+ [VM_MODE_P36V47_16K] = "PA-bits:36, VA-bits:47, 16K pages",
+ };
+ _Static_assert(sizeof(strings)/sizeof(char *) == NUM_VM_MODES,
+ "Missing new mode strings?");
+
+ TEST_ASSERT(i < NUM_VM_MODES, "Guest mode ID %d too big", i);
+
+ return strings[i];
+}
+
+const struct vm_guest_mode_params vm_guest_mode_params[] = {
+ [VM_MODE_P52V48_4K] = { 52, 48, 0x1000, 12 },
+ [VM_MODE_P52V48_16K] = { 52, 48, 0x4000, 14 },
+ [VM_MODE_P52V48_64K] = { 52, 48, 0x10000, 16 },
+ [VM_MODE_P48V48_4K] = { 48, 48, 0x1000, 12 },
+ [VM_MODE_P48V48_16K] = { 48, 48, 0x4000, 14 },
+ [VM_MODE_P48V48_64K] = { 48, 48, 0x10000, 16 },
+ [VM_MODE_P40V48_4K] = { 40, 48, 0x1000, 12 },
+ [VM_MODE_P40V48_16K] = { 40, 48, 0x4000, 14 },
+ [VM_MODE_P40V48_64K] = { 40, 48, 0x10000, 16 },
+ [VM_MODE_PXXV48_4K] = { 0, 0, 0x1000, 12 },
+ [VM_MODE_P47V64_4K] = { 47, 64, 0x1000, 12 },
+ [VM_MODE_P44V64_4K] = { 44, 64, 0x1000, 12 },
+ [VM_MODE_P36V48_4K] = { 36, 48, 0x1000, 12 },
+ [VM_MODE_P36V48_16K] = { 36, 48, 0x4000, 14 },
+ [VM_MODE_P36V48_64K] = { 36, 48, 0x10000, 16 },
+ [VM_MODE_P36V47_16K] = { 36, 47, 0x4000, 14 },
+};
+_Static_assert(sizeof(vm_guest_mode_params)/sizeof(struct vm_guest_mode_params) == NUM_VM_MODES,
+ "Missing new mode params?");
+
+/*
+ * Initializes vm->vpages_valid to match the canonical VA space of the
+ * architecture.
+ *
+ * The default implementation is valid for architectures which split the
+ * range addressed by a single page table into a low and high region
+ * based on the MSB of the VA. On architectures with this behavior
+ * the VA region spans [0, 2^(va_bits - 1)), [-(2^(va_bits - 1), -1].
+ */
+__weak void vm_vaddr_populate_bitmap(struct kvm_vm *vm)
+{
+ sparsebit_set_num(vm->vpages_valid,
+ 0, (1ULL << (vm->va_bits - 1)) >> vm->page_shift);
+ sparsebit_set_num(vm->vpages_valid,
+ (~((1ULL << (vm->va_bits - 1)) - 1)) >> vm->page_shift,
+ (1ULL << (vm->va_bits - 1)) >> vm->page_shift);
+}
+
+struct kvm_vm *____vm_create(struct vm_shape shape)
+{
+ struct kvm_vm *vm;
vm = calloc(1, sizeof(*vm));
TEST_ASSERT(vm != NULL, "Insufficient Memory");
INIT_LIST_HEAD(&vm->vcpus);
- INIT_LIST_HEAD(&vm->userspace_mem_regions);
+ vm->regions.gpa_tree = RB_ROOT;
+ vm->regions.hva_tree = RB_ROOT;
+ hash_init(vm->regions.slot_hash);
- vm->mode = mode;
- vm->type = 0;
+ vm->mode = shape.mode;
+ vm->type = shape.type;
+ vm->subtype = shape.subtype;
- vm->pa_bits = vm_guest_mode_params[mode].pa_bits;
- vm->va_bits = vm_guest_mode_params[mode].va_bits;
- vm->page_size = vm_guest_mode_params[mode].page_size;
- vm->page_shift = vm_guest_mode_params[mode].page_shift;
+ vm->pa_bits = vm_guest_mode_params[vm->mode].pa_bits;
+ vm->va_bits = vm_guest_mode_params[vm->mode].va_bits;
+ vm->page_size = vm_guest_mode_params[vm->mode].page_size;
+ vm->page_shift = vm_guest_mode_params[vm->mode].page_shift;
/* Setup mode specific traits. */
switch (vm->mode) {
@@ -187,18 +298,30 @@ struct kvm_vm *_vm_create(enum vm_guest_mode mode, uint64_t phy_pages, int perm)
vm->pgtable_levels = 3;
break;
case VM_MODE_P40V48_4K:
+ case VM_MODE_P36V48_4K:
vm->pgtable_levels = 4;
break;
case VM_MODE_P40V48_64K:
+ case VM_MODE_P36V48_64K:
+ vm->pgtable_levels = 3;
+ break;
+ case VM_MODE_P52V48_16K:
+ case VM_MODE_P48V48_16K:
+ case VM_MODE_P40V48_16K:
+ case VM_MODE_P36V48_16K:
+ vm->pgtable_levels = 4;
+ break;
+ case VM_MODE_P36V47_16K:
vm->pgtable_levels = 3;
break;
case VM_MODE_PXXV48_4K:
#ifdef __x86_64__
kvm_get_cpu_address_width(&vm->pa_bits, &vm->va_bits);
+ kvm_init_vm_address_properties(vm);
/*
* Ignore KVM support for 5-level paging (vm->va_bits == 57),
* it doesn't take effect unless a CR4.LA57 is set, which it
- * isn't for this VM_MODE.
+ * isn't for this mode (48-bit virtual address space).
*/
TEST_ASSERT(vm->va_bits == 48 || vm->va_bits == 57,
"Linear address width (%d bits) not supported",
@@ -211,40 +334,158 @@ struct kvm_vm *_vm_create(enum vm_guest_mode mode, uint64_t phy_pages, int perm)
TEST_FAIL("VM_MODE_PXXV48_4K not supported on non-x86 platforms");
#endif
break;
+ case VM_MODE_P47V64_4K:
+ vm->pgtable_levels = 5;
+ break;
+ case VM_MODE_P44V64_4K:
+ vm->pgtable_levels = 5;
+ break;
default:
- TEST_FAIL("Unknown guest mode, mode: 0x%x", mode);
+ TEST_FAIL("Unknown guest mode: 0x%x", vm->mode);
}
#ifdef __aarch64__
+ TEST_ASSERT(!vm->type, "ARM doesn't support test-provided types");
if (vm->pa_bits != 40)
vm->type = KVM_VM_TYPE_ARM_IPA_SIZE(vm->pa_bits);
#endif
- vm_open(vm, perm);
+ vm_open(vm);
/* Limit to VA-bit canonical virtual addresses. */
vm->vpages_valid = sparsebit_alloc();
- sparsebit_set_num(vm->vpages_valid,
- 0, (1ULL << (vm->va_bits - 1)) >> vm->page_shift);
- sparsebit_set_num(vm->vpages_valid,
- (~((1ULL << (vm->va_bits - 1)) - 1)) >> vm->page_shift,
- (1ULL << (vm->va_bits - 1)) >> vm->page_shift);
+ vm_vaddr_populate_bitmap(vm);
/* Limit physical addresses to PA-bits. */
- vm->max_gfn = ((1ULL << vm->pa_bits) >> vm->page_shift) - 1;
+ vm->max_gfn = vm_compute_max_gfn(vm);
/* Allocate and setup memory for guest. */
vm->vpages_mapped = sparsebit_alloc();
- if (phy_pages != 0)
- vm_userspace_mem_region_add(vm, VM_MEM_SRC_ANONYMOUS,
- 0, 0, phy_pages, 0);
return vm;
}
-struct kvm_vm *vm_create(enum vm_guest_mode mode, uint64_t phy_pages, int perm)
+static uint64_t vm_nr_pages_required(enum vm_guest_mode mode,
+ uint32_t nr_runnable_vcpus,
+ uint64_t extra_mem_pages)
+{
+ uint64_t page_size = vm_guest_mode_params[mode].page_size;
+ uint64_t nr_pages;
+
+ TEST_ASSERT(nr_runnable_vcpus,
+ "Use vm_create_barebones() for VMs that _never_ have vCPUs");
+
+ TEST_ASSERT(nr_runnable_vcpus <= kvm_check_cap(KVM_CAP_MAX_VCPUS),
+ "nr_vcpus = %d too large for host, max-vcpus = %d",
+ nr_runnable_vcpus, kvm_check_cap(KVM_CAP_MAX_VCPUS));
+
+ /*
+ * Arbitrarily allocate 512 pages (2mb when page size is 4kb) for the
+ * test code and other per-VM assets that will be loaded into memslot0.
+ */
+ nr_pages = 512;
+
+ /* Account for the per-vCPU stacks on behalf of the test. */
+ nr_pages += nr_runnable_vcpus * DEFAULT_STACK_PGS;
+
+ /*
+ * Account for the number of pages needed for the page tables. The
+ * maximum page table size for a memory region will be when the
+ * smallest page size is used. Considering each page contains x page
+ * table descriptors, the total extra size for page tables (for extra
+ * N pages) will be: N/x+N/x^2+N/x^3+... which is definitely smaller
+ * than N/x*2.
+ */
+ nr_pages += (nr_pages + extra_mem_pages) / PTES_PER_MIN_PAGE * 2;
+
+ /* Account for the number of pages needed by ucall. */
+ nr_pages += ucall_nr_pages_required(page_size);
+
+ return vm_adjust_num_guest_pages(mode, nr_pages);
+}
+
+struct kvm_vm *__vm_create(struct vm_shape shape, uint32_t nr_runnable_vcpus,
+ uint64_t nr_extra_pages)
+{
+ uint64_t nr_pages = vm_nr_pages_required(shape.mode, nr_runnable_vcpus,
+ nr_extra_pages);
+ struct userspace_mem_region *slot0;
+ struct kvm_vm *vm;
+ int i;
+
+ pr_debug("%s: mode='%s' type='%d', pages='%ld'\n", __func__,
+ vm_guest_mode_string(shape.mode), shape.type, nr_pages);
+
+ vm = ____vm_create(shape);
+
+ vm_userspace_mem_region_add(vm, VM_MEM_SRC_ANONYMOUS, 0, 0, nr_pages, 0);
+ for (i = 0; i < NR_MEM_REGIONS; i++)
+ vm->memslots[i] = 0;
+
+ kvm_vm_elf_load(vm, program_invocation_name);
+
+ /*
+ * TODO: Add proper defines to protect the library's memslots, and then
+ * carve out memslot1 for the ucall MMIO address. KVM treats writes to
+ * read-only memslots as MMIO, and creating a read-only memslot for the
+ * MMIO region would prevent silently clobbering the MMIO region.
+ */
+ slot0 = memslot2region(vm, 0);
+ ucall_init(vm, slot0->region.guest_phys_addr + slot0->region.memory_size);
+
+ kvm_arch_vm_post_create(vm);
+
+ return vm;
+}
+
+/*
+ * VM Create with customized parameters
+ *
+ * Input Args:
+ * mode - VM Mode (e.g. VM_MODE_P52V48_4K)
+ * nr_vcpus - VCPU count
+ * extra_mem_pages - Non-slot0 physical memory total size
+ * guest_code - Guest entry point
+ * vcpuids - VCPU IDs
+ *
+ * Output Args: None
+ *
+ * Return:
+ * Pointer to opaque structure that describes the created VM.
+ *
+ * Creates a VM with the mode specified by mode (e.g. VM_MODE_P52V48_4K).
+ * extra_mem_pages is only used to calculate the maximum page table size,
+ * no real memory allocation for non-slot0 memory in this function.
+ */
+struct kvm_vm *__vm_create_with_vcpus(struct vm_shape shape, uint32_t nr_vcpus,
+ uint64_t extra_mem_pages,
+ void *guest_code, struct kvm_vcpu *vcpus[])
+{
+ struct kvm_vm *vm;
+ int i;
+
+ TEST_ASSERT(!nr_vcpus || vcpus, "Must provide vCPU array");
+
+ vm = __vm_create(shape, nr_vcpus, extra_mem_pages);
+
+ for (i = 0; i < nr_vcpus; ++i)
+ vcpus[i] = vm_vcpu_add(vm, i, guest_code);
+
+ return vm;
+}
+
+struct kvm_vm *__vm_create_shape_with_one_vcpu(struct vm_shape shape,
+ struct kvm_vcpu **vcpu,
+ uint64_t extra_mem_pages,
+ void *guest_code)
{
- return _vm_create(mode, phy_pages, perm);
+ struct kvm_vcpu *vcpus[1];
+ struct kvm_vm *vm;
+
+ vm = __vm_create_with_vcpus(shape, 1, extra_mem_pages, guest_code, vcpus);
+
+ *vcpu = vcpus[0];
+ return vm;
}
/*
@@ -252,7 +493,6 @@ struct kvm_vm *vm_create(enum vm_guest_mode mode, uint64_t phy_pages, int perm)
*
* Input Args:
* vm - VM that has been released before
- * perm - permission
*
* Output Args: None
*
@@ -260,17 +500,19 @@ struct kvm_vm *vm_create(enum vm_guest_mode mode, uint64_t phy_pages, int perm)
* global state, such as the irqchip and the memory regions that are mapped
* into the guest.
*/
-void kvm_vm_restart(struct kvm_vm *vmp, int perm)
+void kvm_vm_restart(struct kvm_vm *vmp)
{
+ int ctr;
struct userspace_mem_region *region;
- vm_open(vmp, perm);
+ vm_open(vmp);
if (vmp->has_irqchip)
vm_create_irqchip(vmp);
- list_for_each_entry(region, &vmp->userspace_mem_regions, list) {
- int ret = ioctl(vmp->fd, KVM_SET_USER_MEMORY_REGION, &region->region);
- TEST_ASSERT(ret == 0, "KVM_SET_USER_MEMORY_REGION IOCTL failed,\n"
+ hash_for_each(vmp->regions.slot_hash, ctr, region, slot_node) {
+ int ret = ioctl(vmp->fd, KVM_SET_USER_MEMORY_REGION2, &region->region);
+
+ TEST_ASSERT(ret == 0, "KVM_SET_USER_MEMORY_REGION2 IOCTL failed,\n"
" rc: %i errno: %i\n"
" slot: %u flags: 0x%x\n"
" guest_phys_addr: 0x%llx size: 0x%llx",
@@ -281,27 +523,87 @@ void kvm_vm_restart(struct kvm_vm *vmp, int perm)
}
}
-void kvm_vm_get_dirty_log(struct kvm_vm *vm, int slot, void *log)
+__weak struct kvm_vcpu *vm_arch_vcpu_recreate(struct kvm_vm *vm,
+ uint32_t vcpu_id)
{
- struct kvm_dirty_log args = { .dirty_bitmap = log, .slot = slot };
- int ret;
+ return __vm_vcpu_add(vm, vcpu_id);
+}
- ret = ioctl(vm->fd, KVM_GET_DIRTY_LOG, &args);
- TEST_ASSERT(ret == 0, "%s: KVM_GET_DIRTY_LOG failed: %s",
- __func__, strerror(-ret));
+struct kvm_vcpu *vm_recreate_with_one_vcpu(struct kvm_vm *vm)
+{
+ kvm_vm_restart(vm);
+
+ return vm_vcpu_recreate(vm, 0);
}
-void kvm_vm_clear_dirty_log(struct kvm_vm *vm, int slot, void *log,
- uint64_t first_page, uint32_t num_pages)
+void kvm_pin_this_task_to_pcpu(uint32_t pcpu)
{
- struct kvm_clear_dirty_log args = { .dirty_bitmap = log, .slot = slot,
- .first_page = first_page,
- .num_pages = num_pages };
- int ret;
+ cpu_set_t mask;
+ int r;
- ret = ioctl(vm->fd, KVM_CLEAR_DIRTY_LOG, &args);
- TEST_ASSERT(ret == 0, "%s: KVM_CLEAR_DIRTY_LOG failed: %s",
- __func__, strerror(-ret));
+ CPU_ZERO(&mask);
+ CPU_SET(pcpu, &mask);
+ r = sched_setaffinity(0, sizeof(mask), &mask);
+ TEST_ASSERT(!r, "sched_setaffinity() failed for pCPU '%u'.", pcpu);
+}
+
+static uint32_t parse_pcpu(const char *cpu_str, const cpu_set_t *allowed_mask)
+{
+ uint32_t pcpu = atoi_non_negative("CPU number", cpu_str);
+
+ TEST_ASSERT(CPU_ISSET(pcpu, allowed_mask),
+ "Not allowed to run on pCPU '%d', check cgroups?", pcpu);
+ return pcpu;
+}
+
+void kvm_print_vcpu_pinning_help(void)
+{
+ const char *name = program_invocation_name;
+
+ printf(" -c: Pin tasks to physical CPUs. Takes a list of comma separated\n"
+ " values (target pCPU), one for each vCPU, plus an optional\n"
+ " entry for the main application task (specified via entry\n"
+ " <nr_vcpus + 1>). If used, entries must be provided for all\n"
+ " vCPUs, i.e. pinning vCPUs is all or nothing.\n\n"
+ " E.g. to create 3 vCPUs, pin vCPU0=>pCPU22, vCPU1=>pCPU23,\n"
+ " vCPU2=>pCPU24, and pin the application task to pCPU50:\n\n"
+ " %s -v 3 -c 22,23,24,50\n\n"
+ " To leave the application task unpinned, drop the final entry:\n\n"
+ " %s -v 3 -c 22,23,24\n\n"
+ " (default: no pinning)\n", name, name);
+}
+
+void kvm_parse_vcpu_pinning(const char *pcpus_string, uint32_t vcpu_to_pcpu[],
+ int nr_vcpus)
+{
+ cpu_set_t allowed_mask;
+ char *cpu, *cpu_list;
+ char delim[2] = ",";
+ int i, r;
+
+ cpu_list = strdup(pcpus_string);
+ TEST_ASSERT(cpu_list, "strdup() allocation failed.");
+
+ r = sched_getaffinity(0, sizeof(allowed_mask), &allowed_mask);
+ TEST_ASSERT(!r, "sched_getaffinity() failed");
+
+ cpu = strtok(cpu_list, delim);
+
+ /* 1. Get all pcpus for vcpus. */
+ for (i = 0; i < nr_vcpus; i++) {
+ TEST_ASSERT(cpu, "pCPU not provided for vCPU '%d'", i);
+ vcpu_to_pcpu[i] = parse_pcpu(cpu, &allowed_mask);
+ cpu = strtok(NULL, delim);
+ }
+
+ /* 2. Check if the main worker needs to be pinned. */
+ if (cpu) {
+ kvm_pin_this_task_to_pcpu(parse_pcpu(cpu, &allowed_mask));
+ cpu = strtok(NULL, delim);
+ }
+
+ TEST_ASSERT(!cpu, "pCPU list contains trailing garbage characters '%s'", cpu);
+ free(cpu_list);
}
/*
@@ -326,74 +628,29 @@ void kvm_vm_clear_dirty_log(struct kvm_vm *vm, int slot, void *log,
static struct userspace_mem_region *
userspace_mem_region_find(struct kvm_vm *vm, uint64_t start, uint64_t end)
{
- struct userspace_mem_region *region;
+ struct rb_node *node;
- list_for_each_entry(region, &vm->userspace_mem_regions, list) {
+ for (node = vm->regions.gpa_tree.rb_node; node; ) {
+ struct userspace_mem_region *region =
+ container_of(node, struct userspace_mem_region, gpa_node);
uint64_t existing_start = region->region.guest_phys_addr;
uint64_t existing_end = region->region.guest_phys_addr
+ region->region.memory_size - 1;
if (start <= existing_end && end >= existing_start)
return region;
+
+ if (start < existing_start)
+ node = node->rb_left;
+ else
+ node = node->rb_right;
}
return NULL;
}
-/*
- * KVM Userspace Memory Region Find
- *
- * Input Args:
- * vm - Virtual Machine
- * start - Starting VM physical address
- * end - Ending VM physical address, inclusive.
- *
- * Output Args: None
- *
- * Return:
- * Pointer to overlapping region, NULL if no such region.
- *
- * Public interface to userspace_mem_region_find. Allows tests to look up
- * the memslot datastructure for a given range of guest physical memory.
- */
-struct kvm_userspace_memory_region *
-kvm_userspace_memory_region_find(struct kvm_vm *vm, uint64_t start,
- uint64_t end)
-{
- struct userspace_mem_region *region;
-
- region = userspace_mem_region_find(vm, start, end);
- if (!region)
- return NULL;
-
- return &region->region;
-}
-
-/*
- * VCPU Find
- *
- * Input Args:
- * vm - Virtual Machine
- * vcpuid - VCPU ID
- *
- * Output Args: None
- *
- * Return:
- * Pointer to VCPU structure
- *
- * Locates a vcpu structure that describes the VCPU specified by vcpuid and
- * returns a pointer to it. Returns NULL if the VM doesn't contain a VCPU
- * for the specified vcpuid.
- */
-struct vcpu *vcpu_find(struct kvm_vm *vm, uint32_t vcpuid)
+__weak void vcpu_arch_free(struct kvm_vcpu *vcpu)
{
- struct vcpu *vcpu;
- list_for_each_entry(vcpu, &vm->vcpus, list) {
- if (vcpu->id == vcpuid)
- return vcpu;
- }
-
- return NULL;
}
/*
@@ -408,53 +665,70 @@ struct vcpu *vcpu_find(struct kvm_vm *vm, uint32_t vcpuid)
*
* Removes a vCPU from a VM and frees its resources.
*/
-static void vm_vcpu_rm(struct vcpu *vcpu)
+static void vm_vcpu_rm(struct kvm_vm *vm, struct kvm_vcpu *vcpu)
{
int ret;
- ret = munmap(vcpu->state, sizeof(*vcpu->state));
- TEST_ASSERT(ret == 0, "munmap of VCPU fd failed, rc: %i "
- "errno: %i", ret, errno);
- close(vcpu->fd);
- TEST_ASSERT(ret == 0, "Close of VCPU fd failed, rc: %i "
- "errno: %i", ret, errno);
+ if (vcpu->dirty_gfns) {
+ ret = munmap(vcpu->dirty_gfns, vm->dirty_ring_size);
+ TEST_ASSERT(!ret, __KVM_SYSCALL_ERROR("munmap()", ret));
+ vcpu->dirty_gfns = NULL;
+ }
+
+ ret = munmap(vcpu->run, vcpu_mmap_sz());
+ TEST_ASSERT(!ret, __KVM_SYSCALL_ERROR("munmap()", ret));
+
+ ret = close(vcpu->fd);
+ TEST_ASSERT(!ret, __KVM_SYSCALL_ERROR("close()", ret));
list_del(&vcpu->list);
+
+ vcpu_arch_free(vcpu);
free(vcpu);
}
void kvm_vm_release(struct kvm_vm *vmp)
{
- struct vcpu *vcpu, *tmp;
+ struct kvm_vcpu *vcpu, *tmp;
int ret;
list_for_each_entry_safe(vcpu, tmp, &vmp->vcpus, list)
- vm_vcpu_rm(vcpu);
+ vm_vcpu_rm(vmp, vcpu);
ret = close(vmp->fd);
- TEST_ASSERT(ret == 0, "Close of vm fd failed,\n"
- " vmp->fd: %i rc: %i errno: %i", vmp->fd, ret, errno);
+ TEST_ASSERT(!ret, __KVM_SYSCALL_ERROR("close()", ret));
- close(vmp->kvm_fd);
- TEST_ASSERT(ret == 0, "Close of /dev/kvm fd failed,\n"
- " vmp->kvm_fd: %i rc: %i errno: %i", vmp->kvm_fd, ret, errno);
+ ret = close(vmp->kvm_fd);
+ TEST_ASSERT(!ret, __KVM_SYSCALL_ERROR("close()", ret));
}
static void __vm_mem_region_delete(struct kvm_vm *vm,
- struct userspace_mem_region *region)
+ struct userspace_mem_region *region,
+ bool unlink)
{
int ret;
- list_del(&region->list);
+ if (unlink) {
+ rb_erase(&region->gpa_node, &vm->regions.gpa_tree);
+ rb_erase(&region->hva_node, &vm->regions.hva_tree);
+ hash_del(&region->slot_node);
+ }
region->region.memory_size = 0;
- ret = ioctl(vm->fd, KVM_SET_USER_MEMORY_REGION, &region->region);
- TEST_ASSERT(ret == 0, "KVM_SET_USER_MEMORY_REGION IOCTL failed, "
- "rc: %i errno: %i", ret, errno);
+ vm_ioctl(vm, KVM_SET_USER_MEMORY_REGION2, &region->region);
sparsebit_free(&region->unused_phy_pages);
+ sparsebit_free(&region->protected_phy_pages);
ret = munmap(region->mmap_start, region->mmap_size);
- TEST_ASSERT(ret == 0, "munmap failed, rc: %i errno: %i", ret, errno);
+ TEST_ASSERT(!ret, __KVM_SYSCALL_ERROR("munmap()", ret));
+ if (region->fd >= 0) {
+ /* There's an extra map when using shared memory. */
+ ret = munmap(region->mmap_alias, region->mmap_size);
+ TEST_ASSERT(!ret, __KVM_SYSCALL_ERROR("munmap()", ret));
+ close(region->fd);
+ }
+ if (region->region.guest_memfd >= 0)
+ close(region->region.guest_memfd);
free(region);
}
@@ -464,14 +738,22 @@ static void __vm_mem_region_delete(struct kvm_vm *vm,
*/
void kvm_vm_free(struct kvm_vm *vmp)
{
- struct userspace_mem_region *region, *tmp;
+ int ctr;
+ struct hlist_node *node;
+ struct userspace_mem_region *region;
if (vmp == NULL)
return;
+ /* Free cached stats metadata and close FD */
+ if (vmp->stats_fd) {
+ free(vmp->stats_desc);
+ close(vmp->stats_fd);
+ }
+
/* Free userspace_mem_regions. */
- list_for_each_entry_safe(region, tmp, &vmp->userspace_mem_regions, list)
- __vm_mem_region_delete(vmp, region);
+ hash_for_each_safe(vmp->regions.slot_hash, ctr, node, region, slot_node)
+ __vm_mem_region_delete(vmp, region, false);
/* Free sparsebit arrays. */
sparsebit_free(&vmp->vpages_valid);
@@ -483,6 +765,26 @@ void kvm_vm_free(struct kvm_vm *vmp)
free(vmp);
}
+int kvm_memfd_alloc(size_t size, bool hugepages)
+{
+ int memfd_flags = MFD_CLOEXEC;
+ int fd, r;
+
+ if (hugepages)
+ memfd_flags |= MFD_HUGETLB;
+
+ fd = memfd_create("kvm_selftest", memfd_flags);
+ TEST_ASSERT(fd != -1, __KVM_SYSCALL_ERROR("memfd_create()", fd));
+
+ r = ftruncate(fd, size);
+ TEST_ASSERT(!r, __KVM_SYSCALL_ERROR("ftruncate()", r));
+
+ r = fallocate(fd, FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE, 0, size);
+ TEST_ASSERT(!r, __KVM_SYSCALL_ERROR("fallocate()", r));
+
+ return fd;
+}
+
/*
* Memory Compare, host virtual to guest virtual
*
@@ -553,36 +855,119 @@ int kvm_memcmp_hva_gva(void *hva, struct kvm_vm *vm, vm_vaddr_t gva, size_t len)
return 0;
}
-/*
- * VM Userspace Memory Region Add
- *
- * Input Args:
- * vm - Virtual Machine
- * backing_src - Storage source for this region.
- * NULL to use anonymous memory.
- * guest_paddr - Starting guest physical address
- * slot - KVM region slot
- * npages - Number of physical pages
- * flags - KVM memory region flags (e.g. KVM_MEM_LOG_DIRTY_PAGES)
- *
- * Output Args: None
- *
- * Return: None
- *
- * Allocates a memory area of the number of pages specified by npages
- * and maps it to the VM specified by vm, at a starting physical address
- * given by guest_paddr. The region is created with a KVM region slot
- * given by slot, which must be unique and < KVM_MEM_SLOTS_NUM. The
- * region is created with the flags given by flags.
- */
-void vm_userspace_mem_region_add(struct kvm_vm *vm,
- enum vm_mem_backing_src_type src_type,
- uint64_t guest_paddr, uint32_t slot, uint64_t npages,
- uint32_t flags)
+static void vm_userspace_mem_region_gpa_insert(struct rb_root *gpa_tree,
+ struct userspace_mem_region *region)
+{
+ struct rb_node **cur, *parent;
+
+ for (cur = &gpa_tree->rb_node, parent = NULL; *cur; ) {
+ struct userspace_mem_region *cregion;
+
+ cregion = container_of(*cur, typeof(*cregion), gpa_node);
+ parent = *cur;
+ if (region->region.guest_phys_addr <
+ cregion->region.guest_phys_addr)
+ cur = &(*cur)->rb_left;
+ else {
+ TEST_ASSERT(region->region.guest_phys_addr !=
+ cregion->region.guest_phys_addr,
+ "Duplicate GPA in region tree");
+
+ cur = &(*cur)->rb_right;
+ }
+ }
+
+ rb_link_node(&region->gpa_node, parent, cur);
+ rb_insert_color(&region->gpa_node, gpa_tree);
+}
+
+static void vm_userspace_mem_region_hva_insert(struct rb_root *hva_tree,
+ struct userspace_mem_region *region)
+{
+ struct rb_node **cur, *parent;
+
+ for (cur = &hva_tree->rb_node, parent = NULL; *cur; ) {
+ struct userspace_mem_region *cregion;
+
+ cregion = container_of(*cur, typeof(*cregion), hva_node);
+ parent = *cur;
+ if (region->host_mem < cregion->host_mem)
+ cur = &(*cur)->rb_left;
+ else {
+ TEST_ASSERT(region->host_mem !=
+ cregion->host_mem,
+ "Duplicate HVA in region tree");
+
+ cur = &(*cur)->rb_right;
+ }
+ }
+
+ rb_link_node(&region->hva_node, parent, cur);
+ rb_insert_color(&region->hva_node, hva_tree);
+}
+
+
+int __vm_set_user_memory_region(struct kvm_vm *vm, uint32_t slot, uint32_t flags,
+ uint64_t gpa, uint64_t size, void *hva)
+{
+ struct kvm_userspace_memory_region region = {
+ .slot = slot,
+ .flags = flags,
+ .guest_phys_addr = gpa,
+ .memory_size = size,
+ .userspace_addr = (uintptr_t)hva,
+ };
+
+ return ioctl(vm->fd, KVM_SET_USER_MEMORY_REGION, &region);
+}
+
+void vm_set_user_memory_region(struct kvm_vm *vm, uint32_t slot, uint32_t flags,
+ uint64_t gpa, uint64_t size, void *hva)
+{
+ int ret = __vm_set_user_memory_region(vm, slot, flags, gpa, size, hva);
+
+ TEST_ASSERT(!ret, "KVM_SET_USER_MEMORY_REGION failed, errno = %d (%s)",
+ errno, strerror(errno));
+}
+
+int __vm_set_user_memory_region2(struct kvm_vm *vm, uint32_t slot, uint32_t flags,
+ uint64_t gpa, uint64_t size, void *hva,
+ uint32_t guest_memfd, uint64_t guest_memfd_offset)
+{
+ struct kvm_userspace_memory_region2 region = {
+ .slot = slot,
+ .flags = flags,
+ .guest_phys_addr = gpa,
+ .memory_size = size,
+ .userspace_addr = (uintptr_t)hva,
+ .guest_memfd = guest_memfd,
+ .guest_memfd_offset = guest_memfd_offset,
+ };
+
+ return ioctl(vm->fd, KVM_SET_USER_MEMORY_REGION2, &region);
+}
+
+void vm_set_user_memory_region2(struct kvm_vm *vm, uint32_t slot, uint32_t flags,
+ uint64_t gpa, uint64_t size, void *hva,
+ uint32_t guest_memfd, uint64_t guest_memfd_offset)
+{
+ int ret = __vm_set_user_memory_region2(vm, slot, flags, gpa, size, hva,
+ guest_memfd, guest_memfd_offset);
+
+ TEST_ASSERT(!ret, "KVM_SET_USER_MEMORY_REGION2 failed, errno = %d (%s)",
+ errno, strerror(errno));
+}
+
+
+/* FIXME: This thing needs to be ripped apart and rewritten. */
+void vm_mem_add(struct kvm_vm *vm, enum vm_mem_backing_src_type src_type,
+ uint64_t guest_paddr, uint32_t slot, uint64_t npages,
+ uint32_t flags, int guest_memfd, uint64_t guest_memfd_offset)
{
int ret;
struct userspace_mem_region *region;
- size_t huge_page_size = KVM_UTIL_PGS_PER_HUGEPG * vm->page_size;
+ size_t backing_src_pagesz = get_backing_src_pagesz(src_type);
+ size_t mem_size = npages * vm->page_size;
size_t alignment;
TEST_ASSERT(vm_adjust_num_guest_pages(vm->mode, npages) == npages,
@@ -617,7 +1002,8 @@ void vm_userspace_mem_region_add(struct kvm_vm *vm,
(uint64_t) region->region.memory_size);
/* Confirm no region with the requested slot already exists. */
- list_for_each_entry(region, &vm->userspace_mem_regions, list) {
+ hash_for_each_possible(vm->regions.slot_hash, region, slot_node,
+ slot) {
if (region->region.slot != slot)
continue;
@@ -634,7 +1020,7 @@ void vm_userspace_mem_region_add(struct kvm_vm *vm,
/* Allocate and initialize new mem region structure. */
region = calloc(1, sizeof(*region));
TEST_ASSERT(region != NULL, "Insufficient Memory");
- region->mmap_size = npages * vm->page_size;
+ region->mmap_size = mem_size;
#ifdef __s390x__
/* On s390x, the host address must be aligned to 1M (due to PGSTEs) */
@@ -643,37 +1029,79 @@ void vm_userspace_mem_region_add(struct kvm_vm *vm,
alignment = 1;
#endif
+ /*
+ * When using THP mmap is not guaranteed to returned a hugepage aligned
+ * address so we have to pad the mmap. Padding is not needed for HugeTLB
+ * because mmap will always return an address aligned to the HugeTLB
+ * page size.
+ */
if (src_type == VM_MEM_SRC_ANONYMOUS_THP)
- alignment = max(huge_page_size, alignment);
+ alignment = max(backing_src_pagesz, alignment);
+
+ TEST_ASSERT_EQ(guest_paddr, align_up(guest_paddr, backing_src_pagesz));
/* Add enough memory to align up if necessary */
if (alignment > 1)
region->mmap_size += alignment;
+ region->fd = -1;
+ if (backing_src_is_shared(src_type))
+ region->fd = kvm_memfd_alloc(region->mmap_size,
+ src_type == VM_MEM_SRC_SHARED_HUGETLB);
+
region->mmap_start = mmap(NULL, region->mmap_size,
PROT_READ | PROT_WRITE,
- MAP_PRIVATE | MAP_ANONYMOUS
- | (src_type == VM_MEM_SRC_ANONYMOUS_HUGETLB ? MAP_HUGETLB : 0),
- -1, 0);
+ vm_mem_backing_src_alias(src_type)->flag,
+ region->fd, 0);
TEST_ASSERT(region->mmap_start != MAP_FAILED,
- "test_malloc failed, mmap_start: %p errno: %i",
- region->mmap_start, errno);
+ __KVM_SYSCALL_ERROR("mmap()", (int)(unsigned long)MAP_FAILED));
+
+ TEST_ASSERT(!is_backing_src_hugetlb(src_type) ||
+ region->mmap_start == align_ptr_up(region->mmap_start, backing_src_pagesz),
+ "mmap_start %p is not aligned to HugeTLB page size 0x%lx",
+ region->mmap_start, backing_src_pagesz);
/* Align host address */
- region->host_mem = align(region->mmap_start, alignment);
+ region->host_mem = align_ptr_up(region->mmap_start, alignment);
/* As needed perform madvise */
- if (src_type == VM_MEM_SRC_ANONYMOUS || src_type == VM_MEM_SRC_ANONYMOUS_THP) {
- ret = madvise(region->host_mem, npages * vm->page_size,
- src_type == VM_MEM_SRC_ANONYMOUS ? MADV_NOHUGEPAGE : MADV_HUGEPAGE);
- TEST_ASSERT(ret == 0, "madvise failed,\n"
- " addr: %p\n"
- " length: 0x%lx\n"
- " src_type: %x",
- region->host_mem, npages * vm->page_size, src_type);
+ if ((src_type == VM_MEM_SRC_ANONYMOUS ||
+ src_type == VM_MEM_SRC_ANONYMOUS_THP) && thp_configured()) {
+ ret = madvise(region->host_mem, mem_size,
+ src_type == VM_MEM_SRC_ANONYMOUS ? MADV_NOHUGEPAGE : MADV_HUGEPAGE);
+ TEST_ASSERT(ret == 0, "madvise failed, addr: %p length: 0x%lx src_type: %s",
+ region->host_mem, mem_size,
+ vm_mem_backing_src_alias(src_type)->name);
+ }
+
+ region->backing_src_type = src_type;
+
+ if (flags & KVM_MEM_GUEST_MEMFD) {
+ if (guest_memfd < 0) {
+ uint32_t guest_memfd_flags = 0;
+ TEST_ASSERT(!guest_memfd_offset,
+ "Offset must be zero when creating new guest_memfd");
+ guest_memfd = vm_create_guest_memfd(vm, mem_size, guest_memfd_flags);
+ } else {
+ /*
+ * Install a unique fd for each memslot so that the fd
+ * can be closed when the region is deleted without
+ * needing to track if the fd is owned by the framework
+ * or by the caller.
+ */
+ guest_memfd = dup(guest_memfd);
+ TEST_ASSERT(guest_memfd >= 0, __KVM_SYSCALL_ERROR("dup()", guest_memfd));
+ }
+
+ region->region.guest_memfd = guest_memfd;
+ region->region.guest_memfd_offset = guest_memfd_offset;
+ } else {
+ region->region.guest_memfd = -1;
}
region->unused_phy_pages = sparsebit_alloc();
+ if (vm_arch_has_protected_memory(vm))
+ region->protected_phy_pages = sparsebit_alloc();
sparsebit_set_num(region->unused_phy_pages,
guest_paddr >> vm->page_shift, npages);
region->region.slot = slot;
@@ -681,16 +1109,40 @@ void vm_userspace_mem_region_add(struct kvm_vm *vm,
region->region.guest_phys_addr = guest_paddr;
region->region.memory_size = npages * vm->page_size;
region->region.userspace_addr = (uintptr_t) region->host_mem;
- ret = ioctl(vm->fd, KVM_SET_USER_MEMORY_REGION, &region->region);
- TEST_ASSERT(ret == 0, "KVM_SET_USER_MEMORY_REGION IOCTL failed,\n"
+ ret = __vm_ioctl(vm, KVM_SET_USER_MEMORY_REGION2, &region->region);
+ TEST_ASSERT(ret == 0, "KVM_SET_USER_MEMORY_REGION2 IOCTL failed,\n"
" rc: %i errno: %i\n"
" slot: %u flags: 0x%x\n"
- " guest_phys_addr: 0x%lx size: 0x%lx",
+ " guest_phys_addr: 0x%lx size: 0x%lx guest_memfd: %d",
ret, errno, slot, flags,
- guest_paddr, (uint64_t) region->region.memory_size);
+ guest_paddr, (uint64_t) region->region.memory_size,
+ region->region.guest_memfd);
+
+ /* Add to quick lookup data structures */
+ vm_userspace_mem_region_gpa_insert(&vm->regions.gpa_tree, region);
+ vm_userspace_mem_region_hva_insert(&vm->regions.hva_tree, region);
+ hash_add(vm->regions.slot_hash, &region->slot_node, slot);
+
+ /* If shared memory, create an alias. */
+ if (region->fd >= 0) {
+ region->mmap_alias = mmap(NULL, region->mmap_size,
+ PROT_READ | PROT_WRITE,
+ vm_mem_backing_src_alias(src_type)->flag,
+ region->fd, 0);
+ TEST_ASSERT(region->mmap_alias != MAP_FAILED,
+ __KVM_SYSCALL_ERROR("mmap()", (int)(unsigned long)MAP_FAILED));
+
+ /* Align host alias address */
+ region->host_alias = align_ptr_up(region->mmap_alias, alignment);
+ }
+}
- /* Add to linked-list of memory regions. */
- list_add(&region->list, &vm->userspace_mem_regions);
+void vm_userspace_mem_region_add(struct kvm_vm *vm,
+ enum vm_mem_backing_src_type src_type,
+ uint64_t guest_paddr, uint32_t slot,
+ uint64_t npages, uint32_t flags)
+{
+ vm_mem_add(vm, src_type, guest_paddr, slot, npages, flags, -1, 0);
}
/*
@@ -713,10 +1165,10 @@ memslot2region(struct kvm_vm *vm, uint32_t memslot)
{
struct userspace_mem_region *region;
- list_for_each_entry(region, &vm->userspace_mem_regions, list) {
+ hash_for_each_possible(vm->regions.slot_hash, region, slot_node,
+ memslot)
if (region->region.slot == memslot)
return region;
- }
fprintf(stderr, "No mem region with the requested slot found,\n"
" requested slot: %u\n", memslot);
@@ -749,9 +1201,9 @@ void vm_mem_region_set_flags(struct kvm_vm *vm, uint32_t slot, uint32_t flags)
region->region.flags = flags;
- ret = ioctl(vm->fd, KVM_SET_USER_MEMORY_REGION, &region->region);
+ ret = __vm_ioctl(vm, KVM_SET_USER_MEMORY_REGION2, &region->region);
- TEST_ASSERT(ret == 0, "KVM_SET_USER_MEMORY_REGION IOCTL failed,\n"
+ TEST_ASSERT(ret == 0, "KVM_SET_USER_MEMORY_REGION2 IOCTL failed,\n"
" rc: %i errno: %i slot: %u flags: 0x%x",
ret, errno, slot, flags);
}
@@ -779,9 +1231,9 @@ void vm_mem_region_move(struct kvm_vm *vm, uint32_t slot, uint64_t new_gpa)
region->region.guest_phys_addr = new_gpa;
- ret = ioctl(vm->fd, KVM_SET_USER_MEMORY_REGION, &region->region);
+ ret = __vm_ioctl(vm, KVM_SET_USER_MEMORY_REGION2, &region->region);
- TEST_ASSERT(!ret, "KVM_SET_USER_MEMORY_REGION failed\n"
+ TEST_ASSERT(!ret, "KVM_SET_USER_MEMORY_REGION2 failed\n"
"ret: %i errno: %i slot: %u new_gpa: 0x%lx",
ret, errno, slot, new_gpa);
}
@@ -801,85 +1253,97 @@ void vm_mem_region_move(struct kvm_vm *vm, uint32_t slot, uint64_t new_gpa)
*/
void vm_mem_region_delete(struct kvm_vm *vm, uint32_t slot)
{
- __vm_mem_region_delete(vm, memslot2region(vm, slot));
+ __vm_mem_region_delete(vm, memslot2region(vm, slot), true);
}
-/*
- * VCPU mmap Size
- *
- * Input Args: None
- *
- * Output Args: None
- *
- * Return:
- * Size of VCPU state
- *
- * Returns the size of the structure pointed to by the return value
- * of vcpu_state().
- */
+void vm_guest_mem_fallocate(struct kvm_vm *vm, uint64_t base, uint64_t size,
+ bool punch_hole)
+{
+ const int mode = FALLOC_FL_KEEP_SIZE | (punch_hole ? FALLOC_FL_PUNCH_HOLE : 0);
+ struct userspace_mem_region *region;
+ uint64_t end = base + size;
+ uint64_t gpa, len;
+ off_t fd_offset;
+ int ret;
+
+ for (gpa = base; gpa < end; gpa += len) {
+ uint64_t offset;
+
+ region = userspace_mem_region_find(vm, gpa, gpa);
+ TEST_ASSERT(region && region->region.flags & KVM_MEM_GUEST_MEMFD,
+ "Private memory region not found for GPA 0x%lx", gpa);
+
+ offset = gpa - region->region.guest_phys_addr;
+ fd_offset = region->region.guest_memfd_offset + offset;
+ len = min_t(uint64_t, end - gpa, region->region.memory_size - offset);
+
+ ret = fallocate(region->region.guest_memfd, mode, fd_offset, len);
+ TEST_ASSERT(!ret, "fallocate() failed to %s at %lx (len = %lu), fd = %d, mode = %x, offset = %lx",
+ punch_hole ? "punch hole" : "allocate", gpa, len,
+ region->region.guest_memfd, mode, fd_offset);
+ }
+}
+
+/* Returns the size of a vCPU's kvm_run structure. */
static int vcpu_mmap_sz(void)
{
int dev_fd, ret;
- dev_fd = open(KVM_DEV_PATH, O_RDONLY);
- if (dev_fd < 0)
- exit(KSFT_SKIP);
+ dev_fd = open_kvm_dev_path_or_exit();
ret = ioctl(dev_fd, KVM_GET_VCPU_MMAP_SIZE, NULL);
TEST_ASSERT(ret >= sizeof(struct kvm_run),
- "%s KVM_GET_VCPU_MMAP_SIZE ioctl failed, rc: %i errno: %i",
- __func__, ret, errno);
+ KVM_IOCTL_ERROR(KVM_GET_VCPU_MMAP_SIZE, ret));
close(dev_fd);
return ret;
}
+static bool vcpu_exists(struct kvm_vm *vm, uint32_t vcpu_id)
+{
+ struct kvm_vcpu *vcpu;
+
+ list_for_each_entry(vcpu, &vm->vcpus, list) {
+ if (vcpu->id == vcpu_id)
+ return true;
+ }
+
+ return false;
+}
+
/*
- * VM VCPU Add
- *
- * Input Args:
- * vm - Virtual Machine
- * vcpuid - VCPU ID
- *
- * Output Args: None
- *
- * Return: None
- *
- * Adds a virtual CPU to the VM specified by vm with the ID given by vcpuid.
- * No additional VCPU setup is done.
+ * Adds a virtual CPU to the VM specified by vm with the ID given by vcpu_id.
+ * No additional vCPU setup is done. Returns the vCPU.
*/
-void vm_vcpu_add(struct kvm_vm *vm, uint32_t vcpuid)
+struct kvm_vcpu *__vm_vcpu_add(struct kvm_vm *vm, uint32_t vcpu_id)
{
- struct vcpu *vcpu;
+ struct kvm_vcpu *vcpu;
/* Confirm a vcpu with the specified id doesn't already exist. */
- vcpu = vcpu_find(vm, vcpuid);
- if (vcpu != NULL)
- TEST_FAIL("vcpu with the specified id "
- "already exists,\n"
- " requested vcpuid: %u\n"
- " existing vcpuid: %u state: %p",
- vcpuid, vcpu->id, vcpu->state);
+ TEST_ASSERT(!vcpu_exists(vm, vcpu_id), "vCPU%d already exists", vcpu_id);
/* Allocate and initialize new vcpu structure. */
vcpu = calloc(1, sizeof(*vcpu));
TEST_ASSERT(vcpu != NULL, "Insufficient Memory");
- vcpu->id = vcpuid;
- vcpu->fd = ioctl(vm->fd, KVM_CREATE_VCPU, vcpuid);
- TEST_ASSERT(vcpu->fd >= 0, "KVM_CREATE_VCPU failed, rc: %i errno: %i",
- vcpu->fd, errno);
- TEST_ASSERT(vcpu_mmap_sz() >= sizeof(*vcpu->state), "vcpu mmap size "
+ vcpu->vm = vm;
+ vcpu->id = vcpu_id;
+ vcpu->fd = __vm_ioctl(vm, KVM_CREATE_VCPU, (void *)(unsigned long)vcpu_id);
+ TEST_ASSERT_VM_VCPU_IOCTL(vcpu->fd >= 0, KVM_CREATE_VCPU, vcpu->fd, vm);
+
+ TEST_ASSERT(vcpu_mmap_sz() >= sizeof(*vcpu->run), "vcpu mmap size "
"smaller than expected, vcpu_mmap_sz: %i expected_min: %zi",
- vcpu_mmap_sz(), sizeof(*vcpu->state));
- vcpu->state = (struct kvm_run *) mmap(NULL, sizeof(*vcpu->state),
+ vcpu_mmap_sz(), sizeof(*vcpu->run));
+ vcpu->run = (struct kvm_run *) mmap(NULL, vcpu_mmap_sz(),
PROT_READ | PROT_WRITE, MAP_SHARED, vcpu->fd, 0);
- TEST_ASSERT(vcpu->state != MAP_FAILED, "mmap vcpu_state failed, "
- "vcpu id: %u errno: %i", vcpuid, errno);
+ TEST_ASSERT(vcpu->run != MAP_FAILED,
+ __KVM_SYSCALL_ERROR("mmap()", (int)(unsigned long)MAP_FAILED));
/* Add to linked-list of VCPUs. */
list_add(&vcpu->list, &vm->vcpus);
+
+ return vcpu;
}
/*
@@ -902,8 +1366,8 @@ void vm_vcpu_add(struct kvm_vm *vm, uint32_t vcpuid)
* TEST_ASSERT failure occurs for invalid input or no area of at least
* sz unallocated bytes >= vaddr_min is available.
*/
-static vm_vaddr_t vm_vaddr_unused_gap(struct kvm_vm *vm, size_t sz,
- vm_vaddr_t vaddr_min)
+vm_vaddr_t vm_vaddr_unused_gap(struct kvm_vm *vm, size_t sz,
+ vm_vaddr_t vaddr_min)
{
uint64_t pages = (sz + vm->page_size - 1) >> vm->page_shift;
@@ -968,6 +1432,50 @@ va_found:
return pgidx_start * vm->page_size;
}
+static vm_vaddr_t ____vm_vaddr_alloc(struct kvm_vm *vm, size_t sz,
+ vm_vaddr_t vaddr_min,
+ enum kvm_mem_region_type type,
+ bool protected)
+{
+ uint64_t pages = (sz >> vm->page_shift) + ((sz % vm->page_size) != 0);
+
+ virt_pgd_alloc(vm);
+ vm_paddr_t paddr = __vm_phy_pages_alloc(vm, pages,
+ KVM_UTIL_MIN_PFN * vm->page_size,
+ vm->memslots[type], protected);
+
+ /*
+ * Find an unused range of virtual page addresses of at least
+ * pages in length.
+ */
+ vm_vaddr_t vaddr_start = vm_vaddr_unused_gap(vm, sz, vaddr_min);
+
+ /* Map the virtual pages. */
+ for (vm_vaddr_t vaddr = vaddr_start; pages > 0;
+ pages--, vaddr += vm->page_size, paddr += vm->page_size) {
+
+ virt_pg_map(vm, vaddr, paddr);
+
+ sparsebit_set(vm->vpages_mapped, vaddr >> vm->page_shift);
+ }
+
+ return vaddr_start;
+}
+
+vm_vaddr_t __vm_vaddr_alloc(struct kvm_vm *vm, size_t sz, vm_vaddr_t vaddr_min,
+ enum kvm_mem_region_type type)
+{
+ return ____vm_vaddr_alloc(vm, sz, vaddr_min, type,
+ vm_arch_has_protected_memory(vm));
+}
+
+vm_vaddr_t vm_vaddr_alloc_shared(struct kvm_vm *vm, size_t sz,
+ vm_vaddr_t vaddr_min,
+ enum kvm_mem_region_type type)
+{
+ return ____vm_vaddr_alloc(vm, sz, vaddr_min, type, false);
+}
+
/*
* VM Virtual Address Allocate
*
@@ -975,8 +1483,6 @@ va_found:
* vm - Virtual Machine
* sz - Size in bytes
* vaddr_min - Minimum starting virtual address
- * data_memslot - Memory region slot for data pages
- * pgd_memslot - Memory region slot for new virtual translation tables
*
* Output Args: None
*
@@ -987,36 +1493,54 @@ va_found:
* given by vm. The allocated bytes are mapped to a virtual address >=
* the address given by vaddr_min. Note that each allocation uses a
* a unique set of pages, with the minimum real allocation being at least
- * a page.
+ * a page. The allocated physical space comes from the TEST_DATA memory region.
*/
-vm_vaddr_t vm_vaddr_alloc(struct kvm_vm *vm, size_t sz, vm_vaddr_t vaddr_min,
- uint32_t data_memslot, uint32_t pgd_memslot)
+vm_vaddr_t vm_vaddr_alloc(struct kvm_vm *vm, size_t sz, vm_vaddr_t vaddr_min)
{
- uint64_t pages = (sz >> vm->page_shift) + ((sz % vm->page_size) != 0);
-
- virt_pgd_alloc(vm, pgd_memslot);
-
- /*
- * Find an unused range of virtual page addresses of at least
- * pages in length.
- */
- vm_vaddr_t vaddr_start = vm_vaddr_unused_gap(vm, sz, vaddr_min);
-
- /* Map the virtual pages. */
- for (vm_vaddr_t vaddr = vaddr_start; pages > 0;
- pages--, vaddr += vm->page_size) {
- vm_paddr_t paddr;
-
- paddr = vm_phy_page_alloc(vm,
- KVM_UTIL_MIN_PFN * vm->page_size, data_memslot);
+ return __vm_vaddr_alloc(vm, sz, vaddr_min, MEM_REGION_TEST_DATA);
+}
- virt_pg_map(vm, vaddr, paddr, pgd_memslot);
+/*
+ * VM Virtual Address Allocate Pages
+ *
+ * Input Args:
+ * vm - Virtual Machine
+ *
+ * Output Args: None
+ *
+ * Return:
+ * Starting guest virtual address
+ *
+ * Allocates at least N system pages worth of bytes within the virtual address
+ * space of the vm.
+ */
+vm_vaddr_t vm_vaddr_alloc_pages(struct kvm_vm *vm, int nr_pages)
+{
+ return vm_vaddr_alloc(vm, nr_pages * getpagesize(), KVM_UTIL_MIN_VADDR);
+}
- sparsebit_set(vm->vpages_mapped,
- vaddr >> vm->page_shift);
- }
+vm_vaddr_t __vm_vaddr_alloc_page(struct kvm_vm *vm, enum kvm_mem_region_type type)
+{
+ return __vm_vaddr_alloc(vm, getpagesize(), KVM_UTIL_MIN_VADDR, type);
+}
- return vaddr_start;
+/*
+ * VM Virtual Address Allocate Page
+ *
+ * Input Args:
+ * vm - Virtual Machine
+ *
+ * Output Args: None
+ *
+ * Return:
+ * Starting guest virtual address
+ *
+ * Allocates at least one system page worth of bytes within the virtual address
+ * space of the vm.
+ */
+vm_vaddr_t vm_vaddr_alloc_page(struct kvm_vm *vm)
+{
+ return vm_vaddr_alloc_pages(vm, 1);
}
/*
@@ -1027,7 +1551,6 @@ vm_vaddr_t vm_vaddr_alloc(struct kvm_vm *vm, size_t sz, vm_vaddr_t vaddr_min,
* vaddr - Virtuall address to map
* paddr - VM Physical Address
* npages - The number of pages to map
- * pgd_memslot - Memory region slot for new virtual translation tables
*
* Output Args: None
*
@@ -1037,7 +1560,7 @@ vm_vaddr_t vm_vaddr_alloc(struct kvm_vm *vm, size_t sz, vm_vaddr_t vaddr_min,
* @npages starting at @vaddr to the page range starting at @paddr.
*/
void virt_map(struct kvm_vm *vm, uint64_t vaddr, uint64_t paddr,
- unsigned int npages, uint32_t pgd_memslot)
+ unsigned int npages)
{
size_t page_size = vm->page_size;
size_t size = npages * page_size;
@@ -1046,7 +1569,9 @@ void virt_map(struct kvm_vm *vm, uint64_t vaddr, uint64_t paddr,
TEST_ASSERT(paddr + size > paddr, "Paddr overflow");
while (npages--) {
- virt_pg_map(vm, vaddr, paddr, pgd_memslot);
+ virt_pg_map(vm, vaddr, paddr);
+ sparsebit_set(vm->vpages_mapped, vaddr >> vm->page_shift);
+
vaddr += page_size;
paddr += page_size;
}
@@ -1073,16 +1598,16 @@ void *addr_gpa2hva(struct kvm_vm *vm, vm_paddr_t gpa)
{
struct userspace_mem_region *region;
- list_for_each_entry(region, &vm->userspace_mem_regions, list) {
- if ((gpa >= region->region.guest_phys_addr)
- && (gpa <= (region->region.guest_phys_addr
- + region->region.memory_size - 1)))
- return (void *) ((uintptr_t) region->host_mem
- + (gpa - region->region.guest_phys_addr));
+ gpa = vm_untag_gpa(vm, gpa);
+
+ region = userspace_mem_region_find(vm, gpa, gpa);
+ if (!region) {
+ TEST_FAIL("No vm physical memory at 0x%lx", gpa);
+ return NULL;
}
- TEST_FAIL("No vm physical memory at 0x%lx", gpa);
- return NULL;
+ return (void *)((uintptr_t)region->host_mem
+ + (gpa - region->region.guest_phys_addr));
}
/*
@@ -1104,15 +1629,22 @@ void *addr_gpa2hva(struct kvm_vm *vm, vm_paddr_t gpa)
*/
vm_paddr_t addr_hva2gpa(struct kvm_vm *vm, void *hva)
{
- struct userspace_mem_region *region;
+ struct rb_node *node;
+
+ for (node = vm->regions.hva_tree.rb_node; node; ) {
+ struct userspace_mem_region *region =
+ container_of(node, struct userspace_mem_region, hva_node);
- list_for_each_entry(region, &vm->userspace_mem_regions, list) {
- if ((hva >= region->host_mem)
- && (hva <= (region->host_mem
- + region->region.memory_size - 1)))
- return (vm_paddr_t) ((uintptr_t)
- region->region.guest_phys_addr
- + (hva - (uintptr_t) region->host_mem));
+ if (hva >= region->host_mem) {
+ if (hva <= (region->host_mem
+ + region->region.memory_size - 1))
+ return (vm_paddr_t)((uintptr_t)
+ region->region.guest_phys_addr
+ + (hva - (uintptr_t)region->host_mem));
+
+ node = node->rb_right;
+ } else
+ node = node->rb_left;
}
TEST_FAIL("No mapping to a guest physical address, hva: %p", hva);
@@ -1120,402 +1652,265 @@ vm_paddr_t addr_hva2gpa(struct kvm_vm *vm, void *hva)
}
/*
- * VM Create IRQ Chip
+ * Address VM physical to Host Virtual *alias*.
*
* Input Args:
* vm - Virtual Machine
+ * gpa - VM physical address
*
* Output Args: None
*
- * Return: None
- *
- * Creates an interrupt controller chip for the VM specified by vm.
+ * Return:
+ * Equivalent address within the host virtual *alias* area, or NULL
+ * (without failing the test) if the guest memory is not shared (so
+ * no alias exists).
+ *
+ * Create a writable, shared virtual=>physical alias for the specific GPA.
+ * The primary use case is to allow the host selftest to manipulate guest
+ * memory without mapping said memory in the guest's address space. And, for
+ * userfaultfd-based demand paging, to do so without triggering userfaults.
*/
-void vm_create_irqchip(struct kvm_vm *vm)
+void *addr_gpa2alias(struct kvm_vm *vm, vm_paddr_t gpa)
{
- int ret;
-
- ret = ioctl(vm->fd, KVM_CREATE_IRQCHIP, 0);
- TEST_ASSERT(ret == 0, "KVM_CREATE_IRQCHIP IOCTL failed, "
- "rc: %i errno: %i", ret, errno);
+ struct userspace_mem_region *region;
+ uintptr_t offset;
- vm->has_irqchip = true;
-}
+ region = userspace_mem_region_find(vm, gpa, gpa);
+ if (!region)
+ return NULL;
-/*
- * VM VCPU State
- *
- * Input Args:
- * vm - Virtual Machine
- * vcpuid - VCPU ID
- *
- * Output Args: None
- *
- * Return:
- * Pointer to structure that describes the state of the VCPU.
- *
- * Locates and returns a pointer to a structure that describes the
- * state of the VCPU with the given vcpuid.
- */
-struct kvm_run *vcpu_state(struct kvm_vm *vm, uint32_t vcpuid)
-{
- struct vcpu *vcpu = vcpu_find(vm, vcpuid);
- TEST_ASSERT(vcpu != NULL, "vcpu not found, vcpuid: %u", vcpuid);
+ if (!region->host_alias)
+ return NULL;
- return vcpu->state;
+ offset = gpa - region->region.guest_phys_addr;
+ return (void *) ((uintptr_t) region->host_alias + offset);
}
-/*
- * VM VCPU Run
- *
- * Input Args:
- * vm - Virtual Machine
- * vcpuid - VCPU ID
- *
- * Output Args: None
- *
- * Return: None
- *
- * Switch to executing the code for the VCPU given by vcpuid, within the VM
- * given by vm.
- */
-void vcpu_run(struct kvm_vm *vm, uint32_t vcpuid)
+/* Create an interrupt controller chip for the specified VM. */
+void vm_create_irqchip(struct kvm_vm *vm)
{
- int ret = _vcpu_run(vm, vcpuid);
- TEST_ASSERT(ret == 0, "KVM_RUN IOCTL failed, "
- "rc: %i errno: %i", ret, errno);
+ vm_ioctl(vm, KVM_CREATE_IRQCHIP, NULL);
+
+ vm->has_irqchip = true;
}
-int _vcpu_run(struct kvm_vm *vm, uint32_t vcpuid)
+int _vcpu_run(struct kvm_vcpu *vcpu)
{
- struct vcpu *vcpu = vcpu_find(vm, vcpuid);
int rc;
- TEST_ASSERT(vcpu != NULL, "vcpu not found, vcpuid: %u", vcpuid);
do {
- rc = ioctl(vcpu->fd, KVM_RUN, NULL);
+ rc = __vcpu_run(vcpu);
} while (rc == -1 && errno == EINTR);
- return rc;
-}
-
-void vcpu_run_complete_io(struct kvm_vm *vm, uint32_t vcpuid)
-{
- struct vcpu *vcpu = vcpu_find(vm, vcpuid);
- int ret;
-
- TEST_ASSERT(vcpu != NULL, "vcpu not found, vcpuid: %u", vcpuid);
-
- vcpu->state->immediate_exit = 1;
- ret = ioctl(vcpu->fd, KVM_RUN, NULL);
- vcpu->state->immediate_exit = 0;
-
- TEST_ASSERT(ret == -1 && errno == EINTR,
- "KVM_RUN IOCTL didn't exit immediately, rc: %i, errno: %i",
- ret, errno);
-}
-void vcpu_set_guest_debug(struct kvm_vm *vm, uint32_t vcpuid,
- struct kvm_guest_debug *debug)
-{
- struct vcpu *vcpu = vcpu_find(vm, vcpuid);
- int ret = ioctl(vcpu->fd, KVM_SET_GUEST_DEBUG, debug);
+ assert_on_unhandled_exception(vcpu);
- TEST_ASSERT(ret == 0, "KVM_SET_GUEST_DEBUG failed: %d", ret);
+ return rc;
}
/*
- * VM VCPU Set MP State
- *
- * Input Args:
- * vm - Virtual Machine
- * vcpuid - VCPU ID
- * mp_state - mp_state to be set
- *
- * Output Args: None
- *
- * Return: None
- *
- * Sets the MP state of the VCPU given by vcpuid, to the state given
- * by mp_state.
+ * Invoke KVM_RUN on a vCPU until KVM returns something other than -EINTR.
+ * Assert if the KVM returns an error (other than -EINTR).
*/
-void vcpu_set_mp_state(struct kvm_vm *vm, uint32_t vcpuid,
- struct kvm_mp_state *mp_state)
+void vcpu_run(struct kvm_vcpu *vcpu)
{
- struct vcpu *vcpu = vcpu_find(vm, vcpuid);
- int ret;
-
- TEST_ASSERT(vcpu != NULL, "vcpu not found, vcpuid: %u", vcpuid);
+ int ret = _vcpu_run(vcpu);
- ret = ioctl(vcpu->fd, KVM_SET_MP_STATE, mp_state);
- TEST_ASSERT(ret == 0, "KVM_SET_MP_STATE IOCTL failed, "
- "rc: %i errno: %i", ret, errno);
+ TEST_ASSERT(!ret, KVM_IOCTL_ERROR(KVM_RUN, ret));
}
-/*
- * VM VCPU Regs Get
- *
- * Input Args:
- * vm - Virtual Machine
- * vcpuid - VCPU ID
- *
- * Output Args:
- * regs - current state of VCPU regs
- *
- * Return: None
- *
- * Obtains the current register state for the VCPU specified by vcpuid
- * and stores it at the location given by regs.
- */
-void vcpu_regs_get(struct kvm_vm *vm, uint32_t vcpuid, struct kvm_regs *regs)
+void vcpu_run_complete_io(struct kvm_vcpu *vcpu)
{
- struct vcpu *vcpu = vcpu_find(vm, vcpuid);
int ret;
- TEST_ASSERT(vcpu != NULL, "vcpu not found, vcpuid: %u", vcpuid);
+ vcpu->run->immediate_exit = 1;
+ ret = __vcpu_run(vcpu);
+ vcpu->run->immediate_exit = 0;
- ret = ioctl(vcpu->fd, KVM_GET_REGS, regs);
- TEST_ASSERT(ret == 0, "KVM_GET_REGS failed, rc: %i errno: %i",
- ret, errno);
+ TEST_ASSERT(ret == -1 && errno == EINTR,
+ "KVM_RUN IOCTL didn't exit immediately, rc: %i, errno: %i",
+ ret, errno);
}
/*
- * VM VCPU Regs Set
- *
- * Input Args:
- * vm - Virtual Machine
- * vcpuid - VCPU ID
- * regs - Values to set VCPU regs to
- *
- * Output Args: None
- *
- * Return: None
- *
- * Sets the regs of the VCPU specified by vcpuid to the values
- * given by regs.
+ * Get the list of guest registers which are supported for
+ * KVM_GET_ONE_REG/KVM_SET_ONE_REG ioctls. Returns a kvm_reg_list pointer,
+ * it is the caller's responsibility to free the list.
*/
-void vcpu_regs_set(struct kvm_vm *vm, uint32_t vcpuid, struct kvm_regs *regs)
+struct kvm_reg_list *vcpu_get_reg_list(struct kvm_vcpu *vcpu)
{
- struct vcpu *vcpu = vcpu_find(vm, vcpuid);
+ struct kvm_reg_list reg_list_n = { .n = 0 }, *reg_list;
int ret;
- TEST_ASSERT(vcpu != NULL, "vcpu not found, vcpuid: %u", vcpuid);
+ ret = __vcpu_ioctl(vcpu, KVM_GET_REG_LIST, &reg_list_n);
+ TEST_ASSERT(ret == -1 && errno == E2BIG, "KVM_GET_REG_LIST n=0");
- ret = ioctl(vcpu->fd, KVM_SET_REGS, regs);
- TEST_ASSERT(ret == 0, "KVM_SET_REGS failed, rc: %i errno: %i",
- ret, errno);
+ reg_list = calloc(1, sizeof(*reg_list) + reg_list_n.n * sizeof(__u64));
+ reg_list->n = reg_list_n.n;
+ vcpu_ioctl(vcpu, KVM_GET_REG_LIST, reg_list);
+ return reg_list;
}
-#ifdef __KVM_HAVE_VCPU_EVENTS
-void vcpu_events_get(struct kvm_vm *vm, uint32_t vcpuid,
- struct kvm_vcpu_events *events)
+void *vcpu_map_dirty_ring(struct kvm_vcpu *vcpu)
{
- struct vcpu *vcpu = vcpu_find(vm, vcpuid);
- int ret;
+ uint32_t page_size = getpagesize();
+ uint32_t size = vcpu->vm->dirty_ring_size;
- TEST_ASSERT(vcpu != NULL, "vcpu not found, vcpuid: %u", vcpuid);
+ TEST_ASSERT(size > 0, "Should enable dirty ring first");
- ret = ioctl(vcpu->fd, KVM_GET_VCPU_EVENTS, events);
- TEST_ASSERT(ret == 0, "KVM_GET_VCPU_EVENTS, failed, rc: %i errno: %i",
- ret, errno);
-}
+ if (!vcpu->dirty_gfns) {
+ void *addr;
-void vcpu_events_set(struct kvm_vm *vm, uint32_t vcpuid,
- struct kvm_vcpu_events *events)
-{
- struct vcpu *vcpu = vcpu_find(vm, vcpuid);
- int ret;
+ addr = mmap(NULL, size, PROT_READ, MAP_PRIVATE, vcpu->fd,
+ page_size * KVM_DIRTY_LOG_PAGE_OFFSET);
+ TEST_ASSERT(addr == MAP_FAILED, "Dirty ring mapped private");
- TEST_ASSERT(vcpu != NULL, "vcpu not found, vcpuid: %u", vcpuid);
+ addr = mmap(NULL, size, PROT_READ | PROT_EXEC, MAP_PRIVATE, vcpu->fd,
+ page_size * KVM_DIRTY_LOG_PAGE_OFFSET);
+ TEST_ASSERT(addr == MAP_FAILED, "Dirty ring mapped exec");
- ret = ioctl(vcpu->fd, KVM_SET_VCPU_EVENTS, events);
- TEST_ASSERT(ret == 0, "KVM_SET_VCPU_EVENTS, failed, rc: %i errno: %i",
- ret, errno);
-}
-#endif
-
-#ifdef __x86_64__
-void vcpu_nested_state_get(struct kvm_vm *vm, uint32_t vcpuid,
- struct kvm_nested_state *state)
-{
- struct vcpu *vcpu = vcpu_find(vm, vcpuid);
- int ret;
+ addr = mmap(NULL, size, PROT_READ | PROT_WRITE, MAP_SHARED, vcpu->fd,
+ page_size * KVM_DIRTY_LOG_PAGE_OFFSET);
+ TEST_ASSERT(addr != MAP_FAILED, "Dirty ring map failed");
- TEST_ASSERT(vcpu != NULL, "vcpu not found, vcpuid: %u", vcpuid);
-
- ret = ioctl(vcpu->fd, KVM_GET_NESTED_STATE, state);
- TEST_ASSERT(ret == 0,
- "KVM_SET_NESTED_STATE failed, ret: %i errno: %i",
- ret, errno);
-}
-
-int vcpu_nested_state_set(struct kvm_vm *vm, uint32_t vcpuid,
- struct kvm_nested_state *state, bool ignore_error)
-{
- struct vcpu *vcpu = vcpu_find(vm, vcpuid);
- int ret;
-
- TEST_ASSERT(vcpu != NULL, "vcpu not found, vcpuid: %u", vcpuid);
-
- ret = ioctl(vcpu->fd, KVM_SET_NESTED_STATE, state);
- if (!ignore_error) {
- TEST_ASSERT(ret == 0,
- "KVM_SET_NESTED_STATE failed, ret: %i errno: %i",
- ret, errno);
+ vcpu->dirty_gfns = addr;
+ vcpu->dirty_gfns_count = size / sizeof(struct kvm_dirty_gfn);
}
- return ret;
+ return vcpu->dirty_gfns;
}
-#endif
/*
- * VM VCPU System Regs Get
- *
- * Input Args:
- * vm - Virtual Machine
- * vcpuid - VCPU ID
- *
- * Output Args:
- * sregs - current state of VCPU system regs
- *
- * Return: None
- *
- * Obtains the current system register state for the VCPU specified by
- * vcpuid and stores it at the location given by sregs.
+ * Device Ioctl
*/
-void vcpu_sregs_get(struct kvm_vm *vm, uint32_t vcpuid, struct kvm_sregs *sregs)
-{
- struct vcpu *vcpu = vcpu_find(vm, vcpuid);
- int ret;
- TEST_ASSERT(vcpu != NULL, "vcpu not found, vcpuid: %u", vcpuid);
+int __kvm_has_device_attr(int dev_fd, uint32_t group, uint64_t attr)
+{
+ struct kvm_device_attr attribute = {
+ .group = group,
+ .attr = attr,
+ .flags = 0,
+ };
- ret = ioctl(vcpu->fd, KVM_GET_SREGS, sregs);
- TEST_ASSERT(ret == 0, "KVM_GET_SREGS failed, rc: %i errno: %i",
- ret, errno);
+ return ioctl(dev_fd, KVM_HAS_DEVICE_ATTR, &attribute);
}
-/*
- * VM VCPU System Regs Set
- *
- * Input Args:
- * vm - Virtual Machine
- * vcpuid - VCPU ID
- * sregs - Values to set VCPU system regs to
- *
- * Output Args: None
- *
- * Return: None
- *
- * Sets the system regs of the VCPU specified by vcpuid to the values
- * given by sregs.
- */
-void vcpu_sregs_set(struct kvm_vm *vm, uint32_t vcpuid, struct kvm_sregs *sregs)
+int __kvm_test_create_device(struct kvm_vm *vm, uint64_t type)
{
- int ret = _vcpu_sregs_set(vm, vcpuid, sregs);
- TEST_ASSERT(ret == 0, "KVM_RUN IOCTL failed, "
- "rc: %i errno: %i", ret, errno);
+ struct kvm_create_device create_dev = {
+ .type = type,
+ .flags = KVM_CREATE_DEVICE_TEST,
+ };
+
+ return __vm_ioctl(vm, KVM_CREATE_DEVICE, &create_dev);
}
-int _vcpu_sregs_set(struct kvm_vm *vm, uint32_t vcpuid, struct kvm_sregs *sregs)
+int __kvm_create_device(struct kvm_vm *vm, uint64_t type)
{
- struct vcpu *vcpu = vcpu_find(vm, vcpuid);
+ struct kvm_create_device create_dev = {
+ .type = type,
+ .fd = -1,
+ .flags = 0,
+ };
+ int err;
- TEST_ASSERT(vcpu != NULL, "vcpu not found, vcpuid: %u", vcpuid);
+ err = __vm_ioctl(vm, KVM_CREATE_DEVICE, &create_dev);
+ TEST_ASSERT(err <= 0, "KVM_CREATE_DEVICE shouldn't return a positive value");
+ return err ? : create_dev.fd;
+}
- return ioctl(vcpu->fd, KVM_SET_SREGS, sregs);
+int __kvm_device_attr_get(int dev_fd, uint32_t group, uint64_t attr, void *val)
+{
+ struct kvm_device_attr kvmattr = {
+ .group = group,
+ .attr = attr,
+ .flags = 0,
+ .addr = (uintptr_t)val,
+ };
+
+ return __kvm_ioctl(dev_fd, KVM_GET_DEVICE_ATTR, &kvmattr);
}
-void vcpu_fpu_get(struct kvm_vm *vm, uint32_t vcpuid, struct kvm_fpu *fpu)
+int __kvm_device_attr_set(int dev_fd, uint32_t group, uint64_t attr, void *val)
{
- int ret;
+ struct kvm_device_attr kvmattr = {
+ .group = group,
+ .attr = attr,
+ .flags = 0,
+ .addr = (uintptr_t)val,
+ };
- ret = _vcpu_ioctl(vm, vcpuid, KVM_GET_FPU, fpu);
- TEST_ASSERT(ret == 0, "KVM_GET_FPU failed, rc: %i errno: %i (%s)",
- ret, errno, strerror(errno));
+ return __kvm_ioctl(dev_fd, KVM_SET_DEVICE_ATTR, &kvmattr);
}
-void vcpu_fpu_set(struct kvm_vm *vm, uint32_t vcpuid, struct kvm_fpu *fpu)
+/*
+ * IRQ related functions.
+ */
+
+int _kvm_irq_line(struct kvm_vm *vm, uint32_t irq, int level)
{
- int ret;
+ struct kvm_irq_level irq_level = {
+ .irq = irq,
+ .level = level,
+ };
- ret = _vcpu_ioctl(vm, vcpuid, KVM_SET_FPU, fpu);
- TEST_ASSERT(ret == 0, "KVM_SET_FPU failed, rc: %i errno: %i (%s)",
- ret, errno, strerror(errno));
+ return __vm_ioctl(vm, KVM_IRQ_LINE, &irq_level);
}
-void vcpu_get_reg(struct kvm_vm *vm, uint32_t vcpuid, struct kvm_one_reg *reg)
+void kvm_irq_line(struct kvm_vm *vm, uint32_t irq, int level)
{
- int ret;
+ int ret = _kvm_irq_line(vm, irq, level);
- ret = _vcpu_ioctl(vm, vcpuid, KVM_GET_ONE_REG, reg);
- TEST_ASSERT(ret == 0, "KVM_GET_ONE_REG failed, rc: %i errno: %i (%s)",
- ret, errno, strerror(errno));
+ TEST_ASSERT(ret >= 0, KVM_IOCTL_ERROR(KVM_IRQ_LINE, ret));
}
-void vcpu_set_reg(struct kvm_vm *vm, uint32_t vcpuid, struct kvm_one_reg *reg)
+struct kvm_irq_routing *kvm_gsi_routing_create(void)
{
- int ret;
+ struct kvm_irq_routing *routing;
+ size_t size;
- ret = _vcpu_ioctl(vm, vcpuid, KVM_SET_ONE_REG, reg);
- TEST_ASSERT(ret == 0, "KVM_SET_ONE_REG failed, rc: %i errno: %i (%s)",
- ret, errno, strerror(errno));
+ size = sizeof(struct kvm_irq_routing);
+ /* Allocate space for the max number of entries: this wastes 196 KBs. */
+ size += KVM_MAX_IRQ_ROUTES * sizeof(struct kvm_irq_routing_entry);
+ routing = calloc(1, size);
+ assert(routing);
+
+ return routing;
}
-/*
- * VCPU Ioctl
- *
- * Input Args:
- * vm - Virtual Machine
- * vcpuid - VCPU ID
- * cmd - Ioctl number
- * arg - Argument to pass to the ioctl
- *
- * Return: None
- *
- * Issues an arbitrary ioctl on a VCPU fd.
- */
-void vcpu_ioctl(struct kvm_vm *vm, uint32_t vcpuid,
- unsigned long cmd, void *arg)
+void kvm_gsi_routing_irqchip_add(struct kvm_irq_routing *routing,
+ uint32_t gsi, uint32_t pin)
{
- int ret;
+ int i;
- ret = _vcpu_ioctl(vm, vcpuid, cmd, arg);
- TEST_ASSERT(ret == 0, "vcpu ioctl %lu failed, rc: %i errno: %i (%s)",
- cmd, ret, errno, strerror(errno));
+ assert(routing);
+ assert(routing->nr < KVM_MAX_IRQ_ROUTES);
+
+ i = routing->nr;
+ routing->entries[i].gsi = gsi;
+ routing->entries[i].type = KVM_IRQ_ROUTING_IRQCHIP;
+ routing->entries[i].flags = 0;
+ routing->entries[i].u.irqchip.irqchip = 0;
+ routing->entries[i].u.irqchip.pin = pin;
+ routing->nr++;
}
-int _vcpu_ioctl(struct kvm_vm *vm, uint32_t vcpuid,
- unsigned long cmd, void *arg)
+int _kvm_gsi_routing_write(struct kvm_vm *vm, struct kvm_irq_routing *routing)
{
- struct vcpu *vcpu = vcpu_find(vm, vcpuid);
int ret;
- TEST_ASSERT(vcpu != NULL, "vcpu not found, vcpuid: %u", vcpuid);
-
- ret = ioctl(vcpu->fd, cmd, arg);
+ assert(routing);
+ ret = __vm_ioctl(vm, KVM_SET_GSI_ROUTING, routing);
+ free(routing);
return ret;
}
-/*
- * VM Ioctl
- *
- * Input Args:
- * vm - Virtual Machine
- * cmd - Ioctl number
- * arg - Argument to pass to the ioctl
- *
- * Return: None
- *
- * Issues an arbitrary ioctl on a VM fd.
- */
-void vm_ioctl(struct kvm_vm *vm, unsigned long cmd, void *arg)
+void kvm_gsi_routing_write(struct kvm_vm *vm, struct kvm_irq_routing *routing)
{
int ret;
- ret = ioctl(vm->fd, cmd, arg);
- TEST_ASSERT(ret == 0, "vm ioctl %lu failed, rc: %i errno: %i (%s)",
- cmd, ret, errno, strerror(errno));
+ ret = _kvm_gsi_routing_write(vm, routing);
+ TEST_ASSERT(!ret, KVM_IOCTL_ERROR(KVM_SET_GSI_ROUTING, ret));
}
/*
@@ -1535,14 +1930,15 @@ void vm_ioctl(struct kvm_vm *vm, unsigned long cmd, void *arg)
*/
void vm_dump(FILE *stream, struct kvm_vm *vm, uint8_t indent)
{
+ int ctr;
struct userspace_mem_region *region;
- struct vcpu *vcpu;
+ struct kvm_vcpu *vcpu;
fprintf(stream, "%*smode: 0x%x\n", indent, "", vm->mode);
fprintf(stream, "%*sfd: %i\n", indent, "", vm->fd);
fprintf(stream, "%*spage_size: 0x%x\n", indent, "", vm->page_size);
fprintf(stream, "%*sMem Regions:\n", indent, "");
- list_for_each_entry(region, &vm->userspace_mem_regions, list) {
+ hash_for_each(vm->regions.slot_hash, ctr, region, slot_node) {
fprintf(stream, "%*sguest_phys: 0x%lx size: 0x%lx "
"host_virt: %p\n", indent + 2, "",
(uint64_t) region->region.guest_phys_addr,
@@ -1550,6 +1946,10 @@ void vm_dump(FILE *stream, struct kvm_vm *vm, uint8_t indent)
region->host_mem);
fprintf(stream, "%*sunused_phy_pages: ", indent + 2, "");
sparsebit_dump(stream, region->unused_phy_pages, 0);
+ if (region->protected_phy_pages) {
+ fprintf(stream, "%*sprotected_phy_pages: ", indent + 2, "");
+ sparsebit_dump(stream, region->protected_phy_pages, 0);
+ }
}
fprintf(stream, "%*sMapped Virtual Pages:\n", indent, "");
sparsebit_dump(stream, vm->vpages_mapped, indent + 2);
@@ -1561,37 +1961,58 @@ void vm_dump(FILE *stream, struct kvm_vm *vm, uint8_t indent)
virt_dump(stream, vm, indent + 4);
}
fprintf(stream, "%*sVCPUs:\n", indent, "");
+
list_for_each_entry(vcpu, &vm->vcpus, list)
- vcpu_dump(stream, vm, vcpu->id, indent + 2);
+ vcpu_dump(stream, vcpu, indent + 2);
}
+#define KVM_EXIT_STRING(x) {KVM_EXIT_##x, #x}
+
/* Known KVM exit reasons */
static struct exit_reason {
unsigned int reason;
const char *name;
} exit_reasons_known[] = {
- {KVM_EXIT_UNKNOWN, "UNKNOWN"},
- {KVM_EXIT_EXCEPTION, "EXCEPTION"},
- {KVM_EXIT_IO, "IO"},
- {KVM_EXIT_HYPERCALL, "HYPERCALL"},
- {KVM_EXIT_DEBUG, "DEBUG"},
- {KVM_EXIT_HLT, "HLT"},
- {KVM_EXIT_MMIO, "MMIO"},
- {KVM_EXIT_IRQ_WINDOW_OPEN, "IRQ_WINDOW_OPEN"},
- {KVM_EXIT_SHUTDOWN, "SHUTDOWN"},
- {KVM_EXIT_FAIL_ENTRY, "FAIL_ENTRY"},
- {KVM_EXIT_INTR, "INTR"},
- {KVM_EXIT_SET_TPR, "SET_TPR"},
- {KVM_EXIT_TPR_ACCESS, "TPR_ACCESS"},
- {KVM_EXIT_S390_SIEIC, "S390_SIEIC"},
- {KVM_EXIT_S390_RESET, "S390_RESET"},
- {KVM_EXIT_DCR, "DCR"},
- {KVM_EXIT_NMI, "NMI"},
- {KVM_EXIT_INTERNAL_ERROR, "INTERNAL_ERROR"},
- {KVM_EXIT_OSI, "OSI"},
- {KVM_EXIT_PAPR_HCALL, "PAPR_HCALL"},
+ KVM_EXIT_STRING(UNKNOWN),
+ KVM_EXIT_STRING(EXCEPTION),
+ KVM_EXIT_STRING(IO),
+ KVM_EXIT_STRING(HYPERCALL),
+ KVM_EXIT_STRING(DEBUG),
+ KVM_EXIT_STRING(HLT),
+ KVM_EXIT_STRING(MMIO),
+ KVM_EXIT_STRING(IRQ_WINDOW_OPEN),
+ KVM_EXIT_STRING(SHUTDOWN),
+ KVM_EXIT_STRING(FAIL_ENTRY),
+ KVM_EXIT_STRING(INTR),
+ KVM_EXIT_STRING(SET_TPR),
+ KVM_EXIT_STRING(TPR_ACCESS),
+ KVM_EXIT_STRING(S390_SIEIC),
+ KVM_EXIT_STRING(S390_RESET),
+ KVM_EXIT_STRING(DCR),
+ KVM_EXIT_STRING(NMI),
+ KVM_EXIT_STRING(INTERNAL_ERROR),
+ KVM_EXIT_STRING(OSI),
+ KVM_EXIT_STRING(PAPR_HCALL),
+ KVM_EXIT_STRING(S390_UCONTROL),
+ KVM_EXIT_STRING(WATCHDOG),
+ KVM_EXIT_STRING(S390_TSCH),
+ KVM_EXIT_STRING(EPR),
+ KVM_EXIT_STRING(SYSTEM_EVENT),
+ KVM_EXIT_STRING(S390_STSI),
+ KVM_EXIT_STRING(IOAPIC_EOI),
+ KVM_EXIT_STRING(HYPERV),
+ KVM_EXIT_STRING(ARM_NISV),
+ KVM_EXIT_STRING(X86_RDMSR),
+ KVM_EXIT_STRING(X86_WRMSR),
+ KVM_EXIT_STRING(DIRTY_RING_FULL),
+ KVM_EXIT_STRING(AP_RESET_HOLD),
+ KVM_EXIT_STRING(X86_BUS_LOCK),
+ KVM_EXIT_STRING(XEN),
+ KVM_EXIT_STRING(RISCV_SBI),
+ KVM_EXIT_STRING(RISCV_CSR),
+ KVM_EXIT_STRING(NOTIFY),
#ifdef KVM_EXIT_MEMORY_NOT_PRESENT
- {KVM_EXIT_MEMORY_NOT_PRESENT, "MEMORY_NOT_PRESENT"},
+ KVM_EXIT_STRING(MEMORY_NOT_PRESENT),
#endif
};
@@ -1630,6 +2051,7 @@ const char *exit_reason_str(unsigned int exit_reason)
* num - number of pages
* paddr_min - Physical address minimum
* memslot - Memory region to allocate page from
+ * protected - True if the pages will be used as protected/private memory
*
* Output Args: None
*
@@ -1641,8 +2063,9 @@ const char *exit_reason_str(unsigned int exit_reason)
* and their base address is returned. A TEST_ASSERT failure occurs if
* not enough pages are available at or above paddr_min.
*/
-vm_paddr_t vm_phy_pages_alloc(struct kvm_vm *vm, size_t num,
- vm_paddr_t paddr_min, uint32_t memslot)
+vm_paddr_t __vm_phy_pages_alloc(struct kvm_vm *vm, size_t num,
+ vm_paddr_t paddr_min, uint32_t memslot,
+ bool protected)
{
struct userspace_mem_region *region;
sparsebit_idx_t pg, base;
@@ -1655,8 +2078,10 @@ vm_paddr_t vm_phy_pages_alloc(struct kvm_vm *vm, size_t num,
paddr_min, vm->page_size);
region = memslot2region(vm, memslot);
- base = pg = paddr_min >> vm->page_shift;
+ TEST_ASSERT(!protected || region->protected_phy_pages,
+ "Region doesn't support protected memory");
+ base = pg = paddr_min >> vm->page_shift;
do {
for (; pg < base + num; ++pg) {
if (!sparsebit_is_set(region->unused_phy_pages, pg)) {
@@ -1675,8 +2100,11 @@ vm_paddr_t vm_phy_pages_alloc(struct kvm_vm *vm, size_t num,
abort();
}
- for (pg = base; pg < base + num; ++pg)
+ for (pg = base; pg < base + num; ++pg) {
sparsebit_clear(region->unused_phy_pages, pg);
+ if (protected)
+ sparsebit_set(region->protected_phy_pages, pg);
+ }
return base * vm->page_size;
}
@@ -1687,6 +2115,12 @@ vm_paddr_t vm_phy_page_alloc(struct kvm_vm *vm, vm_paddr_t paddr_min,
return vm_phy_pages_alloc(vm, 1, paddr_min, memslot);
}
+vm_paddr_t vm_alloc_page_table(struct kvm_vm *vm)
+{
+ return vm_phy_page_alloc(vm, KVM_GUEST_PAGE_TABLE_MIN_PADDR,
+ vm->memslots[MEM_REGION_PT]);
+}
+
/*
* Address Guest Virtual to Host Virtual
*
@@ -1704,60 +2138,9 @@ void *addr_gva2hva(struct kvm_vm *vm, vm_vaddr_t gva)
return addr_gpa2hva(vm, addr_gva2gpa(vm, gva));
}
-/*
- * Is Unrestricted Guest
- *
- * Input Args:
- * vm - Virtual Machine
- *
- * Output Args: None
- *
- * Return: True if the unrestricted guest is set to 'Y', otherwise return false.
- *
- * Check if the unrestricted guest flag is enabled.
- */
-bool vm_is_unrestricted_guest(struct kvm_vm *vm)
-{
- char val = 'N';
- size_t count;
- FILE *f;
-
- if (vm == NULL) {
- /* Ensure that the KVM vendor-specific module is loaded. */
- f = fopen(KVM_DEV_PATH, "r");
- TEST_ASSERT(f != NULL, "Error in opening KVM dev file: %d",
- errno);
- fclose(f);
- }
-
- f = fopen("/sys/module/kvm_intel/parameters/unrestricted_guest", "r");
- if (f) {
- count = fread(&val, sizeof(char), 1, f);
- TEST_ASSERT(count == 1, "Unable to read from param file.");
- fclose(f);
- }
-
- return val == 'Y';
-}
-
-unsigned int vm_get_page_size(struct kvm_vm *vm)
+unsigned long __weak vm_compute_max_gfn(struct kvm_vm *vm)
{
- return vm->page_size;
-}
-
-unsigned int vm_get_page_shift(struct kvm_vm *vm)
-{
- return vm->page_shift;
-}
-
-unsigned int vm_get_max_gfn(struct kvm_vm *vm)
-{
- return vm->max_gfn;
-}
-
-int vm_get_fd(struct kvm_vm *vm)
-{
- return vm->fd;
+ return ((1ULL << vm->pa_bits) >> vm->page_shift) - 1;
}
static unsigned int vm_calc_num_pages(unsigned int num_pages,
@@ -1799,3 +2182,144 @@ unsigned int vm_calc_num_guest_pages(enum vm_guest_mode mode, size_t size)
n = DIV_ROUND_UP(size, vm_guest_mode_params[mode].page_size);
return vm_adjust_num_guest_pages(mode, n);
}
+
+/*
+ * Read binary stats descriptors
+ *
+ * Input Args:
+ * stats_fd - the file descriptor for the binary stats file from which to read
+ * header - the binary stats metadata header corresponding to the given FD
+ *
+ * Output Args: None
+ *
+ * Return:
+ * A pointer to a newly allocated series of stat descriptors.
+ * Caller is responsible for freeing the returned kvm_stats_desc.
+ *
+ * Read the stats descriptors from the binary stats interface.
+ */
+struct kvm_stats_desc *read_stats_descriptors(int stats_fd,
+ struct kvm_stats_header *header)
+{
+ struct kvm_stats_desc *stats_desc;
+ ssize_t desc_size, total_size, ret;
+
+ desc_size = get_stats_descriptor_size(header);
+ total_size = header->num_desc * desc_size;
+
+ stats_desc = calloc(header->num_desc, desc_size);
+ TEST_ASSERT(stats_desc, "Allocate memory for stats descriptors");
+
+ ret = pread(stats_fd, stats_desc, total_size, header->desc_offset);
+ TEST_ASSERT(ret == total_size, "Read KVM stats descriptors");
+
+ return stats_desc;
+}
+
+/*
+ * Read stat data for a particular stat
+ *
+ * Input Args:
+ * stats_fd - the file descriptor for the binary stats file from which to read
+ * header - the binary stats metadata header corresponding to the given FD
+ * desc - the binary stat metadata for the particular stat to be read
+ * max_elements - the maximum number of 8-byte values to read into data
+ *
+ * Output Args:
+ * data - the buffer into which stat data should be read
+ *
+ * Read the data values of a specified stat from the binary stats interface.
+ */
+void read_stat_data(int stats_fd, struct kvm_stats_header *header,
+ struct kvm_stats_desc *desc, uint64_t *data,
+ size_t max_elements)
+{
+ size_t nr_elements = min_t(ssize_t, desc->size, max_elements);
+ size_t size = nr_elements * sizeof(*data);
+ ssize_t ret;
+
+ TEST_ASSERT(desc->size, "No elements in stat '%s'", desc->name);
+ TEST_ASSERT(max_elements, "Zero elements requested for stat '%s'", desc->name);
+
+ ret = pread(stats_fd, data, size,
+ header->data_offset + desc->offset);
+
+ TEST_ASSERT(ret >= 0, "pread() failed on stat '%s', errno: %i (%s)",
+ desc->name, errno, strerror(errno));
+ TEST_ASSERT(ret == size,
+ "pread() on stat '%s' read %ld bytes, wanted %lu bytes",
+ desc->name, size, ret);
+}
+
+/*
+ * Read the data of the named stat
+ *
+ * Input Args:
+ * vm - the VM for which the stat should be read
+ * stat_name - the name of the stat to read
+ * max_elements - the maximum number of 8-byte values to read into data
+ *
+ * Output Args:
+ * data - the buffer into which stat data should be read
+ *
+ * Read the data values of a specified stat from the binary stats interface.
+ */
+void __vm_get_stat(struct kvm_vm *vm, const char *stat_name, uint64_t *data,
+ size_t max_elements)
+{
+ struct kvm_stats_desc *desc;
+ size_t size_desc;
+ int i;
+
+ if (!vm->stats_fd) {
+ vm->stats_fd = vm_get_stats_fd(vm);
+ read_stats_header(vm->stats_fd, &vm->stats_header);
+ vm->stats_desc = read_stats_descriptors(vm->stats_fd,
+ &vm->stats_header);
+ }
+
+ size_desc = get_stats_descriptor_size(&vm->stats_header);
+
+ for (i = 0; i < vm->stats_header.num_desc; ++i) {
+ desc = (void *)vm->stats_desc + (i * size_desc);
+
+ if (strcmp(desc->name, stat_name))
+ continue;
+
+ read_stat_data(vm->stats_fd, &vm->stats_header, desc,
+ data, max_elements);
+
+ break;
+ }
+}
+
+__weak void kvm_arch_vm_post_create(struct kvm_vm *vm)
+{
+}
+
+__weak void kvm_selftest_arch_init(void)
+{
+}
+
+void __attribute((constructor)) kvm_selftest_init(void)
+{
+ /* Tell stdout not to buffer its content. */
+ setbuf(stdout, NULL);
+
+ kvm_selftest_arch_init();
+}
+
+bool vm_is_gpa_protected(struct kvm_vm *vm, vm_paddr_t paddr)
+{
+ sparsebit_idx_t pg = 0;
+ struct userspace_mem_region *region;
+
+ if (!vm_arch_has_protected_memory(vm))
+ return false;
+
+ region = userspace_mem_region_find(vm, paddr, paddr);
+ TEST_ASSERT(region, "No vm physical memory at 0x%lx", paddr);
+
+ pg = paddr >> vm->page_shift;
+ return sparsebit_is_set(region->protected_phy_pages, pg);
+}
diff --git a/tools/testing/selftests/kvm/lib/kvm_util_internal.h b/tools/testing/selftests/kvm/lib/kvm_util_internal.h
deleted file mode 100644
index 2ef446520748..000000000000
--- a/tools/testing/selftests/kvm/lib/kvm_util_internal.h
+++ /dev/null
@@ -1,111 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0-only */
-/*
- * tools/testing/selftests/kvm/lib/kvm_util_internal.h
- *
- * Copyright (C) 2018, Google LLC.
- */
-
-#ifndef SELFTEST_KVM_UTIL_INTERNAL_H
-#define SELFTEST_KVM_UTIL_INTERNAL_H
-
-#include "sparsebit.h"
-
-#define KVM_DEV_PATH "/dev/kvm"
-
-struct userspace_mem_region {
- struct kvm_userspace_memory_region region;
- struct sparsebit *unused_phy_pages;
- int fd;
- off_t offset;
- void *host_mem;
- void *mmap_start;
- size_t mmap_size;
- struct list_head list;
-};
-
-struct vcpu {
- struct list_head list;
- uint32_t id;
- int fd;
- struct kvm_run *state;
-};
-
-struct kvm_vm {
- int mode;
- unsigned long type;
- int kvm_fd;
- int fd;
- unsigned int pgtable_levels;
- unsigned int page_size;
- unsigned int page_shift;
- unsigned int pa_bits;
- unsigned int va_bits;
- uint64_t max_gfn;
- struct list_head vcpus;
- struct list_head userspace_mem_regions;
- struct sparsebit *vpages_valid;
- struct sparsebit *vpages_mapped;
- bool has_irqchip;
- bool pgd_created;
- vm_paddr_t pgd;
- vm_vaddr_t gdt;
- vm_vaddr_t tss;
-};
-
-struct vcpu *vcpu_find(struct kvm_vm *vm, uint32_t vcpuid);
-
-/*
- * Virtual Translation Tables Dump
- *
- * Input Args:
- * stream - Output FILE stream
- * vm - Virtual Machine
- * indent - Left margin indent amount
- *
- * Output Args: None
- *
- * Return: None
- *
- * Dumps to the FILE stream given by @stream, the contents of all the
- * virtual translation tables for the VM given by @vm.
- */
-void virt_dump(FILE *stream, struct kvm_vm *vm, uint8_t indent);
-
-/*
- * Register Dump
- *
- * Input Args:
- * stream - Output FILE stream
- * regs - Registers
- * indent - Left margin indent amount
- *
- * Output Args: None
- *
- * Return: None
- *
- * Dumps the state of the registers given by @regs, to the FILE stream
- * given by @stream.
- */
-void regs_dump(FILE *stream, struct kvm_regs *regs, uint8_t indent);
-
-/*
- * System Register Dump
- *
- * Input Args:
- * stream - Output FILE stream
- * sregs - System registers
- * indent - Left margin indent amount
- *
- * Output Args: None
- *
- * Return: None
- *
- * Dumps the state of the system registers given by @sregs, to the FILE stream
- * given by @stream.
- */
-void sregs_dump(FILE *stream, struct kvm_sregs *sregs, uint8_t indent);
-
-struct userspace_mem_region *
-memslot2region(struct kvm_vm *vm, uint32_t memslot);
-
-#endif /* SELFTEST_KVM_UTIL_INTERNAL_H */
diff --git a/tools/testing/selftests/kvm/lib/memstress.c b/tools/testing/selftests/kvm/lib/memstress.c
new file mode 100644
index 000000000000..cf2c73971308
--- /dev/null
+++ b/tools/testing/selftests/kvm/lib/memstress.c
@@ -0,0 +1,398 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (C) 2020, Google LLC.
+ */
+#define _GNU_SOURCE
+
+#include <inttypes.h>
+#include <linux/bitmap.h>
+
+#include "kvm_util.h"
+#include "memstress.h"
+#include "processor.h"
+
+struct memstress_args memstress_args;
+
+/*
+ * Guest virtual memory offset of the testing memory slot.
+ * Must not conflict with identity mapped test code.
+ */
+static uint64_t guest_test_virt_mem = DEFAULT_GUEST_TEST_MEM;
+
+struct vcpu_thread {
+ /* The index of the vCPU. */
+ int vcpu_idx;
+
+ /* The pthread backing the vCPU. */
+ pthread_t thread;
+
+ /* Set to true once the vCPU thread is up and running. */
+ bool running;
+};
+
+/* The vCPU threads involved in this test. */
+static struct vcpu_thread vcpu_threads[KVM_MAX_VCPUS];
+
+/* The function run by each vCPU thread, as provided by the test. */
+static void (*vcpu_thread_fn)(struct memstress_vcpu_args *);
+
+/* Set to true once all vCPU threads are up and running. */
+static bool all_vcpu_threads_running;
+
+static struct kvm_vcpu *vcpus[KVM_MAX_VCPUS];
+
+/*
+ * Continuously write to the first 8 bytes of each page in the
+ * specified region.
+ */
+void memstress_guest_code(uint32_t vcpu_idx)
+{
+ struct memstress_args *args = &memstress_args;
+ struct memstress_vcpu_args *vcpu_args = &args->vcpu_args[vcpu_idx];
+ struct guest_random_state rand_state;
+ uint64_t gva;
+ uint64_t pages;
+ uint64_t addr;
+ uint64_t page;
+ int i;
+
+ rand_state = new_guest_random_state(args->random_seed + vcpu_idx);
+
+ gva = vcpu_args->gva;
+ pages = vcpu_args->pages;
+
+ /* Make sure vCPU args data structure is not corrupt. */
+ GUEST_ASSERT(vcpu_args->vcpu_idx == vcpu_idx);
+
+ while (true) {
+ for (i = 0; i < sizeof(memstress_args); i += args->guest_page_size)
+ (void) *((volatile char *)args + i);
+
+ for (i = 0; i < pages; i++) {
+ if (args->random_access)
+ page = guest_random_u32(&rand_state) % pages;
+ else
+ page = i;
+
+ addr = gva + (page * args->guest_page_size);
+
+ if (guest_random_u32(&rand_state) % 100 < args->write_percent)
+ *(uint64_t *)addr = 0x0123456789ABCDEF;
+ else
+ READ_ONCE(*(uint64_t *)addr);
+ }
+
+ GUEST_SYNC(1);
+ }
+}
+
+void memstress_setup_vcpus(struct kvm_vm *vm, int nr_vcpus,
+ struct kvm_vcpu *vcpus[],
+ uint64_t vcpu_memory_bytes,
+ bool partition_vcpu_memory_access)
+{
+ struct memstress_args *args = &memstress_args;
+ struct memstress_vcpu_args *vcpu_args;
+ int i;
+
+ for (i = 0; i < nr_vcpus; i++) {
+ vcpu_args = &args->vcpu_args[i];
+
+ vcpu_args->vcpu = vcpus[i];
+ vcpu_args->vcpu_idx = i;
+
+ if (partition_vcpu_memory_access) {
+ vcpu_args->gva = guest_test_virt_mem +
+ (i * vcpu_memory_bytes);
+ vcpu_args->pages = vcpu_memory_bytes /
+ args->guest_page_size;
+ vcpu_args->gpa = args->gpa + (i * vcpu_memory_bytes);
+ } else {
+ vcpu_args->gva = guest_test_virt_mem;
+ vcpu_args->pages = (nr_vcpus * vcpu_memory_bytes) /
+ args->guest_page_size;
+ vcpu_args->gpa = args->gpa;
+ }
+
+ vcpu_args_set(vcpus[i], 1, i);
+
+ pr_debug("Added VCPU %d with test mem gpa [%lx, %lx)\n",
+ i, vcpu_args->gpa, vcpu_args->gpa +
+ (vcpu_args->pages * args->guest_page_size));
+ }
+}
+
+struct kvm_vm *memstress_create_vm(enum vm_guest_mode mode, int nr_vcpus,
+ uint64_t vcpu_memory_bytes, int slots,
+ enum vm_mem_backing_src_type backing_src,
+ bool partition_vcpu_memory_access)
+{
+ struct memstress_args *args = &memstress_args;
+ struct kvm_vm *vm;
+ uint64_t guest_num_pages, slot0_pages = 0;
+ uint64_t backing_src_pagesz = get_backing_src_pagesz(backing_src);
+ uint64_t region_end_gfn;
+ int i;
+
+ pr_info("Testing guest mode: %s\n", vm_guest_mode_string(mode));
+
+ /* By default vCPUs will write to memory. */
+ args->write_percent = 100;
+
+ /*
+ * Snapshot the non-huge page size. This is used by the guest code to
+ * access/dirty pages at the logging granularity.
+ */
+ args->guest_page_size = vm_guest_mode_params[mode].page_size;
+
+ guest_num_pages = vm_adjust_num_guest_pages(mode,
+ (nr_vcpus * vcpu_memory_bytes) / args->guest_page_size);
+
+ TEST_ASSERT(vcpu_memory_bytes % getpagesize() == 0,
+ "Guest memory size is not host page size aligned.");
+ TEST_ASSERT(vcpu_memory_bytes % args->guest_page_size == 0,
+ "Guest memory size is not guest page size aligned.");
+ TEST_ASSERT(guest_num_pages % slots == 0,
+ "Guest memory cannot be evenly divided into %d slots.",
+ slots);
+
+ /*
+ * If using nested, allocate extra pages for the nested page tables and
+ * in-memory data structures.
+ */
+ if (args->nested)
+ slot0_pages += memstress_nested_pages(nr_vcpus);
+
+ /*
+ * Pass guest_num_pages to populate the page tables for test memory.
+ * The memory is also added to memslot 0, but that's a benign side
+ * effect as KVM allows aliasing HVAs in meslots.
+ */
+ vm = __vm_create_with_vcpus(VM_SHAPE(mode), nr_vcpus,
+ slot0_pages + guest_num_pages,
+ memstress_guest_code, vcpus);
+
+ args->vm = vm;
+
+ /* Put the test region at the top guest physical memory. */
+ region_end_gfn = vm->max_gfn + 1;
+
+#ifdef __x86_64__
+ /*
+ * When running vCPUs in L2, restrict the test region to 48 bits to
+ * avoid needing 5-level page tables to identity map L2.
+ */
+ if (args->nested)
+ region_end_gfn = min(region_end_gfn, (1UL << 48) / args->guest_page_size);
+#endif
+ /*
+ * If there should be more memory in the guest test region than there
+ * can be pages in the guest, it will definitely cause problems.
+ */
+ TEST_ASSERT(guest_num_pages < region_end_gfn,
+ "Requested more guest memory than address space allows.\n"
+ " guest pages: %" PRIx64 " max gfn: %" PRIx64
+ " nr_vcpus: %d wss: %" PRIx64 "]",
+ guest_num_pages, region_end_gfn - 1, nr_vcpus, vcpu_memory_bytes);
+
+ args->gpa = (region_end_gfn - guest_num_pages - 1) * args->guest_page_size;
+ args->gpa = align_down(args->gpa, backing_src_pagesz);
+#ifdef __s390x__
+ /* Align to 1M (segment size) */
+ args->gpa = align_down(args->gpa, 1 << 20);
+#endif
+ args->size = guest_num_pages * args->guest_page_size;
+ pr_info("guest physical test memory: [0x%lx, 0x%lx)\n",
+ args->gpa, args->gpa + args->size);
+
+ /* Add extra memory slots for testing */
+ for (i = 0; i < slots; i++) {
+ uint64_t region_pages = guest_num_pages / slots;
+ vm_paddr_t region_start = args->gpa + region_pages * args->guest_page_size * i;
+
+ vm_userspace_mem_region_add(vm, backing_src, region_start,
+ MEMSTRESS_MEM_SLOT_INDEX + i,
+ region_pages, 0);
+ }
+
+ /* Do mapping for the demand paging memory slot */
+ virt_map(vm, guest_test_virt_mem, args->gpa, guest_num_pages);
+
+ memstress_setup_vcpus(vm, nr_vcpus, vcpus, vcpu_memory_bytes,
+ partition_vcpu_memory_access);
+
+ if (args->nested) {
+ pr_info("Configuring vCPUs to run in L2 (nested).\n");
+ memstress_setup_nested(vm, nr_vcpus, vcpus);
+ }
+
+ /* Export the shared variables to the guest. */
+ sync_global_to_guest(vm, memstress_args);
+
+ return vm;
+}
+
+void memstress_destroy_vm(struct kvm_vm *vm)
+{
+ kvm_vm_free(vm);
+}
+
+void memstress_set_write_percent(struct kvm_vm *vm, uint32_t write_percent)
+{
+ memstress_args.write_percent = write_percent;
+ sync_global_to_guest(vm, memstress_args.write_percent);
+}
+
+void memstress_set_random_seed(struct kvm_vm *vm, uint32_t random_seed)
+{
+ memstress_args.random_seed = random_seed;
+ sync_global_to_guest(vm, memstress_args.random_seed);
+}
+
+void memstress_set_random_access(struct kvm_vm *vm, bool random_access)
+{
+ memstress_args.random_access = random_access;
+ sync_global_to_guest(vm, memstress_args.random_access);
+}
+
+uint64_t __weak memstress_nested_pages(int nr_vcpus)
+{
+ return 0;
+}
+
+void __weak memstress_setup_nested(struct kvm_vm *vm, int nr_vcpus, struct kvm_vcpu **vcpus)
+{
+ pr_info("%s() not support on this architecture, skipping.\n", __func__);
+ exit(KSFT_SKIP);
+}
+
+static void *vcpu_thread_main(void *data)
+{
+ struct vcpu_thread *vcpu = data;
+ int vcpu_idx = vcpu->vcpu_idx;
+
+ if (memstress_args.pin_vcpus)
+ kvm_pin_this_task_to_pcpu(memstress_args.vcpu_to_pcpu[vcpu_idx]);
+
+ WRITE_ONCE(vcpu->running, true);
+
+ /*
+ * Wait for all vCPU threads to be up and running before calling the test-
+ * provided vCPU thread function. This prevents thread creation (which
+ * requires taking the mmap_sem in write mode) from interfering with the
+ * guest faulting in its memory.
+ */
+ while (!READ_ONCE(all_vcpu_threads_running))
+ ;
+
+ vcpu_thread_fn(&memstress_args.vcpu_args[vcpu_idx]);
+
+ return NULL;
+}
+
+void memstress_start_vcpu_threads(int nr_vcpus,
+ void (*vcpu_fn)(struct memstress_vcpu_args *))
+{
+ int i;
+
+ vcpu_thread_fn = vcpu_fn;
+ WRITE_ONCE(all_vcpu_threads_running, false);
+ WRITE_ONCE(memstress_args.stop_vcpus, false);
+
+ for (i = 0; i < nr_vcpus; i++) {
+ struct vcpu_thread *vcpu = &vcpu_threads[i];
+
+ vcpu->vcpu_idx = i;
+ WRITE_ONCE(vcpu->running, false);
+
+ pthread_create(&vcpu->thread, NULL, vcpu_thread_main, vcpu);
+ }
+
+ for (i = 0; i < nr_vcpus; i++) {
+ while (!READ_ONCE(vcpu_threads[i].running))
+ ;
+ }
+
+ WRITE_ONCE(all_vcpu_threads_running, true);
+}
+
+void memstress_join_vcpu_threads(int nr_vcpus)
+{
+ int i;
+
+ WRITE_ONCE(memstress_args.stop_vcpus, true);
+
+ for (i = 0; i < nr_vcpus; i++)
+ pthread_join(vcpu_threads[i].thread, NULL);
+}
+
+static void toggle_dirty_logging(struct kvm_vm *vm, int slots, bool enable)
+{
+ int i;
+
+ for (i = 0; i < slots; i++) {
+ int slot = MEMSTRESS_MEM_SLOT_INDEX + i;
+ int flags = enable ? KVM_MEM_LOG_DIRTY_PAGES : 0;
+
+ vm_mem_region_set_flags(vm, slot, flags);
+ }
+}
+
+void memstress_enable_dirty_logging(struct kvm_vm *vm, int slots)
+{
+ toggle_dirty_logging(vm, slots, true);
+}
+
+void memstress_disable_dirty_logging(struct kvm_vm *vm, int slots)
+{
+ toggle_dirty_logging(vm, slots, false);
+}
+
+void memstress_get_dirty_log(struct kvm_vm *vm, unsigned long *bitmaps[], int slots)
+{
+ int i;
+
+ for (i = 0; i < slots; i++) {
+ int slot = MEMSTRESS_MEM_SLOT_INDEX + i;
+
+ kvm_vm_get_dirty_log(vm, slot, bitmaps[i]);
+ }
+}
+
+void memstress_clear_dirty_log(struct kvm_vm *vm, unsigned long *bitmaps[],
+ int slots, uint64_t pages_per_slot)
+{
+ int i;
+
+ for (i = 0; i < slots; i++) {
+ int slot = MEMSTRESS_MEM_SLOT_INDEX + i;
+
+ kvm_vm_clear_dirty_log(vm, slot, bitmaps[i], 0, pages_per_slot);
+ }
+}
+
+unsigned long **memstress_alloc_bitmaps(int slots, uint64_t pages_per_slot)
+{
+ unsigned long **bitmaps;
+ int i;
+
+ bitmaps = malloc(slots * sizeof(bitmaps[0]));
+ TEST_ASSERT(bitmaps, "Failed to allocate bitmaps array.");
+
+ for (i = 0; i < slots; i++) {
+ bitmaps[i] = bitmap_zalloc(pages_per_slot);
+ TEST_ASSERT(bitmaps[i], "Failed to allocate slot bitmap.");
+ }
+
+ return bitmaps;
+}
+
+void memstress_free_bitmaps(unsigned long *bitmaps[], int slots)
+{
+ int i;
+
+ for (i = 0; i < slots; i++)
+ free(bitmaps[i]);
+
+ free(bitmaps);
+}
diff --git a/tools/testing/selftests/kvm/lib/rbtree.c b/tools/testing/selftests/kvm/lib/rbtree.c
new file mode 100644
index 000000000000..a703f0194ea3
--- /dev/null
+++ b/tools/testing/selftests/kvm/lib/rbtree.c
@@ -0,0 +1 @@
+#include "../../../../lib/rbtree.c"
diff --git a/tools/testing/selftests/kvm/lib/riscv/handlers.S b/tools/testing/selftests/kvm/lib/riscv/handlers.S
new file mode 100644
index 000000000000..aa0abd3f35bb
--- /dev/null
+++ b/tools/testing/selftests/kvm/lib/riscv/handlers.S
@@ -0,0 +1,101 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Copyright (c) 2023 Intel Corporation
+ */
+
+#ifndef __ASSEMBLY__
+#define __ASSEMBLY__
+#endif
+
+#include <asm/csr.h>
+
+.macro save_context
+ addi sp, sp, (-8*34)
+ sd x1, 0(sp)
+ sd x2, 8(sp)
+ sd x3, 16(sp)
+ sd x4, 24(sp)
+ sd x5, 32(sp)
+ sd x6, 40(sp)
+ sd x7, 48(sp)
+ sd x8, 56(sp)
+ sd x9, 64(sp)
+ sd x10, 72(sp)
+ sd x11, 80(sp)
+ sd x12, 88(sp)
+ sd x13, 96(sp)
+ sd x14, 104(sp)
+ sd x15, 112(sp)
+ sd x16, 120(sp)
+ sd x17, 128(sp)
+ sd x18, 136(sp)
+ sd x19, 144(sp)
+ sd x20, 152(sp)
+ sd x21, 160(sp)
+ sd x22, 168(sp)
+ sd x23, 176(sp)
+ sd x24, 184(sp)
+ sd x25, 192(sp)
+ sd x26, 200(sp)
+ sd x27, 208(sp)
+ sd x28, 216(sp)
+ sd x29, 224(sp)
+ sd x30, 232(sp)
+ sd x31, 240(sp)
+ csrr s0, CSR_SEPC
+ csrr s1, CSR_SSTATUS
+ csrr s2, CSR_SCAUSE
+ sd s0, 248(sp)
+ sd s1, 256(sp)
+ sd s2, 264(sp)
+.endm
+
+.macro restore_context
+ ld s2, 264(sp)
+ ld s1, 256(sp)
+ ld s0, 248(sp)
+ csrw CSR_SCAUSE, s2
+ csrw CSR_SSTATUS, s1
+ csrw CSR_SEPC, s0
+ ld x31, 240(sp)
+ ld x30, 232(sp)
+ ld x29, 224(sp)
+ ld x28, 216(sp)
+ ld x27, 208(sp)
+ ld x26, 200(sp)
+ ld x25, 192(sp)
+ ld x24, 184(sp)
+ ld x23, 176(sp)
+ ld x22, 168(sp)
+ ld x21, 160(sp)
+ ld x20, 152(sp)
+ ld x19, 144(sp)
+ ld x18, 136(sp)
+ ld x17, 128(sp)
+ ld x16, 120(sp)
+ ld x15, 112(sp)
+ ld x14, 104(sp)
+ ld x13, 96(sp)
+ ld x12, 88(sp)
+ ld x11, 80(sp)
+ ld x10, 72(sp)
+ ld x9, 64(sp)
+ ld x8, 56(sp)
+ ld x7, 48(sp)
+ ld x6, 40(sp)
+ ld x5, 32(sp)
+ ld x4, 24(sp)
+ ld x3, 16(sp)
+ ld x2, 8(sp)
+ ld x1, 0(sp)
+ addi sp, sp, (8*34)
+.endm
+
+.balign 4
+.global exception_vectors
+exception_vectors:
+ save_context
+ move a0, sp
+ call route_exception
+ restore_context
+ sret
diff --git a/tools/testing/selftests/kvm/lib/riscv/processor.c b/tools/testing/selftests/kvm/lib/riscv/processor.c
new file mode 100644
index 000000000000..e8211f5d6863
--- /dev/null
+++ b/tools/testing/selftests/kvm/lib/riscv/processor.c
@@ -0,0 +1,504 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * RISC-V code
+ *
+ * Copyright (C) 2021 Western Digital Corporation or its affiliates.
+ */
+
+#include <linux/compiler.h>
+#include <assert.h>
+
+#include "kvm_util.h"
+#include "processor.h"
+
+#define DEFAULT_RISCV_GUEST_STACK_VADDR_MIN 0xac0000
+
+static vm_vaddr_t exception_handlers;
+
+bool __vcpu_has_ext(struct kvm_vcpu *vcpu, uint64_t ext)
+{
+ unsigned long value = 0;
+ int ret;
+
+ ret = __vcpu_get_reg(vcpu, ext, &value);
+
+ return !ret && !!value;
+}
+
+static uint64_t page_align(struct kvm_vm *vm, uint64_t v)
+{
+ return (v + vm->page_size) & ~(vm->page_size - 1);
+}
+
+static uint64_t pte_addr(struct kvm_vm *vm, uint64_t entry)
+{
+ return ((entry & PGTBL_PTE_ADDR_MASK) >> PGTBL_PTE_ADDR_SHIFT) <<
+ PGTBL_PAGE_SIZE_SHIFT;
+}
+
+static uint64_t ptrs_per_pte(struct kvm_vm *vm)
+{
+ return PGTBL_PAGE_SIZE / sizeof(uint64_t);
+}
+
+static uint64_t pte_index_mask[] = {
+ PGTBL_L0_INDEX_MASK,
+ PGTBL_L1_INDEX_MASK,
+ PGTBL_L2_INDEX_MASK,
+ PGTBL_L3_INDEX_MASK,
+};
+
+static uint32_t pte_index_shift[] = {
+ PGTBL_L0_INDEX_SHIFT,
+ PGTBL_L1_INDEX_SHIFT,
+ PGTBL_L2_INDEX_SHIFT,
+ PGTBL_L3_INDEX_SHIFT,
+};
+
+static uint64_t pte_index(struct kvm_vm *vm, vm_vaddr_t gva, int level)
+{
+ TEST_ASSERT(level > -1,
+ "Negative page table level (%d) not possible", level);
+ TEST_ASSERT(level < vm->pgtable_levels,
+ "Invalid page table level (%d)", level);
+
+ return (gva & pte_index_mask[level]) >> pte_index_shift[level];
+}
+
+void virt_arch_pgd_alloc(struct kvm_vm *vm)
+{
+ size_t nr_pages = page_align(vm, ptrs_per_pte(vm) * 8) / vm->page_size;
+
+ if (vm->pgd_created)
+ return;
+
+ vm->pgd = vm_phy_pages_alloc(vm, nr_pages,
+ KVM_GUEST_PAGE_TABLE_MIN_PADDR,
+ vm->memslots[MEM_REGION_PT]);
+ vm->pgd_created = true;
+}
+
+void virt_arch_pg_map(struct kvm_vm *vm, uint64_t vaddr, uint64_t paddr)
+{
+ uint64_t *ptep, next_ppn;
+ int level = vm->pgtable_levels - 1;
+
+ TEST_ASSERT((vaddr % vm->page_size) == 0,
+ "Virtual address not on page boundary,\n"
+ " vaddr: 0x%lx vm->page_size: 0x%x", vaddr, vm->page_size);
+ TEST_ASSERT(sparsebit_is_set(vm->vpages_valid,
+ (vaddr >> vm->page_shift)),
+ "Invalid virtual address, vaddr: 0x%lx", vaddr);
+ TEST_ASSERT((paddr % vm->page_size) == 0,
+ "Physical address not on page boundary,\n"
+ " paddr: 0x%lx vm->page_size: 0x%x", paddr, vm->page_size);
+ TEST_ASSERT((paddr >> vm->page_shift) <= vm->max_gfn,
+ "Physical address beyond maximum supported,\n"
+ " paddr: 0x%lx vm->max_gfn: 0x%lx vm->page_size: 0x%x",
+ paddr, vm->max_gfn, vm->page_size);
+
+ ptep = addr_gpa2hva(vm, vm->pgd) + pte_index(vm, vaddr, level) * 8;
+ if (!*ptep) {
+ next_ppn = vm_alloc_page_table(vm) >> PGTBL_PAGE_SIZE_SHIFT;
+ *ptep = (next_ppn << PGTBL_PTE_ADDR_SHIFT) |
+ PGTBL_PTE_VALID_MASK;
+ }
+ level--;
+
+ while (level > -1) {
+ ptep = addr_gpa2hva(vm, pte_addr(vm, *ptep)) +
+ pte_index(vm, vaddr, level) * 8;
+ if (!*ptep && level > 0) {
+ next_ppn = vm_alloc_page_table(vm) >>
+ PGTBL_PAGE_SIZE_SHIFT;
+ *ptep = (next_ppn << PGTBL_PTE_ADDR_SHIFT) |
+ PGTBL_PTE_VALID_MASK;
+ }
+ level--;
+ }
+
+ paddr = paddr >> PGTBL_PAGE_SIZE_SHIFT;
+ *ptep = (paddr << PGTBL_PTE_ADDR_SHIFT) |
+ PGTBL_PTE_PERM_MASK | PGTBL_PTE_VALID_MASK;
+}
+
+vm_paddr_t addr_arch_gva2gpa(struct kvm_vm *vm, vm_vaddr_t gva)
+{
+ uint64_t *ptep;
+ int level = vm->pgtable_levels - 1;
+
+ if (!vm->pgd_created)
+ goto unmapped_gva;
+
+ ptep = addr_gpa2hva(vm, vm->pgd) + pte_index(vm, gva, level) * 8;
+ if (!ptep)
+ goto unmapped_gva;
+ level--;
+
+ while (level > -1) {
+ ptep = addr_gpa2hva(vm, pte_addr(vm, *ptep)) +
+ pte_index(vm, gva, level) * 8;
+ if (!ptep)
+ goto unmapped_gva;
+ level--;
+ }
+
+ return pte_addr(vm, *ptep) + (gva & (vm->page_size - 1));
+
+unmapped_gva:
+ TEST_FAIL("No mapping for vm virtual address gva: 0x%lx level: %d",
+ gva, level);
+ exit(1);
+}
+
+static void pte_dump(FILE *stream, struct kvm_vm *vm, uint8_t indent,
+ uint64_t page, int level)
+{
+#ifdef DEBUG
+ static const char *const type[] = { "pte", "pmd", "pud", "p4d"};
+ uint64_t pte, *ptep;
+
+ if (level < 0)
+ return;
+
+ for (pte = page; pte < page + ptrs_per_pte(vm) * 8; pte += 8) {
+ ptep = addr_gpa2hva(vm, pte);
+ if (!*ptep)
+ continue;
+ fprintf(stream, "%*s%s: %lx: %lx at %p\n", indent, "",
+ type[level], pte, *ptep, ptep);
+ pte_dump(stream, vm, indent + 1,
+ pte_addr(vm, *ptep), level - 1);
+ }
+#endif
+}
+
+void virt_arch_dump(FILE *stream, struct kvm_vm *vm, uint8_t indent)
+{
+ int level = vm->pgtable_levels - 1;
+ uint64_t pgd, *ptep;
+
+ if (!vm->pgd_created)
+ return;
+
+ for (pgd = vm->pgd; pgd < vm->pgd + ptrs_per_pte(vm) * 8; pgd += 8) {
+ ptep = addr_gpa2hva(vm, pgd);
+ if (!*ptep)
+ continue;
+ fprintf(stream, "%*spgd: %lx: %lx at %p\n", indent, "",
+ pgd, *ptep, ptep);
+ pte_dump(stream, vm, indent + 1,
+ pte_addr(vm, *ptep), level - 1);
+ }
+}
+
+void riscv_vcpu_mmu_setup(struct kvm_vcpu *vcpu)
+{
+ struct kvm_vm *vm = vcpu->vm;
+ unsigned long satp;
+
+ /*
+ * The RISC-V Sv48 MMU mode supports 56-bit physical address
+ * for 48-bit virtual address with 4KB last level page size.
+ */
+ switch (vm->mode) {
+ case VM_MODE_P52V48_4K:
+ case VM_MODE_P48V48_4K:
+ case VM_MODE_P40V48_4K:
+ break;
+ default:
+ TEST_FAIL("Unknown guest mode, mode: 0x%x", vm->mode);
+ }
+
+ satp = (vm->pgd >> PGTBL_PAGE_SIZE_SHIFT) & SATP_PPN;
+ satp |= SATP_MODE_48;
+
+ vcpu_set_reg(vcpu, RISCV_GENERAL_CSR_REG(satp), satp);
+}
+
+void vcpu_arch_dump(FILE *stream, struct kvm_vcpu *vcpu, uint8_t indent)
+{
+ struct kvm_riscv_core core;
+
+ vcpu_get_reg(vcpu, RISCV_CORE_REG(mode), &core.mode);
+ vcpu_get_reg(vcpu, RISCV_CORE_REG(regs.pc), &core.regs.pc);
+ vcpu_get_reg(vcpu, RISCV_CORE_REG(regs.ra), &core.regs.ra);
+ vcpu_get_reg(vcpu, RISCV_CORE_REG(regs.sp), &core.regs.sp);
+ vcpu_get_reg(vcpu, RISCV_CORE_REG(regs.gp), &core.regs.gp);
+ vcpu_get_reg(vcpu, RISCV_CORE_REG(regs.tp), &core.regs.tp);
+ vcpu_get_reg(vcpu, RISCV_CORE_REG(regs.t0), &core.regs.t0);
+ vcpu_get_reg(vcpu, RISCV_CORE_REG(regs.t1), &core.regs.t1);
+ vcpu_get_reg(vcpu, RISCV_CORE_REG(regs.t2), &core.regs.t2);
+ vcpu_get_reg(vcpu, RISCV_CORE_REG(regs.s0), &core.regs.s0);
+ vcpu_get_reg(vcpu, RISCV_CORE_REG(regs.s1), &core.regs.s1);
+ vcpu_get_reg(vcpu, RISCV_CORE_REG(regs.a0), &core.regs.a0);
+ vcpu_get_reg(vcpu, RISCV_CORE_REG(regs.a1), &core.regs.a1);
+ vcpu_get_reg(vcpu, RISCV_CORE_REG(regs.a2), &core.regs.a2);
+ vcpu_get_reg(vcpu, RISCV_CORE_REG(regs.a3), &core.regs.a3);
+ vcpu_get_reg(vcpu, RISCV_CORE_REG(regs.a4), &core.regs.a4);
+ vcpu_get_reg(vcpu, RISCV_CORE_REG(regs.a5), &core.regs.a5);
+ vcpu_get_reg(vcpu, RISCV_CORE_REG(regs.a6), &core.regs.a6);
+ vcpu_get_reg(vcpu, RISCV_CORE_REG(regs.a7), &core.regs.a7);
+ vcpu_get_reg(vcpu, RISCV_CORE_REG(regs.s2), &core.regs.s2);
+ vcpu_get_reg(vcpu, RISCV_CORE_REG(regs.s3), &core.regs.s3);
+ vcpu_get_reg(vcpu, RISCV_CORE_REG(regs.s4), &core.regs.s4);
+ vcpu_get_reg(vcpu, RISCV_CORE_REG(regs.s5), &core.regs.s5);
+ vcpu_get_reg(vcpu, RISCV_CORE_REG(regs.s6), &core.regs.s6);
+ vcpu_get_reg(vcpu, RISCV_CORE_REG(regs.s7), &core.regs.s7);
+ vcpu_get_reg(vcpu, RISCV_CORE_REG(regs.s8), &core.regs.s8);
+ vcpu_get_reg(vcpu, RISCV_CORE_REG(regs.s9), &core.regs.s9);
+ vcpu_get_reg(vcpu, RISCV_CORE_REG(regs.s10), &core.regs.s10);
+ vcpu_get_reg(vcpu, RISCV_CORE_REG(regs.s11), &core.regs.s11);
+ vcpu_get_reg(vcpu, RISCV_CORE_REG(regs.t3), &core.regs.t3);
+ vcpu_get_reg(vcpu, RISCV_CORE_REG(regs.t4), &core.regs.t4);
+ vcpu_get_reg(vcpu, RISCV_CORE_REG(regs.t5), &core.regs.t5);
+ vcpu_get_reg(vcpu, RISCV_CORE_REG(regs.t6), &core.regs.t6);
+
+ fprintf(stream,
+ " MODE: 0x%lx\n", core.mode);
+ fprintf(stream,
+ " PC: 0x%016lx RA: 0x%016lx SP: 0x%016lx GP: 0x%016lx\n",
+ core.regs.pc, core.regs.ra, core.regs.sp, core.regs.gp);
+ fprintf(stream,
+ " TP: 0x%016lx T0: 0x%016lx T1: 0x%016lx T2: 0x%016lx\n",
+ core.regs.tp, core.regs.t0, core.regs.t1, core.regs.t2);
+ fprintf(stream,
+ " S0: 0x%016lx S1: 0x%016lx A0: 0x%016lx A1: 0x%016lx\n",
+ core.regs.s0, core.regs.s1, core.regs.a0, core.regs.a1);
+ fprintf(stream,
+ " A2: 0x%016lx A3: 0x%016lx A4: 0x%016lx A5: 0x%016lx\n",
+ core.regs.a2, core.regs.a3, core.regs.a4, core.regs.a5);
+ fprintf(stream,
+ " A6: 0x%016lx A7: 0x%016lx S2: 0x%016lx S3: 0x%016lx\n",
+ core.regs.a6, core.regs.a7, core.regs.s2, core.regs.s3);
+ fprintf(stream,
+ " S4: 0x%016lx S5: 0x%016lx S6: 0x%016lx S7: 0x%016lx\n",
+ core.regs.s4, core.regs.s5, core.regs.s6, core.regs.s7);
+ fprintf(stream,
+ " S8: 0x%016lx S9: 0x%016lx S10: 0x%016lx S11: 0x%016lx\n",
+ core.regs.s8, core.regs.s9, core.regs.s10, core.regs.s11);
+ fprintf(stream,
+ " T3: 0x%016lx T4: 0x%016lx T5: 0x%016lx T6: 0x%016lx\n",
+ core.regs.t3, core.regs.t4, core.regs.t5, core.regs.t6);
+}
+
+static void __aligned(16) guest_unexp_trap(void)
+{
+ sbi_ecall(KVM_RISCV_SELFTESTS_SBI_EXT,
+ KVM_RISCV_SELFTESTS_SBI_UNEXP,
+ 0, 0, 0, 0, 0, 0);
+}
+
+void vcpu_arch_set_entry_point(struct kvm_vcpu *vcpu, void *guest_code)
+{
+ vcpu_set_reg(vcpu, RISCV_CORE_REG(regs.pc), (unsigned long)guest_code);
+}
+
+struct kvm_vcpu *vm_arch_vcpu_add(struct kvm_vm *vm, uint32_t vcpu_id)
+{
+ int r;
+ size_t stack_size;
+ unsigned long stack_vaddr;
+ unsigned long current_gp = 0;
+ struct kvm_mp_state mps;
+ struct kvm_vcpu *vcpu;
+
+ stack_size = vm->page_size == 4096 ? DEFAULT_STACK_PGS * vm->page_size :
+ vm->page_size;
+ stack_vaddr = __vm_vaddr_alloc(vm, stack_size,
+ DEFAULT_RISCV_GUEST_STACK_VADDR_MIN,
+ MEM_REGION_DATA);
+
+ vcpu = __vm_vcpu_add(vm, vcpu_id);
+ riscv_vcpu_mmu_setup(vcpu);
+
+ /*
+ * With SBI HSM support in KVM RISC-V, all secondary VCPUs are
+ * powered-off by default so we ensure that all secondary VCPUs
+ * are powered-on using KVM_SET_MP_STATE ioctl().
+ */
+ mps.mp_state = KVM_MP_STATE_RUNNABLE;
+ r = __vcpu_ioctl(vcpu, KVM_SET_MP_STATE, &mps);
+ TEST_ASSERT(!r, "IOCTL KVM_SET_MP_STATE failed (error %d)", r);
+
+ /* Setup global pointer of guest to be same as the host */
+ asm volatile (
+ "add %0, gp, zero" : "=r" (current_gp) : : "memory");
+ vcpu_set_reg(vcpu, RISCV_CORE_REG(regs.gp), current_gp);
+
+ /* Setup stack pointer and program counter of guest */
+ vcpu_set_reg(vcpu, RISCV_CORE_REG(regs.sp), stack_vaddr + stack_size);
+
+ /* Setup sscratch for guest_get_vcpuid() */
+ vcpu_set_reg(vcpu, RISCV_GENERAL_CSR_REG(sscratch), vcpu_id);
+
+ /* Setup default exception vector of guest */
+ vcpu_set_reg(vcpu, RISCV_GENERAL_CSR_REG(stvec), (unsigned long)guest_unexp_trap);
+
+ return vcpu;
+}
+
+void vcpu_args_set(struct kvm_vcpu *vcpu, unsigned int num, ...)
+{
+ va_list ap;
+ uint64_t id = RISCV_CORE_REG(regs.a0);
+ int i;
+
+ TEST_ASSERT(num >= 1 && num <= 8, "Unsupported number of args,\n"
+ " num: %u", num);
+
+ va_start(ap, num);
+
+ for (i = 0; i < num; i++) {
+ switch (i) {
+ case 0:
+ id = RISCV_CORE_REG(regs.a0);
+ break;
+ case 1:
+ id = RISCV_CORE_REG(regs.a1);
+ break;
+ case 2:
+ id = RISCV_CORE_REG(regs.a2);
+ break;
+ case 3:
+ id = RISCV_CORE_REG(regs.a3);
+ break;
+ case 4:
+ id = RISCV_CORE_REG(regs.a4);
+ break;
+ case 5:
+ id = RISCV_CORE_REG(regs.a5);
+ break;
+ case 6:
+ id = RISCV_CORE_REG(regs.a6);
+ break;
+ case 7:
+ id = RISCV_CORE_REG(regs.a7);
+ break;
+ }
+ vcpu_set_reg(vcpu, id, va_arg(ap, uint64_t));
+ }
+
+ va_end(ap);
+}
+
+void kvm_exit_unexpected_exception(int vector, int ec)
+{
+ ucall(UCALL_UNHANDLED, 2, vector, ec);
+}
+
+void assert_on_unhandled_exception(struct kvm_vcpu *vcpu)
+{
+ struct ucall uc;
+
+ if (get_ucall(vcpu, &uc) == UCALL_UNHANDLED) {
+ TEST_FAIL("Unexpected exception (vector:0x%lx, ec:0x%lx)",
+ uc.args[0], uc.args[1]);
+ }
+}
+
+struct handlers {
+ exception_handler_fn exception_handlers[NR_VECTORS][NR_EXCEPTIONS];
+};
+
+void route_exception(struct ex_regs *regs)
+{
+ struct handlers *handlers = (struct handlers *)exception_handlers;
+ int vector = 0, ec;
+
+ ec = regs->cause & ~CAUSE_IRQ_FLAG;
+ if (ec >= NR_EXCEPTIONS)
+ goto unexpected_exception;
+
+ /* Use the same handler for all the interrupts */
+ if (regs->cause & CAUSE_IRQ_FLAG) {
+ vector = 1;
+ ec = 0;
+ }
+
+ if (handlers && handlers->exception_handlers[vector][ec])
+ return handlers->exception_handlers[vector][ec](regs);
+
+unexpected_exception:
+ return kvm_exit_unexpected_exception(vector, ec);
+}
+
+void vcpu_init_vector_tables(struct kvm_vcpu *vcpu)
+{
+ extern char exception_vectors;
+
+ vcpu_set_reg(vcpu, RISCV_GENERAL_CSR_REG(stvec), (unsigned long)&exception_vectors);
+}
+
+void vm_init_vector_tables(struct kvm_vm *vm)
+{
+ vm->handlers = __vm_vaddr_alloc(vm, sizeof(struct handlers),
+ vm->page_size, MEM_REGION_DATA);
+
+ *(vm_vaddr_t *)addr_gva2hva(vm, (vm_vaddr_t)(&exception_handlers)) = vm->handlers;
+}
+
+void vm_install_exception_handler(struct kvm_vm *vm, int vector, exception_handler_fn handler)
+{
+ struct handlers *handlers = addr_gva2hva(vm, vm->handlers);
+
+ assert(vector < NR_EXCEPTIONS);
+ handlers->exception_handlers[0][vector] = handler;
+}
+
+void vm_install_interrupt_handler(struct kvm_vm *vm, exception_handler_fn handler)
+{
+ struct handlers *handlers = addr_gva2hva(vm, vm->handlers);
+
+ handlers->exception_handlers[1][0] = handler;
+}
+
+uint32_t guest_get_vcpuid(void)
+{
+ return csr_read(CSR_SSCRATCH);
+}
+
+struct sbiret sbi_ecall(int ext, int fid, unsigned long arg0,
+ unsigned long arg1, unsigned long arg2,
+ unsigned long arg3, unsigned long arg4,
+ unsigned long arg5)
+{
+ register uintptr_t a0 asm ("a0") = (uintptr_t)(arg0);
+ register uintptr_t a1 asm ("a1") = (uintptr_t)(arg1);
+ register uintptr_t a2 asm ("a2") = (uintptr_t)(arg2);
+ register uintptr_t a3 asm ("a3") = (uintptr_t)(arg3);
+ register uintptr_t a4 asm ("a4") = (uintptr_t)(arg4);
+ register uintptr_t a5 asm ("a5") = (uintptr_t)(arg5);
+ register uintptr_t a6 asm ("a6") = (uintptr_t)(fid);
+ register uintptr_t a7 asm ("a7") = (uintptr_t)(ext);
+ struct sbiret ret;
+
+ asm volatile (
+ "ecall"
+ : "+r" (a0), "+r" (a1)
+ : "r" (a2), "r" (a3), "r" (a4), "r" (a5), "r" (a6), "r" (a7)
+ : "memory");
+ ret.error = a0;
+ ret.value = a1;
+
+ return ret;
+}
+
+bool guest_sbi_probe_extension(int extid, long *out_val)
+{
+ struct sbiret ret;
+
+ ret = sbi_ecall(SBI_EXT_BASE, SBI_EXT_BASE_PROBE_EXT, extid,
+ 0, 0, 0, 0, 0);
+
+ __GUEST_ASSERT(!ret.error || ret.error == SBI_ERR_NOT_SUPPORTED,
+ "ret.error=%ld, ret.value=%ld\n", ret.error, ret.value);
+
+ if (ret.error == SBI_ERR_NOT_SUPPORTED)
+ return false;
+
+ if (out_val)
+ *out_val = ret.value;
+
+ return true;
+}
diff --git a/tools/testing/selftests/kvm/lib/riscv/ucall.c b/tools/testing/selftests/kvm/lib/riscv/ucall.c
new file mode 100644
index 000000000000..14ee17151a59
--- /dev/null
+++ b/tools/testing/selftests/kvm/lib/riscv/ucall.c
@@ -0,0 +1,31 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * ucall support. A ucall is a "hypercall to userspace".
+ *
+ * Copyright (C) 2021 Western Digital Corporation or its affiliates.
+ */
+
+#include <linux/kvm.h>
+
+#include "kvm_util.h"
+#include "processor.h"
+
+void *ucall_arch_get_ucall(struct kvm_vcpu *vcpu)
+{
+ struct kvm_run *run = vcpu->run;
+
+ if (run->exit_reason == KVM_EXIT_RISCV_SBI &&
+ run->riscv_sbi.extension_id == KVM_RISCV_SELFTESTS_SBI_EXT) {
+ switch (run->riscv_sbi.function_id) {
+ case KVM_RISCV_SELFTESTS_SBI_UCALL:
+ return (void *)run->riscv_sbi.args[0];
+ case KVM_RISCV_SELFTESTS_SBI_UNEXP:
+ vcpu_dump(stderr, vcpu, 2);
+ TEST_ASSERT(0, "Unexpected trap taken by guest");
+ break;
+ default:
+ break;
+ }
+ }
+ return NULL;
+}
diff --git a/tools/testing/selftests/kvm/lib/s390x/diag318_test_handler.c b/tools/testing/selftests/kvm/lib/s390x/diag318_test_handler.c
new file mode 100644
index 000000000000..2c432fa164f1
--- /dev/null
+++ b/tools/testing/selftests/kvm/lib/s390x/diag318_test_handler.c
@@ -0,0 +1,80 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * Test handler for the s390x DIAGNOSE 0x0318 instruction.
+ *
+ * Copyright (C) 2020, IBM
+ */
+
+#include "test_util.h"
+#include "kvm_util.h"
+
+#define ICPT_INSTRUCTION 0x04
+#define IPA0_DIAG 0x8300
+
+static void guest_code(void)
+{
+ uint64_t diag318_info = 0x12345678;
+
+ asm volatile ("diag %0,0,0x318\n" : : "d" (diag318_info));
+}
+
+/*
+ * The DIAGNOSE 0x0318 instruction call must be handled via userspace. As such,
+ * we create an ad-hoc VM here to handle the instruction then extract the
+ * necessary data. It is up to the caller to decide what to do with that data.
+ */
+static uint64_t diag318_handler(void)
+{
+ struct kvm_vcpu *vcpu;
+ struct kvm_vm *vm;
+ struct kvm_run *run;
+ uint64_t reg;
+ uint64_t diag318_info;
+
+ vm = vm_create_with_one_vcpu(&vcpu, guest_code);
+ vcpu_run(vcpu);
+ run = vcpu->run;
+
+ TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_S390_SIEIC);
+ TEST_ASSERT(run->s390_sieic.icptcode == ICPT_INSTRUCTION,
+ "Unexpected intercept code: 0x%x", run->s390_sieic.icptcode);
+ TEST_ASSERT((run->s390_sieic.ipa & 0xff00) == IPA0_DIAG,
+ "Unexpected IPA0 code: 0x%x", (run->s390_sieic.ipa & 0xff00));
+
+ reg = (run->s390_sieic.ipa & 0x00f0) >> 4;
+ diag318_info = run->s.regs.gprs[reg];
+
+ TEST_ASSERT(diag318_info != 0, "DIAGNOSE 0x0318 info not set");
+
+ kvm_vm_free(vm);
+
+ return diag318_info;
+}
+
+uint64_t get_diag318_info(void)
+{
+ static uint64_t diag318_info;
+ static bool printed_skip;
+
+ /*
+ * If KVM does not support diag318, then return 0 to
+ * ensure tests do not break.
+ */
+ if (!kvm_has_cap(KVM_CAP_S390_DIAG318)) {
+ if (!printed_skip) {
+ fprintf(stdout, "KVM_CAP_S390_DIAG318 not supported. "
+ "Skipping diag318 test.\n");
+ printed_skip = true;
+ }
+ return 0;
+ }
+
+ /*
+ * If a test has previously requested the diag318 info,
+ * then don't bother spinning up a temporary VM again.
+ */
+ if (!diag318_info)
+ diag318_info = diag318_handler();
+
+ return diag318_info;
+}
diff --git a/tools/testing/selftests/kvm/lib/s390x/processor.c b/tools/testing/selftests/kvm/lib/s390x/processor.c
index a88c5d665725..4ad4492eea1d 100644
--- a/tools/testing/selftests/kvm/lib/s390x/processor.c
+++ b/tools/testing/selftests/kvm/lib/s390x/processor.c
@@ -5,17 +5,12 @@
* Copyright (C) 2019, Red Hat, Inc.
*/
-#define _GNU_SOURCE /* for program_invocation_name */
-
#include "processor.h"
#include "kvm_util.h"
-#include "../kvm_util_internal.h"
-
-#define KVM_GUEST_PAGE_TABLE_MIN_PADDR 0x180000
#define PAGES_PER_REGION 4
-void virt_pgd_alloc(struct kvm_vm *vm, uint32_t memslot)
+void virt_arch_pgd_alloc(struct kvm_vm *vm)
{
vm_paddr_t paddr;
@@ -26,7 +21,8 @@ void virt_pgd_alloc(struct kvm_vm *vm, uint32_t memslot)
return;
paddr = vm_phy_pages_alloc(vm, PAGES_PER_REGION,
- KVM_GUEST_PAGE_TABLE_MIN_PADDR, memslot);
+ KVM_GUEST_PAGE_TABLE_MIN_PADDR,
+ vm->memslots[MEM_REGION_PT]);
memset(addr_gpa2hva(vm, paddr), 0xff, PAGES_PER_REGION * vm->page_size);
vm->pgd = paddr;
@@ -38,12 +34,12 @@ void virt_pgd_alloc(struct kvm_vm *vm, uint32_t memslot)
* a page table (ri == 4). Returns a suitable region/segment table entry
* which points to the freshly allocated pages.
*/
-static uint64_t virt_alloc_region(struct kvm_vm *vm, int ri, uint32_t memslot)
+static uint64_t virt_alloc_region(struct kvm_vm *vm, int ri)
{
uint64_t taddr;
taddr = vm_phy_pages_alloc(vm, ri < 4 ? PAGES_PER_REGION : 1,
- KVM_GUEST_PAGE_TABLE_MIN_PADDR, memslot);
+ KVM_GUEST_PAGE_TABLE_MIN_PADDR, 0);
memset(addr_gpa2hva(vm, taddr), 0xff, PAGES_PER_REGION * vm->page_size);
return (taddr & REGION_ENTRY_ORIGIN)
@@ -51,8 +47,7 @@ static uint64_t virt_alloc_region(struct kvm_vm *vm, int ri, uint32_t memslot)
| ((ri < 4 ? (PAGES_PER_REGION - 1) : 0) & REGION_ENTRY_LENGTH);
}
-void virt_pg_map(struct kvm_vm *vm, uint64_t gva, uint64_t gpa,
- uint32_t memslot)
+void virt_arch_pg_map(struct kvm_vm *vm, uint64_t gva, uint64_t gpa)
{
int ri, idx;
uint64_t *entry;
@@ -79,7 +74,7 @@ void virt_pg_map(struct kvm_vm *vm, uint64_t gva, uint64_t gpa,
for (ri = 1; ri <= 4; ri++) {
idx = (gva >> (64 - 11 * ri)) & 0x7ffu;
if (entry[idx] & REGION_ENTRY_INVALID)
- entry[idx] = virt_alloc_region(vm, ri, memslot);
+ entry[idx] = virt_alloc_region(vm, ri);
entry = addr_gpa2hva(vm, entry[idx] & REGION_ENTRY_ORIGIN);
}
@@ -91,7 +86,7 @@ void virt_pg_map(struct kvm_vm *vm, uint64_t gva, uint64_t gpa,
entry[idx] = gpa;
}
-vm_paddr_t addr_gva2gpa(struct kvm_vm *vm, vm_vaddr_t gva)
+vm_paddr_t addr_arch_gva2gpa(struct kvm_vm *vm, vm_vaddr_t gva)
{
int ri, idx;
uint64_t *entry;
@@ -152,7 +147,7 @@ static void virt_dump_region(FILE *stream, struct kvm_vm *vm, uint8_t indent,
}
}
-void virt_dump(FILE *stream, struct kvm_vm *vm, uint8_t indent)
+void virt_arch_dump(FILE *stream, struct kvm_vm *vm, uint8_t indent)
{
if (!vm->pgd_created)
return;
@@ -160,84 +155,69 @@ void virt_dump(FILE *stream, struct kvm_vm *vm, uint8_t indent)
virt_dump_region(stream, vm, indent, vm->pgd);
}
-struct kvm_vm *vm_create_default(uint32_t vcpuid, uint64_t extra_mem_pages,
- void *guest_code)
+void vcpu_arch_set_entry_point(struct kvm_vcpu *vcpu, void *guest_code)
{
- /*
- * The additional amount of pages required for the page tables is:
- * 1 * n / 256 + 4 * (n / 256) / 2048 + 4 * (n / 256) / 2048^2 + ...
- * which is definitely smaller than (n / 256) * 2.
- */
- uint64_t extra_pg_pages = extra_mem_pages / 256 * 2;
- struct kvm_vm *vm;
-
- vm = vm_create(VM_MODE_DEFAULT,
- DEFAULT_GUEST_PHY_PAGES + extra_pg_pages, O_RDWR);
-
- kvm_vm_elf_load(vm, program_invocation_name, 0, 0);
- vm_vcpu_add_default(vm, vcpuid, guest_code);
-
- return vm;
+ vcpu->run->psw_addr = (uintptr_t)guest_code;
}
-void vm_vcpu_add_default(struct kvm_vm *vm, uint32_t vcpuid, void *guest_code)
+struct kvm_vcpu *vm_arch_vcpu_add(struct kvm_vm *vm, uint32_t vcpu_id)
{
size_t stack_size = DEFAULT_STACK_PGS * getpagesize();
uint64_t stack_vaddr;
struct kvm_regs regs;
struct kvm_sregs sregs;
- struct kvm_run *run;
+ struct kvm_vcpu *vcpu;
TEST_ASSERT(vm->page_size == 4096, "Unsupported page size: 0x%x",
vm->page_size);
- stack_vaddr = vm_vaddr_alloc(vm, stack_size,
- DEFAULT_GUEST_STACK_VADDR_MIN, 0, 0);
+ stack_vaddr = __vm_vaddr_alloc(vm, stack_size,
+ DEFAULT_GUEST_STACK_VADDR_MIN,
+ MEM_REGION_DATA);
- vm_vcpu_add(vm, vcpuid);
+ vcpu = __vm_vcpu_add(vm, vcpu_id);
/* Setup guest registers */
- vcpu_regs_get(vm, vcpuid, &regs);
+ vcpu_regs_get(vcpu, &regs);
regs.gprs[15] = stack_vaddr + (DEFAULT_STACK_PGS * getpagesize()) - 160;
- vcpu_regs_set(vm, vcpuid, &regs);
+ vcpu_regs_set(vcpu, &regs);
- vcpu_sregs_get(vm, vcpuid, &sregs);
+ vcpu_sregs_get(vcpu, &sregs);
sregs.crs[0] |= 0x00040000; /* Enable floating point regs */
sregs.crs[1] = vm->pgd | 0xf; /* Primary region table */
- vcpu_sregs_set(vm, vcpuid, &sregs);
+ vcpu_sregs_set(vcpu, &sregs);
+
+ vcpu->run->psw_mask = 0x0400000180000000ULL; /* DAT enabled + 64 bit mode */
- run = vcpu_state(vm, vcpuid);
- run->psw_mask = 0x0400000180000000ULL; /* DAT enabled + 64 bit mode */
- run->psw_addr = (uintptr_t)guest_code;
+ return vcpu;
}
-void vcpu_args_set(struct kvm_vm *vm, uint32_t vcpuid, unsigned int num, ...)
+void vcpu_args_set(struct kvm_vcpu *vcpu, unsigned int num, ...)
{
va_list ap;
struct kvm_regs regs;
int i;
TEST_ASSERT(num >= 1 && num <= 5, "Unsupported number of args,\n"
- " num: %u\n",
+ " num: %u",
num);
va_start(ap, num);
- vcpu_regs_get(vm, vcpuid, &regs);
+ vcpu_regs_get(vcpu, &regs);
for (i = 0; i < num; i++)
regs.gprs[i + 2] = va_arg(ap, uint64_t);
- vcpu_regs_set(vm, vcpuid, &regs);
+ vcpu_regs_set(vcpu, &regs);
va_end(ap);
}
-void vcpu_dump(FILE *stream, struct kvm_vm *vm, uint32_t vcpuid, uint8_t indent)
+void vcpu_arch_dump(FILE *stream, struct kvm_vcpu *vcpu, uint8_t indent)
{
- struct vcpu *vcpu = vcpu_find(vm, vcpuid);
-
- if (!vcpu)
- return;
-
fprintf(stream, "%*spstate: psw: 0x%.16llx:0x%.16llx\n",
- indent, "", vcpu->state->psw_mask, vcpu->state->psw_addr);
+ indent, "", vcpu->run->psw_mask, vcpu->run->psw_addr);
+}
+
+void assert_on_unhandled_exception(struct kvm_vcpu *vcpu)
+{
}
diff --git a/tools/testing/selftests/kvm/lib/s390x/ucall.c b/tools/testing/selftests/kvm/lib/s390x/ucall.c
index fd589dc9bfab..cca98734653d 100644
--- a/tools/testing/selftests/kvm/lib/s390x/ucall.c
+++ b/tools/testing/selftests/kvm/lib/s390x/ucall.c
@@ -6,37 +6,9 @@
*/
#include "kvm_util.h"
-void ucall_init(struct kvm_vm *vm, void *arg)
+void *ucall_arch_get_ucall(struct kvm_vcpu *vcpu)
{
-}
-
-void ucall_uninit(struct kvm_vm *vm)
-{
-}
-
-void ucall(uint64_t cmd, int nargs, ...)
-{
- struct ucall uc = {
- .cmd = cmd,
- };
- va_list va;
- int i;
-
- nargs = nargs <= UCALL_MAX_ARGS ? nargs : UCALL_MAX_ARGS;
-
- va_start(va, nargs);
- for (i = 0; i < nargs; ++i)
- uc.args[i] = va_arg(va, uint64_t);
- va_end(va);
-
- /* Exit via DIAGNOSE 0x501 (normally used for breakpoints) */
- asm volatile ("diag 0,%0,0x501" : : "a"(&uc) : "memory");
-}
-
-uint64_t get_ucall(struct kvm_vm *vm, uint32_t vcpu_id, struct ucall *uc)
-{
- struct kvm_run *run = vcpu_state(vm, vcpu_id);
- struct ucall ucall = {};
+ struct kvm_run *run = vcpu->run;
if (run->exit_reason == KVM_EXIT_S390_SIEIC &&
run->s390_sieic.icptcode == 4 &&
@@ -44,13 +16,7 @@ uint64_t get_ucall(struct kvm_vm *vm, uint32_t vcpu_id, struct ucall *uc)
(run->s390_sieic.ipb >> 16) == 0x501) {
int reg = run->s390_sieic.ipa & 0xf;
- memcpy(&ucall, addr_gva2hva(vm, run->s.regs.gprs[reg]),
- sizeof(ucall));
-
- vcpu_run_complete_io(vm, vcpu_id);
- if (uc)
- memcpy(uc, &ucall, sizeof(ucall));
+ return (void *)run->s.regs.gprs[reg];
}
-
- return ucall.cmd;
+ return NULL;
}
diff --git a/tools/testing/selftests/kvm/lib/sparsebit.c b/tools/testing/selftests/kvm/lib/sparsebit.c
index 031ba3c932ed..cfed9d26cc71 100644
--- a/tools/testing/selftests/kvm/lib/sparsebit.c
+++ b/tools/testing/selftests/kvm/lib/sparsebit.c
@@ -202,7 +202,7 @@ static sparsebit_num_t node_num_set(struct node *nodep)
/* Returns a pointer to the node that describes the
* lowest bit index.
*/
-static struct node *node_first(struct sparsebit *s)
+static struct node *node_first(const struct sparsebit *s)
{
struct node *nodep;
@@ -216,7 +216,7 @@ static struct node *node_first(struct sparsebit *s)
* lowest bit index > the index of the node pointed to by np.
* Returns NULL if no node with a higher index exists.
*/
-static struct node *node_next(struct sparsebit *s, struct node *np)
+static struct node *node_next(const struct sparsebit *s, struct node *np)
{
struct node *nodep = np;
@@ -244,7 +244,7 @@ static struct node *node_next(struct sparsebit *s, struct node *np)
* highest index < the index of the node pointed to by np.
* Returns NULL if no node with a lower index exists.
*/
-static struct node *node_prev(struct sparsebit *s, struct node *np)
+static struct node *node_prev(const struct sparsebit *s, struct node *np)
{
struct node *nodep = np;
@@ -273,7 +273,7 @@ static struct node *node_prev(struct sparsebit *s, struct node *np)
* subtree and duplicates the bit settings to the newly allocated nodes.
* Returns the newly allocated copy of subtree.
*/
-static struct node *node_copy_subtree(struct node *subtree)
+static struct node *node_copy_subtree(const struct node *subtree)
{
struct node *root;
@@ -307,7 +307,7 @@ static struct node *node_copy_subtree(struct node *subtree)
* index is within the bits described by the mask bits or the number of
* contiguous bits set after the mask. Returns NULL if there is no such node.
*/
-static struct node *node_find(struct sparsebit *s, sparsebit_idx_t idx)
+static struct node *node_find(const struct sparsebit *s, sparsebit_idx_t idx)
{
struct node *nodep;
@@ -393,7 +393,7 @@ static struct node *node_add(struct sparsebit *s, sparsebit_idx_t idx)
}
/* Returns whether all the bits in the sparsebit array are set. */
-bool sparsebit_all_set(struct sparsebit *s)
+bool sparsebit_all_set(const struct sparsebit *s)
{
/*
* If any nodes there must be at least one bit set. Only case
@@ -634,7 +634,6 @@ static void node_reduce(struct sparsebit *s, struct node *nodep)
tmp = node_prev(s, nodep);
node_rm(s, nodep);
- nodep = NULL;
nodep = tmp;
reduction_performed = true;
@@ -776,7 +775,7 @@ static void node_reduce(struct sparsebit *s, struct node *nodep)
/* Returns whether the bit at the index given by idx, within the
* sparsebit array is set or not.
*/
-bool sparsebit_is_set(struct sparsebit *s, sparsebit_idx_t idx)
+bool sparsebit_is_set(const struct sparsebit *s, sparsebit_idx_t idx)
{
struct node *nodep;
@@ -922,7 +921,7 @@ static inline sparsebit_idx_t node_first_clear(struct node *nodep, int start)
* used by test cases after they detect an unexpected condition, as a means
* to capture diagnostic information.
*/
-static void sparsebit_dump_internal(FILE *stream, struct sparsebit *s,
+static void sparsebit_dump_internal(FILE *stream, const struct sparsebit *s,
unsigned int indent)
{
/* Dump the contents of s */
@@ -970,7 +969,7 @@ void sparsebit_free(struct sparsebit **sbitp)
* sparsebit_alloc(). It can though already have bits set, which
* if different from src will be cleared.
*/
-void sparsebit_copy(struct sparsebit *d, struct sparsebit *s)
+void sparsebit_copy(struct sparsebit *d, const struct sparsebit *s)
{
/* First clear any bits already set in the destination */
sparsebit_clear_all(d);
@@ -982,7 +981,7 @@ void sparsebit_copy(struct sparsebit *d, struct sparsebit *s)
}
/* Returns whether num consecutive bits starting at idx are all set. */
-bool sparsebit_is_set_num(struct sparsebit *s,
+bool sparsebit_is_set_num(const struct sparsebit *s,
sparsebit_idx_t idx, sparsebit_num_t num)
{
sparsebit_idx_t next_cleared;
@@ -1006,14 +1005,14 @@ bool sparsebit_is_set_num(struct sparsebit *s,
}
/* Returns whether the bit at the index given by idx. */
-bool sparsebit_is_clear(struct sparsebit *s,
+bool sparsebit_is_clear(const struct sparsebit *s,
sparsebit_idx_t idx)
{
return !sparsebit_is_set(s, idx);
}
/* Returns whether num consecutive bits starting at idx are all cleared. */
-bool sparsebit_is_clear_num(struct sparsebit *s,
+bool sparsebit_is_clear_num(const struct sparsebit *s,
sparsebit_idx_t idx, sparsebit_num_t num)
{
sparsebit_idx_t next_set;
@@ -1042,13 +1041,13 @@ bool sparsebit_is_clear_num(struct sparsebit *s,
* value. Use sparsebit_any_set(), instead of sparsebit_num_set() > 0,
* to determine if the sparsebit array has any bits set.
*/
-sparsebit_num_t sparsebit_num_set(struct sparsebit *s)
+sparsebit_num_t sparsebit_num_set(const struct sparsebit *s)
{
return s->num_set;
}
/* Returns whether any bit is set in the sparsebit array. */
-bool sparsebit_any_set(struct sparsebit *s)
+bool sparsebit_any_set(const struct sparsebit *s)
{
/*
* Nodes only describe set bits. If any nodes then there
@@ -1071,20 +1070,20 @@ bool sparsebit_any_set(struct sparsebit *s)
}
/* Returns whether all the bits in the sparsebit array are cleared. */
-bool sparsebit_all_clear(struct sparsebit *s)
+bool sparsebit_all_clear(const struct sparsebit *s)
{
return !sparsebit_any_set(s);
}
/* Returns whether all the bits in the sparsebit array are set. */
-bool sparsebit_any_clear(struct sparsebit *s)
+bool sparsebit_any_clear(const struct sparsebit *s)
{
return !sparsebit_all_set(s);
}
/* Returns the index of the first set bit. Abort if no bits are set.
*/
-sparsebit_idx_t sparsebit_first_set(struct sparsebit *s)
+sparsebit_idx_t sparsebit_first_set(const struct sparsebit *s)
{
struct node *nodep;
@@ -1098,7 +1097,7 @@ sparsebit_idx_t sparsebit_first_set(struct sparsebit *s)
/* Returns the index of the first cleared bit. Abort if
* no bits are cleared.
*/
-sparsebit_idx_t sparsebit_first_clear(struct sparsebit *s)
+sparsebit_idx_t sparsebit_first_clear(const struct sparsebit *s)
{
struct node *nodep1, *nodep2;
@@ -1152,7 +1151,7 @@ sparsebit_idx_t sparsebit_first_clear(struct sparsebit *s)
/* Returns index of next bit set within s after the index given by prev.
* Returns 0 if there are no bits after prev that are set.
*/
-sparsebit_idx_t sparsebit_next_set(struct sparsebit *s,
+sparsebit_idx_t sparsebit_next_set(const struct sparsebit *s,
sparsebit_idx_t prev)
{
sparsebit_idx_t lowest_possible = prev + 1;
@@ -1245,7 +1244,7 @@ sparsebit_idx_t sparsebit_next_set(struct sparsebit *s,
/* Returns index of next bit cleared within s after the index given by prev.
* Returns 0 if there are no bits after prev that are cleared.
*/
-sparsebit_idx_t sparsebit_next_clear(struct sparsebit *s,
+sparsebit_idx_t sparsebit_next_clear(const struct sparsebit *s,
sparsebit_idx_t prev)
{
sparsebit_idx_t lowest_possible = prev + 1;
@@ -1301,7 +1300,7 @@ sparsebit_idx_t sparsebit_next_clear(struct sparsebit *s,
* and returns the index of the first sequence of num consecutively set
* bits. Returns a value of 0 of no such sequence exists.
*/
-sparsebit_idx_t sparsebit_next_set_num(struct sparsebit *s,
+sparsebit_idx_t sparsebit_next_set_num(const struct sparsebit *s,
sparsebit_idx_t start, sparsebit_num_t num)
{
sparsebit_idx_t idx;
@@ -1336,7 +1335,7 @@ sparsebit_idx_t sparsebit_next_set_num(struct sparsebit *s,
* and returns the index of the first sequence of num consecutively cleared
* bits. Returns a value of 0 of no such sequence exists.
*/
-sparsebit_idx_t sparsebit_next_clear_num(struct sparsebit *s,
+sparsebit_idx_t sparsebit_next_clear_num(const struct sparsebit *s,
sparsebit_idx_t start, sparsebit_num_t num)
{
sparsebit_idx_t idx;
@@ -1584,7 +1583,7 @@ static size_t display_range(FILE *stream, sparsebit_idx_t low,
* contiguous bits. This is done because '-' is used to specify command-line
* options, and sometimes ranges are specified as command-line arguments.
*/
-void sparsebit_dump(FILE *stream, struct sparsebit *s,
+void sparsebit_dump(FILE *stream, const struct sparsebit *s,
unsigned int indent)
{
size_t current_line_len = 0;
@@ -1682,7 +1681,7 @@ void sparsebit_dump(FILE *stream, struct sparsebit *s,
* s. On error, diagnostic information is printed to stderr and
* abort is called.
*/
-void sparsebit_validate_internal(struct sparsebit *s)
+void sparsebit_validate_internal(const struct sparsebit *s)
{
bool error_detected = false;
struct node *nodep, *prev = NULL;
@@ -1866,7 +1865,7 @@ void sparsebit_validate_internal(struct sparsebit *s)
* of total bits set.
*/
if (s->num_set != total_bits_set) {
- fprintf(stderr, "Number of bits set missmatch,\n"
+ fprintf(stderr, "Number of bits set mismatch,\n"
" s->num_set: 0x%lx total_bits_set: 0x%lx",
s->num_set, total_bits_set);
@@ -1890,7 +1889,6 @@ void sparsebit_validate_internal(struct sparsebit *s)
*/
#include <stdlib.h>
-#include <assert.h>
struct range {
sparsebit_idx_t first, last;
diff --git a/tools/testing/selftests/kvm/lib/string_override.c b/tools/testing/selftests/kvm/lib/string_override.c
new file mode 100644
index 000000000000..5d1c87277c49
--- /dev/null
+++ b/tools/testing/selftests/kvm/lib/string_override.c
@@ -0,0 +1,48 @@
+// SPDX-License-Identifier: GPL-2.0-only
+#include <stddef.h>
+
+/*
+ * Override the "basic" built-in string helpers so that they can be used in
+ * guest code. KVM selftests don't support dynamic loading in guest code and
+ * will jump into the weeds if the compiler decides to insert an out-of-line
+ * call via the PLT.
+ */
+int memcmp(const void *cs, const void *ct, size_t count)
+{
+ const unsigned char *su1, *su2;
+ int res = 0;
+
+ for (su1 = cs, su2 = ct; 0 < count; ++su1, ++su2, count--) {
+ if ((res = *su1 - *su2) != 0)
+ break;
+ }
+ return res;
+}
+
+void *memcpy(void *dest, const void *src, size_t count)
+{
+ char *tmp = dest;
+ const char *s = src;
+
+ while (count--)
+ *tmp++ = *s++;
+ return dest;
+}
+
+void *memset(void *s, int c, size_t count)
+{
+ char *xs = s;
+
+ while (count--)
+ *xs++ = c;
+ return s;
+}
+
+size_t strnlen(const char *s, size_t count)
+{
+ const char *sc;
+
+ for (sc = s; count-- && *sc != '\0'; ++sc)
+ /* nothing */;
+ return sc - s;
+}
diff --git a/tools/testing/selftests/kvm/lib/test_util.c b/tools/testing/selftests/kvm/lib/test_util.c
index 689e97c27ee2..5a8f8becb129 100644
--- a/tools/testing/selftests/kvm/lib/test_util.c
+++ b/tools/testing/selftests/kvm/lib/test_util.c
@@ -4,13 +4,40 @@
*
* Copyright (C) 2020, Google LLC.
*/
-#include <stdlib.h>
+
+#define _GNU_SOURCE
+#include <stdio.h>
+#include <stdarg.h>
+#include <assert.h>
#include <ctype.h>
#include <limits.h>
-#include <assert.h>
+#include <stdlib.h>
+#include <time.h>
+#include <sys/stat.h>
+#include <sys/syscall.h>
+#include <linux/mman.h>
+#include "linux/kernel.h"
+
#include "test_util.h"
/*
+ * Random number generator that is usable from guest code. This is the
+ * Park-Miller LCG using standard constants.
+ */
+
+struct guest_random_state new_guest_random_state(uint32_t seed)
+{
+ struct guest_random_state s = {.seed = seed};
+ return s;
+}
+
+uint32_t guest_random_u32(struct guest_random_state *state)
+{
+ state->seed = (uint64_t)state->seed * 48271 % ((uint32_t)(1 << 31) - 1);
+ return state->seed;
+}
+
+/*
* Parses "[0-9]+[kmgt]?".
*/
size_t parse_size(const char *size)
@@ -81,6 +108,21 @@ struct timespec timespec_sub(struct timespec ts1, struct timespec ts2)
return timespec_add_ns((struct timespec){0}, ns1 - ns2);
}
+struct timespec timespec_elapsed(struct timespec start)
+{
+ struct timespec end;
+
+ clock_gettime(CLOCK_MONOTONIC, &end);
+ return timespec_sub(end, start);
+}
+
+struct timespec timespec_div(struct timespec ts, int divisor)
+{
+ int64_t ns = timespec_to_ns(ts) / divisor;
+
+ return timespec_add_ns((struct timespec){0}, ns);
+}
+
void print_skip(const char *fmt, ...)
{
va_list ap;
@@ -91,3 +133,287 @@ void print_skip(const char *fmt, ...)
va_end(ap);
puts(", skipping test");
}
+
+bool thp_configured(void)
+{
+ int ret;
+ struct stat statbuf;
+
+ ret = stat("/sys/kernel/mm/transparent_hugepage", &statbuf);
+ TEST_ASSERT(ret == 0 || (ret == -1 && errno == ENOENT),
+ "Error in stating /sys/kernel/mm/transparent_hugepage");
+
+ return ret == 0;
+}
+
+size_t get_trans_hugepagesz(void)
+{
+ size_t size;
+ FILE *f;
+ int ret;
+
+ TEST_ASSERT(thp_configured(), "THP is not configured in host kernel");
+
+ f = fopen("/sys/kernel/mm/transparent_hugepage/hpage_pmd_size", "r");
+ TEST_ASSERT(f != NULL, "Error in opening transparent_hugepage/hpage_pmd_size");
+
+ ret = fscanf(f, "%ld", &size);
+ ret = fscanf(f, "%ld", &size);
+ TEST_ASSERT(ret < 1, "Error reading transparent_hugepage/hpage_pmd_size");
+ fclose(f);
+
+ return size;
+}
+
+size_t get_def_hugetlb_pagesz(void)
+{
+ char buf[64];
+ const char *hugepagesize = "Hugepagesize:";
+ const char *hugepages_total = "HugePages_Total:";
+ FILE *f;
+
+ f = fopen("/proc/meminfo", "r");
+ TEST_ASSERT(f != NULL, "Error in opening /proc/meminfo");
+
+ while (fgets(buf, sizeof(buf), f) != NULL) {
+ if (strstr(buf, hugepages_total) == buf) {
+ unsigned long long total = strtoull(buf + strlen(hugepages_total), NULL, 10);
+ if (!total) {
+ fprintf(stderr, "HUGETLB is not enabled in /proc/sys/vm/nr_hugepages\n");
+ exit(KSFT_SKIP);
+ }
+ }
+ if (strstr(buf, hugepagesize) == buf) {
+ fclose(f);
+ return strtoull(buf + strlen(hugepagesize), NULL, 10) << 10;
+ }
+ }
+
+ if (feof(f)) {
+ fprintf(stderr, "HUGETLB is not configured in host kernel");
+ exit(KSFT_SKIP);
+ }
+
+ TEST_FAIL("Error in reading /proc/meminfo");
+}
+
+#define ANON_FLAGS (MAP_PRIVATE | MAP_ANONYMOUS)
+#define ANON_HUGE_FLAGS (ANON_FLAGS | MAP_HUGETLB)
+
+const struct vm_mem_backing_src_alias *vm_mem_backing_src_alias(uint32_t i)
+{
+ static const struct vm_mem_backing_src_alias aliases[] = {
+ [VM_MEM_SRC_ANONYMOUS] = {
+ .name = "anonymous",
+ .flag = ANON_FLAGS,
+ },
+ [VM_MEM_SRC_ANONYMOUS_THP] = {
+ .name = "anonymous_thp",
+ .flag = ANON_FLAGS,
+ },
+ [VM_MEM_SRC_ANONYMOUS_HUGETLB] = {
+ .name = "anonymous_hugetlb",
+ .flag = ANON_HUGE_FLAGS,
+ },
+ [VM_MEM_SRC_ANONYMOUS_HUGETLB_16KB] = {
+ .name = "anonymous_hugetlb_16kb",
+ .flag = ANON_HUGE_FLAGS | MAP_HUGE_16KB,
+ },
+ [VM_MEM_SRC_ANONYMOUS_HUGETLB_64KB] = {
+ .name = "anonymous_hugetlb_64kb",
+ .flag = ANON_HUGE_FLAGS | MAP_HUGE_64KB,
+ },
+ [VM_MEM_SRC_ANONYMOUS_HUGETLB_512KB] = {
+ .name = "anonymous_hugetlb_512kb",
+ .flag = ANON_HUGE_FLAGS | MAP_HUGE_512KB,
+ },
+ [VM_MEM_SRC_ANONYMOUS_HUGETLB_1MB] = {
+ .name = "anonymous_hugetlb_1mb",
+ .flag = ANON_HUGE_FLAGS | MAP_HUGE_1MB,
+ },
+ [VM_MEM_SRC_ANONYMOUS_HUGETLB_2MB] = {
+ .name = "anonymous_hugetlb_2mb",
+ .flag = ANON_HUGE_FLAGS | MAP_HUGE_2MB,
+ },
+ [VM_MEM_SRC_ANONYMOUS_HUGETLB_8MB] = {
+ .name = "anonymous_hugetlb_8mb",
+ .flag = ANON_HUGE_FLAGS | MAP_HUGE_8MB,
+ },
+ [VM_MEM_SRC_ANONYMOUS_HUGETLB_16MB] = {
+ .name = "anonymous_hugetlb_16mb",
+ .flag = ANON_HUGE_FLAGS | MAP_HUGE_16MB,
+ },
+ [VM_MEM_SRC_ANONYMOUS_HUGETLB_32MB] = {
+ .name = "anonymous_hugetlb_32mb",
+ .flag = ANON_HUGE_FLAGS | MAP_HUGE_32MB,
+ },
+ [VM_MEM_SRC_ANONYMOUS_HUGETLB_256MB] = {
+ .name = "anonymous_hugetlb_256mb",
+ .flag = ANON_HUGE_FLAGS | MAP_HUGE_256MB,
+ },
+ [VM_MEM_SRC_ANONYMOUS_HUGETLB_512MB] = {
+ .name = "anonymous_hugetlb_512mb",
+ .flag = ANON_HUGE_FLAGS | MAP_HUGE_512MB,
+ },
+ [VM_MEM_SRC_ANONYMOUS_HUGETLB_1GB] = {
+ .name = "anonymous_hugetlb_1gb",
+ .flag = ANON_HUGE_FLAGS | MAP_HUGE_1GB,
+ },
+ [VM_MEM_SRC_ANONYMOUS_HUGETLB_2GB] = {
+ .name = "anonymous_hugetlb_2gb",
+ .flag = ANON_HUGE_FLAGS | MAP_HUGE_2GB,
+ },
+ [VM_MEM_SRC_ANONYMOUS_HUGETLB_16GB] = {
+ .name = "anonymous_hugetlb_16gb",
+ .flag = ANON_HUGE_FLAGS | MAP_HUGE_16GB,
+ },
+ [VM_MEM_SRC_SHMEM] = {
+ .name = "shmem",
+ .flag = MAP_SHARED,
+ },
+ [VM_MEM_SRC_SHARED_HUGETLB] = {
+ .name = "shared_hugetlb",
+ /*
+ * No MAP_HUGETLB, we use MFD_HUGETLB instead. Since
+ * we're using "file backed" memory, we need to specify
+ * this when the FD is created, not when the area is
+ * mapped.
+ */
+ .flag = MAP_SHARED,
+ },
+ };
+ _Static_assert(ARRAY_SIZE(aliases) == NUM_SRC_TYPES,
+ "Missing new backing src types?");
+
+ TEST_ASSERT(i < NUM_SRC_TYPES, "Backing src type ID %d too big", i);
+
+ return &aliases[i];
+}
+
+#define MAP_HUGE_PAGE_SIZE(x) (1ULL << ((x >> MAP_HUGE_SHIFT) & MAP_HUGE_MASK))
+
+size_t get_backing_src_pagesz(uint32_t i)
+{
+ uint32_t flag = vm_mem_backing_src_alias(i)->flag;
+
+ switch (i) {
+ case VM_MEM_SRC_ANONYMOUS:
+ case VM_MEM_SRC_SHMEM:
+ return getpagesize();
+ case VM_MEM_SRC_ANONYMOUS_THP:
+ return get_trans_hugepagesz();
+ case VM_MEM_SRC_ANONYMOUS_HUGETLB:
+ case VM_MEM_SRC_SHARED_HUGETLB:
+ return get_def_hugetlb_pagesz();
+ default:
+ return MAP_HUGE_PAGE_SIZE(flag);
+ }
+}
+
+bool is_backing_src_hugetlb(uint32_t i)
+{
+ return !!(vm_mem_backing_src_alias(i)->flag & MAP_HUGETLB);
+}
+
+static void print_available_backing_src_types(const char *prefix)
+{
+ int i;
+
+ printf("%sAvailable backing src types:\n", prefix);
+
+ for (i = 0; i < NUM_SRC_TYPES; i++)
+ printf("%s %s\n", prefix, vm_mem_backing_src_alias(i)->name);
+}
+
+void backing_src_help(const char *flag)
+{
+ printf(" %s: specify the type of memory that should be used to\n"
+ " back the guest data region. (default: %s)\n",
+ flag, vm_mem_backing_src_alias(DEFAULT_VM_MEM_SRC)->name);
+ print_available_backing_src_types(" ");
+}
+
+enum vm_mem_backing_src_type parse_backing_src_type(const char *type_name)
+{
+ int i;
+
+ for (i = 0; i < NUM_SRC_TYPES; i++)
+ if (!strcmp(type_name, vm_mem_backing_src_alias(i)->name))
+ return i;
+
+ print_available_backing_src_types("");
+ TEST_FAIL("Unknown backing src type: %s", type_name);
+ return -1;
+}
+
+long get_run_delay(void)
+{
+ char path[64];
+ long val[2];
+ FILE *fp;
+
+ sprintf(path, "/proc/%ld/schedstat", syscall(SYS_gettid));
+ fp = fopen(path, "r");
+ /* Return MIN_RUN_DELAY_NS upon failure just to be safe */
+ if (fscanf(fp, "%ld %ld ", &val[0], &val[1]) < 2)
+ val[1] = MIN_RUN_DELAY_NS;
+ fclose(fp);
+
+ return val[1];
+}
+
+int atoi_paranoid(const char *num_str)
+{
+ char *end_ptr;
+ long num;
+
+ errno = 0;
+ num = strtol(num_str, &end_ptr, 0);
+ TEST_ASSERT(!errno, "strtol(\"%s\") failed", num_str);
+ TEST_ASSERT(num_str != end_ptr,
+ "strtol(\"%s\") didn't find a valid integer.", num_str);
+ TEST_ASSERT(*end_ptr == '\0',
+ "strtol(\"%s\") failed to parse trailing characters \"%s\".",
+ num_str, end_ptr);
+ TEST_ASSERT(num >= INT_MIN && num <= INT_MAX,
+ "%ld not in range of [%d, %d]", num, INT_MIN, INT_MAX);
+
+ return num;
+}
+
+char *strdup_printf(const char *fmt, ...)
+{
+ va_list ap;
+ char *str;
+
+ va_start(ap, fmt);
+ TEST_ASSERT(vasprintf(&str, fmt, ap) >= 0, "vasprintf() failed");
+ va_end(ap);
+
+ return str;
+}
+
+#define CLOCKSOURCE_PATH "/sys/devices/system/clocksource/clocksource0/current_clocksource"
+
+char *sys_get_cur_clocksource(void)
+{
+ char *clk_name;
+ struct stat st;
+ FILE *fp;
+
+ fp = fopen(CLOCKSOURCE_PATH, "r");
+ TEST_ASSERT(fp, "failed to open clocksource file, errno: %d", errno);
+
+ TEST_ASSERT(!fstat(fileno(fp), &st), "failed to stat clocksource file, errno: %d",
+ errno);
+
+ clk_name = malloc(st.st_size);
+ TEST_ASSERT(clk_name, "failed to allocate buffer to read file");
+
+ TEST_ASSERT(fgets(clk_name, st.st_size, fp), "failed to read clocksource file: %d",
+ ferror(fp));
+
+ fclose(fp);
+
+ return clk_name;
+}
diff --git a/tools/testing/selftests/kvm/lib/ucall_common.c b/tools/testing/selftests/kvm/lib/ucall_common.c
new file mode 100644
index 000000000000..f5af65a41c29
--- /dev/null
+++ b/tools/testing/selftests/kvm/lib/ucall_common.c
@@ -0,0 +1,160 @@
+// SPDX-License-Identifier: GPL-2.0-only
+#include "kvm_util.h"
+#include "linux/types.h"
+#include "linux/bitmap.h"
+#include "linux/atomic.h"
+
+#define GUEST_UCALL_FAILED -1
+
+struct ucall_header {
+ DECLARE_BITMAP(in_use, KVM_MAX_VCPUS);
+ struct ucall ucalls[KVM_MAX_VCPUS];
+};
+
+int ucall_nr_pages_required(uint64_t page_size)
+{
+ return align_up(sizeof(struct ucall_header), page_size) / page_size;
+}
+
+/*
+ * ucall_pool holds per-VM values (global data is duplicated by each VM), it
+ * must not be accessed from host code.
+ */
+static struct ucall_header *ucall_pool;
+
+void ucall_init(struct kvm_vm *vm, vm_paddr_t mmio_gpa)
+{
+ struct ucall_header *hdr;
+ struct ucall *uc;
+ vm_vaddr_t vaddr;
+ int i;
+
+ vaddr = vm_vaddr_alloc_shared(vm, sizeof(*hdr), KVM_UTIL_MIN_VADDR,
+ MEM_REGION_DATA);
+ hdr = (struct ucall_header *)addr_gva2hva(vm, vaddr);
+ memset(hdr, 0, sizeof(*hdr));
+
+ for (i = 0; i < KVM_MAX_VCPUS; ++i) {
+ uc = &hdr->ucalls[i];
+ uc->hva = uc;
+ }
+
+ write_guest_global(vm, ucall_pool, (struct ucall_header *)vaddr);
+
+ ucall_arch_init(vm, mmio_gpa);
+}
+
+static struct ucall *ucall_alloc(void)
+{
+ struct ucall *uc;
+ int i;
+
+ if (!ucall_pool)
+ goto ucall_failed;
+
+ for (i = 0; i < KVM_MAX_VCPUS; ++i) {
+ if (!test_and_set_bit(i, ucall_pool->in_use)) {
+ uc = &ucall_pool->ucalls[i];
+ memset(uc->args, 0, sizeof(uc->args));
+ return uc;
+ }
+ }
+
+ucall_failed:
+ /*
+ * If the vCPU cannot grab a ucall structure, make a bare ucall with a
+ * magic value to signal to get_ucall() that things went sideways.
+ * GUEST_ASSERT() depends on ucall_alloc() and so cannot be used here.
+ */
+ ucall_arch_do_ucall(GUEST_UCALL_FAILED);
+ return NULL;
+}
+
+static void ucall_free(struct ucall *uc)
+{
+ /* Beware, here be pointer arithmetic. */
+ clear_bit(uc - ucall_pool->ucalls, ucall_pool->in_use);
+}
+
+void ucall_assert(uint64_t cmd, const char *exp, const char *file,
+ unsigned int line, const char *fmt, ...)
+{
+ struct ucall *uc;
+ va_list va;
+
+ uc = ucall_alloc();
+ uc->cmd = cmd;
+
+ WRITE_ONCE(uc->args[GUEST_ERROR_STRING], (uint64_t)(exp));
+ WRITE_ONCE(uc->args[GUEST_FILE], (uint64_t)(file));
+ WRITE_ONCE(uc->args[GUEST_LINE], line);
+
+ va_start(va, fmt);
+ guest_vsnprintf(uc->buffer, UCALL_BUFFER_LEN, fmt, va);
+ va_end(va);
+
+ ucall_arch_do_ucall((vm_vaddr_t)uc->hva);
+
+ ucall_free(uc);
+}
+
+void ucall_fmt(uint64_t cmd, const char *fmt, ...)
+{
+ struct ucall *uc;
+ va_list va;
+
+ uc = ucall_alloc();
+ uc->cmd = cmd;
+
+ va_start(va, fmt);
+ guest_vsnprintf(uc->buffer, UCALL_BUFFER_LEN, fmt, va);
+ va_end(va);
+
+ ucall_arch_do_ucall((vm_vaddr_t)uc->hva);
+
+ ucall_free(uc);
+}
+
+void ucall(uint64_t cmd, int nargs, ...)
+{
+ struct ucall *uc;
+ va_list va;
+ int i;
+
+ uc = ucall_alloc();
+
+ WRITE_ONCE(uc->cmd, cmd);
+
+ nargs = min(nargs, UCALL_MAX_ARGS);
+
+ va_start(va, nargs);
+ for (i = 0; i < nargs; ++i)
+ WRITE_ONCE(uc->args[i], va_arg(va, uint64_t));
+ va_end(va);
+
+ ucall_arch_do_ucall((vm_vaddr_t)uc->hva);
+
+ ucall_free(uc);
+}
+
+uint64_t get_ucall(struct kvm_vcpu *vcpu, struct ucall *uc)
+{
+ struct ucall ucall;
+ void *addr;
+
+ if (!uc)
+ uc = &ucall;
+
+ addr = ucall_arch_get_ucall(vcpu);
+ if (addr) {
+ TEST_ASSERT(addr != (void *)GUEST_UCALL_FAILED,
+ "Guest failed to allocate ucall struct");
+
+ memcpy(uc, addr, sizeof(*uc));
+ vcpu_run_complete_io(vcpu);
+ } else {
+ memset(uc, 0, sizeof(*uc));
+ }
+
+ return uc->cmd;
+}
diff --git a/tools/testing/selftests/kvm/lib/userfaultfd_util.c b/tools/testing/selftests/kvm/lib/userfaultfd_util.c
new file mode 100644
index 000000000000..f4eef6eb2dc2
--- /dev/null
+++ b/tools/testing/selftests/kvm/lib/userfaultfd_util.c
@@ -0,0 +1,186 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * KVM userfaultfd util
+ * Adapted from demand_paging_test.c
+ *
+ * Copyright (C) 2018, Red Hat, Inc.
+ * Copyright (C) 2019-2022 Google LLC
+ */
+
+#define _GNU_SOURCE /* for pipe2 */
+
+#include <inttypes.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <time.h>
+#include <poll.h>
+#include <pthread.h>
+#include <linux/userfaultfd.h>
+#include <sys/syscall.h>
+
+#include "kvm_util.h"
+#include "test_util.h"
+#include "memstress.h"
+#include "userfaultfd_util.h"
+
+#ifdef __NR_userfaultfd
+
+static void *uffd_handler_thread_fn(void *arg)
+{
+ struct uffd_desc *uffd_desc = (struct uffd_desc *)arg;
+ int uffd = uffd_desc->uffd;
+ int pipefd = uffd_desc->pipefds[0];
+ useconds_t delay = uffd_desc->delay;
+ int64_t pages = 0;
+ struct timespec start;
+ struct timespec ts_diff;
+
+ clock_gettime(CLOCK_MONOTONIC, &start);
+ while (1) {
+ struct uffd_msg msg;
+ struct pollfd pollfd[2];
+ char tmp_chr;
+ int r;
+
+ pollfd[0].fd = uffd;
+ pollfd[0].events = POLLIN;
+ pollfd[1].fd = pipefd;
+ pollfd[1].events = POLLIN;
+
+ r = poll(pollfd, 2, -1);
+ switch (r) {
+ case -1:
+ pr_info("poll err");
+ continue;
+ case 0:
+ continue;
+ case 1:
+ break;
+ default:
+ pr_info("Polling uffd returned %d", r);
+ return NULL;
+ }
+
+ if (pollfd[0].revents & POLLERR) {
+ pr_info("uffd revents has POLLERR");
+ return NULL;
+ }
+
+ if (pollfd[1].revents & POLLIN) {
+ r = read(pollfd[1].fd, &tmp_chr, 1);
+ TEST_ASSERT(r == 1,
+ "Error reading pipefd in UFFD thread");
+ break;
+ }
+
+ if (!(pollfd[0].revents & POLLIN))
+ continue;
+
+ r = read(uffd, &msg, sizeof(msg));
+ if (r == -1) {
+ if (errno == EAGAIN)
+ continue;
+ pr_info("Read of uffd got errno %d\n", errno);
+ return NULL;
+ }
+
+ if (r != sizeof(msg)) {
+ pr_info("Read on uffd returned unexpected size: %d bytes", r);
+ return NULL;
+ }
+
+ if (!(msg.event & UFFD_EVENT_PAGEFAULT))
+ continue;
+
+ if (delay)
+ usleep(delay);
+ r = uffd_desc->handler(uffd_desc->uffd_mode, uffd, &msg);
+ if (r < 0)
+ return NULL;
+ pages++;
+ }
+
+ ts_diff = timespec_elapsed(start);
+ PER_VCPU_DEBUG("userfaulted %ld pages over %ld.%.9lds. (%f/sec)\n",
+ pages, ts_diff.tv_sec, ts_diff.tv_nsec,
+ pages / ((double)ts_diff.tv_sec + (double)ts_diff.tv_nsec / NSEC_PER_SEC));
+
+ return NULL;
+}
+
+struct uffd_desc *uffd_setup_demand_paging(int uffd_mode, useconds_t delay,
+ void *hva, uint64_t len,
+ uffd_handler_t handler)
+{
+ struct uffd_desc *uffd_desc;
+ bool is_minor = (uffd_mode == UFFDIO_REGISTER_MODE_MINOR);
+ int uffd;
+ struct uffdio_api uffdio_api;
+ struct uffdio_register uffdio_register;
+ uint64_t expected_ioctls = ((uint64_t) 1) << _UFFDIO_COPY;
+ int ret;
+
+ PER_PAGE_DEBUG("Userfaultfd %s mode, faults resolved with %s\n",
+ is_minor ? "MINOR" : "MISSING",
+ is_minor ? "UFFDIO_CONINUE" : "UFFDIO_COPY");
+
+ uffd_desc = malloc(sizeof(struct uffd_desc));
+ TEST_ASSERT(uffd_desc, "malloc failed");
+
+ /* In order to get minor faults, prefault via the alias. */
+ if (is_minor)
+ expected_ioctls = ((uint64_t) 1) << _UFFDIO_CONTINUE;
+
+ uffd = syscall(__NR_userfaultfd, O_CLOEXEC | O_NONBLOCK);
+ TEST_ASSERT(uffd >= 0, "uffd creation failed, errno: %d", errno);
+
+ uffdio_api.api = UFFD_API;
+ uffdio_api.features = 0;
+ TEST_ASSERT(ioctl(uffd, UFFDIO_API, &uffdio_api) != -1,
+ "ioctl UFFDIO_API failed: %" PRIu64,
+ (uint64_t)uffdio_api.api);
+
+ uffdio_register.range.start = (uint64_t)hva;
+ uffdio_register.range.len = len;
+ uffdio_register.mode = uffd_mode;
+ TEST_ASSERT(ioctl(uffd, UFFDIO_REGISTER, &uffdio_register) != -1,
+ "ioctl UFFDIO_REGISTER failed");
+ TEST_ASSERT((uffdio_register.ioctls & expected_ioctls) ==
+ expected_ioctls, "missing userfaultfd ioctls");
+
+ ret = pipe2(uffd_desc->pipefds, O_CLOEXEC | O_NONBLOCK);
+ TEST_ASSERT(!ret, "Failed to set up pipefd");
+
+ uffd_desc->uffd_mode = uffd_mode;
+ uffd_desc->uffd = uffd;
+ uffd_desc->delay = delay;
+ uffd_desc->handler = handler;
+ pthread_create(&uffd_desc->thread, NULL, uffd_handler_thread_fn,
+ uffd_desc);
+
+ PER_VCPU_DEBUG("Created uffd thread for HVA range [%p, %p)\n",
+ hva, hva + len);
+
+ return uffd_desc;
+}
+
+void uffd_stop_demand_paging(struct uffd_desc *uffd)
+{
+ char c = 0;
+ int ret;
+
+ ret = write(uffd->pipefds[1], &c, 1);
+ TEST_ASSERT(ret == 1, "Unable to write to pipefd");
+
+ ret = pthread_join(uffd->thread, NULL);
+ TEST_ASSERT(ret == 0, "Pthread_join failed.");
+
+ close(uffd->uffd);
+
+ close(uffd->pipefds[1]);
+ close(uffd->pipefds[0]);
+
+ free(uffd);
+}
+
+#endif /* __NR_userfaultfd */
diff --git a/tools/testing/selftests/kvm/lib/x86_64/apic.c b/tools/testing/selftests/kvm/lib/x86_64/apic.c
new file mode 100644
index 000000000000..89153a333e83
--- /dev/null
+++ b/tools/testing/selftests/kvm/lib/x86_64/apic.c
@@ -0,0 +1,43 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright (C) 2021, Google LLC.
+ */
+
+#include "apic.h"
+
+void apic_disable(void)
+{
+ wrmsr(MSR_IA32_APICBASE,
+ rdmsr(MSR_IA32_APICBASE) &
+ ~(MSR_IA32_APICBASE_ENABLE | MSR_IA32_APICBASE_EXTD));
+}
+
+void xapic_enable(void)
+{
+ uint64_t val = rdmsr(MSR_IA32_APICBASE);
+
+ /* Per SDM: to enable xAPIC when in x2APIC must first disable APIC */
+ if (val & MSR_IA32_APICBASE_EXTD) {
+ apic_disable();
+ wrmsr(MSR_IA32_APICBASE,
+ rdmsr(MSR_IA32_APICBASE) | MSR_IA32_APICBASE_ENABLE);
+ } else if (!(val & MSR_IA32_APICBASE_ENABLE)) {
+ wrmsr(MSR_IA32_APICBASE, val | MSR_IA32_APICBASE_ENABLE);
+ }
+
+ /*
+ * Per SDM: reset value of spurious interrupt vector register has the
+ * APIC software enabled bit=0. It must be enabled in addition to the
+ * enable bit in the MSR.
+ */
+ val = xapic_read_reg(APIC_SPIV) | APIC_SPIV_APIC_ENABLED;
+ xapic_write_reg(APIC_SPIV, val);
+}
+
+void x2apic_enable(void)
+{
+ wrmsr(MSR_IA32_APICBASE, rdmsr(MSR_IA32_APICBASE) |
+ MSR_IA32_APICBASE_ENABLE | MSR_IA32_APICBASE_EXTD);
+ x2apic_write_reg(APIC_SPIV,
+ x2apic_read_reg(APIC_SPIV) | APIC_SPIV_APIC_ENABLED);
+}
diff --git a/tools/testing/selftests/kvm/lib/x86_64/handlers.S b/tools/testing/selftests/kvm/lib/x86_64/handlers.S
new file mode 100644
index 000000000000..7629819734af
--- /dev/null
+++ b/tools/testing/selftests/kvm/lib/x86_64/handlers.S
@@ -0,0 +1,81 @@
+handle_exception:
+ push %r15
+ push %r14
+ push %r13
+ push %r12
+ push %r11
+ push %r10
+ push %r9
+ push %r8
+
+ push %rdi
+ push %rsi
+ push %rbp
+ push %rbx
+ push %rdx
+ push %rcx
+ push %rax
+ mov %rsp, %rdi
+
+ call route_exception
+
+ pop %rax
+ pop %rcx
+ pop %rdx
+ pop %rbx
+ pop %rbp
+ pop %rsi
+ pop %rdi
+ pop %r8
+ pop %r9
+ pop %r10
+ pop %r11
+ pop %r12
+ pop %r13
+ pop %r14
+ pop %r15
+
+ /* Discard vector and error code. */
+ add $16, %rsp
+ iretq
+
+/*
+ * Build the handle_exception wrappers which push the vector/error code on the
+ * stack and an array of pointers to those wrappers.
+ */
+.pushsection .rodata
+.globl idt_handlers
+idt_handlers:
+.popsection
+
+.macro HANDLERS has_error from to
+ vector = \from
+ .rept \to - \from + 1
+ .align 8
+
+ /* Fetch current address and append it to idt_handlers. */
+666 :
+.pushsection .rodata
+ .quad 666b
+.popsection
+
+ .if ! \has_error
+ pushq $0
+ .endif
+ pushq $vector
+ jmp handle_exception
+ vector = vector + 1
+ .endr
+.endm
+
+.global idt_handler_code
+idt_handler_code:
+ HANDLERS has_error=0 from=0 to=7
+ HANDLERS has_error=1 from=8 to=8
+ HANDLERS has_error=0 from=9 to=9
+ HANDLERS has_error=1 from=10 to=14
+ HANDLERS has_error=0 from=15 to=16
+ HANDLERS has_error=1 from=17 to=17
+ HANDLERS has_error=0 from=18 to=255
+
+.section .note.GNU-stack, "", %progbits
diff --git a/tools/testing/selftests/kvm/lib/x86_64/hyperv.c b/tools/testing/selftests/kvm/lib/x86_64/hyperv.c
new file mode 100644
index 000000000000..efb7e7a1354d
--- /dev/null
+++ b/tools/testing/selftests/kvm/lib/x86_64/hyperv.c
@@ -0,0 +1,46 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Hyper-V specific functions.
+ *
+ * Copyright (C) 2021, Red Hat Inc.
+ */
+#include <stdint.h>
+#include "processor.h"
+#include "hyperv.h"
+
+struct hyperv_test_pages *vcpu_alloc_hyperv_test_pages(struct kvm_vm *vm,
+ vm_vaddr_t *p_hv_pages_gva)
+{
+ vm_vaddr_t hv_pages_gva = vm_vaddr_alloc_page(vm);
+ struct hyperv_test_pages *hv = addr_gva2hva(vm, hv_pages_gva);
+
+ /* Setup of a region of guest memory for the VP Assist page. */
+ hv->vp_assist = (void *)vm_vaddr_alloc_page(vm);
+ hv->vp_assist_hva = addr_gva2hva(vm, (uintptr_t)hv->vp_assist);
+ hv->vp_assist_gpa = addr_gva2gpa(vm, (uintptr_t)hv->vp_assist);
+
+ /* Setup of a region of guest memory for the partition assist page. */
+ hv->partition_assist = (void *)vm_vaddr_alloc_page(vm);
+ hv->partition_assist_hva = addr_gva2hva(vm, (uintptr_t)hv->partition_assist);
+ hv->partition_assist_gpa = addr_gva2gpa(vm, (uintptr_t)hv->partition_assist);
+
+ /* Setup of a region of guest memory for the enlightened VMCS. */
+ hv->enlightened_vmcs = (void *)vm_vaddr_alloc_page(vm);
+ hv->enlightened_vmcs_hva = addr_gva2hva(vm, (uintptr_t)hv->enlightened_vmcs);
+ hv->enlightened_vmcs_gpa = addr_gva2gpa(vm, (uintptr_t)hv->enlightened_vmcs);
+
+ *p_hv_pages_gva = hv_pages_gva;
+ return hv;
+}
+
+int enable_vp_assist(uint64_t vp_assist_pa, void *vp_assist)
+{
+ uint64_t val = (vp_assist_pa & HV_X64_MSR_VP_ASSIST_PAGE_ADDRESS_MASK) |
+ HV_X64_MSR_VP_ASSIST_PAGE_ENABLE;
+
+ wrmsr(HV_X64_MSR_VP_ASSIST_PAGE, val);
+
+ current_vp_assist = vp_assist;
+
+ return 0;
+}
diff --git a/tools/testing/selftests/kvm/lib/x86_64/memstress.c b/tools/testing/selftests/kvm/lib/x86_64/memstress.c
new file mode 100644
index 000000000000..d61e623afc8c
--- /dev/null
+++ b/tools/testing/selftests/kvm/lib/x86_64/memstress.c
@@ -0,0 +1,112 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * x86_64-specific extensions to memstress.c.
+ *
+ * Copyright (C) 2022, Google, Inc.
+ */
+#include <stdio.h>
+#include <stdlib.h>
+#include <linux/bitmap.h>
+#include <linux/bitops.h>
+
+#include "test_util.h"
+#include "kvm_util.h"
+#include "memstress.h"
+#include "processor.h"
+#include "vmx.h"
+
+void memstress_l2_guest_code(uint64_t vcpu_id)
+{
+ memstress_guest_code(vcpu_id);
+ vmcall();
+}
+
+extern char memstress_l2_guest_entry[];
+__asm__(
+"memstress_l2_guest_entry:"
+" mov (%rsp), %rdi;"
+" call memstress_l2_guest_code;"
+" ud2;"
+);
+
+static void memstress_l1_guest_code(struct vmx_pages *vmx, uint64_t vcpu_id)
+{
+#define L2_GUEST_STACK_SIZE 64
+ unsigned long l2_guest_stack[L2_GUEST_STACK_SIZE];
+ unsigned long *rsp;
+
+ GUEST_ASSERT(vmx->vmcs_gpa);
+ GUEST_ASSERT(prepare_for_vmx_operation(vmx));
+ GUEST_ASSERT(load_vmcs(vmx));
+ GUEST_ASSERT(ept_1g_pages_supported());
+
+ rsp = &l2_guest_stack[L2_GUEST_STACK_SIZE - 1];
+ *rsp = vcpu_id;
+ prepare_vmcs(vmx, memstress_l2_guest_entry, rsp);
+
+ GUEST_ASSERT(!vmlaunch());
+ GUEST_ASSERT(vmreadz(VM_EXIT_REASON) == EXIT_REASON_VMCALL);
+ GUEST_DONE();
+}
+
+uint64_t memstress_nested_pages(int nr_vcpus)
+{
+ /*
+ * 513 page tables is enough to identity-map 256 TiB of L2 with 1G
+ * pages and 4-level paging, plus a few pages per-vCPU for data
+ * structures such as the VMCS.
+ */
+ return 513 + 10 * nr_vcpus;
+}
+
+void memstress_setup_ept(struct vmx_pages *vmx, struct kvm_vm *vm)
+{
+ uint64_t start, end;
+
+ prepare_eptp(vmx, vm, 0);
+
+ /*
+ * Identity map the first 4G and the test region with 1G pages so that
+ * KVM can shadow the EPT12 with the maximum huge page size supported
+ * by the backing source.
+ */
+ nested_identity_map_1g(vmx, vm, 0, 0x100000000ULL);
+
+ start = align_down(memstress_args.gpa, PG_SIZE_1G);
+ end = align_up(memstress_args.gpa + memstress_args.size, PG_SIZE_1G);
+ nested_identity_map_1g(vmx, vm, start, end - start);
+}
+
+void memstress_setup_nested(struct kvm_vm *vm, int nr_vcpus, struct kvm_vcpu *vcpus[])
+{
+ struct vmx_pages *vmx, *vmx0 = NULL;
+ struct kvm_regs regs;
+ vm_vaddr_t vmx_gva;
+ int vcpu_id;
+
+ TEST_REQUIRE(kvm_cpu_has(X86_FEATURE_VMX));
+ TEST_REQUIRE(kvm_cpu_has_ept());
+
+ for (vcpu_id = 0; vcpu_id < nr_vcpus; vcpu_id++) {
+ vmx = vcpu_alloc_vmx(vm, &vmx_gva);
+
+ if (vcpu_id == 0) {
+ memstress_setup_ept(vmx, vm);
+ vmx0 = vmx;
+ } else {
+ /* Share the same EPT table across all vCPUs. */
+ vmx->eptp = vmx0->eptp;
+ vmx->eptp_hva = vmx0->eptp_hva;
+ vmx->eptp_gpa = vmx0->eptp_gpa;
+ }
+
+ /*
+ * Override the vCPU to run memstress_l1_guest_code() which will
+ * bounce it into L2 before calling memstress_guest_code().
+ */
+ vcpu_regs_get(vcpus[vcpu_id], &regs);
+ regs.rip = (unsigned long) memstress_l1_guest_code;
+ vcpu_regs_set(vcpus[vcpu_id], &regs);
+ vcpu_args_set(vcpus[vcpu_id], 2, vmx_gva, vcpu_id);
+ }
+}
diff --git a/tools/testing/selftests/kvm/lib/x86_64/pmu.c b/tools/testing/selftests/kvm/lib/x86_64/pmu.c
new file mode 100644
index 000000000000..f31f0427c17c
--- /dev/null
+++ b/tools/testing/selftests/kvm/lib/x86_64/pmu.c
@@ -0,0 +1,31 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright (C) 2023, Tencent, Inc.
+ */
+
+#include <stdint.h>
+
+#include <linux/kernel.h>
+
+#include "kvm_util.h"
+#include "pmu.h"
+
+const uint64_t intel_pmu_arch_events[] = {
+ INTEL_ARCH_CPU_CYCLES,
+ INTEL_ARCH_INSTRUCTIONS_RETIRED,
+ INTEL_ARCH_REFERENCE_CYCLES,
+ INTEL_ARCH_LLC_REFERENCES,
+ INTEL_ARCH_LLC_MISSES,
+ INTEL_ARCH_BRANCHES_RETIRED,
+ INTEL_ARCH_BRANCHES_MISPREDICTED,
+ INTEL_ARCH_TOPDOWN_SLOTS,
+};
+kvm_static_assert(ARRAY_SIZE(intel_pmu_arch_events) == NR_INTEL_ARCH_EVENTS);
+
+const uint64_t amd_pmu_zen_events[] = {
+ AMD_ZEN_CORE_CYCLES,
+ AMD_ZEN_INSTRUCTIONS_RETIRED,
+ AMD_ZEN_BRANCHES_RETIRED,
+ AMD_ZEN_BRANCHES_MISPREDICTED,
+};
+kvm_static_assert(ARRAY_SIZE(amd_pmu_zen_events) == NR_AMD_ZEN_EVENTS);
diff --git a/tools/testing/selftests/kvm/lib/x86_64/processor.c b/tools/testing/selftests/kvm/lib/x86_64/processor.c
index f6eb34eaa0d2..74a4c736c9ae 100644
--- a/tools/testing/selftests/kvm/lib/x86_64/processor.c
+++ b/tools/testing/selftests/kvm/lib/x86_64/processor.c
@@ -5,80 +5,26 @@
* Copyright (C) 2018, Google LLC.
*/
-#define _GNU_SOURCE /* for program_invocation_name */
-
+#include "linux/bitmap.h"
#include "test_util.h"
#include "kvm_util.h"
-#include "../kvm_util_internal.h"
#include "processor.h"
+#include "sev.h"
+
+#ifndef NUM_INTERRUPTS
+#define NUM_INTERRUPTS 256
+#endif
+
+#define DEFAULT_CODE_SELECTOR 0x8
+#define DEFAULT_DATA_SELECTOR 0x10
-/* Minimum physical address used for virtual translation tables. */
-#define KVM_GUEST_PAGE_TABLE_MIN_PADDR 0x180000
-
-/* Virtual translation table structure declarations */
-struct pageMapL4Entry {
- uint64_t present:1;
- uint64_t writable:1;
- uint64_t user:1;
- uint64_t write_through:1;
- uint64_t cache_disable:1;
- uint64_t accessed:1;
- uint64_t ignored_06:1;
- uint64_t page_size:1;
- uint64_t ignored_11_08:4;
- uint64_t address:40;
- uint64_t ignored_62_52:11;
- uint64_t execute_disable:1;
-};
-
-struct pageDirectoryPointerEntry {
- uint64_t present:1;
- uint64_t writable:1;
- uint64_t user:1;
- uint64_t write_through:1;
- uint64_t cache_disable:1;
- uint64_t accessed:1;
- uint64_t ignored_06:1;
- uint64_t page_size:1;
- uint64_t ignored_11_08:4;
- uint64_t address:40;
- uint64_t ignored_62_52:11;
- uint64_t execute_disable:1;
-};
-
-struct pageDirectoryEntry {
- uint64_t present:1;
- uint64_t writable:1;
- uint64_t user:1;
- uint64_t write_through:1;
- uint64_t cache_disable:1;
- uint64_t accessed:1;
- uint64_t ignored_06:1;
- uint64_t page_size:1;
- uint64_t ignored_11_08:4;
- uint64_t address:40;
- uint64_t ignored_62_52:11;
- uint64_t execute_disable:1;
-};
-
-struct pageTableEntry {
- uint64_t present:1;
- uint64_t writable:1;
- uint64_t user:1;
- uint64_t write_through:1;
- uint64_t cache_disable:1;
- uint64_t accessed:1;
- uint64_t dirty:1;
- uint64_t reserved_07:1;
- uint64_t global:1;
- uint64_t ignored_11_09:3;
- uint64_t address:40;
- uint64_t ignored_62_52:11;
- uint64_t execute_disable:1;
-};
-
-void regs_dump(FILE *stream, struct kvm_regs *regs,
- uint8_t indent)
+#define MAX_NR_CPUID_ENTRIES 100
+
+vm_vaddr_t exception_handlers;
+bool host_cpu_is_amd;
+bool host_cpu_is_intel;
+
+static void regs_dump(FILE *stream, struct kvm_regs *regs, uint8_t indent)
{
fprintf(stream, "%*srax: 0x%.16llx rbx: 0x%.16llx "
"rcx: 0x%.16llx rdx: 0x%.16llx\n",
@@ -101,21 +47,6 @@ void regs_dump(FILE *stream, struct kvm_regs *regs,
regs->rip, regs->rflags);
}
-/*
- * Segment Dump
- *
- * Input Args:
- * stream - Output FILE stream
- * segment - KVM segment
- * indent - Left margin indent amount
- *
- * Output Args: None
- *
- * Return: None
- *
- * Dumps the state of the KVM segment given by @segment, to the FILE stream
- * given by @stream.
- */
static void segment_dump(FILE *stream, struct kvm_segment *segment,
uint8_t indent)
{
@@ -133,21 +64,6 @@ static void segment_dump(FILE *stream, struct kvm_segment *segment,
segment->unusable, segment->padding);
}
-/*
- * dtable Dump
- *
- * Input Args:
- * stream - Output FILE stream
- * dtable - KVM dtable
- * indent - Left margin indent amount
- *
- * Output Args: None
- *
- * Return: None
- *
- * Dumps the state of the KVM dtable given by @dtable, to the FILE stream
- * given by @stream.
- */
static void dtable_dump(FILE *stream, struct kvm_dtable *dtable,
uint8_t indent)
{
@@ -157,8 +73,7 @@ static void dtable_dump(FILE *stream, struct kvm_dtable *dtable,
dtable->padding[0], dtable->padding[1], dtable->padding[2]);
}
-void sregs_dump(FILE *stream, struct kvm_sregs *sregs,
- uint8_t indent)
+static void sregs_dump(FILE *stream, struct kvm_sregs *sregs, uint8_t indent)
{
unsigned int i;
@@ -200,97 +115,217 @@ void sregs_dump(FILE *stream, struct kvm_sregs *sregs,
}
}
-void virt_pgd_alloc(struct kvm_vm *vm, uint32_t pgd_memslot)
+bool kvm_is_tdp_enabled(void)
+{
+ if (host_cpu_is_intel)
+ return get_kvm_intel_param_bool("ept");
+ else
+ return get_kvm_amd_param_bool("npt");
+}
+
+void virt_arch_pgd_alloc(struct kvm_vm *vm)
{
TEST_ASSERT(vm->mode == VM_MODE_PXXV48_4K, "Attempt to use "
"unknown or unsupported guest mode, mode: 0x%x", vm->mode);
/* If needed, create page map l4 table. */
if (!vm->pgd_created) {
- vm_paddr_t paddr = vm_phy_page_alloc(vm,
- KVM_GUEST_PAGE_TABLE_MIN_PADDR, pgd_memslot);
- vm->pgd = paddr;
+ vm->pgd = vm_alloc_page_table(vm);
vm->pgd_created = true;
}
}
-void virt_pg_map(struct kvm_vm *vm, uint64_t vaddr, uint64_t paddr,
- uint32_t pgd_memslot)
+static void *virt_get_pte(struct kvm_vm *vm, uint64_t *parent_pte,
+ uint64_t vaddr, int level)
{
- uint16_t index[4];
- struct pageMapL4Entry *pml4e;
+ uint64_t pt_gpa = PTE_GET_PA(*parent_pte);
+ uint64_t *page_table = addr_gpa2hva(vm, pt_gpa);
+ int index = (vaddr >> PG_LEVEL_SHIFT(level)) & 0x1ffu;
+
+ TEST_ASSERT((*parent_pte & PTE_PRESENT_MASK) || parent_pte == &vm->pgd,
+ "Parent PTE (level %d) not PRESENT for gva: 0x%08lx",
+ level + 1, vaddr);
+
+ return &page_table[index];
+}
+
+static uint64_t *virt_create_upper_pte(struct kvm_vm *vm,
+ uint64_t *parent_pte,
+ uint64_t vaddr,
+ uint64_t paddr,
+ int current_level,
+ int target_level)
+{
+ uint64_t *pte = virt_get_pte(vm, parent_pte, vaddr, current_level);
+
+ paddr = vm_untag_gpa(vm, paddr);
+
+ if (!(*pte & PTE_PRESENT_MASK)) {
+ *pte = PTE_PRESENT_MASK | PTE_WRITABLE_MASK;
+ if (current_level == target_level)
+ *pte |= PTE_LARGE_MASK | (paddr & PHYSICAL_PAGE_MASK);
+ else
+ *pte |= vm_alloc_page_table(vm) & PHYSICAL_PAGE_MASK;
+ } else {
+ /*
+ * Entry already present. Assert that the caller doesn't want
+ * a hugepage at this level, and that there isn't a hugepage at
+ * this level.
+ */
+ TEST_ASSERT(current_level != target_level,
+ "Cannot create hugepage at level: %u, vaddr: 0x%lx",
+ current_level, vaddr);
+ TEST_ASSERT(!(*pte & PTE_LARGE_MASK),
+ "Cannot create page table at level: %u, vaddr: 0x%lx",
+ current_level, vaddr);
+ }
+ return pte;
+}
+
+void __virt_pg_map(struct kvm_vm *vm, uint64_t vaddr, uint64_t paddr, int level)
+{
+ const uint64_t pg_size = PG_LEVEL_SIZE(level);
+ uint64_t *pml4e, *pdpe, *pde;
+ uint64_t *pte;
+
+ TEST_ASSERT(vm->mode == VM_MODE_PXXV48_4K,
+ "Unknown or unsupported guest mode, mode: 0x%x", vm->mode);
+
+ TEST_ASSERT((vaddr % pg_size) == 0,
+ "Virtual address not aligned,\n"
+ "vaddr: 0x%lx page size: 0x%lx", vaddr, pg_size);
+ TEST_ASSERT(sparsebit_is_set(vm->vpages_valid, (vaddr >> vm->page_shift)),
+ "Invalid virtual address, vaddr: 0x%lx", vaddr);
+ TEST_ASSERT((paddr % pg_size) == 0,
+ "Physical address not aligned,\n"
+ " paddr: 0x%lx page size: 0x%lx", paddr, pg_size);
+ TEST_ASSERT((paddr >> vm->page_shift) <= vm->max_gfn,
+ "Physical address beyond maximum supported,\n"
+ " paddr: 0x%lx vm->max_gfn: 0x%lx vm->page_size: 0x%x",
+ paddr, vm->max_gfn, vm->page_size);
+ TEST_ASSERT(vm_untag_gpa(vm, paddr) == paddr,
+ "Unexpected bits in paddr: %lx", paddr);
+
+ /*
+ * Allocate upper level page tables, if not already present. Return
+ * early if a hugepage was created.
+ */
+ pml4e = virt_create_upper_pte(vm, &vm->pgd, vaddr, paddr, PG_LEVEL_512G, level);
+ if (*pml4e & PTE_LARGE_MASK)
+ return;
+
+ pdpe = virt_create_upper_pte(vm, pml4e, vaddr, paddr, PG_LEVEL_1G, level);
+ if (*pdpe & PTE_LARGE_MASK)
+ return;
+
+ pde = virt_create_upper_pte(vm, pdpe, vaddr, paddr, PG_LEVEL_2M, level);
+ if (*pde & PTE_LARGE_MASK)
+ return;
+
+ /* Fill in page table entry. */
+ pte = virt_get_pte(vm, pde, vaddr, PG_LEVEL_4K);
+ TEST_ASSERT(!(*pte & PTE_PRESENT_MASK),
+ "PTE already present for 4k page at vaddr: 0x%lx", vaddr);
+ *pte = PTE_PRESENT_MASK | PTE_WRITABLE_MASK | (paddr & PHYSICAL_PAGE_MASK);
+
+ /*
+ * Neither SEV nor TDX supports shared page tables, so only the final
+ * leaf PTE needs manually set the C/S-bit.
+ */
+ if (vm_is_gpa_protected(vm, paddr))
+ *pte |= vm->arch.c_bit;
+ else
+ *pte |= vm->arch.s_bit;
+}
+
+void virt_arch_pg_map(struct kvm_vm *vm, uint64_t vaddr, uint64_t paddr)
+{
+ __virt_pg_map(vm, vaddr, paddr, PG_LEVEL_4K);
+}
+
+void virt_map_level(struct kvm_vm *vm, uint64_t vaddr, uint64_t paddr,
+ uint64_t nr_bytes, int level)
+{
+ uint64_t pg_size = PG_LEVEL_SIZE(level);
+ uint64_t nr_pages = nr_bytes / pg_size;
+ int i;
+
+ TEST_ASSERT(nr_bytes % pg_size == 0,
+ "Region size not aligned: nr_bytes: 0x%lx, page size: 0x%lx",
+ nr_bytes, pg_size);
+
+ for (i = 0; i < nr_pages; i++) {
+ __virt_pg_map(vm, vaddr, paddr, level);
+
+ vaddr += pg_size;
+ paddr += pg_size;
+ }
+}
+
+static bool vm_is_target_pte(uint64_t *pte, int *level, int current_level)
+{
+ if (*pte & PTE_LARGE_MASK) {
+ TEST_ASSERT(*level == PG_LEVEL_NONE ||
+ *level == current_level,
+ "Unexpected hugepage at level %d", current_level);
+ *level = current_level;
+ }
+
+ return *level == current_level;
+}
+
+uint64_t *__vm_get_page_table_entry(struct kvm_vm *vm, uint64_t vaddr,
+ int *level)
+{
+ uint64_t *pml4e, *pdpe, *pde;
+
+ TEST_ASSERT(!vm->arch.is_pt_protected,
+ "Walking page tables of protected guests is impossible");
+
+ TEST_ASSERT(*level >= PG_LEVEL_NONE && *level < PG_LEVEL_NUM,
+ "Invalid PG_LEVEL_* '%d'", *level);
TEST_ASSERT(vm->mode == VM_MODE_PXXV48_4K, "Attempt to use "
"unknown or unsupported guest mode, mode: 0x%x", vm->mode);
-
- TEST_ASSERT((vaddr % vm->page_size) == 0,
- "Virtual address not on page boundary,\n"
- " vaddr: 0x%lx vm->page_size: 0x%x",
- vaddr, vm->page_size);
TEST_ASSERT(sparsebit_is_set(vm->vpages_valid,
(vaddr >> vm->page_shift)),
"Invalid virtual address, vaddr: 0x%lx",
vaddr);
- TEST_ASSERT((paddr % vm->page_size) == 0,
- "Physical address not on page boundary,\n"
- " paddr: 0x%lx vm->page_size: 0x%x",
- paddr, vm->page_size);
- TEST_ASSERT((paddr >> vm->page_shift) <= vm->max_gfn,
- "Physical address beyond beyond maximum supported,\n"
- " paddr: 0x%lx vm->max_gfn: 0x%lx vm->page_size: 0x%x",
- paddr, vm->max_gfn, vm->page_size);
-
- index[0] = (vaddr >> 12) & 0x1ffu;
- index[1] = (vaddr >> 21) & 0x1ffu;
- index[2] = (vaddr >> 30) & 0x1ffu;
- index[3] = (vaddr >> 39) & 0x1ffu;
-
- /* Allocate page directory pointer table if not present. */
- pml4e = addr_gpa2hva(vm, vm->pgd);
- if (!pml4e[index[3]].present) {
- pml4e[index[3]].address = vm_phy_page_alloc(vm,
- KVM_GUEST_PAGE_TABLE_MIN_PADDR, pgd_memslot)
- >> vm->page_shift;
- pml4e[index[3]].writable = true;
- pml4e[index[3]].present = true;
- }
+ /*
+ * Based on the mode check above there are 48 bits in the vaddr, so
+ * shift 16 to sign extend the last bit (bit-47),
+ */
+ TEST_ASSERT(vaddr == (((int64_t)vaddr << 16) >> 16),
+ "Canonical check failed. The virtual address is invalid.");
- /* Allocate page directory table if not present. */
- struct pageDirectoryPointerEntry *pdpe;
- pdpe = addr_gpa2hva(vm, pml4e[index[3]].address * vm->page_size);
- if (!pdpe[index[2]].present) {
- pdpe[index[2]].address = vm_phy_page_alloc(vm,
- KVM_GUEST_PAGE_TABLE_MIN_PADDR, pgd_memslot)
- >> vm->page_shift;
- pdpe[index[2]].writable = true;
- pdpe[index[2]].present = true;
- }
+ pml4e = virt_get_pte(vm, &vm->pgd, vaddr, PG_LEVEL_512G);
+ if (vm_is_target_pte(pml4e, level, PG_LEVEL_512G))
+ return pml4e;
- /* Allocate page table if not present. */
- struct pageDirectoryEntry *pde;
- pde = addr_gpa2hva(vm, pdpe[index[2]].address * vm->page_size);
- if (!pde[index[1]].present) {
- pde[index[1]].address = vm_phy_page_alloc(vm,
- KVM_GUEST_PAGE_TABLE_MIN_PADDR, pgd_memslot)
- >> vm->page_shift;
- pde[index[1]].writable = true;
- pde[index[1]].present = true;
- }
+ pdpe = virt_get_pte(vm, pml4e, vaddr, PG_LEVEL_1G);
+ if (vm_is_target_pte(pdpe, level, PG_LEVEL_1G))
+ return pdpe;
- /* Fill in page table entry. */
- struct pageTableEntry *pte;
- pte = addr_gpa2hva(vm, pde[index[1]].address * vm->page_size);
- pte[index[0]].address = paddr >> vm->page_shift;
- pte[index[0]].writable = true;
- pte[index[0]].present = 1;
+ pde = virt_get_pte(vm, pdpe, vaddr, PG_LEVEL_2M);
+ if (vm_is_target_pte(pde, level, PG_LEVEL_2M))
+ return pde;
+
+ return virt_get_pte(vm, pde, vaddr, PG_LEVEL_4K);
}
-void virt_dump(FILE *stream, struct kvm_vm *vm, uint8_t indent)
+uint64_t *vm_get_page_table_entry(struct kvm_vm *vm, uint64_t vaddr)
{
- struct pageMapL4Entry *pml4e, *pml4e_start;
- struct pageDirectoryPointerEntry *pdpe, *pdpe_start;
- struct pageDirectoryEntry *pde, *pde_start;
- struct pageTableEntry *pte, *pte_start;
+ int level = PG_LEVEL_4K;
+
+ return __vm_get_page_table_entry(vm, vaddr, &level);
+}
+
+void virt_arch_dump(FILE *stream, struct kvm_vm *vm, uint8_t indent)
+{
+ uint64_t *pml4e, *pml4e_start;
+ uint64_t *pdpe, *pdpe_start;
+ uint64_t *pde, *pde_start;
+ uint64_t *pte, *pte_start;
if (!vm->pgd_created)
return;
@@ -300,62 +335,58 @@ void virt_dump(FILE *stream, struct kvm_vm *vm, uint8_t indent)
fprintf(stream, "%*s index hvaddr gpaddr "
"addr w exec dirty\n",
indent, "");
- pml4e_start = (struct pageMapL4Entry *) addr_gpa2hva(vm,
- vm->pgd);
+ pml4e_start = (uint64_t *) addr_gpa2hva(vm, vm->pgd);
for (uint16_t n1 = 0; n1 <= 0x1ffu; n1++) {
pml4e = &pml4e_start[n1];
- if (!pml4e->present)
+ if (!(*pml4e & PTE_PRESENT_MASK))
continue;
- fprintf(stream, "%*spml4e 0x%-3zx %p 0x%-12lx 0x%-10lx %u "
+ fprintf(stream, "%*spml4e 0x%-3zx %p 0x%-12lx 0x%-10llx %u "
" %u\n",
indent, "",
pml4e - pml4e_start, pml4e,
- addr_hva2gpa(vm, pml4e), (uint64_t) pml4e->address,
- pml4e->writable, pml4e->execute_disable);
+ addr_hva2gpa(vm, pml4e), PTE_GET_PFN(*pml4e),
+ !!(*pml4e & PTE_WRITABLE_MASK), !!(*pml4e & PTE_NX_MASK));
- pdpe_start = addr_gpa2hva(vm, pml4e->address
- * vm->page_size);
+ pdpe_start = addr_gpa2hva(vm, *pml4e & PHYSICAL_PAGE_MASK);
for (uint16_t n2 = 0; n2 <= 0x1ffu; n2++) {
pdpe = &pdpe_start[n2];
- if (!pdpe->present)
+ if (!(*pdpe & PTE_PRESENT_MASK))
continue;
- fprintf(stream, "%*spdpe 0x%-3zx %p 0x%-12lx 0x%-10lx "
+ fprintf(stream, "%*spdpe 0x%-3zx %p 0x%-12lx 0x%-10llx "
"%u %u\n",
indent, "",
pdpe - pdpe_start, pdpe,
addr_hva2gpa(vm, pdpe),
- (uint64_t) pdpe->address, pdpe->writable,
- pdpe->execute_disable);
+ PTE_GET_PFN(*pdpe), !!(*pdpe & PTE_WRITABLE_MASK),
+ !!(*pdpe & PTE_NX_MASK));
- pde_start = addr_gpa2hva(vm,
- pdpe->address * vm->page_size);
+ pde_start = addr_gpa2hva(vm, *pdpe & PHYSICAL_PAGE_MASK);
for (uint16_t n3 = 0; n3 <= 0x1ffu; n3++) {
pde = &pde_start[n3];
- if (!pde->present)
+ if (!(*pde & PTE_PRESENT_MASK))
continue;
fprintf(stream, "%*spde 0x%-3zx %p "
- "0x%-12lx 0x%-10lx %u %u\n",
+ "0x%-12lx 0x%-10llx %u %u\n",
indent, "", pde - pde_start, pde,
addr_hva2gpa(vm, pde),
- (uint64_t) pde->address, pde->writable,
- pde->execute_disable);
+ PTE_GET_PFN(*pde), !!(*pde & PTE_WRITABLE_MASK),
+ !!(*pde & PTE_NX_MASK));
- pte_start = addr_gpa2hva(vm,
- pde->address * vm->page_size);
+ pte_start = addr_gpa2hva(vm, *pde & PHYSICAL_PAGE_MASK);
for (uint16_t n4 = 0; n4 <= 0x1ffu; n4++) {
pte = &pte_start[n4];
- if (!pte->present)
+ if (!(*pte & PTE_PRESENT_MASK))
continue;
fprintf(stream, "%*spte 0x%-3zx %p "
- "0x%-12lx 0x%-10lx %u %u "
+ "0x%-12lx 0x%-10llx %u %u "
" %u 0x%-10lx\n",
indent, "",
pte - pte_start, pte,
addr_hva2gpa(vm, pte),
- (uint64_t) pte->address,
- pte->writable,
- pte->execute_disable,
- pte->dirty,
+ PTE_GET_PFN(*pte),
+ !!(*pte & PTE_WRITABLE_MASK),
+ !!(*pte & PTE_NX_MASK),
+ !!(*pte & PTE_DIRTY_MASK),
((uint64_t) n1 << 27)
| ((uint64_t) n2 << 18)
| ((uint64_t) n3 << 9)
@@ -392,11 +423,12 @@ static void kvm_seg_fill_gdt_64bit(struct kvm_vm *vm, struct kvm_segment *segp)
desc->limit0 = segp->limit & 0xFFFF;
desc->base0 = segp->base & 0xFFFF;
desc->base1 = segp->base >> 16;
- desc->s = segp->s;
desc->type = segp->type;
+ desc->s = segp->s;
desc->dpl = segp->dpl;
desc->p = segp->present;
desc->limit1 = segp->limit >> 16;
+ desc->avl = segp->avl;
desc->l = segp->l;
desc->db = segp->db;
desc->g = segp->g;
@@ -469,65 +501,35 @@ static void kvm_seg_set_kernel_data_64bit(struct kvm_vm *vm, uint16_t selector,
kvm_seg_fill_gdt_64bit(vm, segp);
}
-vm_paddr_t addr_gva2gpa(struct kvm_vm *vm, vm_vaddr_t gva)
+vm_paddr_t addr_arch_gva2gpa(struct kvm_vm *vm, vm_vaddr_t gva)
{
- uint16_t index[4];
- struct pageMapL4Entry *pml4e;
- struct pageDirectoryPointerEntry *pdpe;
- struct pageDirectoryEntry *pde;
- struct pageTableEntry *pte;
-
- TEST_ASSERT(vm->mode == VM_MODE_PXXV48_4K, "Attempt to use "
- "unknown or unsupported guest mode, mode: 0x%x", vm->mode);
-
- index[0] = (gva >> 12) & 0x1ffu;
- index[1] = (gva >> 21) & 0x1ffu;
- index[2] = (gva >> 30) & 0x1ffu;
- index[3] = (gva >> 39) & 0x1ffu;
-
- if (!vm->pgd_created)
- goto unmapped_gva;
- pml4e = addr_gpa2hva(vm, vm->pgd);
- if (!pml4e[index[3]].present)
- goto unmapped_gva;
-
- pdpe = addr_gpa2hva(vm, pml4e[index[3]].address * vm->page_size);
- if (!pdpe[index[2]].present)
- goto unmapped_gva;
+ int level = PG_LEVEL_NONE;
+ uint64_t *pte = __vm_get_page_table_entry(vm, gva, &level);
- pde = addr_gpa2hva(vm, pdpe[index[2]].address * vm->page_size);
- if (!pde[index[1]].present)
- goto unmapped_gva;
+ TEST_ASSERT(*pte & PTE_PRESENT_MASK,
+ "Leaf PTE not PRESENT for gva: 0x%08lx", gva);
- pte = addr_gpa2hva(vm, pde[index[1]].address * vm->page_size);
- if (!pte[index[0]].present)
- goto unmapped_gva;
-
- return (pte[index[0]].address * vm->page_size) + (gva & 0xfffu);
-
-unmapped_gva:
- TEST_FAIL("No mapping for vm virtual address, gva: 0x%lx", gva);
- exit(EXIT_FAILURE);
+ /*
+ * No need for a hugepage mask on the PTE, x86-64 requires the "unused"
+ * address bits to be zero.
+ */
+ return vm_untag_gpa(vm, PTE_GET_PA(*pte)) | (gva & ~HUGEPAGE_MASK(level));
}
-static void kvm_setup_gdt(struct kvm_vm *vm, struct kvm_dtable *dt, int gdt_memslot,
- int pgd_memslot)
+static void kvm_setup_gdt(struct kvm_vm *vm, struct kvm_dtable *dt)
{
if (!vm->gdt)
- vm->gdt = vm_vaddr_alloc(vm, getpagesize(),
- KVM_UTIL_MIN_VADDR, gdt_memslot, pgd_memslot);
+ vm->gdt = __vm_vaddr_alloc_page(vm, MEM_REGION_DATA);
dt->base = vm->gdt;
dt->limit = getpagesize();
}
static void kvm_setup_tss_64bit(struct kvm_vm *vm, struct kvm_segment *segp,
- int selector, int gdt_memslot,
- int pgd_memslot)
+ int selector)
{
if (!vm->tss)
- vm->tss = vm_vaddr_alloc(vm, getpagesize(),
- KVM_UTIL_MIN_VADDR, gdt_memslot, pgd_memslot);
+ vm->tss = __vm_vaddr_alloc_page(vm, MEM_REGION_DATA);
memset(segp, 0, sizeof(*segp));
segp->base = vm->tss;
@@ -538,16 +540,16 @@ static void kvm_setup_tss_64bit(struct kvm_vm *vm, struct kvm_segment *segp,
kvm_seg_fill_gdt_64bit(vm, segp);
}
-static void vcpu_setup(struct kvm_vm *vm, int vcpuid, int pgd_memslot, int gdt_memslot)
+static void vcpu_setup(struct kvm_vm *vm, struct kvm_vcpu *vcpu)
{
struct kvm_sregs sregs;
/* Set mode specific system register values. */
- vcpu_sregs_get(vm, vcpuid, &sregs);
+ vcpu_sregs_get(vcpu, &sregs);
sregs.idt.limit = 0;
- kvm_setup_gdt(vm, &sregs.gdt, gdt_memslot, pgd_memslot);
+ kvm_setup_gdt(vm, &sregs.gdt);
switch (vm->mode) {
case VM_MODE_PXXV48_4K:
@@ -556,10 +558,10 @@ static void vcpu_setup(struct kvm_vm *vm, int vcpuid, int pgd_memslot, int gdt_m
sregs.efer |= (EFER_LME | EFER_LMA | EFER_NX);
kvm_seg_set_unusable(&sregs.ldt);
- kvm_seg_set_kernel_code_64bit(vm, 0x8, &sregs.cs);
- kvm_seg_set_kernel_data_64bit(vm, 0x10, &sregs.ds);
- kvm_seg_set_kernel_data_64bit(vm, 0x10, &sregs.es);
- kvm_setup_tss_64bit(vm, &sregs.tr, 0x18, gdt_memslot, pgd_memslot);
+ kvm_seg_set_kernel_code_64bit(vm, DEFAULT_CODE_SELECTOR, &sregs.cs);
+ kvm_seg_set_kernel_data_64bit(vm, DEFAULT_DATA_SELECTOR, &sregs.ds);
+ kvm_seg_set_kernel_data_64bit(vm, DEFAULT_DATA_SELECTOR, &sregs.es);
+ kvm_setup_tss_64bit(vm, &sregs.tr, 0x18);
break;
default:
@@ -567,293 +569,305 @@ static void vcpu_setup(struct kvm_vm *vm, int vcpuid, int pgd_memslot, int gdt_m
}
sregs.cr3 = vm->pgd;
- vcpu_sregs_set(vm, vcpuid, &sregs);
+ vcpu_sregs_set(vcpu, &sregs);
+}
+
+void kvm_arch_vm_post_create(struct kvm_vm *vm)
+{
+ vm_create_irqchip(vm);
+ sync_global_to_guest(vm, host_cpu_is_intel);
+ sync_global_to_guest(vm, host_cpu_is_amd);
+
+ if (vm->subtype == VM_SUBTYPE_SEV)
+ sev_vm_init(vm);
+ else if (vm->subtype == VM_SUBTYPE_SEV_ES)
+ sev_es_vm_init(vm);
}
-void vm_vcpu_add_default(struct kvm_vm *vm, uint32_t vcpuid, void *guest_code)
+void vcpu_arch_set_entry_point(struct kvm_vcpu *vcpu, void *guest_code)
+{
+ struct kvm_regs regs;
+
+ vcpu_regs_get(vcpu, &regs);
+ regs.rip = (unsigned long) guest_code;
+ vcpu_regs_set(vcpu, &regs);
+}
+
+struct kvm_vcpu *vm_arch_vcpu_add(struct kvm_vm *vm, uint32_t vcpu_id)
{
struct kvm_mp_state mp_state;
struct kvm_regs regs;
vm_vaddr_t stack_vaddr;
- stack_vaddr = vm_vaddr_alloc(vm, DEFAULT_STACK_PGS * getpagesize(),
- DEFAULT_GUEST_STACK_VADDR_MIN, 0, 0);
+ struct kvm_vcpu *vcpu;
- /* Create VCPU */
- vm_vcpu_add(vm, vcpuid);
- vcpu_setup(vm, vcpuid, 0, 0);
+ stack_vaddr = __vm_vaddr_alloc(vm, DEFAULT_STACK_PGS * getpagesize(),
+ DEFAULT_GUEST_STACK_VADDR_MIN,
+ MEM_REGION_DATA);
+
+ stack_vaddr += DEFAULT_STACK_PGS * getpagesize();
+
+ /*
+ * Align stack to match calling sequence requirements in section "The
+ * Stack Frame" of the System V ABI AMD64 Architecture Processor
+ * Supplement, which requires the value (%rsp + 8) to be a multiple of
+ * 16 when control is transferred to the function entry point.
+ *
+ * If this code is ever used to launch a vCPU with 32-bit entry point it
+ * may need to subtract 4 bytes instead of 8 bytes.
+ */
+ TEST_ASSERT(IS_ALIGNED(stack_vaddr, PAGE_SIZE),
+ "__vm_vaddr_alloc() did not provide a page-aligned address");
+ stack_vaddr -= 8;
+
+ vcpu = __vm_vcpu_add(vm, vcpu_id);
+ vcpu_init_cpuid(vcpu, kvm_get_supported_cpuid());
+ vcpu_setup(vm, vcpu);
/* Setup guest general purpose registers */
- vcpu_regs_get(vm, vcpuid, &regs);
+ vcpu_regs_get(vcpu, &regs);
regs.rflags = regs.rflags | 0x2;
- regs.rsp = stack_vaddr + (DEFAULT_STACK_PGS * getpagesize());
- regs.rip = (unsigned long) guest_code;
- vcpu_regs_set(vm, vcpuid, &regs);
+ regs.rsp = stack_vaddr;
+ vcpu_regs_set(vcpu, &regs);
/* Setup the MP state */
mp_state.mp_state = 0;
- vcpu_set_mp_state(vm, vcpuid, &mp_state);
+ vcpu_mp_state_set(vcpu, &mp_state);
+
+ return vcpu;
}
-/*
- * Allocate an instance of struct kvm_cpuid2
- *
- * Input Args: None
- *
- * Output Args: None
- *
- * Return: A pointer to the allocated struct. The caller is responsible
- * for freeing this struct.
- *
- * Since kvm_cpuid2 uses a 0-length array to allow a the size of the
- * array to be decided at allocation time, allocation is slightly
- * complicated. This function uses a reasonable default length for
- * the array and performs the appropriate allocation.
- */
-static struct kvm_cpuid2 *allocate_kvm_cpuid2(void)
-{
- struct kvm_cpuid2 *cpuid;
- int nent = 100;
- size_t size;
-
- size = sizeof(*cpuid);
- size += nent * sizeof(struct kvm_cpuid_entry2);
- cpuid = malloc(size);
- if (!cpuid) {
- perror("malloc");
- abort();
- }
+struct kvm_vcpu *vm_arch_vcpu_recreate(struct kvm_vm *vm, uint32_t vcpu_id)
+{
+ struct kvm_vcpu *vcpu = __vm_vcpu_add(vm, vcpu_id);
- cpuid->nent = nent;
+ vcpu_init_cpuid(vcpu, kvm_get_supported_cpuid());
- return cpuid;
+ return vcpu;
}
-/*
- * KVM Supported CPUID Get
- *
- * Input Args: None
- *
- * Output Args:
- *
- * Return: The supported KVM CPUID
- *
- * Get the guest CPUID supported by KVM.
- */
-struct kvm_cpuid2 *kvm_get_supported_cpuid(void)
+void vcpu_arch_free(struct kvm_vcpu *vcpu)
+{
+ if (vcpu->cpuid)
+ free(vcpu->cpuid);
+}
+
+/* Do not use kvm_supported_cpuid directly except for validity checks. */
+static void *kvm_supported_cpuid;
+
+const struct kvm_cpuid2 *kvm_get_supported_cpuid(void)
{
- static struct kvm_cpuid2 *cpuid;
- int ret;
int kvm_fd;
- if (cpuid)
- return cpuid;
+ if (kvm_supported_cpuid)
+ return kvm_supported_cpuid;
- cpuid = allocate_kvm_cpuid2();
- kvm_fd = open(KVM_DEV_PATH, O_RDONLY);
- if (kvm_fd < 0)
- exit(KSFT_SKIP);
+ kvm_supported_cpuid = allocate_kvm_cpuid2(MAX_NR_CPUID_ENTRIES);
+ kvm_fd = open_kvm_dev_path_or_exit();
- ret = ioctl(kvm_fd, KVM_GET_SUPPORTED_CPUID, cpuid);
- TEST_ASSERT(ret == 0, "KVM_GET_SUPPORTED_CPUID failed %d %d\n",
- ret, errno);
+ kvm_ioctl(kvm_fd, KVM_GET_SUPPORTED_CPUID,
+ (struct kvm_cpuid2 *)kvm_supported_cpuid);
close(kvm_fd);
- return cpuid;
+ return kvm_supported_cpuid;
}
-/*
- * Locate a cpuid entry.
- *
- * Input Args:
- * function: The function of the cpuid entry to find.
- * index: The index of the cpuid entry.
- *
- * Output Args: None
- *
- * Return: A pointer to the cpuid entry. Never returns NULL.
- */
-struct kvm_cpuid_entry2 *
-kvm_get_supported_cpuid_index(uint32_t function, uint32_t index)
+static uint32_t __kvm_cpu_has(const struct kvm_cpuid2 *cpuid,
+ uint32_t function, uint32_t index,
+ uint8_t reg, uint8_t lo, uint8_t hi)
{
- struct kvm_cpuid2 *cpuid;
- struct kvm_cpuid_entry2 *entry = NULL;
+ const struct kvm_cpuid_entry2 *entry;
int i;
- cpuid = kvm_get_supported_cpuid();
for (i = 0; i < cpuid->nent; i++) {
- if (cpuid->entries[i].function == function &&
- cpuid->entries[i].index == index) {
- entry = &cpuid->entries[i];
- break;
- }
+ entry = &cpuid->entries[i];
+
+ /*
+ * The output registers in kvm_cpuid_entry2 are in alphabetical
+ * order, but kvm_x86_cpu_feature matches that mess, so yay
+ * pointer shenanigans!
+ */
+ if (entry->function == function && entry->index == index)
+ return ((&entry->eax)[reg] & GENMASK(hi, lo)) >> lo;
}
- TEST_ASSERT(entry, "Guest CPUID entry not found: (EAX=%x, ECX=%x).",
- function, index);
- return entry;
+ return 0;
}
-/*
- * VM VCPU CPUID Set
- *
- * Input Args:
- * vm - Virtual Machine
- * vcpuid - VCPU id
- * cpuid - The CPUID values to set.
- *
- * Output Args: None
- *
- * Return: void
- *
- * Set the VCPU's CPUID.
- */
-void vcpu_set_cpuid(struct kvm_vm *vm,
- uint32_t vcpuid, struct kvm_cpuid2 *cpuid)
+bool kvm_cpuid_has(const struct kvm_cpuid2 *cpuid,
+ struct kvm_x86_cpu_feature feature)
+{
+ return __kvm_cpu_has(cpuid, feature.function, feature.index,
+ feature.reg, feature.bit, feature.bit);
+}
+
+uint32_t kvm_cpuid_property(const struct kvm_cpuid2 *cpuid,
+ struct kvm_x86_cpu_property property)
+{
+ return __kvm_cpu_has(cpuid, property.function, property.index,
+ property.reg, property.lo_bit, property.hi_bit);
+}
+
+uint64_t kvm_get_feature_msr(uint64_t msr_index)
{
- struct vcpu *vcpu = vcpu_find(vm, vcpuid);
- int rc;
+ struct {
+ struct kvm_msrs header;
+ struct kvm_msr_entry entry;
+ } buffer = {};
+ int r, kvm_fd;
- TEST_ASSERT(vcpu != NULL, "vcpu not found, vcpuid: %u", vcpuid);
+ buffer.header.nmsrs = 1;
+ buffer.entry.index = msr_index;
+ kvm_fd = open_kvm_dev_path_or_exit();
- rc = ioctl(vcpu->fd, KVM_SET_CPUID2, cpuid);
- TEST_ASSERT(rc == 0, "KVM_SET_CPUID2 failed, rc: %i errno: %i",
- rc, errno);
+ r = __kvm_ioctl(kvm_fd, KVM_GET_MSRS, &buffer.header);
+ TEST_ASSERT(r == 1, KVM_IOCTL_ERROR(KVM_GET_MSRS, r));
+ close(kvm_fd);
+ return buffer.entry.data;
}
-struct kvm_vm *vm_create_default(uint32_t vcpuid, uint64_t extra_mem_pages,
- void *guest_code)
+void __vm_xsave_require_permission(uint64_t xfeature, const char *name)
{
- struct kvm_vm *vm;
- /*
- * For x86 the maximum page table size for a memory region
- * will be when only 4K pages are used. In that case the
- * total extra size for page tables (for extra N pages) will
- * be: N/512+N/512^2+N/512^3+... which is definitely smaller
- * than N/512*2.
- */
- uint64_t extra_pg_pages = extra_mem_pages / 512 * 2;
+ int kvm_fd;
+ u64 bitmask;
+ long rc;
+ struct kvm_device_attr attr = {
+ .group = 0,
+ .attr = KVM_X86_XCOMP_GUEST_SUPP,
+ .addr = (unsigned long) &bitmask,
+ };
- /* Create VM */
- vm = vm_create(VM_MODE_DEFAULT,
- DEFAULT_GUEST_PHY_PAGES + extra_pg_pages,
- O_RDWR);
+ TEST_ASSERT(!kvm_supported_cpuid,
+ "kvm_get_supported_cpuid() cannot be used before ARCH_REQ_XCOMP_GUEST_PERM");
- /* Setup guest code */
- kvm_vm_elf_load(vm, program_invocation_name, 0, 0);
+ TEST_ASSERT(is_power_of_2(xfeature),
+ "Dynamic XFeatures must be enabled one at a time");
- /* Setup IRQ Chip */
- vm_create_irqchip(vm);
+ kvm_fd = open_kvm_dev_path_or_exit();
+ rc = __kvm_ioctl(kvm_fd, KVM_GET_DEVICE_ATTR, &attr);
+ close(kvm_fd);
+
+ if (rc == -1 && (errno == ENXIO || errno == EINVAL))
+ __TEST_REQUIRE(0, "KVM_X86_XCOMP_GUEST_SUPP not supported");
- /* Add the first vCPU. */
- vm_vcpu_add_default(vm, vcpuid, guest_code);
+ TEST_ASSERT(rc == 0, "KVM_GET_DEVICE_ATTR(0, KVM_X86_XCOMP_GUEST_SUPP) error: %ld", rc);
- return vm;
+ __TEST_REQUIRE(bitmask & xfeature,
+ "Required XSAVE feature '%s' not supported", name);
+
+ TEST_REQUIRE(!syscall(SYS_arch_prctl, ARCH_REQ_XCOMP_GUEST_PERM, ilog2(xfeature)));
+
+ rc = syscall(SYS_arch_prctl, ARCH_GET_XCOMP_GUEST_PERM, &bitmask);
+ TEST_ASSERT(rc == 0, "prctl(ARCH_GET_XCOMP_GUEST_PERM) error: %ld", rc);
+ TEST_ASSERT(bitmask & xfeature,
+ "'%s' (0x%lx) not permitted after prctl(ARCH_REQ_XCOMP_GUEST_PERM) permitted=0x%lx",
+ name, xfeature, bitmask);
}
-/*
- * VCPU Get MSR
- *
- * Input Args:
- * vm - Virtual Machine
- * vcpuid - VCPU ID
- * msr_index - Index of MSR
- *
- * Output Args: None
- *
- * Return: On success, value of the MSR. On failure a TEST_ASSERT is produced.
- *
- * Get value of MSR for VCPU.
- */
-uint64_t vcpu_get_msr(struct kvm_vm *vm, uint32_t vcpuid, uint64_t msr_index)
+void vcpu_init_cpuid(struct kvm_vcpu *vcpu, const struct kvm_cpuid2 *cpuid)
+{
+ TEST_ASSERT(cpuid != vcpu->cpuid, "@cpuid can't be the vCPU's CPUID");
+
+ /* Allow overriding the default CPUID. */
+ if (vcpu->cpuid && vcpu->cpuid->nent < cpuid->nent) {
+ free(vcpu->cpuid);
+ vcpu->cpuid = NULL;
+ }
+
+ if (!vcpu->cpuid)
+ vcpu->cpuid = allocate_kvm_cpuid2(cpuid->nent);
+
+ memcpy(vcpu->cpuid, cpuid, kvm_cpuid2_size(cpuid->nent));
+ vcpu_set_cpuid(vcpu);
+}
+
+void vcpu_set_cpuid_property(struct kvm_vcpu *vcpu,
+ struct kvm_x86_cpu_property property,
+ uint32_t value)
+{
+ struct kvm_cpuid_entry2 *entry;
+
+ entry = __vcpu_get_cpuid_entry(vcpu, property.function, property.index);
+
+ (&entry->eax)[property.reg] &= ~GENMASK(property.hi_bit, property.lo_bit);
+ (&entry->eax)[property.reg] |= value << property.lo_bit;
+
+ vcpu_set_cpuid(vcpu);
+
+ /* Sanity check that @value doesn't exceed the bounds in any way. */
+ TEST_ASSERT_EQ(kvm_cpuid_property(vcpu->cpuid, property), value);
+}
+
+void vcpu_clear_cpuid_entry(struct kvm_vcpu *vcpu, uint32_t function)
+{
+ struct kvm_cpuid_entry2 *entry = vcpu_get_cpuid_entry(vcpu, function);
+
+ entry->eax = 0;
+ entry->ebx = 0;
+ entry->ecx = 0;
+ entry->edx = 0;
+ vcpu_set_cpuid(vcpu);
+}
+
+void vcpu_set_or_clear_cpuid_feature(struct kvm_vcpu *vcpu,
+ struct kvm_x86_cpu_feature feature,
+ bool set)
+{
+ struct kvm_cpuid_entry2 *entry;
+ u32 *reg;
+
+ entry = __vcpu_get_cpuid_entry(vcpu, feature.function, feature.index);
+ reg = (&entry->eax) + feature.reg;
+
+ if (set)
+ *reg |= BIT(feature.bit);
+ else
+ *reg &= ~BIT(feature.bit);
+
+ vcpu_set_cpuid(vcpu);
+}
+
+uint64_t vcpu_get_msr(struct kvm_vcpu *vcpu, uint64_t msr_index)
{
- struct vcpu *vcpu = vcpu_find(vm, vcpuid);
struct {
struct kvm_msrs header;
struct kvm_msr_entry entry;
} buffer = {};
- int r;
- TEST_ASSERT(vcpu != NULL, "vcpu not found, vcpuid: %u", vcpuid);
buffer.header.nmsrs = 1;
buffer.entry.index = msr_index;
- r = ioctl(vcpu->fd, KVM_GET_MSRS, &buffer.header);
- TEST_ASSERT(r == 1, "KVM_GET_MSRS IOCTL failed,\n"
- " rc: %i errno: %i", r, errno);
+
+ vcpu_msrs_get(vcpu, &buffer.header);
return buffer.entry.data;
}
-/*
- * _VCPU Set MSR
- *
- * Input Args:
- * vm - Virtual Machine
- * vcpuid - VCPU ID
- * msr_index - Index of MSR
- * msr_value - New value of MSR
- *
- * Output Args: None
- *
- * Return: The result of KVM_SET_MSRS.
- *
- * Sets the value of an MSR for the given VCPU.
- */
-int _vcpu_set_msr(struct kvm_vm *vm, uint32_t vcpuid, uint64_t msr_index,
- uint64_t msr_value)
+int _vcpu_set_msr(struct kvm_vcpu *vcpu, uint64_t msr_index, uint64_t msr_value)
{
- struct vcpu *vcpu = vcpu_find(vm, vcpuid);
struct {
struct kvm_msrs header;
struct kvm_msr_entry entry;
} buffer = {};
- int r;
- TEST_ASSERT(vcpu != NULL, "vcpu not found, vcpuid: %u", vcpuid);
memset(&buffer, 0, sizeof(buffer));
buffer.header.nmsrs = 1;
buffer.entry.index = msr_index;
buffer.entry.data = msr_value;
- r = ioctl(vcpu->fd, KVM_SET_MSRS, &buffer.header);
- return r;
-}
-/*
- * VCPU Set MSR
- *
- * Input Args:
- * vm - Virtual Machine
- * vcpuid - VCPU ID
- * msr_index - Index of MSR
- * msr_value - New value of MSR
- *
- * Output Args: None
- *
- * Return: On success, nothing. On failure a TEST_ASSERT is produced.
- *
- * Set value of MSR for VCPU.
- */
-void vcpu_set_msr(struct kvm_vm *vm, uint32_t vcpuid, uint64_t msr_index,
- uint64_t msr_value)
-{
- int r;
-
- r = _vcpu_set_msr(vm, vcpuid, msr_index, msr_value);
- TEST_ASSERT(r == 1, "KVM_SET_MSRS IOCTL failed,\n"
- " rc: %i errno: %i", r, errno);
+ return __vcpu_ioctl(vcpu, KVM_SET_MSRS, &buffer.header);
}
-void vcpu_args_set(struct kvm_vm *vm, uint32_t vcpuid, unsigned int num, ...)
+void vcpu_args_set(struct kvm_vcpu *vcpu, unsigned int num, ...)
{
va_list ap;
struct kvm_regs regs;
TEST_ASSERT(num >= 1 && num <= 6, "Unsupported number of args,\n"
- " num: %u\n",
+ " num: %u",
num);
va_start(ap, num);
- vcpu_regs_get(vm, vcpuid, &regs);
+ vcpu_regs_get(vcpu, &regs);
if (num >= 1)
regs.rdi = va_arg(ap, uint64_t);
@@ -873,86 +887,112 @@ void vcpu_args_set(struct kvm_vm *vm, uint32_t vcpuid, unsigned int num, ...)
if (num >= 6)
regs.r9 = va_arg(ap, uint64_t);
- vcpu_regs_set(vm, vcpuid, &regs);
+ vcpu_regs_set(vcpu, &regs);
va_end(ap);
}
-void vcpu_dump(FILE *stream, struct kvm_vm *vm, uint32_t vcpuid, uint8_t indent)
+void vcpu_arch_dump(FILE *stream, struct kvm_vcpu *vcpu, uint8_t indent)
{
struct kvm_regs regs;
struct kvm_sregs sregs;
- fprintf(stream, "%*scpuid: %u\n", indent, "", vcpuid);
+ fprintf(stream, "%*svCPU ID: %u\n", indent, "", vcpu->id);
fprintf(stream, "%*sregs:\n", indent + 2, "");
- vcpu_regs_get(vm, vcpuid, &regs);
+ vcpu_regs_get(vcpu, &regs);
regs_dump(stream, &regs, indent + 4);
fprintf(stream, "%*ssregs:\n", indent + 2, "");
- vcpu_sregs_get(vm, vcpuid, &sregs);
+ vcpu_sregs_get(vcpu, &sregs);
sregs_dump(stream, &sregs, indent + 4);
}
-struct kvm_x86_state {
- struct kvm_vcpu_events events;
- struct kvm_mp_state mp_state;
- struct kvm_regs regs;
- struct kvm_xsave xsave;
- struct kvm_xcrs xcrs;
- struct kvm_sregs sregs;
- struct kvm_debugregs debugregs;
- union {
- struct kvm_nested_state nested;
- char nested_[16384];
- };
- struct kvm_msrs msrs;
-};
-
-static int kvm_get_num_msrs_fd(int kvm_fd)
+static struct kvm_msr_list *__kvm_get_msr_index_list(bool feature_msrs)
{
+ struct kvm_msr_list *list;
struct kvm_msr_list nmsrs;
- int r;
+ int kvm_fd, r;
+
+ kvm_fd = open_kvm_dev_path_or_exit();
nmsrs.nmsrs = 0;
- r = ioctl(kvm_fd, KVM_GET_MSR_INDEX_LIST, &nmsrs);
- TEST_ASSERT(r == -1 && errno == E2BIG, "Unexpected result from KVM_GET_MSR_INDEX_LIST probe, r: %i",
- r);
+ if (!feature_msrs)
+ r = __kvm_ioctl(kvm_fd, KVM_GET_MSR_INDEX_LIST, &nmsrs);
+ else
+ r = __kvm_ioctl(kvm_fd, KVM_GET_MSR_FEATURE_INDEX_LIST, &nmsrs);
+
+ TEST_ASSERT(r == -1 && errno == E2BIG,
+ "Expected -E2BIG, got rc: %i errno: %i (%s)",
+ r, errno, strerror(errno));
+
+ list = malloc(sizeof(*list) + nmsrs.nmsrs * sizeof(list->indices[0]));
+ TEST_ASSERT(list, "-ENOMEM when allocating MSR index list");
+ list->nmsrs = nmsrs.nmsrs;
+
+ if (!feature_msrs)
+ kvm_ioctl(kvm_fd, KVM_GET_MSR_INDEX_LIST, list);
+ else
+ kvm_ioctl(kvm_fd, KVM_GET_MSR_FEATURE_INDEX_LIST, list);
+ close(kvm_fd);
- return nmsrs.nmsrs;
+ TEST_ASSERT(list->nmsrs == nmsrs.nmsrs,
+ "Number of MSRs in list changed, was %d, now %d",
+ nmsrs.nmsrs, list->nmsrs);
+ return list;
}
-static int kvm_get_num_msrs(struct kvm_vm *vm)
+const struct kvm_msr_list *kvm_get_msr_index_list(void)
{
- return kvm_get_num_msrs_fd(vm->kvm_fd);
+ static const struct kvm_msr_list *list;
+
+ if (!list)
+ list = __kvm_get_msr_index_list(false);
+ return list;
}
-struct kvm_msr_list *kvm_get_msr_index_list(void)
+
+const struct kvm_msr_list *kvm_get_feature_msr_index_list(void)
{
- struct kvm_msr_list *list;
- int nmsrs, r, kvm_fd;
+ static const struct kvm_msr_list *list;
- kvm_fd = open(KVM_DEV_PATH, O_RDONLY);
- if (kvm_fd < 0)
- exit(KSFT_SKIP);
+ if (!list)
+ list = __kvm_get_msr_index_list(true);
+ return list;
+}
- nmsrs = kvm_get_num_msrs_fd(kvm_fd);
- list = malloc(sizeof(*list) + nmsrs * sizeof(list->indices[0]));
- list->nmsrs = nmsrs;
- r = ioctl(kvm_fd, KVM_GET_MSR_INDEX_LIST, list);
- close(kvm_fd);
+bool kvm_msr_is_in_save_restore_list(uint32_t msr_index)
+{
+ const struct kvm_msr_list *list = kvm_get_msr_index_list();
+ int i;
- TEST_ASSERT(r == 0, "Unexpected result from KVM_GET_MSR_INDEX_LIST, r: %i",
- r);
+ for (i = 0; i < list->nmsrs; ++i) {
+ if (list->indices[i] == msr_index)
+ return true;
+ }
- return list;
+ return false;
}
-struct kvm_x86_state *vcpu_save_state(struct kvm_vm *vm, uint32_t vcpuid)
+static void vcpu_save_xsave_state(struct kvm_vcpu *vcpu,
+ struct kvm_x86_state *state)
{
- struct vcpu *vcpu = vcpu_find(vm, vcpuid);
- struct kvm_msr_list *list;
+ int size = vm_check_cap(vcpu->vm, KVM_CAP_XSAVE2);
+
+ if (size) {
+ state->xsave = malloc(size);
+ vcpu_xsave2_get(vcpu, state->xsave);
+ } else {
+ state->xsave = malloc(sizeof(struct kvm_xsave));
+ vcpu_xsave_get(vcpu, state->xsave);
+ }
+}
+
+struct kvm_x86_state *vcpu_save_state(struct kvm_vcpu *vcpu)
+{
+ const struct kvm_msr_list *msr_list = kvm_get_msr_index_list();
struct kvm_x86_state *state;
- int nmsrs, r, i;
+ int i;
+
static int nested_size = -1;
if (nested_size == -1) {
@@ -968,153 +1008,351 @@ struct kvm_x86_state *vcpu_save_state(struct kvm_vm *vm, uint32_t vcpuid)
* kernel with KVM_RUN. Complete IO prior to migrating state
* to a new VM.
*/
- vcpu_run_complete_io(vm, vcpuid);
-
- nmsrs = kvm_get_num_msrs(vm);
- list = malloc(sizeof(*list) + nmsrs * sizeof(list->indices[0]));
- list->nmsrs = nmsrs;
- r = ioctl(vm->kvm_fd, KVM_GET_MSR_INDEX_LIST, list);
- TEST_ASSERT(r == 0, "Unexpected result from KVM_GET_MSR_INDEX_LIST, r: %i",
- r);
-
- state = malloc(sizeof(*state) + nmsrs * sizeof(state->msrs.entries[0]));
- r = ioctl(vcpu->fd, KVM_GET_VCPU_EVENTS, &state->events);
- TEST_ASSERT(r == 0, "Unexpected result from KVM_GET_VCPU_EVENTS, r: %i",
- r);
-
- r = ioctl(vcpu->fd, KVM_GET_MP_STATE, &state->mp_state);
- TEST_ASSERT(r == 0, "Unexpected result from KVM_GET_MP_STATE, r: %i",
- r);
-
- r = ioctl(vcpu->fd, KVM_GET_REGS, &state->regs);
- TEST_ASSERT(r == 0, "Unexpected result from KVM_GET_REGS, r: %i",
- r);
-
- r = ioctl(vcpu->fd, KVM_GET_XSAVE, &state->xsave);
- TEST_ASSERT(r == 0, "Unexpected result from KVM_GET_XSAVE, r: %i",
- r);
-
- if (kvm_check_cap(KVM_CAP_XCRS)) {
- r = ioctl(vcpu->fd, KVM_GET_XCRS, &state->xcrs);
- TEST_ASSERT(r == 0, "Unexpected result from KVM_GET_XCRS, r: %i",
- r);
- }
+ vcpu_run_complete_io(vcpu);
- r = ioctl(vcpu->fd, KVM_GET_SREGS, &state->sregs);
- TEST_ASSERT(r == 0, "Unexpected result from KVM_GET_SREGS, r: %i",
- r);
+ state = malloc(sizeof(*state) + msr_list->nmsrs * sizeof(state->msrs.entries[0]));
+ TEST_ASSERT(state, "-ENOMEM when allocating kvm state");
+
+ vcpu_events_get(vcpu, &state->events);
+ vcpu_mp_state_get(vcpu, &state->mp_state);
+ vcpu_regs_get(vcpu, &state->regs);
+ vcpu_save_xsave_state(vcpu, state);
+
+ if (kvm_has_cap(KVM_CAP_XCRS))
+ vcpu_xcrs_get(vcpu, &state->xcrs);
+
+ vcpu_sregs_get(vcpu, &state->sregs);
if (nested_size) {
state->nested.size = sizeof(state->nested_);
- r = ioctl(vcpu->fd, KVM_GET_NESTED_STATE, &state->nested);
- TEST_ASSERT(r == 0, "Unexpected result from KVM_GET_NESTED_STATE, r: %i",
- r);
+
+ vcpu_nested_state_get(vcpu, &state->nested);
TEST_ASSERT(state->nested.size <= nested_size,
- "Nested state size too big, %i (KVM_CHECK_CAP gave %i)",
- state->nested.size, nested_size);
- } else
+ "Nested state size too big, %i (KVM_CHECK_CAP gave %i)",
+ state->nested.size, nested_size);
+ } else {
state->nested.size = 0;
+ }
- state->msrs.nmsrs = nmsrs;
- for (i = 0; i < nmsrs; i++)
- state->msrs.entries[i].index = list->indices[i];
- r = ioctl(vcpu->fd, KVM_GET_MSRS, &state->msrs);
- TEST_ASSERT(r == nmsrs, "Unexpected result from KVM_GET_MSRS, r: %i (failed MSR was 0x%x)",
- r, r == nmsrs ? -1 : list->indices[r]);
+ state->msrs.nmsrs = msr_list->nmsrs;
+ for (i = 0; i < msr_list->nmsrs; i++)
+ state->msrs.entries[i].index = msr_list->indices[i];
+ vcpu_msrs_get(vcpu, &state->msrs);
- r = ioctl(vcpu->fd, KVM_GET_DEBUGREGS, &state->debugregs);
- TEST_ASSERT(r == 0, "Unexpected result from KVM_GET_DEBUGREGS, r: %i",
- r);
+ vcpu_debugregs_get(vcpu, &state->debugregs);
- free(list);
return state;
}
-void vcpu_load_state(struct kvm_vm *vm, uint32_t vcpuid, struct kvm_x86_state *state)
+void vcpu_load_state(struct kvm_vcpu *vcpu, struct kvm_x86_state *state)
{
- struct vcpu *vcpu = vcpu_find(vm, vcpuid);
- int r;
+ vcpu_sregs_set(vcpu, &state->sregs);
+ vcpu_msrs_set(vcpu, &state->msrs);
- r = ioctl(vcpu->fd, KVM_SET_XSAVE, &state->xsave);
- TEST_ASSERT(r == 0, "Unexpected result from KVM_SET_XSAVE, r: %i",
- r);
+ if (kvm_has_cap(KVM_CAP_XCRS))
+ vcpu_xcrs_set(vcpu, &state->xcrs);
+
+ vcpu_xsave_set(vcpu, state->xsave);
+ vcpu_events_set(vcpu, &state->events);
+ vcpu_mp_state_set(vcpu, &state->mp_state);
+ vcpu_debugregs_set(vcpu, &state->debugregs);
+ vcpu_regs_set(vcpu, &state->regs);
+
+ if (state->nested.size)
+ vcpu_nested_state_set(vcpu, &state->nested);
+}
+
+void kvm_x86_state_cleanup(struct kvm_x86_state *state)
+{
+ free(state->xsave);
+ free(state);
+}
+
+void kvm_get_cpu_address_width(unsigned int *pa_bits, unsigned int *va_bits)
+{
+ if (!kvm_cpu_has_p(X86_PROPERTY_MAX_PHY_ADDR)) {
+ *pa_bits = kvm_cpu_has(X86_FEATURE_PAE) ? 36 : 32;
+ *va_bits = 32;
+ } else {
+ *pa_bits = kvm_cpu_property(X86_PROPERTY_MAX_PHY_ADDR);
+ *va_bits = kvm_cpu_property(X86_PROPERTY_MAX_VIRT_ADDR);
+ }
+}
- if (kvm_check_cap(KVM_CAP_XCRS)) {
- r = ioctl(vcpu->fd, KVM_SET_XCRS, &state->xcrs);
- TEST_ASSERT(r == 0, "Unexpected result from KVM_SET_XCRS, r: %i",
- r);
+void kvm_init_vm_address_properties(struct kvm_vm *vm)
+{
+ if (vm->subtype == VM_SUBTYPE_SEV || vm->subtype == VM_SUBTYPE_SEV_ES) {
+ vm->arch.c_bit = BIT_ULL(this_cpu_property(X86_PROPERTY_SEV_C_BIT));
+ vm->gpa_tag_mask = vm->arch.c_bit;
}
+}
- r = ioctl(vcpu->fd, KVM_SET_SREGS, &state->sregs);
- TEST_ASSERT(r == 0, "Unexpected result from KVM_SET_SREGS, r: %i",
- r);
+static void set_idt_entry(struct kvm_vm *vm, int vector, unsigned long addr,
+ int dpl, unsigned short selector)
+{
+ struct idt_entry *base =
+ (struct idt_entry *)addr_gva2hva(vm, vm->idt);
+ struct idt_entry *e = &base[vector];
+
+ memset(e, 0, sizeof(*e));
+ e->offset0 = addr;
+ e->selector = selector;
+ e->ist = 0;
+ e->type = 14;
+ e->dpl = dpl;
+ e->p = 1;
+ e->offset1 = addr >> 16;
+ e->offset2 = addr >> 32;
+}
- r = ioctl(vcpu->fd, KVM_SET_MSRS, &state->msrs);
- TEST_ASSERT(r == state->msrs.nmsrs, "Unexpected result from KVM_SET_MSRS, r: %i (failed at %x)",
- r, r == state->msrs.nmsrs ? -1 : state->msrs.entries[r].index);
- r = ioctl(vcpu->fd, KVM_SET_VCPU_EVENTS, &state->events);
- TEST_ASSERT(r == 0, "Unexpected result from KVM_SET_VCPU_EVENTS, r: %i",
- r);
+static bool kvm_fixup_exception(struct ex_regs *regs)
+{
+ if (regs->r9 != KVM_EXCEPTION_MAGIC || regs->rip != regs->r10)
+ return false;
- r = ioctl(vcpu->fd, KVM_SET_MP_STATE, &state->mp_state);
- TEST_ASSERT(r == 0, "Unexpected result from KVM_SET_MP_STATE, r: %i",
- r);
+ if (regs->vector == DE_VECTOR)
+ return false;
- r = ioctl(vcpu->fd, KVM_SET_DEBUGREGS, &state->debugregs);
- TEST_ASSERT(r == 0, "Unexpected result from KVM_SET_DEBUGREGS, r: %i",
- r);
+ regs->rip = regs->r11;
+ regs->r9 = regs->vector;
+ regs->r10 = regs->error_code;
+ return true;
+}
- r = ioctl(vcpu->fd, KVM_SET_REGS, &state->regs);
- TEST_ASSERT(r == 0, "Unexpected result from KVM_SET_REGS, r: %i",
- r);
+void route_exception(struct ex_regs *regs)
+{
+ typedef void(*handler)(struct ex_regs *);
+ handler *handlers = (handler *)exception_handlers;
- if (state->nested.size) {
- r = ioctl(vcpu->fd, KVM_SET_NESTED_STATE, &state->nested);
- TEST_ASSERT(r == 0, "Unexpected result from KVM_SET_NESTED_STATE, r: %i",
- r);
+ if (handlers && handlers[regs->vector]) {
+ handlers[regs->vector](regs);
+ return;
}
+
+ if (kvm_fixup_exception(regs))
+ return;
+
+ ucall_assert(UCALL_UNHANDLED,
+ "Unhandled exception in guest", __FILE__, __LINE__,
+ "Unhandled exception '0x%lx' at guest RIP '0x%lx'",
+ regs->vector, regs->rip);
}
-bool is_intel_cpu(void)
+void vm_init_descriptor_tables(struct kvm_vm *vm)
{
- int eax, ebx, ecx, edx;
- const uint32_t *chunk;
- const int leaf = 0;
+ extern void *idt_handlers;
+ int i;
+
+ vm->idt = __vm_vaddr_alloc_page(vm, MEM_REGION_DATA);
+ vm->handlers = __vm_vaddr_alloc_page(vm, MEM_REGION_DATA);
+ /* Handlers have the same address in both address spaces.*/
+ for (i = 0; i < NUM_INTERRUPTS; i++)
+ set_idt_entry(vm, i, (unsigned long)(&idt_handlers)[i], 0,
+ DEFAULT_CODE_SELECTOR);
+}
- __asm__ __volatile__(
- "cpuid"
- : /* output */ "=a"(eax), "=b"(ebx),
- "=c"(ecx), "=d"(edx)
- : /* input */ "0"(leaf), "2"(0));
+void vcpu_init_descriptor_tables(struct kvm_vcpu *vcpu)
+{
+ struct kvm_vm *vm = vcpu->vm;
+ struct kvm_sregs sregs;
- chunk = (const uint32_t *)("GenuineIntel");
- return (ebx == chunk[0] && edx == chunk[1] && ecx == chunk[2]);
+ vcpu_sregs_get(vcpu, &sregs);
+ sregs.idt.base = vm->idt;
+ sregs.idt.limit = NUM_INTERRUPTS * sizeof(struct idt_entry) - 1;
+ sregs.gdt.base = vm->gdt;
+ sregs.gdt.limit = getpagesize() - 1;
+ kvm_seg_set_kernel_data_64bit(NULL, DEFAULT_DATA_SELECTOR, &sregs.gs);
+ vcpu_sregs_set(vcpu, &sregs);
+ *(vm_vaddr_t *)addr_gva2hva(vm, (vm_vaddr_t)(&exception_handlers)) = vm->handlers;
}
-uint32_t kvm_get_cpuid_max_basic(void)
+void vm_install_exception_handler(struct kvm_vm *vm, int vector,
+ void (*handler)(struct ex_regs *))
{
- return kvm_get_supported_cpuid_entry(0)->eax;
+ vm_vaddr_t *handlers = (vm_vaddr_t *)addr_gva2hva(vm, vm->handlers);
+
+ handlers[vector] = (vm_vaddr_t)handler;
}
-uint32_t kvm_get_cpuid_max_extended(void)
+void assert_on_unhandled_exception(struct kvm_vcpu *vcpu)
{
- return kvm_get_supported_cpuid_entry(0x80000000)->eax;
+ struct ucall uc;
+
+ if (get_ucall(vcpu, &uc) == UCALL_UNHANDLED)
+ REPORT_GUEST_ASSERT(uc);
}
-void kvm_get_cpu_address_width(unsigned int *pa_bits, unsigned int *va_bits)
+const struct kvm_cpuid_entry2 *get_cpuid_entry(const struct kvm_cpuid2 *cpuid,
+ uint32_t function, uint32_t index)
{
- struct kvm_cpuid_entry2 *entry;
- bool pae;
+ int i;
- /* SDM 4.1.4 */
- if (kvm_get_cpuid_max_extended() < 0x80000008) {
- pae = kvm_get_supported_cpuid_entry(1)->edx & (1 << 6);
- *pa_bits = pae ? 36 : 32;
- *va_bits = 32;
- } else {
- entry = kvm_get_supported_cpuid_entry(0x80000008);
- *pa_bits = entry->eax & 0xff;
- *va_bits = (entry->eax >> 8) & 0xff;
+ for (i = 0; i < cpuid->nent; i++) {
+ if (cpuid->entries[i].function == function &&
+ cpuid->entries[i].index == index)
+ return &cpuid->entries[i];
+ }
+
+ TEST_FAIL("CPUID function 0x%x index 0x%x not found ", function, index);
+
+ return NULL;
+}
+
+#define X86_HYPERCALL(inputs...) \
+({ \
+ uint64_t r; \
+ \
+ asm volatile("test %[use_vmmcall], %[use_vmmcall]\n\t" \
+ "jnz 1f\n\t" \
+ "vmcall\n\t" \
+ "jmp 2f\n\t" \
+ "1: vmmcall\n\t" \
+ "2:" \
+ : "=a"(r) \
+ : [use_vmmcall] "r" (host_cpu_is_amd), inputs); \
+ \
+ r; \
+})
+
+uint64_t kvm_hypercall(uint64_t nr, uint64_t a0, uint64_t a1, uint64_t a2,
+ uint64_t a3)
+{
+ return X86_HYPERCALL("a"(nr), "b"(a0), "c"(a1), "d"(a2), "S"(a3));
+}
+
+uint64_t __xen_hypercall(uint64_t nr, uint64_t a0, void *a1)
+{
+ return X86_HYPERCALL("a"(nr), "D"(a0), "S"(a1));
+}
+
+void xen_hypercall(uint64_t nr, uint64_t a0, void *a1)
+{
+ GUEST_ASSERT(!__xen_hypercall(nr, a0, a1));
+}
+
+const struct kvm_cpuid2 *kvm_get_supported_hv_cpuid(void)
+{
+ static struct kvm_cpuid2 *cpuid;
+ int kvm_fd;
+
+ if (cpuid)
+ return cpuid;
+
+ cpuid = allocate_kvm_cpuid2(MAX_NR_CPUID_ENTRIES);
+ kvm_fd = open_kvm_dev_path_or_exit();
+
+ kvm_ioctl(kvm_fd, KVM_GET_SUPPORTED_HV_CPUID, cpuid);
+
+ close(kvm_fd);
+ return cpuid;
+}
+
+void vcpu_set_hv_cpuid(struct kvm_vcpu *vcpu)
+{
+ static struct kvm_cpuid2 *cpuid_full;
+ const struct kvm_cpuid2 *cpuid_sys, *cpuid_hv;
+ int i, nent = 0;
+
+ if (!cpuid_full) {
+ cpuid_sys = kvm_get_supported_cpuid();
+ cpuid_hv = kvm_get_supported_hv_cpuid();
+
+ cpuid_full = allocate_kvm_cpuid2(cpuid_sys->nent + cpuid_hv->nent);
+ if (!cpuid_full) {
+ perror("malloc");
+ abort();
+ }
+
+ /* Need to skip KVM CPUID leaves 0x400000xx */
+ for (i = 0; i < cpuid_sys->nent; i++) {
+ if (cpuid_sys->entries[i].function >= 0x40000000 &&
+ cpuid_sys->entries[i].function < 0x40000100)
+ continue;
+ cpuid_full->entries[nent] = cpuid_sys->entries[i];
+ nent++;
+ }
+
+ memcpy(&cpuid_full->entries[nent], cpuid_hv->entries,
+ cpuid_hv->nent * sizeof(struct kvm_cpuid_entry2));
+ cpuid_full->nent = nent + cpuid_hv->nent;
}
+
+ vcpu_init_cpuid(vcpu, cpuid_full);
+}
+
+const struct kvm_cpuid2 *vcpu_get_supported_hv_cpuid(struct kvm_vcpu *vcpu)
+{
+ struct kvm_cpuid2 *cpuid = allocate_kvm_cpuid2(MAX_NR_CPUID_ENTRIES);
+
+ vcpu_ioctl(vcpu, KVM_GET_SUPPORTED_HV_CPUID, cpuid);
+
+ return cpuid;
+}
+
+unsigned long vm_compute_max_gfn(struct kvm_vm *vm)
+{
+ const unsigned long num_ht_pages = 12 << (30 - vm->page_shift); /* 12 GiB */
+ unsigned long ht_gfn, max_gfn, max_pfn;
+ uint8_t maxphyaddr;
+
+ max_gfn = (1ULL << (vm->pa_bits - vm->page_shift)) - 1;
+
+ /* Avoid reserved HyperTransport region on AMD processors. */
+ if (!host_cpu_is_amd)
+ return max_gfn;
+
+ /* On parts with <40 physical address bits, the area is fully hidden */
+ if (vm->pa_bits < 40)
+ return max_gfn;
+
+ /* Before family 17h, the HyperTransport area is just below 1T. */
+ ht_gfn = (1 << 28) - num_ht_pages;
+ if (this_cpu_family() < 0x17)
+ goto done;
+
+ /*
+ * Otherwise it's at the top of the physical address space, possibly
+ * reduced due to SME by bits 11:6 of CPUID[0x8000001f].EBX. Use
+ * the old conservative value if MAXPHYADDR is not enumerated.
+ */
+ if (!this_cpu_has_p(X86_PROPERTY_MAX_PHY_ADDR))
+ goto done;
+
+ maxphyaddr = this_cpu_property(X86_PROPERTY_MAX_PHY_ADDR);
+ max_pfn = (1ULL << (maxphyaddr - vm->page_shift)) - 1;
+
+ if (this_cpu_has_p(X86_PROPERTY_PHYS_ADDR_REDUCTION))
+ max_pfn >>= this_cpu_property(X86_PROPERTY_PHYS_ADDR_REDUCTION);
+
+ ht_gfn = max_pfn - num_ht_pages;
+done:
+ return min(max_gfn, ht_gfn - 1);
+}
+
+/* Returns true if kvm_intel was loaded with unrestricted_guest=1. */
+bool vm_is_unrestricted_guest(struct kvm_vm *vm)
+{
+ /* Ensure that a KVM vendor-specific module is loaded. */
+ if (vm == NULL)
+ close(open_kvm_dev_path_or_exit());
+
+ return get_kvm_intel_param_bool("unrestricted_guest");
+}
+
+void kvm_selftest_arch_init(void)
+{
+ host_cpu_is_intel = this_cpu_is_intel();
+ host_cpu_is_amd = this_cpu_is_amd();
+}
+
+bool sys_clocksource_is_based_on_tsc(void)
+{
+ char *clk_name = sys_get_cur_clocksource();
+ bool ret = !strcmp(clk_name, "tsc\n") ||
+ !strcmp(clk_name, "hyperv_clocksource_tsc_page\n");
+
+ free(clk_name);
+
+ return ret;
}
diff --git a/tools/testing/selftests/kvm/lib/x86_64/sev.c b/tools/testing/selftests/kvm/lib/x86_64/sev.c
new file mode 100644
index 000000000000..e248d3364b9c
--- /dev/null
+++ b/tools/testing/selftests/kvm/lib/x86_64/sev.c
@@ -0,0 +1,114 @@
+// SPDX-License-Identifier: GPL-2.0-only
+#define _GNU_SOURCE /* for program_invocation_short_name */
+#include <stdint.h>
+#include <stdbool.h>
+
+#include "sev.h"
+
+/*
+ * sparsebit_next_clear() can return 0 if [x, 2**64-1] are all set, and the
+ * -1 would then cause an underflow back to 2**64 - 1. This is expected and
+ * correct.
+ *
+ * If the last range in the sparsebit is [x, y] and we try to iterate,
+ * sparsebit_next_set() will return 0, and sparsebit_next_clear() will try
+ * and find the first range, but that's correct because the condition
+ * expression would cause us to quit the loop.
+ */
+static void encrypt_region(struct kvm_vm *vm, struct userspace_mem_region *region)
+{
+ const struct sparsebit *protected_phy_pages = region->protected_phy_pages;
+ const vm_paddr_t gpa_base = region->region.guest_phys_addr;
+ const sparsebit_idx_t lowest_page_in_region = gpa_base >> vm->page_shift;
+ sparsebit_idx_t i, j;
+
+ if (!sparsebit_any_set(protected_phy_pages))
+ return;
+
+ sev_register_encrypted_memory(vm, region);
+
+ sparsebit_for_each_set_range(protected_phy_pages, i, j) {
+ const uint64_t size = (j - i + 1) * vm->page_size;
+ const uint64_t offset = (i - lowest_page_in_region) * vm->page_size;
+
+ sev_launch_update_data(vm, gpa_base + offset, size);
+ }
+}
+
+void sev_vm_launch(struct kvm_vm *vm, uint32_t policy)
+{
+ struct kvm_sev_launch_start launch_start = {
+ .policy = policy,
+ };
+ struct userspace_mem_region *region;
+ struct kvm_sev_guest_status status;
+ int ctr;
+
+ vm_sev_ioctl(vm, KVM_SEV_LAUNCH_START, &launch_start);
+ vm_sev_ioctl(vm, KVM_SEV_GUEST_STATUS, &status);
+
+ TEST_ASSERT_EQ(status.policy, policy);
+ TEST_ASSERT_EQ(status.state, SEV_GUEST_STATE_LAUNCH_UPDATE);
+
+ hash_for_each(vm->regions.slot_hash, ctr, region, slot_node)
+ encrypt_region(vm, region);
+
+ if (policy & SEV_POLICY_ES)
+ vm_sev_ioctl(vm, KVM_SEV_LAUNCH_UPDATE_VMSA, NULL);
+
+ vm->arch.is_pt_protected = true;
+}
+
+void sev_vm_launch_measure(struct kvm_vm *vm, uint8_t *measurement)
+{
+ struct kvm_sev_launch_measure launch_measure;
+ struct kvm_sev_guest_status guest_status;
+
+ launch_measure.len = 256;
+ launch_measure.uaddr = (__u64)measurement;
+ vm_sev_ioctl(vm, KVM_SEV_LAUNCH_MEASURE, &launch_measure);
+
+ vm_sev_ioctl(vm, KVM_SEV_GUEST_STATUS, &guest_status);
+ TEST_ASSERT_EQ(guest_status.state, SEV_GUEST_STATE_LAUNCH_SECRET);
+}
+
+void sev_vm_launch_finish(struct kvm_vm *vm)
+{
+ struct kvm_sev_guest_status status;
+
+ vm_sev_ioctl(vm, KVM_SEV_GUEST_STATUS, &status);
+ TEST_ASSERT(status.state == SEV_GUEST_STATE_LAUNCH_UPDATE ||
+ status.state == SEV_GUEST_STATE_LAUNCH_SECRET,
+ "Unexpected guest state: %d", status.state);
+
+ vm_sev_ioctl(vm, KVM_SEV_LAUNCH_FINISH, NULL);
+
+ vm_sev_ioctl(vm, KVM_SEV_GUEST_STATUS, &status);
+ TEST_ASSERT_EQ(status.state, SEV_GUEST_STATE_RUNNING);
+}
+
+struct kvm_vm *vm_sev_create_with_one_vcpu(uint32_t policy, void *guest_code,
+ struct kvm_vcpu **cpu)
+{
+ struct vm_shape shape = {
+ .type = VM_TYPE_DEFAULT,
+ .mode = VM_MODE_DEFAULT,
+ .subtype = policy & SEV_POLICY_ES ? VM_SUBTYPE_SEV_ES :
+ VM_SUBTYPE_SEV,
+ };
+ struct kvm_vm *vm;
+ struct kvm_vcpu *cpus[1];
+ uint8_t measurement[512];
+
+ vm = __vm_create_with_vcpus(shape, 1, 0, guest_code, cpus);
+ *cpu = cpus[0];
+
+ sev_vm_launch(vm, policy);
+
+ /* TODO: Validate the measurement is as expected. */
+ sev_vm_launch_measure(vm, measurement);
+
+ sev_vm_launch_finish(vm);
+
+ return vm;
+}
diff --git a/tools/testing/selftests/kvm/lib/x86_64/svm.c b/tools/testing/selftests/kvm/lib/x86_64/svm.c
index 3a5c72ed2b79..5495a92dfd5a 100644
--- a/tools/testing/selftests/kvm/lib/x86_64/svm.c
+++ b/tools/testing/selftests/kvm/lib/x86_64/svm.c
@@ -9,10 +9,11 @@
#include "test_util.h"
#include "kvm_util.h"
-#include "../kvm_util_internal.h"
#include "processor.h"
#include "svm_util.h"
+#define SEV_DEV_PATH "/dev/sev"
+
struct gpr64_regs guest_regs;
u64 rflags;
@@ -30,20 +31,22 @@ u64 rflags;
struct svm_test_data *
vcpu_alloc_svm(struct kvm_vm *vm, vm_vaddr_t *p_svm_gva)
{
- vm_vaddr_t svm_gva = vm_vaddr_alloc(vm, getpagesize(),
- 0x10000, 0, 0);
+ vm_vaddr_t svm_gva = vm_vaddr_alloc_page(vm);
struct svm_test_data *svm = addr_gva2hva(vm, svm_gva);
- svm->vmcb = (void *)vm_vaddr_alloc(vm, getpagesize(),
- 0x10000, 0, 0);
+ svm->vmcb = (void *)vm_vaddr_alloc_page(vm);
svm->vmcb_hva = addr_gva2hva(vm, (uintptr_t)svm->vmcb);
svm->vmcb_gpa = addr_gva2gpa(vm, (uintptr_t)svm->vmcb);
- svm->save_area = (void *)vm_vaddr_alloc(vm, getpagesize(),
- 0x10000, 0, 0);
+ svm->save_area = (void *)vm_vaddr_alloc_page(vm);
svm->save_area_hva = addr_gva2hva(vm, (uintptr_t)svm->save_area);
svm->save_area_gpa = addr_gva2gpa(vm, (uintptr_t)svm->save_area);
+ svm->msr = (void *)vm_vaddr_alloc_page(vm);
+ svm->msr_hva = addr_gva2hva(vm, (uintptr_t)svm->msr);
+ svm->msr_gpa = addr_gva2gpa(vm, (uintptr_t)svm->msr);
+ memset(svm->msr_hva, 0, getpagesize());
+
*p_svm_gva = svm_gva;
return svm;
}
@@ -74,7 +77,7 @@ void generic_svm_setup(struct svm_test_data *svm, void *guest_rip, void *guest_r
wrmsr(MSR_VM_HSAVE_PA, svm->save_area_gpa);
memset(vmcb, 0, sizeof(*vmcb));
- asm volatile ("vmsave\n\t" : : "a" (vmcb_gpa) : "memory");
+ asm volatile ("vmsave %0\n\t" : : "a" (vmcb_gpa) : "memory");
vmcb_set_seg(&save->es, get_es(), 0, -1U, data_seg_attr);
vmcb_set_seg(&save->cs, get_cs(), 0, -1U, code_seg_attr);
vmcb_set_seg(&save->ss, get_ss(), 0, -1U, data_seg_attr);
@@ -95,6 +98,7 @@ void generic_svm_setup(struct svm_test_data *svm, void *guest_rip, void *guest_r
save->dbgctl = rdmsr(MSR_IA32_DEBUGCTLMSR);
ctrl->intercept = (1ULL << INTERCEPT_VMRUN) |
(1ULL << INTERCEPT_VMMCALL);
+ ctrl->msrpm_base_pa = svm->msr_gpa;
vmcb->save.rip = (u64)guest_rip;
vmcb->save.rsp = (u64)guest_rsp;
@@ -131,35 +135,30 @@ void generic_svm_setup(struct svm_test_data *svm, void *guest_rip, void *guest_r
void run_guest(struct vmcb *vmcb, uint64_t vmcb_gpa)
{
asm volatile (
- "vmload\n\t"
+ "vmload %[vmcb_gpa]\n\t"
"mov rflags, %%r15\n\t" // rflags
"mov %%r15, 0x170(%[vmcb])\n\t"
"mov guest_regs, %%r15\n\t" // rax
"mov %%r15, 0x1f8(%[vmcb])\n\t"
LOAD_GPR_C
- "vmrun\n\t"
+ "vmrun %[vmcb_gpa]\n\t"
SAVE_GPR_C
"mov 0x170(%[vmcb]), %%r15\n\t" // rflags
"mov %%r15, rflags\n\t"
"mov 0x1f8(%[vmcb]), %%r15\n\t" // rax
"mov %%r15, guest_regs\n\t"
- "vmsave\n\t"
+ "vmsave %[vmcb_gpa]\n\t"
: : [vmcb] "r" (vmcb), [vmcb_gpa] "a" (vmcb_gpa)
: "r15", "memory");
}
-bool nested_svm_supported(void)
-{
- struct kvm_cpuid_entry2 *entry =
- kvm_get_supported_cpuid_entry(0x80000001);
-
- return entry->ecx & CPUID_SVM;
-}
-
-void nested_svm_check_supported(void)
+/*
+ * Open SEV_DEV_PATH if available, otherwise exit the entire program.
+ *
+ * Return:
+ * The opened file descriptor of /dev/sev.
+ */
+int open_sev_dev_path_or_exit(void)
{
- if (!nested_svm_supported()) {
- print_skip("nested SVM not enabled");
- exit(KSFT_SKIP);
- }
+ return open_path_or_exit(SEV_DEV_PATH, 0);
}
diff --git a/tools/testing/selftests/kvm/lib/x86_64/ucall.c b/tools/testing/selftests/kvm/lib/x86_64/ucall.c
index da4d89ad5419..1265cecc7dd1 100644
--- a/tools/testing/selftests/kvm/lib/x86_64/ucall.c
+++ b/tools/testing/selftests/kvm/lib/x86_64/ucall.c
@@ -8,49 +8,49 @@
#define UCALL_PIO_PORT ((uint16_t)0x1000)
-void ucall_init(struct kvm_vm *vm, void *arg)
+void ucall_arch_do_ucall(vm_vaddr_t uc)
{
+ /*
+ * FIXME: Revert this hack (the entire commit that added it) once nVMX
+ * preserves L2 GPRs across a nested VM-Exit. If a ucall from L2, e.g.
+ * to do a GUEST_SYNC(), lands the vCPU in L1, any and all GPRs can be
+ * clobbered by L1. Save and restore non-volatile GPRs (clobbering RBP
+ * in particular is problematic) along with RDX and RDI (which are
+ * inputs), and clobber volatile GPRs. *sigh*
+ */
+#define HORRIFIC_L2_UCALL_CLOBBER_HACK \
+ "rcx", "rsi", "r8", "r9", "r10", "r11"
+
+ asm volatile("push %%rbp\n\t"
+ "push %%r15\n\t"
+ "push %%r14\n\t"
+ "push %%r13\n\t"
+ "push %%r12\n\t"
+ "push %%rbx\n\t"
+ "push %%rdx\n\t"
+ "push %%rdi\n\t"
+ "in %[port], %%al\n\t"
+ "pop %%rdi\n\t"
+ "pop %%rdx\n\t"
+ "pop %%rbx\n\t"
+ "pop %%r12\n\t"
+ "pop %%r13\n\t"
+ "pop %%r14\n\t"
+ "pop %%r15\n\t"
+ "pop %%rbp\n\t"
+ : : [port] "d" (UCALL_PIO_PORT), "D" (uc) : "rax", "memory",
+ HORRIFIC_L2_UCALL_CLOBBER_HACK);
}
-void ucall_uninit(struct kvm_vm *vm)
+void *ucall_arch_get_ucall(struct kvm_vcpu *vcpu)
{
-}
-
-void ucall(uint64_t cmd, int nargs, ...)
-{
- struct ucall uc = {
- .cmd = cmd,
- };
- va_list va;
- int i;
-
- nargs = nargs <= UCALL_MAX_ARGS ? nargs : UCALL_MAX_ARGS;
-
- va_start(va, nargs);
- for (i = 0; i < nargs; ++i)
- uc.args[i] = va_arg(va, uint64_t);
- va_end(va);
-
- asm volatile("in %[port], %%al"
- : : [port] "d" (UCALL_PIO_PORT), "D" (&uc) : "rax", "memory");
-}
-
-uint64_t get_ucall(struct kvm_vm *vm, uint32_t vcpu_id, struct ucall *uc)
-{
- struct kvm_run *run = vcpu_state(vm, vcpu_id);
- struct ucall ucall = {};
+ struct kvm_run *run = vcpu->run;
if (run->exit_reason == KVM_EXIT_IO && run->io.port == UCALL_PIO_PORT) {
struct kvm_regs regs;
- vcpu_regs_get(vm, vcpu_id, &regs);
- memcpy(&ucall, addr_gva2hva(vm, (vm_vaddr_t)regs.rdi),
- sizeof(ucall));
-
- vcpu_run_complete_io(vm, vcpu_id);
- if (uc)
- memcpy(uc, &ucall, sizeof(ucall));
+ vcpu_regs_get(vcpu, &regs);
+ return (void *)regs.rdi;
}
-
- return ucall.cmd;
+ return NULL;
}
diff --git a/tools/testing/selftests/kvm/lib/x86_64/vmx.c b/tools/testing/selftests/kvm/lib/x86_64/vmx.c
index f1e00d43eea2..089b8925b6b2 100644
--- a/tools/testing/selftests/kvm/lib/x86_64/vmx.c
+++ b/tools/testing/selftests/kvm/lib/x86_64/vmx.c
@@ -5,9 +5,10 @@
* Copyright (C) 2018, Google LLC.
*/
+#include <asm/msr-index.h>
+
#include "test_util.h"
#include "kvm_util.h"
-#include "../kvm_util_internal.h"
#include "processor.h"
#include "vmx.h"
@@ -43,21 +44,17 @@ struct eptPageTablePointer {
uint64_t address:40;
uint64_t reserved_63_52:12;
};
-int vcpu_enable_evmcs(struct kvm_vm *vm, int vcpu_id)
+int vcpu_enable_evmcs(struct kvm_vcpu *vcpu)
{
uint16_t evmcs_ver;
- struct kvm_enable_cap enable_evmcs_cap = {
- .cap = KVM_CAP_HYPERV_ENLIGHTENED_VMCS,
- .args[0] = (unsigned long)&evmcs_ver
- };
-
- vcpu_ioctl(vm, vcpu_id, KVM_ENABLE_CAP, &enable_evmcs_cap);
+ vcpu_enable_cap(vcpu, KVM_CAP_HYPERV_ENLIGHTENED_VMCS,
+ (unsigned long)&evmcs_ver);
/* KVM should return supported EVMCS version range */
TEST_ASSERT(((evmcs_ver >> 8) >= (evmcs_ver & 0xff)) &&
(evmcs_ver & 0xff) > 0,
- "Incorrect EVMCS version range: %x:%x\n",
+ "Incorrect EVMCS version range: %x:%x",
evmcs_ver & 0xff, evmcs_ver >> 8);
return evmcs_ver;
@@ -77,55 +74,41 @@ int vcpu_enable_evmcs(struct kvm_vm *vm, int vcpu_id)
struct vmx_pages *
vcpu_alloc_vmx(struct kvm_vm *vm, vm_vaddr_t *p_vmx_gva)
{
- vm_vaddr_t vmx_gva = vm_vaddr_alloc(vm, getpagesize(), 0x10000, 0, 0);
+ vm_vaddr_t vmx_gva = vm_vaddr_alloc_page(vm);
struct vmx_pages *vmx = addr_gva2hva(vm, vmx_gva);
/* Setup of a region of guest memory for the vmxon region. */
- vmx->vmxon = (void *)vm_vaddr_alloc(vm, getpagesize(), 0x10000, 0, 0);
+ vmx->vmxon = (void *)vm_vaddr_alloc_page(vm);
vmx->vmxon_hva = addr_gva2hva(vm, (uintptr_t)vmx->vmxon);
vmx->vmxon_gpa = addr_gva2gpa(vm, (uintptr_t)vmx->vmxon);
/* Setup of a region of guest memory for a vmcs. */
- vmx->vmcs = (void *)vm_vaddr_alloc(vm, getpagesize(), 0x10000, 0, 0);
+ vmx->vmcs = (void *)vm_vaddr_alloc_page(vm);
vmx->vmcs_hva = addr_gva2hva(vm, (uintptr_t)vmx->vmcs);
vmx->vmcs_gpa = addr_gva2gpa(vm, (uintptr_t)vmx->vmcs);
/* Setup of a region of guest memory for the MSR bitmap. */
- vmx->msr = (void *)vm_vaddr_alloc(vm, getpagesize(), 0x10000, 0, 0);
+ vmx->msr = (void *)vm_vaddr_alloc_page(vm);
vmx->msr_hva = addr_gva2hva(vm, (uintptr_t)vmx->msr);
vmx->msr_gpa = addr_gva2gpa(vm, (uintptr_t)vmx->msr);
memset(vmx->msr_hva, 0, getpagesize());
/* Setup of a region of guest memory for the shadow VMCS. */
- vmx->shadow_vmcs = (void *)vm_vaddr_alloc(vm, getpagesize(), 0x10000, 0, 0);
+ vmx->shadow_vmcs = (void *)vm_vaddr_alloc_page(vm);
vmx->shadow_vmcs_hva = addr_gva2hva(vm, (uintptr_t)vmx->shadow_vmcs);
vmx->shadow_vmcs_gpa = addr_gva2gpa(vm, (uintptr_t)vmx->shadow_vmcs);
/* Setup of a region of guest memory for the VMREAD and VMWRITE bitmaps. */
- vmx->vmread = (void *)vm_vaddr_alloc(vm, getpagesize(), 0x10000, 0, 0);
+ vmx->vmread = (void *)vm_vaddr_alloc_page(vm);
vmx->vmread_hva = addr_gva2hva(vm, (uintptr_t)vmx->vmread);
vmx->vmread_gpa = addr_gva2gpa(vm, (uintptr_t)vmx->vmread);
memset(vmx->vmread_hva, 0, getpagesize());
- vmx->vmwrite = (void *)vm_vaddr_alloc(vm, getpagesize(), 0x10000, 0, 0);
+ vmx->vmwrite = (void *)vm_vaddr_alloc_page(vm);
vmx->vmwrite_hva = addr_gva2hva(vm, (uintptr_t)vmx->vmwrite);
vmx->vmwrite_gpa = addr_gva2gpa(vm, (uintptr_t)vmx->vmwrite);
memset(vmx->vmwrite_hva, 0, getpagesize());
- /* Setup of a region of guest memory for the VP Assist page. */
- vmx->vp_assist = (void *)vm_vaddr_alloc(vm, getpagesize(),
- 0x10000, 0, 0);
- vmx->vp_assist_hva = addr_gva2hva(vm, (uintptr_t)vmx->vp_assist);
- vmx->vp_assist_gpa = addr_gva2gpa(vm, (uintptr_t)vmx->vp_assist);
-
- /* Setup of a region of guest memory for the enlightened VMCS. */
- vmx->enlightened_vmcs = (void *)vm_vaddr_alloc(vm, getpagesize(),
- 0x10000, 0, 0);
- vmx->enlightened_vmcs_hva =
- addr_gva2hva(vm, (uintptr_t)vmx->enlightened_vmcs);
- vmx->enlightened_vmcs_gpa =
- addr_gva2gpa(vm, (uintptr_t)vmx->enlightened_vmcs);
-
*p_vmx_gva = vmx_gva;
return vmx;
}
@@ -176,30 +159,32 @@ bool prepare_for_vmx_operation(struct vmx_pages *vmx)
bool load_vmcs(struct vmx_pages *vmx)
{
- if (!enable_evmcs) {
- /* Load a VMCS. */
- *(uint32_t *)(vmx->vmcs) = vmcs_revision();
- if (vmclear(vmx->vmcs_gpa))
- return false;
-
- if (vmptrld(vmx->vmcs_gpa))
- return false;
-
- /* Setup shadow VMCS, do not load it yet. */
- *(uint32_t *)(vmx->shadow_vmcs) =
- vmcs_revision() | 0x80000000ul;
- if (vmclear(vmx->shadow_vmcs_gpa))
- return false;
- } else {
- if (evmcs_vmptrld(vmx->enlightened_vmcs_gpa,
- vmx->enlightened_vmcs))
- return false;
- current_evmcs->revision_id = EVMCS_VERSION;
- }
+ /* Load a VMCS. */
+ *(uint32_t *)(vmx->vmcs) = vmcs_revision();
+ if (vmclear(vmx->vmcs_gpa))
+ return false;
+
+ if (vmptrld(vmx->vmcs_gpa))
+ return false;
+
+ /* Setup shadow VMCS, do not load it yet. */
+ *(uint32_t *)(vmx->shadow_vmcs) = vmcs_revision() | 0x80000000ul;
+ if (vmclear(vmx->shadow_vmcs_gpa))
+ return false;
return true;
}
+static bool ept_vpid_cap_supported(uint64_t mask)
+{
+ return rdmsr(MSR_IA32_VMX_EPT_VPID_CAP) & mask;
+}
+
+bool ept_1g_pages_supported(void)
+{
+ return ept_vpid_cap_supported(VMX_EPT_VPID_CAP_1G_PAGES);
+}
+
/*
* Initialize the control fields to the most basic settings possible.
*/
@@ -217,7 +202,7 @@ static inline void init_vmcs_control_fields(struct vmx_pages *vmx)
struct eptPageTablePointer eptp = {
.memory_type = VMX_BASIC_MEM_TYPE_WB,
.page_walk_length = 3, /* + 1 */
- .ad_enabled = !!(rdmsr(MSR_IA32_VMX_EPT_VPID_CAP) & VMX_EPT_VPID_CAP_AD_BITS),
+ .ad_enabled = ept_vpid_cap_supported(VMX_EPT_VPID_CAP_AD_BITS),
.address = vmx->eptp_gpa >> PAGE_SHIFT_4K,
};
@@ -379,101 +364,93 @@ void prepare_vmcs(struct vmx_pages *vmx, void *guest_rip, void *guest_rsp)
init_vmcs_guest_state(guest_rip, guest_rsp);
}
-bool nested_vmx_supported(void)
+static void nested_create_pte(struct kvm_vm *vm,
+ struct eptPageTableEntry *pte,
+ uint64_t nested_paddr,
+ uint64_t paddr,
+ int current_level,
+ int target_level)
{
- struct kvm_cpuid_entry2 *entry = kvm_get_supported_cpuid_entry(1);
-
- return entry->ecx & CPUID_VMX;
-}
-
-void nested_vmx_check_supported(void)
-{
- if (!nested_vmx_supported()) {
- print_skip("nested VMX not enabled");
- exit(KSFT_SKIP);
+ if (!pte->readable) {
+ pte->writable = true;
+ pte->readable = true;
+ pte->executable = true;
+ pte->page_size = (current_level == target_level);
+ if (pte->page_size)
+ pte->address = paddr >> vm->page_shift;
+ else
+ pte->address = vm_alloc_page_table(vm) >> vm->page_shift;
+ } else {
+ /*
+ * Entry already present. Assert that the caller doesn't want
+ * a hugepage at this level, and that there isn't a hugepage at
+ * this level.
+ */
+ TEST_ASSERT(current_level != target_level,
+ "Cannot create hugepage at level: %u, nested_paddr: 0x%lx",
+ current_level, nested_paddr);
+ TEST_ASSERT(!pte->page_size,
+ "Cannot create page table at level: %u, nested_paddr: 0x%lx",
+ current_level, nested_paddr);
}
}
-void nested_pg_map(struct vmx_pages *vmx, struct kvm_vm *vm,
- uint64_t nested_paddr, uint64_t paddr, uint32_t eptp_memslot)
+
+void __nested_pg_map(struct vmx_pages *vmx, struct kvm_vm *vm,
+ uint64_t nested_paddr, uint64_t paddr, int target_level)
{
- uint16_t index[4];
- struct eptPageTableEntry *pml4e;
+ const uint64_t page_size = PG_LEVEL_SIZE(target_level);
+ struct eptPageTableEntry *pt = vmx->eptp_hva, *pte;
+ uint16_t index;
TEST_ASSERT(vm->mode == VM_MODE_PXXV48_4K, "Attempt to use "
"unknown or unsupported guest mode, mode: 0x%x", vm->mode);
- TEST_ASSERT((nested_paddr % vm->page_size) == 0,
+ TEST_ASSERT((nested_paddr >> 48) == 0,
+ "Nested physical address 0x%lx requires 5-level paging",
+ nested_paddr);
+ TEST_ASSERT((nested_paddr % page_size) == 0,
"Nested physical address not on page boundary,\n"
- " nested_paddr: 0x%lx vm->page_size: 0x%x",
- nested_paddr, vm->page_size);
+ " nested_paddr: 0x%lx page_size: 0x%lx",
+ nested_paddr, page_size);
TEST_ASSERT((nested_paddr >> vm->page_shift) <= vm->max_gfn,
"Physical address beyond beyond maximum supported,\n"
" nested_paddr: 0x%lx vm->max_gfn: 0x%lx vm->page_size: 0x%x",
paddr, vm->max_gfn, vm->page_size);
- TEST_ASSERT((paddr % vm->page_size) == 0,
+ TEST_ASSERT((paddr % page_size) == 0,
"Physical address not on page boundary,\n"
- " paddr: 0x%lx vm->page_size: 0x%x",
- paddr, vm->page_size);
+ " paddr: 0x%lx page_size: 0x%lx",
+ paddr, page_size);
TEST_ASSERT((paddr >> vm->page_shift) <= vm->max_gfn,
"Physical address beyond beyond maximum supported,\n"
" paddr: 0x%lx vm->max_gfn: 0x%lx vm->page_size: 0x%x",
paddr, vm->max_gfn, vm->page_size);
- index[0] = (nested_paddr >> 12) & 0x1ffu;
- index[1] = (nested_paddr >> 21) & 0x1ffu;
- index[2] = (nested_paddr >> 30) & 0x1ffu;
- index[3] = (nested_paddr >> 39) & 0x1ffu;
-
- /* Allocate page directory pointer table if not present. */
- pml4e = vmx->eptp_hva;
- if (!pml4e[index[3]].readable) {
- pml4e[index[3]].address = vm_phy_page_alloc(vm,
- KVM_EPT_PAGE_TABLE_MIN_PADDR, eptp_memslot)
- >> vm->page_shift;
- pml4e[index[3]].writable = true;
- pml4e[index[3]].readable = true;
- pml4e[index[3]].executable = true;
- }
+ for (int level = PG_LEVEL_512G; level >= PG_LEVEL_4K; level--) {
+ index = (nested_paddr >> PG_LEVEL_SHIFT(level)) & 0x1ffu;
+ pte = &pt[index];
- /* Allocate page directory table if not present. */
- struct eptPageTableEntry *pdpe;
- pdpe = addr_gpa2hva(vm, pml4e[index[3]].address * vm->page_size);
- if (!pdpe[index[2]].readable) {
- pdpe[index[2]].address = vm_phy_page_alloc(vm,
- KVM_EPT_PAGE_TABLE_MIN_PADDR, eptp_memslot)
- >> vm->page_shift;
- pdpe[index[2]].writable = true;
- pdpe[index[2]].readable = true;
- pdpe[index[2]].executable = true;
- }
+ nested_create_pte(vm, pte, nested_paddr, paddr, level, target_level);
- /* Allocate page table if not present. */
- struct eptPageTableEntry *pde;
- pde = addr_gpa2hva(vm, pdpe[index[2]].address * vm->page_size);
- if (!pde[index[1]].readable) {
- pde[index[1]].address = vm_phy_page_alloc(vm,
- KVM_EPT_PAGE_TABLE_MIN_PADDR, eptp_memslot)
- >> vm->page_shift;
- pde[index[1]].writable = true;
- pde[index[1]].readable = true;
- pde[index[1]].executable = true;
- }
+ if (pte->page_size)
+ break;
- /* Fill in page table entry. */
- struct eptPageTableEntry *pte;
- pte = addr_gpa2hva(vm, pde[index[1]].address * vm->page_size);
- pte[index[0]].address = paddr >> vm->page_shift;
- pte[index[0]].writable = true;
- pte[index[0]].readable = true;
- pte[index[0]].executable = true;
+ pt = addr_gpa2hva(vm, pte->address * vm->page_size);
+ }
/*
* For now mark these as accessed and dirty because the only
* testcase we have needs that. Can be reconsidered later.
*/
- pte[index[0]].accessed = true;
- pte[index[0]].dirty = true;
+ pte->accessed = true;
+ pte->dirty = true;
+
+}
+
+void nested_pg_map(struct vmx_pages *vmx, struct kvm_vm *vm,
+ uint64_t nested_paddr, uint64_t paddr)
+{
+ __nested_pg_map(vmx, vm, nested_paddr, paddr, PG_LEVEL_4K);
}
/*
@@ -484,7 +461,7 @@ void nested_pg_map(struct vmx_pages *vmx, struct kvm_vm *vm,
* nested_paddr - Nested guest physical address to map
* paddr - VM Physical Address
* size - The size of the range to map
- * eptp_memslot - Memory region slot for new virtual translation tables
+ * level - The level at which to map the range
*
* Output Args: None
*
@@ -493,28 +470,34 @@ void nested_pg_map(struct vmx_pages *vmx, struct kvm_vm *vm,
* Within the VM given by vm, creates a nested guest translation for the
* page range starting at nested_paddr to the page range starting at paddr.
*/
-void nested_map(struct vmx_pages *vmx, struct kvm_vm *vm,
- uint64_t nested_paddr, uint64_t paddr, uint64_t size,
- uint32_t eptp_memslot)
+void __nested_map(struct vmx_pages *vmx, struct kvm_vm *vm,
+ uint64_t nested_paddr, uint64_t paddr, uint64_t size,
+ int level)
{
- size_t page_size = vm->page_size;
+ size_t page_size = PG_LEVEL_SIZE(level);
size_t npages = size / page_size;
TEST_ASSERT(nested_paddr + size > nested_paddr, "Vaddr overflow");
TEST_ASSERT(paddr + size > paddr, "Paddr overflow");
while (npages--) {
- nested_pg_map(vmx, vm, nested_paddr, paddr, eptp_memslot);
+ __nested_pg_map(vmx, vm, nested_paddr, paddr, level);
nested_paddr += page_size;
paddr += page_size;
}
}
+void nested_map(struct vmx_pages *vmx, struct kvm_vm *vm,
+ uint64_t nested_paddr, uint64_t paddr, uint64_t size)
+{
+ __nested_map(vmx, vm, nested_paddr, paddr, size, PG_LEVEL_4K);
+}
+
/* Prepare an identity extended page table that maps all the
* physical pages in VM.
*/
void nested_map_memslot(struct vmx_pages *vmx, struct kvm_vm *vm,
- uint32_t memslot, uint32_t eptp_memslot)
+ uint32_t memslot)
{
sparsebit_idx_t i, last;
struct userspace_mem_region *region =
@@ -530,15 +513,42 @@ void nested_map_memslot(struct vmx_pages *vmx, struct kvm_vm *vm,
nested_map(vmx, vm,
(uint64_t)i << vm->page_shift,
(uint64_t)i << vm->page_shift,
- 1 << vm->page_shift,
- eptp_memslot);
+ 1 << vm->page_shift);
}
}
+/* Identity map a region with 1GiB Pages. */
+void nested_identity_map_1g(struct vmx_pages *vmx, struct kvm_vm *vm,
+ uint64_t addr, uint64_t size)
+{
+ __nested_map(vmx, vm, addr, addr, size, PG_LEVEL_1G);
+}
+
+bool kvm_cpu_has_ept(void)
+{
+ uint64_t ctrl;
+
+ ctrl = kvm_get_feature_msr(MSR_IA32_VMX_TRUE_PROCBASED_CTLS) >> 32;
+ if (!(ctrl & CPU_BASED_ACTIVATE_SECONDARY_CONTROLS))
+ return false;
+
+ ctrl = kvm_get_feature_msr(MSR_IA32_VMX_PROCBASED_CTLS2) >> 32;
+ return ctrl & SECONDARY_EXEC_ENABLE_EPT;
+}
+
void prepare_eptp(struct vmx_pages *vmx, struct kvm_vm *vm,
uint32_t eptp_memslot)
{
- vmx->eptp = (void *)vm_vaddr_alloc(vm, getpagesize(), 0x10000, 0, 0);
+ TEST_ASSERT(kvm_cpu_has_ept(), "KVM doesn't support nested EPT");
+
+ vmx->eptp = (void *)vm_vaddr_alloc_page(vm);
vmx->eptp_hva = addr_gva2hva(vm, (uintptr_t)vmx->eptp);
vmx->eptp_gpa = addr_gva2gpa(vm, (uintptr_t)vmx->eptp);
}
+
+void prepare_virtualize_apic_accesses(struct vmx_pages *vmx, struct kvm_vm *vm)
+{
+ vmx->apic_access = (void *)vm_vaddr_alloc_page(vm);
+ vmx->apic_access_hva = addr_gva2hva(vm, (uintptr_t)vmx->apic_access);
+ vmx->apic_access_gpa = addr_gva2gpa(vm, (uintptr_t)vmx->apic_access);
+}
diff --git a/tools/testing/selftests/kvm/max_guest_memory_test.c b/tools/testing/selftests/kvm/max_guest_memory_test.c
new file mode 100644
index 000000000000..6628dc4dda89
--- /dev/null
+++ b/tools/testing/selftests/kvm/max_guest_memory_test.c
@@ -0,0 +1,294 @@
+// SPDX-License-Identifier: GPL-2.0
+#define _GNU_SOURCE
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <pthread.h>
+#include <semaphore.h>
+#include <sys/types.h>
+#include <signal.h>
+#include <errno.h>
+#include <linux/bitmap.h>
+#include <linux/bitops.h>
+#include <linux/atomic.h>
+#include <linux/sizes.h>
+
+#include "kvm_util.h"
+#include "test_util.h"
+#include "guest_modes.h"
+#include "processor.h"
+
+static void guest_code(uint64_t start_gpa, uint64_t end_gpa, uint64_t stride)
+{
+ uint64_t gpa;
+
+ for (gpa = start_gpa; gpa < end_gpa; gpa += stride)
+ *((volatile uint64_t *)gpa) = gpa;
+
+ GUEST_DONE();
+}
+
+struct vcpu_info {
+ struct kvm_vcpu *vcpu;
+ uint64_t start_gpa;
+ uint64_t end_gpa;
+};
+
+static int nr_vcpus;
+static atomic_t rendezvous;
+
+static void rendezvous_with_boss(void)
+{
+ int orig = atomic_read(&rendezvous);
+
+ if (orig > 0) {
+ atomic_dec_and_test(&rendezvous);
+ while (atomic_read(&rendezvous) > 0)
+ cpu_relax();
+ } else {
+ atomic_inc(&rendezvous);
+ while (atomic_read(&rendezvous) < 0)
+ cpu_relax();
+ }
+}
+
+static void run_vcpu(struct kvm_vcpu *vcpu)
+{
+ vcpu_run(vcpu);
+ TEST_ASSERT_EQ(get_ucall(vcpu, NULL), UCALL_DONE);
+}
+
+static void *vcpu_worker(void *data)
+{
+ struct vcpu_info *info = data;
+ struct kvm_vcpu *vcpu = info->vcpu;
+ struct kvm_vm *vm = vcpu->vm;
+ struct kvm_sregs sregs;
+ struct kvm_regs regs;
+
+ vcpu_args_set(vcpu, 3, info->start_gpa, info->end_gpa, vm->page_size);
+
+ /* Snapshot regs before the first run. */
+ vcpu_regs_get(vcpu, &regs);
+ rendezvous_with_boss();
+
+ run_vcpu(vcpu);
+ rendezvous_with_boss();
+ vcpu_regs_set(vcpu, &regs);
+ vcpu_sregs_get(vcpu, &sregs);
+#ifdef __x86_64__
+ /* Toggle CR0.WP to trigger a MMU context reset. */
+ sregs.cr0 ^= X86_CR0_WP;
+#endif
+ vcpu_sregs_set(vcpu, &sregs);
+ rendezvous_with_boss();
+
+ run_vcpu(vcpu);
+ rendezvous_with_boss();
+
+ return NULL;
+}
+
+static pthread_t *spawn_workers(struct kvm_vm *vm, struct kvm_vcpu **vcpus,
+ uint64_t start_gpa, uint64_t end_gpa)
+{
+ struct vcpu_info *info;
+ uint64_t gpa, nr_bytes;
+ pthread_t *threads;
+ int i;
+
+ threads = malloc(nr_vcpus * sizeof(*threads));
+ TEST_ASSERT(threads, "Failed to allocate vCPU threads");
+
+ info = malloc(nr_vcpus * sizeof(*info));
+ TEST_ASSERT(info, "Failed to allocate vCPU gpa ranges");
+
+ nr_bytes = ((end_gpa - start_gpa) / nr_vcpus) &
+ ~((uint64_t)vm->page_size - 1);
+ TEST_ASSERT(nr_bytes, "C'mon, no way you have %d CPUs", nr_vcpus);
+
+ for (i = 0, gpa = start_gpa; i < nr_vcpus; i++, gpa += nr_bytes) {
+ info[i].vcpu = vcpus[i];
+ info[i].start_gpa = gpa;
+ info[i].end_gpa = gpa + nr_bytes;
+ pthread_create(&threads[i], NULL, vcpu_worker, &info[i]);
+ }
+ return threads;
+}
+
+static void rendezvous_with_vcpus(struct timespec *time, const char *name)
+{
+ int i, rendezvoused;
+
+ pr_info("Waiting for vCPUs to finish %s...\n", name);
+
+ rendezvoused = atomic_read(&rendezvous);
+ for (i = 0; abs(rendezvoused) != 1; i++) {
+ usleep(100);
+ if (!(i & 0x3f))
+ pr_info("\r%d vCPUs haven't rendezvoused...",
+ abs(rendezvoused) - 1);
+ rendezvoused = atomic_read(&rendezvous);
+ }
+
+ clock_gettime(CLOCK_MONOTONIC, time);
+
+ /* Release the vCPUs after getting the time of the previous action. */
+ pr_info("\rAll vCPUs finished %s, releasing...\n", name);
+ if (rendezvoused > 0)
+ atomic_set(&rendezvous, -nr_vcpus - 1);
+ else
+ atomic_set(&rendezvous, nr_vcpus + 1);
+}
+
+static void calc_default_nr_vcpus(void)
+{
+ cpu_set_t possible_mask;
+ int r;
+
+ r = sched_getaffinity(0, sizeof(possible_mask), &possible_mask);
+ TEST_ASSERT(!r, "sched_getaffinity failed, errno = %d (%s)",
+ errno, strerror(errno));
+
+ nr_vcpus = CPU_COUNT(&possible_mask) * 3/4;
+ TEST_ASSERT(nr_vcpus > 0, "Uh, no CPUs?");
+}
+
+int main(int argc, char *argv[])
+{
+ /*
+ * Skip the first 4gb and slot0. slot0 maps <1gb and is used to back
+ * the guest's code, stack, and page tables. Because selftests creates
+ * an IRQCHIP, a.k.a. a local APIC, KVM creates an internal memslot
+ * just below the 4gb boundary. This test could create memory at
+ * 1gb-3gb,but it's simpler to skip straight to 4gb.
+ */
+ const uint64_t start_gpa = SZ_4G;
+ const int first_slot = 1;
+
+ struct timespec time_start, time_run1, time_reset, time_run2;
+ uint64_t max_gpa, gpa, slot_size, max_mem, i;
+ int max_slots, slot, opt, fd;
+ bool hugepages = false;
+ struct kvm_vcpu **vcpus;
+ pthread_t *threads;
+ struct kvm_vm *vm;
+ void *mem;
+
+ /*
+ * Default to 2gb so that maxing out systems with MAXPHADDR=46, which
+ * are quite common for x86, requires changing only max_mem (KVM allows
+ * 32k memslots, 32k * 2gb == ~64tb of guest memory).
+ */
+ slot_size = SZ_2G;
+
+ max_slots = kvm_check_cap(KVM_CAP_NR_MEMSLOTS);
+ TEST_ASSERT(max_slots > first_slot, "KVM is broken");
+
+ /* All KVM MMUs should be able to survive a 128gb guest. */
+ max_mem = 128ull * SZ_1G;
+
+ calc_default_nr_vcpus();
+
+ while ((opt = getopt(argc, argv, "c:h:m:s:H")) != -1) {
+ switch (opt) {
+ case 'c':
+ nr_vcpus = atoi_positive("Number of vCPUs", optarg);
+ break;
+ case 'm':
+ max_mem = 1ull * atoi_positive("Memory size", optarg) * SZ_1G;
+ break;
+ case 's':
+ slot_size = 1ull * atoi_positive("Slot size", optarg) * SZ_1G;
+ break;
+ case 'H':
+ hugepages = true;
+ break;
+ case 'h':
+ default:
+ printf("usage: %s [-c nr_vcpus] [-m max_mem_in_gb] [-s slot_size_in_gb] [-H]\n", argv[0]);
+ exit(1);
+ }
+ }
+
+ vcpus = malloc(nr_vcpus * sizeof(*vcpus));
+ TEST_ASSERT(vcpus, "Failed to allocate vCPU array");
+
+ vm = vm_create_with_vcpus(nr_vcpus, guest_code, vcpus);
+
+ max_gpa = vm->max_gfn << vm->page_shift;
+ TEST_ASSERT(max_gpa > (4 * slot_size), "MAXPHYADDR <4gb ");
+
+ fd = kvm_memfd_alloc(slot_size, hugepages);
+ mem = mmap(NULL, slot_size, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0);
+ TEST_ASSERT(mem != MAP_FAILED, "mmap() failed");
+
+ TEST_ASSERT(!madvise(mem, slot_size, MADV_NOHUGEPAGE), "madvise() failed");
+
+ /* Pre-fault the memory to avoid taking mmap_sem on guest page faults. */
+ for (i = 0; i < slot_size; i += vm->page_size)
+ ((uint8_t *)mem)[i] = 0xaa;
+
+ gpa = 0;
+ for (slot = first_slot; slot < max_slots; slot++) {
+ gpa = start_gpa + ((slot - first_slot) * slot_size);
+ if (gpa + slot_size > max_gpa)
+ break;
+
+ if ((gpa - start_gpa) >= max_mem)
+ break;
+
+ vm_set_user_memory_region(vm, slot, 0, gpa, slot_size, mem);
+
+#ifdef __x86_64__
+ /* Identity map memory in the guest using 1gb pages. */
+ for (i = 0; i < slot_size; i += SZ_1G)
+ __virt_pg_map(vm, gpa + i, gpa + i, PG_LEVEL_1G);
+#else
+ for (i = 0; i < slot_size; i += vm->page_size)
+ virt_pg_map(vm, gpa + i, gpa + i);
+#endif
+ }
+
+ atomic_set(&rendezvous, nr_vcpus + 1);
+ threads = spawn_workers(vm, vcpus, start_gpa, gpa);
+
+ free(vcpus);
+ vcpus = NULL;
+
+ pr_info("Running with %lugb of guest memory and %u vCPUs\n",
+ (gpa - start_gpa) / SZ_1G, nr_vcpus);
+
+ rendezvous_with_vcpus(&time_start, "spawning");
+ rendezvous_with_vcpus(&time_run1, "run 1");
+ rendezvous_with_vcpus(&time_reset, "reset");
+ rendezvous_with_vcpus(&time_run2, "run 2");
+
+ time_run2 = timespec_sub(time_run2, time_reset);
+ time_reset = timespec_sub(time_reset, time_run1);
+ time_run1 = timespec_sub(time_run1, time_start);
+
+ pr_info("run1 = %ld.%.9lds, reset = %ld.%.9lds, run2 = %ld.%.9lds\n",
+ time_run1.tv_sec, time_run1.tv_nsec,
+ time_reset.tv_sec, time_reset.tv_nsec,
+ time_run2.tv_sec, time_run2.tv_nsec);
+
+ /*
+ * Delete even numbered slots (arbitrary) and unmap the first half of
+ * the backing (also arbitrary) to verify KVM correctly drops all
+ * references to the removed regions.
+ */
+ for (slot = (slot - 1) & ~1ull; slot >= first_slot; slot -= 2)
+ vm_set_user_memory_region(vm, slot, 0, 0, 0, NULL);
+
+ munmap(mem, slot_size / 2);
+
+ /* Sanity check that the vCPUs actually ran. */
+ for (i = 0; i < nr_vcpus; i++)
+ pthread_join(threads[i], NULL);
+
+ /*
+ * Deliberately exit without deleting the remaining memslots or closing
+ * kvm_fd to test cleanup via mmu_notifier.release.
+ */
+}
diff --git a/tools/testing/selftests/kvm/memslot_modification_stress_test.c b/tools/testing/selftests/kvm/memslot_modification_stress_test.c
new file mode 100644
index 000000000000..156361966612
--- /dev/null
+++ b/tools/testing/selftests/kvm/memslot_modification_stress_test.c
@@ -0,0 +1,182 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * KVM memslot modification stress test
+ * Adapted from demand_paging_test.c
+ *
+ * Copyright (C) 2018, Red Hat, Inc.
+ * Copyright (C) 2020, Google, Inc.
+ */
+
+#define _GNU_SOURCE /* for program_invocation_name */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <sys/syscall.h>
+#include <unistd.h>
+#include <asm/unistd.h>
+#include <time.h>
+#include <poll.h>
+#include <pthread.h>
+#include <linux/bitmap.h>
+#include <linux/bitops.h>
+#include <linux/userfaultfd.h>
+
+#include "memstress.h"
+#include "processor.h"
+#include "test_util.h"
+#include "guest_modes.h"
+
+#define DUMMY_MEMSLOT_INDEX 7
+
+#define DEFAULT_MEMSLOT_MODIFICATION_ITERATIONS 10
+
+
+static int nr_vcpus = 1;
+static uint64_t guest_percpu_mem_size = DEFAULT_PER_VCPU_MEM_SIZE;
+
+static void vcpu_worker(struct memstress_vcpu_args *vcpu_args)
+{
+ struct kvm_vcpu *vcpu = vcpu_args->vcpu;
+ struct kvm_run *run;
+ int ret;
+
+ run = vcpu->run;
+
+ /* Let the guest access its memory until a stop signal is received */
+ while (!READ_ONCE(memstress_args.stop_vcpus)) {
+ ret = _vcpu_run(vcpu);
+ TEST_ASSERT(ret == 0, "vcpu_run failed: %d", ret);
+
+ if (get_ucall(vcpu, NULL) == UCALL_SYNC)
+ continue;
+
+ TEST_ASSERT(false,
+ "Invalid guest sync status: exit_reason=%s\n",
+ exit_reason_str(run->exit_reason));
+ }
+}
+
+struct memslot_antagonist_args {
+ struct kvm_vm *vm;
+ useconds_t delay;
+ uint64_t nr_modifications;
+};
+
+static void add_remove_memslot(struct kvm_vm *vm, useconds_t delay,
+ uint64_t nr_modifications)
+{
+ uint64_t pages = max_t(int, vm->page_size, getpagesize()) / vm->page_size;
+ uint64_t gpa;
+ int i;
+
+ /*
+ * Add the dummy memslot just below the memstress memslot, which is
+ * at the top of the guest physical address space.
+ */
+ gpa = memstress_args.gpa - pages * vm->page_size;
+
+ for (i = 0; i < nr_modifications; i++) {
+ usleep(delay);
+ vm_userspace_mem_region_add(vm, VM_MEM_SRC_ANONYMOUS, gpa,
+ DUMMY_MEMSLOT_INDEX, pages, 0);
+
+ vm_mem_region_delete(vm, DUMMY_MEMSLOT_INDEX);
+ }
+}
+
+struct test_params {
+ useconds_t delay;
+ uint64_t nr_iterations;
+ bool partition_vcpu_memory_access;
+};
+
+static void run_test(enum vm_guest_mode mode, void *arg)
+{
+ struct test_params *p = arg;
+ struct kvm_vm *vm;
+
+ vm = memstress_create_vm(mode, nr_vcpus, guest_percpu_mem_size, 1,
+ VM_MEM_SRC_ANONYMOUS,
+ p->partition_vcpu_memory_access);
+
+ pr_info("Finished creating vCPUs\n");
+
+ memstress_start_vcpu_threads(nr_vcpus, vcpu_worker);
+
+ pr_info("Started all vCPUs\n");
+
+ add_remove_memslot(vm, p->delay, p->nr_iterations);
+
+ memstress_join_vcpu_threads(nr_vcpus);
+ pr_info("All vCPU threads joined\n");
+
+ memstress_destroy_vm(vm);
+}
+
+static void help(char *name)
+{
+ puts("");
+ printf("usage: %s [-h] [-m mode] [-d delay_usec]\n"
+ " [-b memory] [-v vcpus] [-o] [-i iterations]\n", name);
+ guest_modes_help();
+ printf(" -d: add a delay between each iteration of adding and\n"
+ " deleting a memslot in usec.\n");
+ printf(" -b: specify the size of the memory region which should be\n"
+ " accessed by each vCPU. e.g. 10M or 3G.\n"
+ " Default: 1G\n");
+ printf(" -v: specify the number of vCPUs to run.\n");
+ printf(" -o: Overlap guest memory accesses instead of partitioning\n"
+ " them into a separate region of memory for each vCPU.\n");
+ printf(" -i: specify the number of iterations of adding and removing\n"
+ " a memslot.\n"
+ " Default: %d\n", DEFAULT_MEMSLOT_MODIFICATION_ITERATIONS);
+ puts("");
+ exit(0);
+}
+
+int main(int argc, char *argv[])
+{
+ int max_vcpus = kvm_check_cap(KVM_CAP_MAX_VCPUS);
+ int opt;
+ struct test_params p = {
+ .delay = 0,
+ .nr_iterations = DEFAULT_MEMSLOT_MODIFICATION_ITERATIONS,
+ .partition_vcpu_memory_access = true
+ };
+
+ guest_modes_append_default();
+
+ while ((opt = getopt(argc, argv, "hm:d:b:v:oi:")) != -1) {
+ switch (opt) {
+ case 'm':
+ guest_modes_cmdline(optarg);
+ break;
+ case 'd':
+ p.delay = atoi_non_negative("Delay", optarg);
+ break;
+ case 'b':
+ guest_percpu_mem_size = parse_size(optarg);
+ break;
+ case 'v':
+ nr_vcpus = atoi_positive("Number of vCPUs", optarg);
+ TEST_ASSERT(nr_vcpus <= max_vcpus,
+ "Invalid number of vcpus, must be between 1 and %d",
+ max_vcpus);
+ break;
+ case 'o':
+ p.partition_vcpu_memory_access = false;
+ break;
+ case 'i':
+ p.nr_iterations = atoi_positive("Number of iterations", optarg);
+ break;
+ case 'h':
+ default:
+ help(argv[0]);
+ break;
+ }
+ }
+
+ for_each_guest_mode(run_test, &p);
+
+ return 0;
+}
diff --git a/tools/testing/selftests/kvm/memslot_perf_test.c b/tools/testing/selftests/kvm/memslot_perf_test.c
new file mode 100644
index 000000000000..579a64f97333
--- /dev/null
+++ b/tools/testing/selftests/kvm/memslot_perf_test.c
@@ -0,0 +1,1129 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * A memslot-related performance benchmark.
+ *
+ * Copyright (C) 2021 Oracle and/or its affiliates.
+ *
+ * Basic guest setup / host vCPU thread code lifted from set_memory_region_test.
+ */
+#include <pthread.h>
+#include <sched.h>
+#include <semaphore.h>
+#include <stdatomic.h>
+#include <stdbool.h>
+#include <stdint.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/mman.h>
+#include <time.h>
+#include <unistd.h>
+
+#include <linux/compiler.h>
+#include <linux/sizes.h>
+
+#include <test_util.h>
+#include <kvm_util.h>
+#include <processor.h>
+
+#define MEM_EXTRA_SIZE SZ_64K
+
+#define MEM_SIZE (SZ_512M + MEM_EXTRA_SIZE)
+#define MEM_GPA SZ_256M
+#define MEM_AUX_GPA MEM_GPA
+#define MEM_SYNC_GPA MEM_AUX_GPA
+#define MEM_TEST_GPA (MEM_AUX_GPA + MEM_EXTRA_SIZE)
+#define MEM_TEST_SIZE (MEM_SIZE - MEM_EXTRA_SIZE)
+
+/*
+ * 32 MiB is max size that gets well over 100 iterations on 509 slots.
+ * Considering that each slot needs to have at least one page up to
+ * 8194 slots in use can then be tested (although with slightly
+ * limited resolution).
+ */
+#define MEM_SIZE_MAP (SZ_32M + MEM_EXTRA_SIZE)
+#define MEM_TEST_MAP_SIZE (MEM_SIZE_MAP - MEM_EXTRA_SIZE)
+
+/*
+ * 128 MiB is min size that fills 32k slots with at least one page in each
+ * while at the same time gets 100+ iterations in such test
+ *
+ * 2 MiB chunk size like a typical huge page
+ */
+#define MEM_TEST_UNMAP_SIZE SZ_128M
+#define MEM_TEST_UNMAP_CHUNK_SIZE SZ_2M
+
+/*
+ * For the move active test the middle of the test area is placed on
+ * a memslot boundary: half lies in the memslot being moved, half in
+ * other memslot(s).
+ *
+ * We have different number of memory slots, excluding the reserved
+ * memory slot 0, on various architectures and configurations. The
+ * memory size in this test is calculated by picking the maximal
+ * last memory slot's memory size, with alignment to the largest
+ * supported page size (64KB). In this way, the selected memory
+ * size for this test is compatible with test_memslot_move_prepare().
+ *
+ * architecture slots memory-per-slot memory-on-last-slot
+ * --------------------------------------------------------------
+ * x86-4KB 32763 16KB 160KB
+ * arm64-4KB 32766 16KB 112KB
+ * arm64-16KB 32766 16KB 112KB
+ * arm64-64KB 8192 64KB 128KB
+ */
+#define MEM_TEST_MOVE_SIZE (3 * SZ_64K)
+#define MEM_TEST_MOVE_GPA_DEST (MEM_GPA + MEM_SIZE)
+static_assert(MEM_TEST_MOVE_SIZE <= MEM_TEST_SIZE,
+ "invalid move test region size");
+
+#define MEM_TEST_VAL_1 0x1122334455667788
+#define MEM_TEST_VAL_2 0x99AABBCCDDEEFF00
+
+struct vm_data {
+ struct kvm_vm *vm;
+ struct kvm_vcpu *vcpu;
+ pthread_t vcpu_thread;
+ uint32_t nslots;
+ uint64_t npages;
+ uint64_t pages_per_slot;
+ void **hva_slots;
+ bool mmio_ok;
+ uint64_t mmio_gpa_min;
+ uint64_t mmio_gpa_max;
+};
+
+struct sync_area {
+ uint32_t guest_page_size;
+ atomic_bool start_flag;
+ atomic_bool exit_flag;
+ atomic_bool sync_flag;
+ void *move_area_ptr;
+};
+
+/*
+ * Technically, we need also for the atomic bool to be address-free, which
+ * is recommended, but not strictly required, by C11 for lockless
+ * implementations.
+ * However, in practice both GCC and Clang fulfill this requirement on
+ * all KVM-supported platforms.
+ */
+static_assert(ATOMIC_BOOL_LOCK_FREE == 2, "atomic bool is not lockless");
+
+static sem_t vcpu_ready;
+
+static bool map_unmap_verify;
+
+static bool verbose;
+#define pr_info_v(...) \
+ do { \
+ if (verbose) \
+ pr_info(__VA_ARGS__); \
+ } while (0)
+
+static void check_mmio_access(struct vm_data *data, struct kvm_run *run)
+{
+ TEST_ASSERT(data->mmio_ok, "Unexpected mmio exit");
+ TEST_ASSERT(run->mmio.is_write, "Unexpected mmio read");
+ TEST_ASSERT(run->mmio.len == 8,
+ "Unexpected exit mmio size = %u", run->mmio.len);
+ TEST_ASSERT(run->mmio.phys_addr >= data->mmio_gpa_min &&
+ run->mmio.phys_addr <= data->mmio_gpa_max,
+ "Unexpected exit mmio address = 0x%llx",
+ run->mmio.phys_addr);
+}
+
+static void *vcpu_worker(void *__data)
+{
+ struct vm_data *data = __data;
+ struct kvm_vcpu *vcpu = data->vcpu;
+ struct kvm_run *run = vcpu->run;
+ struct ucall uc;
+
+ while (1) {
+ vcpu_run(vcpu);
+
+ switch (get_ucall(vcpu, &uc)) {
+ case UCALL_SYNC:
+ TEST_ASSERT(uc.args[1] == 0,
+ "Unexpected sync ucall, got %lx",
+ (ulong)uc.args[1]);
+ sem_post(&vcpu_ready);
+ continue;
+ case UCALL_NONE:
+ if (run->exit_reason == KVM_EXIT_MMIO)
+ check_mmio_access(data, run);
+ else
+ goto done;
+ break;
+ case UCALL_ABORT:
+ REPORT_GUEST_ASSERT(uc);
+ break;
+ case UCALL_DONE:
+ goto done;
+ default:
+ TEST_FAIL("Unknown ucall %lu", uc.cmd);
+ }
+ }
+
+done:
+ return NULL;
+}
+
+static void wait_for_vcpu(void)
+{
+ struct timespec ts;
+
+ TEST_ASSERT(!clock_gettime(CLOCK_REALTIME, &ts),
+ "clock_gettime() failed: %d", errno);
+
+ ts.tv_sec += 2;
+ TEST_ASSERT(!sem_timedwait(&vcpu_ready, &ts),
+ "sem_timedwait() failed: %d", errno);
+}
+
+static void *vm_gpa2hva(struct vm_data *data, uint64_t gpa, uint64_t *rempages)
+{
+ uint64_t gpage, pgoffs;
+ uint32_t slot, slotoffs;
+ void *base;
+ uint32_t guest_page_size = data->vm->page_size;
+
+ TEST_ASSERT(gpa >= MEM_GPA, "Too low gpa to translate");
+ TEST_ASSERT(gpa < MEM_GPA + data->npages * guest_page_size,
+ "Too high gpa to translate");
+ gpa -= MEM_GPA;
+
+ gpage = gpa / guest_page_size;
+ pgoffs = gpa % guest_page_size;
+ slot = min(gpage / data->pages_per_slot, (uint64_t)data->nslots - 1);
+ slotoffs = gpage - (slot * data->pages_per_slot);
+
+ if (rempages) {
+ uint64_t slotpages;
+
+ if (slot == data->nslots - 1)
+ slotpages = data->npages - slot * data->pages_per_slot;
+ else
+ slotpages = data->pages_per_slot;
+
+ TEST_ASSERT(!pgoffs,
+ "Asking for remaining pages in slot but gpa not page aligned");
+ *rempages = slotpages - slotoffs;
+ }
+
+ base = data->hva_slots[slot];
+ return (uint8_t *)base + slotoffs * guest_page_size + pgoffs;
+}
+
+static uint64_t vm_slot2gpa(struct vm_data *data, uint32_t slot)
+{
+ uint32_t guest_page_size = data->vm->page_size;
+
+ TEST_ASSERT(slot < data->nslots, "Too high slot number");
+
+ return MEM_GPA + slot * data->pages_per_slot * guest_page_size;
+}
+
+static struct vm_data *alloc_vm(void)
+{
+ struct vm_data *data;
+
+ data = malloc(sizeof(*data));
+ TEST_ASSERT(data, "malloc(vmdata) failed");
+
+ data->vm = NULL;
+ data->vcpu = NULL;
+ data->hva_slots = NULL;
+
+ return data;
+}
+
+static bool check_slot_pages(uint32_t host_page_size, uint32_t guest_page_size,
+ uint64_t pages_per_slot, uint64_t rempages)
+{
+ if (!pages_per_slot)
+ return false;
+
+ if ((pages_per_slot * guest_page_size) % host_page_size)
+ return false;
+
+ if ((rempages * guest_page_size) % host_page_size)
+ return false;
+
+ return true;
+}
+
+
+static uint64_t get_max_slots(struct vm_data *data, uint32_t host_page_size)
+{
+ uint32_t guest_page_size = data->vm->page_size;
+ uint64_t mempages, pages_per_slot, rempages;
+ uint64_t slots;
+
+ mempages = data->npages;
+ slots = data->nslots;
+ while (--slots > 1) {
+ pages_per_slot = mempages / slots;
+ if (!pages_per_slot)
+ continue;
+
+ rempages = mempages % pages_per_slot;
+ if (check_slot_pages(host_page_size, guest_page_size,
+ pages_per_slot, rempages))
+ return slots + 1; /* slot 0 is reserved */
+ }
+
+ return 0;
+}
+
+static bool prepare_vm(struct vm_data *data, int nslots, uint64_t *maxslots,
+ void *guest_code, uint64_t mem_size,
+ struct timespec *slot_runtime)
+{
+ uint64_t mempages, rempages;
+ uint64_t guest_addr;
+ uint32_t slot, host_page_size, guest_page_size;
+ struct timespec tstart;
+ struct sync_area *sync;
+
+ host_page_size = getpagesize();
+ guest_page_size = vm_guest_mode_params[VM_MODE_DEFAULT].page_size;
+ mempages = mem_size / guest_page_size;
+
+ data->vm = __vm_create_with_one_vcpu(&data->vcpu, mempages, guest_code);
+ TEST_ASSERT(data->vm->page_size == guest_page_size, "Invalid VM page size");
+
+ data->npages = mempages;
+ TEST_ASSERT(data->npages > 1, "Can't test without any memory");
+ data->nslots = nslots;
+ data->pages_per_slot = data->npages / data->nslots;
+ rempages = data->npages % data->nslots;
+ if (!check_slot_pages(host_page_size, guest_page_size,
+ data->pages_per_slot, rempages)) {
+ *maxslots = get_max_slots(data, host_page_size);
+ return false;
+ }
+
+ data->hva_slots = malloc(sizeof(*data->hva_slots) * data->nslots);
+ TEST_ASSERT(data->hva_slots, "malloc() fail");
+
+ pr_info_v("Adding slots 1..%i, each slot with %"PRIu64" pages + %"PRIu64" extra pages last\n",
+ data->nslots, data->pages_per_slot, rempages);
+
+ clock_gettime(CLOCK_MONOTONIC, &tstart);
+ for (slot = 1, guest_addr = MEM_GPA; slot <= data->nslots; slot++) {
+ uint64_t npages;
+
+ npages = data->pages_per_slot;
+ if (slot == data->nslots)
+ npages += rempages;
+
+ vm_userspace_mem_region_add(data->vm, VM_MEM_SRC_ANONYMOUS,
+ guest_addr, slot, npages,
+ 0);
+ guest_addr += npages * guest_page_size;
+ }
+ *slot_runtime = timespec_elapsed(tstart);
+
+ for (slot = 1, guest_addr = MEM_GPA; slot <= data->nslots; slot++) {
+ uint64_t npages;
+ uint64_t gpa;
+
+ npages = data->pages_per_slot;
+ if (slot == data->nslots)
+ npages += rempages;
+
+ gpa = vm_phy_pages_alloc(data->vm, npages, guest_addr, slot);
+ TEST_ASSERT(gpa == guest_addr,
+ "vm_phy_pages_alloc() failed");
+
+ data->hva_slots[slot - 1] = addr_gpa2hva(data->vm, guest_addr);
+ memset(data->hva_slots[slot - 1], 0, npages * guest_page_size);
+
+ guest_addr += npages * guest_page_size;
+ }
+
+ virt_map(data->vm, MEM_GPA, MEM_GPA, data->npages);
+
+ sync = (typeof(sync))vm_gpa2hva(data, MEM_SYNC_GPA, NULL);
+ sync->guest_page_size = data->vm->page_size;
+ atomic_init(&sync->start_flag, false);
+ atomic_init(&sync->exit_flag, false);
+ atomic_init(&sync->sync_flag, false);
+
+ data->mmio_ok = false;
+
+ return true;
+}
+
+static void launch_vm(struct vm_data *data)
+{
+ pr_info_v("Launching the test VM\n");
+
+ pthread_create(&data->vcpu_thread, NULL, vcpu_worker, data);
+
+ /* Ensure the guest thread is spun up. */
+ wait_for_vcpu();
+}
+
+static void free_vm(struct vm_data *data)
+{
+ kvm_vm_free(data->vm);
+ free(data->hva_slots);
+ free(data);
+}
+
+static void wait_guest_exit(struct vm_data *data)
+{
+ pthread_join(data->vcpu_thread, NULL);
+}
+
+static void let_guest_run(struct sync_area *sync)
+{
+ atomic_store_explicit(&sync->start_flag, true, memory_order_release);
+}
+
+static void guest_spin_until_start(void)
+{
+ struct sync_area *sync = (typeof(sync))MEM_SYNC_GPA;
+
+ while (!atomic_load_explicit(&sync->start_flag, memory_order_acquire))
+ ;
+}
+
+static void make_guest_exit(struct sync_area *sync)
+{
+ atomic_store_explicit(&sync->exit_flag, true, memory_order_release);
+}
+
+static bool _guest_should_exit(void)
+{
+ struct sync_area *sync = (typeof(sync))MEM_SYNC_GPA;
+
+ return atomic_load_explicit(&sync->exit_flag, memory_order_acquire);
+}
+
+#define guest_should_exit() unlikely(_guest_should_exit())
+
+/*
+ * noinline so we can easily see how much time the host spends waiting
+ * for the guest.
+ * For the same reason use alarm() instead of polling clock_gettime()
+ * to implement a wait timeout.
+ */
+static noinline void host_perform_sync(struct sync_area *sync)
+{
+ alarm(2);
+
+ atomic_store_explicit(&sync->sync_flag, true, memory_order_release);
+ while (atomic_load_explicit(&sync->sync_flag, memory_order_acquire))
+ ;
+
+ alarm(0);
+}
+
+static bool guest_perform_sync(void)
+{
+ struct sync_area *sync = (typeof(sync))MEM_SYNC_GPA;
+ bool expected;
+
+ do {
+ if (guest_should_exit())
+ return false;
+
+ expected = true;
+ } while (!atomic_compare_exchange_weak_explicit(&sync->sync_flag,
+ &expected, false,
+ memory_order_acq_rel,
+ memory_order_relaxed));
+
+ return true;
+}
+
+static void guest_code_test_memslot_move(void)
+{
+ struct sync_area *sync = (typeof(sync))MEM_SYNC_GPA;
+ uint32_t page_size = (typeof(page_size))READ_ONCE(sync->guest_page_size);
+ uintptr_t base = (typeof(base))READ_ONCE(sync->move_area_ptr);
+
+ GUEST_SYNC(0);
+
+ guest_spin_until_start();
+
+ while (!guest_should_exit()) {
+ uintptr_t ptr;
+
+ for (ptr = base; ptr < base + MEM_TEST_MOVE_SIZE;
+ ptr += page_size)
+ *(uint64_t *)ptr = MEM_TEST_VAL_1;
+
+ /*
+ * No host sync here since the MMIO exits are so expensive
+ * that the host would spend most of its time waiting for
+ * the guest and so instead of measuring memslot move
+ * performance we would measure the performance and
+ * likelihood of MMIO exits
+ */
+ }
+
+ GUEST_DONE();
+}
+
+static void guest_code_test_memslot_map(void)
+{
+ struct sync_area *sync = (typeof(sync))MEM_SYNC_GPA;
+ uint32_t page_size = (typeof(page_size))READ_ONCE(sync->guest_page_size);
+
+ GUEST_SYNC(0);
+
+ guest_spin_until_start();
+
+ while (1) {
+ uintptr_t ptr;
+
+ for (ptr = MEM_TEST_GPA;
+ ptr < MEM_TEST_GPA + MEM_TEST_MAP_SIZE / 2;
+ ptr += page_size)
+ *(uint64_t *)ptr = MEM_TEST_VAL_1;
+
+ if (!guest_perform_sync())
+ break;
+
+ for (ptr = MEM_TEST_GPA + MEM_TEST_MAP_SIZE / 2;
+ ptr < MEM_TEST_GPA + MEM_TEST_MAP_SIZE;
+ ptr += page_size)
+ *(uint64_t *)ptr = MEM_TEST_VAL_2;
+
+ if (!guest_perform_sync())
+ break;
+ }
+
+ GUEST_DONE();
+}
+
+static void guest_code_test_memslot_unmap(void)
+{
+ struct sync_area *sync = (typeof(sync))MEM_SYNC_GPA;
+
+ GUEST_SYNC(0);
+
+ guest_spin_until_start();
+
+ while (1) {
+ uintptr_t ptr = MEM_TEST_GPA;
+
+ /*
+ * We can afford to access (map) just a small number of pages
+ * per host sync as otherwise the host will spend
+ * a significant amount of its time waiting for the guest
+ * (instead of doing unmap operations), so this will
+ * effectively turn this test into a map performance test.
+ *
+ * Just access a single page to be on the safe side.
+ */
+ *(uint64_t *)ptr = MEM_TEST_VAL_1;
+
+ if (!guest_perform_sync())
+ break;
+
+ ptr += MEM_TEST_UNMAP_SIZE / 2;
+ *(uint64_t *)ptr = MEM_TEST_VAL_2;
+
+ if (!guest_perform_sync())
+ break;
+ }
+
+ GUEST_DONE();
+}
+
+static void guest_code_test_memslot_rw(void)
+{
+ struct sync_area *sync = (typeof(sync))MEM_SYNC_GPA;
+ uint32_t page_size = (typeof(page_size))READ_ONCE(sync->guest_page_size);
+
+ GUEST_SYNC(0);
+
+ guest_spin_until_start();
+
+ while (1) {
+ uintptr_t ptr;
+
+ for (ptr = MEM_TEST_GPA;
+ ptr < MEM_TEST_GPA + MEM_TEST_SIZE; ptr += page_size)
+ *(uint64_t *)ptr = MEM_TEST_VAL_1;
+
+ if (!guest_perform_sync())
+ break;
+
+ for (ptr = MEM_TEST_GPA + page_size / 2;
+ ptr < MEM_TEST_GPA + MEM_TEST_SIZE; ptr += page_size) {
+ uint64_t val = *(uint64_t *)ptr;
+
+ GUEST_ASSERT_EQ(val, MEM_TEST_VAL_2);
+ *(uint64_t *)ptr = 0;
+ }
+
+ if (!guest_perform_sync())
+ break;
+ }
+
+ GUEST_DONE();
+}
+
+static bool test_memslot_move_prepare(struct vm_data *data,
+ struct sync_area *sync,
+ uint64_t *maxslots, bool isactive)
+{
+ uint32_t guest_page_size = data->vm->page_size;
+ uint64_t movesrcgpa, movetestgpa;
+
+ movesrcgpa = vm_slot2gpa(data, data->nslots - 1);
+
+ if (isactive) {
+ uint64_t lastpages;
+
+ vm_gpa2hva(data, movesrcgpa, &lastpages);
+ if (lastpages * guest_page_size < MEM_TEST_MOVE_SIZE / 2) {
+ *maxslots = 0;
+ return false;
+ }
+ }
+
+ movetestgpa = movesrcgpa - (MEM_TEST_MOVE_SIZE / (isactive ? 2 : 1));
+ sync->move_area_ptr = (void *)movetestgpa;
+
+ if (isactive) {
+ data->mmio_ok = true;
+ data->mmio_gpa_min = movesrcgpa;
+ data->mmio_gpa_max = movesrcgpa + MEM_TEST_MOVE_SIZE / 2 - 1;
+ }
+
+ return true;
+}
+
+static bool test_memslot_move_prepare_active(struct vm_data *data,
+ struct sync_area *sync,
+ uint64_t *maxslots)
+{
+ return test_memslot_move_prepare(data, sync, maxslots, true);
+}
+
+static bool test_memslot_move_prepare_inactive(struct vm_data *data,
+ struct sync_area *sync,
+ uint64_t *maxslots)
+{
+ return test_memslot_move_prepare(data, sync, maxslots, false);
+}
+
+static void test_memslot_move_loop(struct vm_data *data, struct sync_area *sync)
+{
+ uint64_t movesrcgpa;
+
+ movesrcgpa = vm_slot2gpa(data, data->nslots - 1);
+ vm_mem_region_move(data->vm, data->nslots - 1 + 1,
+ MEM_TEST_MOVE_GPA_DEST);
+ vm_mem_region_move(data->vm, data->nslots - 1 + 1, movesrcgpa);
+}
+
+static void test_memslot_do_unmap(struct vm_data *data,
+ uint64_t offsp, uint64_t count)
+{
+ uint64_t gpa, ctr;
+ uint32_t guest_page_size = data->vm->page_size;
+
+ for (gpa = MEM_TEST_GPA + offsp * guest_page_size, ctr = 0; ctr < count; ) {
+ uint64_t npages;
+ void *hva;
+ int ret;
+
+ hva = vm_gpa2hva(data, gpa, &npages);
+ TEST_ASSERT(npages, "Empty memory slot at gptr 0x%"PRIx64, gpa);
+ npages = min(npages, count - ctr);
+ ret = madvise(hva, npages * guest_page_size, MADV_DONTNEED);
+ TEST_ASSERT(!ret,
+ "madvise(%p, MADV_DONTNEED) on VM memory should not fail for gptr 0x%"PRIx64,
+ hva, gpa);
+ ctr += npages;
+ gpa += npages * guest_page_size;
+ }
+ TEST_ASSERT(ctr == count,
+ "madvise(MADV_DONTNEED) should exactly cover all of the requested area");
+}
+
+static void test_memslot_map_unmap_check(struct vm_data *data,
+ uint64_t offsp, uint64_t valexp)
+{
+ uint64_t gpa;
+ uint64_t *val;
+ uint32_t guest_page_size = data->vm->page_size;
+
+ if (!map_unmap_verify)
+ return;
+
+ gpa = MEM_TEST_GPA + offsp * guest_page_size;
+ val = (typeof(val))vm_gpa2hva(data, gpa, NULL);
+ TEST_ASSERT(*val == valexp,
+ "Guest written values should read back correctly before unmap (%"PRIu64" vs %"PRIu64" @ %"PRIx64")",
+ *val, valexp, gpa);
+ *val = 0;
+}
+
+static void test_memslot_map_loop(struct vm_data *data, struct sync_area *sync)
+{
+ uint32_t guest_page_size = data->vm->page_size;
+ uint64_t guest_pages = MEM_TEST_MAP_SIZE / guest_page_size;
+
+ /*
+ * Unmap the second half of the test area while guest writes to (maps)
+ * the first half.
+ */
+ test_memslot_do_unmap(data, guest_pages / 2, guest_pages / 2);
+
+ /*
+ * Wait for the guest to finish writing the first half of the test
+ * area, verify the written value on the first and the last page of
+ * this area and then unmap it.
+ * Meanwhile, the guest is writing to (mapping) the second half of
+ * the test area.
+ */
+ host_perform_sync(sync);
+ test_memslot_map_unmap_check(data, 0, MEM_TEST_VAL_1);
+ test_memslot_map_unmap_check(data, guest_pages / 2 - 1, MEM_TEST_VAL_1);
+ test_memslot_do_unmap(data, 0, guest_pages / 2);
+
+
+ /*
+ * Wait for the guest to finish writing the second half of the test
+ * area and verify the written value on the first and the last page
+ * of this area.
+ * The area will be unmapped at the beginning of the next loop
+ * iteration.
+ * Meanwhile, the guest is writing to (mapping) the first half of
+ * the test area.
+ */
+ host_perform_sync(sync);
+ test_memslot_map_unmap_check(data, guest_pages / 2, MEM_TEST_VAL_2);
+ test_memslot_map_unmap_check(data, guest_pages - 1, MEM_TEST_VAL_2);
+}
+
+static void test_memslot_unmap_loop_common(struct vm_data *data,
+ struct sync_area *sync,
+ uint64_t chunk)
+{
+ uint32_t guest_page_size = data->vm->page_size;
+ uint64_t guest_pages = MEM_TEST_UNMAP_SIZE / guest_page_size;
+ uint64_t ctr;
+
+ /*
+ * Wait for the guest to finish mapping page(s) in the first half
+ * of the test area, verify the written value and then perform unmap
+ * of this area.
+ * Meanwhile, the guest is writing to (mapping) page(s) in the second
+ * half of the test area.
+ */
+ host_perform_sync(sync);
+ test_memslot_map_unmap_check(data, 0, MEM_TEST_VAL_1);
+ for (ctr = 0; ctr < guest_pages / 2; ctr += chunk)
+ test_memslot_do_unmap(data, ctr, chunk);
+
+ /* Likewise, but for the opposite host / guest areas */
+ host_perform_sync(sync);
+ test_memslot_map_unmap_check(data, guest_pages / 2, MEM_TEST_VAL_2);
+ for (ctr = guest_pages / 2; ctr < guest_pages; ctr += chunk)
+ test_memslot_do_unmap(data, ctr, chunk);
+}
+
+static void test_memslot_unmap_loop(struct vm_data *data,
+ struct sync_area *sync)
+{
+ uint32_t host_page_size = getpagesize();
+ uint32_t guest_page_size = data->vm->page_size;
+ uint64_t guest_chunk_pages = guest_page_size >= host_page_size ?
+ 1 : host_page_size / guest_page_size;
+
+ test_memslot_unmap_loop_common(data, sync, guest_chunk_pages);
+}
+
+static void test_memslot_unmap_loop_chunked(struct vm_data *data,
+ struct sync_area *sync)
+{
+ uint32_t guest_page_size = data->vm->page_size;
+ uint64_t guest_chunk_pages = MEM_TEST_UNMAP_CHUNK_SIZE / guest_page_size;
+
+ test_memslot_unmap_loop_common(data, sync, guest_chunk_pages);
+}
+
+static void test_memslot_rw_loop(struct vm_data *data, struct sync_area *sync)
+{
+ uint64_t gptr;
+ uint32_t guest_page_size = data->vm->page_size;
+
+ for (gptr = MEM_TEST_GPA + guest_page_size / 2;
+ gptr < MEM_TEST_GPA + MEM_TEST_SIZE; gptr += guest_page_size)
+ *(uint64_t *)vm_gpa2hva(data, gptr, NULL) = MEM_TEST_VAL_2;
+
+ host_perform_sync(sync);
+
+ for (gptr = MEM_TEST_GPA;
+ gptr < MEM_TEST_GPA + MEM_TEST_SIZE; gptr += guest_page_size) {
+ uint64_t *vptr = (typeof(vptr))vm_gpa2hva(data, gptr, NULL);
+ uint64_t val = *vptr;
+
+ TEST_ASSERT(val == MEM_TEST_VAL_1,
+ "Guest written values should read back correctly (is %"PRIu64" @ %"PRIx64")",
+ val, gptr);
+ *vptr = 0;
+ }
+
+ host_perform_sync(sync);
+}
+
+struct test_data {
+ const char *name;
+ uint64_t mem_size;
+ void (*guest_code)(void);
+ bool (*prepare)(struct vm_data *data, struct sync_area *sync,
+ uint64_t *maxslots);
+ void (*loop)(struct vm_data *data, struct sync_area *sync);
+};
+
+static bool test_execute(int nslots, uint64_t *maxslots,
+ unsigned int maxtime,
+ const struct test_data *tdata,
+ uint64_t *nloops,
+ struct timespec *slot_runtime,
+ struct timespec *guest_runtime)
+{
+ uint64_t mem_size = tdata->mem_size ? : MEM_SIZE;
+ struct vm_data *data;
+ struct sync_area *sync;
+ struct timespec tstart;
+ bool ret = true;
+
+ data = alloc_vm();
+ if (!prepare_vm(data, nslots, maxslots, tdata->guest_code,
+ mem_size, slot_runtime)) {
+ ret = false;
+ goto exit_free;
+ }
+
+ sync = (typeof(sync))vm_gpa2hva(data, MEM_SYNC_GPA, NULL);
+ if (tdata->prepare &&
+ !tdata->prepare(data, sync, maxslots)) {
+ ret = false;
+ goto exit_free;
+ }
+
+ launch_vm(data);
+
+ clock_gettime(CLOCK_MONOTONIC, &tstart);
+ let_guest_run(sync);
+
+ while (1) {
+ *guest_runtime = timespec_elapsed(tstart);
+ if (guest_runtime->tv_sec >= maxtime)
+ break;
+
+ tdata->loop(data, sync);
+
+ (*nloops)++;
+ }
+
+ make_guest_exit(sync);
+ wait_guest_exit(data);
+
+exit_free:
+ free_vm(data);
+
+ return ret;
+}
+
+static const struct test_data tests[] = {
+ {
+ .name = "map",
+ .mem_size = MEM_SIZE_MAP,
+ .guest_code = guest_code_test_memslot_map,
+ .loop = test_memslot_map_loop,
+ },
+ {
+ .name = "unmap",
+ .mem_size = MEM_TEST_UNMAP_SIZE + MEM_EXTRA_SIZE,
+ .guest_code = guest_code_test_memslot_unmap,
+ .loop = test_memslot_unmap_loop,
+ },
+ {
+ .name = "unmap chunked",
+ .mem_size = MEM_TEST_UNMAP_SIZE + MEM_EXTRA_SIZE,
+ .guest_code = guest_code_test_memslot_unmap,
+ .loop = test_memslot_unmap_loop_chunked,
+ },
+ {
+ .name = "move active area",
+ .guest_code = guest_code_test_memslot_move,
+ .prepare = test_memslot_move_prepare_active,
+ .loop = test_memslot_move_loop,
+ },
+ {
+ .name = "move inactive area",
+ .guest_code = guest_code_test_memslot_move,
+ .prepare = test_memslot_move_prepare_inactive,
+ .loop = test_memslot_move_loop,
+ },
+ {
+ .name = "RW",
+ .guest_code = guest_code_test_memslot_rw,
+ .loop = test_memslot_rw_loop
+ },
+};
+
+#define NTESTS ARRAY_SIZE(tests)
+
+struct test_args {
+ int tfirst;
+ int tlast;
+ int nslots;
+ int seconds;
+ int runs;
+};
+
+static void help(char *name, struct test_args *targs)
+{
+ int ctr;
+
+ pr_info("usage: %s [-h] [-v] [-d] [-s slots] [-f first_test] [-e last_test] [-l test_length] [-r run_count]\n",
+ name);
+ pr_info(" -h: print this help screen.\n");
+ pr_info(" -v: enable verbose mode (not for benchmarking).\n");
+ pr_info(" -d: enable extra debug checks.\n");
+ pr_info(" -s: specify memslot count cap (-1 means no cap; currently: %i)\n",
+ targs->nslots);
+ pr_info(" -f: specify the first test to run (currently: %i; max %zu)\n",
+ targs->tfirst, NTESTS - 1);
+ pr_info(" -e: specify the last test to run (currently: %i; max %zu)\n",
+ targs->tlast, NTESTS - 1);
+ pr_info(" -l: specify the test length in seconds (currently: %i)\n",
+ targs->seconds);
+ pr_info(" -r: specify the number of runs per test (currently: %i)\n",
+ targs->runs);
+
+ pr_info("\nAvailable tests:\n");
+ for (ctr = 0; ctr < NTESTS; ctr++)
+ pr_info("%d: %s\n", ctr, tests[ctr].name);
+}
+
+static bool check_memory_sizes(void)
+{
+ uint32_t host_page_size = getpagesize();
+ uint32_t guest_page_size = vm_guest_mode_params[VM_MODE_DEFAULT].page_size;
+
+ if (host_page_size > SZ_64K || guest_page_size > SZ_64K) {
+ pr_info("Unsupported page size on host (0x%x) or guest (0x%x)\n",
+ host_page_size, guest_page_size);
+ return false;
+ }
+
+ if (MEM_SIZE % guest_page_size ||
+ MEM_TEST_SIZE % guest_page_size) {
+ pr_info("invalid MEM_SIZE or MEM_TEST_SIZE\n");
+ return false;
+ }
+
+ if (MEM_SIZE_MAP % guest_page_size ||
+ MEM_TEST_MAP_SIZE % guest_page_size ||
+ (MEM_TEST_MAP_SIZE / guest_page_size) <= 2 ||
+ (MEM_TEST_MAP_SIZE / guest_page_size) % 2) {
+ pr_info("invalid MEM_SIZE_MAP or MEM_TEST_MAP_SIZE\n");
+ return false;
+ }
+
+ if (MEM_TEST_UNMAP_SIZE > MEM_TEST_SIZE ||
+ MEM_TEST_UNMAP_SIZE % guest_page_size ||
+ (MEM_TEST_UNMAP_SIZE / guest_page_size) %
+ (2 * MEM_TEST_UNMAP_CHUNK_SIZE / guest_page_size)) {
+ pr_info("invalid MEM_TEST_UNMAP_SIZE or MEM_TEST_UNMAP_CHUNK_SIZE\n");
+ return false;
+ }
+
+ return true;
+}
+
+static bool parse_args(int argc, char *argv[],
+ struct test_args *targs)
+{
+ uint32_t max_mem_slots;
+ int opt;
+
+ while ((opt = getopt(argc, argv, "hvds:f:e:l:r:")) != -1) {
+ switch (opt) {
+ case 'h':
+ default:
+ help(argv[0], targs);
+ return false;
+ case 'v':
+ verbose = true;
+ break;
+ case 'd':
+ map_unmap_verify = true;
+ break;
+ case 's':
+ targs->nslots = atoi_paranoid(optarg);
+ if (targs->nslots <= 1 && targs->nslots != -1) {
+ pr_info("Slot count cap must be larger than 1 or -1 for no cap\n");
+ return false;
+ }
+ break;
+ case 'f':
+ targs->tfirst = atoi_non_negative("First test", optarg);
+ break;
+ case 'e':
+ targs->tlast = atoi_non_negative("Last test", optarg);
+ if (targs->tlast >= NTESTS) {
+ pr_info("Last test to run has to be non-negative and less than %zu\n",
+ NTESTS);
+ return false;
+ }
+ break;
+ case 'l':
+ targs->seconds = atoi_non_negative("Test length", optarg);
+ break;
+ case 'r':
+ targs->runs = atoi_positive("Runs per test", optarg);
+ break;
+ }
+ }
+
+ if (optind < argc) {
+ help(argv[0], targs);
+ return false;
+ }
+
+ if (targs->tfirst > targs->tlast) {
+ pr_info("First test to run cannot be greater than the last test to run\n");
+ return false;
+ }
+
+ max_mem_slots = kvm_check_cap(KVM_CAP_NR_MEMSLOTS);
+ if (max_mem_slots <= 1) {
+ pr_info("KVM_CAP_NR_MEMSLOTS should be greater than 1\n");
+ return false;
+ }
+
+ /* Memory slot 0 is reserved */
+ if (targs->nslots == -1)
+ targs->nslots = max_mem_slots - 1;
+ else
+ targs->nslots = min_t(int, targs->nslots, max_mem_slots) - 1;
+
+ pr_info_v("Allowed Number of memory slots: %"PRIu32"\n",
+ targs->nslots + 1);
+
+ return true;
+}
+
+struct test_result {
+ struct timespec slot_runtime, guest_runtime, iter_runtime;
+ int64_t slottimens, runtimens;
+ uint64_t nloops;
+};
+
+static bool test_loop(const struct test_data *data,
+ const struct test_args *targs,
+ struct test_result *rbestslottime,
+ struct test_result *rbestruntime)
+{
+ uint64_t maxslots;
+ struct test_result result = {};
+
+ if (!test_execute(targs->nslots, &maxslots, targs->seconds, data,
+ &result.nloops,
+ &result.slot_runtime, &result.guest_runtime)) {
+ if (maxslots)
+ pr_info("Memslot count too high for this test, decrease the cap (max is %"PRIu64")\n",
+ maxslots);
+ else
+ pr_info("Memslot count may be too high for this test, try adjusting the cap\n");
+
+ return false;
+ }
+
+ pr_info("Test took %ld.%.9lds for slot setup + %ld.%.9lds all iterations\n",
+ result.slot_runtime.tv_sec, result.slot_runtime.tv_nsec,
+ result.guest_runtime.tv_sec, result.guest_runtime.tv_nsec);
+ if (!result.nloops) {
+ pr_info("No full loops done - too short test time or system too loaded?\n");
+ return true;
+ }
+
+ result.iter_runtime = timespec_div(result.guest_runtime,
+ result.nloops);
+ pr_info("Done %"PRIu64" iterations, avg %ld.%.9lds each\n",
+ result.nloops,
+ result.iter_runtime.tv_sec,
+ result.iter_runtime.tv_nsec);
+ result.slottimens = timespec_to_ns(result.slot_runtime);
+ result.runtimens = timespec_to_ns(result.iter_runtime);
+
+ /*
+ * Only rank the slot setup time for tests using the whole test memory
+ * area so they are comparable
+ */
+ if (!data->mem_size &&
+ (!rbestslottime->slottimens ||
+ result.slottimens < rbestslottime->slottimens))
+ *rbestslottime = result;
+ if (!rbestruntime->runtimens ||
+ result.runtimens < rbestruntime->runtimens)
+ *rbestruntime = result;
+
+ return true;
+}
+
+int main(int argc, char *argv[])
+{
+ struct test_args targs = {
+ .tfirst = 0,
+ .tlast = NTESTS - 1,
+ .nslots = -1,
+ .seconds = 5,
+ .runs = 1,
+ };
+ struct test_result rbestslottime = {};
+ int tctr;
+
+ if (!check_memory_sizes())
+ return -1;
+
+ if (!parse_args(argc, argv, &targs))
+ return -1;
+
+ for (tctr = targs.tfirst; tctr <= targs.tlast; tctr++) {
+ const struct test_data *data = &tests[tctr];
+ unsigned int runctr;
+ struct test_result rbestruntime = {};
+
+ if (tctr > targs.tfirst)
+ pr_info("\n");
+
+ pr_info("Testing %s performance with %i runs, %d seconds each\n",
+ data->name, targs.runs, targs.seconds);
+
+ for (runctr = 0; runctr < targs.runs; runctr++)
+ if (!test_loop(data, &targs,
+ &rbestslottime, &rbestruntime))
+ break;
+
+ if (rbestruntime.runtimens)
+ pr_info("Best runtime result was %ld.%.9lds per iteration (with %"PRIu64" iterations)\n",
+ rbestruntime.iter_runtime.tv_sec,
+ rbestruntime.iter_runtime.tv_nsec,
+ rbestruntime.nloops);
+ }
+
+ if (rbestslottime.slottimens)
+ pr_info("Best slot setup time for the whole test area was %ld.%.9lds\n",
+ rbestslottime.slot_runtime.tv_sec,
+ rbestslottime.slot_runtime.tv_nsec);
+
+ return 0;
+}
diff --git a/tools/testing/selftests/kvm/riscv/arch_timer.c b/tools/testing/selftests/kvm/riscv/arch_timer.c
new file mode 100644
index 000000000000..e22848f747c0
--- /dev/null
+++ b/tools/testing/selftests/kvm/riscv/arch_timer.c
@@ -0,0 +1,111 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * arch_timer.c - Tests the riscv64 sstc timer IRQ functionality
+ *
+ * The test validates the sstc timer IRQs using vstimecmp registers.
+ * It's ported from the aarch64 arch_timer test.
+ *
+ * Copyright (c) 2024, Intel Corporation.
+ */
+
+#define _GNU_SOURCE
+
+#include "arch_timer.h"
+#include "kvm_util.h"
+#include "processor.h"
+#include "timer_test.h"
+
+static int timer_irq = IRQ_S_TIMER;
+
+static void guest_irq_handler(struct ex_regs *regs)
+{
+ uint64_t xcnt, xcnt_diff_us, cmp;
+ unsigned int intid = regs->cause & ~CAUSE_IRQ_FLAG;
+ uint32_t cpu = guest_get_vcpuid();
+ struct test_vcpu_shared_data *shared_data = &vcpu_shared_data[cpu];
+
+ timer_irq_disable();
+
+ xcnt = timer_get_cycles();
+ cmp = timer_get_cmp();
+ xcnt_diff_us = cycles_to_usec(xcnt - shared_data->xcnt);
+
+ /* Make sure we are dealing with the correct timer IRQ */
+ GUEST_ASSERT_EQ(intid, timer_irq);
+
+ __GUEST_ASSERT(xcnt >= cmp,
+ "xcnt = 0x%"PRIx64", cmp = 0x%"PRIx64", xcnt_diff_us = 0x%" PRIx64,
+ xcnt, cmp, xcnt_diff_us);
+
+ WRITE_ONCE(shared_data->nr_iter, shared_data->nr_iter + 1);
+}
+
+static void guest_run(struct test_vcpu_shared_data *shared_data)
+{
+ uint32_t irq_iter, config_iter;
+
+ shared_data->nr_iter = 0;
+ shared_data->guest_stage = 0;
+
+ for (config_iter = 0; config_iter < test_args.nr_iter; config_iter++) {
+ /* Setup the next interrupt */
+ timer_set_next_cmp_ms(test_args.timer_period_ms);
+ shared_data->xcnt = timer_get_cycles();
+ timer_irq_enable();
+
+ /* Setup a timeout for the interrupt to arrive */
+ udelay(msecs_to_usecs(test_args.timer_period_ms) +
+ test_args.timer_err_margin_us);
+
+ irq_iter = READ_ONCE(shared_data->nr_iter);
+ __GUEST_ASSERT(config_iter + 1 == irq_iter,
+ "config_iter + 1 = 0x%x, irq_iter = 0x%x.\n"
+ " Guest timer interrupt was not trigged within the specified\n"
+ " interval, try to increase the error margin by [-e] option.\n",
+ config_iter + 1, irq_iter);
+ }
+}
+
+static void guest_code(void)
+{
+ uint32_t cpu = guest_get_vcpuid();
+ struct test_vcpu_shared_data *shared_data = &vcpu_shared_data[cpu];
+
+ timer_irq_disable();
+ local_irq_enable();
+
+ guest_run(shared_data);
+
+ GUEST_DONE();
+}
+
+struct kvm_vm *test_vm_create(void)
+{
+ struct kvm_vm *vm;
+ int nr_vcpus = test_args.nr_vcpus;
+
+ vm = vm_create_with_vcpus(nr_vcpus, guest_code, vcpus);
+ __TEST_REQUIRE(__vcpu_has_ext(vcpus[0], RISCV_ISA_EXT_REG(KVM_RISCV_ISA_EXT_SSTC)),
+ "SSTC not available, skipping test\n");
+
+ vm_init_vector_tables(vm);
+ vm_install_interrupt_handler(vm, guest_irq_handler);
+
+ for (int i = 0; i < nr_vcpus; i++)
+ vcpu_init_vector_tables(vcpus[i]);
+
+ /* Initialize guest timer frequency. */
+ vcpu_get_reg(vcpus[0], RISCV_TIMER_REG(frequency), &timer_freq);
+ sync_global_to_guest(vm, timer_freq);
+ pr_debug("timer_freq: %lu\n", timer_freq);
+
+ /* Make all the test's cmdline args visible to the guest */
+ sync_global_to_guest(vm, test_args);
+
+ return vm;
+}
+
+void test_vm_cleanup(struct kvm_vm *vm)
+{
+ kvm_vm_free(vm);
+}
diff --git a/tools/testing/selftests/kvm/riscv/get-reg-list.c b/tools/testing/selftests/kvm/riscv/get-reg-list.c
new file mode 100644
index 000000000000..b882b7b9b785
--- /dev/null
+++ b/tools/testing/selftests/kvm/riscv/get-reg-list.c
@@ -0,0 +1,1034 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Check for KVM_GET_REG_LIST regressions.
+ *
+ * Copyright (c) 2023 Intel Corporation
+ *
+ */
+#include <stdio.h>
+#include "kvm_util.h"
+#include "test_util.h"
+#include "processor.h"
+
+#define REG_MASK (KVM_REG_ARCH_MASK | KVM_REG_SIZE_MASK)
+
+enum {
+ VCPU_FEATURE_ISA_EXT = 0,
+ VCPU_FEATURE_SBI_EXT,
+};
+
+static bool isa_ext_cant_disable[KVM_RISCV_ISA_EXT_MAX];
+
+bool filter_reg(__u64 reg)
+{
+ switch (reg & ~REG_MASK) {
+ /*
+ * Same set of ISA_EXT registers are not present on all host because
+ * ISA_EXT registers are visible to the KVM user space based on the
+ * ISA extensions available on the host. Also, disabling an ISA
+ * extension using corresponding ISA_EXT register does not affect
+ * the visibility of the ISA_EXT register itself.
+ *
+ * Based on above, we should filter-out all ISA_EXT registers.
+ *
+ * Note: The below list is alphabetically sorted.
+ */
+ case KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_A:
+ case KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_C:
+ case KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_D:
+ case KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_F:
+ case KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_H:
+ case KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_I:
+ case KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_M:
+ case KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_V:
+ case KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_SMSTATEEN:
+ case KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_SSAIA:
+ case KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_SSTC:
+ case KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_SVINVAL:
+ case KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_SVNAPOT:
+ case KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_SVPBMT:
+ case KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_ZACAS:
+ case KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_ZBA:
+ case KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_ZBB:
+ case KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_ZBC:
+ case KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_ZBKB:
+ case KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_ZBKC:
+ case KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_ZBKX:
+ case KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_ZBS:
+ case KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_ZFA:
+ case KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_ZFH:
+ case KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_ZFHMIN:
+ case KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_ZICBOM:
+ case KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_ZICBOZ:
+ case KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_ZICNTR:
+ case KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_ZICOND:
+ case KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_ZICSR:
+ case KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_ZIFENCEI:
+ case KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_ZIHINTNTL:
+ case KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_ZIHINTPAUSE:
+ case KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_ZIHPM:
+ case KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_ZKND:
+ case KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_ZKNE:
+ case KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_ZKNH:
+ case KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_ZKR:
+ case KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_ZKSED:
+ case KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_ZKSH:
+ case KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_ZKT:
+ case KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_ZTSO:
+ case KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_ZVBB:
+ case KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_ZVBC:
+ case KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_ZVFH:
+ case KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_ZVFHMIN:
+ case KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_ZVKB:
+ case KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_ZVKG:
+ case KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_ZVKNED:
+ case KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_ZVKNHA:
+ case KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_ZVKNHB:
+ case KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_ZVKSED:
+ case KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_ZVKSH:
+ case KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_ZVKT:
+ /*
+ * Like ISA_EXT registers, SBI_EXT registers are only visible when the
+ * host supports them and disabling them does not affect the visibility
+ * of the SBI_EXT register itself.
+ */
+ case KVM_REG_RISCV_SBI_EXT | KVM_REG_RISCV_SBI_SINGLE | KVM_RISCV_SBI_EXT_V01:
+ case KVM_REG_RISCV_SBI_EXT | KVM_REG_RISCV_SBI_SINGLE | KVM_RISCV_SBI_EXT_TIME:
+ case KVM_REG_RISCV_SBI_EXT | KVM_REG_RISCV_SBI_SINGLE | KVM_RISCV_SBI_EXT_IPI:
+ case KVM_REG_RISCV_SBI_EXT | KVM_REG_RISCV_SBI_SINGLE | KVM_RISCV_SBI_EXT_RFENCE:
+ case KVM_REG_RISCV_SBI_EXT | KVM_REG_RISCV_SBI_SINGLE | KVM_RISCV_SBI_EXT_SRST:
+ case KVM_REG_RISCV_SBI_EXT | KVM_REG_RISCV_SBI_SINGLE | KVM_RISCV_SBI_EXT_HSM:
+ case KVM_REG_RISCV_SBI_EXT | KVM_REG_RISCV_SBI_SINGLE | KVM_RISCV_SBI_EXT_PMU:
+ case KVM_REG_RISCV_SBI_EXT | KVM_REG_RISCV_SBI_SINGLE | KVM_RISCV_SBI_EXT_DBCN:
+ case KVM_REG_RISCV_SBI_EXT | KVM_REG_RISCV_SBI_SINGLE | KVM_RISCV_SBI_EXT_STA:
+ case KVM_REG_RISCV_SBI_EXT | KVM_REG_RISCV_SBI_SINGLE | KVM_RISCV_SBI_EXT_EXPERIMENTAL:
+ case KVM_REG_RISCV_SBI_EXT | KVM_REG_RISCV_SBI_SINGLE | KVM_RISCV_SBI_EXT_VENDOR:
+ return true;
+ /* AIA registers are always available when Ssaia can't be disabled */
+ case KVM_REG_RISCV_CSR | KVM_REG_RISCV_CSR_AIA | KVM_REG_RISCV_CSR_AIA_REG(siselect):
+ case KVM_REG_RISCV_CSR | KVM_REG_RISCV_CSR_AIA | KVM_REG_RISCV_CSR_AIA_REG(iprio1):
+ case KVM_REG_RISCV_CSR | KVM_REG_RISCV_CSR_AIA | KVM_REG_RISCV_CSR_AIA_REG(iprio2):
+ case KVM_REG_RISCV_CSR | KVM_REG_RISCV_CSR_AIA | KVM_REG_RISCV_CSR_AIA_REG(sieh):
+ case KVM_REG_RISCV_CSR | KVM_REG_RISCV_CSR_AIA | KVM_REG_RISCV_CSR_AIA_REG(siph):
+ case KVM_REG_RISCV_CSR | KVM_REG_RISCV_CSR_AIA | KVM_REG_RISCV_CSR_AIA_REG(iprio1h):
+ case KVM_REG_RISCV_CSR | KVM_REG_RISCV_CSR_AIA | KVM_REG_RISCV_CSR_AIA_REG(iprio2h):
+ return isa_ext_cant_disable[KVM_RISCV_ISA_EXT_SSAIA];
+ default:
+ break;
+ }
+
+ return false;
+}
+
+bool check_reject_set(int err)
+{
+ return err == EINVAL;
+}
+
+void finalize_vcpu(struct kvm_vcpu *vcpu, struct vcpu_reg_list *c)
+{
+ unsigned long isa_ext_state[KVM_RISCV_ISA_EXT_MAX] = { 0 };
+ struct vcpu_reg_sublist *s;
+ uint64_t feature;
+ int rc;
+
+ for (int i = 0; i < KVM_RISCV_ISA_EXT_MAX; i++)
+ __vcpu_get_reg(vcpu, RISCV_ISA_EXT_REG(i), &isa_ext_state[i]);
+
+ /*
+ * Disable all extensions which were enabled by default
+ * if they were available in the risc-v host.
+ */
+ for (int i = 0; i < KVM_RISCV_ISA_EXT_MAX; i++) {
+ rc = __vcpu_set_reg(vcpu, RISCV_ISA_EXT_REG(i), 0);
+ if (rc && isa_ext_state[i])
+ isa_ext_cant_disable[i] = true;
+ }
+
+ for (int i = 0; i < KVM_RISCV_SBI_EXT_MAX; i++) {
+ rc = __vcpu_set_reg(vcpu, RISCV_SBI_EXT_REG(i), 0);
+ TEST_ASSERT(!rc || (rc == -1 && errno == ENOENT), "Unexpected error");
+ }
+
+ for_each_sublist(c, s) {
+ if (!s->feature)
+ continue;
+
+ switch (s->feature_type) {
+ case VCPU_FEATURE_ISA_EXT:
+ feature = RISCV_ISA_EXT_REG(s->feature);
+ break;
+ case VCPU_FEATURE_SBI_EXT:
+ feature = RISCV_SBI_EXT_REG(s->feature);
+ break;
+ default:
+ TEST_FAIL("Unknown feature type");
+ }
+
+ /* Try to enable the desired extension */
+ __vcpu_set_reg(vcpu, feature, 1);
+
+ /* Double check whether the desired extension was enabled */
+ __TEST_REQUIRE(__vcpu_has_ext(vcpu, feature),
+ "%s not available, skipping tests", s->name);
+ }
+}
+
+static const char *config_id_to_str(const char *prefix, __u64 id)
+{
+ /* reg_off is the offset into struct kvm_riscv_config */
+ __u64 reg_off = id & ~(REG_MASK | KVM_REG_RISCV_CONFIG);
+
+ assert((id & KVM_REG_RISCV_TYPE_MASK) == KVM_REG_RISCV_CONFIG);
+
+ switch (reg_off) {
+ case KVM_REG_RISCV_CONFIG_REG(isa):
+ return "KVM_REG_RISCV_CONFIG_REG(isa)";
+ case KVM_REG_RISCV_CONFIG_REG(zicbom_block_size):
+ return "KVM_REG_RISCV_CONFIG_REG(zicbom_block_size)";
+ case KVM_REG_RISCV_CONFIG_REG(zicboz_block_size):
+ return "KVM_REG_RISCV_CONFIG_REG(zicboz_block_size)";
+ case KVM_REG_RISCV_CONFIG_REG(mvendorid):
+ return "KVM_REG_RISCV_CONFIG_REG(mvendorid)";
+ case KVM_REG_RISCV_CONFIG_REG(marchid):
+ return "KVM_REG_RISCV_CONFIG_REG(marchid)";
+ case KVM_REG_RISCV_CONFIG_REG(mimpid):
+ return "KVM_REG_RISCV_CONFIG_REG(mimpid)";
+ case KVM_REG_RISCV_CONFIG_REG(satp_mode):
+ return "KVM_REG_RISCV_CONFIG_REG(satp_mode)";
+ }
+
+ return strdup_printf("%lld /* UNKNOWN */", reg_off);
+}
+
+static const char *core_id_to_str(const char *prefix, __u64 id)
+{
+ /* reg_off is the offset into struct kvm_riscv_core */
+ __u64 reg_off = id & ~(REG_MASK | KVM_REG_RISCV_CORE);
+
+ assert((id & KVM_REG_RISCV_TYPE_MASK) == KVM_REG_RISCV_CORE);
+
+ switch (reg_off) {
+ case KVM_REG_RISCV_CORE_REG(regs.pc):
+ return "KVM_REG_RISCV_CORE_REG(regs.pc)";
+ case KVM_REG_RISCV_CORE_REG(regs.ra):
+ return "KVM_REG_RISCV_CORE_REG(regs.ra)";
+ case KVM_REG_RISCV_CORE_REG(regs.sp):
+ return "KVM_REG_RISCV_CORE_REG(regs.sp)";
+ case KVM_REG_RISCV_CORE_REG(regs.gp):
+ return "KVM_REG_RISCV_CORE_REG(regs.gp)";
+ case KVM_REG_RISCV_CORE_REG(regs.tp):
+ return "KVM_REG_RISCV_CORE_REG(regs.tp)";
+ case KVM_REG_RISCV_CORE_REG(regs.t0) ... KVM_REG_RISCV_CORE_REG(regs.t2):
+ return strdup_printf("KVM_REG_RISCV_CORE_REG(regs.t%lld)",
+ reg_off - KVM_REG_RISCV_CORE_REG(regs.t0));
+ case KVM_REG_RISCV_CORE_REG(regs.s0) ... KVM_REG_RISCV_CORE_REG(regs.s1):
+ return strdup_printf("KVM_REG_RISCV_CORE_REG(regs.s%lld)",
+ reg_off - KVM_REG_RISCV_CORE_REG(regs.s0));
+ case KVM_REG_RISCV_CORE_REG(regs.a0) ... KVM_REG_RISCV_CORE_REG(regs.a7):
+ return strdup_printf("KVM_REG_RISCV_CORE_REG(regs.a%lld)",
+ reg_off - KVM_REG_RISCV_CORE_REG(regs.a0));
+ case KVM_REG_RISCV_CORE_REG(regs.s2) ... KVM_REG_RISCV_CORE_REG(regs.s11):
+ return strdup_printf("KVM_REG_RISCV_CORE_REG(regs.s%lld)",
+ reg_off - KVM_REG_RISCV_CORE_REG(regs.s2) + 2);
+ case KVM_REG_RISCV_CORE_REG(regs.t3) ... KVM_REG_RISCV_CORE_REG(regs.t6):
+ return strdup_printf("KVM_REG_RISCV_CORE_REG(regs.t%lld)",
+ reg_off - KVM_REG_RISCV_CORE_REG(regs.t3) + 3);
+ case KVM_REG_RISCV_CORE_REG(mode):
+ return "KVM_REG_RISCV_CORE_REG(mode)";
+ }
+
+ return strdup_printf("%lld /* UNKNOWN */", reg_off);
+}
+
+#define RISCV_CSR_GENERAL(csr) \
+ "KVM_REG_RISCV_CSR_GENERAL | KVM_REG_RISCV_CSR_REG(" #csr ")"
+#define RISCV_CSR_AIA(csr) \
+ "KVM_REG_RISCV_CSR_AIA | KVM_REG_RISCV_CSR_REG(" #csr ")"
+#define RISCV_CSR_SMSTATEEN(csr) \
+ "KVM_REG_RISCV_CSR_SMSTATEEN | KVM_REG_RISCV_CSR_REG(" #csr ")"
+
+static const char *general_csr_id_to_str(__u64 reg_off)
+{
+ /* reg_off is the offset into struct kvm_riscv_csr */
+ switch (reg_off) {
+ case KVM_REG_RISCV_CSR_REG(sstatus):
+ return RISCV_CSR_GENERAL(sstatus);
+ case KVM_REG_RISCV_CSR_REG(sie):
+ return RISCV_CSR_GENERAL(sie);
+ case KVM_REG_RISCV_CSR_REG(stvec):
+ return RISCV_CSR_GENERAL(stvec);
+ case KVM_REG_RISCV_CSR_REG(sscratch):
+ return RISCV_CSR_GENERAL(sscratch);
+ case KVM_REG_RISCV_CSR_REG(sepc):
+ return RISCV_CSR_GENERAL(sepc);
+ case KVM_REG_RISCV_CSR_REG(scause):
+ return RISCV_CSR_GENERAL(scause);
+ case KVM_REG_RISCV_CSR_REG(stval):
+ return RISCV_CSR_GENERAL(stval);
+ case KVM_REG_RISCV_CSR_REG(sip):
+ return RISCV_CSR_GENERAL(sip);
+ case KVM_REG_RISCV_CSR_REG(satp):
+ return RISCV_CSR_GENERAL(satp);
+ case KVM_REG_RISCV_CSR_REG(scounteren):
+ return RISCV_CSR_GENERAL(scounteren);
+ case KVM_REG_RISCV_CSR_REG(senvcfg):
+ return RISCV_CSR_GENERAL(senvcfg);
+ }
+
+ return strdup_printf("KVM_REG_RISCV_CSR_GENERAL | %lld /* UNKNOWN */", reg_off);
+}
+
+static const char *aia_csr_id_to_str(__u64 reg_off)
+{
+ /* reg_off is the offset into struct kvm_riscv_aia_csr */
+ switch (reg_off) {
+ case KVM_REG_RISCV_CSR_AIA_REG(siselect):
+ return RISCV_CSR_AIA(siselect);
+ case KVM_REG_RISCV_CSR_AIA_REG(iprio1):
+ return RISCV_CSR_AIA(iprio1);
+ case KVM_REG_RISCV_CSR_AIA_REG(iprio2):
+ return RISCV_CSR_AIA(iprio2);
+ case KVM_REG_RISCV_CSR_AIA_REG(sieh):
+ return RISCV_CSR_AIA(sieh);
+ case KVM_REG_RISCV_CSR_AIA_REG(siph):
+ return RISCV_CSR_AIA(siph);
+ case KVM_REG_RISCV_CSR_AIA_REG(iprio1h):
+ return RISCV_CSR_AIA(iprio1h);
+ case KVM_REG_RISCV_CSR_AIA_REG(iprio2h):
+ return RISCV_CSR_AIA(iprio2h);
+ }
+
+ return strdup_printf("KVM_REG_RISCV_CSR_AIA | %lld /* UNKNOWN */", reg_off);
+}
+
+static const char *smstateen_csr_id_to_str(__u64 reg_off)
+{
+ /* reg_off is the offset into struct kvm_riscv_smstateen_csr */
+ switch (reg_off) {
+ case KVM_REG_RISCV_CSR_SMSTATEEN_REG(sstateen0):
+ return RISCV_CSR_SMSTATEEN(sstateen0);
+ }
+
+ TEST_FAIL("Unknown smstateen csr reg: 0x%llx", reg_off);
+ return NULL;
+}
+
+static const char *csr_id_to_str(const char *prefix, __u64 id)
+{
+ __u64 reg_off = id & ~(REG_MASK | KVM_REG_RISCV_CSR);
+ __u64 reg_subtype = reg_off & KVM_REG_RISCV_SUBTYPE_MASK;
+
+ assert((id & KVM_REG_RISCV_TYPE_MASK) == KVM_REG_RISCV_CSR);
+
+ reg_off &= ~KVM_REG_RISCV_SUBTYPE_MASK;
+
+ switch (reg_subtype) {
+ case KVM_REG_RISCV_CSR_GENERAL:
+ return general_csr_id_to_str(reg_off);
+ case KVM_REG_RISCV_CSR_AIA:
+ return aia_csr_id_to_str(reg_off);
+ case KVM_REG_RISCV_CSR_SMSTATEEN:
+ return smstateen_csr_id_to_str(reg_off);
+ }
+
+ return strdup_printf("%lld | %lld /* UNKNOWN */", reg_subtype, reg_off);
+}
+
+static const char *timer_id_to_str(const char *prefix, __u64 id)
+{
+ /* reg_off is the offset into struct kvm_riscv_timer */
+ __u64 reg_off = id & ~(REG_MASK | KVM_REG_RISCV_TIMER);
+
+ assert((id & KVM_REG_RISCV_TYPE_MASK) == KVM_REG_RISCV_TIMER);
+
+ switch (reg_off) {
+ case KVM_REG_RISCV_TIMER_REG(frequency):
+ return "KVM_REG_RISCV_TIMER_REG(frequency)";
+ case KVM_REG_RISCV_TIMER_REG(time):
+ return "KVM_REG_RISCV_TIMER_REG(time)";
+ case KVM_REG_RISCV_TIMER_REG(compare):
+ return "KVM_REG_RISCV_TIMER_REG(compare)";
+ case KVM_REG_RISCV_TIMER_REG(state):
+ return "KVM_REG_RISCV_TIMER_REG(state)";
+ }
+
+ return strdup_printf("%lld /* UNKNOWN */", reg_off);
+}
+
+static const char *fp_f_id_to_str(const char *prefix, __u64 id)
+{
+ /* reg_off is the offset into struct __riscv_f_ext_state */
+ __u64 reg_off = id & ~(REG_MASK | KVM_REG_RISCV_FP_F);
+
+ assert((id & KVM_REG_RISCV_TYPE_MASK) == KVM_REG_RISCV_FP_F);
+
+ switch (reg_off) {
+ case KVM_REG_RISCV_FP_F_REG(f[0]) ...
+ KVM_REG_RISCV_FP_F_REG(f[31]):
+ return strdup_printf("KVM_REG_RISCV_FP_F_REG(f[%lld])", reg_off);
+ case KVM_REG_RISCV_FP_F_REG(fcsr):
+ return "KVM_REG_RISCV_FP_F_REG(fcsr)";
+ }
+
+ return strdup_printf("%lld /* UNKNOWN */", reg_off);
+}
+
+static const char *fp_d_id_to_str(const char *prefix, __u64 id)
+{
+ /* reg_off is the offset into struct __riscv_d_ext_state */
+ __u64 reg_off = id & ~(REG_MASK | KVM_REG_RISCV_FP_D);
+
+ assert((id & KVM_REG_RISCV_TYPE_MASK) == KVM_REG_RISCV_FP_D);
+
+ switch (reg_off) {
+ case KVM_REG_RISCV_FP_D_REG(f[0]) ...
+ KVM_REG_RISCV_FP_D_REG(f[31]):
+ return strdup_printf("KVM_REG_RISCV_FP_D_REG(f[%lld])", reg_off);
+ case KVM_REG_RISCV_FP_D_REG(fcsr):
+ return "KVM_REG_RISCV_FP_D_REG(fcsr)";
+ }
+
+ return strdup_printf("%lld /* UNKNOWN */", reg_off);
+}
+
+#define KVM_ISA_EXT_ARR(ext) \
+[KVM_RISCV_ISA_EXT_##ext] = "KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_" #ext
+
+static const char *isa_ext_single_id_to_str(__u64 reg_off)
+{
+ static const char * const kvm_isa_ext_reg_name[] = {
+ KVM_ISA_EXT_ARR(A),
+ KVM_ISA_EXT_ARR(C),
+ KVM_ISA_EXT_ARR(D),
+ KVM_ISA_EXT_ARR(F),
+ KVM_ISA_EXT_ARR(H),
+ KVM_ISA_EXT_ARR(I),
+ KVM_ISA_EXT_ARR(M),
+ KVM_ISA_EXT_ARR(V),
+ KVM_ISA_EXT_ARR(SMSTATEEN),
+ KVM_ISA_EXT_ARR(SSAIA),
+ KVM_ISA_EXT_ARR(SSTC),
+ KVM_ISA_EXT_ARR(SVINVAL),
+ KVM_ISA_EXT_ARR(SVNAPOT),
+ KVM_ISA_EXT_ARR(SVPBMT),
+ KVM_ISA_EXT_ARR(ZACAS),
+ KVM_ISA_EXT_ARR(ZBA),
+ KVM_ISA_EXT_ARR(ZBB),
+ KVM_ISA_EXT_ARR(ZBC),
+ KVM_ISA_EXT_ARR(ZBKB),
+ KVM_ISA_EXT_ARR(ZBKC),
+ KVM_ISA_EXT_ARR(ZBKX),
+ KVM_ISA_EXT_ARR(ZBS),
+ KVM_ISA_EXT_ARR(ZFA),
+ KVM_ISA_EXT_ARR(ZFH),
+ KVM_ISA_EXT_ARR(ZFHMIN),
+ KVM_ISA_EXT_ARR(ZICBOM),
+ KVM_ISA_EXT_ARR(ZICBOZ),
+ KVM_ISA_EXT_ARR(ZICNTR),
+ KVM_ISA_EXT_ARR(ZICOND),
+ KVM_ISA_EXT_ARR(ZICSR),
+ KVM_ISA_EXT_ARR(ZIFENCEI),
+ KVM_ISA_EXT_ARR(ZIHINTNTL),
+ KVM_ISA_EXT_ARR(ZIHINTPAUSE),
+ KVM_ISA_EXT_ARR(ZIHPM),
+ KVM_ISA_EXT_ARR(ZKND),
+ KVM_ISA_EXT_ARR(ZKNE),
+ KVM_ISA_EXT_ARR(ZKNH),
+ KVM_ISA_EXT_ARR(ZKR),
+ KVM_ISA_EXT_ARR(ZKSED),
+ KVM_ISA_EXT_ARR(ZKSH),
+ KVM_ISA_EXT_ARR(ZKT),
+ KVM_ISA_EXT_ARR(ZTSO),
+ KVM_ISA_EXT_ARR(ZVBB),
+ KVM_ISA_EXT_ARR(ZVBC),
+ KVM_ISA_EXT_ARR(ZVFH),
+ KVM_ISA_EXT_ARR(ZVFHMIN),
+ KVM_ISA_EXT_ARR(ZVKB),
+ KVM_ISA_EXT_ARR(ZVKG),
+ KVM_ISA_EXT_ARR(ZVKNED),
+ KVM_ISA_EXT_ARR(ZVKNHA),
+ KVM_ISA_EXT_ARR(ZVKNHB),
+ KVM_ISA_EXT_ARR(ZVKSED),
+ KVM_ISA_EXT_ARR(ZVKSH),
+ KVM_ISA_EXT_ARR(ZVKT),
+ };
+
+ if (reg_off >= ARRAY_SIZE(kvm_isa_ext_reg_name))
+ return strdup_printf("KVM_REG_RISCV_ISA_SINGLE | %lld /* UNKNOWN */", reg_off);
+
+ return kvm_isa_ext_reg_name[reg_off];
+}
+
+static const char *isa_ext_multi_id_to_str(__u64 reg_subtype, __u64 reg_off)
+{
+ const char *unknown = "";
+
+ if (reg_off > KVM_REG_RISCV_ISA_MULTI_REG_LAST)
+ unknown = " /* UNKNOWN */";
+
+ switch (reg_subtype) {
+ case KVM_REG_RISCV_ISA_MULTI_EN:
+ return strdup_printf("KVM_REG_RISCV_ISA_MULTI_EN | %lld%s", reg_off, unknown);
+ case KVM_REG_RISCV_ISA_MULTI_DIS:
+ return strdup_printf("KVM_REG_RISCV_ISA_MULTI_DIS | %lld%s", reg_off, unknown);
+ }
+
+ return strdup_printf("%lld | %lld /* UNKNOWN */", reg_subtype, reg_off);
+}
+
+static const char *isa_ext_id_to_str(const char *prefix, __u64 id)
+{
+ __u64 reg_off = id & ~(REG_MASK | KVM_REG_RISCV_ISA_EXT);
+ __u64 reg_subtype = reg_off & KVM_REG_RISCV_SUBTYPE_MASK;
+
+ assert((id & KVM_REG_RISCV_TYPE_MASK) == KVM_REG_RISCV_ISA_EXT);
+
+ reg_off &= ~KVM_REG_RISCV_SUBTYPE_MASK;
+
+ switch (reg_subtype) {
+ case KVM_REG_RISCV_ISA_SINGLE:
+ return isa_ext_single_id_to_str(reg_off);
+ case KVM_REG_RISCV_ISA_MULTI_EN:
+ case KVM_REG_RISCV_ISA_MULTI_DIS:
+ return isa_ext_multi_id_to_str(reg_subtype, reg_off);
+ }
+
+ return strdup_printf("%lld | %lld /* UNKNOWN */", reg_subtype, reg_off);
+}
+
+#define KVM_SBI_EXT_ARR(ext) \
+[ext] = "KVM_REG_RISCV_SBI_SINGLE | " #ext
+
+static const char *sbi_ext_single_id_to_str(__u64 reg_off)
+{
+ /* reg_off is KVM_RISCV_SBI_EXT_ID */
+ static const char * const kvm_sbi_ext_reg_name[] = {
+ KVM_SBI_EXT_ARR(KVM_RISCV_SBI_EXT_V01),
+ KVM_SBI_EXT_ARR(KVM_RISCV_SBI_EXT_TIME),
+ KVM_SBI_EXT_ARR(KVM_RISCV_SBI_EXT_IPI),
+ KVM_SBI_EXT_ARR(KVM_RISCV_SBI_EXT_RFENCE),
+ KVM_SBI_EXT_ARR(KVM_RISCV_SBI_EXT_SRST),
+ KVM_SBI_EXT_ARR(KVM_RISCV_SBI_EXT_HSM),
+ KVM_SBI_EXT_ARR(KVM_RISCV_SBI_EXT_PMU),
+ KVM_SBI_EXT_ARR(KVM_RISCV_SBI_EXT_STA),
+ KVM_SBI_EXT_ARR(KVM_RISCV_SBI_EXT_EXPERIMENTAL),
+ KVM_SBI_EXT_ARR(KVM_RISCV_SBI_EXT_VENDOR),
+ KVM_SBI_EXT_ARR(KVM_RISCV_SBI_EXT_DBCN),
+ };
+
+ if (reg_off >= ARRAY_SIZE(kvm_sbi_ext_reg_name))
+ return strdup_printf("KVM_REG_RISCV_SBI_SINGLE | %lld /* UNKNOWN */", reg_off);
+
+ return kvm_sbi_ext_reg_name[reg_off];
+}
+
+static const char *sbi_ext_multi_id_to_str(__u64 reg_subtype, __u64 reg_off)
+{
+ const char *unknown = "";
+
+ if (reg_off > KVM_REG_RISCV_SBI_MULTI_REG_LAST)
+ unknown = " /* UNKNOWN */";
+
+ switch (reg_subtype) {
+ case KVM_REG_RISCV_SBI_MULTI_EN:
+ return strdup_printf("KVM_REG_RISCV_SBI_MULTI_EN | %lld%s", reg_off, unknown);
+ case KVM_REG_RISCV_SBI_MULTI_DIS:
+ return strdup_printf("KVM_REG_RISCV_SBI_MULTI_DIS | %lld%s", reg_off, unknown);
+ }
+
+ return strdup_printf("%lld | %lld /* UNKNOWN */", reg_subtype, reg_off);
+}
+
+static const char *sbi_ext_id_to_str(const char *prefix, __u64 id)
+{
+ __u64 reg_off = id & ~(REG_MASK | KVM_REG_RISCV_SBI_EXT);
+ __u64 reg_subtype = reg_off & KVM_REG_RISCV_SUBTYPE_MASK;
+
+ assert((id & KVM_REG_RISCV_TYPE_MASK) == KVM_REG_RISCV_SBI_EXT);
+
+ reg_off &= ~KVM_REG_RISCV_SUBTYPE_MASK;
+
+ switch (reg_subtype) {
+ case KVM_REG_RISCV_SBI_SINGLE:
+ return sbi_ext_single_id_to_str(reg_off);
+ case KVM_REG_RISCV_SBI_MULTI_EN:
+ case KVM_REG_RISCV_SBI_MULTI_DIS:
+ return sbi_ext_multi_id_to_str(reg_subtype, reg_off);
+ }
+
+ return strdup_printf("%lld | %lld /* UNKNOWN */", reg_subtype, reg_off);
+}
+
+static const char *sbi_sta_id_to_str(__u64 reg_off)
+{
+ switch (reg_off) {
+ case 0: return "KVM_REG_RISCV_SBI_STA | KVM_REG_RISCV_SBI_STA_REG(shmem_lo)";
+ case 1: return "KVM_REG_RISCV_SBI_STA | KVM_REG_RISCV_SBI_STA_REG(shmem_hi)";
+ }
+ return strdup_printf("KVM_REG_RISCV_SBI_STA | %lld /* UNKNOWN */", reg_off);
+}
+
+static const char *sbi_id_to_str(const char *prefix, __u64 id)
+{
+ __u64 reg_off = id & ~(REG_MASK | KVM_REG_RISCV_SBI_STATE);
+ __u64 reg_subtype = reg_off & KVM_REG_RISCV_SUBTYPE_MASK;
+
+ assert((id & KVM_REG_RISCV_TYPE_MASK) == KVM_REG_RISCV_SBI_STATE);
+
+ reg_off &= ~KVM_REG_RISCV_SUBTYPE_MASK;
+
+ switch (reg_subtype) {
+ case KVM_REG_RISCV_SBI_STA:
+ return sbi_sta_id_to_str(reg_off);
+ }
+
+ return strdup_printf("%lld | %lld /* UNKNOWN */", reg_subtype, reg_off);
+}
+
+void print_reg(const char *prefix, __u64 id)
+{
+ const char *reg_size = NULL;
+
+ TEST_ASSERT((id & KVM_REG_ARCH_MASK) == KVM_REG_RISCV,
+ "%s: KVM_REG_RISCV missing in reg id: 0x%llx", prefix, id);
+
+ switch (id & KVM_REG_SIZE_MASK) {
+ case KVM_REG_SIZE_U32:
+ reg_size = "KVM_REG_SIZE_U32";
+ break;
+ case KVM_REG_SIZE_U64:
+ reg_size = "KVM_REG_SIZE_U64";
+ break;
+ case KVM_REG_SIZE_U128:
+ reg_size = "KVM_REG_SIZE_U128";
+ break;
+ default:
+ printf("\tKVM_REG_RISCV | (%lld << KVM_REG_SIZE_SHIFT) | 0x%llx /* UNKNOWN */,\n",
+ (id & KVM_REG_SIZE_MASK) >> KVM_REG_SIZE_SHIFT, id & ~REG_MASK);
+ return;
+ }
+
+ switch (id & KVM_REG_RISCV_TYPE_MASK) {
+ case KVM_REG_RISCV_CONFIG:
+ printf("\tKVM_REG_RISCV | %s | KVM_REG_RISCV_CONFIG | %s,\n",
+ reg_size, config_id_to_str(prefix, id));
+ break;
+ case KVM_REG_RISCV_CORE:
+ printf("\tKVM_REG_RISCV | %s | KVM_REG_RISCV_CORE | %s,\n",
+ reg_size, core_id_to_str(prefix, id));
+ break;
+ case KVM_REG_RISCV_CSR:
+ printf("\tKVM_REG_RISCV | %s | KVM_REG_RISCV_CSR | %s,\n",
+ reg_size, csr_id_to_str(prefix, id));
+ break;
+ case KVM_REG_RISCV_TIMER:
+ printf("\tKVM_REG_RISCV | %s | KVM_REG_RISCV_TIMER | %s,\n",
+ reg_size, timer_id_to_str(prefix, id));
+ break;
+ case KVM_REG_RISCV_FP_F:
+ printf("\tKVM_REG_RISCV | %s | KVM_REG_RISCV_FP_F | %s,\n",
+ reg_size, fp_f_id_to_str(prefix, id));
+ break;
+ case KVM_REG_RISCV_FP_D:
+ printf("\tKVM_REG_RISCV | %s | KVM_REG_RISCV_FP_D | %s,\n",
+ reg_size, fp_d_id_to_str(prefix, id));
+ break;
+ case KVM_REG_RISCV_ISA_EXT:
+ printf("\tKVM_REG_RISCV | %s | KVM_REG_RISCV_ISA_EXT | %s,\n",
+ reg_size, isa_ext_id_to_str(prefix, id));
+ break;
+ case KVM_REG_RISCV_SBI_EXT:
+ printf("\tKVM_REG_RISCV | %s | KVM_REG_RISCV_SBI_EXT | %s,\n",
+ reg_size, sbi_ext_id_to_str(prefix, id));
+ break;
+ case KVM_REG_RISCV_SBI_STATE:
+ printf("\tKVM_REG_RISCV | %s | KVM_REG_RISCV_SBI_STATE | %s,\n",
+ reg_size, sbi_id_to_str(prefix, id));
+ break;
+ default:
+ printf("\tKVM_REG_RISCV | %s | 0x%llx /* UNKNOWN */,\n",
+ reg_size, id & ~REG_MASK);
+ return;
+ }
+}
+
+/*
+ * The current blessed list was primed with the output of kernel version
+ * v6.5-rc3 and then later updated with new registers.
+ */
+static __u64 base_regs[] = {
+ KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_CONFIG | KVM_REG_RISCV_CONFIG_REG(isa),
+ KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_CONFIG | KVM_REG_RISCV_CONFIG_REG(mvendorid),
+ KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_CONFIG | KVM_REG_RISCV_CONFIG_REG(marchid),
+ KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_CONFIG | KVM_REG_RISCV_CONFIG_REG(mimpid),
+ KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_CONFIG | KVM_REG_RISCV_CONFIG_REG(satp_mode),
+ KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_CORE | KVM_REG_RISCV_CORE_REG(regs.pc),
+ KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_CORE | KVM_REG_RISCV_CORE_REG(regs.ra),
+ KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_CORE | KVM_REG_RISCV_CORE_REG(regs.sp),
+ KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_CORE | KVM_REG_RISCV_CORE_REG(regs.gp),
+ KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_CORE | KVM_REG_RISCV_CORE_REG(regs.tp),
+ KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_CORE | KVM_REG_RISCV_CORE_REG(regs.t0),
+ KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_CORE | KVM_REG_RISCV_CORE_REG(regs.t1),
+ KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_CORE | KVM_REG_RISCV_CORE_REG(regs.t2),
+ KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_CORE | KVM_REG_RISCV_CORE_REG(regs.s0),
+ KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_CORE | KVM_REG_RISCV_CORE_REG(regs.s1),
+ KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_CORE | KVM_REG_RISCV_CORE_REG(regs.a0),
+ KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_CORE | KVM_REG_RISCV_CORE_REG(regs.a1),
+ KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_CORE | KVM_REG_RISCV_CORE_REG(regs.a2),
+ KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_CORE | KVM_REG_RISCV_CORE_REG(regs.a3),
+ KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_CORE | KVM_REG_RISCV_CORE_REG(regs.a4),
+ KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_CORE | KVM_REG_RISCV_CORE_REG(regs.a5),
+ KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_CORE | KVM_REG_RISCV_CORE_REG(regs.a6),
+ KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_CORE | KVM_REG_RISCV_CORE_REG(regs.a7),
+ KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_CORE | KVM_REG_RISCV_CORE_REG(regs.s2),
+ KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_CORE | KVM_REG_RISCV_CORE_REG(regs.s3),
+ KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_CORE | KVM_REG_RISCV_CORE_REG(regs.s4),
+ KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_CORE | KVM_REG_RISCV_CORE_REG(regs.s5),
+ KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_CORE | KVM_REG_RISCV_CORE_REG(regs.s6),
+ KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_CORE | KVM_REG_RISCV_CORE_REG(regs.s7),
+ KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_CORE | KVM_REG_RISCV_CORE_REG(regs.s8),
+ KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_CORE | KVM_REG_RISCV_CORE_REG(regs.s9),
+ KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_CORE | KVM_REG_RISCV_CORE_REG(regs.s10),
+ KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_CORE | KVM_REG_RISCV_CORE_REG(regs.s11),
+ KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_CORE | KVM_REG_RISCV_CORE_REG(regs.t3),
+ KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_CORE | KVM_REG_RISCV_CORE_REG(regs.t4),
+ KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_CORE | KVM_REG_RISCV_CORE_REG(regs.t5),
+ KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_CORE | KVM_REG_RISCV_CORE_REG(regs.t6),
+ KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_CORE | KVM_REG_RISCV_CORE_REG(mode),
+ KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_CSR | KVM_REG_RISCV_CSR_GENERAL | KVM_REG_RISCV_CSR_REG(sstatus),
+ KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_CSR | KVM_REG_RISCV_CSR_GENERAL | KVM_REG_RISCV_CSR_REG(sie),
+ KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_CSR | KVM_REG_RISCV_CSR_GENERAL | KVM_REG_RISCV_CSR_REG(stvec),
+ KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_CSR | KVM_REG_RISCV_CSR_GENERAL | KVM_REG_RISCV_CSR_REG(sscratch),
+ KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_CSR | KVM_REG_RISCV_CSR_GENERAL | KVM_REG_RISCV_CSR_REG(sepc),
+ KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_CSR | KVM_REG_RISCV_CSR_GENERAL | KVM_REG_RISCV_CSR_REG(scause),
+ KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_CSR | KVM_REG_RISCV_CSR_GENERAL | KVM_REG_RISCV_CSR_REG(stval),
+ KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_CSR | KVM_REG_RISCV_CSR_GENERAL | KVM_REG_RISCV_CSR_REG(sip),
+ KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_CSR | KVM_REG_RISCV_CSR_GENERAL | KVM_REG_RISCV_CSR_REG(satp),
+ KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_CSR | KVM_REG_RISCV_CSR_GENERAL | KVM_REG_RISCV_CSR_REG(scounteren),
+ KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_CSR | KVM_REG_RISCV_CSR_GENERAL | KVM_REG_RISCV_CSR_REG(senvcfg),
+ KVM_REG_RISCV | KVM_REG_SIZE_U64 | KVM_REG_RISCV_TIMER | KVM_REG_RISCV_TIMER_REG(frequency),
+ KVM_REG_RISCV | KVM_REG_SIZE_U64 | KVM_REG_RISCV_TIMER | KVM_REG_RISCV_TIMER_REG(time),
+ KVM_REG_RISCV | KVM_REG_SIZE_U64 | KVM_REG_RISCV_TIMER | KVM_REG_RISCV_TIMER_REG(compare),
+ KVM_REG_RISCV | KVM_REG_SIZE_U64 | KVM_REG_RISCV_TIMER | KVM_REG_RISCV_TIMER_REG(state),
+};
+
+/*
+ * The skips_set list registers that should skip set test.
+ * - KVM_REG_RISCV_TIMER_REG(state): set would fail if it was not initialized properly.
+ */
+static __u64 base_skips_set[] = {
+ KVM_REG_RISCV | KVM_REG_SIZE_U64 | KVM_REG_RISCV_TIMER | KVM_REG_RISCV_TIMER_REG(state),
+};
+
+static __u64 sbi_base_regs[] = {
+ KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_SBI_EXT | KVM_REG_RISCV_SBI_SINGLE | KVM_RISCV_SBI_EXT_V01,
+ KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_SBI_EXT | KVM_REG_RISCV_SBI_SINGLE | KVM_RISCV_SBI_EXT_TIME,
+ KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_SBI_EXT | KVM_REG_RISCV_SBI_SINGLE | KVM_RISCV_SBI_EXT_IPI,
+ KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_SBI_EXT | KVM_REG_RISCV_SBI_SINGLE | KVM_RISCV_SBI_EXT_RFENCE,
+ KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_SBI_EXT | KVM_REG_RISCV_SBI_SINGLE | KVM_RISCV_SBI_EXT_SRST,
+ KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_SBI_EXT | KVM_REG_RISCV_SBI_SINGLE | KVM_RISCV_SBI_EXT_HSM,
+ KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_SBI_EXT | KVM_REG_RISCV_SBI_SINGLE | KVM_RISCV_SBI_EXT_EXPERIMENTAL,
+ KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_SBI_EXT | KVM_REG_RISCV_SBI_SINGLE | KVM_RISCV_SBI_EXT_VENDOR,
+};
+
+static __u64 sbi_sta_regs[] = {
+ KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_SBI_EXT | KVM_REG_RISCV_SBI_SINGLE | KVM_RISCV_SBI_EXT_STA,
+ KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_SBI_STATE | KVM_REG_RISCV_SBI_STA | KVM_REG_RISCV_SBI_STA_REG(shmem_lo),
+ KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_SBI_STATE | KVM_REG_RISCV_SBI_STA | KVM_REG_RISCV_SBI_STA_REG(shmem_hi),
+};
+
+static __u64 zicbom_regs[] = {
+ KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_CONFIG | KVM_REG_RISCV_CONFIG_REG(zicbom_block_size),
+ KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_ZICBOM,
+};
+
+static __u64 zicboz_regs[] = {
+ KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_CONFIG | KVM_REG_RISCV_CONFIG_REG(zicboz_block_size),
+ KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_ZICBOZ,
+};
+
+static __u64 aia_regs[] = {
+ KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_CSR | KVM_REG_RISCV_CSR_AIA | KVM_REG_RISCV_CSR_AIA_REG(siselect),
+ KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_CSR | KVM_REG_RISCV_CSR_AIA | KVM_REG_RISCV_CSR_AIA_REG(iprio1),
+ KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_CSR | KVM_REG_RISCV_CSR_AIA | KVM_REG_RISCV_CSR_AIA_REG(iprio2),
+ KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_CSR | KVM_REG_RISCV_CSR_AIA | KVM_REG_RISCV_CSR_AIA_REG(sieh),
+ KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_CSR | KVM_REG_RISCV_CSR_AIA | KVM_REG_RISCV_CSR_AIA_REG(siph),
+ KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_CSR | KVM_REG_RISCV_CSR_AIA | KVM_REG_RISCV_CSR_AIA_REG(iprio1h),
+ KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_CSR | KVM_REG_RISCV_CSR_AIA | KVM_REG_RISCV_CSR_AIA_REG(iprio2h),
+ KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_SSAIA,
+};
+
+static __u64 smstateen_regs[] = {
+ KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_CSR | KVM_REG_RISCV_CSR_SMSTATEEN | KVM_REG_RISCV_CSR_SMSTATEEN_REG(sstateen0),
+ KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_SMSTATEEN,
+};
+
+static __u64 fp_f_regs[] = {
+ KVM_REG_RISCV | KVM_REG_SIZE_U32 | KVM_REG_RISCV_FP_F | KVM_REG_RISCV_FP_F_REG(f[0]),
+ KVM_REG_RISCV | KVM_REG_SIZE_U32 | KVM_REG_RISCV_FP_F | KVM_REG_RISCV_FP_F_REG(f[1]),
+ KVM_REG_RISCV | KVM_REG_SIZE_U32 | KVM_REG_RISCV_FP_F | KVM_REG_RISCV_FP_F_REG(f[2]),
+ KVM_REG_RISCV | KVM_REG_SIZE_U32 | KVM_REG_RISCV_FP_F | KVM_REG_RISCV_FP_F_REG(f[3]),
+ KVM_REG_RISCV | KVM_REG_SIZE_U32 | KVM_REG_RISCV_FP_F | KVM_REG_RISCV_FP_F_REG(f[4]),
+ KVM_REG_RISCV | KVM_REG_SIZE_U32 | KVM_REG_RISCV_FP_F | KVM_REG_RISCV_FP_F_REG(f[5]),
+ KVM_REG_RISCV | KVM_REG_SIZE_U32 | KVM_REG_RISCV_FP_F | KVM_REG_RISCV_FP_F_REG(f[6]),
+ KVM_REG_RISCV | KVM_REG_SIZE_U32 | KVM_REG_RISCV_FP_F | KVM_REG_RISCV_FP_F_REG(f[7]),
+ KVM_REG_RISCV | KVM_REG_SIZE_U32 | KVM_REG_RISCV_FP_F | KVM_REG_RISCV_FP_F_REG(f[8]),
+ KVM_REG_RISCV | KVM_REG_SIZE_U32 | KVM_REG_RISCV_FP_F | KVM_REG_RISCV_FP_F_REG(f[9]),
+ KVM_REG_RISCV | KVM_REG_SIZE_U32 | KVM_REG_RISCV_FP_F | KVM_REG_RISCV_FP_F_REG(f[10]),
+ KVM_REG_RISCV | KVM_REG_SIZE_U32 | KVM_REG_RISCV_FP_F | KVM_REG_RISCV_FP_F_REG(f[11]),
+ KVM_REG_RISCV | KVM_REG_SIZE_U32 | KVM_REG_RISCV_FP_F | KVM_REG_RISCV_FP_F_REG(f[12]),
+ KVM_REG_RISCV | KVM_REG_SIZE_U32 | KVM_REG_RISCV_FP_F | KVM_REG_RISCV_FP_F_REG(f[13]),
+ KVM_REG_RISCV | KVM_REG_SIZE_U32 | KVM_REG_RISCV_FP_F | KVM_REG_RISCV_FP_F_REG(f[14]),
+ KVM_REG_RISCV | KVM_REG_SIZE_U32 | KVM_REG_RISCV_FP_F | KVM_REG_RISCV_FP_F_REG(f[15]),
+ KVM_REG_RISCV | KVM_REG_SIZE_U32 | KVM_REG_RISCV_FP_F | KVM_REG_RISCV_FP_F_REG(f[16]),
+ KVM_REG_RISCV | KVM_REG_SIZE_U32 | KVM_REG_RISCV_FP_F | KVM_REG_RISCV_FP_F_REG(f[17]),
+ KVM_REG_RISCV | KVM_REG_SIZE_U32 | KVM_REG_RISCV_FP_F | KVM_REG_RISCV_FP_F_REG(f[18]),
+ KVM_REG_RISCV | KVM_REG_SIZE_U32 | KVM_REG_RISCV_FP_F | KVM_REG_RISCV_FP_F_REG(f[19]),
+ KVM_REG_RISCV | KVM_REG_SIZE_U32 | KVM_REG_RISCV_FP_F | KVM_REG_RISCV_FP_F_REG(f[20]),
+ KVM_REG_RISCV | KVM_REG_SIZE_U32 | KVM_REG_RISCV_FP_F | KVM_REG_RISCV_FP_F_REG(f[21]),
+ KVM_REG_RISCV | KVM_REG_SIZE_U32 | KVM_REG_RISCV_FP_F | KVM_REG_RISCV_FP_F_REG(f[22]),
+ KVM_REG_RISCV | KVM_REG_SIZE_U32 | KVM_REG_RISCV_FP_F | KVM_REG_RISCV_FP_F_REG(f[23]),
+ KVM_REG_RISCV | KVM_REG_SIZE_U32 | KVM_REG_RISCV_FP_F | KVM_REG_RISCV_FP_F_REG(f[24]),
+ KVM_REG_RISCV | KVM_REG_SIZE_U32 | KVM_REG_RISCV_FP_F | KVM_REG_RISCV_FP_F_REG(f[25]),
+ KVM_REG_RISCV | KVM_REG_SIZE_U32 | KVM_REG_RISCV_FP_F | KVM_REG_RISCV_FP_F_REG(f[26]),
+ KVM_REG_RISCV | KVM_REG_SIZE_U32 | KVM_REG_RISCV_FP_F | KVM_REG_RISCV_FP_F_REG(f[27]),
+ KVM_REG_RISCV | KVM_REG_SIZE_U32 | KVM_REG_RISCV_FP_F | KVM_REG_RISCV_FP_F_REG(f[28]),
+ KVM_REG_RISCV | KVM_REG_SIZE_U32 | KVM_REG_RISCV_FP_F | KVM_REG_RISCV_FP_F_REG(f[29]),
+ KVM_REG_RISCV | KVM_REG_SIZE_U32 | KVM_REG_RISCV_FP_F | KVM_REG_RISCV_FP_F_REG(f[30]),
+ KVM_REG_RISCV | KVM_REG_SIZE_U32 | KVM_REG_RISCV_FP_F | KVM_REG_RISCV_FP_F_REG(f[31]),
+ KVM_REG_RISCV | KVM_REG_SIZE_U32 | KVM_REG_RISCV_FP_F | KVM_REG_RISCV_FP_F_REG(fcsr),
+ KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_F,
+};
+
+static __u64 fp_d_regs[] = {
+ KVM_REG_RISCV | KVM_REG_SIZE_U64 | KVM_REG_RISCV_FP_D | KVM_REG_RISCV_FP_D_REG(f[0]),
+ KVM_REG_RISCV | KVM_REG_SIZE_U64 | KVM_REG_RISCV_FP_D | KVM_REG_RISCV_FP_D_REG(f[1]),
+ KVM_REG_RISCV | KVM_REG_SIZE_U64 | KVM_REG_RISCV_FP_D | KVM_REG_RISCV_FP_D_REG(f[2]),
+ KVM_REG_RISCV | KVM_REG_SIZE_U64 | KVM_REG_RISCV_FP_D | KVM_REG_RISCV_FP_D_REG(f[3]),
+ KVM_REG_RISCV | KVM_REG_SIZE_U64 | KVM_REG_RISCV_FP_D | KVM_REG_RISCV_FP_D_REG(f[4]),
+ KVM_REG_RISCV | KVM_REG_SIZE_U64 | KVM_REG_RISCV_FP_D | KVM_REG_RISCV_FP_D_REG(f[5]),
+ KVM_REG_RISCV | KVM_REG_SIZE_U64 | KVM_REG_RISCV_FP_D | KVM_REG_RISCV_FP_D_REG(f[6]),
+ KVM_REG_RISCV | KVM_REG_SIZE_U64 | KVM_REG_RISCV_FP_D | KVM_REG_RISCV_FP_D_REG(f[7]),
+ KVM_REG_RISCV | KVM_REG_SIZE_U64 | KVM_REG_RISCV_FP_D | KVM_REG_RISCV_FP_D_REG(f[8]),
+ KVM_REG_RISCV | KVM_REG_SIZE_U64 | KVM_REG_RISCV_FP_D | KVM_REG_RISCV_FP_D_REG(f[9]),
+ KVM_REG_RISCV | KVM_REG_SIZE_U64 | KVM_REG_RISCV_FP_D | KVM_REG_RISCV_FP_D_REG(f[10]),
+ KVM_REG_RISCV | KVM_REG_SIZE_U64 | KVM_REG_RISCV_FP_D | KVM_REG_RISCV_FP_D_REG(f[11]),
+ KVM_REG_RISCV | KVM_REG_SIZE_U64 | KVM_REG_RISCV_FP_D | KVM_REG_RISCV_FP_D_REG(f[12]),
+ KVM_REG_RISCV | KVM_REG_SIZE_U64 | KVM_REG_RISCV_FP_D | KVM_REG_RISCV_FP_D_REG(f[13]),
+ KVM_REG_RISCV | KVM_REG_SIZE_U64 | KVM_REG_RISCV_FP_D | KVM_REG_RISCV_FP_D_REG(f[14]),
+ KVM_REG_RISCV | KVM_REG_SIZE_U64 | KVM_REG_RISCV_FP_D | KVM_REG_RISCV_FP_D_REG(f[15]),
+ KVM_REG_RISCV | KVM_REG_SIZE_U64 | KVM_REG_RISCV_FP_D | KVM_REG_RISCV_FP_D_REG(f[16]),
+ KVM_REG_RISCV | KVM_REG_SIZE_U64 | KVM_REG_RISCV_FP_D | KVM_REG_RISCV_FP_D_REG(f[17]),
+ KVM_REG_RISCV | KVM_REG_SIZE_U64 | KVM_REG_RISCV_FP_D | KVM_REG_RISCV_FP_D_REG(f[18]),
+ KVM_REG_RISCV | KVM_REG_SIZE_U64 | KVM_REG_RISCV_FP_D | KVM_REG_RISCV_FP_D_REG(f[19]),
+ KVM_REG_RISCV | KVM_REG_SIZE_U64 | KVM_REG_RISCV_FP_D | KVM_REG_RISCV_FP_D_REG(f[20]),
+ KVM_REG_RISCV | KVM_REG_SIZE_U64 | KVM_REG_RISCV_FP_D | KVM_REG_RISCV_FP_D_REG(f[21]),
+ KVM_REG_RISCV | KVM_REG_SIZE_U64 | KVM_REG_RISCV_FP_D | KVM_REG_RISCV_FP_D_REG(f[22]),
+ KVM_REG_RISCV | KVM_REG_SIZE_U64 | KVM_REG_RISCV_FP_D | KVM_REG_RISCV_FP_D_REG(f[23]),
+ KVM_REG_RISCV | KVM_REG_SIZE_U64 | KVM_REG_RISCV_FP_D | KVM_REG_RISCV_FP_D_REG(f[24]),
+ KVM_REG_RISCV | KVM_REG_SIZE_U64 | KVM_REG_RISCV_FP_D | KVM_REG_RISCV_FP_D_REG(f[25]),
+ KVM_REG_RISCV | KVM_REG_SIZE_U64 | KVM_REG_RISCV_FP_D | KVM_REG_RISCV_FP_D_REG(f[26]),
+ KVM_REG_RISCV | KVM_REG_SIZE_U64 | KVM_REG_RISCV_FP_D | KVM_REG_RISCV_FP_D_REG(f[27]),
+ KVM_REG_RISCV | KVM_REG_SIZE_U64 | KVM_REG_RISCV_FP_D | KVM_REG_RISCV_FP_D_REG(f[28]),
+ KVM_REG_RISCV | KVM_REG_SIZE_U64 | KVM_REG_RISCV_FP_D | KVM_REG_RISCV_FP_D_REG(f[29]),
+ KVM_REG_RISCV | KVM_REG_SIZE_U64 | KVM_REG_RISCV_FP_D | KVM_REG_RISCV_FP_D_REG(f[30]),
+ KVM_REG_RISCV | KVM_REG_SIZE_U64 | KVM_REG_RISCV_FP_D | KVM_REG_RISCV_FP_D_REG(f[31]),
+ KVM_REG_RISCV | KVM_REG_SIZE_U32 | KVM_REG_RISCV_FP_D | KVM_REG_RISCV_FP_D_REG(fcsr),
+ KVM_REG_RISCV | KVM_REG_SIZE_ULONG | KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | KVM_RISCV_ISA_EXT_D,
+};
+
+#define SUBLIST_BASE \
+ {"base", .regs = base_regs, .regs_n = ARRAY_SIZE(base_regs), \
+ .skips_set = base_skips_set, .skips_set_n = ARRAY_SIZE(base_skips_set),}
+#define SUBLIST_SBI_BASE \
+ {"sbi-base", .feature_type = VCPU_FEATURE_SBI_EXT, .feature = KVM_RISCV_SBI_EXT_V01, \
+ .regs = sbi_base_regs, .regs_n = ARRAY_SIZE(sbi_base_regs),}
+#define SUBLIST_SBI_STA \
+ {"sbi-sta", .feature_type = VCPU_FEATURE_SBI_EXT, .feature = KVM_RISCV_SBI_EXT_STA, \
+ .regs = sbi_sta_regs, .regs_n = ARRAY_SIZE(sbi_sta_regs),}
+#define SUBLIST_ZICBOM \
+ {"zicbom", .feature = KVM_RISCV_ISA_EXT_ZICBOM, .regs = zicbom_regs, .regs_n = ARRAY_SIZE(zicbom_regs),}
+#define SUBLIST_ZICBOZ \
+ {"zicboz", .feature = KVM_RISCV_ISA_EXT_ZICBOZ, .regs = zicboz_regs, .regs_n = ARRAY_SIZE(zicboz_regs),}
+#define SUBLIST_AIA \
+ {"aia", .feature = KVM_RISCV_ISA_EXT_SSAIA, .regs = aia_regs, .regs_n = ARRAY_SIZE(aia_regs),}
+#define SUBLIST_SMSTATEEN \
+ {"smstateen", .feature = KVM_RISCV_ISA_EXT_SMSTATEEN, .regs = smstateen_regs, .regs_n = ARRAY_SIZE(smstateen_regs),}
+#define SUBLIST_FP_F \
+ {"fp_f", .feature = KVM_RISCV_ISA_EXT_F, .regs = fp_f_regs, \
+ .regs_n = ARRAY_SIZE(fp_f_regs),}
+#define SUBLIST_FP_D \
+ {"fp_d", .feature = KVM_RISCV_ISA_EXT_D, .regs = fp_d_regs, \
+ .regs_n = ARRAY_SIZE(fp_d_regs),}
+
+#define KVM_ISA_EXT_SIMPLE_CONFIG(ext, extu) \
+static __u64 regs_##ext[] = { \
+ KVM_REG_RISCV | KVM_REG_SIZE_ULONG | \
+ KVM_REG_RISCV_ISA_EXT | KVM_REG_RISCV_ISA_SINGLE | \
+ KVM_RISCV_ISA_EXT_##extu, \
+}; \
+static struct vcpu_reg_list config_##ext = { \
+ .sublists = { \
+ SUBLIST_BASE, \
+ { \
+ .name = #ext, \
+ .feature = KVM_RISCV_ISA_EXT_##extu, \
+ .regs = regs_##ext, \
+ .regs_n = ARRAY_SIZE(regs_##ext), \
+ }, \
+ {0}, \
+ }, \
+} \
+
+#define KVM_SBI_EXT_SIMPLE_CONFIG(ext, extu) \
+static __u64 regs_sbi_##ext[] = { \
+ KVM_REG_RISCV | KVM_REG_SIZE_ULONG | \
+ KVM_REG_RISCV_SBI_EXT | KVM_REG_RISCV_SBI_SINGLE | \
+ KVM_RISCV_SBI_EXT_##extu, \
+}; \
+static struct vcpu_reg_list config_sbi_##ext = { \
+ .sublists = { \
+ SUBLIST_BASE, \
+ { \
+ .name = "sbi-"#ext, \
+ .feature_type = VCPU_FEATURE_SBI_EXT, \
+ .feature = KVM_RISCV_SBI_EXT_##extu, \
+ .regs = regs_sbi_##ext, \
+ .regs_n = ARRAY_SIZE(regs_sbi_##ext), \
+ }, \
+ {0}, \
+ }, \
+} \
+
+#define KVM_ISA_EXT_SUBLIST_CONFIG(ext, extu) \
+static struct vcpu_reg_list config_##ext = { \
+ .sublists = { \
+ SUBLIST_BASE, \
+ SUBLIST_##extu, \
+ {0}, \
+ }, \
+} \
+
+#define KVM_SBI_EXT_SUBLIST_CONFIG(ext, extu) \
+static struct vcpu_reg_list config_sbi_##ext = { \
+ .sublists = { \
+ SUBLIST_BASE, \
+ SUBLIST_SBI_##extu, \
+ {0}, \
+ }, \
+} \
+
+/* Note: The below list is alphabetically sorted. */
+
+KVM_SBI_EXT_SUBLIST_CONFIG(base, BASE);
+KVM_SBI_EXT_SUBLIST_CONFIG(sta, STA);
+KVM_SBI_EXT_SIMPLE_CONFIG(pmu, PMU);
+KVM_SBI_EXT_SIMPLE_CONFIG(dbcn, DBCN);
+
+KVM_ISA_EXT_SUBLIST_CONFIG(aia, AIA);
+KVM_ISA_EXT_SUBLIST_CONFIG(fp_f, FP_F);
+KVM_ISA_EXT_SUBLIST_CONFIG(fp_d, FP_D);
+KVM_ISA_EXT_SIMPLE_CONFIG(h, H);
+KVM_ISA_EXT_SUBLIST_CONFIG(smstateen, SMSTATEEN);
+KVM_ISA_EXT_SIMPLE_CONFIG(sstc, SSTC);
+KVM_ISA_EXT_SIMPLE_CONFIG(svinval, SVINVAL);
+KVM_ISA_EXT_SIMPLE_CONFIG(svnapot, SVNAPOT);
+KVM_ISA_EXT_SIMPLE_CONFIG(svpbmt, SVPBMT);
+KVM_ISA_EXT_SIMPLE_CONFIG(zacas, ZACAS);
+KVM_ISA_EXT_SIMPLE_CONFIG(zba, ZBA);
+KVM_ISA_EXT_SIMPLE_CONFIG(zbb, ZBB);
+KVM_ISA_EXT_SIMPLE_CONFIG(zbc, ZBC);
+KVM_ISA_EXT_SIMPLE_CONFIG(zbkb, ZBKB);
+KVM_ISA_EXT_SIMPLE_CONFIG(zbkc, ZBKC);
+KVM_ISA_EXT_SIMPLE_CONFIG(zbkx, ZBKX);
+KVM_ISA_EXT_SIMPLE_CONFIG(zbs, ZBS);
+KVM_ISA_EXT_SIMPLE_CONFIG(zfa, ZFA);
+KVM_ISA_EXT_SIMPLE_CONFIG(zfh, ZFH);
+KVM_ISA_EXT_SIMPLE_CONFIG(zfhmin, ZFHMIN);
+KVM_ISA_EXT_SUBLIST_CONFIG(zicbom, ZICBOM);
+KVM_ISA_EXT_SUBLIST_CONFIG(zicboz, ZICBOZ);
+KVM_ISA_EXT_SIMPLE_CONFIG(zicntr, ZICNTR);
+KVM_ISA_EXT_SIMPLE_CONFIG(zicond, ZICOND);
+KVM_ISA_EXT_SIMPLE_CONFIG(zicsr, ZICSR);
+KVM_ISA_EXT_SIMPLE_CONFIG(zifencei, ZIFENCEI);
+KVM_ISA_EXT_SIMPLE_CONFIG(zihintntl, ZIHINTNTL);
+KVM_ISA_EXT_SIMPLE_CONFIG(zihintpause, ZIHINTPAUSE);
+KVM_ISA_EXT_SIMPLE_CONFIG(zihpm, ZIHPM);
+KVM_ISA_EXT_SIMPLE_CONFIG(zknd, ZKND);
+KVM_ISA_EXT_SIMPLE_CONFIG(zkne, ZKNE);
+KVM_ISA_EXT_SIMPLE_CONFIG(zknh, ZKNH);
+KVM_ISA_EXT_SIMPLE_CONFIG(zkr, ZKR);
+KVM_ISA_EXT_SIMPLE_CONFIG(zksed, ZKSED);
+KVM_ISA_EXT_SIMPLE_CONFIG(zksh, ZKSH);
+KVM_ISA_EXT_SIMPLE_CONFIG(zkt, ZKT);
+KVM_ISA_EXT_SIMPLE_CONFIG(ztso, ZTSO);
+KVM_ISA_EXT_SIMPLE_CONFIG(zvbb, ZVBB);
+KVM_ISA_EXT_SIMPLE_CONFIG(zvbc, ZVBC);
+KVM_ISA_EXT_SIMPLE_CONFIG(zvfh, ZVFH);
+KVM_ISA_EXT_SIMPLE_CONFIG(zvfhmin, ZVFHMIN);
+KVM_ISA_EXT_SIMPLE_CONFIG(zvkb, ZVKB);
+KVM_ISA_EXT_SIMPLE_CONFIG(zvkg, ZVKG);
+KVM_ISA_EXT_SIMPLE_CONFIG(zvkned, ZVKNED);
+KVM_ISA_EXT_SIMPLE_CONFIG(zvknha, ZVKNHA);
+KVM_ISA_EXT_SIMPLE_CONFIG(zvknhb, ZVKNHB);
+KVM_ISA_EXT_SIMPLE_CONFIG(zvksed, ZVKSED);
+KVM_ISA_EXT_SIMPLE_CONFIG(zvksh, ZVKSH);
+KVM_ISA_EXT_SIMPLE_CONFIG(zvkt, ZVKT);
+
+struct vcpu_reg_list *vcpu_configs[] = {
+ &config_sbi_base,
+ &config_sbi_sta,
+ &config_sbi_pmu,
+ &config_sbi_dbcn,
+ &config_aia,
+ &config_fp_f,
+ &config_fp_d,
+ &config_h,
+ &config_smstateen,
+ &config_sstc,
+ &config_svinval,
+ &config_svnapot,
+ &config_svpbmt,
+ &config_zacas,
+ &config_zba,
+ &config_zbb,
+ &config_zbc,
+ &config_zbkb,
+ &config_zbkc,
+ &config_zbkx,
+ &config_zbs,
+ &config_zfa,
+ &config_zfh,
+ &config_zfhmin,
+ &config_zicbom,
+ &config_zicboz,
+ &config_zicntr,
+ &config_zicond,
+ &config_zicsr,
+ &config_zifencei,
+ &config_zihintntl,
+ &config_zihintpause,
+ &config_zihpm,
+ &config_zknd,
+ &config_zkne,
+ &config_zknh,
+ &config_zkr,
+ &config_zksed,
+ &config_zksh,
+ &config_zkt,
+ &config_ztso,
+ &config_zvbb,
+ &config_zvbc,
+ &config_zvfh,
+ &config_zvfhmin,
+ &config_zvkb,
+ &config_zvkg,
+ &config_zvkned,
+ &config_zvknha,
+ &config_zvknhb,
+ &config_zvksed,
+ &config_zvksh,
+ &config_zvkt,
+};
+int vcpu_configs_n = ARRAY_SIZE(vcpu_configs);
diff --git a/tools/testing/selftests/kvm/rseq_test.c b/tools/testing/selftests/kvm/rseq_test.c
new file mode 100644
index 000000000000..28f97fb52044
--- /dev/null
+++ b/tools/testing/selftests/kvm/rseq_test.c
@@ -0,0 +1,268 @@
+// SPDX-License-Identifier: GPL-2.0-only
+#define _GNU_SOURCE /* for program_invocation_short_name */
+#include <errno.h>
+#include <fcntl.h>
+#include <pthread.h>
+#include <sched.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <signal.h>
+#include <syscall.h>
+#include <sys/ioctl.h>
+#include <sys/sysinfo.h>
+#include <asm/barrier.h>
+#include <linux/atomic.h>
+#include <linux/rseq.h>
+#include <linux/unistd.h>
+
+#include "kvm_util.h"
+#include "processor.h"
+#include "test_util.h"
+
+#include "../rseq/rseq.c"
+
+/*
+ * Any bug related to task migration is likely to be timing-dependent; perform
+ * a large number of migrations to reduce the odds of a false negative.
+ */
+#define NR_TASK_MIGRATIONS 100000
+
+static pthread_t migration_thread;
+static cpu_set_t possible_mask;
+static int min_cpu, max_cpu;
+static bool done;
+
+static atomic_t seq_cnt;
+
+static void guest_code(void)
+{
+ for (;;)
+ GUEST_SYNC(0);
+}
+
+static int next_cpu(int cpu)
+{
+ /*
+ * Advance to the next CPU, skipping those that weren't in the original
+ * affinity set. Sadly, there is no CPU_SET_FOR_EACH, and cpu_set_t's
+ * data storage is considered as opaque. Note, if this task is pinned
+ * to a small set of discontigous CPUs, e.g. 2 and 1023, this loop will
+ * burn a lot cycles and the test will take longer than normal to
+ * complete.
+ */
+ do {
+ cpu++;
+ if (cpu > max_cpu) {
+ cpu = min_cpu;
+ TEST_ASSERT(CPU_ISSET(cpu, &possible_mask),
+ "Min CPU = %d must always be usable", cpu);
+ break;
+ }
+ } while (!CPU_ISSET(cpu, &possible_mask));
+
+ return cpu;
+}
+
+static void *migration_worker(void *__rseq_tid)
+{
+ pid_t rseq_tid = (pid_t)(unsigned long)__rseq_tid;
+ cpu_set_t allowed_mask;
+ int r, i, cpu;
+
+ CPU_ZERO(&allowed_mask);
+
+ for (i = 0, cpu = min_cpu; i < NR_TASK_MIGRATIONS; i++, cpu = next_cpu(cpu)) {
+ CPU_SET(cpu, &allowed_mask);
+
+ /*
+ * Bump the sequence count twice to allow the reader to detect
+ * that a migration may have occurred in between rseq and sched
+ * CPU ID reads. An odd sequence count indicates a migration
+ * is in-progress, while a completely different count indicates
+ * a migration occurred since the count was last read.
+ */
+ atomic_inc(&seq_cnt);
+
+ /*
+ * Ensure the odd count is visible while getcpu() isn't
+ * stable, i.e. while changing affinity is in-progress.
+ */
+ smp_wmb();
+ r = sched_setaffinity(rseq_tid, sizeof(allowed_mask), &allowed_mask);
+ TEST_ASSERT(!r, "sched_setaffinity failed, errno = %d (%s)",
+ errno, strerror(errno));
+ smp_wmb();
+ atomic_inc(&seq_cnt);
+
+ CPU_CLR(cpu, &allowed_mask);
+
+ /*
+ * Wait 1-10us before proceeding to the next iteration and more
+ * specifically, before bumping seq_cnt again. A delay is
+ * needed on three fronts:
+ *
+ * 1. To allow sched_setaffinity() to prompt migration before
+ * ioctl(KVM_RUN) enters the guest so that TIF_NOTIFY_RESUME
+ * (or TIF_NEED_RESCHED, which indirectly leads to handling
+ * NOTIFY_RESUME) is handled in KVM context.
+ *
+ * If NOTIFY_RESUME/NEED_RESCHED is set after KVM enters
+ * the guest, the guest will trigger a IO/MMIO exit all the
+ * way to userspace and the TIF flags will be handled by
+ * the generic "exit to userspace" logic, not by KVM. The
+ * exit to userspace is necessary to give the test a chance
+ * to check the rseq CPU ID (see #2).
+ *
+ * Alternatively, guest_code() could include an instruction
+ * to trigger an exit that is handled by KVM, but any such
+ * exit requires architecture specific code.
+ *
+ * 2. To let ioctl(KVM_RUN) make its way back to the test
+ * before the next round of migration. The test's check on
+ * the rseq CPU ID must wait for migration to complete in
+ * order to avoid false positive, thus any kernel rseq bug
+ * will be missed if the next migration starts before the
+ * check completes.
+ *
+ * 3. To ensure the read-side makes efficient forward progress,
+ * e.g. if getcpu() involves a syscall. Stalling the read-side
+ * means the test will spend more time waiting for getcpu()
+ * to stabilize and less time trying to hit the timing-dependent
+ * bug.
+ *
+ * Because any bug in this area is likely to be timing-dependent,
+ * run with a range of delays at 1us intervals from 1us to 10us
+ * as a best effort to avoid tuning the test to the point where
+ * it can hit _only_ the original bug and not detect future
+ * regressions.
+ *
+ * The original bug can reproduce with a delay up to ~500us on
+ * x86-64, but starts to require more iterations to reproduce
+ * as the delay creeps above ~10us, and the average runtime of
+ * each iteration obviously increases as well. Cap the delay
+ * at 10us to keep test runtime reasonable while minimizing
+ * potential coverage loss.
+ *
+ * The lower bound for reproducing the bug is likely below 1us,
+ * e.g. failures occur on x86-64 with nanosleep(0), but at that
+ * point the overhead of the syscall likely dominates the delay.
+ * Use usleep() for simplicity and to avoid unnecessary kernel
+ * dependencies.
+ */
+ usleep((i % 10) + 1);
+ }
+ done = true;
+ return NULL;
+}
+
+static void calc_min_max_cpu(void)
+{
+ int i, cnt, nproc;
+
+ TEST_REQUIRE(CPU_COUNT(&possible_mask) >= 2);
+
+ /*
+ * CPU_SET doesn't provide a FOR_EACH helper, get the min/max CPU that
+ * this task is affined to in order to reduce the time spent querying
+ * unusable CPUs, e.g. if this task is pinned to a small percentage of
+ * total CPUs.
+ */
+ nproc = get_nprocs_conf();
+ min_cpu = -1;
+ max_cpu = -1;
+ cnt = 0;
+
+ for (i = 0; i < nproc; i++) {
+ if (!CPU_ISSET(i, &possible_mask))
+ continue;
+ if (min_cpu == -1)
+ min_cpu = i;
+ max_cpu = i;
+ cnt++;
+ }
+
+ __TEST_REQUIRE(cnt >= 2,
+ "Only one usable CPU, task migration not possible");
+}
+
+int main(int argc, char *argv[])
+{
+ int r, i, snapshot;
+ struct kvm_vm *vm;
+ struct kvm_vcpu *vcpu;
+ u32 cpu, rseq_cpu;
+
+ r = sched_getaffinity(0, sizeof(possible_mask), &possible_mask);
+ TEST_ASSERT(!r, "sched_getaffinity failed, errno = %d (%s)", errno,
+ strerror(errno));
+
+ calc_min_max_cpu();
+
+ r = rseq_register_current_thread();
+ TEST_ASSERT(!r, "rseq_register_current_thread failed, errno = %d (%s)",
+ errno, strerror(errno));
+
+ /*
+ * Create and run a dummy VM that immediately exits to userspace via
+ * GUEST_SYNC, while concurrently migrating the process by setting its
+ * CPU affinity.
+ */
+ vm = vm_create_with_one_vcpu(&vcpu, guest_code);
+
+ pthread_create(&migration_thread, NULL, migration_worker,
+ (void *)(unsigned long)syscall(SYS_gettid));
+
+ for (i = 0; !done; i++) {
+ vcpu_run(vcpu);
+ TEST_ASSERT(get_ucall(vcpu, NULL) == UCALL_SYNC,
+ "Guest failed?");
+
+ /*
+ * Verify rseq's CPU matches sched's CPU. Ensure migration
+ * doesn't occur between getcpu() and reading the rseq cpu_id
+ * by rereading both if the sequence count changes, or if the
+ * count is odd (migration in-progress).
+ */
+ do {
+ /*
+ * Drop bit 0 to force a mismatch if the count is odd,
+ * i.e. if a migration is in-progress.
+ */
+ snapshot = atomic_read(&seq_cnt) & ~1;
+
+ /*
+ * Ensure calling getcpu() and reading rseq.cpu_id complete
+ * in a single "no migration" window, i.e. are not reordered
+ * across the seq_cnt reads.
+ */
+ smp_rmb();
+ r = sys_getcpu(&cpu, NULL);
+ TEST_ASSERT(!r, "getcpu failed, errno = %d (%s)",
+ errno, strerror(errno));
+ rseq_cpu = rseq_current_cpu_raw();
+ smp_rmb();
+ } while (snapshot != atomic_read(&seq_cnt));
+
+ TEST_ASSERT(rseq_cpu == cpu,
+ "rseq CPU = %d, sched CPU = %d", rseq_cpu, cpu);
+ }
+
+ /*
+ * Sanity check that the test was able to enter the guest a reasonable
+ * number of times, e.g. didn't get stalled too often/long waiting for
+ * getcpu() to stabilize. A 2:1 migration:KVM_RUN ratio is a fairly
+ * conservative ratio on x86-64, which can do _more_ KVM_RUNs than
+ * migrations given the 1us+ delay in the migration task.
+ */
+ TEST_ASSERT(i > (NR_TASK_MIGRATIONS / 2),
+ "Only performed %d KVM_RUNs, task stalled too much?", i);
+
+ pthread_join(migration_thread, NULL);
+
+ kvm_vm_free(vm);
+
+ rseq_unregister_current_thread();
+
+ return 0;
+}
diff --git a/tools/testing/selftests/kvm/s390x/cmma_test.c b/tools/testing/selftests/kvm/s390x/cmma_test.c
new file mode 100644
index 000000000000..626a2b8a2037
--- /dev/null
+++ b/tools/testing/selftests/kvm/s390x/cmma_test.c
@@ -0,0 +1,695 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Test for s390x CMMA migration
+ *
+ * Copyright IBM Corp. 2023
+ *
+ * Authors:
+ * Nico Boehr <nrb@linux.ibm.com>
+ */
+
+#define _GNU_SOURCE /* for program_invocation_short_name */
+#include <fcntl.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/ioctl.h>
+
+#include "test_util.h"
+#include "kvm_util.h"
+#include "kselftest.h"
+
+#define MAIN_PAGE_COUNT 512
+
+#define TEST_DATA_PAGE_COUNT 512
+#define TEST_DATA_MEMSLOT 1
+#define TEST_DATA_START_GFN 4096
+
+#define TEST_DATA_TWO_PAGE_COUNT 256
+#define TEST_DATA_TWO_MEMSLOT 2
+#define TEST_DATA_TWO_START_GFN 8192
+
+static char cmma_value_buf[MAIN_PAGE_COUNT + TEST_DATA_PAGE_COUNT];
+
+/**
+ * Dirty CMMA attributes of exactly one page in the TEST_DATA memslot,
+ * so use_cmma goes on and the CMMA related ioctls do something.
+ */
+static void guest_do_one_essa(void)
+{
+ asm volatile(
+ /* load TEST_DATA_START_GFN into r1 */
+ " llilf 1,%[start_gfn]\n"
+ /* calculate the address from the gfn */
+ " sllg 1,1,12(0)\n"
+ /* set the first page in TEST_DATA memslot to STABLE */
+ " .insn rrf,0xb9ab0000,2,1,1,0\n"
+ /* hypercall */
+ " diag 0,0,0x501\n"
+ "0: j 0b"
+ :
+ : [start_gfn] "L"(TEST_DATA_START_GFN)
+ : "r1", "r2", "memory", "cc"
+ );
+}
+
+/**
+ * Touch CMMA attributes of all pages in TEST_DATA memslot. Set them to stable
+ * state.
+ */
+static void guest_dirty_test_data(void)
+{
+ asm volatile(
+ /* r1 = TEST_DATA_START_GFN */
+ " xgr 1,1\n"
+ " llilf 1,%[start_gfn]\n"
+ /* r5 = TEST_DATA_PAGE_COUNT */
+ " lghi 5,%[page_count]\n"
+ /* r5 += r1 */
+ "2: agfr 5,1\n"
+ /* r2 = r1 << 12 */
+ "1: sllg 2,1,12(0)\n"
+ /* essa(r4, r2, SET_STABLE) */
+ " .insn rrf,0xb9ab0000,4,2,1,0\n"
+ /* i++ */
+ " agfi 1,1\n"
+ /* if r1 < r5 goto 1 */
+ " cgrjl 1,5,1b\n"
+ /* hypercall */
+ " diag 0,0,0x501\n"
+ "0: j 0b"
+ :
+ : [start_gfn] "L"(TEST_DATA_START_GFN),
+ [page_count] "L"(TEST_DATA_PAGE_COUNT)
+ :
+ /* the counter in our loop over the pages */
+ "r1",
+ /* the calculated page physical address */
+ "r2",
+ /* ESSA output register */
+ "r4",
+ /* last page */
+ "r5",
+ "cc", "memory"
+ );
+}
+
+static void create_main_memslot(struct kvm_vm *vm)
+{
+ int i;
+
+ vm_userspace_mem_region_add(vm, VM_MEM_SRC_ANONYMOUS, 0, 0, MAIN_PAGE_COUNT, 0);
+ /* set the array of memslots to zero like __vm_create does */
+ for (i = 0; i < NR_MEM_REGIONS; i++)
+ vm->memslots[i] = 0;
+}
+
+static void create_test_memslot(struct kvm_vm *vm)
+{
+ vm_userspace_mem_region_add(vm,
+ VM_MEM_SRC_ANONYMOUS,
+ TEST_DATA_START_GFN << vm->page_shift,
+ TEST_DATA_MEMSLOT,
+ TEST_DATA_PAGE_COUNT,
+ 0
+ );
+ vm->memslots[MEM_REGION_TEST_DATA] = TEST_DATA_MEMSLOT;
+}
+
+static void create_memslots(struct kvm_vm *vm)
+{
+ /*
+ * Our VM has the following memory layout:
+ * +------+---------------------------+
+ * | GFN | Memslot |
+ * +------+---------------------------+
+ * | 0 | |
+ * | ... | MAIN (Code, Stack, ...) |
+ * | 511 | |
+ * +------+---------------------------+
+ * | 4096 | |
+ * | ... | TEST_DATA |
+ * | 4607 | |
+ * +------+---------------------------+
+ */
+ create_main_memslot(vm);
+ create_test_memslot(vm);
+}
+
+static void finish_vm_setup(struct kvm_vm *vm)
+{
+ struct userspace_mem_region *slot0;
+
+ kvm_vm_elf_load(vm, program_invocation_name);
+
+ slot0 = memslot2region(vm, 0);
+ ucall_init(vm, slot0->region.guest_phys_addr + slot0->region.memory_size);
+
+ kvm_arch_vm_post_create(vm);
+}
+
+static struct kvm_vm *create_vm_two_memslots(void)
+{
+ struct kvm_vm *vm;
+
+ vm = vm_create_barebones();
+
+ create_memslots(vm);
+
+ finish_vm_setup(vm);
+
+ return vm;
+}
+
+static void enable_cmma(struct kvm_vm *vm)
+{
+ int r;
+
+ r = __kvm_device_attr_set(vm->fd, KVM_S390_VM_MEM_CTRL, KVM_S390_VM_MEM_ENABLE_CMMA, NULL);
+ TEST_ASSERT(!r, "enabling cmma failed r=%d errno=%d", r, errno);
+}
+
+static void enable_dirty_tracking(struct kvm_vm *vm)
+{
+ vm_mem_region_set_flags(vm, 0, KVM_MEM_LOG_DIRTY_PAGES);
+ vm_mem_region_set_flags(vm, TEST_DATA_MEMSLOT, KVM_MEM_LOG_DIRTY_PAGES);
+}
+
+static int __enable_migration_mode(struct kvm_vm *vm)
+{
+ return __kvm_device_attr_set(vm->fd,
+ KVM_S390_VM_MIGRATION,
+ KVM_S390_VM_MIGRATION_START,
+ NULL
+ );
+}
+
+static void enable_migration_mode(struct kvm_vm *vm)
+{
+ int r = __enable_migration_mode(vm);
+
+ TEST_ASSERT(!r, "enabling migration mode failed r=%d errno=%d", r, errno);
+}
+
+static bool is_migration_mode_on(struct kvm_vm *vm)
+{
+ u64 out;
+ int r;
+
+ r = __kvm_device_attr_get(vm->fd,
+ KVM_S390_VM_MIGRATION,
+ KVM_S390_VM_MIGRATION_STATUS,
+ &out
+ );
+ TEST_ASSERT(!r, "getting migration mode status failed r=%d errno=%d", r, errno);
+ return out;
+}
+
+static int vm_get_cmma_bits(struct kvm_vm *vm, u64 flags, int *errno_out)
+{
+ struct kvm_s390_cmma_log args;
+ int rc;
+
+ errno = 0;
+
+ args = (struct kvm_s390_cmma_log){
+ .start_gfn = 0,
+ .count = sizeof(cmma_value_buf),
+ .flags = flags,
+ .values = (__u64)&cmma_value_buf[0]
+ };
+ rc = __vm_ioctl(vm, KVM_S390_GET_CMMA_BITS, &args);
+
+ *errno_out = errno;
+ return rc;
+}
+
+static void test_get_cmma_basic(void)
+{
+ struct kvm_vm *vm = create_vm_two_memslots();
+ struct kvm_vcpu *vcpu;
+ int rc, errno_out;
+
+ /* GET_CMMA_BITS without CMMA enabled should fail */
+ rc = vm_get_cmma_bits(vm, 0, &errno_out);
+ TEST_ASSERT_EQ(rc, -1);
+ TEST_ASSERT_EQ(errno_out, ENXIO);
+
+ enable_cmma(vm);
+ vcpu = vm_vcpu_add(vm, 1, guest_do_one_essa);
+
+ vcpu_run(vcpu);
+
+ /* GET_CMMA_BITS without migration mode and without peeking should fail */
+ rc = vm_get_cmma_bits(vm, 0, &errno_out);
+ TEST_ASSERT_EQ(rc, -1);
+ TEST_ASSERT_EQ(errno_out, EINVAL);
+
+ /* GET_CMMA_BITS without migration mode and with peeking should work */
+ rc = vm_get_cmma_bits(vm, KVM_S390_CMMA_PEEK, &errno_out);
+ TEST_ASSERT_EQ(rc, 0);
+ TEST_ASSERT_EQ(errno_out, 0);
+
+ enable_dirty_tracking(vm);
+ enable_migration_mode(vm);
+
+ /* GET_CMMA_BITS with invalid flags */
+ rc = vm_get_cmma_bits(vm, 0xfeedc0fe, &errno_out);
+ TEST_ASSERT_EQ(rc, -1);
+ TEST_ASSERT_EQ(errno_out, EINVAL);
+
+ kvm_vm_free(vm);
+}
+
+static void assert_exit_was_hypercall(struct kvm_vcpu *vcpu)
+{
+ TEST_ASSERT_EQ(vcpu->run->exit_reason, 13);
+ TEST_ASSERT_EQ(vcpu->run->s390_sieic.icptcode, 4);
+ TEST_ASSERT_EQ(vcpu->run->s390_sieic.ipa, 0x8300);
+ TEST_ASSERT_EQ(vcpu->run->s390_sieic.ipb, 0x5010000);
+}
+
+static void test_migration_mode(void)
+{
+ struct kvm_vm *vm = vm_create_barebones();
+ struct kvm_vcpu *vcpu;
+ u64 orig_psw;
+ int rc;
+
+ /* enabling migration mode on a VM without memory should fail */
+ rc = __enable_migration_mode(vm);
+ TEST_ASSERT_EQ(rc, -1);
+ TEST_ASSERT_EQ(errno, EINVAL);
+ TEST_ASSERT(!is_migration_mode_on(vm), "migration mode should still be off");
+ errno = 0;
+
+ create_memslots(vm);
+ finish_vm_setup(vm);
+
+ enable_cmma(vm);
+ vcpu = vm_vcpu_add(vm, 1, guest_do_one_essa);
+ orig_psw = vcpu->run->psw_addr;
+
+ /*
+ * Execute one essa instruction in the guest. Otherwise the guest will
+ * not have use_cmm enabled and GET_CMMA_BITS will return no pages.
+ */
+ vcpu_run(vcpu);
+ assert_exit_was_hypercall(vcpu);
+
+ /* migration mode when memslots have dirty tracking off should fail */
+ rc = __enable_migration_mode(vm);
+ TEST_ASSERT_EQ(rc, -1);
+ TEST_ASSERT_EQ(errno, EINVAL);
+ TEST_ASSERT(!is_migration_mode_on(vm), "migration mode should still be off");
+ errno = 0;
+
+ /* enable dirty tracking */
+ enable_dirty_tracking(vm);
+
+ /* enabling migration mode should work now */
+ rc = __enable_migration_mode(vm);
+ TEST_ASSERT_EQ(rc, 0);
+ TEST_ASSERT(is_migration_mode_on(vm), "migration mode should be on");
+ errno = 0;
+
+ /* execute another ESSA instruction to see this goes fine */
+ vcpu->run->psw_addr = orig_psw;
+ vcpu_run(vcpu);
+ assert_exit_was_hypercall(vcpu);
+
+ /*
+ * With migration mode on, create a new memslot with dirty tracking off.
+ * This should turn off migration mode.
+ */
+ TEST_ASSERT(is_migration_mode_on(vm), "migration mode should be on");
+ vm_userspace_mem_region_add(vm,
+ VM_MEM_SRC_ANONYMOUS,
+ TEST_DATA_TWO_START_GFN << vm->page_shift,
+ TEST_DATA_TWO_MEMSLOT,
+ TEST_DATA_TWO_PAGE_COUNT,
+ 0
+ );
+ TEST_ASSERT(!is_migration_mode_on(vm),
+ "creating memslot without dirty tracking turns off migration mode"
+ );
+
+ /* ESSA instructions should still execute fine */
+ vcpu->run->psw_addr = orig_psw;
+ vcpu_run(vcpu);
+ assert_exit_was_hypercall(vcpu);
+
+ /*
+ * Turn on dirty tracking on the new memslot.
+ * It should be possible to turn migration mode back on again.
+ */
+ vm_mem_region_set_flags(vm, TEST_DATA_TWO_MEMSLOT, KVM_MEM_LOG_DIRTY_PAGES);
+ rc = __enable_migration_mode(vm);
+ TEST_ASSERT_EQ(rc, 0);
+ TEST_ASSERT(is_migration_mode_on(vm), "migration mode should be on");
+ errno = 0;
+
+ /*
+ * Turn off dirty tracking again, this time with just a flag change.
+ * Again, migration mode should turn off.
+ */
+ TEST_ASSERT(is_migration_mode_on(vm), "migration mode should be on");
+ vm_mem_region_set_flags(vm, TEST_DATA_TWO_MEMSLOT, 0);
+ TEST_ASSERT(!is_migration_mode_on(vm),
+ "disabling dirty tracking should turn off migration mode"
+ );
+
+ /* ESSA instructions should still execute fine */
+ vcpu->run->psw_addr = orig_psw;
+ vcpu_run(vcpu);
+ assert_exit_was_hypercall(vcpu);
+
+ kvm_vm_free(vm);
+}
+
+/**
+ * Given a VM with the MAIN and TEST_DATA memslot, assert that both slots have
+ * CMMA attributes of all pages in both memslots and nothing more dirty.
+ * This has the useful side effect of ensuring nothing is CMMA dirty after this
+ * function.
+ */
+static void assert_all_slots_cmma_dirty(struct kvm_vm *vm)
+{
+ struct kvm_s390_cmma_log args;
+
+ /*
+ * First iteration - everything should be dirty.
+ * Start at the main memslot...
+ */
+ args = (struct kvm_s390_cmma_log){
+ .start_gfn = 0,
+ .count = sizeof(cmma_value_buf),
+ .flags = 0,
+ .values = (__u64)&cmma_value_buf[0]
+ };
+ memset(cmma_value_buf, 0xff, sizeof(cmma_value_buf));
+ vm_ioctl(vm, KVM_S390_GET_CMMA_BITS, &args);
+ TEST_ASSERT_EQ(args.count, MAIN_PAGE_COUNT);
+ TEST_ASSERT_EQ(args.remaining, TEST_DATA_PAGE_COUNT);
+ TEST_ASSERT_EQ(args.start_gfn, 0);
+
+ /* ...and then - after a hole - the TEST_DATA memslot should follow */
+ args = (struct kvm_s390_cmma_log){
+ .start_gfn = MAIN_PAGE_COUNT,
+ .count = sizeof(cmma_value_buf),
+ .flags = 0,
+ .values = (__u64)&cmma_value_buf[0]
+ };
+ memset(cmma_value_buf, 0xff, sizeof(cmma_value_buf));
+ vm_ioctl(vm, KVM_S390_GET_CMMA_BITS, &args);
+ TEST_ASSERT_EQ(args.count, TEST_DATA_PAGE_COUNT);
+ TEST_ASSERT_EQ(args.start_gfn, TEST_DATA_START_GFN);
+ TEST_ASSERT_EQ(args.remaining, 0);
+
+ /* ...and nothing else should be there */
+ args = (struct kvm_s390_cmma_log){
+ .start_gfn = TEST_DATA_START_GFN + TEST_DATA_PAGE_COUNT,
+ .count = sizeof(cmma_value_buf),
+ .flags = 0,
+ .values = (__u64)&cmma_value_buf[0]
+ };
+ memset(cmma_value_buf, 0xff, sizeof(cmma_value_buf));
+ vm_ioctl(vm, KVM_S390_GET_CMMA_BITS, &args);
+ TEST_ASSERT_EQ(args.count, 0);
+ TEST_ASSERT_EQ(args.start_gfn, 0);
+ TEST_ASSERT_EQ(args.remaining, 0);
+}
+
+/**
+ * Given a VM, assert no pages are CMMA dirty.
+ */
+static void assert_no_pages_cmma_dirty(struct kvm_vm *vm)
+{
+ struct kvm_s390_cmma_log args;
+
+ /* If we start from GFN 0 again, nothing should be dirty. */
+ args = (struct kvm_s390_cmma_log){
+ .start_gfn = 0,
+ .count = sizeof(cmma_value_buf),
+ .flags = 0,
+ .values = (__u64)&cmma_value_buf[0]
+ };
+ memset(cmma_value_buf, 0xff, sizeof(cmma_value_buf));
+ vm_ioctl(vm, KVM_S390_GET_CMMA_BITS, &args);
+ if (args.count || args.remaining || args.start_gfn)
+ TEST_FAIL("pages are still dirty start_gfn=0x%llx count=%u remaining=%llu",
+ args.start_gfn,
+ args.count,
+ args.remaining
+ );
+}
+
+static void test_get_inital_dirty(void)
+{
+ struct kvm_vm *vm = create_vm_two_memslots();
+ struct kvm_vcpu *vcpu;
+
+ enable_cmma(vm);
+ vcpu = vm_vcpu_add(vm, 1, guest_do_one_essa);
+
+ /*
+ * Execute one essa instruction in the guest. Otherwise the guest will
+ * not have use_cmm enabled and GET_CMMA_BITS will return no pages.
+ */
+ vcpu_run(vcpu);
+ assert_exit_was_hypercall(vcpu);
+
+ enable_dirty_tracking(vm);
+ enable_migration_mode(vm);
+
+ assert_all_slots_cmma_dirty(vm);
+
+ /* Start from the beginning again and make sure nothing else is dirty */
+ assert_no_pages_cmma_dirty(vm);
+
+ kvm_vm_free(vm);
+}
+
+static void query_cmma_range(struct kvm_vm *vm,
+ u64 start_gfn, u64 gfn_count,
+ struct kvm_s390_cmma_log *res_out)
+{
+ *res_out = (struct kvm_s390_cmma_log){
+ .start_gfn = start_gfn,
+ .count = gfn_count,
+ .flags = 0,
+ .values = (__u64)&cmma_value_buf[0]
+ };
+ memset(cmma_value_buf, 0xff, sizeof(cmma_value_buf));
+ vm_ioctl(vm, KVM_S390_GET_CMMA_BITS, res_out);
+}
+
+/**
+ * Assert the given cmma_log struct that was executed by query_cmma_range()
+ * indicates the first dirty gfn is at first_dirty_gfn and contains exactly
+ * dirty_gfn_count CMMA values.
+ */
+static void assert_cmma_dirty(u64 first_dirty_gfn,
+ u64 dirty_gfn_count,
+ const struct kvm_s390_cmma_log *res)
+{
+ TEST_ASSERT_EQ(res->start_gfn, first_dirty_gfn);
+ TEST_ASSERT_EQ(res->count, dirty_gfn_count);
+ for (size_t i = 0; i < dirty_gfn_count; i++)
+ TEST_ASSERT_EQ(cmma_value_buf[0], 0x0); /* stable state */
+ TEST_ASSERT_EQ(cmma_value_buf[dirty_gfn_count], 0xff); /* not touched */
+}
+
+static void test_get_skip_holes(void)
+{
+ size_t gfn_offset;
+ struct kvm_vm *vm = create_vm_two_memslots();
+ struct kvm_s390_cmma_log log;
+ struct kvm_vcpu *vcpu;
+ u64 orig_psw;
+
+ enable_cmma(vm);
+ vcpu = vm_vcpu_add(vm, 1, guest_dirty_test_data);
+
+ orig_psw = vcpu->run->psw_addr;
+
+ /*
+ * Execute some essa instructions in the guest. Otherwise the guest will
+ * not have use_cmm enabled and GET_CMMA_BITS will return no pages.
+ */
+ vcpu_run(vcpu);
+ assert_exit_was_hypercall(vcpu);
+
+ enable_dirty_tracking(vm);
+ enable_migration_mode(vm);
+
+ /* un-dirty all pages */
+ assert_all_slots_cmma_dirty(vm);
+
+ /* Then, dirty just the TEST_DATA memslot */
+ vcpu->run->psw_addr = orig_psw;
+ vcpu_run(vcpu);
+
+ gfn_offset = TEST_DATA_START_GFN;
+ /**
+ * Query CMMA attributes of one page, starting at page 0. Since the
+ * main memslot was not touched by the VM, this should yield the first
+ * page of the TEST_DATA memslot.
+ * The dirty bitmap should now look like this:
+ * 0: not dirty
+ * [0x1, 0x200): dirty
+ */
+ query_cmma_range(vm, 0, 1, &log);
+ assert_cmma_dirty(gfn_offset, 1, &log);
+ gfn_offset++;
+
+ /**
+ * Query CMMA attributes of 32 (0x20) pages past the end of the TEST_DATA
+ * memslot. This should wrap back to the beginning of the TEST_DATA
+ * memslot, page 1.
+ * The dirty bitmap should now look like this:
+ * [0, 0x21): not dirty
+ * [0x21, 0x200): dirty
+ */
+ query_cmma_range(vm, TEST_DATA_START_GFN + TEST_DATA_PAGE_COUNT, 0x20, &log);
+ assert_cmma_dirty(gfn_offset, 0x20, &log);
+ gfn_offset += 0x20;
+
+ /* Skip 32 pages */
+ gfn_offset += 0x20;
+
+ /**
+ * After skipping 32 pages, query the next 32 (0x20) pages.
+ * The dirty bitmap should now look like this:
+ * [0, 0x21): not dirty
+ * [0x21, 0x41): dirty
+ * [0x41, 0x61): not dirty
+ * [0x61, 0x200): dirty
+ */
+ query_cmma_range(vm, gfn_offset, 0x20, &log);
+ assert_cmma_dirty(gfn_offset, 0x20, &log);
+ gfn_offset += 0x20;
+
+ /**
+ * Query 1 page from the beginning of the TEST_DATA memslot. This should
+ * yield page 0x21.
+ * The dirty bitmap should now look like this:
+ * [0, 0x22): not dirty
+ * [0x22, 0x41): dirty
+ * [0x41, 0x61): not dirty
+ * [0x61, 0x200): dirty
+ */
+ query_cmma_range(vm, TEST_DATA_START_GFN, 1, &log);
+ assert_cmma_dirty(TEST_DATA_START_GFN + 0x21, 1, &log);
+ gfn_offset++;
+
+ /**
+ * Query 15 (0xF) pages from page 0x23 in TEST_DATA memslot.
+ * This should yield pages [0x23, 0x33).
+ * The dirty bitmap should now look like this:
+ * [0, 0x22): not dirty
+ * 0x22: dirty
+ * [0x23, 0x33): not dirty
+ * [0x33, 0x41): dirty
+ * [0x41, 0x61): not dirty
+ * [0x61, 0x200): dirty
+ */
+ gfn_offset = TEST_DATA_START_GFN + 0x23;
+ query_cmma_range(vm, gfn_offset, 15, &log);
+ assert_cmma_dirty(gfn_offset, 15, &log);
+
+ /**
+ * Query 17 (0x11) pages from page 0x22 in TEST_DATA memslot.
+ * This should yield page [0x22, 0x33)
+ * The dirty bitmap should now look like this:
+ * [0, 0x33): not dirty
+ * [0x33, 0x41): dirty
+ * [0x41, 0x61): not dirty
+ * [0x61, 0x200): dirty
+ */
+ gfn_offset = TEST_DATA_START_GFN + 0x22;
+ query_cmma_range(vm, gfn_offset, 17, &log);
+ assert_cmma_dirty(gfn_offset, 17, &log);
+
+ /**
+ * Query 25 (0x19) pages from page 0x40 in TEST_DATA memslot.
+ * This should yield page 0x40 and nothing more, since there are more
+ * than 16 non-dirty pages after page 0x40.
+ * The dirty bitmap should now look like this:
+ * [0, 0x33): not dirty
+ * [0x33, 0x40): dirty
+ * [0x40, 0x61): not dirty
+ * [0x61, 0x200): dirty
+ */
+ gfn_offset = TEST_DATA_START_GFN + 0x40;
+ query_cmma_range(vm, gfn_offset, 25, &log);
+ assert_cmma_dirty(gfn_offset, 1, &log);
+
+ /**
+ * Query pages [0x33, 0x40).
+ * The dirty bitmap should now look like this:
+ * [0, 0x61): not dirty
+ * [0x61, 0x200): dirty
+ */
+ gfn_offset = TEST_DATA_START_GFN + 0x33;
+ query_cmma_range(vm, gfn_offset, 0x40 - 0x33, &log);
+ assert_cmma_dirty(gfn_offset, 0x40 - 0x33, &log);
+
+ /**
+ * Query the remaining pages [0x61, 0x200).
+ */
+ gfn_offset = TEST_DATA_START_GFN;
+ query_cmma_range(vm, gfn_offset, TEST_DATA_PAGE_COUNT - 0x61, &log);
+ assert_cmma_dirty(TEST_DATA_START_GFN + 0x61, TEST_DATA_PAGE_COUNT - 0x61, &log);
+
+ assert_no_pages_cmma_dirty(vm);
+}
+
+struct testdef {
+ const char *name;
+ void (*test)(void);
+} testlist[] = {
+ { "migration mode and dirty tracking", test_migration_mode },
+ { "GET_CMMA_BITS: basic calls", test_get_cmma_basic },
+ { "GET_CMMA_BITS: all pages are dirty initally", test_get_inital_dirty },
+ { "GET_CMMA_BITS: holes are skipped", test_get_skip_holes },
+};
+
+/**
+ * The kernel may support CMMA, but the machine may not (i.e. if running as
+ * guest-3).
+ *
+ * In this case, the CMMA capabilities are all there, but the CMMA-related
+ * ioctls fail. To find out whether the machine supports CMMA, create a
+ * temporary VM and then query the CMMA feature of the VM.
+ */
+static int machine_has_cmma(void)
+{
+ struct kvm_vm *vm = vm_create_barebones();
+ int r;
+
+ r = !__kvm_has_device_attr(vm->fd, KVM_S390_VM_MEM_CTRL, KVM_S390_VM_MEM_ENABLE_CMMA);
+ kvm_vm_free(vm);
+
+ return r;
+}
+
+int main(int argc, char *argv[])
+{
+ int idx;
+
+ TEST_REQUIRE(kvm_has_cap(KVM_CAP_SYNC_REGS));
+ TEST_REQUIRE(kvm_has_cap(KVM_CAP_S390_CMMA_MIGRATION));
+ TEST_REQUIRE(machine_has_cmma());
+
+ ksft_print_header();
+
+ ksft_set_plan(ARRAY_SIZE(testlist));
+
+ for (idx = 0; idx < ARRAY_SIZE(testlist); idx++) {
+ testlist[idx].test();
+ ksft_test_result_pass("%s\n", testlist[idx].name);
+ }
+
+ ksft_finished(); /* Print results and exit() accordingly */
+}
diff --git a/tools/testing/selftests/kvm/s390x/debug_test.c b/tools/testing/selftests/kvm/s390x/debug_test.c
new file mode 100644
index 000000000000..84313fb27529
--- /dev/null
+++ b/tools/testing/selftests/kvm/s390x/debug_test.c
@@ -0,0 +1,160 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/* Test KVM debugging features. */
+#include "kvm_util.h"
+#include "test_util.h"
+
+#include <linux/kvm.h>
+
+#define __LC_SVC_NEW_PSW 0x1c0
+#define __LC_PGM_NEW_PSW 0x1d0
+#define ICPT_INSTRUCTION 0x04
+#define IPA0_DIAG 0x8300
+#define PGM_SPECIFICATION 0x06
+
+/* Common code for testing single-stepping interruptions. */
+extern char int_handler[];
+asm("int_handler:\n"
+ "j .\n");
+
+static struct kvm_vm *test_step_int_1(struct kvm_vcpu **vcpu, void *guest_code,
+ size_t new_psw_off, uint64_t *new_psw)
+{
+ struct kvm_guest_debug debug = {};
+ struct kvm_regs regs;
+ struct kvm_vm *vm;
+ char *lowcore;
+
+ vm = vm_create_with_one_vcpu(vcpu, guest_code);
+ lowcore = addr_gpa2hva(vm, 0);
+ new_psw[0] = (*vcpu)->run->psw_mask;
+ new_psw[1] = (uint64_t)int_handler;
+ memcpy(lowcore + new_psw_off, new_psw, 16);
+ vcpu_regs_get(*vcpu, &regs);
+ regs.gprs[2] = -1;
+ vcpu_regs_set(*vcpu, &regs);
+ debug.control = KVM_GUESTDBG_ENABLE | KVM_GUESTDBG_SINGLESTEP;
+ vcpu_guest_debug_set(*vcpu, &debug);
+ vcpu_run(*vcpu);
+
+ return vm;
+}
+
+static void test_step_int(void *guest_code, size_t new_psw_off)
+{
+ struct kvm_vcpu *vcpu;
+ uint64_t new_psw[2];
+ struct kvm_vm *vm;
+
+ vm = test_step_int_1(&vcpu, guest_code, new_psw_off, new_psw);
+ TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_DEBUG);
+ TEST_ASSERT_EQ(vcpu->run->psw_mask, new_psw[0]);
+ TEST_ASSERT_EQ(vcpu->run->psw_addr, new_psw[1]);
+ kvm_vm_free(vm);
+}
+
+/* Test single-stepping "boring" program interruptions. */
+extern char test_step_pgm_guest_code[];
+asm("test_step_pgm_guest_code:\n"
+ ".insn rr,0x1d00,%r1,%r0 /* dr %r1,%r0 */\n"
+ "j .\n");
+
+static void test_step_pgm(void)
+{
+ test_step_int(test_step_pgm_guest_code, __LC_PGM_NEW_PSW);
+}
+
+/*
+ * Test single-stepping program interruptions caused by DIAG.
+ * Userspace emulation must not interfere with single-stepping.
+ */
+extern char test_step_pgm_diag_guest_code[];
+asm("test_step_pgm_diag_guest_code:\n"
+ "diag %r0,%r0,0\n"
+ "j .\n");
+
+static void test_step_pgm_diag(void)
+{
+ struct kvm_s390_irq irq = {
+ .type = KVM_S390_PROGRAM_INT,
+ .u.pgm.code = PGM_SPECIFICATION,
+ };
+ struct kvm_vcpu *vcpu;
+ uint64_t new_psw[2];
+ struct kvm_vm *vm;
+
+ vm = test_step_int_1(&vcpu, test_step_pgm_diag_guest_code,
+ __LC_PGM_NEW_PSW, new_psw);
+ TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_S390_SIEIC);
+ TEST_ASSERT_EQ(vcpu->run->s390_sieic.icptcode, ICPT_INSTRUCTION);
+ TEST_ASSERT_EQ(vcpu->run->s390_sieic.ipa & 0xff00, IPA0_DIAG);
+ vcpu_ioctl(vcpu, KVM_S390_IRQ, &irq);
+ vcpu_run(vcpu);
+ TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_DEBUG);
+ TEST_ASSERT_EQ(vcpu->run->psw_mask, new_psw[0]);
+ TEST_ASSERT_EQ(vcpu->run->psw_addr, new_psw[1]);
+ kvm_vm_free(vm);
+}
+
+/*
+ * Test single-stepping program interruptions caused by ISKE.
+ * CPUSTAT_KSS handling must not interfere with single-stepping.
+ */
+extern char test_step_pgm_iske_guest_code[];
+asm("test_step_pgm_iske_guest_code:\n"
+ "iske %r2,%r2\n"
+ "j .\n");
+
+static void test_step_pgm_iske(void)
+{
+ test_step_int(test_step_pgm_iske_guest_code, __LC_PGM_NEW_PSW);
+}
+
+/*
+ * Test single-stepping program interruptions caused by LCTL.
+ * KVM emulation must not interfere with single-stepping.
+ */
+extern char test_step_pgm_lctl_guest_code[];
+asm("test_step_pgm_lctl_guest_code:\n"
+ "lctl %c0,%c0,1\n"
+ "j .\n");
+
+static void test_step_pgm_lctl(void)
+{
+ test_step_int(test_step_pgm_lctl_guest_code, __LC_PGM_NEW_PSW);
+}
+
+/* Test single-stepping supervisor-call interruptions. */
+extern char test_step_svc_guest_code[];
+asm("test_step_svc_guest_code:\n"
+ "svc 0\n"
+ "j .\n");
+
+static void test_step_svc(void)
+{
+ test_step_int(test_step_svc_guest_code, __LC_SVC_NEW_PSW);
+}
+
+/* Run all tests above. */
+static struct testdef {
+ const char *name;
+ void (*test)(void);
+} testlist[] = {
+ { "single-step pgm", test_step_pgm },
+ { "single-step pgm caused by diag", test_step_pgm_diag },
+ { "single-step pgm caused by iske", test_step_pgm_iske },
+ { "single-step pgm caused by lctl", test_step_pgm_lctl },
+ { "single-step svc", test_step_svc },
+};
+
+int main(int argc, char *argv[])
+{
+ int idx;
+
+ ksft_print_header();
+ ksft_set_plan(ARRAY_SIZE(testlist));
+ for (idx = 0; idx < ARRAY_SIZE(testlist); idx++) {
+ testlist[idx].test();
+ ksft_test_result_pass("%s\n", testlist[idx].name);
+ }
+ ksft_finished();
+}
diff --git a/tools/testing/selftests/kvm/s390x/memop.c b/tools/testing/selftests/kvm/s390x/memop.c
index 9f49ead380ab..48cb910e660d 100644
--- a/tools/testing/selftests/kvm/s390x/memop.c
+++ b/tools/testing/selftests/kvm/s390x/memop.c
@@ -4,163 +4,1185 @@
*
* Copyright (C) 2019, Red Hat, Inc.
*/
-
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/ioctl.h>
+#include <pthread.h>
+
+#include <linux/bits.h>
#include "test_util.h"
#include "kvm_util.h"
+#include "kselftest.h"
+
+enum mop_target {
+ LOGICAL,
+ SIDA,
+ ABSOLUTE,
+ INVALID,
+};
+
+enum mop_access_mode {
+ READ,
+ WRITE,
+ CMPXCHG,
+};
+
+struct mop_desc {
+ uintptr_t gaddr;
+ uintptr_t gaddr_v;
+ uint64_t set_flags;
+ unsigned int f_check : 1;
+ unsigned int f_inject : 1;
+ unsigned int f_key : 1;
+ unsigned int _gaddr_v : 1;
+ unsigned int _set_flags : 1;
+ unsigned int _sida_offset : 1;
+ unsigned int _ar : 1;
+ uint32_t size;
+ enum mop_target target;
+ enum mop_access_mode mode;
+ void *buf;
+ uint32_t sida_offset;
+ void *old;
+ uint8_t old_value[16];
+ bool *cmpxchg_success;
+ uint8_t ar;
+ uint8_t key;
+};
+
+const uint8_t NO_KEY = 0xff;
+
+static struct kvm_s390_mem_op ksmo_from_desc(struct mop_desc *desc)
+{
+ struct kvm_s390_mem_op ksmo = {
+ .gaddr = (uintptr_t)desc->gaddr,
+ .size = desc->size,
+ .buf = ((uintptr_t)desc->buf),
+ .reserved = "ignored_ignored_ignored_ignored"
+ };
+
+ switch (desc->target) {
+ case LOGICAL:
+ if (desc->mode == READ)
+ ksmo.op = KVM_S390_MEMOP_LOGICAL_READ;
+ if (desc->mode == WRITE)
+ ksmo.op = KVM_S390_MEMOP_LOGICAL_WRITE;
+ break;
+ case SIDA:
+ if (desc->mode == READ)
+ ksmo.op = KVM_S390_MEMOP_SIDA_READ;
+ if (desc->mode == WRITE)
+ ksmo.op = KVM_S390_MEMOP_SIDA_WRITE;
+ break;
+ case ABSOLUTE:
+ if (desc->mode == READ)
+ ksmo.op = KVM_S390_MEMOP_ABSOLUTE_READ;
+ if (desc->mode == WRITE)
+ ksmo.op = KVM_S390_MEMOP_ABSOLUTE_WRITE;
+ if (desc->mode == CMPXCHG) {
+ ksmo.op = KVM_S390_MEMOP_ABSOLUTE_CMPXCHG;
+ ksmo.old_addr = (uint64_t)desc->old;
+ memcpy(desc->old_value, desc->old, desc->size);
+ }
+ break;
+ case INVALID:
+ ksmo.op = -1;
+ }
+ if (desc->f_check)
+ ksmo.flags |= KVM_S390_MEMOP_F_CHECK_ONLY;
+ if (desc->f_inject)
+ ksmo.flags |= KVM_S390_MEMOP_F_INJECT_EXCEPTION;
+ if (desc->_set_flags)
+ ksmo.flags = desc->set_flags;
+ if (desc->f_key && desc->key != NO_KEY) {
+ ksmo.flags |= KVM_S390_MEMOP_F_SKEY_PROTECTION;
+ ksmo.key = desc->key;
+ }
+ if (desc->_ar)
+ ksmo.ar = desc->ar;
+ else
+ ksmo.ar = 0;
+ if (desc->_sida_offset)
+ ksmo.sida_offset = desc->sida_offset;
+
+ return ksmo;
+}
+
+struct test_info {
+ struct kvm_vm *vm;
+ struct kvm_vcpu *vcpu;
+};
+
+#define PRINT_MEMOP false
+static void print_memop(struct kvm_vcpu *vcpu, const struct kvm_s390_mem_op *ksmo)
+{
+ if (!PRINT_MEMOP)
+ return;
+
+ if (!vcpu)
+ printf("vm memop(");
+ else
+ printf("vcpu memop(");
+ switch (ksmo->op) {
+ case KVM_S390_MEMOP_LOGICAL_READ:
+ printf("LOGICAL, READ, ");
+ break;
+ case KVM_S390_MEMOP_LOGICAL_WRITE:
+ printf("LOGICAL, WRITE, ");
+ break;
+ case KVM_S390_MEMOP_SIDA_READ:
+ printf("SIDA, READ, ");
+ break;
+ case KVM_S390_MEMOP_SIDA_WRITE:
+ printf("SIDA, WRITE, ");
+ break;
+ case KVM_S390_MEMOP_ABSOLUTE_READ:
+ printf("ABSOLUTE, READ, ");
+ break;
+ case KVM_S390_MEMOP_ABSOLUTE_WRITE:
+ printf("ABSOLUTE, WRITE, ");
+ break;
+ case KVM_S390_MEMOP_ABSOLUTE_CMPXCHG:
+ printf("ABSOLUTE, CMPXCHG, ");
+ break;
+ }
+ printf("gaddr=%llu, size=%u, buf=%llu, ar=%u, key=%u, old_addr=%llx",
+ ksmo->gaddr, ksmo->size, ksmo->buf, ksmo->ar, ksmo->key,
+ ksmo->old_addr);
+ if (ksmo->flags & KVM_S390_MEMOP_F_CHECK_ONLY)
+ printf(", CHECK_ONLY");
+ if (ksmo->flags & KVM_S390_MEMOP_F_INJECT_EXCEPTION)
+ printf(", INJECT_EXCEPTION");
+ if (ksmo->flags & KVM_S390_MEMOP_F_SKEY_PROTECTION)
+ printf(", SKEY_PROTECTION");
+ puts(")");
+}
+
+static int err_memop_ioctl(struct test_info info, struct kvm_s390_mem_op *ksmo,
+ struct mop_desc *desc)
+{
+ struct kvm_vcpu *vcpu = info.vcpu;
+
+ if (!vcpu)
+ return __vm_ioctl(info.vm, KVM_S390_MEM_OP, ksmo);
+ else
+ return __vcpu_ioctl(vcpu, KVM_S390_MEM_OP, ksmo);
+}
+
+static void memop_ioctl(struct test_info info, struct kvm_s390_mem_op *ksmo,
+ struct mop_desc *desc)
+{
+ int r;
+
+ r = err_memop_ioctl(info, ksmo, desc);
+ if (ksmo->op == KVM_S390_MEMOP_ABSOLUTE_CMPXCHG) {
+ if (desc->cmpxchg_success) {
+ int diff = memcmp(desc->old_value, desc->old, desc->size);
+ *desc->cmpxchg_success = !diff;
+ }
+ }
+ TEST_ASSERT(!r, __KVM_IOCTL_ERROR("KVM_S390_MEM_OP", r));
+}
+
+#define MEMOP(err, info_p, mop_target_p, access_mode_p, buf_p, size_p, ...) \
+({ \
+ struct test_info __info = (info_p); \
+ struct mop_desc __desc = { \
+ .target = (mop_target_p), \
+ .mode = (access_mode_p), \
+ .buf = (buf_p), \
+ .size = (size_p), \
+ __VA_ARGS__ \
+ }; \
+ struct kvm_s390_mem_op __ksmo; \
+ \
+ if (__desc._gaddr_v) { \
+ if (__desc.target == ABSOLUTE) \
+ __desc.gaddr = addr_gva2gpa(__info.vm, __desc.gaddr_v); \
+ else \
+ __desc.gaddr = __desc.gaddr_v; \
+ } \
+ __ksmo = ksmo_from_desc(&__desc); \
+ print_memop(__info.vcpu, &__ksmo); \
+ err##memop_ioctl(__info, &__ksmo, &__desc); \
+})
+
+#define MOP(...) MEMOP(, __VA_ARGS__)
+#define ERR_MOP(...) MEMOP(err_, __VA_ARGS__)
+
+#define GADDR(a) .gaddr = ((uintptr_t)a)
+#define GADDR_V(v) ._gaddr_v = 1, .gaddr_v = ((uintptr_t)v)
+#define CHECK_ONLY .f_check = 1
+#define SET_FLAGS(f) ._set_flags = 1, .set_flags = (f)
+#define SIDA_OFFSET(o) ._sida_offset = 1, .sida_offset = (o)
+#define AR(a) ._ar = 1, .ar = (a)
+#define KEY(a) .f_key = 1, .key = (a)
+#define INJECT .f_inject = 1
+#define CMPXCHG_OLD(o) .old = (o)
+#define CMPXCHG_SUCCESS(s) .cmpxchg_success = (s)
+
+#define CHECK_N_DO(f, ...) ({ f(__VA_ARGS__, CHECK_ONLY); f(__VA_ARGS__); })
+
+#define PAGE_SHIFT 12
+#define PAGE_SIZE (1ULL << PAGE_SHIFT)
+#define PAGE_MASK (~(PAGE_SIZE - 1))
+#define CR0_FETCH_PROTECTION_OVERRIDE (1UL << (63 - 38))
+#define CR0_STORAGE_PROTECTION_OVERRIDE (1UL << (63 - 39))
-#define VCPU_ID 1
+static uint8_t __aligned(PAGE_SIZE) mem1[65536];
+static uint8_t __aligned(PAGE_SIZE) mem2[65536];
-static uint8_t mem1[65536];
-static uint8_t mem2[65536];
+struct test_default {
+ struct kvm_vm *kvm_vm;
+ struct test_info vm;
+ struct test_info vcpu;
+ struct kvm_run *run;
+ int size;
+};
-static void guest_code(void)
+static struct test_default test_default_init(void *guest_code)
+{
+ struct kvm_vcpu *vcpu;
+ struct test_default t;
+
+ t.size = min((size_t)kvm_check_cap(KVM_CAP_S390_MEM_OP), sizeof(mem1));
+ t.kvm_vm = vm_create_with_one_vcpu(&vcpu, guest_code);
+ t.vm = (struct test_info) { t.kvm_vm, NULL };
+ t.vcpu = (struct test_info) { t.kvm_vm, vcpu };
+ t.run = vcpu->run;
+ return t;
+}
+
+enum stage {
+ /* Synced state set by host, e.g. DAT */
+ STAGE_INITED,
+ /* Guest did nothing */
+ STAGE_IDLED,
+ /* Guest set storage keys (specifics up to test case) */
+ STAGE_SKEYS_SET,
+ /* Guest copied memory (locations up to test case) */
+ STAGE_COPIED,
+ /* End of guest code reached */
+ STAGE_DONE,
+};
+
+#define HOST_SYNC(info_p, stage) \
+({ \
+ struct test_info __info = (info_p); \
+ struct kvm_vcpu *__vcpu = __info.vcpu; \
+ struct ucall uc; \
+ int __stage = (stage); \
+ \
+ vcpu_run(__vcpu); \
+ get_ucall(__vcpu, &uc); \
+ if (uc.cmd == UCALL_ABORT) { \
+ REPORT_GUEST_ASSERT(uc); \
+ } \
+ TEST_ASSERT_EQ(uc.cmd, UCALL_SYNC); \
+ TEST_ASSERT_EQ(uc.args[1], __stage); \
+}) \
+
+static void prepare_mem12(void)
{
int i;
+ for (i = 0; i < sizeof(mem1); i++)
+ mem1[i] = rand();
+ memset(mem2, 0xaa, sizeof(mem2));
+}
+
+#define ASSERT_MEM_EQ(p1, p2, size) \
+ TEST_ASSERT(!memcmp(p1, p2, size), "Memory contents do not match!")
+
+static void default_write_read(struct test_info copy_cpu, struct test_info mop_cpu,
+ enum mop_target mop_target, uint32_t size, uint8_t key)
+{
+ prepare_mem12();
+ CHECK_N_DO(MOP, mop_cpu, mop_target, WRITE, mem1, size,
+ GADDR_V(mem1), KEY(key));
+ HOST_SYNC(copy_cpu, STAGE_COPIED);
+ CHECK_N_DO(MOP, mop_cpu, mop_target, READ, mem2, size,
+ GADDR_V(mem2), KEY(key));
+ ASSERT_MEM_EQ(mem1, mem2, size);
+}
+
+static void default_read(struct test_info copy_cpu, struct test_info mop_cpu,
+ enum mop_target mop_target, uint32_t size, uint8_t key)
+{
+ prepare_mem12();
+ CHECK_N_DO(MOP, mop_cpu, mop_target, WRITE, mem1, size, GADDR_V(mem1));
+ HOST_SYNC(copy_cpu, STAGE_COPIED);
+ CHECK_N_DO(MOP, mop_cpu, mop_target, READ, mem2, size,
+ GADDR_V(mem2), KEY(key));
+ ASSERT_MEM_EQ(mem1, mem2, size);
+}
+
+static void default_cmpxchg(struct test_default *test, uint8_t key)
+{
+ for (int size = 1; size <= 16; size *= 2) {
+ for (int offset = 0; offset < 16; offset += size) {
+ uint8_t __aligned(16) new[16] = {};
+ uint8_t __aligned(16) old[16];
+ bool succ;
+
+ prepare_mem12();
+ default_write_read(test->vcpu, test->vcpu, LOGICAL, 16, NO_KEY);
+
+ memcpy(&old, mem1, 16);
+ MOP(test->vm, ABSOLUTE, CMPXCHG, new + offset,
+ size, GADDR_V(mem1 + offset),
+ CMPXCHG_OLD(old + offset),
+ CMPXCHG_SUCCESS(&succ), KEY(key));
+ HOST_SYNC(test->vcpu, STAGE_COPIED);
+ MOP(test->vm, ABSOLUTE, READ, mem2, 16, GADDR_V(mem2));
+ TEST_ASSERT(succ, "exchange of values should succeed");
+ memcpy(mem1 + offset, new + offset, size);
+ ASSERT_MEM_EQ(mem1, mem2, 16);
+
+ memcpy(&old, mem1, 16);
+ new[offset]++;
+ old[offset]++;
+ MOP(test->vm, ABSOLUTE, CMPXCHG, new + offset,
+ size, GADDR_V(mem1 + offset),
+ CMPXCHG_OLD(old + offset),
+ CMPXCHG_SUCCESS(&succ), KEY(key));
+ HOST_SYNC(test->vcpu, STAGE_COPIED);
+ MOP(test->vm, ABSOLUTE, READ, mem2, 16, GADDR_V(mem2));
+ TEST_ASSERT(!succ, "exchange of values should not succeed");
+ ASSERT_MEM_EQ(mem1, mem2, 16);
+ ASSERT_MEM_EQ(&old, mem1, 16);
+ }
+ }
+}
+
+static void guest_copy(void)
+{
+ GUEST_SYNC(STAGE_INITED);
+ memcpy(&mem2, &mem1, sizeof(mem2));
+ GUEST_SYNC(STAGE_COPIED);
+}
+
+static void test_copy(void)
+{
+ struct test_default t = test_default_init(guest_copy);
+
+ HOST_SYNC(t.vcpu, STAGE_INITED);
+
+ default_write_read(t.vcpu, t.vcpu, LOGICAL, t.size, NO_KEY);
+
+ kvm_vm_free(t.kvm_vm);
+}
+
+static void test_copy_access_register(void)
+{
+ struct test_default t = test_default_init(guest_copy);
+
+ HOST_SYNC(t.vcpu, STAGE_INITED);
+
+ prepare_mem12();
+ t.run->psw_mask &= ~(3UL << (63 - 17));
+ t.run->psw_mask |= 1UL << (63 - 17); /* Enable AR mode */
+
+ /*
+ * Primary address space gets used if an access register
+ * contains zero. The host makes use of AR[1] so is a good
+ * candidate to ensure the guest AR (of zero) is used.
+ */
+ CHECK_N_DO(MOP, t.vcpu, LOGICAL, WRITE, mem1, t.size,
+ GADDR_V(mem1), AR(1));
+ HOST_SYNC(t.vcpu, STAGE_COPIED);
+
+ CHECK_N_DO(MOP, t.vcpu, LOGICAL, READ, mem2, t.size,
+ GADDR_V(mem2), AR(1));
+ ASSERT_MEM_EQ(mem1, mem2, t.size);
+
+ kvm_vm_free(t.kvm_vm);
+}
+
+static void set_storage_key_range(void *addr, size_t len, uint8_t key)
+{
+ uintptr_t _addr, abs, i;
+ int not_mapped = 0;
+
+ _addr = (uintptr_t)addr;
+ for (i = _addr & PAGE_MASK; i < _addr + len; i += PAGE_SIZE) {
+ abs = i;
+ asm volatile (
+ "lra %[abs], 0(0,%[abs])\n"
+ " jz 0f\n"
+ " llill %[not_mapped],1\n"
+ " j 1f\n"
+ "0: sske %[key], %[abs]\n"
+ "1:"
+ : [abs] "+&a" (abs), [not_mapped] "+r" (not_mapped)
+ : [key] "r" (key)
+ : "cc"
+ );
+ GUEST_ASSERT_EQ(not_mapped, 0);
+ }
+}
+
+static void guest_copy_key(void)
+{
+ set_storage_key_range(mem1, sizeof(mem1), 0x90);
+ set_storage_key_range(mem2, sizeof(mem2), 0x90);
+ GUEST_SYNC(STAGE_SKEYS_SET);
+
for (;;) {
- for (i = 0; i < sizeof(mem2); i++)
- mem2[i] = mem1[i];
- GUEST_SYNC(0);
+ memcpy(&mem2, &mem1, sizeof(mem2));
+ GUEST_SYNC(STAGE_COPIED);
}
}
-int main(int argc, char *argv[])
+static void test_copy_key(void)
{
- struct kvm_vm *vm;
- struct kvm_run *run;
- struct kvm_s390_mem_op ksmo;
- int rv, i, maxsize;
+ struct test_default t = test_default_init(guest_copy_key);
+
+ HOST_SYNC(t.vcpu, STAGE_SKEYS_SET);
+
+ /* vm, no key */
+ default_write_read(t.vcpu, t.vm, ABSOLUTE, t.size, NO_KEY);
+
+ /* vm/vcpu, machting key or key 0 */
+ default_write_read(t.vcpu, t.vcpu, LOGICAL, t.size, 0);
+ default_write_read(t.vcpu, t.vcpu, LOGICAL, t.size, 9);
+ default_write_read(t.vcpu, t.vm, ABSOLUTE, t.size, 0);
+ default_write_read(t.vcpu, t.vm, ABSOLUTE, t.size, 9);
+ /*
+ * There used to be different code paths for key handling depending on
+ * if the region crossed a page boundary.
+ * There currently are not, but the more tests the merrier.
+ */
+ default_write_read(t.vcpu, t.vcpu, LOGICAL, 1, 0);
+ default_write_read(t.vcpu, t.vcpu, LOGICAL, 1, 9);
+ default_write_read(t.vcpu, t.vm, ABSOLUTE, 1, 0);
+ default_write_read(t.vcpu, t.vm, ABSOLUTE, 1, 9);
+
+ /* vm/vcpu, mismatching keys on read, but no fetch protection */
+ default_read(t.vcpu, t.vcpu, LOGICAL, t.size, 2);
+ default_read(t.vcpu, t.vm, ABSOLUTE, t.size, 2);
+
+ kvm_vm_free(t.kvm_vm);
+}
+
+static void test_cmpxchg_key(void)
+{
+ struct test_default t = test_default_init(guest_copy_key);
- setbuf(stdout, NULL); /* Tell stdout not to buffer its content */
+ HOST_SYNC(t.vcpu, STAGE_SKEYS_SET);
- maxsize = kvm_check_cap(KVM_CAP_S390_MEM_OP);
- if (!maxsize) {
- print_skip("CAP_S390_MEM_OP not supported");
- exit(KSFT_SKIP);
+ default_cmpxchg(&t, NO_KEY);
+ default_cmpxchg(&t, 0);
+ default_cmpxchg(&t, 9);
+
+ kvm_vm_free(t.kvm_vm);
+}
+
+static __uint128_t cut_to_size(int size, __uint128_t val)
+{
+ switch (size) {
+ case 1:
+ return (uint8_t)val;
+ case 2:
+ return (uint16_t)val;
+ case 4:
+ return (uint32_t)val;
+ case 8:
+ return (uint64_t)val;
+ case 16:
+ return val;
}
- if (maxsize > sizeof(mem1))
- maxsize = sizeof(mem1);
+ GUEST_FAIL("Invalid size = %u", size);
+ return 0;
+}
- /* Create VM */
- vm = vm_create_default(VCPU_ID, 0, guest_code);
- run = vcpu_state(vm, VCPU_ID);
+static bool popcount_eq(__uint128_t a, __uint128_t b)
+{
+ unsigned int count_a, count_b;
- for (i = 0; i < sizeof(mem1); i++)
- mem1[i] = i * i + i;
-
- /* Set the first array */
- ksmo.gaddr = addr_gva2gpa(vm, (uintptr_t)mem1);
- ksmo.flags = 0;
- ksmo.size = maxsize;
- ksmo.op = KVM_S390_MEMOP_LOGICAL_WRITE;
- ksmo.buf = (uintptr_t)mem1;
- ksmo.ar = 0;
- vcpu_ioctl(vm, VCPU_ID, KVM_S390_MEM_OP, &ksmo);
-
- /* Let the guest code copy the first array to the second */
- vcpu_run(vm, VCPU_ID);
- TEST_ASSERT(run->exit_reason == KVM_EXIT_S390_SIEIC,
- "Unexpected exit reason: %u (%s)\n",
- run->exit_reason,
- exit_reason_str(run->exit_reason));
+ count_a = __builtin_popcountl((uint64_t)(a >> 64)) +
+ __builtin_popcountl((uint64_t)a);
+ count_b = __builtin_popcountl((uint64_t)(b >> 64)) +
+ __builtin_popcountl((uint64_t)b);
+ return count_a == count_b;
+}
- memset(mem2, 0xaa, sizeof(mem2));
+static __uint128_t rotate(int size, __uint128_t val, int amount)
+{
+ unsigned int bits = size * 8;
+
+ amount = (amount + bits) % bits;
+ val = cut_to_size(size, val);
+ if (!amount)
+ return val;
+ return (val << (bits - amount)) | (val >> amount);
+}
+
+const unsigned int max_block = 16;
+
+static void choose_block(bool guest, int i, int *size, int *offset)
+{
+ unsigned int rand;
+
+ rand = i;
+ if (guest) {
+ rand = rand * 19 + 11;
+ *size = 1 << ((rand % 3) + 2);
+ rand = rand * 19 + 11;
+ *offset = (rand % max_block) & ~(*size - 1);
+ } else {
+ rand = rand * 17 + 5;
+ *size = 1 << (rand % 5);
+ rand = rand * 17 + 5;
+ *offset = (rand % max_block) & ~(*size - 1);
+ }
+}
+
+static __uint128_t permutate_bits(bool guest, int i, int size, __uint128_t old)
+{
+ unsigned int rand;
+ int amount;
+ bool swap;
+
+ rand = i;
+ rand = rand * 3 + 1;
+ if (guest)
+ rand = rand * 3 + 1;
+ swap = rand % 2 == 0;
+ if (swap) {
+ int i, j;
+ __uint128_t new;
+ uint8_t byte0, byte1;
+
+ rand = rand * 3 + 1;
+ i = rand % size;
+ rand = rand * 3 + 1;
+ j = rand % size;
+ if (i == j)
+ return old;
+ new = rotate(16, old, i * 8);
+ byte0 = new & 0xff;
+ new &= ~0xff;
+ new = rotate(16, new, -i * 8);
+ new = rotate(16, new, j * 8);
+ byte1 = new & 0xff;
+ new = (new & ~0xff) | byte0;
+ new = rotate(16, new, -j * 8);
+ new = rotate(16, new, i * 8);
+ new = new | byte1;
+ new = rotate(16, new, -i * 8);
+ return new;
+ }
+ rand = rand * 3 + 1;
+ amount = rand % (size * 8);
+ return rotate(size, old, amount);
+}
+
+static bool _cmpxchg(int size, void *target, __uint128_t *old_addr, __uint128_t new)
+{
+ bool ret;
+
+ switch (size) {
+ case 4: {
+ uint32_t old = *old_addr;
+
+ asm volatile ("cs %[old],%[new],%[address]"
+ : [old] "+d" (old),
+ [address] "+Q" (*(uint32_t *)(target))
+ : [new] "d" ((uint32_t)new)
+ : "cc"
+ );
+ ret = old == (uint32_t)*old_addr;
+ *old_addr = old;
+ return ret;
+ }
+ case 8: {
+ uint64_t old = *old_addr;
+
+ asm volatile ("csg %[old],%[new],%[address]"
+ : [old] "+d" (old),
+ [address] "+Q" (*(uint64_t *)(target))
+ : [new] "d" ((uint64_t)new)
+ : "cc"
+ );
+ ret = old == (uint64_t)*old_addr;
+ *old_addr = old;
+ return ret;
+ }
+ case 16: {
+ __uint128_t old = *old_addr;
+
+ asm volatile ("cdsg %[old],%[new],%[address]"
+ : [old] "+d" (old),
+ [address] "+Q" (*(__uint128_t *)(target))
+ : [new] "d" (new)
+ : "cc"
+ );
+ ret = old == *old_addr;
+ *old_addr = old;
+ return ret;
+ }
+ }
+ GUEST_FAIL("Invalid size = %u", size);
+ return 0;
+}
+
+const unsigned int cmpxchg_iter_outer = 100, cmpxchg_iter_inner = 10000;
+
+static void guest_cmpxchg_key(void)
+{
+ int size, offset;
+ __uint128_t old, new;
+
+ set_storage_key_range(mem1, max_block, 0x10);
+ set_storage_key_range(mem2, max_block, 0x10);
+ GUEST_SYNC(STAGE_SKEYS_SET);
+
+ for (int i = 0; i < cmpxchg_iter_outer; i++) {
+ do {
+ old = 1;
+ } while (!_cmpxchg(16, mem1, &old, 0));
+ for (int j = 0; j < cmpxchg_iter_inner; j++) {
+ choose_block(true, i + j, &size, &offset);
+ do {
+ new = permutate_bits(true, i + j, size, old);
+ } while (!_cmpxchg(size, mem2 + offset, &old, new));
+ }
+ }
+
+ GUEST_SYNC(STAGE_DONE);
+}
- /* Get the second array */
- ksmo.gaddr = (uintptr_t)mem2;
- ksmo.flags = 0;
- ksmo.size = maxsize;
- ksmo.op = KVM_S390_MEMOP_LOGICAL_READ;
- ksmo.buf = (uintptr_t)mem2;
- ksmo.ar = 0;
- vcpu_ioctl(vm, VCPU_ID, KVM_S390_MEM_OP, &ksmo);
-
- TEST_ASSERT(!memcmp(mem1, mem2, maxsize),
- "Memory contents do not match!");
-
- /* Check error conditions - first bad size: */
- ksmo.gaddr = (uintptr_t)mem1;
- ksmo.flags = 0;
- ksmo.size = -1;
- ksmo.op = KVM_S390_MEMOP_LOGICAL_WRITE;
- ksmo.buf = (uintptr_t)mem1;
- ksmo.ar = 0;
- rv = _vcpu_ioctl(vm, VCPU_ID, KVM_S390_MEM_OP, &ksmo);
+static void *run_guest(void *data)
+{
+ struct test_info *info = data;
+
+ HOST_SYNC(*info, STAGE_DONE);
+ return NULL;
+}
+
+static char *quad_to_char(__uint128_t *quad, int size)
+{
+ return ((char *)quad) + (sizeof(*quad) - size);
+}
+
+static void test_cmpxchg_key_concurrent(void)
+{
+ struct test_default t = test_default_init(guest_cmpxchg_key);
+ int size, offset;
+ __uint128_t old, new;
+ bool success;
+ pthread_t thread;
+
+ HOST_SYNC(t.vcpu, STAGE_SKEYS_SET);
+ prepare_mem12();
+ MOP(t.vcpu, LOGICAL, WRITE, mem1, max_block, GADDR_V(mem2));
+ pthread_create(&thread, NULL, run_guest, &t.vcpu);
+
+ for (int i = 0; i < cmpxchg_iter_outer; i++) {
+ do {
+ old = 0;
+ new = 1;
+ MOP(t.vm, ABSOLUTE, CMPXCHG, &new,
+ sizeof(new), GADDR_V(mem1),
+ CMPXCHG_OLD(&old),
+ CMPXCHG_SUCCESS(&success), KEY(1));
+ } while (!success);
+ for (int j = 0; j < cmpxchg_iter_inner; j++) {
+ choose_block(false, i + j, &size, &offset);
+ do {
+ new = permutate_bits(false, i + j, size, old);
+ MOP(t.vm, ABSOLUTE, CMPXCHG, quad_to_char(&new, size),
+ size, GADDR_V(mem2 + offset),
+ CMPXCHG_OLD(quad_to_char(&old, size)),
+ CMPXCHG_SUCCESS(&success), KEY(1));
+ } while (!success);
+ }
+ }
+
+ pthread_join(thread, NULL);
+
+ MOP(t.vcpu, LOGICAL, READ, mem2, max_block, GADDR_V(mem2));
+ TEST_ASSERT(popcount_eq(*(__uint128_t *)mem1, *(__uint128_t *)mem2),
+ "Must retain number of set bits");
+
+ kvm_vm_free(t.kvm_vm);
+}
+
+static void guest_copy_key_fetch_prot(void)
+{
+ /*
+ * For some reason combining the first sync with override enablement
+ * results in an exception when calling HOST_SYNC.
+ */
+ GUEST_SYNC(STAGE_INITED);
+ /* Storage protection override applies to both store and fetch. */
+ set_storage_key_range(mem1, sizeof(mem1), 0x98);
+ set_storage_key_range(mem2, sizeof(mem2), 0x98);
+ GUEST_SYNC(STAGE_SKEYS_SET);
+
+ for (;;) {
+ memcpy(&mem2, &mem1, sizeof(mem2));
+ GUEST_SYNC(STAGE_COPIED);
+ }
+}
+
+static void test_copy_key_storage_prot_override(void)
+{
+ struct test_default t = test_default_init(guest_copy_key_fetch_prot);
+
+ HOST_SYNC(t.vcpu, STAGE_INITED);
+ t.run->s.regs.crs[0] |= CR0_STORAGE_PROTECTION_OVERRIDE;
+ t.run->kvm_dirty_regs = KVM_SYNC_CRS;
+ HOST_SYNC(t.vcpu, STAGE_SKEYS_SET);
+
+ /* vcpu, mismatching keys, storage protection override in effect */
+ default_write_read(t.vcpu, t.vcpu, LOGICAL, t.size, 2);
+
+ kvm_vm_free(t.kvm_vm);
+}
+
+static void test_copy_key_fetch_prot(void)
+{
+ struct test_default t = test_default_init(guest_copy_key_fetch_prot);
+
+ HOST_SYNC(t.vcpu, STAGE_INITED);
+ HOST_SYNC(t.vcpu, STAGE_SKEYS_SET);
+
+ /* vm/vcpu, matching key, fetch protection in effect */
+ default_read(t.vcpu, t.vcpu, LOGICAL, t.size, 9);
+ default_read(t.vcpu, t.vm, ABSOLUTE, t.size, 9);
+
+ kvm_vm_free(t.kvm_vm);
+}
+
+#define ERR_PROT_MOP(...) \
+({ \
+ int rv; \
+ \
+ rv = ERR_MOP(__VA_ARGS__); \
+ TEST_ASSERT(rv == 4, "Should result in protection exception"); \
+})
+
+static void guest_error_key(void)
+{
+ GUEST_SYNC(STAGE_INITED);
+ set_storage_key_range(mem1, PAGE_SIZE, 0x18);
+ set_storage_key_range(mem1 + PAGE_SIZE, sizeof(mem1) - PAGE_SIZE, 0x98);
+ GUEST_SYNC(STAGE_SKEYS_SET);
+ GUEST_SYNC(STAGE_IDLED);
+}
+
+static void test_errors_key(void)
+{
+ struct test_default t = test_default_init(guest_error_key);
+
+ HOST_SYNC(t.vcpu, STAGE_INITED);
+ HOST_SYNC(t.vcpu, STAGE_SKEYS_SET);
+
+ /* vm/vcpu, mismatching keys, fetch protection in effect */
+ CHECK_N_DO(ERR_PROT_MOP, t.vcpu, LOGICAL, WRITE, mem1, t.size, GADDR_V(mem1), KEY(2));
+ CHECK_N_DO(ERR_PROT_MOP, t.vcpu, LOGICAL, READ, mem2, t.size, GADDR_V(mem1), KEY(2));
+ CHECK_N_DO(ERR_PROT_MOP, t.vm, ABSOLUTE, WRITE, mem1, t.size, GADDR_V(mem1), KEY(2));
+ CHECK_N_DO(ERR_PROT_MOP, t.vm, ABSOLUTE, READ, mem2, t.size, GADDR_V(mem1), KEY(2));
+
+ kvm_vm_free(t.kvm_vm);
+}
+
+static void test_errors_cmpxchg_key(void)
+{
+ struct test_default t = test_default_init(guest_copy_key_fetch_prot);
+ int i;
+
+ HOST_SYNC(t.vcpu, STAGE_INITED);
+ HOST_SYNC(t.vcpu, STAGE_SKEYS_SET);
+
+ for (i = 1; i <= 16; i *= 2) {
+ __uint128_t old = 0;
+
+ ERR_PROT_MOP(t.vm, ABSOLUTE, CMPXCHG, mem2, i, GADDR_V(mem2),
+ CMPXCHG_OLD(&old), KEY(2));
+ }
+
+ kvm_vm_free(t.kvm_vm);
+}
+
+static void test_termination(void)
+{
+ struct test_default t = test_default_init(guest_error_key);
+ uint64_t prefix;
+ uint64_t teid;
+ uint64_t teid_mask = BIT(63 - 56) | BIT(63 - 60) | BIT(63 - 61);
+ uint64_t psw[2];
+
+ HOST_SYNC(t.vcpu, STAGE_INITED);
+ HOST_SYNC(t.vcpu, STAGE_SKEYS_SET);
+
+ /* vcpu, mismatching keys after first page */
+ ERR_PROT_MOP(t.vcpu, LOGICAL, WRITE, mem1, t.size, GADDR_V(mem1), KEY(1), INJECT);
+ /*
+ * The memop injected a program exception and the test needs to check the
+ * Translation-Exception Identification (TEID). It is necessary to run
+ * the guest in order to be able to read the TEID from guest memory.
+ * Set the guest program new PSW, so the guest state is not clobbered.
+ */
+ prefix = t.run->s.regs.prefix;
+ psw[0] = t.run->psw_mask;
+ psw[1] = t.run->psw_addr;
+ MOP(t.vm, ABSOLUTE, WRITE, psw, sizeof(psw), GADDR(prefix + 464));
+ HOST_SYNC(t.vcpu, STAGE_IDLED);
+ MOP(t.vm, ABSOLUTE, READ, &teid, sizeof(teid), GADDR(prefix + 168));
+ /* Bits 56, 60, 61 form a code, 0 being the only one allowing for termination */
+ TEST_ASSERT_EQ(teid & teid_mask, 0);
+
+ kvm_vm_free(t.kvm_vm);
+}
+
+static void test_errors_key_storage_prot_override(void)
+{
+ struct test_default t = test_default_init(guest_copy_key_fetch_prot);
+
+ HOST_SYNC(t.vcpu, STAGE_INITED);
+ t.run->s.regs.crs[0] |= CR0_STORAGE_PROTECTION_OVERRIDE;
+ t.run->kvm_dirty_regs = KVM_SYNC_CRS;
+ HOST_SYNC(t.vcpu, STAGE_SKEYS_SET);
+
+ /* vm, mismatching keys, storage protection override not applicable to vm */
+ CHECK_N_DO(ERR_PROT_MOP, t.vm, ABSOLUTE, WRITE, mem1, t.size, GADDR_V(mem1), KEY(2));
+ CHECK_N_DO(ERR_PROT_MOP, t.vm, ABSOLUTE, READ, mem2, t.size, GADDR_V(mem2), KEY(2));
+
+ kvm_vm_free(t.kvm_vm);
+}
+
+const uint64_t last_page_addr = -PAGE_SIZE;
+
+static void guest_copy_key_fetch_prot_override(void)
+{
+ int i;
+ char *page_0 = 0;
+
+ GUEST_SYNC(STAGE_INITED);
+ set_storage_key_range(0, PAGE_SIZE, 0x18);
+ set_storage_key_range((void *)last_page_addr, PAGE_SIZE, 0x0);
+ asm volatile ("sske %[key],%[addr]\n" :: [addr] "r"(0L), [key] "r"(0x18) : "cc");
+ GUEST_SYNC(STAGE_SKEYS_SET);
+
+ for (;;) {
+ for (i = 0; i < PAGE_SIZE; i++)
+ page_0[i] = mem1[i];
+ GUEST_SYNC(STAGE_COPIED);
+ }
+}
+
+static void test_copy_key_fetch_prot_override(void)
+{
+ struct test_default t = test_default_init(guest_copy_key_fetch_prot_override);
+ vm_vaddr_t guest_0_page, guest_last_page;
+
+ guest_0_page = vm_vaddr_alloc(t.kvm_vm, PAGE_SIZE, 0);
+ guest_last_page = vm_vaddr_alloc(t.kvm_vm, PAGE_SIZE, last_page_addr);
+ if (guest_0_page != 0 || guest_last_page != last_page_addr) {
+ print_skip("did not allocate guest pages at required positions");
+ goto out;
+ }
+
+ HOST_SYNC(t.vcpu, STAGE_INITED);
+ t.run->s.regs.crs[0] |= CR0_FETCH_PROTECTION_OVERRIDE;
+ t.run->kvm_dirty_regs = KVM_SYNC_CRS;
+ HOST_SYNC(t.vcpu, STAGE_SKEYS_SET);
+
+ /* vcpu, mismatching keys on fetch, fetch protection override applies */
+ prepare_mem12();
+ MOP(t.vcpu, LOGICAL, WRITE, mem1, PAGE_SIZE, GADDR_V(mem1));
+ HOST_SYNC(t.vcpu, STAGE_COPIED);
+ CHECK_N_DO(MOP, t.vcpu, LOGICAL, READ, mem2, 2048, GADDR_V(guest_0_page), KEY(2));
+ ASSERT_MEM_EQ(mem1, mem2, 2048);
+
+ /*
+ * vcpu, mismatching keys on fetch, fetch protection override applies,
+ * wraparound
+ */
+ prepare_mem12();
+ MOP(t.vcpu, LOGICAL, WRITE, mem1, 2 * PAGE_SIZE, GADDR_V(guest_last_page));
+ HOST_SYNC(t.vcpu, STAGE_COPIED);
+ CHECK_N_DO(MOP, t.vcpu, LOGICAL, READ, mem2, PAGE_SIZE + 2048,
+ GADDR_V(guest_last_page), KEY(2));
+ ASSERT_MEM_EQ(mem1, mem2, 2048);
+
+out:
+ kvm_vm_free(t.kvm_vm);
+}
+
+static void test_errors_key_fetch_prot_override_not_enabled(void)
+{
+ struct test_default t = test_default_init(guest_copy_key_fetch_prot_override);
+ vm_vaddr_t guest_0_page, guest_last_page;
+
+ guest_0_page = vm_vaddr_alloc(t.kvm_vm, PAGE_SIZE, 0);
+ guest_last_page = vm_vaddr_alloc(t.kvm_vm, PAGE_SIZE, last_page_addr);
+ if (guest_0_page != 0 || guest_last_page != last_page_addr) {
+ print_skip("did not allocate guest pages at required positions");
+ goto out;
+ }
+ HOST_SYNC(t.vcpu, STAGE_INITED);
+ HOST_SYNC(t.vcpu, STAGE_SKEYS_SET);
+
+ /* vcpu, mismatching keys on fetch, fetch protection override not enabled */
+ CHECK_N_DO(ERR_PROT_MOP, t.vcpu, LOGICAL, READ, mem2, 2048, GADDR_V(0), KEY(2));
+
+out:
+ kvm_vm_free(t.kvm_vm);
+}
+
+static void test_errors_key_fetch_prot_override_enabled(void)
+{
+ struct test_default t = test_default_init(guest_copy_key_fetch_prot_override);
+ vm_vaddr_t guest_0_page, guest_last_page;
+
+ guest_0_page = vm_vaddr_alloc(t.kvm_vm, PAGE_SIZE, 0);
+ guest_last_page = vm_vaddr_alloc(t.kvm_vm, PAGE_SIZE, last_page_addr);
+ if (guest_0_page != 0 || guest_last_page != last_page_addr) {
+ print_skip("did not allocate guest pages at required positions");
+ goto out;
+ }
+ HOST_SYNC(t.vcpu, STAGE_INITED);
+ t.run->s.regs.crs[0] |= CR0_FETCH_PROTECTION_OVERRIDE;
+ t.run->kvm_dirty_regs = KVM_SYNC_CRS;
+ HOST_SYNC(t.vcpu, STAGE_SKEYS_SET);
+
+ /*
+ * vcpu, mismatching keys on fetch,
+ * fetch protection override does not apply because memory range exceeded
+ */
+ CHECK_N_DO(ERR_PROT_MOP, t.vcpu, LOGICAL, READ, mem2, 2048 + 1, GADDR_V(0), KEY(2));
+ CHECK_N_DO(ERR_PROT_MOP, t.vcpu, LOGICAL, READ, mem2, PAGE_SIZE + 2048 + 1,
+ GADDR_V(guest_last_page), KEY(2));
+ /* vm, fetch protected override does not apply */
+ CHECK_N_DO(ERR_PROT_MOP, t.vm, ABSOLUTE, READ, mem2, 2048, GADDR(0), KEY(2));
+ CHECK_N_DO(ERR_PROT_MOP, t.vm, ABSOLUTE, READ, mem2, 2048, GADDR_V(guest_0_page), KEY(2));
+
+out:
+ kvm_vm_free(t.kvm_vm);
+}
+
+static void guest_idle(void)
+{
+ GUEST_SYNC(STAGE_INITED); /* for consistency's sake */
+ for (;;)
+ GUEST_SYNC(STAGE_IDLED);
+}
+
+static void _test_errors_common(struct test_info info, enum mop_target target, int size)
+{
+ int rv;
+
+ /* Bad size: */
+ rv = ERR_MOP(info, target, WRITE, mem1, -1, GADDR_V(mem1));
TEST_ASSERT(rv == -1 && errno == E2BIG, "ioctl allows insane sizes");
/* Zero size: */
- ksmo.gaddr = (uintptr_t)mem1;
- ksmo.flags = 0;
- ksmo.size = 0;
- ksmo.op = KVM_S390_MEMOP_LOGICAL_WRITE;
- ksmo.buf = (uintptr_t)mem1;
- ksmo.ar = 0;
- rv = _vcpu_ioctl(vm, VCPU_ID, KVM_S390_MEM_OP, &ksmo);
+ rv = ERR_MOP(info, target, WRITE, mem1, 0, GADDR_V(mem1));
TEST_ASSERT(rv == -1 && (errno == EINVAL || errno == ENOMEM),
"ioctl allows 0 as size");
/* Bad flags: */
- ksmo.gaddr = (uintptr_t)mem1;
- ksmo.flags = -1;
- ksmo.size = maxsize;
- ksmo.op = KVM_S390_MEMOP_LOGICAL_WRITE;
- ksmo.buf = (uintptr_t)mem1;
- ksmo.ar = 0;
- rv = _vcpu_ioctl(vm, VCPU_ID, KVM_S390_MEM_OP, &ksmo);
+ rv = ERR_MOP(info, target, WRITE, mem1, size, GADDR_V(mem1), SET_FLAGS(-1));
TEST_ASSERT(rv == -1 && errno == EINVAL, "ioctl allows all flags");
- /* Bad operation: */
- ksmo.gaddr = (uintptr_t)mem1;
- ksmo.flags = 0;
- ksmo.size = maxsize;
- ksmo.op = -1;
- ksmo.buf = (uintptr_t)mem1;
- ksmo.ar = 0;
- rv = _vcpu_ioctl(vm, VCPU_ID, KVM_S390_MEM_OP, &ksmo);
- TEST_ASSERT(rv == -1 && errno == EINVAL, "ioctl allows bad operations");
-
/* Bad guest address: */
- ksmo.gaddr = ~0xfffUL;
- ksmo.flags = KVM_S390_MEMOP_F_CHECK_ONLY;
- ksmo.size = maxsize;
- ksmo.op = KVM_S390_MEMOP_LOGICAL_WRITE;
- ksmo.buf = (uintptr_t)mem1;
- ksmo.ar = 0;
- rv = _vcpu_ioctl(vm, VCPU_ID, KVM_S390_MEM_OP, &ksmo);
- TEST_ASSERT(rv > 0, "ioctl does not report bad guest memory access");
+ rv = ERR_MOP(info, target, WRITE, mem1, size, GADDR((void *)~0xfffUL), CHECK_ONLY);
+ TEST_ASSERT(rv > 0, "ioctl does not report bad guest memory address with CHECK_ONLY");
+ rv = ERR_MOP(info, target, WRITE, mem1, size, GADDR((void *)~0xfffUL));
+ TEST_ASSERT(rv > 0, "ioctl does not report bad guest memory address on write");
/* Bad host address: */
- ksmo.gaddr = (uintptr_t)mem1;
- ksmo.flags = 0;
- ksmo.size = maxsize;
- ksmo.op = KVM_S390_MEMOP_LOGICAL_WRITE;
- ksmo.buf = 0;
- ksmo.ar = 0;
- rv = _vcpu_ioctl(vm, VCPU_ID, KVM_S390_MEM_OP, &ksmo);
+ rv = ERR_MOP(info, target, WRITE, 0, size, GADDR_V(mem1));
TEST_ASSERT(rv == -1 && errno == EFAULT,
"ioctl does not report bad host memory address");
+ /* Bad key: */
+ rv = ERR_MOP(info, target, WRITE, mem1, size, GADDR_V(mem1), KEY(17));
+ TEST_ASSERT(rv == -1 && errno == EINVAL, "ioctl allows invalid key");
+}
+
+static void test_errors(void)
+{
+ struct test_default t = test_default_init(guest_idle);
+ int rv;
+
+ HOST_SYNC(t.vcpu, STAGE_INITED);
+
+ _test_errors_common(t.vcpu, LOGICAL, t.size);
+ _test_errors_common(t.vm, ABSOLUTE, t.size);
+
+ /* Bad operation: */
+ rv = ERR_MOP(t.vcpu, INVALID, WRITE, mem1, t.size, GADDR_V(mem1));
+ TEST_ASSERT(rv == -1 && errno == EINVAL, "ioctl allows bad operations");
+ /* virtual addresses are not translated when passing INVALID */
+ rv = ERR_MOP(t.vm, INVALID, WRITE, mem1, PAGE_SIZE, GADDR(0));
+ TEST_ASSERT(rv == -1 && errno == EINVAL, "ioctl allows bad operations");
+
/* Bad access register: */
- run->psw_mask &= ~(3UL << (63 - 17));
- run->psw_mask |= 1UL << (63 - 17); /* Enable AR mode */
- vcpu_run(vm, VCPU_ID); /* To sync new state to SIE block */
- ksmo.gaddr = (uintptr_t)mem1;
- ksmo.flags = 0;
- ksmo.size = maxsize;
- ksmo.op = KVM_S390_MEMOP_LOGICAL_WRITE;
- ksmo.buf = (uintptr_t)mem1;
- ksmo.ar = 17;
- rv = _vcpu_ioctl(vm, VCPU_ID, KVM_S390_MEM_OP, &ksmo);
+ t.run->psw_mask &= ~(3UL << (63 - 17));
+ t.run->psw_mask |= 1UL << (63 - 17); /* Enable AR mode */
+ HOST_SYNC(t.vcpu, STAGE_IDLED); /* To sync new state to SIE block */
+ rv = ERR_MOP(t.vcpu, LOGICAL, WRITE, mem1, t.size, GADDR_V(mem1), AR(17));
TEST_ASSERT(rv == -1 && errno == EINVAL, "ioctl allows ARs > 15");
- run->psw_mask &= ~(3UL << (63 - 17)); /* Disable AR mode */
- vcpu_run(vm, VCPU_ID); /* Run to sync new state */
+ t.run->psw_mask &= ~(3UL << (63 - 17)); /* Disable AR mode */
+ HOST_SYNC(t.vcpu, STAGE_IDLED); /* Run to sync new state */
- kvm_vm_free(vm);
+ /* Check that the SIDA calls are rejected for non-protected guests */
+ rv = ERR_MOP(t.vcpu, SIDA, READ, mem1, 8, GADDR(0), SIDA_OFFSET(0x1c0));
+ TEST_ASSERT(rv == -1 && errno == EINVAL,
+ "ioctl does not reject SIDA_READ in non-protected mode");
+ rv = ERR_MOP(t.vcpu, SIDA, WRITE, mem1, 8, GADDR(0), SIDA_OFFSET(0x1c0));
+ TEST_ASSERT(rv == -1 && errno == EINVAL,
+ "ioctl does not reject SIDA_WRITE in non-protected mode");
- return 0;
+ kvm_vm_free(t.kvm_vm);
+}
+
+static void test_errors_cmpxchg(void)
+{
+ struct test_default t = test_default_init(guest_idle);
+ __uint128_t old;
+ int rv, i, power = 1;
+
+ HOST_SYNC(t.vcpu, STAGE_INITED);
+
+ for (i = 0; i < 32; i++) {
+ if (i == power) {
+ power *= 2;
+ continue;
+ }
+ rv = ERR_MOP(t.vm, ABSOLUTE, CMPXCHG, mem1, i, GADDR_V(mem1),
+ CMPXCHG_OLD(&old));
+ TEST_ASSERT(rv == -1 && errno == EINVAL,
+ "ioctl allows bad size for cmpxchg");
+ }
+ for (i = 1; i <= 16; i *= 2) {
+ rv = ERR_MOP(t.vm, ABSOLUTE, CMPXCHG, mem1, i, GADDR((void *)~0xfffUL),
+ CMPXCHG_OLD(&old));
+ TEST_ASSERT(rv > 0, "ioctl allows bad guest address for cmpxchg");
+ }
+ for (i = 2; i <= 16; i *= 2) {
+ rv = ERR_MOP(t.vm, ABSOLUTE, CMPXCHG, mem1, i, GADDR_V(mem1 + 1),
+ CMPXCHG_OLD(&old));
+ TEST_ASSERT(rv == -1 && errno == EINVAL,
+ "ioctl allows bad alignment for cmpxchg");
+ }
+
+ kvm_vm_free(t.kvm_vm);
+}
+
+int main(int argc, char *argv[])
+{
+ int extension_cap, idx;
+
+ TEST_REQUIRE(kvm_has_cap(KVM_CAP_S390_MEM_OP));
+ extension_cap = kvm_check_cap(KVM_CAP_S390_MEM_OP_EXTENSION);
+
+ struct testdef {
+ const char *name;
+ void (*test)(void);
+ bool requirements_met;
+ } testlist[] = {
+ {
+ .name = "simple copy",
+ .test = test_copy,
+ .requirements_met = true,
+ },
+ {
+ .name = "generic error checks",
+ .test = test_errors,
+ .requirements_met = true,
+ },
+ {
+ .name = "copy with storage keys",
+ .test = test_copy_key,
+ .requirements_met = extension_cap > 0,
+ },
+ {
+ .name = "cmpxchg with storage keys",
+ .test = test_cmpxchg_key,
+ .requirements_met = extension_cap & 0x2,
+ },
+ {
+ .name = "concurrently cmpxchg with storage keys",
+ .test = test_cmpxchg_key_concurrent,
+ .requirements_met = extension_cap & 0x2,
+ },
+ {
+ .name = "copy with key storage protection override",
+ .test = test_copy_key_storage_prot_override,
+ .requirements_met = extension_cap > 0,
+ },
+ {
+ .name = "copy with key fetch protection",
+ .test = test_copy_key_fetch_prot,
+ .requirements_met = extension_cap > 0,
+ },
+ {
+ .name = "copy with key fetch protection override",
+ .test = test_copy_key_fetch_prot_override,
+ .requirements_met = extension_cap > 0,
+ },
+ {
+ .name = "copy with access register mode",
+ .test = test_copy_access_register,
+ .requirements_met = true,
+ },
+ {
+ .name = "error checks with key",
+ .test = test_errors_key,
+ .requirements_met = extension_cap > 0,
+ },
+ {
+ .name = "error checks for cmpxchg with key",
+ .test = test_errors_cmpxchg_key,
+ .requirements_met = extension_cap & 0x2,
+ },
+ {
+ .name = "error checks for cmpxchg",
+ .test = test_errors_cmpxchg,
+ .requirements_met = extension_cap & 0x2,
+ },
+ {
+ .name = "termination",
+ .test = test_termination,
+ .requirements_met = extension_cap > 0,
+ },
+ {
+ .name = "error checks with key storage protection override",
+ .test = test_errors_key_storage_prot_override,
+ .requirements_met = extension_cap > 0,
+ },
+ {
+ .name = "error checks without key fetch prot override",
+ .test = test_errors_key_fetch_prot_override_not_enabled,
+ .requirements_met = extension_cap > 0,
+ },
+ {
+ .name = "error checks with key fetch prot override",
+ .test = test_errors_key_fetch_prot_override_enabled,
+ .requirements_met = extension_cap > 0,
+ },
+ };
+
+ ksft_print_header();
+ ksft_set_plan(ARRAY_SIZE(testlist));
+
+ for (idx = 0; idx < ARRAY_SIZE(testlist); idx++) {
+ if (testlist[idx].requirements_met) {
+ testlist[idx].test();
+ ksft_test_result_pass("%s\n", testlist[idx].name);
+ } else {
+ ksft_test_result_skip("%s - requirements not met (kernel has extension cap %#x)\n",
+ testlist[idx].name, extension_cap);
+ }
+ }
+
+ ksft_finished(); /* Print results and exit() accordingly */
}
diff --git a/tools/testing/selftests/kvm/s390x/resets.c b/tools/testing/selftests/kvm/s390x/resets.c
index b143db6d8693..357943f2bea8 100644
--- a/tools/testing/selftests/kvm/s390x/resets.c
+++ b/tools/testing/selftests/kvm/s390x/resets.c
@@ -12,15 +12,14 @@
#include "test_util.h"
#include "kvm_util.h"
+#include "kselftest.h"
-#define VCPU_ID 3
#define LOCAL_IRQS 32
-struct kvm_s390_irq buf[VCPU_ID + LOCAL_IRQS];
+#define ARBITRARY_NON_ZERO_VCPU_ID 3
+
+struct kvm_s390_irq buf[ARBITRARY_NON_ZERO_VCPU_ID + LOCAL_IRQS];
-struct kvm_vm *vm;
-struct kvm_run *run;
-struct kvm_sync_regs *sync_regs;
static uint8_t regs_null[512];
static void guest_code_initial(void)
@@ -58,47 +57,45 @@ static void guest_code_initial(void)
);
}
-static void test_one_reg(uint64_t id, uint64_t value)
+static void test_one_reg(struct kvm_vcpu *vcpu, uint64_t id, uint64_t value)
{
- struct kvm_one_reg reg;
uint64_t eval_reg;
- reg.addr = (uintptr_t)&eval_reg;
- reg.id = id;
- vcpu_get_reg(vm, VCPU_ID, &reg);
+ vcpu_get_reg(vcpu, id, &eval_reg);
TEST_ASSERT(eval_reg == value, "value == 0x%lx", value);
}
-static void assert_noirq(void)
+static void assert_noirq(struct kvm_vcpu *vcpu)
{
struct kvm_s390_irq_state irq_state;
int irqs;
irq_state.len = sizeof(buf);
irq_state.buf = (unsigned long)buf;
- irqs = _vcpu_ioctl(vm, VCPU_ID, KVM_S390_GET_IRQ_STATE, &irq_state);
+ irqs = __vcpu_ioctl(vcpu, KVM_S390_GET_IRQ_STATE, &irq_state);
/*
* irqs contains the number of retrieved interrupts. Any interrupt
* (notably, the emergency call interrupt we have injected) should
* be cleared by the resets, so this should be 0.
*/
- TEST_ASSERT(irqs >= 0, "Could not fetch IRQs: errno %d\n", errno);
+ TEST_ASSERT(irqs >= 0, "Could not fetch IRQs: errno %d", errno);
TEST_ASSERT(!irqs, "IRQ pending");
}
-static void assert_clear(void)
+static void assert_clear(struct kvm_vcpu *vcpu)
{
+ struct kvm_sync_regs *sync_regs = &vcpu->run->s.regs;
struct kvm_sregs sregs;
struct kvm_regs regs;
struct kvm_fpu fpu;
- vcpu_regs_get(vm, VCPU_ID, &regs);
+ vcpu_regs_get(vcpu, &regs);
TEST_ASSERT(!memcmp(&regs.gprs, regs_null, sizeof(regs.gprs)), "grs == 0");
- vcpu_sregs_get(vm, VCPU_ID, &sregs);
+ vcpu_sregs_get(vcpu, &sregs);
TEST_ASSERT(!memcmp(&sregs.acrs, regs_null, sizeof(sregs.acrs)), "acrs == 0");
- vcpu_fpu_get(vm, VCPU_ID, &fpu);
+ vcpu_fpu_get(vcpu, &fpu);
TEST_ASSERT(!memcmp(&fpu.fprs, regs_null, sizeof(fpu.fprs)), "fprs == 0");
/* sync regs */
@@ -112,8 +109,10 @@ static void assert_clear(void)
"vrs0-15 == 0 (sync_regs)");
}
-static void assert_initial_noclear(void)
+static void assert_initial_noclear(struct kvm_vcpu *vcpu)
{
+ struct kvm_sync_regs *sync_regs = &vcpu->run->s.regs;
+
TEST_ASSERT(sync_regs->gprs[0] == 0xffff000000000000UL,
"gpr0 == 0xffff000000000000 (sync_regs)");
TEST_ASSERT(sync_regs->gprs[1] == 0x0000555500000000UL,
@@ -127,13 +126,14 @@ static void assert_initial_noclear(void)
TEST_ASSERT(sync_regs->acrs[9] == 1, "ar9 == 1 (sync_regs)");
}
-static void assert_initial(void)
+static void assert_initial(struct kvm_vcpu *vcpu)
{
+ struct kvm_sync_regs *sync_regs = &vcpu->run->s.regs;
struct kvm_sregs sregs;
struct kvm_fpu fpu;
/* KVM_GET_SREGS */
- vcpu_sregs_get(vm, VCPU_ID, &sregs);
+ vcpu_sregs_get(vcpu, &sregs);
TEST_ASSERT(sregs.crs[0] == 0xE0UL, "cr0 == 0xE0 (KVM_GET_SREGS)");
TEST_ASSERT(sregs.crs[14] == 0xC2000000UL,
"cr14 == 0xC2000000 (KVM_GET_SREGS)");
@@ -156,36 +156,38 @@ static void assert_initial(void)
TEST_ASSERT(sync_regs->gbea == 1, "gbea == 1 (sync_regs)");
/* kvm_run */
- TEST_ASSERT(run->psw_addr == 0, "psw_addr == 0 (kvm_run)");
- TEST_ASSERT(run->psw_mask == 0, "psw_mask == 0 (kvm_run)");
+ TEST_ASSERT(vcpu->run->psw_addr == 0, "psw_addr == 0 (kvm_run)");
+ TEST_ASSERT(vcpu->run->psw_mask == 0, "psw_mask == 0 (kvm_run)");
- vcpu_fpu_get(vm, VCPU_ID, &fpu);
+ vcpu_fpu_get(vcpu, &fpu);
TEST_ASSERT(!fpu.fpc, "fpc == 0");
- test_one_reg(KVM_REG_S390_GBEA, 1);
- test_one_reg(KVM_REG_S390_PP, 0);
- test_one_reg(KVM_REG_S390_TODPR, 0);
- test_one_reg(KVM_REG_S390_CPU_TIMER, 0);
- test_one_reg(KVM_REG_S390_CLOCK_COMP, 0);
+ test_one_reg(vcpu, KVM_REG_S390_GBEA, 1);
+ test_one_reg(vcpu, KVM_REG_S390_PP, 0);
+ test_one_reg(vcpu, KVM_REG_S390_TODPR, 0);
+ test_one_reg(vcpu, KVM_REG_S390_CPU_TIMER, 0);
+ test_one_reg(vcpu, KVM_REG_S390_CLOCK_COMP, 0);
}
-static void assert_normal_noclear(void)
+static void assert_normal_noclear(struct kvm_vcpu *vcpu)
{
+ struct kvm_sync_regs *sync_regs = &vcpu->run->s.regs;
+
TEST_ASSERT(sync_regs->crs[2] == 0x10, "cr2 == 10 (sync_regs)");
TEST_ASSERT(sync_regs->crs[8] == 1, "cr10 == 1 (sync_regs)");
TEST_ASSERT(sync_regs->crs[10] == 1, "cr10 == 1 (sync_regs)");
TEST_ASSERT(sync_regs->crs[11] == -1, "cr11 == -1 (sync_regs)");
}
-static void assert_normal(void)
+static void assert_normal(struct kvm_vcpu *vcpu)
{
- test_one_reg(KVM_REG_S390_PFTOKEN, KVM_S390_PFAULT_TOKEN_INVALID);
- TEST_ASSERT(sync_regs->pft == KVM_S390_PFAULT_TOKEN_INVALID,
+ test_one_reg(vcpu, KVM_REG_S390_PFTOKEN, KVM_S390_PFAULT_TOKEN_INVALID);
+ TEST_ASSERT(vcpu->run->s.regs.pft == KVM_S390_PFAULT_TOKEN_INVALID,
"pft == 0xff..... (sync_regs)");
- assert_noirq();
+ assert_noirq(vcpu);
}
-static void inject_irq(int cpu_id)
+static void inject_irq(struct kvm_vcpu *vcpu)
{
struct kvm_s390_irq_state irq_state;
struct kvm_s390_irq *irq = &buf[0];
@@ -195,85 +197,117 @@ static void inject_irq(int cpu_id)
irq_state.len = sizeof(struct kvm_s390_irq);
irq_state.buf = (unsigned long)buf;
irq->type = KVM_S390_INT_EMERGENCY;
- irq->u.emerg.code = cpu_id;
- irqs = _vcpu_ioctl(vm, cpu_id, KVM_S390_SET_IRQ_STATE, &irq_state);
- TEST_ASSERT(irqs >= 0, "Error injecting EMERGENCY IRQ errno %d\n", errno);
+ irq->u.emerg.code = vcpu->id;
+ irqs = __vcpu_ioctl(vcpu, KVM_S390_SET_IRQ_STATE, &irq_state);
+ TEST_ASSERT(irqs >= 0, "Error injecting EMERGENCY IRQ errno %d", errno);
+}
+
+static struct kvm_vm *create_vm(struct kvm_vcpu **vcpu)
+{
+ struct kvm_vm *vm;
+
+ vm = vm_create(1);
+
+ *vcpu = vm_vcpu_add(vm, ARBITRARY_NON_ZERO_VCPU_ID, guest_code_initial);
+
+ return vm;
}
static void test_normal(void)
{
- pr_info("Testing normal reset\n");
- /* Create VM */
- vm = vm_create_default(VCPU_ID, 0, guest_code_initial);
- run = vcpu_state(vm, VCPU_ID);
- sync_regs = &run->s.regs;
+ struct kvm_vcpu *vcpu;
+ struct kvm_vm *vm;
+
+ ksft_print_msg("Testing normal reset\n");
+ vm = create_vm(&vcpu);
- vcpu_run(vm, VCPU_ID);
+ vcpu_run(vcpu);
- inject_irq(VCPU_ID);
+ inject_irq(vcpu);
- vcpu_ioctl(vm, VCPU_ID, KVM_S390_NORMAL_RESET, 0);
+ vcpu_ioctl(vcpu, KVM_S390_NORMAL_RESET, NULL);
/* must clears */
- assert_normal();
+ assert_normal(vcpu);
/* must not clears */
- assert_normal_noclear();
- assert_initial_noclear();
+ assert_normal_noclear(vcpu);
+ assert_initial_noclear(vcpu);
kvm_vm_free(vm);
}
static void test_initial(void)
{
- pr_info("Testing initial reset\n");
- vm = vm_create_default(VCPU_ID, 0, guest_code_initial);
- run = vcpu_state(vm, VCPU_ID);
- sync_regs = &run->s.regs;
+ struct kvm_vcpu *vcpu;
+ struct kvm_vm *vm;
- vcpu_run(vm, VCPU_ID);
+ ksft_print_msg("Testing initial reset\n");
+ vm = create_vm(&vcpu);
- inject_irq(VCPU_ID);
+ vcpu_run(vcpu);
- vcpu_ioctl(vm, VCPU_ID, KVM_S390_INITIAL_RESET, 0);
+ inject_irq(vcpu);
+
+ vcpu_ioctl(vcpu, KVM_S390_INITIAL_RESET, NULL);
/* must clears */
- assert_normal();
- assert_initial();
+ assert_normal(vcpu);
+ assert_initial(vcpu);
/* must not clears */
- assert_initial_noclear();
+ assert_initial_noclear(vcpu);
kvm_vm_free(vm);
}
static void test_clear(void)
{
- pr_info("Testing clear reset\n");
- vm = vm_create_default(VCPU_ID, 0, guest_code_initial);
- run = vcpu_state(vm, VCPU_ID);
- sync_regs = &run->s.regs;
+ struct kvm_vcpu *vcpu;
+ struct kvm_vm *vm;
+
+ ksft_print_msg("Testing clear reset\n");
+ vm = create_vm(&vcpu);
- vcpu_run(vm, VCPU_ID);
+ vcpu_run(vcpu);
- inject_irq(VCPU_ID);
+ inject_irq(vcpu);
- vcpu_ioctl(vm, VCPU_ID, KVM_S390_CLEAR_RESET, 0);
+ vcpu_ioctl(vcpu, KVM_S390_CLEAR_RESET, NULL);
/* must clears */
- assert_normal();
- assert_initial();
- assert_clear();
+ assert_normal(vcpu);
+ assert_initial(vcpu);
+ assert_clear(vcpu);
kvm_vm_free(vm);
}
+struct testdef {
+ const char *name;
+ void (*test)(void);
+ bool needs_cap;
+} testlist[] = {
+ { "initial", test_initial, false },
+ { "normal", test_normal, true },
+ { "clear", test_clear, true },
+};
+
int main(int argc, char *argv[])
{
- setbuf(stdout, NULL); /* Tell stdout not to buffer its content */
-
- test_initial();
- if (kvm_check_cap(KVM_CAP_S390_VCPU_RESETS)) {
- test_normal();
- test_clear();
+ bool has_s390_vcpu_resets = kvm_check_cap(KVM_CAP_S390_VCPU_RESETS);
+ int idx;
+
+ ksft_print_header();
+ ksft_set_plan(ARRAY_SIZE(testlist));
+
+ for (idx = 0; idx < ARRAY_SIZE(testlist); idx++) {
+ if (!testlist[idx].needs_cap || has_s390_vcpu_resets) {
+ testlist[idx].test();
+ ksft_test_result_pass("%s\n", testlist[idx].name);
+ } else {
+ ksft_test_result_skip("%s - no VCPU_RESETS capability\n",
+ testlist[idx].name);
+ }
}
- return 0;
+
+ ksft_finished(); /* Print results and exit() accordingly */
}
diff --git a/tools/testing/selftests/kvm/s390x/sync_regs_test.c b/tools/testing/selftests/kvm/s390x/sync_regs_test.c
index 5731ccf34917..43fb25ddc3ec 100644
--- a/tools/testing/selftests/kvm/s390x/sync_regs_test.c
+++ b/tools/testing/selftests/kvm/s390x/sync_regs_test.c
@@ -20,8 +20,8 @@
#include "test_util.h"
#include "kvm_util.h"
-
-#define VCPU_ID 5
+#include "diag318_test_handler.h"
+#include "kselftest.h"
static void guest_code(void)
{
@@ -39,13 +39,13 @@ static void guest_code(void)
#define REG_COMPARE(reg) \
TEST_ASSERT(left->reg == right->reg, \
"Register " #reg \
- " values did not match: 0x%llx, 0x%llx\n", \
+ " values did not match: 0x%llx, 0x%llx", \
left->reg, right->reg)
#define REG_COMPARE32(reg) \
TEST_ASSERT(left->reg == right->reg, \
"Register " #reg \
- " values did not match: 0x%x, 0x%x\n", \
+ " values did not match: 0x%x, 0x%x", \
left->reg, right->reg)
@@ -70,81 +70,83 @@ static void compare_sregs(struct kvm_sregs *left, struct kvm_sync_regs *right)
#undef REG_COMPARE
-#define TEST_SYNC_FIELDS (KVM_SYNC_GPRS|KVM_SYNC_ACRS|KVM_SYNC_CRS)
+#define TEST_SYNC_FIELDS (KVM_SYNC_GPRS|KVM_SYNC_ACRS|KVM_SYNC_CRS|KVM_SYNC_DIAG318)
#define INVALID_SYNC_FIELD 0x80000000
-int main(int argc, char *argv[])
+void test_read_invalid(struct kvm_vcpu *vcpu)
{
- struct kvm_vm *vm;
- struct kvm_run *run;
- struct kvm_regs regs;
- struct kvm_sregs sregs;
- int rv, cap;
-
- /* Tell stdout not to buffer its content */
- setbuf(stdout, NULL);
-
- cap = kvm_check_cap(KVM_CAP_SYNC_REGS);
- if (!cap) {
- print_skip("CAP_SYNC_REGS not supported");
- exit(KSFT_SKIP);
- }
-
- /* Create VM */
- vm = vm_create_default(VCPU_ID, 0, guest_code);
-
- run = vcpu_state(vm, VCPU_ID);
+ struct kvm_run *run = vcpu->run;
+ int rv;
/* Request reading invalid register set from VCPU. */
run->kvm_valid_regs = INVALID_SYNC_FIELD;
- rv = _vcpu_run(vm, VCPU_ID);
+ rv = _vcpu_run(vcpu);
TEST_ASSERT(rv < 0 && errno == EINVAL,
- "Invalid kvm_valid_regs did not cause expected KVM_RUN error: %d\n",
+ "Invalid kvm_valid_regs did not cause expected KVM_RUN error: %d",
rv);
- vcpu_state(vm, VCPU_ID)->kvm_valid_regs = 0;
+ run->kvm_valid_regs = 0;
run->kvm_valid_regs = INVALID_SYNC_FIELD | TEST_SYNC_FIELDS;
- rv = _vcpu_run(vm, VCPU_ID);
+ rv = _vcpu_run(vcpu);
TEST_ASSERT(rv < 0 && errno == EINVAL,
- "Invalid kvm_valid_regs did not cause expected KVM_RUN error: %d\n",
+ "Invalid kvm_valid_regs did not cause expected KVM_RUN error: %d",
rv);
- vcpu_state(vm, VCPU_ID)->kvm_valid_regs = 0;
+ run->kvm_valid_regs = 0;
+}
+
+void test_set_invalid(struct kvm_vcpu *vcpu)
+{
+ struct kvm_run *run = vcpu->run;
+ int rv;
/* Request setting invalid register set into VCPU. */
run->kvm_dirty_regs = INVALID_SYNC_FIELD;
- rv = _vcpu_run(vm, VCPU_ID);
+ rv = _vcpu_run(vcpu);
TEST_ASSERT(rv < 0 && errno == EINVAL,
- "Invalid kvm_dirty_regs did not cause expected KVM_RUN error: %d\n",
+ "Invalid kvm_dirty_regs did not cause expected KVM_RUN error: %d",
rv);
- vcpu_state(vm, VCPU_ID)->kvm_dirty_regs = 0;
+ run->kvm_dirty_regs = 0;
run->kvm_dirty_regs = INVALID_SYNC_FIELD | TEST_SYNC_FIELDS;
- rv = _vcpu_run(vm, VCPU_ID);
+ rv = _vcpu_run(vcpu);
TEST_ASSERT(rv < 0 && errno == EINVAL,
- "Invalid kvm_dirty_regs did not cause expected KVM_RUN error: %d\n",
+ "Invalid kvm_dirty_regs did not cause expected KVM_RUN error: %d",
rv);
- vcpu_state(vm, VCPU_ID)->kvm_dirty_regs = 0;
+ run->kvm_dirty_regs = 0;
+}
+
+void test_req_and_verify_all_valid_regs(struct kvm_vcpu *vcpu)
+{
+ struct kvm_run *run = vcpu->run;
+ struct kvm_sregs sregs;
+ struct kvm_regs regs;
+ int rv;
/* Request and verify all valid register sets. */
run->kvm_valid_regs = TEST_SYNC_FIELDS;
- rv = _vcpu_run(vm, VCPU_ID);
- TEST_ASSERT(rv == 0, "vcpu_run failed: %d\n", rv);
- TEST_ASSERT(run->exit_reason == KVM_EXIT_S390_SIEIC,
- "Unexpected exit reason: %u (%s)\n",
- run->exit_reason,
- exit_reason_str(run->exit_reason));
+ rv = _vcpu_run(vcpu);
+ TEST_ASSERT(rv == 0, "vcpu_run failed: %d", rv);
+ TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_S390_SIEIC);
TEST_ASSERT(run->s390_sieic.icptcode == 4 &&
(run->s390_sieic.ipa >> 8) == 0x83 &&
(run->s390_sieic.ipb >> 16) == 0x501,
- "Unexpected interception code: ic=%u, ipa=0x%x, ipb=0x%x\n",
+ "Unexpected interception code: ic=%u, ipa=0x%x, ipb=0x%x",
run->s390_sieic.icptcode, run->s390_sieic.ipa,
run->s390_sieic.ipb);
- vcpu_regs_get(vm, VCPU_ID, &regs);
+ vcpu_regs_get(vcpu, &regs);
compare_regs(&regs, &run->s.regs);
- vcpu_sregs_get(vm, VCPU_ID, &sregs);
+ vcpu_sregs_get(vcpu, &sregs);
compare_sregs(&sregs, &run->s.regs);
+}
+
+void test_set_and_verify_various_reg_values(struct kvm_vcpu *vcpu)
+{
+ struct kvm_run *run = vcpu->run;
+ struct kvm_sregs sregs;
+ struct kvm_regs regs;
+ int rv;
/* Set and verify various register values */
run->s.regs.gprs[11] = 0xBAD1DEA;
@@ -152,24 +154,36 @@ int main(int argc, char *argv[])
run->kvm_valid_regs = TEST_SYNC_FIELDS;
run->kvm_dirty_regs = KVM_SYNC_GPRS | KVM_SYNC_ACRS;
- rv = _vcpu_run(vm, VCPU_ID);
- TEST_ASSERT(rv == 0, "vcpu_run failed: %d\n", rv);
- TEST_ASSERT(run->exit_reason == KVM_EXIT_S390_SIEIC,
- "Unexpected exit reason: %u (%s)\n",
- run->exit_reason,
- exit_reason_str(run->exit_reason));
+
+ if (get_diag318_info() > 0) {
+ run->s.regs.diag318 = get_diag318_info();
+ run->kvm_dirty_regs |= KVM_SYNC_DIAG318;
+ }
+
+ rv = _vcpu_run(vcpu);
+ TEST_ASSERT(rv == 0, "vcpu_run failed: %d", rv);
+ TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_S390_SIEIC);
TEST_ASSERT(run->s.regs.gprs[11] == 0xBAD1DEA + 1,
"r11 sync regs value incorrect 0x%llx.",
run->s.regs.gprs[11]);
TEST_ASSERT(run->s.regs.acrs[0] == 1 << 11,
"acr0 sync regs value incorrect 0x%x.",
run->s.regs.acrs[0]);
+ TEST_ASSERT(run->s.regs.diag318 == get_diag318_info(),
+ "diag318 sync regs value incorrect 0x%llx.",
+ run->s.regs.diag318);
- vcpu_regs_get(vm, VCPU_ID, &regs);
+ vcpu_regs_get(vcpu, &regs);
compare_regs(&regs, &run->s.regs);
- vcpu_sregs_get(vm, VCPU_ID, &sregs);
+ vcpu_sregs_get(vcpu, &sregs);
compare_sregs(&sregs, &run->s.regs);
+}
+
+void test_clear_kvm_dirty_regs_bits(struct kvm_vcpu *vcpu)
+{
+ struct kvm_run *run = vcpu->run;
+ int rv;
/* Clear kvm_dirty_regs bits, verify new s.regs values are
* overwritten with existing guest values.
@@ -177,17 +191,50 @@ int main(int argc, char *argv[])
run->kvm_valid_regs = TEST_SYNC_FIELDS;
run->kvm_dirty_regs = 0;
run->s.regs.gprs[11] = 0xDEADBEEF;
- rv = _vcpu_run(vm, VCPU_ID);
- TEST_ASSERT(rv == 0, "vcpu_run failed: %d\n", rv);
- TEST_ASSERT(run->exit_reason == KVM_EXIT_S390_SIEIC,
- "Unexpected exit reason: %u (%s)\n",
- run->exit_reason,
- exit_reason_str(run->exit_reason));
+ run->s.regs.diag318 = 0x4B1D;
+ rv = _vcpu_run(vcpu);
+ TEST_ASSERT(rv == 0, "vcpu_run failed: %d", rv);
+ TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_S390_SIEIC);
TEST_ASSERT(run->s.regs.gprs[11] != 0xDEADBEEF,
"r11 sync regs value incorrect 0x%llx.",
run->s.regs.gprs[11]);
+ TEST_ASSERT(run->s.regs.diag318 != 0x4B1D,
+ "diag318 sync regs value incorrect 0x%llx.",
+ run->s.regs.diag318);
+}
+
+struct testdef {
+ const char *name;
+ void (*test)(struct kvm_vcpu *vcpu);
+} testlist[] = {
+ { "read invalid", test_read_invalid },
+ { "set invalid", test_set_invalid },
+ { "request+verify all valid regs", test_req_and_verify_all_valid_regs },
+ { "set+verify various regs", test_set_and_verify_various_reg_values },
+ { "clear kvm_dirty_regs bits", test_clear_kvm_dirty_regs_bits },
+};
+
+int main(int argc, char *argv[])
+{
+ struct kvm_vcpu *vcpu;
+ struct kvm_vm *vm;
+ int idx;
+
+ TEST_REQUIRE(kvm_has_cap(KVM_CAP_SYNC_REGS));
+
+ ksft_print_header();
+
+ ksft_set_plan(ARRAY_SIZE(testlist));
+
+ /* Create VM */
+ vm = vm_create_with_one_vcpu(&vcpu, guest_code);
+
+ for (idx = 0; idx < ARRAY_SIZE(testlist); idx++) {
+ testlist[idx].test(vcpu);
+ ksft_test_result_pass("%s\n", testlist[idx].name);
+ }
kvm_vm_free(vm);
- return 0;
+ ksft_finished(); /* Print results and exit() accordingly */
}
diff --git a/tools/testing/selftests/kvm/s390x/tprot.c b/tools/testing/selftests/kvm/s390x/tprot.c
new file mode 100644
index 000000000000..c73f948c9b63
--- /dev/null
+++ b/tools/testing/selftests/kvm/s390x/tprot.c
@@ -0,0 +1,244 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * Test TEST PROTECTION emulation.
+ *
+ * Copyright IBM Corp. 2021
+ */
+#include <sys/mman.h>
+#include "test_util.h"
+#include "kvm_util.h"
+#include "kselftest.h"
+
+#define PAGE_SHIFT 12
+#define PAGE_SIZE (1 << PAGE_SHIFT)
+#define CR0_FETCH_PROTECTION_OVERRIDE (1UL << (63 - 38))
+#define CR0_STORAGE_PROTECTION_OVERRIDE (1UL << (63 - 39))
+
+static __aligned(PAGE_SIZE) uint8_t pages[2][PAGE_SIZE];
+static uint8_t *const page_store_prot = pages[0];
+static uint8_t *const page_fetch_prot = pages[1];
+
+/* Nonzero return value indicates that address not mapped */
+static int set_storage_key(void *addr, uint8_t key)
+{
+ int not_mapped = 0;
+
+ asm volatile (
+ "lra %[addr], 0(0,%[addr])\n"
+ " jz 0f\n"
+ " llill %[not_mapped],1\n"
+ " j 1f\n"
+ "0: sske %[key], %[addr]\n"
+ "1:"
+ : [addr] "+&a" (addr), [not_mapped] "+r" (not_mapped)
+ : [key] "r" (key)
+ : "cc"
+ );
+ return -not_mapped;
+}
+
+enum permission {
+ READ_WRITE = 0,
+ READ = 1,
+ RW_PROTECTED = 2,
+ TRANSL_UNAVAIL = 3,
+};
+
+static enum permission test_protection(void *addr, uint8_t key)
+{
+ uint64_t mask;
+
+ asm volatile (
+ "tprot %[addr], 0(%[key])\n"
+ " ipm %[mask]\n"
+ : [mask] "=r" (mask)
+ : [addr] "Q" (*(char *)addr),
+ [key] "a" (key)
+ : "cc"
+ );
+
+ return (enum permission)(mask >> 28);
+}
+
+enum stage {
+ STAGE_INIT_SIMPLE,
+ TEST_SIMPLE,
+ STAGE_INIT_FETCH_PROT_OVERRIDE,
+ TEST_FETCH_PROT_OVERRIDE,
+ TEST_STORAGE_PROT_OVERRIDE,
+ STAGE_END /* must be the last entry (it's the amount of tests) */
+};
+
+struct test {
+ enum stage stage;
+ void *addr;
+ uint8_t key;
+ enum permission expected;
+} tests[] = {
+ /*
+ * We perform each test in the array by executing TEST PROTECTION on
+ * the specified addr with the specified key and checking if the returned
+ * permissions match the expected value.
+ * Both guest and host cooperate to set up the required test conditions.
+ * A central condition is that the page targeted by addr has to be DAT
+ * protected in the host mappings, in order for KVM to emulate the
+ * TEST PROTECTION instruction.
+ * Since the page tables are shared, the host uses mprotect to achieve
+ * this.
+ *
+ * Test resulting in RW_PROTECTED/TRANSL_UNAVAIL will be interpreted
+ * by SIE, not KVM, but there is no harm in testing them also.
+ * See Enhanced Suppression-on-Protection Facilities in the
+ * Interpretive-Execution Mode
+ */
+ /*
+ * guest: set storage key of page_store_prot to 1
+ * storage key of page_fetch_prot to 9 and enable
+ * protection for it
+ * STAGE_INIT_SIMPLE
+ * host: write protect both via mprotect
+ */
+ /* access key 0 matches any storage key -> RW */
+ { TEST_SIMPLE, page_store_prot, 0x00, READ_WRITE },
+ /* access key matches storage key -> RW */
+ { TEST_SIMPLE, page_store_prot, 0x10, READ_WRITE },
+ /* mismatched keys, but no fetch protection -> RO */
+ { TEST_SIMPLE, page_store_prot, 0x20, READ },
+ /* access key 0 matches any storage key -> RW */
+ { TEST_SIMPLE, page_fetch_prot, 0x00, READ_WRITE },
+ /* access key matches storage key -> RW */
+ { TEST_SIMPLE, page_fetch_prot, 0x90, READ_WRITE },
+ /* mismatched keys, fetch protection -> inaccessible */
+ { TEST_SIMPLE, page_fetch_prot, 0x10, RW_PROTECTED },
+ /* page 0 not mapped yet -> translation not available */
+ { TEST_SIMPLE, (void *)0x00, 0x10, TRANSL_UNAVAIL },
+ /*
+ * host: try to map page 0
+ * guest: set storage key of page 0 to 9 and enable fetch protection
+ * STAGE_INIT_FETCH_PROT_OVERRIDE
+ * host: write protect page 0
+ * enable fetch protection override
+ */
+ /* mismatched keys, fetch protection, but override applies -> RO */
+ { TEST_FETCH_PROT_OVERRIDE, (void *)0x00, 0x10, READ },
+ /* mismatched keys, fetch protection, override applies to 0-2048 only -> inaccessible */
+ { TEST_FETCH_PROT_OVERRIDE, (void *)2049, 0x10, RW_PROTECTED },
+ /*
+ * host: enable storage protection override
+ */
+ /* mismatched keys, but override applies (storage key 9) -> RW */
+ { TEST_STORAGE_PROT_OVERRIDE, page_fetch_prot, 0x10, READ_WRITE },
+ /* mismatched keys, no fetch protection, override doesn't apply -> RO */
+ { TEST_STORAGE_PROT_OVERRIDE, page_store_prot, 0x20, READ },
+ /* mismatched keys, but override applies (storage key 9) -> RW */
+ { TEST_STORAGE_PROT_OVERRIDE, (void *)2049, 0x10, READ_WRITE },
+ /* end marker */
+ { STAGE_END, 0, 0, 0 },
+};
+
+static enum stage perform_next_stage(int *i, bool mapped_0)
+{
+ enum stage stage = tests[*i].stage;
+ enum permission result;
+ bool skip;
+
+ for (; tests[*i].stage == stage; (*i)++) {
+ /*
+ * Some fetch protection override tests require that page 0
+ * be mapped, however, when the hosts tries to map that page via
+ * vm_vaddr_alloc, it may happen that some other page gets mapped
+ * instead.
+ * In order to skip these tests we detect this inside the guest
+ */
+ skip = tests[*i].addr < (void *)4096 &&
+ tests[*i].expected != TRANSL_UNAVAIL &&
+ !mapped_0;
+ if (!skip) {
+ result = test_protection(tests[*i].addr, tests[*i].key);
+ __GUEST_ASSERT(result == tests[*i].expected,
+ "Wanted %u, got %u, for i = %u",
+ tests[*i].expected, result, *i);
+ }
+ }
+ return stage;
+}
+
+static void guest_code(void)
+{
+ bool mapped_0;
+ int i = 0;
+
+ GUEST_ASSERT_EQ(set_storage_key(page_store_prot, 0x10), 0);
+ GUEST_ASSERT_EQ(set_storage_key(page_fetch_prot, 0x98), 0);
+ GUEST_SYNC(STAGE_INIT_SIMPLE);
+ GUEST_SYNC(perform_next_stage(&i, false));
+
+ /* Fetch-protection override */
+ mapped_0 = !set_storage_key((void *)0, 0x98);
+ GUEST_SYNC(STAGE_INIT_FETCH_PROT_OVERRIDE);
+ GUEST_SYNC(perform_next_stage(&i, mapped_0));
+
+ /* Storage-protection override */
+ GUEST_SYNC(perform_next_stage(&i, mapped_0));
+}
+
+#define HOST_SYNC_NO_TAP(vcpup, stage) \
+({ \
+ struct kvm_vcpu *__vcpu = (vcpup); \
+ struct ucall uc; \
+ int __stage = (stage); \
+ \
+ vcpu_run(__vcpu); \
+ get_ucall(__vcpu, &uc); \
+ if (uc.cmd == UCALL_ABORT) \
+ REPORT_GUEST_ASSERT(uc); \
+ TEST_ASSERT_EQ(uc.cmd, UCALL_SYNC); \
+ TEST_ASSERT_EQ(uc.args[1], __stage); \
+})
+
+#define HOST_SYNC(vcpu, stage) \
+({ \
+ HOST_SYNC_NO_TAP(vcpu, stage); \
+ ksft_test_result_pass("" #stage "\n"); \
+})
+
+int main(int argc, char *argv[])
+{
+ struct kvm_vcpu *vcpu;
+ struct kvm_vm *vm;
+ struct kvm_run *run;
+ vm_vaddr_t guest_0_page;
+
+ ksft_print_header();
+ ksft_set_plan(STAGE_END);
+
+ vm = vm_create_with_one_vcpu(&vcpu, guest_code);
+ run = vcpu->run;
+
+ HOST_SYNC(vcpu, STAGE_INIT_SIMPLE);
+ mprotect(addr_gva2hva(vm, (vm_vaddr_t)pages), PAGE_SIZE * 2, PROT_READ);
+ HOST_SYNC(vcpu, TEST_SIMPLE);
+
+ guest_0_page = vm_vaddr_alloc(vm, PAGE_SIZE, 0);
+ if (guest_0_page != 0) {
+ /* Use NO_TAP so we don't get a PASS print */
+ HOST_SYNC_NO_TAP(vcpu, STAGE_INIT_FETCH_PROT_OVERRIDE);
+ ksft_test_result_skip("STAGE_INIT_FETCH_PROT_OVERRIDE - "
+ "Did not allocate page at 0\n");
+ } else {
+ HOST_SYNC(vcpu, STAGE_INIT_FETCH_PROT_OVERRIDE);
+ }
+ if (guest_0_page == 0)
+ mprotect(addr_gva2hva(vm, (vm_vaddr_t)0), PAGE_SIZE, PROT_READ);
+ run->s.regs.crs[0] |= CR0_FETCH_PROTECTION_OVERRIDE;
+ run->kvm_dirty_regs = KVM_SYNC_CRS;
+ HOST_SYNC(vcpu, TEST_FETCH_PROT_OVERRIDE);
+
+ run->s.regs.crs[0] |= CR0_STORAGE_PROTECTION_OVERRIDE;
+ run->kvm_dirty_regs = KVM_SYNC_CRS;
+ HOST_SYNC(vcpu, TEST_STORAGE_PROT_OVERRIDE);
+
+ kvm_vm_free(vm);
+
+ ksft_finished(); /* Print results and exit() accordingly */
+}
diff --git a/tools/testing/selftests/kvm/set_memory_region_test.c b/tools/testing/selftests/kvm/set_memory_region_test.c
index b3ece55a2da6..06b43ed23580 100644
--- a/tools/testing/selftests/kvm/set_memory_region_test.c
+++ b/tools/testing/selftests/kvm/set_memory_region_test.c
@@ -17,8 +17,6 @@
#include <kvm_util.h>
#include <processor.h>
-#define VCPU_ID 0
-
/*
* s390x needs at least 1MB alignment, and the x86_64 MOVE/DELETE tests need a
* 2MB sized and aligned region so that the initial region corresponds to
@@ -54,8 +52,8 @@ static inline uint64_t guest_spin_on_val(uint64_t spin_val)
static void *vcpu_worker(void *data)
{
- struct kvm_vm *vm = data;
- struct kvm_run *run;
+ struct kvm_vcpu *vcpu = data;
+ struct kvm_run *run = vcpu->run;
struct ucall uc;
uint64_t cmd;
@@ -64,13 +62,11 @@ static void *vcpu_worker(void *data)
* which will occur if the guest attempts to access a memslot after it
* has been deleted or while it is being moved .
*/
- run = vcpu_state(vm, VCPU_ID);
-
while (1) {
- vcpu_run(vm, VCPU_ID);
+ vcpu_run(vcpu);
if (run->exit_reason == KVM_EXIT_IO) {
- cmd = get_ucall(vm, VCPU_ID, &uc);
+ cmd = get_ucall(vcpu, &uc);
if (cmd != UCALL_SYNC)
break;
@@ -92,8 +88,7 @@ static void *vcpu_worker(void *data)
}
if (run->exit_reason == KVM_EXIT_IO && cmd == UCALL_ABORT)
- TEST_FAIL("%s at %s:%ld, val = %lu", (const char *)uc.args[0],
- __FILE__, uc.args[1], uc.args[2]);
+ REPORT_GUEST_ASSERT(uc);
return NULL;
}
@@ -103,25 +98,24 @@ static void wait_for_vcpu(void)
struct timespec ts;
TEST_ASSERT(!clock_gettime(CLOCK_REALTIME, &ts),
- "clock_gettime() failed: %d\n", errno);
+ "clock_gettime() failed: %d", errno);
ts.tv_sec += 2;
TEST_ASSERT(!sem_timedwait(&vcpu_ready, &ts),
- "sem_timedwait() failed: %d\n", errno);
+ "sem_timedwait() failed: %d", errno);
/* Wait for the vCPU thread to reenter the guest. */
usleep(100000);
}
-static struct kvm_vm *spawn_vm(pthread_t *vcpu_thread, void *guest_code)
+static struct kvm_vm *spawn_vm(struct kvm_vcpu **vcpu, pthread_t *vcpu_thread,
+ void *guest_code)
{
struct kvm_vm *vm;
uint64_t *hva;
uint64_t gpa;
- vm = vm_create_default(VCPU_ID, 0, guest_code);
-
- vcpu_set_cpuid(vm, VCPU_ID, kvm_get_supported_cpuid());
+ vm = vm_create_with_one_vcpu(vcpu, guest_code);
vm_userspace_mem_region_add(vm, VM_MEM_SRC_ANONYMOUS_THP,
MEM_REGION_GPA, MEM_REGION_SLOT,
@@ -134,13 +128,13 @@ static struct kvm_vm *spawn_vm(pthread_t *vcpu_thread, void *guest_code)
gpa = vm_phy_pages_alloc(vm, 2, MEM_REGION_GPA, MEM_REGION_SLOT);
TEST_ASSERT(gpa == MEM_REGION_GPA, "Failed vm_phy_pages_alloc\n");
- virt_map(vm, MEM_REGION_GPA, MEM_REGION_GPA, 2, 0);
+ virt_map(vm, MEM_REGION_GPA, MEM_REGION_GPA, 2);
/* Ditto for the host mapping so that both pages can be zeroed. */
hva = addr_gpa2hva(vm, MEM_REGION_GPA);
memset(hva, 0, 2 * 4096);
- pthread_create(vcpu_thread, NULL, vcpu_worker, vm);
+ pthread_create(vcpu_thread, NULL, vcpu_worker, *vcpu);
/* Ensure the guest thread is spun up. */
wait_for_vcpu();
@@ -156,16 +150,28 @@ static void guest_code_move_memory_region(void)
GUEST_SYNC(0);
/*
- * Spin until the memory region is moved to a misaligned address. This
- * may or may not trigger MMIO, as the window where the memslot is
- * invalid is quite small.
+ * Spin until the memory region starts getting moved to a
+ * misaligned address.
+ * Every region move may or may not trigger MMIO, as the
+ * window where the memslot is invalid is usually quite small.
*/
val = guest_spin_on_val(0);
- GUEST_ASSERT_1(val == 1 || val == MMIO_VAL, val);
+ __GUEST_ASSERT(val == 1 || val == MMIO_VAL,
+ "Expected '1' or MMIO ('%lx'), got '%lx'", MMIO_VAL, val);
+
+ /* Spin until the misaligning memory region move completes. */
+ val = guest_spin_on_val(MMIO_VAL);
+ __GUEST_ASSERT(val == 1 || val == 0,
+ "Expected '0' or '1' (no MMIO), got '%lx'", val);
+
+ /* Spin until the memory region starts to get re-aligned. */
+ val = guest_spin_on_val(0);
+ __GUEST_ASSERT(val == 1 || val == MMIO_VAL,
+ "Expected '1' or MMIO ('%lx'), got '%lx'", MMIO_VAL, val);
- /* Spin until the memory region is realigned. */
+ /* Spin until the re-aligning memory region move completes. */
val = guest_spin_on_val(MMIO_VAL);
- GUEST_ASSERT_1(val == 1, val);
+ GUEST_ASSERT_EQ(val, 1);
GUEST_DONE();
}
@@ -173,10 +179,11 @@ static void guest_code_move_memory_region(void)
static void test_move_memory_region(void)
{
pthread_t vcpu_thread;
+ struct kvm_vcpu *vcpu;
struct kvm_vm *vm;
uint64_t *hva;
- vm = spawn_vm(&vcpu_thread, guest_code_move_memory_region);
+ vm = spawn_vm(&vcpu, &vcpu_thread, guest_code_move_memory_region);
hva = addr_gpa2hva(vm, MEM_REGION_GPA);
@@ -220,15 +227,15 @@ static void guest_code_delete_memory_region(void)
/* Spin until the memory region is deleted. */
val = guest_spin_on_val(0);
- GUEST_ASSERT_1(val == MMIO_VAL, val);
+ GUEST_ASSERT_EQ(val, MMIO_VAL);
/* Spin until the memory region is recreated. */
val = guest_spin_on_val(MMIO_VAL);
- GUEST_ASSERT_1(val == 0, val);
+ GUEST_ASSERT_EQ(val, 0);
/* Spin until the memory region is deleted. */
val = guest_spin_on_val(0);
- GUEST_ASSERT_1(val == MMIO_VAL, val);
+ GUEST_ASSERT_EQ(val, MMIO_VAL);
asm("1:\n\t"
".pushsection .rodata\n\t"
@@ -245,17 +252,18 @@ static void guest_code_delete_memory_region(void)
"final_rip_end: .quad 1b\n\t"
".popsection");
- GUEST_ASSERT_1(0, 0);
+ GUEST_ASSERT(0);
}
static void test_delete_memory_region(void)
{
pthread_t vcpu_thread;
+ struct kvm_vcpu *vcpu;
struct kvm_regs regs;
struct kvm_run *run;
struct kvm_vm *vm;
- vm = spawn_vm(&vcpu_thread, guest_code_delete_memory_region);
+ vm = spawn_vm(&vcpu, &vcpu_thread, guest_code_delete_memory_region);
/* Delete the memory region, the guest should not die. */
vm_mem_region_delete(vm, MEM_REGION_SLOT);
@@ -279,13 +287,13 @@ static void test_delete_memory_region(void)
pthread_join(vcpu_thread, NULL);
- run = vcpu_state(vm, VCPU_ID);
+ run = vcpu->run;
TEST_ASSERT(run->exit_reason == KVM_EXIT_SHUTDOWN ||
run->exit_reason == KVM_EXIT_INTERNAL_ERROR,
"Unexpected exit reason = %d", run->exit_reason);
- vcpu_regs_get(vm, VCPU_ID, &regs);
+ vcpu_regs_get(vcpu, &regs);
/*
* On AMD, after KVM_EXIT_SHUTDOWN the VMCB has been reinitialized already,
@@ -294,7 +302,7 @@ static void test_delete_memory_region(void)
if (run->exit_reason == KVM_EXIT_INTERNAL_ERROR)
TEST_ASSERT(regs.rip >= final_rip_start &&
regs.rip < final_rip_end,
- "Bad rip, expected 0x%lx - 0x%lx, got 0x%llx\n",
+ "Bad rip, expected 0x%lx - 0x%lx, got 0x%llx",
final_rip_start, final_rip_end, regs.rip);
kvm_vm_free(vm);
@@ -302,26 +310,81 @@ static void test_delete_memory_region(void)
static void test_zero_memory_regions(void)
{
- struct kvm_run *run;
+ struct kvm_vcpu *vcpu;
struct kvm_vm *vm;
pr_info("Testing KVM_RUN with zero added memory regions\n");
- vm = vm_create(VM_MODE_DEFAULT, 0, O_RDWR);
- vm_vcpu_add(vm, VCPU_ID);
-
- TEST_ASSERT(!ioctl(vm_get_fd(vm), KVM_SET_NR_MMU_PAGES, 64),
- "KVM_SET_NR_MMU_PAGES failed, errno = %d\n", errno);
- vcpu_run(vm, VCPU_ID);
+ vm = vm_create_barebones();
+ vcpu = __vm_vcpu_add(vm, 0);
- run = vcpu_state(vm, VCPU_ID);
- TEST_ASSERT(run->exit_reason == KVM_EXIT_INTERNAL_ERROR,
- "Unexpected exit_reason = %u\n", run->exit_reason);
+ vm_ioctl(vm, KVM_SET_NR_MMU_PAGES, (void *)64ul);
+ vcpu_run(vcpu);
+ TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_INTERNAL_ERROR);
kvm_vm_free(vm);
}
#endif /* __x86_64__ */
+static void test_invalid_memory_region_flags(void)
+{
+ uint32_t supported_flags = KVM_MEM_LOG_DIRTY_PAGES;
+ const uint32_t v2_only_flags = KVM_MEM_GUEST_MEMFD;
+ struct kvm_vm *vm;
+ int r, i;
+
+#if defined __aarch64__ || defined __x86_64__
+ supported_flags |= KVM_MEM_READONLY;
+#endif
+
+#ifdef __x86_64__
+ if (kvm_check_cap(KVM_CAP_VM_TYPES) & BIT(KVM_X86_SW_PROTECTED_VM))
+ vm = vm_create_barebones_protected_vm();
+ else
+#endif
+ vm = vm_create_barebones();
+
+ if (kvm_check_cap(KVM_CAP_MEMORY_ATTRIBUTES) & KVM_MEMORY_ATTRIBUTE_PRIVATE)
+ supported_flags |= KVM_MEM_GUEST_MEMFD;
+
+ for (i = 0; i < 32; i++) {
+ if ((supported_flags & BIT(i)) && !(v2_only_flags & BIT(i)))
+ continue;
+
+ r = __vm_set_user_memory_region(vm, 0, BIT(i),
+ 0, MEM_REGION_SIZE, NULL);
+
+ TEST_ASSERT(r && errno == EINVAL,
+ "KVM_SET_USER_MEMORY_REGION should have failed on v2 only flag 0x%lx", BIT(i));
+
+ if (supported_flags & BIT(i))
+ continue;
+
+ r = __vm_set_user_memory_region2(vm, 0, BIT(i),
+ 0, MEM_REGION_SIZE, NULL, 0, 0);
+ TEST_ASSERT(r && errno == EINVAL,
+ "KVM_SET_USER_MEMORY_REGION2 should have failed on unsupported flag 0x%lx", BIT(i));
+ }
+
+ if (supported_flags & KVM_MEM_GUEST_MEMFD) {
+ int guest_memfd = vm_create_guest_memfd(vm, MEM_REGION_SIZE, 0);
+
+ r = __vm_set_user_memory_region2(vm, 0,
+ KVM_MEM_LOG_DIRTY_PAGES | KVM_MEM_GUEST_MEMFD,
+ 0, MEM_REGION_SIZE, NULL, guest_memfd, 0);
+ TEST_ASSERT(r && errno == EINVAL,
+ "KVM_SET_USER_MEMORY_REGION2 should have failed, dirty logging private memory is unsupported");
+
+ r = __vm_set_user_memory_region2(vm, 0,
+ KVM_MEM_READONLY | KVM_MEM_GUEST_MEMFD,
+ 0, MEM_REGION_SIZE, NULL, guest_memfd, 0);
+ TEST_ASSERT(r && errno == EINVAL,
+ "KVM_SET_USER_MEMORY_REGION2 should have failed, read-only GUEST_MEMFD memslots are unsupported");
+
+ close(guest_memfd);
+ }
+}
+
/*
* Test it can be added memory slots up to KVM_CAP_NR_MEMSLOTS, then any
* tentative to add further slots should fail.
@@ -332,54 +395,154 @@ static void test_add_max_memory_regions(void)
struct kvm_vm *vm;
uint32_t max_mem_slots;
uint32_t slot;
- uint64_t guest_addr = 0x0;
- uint64_t mem_reg_npages;
- void *mem;
+ void *mem, *mem_aligned, *mem_extra;
+ size_t alignment;
+
+#ifdef __s390x__
+ /* On s390x, the host address must be aligned to 1M (due to PGSTEs) */
+ alignment = 0x100000;
+#else
+ alignment = 1;
+#endif
max_mem_slots = kvm_check_cap(KVM_CAP_NR_MEMSLOTS);
TEST_ASSERT(max_mem_slots > 0,
"KVM_CAP_NR_MEMSLOTS should be greater than 0");
pr_info("Allowed number of memory slots: %i\n", max_mem_slots);
- vm = vm_create(VM_MODE_DEFAULT, 0, O_RDWR);
-
- mem_reg_npages = vm_calc_num_guest_pages(VM_MODE_DEFAULT, MEM_REGION_SIZE);
+ vm = vm_create_barebones();
/* Check it can be added memory slots up to the maximum allowed */
pr_info("Adding slots 0..%i, each memory region with %dK size\n",
(max_mem_slots - 1), MEM_REGION_SIZE >> 10);
- for (slot = 0; slot < max_mem_slots; slot++) {
- vm_userspace_mem_region_add(vm, VM_MEM_SRC_ANONYMOUS,
- guest_addr, slot, mem_reg_npages,
- 0);
- guest_addr += MEM_REGION_SIZE;
- }
- /* Check it cannot be added memory slots beyond the limit */
- mem = mmap(NULL, MEM_REGION_SIZE, PROT_READ | PROT_WRITE,
- MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
+ mem = mmap(NULL, (size_t)max_mem_slots * MEM_REGION_SIZE + alignment,
+ PROT_READ | PROT_WRITE,
+ MAP_PRIVATE | MAP_ANONYMOUS | MAP_NORESERVE, -1, 0);
TEST_ASSERT(mem != MAP_FAILED, "Failed to mmap() host");
+ mem_aligned = (void *)(((size_t) mem + alignment - 1) & ~(alignment - 1));
- ret = ioctl(vm_get_fd(vm), KVM_SET_USER_MEMORY_REGION,
- &(struct kvm_userspace_memory_region) {slot, 0, guest_addr,
- MEM_REGION_SIZE, (uint64_t) mem});
+ for (slot = 0; slot < max_mem_slots; slot++)
+ vm_set_user_memory_region(vm, slot, 0,
+ ((uint64_t)slot * MEM_REGION_SIZE),
+ MEM_REGION_SIZE,
+ mem_aligned + (uint64_t)slot * MEM_REGION_SIZE);
+
+ /* Check it cannot be added memory slots beyond the limit */
+ mem_extra = mmap(NULL, MEM_REGION_SIZE, PROT_READ | PROT_WRITE,
+ MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
+ TEST_ASSERT(mem_extra != MAP_FAILED, "Failed to mmap() host");
+
+ ret = __vm_set_user_memory_region(vm, max_mem_slots, 0,
+ (uint64_t)max_mem_slots * MEM_REGION_SIZE,
+ MEM_REGION_SIZE, mem_extra);
TEST_ASSERT(ret == -1 && errno == EINVAL,
"Adding one more memory slot should fail with EINVAL");
- munmap(mem, MEM_REGION_SIZE);
+ munmap(mem, (size_t)max_mem_slots * MEM_REGION_SIZE + alignment);
+ munmap(mem_extra, MEM_REGION_SIZE);
+ kvm_vm_free(vm);
+}
+
+
+#ifdef __x86_64__
+static void test_invalid_guest_memfd(struct kvm_vm *vm, int memfd,
+ size_t offset, const char *msg)
+{
+ int r = __vm_set_user_memory_region2(vm, MEM_REGION_SLOT, KVM_MEM_GUEST_MEMFD,
+ MEM_REGION_GPA, MEM_REGION_SIZE,
+ 0, memfd, offset);
+ TEST_ASSERT(r == -1 && errno == EINVAL, "%s", msg);
+}
+
+static void test_add_private_memory_region(void)
+{
+ struct kvm_vm *vm, *vm2;
+ int memfd, i;
+
+ pr_info("Testing ADD of KVM_MEM_GUEST_MEMFD memory regions\n");
+
+ vm = vm_create_barebones_protected_vm();
+
+ test_invalid_guest_memfd(vm, vm->kvm_fd, 0, "KVM fd should fail");
+ test_invalid_guest_memfd(vm, vm->fd, 0, "VM's fd should fail");
+
+ memfd = kvm_memfd_alloc(MEM_REGION_SIZE, false);
+ test_invalid_guest_memfd(vm, memfd, 0, "Regular memfd() should fail");
+ close(memfd);
+
+ vm2 = vm_create_barebones_protected_vm();
+ memfd = vm_create_guest_memfd(vm2, MEM_REGION_SIZE, 0);
+ test_invalid_guest_memfd(vm, memfd, 0, "Other VM's guest_memfd() should fail");
+
+ vm_set_user_memory_region2(vm2, MEM_REGION_SLOT, KVM_MEM_GUEST_MEMFD,
+ MEM_REGION_GPA, MEM_REGION_SIZE, 0, memfd, 0);
+ close(memfd);
+ kvm_vm_free(vm2);
+
+ memfd = vm_create_guest_memfd(vm, MEM_REGION_SIZE, 0);
+ for (i = 1; i < PAGE_SIZE; i++)
+ test_invalid_guest_memfd(vm, memfd, i, "Unaligned offset should fail");
+
+ vm_set_user_memory_region2(vm, MEM_REGION_SLOT, KVM_MEM_GUEST_MEMFD,
+ MEM_REGION_GPA, MEM_REGION_SIZE, 0, memfd, 0);
+ close(memfd);
+
kvm_vm_free(vm);
}
+static void test_add_overlapping_private_memory_regions(void)
+{
+ struct kvm_vm *vm;
+ int memfd;
+ int r;
+
+ pr_info("Testing ADD of overlapping KVM_MEM_GUEST_MEMFD memory regions\n");
+
+ vm = vm_create_barebones_protected_vm();
+
+ memfd = vm_create_guest_memfd(vm, MEM_REGION_SIZE * 4, 0);
+
+ vm_set_user_memory_region2(vm, MEM_REGION_SLOT, KVM_MEM_GUEST_MEMFD,
+ MEM_REGION_GPA, MEM_REGION_SIZE * 2, 0, memfd, 0);
+
+ vm_set_user_memory_region2(vm, MEM_REGION_SLOT + 1, KVM_MEM_GUEST_MEMFD,
+ MEM_REGION_GPA * 2, MEM_REGION_SIZE * 2,
+ 0, memfd, MEM_REGION_SIZE * 2);
+
+ /*
+ * Delete the first memslot, and then attempt to recreate it except
+ * with a "bad" offset that results in overlap in the guest_memfd().
+ */
+ vm_set_user_memory_region2(vm, MEM_REGION_SLOT, KVM_MEM_GUEST_MEMFD,
+ MEM_REGION_GPA, 0, NULL, -1, 0);
+
+ /* Overlap the front half of the other slot. */
+ r = __vm_set_user_memory_region2(vm, MEM_REGION_SLOT, KVM_MEM_GUEST_MEMFD,
+ MEM_REGION_GPA * 2 - MEM_REGION_SIZE,
+ MEM_REGION_SIZE * 2,
+ 0, memfd, 0);
+ TEST_ASSERT(r == -1 && errno == EEXIST, "%s",
+ "Overlapping guest_memfd() bindings should fail with EEXIST");
+
+ /* And now the back half of the other slot. */
+ r = __vm_set_user_memory_region2(vm, MEM_REGION_SLOT, KVM_MEM_GUEST_MEMFD,
+ MEM_REGION_GPA * 2 + MEM_REGION_SIZE,
+ MEM_REGION_SIZE * 2,
+ 0, memfd, 0);
+ TEST_ASSERT(r == -1 && errno == EEXIST, "%s",
+ "Overlapping guest_memfd() bindings should fail with EEXIST");
+
+ close(memfd);
+ kvm_vm_free(vm);
+}
+#endif
+
int main(int argc, char *argv[])
{
#ifdef __x86_64__
int i, loops;
-#endif
-
- /* Tell stdout not to buffer its content */
- setbuf(stdout, NULL);
-#ifdef __x86_64__
/*
* FIXME: the zero-memslot test fails on aarch64 and s390x because
* KVM_RUN fails with ENOEXEC or EFAULT.
@@ -387,11 +550,21 @@ int main(int argc, char *argv[])
test_zero_memory_regions();
#endif
+ test_invalid_memory_region_flags();
+
test_add_max_memory_regions();
#ifdef __x86_64__
+ if (kvm_has_cap(KVM_CAP_GUEST_MEMFD) &&
+ (kvm_check_cap(KVM_CAP_VM_TYPES) & BIT(KVM_X86_SW_PROTECTED_VM))) {
+ test_add_private_memory_region();
+ test_add_overlapping_private_memory_regions();
+ } else {
+ pr_info("Skipping tests for KVM_MEM_GUEST_MEMFD memory regions\n");
+ }
+
if (argc > 1)
- loops = atoi(argv[1]);
+ loops = atoi_positive("Number of iterations", argv[1]);
else
loops = 10;
diff --git a/tools/testing/selftests/kvm/settings b/tools/testing/selftests/kvm/settings
new file mode 100644
index 000000000000..6091b45d226b
--- /dev/null
+++ b/tools/testing/selftests/kvm/settings
@@ -0,0 +1 @@
+timeout=120
diff --git a/tools/testing/selftests/kvm/steal_time.c b/tools/testing/selftests/kvm/steal_time.c
index fcc840088c91..bae0c5026f82 100644
--- a/tools/testing/selftests/kvm/steal_time.c
+++ b/tools/testing/selftests/kvm/steal_time.c
@@ -10,9 +10,10 @@
#include <sched.h>
#include <pthread.h>
#include <linux/kernel.h>
-#include <sys/syscall.h>
#include <asm/kvm.h>
+#ifndef __riscv
#include <asm/kvm_para.h>
+#endif
#include "test_util.h"
#include "kvm_util.h"
@@ -20,7 +21,6 @@
#define NR_VCPUS 4
#define ST_GPA_BASE (1 << 30)
-#define MIN_RUN_DELAY_NS 200000UL
static void *st_gva[NR_VCPUS];
static uint64_t guest_stolen_time[NR_VCPUS];
@@ -33,8 +33,8 @@ static uint64_t guest_stolen_time[NR_VCPUS];
static void check_status(struct kvm_steal_time *st)
{
GUEST_ASSERT(!(READ_ONCE(st->version) & 1));
- GUEST_ASSERT(READ_ONCE(st->flags) == 0);
- GUEST_ASSERT(READ_ONCE(st->preempted) == 0);
+ GUEST_ASSERT_EQ(READ_ONCE(st->flags), 0);
+ GUEST_ASSERT_EQ(READ_ONCE(st->preempted), 0);
}
static void guest_code(int cpu)
@@ -42,7 +42,7 @@ static void guest_code(int cpu)
struct kvm_steal_time *st = st_gva[cpu];
uint32_t version;
- GUEST_ASSERT(rdmsr(MSR_KVM_STEAL_TIME) == ((uint64_t)st_gva[cpu] | KVM_MSR_ENABLED));
+ GUEST_ASSERT_EQ(rdmsr(MSR_KVM_STEAL_TIME), ((uint64_t)st_gva[cpu] | KVM_MSR_ENABLED));
memset(st, 0, sizeof(*st));
GUEST_SYNC(0);
@@ -60,38 +60,32 @@ static void guest_code(int cpu)
GUEST_DONE();
}
-static void steal_time_init(struct kvm_vm *vm)
+static bool is_steal_time_supported(struct kvm_vcpu *vcpu)
{
- int i;
-
- if (!(kvm_get_supported_cpuid_entry(KVM_CPUID_FEATURES)->eax &
- KVM_FEATURE_STEAL_TIME)) {
- print_skip("steal-time not supported");
- exit(KSFT_SKIP);
- }
-
- for (i = 0; i < NR_VCPUS; ++i) {
- int ret;
+ return kvm_cpu_has(X86_FEATURE_KVM_STEAL_TIME);
+}
- vcpu_set_cpuid(vm, i, kvm_get_supported_cpuid());
+static void steal_time_init(struct kvm_vcpu *vcpu, uint32_t i)
+{
+ int ret;
- /* ST_GPA_BASE is identity mapped */
- st_gva[i] = (void *)(ST_GPA_BASE + i * STEAL_TIME_SIZE);
- sync_global_to_guest(vm, st_gva[i]);
+ /* ST_GPA_BASE is identity mapped */
+ st_gva[i] = (void *)(ST_GPA_BASE + i * STEAL_TIME_SIZE);
+ sync_global_to_guest(vcpu->vm, st_gva[i]);
- ret = _vcpu_set_msr(vm, i, MSR_KVM_STEAL_TIME, (ulong)st_gva[i] | KVM_STEAL_RESERVED_MASK);
- TEST_ASSERT(ret == 0, "Bad GPA didn't fail");
+ ret = _vcpu_set_msr(vcpu, MSR_KVM_STEAL_TIME,
+ (ulong)st_gva[i] | KVM_STEAL_RESERVED_MASK);
+ TEST_ASSERT(ret == 0, "Bad GPA didn't fail");
- vcpu_set_msr(vm, i, MSR_KVM_STEAL_TIME, (ulong)st_gva[i] | KVM_MSR_ENABLED);
- }
+ vcpu_set_msr(vcpu, MSR_KVM_STEAL_TIME, (ulong)st_gva[i] | KVM_MSR_ENABLED);
}
-static void steal_time_dump(struct kvm_vm *vm, uint32_t vcpuid)
+static void steal_time_dump(struct kvm_vm *vm, uint32_t vcpu_idx)
{
- struct kvm_steal_time *st = addr_gva2hva(vm, (ulong)st_gva[vcpuid]);
+ struct kvm_steal_time *st = addr_gva2hva(vm, (ulong)st_gva[vcpu_idx]);
int i;
- pr_info("VCPU%d:\n", vcpuid);
+ pr_info("VCPU%d:\n", vcpu_idx);
pr_info(" steal: %lld\n", st->steal);
pr_info(" version: %d\n", st->version);
pr_info(" flags: %d\n", st->flags);
@@ -120,25 +114,18 @@ struct st_time {
uint64_t st_time;
};
-static int64_t smccc(uint32_t func, uint32_t arg)
+static int64_t smccc(uint32_t func, uint64_t arg)
{
- unsigned long ret;
-
- asm volatile(
- "mov x0, %1\n"
- "mov x1, %2\n"
- "hvc #0\n"
- "mov %0, x0\n"
- : "=r" (ret) : "r" (func), "r" (arg) :
- "x0", "x1", "x2", "x3");
+ struct arm_smccc_res res;
- return ret;
+ smccc_hvc(func, arg, 0, 0, 0, 0, 0, 0, &res);
+ return res.a0;
}
static void check_status(struct st_time *st)
{
- GUEST_ASSERT(READ_ONCE(st->rev) == 0);
- GUEST_ASSERT(READ_ONCE(st->attr) == 0);
+ GUEST_ASSERT_EQ(READ_ONCE(st->rev), 0);
+ GUEST_ASSERT_EQ(READ_ONCE(st->attr), 0);
}
static void guest_code(int cpu)
@@ -147,15 +134,15 @@ static void guest_code(int cpu)
int64_t status;
status = smccc(SMCCC_ARCH_FEATURES, PV_TIME_FEATURES);
- GUEST_ASSERT(status == 0);
+ GUEST_ASSERT_EQ(status, 0);
status = smccc(PV_TIME_FEATURES, PV_TIME_FEATURES);
- GUEST_ASSERT(status == 0);
+ GUEST_ASSERT_EQ(status, 0);
status = smccc(PV_TIME_FEATURES, PV_TIME_ST);
- GUEST_ASSERT(status == 0);
+ GUEST_ASSERT_EQ(status, 0);
status = smccc(PV_TIME_ST, 0);
- GUEST_ASSERT(status != -1);
- GUEST_ASSERT(status == (ulong)st_gva[cpu]);
+ GUEST_ASSERT_NE(status, -1);
+ GUEST_ASSERT_EQ(status, (ulong)st_gva[cpu]);
st = (struct st_time *)status;
GUEST_SYNC(0);
@@ -169,70 +156,154 @@ static void guest_code(int cpu)
GUEST_DONE();
}
-static void steal_time_init(struct kvm_vm *vm)
+static bool is_steal_time_supported(struct kvm_vcpu *vcpu)
{
struct kvm_device_attr dev = {
.group = KVM_ARM_VCPU_PVTIME_CTRL,
.attr = KVM_ARM_VCPU_PVTIME_IPA,
};
- int i, ret;
-
- ret = _vcpu_ioctl(vm, 0, KVM_HAS_DEVICE_ATTR, &dev);
- if (ret != 0 && errno == ENXIO) {
- print_skip("steal-time not supported");
- exit(KSFT_SKIP);
- }
- for (i = 0; i < NR_VCPUS; ++i) {
- uint64_t st_ipa;
+ return !__vcpu_ioctl(vcpu, KVM_HAS_DEVICE_ATTR, &dev);
+}
- vcpu_ioctl(vm, i, KVM_HAS_DEVICE_ATTR, &dev);
+static void steal_time_init(struct kvm_vcpu *vcpu, uint32_t i)
+{
+ struct kvm_vm *vm = vcpu->vm;
+ uint64_t st_ipa;
+ int ret;
- dev.addr = (uint64_t)&st_ipa;
+ struct kvm_device_attr dev = {
+ .group = KVM_ARM_VCPU_PVTIME_CTRL,
+ .attr = KVM_ARM_VCPU_PVTIME_IPA,
+ .addr = (uint64_t)&st_ipa,
+ };
- /* ST_GPA_BASE is identity mapped */
- st_gva[i] = (void *)(ST_GPA_BASE + i * STEAL_TIME_SIZE);
- sync_global_to_guest(vm, st_gva[i]);
+ vcpu_ioctl(vcpu, KVM_HAS_DEVICE_ATTR, &dev);
- st_ipa = (ulong)st_gva[i] | 1;
- ret = _vcpu_ioctl(vm, i, KVM_SET_DEVICE_ATTR, &dev);
- TEST_ASSERT(ret == -1 && errno == EINVAL, "Bad IPA didn't report EINVAL");
+ /* ST_GPA_BASE is identity mapped */
+ st_gva[i] = (void *)(ST_GPA_BASE + i * STEAL_TIME_SIZE);
+ sync_global_to_guest(vm, st_gva[i]);
- st_ipa = (ulong)st_gva[i];
- vcpu_ioctl(vm, i, KVM_SET_DEVICE_ATTR, &dev);
+ st_ipa = (ulong)st_gva[i] | 1;
+ ret = __vcpu_ioctl(vcpu, KVM_SET_DEVICE_ATTR, &dev);
+ TEST_ASSERT(ret == -1 && errno == EINVAL, "Bad IPA didn't report EINVAL");
- ret = _vcpu_ioctl(vm, i, KVM_SET_DEVICE_ATTR, &dev);
- TEST_ASSERT(ret == -1 && errno == EEXIST, "Set IPA twice without EEXIST");
+ st_ipa = (ulong)st_gva[i];
+ vcpu_ioctl(vcpu, KVM_SET_DEVICE_ATTR, &dev);
- }
+ ret = __vcpu_ioctl(vcpu, KVM_SET_DEVICE_ATTR, &dev);
+ TEST_ASSERT(ret == -1 && errno == EEXIST, "Set IPA twice without EEXIST");
}
-static void steal_time_dump(struct kvm_vm *vm, uint32_t vcpuid)
+static void steal_time_dump(struct kvm_vm *vm, uint32_t vcpu_idx)
{
- struct st_time *st = addr_gva2hva(vm, (ulong)st_gva[vcpuid]);
+ struct st_time *st = addr_gva2hva(vm, (ulong)st_gva[vcpu_idx]);
- pr_info("VCPU%d:\n", vcpuid);
+ pr_info("VCPU%d:\n", vcpu_idx);
pr_info(" rev: %d\n", st->rev);
pr_info(" attr: %d\n", st->attr);
pr_info(" st_time: %ld\n", st->st_time);
}
+#elif defined(__riscv)
+
+/* SBI STA shmem must have 64-byte alignment */
+#define STEAL_TIME_SIZE ((sizeof(struct sta_struct) + 63) & ~63)
+
+static vm_paddr_t st_gpa[NR_VCPUS];
+
+struct sta_struct {
+ uint32_t sequence;
+ uint32_t flags;
+ uint64_t steal;
+ uint8_t preempted;
+ uint8_t pad[47];
+} __packed;
+
+static void sta_set_shmem(vm_paddr_t gpa, unsigned long flags)
+{
+ unsigned long lo = (unsigned long)gpa;
+#if __riscv_xlen == 32
+ unsigned long hi = (unsigned long)(gpa >> 32);
+#else
+ unsigned long hi = gpa == -1 ? -1 : 0;
#endif
+ struct sbiret ret = sbi_ecall(SBI_EXT_STA, 0, lo, hi, flags, 0, 0, 0);
-static long get_run_delay(void)
+ GUEST_ASSERT(ret.value == 0 && ret.error == 0);
+}
+
+static void check_status(struct sta_struct *st)
+{
+ GUEST_ASSERT(!(READ_ONCE(st->sequence) & 1));
+ GUEST_ASSERT(READ_ONCE(st->flags) == 0);
+ GUEST_ASSERT(READ_ONCE(st->preempted) == 0);
+}
+
+static void guest_code(int cpu)
{
- char path[64];
- long val[2];
- FILE *fp;
+ struct sta_struct *st = st_gva[cpu];
+ uint32_t sequence;
+ long out_val = 0;
+ bool probe;
- sprintf(path, "/proc/%ld/schedstat", syscall(SYS_gettid));
- fp = fopen(path, "r");
- fscanf(fp, "%ld %ld ", &val[0], &val[1]);
- fclose(fp);
+ probe = guest_sbi_probe_extension(SBI_EXT_STA, &out_val);
+ GUEST_ASSERT(probe && out_val == 1);
- return val[1];
+ sta_set_shmem(st_gpa[cpu], 0);
+ GUEST_SYNC(0);
+
+ check_status(st);
+ WRITE_ONCE(guest_stolen_time[cpu], st->steal);
+ sequence = READ_ONCE(st->sequence);
+ check_status(st);
+ GUEST_SYNC(1);
+
+ check_status(st);
+ GUEST_ASSERT(sequence < READ_ONCE(st->sequence));
+ WRITE_ONCE(guest_stolen_time[cpu], st->steal);
+ check_status(st);
+ GUEST_DONE();
+}
+
+static bool is_steal_time_supported(struct kvm_vcpu *vcpu)
+{
+ uint64_t id = RISCV_SBI_EXT_REG(KVM_RISCV_SBI_EXT_STA);
+ unsigned long enabled;
+
+ vcpu_get_reg(vcpu, id, &enabled);
+ TEST_ASSERT(enabled == 0 || enabled == 1, "Expected boolean result");
+
+ return enabled;
+}
+
+static void steal_time_init(struct kvm_vcpu *vcpu, uint32_t i)
+{
+ /* ST_GPA_BASE is identity mapped */
+ st_gva[i] = (void *)(ST_GPA_BASE + i * STEAL_TIME_SIZE);
+ st_gpa[i] = addr_gva2gpa(vcpu->vm, (vm_vaddr_t)st_gva[i]);
+ sync_global_to_guest(vcpu->vm, st_gva[i]);
+ sync_global_to_guest(vcpu->vm, st_gpa[i]);
}
+static void steal_time_dump(struct kvm_vm *vm, uint32_t vcpu_idx)
+{
+ struct sta_struct *st = addr_gva2hva(vm, (ulong)st_gva[vcpu_idx]);
+ int i;
+
+ pr_info("VCPU%d:\n", vcpu_idx);
+ pr_info(" sequence: %d\n", st->sequence);
+ pr_info(" flags: %d\n", st->flags);
+ pr_info(" steal: %"PRIu64"\n", st->steal);
+ pr_info(" preempted: %d\n", st->preempted);
+ pr_info(" pad: ");
+ for (i = 0; i < 47; ++i)
+ pr_info("%d", st->pad[i]);
+ pr_info("\n");
+}
+
+#endif
+
static void *do_steal_time(void *arg)
{
struct timespec ts, stop;
@@ -249,29 +320,27 @@ static void *do_steal_time(void *arg)
return NULL;
}
-static void run_vcpu(struct kvm_vm *vm, uint32_t vcpuid)
+static void run_vcpu(struct kvm_vcpu *vcpu)
{
struct ucall uc;
- vcpu_args_set(vm, vcpuid, 1, vcpuid);
+ vcpu_run(vcpu);
- vcpu_ioctl(vm, vcpuid, KVM_RUN, NULL);
-
- switch (get_ucall(vm, vcpuid, &uc)) {
+ switch (get_ucall(vcpu, &uc)) {
case UCALL_SYNC:
case UCALL_DONE:
break;
case UCALL_ABORT:
- TEST_ASSERT(false, "%s at %s:%ld", (const char *)uc.args[0],
- __FILE__, uc.args[1]);
+ REPORT_GUEST_ASSERT(uc);
default:
TEST_ASSERT(false, "Unexpected exit: %s",
- exit_reason_str(vcpu_state(vm, vcpuid)->exit_reason));
+ exit_reason_str(vcpu->run->exit_reason));
}
}
int main(int ac, char **av)
{
+ struct kvm_vcpu *vcpus[NR_VCPUS];
struct kvm_vm *vm;
pthread_attr_t attr;
pthread_t thread;
@@ -291,26 +360,25 @@ int main(int ac, char **av)
pthread_attr_setaffinity_np(&attr, sizeof(cpu_set_t), &cpuset);
pthread_setaffinity_np(pthread_self(), sizeof(cpu_set_t), &cpuset);
- /* Create a one VCPU guest and an identity mapped memslot for the steal time structure */
- vm = vm_create_default(0, 0, guest_code);
+ /* Create a VM and an identity mapped memslot for the steal time structure */
+ vm = vm_create_with_vcpus(NR_VCPUS, guest_code, vcpus);
gpages = vm_calc_num_guest_pages(VM_MODE_DEFAULT, STEAL_TIME_SIZE * NR_VCPUS);
vm_userspace_mem_region_add(vm, VM_MEM_SRC_ANONYMOUS, ST_GPA_BASE, 1, gpages, 0);
- virt_map(vm, ST_GPA_BASE, ST_GPA_BASE, gpages, 0);
- ucall_init(vm, NULL);
-
- /* Add the rest of the VCPUs */
- for (i = 1; i < NR_VCPUS; ++i)
- vm_vcpu_add_default(vm, i, guest_code);
+ virt_map(vm, ST_GPA_BASE, ST_GPA_BASE, gpages);
- steal_time_init(vm);
+ TEST_REQUIRE(is_steal_time_supported(vcpus[0]));
/* Run test on each VCPU */
for (i = 0; i < NR_VCPUS; ++i) {
+ steal_time_init(vcpus[i], i);
+
+ vcpu_args_set(vcpus[i], 1, i);
+
/* First VCPU run initializes steal-time */
- run_vcpu(vm, i);
+ run_vcpu(vcpus[i]);
/* Second VCPU run, expect guest stolen time to be <= run_delay */
- run_vcpu(vm, i);
+ run_vcpu(vcpus[i]);
sync_global_from_guest(vm, guest_stolen_time[i]);
stolen_time = guest_stolen_time[i];
run_delay = get_run_delay();
@@ -322,7 +390,7 @@ int main(int ac, char **av)
run_delay = get_run_delay();
pthread_create(&thread, &attr, do_steal_time, NULL);
do
- pthread_yield();
+ sched_yield();
while (get_run_delay() - run_delay < MIN_RUN_DELAY_NS);
pthread_join(thread, NULL);
run_delay = get_run_delay() - run_delay;
@@ -331,7 +399,7 @@ int main(int ac, char **av)
MIN_RUN_DELAY_NS, run_delay);
/* Run VCPU again to confirm stolen time is consistent with run_delay */
- run_vcpu(vm, i);
+ run_vcpu(vcpus[i]);
sync_global_from_guest(vm, guest_stolen_time[i]);
stolen_time = guest_stolen_time[i] - stolen_time;
TEST_ASSERT(stolen_time >= run_delay,
diff --git a/tools/testing/selftests/kvm/system_counter_offset_test.c b/tools/testing/selftests/kvm/system_counter_offset_test.c
new file mode 100644
index 000000000000..513d421a9bff
--- /dev/null
+++ b/tools/testing/selftests/kvm/system_counter_offset_test.c
@@ -0,0 +1,127 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright (C) 2021, Google LLC.
+ *
+ * Tests for adjusting the system counter from userspace
+ */
+#include <asm/kvm_para.h>
+#include <stdint.h>
+#include <string.h>
+#include <sys/stat.h>
+#include <time.h>
+
+#include "test_util.h"
+#include "kvm_util.h"
+#include "processor.h"
+
+#ifdef __x86_64__
+
+struct test_case {
+ uint64_t tsc_offset;
+};
+
+static struct test_case test_cases[] = {
+ { 0 },
+ { 180 * NSEC_PER_SEC },
+ { -180 * NSEC_PER_SEC },
+};
+
+static void check_preconditions(struct kvm_vcpu *vcpu)
+{
+ __TEST_REQUIRE(!__vcpu_has_device_attr(vcpu, KVM_VCPU_TSC_CTRL,
+ KVM_VCPU_TSC_OFFSET),
+ "KVM_VCPU_TSC_OFFSET not supported; skipping test");
+}
+
+static void setup_system_counter(struct kvm_vcpu *vcpu, struct test_case *test)
+{
+ vcpu_device_attr_set(vcpu, KVM_VCPU_TSC_CTRL, KVM_VCPU_TSC_OFFSET,
+ &test->tsc_offset);
+}
+
+static uint64_t guest_read_system_counter(struct test_case *test)
+{
+ return rdtsc();
+}
+
+static uint64_t host_read_guest_system_counter(struct test_case *test)
+{
+ return rdtsc() + test->tsc_offset;
+}
+
+#else /* __x86_64__ */
+
+#error test not implemented for this architecture!
+
+#endif
+
+#define GUEST_SYNC_CLOCK(__stage, __val) \
+ GUEST_SYNC_ARGS(__stage, __val, 0, 0, 0)
+
+static void guest_main(void)
+{
+ int i;
+
+ for (i = 0; i < ARRAY_SIZE(test_cases); i++) {
+ struct test_case *test = &test_cases[i];
+
+ GUEST_SYNC_CLOCK(i, guest_read_system_counter(test));
+ }
+}
+
+static void handle_sync(struct ucall *uc, uint64_t start, uint64_t end)
+{
+ uint64_t obs = uc->args[2];
+
+ TEST_ASSERT(start <= obs && obs <= end,
+ "unexpected system counter value: %"PRIu64" expected range: [%"PRIu64", %"PRIu64"]",
+ obs, start, end);
+
+ pr_info("system counter value: %"PRIu64" expected range [%"PRIu64", %"PRIu64"]\n",
+ obs, start, end);
+}
+
+static void handle_abort(struct ucall *uc)
+{
+ REPORT_GUEST_ASSERT(*uc);
+}
+
+static void enter_guest(struct kvm_vcpu *vcpu)
+{
+ uint64_t start, end;
+ struct ucall uc;
+ int i;
+
+ for (i = 0; i < ARRAY_SIZE(test_cases); i++) {
+ struct test_case *test = &test_cases[i];
+
+ setup_system_counter(vcpu, test);
+ start = host_read_guest_system_counter(test);
+ vcpu_run(vcpu);
+ end = host_read_guest_system_counter(test);
+
+ switch (get_ucall(vcpu, &uc)) {
+ case UCALL_SYNC:
+ handle_sync(&uc, start, end);
+ break;
+ case UCALL_ABORT:
+ handle_abort(&uc);
+ return;
+ default:
+ TEST_ASSERT(0, "unhandled ucall %ld",
+ get_ucall(vcpu, &uc));
+ }
+ }
+}
+
+int main(void)
+{
+ struct kvm_vcpu *vcpu;
+ struct kvm_vm *vm;
+
+ vm = vm_create_with_one_vcpu(&vcpu, guest_main);
+ check_preconditions(vcpu);
+
+ enter_guest(vcpu);
+ kvm_vm_free(vm);
+}
diff --git a/tools/testing/selftests/kvm/x86_64/amx_test.c b/tools/testing/selftests/kvm/x86_64/amx_test.c
new file mode 100644
index 000000000000..eae521f050e0
--- /dev/null
+++ b/tools/testing/selftests/kvm/x86_64/amx_test.c
@@ -0,0 +1,334 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * amx tests
+ *
+ * Copyright (C) 2021, Intel, Inc.
+ *
+ * Tests for amx #NM exception and save/restore.
+ */
+
+#define _GNU_SOURCE /* for program_invocation_short_name */
+#include <fcntl.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/ioctl.h>
+#include <sys/syscall.h>
+
+#include "test_util.h"
+
+#include "kvm_util.h"
+#include "processor.h"
+#include "vmx.h"
+
+#ifndef __x86_64__
+# error This test is 64-bit only
+#endif
+
+#define NUM_TILES 8
+#define TILE_SIZE 1024
+#define XSAVE_SIZE ((NUM_TILES * TILE_SIZE) + PAGE_SIZE)
+
+/* Tile configuration associated: */
+#define PALETTE_TABLE_INDEX 1
+#define MAX_TILES 16
+#define RESERVED_BYTES 14
+
+#define XSAVE_HDR_OFFSET 512
+
+struct tile_config {
+ u8 palette_id;
+ u8 start_row;
+ u8 reserved[RESERVED_BYTES];
+ u16 colsb[MAX_TILES];
+ u8 rows[MAX_TILES];
+};
+
+struct tile_data {
+ u8 data[NUM_TILES * TILE_SIZE];
+};
+
+struct xtile_info {
+ u16 bytes_per_tile;
+ u16 bytes_per_row;
+ u16 max_names;
+ u16 max_rows;
+ u32 xsave_offset;
+ u32 xsave_size;
+};
+
+static struct xtile_info xtile;
+
+static inline void __ldtilecfg(void *cfg)
+{
+ asm volatile(".byte 0xc4,0xe2,0x78,0x49,0x00"
+ : : "a"(cfg));
+}
+
+static inline void __tileloadd(void *tile)
+{
+ asm volatile(".byte 0xc4,0xe2,0x7b,0x4b,0x04,0x10"
+ : : "a"(tile), "d"(0));
+}
+
+static inline void __tilerelease(void)
+{
+ asm volatile(".byte 0xc4, 0xe2, 0x78, 0x49, 0xc0" ::);
+}
+
+static inline void __xsavec(struct xstate *xstate, uint64_t rfbm)
+{
+ uint32_t rfbm_lo = rfbm;
+ uint32_t rfbm_hi = rfbm >> 32;
+
+ asm volatile("xsavec (%%rdi)"
+ : : "D" (xstate), "a" (rfbm_lo), "d" (rfbm_hi)
+ : "memory");
+}
+
+static void check_xtile_info(void)
+{
+ GUEST_ASSERT(this_cpu_has_p(X86_PROPERTY_XSTATE_MAX_SIZE_XCR0));
+ GUEST_ASSERT(this_cpu_property(X86_PROPERTY_XSTATE_MAX_SIZE_XCR0) <= XSAVE_SIZE);
+
+ xtile.xsave_offset = this_cpu_property(X86_PROPERTY_XSTATE_TILE_OFFSET);
+ GUEST_ASSERT(xtile.xsave_offset == 2816);
+ xtile.xsave_size = this_cpu_property(X86_PROPERTY_XSTATE_TILE_SIZE);
+ GUEST_ASSERT(xtile.xsave_size == 8192);
+ GUEST_ASSERT(sizeof(struct tile_data) >= xtile.xsave_size);
+
+ GUEST_ASSERT(this_cpu_has_p(X86_PROPERTY_AMX_MAX_PALETTE_TABLES));
+ GUEST_ASSERT(this_cpu_property(X86_PROPERTY_AMX_MAX_PALETTE_TABLES) >=
+ PALETTE_TABLE_INDEX);
+
+ GUEST_ASSERT(this_cpu_has_p(X86_PROPERTY_AMX_NR_TILE_REGS));
+ xtile.max_names = this_cpu_property(X86_PROPERTY_AMX_NR_TILE_REGS);
+ GUEST_ASSERT(xtile.max_names == 8);
+ xtile.bytes_per_tile = this_cpu_property(X86_PROPERTY_AMX_BYTES_PER_TILE);
+ GUEST_ASSERT(xtile.bytes_per_tile == 1024);
+ xtile.bytes_per_row = this_cpu_property(X86_PROPERTY_AMX_BYTES_PER_ROW);
+ GUEST_ASSERT(xtile.bytes_per_row == 64);
+ xtile.max_rows = this_cpu_property(X86_PROPERTY_AMX_MAX_ROWS);
+ GUEST_ASSERT(xtile.max_rows == 16);
+}
+
+static void set_tilecfg(struct tile_config *cfg)
+{
+ int i;
+
+ /* Only palette id 1 */
+ cfg->palette_id = 1;
+ for (i = 0; i < xtile.max_names; i++) {
+ cfg->colsb[i] = xtile.bytes_per_row;
+ cfg->rows[i] = xtile.max_rows;
+ }
+}
+
+static void init_regs(void)
+{
+ uint64_t cr4, xcr0;
+
+ GUEST_ASSERT(this_cpu_has(X86_FEATURE_XSAVE));
+
+ /* turn on CR4.OSXSAVE */
+ cr4 = get_cr4();
+ cr4 |= X86_CR4_OSXSAVE;
+ set_cr4(cr4);
+ GUEST_ASSERT(this_cpu_has(X86_FEATURE_OSXSAVE));
+
+ xcr0 = xgetbv(0);
+ xcr0 |= XFEATURE_MASK_XTILE;
+ xsetbv(0x0, xcr0);
+ GUEST_ASSERT((xgetbv(0) & XFEATURE_MASK_XTILE) == XFEATURE_MASK_XTILE);
+}
+
+static void __attribute__((__flatten__)) guest_code(struct tile_config *amx_cfg,
+ struct tile_data *tiledata,
+ struct xstate *xstate)
+{
+ init_regs();
+ check_xtile_info();
+ GUEST_SYNC(1);
+
+ /* xfd=0, enable amx */
+ wrmsr(MSR_IA32_XFD, 0);
+ GUEST_SYNC(2);
+ GUEST_ASSERT(rdmsr(MSR_IA32_XFD) == 0);
+ set_tilecfg(amx_cfg);
+ __ldtilecfg(amx_cfg);
+ GUEST_SYNC(3);
+ /* Check save/restore when trap to userspace */
+ __tileloadd(tiledata);
+ GUEST_SYNC(4);
+ __tilerelease();
+ GUEST_SYNC(5);
+ /*
+ * After XSAVEC, XTILEDATA is cleared in the xstate_bv but is set in
+ * the xcomp_bv.
+ */
+ xstate->header.xstate_bv = XFEATURE_MASK_XTILE_DATA;
+ __xsavec(xstate, XFEATURE_MASK_XTILE_DATA);
+ GUEST_ASSERT(!(xstate->header.xstate_bv & XFEATURE_MASK_XTILE_DATA));
+ GUEST_ASSERT(xstate->header.xcomp_bv & XFEATURE_MASK_XTILE_DATA);
+
+ /* xfd=0x40000, disable amx tiledata */
+ wrmsr(MSR_IA32_XFD, XFEATURE_MASK_XTILE_DATA);
+
+ /*
+ * XTILEDATA is cleared in xstate_bv but set in xcomp_bv, this property
+ * remains the same even when amx tiledata is disabled by IA32_XFD.
+ */
+ xstate->header.xstate_bv = XFEATURE_MASK_XTILE_DATA;
+ __xsavec(xstate, XFEATURE_MASK_XTILE_DATA);
+ GUEST_ASSERT(!(xstate->header.xstate_bv & XFEATURE_MASK_XTILE_DATA));
+ GUEST_ASSERT((xstate->header.xcomp_bv & XFEATURE_MASK_XTILE_DATA));
+
+ GUEST_SYNC(6);
+ GUEST_ASSERT(rdmsr(MSR_IA32_XFD) == XFEATURE_MASK_XTILE_DATA);
+ set_tilecfg(amx_cfg);
+ __ldtilecfg(amx_cfg);
+ /* Trigger #NM exception */
+ __tileloadd(tiledata);
+ GUEST_SYNC(10);
+
+ GUEST_DONE();
+}
+
+void guest_nm_handler(struct ex_regs *regs)
+{
+ /* Check if #NM is triggered by XFEATURE_MASK_XTILE_DATA */
+ GUEST_SYNC(7);
+ GUEST_ASSERT(!(get_cr0() & X86_CR0_TS));
+ GUEST_ASSERT(rdmsr(MSR_IA32_XFD_ERR) == XFEATURE_MASK_XTILE_DATA);
+ GUEST_ASSERT(rdmsr(MSR_IA32_XFD) == XFEATURE_MASK_XTILE_DATA);
+ GUEST_SYNC(8);
+ GUEST_ASSERT(rdmsr(MSR_IA32_XFD_ERR) == XFEATURE_MASK_XTILE_DATA);
+ GUEST_ASSERT(rdmsr(MSR_IA32_XFD) == XFEATURE_MASK_XTILE_DATA);
+ /* Clear xfd_err */
+ wrmsr(MSR_IA32_XFD_ERR, 0);
+ /* xfd=0, enable amx */
+ wrmsr(MSR_IA32_XFD, 0);
+ GUEST_SYNC(9);
+}
+
+int main(int argc, char *argv[])
+{
+ struct kvm_regs regs1, regs2;
+ struct kvm_vcpu *vcpu;
+ struct kvm_vm *vm;
+ struct kvm_x86_state *state;
+ int xsave_restore_size;
+ vm_vaddr_t amx_cfg, tiledata, xstate;
+ struct ucall uc;
+ u32 amx_offset;
+ int ret;
+
+ /*
+ * Note, all off-by-default features must be enabled before anything
+ * caches KVM_GET_SUPPORTED_CPUID, e.g. before using kvm_cpu_has().
+ */
+ vm_xsave_require_permission(XFEATURE_MASK_XTILE_DATA);
+
+ TEST_REQUIRE(kvm_cpu_has(X86_FEATURE_XFD));
+ TEST_REQUIRE(kvm_cpu_has(X86_FEATURE_XSAVE));
+ TEST_REQUIRE(kvm_cpu_has(X86_FEATURE_AMX_TILE));
+ TEST_REQUIRE(kvm_cpu_has(X86_FEATURE_XTILECFG));
+ TEST_REQUIRE(kvm_cpu_has(X86_FEATURE_XTILEDATA));
+ TEST_REQUIRE(kvm_cpu_has(X86_FEATURE_XTILEDATA_XFD));
+
+ /* Create VM */
+ vm = vm_create_with_one_vcpu(&vcpu, guest_code);
+
+ TEST_ASSERT(kvm_cpu_has_p(X86_PROPERTY_XSTATE_MAX_SIZE),
+ "KVM should enumerate max XSAVE size when XSAVE is supported");
+ xsave_restore_size = kvm_cpu_property(X86_PROPERTY_XSTATE_MAX_SIZE);
+
+ vcpu_regs_get(vcpu, &regs1);
+
+ /* Register #NM handler */
+ vm_init_descriptor_tables(vm);
+ vcpu_init_descriptor_tables(vcpu);
+ vm_install_exception_handler(vm, NM_VECTOR, guest_nm_handler);
+
+ /* amx cfg for guest_code */
+ amx_cfg = vm_vaddr_alloc_page(vm);
+ memset(addr_gva2hva(vm, amx_cfg), 0x0, getpagesize());
+
+ /* amx tiledata for guest_code */
+ tiledata = vm_vaddr_alloc_pages(vm, 2);
+ memset(addr_gva2hva(vm, tiledata), rand() | 1, 2 * getpagesize());
+
+ /* XSAVE state for guest_code */
+ xstate = vm_vaddr_alloc_pages(vm, DIV_ROUND_UP(XSAVE_SIZE, PAGE_SIZE));
+ memset(addr_gva2hva(vm, xstate), 0, PAGE_SIZE * DIV_ROUND_UP(XSAVE_SIZE, PAGE_SIZE));
+ vcpu_args_set(vcpu, 3, amx_cfg, tiledata, xstate);
+
+ for (;;) {
+ vcpu_run(vcpu);
+ TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_IO);
+
+ switch (get_ucall(vcpu, &uc)) {
+ case UCALL_ABORT:
+ REPORT_GUEST_ASSERT(uc);
+ /* NOT REACHED */
+ case UCALL_SYNC:
+ switch (uc.args[1]) {
+ case 1:
+ case 2:
+ case 3:
+ case 5:
+ case 6:
+ case 7:
+ case 8:
+ fprintf(stderr, "GUEST_SYNC(%ld)\n", uc.args[1]);
+ break;
+ case 4:
+ case 10:
+ fprintf(stderr,
+ "GUEST_SYNC(%ld), check save/restore status\n", uc.args[1]);
+
+ /* Compacted mode, get amx offset by xsave area
+ * size subtract 8K amx size.
+ */
+ amx_offset = xsave_restore_size - NUM_TILES*TILE_SIZE;
+ state = vcpu_save_state(vcpu);
+ void *amx_start = (void *)state->xsave + amx_offset;
+ void *tiles_data = (void *)addr_gva2hva(vm, tiledata);
+ /* Only check TMM0 register, 1 tile */
+ ret = memcmp(amx_start, tiles_data, TILE_SIZE);
+ TEST_ASSERT(ret == 0, "memcmp failed, ret=%d", ret);
+ kvm_x86_state_cleanup(state);
+ break;
+ case 9:
+ fprintf(stderr,
+ "GUEST_SYNC(%ld), #NM exception and enable amx\n", uc.args[1]);
+ break;
+ }
+ break;
+ case UCALL_DONE:
+ fprintf(stderr, "UCALL_DONE\n");
+ goto done;
+ default:
+ TEST_FAIL("Unknown ucall %lu", uc.cmd);
+ }
+
+ state = vcpu_save_state(vcpu);
+ memset(&regs1, 0, sizeof(regs1));
+ vcpu_regs_get(vcpu, &regs1);
+
+ kvm_vm_release(vm);
+
+ /* Restore state in a new VM. */
+ vcpu = vm_recreate_with_one_vcpu(vm);
+ vcpu_load_state(vcpu, state);
+ kvm_x86_state_cleanup(state);
+
+ memset(&regs2, 0, sizeof(regs2));
+ vcpu_regs_get(vcpu, &regs2);
+ TEST_ASSERT(!memcmp(&regs1, &regs2, sizeof(regs2)),
+ "Unexpected register values after vcpu_load_state; rdi: %lx rsi: %lx",
+ (ulong) regs2.rdi, (ulong) regs2.rsi);
+ }
+done:
+ kvm_vm_free(vm);
+}
diff --git a/tools/testing/selftests/kvm/x86_64/cpuid_test.c b/tools/testing/selftests/kvm/x86_64/cpuid_test.c
new file mode 100644
index 000000000000..8c579ce714e9
--- /dev/null
+++ b/tools/testing/selftests/kvm/x86_64/cpuid_test.c
@@ -0,0 +1,208 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright (C) 2021, Red Hat Inc.
+ *
+ * Generic tests for KVM CPUID set/get ioctls
+ */
+#include <asm/kvm_para.h>
+#include <linux/kvm_para.h>
+#include <stdint.h>
+
+#include "test_util.h"
+#include "kvm_util.h"
+#include "processor.h"
+
+/* CPUIDs known to differ */
+struct {
+ u32 function;
+ u32 index;
+} mangled_cpuids[] = {
+ /*
+ * These entries depend on the vCPU's XCR0 register and IA32_XSS MSR,
+ * which are not controlled for by this test.
+ */
+ {.function = 0xd, .index = 0},
+ {.function = 0xd, .index = 1},
+};
+
+static void test_guest_cpuids(struct kvm_cpuid2 *guest_cpuid)
+{
+ int i;
+ u32 eax, ebx, ecx, edx;
+
+ for (i = 0; i < guest_cpuid->nent; i++) {
+ __cpuid(guest_cpuid->entries[i].function,
+ guest_cpuid->entries[i].index,
+ &eax, &ebx, &ecx, &edx);
+
+ GUEST_ASSERT_EQ(eax, guest_cpuid->entries[i].eax);
+ GUEST_ASSERT_EQ(ebx, guest_cpuid->entries[i].ebx);
+ GUEST_ASSERT_EQ(ecx, guest_cpuid->entries[i].ecx);
+ GUEST_ASSERT_EQ(edx, guest_cpuid->entries[i].edx);
+ }
+
+}
+
+static void guest_main(struct kvm_cpuid2 *guest_cpuid)
+{
+ GUEST_SYNC(1);
+
+ test_guest_cpuids(guest_cpuid);
+
+ GUEST_SYNC(2);
+
+ GUEST_ASSERT_EQ(this_cpu_property(X86_PROPERTY_MAX_KVM_LEAF), 0x40000001);
+
+ GUEST_DONE();
+}
+
+static bool is_cpuid_mangled(const struct kvm_cpuid_entry2 *entrie)
+{
+ int i;
+
+ for (i = 0; i < sizeof(mangled_cpuids); i++) {
+ if (mangled_cpuids[i].function == entrie->function &&
+ mangled_cpuids[i].index == entrie->index)
+ return true;
+ }
+
+ return false;
+}
+
+static void compare_cpuids(const struct kvm_cpuid2 *cpuid1,
+ const struct kvm_cpuid2 *cpuid2)
+{
+ const struct kvm_cpuid_entry2 *e1, *e2;
+ int i;
+
+ TEST_ASSERT(cpuid1->nent == cpuid2->nent,
+ "CPUID nent mismatch: %d vs. %d", cpuid1->nent, cpuid2->nent);
+
+ for (i = 0; i < cpuid1->nent; i++) {
+ e1 = &cpuid1->entries[i];
+ e2 = &cpuid2->entries[i];
+
+ TEST_ASSERT(e1->function == e2->function &&
+ e1->index == e2->index && e1->flags == e2->flags,
+ "CPUID entries[%d] mismtach: 0x%x.%d.%x vs. 0x%x.%d.%x",
+ i, e1->function, e1->index, e1->flags,
+ e2->function, e2->index, e2->flags);
+
+ if (is_cpuid_mangled(e1))
+ continue;
+
+ TEST_ASSERT(e1->eax == e2->eax && e1->ebx == e2->ebx &&
+ e1->ecx == e2->ecx && e1->edx == e2->edx,
+ "CPUID 0x%x.%x differ: 0x%x:0x%x:0x%x:0x%x vs 0x%x:0x%x:0x%x:0x%x",
+ e1->function, e1->index,
+ e1->eax, e1->ebx, e1->ecx, e1->edx,
+ e2->eax, e2->ebx, e2->ecx, e2->edx);
+ }
+}
+
+static void run_vcpu(struct kvm_vcpu *vcpu, int stage)
+{
+ struct ucall uc;
+
+ vcpu_run(vcpu);
+
+ switch (get_ucall(vcpu, &uc)) {
+ case UCALL_SYNC:
+ TEST_ASSERT(!strcmp((const char *)uc.args[0], "hello") &&
+ uc.args[1] == stage + 1,
+ "Stage %d: Unexpected register values vmexit, got %lx",
+ stage + 1, (ulong)uc.args[1]);
+ return;
+ case UCALL_DONE:
+ return;
+ case UCALL_ABORT:
+ REPORT_GUEST_ASSERT(uc);
+ default:
+ TEST_ASSERT(false, "Unexpected exit: %s",
+ exit_reason_str(vcpu->run->exit_reason));
+ }
+}
+
+struct kvm_cpuid2 *vcpu_alloc_cpuid(struct kvm_vm *vm, vm_vaddr_t *p_gva, struct kvm_cpuid2 *cpuid)
+{
+ int size = sizeof(*cpuid) + cpuid->nent * sizeof(cpuid->entries[0]);
+ vm_vaddr_t gva = vm_vaddr_alloc(vm, size, KVM_UTIL_MIN_VADDR);
+ struct kvm_cpuid2 *guest_cpuids = addr_gva2hva(vm, gva);
+
+ memcpy(guest_cpuids, cpuid, size);
+
+ *p_gva = gva;
+ return guest_cpuids;
+}
+
+static void set_cpuid_after_run(struct kvm_vcpu *vcpu)
+{
+ struct kvm_cpuid_entry2 *ent;
+ int rc;
+ u32 eax, ebx, x;
+
+ /* Setting unmodified CPUID is allowed */
+ rc = __vcpu_set_cpuid(vcpu);
+ TEST_ASSERT(!rc, "Setting unmodified CPUID after KVM_RUN failed: %d", rc);
+
+ /* Changing CPU features is forbidden */
+ ent = vcpu_get_cpuid_entry(vcpu, 0x7);
+ ebx = ent->ebx;
+ ent->ebx--;
+ rc = __vcpu_set_cpuid(vcpu);
+ TEST_ASSERT(rc, "Changing CPU features should fail");
+ ent->ebx = ebx;
+
+ /* Changing MAXPHYADDR is forbidden */
+ ent = vcpu_get_cpuid_entry(vcpu, 0x80000008);
+ eax = ent->eax;
+ x = eax & 0xff;
+ ent->eax = (eax & ~0xffu) | (x - 1);
+ rc = __vcpu_set_cpuid(vcpu);
+ TEST_ASSERT(rc, "Changing MAXPHYADDR should fail");
+ ent->eax = eax;
+}
+
+static void test_get_cpuid2(struct kvm_vcpu *vcpu)
+{
+ struct kvm_cpuid2 *cpuid = allocate_kvm_cpuid2(vcpu->cpuid->nent + 1);
+ int i, r;
+
+ vcpu_ioctl(vcpu, KVM_GET_CPUID2, cpuid);
+ TEST_ASSERT(cpuid->nent == vcpu->cpuid->nent,
+ "KVM didn't update nent on success, wanted %u, got %u",
+ vcpu->cpuid->nent, cpuid->nent);
+
+ for (i = 0; i < vcpu->cpuid->nent; i++) {
+ cpuid->nent = i;
+ r = __vcpu_ioctl(vcpu, KVM_GET_CPUID2, cpuid);
+ TEST_ASSERT(r && errno == E2BIG, KVM_IOCTL_ERROR(KVM_GET_CPUID2, r));
+ TEST_ASSERT(cpuid->nent == i, "KVM modified nent on failure");
+ }
+ free(cpuid);
+}
+
+int main(void)
+{
+ struct kvm_vcpu *vcpu;
+ vm_vaddr_t cpuid_gva;
+ struct kvm_vm *vm;
+ int stage;
+
+ vm = vm_create_with_one_vcpu(&vcpu, guest_main);
+
+ compare_cpuids(kvm_get_supported_cpuid(), vcpu->cpuid);
+
+ vcpu_alloc_cpuid(vm, &cpuid_gva, vcpu->cpuid);
+
+ vcpu_args_set(vcpu, 1, cpuid_gva);
+
+ for (stage = 0; stage < 3; stage++)
+ run_vcpu(vcpu, stage);
+
+ set_cpuid_after_run(vcpu);
+
+ test_get_cpuid2(vcpu);
+
+ kvm_vm_free(vm);
+}
diff --git a/tools/testing/selftests/kvm/x86_64/cr4_cpuid_sync_test.c b/tools/testing/selftests/kvm/x86_64/cr4_cpuid_sync_test.c
index 140e91901582..624dc725e14d 100644
--- a/tools/testing/selftests/kvm/x86_64/cr4_cpuid_sync_test.c
+++ b/tools/testing/selftests/kvm/x86_64/cr4_cpuid_sync_test.c
@@ -19,25 +19,11 @@
#include "kvm_util.h"
#include "processor.h"
-#define X86_FEATURE_XSAVE (1<<26)
-#define X86_FEATURE_OSXSAVE (1<<27)
-#define VCPU_ID 1
-
static inline bool cr4_cpuid_is_sync(void)
{
- int func, subfunc;
- uint32_t eax, ebx, ecx, edx;
- uint64_t cr4;
-
- func = 0x1;
- subfunc = 0x0;
- __asm__ __volatile__("cpuid"
- : "=a"(eax), "=b"(ebx), "=c"(ecx), "=d"(edx)
- : "a"(func), "c"(subfunc));
+ uint64_t cr4 = get_cr4();
- cr4 = get_cr4();
-
- return (!!(ecx & X86_FEATURE_OSXSAVE)) == (!!(cr4 & X86_CR4_OSXSAVE));
+ return (this_cpu_has(X86_FEATURE_OSXSAVE) == !!(cr4 & X86_CR4_OSXSAVE));
}
static void guest_code(void)
@@ -63,45 +49,28 @@ static void guest_code(void)
int main(int argc, char *argv[])
{
- struct kvm_run *run;
+ struct kvm_vcpu *vcpu;
struct kvm_vm *vm;
struct kvm_sregs sregs;
- struct kvm_cpuid_entry2 *entry;
struct ucall uc;
- int rc;
- entry = kvm_get_supported_cpuid_entry(1);
- if (!(entry->ecx & X86_FEATURE_XSAVE)) {
- print_skip("XSAVE feature not supported");
- return 0;
- }
+ TEST_REQUIRE(kvm_cpu_has(X86_FEATURE_XSAVE));
- /* Tell stdout not to buffer its content */
- setbuf(stdout, NULL);
-
- /* Create VM */
- vm = vm_create_default(VCPU_ID, 0, guest_code);
- vcpu_set_cpuid(vm, VCPU_ID, kvm_get_supported_cpuid());
- run = vcpu_state(vm, VCPU_ID);
+ vm = vm_create_with_one_vcpu(&vcpu, guest_code);
while (1) {
- rc = _vcpu_run(vm, VCPU_ID);
-
- TEST_ASSERT(rc == 0, "vcpu_run failed: %d\n", rc);
- TEST_ASSERT(run->exit_reason == KVM_EXIT_IO,
- "Unexpected exit reason: %u (%s),\n",
- run->exit_reason,
- exit_reason_str(run->exit_reason));
+ vcpu_run(vcpu);
+ TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_IO);
- switch (get_ucall(vm, VCPU_ID, &uc)) {
+ switch (get_ucall(vcpu, &uc)) {
case UCALL_SYNC:
/* emulate hypervisor clearing CR4.OSXSAVE */
- vcpu_sregs_get(vm, VCPU_ID, &sregs);
+ vcpu_sregs_get(vcpu, &sregs);
sregs.cr4 &= ~X86_CR4_OSXSAVE;
- vcpu_sregs_set(vm, VCPU_ID, &sregs);
+ vcpu_sregs_set(vcpu, &sregs);
break;
case UCALL_ABORT:
- TEST_FAIL("Guest CR4 bit (OSXSAVE) unsynchronized with CPUID bit.");
+ REPORT_GUEST_ASSERT(uc);
break;
case UCALL_DONE:
goto done;
@@ -110,8 +79,7 @@ int main(int argc, char *argv[])
}
}
- kvm_vm_free(vm);
-
done:
+ kvm_vm_free(vm);
return 0;
}
diff --git a/tools/testing/selftests/kvm/x86_64/debug_regs.c b/tools/testing/selftests/kvm/x86_64/debug_regs.c
index 8162c58a1234..f6b295e0b2d2 100644
--- a/tools/testing/selftests/kvm/x86_64/debug_regs.c
+++ b/tools/testing/selftests/kvm/x86_64/debug_regs.c
@@ -8,12 +8,13 @@
#include <string.h>
#include "kvm_util.h"
#include "processor.h"
-
-#define VCPU_ID 0
+#include "apic.h"
#define DR6_BD (1 << 13)
#define DR7_GD (1 << 13)
+#define IRQ_VECTOR 0xAA
+
/* For testing data access debug BP */
uint32_t guest_value;
@@ -21,6 +22,11 @@ extern unsigned char sw_bp, hw_bp, write_data, ss_start, bd_start;
static void guest_code(void)
{
+ /* Create a pending interrupt on current vCPU */
+ x2apic_enable();
+ x2apic_write_reg(APIC_ICR, APIC_DEST_SELF | APIC_INT_ASSERT |
+ APIC_DM_FIXED | IRQ_VECTOR);
+
/*
* Software BP tests.
*
@@ -38,77 +44,83 @@ static void guest_code(void)
"mov %%rax,%0;\n\t write_data:"
: "=m" (guest_value) : : "rax");
- /* Single step test, covers 2 basic instructions and 2 emulated */
+ /*
+ * Single step test, covers 2 basic instructions and 2 emulated
+ *
+ * Enable interrupts during the single stepping to see that
+ * pending interrupt we raised is not handled due to KVM_GUESTDBG_BLOCKIRQ
+ */
asm volatile("ss_start: "
- "xor %%rax,%%rax\n\t"
+ "sti\n\t"
+ "xor %%eax,%%eax\n\t"
"cpuid\n\t"
"movl $0x1a0,%%ecx\n\t"
"rdmsr\n\t"
- : : : "rax", "ecx");
+ "cli\n\t"
+ : : : "eax", "ebx", "ecx", "edx");
/* DR6.BD test */
asm volatile("bd_start: mov %%dr0, %%rax" : : : "rax");
GUEST_DONE();
}
-#define CLEAR_DEBUG() memset(&debug, 0, sizeof(debug))
-#define APPLY_DEBUG() vcpu_set_guest_debug(vm, VCPU_ID, &debug)
#define CAST_TO_RIP(v) ((unsigned long long)&(v))
-#define SET_RIP(v) do { \
- vcpu_regs_get(vm, VCPU_ID, &regs); \
- regs.rip = (v); \
- vcpu_regs_set(vm, VCPU_ID, &regs); \
- } while (0)
-#define MOVE_RIP(v) SET_RIP(regs.rip + (v));
+
+static void vcpu_skip_insn(struct kvm_vcpu *vcpu, int insn_len)
+{
+ struct kvm_regs regs;
+
+ vcpu_regs_get(vcpu, &regs);
+ regs.rip += insn_len;
+ vcpu_regs_set(vcpu, &regs);
+}
int main(void)
{
struct kvm_guest_debug debug;
unsigned long long target_dr6, target_rip;
- struct kvm_regs regs;
+ struct kvm_vcpu *vcpu;
struct kvm_run *run;
struct kvm_vm *vm;
struct ucall uc;
uint64_t cmd;
int i;
/* Instruction lengths starting at ss_start */
- int ss_size[4] = {
- 3, /* xor */
+ int ss_size[6] = {
+ 1, /* sti*/
+ 2, /* xor */
2, /* cpuid */
5, /* mov */
2, /* rdmsr */
+ 1, /* cli */
};
- if (!kvm_check_cap(KVM_CAP_SET_GUEST_DEBUG)) {
- print_skip("KVM_CAP_SET_GUEST_DEBUG not supported");
- return 0;
- }
+ TEST_REQUIRE(kvm_has_cap(KVM_CAP_SET_GUEST_DEBUG));
- vm = vm_create_default(VCPU_ID, 0, guest_code);
- vcpu_set_cpuid(vm, VCPU_ID, kvm_get_supported_cpuid());
- run = vcpu_state(vm, VCPU_ID);
+ vm = vm_create_with_one_vcpu(&vcpu, guest_code);
+ run = vcpu->run;
/* Test software BPs - int3 */
- CLEAR_DEBUG();
+ memset(&debug, 0, sizeof(debug));
debug.control = KVM_GUESTDBG_ENABLE | KVM_GUESTDBG_USE_SW_BP;
- APPLY_DEBUG();
- vcpu_run(vm, VCPU_ID);
+ vcpu_guest_debug_set(vcpu, &debug);
+ vcpu_run(vcpu);
TEST_ASSERT(run->exit_reason == KVM_EXIT_DEBUG &&
run->debug.arch.exception == BP_VECTOR &&
run->debug.arch.pc == CAST_TO_RIP(sw_bp),
"INT3: exit %d exception %d rip 0x%llx (should be 0x%llx)",
run->exit_reason, run->debug.arch.exception,
run->debug.arch.pc, CAST_TO_RIP(sw_bp));
- MOVE_RIP(1);
+ vcpu_skip_insn(vcpu, 1);
/* Test instruction HW BP over DR[0-3] */
for (i = 0; i < 4; i++) {
- CLEAR_DEBUG();
+ memset(&debug, 0, sizeof(debug));
debug.control = KVM_GUESTDBG_ENABLE | KVM_GUESTDBG_USE_HW_BP;
debug.arch.debugreg[i] = CAST_TO_RIP(hw_bp);
debug.arch.debugreg[7] = 0x400 | (1UL << (2*i+1));
- APPLY_DEBUG();
- vcpu_run(vm, VCPU_ID);
+ vcpu_guest_debug_set(vcpu, &debug);
+ vcpu_run(vcpu);
target_dr6 = 0xffff0ff0 | (1UL << i);
TEST_ASSERT(run->exit_reason == KVM_EXIT_DEBUG &&
run->debug.arch.exception == DB_VECTOR &&
@@ -121,17 +133,17 @@ int main(void)
run->debug.arch.dr6, target_dr6);
}
/* Skip "nop" */
- MOVE_RIP(1);
+ vcpu_skip_insn(vcpu, 1);
/* Test data access HW BP over DR[0-3] */
for (i = 0; i < 4; i++) {
- CLEAR_DEBUG();
+ memset(&debug, 0, sizeof(debug));
debug.control = KVM_GUESTDBG_ENABLE | KVM_GUESTDBG_USE_HW_BP;
debug.arch.debugreg[i] = CAST_TO_RIP(guest_value);
debug.arch.debugreg[7] = 0x00000400 | (1UL << (2*i+1)) |
(0x000d0000UL << (4*i));
- APPLY_DEBUG();
- vcpu_run(vm, VCPU_ID);
+ vcpu_guest_debug_set(vcpu, &debug);
+ vcpu_run(vcpu);
target_dr6 = 0xffff0ff0 | (1UL << i);
TEST_ASSERT(run->exit_reason == KVM_EXIT_DEBUG &&
run->debug.arch.exception == DB_VECTOR &&
@@ -143,22 +155,22 @@ int main(void)
run->debug.arch.pc, CAST_TO_RIP(write_data),
run->debug.arch.dr6, target_dr6);
/* Rollback the 4-bytes "mov" */
- MOVE_RIP(-7);
+ vcpu_skip_insn(vcpu, -7);
}
/* Skip the 4-bytes "mov" */
- MOVE_RIP(7);
+ vcpu_skip_insn(vcpu, 7);
/* Test single step */
target_rip = CAST_TO_RIP(ss_start);
target_dr6 = 0xffff4ff0ULL;
- vcpu_regs_get(vm, VCPU_ID, &regs);
for (i = 0; i < (sizeof(ss_size) / sizeof(ss_size[0])); i++) {
target_rip += ss_size[i];
- CLEAR_DEBUG();
- debug.control = KVM_GUESTDBG_ENABLE | KVM_GUESTDBG_SINGLESTEP;
+ memset(&debug, 0, sizeof(debug));
+ debug.control = KVM_GUESTDBG_ENABLE | KVM_GUESTDBG_SINGLESTEP |
+ KVM_GUESTDBG_BLOCKIRQ;
debug.arch.debugreg[7] = 0x00000400;
- APPLY_DEBUG();
- vcpu_run(vm, VCPU_ID);
+ vcpu_guest_debug_set(vcpu, &debug);
+ vcpu_run(vcpu);
TEST_ASSERT(run->exit_reason == KVM_EXIT_DEBUG &&
run->debug.arch.exception == DB_VECTOR &&
run->debug.arch.pc == target_rip &&
@@ -171,11 +183,11 @@ int main(void)
}
/* Finally test global disable */
- CLEAR_DEBUG();
+ memset(&debug, 0, sizeof(debug));
debug.control = KVM_GUESTDBG_ENABLE | KVM_GUESTDBG_USE_HW_BP;
debug.arch.debugreg[7] = 0x400 | DR7_GD;
- APPLY_DEBUG();
- vcpu_run(vm, VCPU_ID);
+ vcpu_guest_debug_set(vcpu, &debug);
+ vcpu_run(vcpu);
target_dr6 = 0xffff0ff0 | DR6_BD;
TEST_ASSERT(run->exit_reason == KVM_EXIT_DEBUG &&
run->debug.arch.exception == DB_VECTOR &&
@@ -188,12 +200,12 @@ int main(void)
target_dr6);
/* Disable all debug controls, run to the end */
- CLEAR_DEBUG();
- APPLY_DEBUG();
+ memset(&debug, 0, sizeof(debug));
+ vcpu_guest_debug_set(vcpu, &debug);
- vcpu_run(vm, VCPU_ID);
- TEST_ASSERT(run->exit_reason == KVM_EXIT_IO, "KVM_EXIT_IO");
- cmd = get_ucall(vm, VCPU_ID, &uc);
+ vcpu_run(vcpu);
+ TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_IO);
+ cmd = get_ucall(vcpu, &uc);
TEST_ASSERT(cmd == UCALL_DONE, "UCALL_DONE");
kvm_vm_free(vm);
diff --git a/tools/testing/selftests/kvm/x86_64/dirty_log_page_splitting_test.c b/tools/testing/selftests/kvm/x86_64/dirty_log_page_splitting_test.c
new file mode 100644
index 000000000000..ee3b384b991c
--- /dev/null
+++ b/tools/testing/selftests/kvm/x86_64/dirty_log_page_splitting_test.c
@@ -0,0 +1,262 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * KVM dirty logging page splitting test
+ *
+ * Based on dirty_log_perf.c
+ *
+ * Copyright (C) 2018, Red Hat, Inc.
+ * Copyright (C) 2023, Google, Inc.
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <pthread.h>
+#include <linux/bitmap.h>
+
+#include "kvm_util.h"
+#include "test_util.h"
+#include "memstress.h"
+#include "guest_modes.h"
+
+#define VCPUS 2
+#define SLOTS 2
+#define ITERATIONS 2
+
+static uint64_t guest_percpu_mem_size = DEFAULT_PER_VCPU_MEM_SIZE;
+
+static enum vm_mem_backing_src_type backing_src = VM_MEM_SRC_ANONYMOUS_HUGETLB;
+
+static u64 dirty_log_manual_caps;
+static bool host_quit;
+static int iteration;
+static int vcpu_last_completed_iteration[KVM_MAX_VCPUS];
+
+struct kvm_page_stats {
+ uint64_t pages_4k;
+ uint64_t pages_2m;
+ uint64_t pages_1g;
+ uint64_t hugepages;
+};
+
+static void get_page_stats(struct kvm_vm *vm, struct kvm_page_stats *stats, const char *stage)
+{
+ stats->pages_4k = vm_get_stat(vm, "pages_4k");
+ stats->pages_2m = vm_get_stat(vm, "pages_2m");
+ stats->pages_1g = vm_get_stat(vm, "pages_1g");
+ stats->hugepages = stats->pages_2m + stats->pages_1g;
+
+ pr_debug("\nPage stats after %s: 4K: %ld 2M: %ld 1G: %ld huge: %ld\n",
+ stage, stats->pages_4k, stats->pages_2m, stats->pages_1g,
+ stats->hugepages);
+}
+
+static void run_vcpu_iteration(struct kvm_vm *vm)
+{
+ int i;
+
+ iteration++;
+ for (i = 0; i < VCPUS; i++) {
+ while (READ_ONCE(vcpu_last_completed_iteration[i]) !=
+ iteration)
+ ;
+ }
+}
+
+static void vcpu_worker(struct memstress_vcpu_args *vcpu_args)
+{
+ struct kvm_vcpu *vcpu = vcpu_args->vcpu;
+ int vcpu_idx = vcpu_args->vcpu_idx;
+
+ while (!READ_ONCE(host_quit)) {
+ int current_iteration = READ_ONCE(iteration);
+
+ vcpu_run(vcpu);
+
+ TEST_ASSERT_EQ(get_ucall(vcpu, NULL), UCALL_SYNC);
+
+ vcpu_last_completed_iteration[vcpu_idx] = current_iteration;
+
+ /* Wait for the start of the next iteration to be signaled. */
+ while (current_iteration == READ_ONCE(iteration) &&
+ READ_ONCE(iteration) >= 0 &&
+ !READ_ONCE(host_quit))
+ ;
+ }
+}
+
+static void run_test(enum vm_guest_mode mode, void *unused)
+{
+ struct kvm_vm *vm;
+ unsigned long **bitmaps;
+ uint64_t guest_num_pages;
+ uint64_t host_num_pages;
+ uint64_t pages_per_slot;
+ int i;
+ struct kvm_page_stats stats_populated;
+ struct kvm_page_stats stats_dirty_logging_enabled;
+ struct kvm_page_stats stats_dirty_pass[ITERATIONS];
+ struct kvm_page_stats stats_clear_pass[ITERATIONS];
+ struct kvm_page_stats stats_dirty_logging_disabled;
+ struct kvm_page_stats stats_repopulated;
+
+ vm = memstress_create_vm(mode, VCPUS, guest_percpu_mem_size,
+ SLOTS, backing_src, false);
+
+ guest_num_pages = (VCPUS * guest_percpu_mem_size) >> vm->page_shift;
+ guest_num_pages = vm_adjust_num_guest_pages(mode, guest_num_pages);
+ host_num_pages = vm_num_host_pages(mode, guest_num_pages);
+ pages_per_slot = host_num_pages / SLOTS;
+ TEST_ASSERT_EQ(host_num_pages, pages_per_slot * SLOTS);
+ TEST_ASSERT(!(host_num_pages % 512),
+ "Number of pages, '%lu' not a multiple of 2MiB", host_num_pages);
+
+ bitmaps = memstress_alloc_bitmaps(SLOTS, pages_per_slot);
+
+ if (dirty_log_manual_caps)
+ vm_enable_cap(vm, KVM_CAP_MANUAL_DIRTY_LOG_PROTECT2,
+ dirty_log_manual_caps);
+
+ /* Start the iterations */
+ iteration = -1;
+ host_quit = false;
+
+ for (i = 0; i < VCPUS; i++)
+ vcpu_last_completed_iteration[i] = -1;
+
+ memstress_start_vcpu_threads(VCPUS, vcpu_worker);
+
+ run_vcpu_iteration(vm);
+ get_page_stats(vm, &stats_populated, "populating memory");
+
+ /* Enable dirty logging */
+ memstress_enable_dirty_logging(vm, SLOTS);
+
+ get_page_stats(vm, &stats_dirty_logging_enabled, "enabling dirty logging");
+
+ while (iteration < ITERATIONS) {
+ run_vcpu_iteration(vm);
+ get_page_stats(vm, &stats_dirty_pass[iteration - 1],
+ "dirtying memory");
+
+ memstress_get_dirty_log(vm, bitmaps, SLOTS);
+
+ if (dirty_log_manual_caps) {
+ memstress_clear_dirty_log(vm, bitmaps, SLOTS, pages_per_slot);
+
+ get_page_stats(vm, &stats_clear_pass[iteration - 1], "clearing dirty log");
+ }
+ }
+
+ /* Disable dirty logging */
+ memstress_disable_dirty_logging(vm, SLOTS);
+
+ get_page_stats(vm, &stats_dirty_logging_disabled, "disabling dirty logging");
+
+ /* Run vCPUs again to fault pages back in. */
+ run_vcpu_iteration(vm);
+ get_page_stats(vm, &stats_repopulated, "repopulating memory");
+
+ /*
+ * Tell the vCPU threads to quit. No need to manually check that vCPUs
+ * have stopped running after disabling dirty logging, the join will
+ * wait for them to exit.
+ */
+ host_quit = true;
+ memstress_join_vcpu_threads(VCPUS);
+
+ memstress_free_bitmaps(bitmaps, SLOTS);
+ memstress_destroy_vm(vm);
+
+ TEST_ASSERT_EQ((stats_populated.pages_2m * 512 +
+ stats_populated.pages_1g * 512 * 512), host_num_pages);
+
+ /*
+ * Check that all huge pages were split. Since large pages can only
+ * exist in the data slot, and the vCPUs should have dirtied all pages
+ * in the data slot, there should be no huge pages left after splitting.
+ * Splitting happens at dirty log enable time without
+ * KVM_CAP_MANUAL_DIRTY_LOG_PROTECT2 and after the first clear pass
+ * with that capability.
+ */
+ if (dirty_log_manual_caps) {
+ TEST_ASSERT_EQ(stats_clear_pass[0].hugepages, 0);
+ TEST_ASSERT(stats_clear_pass[0].pages_4k >= host_num_pages,
+ "Expected at least '%lu' 4KiB pages, found only '%lu'",
+ host_num_pages, stats_clear_pass[0].pages_4k);
+ TEST_ASSERT_EQ(stats_dirty_logging_enabled.hugepages, stats_populated.hugepages);
+ } else {
+ TEST_ASSERT_EQ(stats_dirty_logging_enabled.hugepages, 0);
+ TEST_ASSERT(stats_dirty_logging_enabled.pages_4k >= host_num_pages,
+ "Expected at least '%lu' 4KiB pages, found only '%lu'",
+ host_num_pages, stats_dirty_logging_enabled.pages_4k);
+ }
+
+ /*
+ * Once dirty logging is disabled and the vCPUs have touched all their
+ * memory again, the hugepage counts should be the same as they were
+ * right after initial population of memory.
+ */
+ TEST_ASSERT_EQ(stats_populated.pages_2m, stats_repopulated.pages_2m);
+ TEST_ASSERT_EQ(stats_populated.pages_1g, stats_repopulated.pages_1g);
+}
+
+static void help(char *name)
+{
+ puts("");
+ printf("usage: %s [-h] [-b vcpu bytes] [-s mem type]\n",
+ name);
+ puts("");
+ printf(" -b: specify the size of the memory region which should be\n"
+ " dirtied by each vCPU. e.g. 10M or 3G.\n"
+ " (default: 1G)\n");
+ backing_src_help("-s");
+ puts("");
+}
+
+int main(int argc, char *argv[])
+{
+ int opt;
+
+ TEST_REQUIRE(get_kvm_param_bool("eager_page_split"));
+ TEST_REQUIRE(get_kvm_param_bool("tdp_mmu"));
+
+ while ((opt = getopt(argc, argv, "b:hs:")) != -1) {
+ switch (opt) {
+ case 'b':
+ guest_percpu_mem_size = parse_size(optarg);
+ break;
+ case 'h':
+ help(argv[0]);
+ exit(0);
+ case 's':
+ backing_src = parse_backing_src_type(optarg);
+ break;
+ default:
+ help(argv[0]);
+ exit(1);
+ }
+ }
+
+ if (!is_backing_src_hugetlb(backing_src)) {
+ pr_info("This test will only work reliably with HugeTLB memory. "
+ "It can work with THP, but that is best effort.\n");
+ }
+
+ guest_modes_append_default();
+
+ dirty_log_manual_caps = 0;
+ for_each_guest_mode(run_test, NULL);
+
+ dirty_log_manual_caps =
+ kvm_check_cap(KVM_CAP_MANUAL_DIRTY_LOG_PROTECT2);
+
+ if (dirty_log_manual_caps) {
+ dirty_log_manual_caps &= (KVM_DIRTY_LOG_MANUAL_PROTECT_ENABLE |
+ KVM_DIRTY_LOG_INITIALLY_SET);
+ for_each_guest_mode(run_test, NULL);
+ } else {
+ pr_info("Skipping testing with MANUAL_PROTECT as it is not supported");
+ }
+
+ return 0;
+}
diff --git a/tools/testing/selftests/kvm/x86_64/evmcs_test.c b/tools/testing/selftests/kvm/x86_64/evmcs_test.c
deleted file mode 100644
index 757928199f19..000000000000
--- a/tools/testing/selftests/kvm/x86_64/evmcs_test.c
+++ /dev/null
@@ -1,166 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * Copyright (C) 2018, Red Hat, Inc.
- *
- * Tests for Enlightened VMCS, including nested guest state.
- */
-#define _GNU_SOURCE /* for program_invocation_short_name */
-#include <fcntl.h>
-#include <stdio.h>
-#include <stdlib.h>
-#include <string.h>
-#include <sys/ioctl.h>
-
-#include "test_util.h"
-
-#include "kvm_util.h"
-
-#include "vmx.h"
-
-#define VCPU_ID 5
-
-void l2_guest_code(void)
-{
- GUEST_SYNC(7);
-
- GUEST_SYNC(8);
-
- /* Done, exit to L1 and never come back. */
- vmcall();
-}
-
-void l1_guest_code(struct vmx_pages *vmx_pages)
-{
-#define L2_GUEST_STACK_SIZE 64
- unsigned long l2_guest_stack[L2_GUEST_STACK_SIZE];
-
- enable_vp_assist(vmx_pages->vp_assist_gpa, vmx_pages->vp_assist);
-
- GUEST_ASSERT(vmx_pages->vmcs_gpa);
- GUEST_ASSERT(prepare_for_vmx_operation(vmx_pages));
- GUEST_SYNC(3);
- GUEST_ASSERT(load_vmcs(vmx_pages));
- GUEST_ASSERT(vmptrstz() == vmx_pages->enlightened_vmcs_gpa);
-
- GUEST_SYNC(4);
- GUEST_ASSERT(vmptrstz() == vmx_pages->enlightened_vmcs_gpa);
-
- prepare_vmcs(vmx_pages, l2_guest_code,
- &l2_guest_stack[L2_GUEST_STACK_SIZE]);
-
- GUEST_SYNC(5);
- GUEST_ASSERT(vmptrstz() == vmx_pages->enlightened_vmcs_gpa);
- current_evmcs->revision_id = -1u;
- GUEST_ASSERT(vmlaunch());
- current_evmcs->revision_id = EVMCS_VERSION;
- GUEST_SYNC(6);
-
- GUEST_ASSERT(!vmlaunch());
- GUEST_ASSERT(vmptrstz() == vmx_pages->enlightened_vmcs_gpa);
- GUEST_SYNC(9);
- GUEST_ASSERT(!vmresume());
- GUEST_ASSERT(vmreadz(VM_EXIT_REASON) == EXIT_REASON_VMCALL);
- GUEST_SYNC(10);
-}
-
-void guest_code(struct vmx_pages *vmx_pages)
-{
- GUEST_SYNC(1);
- GUEST_SYNC(2);
-
- if (vmx_pages)
- l1_guest_code(vmx_pages);
-
- GUEST_DONE();
-
- /* Try enlightened vmptrld with an incorrect GPA */
- evmcs_vmptrld(0xdeadbeef, vmx_pages->enlightened_vmcs);
- GUEST_ASSERT(vmlaunch());
-}
-
-int main(int argc, char *argv[])
-{
- vm_vaddr_t vmx_pages_gva = 0;
-
- struct kvm_regs regs1, regs2;
- struct kvm_vm *vm;
- struct kvm_run *run;
- struct kvm_x86_state *state;
- struct ucall uc;
- int stage;
-
- /* Create VM */
- vm = vm_create_default(VCPU_ID, 0, guest_code);
-
- vcpu_set_cpuid(vm, VCPU_ID, kvm_get_supported_cpuid());
-
- if (!nested_vmx_supported() ||
- !kvm_check_cap(KVM_CAP_NESTED_STATE) ||
- !kvm_check_cap(KVM_CAP_HYPERV_ENLIGHTENED_VMCS)) {
- print_skip("Enlightened VMCS is unsupported");
- exit(KSFT_SKIP);
- }
-
- vcpu_enable_evmcs(vm, VCPU_ID);
-
- run = vcpu_state(vm, VCPU_ID);
-
- vcpu_regs_get(vm, VCPU_ID, &regs1);
-
- vcpu_alloc_vmx(vm, &vmx_pages_gva);
- vcpu_args_set(vm, VCPU_ID, 1, vmx_pages_gva);
-
- for (stage = 1;; stage++) {
- _vcpu_run(vm, VCPU_ID);
- TEST_ASSERT(run->exit_reason == KVM_EXIT_IO,
- "Stage %d: unexpected exit reason: %u (%s),\n",
- stage, run->exit_reason,
- exit_reason_str(run->exit_reason));
-
- switch (get_ucall(vm, VCPU_ID, &uc)) {
- case UCALL_ABORT:
- TEST_FAIL("%s at %s:%ld", (const char *)uc.args[0],
- __FILE__, uc.args[1]);
- /* NOT REACHED */
- case UCALL_SYNC:
- break;
- case UCALL_DONE:
- goto part1_done;
- default:
- TEST_FAIL("Unknown ucall %lu", uc.cmd);
- }
-
- /* UCALL_SYNC is handled here. */
- TEST_ASSERT(!strcmp((const char *)uc.args[0], "hello") &&
- uc.args[1] == stage, "Stage %d: Unexpected register values vmexit, got %lx",
- stage, (ulong)uc.args[1]);
-
- state = vcpu_save_state(vm, VCPU_ID);
- memset(&regs1, 0, sizeof(regs1));
- vcpu_regs_get(vm, VCPU_ID, &regs1);
-
- kvm_vm_release(vm);
-
- /* Restore state in a new VM. */
- kvm_vm_restart(vm, O_RDWR);
- vm_vcpu_add(vm, VCPU_ID);
- vcpu_set_cpuid(vm, VCPU_ID, kvm_get_supported_cpuid());
- vcpu_enable_evmcs(vm, VCPU_ID);
- vcpu_load_state(vm, VCPU_ID, state);
- run = vcpu_state(vm, VCPU_ID);
- free(state);
-
- memset(&regs2, 0, sizeof(regs2));
- vcpu_regs_get(vm, VCPU_ID, &regs2);
- TEST_ASSERT(!memcmp(&regs1, &regs2, sizeof(regs2)),
- "Unexpected register values after vcpu_load_state; rdi: %lx rsi: %lx",
- (ulong) regs2.rdi, (ulong) regs2.rsi);
- }
-
-part1_done:
- _vcpu_run(vm, VCPU_ID);
- TEST_ASSERT(run->exit_reason == KVM_EXIT_SHUTDOWN,
- "Unexpected successful VMEnter with invalid eVMCS pointer!");
-
- kvm_vm_free(vm);
-}
diff --git a/tools/testing/selftests/kvm/x86_64/exit_on_emulation_failure_test.c b/tools/testing/selftests/kvm/x86_64/exit_on_emulation_failure_test.c
new file mode 100644
index 000000000000..6c2e5e0ceb1f
--- /dev/null
+++ b/tools/testing/selftests/kvm/x86_64/exit_on_emulation_failure_test.c
@@ -0,0 +1,42 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (C) 2022, Google LLC.
+ *
+ * Test for KVM_CAP_EXIT_ON_EMULATION_FAILURE.
+ */
+
+#define _GNU_SOURCE /* for program_invocation_short_name */
+
+#include "flds_emulation.h"
+
+#include "test_util.h"
+
+#define MMIO_GPA 0x700000000
+#define MMIO_GVA MMIO_GPA
+
+static void guest_code(void)
+{
+ /* Execute flds with an MMIO address to force KVM to emulate it. */
+ flds(MMIO_GVA);
+ GUEST_DONE();
+}
+
+int main(int argc, char *argv[])
+{
+ struct kvm_vcpu *vcpu;
+ struct kvm_vm *vm;
+
+ TEST_REQUIRE(kvm_has_cap(KVM_CAP_EXIT_ON_EMULATION_FAILURE));
+
+ vm = vm_create_with_one_vcpu(&vcpu, guest_code);
+ vm_enable_cap(vm, KVM_CAP_EXIT_ON_EMULATION_FAILURE, 1);
+ virt_map(vm, MMIO_GVA, MMIO_GPA, 1);
+
+ vcpu_run(vcpu);
+ handle_flds_emulation_failure_exit(vcpu);
+ vcpu_run(vcpu);
+ TEST_ASSERT_EQ(get_ucall(vcpu, NULL), UCALL_DONE);
+
+ kvm_vm_free(vm);
+ return 0;
+}
diff --git a/tools/testing/selftests/kvm/x86_64/fix_hypercall_test.c b/tools/testing/selftests/kvm/x86_64/fix_hypercall_test.c
new file mode 100644
index 000000000000..f3c2239228b1
--- /dev/null
+++ b/tools/testing/selftests/kvm/x86_64/fix_hypercall_test.c
@@ -0,0 +1,144 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright (C) 2020, Google LLC.
+ *
+ * Tests for KVM paravirtual feature disablement
+ */
+#include <asm/kvm_para.h>
+#include <linux/kvm_para.h>
+#include <linux/stringify.h>
+#include <stdint.h>
+
+#include "kvm_test_harness.h"
+#include "apic.h"
+#include "test_util.h"
+#include "kvm_util.h"
+#include "processor.h"
+
+/* VMCALL and VMMCALL are both 3-byte opcodes. */
+#define HYPERCALL_INSN_SIZE 3
+
+static bool quirk_disabled;
+
+static void guest_ud_handler(struct ex_regs *regs)
+{
+ regs->rax = -EFAULT;
+ regs->rip += HYPERCALL_INSN_SIZE;
+}
+
+static const uint8_t vmx_vmcall[HYPERCALL_INSN_SIZE] = { 0x0f, 0x01, 0xc1 };
+static const uint8_t svm_vmmcall[HYPERCALL_INSN_SIZE] = { 0x0f, 0x01, 0xd9 };
+
+extern uint8_t hypercall_insn[HYPERCALL_INSN_SIZE];
+static uint64_t do_sched_yield(uint8_t apic_id)
+{
+ uint64_t ret;
+
+ asm volatile("hypercall_insn:\n\t"
+ ".byte 0xcc,0xcc,0xcc\n\t"
+ : "=a"(ret)
+ : "a"((uint64_t)KVM_HC_SCHED_YIELD), "b"((uint64_t)apic_id)
+ : "memory");
+
+ return ret;
+}
+
+static void guest_main(void)
+{
+ const uint8_t *native_hypercall_insn;
+ const uint8_t *other_hypercall_insn;
+ uint64_t ret;
+
+ if (host_cpu_is_intel) {
+ native_hypercall_insn = vmx_vmcall;
+ other_hypercall_insn = svm_vmmcall;
+ } else if (host_cpu_is_amd) {
+ native_hypercall_insn = svm_vmmcall;
+ other_hypercall_insn = vmx_vmcall;
+ } else {
+ GUEST_ASSERT(0);
+ /* unreachable */
+ return;
+ }
+
+ memcpy(hypercall_insn, other_hypercall_insn, HYPERCALL_INSN_SIZE);
+
+ ret = do_sched_yield(GET_APIC_ID_FIELD(xapic_read_reg(APIC_ID)));
+
+ /*
+ * If the quirk is disabled, verify that guest_ud_handler() "returned"
+ * -EFAULT and that KVM did NOT patch the hypercall. If the quirk is
+ * enabled, verify that the hypercall succeeded and that KVM patched in
+ * the "right" hypercall.
+ */
+ if (quirk_disabled) {
+ GUEST_ASSERT(ret == (uint64_t)-EFAULT);
+ GUEST_ASSERT(!memcmp(other_hypercall_insn, hypercall_insn,
+ HYPERCALL_INSN_SIZE));
+ } else {
+ GUEST_ASSERT(!ret);
+ GUEST_ASSERT(!memcmp(native_hypercall_insn, hypercall_insn,
+ HYPERCALL_INSN_SIZE));
+ }
+
+ GUEST_DONE();
+}
+
+KVM_ONE_VCPU_TEST_SUITE(fix_hypercall);
+
+static void enter_guest(struct kvm_vcpu *vcpu)
+{
+ struct kvm_run *run = vcpu->run;
+ struct ucall uc;
+
+ vcpu_run(vcpu);
+ switch (get_ucall(vcpu, &uc)) {
+ case UCALL_SYNC:
+ pr_info("%s: %016lx\n", (const char *)uc.args[2], uc.args[3]);
+ break;
+ case UCALL_DONE:
+ return;
+ case UCALL_ABORT:
+ REPORT_GUEST_ASSERT(uc);
+ default:
+ TEST_FAIL("Unhandled ucall: %ld\nexit_reason: %u (%s)",
+ uc.cmd, run->exit_reason, exit_reason_str(run->exit_reason));
+ }
+}
+
+static void test_fix_hypercall(struct kvm_vcpu *vcpu, bool disable_quirk)
+{
+ struct kvm_vm *vm = vcpu->vm;
+
+ vm_init_descriptor_tables(vm);
+ vcpu_init_descriptor_tables(vcpu);
+ vm_install_exception_handler(vcpu->vm, UD_VECTOR, guest_ud_handler);
+
+ if (disable_quirk)
+ vm_enable_cap(vm, KVM_CAP_DISABLE_QUIRKS2,
+ KVM_X86_QUIRK_FIX_HYPERCALL_INSN);
+
+ quirk_disabled = disable_quirk;
+ sync_global_to_guest(vm, quirk_disabled);
+
+ virt_pg_map(vm, APIC_DEFAULT_GPA, APIC_DEFAULT_GPA);
+
+ enter_guest(vcpu);
+}
+
+KVM_ONE_VCPU_TEST(fix_hypercall, enable_quirk, guest_main)
+{
+ test_fix_hypercall(vcpu, false);
+}
+
+KVM_ONE_VCPU_TEST(fix_hypercall, disable_quirk, guest_main)
+{
+ test_fix_hypercall(vcpu, true);
+}
+
+int main(int argc, char *argv[])
+{
+ TEST_REQUIRE(kvm_check_cap(KVM_CAP_DISABLE_QUIRKS2) & KVM_X86_QUIRK_FIX_HYPERCALL_INSN);
+
+ return test_harness_run(argc, argv);
+}
diff --git a/tools/testing/selftests/kvm/x86_64/flds_emulation.h b/tools/testing/selftests/kvm/x86_64/flds_emulation.h
new file mode 100644
index 000000000000..37b1a9f52864
--- /dev/null
+++ b/tools/testing/selftests/kvm/x86_64/flds_emulation.h
@@ -0,0 +1,52 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+#ifndef SELFTEST_KVM_FLDS_EMULATION_H
+#define SELFTEST_KVM_FLDS_EMULATION_H
+
+#include "kvm_util.h"
+
+#define FLDS_MEM_EAX ".byte 0xd9, 0x00"
+
+/*
+ * flds is an instruction that the KVM instruction emulator is known not to
+ * support. This can be used in guest code along with a mechanism to force
+ * KVM to emulate the instruction (e.g. by providing an MMIO address) to
+ * exercise emulation failures.
+ */
+static inline void flds(uint64_t address)
+{
+ __asm__ __volatile__(FLDS_MEM_EAX :: "a"(address));
+}
+
+static inline void handle_flds_emulation_failure_exit(struct kvm_vcpu *vcpu)
+{
+ struct kvm_run *run = vcpu->run;
+ struct kvm_regs regs;
+ uint8_t *insn_bytes;
+ uint64_t flags;
+
+ TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_INTERNAL_ERROR);
+
+ TEST_ASSERT(run->emulation_failure.suberror == KVM_INTERNAL_ERROR_EMULATION,
+ "Unexpected suberror: %u",
+ run->emulation_failure.suberror);
+
+ flags = run->emulation_failure.flags;
+ TEST_ASSERT(run->emulation_failure.ndata >= 3 &&
+ flags & KVM_INTERNAL_ERROR_EMULATION_FLAG_INSTRUCTION_BYTES,
+ "run->emulation_failure is missing instruction bytes");
+
+ TEST_ASSERT(run->emulation_failure.insn_size >= 2,
+ "Expected a 2-byte opcode for 'flds', got %d bytes",
+ run->emulation_failure.insn_size);
+
+ insn_bytes = run->emulation_failure.insn_bytes;
+ TEST_ASSERT(insn_bytes[0] == 0xd9 && insn_bytes[1] == 0,
+ "Expected 'flds [eax]', opcode '0xd9 0x00', got opcode 0x%02x 0x%02x",
+ insn_bytes[0], insn_bytes[1]);
+
+ vcpu_regs_get(vcpu, &regs);
+ regs.rip += 2;
+ vcpu_regs_set(vcpu, &regs);
+}
+
+#endif /* !SELFTEST_KVM_FLDS_EMULATION_H */
diff --git a/tools/testing/selftests/kvm/x86_64/get_msr_index_features.c b/tools/testing/selftests/kvm/x86_64/get_msr_index_features.c
new file mode 100644
index 000000000000..d09b3cbcadc6
--- /dev/null
+++ b/tools/testing/selftests/kvm/x86_64/get_msr_index_features.c
@@ -0,0 +1,35 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Test that KVM_GET_MSR_INDEX_LIST and
+ * KVM_GET_MSR_FEATURE_INDEX_LIST work as intended
+ *
+ * Copyright (C) 2020, Red Hat, Inc.
+ */
+#include <fcntl.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/ioctl.h>
+
+#include "test_util.h"
+#include "kvm_util.h"
+#include "processor.h"
+
+int main(int argc, char *argv[])
+{
+ const struct kvm_msr_list *feature_list;
+ int i;
+
+ /*
+ * Skip the entire test if MSR_FEATURES isn't supported, other tests
+ * will cover the "regular" list of MSRs, the coverage here is purely
+ * opportunistic and not interesting on its own.
+ */
+ TEST_REQUIRE(kvm_has_cap(KVM_CAP_GET_MSR_FEATURES));
+
+ (void)kvm_get_msr_index_list();
+
+ feature_list = kvm_get_feature_msr_index_list();
+ for (i = 0; i < feature_list->nmsrs; i++)
+ kvm_get_feature_msr(feature_list->indices[i]);
+}
diff --git a/tools/testing/selftests/kvm/x86_64/hwcr_msr_test.c b/tools/testing/selftests/kvm/x86_64/hwcr_msr_test.c
new file mode 100644
index 000000000000..df351ae17029
--- /dev/null
+++ b/tools/testing/selftests/kvm/x86_64/hwcr_msr_test.c
@@ -0,0 +1,47 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (C) 2023, Google LLC.
+ */
+
+#define _GNU_SOURCE /* for program_invocation_short_name */
+#include <sys/ioctl.h>
+
+#include "test_util.h"
+#include "kvm_util.h"
+#include "vmx.h"
+
+void test_hwcr_bit(struct kvm_vcpu *vcpu, unsigned int bit)
+{
+ const uint64_t ignored = BIT_ULL(3) | BIT_ULL(6) | BIT_ULL(8);
+ const uint64_t valid = BIT_ULL(18) | BIT_ULL(24);
+ const uint64_t legal = ignored | valid;
+ uint64_t val = BIT_ULL(bit);
+ uint64_t actual;
+ int r;
+
+ r = _vcpu_set_msr(vcpu, MSR_K7_HWCR, val);
+ TEST_ASSERT(val & ~legal ? !r : r == 1,
+ "Expected KVM_SET_MSRS(MSR_K7_HWCR) = 0x%lx to %s",
+ val, val & ~legal ? "fail" : "succeed");
+
+ actual = vcpu_get_msr(vcpu, MSR_K7_HWCR);
+ TEST_ASSERT(actual == (val & valid),
+ "Bit %u: unexpected HWCR 0x%lx; expected 0x%lx",
+ bit, actual, (val & valid));
+
+ vcpu_set_msr(vcpu, MSR_K7_HWCR, 0);
+}
+
+int main(int argc, char *argv[])
+{
+ struct kvm_vm *vm;
+ struct kvm_vcpu *vcpu;
+ unsigned int bit;
+
+ vm = vm_create_with_one_vcpu(&vcpu, NULL);
+
+ for (bit = 0; bit < BITS_PER_LONG; bit++)
+ test_hwcr_bit(vcpu, bit);
+
+ kvm_vm_free(vm);
+}
diff --git a/tools/testing/selftests/kvm/x86_64/hyperv_clock.c b/tools/testing/selftests/kvm/x86_64/hyperv_clock.c
new file mode 100644
index 000000000000..e058bc676cd6
--- /dev/null
+++ b/tools/testing/selftests/kvm/x86_64/hyperv_clock.c
@@ -0,0 +1,263 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright (C) 2021, Red Hat, Inc.
+ *
+ * Tests for Hyper-V clocksources
+ */
+#include "test_util.h"
+#include "kvm_util.h"
+#include "processor.h"
+#include "hyperv.h"
+
+struct ms_hyperv_tsc_page {
+ volatile u32 tsc_sequence;
+ u32 reserved1;
+ volatile u64 tsc_scale;
+ volatile s64 tsc_offset;
+} __packed;
+
+/* Simplified mul_u64_u64_shr() */
+static inline u64 mul_u64_u64_shr64(u64 a, u64 b)
+{
+ union {
+ u64 ll;
+ struct {
+ u32 low, high;
+ } l;
+ } rm, rn, rh, a0, b0;
+ u64 c;
+
+ a0.ll = a;
+ b0.ll = b;
+
+ rm.ll = (u64)a0.l.low * b0.l.high;
+ rn.ll = (u64)a0.l.high * b0.l.low;
+ rh.ll = (u64)a0.l.high * b0.l.high;
+
+ rh.l.low = c = rm.l.high + rn.l.high + rh.l.low;
+ rh.l.high = (c >> 32) + rh.l.high;
+
+ return rh.ll;
+}
+
+static inline void nop_loop(void)
+{
+ int i;
+
+ for (i = 0; i < 100000000; i++)
+ asm volatile("nop");
+}
+
+static inline void check_tsc_msr_rdtsc(void)
+{
+ u64 tsc_freq, r1, r2, t1, t2;
+ s64 delta_ns;
+
+ tsc_freq = rdmsr(HV_X64_MSR_TSC_FREQUENCY);
+ GUEST_ASSERT(tsc_freq > 0);
+
+ /* For increased accuracy, take mean rdtsc() before and afrer rdmsr() */
+ r1 = rdtsc();
+ t1 = rdmsr(HV_X64_MSR_TIME_REF_COUNT);
+ r1 = (r1 + rdtsc()) / 2;
+ nop_loop();
+ r2 = rdtsc();
+ t2 = rdmsr(HV_X64_MSR_TIME_REF_COUNT);
+ r2 = (r2 + rdtsc()) / 2;
+
+ GUEST_ASSERT(r2 > r1 && t2 > t1);
+
+ /* HV_X64_MSR_TIME_REF_COUNT is in 100ns */
+ delta_ns = ((t2 - t1) * 100) - ((r2 - r1) * 1000000000 / tsc_freq);
+ if (delta_ns < 0)
+ delta_ns = -delta_ns;
+
+ /* 1% tolerance */
+ GUEST_ASSERT(delta_ns * 100 < (t2 - t1) * 100);
+}
+
+static inline u64 get_tscpage_ts(struct ms_hyperv_tsc_page *tsc_page)
+{
+ return mul_u64_u64_shr64(rdtsc(), tsc_page->tsc_scale) + tsc_page->tsc_offset;
+}
+
+static inline void check_tsc_msr_tsc_page(struct ms_hyperv_tsc_page *tsc_page)
+{
+ u64 r1, r2, t1, t2;
+
+ /* Compare TSC page clocksource with HV_X64_MSR_TIME_REF_COUNT */
+ t1 = get_tscpage_ts(tsc_page);
+ r1 = rdmsr(HV_X64_MSR_TIME_REF_COUNT);
+
+ /* 10 ms tolerance */
+ GUEST_ASSERT(r1 >= t1 && r1 - t1 < 100000);
+ nop_loop();
+
+ t2 = get_tscpage_ts(tsc_page);
+ r2 = rdmsr(HV_X64_MSR_TIME_REF_COUNT);
+ GUEST_ASSERT(r2 >= t1 && r2 - t2 < 100000);
+}
+
+static void guest_main(struct ms_hyperv_tsc_page *tsc_page, vm_paddr_t tsc_page_gpa)
+{
+ u64 tsc_scale, tsc_offset;
+
+ /* Set Guest OS id to enable Hyper-V emulation */
+ GUEST_SYNC(1);
+ wrmsr(HV_X64_MSR_GUEST_OS_ID, HYPERV_LINUX_OS_ID);
+ GUEST_SYNC(2);
+
+ check_tsc_msr_rdtsc();
+
+ GUEST_SYNC(3);
+
+ /* Set up TSC page is disabled state, check that it's clean */
+ wrmsr(HV_X64_MSR_REFERENCE_TSC, tsc_page_gpa);
+ GUEST_ASSERT(tsc_page->tsc_sequence == 0);
+ GUEST_ASSERT(tsc_page->tsc_scale == 0);
+ GUEST_ASSERT(tsc_page->tsc_offset == 0);
+
+ GUEST_SYNC(4);
+
+ /* Set up TSC page is enabled state */
+ wrmsr(HV_X64_MSR_REFERENCE_TSC, tsc_page_gpa | 0x1);
+ GUEST_ASSERT(tsc_page->tsc_sequence != 0);
+
+ GUEST_SYNC(5);
+
+ check_tsc_msr_tsc_page(tsc_page);
+
+ GUEST_SYNC(6);
+
+ tsc_offset = tsc_page->tsc_offset;
+ /* Call KVM_SET_CLOCK from userspace, check that TSC page was updated */
+
+ GUEST_SYNC(7);
+ /* Sanity check TSC page timestamp, it should be close to 0 */
+ GUEST_ASSERT(get_tscpage_ts(tsc_page) < 100000);
+
+ GUEST_ASSERT(tsc_page->tsc_offset != tsc_offset);
+
+ nop_loop();
+
+ /*
+ * Enable Re-enlightenment and check that TSC page stays constant across
+ * KVM_SET_CLOCK.
+ */
+ wrmsr(HV_X64_MSR_REENLIGHTENMENT_CONTROL, 0x1 << 16 | 0xff);
+ wrmsr(HV_X64_MSR_TSC_EMULATION_CONTROL, 0x1);
+ tsc_offset = tsc_page->tsc_offset;
+ tsc_scale = tsc_page->tsc_scale;
+ GUEST_SYNC(8);
+ GUEST_ASSERT(tsc_page->tsc_offset == tsc_offset);
+ GUEST_ASSERT(tsc_page->tsc_scale == tsc_scale);
+
+ GUEST_SYNC(9);
+
+ check_tsc_msr_tsc_page(tsc_page);
+
+ /*
+ * Disable re-enlightenment and TSC page, check that KVM doesn't update
+ * it anymore.
+ */
+ wrmsr(HV_X64_MSR_REENLIGHTENMENT_CONTROL, 0);
+ wrmsr(HV_X64_MSR_TSC_EMULATION_CONTROL, 0);
+ wrmsr(HV_X64_MSR_REFERENCE_TSC, 0);
+ memset(tsc_page, 0, sizeof(*tsc_page));
+
+ GUEST_SYNC(10);
+ GUEST_ASSERT(tsc_page->tsc_sequence == 0);
+ GUEST_ASSERT(tsc_page->tsc_offset == 0);
+ GUEST_ASSERT(tsc_page->tsc_scale == 0);
+
+ GUEST_DONE();
+}
+
+static void host_check_tsc_msr_rdtsc(struct kvm_vcpu *vcpu)
+{
+ u64 tsc_freq, r1, r2, t1, t2;
+ s64 delta_ns;
+
+ tsc_freq = vcpu_get_msr(vcpu, HV_X64_MSR_TSC_FREQUENCY);
+ TEST_ASSERT(tsc_freq > 0, "TSC frequency must be nonzero");
+
+ /* For increased accuracy, take mean rdtsc() before and afrer ioctl */
+ r1 = rdtsc();
+ t1 = vcpu_get_msr(vcpu, HV_X64_MSR_TIME_REF_COUNT);
+ r1 = (r1 + rdtsc()) / 2;
+ nop_loop();
+ r2 = rdtsc();
+ t2 = vcpu_get_msr(vcpu, HV_X64_MSR_TIME_REF_COUNT);
+ r2 = (r2 + rdtsc()) / 2;
+
+ TEST_ASSERT(t2 > t1, "Time reference MSR is not monotonic (%ld <= %ld)", t1, t2);
+
+ /* HV_X64_MSR_TIME_REF_COUNT is in 100ns */
+ delta_ns = ((t2 - t1) * 100) - ((r2 - r1) * 1000000000 / tsc_freq);
+ if (delta_ns < 0)
+ delta_ns = -delta_ns;
+
+ /* 1% tolerance */
+ TEST_ASSERT(delta_ns * 100 < (t2 - t1) * 100,
+ "Elapsed time does not match (MSR=%ld, TSC=%ld)",
+ (t2 - t1) * 100, (r2 - r1) * 1000000000 / tsc_freq);
+}
+
+int main(void)
+{
+ struct kvm_vcpu *vcpu;
+ struct kvm_vm *vm;
+ struct ucall uc;
+ vm_vaddr_t tsc_page_gva;
+ int stage;
+
+ TEST_REQUIRE(kvm_has_cap(KVM_CAP_HYPERV_TIME));
+ TEST_REQUIRE(sys_clocksource_is_based_on_tsc());
+
+ vm = vm_create_with_one_vcpu(&vcpu, guest_main);
+
+ vcpu_set_hv_cpuid(vcpu);
+
+ tsc_page_gva = vm_vaddr_alloc_page(vm);
+ memset(addr_gva2hva(vm, tsc_page_gva), 0x0, getpagesize());
+ TEST_ASSERT((addr_gva2gpa(vm, tsc_page_gva) & (getpagesize() - 1)) == 0,
+ "TSC page has to be page aligned");
+ vcpu_args_set(vcpu, 2, tsc_page_gva, addr_gva2gpa(vm, tsc_page_gva));
+
+ host_check_tsc_msr_rdtsc(vcpu);
+
+ for (stage = 1;; stage++) {
+ vcpu_run(vcpu);
+ TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_IO);
+
+ switch (get_ucall(vcpu, &uc)) {
+ case UCALL_ABORT:
+ REPORT_GUEST_ASSERT(uc);
+ /* NOT REACHED */
+ case UCALL_SYNC:
+ break;
+ case UCALL_DONE:
+ /* Keep in sync with guest_main() */
+ TEST_ASSERT(stage == 11, "Testing ended prematurely, stage %d",
+ stage);
+ goto out;
+ default:
+ TEST_FAIL("Unknown ucall %lu", uc.cmd);
+ }
+
+ TEST_ASSERT(!strcmp((const char *)uc.args[0], "hello") &&
+ uc.args[1] == stage,
+ "Stage %d: Unexpected register values vmexit, got %lx",
+ stage, (ulong)uc.args[1]);
+
+ /* Reset kvmclock triggering TSC page update */
+ if (stage == 7 || stage == 8 || stage == 10) {
+ struct kvm_clock_data clock = {0};
+
+ vm_ioctl(vm, KVM_SET_CLOCK, &clock);
+ }
+ }
+
+out:
+ kvm_vm_free(vm);
+}
diff --git a/tools/testing/selftests/kvm/x86_64/hyperv_cpuid.c b/tools/testing/selftests/kvm/x86_64/hyperv_cpuid.c
index 745b708c2d3b..5c27efbf405e 100644
--- a/tools/testing/selftests/kvm/x86_64/hyperv_cpuid.c
+++ b/tools/testing/selftests/kvm/x86_64/hyperv_cpuid.c
@@ -20,8 +20,6 @@
#include "processor.h"
#include "vmx.h"
-#define VCPU_ID 0
-
static void guest_code(void)
{
}
@@ -45,32 +43,26 @@ static bool smt_possible(void)
return res;
}
-static void test_hv_cpuid(struct kvm_cpuid2 *hv_cpuid_entries,
- bool evmcs_enabled)
+static void test_hv_cpuid(const struct kvm_cpuid2 *hv_cpuid_entries,
+ bool evmcs_expected)
{
int i;
- int nent = 9;
+ int nent_expected = 10;
u32 test_val;
- if (evmcs_enabled)
- nent += 1; /* 0x4000000A */
-
- TEST_ASSERT(hv_cpuid_entries->nent == nent,
+ TEST_ASSERT(hv_cpuid_entries->nent == nent_expected,
"KVM_GET_SUPPORTED_HV_CPUID should return %d entries"
- " with evmcs=%d (returned %d)",
- nent, evmcs_enabled, hv_cpuid_entries->nent);
+ " (returned %d)",
+ nent_expected, hv_cpuid_entries->nent);
for (i = 0; i < hv_cpuid_entries->nent; i++) {
- struct kvm_cpuid_entry2 *entry = &hv_cpuid_entries->entries[i];
+ const struct kvm_cpuid_entry2 *entry = &hv_cpuid_entries->entries[i];
TEST_ASSERT((entry->function >= 0x40000000) &&
(entry->function <= 0x40000082),
"function %x is our of supported range",
entry->function);
- TEST_ASSERT(evmcs_enabled || (entry->function != 0x4000000A),
- "0x4000000A leaf should not be reported");
-
TEST_ASSERT(entry->index == 0,
".index field should be zero");
@@ -87,7 +79,7 @@ static void test_hv_cpuid(struct kvm_cpuid2 *hv_cpuid_entries,
TEST_ASSERT(entry->eax == test_val,
"Wrong max leaf report in 0x40000000.EAX: %x"
" (evmcs=%d)",
- entry->eax, evmcs_enabled
+ entry->eax, evmcs_expected
);
break;
case 0x40000004:
@@ -97,8 +89,20 @@ static void test_hv_cpuid(struct kvm_cpuid2 *hv_cpuid_entries,
"NoNonArchitecturalCoreSharing bit"
" doesn't reflect SMT setting");
break;
- }
+ case 0x4000000A:
+ TEST_ASSERT(entry->eax & (1UL << 19),
+ "Enlightened MSR-Bitmap should always be supported"
+ " 0x40000000.EAX: %x", entry->eax);
+ if (evmcs_expected)
+ TEST_ASSERT((entry->eax & 0xffff) == 0x101,
+ "Supported Enlightened VMCS version range is supposed to be 1:1"
+ " 0x40000000.EAX: %x", entry->eax);
+
+ break;
+ default:
+ break;
+ }
/*
* If needed for debug:
* fprintf(stdout,
@@ -107,84 +111,64 @@ static void test_hv_cpuid(struct kvm_cpuid2 *hv_cpuid_entries,
* entry->edx);
*/
}
-
}
-void test_hv_cpuid_e2big(struct kvm_vm *vm)
+void test_hv_cpuid_e2big(struct kvm_vm *vm, struct kvm_vcpu *vcpu)
{
static struct kvm_cpuid2 cpuid = {.nent = 0};
int ret;
- ret = _vcpu_ioctl(vm, VCPU_ID, KVM_GET_SUPPORTED_HV_CPUID, &cpuid);
+ if (vcpu)
+ ret = __vcpu_ioctl(vcpu, KVM_GET_SUPPORTED_HV_CPUID, &cpuid);
+ else
+ ret = __kvm_ioctl(vm->kvm_fd, KVM_GET_SUPPORTED_HV_CPUID, &cpuid);
TEST_ASSERT(ret == -1 && errno == E2BIG,
- "KVM_GET_SUPPORTED_HV_CPUID didn't fail with -E2BIG when"
- " it should have: %d %d", ret, errno);
+ "%s KVM_GET_SUPPORTED_HV_CPUID didn't fail with -E2BIG when"
+ " it should have: %d %d", !vcpu ? "KVM" : "vCPU", ret, errno);
}
-
-struct kvm_cpuid2 *kvm_get_supported_hv_cpuid(struct kvm_vm *vm)
+int main(int argc, char *argv[])
{
- int nent = 20; /* should be enough */
- static struct kvm_cpuid2 *cpuid;
-
- cpuid = malloc(sizeof(*cpuid) + nent * sizeof(struct kvm_cpuid_entry2));
-
- if (!cpuid) {
- perror("malloc");
- abort();
- }
-
- cpuid->nent = nent;
+ struct kvm_vm *vm;
+ const struct kvm_cpuid2 *hv_cpuid_entries;
+ struct kvm_vcpu *vcpu;
- vcpu_ioctl(vm, VCPU_ID, KVM_GET_SUPPORTED_HV_CPUID, cpuid);
+ TEST_REQUIRE(kvm_has_cap(KVM_CAP_HYPERV_CPUID));
- return cpuid;
-}
+ vm = vm_create_with_one_vcpu(&vcpu, guest_code);
+ /* Test vCPU ioctl version */
+ test_hv_cpuid_e2big(vm, vcpu);
-int main(int argc, char *argv[])
-{
- struct kvm_vm *vm;
- int rv, stage;
- struct kvm_cpuid2 *hv_cpuid_entries;
- bool evmcs_enabled;
+ hv_cpuid_entries = vcpu_get_supported_hv_cpuid(vcpu);
+ test_hv_cpuid(hv_cpuid_entries, false);
+ free((void *)hv_cpuid_entries);
- /* Tell stdout not to buffer its content */
- setbuf(stdout, NULL);
-
- rv = kvm_check_cap(KVM_CAP_HYPERV_CPUID);
- if (!rv) {
- print_skip("KVM_CAP_HYPERV_CPUID not supported");
- exit(KSFT_SKIP);
+ if (!kvm_cpu_has(X86_FEATURE_VMX) ||
+ !kvm_has_cap(KVM_CAP_HYPERV_ENLIGHTENED_VMCS)) {
+ print_skip("Enlightened VMCS is unsupported");
+ goto do_sys;
+ }
+ vcpu_enable_evmcs(vcpu);
+ hv_cpuid_entries = vcpu_get_supported_hv_cpuid(vcpu);
+ test_hv_cpuid(hv_cpuid_entries, true);
+ free((void *)hv_cpuid_entries);
+
+do_sys:
+ /* Test system ioctl version */
+ if (!kvm_has_cap(KVM_CAP_SYS_HYPERV_CPUID)) {
+ print_skip("KVM_CAP_SYS_HYPERV_CPUID not supported");
+ goto out;
}
- for (stage = 0; stage < 3; stage++) {
- evmcs_enabled = false;
+ test_hv_cpuid_e2big(vm, NULL);
- vm = vm_create_default(VCPU_ID, 0, guest_code);
- switch (stage) {
- case 0:
- test_hv_cpuid_e2big(vm);
- continue;
- case 1:
- break;
- case 2:
- if (!nested_vmx_supported() ||
- !kvm_check_cap(KVM_CAP_HYPERV_ENLIGHTENED_VMCS)) {
- print_skip("Enlightened VMCS is unsupported");
- continue;
- }
- vcpu_enable_evmcs(vm, VCPU_ID);
- evmcs_enabled = true;
- break;
- }
+ hv_cpuid_entries = kvm_get_supported_hv_cpuid();
+ test_hv_cpuid(hv_cpuid_entries, kvm_cpu_has(X86_FEATURE_VMX));
- hv_cpuid_entries = kvm_get_supported_hv_cpuid(vm);
- test_hv_cpuid(hv_cpuid_entries, evmcs_enabled);
- free(hv_cpuid_entries);
- kvm_vm_free(vm);
- }
+out:
+ kvm_vm_free(vm);
return 0;
}
diff --git a/tools/testing/selftests/kvm/x86_64/hyperv_evmcs.c b/tools/testing/selftests/kvm/x86_64/hyperv_evmcs.c
new file mode 100644
index 000000000000..4c7257ecd2a6
--- /dev/null
+++ b/tools/testing/selftests/kvm/x86_64/hyperv_evmcs.c
@@ -0,0 +1,310 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (C) 2018, Red Hat, Inc.
+ *
+ * Tests for Enlightened VMCS, including nested guest state.
+ */
+#define _GNU_SOURCE /* for program_invocation_short_name */
+#include <fcntl.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/ioctl.h>
+#include <linux/bitmap.h>
+
+#include "test_util.h"
+
+#include "kvm_util.h"
+
+#include "hyperv.h"
+#include "vmx.h"
+
+static int ud_count;
+
+static void guest_ud_handler(struct ex_regs *regs)
+{
+ ud_count++;
+ regs->rip += 3; /* VMLAUNCH */
+}
+
+static void guest_nmi_handler(struct ex_regs *regs)
+{
+}
+
+static inline void rdmsr_from_l2(uint32_t msr)
+{
+ /* Currently, L1 doesn't preserve GPRs during vmexits. */
+ __asm__ __volatile__ ("rdmsr" : : "c"(msr) :
+ "rax", "rbx", "rdx", "rsi", "rdi", "r8", "r9",
+ "r10", "r11", "r12", "r13", "r14", "r15");
+}
+
+/* Exit to L1 from L2 with RDMSR instruction */
+void l2_guest_code(void)
+{
+ u64 unused;
+
+ GUEST_SYNC(7);
+
+ GUEST_SYNC(8);
+
+ /* Forced exit to L1 upon restore */
+ GUEST_SYNC(9);
+
+ vmcall();
+
+ /* MSR-Bitmap tests */
+ rdmsr_from_l2(MSR_FS_BASE); /* intercepted */
+ rdmsr_from_l2(MSR_FS_BASE); /* intercepted */
+ rdmsr_from_l2(MSR_GS_BASE); /* not intercepted */
+ vmcall();
+ rdmsr_from_l2(MSR_GS_BASE); /* intercepted */
+
+ /* L2 TLB flush tests */
+ hyperv_hypercall(HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE | HV_HYPERCALL_FAST_BIT, 0x0,
+ HV_FLUSH_ALL_VIRTUAL_ADDRESS_SPACES | HV_FLUSH_ALL_PROCESSORS);
+ rdmsr_from_l2(MSR_FS_BASE);
+ /*
+ * Note: hypercall status (RAX) is not preserved correctly by L1 after
+ * synthetic vmexit, use unchecked version.
+ */
+ __hyperv_hypercall(HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE | HV_HYPERCALL_FAST_BIT, 0x0,
+ HV_FLUSH_ALL_VIRTUAL_ADDRESS_SPACES | HV_FLUSH_ALL_PROCESSORS,
+ &unused);
+
+ /* Done, exit to L1 and never come back. */
+ vmcall();
+}
+
+void guest_code(struct vmx_pages *vmx_pages, struct hyperv_test_pages *hv_pages,
+ vm_vaddr_t hv_hcall_page_gpa)
+{
+#define L2_GUEST_STACK_SIZE 64
+ unsigned long l2_guest_stack[L2_GUEST_STACK_SIZE];
+
+ wrmsr(HV_X64_MSR_GUEST_OS_ID, HYPERV_LINUX_OS_ID);
+ wrmsr(HV_X64_MSR_HYPERCALL, hv_hcall_page_gpa);
+
+ x2apic_enable();
+
+ GUEST_SYNC(1);
+ GUEST_SYNC(2);
+
+ enable_vp_assist(hv_pages->vp_assist_gpa, hv_pages->vp_assist);
+ evmcs_enable();
+
+ GUEST_ASSERT(prepare_for_vmx_operation(vmx_pages));
+ GUEST_SYNC(3);
+ GUEST_ASSERT(load_evmcs(hv_pages));
+ GUEST_ASSERT(vmptrstz() == hv_pages->enlightened_vmcs_gpa);
+
+ GUEST_SYNC(4);
+ GUEST_ASSERT(vmptrstz() == hv_pages->enlightened_vmcs_gpa);
+
+ prepare_vmcs(vmx_pages, l2_guest_code,
+ &l2_guest_stack[L2_GUEST_STACK_SIZE]);
+
+ GUEST_SYNC(5);
+ GUEST_ASSERT(vmptrstz() == hv_pages->enlightened_vmcs_gpa);
+ current_evmcs->revision_id = -1u;
+ GUEST_ASSERT(vmlaunch());
+ current_evmcs->revision_id = EVMCS_VERSION;
+ GUEST_SYNC(6);
+
+ vmwrite(PIN_BASED_VM_EXEC_CONTROL, vmreadz(PIN_BASED_VM_EXEC_CONTROL) |
+ PIN_BASED_NMI_EXITING);
+
+ /* L2 TLB flush setup */
+ current_evmcs->partition_assist_page = hv_pages->partition_assist_gpa;
+ current_evmcs->hv_enlightenments_control.nested_flush_hypercall = 1;
+ current_evmcs->hv_vm_id = 1;
+ current_evmcs->hv_vp_id = 1;
+ current_vp_assist->nested_control.features.directhypercall = 1;
+ *(u32 *)(hv_pages->partition_assist) = 0;
+
+ GUEST_ASSERT(!vmlaunch());
+ GUEST_ASSERT_EQ(vmreadz(VM_EXIT_REASON), EXIT_REASON_EXCEPTION_NMI);
+ GUEST_ASSERT_EQ((vmreadz(VM_EXIT_INTR_INFO) & 0xff), NMI_VECTOR);
+ GUEST_ASSERT(vmptrstz() == hv_pages->enlightened_vmcs_gpa);
+
+ /*
+ * NMI forces L2->L1 exit, resuming L2 and hope that EVMCS is
+ * up-to-date (RIP points where it should and not at the beginning
+ * of l2_guest_code(). GUEST_SYNC(9) checkes that.
+ */
+ GUEST_ASSERT(!vmresume());
+
+ GUEST_SYNC(10);
+
+ GUEST_ASSERT(vmreadz(VM_EXIT_REASON) == EXIT_REASON_VMCALL);
+ current_evmcs->guest_rip += 3; /* vmcall */
+
+ /* Intercept RDMSR 0xc0000100 */
+ vmwrite(CPU_BASED_VM_EXEC_CONTROL, vmreadz(CPU_BASED_VM_EXEC_CONTROL) |
+ CPU_BASED_USE_MSR_BITMAPS);
+ __set_bit(MSR_FS_BASE & 0x1fff, vmx_pages->msr + 0x400);
+ GUEST_ASSERT(!vmresume());
+ GUEST_ASSERT(vmreadz(VM_EXIT_REASON) == EXIT_REASON_MSR_READ);
+ current_evmcs->guest_rip += 2; /* rdmsr */
+
+ /* Enable enlightened MSR bitmap */
+ current_evmcs->hv_enlightenments_control.msr_bitmap = 1;
+ GUEST_ASSERT(!vmresume());
+ GUEST_ASSERT(vmreadz(VM_EXIT_REASON) == EXIT_REASON_MSR_READ);
+ current_evmcs->guest_rip += 2; /* rdmsr */
+
+ /* Intercept RDMSR 0xc0000101 without telling KVM about it */
+ __set_bit(MSR_GS_BASE & 0x1fff, vmx_pages->msr + 0x400);
+ /* Make sure HV_VMX_ENLIGHTENED_CLEAN_FIELD_MSR_BITMAP is set */
+ current_evmcs->hv_clean_fields |= HV_VMX_ENLIGHTENED_CLEAN_FIELD_MSR_BITMAP;
+ GUEST_ASSERT(!vmresume());
+ /* Make sure we don't see EXIT_REASON_MSR_READ here so eMSR bitmap works */
+ GUEST_ASSERT(vmreadz(VM_EXIT_REASON) == EXIT_REASON_VMCALL);
+ current_evmcs->guest_rip += 3; /* vmcall */
+
+ /* Now tell KVM we've changed MSR-Bitmap */
+ current_evmcs->hv_clean_fields &= ~HV_VMX_ENLIGHTENED_CLEAN_FIELD_MSR_BITMAP;
+ GUEST_ASSERT(!vmresume());
+ GUEST_ASSERT(vmreadz(VM_EXIT_REASON) == EXIT_REASON_MSR_READ);
+ current_evmcs->guest_rip += 2; /* rdmsr */
+
+ /*
+ * L2 TLB flush test. First VMCALL should be handled directly by L0,
+ * no VMCALL exit expected.
+ */
+ GUEST_ASSERT(!vmresume());
+ GUEST_ASSERT(vmreadz(VM_EXIT_REASON) == EXIT_REASON_MSR_READ);
+ current_evmcs->guest_rip += 2; /* rdmsr */
+ /* Enable synthetic vmexit */
+ *(u32 *)(hv_pages->partition_assist) = 1;
+ GUEST_ASSERT(!vmresume());
+ GUEST_ASSERT(vmreadz(VM_EXIT_REASON) == HV_VMX_SYNTHETIC_EXIT_REASON_TRAP_AFTER_FLUSH);
+
+ GUEST_ASSERT(!vmresume());
+ GUEST_ASSERT(vmreadz(VM_EXIT_REASON) == EXIT_REASON_VMCALL);
+ GUEST_SYNC(11);
+
+ /* Try enlightened vmptrld with an incorrect GPA */
+ evmcs_vmptrld(0xdeadbeef, hv_pages->enlightened_vmcs);
+ GUEST_ASSERT(vmlaunch());
+ GUEST_ASSERT(ud_count == 1);
+ GUEST_DONE();
+}
+
+void inject_nmi(struct kvm_vcpu *vcpu)
+{
+ struct kvm_vcpu_events events;
+
+ vcpu_events_get(vcpu, &events);
+
+ events.nmi.pending = 1;
+ events.flags |= KVM_VCPUEVENT_VALID_NMI_PENDING;
+
+ vcpu_events_set(vcpu, &events);
+}
+
+static struct kvm_vcpu *save_restore_vm(struct kvm_vm *vm,
+ struct kvm_vcpu *vcpu)
+{
+ struct kvm_regs regs1, regs2;
+ struct kvm_x86_state *state;
+
+ state = vcpu_save_state(vcpu);
+ memset(&regs1, 0, sizeof(regs1));
+ vcpu_regs_get(vcpu, &regs1);
+
+ kvm_vm_release(vm);
+
+ /* Restore state in a new VM. */
+ vcpu = vm_recreate_with_one_vcpu(vm);
+ vcpu_set_hv_cpuid(vcpu);
+ vcpu_enable_evmcs(vcpu);
+ vcpu_load_state(vcpu, state);
+ kvm_x86_state_cleanup(state);
+
+ memset(&regs2, 0, sizeof(regs2));
+ vcpu_regs_get(vcpu, &regs2);
+ TEST_ASSERT(!memcmp(&regs1, &regs2, sizeof(regs2)),
+ "Unexpected register values after vcpu_load_state; rdi: %lx rsi: %lx",
+ (ulong) regs2.rdi, (ulong) regs2.rsi);
+ return vcpu;
+}
+
+int main(int argc, char *argv[])
+{
+ vm_vaddr_t vmx_pages_gva = 0, hv_pages_gva = 0;
+ vm_vaddr_t hcall_page;
+
+ struct kvm_vcpu *vcpu;
+ struct kvm_vm *vm;
+ struct ucall uc;
+ int stage;
+
+ TEST_REQUIRE(kvm_cpu_has(X86_FEATURE_VMX));
+ TEST_REQUIRE(kvm_has_cap(KVM_CAP_NESTED_STATE));
+ TEST_REQUIRE(kvm_has_cap(KVM_CAP_HYPERV_ENLIGHTENED_VMCS));
+ TEST_REQUIRE(kvm_has_cap(KVM_CAP_HYPERV_DIRECT_TLBFLUSH));
+
+ vm = vm_create_with_one_vcpu(&vcpu, guest_code);
+
+ hcall_page = vm_vaddr_alloc_pages(vm, 1);
+ memset(addr_gva2hva(vm, hcall_page), 0x0, getpagesize());
+
+ vcpu_set_hv_cpuid(vcpu);
+ vcpu_enable_evmcs(vcpu);
+
+ vcpu_alloc_vmx(vm, &vmx_pages_gva);
+ vcpu_alloc_hyperv_test_pages(vm, &hv_pages_gva);
+ vcpu_args_set(vcpu, 3, vmx_pages_gva, hv_pages_gva, addr_gva2gpa(vm, hcall_page));
+ vcpu_set_msr(vcpu, HV_X64_MSR_VP_INDEX, vcpu->id);
+
+ vm_init_descriptor_tables(vm);
+ vcpu_init_descriptor_tables(vcpu);
+ vm_install_exception_handler(vm, UD_VECTOR, guest_ud_handler);
+ vm_install_exception_handler(vm, NMI_VECTOR, guest_nmi_handler);
+
+ pr_info("Running L1 which uses EVMCS to run L2\n");
+
+ for (stage = 1;; stage++) {
+ vcpu_run(vcpu);
+ TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_IO);
+
+ switch (get_ucall(vcpu, &uc)) {
+ case UCALL_ABORT:
+ REPORT_GUEST_ASSERT(uc);
+ /* NOT REACHED */
+ case UCALL_SYNC:
+ break;
+ case UCALL_DONE:
+ goto done;
+ default:
+ TEST_FAIL("Unknown ucall %lu", uc.cmd);
+ }
+
+ /* UCALL_SYNC is handled here. */
+ TEST_ASSERT(!strcmp((const char *)uc.args[0], "hello") &&
+ uc.args[1] == stage, "Stage %d: Unexpected register values vmexit, got %lx",
+ stage, (ulong)uc.args[1]);
+
+ vcpu = save_restore_vm(vm, vcpu);
+
+ /* Force immediate L2->L1 exit before resuming */
+ if (stage == 8) {
+ pr_info("Injecting NMI into L1 before L2 had a chance to run after restore\n");
+ inject_nmi(vcpu);
+ }
+
+ /*
+ * Do KVM_GET_NESTED_STATE/KVM_SET_NESTED_STATE for a freshly
+ * restored VM (before the first KVM_RUN) to check that
+ * KVM_STATE_NESTED_EVMCS is not lost.
+ */
+ if (stage == 9) {
+ pr_info("Trying extra KVM_GET_NESTED_STATE/KVM_SET_NESTED_STATE cycle\n");
+ vcpu = save_restore_vm(vm, vcpu);
+ }
+ }
+
+done:
+ kvm_vm_free(vm);
+}
diff --git a/tools/testing/selftests/kvm/x86_64/hyperv_extended_hypercalls.c b/tools/testing/selftests/kvm/x86_64/hyperv_extended_hypercalls.c
new file mode 100644
index 000000000000..949e08e98f31
--- /dev/null
+++ b/tools/testing/selftests/kvm/x86_64/hyperv_extended_hypercalls.c
@@ -0,0 +1,98 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Test Hyper-V extended hypercall, HV_EXT_CALL_QUERY_CAPABILITIES (0x8001),
+ * exit to userspace and receive result in guest.
+ *
+ * Negative tests are present in hyperv_features.c
+ *
+ * Copyright 2022 Google LLC
+ * Author: Vipin Sharma <vipinsh@google.com>
+ */
+#include "kvm_util.h"
+#include "processor.h"
+#include "hyperv.h"
+
+/* Any value is fine */
+#define EXT_CAPABILITIES 0xbull
+
+static void guest_code(vm_paddr_t in_pg_gpa, vm_paddr_t out_pg_gpa,
+ vm_vaddr_t out_pg_gva)
+{
+ uint64_t *output_gva;
+
+ wrmsr(HV_X64_MSR_GUEST_OS_ID, HYPERV_LINUX_OS_ID);
+ wrmsr(HV_X64_MSR_HYPERCALL, in_pg_gpa);
+
+ output_gva = (uint64_t *)out_pg_gva;
+
+ hyperv_hypercall(HV_EXT_CALL_QUERY_CAPABILITIES, in_pg_gpa, out_pg_gpa);
+
+ /* TLFS states output will be a uint64_t value */
+ GUEST_ASSERT_EQ(*output_gva, EXT_CAPABILITIES);
+
+ GUEST_DONE();
+}
+
+int main(void)
+{
+ vm_vaddr_t hcall_out_page;
+ vm_vaddr_t hcall_in_page;
+ struct kvm_vcpu *vcpu;
+ struct kvm_run *run;
+ struct kvm_vm *vm;
+ uint64_t *outval;
+ struct ucall uc;
+
+ TEST_REQUIRE(kvm_has_cap(KVM_CAP_HYPERV_CPUID));
+
+ /* Verify if extended hypercalls are supported */
+ if (!kvm_cpuid_has(kvm_get_supported_hv_cpuid(),
+ HV_ENABLE_EXTENDED_HYPERCALLS)) {
+ print_skip("Extended calls not supported by the kernel");
+ exit(KSFT_SKIP);
+ }
+
+ vm = vm_create_with_one_vcpu(&vcpu, guest_code);
+ run = vcpu->run;
+ vcpu_set_hv_cpuid(vcpu);
+
+ /* Hypercall input */
+ hcall_in_page = vm_vaddr_alloc_pages(vm, 1);
+ memset(addr_gva2hva(vm, hcall_in_page), 0x0, vm->page_size);
+
+ /* Hypercall output */
+ hcall_out_page = vm_vaddr_alloc_pages(vm, 1);
+ memset(addr_gva2hva(vm, hcall_out_page), 0x0, vm->page_size);
+
+ vcpu_args_set(vcpu, 3, addr_gva2gpa(vm, hcall_in_page),
+ addr_gva2gpa(vm, hcall_out_page), hcall_out_page);
+
+ vcpu_run(vcpu);
+
+ TEST_ASSERT(run->exit_reason == KVM_EXIT_HYPERV,
+ "Unexpected exit reason: %u (%s)",
+ run->exit_reason, exit_reason_str(run->exit_reason));
+
+ outval = addr_gpa2hva(vm, run->hyperv.u.hcall.params[1]);
+ *outval = EXT_CAPABILITIES;
+ run->hyperv.u.hcall.result = HV_STATUS_SUCCESS;
+
+ vcpu_run(vcpu);
+
+ TEST_ASSERT(run->exit_reason == KVM_EXIT_IO,
+ "Unexpected exit reason: %u (%s)",
+ run->exit_reason, exit_reason_str(run->exit_reason));
+
+ switch (get_ucall(vcpu, &uc)) {
+ case UCALL_ABORT:
+ REPORT_GUEST_ASSERT(uc);
+ break;
+ case UCALL_DONE:
+ break;
+ default:
+ TEST_FAIL("Unhandled ucall: %ld", uc.cmd);
+ }
+
+ kvm_vm_free(vm);
+ return 0;
+}
diff --git a/tools/testing/selftests/kvm/x86_64/hyperv_features.c b/tools/testing/selftests/kvm/x86_64/hyperv_features.c
new file mode 100644
index 000000000000..b923a285e96f
--- /dev/null
+++ b/tools/testing/selftests/kvm/x86_64/hyperv_features.c
@@ -0,0 +1,701 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright (C) 2021, Red Hat, Inc.
+ *
+ * Tests for Hyper-V features enablement
+ */
+#include <asm/kvm_para.h>
+#include <linux/kvm_para.h>
+#include <stdint.h>
+
+#include "test_util.h"
+#include "kvm_util.h"
+#include "processor.h"
+#include "hyperv.h"
+
+/*
+ * HYPERV_CPUID_ENLIGHTMENT_INFO.EBX is not a 'feature' CPUID leaf
+ * but to activate the feature it is sufficient to set it to a non-zero
+ * value. Use BIT(0) for that.
+ */
+#define HV_PV_SPINLOCKS_TEST \
+ KVM_X86_CPU_FEATURE(HYPERV_CPUID_ENLIGHTMENT_INFO, 0, EBX, 0)
+
+struct msr_data {
+ uint32_t idx;
+ bool fault_expected;
+ bool write;
+ u64 write_val;
+};
+
+struct hcall_data {
+ uint64_t control;
+ uint64_t expect;
+ bool ud_expected;
+};
+
+static bool is_write_only_msr(uint32_t msr)
+{
+ return msr == HV_X64_MSR_EOI;
+}
+
+static void guest_msr(struct msr_data *msr)
+{
+ uint8_t vector = 0;
+ uint64_t msr_val = 0;
+
+ GUEST_ASSERT(msr->idx);
+
+ if (msr->write)
+ vector = wrmsr_safe(msr->idx, msr->write_val);
+
+ if (!vector && (!msr->write || !is_write_only_msr(msr->idx)))
+ vector = rdmsr_safe(msr->idx, &msr_val);
+
+ if (msr->fault_expected)
+ __GUEST_ASSERT(vector == GP_VECTOR,
+ "Expected #GP on %sMSR(0x%x), got vector '0x%x'",
+ msr->write ? "WR" : "RD", msr->idx, vector);
+ else
+ __GUEST_ASSERT(!vector,
+ "Expected success on %sMSR(0x%x), got vector '0x%x'",
+ msr->write ? "WR" : "RD", msr->idx, vector);
+
+ if (vector || is_write_only_msr(msr->idx))
+ goto done;
+
+ if (msr->write)
+ __GUEST_ASSERT(!vector,
+ "WRMSR(0x%x) to '0x%lx', RDMSR read '0x%lx'",
+ msr->idx, msr->write_val, msr_val);
+
+ /* Invariant TSC bit appears when TSC invariant control MSR is written to */
+ if (msr->idx == HV_X64_MSR_TSC_INVARIANT_CONTROL) {
+ if (!this_cpu_has(HV_ACCESS_TSC_INVARIANT))
+ GUEST_ASSERT(this_cpu_has(X86_FEATURE_INVTSC));
+ else
+ GUEST_ASSERT(this_cpu_has(X86_FEATURE_INVTSC) ==
+ !!(msr_val & HV_INVARIANT_TSC_EXPOSED));
+ }
+
+done:
+ GUEST_DONE();
+}
+
+static void guest_hcall(vm_vaddr_t pgs_gpa, struct hcall_data *hcall)
+{
+ u64 res, input, output;
+ uint8_t vector;
+
+ GUEST_ASSERT_NE(hcall->control, 0);
+
+ wrmsr(HV_X64_MSR_GUEST_OS_ID, HYPERV_LINUX_OS_ID);
+ wrmsr(HV_X64_MSR_HYPERCALL, pgs_gpa);
+
+ if (!(hcall->control & HV_HYPERCALL_FAST_BIT)) {
+ input = pgs_gpa;
+ output = pgs_gpa + 4096;
+ } else {
+ input = output = 0;
+ }
+
+ vector = __hyperv_hypercall(hcall->control, input, output, &res);
+ if (hcall->ud_expected) {
+ __GUEST_ASSERT(vector == UD_VECTOR,
+ "Expected #UD for control '%lu', got vector '0x%x'",
+ hcall->control, vector);
+ } else {
+ __GUEST_ASSERT(!vector,
+ "Expected no exception for control '%lu', got vector '0x%x'",
+ hcall->control, vector);
+ GUEST_ASSERT_EQ(res, hcall->expect);
+ }
+
+ GUEST_DONE();
+}
+
+static void vcpu_reset_hv_cpuid(struct kvm_vcpu *vcpu)
+{
+ /*
+ * Enable all supported Hyper-V features, then clear the leafs holding
+ * the features that will be tested one by one.
+ */
+ vcpu_set_hv_cpuid(vcpu);
+
+ vcpu_clear_cpuid_entry(vcpu, HYPERV_CPUID_FEATURES);
+ vcpu_clear_cpuid_entry(vcpu, HYPERV_CPUID_ENLIGHTMENT_INFO);
+ vcpu_clear_cpuid_entry(vcpu, HYPERV_CPUID_SYNDBG_PLATFORM_CAPABILITIES);
+}
+
+static void guest_test_msrs_access(void)
+{
+ struct kvm_cpuid2 *prev_cpuid = NULL;
+ struct kvm_vcpu *vcpu;
+ struct kvm_vm *vm;
+ struct ucall uc;
+ int stage = 0;
+ vm_vaddr_t msr_gva;
+ struct msr_data *msr;
+ bool has_invtsc = kvm_cpu_has(X86_FEATURE_INVTSC);
+
+ while (true) {
+ vm = vm_create_with_one_vcpu(&vcpu, guest_msr);
+
+ msr_gva = vm_vaddr_alloc_page(vm);
+ memset(addr_gva2hva(vm, msr_gva), 0x0, getpagesize());
+ msr = addr_gva2hva(vm, msr_gva);
+
+ vcpu_args_set(vcpu, 1, msr_gva);
+ vcpu_enable_cap(vcpu, KVM_CAP_HYPERV_ENFORCE_CPUID, 1);
+
+ if (!prev_cpuid) {
+ vcpu_reset_hv_cpuid(vcpu);
+
+ prev_cpuid = allocate_kvm_cpuid2(vcpu->cpuid->nent);
+ } else {
+ vcpu_init_cpuid(vcpu, prev_cpuid);
+ }
+
+ vm_init_descriptor_tables(vm);
+ vcpu_init_descriptor_tables(vcpu);
+
+ /* TODO: Make this entire test easier to maintain. */
+ if (stage >= 21)
+ vcpu_enable_cap(vcpu, KVM_CAP_HYPERV_SYNIC2, 0);
+
+ switch (stage) {
+ case 0:
+ /*
+ * Only available when Hyper-V identification is set
+ */
+ msr->idx = HV_X64_MSR_GUEST_OS_ID;
+ msr->write = false;
+ msr->fault_expected = true;
+ break;
+ case 1:
+ msr->idx = HV_X64_MSR_HYPERCALL;
+ msr->write = false;
+ msr->fault_expected = true;
+ break;
+ case 2:
+ vcpu_set_cpuid_feature(vcpu, HV_MSR_HYPERCALL_AVAILABLE);
+ /*
+ * HV_X64_MSR_GUEST_OS_ID has to be written first to make
+ * HV_X64_MSR_HYPERCALL available.
+ */
+ msr->idx = HV_X64_MSR_GUEST_OS_ID;
+ msr->write = true;
+ msr->write_val = HYPERV_LINUX_OS_ID;
+ msr->fault_expected = false;
+ break;
+ case 3:
+ msr->idx = HV_X64_MSR_GUEST_OS_ID;
+ msr->write = false;
+ msr->fault_expected = false;
+ break;
+ case 4:
+ msr->idx = HV_X64_MSR_HYPERCALL;
+ msr->write = false;
+ msr->fault_expected = false;
+ break;
+
+ case 5:
+ msr->idx = HV_X64_MSR_VP_RUNTIME;
+ msr->write = false;
+ msr->fault_expected = true;
+ break;
+ case 6:
+ vcpu_set_cpuid_feature(vcpu, HV_MSR_VP_RUNTIME_AVAILABLE);
+ msr->idx = HV_X64_MSR_VP_RUNTIME;
+ msr->write = false;
+ msr->fault_expected = false;
+ break;
+ case 7:
+ /* Read only */
+ msr->idx = HV_X64_MSR_VP_RUNTIME;
+ msr->write = true;
+ msr->write_val = 1;
+ msr->fault_expected = true;
+ break;
+
+ case 8:
+ msr->idx = HV_X64_MSR_TIME_REF_COUNT;
+ msr->write = false;
+ msr->fault_expected = true;
+ break;
+ case 9:
+ vcpu_set_cpuid_feature(vcpu, HV_MSR_TIME_REF_COUNT_AVAILABLE);
+ msr->idx = HV_X64_MSR_TIME_REF_COUNT;
+ msr->write = false;
+ msr->fault_expected = false;
+ break;
+ case 10:
+ /* Read only */
+ msr->idx = HV_X64_MSR_TIME_REF_COUNT;
+ msr->write = true;
+ msr->write_val = 1;
+ msr->fault_expected = true;
+ break;
+
+ case 11:
+ msr->idx = HV_X64_MSR_VP_INDEX;
+ msr->write = false;
+ msr->fault_expected = true;
+ break;
+ case 12:
+ vcpu_set_cpuid_feature(vcpu, HV_MSR_VP_INDEX_AVAILABLE);
+ msr->idx = HV_X64_MSR_VP_INDEX;
+ msr->write = false;
+ msr->fault_expected = false;
+ break;
+ case 13:
+ /* Read only */
+ msr->idx = HV_X64_MSR_VP_INDEX;
+ msr->write = true;
+ msr->write_val = 1;
+ msr->fault_expected = true;
+ break;
+
+ case 14:
+ msr->idx = HV_X64_MSR_RESET;
+ msr->write = false;
+ msr->fault_expected = true;
+ break;
+ case 15:
+ vcpu_set_cpuid_feature(vcpu, HV_MSR_RESET_AVAILABLE);
+ msr->idx = HV_X64_MSR_RESET;
+ msr->write = false;
+ msr->fault_expected = false;
+ break;
+ case 16:
+ msr->idx = HV_X64_MSR_RESET;
+ msr->write = true;
+ /*
+ * TODO: the test only writes '0' to HV_X64_MSR_RESET
+ * at the moment, writing some other value there will
+ * trigger real vCPU reset and the code is not prepared
+ * to handle it yet.
+ */
+ msr->write_val = 0;
+ msr->fault_expected = false;
+ break;
+
+ case 17:
+ msr->idx = HV_X64_MSR_REFERENCE_TSC;
+ msr->write = false;
+ msr->fault_expected = true;
+ break;
+ case 18:
+ vcpu_set_cpuid_feature(vcpu, HV_MSR_REFERENCE_TSC_AVAILABLE);
+ msr->idx = HV_X64_MSR_REFERENCE_TSC;
+ msr->write = false;
+ msr->fault_expected = false;
+ break;
+ case 19:
+ msr->idx = HV_X64_MSR_REFERENCE_TSC;
+ msr->write = true;
+ msr->write_val = 0;
+ msr->fault_expected = false;
+ break;
+
+ case 20:
+ msr->idx = HV_X64_MSR_EOM;
+ msr->write = false;
+ msr->fault_expected = true;
+ break;
+ case 21:
+ /*
+ * Remains unavailable even with KVM_CAP_HYPERV_SYNIC2
+ * capability enabled and guest visible CPUID bit unset.
+ */
+ msr->idx = HV_X64_MSR_EOM;
+ msr->write = false;
+ msr->fault_expected = true;
+ break;
+ case 22:
+ vcpu_set_cpuid_feature(vcpu, HV_MSR_SYNIC_AVAILABLE);
+ msr->idx = HV_X64_MSR_EOM;
+ msr->write = false;
+ msr->fault_expected = false;
+ break;
+ case 23:
+ msr->idx = HV_X64_MSR_EOM;
+ msr->write = true;
+ msr->write_val = 0;
+ msr->fault_expected = false;
+ break;
+
+ case 24:
+ msr->idx = HV_X64_MSR_STIMER0_CONFIG;
+ msr->write = false;
+ msr->fault_expected = true;
+ break;
+ case 25:
+ vcpu_set_cpuid_feature(vcpu, HV_MSR_SYNTIMER_AVAILABLE);
+ msr->idx = HV_X64_MSR_STIMER0_CONFIG;
+ msr->write = false;
+ msr->fault_expected = false;
+ break;
+ case 26:
+ msr->idx = HV_X64_MSR_STIMER0_CONFIG;
+ msr->write = true;
+ msr->write_val = 0;
+ msr->fault_expected = false;
+ break;
+ case 27:
+ /* Direct mode test */
+ msr->idx = HV_X64_MSR_STIMER0_CONFIG;
+ msr->write = true;
+ msr->write_val = 1 << 12;
+ msr->fault_expected = true;
+ break;
+ case 28:
+ vcpu_set_cpuid_feature(vcpu, HV_STIMER_DIRECT_MODE_AVAILABLE);
+ msr->idx = HV_X64_MSR_STIMER0_CONFIG;
+ msr->write = true;
+ msr->write_val = 1 << 12;
+ msr->fault_expected = false;
+ break;
+
+ case 29:
+ msr->idx = HV_X64_MSR_EOI;
+ msr->write = false;
+ msr->fault_expected = true;
+ break;
+ case 30:
+ vcpu_set_cpuid_feature(vcpu, HV_MSR_APIC_ACCESS_AVAILABLE);
+ msr->idx = HV_X64_MSR_EOI;
+ msr->write = true;
+ msr->write_val = 1;
+ msr->fault_expected = false;
+ break;
+
+ case 31:
+ msr->idx = HV_X64_MSR_TSC_FREQUENCY;
+ msr->write = false;
+ msr->fault_expected = true;
+ break;
+ case 32:
+ vcpu_set_cpuid_feature(vcpu, HV_ACCESS_FREQUENCY_MSRS);
+ msr->idx = HV_X64_MSR_TSC_FREQUENCY;
+ msr->write = false;
+ msr->fault_expected = false;
+ break;
+ case 33:
+ /* Read only */
+ msr->idx = HV_X64_MSR_TSC_FREQUENCY;
+ msr->write = true;
+ msr->write_val = 1;
+ msr->fault_expected = true;
+ break;
+
+ case 34:
+ msr->idx = HV_X64_MSR_REENLIGHTENMENT_CONTROL;
+ msr->write = false;
+ msr->fault_expected = true;
+ break;
+ case 35:
+ vcpu_set_cpuid_feature(vcpu, HV_ACCESS_REENLIGHTENMENT);
+ msr->idx = HV_X64_MSR_REENLIGHTENMENT_CONTROL;
+ msr->write = false;
+ msr->fault_expected = false;
+ break;
+ case 36:
+ msr->idx = HV_X64_MSR_REENLIGHTENMENT_CONTROL;
+ msr->write = true;
+ msr->write_val = 1;
+ msr->fault_expected = false;
+ break;
+ case 37:
+ /* Can only write '0' */
+ msr->idx = HV_X64_MSR_TSC_EMULATION_STATUS;
+ msr->write = true;
+ msr->write_val = 1;
+ msr->fault_expected = true;
+ break;
+
+ case 38:
+ msr->idx = HV_X64_MSR_CRASH_P0;
+ msr->write = false;
+ msr->fault_expected = true;
+ break;
+ case 39:
+ vcpu_set_cpuid_feature(vcpu, HV_FEATURE_GUEST_CRASH_MSR_AVAILABLE);
+ msr->idx = HV_X64_MSR_CRASH_P0;
+ msr->write = false;
+ msr->fault_expected = false;
+ break;
+ case 40:
+ msr->idx = HV_X64_MSR_CRASH_P0;
+ msr->write = true;
+ msr->write_val = 1;
+ msr->fault_expected = false;
+ break;
+
+ case 41:
+ msr->idx = HV_X64_MSR_SYNDBG_STATUS;
+ msr->write = false;
+ msr->fault_expected = true;
+ break;
+ case 42:
+ vcpu_set_cpuid_feature(vcpu, HV_FEATURE_DEBUG_MSRS_AVAILABLE);
+ vcpu_set_cpuid_feature(vcpu, HV_X64_SYNDBG_CAP_ALLOW_KERNEL_DEBUGGING);
+ msr->idx = HV_X64_MSR_SYNDBG_STATUS;
+ msr->write = false;
+ msr->fault_expected = false;
+ break;
+ case 43:
+ msr->idx = HV_X64_MSR_SYNDBG_STATUS;
+ msr->write = true;
+ msr->write_val = 0;
+ msr->fault_expected = false;
+ break;
+
+ case 44:
+ /* MSR is not available when CPUID feature bit is unset */
+ if (!has_invtsc)
+ goto next_stage;
+ msr->idx = HV_X64_MSR_TSC_INVARIANT_CONTROL;
+ msr->write = false;
+ msr->fault_expected = true;
+ break;
+ case 45:
+ /* MSR is vailable when CPUID feature bit is set */
+ if (!has_invtsc)
+ goto next_stage;
+ vcpu_set_cpuid_feature(vcpu, HV_ACCESS_TSC_INVARIANT);
+ msr->idx = HV_X64_MSR_TSC_INVARIANT_CONTROL;
+ msr->write = false;
+ msr->fault_expected = false;
+ break;
+ case 46:
+ /* Writing bits other than 0 is forbidden */
+ if (!has_invtsc)
+ goto next_stage;
+ msr->idx = HV_X64_MSR_TSC_INVARIANT_CONTROL;
+ msr->write = true;
+ msr->write_val = 0xdeadbeef;
+ msr->fault_expected = true;
+ break;
+ case 47:
+ /* Setting bit 0 enables the feature */
+ if (!has_invtsc)
+ goto next_stage;
+ msr->idx = HV_X64_MSR_TSC_INVARIANT_CONTROL;
+ msr->write = true;
+ msr->write_val = 1;
+ msr->fault_expected = false;
+ break;
+
+ default:
+ kvm_vm_free(vm);
+ return;
+ }
+
+ vcpu_set_cpuid(vcpu);
+
+ memcpy(prev_cpuid, vcpu->cpuid, kvm_cpuid2_size(vcpu->cpuid->nent));
+
+ pr_debug("Stage %d: testing msr: 0x%x for %s\n", stage,
+ msr->idx, msr->write ? "write" : "read");
+
+ vcpu_run(vcpu);
+ TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_IO);
+
+ switch (get_ucall(vcpu, &uc)) {
+ case UCALL_ABORT:
+ REPORT_GUEST_ASSERT(uc);
+ return;
+ case UCALL_DONE:
+ break;
+ default:
+ TEST_FAIL("Unhandled ucall: %ld", uc.cmd);
+ return;
+ }
+
+next_stage:
+ stage++;
+ kvm_vm_free(vm);
+ }
+}
+
+static void guest_test_hcalls_access(void)
+{
+ struct kvm_cpuid2 *prev_cpuid = NULL;
+ struct kvm_vcpu *vcpu;
+ struct kvm_vm *vm;
+ struct ucall uc;
+ int stage = 0;
+ vm_vaddr_t hcall_page, hcall_params;
+ struct hcall_data *hcall;
+
+ while (true) {
+ vm = vm_create_with_one_vcpu(&vcpu, guest_hcall);
+
+ vm_init_descriptor_tables(vm);
+ vcpu_init_descriptor_tables(vcpu);
+
+ /* Hypercall input/output */
+ hcall_page = vm_vaddr_alloc_pages(vm, 2);
+ memset(addr_gva2hva(vm, hcall_page), 0x0, 2 * getpagesize());
+
+ hcall_params = vm_vaddr_alloc_page(vm);
+ memset(addr_gva2hva(vm, hcall_params), 0x0, getpagesize());
+ hcall = addr_gva2hva(vm, hcall_params);
+
+ vcpu_args_set(vcpu, 2, addr_gva2gpa(vm, hcall_page), hcall_params);
+ vcpu_enable_cap(vcpu, KVM_CAP_HYPERV_ENFORCE_CPUID, 1);
+
+ if (!prev_cpuid) {
+ vcpu_reset_hv_cpuid(vcpu);
+
+ prev_cpuid = allocate_kvm_cpuid2(vcpu->cpuid->nent);
+ } else {
+ vcpu_init_cpuid(vcpu, prev_cpuid);
+ }
+
+ switch (stage) {
+ case 0:
+ vcpu_set_cpuid_feature(vcpu, HV_MSR_HYPERCALL_AVAILABLE);
+ hcall->control = 0xbeef;
+ hcall->expect = HV_STATUS_INVALID_HYPERCALL_CODE;
+ break;
+
+ case 1:
+ hcall->control = HVCALL_POST_MESSAGE;
+ hcall->expect = HV_STATUS_ACCESS_DENIED;
+ break;
+ case 2:
+ vcpu_set_cpuid_feature(vcpu, HV_POST_MESSAGES);
+ hcall->control = HVCALL_POST_MESSAGE;
+ hcall->expect = HV_STATUS_INVALID_HYPERCALL_INPUT;
+ break;
+
+ case 3:
+ hcall->control = HVCALL_SIGNAL_EVENT;
+ hcall->expect = HV_STATUS_ACCESS_DENIED;
+ break;
+ case 4:
+ vcpu_set_cpuid_feature(vcpu, HV_SIGNAL_EVENTS);
+ hcall->control = HVCALL_SIGNAL_EVENT;
+ hcall->expect = HV_STATUS_INVALID_HYPERCALL_INPUT;
+ break;
+
+ case 5:
+ hcall->control = HVCALL_RESET_DEBUG_SESSION;
+ hcall->expect = HV_STATUS_INVALID_HYPERCALL_CODE;
+ break;
+ case 6:
+ vcpu_set_cpuid_feature(vcpu, HV_X64_SYNDBG_CAP_ALLOW_KERNEL_DEBUGGING);
+ hcall->control = HVCALL_RESET_DEBUG_SESSION;
+ hcall->expect = HV_STATUS_ACCESS_DENIED;
+ break;
+ case 7:
+ vcpu_set_cpuid_feature(vcpu, HV_DEBUGGING);
+ hcall->control = HVCALL_RESET_DEBUG_SESSION;
+ hcall->expect = HV_STATUS_OPERATION_DENIED;
+ break;
+
+ case 8:
+ hcall->control = HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE;
+ hcall->expect = HV_STATUS_ACCESS_DENIED;
+ break;
+ case 9:
+ vcpu_set_cpuid_feature(vcpu, HV_X64_REMOTE_TLB_FLUSH_RECOMMENDED);
+ hcall->control = HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE;
+ hcall->expect = HV_STATUS_SUCCESS;
+ break;
+ case 10:
+ hcall->control = HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE_EX;
+ hcall->expect = HV_STATUS_ACCESS_DENIED;
+ break;
+ case 11:
+ vcpu_set_cpuid_feature(vcpu, HV_X64_EX_PROCESSOR_MASKS_RECOMMENDED);
+ hcall->control = HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE_EX;
+ hcall->expect = HV_STATUS_SUCCESS;
+ break;
+
+ case 12:
+ hcall->control = HVCALL_SEND_IPI;
+ hcall->expect = HV_STATUS_ACCESS_DENIED;
+ break;
+ case 13:
+ vcpu_set_cpuid_feature(vcpu, HV_X64_CLUSTER_IPI_RECOMMENDED);
+ hcall->control = HVCALL_SEND_IPI;
+ hcall->expect = HV_STATUS_INVALID_HYPERCALL_INPUT;
+ break;
+ case 14:
+ /* Nothing in 'sparse banks' -> success */
+ hcall->control = HVCALL_SEND_IPI_EX;
+ hcall->expect = HV_STATUS_SUCCESS;
+ break;
+
+ case 15:
+ hcall->control = HVCALL_NOTIFY_LONG_SPIN_WAIT;
+ hcall->expect = HV_STATUS_ACCESS_DENIED;
+ break;
+ case 16:
+ vcpu_set_cpuid_feature(vcpu, HV_PV_SPINLOCKS_TEST);
+ hcall->control = HVCALL_NOTIFY_LONG_SPIN_WAIT;
+ hcall->expect = HV_STATUS_SUCCESS;
+ break;
+ case 17:
+ /* XMM fast hypercall */
+ hcall->control = HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE | HV_HYPERCALL_FAST_BIT;
+ hcall->ud_expected = true;
+ break;
+ case 18:
+ vcpu_set_cpuid_feature(vcpu, HV_X64_HYPERCALL_XMM_INPUT_AVAILABLE);
+ hcall->control = HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE | HV_HYPERCALL_FAST_BIT;
+ hcall->ud_expected = false;
+ hcall->expect = HV_STATUS_SUCCESS;
+ break;
+ case 19:
+ hcall->control = HV_EXT_CALL_QUERY_CAPABILITIES;
+ hcall->expect = HV_STATUS_ACCESS_DENIED;
+ break;
+ case 20:
+ vcpu_set_cpuid_feature(vcpu, HV_ENABLE_EXTENDED_HYPERCALLS);
+ hcall->control = HV_EXT_CALL_QUERY_CAPABILITIES | HV_HYPERCALL_FAST_BIT;
+ hcall->expect = HV_STATUS_INVALID_PARAMETER;
+ break;
+ case 21:
+ kvm_vm_free(vm);
+ return;
+ }
+
+ vcpu_set_cpuid(vcpu);
+
+ memcpy(prev_cpuid, vcpu->cpuid, kvm_cpuid2_size(vcpu->cpuid->nent));
+
+ pr_debug("Stage %d: testing hcall: 0x%lx\n", stage, hcall->control);
+
+ vcpu_run(vcpu);
+ TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_IO);
+
+ switch (get_ucall(vcpu, &uc)) {
+ case UCALL_ABORT:
+ REPORT_GUEST_ASSERT(uc);
+ return;
+ case UCALL_DONE:
+ break;
+ default:
+ TEST_FAIL("Unhandled ucall: %ld", uc.cmd);
+ return;
+ }
+
+ stage++;
+ kvm_vm_free(vm);
+ }
+}
+
+int main(void)
+{
+ TEST_REQUIRE(kvm_has_cap(KVM_CAP_HYPERV_ENFORCE_CPUID));
+
+ pr_info("Testing access to Hyper-V specific MSRs\n");
+ guest_test_msrs_access();
+
+ pr_info("Testing access to Hyper-V hypercalls\n");
+ guest_test_hcalls_access();
+}
diff --git a/tools/testing/selftests/kvm/x86_64/hyperv_ipi.c b/tools/testing/selftests/kvm/x86_64/hyperv_ipi.c
new file mode 100644
index 000000000000..f1617762c22f
--- /dev/null
+++ b/tools/testing/selftests/kvm/x86_64/hyperv_ipi.c
@@ -0,0 +1,313 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Hyper-V HvCallSendSyntheticClusterIpi{,Ex} tests
+ *
+ * Copyright (C) 2022, Red Hat, Inc.
+ *
+ */
+
+#define _GNU_SOURCE /* for program_invocation_short_name */
+#include <pthread.h>
+#include <inttypes.h>
+
+#include "kvm_util.h"
+#include "hyperv.h"
+#include "test_util.h"
+#include "vmx.h"
+
+#define RECEIVER_VCPU_ID_1 2
+#define RECEIVER_VCPU_ID_2 65
+
+#define IPI_VECTOR 0xfe
+
+static volatile uint64_t ipis_rcvd[RECEIVER_VCPU_ID_2 + 1];
+
+struct hv_vpset {
+ u64 format;
+ u64 valid_bank_mask;
+ u64 bank_contents[2];
+};
+
+enum HV_GENERIC_SET_FORMAT {
+ HV_GENERIC_SET_SPARSE_4K,
+ HV_GENERIC_SET_ALL,
+};
+
+/* HvCallSendSyntheticClusterIpi hypercall */
+struct hv_send_ipi {
+ u32 vector;
+ u32 reserved;
+ u64 cpu_mask;
+};
+
+/* HvCallSendSyntheticClusterIpiEx hypercall */
+struct hv_send_ipi_ex {
+ u32 vector;
+ u32 reserved;
+ struct hv_vpset vp_set;
+};
+
+static inline void hv_init(vm_vaddr_t pgs_gpa)
+{
+ wrmsr(HV_X64_MSR_GUEST_OS_ID, HYPERV_LINUX_OS_ID);
+ wrmsr(HV_X64_MSR_HYPERCALL, pgs_gpa);
+}
+
+static void receiver_code(void *hcall_page, vm_vaddr_t pgs_gpa)
+{
+ u32 vcpu_id;
+
+ x2apic_enable();
+ hv_init(pgs_gpa);
+
+ vcpu_id = rdmsr(HV_X64_MSR_VP_INDEX);
+
+ /* Signal sender vCPU we're ready */
+ ipis_rcvd[vcpu_id] = (u64)-1;
+
+ for (;;)
+ asm volatile("sti; hlt; cli");
+}
+
+static void guest_ipi_handler(struct ex_regs *regs)
+{
+ u32 vcpu_id = rdmsr(HV_X64_MSR_VP_INDEX);
+
+ ipis_rcvd[vcpu_id]++;
+ wrmsr(HV_X64_MSR_EOI, 1);
+}
+
+static inline void nop_loop(void)
+{
+ int i;
+
+ for (i = 0; i < 100000000; i++)
+ asm volatile("nop");
+}
+
+static void sender_guest_code(void *hcall_page, vm_vaddr_t pgs_gpa)
+{
+ struct hv_send_ipi *ipi = (struct hv_send_ipi *)hcall_page;
+ struct hv_send_ipi_ex *ipi_ex = (struct hv_send_ipi_ex *)hcall_page;
+ int stage = 1, ipis_expected[2] = {0};
+
+ hv_init(pgs_gpa);
+ GUEST_SYNC(stage++);
+
+ /* Wait for receiver vCPUs to come up */
+ while (!ipis_rcvd[RECEIVER_VCPU_ID_1] || !ipis_rcvd[RECEIVER_VCPU_ID_2])
+ nop_loop();
+ ipis_rcvd[RECEIVER_VCPU_ID_1] = ipis_rcvd[RECEIVER_VCPU_ID_2] = 0;
+
+ /* 'Slow' HvCallSendSyntheticClusterIpi to RECEIVER_VCPU_ID_1 */
+ ipi->vector = IPI_VECTOR;
+ ipi->cpu_mask = 1 << RECEIVER_VCPU_ID_1;
+ hyperv_hypercall(HVCALL_SEND_IPI, pgs_gpa, pgs_gpa + 4096);
+ nop_loop();
+ GUEST_ASSERT(ipis_rcvd[RECEIVER_VCPU_ID_1] == ++ipis_expected[0]);
+ GUEST_ASSERT(ipis_rcvd[RECEIVER_VCPU_ID_2] == ipis_expected[1]);
+ GUEST_SYNC(stage++);
+ /* 'Fast' HvCallSendSyntheticClusterIpi to RECEIVER_VCPU_ID_1 */
+ hyperv_hypercall(HVCALL_SEND_IPI | HV_HYPERCALL_FAST_BIT,
+ IPI_VECTOR, 1 << RECEIVER_VCPU_ID_1);
+ nop_loop();
+ GUEST_ASSERT(ipis_rcvd[RECEIVER_VCPU_ID_1] == ++ipis_expected[0]);
+ GUEST_ASSERT(ipis_rcvd[RECEIVER_VCPU_ID_2] == ipis_expected[1]);
+ GUEST_SYNC(stage++);
+
+ /* 'Slow' HvCallSendSyntheticClusterIpiEx to RECEIVER_VCPU_ID_1 */
+ memset(hcall_page, 0, 4096);
+ ipi_ex->vector = IPI_VECTOR;
+ ipi_ex->vp_set.format = HV_GENERIC_SET_SPARSE_4K;
+ ipi_ex->vp_set.valid_bank_mask = 1 << 0;
+ ipi_ex->vp_set.bank_contents[0] = BIT(RECEIVER_VCPU_ID_1);
+ hyperv_hypercall(HVCALL_SEND_IPI_EX | (1 << HV_HYPERCALL_VARHEAD_OFFSET),
+ pgs_gpa, pgs_gpa + 4096);
+ nop_loop();
+ GUEST_ASSERT(ipis_rcvd[RECEIVER_VCPU_ID_1] == ++ipis_expected[0]);
+ GUEST_ASSERT(ipis_rcvd[RECEIVER_VCPU_ID_2] == ipis_expected[1]);
+ GUEST_SYNC(stage++);
+ /* 'XMM Fast' HvCallSendSyntheticClusterIpiEx to RECEIVER_VCPU_ID_1 */
+ hyperv_write_xmm_input(&ipi_ex->vp_set.valid_bank_mask, 1);
+ hyperv_hypercall(HVCALL_SEND_IPI_EX | HV_HYPERCALL_FAST_BIT |
+ (1 << HV_HYPERCALL_VARHEAD_OFFSET),
+ IPI_VECTOR, HV_GENERIC_SET_SPARSE_4K);
+ nop_loop();
+ GUEST_ASSERT(ipis_rcvd[RECEIVER_VCPU_ID_1] == ++ipis_expected[0]);
+ GUEST_ASSERT(ipis_rcvd[RECEIVER_VCPU_ID_2] == ipis_expected[1]);
+ GUEST_SYNC(stage++);
+
+ /* 'Slow' HvCallSendSyntheticClusterIpiEx to RECEIVER_VCPU_ID_2 */
+ memset(hcall_page, 0, 4096);
+ ipi_ex->vector = IPI_VECTOR;
+ ipi_ex->vp_set.format = HV_GENERIC_SET_SPARSE_4K;
+ ipi_ex->vp_set.valid_bank_mask = 1 << 1;
+ ipi_ex->vp_set.bank_contents[0] = BIT(RECEIVER_VCPU_ID_2 - 64);
+ hyperv_hypercall(HVCALL_SEND_IPI_EX | (1 << HV_HYPERCALL_VARHEAD_OFFSET),
+ pgs_gpa, pgs_gpa + 4096);
+ nop_loop();
+ GUEST_ASSERT(ipis_rcvd[RECEIVER_VCPU_ID_1] == ipis_expected[0]);
+ GUEST_ASSERT(ipis_rcvd[RECEIVER_VCPU_ID_2] == ++ipis_expected[1]);
+ GUEST_SYNC(stage++);
+ /* 'XMM Fast' HvCallSendSyntheticClusterIpiEx to RECEIVER_VCPU_ID_2 */
+ hyperv_write_xmm_input(&ipi_ex->vp_set.valid_bank_mask, 1);
+ hyperv_hypercall(HVCALL_SEND_IPI_EX | HV_HYPERCALL_FAST_BIT |
+ (1 << HV_HYPERCALL_VARHEAD_OFFSET),
+ IPI_VECTOR, HV_GENERIC_SET_SPARSE_4K);
+ nop_loop();
+ GUEST_ASSERT(ipis_rcvd[RECEIVER_VCPU_ID_1] == ipis_expected[0]);
+ GUEST_ASSERT(ipis_rcvd[RECEIVER_VCPU_ID_2] == ++ipis_expected[1]);
+ GUEST_SYNC(stage++);
+
+ /* 'Slow' HvCallSendSyntheticClusterIpiEx to both RECEIVER_VCPU_ID_{1,2} */
+ memset(hcall_page, 0, 4096);
+ ipi_ex->vector = IPI_VECTOR;
+ ipi_ex->vp_set.format = HV_GENERIC_SET_SPARSE_4K;
+ ipi_ex->vp_set.valid_bank_mask = 1 << 1 | 1;
+ ipi_ex->vp_set.bank_contents[0] = BIT(RECEIVER_VCPU_ID_1);
+ ipi_ex->vp_set.bank_contents[1] = BIT(RECEIVER_VCPU_ID_2 - 64);
+ hyperv_hypercall(HVCALL_SEND_IPI_EX | (2 << HV_HYPERCALL_VARHEAD_OFFSET),
+ pgs_gpa, pgs_gpa + 4096);
+ nop_loop();
+ GUEST_ASSERT(ipis_rcvd[RECEIVER_VCPU_ID_1] == ++ipis_expected[0]);
+ GUEST_ASSERT(ipis_rcvd[RECEIVER_VCPU_ID_2] == ++ipis_expected[1]);
+ GUEST_SYNC(stage++);
+ /* 'XMM Fast' HvCallSendSyntheticClusterIpiEx to both RECEIVER_VCPU_ID_{1, 2} */
+ hyperv_write_xmm_input(&ipi_ex->vp_set.valid_bank_mask, 2);
+ hyperv_hypercall(HVCALL_SEND_IPI_EX | HV_HYPERCALL_FAST_BIT |
+ (2 << HV_HYPERCALL_VARHEAD_OFFSET),
+ IPI_VECTOR, HV_GENERIC_SET_SPARSE_4K);
+ nop_loop();
+ GUEST_ASSERT(ipis_rcvd[RECEIVER_VCPU_ID_1] == ++ipis_expected[0]);
+ GUEST_ASSERT(ipis_rcvd[RECEIVER_VCPU_ID_2] == ++ipis_expected[1]);
+ GUEST_SYNC(stage++);
+
+ /* 'Slow' HvCallSendSyntheticClusterIpiEx to HV_GENERIC_SET_ALL */
+ memset(hcall_page, 0, 4096);
+ ipi_ex->vector = IPI_VECTOR;
+ ipi_ex->vp_set.format = HV_GENERIC_SET_ALL;
+ hyperv_hypercall(HVCALL_SEND_IPI_EX, pgs_gpa, pgs_gpa + 4096);
+ nop_loop();
+ GUEST_ASSERT(ipis_rcvd[RECEIVER_VCPU_ID_1] == ++ipis_expected[0]);
+ GUEST_ASSERT(ipis_rcvd[RECEIVER_VCPU_ID_2] == ++ipis_expected[1]);
+ GUEST_SYNC(stage++);
+ /*
+ * 'XMM Fast' HvCallSendSyntheticClusterIpiEx to HV_GENERIC_SET_ALL.
+ */
+ ipi_ex->vp_set.valid_bank_mask = 0;
+ hyperv_write_xmm_input(&ipi_ex->vp_set.valid_bank_mask, 2);
+ hyperv_hypercall(HVCALL_SEND_IPI_EX | HV_HYPERCALL_FAST_BIT,
+ IPI_VECTOR, HV_GENERIC_SET_ALL);
+ nop_loop();
+ GUEST_ASSERT(ipis_rcvd[RECEIVER_VCPU_ID_1] == ++ipis_expected[0]);
+ GUEST_ASSERT(ipis_rcvd[RECEIVER_VCPU_ID_2] == ++ipis_expected[1]);
+ GUEST_SYNC(stage++);
+
+ GUEST_DONE();
+}
+
+static void *vcpu_thread(void *arg)
+{
+ struct kvm_vcpu *vcpu = (struct kvm_vcpu *)arg;
+ int old, r;
+
+ r = pthread_setcanceltype(PTHREAD_CANCEL_ASYNCHRONOUS, &old);
+ TEST_ASSERT(!r, "pthread_setcanceltype failed on vcpu_id=%u with errno=%d",
+ vcpu->id, r);
+
+ vcpu_run(vcpu);
+
+ TEST_FAIL("vCPU %u exited unexpectedly", vcpu->id);
+
+ return NULL;
+}
+
+static void cancel_join_vcpu_thread(pthread_t thread, struct kvm_vcpu *vcpu)
+{
+ void *retval;
+ int r;
+
+ r = pthread_cancel(thread);
+ TEST_ASSERT(!r, "pthread_cancel on vcpu_id=%d failed with errno=%d",
+ vcpu->id, r);
+
+ r = pthread_join(thread, &retval);
+ TEST_ASSERT(!r, "pthread_join on vcpu_id=%d failed with errno=%d",
+ vcpu->id, r);
+ TEST_ASSERT(retval == PTHREAD_CANCELED,
+ "expected retval=%p, got %p", PTHREAD_CANCELED,
+ retval);
+}
+
+int main(int argc, char *argv[])
+{
+ struct kvm_vm *vm;
+ struct kvm_vcpu *vcpu[3];
+ vm_vaddr_t hcall_page;
+ pthread_t threads[2];
+ int stage = 1, r;
+ struct ucall uc;
+
+ TEST_REQUIRE(kvm_has_cap(KVM_CAP_HYPERV_SEND_IPI));
+
+ vm = vm_create_with_one_vcpu(&vcpu[0], sender_guest_code);
+
+ /* Hypercall input/output */
+ hcall_page = vm_vaddr_alloc_pages(vm, 2);
+ memset(addr_gva2hva(vm, hcall_page), 0x0, 2 * getpagesize());
+
+ vm_init_descriptor_tables(vm);
+
+ vcpu[1] = vm_vcpu_add(vm, RECEIVER_VCPU_ID_1, receiver_code);
+ vcpu_init_descriptor_tables(vcpu[1]);
+ vcpu_args_set(vcpu[1], 2, hcall_page, addr_gva2gpa(vm, hcall_page));
+ vcpu_set_msr(vcpu[1], HV_X64_MSR_VP_INDEX, RECEIVER_VCPU_ID_1);
+ vcpu_set_hv_cpuid(vcpu[1]);
+
+ vcpu[2] = vm_vcpu_add(vm, RECEIVER_VCPU_ID_2, receiver_code);
+ vcpu_init_descriptor_tables(vcpu[2]);
+ vcpu_args_set(vcpu[2], 2, hcall_page, addr_gva2gpa(vm, hcall_page));
+ vcpu_set_msr(vcpu[2], HV_X64_MSR_VP_INDEX, RECEIVER_VCPU_ID_2);
+ vcpu_set_hv_cpuid(vcpu[2]);
+
+ vm_install_exception_handler(vm, IPI_VECTOR, guest_ipi_handler);
+
+ vcpu_args_set(vcpu[0], 2, hcall_page, addr_gva2gpa(vm, hcall_page));
+ vcpu_set_hv_cpuid(vcpu[0]);
+
+ r = pthread_create(&threads[0], NULL, vcpu_thread, vcpu[1]);
+ TEST_ASSERT(!r, "pthread_create failed errno=%d", r);
+
+ r = pthread_create(&threads[1], NULL, vcpu_thread, vcpu[2]);
+ TEST_ASSERT(!r, "pthread_create failed errno=%d", errno);
+
+ while (true) {
+ vcpu_run(vcpu[0]);
+
+ TEST_ASSERT_KVM_EXIT_REASON(vcpu[0], KVM_EXIT_IO);
+
+ switch (get_ucall(vcpu[0], &uc)) {
+ case UCALL_SYNC:
+ TEST_ASSERT(uc.args[1] == stage,
+ "Unexpected stage: %ld (%d expected)",
+ uc.args[1], stage);
+ break;
+ case UCALL_DONE:
+ goto done;
+ case UCALL_ABORT:
+ REPORT_GUEST_ASSERT(uc);
+ /* NOT REACHED */
+ default:
+ TEST_FAIL("Unknown ucall %lu", uc.cmd);
+ }
+
+ stage++;
+ }
+
+done:
+ cancel_join_vcpu_thread(threads[0], vcpu[1]);
+ cancel_join_vcpu_thread(threads[1], vcpu[2]);
+ kvm_vm_free(vm);
+
+ return r;
+}
diff --git a/tools/testing/selftests/kvm/x86_64/hyperv_svm_test.c b/tools/testing/selftests/kvm/x86_64/hyperv_svm_test.c
new file mode 100644
index 000000000000..c9b18707edc0
--- /dev/null
+++ b/tools/testing/selftests/kvm/x86_64/hyperv_svm_test.c
@@ -0,0 +1,200 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright (C) 2022, Red Hat, Inc.
+ *
+ * Tests for Hyper-V extensions to SVM.
+ */
+#define _GNU_SOURCE /* for program_invocation_short_name */
+#include <fcntl.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/ioctl.h>
+#include <linux/bitmap.h>
+
+#include "test_util.h"
+
+#include "kvm_util.h"
+#include "processor.h"
+#include "svm_util.h"
+#include "hyperv.h"
+
+#define L2_GUEST_STACK_SIZE 256
+
+/* Exit to L1 from L2 with RDMSR instruction */
+static inline void rdmsr_from_l2(uint32_t msr)
+{
+ /* Currently, L1 doesn't preserve GPRs during vmexits. */
+ __asm__ __volatile__ ("rdmsr" : : "c"(msr) :
+ "rax", "rbx", "rdx", "rsi", "rdi", "r8", "r9",
+ "r10", "r11", "r12", "r13", "r14", "r15");
+}
+
+void l2_guest_code(void)
+{
+ u64 unused;
+
+ GUEST_SYNC(3);
+ /* Exit to L1 */
+ vmmcall();
+
+ /* MSR-Bitmap tests */
+ rdmsr_from_l2(MSR_FS_BASE); /* intercepted */
+ rdmsr_from_l2(MSR_FS_BASE); /* intercepted */
+ rdmsr_from_l2(MSR_GS_BASE); /* not intercepted */
+ vmmcall();
+ rdmsr_from_l2(MSR_GS_BASE); /* intercepted */
+
+ GUEST_SYNC(5);
+
+ /* L2 TLB flush tests */
+ hyperv_hypercall(HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE |
+ HV_HYPERCALL_FAST_BIT, 0x0,
+ HV_FLUSH_ALL_VIRTUAL_ADDRESS_SPACES |
+ HV_FLUSH_ALL_PROCESSORS);
+ rdmsr_from_l2(MSR_FS_BASE);
+ /*
+ * Note: hypercall status (RAX) is not preserved correctly by L1 after
+ * synthetic vmexit, use unchecked version.
+ */
+ __hyperv_hypercall(HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE |
+ HV_HYPERCALL_FAST_BIT, 0x0,
+ HV_FLUSH_ALL_VIRTUAL_ADDRESS_SPACES |
+ HV_FLUSH_ALL_PROCESSORS, &unused);
+
+ /* Done, exit to L1 and never come back. */
+ vmmcall();
+}
+
+static void __attribute__((__flatten__)) guest_code(struct svm_test_data *svm,
+ struct hyperv_test_pages *hv_pages,
+ vm_vaddr_t pgs_gpa)
+{
+ unsigned long l2_guest_stack[L2_GUEST_STACK_SIZE];
+ struct vmcb *vmcb = svm->vmcb;
+ struct hv_vmcb_enlightenments *hve = &vmcb->control.hv_enlightenments;
+
+ GUEST_SYNC(1);
+
+ wrmsr(HV_X64_MSR_GUEST_OS_ID, HYPERV_LINUX_OS_ID);
+ wrmsr(HV_X64_MSR_HYPERCALL, pgs_gpa);
+ enable_vp_assist(hv_pages->vp_assist_gpa, hv_pages->vp_assist);
+
+ GUEST_ASSERT(svm->vmcb_gpa);
+ /* Prepare for L2 execution. */
+ generic_svm_setup(svm, l2_guest_code,
+ &l2_guest_stack[L2_GUEST_STACK_SIZE]);
+
+ /* L2 TLB flush setup */
+ hve->partition_assist_page = hv_pages->partition_assist_gpa;
+ hve->hv_enlightenments_control.nested_flush_hypercall = 1;
+ hve->hv_vm_id = 1;
+ hve->hv_vp_id = 1;
+ current_vp_assist->nested_control.features.directhypercall = 1;
+ *(u32 *)(hv_pages->partition_assist) = 0;
+
+ GUEST_SYNC(2);
+ run_guest(vmcb, svm->vmcb_gpa);
+ GUEST_ASSERT(vmcb->control.exit_code == SVM_EXIT_VMMCALL);
+ GUEST_SYNC(4);
+ vmcb->save.rip += 3;
+
+ /* Intercept RDMSR 0xc0000100 */
+ vmcb->control.intercept |= 1ULL << INTERCEPT_MSR_PROT;
+ __set_bit(2 * (MSR_FS_BASE & 0x1fff), svm->msr + 0x800);
+ run_guest(vmcb, svm->vmcb_gpa);
+ GUEST_ASSERT(vmcb->control.exit_code == SVM_EXIT_MSR);
+ vmcb->save.rip += 2; /* rdmsr */
+
+ /* Enable enlightened MSR bitmap */
+ hve->hv_enlightenments_control.msr_bitmap = 1;
+ run_guest(vmcb, svm->vmcb_gpa);
+ GUEST_ASSERT(vmcb->control.exit_code == SVM_EXIT_MSR);
+ vmcb->save.rip += 2; /* rdmsr */
+
+ /* Intercept RDMSR 0xc0000101 without telling KVM about it */
+ __set_bit(2 * (MSR_GS_BASE & 0x1fff), svm->msr + 0x800);
+ /* Make sure HV_VMX_ENLIGHTENED_CLEAN_FIELD_MSR_BITMAP is set */
+ vmcb->control.clean |= HV_VMCB_NESTED_ENLIGHTENMENTS;
+ run_guest(vmcb, svm->vmcb_gpa);
+ /* Make sure we don't see SVM_EXIT_MSR here so eMSR bitmap works */
+ GUEST_ASSERT(vmcb->control.exit_code == SVM_EXIT_VMMCALL);
+ vmcb->save.rip += 3; /* vmcall */
+
+ /* Now tell KVM we've changed MSR-Bitmap */
+ vmcb->control.clean &= ~HV_VMCB_NESTED_ENLIGHTENMENTS;
+ run_guest(vmcb, svm->vmcb_gpa);
+ GUEST_ASSERT(vmcb->control.exit_code == SVM_EXIT_MSR);
+ vmcb->save.rip += 2; /* rdmsr */
+
+
+ /*
+ * L2 TLB flush test. First VMCALL should be handled directly by L0,
+ * no VMCALL exit expected.
+ */
+ run_guest(vmcb, svm->vmcb_gpa);
+ GUEST_ASSERT(vmcb->control.exit_code == SVM_EXIT_MSR);
+ vmcb->save.rip += 2; /* rdmsr */
+ /* Enable synthetic vmexit */
+ *(u32 *)(hv_pages->partition_assist) = 1;
+ run_guest(vmcb, svm->vmcb_gpa);
+ GUEST_ASSERT(vmcb->control.exit_code == HV_SVM_EXITCODE_ENL);
+ GUEST_ASSERT(vmcb->control.exit_info_1 == HV_SVM_ENL_EXITCODE_TRAP_AFTER_FLUSH);
+
+ run_guest(vmcb, svm->vmcb_gpa);
+ GUEST_ASSERT(vmcb->control.exit_code == SVM_EXIT_VMMCALL);
+ GUEST_SYNC(6);
+
+ GUEST_DONE();
+}
+
+int main(int argc, char *argv[])
+{
+ vm_vaddr_t nested_gva = 0, hv_pages_gva = 0;
+ vm_vaddr_t hcall_page;
+ struct kvm_vcpu *vcpu;
+ struct kvm_vm *vm;
+ struct ucall uc;
+ int stage;
+
+ TEST_REQUIRE(kvm_cpu_has(X86_FEATURE_SVM));
+ TEST_REQUIRE(kvm_has_cap(KVM_CAP_HYPERV_DIRECT_TLBFLUSH));
+
+ /* Create VM */
+ vm = vm_create_with_one_vcpu(&vcpu, guest_code);
+ vcpu_set_hv_cpuid(vcpu);
+ vcpu_alloc_svm(vm, &nested_gva);
+ vcpu_alloc_hyperv_test_pages(vm, &hv_pages_gva);
+
+ hcall_page = vm_vaddr_alloc_pages(vm, 1);
+ memset(addr_gva2hva(vm, hcall_page), 0x0, getpagesize());
+
+ vcpu_args_set(vcpu, 3, nested_gva, hv_pages_gva, addr_gva2gpa(vm, hcall_page));
+ vcpu_set_msr(vcpu, HV_X64_MSR_VP_INDEX, vcpu->id);
+
+ for (stage = 1;; stage++) {
+ vcpu_run(vcpu);
+ TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_IO);
+
+ switch (get_ucall(vcpu, &uc)) {
+ case UCALL_ABORT:
+ REPORT_GUEST_ASSERT(uc);
+ /* NOT REACHED */
+ case UCALL_SYNC:
+ break;
+ case UCALL_DONE:
+ goto done;
+ default:
+ TEST_FAIL("Unknown ucall %lu", uc.cmd);
+ }
+
+ /* UCALL_SYNC is handled here. */
+ TEST_ASSERT(!strcmp((const char *)uc.args[0], "hello") &&
+ uc.args[1] == stage, "Stage %d: Unexpected register values vmexit, got %lx",
+ stage, (ulong)uc.args[1]);
+
+ }
+
+done:
+ kvm_vm_free(vm);
+}
diff --git a/tools/testing/selftests/kvm/x86_64/hyperv_tlb_flush.c b/tools/testing/selftests/kvm/x86_64/hyperv_tlb_flush.c
new file mode 100644
index 000000000000..05b56095cf76
--- /dev/null
+++ b/tools/testing/selftests/kvm/x86_64/hyperv_tlb_flush.c
@@ -0,0 +1,682 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Hyper-V HvFlushVirtualAddress{List,Space}{,Ex} tests
+ *
+ * Copyright (C) 2022, Red Hat, Inc.
+ *
+ */
+
+#define _GNU_SOURCE /* for program_invocation_short_name */
+#include <asm/barrier.h>
+#include <pthread.h>
+#include <inttypes.h>
+
+#include "kvm_util.h"
+#include "processor.h"
+#include "hyperv.h"
+#include "test_util.h"
+#include "vmx.h"
+
+#define WORKER_VCPU_ID_1 2
+#define WORKER_VCPU_ID_2 65
+
+#define NTRY 100
+#define NTEST_PAGES 2
+
+struct hv_vpset {
+ u64 format;
+ u64 valid_bank_mask;
+ u64 bank_contents[];
+};
+
+enum HV_GENERIC_SET_FORMAT {
+ HV_GENERIC_SET_SPARSE_4K,
+ HV_GENERIC_SET_ALL,
+};
+
+#define HV_FLUSH_ALL_PROCESSORS BIT(0)
+#define HV_FLUSH_ALL_VIRTUAL_ADDRESS_SPACES BIT(1)
+#define HV_FLUSH_NON_GLOBAL_MAPPINGS_ONLY BIT(2)
+#define HV_FLUSH_USE_EXTENDED_RANGE_FORMAT BIT(3)
+
+/* HvFlushVirtualAddressSpace, HvFlushVirtualAddressList hypercalls */
+struct hv_tlb_flush {
+ u64 address_space;
+ u64 flags;
+ u64 processor_mask;
+ u64 gva_list[];
+} __packed;
+
+/* HvFlushVirtualAddressSpaceEx, HvFlushVirtualAddressListEx hypercalls */
+struct hv_tlb_flush_ex {
+ u64 address_space;
+ u64 flags;
+ struct hv_vpset hv_vp_set;
+ u64 gva_list[];
+} __packed;
+
+/*
+ * Pass the following info to 'workers' and 'sender'
+ * - Hypercall page's GVA
+ * - Hypercall page's GPA
+ * - Test pages GVA
+ * - GVAs of the test pages' PTEs
+ */
+struct test_data {
+ vm_vaddr_t hcall_gva;
+ vm_paddr_t hcall_gpa;
+ vm_vaddr_t test_pages;
+ vm_vaddr_t test_pages_pte[NTEST_PAGES];
+};
+
+/* 'Worker' vCPU code checking the contents of the test page */
+static void worker_guest_code(vm_vaddr_t test_data)
+{
+ struct test_data *data = (struct test_data *)test_data;
+ u32 vcpu_id = rdmsr(HV_X64_MSR_VP_INDEX);
+ void *exp_page = (void *)data->test_pages + PAGE_SIZE * NTEST_PAGES;
+ u64 *this_cpu = (u64 *)(exp_page + vcpu_id * sizeof(u64));
+ u64 expected, val;
+
+ x2apic_enable();
+ wrmsr(HV_X64_MSR_GUEST_OS_ID, HYPERV_LINUX_OS_ID);
+
+ for (;;) {
+ cpu_relax();
+
+ expected = READ_ONCE(*this_cpu);
+
+ /*
+ * Make sure the value in the test page is read after reading
+ * the expectation for the first time. Pairs with wmb() in
+ * prepare_to_test().
+ */
+ rmb();
+
+ val = READ_ONCE(*(u64 *)data->test_pages);
+
+ /*
+ * Make sure the value in the test page is read after before
+ * reading the expectation for the second time. Pairs with wmb()
+ * post_test().
+ */
+ rmb();
+
+ /*
+ * '0' indicates the sender is between iterations, wait until
+ * the sender is ready for this vCPU to start checking again.
+ */
+ if (!expected)
+ continue;
+
+ /*
+ * Re-read the per-vCPU byte to ensure the sender didn't move
+ * onto a new iteration.
+ */
+ if (expected != READ_ONCE(*this_cpu))
+ continue;
+
+ GUEST_ASSERT(val == expected);
+ }
+}
+
+/*
+ * Write per-CPU info indicating what each 'worker' CPU is supposed to see in
+ * test page. '0' means don't check.
+ */
+static void set_expected_val(void *addr, u64 val, int vcpu_id)
+{
+ void *exp_page = addr + PAGE_SIZE * NTEST_PAGES;
+
+ *(u64 *)(exp_page + vcpu_id * sizeof(u64)) = val;
+}
+
+/*
+ * Update PTEs swapping two test pages.
+ * TODO: use swap()/xchg() when these are provided.
+ */
+static void swap_two_test_pages(vm_paddr_t pte_gva1, vm_paddr_t pte_gva2)
+{
+ uint64_t tmp = *(uint64_t *)pte_gva1;
+
+ *(uint64_t *)pte_gva1 = *(uint64_t *)pte_gva2;
+ *(uint64_t *)pte_gva2 = tmp;
+}
+
+/*
+ * TODO: replace the silly NOP loop with a proper udelay() implementation.
+ */
+static inline void do_delay(void)
+{
+ int i;
+
+ for (i = 0; i < 1000000; i++)
+ asm volatile("nop");
+}
+
+/*
+ * Prepare to test: 'disable' workers by setting the expectation to '0',
+ * clear hypercall input page and then swap two test pages.
+ */
+static inline void prepare_to_test(struct test_data *data)
+{
+ /* Clear hypercall input page */
+ memset((void *)data->hcall_gva, 0, PAGE_SIZE);
+
+ /* 'Disable' workers */
+ set_expected_val((void *)data->test_pages, 0x0, WORKER_VCPU_ID_1);
+ set_expected_val((void *)data->test_pages, 0x0, WORKER_VCPU_ID_2);
+
+ /* Make sure workers are 'disabled' before we swap PTEs. */
+ wmb();
+
+ /* Make sure workers have enough time to notice */
+ do_delay();
+
+ /* Swap test page mappings */
+ swap_two_test_pages(data->test_pages_pte[0], data->test_pages_pte[1]);
+}
+
+/*
+ * Finalize the test: check hypercall resule set the expected val for
+ * 'worker' CPUs and give them some time to test.
+ */
+static inline void post_test(struct test_data *data, u64 exp1, u64 exp2)
+{
+ /* Make sure we change the expectation after swapping PTEs */
+ wmb();
+
+ /* Set the expectation for workers, '0' means don't test */
+ set_expected_val((void *)data->test_pages, exp1, WORKER_VCPU_ID_1);
+ set_expected_val((void *)data->test_pages, exp2, WORKER_VCPU_ID_2);
+
+ /* Make sure workers have enough time to test */
+ do_delay();
+}
+
+#define TESTVAL1 0x0101010101010101
+#define TESTVAL2 0x0202020202020202
+
+/* Main vCPU doing the test */
+static void sender_guest_code(vm_vaddr_t test_data)
+{
+ struct test_data *data = (struct test_data *)test_data;
+ struct hv_tlb_flush *flush = (struct hv_tlb_flush *)data->hcall_gva;
+ struct hv_tlb_flush_ex *flush_ex = (struct hv_tlb_flush_ex *)data->hcall_gva;
+ vm_paddr_t hcall_gpa = data->hcall_gpa;
+ int i, stage = 1;
+
+ wrmsr(HV_X64_MSR_GUEST_OS_ID, HYPERV_LINUX_OS_ID);
+ wrmsr(HV_X64_MSR_HYPERCALL, data->hcall_gpa);
+
+ /* "Slow" hypercalls */
+
+ GUEST_SYNC(stage++);
+
+ /* HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE for WORKER_VCPU_ID_1 */
+ for (i = 0; i < NTRY; i++) {
+ prepare_to_test(data);
+ flush->flags = HV_FLUSH_ALL_VIRTUAL_ADDRESS_SPACES;
+ flush->processor_mask = BIT(WORKER_VCPU_ID_1);
+ hyperv_hypercall(HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE, hcall_gpa,
+ hcall_gpa + PAGE_SIZE);
+ post_test(data, i % 2 ? TESTVAL1 : TESTVAL2, 0x0);
+ }
+
+ GUEST_SYNC(stage++);
+
+ /* HVCALL_FLUSH_VIRTUAL_ADDRESS_LIST for WORKER_VCPU_ID_1 */
+ for (i = 0; i < NTRY; i++) {
+ prepare_to_test(data);
+ flush->flags = HV_FLUSH_ALL_VIRTUAL_ADDRESS_SPACES;
+ flush->processor_mask = BIT(WORKER_VCPU_ID_1);
+ flush->gva_list[0] = (u64)data->test_pages;
+ hyperv_hypercall(HVCALL_FLUSH_VIRTUAL_ADDRESS_LIST |
+ (1UL << HV_HYPERCALL_REP_COMP_OFFSET),
+ hcall_gpa, hcall_gpa + PAGE_SIZE);
+ post_test(data, i % 2 ? TESTVAL1 : TESTVAL2, 0x0);
+ }
+
+ GUEST_SYNC(stage++);
+
+ /* HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE for HV_FLUSH_ALL_PROCESSORS */
+ for (i = 0; i < NTRY; i++) {
+ prepare_to_test(data);
+ flush->flags = HV_FLUSH_ALL_VIRTUAL_ADDRESS_SPACES |
+ HV_FLUSH_ALL_PROCESSORS;
+ flush->processor_mask = 0;
+ hyperv_hypercall(HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE, hcall_gpa,
+ hcall_gpa + PAGE_SIZE);
+ post_test(data, i % 2 ? TESTVAL1 : TESTVAL2, i % 2 ? TESTVAL1 : TESTVAL2);
+ }
+
+ GUEST_SYNC(stage++);
+
+ /* HVCALL_FLUSH_VIRTUAL_ADDRESS_LIST for HV_FLUSH_ALL_PROCESSORS */
+ for (i = 0; i < NTRY; i++) {
+ prepare_to_test(data);
+ flush->flags = HV_FLUSH_ALL_VIRTUAL_ADDRESS_SPACES |
+ HV_FLUSH_ALL_PROCESSORS;
+ flush->gva_list[0] = (u64)data->test_pages;
+ hyperv_hypercall(HVCALL_FLUSH_VIRTUAL_ADDRESS_LIST |
+ (1UL << HV_HYPERCALL_REP_COMP_OFFSET),
+ hcall_gpa, hcall_gpa + PAGE_SIZE);
+ post_test(data, i % 2 ? TESTVAL1 : TESTVAL2,
+ i % 2 ? TESTVAL1 : TESTVAL2);
+ }
+
+ GUEST_SYNC(stage++);
+
+ /* HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE_EX for WORKER_VCPU_ID_2 */
+ for (i = 0; i < NTRY; i++) {
+ prepare_to_test(data);
+ flush_ex->flags = HV_FLUSH_ALL_VIRTUAL_ADDRESS_SPACES;
+ flush_ex->hv_vp_set.format = HV_GENERIC_SET_SPARSE_4K;
+ flush_ex->hv_vp_set.valid_bank_mask = BIT_ULL(WORKER_VCPU_ID_2 / 64);
+ flush_ex->hv_vp_set.bank_contents[0] = BIT_ULL(WORKER_VCPU_ID_2 % 64);
+ hyperv_hypercall(HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE_EX |
+ (1 << HV_HYPERCALL_VARHEAD_OFFSET),
+ hcall_gpa, hcall_gpa + PAGE_SIZE);
+ post_test(data, 0x0, i % 2 ? TESTVAL1 : TESTVAL2);
+ }
+
+ GUEST_SYNC(stage++);
+
+ /* HVCALL_FLUSH_VIRTUAL_ADDRESS_LIST_EX for WORKER_VCPU_ID_2 */
+ for (i = 0; i < NTRY; i++) {
+ prepare_to_test(data);
+ flush_ex->flags = HV_FLUSH_ALL_VIRTUAL_ADDRESS_SPACES;
+ flush_ex->hv_vp_set.format = HV_GENERIC_SET_SPARSE_4K;
+ flush_ex->hv_vp_set.valid_bank_mask = BIT_ULL(WORKER_VCPU_ID_2 / 64);
+ flush_ex->hv_vp_set.bank_contents[0] = BIT_ULL(WORKER_VCPU_ID_2 % 64);
+ /* bank_contents and gva_list occupy the same space, thus [1] */
+ flush_ex->gva_list[1] = (u64)data->test_pages;
+ hyperv_hypercall(HVCALL_FLUSH_VIRTUAL_ADDRESS_LIST_EX |
+ (1 << HV_HYPERCALL_VARHEAD_OFFSET) |
+ (1UL << HV_HYPERCALL_REP_COMP_OFFSET),
+ hcall_gpa, hcall_gpa + PAGE_SIZE);
+ post_test(data, 0x0, i % 2 ? TESTVAL1 : TESTVAL2);
+ }
+
+ GUEST_SYNC(stage++);
+
+ /* HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE_EX for both vCPUs */
+ for (i = 0; i < NTRY; i++) {
+ prepare_to_test(data);
+ flush_ex->flags = HV_FLUSH_ALL_VIRTUAL_ADDRESS_SPACES;
+ flush_ex->hv_vp_set.format = HV_GENERIC_SET_SPARSE_4K;
+ flush_ex->hv_vp_set.valid_bank_mask = BIT_ULL(WORKER_VCPU_ID_2 / 64) |
+ BIT_ULL(WORKER_VCPU_ID_1 / 64);
+ flush_ex->hv_vp_set.bank_contents[0] = BIT_ULL(WORKER_VCPU_ID_1 % 64);
+ flush_ex->hv_vp_set.bank_contents[1] = BIT_ULL(WORKER_VCPU_ID_2 % 64);
+ hyperv_hypercall(HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE_EX |
+ (2 << HV_HYPERCALL_VARHEAD_OFFSET),
+ hcall_gpa, hcall_gpa + PAGE_SIZE);
+ post_test(data, i % 2 ? TESTVAL1 : TESTVAL2,
+ i % 2 ? TESTVAL1 : TESTVAL2);
+ }
+
+ GUEST_SYNC(stage++);
+
+ /* HVCALL_FLUSH_VIRTUAL_ADDRESS_LIST_EX for both vCPUs */
+ for (i = 0; i < NTRY; i++) {
+ prepare_to_test(data);
+ flush_ex->flags = HV_FLUSH_ALL_VIRTUAL_ADDRESS_SPACES;
+ flush_ex->hv_vp_set.format = HV_GENERIC_SET_SPARSE_4K;
+ flush_ex->hv_vp_set.valid_bank_mask = BIT_ULL(WORKER_VCPU_ID_1 / 64) |
+ BIT_ULL(WORKER_VCPU_ID_2 / 64);
+ flush_ex->hv_vp_set.bank_contents[0] = BIT_ULL(WORKER_VCPU_ID_1 % 64);
+ flush_ex->hv_vp_set.bank_contents[1] = BIT_ULL(WORKER_VCPU_ID_2 % 64);
+ /* bank_contents and gva_list occupy the same space, thus [2] */
+ flush_ex->gva_list[2] = (u64)data->test_pages;
+ hyperv_hypercall(HVCALL_FLUSH_VIRTUAL_ADDRESS_LIST_EX |
+ (2 << HV_HYPERCALL_VARHEAD_OFFSET) |
+ (1UL << HV_HYPERCALL_REP_COMP_OFFSET),
+ hcall_gpa, hcall_gpa + PAGE_SIZE);
+ post_test(data, i % 2 ? TESTVAL1 : TESTVAL2,
+ i % 2 ? TESTVAL1 : TESTVAL2);
+ }
+
+ GUEST_SYNC(stage++);
+
+ /* HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE_EX for HV_GENERIC_SET_ALL */
+ for (i = 0; i < NTRY; i++) {
+ prepare_to_test(data);
+ flush_ex->flags = HV_FLUSH_ALL_VIRTUAL_ADDRESS_SPACES;
+ flush_ex->hv_vp_set.format = HV_GENERIC_SET_ALL;
+ hyperv_hypercall(HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE_EX,
+ hcall_gpa, hcall_gpa + PAGE_SIZE);
+ post_test(data, i % 2 ? TESTVAL1 : TESTVAL2,
+ i % 2 ? TESTVAL1 : TESTVAL2);
+ }
+
+ GUEST_SYNC(stage++);
+
+ /* HVCALL_FLUSH_VIRTUAL_ADDRESS_LIST_EX for HV_GENERIC_SET_ALL */
+ for (i = 0; i < NTRY; i++) {
+ prepare_to_test(data);
+ flush_ex->flags = HV_FLUSH_ALL_VIRTUAL_ADDRESS_SPACES;
+ flush_ex->hv_vp_set.format = HV_GENERIC_SET_ALL;
+ flush_ex->gva_list[0] = (u64)data->test_pages;
+ hyperv_hypercall(HVCALL_FLUSH_VIRTUAL_ADDRESS_LIST_EX |
+ (1UL << HV_HYPERCALL_REP_COMP_OFFSET),
+ hcall_gpa, hcall_gpa + PAGE_SIZE);
+ post_test(data, i % 2 ? TESTVAL1 : TESTVAL2,
+ i % 2 ? TESTVAL1 : TESTVAL2);
+ }
+
+ /* "Fast" hypercalls */
+
+ GUEST_SYNC(stage++);
+
+ /* HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE for WORKER_VCPU_ID_1 */
+ for (i = 0; i < NTRY; i++) {
+ prepare_to_test(data);
+ flush->processor_mask = BIT(WORKER_VCPU_ID_1);
+ hyperv_write_xmm_input(&flush->processor_mask, 1);
+ hyperv_hypercall(HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE |
+ HV_HYPERCALL_FAST_BIT, 0x0,
+ HV_FLUSH_ALL_VIRTUAL_ADDRESS_SPACES);
+ post_test(data, i % 2 ? TESTVAL1 : TESTVAL2, 0x0);
+ }
+
+ GUEST_SYNC(stage++);
+
+ /* HVCALL_FLUSH_VIRTUAL_ADDRESS_LIST for WORKER_VCPU_ID_1 */
+ for (i = 0; i < NTRY; i++) {
+ prepare_to_test(data);
+ flush->processor_mask = BIT(WORKER_VCPU_ID_1);
+ flush->gva_list[0] = (u64)data->test_pages;
+ hyperv_write_xmm_input(&flush->processor_mask, 1);
+ hyperv_hypercall(HVCALL_FLUSH_VIRTUAL_ADDRESS_LIST |
+ HV_HYPERCALL_FAST_BIT |
+ (1UL << HV_HYPERCALL_REP_COMP_OFFSET),
+ 0x0, HV_FLUSH_ALL_VIRTUAL_ADDRESS_SPACES);
+ post_test(data, i % 2 ? TESTVAL1 : TESTVAL2, 0x0);
+ }
+
+ GUEST_SYNC(stage++);
+
+ /* HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE for HV_FLUSH_ALL_PROCESSORS */
+ for (i = 0; i < NTRY; i++) {
+ prepare_to_test(data);
+ hyperv_write_xmm_input(&flush->processor_mask, 1);
+ hyperv_hypercall(HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE |
+ HV_HYPERCALL_FAST_BIT, 0x0,
+ HV_FLUSH_ALL_VIRTUAL_ADDRESS_SPACES |
+ HV_FLUSH_ALL_PROCESSORS);
+ post_test(data, i % 2 ? TESTVAL1 : TESTVAL2,
+ i % 2 ? TESTVAL1 : TESTVAL2);
+ }
+
+ GUEST_SYNC(stage++);
+
+ /* HVCALL_FLUSH_VIRTUAL_ADDRESS_LIST for HV_FLUSH_ALL_PROCESSORS */
+ for (i = 0; i < NTRY; i++) {
+ prepare_to_test(data);
+ flush->gva_list[0] = (u64)data->test_pages;
+ hyperv_write_xmm_input(&flush->processor_mask, 1);
+ hyperv_hypercall(HVCALL_FLUSH_VIRTUAL_ADDRESS_LIST |
+ HV_HYPERCALL_FAST_BIT |
+ (1UL << HV_HYPERCALL_REP_COMP_OFFSET), 0x0,
+ HV_FLUSH_ALL_VIRTUAL_ADDRESS_SPACES |
+ HV_FLUSH_ALL_PROCESSORS);
+ post_test(data, i % 2 ? TESTVAL1 : TESTVAL2,
+ i % 2 ? TESTVAL1 : TESTVAL2);
+ }
+
+ GUEST_SYNC(stage++);
+
+ /* HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE_EX for WORKER_VCPU_ID_2 */
+ for (i = 0; i < NTRY; i++) {
+ prepare_to_test(data);
+ flush_ex->hv_vp_set.format = HV_GENERIC_SET_SPARSE_4K;
+ flush_ex->hv_vp_set.valid_bank_mask = BIT_ULL(WORKER_VCPU_ID_2 / 64);
+ flush_ex->hv_vp_set.bank_contents[0] = BIT_ULL(WORKER_VCPU_ID_2 % 64);
+ hyperv_write_xmm_input(&flush_ex->hv_vp_set, 2);
+ hyperv_hypercall(HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE_EX |
+ HV_HYPERCALL_FAST_BIT |
+ (1 << HV_HYPERCALL_VARHEAD_OFFSET),
+ 0x0, HV_FLUSH_ALL_VIRTUAL_ADDRESS_SPACES);
+ post_test(data, 0x0, i % 2 ? TESTVAL1 : TESTVAL2);
+ }
+
+ GUEST_SYNC(stage++);
+
+ /* HVCALL_FLUSH_VIRTUAL_ADDRESS_LIST_EX for WORKER_VCPU_ID_2 */
+ for (i = 0; i < NTRY; i++) {
+ prepare_to_test(data);
+ flush_ex->hv_vp_set.format = HV_GENERIC_SET_SPARSE_4K;
+ flush_ex->hv_vp_set.valid_bank_mask = BIT_ULL(WORKER_VCPU_ID_2 / 64);
+ flush_ex->hv_vp_set.bank_contents[0] = BIT_ULL(WORKER_VCPU_ID_2 % 64);
+ /* bank_contents and gva_list occupy the same space, thus [1] */
+ flush_ex->gva_list[1] = (u64)data->test_pages;
+ hyperv_write_xmm_input(&flush_ex->hv_vp_set, 2);
+ hyperv_hypercall(HVCALL_FLUSH_VIRTUAL_ADDRESS_LIST_EX |
+ HV_HYPERCALL_FAST_BIT |
+ (1 << HV_HYPERCALL_VARHEAD_OFFSET) |
+ (1UL << HV_HYPERCALL_REP_COMP_OFFSET),
+ 0x0, HV_FLUSH_ALL_VIRTUAL_ADDRESS_SPACES);
+ post_test(data, 0x0, i % 2 ? TESTVAL1 : TESTVAL2);
+ }
+
+ GUEST_SYNC(stage++);
+
+ /* HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE_EX for both vCPUs */
+ for (i = 0; i < NTRY; i++) {
+ prepare_to_test(data);
+ flush_ex->hv_vp_set.format = HV_GENERIC_SET_SPARSE_4K;
+ flush_ex->hv_vp_set.valid_bank_mask = BIT_ULL(WORKER_VCPU_ID_2 / 64) |
+ BIT_ULL(WORKER_VCPU_ID_1 / 64);
+ flush_ex->hv_vp_set.bank_contents[0] = BIT_ULL(WORKER_VCPU_ID_1 % 64);
+ flush_ex->hv_vp_set.bank_contents[1] = BIT_ULL(WORKER_VCPU_ID_2 % 64);
+ hyperv_write_xmm_input(&flush_ex->hv_vp_set, 2);
+ hyperv_hypercall(HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE_EX |
+ HV_HYPERCALL_FAST_BIT |
+ (2 << HV_HYPERCALL_VARHEAD_OFFSET),
+ 0x0, HV_FLUSH_ALL_VIRTUAL_ADDRESS_SPACES);
+ post_test(data, i % 2 ? TESTVAL1 :
+ TESTVAL2, i % 2 ? TESTVAL1 : TESTVAL2);
+ }
+
+ GUEST_SYNC(stage++);
+
+ /* HVCALL_FLUSH_VIRTUAL_ADDRESS_LIST_EX for both vCPUs */
+ for (i = 0; i < NTRY; i++) {
+ prepare_to_test(data);
+ flush_ex->hv_vp_set.format = HV_GENERIC_SET_SPARSE_4K;
+ flush_ex->hv_vp_set.valid_bank_mask = BIT_ULL(WORKER_VCPU_ID_1 / 64) |
+ BIT_ULL(WORKER_VCPU_ID_2 / 64);
+ flush_ex->hv_vp_set.bank_contents[0] = BIT_ULL(WORKER_VCPU_ID_1 % 64);
+ flush_ex->hv_vp_set.bank_contents[1] = BIT_ULL(WORKER_VCPU_ID_2 % 64);
+ /* bank_contents and gva_list occupy the same space, thus [2] */
+ flush_ex->gva_list[2] = (u64)data->test_pages;
+ hyperv_write_xmm_input(&flush_ex->hv_vp_set, 3);
+ hyperv_hypercall(HVCALL_FLUSH_VIRTUAL_ADDRESS_LIST_EX |
+ HV_HYPERCALL_FAST_BIT |
+ (2 << HV_HYPERCALL_VARHEAD_OFFSET) |
+ (1UL << HV_HYPERCALL_REP_COMP_OFFSET),
+ 0x0, HV_FLUSH_ALL_VIRTUAL_ADDRESS_SPACES);
+ post_test(data, i % 2 ? TESTVAL1 : TESTVAL2,
+ i % 2 ? TESTVAL1 : TESTVAL2);
+ }
+
+ GUEST_SYNC(stage++);
+
+ /* HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE_EX for HV_GENERIC_SET_ALL */
+ for (i = 0; i < NTRY; i++) {
+ prepare_to_test(data);
+ flush_ex->flags = HV_FLUSH_ALL_VIRTUAL_ADDRESS_SPACES;
+ flush_ex->hv_vp_set.format = HV_GENERIC_SET_ALL;
+ hyperv_write_xmm_input(&flush_ex->hv_vp_set, 2);
+ hyperv_hypercall(HVCALL_FLUSH_VIRTUAL_ADDRESS_SPACE_EX |
+ HV_HYPERCALL_FAST_BIT,
+ 0x0, HV_FLUSH_ALL_VIRTUAL_ADDRESS_SPACES);
+ post_test(data, i % 2 ? TESTVAL1 : TESTVAL2,
+ i % 2 ? TESTVAL1 : TESTVAL2);
+ }
+
+ GUEST_SYNC(stage++);
+
+ /* HVCALL_FLUSH_VIRTUAL_ADDRESS_LIST_EX for HV_GENERIC_SET_ALL */
+ for (i = 0; i < NTRY; i++) {
+ prepare_to_test(data);
+ flush_ex->flags = HV_FLUSH_ALL_VIRTUAL_ADDRESS_SPACES;
+ flush_ex->hv_vp_set.format = HV_GENERIC_SET_ALL;
+ flush_ex->gva_list[0] = (u64)data->test_pages;
+ hyperv_write_xmm_input(&flush_ex->hv_vp_set, 2);
+ hyperv_hypercall(HVCALL_FLUSH_VIRTUAL_ADDRESS_LIST_EX |
+ HV_HYPERCALL_FAST_BIT |
+ (1UL << HV_HYPERCALL_REP_COMP_OFFSET),
+ 0x0, HV_FLUSH_ALL_VIRTUAL_ADDRESS_SPACES);
+ post_test(data, i % 2 ? TESTVAL1 : TESTVAL2,
+ i % 2 ? TESTVAL1 : TESTVAL2);
+ }
+
+ GUEST_DONE();
+}
+
+static void *vcpu_thread(void *arg)
+{
+ struct kvm_vcpu *vcpu = (struct kvm_vcpu *)arg;
+ struct ucall uc;
+ int old;
+ int r;
+
+ r = pthread_setcanceltype(PTHREAD_CANCEL_ASYNCHRONOUS, &old);
+ TEST_ASSERT(!r, "pthread_setcanceltype failed on vcpu_id=%u with errno=%d",
+ vcpu->id, r);
+
+ vcpu_run(vcpu);
+ TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_IO);
+
+ switch (get_ucall(vcpu, &uc)) {
+ case UCALL_ABORT:
+ REPORT_GUEST_ASSERT(uc);
+ /* NOT REACHED */
+ default:
+ TEST_FAIL("Unexpected ucall %lu, vCPU %d", uc.cmd, vcpu->id);
+ }
+
+ return NULL;
+}
+
+static void cancel_join_vcpu_thread(pthread_t thread, struct kvm_vcpu *vcpu)
+{
+ void *retval;
+ int r;
+
+ r = pthread_cancel(thread);
+ TEST_ASSERT(!r, "pthread_cancel on vcpu_id=%d failed with errno=%d",
+ vcpu->id, r);
+
+ r = pthread_join(thread, &retval);
+ TEST_ASSERT(!r, "pthread_join on vcpu_id=%d failed with errno=%d",
+ vcpu->id, r);
+ TEST_ASSERT(retval == PTHREAD_CANCELED,
+ "expected retval=%p, got %p", PTHREAD_CANCELED,
+ retval);
+}
+
+int main(int argc, char *argv[])
+{
+ struct kvm_vm *vm;
+ struct kvm_vcpu *vcpu[3];
+ pthread_t threads[2];
+ vm_vaddr_t test_data_page, gva;
+ vm_paddr_t gpa;
+ uint64_t *pte;
+ struct test_data *data;
+ struct ucall uc;
+ int stage = 1, r, i;
+
+ TEST_REQUIRE(kvm_has_cap(KVM_CAP_HYPERV_TLBFLUSH));
+
+ vm = vm_create_with_one_vcpu(&vcpu[0], sender_guest_code);
+
+ /* Test data page */
+ test_data_page = vm_vaddr_alloc_page(vm);
+ data = (struct test_data *)addr_gva2hva(vm, test_data_page);
+
+ /* Hypercall input/output */
+ data->hcall_gva = vm_vaddr_alloc_pages(vm, 2);
+ data->hcall_gpa = addr_gva2gpa(vm, data->hcall_gva);
+ memset(addr_gva2hva(vm, data->hcall_gva), 0x0, 2 * PAGE_SIZE);
+
+ /*
+ * Test pages: the first one is filled with '0x01's, the second with '0x02's
+ * and the test will swap their mappings. The third page keeps the indication
+ * about the current state of mappings.
+ */
+ data->test_pages = vm_vaddr_alloc_pages(vm, NTEST_PAGES + 1);
+ for (i = 0; i < NTEST_PAGES; i++)
+ memset(addr_gva2hva(vm, data->test_pages + PAGE_SIZE * i),
+ (u8)(i + 1), PAGE_SIZE);
+ set_expected_val(addr_gva2hva(vm, data->test_pages), 0x0, WORKER_VCPU_ID_1);
+ set_expected_val(addr_gva2hva(vm, data->test_pages), 0x0, WORKER_VCPU_ID_2);
+
+ /*
+ * Get PTE pointers for test pages and map them inside the guest.
+ * Use separate page for each PTE for simplicity.
+ */
+ gva = vm_vaddr_unused_gap(vm, NTEST_PAGES * PAGE_SIZE, KVM_UTIL_MIN_VADDR);
+ for (i = 0; i < NTEST_PAGES; i++) {
+ pte = vm_get_page_table_entry(vm, data->test_pages + i * PAGE_SIZE);
+ gpa = addr_hva2gpa(vm, pte);
+ __virt_pg_map(vm, gva + PAGE_SIZE * i, gpa & PAGE_MASK, PG_LEVEL_4K);
+ data->test_pages_pte[i] = gva + (gpa & ~PAGE_MASK);
+ }
+
+ /*
+ * Sender vCPU which performs the test: swaps test pages, sets expectation
+ * for 'workers' and issues TLB flush hypercalls.
+ */
+ vcpu_args_set(vcpu[0], 1, test_data_page);
+ vcpu_set_hv_cpuid(vcpu[0]);
+
+ /* Create worker vCPUs which check the contents of the test pages */
+ vcpu[1] = vm_vcpu_add(vm, WORKER_VCPU_ID_1, worker_guest_code);
+ vcpu_args_set(vcpu[1], 1, test_data_page);
+ vcpu_set_msr(vcpu[1], HV_X64_MSR_VP_INDEX, WORKER_VCPU_ID_1);
+ vcpu_set_hv_cpuid(vcpu[1]);
+
+ vcpu[2] = vm_vcpu_add(vm, WORKER_VCPU_ID_2, worker_guest_code);
+ vcpu_args_set(vcpu[2], 1, test_data_page);
+ vcpu_set_msr(vcpu[2], HV_X64_MSR_VP_INDEX, WORKER_VCPU_ID_2);
+ vcpu_set_hv_cpuid(vcpu[2]);
+
+ r = pthread_create(&threads[0], NULL, vcpu_thread, vcpu[1]);
+ TEST_ASSERT(!r, "pthread_create() failed");
+
+ r = pthread_create(&threads[1], NULL, vcpu_thread, vcpu[2]);
+ TEST_ASSERT(!r, "pthread_create() failed");
+
+ while (true) {
+ vcpu_run(vcpu[0]);
+ TEST_ASSERT_KVM_EXIT_REASON(vcpu[0], KVM_EXIT_IO);
+
+ switch (get_ucall(vcpu[0], &uc)) {
+ case UCALL_SYNC:
+ TEST_ASSERT(uc.args[1] == stage,
+ "Unexpected stage: %ld (%d expected)",
+ uc.args[1], stage);
+ break;
+ case UCALL_ABORT:
+ REPORT_GUEST_ASSERT(uc);
+ /* NOT REACHED */
+ case UCALL_DONE:
+ goto done;
+ default:
+ TEST_FAIL("Unknown ucall %lu", uc.cmd);
+ }
+
+ stage++;
+ }
+
+done:
+ cancel_join_vcpu_thread(threads[0], vcpu[1]);
+ cancel_join_vcpu_thread(threads[1], vcpu[2]);
+ kvm_vm_free(vm);
+
+ return 0;
+}
diff --git a/tools/testing/selftests/kvm/x86_64/kvm_clock_test.c b/tools/testing/selftests/kvm/x86_64/kvm_clock_test.c
new file mode 100644
index 000000000000..5bc12222d87a
--- /dev/null
+++ b/tools/testing/selftests/kvm/x86_64/kvm_clock_test.c
@@ -0,0 +1,156 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright (C) 2021, Google LLC.
+ *
+ * Tests for adjusting the KVM clock from userspace
+ */
+#include <asm/kvm_para.h>
+#include <asm/pvclock.h>
+#include <asm/pvclock-abi.h>
+#include <stdint.h>
+#include <string.h>
+#include <sys/stat.h>
+#include <time.h>
+
+#include "test_util.h"
+#include "kvm_util.h"
+#include "processor.h"
+
+struct test_case {
+ uint64_t kvmclock_base;
+ int64_t realtime_offset;
+};
+
+static struct test_case test_cases[] = {
+ { .kvmclock_base = 0 },
+ { .kvmclock_base = 180 * NSEC_PER_SEC },
+ { .kvmclock_base = 0, .realtime_offset = -180 * NSEC_PER_SEC },
+ { .kvmclock_base = 0, .realtime_offset = 180 * NSEC_PER_SEC },
+};
+
+#define GUEST_SYNC_CLOCK(__stage, __val) \
+ GUEST_SYNC_ARGS(__stage, __val, 0, 0, 0)
+
+static void guest_main(vm_paddr_t pvti_pa, struct pvclock_vcpu_time_info *pvti)
+{
+ int i;
+
+ wrmsr(MSR_KVM_SYSTEM_TIME_NEW, pvti_pa | KVM_MSR_ENABLED);
+ for (i = 0; i < ARRAY_SIZE(test_cases); i++)
+ GUEST_SYNC_CLOCK(i, __pvclock_read_cycles(pvti, rdtsc()));
+}
+
+#define EXPECTED_FLAGS (KVM_CLOCK_REALTIME | KVM_CLOCK_HOST_TSC)
+
+static inline void assert_flags(struct kvm_clock_data *data)
+{
+ TEST_ASSERT((data->flags & EXPECTED_FLAGS) == EXPECTED_FLAGS,
+ "unexpected clock data flags: %x (want set: %x)",
+ data->flags, EXPECTED_FLAGS);
+}
+
+static void handle_sync(struct ucall *uc, struct kvm_clock_data *start,
+ struct kvm_clock_data *end)
+{
+ uint64_t obs, exp_lo, exp_hi;
+
+ obs = uc->args[2];
+ exp_lo = start->clock;
+ exp_hi = end->clock;
+
+ assert_flags(start);
+ assert_flags(end);
+
+ TEST_ASSERT(exp_lo <= obs && obs <= exp_hi,
+ "unexpected kvm-clock value: %"PRIu64" expected range: [%"PRIu64", %"PRIu64"]",
+ obs, exp_lo, exp_hi);
+
+ pr_info("kvm-clock value: %"PRIu64" expected range [%"PRIu64", %"PRIu64"]\n",
+ obs, exp_lo, exp_hi);
+}
+
+static void handle_abort(struct ucall *uc)
+{
+ REPORT_GUEST_ASSERT(*uc);
+}
+
+static void setup_clock(struct kvm_vm *vm, struct test_case *test_case)
+{
+ struct kvm_clock_data data;
+
+ memset(&data, 0, sizeof(data));
+
+ data.clock = test_case->kvmclock_base;
+ if (test_case->realtime_offset) {
+ struct timespec ts;
+ int r;
+
+ data.flags |= KVM_CLOCK_REALTIME;
+ do {
+ r = clock_gettime(CLOCK_REALTIME, &ts);
+ if (!r)
+ break;
+ } while (errno == EINTR);
+
+ TEST_ASSERT(!r, "clock_gettime() failed: %d", r);
+
+ data.realtime = ts.tv_sec * NSEC_PER_SEC;
+ data.realtime += ts.tv_nsec;
+ data.realtime += test_case->realtime_offset;
+ }
+
+ vm_ioctl(vm, KVM_SET_CLOCK, &data);
+}
+
+static void enter_guest(struct kvm_vcpu *vcpu)
+{
+ struct kvm_clock_data start, end;
+ struct kvm_vm *vm = vcpu->vm;
+ struct ucall uc;
+ int i;
+
+ for (i = 0; i < ARRAY_SIZE(test_cases); i++) {
+ setup_clock(vm, &test_cases[i]);
+
+ vm_ioctl(vm, KVM_GET_CLOCK, &start);
+
+ vcpu_run(vcpu);
+ vm_ioctl(vm, KVM_GET_CLOCK, &end);
+
+ TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_IO);
+
+ switch (get_ucall(vcpu, &uc)) {
+ case UCALL_SYNC:
+ handle_sync(&uc, &start, &end);
+ break;
+ case UCALL_ABORT:
+ handle_abort(&uc);
+ return;
+ default:
+ TEST_ASSERT(0, "unhandled ucall: %ld", uc.cmd);
+ }
+ }
+}
+
+int main(void)
+{
+ struct kvm_vcpu *vcpu;
+ vm_vaddr_t pvti_gva;
+ vm_paddr_t pvti_gpa;
+ struct kvm_vm *vm;
+ int flags;
+
+ flags = kvm_check_cap(KVM_CAP_ADJUST_CLOCK);
+ TEST_REQUIRE(flags & KVM_CLOCK_REALTIME);
+
+ TEST_REQUIRE(sys_clocksource_is_based_on_tsc());
+
+ vm = vm_create_with_one_vcpu(&vcpu, guest_main);
+
+ pvti_gva = vm_vaddr_alloc(vm, getpagesize(), 0x10000);
+ pvti_gpa = addr_gva2gpa(vm, pvti_gva);
+ vcpu_args_set(vcpu, 2, pvti_gpa, pvti_gva);
+
+ enter_guest(vcpu);
+ kvm_vm_free(vm);
+}
diff --git a/tools/testing/selftests/kvm/x86_64/kvm_pv_test.c b/tools/testing/selftests/kvm/x86_64/kvm_pv_test.c
new file mode 100644
index 000000000000..9e2879af7c20
--- /dev/null
+++ b/tools/testing/selftests/kvm/x86_64/kvm_pv_test.c
@@ -0,0 +1,154 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright (C) 2020, Google LLC.
+ *
+ * Tests for KVM paravirtual feature disablement
+ */
+#include <asm/kvm_para.h>
+#include <linux/kvm_para.h>
+#include <stdint.h>
+
+#include "test_util.h"
+#include "kvm_util.h"
+#include "processor.h"
+
+struct msr_data {
+ uint32_t idx;
+ const char *name;
+};
+
+#define TEST_MSR(msr) { .idx = msr, .name = #msr }
+#define UCALL_PR_MSR 0xdeadbeef
+#define PR_MSR(msr) ucall(UCALL_PR_MSR, 1, msr)
+
+/*
+ * KVM paravirtual msrs to test. Expect a #GP if any of these msrs are read or
+ * written, as the KVM_CPUID_FEATURES leaf is cleared.
+ */
+static struct msr_data msrs_to_test[] = {
+ TEST_MSR(MSR_KVM_SYSTEM_TIME),
+ TEST_MSR(MSR_KVM_SYSTEM_TIME_NEW),
+ TEST_MSR(MSR_KVM_WALL_CLOCK),
+ TEST_MSR(MSR_KVM_WALL_CLOCK_NEW),
+ TEST_MSR(MSR_KVM_ASYNC_PF_EN),
+ TEST_MSR(MSR_KVM_STEAL_TIME),
+ TEST_MSR(MSR_KVM_PV_EOI_EN),
+ TEST_MSR(MSR_KVM_POLL_CONTROL),
+ TEST_MSR(MSR_KVM_ASYNC_PF_INT),
+ TEST_MSR(MSR_KVM_ASYNC_PF_ACK),
+};
+
+static void test_msr(struct msr_data *msr)
+{
+ uint64_t ignored;
+ uint8_t vector;
+
+ PR_MSR(msr);
+
+ vector = rdmsr_safe(msr->idx, &ignored);
+ GUEST_ASSERT_EQ(vector, GP_VECTOR);
+
+ vector = wrmsr_safe(msr->idx, 0);
+ GUEST_ASSERT_EQ(vector, GP_VECTOR);
+}
+
+struct hcall_data {
+ uint64_t nr;
+ const char *name;
+};
+
+#define TEST_HCALL(hc) { .nr = hc, .name = #hc }
+#define UCALL_PR_HCALL 0xdeadc0de
+#define PR_HCALL(hc) ucall(UCALL_PR_HCALL, 1, hc)
+
+/*
+ * KVM hypercalls to test. Expect -KVM_ENOSYS when called, as the corresponding
+ * features have been cleared in KVM_CPUID_FEATURES.
+ */
+static struct hcall_data hcalls_to_test[] = {
+ TEST_HCALL(KVM_HC_KICK_CPU),
+ TEST_HCALL(KVM_HC_SEND_IPI),
+ TEST_HCALL(KVM_HC_SCHED_YIELD),
+};
+
+static void test_hcall(struct hcall_data *hc)
+{
+ uint64_t r;
+
+ PR_HCALL(hc);
+ r = kvm_hypercall(hc->nr, 0, 0, 0, 0);
+ GUEST_ASSERT_EQ(r, -KVM_ENOSYS);
+}
+
+static void guest_main(void)
+{
+ int i;
+
+ for (i = 0; i < ARRAY_SIZE(msrs_to_test); i++) {
+ test_msr(&msrs_to_test[i]);
+ }
+
+ for (i = 0; i < ARRAY_SIZE(hcalls_to_test); i++) {
+ test_hcall(&hcalls_to_test[i]);
+ }
+
+ GUEST_DONE();
+}
+
+static void pr_msr(struct ucall *uc)
+{
+ struct msr_data *msr = (struct msr_data *)uc->args[0];
+
+ pr_info("testing msr: %s (%#x)\n", msr->name, msr->idx);
+}
+
+static void pr_hcall(struct ucall *uc)
+{
+ struct hcall_data *hc = (struct hcall_data *)uc->args[0];
+
+ pr_info("testing hcall: %s (%lu)\n", hc->name, hc->nr);
+}
+
+static void enter_guest(struct kvm_vcpu *vcpu)
+{
+ struct ucall uc;
+
+ while (true) {
+ vcpu_run(vcpu);
+ TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_IO);
+
+ switch (get_ucall(vcpu, &uc)) {
+ case UCALL_PR_MSR:
+ pr_msr(&uc);
+ break;
+ case UCALL_PR_HCALL:
+ pr_hcall(&uc);
+ break;
+ case UCALL_ABORT:
+ REPORT_GUEST_ASSERT(uc);
+ return;
+ case UCALL_DONE:
+ return;
+ }
+ }
+}
+
+int main(void)
+{
+ struct kvm_vcpu *vcpu;
+ struct kvm_vm *vm;
+
+ TEST_REQUIRE(kvm_has_cap(KVM_CAP_ENFORCE_PV_FEATURE_CPUID));
+
+ vm = vm_create_with_one_vcpu(&vcpu, guest_main);
+
+ vcpu_enable_cap(vcpu, KVM_CAP_ENFORCE_PV_FEATURE_CPUID, 1);
+
+ vcpu_clear_cpuid_entry(vcpu, KVM_CPUID_FEATURES);
+
+ vm_init_descriptor_tables(vm);
+ vcpu_init_descriptor_tables(vcpu);
+
+ enter_guest(vcpu);
+ kvm_vm_free(vm);
+}
diff --git a/tools/testing/selftests/kvm/x86_64/max_vcpuid_cap_test.c b/tools/testing/selftests/kvm/x86_64/max_vcpuid_cap_test.c
new file mode 100644
index 000000000000..3cc4b86832fe
--- /dev/null
+++ b/tools/testing/selftests/kvm/x86_64/max_vcpuid_cap_test.c
@@ -0,0 +1,44 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * maximum APIC ID capability tests
+ *
+ * Copyright (C) 2022, Intel, Inc.
+ *
+ * Tests for getting/setting maximum APIC ID capability
+ */
+
+#include "kvm_util.h"
+
+#define MAX_VCPU_ID 2
+
+int main(int argc, char *argv[])
+{
+ struct kvm_vm *vm;
+ int ret;
+
+ vm = vm_create_barebones();
+
+ /* Get KVM_CAP_MAX_VCPU_ID cap supported in KVM */
+ ret = vm_check_cap(vm, KVM_CAP_MAX_VCPU_ID);
+
+ /* Try to set KVM_CAP_MAX_VCPU_ID beyond KVM cap */
+ ret = __vm_enable_cap(vm, KVM_CAP_MAX_VCPU_ID, ret + 1);
+ TEST_ASSERT(ret < 0,
+ "Setting KVM_CAP_MAX_VCPU_ID beyond KVM cap should fail");
+
+ /* Set KVM_CAP_MAX_VCPU_ID */
+ vm_enable_cap(vm, KVM_CAP_MAX_VCPU_ID, MAX_VCPU_ID);
+
+
+ /* Try to set KVM_CAP_MAX_VCPU_ID again */
+ ret = __vm_enable_cap(vm, KVM_CAP_MAX_VCPU_ID, MAX_VCPU_ID + 1);
+ TEST_ASSERT(ret < 0,
+ "Setting KVM_CAP_MAX_VCPU_ID multiple times should fail");
+
+ /* Create vCPU with id beyond KVM_CAP_MAX_VCPU_ID cap*/
+ ret = __vm_ioctl(vm, KVM_CREATE_VCPU, (void *)MAX_VCPU_ID);
+ TEST_ASSERT(ret < 0, "Creating vCPU with ID > MAX_VCPU_ID should fail");
+
+ kvm_vm_free(vm);
+ return 0;
+}
diff --git a/tools/testing/selftests/kvm/x86_64/mmio_warning_test.c b/tools/testing/selftests/kvm/x86_64/mmio_warning_test.c
deleted file mode 100644
index e6480fd5c4bd..000000000000
--- a/tools/testing/selftests/kvm/x86_64/mmio_warning_test.c
+++ /dev/null
@@ -1,126 +0,0 @@
-/*
- * mmio_warning_test
- *
- * Copyright (C) 2019, Google LLC.
- *
- * This work is licensed under the terms of the GNU GPL, version 2.
- *
- * Test that we don't get a kernel warning when we call KVM_RUN after a
- * triple fault occurs. To get the triple fault to occur we call KVM_RUN
- * on a VCPU that hasn't been properly setup.
- *
- */
-
-#define _GNU_SOURCE
-#include <fcntl.h>
-#include <kvm_util.h>
-#include <linux/kvm.h>
-#include <processor.h>
-#include <pthread.h>
-#include <stdio.h>
-#include <stdlib.h>
-#include <string.h>
-#include <sys/ioctl.h>
-#include <sys/mman.h>
-#include <sys/stat.h>
-#include <sys/types.h>
-#include <sys/wait.h>
-#include <test_util.h>
-#include <unistd.h>
-
-#define NTHREAD 4
-#define NPROCESS 5
-
-struct thread_context {
- int kvmcpu;
- struct kvm_run *run;
-};
-
-void *thr(void *arg)
-{
- struct thread_context *tc = (struct thread_context *)arg;
- int res;
- int kvmcpu = tc->kvmcpu;
- struct kvm_run *run = tc->run;
-
- res = ioctl(kvmcpu, KVM_RUN, 0);
- pr_info("ret1=%d exit_reason=%d suberror=%d\n",
- res, run->exit_reason, run->internal.suberror);
-
- return 0;
-}
-
-void test(void)
-{
- int i, kvm, kvmvm, kvmcpu;
- pthread_t th[NTHREAD];
- struct kvm_run *run;
- struct thread_context tc;
-
- kvm = open("/dev/kvm", O_RDWR);
- TEST_ASSERT(kvm != -1, "failed to open /dev/kvm");
- kvmvm = ioctl(kvm, KVM_CREATE_VM, 0);
- TEST_ASSERT(kvmvm != -1, "KVM_CREATE_VM failed");
- kvmcpu = ioctl(kvmvm, KVM_CREATE_VCPU, 0);
- TEST_ASSERT(kvmcpu != -1, "KVM_CREATE_VCPU failed");
- run = (struct kvm_run *)mmap(0, 4096, PROT_READ|PROT_WRITE, MAP_SHARED,
- kvmcpu, 0);
- tc.kvmcpu = kvmcpu;
- tc.run = run;
- srand(getpid());
- for (i = 0; i < NTHREAD; i++) {
- pthread_create(&th[i], NULL, thr, (void *)(uintptr_t)&tc);
- usleep(rand() % 10000);
- }
- for (i = 0; i < NTHREAD; i++)
- pthread_join(th[i], NULL);
-}
-
-int get_warnings_count(void)
-{
- int warnings;
- FILE *f;
-
- f = popen("dmesg | grep \"WARNING:\" | wc -l", "r");
- fscanf(f, "%d", &warnings);
- fclose(f);
-
- return warnings;
-}
-
-int main(void)
-{
- int warnings_before, warnings_after;
-
- if (!is_intel_cpu()) {
- print_skip("Must be run on an Intel CPU");
- exit(KSFT_SKIP);
- }
-
- if (vm_is_unrestricted_guest(NULL)) {
- print_skip("Unrestricted guest must be disabled");
- exit(KSFT_SKIP);
- }
-
- warnings_before = get_warnings_count();
-
- for (int i = 0; i < NPROCESS; ++i) {
- int status;
- int pid = fork();
-
- if (pid < 0)
- exit(1);
- if (pid == 0) {
- test();
- exit(0);
- }
- while (waitpid(pid, &status, __WALL) != pid)
- ;
- }
-
- warnings_after = get_warnings_count();
- TEST_ASSERT(warnings_before == warnings_after,
- "Warnings found in kernel. Run 'dmesg' to inspect them.");
-
- return 0;
-}
diff --git a/tools/testing/selftests/kvm/x86_64/monitor_mwait_test.c b/tools/testing/selftests/kvm/x86_64/monitor_mwait_test.c
new file mode 100644
index 000000000000..853802641e1e
--- /dev/null
+++ b/tools/testing/selftests/kvm/x86_64/monitor_mwait_test.c
@@ -0,0 +1,131 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <fcntl.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/ioctl.h>
+
+#include "kvm_util.h"
+#include "processor.h"
+
+#define CPUID_MWAIT (1u << 3)
+
+enum monitor_mwait_testcases {
+ MWAIT_QUIRK_DISABLED = BIT(0),
+ MISC_ENABLES_QUIRK_DISABLED = BIT(1),
+ MWAIT_DISABLED = BIT(2),
+};
+
+/*
+ * If both MWAIT and its quirk are disabled, MONITOR/MWAIT should #UD, in all
+ * other scenarios KVM should emulate them as nops.
+ */
+#define GUEST_ASSERT_MONITOR_MWAIT(insn, testcase, vector) \
+do { \
+ bool fault_wanted = ((testcase) & MWAIT_QUIRK_DISABLED) && \
+ ((testcase) & MWAIT_DISABLED); \
+ \
+ if (fault_wanted) \
+ __GUEST_ASSERT((vector) == UD_VECTOR, \
+ "Expected #UD on " insn " for testcase '0x%x', got '0x%x'", \
+ testcase, vector); \
+ else \
+ __GUEST_ASSERT(!(vector), \
+ "Expected success on " insn " for testcase '0x%x', got '0x%x'", \
+ testcase, vector); \
+} while (0)
+
+static void guest_monitor_wait(int testcase)
+{
+ u8 vector;
+
+ GUEST_SYNC(testcase);
+
+ /*
+ * Arbitrarily MONITOR this function, SVM performs fault checks before
+ * intercept checks, so the inputs for MONITOR and MWAIT must be valid.
+ */
+ vector = kvm_asm_safe("monitor", "a"(guest_monitor_wait), "c"(0), "d"(0));
+ GUEST_ASSERT_MONITOR_MWAIT("MONITOR", testcase, vector);
+
+ vector = kvm_asm_safe("mwait", "a"(guest_monitor_wait), "c"(0), "d"(0));
+ GUEST_ASSERT_MONITOR_MWAIT("MWAIT", testcase, vector);
+}
+
+static void guest_code(void)
+{
+ guest_monitor_wait(MWAIT_DISABLED);
+
+ guest_monitor_wait(MWAIT_QUIRK_DISABLED | MWAIT_DISABLED);
+
+ guest_monitor_wait(MISC_ENABLES_QUIRK_DISABLED | MWAIT_DISABLED);
+ guest_monitor_wait(MISC_ENABLES_QUIRK_DISABLED);
+
+ guest_monitor_wait(MISC_ENABLES_QUIRK_DISABLED | MWAIT_QUIRK_DISABLED | MWAIT_DISABLED);
+ guest_monitor_wait(MISC_ENABLES_QUIRK_DISABLED | MWAIT_QUIRK_DISABLED);
+
+ GUEST_DONE();
+}
+
+int main(int argc, char *argv[])
+{
+ uint64_t disabled_quirks;
+ struct kvm_vcpu *vcpu;
+ struct kvm_vm *vm;
+ struct ucall uc;
+ int testcase;
+
+ TEST_REQUIRE(kvm_has_cap(KVM_CAP_DISABLE_QUIRKS2));
+
+ vm = vm_create_with_one_vcpu(&vcpu, guest_code);
+ vcpu_clear_cpuid_feature(vcpu, X86_FEATURE_MWAIT);
+
+ vm_init_descriptor_tables(vm);
+ vcpu_init_descriptor_tables(vcpu);
+
+ while (1) {
+ vcpu_run(vcpu);
+ TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_IO);
+
+ switch (get_ucall(vcpu, &uc)) {
+ case UCALL_SYNC:
+ testcase = uc.args[1];
+ break;
+ case UCALL_ABORT:
+ REPORT_GUEST_ASSERT(uc);
+ goto done;
+ case UCALL_DONE:
+ goto done;
+ default:
+ TEST_FAIL("Unknown ucall %lu", uc.cmd);
+ goto done;
+ }
+
+ disabled_quirks = 0;
+ if (testcase & MWAIT_QUIRK_DISABLED)
+ disabled_quirks |= KVM_X86_QUIRK_MWAIT_NEVER_UD_FAULTS;
+ if (testcase & MISC_ENABLES_QUIRK_DISABLED)
+ disabled_quirks |= KVM_X86_QUIRK_MISC_ENABLE_NO_MWAIT;
+ vm_enable_cap(vm, KVM_CAP_DISABLE_QUIRKS2, disabled_quirks);
+
+ /*
+ * If the MISC_ENABLES quirk (KVM neglects to update CPUID to
+ * enable/disable MWAIT) is disabled, toggle the ENABLE_MWAIT
+ * bit in MISC_ENABLES accordingly. If the quirk is enabled,
+ * the only valid configuration is MWAIT disabled, as CPUID
+ * can't be manually changed after running the vCPU.
+ */
+ if (!(testcase & MISC_ENABLES_QUIRK_DISABLED)) {
+ TEST_ASSERT(testcase & MWAIT_DISABLED,
+ "Can't toggle CPUID features after running vCPU");
+ continue;
+ }
+
+ vcpu_set_msr(vcpu, MSR_IA32_MISC_ENABLE,
+ (testcase & MWAIT_DISABLED) ? 0 : MSR_IA32_MISC_ENABLE_MWAIT);
+ }
+
+done:
+ kvm_vm_free(vm);
+ return 0;
+}
diff --git a/tools/testing/selftests/kvm/x86_64/nested_exceptions_test.c b/tools/testing/selftests/kvm/x86_64/nested_exceptions_test.c
new file mode 100644
index 000000000000..3670331adf21
--- /dev/null
+++ b/tools/testing/selftests/kvm/x86_64/nested_exceptions_test.c
@@ -0,0 +1,290 @@
+// SPDX-License-Identifier: GPL-2.0-only
+#define _GNU_SOURCE /* for program_invocation_short_name */
+
+#include "test_util.h"
+#include "kvm_util.h"
+#include "processor.h"
+#include "vmx.h"
+#include "svm_util.h"
+
+#define L2_GUEST_STACK_SIZE 256
+
+/*
+ * Arbitrary, never shoved into KVM/hardware, just need to avoid conflict with
+ * the "real" exceptions used, #SS/#GP/#DF (12/13/8).
+ */
+#define FAKE_TRIPLE_FAULT_VECTOR 0xaa
+
+/* Arbitrary 32-bit error code injected by this test. */
+#define SS_ERROR_CODE 0xdeadbeef
+
+/*
+ * Bit '0' is set on Intel if the exception occurs while delivering a previous
+ * event/exception. AMD's wording is ambiguous, but presumably the bit is set
+ * if the exception occurs while delivering an external event, e.g. NMI or INTR,
+ * but not for exceptions that occur when delivering other exceptions or
+ * software interrupts.
+ *
+ * Note, Intel's name for it, "External event", is misleading and much more
+ * aligned with AMD's behavior, but the SDM is quite clear on its behavior.
+ */
+#define ERROR_CODE_EXT_FLAG BIT(0)
+
+/*
+ * Bit '1' is set if the fault occurred when looking up a descriptor in the
+ * IDT, which is the case here as the IDT is empty/NULL.
+ */
+#define ERROR_CODE_IDT_FLAG BIT(1)
+
+/*
+ * The #GP that occurs when vectoring #SS should show the index into the IDT
+ * for #SS, plus have the "IDT flag" set.
+ */
+#define GP_ERROR_CODE_AMD ((SS_VECTOR * 8) | ERROR_CODE_IDT_FLAG)
+#define GP_ERROR_CODE_INTEL ((SS_VECTOR * 8) | ERROR_CODE_IDT_FLAG | ERROR_CODE_EXT_FLAG)
+
+/*
+ * Intel and AMD both shove '0' into the error code on #DF, regardless of what
+ * led to the double fault.
+ */
+#define DF_ERROR_CODE 0
+
+#define INTERCEPT_SS (BIT_ULL(SS_VECTOR))
+#define INTERCEPT_SS_DF (INTERCEPT_SS | BIT_ULL(DF_VECTOR))
+#define INTERCEPT_SS_GP_DF (INTERCEPT_SS_DF | BIT_ULL(GP_VECTOR))
+
+static void l2_ss_pending_test(void)
+{
+ GUEST_SYNC(SS_VECTOR);
+}
+
+static void l2_ss_injected_gp_test(void)
+{
+ GUEST_SYNC(GP_VECTOR);
+}
+
+static void l2_ss_injected_df_test(void)
+{
+ GUEST_SYNC(DF_VECTOR);
+}
+
+static void l2_ss_injected_tf_test(void)
+{
+ GUEST_SYNC(FAKE_TRIPLE_FAULT_VECTOR);
+}
+
+static void svm_run_l2(struct svm_test_data *svm, void *l2_code, int vector,
+ uint32_t error_code)
+{
+ struct vmcb *vmcb = svm->vmcb;
+ struct vmcb_control_area *ctrl = &vmcb->control;
+
+ vmcb->save.rip = (u64)l2_code;
+ run_guest(vmcb, svm->vmcb_gpa);
+
+ if (vector == FAKE_TRIPLE_FAULT_VECTOR)
+ return;
+
+ GUEST_ASSERT_EQ(ctrl->exit_code, (SVM_EXIT_EXCP_BASE + vector));
+ GUEST_ASSERT_EQ(ctrl->exit_info_1, error_code);
+}
+
+static void l1_svm_code(struct svm_test_data *svm)
+{
+ struct vmcb_control_area *ctrl = &svm->vmcb->control;
+ unsigned long l2_guest_stack[L2_GUEST_STACK_SIZE];
+
+ generic_svm_setup(svm, NULL, &l2_guest_stack[L2_GUEST_STACK_SIZE]);
+ svm->vmcb->save.idtr.limit = 0;
+ ctrl->intercept |= BIT_ULL(INTERCEPT_SHUTDOWN);
+
+ ctrl->intercept_exceptions = INTERCEPT_SS_GP_DF;
+ svm_run_l2(svm, l2_ss_pending_test, SS_VECTOR, SS_ERROR_CODE);
+ svm_run_l2(svm, l2_ss_injected_gp_test, GP_VECTOR, GP_ERROR_CODE_AMD);
+
+ ctrl->intercept_exceptions = INTERCEPT_SS_DF;
+ svm_run_l2(svm, l2_ss_injected_df_test, DF_VECTOR, DF_ERROR_CODE);
+
+ ctrl->intercept_exceptions = INTERCEPT_SS;
+ svm_run_l2(svm, l2_ss_injected_tf_test, FAKE_TRIPLE_FAULT_VECTOR, 0);
+ GUEST_ASSERT_EQ(ctrl->exit_code, SVM_EXIT_SHUTDOWN);
+
+ GUEST_DONE();
+}
+
+static void vmx_run_l2(void *l2_code, int vector, uint32_t error_code)
+{
+ GUEST_ASSERT(!vmwrite(GUEST_RIP, (u64)l2_code));
+
+ GUEST_ASSERT_EQ(vector == SS_VECTOR ? vmlaunch() : vmresume(), 0);
+
+ if (vector == FAKE_TRIPLE_FAULT_VECTOR)
+ return;
+
+ GUEST_ASSERT_EQ(vmreadz(VM_EXIT_REASON), EXIT_REASON_EXCEPTION_NMI);
+ GUEST_ASSERT_EQ((vmreadz(VM_EXIT_INTR_INFO) & 0xff), vector);
+ GUEST_ASSERT_EQ(vmreadz(VM_EXIT_INTR_ERROR_CODE), error_code);
+}
+
+static void l1_vmx_code(struct vmx_pages *vmx)
+{
+ unsigned long l2_guest_stack[L2_GUEST_STACK_SIZE];
+
+ GUEST_ASSERT_EQ(prepare_for_vmx_operation(vmx), true);
+
+ GUEST_ASSERT_EQ(load_vmcs(vmx), true);
+
+ prepare_vmcs(vmx, NULL, &l2_guest_stack[L2_GUEST_STACK_SIZE]);
+ GUEST_ASSERT_EQ(vmwrite(GUEST_IDTR_LIMIT, 0), 0);
+
+ /*
+ * VMX disallows injecting an exception with error_code[31:16] != 0,
+ * and hardware will never generate a VM-Exit with bits 31:16 set.
+ * KVM should likewise truncate the "bad" userspace value.
+ */
+ GUEST_ASSERT_EQ(vmwrite(EXCEPTION_BITMAP, INTERCEPT_SS_GP_DF), 0);
+ vmx_run_l2(l2_ss_pending_test, SS_VECTOR, (u16)SS_ERROR_CODE);
+ vmx_run_l2(l2_ss_injected_gp_test, GP_VECTOR, GP_ERROR_CODE_INTEL);
+
+ GUEST_ASSERT_EQ(vmwrite(EXCEPTION_BITMAP, INTERCEPT_SS_DF), 0);
+ vmx_run_l2(l2_ss_injected_df_test, DF_VECTOR, DF_ERROR_CODE);
+
+ GUEST_ASSERT_EQ(vmwrite(EXCEPTION_BITMAP, INTERCEPT_SS), 0);
+ vmx_run_l2(l2_ss_injected_tf_test, FAKE_TRIPLE_FAULT_VECTOR, 0);
+ GUEST_ASSERT_EQ(vmreadz(VM_EXIT_REASON), EXIT_REASON_TRIPLE_FAULT);
+
+ GUEST_DONE();
+}
+
+static void __attribute__((__flatten__)) l1_guest_code(void *test_data)
+{
+ if (this_cpu_has(X86_FEATURE_SVM))
+ l1_svm_code(test_data);
+ else
+ l1_vmx_code(test_data);
+}
+
+static void assert_ucall_vector(struct kvm_vcpu *vcpu, int vector)
+{
+ struct ucall uc;
+
+ TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_IO);
+
+ switch (get_ucall(vcpu, &uc)) {
+ case UCALL_SYNC:
+ TEST_ASSERT(vector == uc.args[1],
+ "Expected L2 to ask for %d, got %ld", vector, uc.args[1]);
+ break;
+ case UCALL_DONE:
+ TEST_ASSERT(vector == -1,
+ "Expected L2 to ask for %d, L2 says it's done", vector);
+ break;
+ case UCALL_ABORT:
+ REPORT_GUEST_ASSERT(uc);
+ break;
+ default:
+ TEST_FAIL("Expected L2 to ask for %d, got unexpected ucall %lu", vector, uc.cmd);
+ }
+}
+
+static void queue_ss_exception(struct kvm_vcpu *vcpu, bool inject)
+{
+ struct kvm_vcpu_events events;
+
+ vcpu_events_get(vcpu, &events);
+
+ TEST_ASSERT(!events.exception.pending,
+ "Vector %d unexpectedlt pending", events.exception.nr);
+ TEST_ASSERT(!events.exception.injected,
+ "Vector %d unexpectedly injected", events.exception.nr);
+
+ events.flags = KVM_VCPUEVENT_VALID_PAYLOAD;
+ events.exception.pending = !inject;
+ events.exception.injected = inject;
+ events.exception.nr = SS_VECTOR;
+ events.exception.has_error_code = true;
+ events.exception.error_code = SS_ERROR_CODE;
+ vcpu_events_set(vcpu, &events);
+}
+
+/*
+ * Verify KVM_{G,S}ET_EVENTS play nice with pending vs. injected exceptions
+ * when an exception is being queued for L2. Specifically, verify that KVM
+ * honors L1 exception intercept controls when a #SS is pending/injected,
+ * triggers a #GP on vectoring the #SS, morphs to #DF if #GP isn't intercepted
+ * by L1, and finally causes (nested) SHUTDOWN if #DF isn't intercepted by L1.
+ */
+int main(int argc, char *argv[])
+{
+ vm_vaddr_t nested_test_data_gva;
+ struct kvm_vcpu_events events;
+ struct kvm_vcpu *vcpu;
+ struct kvm_vm *vm;
+
+ TEST_REQUIRE(kvm_has_cap(KVM_CAP_EXCEPTION_PAYLOAD));
+ TEST_REQUIRE(kvm_cpu_has(X86_FEATURE_SVM) || kvm_cpu_has(X86_FEATURE_VMX));
+
+ vm = vm_create_with_one_vcpu(&vcpu, l1_guest_code);
+ vm_enable_cap(vm, KVM_CAP_EXCEPTION_PAYLOAD, -2ul);
+
+ if (kvm_cpu_has(X86_FEATURE_SVM))
+ vcpu_alloc_svm(vm, &nested_test_data_gva);
+ else
+ vcpu_alloc_vmx(vm, &nested_test_data_gva);
+
+ vcpu_args_set(vcpu, 1, nested_test_data_gva);
+
+ /* Run L1 => L2. L2 should sync and request #SS. */
+ vcpu_run(vcpu);
+ assert_ucall_vector(vcpu, SS_VECTOR);
+
+ /* Pend #SS and request immediate exit. #SS should still be pending. */
+ queue_ss_exception(vcpu, false);
+ vcpu->run->immediate_exit = true;
+ vcpu_run_complete_io(vcpu);
+
+ /* Verify the pending events comes back out the same as it went in. */
+ vcpu_events_get(vcpu, &events);
+ TEST_ASSERT_EQ(events.flags & KVM_VCPUEVENT_VALID_PAYLOAD,
+ KVM_VCPUEVENT_VALID_PAYLOAD);
+ TEST_ASSERT_EQ(events.exception.pending, true);
+ TEST_ASSERT_EQ(events.exception.nr, SS_VECTOR);
+ TEST_ASSERT_EQ(events.exception.has_error_code, true);
+ TEST_ASSERT_EQ(events.exception.error_code, SS_ERROR_CODE);
+
+ /*
+ * Run for real with the pending #SS, L1 should get a VM-Exit due to
+ * #SS interception and re-enter L2 to request #GP (via injected #SS).
+ */
+ vcpu->run->immediate_exit = false;
+ vcpu_run(vcpu);
+ assert_ucall_vector(vcpu, GP_VECTOR);
+
+ /*
+ * Inject #SS, the #SS should bypass interception and cause #GP, which
+ * L1 should intercept before KVM morphs it to #DF. L1 should then
+ * disable #GP interception and run L2 to request #DF (via #SS => #GP).
+ */
+ queue_ss_exception(vcpu, true);
+ vcpu_run(vcpu);
+ assert_ucall_vector(vcpu, DF_VECTOR);
+
+ /*
+ * Inject #SS, the #SS should bypass interception and cause #GP, which
+ * L1 is no longer interception, and so should see a #DF VM-Exit. L1
+ * should then signal that is done.
+ */
+ queue_ss_exception(vcpu, true);
+ vcpu_run(vcpu);
+ assert_ucall_vector(vcpu, FAKE_TRIPLE_FAULT_VECTOR);
+
+ /*
+ * Inject #SS yet again. L1 is not intercepting #GP or #DF, and so
+ * should see nested TRIPLE_FAULT / SHUTDOWN.
+ */
+ queue_ss_exception(vcpu, true);
+ vcpu_run(vcpu);
+ assert_ucall_vector(vcpu, -1);
+
+ kvm_vm_free(vm);
+}
diff --git a/tools/testing/selftests/kvm/x86_64/nx_huge_pages_test.c b/tools/testing/selftests/kvm/x86_64/nx_huge_pages_test.c
new file mode 100644
index 000000000000..17bbb96fc4df
--- /dev/null
+++ b/tools/testing/selftests/kvm/x86_64/nx_huge_pages_test.c
@@ -0,0 +1,269 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Usage: to be run via nx_huge_page_test.sh, which does the necessary
+ * environment setup and teardown
+ *
+ * Copyright (C) 2022, Google LLC.
+ */
+
+#define _GNU_SOURCE
+
+#include <fcntl.h>
+#include <stdint.h>
+#include <time.h>
+
+#include <test_util.h>
+#include "kvm_util.h"
+#include "processor.h"
+
+#define HPAGE_SLOT 10
+#define HPAGE_GPA (4UL << 30) /* 4G prevents collision w/ slot 0 */
+#define HPAGE_GVA HPAGE_GPA /* GVA is arbitrary, so use GPA. */
+#define PAGES_PER_2MB_HUGE_PAGE 512
+#define HPAGE_SLOT_NPAGES (3 * PAGES_PER_2MB_HUGE_PAGE)
+
+/*
+ * Passed by nx_huge_pages_test.sh to provide an easy warning if this test is
+ * being run without it.
+ */
+#define MAGIC_TOKEN 887563923
+
+/*
+ * x86 opcode for the return instruction. Used to call into, and then
+ * immediately return from, memory backed with hugepages.
+ */
+#define RETURN_OPCODE 0xC3
+
+/* Call the specified memory address. */
+static void guest_do_CALL(uint64_t target)
+{
+ ((void (*)(void)) target)();
+}
+
+/*
+ * Exit the VM after each memory access so that the userspace component of the
+ * test can make assertions about the pages backing the VM.
+ *
+ * See the below for an explanation of how each access should affect the
+ * backing mappings.
+ */
+void guest_code(void)
+{
+ uint64_t hpage_1 = HPAGE_GVA;
+ uint64_t hpage_2 = hpage_1 + (PAGE_SIZE * 512);
+ uint64_t hpage_3 = hpage_2 + (PAGE_SIZE * 512);
+
+ READ_ONCE(*(uint64_t *)hpage_1);
+ GUEST_SYNC(1);
+
+ READ_ONCE(*(uint64_t *)hpage_2);
+ GUEST_SYNC(2);
+
+ guest_do_CALL(hpage_1);
+ GUEST_SYNC(3);
+
+ guest_do_CALL(hpage_3);
+ GUEST_SYNC(4);
+
+ READ_ONCE(*(uint64_t *)hpage_1);
+ GUEST_SYNC(5);
+
+ READ_ONCE(*(uint64_t *)hpage_3);
+ GUEST_SYNC(6);
+}
+
+static void check_2m_page_count(struct kvm_vm *vm, int expected_pages_2m)
+{
+ int actual_pages_2m;
+
+ actual_pages_2m = vm_get_stat(vm, "pages_2m");
+
+ TEST_ASSERT(actual_pages_2m == expected_pages_2m,
+ "Unexpected 2m page count. Expected %d, got %d",
+ expected_pages_2m, actual_pages_2m);
+}
+
+static void check_split_count(struct kvm_vm *vm, int expected_splits)
+{
+ int actual_splits;
+
+ actual_splits = vm_get_stat(vm, "nx_lpage_splits");
+
+ TEST_ASSERT(actual_splits == expected_splits,
+ "Unexpected NX huge page split count. Expected %d, got %d",
+ expected_splits, actual_splits);
+}
+
+static void wait_for_reclaim(int reclaim_period_ms)
+{
+ long reclaim_wait_ms;
+ struct timespec ts;
+
+ reclaim_wait_ms = reclaim_period_ms * 5;
+ ts.tv_sec = reclaim_wait_ms / 1000;
+ ts.tv_nsec = (reclaim_wait_ms - (ts.tv_sec * 1000)) * 1000000;
+ nanosleep(&ts, NULL);
+}
+
+void run_test(int reclaim_period_ms, bool disable_nx_huge_pages,
+ bool reboot_permissions)
+{
+ struct kvm_vcpu *vcpu;
+ struct kvm_vm *vm;
+ uint64_t nr_bytes;
+ void *hva;
+ int r;
+
+ vm = vm_create(1);
+
+ if (disable_nx_huge_pages) {
+ r = __vm_disable_nx_huge_pages(vm);
+ if (reboot_permissions) {
+ TEST_ASSERT(!r, "Disabling NX huge pages should succeed if process has reboot permissions");
+ } else {
+ TEST_ASSERT(r == -1 && errno == EPERM,
+ "This process should not have permission to disable NX huge pages");
+ return;
+ }
+ }
+
+ vcpu = vm_vcpu_add(vm, 0, guest_code);
+
+ vm_userspace_mem_region_add(vm, VM_MEM_SRC_ANONYMOUS_HUGETLB,
+ HPAGE_GPA, HPAGE_SLOT,
+ HPAGE_SLOT_NPAGES, 0);
+
+ nr_bytes = HPAGE_SLOT_NPAGES * vm->page_size;
+
+ /*
+ * Ensure that KVM can map HPAGE_SLOT with huge pages by mapping the
+ * region into the guest with 2MiB pages whenever TDP is disabled (i.e.
+ * whenever KVM is shadowing the guest page tables).
+ *
+ * When TDP is enabled, KVM should be able to map HPAGE_SLOT with huge
+ * pages irrespective of the guest page size, so map with 4KiB pages
+ * to test that that is the case.
+ */
+ if (kvm_is_tdp_enabled())
+ virt_map_level(vm, HPAGE_GVA, HPAGE_GPA, nr_bytes, PG_LEVEL_4K);
+ else
+ virt_map_level(vm, HPAGE_GVA, HPAGE_GPA, nr_bytes, PG_LEVEL_2M);
+
+ hva = addr_gpa2hva(vm, HPAGE_GPA);
+ memset(hva, RETURN_OPCODE, nr_bytes);
+
+ check_2m_page_count(vm, 0);
+ check_split_count(vm, 0);
+
+ /*
+ * The guest code will first read from the first hugepage, resulting
+ * in a huge page mapping being created.
+ */
+ vcpu_run(vcpu);
+ check_2m_page_count(vm, 1);
+ check_split_count(vm, 0);
+
+ /*
+ * Then the guest code will read from the second hugepage, resulting
+ * in another huge page mapping being created.
+ */
+ vcpu_run(vcpu);
+ check_2m_page_count(vm, 2);
+ check_split_count(vm, 0);
+
+ /*
+ * Next, the guest will execute from the first huge page, causing it
+ * to be remapped at 4k.
+ *
+ * If NX huge pages are disabled, this should have no effect.
+ */
+ vcpu_run(vcpu);
+ check_2m_page_count(vm, disable_nx_huge_pages ? 2 : 1);
+ check_split_count(vm, disable_nx_huge_pages ? 0 : 1);
+
+ /*
+ * Executing from the third huge page (previously unaccessed) will
+ * cause part to be mapped at 4k.
+ *
+ * If NX huge pages are disabled, it should be mapped at 2M.
+ */
+ vcpu_run(vcpu);
+ check_2m_page_count(vm, disable_nx_huge_pages ? 3 : 1);
+ check_split_count(vm, disable_nx_huge_pages ? 0 : 2);
+
+ /* Reading from the first huge page again should have no effect. */
+ vcpu_run(vcpu);
+ check_2m_page_count(vm, disable_nx_huge_pages ? 3 : 1);
+ check_split_count(vm, disable_nx_huge_pages ? 0 : 2);
+
+ /* Give recovery thread time to run. */
+ wait_for_reclaim(reclaim_period_ms);
+
+ /*
+ * Now that the reclaimer has run, all the split pages should be gone.
+ *
+ * If NX huge pages are disabled, the relaimer will not run, so
+ * nothing should change from here on.
+ */
+ check_2m_page_count(vm, disable_nx_huge_pages ? 3 : 1);
+ check_split_count(vm, 0);
+
+ /*
+ * The 4k mapping on hpage 3 should have been removed, so check that
+ * reading from it causes a huge page mapping to be installed.
+ */
+ vcpu_run(vcpu);
+ check_2m_page_count(vm, disable_nx_huge_pages ? 3 : 2);
+ check_split_count(vm, 0);
+
+ kvm_vm_free(vm);
+}
+
+static void help(char *name)
+{
+ puts("");
+ printf("usage: %s [-h] [-p period_ms] [-t token]\n", name);
+ puts("");
+ printf(" -p: The NX reclaim period in milliseconds.\n");
+ printf(" -t: The magic token to indicate environment setup is done.\n");
+ printf(" -r: The test has reboot permissions and can disable NX huge pages.\n");
+ puts("");
+ exit(0);
+}
+
+int main(int argc, char **argv)
+{
+ int reclaim_period_ms = 0, token = 0, opt;
+ bool reboot_permissions = false;
+
+ while ((opt = getopt(argc, argv, "hp:t:r")) != -1) {
+ switch (opt) {
+ case 'p':
+ reclaim_period_ms = atoi_positive("Reclaim period", optarg);
+ break;
+ case 't':
+ token = atoi_paranoid(optarg);
+ break;
+ case 'r':
+ reboot_permissions = true;
+ break;
+ case 'h':
+ default:
+ help(argv[0]);
+ break;
+ }
+ }
+
+ TEST_REQUIRE(kvm_has_cap(KVM_CAP_VM_DISABLE_NX_HUGE_PAGES));
+
+ __TEST_REQUIRE(token == MAGIC_TOKEN,
+ "This test must be run with the magic token via '-t %d'.\n"
+ "Running via nx_huge_pages_test.sh, which also handles "
+ "environment setup, is strongly recommended.", MAGIC_TOKEN);
+
+ run_test(reclaim_period_ms, false, reboot_permissions);
+ run_test(reclaim_period_ms, true, reboot_permissions);
+
+ return 0;
+}
+
diff --git a/tools/testing/selftests/kvm/x86_64/nx_huge_pages_test.sh b/tools/testing/selftests/kvm/x86_64/nx_huge_pages_test.sh
new file mode 100755
index 000000000000..7cbb409801ee
--- /dev/null
+++ b/tools/testing/selftests/kvm/x86_64/nx_huge_pages_test.sh
@@ -0,0 +1,58 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0-only */
+#
+# Wrapper script which performs setup and cleanup for nx_huge_pages_test.
+# Makes use of root privileges to set up huge pages and KVM module parameters.
+#
+# Copyright (C) 2022, Google LLC.
+
+set -e
+
+NX_HUGE_PAGES=$(cat /sys/module/kvm/parameters/nx_huge_pages)
+NX_HUGE_PAGES_RECOVERY_RATIO=$(cat /sys/module/kvm/parameters/nx_huge_pages_recovery_ratio)
+NX_HUGE_PAGES_RECOVERY_PERIOD=$(cat /sys/module/kvm/parameters/nx_huge_pages_recovery_period_ms)
+HUGE_PAGES=$(cat /sys/kernel/mm/hugepages/hugepages-2048kB/nr_hugepages)
+
+set +e
+
+function sudo_echo () {
+ echo "$1" | sudo tee -a "$2" > /dev/null
+}
+
+NXECUTABLE="$(dirname $0)/nx_huge_pages_test"
+
+sudo_echo test /dev/null || exit 4 # KSFT_SKIP=4
+
+(
+ set -e
+
+ sudo_echo 1 /sys/module/kvm/parameters/nx_huge_pages
+ sudo_echo 1 /sys/module/kvm/parameters/nx_huge_pages_recovery_ratio
+ sudo_echo 100 /sys/module/kvm/parameters/nx_huge_pages_recovery_period_ms
+ sudo_echo "$(( $HUGE_PAGES + 3 ))" /sys/kernel/mm/hugepages/hugepages-2048kB/nr_hugepages
+
+ # Test with reboot permissions
+ if [ $(whoami) == "root" ] || sudo setcap cap_sys_boot+ep $NXECUTABLE 2> /dev/null; then
+ echo Running test with CAP_SYS_BOOT enabled
+ $NXECUTABLE -t 887563923 -p 100 -r
+ test $(whoami) == "root" || sudo setcap cap_sys_boot-ep $NXECUTABLE
+ else
+ echo setcap failed, skipping nx_huge_pages_test with CAP_SYS_BOOT enabled
+ fi
+
+ # Test without reboot permissions
+ if [ $(whoami) != "root" ] ; then
+ echo Running test with CAP_SYS_BOOT disabled
+ $NXECUTABLE -t 887563923 -p 100
+ else
+ echo Running as root, skipping nx_huge_pages_test with CAP_SYS_BOOT disabled
+ fi
+)
+RET=$?
+
+sudo_echo "$NX_HUGE_PAGES" /sys/module/kvm/parameters/nx_huge_pages
+sudo_echo "$NX_HUGE_PAGES_RECOVERY_RATIO" /sys/module/kvm/parameters/nx_huge_pages_recovery_ratio
+sudo_echo "$NX_HUGE_PAGES_RECOVERY_PERIOD" /sys/module/kvm/parameters/nx_huge_pages_recovery_period_ms
+sudo_echo "$HUGE_PAGES" /sys/kernel/mm/hugepages/hugepages-2048kB/nr_hugepages
+
+exit $RET
diff --git a/tools/testing/selftests/kvm/x86_64/platform_info_test.c b/tools/testing/selftests/kvm/x86_64/platform_info_test.c
index 1e89688cbbbf..87011965dc41 100644
--- a/tools/testing/selftests/kvm/x86_64/platform_info_test.c
+++ b/tools/testing/selftests/kvm/x86_64/platform_info_test.c
@@ -21,7 +21,6 @@
#include "kvm_util.h"
#include "processor.h"
-#define VCPU_ID 0
#define MSR_PLATFORM_INFO_MAX_TURBO_RATIO 0xff00
static void guest_code(void)
@@ -35,71 +34,46 @@ static void guest_code(void)
}
}
-static void set_msr_platform_info_enabled(struct kvm_vm *vm, bool enable)
+static void test_msr_platform_info_enabled(struct kvm_vcpu *vcpu)
{
- struct kvm_enable_cap cap = {};
-
- cap.cap = KVM_CAP_MSR_PLATFORM_INFO;
- cap.flags = 0;
- cap.args[0] = (int)enable;
- vm_enable_cap(vm, &cap);
-}
-
-static void test_msr_platform_info_enabled(struct kvm_vm *vm)
-{
- struct kvm_run *run = vcpu_state(vm, VCPU_ID);
struct ucall uc;
- set_msr_platform_info_enabled(vm, true);
- vcpu_run(vm, VCPU_ID);
- TEST_ASSERT(run->exit_reason == KVM_EXIT_IO,
- "Exit_reason other than KVM_EXIT_IO: %u (%s),\n",
- run->exit_reason,
- exit_reason_str(run->exit_reason));
- get_ucall(vm, VCPU_ID, &uc);
+ vm_enable_cap(vcpu->vm, KVM_CAP_MSR_PLATFORM_INFO, true);
+ vcpu_run(vcpu);
+ TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_IO);
+
+ get_ucall(vcpu, &uc);
TEST_ASSERT(uc.cmd == UCALL_SYNC,
- "Received ucall other than UCALL_SYNC: %lu\n", uc.cmd);
+ "Received ucall other than UCALL_SYNC: %lu", uc.cmd);
TEST_ASSERT((uc.args[1] & MSR_PLATFORM_INFO_MAX_TURBO_RATIO) ==
MSR_PLATFORM_INFO_MAX_TURBO_RATIO,
"Expected MSR_PLATFORM_INFO to have max turbo ratio mask: %i.",
MSR_PLATFORM_INFO_MAX_TURBO_RATIO);
}
-static void test_msr_platform_info_disabled(struct kvm_vm *vm)
+static void test_msr_platform_info_disabled(struct kvm_vcpu *vcpu)
{
- struct kvm_run *run = vcpu_state(vm, VCPU_ID);
-
- set_msr_platform_info_enabled(vm, false);
- vcpu_run(vm, VCPU_ID);
- TEST_ASSERT(run->exit_reason == KVM_EXIT_SHUTDOWN,
- "Exit_reason other than KVM_EXIT_SHUTDOWN: %u (%s)\n",
- run->exit_reason,
- exit_reason_str(run->exit_reason));
+ vm_enable_cap(vcpu->vm, KVM_CAP_MSR_PLATFORM_INFO, false);
+ vcpu_run(vcpu);
+ TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_SHUTDOWN);
}
int main(int argc, char *argv[])
{
+ struct kvm_vcpu *vcpu;
struct kvm_vm *vm;
- int rv;
uint64_t msr_platform_info;
- /* Tell stdout not to buffer its content */
- setbuf(stdout, NULL);
-
- rv = kvm_check_cap(KVM_CAP_MSR_PLATFORM_INFO);
- if (!rv) {
- print_skip("KVM_CAP_MSR_PLATFORM_INFO not supported");
- exit(KSFT_SKIP);
- }
+ TEST_REQUIRE(kvm_has_cap(KVM_CAP_MSR_PLATFORM_INFO));
- vm = vm_create_default(VCPU_ID, 0, guest_code);
+ vm = vm_create_with_one_vcpu(&vcpu, guest_code);
- msr_platform_info = vcpu_get_msr(vm, VCPU_ID, MSR_PLATFORM_INFO);
- vcpu_set_msr(vm, VCPU_ID, MSR_PLATFORM_INFO,
- msr_platform_info | MSR_PLATFORM_INFO_MAX_TURBO_RATIO);
- test_msr_platform_info_enabled(vm);
- test_msr_platform_info_disabled(vm);
- vcpu_set_msr(vm, VCPU_ID, MSR_PLATFORM_INFO, msr_platform_info);
+ msr_platform_info = vcpu_get_msr(vcpu, MSR_PLATFORM_INFO);
+ vcpu_set_msr(vcpu, MSR_PLATFORM_INFO,
+ msr_platform_info | MSR_PLATFORM_INFO_MAX_TURBO_RATIO);
+ test_msr_platform_info_enabled(vcpu);
+ test_msr_platform_info_disabled(vcpu);
+ vcpu_set_msr(vcpu, MSR_PLATFORM_INFO, msr_platform_info);
kvm_vm_free(vm);
diff --git a/tools/testing/selftests/kvm/x86_64/pmu_counters_test.c b/tools/testing/selftests/kvm/x86_64/pmu_counters_test.c
new file mode 100644
index 000000000000..29609b52f8fa
--- /dev/null
+++ b/tools/testing/selftests/kvm/x86_64/pmu_counters_test.c
@@ -0,0 +1,620 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (C) 2023, Tencent, Inc.
+ */
+
+#define _GNU_SOURCE /* for program_invocation_short_name */
+#include <x86intrin.h>
+
+#include "pmu.h"
+#include "processor.h"
+
+/* Number of LOOP instructions for the guest measurement payload. */
+#define NUM_BRANCHES 10
+/*
+ * Number of "extra" instructions that will be counted, i.e. the number of
+ * instructions that are needed to set up the loop and then disabled the
+ * counter. 1 CLFLUSH/CLFLUSHOPT/NOP, 1 MFENCE, 2 MOV, 2 XOR, 1 WRMSR.
+ */
+#define NUM_EXTRA_INSNS 7
+#define NUM_INSNS_RETIRED (NUM_BRANCHES + NUM_EXTRA_INSNS)
+
+static uint8_t kvm_pmu_version;
+static bool kvm_has_perf_caps;
+static bool is_forced_emulation_enabled;
+
+static struct kvm_vm *pmu_vm_create_with_one_vcpu(struct kvm_vcpu **vcpu,
+ void *guest_code,
+ uint8_t pmu_version,
+ uint64_t perf_capabilities)
+{
+ struct kvm_vm *vm;
+
+ vm = vm_create_with_one_vcpu(vcpu, guest_code);
+ vm_init_descriptor_tables(vm);
+ vcpu_init_descriptor_tables(*vcpu);
+
+ sync_global_to_guest(vm, kvm_pmu_version);
+ sync_global_to_guest(vm, is_forced_emulation_enabled);
+
+ /*
+ * Set PERF_CAPABILITIES before PMU version as KVM disallows enabling
+ * features via PERF_CAPABILITIES if the guest doesn't have a vPMU.
+ */
+ if (kvm_has_perf_caps)
+ vcpu_set_msr(*vcpu, MSR_IA32_PERF_CAPABILITIES, perf_capabilities);
+
+ vcpu_set_cpuid_property(*vcpu, X86_PROPERTY_PMU_VERSION, pmu_version);
+ return vm;
+}
+
+static void run_vcpu(struct kvm_vcpu *vcpu)
+{
+ struct ucall uc;
+
+ do {
+ vcpu_run(vcpu);
+ switch (get_ucall(vcpu, &uc)) {
+ case UCALL_SYNC:
+ break;
+ case UCALL_ABORT:
+ REPORT_GUEST_ASSERT(uc);
+ break;
+ case UCALL_PRINTF:
+ pr_info("%s", uc.buffer);
+ break;
+ case UCALL_DONE:
+ break;
+ default:
+ TEST_FAIL("Unexpected ucall: %lu", uc.cmd);
+ }
+ } while (uc.cmd != UCALL_DONE);
+}
+
+static uint8_t guest_get_pmu_version(void)
+{
+ /*
+ * Return the effective PMU version, i.e. the minimum between what KVM
+ * supports and what is enumerated to the guest. The host deliberately
+ * advertises a PMU version to the guest beyond what is actually
+ * supported by KVM to verify KVM doesn't freak out and do something
+ * bizarre with an architecturally valid, but unsupported, version.
+ */
+ return min_t(uint8_t, kvm_pmu_version, this_cpu_property(X86_PROPERTY_PMU_VERSION));
+}
+
+/*
+ * If an architectural event is supported and guaranteed to generate at least
+ * one "hit, assert that its count is non-zero. If an event isn't supported or
+ * the test can't guarantee the associated action will occur, then all bets are
+ * off regarding the count, i.e. no checks can be done.
+ *
+ * Sanity check that in all cases, the event doesn't count when it's disabled,
+ * and that KVM correctly emulates the write of an arbitrary value.
+ */
+static void guest_assert_event_count(uint8_t idx,
+ struct kvm_x86_pmu_feature event,
+ uint32_t pmc, uint32_t pmc_msr)
+{
+ uint64_t count;
+
+ count = _rdpmc(pmc);
+ if (!this_pmu_has(event))
+ goto sanity_checks;
+
+ switch (idx) {
+ case INTEL_ARCH_INSTRUCTIONS_RETIRED_INDEX:
+ GUEST_ASSERT_EQ(count, NUM_INSNS_RETIRED);
+ break;
+ case INTEL_ARCH_BRANCHES_RETIRED_INDEX:
+ GUEST_ASSERT_EQ(count, NUM_BRANCHES);
+ break;
+ case INTEL_ARCH_LLC_REFERENCES_INDEX:
+ case INTEL_ARCH_LLC_MISSES_INDEX:
+ if (!this_cpu_has(X86_FEATURE_CLFLUSHOPT) &&
+ !this_cpu_has(X86_FEATURE_CLFLUSH))
+ break;
+ fallthrough;
+ case INTEL_ARCH_CPU_CYCLES_INDEX:
+ case INTEL_ARCH_REFERENCE_CYCLES_INDEX:
+ GUEST_ASSERT_NE(count, 0);
+ break;
+ case INTEL_ARCH_TOPDOWN_SLOTS_INDEX:
+ GUEST_ASSERT(count >= NUM_INSNS_RETIRED);
+ break;
+ default:
+ break;
+ }
+
+sanity_checks:
+ __asm__ __volatile__("loop ." : "+c"((int){NUM_BRANCHES}));
+ GUEST_ASSERT_EQ(_rdpmc(pmc), count);
+
+ wrmsr(pmc_msr, 0xdead);
+ GUEST_ASSERT_EQ(_rdpmc(pmc), 0xdead);
+}
+
+/*
+ * Enable and disable the PMC in a monolithic asm blob to ensure that the
+ * compiler can't insert _any_ code into the measured sequence. Note, ECX
+ * doesn't need to be clobbered as the input value, @pmc_msr, is restored
+ * before the end of the sequence.
+ *
+ * If CLFUSH{,OPT} is supported, flush the cacheline containing (at least) the
+ * start of the loop to force LLC references and misses, i.e. to allow testing
+ * that those events actually count.
+ *
+ * If forced emulation is enabled (and specified), force emulation on a subset
+ * of the measured code to verify that KVM correctly emulates instructions and
+ * branches retired events in conjunction with hardware also counting said
+ * events.
+ */
+#define GUEST_MEASURE_EVENT(_msr, _value, clflush, FEP) \
+do { \
+ __asm__ __volatile__("wrmsr\n\t" \
+ clflush "\n\t" \
+ "mfence\n\t" \
+ "1: mov $" __stringify(NUM_BRANCHES) ", %%ecx\n\t" \
+ FEP "loop .\n\t" \
+ FEP "mov %%edi, %%ecx\n\t" \
+ FEP "xor %%eax, %%eax\n\t" \
+ FEP "xor %%edx, %%edx\n\t" \
+ "wrmsr\n\t" \
+ :: "a"((uint32_t)_value), "d"(_value >> 32), \
+ "c"(_msr), "D"(_msr) \
+ ); \
+} while (0)
+
+#define GUEST_TEST_EVENT(_idx, _event, _pmc, _pmc_msr, _ctrl_msr, _value, FEP) \
+do { \
+ wrmsr(pmc_msr, 0); \
+ \
+ if (this_cpu_has(X86_FEATURE_CLFLUSHOPT)) \
+ GUEST_MEASURE_EVENT(_ctrl_msr, _value, "clflushopt 1f", FEP); \
+ else if (this_cpu_has(X86_FEATURE_CLFLUSH)) \
+ GUEST_MEASURE_EVENT(_ctrl_msr, _value, "clflush 1f", FEP); \
+ else \
+ GUEST_MEASURE_EVENT(_ctrl_msr, _value, "nop", FEP); \
+ \
+ guest_assert_event_count(_idx, _event, _pmc, _pmc_msr); \
+} while (0)
+
+static void __guest_test_arch_event(uint8_t idx, struct kvm_x86_pmu_feature event,
+ uint32_t pmc, uint32_t pmc_msr,
+ uint32_t ctrl_msr, uint64_t ctrl_msr_value)
+{
+ GUEST_TEST_EVENT(idx, event, pmc, pmc_msr, ctrl_msr, ctrl_msr_value, "");
+
+ if (is_forced_emulation_enabled)
+ GUEST_TEST_EVENT(idx, event, pmc, pmc_msr, ctrl_msr, ctrl_msr_value, KVM_FEP);
+}
+
+#define X86_PMU_FEATURE_NULL \
+({ \
+ struct kvm_x86_pmu_feature feature = {}; \
+ \
+ feature; \
+})
+
+static bool pmu_is_null_feature(struct kvm_x86_pmu_feature event)
+{
+ return !(*(u64 *)&event);
+}
+
+static void guest_test_arch_event(uint8_t idx)
+{
+ const struct {
+ struct kvm_x86_pmu_feature gp_event;
+ struct kvm_x86_pmu_feature fixed_event;
+ } intel_event_to_feature[] = {
+ [INTEL_ARCH_CPU_CYCLES_INDEX] = { X86_PMU_FEATURE_CPU_CYCLES, X86_PMU_FEATURE_CPU_CYCLES_FIXED },
+ [INTEL_ARCH_INSTRUCTIONS_RETIRED_INDEX] = { X86_PMU_FEATURE_INSNS_RETIRED, X86_PMU_FEATURE_INSNS_RETIRED_FIXED },
+ /*
+ * Note, the fixed counter for reference cycles is NOT the same
+ * as the general purpose architectural event. The fixed counter
+ * explicitly counts at the same frequency as the TSC, whereas
+ * the GP event counts at a fixed, but uarch specific, frequency.
+ * Bundle them here for simplicity.
+ */
+ [INTEL_ARCH_REFERENCE_CYCLES_INDEX] = { X86_PMU_FEATURE_REFERENCE_CYCLES, X86_PMU_FEATURE_REFERENCE_TSC_CYCLES_FIXED },
+ [INTEL_ARCH_LLC_REFERENCES_INDEX] = { X86_PMU_FEATURE_LLC_REFERENCES, X86_PMU_FEATURE_NULL },
+ [INTEL_ARCH_LLC_MISSES_INDEX] = { X86_PMU_FEATURE_LLC_MISSES, X86_PMU_FEATURE_NULL },
+ [INTEL_ARCH_BRANCHES_RETIRED_INDEX] = { X86_PMU_FEATURE_BRANCH_INSNS_RETIRED, X86_PMU_FEATURE_NULL },
+ [INTEL_ARCH_BRANCHES_MISPREDICTED_INDEX] = { X86_PMU_FEATURE_BRANCHES_MISPREDICTED, X86_PMU_FEATURE_NULL },
+ [INTEL_ARCH_TOPDOWN_SLOTS_INDEX] = { X86_PMU_FEATURE_TOPDOWN_SLOTS, X86_PMU_FEATURE_TOPDOWN_SLOTS_FIXED },
+ };
+
+ uint32_t nr_gp_counters = this_cpu_property(X86_PROPERTY_PMU_NR_GP_COUNTERS);
+ uint32_t pmu_version = guest_get_pmu_version();
+ /* PERF_GLOBAL_CTRL exists only for Architectural PMU Version 2+. */
+ bool guest_has_perf_global_ctrl = pmu_version >= 2;
+ struct kvm_x86_pmu_feature gp_event, fixed_event;
+ uint32_t base_pmc_msr;
+ unsigned int i;
+
+ /* The host side shouldn't invoke this without a guest PMU. */
+ GUEST_ASSERT(pmu_version);
+
+ if (this_cpu_has(X86_FEATURE_PDCM) &&
+ rdmsr(MSR_IA32_PERF_CAPABILITIES) & PMU_CAP_FW_WRITES)
+ base_pmc_msr = MSR_IA32_PMC0;
+ else
+ base_pmc_msr = MSR_IA32_PERFCTR0;
+
+ gp_event = intel_event_to_feature[idx].gp_event;
+ GUEST_ASSERT_EQ(idx, gp_event.f.bit);
+
+ GUEST_ASSERT(nr_gp_counters);
+
+ for (i = 0; i < nr_gp_counters; i++) {
+ uint64_t eventsel = ARCH_PERFMON_EVENTSEL_OS |
+ ARCH_PERFMON_EVENTSEL_ENABLE |
+ intel_pmu_arch_events[idx];
+
+ wrmsr(MSR_P6_EVNTSEL0 + i, 0);
+ if (guest_has_perf_global_ctrl)
+ wrmsr(MSR_CORE_PERF_GLOBAL_CTRL, BIT_ULL(i));
+
+ __guest_test_arch_event(idx, gp_event, i, base_pmc_msr + i,
+ MSR_P6_EVNTSEL0 + i, eventsel);
+ }
+
+ if (!guest_has_perf_global_ctrl)
+ return;
+
+ fixed_event = intel_event_to_feature[idx].fixed_event;
+ if (pmu_is_null_feature(fixed_event) || !this_pmu_has(fixed_event))
+ return;
+
+ i = fixed_event.f.bit;
+
+ wrmsr(MSR_CORE_PERF_FIXED_CTR_CTRL, FIXED_PMC_CTRL(i, FIXED_PMC_KERNEL));
+
+ __guest_test_arch_event(idx, fixed_event, i | INTEL_RDPMC_FIXED,
+ MSR_CORE_PERF_FIXED_CTR0 + i,
+ MSR_CORE_PERF_GLOBAL_CTRL,
+ FIXED_PMC_GLOBAL_CTRL_ENABLE(i));
+}
+
+static void guest_test_arch_events(void)
+{
+ uint8_t i;
+
+ for (i = 0; i < NR_INTEL_ARCH_EVENTS; i++)
+ guest_test_arch_event(i);
+
+ GUEST_DONE();
+}
+
+static void test_arch_events(uint8_t pmu_version, uint64_t perf_capabilities,
+ uint8_t length, uint8_t unavailable_mask)
+{
+ struct kvm_vcpu *vcpu;
+ struct kvm_vm *vm;
+
+ /* Testing arch events requires a vPMU (there are no negative tests). */
+ if (!pmu_version)
+ return;
+
+ vm = pmu_vm_create_with_one_vcpu(&vcpu, guest_test_arch_events,
+ pmu_version, perf_capabilities);
+
+ vcpu_set_cpuid_property(vcpu, X86_PROPERTY_PMU_EBX_BIT_VECTOR_LENGTH,
+ length);
+ vcpu_set_cpuid_property(vcpu, X86_PROPERTY_PMU_EVENTS_MASK,
+ unavailable_mask);
+
+ run_vcpu(vcpu);
+
+ kvm_vm_free(vm);
+}
+
+/*
+ * Limit testing to MSRs that are actually defined by Intel (in the SDM). MSRs
+ * that aren't defined counter MSRs *probably* don't exist, but there's no
+ * guarantee that currently undefined MSR indices won't be used for something
+ * other than PMCs in the future.
+ */
+#define MAX_NR_GP_COUNTERS 8
+#define MAX_NR_FIXED_COUNTERS 3
+
+#define GUEST_ASSERT_PMC_MSR_ACCESS(insn, msr, expect_gp, vector) \
+__GUEST_ASSERT(expect_gp ? vector == GP_VECTOR : !vector, \
+ "Expected %s on " #insn "(0x%x), got vector %u", \
+ expect_gp ? "#GP" : "no fault", msr, vector) \
+
+#define GUEST_ASSERT_PMC_VALUE(insn, msr, val, expected) \
+ __GUEST_ASSERT(val == expected_val, \
+ "Expected " #insn "(0x%x) to yield 0x%lx, got 0x%lx", \
+ msr, expected_val, val);
+
+static void guest_test_rdpmc(uint32_t rdpmc_idx, bool expect_success,
+ uint64_t expected_val)
+{
+ uint8_t vector;
+ uint64_t val;
+
+ vector = rdpmc_safe(rdpmc_idx, &val);
+ GUEST_ASSERT_PMC_MSR_ACCESS(RDPMC, rdpmc_idx, !expect_success, vector);
+ if (expect_success)
+ GUEST_ASSERT_PMC_VALUE(RDPMC, rdpmc_idx, val, expected_val);
+
+ if (!is_forced_emulation_enabled)
+ return;
+
+ vector = rdpmc_safe_fep(rdpmc_idx, &val);
+ GUEST_ASSERT_PMC_MSR_ACCESS(RDPMC, rdpmc_idx, !expect_success, vector);
+ if (expect_success)
+ GUEST_ASSERT_PMC_VALUE(RDPMC, rdpmc_idx, val, expected_val);
+}
+
+static void guest_rd_wr_counters(uint32_t base_msr, uint8_t nr_possible_counters,
+ uint8_t nr_counters, uint32_t or_mask)
+{
+ const bool pmu_has_fast_mode = !guest_get_pmu_version();
+ uint8_t i;
+
+ for (i = 0; i < nr_possible_counters; i++) {
+ /*
+ * TODO: Test a value that validates full-width writes and the
+ * width of the counters.
+ */
+ const uint64_t test_val = 0xffff;
+ const uint32_t msr = base_msr + i;
+
+ /*
+ * Fixed counters are supported if the counter is less than the
+ * number of enumerated contiguous counters *or* the counter is
+ * explicitly enumerated in the supported counters mask.
+ */
+ const bool expect_success = i < nr_counters || (or_mask & BIT(i));
+
+ /*
+ * KVM drops writes to MSR_P6_PERFCTR[0|1] if the counters are
+ * unsupported, i.e. doesn't #GP and reads back '0'.
+ */
+ const uint64_t expected_val = expect_success ? test_val : 0;
+ const bool expect_gp = !expect_success && msr != MSR_P6_PERFCTR0 &&
+ msr != MSR_P6_PERFCTR1;
+ uint32_t rdpmc_idx;
+ uint8_t vector;
+ uint64_t val;
+
+ vector = wrmsr_safe(msr, test_val);
+ GUEST_ASSERT_PMC_MSR_ACCESS(WRMSR, msr, expect_gp, vector);
+
+ vector = rdmsr_safe(msr, &val);
+ GUEST_ASSERT_PMC_MSR_ACCESS(RDMSR, msr, expect_gp, vector);
+
+ /* On #GP, the result of RDMSR is undefined. */
+ if (!expect_gp)
+ GUEST_ASSERT_PMC_VALUE(RDMSR, msr, val, expected_val);
+
+ /*
+ * Redo the read tests with RDPMC, which has different indexing
+ * semantics and additional capabilities.
+ */
+ rdpmc_idx = i;
+ if (base_msr == MSR_CORE_PERF_FIXED_CTR0)
+ rdpmc_idx |= INTEL_RDPMC_FIXED;
+
+ guest_test_rdpmc(rdpmc_idx, expect_success, expected_val);
+
+ /*
+ * KVM doesn't support non-architectural PMUs, i.e. it should
+ * impossible to have fast mode RDPMC. Verify that attempting
+ * to use fast RDPMC always #GPs.
+ */
+ GUEST_ASSERT(!expect_success || !pmu_has_fast_mode);
+ rdpmc_idx |= INTEL_RDPMC_FAST;
+ guest_test_rdpmc(rdpmc_idx, false, -1ull);
+
+ vector = wrmsr_safe(msr, 0);
+ GUEST_ASSERT_PMC_MSR_ACCESS(WRMSR, msr, expect_gp, vector);
+ }
+}
+
+static void guest_test_gp_counters(void)
+{
+ uint8_t nr_gp_counters = 0;
+ uint32_t base_msr;
+
+ if (guest_get_pmu_version())
+ nr_gp_counters = this_cpu_property(X86_PROPERTY_PMU_NR_GP_COUNTERS);
+
+ if (this_cpu_has(X86_FEATURE_PDCM) &&
+ rdmsr(MSR_IA32_PERF_CAPABILITIES) & PMU_CAP_FW_WRITES)
+ base_msr = MSR_IA32_PMC0;
+ else
+ base_msr = MSR_IA32_PERFCTR0;
+
+ guest_rd_wr_counters(base_msr, MAX_NR_GP_COUNTERS, nr_gp_counters, 0);
+ GUEST_DONE();
+}
+
+static void test_gp_counters(uint8_t pmu_version, uint64_t perf_capabilities,
+ uint8_t nr_gp_counters)
+{
+ struct kvm_vcpu *vcpu;
+ struct kvm_vm *vm;
+
+ vm = pmu_vm_create_with_one_vcpu(&vcpu, guest_test_gp_counters,
+ pmu_version, perf_capabilities);
+
+ vcpu_set_cpuid_property(vcpu, X86_PROPERTY_PMU_NR_GP_COUNTERS,
+ nr_gp_counters);
+
+ run_vcpu(vcpu);
+
+ kvm_vm_free(vm);
+}
+
+static void guest_test_fixed_counters(void)
+{
+ uint64_t supported_bitmask = 0;
+ uint8_t nr_fixed_counters = 0;
+ uint8_t i;
+
+ /* Fixed counters require Architectural vPMU Version 2+. */
+ if (guest_get_pmu_version() >= 2)
+ nr_fixed_counters = this_cpu_property(X86_PROPERTY_PMU_NR_FIXED_COUNTERS);
+
+ /*
+ * The supported bitmask for fixed counters was introduced in PMU
+ * version 5.
+ */
+ if (guest_get_pmu_version() >= 5)
+ supported_bitmask = this_cpu_property(X86_PROPERTY_PMU_FIXED_COUNTERS_BITMASK);
+
+ guest_rd_wr_counters(MSR_CORE_PERF_FIXED_CTR0, MAX_NR_FIXED_COUNTERS,
+ nr_fixed_counters, supported_bitmask);
+
+ for (i = 0; i < MAX_NR_FIXED_COUNTERS; i++) {
+ uint8_t vector;
+ uint64_t val;
+
+ if (i >= nr_fixed_counters && !(supported_bitmask & BIT_ULL(i))) {
+ vector = wrmsr_safe(MSR_CORE_PERF_FIXED_CTR_CTRL,
+ FIXED_PMC_CTRL(i, FIXED_PMC_KERNEL));
+ __GUEST_ASSERT(vector == GP_VECTOR,
+ "Expected #GP for counter %u in FIXED_CTR_CTRL", i);
+
+ vector = wrmsr_safe(MSR_CORE_PERF_GLOBAL_CTRL,
+ FIXED_PMC_GLOBAL_CTRL_ENABLE(i));
+ __GUEST_ASSERT(vector == GP_VECTOR,
+ "Expected #GP for counter %u in PERF_GLOBAL_CTRL", i);
+ continue;
+ }
+
+ wrmsr(MSR_CORE_PERF_FIXED_CTR0 + i, 0);
+ wrmsr(MSR_CORE_PERF_FIXED_CTR_CTRL, FIXED_PMC_CTRL(i, FIXED_PMC_KERNEL));
+ wrmsr(MSR_CORE_PERF_GLOBAL_CTRL, FIXED_PMC_GLOBAL_CTRL_ENABLE(i));
+ __asm__ __volatile__("loop ." : "+c"((int){NUM_BRANCHES}));
+ wrmsr(MSR_CORE_PERF_GLOBAL_CTRL, 0);
+ val = rdmsr(MSR_CORE_PERF_FIXED_CTR0 + i);
+
+ GUEST_ASSERT_NE(val, 0);
+ }
+ GUEST_DONE();
+}
+
+static void test_fixed_counters(uint8_t pmu_version, uint64_t perf_capabilities,
+ uint8_t nr_fixed_counters,
+ uint32_t supported_bitmask)
+{
+ struct kvm_vcpu *vcpu;
+ struct kvm_vm *vm;
+
+ vm = pmu_vm_create_with_one_vcpu(&vcpu, guest_test_fixed_counters,
+ pmu_version, perf_capabilities);
+
+ vcpu_set_cpuid_property(vcpu, X86_PROPERTY_PMU_FIXED_COUNTERS_BITMASK,
+ supported_bitmask);
+ vcpu_set_cpuid_property(vcpu, X86_PROPERTY_PMU_NR_FIXED_COUNTERS,
+ nr_fixed_counters);
+
+ run_vcpu(vcpu);
+
+ kvm_vm_free(vm);
+}
+
+static void test_intel_counters(void)
+{
+ uint8_t nr_arch_events = kvm_cpu_property(X86_PROPERTY_PMU_EBX_BIT_VECTOR_LENGTH);
+ uint8_t nr_fixed_counters = kvm_cpu_property(X86_PROPERTY_PMU_NR_FIXED_COUNTERS);
+ uint8_t nr_gp_counters = kvm_cpu_property(X86_PROPERTY_PMU_NR_GP_COUNTERS);
+ uint8_t pmu_version = kvm_cpu_property(X86_PROPERTY_PMU_VERSION);
+ unsigned int i;
+ uint8_t v, j;
+ uint32_t k;
+
+ const uint64_t perf_caps[] = {
+ 0,
+ PMU_CAP_FW_WRITES,
+ };
+
+ /*
+ * Test up to PMU v5, which is the current maximum version defined by
+ * Intel, i.e. is the last version that is guaranteed to be backwards
+ * compatible with KVM's existing behavior.
+ */
+ uint8_t max_pmu_version = max_t(typeof(pmu_version), pmu_version, 5);
+
+ /*
+ * Detect the existence of events that aren't supported by selftests.
+ * This will (obviously) fail any time the kernel adds support for a
+ * new event, but it's worth paying that price to keep the test fresh.
+ */
+ TEST_ASSERT(nr_arch_events <= NR_INTEL_ARCH_EVENTS,
+ "New architectural event(s) detected; please update this test (length = %u, mask = %x)",
+ nr_arch_events, kvm_cpu_property(X86_PROPERTY_PMU_EVENTS_MASK));
+
+ /*
+ * Force iterating over known arch events regardless of whether or not
+ * KVM/hardware supports a given event.
+ */
+ nr_arch_events = max_t(typeof(nr_arch_events), nr_arch_events, NR_INTEL_ARCH_EVENTS);
+
+ for (v = 0; v <= max_pmu_version; v++) {
+ for (i = 0; i < ARRAY_SIZE(perf_caps); i++) {
+ if (!kvm_has_perf_caps && perf_caps[i])
+ continue;
+
+ pr_info("Testing arch events, PMU version %u, perf_caps = %lx\n",
+ v, perf_caps[i]);
+ /*
+ * To keep the total runtime reasonable, test every
+ * possible non-zero, non-reserved bitmap combination
+ * only with the native PMU version and the full bit
+ * vector length.
+ */
+ if (v == pmu_version) {
+ for (k = 1; k < (BIT(nr_arch_events) - 1); k++)
+ test_arch_events(v, perf_caps[i], nr_arch_events, k);
+ }
+ /*
+ * Test single bits for all PMU version and lengths up
+ * the number of events +1 (to verify KVM doesn't do
+ * weird things if the guest length is greater than the
+ * host length). Explicitly test a mask of '0' and all
+ * ones i.e. all events being available and unavailable.
+ */
+ for (j = 0; j <= nr_arch_events + 1; j++) {
+ test_arch_events(v, perf_caps[i], j, 0);
+ test_arch_events(v, perf_caps[i], j, 0xff);
+
+ for (k = 0; k < nr_arch_events; k++)
+ test_arch_events(v, perf_caps[i], j, BIT(k));
+ }
+
+ pr_info("Testing GP counters, PMU version %u, perf_caps = %lx\n",
+ v, perf_caps[i]);
+ for (j = 0; j <= nr_gp_counters; j++)
+ test_gp_counters(v, perf_caps[i], j);
+
+ pr_info("Testing fixed counters, PMU version %u, perf_caps = %lx\n",
+ v, perf_caps[i]);
+ for (j = 0; j <= nr_fixed_counters; j++) {
+ for (k = 0; k <= (BIT(nr_fixed_counters) - 1); k++)
+ test_fixed_counters(v, perf_caps[i], j, k);
+ }
+ }
+ }
+}
+
+int main(int argc, char *argv[])
+{
+ TEST_REQUIRE(kvm_is_pmu_enabled());
+
+ TEST_REQUIRE(host_cpu_is_intel);
+ TEST_REQUIRE(kvm_cpu_has_p(X86_PROPERTY_PMU_VERSION));
+ TEST_REQUIRE(kvm_cpu_property(X86_PROPERTY_PMU_VERSION) > 0);
+
+ kvm_pmu_version = kvm_cpu_property(X86_PROPERTY_PMU_VERSION);
+ kvm_has_perf_caps = kvm_cpu_has(X86_FEATURE_PDCM);
+ is_forced_emulation_enabled = kvm_is_forced_emulation_enabled();
+
+ test_intel_counters();
+
+ return 0;
+}
diff --git a/tools/testing/selftests/kvm/x86_64/pmu_event_filter_test.c b/tools/testing/selftests/kvm/x86_64/pmu_event_filter_test.c
new file mode 100644
index 000000000000..3c85d1ae9893
--- /dev/null
+++ b/tools/testing/selftests/kvm/x86_64/pmu_event_filter_test.c
@@ -0,0 +1,910 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Test for x86 KVM_SET_PMU_EVENT_FILTER.
+ *
+ * Copyright (C) 2022, Google LLC.
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2.
+ *
+ * Verifies the expected behavior of allow lists and deny lists for
+ * virtual PMU events.
+ */
+
+#define _GNU_SOURCE /* for program_invocation_short_name */
+
+#include "kvm_util.h"
+#include "pmu.h"
+#include "processor.h"
+#include "test_util.h"
+
+#define NUM_BRANCHES 42
+#define MAX_TEST_EVENTS 10
+
+#define PMU_EVENT_FILTER_INVALID_ACTION (KVM_PMU_EVENT_DENY + 1)
+#define PMU_EVENT_FILTER_INVALID_FLAGS (KVM_PMU_EVENT_FLAGS_VALID_MASK << 1)
+#define PMU_EVENT_FILTER_INVALID_NEVENTS (KVM_PMU_EVENT_FILTER_MAX_EVENTS + 1)
+
+struct __kvm_pmu_event_filter {
+ __u32 action;
+ __u32 nevents;
+ __u32 fixed_counter_bitmap;
+ __u32 flags;
+ __u32 pad[4];
+ __u64 events[KVM_PMU_EVENT_FILTER_MAX_EVENTS];
+};
+
+/*
+ * This event list comprises Intel's known architectural events, plus AMD's
+ * "retired branch instructions" for Zen1-Zen3 (and* possibly other AMD CPUs).
+ * Note, AMD and Intel use the same encoding for instructions retired.
+ */
+kvm_static_assert(INTEL_ARCH_INSTRUCTIONS_RETIRED == AMD_ZEN_INSTRUCTIONS_RETIRED);
+
+static const struct __kvm_pmu_event_filter base_event_filter = {
+ .nevents = ARRAY_SIZE(base_event_filter.events),
+ .events = {
+ INTEL_ARCH_CPU_CYCLES,
+ INTEL_ARCH_INSTRUCTIONS_RETIRED,
+ INTEL_ARCH_REFERENCE_CYCLES,
+ INTEL_ARCH_LLC_REFERENCES,
+ INTEL_ARCH_LLC_MISSES,
+ INTEL_ARCH_BRANCHES_RETIRED,
+ INTEL_ARCH_BRANCHES_MISPREDICTED,
+ INTEL_ARCH_TOPDOWN_SLOTS,
+ AMD_ZEN_BRANCHES_RETIRED,
+ },
+};
+
+struct {
+ uint64_t loads;
+ uint64_t stores;
+ uint64_t loads_stores;
+ uint64_t branches_retired;
+ uint64_t instructions_retired;
+} pmc_results;
+
+/*
+ * If we encounter a #GP during the guest PMU sanity check, then the guest
+ * PMU is not functional. Inform the hypervisor via GUEST_SYNC(0).
+ */
+static void guest_gp_handler(struct ex_regs *regs)
+{
+ GUEST_SYNC(-EFAULT);
+}
+
+/*
+ * Check that we can write a new value to the given MSR and read it back.
+ * The caller should provide a non-empty set of bits that are safe to flip.
+ *
+ * Return on success. GUEST_SYNC(0) on error.
+ */
+static void check_msr(uint32_t msr, uint64_t bits_to_flip)
+{
+ uint64_t v = rdmsr(msr) ^ bits_to_flip;
+
+ wrmsr(msr, v);
+ if (rdmsr(msr) != v)
+ GUEST_SYNC(-EIO);
+
+ v ^= bits_to_flip;
+ wrmsr(msr, v);
+ if (rdmsr(msr) != v)
+ GUEST_SYNC(-EIO);
+}
+
+static void run_and_measure_loop(uint32_t msr_base)
+{
+ const uint64_t branches_retired = rdmsr(msr_base + 0);
+ const uint64_t insn_retired = rdmsr(msr_base + 1);
+
+ __asm__ __volatile__("loop ." : "+c"((int){NUM_BRANCHES}));
+
+ pmc_results.branches_retired = rdmsr(msr_base + 0) - branches_retired;
+ pmc_results.instructions_retired = rdmsr(msr_base + 1) - insn_retired;
+}
+
+static void intel_guest_code(void)
+{
+ check_msr(MSR_CORE_PERF_GLOBAL_CTRL, 1);
+ check_msr(MSR_P6_EVNTSEL0, 0xffff);
+ check_msr(MSR_IA32_PMC0, 0xffff);
+ GUEST_SYNC(0);
+
+ for (;;) {
+ wrmsr(MSR_CORE_PERF_GLOBAL_CTRL, 0);
+ wrmsr(MSR_P6_EVNTSEL0, ARCH_PERFMON_EVENTSEL_ENABLE |
+ ARCH_PERFMON_EVENTSEL_OS | INTEL_ARCH_BRANCHES_RETIRED);
+ wrmsr(MSR_P6_EVNTSEL1, ARCH_PERFMON_EVENTSEL_ENABLE |
+ ARCH_PERFMON_EVENTSEL_OS | INTEL_ARCH_INSTRUCTIONS_RETIRED);
+ wrmsr(MSR_CORE_PERF_GLOBAL_CTRL, 0x3);
+
+ run_and_measure_loop(MSR_IA32_PMC0);
+ GUEST_SYNC(0);
+ }
+}
+
+/*
+ * To avoid needing a check for CPUID.80000001:ECX.PerfCtrExtCore[bit 23],
+ * this code uses the always-available, legacy K7 PMU MSRs, which alias to
+ * the first four of the six extended core PMU MSRs.
+ */
+static void amd_guest_code(void)
+{
+ check_msr(MSR_K7_EVNTSEL0, 0xffff);
+ check_msr(MSR_K7_PERFCTR0, 0xffff);
+ GUEST_SYNC(0);
+
+ for (;;) {
+ wrmsr(MSR_K7_EVNTSEL0, 0);
+ wrmsr(MSR_K7_EVNTSEL0, ARCH_PERFMON_EVENTSEL_ENABLE |
+ ARCH_PERFMON_EVENTSEL_OS | AMD_ZEN_BRANCHES_RETIRED);
+ wrmsr(MSR_K7_EVNTSEL1, ARCH_PERFMON_EVENTSEL_ENABLE |
+ ARCH_PERFMON_EVENTSEL_OS | AMD_ZEN_INSTRUCTIONS_RETIRED);
+
+ run_and_measure_loop(MSR_K7_PERFCTR0);
+ GUEST_SYNC(0);
+ }
+}
+
+/*
+ * Run the VM to the next GUEST_SYNC(value), and return the value passed
+ * to the sync. Any other exit from the guest is fatal.
+ */
+static uint64_t run_vcpu_to_sync(struct kvm_vcpu *vcpu)
+{
+ struct ucall uc;
+
+ vcpu_run(vcpu);
+ TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_IO);
+ get_ucall(vcpu, &uc);
+ TEST_ASSERT(uc.cmd == UCALL_SYNC,
+ "Received ucall other than UCALL_SYNC: %lu", uc.cmd);
+ return uc.args[1];
+}
+
+static void run_vcpu_and_sync_pmc_results(struct kvm_vcpu *vcpu)
+{
+ uint64_t r;
+
+ memset(&pmc_results, 0, sizeof(pmc_results));
+ sync_global_to_guest(vcpu->vm, pmc_results);
+
+ r = run_vcpu_to_sync(vcpu);
+ TEST_ASSERT(!r, "Unexpected sync value: 0x%lx", r);
+
+ sync_global_from_guest(vcpu->vm, pmc_results);
+}
+
+/*
+ * In a nested environment or if the vPMU is disabled, the guest PMU
+ * might not work as architected (accessing the PMU MSRs may raise
+ * #GP, or writes could simply be discarded). In those situations,
+ * there is no point in running these tests. The guest code will perform
+ * a sanity check and then GUEST_SYNC(success). In the case of failure,
+ * the behavior of the guest on resumption is undefined.
+ */
+static bool sanity_check_pmu(struct kvm_vcpu *vcpu)
+{
+ uint64_t r;
+
+ vm_install_exception_handler(vcpu->vm, GP_VECTOR, guest_gp_handler);
+ r = run_vcpu_to_sync(vcpu);
+ vm_install_exception_handler(vcpu->vm, GP_VECTOR, NULL);
+
+ return !r;
+}
+
+/*
+ * Remove the first occurrence of 'event' (if any) from the filter's
+ * event list.
+ */
+static void remove_event(struct __kvm_pmu_event_filter *f, uint64_t event)
+{
+ bool found = false;
+ int i;
+
+ for (i = 0; i < f->nevents; i++) {
+ if (found)
+ f->events[i - 1] = f->events[i];
+ else
+ found = f->events[i] == event;
+ }
+ if (found)
+ f->nevents--;
+}
+
+#define ASSERT_PMC_COUNTING_INSTRUCTIONS() \
+do { \
+ uint64_t br = pmc_results.branches_retired; \
+ uint64_t ir = pmc_results.instructions_retired; \
+ \
+ if (br && br != NUM_BRANCHES) \
+ pr_info("%s: Branch instructions retired = %lu (expected %u)\n", \
+ __func__, br, NUM_BRANCHES); \
+ TEST_ASSERT(br, "%s: Branch instructions retired = %lu (expected > 0)", \
+ __func__, br); \
+ TEST_ASSERT(ir, "%s: Instructions retired = %lu (expected > 0)", \
+ __func__, ir); \
+} while (0)
+
+#define ASSERT_PMC_NOT_COUNTING_INSTRUCTIONS() \
+do { \
+ uint64_t br = pmc_results.branches_retired; \
+ uint64_t ir = pmc_results.instructions_retired; \
+ \
+ TEST_ASSERT(!br, "%s: Branch instructions retired = %lu (expected 0)", \
+ __func__, br); \
+ TEST_ASSERT(!ir, "%s: Instructions retired = %lu (expected 0)", \
+ __func__, ir); \
+} while (0)
+
+static void test_without_filter(struct kvm_vcpu *vcpu)
+{
+ run_vcpu_and_sync_pmc_results(vcpu);
+
+ ASSERT_PMC_COUNTING_INSTRUCTIONS();
+}
+
+static void test_with_filter(struct kvm_vcpu *vcpu,
+ struct __kvm_pmu_event_filter *__f)
+{
+ struct kvm_pmu_event_filter *f = (void *)__f;
+
+ vm_ioctl(vcpu->vm, KVM_SET_PMU_EVENT_FILTER, f);
+ run_vcpu_and_sync_pmc_results(vcpu);
+}
+
+static void test_amd_deny_list(struct kvm_vcpu *vcpu)
+{
+ struct __kvm_pmu_event_filter f = {
+ .action = KVM_PMU_EVENT_DENY,
+ .nevents = 1,
+ .events = {
+ RAW_EVENT(0x1C2, 0),
+ },
+ };
+
+ test_with_filter(vcpu, &f);
+
+ ASSERT_PMC_COUNTING_INSTRUCTIONS();
+}
+
+static void test_member_deny_list(struct kvm_vcpu *vcpu)
+{
+ struct __kvm_pmu_event_filter f = base_event_filter;
+
+ f.action = KVM_PMU_EVENT_DENY;
+ test_with_filter(vcpu, &f);
+
+ ASSERT_PMC_NOT_COUNTING_INSTRUCTIONS();
+}
+
+static void test_member_allow_list(struct kvm_vcpu *vcpu)
+{
+ struct __kvm_pmu_event_filter f = base_event_filter;
+
+ f.action = KVM_PMU_EVENT_ALLOW;
+ test_with_filter(vcpu, &f);
+
+ ASSERT_PMC_COUNTING_INSTRUCTIONS();
+}
+
+static void test_not_member_deny_list(struct kvm_vcpu *vcpu)
+{
+ struct __kvm_pmu_event_filter f = base_event_filter;
+
+ f.action = KVM_PMU_EVENT_DENY;
+
+ remove_event(&f, INTEL_ARCH_INSTRUCTIONS_RETIRED);
+ remove_event(&f, INTEL_ARCH_BRANCHES_RETIRED);
+ remove_event(&f, AMD_ZEN_BRANCHES_RETIRED);
+ test_with_filter(vcpu, &f);
+
+ ASSERT_PMC_COUNTING_INSTRUCTIONS();
+}
+
+static void test_not_member_allow_list(struct kvm_vcpu *vcpu)
+{
+ struct __kvm_pmu_event_filter f = base_event_filter;
+
+ f.action = KVM_PMU_EVENT_ALLOW;
+
+ remove_event(&f, INTEL_ARCH_INSTRUCTIONS_RETIRED);
+ remove_event(&f, INTEL_ARCH_BRANCHES_RETIRED);
+ remove_event(&f, AMD_ZEN_BRANCHES_RETIRED);
+ test_with_filter(vcpu, &f);
+
+ ASSERT_PMC_NOT_COUNTING_INSTRUCTIONS();
+}
+
+/*
+ * Verify that setting KVM_PMU_CAP_DISABLE prevents the use of the PMU.
+ *
+ * Note that KVM_CAP_PMU_CAPABILITY must be invoked prior to creating VCPUs.
+ */
+static void test_pmu_config_disable(void (*guest_code)(void))
+{
+ struct kvm_vcpu *vcpu;
+ int r;
+ struct kvm_vm *vm;
+
+ r = kvm_check_cap(KVM_CAP_PMU_CAPABILITY);
+ if (!(r & KVM_PMU_CAP_DISABLE))
+ return;
+
+ vm = vm_create(1);
+
+ vm_enable_cap(vm, KVM_CAP_PMU_CAPABILITY, KVM_PMU_CAP_DISABLE);
+
+ vcpu = vm_vcpu_add(vm, 0, guest_code);
+ vm_init_descriptor_tables(vm);
+ vcpu_init_descriptor_tables(vcpu);
+
+ TEST_ASSERT(!sanity_check_pmu(vcpu),
+ "Guest should not be able to use disabled PMU.");
+
+ kvm_vm_free(vm);
+}
+
+/*
+ * On Intel, check for a non-zero PMU version, at least one general-purpose
+ * counter per logical processor, and support for counting the number of branch
+ * instructions retired.
+ */
+static bool use_intel_pmu(void)
+{
+ return host_cpu_is_intel &&
+ kvm_cpu_property(X86_PROPERTY_PMU_VERSION) &&
+ kvm_cpu_property(X86_PROPERTY_PMU_NR_GP_COUNTERS) &&
+ kvm_pmu_has(X86_PMU_FEATURE_BRANCH_INSNS_RETIRED);
+}
+
+static bool is_zen1(uint32_t family, uint32_t model)
+{
+ return family == 0x17 && model <= 0x0f;
+}
+
+static bool is_zen2(uint32_t family, uint32_t model)
+{
+ return family == 0x17 && model >= 0x30 && model <= 0x3f;
+}
+
+static bool is_zen3(uint32_t family, uint32_t model)
+{
+ return family == 0x19 && model <= 0x0f;
+}
+
+/*
+ * Determining AMD support for a PMU event requires consulting the AMD
+ * PPR for the CPU or reference material derived therefrom. The AMD
+ * test code herein has been verified to work on Zen1, Zen2, and Zen3.
+ *
+ * Feel free to add more AMD CPUs that are documented to support event
+ * select 0xc2 umask 0 as "retired branch instructions."
+ */
+static bool use_amd_pmu(void)
+{
+ uint32_t family = kvm_cpu_family();
+ uint32_t model = kvm_cpu_model();
+
+ return host_cpu_is_amd &&
+ (is_zen1(family, model) ||
+ is_zen2(family, model) ||
+ is_zen3(family, model));
+}
+
+/*
+ * "MEM_INST_RETIRED.ALL_LOADS", "MEM_INST_RETIRED.ALL_STORES", and
+ * "MEM_INST_RETIRED.ANY" from https://perfmon-events.intel.com/
+ * supported on Intel Xeon processors:
+ * - Sapphire Rapids, Ice Lake, Cascade Lake, Skylake.
+ */
+#define MEM_INST_RETIRED 0xD0
+#define MEM_INST_RETIRED_LOAD RAW_EVENT(MEM_INST_RETIRED, 0x81)
+#define MEM_INST_RETIRED_STORE RAW_EVENT(MEM_INST_RETIRED, 0x82)
+#define MEM_INST_RETIRED_LOAD_STORE RAW_EVENT(MEM_INST_RETIRED, 0x83)
+
+static bool supports_event_mem_inst_retired(void)
+{
+ uint32_t eax, ebx, ecx, edx;
+
+ cpuid(1, &eax, &ebx, &ecx, &edx);
+ if (x86_family(eax) == 0x6) {
+ switch (x86_model(eax)) {
+ /* Sapphire Rapids */
+ case 0x8F:
+ /* Ice Lake */
+ case 0x6A:
+ /* Skylake */
+ /* Cascade Lake */
+ case 0x55:
+ return true;
+ }
+ }
+
+ return false;
+}
+
+/*
+ * "LS Dispatch", from Processor Programming Reference
+ * (PPR) for AMD Family 17h Model 01h, Revision B1 Processors,
+ * Preliminary Processor Programming Reference (PPR) for AMD Family
+ * 17h Model 31h, Revision B0 Processors, and Preliminary Processor
+ * Programming Reference (PPR) for AMD Family 19h Model 01h, Revision
+ * B1 Processors Volume 1 of 2.
+ */
+#define LS_DISPATCH 0x29
+#define LS_DISPATCH_LOAD RAW_EVENT(LS_DISPATCH, BIT(0))
+#define LS_DISPATCH_STORE RAW_EVENT(LS_DISPATCH, BIT(1))
+#define LS_DISPATCH_LOAD_STORE RAW_EVENT(LS_DISPATCH, BIT(2))
+
+#define INCLUDE_MASKED_ENTRY(event_select, mask, match) \
+ KVM_PMU_ENCODE_MASKED_ENTRY(event_select, mask, match, false)
+#define EXCLUDE_MASKED_ENTRY(event_select, mask, match) \
+ KVM_PMU_ENCODE_MASKED_ENTRY(event_select, mask, match, true)
+
+static void masked_events_guest_test(uint32_t msr_base)
+{
+ /*
+ * The actual value of the counters don't determine the outcome of
+ * the test. Only that they are zero or non-zero.
+ */
+ const uint64_t loads = rdmsr(msr_base + 0);
+ const uint64_t stores = rdmsr(msr_base + 1);
+ const uint64_t loads_stores = rdmsr(msr_base + 2);
+ int val;
+
+
+ __asm__ __volatile__("movl $0, %[v];"
+ "movl %[v], %%eax;"
+ "incl %[v];"
+ : [v]"+m"(val) :: "eax");
+
+ pmc_results.loads = rdmsr(msr_base + 0) - loads;
+ pmc_results.stores = rdmsr(msr_base + 1) - stores;
+ pmc_results.loads_stores = rdmsr(msr_base + 2) - loads_stores;
+}
+
+static void intel_masked_events_guest_code(void)
+{
+ for (;;) {
+ wrmsr(MSR_CORE_PERF_GLOBAL_CTRL, 0);
+
+ wrmsr(MSR_P6_EVNTSEL0 + 0, ARCH_PERFMON_EVENTSEL_ENABLE |
+ ARCH_PERFMON_EVENTSEL_OS | MEM_INST_RETIRED_LOAD);
+ wrmsr(MSR_P6_EVNTSEL0 + 1, ARCH_PERFMON_EVENTSEL_ENABLE |
+ ARCH_PERFMON_EVENTSEL_OS | MEM_INST_RETIRED_STORE);
+ wrmsr(MSR_P6_EVNTSEL0 + 2, ARCH_PERFMON_EVENTSEL_ENABLE |
+ ARCH_PERFMON_EVENTSEL_OS | MEM_INST_RETIRED_LOAD_STORE);
+
+ wrmsr(MSR_CORE_PERF_GLOBAL_CTRL, 0x7);
+
+ masked_events_guest_test(MSR_IA32_PMC0);
+ GUEST_SYNC(0);
+ }
+}
+
+static void amd_masked_events_guest_code(void)
+{
+ for (;;) {
+ wrmsr(MSR_K7_EVNTSEL0, 0);
+ wrmsr(MSR_K7_EVNTSEL1, 0);
+ wrmsr(MSR_K7_EVNTSEL2, 0);
+
+ wrmsr(MSR_K7_EVNTSEL0, ARCH_PERFMON_EVENTSEL_ENABLE |
+ ARCH_PERFMON_EVENTSEL_OS | LS_DISPATCH_LOAD);
+ wrmsr(MSR_K7_EVNTSEL1, ARCH_PERFMON_EVENTSEL_ENABLE |
+ ARCH_PERFMON_EVENTSEL_OS | LS_DISPATCH_STORE);
+ wrmsr(MSR_K7_EVNTSEL2, ARCH_PERFMON_EVENTSEL_ENABLE |
+ ARCH_PERFMON_EVENTSEL_OS | LS_DISPATCH_LOAD_STORE);
+
+ masked_events_guest_test(MSR_K7_PERFCTR0);
+ GUEST_SYNC(0);
+ }
+}
+
+static void run_masked_events_test(struct kvm_vcpu *vcpu,
+ const uint64_t masked_events[],
+ const int nmasked_events)
+{
+ struct __kvm_pmu_event_filter f = {
+ .nevents = nmasked_events,
+ .action = KVM_PMU_EVENT_ALLOW,
+ .flags = KVM_PMU_EVENT_FLAG_MASKED_EVENTS,
+ };
+
+ memcpy(f.events, masked_events, sizeof(uint64_t) * nmasked_events);
+ test_with_filter(vcpu, &f);
+}
+
+#define ALLOW_LOADS BIT(0)
+#define ALLOW_STORES BIT(1)
+#define ALLOW_LOADS_STORES BIT(2)
+
+struct masked_events_test {
+ uint64_t intel_events[MAX_TEST_EVENTS];
+ uint64_t intel_event_end;
+ uint64_t amd_events[MAX_TEST_EVENTS];
+ uint64_t amd_event_end;
+ const char *msg;
+ uint32_t flags;
+};
+
+/*
+ * These are the test cases for the masked events tests.
+ *
+ * For each test, the guest enables 3 PMU counters (loads, stores,
+ * loads + stores). The filter is then set in KVM with the masked events
+ * provided. The test then verifies that the counters agree with which
+ * ones should be counting and which ones should be filtered.
+ */
+const struct masked_events_test test_cases[] = {
+ {
+ .intel_events = {
+ INCLUDE_MASKED_ENTRY(MEM_INST_RETIRED, 0xFF, 0x81),
+ },
+ .amd_events = {
+ INCLUDE_MASKED_ENTRY(LS_DISPATCH, 0xFF, BIT(0)),
+ },
+ .msg = "Only allow loads.",
+ .flags = ALLOW_LOADS,
+ }, {
+ .intel_events = {
+ INCLUDE_MASKED_ENTRY(MEM_INST_RETIRED, 0xFF, 0x82),
+ },
+ .amd_events = {
+ INCLUDE_MASKED_ENTRY(LS_DISPATCH, 0xFF, BIT(1)),
+ },
+ .msg = "Only allow stores.",
+ .flags = ALLOW_STORES,
+ }, {
+ .intel_events = {
+ INCLUDE_MASKED_ENTRY(MEM_INST_RETIRED, 0xFF, 0x83),
+ },
+ .amd_events = {
+ INCLUDE_MASKED_ENTRY(LS_DISPATCH, 0xFF, BIT(2)),
+ },
+ .msg = "Only allow loads + stores.",
+ .flags = ALLOW_LOADS_STORES,
+ }, {
+ .intel_events = {
+ INCLUDE_MASKED_ENTRY(MEM_INST_RETIRED, 0x7C, 0),
+ EXCLUDE_MASKED_ENTRY(MEM_INST_RETIRED, 0xFF, 0x83),
+ },
+ .amd_events = {
+ INCLUDE_MASKED_ENTRY(LS_DISPATCH, ~(BIT(0) | BIT(1)), 0),
+ },
+ .msg = "Only allow loads and stores.",
+ .flags = ALLOW_LOADS | ALLOW_STORES,
+ }, {
+ .intel_events = {
+ INCLUDE_MASKED_ENTRY(MEM_INST_RETIRED, 0x7C, 0),
+ EXCLUDE_MASKED_ENTRY(MEM_INST_RETIRED, 0xFF, 0x82),
+ },
+ .amd_events = {
+ INCLUDE_MASKED_ENTRY(LS_DISPATCH, 0xF8, 0),
+ EXCLUDE_MASKED_ENTRY(LS_DISPATCH, 0xFF, BIT(1)),
+ },
+ .msg = "Only allow loads and loads + stores.",
+ .flags = ALLOW_LOADS | ALLOW_LOADS_STORES
+ }, {
+ .intel_events = {
+ INCLUDE_MASKED_ENTRY(MEM_INST_RETIRED, 0xFE, 0x82),
+ },
+ .amd_events = {
+ INCLUDE_MASKED_ENTRY(LS_DISPATCH, 0xF8, 0),
+ EXCLUDE_MASKED_ENTRY(LS_DISPATCH, 0xFF, BIT(0)),
+ },
+ .msg = "Only allow stores and loads + stores.",
+ .flags = ALLOW_STORES | ALLOW_LOADS_STORES
+ }, {
+ .intel_events = {
+ INCLUDE_MASKED_ENTRY(MEM_INST_RETIRED, 0x7C, 0),
+ },
+ .amd_events = {
+ INCLUDE_MASKED_ENTRY(LS_DISPATCH, 0xF8, 0),
+ },
+ .msg = "Only allow loads, stores, and loads + stores.",
+ .flags = ALLOW_LOADS | ALLOW_STORES | ALLOW_LOADS_STORES
+ },
+};
+
+static int append_test_events(const struct masked_events_test *test,
+ uint64_t *events, int nevents)
+{
+ const uint64_t *evts;
+ int i;
+
+ evts = use_intel_pmu() ? test->intel_events : test->amd_events;
+ for (i = 0; i < MAX_TEST_EVENTS; i++) {
+ if (evts[i] == 0)
+ break;
+
+ events[nevents + i] = evts[i];
+ }
+
+ return nevents + i;
+}
+
+static bool bool_eq(bool a, bool b)
+{
+ return a == b;
+}
+
+static void run_masked_events_tests(struct kvm_vcpu *vcpu, uint64_t *events,
+ int nevents)
+{
+ int ntests = ARRAY_SIZE(test_cases);
+ int i, n;
+
+ for (i = 0; i < ntests; i++) {
+ const struct masked_events_test *test = &test_cases[i];
+
+ /* Do any test case events overflow MAX_TEST_EVENTS? */
+ assert(test->intel_event_end == 0);
+ assert(test->amd_event_end == 0);
+
+ n = append_test_events(test, events, nevents);
+
+ run_masked_events_test(vcpu, events, n);
+
+ TEST_ASSERT(bool_eq(pmc_results.loads, test->flags & ALLOW_LOADS) &&
+ bool_eq(pmc_results.stores, test->flags & ALLOW_STORES) &&
+ bool_eq(pmc_results.loads_stores,
+ test->flags & ALLOW_LOADS_STORES),
+ "%s loads: %lu, stores: %lu, loads + stores: %lu",
+ test->msg, pmc_results.loads, pmc_results.stores,
+ pmc_results.loads_stores);
+ }
+}
+
+static void add_dummy_events(uint64_t *events, int nevents)
+{
+ int i;
+
+ for (i = 0; i < nevents; i++) {
+ int event_select = i % 0xFF;
+ bool exclude = ((i % 4) == 0);
+
+ if (event_select == MEM_INST_RETIRED ||
+ event_select == LS_DISPATCH)
+ event_select++;
+
+ events[i] = KVM_PMU_ENCODE_MASKED_ENTRY(event_select, 0,
+ 0, exclude);
+ }
+}
+
+static void test_masked_events(struct kvm_vcpu *vcpu)
+{
+ int nevents = KVM_PMU_EVENT_FILTER_MAX_EVENTS - MAX_TEST_EVENTS;
+ uint64_t events[KVM_PMU_EVENT_FILTER_MAX_EVENTS];
+
+ /* Run the test cases against a sparse PMU event filter. */
+ run_masked_events_tests(vcpu, events, 0);
+
+ /* Run the test cases against a dense PMU event filter. */
+ add_dummy_events(events, KVM_PMU_EVENT_FILTER_MAX_EVENTS);
+ run_masked_events_tests(vcpu, events, nevents);
+}
+
+static int set_pmu_event_filter(struct kvm_vcpu *vcpu,
+ struct __kvm_pmu_event_filter *__f)
+{
+ struct kvm_pmu_event_filter *f = (void *)__f;
+
+ return __vm_ioctl(vcpu->vm, KVM_SET_PMU_EVENT_FILTER, f);
+}
+
+static int set_pmu_single_event_filter(struct kvm_vcpu *vcpu, uint64_t event,
+ uint32_t flags, uint32_t action)
+{
+ struct __kvm_pmu_event_filter f = {
+ .nevents = 1,
+ .flags = flags,
+ .action = action,
+ .events = {
+ event,
+ },
+ };
+
+ return set_pmu_event_filter(vcpu, &f);
+}
+
+static void test_filter_ioctl(struct kvm_vcpu *vcpu)
+{
+ uint8_t nr_fixed_counters = kvm_cpu_property(X86_PROPERTY_PMU_NR_FIXED_COUNTERS);
+ struct __kvm_pmu_event_filter f;
+ uint64_t e = ~0ul;
+ int r;
+
+ /*
+ * Unfortunately having invalid bits set in event data is expected to
+ * pass when flags == 0 (bits other than eventsel+umask).
+ */
+ r = set_pmu_single_event_filter(vcpu, e, 0, KVM_PMU_EVENT_ALLOW);
+ TEST_ASSERT(r == 0, "Valid PMU Event Filter is failing");
+
+ r = set_pmu_single_event_filter(vcpu, e,
+ KVM_PMU_EVENT_FLAG_MASKED_EVENTS,
+ KVM_PMU_EVENT_ALLOW);
+ TEST_ASSERT(r != 0, "Invalid PMU Event Filter is expected to fail");
+
+ e = KVM_PMU_ENCODE_MASKED_ENTRY(0xff, 0xff, 0xff, 0xf);
+ r = set_pmu_single_event_filter(vcpu, e,
+ KVM_PMU_EVENT_FLAG_MASKED_EVENTS,
+ KVM_PMU_EVENT_ALLOW);
+ TEST_ASSERT(r == 0, "Valid PMU Event Filter is failing");
+
+ f = base_event_filter;
+ f.action = PMU_EVENT_FILTER_INVALID_ACTION;
+ r = set_pmu_event_filter(vcpu, &f);
+ TEST_ASSERT(r, "Set invalid action is expected to fail");
+
+ f = base_event_filter;
+ f.flags = PMU_EVENT_FILTER_INVALID_FLAGS;
+ r = set_pmu_event_filter(vcpu, &f);
+ TEST_ASSERT(r, "Set invalid flags is expected to fail");
+
+ f = base_event_filter;
+ f.nevents = PMU_EVENT_FILTER_INVALID_NEVENTS;
+ r = set_pmu_event_filter(vcpu, &f);
+ TEST_ASSERT(r, "Exceeding the max number of filter events should fail");
+
+ f = base_event_filter;
+ f.fixed_counter_bitmap = ~GENMASK_ULL(nr_fixed_counters, 0);
+ r = set_pmu_event_filter(vcpu, &f);
+ TEST_ASSERT(!r, "Masking non-existent fixed counters should be allowed");
+}
+
+static void intel_run_fixed_counter_guest_code(uint8_t idx)
+{
+ for (;;) {
+ wrmsr(MSR_CORE_PERF_GLOBAL_CTRL, 0);
+ wrmsr(MSR_CORE_PERF_FIXED_CTR0 + idx, 0);
+
+ /* Only OS_EN bit is enabled for fixed counter[idx]. */
+ wrmsr(MSR_CORE_PERF_FIXED_CTR_CTRL, FIXED_PMC_CTRL(idx, FIXED_PMC_KERNEL));
+ wrmsr(MSR_CORE_PERF_GLOBAL_CTRL, FIXED_PMC_GLOBAL_CTRL_ENABLE(idx));
+ __asm__ __volatile__("loop ." : "+c"((int){NUM_BRANCHES}));
+ wrmsr(MSR_CORE_PERF_GLOBAL_CTRL, 0);
+
+ GUEST_SYNC(rdmsr(MSR_CORE_PERF_FIXED_CTR0 + idx));
+ }
+}
+
+static uint64_t test_with_fixed_counter_filter(struct kvm_vcpu *vcpu,
+ uint32_t action, uint32_t bitmap)
+{
+ struct __kvm_pmu_event_filter f = {
+ .action = action,
+ .fixed_counter_bitmap = bitmap,
+ };
+ set_pmu_event_filter(vcpu, &f);
+
+ return run_vcpu_to_sync(vcpu);
+}
+
+static uint64_t test_set_gp_and_fixed_event_filter(struct kvm_vcpu *vcpu,
+ uint32_t action,
+ uint32_t bitmap)
+{
+ struct __kvm_pmu_event_filter f = base_event_filter;
+
+ f.action = action;
+ f.fixed_counter_bitmap = bitmap;
+ set_pmu_event_filter(vcpu, &f);
+
+ return run_vcpu_to_sync(vcpu);
+}
+
+static void __test_fixed_counter_bitmap(struct kvm_vcpu *vcpu, uint8_t idx,
+ uint8_t nr_fixed_counters)
+{
+ unsigned int i;
+ uint32_t bitmap;
+ uint64_t count;
+
+ TEST_ASSERT(nr_fixed_counters < sizeof(bitmap) * 8,
+ "Invalid nr_fixed_counters");
+
+ /*
+ * Check the fixed performance counter can count normally when KVM
+ * userspace doesn't set any pmu filter.
+ */
+ count = run_vcpu_to_sync(vcpu);
+ TEST_ASSERT(count, "Unexpected count value: %ld", count);
+
+ for (i = 0; i < BIT(nr_fixed_counters); i++) {
+ bitmap = BIT(i);
+ count = test_with_fixed_counter_filter(vcpu, KVM_PMU_EVENT_ALLOW,
+ bitmap);
+ TEST_ASSERT_EQ(!!count, !!(bitmap & BIT(idx)));
+
+ count = test_with_fixed_counter_filter(vcpu, KVM_PMU_EVENT_DENY,
+ bitmap);
+ TEST_ASSERT_EQ(!!count, !(bitmap & BIT(idx)));
+
+ /*
+ * Check that fixed_counter_bitmap has higher priority than
+ * events[] when both are set.
+ */
+ count = test_set_gp_and_fixed_event_filter(vcpu,
+ KVM_PMU_EVENT_ALLOW,
+ bitmap);
+ TEST_ASSERT_EQ(!!count, !!(bitmap & BIT(idx)));
+
+ count = test_set_gp_and_fixed_event_filter(vcpu,
+ KVM_PMU_EVENT_DENY,
+ bitmap);
+ TEST_ASSERT_EQ(!!count, !(bitmap & BIT(idx)));
+ }
+}
+
+static void test_fixed_counter_bitmap(void)
+{
+ uint8_t nr_fixed_counters = kvm_cpu_property(X86_PROPERTY_PMU_NR_FIXED_COUNTERS);
+ struct kvm_vm *vm;
+ struct kvm_vcpu *vcpu;
+ uint8_t idx;
+
+ /*
+ * Check that pmu_event_filter works as expected when it's applied to
+ * fixed performance counters.
+ */
+ for (idx = 0; idx < nr_fixed_counters; idx++) {
+ vm = vm_create_with_one_vcpu(&vcpu,
+ intel_run_fixed_counter_guest_code);
+ vcpu_args_set(vcpu, 1, idx);
+ __test_fixed_counter_bitmap(vcpu, idx, nr_fixed_counters);
+ kvm_vm_free(vm);
+ }
+}
+
+int main(int argc, char *argv[])
+{
+ void (*guest_code)(void);
+ struct kvm_vcpu *vcpu, *vcpu2 = NULL;
+ struct kvm_vm *vm;
+
+ TEST_REQUIRE(kvm_is_pmu_enabled());
+ TEST_REQUIRE(kvm_has_cap(KVM_CAP_PMU_EVENT_FILTER));
+ TEST_REQUIRE(kvm_has_cap(KVM_CAP_PMU_EVENT_MASKED_EVENTS));
+
+ TEST_REQUIRE(use_intel_pmu() || use_amd_pmu());
+ guest_code = use_intel_pmu() ? intel_guest_code : amd_guest_code;
+
+ vm = vm_create_with_one_vcpu(&vcpu, guest_code);
+
+ vm_init_descriptor_tables(vm);
+ vcpu_init_descriptor_tables(vcpu);
+
+ TEST_REQUIRE(sanity_check_pmu(vcpu));
+
+ if (use_amd_pmu())
+ test_amd_deny_list(vcpu);
+
+ test_without_filter(vcpu);
+ test_member_deny_list(vcpu);
+ test_member_allow_list(vcpu);
+ test_not_member_deny_list(vcpu);
+ test_not_member_allow_list(vcpu);
+
+ if (use_intel_pmu() &&
+ supports_event_mem_inst_retired() &&
+ kvm_cpu_property(X86_PROPERTY_PMU_NR_GP_COUNTERS) >= 3)
+ vcpu2 = vm_vcpu_add(vm, 2, intel_masked_events_guest_code);
+ else if (use_amd_pmu())
+ vcpu2 = vm_vcpu_add(vm, 2, amd_masked_events_guest_code);
+
+ if (vcpu2)
+ test_masked_events(vcpu2);
+ test_filter_ioctl(vcpu);
+
+ kvm_vm_free(vm);
+
+ test_pmu_config_disable(guest_code);
+ test_fixed_counter_bitmap();
+
+ return 0;
+}
diff --git a/tools/testing/selftests/kvm/x86_64/private_mem_conversions_test.c b/tools/testing/selftests/kvm/x86_64/private_mem_conversions_test.c
new file mode 100644
index 000000000000..e0f642d2a3c4
--- /dev/null
+++ b/tools/testing/selftests/kvm/x86_64/private_mem_conversions_test.c
@@ -0,0 +1,484 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (C) 2022, Google LLC.
+ */
+#define _GNU_SOURCE /* for program_invocation_short_name */
+#include <fcntl.h>
+#include <limits.h>
+#include <pthread.h>
+#include <sched.h>
+#include <signal.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/ioctl.h>
+
+#include <linux/compiler.h>
+#include <linux/kernel.h>
+#include <linux/kvm_para.h>
+#include <linux/memfd.h>
+#include <linux/sizes.h>
+
+#include <test_util.h>
+#include <kvm_util.h>
+#include <processor.h>
+
+#define BASE_DATA_SLOT 10
+#define BASE_DATA_GPA ((uint64_t)(1ull << 32))
+#define PER_CPU_DATA_SIZE ((uint64_t)(SZ_2M + PAGE_SIZE))
+
+/* Horrific macro so that the line info is captured accurately :-( */
+#define memcmp_g(gpa, pattern, size) \
+do { \
+ uint8_t *mem = (uint8_t *)gpa; \
+ size_t i; \
+ \
+ for (i = 0; i < size; i++) \
+ __GUEST_ASSERT(mem[i] == pattern, \
+ "Guest expected 0x%x at offset %lu (gpa 0x%lx), got 0x%x", \
+ pattern, i, gpa + i, mem[i]); \
+} while (0)
+
+static void memcmp_h(uint8_t *mem, uint64_t gpa, uint8_t pattern, size_t size)
+{
+ size_t i;
+
+ for (i = 0; i < size; i++)
+ TEST_ASSERT(mem[i] == pattern,
+ "Host expected 0x%x at gpa 0x%lx, got 0x%x",
+ pattern, gpa + i, mem[i]);
+}
+
+/*
+ * Run memory conversion tests with explicit conversion:
+ * Execute KVM hypercall to map/unmap gpa range which will cause userspace exit
+ * to back/unback private memory. Subsequent accesses by guest to the gpa range
+ * will not cause exit to userspace.
+ *
+ * Test memory conversion scenarios with following steps:
+ * 1) Access private memory using private access and verify that memory contents
+ * are not visible to userspace.
+ * 2) Convert memory to shared using explicit conversions and ensure that
+ * userspace is able to access the shared regions.
+ * 3) Convert memory back to private using explicit conversions and ensure that
+ * userspace is again not able to access converted private regions.
+ */
+
+#define GUEST_STAGE(o, s) { .offset = o, .size = s }
+
+enum ucall_syncs {
+ SYNC_SHARED,
+ SYNC_PRIVATE,
+};
+
+static void guest_sync_shared(uint64_t gpa, uint64_t size,
+ uint8_t current_pattern, uint8_t new_pattern)
+{
+ GUEST_SYNC5(SYNC_SHARED, gpa, size, current_pattern, new_pattern);
+}
+
+static void guest_sync_private(uint64_t gpa, uint64_t size, uint8_t pattern)
+{
+ GUEST_SYNC4(SYNC_PRIVATE, gpa, size, pattern);
+}
+
+/* Arbitrary values, KVM doesn't care about the attribute flags. */
+#define MAP_GPA_SET_ATTRIBUTES BIT(0)
+#define MAP_GPA_SHARED BIT(1)
+#define MAP_GPA_DO_FALLOCATE BIT(2)
+
+static void guest_map_mem(uint64_t gpa, uint64_t size, bool map_shared,
+ bool do_fallocate)
+{
+ uint64_t flags = MAP_GPA_SET_ATTRIBUTES;
+
+ if (map_shared)
+ flags |= MAP_GPA_SHARED;
+ if (do_fallocate)
+ flags |= MAP_GPA_DO_FALLOCATE;
+ kvm_hypercall_map_gpa_range(gpa, size, flags);
+}
+
+static void guest_map_shared(uint64_t gpa, uint64_t size, bool do_fallocate)
+{
+ guest_map_mem(gpa, size, true, do_fallocate);
+}
+
+static void guest_map_private(uint64_t gpa, uint64_t size, bool do_fallocate)
+{
+ guest_map_mem(gpa, size, false, do_fallocate);
+}
+
+struct {
+ uint64_t offset;
+ uint64_t size;
+} static const test_ranges[] = {
+ GUEST_STAGE(0, PAGE_SIZE),
+ GUEST_STAGE(0, SZ_2M),
+ GUEST_STAGE(PAGE_SIZE, PAGE_SIZE),
+ GUEST_STAGE(PAGE_SIZE, SZ_2M),
+ GUEST_STAGE(SZ_2M, PAGE_SIZE),
+};
+
+static void guest_test_explicit_conversion(uint64_t base_gpa, bool do_fallocate)
+{
+ const uint8_t def_p = 0xaa;
+ const uint8_t init_p = 0xcc;
+ uint64_t j;
+ int i;
+
+ /* Memory should be shared by default. */
+ memset((void *)base_gpa, def_p, PER_CPU_DATA_SIZE);
+ memcmp_g(base_gpa, def_p, PER_CPU_DATA_SIZE);
+ guest_sync_shared(base_gpa, PER_CPU_DATA_SIZE, def_p, init_p);
+
+ memcmp_g(base_gpa, init_p, PER_CPU_DATA_SIZE);
+
+ for (i = 0; i < ARRAY_SIZE(test_ranges); i++) {
+ uint64_t gpa = base_gpa + test_ranges[i].offset;
+ uint64_t size = test_ranges[i].size;
+ uint8_t p1 = 0x11;
+ uint8_t p2 = 0x22;
+ uint8_t p3 = 0x33;
+ uint8_t p4 = 0x44;
+
+ /*
+ * Set the test region to pattern one to differentiate it from
+ * the data range as a whole (contains the initial pattern).
+ */
+ memset((void *)gpa, p1, size);
+
+ /*
+ * Convert to private, set and verify the private data, and
+ * then verify that the rest of the data (map shared) still
+ * holds the initial pattern, and that the host always sees the
+ * shared memory (initial pattern). Unlike shared memory,
+ * punching a hole in private memory is destructive, i.e.
+ * previous values aren't guaranteed to be preserved.
+ */
+ guest_map_private(gpa, size, do_fallocate);
+
+ if (size > PAGE_SIZE) {
+ memset((void *)gpa, p2, PAGE_SIZE);
+ goto skip;
+ }
+
+ memset((void *)gpa, p2, size);
+ guest_sync_private(gpa, size, p1);
+
+ /*
+ * Verify that the private memory was set to pattern two, and
+ * that shared memory still holds the initial pattern.
+ */
+ memcmp_g(gpa, p2, size);
+ if (gpa > base_gpa)
+ memcmp_g(base_gpa, init_p, gpa - base_gpa);
+ if (gpa + size < base_gpa + PER_CPU_DATA_SIZE)
+ memcmp_g(gpa + size, init_p,
+ (base_gpa + PER_CPU_DATA_SIZE) - (gpa + size));
+
+ /*
+ * Convert odd-number page frames back to shared to verify KVM
+ * also correctly handles holes in private ranges.
+ */
+ for (j = 0; j < size; j += PAGE_SIZE) {
+ if ((j >> PAGE_SHIFT) & 1) {
+ guest_map_shared(gpa + j, PAGE_SIZE, do_fallocate);
+ guest_sync_shared(gpa + j, PAGE_SIZE, p1, p3);
+
+ memcmp_g(gpa + j, p3, PAGE_SIZE);
+ } else {
+ guest_sync_private(gpa + j, PAGE_SIZE, p1);
+ }
+ }
+
+skip:
+ /*
+ * Convert the entire region back to shared, explicitly write
+ * pattern three to fill in the even-number frames before
+ * asking the host to verify (and write pattern four).
+ */
+ guest_map_shared(gpa, size, do_fallocate);
+ memset((void *)gpa, p3, size);
+ guest_sync_shared(gpa, size, p3, p4);
+ memcmp_g(gpa, p4, size);
+
+ /* Reset the shared memory back to the initial pattern. */
+ memset((void *)gpa, init_p, size);
+
+ /*
+ * Free (via PUNCH_HOLE) *all* private memory so that the next
+ * iteration starts from a clean slate, e.g. with respect to
+ * whether or not there are pages/folios in guest_mem.
+ */
+ guest_map_shared(base_gpa, PER_CPU_DATA_SIZE, true);
+ }
+}
+
+static void guest_punch_hole(uint64_t gpa, uint64_t size)
+{
+ /* "Mapping" memory shared via fallocate() is done via PUNCH_HOLE. */
+ uint64_t flags = MAP_GPA_SHARED | MAP_GPA_DO_FALLOCATE;
+
+ kvm_hypercall_map_gpa_range(gpa, size, flags);
+}
+
+/*
+ * Test that PUNCH_HOLE actually frees memory by punching holes without doing a
+ * proper conversion. Freeing (PUNCH_HOLE) should zap SPTEs, and reallocating
+ * (subsequent fault) should zero memory.
+ */
+static void guest_test_punch_hole(uint64_t base_gpa, bool precise)
+{
+ const uint8_t init_p = 0xcc;
+ int i;
+
+ /*
+ * Convert the entire range to private, this testcase is all about
+ * punching holes in guest_memfd, i.e. shared mappings aren't needed.
+ */
+ guest_map_private(base_gpa, PER_CPU_DATA_SIZE, false);
+
+ for (i = 0; i < ARRAY_SIZE(test_ranges); i++) {
+ uint64_t gpa = base_gpa + test_ranges[i].offset;
+ uint64_t size = test_ranges[i].size;
+
+ /*
+ * Free all memory before each iteration, even for the !precise
+ * case where the memory will be faulted back in. Freeing and
+ * reallocating should obviously work, and freeing all memory
+ * minimizes the probability of cross-testcase influence.
+ */
+ guest_punch_hole(base_gpa, PER_CPU_DATA_SIZE);
+
+ /* Fault-in and initialize memory, and verify the pattern. */
+ if (precise) {
+ memset((void *)gpa, init_p, size);
+ memcmp_g(gpa, init_p, size);
+ } else {
+ memset((void *)base_gpa, init_p, PER_CPU_DATA_SIZE);
+ memcmp_g(base_gpa, init_p, PER_CPU_DATA_SIZE);
+ }
+
+ /*
+ * Punch a hole at the target range and verify that reads from
+ * the guest succeed and return zeroes.
+ */
+ guest_punch_hole(gpa, size);
+ memcmp_g(gpa, 0, size);
+ }
+}
+
+static void guest_code(uint64_t base_gpa)
+{
+ /*
+ * Run the conversion test twice, with and without doing fallocate() on
+ * the guest_memfd backing when converting between shared and private.
+ */
+ guest_test_explicit_conversion(base_gpa, false);
+ guest_test_explicit_conversion(base_gpa, true);
+
+ /*
+ * Run the PUNCH_HOLE test twice too, once with the entire guest_memfd
+ * faulted in, once with only the target range faulted in.
+ */
+ guest_test_punch_hole(base_gpa, false);
+ guest_test_punch_hole(base_gpa, true);
+ GUEST_DONE();
+}
+
+static void handle_exit_hypercall(struct kvm_vcpu *vcpu)
+{
+ struct kvm_run *run = vcpu->run;
+ uint64_t gpa = run->hypercall.args[0];
+ uint64_t size = run->hypercall.args[1] * PAGE_SIZE;
+ bool set_attributes = run->hypercall.args[2] & MAP_GPA_SET_ATTRIBUTES;
+ bool map_shared = run->hypercall.args[2] & MAP_GPA_SHARED;
+ bool do_fallocate = run->hypercall.args[2] & MAP_GPA_DO_FALLOCATE;
+ struct kvm_vm *vm = vcpu->vm;
+
+ TEST_ASSERT(run->hypercall.nr == KVM_HC_MAP_GPA_RANGE,
+ "Wanted MAP_GPA_RANGE (%u), got '%llu'",
+ KVM_HC_MAP_GPA_RANGE, run->hypercall.nr);
+
+ if (do_fallocate)
+ vm_guest_mem_fallocate(vm, gpa, size, map_shared);
+
+ if (set_attributes)
+ vm_set_memory_attributes(vm, gpa, size,
+ map_shared ? 0 : KVM_MEMORY_ATTRIBUTE_PRIVATE);
+ run->hypercall.ret = 0;
+}
+
+static bool run_vcpus;
+
+static void *__test_mem_conversions(void *__vcpu)
+{
+ struct kvm_vcpu *vcpu = __vcpu;
+ struct kvm_run *run = vcpu->run;
+ struct kvm_vm *vm = vcpu->vm;
+ struct ucall uc;
+
+ while (!READ_ONCE(run_vcpus))
+ ;
+
+ for ( ;; ) {
+ vcpu_run(vcpu);
+
+ if (run->exit_reason == KVM_EXIT_HYPERCALL) {
+ handle_exit_hypercall(vcpu);
+ continue;
+ }
+
+ TEST_ASSERT(run->exit_reason == KVM_EXIT_IO,
+ "Wanted KVM_EXIT_IO, got exit reason: %u (%s)",
+ run->exit_reason, exit_reason_str(run->exit_reason));
+
+ switch (get_ucall(vcpu, &uc)) {
+ case UCALL_ABORT:
+ REPORT_GUEST_ASSERT(uc);
+ case UCALL_SYNC: {
+ uint64_t gpa = uc.args[1];
+ size_t size = uc.args[2];
+ size_t i;
+
+ TEST_ASSERT(uc.args[0] == SYNC_SHARED ||
+ uc.args[0] == SYNC_PRIVATE,
+ "Unknown sync command '%ld'", uc.args[0]);
+
+ for (i = 0; i < size; i += vm->page_size) {
+ size_t nr_bytes = min_t(size_t, vm->page_size, size - i);
+ uint8_t *hva = addr_gpa2hva(vm, gpa + i);
+
+ /* In all cases, the host should observe the shared data. */
+ memcmp_h(hva, gpa + i, uc.args[3], nr_bytes);
+
+ /* For shared, write the new pattern to guest memory. */
+ if (uc.args[0] == SYNC_SHARED)
+ memset(hva, uc.args[4], nr_bytes);
+ }
+ break;
+ }
+ case UCALL_DONE:
+ return NULL;
+ default:
+ TEST_FAIL("Unknown ucall 0x%lx.", uc.cmd);
+ }
+ }
+}
+
+static void test_mem_conversions(enum vm_mem_backing_src_type src_type, uint32_t nr_vcpus,
+ uint32_t nr_memslots)
+{
+ /*
+ * Allocate enough memory so that each vCPU's chunk of memory can be
+ * naturally aligned with respect to the size of the backing store.
+ */
+ const size_t alignment = max_t(size_t, SZ_2M, get_backing_src_pagesz(src_type));
+ const size_t per_cpu_size = align_up(PER_CPU_DATA_SIZE, alignment);
+ const size_t memfd_size = per_cpu_size * nr_vcpus;
+ const size_t slot_size = memfd_size / nr_memslots;
+ struct kvm_vcpu *vcpus[KVM_MAX_VCPUS];
+ pthread_t threads[KVM_MAX_VCPUS];
+ struct kvm_vm *vm;
+ int memfd, i, r;
+
+ const struct vm_shape shape = {
+ .mode = VM_MODE_DEFAULT,
+ .type = KVM_X86_SW_PROTECTED_VM,
+ };
+
+ TEST_ASSERT(slot_size * nr_memslots == memfd_size,
+ "The memfd size (0x%lx) needs to be cleanly divisible by the number of memslots (%u)",
+ memfd_size, nr_memslots);
+ vm = __vm_create_with_vcpus(shape, nr_vcpus, 0, guest_code, vcpus);
+
+ vm_enable_cap(vm, KVM_CAP_EXIT_HYPERCALL, (1 << KVM_HC_MAP_GPA_RANGE));
+
+ memfd = vm_create_guest_memfd(vm, memfd_size, 0);
+
+ for (i = 0; i < nr_memslots; i++)
+ vm_mem_add(vm, src_type, BASE_DATA_GPA + slot_size * i,
+ BASE_DATA_SLOT + i, slot_size / vm->page_size,
+ KVM_MEM_GUEST_MEMFD, memfd, slot_size * i);
+
+ for (i = 0; i < nr_vcpus; i++) {
+ uint64_t gpa = BASE_DATA_GPA + i * per_cpu_size;
+
+ vcpu_args_set(vcpus[i], 1, gpa);
+
+ /*
+ * Map only what is needed so that an out-of-bounds access
+ * results #PF => SHUTDOWN instead of data corruption.
+ */
+ virt_map(vm, gpa, gpa, PER_CPU_DATA_SIZE / vm->page_size);
+
+ pthread_create(&threads[i], NULL, __test_mem_conversions, vcpus[i]);
+ }
+
+ WRITE_ONCE(run_vcpus, true);
+
+ for (i = 0; i < nr_vcpus; i++)
+ pthread_join(threads[i], NULL);
+
+ kvm_vm_free(vm);
+
+ /*
+ * Allocate and free memory from the guest_memfd after closing the VM
+ * fd. The guest_memfd is gifted a reference to its owning VM, i.e.
+ * should prevent the VM from being fully destroyed until the last
+ * reference to the guest_memfd is also put.
+ */
+ r = fallocate(memfd, FALLOC_FL_KEEP_SIZE | FALLOC_FL_PUNCH_HOLE, 0, memfd_size);
+ TEST_ASSERT(!r, __KVM_SYSCALL_ERROR("fallocate()", r));
+
+ r = fallocate(memfd, FALLOC_FL_KEEP_SIZE, 0, memfd_size);
+ TEST_ASSERT(!r, __KVM_SYSCALL_ERROR("fallocate()", r));
+
+ close(memfd);
+}
+
+static void usage(const char *cmd)
+{
+ puts("");
+ printf("usage: %s [-h] [-m nr_memslots] [-s mem_type] [-n nr_vcpus]\n", cmd);
+ puts("");
+ backing_src_help("-s");
+ puts("");
+ puts(" -n: specify the number of vcpus (default: 1)");
+ puts("");
+ puts(" -m: specify the number of memslots (default: 1)");
+ puts("");
+}
+
+int main(int argc, char *argv[])
+{
+ enum vm_mem_backing_src_type src_type = DEFAULT_VM_MEM_SRC;
+ uint32_t nr_memslots = 1;
+ uint32_t nr_vcpus = 1;
+ int opt;
+
+ TEST_REQUIRE(kvm_check_cap(KVM_CAP_VM_TYPES) & BIT(KVM_X86_SW_PROTECTED_VM));
+
+ while ((opt = getopt(argc, argv, "hm:s:n:")) != -1) {
+ switch (opt) {
+ case 's':
+ src_type = parse_backing_src_type(optarg);
+ break;
+ case 'n':
+ nr_vcpus = atoi_positive("nr_vcpus", optarg);
+ break;
+ case 'm':
+ nr_memslots = atoi_positive("nr_memslots", optarg);
+ break;
+ case 'h':
+ default:
+ usage(argv[0]);
+ exit(0);
+ }
+ }
+
+ test_mem_conversions(src_type, nr_vcpus, nr_memslots);
+
+ return 0;
+}
diff --git a/tools/testing/selftests/kvm/x86_64/private_mem_kvm_exits_test.c b/tools/testing/selftests/kvm/x86_64/private_mem_kvm_exits_test.c
new file mode 100644
index 000000000000..13e72fcec8dd
--- /dev/null
+++ b/tools/testing/selftests/kvm/x86_64/private_mem_kvm_exits_test.c
@@ -0,0 +1,120 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright (C) 2023, Google LLC.
+ */
+#include <linux/kvm.h>
+#include <pthread.h>
+#include <stdint.h>
+
+#include "kvm_util.h"
+#include "processor.h"
+#include "test_util.h"
+
+/* Arbitrarily selected to avoid overlaps with anything else */
+#define EXITS_TEST_GVA 0xc0000000
+#define EXITS_TEST_GPA EXITS_TEST_GVA
+#define EXITS_TEST_NPAGES 1
+#define EXITS_TEST_SIZE (EXITS_TEST_NPAGES * PAGE_SIZE)
+#define EXITS_TEST_SLOT 10
+
+static uint64_t guest_repeatedly_read(void)
+{
+ volatile uint64_t value;
+
+ while (true)
+ value = *((uint64_t *) EXITS_TEST_GVA);
+
+ return value;
+}
+
+static uint32_t run_vcpu_get_exit_reason(struct kvm_vcpu *vcpu)
+{
+ int r;
+
+ r = _vcpu_run(vcpu);
+ if (r) {
+ TEST_ASSERT(errno == EFAULT, KVM_IOCTL_ERROR(KVM_RUN, r));
+ TEST_ASSERT_EQ(vcpu->run->exit_reason, KVM_EXIT_MEMORY_FAULT);
+ }
+ return vcpu->run->exit_reason;
+}
+
+const struct vm_shape protected_vm_shape = {
+ .mode = VM_MODE_DEFAULT,
+ .type = KVM_X86_SW_PROTECTED_VM,
+};
+
+static void test_private_access_memslot_deleted(void)
+{
+ struct kvm_vm *vm;
+ struct kvm_vcpu *vcpu;
+ pthread_t vm_thread;
+ void *thread_return;
+ uint32_t exit_reason;
+
+ vm = vm_create_shape_with_one_vcpu(protected_vm_shape, &vcpu,
+ guest_repeatedly_read);
+
+ vm_userspace_mem_region_add(vm, VM_MEM_SRC_ANONYMOUS,
+ EXITS_TEST_GPA, EXITS_TEST_SLOT,
+ EXITS_TEST_NPAGES,
+ KVM_MEM_GUEST_MEMFD);
+
+ virt_map(vm, EXITS_TEST_GVA, EXITS_TEST_GPA, EXITS_TEST_NPAGES);
+
+ /* Request to access page privately */
+ vm_mem_set_private(vm, EXITS_TEST_GPA, EXITS_TEST_SIZE);
+
+ pthread_create(&vm_thread, NULL,
+ (void *(*)(void *))run_vcpu_get_exit_reason,
+ (void *)vcpu);
+
+ vm_mem_region_delete(vm, EXITS_TEST_SLOT);
+
+ pthread_join(vm_thread, &thread_return);
+ exit_reason = (uint32_t)(uint64_t)thread_return;
+
+ TEST_ASSERT_EQ(exit_reason, KVM_EXIT_MEMORY_FAULT);
+ TEST_ASSERT_EQ(vcpu->run->memory_fault.flags, KVM_MEMORY_EXIT_FLAG_PRIVATE);
+ TEST_ASSERT_EQ(vcpu->run->memory_fault.gpa, EXITS_TEST_GPA);
+ TEST_ASSERT_EQ(vcpu->run->memory_fault.size, EXITS_TEST_SIZE);
+
+ kvm_vm_free(vm);
+}
+
+static void test_private_access_memslot_not_private(void)
+{
+ struct kvm_vm *vm;
+ struct kvm_vcpu *vcpu;
+ uint32_t exit_reason;
+
+ vm = vm_create_shape_with_one_vcpu(protected_vm_shape, &vcpu,
+ guest_repeatedly_read);
+
+ /* Add a non-private memslot (flags = 0) */
+ vm_userspace_mem_region_add(vm, VM_MEM_SRC_ANONYMOUS,
+ EXITS_TEST_GPA, EXITS_TEST_SLOT,
+ EXITS_TEST_NPAGES, 0);
+
+ virt_map(vm, EXITS_TEST_GVA, EXITS_TEST_GPA, EXITS_TEST_NPAGES);
+
+ /* Request to access page privately */
+ vm_mem_set_private(vm, EXITS_TEST_GPA, EXITS_TEST_SIZE);
+
+ exit_reason = run_vcpu_get_exit_reason(vcpu);
+
+ TEST_ASSERT_EQ(exit_reason, KVM_EXIT_MEMORY_FAULT);
+ TEST_ASSERT_EQ(vcpu->run->memory_fault.flags, KVM_MEMORY_EXIT_FLAG_PRIVATE);
+ TEST_ASSERT_EQ(vcpu->run->memory_fault.gpa, EXITS_TEST_GPA);
+ TEST_ASSERT_EQ(vcpu->run->memory_fault.size, EXITS_TEST_SIZE);
+
+ kvm_vm_free(vm);
+}
+
+int main(int argc, char *argv[])
+{
+ TEST_REQUIRE(kvm_check_cap(KVM_CAP_VM_TYPES) & BIT(KVM_X86_SW_PROTECTED_VM));
+
+ test_private_access_memslot_deleted();
+ test_private_access_memslot_not_private();
+}
diff --git a/tools/testing/selftests/kvm/x86_64/recalc_apic_map_test.c b/tools/testing/selftests/kvm/x86_64/recalc_apic_map_test.c
new file mode 100644
index 000000000000..cbc92a862ea9
--- /dev/null
+++ b/tools/testing/selftests/kvm/x86_64/recalc_apic_map_test.c
@@ -0,0 +1,74 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Test edge cases and race conditions in kvm_recalculate_apic_map().
+ */
+
+#include <sys/ioctl.h>
+#include <pthread.h>
+#include <time.h>
+
+#include "processor.h"
+#include "test_util.h"
+#include "kvm_util.h"
+#include "apic.h"
+
+#define TIMEOUT 5 /* seconds */
+
+#define LAPIC_DISABLED 0
+#define LAPIC_X2APIC (MSR_IA32_APICBASE_ENABLE | X2APIC_ENABLE)
+#define MAX_XAPIC_ID 0xff
+
+static void *race(void *arg)
+{
+ struct kvm_lapic_state lapic = {};
+ struct kvm_vcpu *vcpu = arg;
+
+ while (1) {
+ /* Trigger kvm_recalculate_apic_map(). */
+ vcpu_ioctl(vcpu, KVM_SET_LAPIC, &lapic);
+ pthread_testcancel();
+ }
+
+ return NULL;
+}
+
+int main(void)
+{
+ struct kvm_vcpu *vcpus[KVM_MAX_VCPUS];
+ struct kvm_vcpu *vcpuN;
+ struct kvm_vm *vm;
+ pthread_t thread;
+ time_t t;
+ int i;
+
+ kvm_static_assert(KVM_MAX_VCPUS > MAX_XAPIC_ID);
+
+ /*
+ * Create the max number of vCPUs supported by selftests so that KVM
+ * has decent amount of work to do when recalculating the map, i.e. to
+ * make the problematic window large enough to hit.
+ */
+ vm = vm_create_with_vcpus(KVM_MAX_VCPUS, NULL, vcpus);
+
+ /*
+ * Enable x2APIC on all vCPUs so that KVM doesn't bail from the recalc
+ * due to vCPUs having aliased xAPIC IDs (truncated to 8 bits).
+ */
+ for (i = 0; i < KVM_MAX_VCPUS; i++)
+ vcpu_set_msr(vcpus[i], MSR_IA32_APICBASE, LAPIC_X2APIC);
+
+ TEST_ASSERT_EQ(pthread_create(&thread, NULL, race, vcpus[0]), 0);
+
+ vcpuN = vcpus[KVM_MAX_VCPUS - 1];
+ for (t = time(NULL) + TIMEOUT; time(NULL) < t;) {
+ vcpu_set_msr(vcpuN, MSR_IA32_APICBASE, LAPIC_X2APIC);
+ vcpu_set_msr(vcpuN, MSR_IA32_APICBASE, LAPIC_DISABLED);
+ }
+
+ TEST_ASSERT_EQ(pthread_cancel(thread), 0);
+ TEST_ASSERT_EQ(pthread_join(thread, NULL), 0);
+
+ kvm_vm_free(vm);
+
+ return 0;
+}
diff --git a/tools/testing/selftests/kvm/x86_64/set_boot_cpu_id.c b/tools/testing/selftests/kvm/x86_64/set_boot_cpu_id.c
new file mode 100644
index 000000000000..366cf18600bc
--- /dev/null
+++ b/tools/testing/selftests/kvm/x86_64/set_boot_cpu_id.c
@@ -0,0 +1,131 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Test that KVM_SET_BOOT_CPU_ID works as intended
+ *
+ * Copyright (C) 2020, Red Hat, Inc.
+ */
+#define _GNU_SOURCE /* for program_invocation_name */
+#include <fcntl.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/ioctl.h>
+
+#include "test_util.h"
+#include "kvm_util.h"
+#include "processor.h"
+#include "apic.h"
+
+static void guest_bsp_vcpu(void *arg)
+{
+ GUEST_SYNC(1);
+
+ GUEST_ASSERT_NE(get_bsp_flag(), 0);
+
+ GUEST_DONE();
+}
+
+static void guest_not_bsp_vcpu(void *arg)
+{
+ GUEST_SYNC(1);
+
+ GUEST_ASSERT_EQ(get_bsp_flag(), 0);
+
+ GUEST_DONE();
+}
+
+static void test_set_bsp_busy(struct kvm_vcpu *vcpu, const char *msg)
+{
+ int r = __vm_ioctl(vcpu->vm, KVM_SET_BOOT_CPU_ID,
+ (void *)(unsigned long)vcpu->id);
+
+ TEST_ASSERT(r == -1 && errno == EBUSY, "KVM_SET_BOOT_CPU_ID set %s", msg);
+}
+
+static void run_vcpu(struct kvm_vcpu *vcpu)
+{
+ struct ucall uc;
+ int stage;
+
+ for (stage = 0; stage < 2; stage++) {
+
+ vcpu_run(vcpu);
+
+ switch (get_ucall(vcpu, &uc)) {
+ case UCALL_SYNC:
+ TEST_ASSERT(!strcmp((const char *)uc.args[0], "hello") &&
+ uc.args[1] == stage + 1,
+ "Stage %d: Unexpected register values vmexit, got %lx",
+ stage + 1, (ulong)uc.args[1]);
+ test_set_bsp_busy(vcpu, "while running vm");
+ break;
+ case UCALL_DONE:
+ TEST_ASSERT(stage == 1,
+ "Expected GUEST_DONE in stage 2, got stage %d",
+ stage);
+ break;
+ case UCALL_ABORT:
+ REPORT_GUEST_ASSERT(uc);
+ default:
+ TEST_ASSERT(false, "Unexpected exit: %s",
+ exit_reason_str(vcpu->run->exit_reason));
+ }
+ }
+}
+
+static struct kvm_vm *create_vm(uint32_t nr_vcpus, uint32_t bsp_vcpu_id,
+ struct kvm_vcpu *vcpus[])
+{
+ struct kvm_vm *vm;
+ uint32_t i;
+
+ vm = vm_create(nr_vcpus);
+
+ vm_ioctl(vm, KVM_SET_BOOT_CPU_ID, (void *)(unsigned long)bsp_vcpu_id);
+
+ for (i = 0; i < nr_vcpus; i++)
+ vcpus[i] = vm_vcpu_add(vm, i, i == bsp_vcpu_id ? guest_bsp_vcpu :
+ guest_not_bsp_vcpu);
+ return vm;
+}
+
+static void run_vm_bsp(uint32_t bsp_vcpu_id)
+{
+ struct kvm_vcpu *vcpus[2];
+ struct kvm_vm *vm;
+
+ vm = create_vm(ARRAY_SIZE(vcpus), bsp_vcpu_id, vcpus);
+
+ run_vcpu(vcpus[0]);
+ run_vcpu(vcpus[1]);
+
+ kvm_vm_free(vm);
+}
+
+static void check_set_bsp_busy(void)
+{
+ struct kvm_vcpu *vcpus[2];
+ struct kvm_vm *vm;
+
+ vm = create_vm(ARRAY_SIZE(vcpus), 0, vcpus);
+
+ test_set_bsp_busy(vcpus[1], "after adding vcpu");
+
+ run_vcpu(vcpus[0]);
+ run_vcpu(vcpus[1]);
+
+ test_set_bsp_busy(vcpus[1], "to a terminated vcpu");
+
+ kvm_vm_free(vm);
+}
+
+int main(int argc, char *argv[])
+{
+ TEST_REQUIRE(kvm_has_cap(KVM_CAP_SET_BOOT_CPU_ID));
+
+ run_vm_bsp(0);
+ run_vm_bsp(1);
+ run_vm_bsp(0);
+
+ check_set_bsp_busy();
+}
diff --git a/tools/testing/selftests/kvm/x86_64/set_sregs_test.c b/tools/testing/selftests/kvm/x86_64/set_sregs_test.c
index 9f7656184f31..3610981d9162 100644
--- a/tools/testing/selftests/kvm/x86_64/set_sregs_test.c
+++ b/tools/testing/selftests/kvm/x86_64/set_sregs_test.c
@@ -22,27 +22,117 @@
#include "kvm_util.h"
#include "processor.h"
-#define VCPU_ID 5
+#define TEST_INVALID_CR_BIT(vcpu, cr, orig, bit) \
+do { \
+ struct kvm_sregs new; \
+ int rc; \
+ \
+ /* Skip the sub-test, the feature/bit is supported. */ \
+ if (orig.cr & bit) \
+ break; \
+ \
+ memcpy(&new, &orig, sizeof(sregs)); \
+ new.cr |= bit; \
+ \
+ rc = _vcpu_sregs_set(vcpu, &new); \
+ TEST_ASSERT(rc, "KVM allowed invalid " #cr " bit (0x%lx)", bit); \
+ \
+ /* Sanity check that KVM didn't change anything. */ \
+ vcpu_sregs_get(vcpu, &new); \
+ TEST_ASSERT(!memcmp(&new, &orig, sizeof(new)), "KVM modified sregs"); \
+} while (0)
+
+static uint64_t calc_supported_cr4_feature_bits(void)
+{
+ uint64_t cr4;
+
+ cr4 = X86_CR4_VME | X86_CR4_PVI | X86_CR4_TSD | X86_CR4_DE |
+ X86_CR4_PSE | X86_CR4_PAE | X86_CR4_MCE | X86_CR4_PGE |
+ X86_CR4_PCE | X86_CR4_OSFXSR | X86_CR4_OSXMMEXCPT;
+ if (kvm_cpu_has(X86_FEATURE_UMIP))
+ cr4 |= X86_CR4_UMIP;
+ if (kvm_cpu_has(X86_FEATURE_LA57))
+ cr4 |= X86_CR4_LA57;
+ if (kvm_cpu_has(X86_FEATURE_VMX))
+ cr4 |= X86_CR4_VMXE;
+ if (kvm_cpu_has(X86_FEATURE_SMX))
+ cr4 |= X86_CR4_SMXE;
+ if (kvm_cpu_has(X86_FEATURE_FSGSBASE))
+ cr4 |= X86_CR4_FSGSBASE;
+ if (kvm_cpu_has(X86_FEATURE_PCID))
+ cr4 |= X86_CR4_PCIDE;
+ if (kvm_cpu_has(X86_FEATURE_XSAVE))
+ cr4 |= X86_CR4_OSXSAVE;
+ if (kvm_cpu_has(X86_FEATURE_SMEP))
+ cr4 |= X86_CR4_SMEP;
+ if (kvm_cpu_has(X86_FEATURE_SMAP))
+ cr4 |= X86_CR4_SMAP;
+ if (kvm_cpu_has(X86_FEATURE_PKU))
+ cr4 |= X86_CR4_PKE;
+
+ return cr4;
+}
int main(int argc, char *argv[])
{
struct kvm_sregs sregs;
+ struct kvm_vcpu *vcpu;
struct kvm_vm *vm;
- int rc;
+ uint64_t cr4;
+ int rc, i;
+
+ /*
+ * Create a dummy VM, specifically to avoid doing KVM_SET_CPUID2, and
+ * use it to verify all supported CR4 bits can be set prior to defining
+ * the vCPU model, i.e. without doing KVM_SET_CPUID2.
+ */
+ vm = vm_create_barebones();
+ vcpu = __vm_vcpu_add(vm, 0);
+
+ vcpu_sregs_get(vcpu, &sregs);
- /* Tell stdout not to buffer its content */
- setbuf(stdout, NULL);
+ sregs.cr0 = 0;
+ sregs.cr4 |= calc_supported_cr4_feature_bits();
+ cr4 = sregs.cr4;
+
+ rc = _vcpu_sregs_set(vcpu, &sregs);
+ TEST_ASSERT(!rc, "Failed to set supported CR4 bits (0x%lx)", cr4);
+
+ vcpu_sregs_get(vcpu, &sregs);
+ TEST_ASSERT(sregs.cr4 == cr4, "sregs.CR4 (0x%llx) != CR4 (0x%lx)",
+ sregs.cr4, cr4);
+
+ /* Verify all unsupported features are rejected by KVM. */
+ TEST_INVALID_CR_BIT(vcpu, cr4, sregs, X86_CR4_UMIP);
+ TEST_INVALID_CR_BIT(vcpu, cr4, sregs, X86_CR4_LA57);
+ TEST_INVALID_CR_BIT(vcpu, cr4, sregs, X86_CR4_VMXE);
+ TEST_INVALID_CR_BIT(vcpu, cr4, sregs, X86_CR4_SMXE);
+ TEST_INVALID_CR_BIT(vcpu, cr4, sregs, X86_CR4_FSGSBASE);
+ TEST_INVALID_CR_BIT(vcpu, cr4, sregs, X86_CR4_PCIDE);
+ TEST_INVALID_CR_BIT(vcpu, cr4, sregs, X86_CR4_OSXSAVE);
+ TEST_INVALID_CR_BIT(vcpu, cr4, sregs, X86_CR4_SMEP);
+ TEST_INVALID_CR_BIT(vcpu, cr4, sregs, X86_CR4_SMAP);
+ TEST_INVALID_CR_BIT(vcpu, cr4, sregs, X86_CR4_PKE);
+
+ for (i = 32; i < 64; i++)
+ TEST_INVALID_CR_BIT(vcpu, cr0, sregs, BIT(i));
+
+ /* NW without CD is illegal, as is PG without PE. */
+ TEST_INVALID_CR_BIT(vcpu, cr0, sregs, X86_CR0_NW);
+ TEST_INVALID_CR_BIT(vcpu, cr0, sregs, X86_CR0_PG);
+
+ kvm_vm_free(vm);
- /* Create VM */
- vm = vm_create_default(VCPU_ID, 0, NULL);
+ /* Create a "real" VM and verify APIC_BASE can be set. */
+ vm = vm_create_with_one_vcpu(&vcpu, NULL);
- vcpu_sregs_get(vm, VCPU_ID, &sregs);
+ vcpu_sregs_get(vcpu, &sregs);
sregs.apic_base = 1 << 10;
- rc = _vcpu_sregs_set(vm, VCPU_ID, &sregs);
+ rc = _vcpu_sregs_set(vcpu, &sregs);
TEST_ASSERT(rc, "Set IA32_APIC_BASE to %llx (invalid)",
sregs.apic_base);
sregs.apic_base = 1 << 11;
- rc = _vcpu_sregs_set(vm, VCPU_ID, &sregs);
+ rc = _vcpu_sregs_set(vcpu, &sregs);
TEST_ASSERT(!rc, "Couldn't set IA32_APIC_BASE to %llx (valid)",
sregs.apic_base);
diff --git a/tools/testing/selftests/kvm/x86_64/sev_migrate_tests.c b/tools/testing/selftests/kvm/x86_64/sev_migrate_tests.c
new file mode 100644
index 000000000000..0a6dfba3905b
--- /dev/null
+++ b/tools/testing/selftests/kvm/x86_64/sev_migrate_tests.c
@@ -0,0 +1,397 @@
+// SPDX-License-Identifier: GPL-2.0-only
+#include <linux/kvm.h>
+#include <linux/psp-sev.h>
+#include <stdio.h>
+#include <sys/ioctl.h>
+#include <stdlib.h>
+#include <errno.h>
+#include <pthread.h>
+
+#include "test_util.h"
+#include "kvm_util.h"
+#include "processor.h"
+#include "sev.h"
+#include "kselftest.h"
+
+#define NR_MIGRATE_TEST_VCPUS 4
+#define NR_MIGRATE_TEST_VMS 3
+#define NR_LOCK_TESTING_THREADS 3
+#define NR_LOCK_TESTING_ITERATIONS 10000
+
+bool have_sev_es;
+
+static struct kvm_vm *sev_vm_create(bool es)
+{
+ struct kvm_vm *vm;
+ int i;
+
+ vm = vm_create_barebones();
+ if (!es)
+ sev_vm_init(vm);
+ else
+ sev_es_vm_init(vm);
+
+ for (i = 0; i < NR_MIGRATE_TEST_VCPUS; ++i)
+ __vm_vcpu_add(vm, i);
+
+ sev_vm_launch(vm, es ? SEV_POLICY_ES : 0);
+
+ if (es)
+ vm_sev_ioctl(vm, KVM_SEV_LAUNCH_UPDATE_VMSA, NULL);
+ return vm;
+}
+
+static struct kvm_vm *aux_vm_create(bool with_vcpus)
+{
+ struct kvm_vm *vm;
+ int i;
+
+ vm = vm_create_barebones();
+ if (!with_vcpus)
+ return vm;
+
+ for (i = 0; i < NR_MIGRATE_TEST_VCPUS; ++i)
+ __vm_vcpu_add(vm, i);
+
+ return vm;
+}
+
+static int __sev_migrate_from(struct kvm_vm *dst, struct kvm_vm *src)
+{
+ return __vm_enable_cap(dst, KVM_CAP_VM_MOVE_ENC_CONTEXT_FROM, src->fd);
+}
+
+
+static void sev_migrate_from(struct kvm_vm *dst, struct kvm_vm *src)
+{
+ int ret;
+
+ ret = __sev_migrate_from(dst, src);
+ TEST_ASSERT(!ret, "Migration failed, ret: %d, errno: %d", ret, errno);
+}
+
+static void test_sev_migrate_from(bool es)
+{
+ struct kvm_vm *src_vm;
+ struct kvm_vm *dst_vms[NR_MIGRATE_TEST_VMS];
+ int i, ret;
+
+ src_vm = sev_vm_create(es);
+ for (i = 0; i < NR_MIGRATE_TEST_VMS; ++i)
+ dst_vms[i] = aux_vm_create(true);
+
+ /* Initial migration from the src to the first dst. */
+ sev_migrate_from(dst_vms[0], src_vm);
+
+ for (i = 1; i < NR_MIGRATE_TEST_VMS; i++)
+ sev_migrate_from(dst_vms[i], dst_vms[i - 1]);
+
+ /* Migrate the guest back to the original VM. */
+ ret = __sev_migrate_from(src_vm, dst_vms[NR_MIGRATE_TEST_VMS - 1]);
+ TEST_ASSERT(ret == -1 && errno == EIO,
+ "VM that was migrated from should be dead. ret %d, errno: %d", ret,
+ errno);
+
+ kvm_vm_free(src_vm);
+ for (i = 0; i < NR_MIGRATE_TEST_VMS; ++i)
+ kvm_vm_free(dst_vms[i]);
+}
+
+struct locking_thread_input {
+ struct kvm_vm *vm;
+ struct kvm_vm *source_vms[NR_LOCK_TESTING_THREADS];
+};
+
+static void *locking_test_thread(void *arg)
+{
+ int i, j;
+ struct locking_thread_input *input = (struct locking_thread_input *)arg;
+
+ for (i = 0; i < NR_LOCK_TESTING_ITERATIONS; ++i) {
+ j = i % NR_LOCK_TESTING_THREADS;
+ __sev_migrate_from(input->vm, input->source_vms[j]);
+ }
+
+ return NULL;
+}
+
+static void test_sev_migrate_locking(void)
+{
+ struct locking_thread_input input[NR_LOCK_TESTING_THREADS];
+ pthread_t pt[NR_LOCK_TESTING_THREADS];
+ int i;
+
+ for (i = 0; i < NR_LOCK_TESTING_THREADS; ++i) {
+ input[i].vm = sev_vm_create(/* es= */ false);
+ input[0].source_vms[i] = input[i].vm;
+ }
+ for (i = 1; i < NR_LOCK_TESTING_THREADS; ++i)
+ memcpy(input[i].source_vms, input[0].source_vms,
+ sizeof(input[i].source_vms));
+
+ for (i = 0; i < NR_LOCK_TESTING_THREADS; ++i)
+ pthread_create(&pt[i], NULL, locking_test_thread, &input[i]);
+
+ for (i = 0; i < NR_LOCK_TESTING_THREADS; ++i)
+ pthread_join(pt[i], NULL);
+ for (i = 0; i < NR_LOCK_TESTING_THREADS; ++i)
+ kvm_vm_free(input[i].vm);
+}
+
+static void test_sev_migrate_parameters(void)
+{
+ struct kvm_vm *sev_vm, *sev_es_vm, *vm_no_vcpu, *vm_no_sev,
+ *sev_es_vm_no_vmsa;
+ int ret;
+
+ vm_no_vcpu = vm_create_barebones();
+ vm_no_sev = aux_vm_create(true);
+ ret = __sev_migrate_from(vm_no_vcpu, vm_no_sev);
+ TEST_ASSERT(ret == -1 && errno == EINVAL,
+ "Migrations require SEV enabled. ret %d, errno: %d", ret,
+ errno);
+
+ if (!have_sev_es)
+ goto out;
+
+ sev_vm = sev_vm_create(/* es= */ false);
+ sev_es_vm = sev_vm_create(/* es= */ true);
+ sev_es_vm_no_vmsa = vm_create_barebones();
+ sev_es_vm_init(sev_es_vm_no_vmsa);
+ __vm_vcpu_add(sev_es_vm_no_vmsa, 1);
+
+ ret = __sev_migrate_from(sev_vm, sev_es_vm);
+ TEST_ASSERT(
+ ret == -1 && errno == EINVAL,
+ "Should not be able migrate to SEV enabled VM. ret: %d, errno: %d",
+ ret, errno);
+
+ ret = __sev_migrate_from(sev_es_vm, sev_vm);
+ TEST_ASSERT(
+ ret == -1 && errno == EINVAL,
+ "Should not be able migrate to SEV-ES enabled VM. ret: %d, errno: %d",
+ ret, errno);
+
+ ret = __sev_migrate_from(vm_no_vcpu, sev_es_vm);
+ TEST_ASSERT(
+ ret == -1 && errno == EINVAL,
+ "SEV-ES migrations require same number of vCPUS. ret: %d, errno: %d",
+ ret, errno);
+
+ ret = __sev_migrate_from(vm_no_vcpu, sev_es_vm_no_vmsa);
+ TEST_ASSERT(
+ ret == -1 && errno == EINVAL,
+ "SEV-ES migrations require UPDATE_VMSA. ret %d, errno: %d",
+ ret, errno);
+
+ kvm_vm_free(sev_vm);
+ kvm_vm_free(sev_es_vm);
+ kvm_vm_free(sev_es_vm_no_vmsa);
+out:
+ kvm_vm_free(vm_no_vcpu);
+ kvm_vm_free(vm_no_sev);
+}
+
+static int __sev_mirror_create(struct kvm_vm *dst, struct kvm_vm *src)
+{
+ return __vm_enable_cap(dst, KVM_CAP_VM_COPY_ENC_CONTEXT_FROM, src->fd);
+}
+
+
+static void sev_mirror_create(struct kvm_vm *dst, struct kvm_vm *src)
+{
+ int ret;
+
+ ret = __sev_mirror_create(dst, src);
+ TEST_ASSERT(!ret, "Copying context failed, ret: %d, errno: %d", ret, errno);
+}
+
+static void verify_mirror_allowed_cmds(struct kvm_vm *vm)
+{
+ struct kvm_sev_guest_status status;
+ int cmd_id;
+
+ for (cmd_id = KVM_SEV_INIT; cmd_id < KVM_SEV_NR_MAX; ++cmd_id) {
+ int ret;
+
+ /*
+ * These commands are allowed for mirror VMs, all others are
+ * not.
+ */
+ switch (cmd_id) {
+ case KVM_SEV_LAUNCH_UPDATE_VMSA:
+ case KVM_SEV_GUEST_STATUS:
+ case KVM_SEV_DBG_DECRYPT:
+ case KVM_SEV_DBG_ENCRYPT:
+ continue;
+ default:
+ break;
+ }
+
+ /*
+ * These commands should be disallowed before the data
+ * parameter is examined so NULL is OK here.
+ */
+ ret = __vm_sev_ioctl(vm, cmd_id, NULL);
+ TEST_ASSERT(
+ ret == -1 && errno == EINVAL,
+ "Should not be able call command: %d. ret: %d, errno: %d",
+ cmd_id, ret, errno);
+ }
+
+ vm_sev_ioctl(vm, KVM_SEV_GUEST_STATUS, &status);
+}
+
+static void test_sev_mirror(bool es)
+{
+ struct kvm_vm *src_vm, *dst_vm;
+ int i;
+
+ src_vm = sev_vm_create(es);
+ dst_vm = aux_vm_create(false);
+
+ sev_mirror_create(dst_vm, src_vm);
+
+ /* Check that we can complete creation of the mirror VM. */
+ for (i = 0; i < NR_MIGRATE_TEST_VCPUS; ++i)
+ __vm_vcpu_add(dst_vm, i);
+
+ if (es)
+ vm_sev_ioctl(dst_vm, KVM_SEV_LAUNCH_UPDATE_VMSA, NULL);
+
+ verify_mirror_allowed_cmds(dst_vm);
+
+ kvm_vm_free(src_vm);
+ kvm_vm_free(dst_vm);
+}
+
+static void test_sev_mirror_parameters(void)
+{
+ struct kvm_vm *sev_vm, *sev_es_vm, *vm_no_vcpu, *vm_with_vcpu;
+ int ret;
+
+ sev_vm = sev_vm_create(/* es= */ false);
+ vm_with_vcpu = aux_vm_create(true);
+ vm_no_vcpu = aux_vm_create(false);
+
+ ret = __sev_mirror_create(sev_vm, sev_vm);
+ TEST_ASSERT(
+ ret == -1 && errno == EINVAL,
+ "Should not be able copy context to self. ret: %d, errno: %d",
+ ret, errno);
+
+ ret = __sev_mirror_create(vm_no_vcpu, vm_with_vcpu);
+ TEST_ASSERT(ret == -1 && errno == EINVAL,
+ "Copy context requires SEV enabled. ret %d, errno: %d", ret,
+ errno);
+
+ ret = __sev_mirror_create(vm_with_vcpu, sev_vm);
+ TEST_ASSERT(
+ ret == -1 && errno == EINVAL,
+ "SEV copy context requires no vCPUS on the destination. ret: %d, errno: %d",
+ ret, errno);
+
+ if (!have_sev_es)
+ goto out;
+
+ sev_es_vm = sev_vm_create(/* es= */ true);
+ ret = __sev_mirror_create(sev_vm, sev_es_vm);
+ TEST_ASSERT(
+ ret == -1 && errno == EINVAL,
+ "Should not be able copy context to SEV enabled VM. ret: %d, errno: %d",
+ ret, errno);
+
+ ret = __sev_mirror_create(sev_es_vm, sev_vm);
+ TEST_ASSERT(
+ ret == -1 && errno == EINVAL,
+ "Should not be able copy context to SEV-ES enabled VM. ret: %d, errno: %d",
+ ret, errno);
+
+ kvm_vm_free(sev_es_vm);
+
+out:
+ kvm_vm_free(sev_vm);
+ kvm_vm_free(vm_with_vcpu);
+ kvm_vm_free(vm_no_vcpu);
+}
+
+static void test_sev_move_copy(void)
+{
+ struct kvm_vm *dst_vm, *dst2_vm, *dst3_vm, *sev_vm, *mirror_vm,
+ *dst_mirror_vm, *dst2_mirror_vm, *dst3_mirror_vm;
+
+ sev_vm = sev_vm_create(/* es= */ false);
+ dst_vm = aux_vm_create(true);
+ dst2_vm = aux_vm_create(true);
+ dst3_vm = aux_vm_create(true);
+ mirror_vm = aux_vm_create(false);
+ dst_mirror_vm = aux_vm_create(false);
+ dst2_mirror_vm = aux_vm_create(false);
+ dst3_mirror_vm = aux_vm_create(false);
+
+ sev_mirror_create(mirror_vm, sev_vm);
+
+ sev_migrate_from(dst_mirror_vm, mirror_vm);
+ sev_migrate_from(dst_vm, sev_vm);
+
+ sev_migrate_from(dst2_vm, dst_vm);
+ sev_migrate_from(dst2_mirror_vm, dst_mirror_vm);
+
+ sev_migrate_from(dst3_mirror_vm, dst2_mirror_vm);
+ sev_migrate_from(dst3_vm, dst2_vm);
+
+ kvm_vm_free(dst_vm);
+ kvm_vm_free(sev_vm);
+ kvm_vm_free(dst2_vm);
+ kvm_vm_free(dst3_vm);
+ kvm_vm_free(mirror_vm);
+ kvm_vm_free(dst_mirror_vm);
+ kvm_vm_free(dst2_mirror_vm);
+ kvm_vm_free(dst3_mirror_vm);
+
+ /*
+ * Run similar test be destroy mirrors before mirrored VMs to ensure
+ * destruction is done safely.
+ */
+ sev_vm = sev_vm_create(/* es= */ false);
+ dst_vm = aux_vm_create(true);
+ mirror_vm = aux_vm_create(false);
+ dst_mirror_vm = aux_vm_create(false);
+
+ sev_mirror_create(mirror_vm, sev_vm);
+
+ sev_migrate_from(dst_mirror_vm, mirror_vm);
+ sev_migrate_from(dst_vm, sev_vm);
+
+ kvm_vm_free(mirror_vm);
+ kvm_vm_free(dst_mirror_vm);
+ kvm_vm_free(dst_vm);
+ kvm_vm_free(sev_vm);
+}
+
+int main(int argc, char *argv[])
+{
+ TEST_REQUIRE(kvm_has_cap(KVM_CAP_VM_MOVE_ENC_CONTEXT_FROM));
+ TEST_REQUIRE(kvm_has_cap(KVM_CAP_VM_COPY_ENC_CONTEXT_FROM));
+
+ TEST_REQUIRE(kvm_cpu_has(X86_FEATURE_SEV));
+
+ have_sev_es = kvm_cpu_has(X86_FEATURE_SEV_ES);
+
+ if (kvm_has_cap(KVM_CAP_VM_MOVE_ENC_CONTEXT_FROM)) {
+ test_sev_migrate_from(/* es= */ false);
+ if (have_sev_es)
+ test_sev_migrate_from(/* es= */ true);
+ test_sev_migrate_locking();
+ test_sev_migrate_parameters();
+ if (kvm_has_cap(KVM_CAP_VM_COPY_ENC_CONTEXT_FROM))
+ test_sev_move_copy();
+ }
+ if (kvm_has_cap(KVM_CAP_VM_COPY_ENC_CONTEXT_FROM)) {
+ test_sev_mirror(/* es= */ false);
+ if (have_sev_es)
+ test_sev_mirror(/* es= */ true);
+ test_sev_mirror_parameters();
+ }
+ return 0;
+}
diff --git a/tools/testing/selftests/kvm/x86_64/sev_smoke_test.c b/tools/testing/selftests/kvm/x86_64/sev_smoke_test.c
new file mode 100644
index 000000000000..026779f3ed06
--- /dev/null
+++ b/tools/testing/selftests/kvm/x86_64/sev_smoke_test.c
@@ -0,0 +1,88 @@
+// SPDX-License-Identifier: GPL-2.0-only
+#include <fcntl.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/ioctl.h>
+
+#include "test_util.h"
+#include "kvm_util.h"
+#include "processor.h"
+#include "svm_util.h"
+#include "linux/psp-sev.h"
+#include "sev.h"
+
+
+static void guest_sev_es_code(void)
+{
+ /* TODO: Check CPUID after GHCB-based hypercall support is added. */
+ GUEST_ASSERT(rdmsr(MSR_AMD64_SEV) & MSR_AMD64_SEV_ENABLED);
+ GUEST_ASSERT(rdmsr(MSR_AMD64_SEV) & MSR_AMD64_SEV_ES_ENABLED);
+
+ /*
+ * TODO: Add GHCB and ucall support for SEV-ES guests. For now, simply
+ * force "termination" to signal "done" via the GHCB MSR protocol.
+ */
+ wrmsr(MSR_AMD64_SEV_ES_GHCB, GHCB_MSR_TERM_REQ);
+ __asm__ __volatile__("rep; vmmcall");
+}
+
+static void guest_sev_code(void)
+{
+ GUEST_ASSERT(this_cpu_has(X86_FEATURE_SEV));
+ GUEST_ASSERT(rdmsr(MSR_AMD64_SEV) & MSR_AMD64_SEV_ENABLED);
+
+ GUEST_DONE();
+}
+
+static void test_sev(void *guest_code, uint64_t policy)
+{
+ struct kvm_vcpu *vcpu;
+ struct kvm_vm *vm;
+ struct ucall uc;
+
+ vm = vm_sev_create_with_one_vcpu(policy, guest_code, &vcpu);
+
+ for (;;) {
+ vcpu_run(vcpu);
+
+ if (policy & SEV_POLICY_ES) {
+ TEST_ASSERT(vcpu->run->exit_reason == KVM_EXIT_SYSTEM_EVENT,
+ "Wanted SYSTEM_EVENT, got %s",
+ exit_reason_str(vcpu->run->exit_reason));
+ TEST_ASSERT_EQ(vcpu->run->system_event.type, KVM_SYSTEM_EVENT_SEV_TERM);
+ TEST_ASSERT_EQ(vcpu->run->system_event.ndata, 1);
+ TEST_ASSERT_EQ(vcpu->run->system_event.data[0], GHCB_MSR_TERM_REQ);
+ break;
+ }
+
+ switch (get_ucall(vcpu, &uc)) {
+ case UCALL_SYNC:
+ continue;
+ case UCALL_DONE:
+ return;
+ case UCALL_ABORT:
+ REPORT_GUEST_ASSERT(uc);
+ default:
+ TEST_FAIL("Unexpected exit: %s",
+ exit_reason_str(vcpu->run->exit_reason));
+ }
+ }
+
+ kvm_vm_free(vm);
+}
+
+int main(int argc, char *argv[])
+{
+ TEST_REQUIRE(kvm_cpu_has(X86_FEATURE_SEV));
+
+ test_sev(guest_sev_code, SEV_POLICY_NO_DBG);
+ test_sev(guest_sev_code, 0);
+
+ if (kvm_cpu_has(X86_FEATURE_SEV_ES)) {
+ test_sev(guest_sev_es_code, SEV_POLICY_ES | SEV_POLICY_NO_DBG);
+ test_sev(guest_sev_es_code, SEV_POLICY_ES);
+ }
+
+ return 0;
+}
diff --git a/tools/testing/selftests/kvm/x86_64/smaller_maxphyaddr_emulation_test.c b/tools/testing/selftests/kvm/x86_64/smaller_maxphyaddr_emulation_test.c
new file mode 100644
index 000000000000..416207c38a17
--- /dev/null
+++ b/tools/testing/selftests/kvm/x86_64/smaller_maxphyaddr_emulation_test.c
@@ -0,0 +1,111 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (C) 2020, Google LLC.
+ *
+ * Test that KVM emulates instructions in response to EPT violations when
+ * allow_smaller_maxphyaddr is enabled and guest.MAXPHYADDR < host.MAXPHYADDR.
+ */
+
+#define _GNU_SOURCE /* for program_invocation_short_name */
+
+#include "flds_emulation.h"
+
+#include "test_util.h"
+#include "kvm_util.h"
+#include "vmx.h"
+
+#define MAXPHYADDR 36
+
+#define MEM_REGION_GVA 0x0000123456789000
+#define MEM_REGION_GPA 0x0000000700000000
+#define MEM_REGION_SLOT 10
+#define MEM_REGION_SIZE PAGE_SIZE
+
+static void guest_code(bool tdp_enabled)
+{
+ uint64_t error_code;
+ uint64_t vector;
+
+ vector = kvm_asm_safe_ec(FLDS_MEM_EAX, error_code, "a"(MEM_REGION_GVA));
+
+ /*
+ * When TDP is enabled, flds will trigger an emulation failure, exit to
+ * userspace, and then the selftest host "VMM" skips the instruction.
+ *
+ * When TDP is disabled, no instruction emulation is required so flds
+ * should generate #PF(RSVD).
+ */
+ if (tdp_enabled) {
+ GUEST_ASSERT(!vector);
+ } else {
+ GUEST_ASSERT_EQ(vector, PF_VECTOR);
+ GUEST_ASSERT(error_code & PFERR_RSVD_MASK);
+ }
+
+ GUEST_DONE();
+}
+
+int main(int argc, char *argv[])
+{
+ struct kvm_vcpu *vcpu;
+ struct kvm_vm *vm;
+ struct ucall uc;
+ uint64_t *pte;
+ uint64_t *hva;
+ uint64_t gpa;
+ int rc;
+
+ TEST_REQUIRE(kvm_has_cap(KVM_CAP_SMALLER_MAXPHYADDR));
+
+ vm = vm_create_with_one_vcpu(&vcpu, guest_code);
+ vcpu_args_set(vcpu, 1, kvm_is_tdp_enabled());
+
+ vm_init_descriptor_tables(vm);
+ vcpu_init_descriptor_tables(vcpu);
+
+ vcpu_set_cpuid_property(vcpu, X86_PROPERTY_MAX_PHY_ADDR, MAXPHYADDR);
+
+ rc = kvm_check_cap(KVM_CAP_EXIT_ON_EMULATION_FAILURE);
+ TEST_ASSERT(rc, "KVM_CAP_EXIT_ON_EMULATION_FAILURE is unavailable");
+ vm_enable_cap(vm, KVM_CAP_EXIT_ON_EMULATION_FAILURE, 1);
+
+ vm_userspace_mem_region_add(vm, VM_MEM_SRC_ANONYMOUS,
+ MEM_REGION_GPA, MEM_REGION_SLOT,
+ MEM_REGION_SIZE / PAGE_SIZE, 0);
+ gpa = vm_phy_pages_alloc(vm, MEM_REGION_SIZE / PAGE_SIZE,
+ MEM_REGION_GPA, MEM_REGION_SLOT);
+ TEST_ASSERT(gpa == MEM_REGION_GPA, "Failed vm_phy_pages_alloc");
+ virt_map(vm, MEM_REGION_GVA, MEM_REGION_GPA, 1);
+ hva = addr_gpa2hva(vm, MEM_REGION_GPA);
+ memset(hva, 0, PAGE_SIZE);
+
+ pte = vm_get_page_table_entry(vm, MEM_REGION_GVA);
+ *pte |= BIT_ULL(MAXPHYADDR);
+
+ vcpu_run(vcpu);
+
+ /*
+ * When TDP is enabled, KVM must emulate in response the guest physical
+ * address that is illegal from the guest's perspective, but is legal
+ * from hardware's perspeective. This should result in an emulation
+ * failure exit to userspace since KVM doesn't support emulating flds.
+ */
+ if (kvm_is_tdp_enabled()) {
+ handle_flds_emulation_failure_exit(vcpu);
+ vcpu_run(vcpu);
+ }
+
+ switch (get_ucall(vcpu, &uc)) {
+ case UCALL_ABORT:
+ REPORT_GUEST_ASSERT(uc);
+ break;
+ case UCALL_DONE:
+ break;
+ default:
+ TEST_FAIL("Unrecognized ucall: %lu", uc.cmd);
+ }
+
+ kvm_vm_free(vm);
+
+ return 0;
+}
diff --git a/tools/testing/selftests/kvm/x86_64/smm_test.c b/tools/testing/selftests/kvm/x86_64/smm_test.c
index ae39a220609f..e18b86666e1f 100644
--- a/tools/testing/selftests/kvm/x86_64/smm_test.c
+++ b/tools/testing/selftests/kvm/x86_64/smm_test.c
@@ -19,10 +19,6 @@
#include "vmx.h"
#include "svm_util.h"
-#define VCPU_ID 1
-
-#define PAGE_SIZE 4096
-
#define SMRAM_SIZE 65536
#define SMRAM_MEMSLOT ((1 << 16) | 1)
#define SMRAM_PAGES (SMRAM_SIZE / PAGE_SIZE)
@@ -53,15 +49,28 @@ static inline void sync_with_host(uint64_t phase)
: "+a" (phase));
}
-void self_smi(void)
+static void self_smi(void)
+{
+ x2apic_write_reg(APIC_ICR,
+ APIC_DEST_SELF | APIC_INT_ASSERT | APIC_DM_SMI);
+}
+
+static void l2_guest_code(void)
{
- wrmsr(APIC_BASE_MSR + (APIC_ICR >> 4),
- APIC_DEST_SELF | APIC_INT_ASSERT | APIC_DM_SMI);
+ sync_with_host(8);
+
+ sync_with_host(10);
+
+ vmcall();
}
-void guest_code(void *arg)
+static void guest_code(void *arg)
{
+ #define L2_GUEST_STACK_SIZE 64
+ unsigned long l2_guest_stack[L2_GUEST_STACK_SIZE];
uint64_t apicbase = rdmsr(MSR_IA32_APICBASE);
+ struct svm_test_data *svm = arg;
+ struct vmx_pages *vmx_pages = arg;
sync_with_host(1);
@@ -74,37 +83,63 @@ void guest_code(void *arg)
sync_with_host(4);
if (arg) {
- if (cpu_has_svm())
- generic_svm_setup(arg, NULL, NULL);
- else
- GUEST_ASSERT(prepare_for_vmx_operation(arg));
+ if (this_cpu_has(X86_FEATURE_SVM)) {
+ generic_svm_setup(svm, l2_guest_code,
+ &l2_guest_stack[L2_GUEST_STACK_SIZE]);
+ } else {
+ GUEST_ASSERT(prepare_for_vmx_operation(vmx_pages));
+ GUEST_ASSERT(load_vmcs(vmx_pages));
+ prepare_vmcs(vmx_pages, l2_guest_code,
+ &l2_guest_stack[L2_GUEST_STACK_SIZE]);
+ }
sync_with_host(5);
self_smi();
sync_with_host(7);
+
+ if (this_cpu_has(X86_FEATURE_SVM)) {
+ run_guest(svm->vmcb, svm->vmcb_gpa);
+ run_guest(svm->vmcb, svm->vmcb_gpa);
+ } else {
+ vmlaunch();
+ vmresume();
+ }
+
+ /* Stages 8-11 are eaten by SMM (SMRAM_STAGE reported instead) */
+ sync_with_host(12);
}
sync_with_host(DONE);
}
+void inject_smi(struct kvm_vcpu *vcpu)
+{
+ struct kvm_vcpu_events events;
+
+ vcpu_events_get(vcpu, &events);
+
+ events.smi.pending = 1;
+ events.flags |= KVM_VCPUEVENT_VALID_SMM;
+
+ vcpu_events_set(vcpu, &events);
+}
+
int main(int argc, char *argv[])
{
vm_vaddr_t nested_gva = 0;
+ struct kvm_vcpu *vcpu;
struct kvm_regs regs;
struct kvm_vm *vm;
- struct kvm_run *run;
struct kvm_x86_state *state;
int stage, stage_reported;
- /* Create VM */
- vm = vm_create_default(VCPU_ID, 0, guest_code);
-
- vcpu_set_cpuid(vm, VCPU_ID, kvm_get_supported_cpuid());
+ TEST_REQUIRE(kvm_has_cap(KVM_CAP_X86_SMM));
- run = vcpu_state(vm, VCPU_ID);
+ /* Create VM */
+ vm = vm_create_with_one_vcpu(&vcpu, guest_code);
vm_userspace_mem_region_add(vm, VM_MEM_SRC_ANONYMOUS, SMRAM_GPA,
SMRAM_MEMSLOT, SMRAM_PAGES, 0);
@@ -115,29 +150,26 @@ int main(int argc, char *argv[])
memcpy(addr_gpa2hva(vm, SMRAM_GPA) + 0x8000, smi_handler,
sizeof(smi_handler));
- vcpu_set_msr(vm, VCPU_ID, MSR_IA32_SMBASE, SMRAM_GPA);
+ vcpu_set_msr(vcpu, MSR_IA32_SMBASE, SMRAM_GPA);
- if (kvm_check_cap(KVM_CAP_NESTED_STATE)) {
- if (nested_svm_supported())
+ if (kvm_has_cap(KVM_CAP_NESTED_STATE)) {
+ if (kvm_cpu_has(X86_FEATURE_SVM))
vcpu_alloc_svm(vm, &nested_gva);
- else if (nested_vmx_supported())
+ else if (kvm_cpu_has(X86_FEATURE_VMX))
vcpu_alloc_vmx(vm, &nested_gva);
}
if (!nested_gva)
pr_info("will skip SMM test with VMX enabled\n");
- vcpu_args_set(vm, VCPU_ID, 1, nested_gva);
+ vcpu_args_set(vcpu, 1, nested_gva);
for (stage = 1;; stage++) {
- _vcpu_run(vm, VCPU_ID);
- TEST_ASSERT(run->exit_reason == KVM_EXIT_IO,
- "Stage %d: unexpected exit reason: %u (%s),\n",
- stage, run->exit_reason,
- exit_reason_str(run->exit_reason));
+ vcpu_run(vcpu);
+ TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_IO);
memset(&regs, 0, sizeof(regs));
- vcpu_regs_get(vm, VCPU_ID, &regs);
+ vcpu_regs_get(vcpu, &regs);
stage_reported = regs.rax & 0xff;
@@ -149,14 +181,28 @@ int main(int argc, char *argv[])
"Unexpected stage: #%x, got %x",
stage, stage_reported);
- state = vcpu_save_state(vm, VCPU_ID);
+ /*
+ * Enter SMM during L2 execution and check that we correctly
+ * return from it. Do not perform save/restore while in SMM yet.
+ */
+ if (stage == 8) {
+ inject_smi(vcpu);
+ continue;
+ }
+
+ /*
+ * Perform save/restore while the guest is in SMM triggered
+ * during L2 execution.
+ */
+ if (stage == 10)
+ inject_smi(vcpu);
+
+ state = vcpu_save_state(vcpu);
kvm_vm_release(vm);
- kvm_vm_restart(vm, O_RDWR);
- vm_vcpu_add(vm, VCPU_ID);
- vcpu_set_cpuid(vm, VCPU_ID, kvm_get_supported_cpuid());
- vcpu_load_state(vm, VCPU_ID, state);
- run = vcpu_state(vm, VCPU_ID);
- free(state);
+
+ vcpu = vm_recreate_with_one_vcpu(vm);
+ vcpu_load_state(vcpu, state);
+ kvm_x86_state_cleanup(state);
}
done:
diff --git a/tools/testing/selftests/kvm/x86_64/state_test.c b/tools/testing/selftests/kvm/x86_64/state_test.c
index f6c8b9042f8a..88b58aab7207 100644
--- a/tools/testing/selftests/kvm/x86_64/state_test.c
+++ b/tools/testing/selftests/kvm/x86_64/state_test.c
@@ -20,7 +20,6 @@
#include "vmx.h"
#include "svm_util.h"
-#define VCPU_ID 5
#define L2_GUEST_STACK_SIZE 256
void svm_l2_guest_code(void)
@@ -140,10 +139,87 @@ static void vmx_l1_guest_code(struct vmx_pages *vmx_pages)
static void __attribute__((__flatten__)) guest_code(void *arg)
{
GUEST_SYNC(1);
+
+ if (this_cpu_has(X86_FEATURE_XSAVE)) {
+ uint64_t supported_xcr0 = this_cpu_supported_xcr0();
+ uint8_t buffer[4096];
+
+ memset(buffer, 0xcc, sizeof(buffer));
+
+ set_cr4(get_cr4() | X86_CR4_OSXSAVE);
+ GUEST_ASSERT(this_cpu_has(X86_FEATURE_OSXSAVE));
+
+ xsetbv(0, xgetbv(0) | supported_xcr0);
+
+ /*
+ * Modify state for all supported xfeatures to take them out of
+ * their "init" state, i.e. to make them show up in XSTATE_BV.
+ *
+ * Note off-by-default features, e.g. AMX, are out of scope for
+ * this particular testcase as they have a different ABI.
+ */
+ GUEST_ASSERT(supported_xcr0 & XFEATURE_MASK_FP);
+ asm volatile ("fincstp");
+
+ GUEST_ASSERT(supported_xcr0 & XFEATURE_MASK_SSE);
+ asm volatile ("vmovdqu %0, %%xmm0" :: "m" (buffer));
+
+ if (supported_xcr0 & XFEATURE_MASK_YMM)
+ asm volatile ("vmovdqu %0, %%ymm0" :: "m" (buffer));
+
+ if (supported_xcr0 & XFEATURE_MASK_AVX512) {
+ asm volatile ("kmovq %0, %%k1" :: "r" (-1ull));
+ asm volatile ("vmovupd %0, %%zmm0" :: "m" (buffer));
+ asm volatile ("vmovupd %0, %%zmm16" :: "m" (buffer));
+ }
+
+ if (this_cpu_has(X86_FEATURE_MPX)) {
+ uint64_t bounds[2] = { 10, 0xffffffffull };
+ uint64_t output[2] = { };
+
+ GUEST_ASSERT(supported_xcr0 & XFEATURE_MASK_BNDREGS);
+ GUEST_ASSERT(supported_xcr0 & XFEATURE_MASK_BNDCSR);
+
+ /*
+ * Don't bother trying to get BNDCSR into the INUSE
+ * state. MSR_IA32_BNDCFGS doesn't count as it isn't
+ * managed via XSAVE/XRSTOR, and BNDCFGU can only be
+ * modified by XRSTOR. Stuffing XSTATE_BV in the host
+ * is simpler than doing XRSTOR here in the guest.
+ *
+ * However, temporarily enable MPX in BNDCFGS so that
+ * BNDMOV actually loads BND1. If MPX isn't *fully*
+ * enabled, all MPX instructions are treated as NOPs.
+ *
+ * Hand encode "bndmov (%rax),%bnd1" as support for MPX
+ * mnemonics/registers has been removed from gcc and
+ * clang (and was never fully supported by clang).
+ */
+ wrmsr(MSR_IA32_BNDCFGS, BIT_ULL(0));
+ asm volatile (".byte 0x66,0x0f,0x1a,0x08" :: "a" (bounds));
+ /*
+ * Hand encode "bndmov %bnd1, (%rax)" to sanity check
+ * that BND1 actually got loaded.
+ */
+ asm volatile (".byte 0x66,0x0f,0x1b,0x08" :: "a" (output));
+ wrmsr(MSR_IA32_BNDCFGS, 0);
+
+ GUEST_ASSERT_EQ(bounds[0], output[0]);
+ GUEST_ASSERT_EQ(bounds[1], output[1]);
+ }
+ if (this_cpu_has(X86_FEATURE_PKU)) {
+ GUEST_ASSERT(supported_xcr0 & XFEATURE_MASK_PKRU);
+ set_cr4(get_cr4() | X86_CR4_PKE);
+ GUEST_ASSERT(this_cpu_has(X86_FEATURE_OSPKE));
+
+ wrpkru(-1u);
+ }
+ }
+
GUEST_SYNC(2);
if (arg) {
- if (cpu_has_svm())
+ if (this_cpu_has(X86_FEATURE_SVM))
svm_l1_guest_code(arg);
else
vmx_l1_guest_code(arg);
@@ -154,45 +230,40 @@ static void __attribute__((__flatten__)) guest_code(void *arg)
int main(int argc, char *argv[])
{
+ uint64_t *xstate_bv, saved_xstate_bv;
vm_vaddr_t nested_gva = 0;
-
+ struct kvm_cpuid2 empty_cpuid = {};
struct kvm_regs regs1, regs2;
+ struct kvm_vcpu *vcpu, *vcpuN;
struct kvm_vm *vm;
- struct kvm_run *run;
struct kvm_x86_state *state;
struct ucall uc;
int stage;
/* Create VM */
- vm = vm_create_default(VCPU_ID, 0, guest_code);
- vcpu_set_cpuid(vm, VCPU_ID, kvm_get_supported_cpuid());
- run = vcpu_state(vm, VCPU_ID);
+ vm = vm_create_with_one_vcpu(&vcpu, guest_code);
- vcpu_regs_get(vm, VCPU_ID, &regs1);
+ vcpu_regs_get(vcpu, &regs1);
- if (kvm_check_cap(KVM_CAP_NESTED_STATE)) {
- if (nested_svm_supported())
+ if (kvm_has_cap(KVM_CAP_NESTED_STATE)) {
+ if (kvm_cpu_has(X86_FEATURE_SVM))
vcpu_alloc_svm(vm, &nested_gva);
- else if (nested_vmx_supported())
+ else if (kvm_cpu_has(X86_FEATURE_VMX))
vcpu_alloc_vmx(vm, &nested_gva);
}
if (!nested_gva)
pr_info("will skip nested state checks\n");
- vcpu_args_set(vm, VCPU_ID, 1, nested_gva);
+ vcpu_args_set(vcpu, 1, nested_gva);
for (stage = 1;; stage++) {
- _vcpu_run(vm, VCPU_ID);
- TEST_ASSERT(run->exit_reason == KVM_EXIT_IO,
- "Stage %d: unexpected exit reason: %u (%s),\n",
- stage, run->exit_reason,
- exit_reason_str(run->exit_reason));
+ vcpu_run(vcpu);
+ TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_IO);
- switch (get_ucall(vm, VCPU_ID, &uc)) {
+ switch (get_ucall(vcpu, &uc)) {
case UCALL_ABORT:
- TEST_FAIL("%s at %s:%ld", (const char *)uc.args[0],
- __FILE__, uc.args[1]);
+ REPORT_GUEST_ASSERT(uc);
/* NOT REACHED */
case UCALL_SYNC:
break;
@@ -207,22 +278,47 @@ int main(int argc, char *argv[])
uc.args[1] == stage, "Stage %d: Unexpected register values vmexit, got %lx",
stage, (ulong)uc.args[1]);
- state = vcpu_save_state(vm, VCPU_ID);
+ state = vcpu_save_state(vcpu);
memset(&regs1, 0, sizeof(regs1));
- vcpu_regs_get(vm, VCPU_ID, &regs1);
+ vcpu_regs_get(vcpu, &regs1);
kvm_vm_release(vm);
/* Restore state in a new VM. */
- kvm_vm_restart(vm, O_RDWR);
- vm_vcpu_add(vm, VCPU_ID);
- vcpu_set_cpuid(vm, VCPU_ID, kvm_get_supported_cpuid());
- vcpu_load_state(vm, VCPU_ID, state);
- run = vcpu_state(vm, VCPU_ID);
- free(state);
+ vcpu = vm_recreate_with_one_vcpu(vm);
+ vcpu_load_state(vcpu, state);
+
+ /*
+ * Restore XSAVE state in a dummy vCPU, first without doing
+ * KVM_SET_CPUID2, and then with an empty guest CPUID. Except
+ * for off-by-default xfeatures, e.g. AMX, KVM is supposed to
+ * allow KVM_SET_XSAVE regardless of guest CPUID. Manually
+ * load only XSAVE state, MSRs in particular have a much more
+ * convoluted ABI.
+ *
+ * Load two versions of XSAVE state: one with the actual guest
+ * XSAVE state, and one with all supported features forced "on"
+ * in xstate_bv, e.g. to ensure that KVM allows loading all
+ * supported features, even if something goes awry in saving
+ * the original snapshot.
+ */
+ xstate_bv = (void *)&((uint8_t *)state->xsave->region)[512];
+ saved_xstate_bv = *xstate_bv;
+
+ vcpuN = __vm_vcpu_add(vm, vcpu->id + 1);
+ vcpu_xsave_set(vcpuN, state->xsave);
+ *xstate_bv = kvm_cpu_supported_xcr0();
+ vcpu_xsave_set(vcpuN, state->xsave);
+
+ vcpu_init_cpuid(vcpuN, &empty_cpuid);
+ vcpu_xsave_set(vcpuN, state->xsave);
+ *xstate_bv = saved_xstate_bv;
+ vcpu_xsave_set(vcpuN, state->xsave);
+
+ kvm_x86_state_cleanup(state);
memset(&regs2, 0, sizeof(regs2));
- vcpu_regs_get(vm, VCPU_ID, &regs2);
+ vcpu_regs_get(vcpu, &regs2);
TEST_ASSERT(!memcmp(&regs1, &regs2, sizeof(regs2)),
"Unexpected register values after vcpu_load_state; rdi: %lx rsi: %lx",
(ulong) regs2.rdi, (ulong) regs2.rsi);
diff --git a/tools/testing/selftests/kvm/x86_64/svm_int_ctl_test.c b/tools/testing/selftests/kvm/x86_64/svm_int_ctl_test.c
new file mode 100644
index 000000000000..32bef39bec21
--- /dev/null
+++ b/tools/testing/selftests/kvm/x86_64/svm_int_ctl_test.c
@@ -0,0 +1,121 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * svm_int_ctl_test
+ *
+ * Copyright (C) 2021, Red Hat, Inc.
+ *
+ * Nested SVM testing: test simultaneous use of V_IRQ from L1 and L0.
+ */
+
+#include "test_util.h"
+#include "kvm_util.h"
+#include "processor.h"
+#include "svm_util.h"
+#include "apic.h"
+
+bool vintr_irq_called;
+bool intr_irq_called;
+
+#define VINTR_IRQ_NUMBER 0x20
+#define INTR_IRQ_NUMBER 0x30
+
+static void vintr_irq_handler(struct ex_regs *regs)
+{
+ vintr_irq_called = true;
+}
+
+static void intr_irq_handler(struct ex_regs *regs)
+{
+ x2apic_write_reg(APIC_EOI, 0x00);
+ intr_irq_called = true;
+}
+
+static void l2_guest_code(struct svm_test_data *svm)
+{
+ /* This code raises interrupt INTR_IRQ_NUMBER in the L1's LAPIC,
+ * and since L1 didn't enable virtual interrupt masking,
+ * L2 should receive it and not L1.
+ *
+ * L2 also has virtual interrupt 'VINTR_IRQ_NUMBER' pending in V_IRQ
+ * so it should also receive it after the following 'sti'.
+ */
+ x2apic_write_reg(APIC_ICR,
+ APIC_DEST_SELF | APIC_INT_ASSERT | INTR_IRQ_NUMBER);
+
+ __asm__ __volatile__(
+ "sti\n"
+ "nop\n"
+ );
+
+ GUEST_ASSERT(vintr_irq_called);
+ GUEST_ASSERT(intr_irq_called);
+
+ __asm__ __volatile__(
+ "vmcall\n"
+ );
+}
+
+static void l1_guest_code(struct svm_test_data *svm)
+{
+ #define L2_GUEST_STACK_SIZE 64
+ unsigned long l2_guest_stack[L2_GUEST_STACK_SIZE];
+ struct vmcb *vmcb = svm->vmcb;
+
+ x2apic_enable();
+
+ /* Prepare for L2 execution. */
+ generic_svm_setup(svm, l2_guest_code,
+ &l2_guest_stack[L2_GUEST_STACK_SIZE]);
+
+ /* No virtual interrupt masking */
+ vmcb->control.int_ctl &= ~V_INTR_MASKING_MASK;
+
+ /* No intercepts for real and virtual interrupts */
+ vmcb->control.intercept &= ~(BIT(INTERCEPT_INTR) | BIT(INTERCEPT_VINTR));
+
+ /* Make a virtual interrupt VINTR_IRQ_NUMBER pending */
+ vmcb->control.int_ctl |= V_IRQ_MASK | (0x1 << V_INTR_PRIO_SHIFT);
+ vmcb->control.int_vector = VINTR_IRQ_NUMBER;
+
+ run_guest(vmcb, svm->vmcb_gpa);
+ GUEST_ASSERT(vmcb->control.exit_code == SVM_EXIT_VMMCALL);
+ GUEST_DONE();
+}
+
+int main(int argc, char *argv[])
+{
+ struct kvm_vcpu *vcpu;
+ vm_vaddr_t svm_gva;
+ struct kvm_vm *vm;
+ struct ucall uc;
+
+ TEST_REQUIRE(kvm_cpu_has(X86_FEATURE_SVM));
+
+ vm = vm_create_with_one_vcpu(&vcpu, l1_guest_code);
+
+ vm_init_descriptor_tables(vm);
+ vcpu_init_descriptor_tables(vcpu);
+
+ vm_install_exception_handler(vm, VINTR_IRQ_NUMBER, vintr_irq_handler);
+ vm_install_exception_handler(vm, INTR_IRQ_NUMBER, intr_irq_handler);
+
+ vcpu_alloc_svm(vm, &svm_gva);
+ vcpu_args_set(vcpu, 1, svm_gva);
+
+ vcpu_run(vcpu);
+ TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_IO);
+
+ switch (get_ucall(vcpu, &uc)) {
+ case UCALL_ABORT:
+ REPORT_GUEST_ASSERT(uc);
+ break;
+ /* NOT REACHED */
+ case UCALL_DONE:
+ goto done;
+ default:
+ TEST_FAIL("Unknown ucall 0x%lx.", uc.cmd);
+ }
+done:
+ kvm_vm_free(vm);
+ return 0;
+}
diff --git a/tools/testing/selftests/kvm/x86_64/svm_nested_shutdown_test.c b/tools/testing/selftests/kvm/x86_64/svm_nested_shutdown_test.c
new file mode 100644
index 000000000000..d6fcdcc3af31
--- /dev/null
+++ b/tools/testing/selftests/kvm/x86_64/svm_nested_shutdown_test.c
@@ -0,0 +1,62 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * svm_nested_shutdown_test
+ *
+ * Copyright (C) 2022, Red Hat, Inc.
+ *
+ * Nested SVM testing: test that unintercepted shutdown in L2 doesn't crash the host
+ */
+
+#include "test_util.h"
+#include "kvm_util.h"
+#include "processor.h"
+#include "svm_util.h"
+
+static void l2_guest_code(struct svm_test_data *svm)
+{
+ __asm__ __volatile__("ud2");
+}
+
+static void l1_guest_code(struct svm_test_data *svm, struct idt_entry *idt)
+{
+ #define L2_GUEST_STACK_SIZE 64
+ unsigned long l2_guest_stack[L2_GUEST_STACK_SIZE];
+ struct vmcb *vmcb = svm->vmcb;
+
+ generic_svm_setup(svm, l2_guest_code,
+ &l2_guest_stack[L2_GUEST_STACK_SIZE]);
+
+ vmcb->control.intercept &= ~(BIT(INTERCEPT_SHUTDOWN));
+
+ idt[6].p = 0; // #UD is intercepted but its injection will cause #NP
+ idt[11].p = 0; // #NP is not intercepted and will cause another
+ // #NP that will be converted to #DF
+ idt[8].p = 0; // #DF will cause #NP which will cause SHUTDOWN
+
+ run_guest(vmcb, svm->vmcb_gpa);
+
+ /* should not reach here */
+ GUEST_ASSERT(0);
+}
+
+int main(int argc, char *argv[])
+{
+ struct kvm_vcpu *vcpu;
+ vm_vaddr_t svm_gva;
+ struct kvm_vm *vm;
+
+ TEST_REQUIRE(kvm_cpu_has(X86_FEATURE_SVM));
+
+ vm = vm_create_with_one_vcpu(&vcpu, l1_guest_code);
+ vm_init_descriptor_tables(vm);
+ vcpu_init_descriptor_tables(vcpu);
+
+ vcpu_alloc_svm(vm, &svm_gva);
+
+ vcpu_args_set(vcpu, 2, svm_gva, vm->idt);
+
+ vcpu_run(vcpu);
+ TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_SHUTDOWN);
+
+ kvm_vm_free(vm);
+}
diff --git a/tools/testing/selftests/kvm/x86_64/svm_nested_soft_inject_test.c b/tools/testing/selftests/kvm/x86_64/svm_nested_soft_inject_test.c
new file mode 100644
index 000000000000..0c7ce3d4e83a
--- /dev/null
+++ b/tools/testing/selftests/kvm/x86_64/svm_nested_soft_inject_test.c
@@ -0,0 +1,213 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright (C) 2022 Oracle and/or its affiliates.
+ *
+ * Based on:
+ * svm_int_ctl_test
+ *
+ * Copyright (C) 2021, Red Hat, Inc.
+ *
+ */
+#include <stdatomic.h>
+#include <stdio.h>
+#include <unistd.h>
+#include "apic.h"
+#include "kvm_util.h"
+#include "processor.h"
+#include "svm_util.h"
+#include "test_util.h"
+
+#define INT_NR 0x20
+
+static_assert(ATOMIC_INT_LOCK_FREE == 2, "atomic int is not lockless");
+
+static unsigned int bp_fired;
+static void guest_bp_handler(struct ex_regs *regs)
+{
+ bp_fired++;
+}
+
+static unsigned int int_fired;
+static void l2_guest_code_int(void);
+
+static void guest_int_handler(struct ex_regs *regs)
+{
+ int_fired++;
+ GUEST_ASSERT_EQ(regs->rip, (unsigned long)l2_guest_code_int);
+}
+
+static void l2_guest_code_int(void)
+{
+ GUEST_ASSERT_EQ(int_fired, 1);
+
+ /*
+ * Same as the vmmcall() function, but with a ud2 sneaked after the
+ * vmmcall. The caller injects an exception with the return address
+ * increased by 2, so the "pop rbp" must be after the ud2 and we cannot
+ * use vmmcall() directly.
+ */
+ __asm__ __volatile__("push %%rbp; vmmcall; ud2; pop %%rbp"
+ : : "a"(0xdeadbeef), "c"(0xbeefdead)
+ : "rbx", "rdx", "rsi", "rdi", "r8", "r9",
+ "r10", "r11", "r12", "r13", "r14", "r15");
+
+ GUEST_ASSERT_EQ(bp_fired, 1);
+ hlt();
+}
+
+static atomic_int nmi_stage;
+#define nmi_stage_get() atomic_load_explicit(&nmi_stage, memory_order_acquire)
+#define nmi_stage_inc() atomic_fetch_add_explicit(&nmi_stage, 1, memory_order_acq_rel)
+static void guest_nmi_handler(struct ex_regs *regs)
+{
+ nmi_stage_inc();
+
+ if (nmi_stage_get() == 1) {
+ vmmcall();
+ GUEST_FAIL("Unexpected resume after VMMCALL");
+ } else {
+ GUEST_ASSERT_EQ(nmi_stage_get(), 3);
+ GUEST_DONE();
+ }
+}
+
+static void l2_guest_code_nmi(void)
+{
+ ud2();
+}
+
+static void l1_guest_code(struct svm_test_data *svm, uint64_t is_nmi, uint64_t idt_alt)
+{
+ #define L2_GUEST_STACK_SIZE 64
+ unsigned long l2_guest_stack[L2_GUEST_STACK_SIZE];
+ struct vmcb *vmcb = svm->vmcb;
+
+ if (is_nmi)
+ x2apic_enable();
+
+ /* Prepare for L2 execution. */
+ generic_svm_setup(svm,
+ is_nmi ? l2_guest_code_nmi : l2_guest_code_int,
+ &l2_guest_stack[L2_GUEST_STACK_SIZE]);
+
+ vmcb->control.intercept_exceptions |= BIT(PF_VECTOR) | BIT(UD_VECTOR);
+ vmcb->control.intercept |= BIT(INTERCEPT_NMI) | BIT(INTERCEPT_HLT);
+
+ if (is_nmi) {
+ vmcb->control.event_inj = SVM_EVTINJ_VALID | SVM_EVTINJ_TYPE_NMI;
+ } else {
+ vmcb->control.event_inj = INT_NR | SVM_EVTINJ_VALID | SVM_EVTINJ_TYPE_SOFT;
+ /* The return address pushed on stack */
+ vmcb->control.next_rip = vmcb->save.rip;
+ }
+
+ run_guest(vmcb, svm->vmcb_gpa);
+ __GUEST_ASSERT(vmcb->control.exit_code == SVM_EXIT_VMMCALL,
+ "Expected VMMCAL #VMEXIT, got '0x%x', info1 = '0x%lx, info2 = '0x%lx'",
+ vmcb->control.exit_code,
+ vmcb->control.exit_info_1, vmcb->control.exit_info_2);
+
+ if (is_nmi) {
+ clgi();
+ x2apic_write_reg(APIC_ICR, APIC_DEST_SELF | APIC_INT_ASSERT | APIC_DM_NMI);
+
+ GUEST_ASSERT_EQ(nmi_stage_get(), 1);
+ nmi_stage_inc();
+
+ stgi();
+ /* self-NMI happens here */
+ while (true)
+ cpu_relax();
+ }
+
+ /* Skip over VMMCALL */
+ vmcb->save.rip += 3;
+
+ /* Switch to alternate IDT to cause intervening NPF again */
+ vmcb->save.idtr.base = idt_alt;
+ vmcb->control.clean = 0; /* &= ~BIT(VMCB_DT) would be enough */
+
+ vmcb->control.event_inj = BP_VECTOR | SVM_EVTINJ_VALID | SVM_EVTINJ_TYPE_EXEPT;
+ /* The return address pushed on stack, skip over UD2 */
+ vmcb->control.next_rip = vmcb->save.rip + 2;
+
+ run_guest(vmcb, svm->vmcb_gpa);
+ __GUEST_ASSERT(vmcb->control.exit_code == SVM_EXIT_HLT,
+ "Expected HLT #VMEXIT, got '0x%x', info1 = '0x%lx, info2 = '0x%lx'",
+ vmcb->control.exit_code,
+ vmcb->control.exit_info_1, vmcb->control.exit_info_2);
+
+ GUEST_DONE();
+}
+
+static void run_test(bool is_nmi)
+{
+ struct kvm_vcpu *vcpu;
+ struct kvm_vm *vm;
+ vm_vaddr_t svm_gva;
+ vm_vaddr_t idt_alt_vm;
+ struct kvm_guest_debug debug;
+
+ pr_info("Running %s test\n", is_nmi ? "NMI" : "soft int");
+
+ vm = vm_create_with_one_vcpu(&vcpu, l1_guest_code);
+
+ vm_init_descriptor_tables(vm);
+ vcpu_init_descriptor_tables(vcpu);
+
+ vm_install_exception_handler(vm, NMI_VECTOR, guest_nmi_handler);
+ vm_install_exception_handler(vm, BP_VECTOR, guest_bp_handler);
+ vm_install_exception_handler(vm, INT_NR, guest_int_handler);
+
+ vcpu_alloc_svm(vm, &svm_gva);
+
+ if (!is_nmi) {
+ void *idt, *idt_alt;
+
+ idt_alt_vm = vm_vaddr_alloc_page(vm);
+ idt_alt = addr_gva2hva(vm, idt_alt_vm);
+ idt = addr_gva2hva(vm, vm->idt);
+ memcpy(idt_alt, idt, getpagesize());
+ } else {
+ idt_alt_vm = 0;
+ }
+ vcpu_args_set(vcpu, 3, svm_gva, (uint64_t)is_nmi, (uint64_t)idt_alt_vm);
+
+ memset(&debug, 0, sizeof(debug));
+ vcpu_guest_debug_set(vcpu, &debug);
+
+ struct ucall uc;
+
+ alarm(2);
+ vcpu_run(vcpu);
+ alarm(0);
+ TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_IO);
+
+ switch (get_ucall(vcpu, &uc)) {
+ case UCALL_ABORT:
+ REPORT_GUEST_ASSERT(uc);
+ break;
+ /* NOT REACHED */
+ case UCALL_DONE:
+ goto done;
+ default:
+ TEST_FAIL("Unknown ucall 0x%lx.", uc.cmd);
+ }
+done:
+ kvm_vm_free(vm);
+}
+
+int main(int argc, char *argv[])
+{
+ TEST_REQUIRE(kvm_cpu_has(X86_FEATURE_SVM));
+
+ TEST_ASSERT(kvm_cpu_has(X86_FEATURE_NRIPS),
+ "KVM with nSVM is supposed to unconditionally advertise nRIP Save");
+
+ atomic_init(&nmi_stage, 0);
+
+ run_test(false);
+ run_test(true);
+
+ return 0;
+}
diff --git a/tools/testing/selftests/kvm/x86_64/svm_vmcall_test.c b/tools/testing/selftests/kvm/x86_64/svm_vmcall_test.c
index 0e1adb4e3199..8a62cca28cfb 100644
--- a/tools/testing/selftests/kvm/x86_64/svm_vmcall_test.c
+++ b/tools/testing/selftests/kvm/x86_64/svm_vmcall_test.c
@@ -12,10 +12,6 @@
#include "processor.h"
#include "svm_util.h"
-#define VCPU_ID 5
-
-static struct kvm_vm *vm;
-
static void l2_guest_code(struct svm_test_data *svm)
{
__asm__ __volatile__("vmcall");
@@ -39,29 +35,26 @@ static void l1_guest_code(struct svm_test_data *svm)
int main(int argc, char *argv[])
{
+ struct kvm_vcpu *vcpu;
vm_vaddr_t svm_gva;
+ struct kvm_vm *vm;
- nested_svm_check_supported();
+ TEST_REQUIRE(kvm_cpu_has(X86_FEATURE_SVM));
- vm = vm_create_default(VCPU_ID, 0, (void *) l1_guest_code);
- vcpu_set_cpuid(vm, VCPU_ID, kvm_get_supported_cpuid());
+ vm = vm_create_with_one_vcpu(&vcpu, l1_guest_code);
vcpu_alloc_svm(vm, &svm_gva);
- vcpu_args_set(vm, VCPU_ID, 1, svm_gva);
+ vcpu_args_set(vcpu, 1, svm_gva);
for (;;) {
- volatile struct kvm_run *run = vcpu_state(vm, VCPU_ID);
struct ucall uc;
- vcpu_run(vm, VCPU_ID);
- TEST_ASSERT(run->exit_reason == KVM_EXIT_IO,
- "Got exit_reason other than KVM_EXIT_IO: %u (%s)\n",
- run->exit_reason,
- exit_reason_str(run->exit_reason));
+ vcpu_run(vcpu);
+ TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_IO);
- switch (get_ucall(vm, VCPU_ID, &uc)) {
+ switch (get_ucall(vcpu, &uc)) {
case UCALL_ABORT:
- TEST_FAIL("%s", (const char *)uc.args[0]);
+ REPORT_GUEST_ASSERT(uc);
/* NOT REACHED */
case UCALL_SYNC:
break;
diff --git a/tools/testing/selftests/kvm/x86_64/sync_regs_test.c b/tools/testing/selftests/kvm/x86_64/sync_regs_test.c
index d672f0a473f8..adb5593daf48 100644
--- a/tools/testing/selftests/kvm/x86_64/sync_regs_test.c
+++ b/tools/testing/selftests/kvm/x86_64/sync_regs_test.c
@@ -15,15 +15,19 @@
#include <stdlib.h>
#include <string.h>
#include <sys/ioctl.h>
+#include <pthread.h>
+#include "kvm_test_harness.h"
#include "test_util.h"
#include "kvm_util.h"
#include "processor.h"
-#define VCPU_ID 5
-
#define UCALL_PIO_PORT ((uint16_t)0x1000)
+struct ucall uc_none = {
+ .cmd = UCALL_NONE,
+};
+
/*
* ucall is embedded here to protect against compiler reshuffling registers
* before calling a function. In this test we only need to get KVM_EXIT_IO
@@ -34,15 +38,18 @@ void guest_code(void)
asm volatile("1: in %[port], %%al\n"
"add $0x1, %%rbx\n"
"jmp 1b"
- : : [port] "d" (UCALL_PIO_PORT) : "rax", "rbx");
+ : : [port] "d" (UCALL_PIO_PORT), "D" (&uc_none)
+ : "rax", "rbx");
}
+KVM_ONE_VCPU_TEST_SUITE(sync_regs_test);
+
static void compare_regs(struct kvm_regs *left, struct kvm_regs *right)
{
#define REG_COMPARE(reg) \
TEST_ASSERT(left->reg == right->reg, \
"Register " #reg \
- " values did not match: 0x%llx, 0x%llx\n", \
+ " values did not match: 0x%llx, 0x%llx", \
left->reg, right->reg)
REG_COMPARE(rax);
REG_COMPARE(rbx);
@@ -77,80 +84,205 @@ static void compare_vcpu_events(struct kvm_vcpu_events *left,
#define TEST_SYNC_FIELDS (KVM_SYNC_X86_REGS|KVM_SYNC_X86_SREGS|KVM_SYNC_X86_EVENTS)
#define INVALID_SYNC_FIELD 0x80000000
-int main(int argc, char *argv[])
+/*
+ * Set an exception as pending *and* injected while KVM is processing events.
+ * KVM is supposed to ignore/drop pending exceptions if userspace is also
+ * requesting that an exception be injected.
+ */
+static void *race_events_inj_pen(void *arg)
{
- struct kvm_vm *vm;
- struct kvm_run *run;
- struct kvm_regs regs;
- struct kvm_sregs sregs;
- struct kvm_vcpu_events events;
- int rv, cap;
+ struct kvm_run *run = (struct kvm_run *)arg;
+ struct kvm_vcpu_events *events = &run->s.regs.events;
- /* Tell stdout not to buffer its content */
- setbuf(stdout, NULL);
+ WRITE_ONCE(events->exception.nr, UD_VECTOR);
- cap = kvm_check_cap(KVM_CAP_SYNC_REGS);
- if ((cap & TEST_SYNC_FIELDS) != TEST_SYNC_FIELDS) {
- print_skip("KVM_CAP_SYNC_REGS not supported");
- exit(KSFT_SKIP);
+ for (;;) {
+ WRITE_ONCE(run->kvm_dirty_regs, KVM_SYNC_X86_EVENTS);
+ WRITE_ONCE(events->flags, 0);
+ WRITE_ONCE(events->exception.injected, 1);
+ WRITE_ONCE(events->exception.pending, 1);
+
+ pthread_testcancel();
+ }
+
+ return NULL;
+}
+
+/*
+ * Set an invalid exception vector while KVM is processing events. KVM is
+ * supposed to reject any vector >= 32, as well as NMIs (vector 2).
+ */
+static void *race_events_exc(void *arg)
+{
+ struct kvm_run *run = (struct kvm_run *)arg;
+ struct kvm_vcpu_events *events = &run->s.regs.events;
+
+ for (;;) {
+ WRITE_ONCE(run->kvm_dirty_regs, KVM_SYNC_X86_EVENTS);
+ WRITE_ONCE(events->flags, 0);
+ WRITE_ONCE(events->exception.nr, UD_VECTOR);
+ WRITE_ONCE(events->exception.pending, 1);
+ WRITE_ONCE(events->exception.nr, 255);
+
+ pthread_testcancel();
+ }
+
+ return NULL;
+}
+
+/*
+ * Toggle CR4.PAE while KVM is processing SREGS, EFER.LME=1 with CR4.PAE=0 is
+ * illegal, and KVM's MMU heavily relies on vCPU state being valid.
+ */
+static noinline void *race_sregs_cr4(void *arg)
+{
+ struct kvm_run *run = (struct kvm_run *)arg;
+ __u64 *cr4 = &run->s.regs.sregs.cr4;
+ __u64 pae_enabled = *cr4;
+ __u64 pae_disabled = *cr4 & ~X86_CR4_PAE;
+
+ for (;;) {
+ WRITE_ONCE(run->kvm_dirty_regs, KVM_SYNC_X86_SREGS);
+ WRITE_ONCE(*cr4, pae_enabled);
+ asm volatile(".rept 512\n\t"
+ "nop\n\t"
+ ".endr");
+ WRITE_ONCE(*cr4, pae_disabled);
+
+ pthread_testcancel();
}
- if ((cap & INVALID_SYNC_FIELD) != 0) {
- print_skip("The \"invalid\" field is not invalid");
- exit(KSFT_SKIP);
+
+ return NULL;
+}
+
+static void race_sync_regs(struct kvm_vcpu *vcpu, void *racer)
+{
+ const time_t TIMEOUT = 2; /* seconds, roughly */
+ struct kvm_x86_state *state;
+ struct kvm_translation tr;
+ struct kvm_run *run;
+ pthread_t thread;
+ time_t t;
+
+ run = vcpu->run;
+
+ run->kvm_valid_regs = KVM_SYNC_X86_SREGS;
+ vcpu_run(vcpu);
+ run->kvm_valid_regs = 0;
+
+ /* Save state *before* spawning the thread that mucks with vCPU state. */
+ state = vcpu_save_state(vcpu);
+
+ /*
+ * Selftests run 64-bit guests by default, both EFER.LME and CR4.PAE
+ * should already be set in guest state.
+ */
+ TEST_ASSERT((run->s.regs.sregs.cr4 & X86_CR4_PAE) &&
+ (run->s.regs.sregs.efer & EFER_LME),
+ "vCPU should be in long mode, CR4.PAE=%d, EFER.LME=%d",
+ !!(run->s.regs.sregs.cr4 & X86_CR4_PAE),
+ !!(run->s.regs.sregs.efer & EFER_LME));
+
+ TEST_ASSERT_EQ(pthread_create(&thread, NULL, racer, (void *)run), 0);
+
+ for (t = time(NULL) + TIMEOUT; time(NULL) < t;) {
+ /*
+ * Reload known good state if the vCPU triple faults, e.g. due
+ * to the unhandled #GPs being injected. VMX preserves state
+ * on shutdown, but SVM synthesizes an INIT as the VMCB state
+ * is architecturally undefined on triple fault.
+ */
+ if (!__vcpu_run(vcpu) && run->exit_reason == KVM_EXIT_SHUTDOWN)
+ vcpu_load_state(vcpu, state);
+
+ if (racer == race_sregs_cr4) {
+ tr = (struct kvm_translation) { .linear_address = 0 };
+ __vcpu_ioctl(vcpu, KVM_TRANSLATE, &tr);
+ }
}
- /* Create VM */
- vm = vm_create_default(VCPU_ID, 0, guest_code);
+ TEST_ASSERT_EQ(pthread_cancel(thread), 0);
+ TEST_ASSERT_EQ(pthread_join(thread, NULL), 0);
- run = vcpu_state(vm, VCPU_ID);
+ kvm_x86_state_cleanup(state);
+}
+
+KVM_ONE_VCPU_TEST(sync_regs_test, read_invalid, guest_code)
+{
+ struct kvm_run *run = vcpu->run;
+ int rv;
/* Request reading invalid register set from VCPU. */
run->kvm_valid_regs = INVALID_SYNC_FIELD;
- rv = _vcpu_run(vm, VCPU_ID);
+ rv = _vcpu_run(vcpu);
TEST_ASSERT(rv < 0 && errno == EINVAL,
- "Invalid kvm_valid_regs did not cause expected KVM_RUN error: %d\n",
+ "Invalid kvm_valid_regs did not cause expected KVM_RUN error: %d",
rv);
- vcpu_state(vm, VCPU_ID)->kvm_valid_regs = 0;
+ run->kvm_valid_regs = 0;
run->kvm_valid_regs = INVALID_SYNC_FIELD | TEST_SYNC_FIELDS;
- rv = _vcpu_run(vm, VCPU_ID);
+ rv = _vcpu_run(vcpu);
TEST_ASSERT(rv < 0 && errno == EINVAL,
- "Invalid kvm_valid_regs did not cause expected KVM_RUN error: %d\n",
+ "Invalid kvm_valid_regs did not cause expected KVM_RUN error: %d",
rv);
- vcpu_state(vm, VCPU_ID)->kvm_valid_regs = 0;
+ run->kvm_valid_regs = 0;
+}
+
+KVM_ONE_VCPU_TEST(sync_regs_test, set_invalid, guest_code)
+{
+ struct kvm_run *run = vcpu->run;
+ int rv;
/* Request setting invalid register set into VCPU. */
run->kvm_dirty_regs = INVALID_SYNC_FIELD;
- rv = _vcpu_run(vm, VCPU_ID);
+ rv = _vcpu_run(vcpu);
TEST_ASSERT(rv < 0 && errno == EINVAL,
- "Invalid kvm_dirty_regs did not cause expected KVM_RUN error: %d\n",
+ "Invalid kvm_dirty_regs did not cause expected KVM_RUN error: %d",
rv);
- vcpu_state(vm, VCPU_ID)->kvm_dirty_regs = 0;
+ run->kvm_dirty_regs = 0;
run->kvm_dirty_regs = INVALID_SYNC_FIELD | TEST_SYNC_FIELDS;
- rv = _vcpu_run(vm, VCPU_ID);
+ rv = _vcpu_run(vcpu);
TEST_ASSERT(rv < 0 && errno == EINVAL,
- "Invalid kvm_dirty_regs did not cause expected KVM_RUN error: %d\n",
+ "Invalid kvm_dirty_regs did not cause expected KVM_RUN error: %d",
rv);
- vcpu_state(vm, VCPU_ID)->kvm_dirty_regs = 0;
+ run->kvm_dirty_regs = 0;
+}
+
+KVM_ONE_VCPU_TEST(sync_regs_test, req_and_verify_all_valid, guest_code)
+{
+ struct kvm_run *run = vcpu->run;
+ struct kvm_vcpu_events events;
+ struct kvm_sregs sregs;
+ struct kvm_regs regs;
/* Request and verify all valid register sets. */
/* TODO: BUILD TIME CHECK: TEST_ASSERT(KVM_SYNC_X86_NUM_FIELDS != 3); */
run->kvm_valid_regs = TEST_SYNC_FIELDS;
- rv = _vcpu_run(vm, VCPU_ID);
- TEST_ASSERT(run->exit_reason == KVM_EXIT_IO,
- "Unexpected exit reason: %u (%s),\n",
- run->exit_reason,
- exit_reason_str(run->exit_reason));
+ vcpu_run(vcpu);
+ TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_IO);
- vcpu_regs_get(vm, VCPU_ID, &regs);
+ vcpu_regs_get(vcpu, &regs);
compare_regs(&regs, &run->s.regs.regs);
- vcpu_sregs_get(vm, VCPU_ID, &sregs);
+ vcpu_sregs_get(vcpu, &sregs);
compare_sregs(&sregs, &run->s.regs.sregs);
- vcpu_events_get(vm, VCPU_ID, &events);
+ vcpu_events_get(vcpu, &events);
compare_vcpu_events(&events, &run->s.regs.events);
+}
+
+KVM_ONE_VCPU_TEST(sync_regs_test, set_and_verify_various, guest_code)
+{
+ struct kvm_run *run = vcpu->run;
+ struct kvm_vcpu_events events;
+ struct kvm_sregs sregs;
+ struct kvm_regs regs;
+
+ /* Run once to get register set */
+ run->kvm_valid_regs = TEST_SYNC_FIELDS;
+ vcpu_run(vcpu);
+ TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_IO);
/* Set and verify various register values. */
run->s.regs.regs.rbx = 0xBAD1DEA;
@@ -159,11 +291,8 @@ int main(int argc, char *argv[])
run->kvm_valid_regs = TEST_SYNC_FIELDS;
run->kvm_dirty_regs = KVM_SYNC_X86_REGS | KVM_SYNC_X86_SREGS;
- rv = _vcpu_run(vm, VCPU_ID);
- TEST_ASSERT(run->exit_reason == KVM_EXIT_IO,
- "Unexpected exit reason: %u (%s),\n",
- run->exit_reason,
- exit_reason_str(run->exit_reason));
+ vcpu_run(vcpu);
+ TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_IO);
TEST_ASSERT(run->s.regs.regs.rbx == 0xBAD1DEA + 1,
"rbx sync regs value incorrect 0x%llx.",
run->s.regs.regs.rbx);
@@ -171,14 +300,19 @@ int main(int argc, char *argv[])
"apic_base sync regs value incorrect 0x%llx.",
run->s.regs.sregs.apic_base);
- vcpu_regs_get(vm, VCPU_ID, &regs);
+ vcpu_regs_get(vcpu, &regs);
compare_regs(&regs, &run->s.regs.regs);
- vcpu_sregs_get(vm, VCPU_ID, &sregs);
+ vcpu_sregs_get(vcpu, &sregs);
compare_sregs(&sregs, &run->s.regs.sregs);
- vcpu_events_get(vm, VCPU_ID, &events);
+ vcpu_events_get(vcpu, &events);
compare_vcpu_events(&events, &run->s.regs.events);
+}
+
+KVM_ONE_VCPU_TEST(sync_regs_test, clear_kvm_dirty_regs_bits, guest_code)
+{
+ struct kvm_run *run = vcpu->run;
/* Clear kvm_dirty_regs bits, verify new s.regs values are
* overwritten with existing guest values.
@@ -186,14 +320,22 @@ int main(int argc, char *argv[])
run->kvm_valid_regs = TEST_SYNC_FIELDS;
run->kvm_dirty_regs = 0;
run->s.regs.regs.rbx = 0xDEADBEEF;
- rv = _vcpu_run(vm, VCPU_ID);
- TEST_ASSERT(run->exit_reason == KVM_EXIT_IO,
- "Unexpected exit reason: %u (%s),\n",
- run->exit_reason,
- exit_reason_str(run->exit_reason));
+ vcpu_run(vcpu);
+ TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_IO);
TEST_ASSERT(run->s.regs.regs.rbx != 0xDEADBEEF,
"rbx sync regs value incorrect 0x%llx.",
run->s.regs.regs.rbx);
+}
+
+KVM_ONE_VCPU_TEST(sync_regs_test, clear_kvm_valid_and_dirty_regs, guest_code)
+{
+ struct kvm_run *run = vcpu->run;
+ struct kvm_regs regs;
+
+ /* Run once to get register set */
+ run->kvm_valid_regs = TEST_SYNC_FIELDS;
+ vcpu_run(vcpu);
+ TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_IO);
/* Clear kvm_valid_regs bits and kvm_dirty_bits.
* Verify s.regs values are not overwritten with existing guest values
@@ -202,20 +344,29 @@ int main(int argc, char *argv[])
run->kvm_valid_regs = 0;
run->kvm_dirty_regs = 0;
run->s.regs.regs.rbx = 0xAAAA;
+ vcpu_regs_get(vcpu, &regs);
regs.rbx = 0xBAC0;
- vcpu_regs_set(vm, VCPU_ID, &regs);
- rv = _vcpu_run(vm, VCPU_ID);
- TEST_ASSERT(run->exit_reason == KVM_EXIT_IO,
- "Unexpected exit reason: %u (%s),\n",
- run->exit_reason,
- exit_reason_str(run->exit_reason));
+ vcpu_regs_set(vcpu, &regs);
+ vcpu_run(vcpu);
+ TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_IO);
TEST_ASSERT(run->s.regs.regs.rbx == 0xAAAA,
"rbx sync regs value incorrect 0x%llx.",
run->s.regs.regs.rbx);
- vcpu_regs_get(vm, VCPU_ID, &regs);
+ vcpu_regs_get(vcpu, &regs);
TEST_ASSERT(regs.rbx == 0xBAC0 + 1,
"rbx guest value incorrect 0x%llx.",
regs.rbx);
+}
+
+KVM_ONE_VCPU_TEST(sync_regs_test, clear_kvm_valid_regs_bits, guest_code)
+{
+ struct kvm_run *run = vcpu->run;
+ struct kvm_regs regs;
+
+ /* Run once to get register set */
+ run->kvm_valid_regs = TEST_SYNC_FIELDS;
+ vcpu_run(vcpu);
+ TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_IO);
/* Clear kvm_valid_regs bits. Verify s.regs values are not overwritten
* with existing guest values but that guest values are overwritten
@@ -224,20 +375,39 @@ int main(int argc, char *argv[])
run->kvm_valid_regs = 0;
run->kvm_dirty_regs = TEST_SYNC_FIELDS;
run->s.regs.regs.rbx = 0xBBBB;
- rv = _vcpu_run(vm, VCPU_ID);
- TEST_ASSERT(run->exit_reason == KVM_EXIT_IO,
- "Unexpected exit reason: %u (%s),\n",
- run->exit_reason,
- exit_reason_str(run->exit_reason));
+ vcpu_run(vcpu);
+ TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_IO);
TEST_ASSERT(run->s.regs.regs.rbx == 0xBBBB,
"rbx sync regs value incorrect 0x%llx.",
run->s.regs.regs.rbx);
- vcpu_regs_get(vm, VCPU_ID, &regs);
+ vcpu_regs_get(vcpu, &regs);
TEST_ASSERT(regs.rbx == 0xBBBB + 1,
"rbx guest value incorrect 0x%llx.",
regs.rbx);
+}
- kvm_vm_free(vm);
+KVM_ONE_VCPU_TEST(sync_regs_test, race_cr4, guest_code)
+{
+ race_sync_regs(vcpu, race_sregs_cr4);
+}
+
+KVM_ONE_VCPU_TEST(sync_regs_test, race_exc, guest_code)
+{
+ race_sync_regs(vcpu, race_events_exc);
+}
+
+KVM_ONE_VCPU_TEST(sync_regs_test, race_inj_pen, guest_code)
+{
+ race_sync_regs(vcpu, race_events_inj_pen);
+}
+
+int main(int argc, char *argv[])
+{
+ int cap;
+
+ cap = kvm_check_cap(KVM_CAP_SYNC_REGS);
+ TEST_REQUIRE((cap & TEST_SYNC_FIELDS) == TEST_SYNC_FIELDS);
+ TEST_REQUIRE(!(cap & INVALID_SYNC_FIELD));
- return 0;
+ return test_harness_run(argc, argv);
}
diff --git a/tools/testing/selftests/kvm/x86_64/triple_fault_event_test.c b/tools/testing/selftests/kvm/x86_64/triple_fault_event_test.c
new file mode 100644
index 000000000000..56306a19144a
--- /dev/null
+++ b/tools/testing/selftests/kvm/x86_64/triple_fault_event_test.c
@@ -0,0 +1,124 @@
+// SPDX-License-Identifier: GPL-2.0-only
+#include "test_util.h"
+#include "kvm_util.h"
+#include "processor.h"
+#include "vmx.h"
+#include "svm_util.h"
+
+#include <string.h>
+#include <sys/ioctl.h>
+
+#include "kselftest.h"
+
+#define ARBITRARY_IO_PORT 0x2000
+
+/* The virtual machine object. */
+static struct kvm_vm *vm;
+
+static void l2_guest_code(void)
+{
+ asm volatile("inb %%dx, %%al"
+ : : [port] "d" (ARBITRARY_IO_PORT) : "rax");
+}
+
+#define L2_GUEST_STACK_SIZE 64
+unsigned long l2_guest_stack[L2_GUEST_STACK_SIZE];
+
+void l1_guest_code_vmx(struct vmx_pages *vmx)
+{
+
+ GUEST_ASSERT(vmx->vmcs_gpa);
+ GUEST_ASSERT(prepare_for_vmx_operation(vmx));
+ GUEST_ASSERT(load_vmcs(vmx));
+
+ prepare_vmcs(vmx, l2_guest_code,
+ &l2_guest_stack[L2_GUEST_STACK_SIZE]);
+
+ GUEST_ASSERT(!vmlaunch());
+ /* L2 should triple fault after a triple fault event injected. */
+ GUEST_ASSERT(vmreadz(VM_EXIT_REASON) == EXIT_REASON_TRIPLE_FAULT);
+ GUEST_DONE();
+}
+
+void l1_guest_code_svm(struct svm_test_data *svm)
+{
+ struct vmcb *vmcb = svm->vmcb;
+
+ generic_svm_setup(svm, l2_guest_code,
+ &l2_guest_stack[L2_GUEST_STACK_SIZE]);
+
+ /* don't intercept shutdown to test the case of SVM allowing to do so */
+ vmcb->control.intercept &= ~(BIT(INTERCEPT_SHUTDOWN));
+
+ run_guest(vmcb, svm->vmcb_gpa);
+
+ /* should not reach here, L1 should crash */
+ GUEST_ASSERT(0);
+}
+
+int main(void)
+{
+ struct kvm_vcpu *vcpu;
+ struct kvm_run *run;
+ struct kvm_vcpu_events events;
+ struct ucall uc;
+
+ bool has_vmx = kvm_cpu_has(X86_FEATURE_VMX);
+ bool has_svm = kvm_cpu_has(X86_FEATURE_SVM);
+
+ TEST_REQUIRE(has_vmx || has_svm);
+
+ TEST_REQUIRE(kvm_has_cap(KVM_CAP_X86_TRIPLE_FAULT_EVENT));
+
+
+ if (has_vmx) {
+ vm_vaddr_t vmx_pages_gva;
+
+ vm = vm_create_with_one_vcpu(&vcpu, l1_guest_code_vmx);
+ vcpu_alloc_vmx(vm, &vmx_pages_gva);
+ vcpu_args_set(vcpu, 1, vmx_pages_gva);
+ } else {
+ vm_vaddr_t svm_gva;
+
+ vm = vm_create_with_one_vcpu(&vcpu, l1_guest_code_svm);
+ vcpu_alloc_svm(vm, &svm_gva);
+ vcpu_args_set(vcpu, 1, svm_gva);
+ }
+
+ vm_enable_cap(vm, KVM_CAP_X86_TRIPLE_FAULT_EVENT, 1);
+ run = vcpu->run;
+ vcpu_run(vcpu);
+
+ TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_IO);
+ TEST_ASSERT(run->io.port == ARBITRARY_IO_PORT,
+ "Expected IN from port %d from L2, got port %d",
+ ARBITRARY_IO_PORT, run->io.port);
+ vcpu_events_get(vcpu, &events);
+ events.flags |= KVM_VCPUEVENT_VALID_TRIPLE_FAULT;
+ events.triple_fault.pending = true;
+ vcpu_events_set(vcpu, &events);
+ run->immediate_exit = true;
+ vcpu_run_complete_io(vcpu);
+
+ vcpu_events_get(vcpu, &events);
+ TEST_ASSERT(events.flags & KVM_VCPUEVENT_VALID_TRIPLE_FAULT,
+ "Triple fault event invalid");
+ TEST_ASSERT(events.triple_fault.pending,
+ "No triple fault pending");
+ vcpu_run(vcpu);
+
+
+ if (has_svm) {
+ TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_SHUTDOWN);
+ } else {
+ switch (get_ucall(vcpu, &uc)) {
+ case UCALL_DONE:
+ break;
+ case UCALL_ABORT:
+ REPORT_GUEST_ASSERT(uc);
+ default:
+ TEST_FAIL("Unexpected ucall: %lu", uc.cmd);
+ }
+ }
+ return 0;
+}
diff --git a/tools/testing/selftests/kvm/x86_64/tsc_msrs_test.c b/tools/testing/selftests/kvm/x86_64/tsc_msrs_test.c
new file mode 100644
index 000000000000..12b0964f4f13
--- /dev/null
+++ b/tools/testing/selftests/kvm/x86_64/tsc_msrs_test.c
@@ -0,0 +1,161 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Tests for MSR_IA32_TSC and MSR_IA32_TSC_ADJUST.
+ *
+ * Copyright (C) 2020, Red Hat, Inc.
+ */
+#include <stdio.h>
+#include <string.h>
+#include "kvm_util.h"
+#include "processor.h"
+
+#define UNITY (1ull << 30)
+#define HOST_ADJUST (UNITY * 64)
+#define GUEST_STEP (UNITY * 4)
+#define ROUND(x) ((x + UNITY / 2) & -UNITY)
+#define rounded_rdmsr(x) ROUND(rdmsr(x))
+#define rounded_host_rdmsr(x) ROUND(vcpu_get_msr(vcpu, x))
+
+static void guest_code(void)
+{
+ u64 val = 0;
+
+ GUEST_ASSERT_EQ(rounded_rdmsr(MSR_IA32_TSC), val);
+ GUEST_ASSERT_EQ(rounded_rdmsr(MSR_IA32_TSC_ADJUST), val);
+
+ /* Guest: writes to MSR_IA32_TSC affect both MSRs. */
+ val = 1ull * GUEST_STEP;
+ wrmsr(MSR_IA32_TSC, val);
+ GUEST_ASSERT_EQ(rounded_rdmsr(MSR_IA32_TSC), val);
+ GUEST_ASSERT_EQ(rounded_rdmsr(MSR_IA32_TSC_ADJUST), val);
+
+ /* Guest: writes to MSR_IA32_TSC_ADJUST affect both MSRs. */
+ GUEST_SYNC(2);
+ val = 2ull * GUEST_STEP;
+ wrmsr(MSR_IA32_TSC_ADJUST, val);
+ GUEST_ASSERT_EQ(rounded_rdmsr(MSR_IA32_TSC), val);
+ GUEST_ASSERT_EQ(rounded_rdmsr(MSR_IA32_TSC_ADJUST), val);
+
+ /* Host: setting the TSC offset. */
+ GUEST_SYNC(3);
+ GUEST_ASSERT_EQ(rounded_rdmsr(MSR_IA32_TSC), HOST_ADJUST + val);
+ GUEST_ASSERT_EQ(rounded_rdmsr(MSR_IA32_TSC_ADJUST), val);
+
+ /*
+ * Guest: writes to MSR_IA32_TSC_ADJUST do not destroy the
+ * host-side offset and affect both MSRs.
+ */
+ GUEST_SYNC(4);
+ val = 3ull * GUEST_STEP;
+ wrmsr(MSR_IA32_TSC_ADJUST, val);
+ GUEST_ASSERT_EQ(rounded_rdmsr(MSR_IA32_TSC), HOST_ADJUST + val);
+ GUEST_ASSERT_EQ(rounded_rdmsr(MSR_IA32_TSC_ADJUST), val);
+
+ /*
+ * Guest: writes to MSR_IA32_TSC affect both MSRs, so the host-side
+ * offset is now visible in MSR_IA32_TSC_ADJUST.
+ */
+ GUEST_SYNC(5);
+ val = 4ull * GUEST_STEP;
+ wrmsr(MSR_IA32_TSC, val);
+ GUEST_ASSERT_EQ(rounded_rdmsr(MSR_IA32_TSC), val);
+ GUEST_ASSERT_EQ(rounded_rdmsr(MSR_IA32_TSC_ADJUST), val - HOST_ADJUST);
+
+ GUEST_DONE();
+}
+
+static void run_vcpu(struct kvm_vcpu *vcpu, int stage)
+{
+ struct ucall uc;
+
+ vcpu_run(vcpu);
+
+ switch (get_ucall(vcpu, &uc)) {
+ case UCALL_SYNC:
+ if (!strcmp((const char *)uc.args[0], "hello") &&
+ uc.args[1] == stage + 1)
+ ksft_test_result_pass("stage %d passed\n", stage + 1);
+ else
+ ksft_test_result_fail(
+ "stage %d: Unexpected register values vmexit, got %lx",
+ stage + 1, (ulong)uc.args[1]);
+ return;
+ case UCALL_DONE:
+ ksft_test_result_pass("stage %d passed\n", stage + 1);
+ return;
+ case UCALL_ABORT:
+ REPORT_GUEST_ASSERT(uc);
+ default:
+ TEST_ASSERT(false, "Unexpected exit: %s",
+ exit_reason_str(vcpu->run->exit_reason));
+ }
+}
+
+int main(void)
+{
+ struct kvm_vcpu *vcpu;
+ struct kvm_vm *vm;
+ uint64_t val;
+
+ ksft_print_header();
+ ksft_set_plan(5);
+
+ vm = vm_create_with_one_vcpu(&vcpu, guest_code);
+
+ val = 0;
+ TEST_ASSERT_EQ(rounded_host_rdmsr(MSR_IA32_TSC), val);
+ TEST_ASSERT_EQ(rounded_host_rdmsr(MSR_IA32_TSC_ADJUST), val);
+
+ /* Guest: writes to MSR_IA32_TSC affect both MSRs. */
+ run_vcpu(vcpu, 1);
+ val = 1ull * GUEST_STEP;
+ TEST_ASSERT_EQ(rounded_host_rdmsr(MSR_IA32_TSC), val);
+ TEST_ASSERT_EQ(rounded_host_rdmsr(MSR_IA32_TSC_ADJUST), val);
+
+ /* Guest: writes to MSR_IA32_TSC_ADJUST affect both MSRs. */
+ run_vcpu(vcpu, 2);
+ val = 2ull * GUEST_STEP;
+ TEST_ASSERT_EQ(rounded_host_rdmsr(MSR_IA32_TSC), val);
+ TEST_ASSERT_EQ(rounded_host_rdmsr(MSR_IA32_TSC_ADJUST), val);
+
+ /*
+ * Host: writes to MSR_IA32_TSC set the host-side offset
+ * and therefore do not change MSR_IA32_TSC_ADJUST.
+ */
+ vcpu_set_msr(vcpu, MSR_IA32_TSC, HOST_ADJUST + val);
+ TEST_ASSERT_EQ(rounded_host_rdmsr(MSR_IA32_TSC), HOST_ADJUST + val);
+ TEST_ASSERT_EQ(rounded_host_rdmsr(MSR_IA32_TSC_ADJUST), val);
+ run_vcpu(vcpu, 3);
+
+ /* Host: writes to MSR_IA32_TSC_ADJUST do not modify the TSC. */
+ vcpu_set_msr(vcpu, MSR_IA32_TSC_ADJUST, UNITY * 123456);
+ TEST_ASSERT_EQ(rounded_host_rdmsr(MSR_IA32_TSC), HOST_ADJUST + val);
+ TEST_ASSERT_EQ(vcpu_get_msr(vcpu, MSR_IA32_TSC_ADJUST), UNITY * 123456);
+
+ /* Restore previous value. */
+ vcpu_set_msr(vcpu, MSR_IA32_TSC_ADJUST, val);
+ TEST_ASSERT_EQ(rounded_host_rdmsr(MSR_IA32_TSC), HOST_ADJUST + val);
+ TEST_ASSERT_EQ(rounded_host_rdmsr(MSR_IA32_TSC_ADJUST), val);
+
+ /*
+ * Guest: writes to MSR_IA32_TSC_ADJUST do not destroy the
+ * host-side offset and affect both MSRs.
+ */
+ run_vcpu(vcpu, 4);
+ val = 3ull * GUEST_STEP;
+ TEST_ASSERT_EQ(rounded_host_rdmsr(MSR_IA32_TSC), HOST_ADJUST + val);
+ TEST_ASSERT_EQ(rounded_host_rdmsr(MSR_IA32_TSC_ADJUST), val);
+
+ /*
+ * Guest: writes to MSR_IA32_TSC affect both MSRs, so the host-side
+ * offset is now visible in MSR_IA32_TSC_ADJUST.
+ */
+ run_vcpu(vcpu, 5);
+ val = 4ull * GUEST_STEP;
+ TEST_ASSERT_EQ(rounded_host_rdmsr(MSR_IA32_TSC), val);
+ TEST_ASSERT_EQ(rounded_host_rdmsr(MSR_IA32_TSC_ADJUST), val - HOST_ADJUST);
+
+ kvm_vm_free(vm);
+
+ ksft_finished(); /* Print results and exit() accordingly */
+}
diff --git a/tools/testing/selftests/kvm/x86_64/tsc_scaling_sync.c b/tools/testing/selftests/kvm/x86_64/tsc_scaling_sync.c
new file mode 100644
index 000000000000..59c7304f805e
--- /dev/null
+++ b/tools/testing/selftests/kvm/x86_64/tsc_scaling_sync.c
@@ -0,0 +1,110 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright © 2021 Amazon.com, Inc. or its affiliates.
+ */
+
+#include "test_util.h"
+#include "kvm_util.h"
+#include "processor.h"
+
+#include <stdint.h>
+#include <time.h>
+#include <sched.h>
+#include <signal.h>
+#include <pthread.h>
+
+#define NR_TEST_VCPUS 20
+
+static struct kvm_vm *vm;
+pthread_spinlock_t create_lock;
+
+#define TEST_TSC_KHZ 2345678UL
+#define TEST_TSC_OFFSET 200000000
+
+uint64_t tsc_sync;
+static void guest_code(void)
+{
+ uint64_t start_tsc, local_tsc, tmp;
+
+ start_tsc = rdtsc();
+ do {
+ tmp = READ_ONCE(tsc_sync);
+ local_tsc = rdtsc();
+ WRITE_ONCE(tsc_sync, local_tsc);
+ if (unlikely(local_tsc < tmp))
+ GUEST_SYNC_ARGS(0, local_tsc, tmp, 0, 0);
+
+ } while (local_tsc - start_tsc < 5000 * TEST_TSC_KHZ);
+
+ GUEST_DONE();
+}
+
+
+static void *run_vcpu(void *_cpu_nr)
+{
+ unsigned long vcpu_id = (unsigned long)_cpu_nr;
+ unsigned long failures = 0;
+ static bool first_cpu_done;
+ struct kvm_vcpu *vcpu;
+
+ /* The kernel is fine, but vm_vcpu_add() needs locking */
+ pthread_spin_lock(&create_lock);
+
+ vcpu = vm_vcpu_add(vm, vcpu_id, guest_code);
+
+ if (!first_cpu_done) {
+ first_cpu_done = true;
+ vcpu_set_msr(vcpu, MSR_IA32_TSC, TEST_TSC_OFFSET);
+ }
+
+ pthread_spin_unlock(&create_lock);
+
+ for (;;) {
+ struct ucall uc;
+
+ vcpu_run(vcpu);
+ TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_IO);
+
+ switch (get_ucall(vcpu, &uc)) {
+ case UCALL_DONE:
+ goto out;
+
+ case UCALL_SYNC:
+ printf("Guest %d sync %lx %lx %ld\n", vcpu->id,
+ uc.args[2], uc.args[3], uc.args[2] - uc.args[3]);
+ failures++;
+ break;
+
+ default:
+ TEST_FAIL("Unknown ucall %lu", uc.cmd);
+ }
+ }
+ out:
+ return (void *)failures;
+}
+
+int main(int argc, char *argv[])
+{
+ TEST_REQUIRE(kvm_has_cap(KVM_CAP_VM_TSC_CONTROL));
+
+ vm = vm_create(NR_TEST_VCPUS);
+ vm_ioctl(vm, KVM_SET_TSC_KHZ, (void *) TEST_TSC_KHZ);
+
+ pthread_spin_init(&create_lock, PTHREAD_PROCESS_PRIVATE);
+ pthread_t cpu_threads[NR_TEST_VCPUS];
+ unsigned long cpu;
+ for (cpu = 0; cpu < NR_TEST_VCPUS; cpu++)
+ pthread_create(&cpu_threads[cpu], NULL, run_vcpu, (void *)cpu);
+
+ unsigned long failures = 0;
+ for (cpu = 0; cpu < NR_TEST_VCPUS; cpu++) {
+ void *this_cpu_failures;
+ pthread_join(cpu_threads[cpu], &this_cpu_failures);
+ failures += (unsigned long)this_cpu_failures;
+ }
+
+ TEST_ASSERT(!failures, "TSC sync failed");
+ pthread_spin_destroy(&create_lock);
+ kvm_vm_free(vm);
+ return 0;
+}
diff --git a/tools/testing/selftests/kvm/x86_64/ucna_injection_test.c b/tools/testing/selftests/kvm/x86_64/ucna_injection_test.c
new file mode 100644
index 000000000000..dcbb3c29fb8e
--- /dev/null
+++ b/tools/testing/selftests/kvm/x86_64/ucna_injection_test.c
@@ -0,0 +1,302 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * ucna_injection_test
+ *
+ * Copyright (C) 2022, Google LLC.
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2.
+ *
+ * Test that user space can inject UnCorrectable No Action required (UCNA)
+ * memory errors to the guest.
+ *
+ * The test starts one vCPU with the MCG_CMCI_P enabled. It verifies that
+ * proper UCNA errors can be injected to a vCPU with MCG_CMCI_P and
+ * corresponding per-bank control register (MCI_CTL2) bit enabled.
+ * The test also checks that the UCNA errors get recorded in the
+ * Machine Check bank registers no matter the error signal interrupts get
+ * delivered into the guest or not.
+ *
+ */
+
+#define _GNU_SOURCE /* for program_invocation_short_name */
+#include <pthread.h>
+#include <inttypes.h>
+#include <string.h>
+#include <time.h>
+
+#include "kvm_util_base.h"
+#include "kvm_util.h"
+#include "mce.h"
+#include "processor.h"
+#include "test_util.h"
+#include "apic.h"
+
+#define SYNC_FIRST_UCNA 9
+#define SYNC_SECOND_UCNA 10
+#define SYNC_GP 11
+#define FIRST_UCNA_ADDR 0xdeadbeef
+#define SECOND_UCNA_ADDR 0xcafeb0ba
+
+/*
+ * Vector for the CMCI interrupt.
+ * Value is arbitrary. Any value in 0x20-0xFF should work:
+ * https://wiki.osdev.org/Interrupt_Vector_Table
+ */
+#define CMCI_VECTOR 0xa9
+
+#define UCNA_BANK 0x7 // IMC0 bank
+
+#define MCI_CTL2_RESERVED_BIT BIT_ULL(29)
+
+static uint64_t supported_mcg_caps;
+
+/*
+ * Record states about the injected UCNA.
+ * The variables started with the 'i_' prefixes are recorded in interrupt
+ * handler. Variables without the 'i_' prefixes are recorded in guest main
+ * execution thread.
+ */
+static volatile uint64_t i_ucna_rcvd;
+static volatile uint64_t i_ucna_addr;
+static volatile uint64_t ucna_addr;
+static volatile uint64_t ucna_addr2;
+
+struct thread_params {
+ struct kvm_vcpu *vcpu;
+ uint64_t *p_i_ucna_rcvd;
+ uint64_t *p_i_ucna_addr;
+ uint64_t *p_ucna_addr;
+ uint64_t *p_ucna_addr2;
+};
+
+static void verify_apic_base_addr(void)
+{
+ uint64_t msr = rdmsr(MSR_IA32_APICBASE);
+ uint64_t base = GET_APIC_BASE(msr);
+
+ GUEST_ASSERT(base == APIC_DEFAULT_GPA);
+}
+
+static void ucna_injection_guest_code(void)
+{
+ uint64_t ctl2;
+ verify_apic_base_addr();
+ xapic_enable();
+
+ /* Sets up the interrupt vector and enables per-bank CMCI sigaling. */
+ xapic_write_reg(APIC_LVTCMCI, CMCI_VECTOR | APIC_DM_FIXED);
+ ctl2 = rdmsr(MSR_IA32_MCx_CTL2(UCNA_BANK));
+ wrmsr(MSR_IA32_MCx_CTL2(UCNA_BANK), ctl2 | MCI_CTL2_CMCI_EN);
+
+ /* Enables interrupt in guest. */
+ asm volatile("sti");
+
+ /* Let user space inject the first UCNA */
+ GUEST_SYNC(SYNC_FIRST_UCNA);
+
+ ucna_addr = rdmsr(MSR_IA32_MCx_ADDR(UCNA_BANK));
+
+ /* Disables the per-bank CMCI signaling. */
+ ctl2 = rdmsr(MSR_IA32_MCx_CTL2(UCNA_BANK));
+ wrmsr(MSR_IA32_MCx_CTL2(UCNA_BANK), ctl2 & ~MCI_CTL2_CMCI_EN);
+
+ /* Let the user space inject the second UCNA */
+ GUEST_SYNC(SYNC_SECOND_UCNA);
+
+ ucna_addr2 = rdmsr(MSR_IA32_MCx_ADDR(UCNA_BANK));
+ GUEST_DONE();
+}
+
+static void cmci_disabled_guest_code(void)
+{
+ uint64_t ctl2 = rdmsr(MSR_IA32_MCx_CTL2(UCNA_BANK));
+ wrmsr(MSR_IA32_MCx_CTL2(UCNA_BANK), ctl2 | MCI_CTL2_CMCI_EN);
+
+ GUEST_DONE();
+}
+
+static void cmci_enabled_guest_code(void)
+{
+ uint64_t ctl2 = rdmsr(MSR_IA32_MCx_CTL2(UCNA_BANK));
+ wrmsr(MSR_IA32_MCx_CTL2(UCNA_BANK), ctl2 | MCI_CTL2_RESERVED_BIT);
+
+ GUEST_DONE();
+}
+
+static void guest_cmci_handler(struct ex_regs *regs)
+{
+ i_ucna_rcvd++;
+ i_ucna_addr = rdmsr(MSR_IA32_MCx_ADDR(UCNA_BANK));
+ xapic_write_reg(APIC_EOI, 0);
+}
+
+static void guest_gp_handler(struct ex_regs *regs)
+{
+ GUEST_SYNC(SYNC_GP);
+}
+
+static void run_vcpu_expect_gp(struct kvm_vcpu *vcpu)
+{
+ struct ucall uc;
+
+ vcpu_run(vcpu);
+
+ TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_IO);
+ TEST_ASSERT(get_ucall(vcpu, &uc) == UCALL_SYNC,
+ "Expect UCALL_SYNC");
+ TEST_ASSERT(uc.args[1] == SYNC_GP, "#GP is expected.");
+ printf("vCPU received GP in guest.\n");
+}
+
+static void inject_ucna(struct kvm_vcpu *vcpu, uint64_t addr) {
+ /*
+ * A UCNA error is indicated with VAL=1, UC=1, PCC=0, S=0 and AR=0 in
+ * the IA32_MCi_STATUS register.
+ * MSCOD=1 (BIT[16] - MscodDataRdErr).
+ * MCACOD=0x0090 (Memory controller error format, channel 0)
+ */
+ uint64_t status = MCI_STATUS_VAL | MCI_STATUS_UC | MCI_STATUS_EN |
+ MCI_STATUS_MISCV | MCI_STATUS_ADDRV | 0x10090;
+ struct kvm_x86_mce mce = {};
+ mce.status = status;
+ mce.mcg_status = 0;
+ /*
+ * MCM_ADDR_PHYS indicates the reported address is a physical address.
+ * Lowest 6 bits is the recoverable address LSB, i.e., the injected MCE
+ * is at 4KB granularity.
+ */
+ mce.misc = (MCM_ADDR_PHYS << 6) | 0xc;
+ mce.addr = addr;
+ mce.bank = UCNA_BANK;
+
+ vcpu_ioctl(vcpu, KVM_X86_SET_MCE, &mce);
+}
+
+static void *run_ucna_injection(void *arg)
+{
+ struct thread_params *params = (struct thread_params *)arg;
+ struct ucall uc;
+ int old;
+ int r;
+
+ r = pthread_setcanceltype(PTHREAD_CANCEL_ASYNCHRONOUS, &old);
+ TEST_ASSERT(r == 0,
+ "pthread_setcanceltype failed with errno=%d",
+ r);
+
+ vcpu_run(params->vcpu);
+
+ TEST_ASSERT_KVM_EXIT_REASON(params->vcpu, KVM_EXIT_IO);
+ TEST_ASSERT(get_ucall(params->vcpu, &uc) == UCALL_SYNC,
+ "Expect UCALL_SYNC");
+ TEST_ASSERT(uc.args[1] == SYNC_FIRST_UCNA, "Injecting first UCNA.");
+
+ printf("Injecting first UCNA at %#x.\n", FIRST_UCNA_ADDR);
+
+ inject_ucna(params->vcpu, FIRST_UCNA_ADDR);
+ vcpu_run(params->vcpu);
+
+ TEST_ASSERT_KVM_EXIT_REASON(params->vcpu, KVM_EXIT_IO);
+ TEST_ASSERT(get_ucall(params->vcpu, &uc) == UCALL_SYNC,
+ "Expect UCALL_SYNC");
+ TEST_ASSERT(uc.args[1] == SYNC_SECOND_UCNA, "Injecting second UCNA.");
+
+ printf("Injecting second UCNA at %#x.\n", SECOND_UCNA_ADDR);
+
+ inject_ucna(params->vcpu, SECOND_UCNA_ADDR);
+ vcpu_run(params->vcpu);
+
+ TEST_ASSERT_KVM_EXIT_REASON(params->vcpu, KVM_EXIT_IO);
+ if (get_ucall(params->vcpu, &uc) == UCALL_ABORT) {
+ TEST_ASSERT(false, "vCPU assertion failure: %s.",
+ (const char *)uc.args[0]);
+ }
+
+ return NULL;
+}
+
+static void test_ucna_injection(struct kvm_vcpu *vcpu, struct thread_params *params)
+{
+ struct kvm_vm *vm = vcpu->vm;
+ params->vcpu = vcpu;
+ params->p_i_ucna_rcvd = (uint64_t *)addr_gva2hva(vm, (uint64_t)&i_ucna_rcvd);
+ params->p_i_ucna_addr = (uint64_t *)addr_gva2hva(vm, (uint64_t)&i_ucna_addr);
+ params->p_ucna_addr = (uint64_t *)addr_gva2hva(vm, (uint64_t)&ucna_addr);
+ params->p_ucna_addr2 = (uint64_t *)addr_gva2hva(vm, (uint64_t)&ucna_addr2);
+
+ run_ucna_injection(params);
+
+ TEST_ASSERT(*params->p_i_ucna_rcvd == 1, "Only first UCNA get signaled.");
+ TEST_ASSERT(*params->p_i_ucna_addr == FIRST_UCNA_ADDR,
+ "Only first UCNA reported addr get recorded via interrupt.");
+ TEST_ASSERT(*params->p_ucna_addr == FIRST_UCNA_ADDR,
+ "First injected UCNAs should get exposed via registers.");
+ TEST_ASSERT(*params->p_ucna_addr2 == SECOND_UCNA_ADDR,
+ "Second injected UCNAs should get exposed via registers.");
+
+ printf("Test successful.\n"
+ "UCNA CMCI interrupts received: %ld\n"
+ "Last UCNA address received via CMCI: %lx\n"
+ "First UCNA address in vCPU thread: %lx\n"
+ "Second UCNA address in vCPU thread: %lx\n",
+ *params->p_i_ucna_rcvd, *params->p_i_ucna_addr,
+ *params->p_ucna_addr, *params->p_ucna_addr2);
+}
+
+static void setup_mce_cap(struct kvm_vcpu *vcpu, bool enable_cmci_p)
+{
+ uint64_t mcg_caps = MCG_CTL_P | MCG_SER_P | MCG_LMCE_P | KVM_MAX_MCE_BANKS;
+ if (enable_cmci_p)
+ mcg_caps |= MCG_CMCI_P;
+
+ mcg_caps &= supported_mcg_caps | MCG_CAP_BANKS_MASK;
+ vcpu_ioctl(vcpu, KVM_X86_SETUP_MCE, &mcg_caps);
+}
+
+static struct kvm_vcpu *create_vcpu_with_mce_cap(struct kvm_vm *vm, uint32_t vcpuid,
+ bool enable_cmci_p, void *guest_code)
+{
+ struct kvm_vcpu *vcpu = vm_vcpu_add(vm, vcpuid, guest_code);
+ setup_mce_cap(vcpu, enable_cmci_p);
+ return vcpu;
+}
+
+int main(int argc, char *argv[])
+{
+ struct thread_params params;
+ struct kvm_vm *vm;
+ struct kvm_vcpu *ucna_vcpu;
+ struct kvm_vcpu *cmcidis_vcpu;
+ struct kvm_vcpu *cmci_vcpu;
+
+ kvm_check_cap(KVM_CAP_MCE);
+
+ vm = __vm_create(VM_SHAPE_DEFAULT, 3, 0);
+
+ kvm_ioctl(vm->kvm_fd, KVM_X86_GET_MCE_CAP_SUPPORTED,
+ &supported_mcg_caps);
+
+ if (!(supported_mcg_caps & MCG_CMCI_P)) {
+ print_skip("MCG_CMCI_P is not supported");
+ exit(KSFT_SKIP);
+ }
+
+ ucna_vcpu = create_vcpu_with_mce_cap(vm, 0, true, ucna_injection_guest_code);
+ cmcidis_vcpu = create_vcpu_with_mce_cap(vm, 1, false, cmci_disabled_guest_code);
+ cmci_vcpu = create_vcpu_with_mce_cap(vm, 2, true, cmci_enabled_guest_code);
+
+ vm_init_descriptor_tables(vm);
+ vcpu_init_descriptor_tables(ucna_vcpu);
+ vcpu_init_descriptor_tables(cmcidis_vcpu);
+ vcpu_init_descriptor_tables(cmci_vcpu);
+ vm_install_exception_handler(vm, CMCI_VECTOR, guest_cmci_handler);
+ vm_install_exception_handler(vm, GP_VECTOR, guest_gp_handler);
+
+ virt_pg_map(vm, APIC_DEFAULT_GPA, APIC_DEFAULT_GPA);
+
+ test_ucna_injection(ucna_vcpu, &params);
+ run_vcpu_expect_gp(cmcidis_vcpu);
+ run_vcpu_expect_gp(cmci_vcpu);
+
+ kvm_vm_free(vm);
+}
diff --git a/tools/testing/selftests/kvm/x86_64/userspace_io_test.c b/tools/testing/selftests/kvm/x86_64/userspace_io_test.c
new file mode 100644
index 000000000000..9481cbcf284f
--- /dev/null
+++ b/tools/testing/selftests/kvm/x86_64/userspace_io_test.c
@@ -0,0 +1,103 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <fcntl.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/ioctl.h>
+
+#include "test_util.h"
+
+#include "kvm_util.h"
+#include "processor.h"
+
+static void guest_ins_port80(uint8_t *buffer, unsigned int count)
+{
+ unsigned long end;
+
+ if (count == 2)
+ end = (unsigned long)buffer + 1;
+ else
+ end = (unsigned long)buffer + 8192;
+
+ asm volatile("cld; rep; insb" : "+D"(buffer), "+c"(count) : "d"(0x80) : "memory");
+ GUEST_ASSERT_EQ(count, 0);
+ GUEST_ASSERT_EQ((unsigned long)buffer, end);
+}
+
+static void guest_code(void)
+{
+ uint8_t buffer[8192];
+ int i;
+
+ /*
+ * Special case tests. main() will adjust RCX 2 => 1 and 3 => 8192 to
+ * test that KVM doesn't explode when userspace modifies the "count" on
+ * a userspace I/O exit. KVM isn't required to play nice with the I/O
+ * itself as KVM doesn't support manipulating the count, it just needs
+ * to not explode or overflow a buffer.
+ */
+ guest_ins_port80(buffer, 2);
+ guest_ins_port80(buffer, 3);
+
+ /* Verify KVM fills the buffer correctly when not stuffing RCX. */
+ memset(buffer, 0, sizeof(buffer));
+ guest_ins_port80(buffer, 8192);
+ for (i = 0; i < 8192; i++)
+ __GUEST_ASSERT(buffer[i] == 0xaa,
+ "Expected '0xaa', got '0x%x' at buffer[%u]",
+ buffer[i], i);
+
+ GUEST_DONE();
+}
+
+int main(int argc, char *argv[])
+{
+ struct kvm_vcpu *vcpu;
+ struct kvm_regs regs;
+ struct kvm_run *run;
+ struct kvm_vm *vm;
+ struct ucall uc;
+
+ vm = vm_create_with_one_vcpu(&vcpu, guest_code);
+ run = vcpu->run;
+
+ memset(&regs, 0, sizeof(regs));
+
+ while (1) {
+ vcpu_run(vcpu);
+ TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_IO);
+
+ if (get_ucall(vcpu, &uc))
+ break;
+
+ TEST_ASSERT(run->io.port == 0x80,
+ "Expected I/O at port 0x80, got port 0x%x", run->io.port);
+
+ /*
+ * Modify the rep string count in RCX: 2 => 1 and 3 => 8192.
+ * Note, this abuses KVM's batching of rep string I/O to avoid
+ * getting stuck in an infinite loop. That behavior isn't in
+ * scope from a testing perspective as it's not ABI in any way,
+ * i.e. it really is abusing internal KVM knowledge.
+ */
+ vcpu_regs_get(vcpu, &regs);
+ if (regs.rcx == 2)
+ regs.rcx = 1;
+ if (regs.rcx == 3)
+ regs.rcx = 8192;
+ memset((void *)run + run->io.data_offset, 0xaa, 4096);
+ vcpu_regs_set(vcpu, &regs);
+ }
+
+ switch (uc.cmd) {
+ case UCALL_DONE:
+ break;
+ case UCALL_ABORT:
+ REPORT_GUEST_ASSERT(uc);
+ default:
+ TEST_FAIL("Unknown ucall %lu", uc.cmd);
+ }
+
+ kvm_vm_free(vm);
+ return 0;
+}
diff --git a/tools/testing/selftests/kvm/x86_64/userspace_msr_exit_test.c b/tools/testing/selftests/kvm/x86_64/userspace_msr_exit_test.c
new file mode 100644
index 000000000000..f4f61a2d2464
--- /dev/null
+++ b/tools/testing/selftests/kvm/x86_64/userspace_msr_exit_test.c
@@ -0,0 +1,780 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (C) 2020, Google LLC.
+ *
+ * Tests for exiting into userspace on registered MSRs
+ */
+
+#define _GNU_SOURCE /* for program_invocation_short_name */
+#include <sys/ioctl.h>
+
+#include "kvm_test_harness.h"
+#include "test_util.h"
+#include "kvm_util.h"
+#include "vmx.h"
+
+static bool fep_available;
+
+#define MSR_NON_EXISTENT 0x474f4f00
+
+static u64 deny_bits = 0;
+struct kvm_msr_filter filter_allow = {
+ .flags = KVM_MSR_FILTER_DEFAULT_ALLOW,
+ .ranges = {
+ {
+ .flags = KVM_MSR_FILTER_READ |
+ KVM_MSR_FILTER_WRITE,
+ .nmsrs = 1,
+ /* Test an MSR the kernel knows about. */
+ .base = MSR_IA32_XSS,
+ .bitmap = (uint8_t*)&deny_bits,
+ }, {
+ .flags = KVM_MSR_FILTER_READ |
+ KVM_MSR_FILTER_WRITE,
+ .nmsrs = 1,
+ /* Test an MSR the kernel doesn't know about. */
+ .base = MSR_IA32_FLUSH_CMD,
+ .bitmap = (uint8_t*)&deny_bits,
+ }, {
+ .flags = KVM_MSR_FILTER_READ |
+ KVM_MSR_FILTER_WRITE,
+ .nmsrs = 1,
+ /* Test a fabricated MSR that no one knows about. */
+ .base = MSR_NON_EXISTENT,
+ .bitmap = (uint8_t*)&deny_bits,
+ },
+ },
+};
+
+struct kvm_msr_filter filter_fs = {
+ .flags = KVM_MSR_FILTER_DEFAULT_ALLOW,
+ .ranges = {
+ {
+ .flags = KVM_MSR_FILTER_READ,
+ .nmsrs = 1,
+ .base = MSR_FS_BASE,
+ .bitmap = (uint8_t*)&deny_bits,
+ },
+ },
+};
+
+struct kvm_msr_filter filter_gs = {
+ .flags = KVM_MSR_FILTER_DEFAULT_ALLOW,
+ .ranges = {
+ {
+ .flags = KVM_MSR_FILTER_READ,
+ .nmsrs = 1,
+ .base = MSR_GS_BASE,
+ .bitmap = (uint8_t*)&deny_bits,
+ },
+ },
+};
+
+static uint64_t msr_non_existent_data;
+static int guest_exception_count;
+static u32 msr_reads, msr_writes;
+
+static u8 bitmap_00000000[KVM_MSR_FILTER_MAX_BITMAP_SIZE];
+static u8 bitmap_00000000_write[KVM_MSR_FILTER_MAX_BITMAP_SIZE];
+static u8 bitmap_40000000[KVM_MSR_FILTER_MAX_BITMAP_SIZE];
+static u8 bitmap_c0000000[KVM_MSR_FILTER_MAX_BITMAP_SIZE];
+static u8 bitmap_c0000000_read[KVM_MSR_FILTER_MAX_BITMAP_SIZE];
+static u8 bitmap_deadbeef[1] = { 0x1 };
+
+static void deny_msr(uint8_t *bitmap, u32 msr)
+{
+ u32 idx = msr & (KVM_MSR_FILTER_MAX_BITMAP_SIZE - 1);
+
+ bitmap[idx / 8] &= ~(1 << (idx % 8));
+}
+
+static void prepare_bitmaps(void)
+{
+ memset(bitmap_00000000, 0xff, sizeof(bitmap_00000000));
+ memset(bitmap_00000000_write, 0xff, sizeof(bitmap_00000000_write));
+ memset(bitmap_40000000, 0xff, sizeof(bitmap_40000000));
+ memset(bitmap_c0000000, 0xff, sizeof(bitmap_c0000000));
+ memset(bitmap_c0000000_read, 0xff, sizeof(bitmap_c0000000_read));
+
+ deny_msr(bitmap_00000000_write, MSR_IA32_POWER_CTL);
+ deny_msr(bitmap_c0000000_read, MSR_SYSCALL_MASK);
+ deny_msr(bitmap_c0000000_read, MSR_GS_BASE);
+}
+
+struct kvm_msr_filter filter_deny = {
+ .flags = KVM_MSR_FILTER_DEFAULT_DENY,
+ .ranges = {
+ {
+ .flags = KVM_MSR_FILTER_READ,
+ .base = 0x00000000,
+ .nmsrs = KVM_MSR_FILTER_MAX_BITMAP_SIZE * BITS_PER_BYTE,
+ .bitmap = bitmap_00000000,
+ }, {
+ .flags = KVM_MSR_FILTER_WRITE,
+ .base = 0x00000000,
+ .nmsrs = KVM_MSR_FILTER_MAX_BITMAP_SIZE * BITS_PER_BYTE,
+ .bitmap = bitmap_00000000_write,
+ }, {
+ .flags = KVM_MSR_FILTER_READ | KVM_MSR_FILTER_WRITE,
+ .base = 0x40000000,
+ .nmsrs = KVM_MSR_FILTER_MAX_BITMAP_SIZE * BITS_PER_BYTE,
+ .bitmap = bitmap_40000000,
+ }, {
+ .flags = KVM_MSR_FILTER_READ,
+ .base = 0xc0000000,
+ .nmsrs = KVM_MSR_FILTER_MAX_BITMAP_SIZE * BITS_PER_BYTE,
+ .bitmap = bitmap_c0000000_read,
+ }, {
+ .flags = KVM_MSR_FILTER_WRITE,
+ .base = 0xc0000000,
+ .nmsrs = KVM_MSR_FILTER_MAX_BITMAP_SIZE * BITS_PER_BYTE,
+ .bitmap = bitmap_c0000000,
+ }, {
+ .flags = KVM_MSR_FILTER_WRITE | KVM_MSR_FILTER_READ,
+ .base = 0xdeadbeef,
+ .nmsrs = 1,
+ .bitmap = bitmap_deadbeef,
+ },
+ },
+};
+
+struct kvm_msr_filter no_filter_deny = {
+ .flags = KVM_MSR_FILTER_DEFAULT_ALLOW,
+};
+
+/*
+ * Note: Force test_rdmsr() to not be inlined to prevent the labels,
+ * rdmsr_start and rdmsr_end, from being defined multiple times.
+ */
+static noinline uint64_t test_rdmsr(uint32_t msr)
+{
+ uint32_t a, d;
+
+ guest_exception_count = 0;
+
+ __asm__ __volatile__("rdmsr_start: rdmsr; rdmsr_end:" :
+ "=a"(a), "=d"(d) : "c"(msr) : "memory");
+
+ return a | ((uint64_t) d << 32);
+}
+
+/*
+ * Note: Force test_wrmsr() to not be inlined to prevent the labels,
+ * wrmsr_start and wrmsr_end, from being defined multiple times.
+ */
+static noinline void test_wrmsr(uint32_t msr, uint64_t value)
+{
+ uint32_t a = value;
+ uint32_t d = value >> 32;
+
+ guest_exception_count = 0;
+
+ __asm__ __volatile__("wrmsr_start: wrmsr; wrmsr_end:" ::
+ "a"(a), "d"(d), "c"(msr) : "memory");
+}
+
+extern char rdmsr_start, rdmsr_end;
+extern char wrmsr_start, wrmsr_end;
+
+/*
+ * Note: Force test_em_rdmsr() to not be inlined to prevent the labels,
+ * rdmsr_start and rdmsr_end, from being defined multiple times.
+ */
+static noinline uint64_t test_em_rdmsr(uint32_t msr)
+{
+ uint32_t a, d;
+
+ guest_exception_count = 0;
+
+ __asm__ __volatile__(KVM_FEP "em_rdmsr_start: rdmsr; em_rdmsr_end:" :
+ "=a"(a), "=d"(d) : "c"(msr) : "memory");
+
+ return a | ((uint64_t) d << 32);
+}
+
+/*
+ * Note: Force test_em_wrmsr() to not be inlined to prevent the labels,
+ * wrmsr_start and wrmsr_end, from being defined multiple times.
+ */
+static noinline void test_em_wrmsr(uint32_t msr, uint64_t value)
+{
+ uint32_t a = value;
+ uint32_t d = value >> 32;
+
+ guest_exception_count = 0;
+
+ __asm__ __volatile__(KVM_FEP "em_wrmsr_start: wrmsr; em_wrmsr_end:" ::
+ "a"(a), "d"(d), "c"(msr) : "memory");
+}
+
+extern char em_rdmsr_start, em_rdmsr_end;
+extern char em_wrmsr_start, em_wrmsr_end;
+
+static void guest_code_filter_allow(void)
+{
+ uint64_t data;
+
+ /*
+ * Test userspace intercepting rdmsr / wrmsr for MSR_IA32_XSS.
+ *
+ * A GP is thrown if anything other than 0 is written to
+ * MSR_IA32_XSS.
+ */
+ data = test_rdmsr(MSR_IA32_XSS);
+ GUEST_ASSERT(data == 0);
+ GUEST_ASSERT(guest_exception_count == 0);
+
+ test_wrmsr(MSR_IA32_XSS, 0);
+ GUEST_ASSERT(guest_exception_count == 0);
+
+ test_wrmsr(MSR_IA32_XSS, 1);
+ GUEST_ASSERT(guest_exception_count == 1);
+
+ /*
+ * Test userspace intercepting rdmsr / wrmsr for MSR_IA32_FLUSH_CMD.
+ *
+ * A GP is thrown if MSR_IA32_FLUSH_CMD is read
+ * from or if a value other than 1 is written to it.
+ */
+ test_rdmsr(MSR_IA32_FLUSH_CMD);
+ GUEST_ASSERT(guest_exception_count == 1);
+
+ test_wrmsr(MSR_IA32_FLUSH_CMD, 0);
+ GUEST_ASSERT(guest_exception_count == 1);
+
+ test_wrmsr(MSR_IA32_FLUSH_CMD, 1);
+ GUEST_ASSERT(guest_exception_count == 0);
+
+ /*
+ * Test userspace intercepting rdmsr / wrmsr for MSR_NON_EXISTENT.
+ *
+ * Test that a fabricated MSR can pass through the kernel
+ * and be handled in userspace.
+ */
+ test_wrmsr(MSR_NON_EXISTENT, 2);
+ GUEST_ASSERT(guest_exception_count == 0);
+
+ data = test_rdmsr(MSR_NON_EXISTENT);
+ GUEST_ASSERT(data == 2);
+ GUEST_ASSERT(guest_exception_count == 0);
+
+ if (fep_available) {
+ /* Let userspace know we aren't done. */
+ GUEST_SYNC(0);
+
+ /*
+ * Now run the same tests with the instruction emulator.
+ */
+ data = test_em_rdmsr(MSR_IA32_XSS);
+ GUEST_ASSERT(data == 0);
+ GUEST_ASSERT(guest_exception_count == 0);
+ test_em_wrmsr(MSR_IA32_XSS, 0);
+ GUEST_ASSERT(guest_exception_count == 0);
+ test_em_wrmsr(MSR_IA32_XSS, 1);
+ GUEST_ASSERT(guest_exception_count == 1);
+
+ test_em_rdmsr(MSR_IA32_FLUSH_CMD);
+ GUEST_ASSERT(guest_exception_count == 1);
+ test_em_wrmsr(MSR_IA32_FLUSH_CMD, 0);
+ GUEST_ASSERT(guest_exception_count == 1);
+ test_em_wrmsr(MSR_IA32_FLUSH_CMD, 1);
+ GUEST_ASSERT(guest_exception_count == 0);
+
+ test_em_wrmsr(MSR_NON_EXISTENT, 2);
+ GUEST_ASSERT(guest_exception_count == 0);
+ data = test_em_rdmsr(MSR_NON_EXISTENT);
+ GUEST_ASSERT(data == 2);
+ GUEST_ASSERT(guest_exception_count == 0);
+ }
+
+ GUEST_DONE();
+}
+
+static void guest_msr_calls(bool trapped)
+{
+ /* This goes into the in-kernel emulation */
+ wrmsr(MSR_SYSCALL_MASK, 0);
+
+ if (trapped) {
+ /* This goes into user space emulation */
+ GUEST_ASSERT(rdmsr(MSR_SYSCALL_MASK) == MSR_SYSCALL_MASK);
+ GUEST_ASSERT(rdmsr(MSR_GS_BASE) == MSR_GS_BASE);
+ } else {
+ GUEST_ASSERT(rdmsr(MSR_SYSCALL_MASK) != MSR_SYSCALL_MASK);
+ GUEST_ASSERT(rdmsr(MSR_GS_BASE) != MSR_GS_BASE);
+ }
+
+ /* If trapped == true, this goes into user space emulation */
+ wrmsr(MSR_IA32_POWER_CTL, 0x1234);
+
+ /* This goes into the in-kernel emulation */
+ rdmsr(MSR_IA32_POWER_CTL);
+
+ /* Invalid MSR, should always be handled by user space exit */
+ GUEST_ASSERT(rdmsr(0xdeadbeef) == 0xdeadbeef);
+ wrmsr(0xdeadbeef, 0x1234);
+}
+
+static void guest_code_filter_deny(void)
+{
+ guest_msr_calls(true);
+
+ /*
+ * Disable msr filtering, so that the kernel
+ * handles everything in the next round
+ */
+ GUEST_SYNC(0);
+
+ guest_msr_calls(false);
+
+ GUEST_DONE();
+}
+
+static void guest_code_permission_bitmap(void)
+{
+ uint64_t data;
+
+ data = test_rdmsr(MSR_FS_BASE);
+ GUEST_ASSERT(data == MSR_FS_BASE);
+ data = test_rdmsr(MSR_GS_BASE);
+ GUEST_ASSERT(data != MSR_GS_BASE);
+
+ /* Let userspace know to switch the filter */
+ GUEST_SYNC(0);
+
+ data = test_rdmsr(MSR_FS_BASE);
+ GUEST_ASSERT(data != MSR_FS_BASE);
+ data = test_rdmsr(MSR_GS_BASE);
+ GUEST_ASSERT(data == MSR_GS_BASE);
+
+ GUEST_DONE();
+}
+
+static void __guest_gp_handler(struct ex_regs *regs,
+ char *r_start, char *r_end,
+ char *w_start, char *w_end)
+{
+ if (regs->rip == (uintptr_t)r_start) {
+ regs->rip = (uintptr_t)r_end;
+ regs->rax = 0;
+ regs->rdx = 0;
+ } else if (regs->rip == (uintptr_t)w_start) {
+ regs->rip = (uintptr_t)w_end;
+ } else {
+ GUEST_ASSERT(!"RIP is at an unknown location!");
+ }
+
+ ++guest_exception_count;
+}
+
+static void guest_gp_handler(struct ex_regs *regs)
+{
+ __guest_gp_handler(regs, &rdmsr_start, &rdmsr_end,
+ &wrmsr_start, &wrmsr_end);
+}
+
+static void guest_fep_gp_handler(struct ex_regs *regs)
+{
+ __guest_gp_handler(regs, &em_rdmsr_start, &em_rdmsr_end,
+ &em_wrmsr_start, &em_wrmsr_end);
+}
+
+static void check_for_guest_assert(struct kvm_vcpu *vcpu)
+{
+ struct ucall uc;
+
+ if (vcpu->run->exit_reason == KVM_EXIT_IO &&
+ get_ucall(vcpu, &uc) == UCALL_ABORT) {
+ REPORT_GUEST_ASSERT(uc);
+ }
+}
+
+static void process_rdmsr(struct kvm_vcpu *vcpu, uint32_t msr_index)
+{
+ struct kvm_run *run = vcpu->run;
+
+ check_for_guest_assert(vcpu);
+
+ TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_X86_RDMSR);
+ TEST_ASSERT(run->msr.index == msr_index,
+ "Unexpected msr (0x%04x), expected 0x%04x",
+ run->msr.index, msr_index);
+
+ switch (run->msr.index) {
+ case MSR_IA32_XSS:
+ run->msr.data = 0;
+ break;
+ case MSR_IA32_FLUSH_CMD:
+ run->msr.error = 1;
+ break;
+ case MSR_NON_EXISTENT:
+ run->msr.data = msr_non_existent_data;
+ break;
+ case MSR_FS_BASE:
+ run->msr.data = MSR_FS_BASE;
+ break;
+ case MSR_GS_BASE:
+ run->msr.data = MSR_GS_BASE;
+ break;
+ default:
+ TEST_ASSERT(false, "Unexpected MSR: 0x%04x", run->msr.index);
+ }
+}
+
+static void process_wrmsr(struct kvm_vcpu *vcpu, uint32_t msr_index)
+{
+ struct kvm_run *run = vcpu->run;
+
+ check_for_guest_assert(vcpu);
+
+ TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_X86_WRMSR);
+ TEST_ASSERT(run->msr.index == msr_index,
+ "Unexpected msr (0x%04x), expected 0x%04x",
+ run->msr.index, msr_index);
+
+ switch (run->msr.index) {
+ case MSR_IA32_XSS:
+ if (run->msr.data != 0)
+ run->msr.error = 1;
+ break;
+ case MSR_IA32_FLUSH_CMD:
+ if (run->msr.data != 1)
+ run->msr.error = 1;
+ break;
+ case MSR_NON_EXISTENT:
+ msr_non_existent_data = run->msr.data;
+ break;
+ default:
+ TEST_ASSERT(false, "Unexpected MSR: 0x%04x", run->msr.index);
+ }
+}
+
+static void process_ucall_done(struct kvm_vcpu *vcpu)
+{
+ struct ucall uc;
+
+ check_for_guest_assert(vcpu);
+
+ TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_IO);
+
+ TEST_ASSERT(get_ucall(vcpu, &uc) == UCALL_DONE,
+ "Unexpected ucall command: %lu, expected UCALL_DONE (%d)",
+ uc.cmd, UCALL_DONE);
+}
+
+static uint64_t process_ucall(struct kvm_vcpu *vcpu)
+{
+ struct ucall uc = {};
+
+ check_for_guest_assert(vcpu);
+
+ TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_IO);
+
+ switch (get_ucall(vcpu, &uc)) {
+ case UCALL_SYNC:
+ break;
+ case UCALL_ABORT:
+ check_for_guest_assert(vcpu);
+ break;
+ case UCALL_DONE:
+ process_ucall_done(vcpu);
+ break;
+ default:
+ TEST_ASSERT(false, "Unexpected ucall");
+ }
+
+ return uc.cmd;
+}
+
+static void run_guest_then_process_rdmsr(struct kvm_vcpu *vcpu,
+ uint32_t msr_index)
+{
+ vcpu_run(vcpu);
+ process_rdmsr(vcpu, msr_index);
+}
+
+static void run_guest_then_process_wrmsr(struct kvm_vcpu *vcpu,
+ uint32_t msr_index)
+{
+ vcpu_run(vcpu);
+ process_wrmsr(vcpu, msr_index);
+}
+
+static uint64_t run_guest_then_process_ucall(struct kvm_vcpu *vcpu)
+{
+ vcpu_run(vcpu);
+ return process_ucall(vcpu);
+}
+
+static void run_guest_then_process_ucall_done(struct kvm_vcpu *vcpu)
+{
+ vcpu_run(vcpu);
+ process_ucall_done(vcpu);
+}
+
+KVM_ONE_VCPU_TEST_SUITE(user_msr);
+
+KVM_ONE_VCPU_TEST(user_msr, msr_filter_allow, guest_code_filter_allow)
+{
+ struct kvm_vm *vm = vcpu->vm;
+ uint64_t cmd;
+ int rc;
+
+ sync_global_to_guest(vm, fep_available);
+
+ rc = kvm_check_cap(KVM_CAP_X86_USER_SPACE_MSR);
+ TEST_ASSERT(rc, "KVM_CAP_X86_USER_SPACE_MSR is available");
+ vm_enable_cap(vm, KVM_CAP_X86_USER_SPACE_MSR, KVM_MSR_EXIT_REASON_FILTER);
+
+ rc = kvm_check_cap(KVM_CAP_X86_MSR_FILTER);
+ TEST_ASSERT(rc, "KVM_CAP_X86_MSR_FILTER is available");
+
+ vm_ioctl(vm, KVM_X86_SET_MSR_FILTER, &filter_allow);
+
+ vm_init_descriptor_tables(vm);
+ vcpu_init_descriptor_tables(vcpu);
+
+ vm_install_exception_handler(vm, GP_VECTOR, guest_gp_handler);
+
+ /* Process guest code userspace exits. */
+ run_guest_then_process_rdmsr(vcpu, MSR_IA32_XSS);
+ run_guest_then_process_wrmsr(vcpu, MSR_IA32_XSS);
+ run_guest_then_process_wrmsr(vcpu, MSR_IA32_XSS);
+
+ run_guest_then_process_rdmsr(vcpu, MSR_IA32_FLUSH_CMD);
+ run_guest_then_process_wrmsr(vcpu, MSR_IA32_FLUSH_CMD);
+ run_guest_then_process_wrmsr(vcpu, MSR_IA32_FLUSH_CMD);
+
+ run_guest_then_process_wrmsr(vcpu, MSR_NON_EXISTENT);
+ run_guest_then_process_rdmsr(vcpu, MSR_NON_EXISTENT);
+
+ vcpu_run(vcpu);
+ cmd = process_ucall(vcpu);
+
+ if (fep_available) {
+ TEST_ASSERT_EQ(cmd, UCALL_SYNC);
+ vm_install_exception_handler(vm, GP_VECTOR, guest_fep_gp_handler);
+
+ /* Process emulated rdmsr and wrmsr instructions. */
+ run_guest_then_process_rdmsr(vcpu, MSR_IA32_XSS);
+ run_guest_then_process_wrmsr(vcpu, MSR_IA32_XSS);
+ run_guest_then_process_wrmsr(vcpu, MSR_IA32_XSS);
+
+ run_guest_then_process_rdmsr(vcpu, MSR_IA32_FLUSH_CMD);
+ run_guest_then_process_wrmsr(vcpu, MSR_IA32_FLUSH_CMD);
+ run_guest_then_process_wrmsr(vcpu, MSR_IA32_FLUSH_CMD);
+
+ run_guest_then_process_wrmsr(vcpu, MSR_NON_EXISTENT);
+ run_guest_then_process_rdmsr(vcpu, MSR_NON_EXISTENT);
+
+ /* Confirm the guest completed without issues. */
+ run_guest_then_process_ucall_done(vcpu);
+ } else {
+ TEST_ASSERT_EQ(cmd, UCALL_DONE);
+ printf("To run the instruction emulated tests set the module parameter 'kvm.force_emulation_prefix=1'\n");
+ }
+}
+
+static int handle_ucall(struct kvm_vcpu *vcpu)
+{
+ struct ucall uc;
+
+ switch (get_ucall(vcpu, &uc)) {
+ case UCALL_ABORT:
+ REPORT_GUEST_ASSERT(uc);
+ break;
+ case UCALL_SYNC:
+ vm_ioctl(vcpu->vm, KVM_X86_SET_MSR_FILTER, &no_filter_deny);
+ break;
+ case UCALL_DONE:
+ return 1;
+ default:
+ TEST_FAIL("Unknown ucall %lu", uc.cmd);
+ }
+
+ return 0;
+}
+
+static void handle_rdmsr(struct kvm_run *run)
+{
+ run->msr.data = run->msr.index;
+ msr_reads++;
+
+ if (run->msr.index == MSR_SYSCALL_MASK ||
+ run->msr.index == MSR_GS_BASE) {
+ TEST_ASSERT(run->msr.reason == KVM_MSR_EXIT_REASON_FILTER,
+ "MSR read trap w/o access fault");
+ }
+
+ if (run->msr.index == 0xdeadbeef) {
+ TEST_ASSERT(run->msr.reason == KVM_MSR_EXIT_REASON_UNKNOWN,
+ "MSR deadbeef read trap w/o inval fault");
+ }
+}
+
+static void handle_wrmsr(struct kvm_run *run)
+{
+ /* ignore */
+ msr_writes++;
+
+ if (run->msr.index == MSR_IA32_POWER_CTL) {
+ TEST_ASSERT(run->msr.data == 0x1234,
+ "MSR data for MSR_IA32_POWER_CTL incorrect");
+ TEST_ASSERT(run->msr.reason == KVM_MSR_EXIT_REASON_FILTER,
+ "MSR_IA32_POWER_CTL trap w/o access fault");
+ }
+
+ if (run->msr.index == 0xdeadbeef) {
+ TEST_ASSERT(run->msr.data == 0x1234,
+ "MSR data for deadbeef incorrect");
+ TEST_ASSERT(run->msr.reason == KVM_MSR_EXIT_REASON_UNKNOWN,
+ "deadbeef trap w/o inval fault");
+ }
+}
+
+KVM_ONE_VCPU_TEST(user_msr, msr_filter_deny, guest_code_filter_deny)
+{
+ struct kvm_vm *vm = vcpu->vm;
+ struct kvm_run *run = vcpu->run;
+ int rc;
+
+ rc = kvm_check_cap(KVM_CAP_X86_USER_SPACE_MSR);
+ TEST_ASSERT(rc, "KVM_CAP_X86_USER_SPACE_MSR is available");
+ vm_enable_cap(vm, KVM_CAP_X86_USER_SPACE_MSR, KVM_MSR_EXIT_REASON_INVAL |
+ KVM_MSR_EXIT_REASON_UNKNOWN |
+ KVM_MSR_EXIT_REASON_FILTER);
+
+ rc = kvm_check_cap(KVM_CAP_X86_MSR_FILTER);
+ TEST_ASSERT(rc, "KVM_CAP_X86_MSR_FILTER is available");
+
+ prepare_bitmaps();
+ vm_ioctl(vm, KVM_X86_SET_MSR_FILTER, &filter_deny);
+
+ while (1) {
+ vcpu_run(vcpu);
+
+ switch (run->exit_reason) {
+ case KVM_EXIT_X86_RDMSR:
+ handle_rdmsr(run);
+ break;
+ case KVM_EXIT_X86_WRMSR:
+ handle_wrmsr(run);
+ break;
+ case KVM_EXIT_IO:
+ if (handle_ucall(vcpu))
+ goto done;
+ break;
+ }
+
+ }
+
+done:
+ TEST_ASSERT(msr_reads == 4, "Handled 4 rdmsr in user space");
+ TEST_ASSERT(msr_writes == 3, "Handled 3 wrmsr in user space");
+}
+
+KVM_ONE_VCPU_TEST(user_msr, msr_permission_bitmap, guest_code_permission_bitmap)
+{
+ struct kvm_vm *vm = vcpu->vm;
+ int rc;
+
+ rc = kvm_check_cap(KVM_CAP_X86_USER_SPACE_MSR);
+ TEST_ASSERT(rc, "KVM_CAP_X86_USER_SPACE_MSR is available");
+ vm_enable_cap(vm, KVM_CAP_X86_USER_SPACE_MSR, KVM_MSR_EXIT_REASON_FILTER);
+
+ rc = kvm_check_cap(KVM_CAP_X86_MSR_FILTER);
+ TEST_ASSERT(rc, "KVM_CAP_X86_MSR_FILTER is available");
+
+ vm_ioctl(vm, KVM_X86_SET_MSR_FILTER, &filter_fs);
+ run_guest_then_process_rdmsr(vcpu, MSR_FS_BASE);
+ TEST_ASSERT(run_guest_then_process_ucall(vcpu) == UCALL_SYNC,
+ "Expected ucall state to be UCALL_SYNC.");
+ vm_ioctl(vm, KVM_X86_SET_MSR_FILTER, &filter_gs);
+ run_guest_then_process_rdmsr(vcpu, MSR_GS_BASE);
+ run_guest_then_process_ucall_done(vcpu);
+}
+
+#define test_user_exit_msr_ioctl(vm, cmd, arg, flag, valid_mask) \
+({ \
+ int r = __vm_ioctl(vm, cmd, arg); \
+ \
+ if (flag & valid_mask) \
+ TEST_ASSERT(!r, __KVM_IOCTL_ERROR(#cmd, r)); \
+ else \
+ TEST_ASSERT(r == -1 && errno == EINVAL, \
+ "Wanted EINVAL for %s with flag = 0x%llx, got rc: %i errno: %i (%s)", \
+ #cmd, flag, r, errno, strerror(errno)); \
+})
+
+static void run_user_space_msr_flag_test(struct kvm_vm *vm)
+{
+ struct kvm_enable_cap cap = { .cap = KVM_CAP_X86_USER_SPACE_MSR };
+ int nflags = sizeof(cap.args[0]) * BITS_PER_BYTE;
+ int rc;
+ int i;
+
+ rc = kvm_check_cap(KVM_CAP_X86_USER_SPACE_MSR);
+ TEST_ASSERT(rc, "KVM_CAP_X86_USER_SPACE_MSR is available");
+
+ for (i = 0; i < nflags; i++) {
+ cap.args[0] = BIT_ULL(i);
+ test_user_exit_msr_ioctl(vm, KVM_ENABLE_CAP, &cap,
+ BIT_ULL(i), KVM_MSR_EXIT_REASON_VALID_MASK);
+ }
+}
+
+static void run_msr_filter_flag_test(struct kvm_vm *vm)
+{
+ u64 deny_bits = 0;
+ struct kvm_msr_filter filter = {
+ .flags = KVM_MSR_FILTER_DEFAULT_ALLOW,
+ .ranges = {
+ {
+ .flags = KVM_MSR_FILTER_READ,
+ .nmsrs = 1,
+ .base = 0,
+ .bitmap = (uint8_t *)&deny_bits,
+ },
+ },
+ };
+ int nflags;
+ int rc;
+ int i;
+
+ rc = kvm_check_cap(KVM_CAP_X86_MSR_FILTER);
+ TEST_ASSERT(rc, "KVM_CAP_X86_MSR_FILTER is available");
+
+ nflags = sizeof(filter.flags) * BITS_PER_BYTE;
+ for (i = 0; i < nflags; i++) {
+ filter.flags = BIT_ULL(i);
+ test_user_exit_msr_ioctl(vm, KVM_X86_SET_MSR_FILTER, &filter,
+ BIT_ULL(i), KVM_MSR_FILTER_VALID_MASK);
+ }
+
+ filter.flags = KVM_MSR_FILTER_DEFAULT_ALLOW;
+ nflags = sizeof(filter.ranges[0].flags) * BITS_PER_BYTE;
+ for (i = 0; i < nflags; i++) {
+ filter.ranges[0].flags = BIT_ULL(i);
+ test_user_exit_msr_ioctl(vm, KVM_X86_SET_MSR_FILTER, &filter,
+ BIT_ULL(i), KVM_MSR_FILTER_RANGE_VALID_MASK);
+ }
+}
+
+/* Test that attempts to write to the unused bits in a flag fails. */
+KVM_ONE_VCPU_TEST(user_msr, user_exit_msr_flags, NULL)
+{
+ struct kvm_vm *vm = vcpu->vm;
+
+ /* Test flags for KVM_CAP_X86_USER_SPACE_MSR. */
+ run_user_space_msr_flag_test(vm);
+
+ /* Test flags and range flags for KVM_X86_SET_MSR_FILTER. */
+ run_msr_filter_flag_test(vm);
+}
+
+int main(int argc, char *argv[])
+{
+ fep_available = kvm_is_forced_emulation_enabled();
+
+ return test_harness_run(argc, argv);
+}
diff --git a/tools/testing/selftests/kvm/x86_64/vmx_apic_access_test.c b/tools/testing/selftests/kvm/x86_64/vmx_apic_access_test.c
new file mode 100644
index 000000000000..a81a24761aac
--- /dev/null
+++ b/tools/testing/selftests/kvm/x86_64/vmx_apic_access_test.c
@@ -0,0 +1,124 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * vmx_apic_access_test
+ *
+ * Copyright (C) 2020, Google LLC.
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2.
+ *
+ * The first subtest simply checks to see that an L2 guest can be
+ * launched with a valid APIC-access address that is backed by a
+ * page of L1 physical memory.
+ *
+ * The second subtest sets the APIC-access address to a (valid) L1
+ * physical address that is not backed by memory. KVM can't handle
+ * this situation, so resuming L2 should result in a KVM exit for
+ * internal error (emulation). This is not an architectural
+ * requirement. It is just a shortcoming of KVM. The internal error
+ * is unfortunate, but it's better than what used to happen!
+ */
+
+#include "test_util.h"
+#include "kvm_util.h"
+#include "processor.h"
+#include "vmx.h"
+
+#include <string.h>
+#include <sys/ioctl.h>
+
+#include "kselftest.h"
+
+static void l2_guest_code(void)
+{
+ /* Exit to L1 */
+ __asm__ __volatile__("vmcall");
+}
+
+static void l1_guest_code(struct vmx_pages *vmx_pages, unsigned long high_gpa)
+{
+#define L2_GUEST_STACK_SIZE 64
+ unsigned long l2_guest_stack[L2_GUEST_STACK_SIZE];
+ uint32_t control;
+
+ GUEST_ASSERT(prepare_for_vmx_operation(vmx_pages));
+ GUEST_ASSERT(load_vmcs(vmx_pages));
+
+ /* Prepare the VMCS for L2 execution. */
+ prepare_vmcs(vmx_pages, l2_guest_code,
+ &l2_guest_stack[L2_GUEST_STACK_SIZE]);
+ control = vmreadz(CPU_BASED_VM_EXEC_CONTROL);
+ control |= CPU_BASED_ACTIVATE_SECONDARY_CONTROLS;
+ vmwrite(CPU_BASED_VM_EXEC_CONTROL, control);
+ control = vmreadz(SECONDARY_VM_EXEC_CONTROL);
+ control |= SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES;
+ vmwrite(SECONDARY_VM_EXEC_CONTROL, control);
+ vmwrite(APIC_ACCESS_ADDR, vmx_pages->apic_access_gpa);
+
+ /* Try to launch L2 with the memory-backed APIC-access address. */
+ GUEST_SYNC(vmreadz(APIC_ACCESS_ADDR));
+ GUEST_ASSERT(!vmlaunch());
+ GUEST_ASSERT(vmreadz(VM_EXIT_REASON) == EXIT_REASON_VMCALL);
+
+ vmwrite(APIC_ACCESS_ADDR, high_gpa);
+
+ /* Try to resume L2 with the unbacked APIC-access address. */
+ GUEST_SYNC(vmreadz(APIC_ACCESS_ADDR));
+ GUEST_ASSERT(!vmresume());
+ GUEST_ASSERT(vmreadz(VM_EXIT_REASON) == EXIT_REASON_VMCALL);
+
+ GUEST_DONE();
+}
+
+int main(int argc, char *argv[])
+{
+ unsigned long apic_access_addr = ~0ul;
+ vm_vaddr_t vmx_pages_gva;
+ unsigned long high_gpa;
+ struct vmx_pages *vmx;
+ bool done = false;
+
+ struct kvm_vcpu *vcpu;
+ struct kvm_vm *vm;
+
+ TEST_REQUIRE(kvm_cpu_has(X86_FEATURE_VMX));
+
+ vm = vm_create_with_one_vcpu(&vcpu, l1_guest_code);
+
+ high_gpa = (vm->max_gfn - 1) << vm->page_shift;
+
+ vmx = vcpu_alloc_vmx(vm, &vmx_pages_gva);
+ prepare_virtualize_apic_accesses(vmx, vm);
+ vcpu_args_set(vcpu, 2, vmx_pages_gva, high_gpa);
+
+ while (!done) {
+ volatile struct kvm_run *run = vcpu->run;
+ struct ucall uc;
+
+ vcpu_run(vcpu);
+ if (apic_access_addr == high_gpa) {
+ TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_INTERNAL_ERROR);
+ TEST_ASSERT(run->internal.suberror ==
+ KVM_INTERNAL_ERROR_EMULATION,
+ "Got internal suberror other than KVM_INTERNAL_ERROR_EMULATION: %u",
+ run->internal.suberror);
+ break;
+ }
+ TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_IO);
+
+ switch (get_ucall(vcpu, &uc)) {
+ case UCALL_ABORT:
+ REPORT_GUEST_ASSERT(uc);
+ /* NOT REACHED */
+ case UCALL_SYNC:
+ apic_access_addr = uc.args[1];
+ break;
+ case UCALL_DONE:
+ done = true;
+ break;
+ default:
+ TEST_ASSERT(false, "Unknown ucall %lu", uc.cmd);
+ }
+ }
+ kvm_vm_free(vm);
+ return 0;
+}
diff --git a/tools/testing/selftests/kvm/x86_64/vmx_close_while_nested_test.c b/tools/testing/selftests/kvm/x86_64/vmx_close_while_nested_test.c
index fe40ade06a49..dad988351493 100644
--- a/tools/testing/selftests/kvm/x86_64/vmx_close_while_nested_test.c
+++ b/tools/testing/selftests/kvm/x86_64/vmx_close_while_nested_test.c
@@ -18,20 +18,15 @@
#include "kselftest.h"
-#define VCPU_ID 5
-
enum {
PORT_L0_EXIT = 0x2000,
};
-/* The virtual machine object. */
-static struct kvm_vm *vm;
-
static void l2_guest_code(void)
{
/* Exit to L0 */
- asm volatile("inb %%dx, %%al"
- : : [port] "d" (PORT_L0_EXIT) : "rax");
+ asm volatile("inb %%dx, %%al"
+ : : [port] "d" (PORT_L0_EXIT) : "rax");
}
static void l1_guest_code(struct vmx_pages *vmx_pages)
@@ -53,32 +48,30 @@ static void l1_guest_code(struct vmx_pages *vmx_pages)
int main(int argc, char *argv[])
{
vm_vaddr_t vmx_pages_gva;
+ struct kvm_vcpu *vcpu;
+ struct kvm_vm *vm;
- nested_vmx_check_supported();
+ TEST_REQUIRE(kvm_cpu_has(X86_FEATURE_VMX));
- vm = vm_create_default(VCPU_ID, 0, (void *) l1_guest_code);
- vcpu_set_cpuid(vm, VCPU_ID, kvm_get_supported_cpuid());
+ vm = vm_create_with_one_vcpu(&vcpu, l1_guest_code);
/* Allocate VMX pages and shared descriptors (vmx_pages). */
vcpu_alloc_vmx(vm, &vmx_pages_gva);
- vcpu_args_set(vm, VCPU_ID, 1, vmx_pages_gva);
+ vcpu_args_set(vcpu, 1, vmx_pages_gva);
for (;;) {
- volatile struct kvm_run *run = vcpu_state(vm, VCPU_ID);
+ volatile struct kvm_run *run = vcpu->run;
struct ucall uc;
- vcpu_run(vm, VCPU_ID);
- TEST_ASSERT(run->exit_reason == KVM_EXIT_IO,
- "Got exit_reason other than KVM_EXIT_IO: %u (%s)\n",
- run->exit_reason,
- exit_reason_str(run->exit_reason));
+ vcpu_run(vcpu);
+ TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_IO);
if (run->io.port == PORT_L0_EXIT)
break;
- switch (get_ucall(vm, VCPU_ID, &uc)) {
+ switch (get_ucall(vcpu, &uc)) {
case UCALL_ABORT:
- TEST_FAIL("%s", (const char *)uc.args[0]);
+ REPORT_GUEST_ASSERT(uc);
/* NOT REACHED */
default:
TEST_FAIL("Unknown ucall %lu", uc.cmd);
diff --git a/tools/testing/selftests/kvm/x86_64/vmx_dirty_log_test.c b/tools/testing/selftests/kvm/x86_64/vmx_dirty_log_test.c
index e894a638a155..7f6f5f23fb9b 100644
--- a/tools/testing/selftests/kvm/x86_64/vmx_dirty_log_test.c
+++ b/tools/testing/selftests/kvm/x86_64/vmx_dirty_log_test.c
@@ -17,8 +17,6 @@
#include "processor.h"
#include "vmx.h"
-#define VCPU_ID 1
-
/* The memory slot index to track dirty pages */
#define TEST_MEM_SLOT_INDEX 1
#define TEST_MEM_PAGES 3
@@ -73,19 +71,18 @@ int main(int argc, char *argv[])
unsigned long *bmap;
uint64_t *host_test_mem;
+ struct kvm_vcpu *vcpu;
struct kvm_vm *vm;
- struct kvm_run *run;
struct ucall uc;
bool done = false;
- nested_vmx_check_supported();
+ TEST_REQUIRE(kvm_cpu_has(X86_FEATURE_VMX));
+ TEST_REQUIRE(kvm_cpu_has_ept());
/* Create VM */
- vm = vm_create_default(VCPU_ID, 0, l1_guest_code);
- vcpu_set_cpuid(vm, VCPU_ID, kvm_get_supported_cpuid());
+ vm = vm_create_with_one_vcpu(&vcpu, l1_guest_code);
vmx = vcpu_alloc_vmx(vm, &vmx_pages_gva);
- vcpu_args_set(vm, VCPU_ID, 1, vmx_pages_gva);
- run = vcpu_state(vm, VCPU_ID);
+ vcpu_args_set(vcpu, 1, vmx_pages_gva);
/* Add an extra memory slot for testing dirty logging */
vm_userspace_mem_region_add(vm, VM_MEM_SRC_ANONYMOUS,
@@ -98,7 +95,7 @@ int main(int argc, char *argv[])
* Add an identity map for GVA range [0xc0000000, 0xc0002000). This
* affects both L1 and L2. However...
*/
- virt_map(vm, GUEST_TEST_MEM, GUEST_TEST_MEM, TEST_MEM_PAGES, 0);
+ virt_map(vm, GUEST_TEST_MEM, GUEST_TEST_MEM, TEST_MEM_PAGES);
/*
* ... pages in the L2 GPA range [0xc0001000, 0xc0003000) will map to
@@ -108,25 +105,21 @@ int main(int argc, char *argv[])
* meaning after the last call to virt_map.
*/
prepare_eptp(vmx, vm, 0);
- nested_map_memslot(vmx, vm, 0, 0);
- nested_map(vmx, vm, NESTED_TEST_MEM1, GUEST_TEST_MEM, 4096, 0);
- nested_map(vmx, vm, NESTED_TEST_MEM2, GUEST_TEST_MEM, 4096, 0);
+ nested_map_memslot(vmx, vm, 0);
+ nested_map(vmx, vm, NESTED_TEST_MEM1, GUEST_TEST_MEM, 4096);
+ nested_map(vmx, vm, NESTED_TEST_MEM2, GUEST_TEST_MEM, 4096);
- bmap = bitmap_alloc(TEST_MEM_PAGES);
+ bmap = bitmap_zalloc(TEST_MEM_PAGES);
host_test_mem = addr_gpa2hva(vm, GUEST_TEST_MEM);
while (!done) {
memset(host_test_mem, 0xaa, TEST_MEM_PAGES * 4096);
- _vcpu_run(vm, VCPU_ID);
- TEST_ASSERT(run->exit_reason == KVM_EXIT_IO,
- "Unexpected exit reason: %u (%s),\n",
- run->exit_reason,
- exit_reason_str(run->exit_reason));
+ vcpu_run(vcpu);
+ TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_IO);
- switch (get_ucall(vm, VCPU_ID, &uc)) {
+ switch (get_ucall(vcpu, &uc)) {
case UCALL_ABORT:
- TEST_FAIL("%s at %s:%ld", (const char *)uc.args[0],
- __FILE__, uc.args[1]);
+ REPORT_GUEST_ASSERT(uc);
/* NOT REACHED */
case UCALL_SYNC:
/*
@@ -135,17 +128,17 @@ int main(int argc, char *argv[])
*/
kvm_vm_get_dirty_log(vm, TEST_MEM_SLOT_INDEX, bmap);
if (uc.args[1]) {
- TEST_ASSERT(test_bit(0, bmap), "Page 0 incorrectly reported clean\n");
- TEST_ASSERT(host_test_mem[0] == 1, "Page 0 not written by guest\n");
+ TEST_ASSERT(test_bit(0, bmap), "Page 0 incorrectly reported clean");
+ TEST_ASSERT(host_test_mem[0] == 1, "Page 0 not written by guest");
} else {
- TEST_ASSERT(!test_bit(0, bmap), "Page 0 incorrectly reported dirty\n");
- TEST_ASSERT(host_test_mem[0] == 0xaaaaaaaaaaaaaaaaULL, "Page 0 written by guest\n");
+ TEST_ASSERT(!test_bit(0, bmap), "Page 0 incorrectly reported dirty");
+ TEST_ASSERT(host_test_mem[0] == 0xaaaaaaaaaaaaaaaaULL, "Page 0 written by guest");
}
- TEST_ASSERT(!test_bit(1, bmap), "Page 1 incorrectly reported dirty\n");
- TEST_ASSERT(host_test_mem[4096 / 8] == 0xaaaaaaaaaaaaaaaaULL, "Page 1 written by guest\n");
- TEST_ASSERT(!test_bit(2, bmap), "Page 2 incorrectly reported dirty\n");
- TEST_ASSERT(host_test_mem[8192 / 8] == 0xaaaaaaaaaaaaaaaaULL, "Page 2 written by guest\n");
+ TEST_ASSERT(!test_bit(1, bmap), "Page 1 incorrectly reported dirty");
+ TEST_ASSERT(host_test_mem[4096 / 8] == 0xaaaaaaaaaaaaaaaaULL, "Page 1 written by guest");
+ TEST_ASSERT(!test_bit(2, bmap), "Page 2 incorrectly reported dirty");
+ TEST_ASSERT(host_test_mem[8192 / 8] == 0xaaaaaaaaaaaaaaaaULL, "Page 2 written by guest");
break;
case UCALL_DONE:
done = true;
diff --git a/tools/testing/selftests/kvm/x86_64/vmx_exception_with_invalid_guest_state.c b/tools/testing/selftests/kvm/x86_64/vmx_exception_with_invalid_guest_state.c
new file mode 100644
index 000000000000..fad3634fd9eb
--- /dev/null
+++ b/tools/testing/selftests/kvm/x86_64/vmx_exception_with_invalid_guest_state.c
@@ -0,0 +1,145 @@
+// SPDX-License-Identifier: GPL-2.0-only
+#include "test_util.h"
+#include "kvm_util.h"
+#include "processor.h"
+
+#include <signal.h>
+#include <string.h>
+#include <sys/ioctl.h>
+#include <sys/time.h>
+
+#include "kselftest.h"
+
+static void guest_ud_handler(struct ex_regs *regs)
+{
+ /* Loop on the ud2 until guest state is made invalid. */
+}
+
+static void guest_code(void)
+{
+ asm volatile("ud2");
+}
+
+static void __run_vcpu_with_invalid_state(struct kvm_vcpu *vcpu)
+{
+ struct kvm_run *run = vcpu->run;
+
+ vcpu_run(vcpu);
+
+ TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_INTERNAL_ERROR);
+ TEST_ASSERT(run->emulation_failure.suberror == KVM_INTERNAL_ERROR_EMULATION,
+ "Expected emulation failure, got %d",
+ run->emulation_failure.suberror);
+}
+
+static void run_vcpu_with_invalid_state(struct kvm_vcpu *vcpu)
+{
+ /*
+ * Always run twice to verify KVM handles the case where _KVM_ queues
+ * an exception with invalid state and then exits to userspace, i.e.
+ * that KVM doesn't explode if userspace ignores the initial error.
+ */
+ __run_vcpu_with_invalid_state(vcpu);
+ __run_vcpu_with_invalid_state(vcpu);
+}
+
+static void set_timer(void)
+{
+ struct itimerval timer;
+
+ timer.it_value.tv_sec = 0;
+ timer.it_value.tv_usec = 200;
+ timer.it_interval = timer.it_value;
+ TEST_ASSERT_EQ(setitimer(ITIMER_REAL, &timer, NULL), 0);
+}
+
+static void set_or_clear_invalid_guest_state(struct kvm_vcpu *vcpu, bool set)
+{
+ static struct kvm_sregs sregs;
+
+ if (!sregs.cr0)
+ vcpu_sregs_get(vcpu, &sregs);
+ sregs.tr.unusable = !!set;
+ vcpu_sregs_set(vcpu, &sregs);
+}
+
+static void set_invalid_guest_state(struct kvm_vcpu *vcpu)
+{
+ set_or_clear_invalid_guest_state(vcpu, true);
+}
+
+static void clear_invalid_guest_state(struct kvm_vcpu *vcpu)
+{
+ set_or_clear_invalid_guest_state(vcpu, false);
+}
+
+static struct kvm_vcpu *get_set_sigalrm_vcpu(struct kvm_vcpu *__vcpu)
+{
+ static struct kvm_vcpu *vcpu = NULL;
+
+ if (__vcpu)
+ vcpu = __vcpu;
+ return vcpu;
+}
+
+static void sigalrm_handler(int sig)
+{
+ struct kvm_vcpu *vcpu = get_set_sigalrm_vcpu(NULL);
+ struct kvm_vcpu_events events;
+
+ TEST_ASSERT(sig == SIGALRM, "Unexpected signal = %d", sig);
+
+ vcpu_events_get(vcpu, &events);
+
+ /*
+ * If an exception is pending, attempt KVM_RUN with invalid guest,
+ * otherwise rearm the timer and keep doing so until the timer fires
+ * between KVM queueing an exception and re-entering the guest.
+ */
+ if (events.exception.pending) {
+ set_invalid_guest_state(vcpu);
+ run_vcpu_with_invalid_state(vcpu);
+ } else {
+ set_timer();
+ }
+}
+
+int main(int argc, char *argv[])
+{
+ struct kvm_vcpu *vcpu;
+ struct kvm_vm *vm;
+
+ TEST_REQUIRE(host_cpu_is_intel);
+ TEST_REQUIRE(!vm_is_unrestricted_guest(NULL));
+
+ vm = vm_create_with_one_vcpu(&vcpu, guest_code);
+ get_set_sigalrm_vcpu(vcpu);
+
+ vm_init_descriptor_tables(vm);
+ vcpu_init_descriptor_tables(vcpu);
+
+ vm_install_exception_handler(vm, UD_VECTOR, guest_ud_handler);
+
+ /*
+ * Stuff invalid guest state for L2 by making TR unusuable. The next
+ * KVM_RUN should induce a TRIPLE_FAULT in L2 as KVM doesn't support
+ * emulating invalid guest state for L2.
+ */
+ set_invalid_guest_state(vcpu);
+ run_vcpu_with_invalid_state(vcpu);
+
+ /*
+ * Verify KVM also handles the case where userspace gains control while
+ * an exception is pending and stuffs invalid state. Run with valid
+ * guest state and a timer firing every 200us, and attempt to enter the
+ * guest with invalid state when the handler interrupts KVM with an
+ * exception pending.
+ */
+ clear_invalid_guest_state(vcpu);
+ TEST_ASSERT(signal(SIGALRM, sigalrm_handler) != SIG_ERR,
+ "Failed to register SIGALRM handler, errno = %d (%s)",
+ errno, strerror(errno));
+
+ set_timer();
+ run_vcpu_with_invalid_state(vcpu);
+}
diff --git a/tools/testing/selftests/kvm/x86_64/vmx_invalid_nested_guest_state.c b/tools/testing/selftests/kvm/x86_64/vmx_invalid_nested_guest_state.c
new file mode 100644
index 000000000000..a100ee5f0009
--- /dev/null
+++ b/tools/testing/selftests/kvm/x86_64/vmx_invalid_nested_guest_state.c
@@ -0,0 +1,103 @@
+// SPDX-License-Identifier: GPL-2.0-only
+#include "test_util.h"
+#include "kvm_util.h"
+#include "processor.h"
+#include "vmx.h"
+
+#include <string.h>
+#include <sys/ioctl.h>
+
+#include "kselftest.h"
+
+#define ARBITRARY_IO_PORT 0x2000
+
+static struct kvm_vm *vm;
+
+static void l2_guest_code(void)
+{
+ /*
+ * Generate an exit to L0 userspace, i.e. main(), via I/O to an
+ * arbitrary port.
+ */
+ asm volatile("inb %%dx, %%al"
+ : : [port] "d" (ARBITRARY_IO_PORT) : "rax");
+}
+
+static void l1_guest_code(struct vmx_pages *vmx_pages)
+{
+#define L2_GUEST_STACK_SIZE 64
+ unsigned long l2_guest_stack[L2_GUEST_STACK_SIZE];
+
+ GUEST_ASSERT(prepare_for_vmx_operation(vmx_pages));
+ GUEST_ASSERT(load_vmcs(vmx_pages));
+
+ /* Prepare the VMCS for L2 execution. */
+ prepare_vmcs(vmx_pages, l2_guest_code,
+ &l2_guest_stack[L2_GUEST_STACK_SIZE]);
+
+ /*
+ * L2 must be run without unrestricted guest, verify that the selftests
+ * library hasn't enabled it. Because KVM selftests jump directly to
+ * 64-bit mode, unrestricted guest support isn't required.
+ */
+ GUEST_ASSERT(!(vmreadz(CPU_BASED_VM_EXEC_CONTROL) & CPU_BASED_ACTIVATE_SECONDARY_CONTROLS) ||
+ !(vmreadz(SECONDARY_VM_EXEC_CONTROL) & SECONDARY_EXEC_UNRESTRICTED_GUEST));
+
+ GUEST_ASSERT(!vmlaunch());
+
+ /* L2 should triple fault after main() stuffs invalid guest state. */
+ GUEST_ASSERT(vmreadz(VM_EXIT_REASON) == EXIT_REASON_TRIPLE_FAULT);
+ GUEST_DONE();
+}
+
+int main(int argc, char *argv[])
+{
+ vm_vaddr_t vmx_pages_gva;
+ struct kvm_sregs sregs;
+ struct kvm_vcpu *vcpu;
+ struct kvm_run *run;
+ struct ucall uc;
+
+ TEST_REQUIRE(kvm_cpu_has(X86_FEATURE_VMX));
+
+ vm = vm_create_with_one_vcpu(&vcpu, l1_guest_code);
+
+ /* Allocate VMX pages and shared descriptors (vmx_pages). */
+ vcpu_alloc_vmx(vm, &vmx_pages_gva);
+ vcpu_args_set(vcpu, 1, vmx_pages_gva);
+
+ vcpu_run(vcpu);
+
+ run = vcpu->run;
+
+ /*
+ * The first exit to L0 userspace should be an I/O access from L2.
+ * Running L1 should launch L2 without triggering an exit to userspace.
+ */
+ TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_IO);
+
+ TEST_ASSERT(run->io.port == ARBITRARY_IO_PORT,
+ "Expected IN from port %d from L2, got port %d",
+ ARBITRARY_IO_PORT, run->io.port);
+
+ /*
+ * Stuff invalid guest state for L2 by making TR unusuable. The next
+ * KVM_RUN should induce a TRIPLE_FAULT in L2 as KVM doesn't support
+ * emulating invalid guest state for L2.
+ */
+ memset(&sregs, 0, sizeof(sregs));
+ vcpu_sregs_get(vcpu, &sregs);
+ sregs.tr.unusable = 1;
+ vcpu_sregs_set(vcpu, &sregs);
+
+ vcpu_run(vcpu);
+
+ switch (get_ucall(vcpu, &uc)) {
+ case UCALL_DONE:
+ break;
+ case UCALL_ABORT:
+ REPORT_GUEST_ASSERT(uc);
+ default:
+ TEST_FAIL("Unexpected ucall: %lu", uc.cmd);
+ }
+}
diff --git a/tools/testing/selftests/kvm/x86_64/vmx_msrs_test.c b/tools/testing/selftests/kvm/x86_64/vmx_msrs_test.c
new file mode 100644
index 000000000000..90720b6205f4
--- /dev/null
+++ b/tools/testing/selftests/kvm/x86_64/vmx_msrs_test.c
@@ -0,0 +1,131 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * VMX control MSR test
+ *
+ * Copyright (C) 2022 Google LLC.
+ *
+ * Tests for KVM ownership of bits in the VMX entry/exit control MSRs. Checks
+ * that KVM will set owned bits where appropriate, and will not if
+ * KVM_X86_QUIRK_TWEAK_VMX_CTRL_MSRS is disabled.
+ */
+#include <linux/bitmap.h>
+#include "kvm_util.h"
+#include "vmx.h"
+
+static void vmx_fixed1_msr_test(struct kvm_vcpu *vcpu, uint32_t msr_index,
+ uint64_t mask)
+{
+ uint64_t val = vcpu_get_msr(vcpu, msr_index);
+ uint64_t bit;
+
+ mask &= val;
+
+ for_each_set_bit(bit, &mask, 64) {
+ vcpu_set_msr(vcpu, msr_index, val & ~BIT_ULL(bit));
+ vcpu_set_msr(vcpu, msr_index, val);
+ }
+}
+
+static void vmx_fixed0_msr_test(struct kvm_vcpu *vcpu, uint32_t msr_index,
+ uint64_t mask)
+{
+ uint64_t val = vcpu_get_msr(vcpu, msr_index);
+ uint64_t bit;
+
+ mask = ~mask | val;
+
+ for_each_clear_bit(bit, &mask, 64) {
+ vcpu_set_msr(vcpu, msr_index, val | BIT_ULL(bit));
+ vcpu_set_msr(vcpu, msr_index, val);
+ }
+}
+
+static void vmx_fixed0and1_msr_test(struct kvm_vcpu *vcpu, uint32_t msr_index)
+{
+ vmx_fixed0_msr_test(vcpu, msr_index, GENMASK_ULL(31, 0));
+ vmx_fixed1_msr_test(vcpu, msr_index, GENMASK_ULL(63, 32));
+}
+
+static void vmx_save_restore_msrs_test(struct kvm_vcpu *vcpu)
+{
+ vcpu_set_msr(vcpu, MSR_IA32_VMX_VMCS_ENUM, 0);
+ vcpu_set_msr(vcpu, MSR_IA32_VMX_VMCS_ENUM, -1ull);
+
+ vmx_fixed1_msr_test(vcpu, MSR_IA32_VMX_BASIC,
+ BIT_ULL(49) | BIT_ULL(54) | BIT_ULL(55));
+
+ vmx_fixed1_msr_test(vcpu, MSR_IA32_VMX_MISC,
+ BIT_ULL(5) | GENMASK_ULL(8, 6) | BIT_ULL(14) |
+ BIT_ULL(15) | BIT_ULL(28) | BIT_ULL(29) | BIT_ULL(30));
+
+ vmx_fixed0and1_msr_test(vcpu, MSR_IA32_VMX_PROCBASED_CTLS2);
+ vmx_fixed1_msr_test(vcpu, MSR_IA32_VMX_EPT_VPID_CAP, -1ull);
+ vmx_fixed0and1_msr_test(vcpu, MSR_IA32_VMX_TRUE_PINBASED_CTLS);
+ vmx_fixed0and1_msr_test(vcpu, MSR_IA32_VMX_TRUE_PROCBASED_CTLS);
+ vmx_fixed0and1_msr_test(vcpu, MSR_IA32_VMX_TRUE_EXIT_CTLS);
+ vmx_fixed0and1_msr_test(vcpu, MSR_IA32_VMX_TRUE_ENTRY_CTLS);
+ vmx_fixed1_msr_test(vcpu, MSR_IA32_VMX_VMFUNC, -1ull);
+}
+
+static void __ia32_feature_control_msr_test(struct kvm_vcpu *vcpu,
+ uint64_t msr_bit,
+ struct kvm_x86_cpu_feature feature)
+{
+ uint64_t val;
+
+ vcpu_clear_cpuid_feature(vcpu, feature);
+
+ val = vcpu_get_msr(vcpu, MSR_IA32_FEAT_CTL);
+ vcpu_set_msr(vcpu, MSR_IA32_FEAT_CTL, val | msr_bit | FEAT_CTL_LOCKED);
+ vcpu_set_msr(vcpu, MSR_IA32_FEAT_CTL, (val & ~msr_bit) | FEAT_CTL_LOCKED);
+ vcpu_set_msr(vcpu, MSR_IA32_FEAT_CTL, val | msr_bit | FEAT_CTL_LOCKED);
+ vcpu_set_msr(vcpu, MSR_IA32_FEAT_CTL, (val & ~msr_bit) | FEAT_CTL_LOCKED);
+ vcpu_set_msr(vcpu, MSR_IA32_FEAT_CTL, val);
+
+ if (!kvm_cpu_has(feature))
+ return;
+
+ vcpu_set_cpuid_feature(vcpu, feature);
+}
+
+static void ia32_feature_control_msr_test(struct kvm_vcpu *vcpu)
+{
+ uint64_t supported_bits = FEAT_CTL_LOCKED |
+ FEAT_CTL_VMX_ENABLED_INSIDE_SMX |
+ FEAT_CTL_VMX_ENABLED_OUTSIDE_SMX |
+ FEAT_CTL_SGX_LC_ENABLED |
+ FEAT_CTL_SGX_ENABLED |
+ FEAT_CTL_LMCE_ENABLED;
+ int bit, r;
+
+ __ia32_feature_control_msr_test(vcpu, FEAT_CTL_VMX_ENABLED_INSIDE_SMX, X86_FEATURE_SMX);
+ __ia32_feature_control_msr_test(vcpu, FEAT_CTL_VMX_ENABLED_INSIDE_SMX, X86_FEATURE_VMX);
+ __ia32_feature_control_msr_test(vcpu, FEAT_CTL_VMX_ENABLED_OUTSIDE_SMX, X86_FEATURE_VMX);
+ __ia32_feature_control_msr_test(vcpu, FEAT_CTL_SGX_LC_ENABLED, X86_FEATURE_SGX_LC);
+ __ia32_feature_control_msr_test(vcpu, FEAT_CTL_SGX_LC_ENABLED, X86_FEATURE_SGX);
+ __ia32_feature_control_msr_test(vcpu, FEAT_CTL_SGX_ENABLED, X86_FEATURE_SGX);
+ __ia32_feature_control_msr_test(vcpu, FEAT_CTL_LMCE_ENABLED, X86_FEATURE_MCE);
+
+ for_each_clear_bit(bit, &supported_bits, 64) {
+ r = _vcpu_set_msr(vcpu, MSR_IA32_FEAT_CTL, BIT(bit));
+ TEST_ASSERT(r == 0,
+ "Setting reserved bit %d in IA32_FEATURE_CONTROL should fail", bit);
+ }
+}
+
+int main(void)
+{
+ struct kvm_vcpu *vcpu;
+ struct kvm_vm *vm;
+
+ TEST_REQUIRE(kvm_has_cap(KVM_CAP_DISABLE_QUIRKS2));
+ TEST_REQUIRE(kvm_cpu_has(X86_FEATURE_VMX));
+
+ /* No need to actually do KVM_RUN, thus no guest code. */
+ vm = vm_create_with_one_vcpu(&vcpu, NULL);
+
+ vmx_save_restore_msrs_test(vcpu);
+ ia32_feature_control_msr_test(vcpu);
+
+ kvm_vm_free(vm);
+}
diff --git a/tools/testing/selftests/kvm/x86_64/vmx_nested_tsc_scaling_test.c b/tools/testing/selftests/kvm/x86_64/vmx_nested_tsc_scaling_test.c
new file mode 100644
index 000000000000..1759fa5cb3f2
--- /dev/null
+++ b/tools/testing/selftests/kvm/x86_64/vmx_nested_tsc_scaling_test.c
@@ -0,0 +1,206 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * vmx_nested_tsc_scaling_test
+ *
+ * Copyright 2021 Amazon.com, Inc. or its affiliates. All Rights Reserved.
+ *
+ * This test case verifies that nested TSC scaling behaves as expected when
+ * both L1 and L2 are scaled using different ratios. For this test we scale
+ * L1 down and scale L2 up.
+ */
+
+#include <time.h>
+
+#include "kvm_util.h"
+#include "vmx.h"
+#include "kselftest.h"
+
+/* L2 is scaled up (from L1's perspective) by this factor */
+#define L2_SCALE_FACTOR 4ULL
+
+#define TSC_OFFSET_L2 ((uint64_t) -33125236320908)
+#define TSC_MULTIPLIER_L2 (L2_SCALE_FACTOR << 48)
+
+#define L2_GUEST_STACK_SIZE 64
+
+enum { USLEEP, UCHECK_L1, UCHECK_L2 };
+#define GUEST_SLEEP(sec) ucall(UCALL_SYNC, 2, USLEEP, sec)
+#define GUEST_CHECK(level, freq) ucall(UCALL_SYNC, 2, level, freq)
+
+
+/*
+ * This function checks whether the "actual" TSC frequency of a guest matches
+ * its expected frequency. In order to account for delays in taking the TSC
+ * measurements, a difference of 1% between the actual and the expected value
+ * is tolerated.
+ */
+static void compare_tsc_freq(uint64_t actual, uint64_t expected)
+{
+ uint64_t tolerance, thresh_low, thresh_high;
+
+ tolerance = expected / 100;
+ thresh_low = expected - tolerance;
+ thresh_high = expected + tolerance;
+
+ TEST_ASSERT(thresh_low < actual,
+ "TSC freq is expected to be between %"PRIu64" and %"PRIu64
+ " but it actually is %"PRIu64,
+ thresh_low, thresh_high, actual);
+ TEST_ASSERT(thresh_high > actual,
+ "TSC freq is expected to be between %"PRIu64" and %"PRIu64
+ " but it actually is %"PRIu64,
+ thresh_low, thresh_high, actual);
+}
+
+static void check_tsc_freq(int level)
+{
+ uint64_t tsc_start, tsc_end, tsc_freq;
+
+ /*
+ * Reading the TSC twice with about a second's difference should give
+ * us an approximation of the TSC frequency from the guest's
+ * perspective. Now, this won't be completely accurate, but it should
+ * be good enough for the purposes of this test.
+ */
+ tsc_start = rdmsr(MSR_IA32_TSC);
+ GUEST_SLEEP(1);
+ tsc_end = rdmsr(MSR_IA32_TSC);
+
+ tsc_freq = tsc_end - tsc_start;
+
+ GUEST_CHECK(level, tsc_freq);
+}
+
+static void l2_guest_code(void)
+{
+ check_tsc_freq(UCHECK_L2);
+
+ /* exit to L1 */
+ __asm__ __volatile__("vmcall");
+}
+
+static void l1_guest_code(struct vmx_pages *vmx_pages)
+{
+ unsigned long l2_guest_stack[L2_GUEST_STACK_SIZE];
+ uint32_t control;
+
+ /* check that L1's frequency looks alright before launching L2 */
+ check_tsc_freq(UCHECK_L1);
+
+ GUEST_ASSERT(prepare_for_vmx_operation(vmx_pages));
+ GUEST_ASSERT(load_vmcs(vmx_pages));
+
+ /* prepare the VMCS for L2 execution */
+ prepare_vmcs(vmx_pages, l2_guest_code, &l2_guest_stack[L2_GUEST_STACK_SIZE]);
+
+ /* enable TSC offsetting and TSC scaling for L2 */
+ control = vmreadz(CPU_BASED_VM_EXEC_CONTROL);
+ control |= CPU_BASED_USE_MSR_BITMAPS | CPU_BASED_USE_TSC_OFFSETTING;
+ vmwrite(CPU_BASED_VM_EXEC_CONTROL, control);
+
+ control = vmreadz(SECONDARY_VM_EXEC_CONTROL);
+ control |= SECONDARY_EXEC_TSC_SCALING;
+ vmwrite(SECONDARY_VM_EXEC_CONTROL, control);
+
+ vmwrite(TSC_OFFSET, TSC_OFFSET_L2);
+ vmwrite(TSC_MULTIPLIER, TSC_MULTIPLIER_L2);
+ vmwrite(TSC_MULTIPLIER_HIGH, TSC_MULTIPLIER_L2 >> 32);
+
+ /* launch L2 */
+ GUEST_ASSERT(!vmlaunch());
+ GUEST_ASSERT(vmreadz(VM_EXIT_REASON) == EXIT_REASON_VMCALL);
+
+ /* check that L1's frequency still looks good */
+ check_tsc_freq(UCHECK_L1);
+
+ GUEST_DONE();
+}
+
+int main(int argc, char *argv[])
+{
+ struct kvm_vcpu *vcpu;
+ struct kvm_vm *vm;
+ vm_vaddr_t vmx_pages_gva;
+
+ uint64_t tsc_start, tsc_end;
+ uint64_t tsc_khz;
+ uint64_t l1_scale_factor;
+ uint64_t l0_tsc_freq = 0;
+ uint64_t l1_tsc_freq = 0;
+ uint64_t l2_tsc_freq = 0;
+
+ TEST_REQUIRE(kvm_cpu_has(X86_FEATURE_VMX));
+ TEST_REQUIRE(kvm_has_cap(KVM_CAP_TSC_CONTROL));
+ TEST_REQUIRE(sys_clocksource_is_based_on_tsc());
+
+ /*
+ * We set L1's scale factor to be a random number from 2 to 10.
+ * Ideally we would do the same for L2's factor but that one is
+ * referenced by both main() and l1_guest_code() and using a global
+ * variable does not work.
+ */
+ srand(time(NULL));
+ l1_scale_factor = (rand() % 9) + 2;
+ printf("L1's scale down factor is: %"PRIu64"\n", l1_scale_factor);
+ printf("L2's scale up factor is: %llu\n", L2_SCALE_FACTOR);
+
+ tsc_start = rdtsc();
+ sleep(1);
+ tsc_end = rdtsc();
+
+ l0_tsc_freq = tsc_end - tsc_start;
+ printf("real TSC frequency is around: %"PRIu64"\n", l0_tsc_freq);
+
+ vm = vm_create_with_one_vcpu(&vcpu, l1_guest_code);
+ vcpu_alloc_vmx(vm, &vmx_pages_gva);
+ vcpu_args_set(vcpu, 1, vmx_pages_gva);
+
+ tsc_khz = __vcpu_ioctl(vcpu, KVM_GET_TSC_KHZ, NULL);
+ TEST_ASSERT(tsc_khz != -1, "vcpu ioctl KVM_GET_TSC_KHZ failed");
+
+ /* scale down L1's TSC frequency */
+ vcpu_ioctl(vcpu, KVM_SET_TSC_KHZ, (void *) (tsc_khz / l1_scale_factor));
+
+ for (;;) {
+ struct ucall uc;
+
+ vcpu_run(vcpu);
+ TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_IO);
+
+ switch (get_ucall(vcpu, &uc)) {
+ case UCALL_ABORT:
+ REPORT_GUEST_ASSERT(uc);
+ case UCALL_SYNC:
+ switch (uc.args[0]) {
+ case USLEEP:
+ sleep(uc.args[1]);
+ break;
+ case UCHECK_L1:
+ l1_tsc_freq = uc.args[1];
+ printf("L1's TSC frequency is around: %"PRIu64
+ "\n", l1_tsc_freq);
+
+ compare_tsc_freq(l1_tsc_freq,
+ l0_tsc_freq / l1_scale_factor);
+ break;
+ case UCHECK_L2:
+ l2_tsc_freq = uc.args[1];
+ printf("L2's TSC frequency is around: %"PRIu64
+ "\n", l2_tsc_freq);
+
+ compare_tsc_freq(l2_tsc_freq,
+ l1_tsc_freq * L2_SCALE_FACTOR);
+ break;
+ }
+ break;
+ case UCALL_DONE:
+ goto done;
+ default:
+ TEST_FAIL("Unknown ucall %lu", uc.cmd);
+ }
+ }
+
+done:
+ kvm_vm_free(vm);
+ return 0;
+}
diff --git a/tools/testing/selftests/kvm/x86_64/vmx_pmu_caps_test.c b/tools/testing/selftests/kvm/x86_64/vmx_pmu_caps_test.c
new file mode 100644
index 000000000000..ea0cb3cae0f7
--- /dev/null
+++ b/tools/testing/selftests/kvm/x86_64/vmx_pmu_caps_test.c
@@ -0,0 +1,228 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Test for VMX-pmu perf capability msr
+ *
+ * Copyright (C) 2021 Intel Corporation
+ *
+ * Test to check the effect of various CPUID settings on
+ * MSR_IA32_PERF_CAPABILITIES MSR, and check that what
+ * we write with KVM_SET_MSR is _not_ modified by the guest
+ * and check it can be retrieved with KVM_GET_MSR, also test
+ * the invalid LBR formats are rejected.
+ */
+#define _GNU_SOURCE /* for program_invocation_short_name */
+#include <sys/ioctl.h>
+
+#include <linux/bitmap.h>
+
+#include "kvm_test_harness.h"
+#include "kvm_util.h"
+#include "vmx.h"
+
+static union perf_capabilities {
+ struct {
+ u64 lbr_format:6;
+ u64 pebs_trap:1;
+ u64 pebs_arch_reg:1;
+ u64 pebs_format:4;
+ u64 smm_freeze:1;
+ u64 full_width_write:1;
+ u64 pebs_baseline:1;
+ u64 perf_metrics:1;
+ u64 pebs_output_pt_available:1;
+ u64 anythread_deprecated:1;
+ };
+ u64 capabilities;
+} host_cap;
+
+/*
+ * The LBR format and most PEBS features are immutable, all other features are
+ * fungible (if supported by the host and KVM).
+ */
+static const union perf_capabilities immutable_caps = {
+ .lbr_format = -1,
+ .pebs_trap = 1,
+ .pebs_arch_reg = 1,
+ .pebs_format = -1,
+ .pebs_baseline = 1,
+};
+
+static const union perf_capabilities format_caps = {
+ .lbr_format = -1,
+ .pebs_format = -1,
+};
+
+static void guest_test_perf_capabilities_gp(uint64_t val)
+{
+ uint8_t vector = wrmsr_safe(MSR_IA32_PERF_CAPABILITIES, val);
+
+ __GUEST_ASSERT(vector == GP_VECTOR,
+ "Expected #GP for value '0x%lx', got vector '0x%x'",
+ val, vector);
+}
+
+static void guest_code(uint64_t current_val)
+{
+ int i;
+
+ guest_test_perf_capabilities_gp(current_val);
+ guest_test_perf_capabilities_gp(0);
+
+ for (i = 0; i < 64; i++)
+ guest_test_perf_capabilities_gp(current_val ^ BIT_ULL(i));
+
+ GUEST_DONE();
+}
+
+KVM_ONE_VCPU_TEST_SUITE(vmx_pmu_caps);
+
+/*
+ * Verify that guest WRMSRs to PERF_CAPABILITIES #GP regardless of the value
+ * written, that the guest always sees the userspace controlled value, and that
+ * PERF_CAPABILITIES is immutable after KVM_RUN.
+ */
+KVM_ONE_VCPU_TEST(vmx_pmu_caps, guest_wrmsr_perf_capabilities, guest_code)
+{
+ struct ucall uc;
+ int r, i;
+
+ vm_init_descriptor_tables(vcpu->vm);
+ vcpu_init_descriptor_tables(vcpu);
+
+ vcpu_set_msr(vcpu, MSR_IA32_PERF_CAPABILITIES, host_cap.capabilities);
+
+ vcpu_args_set(vcpu, 1, host_cap.capabilities);
+ vcpu_run(vcpu);
+
+ switch (get_ucall(vcpu, &uc)) {
+ case UCALL_ABORT:
+ REPORT_GUEST_ASSERT(uc);
+ break;
+ case UCALL_DONE:
+ break;
+ default:
+ TEST_FAIL("Unexpected ucall: %lu", uc.cmd);
+ }
+
+ TEST_ASSERT_EQ(vcpu_get_msr(vcpu, MSR_IA32_PERF_CAPABILITIES),
+ host_cap.capabilities);
+
+ vcpu_set_msr(vcpu, MSR_IA32_PERF_CAPABILITIES, host_cap.capabilities);
+
+ r = _vcpu_set_msr(vcpu, MSR_IA32_PERF_CAPABILITIES, 0);
+ TEST_ASSERT(!r, "Post-KVM_RUN write '0' didn't fail");
+
+ for (i = 0; i < 64; i++) {
+ r = _vcpu_set_msr(vcpu, MSR_IA32_PERF_CAPABILITIES,
+ host_cap.capabilities ^ BIT_ULL(i));
+ TEST_ASSERT(!r, "Post-KVM_RUN write '0x%llx'didn't fail",
+ host_cap.capabilities ^ BIT_ULL(i));
+ }
+}
+
+/*
+ * Verify KVM allows writing PERF_CAPABILITIES with all KVM-supported features
+ * enabled, as well as '0' (to disable all features).
+ */
+KVM_ONE_VCPU_TEST(vmx_pmu_caps, basic_perf_capabilities, guest_code)
+{
+ vcpu_set_msr(vcpu, MSR_IA32_PERF_CAPABILITIES, 0);
+ vcpu_set_msr(vcpu, MSR_IA32_PERF_CAPABILITIES, host_cap.capabilities);
+}
+
+KVM_ONE_VCPU_TEST(vmx_pmu_caps, fungible_perf_capabilities, guest_code)
+{
+ const uint64_t fungible_caps = host_cap.capabilities & ~immutable_caps.capabilities;
+ int bit;
+
+ for_each_set_bit(bit, &fungible_caps, 64) {
+ vcpu_set_msr(vcpu, MSR_IA32_PERF_CAPABILITIES, BIT_ULL(bit));
+ vcpu_set_msr(vcpu, MSR_IA32_PERF_CAPABILITIES,
+ host_cap.capabilities & ~BIT_ULL(bit));
+ }
+ vcpu_set_msr(vcpu, MSR_IA32_PERF_CAPABILITIES, host_cap.capabilities);
+}
+
+/*
+ * Verify KVM rejects attempts to set unsupported and/or immutable features in
+ * PERF_CAPABILITIES. Note, LBR format and PEBS format need to be validated
+ * separately as they are multi-bit values, e.g. toggling or setting a single
+ * bit can generate a false positive without dedicated safeguards.
+ */
+KVM_ONE_VCPU_TEST(vmx_pmu_caps, immutable_perf_capabilities, guest_code)
+{
+ const uint64_t reserved_caps = (~host_cap.capabilities |
+ immutable_caps.capabilities) &
+ ~format_caps.capabilities;
+ union perf_capabilities val = host_cap;
+ int r, bit;
+
+ for_each_set_bit(bit, &reserved_caps, 64) {
+ r = _vcpu_set_msr(vcpu, MSR_IA32_PERF_CAPABILITIES,
+ host_cap.capabilities ^ BIT_ULL(bit));
+ TEST_ASSERT(!r, "%s immutable feature 0x%llx (bit %d) didn't fail",
+ host_cap.capabilities & BIT_ULL(bit) ? "Setting" : "Clearing",
+ BIT_ULL(bit), bit);
+ }
+
+ /*
+ * KVM only supports the host's native LBR format, as well as '0' (to
+ * disable LBR support). Verify KVM rejects all other LBR formats.
+ */
+ for (val.lbr_format = 1; val.lbr_format; val.lbr_format++) {
+ if (val.lbr_format == host_cap.lbr_format)
+ continue;
+
+ r = _vcpu_set_msr(vcpu, MSR_IA32_PERF_CAPABILITIES, val.capabilities);
+ TEST_ASSERT(!r, "Bad LBR FMT = 0x%x didn't fail, host = 0x%x",
+ val.lbr_format, host_cap.lbr_format);
+ }
+
+ /* Ditto for the PEBS format. */
+ for (val.pebs_format = 1; val.pebs_format; val.pebs_format++) {
+ if (val.pebs_format == host_cap.pebs_format)
+ continue;
+
+ r = _vcpu_set_msr(vcpu, MSR_IA32_PERF_CAPABILITIES, val.capabilities);
+ TEST_ASSERT(!r, "Bad PEBS FMT = 0x%x didn't fail, host = 0x%x",
+ val.pebs_format, host_cap.pebs_format);
+ }
+}
+
+/*
+ * Test that LBR MSRs are writable when LBRs are enabled, and then verify that
+ * disabling the vPMU via CPUID also disables LBR support. Set bits 2:0 of
+ * LBR_TOS as those bits are writable across all uarch implementations (arch
+ * LBRs will need to poke a different MSR).
+ */
+KVM_ONE_VCPU_TEST(vmx_pmu_caps, lbr_perf_capabilities, guest_code)
+{
+ int r;
+
+ if (!host_cap.lbr_format)
+ return;
+
+ vcpu_set_msr(vcpu, MSR_IA32_PERF_CAPABILITIES, host_cap.capabilities);
+ vcpu_set_msr(vcpu, MSR_LBR_TOS, 7);
+
+ vcpu_clear_cpuid_entry(vcpu, X86_PROPERTY_PMU_VERSION.function);
+
+ r = _vcpu_set_msr(vcpu, MSR_LBR_TOS, 7);
+ TEST_ASSERT(!r, "Writing LBR_TOS should fail after disabling vPMU");
+}
+
+int main(int argc, char *argv[])
+{
+ TEST_REQUIRE(kvm_is_pmu_enabled());
+ TEST_REQUIRE(kvm_cpu_has(X86_FEATURE_PDCM));
+
+ TEST_REQUIRE(kvm_cpu_has_p(X86_PROPERTY_PMU_VERSION));
+ TEST_REQUIRE(kvm_cpu_property(X86_PROPERTY_PMU_VERSION) > 0);
+
+ host_cap.capabilities = kvm_get_feature_msr(MSR_IA32_PERF_CAPABILITIES);
+
+ TEST_ASSERT(host_cap.full_width_write,
+ "Full-width writes should always be supported");
+
+ return test_harness_run(argc, argv);
+}
diff --git a/tools/testing/selftests/kvm/x86_64/vmx_preemption_timer_test.c b/tools/testing/selftests/kvm/x86_64/vmx_preemption_timer_test.c
index a7737af1224f..affc32800158 100644
--- a/tools/testing/selftests/kvm/x86_64/vmx_preemption_timer_test.c
+++ b/tools/testing/selftests/kvm/x86_64/vmx_preemption_timer_test.c
@@ -22,7 +22,6 @@
#include "processor.h"
#include "vmx.h"
-#define VCPU_ID 5
#define PREEMPTION_TIMER_VALUE 100000000ull
#define PREEMPTION_TIMER_VALUE_THRESHOLD1 80000000ull
@@ -158,7 +157,7 @@ int main(int argc, char *argv[])
struct kvm_regs regs1, regs2;
struct kvm_vm *vm;
- struct kvm_run *run;
+ struct kvm_vcpu *vcpu;
struct kvm_x86_state *state;
struct ucall uc;
int stage;
@@ -167,34 +166,25 @@ int main(int argc, char *argv[])
* AMD currently does not implement any VMX features, so for now we
* just early out.
*/
- nested_vmx_check_supported();
+ TEST_REQUIRE(kvm_cpu_has(X86_FEATURE_VMX));
+
+ TEST_REQUIRE(kvm_has_cap(KVM_CAP_NESTED_STATE));
/* Create VM */
- vm = vm_create_default(VCPU_ID, 0, guest_code);
- vcpu_set_cpuid(vm, VCPU_ID, kvm_get_supported_cpuid());
- run = vcpu_state(vm, VCPU_ID);
-
- vcpu_regs_get(vm, VCPU_ID, &regs1);
-
- if (kvm_check_cap(KVM_CAP_NESTED_STATE)) {
- vcpu_alloc_vmx(vm, &vmx_pages_gva);
- vcpu_args_set(vm, VCPU_ID, 1, vmx_pages_gva);
- } else {
- pr_info("will skip vmx preemption timer checks\n");
- goto done;
- }
+ vm = vm_create_with_one_vcpu(&vcpu, guest_code);
+
+ vcpu_regs_get(vcpu, &regs1);
+
+ vcpu_alloc_vmx(vm, &vmx_pages_gva);
+ vcpu_args_set(vcpu, 1, vmx_pages_gva);
for (stage = 1;; stage++) {
- _vcpu_run(vm, VCPU_ID);
- TEST_ASSERT(run->exit_reason == KVM_EXIT_IO,
- "Stage %d: unexpected exit reason: %u (%s),\n",
- stage, run->exit_reason,
- exit_reason_str(run->exit_reason));
+ vcpu_run(vcpu);
+ TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_IO);
- switch (get_ucall(vm, VCPU_ID, &uc)) {
+ switch (get_ucall(vcpu, &uc)) {
case UCALL_ABORT:
- TEST_FAIL("%s at %s:%ld", (const char *)uc.args[0],
- __FILE__, uc.args[1]);
+ REPORT_GUEST_ASSERT(uc);
/* NOT REACHED */
case UCALL_SYNC:
break;
@@ -233,22 +223,19 @@ int main(int argc, char *argv[])
stage, uc.args[4], uc.args[5]);
}
- state = vcpu_save_state(vm, VCPU_ID);
+ state = vcpu_save_state(vcpu);
memset(&regs1, 0, sizeof(regs1));
- vcpu_regs_get(vm, VCPU_ID, &regs1);
+ vcpu_regs_get(vcpu, &regs1);
kvm_vm_release(vm);
/* Restore state in a new VM. */
- kvm_vm_restart(vm, O_RDWR);
- vm_vcpu_add(vm, VCPU_ID);
- vcpu_set_cpuid(vm, VCPU_ID, kvm_get_supported_cpuid());
- vcpu_load_state(vm, VCPU_ID, state);
- run = vcpu_state(vm, VCPU_ID);
- free(state);
+ vcpu = vm_recreate_with_one_vcpu(vm);
+ vcpu_load_state(vcpu, state);
+ kvm_x86_state_cleanup(state);
memset(&regs2, 0, sizeof(regs2));
- vcpu_regs_get(vm, VCPU_ID, &regs2);
+ vcpu_regs_get(vcpu, &regs2);
TEST_ASSERT(!memcmp(&regs1, &regs2, sizeof(regs2)),
"Unexpected register values after vcpu_load_state; rdi: %lx rsi: %lx",
(ulong) regs2.rdi, (ulong) regs2.rsi);
diff --git a/tools/testing/selftests/kvm/x86_64/vmx_set_nested_state_test.c b/tools/testing/selftests/kvm/x86_64/vmx_set_nested_state_test.c
index 54cdefdfb49d..67a62a5a8895 100644
--- a/tools/testing/selftests/kvm/x86_64/vmx_set_nested_state_test.c
+++ b/tools/testing/selftests/kvm/x86_64/vmx_set_nested_state_test.c
@@ -23,38 +23,37 @@
* changes this should be updated.
*/
#define VMCS12_REVISION 0x11e57ed0
-#define VCPU_ID 5
bool have_evmcs;
-void test_nested_state(struct kvm_vm *vm, struct kvm_nested_state *state)
+void test_nested_state(struct kvm_vcpu *vcpu, struct kvm_nested_state *state)
{
- vcpu_nested_state_set(vm, VCPU_ID, state, false);
+ vcpu_nested_state_set(vcpu, state);
}
-void test_nested_state_expect_errno(struct kvm_vm *vm,
+void test_nested_state_expect_errno(struct kvm_vcpu *vcpu,
struct kvm_nested_state *state,
int expected_errno)
{
int rv;
- rv = vcpu_nested_state_set(vm, VCPU_ID, state, true);
+ rv = __vcpu_nested_state_set(vcpu, state);
TEST_ASSERT(rv == -1 && errno == expected_errno,
"Expected %s (%d) from vcpu_nested_state_set but got rv: %i errno: %s (%d)",
strerror(expected_errno), expected_errno, rv, strerror(errno),
errno);
}
-void test_nested_state_expect_einval(struct kvm_vm *vm,
+void test_nested_state_expect_einval(struct kvm_vcpu *vcpu,
struct kvm_nested_state *state)
{
- test_nested_state_expect_errno(vm, state, EINVAL);
+ test_nested_state_expect_errno(vcpu, state, EINVAL);
}
-void test_nested_state_expect_efault(struct kvm_vm *vm,
+void test_nested_state_expect_efault(struct kvm_vcpu *vcpu,
struct kvm_nested_state *state)
{
- test_nested_state_expect_errno(vm, state, EFAULT);
+ test_nested_state_expect_errno(vcpu, state, EFAULT);
}
void set_revision_id_for_vmcs12(struct kvm_nested_state *state,
@@ -76,10 +75,8 @@ void set_default_state(struct kvm_nested_state *state)
void set_default_vmx_state(struct kvm_nested_state *state, int size)
{
memset(state, 0, size);
- state->flags = KVM_STATE_NESTED_GUEST_MODE |
- KVM_STATE_NESTED_RUN_PENDING;
if (have_evmcs)
- state->flags |= KVM_STATE_NESTED_EVMCS;
+ state->flags = KVM_STATE_NESTED_EVMCS;
state->format = 0;
state->size = size;
state->hdr.vmx.vmxon_pa = 0x1000;
@@ -88,7 +85,7 @@ void set_default_vmx_state(struct kvm_nested_state *state, int size)
set_revision_id_for_vmcs12(state, VMCS12_REVISION);
}
-void test_vmx_nested_state(struct kvm_vm *vm)
+void test_vmx_nested_state(struct kvm_vcpu *vcpu)
{
/* Add a page for VMCS12. */
const int state_sz = sizeof(struct kvm_nested_state) + getpagesize();
@@ -98,14 +95,14 @@ void test_vmx_nested_state(struct kvm_vm *vm)
/* The format must be set to 0. 0 for VMX, 1 for SVM. */
set_default_vmx_state(state, state_sz);
state->format = 1;
- test_nested_state_expect_einval(vm, state);
+ test_nested_state_expect_einval(vcpu, state);
/*
* We cannot virtualize anything if the guest does not have VMX
* enabled.
*/
set_default_vmx_state(state, state_sz);
- test_nested_state_expect_einval(vm, state);
+ test_nested_state_expect_einval(vcpu, state);
/*
* We cannot virtualize anything if the guest does not have VMX
@@ -114,50 +111,59 @@ void test_vmx_nested_state(struct kvm_vm *vm)
*/
set_default_vmx_state(state, state_sz);
state->hdr.vmx.vmxon_pa = -1ull;
- test_nested_state_expect_einval(vm, state);
+ test_nested_state_expect_einval(vcpu, state);
state->hdr.vmx.vmcs12_pa = -1ull;
state->flags = KVM_STATE_NESTED_EVMCS;
- test_nested_state_expect_einval(vm, state);
+ test_nested_state_expect_einval(vcpu, state);
state->flags = 0;
- test_nested_state(vm, state);
+ test_nested_state(vcpu, state);
/* Enable VMX in the guest CPUID. */
- vcpu_set_cpuid(vm, VCPU_ID, kvm_get_supported_cpuid());
+ vcpu_set_cpuid_feature(vcpu, X86_FEATURE_VMX);
/*
* Setting vmxon_pa == -1ull and vmcs_pa == -1ull exits early without
- * setting the nested state but flags other than eVMCS must be clear.
- * The eVMCS flag can be set if the enlightened VMCS capability has
- * been enabled.
+ * setting the nested state. When the eVMCS flag is not set, the
+ * expected return value is '0'.
*/
set_default_vmx_state(state, state_sz);
+ state->flags = 0;
state->hdr.vmx.vmxon_pa = -1ull;
state->hdr.vmx.vmcs12_pa = -1ull;
- test_nested_state_expect_einval(vm, state);
+ test_nested_state(vcpu, state);
- state->flags &= KVM_STATE_NESTED_EVMCS;
+ /*
+ * When eVMCS is supported, the eVMCS flag can only be set if the
+ * enlightened VMCS capability has been enabled.
+ */
if (have_evmcs) {
- test_nested_state_expect_einval(vm, state);
- vcpu_enable_evmcs(vm, VCPU_ID);
+ state->flags = KVM_STATE_NESTED_EVMCS;
+ test_nested_state_expect_einval(vcpu, state);
+ vcpu_enable_evmcs(vcpu);
+ test_nested_state(vcpu, state);
}
- test_nested_state(vm, state);
/* It is invalid to have vmxon_pa == -1ull and SMM flags non-zero. */
state->hdr.vmx.smm.flags = 1;
- test_nested_state_expect_einval(vm, state);
+ test_nested_state_expect_einval(vcpu, state);
+
+ /* Invalid flags are rejected. */
+ set_default_vmx_state(state, state_sz);
+ state->hdr.vmx.flags = ~0;
+ test_nested_state_expect_einval(vcpu, state);
/* It is invalid to have vmxon_pa == -1ull and vmcs_pa != -1ull. */
set_default_vmx_state(state, state_sz);
state->hdr.vmx.vmxon_pa = -1ull;
state->flags = 0;
- test_nested_state_expect_einval(vm, state);
+ test_nested_state_expect_einval(vcpu, state);
/* It is invalid to have vmxon_pa set to a non-page aligned address. */
set_default_vmx_state(state, state_sz);
state->hdr.vmx.vmxon_pa = 1;
- test_nested_state_expect_einval(vm, state);
+ test_nested_state_expect_einval(vcpu, state);
/*
* It is invalid to have KVM_STATE_NESTED_SMM_GUEST_MODE and
@@ -167,7 +173,7 @@ void test_vmx_nested_state(struct kvm_vm *vm)
state->flags = KVM_STATE_NESTED_GUEST_MODE |
KVM_STATE_NESTED_RUN_PENDING;
state->hdr.vmx.smm.flags = KVM_STATE_NESTED_SMM_GUEST_MODE;
- test_nested_state_expect_einval(vm, state);
+ test_nested_state_expect_einval(vcpu, state);
/*
* It is invalid to have any of the SMM flags set besides:
@@ -177,29 +183,50 @@ void test_vmx_nested_state(struct kvm_vm *vm)
set_default_vmx_state(state, state_sz);
state->hdr.vmx.smm.flags = ~(KVM_STATE_NESTED_SMM_GUEST_MODE |
KVM_STATE_NESTED_SMM_VMXON);
- test_nested_state_expect_einval(vm, state);
+ test_nested_state_expect_einval(vcpu, state);
/* Outside SMM, SMM flags must be zero. */
set_default_vmx_state(state, state_sz);
state->flags = 0;
state->hdr.vmx.smm.flags = KVM_STATE_NESTED_SMM_GUEST_MODE;
- test_nested_state_expect_einval(vm, state);
+ test_nested_state_expect_einval(vcpu, state);
+
+ /*
+ * Size must be large enough to fit kvm_nested_state and vmcs12
+ * if VMCS12 physical address is set
+ */
+ set_default_vmx_state(state, state_sz);
+ state->size = sizeof(*state);
+ state->flags = 0;
+ test_nested_state_expect_einval(vcpu, state);
+
+ set_default_vmx_state(state, state_sz);
+ state->size = sizeof(*state);
+ state->flags = 0;
+ state->hdr.vmx.vmcs12_pa = -1;
+ test_nested_state(vcpu, state);
+
+ /*
+ * KVM_SET_NESTED_STATE succeeds with invalid VMCS
+ * contents but L2 not running.
+ */
+ set_default_vmx_state(state, state_sz);
+ state->flags = 0;
+ test_nested_state(vcpu, state);
- /* Size must be large enough to fit kvm_nested_state and vmcs12. */
+ /* Invalid flags are rejected, even if no VMCS loaded. */
set_default_vmx_state(state, state_sz);
state->size = sizeof(*state);
- test_nested_state(vm, state);
+ state->flags = 0;
+ state->hdr.vmx.vmcs12_pa = -1;
+ state->hdr.vmx.flags = ~0;
+ test_nested_state_expect_einval(vcpu, state);
/* vmxon_pa cannot be the same address as vmcs_pa. */
set_default_vmx_state(state, state_sz);
state->hdr.vmx.vmxon_pa = 0;
state->hdr.vmx.vmcs12_pa = 0;
- test_nested_state_expect_einval(vm, state);
-
- /* The revision id for vmcs12 must be VMCS12_REVISION. */
- set_default_vmx_state(state, state_sz);
- set_revision_id_for_vmcs12(state, 0);
- test_nested_state_expect_einval(vm, state);
+ test_nested_state_expect_einval(vcpu, state);
/*
* Test that if we leave nesting the state reflects that when we get
@@ -209,8 +236,8 @@ void test_vmx_nested_state(struct kvm_vm *vm)
state->hdr.vmx.vmxon_pa = -1ull;
state->hdr.vmx.vmcs12_pa = -1ull;
state->flags = 0;
- test_nested_state(vm, state);
- vcpu_nested_state_get(vm, VCPU_ID, state);
+ test_nested_state(vcpu, state);
+ vcpu_nested_state_get(vcpu, state);
TEST_ASSERT(state->size >= sizeof(*state) && state->size <= state_sz,
"Size must be between %ld and %d. The size returned was %d.",
sizeof(*state), state_sz, state->size);
@@ -224,29 +251,32 @@ int main(int argc, char *argv[])
{
struct kvm_vm *vm;
struct kvm_nested_state state;
+ struct kvm_vcpu *vcpu;
have_evmcs = kvm_check_cap(KVM_CAP_HYPERV_ENLIGHTENED_VMCS);
- if (!kvm_check_cap(KVM_CAP_NESTED_STATE)) {
- print_skip("KVM_CAP_NESTED_STATE not available");
- exit(KSFT_SKIP);
- }
+ TEST_REQUIRE(kvm_has_cap(KVM_CAP_NESTED_STATE));
/*
* AMD currently does not implement set_nested_state, so for now we
* just early out.
*/
- nested_vmx_check_supported();
+ TEST_REQUIRE(kvm_cpu_has(X86_FEATURE_VMX));
- vm = vm_create_default(VCPU_ID, 0, 0);
+ vm = vm_create_with_one_vcpu(&vcpu, NULL);
+
+ /*
+ * First run tests with VMX disabled to check error handling.
+ */
+ vcpu_clear_cpuid_feature(vcpu, X86_FEATURE_VMX);
/* Passing a NULL kvm_nested_state causes a EFAULT. */
- test_nested_state_expect_efault(vm, NULL);
+ test_nested_state_expect_efault(vcpu, NULL);
/* 'size' cannot be smaller than sizeof(kvm_nested_state). */
set_default_state(&state);
state.size = 0;
- test_nested_state_expect_einval(vm, &state);
+ test_nested_state_expect_einval(vcpu, &state);
/*
* Setting the flags 0xf fails the flags check. The only flags that
@@ -257,7 +287,7 @@ int main(int argc, char *argv[])
*/
set_default_state(&state);
state.flags = 0xf;
- test_nested_state_expect_einval(vm, &state);
+ test_nested_state_expect_einval(vcpu, &state);
/*
* If KVM_STATE_NESTED_RUN_PENDING is set then
@@ -265,9 +295,9 @@ int main(int argc, char *argv[])
*/
set_default_state(&state);
state.flags = KVM_STATE_NESTED_RUN_PENDING;
- test_nested_state_expect_einval(vm, &state);
+ test_nested_state_expect_einval(vcpu, &state);
- test_vmx_nested_state(vm);
+ test_vmx_nested_state(vcpu);
kvm_vm_free(vm);
return 0;
diff --git a/tools/testing/selftests/kvm/x86_64/vmx_tsc_adjust_test.c b/tools/testing/selftests/kvm/x86_64/vmx_tsc_adjust_test.c
index fbe8417cbc2c..2ceb5c78c442 100644
--- a/tools/testing/selftests/kvm/x86_64/vmx_tsc_adjust_test.c
+++ b/tools/testing/selftests/kvm/x86_64/vmx_tsc_adjust_test.c
@@ -32,9 +32,6 @@
#define MSR_IA32_TSC_ADJUST 0x3b
#endif
-#define PAGE_SIZE 4096
-#define VCPU_ID 5
-
#define TSC_ADJUST_VALUE (1ll << 32)
#define TSC_OFFSET_VALUE -(1ll << 48)
@@ -52,11 +49,6 @@ enum {
NUM_VMX_PAGES,
};
-struct kvm_single_msr {
- struct kvm_msrs header;
- struct kvm_msr_entry entry;
-} __attribute__((packed));
-
/* The virtual machine object. */
static struct kvm_vm *vm;
@@ -128,29 +120,25 @@ static void report(int64_t val)
int main(int argc, char *argv[])
{
vm_vaddr_t vmx_pages_gva;
+ struct kvm_vcpu *vcpu;
- nested_vmx_check_supported();
+ TEST_REQUIRE(kvm_cpu_has(X86_FEATURE_VMX));
- vm = vm_create_default(VCPU_ID, 0, (void *) l1_guest_code);
- vcpu_set_cpuid(vm, VCPU_ID, kvm_get_supported_cpuid());
+ vm = vm_create_with_one_vcpu(&vcpu, (void *) l1_guest_code);
/* Allocate VMX pages and shared descriptors (vmx_pages). */
vcpu_alloc_vmx(vm, &vmx_pages_gva);
- vcpu_args_set(vm, VCPU_ID, 1, vmx_pages_gva);
+ vcpu_args_set(vcpu, 1, vmx_pages_gva);
for (;;) {
- volatile struct kvm_run *run = vcpu_state(vm, VCPU_ID);
struct ucall uc;
- vcpu_run(vm, VCPU_ID);
- TEST_ASSERT(run->exit_reason == KVM_EXIT_IO,
- "Got exit_reason other than KVM_EXIT_IO: %u (%s)\n",
- run->exit_reason,
- exit_reason_str(run->exit_reason));
+ vcpu_run(vcpu);
+ TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_IO);
- switch (get_ucall(vm, VCPU_ID, &uc)) {
+ switch (get_ucall(vcpu, &uc)) {
case UCALL_ABORT:
- TEST_FAIL("%s", (const char *)uc.args[0]);
+ REPORT_GUEST_ASSERT(uc);
/* NOT REACHED */
case UCALL_SYNC:
report(uc.args[1]);
@@ -162,7 +150,7 @@ int main(int argc, char *argv[])
}
}
- kvm_vm_free(vm);
done:
+ kvm_vm_free(vm);
return 0;
}
diff --git a/tools/testing/selftests/kvm/x86_64/xapic_ipi_test.c b/tools/testing/selftests/kvm/x86_64/xapic_ipi_test.c
new file mode 100644
index 000000000000..725c206ba0b9
--- /dev/null
+++ b/tools/testing/selftests/kvm/x86_64/xapic_ipi_test.c
@@ -0,0 +1,491 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * xapic_ipi_test
+ *
+ * Copyright (C) 2020, Google LLC.
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2.
+ *
+ * Test that when the APIC is in xAPIC mode, a vCPU can send an IPI to wake
+ * another vCPU that is halted when KVM's backing page for the APIC access
+ * address has been moved by mm.
+ *
+ * The test starts two vCPUs: one that sends IPIs and one that continually
+ * executes HLT. The sender checks that the halter has woken from the HLT and
+ * has reentered HLT before sending the next IPI. While the vCPUs are running,
+ * the host continually calls migrate_pages to move all of the process' pages
+ * amongst the available numa nodes on the machine.
+ *
+ * Migration is a command line option. When used on non-numa machines will
+ * exit with error. Test is still usefull on non-numa for testing IPIs.
+ */
+
+#define _GNU_SOURCE /* for program_invocation_short_name */
+#include <getopt.h>
+#include <pthread.h>
+#include <inttypes.h>
+#include <string.h>
+#include <time.h>
+
+#include "kvm_util.h"
+#include "numaif.h"
+#include "processor.h"
+#include "test_util.h"
+#include "vmx.h"
+
+/* Default running time for the test */
+#define DEFAULT_RUN_SECS 3
+
+/* Default delay between migrate_pages calls (microseconds) */
+#define DEFAULT_DELAY_USECS 500000
+
+/*
+ * Vector for IPI from sender vCPU to halting vCPU.
+ * Value is arbitrary and was chosen for the alternating bit pattern. Any
+ * value should work.
+ */
+#define IPI_VECTOR 0xa5
+
+/*
+ * Incremented in the IPI handler. Provides evidence to the sender that the IPI
+ * arrived at the destination
+ */
+static volatile uint64_t ipis_rcvd;
+
+/* Data struct shared between host main thread and vCPUs */
+struct test_data_page {
+ uint32_t halter_apic_id;
+ volatile uint64_t hlt_count;
+ volatile uint64_t wake_count;
+ uint64_t ipis_sent;
+ uint64_t migrations_attempted;
+ uint64_t migrations_completed;
+ uint32_t icr;
+ uint32_t icr2;
+ uint32_t halter_tpr;
+ uint32_t halter_ppr;
+
+ /*
+ * Record local version register as a cross-check that APIC access
+ * worked. Value should match what KVM reports (APIC_VERSION in
+ * arch/x86/kvm/lapic.c). If test is failing, check that values match
+ * to determine whether APIC access exits are working.
+ */
+ uint32_t halter_lvr;
+};
+
+struct thread_params {
+ struct test_data_page *data;
+ struct kvm_vcpu *vcpu;
+ uint64_t *pipis_rcvd; /* host address of ipis_rcvd global */
+};
+
+void verify_apic_base_addr(void)
+{
+ uint64_t msr = rdmsr(MSR_IA32_APICBASE);
+ uint64_t base = GET_APIC_BASE(msr);
+
+ GUEST_ASSERT(base == APIC_DEFAULT_GPA);
+}
+
+static void halter_guest_code(struct test_data_page *data)
+{
+ verify_apic_base_addr();
+ xapic_enable();
+
+ data->halter_apic_id = GET_APIC_ID_FIELD(xapic_read_reg(APIC_ID));
+ data->halter_lvr = xapic_read_reg(APIC_LVR);
+
+ /*
+ * Loop forever HLTing and recording halts & wakes. Disable interrupts
+ * each time around to minimize window between signaling the pending
+ * halt to the sender vCPU and executing the halt. No need to disable on
+ * first run as this vCPU executes first and the host waits for it to
+ * signal going into first halt before starting the sender vCPU. Record
+ * TPR and PPR for diagnostic purposes in case the test fails.
+ */
+ for (;;) {
+ data->halter_tpr = xapic_read_reg(APIC_TASKPRI);
+ data->halter_ppr = xapic_read_reg(APIC_PROCPRI);
+ data->hlt_count++;
+ asm volatile("sti; hlt; cli");
+ data->wake_count++;
+ }
+}
+
+/*
+ * Runs on halter vCPU when IPI arrives. Write an arbitrary non-zero value to
+ * enable diagnosing errant writes to the APIC access address backing page in
+ * case of test failure.
+ */
+static void guest_ipi_handler(struct ex_regs *regs)
+{
+ ipis_rcvd++;
+ xapic_write_reg(APIC_EOI, 77);
+}
+
+static void sender_guest_code(struct test_data_page *data)
+{
+ uint64_t last_wake_count;
+ uint64_t last_hlt_count;
+ uint64_t last_ipis_rcvd_count;
+ uint32_t icr_val;
+ uint32_t icr2_val;
+ uint64_t tsc_start;
+
+ verify_apic_base_addr();
+ xapic_enable();
+
+ /*
+ * Init interrupt command register for sending IPIs
+ *
+ * Delivery mode=fixed, per SDM:
+ * "Delivers the interrupt specified in the vector field to the target
+ * processor."
+ *
+ * Destination mode=physical i.e. specify target by its local APIC
+ * ID. This vCPU assumes that the halter vCPU has already started and
+ * set data->halter_apic_id.
+ */
+ icr_val = (APIC_DEST_PHYSICAL | APIC_DM_FIXED | IPI_VECTOR);
+ icr2_val = SET_APIC_DEST_FIELD(data->halter_apic_id);
+ data->icr = icr_val;
+ data->icr2 = icr2_val;
+
+ last_wake_count = data->wake_count;
+ last_hlt_count = data->hlt_count;
+ last_ipis_rcvd_count = ipis_rcvd;
+ for (;;) {
+ /*
+ * Send IPI to halter vCPU.
+ * First IPI can be sent unconditionally because halter vCPU
+ * starts earlier.
+ */
+ xapic_write_reg(APIC_ICR2, icr2_val);
+ xapic_write_reg(APIC_ICR, icr_val);
+ data->ipis_sent++;
+
+ /*
+ * Wait up to ~1 sec for halter to indicate that it has:
+ * 1. Received the IPI
+ * 2. Woken up from the halt
+ * 3. Gone back into halt
+ * Current CPUs typically run at 2.x Ghz which is ~2
+ * billion ticks per second.
+ */
+ tsc_start = rdtsc();
+ while (rdtsc() - tsc_start < 2000000000) {
+ if ((ipis_rcvd != last_ipis_rcvd_count) &&
+ (data->wake_count != last_wake_count) &&
+ (data->hlt_count != last_hlt_count))
+ break;
+ }
+
+ GUEST_ASSERT((ipis_rcvd != last_ipis_rcvd_count) &&
+ (data->wake_count != last_wake_count) &&
+ (data->hlt_count != last_hlt_count));
+
+ last_wake_count = data->wake_count;
+ last_hlt_count = data->hlt_count;
+ last_ipis_rcvd_count = ipis_rcvd;
+ }
+}
+
+static void *vcpu_thread(void *arg)
+{
+ struct thread_params *params = (struct thread_params *)arg;
+ struct kvm_vcpu *vcpu = params->vcpu;
+ struct ucall uc;
+ int old;
+ int r;
+
+ r = pthread_setcanceltype(PTHREAD_CANCEL_ASYNCHRONOUS, &old);
+ TEST_ASSERT(r == 0,
+ "pthread_setcanceltype failed on vcpu_id=%u with errno=%d",
+ vcpu->id, r);
+
+ fprintf(stderr, "vCPU thread running vCPU %u\n", vcpu->id);
+ vcpu_run(vcpu);
+
+ TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_IO);
+
+ if (get_ucall(vcpu, &uc) == UCALL_ABORT) {
+ TEST_ASSERT(false,
+ "vCPU %u exited with error: %s.\n"
+ "Sending vCPU sent %lu IPIs to halting vCPU\n"
+ "Halting vCPU halted %lu times, woke %lu times, received %lu IPIs.\n"
+ "Halter TPR=%#x PPR=%#x LVR=%#x\n"
+ "Migrations attempted: %lu\n"
+ "Migrations completed: %lu",
+ vcpu->id, (const char *)uc.args[0],
+ params->data->ipis_sent, params->data->hlt_count,
+ params->data->wake_count,
+ *params->pipis_rcvd, params->data->halter_tpr,
+ params->data->halter_ppr, params->data->halter_lvr,
+ params->data->migrations_attempted,
+ params->data->migrations_completed);
+ }
+
+ return NULL;
+}
+
+static void cancel_join_vcpu_thread(pthread_t thread, struct kvm_vcpu *vcpu)
+{
+ void *retval;
+ int r;
+
+ r = pthread_cancel(thread);
+ TEST_ASSERT(r == 0,
+ "pthread_cancel on vcpu_id=%d failed with errno=%d",
+ vcpu->id, r);
+
+ r = pthread_join(thread, &retval);
+ TEST_ASSERT(r == 0,
+ "pthread_join on vcpu_id=%d failed with errno=%d",
+ vcpu->id, r);
+ TEST_ASSERT(retval == PTHREAD_CANCELED,
+ "expected retval=%p, got %p", PTHREAD_CANCELED,
+ retval);
+}
+
+void do_migrations(struct test_data_page *data, int run_secs, int delay_usecs,
+ uint64_t *pipis_rcvd)
+{
+ long pages_not_moved;
+ unsigned long nodemask = 0;
+ unsigned long nodemasks[sizeof(nodemask) * 8];
+ int nodes = 0;
+ time_t start_time, last_update, now;
+ time_t interval_secs = 1;
+ int i, r;
+ int from, to;
+ unsigned long bit;
+ uint64_t hlt_count;
+ uint64_t wake_count;
+ uint64_t ipis_sent;
+
+ fprintf(stderr, "Calling migrate_pages every %d microseconds\n",
+ delay_usecs);
+
+ /* Get set of first 64 numa nodes available */
+ r = get_mempolicy(NULL, &nodemask, sizeof(nodemask) * 8,
+ 0, MPOL_F_MEMS_ALLOWED);
+ TEST_ASSERT(r == 0, "get_mempolicy failed errno=%d", errno);
+
+ fprintf(stderr, "Numa nodes found amongst first %lu possible nodes "
+ "(each 1-bit indicates node is present): %#lx\n",
+ sizeof(nodemask) * 8, nodemask);
+
+ /* Init array of masks containing a single-bit in each, one for each
+ * available node. migrate_pages called below requires specifying nodes
+ * as bit masks.
+ */
+ for (i = 0, bit = 1; i < sizeof(nodemask) * 8; i++, bit <<= 1) {
+ if (nodemask & bit) {
+ nodemasks[nodes] = nodemask & bit;
+ nodes++;
+ }
+ }
+
+ TEST_ASSERT(nodes > 1,
+ "Did not find at least 2 numa nodes. Can't do migration");
+
+ fprintf(stderr, "Migrating amongst %d nodes found\n", nodes);
+
+ from = 0;
+ to = 1;
+ start_time = time(NULL);
+ last_update = start_time;
+
+ ipis_sent = data->ipis_sent;
+ hlt_count = data->hlt_count;
+ wake_count = data->wake_count;
+
+ while ((int)(time(NULL) - start_time) < run_secs) {
+ data->migrations_attempted++;
+
+ /*
+ * migrate_pages with PID=0 will migrate all pages of this
+ * process between the nodes specified as bitmasks. The page
+ * backing the APIC access address belongs to this process
+ * because it is allocated by KVM in the context of the
+ * KVM_CREATE_VCPU ioctl. If that assumption ever changes this
+ * test may break or give a false positive signal.
+ */
+ pages_not_moved = migrate_pages(0, sizeof(nodemasks[from]),
+ &nodemasks[from],
+ &nodemasks[to]);
+ if (pages_not_moved < 0)
+ fprintf(stderr,
+ "migrate_pages failed, errno=%d\n", errno);
+ else if (pages_not_moved > 0)
+ fprintf(stderr,
+ "migrate_pages could not move %ld pages\n",
+ pages_not_moved);
+ else
+ data->migrations_completed++;
+
+ from = to;
+ to++;
+ if (to == nodes)
+ to = 0;
+
+ now = time(NULL);
+ if (((now - start_time) % interval_secs == 0) &&
+ (now != last_update)) {
+ last_update = now;
+ fprintf(stderr,
+ "%lu seconds: Migrations attempted=%lu completed=%lu, "
+ "IPIs sent=%lu received=%lu, HLTs=%lu wakes=%lu\n",
+ now - start_time, data->migrations_attempted,
+ data->migrations_completed,
+ data->ipis_sent, *pipis_rcvd,
+ data->hlt_count, data->wake_count);
+
+ TEST_ASSERT(ipis_sent != data->ipis_sent &&
+ hlt_count != data->hlt_count &&
+ wake_count != data->wake_count,
+ "IPI, HLT and wake count have not increased "
+ "in the last %lu seconds. "
+ "HLTer is likely hung.", interval_secs);
+
+ ipis_sent = data->ipis_sent;
+ hlt_count = data->hlt_count;
+ wake_count = data->wake_count;
+ }
+ usleep(delay_usecs);
+ }
+}
+
+void get_cmdline_args(int argc, char *argv[], int *run_secs,
+ bool *migrate, int *delay_usecs)
+{
+ for (;;) {
+ int opt = getopt(argc, argv, "s:d:m");
+
+ if (opt == -1)
+ break;
+ switch (opt) {
+ case 's':
+ *run_secs = parse_size(optarg);
+ break;
+ case 'm':
+ *migrate = true;
+ break;
+ case 'd':
+ *delay_usecs = parse_size(optarg);
+ break;
+ default:
+ TEST_ASSERT(false,
+ "Usage: -s <runtime seconds>. Default is %d seconds.\n"
+ "-m adds calls to migrate_pages while vCPUs are running."
+ " Default is no migrations.\n"
+ "-d <delay microseconds> - delay between migrate_pages() calls."
+ " Default is %d microseconds.",
+ DEFAULT_RUN_SECS, DEFAULT_DELAY_USECS);
+ }
+ }
+}
+
+int main(int argc, char *argv[])
+{
+ int r;
+ int wait_secs;
+ const int max_halter_wait = 10;
+ int run_secs = 0;
+ int delay_usecs = 0;
+ struct test_data_page *data;
+ vm_vaddr_t test_data_page_vaddr;
+ bool migrate = false;
+ pthread_t threads[2];
+ struct thread_params params[2];
+ struct kvm_vm *vm;
+ uint64_t *pipis_rcvd;
+
+ get_cmdline_args(argc, argv, &run_secs, &migrate, &delay_usecs);
+ if (run_secs <= 0)
+ run_secs = DEFAULT_RUN_SECS;
+ if (delay_usecs <= 0)
+ delay_usecs = DEFAULT_DELAY_USECS;
+
+ vm = vm_create_with_one_vcpu(&params[0].vcpu, halter_guest_code);
+
+ vm_init_descriptor_tables(vm);
+ vcpu_init_descriptor_tables(params[0].vcpu);
+ vm_install_exception_handler(vm, IPI_VECTOR, guest_ipi_handler);
+
+ virt_pg_map(vm, APIC_DEFAULT_GPA, APIC_DEFAULT_GPA);
+
+ params[1].vcpu = vm_vcpu_add(vm, 1, sender_guest_code);
+
+ test_data_page_vaddr = vm_vaddr_alloc_page(vm);
+ data = addr_gva2hva(vm, test_data_page_vaddr);
+ memset(data, 0, sizeof(*data));
+ params[0].data = data;
+ params[1].data = data;
+
+ vcpu_args_set(params[0].vcpu, 1, test_data_page_vaddr);
+ vcpu_args_set(params[1].vcpu, 1, test_data_page_vaddr);
+
+ pipis_rcvd = (uint64_t *)addr_gva2hva(vm, (uint64_t)&ipis_rcvd);
+ params[0].pipis_rcvd = pipis_rcvd;
+ params[1].pipis_rcvd = pipis_rcvd;
+
+ /* Start halter vCPU thread and wait for it to execute first HLT. */
+ r = pthread_create(&threads[0], NULL, vcpu_thread, &params[0]);
+ TEST_ASSERT(r == 0,
+ "pthread_create halter failed errno=%d", errno);
+ fprintf(stderr, "Halter vCPU thread started\n");
+
+ wait_secs = 0;
+ while ((wait_secs < max_halter_wait) && !data->hlt_count) {
+ sleep(1);
+ wait_secs++;
+ }
+
+ TEST_ASSERT(data->hlt_count,
+ "Halter vCPU did not execute first HLT within %d seconds",
+ max_halter_wait);
+
+ fprintf(stderr,
+ "Halter vCPU thread reported its APIC ID: %u after %d seconds.\n",
+ data->halter_apic_id, wait_secs);
+
+ r = pthread_create(&threads[1], NULL, vcpu_thread, &params[1]);
+ TEST_ASSERT(r == 0, "pthread_create sender failed errno=%d", errno);
+
+ fprintf(stderr,
+ "IPI sender vCPU thread started. Letting vCPUs run for %d seconds.\n",
+ run_secs);
+
+ if (!migrate)
+ sleep(run_secs);
+ else
+ do_migrations(data, run_secs, delay_usecs, pipis_rcvd);
+
+ /*
+ * Cancel threads and wait for them to stop.
+ */
+ cancel_join_vcpu_thread(threads[0], params[0].vcpu);
+ cancel_join_vcpu_thread(threads[1], params[1].vcpu);
+
+ fprintf(stderr,
+ "Test successful after running for %d seconds.\n"
+ "Sending vCPU sent %lu IPIs to halting vCPU\n"
+ "Halting vCPU halted %lu times, woke %lu times, received %lu IPIs.\n"
+ "Halter APIC ID=%#x\n"
+ "Sender ICR value=%#x ICR2 value=%#x\n"
+ "Halter TPR=%#x PPR=%#x LVR=%#x\n"
+ "Migrations attempted: %lu\n"
+ "Migrations completed: %lu\n",
+ run_secs, data->ipis_sent,
+ data->hlt_count, data->wake_count, *pipis_rcvd,
+ data->halter_apic_id,
+ data->icr, data->icr2,
+ data->halter_tpr, data->halter_ppr, data->halter_lvr,
+ data->migrations_attempted, data->migrations_completed);
+
+ kvm_vm_free(vm);
+
+ return 0;
+}
diff --git a/tools/testing/selftests/kvm/x86_64/xapic_state_test.c b/tools/testing/selftests/kvm/x86_64/xapic_state_test.c
new file mode 100644
index 000000000000..ab75b873a4ad
--- /dev/null
+++ b/tools/testing/selftests/kvm/x86_64/xapic_state_test.c
@@ -0,0 +1,215 @@
+// SPDX-License-Identifier: GPL-2.0-only
+#define _GNU_SOURCE /* for program_invocation_short_name */
+#include <fcntl.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/ioctl.h>
+
+#include "apic.h"
+#include "kvm_util.h"
+#include "processor.h"
+#include "test_util.h"
+
+struct xapic_vcpu {
+ struct kvm_vcpu *vcpu;
+ bool is_x2apic;
+};
+
+static void xapic_guest_code(void)
+{
+ asm volatile("cli");
+
+ xapic_enable();
+
+ while (1) {
+ uint64_t val = (u64)xapic_read_reg(APIC_IRR) |
+ (u64)xapic_read_reg(APIC_IRR + 0x10) << 32;
+
+ xapic_write_reg(APIC_ICR2, val >> 32);
+ xapic_write_reg(APIC_ICR, val);
+ GUEST_SYNC(val);
+ }
+}
+
+static void x2apic_guest_code(void)
+{
+ asm volatile("cli");
+
+ x2apic_enable();
+
+ do {
+ uint64_t val = x2apic_read_reg(APIC_IRR) |
+ x2apic_read_reg(APIC_IRR + 0x10) << 32;
+
+ x2apic_write_reg(APIC_ICR, val);
+ GUEST_SYNC(val);
+ } while (1);
+}
+
+static void ____test_icr(struct xapic_vcpu *x, uint64_t val)
+{
+ struct kvm_vcpu *vcpu = x->vcpu;
+ struct kvm_lapic_state xapic;
+ struct ucall uc;
+ uint64_t icr;
+
+ /*
+ * Tell the guest what ICR value to write. Use the IRR to pass info,
+ * all bits are valid and should not be modified by KVM (ignoring the
+ * fact that vectors 0-15 are technically illegal).
+ */
+ vcpu_ioctl(vcpu, KVM_GET_LAPIC, &xapic);
+ *((u32 *)&xapic.regs[APIC_IRR]) = val;
+ *((u32 *)&xapic.regs[APIC_IRR + 0x10]) = val >> 32;
+ vcpu_ioctl(vcpu, KVM_SET_LAPIC, &xapic);
+
+ vcpu_run(vcpu);
+ TEST_ASSERT_EQ(get_ucall(vcpu, &uc), UCALL_SYNC);
+ TEST_ASSERT_EQ(uc.args[1], val);
+
+ vcpu_ioctl(vcpu, KVM_GET_LAPIC, &xapic);
+ icr = (u64)(*((u32 *)&xapic.regs[APIC_ICR])) |
+ (u64)(*((u32 *)&xapic.regs[APIC_ICR2])) << 32;
+ if (!x->is_x2apic) {
+ val &= (-1u | (0xffull << (32 + 24)));
+ TEST_ASSERT_EQ(icr, val & ~APIC_ICR_BUSY);
+ } else {
+ TEST_ASSERT_EQ(icr & ~APIC_ICR_BUSY, val & ~APIC_ICR_BUSY);
+ }
+}
+
+#define X2APIC_RSVED_BITS_MASK (GENMASK_ULL(31,20) | \
+ GENMASK_ULL(17,16) | \
+ GENMASK_ULL(13,13))
+
+static void __test_icr(struct xapic_vcpu *x, uint64_t val)
+{
+ if (x->is_x2apic) {
+ /* Hardware writing vICR register requires reserved bits 31:20,
+ * 17:16 and 13 kept as zero to avoid #GP exception. Data value
+ * written to vICR should mask out those bits above.
+ */
+ val &= ~X2APIC_RSVED_BITS_MASK;
+ }
+ ____test_icr(x, val | APIC_ICR_BUSY);
+ ____test_icr(x, val & ~(u64)APIC_ICR_BUSY);
+}
+
+static void test_icr(struct xapic_vcpu *x)
+{
+ struct kvm_vcpu *vcpu = x->vcpu;
+ uint64_t icr, i, j;
+
+ icr = APIC_DEST_SELF | APIC_INT_ASSERT | APIC_DM_FIXED;
+ for (i = 0; i <= 0xff; i++)
+ __test_icr(x, icr | i);
+
+ icr = APIC_INT_ASSERT | APIC_DM_FIXED;
+ for (i = 0; i <= 0xff; i++)
+ __test_icr(x, icr | i);
+
+ /*
+ * Send all flavors of IPIs to non-existent vCPUs. TODO: use number of
+ * vCPUs, not vcpu.id + 1. Arbitrarily use vector 0xff.
+ */
+ icr = APIC_INT_ASSERT | 0xff;
+ for (i = 0; i < 0xff; i++) {
+ if (i == vcpu->id)
+ continue;
+ for (j = 0; j < 8; j++)
+ __test_icr(x, i << (32 + 24) | icr | (j << 8));
+ }
+
+ /* And again with a shorthand destination for all types of IPIs. */
+ icr = APIC_DEST_ALLBUT | APIC_INT_ASSERT;
+ for (i = 0; i < 8; i++)
+ __test_icr(x, icr | (i << 8));
+
+ /* And a few garbage value, just make sure it's an IRQ (blocked). */
+ __test_icr(x, 0xa5a5a5a5a5a5a5a5 & ~APIC_DM_FIXED_MASK);
+ __test_icr(x, 0x5a5a5a5a5a5a5a5a & ~APIC_DM_FIXED_MASK);
+ __test_icr(x, -1ull & ~APIC_DM_FIXED_MASK);
+}
+
+static void __test_apic_id(struct kvm_vcpu *vcpu, uint64_t apic_base)
+{
+ uint32_t apic_id, expected;
+ struct kvm_lapic_state xapic;
+
+ vcpu_set_msr(vcpu, MSR_IA32_APICBASE, apic_base);
+
+ vcpu_ioctl(vcpu, KVM_GET_LAPIC, &xapic);
+
+ expected = apic_base & X2APIC_ENABLE ? vcpu->id : vcpu->id << 24;
+ apic_id = *((u32 *)&xapic.regs[APIC_ID]);
+
+ TEST_ASSERT(apic_id == expected,
+ "APIC_ID not set back to %s format; wanted = %x, got = %x",
+ (apic_base & X2APIC_ENABLE) ? "x2APIC" : "xAPIC",
+ expected, apic_id);
+}
+
+/*
+ * Verify that KVM switches the APIC_ID between xAPIC and x2APIC when userspace
+ * stuffs MSR_IA32_APICBASE. Setting the APIC_ID when x2APIC is enabled and
+ * when the APIC transitions for DISABLED to ENABLED is architectural behavior
+ * (on Intel), whereas the x2APIC => xAPIC transition behavior is KVM ABI since
+ * attempted to transition from x2APIC to xAPIC without disabling the APIC is
+ * architecturally disallowed.
+ */
+static void test_apic_id(void)
+{
+ const uint32_t NR_VCPUS = 3;
+ struct kvm_vcpu *vcpus[NR_VCPUS];
+ uint64_t apic_base;
+ struct kvm_vm *vm;
+ int i;
+
+ vm = vm_create_with_vcpus(NR_VCPUS, NULL, vcpus);
+ vm_enable_cap(vm, KVM_CAP_X2APIC_API, KVM_X2APIC_API_USE_32BIT_IDS);
+
+ for (i = 0; i < NR_VCPUS; i++) {
+ apic_base = vcpu_get_msr(vcpus[i], MSR_IA32_APICBASE);
+
+ TEST_ASSERT(apic_base & MSR_IA32_APICBASE_ENABLE,
+ "APIC not in ENABLED state at vCPU RESET");
+ TEST_ASSERT(!(apic_base & X2APIC_ENABLE),
+ "APIC not in xAPIC mode at vCPU RESET");
+
+ __test_apic_id(vcpus[i], apic_base);
+ __test_apic_id(vcpus[i], apic_base | X2APIC_ENABLE);
+ __test_apic_id(vcpus[i], apic_base);
+ }
+
+ kvm_vm_free(vm);
+}
+
+int main(int argc, char *argv[])
+{
+ struct xapic_vcpu x = {
+ .vcpu = NULL,
+ .is_x2apic = true,
+ };
+ struct kvm_vm *vm;
+
+ vm = vm_create_with_one_vcpu(&x.vcpu, x2apic_guest_code);
+ test_icr(&x);
+ kvm_vm_free(vm);
+
+ /*
+ * Use a second VM for the xAPIC test so that x2APIC can be hidden from
+ * the guest in order to test AVIC. KVM disallows changing CPUID after
+ * KVM_RUN and AVIC is disabled if _any_ vCPU is allowed to use x2APIC.
+ */
+ vm = vm_create_with_one_vcpu(&x.vcpu, xapic_guest_code);
+ x.is_x2apic = false;
+
+ vcpu_clear_cpuid_feature(x.vcpu, X86_FEATURE_X2APIC);
+
+ virt_pg_map(vm, APIC_DEFAULT_GPA, APIC_DEFAULT_GPA);
+ test_icr(&x);
+ kvm_vm_free(vm);
+
+ test_apic_id();
+}
diff --git a/tools/testing/selftests/kvm/x86_64/xcr0_cpuid_test.c b/tools/testing/selftests/kvm/x86_64/xcr0_cpuid_test.c
new file mode 100644
index 000000000000..25a0b0db5c3c
--- /dev/null
+++ b/tools/testing/selftests/kvm/x86_64/xcr0_cpuid_test.c
@@ -0,0 +1,137 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * XCR0 cpuid test
+ *
+ * Copyright (C) 2022, Google LLC.
+ */
+#include <fcntl.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/ioctl.h>
+
+#include "test_util.h"
+
+#include "kvm_util.h"
+#include "processor.h"
+
+/*
+ * Assert that architectural dependency rules are satisfied, e.g. that AVX is
+ * supported if and only if SSE is supported.
+ */
+#define ASSERT_XFEATURE_DEPENDENCIES(supported_xcr0, xfeatures, dependencies) \
+do { \
+ uint64_t __supported = (supported_xcr0) & ((xfeatures) | (dependencies)); \
+ \
+ __GUEST_ASSERT((__supported & (xfeatures)) != (xfeatures) || \
+ __supported == ((xfeatures) | (dependencies)), \
+ "supported = 0x%lx, xfeatures = 0x%llx, dependencies = 0x%llx", \
+ __supported, (xfeatures), (dependencies)); \
+} while (0)
+
+/*
+ * Assert that KVM reports a sane, usable as-is XCR0. Architecturally, a CPU
+ * isn't strictly required to _support_ all XFeatures related to a feature, but
+ * at the same time XSETBV will #GP if bundled XFeatures aren't enabled and
+ * disabled coherently. E.g. a CPU can technically enumerate supported for
+ * XTILE_CFG but not XTILE_DATA, but attempting to enable XTILE_CFG without
+ * XTILE_DATA will #GP.
+ */
+#define ASSERT_ALL_OR_NONE_XFEATURE(supported_xcr0, xfeatures) \
+do { \
+ uint64_t __supported = (supported_xcr0) & (xfeatures); \
+ \
+ __GUEST_ASSERT(!__supported || __supported == (xfeatures), \
+ "supported = 0x%lx, xfeatures = 0x%llx", \
+ __supported, (xfeatures)); \
+} while (0)
+
+static void guest_code(void)
+{
+ uint64_t xcr0_reset;
+ uint64_t supported_xcr0;
+ int i, vector;
+
+ set_cr4(get_cr4() | X86_CR4_OSXSAVE);
+
+ xcr0_reset = xgetbv(0);
+ supported_xcr0 = this_cpu_supported_xcr0();
+
+ GUEST_ASSERT(xcr0_reset == XFEATURE_MASK_FP);
+
+ /* Check AVX */
+ ASSERT_XFEATURE_DEPENDENCIES(supported_xcr0,
+ XFEATURE_MASK_YMM,
+ XFEATURE_MASK_SSE);
+
+ /* Check MPX */
+ ASSERT_ALL_OR_NONE_XFEATURE(supported_xcr0,
+ XFEATURE_MASK_BNDREGS | XFEATURE_MASK_BNDCSR);
+
+ /* Check AVX-512 */
+ ASSERT_XFEATURE_DEPENDENCIES(supported_xcr0,
+ XFEATURE_MASK_AVX512,
+ XFEATURE_MASK_SSE | XFEATURE_MASK_YMM);
+ ASSERT_ALL_OR_NONE_XFEATURE(supported_xcr0,
+ XFEATURE_MASK_AVX512);
+
+ /* Check AMX */
+ ASSERT_ALL_OR_NONE_XFEATURE(supported_xcr0,
+ XFEATURE_MASK_XTILE);
+
+ vector = xsetbv_safe(0, supported_xcr0);
+ __GUEST_ASSERT(!vector,
+ "Expected success on XSETBV(0x%lx), got vector '0x%x'",
+ supported_xcr0, vector);
+
+ for (i = 0; i < 64; i++) {
+ if (supported_xcr0 & BIT_ULL(i))
+ continue;
+
+ vector = xsetbv_safe(0, supported_xcr0 | BIT_ULL(i));
+ __GUEST_ASSERT(vector == GP_VECTOR,
+ "Expected #GP on XSETBV(0x%llx), supported XCR0 = %lx, got vector '0x%x'",
+ BIT_ULL(i), supported_xcr0, vector);
+ }
+
+ GUEST_DONE();
+}
+
+int main(int argc, char *argv[])
+{
+ struct kvm_vcpu *vcpu;
+ struct kvm_run *run;
+ struct kvm_vm *vm;
+ struct ucall uc;
+
+ TEST_REQUIRE(kvm_cpu_has(X86_FEATURE_XSAVE));
+
+ vm = vm_create_with_one_vcpu(&vcpu, guest_code);
+ run = vcpu->run;
+
+ vm_init_descriptor_tables(vm);
+ vcpu_init_descriptor_tables(vcpu);
+
+ while (1) {
+ vcpu_run(vcpu);
+
+ TEST_ASSERT(run->exit_reason == KVM_EXIT_IO,
+ "Unexpected exit reason: %u (%s),",
+ run->exit_reason,
+ exit_reason_str(run->exit_reason));
+
+ switch (get_ucall(vcpu, &uc)) {
+ case UCALL_ABORT:
+ REPORT_GUEST_ASSERT(uc);
+ break;
+ case UCALL_DONE:
+ goto done;
+ default:
+ TEST_FAIL("Unknown ucall %lu", uc.cmd);
+ }
+ }
+
+done:
+ kvm_vm_free(vm);
+ return 0;
+}
diff --git a/tools/testing/selftests/kvm/x86_64/xen_shinfo_test.c b/tools/testing/selftests/kvm/x86_64/xen_shinfo_test.c
new file mode 100644
index 000000000000..d2ea0435f4f7
--- /dev/null
+++ b/tools/testing/selftests/kvm/x86_64/xen_shinfo_test.c
@@ -0,0 +1,1156 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright © 2021 Amazon.com, Inc. or its affiliates.
+ */
+
+#include "test_util.h"
+#include "kvm_util.h"
+#include "processor.h"
+
+#include <stdint.h>
+#include <time.h>
+#include <sched.h>
+#include <signal.h>
+#include <pthread.h>
+
+#include <sys/eventfd.h>
+
+#define SHINFO_REGION_GVA 0xc0000000ULL
+#define SHINFO_REGION_GPA 0xc0000000ULL
+#define SHINFO_REGION_SLOT 10
+
+#define DUMMY_REGION_GPA (SHINFO_REGION_GPA + (3 * PAGE_SIZE))
+#define DUMMY_REGION_SLOT 11
+
+#define DUMMY_REGION_GPA_2 (SHINFO_REGION_GPA + (4 * PAGE_SIZE))
+#define DUMMY_REGION_SLOT_2 12
+
+#define SHINFO_ADDR (SHINFO_REGION_GPA)
+#define VCPU_INFO_ADDR (SHINFO_REGION_GPA + 0x40)
+#define PVTIME_ADDR (SHINFO_REGION_GPA + PAGE_SIZE)
+#define RUNSTATE_ADDR (SHINFO_REGION_GPA + PAGE_SIZE + PAGE_SIZE - 15)
+
+#define SHINFO_VADDR (SHINFO_REGION_GVA)
+#define VCPU_INFO_VADDR (SHINFO_REGION_GVA + 0x40)
+#define RUNSTATE_VADDR (SHINFO_REGION_GVA + PAGE_SIZE + PAGE_SIZE - 15)
+
+#define EVTCHN_VECTOR 0x10
+
+#define EVTCHN_TEST1 15
+#define EVTCHN_TEST2 66
+#define EVTCHN_TIMER 13
+
+enum {
+ TEST_INJECT_VECTOR = 0,
+ TEST_RUNSTATE_runnable,
+ TEST_RUNSTATE_blocked,
+ TEST_RUNSTATE_offline,
+ TEST_RUNSTATE_ADJUST,
+ TEST_RUNSTATE_DATA,
+ TEST_STEAL_TIME,
+ TEST_EVTCHN_MASKED,
+ TEST_EVTCHN_UNMASKED,
+ TEST_EVTCHN_SLOWPATH,
+ TEST_EVTCHN_SEND_IOCTL,
+ TEST_EVTCHN_HCALL,
+ TEST_EVTCHN_HCALL_SLOWPATH,
+ TEST_EVTCHN_HCALL_EVENTFD,
+ TEST_TIMER_SETUP,
+ TEST_TIMER_WAIT,
+ TEST_TIMER_RESTORE,
+ TEST_POLL_READY,
+ TEST_POLL_TIMEOUT,
+ TEST_POLL_MASKED,
+ TEST_POLL_WAKE,
+ SET_VCPU_INFO,
+ TEST_TIMER_PAST,
+ TEST_LOCKING_SEND_RACE,
+ TEST_LOCKING_POLL_RACE,
+ TEST_LOCKING_POLL_TIMEOUT,
+ TEST_DONE,
+
+ TEST_GUEST_SAW_IRQ,
+};
+
+#define XEN_HYPERCALL_MSR 0x40000000
+
+#define MIN_STEAL_TIME 50000
+
+#define SHINFO_RACE_TIMEOUT 2 /* seconds */
+
+#define __HYPERVISOR_set_timer_op 15
+#define __HYPERVISOR_sched_op 29
+#define __HYPERVISOR_event_channel_op 32
+
+#define SCHEDOP_poll 3
+
+#define EVTCHNOP_send 4
+
+#define EVTCHNSTAT_interdomain 2
+
+struct evtchn_send {
+ u32 port;
+};
+
+struct sched_poll {
+ u32 *ports;
+ unsigned int nr_ports;
+ u64 timeout;
+};
+
+struct pvclock_vcpu_time_info {
+ u32 version;
+ u32 pad0;
+ u64 tsc_timestamp;
+ u64 system_time;
+ u32 tsc_to_system_mul;
+ s8 tsc_shift;
+ u8 flags;
+ u8 pad[2];
+} __attribute__((__packed__)); /* 32 bytes */
+
+struct pvclock_wall_clock {
+ u32 version;
+ u32 sec;
+ u32 nsec;
+} __attribute__((__packed__));
+
+struct vcpu_runstate_info {
+ uint32_t state;
+ uint64_t state_entry_time;
+ uint64_t time[5]; /* Extra field for overrun check */
+};
+
+struct compat_vcpu_runstate_info {
+ uint32_t state;
+ uint64_t state_entry_time;
+ uint64_t time[5];
+} __attribute__((__packed__));;
+
+struct arch_vcpu_info {
+ unsigned long cr2;
+ unsigned long pad; /* sizeof(vcpu_info_t) == 64 */
+};
+
+struct vcpu_info {
+ uint8_t evtchn_upcall_pending;
+ uint8_t evtchn_upcall_mask;
+ unsigned long evtchn_pending_sel;
+ struct arch_vcpu_info arch;
+ struct pvclock_vcpu_time_info time;
+}; /* 64 bytes (x86) */
+
+struct shared_info {
+ struct vcpu_info vcpu_info[32];
+ unsigned long evtchn_pending[64];
+ unsigned long evtchn_mask[64];
+ struct pvclock_wall_clock wc;
+ uint32_t wc_sec_hi;
+ /* arch_shared_info here */
+};
+
+#define RUNSTATE_running 0
+#define RUNSTATE_runnable 1
+#define RUNSTATE_blocked 2
+#define RUNSTATE_offline 3
+
+static const char *runstate_names[] = {
+ "running",
+ "runnable",
+ "blocked",
+ "offline"
+};
+
+struct {
+ struct kvm_irq_routing info;
+ struct kvm_irq_routing_entry entries[2];
+} irq_routes;
+
+static volatile bool guest_saw_irq;
+
+static void evtchn_handler(struct ex_regs *regs)
+{
+ struct vcpu_info *vi = (void *)VCPU_INFO_VADDR;
+ vi->evtchn_upcall_pending = 0;
+ vi->evtchn_pending_sel = 0;
+ guest_saw_irq = true;
+
+ GUEST_SYNC(TEST_GUEST_SAW_IRQ);
+}
+
+static void guest_wait_for_irq(void)
+{
+ while (!guest_saw_irq)
+ __asm__ __volatile__ ("rep nop" : : : "memory");
+ guest_saw_irq = false;
+}
+
+static void guest_code(void)
+{
+ struct vcpu_runstate_info *rs = (void *)RUNSTATE_VADDR;
+ int i;
+
+ __asm__ __volatile__(
+ "sti\n"
+ "nop\n"
+ );
+
+ /* Trigger an interrupt injection */
+ GUEST_SYNC(TEST_INJECT_VECTOR);
+
+ guest_wait_for_irq();
+
+ /* Test having the host set runstates manually */
+ GUEST_SYNC(TEST_RUNSTATE_runnable);
+ GUEST_ASSERT(rs->time[RUNSTATE_runnable] != 0);
+ GUEST_ASSERT(rs->state == 0);
+
+ GUEST_SYNC(TEST_RUNSTATE_blocked);
+ GUEST_ASSERT(rs->time[RUNSTATE_blocked] != 0);
+ GUEST_ASSERT(rs->state == 0);
+
+ GUEST_SYNC(TEST_RUNSTATE_offline);
+ GUEST_ASSERT(rs->time[RUNSTATE_offline] != 0);
+ GUEST_ASSERT(rs->state == 0);
+
+ /* Test runstate time adjust */
+ GUEST_SYNC(TEST_RUNSTATE_ADJUST);
+ GUEST_ASSERT(rs->time[RUNSTATE_blocked] == 0x5a);
+ GUEST_ASSERT(rs->time[RUNSTATE_offline] == 0x6b6b);
+
+ /* Test runstate time set */
+ GUEST_SYNC(TEST_RUNSTATE_DATA);
+ GUEST_ASSERT(rs->state_entry_time >= 0x8000);
+ GUEST_ASSERT(rs->time[RUNSTATE_runnable] == 0);
+ GUEST_ASSERT(rs->time[RUNSTATE_blocked] == 0x6b6b);
+ GUEST_ASSERT(rs->time[RUNSTATE_offline] == 0x5a);
+
+ /* sched_yield() should result in some 'runnable' time */
+ GUEST_SYNC(TEST_STEAL_TIME);
+ GUEST_ASSERT(rs->time[RUNSTATE_runnable] >= MIN_STEAL_TIME);
+
+ /* Attempt to deliver a *masked* interrupt */
+ GUEST_SYNC(TEST_EVTCHN_MASKED);
+
+ /* Wait until we see the bit set */
+ struct shared_info *si = (void *)SHINFO_VADDR;
+ while (!si->evtchn_pending[0])
+ __asm__ __volatile__ ("rep nop" : : : "memory");
+
+ /* Now deliver an *unmasked* interrupt */
+ GUEST_SYNC(TEST_EVTCHN_UNMASKED);
+
+ guest_wait_for_irq();
+
+ /* Change memslots and deliver an interrupt */
+ GUEST_SYNC(TEST_EVTCHN_SLOWPATH);
+
+ guest_wait_for_irq();
+
+ /* Deliver event channel with KVM_XEN_HVM_EVTCHN_SEND */
+ GUEST_SYNC(TEST_EVTCHN_SEND_IOCTL);
+
+ guest_wait_for_irq();
+
+ GUEST_SYNC(TEST_EVTCHN_HCALL);
+
+ /* Our turn. Deliver event channel (to ourselves) with
+ * EVTCHNOP_send hypercall. */
+ struct evtchn_send s = { .port = 127 };
+ xen_hypercall(__HYPERVISOR_event_channel_op, EVTCHNOP_send, &s);
+
+ guest_wait_for_irq();
+
+ GUEST_SYNC(TEST_EVTCHN_HCALL_SLOWPATH);
+
+ /*
+ * Same again, but this time the host has messed with memslots so it
+ * should take the slow path in kvm_xen_set_evtchn().
+ */
+ xen_hypercall(__HYPERVISOR_event_channel_op, EVTCHNOP_send, &s);
+
+ guest_wait_for_irq();
+
+ GUEST_SYNC(TEST_EVTCHN_HCALL_EVENTFD);
+
+ /* Deliver "outbound" event channel to an eventfd which
+ * happens to be one of our own irqfds. */
+ s.port = 197;
+ xen_hypercall(__HYPERVISOR_event_channel_op, EVTCHNOP_send, &s);
+
+ guest_wait_for_irq();
+
+ GUEST_SYNC(TEST_TIMER_SETUP);
+
+ /* Set a timer 100ms in the future. */
+ xen_hypercall(__HYPERVISOR_set_timer_op,
+ rs->state_entry_time + 100000000, NULL);
+
+ GUEST_SYNC(TEST_TIMER_WAIT);
+
+ /* Now wait for the timer */
+ guest_wait_for_irq();
+
+ GUEST_SYNC(TEST_TIMER_RESTORE);
+
+ /* The host has 'restored' the timer. Just wait for it. */
+ guest_wait_for_irq();
+
+ GUEST_SYNC(TEST_POLL_READY);
+
+ /* Poll for an event channel port which is already set */
+ u32 ports[1] = { EVTCHN_TIMER };
+ struct sched_poll p = {
+ .ports = ports,
+ .nr_ports = 1,
+ .timeout = 0,
+ };
+
+ xen_hypercall(__HYPERVISOR_sched_op, SCHEDOP_poll, &p);
+
+ GUEST_SYNC(TEST_POLL_TIMEOUT);
+
+ /* Poll for an unset port and wait for the timeout. */
+ p.timeout = 100000000;
+ xen_hypercall(__HYPERVISOR_sched_op, SCHEDOP_poll, &p);
+
+ GUEST_SYNC(TEST_POLL_MASKED);
+
+ /* A timer will wake the masked port we're waiting on, while we poll */
+ p.timeout = 0;
+ xen_hypercall(__HYPERVISOR_sched_op, SCHEDOP_poll, &p);
+
+ GUEST_SYNC(TEST_POLL_WAKE);
+
+ /* Set the vcpu_info to point at exactly the place it already is to
+ * make sure the attribute is functional. */
+ GUEST_SYNC(SET_VCPU_INFO);
+
+ /* A timer wake an *unmasked* port which should wake us with an
+ * actual interrupt, while we're polling on a different port. */
+ ports[0]++;
+ p.timeout = 0;
+ xen_hypercall(__HYPERVISOR_sched_op, SCHEDOP_poll, &p);
+
+ guest_wait_for_irq();
+
+ GUEST_SYNC(TEST_TIMER_PAST);
+
+ /* Timer should have fired already */
+ guest_wait_for_irq();
+
+ GUEST_SYNC(TEST_LOCKING_SEND_RACE);
+ /* Racing host ioctls */
+
+ guest_wait_for_irq();
+
+ GUEST_SYNC(TEST_LOCKING_POLL_RACE);
+ /* Racing vmcall against host ioctl */
+
+ ports[0] = 0;
+
+ p = (struct sched_poll) {
+ .ports = ports,
+ .nr_ports = 1,
+ .timeout = 0
+ };
+
+wait_for_timer:
+ /*
+ * Poll for a timer wake event while the worker thread is mucking with
+ * the shared info. KVM XEN drops timer IRQs if the shared info is
+ * invalid when the timer expires. Arbitrarily poll 100 times before
+ * giving up and asking the VMM to re-arm the timer. 100 polls should
+ * consume enough time to beat on KVM without taking too long if the
+ * timer IRQ is dropped due to an invalid event channel.
+ */
+ for (i = 0; i < 100 && !guest_saw_irq; i++)
+ __xen_hypercall(__HYPERVISOR_sched_op, SCHEDOP_poll, &p);
+
+ /*
+ * Re-send the timer IRQ if it was (likely) dropped due to the timer
+ * expiring while the event channel was invalid.
+ */
+ if (!guest_saw_irq) {
+ GUEST_SYNC(TEST_LOCKING_POLL_TIMEOUT);
+ goto wait_for_timer;
+ }
+ guest_saw_irq = false;
+
+ GUEST_SYNC(TEST_DONE);
+}
+
+static int cmp_timespec(struct timespec *a, struct timespec *b)
+{
+ if (a->tv_sec > b->tv_sec)
+ return 1;
+ else if (a->tv_sec < b->tv_sec)
+ return -1;
+ else if (a->tv_nsec > b->tv_nsec)
+ return 1;
+ else if (a->tv_nsec < b->tv_nsec)
+ return -1;
+ else
+ return 0;
+}
+
+static struct shared_info *shinfo;
+static struct vcpu_info *vinfo;
+static struct kvm_vcpu *vcpu;
+
+static void handle_alrm(int sig)
+{
+ if (vinfo)
+ printf("evtchn_upcall_pending 0x%x\n", vinfo->evtchn_upcall_pending);
+ vcpu_dump(stdout, vcpu, 0);
+ TEST_FAIL("IRQ delivery timed out");
+}
+
+static void *juggle_shinfo_state(void *arg)
+{
+ struct kvm_vm *vm = (struct kvm_vm *)arg;
+
+ struct kvm_xen_hvm_attr cache_activate_gfn = {
+ .type = KVM_XEN_ATTR_TYPE_SHARED_INFO,
+ .u.shared_info.gfn = SHINFO_REGION_GPA / PAGE_SIZE
+ };
+
+ struct kvm_xen_hvm_attr cache_deactivate_gfn = {
+ .type = KVM_XEN_ATTR_TYPE_SHARED_INFO,
+ .u.shared_info.gfn = KVM_XEN_INVALID_GFN
+ };
+
+ struct kvm_xen_hvm_attr cache_activate_hva = {
+ .type = KVM_XEN_ATTR_TYPE_SHARED_INFO_HVA,
+ .u.shared_info.hva = (unsigned long)shinfo
+ };
+
+ struct kvm_xen_hvm_attr cache_deactivate_hva = {
+ .type = KVM_XEN_ATTR_TYPE_SHARED_INFO,
+ .u.shared_info.hva = 0
+ };
+
+ int xen_caps = kvm_check_cap(KVM_CAP_XEN_HVM);
+
+ for (;;) {
+ __vm_ioctl(vm, KVM_XEN_HVM_SET_ATTR, &cache_activate_gfn);
+ pthread_testcancel();
+ __vm_ioctl(vm, KVM_XEN_HVM_SET_ATTR, &cache_deactivate_gfn);
+
+ if (xen_caps & KVM_XEN_HVM_CONFIG_SHARED_INFO_HVA) {
+ __vm_ioctl(vm, KVM_XEN_HVM_SET_ATTR, &cache_activate_hva);
+ pthread_testcancel();
+ __vm_ioctl(vm, KVM_XEN_HVM_SET_ATTR, &cache_deactivate_hva);
+ }
+ }
+
+ return NULL;
+}
+
+int main(int argc, char *argv[])
+{
+ struct timespec min_ts, max_ts, vm_ts;
+ struct kvm_xen_hvm_attr evt_reset;
+ struct kvm_vm *vm;
+ pthread_t thread;
+ bool verbose;
+ int ret;
+
+ verbose = argc > 1 && (!strncmp(argv[1], "-v", 3) ||
+ !strncmp(argv[1], "--verbose", 10));
+
+ int xen_caps = kvm_check_cap(KVM_CAP_XEN_HVM);
+ TEST_REQUIRE(xen_caps & KVM_XEN_HVM_CONFIG_SHARED_INFO);
+
+ bool do_runstate_tests = !!(xen_caps & KVM_XEN_HVM_CONFIG_RUNSTATE);
+ bool do_runstate_flag = !!(xen_caps & KVM_XEN_HVM_CONFIG_RUNSTATE_UPDATE_FLAG);
+ bool do_eventfd_tests = !!(xen_caps & KVM_XEN_HVM_CONFIG_EVTCHN_2LEVEL);
+ bool do_evtchn_tests = do_eventfd_tests && !!(xen_caps & KVM_XEN_HVM_CONFIG_EVTCHN_SEND);
+ bool has_shinfo_hva = !!(xen_caps & KVM_XEN_HVM_CONFIG_SHARED_INFO_HVA);
+
+ clock_gettime(CLOCK_REALTIME, &min_ts);
+
+ vm = vm_create_with_one_vcpu(&vcpu, guest_code);
+
+ /* Map a region for the shared_info page */
+ vm_userspace_mem_region_add(vm, VM_MEM_SRC_ANONYMOUS,
+ SHINFO_REGION_GPA, SHINFO_REGION_SLOT, 3, 0);
+ virt_map(vm, SHINFO_REGION_GVA, SHINFO_REGION_GPA, 3);
+
+ shinfo = addr_gpa2hva(vm, SHINFO_VADDR);
+
+ int zero_fd = open("/dev/zero", O_RDONLY);
+ TEST_ASSERT(zero_fd != -1, "Failed to open /dev/zero");
+
+ struct kvm_xen_hvm_config hvmc = {
+ .flags = KVM_XEN_HVM_CONFIG_INTERCEPT_HCALL,
+ .msr = XEN_HYPERCALL_MSR,
+ };
+
+ /* Let the kernel know that we *will* use it for sending all
+ * event channels, which lets it intercept SCHEDOP_poll */
+ if (do_evtchn_tests)
+ hvmc.flags |= KVM_XEN_HVM_CONFIG_EVTCHN_SEND;
+
+ vm_ioctl(vm, KVM_XEN_HVM_CONFIG, &hvmc);
+
+ struct kvm_xen_hvm_attr lm = {
+ .type = KVM_XEN_ATTR_TYPE_LONG_MODE,
+ .u.long_mode = 1,
+ };
+ vm_ioctl(vm, KVM_XEN_HVM_SET_ATTR, &lm);
+
+ if (do_runstate_flag) {
+ struct kvm_xen_hvm_attr ruf = {
+ .type = KVM_XEN_ATTR_TYPE_RUNSTATE_UPDATE_FLAG,
+ .u.runstate_update_flag = 1,
+ };
+ vm_ioctl(vm, KVM_XEN_HVM_SET_ATTR, &ruf);
+
+ ruf.u.runstate_update_flag = 0;
+ vm_ioctl(vm, KVM_XEN_HVM_GET_ATTR, &ruf);
+ TEST_ASSERT(ruf.u.runstate_update_flag == 1,
+ "Failed to read back RUNSTATE_UPDATE_FLAG attr");
+ }
+
+ struct kvm_xen_hvm_attr ha = {};
+
+ if (has_shinfo_hva) {
+ ha.type = KVM_XEN_ATTR_TYPE_SHARED_INFO_HVA;
+ ha.u.shared_info.hva = (unsigned long)shinfo;
+ } else {
+ ha.type = KVM_XEN_ATTR_TYPE_SHARED_INFO;
+ ha.u.shared_info.gfn = SHINFO_ADDR / PAGE_SIZE;
+ }
+
+ vm_ioctl(vm, KVM_XEN_HVM_SET_ATTR, &ha);
+
+ /*
+ * Test what happens when the HVA of the shinfo page is remapped after
+ * the kernel has a reference to it. But make sure we copy the clock
+ * info over since that's only set at setup time, and we test it later.
+ */
+ struct pvclock_wall_clock wc_copy = shinfo->wc;
+ void *m = mmap(shinfo, PAGE_SIZE, PROT_READ|PROT_WRITE, MAP_FIXED|MAP_PRIVATE, zero_fd, 0);
+ TEST_ASSERT(m == shinfo, "Failed to map /dev/zero over shared info");
+ shinfo->wc = wc_copy;
+
+ struct kvm_xen_vcpu_attr vi = {
+ .type = KVM_XEN_VCPU_ATTR_TYPE_VCPU_INFO,
+ .u.gpa = VCPU_INFO_ADDR,
+ };
+ vcpu_ioctl(vcpu, KVM_XEN_VCPU_SET_ATTR, &vi);
+
+ struct kvm_xen_vcpu_attr pvclock = {
+ .type = KVM_XEN_VCPU_ATTR_TYPE_VCPU_TIME_INFO,
+ .u.gpa = PVTIME_ADDR,
+ };
+ vcpu_ioctl(vcpu, KVM_XEN_VCPU_SET_ATTR, &pvclock);
+
+ struct kvm_xen_hvm_attr vec = {
+ .type = KVM_XEN_ATTR_TYPE_UPCALL_VECTOR,
+ .u.vector = EVTCHN_VECTOR,
+ };
+ vm_ioctl(vm, KVM_XEN_HVM_SET_ATTR, &vec);
+
+ vm_init_descriptor_tables(vm);
+ vcpu_init_descriptor_tables(vcpu);
+ vm_install_exception_handler(vm, EVTCHN_VECTOR, evtchn_handler);
+
+ if (do_runstate_tests) {
+ struct kvm_xen_vcpu_attr st = {
+ .type = KVM_XEN_VCPU_ATTR_TYPE_RUNSTATE_ADDR,
+ .u.gpa = RUNSTATE_ADDR,
+ };
+ vcpu_ioctl(vcpu, KVM_XEN_VCPU_SET_ATTR, &st);
+ }
+
+ int irq_fd[2] = { -1, -1 };
+
+ if (do_eventfd_tests) {
+ irq_fd[0] = eventfd(0, 0);
+ irq_fd[1] = eventfd(0, 0);
+
+ /* Unexpected, but not a KVM failure */
+ if (irq_fd[0] == -1 || irq_fd[1] == -1)
+ do_evtchn_tests = do_eventfd_tests = false;
+ }
+
+ if (do_eventfd_tests) {
+ irq_routes.info.nr = 2;
+
+ irq_routes.entries[0].gsi = 32;
+ irq_routes.entries[0].type = KVM_IRQ_ROUTING_XEN_EVTCHN;
+ irq_routes.entries[0].u.xen_evtchn.port = EVTCHN_TEST1;
+ irq_routes.entries[0].u.xen_evtchn.vcpu = vcpu->id;
+ irq_routes.entries[0].u.xen_evtchn.priority = KVM_IRQ_ROUTING_XEN_EVTCHN_PRIO_2LEVEL;
+
+ irq_routes.entries[1].gsi = 33;
+ irq_routes.entries[1].type = KVM_IRQ_ROUTING_XEN_EVTCHN;
+ irq_routes.entries[1].u.xen_evtchn.port = EVTCHN_TEST2;
+ irq_routes.entries[1].u.xen_evtchn.vcpu = vcpu->id;
+ irq_routes.entries[1].u.xen_evtchn.priority = KVM_IRQ_ROUTING_XEN_EVTCHN_PRIO_2LEVEL;
+
+ vm_ioctl(vm, KVM_SET_GSI_ROUTING, &irq_routes.info);
+
+ struct kvm_irqfd ifd = { };
+
+ ifd.fd = irq_fd[0];
+ ifd.gsi = 32;
+ vm_ioctl(vm, KVM_IRQFD, &ifd);
+
+ ifd.fd = irq_fd[1];
+ ifd.gsi = 33;
+ vm_ioctl(vm, KVM_IRQFD, &ifd);
+
+ struct sigaction sa = { };
+ sa.sa_handler = handle_alrm;
+ sigaction(SIGALRM, &sa, NULL);
+ }
+
+ struct kvm_xen_vcpu_attr tmr = {
+ .type = KVM_XEN_VCPU_ATTR_TYPE_TIMER,
+ .u.timer.port = EVTCHN_TIMER,
+ .u.timer.priority = KVM_IRQ_ROUTING_XEN_EVTCHN_PRIO_2LEVEL,
+ .u.timer.expires_ns = 0
+ };
+
+ if (do_evtchn_tests) {
+ struct kvm_xen_hvm_attr inj = {
+ .type = KVM_XEN_ATTR_TYPE_EVTCHN,
+ .u.evtchn.send_port = 127,
+ .u.evtchn.type = EVTCHNSTAT_interdomain,
+ .u.evtchn.flags = 0,
+ .u.evtchn.deliver.port.port = EVTCHN_TEST1,
+ .u.evtchn.deliver.port.vcpu = vcpu->id + 1,
+ .u.evtchn.deliver.port.priority = KVM_IRQ_ROUTING_XEN_EVTCHN_PRIO_2LEVEL,
+ };
+ vm_ioctl(vm, KVM_XEN_HVM_SET_ATTR, &inj);
+
+ /* Test migration to a different vCPU */
+ inj.u.evtchn.flags = KVM_XEN_EVTCHN_UPDATE;
+ inj.u.evtchn.deliver.port.vcpu = vcpu->id;
+ vm_ioctl(vm, KVM_XEN_HVM_SET_ATTR, &inj);
+
+ inj.u.evtchn.send_port = 197;
+ inj.u.evtchn.deliver.eventfd.port = 0;
+ inj.u.evtchn.deliver.eventfd.fd = irq_fd[1];
+ inj.u.evtchn.flags = 0;
+ vm_ioctl(vm, KVM_XEN_HVM_SET_ATTR, &inj);
+
+ vcpu_ioctl(vcpu, KVM_XEN_VCPU_SET_ATTR, &tmr);
+ }
+ vinfo = addr_gpa2hva(vm, VCPU_INFO_VADDR);
+ vinfo->evtchn_upcall_pending = 0;
+
+ struct vcpu_runstate_info *rs = addr_gpa2hva(vm, RUNSTATE_ADDR);
+ rs->state = 0x5a;
+
+ bool evtchn_irq_expected = false;
+
+ for (;;) {
+ struct ucall uc;
+
+ vcpu_run(vcpu);
+ TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_IO);
+
+ switch (get_ucall(vcpu, &uc)) {
+ case UCALL_ABORT:
+ REPORT_GUEST_ASSERT(uc);
+ /* NOT REACHED */
+ case UCALL_SYNC: {
+ struct kvm_xen_vcpu_attr rst;
+ long rundelay;
+
+ if (do_runstate_tests)
+ TEST_ASSERT(rs->state_entry_time == rs->time[0] +
+ rs->time[1] + rs->time[2] + rs->time[3],
+ "runstate times don't add up");
+
+ switch (uc.args[1]) {
+ case TEST_INJECT_VECTOR:
+ if (verbose)
+ printf("Delivering evtchn upcall\n");
+ evtchn_irq_expected = true;
+ vinfo->evtchn_upcall_pending = 1;
+ break;
+
+ case TEST_RUNSTATE_runnable...TEST_RUNSTATE_offline:
+ TEST_ASSERT(!evtchn_irq_expected, "Event channel IRQ not seen");
+ if (!do_runstate_tests)
+ goto done;
+ if (verbose)
+ printf("Testing runstate %s\n", runstate_names[uc.args[1]]);
+ rst.type = KVM_XEN_VCPU_ATTR_TYPE_RUNSTATE_CURRENT;
+ rst.u.runstate.state = uc.args[1] + RUNSTATE_runnable -
+ TEST_RUNSTATE_runnable;
+ vcpu_ioctl(vcpu, KVM_XEN_VCPU_SET_ATTR, &rst);
+ break;
+
+ case TEST_RUNSTATE_ADJUST:
+ if (verbose)
+ printf("Testing RUNSTATE_ADJUST\n");
+ rst.type = KVM_XEN_VCPU_ATTR_TYPE_RUNSTATE_ADJUST;
+ memset(&rst.u, 0, sizeof(rst.u));
+ rst.u.runstate.state = (uint64_t)-1;
+ rst.u.runstate.time_blocked =
+ 0x5a - rs->time[RUNSTATE_blocked];
+ rst.u.runstate.time_offline =
+ 0x6b6b - rs->time[RUNSTATE_offline];
+ rst.u.runstate.time_runnable = -rst.u.runstate.time_blocked -
+ rst.u.runstate.time_offline;
+ vcpu_ioctl(vcpu, KVM_XEN_VCPU_SET_ATTR, &rst);
+ break;
+
+ case TEST_RUNSTATE_DATA:
+ if (verbose)
+ printf("Testing RUNSTATE_DATA\n");
+ rst.type = KVM_XEN_VCPU_ATTR_TYPE_RUNSTATE_DATA;
+ memset(&rst.u, 0, sizeof(rst.u));
+ rst.u.runstate.state = RUNSTATE_running;
+ rst.u.runstate.state_entry_time = 0x6b6b + 0x5a;
+ rst.u.runstate.time_blocked = 0x6b6b;
+ rst.u.runstate.time_offline = 0x5a;
+ vcpu_ioctl(vcpu, KVM_XEN_VCPU_SET_ATTR, &rst);
+ break;
+
+ case TEST_STEAL_TIME:
+ if (verbose)
+ printf("Testing steal time\n");
+ /* Yield until scheduler delay exceeds target */
+ rundelay = get_run_delay() + MIN_STEAL_TIME;
+ do {
+ sched_yield();
+ } while (get_run_delay() < rundelay);
+ break;
+
+ case TEST_EVTCHN_MASKED:
+ if (!do_eventfd_tests)
+ goto done;
+ if (verbose)
+ printf("Testing masked event channel\n");
+ shinfo->evtchn_mask[0] = 1UL << EVTCHN_TEST1;
+ eventfd_write(irq_fd[0], 1UL);
+ alarm(1);
+ break;
+
+ case TEST_EVTCHN_UNMASKED:
+ if (verbose)
+ printf("Testing unmasked event channel\n");
+ /* Unmask that, but deliver the other one */
+ shinfo->evtchn_pending[0] = 0;
+ shinfo->evtchn_mask[0] = 0;
+ eventfd_write(irq_fd[1], 1UL);
+ evtchn_irq_expected = true;
+ alarm(1);
+ break;
+
+ case TEST_EVTCHN_SLOWPATH:
+ TEST_ASSERT(!evtchn_irq_expected,
+ "Expected event channel IRQ but it didn't happen");
+ shinfo->evtchn_pending[1] = 0;
+ if (verbose)
+ printf("Testing event channel after memslot change\n");
+ vm_userspace_mem_region_add(vm, VM_MEM_SRC_ANONYMOUS,
+ DUMMY_REGION_GPA, DUMMY_REGION_SLOT, 1, 0);
+ eventfd_write(irq_fd[0], 1UL);
+ evtchn_irq_expected = true;
+ alarm(1);
+ break;
+
+ case TEST_EVTCHN_SEND_IOCTL:
+ TEST_ASSERT(!evtchn_irq_expected,
+ "Expected event channel IRQ but it didn't happen");
+ if (!do_evtchn_tests)
+ goto done;
+
+ shinfo->evtchn_pending[0] = 0;
+ if (verbose)
+ printf("Testing injection with KVM_XEN_HVM_EVTCHN_SEND\n");
+
+ struct kvm_irq_routing_xen_evtchn e;
+ e.port = EVTCHN_TEST2;
+ e.vcpu = vcpu->id;
+ e.priority = KVM_IRQ_ROUTING_XEN_EVTCHN_PRIO_2LEVEL;
+
+ vm_ioctl(vm, KVM_XEN_HVM_EVTCHN_SEND, &e);
+ evtchn_irq_expected = true;
+ alarm(1);
+ break;
+
+ case TEST_EVTCHN_HCALL:
+ TEST_ASSERT(!evtchn_irq_expected,
+ "Expected event channel IRQ but it didn't happen");
+ shinfo->evtchn_pending[1] = 0;
+
+ if (verbose)
+ printf("Testing guest EVTCHNOP_send direct to evtchn\n");
+ evtchn_irq_expected = true;
+ alarm(1);
+ break;
+
+ case TEST_EVTCHN_HCALL_SLOWPATH:
+ TEST_ASSERT(!evtchn_irq_expected,
+ "Expected event channel IRQ but it didn't happen");
+ shinfo->evtchn_pending[0] = 0;
+
+ if (verbose)
+ printf("Testing guest EVTCHNOP_send direct to evtchn after memslot change\n");
+ vm_userspace_mem_region_add(vm, VM_MEM_SRC_ANONYMOUS,
+ DUMMY_REGION_GPA_2, DUMMY_REGION_SLOT_2, 1, 0);
+ evtchn_irq_expected = true;
+ alarm(1);
+ break;
+
+ case TEST_EVTCHN_HCALL_EVENTFD:
+ TEST_ASSERT(!evtchn_irq_expected,
+ "Expected event channel IRQ but it didn't happen");
+ shinfo->evtchn_pending[0] = 0;
+
+ if (verbose)
+ printf("Testing guest EVTCHNOP_send to eventfd\n");
+ evtchn_irq_expected = true;
+ alarm(1);
+ break;
+
+ case TEST_TIMER_SETUP:
+ TEST_ASSERT(!evtchn_irq_expected,
+ "Expected event channel IRQ but it didn't happen");
+ shinfo->evtchn_pending[1] = 0;
+
+ if (verbose)
+ printf("Testing guest oneshot timer\n");
+ break;
+
+ case TEST_TIMER_WAIT:
+ memset(&tmr, 0, sizeof(tmr));
+ tmr.type = KVM_XEN_VCPU_ATTR_TYPE_TIMER;
+ vcpu_ioctl(vcpu, KVM_XEN_VCPU_GET_ATTR, &tmr);
+ TEST_ASSERT(tmr.u.timer.port == EVTCHN_TIMER,
+ "Timer port not returned");
+ TEST_ASSERT(tmr.u.timer.priority == KVM_IRQ_ROUTING_XEN_EVTCHN_PRIO_2LEVEL,
+ "Timer priority not returned");
+ TEST_ASSERT(tmr.u.timer.expires_ns > rs->state_entry_time,
+ "Timer expiry not returned");
+ evtchn_irq_expected = true;
+ alarm(1);
+ break;
+
+ case TEST_TIMER_RESTORE:
+ TEST_ASSERT(!evtchn_irq_expected,
+ "Expected event channel IRQ but it didn't happen");
+ shinfo->evtchn_pending[0] = 0;
+
+ if (verbose)
+ printf("Testing restored oneshot timer\n");
+
+ tmr.u.timer.expires_ns = rs->state_entry_time + 100000000;
+ vcpu_ioctl(vcpu, KVM_XEN_VCPU_SET_ATTR, &tmr);
+ evtchn_irq_expected = true;
+ alarm(1);
+ break;
+
+ case TEST_POLL_READY:
+ TEST_ASSERT(!evtchn_irq_expected,
+ "Expected event channel IRQ but it didn't happen");
+
+ if (verbose)
+ printf("Testing SCHEDOP_poll with already pending event\n");
+ shinfo->evtchn_pending[0] = shinfo->evtchn_mask[0] = 1UL << EVTCHN_TIMER;
+ alarm(1);
+ break;
+
+ case TEST_POLL_TIMEOUT:
+ if (verbose)
+ printf("Testing SCHEDOP_poll timeout\n");
+ shinfo->evtchn_pending[0] = 0;
+ alarm(1);
+ break;
+
+ case TEST_POLL_MASKED:
+ if (verbose)
+ printf("Testing SCHEDOP_poll wake on masked event\n");
+
+ tmr.u.timer.expires_ns = rs->state_entry_time + 100000000;
+ vcpu_ioctl(vcpu, KVM_XEN_VCPU_SET_ATTR, &tmr);
+ alarm(1);
+ break;
+
+ case TEST_POLL_WAKE:
+ shinfo->evtchn_pending[0] = shinfo->evtchn_mask[0] = 0;
+ if (verbose)
+ printf("Testing SCHEDOP_poll wake on unmasked event\n");
+
+ evtchn_irq_expected = true;
+ tmr.u.timer.expires_ns = rs->state_entry_time + 100000000;
+ vcpu_ioctl(vcpu, KVM_XEN_VCPU_SET_ATTR, &tmr);
+
+ /* Read it back and check the pending time is reported correctly */
+ tmr.u.timer.expires_ns = 0;
+ vcpu_ioctl(vcpu, KVM_XEN_VCPU_GET_ATTR, &tmr);
+ TEST_ASSERT(tmr.u.timer.expires_ns == rs->state_entry_time + 100000000,
+ "Timer not reported pending");
+ alarm(1);
+ break;
+
+ case SET_VCPU_INFO:
+ if (has_shinfo_hva) {
+ struct kvm_xen_vcpu_attr vih = {
+ .type = KVM_XEN_VCPU_ATTR_TYPE_VCPU_INFO_HVA,
+ .u.hva = (unsigned long)vinfo
+ };
+ vcpu_ioctl(vcpu, KVM_XEN_VCPU_SET_ATTR, &vih);
+ }
+ break;
+
+ case TEST_TIMER_PAST:
+ TEST_ASSERT(!evtchn_irq_expected,
+ "Expected event channel IRQ but it didn't happen");
+ /* Read timer and check it is no longer pending */
+ vcpu_ioctl(vcpu, KVM_XEN_VCPU_GET_ATTR, &tmr);
+ TEST_ASSERT(!tmr.u.timer.expires_ns, "Timer still reported pending");
+
+ shinfo->evtchn_pending[0] = 0;
+ if (verbose)
+ printf("Testing timer in the past\n");
+
+ evtchn_irq_expected = true;
+ tmr.u.timer.expires_ns = rs->state_entry_time - 100000000ULL;
+ vcpu_ioctl(vcpu, KVM_XEN_VCPU_SET_ATTR, &tmr);
+ alarm(1);
+ break;
+
+ case TEST_LOCKING_SEND_RACE:
+ TEST_ASSERT(!evtchn_irq_expected,
+ "Expected event channel IRQ but it didn't happen");
+ alarm(0);
+
+ if (verbose)
+ printf("Testing shinfo lock corruption (KVM_XEN_HVM_EVTCHN_SEND)\n");
+
+ ret = pthread_create(&thread, NULL, &juggle_shinfo_state, (void *)vm);
+ TEST_ASSERT(ret == 0, "pthread_create() failed: %s", strerror(ret));
+
+ struct kvm_irq_routing_xen_evtchn uxe = {
+ .port = 1,
+ .vcpu = vcpu->id,
+ .priority = KVM_IRQ_ROUTING_XEN_EVTCHN_PRIO_2LEVEL
+ };
+
+ evtchn_irq_expected = true;
+ for (time_t t = time(NULL) + SHINFO_RACE_TIMEOUT; time(NULL) < t;)
+ __vm_ioctl(vm, KVM_XEN_HVM_EVTCHN_SEND, &uxe);
+ break;
+
+ case TEST_LOCKING_POLL_RACE:
+ TEST_ASSERT(!evtchn_irq_expected,
+ "Expected event channel IRQ but it didn't happen");
+
+ if (verbose)
+ printf("Testing shinfo lock corruption (SCHEDOP_poll)\n");
+
+ shinfo->evtchn_pending[0] = 1;
+
+ evtchn_irq_expected = true;
+ tmr.u.timer.expires_ns = rs->state_entry_time +
+ SHINFO_RACE_TIMEOUT * 1000000000ULL;
+ vcpu_ioctl(vcpu, KVM_XEN_VCPU_SET_ATTR, &tmr);
+ break;
+
+ case TEST_LOCKING_POLL_TIMEOUT:
+ /*
+ * Optional and possibly repeated sync point.
+ * Injecting the timer IRQ may fail if the
+ * shinfo is invalid when the timer expires.
+ * If the timer has expired but the IRQ hasn't
+ * been delivered, rearm the timer and retry.
+ */
+ vcpu_ioctl(vcpu, KVM_XEN_VCPU_GET_ATTR, &tmr);
+
+ /* Resume the guest if the timer is still pending. */
+ if (tmr.u.timer.expires_ns)
+ break;
+
+ /* All done if the IRQ was delivered. */
+ if (!evtchn_irq_expected)
+ break;
+
+ tmr.u.timer.expires_ns = rs->state_entry_time +
+ SHINFO_RACE_TIMEOUT * 1000000000ULL;
+ vcpu_ioctl(vcpu, KVM_XEN_VCPU_SET_ATTR, &tmr);
+ break;
+ case TEST_DONE:
+ TEST_ASSERT(!evtchn_irq_expected,
+ "Expected event channel IRQ but it didn't happen");
+
+ ret = pthread_cancel(thread);
+ TEST_ASSERT(ret == 0, "pthread_cancel() failed: %s", strerror(ret));
+
+ ret = pthread_join(thread, 0);
+ TEST_ASSERT(ret == 0, "pthread_join() failed: %s", strerror(ret));
+ goto done;
+
+ case TEST_GUEST_SAW_IRQ:
+ TEST_ASSERT(evtchn_irq_expected, "Unexpected event channel IRQ");
+ evtchn_irq_expected = false;
+ break;
+ }
+ break;
+ }
+ case UCALL_DONE:
+ goto done;
+ default:
+ TEST_FAIL("Unknown ucall 0x%lx.", uc.cmd);
+ }
+ }
+
+ done:
+ evt_reset.type = KVM_XEN_ATTR_TYPE_EVTCHN;
+ evt_reset.u.evtchn.flags = KVM_XEN_EVTCHN_RESET;
+ vm_ioctl(vm, KVM_XEN_HVM_SET_ATTR, &evt_reset);
+
+ alarm(0);
+ clock_gettime(CLOCK_REALTIME, &max_ts);
+
+ /*
+ * Just a *really* basic check that things are being put in the
+ * right place. The actual calculations are much the same for
+ * Xen as they are for the KVM variants, so no need to check.
+ */
+ struct pvclock_wall_clock *wc;
+ struct pvclock_vcpu_time_info *ti, *ti2;
+
+ wc = addr_gpa2hva(vm, SHINFO_REGION_GPA + 0xc00);
+ ti = addr_gpa2hva(vm, SHINFO_REGION_GPA + 0x40 + 0x20);
+ ti2 = addr_gpa2hva(vm, PVTIME_ADDR);
+
+ if (verbose) {
+ printf("Wall clock (v %d) %d.%09d\n", wc->version, wc->sec, wc->nsec);
+ printf("Time info 1: v %u tsc %" PRIu64 " time %" PRIu64 " mul %u shift %u flags %x\n",
+ ti->version, ti->tsc_timestamp, ti->system_time, ti->tsc_to_system_mul,
+ ti->tsc_shift, ti->flags);
+ printf("Time info 2: v %u tsc %" PRIu64 " time %" PRIu64 " mul %u shift %u flags %x\n",
+ ti2->version, ti2->tsc_timestamp, ti2->system_time, ti2->tsc_to_system_mul,
+ ti2->tsc_shift, ti2->flags);
+ }
+
+ vm_ts.tv_sec = wc->sec;
+ vm_ts.tv_nsec = wc->nsec;
+ TEST_ASSERT(wc->version && !(wc->version & 1),
+ "Bad wallclock version %x", wc->version);
+ TEST_ASSERT(cmp_timespec(&min_ts, &vm_ts) <= 0, "VM time too old");
+ TEST_ASSERT(cmp_timespec(&max_ts, &vm_ts) >= 0, "VM time too new");
+
+ TEST_ASSERT(ti->version && !(ti->version & 1),
+ "Bad time_info version %x", ti->version);
+ TEST_ASSERT(ti2->version && !(ti2->version & 1),
+ "Bad time_info version %x", ti->version);
+
+ if (do_runstate_tests) {
+ /*
+ * Fetch runstate and check sanity. Strictly speaking in the
+ * general case we might not expect the numbers to be identical
+ * but in this case we know we aren't running the vCPU any more.
+ */
+ struct kvm_xen_vcpu_attr rst = {
+ .type = KVM_XEN_VCPU_ATTR_TYPE_RUNSTATE_DATA,
+ };
+ vcpu_ioctl(vcpu, KVM_XEN_VCPU_GET_ATTR, &rst);
+
+ if (verbose) {
+ printf("Runstate: %s(%d), entry %" PRIu64 " ns\n",
+ rs->state <= RUNSTATE_offline ? runstate_names[rs->state] : "unknown",
+ rs->state, rs->state_entry_time);
+ for (int i = RUNSTATE_running; i <= RUNSTATE_offline; i++) {
+ printf("State %s: %" PRIu64 " ns\n",
+ runstate_names[i], rs->time[i]);
+ }
+ }
+
+ /*
+ * Exercise runstate info at all points across the page boundary, in
+ * 32-bit and 64-bit mode. In particular, test the case where it is
+ * configured in 32-bit mode and then switched to 64-bit mode while
+ * active, which takes it onto the second page.
+ */
+ unsigned long runstate_addr;
+ struct compat_vcpu_runstate_info *crs;
+ for (runstate_addr = SHINFO_REGION_GPA + PAGE_SIZE + PAGE_SIZE - sizeof(*rs) - 4;
+ runstate_addr < SHINFO_REGION_GPA + PAGE_SIZE + PAGE_SIZE + 4; runstate_addr++) {
+
+ rs = addr_gpa2hva(vm, runstate_addr);
+ crs = (void *)rs;
+
+ memset(rs, 0xa5, sizeof(*rs));
+
+ /* Set to compatibility mode */
+ lm.u.long_mode = 0;
+ vm_ioctl(vm, KVM_XEN_HVM_SET_ATTR, &lm);
+
+ /* Set runstate to new address (kernel will write it) */
+ struct kvm_xen_vcpu_attr st = {
+ .type = KVM_XEN_VCPU_ATTR_TYPE_RUNSTATE_ADDR,
+ .u.gpa = runstate_addr,
+ };
+ vcpu_ioctl(vcpu, KVM_XEN_VCPU_SET_ATTR, &st);
+
+ if (verbose)
+ printf("Compatibility runstate at %08lx\n", runstate_addr);
+
+ TEST_ASSERT(crs->state == rst.u.runstate.state, "Runstate mismatch");
+ TEST_ASSERT(crs->state_entry_time == rst.u.runstate.state_entry_time,
+ "State entry time mismatch");
+ TEST_ASSERT(crs->time[RUNSTATE_running] == rst.u.runstate.time_running,
+ "Running time mismatch");
+ TEST_ASSERT(crs->time[RUNSTATE_runnable] == rst.u.runstate.time_runnable,
+ "Runnable time mismatch");
+ TEST_ASSERT(crs->time[RUNSTATE_blocked] == rst.u.runstate.time_blocked,
+ "Blocked time mismatch");
+ TEST_ASSERT(crs->time[RUNSTATE_offline] == rst.u.runstate.time_offline,
+ "Offline time mismatch");
+ TEST_ASSERT(crs->time[RUNSTATE_offline + 1] == 0xa5a5a5a5a5a5a5a5ULL,
+ "Structure overrun");
+ TEST_ASSERT(crs->state_entry_time == crs->time[0] +
+ crs->time[1] + crs->time[2] + crs->time[3],
+ "runstate times don't add up");
+
+
+ /* Now switch to 64-bit mode */
+ lm.u.long_mode = 1;
+ vm_ioctl(vm, KVM_XEN_HVM_SET_ATTR, &lm);
+
+ memset(rs, 0xa5, sizeof(*rs));
+
+ /* Don't change the address, just trigger a write */
+ struct kvm_xen_vcpu_attr adj = {
+ .type = KVM_XEN_VCPU_ATTR_TYPE_RUNSTATE_ADJUST,
+ .u.runstate.state = (uint64_t)-1
+ };
+ vcpu_ioctl(vcpu, KVM_XEN_VCPU_SET_ATTR, &adj);
+
+ if (verbose)
+ printf("64-bit runstate at %08lx\n", runstate_addr);
+
+ TEST_ASSERT(rs->state == rst.u.runstate.state, "Runstate mismatch");
+ TEST_ASSERT(rs->state_entry_time == rst.u.runstate.state_entry_time,
+ "State entry time mismatch");
+ TEST_ASSERT(rs->time[RUNSTATE_running] == rst.u.runstate.time_running,
+ "Running time mismatch");
+ TEST_ASSERT(rs->time[RUNSTATE_runnable] == rst.u.runstate.time_runnable,
+ "Runnable time mismatch");
+ TEST_ASSERT(rs->time[RUNSTATE_blocked] == rst.u.runstate.time_blocked,
+ "Blocked time mismatch");
+ TEST_ASSERT(rs->time[RUNSTATE_offline] == rst.u.runstate.time_offline,
+ "Offline time mismatch");
+ TEST_ASSERT(rs->time[RUNSTATE_offline + 1] == 0xa5a5a5a5a5a5a5a5ULL,
+ "Structure overrun");
+
+ TEST_ASSERT(rs->state_entry_time == rs->time[0] +
+ rs->time[1] + rs->time[2] + rs->time[3],
+ "runstate times don't add up");
+ }
+ }
+
+ kvm_vm_free(vm);
+ return 0;
+}
diff --git a/tools/testing/selftests/kvm/x86_64/xen_vmcall_test.c b/tools/testing/selftests/kvm/x86_64/xen_vmcall_test.c
new file mode 100644
index 000000000000..e149d0574961
--- /dev/null
+++ b/tools/testing/selftests/kvm/x86_64/xen_vmcall_test.c
@@ -0,0 +1,142 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * xen_vmcall_test
+ *
+ * Copyright © 2020 Amazon.com, Inc. or its affiliates.
+ *
+ * Userspace hypercall testing
+ */
+
+#include "test_util.h"
+#include "kvm_util.h"
+#include "processor.h"
+
+#define HCALL_REGION_GPA 0xc0000000ULL
+#define HCALL_REGION_SLOT 10
+
+#define INPUTVALUE 17
+#define ARGVALUE(x) (0xdeadbeef5a5a0000UL + x)
+#define RETVALUE 0xcafef00dfbfbffffUL
+
+#define XEN_HYPERCALL_MSR 0x40000200
+#define HV_GUEST_OS_ID_MSR 0x40000000
+#define HV_HYPERCALL_MSR 0x40000001
+
+#define HVCALL_SIGNAL_EVENT 0x005d
+#define HV_STATUS_INVALID_ALIGNMENT 4
+
+static void guest_code(void)
+{
+ unsigned long rax = INPUTVALUE;
+ unsigned long rdi = ARGVALUE(1);
+ unsigned long rsi = ARGVALUE(2);
+ unsigned long rdx = ARGVALUE(3);
+ unsigned long rcx;
+ register unsigned long r10 __asm__("r10") = ARGVALUE(4);
+ register unsigned long r8 __asm__("r8") = ARGVALUE(5);
+ register unsigned long r9 __asm__("r9") = ARGVALUE(6);
+
+ /* First a direct invocation of 'vmcall' */
+ __asm__ __volatile__("vmcall" :
+ "=a"(rax) :
+ "a"(rax), "D"(rdi), "S"(rsi), "d"(rdx),
+ "r"(r10), "r"(r8), "r"(r9));
+ GUEST_ASSERT(rax == RETVALUE);
+
+ /* Fill in the Xen hypercall page */
+ __asm__ __volatile__("wrmsr" : : "c" (XEN_HYPERCALL_MSR),
+ "a" (HCALL_REGION_GPA & 0xffffffff),
+ "d" (HCALL_REGION_GPA >> 32));
+
+ /* Set Hyper-V Guest OS ID */
+ __asm__ __volatile__("wrmsr" : : "c" (HV_GUEST_OS_ID_MSR),
+ "a" (0x5a), "d" (0));
+
+ /* Hyper-V hypercall page */
+ u64 msrval = HCALL_REGION_GPA + PAGE_SIZE + 1;
+ __asm__ __volatile__("wrmsr" : : "c" (HV_HYPERCALL_MSR),
+ "a" (msrval & 0xffffffff),
+ "d" (msrval >> 32));
+
+ /* Invoke a Xen hypercall */
+ __asm__ __volatile__("call *%1" : "=a"(rax) :
+ "r"(HCALL_REGION_GPA + INPUTVALUE * 32),
+ "a"(rax), "D"(rdi), "S"(rsi), "d"(rdx),
+ "r"(r10), "r"(r8), "r"(r9));
+ GUEST_ASSERT(rax == RETVALUE);
+
+ /* Invoke a Hyper-V hypercall */
+ rax = 0;
+ rcx = HVCALL_SIGNAL_EVENT; /* code */
+ rdx = 0x5a5a5a5a; /* ingpa (badly aligned) */
+ __asm__ __volatile__("call *%1" : "=a"(rax) :
+ "r"(HCALL_REGION_GPA + PAGE_SIZE),
+ "a"(rax), "c"(rcx), "d"(rdx),
+ "r"(r8));
+ GUEST_ASSERT(rax == HV_STATUS_INVALID_ALIGNMENT);
+
+ GUEST_DONE();
+}
+
+int main(int argc, char *argv[])
+{
+ unsigned int xen_caps;
+ struct kvm_vcpu *vcpu;
+ struct kvm_vm *vm;
+
+ xen_caps = kvm_check_cap(KVM_CAP_XEN_HVM);
+ TEST_REQUIRE(xen_caps & KVM_XEN_HVM_CONFIG_INTERCEPT_HCALL);
+
+ vm = vm_create_with_one_vcpu(&vcpu, guest_code);
+ vcpu_set_hv_cpuid(vcpu);
+
+ struct kvm_xen_hvm_config hvmc = {
+ .flags = KVM_XEN_HVM_CONFIG_INTERCEPT_HCALL,
+ .msr = XEN_HYPERCALL_MSR,
+ };
+ vm_ioctl(vm, KVM_XEN_HVM_CONFIG, &hvmc);
+
+ /* Map a region for the hypercall pages */
+ vm_userspace_mem_region_add(vm, VM_MEM_SRC_ANONYMOUS,
+ HCALL_REGION_GPA, HCALL_REGION_SLOT, 2, 0);
+ virt_map(vm, HCALL_REGION_GPA, HCALL_REGION_GPA, 2);
+
+ for (;;) {
+ volatile struct kvm_run *run = vcpu->run;
+ struct ucall uc;
+
+ vcpu_run(vcpu);
+
+ if (run->exit_reason == KVM_EXIT_XEN) {
+ TEST_ASSERT_EQ(run->xen.type, KVM_EXIT_XEN_HCALL);
+ TEST_ASSERT_EQ(run->xen.u.hcall.cpl, 0);
+ TEST_ASSERT_EQ(run->xen.u.hcall.longmode, 1);
+ TEST_ASSERT_EQ(run->xen.u.hcall.input, INPUTVALUE);
+ TEST_ASSERT_EQ(run->xen.u.hcall.params[0], ARGVALUE(1));
+ TEST_ASSERT_EQ(run->xen.u.hcall.params[1], ARGVALUE(2));
+ TEST_ASSERT_EQ(run->xen.u.hcall.params[2], ARGVALUE(3));
+ TEST_ASSERT_EQ(run->xen.u.hcall.params[3], ARGVALUE(4));
+ TEST_ASSERT_EQ(run->xen.u.hcall.params[4], ARGVALUE(5));
+ TEST_ASSERT_EQ(run->xen.u.hcall.params[5], ARGVALUE(6));
+ run->xen.u.hcall.result = RETVALUE;
+ continue;
+ }
+
+ TEST_ASSERT_KVM_EXIT_REASON(vcpu, KVM_EXIT_IO);
+
+ switch (get_ucall(vcpu, &uc)) {
+ case UCALL_ABORT:
+ REPORT_GUEST_ASSERT(uc);
+ /* NOT REACHED */
+ case UCALL_SYNC:
+ break;
+ case UCALL_DONE:
+ goto done;
+ default:
+ TEST_FAIL("Unknown ucall 0x%lx.", uc.cmd);
+ }
+ }
+done:
+ kvm_vm_free(vm);
+ return 0;
+}
diff --git a/tools/testing/selftests/kvm/x86_64/xss_msr_test.c b/tools/testing/selftests/kvm/x86_64/xss_msr_test.c
index 3529376747c2..167c97abff1b 100644
--- a/tools/testing/selftests/kvm/x86_64/xss_msr_test.c
+++ b/tools/testing/selftests/kvm/x86_64/xss_msr_test.c
@@ -12,64 +12,44 @@
#include "kvm_util.h"
#include "vmx.h"
-#define VCPU_ID 1
#define MSR_BITS 64
-#define X86_FEATURE_XSAVES (1<<3)
-
-bool is_supported_msr(u32 msr_index)
-{
- struct kvm_msr_list *list;
- bool found = false;
- int i;
-
- list = kvm_get_msr_index_list();
- for (i = 0; i < list->nmsrs; ++i) {
- if (list->indices[i] == msr_index) {
- found = true;
- break;
- }
- }
-
- free(list);
- return found;
-}
-
int main(int argc, char *argv[])
{
- struct kvm_cpuid_entry2 *entry;
- bool xss_supported = false;
+ bool xss_in_msr_list;
struct kvm_vm *vm;
+ struct kvm_vcpu *vcpu;
uint64_t xss_val;
int i, r;
/* Create VM */
- vm = vm_create_default(VCPU_ID, 0, 0);
+ vm = vm_create_with_one_vcpu(&vcpu, NULL);
- if (kvm_get_cpuid_max_basic() >= 0xd) {
- entry = kvm_get_supported_cpuid_index(0xd, 1);
- xss_supported = entry && !!(entry->eax & X86_FEATURE_XSAVES);
- }
- if (!xss_supported) {
- print_skip("IA32_XSS is not supported by the vCPU");
- exit(KSFT_SKIP);
- }
+ TEST_REQUIRE(kvm_cpu_has(X86_FEATURE_XSAVES));
- xss_val = vcpu_get_msr(vm, VCPU_ID, MSR_IA32_XSS);
+ xss_val = vcpu_get_msr(vcpu, MSR_IA32_XSS);
TEST_ASSERT(xss_val == 0,
- "MSR_IA32_XSS should be initialized to zero\n");
+ "MSR_IA32_XSS should be initialized to zero");
+
+ vcpu_set_msr(vcpu, MSR_IA32_XSS, xss_val);
- vcpu_set_msr(vm, VCPU_ID, MSR_IA32_XSS, xss_val);
/*
* At present, KVM only supports a guest IA32_XSS value of 0. Verify
* that trying to set the guest IA32_XSS to an unsupported value fails.
* Also, in the future when a non-zero value succeeds check that
- * IA32_XSS is in the KVM_GET_MSR_INDEX_LIST.
+ * IA32_XSS is in the list of MSRs to save/restore.
*/
+ xss_in_msr_list = kvm_msr_is_in_save_restore_list(MSR_IA32_XSS);
for (i = 0; i < MSR_BITS; ++i) {
- r = _vcpu_set_msr(vm, VCPU_ID, MSR_IA32_XSS, 1ull << i);
- TEST_ASSERT(r == 0 || is_supported_msr(MSR_IA32_XSS),
- "IA32_XSS was able to be set, but was not found in KVM_GET_MSR_INDEX_LIST.\n");
+ r = _vcpu_set_msr(vcpu, MSR_IA32_XSS, 1ull << i);
+
+ /*
+ * Setting a list of MSRs returns the entry that "faulted", or
+ * the last entry +1 if all MSRs were successfully written.
+ */
+ TEST_ASSERT(!r || r == 1, KVM_IOCTL_ERROR(KVM_SET_MSRS, r));
+ TEST_ASSERT(r != 1 || xss_in_msr_list,
+ "IA32_XSS was able to be set, but was not in save/restore list");
}
kvm_vm_free(vm);
diff --git a/tools/testing/selftests/landlock/.gitignore b/tools/testing/selftests/landlock/.gitignore
new file mode 100644
index 000000000000..470203a7cd73
--- /dev/null
+++ b/tools/testing/selftests/landlock/.gitignore
@@ -0,0 +1,2 @@
+/*_test
+/true
diff --git a/tools/testing/selftests/landlock/Makefile b/tools/testing/selftests/landlock/Makefile
new file mode 100644
index 000000000000..348e2dbdb4e0
--- /dev/null
+++ b/tools/testing/selftests/landlock/Makefile
@@ -0,0 +1,23 @@
+# SPDX-License-Identifier: GPL-2.0
+#
+# First run: make -C ../../../.. headers_install
+
+CFLAGS += -Wall -O2 $(KHDR_INCLUDES)
+
+LOCAL_HDRS += common.h
+
+src_test := $(wildcard *_test.c)
+
+TEST_GEN_PROGS := $(src_test:.c=)
+
+TEST_GEN_PROGS_EXTENDED := true
+
+# Short targets:
+$(TEST_GEN_PROGS): LDLIBS += -lcap
+$(TEST_GEN_PROGS_EXTENDED): LDFLAGS += -static
+
+include ../lib.mk
+
+# Targets with $(OUTPUT)/ prefix:
+$(TEST_GEN_PROGS): LDLIBS += -lcap
+$(TEST_GEN_PROGS_EXTENDED): LDFLAGS += -static
diff --git a/tools/testing/selftests/landlock/base_test.c b/tools/testing/selftests/landlock/base_test.c
new file mode 100644
index 000000000000..a6f89aaea77d
--- /dev/null
+++ b/tools/testing/selftests/landlock/base_test.c
@@ -0,0 +1,329 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Landlock tests - Common user space base
+ *
+ * Copyright © 2017-2020 Mickaël Salaün <mic@digikod.net>
+ * Copyright © 2019-2020 ANSSI
+ */
+
+#define _GNU_SOURCE
+#include <errno.h>
+#include <fcntl.h>
+#include <linux/landlock.h>
+#include <string.h>
+#include <sys/prctl.h>
+#include <sys/socket.h>
+#include <sys/types.h>
+
+#include "common.h"
+
+#ifndef O_PATH
+#define O_PATH 010000000
+#endif
+
+TEST(inconsistent_attr)
+{
+ const long page_size = sysconf(_SC_PAGESIZE);
+ char *const buf = malloc(page_size + 1);
+ struct landlock_ruleset_attr *const ruleset_attr = (void *)buf;
+
+ ASSERT_NE(NULL, buf);
+
+ /* Checks copy_from_user(). */
+ ASSERT_EQ(-1, landlock_create_ruleset(ruleset_attr, 0, 0));
+ /* The size if less than sizeof(struct landlock_attr_enforce). */
+ ASSERT_EQ(EINVAL, errno);
+ ASSERT_EQ(-1, landlock_create_ruleset(ruleset_attr, 1, 0));
+ ASSERT_EQ(EINVAL, errno);
+ ASSERT_EQ(-1, landlock_create_ruleset(ruleset_attr, 7, 0));
+ ASSERT_EQ(EINVAL, errno);
+
+ ASSERT_EQ(-1, landlock_create_ruleset(NULL, 1, 0));
+ /* The size if less than sizeof(struct landlock_attr_enforce). */
+ ASSERT_EQ(EFAULT, errno);
+
+ ASSERT_EQ(-1, landlock_create_ruleset(
+ NULL, sizeof(struct landlock_ruleset_attr), 0));
+ ASSERT_EQ(EFAULT, errno);
+
+ ASSERT_EQ(-1, landlock_create_ruleset(ruleset_attr, page_size + 1, 0));
+ ASSERT_EQ(E2BIG, errno);
+
+ /* Checks minimal valid attribute size. */
+ ASSERT_EQ(-1, landlock_create_ruleset(ruleset_attr, 8, 0));
+ ASSERT_EQ(ENOMSG, errno);
+ ASSERT_EQ(-1, landlock_create_ruleset(
+ ruleset_attr,
+ sizeof(struct landlock_ruleset_attr), 0));
+ ASSERT_EQ(ENOMSG, errno);
+ ASSERT_EQ(-1, landlock_create_ruleset(ruleset_attr, page_size, 0));
+ ASSERT_EQ(ENOMSG, errno);
+
+ /* Checks non-zero value. */
+ buf[page_size - 2] = '.';
+ ASSERT_EQ(-1, landlock_create_ruleset(ruleset_attr, page_size, 0));
+ ASSERT_EQ(E2BIG, errno);
+
+ ASSERT_EQ(-1, landlock_create_ruleset(ruleset_attr, page_size + 1, 0));
+ ASSERT_EQ(E2BIG, errno);
+
+ free(buf);
+}
+
+TEST(abi_version)
+{
+ const struct landlock_ruleset_attr ruleset_attr = {
+ .handled_access_fs = LANDLOCK_ACCESS_FS_READ_FILE,
+ };
+ ASSERT_EQ(4, landlock_create_ruleset(NULL, 0,
+ LANDLOCK_CREATE_RULESET_VERSION));
+
+ ASSERT_EQ(-1, landlock_create_ruleset(&ruleset_attr, 0,
+ LANDLOCK_CREATE_RULESET_VERSION));
+ ASSERT_EQ(EINVAL, errno);
+
+ ASSERT_EQ(-1, landlock_create_ruleset(NULL, sizeof(ruleset_attr),
+ LANDLOCK_CREATE_RULESET_VERSION));
+ ASSERT_EQ(EINVAL, errno);
+
+ ASSERT_EQ(-1,
+ landlock_create_ruleset(&ruleset_attr, sizeof(ruleset_attr),
+ LANDLOCK_CREATE_RULESET_VERSION));
+ ASSERT_EQ(EINVAL, errno);
+
+ ASSERT_EQ(-1, landlock_create_ruleset(NULL, 0,
+ LANDLOCK_CREATE_RULESET_VERSION |
+ 1 << 31));
+ ASSERT_EQ(EINVAL, errno);
+}
+
+/* Tests ordering of syscall argument checks. */
+TEST(create_ruleset_checks_ordering)
+{
+ const int last_flag = LANDLOCK_CREATE_RULESET_VERSION;
+ const int invalid_flag = last_flag << 1;
+ int ruleset_fd;
+ const struct landlock_ruleset_attr ruleset_attr = {
+ .handled_access_fs = LANDLOCK_ACCESS_FS_READ_FILE,
+ };
+
+ /* Checks priority for invalid flags. */
+ ASSERT_EQ(-1, landlock_create_ruleset(NULL, 0, invalid_flag));
+ ASSERT_EQ(EINVAL, errno);
+
+ ASSERT_EQ(-1, landlock_create_ruleset(&ruleset_attr, 0, invalid_flag));
+ ASSERT_EQ(EINVAL, errno);
+
+ ASSERT_EQ(-1, landlock_create_ruleset(NULL, sizeof(ruleset_attr),
+ invalid_flag));
+ ASSERT_EQ(EINVAL, errno);
+
+ ASSERT_EQ(-1,
+ landlock_create_ruleset(&ruleset_attr, sizeof(ruleset_attr),
+ invalid_flag));
+ ASSERT_EQ(EINVAL, errno);
+
+ /* Checks too big ruleset_attr size. */
+ ASSERT_EQ(-1, landlock_create_ruleset(&ruleset_attr, -1, 0));
+ ASSERT_EQ(E2BIG, errno);
+
+ /* Checks too small ruleset_attr size. */
+ ASSERT_EQ(-1, landlock_create_ruleset(&ruleset_attr, 0, 0));
+ ASSERT_EQ(EINVAL, errno);
+ ASSERT_EQ(-1, landlock_create_ruleset(&ruleset_attr, 1, 0));
+ ASSERT_EQ(EINVAL, errno);
+
+ /* Checks valid call. */
+ ruleset_fd =
+ landlock_create_ruleset(&ruleset_attr, sizeof(ruleset_attr), 0);
+ ASSERT_LE(0, ruleset_fd);
+ ASSERT_EQ(0, close(ruleset_fd));
+}
+
+/* Tests ordering of syscall argument checks. */
+TEST(add_rule_checks_ordering)
+{
+ const struct landlock_ruleset_attr ruleset_attr = {
+ .handled_access_fs = LANDLOCK_ACCESS_FS_EXECUTE,
+ };
+ struct landlock_path_beneath_attr path_beneath_attr = {
+ .allowed_access = LANDLOCK_ACCESS_FS_EXECUTE,
+ .parent_fd = -1,
+ };
+ const int ruleset_fd =
+ landlock_create_ruleset(&ruleset_attr, sizeof(ruleset_attr), 0);
+
+ ASSERT_LE(0, ruleset_fd);
+
+ /* Checks invalid flags. */
+ ASSERT_EQ(-1, landlock_add_rule(-1, 0, NULL, 1));
+ ASSERT_EQ(EINVAL, errno);
+
+ /* Checks invalid ruleset FD. */
+ ASSERT_EQ(-1, landlock_add_rule(-1, 0, NULL, 0));
+ ASSERT_EQ(EBADF, errno);
+
+ /* Checks invalid rule type. */
+ ASSERT_EQ(-1, landlock_add_rule(ruleset_fd, 0, NULL, 0));
+ ASSERT_EQ(EINVAL, errno);
+
+ /* Checks invalid rule attr. */
+ ASSERT_EQ(-1, landlock_add_rule(ruleset_fd, LANDLOCK_RULE_PATH_BENEATH,
+ NULL, 0));
+ ASSERT_EQ(EFAULT, errno);
+
+ /* Checks invalid path_beneath.parent_fd. */
+ ASSERT_EQ(-1, landlock_add_rule(ruleset_fd, LANDLOCK_RULE_PATH_BENEATH,
+ &path_beneath_attr, 0));
+ ASSERT_EQ(EBADF, errno);
+
+ /* Checks valid call. */
+ path_beneath_attr.parent_fd =
+ open("/tmp", O_PATH | O_NOFOLLOW | O_DIRECTORY | O_CLOEXEC);
+ ASSERT_LE(0, path_beneath_attr.parent_fd);
+ ASSERT_EQ(0, landlock_add_rule(ruleset_fd, LANDLOCK_RULE_PATH_BENEATH,
+ &path_beneath_attr, 0));
+ ASSERT_EQ(0, close(path_beneath_attr.parent_fd));
+ ASSERT_EQ(0, close(ruleset_fd));
+}
+
+/* Tests ordering of syscall argument and permission checks. */
+TEST(restrict_self_checks_ordering)
+{
+ const struct landlock_ruleset_attr ruleset_attr = {
+ .handled_access_fs = LANDLOCK_ACCESS_FS_EXECUTE,
+ };
+ struct landlock_path_beneath_attr path_beneath_attr = {
+ .allowed_access = LANDLOCK_ACCESS_FS_EXECUTE,
+ .parent_fd = -1,
+ };
+ const int ruleset_fd =
+ landlock_create_ruleset(&ruleset_attr, sizeof(ruleset_attr), 0);
+
+ ASSERT_LE(0, ruleset_fd);
+ path_beneath_attr.parent_fd =
+ open("/tmp", O_PATH | O_NOFOLLOW | O_DIRECTORY | O_CLOEXEC);
+ ASSERT_LE(0, path_beneath_attr.parent_fd);
+ ASSERT_EQ(0, landlock_add_rule(ruleset_fd, LANDLOCK_RULE_PATH_BENEATH,
+ &path_beneath_attr, 0));
+ ASSERT_EQ(0, close(path_beneath_attr.parent_fd));
+
+ /* Checks unprivileged enforcement without no_new_privs. */
+ drop_caps(_metadata);
+ ASSERT_EQ(-1, landlock_restrict_self(-1, -1));
+ ASSERT_EQ(EPERM, errno);
+ ASSERT_EQ(-1, landlock_restrict_self(-1, 0));
+ ASSERT_EQ(EPERM, errno);
+ ASSERT_EQ(-1, landlock_restrict_self(ruleset_fd, 0));
+ ASSERT_EQ(EPERM, errno);
+
+ ASSERT_EQ(0, prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0));
+
+ /* Checks invalid flags. */
+ ASSERT_EQ(-1, landlock_restrict_self(-1, -1));
+ ASSERT_EQ(EINVAL, errno);
+
+ /* Checks invalid ruleset FD. */
+ ASSERT_EQ(-1, landlock_restrict_self(-1, 0));
+ ASSERT_EQ(EBADF, errno);
+
+ /* Checks valid call. */
+ ASSERT_EQ(0, landlock_restrict_self(ruleset_fd, 0));
+ ASSERT_EQ(0, close(ruleset_fd));
+}
+
+TEST(ruleset_fd_io)
+{
+ struct landlock_ruleset_attr ruleset_attr = {
+ .handled_access_fs = LANDLOCK_ACCESS_FS_READ_FILE,
+ };
+ int ruleset_fd;
+ char buf;
+
+ drop_caps(_metadata);
+ ruleset_fd =
+ landlock_create_ruleset(&ruleset_attr, sizeof(ruleset_attr), 0);
+ ASSERT_LE(0, ruleset_fd);
+
+ ASSERT_EQ(-1, write(ruleset_fd, ".", 1));
+ ASSERT_EQ(EINVAL, errno);
+ ASSERT_EQ(-1, read(ruleset_fd, &buf, 1));
+ ASSERT_EQ(EINVAL, errno);
+
+ ASSERT_EQ(0, close(ruleset_fd));
+}
+
+/* Tests enforcement of a ruleset FD transferred through a UNIX socket. */
+TEST(ruleset_fd_transfer)
+{
+ struct landlock_ruleset_attr ruleset_attr = {
+ .handled_access_fs = LANDLOCK_ACCESS_FS_READ_DIR,
+ };
+ struct landlock_path_beneath_attr path_beneath_attr = {
+ .allowed_access = LANDLOCK_ACCESS_FS_READ_DIR,
+ };
+ int ruleset_fd_tx, dir_fd;
+ int socket_fds[2];
+ pid_t child;
+ int status;
+
+ drop_caps(_metadata);
+
+ /* Creates a test ruleset with a simple rule. */
+ ruleset_fd_tx =
+ landlock_create_ruleset(&ruleset_attr, sizeof(ruleset_attr), 0);
+ ASSERT_LE(0, ruleset_fd_tx);
+ path_beneath_attr.parent_fd =
+ open("/tmp", O_PATH | O_NOFOLLOW | O_DIRECTORY | O_CLOEXEC);
+ ASSERT_LE(0, path_beneath_attr.parent_fd);
+ ASSERT_EQ(0,
+ landlock_add_rule(ruleset_fd_tx, LANDLOCK_RULE_PATH_BENEATH,
+ &path_beneath_attr, 0));
+ ASSERT_EQ(0, close(path_beneath_attr.parent_fd));
+
+ /* Sends the ruleset FD over a socketpair and then close it. */
+ ASSERT_EQ(0, socketpair(AF_UNIX, SOCK_STREAM | SOCK_CLOEXEC, 0,
+ socket_fds));
+ ASSERT_EQ(0, send_fd(socket_fds[0], ruleset_fd_tx));
+ ASSERT_EQ(0, close(socket_fds[0]));
+ ASSERT_EQ(0, close(ruleset_fd_tx));
+
+ child = fork();
+ ASSERT_LE(0, child);
+ if (child == 0) {
+ const int ruleset_fd_rx = recv_fd(socket_fds[1]);
+
+ ASSERT_LE(0, ruleset_fd_rx);
+ ASSERT_EQ(0, close(socket_fds[1]));
+
+ /* Enforces the received ruleset on the child. */
+ ASSERT_EQ(0, prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0));
+ ASSERT_EQ(0, landlock_restrict_self(ruleset_fd_rx, 0));
+ ASSERT_EQ(0, close(ruleset_fd_rx));
+
+ /* Checks that the ruleset enforcement. */
+ ASSERT_EQ(-1, open("/", O_RDONLY | O_DIRECTORY | O_CLOEXEC));
+ ASSERT_EQ(EACCES, errno);
+ dir_fd = open("/tmp", O_RDONLY | O_DIRECTORY | O_CLOEXEC);
+ ASSERT_LE(0, dir_fd);
+ ASSERT_EQ(0, close(dir_fd));
+ _exit(_metadata->exit_code);
+ return;
+ }
+
+ ASSERT_EQ(0, close(socket_fds[1]));
+
+ /* Checks that the parent is unrestricted. */
+ dir_fd = open("/", O_RDONLY | O_DIRECTORY | O_CLOEXEC);
+ ASSERT_LE(0, dir_fd);
+ ASSERT_EQ(0, close(dir_fd));
+ dir_fd = open("/tmp", O_RDONLY | O_DIRECTORY | O_CLOEXEC);
+ ASSERT_LE(0, dir_fd);
+ ASSERT_EQ(0, close(dir_fd));
+
+ ASSERT_EQ(child, waitpid(child, &status, 0));
+ ASSERT_EQ(1, WIFEXITED(status));
+ ASSERT_EQ(EXIT_SUCCESS, WEXITSTATUS(status));
+}
+
+TEST_HARNESS_MAIN
diff --git a/tools/testing/selftests/landlock/common.h b/tools/testing/selftests/landlock/common.h
new file mode 100644
index 000000000000..7e2b431b9f90
--- /dev/null
+++ b/tools/testing/selftests/landlock/common.h
@@ -0,0 +1,228 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Landlock test helpers
+ *
+ * Copyright © 2017-2020 Mickaël Salaün <mic@digikod.net>
+ * Copyright © 2019-2020 ANSSI
+ * Copyright © 2021 Microsoft Corporation
+ */
+
+#include <errno.h>
+#include <linux/landlock.h>
+#include <linux/securebits.h>
+#include <sys/capability.h>
+#include <sys/socket.h>
+#include <sys/syscall.h>
+#include <sys/types.h>
+#include <sys/wait.h>
+#include <unistd.h>
+
+#include "../kselftest_harness.h"
+
+#ifndef __maybe_unused
+#define __maybe_unused __attribute__((__unused__))
+#endif
+
+/* TEST_F_FORK() should not be used for new tests. */
+#define TEST_F_FORK(fixture_name, test_name) TEST_F(fixture_name, test_name)
+
+#ifndef landlock_create_ruleset
+static inline int
+landlock_create_ruleset(const struct landlock_ruleset_attr *const attr,
+ const size_t size, const __u32 flags)
+{
+ return syscall(__NR_landlock_create_ruleset, attr, size, flags);
+}
+#endif
+
+#ifndef landlock_add_rule
+static inline int landlock_add_rule(const int ruleset_fd,
+ const enum landlock_rule_type rule_type,
+ const void *const rule_attr,
+ const __u32 flags)
+{
+ return syscall(__NR_landlock_add_rule, ruleset_fd, rule_type, rule_attr,
+ flags);
+}
+#endif
+
+#ifndef landlock_restrict_self
+static inline int landlock_restrict_self(const int ruleset_fd,
+ const __u32 flags)
+{
+ return syscall(__NR_landlock_restrict_self, ruleset_fd, flags);
+}
+#endif
+
+static void _init_caps(struct __test_metadata *const _metadata, bool drop_all)
+{
+ cap_t cap_p;
+ /* Only these three capabilities are useful for the tests. */
+ const cap_value_t caps[] = {
+ /* clang-format off */
+ CAP_DAC_OVERRIDE,
+ CAP_MKNOD,
+ CAP_NET_ADMIN,
+ CAP_NET_BIND_SERVICE,
+ CAP_SYS_ADMIN,
+ CAP_SYS_CHROOT,
+ /* clang-format on */
+ };
+ const unsigned int noroot = SECBIT_NOROOT | SECBIT_NOROOT_LOCKED;
+
+ if ((cap_get_secbits() & noroot) != noroot)
+ EXPECT_EQ(0, cap_set_secbits(noroot));
+
+ cap_p = cap_get_proc();
+ EXPECT_NE(NULL, cap_p);
+ EXPECT_NE(-1, cap_clear(cap_p));
+ if (!drop_all) {
+ EXPECT_NE(-1, cap_set_flag(cap_p, CAP_PERMITTED,
+ ARRAY_SIZE(caps), caps, CAP_SET));
+ }
+
+ /* Automatically resets ambient capabilities. */
+ EXPECT_NE(-1, cap_set_proc(cap_p))
+ {
+ TH_LOG("Failed to set capabilities: %s", strerror(errno));
+ }
+ EXPECT_NE(-1, cap_free(cap_p));
+
+ /* Quickly checks that ambient capabilities are cleared. */
+ EXPECT_NE(-1, cap_get_ambient(caps[0]));
+}
+
+/* We cannot put such helpers in a library because of kselftest_harness.h . */
+static void __maybe_unused disable_caps(struct __test_metadata *const _metadata)
+{
+ _init_caps(_metadata, false);
+}
+
+static void __maybe_unused drop_caps(struct __test_metadata *const _metadata)
+{
+ _init_caps(_metadata, true);
+}
+
+static void _change_cap(struct __test_metadata *const _metadata,
+ const cap_flag_t flag, const cap_value_t cap,
+ const cap_flag_value_t value)
+{
+ cap_t cap_p;
+
+ cap_p = cap_get_proc();
+ EXPECT_NE(NULL, cap_p);
+ EXPECT_NE(-1, cap_set_flag(cap_p, flag, 1, &cap, value));
+ EXPECT_NE(-1, cap_set_proc(cap_p))
+ {
+ TH_LOG("Failed to set capability %d: %s", cap, strerror(errno));
+ }
+ EXPECT_NE(-1, cap_free(cap_p));
+}
+
+static void __maybe_unused set_cap(struct __test_metadata *const _metadata,
+ const cap_value_t cap)
+{
+ _change_cap(_metadata, CAP_EFFECTIVE, cap, CAP_SET);
+}
+
+static void __maybe_unused clear_cap(struct __test_metadata *const _metadata,
+ const cap_value_t cap)
+{
+ _change_cap(_metadata, CAP_EFFECTIVE, cap, CAP_CLEAR);
+}
+
+static void __maybe_unused
+set_ambient_cap(struct __test_metadata *const _metadata, const cap_value_t cap)
+{
+ _change_cap(_metadata, CAP_INHERITABLE, cap, CAP_SET);
+
+ EXPECT_NE(-1, cap_set_ambient(cap, CAP_SET))
+ {
+ TH_LOG("Failed to set ambient capability %d: %s", cap,
+ strerror(errno));
+ }
+}
+
+static void __maybe_unused clear_ambient_cap(
+ struct __test_metadata *const _metadata, const cap_value_t cap)
+{
+ EXPECT_EQ(1, cap_get_ambient(cap));
+ _change_cap(_metadata, CAP_INHERITABLE, cap, CAP_CLEAR);
+ EXPECT_EQ(0, cap_get_ambient(cap));
+}
+
+/* Receives an FD from a UNIX socket. Returns the received FD, or -errno. */
+static int __maybe_unused recv_fd(int usock)
+{
+ int fd_rx;
+ union {
+ /* Aligned ancillary data buffer. */
+ char buf[CMSG_SPACE(sizeof(fd_rx))];
+ struct cmsghdr _align;
+ } cmsg_rx = {};
+ char data = '\0';
+ struct iovec io = {
+ .iov_base = &data,
+ .iov_len = sizeof(data),
+ };
+ struct msghdr msg = {
+ .msg_iov = &io,
+ .msg_iovlen = 1,
+ .msg_control = &cmsg_rx.buf,
+ .msg_controllen = sizeof(cmsg_rx.buf),
+ };
+ struct cmsghdr *cmsg;
+ int res;
+
+ res = recvmsg(usock, &msg, MSG_CMSG_CLOEXEC);
+ if (res < 0)
+ return -errno;
+
+ cmsg = CMSG_FIRSTHDR(&msg);
+ if (cmsg->cmsg_len != CMSG_LEN(sizeof(fd_rx)))
+ return -EIO;
+
+ memcpy(&fd_rx, CMSG_DATA(cmsg), sizeof(fd_rx));
+ return fd_rx;
+}
+
+/* Sends an FD on a UNIX socket. Returns 0 on success or -errno. */
+static int __maybe_unused send_fd(int usock, int fd_tx)
+{
+ union {
+ /* Aligned ancillary data buffer. */
+ char buf[CMSG_SPACE(sizeof(fd_tx))];
+ struct cmsghdr _align;
+ } cmsg_tx = {};
+ char data_tx = '.';
+ struct iovec io = {
+ .iov_base = &data_tx,
+ .iov_len = sizeof(data_tx),
+ };
+ struct msghdr msg = {
+ .msg_iov = &io,
+ .msg_iovlen = 1,
+ .msg_control = &cmsg_tx.buf,
+ .msg_controllen = sizeof(cmsg_tx.buf),
+ };
+ struct cmsghdr *cmsg = CMSG_FIRSTHDR(&msg);
+
+ cmsg->cmsg_len = CMSG_LEN(sizeof(fd_tx));
+ cmsg->cmsg_level = SOL_SOCKET;
+ cmsg->cmsg_type = SCM_RIGHTS;
+ memcpy(CMSG_DATA(cmsg), &fd_tx, sizeof(fd_tx));
+
+ if (sendmsg(usock, &msg, 0) < 0)
+ return -errno;
+ return 0;
+}
+
+static void __maybe_unused
+enforce_ruleset(struct __test_metadata *const _metadata, const int ruleset_fd)
+{
+ ASSERT_EQ(0, prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0));
+ ASSERT_EQ(0, landlock_restrict_self(ruleset_fd, 0))
+ {
+ TH_LOG("Failed to enforce ruleset: %s", strerror(errno));
+ }
+}
diff --git a/tools/testing/selftests/landlock/config b/tools/testing/selftests/landlock/config
new file mode 100644
index 000000000000..0086efaa7b68
--- /dev/null
+++ b/tools/testing/selftests/landlock/config
@@ -0,0 +1,14 @@
+CONFIG_CGROUPS=y
+CONFIG_CGROUP_SCHED=y
+CONFIG_INET=y
+CONFIG_IPV6=y
+CONFIG_NET=y
+CONFIG_NET_NS=y
+CONFIG_OVERLAY_FS=y
+CONFIG_PROC_FS=y
+CONFIG_SECURITY=y
+CONFIG_SECURITY_LANDLOCK=y
+CONFIG_SHMEM=y
+CONFIG_SYSFS=y
+CONFIG_TMPFS=y
+CONFIG_TMPFS_XATTR=y
diff --git a/tools/testing/selftests/landlock/config.um b/tools/testing/selftests/landlock/config.um
new file mode 100644
index 000000000000..40937c0395d6
--- /dev/null
+++ b/tools/testing/selftests/landlock/config.um
@@ -0,0 +1 @@
+CONFIG_HOSTFS=y
diff --git a/tools/testing/selftests/landlock/fs_test.c b/tools/testing/selftests/landlock/fs_test.c
new file mode 100644
index 000000000000..9a6036fbf289
--- /dev/null
+++ b/tools/testing/selftests/landlock/fs_test.c
@@ -0,0 +1,4877 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Landlock tests - Filesystem
+ *
+ * Copyright © 2017-2020 Mickaël Salaün <mic@digikod.net>
+ * Copyright © 2020 ANSSI
+ * Copyright © 2020-2022 Microsoft Corporation
+ */
+
+#define _GNU_SOURCE
+#include <fcntl.h>
+#include <linux/landlock.h>
+#include <linux/magic.h>
+#include <sched.h>
+#include <stdio.h>
+#include <string.h>
+#include <sys/capability.h>
+#include <sys/mount.h>
+#include <sys/prctl.h>
+#include <sys/sendfile.h>
+#include <sys/stat.h>
+#include <sys/sysmacros.h>
+#include <sys/vfs.h>
+#include <unistd.h>
+
+#include "common.h"
+
+#ifndef renameat2
+int renameat2(int olddirfd, const char *oldpath, int newdirfd,
+ const char *newpath, unsigned int flags)
+{
+ return syscall(__NR_renameat2, olddirfd, oldpath, newdirfd, newpath,
+ flags);
+}
+#endif
+
+#ifndef RENAME_EXCHANGE
+#define RENAME_EXCHANGE (1 << 1)
+#endif
+
+#define TMP_DIR "tmp"
+#define BINARY_PATH "./true"
+
+/* Paths (sibling number and depth) */
+static const char dir_s1d1[] = TMP_DIR "/s1d1";
+static const char file1_s1d1[] = TMP_DIR "/s1d1/f1";
+static const char file2_s1d1[] = TMP_DIR "/s1d1/f2";
+static const char dir_s1d2[] = TMP_DIR "/s1d1/s1d2";
+static const char file1_s1d2[] = TMP_DIR "/s1d1/s1d2/f1";
+static const char file2_s1d2[] = TMP_DIR "/s1d1/s1d2/f2";
+static const char dir_s1d3[] = TMP_DIR "/s1d1/s1d2/s1d3";
+static const char file1_s1d3[] = TMP_DIR "/s1d1/s1d2/s1d3/f1";
+static const char file2_s1d3[] = TMP_DIR "/s1d1/s1d2/s1d3/f2";
+
+static const char dir_s2d1[] = TMP_DIR "/s2d1";
+static const char file1_s2d1[] = TMP_DIR "/s2d1/f1";
+static const char dir_s2d2[] = TMP_DIR "/s2d1/s2d2";
+static const char file1_s2d2[] = TMP_DIR "/s2d1/s2d2/f1";
+static const char dir_s2d3[] = TMP_DIR "/s2d1/s2d2/s2d3";
+static const char file1_s2d3[] = TMP_DIR "/s2d1/s2d2/s2d3/f1";
+static const char file2_s2d3[] = TMP_DIR "/s2d1/s2d2/s2d3/f2";
+
+static const char dir_s3d1[] = TMP_DIR "/s3d1";
+static const char file1_s3d1[] = TMP_DIR "/s3d1/f1";
+/* dir_s3d2 is a mount point. */
+static const char dir_s3d2[] = TMP_DIR "/s3d1/s3d2";
+static const char dir_s3d3[] = TMP_DIR "/s3d1/s3d2/s3d3";
+
+/*
+ * layout1 hierarchy:
+ *
+ * tmp
+ * ├── s1d1
+ * │   ├── f1
+ * │   ├── f2
+ * │   └── s1d2
+ * │   ├── f1
+ * │   ├── f2
+ * │   └── s1d3
+ * │   ├── f1
+ * │   └── f2
+ * ├── s2d1
+ * │   ├── f1
+ * │   └── s2d2
+ * │   ├── f1
+ * │   └── s2d3
+ * │   ├── f1
+ * │   └── f2
+ * └── s3d1
+ *    ├── f1
+ * └── s3d2
+ * └── s3d3
+ */
+
+static bool fgrep(FILE *const inf, const char *const str)
+{
+ char line[32];
+ const int slen = strlen(str);
+
+ while (!feof(inf)) {
+ if (!fgets(line, sizeof(line), inf))
+ break;
+ if (strncmp(line, str, slen))
+ continue;
+
+ return true;
+ }
+
+ return false;
+}
+
+static bool supports_filesystem(const char *const filesystem)
+{
+ char str[32];
+ int len;
+ bool res = true;
+ FILE *const inf = fopen("/proc/filesystems", "r");
+
+ /*
+ * Consider that the filesystem is supported if we cannot get the
+ * supported ones.
+ */
+ if (!inf)
+ return true;
+
+ /* filesystem can be null for bind mounts. */
+ if (!filesystem)
+ goto out;
+
+ len = snprintf(str, sizeof(str), "nodev\t%s\n", filesystem);
+ if (len >= sizeof(str))
+ /* Ignores too-long filesystem names. */
+ goto out;
+
+ res = fgrep(inf, str);
+
+out:
+ fclose(inf);
+ return res;
+}
+
+static bool cwd_matches_fs(unsigned int fs_magic)
+{
+ struct statfs statfs_buf;
+
+ if (!fs_magic)
+ return true;
+
+ if (statfs(".", &statfs_buf))
+ return true;
+
+ return statfs_buf.f_type == fs_magic;
+}
+
+static void mkdir_parents(struct __test_metadata *const _metadata,
+ const char *const path)
+{
+ char *walker;
+ const char *parent;
+ int i, err;
+
+ ASSERT_NE(path[0], '\0');
+ walker = strdup(path);
+ ASSERT_NE(NULL, walker);
+ parent = walker;
+ for (i = 1; walker[i]; i++) {
+ if (walker[i] != '/')
+ continue;
+ walker[i] = '\0';
+ err = mkdir(parent, 0700);
+ ASSERT_FALSE(err && errno != EEXIST)
+ {
+ TH_LOG("Failed to create directory \"%s\": %s", parent,
+ strerror(errno));
+ }
+ walker[i] = '/';
+ }
+ free(walker);
+}
+
+static void create_directory(struct __test_metadata *const _metadata,
+ const char *const path)
+{
+ mkdir_parents(_metadata, path);
+ ASSERT_EQ(0, mkdir(path, 0700))
+ {
+ TH_LOG("Failed to create directory \"%s\": %s", path,
+ strerror(errno));
+ }
+}
+
+static void create_file(struct __test_metadata *const _metadata,
+ const char *const path)
+{
+ mkdir_parents(_metadata, path);
+ ASSERT_EQ(0, mknod(path, S_IFREG | 0700, 0))
+ {
+ TH_LOG("Failed to create file \"%s\": %s", path,
+ strerror(errno));
+ }
+}
+
+static int remove_path(const char *const path)
+{
+ char *walker;
+ int i, ret, err = 0;
+
+ walker = strdup(path);
+ if (!walker) {
+ err = ENOMEM;
+ goto out;
+ }
+ if (unlink(path) && rmdir(path)) {
+ if (errno != ENOENT && errno != ENOTDIR)
+ err = errno;
+ goto out;
+ }
+ for (i = strlen(walker); i > 0; i--) {
+ if (walker[i] != '/')
+ continue;
+ walker[i] = '\0';
+ ret = rmdir(walker);
+ if (ret) {
+ if (errno != ENOTEMPTY && errno != EBUSY)
+ err = errno;
+ goto out;
+ }
+ if (strcmp(walker, TMP_DIR) == 0)
+ goto out;
+ }
+
+out:
+ free(walker);
+ return err;
+}
+
+struct mnt_opt {
+ const char *const source;
+ const char *const type;
+ const unsigned long flags;
+ const char *const data;
+};
+
+#define MNT_TMP_DATA "size=4m,mode=700"
+
+static const struct mnt_opt mnt_tmp = {
+ .type = "tmpfs",
+ .data = MNT_TMP_DATA,
+};
+
+static int mount_opt(const struct mnt_opt *const mnt, const char *const target)
+{
+ return mount(mnt->source ?: mnt->type, target, mnt->type, mnt->flags,
+ mnt->data);
+}
+
+static void prepare_layout_opt(struct __test_metadata *const _metadata,
+ const struct mnt_opt *const mnt)
+{
+ disable_caps(_metadata);
+ umask(0077);
+ create_directory(_metadata, TMP_DIR);
+
+ /*
+ * Do not pollute the rest of the system: creates a private mount point
+ * for tests relying on pivot_root(2) and move_mount(2).
+ */
+ set_cap(_metadata, CAP_SYS_ADMIN);
+ ASSERT_EQ(0, unshare(CLONE_NEWNS | CLONE_NEWCGROUP));
+ ASSERT_EQ(0, mount_opt(mnt, TMP_DIR))
+ {
+ TH_LOG("Failed to mount the %s filesystem: %s", mnt->type,
+ strerror(errno));
+ /*
+ * FIXTURE_TEARDOWN() is not called when FIXTURE_SETUP()
+ * failed, so we need to explicitly do a minimal cleanup to
+ * avoid cascading errors with other tests that don't depend on
+ * the same filesystem.
+ */
+ remove_path(TMP_DIR);
+ }
+ ASSERT_EQ(0, mount(NULL, TMP_DIR, NULL, MS_PRIVATE | MS_REC, NULL));
+ clear_cap(_metadata, CAP_SYS_ADMIN);
+}
+
+static void prepare_layout(struct __test_metadata *const _metadata)
+{
+ _metadata->teardown_parent = true;
+
+ prepare_layout_opt(_metadata, &mnt_tmp);
+}
+
+static void cleanup_layout(struct __test_metadata *const _metadata)
+{
+ set_cap(_metadata, CAP_SYS_ADMIN);
+ EXPECT_EQ(0, umount(TMP_DIR));
+ clear_cap(_metadata, CAP_SYS_ADMIN);
+ EXPECT_EQ(0, remove_path(TMP_DIR));
+}
+
+/* clang-format off */
+FIXTURE(layout0) {};
+/* clang-format on */
+
+FIXTURE_SETUP(layout0)
+{
+ prepare_layout(_metadata);
+}
+
+FIXTURE_TEARDOWN(layout0)
+{
+ cleanup_layout(_metadata);
+}
+
+static void create_layout1(struct __test_metadata *const _metadata)
+{
+ create_file(_metadata, file1_s1d1);
+ create_file(_metadata, file1_s1d2);
+ create_file(_metadata, file1_s1d3);
+ create_file(_metadata, file2_s1d1);
+ create_file(_metadata, file2_s1d2);
+ create_file(_metadata, file2_s1d3);
+
+ create_file(_metadata, file1_s2d1);
+ create_file(_metadata, file1_s2d2);
+ create_file(_metadata, file1_s2d3);
+ create_file(_metadata, file2_s2d3);
+
+ create_file(_metadata, file1_s3d1);
+ create_directory(_metadata, dir_s3d2);
+ set_cap(_metadata, CAP_SYS_ADMIN);
+ ASSERT_EQ(0, mount_opt(&mnt_tmp, dir_s3d2));
+ clear_cap(_metadata, CAP_SYS_ADMIN);
+
+ ASSERT_EQ(0, mkdir(dir_s3d3, 0700));
+}
+
+static void remove_layout1(struct __test_metadata *const _metadata)
+{
+ EXPECT_EQ(0, remove_path(file2_s1d3));
+ EXPECT_EQ(0, remove_path(file2_s1d2));
+ EXPECT_EQ(0, remove_path(file2_s1d1));
+ EXPECT_EQ(0, remove_path(file1_s1d3));
+ EXPECT_EQ(0, remove_path(file1_s1d2));
+ EXPECT_EQ(0, remove_path(file1_s1d1));
+ EXPECT_EQ(0, remove_path(dir_s1d3));
+
+ EXPECT_EQ(0, remove_path(file2_s2d3));
+ EXPECT_EQ(0, remove_path(file1_s2d3));
+ EXPECT_EQ(0, remove_path(file1_s2d2));
+ EXPECT_EQ(0, remove_path(file1_s2d1));
+ EXPECT_EQ(0, remove_path(dir_s2d2));
+
+ EXPECT_EQ(0, remove_path(file1_s3d1));
+ EXPECT_EQ(0, remove_path(dir_s3d3));
+ set_cap(_metadata, CAP_SYS_ADMIN);
+ umount(dir_s3d2);
+ clear_cap(_metadata, CAP_SYS_ADMIN);
+ EXPECT_EQ(0, remove_path(dir_s3d2));
+}
+
+/* clang-format off */
+FIXTURE(layout1) {};
+/* clang-format on */
+
+FIXTURE_SETUP(layout1)
+{
+ prepare_layout(_metadata);
+
+ create_layout1(_metadata);
+}
+
+FIXTURE_TEARDOWN(layout1)
+{
+ remove_layout1(_metadata);
+
+ cleanup_layout(_metadata);
+}
+
+/*
+ * This helper enables to use the ASSERT_* macros and print the line number
+ * pointing to the test caller.
+ */
+static int test_open_rel(const int dirfd, const char *const path,
+ const int flags)
+{
+ int fd;
+
+ /* Works with file and directories. */
+ fd = openat(dirfd, path, flags | O_CLOEXEC);
+ if (fd < 0)
+ return errno;
+ /*
+ * Mixing error codes from close(2) and open(2) should not lead to any
+ * (access type) confusion for this test.
+ */
+ if (close(fd) != 0)
+ return errno;
+ return 0;
+}
+
+static int test_open(const char *const path, const int flags)
+{
+ return test_open_rel(AT_FDCWD, path, flags);
+}
+
+TEST_F_FORK(layout1, no_restriction)
+{
+ ASSERT_EQ(0, test_open(dir_s1d1, O_RDONLY));
+ ASSERT_EQ(0, test_open(file1_s1d1, O_RDONLY));
+ ASSERT_EQ(0, test_open(file2_s1d1, O_RDONLY));
+ ASSERT_EQ(0, test_open(dir_s1d2, O_RDONLY));
+ ASSERT_EQ(0, test_open(file1_s1d2, O_RDONLY));
+ ASSERT_EQ(0, test_open(file2_s1d2, O_RDONLY));
+ ASSERT_EQ(0, test_open(dir_s1d3, O_RDONLY));
+ ASSERT_EQ(0, test_open(file1_s1d3, O_RDONLY));
+
+ ASSERT_EQ(0, test_open(dir_s2d1, O_RDONLY));
+ ASSERT_EQ(0, test_open(file1_s2d1, O_RDONLY));
+ ASSERT_EQ(0, test_open(dir_s2d2, O_RDONLY));
+ ASSERT_EQ(0, test_open(file1_s2d2, O_RDONLY));
+ ASSERT_EQ(0, test_open(dir_s2d3, O_RDONLY));
+ ASSERT_EQ(0, test_open(file1_s2d3, O_RDONLY));
+
+ ASSERT_EQ(0, test_open(dir_s3d1, O_RDONLY));
+ ASSERT_EQ(0, test_open(dir_s3d2, O_RDONLY));
+ ASSERT_EQ(0, test_open(dir_s3d3, O_RDONLY));
+}
+
+TEST_F_FORK(layout1, inval)
+{
+ struct landlock_path_beneath_attr path_beneath = {
+ .allowed_access = LANDLOCK_ACCESS_FS_READ_FILE |
+ LANDLOCK_ACCESS_FS_WRITE_FILE,
+ .parent_fd = -1,
+ };
+ struct landlock_ruleset_attr ruleset_attr = {
+ .handled_access_fs = LANDLOCK_ACCESS_FS_READ_FILE |
+ LANDLOCK_ACCESS_FS_WRITE_FILE,
+ };
+ int ruleset_fd;
+
+ path_beneath.parent_fd =
+ open(dir_s1d2, O_PATH | O_DIRECTORY | O_CLOEXEC);
+ ASSERT_LE(0, path_beneath.parent_fd);
+
+ ruleset_fd = open(dir_s1d1, O_PATH | O_DIRECTORY | O_CLOEXEC);
+ ASSERT_LE(0, ruleset_fd);
+ ASSERT_EQ(-1, landlock_add_rule(ruleset_fd, LANDLOCK_RULE_PATH_BENEATH,
+ &path_beneath, 0));
+ /* Returns EBADF because ruleset_fd is not a landlock-ruleset FD. */
+ ASSERT_EQ(EBADF, errno);
+ ASSERT_EQ(0, close(ruleset_fd));
+
+ ruleset_fd = open(dir_s1d1, O_DIRECTORY | O_CLOEXEC);
+ ASSERT_LE(0, ruleset_fd);
+ ASSERT_EQ(-1, landlock_add_rule(ruleset_fd, LANDLOCK_RULE_PATH_BENEATH,
+ &path_beneath, 0));
+ /* Returns EBADFD because ruleset_fd is not a valid ruleset. */
+ ASSERT_EQ(EBADFD, errno);
+ ASSERT_EQ(0, close(ruleset_fd));
+
+ /* Gets a real ruleset. */
+ ruleset_fd =
+ landlock_create_ruleset(&ruleset_attr, sizeof(ruleset_attr), 0);
+ ASSERT_LE(0, ruleset_fd);
+ ASSERT_EQ(0, landlock_add_rule(ruleset_fd, LANDLOCK_RULE_PATH_BENEATH,
+ &path_beneath, 0));
+ ASSERT_EQ(0, close(path_beneath.parent_fd));
+
+ /* Tests without O_PATH. */
+ path_beneath.parent_fd = open(dir_s1d2, O_DIRECTORY | O_CLOEXEC);
+ ASSERT_LE(0, path_beneath.parent_fd);
+ ASSERT_EQ(0, landlock_add_rule(ruleset_fd, LANDLOCK_RULE_PATH_BENEATH,
+ &path_beneath, 0));
+ ASSERT_EQ(0, close(path_beneath.parent_fd));
+
+ /* Tests with a ruleset FD. */
+ path_beneath.parent_fd = ruleset_fd;
+ ASSERT_EQ(-1, landlock_add_rule(ruleset_fd, LANDLOCK_RULE_PATH_BENEATH,
+ &path_beneath, 0));
+ ASSERT_EQ(EBADFD, errno);
+
+ /* Checks unhandled allowed_access. */
+ path_beneath.parent_fd =
+ open(dir_s1d2, O_PATH | O_DIRECTORY | O_CLOEXEC);
+ ASSERT_LE(0, path_beneath.parent_fd);
+
+ /* Test with legitimate values. */
+ path_beneath.allowed_access |= LANDLOCK_ACCESS_FS_EXECUTE;
+ ASSERT_EQ(-1, landlock_add_rule(ruleset_fd, LANDLOCK_RULE_PATH_BENEATH,
+ &path_beneath, 0));
+ ASSERT_EQ(EINVAL, errno);
+ path_beneath.allowed_access &= ~LANDLOCK_ACCESS_FS_EXECUTE;
+
+ /* Tests with denied-by-default access right. */
+ path_beneath.allowed_access |= LANDLOCK_ACCESS_FS_REFER;
+ ASSERT_EQ(-1, landlock_add_rule(ruleset_fd, LANDLOCK_RULE_PATH_BENEATH,
+ &path_beneath, 0));
+ ASSERT_EQ(EINVAL, errno);
+ path_beneath.allowed_access &= ~LANDLOCK_ACCESS_FS_REFER;
+
+ /* Test with unknown (64-bits) value. */
+ path_beneath.allowed_access |= (1ULL << 60);
+ ASSERT_EQ(-1, landlock_add_rule(ruleset_fd, LANDLOCK_RULE_PATH_BENEATH,
+ &path_beneath, 0));
+ ASSERT_EQ(EINVAL, errno);
+ path_beneath.allowed_access &= ~(1ULL << 60);
+
+ /* Test with no access. */
+ path_beneath.allowed_access = 0;
+ ASSERT_EQ(-1, landlock_add_rule(ruleset_fd, LANDLOCK_RULE_PATH_BENEATH,
+ &path_beneath, 0));
+ ASSERT_EQ(ENOMSG, errno);
+ path_beneath.allowed_access &= ~(1ULL << 60);
+
+ ASSERT_EQ(0, close(path_beneath.parent_fd));
+
+ /* Enforces the ruleset. */
+ ASSERT_EQ(0, prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0));
+ ASSERT_EQ(0, landlock_restrict_self(ruleset_fd, 0));
+
+ ASSERT_EQ(0, close(ruleset_fd));
+}
+
+/* clang-format off */
+
+#define ACCESS_FILE ( \
+ LANDLOCK_ACCESS_FS_EXECUTE | \
+ LANDLOCK_ACCESS_FS_WRITE_FILE | \
+ LANDLOCK_ACCESS_FS_READ_FILE | \
+ LANDLOCK_ACCESS_FS_TRUNCATE)
+
+#define ACCESS_LAST LANDLOCK_ACCESS_FS_TRUNCATE
+
+#define ACCESS_ALL ( \
+ ACCESS_FILE | \
+ LANDLOCK_ACCESS_FS_READ_DIR | \
+ LANDLOCK_ACCESS_FS_REMOVE_DIR | \
+ LANDLOCK_ACCESS_FS_REMOVE_FILE | \
+ LANDLOCK_ACCESS_FS_MAKE_CHAR | \
+ LANDLOCK_ACCESS_FS_MAKE_DIR | \
+ LANDLOCK_ACCESS_FS_MAKE_REG | \
+ LANDLOCK_ACCESS_FS_MAKE_SOCK | \
+ LANDLOCK_ACCESS_FS_MAKE_FIFO | \
+ LANDLOCK_ACCESS_FS_MAKE_BLOCK | \
+ LANDLOCK_ACCESS_FS_MAKE_SYM | \
+ LANDLOCK_ACCESS_FS_REFER)
+
+/* clang-format on */
+
+TEST_F_FORK(layout1, file_and_dir_access_rights)
+{
+ __u64 access;
+ int err;
+ struct landlock_path_beneath_attr path_beneath_file = {},
+ path_beneath_dir = {};
+ struct landlock_ruleset_attr ruleset_attr = {
+ .handled_access_fs = ACCESS_ALL,
+ };
+ const int ruleset_fd =
+ landlock_create_ruleset(&ruleset_attr, sizeof(ruleset_attr), 0);
+
+ ASSERT_LE(0, ruleset_fd);
+
+ /* Tests access rights for files. */
+ path_beneath_file.parent_fd = open(file1_s1d2, O_PATH | O_CLOEXEC);
+ ASSERT_LE(0, path_beneath_file.parent_fd);
+
+ /* Tests access rights for directories. */
+ path_beneath_dir.parent_fd =
+ open(dir_s1d2, O_PATH | O_DIRECTORY | O_CLOEXEC);
+ ASSERT_LE(0, path_beneath_dir.parent_fd);
+
+ for (access = 1; access <= ACCESS_LAST; access <<= 1) {
+ path_beneath_dir.allowed_access = access;
+ ASSERT_EQ(0, landlock_add_rule(ruleset_fd,
+ LANDLOCK_RULE_PATH_BENEATH,
+ &path_beneath_dir, 0));
+
+ path_beneath_file.allowed_access = access;
+ err = landlock_add_rule(ruleset_fd, LANDLOCK_RULE_PATH_BENEATH,
+ &path_beneath_file, 0);
+ if (access & ACCESS_FILE) {
+ ASSERT_EQ(0, err);
+ } else {
+ ASSERT_EQ(-1, err);
+ ASSERT_EQ(EINVAL, errno);
+ }
+ }
+ ASSERT_EQ(0, close(path_beneath_file.parent_fd));
+ ASSERT_EQ(0, close(path_beneath_dir.parent_fd));
+ ASSERT_EQ(0, close(ruleset_fd));
+}
+
+TEST_F_FORK(layout0, ruleset_with_unknown_access)
+{
+ __u64 access_mask;
+
+ for (access_mask = 1ULL << 63; access_mask != ACCESS_LAST;
+ access_mask >>= 1) {
+ struct landlock_ruleset_attr ruleset_attr = {
+ .handled_access_fs = access_mask,
+ };
+
+ ASSERT_EQ(-1, landlock_create_ruleset(&ruleset_attr,
+ sizeof(ruleset_attr), 0));
+ ASSERT_EQ(EINVAL, errno);
+ }
+}
+
+TEST_F_FORK(layout0, rule_with_unknown_access)
+{
+ __u64 access;
+ struct landlock_path_beneath_attr path_beneath = {};
+ const struct landlock_ruleset_attr ruleset_attr = {
+ .handled_access_fs = ACCESS_ALL,
+ };
+ const int ruleset_fd =
+ landlock_create_ruleset(&ruleset_attr, sizeof(ruleset_attr), 0);
+
+ ASSERT_LE(0, ruleset_fd);
+
+ path_beneath.parent_fd =
+ open(TMP_DIR, O_PATH | O_DIRECTORY | O_CLOEXEC);
+ ASSERT_LE(0, path_beneath.parent_fd);
+
+ for (access = 1ULL << 63; access != ACCESS_LAST; access >>= 1) {
+ path_beneath.allowed_access = access;
+ EXPECT_EQ(-1, landlock_add_rule(ruleset_fd,
+ LANDLOCK_RULE_PATH_BENEATH,
+ &path_beneath, 0));
+ EXPECT_EQ(EINVAL, errno);
+ }
+ ASSERT_EQ(0, close(path_beneath.parent_fd));
+ ASSERT_EQ(0, close(ruleset_fd));
+}
+
+TEST_F_FORK(layout1, rule_with_unhandled_access)
+{
+ struct landlock_ruleset_attr ruleset_attr = {
+ .handled_access_fs = LANDLOCK_ACCESS_FS_EXECUTE,
+ };
+ struct landlock_path_beneath_attr path_beneath = {};
+ int ruleset_fd;
+ __u64 access;
+
+ ruleset_fd =
+ landlock_create_ruleset(&ruleset_attr, sizeof(ruleset_attr), 0);
+ ASSERT_LE(0, ruleset_fd);
+
+ path_beneath.parent_fd = open(file1_s1d2, O_PATH | O_CLOEXEC);
+ ASSERT_LE(0, path_beneath.parent_fd);
+
+ for (access = 1; access > 0; access <<= 1) {
+ int err;
+
+ path_beneath.allowed_access = access;
+ err = landlock_add_rule(ruleset_fd, LANDLOCK_RULE_PATH_BENEATH,
+ &path_beneath, 0);
+ if (access == ruleset_attr.handled_access_fs) {
+ EXPECT_EQ(0, err);
+ } else {
+ EXPECT_EQ(-1, err);
+ EXPECT_EQ(EINVAL, errno);
+ }
+ }
+
+ EXPECT_EQ(0, close(path_beneath.parent_fd));
+ EXPECT_EQ(0, close(ruleset_fd));
+}
+
+static void add_path_beneath(struct __test_metadata *const _metadata,
+ const int ruleset_fd, const __u64 allowed_access,
+ const char *const path)
+{
+ struct landlock_path_beneath_attr path_beneath = {
+ .allowed_access = allowed_access,
+ };
+
+ path_beneath.parent_fd = open(path, O_PATH | O_CLOEXEC);
+ ASSERT_LE(0, path_beneath.parent_fd)
+ {
+ TH_LOG("Failed to open directory \"%s\": %s", path,
+ strerror(errno));
+ }
+ ASSERT_EQ(0, landlock_add_rule(ruleset_fd, LANDLOCK_RULE_PATH_BENEATH,
+ &path_beneath, 0))
+ {
+ TH_LOG("Failed to update the ruleset with \"%s\": %s", path,
+ strerror(errno));
+ }
+ ASSERT_EQ(0, close(path_beneath.parent_fd));
+}
+
+struct rule {
+ const char *path;
+ __u64 access;
+};
+
+/* clang-format off */
+
+#define ACCESS_RO ( \
+ LANDLOCK_ACCESS_FS_READ_FILE | \
+ LANDLOCK_ACCESS_FS_READ_DIR)
+
+#define ACCESS_RW ( \
+ ACCESS_RO | \
+ LANDLOCK_ACCESS_FS_WRITE_FILE)
+
+/* clang-format on */
+
+static int create_ruleset(struct __test_metadata *const _metadata,
+ const __u64 handled_access_fs,
+ const struct rule rules[])
+{
+ int ruleset_fd, i;
+ struct landlock_ruleset_attr ruleset_attr = {
+ .handled_access_fs = handled_access_fs,
+ };
+
+ ASSERT_NE(NULL, rules)
+ {
+ TH_LOG("No rule list");
+ }
+ ASSERT_NE(NULL, rules[0].path)
+ {
+ TH_LOG("Empty rule list");
+ }
+
+ ruleset_fd =
+ landlock_create_ruleset(&ruleset_attr, sizeof(ruleset_attr), 0);
+ ASSERT_LE(0, ruleset_fd)
+ {
+ TH_LOG("Failed to create a ruleset: %s", strerror(errno));
+ }
+
+ for (i = 0; rules[i].path; i++) {
+ add_path_beneath(_metadata, ruleset_fd, rules[i].access,
+ rules[i].path);
+ }
+ return ruleset_fd;
+}
+
+TEST_F_FORK(layout0, proc_nsfs)
+{
+ const struct rule rules[] = {
+ {
+ .path = "/dev/null",
+ .access = LANDLOCK_ACCESS_FS_READ_FILE |
+ LANDLOCK_ACCESS_FS_WRITE_FILE,
+ },
+ {},
+ };
+ struct landlock_path_beneath_attr path_beneath;
+ const int ruleset_fd = create_ruleset(
+ _metadata, rules[0].access | LANDLOCK_ACCESS_FS_READ_DIR,
+ rules);
+
+ ASSERT_LE(0, ruleset_fd);
+ ASSERT_EQ(0, test_open("/proc/self/ns/mnt", O_RDONLY));
+
+ enforce_ruleset(_metadata, ruleset_fd);
+
+ ASSERT_EQ(EACCES, test_open("/", O_RDONLY));
+ ASSERT_EQ(EACCES, test_open("/dev", O_RDONLY));
+ ASSERT_EQ(0, test_open("/dev/null", O_RDONLY));
+ ASSERT_EQ(EACCES, test_open("/dev/full", O_RDONLY));
+
+ ASSERT_EQ(EACCES, test_open("/proc", O_RDONLY));
+ ASSERT_EQ(EACCES, test_open("/proc/self", O_RDONLY));
+ ASSERT_EQ(EACCES, test_open("/proc/self/ns", O_RDONLY));
+ /*
+ * Because nsfs is an internal filesystem, /proc/self/ns/mnt is a
+ * disconnected path. Such path cannot be identified and must then be
+ * allowed.
+ */
+ ASSERT_EQ(0, test_open("/proc/self/ns/mnt", O_RDONLY));
+
+ /*
+ * Checks that it is not possible to add nsfs-like filesystem
+ * references to a ruleset.
+ */
+ path_beneath.allowed_access = LANDLOCK_ACCESS_FS_READ_FILE |
+ LANDLOCK_ACCESS_FS_WRITE_FILE,
+ path_beneath.parent_fd = open("/proc/self/ns/mnt", O_PATH | O_CLOEXEC);
+ ASSERT_LE(0, path_beneath.parent_fd);
+ ASSERT_EQ(-1, landlock_add_rule(ruleset_fd, LANDLOCK_RULE_PATH_BENEATH,
+ &path_beneath, 0));
+ ASSERT_EQ(EBADFD, errno);
+ ASSERT_EQ(0, close(path_beneath.parent_fd));
+}
+
+TEST_F_FORK(layout0, unpriv)
+{
+ const struct rule rules[] = {
+ {
+ .path = TMP_DIR,
+ .access = ACCESS_RO,
+ },
+ {},
+ };
+ int ruleset_fd;
+
+ drop_caps(_metadata);
+
+ ruleset_fd = create_ruleset(_metadata, ACCESS_RO, rules);
+ ASSERT_LE(0, ruleset_fd);
+ ASSERT_EQ(-1, landlock_restrict_self(ruleset_fd, 0));
+ ASSERT_EQ(EPERM, errno);
+
+ /* enforce_ruleset() calls prctl(no_new_privs). */
+ enforce_ruleset(_metadata, ruleset_fd);
+ ASSERT_EQ(0, close(ruleset_fd));
+}
+
+TEST_F_FORK(layout1, effective_access)
+{
+ const struct rule rules[] = {
+ {
+ .path = dir_s1d2,
+ .access = ACCESS_RO,
+ },
+ {
+ .path = file1_s2d2,
+ .access = LANDLOCK_ACCESS_FS_READ_FILE |
+ LANDLOCK_ACCESS_FS_WRITE_FILE,
+ },
+ {},
+ };
+ const int ruleset_fd = create_ruleset(_metadata, ACCESS_RW, rules);
+ char buf;
+ int reg_fd;
+
+ ASSERT_LE(0, ruleset_fd);
+ enforce_ruleset(_metadata, ruleset_fd);
+ ASSERT_EQ(0, close(ruleset_fd));
+
+ /* Tests on a directory (with or without O_PATH). */
+ ASSERT_EQ(EACCES, test_open("/", O_RDONLY));
+ ASSERT_EQ(0, test_open("/", O_RDONLY | O_PATH));
+ ASSERT_EQ(EACCES, test_open(dir_s1d1, O_RDONLY));
+ ASSERT_EQ(0, test_open(dir_s1d1, O_RDONLY | O_PATH));
+ ASSERT_EQ(EACCES, test_open(file1_s1d1, O_RDONLY));
+ ASSERT_EQ(0, test_open(file1_s1d1, O_RDONLY | O_PATH));
+
+ ASSERT_EQ(0, test_open(dir_s1d2, O_RDONLY));
+ ASSERT_EQ(0, test_open(file1_s1d2, O_RDONLY));
+ ASSERT_EQ(0, test_open(dir_s1d3, O_RDONLY));
+ ASSERT_EQ(0, test_open(file1_s1d3, O_RDONLY));
+
+ /* Tests on a file (with or without O_PATH). */
+ ASSERT_EQ(EACCES, test_open(dir_s2d2, O_RDONLY));
+ ASSERT_EQ(0, test_open(dir_s2d2, O_RDONLY | O_PATH));
+
+ ASSERT_EQ(0, test_open(file1_s2d2, O_RDONLY));
+
+ /* Checks effective read and write actions. */
+ reg_fd = open(file1_s2d2, O_RDWR | O_CLOEXEC);
+ ASSERT_LE(0, reg_fd);
+ ASSERT_EQ(1, write(reg_fd, ".", 1));
+ ASSERT_LE(0, lseek(reg_fd, 0, SEEK_SET));
+ ASSERT_EQ(1, read(reg_fd, &buf, 1));
+ ASSERT_EQ('.', buf);
+ ASSERT_EQ(0, close(reg_fd));
+
+ /* Just in case, double-checks effective actions. */
+ reg_fd = open(file1_s2d2, O_RDONLY | O_CLOEXEC);
+ ASSERT_LE(0, reg_fd);
+ ASSERT_EQ(-1, write(reg_fd, &buf, 1));
+ ASSERT_EQ(EBADF, errno);
+ ASSERT_EQ(0, close(reg_fd));
+}
+
+TEST_F_FORK(layout1, unhandled_access)
+{
+ const struct rule rules[] = {
+ {
+ .path = dir_s1d2,
+ .access = ACCESS_RO,
+ },
+ {},
+ };
+ /* Here, we only handle read accesses, not write accesses. */
+ const int ruleset_fd = create_ruleset(_metadata, ACCESS_RO, rules);
+
+ ASSERT_LE(0, ruleset_fd);
+ enforce_ruleset(_metadata, ruleset_fd);
+ ASSERT_EQ(0, close(ruleset_fd));
+
+ /*
+ * Because the policy does not handle LANDLOCK_ACCESS_FS_WRITE_FILE,
+ * opening for write-only should be allowed, but not read-write.
+ */
+ ASSERT_EQ(0, test_open(file1_s1d1, O_WRONLY));
+ ASSERT_EQ(EACCES, test_open(file1_s1d1, O_RDWR));
+
+ ASSERT_EQ(0, test_open(file1_s1d2, O_WRONLY));
+ ASSERT_EQ(0, test_open(file1_s1d2, O_RDWR));
+}
+
+TEST_F_FORK(layout1, ruleset_overlap)
+{
+ const struct rule rules[] = {
+ /* These rules should be ORed among them. */
+ {
+ .path = dir_s1d2,
+ .access = LANDLOCK_ACCESS_FS_READ_FILE |
+ LANDLOCK_ACCESS_FS_WRITE_FILE,
+ },
+ {
+ .path = dir_s1d2,
+ .access = LANDLOCK_ACCESS_FS_READ_FILE |
+ LANDLOCK_ACCESS_FS_READ_DIR,
+ },
+ {},
+ };
+ const int ruleset_fd = create_ruleset(_metadata, ACCESS_RW, rules);
+
+ ASSERT_LE(0, ruleset_fd);
+ enforce_ruleset(_metadata, ruleset_fd);
+ ASSERT_EQ(0, close(ruleset_fd));
+
+ /* Checks s1d1 hierarchy. */
+ ASSERT_EQ(EACCES, test_open(file1_s1d1, O_RDONLY));
+ ASSERT_EQ(EACCES, test_open(file1_s1d1, O_WRONLY));
+ ASSERT_EQ(EACCES, test_open(file1_s1d1, O_RDWR));
+ ASSERT_EQ(EACCES, test_open(dir_s1d1, O_RDONLY | O_DIRECTORY));
+
+ /* Checks s1d2 hierarchy. */
+ ASSERT_EQ(0, test_open(file1_s1d2, O_RDONLY));
+ ASSERT_EQ(0, test_open(file1_s1d2, O_WRONLY));
+ ASSERT_EQ(0, test_open(file1_s1d2, O_RDWR));
+ ASSERT_EQ(0, test_open(dir_s1d2, O_RDONLY | O_DIRECTORY));
+
+ /* Checks s1d3 hierarchy. */
+ ASSERT_EQ(0, test_open(file1_s1d3, O_RDONLY));
+ ASSERT_EQ(0, test_open(file1_s1d3, O_WRONLY));
+ ASSERT_EQ(0, test_open(file1_s1d3, O_RDWR));
+ ASSERT_EQ(0, test_open(dir_s1d3, O_RDONLY | O_DIRECTORY));
+}
+
+TEST_F_FORK(layout1, layer_rule_unions)
+{
+ const struct rule layer1[] = {
+ {
+ .path = dir_s1d2,
+ .access = LANDLOCK_ACCESS_FS_READ_FILE,
+ },
+ /* dir_s1d3 should allow READ_FILE and WRITE_FILE (O_RDWR). */
+ {
+ .path = dir_s1d3,
+ .access = LANDLOCK_ACCESS_FS_WRITE_FILE,
+ },
+ {},
+ };
+ const struct rule layer2[] = {
+ /* Doesn't change anything from layer1. */
+ {
+ .path = dir_s1d2,
+ .access = LANDLOCK_ACCESS_FS_READ_FILE |
+ LANDLOCK_ACCESS_FS_WRITE_FILE,
+ },
+ {},
+ };
+ const struct rule layer3[] = {
+ /* Only allows write (but not read) to dir_s1d3. */
+ {
+ .path = dir_s1d2,
+ .access = LANDLOCK_ACCESS_FS_WRITE_FILE,
+ },
+ {},
+ };
+ int ruleset_fd = create_ruleset(_metadata, ACCESS_RW, layer1);
+
+ ASSERT_LE(0, ruleset_fd);
+ enforce_ruleset(_metadata, ruleset_fd);
+ ASSERT_EQ(0, close(ruleset_fd));
+
+ /* Checks s1d1 hierarchy with layer1. */
+ ASSERT_EQ(EACCES, test_open(file1_s1d1, O_RDONLY));
+ ASSERT_EQ(EACCES, test_open(file1_s1d1, O_WRONLY));
+ ASSERT_EQ(EACCES, test_open(file1_s1d1, O_RDWR));
+ ASSERT_EQ(EACCES, test_open(dir_s1d1, O_RDONLY | O_DIRECTORY));
+
+ /* Checks s1d2 hierarchy with layer1. */
+ ASSERT_EQ(0, test_open(file1_s1d2, O_RDONLY));
+ ASSERT_EQ(EACCES, test_open(file1_s1d2, O_WRONLY));
+ ASSERT_EQ(EACCES, test_open(file1_s1d2, O_RDWR));
+ ASSERT_EQ(EACCES, test_open(dir_s1d1, O_RDONLY | O_DIRECTORY));
+
+ /* Checks s1d3 hierarchy with layer1. */
+ ASSERT_EQ(0, test_open(file1_s1d3, O_RDONLY));
+ ASSERT_EQ(0, test_open(file1_s1d3, O_WRONLY));
+ /* dir_s1d3 should allow READ_FILE and WRITE_FILE (O_RDWR). */
+ ASSERT_EQ(0, test_open(file1_s1d3, O_RDWR));
+ ASSERT_EQ(EACCES, test_open(dir_s1d1, O_RDONLY | O_DIRECTORY));
+
+ /* Doesn't change anything from layer1. */
+ ruleset_fd = create_ruleset(_metadata, ACCESS_RW, layer2);
+ ASSERT_LE(0, ruleset_fd);
+ enforce_ruleset(_metadata, ruleset_fd);
+ ASSERT_EQ(0, close(ruleset_fd));
+
+ /* Checks s1d1 hierarchy with layer2. */
+ ASSERT_EQ(EACCES, test_open(file1_s1d1, O_RDONLY));
+ ASSERT_EQ(EACCES, test_open(file1_s1d1, O_WRONLY));
+ ASSERT_EQ(EACCES, test_open(file1_s1d1, O_RDWR));
+ ASSERT_EQ(EACCES, test_open(dir_s1d1, O_RDONLY | O_DIRECTORY));
+
+ /* Checks s1d2 hierarchy with layer2. */
+ ASSERT_EQ(0, test_open(file1_s1d2, O_RDONLY));
+ ASSERT_EQ(EACCES, test_open(file1_s1d2, O_WRONLY));
+ ASSERT_EQ(EACCES, test_open(file1_s1d2, O_RDWR));
+ ASSERT_EQ(EACCES, test_open(dir_s1d1, O_RDONLY | O_DIRECTORY));
+
+ /* Checks s1d3 hierarchy with layer2. */
+ ASSERT_EQ(0, test_open(file1_s1d3, O_RDONLY));
+ ASSERT_EQ(0, test_open(file1_s1d3, O_WRONLY));
+ /* dir_s1d3 should allow READ_FILE and WRITE_FILE (O_RDWR). */
+ ASSERT_EQ(0, test_open(file1_s1d3, O_RDWR));
+ ASSERT_EQ(EACCES, test_open(dir_s1d1, O_RDONLY | O_DIRECTORY));
+
+ /* Only allows write (but not read) to dir_s1d3. */
+ ruleset_fd = create_ruleset(_metadata, ACCESS_RW, layer3);
+ ASSERT_LE(0, ruleset_fd);
+ enforce_ruleset(_metadata, ruleset_fd);
+ ASSERT_EQ(0, close(ruleset_fd));
+
+ /* Checks s1d1 hierarchy with layer3. */
+ ASSERT_EQ(EACCES, test_open(file1_s1d1, O_RDONLY));
+ ASSERT_EQ(EACCES, test_open(file1_s1d1, O_WRONLY));
+ ASSERT_EQ(EACCES, test_open(file1_s1d1, O_RDWR));
+ ASSERT_EQ(EACCES, test_open(dir_s1d1, O_RDONLY | O_DIRECTORY));
+
+ /* Checks s1d2 hierarchy with layer3. */
+ ASSERT_EQ(EACCES, test_open(file1_s1d2, O_RDONLY));
+ ASSERT_EQ(EACCES, test_open(file1_s1d2, O_WRONLY));
+ ASSERT_EQ(EACCES, test_open(file1_s1d2, O_RDWR));
+ ASSERT_EQ(EACCES, test_open(dir_s1d1, O_RDONLY | O_DIRECTORY));
+
+ /* Checks s1d3 hierarchy with layer3. */
+ ASSERT_EQ(EACCES, test_open(file1_s1d3, O_RDONLY));
+ ASSERT_EQ(0, test_open(file1_s1d3, O_WRONLY));
+ /* dir_s1d3 should now deny READ_FILE and WRITE_FILE (O_RDWR). */
+ ASSERT_EQ(EACCES, test_open(file1_s1d3, O_RDWR));
+ ASSERT_EQ(EACCES, test_open(dir_s1d1, O_RDONLY | O_DIRECTORY));
+}
+
+TEST_F_FORK(layout1, non_overlapping_accesses)
+{
+ const struct rule layer1[] = {
+ {
+ .path = dir_s1d2,
+ .access = LANDLOCK_ACCESS_FS_MAKE_REG,
+ },
+ {},
+ };
+ const struct rule layer2[] = {
+ {
+ .path = dir_s1d3,
+ .access = LANDLOCK_ACCESS_FS_REMOVE_FILE,
+ },
+ {},
+ };
+ int ruleset_fd;
+
+ ASSERT_EQ(0, unlink(file1_s1d1));
+ ASSERT_EQ(0, unlink(file1_s1d2));
+
+ ruleset_fd =
+ create_ruleset(_metadata, LANDLOCK_ACCESS_FS_MAKE_REG, layer1);
+ ASSERT_LE(0, ruleset_fd);
+ enforce_ruleset(_metadata, ruleset_fd);
+ ASSERT_EQ(0, close(ruleset_fd));
+
+ ASSERT_EQ(-1, mknod(file1_s1d1, S_IFREG | 0700, 0));
+ ASSERT_EQ(EACCES, errno);
+ ASSERT_EQ(0, mknod(file1_s1d2, S_IFREG | 0700, 0));
+ ASSERT_EQ(0, unlink(file1_s1d2));
+
+ ruleset_fd = create_ruleset(_metadata, LANDLOCK_ACCESS_FS_REMOVE_FILE,
+ layer2);
+ ASSERT_LE(0, ruleset_fd);
+ enforce_ruleset(_metadata, ruleset_fd);
+ ASSERT_EQ(0, close(ruleset_fd));
+
+ /* Unchanged accesses for file creation. */
+ ASSERT_EQ(-1, mknod(file1_s1d1, S_IFREG | 0700, 0));
+ ASSERT_EQ(EACCES, errno);
+ ASSERT_EQ(0, mknod(file1_s1d2, S_IFREG | 0700, 0));
+
+ /* Checks file removing. */
+ ASSERT_EQ(-1, unlink(file1_s1d2));
+ ASSERT_EQ(EACCES, errno);
+ ASSERT_EQ(0, unlink(file1_s1d3));
+}
+
+TEST_F_FORK(layout1, interleaved_masked_accesses)
+{
+ /*
+ * Checks overly restrictive rules:
+ * layer 1: allows R s1d1/s1d2/s1d3/file1
+ * layer 2: allows RW s1d1/s1d2/s1d3
+ * allows W s1d1/s1d2
+ * denies R s1d1/s1d2
+ * layer 3: allows R s1d1
+ * layer 4: allows R s1d1/s1d2
+ * denies W s1d1/s1d2
+ * layer 5: allows R s1d1/s1d2
+ * layer 6: allows X ----
+ * layer 7: allows W s1d1/s1d2
+ * denies R s1d1/s1d2
+ */
+ const struct rule layer1_read[] = {
+ /* Allows read access to file1_s1d3 with the first layer. */
+ {
+ .path = file1_s1d3,
+ .access = LANDLOCK_ACCESS_FS_READ_FILE,
+ },
+ {},
+ };
+ /* First rule with write restrictions. */
+ const struct rule layer2_read_write[] = {
+ /* Start by granting read-write access via its parent directory... */
+ {
+ .path = dir_s1d3,
+ .access = LANDLOCK_ACCESS_FS_READ_FILE |
+ LANDLOCK_ACCESS_FS_WRITE_FILE,
+ },
+ /* ...but also denies read access via its grandparent directory. */
+ {
+ .path = dir_s1d2,
+ .access = LANDLOCK_ACCESS_FS_WRITE_FILE,
+ },
+ {},
+ };
+ const struct rule layer3_read[] = {
+ /* Allows read access via its great-grandparent directory. */
+ {
+ .path = dir_s1d1,
+ .access = LANDLOCK_ACCESS_FS_READ_FILE,
+ },
+ {},
+ };
+ const struct rule layer4_read_write[] = {
+ /*
+ * Try to confuse the deny access by denying write (but not
+ * read) access via its grandparent directory.
+ */
+ {
+ .path = dir_s1d2,
+ .access = LANDLOCK_ACCESS_FS_READ_FILE,
+ },
+ {},
+ };
+ const struct rule layer5_read[] = {
+ /*
+ * Try to override layer2's deny read access by explicitly
+ * allowing read access via file1_s1d3's grandparent.
+ */
+ {
+ .path = dir_s1d2,
+ .access = LANDLOCK_ACCESS_FS_READ_FILE,
+ },
+ {},
+ };
+ const struct rule layer6_execute[] = {
+ /*
+ * Restricts an unrelated file hierarchy with a new access
+ * (non-overlapping) type.
+ */
+ {
+ .path = dir_s2d1,
+ .access = LANDLOCK_ACCESS_FS_EXECUTE,
+ },
+ {},
+ };
+ const struct rule layer7_read_write[] = {
+ /*
+ * Finally, denies read access to file1_s1d3 via its
+ * grandparent.
+ */
+ {
+ .path = dir_s1d2,
+ .access = LANDLOCK_ACCESS_FS_WRITE_FILE,
+ },
+ {},
+ };
+ int ruleset_fd;
+
+ ruleset_fd = create_ruleset(_metadata, LANDLOCK_ACCESS_FS_READ_FILE,
+ layer1_read);
+ ASSERT_LE(0, ruleset_fd);
+ enforce_ruleset(_metadata, ruleset_fd);
+ ASSERT_EQ(0, close(ruleset_fd));
+
+ /* Checks that read access is granted for file1_s1d3 with layer 1. */
+ ASSERT_EQ(0, test_open(file1_s1d3, O_RDWR));
+ ASSERT_EQ(EACCES, test_open(file2_s1d3, O_RDONLY));
+ ASSERT_EQ(0, test_open(file2_s1d3, O_WRONLY));
+
+ ruleset_fd = create_ruleset(_metadata,
+ LANDLOCK_ACCESS_FS_READ_FILE |
+ LANDLOCK_ACCESS_FS_WRITE_FILE,
+ layer2_read_write);
+ ASSERT_LE(0, ruleset_fd);
+ enforce_ruleset(_metadata, ruleset_fd);
+ ASSERT_EQ(0, close(ruleset_fd));
+
+ /* Checks that previous access rights are unchanged with layer 2. */
+ ASSERT_EQ(0, test_open(file1_s1d3, O_RDWR));
+ ASSERT_EQ(EACCES, test_open(file2_s1d3, O_RDONLY));
+ ASSERT_EQ(0, test_open(file2_s1d3, O_WRONLY));
+
+ ruleset_fd = create_ruleset(_metadata, LANDLOCK_ACCESS_FS_READ_FILE,
+ layer3_read);
+ ASSERT_LE(0, ruleset_fd);
+ enforce_ruleset(_metadata, ruleset_fd);
+ ASSERT_EQ(0, close(ruleset_fd));
+
+ /* Checks that previous access rights are unchanged with layer 3. */
+ ASSERT_EQ(0, test_open(file1_s1d3, O_RDWR));
+ ASSERT_EQ(EACCES, test_open(file2_s1d3, O_RDONLY));
+ ASSERT_EQ(0, test_open(file2_s1d3, O_WRONLY));
+
+ /* This time, denies write access for the file hierarchy. */
+ ruleset_fd = create_ruleset(_metadata,
+ LANDLOCK_ACCESS_FS_READ_FILE |
+ LANDLOCK_ACCESS_FS_WRITE_FILE,
+ layer4_read_write);
+ ASSERT_LE(0, ruleset_fd);
+ enforce_ruleset(_metadata, ruleset_fd);
+ ASSERT_EQ(0, close(ruleset_fd));
+
+ /*
+ * Checks that the only change with layer 4 is that write access is
+ * denied.
+ */
+ ASSERT_EQ(0, test_open(file1_s1d3, O_RDONLY));
+ ASSERT_EQ(EACCES, test_open(file1_s1d3, O_WRONLY));
+ ASSERT_EQ(EACCES, test_open(file2_s1d3, O_RDONLY));
+ ASSERT_EQ(EACCES, test_open(file2_s1d3, O_WRONLY));
+
+ ruleset_fd = create_ruleset(_metadata, LANDLOCK_ACCESS_FS_READ_FILE,
+ layer5_read);
+ ASSERT_LE(0, ruleset_fd);
+ enforce_ruleset(_metadata, ruleset_fd);
+ ASSERT_EQ(0, close(ruleset_fd));
+
+ /* Checks that previous access rights are unchanged with layer 5. */
+ ASSERT_EQ(0, test_open(file1_s1d3, O_RDONLY));
+ ASSERT_EQ(EACCES, test_open(file1_s1d3, O_WRONLY));
+ ASSERT_EQ(EACCES, test_open(file2_s1d3, O_WRONLY));
+ ASSERT_EQ(EACCES, test_open(file2_s1d3, O_RDONLY));
+
+ ruleset_fd = create_ruleset(_metadata, LANDLOCK_ACCESS_FS_EXECUTE,
+ layer6_execute);
+ ASSERT_LE(0, ruleset_fd);
+ enforce_ruleset(_metadata, ruleset_fd);
+ ASSERT_EQ(0, close(ruleset_fd));
+
+ /* Checks that previous access rights are unchanged with layer 6. */
+ ASSERT_EQ(0, test_open(file1_s1d3, O_RDONLY));
+ ASSERT_EQ(EACCES, test_open(file1_s1d3, O_WRONLY));
+ ASSERT_EQ(EACCES, test_open(file2_s1d3, O_WRONLY));
+ ASSERT_EQ(EACCES, test_open(file2_s1d3, O_RDONLY));
+
+ ruleset_fd = create_ruleset(_metadata,
+ LANDLOCK_ACCESS_FS_READ_FILE |
+ LANDLOCK_ACCESS_FS_WRITE_FILE,
+ layer7_read_write);
+ ASSERT_LE(0, ruleset_fd);
+ enforce_ruleset(_metadata, ruleset_fd);
+ ASSERT_EQ(0, close(ruleset_fd));
+
+ /* Checks read access is now denied with layer 7. */
+ ASSERT_EQ(EACCES, test_open(file1_s1d3, O_RDONLY));
+ ASSERT_EQ(EACCES, test_open(file1_s1d3, O_WRONLY));
+ ASSERT_EQ(EACCES, test_open(file2_s1d3, O_WRONLY));
+ ASSERT_EQ(EACCES, test_open(file2_s1d3, O_RDONLY));
+}
+
+TEST_F_FORK(layout1, inherit_subset)
+{
+ const struct rule rules[] = {
+ {
+ .path = dir_s1d2,
+ .access = LANDLOCK_ACCESS_FS_READ_FILE |
+ LANDLOCK_ACCESS_FS_READ_DIR,
+ },
+ {},
+ };
+ const int ruleset_fd = create_ruleset(_metadata, ACCESS_RW, rules);
+
+ ASSERT_LE(0, ruleset_fd);
+ enforce_ruleset(_metadata, ruleset_fd);
+
+ ASSERT_EQ(EACCES, test_open(file1_s1d1, O_WRONLY));
+ ASSERT_EQ(EACCES, test_open(dir_s1d1, O_RDONLY | O_DIRECTORY));
+
+ /* Write access is forbidden. */
+ ASSERT_EQ(EACCES, test_open(file1_s1d2, O_WRONLY));
+ /* Readdir access is allowed. */
+ ASSERT_EQ(0, test_open(dir_s1d2, O_RDONLY | O_DIRECTORY));
+
+ /* Write access is forbidden. */
+ ASSERT_EQ(EACCES, test_open(file1_s1d3, O_WRONLY));
+ /* Readdir access is allowed. */
+ ASSERT_EQ(0, test_open(dir_s1d3, O_RDONLY | O_DIRECTORY));
+
+ /*
+ * Tests shared rule extension: the following rules should not grant
+ * any new access, only remove some. Once enforced, these rules are
+ * ANDed with the previous ones.
+ */
+ add_path_beneath(_metadata, ruleset_fd, LANDLOCK_ACCESS_FS_WRITE_FILE,
+ dir_s1d2);
+ /*
+ * According to ruleset_fd, dir_s1d2 should now have the
+ * LANDLOCK_ACCESS_FS_READ_FILE and LANDLOCK_ACCESS_FS_WRITE_FILE
+ * access rights (even if this directory is opened a second time).
+ * However, when enforcing this updated ruleset, the ruleset tied to
+ * the current process (i.e. its domain) will still only have the
+ * dir_s1d2 with LANDLOCK_ACCESS_FS_READ_FILE and
+ * LANDLOCK_ACCESS_FS_READ_DIR accesses, but
+ * LANDLOCK_ACCESS_FS_WRITE_FILE must not be allowed because it would
+ * be a privilege escalation.
+ */
+ enforce_ruleset(_metadata, ruleset_fd);
+
+ /* Same tests and results as above. */
+ ASSERT_EQ(EACCES, test_open(file1_s1d1, O_WRONLY));
+ ASSERT_EQ(EACCES, test_open(dir_s1d1, O_RDONLY | O_DIRECTORY));
+
+ /* It is still forbidden to write in file1_s1d2. */
+ ASSERT_EQ(EACCES, test_open(file1_s1d2, O_WRONLY));
+ /* Readdir access is still allowed. */
+ ASSERT_EQ(0, test_open(dir_s1d2, O_RDONLY | O_DIRECTORY));
+
+ /* It is still forbidden to write in file1_s1d3. */
+ ASSERT_EQ(EACCES, test_open(file1_s1d3, O_WRONLY));
+ /* Readdir access is still allowed. */
+ ASSERT_EQ(0, test_open(dir_s1d3, O_RDONLY | O_DIRECTORY));
+
+ /*
+ * Try to get more privileges by adding new access rights to the parent
+ * directory: dir_s1d1.
+ */
+ add_path_beneath(_metadata, ruleset_fd, ACCESS_RW, dir_s1d1);
+ enforce_ruleset(_metadata, ruleset_fd);
+
+ /* Same tests and results as above. */
+ ASSERT_EQ(EACCES, test_open(file1_s1d1, O_WRONLY));
+ ASSERT_EQ(EACCES, test_open(dir_s1d1, O_RDONLY | O_DIRECTORY));
+
+ /* It is still forbidden to write in file1_s1d2. */
+ ASSERT_EQ(EACCES, test_open(file1_s1d2, O_WRONLY));
+ /* Readdir access is still allowed. */
+ ASSERT_EQ(0, test_open(dir_s1d2, O_RDONLY | O_DIRECTORY));
+
+ /* It is still forbidden to write in file1_s1d3. */
+ ASSERT_EQ(EACCES, test_open(file1_s1d3, O_WRONLY));
+ /* Readdir access is still allowed. */
+ ASSERT_EQ(0, test_open(dir_s1d3, O_RDONLY | O_DIRECTORY));
+
+ /*
+ * Now, dir_s1d3 get a new rule tied to it, only allowing
+ * LANDLOCK_ACCESS_FS_WRITE_FILE. The (kernel internal) difference is
+ * that there was no rule tied to it before.
+ */
+ add_path_beneath(_metadata, ruleset_fd, LANDLOCK_ACCESS_FS_WRITE_FILE,
+ dir_s1d3);
+ enforce_ruleset(_metadata, ruleset_fd);
+ ASSERT_EQ(0, close(ruleset_fd));
+
+ /*
+ * Same tests and results as above, except for open(dir_s1d3) which is
+ * now denied because the new rule mask the rule previously inherited
+ * from dir_s1d2.
+ */
+
+ /* Same tests and results as above. */
+ ASSERT_EQ(EACCES, test_open(file1_s1d1, O_WRONLY));
+ ASSERT_EQ(EACCES, test_open(dir_s1d1, O_RDONLY | O_DIRECTORY));
+
+ /* It is still forbidden to write in file1_s1d2. */
+ ASSERT_EQ(EACCES, test_open(file1_s1d2, O_WRONLY));
+ /* Readdir access is still allowed. */
+ ASSERT_EQ(0, test_open(dir_s1d2, O_RDONLY | O_DIRECTORY));
+
+ /* It is still forbidden to write in file1_s1d3. */
+ ASSERT_EQ(EACCES, test_open(file1_s1d3, O_WRONLY));
+ /*
+ * Readdir of dir_s1d3 is still allowed because of the OR policy inside
+ * the same layer.
+ */
+ ASSERT_EQ(0, test_open(dir_s1d3, O_RDONLY | O_DIRECTORY));
+}
+
+TEST_F_FORK(layout1, inherit_superset)
+{
+ const struct rule rules[] = {
+ {
+ .path = dir_s1d3,
+ .access = ACCESS_RO,
+ },
+ {},
+ };
+ const int ruleset_fd = create_ruleset(_metadata, ACCESS_RW, rules);
+
+ ASSERT_LE(0, ruleset_fd);
+ enforce_ruleset(_metadata, ruleset_fd);
+
+ /* Readdir access is denied for dir_s1d2. */
+ ASSERT_EQ(EACCES, test_open(dir_s1d2, O_RDONLY | O_DIRECTORY));
+ /* Readdir access is allowed for dir_s1d3. */
+ ASSERT_EQ(0, test_open(dir_s1d3, O_RDONLY | O_DIRECTORY));
+ /* File access is allowed for file1_s1d3. */
+ ASSERT_EQ(0, test_open(file1_s1d3, O_RDONLY));
+
+ /* Now dir_s1d2, parent of dir_s1d3, gets a new rule tied to it. */
+ add_path_beneath(_metadata, ruleset_fd,
+ LANDLOCK_ACCESS_FS_READ_FILE |
+ LANDLOCK_ACCESS_FS_READ_DIR,
+ dir_s1d2);
+ enforce_ruleset(_metadata, ruleset_fd);
+ ASSERT_EQ(0, close(ruleset_fd));
+
+ /* Readdir access is still denied for dir_s1d2. */
+ ASSERT_EQ(EACCES, test_open(dir_s1d2, O_RDONLY | O_DIRECTORY));
+ /* Readdir access is still allowed for dir_s1d3. */
+ ASSERT_EQ(0, test_open(dir_s1d3, O_RDONLY | O_DIRECTORY));
+ /* File access is still allowed for file1_s1d3. */
+ ASSERT_EQ(0, test_open(file1_s1d3, O_RDONLY));
+}
+
+TEST_F_FORK(layout0, max_layers)
+{
+ int i, err;
+ const struct rule rules[] = {
+ {
+ .path = TMP_DIR,
+ .access = ACCESS_RO,
+ },
+ {},
+ };
+ const int ruleset_fd = create_ruleset(_metadata, ACCESS_RW, rules);
+
+ ASSERT_LE(0, ruleset_fd);
+ for (i = 0; i < 16; i++)
+ enforce_ruleset(_metadata, ruleset_fd);
+
+ for (i = 0; i < 2; i++) {
+ err = landlock_restrict_self(ruleset_fd, 0);
+ ASSERT_EQ(-1, err);
+ ASSERT_EQ(E2BIG, errno);
+ }
+ ASSERT_EQ(0, close(ruleset_fd));
+}
+
+TEST_F_FORK(layout1, empty_or_same_ruleset)
+{
+ struct landlock_ruleset_attr ruleset_attr = {};
+ int ruleset_fd;
+
+ /* Tests empty handled_access_fs. */
+ ruleset_fd =
+ landlock_create_ruleset(&ruleset_attr, sizeof(ruleset_attr), 0);
+ ASSERT_LE(-1, ruleset_fd);
+ ASSERT_EQ(ENOMSG, errno);
+
+ /* Enforces policy which deny read access to all files. */
+ ruleset_attr.handled_access_fs = LANDLOCK_ACCESS_FS_READ_FILE;
+ ruleset_fd =
+ landlock_create_ruleset(&ruleset_attr, sizeof(ruleset_attr), 0);
+ ASSERT_LE(0, ruleset_fd);
+ enforce_ruleset(_metadata, ruleset_fd);
+ ASSERT_EQ(EACCES, test_open(file1_s1d1, O_RDONLY));
+ ASSERT_EQ(0, test_open(dir_s1d1, O_RDONLY));
+
+ /* Nests a policy which deny read access to all directories. */
+ ruleset_attr.handled_access_fs = LANDLOCK_ACCESS_FS_READ_DIR;
+ ruleset_fd =
+ landlock_create_ruleset(&ruleset_attr, sizeof(ruleset_attr), 0);
+ ASSERT_LE(0, ruleset_fd);
+ enforce_ruleset(_metadata, ruleset_fd);
+ ASSERT_EQ(EACCES, test_open(file1_s1d1, O_RDONLY));
+ ASSERT_EQ(EACCES, test_open(dir_s1d1, O_RDONLY));
+
+ /* Enforces a second time with the same ruleset. */
+ enforce_ruleset(_metadata, ruleset_fd);
+ ASSERT_EQ(0, close(ruleset_fd));
+}
+
+TEST_F_FORK(layout1, rule_on_mountpoint)
+{
+ const struct rule rules[] = {
+ {
+ .path = dir_s1d1,
+ .access = ACCESS_RO,
+ },
+ {
+ /* dir_s3d2 is a mount point. */
+ .path = dir_s3d2,
+ .access = ACCESS_RO,
+ },
+ {},
+ };
+ const int ruleset_fd = create_ruleset(_metadata, ACCESS_RW, rules);
+
+ ASSERT_LE(0, ruleset_fd);
+ enforce_ruleset(_metadata, ruleset_fd);
+ ASSERT_EQ(0, close(ruleset_fd));
+
+ ASSERT_EQ(0, test_open(dir_s1d1, O_RDONLY));
+
+ ASSERT_EQ(EACCES, test_open(dir_s2d1, O_RDONLY));
+
+ ASSERT_EQ(EACCES, test_open(dir_s3d1, O_RDONLY));
+ ASSERT_EQ(0, test_open(dir_s3d2, O_RDONLY));
+ ASSERT_EQ(0, test_open(dir_s3d3, O_RDONLY));
+}
+
+TEST_F_FORK(layout1, rule_over_mountpoint)
+{
+ const struct rule rules[] = {
+ {
+ .path = dir_s1d1,
+ .access = ACCESS_RO,
+ },
+ {
+ /* dir_s3d2 is a mount point. */
+ .path = dir_s3d1,
+ .access = ACCESS_RO,
+ },
+ {},
+ };
+ const int ruleset_fd = create_ruleset(_metadata, ACCESS_RW, rules);
+
+ ASSERT_LE(0, ruleset_fd);
+ enforce_ruleset(_metadata, ruleset_fd);
+ ASSERT_EQ(0, close(ruleset_fd));
+
+ ASSERT_EQ(0, test_open(dir_s1d1, O_RDONLY));
+
+ ASSERT_EQ(EACCES, test_open(dir_s2d1, O_RDONLY));
+
+ ASSERT_EQ(0, test_open(dir_s3d1, O_RDONLY));
+ ASSERT_EQ(0, test_open(dir_s3d2, O_RDONLY));
+ ASSERT_EQ(0, test_open(dir_s3d3, O_RDONLY));
+}
+
+/*
+ * This test verifies that we can apply a landlock rule on the root directory
+ * (which might require special handling).
+ */
+TEST_F_FORK(layout1, rule_over_root_allow_then_deny)
+{
+ struct rule rules[] = {
+ {
+ .path = "/",
+ .access = ACCESS_RO,
+ },
+ {},
+ };
+ int ruleset_fd = create_ruleset(_metadata, ACCESS_RW, rules);
+
+ ASSERT_LE(0, ruleset_fd);
+ enforce_ruleset(_metadata, ruleset_fd);
+ ASSERT_EQ(0, close(ruleset_fd));
+
+ /* Checks allowed access. */
+ ASSERT_EQ(0, test_open("/", O_RDONLY));
+ ASSERT_EQ(0, test_open(dir_s1d1, O_RDONLY));
+
+ rules[0].access = LANDLOCK_ACCESS_FS_READ_FILE;
+ ruleset_fd = create_ruleset(_metadata, ACCESS_RW, rules);
+ ASSERT_LE(0, ruleset_fd);
+ enforce_ruleset(_metadata, ruleset_fd);
+ ASSERT_EQ(0, close(ruleset_fd));
+
+ /* Checks denied access (on a directory). */
+ ASSERT_EQ(EACCES, test_open("/", O_RDONLY));
+ ASSERT_EQ(EACCES, test_open(dir_s1d1, O_RDONLY));
+}
+
+TEST_F_FORK(layout1, rule_over_root_deny)
+{
+ const struct rule rules[] = {
+ {
+ .path = "/",
+ .access = LANDLOCK_ACCESS_FS_READ_FILE,
+ },
+ {},
+ };
+ const int ruleset_fd = create_ruleset(_metadata, ACCESS_RW, rules);
+
+ ASSERT_LE(0, ruleset_fd);
+ enforce_ruleset(_metadata, ruleset_fd);
+ ASSERT_EQ(0, close(ruleset_fd));
+
+ /* Checks denied access (on a directory). */
+ ASSERT_EQ(EACCES, test_open("/", O_RDONLY));
+ ASSERT_EQ(EACCES, test_open(dir_s1d1, O_RDONLY));
+}
+
+TEST_F_FORK(layout1, rule_inside_mount_ns)
+{
+ const struct rule rules[] = {
+ {
+ .path = "s3d3",
+ .access = ACCESS_RO,
+ },
+ {},
+ };
+ int ruleset_fd;
+
+ set_cap(_metadata, CAP_SYS_ADMIN);
+ ASSERT_EQ(0, syscall(__NR_pivot_root, dir_s3d2, dir_s3d3))
+ {
+ TH_LOG("Failed to pivot root: %s", strerror(errno));
+ };
+ ASSERT_EQ(0, chdir("/"));
+ clear_cap(_metadata, CAP_SYS_ADMIN);
+
+ ruleset_fd = create_ruleset(_metadata, ACCESS_RW, rules);
+ ASSERT_LE(0, ruleset_fd);
+ enforce_ruleset(_metadata, ruleset_fd);
+ ASSERT_EQ(0, close(ruleset_fd));
+
+ ASSERT_EQ(0, test_open("s3d3", O_RDONLY));
+ ASSERT_EQ(EACCES, test_open("/", O_RDONLY));
+}
+
+TEST_F_FORK(layout1, mount_and_pivot)
+{
+ const struct rule rules[] = {
+ {
+ .path = dir_s3d2,
+ .access = ACCESS_RO,
+ },
+ {},
+ };
+ const int ruleset_fd = create_ruleset(_metadata, ACCESS_RW, rules);
+
+ ASSERT_LE(0, ruleset_fd);
+ enforce_ruleset(_metadata, ruleset_fd);
+ ASSERT_EQ(0, close(ruleset_fd));
+
+ set_cap(_metadata, CAP_SYS_ADMIN);
+ ASSERT_EQ(-1, mount(NULL, dir_s3d2, NULL, MS_RDONLY, NULL));
+ ASSERT_EQ(EPERM, errno);
+ ASSERT_EQ(-1, syscall(__NR_pivot_root, dir_s3d2, dir_s3d3));
+ ASSERT_EQ(EPERM, errno);
+ clear_cap(_metadata, CAP_SYS_ADMIN);
+}
+
+TEST_F_FORK(layout1, move_mount)
+{
+ const struct rule rules[] = {
+ {
+ .path = dir_s3d2,
+ .access = ACCESS_RO,
+ },
+ {},
+ };
+ const int ruleset_fd = create_ruleset(_metadata, ACCESS_RW, rules);
+
+ ASSERT_LE(0, ruleset_fd);
+
+ set_cap(_metadata, CAP_SYS_ADMIN);
+ ASSERT_EQ(0, syscall(__NR_move_mount, AT_FDCWD, dir_s3d2, AT_FDCWD,
+ dir_s1d2, 0))
+ {
+ TH_LOG("Failed to move mount: %s", strerror(errno));
+ }
+
+ ASSERT_EQ(0, syscall(__NR_move_mount, AT_FDCWD, dir_s1d2, AT_FDCWD,
+ dir_s3d2, 0));
+ clear_cap(_metadata, CAP_SYS_ADMIN);
+
+ enforce_ruleset(_metadata, ruleset_fd);
+ ASSERT_EQ(0, close(ruleset_fd));
+
+ set_cap(_metadata, CAP_SYS_ADMIN);
+ ASSERT_EQ(-1, syscall(__NR_move_mount, AT_FDCWD, dir_s3d2, AT_FDCWD,
+ dir_s1d2, 0));
+ ASSERT_EQ(EPERM, errno);
+ clear_cap(_metadata, CAP_SYS_ADMIN);
+}
+
+TEST_F_FORK(layout1, topology_changes_with_net_only)
+{
+ const struct landlock_ruleset_attr ruleset_net = {
+ .handled_access_net = LANDLOCK_ACCESS_NET_BIND_TCP |
+ LANDLOCK_ACCESS_NET_CONNECT_TCP,
+ };
+ int ruleset_fd;
+
+ /* Add network restrictions. */
+ ruleset_fd =
+ landlock_create_ruleset(&ruleset_net, sizeof(ruleset_net), 0);
+ ASSERT_LE(0, ruleset_fd);
+ enforce_ruleset(_metadata, ruleset_fd);
+ ASSERT_EQ(0, close(ruleset_fd));
+
+ /* Mount, remount, move_mount, umount, and pivot_root checks. */
+ set_cap(_metadata, CAP_SYS_ADMIN);
+ ASSERT_EQ(0, mount_opt(&mnt_tmp, dir_s1d2));
+ ASSERT_EQ(0, mount(NULL, dir_s1d2, NULL, MS_PRIVATE | MS_REC, NULL));
+ ASSERT_EQ(0, syscall(__NR_move_mount, AT_FDCWD, dir_s1d2, AT_FDCWD,
+ dir_s2d2, 0));
+ ASSERT_EQ(0, umount(dir_s2d2));
+ ASSERT_EQ(0, syscall(__NR_pivot_root, dir_s3d2, dir_s3d3));
+ ASSERT_EQ(0, chdir("/"));
+ clear_cap(_metadata, CAP_SYS_ADMIN);
+}
+
+TEST_F_FORK(layout1, topology_changes_with_net_and_fs)
+{
+ const struct landlock_ruleset_attr ruleset_net_fs = {
+ .handled_access_net = LANDLOCK_ACCESS_NET_BIND_TCP |
+ LANDLOCK_ACCESS_NET_CONNECT_TCP,
+ .handled_access_fs = LANDLOCK_ACCESS_FS_EXECUTE,
+ };
+ int ruleset_fd;
+
+ /* Add network and filesystem restrictions. */
+ ruleset_fd = landlock_create_ruleset(&ruleset_net_fs,
+ sizeof(ruleset_net_fs), 0);
+ ASSERT_LE(0, ruleset_fd);
+ enforce_ruleset(_metadata, ruleset_fd);
+ ASSERT_EQ(0, close(ruleset_fd));
+
+ /* Mount, remount, move_mount, umount, and pivot_root checks. */
+ set_cap(_metadata, CAP_SYS_ADMIN);
+ ASSERT_EQ(-1, mount_opt(&mnt_tmp, dir_s1d2));
+ ASSERT_EQ(EPERM, errno);
+ ASSERT_EQ(-1, mount(NULL, dir_s3d2, NULL, MS_PRIVATE | MS_REC, NULL));
+ ASSERT_EQ(EPERM, errno);
+ ASSERT_EQ(-1, syscall(__NR_move_mount, AT_FDCWD, dir_s3d2, AT_FDCWD,
+ dir_s2d2, 0));
+ ASSERT_EQ(EPERM, errno);
+ ASSERT_EQ(-1, umount(dir_s3d2));
+ ASSERT_EQ(EPERM, errno);
+ ASSERT_EQ(-1, syscall(__NR_pivot_root, dir_s3d2, dir_s3d3));
+ ASSERT_EQ(EPERM, errno);
+ clear_cap(_metadata, CAP_SYS_ADMIN);
+}
+
+TEST_F_FORK(layout1, release_inodes)
+{
+ const struct rule rules[] = {
+ {
+ .path = dir_s1d1,
+ .access = ACCESS_RO,
+ },
+ {
+ .path = dir_s3d2,
+ .access = ACCESS_RO,
+ },
+ {
+ .path = dir_s3d3,
+ .access = ACCESS_RO,
+ },
+ {},
+ };
+ const int ruleset_fd = create_ruleset(_metadata, ACCESS_RW, rules);
+
+ ASSERT_LE(0, ruleset_fd);
+ /* Unmount a file hierarchy while it is being used by a ruleset. */
+ set_cap(_metadata, CAP_SYS_ADMIN);
+ ASSERT_EQ(0, umount(dir_s3d2));
+ clear_cap(_metadata, CAP_SYS_ADMIN);
+
+ enforce_ruleset(_metadata, ruleset_fd);
+ ASSERT_EQ(0, close(ruleset_fd));
+
+ ASSERT_EQ(0, test_open(file1_s1d1, O_RDONLY));
+ ASSERT_EQ(EACCES, test_open(dir_s3d2, O_RDONLY));
+ /* This dir_s3d3 would not be allowed and does not exist anyway. */
+ ASSERT_EQ(ENOENT, test_open(dir_s3d3, O_RDONLY));
+}
+
+enum relative_access {
+ REL_OPEN,
+ REL_CHDIR,
+ REL_CHROOT_ONLY,
+ REL_CHROOT_CHDIR,
+};
+
+static void test_relative_path(struct __test_metadata *const _metadata,
+ const enum relative_access rel)
+{
+ /*
+ * Common layer to check that chroot doesn't ignore it (i.e. a chroot
+ * is not a disconnected root directory).
+ */
+ const struct rule layer1_base[] = {
+ {
+ .path = TMP_DIR,
+ .access = ACCESS_RO,
+ },
+ {},
+ };
+ const struct rule layer2_subs[] = {
+ {
+ .path = dir_s1d2,
+ .access = ACCESS_RO,
+ },
+ {
+ .path = dir_s2d2,
+ .access = ACCESS_RO,
+ },
+ {},
+ };
+ int dirfd, ruleset_fd;
+
+ ruleset_fd = create_ruleset(_metadata, ACCESS_RW, layer1_base);
+ ASSERT_LE(0, ruleset_fd);
+ enforce_ruleset(_metadata, ruleset_fd);
+ ASSERT_EQ(0, close(ruleset_fd));
+
+ ruleset_fd = create_ruleset(_metadata, ACCESS_RW, layer2_subs);
+
+ ASSERT_LE(0, ruleset_fd);
+ switch (rel) {
+ case REL_OPEN:
+ case REL_CHDIR:
+ break;
+ case REL_CHROOT_ONLY:
+ ASSERT_EQ(0, chdir(dir_s2d2));
+ break;
+ case REL_CHROOT_CHDIR:
+ ASSERT_EQ(0, chdir(dir_s1d2));
+ break;
+ default:
+ ASSERT_TRUE(false);
+ return;
+ }
+
+ set_cap(_metadata, CAP_SYS_CHROOT);
+ enforce_ruleset(_metadata, ruleset_fd);
+
+ switch (rel) {
+ case REL_OPEN:
+ dirfd = open(dir_s1d2, O_DIRECTORY);
+ ASSERT_LE(0, dirfd);
+ break;
+ case REL_CHDIR:
+ ASSERT_EQ(0, chdir(dir_s1d2));
+ dirfd = AT_FDCWD;
+ break;
+ case REL_CHROOT_ONLY:
+ /* Do chroot into dir_s1d2 (relative to dir_s2d2). */
+ ASSERT_EQ(0, chroot("../../s1d1/s1d2"))
+ {
+ TH_LOG("Failed to chroot: %s", strerror(errno));
+ }
+ dirfd = AT_FDCWD;
+ break;
+ case REL_CHROOT_CHDIR:
+ /* Do chroot into dir_s1d2. */
+ ASSERT_EQ(0, chroot("."))
+ {
+ TH_LOG("Failed to chroot: %s", strerror(errno));
+ }
+ dirfd = AT_FDCWD;
+ break;
+ }
+
+ ASSERT_EQ((rel == REL_CHROOT_CHDIR) ? 0 : EACCES,
+ test_open_rel(dirfd, "..", O_RDONLY));
+ ASSERT_EQ(0, test_open_rel(dirfd, ".", O_RDONLY));
+
+ if (rel == REL_CHROOT_ONLY) {
+ /* The current directory is dir_s2d2. */
+ ASSERT_EQ(0, test_open_rel(dirfd, "./s2d3", O_RDONLY));
+ } else {
+ /* The current directory is dir_s1d2. */
+ ASSERT_EQ(0, test_open_rel(dirfd, "./s1d3", O_RDONLY));
+ }
+
+ if (rel == REL_CHROOT_ONLY || rel == REL_CHROOT_CHDIR) {
+ /* Checks the root dir_s1d2. */
+ ASSERT_EQ(0, test_open_rel(dirfd, "/..", O_RDONLY));
+ ASSERT_EQ(0, test_open_rel(dirfd, "/", O_RDONLY));
+ ASSERT_EQ(0, test_open_rel(dirfd, "/f1", O_RDONLY));
+ ASSERT_EQ(0, test_open_rel(dirfd, "/s1d3", O_RDONLY));
+ }
+
+ if (rel != REL_CHROOT_CHDIR) {
+ ASSERT_EQ(EACCES, test_open_rel(dirfd, "../../s1d1", O_RDONLY));
+ ASSERT_EQ(0, test_open_rel(dirfd, "../../s1d1/s1d2", O_RDONLY));
+ ASSERT_EQ(0, test_open_rel(dirfd, "../../s1d1/s1d2/s1d3",
+ O_RDONLY));
+
+ ASSERT_EQ(EACCES, test_open_rel(dirfd, "../../s2d1", O_RDONLY));
+ ASSERT_EQ(0, test_open_rel(dirfd, "../../s2d1/s2d2", O_RDONLY));
+ ASSERT_EQ(0, test_open_rel(dirfd, "../../s2d1/s2d2/s2d3",
+ O_RDONLY));
+ }
+
+ if (rel == REL_OPEN)
+ ASSERT_EQ(0, close(dirfd));
+ ASSERT_EQ(0, close(ruleset_fd));
+}
+
+TEST_F_FORK(layout1, relative_open)
+{
+ test_relative_path(_metadata, REL_OPEN);
+}
+
+TEST_F_FORK(layout1, relative_chdir)
+{
+ test_relative_path(_metadata, REL_CHDIR);
+}
+
+TEST_F_FORK(layout1, relative_chroot_only)
+{
+ test_relative_path(_metadata, REL_CHROOT_ONLY);
+}
+
+TEST_F_FORK(layout1, relative_chroot_chdir)
+{
+ test_relative_path(_metadata, REL_CHROOT_CHDIR);
+}
+
+static void copy_binary(struct __test_metadata *const _metadata,
+ const char *const dst_path)
+{
+ int dst_fd, src_fd;
+ struct stat statbuf;
+
+ dst_fd = open(dst_path, O_WRONLY | O_TRUNC | O_CLOEXEC);
+ ASSERT_LE(0, dst_fd)
+ {
+ TH_LOG("Failed to open \"%s\": %s", dst_path, strerror(errno));
+ }
+ src_fd = open(BINARY_PATH, O_RDONLY | O_CLOEXEC);
+ ASSERT_LE(0, src_fd)
+ {
+ TH_LOG("Failed to open \"" BINARY_PATH "\": %s",
+ strerror(errno));
+ }
+ ASSERT_EQ(0, fstat(src_fd, &statbuf));
+ ASSERT_EQ(statbuf.st_size,
+ sendfile(dst_fd, src_fd, 0, statbuf.st_size));
+ ASSERT_EQ(0, close(src_fd));
+ ASSERT_EQ(0, close(dst_fd));
+}
+
+static void test_execute(struct __test_metadata *const _metadata, const int err,
+ const char *const path)
+{
+ int status;
+ char *const argv[] = { (char *)path, NULL };
+ const pid_t child = fork();
+
+ ASSERT_LE(0, child);
+ if (child == 0) {
+ ASSERT_EQ(err ? -1 : 0, execve(path, argv, NULL))
+ {
+ TH_LOG("Failed to execute \"%s\": %s", path,
+ strerror(errno));
+ };
+ ASSERT_EQ(err, errno);
+ _exit(__test_passed(_metadata) ? 2 : 1);
+ return;
+ }
+ ASSERT_EQ(child, waitpid(child, &status, 0));
+ ASSERT_EQ(1, WIFEXITED(status));
+ ASSERT_EQ(err ? 2 : 0, WEXITSTATUS(status))
+ {
+ TH_LOG("Unexpected return code for \"%s\": %s", path,
+ strerror(errno));
+ };
+}
+
+TEST_F_FORK(layout1, execute)
+{
+ const struct rule rules[] = {
+ {
+ .path = dir_s1d2,
+ .access = LANDLOCK_ACCESS_FS_EXECUTE,
+ },
+ {},
+ };
+ const int ruleset_fd =
+ create_ruleset(_metadata, rules[0].access, rules);
+
+ ASSERT_LE(0, ruleset_fd);
+ copy_binary(_metadata, file1_s1d1);
+ copy_binary(_metadata, file1_s1d2);
+ copy_binary(_metadata, file1_s1d3);
+
+ enforce_ruleset(_metadata, ruleset_fd);
+ ASSERT_EQ(0, close(ruleset_fd));
+
+ ASSERT_EQ(0, test_open(dir_s1d1, O_RDONLY));
+ ASSERT_EQ(0, test_open(file1_s1d1, O_RDONLY));
+ test_execute(_metadata, EACCES, file1_s1d1);
+
+ ASSERT_EQ(0, test_open(dir_s1d2, O_RDONLY));
+ ASSERT_EQ(0, test_open(file1_s1d2, O_RDONLY));
+ test_execute(_metadata, 0, file1_s1d2);
+
+ ASSERT_EQ(0, test_open(dir_s1d3, O_RDONLY));
+ ASSERT_EQ(0, test_open(file1_s1d3, O_RDONLY));
+ test_execute(_metadata, 0, file1_s1d3);
+}
+
+TEST_F_FORK(layout1, link)
+{
+ const struct rule layer1[] = {
+ {
+ .path = dir_s1d2,
+ .access = LANDLOCK_ACCESS_FS_MAKE_REG,
+ },
+ {},
+ };
+ const struct rule layer2[] = {
+ {
+ .path = dir_s1d3,
+ .access = LANDLOCK_ACCESS_FS_REMOVE_FILE,
+ },
+ {},
+ };
+ int ruleset_fd = create_ruleset(_metadata, layer1[0].access, layer1);
+
+ ASSERT_LE(0, ruleset_fd);
+
+ ASSERT_EQ(0, unlink(file1_s1d1));
+ ASSERT_EQ(0, unlink(file1_s1d2));
+ ASSERT_EQ(0, unlink(file1_s1d3));
+
+ enforce_ruleset(_metadata, ruleset_fd);
+ ASSERT_EQ(0, close(ruleset_fd));
+
+ ASSERT_EQ(-1, link(file2_s1d1, file1_s1d1));
+ ASSERT_EQ(EACCES, errno);
+
+ /* Denies linking because of reparenting. */
+ ASSERT_EQ(-1, link(file1_s2d1, file1_s1d2));
+ ASSERT_EQ(EXDEV, errno);
+ ASSERT_EQ(-1, link(file2_s1d2, file1_s1d3));
+ ASSERT_EQ(EXDEV, errno);
+ ASSERT_EQ(-1, link(file2_s1d3, file1_s1d2));
+ ASSERT_EQ(EXDEV, errno);
+
+ ASSERT_EQ(0, link(file2_s1d2, file1_s1d2));
+ ASSERT_EQ(0, link(file2_s1d3, file1_s1d3));
+
+ /* Prepares for next unlinks. */
+ ASSERT_EQ(0, unlink(file2_s1d2));
+ ASSERT_EQ(0, unlink(file2_s1d3));
+
+ ruleset_fd = create_ruleset(_metadata, layer2[0].access, layer2);
+ ASSERT_LE(0, ruleset_fd);
+ enforce_ruleset(_metadata, ruleset_fd);
+ ASSERT_EQ(0, close(ruleset_fd));
+
+ /* Checks that linkind doesn't require the ability to delete a file. */
+ ASSERT_EQ(0, link(file1_s1d2, file2_s1d2));
+ ASSERT_EQ(0, link(file1_s1d3, file2_s1d3));
+}
+
+static int test_rename(const char *const oldpath, const char *const newpath)
+{
+ if (rename(oldpath, newpath))
+ return errno;
+ return 0;
+}
+
+static int test_exchange(const char *const oldpath, const char *const newpath)
+{
+ if (renameat2(AT_FDCWD, oldpath, AT_FDCWD, newpath, RENAME_EXCHANGE))
+ return errno;
+ return 0;
+}
+
+TEST_F_FORK(layout1, rename_file)
+{
+ const struct rule rules[] = {
+ {
+ .path = dir_s1d3,
+ .access = LANDLOCK_ACCESS_FS_REMOVE_FILE,
+ },
+ {
+ .path = dir_s2d2,
+ .access = LANDLOCK_ACCESS_FS_REMOVE_FILE,
+ },
+ {},
+ };
+ const int ruleset_fd =
+ create_ruleset(_metadata, rules[0].access, rules);
+
+ ASSERT_LE(0, ruleset_fd);
+
+ ASSERT_EQ(0, unlink(file1_s1d2));
+
+ enforce_ruleset(_metadata, ruleset_fd);
+ ASSERT_EQ(0, close(ruleset_fd));
+
+ /*
+ * Tries to replace a file, from a directory that allows file removal,
+ * but to a different directory (which also allows file removal).
+ */
+ ASSERT_EQ(-1, rename(file1_s2d3, file1_s1d3));
+ ASSERT_EQ(EXDEV, errno);
+ ASSERT_EQ(-1, renameat2(AT_FDCWD, file1_s2d3, AT_FDCWD, file1_s1d3,
+ RENAME_EXCHANGE));
+ ASSERT_EQ(EXDEV, errno);
+ ASSERT_EQ(-1, renameat2(AT_FDCWD, file1_s2d3, AT_FDCWD, dir_s1d3,
+ RENAME_EXCHANGE));
+ ASSERT_EQ(EXDEV, errno);
+
+ /*
+ * Tries to replace a file, from a directory that denies file removal,
+ * to a different directory (which allows file removal).
+ */
+ ASSERT_EQ(-1, rename(file1_s2d1, file1_s1d3));
+ ASSERT_EQ(EACCES, errno);
+ ASSERT_EQ(-1, renameat2(AT_FDCWD, file1_s2d1, AT_FDCWD, file1_s1d3,
+ RENAME_EXCHANGE));
+ ASSERT_EQ(EACCES, errno);
+ ASSERT_EQ(-1, renameat2(AT_FDCWD, dir_s2d2, AT_FDCWD, file1_s1d3,
+ RENAME_EXCHANGE));
+ ASSERT_EQ(EXDEV, errno);
+
+ /* Exchanges files and directories that partially allow removal. */
+ ASSERT_EQ(-1, renameat2(AT_FDCWD, dir_s2d2, AT_FDCWD, file1_s2d1,
+ RENAME_EXCHANGE));
+ ASSERT_EQ(EACCES, errno);
+ /* Checks that file1_s2d1 cannot be removed (instead of ENOTDIR). */
+ ASSERT_EQ(-1, rename(dir_s2d2, file1_s2d1));
+ ASSERT_EQ(EACCES, errno);
+ ASSERT_EQ(-1, renameat2(AT_FDCWD, file1_s2d1, AT_FDCWD, dir_s2d2,
+ RENAME_EXCHANGE));
+ ASSERT_EQ(EACCES, errno);
+ /* Checks that file1_s1d1 cannot be removed (instead of EISDIR). */
+ ASSERT_EQ(-1, rename(file1_s1d1, dir_s1d2));
+ ASSERT_EQ(EACCES, errno);
+
+ /* Renames files with different parents. */
+ ASSERT_EQ(-1, rename(file1_s2d2, file1_s1d2));
+ ASSERT_EQ(EXDEV, errno);
+ ASSERT_EQ(0, unlink(file1_s1d3));
+ ASSERT_EQ(-1, rename(file1_s2d1, file1_s1d3));
+ ASSERT_EQ(EACCES, errno);
+
+ /* Exchanges and renames files with same parent. */
+ ASSERT_EQ(0, renameat2(AT_FDCWD, file2_s2d3, AT_FDCWD, file1_s2d3,
+ RENAME_EXCHANGE));
+ ASSERT_EQ(0, rename(file2_s2d3, file1_s2d3));
+
+ /* Exchanges files and directories with same parent, twice. */
+ ASSERT_EQ(0, renameat2(AT_FDCWD, file1_s2d2, AT_FDCWD, dir_s2d3,
+ RENAME_EXCHANGE));
+ ASSERT_EQ(0, renameat2(AT_FDCWD, file1_s2d2, AT_FDCWD, dir_s2d3,
+ RENAME_EXCHANGE));
+}
+
+TEST_F_FORK(layout1, rename_dir)
+{
+ const struct rule rules[] = {
+ {
+ .path = dir_s1d2,
+ .access = LANDLOCK_ACCESS_FS_REMOVE_DIR,
+ },
+ {
+ .path = dir_s2d1,
+ .access = LANDLOCK_ACCESS_FS_REMOVE_DIR,
+ },
+ {},
+ };
+ const int ruleset_fd =
+ create_ruleset(_metadata, rules[0].access, rules);
+
+ ASSERT_LE(0, ruleset_fd);
+
+ /* Empties dir_s1d3 to allow renaming. */
+ ASSERT_EQ(0, unlink(file1_s1d3));
+ ASSERT_EQ(0, unlink(file2_s1d3));
+
+ enforce_ruleset(_metadata, ruleset_fd);
+ ASSERT_EQ(0, close(ruleset_fd));
+
+ /* Exchanges and renames directory to a different parent. */
+ ASSERT_EQ(-1, renameat2(AT_FDCWD, dir_s2d3, AT_FDCWD, dir_s1d3,
+ RENAME_EXCHANGE));
+ ASSERT_EQ(EXDEV, errno);
+ ASSERT_EQ(-1, rename(dir_s2d3, dir_s1d3));
+ ASSERT_EQ(EXDEV, errno);
+ ASSERT_EQ(-1, renameat2(AT_FDCWD, file1_s2d2, AT_FDCWD, dir_s1d3,
+ RENAME_EXCHANGE));
+ ASSERT_EQ(EXDEV, errno);
+
+ /*
+ * Exchanges directory to the same parent, which doesn't allow
+ * directory removal.
+ */
+ ASSERT_EQ(-1, renameat2(AT_FDCWD, dir_s1d1, AT_FDCWD, dir_s2d1,
+ RENAME_EXCHANGE));
+ ASSERT_EQ(EACCES, errno);
+ /* Checks that dir_s1d2 cannot be removed (instead of ENOTDIR). */
+ ASSERT_EQ(-1, rename(dir_s1d2, file1_s1d1));
+ ASSERT_EQ(EACCES, errno);
+ ASSERT_EQ(-1, renameat2(AT_FDCWD, file1_s1d1, AT_FDCWD, dir_s1d2,
+ RENAME_EXCHANGE));
+ ASSERT_EQ(EACCES, errno);
+ /* Checks that dir_s1d2 cannot be removed (instead of EISDIR). */
+ ASSERT_EQ(-1, rename(file1_s1d1, dir_s1d2));
+ ASSERT_EQ(EACCES, errno);
+
+ /*
+ * Exchanges and renames directory to the same parent, which allows
+ * directory removal.
+ */
+ ASSERT_EQ(0, renameat2(AT_FDCWD, dir_s1d3, AT_FDCWD, file1_s1d2,
+ RENAME_EXCHANGE));
+ ASSERT_EQ(0, unlink(dir_s1d3));
+ ASSERT_EQ(0, mkdir(dir_s1d3, 0700));
+ ASSERT_EQ(0, rename(file1_s1d2, dir_s1d3));
+ ASSERT_EQ(0, rmdir(dir_s1d3));
+}
+
+TEST_F_FORK(layout1, reparent_refer)
+{
+ const struct rule layer1[] = {
+ {
+ .path = dir_s1d2,
+ .access = LANDLOCK_ACCESS_FS_REFER,
+ },
+ {
+ .path = dir_s2d2,
+ .access = LANDLOCK_ACCESS_FS_REFER,
+ },
+ {},
+ };
+ int ruleset_fd =
+ create_ruleset(_metadata, LANDLOCK_ACCESS_FS_REFER, layer1);
+
+ ASSERT_LE(0, ruleset_fd);
+ enforce_ruleset(_metadata, ruleset_fd);
+ ASSERT_EQ(0, close(ruleset_fd));
+
+ ASSERT_EQ(-1, rename(dir_s1d2, dir_s2d1));
+ ASSERT_EQ(EXDEV, errno);
+ ASSERT_EQ(-1, rename(dir_s1d2, dir_s2d2));
+ ASSERT_EQ(EXDEV, errno);
+ ASSERT_EQ(-1, rename(dir_s1d2, dir_s2d3));
+ ASSERT_EQ(EXDEV, errno);
+
+ ASSERT_EQ(-1, rename(dir_s1d3, dir_s2d1));
+ ASSERT_EQ(EXDEV, errno);
+ ASSERT_EQ(-1, rename(dir_s1d3, dir_s2d2));
+ ASSERT_EQ(EXDEV, errno);
+ /*
+ * Moving should only be allowed when the source and the destination
+ * parent directory have REFER.
+ */
+ ASSERT_EQ(-1, rename(dir_s1d3, dir_s2d3));
+ ASSERT_EQ(ENOTEMPTY, errno);
+ ASSERT_EQ(0, unlink(file1_s2d3));
+ ASSERT_EQ(0, unlink(file2_s2d3));
+ ASSERT_EQ(0, rename(dir_s1d3, dir_s2d3));
+}
+
+/* Checks renames beneath dir_s1d1. */
+static void refer_denied_by_default(struct __test_metadata *const _metadata,
+ const struct rule layer1[],
+ const int layer1_err,
+ const struct rule layer2[])
+{
+ int ruleset_fd;
+
+ ASSERT_EQ(0, unlink(file1_s1d2));
+
+ ruleset_fd = create_ruleset(_metadata, layer1[0].access, layer1);
+ ASSERT_LE(0, ruleset_fd);
+ enforce_ruleset(_metadata, ruleset_fd);
+ ASSERT_EQ(0, close(ruleset_fd));
+
+ /*
+ * If the first layer handles LANDLOCK_ACCESS_FS_REFER (according to
+ * layer1_err), then it allows some different-parent renames and links.
+ */
+ ASSERT_EQ(layer1_err, test_rename(file1_s1d1, file1_s1d2));
+ if (layer1_err == 0)
+ ASSERT_EQ(layer1_err, test_rename(file1_s1d2, file1_s1d1));
+ ASSERT_EQ(layer1_err, test_exchange(file2_s1d1, file2_s1d2));
+ ASSERT_EQ(layer1_err, test_exchange(file2_s1d2, file2_s1d1));
+
+ ruleset_fd = create_ruleset(_metadata, layer2[0].access, layer2);
+ ASSERT_LE(0, ruleset_fd);
+ enforce_ruleset(_metadata, ruleset_fd);
+ ASSERT_EQ(0, close(ruleset_fd));
+
+ /*
+ * Now, either the first or the second layer does not handle
+ * LANDLOCK_ACCESS_FS_REFER, which means that any different-parent
+ * renames and links are denied, thus making the layer handling
+ * LANDLOCK_ACCESS_FS_REFER null and void.
+ */
+ ASSERT_EQ(EXDEV, test_rename(file1_s1d1, file1_s1d2));
+ ASSERT_EQ(EXDEV, test_exchange(file2_s1d1, file2_s1d2));
+ ASSERT_EQ(EXDEV, test_exchange(file2_s1d2, file2_s1d1));
+}
+
+const struct rule layer_dir_s1d1_refer[] = {
+ {
+ .path = dir_s1d1,
+ .access = LANDLOCK_ACCESS_FS_REFER,
+ },
+ {},
+};
+
+const struct rule layer_dir_s1d1_execute[] = {
+ {
+ /* Matches a parent directory. */
+ .path = dir_s1d1,
+ .access = LANDLOCK_ACCESS_FS_EXECUTE,
+ },
+ {},
+};
+
+const struct rule layer_dir_s2d1_execute[] = {
+ {
+ /* Does not match a parent directory. */
+ .path = dir_s2d1,
+ .access = LANDLOCK_ACCESS_FS_EXECUTE,
+ },
+ {},
+};
+
+/*
+ * Tests precedence over renames: denied by default for different parent
+ * directories, *with* a rule matching a parent directory, but not directly
+ * denying access (with MAKE_REG nor REMOVE).
+ */
+TEST_F_FORK(layout1, refer_denied_by_default1)
+{
+ refer_denied_by_default(_metadata, layer_dir_s1d1_refer, 0,
+ layer_dir_s1d1_execute);
+}
+
+/*
+ * Same test but this time turning around the ABI version order: the first
+ * layer does not handle LANDLOCK_ACCESS_FS_REFER.
+ */
+TEST_F_FORK(layout1, refer_denied_by_default2)
+{
+ refer_denied_by_default(_metadata, layer_dir_s1d1_execute, EXDEV,
+ layer_dir_s1d1_refer);
+}
+
+/*
+ * Tests precedence over renames: denied by default for different parent
+ * directories, *without* a rule matching a parent directory, but not directly
+ * denying access (with MAKE_REG nor REMOVE).
+ */
+TEST_F_FORK(layout1, refer_denied_by_default3)
+{
+ refer_denied_by_default(_metadata, layer_dir_s1d1_refer, 0,
+ layer_dir_s2d1_execute);
+}
+
+/*
+ * Same test but this time turning around the ABI version order: the first
+ * layer does not handle LANDLOCK_ACCESS_FS_REFER.
+ */
+TEST_F_FORK(layout1, refer_denied_by_default4)
+{
+ refer_denied_by_default(_metadata, layer_dir_s2d1_execute, EXDEV,
+ layer_dir_s1d1_refer);
+}
+
+TEST_F_FORK(layout1, reparent_link)
+{
+ const struct rule layer1[] = {
+ {
+ .path = dir_s1d2,
+ .access = LANDLOCK_ACCESS_FS_MAKE_REG,
+ },
+ {
+ .path = dir_s1d3,
+ .access = LANDLOCK_ACCESS_FS_REFER,
+ },
+ {
+ .path = dir_s2d2,
+ .access = LANDLOCK_ACCESS_FS_REFER,
+ },
+ {
+ .path = dir_s2d3,
+ .access = LANDLOCK_ACCESS_FS_MAKE_REG,
+ },
+ {},
+ };
+ const int ruleset_fd = create_ruleset(
+ _metadata,
+ LANDLOCK_ACCESS_FS_MAKE_REG | LANDLOCK_ACCESS_FS_REFER, layer1);
+
+ ASSERT_LE(0, ruleset_fd);
+ enforce_ruleset(_metadata, ruleset_fd);
+ ASSERT_EQ(0, close(ruleset_fd));
+
+ ASSERT_EQ(0, unlink(file1_s1d1));
+ ASSERT_EQ(0, unlink(file1_s1d2));
+ ASSERT_EQ(0, unlink(file1_s1d3));
+
+ /* Denies linking because of missing MAKE_REG. */
+ ASSERT_EQ(-1, link(file2_s1d1, file1_s1d1));
+ ASSERT_EQ(EACCES, errno);
+ /* Denies linking because of missing source and destination REFER. */
+ ASSERT_EQ(-1, link(file1_s2d1, file1_s1d2));
+ ASSERT_EQ(EXDEV, errno);
+ /* Denies linking because of missing source REFER. */
+ ASSERT_EQ(-1, link(file1_s2d1, file1_s1d3));
+ ASSERT_EQ(EXDEV, errno);
+
+ /* Denies linking because of missing MAKE_REG. */
+ ASSERT_EQ(-1, link(file1_s2d2, file1_s1d1));
+ ASSERT_EQ(EACCES, errno);
+ /* Denies linking because of missing destination REFER. */
+ ASSERT_EQ(-1, link(file1_s2d2, file1_s1d2));
+ ASSERT_EQ(EXDEV, errno);
+
+ /* Allows linking because of REFER and MAKE_REG. */
+ ASSERT_EQ(0, link(file1_s2d2, file1_s1d3));
+ ASSERT_EQ(0, unlink(file1_s2d2));
+ /* Reverse linking denied because of missing MAKE_REG. */
+ ASSERT_EQ(-1, link(file1_s1d3, file1_s2d2));
+ ASSERT_EQ(EACCES, errno);
+ ASSERT_EQ(0, unlink(file1_s2d3));
+ /* Checks reverse linking. */
+ ASSERT_EQ(0, link(file1_s1d3, file1_s2d3));
+ ASSERT_EQ(0, unlink(file1_s1d3));
+
+ /*
+ * This is OK for a file link, but it should not be allowed for a
+ * directory rename (because of the superset of access rights.
+ */
+ ASSERT_EQ(0, link(file1_s2d3, file1_s1d3));
+ ASSERT_EQ(0, unlink(file1_s1d3));
+
+ ASSERT_EQ(-1, link(file2_s1d2, file1_s1d3));
+ ASSERT_EQ(EXDEV, errno);
+ ASSERT_EQ(-1, link(file2_s1d3, file1_s1d2));
+ ASSERT_EQ(EXDEV, errno);
+
+ ASSERT_EQ(0, link(file2_s1d2, file1_s1d2));
+ ASSERT_EQ(0, link(file2_s1d3, file1_s1d3));
+}
+
+TEST_F_FORK(layout1, reparent_rename)
+{
+ /* Same rules as for reparent_link. */
+ const struct rule layer1[] = {
+ {
+ .path = dir_s1d2,
+ .access = LANDLOCK_ACCESS_FS_MAKE_REG,
+ },
+ {
+ .path = dir_s1d3,
+ .access = LANDLOCK_ACCESS_FS_REFER,
+ },
+ {
+ .path = dir_s2d2,
+ .access = LANDLOCK_ACCESS_FS_REFER,
+ },
+ {
+ .path = dir_s2d3,
+ .access = LANDLOCK_ACCESS_FS_MAKE_REG,
+ },
+ {},
+ };
+ const int ruleset_fd = create_ruleset(
+ _metadata,
+ LANDLOCK_ACCESS_FS_MAKE_REG | LANDLOCK_ACCESS_FS_REFER, layer1);
+
+ ASSERT_LE(0, ruleset_fd);
+ enforce_ruleset(_metadata, ruleset_fd);
+ ASSERT_EQ(0, close(ruleset_fd));
+
+ ASSERT_EQ(0, unlink(file1_s1d2));
+ ASSERT_EQ(0, unlink(file1_s1d3));
+
+ /* Denies renaming because of missing MAKE_REG. */
+ ASSERT_EQ(-1, renameat2(AT_FDCWD, file2_s1d1, AT_FDCWD, file1_s1d1,
+ RENAME_EXCHANGE));
+ ASSERT_EQ(EACCES, errno);
+ ASSERT_EQ(-1, renameat2(AT_FDCWD, file1_s1d1, AT_FDCWD, file2_s1d1,
+ RENAME_EXCHANGE));
+ ASSERT_EQ(EACCES, errno);
+ ASSERT_EQ(0, unlink(file1_s1d1));
+ ASSERT_EQ(-1, rename(file2_s1d1, file1_s1d1));
+ ASSERT_EQ(EACCES, errno);
+ /* Even denies same file exchange. */
+ ASSERT_EQ(-1, renameat2(AT_FDCWD, file2_s1d1, AT_FDCWD, file2_s1d1,
+ RENAME_EXCHANGE));
+ ASSERT_EQ(EACCES, errno);
+
+ /* Denies renaming because of missing source and destination REFER. */
+ ASSERT_EQ(-1, rename(file1_s2d1, file1_s1d2));
+ ASSERT_EQ(EXDEV, errno);
+ /*
+ * Denies renaming because of missing MAKE_REG, source and destination
+ * REFER.
+ */
+ ASSERT_EQ(-1, renameat2(AT_FDCWD, file1_s2d1, AT_FDCWD, file2_s1d1,
+ RENAME_EXCHANGE));
+ ASSERT_EQ(EACCES, errno);
+ ASSERT_EQ(-1, renameat2(AT_FDCWD, file2_s1d1, AT_FDCWD, file1_s2d1,
+ RENAME_EXCHANGE));
+ ASSERT_EQ(EACCES, errno);
+
+ /* Denies renaming because of missing source REFER. */
+ ASSERT_EQ(-1, rename(file1_s2d1, file1_s1d3));
+ ASSERT_EQ(EXDEV, errno);
+ /* Denies renaming because of missing MAKE_REG. */
+ ASSERT_EQ(-1, renameat2(AT_FDCWD, file1_s2d1, AT_FDCWD, file2_s1d3,
+ RENAME_EXCHANGE));
+ ASSERT_EQ(EACCES, errno);
+
+ /* Denies renaming because of missing MAKE_REG. */
+ ASSERT_EQ(-1, rename(file1_s2d2, file1_s1d1));
+ ASSERT_EQ(EACCES, errno);
+ /* Denies renaming because of missing destination REFER*/
+ ASSERT_EQ(-1, rename(file1_s2d2, file1_s1d2));
+ ASSERT_EQ(EXDEV, errno);
+
+ /* Denies exchange because of one missing MAKE_REG. */
+ ASSERT_EQ(-1, renameat2(AT_FDCWD, file1_s2d2, AT_FDCWD, file2_s1d3,
+ RENAME_EXCHANGE));
+ ASSERT_EQ(EACCES, errno);
+ /* Allows renaming because of REFER and MAKE_REG. */
+ ASSERT_EQ(0, rename(file1_s2d2, file1_s1d3));
+
+ /* Reverse renaming denied because of missing MAKE_REG. */
+ ASSERT_EQ(-1, rename(file1_s1d3, file1_s2d2));
+ ASSERT_EQ(EACCES, errno);
+ ASSERT_EQ(0, unlink(file1_s2d3));
+ ASSERT_EQ(0, rename(file1_s1d3, file1_s2d3));
+
+ /* Tests reverse renaming. */
+ ASSERT_EQ(0, rename(file1_s2d3, file1_s1d3));
+ ASSERT_EQ(0, renameat2(AT_FDCWD, file2_s2d3, AT_FDCWD, file1_s1d3,
+ RENAME_EXCHANGE));
+ ASSERT_EQ(0, rename(file1_s1d3, file1_s2d3));
+
+ /*
+ * This is OK for a file rename, but it should not be allowed for a
+ * directory rename (because of the superset of access rights).
+ */
+ ASSERT_EQ(0, rename(file1_s2d3, file1_s1d3));
+ ASSERT_EQ(0, rename(file1_s1d3, file1_s2d3));
+
+ /*
+ * Tests superset restrictions applied to directories. Not only the
+ * dir_s2d3's parent (dir_s2d2) should be taken into account but also
+ * access rights tied to dir_s2d3. dir_s2d2 is missing one access right
+ * compared to dir_s1d3/file1_s1d3 (MAKE_REG) but it is provided
+ * directly by the moved dir_s2d3.
+ */
+ ASSERT_EQ(0, rename(dir_s2d3, file1_s1d3));
+ ASSERT_EQ(0, rename(file1_s1d3, dir_s2d3));
+ /*
+ * The first rename is allowed but not the exchange because dir_s1d3's
+ * parent (dir_s1d2) doesn't have REFER.
+ */
+ ASSERT_EQ(-1, renameat2(AT_FDCWD, file1_s2d3, AT_FDCWD, dir_s1d3,
+ RENAME_EXCHANGE));
+ ASSERT_EQ(EXDEV, errno);
+ ASSERT_EQ(-1, renameat2(AT_FDCWD, dir_s1d3, AT_FDCWD, file1_s2d3,
+ RENAME_EXCHANGE));
+ ASSERT_EQ(EXDEV, errno);
+ ASSERT_EQ(-1, rename(file1_s2d3, dir_s1d3));
+ ASSERT_EQ(EXDEV, errno);
+
+ ASSERT_EQ(-1, rename(file2_s1d2, file1_s1d3));
+ ASSERT_EQ(EXDEV, errno);
+ ASSERT_EQ(-1, rename(file2_s1d3, file1_s1d2));
+ ASSERT_EQ(EXDEV, errno);
+
+ /* Renaming in the same directory is always allowed. */
+ ASSERT_EQ(0, rename(file2_s1d2, file1_s1d2));
+ ASSERT_EQ(0, rename(file2_s1d3, file1_s1d3));
+
+ ASSERT_EQ(0, unlink(file1_s1d2));
+ /* Denies because of missing source MAKE_REG and destination REFER. */
+ ASSERT_EQ(-1, rename(dir_s2d3, file1_s1d2));
+ ASSERT_EQ(EXDEV, errno);
+
+ ASSERT_EQ(0, unlink(file1_s1d3));
+ /* Denies because of missing source MAKE_REG and REFER. */
+ ASSERT_EQ(-1, rename(dir_s2d2, file1_s1d3));
+ ASSERT_EQ(EXDEV, errno);
+}
+
+static void
+reparent_exdev_layers_enforce1(struct __test_metadata *const _metadata)
+{
+ const struct rule layer1[] = {
+ {
+ .path = dir_s1d2,
+ .access = LANDLOCK_ACCESS_FS_REFER,
+ },
+ {
+ /* Interesting for the layer2 tests. */
+ .path = dir_s1d3,
+ .access = LANDLOCK_ACCESS_FS_MAKE_REG,
+ },
+ {
+ .path = dir_s2d2,
+ .access = LANDLOCK_ACCESS_FS_REFER,
+ },
+ {
+ .path = dir_s2d3,
+ .access = LANDLOCK_ACCESS_FS_MAKE_REG,
+ },
+ {},
+ };
+ const int ruleset_fd = create_ruleset(
+ _metadata,
+ LANDLOCK_ACCESS_FS_MAKE_REG | LANDLOCK_ACCESS_FS_REFER, layer1);
+
+ ASSERT_LE(0, ruleset_fd);
+ enforce_ruleset(_metadata, ruleset_fd);
+ ASSERT_EQ(0, close(ruleset_fd));
+}
+
+static void
+reparent_exdev_layers_enforce2(struct __test_metadata *const _metadata)
+{
+ const struct rule layer2[] = {
+ {
+ .path = dir_s2d3,
+ .access = LANDLOCK_ACCESS_FS_MAKE_DIR,
+ },
+ {},
+ };
+ /*
+ * Same checks as before but with a second layer and a new MAKE_DIR
+ * rule (and no explicit handling of REFER).
+ */
+ const int ruleset_fd =
+ create_ruleset(_metadata, LANDLOCK_ACCESS_FS_MAKE_DIR, layer2);
+
+ ASSERT_LE(0, ruleset_fd);
+ enforce_ruleset(_metadata, ruleset_fd);
+ ASSERT_EQ(0, close(ruleset_fd));
+}
+
+TEST_F_FORK(layout1, reparent_exdev_layers_rename1)
+{
+ ASSERT_EQ(0, unlink(file1_s2d2));
+ ASSERT_EQ(0, unlink(file1_s2d3));
+
+ reparent_exdev_layers_enforce1(_metadata);
+
+ /*
+ * Moving the dir_s1d3 directory below dir_s2d2 is allowed by Landlock
+ * because it doesn't inherit new access rights.
+ */
+ ASSERT_EQ(0, rename(dir_s1d3, file1_s2d2));
+ ASSERT_EQ(0, rename(file1_s2d2, dir_s1d3));
+
+ /*
+ * Moving the dir_s1d3 directory below dir_s2d3 is allowed, even if it
+ * gets a new inherited access rights (MAKE_REG), because MAKE_REG is
+ * already allowed for dir_s1d3.
+ */
+ ASSERT_EQ(0, rename(dir_s1d3, file1_s2d3));
+ ASSERT_EQ(0, rename(file1_s2d3, dir_s1d3));
+
+ /*
+ * However, moving the file1_s1d3 file below dir_s2d3 is allowed
+ * because it cannot inherit MAKE_REG right (which is dedicated to
+ * directories).
+ */
+ ASSERT_EQ(0, rename(file1_s1d3, file1_s2d3));
+
+ reparent_exdev_layers_enforce2(_metadata);
+
+ /*
+ * Moving the dir_s1d3 directory below dir_s2d2 is now denied because
+ * MAKE_DIR is not tied to dir_s2d2.
+ */
+ ASSERT_EQ(-1, rename(dir_s1d3, file1_s2d2));
+ ASSERT_EQ(EACCES, errno);
+
+ /*
+ * Moving the dir_s1d3 directory below dir_s2d3 is forbidden because it
+ * would grants MAKE_REG and MAKE_DIR rights to it.
+ */
+ ASSERT_EQ(-1, rename(dir_s1d3, file1_s2d3));
+ ASSERT_EQ(EXDEV, errno);
+
+ /*
+ * Moving the file2_s1d3 file below dir_s2d3 is denied because the
+ * second layer does not handle REFER, which is always denied by
+ * default.
+ */
+ ASSERT_EQ(-1, rename(file2_s1d3, file1_s2d3));
+ ASSERT_EQ(EXDEV, errno);
+}
+
+TEST_F_FORK(layout1, reparent_exdev_layers_rename2)
+{
+ reparent_exdev_layers_enforce1(_metadata);
+
+ /* Checks EACCES predominance over EXDEV. */
+ ASSERT_EQ(-1, rename(file1_s1d1, file1_s2d2));
+ ASSERT_EQ(EACCES, errno);
+ ASSERT_EQ(-1, rename(file1_s1d2, file1_s2d2));
+ ASSERT_EQ(EACCES, errno);
+ ASSERT_EQ(-1, rename(file1_s1d1, file1_s2d3));
+ ASSERT_EQ(EXDEV, errno);
+ /* Modify layout! */
+ ASSERT_EQ(0, rename(file1_s1d2, file1_s2d3));
+
+ /* Without REFER source. */
+ ASSERT_EQ(-1, rename(dir_s1d1, file1_s2d2));
+ ASSERT_EQ(EXDEV, errno);
+ ASSERT_EQ(-1, rename(dir_s1d2, file1_s2d2));
+ ASSERT_EQ(EXDEV, errno);
+
+ reparent_exdev_layers_enforce2(_metadata);
+
+ /* Checks EACCES predominance over EXDEV. */
+ ASSERT_EQ(-1, rename(file1_s1d1, file1_s2d2));
+ ASSERT_EQ(EACCES, errno);
+ /* Checks with actual file2_s1d2. */
+ ASSERT_EQ(-1, rename(file2_s1d2, file1_s2d2));
+ ASSERT_EQ(EACCES, errno);
+ ASSERT_EQ(-1, rename(file1_s1d1, file1_s2d3));
+ ASSERT_EQ(EXDEV, errno);
+ /*
+ * Modifying the layout is now denied because the second layer does not
+ * handle REFER, which is always denied by default.
+ */
+ ASSERT_EQ(-1, rename(file2_s1d2, file1_s2d3));
+ ASSERT_EQ(EXDEV, errno);
+
+ /* Without REFER source, EACCES wins over EXDEV. */
+ ASSERT_EQ(-1, rename(dir_s1d1, file1_s2d2));
+ ASSERT_EQ(EACCES, errno);
+ ASSERT_EQ(-1, rename(dir_s1d2, file1_s2d2));
+ ASSERT_EQ(EACCES, errno);
+}
+
+TEST_F_FORK(layout1, reparent_exdev_layers_exchange1)
+{
+ const char *const dir_file1_s1d2 = file1_s1d2, *const dir_file2_s2d3 =
+ file2_s2d3;
+
+ ASSERT_EQ(0, unlink(file1_s1d2));
+ ASSERT_EQ(0, mkdir(file1_s1d2, 0700));
+ ASSERT_EQ(0, unlink(file2_s2d3));
+ ASSERT_EQ(0, mkdir(file2_s2d3, 0700));
+
+ reparent_exdev_layers_enforce1(_metadata);
+
+ /* Error predominance with file exchange: returns EXDEV and EACCES. */
+ ASSERT_EQ(-1, renameat2(AT_FDCWD, file1_s1d1, AT_FDCWD, file1_s2d3,
+ RENAME_EXCHANGE));
+ ASSERT_EQ(EACCES, errno);
+ ASSERT_EQ(-1, renameat2(AT_FDCWD, file1_s2d3, AT_FDCWD, file1_s1d1,
+ RENAME_EXCHANGE));
+ ASSERT_EQ(EACCES, errno);
+
+ /*
+ * Checks with directories which creation could be allowed, but denied
+ * because of access rights that would be inherited.
+ */
+ ASSERT_EQ(-1, renameat2(AT_FDCWD, dir_file1_s1d2, AT_FDCWD,
+ dir_file2_s2d3, RENAME_EXCHANGE));
+ ASSERT_EQ(EXDEV, errno);
+ ASSERT_EQ(-1, renameat2(AT_FDCWD, dir_file2_s2d3, AT_FDCWD,
+ dir_file1_s1d2, RENAME_EXCHANGE));
+ ASSERT_EQ(EXDEV, errno);
+
+ /* Checks with same access rights. */
+ ASSERT_EQ(0, renameat2(AT_FDCWD, dir_s1d3, AT_FDCWD, dir_s2d3,
+ RENAME_EXCHANGE));
+ ASSERT_EQ(0, renameat2(AT_FDCWD, dir_s2d3, AT_FDCWD, dir_s1d3,
+ RENAME_EXCHANGE));
+
+ /* Checks with different (child-only) access rights. */
+ ASSERT_EQ(0, renameat2(AT_FDCWD, dir_s2d3, AT_FDCWD, dir_file1_s1d2,
+ RENAME_EXCHANGE));
+ ASSERT_EQ(0, renameat2(AT_FDCWD, dir_file1_s1d2, AT_FDCWD, dir_s2d3,
+ RENAME_EXCHANGE));
+
+ /*
+ * Checks that exchange between file and directory are consistent.
+ *
+ * Moving a file (file1_s2d2) to a directory which only grants more
+ * directory-related access rights is allowed, and at the same time
+ * moving a directory (dir_file2_s2d3) to another directory which
+ * grants less access rights is allowed too.
+ *
+ * See layout1.reparent_exdev_layers_exchange3 for inverted arguments.
+ */
+ ASSERT_EQ(0, renameat2(AT_FDCWD, file1_s2d2, AT_FDCWD, dir_file2_s2d3,
+ RENAME_EXCHANGE));
+ /*
+ * However, moving back the directory is denied because it would get
+ * more access rights than the current state and because file creation
+ * is forbidden (in dir_s2d2).
+ */
+ ASSERT_EQ(-1, renameat2(AT_FDCWD, dir_file2_s2d3, AT_FDCWD, file1_s2d2,
+ RENAME_EXCHANGE));
+ ASSERT_EQ(EACCES, errno);
+ ASSERT_EQ(-1, renameat2(AT_FDCWD, file1_s2d2, AT_FDCWD, dir_file2_s2d3,
+ RENAME_EXCHANGE));
+ ASSERT_EQ(EACCES, errno);
+
+ reparent_exdev_layers_enforce2(_metadata);
+
+ /* Error predominance with file exchange: returns EXDEV and EACCES. */
+ ASSERT_EQ(-1, renameat2(AT_FDCWD, file1_s1d1, AT_FDCWD, file1_s2d3,
+ RENAME_EXCHANGE));
+ ASSERT_EQ(EACCES, errno);
+ ASSERT_EQ(-1, renameat2(AT_FDCWD, file1_s2d3, AT_FDCWD, file1_s1d1,
+ RENAME_EXCHANGE));
+ ASSERT_EQ(EACCES, errno);
+
+ /* Checks with directories which creation is now denied. */
+ ASSERT_EQ(-1, renameat2(AT_FDCWD, dir_file1_s1d2, AT_FDCWD,
+ dir_file2_s2d3, RENAME_EXCHANGE));
+ ASSERT_EQ(EACCES, errno);
+ ASSERT_EQ(-1, renameat2(AT_FDCWD, dir_file2_s2d3, AT_FDCWD,
+ dir_file1_s1d2, RENAME_EXCHANGE));
+ ASSERT_EQ(EACCES, errno);
+
+ /* Checks with different (child-only) access rights. */
+ ASSERT_EQ(-1, renameat2(AT_FDCWD, dir_s1d3, AT_FDCWD, dir_s2d3,
+ RENAME_EXCHANGE));
+ /* Denied because of MAKE_DIR. */
+ ASSERT_EQ(EACCES, errno);
+ ASSERT_EQ(-1, renameat2(AT_FDCWD, dir_s2d3, AT_FDCWD, dir_s1d3,
+ RENAME_EXCHANGE));
+ ASSERT_EQ(EACCES, errno);
+
+ /* Checks with different (child-only) access rights. */
+ ASSERT_EQ(-1, renameat2(AT_FDCWD, dir_s2d3, AT_FDCWD, dir_file1_s1d2,
+ RENAME_EXCHANGE));
+ /* Denied because of MAKE_DIR. */
+ ASSERT_EQ(EACCES, errno);
+ ASSERT_EQ(-1, renameat2(AT_FDCWD, dir_file1_s1d2, AT_FDCWD, dir_s2d3,
+ RENAME_EXCHANGE));
+ ASSERT_EQ(EACCES, errno);
+
+ /* See layout1.reparent_exdev_layers_exchange2 for complement. */
+}
+
+TEST_F_FORK(layout1, reparent_exdev_layers_exchange2)
+{
+ const char *const dir_file2_s2d3 = file2_s2d3;
+
+ ASSERT_EQ(0, unlink(file2_s2d3));
+ ASSERT_EQ(0, mkdir(file2_s2d3, 0700));
+
+ reparent_exdev_layers_enforce1(_metadata);
+ reparent_exdev_layers_enforce2(_metadata);
+
+ /* Checks that exchange between file and directory are consistent. */
+ ASSERT_EQ(-1, renameat2(AT_FDCWD, file1_s2d2, AT_FDCWD, dir_file2_s2d3,
+ RENAME_EXCHANGE));
+ ASSERT_EQ(EACCES, errno);
+ ASSERT_EQ(-1, renameat2(AT_FDCWD, dir_file2_s2d3, AT_FDCWD, file1_s2d2,
+ RENAME_EXCHANGE));
+ ASSERT_EQ(EACCES, errno);
+}
+
+TEST_F_FORK(layout1, reparent_exdev_layers_exchange3)
+{
+ const char *const dir_file2_s2d3 = file2_s2d3;
+
+ ASSERT_EQ(0, unlink(file2_s2d3));
+ ASSERT_EQ(0, mkdir(file2_s2d3, 0700));
+
+ reparent_exdev_layers_enforce1(_metadata);
+
+ /*
+ * Checks that exchange between file and directory are consistent,
+ * including with inverted arguments (see
+ * layout1.reparent_exdev_layers_exchange1).
+ */
+ ASSERT_EQ(0, renameat2(AT_FDCWD, dir_file2_s2d3, AT_FDCWD, file1_s2d2,
+ RENAME_EXCHANGE));
+ ASSERT_EQ(-1, renameat2(AT_FDCWD, file1_s2d2, AT_FDCWD, dir_file2_s2d3,
+ RENAME_EXCHANGE));
+ ASSERT_EQ(EACCES, errno);
+ ASSERT_EQ(-1, renameat2(AT_FDCWD, dir_file2_s2d3, AT_FDCWD, file1_s2d2,
+ RENAME_EXCHANGE));
+ ASSERT_EQ(EACCES, errno);
+}
+
+TEST_F_FORK(layout1, reparent_remove)
+{
+ const struct rule layer1[] = {
+ {
+ .path = dir_s1d1,
+ .access = LANDLOCK_ACCESS_FS_REFER |
+ LANDLOCK_ACCESS_FS_REMOVE_DIR,
+ },
+ {
+ .path = dir_s1d2,
+ .access = LANDLOCK_ACCESS_FS_REMOVE_FILE,
+ },
+ {
+ .path = dir_s2d1,
+ .access = LANDLOCK_ACCESS_FS_REFER |
+ LANDLOCK_ACCESS_FS_REMOVE_FILE,
+ },
+ {},
+ };
+ const int ruleset_fd = create_ruleset(
+ _metadata,
+ LANDLOCK_ACCESS_FS_REFER | LANDLOCK_ACCESS_FS_REMOVE_DIR |
+ LANDLOCK_ACCESS_FS_REMOVE_FILE,
+ layer1);
+
+ ASSERT_LE(0, ruleset_fd);
+ enforce_ruleset(_metadata, ruleset_fd);
+ ASSERT_EQ(0, close(ruleset_fd));
+
+ /* Access denied because of wrong/swapped remove file/dir. */
+ ASSERT_EQ(-1, rename(file1_s1d1, dir_s2d2));
+ ASSERT_EQ(EACCES, errno);
+ ASSERT_EQ(-1, rename(dir_s2d2, file1_s1d1));
+ ASSERT_EQ(EACCES, errno);
+ ASSERT_EQ(-1, renameat2(AT_FDCWD, file1_s1d1, AT_FDCWD, dir_s2d2,
+ RENAME_EXCHANGE));
+ ASSERT_EQ(EACCES, errno);
+ ASSERT_EQ(-1, renameat2(AT_FDCWD, file1_s1d1, AT_FDCWD, dir_s2d3,
+ RENAME_EXCHANGE));
+ ASSERT_EQ(EACCES, errno);
+
+ /* Access allowed thanks to the matching rights. */
+ ASSERT_EQ(-1, rename(file1_s2d1, dir_s1d2));
+ ASSERT_EQ(EISDIR, errno);
+ ASSERT_EQ(-1, rename(dir_s1d2, file1_s2d1));
+ ASSERT_EQ(ENOTDIR, errno);
+ ASSERT_EQ(-1, rename(dir_s1d3, file1_s2d1));
+ ASSERT_EQ(ENOTDIR, errno);
+ ASSERT_EQ(0, unlink(file1_s2d1));
+ ASSERT_EQ(0, unlink(file1_s1d3));
+ ASSERT_EQ(0, unlink(file2_s1d3));
+ ASSERT_EQ(0, rename(dir_s1d3, file1_s2d1));
+
+ /* Effectively removes a file and a directory by exchanging them. */
+ ASSERT_EQ(0, mkdir(dir_s1d3, 0700));
+ ASSERT_EQ(0, renameat2(AT_FDCWD, file1_s2d2, AT_FDCWD, dir_s1d3,
+ RENAME_EXCHANGE));
+ ASSERT_EQ(-1, renameat2(AT_FDCWD, file1_s2d2, AT_FDCWD, dir_s1d3,
+ RENAME_EXCHANGE));
+ ASSERT_EQ(EACCES, errno);
+}
+
+TEST_F_FORK(layout1, reparent_dom_superset)
+{
+ const struct rule layer1[] = {
+ {
+ .path = dir_s1d2,
+ .access = LANDLOCK_ACCESS_FS_REFER,
+ },
+ {
+ .path = file1_s1d2,
+ .access = LANDLOCK_ACCESS_FS_EXECUTE,
+ },
+ {
+ .path = dir_s1d3,
+ .access = LANDLOCK_ACCESS_FS_MAKE_SOCK |
+ LANDLOCK_ACCESS_FS_EXECUTE,
+ },
+ {
+ .path = dir_s2d2,
+ .access = LANDLOCK_ACCESS_FS_REFER |
+ LANDLOCK_ACCESS_FS_EXECUTE |
+ LANDLOCK_ACCESS_FS_MAKE_SOCK,
+ },
+ {
+ .path = dir_s2d3,
+ .access = LANDLOCK_ACCESS_FS_READ_FILE |
+ LANDLOCK_ACCESS_FS_MAKE_FIFO,
+ },
+ {},
+ };
+ int ruleset_fd = create_ruleset(_metadata,
+ LANDLOCK_ACCESS_FS_REFER |
+ LANDLOCK_ACCESS_FS_EXECUTE |
+ LANDLOCK_ACCESS_FS_MAKE_SOCK |
+ LANDLOCK_ACCESS_FS_READ_FILE |
+ LANDLOCK_ACCESS_FS_MAKE_FIFO,
+ layer1);
+
+ ASSERT_LE(0, ruleset_fd);
+ enforce_ruleset(_metadata, ruleset_fd);
+ ASSERT_EQ(0, close(ruleset_fd));
+
+ ASSERT_EQ(-1, rename(file1_s1d2, file1_s2d1));
+ ASSERT_EQ(EXDEV, errno);
+ /*
+ * Moving file1_s1d2 beneath dir_s2d3 would grant it the READ_FILE
+ * access right.
+ */
+ ASSERT_EQ(-1, rename(file1_s1d2, file1_s2d3));
+ ASSERT_EQ(EXDEV, errno);
+ /*
+ * Moving file1_s1d2 should be allowed even if dir_s2d2 grants a
+ * superset of access rights compared to dir_s1d2, because file1_s1d2
+ * already has these access rights anyway.
+ */
+ ASSERT_EQ(0, rename(file1_s1d2, file1_s2d2));
+ ASSERT_EQ(0, rename(file1_s2d2, file1_s1d2));
+
+ ASSERT_EQ(-1, rename(dir_s1d3, file1_s2d1));
+ ASSERT_EQ(EXDEV, errno);
+ /*
+ * Moving dir_s1d3 beneath dir_s2d3 would grant it the MAKE_FIFO access
+ * right.
+ */
+ ASSERT_EQ(-1, rename(dir_s1d3, file1_s2d3));
+ ASSERT_EQ(EXDEV, errno);
+ /*
+ * Moving dir_s1d3 should be allowed even if dir_s2d2 grants a superset
+ * of access rights compared to dir_s1d2, because dir_s1d3 already has
+ * these access rights anyway.
+ */
+ ASSERT_EQ(0, rename(dir_s1d3, file1_s2d2));
+ ASSERT_EQ(0, rename(file1_s2d2, dir_s1d3));
+
+ /*
+ * Moving file1_s2d3 beneath dir_s1d2 is allowed, but moving it back
+ * will be denied because the new inherited access rights from dir_s1d2
+ * will be less than the destination (original) dir_s2d3. This is a
+ * sinkhole scenario where we cannot move back files or directories.
+ */
+ ASSERT_EQ(0, rename(file1_s2d3, file2_s1d2));
+ ASSERT_EQ(-1, rename(file2_s1d2, file1_s2d3));
+ ASSERT_EQ(EXDEV, errno);
+ ASSERT_EQ(0, unlink(file2_s1d2));
+ ASSERT_EQ(0, unlink(file2_s2d3));
+ /*
+ * Checks similar directory one-way move: dir_s2d3 loses EXECUTE and
+ * MAKE_SOCK which were inherited from dir_s1d3.
+ */
+ ASSERT_EQ(0, rename(dir_s2d3, file2_s1d2));
+ ASSERT_EQ(-1, rename(file2_s1d2, dir_s2d3));
+ ASSERT_EQ(EXDEV, errno);
+}
+
+TEST_F_FORK(layout1, remove_dir)
+{
+ const struct rule rules[] = {
+ {
+ .path = dir_s1d2,
+ .access = LANDLOCK_ACCESS_FS_REMOVE_DIR,
+ },
+ {},
+ };
+ const int ruleset_fd =
+ create_ruleset(_metadata, rules[0].access, rules);
+
+ ASSERT_LE(0, ruleset_fd);
+
+ ASSERT_EQ(0, unlink(file1_s1d1));
+ ASSERT_EQ(0, unlink(file1_s1d2));
+ ASSERT_EQ(0, unlink(file1_s1d3));
+ ASSERT_EQ(0, unlink(file2_s1d3));
+
+ enforce_ruleset(_metadata, ruleset_fd);
+ ASSERT_EQ(0, close(ruleset_fd));
+
+ ASSERT_EQ(0, rmdir(dir_s1d3));
+ ASSERT_EQ(0, mkdir(dir_s1d3, 0700));
+ ASSERT_EQ(0, unlinkat(AT_FDCWD, dir_s1d3, AT_REMOVEDIR));
+
+ /* dir_s1d2 itself cannot be removed. */
+ ASSERT_EQ(-1, rmdir(dir_s1d2));
+ ASSERT_EQ(EACCES, errno);
+ ASSERT_EQ(-1, unlinkat(AT_FDCWD, dir_s1d2, AT_REMOVEDIR));
+ ASSERT_EQ(EACCES, errno);
+ ASSERT_EQ(-1, rmdir(dir_s1d1));
+ ASSERT_EQ(EACCES, errno);
+ ASSERT_EQ(-1, unlinkat(AT_FDCWD, dir_s1d1, AT_REMOVEDIR));
+ ASSERT_EQ(EACCES, errno);
+}
+
+TEST_F_FORK(layout1, remove_file)
+{
+ const struct rule rules[] = {
+ {
+ .path = dir_s1d2,
+ .access = LANDLOCK_ACCESS_FS_REMOVE_FILE,
+ },
+ {},
+ };
+ const int ruleset_fd =
+ create_ruleset(_metadata, rules[0].access, rules);
+
+ ASSERT_LE(0, ruleset_fd);
+ enforce_ruleset(_metadata, ruleset_fd);
+ ASSERT_EQ(0, close(ruleset_fd));
+
+ ASSERT_EQ(-1, unlink(file1_s1d1));
+ ASSERT_EQ(EACCES, errno);
+ ASSERT_EQ(-1, unlinkat(AT_FDCWD, file1_s1d1, 0));
+ ASSERT_EQ(EACCES, errno);
+ ASSERT_EQ(0, unlink(file1_s1d2));
+ ASSERT_EQ(0, unlinkat(AT_FDCWD, file1_s1d3, 0));
+}
+
+static void test_make_file(struct __test_metadata *const _metadata,
+ const __u64 access, const mode_t mode,
+ const dev_t dev)
+{
+ const struct rule rules[] = {
+ {
+ .path = dir_s1d2,
+ .access = access,
+ },
+ {},
+ };
+ const int ruleset_fd = create_ruleset(_metadata, access, rules);
+
+ ASSERT_LE(0, ruleset_fd);
+
+ ASSERT_EQ(0, unlink(file1_s1d1));
+ ASSERT_EQ(0, unlink(file2_s1d1));
+ ASSERT_EQ(0, mknod(file2_s1d1, mode | 0400, dev))
+ {
+ TH_LOG("Failed to make file \"%s\": %s", file2_s1d1,
+ strerror(errno));
+ };
+
+ ASSERT_EQ(0, unlink(file1_s1d2));
+ ASSERT_EQ(0, unlink(file2_s1d2));
+
+ ASSERT_EQ(0, unlink(file1_s1d3));
+ ASSERT_EQ(0, unlink(file2_s1d3));
+
+ enforce_ruleset(_metadata, ruleset_fd);
+ ASSERT_EQ(0, close(ruleset_fd));
+
+ ASSERT_EQ(-1, mknod(file1_s1d1, mode | 0400, dev));
+ ASSERT_EQ(EACCES, errno);
+ ASSERT_EQ(-1, link(file2_s1d1, file1_s1d1));
+ ASSERT_EQ(EACCES, errno);
+ ASSERT_EQ(-1, rename(file2_s1d1, file1_s1d1));
+ ASSERT_EQ(EACCES, errno);
+
+ ASSERT_EQ(0, mknod(file1_s1d2, mode | 0400, dev))
+ {
+ TH_LOG("Failed to make file \"%s\": %s", file1_s1d2,
+ strerror(errno));
+ };
+ ASSERT_EQ(0, link(file1_s1d2, file2_s1d2));
+ ASSERT_EQ(0, unlink(file2_s1d2));
+ ASSERT_EQ(0, rename(file1_s1d2, file2_s1d2));
+
+ ASSERT_EQ(0, mknod(file1_s1d3, mode | 0400, dev));
+ ASSERT_EQ(0, link(file1_s1d3, file2_s1d3));
+ ASSERT_EQ(0, unlink(file2_s1d3));
+ ASSERT_EQ(0, rename(file1_s1d3, file2_s1d3));
+}
+
+TEST_F_FORK(layout1, make_char)
+{
+ /* Creates a /dev/null device. */
+ set_cap(_metadata, CAP_MKNOD);
+ test_make_file(_metadata, LANDLOCK_ACCESS_FS_MAKE_CHAR, S_IFCHR,
+ makedev(1, 3));
+}
+
+TEST_F_FORK(layout1, make_block)
+{
+ /* Creates a /dev/loop0 device. */
+ set_cap(_metadata, CAP_MKNOD);
+ test_make_file(_metadata, LANDLOCK_ACCESS_FS_MAKE_BLOCK, S_IFBLK,
+ makedev(7, 0));
+}
+
+TEST_F_FORK(layout1, make_reg_1)
+{
+ test_make_file(_metadata, LANDLOCK_ACCESS_FS_MAKE_REG, S_IFREG, 0);
+}
+
+TEST_F_FORK(layout1, make_reg_2)
+{
+ test_make_file(_metadata, LANDLOCK_ACCESS_FS_MAKE_REG, 0, 0);
+}
+
+TEST_F_FORK(layout1, make_sock)
+{
+ test_make_file(_metadata, LANDLOCK_ACCESS_FS_MAKE_SOCK, S_IFSOCK, 0);
+}
+
+TEST_F_FORK(layout1, make_fifo)
+{
+ test_make_file(_metadata, LANDLOCK_ACCESS_FS_MAKE_FIFO, S_IFIFO, 0);
+}
+
+TEST_F_FORK(layout1, make_sym)
+{
+ const struct rule rules[] = {
+ {
+ .path = dir_s1d2,
+ .access = LANDLOCK_ACCESS_FS_MAKE_SYM,
+ },
+ {},
+ };
+ const int ruleset_fd =
+ create_ruleset(_metadata, rules[0].access, rules);
+
+ ASSERT_LE(0, ruleset_fd);
+
+ ASSERT_EQ(0, unlink(file1_s1d1));
+ ASSERT_EQ(0, unlink(file2_s1d1));
+ ASSERT_EQ(0, symlink("none", file2_s1d1));
+
+ ASSERT_EQ(0, unlink(file1_s1d2));
+ ASSERT_EQ(0, unlink(file2_s1d2));
+
+ ASSERT_EQ(0, unlink(file1_s1d3));
+ ASSERT_EQ(0, unlink(file2_s1d3));
+
+ enforce_ruleset(_metadata, ruleset_fd);
+ ASSERT_EQ(0, close(ruleset_fd));
+
+ ASSERT_EQ(-1, symlink("none", file1_s1d1));
+ ASSERT_EQ(EACCES, errno);
+ ASSERT_EQ(-1, link(file2_s1d1, file1_s1d1));
+ ASSERT_EQ(EACCES, errno);
+ ASSERT_EQ(-1, rename(file2_s1d1, file1_s1d1));
+ ASSERT_EQ(EACCES, errno);
+
+ ASSERT_EQ(0, symlink("none", file1_s1d2));
+ ASSERT_EQ(0, link(file1_s1d2, file2_s1d2));
+ ASSERT_EQ(0, unlink(file2_s1d2));
+ ASSERT_EQ(0, rename(file1_s1d2, file2_s1d2));
+
+ ASSERT_EQ(0, symlink("none", file1_s1d3));
+ ASSERT_EQ(0, link(file1_s1d3, file2_s1d3));
+ ASSERT_EQ(0, unlink(file2_s1d3));
+ ASSERT_EQ(0, rename(file1_s1d3, file2_s1d3));
+}
+
+TEST_F_FORK(layout1, make_dir)
+{
+ const struct rule rules[] = {
+ {
+ .path = dir_s1d2,
+ .access = LANDLOCK_ACCESS_FS_MAKE_DIR,
+ },
+ {},
+ };
+ const int ruleset_fd =
+ create_ruleset(_metadata, rules[0].access, rules);
+
+ ASSERT_LE(0, ruleset_fd);
+
+ ASSERT_EQ(0, unlink(file1_s1d1));
+ ASSERT_EQ(0, unlink(file1_s1d2));
+ ASSERT_EQ(0, unlink(file1_s1d3));
+
+ enforce_ruleset(_metadata, ruleset_fd);
+ ASSERT_EQ(0, close(ruleset_fd));
+
+ /* Uses file_* as directory names. */
+ ASSERT_EQ(-1, mkdir(file1_s1d1, 0700));
+ ASSERT_EQ(EACCES, errno);
+ ASSERT_EQ(0, mkdir(file1_s1d2, 0700));
+ ASSERT_EQ(0, mkdir(file1_s1d3, 0700));
+}
+
+static int open_proc_fd(struct __test_metadata *const _metadata, const int fd,
+ const int open_flags)
+{
+ static const char path_template[] = "/proc/self/fd/%d";
+ char procfd_path[sizeof(path_template) + 10];
+ const int procfd_path_size =
+ snprintf(procfd_path, sizeof(procfd_path), path_template, fd);
+
+ ASSERT_LT(procfd_path_size, sizeof(procfd_path));
+ return open(procfd_path, open_flags);
+}
+
+TEST_F_FORK(layout1, proc_unlinked_file)
+{
+ const struct rule rules[] = {
+ {
+ .path = file1_s1d2,
+ .access = LANDLOCK_ACCESS_FS_READ_FILE,
+ },
+ {},
+ };
+ int reg_fd, proc_fd;
+ const int ruleset_fd = create_ruleset(
+ _metadata,
+ LANDLOCK_ACCESS_FS_READ_FILE | LANDLOCK_ACCESS_FS_WRITE_FILE,
+ rules);
+
+ ASSERT_LE(0, ruleset_fd);
+ enforce_ruleset(_metadata, ruleset_fd);
+ ASSERT_EQ(0, close(ruleset_fd));
+
+ ASSERT_EQ(EACCES, test_open(file1_s1d2, O_RDWR));
+ ASSERT_EQ(0, test_open(file1_s1d2, O_RDONLY));
+ reg_fd = open(file1_s1d2, O_RDONLY | O_CLOEXEC);
+ ASSERT_LE(0, reg_fd);
+ ASSERT_EQ(0, unlink(file1_s1d2));
+
+ proc_fd = open_proc_fd(_metadata, reg_fd, O_RDONLY | O_CLOEXEC);
+ ASSERT_LE(0, proc_fd);
+ ASSERT_EQ(0, close(proc_fd));
+
+ proc_fd = open_proc_fd(_metadata, reg_fd, O_RDWR | O_CLOEXEC);
+ ASSERT_EQ(-1, proc_fd)
+ {
+ TH_LOG("Successfully opened /proc/self/fd/%d: %s", reg_fd,
+ strerror(errno));
+ }
+ ASSERT_EQ(EACCES, errno);
+
+ ASSERT_EQ(0, close(reg_fd));
+}
+
+TEST_F_FORK(layout1, proc_pipe)
+{
+ int proc_fd;
+ int pipe_fds[2];
+ char buf = '\0';
+ const struct rule rules[] = {
+ {
+ .path = dir_s1d2,
+ .access = LANDLOCK_ACCESS_FS_READ_FILE |
+ LANDLOCK_ACCESS_FS_WRITE_FILE,
+ },
+ {},
+ };
+ /* Limits read and write access to files tied to the filesystem. */
+ const int ruleset_fd =
+ create_ruleset(_metadata, rules[0].access, rules);
+
+ ASSERT_LE(0, ruleset_fd);
+ enforce_ruleset(_metadata, ruleset_fd);
+ ASSERT_EQ(0, close(ruleset_fd));
+
+ /* Checks enforcement for normal files. */
+ ASSERT_EQ(0, test_open(file1_s1d2, O_RDWR));
+ ASSERT_EQ(EACCES, test_open(file1_s1d1, O_RDWR));
+
+ /* Checks access to pipes through FD. */
+ ASSERT_EQ(0, pipe2(pipe_fds, O_CLOEXEC));
+ ASSERT_EQ(1, write(pipe_fds[1], ".", 1))
+ {
+ TH_LOG("Failed to write in pipe: %s", strerror(errno));
+ }
+ ASSERT_EQ(1, read(pipe_fds[0], &buf, 1));
+ ASSERT_EQ('.', buf);
+
+ /* Checks write access to pipe through /proc/self/fd . */
+ proc_fd = open_proc_fd(_metadata, pipe_fds[1], O_WRONLY | O_CLOEXEC);
+ ASSERT_LE(0, proc_fd);
+ ASSERT_EQ(1, write(proc_fd, ".", 1))
+ {
+ TH_LOG("Failed to write through /proc/self/fd/%d: %s",
+ pipe_fds[1], strerror(errno));
+ }
+ ASSERT_EQ(0, close(proc_fd));
+
+ /* Checks read access to pipe through /proc/self/fd . */
+ proc_fd = open_proc_fd(_metadata, pipe_fds[0], O_RDONLY | O_CLOEXEC);
+ ASSERT_LE(0, proc_fd);
+ buf = '\0';
+ ASSERT_EQ(1, read(proc_fd, &buf, 1))
+ {
+ TH_LOG("Failed to read through /proc/self/fd/%d: %s",
+ pipe_fds[1], strerror(errno));
+ }
+ ASSERT_EQ(0, close(proc_fd));
+
+ ASSERT_EQ(0, close(pipe_fds[0]));
+ ASSERT_EQ(0, close(pipe_fds[1]));
+}
+
+/* Invokes truncate(2) and returns its errno or 0. */
+static int test_truncate(const char *const path)
+{
+ if (truncate(path, 10) < 0)
+ return errno;
+ return 0;
+}
+
+/*
+ * Invokes creat(2) and returns its errno or 0.
+ * Closes the opened file descriptor on success.
+ */
+static int test_creat(const char *const path)
+{
+ int fd = creat(path, 0600);
+
+ if (fd < 0)
+ return errno;
+
+ /*
+ * Mixing error codes from close(2) and creat(2) should not lead to any
+ * (access type) confusion for this test.
+ */
+ if (close(fd) < 0)
+ return errno;
+ return 0;
+}
+
+/*
+ * Exercises file truncation when it's not restricted,
+ * as it was the case before LANDLOCK_ACCESS_FS_TRUNCATE existed.
+ */
+TEST_F_FORK(layout1, truncate_unhandled)
+{
+ const char *const file_r = file1_s1d1;
+ const char *const file_w = file2_s1d1;
+ const char *const file_none = file1_s1d2;
+ const struct rule rules[] = {
+ {
+ .path = file_r,
+ .access = LANDLOCK_ACCESS_FS_READ_FILE,
+ },
+ {
+ .path = file_w,
+ .access = LANDLOCK_ACCESS_FS_WRITE_FILE,
+ },
+ /* Implicitly: No rights for file_none. */
+ {},
+ };
+
+ const __u64 handled = LANDLOCK_ACCESS_FS_READ_FILE |
+ LANDLOCK_ACCESS_FS_WRITE_FILE;
+ int ruleset_fd;
+
+ /* Enable Landlock. */
+ ruleset_fd = create_ruleset(_metadata, handled, rules);
+
+ ASSERT_LE(0, ruleset_fd);
+ enforce_ruleset(_metadata, ruleset_fd);
+ ASSERT_EQ(0, close(ruleset_fd));
+
+ /*
+ * Checks read right: truncate and open with O_TRUNC work, unless the
+ * file is attempted to be opened for writing.
+ */
+ EXPECT_EQ(0, test_truncate(file_r));
+ EXPECT_EQ(0, test_open(file_r, O_RDONLY | O_TRUNC));
+ EXPECT_EQ(EACCES, test_open(file_r, O_WRONLY | O_TRUNC));
+ EXPECT_EQ(EACCES, test_creat(file_r));
+
+ /*
+ * Checks write right: truncate and open with O_TRUNC work, unless the
+ * file is attempted to be opened for reading.
+ */
+ EXPECT_EQ(0, test_truncate(file_w));
+ EXPECT_EQ(EACCES, test_open(file_w, O_RDONLY | O_TRUNC));
+ EXPECT_EQ(0, test_open(file_w, O_WRONLY | O_TRUNC));
+ EXPECT_EQ(0, test_creat(file_w));
+
+ /*
+ * Checks "no rights" case: truncate works but all open attempts fail,
+ * including creat.
+ */
+ EXPECT_EQ(0, test_truncate(file_none));
+ EXPECT_EQ(EACCES, test_open(file_none, O_RDONLY | O_TRUNC));
+ EXPECT_EQ(EACCES, test_open(file_none, O_WRONLY | O_TRUNC));
+ EXPECT_EQ(EACCES, test_creat(file_none));
+}
+
+TEST_F_FORK(layout1, truncate)
+{
+ const char *const file_rwt = file1_s1d1;
+ const char *const file_rw = file2_s1d1;
+ const char *const file_rt = file1_s1d2;
+ const char *const file_t = file2_s1d2;
+ const char *const file_none = file1_s1d3;
+ const char *const dir_t = dir_s2d1;
+ const char *const file_in_dir_t = file1_s2d1;
+ const char *const dir_w = dir_s3d1;
+ const char *const file_in_dir_w = file1_s3d1;
+ const struct rule rules[] = {
+ {
+ .path = file_rwt,
+ .access = LANDLOCK_ACCESS_FS_READ_FILE |
+ LANDLOCK_ACCESS_FS_WRITE_FILE |
+ LANDLOCK_ACCESS_FS_TRUNCATE,
+ },
+ {
+ .path = file_rw,
+ .access = LANDLOCK_ACCESS_FS_READ_FILE |
+ LANDLOCK_ACCESS_FS_WRITE_FILE,
+ },
+ {
+ .path = file_rt,
+ .access = LANDLOCK_ACCESS_FS_READ_FILE |
+ LANDLOCK_ACCESS_FS_TRUNCATE,
+ },
+ {
+ .path = file_t,
+ .access = LANDLOCK_ACCESS_FS_TRUNCATE,
+ },
+ /* Implicitly: No access rights for file_none. */
+ {
+ .path = dir_t,
+ .access = LANDLOCK_ACCESS_FS_TRUNCATE,
+ },
+ {
+ .path = dir_w,
+ .access = LANDLOCK_ACCESS_FS_WRITE_FILE,
+ },
+ {},
+ };
+ const __u64 handled = LANDLOCK_ACCESS_FS_READ_FILE |
+ LANDLOCK_ACCESS_FS_WRITE_FILE |
+ LANDLOCK_ACCESS_FS_TRUNCATE;
+ int ruleset_fd;
+
+ /* Enable Landlock. */
+ ruleset_fd = create_ruleset(_metadata, handled, rules);
+
+ ASSERT_LE(0, ruleset_fd);
+ enforce_ruleset(_metadata, ruleset_fd);
+ ASSERT_EQ(0, close(ruleset_fd));
+
+ /* Checks read, write and truncate rights: truncation works. */
+ EXPECT_EQ(0, test_truncate(file_rwt));
+ EXPECT_EQ(0, test_open(file_rwt, O_RDONLY | O_TRUNC));
+ EXPECT_EQ(0, test_open(file_rwt, O_WRONLY | O_TRUNC));
+
+ /* Checks read and write rights: no truncate variant works. */
+ EXPECT_EQ(EACCES, test_truncate(file_rw));
+ EXPECT_EQ(EACCES, test_open(file_rw, O_RDONLY | O_TRUNC));
+ EXPECT_EQ(EACCES, test_open(file_rw, O_WRONLY | O_TRUNC));
+
+ /*
+ * Checks read and truncate rights: truncation works.
+ *
+ * Note: Files can get truncated using open() even with O_RDONLY.
+ */
+ EXPECT_EQ(0, test_truncate(file_rt));
+ EXPECT_EQ(0, test_open(file_rt, O_RDONLY | O_TRUNC));
+ EXPECT_EQ(EACCES, test_open(file_rt, O_WRONLY | O_TRUNC));
+
+ /* Checks truncate right: truncate works, but can't open file. */
+ EXPECT_EQ(0, test_truncate(file_t));
+ EXPECT_EQ(EACCES, test_open(file_t, O_RDONLY | O_TRUNC));
+ EXPECT_EQ(EACCES, test_open(file_t, O_WRONLY | O_TRUNC));
+
+ /* Checks "no rights" case: No form of truncation works. */
+ EXPECT_EQ(EACCES, test_truncate(file_none));
+ EXPECT_EQ(EACCES, test_open(file_none, O_RDONLY | O_TRUNC));
+ EXPECT_EQ(EACCES, test_open(file_none, O_WRONLY | O_TRUNC));
+
+ /*
+ * Checks truncate right on directory: truncate works on contained
+ * files.
+ */
+ EXPECT_EQ(0, test_truncate(file_in_dir_t));
+ EXPECT_EQ(EACCES, test_open(file_in_dir_t, O_RDONLY | O_TRUNC));
+ EXPECT_EQ(EACCES, test_open(file_in_dir_t, O_WRONLY | O_TRUNC));
+
+ /*
+ * Checks creat in dir_w: This requires the truncate right when
+ * overwriting an existing file, but does not require it when the file
+ * is new.
+ */
+ EXPECT_EQ(EACCES, test_creat(file_in_dir_w));
+
+ ASSERT_EQ(0, unlink(file_in_dir_w));
+ EXPECT_EQ(0, test_creat(file_in_dir_w));
+}
+
+/* Invokes ftruncate(2) and returns its errno or 0. */
+static int test_ftruncate(int fd)
+{
+ if (ftruncate(fd, 10) < 0)
+ return errno;
+ return 0;
+}
+
+TEST_F_FORK(layout1, ftruncate)
+{
+ /*
+ * This test opens a new file descriptor at different stages of
+ * Landlock restriction:
+ *
+ * without restriction: ftruncate works
+ * something else but truncate restricted: ftruncate works
+ * truncate restricted and permitted: ftruncate works
+ * truncate restricted and not permitted: ftruncate fails
+ *
+ * Whether this works or not is expected to depend on the time when the
+ * FD was opened, not to depend on the time when ftruncate() was
+ * called.
+ */
+ const char *const path = file1_s1d1;
+ const __u64 handled1 = LANDLOCK_ACCESS_FS_READ_FILE |
+ LANDLOCK_ACCESS_FS_WRITE_FILE;
+ const struct rule layer1[] = {
+ {
+ .path = path,
+ .access = LANDLOCK_ACCESS_FS_WRITE_FILE,
+ },
+ {},
+ };
+ const __u64 handled2 = LANDLOCK_ACCESS_FS_TRUNCATE;
+ const struct rule layer2[] = {
+ {
+ .path = path,
+ .access = LANDLOCK_ACCESS_FS_TRUNCATE,
+ },
+ {},
+ };
+ const __u64 handled3 = LANDLOCK_ACCESS_FS_TRUNCATE |
+ LANDLOCK_ACCESS_FS_WRITE_FILE;
+ const struct rule layer3[] = {
+ {
+ .path = path,
+ .access = LANDLOCK_ACCESS_FS_WRITE_FILE,
+ },
+ {},
+ };
+ int fd_layer0, fd_layer1, fd_layer2, fd_layer3, ruleset_fd;
+
+ fd_layer0 = open(path, O_WRONLY);
+ EXPECT_EQ(0, test_ftruncate(fd_layer0));
+
+ ruleset_fd = create_ruleset(_metadata, handled1, layer1);
+ ASSERT_LE(0, ruleset_fd);
+ enforce_ruleset(_metadata, ruleset_fd);
+ ASSERT_EQ(0, close(ruleset_fd));
+
+ fd_layer1 = open(path, O_WRONLY);
+ EXPECT_EQ(0, test_ftruncate(fd_layer0));
+ EXPECT_EQ(0, test_ftruncate(fd_layer1));
+
+ ruleset_fd = create_ruleset(_metadata, handled2, layer2);
+ ASSERT_LE(0, ruleset_fd);
+ enforce_ruleset(_metadata, ruleset_fd);
+ ASSERT_EQ(0, close(ruleset_fd));
+
+ fd_layer2 = open(path, O_WRONLY);
+ EXPECT_EQ(0, test_ftruncate(fd_layer0));
+ EXPECT_EQ(0, test_ftruncate(fd_layer1));
+ EXPECT_EQ(0, test_ftruncate(fd_layer2));
+
+ ruleset_fd = create_ruleset(_metadata, handled3, layer3);
+ ASSERT_LE(0, ruleset_fd);
+ enforce_ruleset(_metadata, ruleset_fd);
+ ASSERT_EQ(0, close(ruleset_fd));
+
+ fd_layer3 = open(path, O_WRONLY);
+ EXPECT_EQ(0, test_ftruncate(fd_layer0));
+ EXPECT_EQ(0, test_ftruncate(fd_layer1));
+ EXPECT_EQ(0, test_ftruncate(fd_layer2));
+ EXPECT_EQ(EACCES, test_ftruncate(fd_layer3));
+
+ ASSERT_EQ(0, close(fd_layer0));
+ ASSERT_EQ(0, close(fd_layer1));
+ ASSERT_EQ(0, close(fd_layer2));
+ ASSERT_EQ(0, close(fd_layer3));
+}
+
+/* clang-format off */
+FIXTURE(ftruncate) {};
+/* clang-format on */
+
+FIXTURE_SETUP(ftruncate)
+{
+ prepare_layout(_metadata);
+ create_file(_metadata, file1_s1d1);
+}
+
+FIXTURE_TEARDOWN(ftruncate)
+{
+ EXPECT_EQ(0, remove_path(file1_s1d1));
+ cleanup_layout(_metadata);
+}
+
+FIXTURE_VARIANT(ftruncate)
+{
+ const __u64 handled;
+ const __u64 allowed;
+ const int expected_open_result;
+ const int expected_ftruncate_result;
+};
+
+/* clang-format off */
+FIXTURE_VARIANT_ADD(ftruncate, w_w) {
+ /* clang-format on */
+ .handled = LANDLOCK_ACCESS_FS_WRITE_FILE,
+ .allowed = LANDLOCK_ACCESS_FS_WRITE_FILE,
+ .expected_open_result = 0,
+ .expected_ftruncate_result = 0,
+};
+
+/* clang-format off */
+FIXTURE_VARIANT_ADD(ftruncate, t_t) {
+ /* clang-format on */
+ .handled = LANDLOCK_ACCESS_FS_TRUNCATE,
+ .allowed = LANDLOCK_ACCESS_FS_TRUNCATE,
+ .expected_open_result = 0,
+ .expected_ftruncate_result = 0,
+};
+
+/* clang-format off */
+FIXTURE_VARIANT_ADD(ftruncate, wt_w) {
+ /* clang-format on */
+ .handled = LANDLOCK_ACCESS_FS_WRITE_FILE | LANDLOCK_ACCESS_FS_TRUNCATE,
+ .allowed = LANDLOCK_ACCESS_FS_WRITE_FILE,
+ .expected_open_result = 0,
+ .expected_ftruncate_result = EACCES,
+};
+
+/* clang-format off */
+FIXTURE_VARIANT_ADD(ftruncate, wt_wt) {
+ /* clang-format on */
+ .handled = LANDLOCK_ACCESS_FS_WRITE_FILE | LANDLOCK_ACCESS_FS_TRUNCATE,
+ .allowed = LANDLOCK_ACCESS_FS_WRITE_FILE | LANDLOCK_ACCESS_FS_TRUNCATE,
+ .expected_open_result = 0,
+ .expected_ftruncate_result = 0,
+};
+
+/* clang-format off */
+FIXTURE_VARIANT_ADD(ftruncate, wt_t) {
+ /* clang-format on */
+ .handled = LANDLOCK_ACCESS_FS_WRITE_FILE | LANDLOCK_ACCESS_FS_TRUNCATE,
+ .allowed = LANDLOCK_ACCESS_FS_TRUNCATE,
+ .expected_open_result = EACCES,
+};
+
+TEST_F_FORK(ftruncate, open_and_ftruncate)
+{
+ const char *const path = file1_s1d1;
+ const struct rule rules[] = {
+ {
+ .path = path,
+ .access = variant->allowed,
+ },
+ {},
+ };
+ int fd, ruleset_fd;
+
+ /* Enable Landlock. */
+ ruleset_fd = create_ruleset(_metadata, variant->handled, rules);
+ ASSERT_LE(0, ruleset_fd);
+ enforce_ruleset(_metadata, ruleset_fd);
+ ASSERT_EQ(0, close(ruleset_fd));
+
+ fd = open(path, O_WRONLY);
+ EXPECT_EQ(variant->expected_open_result, (fd < 0 ? errno : 0));
+ if (fd >= 0) {
+ EXPECT_EQ(variant->expected_ftruncate_result,
+ test_ftruncate(fd));
+ ASSERT_EQ(0, close(fd));
+ }
+}
+
+TEST_F_FORK(ftruncate, open_and_ftruncate_in_different_processes)
+{
+ int child, fd, status;
+ int socket_fds[2];
+
+ ASSERT_EQ(0, socketpair(AF_UNIX, SOCK_STREAM | SOCK_CLOEXEC, 0,
+ socket_fds));
+
+ child = fork();
+ ASSERT_LE(0, child);
+ if (child == 0) {
+ /*
+ * Enables Landlock in the child process, open a file descriptor
+ * where truncation is forbidden and send it to the
+ * non-landlocked parent process.
+ */
+ const char *const path = file1_s1d1;
+ const struct rule rules[] = {
+ {
+ .path = path,
+ .access = variant->allowed,
+ },
+ {},
+ };
+ int fd, ruleset_fd;
+
+ ruleset_fd = create_ruleset(_metadata, variant->handled, rules);
+ ASSERT_LE(0, ruleset_fd);
+ enforce_ruleset(_metadata, ruleset_fd);
+ ASSERT_EQ(0, close(ruleset_fd));
+
+ fd = open(path, O_WRONLY);
+ ASSERT_EQ(variant->expected_open_result, (fd < 0 ? errno : 0));
+
+ if (fd >= 0) {
+ ASSERT_EQ(0, send_fd(socket_fds[0], fd));
+ ASSERT_EQ(0, close(fd));
+ }
+
+ ASSERT_EQ(0, close(socket_fds[0]));
+
+ _exit(_metadata->exit_code);
+ return;
+ }
+
+ if (variant->expected_open_result == 0) {
+ fd = recv_fd(socket_fds[1]);
+ ASSERT_LE(0, fd);
+
+ EXPECT_EQ(variant->expected_ftruncate_result,
+ test_ftruncate(fd));
+ ASSERT_EQ(0, close(fd));
+ }
+
+ ASSERT_EQ(child, waitpid(child, &status, 0));
+ ASSERT_EQ(1, WIFEXITED(status));
+ ASSERT_EQ(EXIT_SUCCESS, WEXITSTATUS(status));
+
+ ASSERT_EQ(0, close(socket_fds[0]));
+ ASSERT_EQ(0, close(socket_fds[1]));
+}
+
+TEST(memfd_ftruncate)
+{
+ int fd;
+
+ fd = memfd_create("name", MFD_CLOEXEC);
+ ASSERT_LE(0, fd);
+
+ /*
+ * Checks that ftruncate is permitted on file descriptors that are
+ * created in ways other than open(2).
+ */
+ EXPECT_EQ(0, test_ftruncate(fd));
+
+ ASSERT_EQ(0, close(fd));
+}
+
+/* clang-format off */
+FIXTURE(layout1_bind) {};
+/* clang-format on */
+
+FIXTURE_SETUP(layout1_bind)
+{
+ prepare_layout(_metadata);
+
+ create_layout1(_metadata);
+
+ set_cap(_metadata, CAP_SYS_ADMIN);
+ ASSERT_EQ(0, mount(dir_s1d2, dir_s2d2, NULL, MS_BIND, NULL));
+ clear_cap(_metadata, CAP_SYS_ADMIN);
+}
+
+FIXTURE_TEARDOWN(layout1_bind)
+{
+ /* umount(dir_s2d2)) is handled by namespace lifetime. */
+
+ remove_layout1(_metadata);
+
+ cleanup_layout(_metadata);
+}
+
+static const char bind_dir_s1d3[] = TMP_DIR "/s2d1/s2d2/s1d3";
+static const char bind_file1_s1d3[] = TMP_DIR "/s2d1/s2d2/s1d3/f1";
+
+/*
+ * layout1_bind hierarchy:
+ *
+ * tmp
+ * ├── s1d1
+ * │   ├── f1
+ * │   ├── f2
+ * │   └── s1d2
+ * │   ├── f1
+ * │   ├── f2
+ * │   └── s1d3
+ * │   ├── f1
+ * │   └── f2
+ * ├── s2d1
+ * │   ├── f1
+ * │   └── s2d2
+ * │   ├── f1
+ * │   ├── f2
+ * │   └── s1d3
+ * │   ├── f1
+ * │   └── f2
+ * └── s3d1
+ * └── s3d2
+ * └── s3d3
+ */
+
+TEST_F_FORK(layout1_bind, no_restriction)
+{
+ ASSERT_EQ(0, test_open(dir_s1d1, O_RDONLY));
+ ASSERT_EQ(0, test_open(file1_s1d1, O_RDONLY));
+ ASSERT_EQ(0, test_open(dir_s1d2, O_RDONLY));
+ ASSERT_EQ(0, test_open(file1_s1d2, O_RDONLY));
+ ASSERT_EQ(0, test_open(dir_s1d3, O_RDONLY));
+ ASSERT_EQ(0, test_open(file1_s1d3, O_RDONLY));
+
+ ASSERT_EQ(0, test_open(dir_s2d1, O_RDONLY));
+ ASSERT_EQ(0, test_open(file1_s2d1, O_RDONLY));
+ ASSERT_EQ(0, test_open(dir_s2d2, O_RDONLY));
+ ASSERT_EQ(0, test_open(file1_s2d2, O_RDONLY));
+ ASSERT_EQ(ENOENT, test_open(dir_s2d3, O_RDONLY));
+ ASSERT_EQ(ENOENT, test_open(file1_s2d3, O_RDONLY));
+
+ ASSERT_EQ(0, test_open(bind_dir_s1d3, O_RDONLY));
+ ASSERT_EQ(0, test_open(bind_file1_s1d3, O_RDONLY));
+
+ ASSERT_EQ(0, test_open(dir_s3d1, O_RDONLY));
+}
+
+TEST_F_FORK(layout1_bind, same_content_same_file)
+{
+ /*
+ * Sets access right on parent directories of both source and
+ * destination mount points.
+ */
+ const struct rule layer1_parent[] = {
+ {
+ .path = dir_s1d1,
+ .access = ACCESS_RO,
+ },
+ {
+ .path = dir_s2d1,
+ .access = ACCESS_RW,
+ },
+ {},
+ };
+ /*
+ * Sets access rights on the same bind-mounted directories. The result
+ * should be ACCESS_RW for both directories, but not both hierarchies
+ * because of the first layer.
+ */
+ const struct rule layer2_mount_point[] = {
+ {
+ .path = dir_s1d2,
+ .access = LANDLOCK_ACCESS_FS_READ_FILE,
+ },
+ {
+ .path = dir_s2d2,
+ .access = ACCESS_RW,
+ },
+ {},
+ };
+ /* Only allow read-access to the s1d3 hierarchies. */
+ const struct rule layer3_source[] = {
+ {
+ .path = dir_s1d3,
+ .access = LANDLOCK_ACCESS_FS_READ_FILE,
+ },
+ {},
+ };
+ /* Removes all access rights. */
+ const struct rule layer4_destination[] = {
+ {
+ .path = bind_file1_s1d3,
+ .access = LANDLOCK_ACCESS_FS_WRITE_FILE,
+ },
+ {},
+ };
+ int ruleset_fd;
+
+ /* Sets rules for the parent directories. */
+ ruleset_fd = create_ruleset(_metadata, ACCESS_RW, layer1_parent);
+ ASSERT_LE(0, ruleset_fd);
+ enforce_ruleset(_metadata, ruleset_fd);
+ ASSERT_EQ(0, close(ruleset_fd));
+
+ /* Checks source hierarchy. */
+ ASSERT_EQ(0, test_open(file1_s1d1, O_RDONLY));
+ ASSERT_EQ(EACCES, test_open(file1_s1d1, O_WRONLY));
+ ASSERT_EQ(0, test_open(dir_s1d1, O_RDONLY | O_DIRECTORY));
+
+ ASSERT_EQ(0, test_open(file1_s1d2, O_RDONLY));
+ ASSERT_EQ(EACCES, test_open(file1_s1d2, O_WRONLY));
+ ASSERT_EQ(0, test_open(dir_s1d2, O_RDONLY | O_DIRECTORY));
+
+ /* Checks destination hierarchy. */
+ ASSERT_EQ(0, test_open(file1_s2d1, O_RDWR));
+ ASSERT_EQ(0, test_open(dir_s2d1, O_RDONLY | O_DIRECTORY));
+
+ ASSERT_EQ(0, test_open(file1_s2d2, O_RDWR));
+ ASSERT_EQ(0, test_open(dir_s2d2, O_RDONLY | O_DIRECTORY));
+
+ /* Sets rules for the mount points. */
+ ruleset_fd = create_ruleset(_metadata, ACCESS_RW, layer2_mount_point);
+ ASSERT_LE(0, ruleset_fd);
+ enforce_ruleset(_metadata, ruleset_fd);
+ ASSERT_EQ(0, close(ruleset_fd));
+
+ /* Checks source hierarchy. */
+ ASSERT_EQ(EACCES, test_open(file1_s1d1, O_RDONLY));
+ ASSERT_EQ(EACCES, test_open(file1_s1d1, O_WRONLY));
+ ASSERT_EQ(EACCES, test_open(dir_s1d1, O_RDONLY | O_DIRECTORY));
+
+ ASSERT_EQ(0, test_open(file1_s1d2, O_RDONLY));
+ ASSERT_EQ(EACCES, test_open(file1_s1d2, O_WRONLY));
+ ASSERT_EQ(0, test_open(dir_s1d2, O_RDONLY | O_DIRECTORY));
+
+ /* Checks destination hierarchy. */
+ ASSERT_EQ(EACCES, test_open(file1_s2d1, O_RDONLY));
+ ASSERT_EQ(EACCES, test_open(file1_s2d1, O_WRONLY));
+ ASSERT_EQ(EACCES, test_open(dir_s2d1, O_RDONLY | O_DIRECTORY));
+
+ ASSERT_EQ(0, test_open(file1_s2d2, O_RDWR));
+ ASSERT_EQ(0, test_open(dir_s2d2, O_RDONLY | O_DIRECTORY));
+ ASSERT_EQ(0, test_open(bind_dir_s1d3, O_RDONLY | O_DIRECTORY));
+
+ /* Sets a (shared) rule only on the source. */
+ ruleset_fd = create_ruleset(_metadata, ACCESS_RW, layer3_source);
+ ASSERT_LE(0, ruleset_fd);
+ enforce_ruleset(_metadata, ruleset_fd);
+ ASSERT_EQ(0, close(ruleset_fd));
+
+ /* Checks source hierarchy. */
+ ASSERT_EQ(EACCES, test_open(file1_s1d2, O_RDONLY));
+ ASSERT_EQ(EACCES, test_open(file1_s1d2, O_WRONLY));
+ ASSERT_EQ(EACCES, test_open(dir_s1d2, O_RDONLY | O_DIRECTORY));
+
+ ASSERT_EQ(0, test_open(file1_s1d3, O_RDONLY));
+ ASSERT_EQ(EACCES, test_open(file1_s1d3, O_WRONLY));
+ ASSERT_EQ(EACCES, test_open(dir_s1d3, O_RDONLY | O_DIRECTORY));
+
+ /* Checks destination hierarchy. */
+ ASSERT_EQ(EACCES, test_open(file1_s2d2, O_RDONLY));
+ ASSERT_EQ(EACCES, test_open(file1_s2d2, O_WRONLY));
+ ASSERT_EQ(EACCES, test_open(dir_s2d2, O_RDONLY | O_DIRECTORY));
+
+ ASSERT_EQ(0, test_open(bind_file1_s1d3, O_RDONLY));
+ ASSERT_EQ(EACCES, test_open(bind_file1_s1d3, O_WRONLY));
+ ASSERT_EQ(EACCES, test_open(bind_dir_s1d3, O_RDONLY | O_DIRECTORY));
+
+ /* Sets a (shared) rule only on the destination. */
+ ruleset_fd = create_ruleset(_metadata, ACCESS_RW, layer4_destination);
+ ASSERT_LE(0, ruleset_fd);
+ enforce_ruleset(_metadata, ruleset_fd);
+ ASSERT_EQ(0, close(ruleset_fd));
+
+ /* Checks source hierarchy. */
+ ASSERT_EQ(EACCES, test_open(file1_s1d3, O_RDONLY));
+ ASSERT_EQ(EACCES, test_open(file1_s1d3, O_WRONLY));
+
+ /* Checks destination hierarchy. */
+ ASSERT_EQ(EACCES, test_open(bind_file1_s1d3, O_RDONLY));
+ ASSERT_EQ(EACCES, test_open(bind_file1_s1d3, O_WRONLY));
+}
+
+TEST_F_FORK(layout1_bind, reparent_cross_mount)
+{
+ const struct rule layer1[] = {
+ {
+ /* dir_s2d1 is beneath the dir_s2d2 mount point. */
+ .path = dir_s2d1,
+ .access = LANDLOCK_ACCESS_FS_REFER,
+ },
+ {
+ .path = bind_dir_s1d3,
+ .access = LANDLOCK_ACCESS_FS_EXECUTE,
+ },
+ {},
+ };
+ int ruleset_fd = create_ruleset(
+ _metadata,
+ LANDLOCK_ACCESS_FS_REFER | LANDLOCK_ACCESS_FS_EXECUTE, layer1);
+
+ ASSERT_LE(0, ruleset_fd);
+ enforce_ruleset(_metadata, ruleset_fd);
+ ASSERT_EQ(0, close(ruleset_fd));
+
+ /* Checks basic denied move. */
+ ASSERT_EQ(-1, rename(file1_s1d1, file1_s1d2));
+ ASSERT_EQ(EXDEV, errno);
+
+ /* Checks real cross-mount move (Landlock is not involved). */
+ ASSERT_EQ(-1, rename(file1_s2d1, file1_s2d2));
+ ASSERT_EQ(EXDEV, errno);
+
+ /* Checks move that will give more accesses. */
+ ASSERT_EQ(-1, rename(file1_s2d2, bind_file1_s1d3));
+ ASSERT_EQ(EXDEV, errno);
+
+ /* Checks legitimate downgrade move. */
+ ASSERT_EQ(0, rename(bind_file1_s1d3, file1_s2d2));
+}
+
+#define LOWER_BASE TMP_DIR "/lower"
+#define LOWER_DATA LOWER_BASE "/data"
+static const char lower_fl1[] = LOWER_DATA "/fl1";
+static const char lower_dl1[] = LOWER_DATA "/dl1";
+static const char lower_dl1_fl2[] = LOWER_DATA "/dl1/fl2";
+static const char lower_fo1[] = LOWER_DATA "/fo1";
+static const char lower_do1[] = LOWER_DATA "/do1";
+static const char lower_do1_fo2[] = LOWER_DATA "/do1/fo2";
+static const char lower_do1_fl3[] = LOWER_DATA "/do1/fl3";
+
+static const char (*lower_base_files[])[] = {
+ &lower_fl1,
+ &lower_fo1,
+ NULL,
+};
+static const char (*lower_base_directories[])[] = {
+ &lower_dl1,
+ &lower_do1,
+ NULL,
+};
+static const char (*lower_sub_files[])[] = {
+ &lower_dl1_fl2,
+ &lower_do1_fo2,
+ &lower_do1_fl3,
+ NULL,
+};
+
+#define UPPER_BASE TMP_DIR "/upper"
+#define UPPER_DATA UPPER_BASE "/data"
+#define UPPER_WORK UPPER_BASE "/work"
+static const char upper_fu1[] = UPPER_DATA "/fu1";
+static const char upper_du1[] = UPPER_DATA "/du1";
+static const char upper_du1_fu2[] = UPPER_DATA "/du1/fu2";
+static const char upper_fo1[] = UPPER_DATA "/fo1";
+static const char upper_do1[] = UPPER_DATA "/do1";
+static const char upper_do1_fo2[] = UPPER_DATA "/do1/fo2";
+static const char upper_do1_fu3[] = UPPER_DATA "/do1/fu3";
+
+static const char (*upper_base_files[])[] = {
+ &upper_fu1,
+ &upper_fo1,
+ NULL,
+};
+static const char (*upper_base_directories[])[] = {
+ &upper_du1,
+ &upper_do1,
+ NULL,
+};
+static const char (*upper_sub_files[])[] = {
+ &upper_du1_fu2,
+ &upper_do1_fo2,
+ &upper_do1_fu3,
+ NULL,
+};
+
+#define MERGE_BASE TMP_DIR "/merge"
+#define MERGE_DATA MERGE_BASE "/data"
+static const char merge_fl1[] = MERGE_DATA "/fl1";
+static const char merge_dl1[] = MERGE_DATA "/dl1";
+static const char merge_dl1_fl2[] = MERGE_DATA "/dl1/fl2";
+static const char merge_fu1[] = MERGE_DATA "/fu1";
+static const char merge_du1[] = MERGE_DATA "/du1";
+static const char merge_du1_fu2[] = MERGE_DATA "/du1/fu2";
+static const char merge_fo1[] = MERGE_DATA "/fo1";
+static const char merge_do1[] = MERGE_DATA "/do1";
+static const char merge_do1_fo2[] = MERGE_DATA "/do1/fo2";
+static const char merge_do1_fl3[] = MERGE_DATA "/do1/fl3";
+static const char merge_do1_fu3[] = MERGE_DATA "/do1/fu3";
+
+static const char (*merge_base_files[])[] = {
+ &merge_fl1,
+ &merge_fu1,
+ &merge_fo1,
+ NULL,
+};
+static const char (*merge_base_directories[])[] = {
+ &merge_dl1,
+ &merge_du1,
+ &merge_do1,
+ NULL,
+};
+static const char (*merge_sub_files[])[] = {
+ &merge_dl1_fl2, &merge_du1_fu2, &merge_do1_fo2,
+ &merge_do1_fl3, &merge_do1_fu3, NULL,
+};
+
+/*
+ * layout2_overlay hierarchy:
+ *
+ * tmp
+ * ├── lower
+ * │   └── data
+ * │   ├── dl1
+ * │   │   └── fl2
+ * │   ├── do1
+ * │   │   ├── fl3
+ * │   │   └── fo2
+ * │   ├── fl1
+ * │   └── fo1
+ * ├── merge
+ * │   └── data
+ * │   ├── dl1
+ * │   │   └── fl2
+ * │   ├── do1
+ * │   │   ├── fl3
+ * │   │   ├── fo2
+ * │   │   └── fu3
+ * │   ├── du1
+ * │   │   └── fu2
+ * │   ├── fl1
+ * │   ├── fo1
+ * │   └── fu1
+ * └── upper
+ * ├── data
+ * │   ├── do1
+ * │   │   ├── fo2
+ * │   │   └── fu3
+ * │   ├── du1
+ * │   │   └── fu2
+ * │   ├── fo1
+ * │   └── fu1
+ * └── work
+ * └── work
+ */
+
+FIXTURE(layout2_overlay)
+{
+ bool skip_test;
+};
+
+FIXTURE_SETUP(layout2_overlay)
+{
+ if (!supports_filesystem("overlay")) {
+ self->skip_test = true;
+ SKIP(return, "overlayfs is not supported (setup)");
+ }
+
+ prepare_layout(_metadata);
+
+ create_directory(_metadata, LOWER_BASE);
+ set_cap(_metadata, CAP_SYS_ADMIN);
+ /* Creates tmpfs mount points to get deterministic overlayfs. */
+ ASSERT_EQ(0, mount_opt(&mnt_tmp, LOWER_BASE));
+ clear_cap(_metadata, CAP_SYS_ADMIN);
+ create_file(_metadata, lower_fl1);
+ create_file(_metadata, lower_dl1_fl2);
+ create_file(_metadata, lower_fo1);
+ create_file(_metadata, lower_do1_fo2);
+ create_file(_metadata, lower_do1_fl3);
+
+ create_directory(_metadata, UPPER_BASE);
+ set_cap(_metadata, CAP_SYS_ADMIN);
+ ASSERT_EQ(0, mount_opt(&mnt_tmp, UPPER_BASE));
+ clear_cap(_metadata, CAP_SYS_ADMIN);
+ create_file(_metadata, upper_fu1);
+ create_file(_metadata, upper_du1_fu2);
+ create_file(_metadata, upper_fo1);
+ create_file(_metadata, upper_do1_fo2);
+ create_file(_metadata, upper_do1_fu3);
+ ASSERT_EQ(0, mkdir(UPPER_WORK, 0700));
+
+ create_directory(_metadata, MERGE_DATA);
+ set_cap(_metadata, CAP_SYS_ADMIN);
+ set_cap(_metadata, CAP_DAC_OVERRIDE);
+ ASSERT_EQ(0, mount("overlay", MERGE_DATA, "overlay", 0,
+ "lowerdir=" LOWER_DATA ",upperdir=" UPPER_DATA
+ ",workdir=" UPPER_WORK));
+ clear_cap(_metadata, CAP_DAC_OVERRIDE);
+ clear_cap(_metadata, CAP_SYS_ADMIN);
+}
+
+FIXTURE_TEARDOWN(layout2_overlay)
+{
+ if (self->skip_test)
+ SKIP(return, "overlayfs is not supported (teardown)");
+
+ EXPECT_EQ(0, remove_path(lower_do1_fl3));
+ EXPECT_EQ(0, remove_path(lower_dl1_fl2));
+ EXPECT_EQ(0, remove_path(lower_fl1));
+ EXPECT_EQ(0, remove_path(lower_do1_fo2));
+ EXPECT_EQ(0, remove_path(lower_fo1));
+
+ /* umount(LOWER_BASE)) is handled by namespace lifetime. */
+ EXPECT_EQ(0, remove_path(LOWER_BASE));
+
+ EXPECT_EQ(0, remove_path(upper_do1_fu3));
+ EXPECT_EQ(0, remove_path(upper_du1_fu2));
+ EXPECT_EQ(0, remove_path(upper_fu1));
+ EXPECT_EQ(0, remove_path(upper_do1_fo2));
+ EXPECT_EQ(0, remove_path(upper_fo1));
+ EXPECT_EQ(0, remove_path(UPPER_WORK "/work"));
+
+ /* umount(UPPER_BASE)) is handled by namespace lifetime. */
+ EXPECT_EQ(0, remove_path(UPPER_BASE));
+
+ /* umount(MERGE_DATA)) is handled by namespace lifetime. */
+ EXPECT_EQ(0, remove_path(MERGE_DATA));
+
+ cleanup_layout(_metadata);
+}
+
+TEST_F_FORK(layout2_overlay, no_restriction)
+{
+ if (self->skip_test)
+ SKIP(return, "overlayfs is not supported (test)");
+
+ ASSERT_EQ(0, test_open(lower_fl1, O_RDONLY));
+ ASSERT_EQ(0, test_open(lower_dl1, O_RDONLY));
+ ASSERT_EQ(0, test_open(lower_dl1_fl2, O_RDONLY));
+ ASSERT_EQ(0, test_open(lower_fo1, O_RDONLY));
+ ASSERT_EQ(0, test_open(lower_do1, O_RDONLY));
+ ASSERT_EQ(0, test_open(lower_do1_fo2, O_RDONLY));
+ ASSERT_EQ(0, test_open(lower_do1_fl3, O_RDONLY));
+
+ ASSERT_EQ(0, test_open(upper_fu1, O_RDONLY));
+ ASSERT_EQ(0, test_open(upper_du1, O_RDONLY));
+ ASSERT_EQ(0, test_open(upper_du1_fu2, O_RDONLY));
+ ASSERT_EQ(0, test_open(upper_fo1, O_RDONLY));
+ ASSERT_EQ(0, test_open(upper_do1, O_RDONLY));
+ ASSERT_EQ(0, test_open(upper_do1_fo2, O_RDONLY));
+ ASSERT_EQ(0, test_open(upper_do1_fu3, O_RDONLY));
+
+ ASSERT_EQ(0, test_open(merge_fl1, O_RDONLY));
+ ASSERT_EQ(0, test_open(merge_dl1, O_RDONLY));
+ ASSERT_EQ(0, test_open(merge_dl1_fl2, O_RDONLY));
+ ASSERT_EQ(0, test_open(merge_fu1, O_RDONLY));
+ ASSERT_EQ(0, test_open(merge_du1, O_RDONLY));
+ ASSERT_EQ(0, test_open(merge_du1_fu2, O_RDONLY));
+ ASSERT_EQ(0, test_open(merge_fo1, O_RDONLY));
+ ASSERT_EQ(0, test_open(merge_do1, O_RDONLY));
+ ASSERT_EQ(0, test_open(merge_do1_fo2, O_RDONLY));
+ ASSERT_EQ(0, test_open(merge_do1_fl3, O_RDONLY));
+ ASSERT_EQ(0, test_open(merge_do1_fu3, O_RDONLY));
+}
+
+#define for_each_path(path_list, path_entry, i) \
+ for (i = 0, path_entry = *path_list[i]; path_list[i]; \
+ path_entry = *path_list[++i])
+
+TEST_F_FORK(layout2_overlay, same_content_different_file)
+{
+ /* Sets access right on parent directories of both layers. */
+ const struct rule layer1_base[] = {
+ {
+ .path = LOWER_BASE,
+ .access = LANDLOCK_ACCESS_FS_READ_FILE,
+ },
+ {
+ .path = UPPER_BASE,
+ .access = LANDLOCK_ACCESS_FS_READ_FILE,
+ },
+ {
+ .path = MERGE_BASE,
+ .access = ACCESS_RW,
+ },
+ {},
+ };
+ const struct rule layer2_data[] = {
+ {
+ .path = LOWER_DATA,
+ .access = LANDLOCK_ACCESS_FS_READ_FILE,
+ },
+ {
+ .path = UPPER_DATA,
+ .access = LANDLOCK_ACCESS_FS_READ_FILE,
+ },
+ {
+ .path = MERGE_DATA,
+ .access = ACCESS_RW,
+ },
+ {},
+ };
+ /* Sets access right on directories inside both layers. */
+ const struct rule layer3_subdirs[] = {
+ {
+ .path = lower_dl1,
+ .access = LANDLOCK_ACCESS_FS_READ_FILE,
+ },
+ {
+ .path = lower_do1,
+ .access = LANDLOCK_ACCESS_FS_READ_FILE,
+ },
+ {
+ .path = upper_du1,
+ .access = LANDLOCK_ACCESS_FS_READ_FILE,
+ },
+ {
+ .path = upper_do1,
+ .access = LANDLOCK_ACCESS_FS_READ_FILE,
+ },
+ {
+ .path = merge_dl1,
+ .access = ACCESS_RW,
+ },
+ {
+ .path = merge_du1,
+ .access = ACCESS_RW,
+ },
+ {
+ .path = merge_do1,
+ .access = ACCESS_RW,
+ },
+ {},
+ };
+ /* Tighten access rights to the files. */
+ const struct rule layer4_files[] = {
+ {
+ .path = lower_dl1_fl2,
+ .access = LANDLOCK_ACCESS_FS_READ_FILE,
+ },
+ {
+ .path = lower_do1_fo2,
+ .access = LANDLOCK_ACCESS_FS_READ_FILE,
+ },
+ {
+ .path = lower_do1_fl3,
+ .access = LANDLOCK_ACCESS_FS_READ_FILE,
+ },
+ {
+ .path = upper_du1_fu2,
+ .access = LANDLOCK_ACCESS_FS_READ_FILE,
+ },
+ {
+ .path = upper_do1_fo2,
+ .access = LANDLOCK_ACCESS_FS_READ_FILE,
+ },
+ {
+ .path = upper_do1_fu3,
+ .access = LANDLOCK_ACCESS_FS_READ_FILE,
+ },
+ {
+ .path = merge_dl1_fl2,
+ .access = LANDLOCK_ACCESS_FS_READ_FILE |
+ LANDLOCK_ACCESS_FS_WRITE_FILE,
+ },
+ {
+ .path = merge_du1_fu2,
+ .access = LANDLOCK_ACCESS_FS_READ_FILE |
+ LANDLOCK_ACCESS_FS_WRITE_FILE,
+ },
+ {
+ .path = merge_do1_fo2,
+ .access = LANDLOCK_ACCESS_FS_READ_FILE |
+ LANDLOCK_ACCESS_FS_WRITE_FILE,
+ },
+ {
+ .path = merge_do1_fl3,
+ .access = LANDLOCK_ACCESS_FS_READ_FILE |
+ LANDLOCK_ACCESS_FS_WRITE_FILE,
+ },
+ {
+ .path = merge_do1_fu3,
+ .access = LANDLOCK_ACCESS_FS_READ_FILE |
+ LANDLOCK_ACCESS_FS_WRITE_FILE,
+ },
+ {},
+ };
+ const struct rule layer5_merge_only[] = {
+ {
+ .path = MERGE_DATA,
+ .access = LANDLOCK_ACCESS_FS_READ_FILE |
+ LANDLOCK_ACCESS_FS_WRITE_FILE,
+ },
+ {},
+ };
+ int ruleset_fd;
+ size_t i;
+ const char *path_entry;
+
+ if (self->skip_test)
+ SKIP(return, "overlayfs is not supported (test)");
+
+ /* Sets rules on base directories (i.e. outside overlay scope). */
+ ruleset_fd = create_ruleset(_metadata, ACCESS_RW, layer1_base);
+ ASSERT_LE(0, ruleset_fd);
+ enforce_ruleset(_metadata, ruleset_fd);
+ ASSERT_EQ(0, close(ruleset_fd));
+
+ /* Checks lower layer. */
+ for_each_path(lower_base_files, path_entry, i) {
+ ASSERT_EQ(0, test_open(path_entry, O_RDONLY));
+ ASSERT_EQ(EACCES, test_open(path_entry, O_WRONLY));
+ }
+ for_each_path(lower_base_directories, path_entry, i) {
+ ASSERT_EQ(EACCES,
+ test_open(path_entry, O_RDONLY | O_DIRECTORY));
+ }
+ for_each_path(lower_sub_files, path_entry, i) {
+ ASSERT_EQ(0, test_open(path_entry, O_RDONLY));
+ ASSERT_EQ(EACCES, test_open(path_entry, O_WRONLY));
+ }
+ /* Checks upper layer. */
+ for_each_path(upper_base_files, path_entry, i) {
+ ASSERT_EQ(0, test_open(path_entry, O_RDONLY));
+ ASSERT_EQ(EACCES, test_open(path_entry, O_WRONLY));
+ }
+ for_each_path(upper_base_directories, path_entry, i) {
+ ASSERT_EQ(EACCES,
+ test_open(path_entry, O_RDONLY | O_DIRECTORY));
+ }
+ for_each_path(upper_sub_files, path_entry, i) {
+ ASSERT_EQ(0, test_open(path_entry, O_RDONLY));
+ ASSERT_EQ(EACCES, test_open(path_entry, O_WRONLY));
+ }
+ /*
+ * Checks that access rights are independent from the lower and upper
+ * layers: write access to upper files viewed through the merge point
+ * is still allowed, and write access to lower file viewed (and copied)
+ * through the merge point is still allowed.
+ */
+ for_each_path(merge_base_files, path_entry, i) {
+ ASSERT_EQ(0, test_open(path_entry, O_RDWR));
+ }
+ for_each_path(merge_base_directories, path_entry, i) {
+ ASSERT_EQ(0, test_open(path_entry, O_RDONLY | O_DIRECTORY));
+ }
+ for_each_path(merge_sub_files, path_entry, i) {
+ ASSERT_EQ(0, test_open(path_entry, O_RDWR));
+ }
+
+ /* Sets rules on data directories (i.e. inside overlay scope). */
+ ruleset_fd = create_ruleset(_metadata, ACCESS_RW, layer2_data);
+ ASSERT_LE(0, ruleset_fd);
+ enforce_ruleset(_metadata, ruleset_fd);
+ ASSERT_EQ(0, close(ruleset_fd));
+
+ /* Checks merge. */
+ for_each_path(merge_base_files, path_entry, i) {
+ ASSERT_EQ(0, test_open(path_entry, O_RDWR));
+ }
+ for_each_path(merge_base_directories, path_entry, i) {
+ ASSERT_EQ(0, test_open(path_entry, O_RDONLY | O_DIRECTORY));
+ }
+ for_each_path(merge_sub_files, path_entry, i) {
+ ASSERT_EQ(0, test_open(path_entry, O_RDWR));
+ }
+
+ /* Same checks with tighter rules. */
+ ruleset_fd = create_ruleset(_metadata, ACCESS_RW, layer3_subdirs);
+ ASSERT_LE(0, ruleset_fd);
+ enforce_ruleset(_metadata, ruleset_fd);
+ ASSERT_EQ(0, close(ruleset_fd));
+
+ /* Checks changes for lower layer. */
+ for_each_path(lower_base_files, path_entry, i) {
+ ASSERT_EQ(EACCES, test_open(path_entry, O_RDONLY));
+ }
+ /* Checks changes for upper layer. */
+ for_each_path(upper_base_files, path_entry, i) {
+ ASSERT_EQ(EACCES, test_open(path_entry, O_RDONLY));
+ }
+ /* Checks all merge accesses. */
+ for_each_path(merge_base_files, path_entry, i) {
+ ASSERT_EQ(EACCES, test_open(path_entry, O_RDWR));
+ }
+ for_each_path(merge_base_directories, path_entry, i) {
+ ASSERT_EQ(0, test_open(path_entry, O_RDONLY | O_DIRECTORY));
+ }
+ for_each_path(merge_sub_files, path_entry, i) {
+ ASSERT_EQ(0, test_open(path_entry, O_RDWR));
+ }
+
+ /* Sets rules directly on overlayed files. */
+ ruleset_fd = create_ruleset(_metadata, ACCESS_RW, layer4_files);
+ ASSERT_LE(0, ruleset_fd);
+ enforce_ruleset(_metadata, ruleset_fd);
+ ASSERT_EQ(0, close(ruleset_fd));
+
+ /* Checks unchanged accesses on lower layer. */
+ for_each_path(lower_sub_files, path_entry, i) {
+ ASSERT_EQ(0, test_open(path_entry, O_RDONLY));
+ ASSERT_EQ(EACCES, test_open(path_entry, O_WRONLY));
+ }
+ /* Checks unchanged accesses on upper layer. */
+ for_each_path(upper_sub_files, path_entry, i) {
+ ASSERT_EQ(0, test_open(path_entry, O_RDONLY));
+ ASSERT_EQ(EACCES, test_open(path_entry, O_WRONLY));
+ }
+ /* Checks all merge accesses. */
+ for_each_path(merge_base_files, path_entry, i) {
+ ASSERT_EQ(EACCES, test_open(path_entry, O_RDWR));
+ }
+ for_each_path(merge_base_directories, path_entry, i) {
+ ASSERT_EQ(EACCES,
+ test_open(path_entry, O_RDONLY | O_DIRECTORY));
+ }
+ for_each_path(merge_sub_files, path_entry, i) {
+ ASSERT_EQ(0, test_open(path_entry, O_RDWR));
+ }
+
+ /* Only allowes access to the merge hierarchy. */
+ ruleset_fd = create_ruleset(_metadata, ACCESS_RW, layer5_merge_only);
+ ASSERT_LE(0, ruleset_fd);
+ enforce_ruleset(_metadata, ruleset_fd);
+ ASSERT_EQ(0, close(ruleset_fd));
+
+ /* Checks new accesses on lower layer. */
+ for_each_path(lower_sub_files, path_entry, i) {
+ ASSERT_EQ(EACCES, test_open(path_entry, O_RDONLY));
+ }
+ /* Checks new accesses on upper layer. */
+ for_each_path(upper_sub_files, path_entry, i) {
+ ASSERT_EQ(EACCES, test_open(path_entry, O_RDONLY));
+ }
+ /* Checks all merge accesses. */
+ for_each_path(merge_base_files, path_entry, i) {
+ ASSERT_EQ(EACCES, test_open(path_entry, O_RDWR));
+ }
+ for_each_path(merge_base_directories, path_entry, i) {
+ ASSERT_EQ(EACCES,
+ test_open(path_entry, O_RDONLY | O_DIRECTORY));
+ }
+ for_each_path(merge_sub_files, path_entry, i) {
+ ASSERT_EQ(0, test_open(path_entry, O_RDWR));
+ }
+}
+
+FIXTURE(layout3_fs)
+{
+ bool has_created_dir;
+ bool has_created_file;
+ char *dir_path;
+ bool skip_test;
+};
+
+FIXTURE_VARIANT(layout3_fs)
+{
+ const struct mnt_opt mnt;
+ const char *const file_path;
+ unsigned int cwd_fs_magic;
+};
+
+/* clang-format off */
+FIXTURE_VARIANT_ADD(layout3_fs, tmpfs) {
+ /* clang-format on */
+ .mnt = {
+ .type = "tmpfs",
+ .data = MNT_TMP_DATA,
+ },
+ .file_path = file1_s1d1,
+};
+
+FIXTURE_VARIANT_ADD(layout3_fs, ramfs) {
+ .mnt = {
+ .type = "ramfs",
+ .data = "mode=700",
+ },
+ .file_path = TMP_DIR "/dir/file",
+};
+
+FIXTURE_VARIANT_ADD(layout3_fs, cgroup2) {
+ .mnt = {
+ .type = "cgroup2",
+ },
+ .file_path = TMP_DIR "/test/cgroup.procs",
+};
+
+FIXTURE_VARIANT_ADD(layout3_fs, proc) {
+ .mnt = {
+ .type = "proc",
+ },
+ .file_path = TMP_DIR "/self/status",
+};
+
+FIXTURE_VARIANT_ADD(layout3_fs, sysfs) {
+ .mnt = {
+ .type = "sysfs",
+ },
+ .file_path = TMP_DIR "/kernel/notes",
+};
+
+FIXTURE_VARIANT_ADD(layout3_fs, hostfs) {
+ .mnt = {
+ .source = TMP_DIR,
+ .flags = MS_BIND,
+ },
+ .file_path = TMP_DIR "/dir/file",
+ .cwd_fs_magic = HOSTFS_SUPER_MAGIC,
+};
+
+FIXTURE_SETUP(layout3_fs)
+{
+ struct stat statbuf;
+ const char *slash;
+ size_t dir_len;
+
+ if (!supports_filesystem(variant->mnt.type) ||
+ !cwd_matches_fs(variant->cwd_fs_magic)) {
+ self->skip_test = true;
+ SKIP(return, "this filesystem is not supported (setup)");
+ }
+
+ _metadata->teardown_parent = true;
+
+ slash = strrchr(variant->file_path, '/');
+ ASSERT_NE(slash, NULL);
+ dir_len = (size_t)slash - (size_t)variant->file_path;
+ ASSERT_LT(0, dir_len);
+ self->dir_path = malloc(dir_len + 1);
+ self->dir_path[dir_len] = '\0';
+ strncpy(self->dir_path, variant->file_path, dir_len);
+
+ prepare_layout_opt(_metadata, &variant->mnt);
+
+ /* Creates directory when required. */
+ if (stat(self->dir_path, &statbuf)) {
+ set_cap(_metadata, CAP_DAC_OVERRIDE);
+ EXPECT_EQ(0, mkdir(self->dir_path, 0700))
+ {
+ TH_LOG("Failed to create directory \"%s\": %s",
+ self->dir_path, strerror(errno));
+ free(self->dir_path);
+ self->dir_path = NULL;
+ }
+ self->has_created_dir = true;
+ clear_cap(_metadata, CAP_DAC_OVERRIDE);
+ }
+
+ /* Creates file when required. */
+ if (stat(variant->file_path, &statbuf)) {
+ int fd;
+
+ set_cap(_metadata, CAP_DAC_OVERRIDE);
+ fd = creat(variant->file_path, 0600);
+ EXPECT_LE(0, fd)
+ {
+ TH_LOG("Failed to create file \"%s\": %s",
+ variant->file_path, strerror(errno));
+ }
+ EXPECT_EQ(0, close(fd));
+ self->has_created_file = true;
+ clear_cap(_metadata, CAP_DAC_OVERRIDE);
+ }
+}
+
+FIXTURE_TEARDOWN(layout3_fs)
+{
+ if (self->skip_test)
+ SKIP(return, "this filesystem is not supported (teardown)");
+
+ if (self->has_created_file) {
+ set_cap(_metadata, CAP_DAC_OVERRIDE);
+ /*
+ * Don't check for error because the file might already
+ * have been removed (cf. release_inode test).
+ */
+ unlink(variant->file_path);
+ clear_cap(_metadata, CAP_DAC_OVERRIDE);
+ }
+
+ if (self->has_created_dir) {
+ set_cap(_metadata, CAP_DAC_OVERRIDE);
+ /*
+ * Don't check for error because the directory might already
+ * have been removed (cf. release_inode test).
+ */
+ rmdir(self->dir_path);
+ clear_cap(_metadata, CAP_DAC_OVERRIDE);
+ }
+ free(self->dir_path);
+ self->dir_path = NULL;
+
+ cleanup_layout(_metadata);
+}
+
+static void layer3_fs_tag_inode(struct __test_metadata *const _metadata,
+ FIXTURE_DATA(layout3_fs) * self,
+ const FIXTURE_VARIANT(layout3_fs) * variant,
+ const char *const rule_path)
+{
+ const struct rule layer1_allow_read_file[] = {
+ {
+ .path = rule_path,
+ .access = LANDLOCK_ACCESS_FS_READ_FILE,
+ },
+ {},
+ };
+ const struct landlock_ruleset_attr layer2_deny_everything_attr = {
+ .handled_access_fs = LANDLOCK_ACCESS_FS_READ_FILE,
+ };
+ const char *const dev_null_path = "/dev/null";
+ int ruleset_fd;
+
+ if (self->skip_test)
+ SKIP(return, "this filesystem is not supported (test)");
+
+ /* Checks without Landlock. */
+ EXPECT_EQ(0, test_open(dev_null_path, O_RDONLY | O_CLOEXEC));
+ EXPECT_EQ(0, test_open(variant->file_path, O_RDONLY | O_CLOEXEC));
+
+ ruleset_fd = create_ruleset(_metadata, LANDLOCK_ACCESS_FS_READ_FILE,
+ layer1_allow_read_file);
+ EXPECT_LE(0, ruleset_fd);
+ enforce_ruleset(_metadata, ruleset_fd);
+ EXPECT_EQ(0, close(ruleset_fd));
+
+ EXPECT_EQ(EACCES, test_open(dev_null_path, O_RDONLY | O_CLOEXEC));
+ EXPECT_EQ(0, test_open(variant->file_path, O_RDONLY | O_CLOEXEC));
+
+ /* Forbids directory reading. */
+ ruleset_fd =
+ landlock_create_ruleset(&layer2_deny_everything_attr,
+ sizeof(layer2_deny_everything_attr), 0);
+ EXPECT_LE(0, ruleset_fd);
+ enforce_ruleset(_metadata, ruleset_fd);
+ EXPECT_EQ(0, close(ruleset_fd));
+
+ /* Checks with Landlock and forbidden access. */
+ EXPECT_EQ(EACCES, test_open(dev_null_path, O_RDONLY | O_CLOEXEC));
+ EXPECT_EQ(EACCES, test_open(variant->file_path, O_RDONLY | O_CLOEXEC));
+}
+
+/* Matrix of tests to check file hierarchy evaluation. */
+
+TEST_F_FORK(layout3_fs, tag_inode_dir_parent)
+{
+ /* The current directory must not be the root for this test. */
+ layer3_fs_tag_inode(_metadata, self, variant, ".");
+}
+
+TEST_F_FORK(layout3_fs, tag_inode_dir_mnt)
+{
+ layer3_fs_tag_inode(_metadata, self, variant, TMP_DIR);
+}
+
+TEST_F_FORK(layout3_fs, tag_inode_dir_child)
+{
+ layer3_fs_tag_inode(_metadata, self, variant, self->dir_path);
+}
+
+TEST_F_FORK(layout3_fs, tag_inode_file)
+{
+ layer3_fs_tag_inode(_metadata, self, variant, variant->file_path);
+}
+
+/* Light version of layout1.release_inodes */
+TEST_F_FORK(layout3_fs, release_inodes)
+{
+ const struct rule layer1[] = {
+ {
+ .path = TMP_DIR,
+ .access = LANDLOCK_ACCESS_FS_READ_DIR,
+ },
+ {},
+ };
+ int ruleset_fd;
+
+ if (self->skip_test)
+ SKIP(return, "this filesystem is not supported (test)");
+
+ /* Clean up for the teardown to not fail. */
+ if (self->has_created_file)
+ EXPECT_EQ(0, remove_path(variant->file_path));
+
+ if (self->has_created_dir)
+ /* Don't check for error because of cgroup specificities. */
+ remove_path(self->dir_path);
+
+ ruleset_fd =
+ create_ruleset(_metadata, LANDLOCK_ACCESS_FS_READ_DIR, layer1);
+ ASSERT_LE(0, ruleset_fd);
+
+ /* Unmount the filesystem while it is being used by a ruleset. */
+ set_cap(_metadata, CAP_SYS_ADMIN);
+ ASSERT_EQ(0, umount(TMP_DIR));
+ clear_cap(_metadata, CAP_SYS_ADMIN);
+
+ /* Replaces with a new mount point to simplify FIXTURE_TEARDOWN. */
+ set_cap(_metadata, CAP_SYS_ADMIN);
+ ASSERT_EQ(0, mount_opt(&mnt_tmp, TMP_DIR));
+ clear_cap(_metadata, CAP_SYS_ADMIN);
+
+ enforce_ruleset(_metadata, ruleset_fd);
+ ASSERT_EQ(0, close(ruleset_fd));
+
+ /* Checks that access to the new mount point is denied. */
+ ASSERT_EQ(EACCES, test_open(TMP_DIR, O_RDONLY));
+}
+
+TEST_HARNESS_MAIN
diff --git a/tools/testing/selftests/landlock/net_test.c b/tools/testing/selftests/landlock/net_test.c
new file mode 100644
index 000000000000..f21cfbbc3638
--- /dev/null
+++ b/tools/testing/selftests/landlock/net_test.c
@@ -0,0 +1,1804 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Landlock tests - Network
+ *
+ * Copyright © 2022-2023 Huawei Tech. Co., Ltd.
+ * Copyright © 2023 Microsoft Corporation
+ */
+
+#define _GNU_SOURCE
+#include <arpa/inet.h>
+#include <errno.h>
+#include <fcntl.h>
+#include <linux/landlock.h>
+#include <linux/in.h>
+#include <sched.h>
+#include <stdint.h>
+#include <string.h>
+#include <sys/prctl.h>
+#include <sys/socket.h>
+#include <sys/syscall.h>
+#include <sys/un.h>
+
+#include "common.h"
+
+const short sock_port_start = (1 << 10);
+
+static const char loopback_ipv4[] = "127.0.0.1";
+static const char loopback_ipv6[] = "::1";
+
+/* Number pending connections queue to be hold. */
+const short backlog = 10;
+
+enum sandbox_type {
+ NO_SANDBOX,
+ /* This may be used to test rules that allow *and* deny accesses. */
+ TCP_SANDBOX,
+};
+
+struct protocol_variant {
+ int domain;
+ int type;
+};
+
+struct service_fixture {
+ struct protocol_variant protocol;
+ /* port is also stored in ipv4_addr.sin_port or ipv6_addr.sin6_port */
+ unsigned short port;
+ union {
+ struct sockaddr_in ipv4_addr;
+ struct sockaddr_in6 ipv6_addr;
+ struct {
+ struct sockaddr_un unix_addr;
+ socklen_t unix_addr_len;
+ };
+ };
+};
+
+static pid_t sys_gettid(void)
+{
+ return syscall(__NR_gettid);
+}
+
+static int set_service(struct service_fixture *const srv,
+ const struct protocol_variant prot,
+ const unsigned short index)
+{
+ memset(srv, 0, sizeof(*srv));
+
+ /*
+ * Copies all protocol properties in case of the variant only contains
+ * a subset of them.
+ */
+ srv->protocol = prot;
+
+ /* Checks for port overflow. */
+ if (index > 2)
+ return 1;
+ srv->port = sock_port_start << (2 * index);
+
+ switch (prot.domain) {
+ case AF_UNSPEC:
+ case AF_INET:
+ srv->ipv4_addr.sin_family = prot.domain;
+ srv->ipv4_addr.sin_port = htons(srv->port);
+ srv->ipv4_addr.sin_addr.s_addr = inet_addr(loopback_ipv4);
+ return 0;
+
+ case AF_INET6:
+ srv->ipv6_addr.sin6_family = prot.domain;
+ srv->ipv6_addr.sin6_port = htons(srv->port);
+ inet_pton(AF_INET6, loopback_ipv6, &srv->ipv6_addr.sin6_addr);
+ return 0;
+
+ case AF_UNIX:
+ srv->unix_addr.sun_family = prot.domain;
+ sprintf(srv->unix_addr.sun_path,
+ "_selftests-landlock-net-tid%d-index%d", sys_gettid(),
+ index);
+ srv->unix_addr_len = SUN_LEN(&srv->unix_addr);
+ srv->unix_addr.sun_path[0] = '\0';
+ return 0;
+ }
+ return 1;
+}
+
+static void setup_loopback(struct __test_metadata *const _metadata)
+{
+ set_cap(_metadata, CAP_SYS_ADMIN);
+ ASSERT_EQ(0, unshare(CLONE_NEWNET));
+ clear_cap(_metadata, CAP_SYS_ADMIN);
+
+ set_ambient_cap(_metadata, CAP_NET_ADMIN);
+ ASSERT_EQ(0, system("ip link set dev lo up"));
+ clear_ambient_cap(_metadata, CAP_NET_ADMIN);
+}
+
+static bool is_restricted(const struct protocol_variant *const prot,
+ const enum sandbox_type sandbox)
+{
+ switch (prot->domain) {
+ case AF_INET:
+ case AF_INET6:
+ switch (prot->type) {
+ case SOCK_STREAM:
+ return sandbox == TCP_SANDBOX;
+ }
+ break;
+ }
+ return false;
+}
+
+static int socket_variant(const struct service_fixture *const srv)
+{
+ int ret;
+
+ ret = socket(srv->protocol.domain, srv->protocol.type | SOCK_CLOEXEC,
+ 0);
+ if (ret < 0)
+ return -errno;
+ return ret;
+}
+
+#ifndef SIN6_LEN_RFC2133
+#define SIN6_LEN_RFC2133 24
+#endif
+
+static socklen_t get_addrlen(const struct service_fixture *const srv,
+ const bool minimal)
+{
+ switch (srv->protocol.domain) {
+ case AF_UNSPEC:
+ case AF_INET:
+ return sizeof(srv->ipv4_addr);
+
+ case AF_INET6:
+ if (minimal)
+ return SIN6_LEN_RFC2133;
+ return sizeof(srv->ipv6_addr);
+
+ case AF_UNIX:
+ if (minimal)
+ return sizeof(srv->unix_addr) -
+ sizeof(srv->unix_addr.sun_path);
+ return srv->unix_addr_len;
+
+ default:
+ return 0;
+ }
+}
+
+static void set_port(struct service_fixture *const srv, uint16_t port)
+{
+ switch (srv->protocol.domain) {
+ case AF_UNSPEC:
+ case AF_INET:
+ srv->ipv4_addr.sin_port = htons(port);
+ return;
+
+ case AF_INET6:
+ srv->ipv6_addr.sin6_port = htons(port);
+ return;
+
+ default:
+ return;
+ }
+}
+
+static uint16_t get_binded_port(int socket_fd,
+ const struct protocol_variant *const prot)
+{
+ struct sockaddr_in ipv4_addr;
+ struct sockaddr_in6 ipv6_addr;
+ socklen_t ipv4_addr_len, ipv6_addr_len;
+
+ /* Gets binded port. */
+ switch (prot->domain) {
+ case AF_UNSPEC:
+ case AF_INET:
+ ipv4_addr_len = sizeof(ipv4_addr);
+ getsockname(socket_fd, &ipv4_addr, &ipv4_addr_len);
+ return ntohs(ipv4_addr.sin_port);
+
+ case AF_INET6:
+ ipv6_addr_len = sizeof(ipv6_addr);
+ getsockname(socket_fd, &ipv6_addr, &ipv6_addr_len);
+ return ntohs(ipv6_addr.sin6_port);
+
+ default:
+ return 0;
+ }
+}
+
+static int bind_variant_addrlen(const int sock_fd,
+ const struct service_fixture *const srv,
+ const socklen_t addrlen)
+{
+ int ret;
+
+ switch (srv->protocol.domain) {
+ case AF_UNSPEC:
+ case AF_INET:
+ ret = bind(sock_fd, &srv->ipv4_addr, addrlen);
+ break;
+
+ case AF_INET6:
+ ret = bind(sock_fd, &srv->ipv6_addr, addrlen);
+ break;
+
+ case AF_UNIX:
+ ret = bind(sock_fd, &srv->unix_addr, addrlen);
+ break;
+
+ default:
+ errno = EAFNOSUPPORT;
+ return -errno;
+ }
+
+ if (ret < 0)
+ return -errno;
+ return ret;
+}
+
+static int bind_variant(const int sock_fd,
+ const struct service_fixture *const srv)
+{
+ return bind_variant_addrlen(sock_fd, srv, get_addrlen(srv, false));
+}
+
+static int connect_variant_addrlen(const int sock_fd,
+ const struct service_fixture *const srv,
+ const socklen_t addrlen)
+{
+ int ret;
+
+ switch (srv->protocol.domain) {
+ case AF_UNSPEC:
+ case AF_INET:
+ ret = connect(sock_fd, &srv->ipv4_addr, addrlen);
+ break;
+
+ case AF_INET6:
+ ret = connect(sock_fd, &srv->ipv6_addr, addrlen);
+ break;
+
+ case AF_UNIX:
+ ret = connect(sock_fd, &srv->unix_addr, addrlen);
+ break;
+
+ default:
+ errno = -EAFNOSUPPORT;
+ return -errno;
+ }
+
+ if (ret < 0)
+ return -errno;
+ return ret;
+}
+
+static int connect_variant(const int sock_fd,
+ const struct service_fixture *const srv)
+{
+ return connect_variant_addrlen(sock_fd, srv, get_addrlen(srv, false));
+}
+
+FIXTURE(protocol)
+{
+ struct service_fixture srv0, srv1, srv2, unspec_any0, unspec_srv0;
+};
+
+FIXTURE_VARIANT(protocol)
+{
+ const enum sandbox_type sandbox;
+ const struct protocol_variant prot;
+};
+
+FIXTURE_SETUP(protocol)
+{
+ const struct protocol_variant prot_unspec = {
+ .domain = AF_UNSPEC,
+ .type = SOCK_STREAM,
+ };
+
+ disable_caps(_metadata);
+
+ ASSERT_EQ(0, set_service(&self->srv0, variant->prot, 0));
+ ASSERT_EQ(0, set_service(&self->srv1, variant->prot, 1));
+ ASSERT_EQ(0, set_service(&self->srv2, variant->prot, 2));
+
+ ASSERT_EQ(0, set_service(&self->unspec_srv0, prot_unspec, 0));
+
+ ASSERT_EQ(0, set_service(&self->unspec_any0, prot_unspec, 0));
+ self->unspec_any0.ipv4_addr.sin_addr.s_addr = htonl(INADDR_ANY);
+
+ setup_loopback(_metadata);
+};
+
+FIXTURE_TEARDOWN(protocol)
+{
+}
+
+/* clang-format off */
+FIXTURE_VARIANT_ADD(protocol, no_sandbox_with_ipv4_tcp) {
+ /* clang-format on */
+ .sandbox = NO_SANDBOX,
+ .prot = {
+ .domain = AF_INET,
+ .type = SOCK_STREAM,
+ },
+};
+
+/* clang-format off */
+FIXTURE_VARIANT_ADD(protocol, no_sandbox_with_ipv6_tcp) {
+ /* clang-format on */
+ .sandbox = NO_SANDBOX,
+ .prot = {
+ .domain = AF_INET6,
+ .type = SOCK_STREAM,
+ },
+};
+
+/* clang-format off */
+FIXTURE_VARIANT_ADD(protocol, no_sandbox_with_ipv4_udp) {
+ /* clang-format on */
+ .sandbox = NO_SANDBOX,
+ .prot = {
+ .domain = AF_INET,
+ .type = SOCK_DGRAM,
+ },
+};
+
+/* clang-format off */
+FIXTURE_VARIANT_ADD(protocol, no_sandbox_with_ipv6_udp) {
+ /* clang-format on */
+ .sandbox = NO_SANDBOX,
+ .prot = {
+ .domain = AF_INET6,
+ .type = SOCK_DGRAM,
+ },
+};
+
+/* clang-format off */
+FIXTURE_VARIANT_ADD(protocol, no_sandbox_with_unix_stream) {
+ /* clang-format on */
+ .sandbox = NO_SANDBOX,
+ .prot = {
+ .domain = AF_UNIX,
+ .type = SOCK_STREAM,
+ },
+};
+
+/* clang-format off */
+FIXTURE_VARIANT_ADD(protocol, no_sandbox_with_unix_datagram) {
+ /* clang-format on */
+ .sandbox = NO_SANDBOX,
+ .prot = {
+ .domain = AF_UNIX,
+ .type = SOCK_DGRAM,
+ },
+};
+
+/* clang-format off */
+FIXTURE_VARIANT_ADD(protocol, tcp_sandbox_with_ipv4_tcp) {
+ /* clang-format on */
+ .sandbox = TCP_SANDBOX,
+ .prot = {
+ .domain = AF_INET,
+ .type = SOCK_STREAM,
+ },
+};
+
+/* clang-format off */
+FIXTURE_VARIANT_ADD(protocol, tcp_sandbox_with_ipv6_tcp) {
+ /* clang-format on */
+ .sandbox = TCP_SANDBOX,
+ .prot = {
+ .domain = AF_INET6,
+ .type = SOCK_STREAM,
+ },
+};
+
+/* clang-format off */
+FIXTURE_VARIANT_ADD(protocol, tcp_sandbox_with_ipv4_udp) {
+ /* clang-format on */
+ .sandbox = TCP_SANDBOX,
+ .prot = {
+ .domain = AF_INET,
+ .type = SOCK_DGRAM,
+ },
+};
+
+/* clang-format off */
+FIXTURE_VARIANT_ADD(protocol, tcp_sandbox_with_ipv6_udp) {
+ /* clang-format on */
+ .sandbox = TCP_SANDBOX,
+ .prot = {
+ .domain = AF_INET6,
+ .type = SOCK_DGRAM,
+ },
+};
+
+/* clang-format off */
+FIXTURE_VARIANT_ADD(protocol, tcp_sandbox_with_unix_stream) {
+ /* clang-format on */
+ .sandbox = TCP_SANDBOX,
+ .prot = {
+ .domain = AF_UNIX,
+ .type = SOCK_STREAM,
+ },
+};
+
+/* clang-format off */
+FIXTURE_VARIANT_ADD(protocol, tcp_sandbox_with_unix_datagram) {
+ /* clang-format on */
+ .sandbox = TCP_SANDBOX,
+ .prot = {
+ .domain = AF_UNIX,
+ .type = SOCK_DGRAM,
+ },
+};
+
+static void test_bind_and_connect(struct __test_metadata *const _metadata,
+ const struct service_fixture *const srv,
+ const bool deny_bind, const bool deny_connect)
+{
+ char buf = '\0';
+ int inval_fd, bind_fd, client_fd, status, ret;
+ pid_t child;
+
+ /* Starts invalid addrlen tests with bind. */
+ inval_fd = socket_variant(srv);
+ ASSERT_LE(0, inval_fd)
+ {
+ TH_LOG("Failed to create socket: %s", strerror(errno));
+ }
+
+ /* Tries to bind with zero as addrlen. */
+ EXPECT_EQ(-EINVAL, bind_variant_addrlen(inval_fd, srv, 0));
+
+ /* Tries to bind with too small addrlen. */
+ EXPECT_EQ(-EINVAL, bind_variant_addrlen(inval_fd, srv,
+ get_addrlen(srv, true) - 1));
+
+ /* Tries to bind with minimal addrlen. */
+ ret = bind_variant_addrlen(inval_fd, srv, get_addrlen(srv, true));
+ if (deny_bind) {
+ EXPECT_EQ(-EACCES, ret);
+ } else {
+ EXPECT_EQ(0, ret)
+ {
+ TH_LOG("Failed to bind to socket: %s", strerror(errno));
+ }
+ }
+ EXPECT_EQ(0, close(inval_fd));
+
+ /* Starts invalid addrlen tests with connect. */
+ inval_fd = socket_variant(srv);
+ ASSERT_LE(0, inval_fd);
+
+ /* Tries to connect with zero as addrlen. */
+ EXPECT_EQ(-EINVAL, connect_variant_addrlen(inval_fd, srv, 0));
+
+ /* Tries to connect with too small addrlen. */
+ EXPECT_EQ(-EINVAL, connect_variant_addrlen(inval_fd, srv,
+ get_addrlen(srv, true) - 1));
+
+ /* Tries to connect with minimal addrlen. */
+ ret = connect_variant_addrlen(inval_fd, srv, get_addrlen(srv, true));
+ if (srv->protocol.domain == AF_UNIX) {
+ EXPECT_EQ(-EINVAL, ret);
+ } else if (deny_connect) {
+ EXPECT_EQ(-EACCES, ret);
+ } else if (srv->protocol.type == SOCK_STREAM) {
+ /* No listening server, whatever the value of deny_bind. */
+ EXPECT_EQ(-ECONNREFUSED, ret);
+ } else {
+ EXPECT_EQ(0, ret)
+ {
+ TH_LOG("Failed to connect to socket: %s",
+ strerror(errno));
+ }
+ }
+ EXPECT_EQ(0, close(inval_fd));
+
+ /* Starts connection tests. */
+ bind_fd = socket_variant(srv);
+ ASSERT_LE(0, bind_fd);
+
+ ret = bind_variant(bind_fd, srv);
+ if (deny_bind) {
+ EXPECT_EQ(-EACCES, ret);
+ } else {
+ EXPECT_EQ(0, ret);
+
+ /* Creates a listening socket. */
+ if (srv->protocol.type == SOCK_STREAM)
+ EXPECT_EQ(0, listen(bind_fd, backlog));
+ }
+
+ child = fork();
+ ASSERT_LE(0, child);
+ if (child == 0) {
+ int connect_fd, ret;
+
+ /* Closes listening socket for the child. */
+ EXPECT_EQ(0, close(bind_fd));
+
+ /* Starts connection tests. */
+ connect_fd = socket_variant(srv);
+ ASSERT_LE(0, connect_fd);
+ ret = connect_variant(connect_fd, srv);
+ if (deny_connect) {
+ EXPECT_EQ(-EACCES, ret);
+ } else if (deny_bind) {
+ /* No listening server. */
+ EXPECT_EQ(-ECONNREFUSED, ret);
+ } else {
+ EXPECT_EQ(0, ret);
+ EXPECT_EQ(1, write(connect_fd, ".", 1));
+ }
+
+ EXPECT_EQ(0, close(connect_fd));
+ _exit(_metadata->exit_code);
+ return;
+ }
+
+ /* Accepts connection from the child. */
+ client_fd = bind_fd;
+ if (!deny_bind && !deny_connect) {
+ if (srv->protocol.type == SOCK_STREAM) {
+ client_fd = accept(bind_fd, NULL, 0);
+ ASSERT_LE(0, client_fd);
+ }
+
+ EXPECT_EQ(1, read(client_fd, &buf, 1));
+ EXPECT_EQ('.', buf);
+ }
+
+ EXPECT_EQ(child, waitpid(child, &status, 0));
+ EXPECT_EQ(1, WIFEXITED(status));
+ EXPECT_EQ(EXIT_SUCCESS, WEXITSTATUS(status));
+
+ /* Closes connection, if any. */
+ if (client_fd != bind_fd)
+ EXPECT_LE(0, close(client_fd));
+
+ /* Closes listening socket. */
+ EXPECT_EQ(0, close(bind_fd));
+}
+
+TEST_F(protocol, bind)
+{
+ if (variant->sandbox == TCP_SANDBOX) {
+ const struct landlock_ruleset_attr ruleset_attr = {
+ .handled_access_net = LANDLOCK_ACCESS_NET_BIND_TCP |
+ LANDLOCK_ACCESS_NET_CONNECT_TCP,
+ };
+ const struct landlock_net_port_attr tcp_bind_connect_p0 = {
+ .allowed_access = LANDLOCK_ACCESS_NET_BIND_TCP |
+ LANDLOCK_ACCESS_NET_CONNECT_TCP,
+ .port = self->srv0.port,
+ };
+ const struct landlock_net_port_attr tcp_connect_p1 = {
+ .allowed_access = LANDLOCK_ACCESS_NET_CONNECT_TCP,
+ .port = self->srv1.port,
+ };
+ int ruleset_fd;
+
+ ruleset_fd = landlock_create_ruleset(&ruleset_attr,
+ sizeof(ruleset_attr), 0);
+ ASSERT_LE(0, ruleset_fd);
+
+ /* Allows connect and bind for the first port. */
+ ASSERT_EQ(0,
+ landlock_add_rule(ruleset_fd, LANDLOCK_RULE_NET_PORT,
+ &tcp_bind_connect_p0, 0));
+
+ /* Allows connect and denies bind for the second port. */
+ ASSERT_EQ(0,
+ landlock_add_rule(ruleset_fd, LANDLOCK_RULE_NET_PORT,
+ &tcp_connect_p1, 0));
+
+ enforce_ruleset(_metadata, ruleset_fd);
+ EXPECT_EQ(0, close(ruleset_fd));
+ }
+
+ /* Binds a socket to the first port. */
+ test_bind_and_connect(_metadata, &self->srv0, false, false);
+
+ /* Binds a socket to the second port. */
+ test_bind_and_connect(_metadata, &self->srv1,
+ is_restricted(&variant->prot, variant->sandbox),
+ false);
+
+ /* Binds a socket to the third port. */
+ test_bind_and_connect(_metadata, &self->srv2,
+ is_restricted(&variant->prot, variant->sandbox),
+ is_restricted(&variant->prot, variant->sandbox));
+}
+
+TEST_F(protocol, connect)
+{
+ if (variant->sandbox == TCP_SANDBOX) {
+ const struct landlock_ruleset_attr ruleset_attr = {
+ .handled_access_net = LANDLOCK_ACCESS_NET_BIND_TCP |
+ LANDLOCK_ACCESS_NET_CONNECT_TCP,
+ };
+ const struct landlock_net_port_attr tcp_bind_connect_p0 = {
+ .allowed_access = LANDLOCK_ACCESS_NET_BIND_TCP |
+ LANDLOCK_ACCESS_NET_CONNECT_TCP,
+ .port = self->srv0.port,
+ };
+ const struct landlock_net_port_attr tcp_bind_p1 = {
+ .allowed_access = LANDLOCK_ACCESS_NET_BIND_TCP,
+ .port = self->srv1.port,
+ };
+ int ruleset_fd;
+
+ ruleset_fd = landlock_create_ruleset(&ruleset_attr,
+ sizeof(ruleset_attr), 0);
+ ASSERT_LE(0, ruleset_fd);
+
+ /* Allows connect and bind for the first port. */
+ ASSERT_EQ(0,
+ landlock_add_rule(ruleset_fd, LANDLOCK_RULE_NET_PORT,
+ &tcp_bind_connect_p0, 0));
+
+ /* Allows bind and denies connect for the second port. */
+ ASSERT_EQ(0,
+ landlock_add_rule(ruleset_fd, LANDLOCK_RULE_NET_PORT,
+ &tcp_bind_p1, 0));
+
+ enforce_ruleset(_metadata, ruleset_fd);
+ EXPECT_EQ(0, close(ruleset_fd));
+ }
+
+ test_bind_and_connect(_metadata, &self->srv0, false, false);
+
+ test_bind_and_connect(_metadata, &self->srv1, false,
+ is_restricted(&variant->prot, variant->sandbox));
+
+ test_bind_and_connect(_metadata, &self->srv2,
+ is_restricted(&variant->prot, variant->sandbox),
+ is_restricted(&variant->prot, variant->sandbox));
+}
+
+TEST_F(protocol, bind_unspec)
+{
+ const struct landlock_ruleset_attr ruleset_attr = {
+ .handled_access_net = LANDLOCK_ACCESS_NET_BIND_TCP,
+ };
+ const struct landlock_net_port_attr tcp_bind = {
+ .allowed_access = LANDLOCK_ACCESS_NET_BIND_TCP,
+ .port = self->srv0.port,
+ };
+ int bind_fd, ret;
+
+ if (variant->sandbox == TCP_SANDBOX) {
+ const int ruleset_fd = landlock_create_ruleset(
+ &ruleset_attr, sizeof(ruleset_attr), 0);
+ ASSERT_LE(0, ruleset_fd);
+
+ /* Allows bind. */
+ ASSERT_EQ(0,
+ landlock_add_rule(ruleset_fd, LANDLOCK_RULE_NET_PORT,
+ &tcp_bind, 0));
+ enforce_ruleset(_metadata, ruleset_fd);
+ EXPECT_EQ(0, close(ruleset_fd));
+ }
+
+ bind_fd = socket_variant(&self->srv0);
+ ASSERT_LE(0, bind_fd);
+
+ /* Allowed bind on AF_UNSPEC/INADDR_ANY. */
+ ret = bind_variant(bind_fd, &self->unspec_any0);
+ if (variant->prot.domain == AF_INET) {
+ EXPECT_EQ(0, ret)
+ {
+ TH_LOG("Failed to bind to unspec/any socket: %s",
+ strerror(errno));
+ }
+ } else {
+ EXPECT_EQ(-EINVAL, ret);
+ }
+ EXPECT_EQ(0, close(bind_fd));
+
+ if (variant->sandbox == TCP_SANDBOX) {
+ const int ruleset_fd = landlock_create_ruleset(
+ &ruleset_attr, sizeof(ruleset_attr), 0);
+ ASSERT_LE(0, ruleset_fd);
+
+ /* Denies bind. */
+ enforce_ruleset(_metadata, ruleset_fd);
+ EXPECT_EQ(0, close(ruleset_fd));
+ }
+
+ bind_fd = socket_variant(&self->srv0);
+ ASSERT_LE(0, bind_fd);
+
+ /* Denied bind on AF_UNSPEC/INADDR_ANY. */
+ ret = bind_variant(bind_fd, &self->unspec_any0);
+ if (variant->prot.domain == AF_INET) {
+ if (is_restricted(&variant->prot, variant->sandbox)) {
+ EXPECT_EQ(-EACCES, ret);
+ } else {
+ EXPECT_EQ(0, ret);
+ }
+ } else {
+ EXPECT_EQ(-EINVAL, ret);
+ }
+ EXPECT_EQ(0, close(bind_fd));
+
+ /* Checks bind with AF_UNSPEC and the loopback address. */
+ bind_fd = socket_variant(&self->srv0);
+ ASSERT_LE(0, bind_fd);
+ ret = bind_variant(bind_fd, &self->unspec_srv0);
+ if (variant->prot.domain == AF_INET) {
+ EXPECT_EQ(-EAFNOSUPPORT, ret);
+ } else {
+ EXPECT_EQ(-EINVAL, ret)
+ {
+ TH_LOG("Wrong bind error: %s", strerror(errno));
+ }
+ }
+ EXPECT_EQ(0, close(bind_fd));
+}
+
+TEST_F(protocol, connect_unspec)
+{
+ const struct landlock_ruleset_attr ruleset_attr = {
+ .handled_access_net = LANDLOCK_ACCESS_NET_CONNECT_TCP,
+ };
+ const struct landlock_net_port_attr tcp_connect = {
+ .allowed_access = LANDLOCK_ACCESS_NET_CONNECT_TCP,
+ .port = self->srv0.port,
+ };
+ int bind_fd, client_fd, status;
+ pid_t child;
+
+ /* Specific connection tests. */
+ bind_fd = socket_variant(&self->srv0);
+ ASSERT_LE(0, bind_fd);
+ EXPECT_EQ(0, bind_variant(bind_fd, &self->srv0));
+ if (self->srv0.protocol.type == SOCK_STREAM)
+ EXPECT_EQ(0, listen(bind_fd, backlog));
+
+ child = fork();
+ ASSERT_LE(0, child);
+ if (child == 0) {
+ int connect_fd, ret;
+
+ /* Closes listening socket for the child. */
+ EXPECT_EQ(0, close(bind_fd));
+
+ connect_fd = socket_variant(&self->srv0);
+ ASSERT_LE(0, connect_fd);
+ EXPECT_EQ(0, connect_variant(connect_fd, &self->srv0));
+
+ /* Tries to connect again, or set peer. */
+ ret = connect_variant(connect_fd, &self->srv0);
+ if (self->srv0.protocol.type == SOCK_STREAM) {
+ EXPECT_EQ(-EISCONN, ret);
+ } else {
+ EXPECT_EQ(0, ret);
+ }
+
+ if (variant->sandbox == TCP_SANDBOX) {
+ const int ruleset_fd = landlock_create_ruleset(
+ &ruleset_attr, sizeof(ruleset_attr), 0);
+ ASSERT_LE(0, ruleset_fd);
+
+ /* Allows connect. */
+ ASSERT_EQ(0, landlock_add_rule(ruleset_fd,
+ LANDLOCK_RULE_NET_PORT,
+ &tcp_connect, 0));
+ enforce_ruleset(_metadata, ruleset_fd);
+ EXPECT_EQ(0, close(ruleset_fd));
+ }
+
+ /* Disconnects already connected socket, or set peer. */
+ ret = connect_variant(connect_fd, &self->unspec_any0);
+ if (self->srv0.protocol.domain == AF_UNIX &&
+ self->srv0.protocol.type == SOCK_STREAM) {
+ EXPECT_EQ(-EINVAL, ret);
+ } else {
+ EXPECT_EQ(0, ret);
+ }
+
+ /* Tries to reconnect, or set peer. */
+ ret = connect_variant(connect_fd, &self->srv0);
+ if (self->srv0.protocol.domain == AF_UNIX &&
+ self->srv0.protocol.type == SOCK_STREAM) {
+ EXPECT_EQ(-EISCONN, ret);
+ } else {
+ EXPECT_EQ(0, ret);
+ }
+
+ if (variant->sandbox == TCP_SANDBOX) {
+ const int ruleset_fd = landlock_create_ruleset(
+ &ruleset_attr, sizeof(ruleset_attr), 0);
+ ASSERT_LE(0, ruleset_fd);
+
+ /* Denies connect. */
+ enforce_ruleset(_metadata, ruleset_fd);
+ EXPECT_EQ(0, close(ruleset_fd));
+ }
+
+ ret = connect_variant(connect_fd, &self->unspec_any0);
+ if (self->srv0.protocol.domain == AF_UNIX &&
+ self->srv0.protocol.type == SOCK_STREAM) {
+ EXPECT_EQ(-EINVAL, ret);
+ } else {
+ /* Always allowed to disconnect. */
+ EXPECT_EQ(0, ret);
+ }
+
+ EXPECT_EQ(0, close(connect_fd));
+ _exit(_metadata->exit_code);
+ return;
+ }
+
+ client_fd = bind_fd;
+ if (self->srv0.protocol.type == SOCK_STREAM) {
+ client_fd = accept(bind_fd, NULL, 0);
+ ASSERT_LE(0, client_fd);
+ }
+
+ EXPECT_EQ(child, waitpid(child, &status, 0));
+ EXPECT_EQ(1, WIFEXITED(status));
+ EXPECT_EQ(EXIT_SUCCESS, WEXITSTATUS(status));
+
+ /* Closes connection, if any. */
+ if (client_fd != bind_fd)
+ EXPECT_LE(0, close(client_fd));
+
+ /* Closes listening socket. */
+ EXPECT_EQ(0, close(bind_fd));
+}
+
+FIXTURE(ipv4)
+{
+ struct service_fixture srv0, srv1;
+};
+
+FIXTURE_VARIANT(ipv4)
+{
+ const enum sandbox_type sandbox;
+ const int type;
+};
+
+/* clang-format off */
+FIXTURE_VARIANT_ADD(ipv4, no_sandbox_with_tcp) {
+ /* clang-format on */
+ .sandbox = NO_SANDBOX,
+ .type = SOCK_STREAM,
+};
+
+/* clang-format off */
+FIXTURE_VARIANT_ADD(ipv4, tcp_sandbox_with_tcp) {
+ /* clang-format on */
+ .sandbox = TCP_SANDBOX,
+ .type = SOCK_STREAM,
+};
+
+/* clang-format off */
+FIXTURE_VARIANT_ADD(ipv4, no_sandbox_with_udp) {
+ /* clang-format on */
+ .sandbox = NO_SANDBOX,
+ .type = SOCK_DGRAM,
+};
+
+/* clang-format off */
+FIXTURE_VARIANT_ADD(ipv4, tcp_sandbox_with_udp) {
+ /* clang-format on */
+ .sandbox = TCP_SANDBOX,
+ .type = SOCK_DGRAM,
+};
+
+FIXTURE_SETUP(ipv4)
+{
+ const struct protocol_variant prot = {
+ .domain = AF_INET,
+ .type = variant->type,
+ };
+
+ disable_caps(_metadata);
+
+ set_service(&self->srv0, prot, 0);
+ set_service(&self->srv1, prot, 1);
+
+ setup_loopback(_metadata);
+};
+
+FIXTURE_TEARDOWN(ipv4)
+{
+}
+
+TEST_F(ipv4, from_unix_to_inet)
+{
+ int unix_stream_fd, unix_dgram_fd;
+
+ if (variant->sandbox == TCP_SANDBOX) {
+ const struct landlock_ruleset_attr ruleset_attr = {
+ .handled_access_net = LANDLOCK_ACCESS_NET_BIND_TCP |
+ LANDLOCK_ACCESS_NET_CONNECT_TCP,
+ };
+ const struct landlock_net_port_attr tcp_bind_connect_p0 = {
+ .allowed_access = LANDLOCK_ACCESS_NET_BIND_TCP |
+ LANDLOCK_ACCESS_NET_CONNECT_TCP,
+ .port = self->srv0.port,
+ };
+ int ruleset_fd;
+
+ /* Denies connect and bind to check errno value. */
+ ruleset_fd = landlock_create_ruleset(&ruleset_attr,
+ sizeof(ruleset_attr), 0);
+ ASSERT_LE(0, ruleset_fd);
+
+ /* Allows connect and bind for srv0. */
+ ASSERT_EQ(0,
+ landlock_add_rule(ruleset_fd, LANDLOCK_RULE_NET_PORT,
+ &tcp_bind_connect_p0, 0));
+
+ enforce_ruleset(_metadata, ruleset_fd);
+ EXPECT_EQ(0, close(ruleset_fd));
+ }
+
+ unix_stream_fd = socket(AF_UNIX, SOCK_STREAM | SOCK_CLOEXEC, 0);
+ ASSERT_LE(0, unix_stream_fd);
+
+ unix_dgram_fd = socket(AF_UNIX, SOCK_DGRAM | SOCK_CLOEXEC, 0);
+ ASSERT_LE(0, unix_dgram_fd);
+
+ /* Checks unix stream bind and connect for srv0. */
+ EXPECT_EQ(-EINVAL, bind_variant(unix_stream_fd, &self->srv0));
+ EXPECT_EQ(-EINVAL, connect_variant(unix_stream_fd, &self->srv0));
+
+ /* Checks unix stream bind and connect for srv1. */
+ EXPECT_EQ(-EINVAL, bind_variant(unix_stream_fd, &self->srv1))
+ {
+ TH_LOG("Wrong bind error: %s", strerror(errno));
+ }
+ EXPECT_EQ(-EINVAL, connect_variant(unix_stream_fd, &self->srv1));
+
+ /* Checks unix datagram bind and connect for srv0. */
+ EXPECT_EQ(-EINVAL, bind_variant(unix_dgram_fd, &self->srv0));
+ EXPECT_EQ(-EINVAL, connect_variant(unix_dgram_fd, &self->srv0));
+
+ /* Checks unix datagram bind and connect for srv1. */
+ EXPECT_EQ(-EINVAL, bind_variant(unix_dgram_fd, &self->srv1));
+ EXPECT_EQ(-EINVAL, connect_variant(unix_dgram_fd, &self->srv1));
+}
+
+FIXTURE(tcp_layers)
+{
+ struct service_fixture srv0, srv1;
+};
+
+FIXTURE_VARIANT(tcp_layers)
+{
+ const size_t num_layers;
+ const int domain;
+};
+
+FIXTURE_SETUP(tcp_layers)
+{
+ const struct protocol_variant prot = {
+ .domain = variant->domain,
+ .type = SOCK_STREAM,
+ };
+
+ disable_caps(_metadata);
+
+ ASSERT_EQ(0, set_service(&self->srv0, prot, 0));
+ ASSERT_EQ(0, set_service(&self->srv1, prot, 1));
+
+ setup_loopback(_metadata);
+};
+
+FIXTURE_TEARDOWN(tcp_layers)
+{
+}
+
+/* clang-format off */
+FIXTURE_VARIANT_ADD(tcp_layers, no_sandbox_with_ipv4) {
+ /* clang-format on */
+ .domain = AF_INET,
+ .num_layers = 0,
+};
+
+/* clang-format off */
+FIXTURE_VARIANT_ADD(tcp_layers, one_sandbox_with_ipv4) {
+ /* clang-format on */
+ .domain = AF_INET,
+ .num_layers = 1,
+};
+
+/* clang-format off */
+FIXTURE_VARIANT_ADD(tcp_layers, two_sandboxes_with_ipv4) {
+ /* clang-format on */
+ .domain = AF_INET,
+ .num_layers = 2,
+};
+
+/* clang-format off */
+FIXTURE_VARIANT_ADD(tcp_layers, three_sandboxes_with_ipv4) {
+ /* clang-format on */
+ .domain = AF_INET,
+ .num_layers = 3,
+};
+
+/* clang-format off */
+FIXTURE_VARIANT_ADD(tcp_layers, no_sandbox_with_ipv6) {
+ /* clang-format on */
+ .domain = AF_INET6,
+ .num_layers = 0,
+};
+
+/* clang-format off */
+FIXTURE_VARIANT_ADD(tcp_layers, one_sandbox_with_ipv6) {
+ /* clang-format on */
+ .domain = AF_INET6,
+ .num_layers = 1,
+};
+
+/* clang-format off */
+FIXTURE_VARIANT_ADD(tcp_layers, two_sandboxes_with_ipv6) {
+ /* clang-format on */
+ .domain = AF_INET6,
+ .num_layers = 2,
+};
+
+/* clang-format off */
+FIXTURE_VARIANT_ADD(tcp_layers, three_sandboxes_with_ipv6) {
+ /* clang-format on */
+ .domain = AF_INET6,
+ .num_layers = 3,
+};
+
+TEST_F(tcp_layers, ruleset_overlap)
+{
+ const struct landlock_ruleset_attr ruleset_attr = {
+ .handled_access_net = LANDLOCK_ACCESS_NET_BIND_TCP |
+ LANDLOCK_ACCESS_NET_CONNECT_TCP,
+ };
+ const struct landlock_net_port_attr tcp_bind = {
+ .allowed_access = LANDLOCK_ACCESS_NET_BIND_TCP,
+ .port = self->srv0.port,
+ };
+ const struct landlock_net_port_attr tcp_bind_connect = {
+ .allowed_access = LANDLOCK_ACCESS_NET_BIND_TCP |
+ LANDLOCK_ACCESS_NET_CONNECT_TCP,
+ .port = self->srv0.port,
+ };
+
+ if (variant->num_layers >= 1) {
+ int ruleset_fd;
+
+ ruleset_fd = landlock_create_ruleset(&ruleset_attr,
+ sizeof(ruleset_attr), 0);
+ ASSERT_LE(0, ruleset_fd);
+
+ /* Allows bind. */
+ ASSERT_EQ(0,
+ landlock_add_rule(ruleset_fd, LANDLOCK_RULE_NET_PORT,
+ &tcp_bind, 0));
+ /* Also allows bind, but allows connect too. */
+ ASSERT_EQ(0,
+ landlock_add_rule(ruleset_fd, LANDLOCK_RULE_NET_PORT,
+ &tcp_bind_connect, 0));
+ enforce_ruleset(_metadata, ruleset_fd);
+ EXPECT_EQ(0, close(ruleset_fd));
+ }
+
+ if (variant->num_layers >= 2) {
+ int ruleset_fd;
+
+ /* Creates another ruleset layer. */
+ ruleset_fd = landlock_create_ruleset(&ruleset_attr,
+ sizeof(ruleset_attr), 0);
+ ASSERT_LE(0, ruleset_fd);
+
+ /* Only allows bind. */
+ ASSERT_EQ(0,
+ landlock_add_rule(ruleset_fd, LANDLOCK_RULE_NET_PORT,
+ &tcp_bind, 0));
+ enforce_ruleset(_metadata, ruleset_fd);
+ EXPECT_EQ(0, close(ruleset_fd));
+ }
+
+ if (variant->num_layers >= 3) {
+ int ruleset_fd;
+
+ /* Creates another ruleset layer. */
+ ruleset_fd = landlock_create_ruleset(&ruleset_attr,
+ sizeof(ruleset_attr), 0);
+ ASSERT_LE(0, ruleset_fd);
+
+ /* Try to allow bind and connect. */
+ ASSERT_EQ(0,
+ landlock_add_rule(ruleset_fd, LANDLOCK_RULE_NET_PORT,
+ &tcp_bind_connect, 0));
+ enforce_ruleset(_metadata, ruleset_fd);
+ EXPECT_EQ(0, close(ruleset_fd));
+ }
+
+ /*
+ * Forbids to connect to the socket because only one ruleset layer
+ * allows connect.
+ */
+ test_bind_and_connect(_metadata, &self->srv0, false,
+ variant->num_layers >= 2);
+}
+
+TEST_F(tcp_layers, ruleset_expand)
+{
+ if (variant->num_layers >= 1) {
+ const struct landlock_ruleset_attr ruleset_attr = {
+ .handled_access_net = LANDLOCK_ACCESS_NET_BIND_TCP,
+ };
+ /* Allows bind for srv0. */
+ const struct landlock_net_port_attr bind_srv0 = {
+ .allowed_access = LANDLOCK_ACCESS_NET_BIND_TCP,
+ .port = self->srv0.port,
+ };
+ int ruleset_fd;
+
+ ruleset_fd = landlock_create_ruleset(&ruleset_attr,
+ sizeof(ruleset_attr), 0);
+ ASSERT_LE(0, ruleset_fd);
+ ASSERT_EQ(0,
+ landlock_add_rule(ruleset_fd, LANDLOCK_RULE_NET_PORT,
+ &bind_srv0, 0));
+ enforce_ruleset(_metadata, ruleset_fd);
+ EXPECT_EQ(0, close(ruleset_fd));
+ }
+
+ if (variant->num_layers >= 2) {
+ /* Expands network mask with connect action. */
+ const struct landlock_ruleset_attr ruleset_attr = {
+ .handled_access_net = LANDLOCK_ACCESS_NET_BIND_TCP |
+ LANDLOCK_ACCESS_NET_CONNECT_TCP,
+ };
+ /* Allows bind for srv0 and connect to srv0. */
+ const struct landlock_net_port_attr tcp_bind_connect_p0 = {
+ .allowed_access = LANDLOCK_ACCESS_NET_BIND_TCP |
+ LANDLOCK_ACCESS_NET_CONNECT_TCP,
+ .port = self->srv0.port,
+ };
+ /* Try to allow bind for srv1. */
+ const struct landlock_net_port_attr tcp_bind_p1 = {
+ .allowed_access = LANDLOCK_ACCESS_NET_BIND_TCP,
+ .port = self->srv1.port,
+ };
+ int ruleset_fd;
+
+ ruleset_fd = landlock_create_ruleset(&ruleset_attr,
+ sizeof(ruleset_attr), 0);
+ ASSERT_LE(0, ruleset_fd);
+ ASSERT_EQ(0,
+ landlock_add_rule(ruleset_fd, LANDLOCK_RULE_NET_PORT,
+ &tcp_bind_connect_p0, 0));
+ ASSERT_EQ(0,
+ landlock_add_rule(ruleset_fd, LANDLOCK_RULE_NET_PORT,
+ &tcp_bind_p1, 0));
+ enforce_ruleset(_metadata, ruleset_fd);
+ EXPECT_EQ(0, close(ruleset_fd));
+ }
+
+ if (variant->num_layers >= 3) {
+ const struct landlock_ruleset_attr ruleset_attr = {
+ .handled_access_net = LANDLOCK_ACCESS_NET_BIND_TCP |
+ LANDLOCK_ACCESS_NET_CONNECT_TCP,
+ };
+ /* Allows connect to srv0, without bind rule. */
+ const struct landlock_net_port_attr tcp_bind_p0 = {
+ .allowed_access = LANDLOCK_ACCESS_NET_BIND_TCP,
+ .port = self->srv0.port,
+ };
+ int ruleset_fd;
+
+ ruleset_fd = landlock_create_ruleset(&ruleset_attr,
+ sizeof(ruleset_attr), 0);
+ ASSERT_LE(0, ruleset_fd);
+ ASSERT_EQ(0,
+ landlock_add_rule(ruleset_fd, LANDLOCK_RULE_NET_PORT,
+ &tcp_bind_p0, 0));
+ enforce_ruleset(_metadata, ruleset_fd);
+ EXPECT_EQ(0, close(ruleset_fd));
+ }
+
+ test_bind_and_connect(_metadata, &self->srv0, false,
+ variant->num_layers >= 3);
+
+ test_bind_and_connect(_metadata, &self->srv1, variant->num_layers >= 1,
+ variant->num_layers >= 2);
+}
+
+/* clang-format off */
+FIXTURE(mini) {};
+/* clang-format on */
+
+FIXTURE_SETUP(mini)
+{
+ disable_caps(_metadata);
+
+ setup_loopback(_metadata);
+};
+
+FIXTURE_TEARDOWN(mini)
+{
+}
+
+/* clang-format off */
+
+#define ACCESS_LAST LANDLOCK_ACCESS_NET_CONNECT_TCP
+
+#define ACCESS_ALL ( \
+ LANDLOCK_ACCESS_NET_BIND_TCP | \
+ LANDLOCK_ACCESS_NET_CONNECT_TCP)
+
+/* clang-format on */
+
+TEST_F(mini, network_access_rights)
+{
+ const struct landlock_ruleset_attr ruleset_attr = {
+ .handled_access_net = ACCESS_ALL,
+ };
+ struct landlock_net_port_attr net_port = {
+ .port = sock_port_start,
+ };
+ int ruleset_fd;
+ __u64 access;
+
+ ruleset_fd =
+ landlock_create_ruleset(&ruleset_attr, sizeof(ruleset_attr), 0);
+ ASSERT_LE(0, ruleset_fd);
+
+ for (access = 1; access <= ACCESS_LAST; access <<= 1) {
+ net_port.allowed_access = access;
+ EXPECT_EQ(0,
+ landlock_add_rule(ruleset_fd, LANDLOCK_RULE_NET_PORT,
+ &net_port, 0))
+ {
+ TH_LOG("Failed to add rule with access 0x%llx: %s",
+ access, strerror(errno));
+ }
+ }
+ EXPECT_EQ(0, close(ruleset_fd));
+}
+
+/* Checks invalid attribute, out of landlock network access range. */
+TEST_F(mini, ruleset_with_unknown_access)
+{
+ __u64 access_mask;
+
+ for (access_mask = 1ULL << 63; access_mask != ACCESS_LAST;
+ access_mask >>= 1) {
+ const struct landlock_ruleset_attr ruleset_attr = {
+ .handled_access_net = access_mask,
+ };
+
+ EXPECT_EQ(-1, landlock_create_ruleset(&ruleset_attr,
+ sizeof(ruleset_attr), 0));
+ EXPECT_EQ(EINVAL, errno);
+ }
+}
+
+TEST_F(mini, rule_with_unknown_access)
+{
+ const struct landlock_ruleset_attr ruleset_attr = {
+ .handled_access_net = ACCESS_ALL,
+ };
+ struct landlock_net_port_attr net_port = {
+ .port = sock_port_start,
+ };
+ int ruleset_fd;
+ __u64 access;
+
+ ruleset_fd =
+ landlock_create_ruleset(&ruleset_attr, sizeof(ruleset_attr), 0);
+ ASSERT_LE(0, ruleset_fd);
+
+ for (access = 1ULL << 63; access != ACCESS_LAST; access >>= 1) {
+ net_port.allowed_access = access;
+ EXPECT_EQ(-1,
+ landlock_add_rule(ruleset_fd, LANDLOCK_RULE_NET_PORT,
+ &net_port, 0));
+ EXPECT_EQ(EINVAL, errno);
+ }
+ EXPECT_EQ(0, close(ruleset_fd));
+}
+
+TEST_F(mini, rule_with_unhandled_access)
+{
+ struct landlock_ruleset_attr ruleset_attr = {
+ .handled_access_net = LANDLOCK_ACCESS_NET_BIND_TCP,
+ };
+ struct landlock_net_port_attr net_port = {
+ .port = sock_port_start,
+ };
+ int ruleset_fd;
+ __u64 access;
+
+ ruleset_fd =
+ landlock_create_ruleset(&ruleset_attr, sizeof(ruleset_attr), 0);
+ ASSERT_LE(0, ruleset_fd);
+
+ for (access = 1; access > 0; access <<= 1) {
+ int err;
+
+ net_port.allowed_access = access;
+ err = landlock_add_rule(ruleset_fd, LANDLOCK_RULE_NET_PORT,
+ &net_port, 0);
+ if (access == ruleset_attr.handled_access_net) {
+ EXPECT_EQ(0, err);
+ } else {
+ EXPECT_EQ(-1, err);
+ EXPECT_EQ(EINVAL, errno);
+ }
+ }
+
+ EXPECT_EQ(0, close(ruleset_fd));
+}
+
+TEST_F(mini, inval)
+{
+ const struct landlock_ruleset_attr ruleset_attr = {
+ .handled_access_net = LANDLOCK_ACCESS_NET_BIND_TCP
+ };
+ const struct landlock_net_port_attr tcp_bind_connect = {
+ .allowed_access = LANDLOCK_ACCESS_NET_BIND_TCP |
+ LANDLOCK_ACCESS_NET_CONNECT_TCP,
+ .port = sock_port_start,
+ };
+ const struct landlock_net_port_attr tcp_denied = {
+ .allowed_access = 0,
+ .port = sock_port_start,
+ };
+ const struct landlock_net_port_attr tcp_bind = {
+ .allowed_access = LANDLOCK_ACCESS_NET_BIND_TCP,
+ .port = sock_port_start,
+ };
+ int ruleset_fd;
+
+ ruleset_fd =
+ landlock_create_ruleset(&ruleset_attr, sizeof(ruleset_attr), 0);
+ ASSERT_LE(0, ruleset_fd);
+
+ /* Checks unhandled allowed_access. */
+ EXPECT_EQ(-1, landlock_add_rule(ruleset_fd, LANDLOCK_RULE_NET_PORT,
+ &tcp_bind_connect, 0));
+ EXPECT_EQ(EINVAL, errno);
+
+ /* Checks zero access value. */
+ EXPECT_EQ(-1, landlock_add_rule(ruleset_fd, LANDLOCK_RULE_NET_PORT,
+ &tcp_denied, 0));
+ EXPECT_EQ(ENOMSG, errno);
+
+ /* Adds with legitimate values. */
+ ASSERT_EQ(0, landlock_add_rule(ruleset_fd, LANDLOCK_RULE_NET_PORT,
+ &tcp_bind, 0));
+}
+
+TEST_F(mini, tcp_port_overflow)
+{
+ const struct landlock_ruleset_attr ruleset_attr = {
+ .handled_access_net = LANDLOCK_ACCESS_NET_BIND_TCP |
+ LANDLOCK_ACCESS_NET_CONNECT_TCP,
+ };
+ const struct landlock_net_port_attr port_max_bind = {
+ .allowed_access = LANDLOCK_ACCESS_NET_BIND_TCP,
+ .port = UINT16_MAX,
+ };
+ const struct landlock_net_port_attr port_max_connect = {
+ .allowed_access = LANDLOCK_ACCESS_NET_CONNECT_TCP,
+ .port = UINT16_MAX,
+ };
+ const struct landlock_net_port_attr port_overflow1 = {
+ .allowed_access = LANDLOCK_ACCESS_NET_BIND_TCP,
+ .port = UINT16_MAX + 1,
+ };
+ const struct landlock_net_port_attr port_overflow2 = {
+ .allowed_access = LANDLOCK_ACCESS_NET_BIND_TCP,
+ .port = UINT16_MAX + 2,
+ };
+ const struct landlock_net_port_attr port_overflow3 = {
+ .allowed_access = LANDLOCK_ACCESS_NET_BIND_TCP,
+ .port = UINT32_MAX + 1UL,
+ };
+ const struct landlock_net_port_attr port_overflow4 = {
+ .allowed_access = LANDLOCK_ACCESS_NET_BIND_TCP,
+ .port = UINT32_MAX + 2UL,
+ };
+ const struct protocol_variant ipv4_tcp = {
+ .domain = AF_INET,
+ .type = SOCK_STREAM,
+ };
+ struct service_fixture srv_denied, srv_max_allowed;
+ int ruleset_fd;
+
+ ASSERT_EQ(0, set_service(&srv_denied, ipv4_tcp, 0));
+
+ /* Be careful to avoid port inconsistencies. */
+ srv_max_allowed = srv_denied;
+ srv_max_allowed.port = port_max_bind.port;
+ srv_max_allowed.ipv4_addr.sin_port = htons(port_max_bind.port);
+
+ ruleset_fd =
+ landlock_create_ruleset(&ruleset_attr, sizeof(ruleset_attr), 0);
+ ASSERT_LE(0, ruleset_fd);
+
+ ASSERT_EQ(0, landlock_add_rule(ruleset_fd, LANDLOCK_RULE_NET_PORT,
+ &port_max_bind, 0));
+
+ EXPECT_EQ(-1, landlock_add_rule(ruleset_fd, LANDLOCK_RULE_NET_PORT,
+ &port_overflow1, 0));
+ EXPECT_EQ(EINVAL, errno);
+
+ EXPECT_EQ(-1, landlock_add_rule(ruleset_fd, LANDLOCK_RULE_NET_PORT,
+ &port_overflow2, 0));
+ EXPECT_EQ(EINVAL, errno);
+
+ EXPECT_EQ(-1, landlock_add_rule(ruleset_fd, LANDLOCK_RULE_NET_PORT,
+ &port_overflow3, 0));
+ EXPECT_EQ(EINVAL, errno);
+
+ /* Interleaves with invalid rule additions. */
+ ASSERT_EQ(0, landlock_add_rule(ruleset_fd, LANDLOCK_RULE_NET_PORT,
+ &port_max_connect, 0));
+
+ EXPECT_EQ(-1, landlock_add_rule(ruleset_fd, LANDLOCK_RULE_NET_PORT,
+ &port_overflow4, 0));
+ EXPECT_EQ(EINVAL, errno);
+
+ enforce_ruleset(_metadata, ruleset_fd);
+
+ test_bind_and_connect(_metadata, &srv_denied, true, true);
+ test_bind_and_connect(_metadata, &srv_max_allowed, false, false);
+}
+
+FIXTURE(ipv4_tcp)
+{
+ struct service_fixture srv0, srv1;
+};
+
+FIXTURE_SETUP(ipv4_tcp)
+{
+ const struct protocol_variant ipv4_tcp = {
+ .domain = AF_INET,
+ .type = SOCK_STREAM,
+ };
+
+ disable_caps(_metadata);
+
+ ASSERT_EQ(0, set_service(&self->srv0, ipv4_tcp, 0));
+ ASSERT_EQ(0, set_service(&self->srv1, ipv4_tcp, 1));
+
+ setup_loopback(_metadata);
+};
+
+FIXTURE_TEARDOWN(ipv4_tcp)
+{
+}
+
+TEST_F(ipv4_tcp, port_endianness)
+{
+ const struct landlock_ruleset_attr ruleset_attr = {
+ .handled_access_net = LANDLOCK_ACCESS_NET_BIND_TCP |
+ LANDLOCK_ACCESS_NET_CONNECT_TCP,
+ };
+ const struct landlock_net_port_attr bind_host_endian_p0 = {
+ .allowed_access = LANDLOCK_ACCESS_NET_BIND_TCP,
+ /* Host port format. */
+ .port = self->srv0.port,
+ };
+ const struct landlock_net_port_attr connect_big_endian_p0 = {
+ .allowed_access = LANDLOCK_ACCESS_NET_CONNECT_TCP,
+ /* Big endian port format. */
+ .port = htons(self->srv0.port),
+ };
+ const struct landlock_net_port_attr bind_connect_host_endian_p1 = {
+ .allowed_access = LANDLOCK_ACCESS_NET_BIND_TCP |
+ LANDLOCK_ACCESS_NET_CONNECT_TCP,
+ /* Host port format. */
+ .port = self->srv1.port,
+ };
+ const unsigned int one = 1;
+ const char little_endian = *(const char *)&one;
+ int ruleset_fd;
+
+ ruleset_fd =
+ landlock_create_ruleset(&ruleset_attr, sizeof(ruleset_attr), 0);
+ ASSERT_LE(0, ruleset_fd);
+ ASSERT_EQ(0, landlock_add_rule(ruleset_fd, LANDLOCK_RULE_NET_PORT,
+ &bind_host_endian_p0, 0));
+ ASSERT_EQ(0, landlock_add_rule(ruleset_fd, LANDLOCK_RULE_NET_PORT,
+ &connect_big_endian_p0, 0));
+ ASSERT_EQ(0, landlock_add_rule(ruleset_fd, LANDLOCK_RULE_NET_PORT,
+ &bind_connect_host_endian_p1, 0));
+ enforce_ruleset(_metadata, ruleset_fd);
+
+ /* No restriction for big endinan CPU. */
+ test_bind_and_connect(_metadata, &self->srv0, false, little_endian);
+
+ /* No restriction for any CPU. */
+ test_bind_and_connect(_metadata, &self->srv1, false, false);
+}
+
+TEST_F(ipv4_tcp, with_fs)
+{
+ const struct landlock_ruleset_attr ruleset_attr_fs_net = {
+ .handled_access_fs = LANDLOCK_ACCESS_FS_READ_DIR,
+ .handled_access_net = LANDLOCK_ACCESS_NET_BIND_TCP,
+ };
+ struct landlock_path_beneath_attr path_beneath = {
+ .allowed_access = LANDLOCK_ACCESS_FS_READ_DIR,
+ .parent_fd = -1,
+ };
+ struct landlock_net_port_attr tcp_bind = {
+ .allowed_access = LANDLOCK_ACCESS_NET_BIND_TCP,
+ .port = self->srv0.port,
+ };
+ int ruleset_fd, bind_fd, dir_fd;
+
+ /* Creates ruleset both for filesystem and network access. */
+ ruleset_fd = landlock_create_ruleset(&ruleset_attr_fs_net,
+ sizeof(ruleset_attr_fs_net), 0);
+ ASSERT_LE(0, ruleset_fd);
+
+ /* Adds a filesystem rule. */
+ path_beneath.parent_fd = open("/dev", O_PATH | O_DIRECTORY | O_CLOEXEC);
+ ASSERT_LE(0, path_beneath.parent_fd);
+ ASSERT_EQ(0, landlock_add_rule(ruleset_fd, LANDLOCK_RULE_PATH_BENEATH,
+ &path_beneath, 0));
+ EXPECT_EQ(0, close(path_beneath.parent_fd));
+
+ /* Adds a network rule. */
+ ASSERT_EQ(0, landlock_add_rule(ruleset_fd, LANDLOCK_RULE_NET_PORT,
+ &tcp_bind, 0));
+
+ enforce_ruleset(_metadata, ruleset_fd);
+ EXPECT_EQ(0, close(ruleset_fd));
+
+ /* Tests file access. */
+ dir_fd = open("/dev", O_RDONLY);
+ EXPECT_LE(0, dir_fd);
+ EXPECT_EQ(0, close(dir_fd));
+
+ dir_fd = open("/", O_RDONLY);
+ EXPECT_EQ(-1, dir_fd);
+ EXPECT_EQ(EACCES, errno);
+
+ /* Tests port binding. */
+ bind_fd = socket(AF_INET, SOCK_STREAM | SOCK_CLOEXEC, 0);
+ ASSERT_LE(0, bind_fd);
+ EXPECT_EQ(0, bind_variant(bind_fd, &self->srv0));
+ EXPECT_EQ(0, close(bind_fd));
+
+ bind_fd = socket(AF_INET, SOCK_STREAM | SOCK_CLOEXEC, 0);
+ ASSERT_LE(0, bind_fd);
+ EXPECT_EQ(-EACCES, bind_variant(bind_fd, &self->srv1));
+}
+
+FIXTURE(port_specific)
+{
+ struct service_fixture srv0;
+};
+
+FIXTURE_VARIANT(port_specific)
+{
+ const enum sandbox_type sandbox;
+ const struct protocol_variant prot;
+};
+
+/* clang-format off */
+FIXTURE_VARIANT_ADD(port_specific, no_sandbox_with_ipv4) {
+ /* clang-format on */
+ .sandbox = NO_SANDBOX,
+ .prot = {
+ .domain = AF_INET,
+ .type = SOCK_STREAM,
+ },
+};
+
+/* clang-format off */
+FIXTURE_VARIANT_ADD(port_specific, sandbox_with_ipv4) {
+ /* clang-format on */
+ .sandbox = TCP_SANDBOX,
+ .prot = {
+ .domain = AF_INET,
+ .type = SOCK_STREAM,
+ },
+};
+
+/* clang-format off */
+FIXTURE_VARIANT_ADD(port_specific, no_sandbox_with_ipv6) {
+ /* clang-format on */
+ .sandbox = NO_SANDBOX,
+ .prot = {
+ .domain = AF_INET6,
+ .type = SOCK_STREAM,
+ },
+};
+
+/* clang-format off */
+FIXTURE_VARIANT_ADD(port_specific, sandbox_with_ipv6) {
+ /* clang-format on */
+ .sandbox = TCP_SANDBOX,
+ .prot = {
+ .domain = AF_INET6,
+ .type = SOCK_STREAM,
+ },
+};
+
+FIXTURE_SETUP(port_specific)
+{
+ disable_caps(_metadata);
+
+ ASSERT_EQ(0, set_service(&self->srv0, variant->prot, 0));
+
+ setup_loopback(_metadata);
+};
+
+FIXTURE_TEARDOWN(port_specific)
+{
+}
+
+TEST_F(port_specific, bind_connect_zero)
+{
+ int bind_fd, connect_fd, ret;
+ uint16_t port;
+
+ /* Adds a rule layer with bind and connect actions. */
+ if (variant->sandbox == TCP_SANDBOX) {
+ const struct landlock_ruleset_attr ruleset_attr = {
+ .handled_access_net = LANDLOCK_ACCESS_NET_BIND_TCP |
+ LANDLOCK_ACCESS_NET_CONNECT_TCP
+ };
+ const struct landlock_net_port_attr tcp_bind_connect_zero = {
+ .allowed_access = LANDLOCK_ACCESS_NET_BIND_TCP |
+ LANDLOCK_ACCESS_NET_CONNECT_TCP,
+ .port = 0,
+ };
+ int ruleset_fd;
+
+ ruleset_fd = landlock_create_ruleset(&ruleset_attr,
+ sizeof(ruleset_attr), 0);
+ ASSERT_LE(0, ruleset_fd);
+
+ /* Checks zero port value on bind and connect actions. */
+ EXPECT_EQ(0,
+ landlock_add_rule(ruleset_fd, LANDLOCK_RULE_NET_PORT,
+ &tcp_bind_connect_zero, 0));
+
+ enforce_ruleset(_metadata, ruleset_fd);
+ EXPECT_EQ(0, close(ruleset_fd));
+ }
+
+ bind_fd = socket_variant(&self->srv0);
+ ASSERT_LE(0, bind_fd);
+
+ connect_fd = socket_variant(&self->srv0);
+ ASSERT_LE(0, connect_fd);
+
+ /* Sets address port to 0 for both protocol families. */
+ set_port(&self->srv0, 0);
+ /*
+ * Binds on port 0, which selects a random port within
+ * ip_local_port_range.
+ */
+ ret = bind_variant(bind_fd, &self->srv0);
+ EXPECT_EQ(0, ret);
+
+ EXPECT_EQ(0, listen(bind_fd, backlog));
+
+ /* Connects on port 0. */
+ ret = connect_variant(connect_fd, &self->srv0);
+ EXPECT_EQ(-ECONNREFUSED, ret);
+
+ /* Sets binded port for both protocol families. */
+ port = get_binded_port(bind_fd, &variant->prot);
+ EXPECT_NE(0, port);
+ set_port(&self->srv0, port);
+ /* Connects on the binded port. */
+ ret = connect_variant(connect_fd, &self->srv0);
+ if (is_restricted(&variant->prot, variant->sandbox)) {
+ /* Denied by Landlock. */
+ EXPECT_EQ(-EACCES, ret);
+ } else {
+ EXPECT_EQ(0, ret);
+ }
+
+ EXPECT_EQ(0, close(connect_fd));
+ EXPECT_EQ(0, close(bind_fd));
+}
+
+TEST_F(port_specific, bind_connect_1023)
+{
+ int bind_fd, connect_fd, ret;
+
+ /* Adds a rule layer with bind and connect actions. */
+ if (variant->sandbox == TCP_SANDBOX) {
+ const struct landlock_ruleset_attr ruleset_attr = {
+ .handled_access_net = LANDLOCK_ACCESS_NET_BIND_TCP |
+ LANDLOCK_ACCESS_NET_CONNECT_TCP
+ };
+ /* A rule with port value less than 1024. */
+ const struct landlock_net_port_attr tcp_bind_connect_low_range = {
+ .allowed_access = LANDLOCK_ACCESS_NET_BIND_TCP |
+ LANDLOCK_ACCESS_NET_CONNECT_TCP,
+ .port = 1023,
+ };
+ /* A rule with 1024 port. */
+ const struct landlock_net_port_attr tcp_bind_connect = {
+ .allowed_access = LANDLOCK_ACCESS_NET_BIND_TCP |
+ LANDLOCK_ACCESS_NET_CONNECT_TCP,
+ .port = 1024,
+ };
+ int ruleset_fd;
+
+ ruleset_fd = landlock_create_ruleset(&ruleset_attr,
+ sizeof(ruleset_attr), 0);
+ ASSERT_LE(0, ruleset_fd);
+
+ ASSERT_EQ(0,
+ landlock_add_rule(ruleset_fd, LANDLOCK_RULE_NET_PORT,
+ &tcp_bind_connect_low_range, 0));
+ ASSERT_EQ(0,
+ landlock_add_rule(ruleset_fd, LANDLOCK_RULE_NET_PORT,
+ &tcp_bind_connect, 0));
+
+ enforce_ruleset(_metadata, ruleset_fd);
+ EXPECT_EQ(0, close(ruleset_fd));
+ }
+
+ bind_fd = socket_variant(&self->srv0);
+ ASSERT_LE(0, bind_fd);
+
+ connect_fd = socket_variant(&self->srv0);
+ ASSERT_LE(0, connect_fd);
+
+ /* Sets address port to 1023 for both protocol families. */
+ set_port(&self->srv0, 1023);
+ /* Binds on port 1023. */
+ ret = bind_variant(bind_fd, &self->srv0);
+ /* Denied by the system. */
+ EXPECT_EQ(-EACCES, ret);
+
+ /* Binds on port 1023. */
+ set_cap(_metadata, CAP_NET_BIND_SERVICE);
+ ret = bind_variant(bind_fd, &self->srv0);
+ clear_cap(_metadata, CAP_NET_BIND_SERVICE);
+ EXPECT_EQ(0, ret);
+ EXPECT_EQ(0, listen(bind_fd, backlog));
+
+ /* Connects on the binded port 1023. */
+ ret = connect_variant(connect_fd, &self->srv0);
+ EXPECT_EQ(0, ret);
+
+ EXPECT_EQ(0, close(connect_fd));
+ EXPECT_EQ(0, close(bind_fd));
+
+ bind_fd = socket_variant(&self->srv0);
+ ASSERT_LE(0, bind_fd);
+
+ connect_fd = socket_variant(&self->srv0);
+ ASSERT_LE(0, connect_fd);
+
+ /* Sets address port to 1024 for both protocol families. */
+ set_port(&self->srv0, 1024);
+ /* Binds on port 1024. */
+ ret = bind_variant(bind_fd, &self->srv0);
+ EXPECT_EQ(0, ret);
+ EXPECT_EQ(0, listen(bind_fd, backlog));
+
+ /* Connects on the binded port 1024. */
+ ret = connect_variant(connect_fd, &self->srv0);
+ EXPECT_EQ(0, ret);
+
+ EXPECT_EQ(0, close(connect_fd));
+ EXPECT_EQ(0, close(bind_fd));
+}
+
+TEST_HARNESS_MAIN
diff --git a/tools/testing/selftests/landlock/ptrace_test.c b/tools/testing/selftests/landlock/ptrace_test.c
new file mode 100644
index 000000000000..a19db4d0b3bd
--- /dev/null
+++ b/tools/testing/selftests/landlock/ptrace_test.c
@@ -0,0 +1,439 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Landlock tests - Ptrace
+ *
+ * Copyright © 2017-2020 Mickaël Salaün <mic@digikod.net>
+ * Copyright © 2019-2020 ANSSI
+ */
+
+#define _GNU_SOURCE
+#include <errno.h>
+#include <fcntl.h>
+#include <linux/landlock.h>
+#include <signal.h>
+#include <sys/prctl.h>
+#include <sys/ptrace.h>
+#include <sys/types.h>
+#include <sys/wait.h>
+#include <unistd.h>
+
+#include "common.h"
+
+/* Copied from security/yama/yama_lsm.c */
+#define YAMA_SCOPE_DISABLED 0
+#define YAMA_SCOPE_RELATIONAL 1
+#define YAMA_SCOPE_CAPABILITY 2
+#define YAMA_SCOPE_NO_ATTACH 3
+
+static void create_domain(struct __test_metadata *const _metadata)
+{
+ int ruleset_fd;
+ struct landlock_ruleset_attr ruleset_attr = {
+ .handled_access_fs = LANDLOCK_ACCESS_FS_MAKE_BLOCK,
+ };
+
+ ruleset_fd =
+ landlock_create_ruleset(&ruleset_attr, sizeof(ruleset_attr), 0);
+ EXPECT_LE(0, ruleset_fd)
+ {
+ TH_LOG("Failed to create a ruleset: %s", strerror(errno));
+ }
+ EXPECT_EQ(0, prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0));
+ EXPECT_EQ(0, landlock_restrict_self(ruleset_fd, 0));
+ EXPECT_EQ(0, close(ruleset_fd));
+}
+
+static int test_ptrace_read(const pid_t pid)
+{
+ static const char path_template[] = "/proc/%d/environ";
+ char procenv_path[sizeof(path_template) + 10];
+ int procenv_path_size, fd;
+
+ procenv_path_size = snprintf(procenv_path, sizeof(procenv_path),
+ path_template, pid);
+ if (procenv_path_size >= sizeof(procenv_path))
+ return E2BIG;
+
+ fd = open(procenv_path, O_RDONLY | O_CLOEXEC);
+ if (fd < 0)
+ return errno;
+ /*
+ * Mixing error codes from close(2) and open(2) should not lead to any
+ * (access type) confusion for this test.
+ */
+ if (close(fd) != 0)
+ return errno;
+ return 0;
+}
+
+static int get_yama_ptrace_scope(void)
+{
+ int ret;
+ char buf[2] = {};
+ const int fd = open("/proc/sys/kernel/yama/ptrace_scope", O_RDONLY);
+
+ if (fd < 0)
+ return 0;
+
+ if (read(fd, buf, 1) < 0) {
+ close(fd);
+ return -1;
+ }
+
+ ret = atoi(buf);
+ close(fd);
+ return ret;
+}
+
+/* clang-format off */
+FIXTURE(hierarchy) {};
+/* clang-format on */
+
+FIXTURE_VARIANT(hierarchy)
+{
+ const bool domain_both;
+ const bool domain_parent;
+ const bool domain_child;
+};
+
+/*
+ * Test multiple tracing combinations between a parent process P1 and a child
+ * process P2.
+ *
+ * Yama's scoped ptrace is presumed disabled. If enabled, this optional
+ * restriction is enforced in addition to any Landlock check, which means that
+ * all P2 requests to trace P1 would be denied.
+ */
+
+/*
+ * No domain
+ *
+ * P1-. P1 -> P2 : allow
+ * \ P2 -> P1 : allow
+ * 'P2
+ */
+/* clang-format off */
+FIXTURE_VARIANT_ADD(hierarchy, allow_without_domain) {
+ /* clang-format on */
+ .domain_both = false,
+ .domain_parent = false,
+ .domain_child = false,
+};
+
+/*
+ * Child domain
+ *
+ * P1--. P1 -> P2 : allow
+ * \ P2 -> P1 : deny
+ * .'-----.
+ * | P2 |
+ * '------'
+ */
+/* clang-format off */
+FIXTURE_VARIANT_ADD(hierarchy, allow_with_one_domain) {
+ /* clang-format on */
+ .domain_both = false,
+ .domain_parent = false,
+ .domain_child = true,
+};
+
+/*
+ * Parent domain
+ * .------.
+ * | P1 --. P1 -> P2 : deny
+ * '------' \ P2 -> P1 : allow
+ * '
+ * P2
+ */
+/* clang-format off */
+FIXTURE_VARIANT_ADD(hierarchy, deny_with_parent_domain) {
+ /* clang-format on */
+ .domain_both = false,
+ .domain_parent = true,
+ .domain_child = false,
+};
+
+/*
+ * Parent + child domain (siblings)
+ * .------.
+ * | P1 ---. P1 -> P2 : deny
+ * '------' \ P2 -> P1 : deny
+ * .---'--.
+ * | P2 |
+ * '------'
+ */
+/* clang-format off */
+FIXTURE_VARIANT_ADD(hierarchy, deny_with_sibling_domain) {
+ /* clang-format on */
+ .domain_both = false,
+ .domain_parent = true,
+ .domain_child = true,
+};
+
+/*
+ * Same domain (inherited)
+ * .-------------.
+ * | P1----. | P1 -> P2 : allow
+ * | \ | P2 -> P1 : allow
+ * | ' |
+ * | P2 |
+ * '-------------'
+ */
+/* clang-format off */
+FIXTURE_VARIANT_ADD(hierarchy, allow_sibling_domain) {
+ /* clang-format on */
+ .domain_both = true,
+ .domain_parent = false,
+ .domain_child = false,
+};
+
+/*
+ * Inherited + child domain
+ * .-----------------.
+ * | P1----. | P1 -> P2 : allow
+ * | \ | P2 -> P1 : deny
+ * | .-'----. |
+ * | | P2 | |
+ * | '------' |
+ * '-----------------'
+ */
+/* clang-format off */
+FIXTURE_VARIANT_ADD(hierarchy, allow_with_nested_domain) {
+ /* clang-format on */
+ .domain_both = true,
+ .domain_parent = false,
+ .domain_child = true,
+};
+
+/*
+ * Inherited + parent domain
+ * .-----------------.
+ * |.------. | P1 -> P2 : deny
+ * || P1 ----. | P2 -> P1 : allow
+ * |'------' \ |
+ * | ' |
+ * | P2 |
+ * '-----------------'
+ */
+/* clang-format off */
+FIXTURE_VARIANT_ADD(hierarchy, deny_with_nested_and_parent_domain) {
+ /* clang-format on */
+ .domain_both = true,
+ .domain_parent = true,
+ .domain_child = false,
+};
+
+/*
+ * Inherited + parent and child domain (siblings)
+ * .-----------------.
+ * | .------. | P1 -> P2 : deny
+ * | | P1 . | P2 -> P1 : deny
+ * | '------'\ |
+ * | \ |
+ * | .--'---. |
+ * | | P2 | |
+ * | '------' |
+ * '-----------------'
+ */
+/* clang-format off */
+FIXTURE_VARIANT_ADD(hierarchy, deny_with_forked_domain) {
+ /* clang-format on */
+ .domain_both = true,
+ .domain_parent = true,
+ .domain_child = true,
+};
+
+FIXTURE_SETUP(hierarchy)
+{
+}
+
+FIXTURE_TEARDOWN(hierarchy)
+{
+}
+
+/* Test PTRACE_TRACEME and PTRACE_ATTACH for parent and child. */
+TEST_F(hierarchy, trace)
+{
+ pid_t child, parent;
+ int status, err_proc_read;
+ int pipe_child[2], pipe_parent[2];
+ int yama_ptrace_scope;
+ char buf_parent;
+ long ret;
+ bool can_read_child, can_trace_child, can_read_parent, can_trace_parent;
+
+ yama_ptrace_scope = get_yama_ptrace_scope();
+ ASSERT_LE(0, yama_ptrace_scope);
+
+ if (yama_ptrace_scope > YAMA_SCOPE_DISABLED)
+ TH_LOG("Incomplete tests due to Yama restrictions (scope %d)",
+ yama_ptrace_scope);
+
+ /*
+ * can_read_child is true if a parent process can read its child
+ * process, which is only the case when the parent process is not
+ * isolated from the child with a dedicated Landlock domain.
+ */
+ can_read_child = !variant->domain_parent;
+
+ /*
+ * can_trace_child is true if a parent process can trace its child
+ * process. This depends on two conditions:
+ * - The parent process is not isolated from the child with a dedicated
+ * Landlock domain.
+ * - Yama allows tracing children (up to YAMA_SCOPE_RELATIONAL).
+ */
+ can_trace_child = can_read_child &&
+ yama_ptrace_scope <= YAMA_SCOPE_RELATIONAL;
+
+ /*
+ * can_read_parent is true if a child process can read its parent
+ * process, which is only the case when the child process is not
+ * isolated from the parent with a dedicated Landlock domain.
+ */
+ can_read_parent = !variant->domain_child;
+
+ /*
+ * can_trace_parent is true if a child process can trace its parent
+ * process. This depends on two conditions:
+ * - The child process is not isolated from the parent with a dedicated
+ * Landlock domain.
+ * - Yama is disabled (YAMA_SCOPE_DISABLED).
+ */
+ can_trace_parent = can_read_parent &&
+ yama_ptrace_scope <= YAMA_SCOPE_DISABLED;
+
+ /*
+ * Removes all effective and permitted capabilities to not interfere
+ * with cap_ptrace_access_check() in case of PTRACE_MODE_FSCREDS.
+ */
+ drop_caps(_metadata);
+
+ parent = getpid();
+ ASSERT_EQ(0, pipe2(pipe_child, O_CLOEXEC));
+ ASSERT_EQ(0, pipe2(pipe_parent, O_CLOEXEC));
+ if (variant->domain_both) {
+ create_domain(_metadata);
+ if (!__test_passed(_metadata))
+ /* Aborts before forking. */
+ return;
+ }
+
+ child = fork();
+ ASSERT_LE(0, child);
+ if (child == 0) {
+ char buf_child;
+
+ ASSERT_EQ(0, close(pipe_parent[1]));
+ ASSERT_EQ(0, close(pipe_child[0]));
+ if (variant->domain_child)
+ create_domain(_metadata);
+
+ /* Waits for the parent to be in a domain, if any. */
+ ASSERT_EQ(1, read(pipe_parent[0], &buf_child, 1));
+
+ /* Tests PTRACE_MODE_READ on the parent. */
+ err_proc_read = test_ptrace_read(parent);
+ if (can_read_parent) {
+ EXPECT_EQ(0, err_proc_read);
+ } else {
+ EXPECT_EQ(EACCES, err_proc_read);
+ }
+
+ /* Tests PTRACE_ATTACH on the parent. */
+ ret = ptrace(PTRACE_ATTACH, parent, NULL, 0);
+ if (can_trace_parent) {
+ EXPECT_EQ(0, ret);
+ } else {
+ EXPECT_EQ(-1, ret);
+ EXPECT_EQ(EPERM, errno);
+ }
+ if (ret == 0) {
+ ASSERT_EQ(parent, waitpid(parent, &status, 0));
+ ASSERT_EQ(1, WIFSTOPPED(status));
+ ASSERT_EQ(0, ptrace(PTRACE_DETACH, parent, NULL, 0));
+ }
+
+ /* Tests child PTRACE_TRACEME. */
+ ret = ptrace(PTRACE_TRACEME);
+ if (can_trace_child) {
+ EXPECT_EQ(0, ret);
+ } else {
+ EXPECT_EQ(-1, ret);
+ EXPECT_EQ(EPERM, errno);
+ }
+
+ /*
+ * Signals that the PTRACE_ATTACH test is done and the
+ * PTRACE_TRACEME test is ongoing.
+ */
+ ASSERT_EQ(1, write(pipe_child[1], ".", 1));
+
+ if (can_trace_child) {
+ ASSERT_EQ(0, raise(SIGSTOP));
+ }
+
+ /* Waits for the parent PTRACE_ATTACH test. */
+ ASSERT_EQ(1, read(pipe_parent[0], &buf_child, 1));
+ _exit(_metadata->exit_code);
+ return;
+ }
+
+ ASSERT_EQ(0, close(pipe_child[1]));
+ ASSERT_EQ(0, close(pipe_parent[0]));
+ if (variant->domain_parent)
+ create_domain(_metadata);
+
+ /* Signals that the parent is in a domain, if any. */
+ ASSERT_EQ(1, write(pipe_parent[1], ".", 1));
+
+ /*
+ * Waits for the child to test PTRACE_ATTACH on the parent and start
+ * testing PTRACE_TRACEME.
+ */
+ ASSERT_EQ(1, read(pipe_child[0], &buf_parent, 1));
+
+ /* Tests child PTRACE_TRACEME. */
+ if (can_trace_child) {
+ ASSERT_EQ(child, waitpid(child, &status, 0));
+ ASSERT_EQ(1, WIFSTOPPED(status));
+ ASSERT_EQ(0, ptrace(PTRACE_DETACH, child, NULL, 0));
+ } else {
+ /* The child should not be traced by the parent. */
+ EXPECT_EQ(-1, ptrace(PTRACE_DETACH, child, NULL, 0));
+ EXPECT_EQ(ESRCH, errno);
+ }
+
+ /* Tests PTRACE_MODE_READ on the child. */
+ err_proc_read = test_ptrace_read(child);
+ if (can_read_child) {
+ EXPECT_EQ(0, err_proc_read);
+ } else {
+ EXPECT_EQ(EACCES, err_proc_read);
+ }
+
+ /* Tests PTRACE_ATTACH on the child. */
+ ret = ptrace(PTRACE_ATTACH, child, NULL, 0);
+ if (can_trace_child) {
+ EXPECT_EQ(0, ret);
+ } else {
+ EXPECT_EQ(-1, ret);
+ EXPECT_EQ(EPERM, errno);
+ }
+
+ if (ret == 0) {
+ ASSERT_EQ(child, waitpid(child, &status, 0));
+ ASSERT_EQ(1, WIFSTOPPED(status));
+ ASSERT_EQ(0, ptrace(PTRACE_DETACH, child, NULL, 0));
+ }
+
+ /* Signals that the parent PTRACE_ATTACH test is done. */
+ ASSERT_EQ(1, write(pipe_parent[1], ".", 1));
+ ASSERT_EQ(child, waitpid(child, &status, 0));
+
+ if (WIFSIGNALED(status) || !WIFEXITED(status) ||
+ WEXITSTATUS(status) != EXIT_SUCCESS)
+ _metadata->exit_code = KSFT_FAIL;
+}
+
+TEST_HARNESS_MAIN
diff --git a/tools/testing/selftests/landlock/true.c b/tools/testing/selftests/landlock/true.c
new file mode 100644
index 000000000000..3f9ccbf52783
--- /dev/null
+++ b/tools/testing/selftests/landlock/true.c
@@ -0,0 +1,5 @@
+// SPDX-License-Identifier: GPL-2.0
+int main(void)
+{
+ return 0;
+}
diff --git a/tools/testing/selftests/lib.mk b/tools/testing/selftests/lib.mk
index b0556c752443..da2cade3bab0 100644
--- a/tools/testing/selftests/lib.mk
+++ b/tools/testing/selftests/lib.mk
@@ -1,6 +1,39 @@
# This mimics the top-level Makefile. We do it explicitly here so that this
# Makefile can operate with or without the kbuild infrastructure.
+ifneq ($(LLVM),)
+ifneq ($(filter %/,$(LLVM)),)
+LLVM_PREFIX := $(LLVM)
+else ifneq ($(filter -%,$(LLVM)),)
+LLVM_SUFFIX := $(LLVM)
+endif
+
+CLANG_TARGET_FLAGS_arm := arm-linux-gnueabi
+CLANG_TARGET_FLAGS_arm64 := aarch64-linux-gnu
+CLANG_TARGET_FLAGS_hexagon := hexagon-linux-musl
+CLANG_TARGET_FLAGS_i386 := i386-linux-gnu
+CLANG_TARGET_FLAGS_m68k := m68k-linux-gnu
+CLANG_TARGET_FLAGS_mips := mipsel-linux-gnu
+CLANG_TARGET_FLAGS_powerpc := powerpc64le-linux-gnu
+CLANG_TARGET_FLAGS_riscv := riscv64-linux-gnu
+CLANG_TARGET_FLAGS_s390 := s390x-linux-gnu
+CLANG_TARGET_FLAGS_x86 := x86_64-linux-gnu
+CLANG_TARGET_FLAGS_x86_64 := x86_64-linux-gnu
+CLANG_TARGET_FLAGS := $(CLANG_TARGET_FLAGS_$(ARCH))
+
+ifeq ($(CROSS_COMPILE),)
+ifeq ($(CLANG_TARGET_FLAGS),)
+$(error Specify CROSS_COMPILE or add '--target=' option to lib.mk)
+else
+CLANG_FLAGS += --target=$(CLANG_TARGET_FLAGS)
+endif # CLANG_TARGET_FLAGS
+else
+CLANG_FLAGS += --target=$(notdir $(CROSS_COMPILE:%-=%))
+endif # CROSS_COMPILE
+
+CC := $(LLVM_PREFIX)clang$(LLVM_SUFFIX) $(CLANG_FLAGS) -fintegrated-as
+else
CC := $(CROSS_COMPILE)gcc
+endif # LLVM
ifeq (0,$(MAKELEVEL))
ifeq ($(OUTPUT),)
@@ -9,6 +42,11 @@ ifeq (0,$(MAKELEVEL))
endif
endif
selfdir = $(realpath $(dir $(filter %/lib.mk,$(MAKEFILE_LIST))))
+top_srcdir = $(selfdir)/../../..
+
+ifeq ($(KHDR_INCLUDES),)
+KHDR_INCLUDES := -isystem $(top_srcdir)/usr/include
+endif
# The following are built by lib.mk common compile rules.
# TEST_CUSTOM_PROGS should be used by tests that require
@@ -20,48 +58,11 @@ TEST_GEN_PROGS := $(patsubst %,$(OUTPUT)/%,$(TEST_GEN_PROGS))
TEST_GEN_PROGS_EXTENDED := $(patsubst %,$(OUTPUT)/%,$(TEST_GEN_PROGS_EXTENDED))
TEST_GEN_FILES := $(patsubst %,$(OUTPUT)/%,$(TEST_GEN_FILES))
-ifdef KSFT_KHDR_INSTALL
-top_srcdir ?= ../../../..
-include $(top_srcdir)/scripts/subarch.include
-ARCH ?= $(SUBARCH)
-
-# set default goal to all, so make without a target runs all, even when
-# all isn't the first target in the file.
-.DEFAULT_GOAL := all
-
-# Invoke headers install with --no-builtin-rules to avoid circular
-# dependency in "make kselftest" case. In this case, second level
-# make inherits builtin-rules which will use the rule generate
-# Makefile.o and runs into
-# "Circular Makefile.o <- prepare dependency dropped."
-# and headers_install fails and test compile fails.
-# O= KBUILD_OUTPUT cases don't run into this error, since main Makefile
-# invokes them as sub-makes and --no-builtin-rules is not necessary,
-# but doesn't cause any failures. Keep it simple and use the same
-# flags in both cases.
-# Note that the support to install headers from lib.mk is necessary
-# when test Makefile is run directly with "make -C".
-# When local build is done, headers are installed in the default
-# INSTALL_HDR_PATH usr/include.
-.PHONY: khdr
-khdr:
-ifndef KSFT_KHDR_INSTALL_DONE
-ifeq (1,$(DEFAULT_INSTALL_HDR_PATH))
- make --no-builtin-rules ARCH=$(ARCH) -C $(top_srcdir) headers_install
-else
- make --no-builtin-rules INSTALL_HDR_PATH=$$OUTPUT/usr \
- ARCH=$(ARCH) -C $(top_srcdir) headers_install
-endif
-endif
+all: $(TEST_GEN_PROGS) $(TEST_GEN_PROGS_EXTENDED) $(TEST_GEN_FILES) \
+ $(if $(TEST_GEN_MODS_DIR),gen_mods_dir)
-all: khdr $(TEST_GEN_PROGS) $(TEST_GEN_PROGS_EXTENDED) $(TEST_GEN_FILES)
-else
-all: $(TEST_GEN_PROGS) $(TEST_GEN_PROGS_EXTENDED) $(TEST_GEN_FILES)
-endif
-
-.ONESHELL:
define RUN_TESTS
- @BASE_DIR="$(selfdir)"; \
+ BASE_DIR="$(selfdir)"; \
. $(selfdir)/kselftest/runner.sh; \
if [ "X$(summary)" != "X" ]; then \
per_test_logging=1; \
@@ -69,24 +70,53 @@ define RUN_TESTS
run_many $(1)
endef
+define INSTALL_INCLUDES
+ $(if $(TEST_INCLUDES), \
+ relative_files=""; \
+ for entry in $(TEST_INCLUDES); do \
+ entry_dir=$$(readlink -e "$$(dirname "$$entry")"); \
+ entry_name=$$(basename "$$entry"); \
+ relative_dir=$${entry_dir#"$$SRC_PATH"/}; \
+ if [ "$$relative_dir" = "$$entry_dir" ]; then \
+ echo "Error: TEST_INCLUDES entry \"$$entry\" not located inside selftests directory ($$SRC_PATH)" >&2; \
+ exit 1; \
+ fi; \
+ relative_files="$$relative_files $$relative_dir/$$entry_name"; \
+ done; \
+ cd $(SRC_PATH) && rsync -aR $$relative_files $(OBJ_PATH)/ \
+ )
+endef
+
run_tests: all
ifdef building_out_of_srctree
- @if [ "X$(TEST_PROGS) $(TEST_PROGS_EXTENDED) $(TEST_FILES)" != "X" ]; then
- @rsync -aq $(TEST_PROGS) $(TEST_PROGS_EXTENDED) $(TEST_FILES) $(OUTPUT)
+ @if [ "X$(TEST_PROGS)$(TEST_PROGS_EXTENDED)$(TEST_FILES)$(TEST_GEN_MODS_DIR)" != "X" ]; then \
+ rsync -aq --copy-unsafe-links $(TEST_PROGS) $(TEST_PROGS_EXTENDED) $(TEST_FILES) $(TEST_GEN_MODS_DIR) $(OUTPUT); \
fi
- @if [ "X$(TEST_PROGS)" != "X" ]; then
- $(call RUN_TESTS, $(TEST_GEN_PROGS) $(TEST_CUSTOM_PROGS) $(OUTPUT)/$(TEST_PROGS))
- else
- $(call RUN_TESTS, $(TEST_GEN_PROGS) $(TEST_CUSTOM_PROGS))
+ @$(INSTALL_INCLUDES)
+ @if [ "X$(TEST_PROGS)" != "X" ]; then \
+ $(call RUN_TESTS, $(TEST_GEN_PROGS) $(TEST_CUSTOM_PROGS) \
+ $(addprefix $(OUTPUT)/,$(TEST_PROGS))) ; \
+ else \
+ $(call RUN_TESTS, $(TEST_GEN_PROGS) $(TEST_CUSTOM_PROGS)); \
fi
else
- $(call RUN_TESTS, $(TEST_GEN_PROGS) $(TEST_CUSTOM_PROGS) $(TEST_PROGS))
+ @$(call RUN_TESTS, $(TEST_GEN_PROGS) $(TEST_CUSTOM_PROGS) $(TEST_PROGS))
endif
+gen_mods_dir:
+ $(Q)$(MAKE) -C $(TEST_GEN_MODS_DIR)
+
+clean_mods_dir:
+ $(Q)$(MAKE) -C $(TEST_GEN_MODS_DIR) clean
+
define INSTALL_SINGLE_RULE
$(if $(INSTALL_LIST),@mkdir -p $(INSTALL_PATH))
- $(if $(INSTALL_LIST),@echo rsync -a $(INSTALL_LIST) $(INSTALL_PATH)/)
- $(if $(INSTALL_LIST),@rsync -a $(INSTALL_LIST) $(INSTALL_PATH)/)
+ $(if $(INSTALL_LIST),rsync -a --copy-unsafe-links $(INSTALL_LIST) $(INSTALL_PATH)/)
+endef
+
+define INSTALL_MODS_RULE
+ $(if $(INSTALL_LIST),@mkdir -p $(INSTALL_PATH)/$(INSTALL_LIST))
+ $(if $(INSTALL_LIST),rsync -a --copy-unsafe-links $(INSTALL_LIST)/*.ko $(INSTALL_PATH)/$(INSTALL_LIST))
endef
define INSTALL_RULE
@@ -97,11 +127,14 @@ define INSTALL_RULE
$(eval INSTALL_LIST = $(TEST_CUSTOM_PROGS)) $(INSTALL_SINGLE_RULE)
$(eval INSTALL_LIST = $(TEST_GEN_PROGS_EXTENDED)) $(INSTALL_SINGLE_RULE)
$(eval INSTALL_LIST = $(TEST_GEN_FILES)) $(INSTALL_SINGLE_RULE)
+ $(eval INSTALL_LIST = $(notdir $(TEST_GEN_MODS_DIR))) $(INSTALL_MODS_RULE)
+ $(eval INSTALL_LIST = $(wildcard config settings)) $(INSTALL_SINGLE_RULE)
endef
install: all
ifdef INSTALL_PATH
$(INSTALL_RULE)
+ $(INSTALL_INCLUDES)
else
$(error Error: set INSTALL_PATH to use install)
endif
@@ -109,9 +142,8 @@ endif
emit_tests:
for TEST in $(TEST_GEN_PROGS) $(TEST_CUSTOM_PROGS) $(TEST_PROGS); do \
BASENAME_TEST=`basename $$TEST`; \
- echo " \\"; \
- echo -n " \"$$BASENAME_TEST\""; \
- done; \
+ echo "$(COLLECTION):$$BASENAME_TEST"; \
+ done
# define if isn't already. It is undefined in make O= case.
ifeq ($(RM),)
@@ -122,9 +154,14 @@ define CLEAN
$(RM) -r $(TEST_GEN_PROGS) $(TEST_GEN_PROGS_EXTENDED) $(TEST_GEN_FILES) $(EXTRA_CLEAN)
endef
-clean:
+clean: $(if $(TEST_GEN_MODS_DIR),clean_mods_dir)
$(CLEAN)
+# Enables to extend CFLAGS and LDFLAGS from command line, e.g.
+# make USERCFLAGS=-Werror USERLDFLAGS=-static
+CFLAGS += $(USERCFLAGS)
+LDFLAGS += $(USERLDFLAGS)
+
# When make O= with kselftest target from main level
# the following aren't defined.
#
@@ -137,9 +174,9 @@ endif
# Selftest makefiles can override those targets by setting
# OVERRIDE_TARGETS = 1.
ifeq ($(OVERRIDE_TARGETS),)
-LOCAL_HDRS := $(selfdir)/kselftest_harness.h $(selfdir)/kselftest.h
+LOCAL_HDRS += $(selfdir)/kselftest_harness.h $(selfdir)/kselftest.h
$(OUTPUT)/%:%.c $(LOCAL_HDRS)
- $(LINK.c) $^ $(LDLIBS) -o $@
+ $(LINK.c) $(filter-out $(LOCAL_HDRS),$^) $(LDLIBS) -o $@
$(OUTPUT)/%.o:%.S
$(COMPILE.S) $^ -o $@
@@ -148,4 +185,4 @@ $(OUTPUT)/%:%.S
$(LINK.S) $^ $(LDLIBS) -o $@
endif
-.PHONY: run_tests all clean install emit_tests
+.PHONY: run_tests all clean install emit_tests gen_mods_dir clean_mods_dir
diff --git a/tools/testing/selftests/lib/Makefile b/tools/testing/selftests/lib/Makefile
index a105f094676e..ee71fc99d5b5 100644
--- a/tools/testing/selftests/lib/Makefile
+++ b/tools/testing/selftests/lib/Makefile
@@ -4,6 +4,6 @@
# No binaries, but make sure arg-less "make" doesn't trigger "run_tests"
all:
-TEST_PROGS := printf.sh bitmap.sh prime_numbers.sh strscpy.sh
+TEST_PROGS := printf.sh bitmap.sh prime_numbers.sh scanf.sh strscpy.sh
include ../lib.mk
diff --git a/tools/testing/selftests/lib/config b/tools/testing/selftests/lib/config
index b80ee3f6e265..645839b50b0a 100644
--- a/tools/testing/selftests/lib/config
+++ b/tools/testing/selftests/lib/config
@@ -1,4 +1,5 @@
CONFIG_TEST_PRINTF=m
+CONFIG_TEST_SCANF=m
CONFIG_TEST_BITMAP=m
CONFIG_PRIME_NUMBERS=m
CONFIG_TEST_STRSCPY=m
diff --git a/tools/testing/selftests/lib/scanf.sh b/tools/testing/selftests/lib/scanf.sh
new file mode 100755
index 000000000000..b59b8ba561c3
--- /dev/null
+++ b/tools/testing/selftests/lib/scanf.sh
@@ -0,0 +1,4 @@
+#!/bin/sh
+# SPDX-License-Identifier: GPL-2.0
+# Tests the scanf infrastructure using test_scanf kernel module.
+$(dirname $0)/../kselftest/module.sh "scanf" test_scanf
diff --git a/tools/testing/selftests/livepatch/.gitignore b/tools/testing/selftests/livepatch/.gitignore
new file mode 100644
index 000000000000..f1e9c2a20e99
--- /dev/null
+++ b/tools/testing/selftests/livepatch/.gitignore
@@ -0,0 +1 @@
+test_klp-call_getpid
diff --git a/tools/testing/selftests/livepatch/Makefile b/tools/testing/selftests/livepatch/Makefile
index 1acc9e1fa3fb..35418a4790be 100644
--- a/tools/testing/selftests/livepatch/Makefile
+++ b/tools/testing/selftests/livepatch/Makefile
@@ -1,12 +1,16 @@
# SPDX-License-Identifier: GPL-2.0
+TEST_GEN_FILES := test_klp-call_getpid
+TEST_GEN_MODS_DIR := test_modules
TEST_PROGS_EXTENDED := functions.sh
TEST_PROGS := \
test-livepatch.sh \
test-callbacks.sh \
test-shadow-vars.sh \
test-state.sh \
- test-ftrace.sh
+ test-ftrace.sh \
+ test-sysfs.sh \
+ test-syscall.sh
TEST_FILES := settings
diff --git a/tools/testing/selftests/livepatch/README b/tools/testing/selftests/livepatch/README
index 621d325425c2..d2035dd64a2b 100644
--- a/tools/testing/selftests/livepatch/README
+++ b/tools/testing/selftests/livepatch/README
@@ -6,38 +6,51 @@ This is a small set of sanity tests for the kernel livepatching.
The test suite loads and unloads several test kernel modules to verify
livepatch behavior. Debug information is logged to the kernel's message
-buffer and parsed for expected messages. (Note: the tests will clear
-the message buffer between individual tests.)
+buffer and parsed for expected messages. (Note: the tests will compare
+the message buffer for only the duration of each individual test.)
Config
------
-Set these config options and their prerequisites:
+Set CONFIG_LIVEPATCH=y option and it's prerequisites.
-CONFIG_LIVEPATCH=y
-CONFIG_TEST_LIVEPATCH=m
+Building the tests
+------------------
+
+To only build the tests without running them, run:
+
+ % make -C tools/testing/selftests/livepatch
+
+The command above will compile all test modules and test programs, making them
+ready to be packaged if so desired.
Running the tests
-----------------
-Test kernel modules are built as part of lib/ (make modules) and need to
-be installed (make modules_install) as the test scripts will modprobe
-them.
+Test kernel modules are built before running the livepatch selftests. The
+modules are located under test_modules directory, and are built as out-of-tree
+modules. This is specially useful since the same sources can be built and
+tested on systems with different kABI, ensuring they the tests are backwards
+compatible. The modules will be loaded by the test scripts using insmod.
To run the livepatch selftests, from the top of the kernel source tree:
% make -C tools/testing/selftests TARGETS=livepatch run_tests
+or
+
+ % make kselftest TARGETS=livepatch
+
Adding tests
------------
See the common functions.sh file for the existing collection of utility
-functions, most importantly setup_config() and check_result(). The
-latter function greps the kernel's ring buffer for "livepatch:" and
-"test_klp" strings, so tests be sure to include one of those strings for
-result comparison. Other utility functions include general module
-loading and livepatch loading helpers (waiting for patch transitions,
-sysfs entries, etc.)
+functions, most importantly setup_config(), start_test() and
+check_result(). The latter function greps the kernel's ring buffer for
+"livepatch:" and "test_klp" strings, so tests be sure to include one of
+those strings for result comparison. Other utility functions include
+general module loading and livepatch loading helpers (waiting for patch
+transitions, sysfs entries, etc.)
diff --git a/tools/testing/selftests/livepatch/config b/tools/testing/selftests/livepatch/config
index ad23100cb27c..e88bf518a23a 100644
--- a/tools/testing/selftests/livepatch/config
+++ b/tools/testing/selftests/livepatch/config
@@ -1,3 +1,2 @@
CONFIG_LIVEPATCH=y
CONFIG_DYNAMIC_DEBUG=y
-CONFIG_TEST_LIVEPATCH=m
diff --git a/tools/testing/selftests/livepatch/functions.sh b/tools/testing/selftests/livepatch/functions.sh
index 2aab9791791d..fc4c6a016d38 100644
--- a/tools/testing/selftests/livepatch/functions.sh
+++ b/tools/testing/selftests/livepatch/functions.sh
@@ -6,6 +6,7 @@
MAX_RETRIES=600
RETRY_INTERVAL=".1" # seconds
+KLP_SYSFS_DIR="/sys/kernel/livepatch"
# Kselftest framework requirement - SKIP code is 4
ksft_skip=4
@@ -33,6 +34,18 @@ function is_root() {
fi
}
+# Check if we can compile the modules before loading them
+function has_kdir() {
+ if [ -z "$KDIR" ]; then
+ KDIR="/lib/modules/$(uname -r)/build"
+ fi
+
+ if [ ! -d "$KDIR" ]; then
+ echo "skip all tests: KDIR ($KDIR) not available to compile modules."
+ exit $ksft_skip
+ fi
+}
+
# die(msg) - game over, man
# msg - dying words
function die() {
@@ -64,8 +77,29 @@ function set_dynamic_debug() {
}
function set_ftrace_enabled() {
- result=$(sysctl kernel.ftrace_enabled="$1" 2>&1 | paste --serial --delimiters=' ')
- echo "livepatch: $result" > /dev/kmsg
+ local can_fail=0
+ if [[ "$1" == "--fail" ]] ; then
+ can_fail=1
+ shift
+ fi
+
+ local err=$(sysctl -q kernel.ftrace_enabled="$1" 2>&1)
+ local result=$(sysctl --values kernel.ftrace_enabled)
+
+ if [[ "$result" != "$1" ]] ; then
+ if [[ $can_fail -eq 1 ]] ; then
+ echo "livepatch: $err" | sed 's#/proc/sys/kernel/#kernel.#' > /dev/kmsg
+ return
+ fi
+
+ skip "failed to set kernel.ftrace_enabled = $1"
+ fi
+
+ echo "livepatch: kernel.ftrace_enabled = $result" > /dev/kmsg
+}
+
+function cleanup() {
+ pop_config
}
# setup_config - save the current config and set a script exit trap that
@@ -74,10 +108,11 @@ function set_ftrace_enabled() {
# the ftrace_enabled sysctl.
function setup_config() {
is_root
+ has_kdir
push_config
set_dynamic_debug
set_ftrace_enabled 1
- trap pop_config EXIT INT TERM HUP
+ trap cleanup EXIT INT TERM HUP
}
# loop_until(cmd) - loop a command until it is successful or $MAX_RETRIES,
@@ -93,16 +128,14 @@ function loop_until() {
done
}
-function assert_mod() {
- local mod="$1"
-
- modprobe --dry-run "$mod" &>/dev/null
-}
-
function is_livepatch_mod() {
local mod="$1"
- if [[ $(modinfo "$mod" | awk '/^livepatch:/{print $NF}') == "Y" ]]; then
+ if [[ ! -f "test_modules/$mod.ko" ]]; then
+ die "Can't find \"test_modules/$mod.ko\", try \"make\""
+ fi
+
+ if [[ $(modinfo "test_modules/$mod.ko" | awk '/^livepatch:/{print $NF}') == "Y" ]]; then
return 0
fi
@@ -112,9 +145,9 @@ function is_livepatch_mod() {
function __load_mod() {
local mod="$1"; shift
- local msg="% modprobe $mod $*"
+ local msg="% insmod test_modules/$mod.ko $*"
log "${msg%% }"
- ret=$(modprobe "$mod" "$@" 2>&1)
+ ret=$(insmod "test_modules/$mod.ko" "$@" 2>&1)
if [[ "$ret" != "" ]]; then
die "$ret"
fi
@@ -127,13 +160,10 @@ function __load_mod() {
# load_mod(modname, params) - load a kernel module
# modname - module name to load
-# params - module parameters to pass to modprobe
+# params - module parameters to pass to insmod
function load_mod() {
local mod="$1"; shift
- assert_mod "$mod" ||
- skip "unable to load module ${mod}, verify CONFIG_TEST_LIVEPATCH=m and run self-tests as root"
-
is_livepatch_mod "$mod" &&
die "use load_lp() to load the livepatch module $mod"
@@ -143,13 +173,10 @@ function load_mod() {
# load_lp_nowait(modname, params) - load a kernel module with a livepatch
# but do not wait on until the transition finishes
# modname - module name to load
-# params - module parameters to pass to modprobe
+# params - module parameters to pass to insmod
function load_lp_nowait() {
local mod="$1"; shift
- assert_mod "$mod" ||
- skip "unable to load module ${mod}, verify CONFIG_TEST_LIVEPATCH=m and run self-tests as root"
-
is_livepatch_mod "$mod" ||
die "module $mod is not a livepatch"
@@ -162,7 +189,7 @@ function load_lp_nowait() {
# load_lp(modname, params) - load a kernel module with a livepatch
# modname - module name to load
-# params - module parameters to pass to modprobe
+# params - module parameters to pass to insmod
function load_lp() {
local mod="$1"; shift
@@ -175,13 +202,13 @@ function load_lp() {
# load_failing_mod(modname, params) - load a kernel module, expect to fail
# modname - module name to load
-# params - module parameters to pass to modprobe
+# params - module parameters to pass to insmod
function load_failing_mod() {
local mod="$1"; shift
- local msg="% modprobe $mod $*"
+ local msg="% insmod test_modules/$mod.ko $*"
log "${msg%% }"
- ret=$(modprobe "$mod" "$@" 2>&1)
+ ret=$(insmod "test_modules/$mod.ko" "$@" 2>&1)
if [[ "$ret" == "" ]]; then
die "$mod unexpectedly loaded"
fi
@@ -243,18 +270,77 @@ function set_pre_patch_ret {
die "failed to set pre_patch_ret parameter for $mod module"
}
+function start_test {
+ local test="$1"
+
+ # Dump something unique into the dmesg log, then stash the entry
+ # in LAST_DMESG. The check_result() function will use it to
+ # find new kernel messages since the test started.
+ local last_dmesg_msg="livepatch kselftest timestamp: $(date --rfc-3339=ns)"
+ log "$last_dmesg_msg"
+ loop_until 'dmesg | grep -q "$last_dmesg_msg"' ||
+ die "buffer busy? can't find canary dmesg message: $last_dmesg_msg"
+ LAST_DMESG=$(dmesg | grep "$last_dmesg_msg")
+
+ echo -n "TEST: $test ... "
+ log "===== TEST: $test ====="
+}
+
# check_result() - verify dmesg output
# TODO - better filter, out of order msgs, etc?
function check_result {
local expect="$*"
local result
- result=$(dmesg | grep -v 'tainting' | grep -e 'livepatch:' -e 'test_klp' | sed 's/^\[[ 0-9.]*\] //')
+ # Test results include any new dmesg entry since LAST_DMESG, then:
+ # - include lines matching keywords
+ # - exclude lines matching keywords
+ # - filter out dmesg timestamp prefixes
+ result=$(dmesg | awk -v last_dmesg="$LAST_DMESG" 'p; $0 == last_dmesg { p=1 }' | \
+ grep -e 'livepatch:' -e 'test_klp' | \
+ grep -v '\(tainting\|taints\) kernel' | \
+ sed 's/^\[[ 0-9.]*\] //')
if [[ "$expect" == "$result" ]] ; then
echo "ok"
+ elif [[ "$result" == "" ]] ; then
+ echo -e "not ok\n\nbuffer overrun? can't find canary dmesg entry: $LAST_DMESG\n"
+ die "livepatch kselftest(s) failed"
else
echo -e "not ok\n\n$(diff -upr --label expected --label result <(echo "$expect") <(echo "$result"))\n"
die "livepatch kselftest(s) failed"
fi
}
+
+# check_sysfs_rights(modname, rel_path, expected_rights) - check sysfs
+# path permissions
+# modname - livepatch module creating the sysfs interface
+# rel_path - relative path of the sysfs interface
+# expected_rights - expected access rights
+function check_sysfs_rights() {
+ local mod="$1"; shift
+ local rel_path="$1"; shift
+ local expected_rights="$1"; shift
+
+ local path="$KLP_SYSFS_DIR/$mod/$rel_path"
+ local rights=$(/bin/stat --format '%A' "$path")
+ if test "$rights" != "$expected_rights" ; then
+ die "Unexpected access rights of $path: $expected_rights vs. $rights"
+ fi
+}
+
+# check_sysfs_value(modname, rel_path, expected_value) - check sysfs value
+# modname - livepatch module creating the sysfs interface
+# rel_path - relative path of the sysfs interface
+# expected_value - expected value read from the file
+function check_sysfs_value() {
+ local mod="$1"; shift
+ local rel_path="$1"; shift
+ local expected_value="$1"; shift
+
+ local path="$KLP_SYSFS_DIR/$mod/$rel_path"
+ local value=`cat $path`
+ if test "$value" != "$expected_value" ; then
+ die "Unexpected value in $path: $expected_value vs. $value"
+ fi
+}
diff --git a/tools/testing/selftests/livepatch/test-callbacks.sh b/tools/testing/selftests/livepatch/test-callbacks.sh
index a35289b13c9c..32b150e25b10 100755
--- a/tools/testing/selftests/livepatch/test-callbacks.sh
+++ b/tools/testing/selftests/livepatch/test-callbacks.sh
@@ -12,8 +12,6 @@ MOD_TARGET_BUSY=test_klp_callbacks_busy
setup_config
-# TEST: target module before livepatch
-#
# Test a combination of loading a kernel module and a livepatch that
# patches a function in the first module. Load the target module
# before the livepatch module. Unload them in the same order.
@@ -28,8 +26,7 @@ setup_config
# unpatching transition starts. klp_objects are reverted, post-patch
# callbacks execute and the transition completes.
-echo -n "TEST: target module before livepatch ... "
-dmesg -C
+start_test "target module before livepatch"
load_mod $MOD_TARGET
load_lp $MOD_LIVEPATCH
@@ -37,9 +34,9 @@ disable_lp $MOD_LIVEPATCH
unload_lp $MOD_LIVEPATCH
unload_mod $MOD_TARGET
-check_result "% modprobe $MOD_TARGET
+check_result "% insmod test_modules/$MOD_TARGET.ko
$MOD_TARGET: ${MOD_TARGET}_init
-% modprobe $MOD_LIVEPATCH
+% insmod test_modules/$MOD_LIVEPATCH.ko
livepatch: enabling patch '$MOD_LIVEPATCH'
livepatch: '$MOD_LIVEPATCH': initializing patching transition
$MOD_LIVEPATCH: pre_patch_callback: vmlinux
@@ -63,8 +60,6 @@ livepatch: '$MOD_LIVEPATCH': unpatching complete
$MOD_TARGET: ${MOD_TARGET}_exit"
-# TEST: module_coming notifier
-#
# This test is similar to the previous test, but (un)load the livepatch
# module before the target kernel module. This tests the livepatch
# core's module_coming handler.
@@ -78,8 +73,7 @@ $MOD_TARGET: ${MOD_TARGET}_exit"
# - On livepatch disable, all currently loaded klp_objects' (vmlinux and
# $MOD_TARGET) pre/post-unpatch callbacks are executed.
-echo -n "TEST: module_coming notifier ... "
-dmesg -C
+start_test "module_coming notifier"
load_lp $MOD_LIVEPATCH
load_mod $MOD_TARGET
@@ -87,7 +81,7 @@ disable_lp $MOD_LIVEPATCH
unload_lp $MOD_LIVEPATCH
unload_mod $MOD_TARGET
-check_result "% modprobe $MOD_LIVEPATCH
+check_result "% insmod test_modules/$MOD_LIVEPATCH.ko
livepatch: enabling patch '$MOD_LIVEPATCH'
livepatch: '$MOD_LIVEPATCH': initializing patching transition
$MOD_LIVEPATCH: pre_patch_callback: vmlinux
@@ -95,7 +89,7 @@ livepatch: '$MOD_LIVEPATCH': starting patching transition
livepatch: '$MOD_LIVEPATCH': completing patching transition
$MOD_LIVEPATCH: post_patch_callback: vmlinux
livepatch: '$MOD_LIVEPATCH': patching complete
-% modprobe $MOD_TARGET
+% insmod test_modules/$MOD_TARGET.ko
livepatch: applying patch '$MOD_LIVEPATCH' to loading module '$MOD_TARGET'
$MOD_LIVEPATCH: pre_patch_callback: $MOD_TARGET -> [MODULE_STATE_COMING] Full formed, running module_init
$MOD_LIVEPATCH: post_patch_callback: $MOD_TARGET -> [MODULE_STATE_COMING] Full formed, running module_init
@@ -114,8 +108,6 @@ livepatch: '$MOD_LIVEPATCH': unpatching complete
$MOD_TARGET: ${MOD_TARGET}_exit"
-# TEST: module_going notifier
-#
# Test loading the livepatch after a targeted kernel module, then unload
# the kernel module before disabling the livepatch. This tests the
# livepatch core's module_going handler.
@@ -129,8 +121,7 @@ $MOD_TARGET: ${MOD_TARGET}_exit"
# - When the livepatch is disabled, pre and post-unpatch callbacks are
# run for the remaining klp_object, vmlinux.
-echo -n "TEST: module_going notifier ... "
-dmesg -C
+start_test "module_going notifier"
load_mod $MOD_TARGET
load_lp $MOD_LIVEPATCH
@@ -138,9 +129,9 @@ unload_mod $MOD_TARGET
disable_lp $MOD_LIVEPATCH
unload_lp $MOD_LIVEPATCH
-check_result "% modprobe $MOD_TARGET
+check_result "% insmod test_modules/$MOD_TARGET.ko
$MOD_TARGET: ${MOD_TARGET}_init
-% modprobe $MOD_LIVEPATCH
+% insmod test_modules/$MOD_LIVEPATCH.ko
livepatch: enabling patch '$MOD_LIVEPATCH'
livepatch: '$MOD_LIVEPATCH': initializing patching transition
$MOD_LIVEPATCH: pre_patch_callback: vmlinux
@@ -165,8 +156,6 @@ livepatch: '$MOD_LIVEPATCH': unpatching complete
% rmmod $MOD_LIVEPATCH"
-# TEST: module_coming and module_going notifiers
-#
# This test is similar to the previous test, however the livepatch is
# loaded first. This tests the livepatch core's module_coming and
# module_going handlers.
@@ -180,8 +169,7 @@ livepatch: '$MOD_LIVEPATCH': unpatching complete
# from the $MOD_TARGET klp_object. As such, only pre and
# post-unpatch callbacks are executed when this occurs.
-echo -n "TEST: module_coming and module_going notifiers ... "
-dmesg -C
+start_test "module_coming and module_going notifiers"
load_lp $MOD_LIVEPATCH
load_mod $MOD_TARGET
@@ -189,7 +177,7 @@ unload_mod $MOD_TARGET
disable_lp $MOD_LIVEPATCH
unload_lp $MOD_LIVEPATCH
-check_result "% modprobe $MOD_LIVEPATCH
+check_result "% insmod test_modules/$MOD_LIVEPATCH.ko
livepatch: enabling patch '$MOD_LIVEPATCH'
livepatch: '$MOD_LIVEPATCH': initializing patching transition
$MOD_LIVEPATCH: pre_patch_callback: vmlinux
@@ -197,7 +185,7 @@ livepatch: '$MOD_LIVEPATCH': starting patching transition
livepatch: '$MOD_LIVEPATCH': completing patching transition
$MOD_LIVEPATCH: post_patch_callback: vmlinux
livepatch: '$MOD_LIVEPATCH': patching complete
-% modprobe $MOD_TARGET
+% insmod test_modules/$MOD_TARGET.ko
livepatch: applying patch '$MOD_LIVEPATCH' to loading module '$MOD_TARGET'
$MOD_LIVEPATCH: pre_patch_callback: $MOD_TARGET -> [MODULE_STATE_COMING] Full formed, running module_init
$MOD_LIVEPATCH: post_patch_callback: $MOD_TARGET -> [MODULE_STATE_COMING] Full formed, running module_init
@@ -217,8 +205,6 @@ livepatch: '$MOD_LIVEPATCH': unpatching complete
% rmmod $MOD_LIVEPATCH"
-# TEST: target module not present
-#
# A simple test of loading a livepatch without one of its patch target
# klp_objects ever loaded ($MOD_TARGET).
#
@@ -227,14 +213,13 @@ livepatch: '$MOD_LIVEPATCH': unpatching complete
# - As expected, only pre/post-(un)patch handlers are executed for
# vmlinux.
-echo -n "TEST: target module not present ... "
-dmesg -C
+start_test "target module not present"
load_lp $MOD_LIVEPATCH
disable_lp $MOD_LIVEPATCH
unload_lp $MOD_LIVEPATCH
-check_result "% modprobe $MOD_LIVEPATCH
+check_result "% insmod test_modules/$MOD_LIVEPATCH.ko
livepatch: enabling patch '$MOD_LIVEPATCH'
livepatch: '$MOD_LIVEPATCH': initializing patching transition
$MOD_LIVEPATCH: pre_patch_callback: vmlinux
@@ -252,8 +237,6 @@ livepatch: '$MOD_LIVEPATCH': unpatching complete
% rmmod $MOD_LIVEPATCH"
-# TEST: pre-patch callback -ENODEV
-#
# Test a scenario where a vmlinux pre-patch callback returns a non-zero
# status (ie, failure).
#
@@ -265,16 +248,15 @@ livepatch: '$MOD_LIVEPATCH': unpatching complete
# The result is that the insmod command refuses to load the livepatch
# module.
-echo -n "TEST: pre-patch callback -ENODEV ... "
-dmesg -C
+start_test "pre-patch callback -ENODEV"
load_mod $MOD_TARGET
load_failing_mod $MOD_LIVEPATCH pre_patch_ret=-19
unload_mod $MOD_TARGET
-check_result "% modprobe $MOD_TARGET
+check_result "% insmod test_modules/$MOD_TARGET.ko
$MOD_TARGET: ${MOD_TARGET}_init
-% modprobe $MOD_LIVEPATCH pre_patch_ret=-19
+% insmod test_modules/$MOD_LIVEPATCH.ko pre_patch_ret=-19
livepatch: enabling patch '$MOD_LIVEPATCH'
livepatch: '$MOD_LIVEPATCH': initializing patching transition
test_klp_callbacks_demo: pre_patch_callback: vmlinux
@@ -283,13 +265,11 @@ livepatch: failed to enable patch '$MOD_LIVEPATCH'
livepatch: '$MOD_LIVEPATCH': canceling patching transition, going to unpatch
livepatch: '$MOD_LIVEPATCH': completing unpatching transition
livepatch: '$MOD_LIVEPATCH': unpatching complete
-modprobe: ERROR: could not insert '$MOD_LIVEPATCH': No such device
+insmod: ERROR: could not insert module test_modules/$MOD_LIVEPATCH.ko: No such device
% rmmod $MOD_TARGET
$MOD_TARGET: ${MOD_TARGET}_exit"
-# TEST: module_coming + pre-patch callback -ENODEV
-#
# Similar to the previous test, setup a livepatch such that its vmlinux
# pre-patch callback returns success. However, when a targeted kernel
# module is later loaded, have the livepatch return a failing status
@@ -307,8 +287,7 @@ $MOD_TARGET: ${MOD_TARGET}_exit"
#
# - Pre/post-unpatch callbacks are run for the vmlinux klp_object.
-echo -n "TEST: module_coming + pre-patch callback -ENODEV ... "
-dmesg -C
+start_test "module_coming + pre-patch callback -ENODEV"
load_lp $MOD_LIVEPATCH
set_pre_patch_ret $MOD_LIVEPATCH -19
@@ -316,7 +295,7 @@ load_failing_mod $MOD_TARGET
disable_lp $MOD_LIVEPATCH
unload_lp $MOD_LIVEPATCH
-check_result "% modprobe $MOD_LIVEPATCH
+check_result "% insmod test_modules/$MOD_LIVEPATCH.ko
livepatch: enabling patch '$MOD_LIVEPATCH'
livepatch: '$MOD_LIVEPATCH': initializing patching transition
$MOD_LIVEPATCH: pre_patch_callback: vmlinux
@@ -325,12 +304,12 @@ livepatch: '$MOD_LIVEPATCH': completing patching transition
$MOD_LIVEPATCH: post_patch_callback: vmlinux
livepatch: '$MOD_LIVEPATCH': patching complete
% echo -19 > /sys/module/$MOD_LIVEPATCH/parameters/pre_patch_ret
-% modprobe $MOD_TARGET
+% insmod test_modules/$MOD_TARGET.ko
livepatch: applying patch '$MOD_LIVEPATCH' to loading module '$MOD_TARGET'
$MOD_LIVEPATCH: pre_patch_callback: $MOD_TARGET -> [MODULE_STATE_COMING] Full formed, running module_init
livepatch: pre-patch callback failed for object '$MOD_TARGET'
livepatch: patch '$MOD_LIVEPATCH' failed for module '$MOD_TARGET', refusing to load module '$MOD_TARGET'
-modprobe: ERROR: could not insert '$MOD_TARGET': No such device
+insmod: ERROR: could not insert module test_modules/$MOD_TARGET.ko: No such device
% echo 0 > /sys/kernel/livepatch/$MOD_LIVEPATCH/enabled
livepatch: '$MOD_LIVEPATCH': initializing unpatching transition
$MOD_LIVEPATCH: pre_unpatch_callback: vmlinux
@@ -341,8 +320,6 @@ livepatch: '$MOD_LIVEPATCH': unpatching complete
% rmmod $MOD_LIVEPATCH"
-# TEST: multiple target modules
-#
# Test loading multiple targeted kernel modules. This test-case is
# mainly for comparing with the next test-case.
#
@@ -353,12 +330,9 @@ livepatch: '$MOD_LIVEPATCH': unpatching complete
# module. Post-patch callbacks are executed and the transition
# completes quickly.
-echo -n "TEST: multiple target modules ... "
-dmesg -C
+start_test "multiple target modules"
-load_mod $MOD_TARGET_BUSY sleep_secs=0
-# give $MOD_TARGET_BUSY::busymod_work_func() a chance to run
-sleep 5
+load_mod $MOD_TARGET_BUSY block_transition=N
load_lp $MOD_LIVEPATCH
load_mod $MOD_TARGET
unload_mod $MOD_TARGET
@@ -366,11 +340,11 @@ disable_lp $MOD_LIVEPATCH
unload_lp $MOD_LIVEPATCH
unload_mod $MOD_TARGET_BUSY
-check_result "% modprobe $MOD_TARGET_BUSY sleep_secs=0
+check_result "% insmod test_modules/$MOD_TARGET_BUSY.ko block_transition=N
$MOD_TARGET_BUSY: ${MOD_TARGET_BUSY}_init
-$MOD_TARGET_BUSY: busymod_work_func, sleeping 0 seconds ...
+$MOD_TARGET_BUSY: busymod_work_func enter
$MOD_TARGET_BUSY: busymod_work_func exit
-% modprobe $MOD_LIVEPATCH
+% insmod test_modules/$MOD_LIVEPATCH.ko
livepatch: enabling patch '$MOD_LIVEPATCH'
livepatch: '$MOD_LIVEPATCH': initializing patching transition
$MOD_LIVEPATCH: pre_patch_callback: vmlinux
@@ -380,7 +354,7 @@ livepatch: '$MOD_LIVEPATCH': completing patching transition
$MOD_LIVEPATCH: post_patch_callback: vmlinux
$MOD_LIVEPATCH: post_patch_callback: $MOD_TARGET_BUSY -> [MODULE_STATE_LIVE] Normal state
livepatch: '$MOD_LIVEPATCH': patching complete
-% modprobe $MOD_TARGET
+% insmod test_modules/$MOD_TARGET.ko
livepatch: applying patch '$MOD_LIVEPATCH' to loading module '$MOD_TARGET'
$MOD_LIVEPATCH: pre_patch_callback: $MOD_TARGET -> [MODULE_STATE_COMING] Full formed, running module_init
$MOD_LIVEPATCH: post_patch_callback: $MOD_TARGET -> [MODULE_STATE_COMING] Full formed, running module_init
@@ -404,11 +378,8 @@ livepatch: '$MOD_LIVEPATCH': unpatching complete
$MOD_TARGET_BUSY: ${MOD_TARGET_BUSY}_exit"
-
-# TEST: busy target module
-#
# A similar test as the previous one, but force the "busy" kernel module
-# to do longer work.
+# to block the livepatch transition.
#
# The livepatching core will refuse to patch a task that is currently
# executing a to-be-patched function -- the consistency model stalls the
@@ -417,8 +388,7 @@ $MOD_TARGET_BUSY: ${MOD_TARGET_BUSY}_exit"
# function for a long time. Meanwhile, load and unload other target
# kernel modules while the livepatch transition is in progress.
#
-# - Load the "busy" kernel module, this time make it do 10 seconds worth
-# of work.
+# - Load the "busy" kernel module, this time make its work function loop
#
# - Meanwhile, the livepatch is loaded. Notice that the patch
# transition does not complete as the targeted "busy" module is
@@ -435,30 +405,32 @@ $MOD_TARGET_BUSY: ${MOD_TARGET_BUSY}_exit"
# klp_object's post-patch callbacks executed, the remaining
# klp_object's pre-unpatch callbacks are skipped.
-echo -n "TEST: busy target module ... "
-dmesg -C
+start_test "busy target module"
-load_mod $MOD_TARGET_BUSY sleep_secs=10
+load_mod $MOD_TARGET_BUSY block_transition=Y
load_lp_nowait $MOD_LIVEPATCH
-# Don't wait for transition, load $MOD_TARGET while the transition
-# is still stalled in $MOD_TARGET_BUSY::busymod_work_func()
-sleep 5
+
+# Wait until the livepatch reports in-transition state, i.e. that it's
+# stalled on $MOD_TARGET_BUSY::busymod_work_func()
+loop_until 'grep -q '^1$' /sys/kernel/livepatch/$MOD_LIVEPATCH/transition' ||
+ die "failed to stall transition"
+
load_mod $MOD_TARGET
unload_mod $MOD_TARGET
disable_lp $MOD_LIVEPATCH
unload_lp $MOD_LIVEPATCH
unload_mod $MOD_TARGET_BUSY
-check_result "% modprobe $MOD_TARGET_BUSY sleep_secs=10
+check_result "% insmod test_modules/$MOD_TARGET_BUSY.ko block_transition=Y
$MOD_TARGET_BUSY: ${MOD_TARGET_BUSY}_init
-$MOD_TARGET_BUSY: busymod_work_func, sleeping 10 seconds ...
-% modprobe $MOD_LIVEPATCH
+$MOD_TARGET_BUSY: busymod_work_func enter
+% insmod test_modules/$MOD_LIVEPATCH.ko
livepatch: enabling patch '$MOD_LIVEPATCH'
livepatch: '$MOD_LIVEPATCH': initializing patching transition
$MOD_LIVEPATCH: pre_patch_callback: vmlinux
$MOD_LIVEPATCH: pre_patch_callback: $MOD_TARGET_BUSY -> [MODULE_STATE_LIVE] Normal state
livepatch: '$MOD_LIVEPATCH': starting patching transition
-% modprobe $MOD_TARGET
+% insmod test_modules/$MOD_TARGET.ko
livepatch: applying patch '$MOD_LIVEPATCH' to loading module '$MOD_TARGET'
$MOD_LIVEPATCH: pre_patch_callback: $MOD_TARGET -> [MODULE_STATE_COMING] Full formed, running module_init
$MOD_TARGET: ${MOD_TARGET}_init
@@ -479,8 +451,6 @@ $MOD_TARGET_BUSY: busymod_work_func exit
$MOD_TARGET_BUSY: ${MOD_TARGET_BUSY}_exit"
-# TEST: multiple livepatches
-#
# Test loading multiple livepatches. This test-case is mainly for comparing
# with the next test-case.
#
@@ -488,8 +458,7 @@ $MOD_TARGET_BUSY: ${MOD_TARGET_BUSY}_exit"
# execute as each patch progresses through its (un)patching
# transition.
-echo -n "TEST: multiple livepatches ... "
-dmesg -C
+start_test "multiple livepatches"
load_lp $MOD_LIVEPATCH
load_lp $MOD_LIVEPATCH2
@@ -498,7 +467,7 @@ disable_lp $MOD_LIVEPATCH
unload_lp $MOD_LIVEPATCH2
unload_lp $MOD_LIVEPATCH
-check_result "% modprobe $MOD_LIVEPATCH
+check_result "% insmod test_modules/$MOD_LIVEPATCH.ko
livepatch: enabling patch '$MOD_LIVEPATCH'
livepatch: '$MOD_LIVEPATCH': initializing patching transition
$MOD_LIVEPATCH: pre_patch_callback: vmlinux
@@ -506,7 +475,7 @@ livepatch: '$MOD_LIVEPATCH': starting patching transition
livepatch: '$MOD_LIVEPATCH': completing patching transition
$MOD_LIVEPATCH: post_patch_callback: vmlinux
livepatch: '$MOD_LIVEPATCH': patching complete
-% modprobe $MOD_LIVEPATCH2
+% insmod test_modules/$MOD_LIVEPATCH2.ko
livepatch: enabling patch '$MOD_LIVEPATCH2'
livepatch: '$MOD_LIVEPATCH2': initializing patching transition
$MOD_LIVEPATCH2: pre_patch_callback: vmlinux
@@ -532,8 +501,6 @@ livepatch: '$MOD_LIVEPATCH': unpatching complete
% rmmod $MOD_LIVEPATCH"
-# TEST: atomic replace
-#
# Load multiple livepatches, but the second as an 'atomic-replace'
# patch. When the latter loads, the original livepatch should be
# disabled and *none* of its pre/post-unpatch callbacks executed. On
@@ -548,8 +515,7 @@ livepatch: '$MOD_LIVEPATCH': unpatching complete
# - Once the atomic replace module is loaded, only its pre and post
# unpatch callbacks are executed.
-echo -n "TEST: atomic replace ... "
-dmesg -C
+start_test "atomic replace"
load_lp $MOD_LIVEPATCH
load_lp $MOD_LIVEPATCH2 replace=1
@@ -557,7 +523,7 @@ disable_lp $MOD_LIVEPATCH2
unload_lp $MOD_LIVEPATCH2
unload_lp $MOD_LIVEPATCH
-check_result "% modprobe $MOD_LIVEPATCH
+check_result "% insmod test_modules/$MOD_LIVEPATCH.ko
livepatch: enabling patch '$MOD_LIVEPATCH'
livepatch: '$MOD_LIVEPATCH': initializing patching transition
$MOD_LIVEPATCH: pre_patch_callback: vmlinux
@@ -565,7 +531,7 @@ livepatch: '$MOD_LIVEPATCH': starting patching transition
livepatch: '$MOD_LIVEPATCH': completing patching transition
$MOD_LIVEPATCH: post_patch_callback: vmlinux
livepatch: '$MOD_LIVEPATCH': patching complete
-% modprobe $MOD_LIVEPATCH2 replace=1
+% insmod test_modules/$MOD_LIVEPATCH2.ko replace=1
livepatch: enabling patch '$MOD_LIVEPATCH2'
livepatch: '$MOD_LIVEPATCH2': initializing patching transition
$MOD_LIVEPATCH2: pre_patch_callback: vmlinux
diff --git a/tools/testing/selftests/livepatch/test-ftrace.sh b/tools/testing/selftests/livepatch/test-ftrace.sh
index e2a76887f40a..730218bce99c 100755
--- a/tools/testing/selftests/livepatch/test-ftrace.sh
+++ b/tools/testing/selftests/livepatch/test-ftrace.sh
@@ -9,13 +9,11 @@ MOD_LIVEPATCH=test_klp_livepatch
setup_config
-# TEST: livepatch interaction with ftrace_enabled sysctl
# - turn ftrace_enabled OFF and verify livepatches can't load
# - turn ftrace_enabled ON and verify livepatch can load
# - verify that ftrace_enabled can't be turned OFF while a livepatch is loaded
-echo -n "TEST: livepatch interaction with ftrace_enabled sysctl ... "
-dmesg -C
+start_test "livepatch interaction with ftrace_enabled sysctl"
set_ftrace_enabled 0
load_failing_mod $MOD_LIVEPATCH
@@ -27,7 +25,8 @@ if [[ "$(cat /proc/cmdline)" != "$MOD_LIVEPATCH: this has been live patched" ]]
die "livepatch kselftest(s) failed"
fi
-set_ftrace_enabled 0
+# Check that ftrace could not get disabled when a livepatch is enabled
+set_ftrace_enabled --fail 0
if [[ "$(cat /proc/cmdline)" != "$MOD_LIVEPATCH: this has been live patched" ]] ; then
echo -e "FAIL\n\n"
die "livepatch kselftest(s) failed"
@@ -36,7 +35,7 @@ disable_lp $MOD_LIVEPATCH
unload_lp $MOD_LIVEPATCH
check_result "livepatch: kernel.ftrace_enabled = 0
-% modprobe $MOD_LIVEPATCH
+% insmod test_modules/$MOD_LIVEPATCH.ko
livepatch: enabling patch '$MOD_LIVEPATCH'
livepatch: '$MOD_LIVEPATCH': initializing patching transition
livepatch: failed to register ftrace handler for function 'cmdline_proc_show' (-16)
@@ -45,15 +44,15 @@ livepatch: failed to enable patch '$MOD_LIVEPATCH'
livepatch: '$MOD_LIVEPATCH': canceling patching transition, going to unpatch
livepatch: '$MOD_LIVEPATCH': completing unpatching transition
livepatch: '$MOD_LIVEPATCH': unpatching complete
-modprobe: ERROR: could not insert '$MOD_LIVEPATCH': Device or resource busy
+insmod: ERROR: could not insert module test_modules/$MOD_LIVEPATCH.ko: Device or resource busy
livepatch: kernel.ftrace_enabled = 1
-% modprobe $MOD_LIVEPATCH
+% insmod test_modules/$MOD_LIVEPATCH.ko
livepatch: enabling patch '$MOD_LIVEPATCH'
livepatch: '$MOD_LIVEPATCH': initializing patching transition
livepatch: '$MOD_LIVEPATCH': starting patching transition
livepatch: '$MOD_LIVEPATCH': completing patching transition
livepatch: '$MOD_LIVEPATCH': patching complete
-livepatch: sysctl: setting key \"kernel.ftrace_enabled\": Device or resource busy kernel.ftrace_enabled = 0
+livepatch: sysctl: setting key \"kernel.ftrace_enabled\": Device or resource busy
% echo 0 > /sys/kernel/livepatch/$MOD_LIVEPATCH/enabled
livepatch: '$MOD_LIVEPATCH': initializing unpatching transition
livepatch: '$MOD_LIVEPATCH': starting unpatching transition
diff --git a/tools/testing/selftests/livepatch/test-livepatch.sh b/tools/testing/selftests/livepatch/test-livepatch.sh
index 493e3df415a1..e3455a6b1158 100755
--- a/tools/testing/selftests/livepatch/test-livepatch.sh
+++ b/tools/testing/selftests/livepatch/test-livepatch.sh
@@ -10,13 +10,11 @@ MOD_REPLACE=test_klp_atomic_replace
setup_config
-# TEST: basic function patching
# - load a livepatch that modifies the output from /proc/cmdline and
# verify correct behavior
# - unload the livepatch and make sure the patch was removed
-echo -n "TEST: basic function patching ... "
-dmesg -C
+start_test "basic function patching"
load_lp $MOD_LIVEPATCH
@@ -33,7 +31,7 @@ if [[ "$(cat /proc/cmdline)" == "$MOD_LIVEPATCH: this has been live patched" ]]
die "livepatch kselftest(s) failed"
fi
-check_result "% modprobe $MOD_LIVEPATCH
+check_result "% insmod test_modules/$MOD_LIVEPATCH.ko
livepatch: enabling patch '$MOD_LIVEPATCH'
livepatch: '$MOD_LIVEPATCH': initializing patching transition
livepatch: '$MOD_LIVEPATCH': starting patching transition
@@ -47,15 +45,13 @@ livepatch: '$MOD_LIVEPATCH': unpatching complete
% rmmod $MOD_LIVEPATCH"
-# TEST: multiple livepatches
# - load a livepatch that modifies the output from /proc/cmdline and
# verify correct behavior
# - load another livepatch and verify that both livepatches are active
# - unload the second livepatch and verify that the first is still active
# - unload the first livepatch and verify none are active
-echo -n "TEST: multiple livepatches ... "
-dmesg -C
+start_test "multiple livepatches"
load_lp $MOD_LIVEPATCH
@@ -79,14 +75,14 @@ unload_lp $MOD_LIVEPATCH
grep 'live patched' /proc/cmdline > /dev/kmsg
grep 'live patched' /proc/meminfo > /dev/kmsg
-check_result "% modprobe $MOD_LIVEPATCH
+check_result "% insmod test_modules/$MOD_LIVEPATCH.ko
livepatch: enabling patch '$MOD_LIVEPATCH'
livepatch: '$MOD_LIVEPATCH': initializing patching transition
livepatch: '$MOD_LIVEPATCH': starting patching transition
livepatch: '$MOD_LIVEPATCH': completing patching transition
livepatch: '$MOD_LIVEPATCH': patching complete
$MOD_LIVEPATCH: this has been live patched
-% modprobe $MOD_REPLACE replace=0
+% insmod test_modules/$MOD_REPLACE.ko replace=0
livepatch: enabling patch '$MOD_REPLACE'
livepatch: '$MOD_REPLACE': initializing patching transition
livepatch: '$MOD_REPLACE': starting patching transition
@@ -109,7 +105,6 @@ livepatch: '$MOD_LIVEPATCH': unpatching complete
% rmmod $MOD_LIVEPATCH"
-# TEST: atomic replace livepatch
# - load a livepatch that modifies the output from /proc/cmdline and
# verify correct behavior
# - load an atomic replace livepatch and verify that only the second is active
@@ -117,8 +112,7 @@ livepatch: '$MOD_LIVEPATCH': unpatching complete
# is still active
# - remove the atomic replace livepatch and verify that none are active
-echo -n "TEST: atomic replace livepatch ... "
-dmesg -C
+start_test "atomic replace livepatch"
load_lp $MOD_LIVEPATCH
@@ -141,14 +135,14 @@ unload_lp $MOD_REPLACE
grep 'live patched' /proc/cmdline > /dev/kmsg
grep 'live patched' /proc/meminfo > /dev/kmsg
-check_result "% modprobe $MOD_LIVEPATCH
+check_result "% insmod test_modules/$MOD_LIVEPATCH.ko
livepatch: enabling patch '$MOD_LIVEPATCH'
livepatch: '$MOD_LIVEPATCH': initializing patching transition
livepatch: '$MOD_LIVEPATCH': starting patching transition
livepatch: '$MOD_LIVEPATCH': completing patching transition
livepatch: '$MOD_LIVEPATCH': patching complete
$MOD_LIVEPATCH: this has been live patched
-% modprobe $MOD_REPLACE replace=1
+% insmod test_modules/$MOD_REPLACE.ko replace=1
livepatch: enabling patch '$MOD_REPLACE'
livepatch: '$MOD_REPLACE': initializing patching transition
livepatch: '$MOD_REPLACE': starting patching transition
diff --git a/tools/testing/selftests/livepatch/test-shadow-vars.sh b/tools/testing/selftests/livepatch/test-shadow-vars.sh
index 1aae73299114..1218c155bffe 100755
--- a/tools/testing/selftests/livepatch/test-shadow-vars.sh
+++ b/tools/testing/selftests/livepatch/test-shadow-vars.sh
@@ -9,52 +9,71 @@ MOD_TEST=test_klp_shadow_vars
setup_config
-# TEST: basic shadow variable API
# - load a module that exercises the shadow variable API
-echo -n "TEST: basic shadow variable API ... "
-dmesg -C
+start_test "basic shadow variable API"
load_mod $MOD_TEST
unload_mod $MOD_TEST
-check_result "% modprobe $MOD_TEST
-$MOD_TEST: klp_shadow_get(obj=PTR5, id=0x1234) = PTR0
+check_result "% insmod test_modules/$MOD_TEST.ko
+$MOD_TEST: klp_shadow_get(obj=PTR1, id=0x1234) = PTR0
$MOD_TEST: got expected NULL result
-$MOD_TEST: shadow_ctor: PTR6 -> PTR1
-$MOD_TEST: klp_shadow_alloc(obj=PTR5, id=0x1234, size=8, gfp_flags=GFP_KERNEL), ctor=PTR7, ctor_data=PTR1 = PTR6
-$MOD_TEST: shadow_ctor: PTR8 -> PTR2
-$MOD_TEST: klp_shadow_alloc(obj=PTR9, id=0x1234, size=8, gfp_flags=GFP_KERNEL), ctor=PTR7, ctor_data=PTR2 = PTR8
-$MOD_TEST: shadow_ctor: PTR10 -> PTR3
-$MOD_TEST: klp_shadow_alloc(obj=PTR5, id=0x1235, size=8, gfp_flags=GFP_KERNEL), ctor=PTR7, ctor_data=PTR3 = PTR10
-$MOD_TEST: klp_shadow_get(obj=PTR5, id=0x1234) = PTR6
-$MOD_TEST: got expected PTR6 -> PTR1 result
+$MOD_TEST: shadow_ctor: PTR3 -> PTR2
+$MOD_TEST: klp_shadow_get_or_alloc(obj=PTR1, id=0x1234, size=8, gfp_flags=GFP_KERNEL), ctor=PTR4, ctor_data=PTR2 = PTR3
+$MOD_TEST: shadow_ctor: PTR6 -> PTR5
+$MOD_TEST: klp_shadow_alloc(obj=PTR1, id=0x1235, size=8, gfp_flags=GFP_KERNEL), ctor=PTR4, ctor_data=PTR5 = PTR6
+$MOD_TEST: shadow_ctor: PTR8 -> PTR7
+$MOD_TEST: klp_shadow_alloc(obj=PTR9, id=0x1234, size=8, gfp_flags=GFP_KERNEL), ctor=PTR4, ctor_data=PTR7 = PTR8
+$MOD_TEST: shadow_ctor: PTR11 -> PTR10
+$MOD_TEST: klp_shadow_alloc(obj=PTR9, id=0x1235, size=8, gfp_flags=GFP_KERNEL), ctor=PTR4, ctor_data=PTR10 = PTR11
+$MOD_TEST: shadow_ctor: PTR13 -> PTR12
+$MOD_TEST: klp_shadow_get_or_alloc(obj=PTR14, id=0x1234, size=8, gfp_flags=GFP_KERNEL), ctor=PTR4, ctor_data=PTR12 = PTR13
+$MOD_TEST: shadow_ctor: PTR16 -> PTR15
+$MOD_TEST: klp_shadow_alloc(obj=PTR14, id=0x1235, size=8, gfp_flags=GFP_KERNEL), ctor=PTR4, ctor_data=PTR15 = PTR16
+$MOD_TEST: klp_shadow_get(obj=PTR1, id=0x1234) = PTR3
+$MOD_TEST: got expected PTR3 -> PTR2 result
+$MOD_TEST: klp_shadow_get(obj=PTR1, id=0x1235) = PTR6
+$MOD_TEST: got expected PTR6 -> PTR5 result
$MOD_TEST: klp_shadow_get(obj=PTR9, id=0x1234) = PTR8
-$MOD_TEST: got expected PTR8 -> PTR2 result
-$MOD_TEST: klp_shadow_get(obj=PTR5, id=0x1235) = PTR10
-$MOD_TEST: got expected PTR10 -> PTR3 result
-$MOD_TEST: shadow_ctor: PTR11 -> PTR4
-$MOD_TEST: klp_shadow_get_or_alloc(obj=PTR12, id=0x1234, size=8, gfp_flags=GFP_KERNEL), ctor=PTR7, ctor_data=PTR4 = PTR11
-$MOD_TEST: klp_shadow_get_or_alloc(obj=PTR12, id=0x1234, size=8, gfp_flags=GFP_KERNEL), ctor=PTR7, ctor_data=PTR4 = PTR11
-$MOD_TEST: got expected PTR11 -> PTR4 result
-$MOD_TEST: shadow_dtor(obj=PTR5, shadow_data=PTR6)
-$MOD_TEST: klp_shadow_free(obj=PTR5, id=0x1234, dtor=PTR13)
-$MOD_TEST: klp_shadow_get(obj=PTR5, id=0x1234) = PTR0
+$MOD_TEST: got expected PTR8 -> PTR7 result
+$MOD_TEST: klp_shadow_get(obj=PTR9, id=0x1235) = PTR11
+$MOD_TEST: got expected PTR11 -> PTR10 result
+$MOD_TEST: klp_shadow_get(obj=PTR14, id=0x1234) = PTR13
+$MOD_TEST: got expected PTR13 -> PTR12 result
+$MOD_TEST: klp_shadow_get(obj=PTR14, id=0x1235) = PTR16
+$MOD_TEST: got expected PTR16 -> PTR15 result
+$MOD_TEST: klp_shadow_get_or_alloc(obj=PTR1, id=0x1234, size=8, gfp_flags=GFP_KERNEL), ctor=PTR4, ctor_data=PTR2 = PTR3
+$MOD_TEST: got expected PTR3 -> PTR2 result
+$MOD_TEST: klp_shadow_get_or_alloc(obj=PTR9, id=0x1234, size=8, gfp_flags=GFP_KERNEL), ctor=PTR4, ctor_data=PTR7 = PTR8
+$MOD_TEST: got expected PTR8 -> PTR7 result
+$MOD_TEST: klp_shadow_get_or_alloc(obj=PTR14, id=0x1234, size=8, gfp_flags=GFP_KERNEL), ctor=PTR4, ctor_data=PTR12 = PTR13
+$MOD_TEST: got expected PTR13 -> PTR12 result
+$MOD_TEST: shadow_dtor(obj=PTR1, shadow_data=PTR3)
+$MOD_TEST: klp_shadow_free(obj=PTR1, id=0x1234, dtor=PTR17)
+$MOD_TEST: klp_shadow_get(obj=PTR1, id=0x1234) = PTR0
$MOD_TEST: got expected NULL result
$MOD_TEST: shadow_dtor(obj=PTR9, shadow_data=PTR8)
-$MOD_TEST: klp_shadow_free(obj=PTR9, id=0x1234, dtor=PTR13)
+$MOD_TEST: klp_shadow_free(obj=PTR9, id=0x1234, dtor=PTR17)
$MOD_TEST: klp_shadow_get(obj=PTR9, id=0x1234) = PTR0
$MOD_TEST: got expected NULL result
-$MOD_TEST: shadow_dtor(obj=PTR12, shadow_data=PTR11)
-$MOD_TEST: klp_shadow_free(obj=PTR12, id=0x1234, dtor=PTR13)
-$MOD_TEST: klp_shadow_get(obj=PTR12, id=0x1234) = PTR0
+$MOD_TEST: shadow_dtor(obj=PTR14, shadow_data=PTR13)
+$MOD_TEST: klp_shadow_free(obj=PTR14, id=0x1234, dtor=PTR17)
+$MOD_TEST: klp_shadow_get(obj=PTR14, id=0x1234) = PTR0
$MOD_TEST: got expected NULL result
-$MOD_TEST: klp_shadow_get(obj=PTR5, id=0x1235) = PTR10
-$MOD_TEST: got expected PTR10 -> PTR3 result
-$MOD_TEST: shadow_dtor(obj=PTR5, shadow_data=PTR10)
-$MOD_TEST: klp_shadow_free_all(id=0x1235, dtor=PTR13)
-$MOD_TEST: klp_shadow_get(obj=PTR5, id=0x1234) = PTR0
-$MOD_TEST: shadow_get() got expected NULL result
-% rmmod test_klp_shadow_vars"
+$MOD_TEST: klp_shadow_get(obj=PTR1, id=0x1235) = PTR6
+$MOD_TEST: got expected PTR6 -> PTR5 result
+$MOD_TEST: klp_shadow_get(obj=PTR9, id=0x1235) = PTR11
+$MOD_TEST: got expected PTR11 -> PTR10 result
+$MOD_TEST: klp_shadow_get(obj=PTR14, id=0x1235) = PTR16
+$MOD_TEST: got expected PTR16 -> PTR15 result
+$MOD_TEST: klp_shadow_free_all(id=0x1235, dtor=PTR0)
+$MOD_TEST: klp_shadow_get(obj=PTR1, id=0x1235) = PTR0
+$MOD_TEST: got expected NULL result
+$MOD_TEST: klp_shadow_get(obj=PTR9, id=0x1235) = PTR0
+$MOD_TEST: got expected NULL result
+$MOD_TEST: klp_shadow_get(obj=PTR14, id=0x1235) = PTR0
+$MOD_TEST: got expected NULL result
+% rmmod $MOD_TEST"
exit 0
diff --git a/tools/testing/selftests/livepatch/test-state.sh b/tools/testing/selftests/livepatch/test-state.sh
index a08212708115..10a52ac06185 100755
--- a/tools/testing/selftests/livepatch/test-state.sh
+++ b/tools/testing/selftests/livepatch/test-state.sh
@@ -10,16 +10,16 @@ MOD_LIVEPATCH3=test_klp_state3
setup_config
-# TEST: Loading and removing a module that modifies the system state
-echo -n "TEST: system state modification ... "
-dmesg -C
+# Load and remove a module that modifies the system state
+
+start_test "system state modification"
load_lp $MOD_LIVEPATCH
disable_lp $MOD_LIVEPATCH
unload_lp $MOD_LIVEPATCH
-check_result "% modprobe $MOD_LIVEPATCH
+check_result "% insmod test_modules/$MOD_LIVEPATCH.ko
livepatch: enabling patch '$MOD_LIVEPATCH'
livepatch: '$MOD_LIVEPATCH': initializing patching transition
$MOD_LIVEPATCH: pre_patch_callback: vmlinux
@@ -41,10 +41,9 @@ livepatch: '$MOD_LIVEPATCH': unpatching complete
% rmmod $MOD_LIVEPATCH"
-# TEST: Take over system state change by a cumulative patch
+# Take over system state change by a cumulative patch
-echo -n "TEST: taking over system state modification ... "
-dmesg -C
+start_test "taking over system state modification"
load_lp $MOD_LIVEPATCH
load_lp $MOD_LIVEPATCH2
@@ -52,7 +51,7 @@ unload_lp $MOD_LIVEPATCH
disable_lp $MOD_LIVEPATCH2
unload_lp $MOD_LIVEPATCH2
-check_result "% modprobe $MOD_LIVEPATCH
+check_result "% insmod test_modules/$MOD_LIVEPATCH.ko
livepatch: enabling patch '$MOD_LIVEPATCH'
livepatch: '$MOD_LIVEPATCH': initializing patching transition
$MOD_LIVEPATCH: pre_patch_callback: vmlinux
@@ -62,7 +61,7 @@ livepatch: '$MOD_LIVEPATCH': completing patching transition
$MOD_LIVEPATCH: post_patch_callback: vmlinux
$MOD_LIVEPATCH: fix_console_loglevel: fixing console_loglevel
livepatch: '$MOD_LIVEPATCH': patching complete
-% modprobe $MOD_LIVEPATCH2
+% insmod test_modules/$MOD_LIVEPATCH2.ko
livepatch: enabling patch '$MOD_LIVEPATCH2'
livepatch: '$MOD_LIVEPATCH2': initializing patching transition
$MOD_LIVEPATCH2: pre_patch_callback: vmlinux
@@ -85,10 +84,9 @@ livepatch: '$MOD_LIVEPATCH2': unpatching complete
% rmmod $MOD_LIVEPATCH2"
-# TEST: Take over system state change by a cumulative patch
+# Take over system state change by a cumulative patch
-echo -n "TEST: compatible cumulative livepatches ... "
-dmesg -C
+start_test "compatible cumulative livepatches"
load_lp $MOD_LIVEPATCH2
load_lp $MOD_LIVEPATCH3
@@ -98,7 +96,7 @@ disable_lp $MOD_LIVEPATCH2
unload_lp $MOD_LIVEPATCH2
unload_lp $MOD_LIVEPATCH3
-check_result "% modprobe $MOD_LIVEPATCH2
+check_result "% insmod test_modules/$MOD_LIVEPATCH2.ko
livepatch: enabling patch '$MOD_LIVEPATCH2'
livepatch: '$MOD_LIVEPATCH2': initializing patching transition
$MOD_LIVEPATCH2: pre_patch_callback: vmlinux
@@ -108,7 +106,7 @@ livepatch: '$MOD_LIVEPATCH2': completing patching transition
$MOD_LIVEPATCH2: post_patch_callback: vmlinux
$MOD_LIVEPATCH2: fix_console_loglevel: fixing console_loglevel
livepatch: '$MOD_LIVEPATCH2': patching complete
-% modprobe $MOD_LIVEPATCH3
+% insmod test_modules/$MOD_LIVEPATCH3.ko
livepatch: enabling patch '$MOD_LIVEPATCH3'
livepatch: '$MOD_LIVEPATCH3': initializing patching transition
$MOD_LIVEPATCH3: pre_patch_callback: vmlinux
@@ -119,7 +117,7 @@ $MOD_LIVEPATCH3: post_patch_callback: vmlinux
$MOD_LIVEPATCH3: fix_console_loglevel: taking over the console_loglevel change
livepatch: '$MOD_LIVEPATCH3': patching complete
% rmmod $MOD_LIVEPATCH2
-% modprobe $MOD_LIVEPATCH2
+% insmod test_modules/$MOD_LIVEPATCH2.ko
livepatch: enabling patch '$MOD_LIVEPATCH2'
livepatch: '$MOD_LIVEPATCH2': initializing patching transition
$MOD_LIVEPATCH2: pre_patch_callback: vmlinux
@@ -142,17 +140,16 @@ livepatch: '$MOD_LIVEPATCH2': unpatching complete
% rmmod $MOD_LIVEPATCH3"
-# TEST: Failure caused by incompatible cumulative livepatches
+# Failure caused by incompatible cumulative livepatches
-echo -n "TEST: incompatible cumulative livepatches ... "
-dmesg -C
+start_test "incompatible cumulative livepatches"
load_lp $MOD_LIVEPATCH2
load_failing_mod $MOD_LIVEPATCH
disable_lp $MOD_LIVEPATCH2
unload_lp $MOD_LIVEPATCH2
-check_result "% modprobe $MOD_LIVEPATCH2
+check_result "% insmod test_modules/$MOD_LIVEPATCH2.ko
livepatch: enabling patch '$MOD_LIVEPATCH2'
livepatch: '$MOD_LIVEPATCH2': initializing patching transition
$MOD_LIVEPATCH2: pre_patch_callback: vmlinux
@@ -162,9 +159,9 @@ livepatch: '$MOD_LIVEPATCH2': completing patching transition
$MOD_LIVEPATCH2: post_patch_callback: vmlinux
$MOD_LIVEPATCH2: fix_console_loglevel: fixing console_loglevel
livepatch: '$MOD_LIVEPATCH2': patching complete
-% modprobe $MOD_LIVEPATCH
+% insmod test_modules/$MOD_LIVEPATCH.ko
livepatch: Livepatch patch ($MOD_LIVEPATCH) is not compatible with the already installed livepatches.
-modprobe: ERROR: could not insert '$MOD_LIVEPATCH': Invalid argument
+insmod: ERROR: could not insert module test_modules/$MOD_LIVEPATCH.ko: Invalid parameters
% echo 0 > /sys/kernel/livepatch/$MOD_LIVEPATCH2/enabled
livepatch: '$MOD_LIVEPATCH2': initializing unpatching transition
$MOD_LIVEPATCH2: pre_unpatch_callback: vmlinux
diff --git a/tools/testing/selftests/livepatch/test-syscall.sh b/tools/testing/selftests/livepatch/test-syscall.sh
new file mode 100755
index 000000000000..b76a881d4013
--- /dev/null
+++ b/tools/testing/selftests/livepatch/test-syscall.sh
@@ -0,0 +1,53 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+# Copyright (C) 2023 SUSE
+# Author: Marcos Paulo de Souza <mpdesouza@suse.com>
+
+. $(dirname $0)/functions.sh
+
+MOD_SYSCALL=test_klp_syscall
+
+setup_config
+
+# - Start _NRPROC processes calling getpid and load a livepatch to patch the
+# getpid syscall. Check if all the processes transitioned to the livepatched
+# state.
+
+start_test "patch getpid syscall while being heavily hammered"
+
+for i in $(seq 1 $(getconf _NPROCESSORS_ONLN)); do
+ ./test_klp-call_getpid &
+ pids[$i]="$!"
+done
+
+pid_list=$(echo ${pids[@]} | tr ' ' ',')
+load_lp $MOD_SYSCALL klp_pids=$pid_list
+
+# wait for all tasks to transition to patched state
+loop_until 'grep -q '^0$' /sys/kernel/test_klp_syscall/npids'
+
+pending_pids=$(cat /sys/kernel/test_klp_syscall/npids)
+log "$MOD_SYSCALL: Remaining not livepatched processes: $pending_pids"
+
+for pid in ${pids[@]}; do
+ kill $pid || true
+done
+
+disable_lp $MOD_SYSCALL
+unload_lp $MOD_SYSCALL
+
+check_result "% insmod test_modules/$MOD_SYSCALL.ko klp_pids=$pid_list
+livepatch: enabling patch '$MOD_SYSCALL'
+livepatch: '$MOD_SYSCALL': initializing patching transition
+livepatch: '$MOD_SYSCALL': starting patching transition
+livepatch: '$MOD_SYSCALL': completing patching transition
+livepatch: '$MOD_SYSCALL': patching complete
+$MOD_SYSCALL: Remaining not livepatched processes: 0
+% echo 0 > /sys/kernel/livepatch/$MOD_SYSCALL/enabled
+livepatch: '$MOD_SYSCALL': initializing unpatching transition
+livepatch: '$MOD_SYSCALL': starting unpatching transition
+livepatch: '$MOD_SYSCALL': completing unpatching transition
+livepatch: '$MOD_SYSCALL': unpatching complete
+% rmmod $MOD_SYSCALL"
+
+exit 0
diff --git a/tools/testing/selftests/livepatch/test-sysfs.sh b/tools/testing/selftests/livepatch/test-sysfs.sh
new file mode 100755
index 000000000000..6c646afa7395
--- /dev/null
+++ b/tools/testing/selftests/livepatch/test-sysfs.sh
@@ -0,0 +1,86 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+# Copyright (C) 2022 Song Liu <song@kernel.org>
+
+. $(dirname $0)/functions.sh
+
+MOD_LIVEPATCH=test_klp_livepatch
+
+setup_config
+
+# - load a livepatch and verifies the sysfs entries work as expected
+
+start_test "sysfs test"
+
+load_lp $MOD_LIVEPATCH
+
+check_sysfs_rights "$MOD_LIVEPATCH" "" "drwxr-xr-x"
+check_sysfs_rights "$MOD_LIVEPATCH" "enabled" "-rw-r--r--"
+check_sysfs_value "$MOD_LIVEPATCH" "enabled" "1"
+check_sysfs_rights "$MOD_LIVEPATCH" "force" "--w-------"
+check_sysfs_rights "$MOD_LIVEPATCH" "transition" "-r--r--r--"
+check_sysfs_value "$MOD_LIVEPATCH" "transition" "0"
+check_sysfs_rights "$MOD_LIVEPATCH" "vmlinux/patched" "-r--r--r--"
+check_sysfs_value "$MOD_LIVEPATCH" "vmlinux/patched" "1"
+
+disable_lp $MOD_LIVEPATCH
+
+unload_lp $MOD_LIVEPATCH
+
+check_result "% insmod test_modules/$MOD_LIVEPATCH.ko
+livepatch: enabling patch '$MOD_LIVEPATCH'
+livepatch: '$MOD_LIVEPATCH': initializing patching transition
+livepatch: '$MOD_LIVEPATCH': starting patching transition
+livepatch: '$MOD_LIVEPATCH': completing patching transition
+livepatch: '$MOD_LIVEPATCH': patching complete
+% echo 0 > /sys/kernel/livepatch/$MOD_LIVEPATCH/enabled
+livepatch: '$MOD_LIVEPATCH': initializing unpatching transition
+livepatch: '$MOD_LIVEPATCH': starting unpatching transition
+livepatch: '$MOD_LIVEPATCH': completing unpatching transition
+livepatch: '$MOD_LIVEPATCH': unpatching complete
+% rmmod $MOD_LIVEPATCH"
+
+start_test "sysfs test object/patched"
+
+MOD_LIVEPATCH=test_klp_callbacks_demo
+MOD_TARGET=test_klp_callbacks_mod
+load_lp $MOD_LIVEPATCH
+
+# check the "patch" file changes as target module loads/unloads
+check_sysfs_value "$MOD_LIVEPATCH" "$MOD_TARGET/patched" "0"
+load_mod $MOD_TARGET
+check_sysfs_value "$MOD_LIVEPATCH" "$MOD_TARGET/patched" "1"
+unload_mod $MOD_TARGET
+check_sysfs_value "$MOD_LIVEPATCH" "$MOD_TARGET/patched" "0"
+
+disable_lp $MOD_LIVEPATCH
+unload_lp $MOD_LIVEPATCH
+
+check_result "% insmod test_modules/test_klp_callbacks_demo.ko
+livepatch: enabling patch 'test_klp_callbacks_demo'
+livepatch: 'test_klp_callbacks_demo': initializing patching transition
+test_klp_callbacks_demo: pre_patch_callback: vmlinux
+livepatch: 'test_klp_callbacks_demo': starting patching transition
+livepatch: 'test_klp_callbacks_demo': completing patching transition
+test_klp_callbacks_demo: post_patch_callback: vmlinux
+livepatch: 'test_klp_callbacks_demo': patching complete
+% insmod test_modules/test_klp_callbacks_mod.ko
+livepatch: applying patch 'test_klp_callbacks_demo' to loading module 'test_klp_callbacks_mod'
+test_klp_callbacks_demo: pre_patch_callback: test_klp_callbacks_mod -> [MODULE_STATE_COMING] Full formed, running module_init
+test_klp_callbacks_demo: post_patch_callback: test_klp_callbacks_mod -> [MODULE_STATE_COMING] Full formed, running module_init
+test_klp_callbacks_mod: test_klp_callbacks_mod_init
+% rmmod test_klp_callbacks_mod
+test_klp_callbacks_mod: test_klp_callbacks_mod_exit
+test_klp_callbacks_demo: pre_unpatch_callback: test_klp_callbacks_mod -> [MODULE_STATE_GOING] Going away
+livepatch: reverting patch 'test_klp_callbacks_demo' on unloading module 'test_klp_callbacks_mod'
+test_klp_callbacks_demo: post_unpatch_callback: test_klp_callbacks_mod -> [MODULE_STATE_GOING] Going away
+% echo 0 > /sys/kernel/livepatch/test_klp_callbacks_demo/enabled
+livepatch: 'test_klp_callbacks_demo': initializing unpatching transition
+test_klp_callbacks_demo: pre_unpatch_callback: vmlinux
+livepatch: 'test_klp_callbacks_demo': starting unpatching transition
+livepatch: 'test_klp_callbacks_demo': completing unpatching transition
+test_klp_callbacks_demo: post_unpatch_callback: vmlinux
+livepatch: 'test_klp_callbacks_demo': unpatching complete
+% rmmod test_klp_callbacks_demo"
+
+exit 0
diff --git a/tools/testing/selftests/livepatch/test_klp-call_getpid.c b/tools/testing/selftests/livepatch/test_klp-call_getpid.c
new file mode 100644
index 000000000000..ce321a2d7308
--- /dev/null
+++ b/tools/testing/selftests/livepatch/test_klp-call_getpid.c
@@ -0,0 +1,44 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (C) 2023 SUSE
+ * Authors: Libor Pechacek <lpechacek@suse.cz>
+ * Marcos Paulo de Souza <mpdesouza@suse.com>
+ */
+
+#include <stdio.h>
+#include <unistd.h>
+#include <sys/syscall.h>
+#include <sys/types.h>
+#include <signal.h>
+
+static int stop;
+static int sig_int;
+
+void hup_handler(int signum)
+{
+ stop = 1;
+}
+
+void int_handler(int signum)
+{
+ stop = 1;
+ sig_int = 1;
+}
+
+int main(int argc, char *argv[])
+{
+ long count = 0;
+
+ signal(SIGHUP, &hup_handler);
+ signal(SIGINT, &int_handler);
+
+ while (!stop) {
+ (void)syscall(SYS_getpid);
+ count++;
+ }
+
+ if (sig_int)
+ printf("%ld iterations done\n", count);
+
+ return 0;
+}
diff --git a/tools/testing/selftests/livepatch/test_modules/Makefile b/tools/testing/selftests/livepatch/test_modules/Makefile
new file mode 100644
index 000000000000..e6e638c4bcba
--- /dev/null
+++ b/tools/testing/selftests/livepatch/test_modules/Makefile
@@ -0,0 +1,26 @@
+TESTMODS_DIR := $(realpath $(dir $(abspath $(lastword $(MAKEFILE_LIST)))))
+KDIR ?= /lib/modules/$(shell uname -r)/build
+
+obj-m += test_klp_atomic_replace.o \
+ test_klp_callbacks_busy.o \
+ test_klp_callbacks_demo.o \
+ test_klp_callbacks_demo2.o \
+ test_klp_callbacks_mod.o \
+ test_klp_livepatch.o \
+ test_klp_state.o \
+ test_klp_state2.o \
+ test_klp_state3.o \
+ test_klp_shadow_vars.o \
+ test_klp_syscall.o
+
+# Ensure that KDIR exists, otherwise skip the compilation
+modules:
+ifneq ("$(wildcard $(KDIR))", "")
+ $(Q)$(MAKE) -C $(KDIR) modules KBUILD_EXTMOD=$(TESTMODS_DIR)
+endif
+
+# Ensure that KDIR exists, otherwise skip the clean target
+clean:
+ifneq ("$(wildcard $(KDIR))", "")
+ $(Q)$(MAKE) -C $(KDIR) clean KBUILD_EXTMOD=$(TESTMODS_DIR)
+endif
diff --git a/tools/testing/selftests/livepatch/test_modules/test_klp_atomic_replace.c b/tools/testing/selftests/livepatch/test_modules/test_klp_atomic_replace.c
new file mode 100644
index 000000000000..5af7093ca00c
--- /dev/null
+++ b/tools/testing/selftests/livepatch/test_modules/test_klp_atomic_replace.c
@@ -0,0 +1,57 @@
+// SPDX-License-Identifier: GPL-2.0
+// Copyright (C) 2018 Joe Lawrence <joe.lawrence@redhat.com>
+
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
+#include <linux/module.h>
+#include <linux/kernel.h>
+#include <linux/livepatch.h>
+
+static int replace;
+module_param(replace, int, 0644);
+MODULE_PARM_DESC(replace, "replace (default=0)");
+
+#include <linux/seq_file.h>
+static int livepatch_meminfo_proc_show(struct seq_file *m, void *v)
+{
+ seq_printf(m, "%s: %s\n", THIS_MODULE->name,
+ "this has been live patched");
+ return 0;
+}
+
+static struct klp_func funcs[] = {
+ {
+ .old_name = "meminfo_proc_show",
+ .new_func = livepatch_meminfo_proc_show,
+ }, {}
+};
+
+static struct klp_object objs[] = {
+ {
+ /* name being NULL means vmlinux */
+ .funcs = funcs,
+ }, {}
+};
+
+static struct klp_patch patch = {
+ .mod = THIS_MODULE,
+ .objs = objs,
+ /* set .replace in the init function below for demo purposes */
+};
+
+static int test_klp_atomic_replace_init(void)
+{
+ patch.replace = replace;
+ return klp_enable_patch(&patch);
+}
+
+static void test_klp_atomic_replace_exit(void)
+{
+}
+
+module_init(test_klp_atomic_replace_init);
+module_exit(test_klp_atomic_replace_exit);
+MODULE_LICENSE("GPL");
+MODULE_INFO(livepatch, "Y");
+MODULE_AUTHOR("Joe Lawrence <joe.lawrence@redhat.com>");
+MODULE_DESCRIPTION("Livepatch test: atomic replace");
diff --git a/tools/testing/selftests/livepatch/test_modules/test_klp_callbacks_busy.c b/tools/testing/selftests/livepatch/test_modules/test_klp_callbacks_busy.c
new file mode 100644
index 000000000000..133929e0ce8f
--- /dev/null
+++ b/tools/testing/selftests/livepatch/test_modules/test_klp_callbacks_busy.c
@@ -0,0 +1,70 @@
+// SPDX-License-Identifier: GPL-2.0
+// Copyright (C) 2018 Joe Lawrence <joe.lawrence@redhat.com>
+
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
+#include <linux/module.h>
+#include <linux/kernel.h>
+#include <linux/sched.h>
+#include <linux/workqueue.h>
+#include <linux/delay.h>
+
+/* load/run-time control from sysfs writer */
+static bool block_transition;
+module_param(block_transition, bool, 0644);
+MODULE_PARM_DESC(block_transition, "block_transition (default=false)");
+
+static void busymod_work_func(struct work_struct *work);
+static DECLARE_WORK(work, busymod_work_func);
+static DECLARE_COMPLETION(busymod_work_started);
+
+static void busymod_work_func(struct work_struct *work)
+{
+ pr_info("%s enter\n", __func__);
+ complete(&busymod_work_started);
+
+ while (READ_ONCE(block_transition)) {
+ /*
+ * Busy-wait until the sysfs writer has acknowledged a
+ * blocked transition and clears the flag.
+ */
+ msleep(20);
+ }
+
+ pr_info("%s exit\n", __func__);
+}
+
+static int test_klp_callbacks_busy_init(void)
+{
+ pr_info("%s\n", __func__);
+ schedule_work(&work);
+
+ /*
+ * To synchronize kernel messages, hold the init function from
+ * exiting until the work function's entry message has printed.
+ */
+ wait_for_completion(&busymod_work_started);
+
+ if (!block_transition) {
+ /*
+ * Serialize output: print all messages from the work
+ * function before returning from init().
+ */
+ flush_work(&work);
+ }
+
+ return 0;
+}
+
+static void test_klp_callbacks_busy_exit(void)
+{
+ WRITE_ONCE(block_transition, false);
+ flush_work(&work);
+ pr_info("%s\n", __func__);
+}
+
+module_init(test_klp_callbacks_busy_init);
+module_exit(test_klp_callbacks_busy_exit);
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Joe Lawrence <joe.lawrence@redhat.com>");
+MODULE_DESCRIPTION("Livepatch test: busy target module");
diff --git a/tools/testing/selftests/livepatch/test_modules/test_klp_callbacks_demo.c b/tools/testing/selftests/livepatch/test_modules/test_klp_callbacks_demo.c
new file mode 100644
index 000000000000..3fd8fe1cd1cc
--- /dev/null
+++ b/tools/testing/selftests/livepatch/test_modules/test_klp_callbacks_demo.c
@@ -0,0 +1,121 @@
+// SPDX-License-Identifier: GPL-2.0
+// Copyright (C) 2018 Joe Lawrence <joe.lawrence@redhat.com>
+
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
+#include <linux/module.h>
+#include <linux/kernel.h>
+#include <linux/livepatch.h>
+
+static int pre_patch_ret;
+module_param(pre_patch_ret, int, 0644);
+MODULE_PARM_DESC(pre_patch_ret, "pre_patch_ret (default=0)");
+
+static const char *const module_state[] = {
+ [MODULE_STATE_LIVE] = "[MODULE_STATE_LIVE] Normal state",
+ [MODULE_STATE_COMING] = "[MODULE_STATE_COMING] Full formed, running module_init",
+ [MODULE_STATE_GOING] = "[MODULE_STATE_GOING] Going away",
+ [MODULE_STATE_UNFORMED] = "[MODULE_STATE_UNFORMED] Still setting it up",
+};
+
+static void callback_info(const char *callback, struct klp_object *obj)
+{
+ if (obj->mod)
+ pr_info("%s: %s -> %s\n", callback, obj->mod->name,
+ module_state[obj->mod->state]);
+ else
+ pr_info("%s: vmlinux\n", callback);
+}
+
+/* Executed on object patching (ie, patch enablement) */
+static int pre_patch_callback(struct klp_object *obj)
+{
+ callback_info(__func__, obj);
+ return pre_patch_ret;
+}
+
+/* Executed on object unpatching (ie, patch disablement) */
+static void post_patch_callback(struct klp_object *obj)
+{
+ callback_info(__func__, obj);
+}
+
+/* Executed on object unpatching (ie, patch disablement) */
+static void pre_unpatch_callback(struct klp_object *obj)
+{
+ callback_info(__func__, obj);
+}
+
+/* Executed on object unpatching (ie, patch disablement) */
+static void post_unpatch_callback(struct klp_object *obj)
+{
+ callback_info(__func__, obj);
+}
+
+static void patched_work_func(struct work_struct *work)
+{
+ pr_info("%s\n", __func__);
+}
+
+static struct klp_func no_funcs[] = {
+ {}
+};
+
+static struct klp_func busymod_funcs[] = {
+ {
+ .old_name = "busymod_work_func",
+ .new_func = patched_work_func,
+ }, {}
+};
+
+static struct klp_object objs[] = {
+ {
+ .name = NULL, /* vmlinux */
+ .funcs = no_funcs,
+ .callbacks = {
+ .pre_patch = pre_patch_callback,
+ .post_patch = post_patch_callback,
+ .pre_unpatch = pre_unpatch_callback,
+ .post_unpatch = post_unpatch_callback,
+ },
+ }, {
+ .name = "test_klp_callbacks_mod",
+ .funcs = no_funcs,
+ .callbacks = {
+ .pre_patch = pre_patch_callback,
+ .post_patch = post_patch_callback,
+ .pre_unpatch = pre_unpatch_callback,
+ .post_unpatch = post_unpatch_callback,
+ },
+ }, {
+ .name = "test_klp_callbacks_busy",
+ .funcs = busymod_funcs,
+ .callbacks = {
+ .pre_patch = pre_patch_callback,
+ .post_patch = post_patch_callback,
+ .pre_unpatch = pre_unpatch_callback,
+ .post_unpatch = post_unpatch_callback,
+ },
+ }, { }
+};
+
+static struct klp_patch patch = {
+ .mod = THIS_MODULE,
+ .objs = objs,
+};
+
+static int test_klp_callbacks_demo_init(void)
+{
+ return klp_enable_patch(&patch);
+}
+
+static void test_klp_callbacks_demo_exit(void)
+{
+}
+
+module_init(test_klp_callbacks_demo_init);
+module_exit(test_klp_callbacks_demo_exit);
+MODULE_LICENSE("GPL");
+MODULE_INFO(livepatch, "Y");
+MODULE_AUTHOR("Joe Lawrence <joe.lawrence@redhat.com>");
+MODULE_DESCRIPTION("Livepatch test: livepatch demo");
diff --git a/tools/testing/selftests/livepatch/test_modules/test_klp_callbacks_demo2.c b/tools/testing/selftests/livepatch/test_modules/test_klp_callbacks_demo2.c
new file mode 100644
index 000000000000..5417573e80af
--- /dev/null
+++ b/tools/testing/selftests/livepatch/test_modules/test_klp_callbacks_demo2.c
@@ -0,0 +1,93 @@
+// SPDX-License-Identifier: GPL-2.0
+// Copyright (C) 2018 Joe Lawrence <joe.lawrence@redhat.com>
+
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
+#include <linux/module.h>
+#include <linux/kernel.h>
+#include <linux/livepatch.h>
+
+static int replace;
+module_param(replace, int, 0644);
+MODULE_PARM_DESC(replace, "replace (default=0)");
+
+static const char *const module_state[] = {
+ [MODULE_STATE_LIVE] = "[MODULE_STATE_LIVE] Normal state",
+ [MODULE_STATE_COMING] = "[MODULE_STATE_COMING] Full formed, running module_init",
+ [MODULE_STATE_GOING] = "[MODULE_STATE_GOING] Going away",
+ [MODULE_STATE_UNFORMED] = "[MODULE_STATE_UNFORMED] Still setting it up",
+};
+
+static void callback_info(const char *callback, struct klp_object *obj)
+{
+ if (obj->mod)
+ pr_info("%s: %s -> %s\n", callback, obj->mod->name,
+ module_state[obj->mod->state]);
+ else
+ pr_info("%s: vmlinux\n", callback);
+}
+
+/* Executed on object patching (ie, patch enablement) */
+static int pre_patch_callback(struct klp_object *obj)
+{
+ callback_info(__func__, obj);
+ return 0;
+}
+
+/* Executed on object unpatching (ie, patch disablement) */
+static void post_patch_callback(struct klp_object *obj)
+{
+ callback_info(__func__, obj);
+}
+
+/* Executed on object unpatching (ie, patch disablement) */
+static void pre_unpatch_callback(struct klp_object *obj)
+{
+ callback_info(__func__, obj);
+}
+
+/* Executed on object unpatching (ie, patch disablement) */
+static void post_unpatch_callback(struct klp_object *obj)
+{
+ callback_info(__func__, obj);
+}
+
+static struct klp_func no_funcs[] = {
+ { }
+};
+
+static struct klp_object objs[] = {
+ {
+ .name = NULL, /* vmlinux */
+ .funcs = no_funcs,
+ .callbacks = {
+ .pre_patch = pre_patch_callback,
+ .post_patch = post_patch_callback,
+ .pre_unpatch = pre_unpatch_callback,
+ .post_unpatch = post_unpatch_callback,
+ },
+ }, { }
+};
+
+static struct klp_patch patch = {
+ .mod = THIS_MODULE,
+ .objs = objs,
+ /* set .replace in the init function below for demo purposes */
+};
+
+static int test_klp_callbacks_demo2_init(void)
+{
+ patch.replace = replace;
+ return klp_enable_patch(&patch);
+}
+
+static void test_klp_callbacks_demo2_exit(void)
+{
+}
+
+module_init(test_klp_callbacks_demo2_init);
+module_exit(test_klp_callbacks_demo2_exit);
+MODULE_LICENSE("GPL");
+MODULE_INFO(livepatch, "Y");
+MODULE_AUTHOR("Joe Lawrence <joe.lawrence@redhat.com>");
+MODULE_DESCRIPTION("Livepatch test: livepatch demo2");
diff --git a/tools/testing/selftests/livepatch/test_modules/test_klp_callbacks_mod.c b/tools/testing/selftests/livepatch/test_modules/test_klp_callbacks_mod.c
new file mode 100644
index 000000000000..8fbe645b1c2c
--- /dev/null
+++ b/tools/testing/selftests/livepatch/test_modules/test_klp_callbacks_mod.c
@@ -0,0 +1,24 @@
+// SPDX-License-Identifier: GPL-2.0
+// Copyright (C) 2018 Joe Lawrence <joe.lawrence@redhat.com>
+
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
+#include <linux/module.h>
+#include <linux/kernel.h>
+
+static int test_klp_callbacks_mod_init(void)
+{
+ pr_info("%s\n", __func__);
+ return 0;
+}
+
+static void test_klp_callbacks_mod_exit(void)
+{
+ pr_info("%s\n", __func__);
+}
+
+module_init(test_klp_callbacks_mod_init);
+module_exit(test_klp_callbacks_mod_exit);
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Joe Lawrence <joe.lawrence@redhat.com>");
+MODULE_DESCRIPTION("Livepatch test: target module");
diff --git a/tools/testing/selftests/livepatch/test_modules/test_klp_livepatch.c b/tools/testing/selftests/livepatch/test_modules/test_klp_livepatch.c
new file mode 100644
index 000000000000..aff08199de71
--- /dev/null
+++ b/tools/testing/selftests/livepatch/test_modules/test_klp_livepatch.c
@@ -0,0 +1,51 @@
+// SPDX-License-Identifier: GPL-2.0
+// Copyright (C) 2014 Seth Jennings <sjenning@redhat.com>
+
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
+#include <linux/module.h>
+#include <linux/kernel.h>
+#include <linux/livepatch.h>
+
+#include <linux/seq_file.h>
+static int livepatch_cmdline_proc_show(struct seq_file *m, void *v)
+{
+ seq_printf(m, "%s: %s\n", THIS_MODULE->name,
+ "this has been live patched");
+ return 0;
+}
+
+static struct klp_func funcs[] = {
+ {
+ .old_name = "cmdline_proc_show",
+ .new_func = livepatch_cmdline_proc_show,
+ }, { }
+};
+
+static struct klp_object objs[] = {
+ {
+ /* name being NULL means vmlinux */
+ .funcs = funcs,
+ }, { }
+};
+
+static struct klp_patch patch = {
+ .mod = THIS_MODULE,
+ .objs = objs,
+};
+
+static int test_klp_livepatch_init(void)
+{
+ return klp_enable_patch(&patch);
+}
+
+static void test_klp_livepatch_exit(void)
+{
+}
+
+module_init(test_klp_livepatch_init);
+module_exit(test_klp_livepatch_exit);
+MODULE_LICENSE("GPL");
+MODULE_INFO(livepatch, "Y");
+MODULE_AUTHOR("Seth Jennings <sjenning@redhat.com>");
+MODULE_DESCRIPTION("Livepatch test: livepatch module");
diff --git a/tools/testing/selftests/livepatch/test_modules/test_klp_shadow_vars.c b/tools/testing/selftests/livepatch/test_modules/test_klp_shadow_vars.c
new file mode 100644
index 000000000000..b99116490858
--- /dev/null
+++ b/tools/testing/selftests/livepatch/test_modules/test_klp_shadow_vars.c
@@ -0,0 +1,301 @@
+// SPDX-License-Identifier: GPL-2.0
+// Copyright (C) 2018 Joe Lawrence <joe.lawrence@redhat.com>
+
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
+#include <linux/module.h>
+#include <linux/kernel.h>
+#include <linux/list.h>
+#include <linux/livepatch.h>
+#include <linux/slab.h>
+
+/*
+ * Keep a small list of pointers so that we can print address-agnostic
+ * pointer values. Use a rolling integer count to differentiate the values.
+ * Ironically we could have used the shadow variable API to do this, but
+ * let's not lean too heavily on the very code we're testing.
+ */
+static LIST_HEAD(ptr_list);
+struct shadow_ptr {
+ void *ptr;
+ int id;
+ struct list_head list;
+};
+
+static void free_ptr_list(void)
+{
+ struct shadow_ptr *sp, *tmp_sp;
+
+ list_for_each_entry_safe(sp, tmp_sp, &ptr_list, list) {
+ list_del(&sp->list);
+ kfree(sp);
+ }
+}
+
+static int ptr_id(void *ptr)
+{
+ struct shadow_ptr *sp;
+ static int count;
+
+ list_for_each_entry(sp, &ptr_list, list) {
+ if (sp->ptr == ptr)
+ return sp->id;
+ }
+
+ sp = kmalloc(sizeof(*sp), GFP_ATOMIC);
+ if (!sp)
+ return -ENOMEM;
+ sp->ptr = ptr;
+ sp->id = count++;
+
+ list_add(&sp->list, &ptr_list);
+
+ return sp->id;
+}
+
+/*
+ * Shadow variable wrapper functions that echo the function and arguments
+ * to the kernel log for testing verification. Don't display raw pointers,
+ * but use the ptr_id() value instead.
+ */
+static void *shadow_get(void *obj, unsigned long id)
+{
+ int **sv;
+
+ sv = klp_shadow_get(obj, id);
+ pr_info("klp_%s(obj=PTR%d, id=0x%lx) = PTR%d\n",
+ __func__, ptr_id(obj), id, ptr_id(sv));
+
+ return sv;
+}
+
+static void *shadow_alloc(void *obj, unsigned long id, size_t size,
+ gfp_t gfp_flags, klp_shadow_ctor_t ctor,
+ void *ctor_data)
+{
+ int **var = ctor_data;
+ int **sv;
+
+ sv = klp_shadow_alloc(obj, id, size, gfp_flags, ctor, var);
+ pr_info("klp_%s(obj=PTR%d, id=0x%lx, size=%zx, gfp_flags=%pGg), ctor=PTR%d, ctor_data=PTR%d = PTR%d\n",
+ __func__, ptr_id(obj), id, size, &gfp_flags, ptr_id(ctor),
+ ptr_id(*var), ptr_id(sv));
+
+ return sv;
+}
+
+static void *shadow_get_or_alloc(void *obj, unsigned long id, size_t size,
+ gfp_t gfp_flags, klp_shadow_ctor_t ctor,
+ void *ctor_data)
+{
+ int **var = ctor_data;
+ int **sv;
+
+ sv = klp_shadow_get_or_alloc(obj, id, size, gfp_flags, ctor, var);
+ pr_info("klp_%s(obj=PTR%d, id=0x%lx, size=%zx, gfp_flags=%pGg), ctor=PTR%d, ctor_data=PTR%d = PTR%d\n",
+ __func__, ptr_id(obj), id, size, &gfp_flags, ptr_id(ctor),
+ ptr_id(*var), ptr_id(sv));
+
+ return sv;
+}
+
+static void shadow_free(void *obj, unsigned long id, klp_shadow_dtor_t dtor)
+{
+ klp_shadow_free(obj, id, dtor);
+ pr_info("klp_%s(obj=PTR%d, id=0x%lx, dtor=PTR%d)\n",
+ __func__, ptr_id(obj), id, ptr_id(dtor));
+}
+
+static void shadow_free_all(unsigned long id, klp_shadow_dtor_t dtor)
+{
+ klp_shadow_free_all(id, dtor);
+ pr_info("klp_%s(id=0x%lx, dtor=PTR%d)\n", __func__, id, ptr_id(dtor));
+}
+
+
+/* Shadow variable constructor - remember simple pointer data */
+static int shadow_ctor(void *obj, void *shadow_data, void *ctor_data)
+{
+ int **sv = shadow_data;
+ int **var = ctor_data;
+
+ if (!var)
+ return -EINVAL;
+
+ *sv = *var;
+ pr_info("%s: PTR%d -> PTR%d\n", __func__, ptr_id(sv), ptr_id(*var));
+
+ return 0;
+}
+
+/*
+ * With more than one item to free in the list, order is not determined and
+ * shadow_dtor will not be passed to shadow_free_all() which would make the
+ * test fail. (see pass 6)
+ */
+static void shadow_dtor(void *obj, void *shadow_data)
+{
+ int **sv = shadow_data;
+
+ pr_info("%s(obj=PTR%d, shadow_data=PTR%d)\n",
+ __func__, ptr_id(obj), ptr_id(sv));
+}
+
+/* number of objects we simulate that need shadow vars */
+#define NUM_OBJS 3
+
+/* dynamically created obj fields have the following shadow var id values */
+#define SV_ID1 0x1234
+#define SV_ID2 0x1235
+
+/*
+ * The main test case adds/removes new fields (shadow var) to each of these
+ * test structure instances. The last group of fields in the struct represent
+ * the idea that shadow variables may be added and removed to and from the
+ * struct during execution.
+ */
+struct test_object {
+ /* add anything here below and avoid to define an empty struct */
+ struct shadow_ptr sp;
+
+ /* these represent shadow vars added and removed with SV_ID{1,2} */
+ /* char nfield1; */
+ /* int nfield2; */
+};
+
+static int test_klp_shadow_vars_init(void)
+{
+ struct test_object objs[NUM_OBJS];
+ char nfields1[NUM_OBJS], *pnfields1[NUM_OBJS], **sv1[NUM_OBJS];
+ char *pndup[NUM_OBJS];
+ int nfields2[NUM_OBJS], *pnfields2[NUM_OBJS], **sv2[NUM_OBJS];
+ void **sv;
+ int ret;
+ int i;
+
+ ptr_id(NULL);
+
+ /*
+ * With an empty shadow variable hash table, expect not to find
+ * any matches.
+ */
+ sv = shadow_get(&objs[0], SV_ID1);
+ if (!sv)
+ pr_info(" got expected NULL result\n");
+
+ /* pass 1: init & alloc a char+int pair of svars for each objs */
+ for (i = 0; i < NUM_OBJS; i++) {
+ pnfields1[i] = &nfields1[i];
+ ptr_id(pnfields1[i]);
+
+ if (i % 2) {
+ sv1[i] = shadow_alloc(&objs[i], SV_ID1,
+ sizeof(pnfields1[i]), GFP_KERNEL,
+ shadow_ctor, &pnfields1[i]);
+ } else {
+ sv1[i] = shadow_get_or_alloc(&objs[i], SV_ID1,
+ sizeof(pnfields1[i]), GFP_KERNEL,
+ shadow_ctor, &pnfields1[i]);
+ }
+ if (!sv1[i]) {
+ ret = -ENOMEM;
+ goto out;
+ }
+
+ pnfields2[i] = &nfields2[i];
+ ptr_id(pnfields2[i]);
+ sv2[i] = shadow_alloc(&objs[i], SV_ID2, sizeof(pnfields2[i]),
+ GFP_KERNEL, shadow_ctor, &pnfields2[i]);
+ if (!sv2[i]) {
+ ret = -ENOMEM;
+ goto out;
+ }
+ }
+
+ /* pass 2: verify we find allocated svars and where they point to */
+ for (i = 0; i < NUM_OBJS; i++) {
+ /* check the "char" svar for all objects */
+ sv = shadow_get(&objs[i], SV_ID1);
+ if (!sv) {
+ ret = -EINVAL;
+ goto out;
+ }
+ if ((char **)sv == sv1[i] && *sv1[i] == pnfields1[i])
+ pr_info(" got expected PTR%d -> PTR%d result\n",
+ ptr_id(sv1[i]), ptr_id(*sv1[i]));
+
+ /* check the "int" svar for all objects */
+ sv = shadow_get(&objs[i], SV_ID2);
+ if (!sv) {
+ ret = -EINVAL;
+ goto out;
+ }
+ if ((int **)sv == sv2[i] && *sv2[i] == pnfields2[i])
+ pr_info(" got expected PTR%d -> PTR%d result\n",
+ ptr_id(sv2[i]), ptr_id(*sv2[i]));
+ }
+
+ /* pass 3: verify that 'get_or_alloc' returns already allocated svars */
+ for (i = 0; i < NUM_OBJS; i++) {
+ pndup[i] = &nfields1[i];
+ ptr_id(pndup[i]);
+
+ sv = shadow_get_or_alloc(&objs[i], SV_ID1, sizeof(pndup[i]),
+ GFP_KERNEL, shadow_ctor, &pndup[i]);
+ if (!sv) {
+ ret = -EINVAL;
+ goto out;
+ }
+ if ((char **)sv == sv1[i] && *sv1[i] == pnfields1[i])
+ pr_info(" got expected PTR%d -> PTR%d result\n",
+ ptr_id(sv1[i]), ptr_id(*sv1[i]));
+ }
+
+ /* pass 4: free <objs[*], SV_ID1> pairs of svars, verify removal */
+ for (i = 0; i < NUM_OBJS; i++) {
+ shadow_free(&objs[i], SV_ID1, shadow_dtor); /* 'char' pairs */
+ sv = shadow_get(&objs[i], SV_ID1);
+ if (!sv)
+ pr_info(" got expected NULL result\n");
+ }
+
+ /* pass 5: check we still find <objs[*], SV_ID2> svar pairs */
+ for (i = 0; i < NUM_OBJS; i++) {
+ sv = shadow_get(&objs[i], SV_ID2); /* 'int' pairs */
+ if (!sv) {
+ ret = -EINVAL;
+ goto out;
+ }
+ if ((int **)sv == sv2[i] && *sv2[i] == pnfields2[i])
+ pr_info(" got expected PTR%d -> PTR%d result\n",
+ ptr_id(sv2[i]), ptr_id(*sv2[i]));
+ }
+
+ /* pass 6: free all the <objs[*], SV_ID2> svar pairs too. */
+ shadow_free_all(SV_ID2, NULL); /* 'int' pairs */
+ for (i = 0; i < NUM_OBJS; i++) {
+ sv = shadow_get(&objs[i], SV_ID2);
+ if (!sv)
+ pr_info(" got expected NULL result\n");
+ }
+
+ free_ptr_list();
+
+ return 0;
+out:
+ shadow_free_all(SV_ID1, NULL); /* 'char' pairs */
+ shadow_free_all(SV_ID2, NULL); /* 'int' pairs */
+ free_ptr_list();
+
+ return ret;
+}
+
+static void test_klp_shadow_vars_exit(void)
+{
+}
+
+module_init(test_klp_shadow_vars_init);
+module_exit(test_klp_shadow_vars_exit);
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Joe Lawrence <joe.lawrence@redhat.com>");
+MODULE_DESCRIPTION("Livepatch test: shadow variables");
diff --git a/tools/testing/selftests/livepatch/test_modules/test_klp_state.c b/tools/testing/selftests/livepatch/test_modules/test_klp_state.c
new file mode 100644
index 000000000000..57a4253acb01
--- /dev/null
+++ b/tools/testing/selftests/livepatch/test_modules/test_klp_state.c
@@ -0,0 +1,162 @@
+// SPDX-License-Identifier: GPL-2.0
+// Copyright (C) 2019 SUSE
+
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
+#include <linux/slab.h>
+#include <linux/module.h>
+#include <linux/kernel.h>
+#include <linux/printk.h>
+#include <linux/livepatch.h>
+
+#define CONSOLE_LOGLEVEL_STATE 1
+/* Version 1 does not support migration. */
+#define CONSOLE_LOGLEVEL_STATE_VERSION 1
+
+static const char *const module_state[] = {
+ [MODULE_STATE_LIVE] = "[MODULE_STATE_LIVE] Normal state",
+ [MODULE_STATE_COMING] = "[MODULE_STATE_COMING] Full formed, running module_init",
+ [MODULE_STATE_GOING] = "[MODULE_STATE_GOING] Going away",
+ [MODULE_STATE_UNFORMED] = "[MODULE_STATE_UNFORMED] Still setting it up",
+};
+
+static void callback_info(const char *callback, struct klp_object *obj)
+{
+ if (obj->mod)
+ pr_info("%s: %s -> %s\n", callback, obj->mod->name,
+ module_state[obj->mod->state]);
+ else
+ pr_info("%s: vmlinux\n", callback);
+}
+
+static struct klp_patch patch;
+
+static int allocate_loglevel_state(void)
+{
+ struct klp_state *loglevel_state;
+
+ loglevel_state = klp_get_state(&patch, CONSOLE_LOGLEVEL_STATE);
+ if (!loglevel_state)
+ return -EINVAL;
+
+ loglevel_state->data = kzalloc(sizeof(console_loglevel), GFP_KERNEL);
+ if (!loglevel_state->data)
+ return -ENOMEM;
+
+ pr_info("%s: allocating space to store console_loglevel\n",
+ __func__);
+ return 0;
+}
+
+static void fix_console_loglevel(void)
+{
+ struct klp_state *loglevel_state;
+
+ loglevel_state = klp_get_state(&patch, CONSOLE_LOGLEVEL_STATE);
+ if (!loglevel_state)
+ return;
+
+ pr_info("%s: fixing console_loglevel\n", __func__);
+ *(int *)loglevel_state->data = console_loglevel;
+ console_loglevel = CONSOLE_LOGLEVEL_MOTORMOUTH;
+}
+
+static void restore_console_loglevel(void)
+{
+ struct klp_state *loglevel_state;
+
+ loglevel_state = klp_get_state(&patch, CONSOLE_LOGLEVEL_STATE);
+ if (!loglevel_state)
+ return;
+
+ pr_info("%s: restoring console_loglevel\n", __func__);
+ console_loglevel = *(int *)loglevel_state->data;
+}
+
+static void free_loglevel_state(void)
+{
+ struct klp_state *loglevel_state;
+
+ loglevel_state = klp_get_state(&patch, CONSOLE_LOGLEVEL_STATE);
+ if (!loglevel_state)
+ return;
+
+ pr_info("%s: freeing space for the stored console_loglevel\n",
+ __func__);
+ kfree(loglevel_state->data);
+}
+
+/* Executed on object patching (ie, patch enablement) */
+static int pre_patch_callback(struct klp_object *obj)
+{
+ callback_info(__func__, obj);
+ return allocate_loglevel_state();
+}
+
+/* Executed on object unpatching (ie, patch disablement) */
+static void post_patch_callback(struct klp_object *obj)
+{
+ callback_info(__func__, obj);
+ fix_console_loglevel();
+}
+
+/* Executed on object unpatching (ie, patch disablement) */
+static void pre_unpatch_callback(struct klp_object *obj)
+{
+ callback_info(__func__, obj);
+ restore_console_loglevel();
+}
+
+/* Executed on object unpatching (ie, patch disablement) */
+static void post_unpatch_callback(struct klp_object *obj)
+{
+ callback_info(__func__, obj);
+ free_loglevel_state();
+}
+
+static struct klp_func no_funcs[] = {
+ {}
+};
+
+static struct klp_object objs[] = {
+ {
+ .name = NULL, /* vmlinux */
+ .funcs = no_funcs,
+ .callbacks = {
+ .pre_patch = pre_patch_callback,
+ .post_patch = post_patch_callback,
+ .pre_unpatch = pre_unpatch_callback,
+ .post_unpatch = post_unpatch_callback,
+ },
+ }, { }
+};
+
+static struct klp_state states[] = {
+ {
+ .id = CONSOLE_LOGLEVEL_STATE,
+ .version = CONSOLE_LOGLEVEL_STATE_VERSION,
+ }, { }
+};
+
+static struct klp_patch patch = {
+ .mod = THIS_MODULE,
+ .objs = objs,
+ .states = states,
+ .replace = true,
+};
+
+static int test_klp_callbacks_demo_init(void)
+{
+ return klp_enable_patch(&patch);
+}
+
+static void test_klp_callbacks_demo_exit(void)
+{
+}
+
+module_init(test_klp_callbacks_demo_init);
+module_exit(test_klp_callbacks_demo_exit);
+MODULE_LICENSE("GPL");
+MODULE_INFO(livepatch, "Y");
+MODULE_AUTHOR("Petr Mladek <pmladek@suse.com>");
+MODULE_DESCRIPTION("Livepatch test: system state modification");
diff --git a/tools/testing/selftests/livepatch/test_modules/test_klp_state2.c b/tools/testing/selftests/livepatch/test_modules/test_klp_state2.c
new file mode 100644
index 000000000000..c978ea4d5e67
--- /dev/null
+++ b/tools/testing/selftests/livepatch/test_modules/test_klp_state2.c
@@ -0,0 +1,191 @@
+// SPDX-License-Identifier: GPL-2.0
+// Copyright (C) 2019 SUSE
+
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
+#include <linux/slab.h>
+#include <linux/module.h>
+#include <linux/kernel.h>
+#include <linux/printk.h>
+#include <linux/livepatch.h>
+
+#define CONSOLE_LOGLEVEL_STATE 1
+/* Version 2 supports migration. */
+#define CONSOLE_LOGLEVEL_STATE_VERSION 2
+
+static const char *const module_state[] = {
+ [MODULE_STATE_LIVE] = "[MODULE_STATE_LIVE] Normal state",
+ [MODULE_STATE_COMING] = "[MODULE_STATE_COMING] Full formed, running module_init",
+ [MODULE_STATE_GOING] = "[MODULE_STATE_GOING] Going away",
+ [MODULE_STATE_UNFORMED] = "[MODULE_STATE_UNFORMED] Still setting it up",
+};
+
+static void callback_info(const char *callback, struct klp_object *obj)
+{
+ if (obj->mod)
+ pr_info("%s: %s -> %s\n", callback, obj->mod->name,
+ module_state[obj->mod->state]);
+ else
+ pr_info("%s: vmlinux\n", callback);
+}
+
+static struct klp_patch patch;
+
+static int allocate_loglevel_state(void)
+{
+ struct klp_state *loglevel_state, *prev_loglevel_state;
+
+ prev_loglevel_state = klp_get_prev_state(CONSOLE_LOGLEVEL_STATE);
+ if (prev_loglevel_state) {
+ pr_info("%s: space to store console_loglevel already allocated\n",
+ __func__);
+ return 0;
+ }
+
+ loglevel_state = klp_get_state(&patch, CONSOLE_LOGLEVEL_STATE);
+ if (!loglevel_state)
+ return -EINVAL;
+
+ loglevel_state->data = kzalloc(sizeof(console_loglevel), GFP_KERNEL);
+ if (!loglevel_state->data)
+ return -ENOMEM;
+
+ pr_info("%s: allocating space to store console_loglevel\n",
+ __func__);
+ return 0;
+}
+
+static void fix_console_loglevel(void)
+{
+ struct klp_state *loglevel_state, *prev_loglevel_state;
+
+ loglevel_state = klp_get_state(&patch, CONSOLE_LOGLEVEL_STATE);
+ if (!loglevel_state)
+ return;
+
+ prev_loglevel_state = klp_get_prev_state(CONSOLE_LOGLEVEL_STATE);
+ if (prev_loglevel_state) {
+ pr_info("%s: taking over the console_loglevel change\n",
+ __func__);
+ loglevel_state->data = prev_loglevel_state->data;
+ return;
+ }
+
+ pr_info("%s: fixing console_loglevel\n", __func__);
+ *(int *)loglevel_state->data = console_loglevel;
+ console_loglevel = CONSOLE_LOGLEVEL_MOTORMOUTH;
+}
+
+static void restore_console_loglevel(void)
+{
+ struct klp_state *loglevel_state, *prev_loglevel_state;
+
+ prev_loglevel_state = klp_get_prev_state(CONSOLE_LOGLEVEL_STATE);
+ if (prev_loglevel_state) {
+ pr_info("%s: passing the console_loglevel change back to the old livepatch\n",
+ __func__);
+ return;
+ }
+
+ loglevel_state = klp_get_state(&patch, CONSOLE_LOGLEVEL_STATE);
+ if (!loglevel_state)
+ return;
+
+ pr_info("%s: restoring console_loglevel\n", __func__);
+ console_loglevel = *(int *)loglevel_state->data;
+}
+
+static void free_loglevel_state(void)
+{
+ struct klp_state *loglevel_state, *prev_loglevel_state;
+
+ prev_loglevel_state = klp_get_prev_state(CONSOLE_LOGLEVEL_STATE);
+ if (prev_loglevel_state) {
+ pr_info("%s: keeping space to store console_loglevel\n",
+ __func__);
+ return;
+ }
+
+ loglevel_state = klp_get_state(&patch, CONSOLE_LOGLEVEL_STATE);
+ if (!loglevel_state)
+ return;
+
+ pr_info("%s: freeing space for the stored console_loglevel\n",
+ __func__);
+ kfree(loglevel_state->data);
+}
+
+/* Executed on object patching (ie, patch enablement) */
+static int pre_patch_callback(struct klp_object *obj)
+{
+ callback_info(__func__, obj);
+ return allocate_loglevel_state();
+}
+
+/* Executed on object unpatching (ie, patch disablement) */
+static void post_patch_callback(struct klp_object *obj)
+{
+ callback_info(__func__, obj);
+ fix_console_loglevel();
+}
+
+/* Executed on object unpatching (ie, patch disablement) */
+static void pre_unpatch_callback(struct klp_object *obj)
+{
+ callback_info(__func__, obj);
+ restore_console_loglevel();
+}
+
+/* Executed on object unpatching (ie, patch disablement) */
+static void post_unpatch_callback(struct klp_object *obj)
+{
+ callback_info(__func__, obj);
+ free_loglevel_state();
+}
+
+static struct klp_func no_funcs[] = {
+ {}
+};
+
+static struct klp_object objs[] = {
+ {
+ .name = NULL, /* vmlinux */
+ .funcs = no_funcs,
+ .callbacks = {
+ .pre_patch = pre_patch_callback,
+ .post_patch = post_patch_callback,
+ .pre_unpatch = pre_unpatch_callback,
+ .post_unpatch = post_unpatch_callback,
+ },
+ }, { }
+};
+
+static struct klp_state states[] = {
+ {
+ .id = CONSOLE_LOGLEVEL_STATE,
+ .version = CONSOLE_LOGLEVEL_STATE_VERSION,
+ }, { }
+};
+
+static struct klp_patch patch = {
+ .mod = THIS_MODULE,
+ .objs = objs,
+ .states = states,
+ .replace = true,
+};
+
+static int test_klp_callbacks_demo_init(void)
+{
+ return klp_enable_patch(&patch);
+}
+
+static void test_klp_callbacks_demo_exit(void)
+{
+}
+
+module_init(test_klp_callbacks_demo_init);
+module_exit(test_klp_callbacks_demo_exit);
+MODULE_LICENSE("GPL");
+MODULE_INFO(livepatch, "Y");
+MODULE_AUTHOR("Petr Mladek <pmladek@suse.com>");
+MODULE_DESCRIPTION("Livepatch test: system state modification");
diff --git a/tools/testing/selftests/livepatch/test_modules/test_klp_state3.c b/tools/testing/selftests/livepatch/test_modules/test_klp_state3.c
new file mode 100644
index 000000000000..9226579d10c5
--- /dev/null
+++ b/tools/testing/selftests/livepatch/test_modules/test_klp_state3.c
@@ -0,0 +1,5 @@
+// SPDX-License-Identifier: GPL-2.0
+// Copyright (C) 2019 SUSE
+
+/* The console loglevel fix is the same in the next cumulative patch. */
+#include "test_klp_state2.c"
diff --git a/tools/testing/selftests/livepatch/test_modules/test_klp_syscall.c b/tools/testing/selftests/livepatch/test_modules/test_klp_syscall.c
new file mode 100644
index 000000000000..dd802783ea84
--- /dev/null
+++ b/tools/testing/selftests/livepatch/test_modules/test_klp_syscall.c
@@ -0,0 +1,116 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (C) 2017-2023 SUSE
+ * Authors: Libor Pechacek <lpechacek@suse.cz>
+ * Nicolai Stange <nstange@suse.de>
+ * Marcos Paulo de Souza <mpdesouza@suse.com>
+ */
+
+#include <linux/module.h>
+#include <linux/kernel.h>
+#include <linux/sched.h>
+#include <linux/slab.h>
+#include <linux/livepatch.h>
+
+#if defined(__x86_64__)
+#define FN_PREFIX __x64_
+#elif defined(__s390x__)
+#define FN_PREFIX __s390x_
+#elif defined(__aarch64__)
+#define FN_PREFIX __arm64_
+#else
+/* powerpc does not select ARCH_HAS_SYSCALL_WRAPPER */
+#define FN_PREFIX
+#endif
+
+/* Protects klp_pids */
+static DEFINE_MUTEX(kpid_mutex);
+
+static unsigned int npids, npids_pending;
+static int klp_pids[NR_CPUS];
+module_param_array(klp_pids, int, &npids_pending, 0);
+MODULE_PARM_DESC(klp_pids, "Array of pids to be transitioned to livepatched state.");
+
+static ssize_t npids_show(struct kobject *kobj, struct kobj_attribute *attr,
+ char *buf)
+{
+ return sprintf(buf, "%u\n", npids_pending);
+}
+
+static struct kobj_attribute klp_attr = __ATTR_RO(npids);
+static struct kobject *klp_kobj;
+
+static asmlinkage long lp_sys_getpid(void)
+{
+ int i;
+
+ mutex_lock(&kpid_mutex);
+ if (npids_pending > 0) {
+ for (i = 0; i < npids; i++) {
+ if (current->pid == klp_pids[i]) {
+ klp_pids[i] = 0;
+ npids_pending--;
+ break;
+ }
+ }
+ }
+ mutex_unlock(&kpid_mutex);
+
+ return task_tgid_vnr(current);
+}
+
+static struct klp_func vmlinux_funcs[] = {
+ {
+ .old_name = __stringify(FN_PREFIX) "sys_getpid",
+ .new_func = lp_sys_getpid,
+ }, {}
+};
+
+static struct klp_object objs[] = {
+ {
+ /* name being NULL means vmlinux */
+ .funcs = vmlinux_funcs,
+ }, {}
+};
+
+static struct klp_patch patch = {
+ .mod = THIS_MODULE,
+ .objs = objs,
+};
+
+static int livepatch_init(void)
+{
+ int ret;
+
+ klp_kobj = kobject_create_and_add("test_klp_syscall", kernel_kobj);
+ if (!klp_kobj)
+ return -ENOMEM;
+
+ ret = sysfs_create_file(klp_kobj, &klp_attr.attr);
+ if (ret) {
+ kobject_put(klp_kobj);
+ return ret;
+ }
+
+ /*
+ * Save the number pids to transition to livepatched state before the
+ * number of pending pids is decremented.
+ */
+ npids = npids_pending;
+
+ return klp_enable_patch(&patch);
+}
+
+static void livepatch_exit(void)
+{
+ kobject_put(klp_kobj);
+}
+
+module_init(livepatch_init);
+module_exit(livepatch_exit);
+MODULE_LICENSE("GPL");
+MODULE_INFO(livepatch, "Y");
+MODULE_AUTHOR("Libor Pechacek <lpechacek@suse.cz>");
+MODULE_AUTHOR("Nicolai Stange <nstange@suse.de>");
+MODULE_AUTHOR("Marcos Paulo de Souza <mpdesouza@suse.com>");
+MODULE_DESCRIPTION("Livepatch test: syscall transition");
diff --git a/tools/testing/selftests/lkdtm/.gitignore b/tools/testing/selftests/lkdtm/.gitignore
index f26212605b6b..d4b0be857deb 100644
--- a/tools/testing/selftests/lkdtm/.gitignore
+++ b/tools/testing/selftests/lkdtm/.gitignore
@@ -1,2 +1,3 @@
*.sh
!run.sh
+!stack-entropy.sh
diff --git a/tools/testing/selftests/lkdtm/Makefile b/tools/testing/selftests/lkdtm/Makefile
index 1bcc9ee990eb..c71109ceeb2d 100644
--- a/tools/testing/selftests/lkdtm/Makefile
+++ b/tools/testing/selftests/lkdtm/Makefile
@@ -5,6 +5,7 @@ include ../lib.mk
# NOTE: $(OUTPUT) won't get default value if used before lib.mk
TEST_FILES := tests.txt
+TEST_PROGS := stack-entropy.sh
TEST_GEN_PROGS = $(patsubst %,$(OUTPUT)/%.sh,$(shell awk '{print $$1}' tests.txt | sed -e 's/\#//'))
all: $(TEST_GEN_PROGS)
diff --git a/tools/testing/selftests/lkdtm/config b/tools/testing/selftests/lkdtm/config
index d874990e442b..7afe05e8c4d7 100644
--- a/tools/testing/selftests/lkdtm/config
+++ b/tools/testing/selftests/lkdtm/config
@@ -1 +1,14 @@
CONFIG_LKDTM=y
+CONFIG_DEBUG_LIST=y
+CONFIG_SLAB_FREELIST_HARDENED=y
+CONFIG_FORTIFY_SOURCE=y
+CONFIG_GCC_PLUGIN_STACKLEAK=y
+CONFIG_HARDENED_USERCOPY=y
+CONFIG_RANDOMIZE_KSTACK_OFFSET_DEFAULT=y
+CONFIG_INIT_ON_FREE_DEFAULT_ON=y
+CONFIG_INIT_ON_ALLOC_DEFAULT_ON=y
+CONFIG_UBSAN=y
+CONFIG_UBSAN_BOUNDS=y
+CONFIG_STACKPROTECTOR_STRONG=y
+CONFIG_SLUB_DEBUG=y
+CONFIG_SLUB_DEBUG_ON=y
diff --git a/tools/testing/selftests/lkdtm/run.sh b/tools/testing/selftests/lkdtm/run.sh
index ee64ff8df8f4..95e904959207 100755
--- a/tools/testing/selftests/lkdtm/run.sh
+++ b/tools/testing/selftests/lkdtm/run.sh
@@ -8,6 +8,7 @@
#
set -e
TRIGGER=/sys/kernel/debug/provoke-crash/DIRECT
+CLEAR_ONCE=/sys/kernel/debug/clear_warn_once
KSELFTEST_SKIP_TEST=4
# Verify we have LKDTM available in the kernel.
@@ -55,8 +56,14 @@ if echo "$test" | grep -q '^#' ; then
fi
# If no expected output given, assume an Oops with back trace is success.
+repeat=1
if [ -z "$expect" ]; then
expect="call trace:"
+else
+ if echo "$expect" | grep -q '^repeat:' ; then
+ repeat=$(echo "$expect" | cut -d' ' -f1 | cut -d: -f2)
+ expect=$(echo "$expect" | cut -d' ' -f2-)
+ fi
fi
# Prepare log for report checking
@@ -67,16 +74,27 @@ cleanup() {
}
trap cleanup EXIT
+# Reset WARN_ONCE counters so we trip it each time this runs.
+if [ -w $CLEAR_ONCE ] ; then
+ echo 1 > $CLEAR_ONCE
+fi
+
# Save existing dmesg so we can detect new content below
dmesg > "$DMESG"
-# Most shells yell about signals and we're expecting the "cat" process
-# to usually be killed by the kernel. So we have to run it in a sub-shell
-# and silence errors.
-($SHELL -c 'cat <(echo '"$test"') >'"$TRIGGER" 2>/dev/null) || true
+# Since the kernel is likely killing the process writing to the trigger
+# file, it must not be the script's shell itself. i.e. we cannot do:
+# echo "$test" >"$TRIGGER"
+# Instead, use "cat" to take the signal. Since the shell will yell about
+# the signal that killed the subprocess, we must ignore the failure and
+# continue. However we don't silence stderr since there might be other
+# useful details reported there in the case of other unexpected conditions.
+for i in $(seq 1 $repeat); do
+ echo "$test" | cat >"$TRIGGER" || true
+done
# Record and dump the results
-dmesg | diff --changed-group-format='%>' --unchanged-group-format='' "$DMESG" - > "$LOG" || true
+dmesg | comm --nocheck-order -13 "$DMESG" - > "$LOG" || true
cat "$LOG"
# Check for expected output
diff --git a/tools/testing/selftests/lkdtm/stack-entropy.sh b/tools/testing/selftests/lkdtm/stack-entropy.sh
new file mode 100755
index 000000000000..14fedeef762e
--- /dev/null
+++ b/tools/testing/selftests/lkdtm/stack-entropy.sh
@@ -0,0 +1,51 @@
+#!/bin/sh
+# SPDX-License-Identifier: GPL-2.0
+#
+# Measure kernel stack entropy by sampling via LKDTM's REPORT_STACK test.
+set -e
+samples="${1:-1000}"
+TRIGGER=/sys/kernel/debug/provoke-crash/DIRECT
+KSELFTEST_SKIP_TEST=4
+
+# Verify we have LKDTM available in the kernel.
+if [ ! -r $TRIGGER ] ; then
+ /sbin/modprobe -q lkdtm || true
+ if [ ! -r $TRIGGER ] ; then
+ echo "Cannot find $TRIGGER (missing CONFIG_LKDTM?)"
+ else
+ echo "Cannot write $TRIGGER (need to run as root?)"
+ fi
+ # Skip this test
+ exit $KSELFTEST_SKIP_TEST
+fi
+
+# Capture dmesg continuously since it may fill up depending on sample size.
+log=$(mktemp -t stack-entropy-XXXXXX)
+dmesg --follow >"$log" & pid=$!
+report=-1
+for i in $(seq 1 $samples); do
+ echo "REPORT_STACK" > $TRIGGER
+ if [ -t 1 ]; then
+ percent=$(( 100 * $i / $samples ))
+ if [ "$percent" -ne "$report" ]; then
+ /bin/echo -en "$percent%\r"
+ report="$percent"
+ fi
+ fi
+done
+kill "$pid"
+
+# Count unique offsets since last run.
+seen=$(tac "$log" | grep -m1 -B"$samples"0 'Starting stack offset' | \
+ grep 'Stack offset' | awk '{print $NF}' | sort | uniq -c | wc -l)
+bits=$(echo "obase=2; $seen" | bc | wc -L)
+echo "Bits of stack entropy: $bits"
+rm -f "$log"
+
+# We would expect any functional stack randomization to be at least 5 bits.
+if [ "$bits" -lt 5 ]; then
+ echo "Stack entropy is low! Booted without 'randomize_kstack_offset=y'?"
+ exit 1
+else
+ exit 0
+fi
diff --git a/tools/testing/selftests/lkdtm/tests.txt b/tools/testing/selftests/lkdtm/tests.txt
index 92ca32143ae5..368973f05250 100644
--- a/tools/testing/selftests/lkdtm/tests.txt
+++ b/tools/testing/selftests/lkdtm/tests.txt
@@ -1,4 +1,5 @@
#PANIC
+#PANIC_STOP_IRQOFF Crashes entire system
BUG kernel BUG at
WARNING WARNING:
WARNING_MESSAGE message trigger
@@ -7,19 +8,24 @@ EXCEPTION
#EXHAUST_STACK Corrupts memory on failure
#CORRUPT_STACK Crashes entire system on success
#CORRUPT_STACK_STRONG Crashes entire system on success
+ARRAY_BOUNDS call trace:|UBSAN: array-index-out-of-bounds
CORRUPT_LIST_ADD list_add corruption
CORRUPT_LIST_DEL list_del corruption
-CORRUPT_USER_DS Invalid address limit on user-mode return
STACK_GUARD_PAGE_LEADING
STACK_GUARD_PAGE_TRAILING
-UNSET_SMEP CR4 bits went missing
+REPORT_STACK_CANARY repeat:2 ok: stack canaries differ
+UNSET_SMEP pinned CR4 bits changed:
DOUBLE_FAULT
+CORRUPT_PAC
UNALIGNED_LOAD_STORE_WRITE
-#OVERWRITE_ALLOCATION Corrupts memory on failure
+SLAB_LINEAR_OVERFLOW
+VMALLOC_LINEAR_OVERFLOW
#WRITE_AFTER_FREE Corrupts memory on failure
-READ_AFTER_FREE
+READ_AFTER_FREE call trace:|Memory correctly poisoned
#WRITE_BUDDY_AFTER_FREE Corrupts memory on failure
-READ_BUDDY_AFTER_FREE
+READ_BUDDY_AFTER_FREE call trace:|Memory correctly poisoned
+SLAB_INIT_ON_ALLOC Memory appears initialized
+BUDDY_INIT_ON_ALLOC Memory appears initialized
SLAB_FREE_DOUBLE
SLAB_FREE_CROSS
SLAB_FREE_PAGE
@@ -39,6 +45,7 @@ ACCESS_NULL
WRITE_RO
WRITE_RO_AFTER_INIT
WRITE_KERN
+WRITE_OPD
REFCOUNT_INC_OVERFLOW
REFCOUNT_ADD_OVERFLOW
REFCOUNT_INC_NOT_ZERO_OVERFLOW
@@ -58,14 +65,20 @@ REFCOUNT_DEC_AND_TEST_SATURATED Saturation detected: still saturated
REFCOUNT_SUB_AND_TEST_SATURATED Saturation detected: still saturated
#REFCOUNT_TIMING timing only
#ATOMIC_TIMING timing only
-USERCOPY_HEAP_SIZE_TO
-USERCOPY_HEAP_SIZE_FROM
-USERCOPY_HEAP_WHITELIST_TO
-USERCOPY_HEAP_WHITELIST_FROM
+USERCOPY_SLAB_SIZE_TO
+USERCOPY_SLAB_SIZE_FROM
+USERCOPY_SLAB_WHITELIST_TO
+USERCOPY_SLAB_WHITELIST_FROM
USERCOPY_STACK_FRAME_TO
USERCOPY_STACK_FRAME_FROM
USERCOPY_STACK_BEYOND
USERCOPY_KERNEL
-USERCOPY_KERNEL_DS
STACKLEAK_ERASING OK: the rest of the thread stack is properly erased
CFI_FORWARD_PROTO
+CFI_BACKWARD call trace:|ok: control flow unchanged
+FORTIFY_STRSCPY detected buffer overflow
+FORTIFY_STR_OBJECT detected buffer overflow
+FORTIFY_STR_MEMBER detected buffer overflow
+FORTIFY_MEM_OBJECT detected buffer overflow
+FORTIFY_MEM_MEMBER detected field-spanning write
+PPC_SLB_MULTIHIT Recovered
diff --git a/tools/testing/selftests/lsm/.gitignore b/tools/testing/selftests/lsm/.gitignore
new file mode 100644
index 000000000000..bd68f6c3fd07
--- /dev/null
+++ b/tools/testing/selftests/lsm/.gitignore
@@ -0,0 +1 @@
+/*_test
diff --git a/tools/testing/selftests/lsm/Makefile b/tools/testing/selftests/lsm/Makefile
new file mode 100644
index 000000000000..3f80c0bc093d
--- /dev/null
+++ b/tools/testing/selftests/lsm/Makefile
@@ -0,0 +1,17 @@
+# SPDX-License-Identifier: GPL-2.0
+#
+# First run: make -C ../../../.. headers_install
+
+CFLAGS += -Wall -O2 $(KHDR_INCLUDES)
+LOCAL_HDRS += common.h
+
+TEST_GEN_PROGS := lsm_get_self_attr_test lsm_list_modules_test \
+ lsm_set_self_attr_test
+
+include ../lib.mk
+
+$(OUTPUT)/lsm_get_self_attr_test: lsm_get_self_attr_test.c common.c
+$(OUTPUT)/lsm_set_self_attr_test: lsm_set_self_attr_test.c common.c
+$(OUTPUT)/lsm_list_modules_test: lsm_list_modules_test.c common.c
+
+EXTRA_CLEAN = $(OUTPUT)/common.o
diff --git a/tools/testing/selftests/lsm/common.c b/tools/testing/selftests/lsm/common.c
new file mode 100644
index 000000000000..9ad258912646
--- /dev/null
+++ b/tools/testing/selftests/lsm/common.c
@@ -0,0 +1,89 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Linux Security Module infrastructure tests
+ *
+ * Copyright © 2023 Casey Schaufler <casey@schaufler-ca.com>
+ */
+
+#define _GNU_SOURCE
+#include <linux/lsm.h>
+#include <fcntl.h>
+#include <string.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <unistd.h>
+#include <sys/types.h>
+#include "common.h"
+
+#define PROCATTR "/proc/self/attr/"
+
+int read_proc_attr(const char *attr, char *value, size_t size)
+{
+ int fd;
+ int len;
+ char *path;
+
+ len = strlen(PROCATTR) + strlen(attr) + 1;
+ path = calloc(len, 1);
+ if (path == NULL)
+ return -1;
+ sprintf(path, "%s%s", PROCATTR, attr);
+
+ fd = open(path, O_RDONLY);
+ free(path);
+
+ if (fd < 0)
+ return -1;
+ len = read(fd, value, size);
+
+ close(fd);
+
+ /* Ensure value is terminated */
+ if (len <= 0 || len == size)
+ return -1;
+ value[len] = '\0';
+
+ path = strchr(value, '\n');
+ if (path)
+ *path = '\0';
+
+ return 0;
+}
+
+int read_sysfs_lsms(char *lsms, size_t size)
+{
+ FILE *fp;
+ size_t red;
+
+ fp = fopen("/sys/kernel/security/lsm", "r");
+ if (fp == NULL)
+ return -1;
+ red = fread(lsms, 1, size, fp);
+ fclose(fp);
+
+ if (red <= 0 || red == size)
+ return -1;
+ lsms[red] = '\0';
+ return 0;
+}
+
+int attr_lsm_count(void)
+{
+ char *names = calloc(sysconf(_SC_PAGESIZE), 1);
+ int count = 0;
+
+ if (!names)
+ return 0;
+
+ if (read_sysfs_lsms(names, sysconf(_SC_PAGESIZE)))
+ return 0;
+
+ if (strstr(names, "selinux"))
+ count++;
+ if (strstr(names, "smack"))
+ count++;
+ if (strstr(names, "apparmor"))
+ count++;
+
+ return count;
+}
diff --git a/tools/testing/selftests/lsm/common.h b/tools/testing/selftests/lsm/common.h
new file mode 100644
index 000000000000..06d12110d241
--- /dev/null
+++ b/tools/testing/selftests/lsm/common.h
@@ -0,0 +1,33 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Linux Security Module infrastructure tests
+ *
+ * Copyright © 2023 Casey Schaufler <casey@schaufler-ca.com>
+ */
+
+#ifndef lsm_get_self_attr
+static inline int lsm_get_self_attr(unsigned int attr, struct lsm_ctx *ctx,
+ __u32 *size, __u32 flags)
+{
+ return syscall(__NR_lsm_get_self_attr, attr, ctx, size, flags);
+}
+#endif
+
+#ifndef lsm_set_self_attr
+static inline int lsm_set_self_attr(unsigned int attr, struct lsm_ctx *ctx,
+ __u32 size, __u32 flags)
+{
+ return syscall(__NR_lsm_set_self_attr, attr, ctx, size, flags);
+}
+#endif
+
+#ifndef lsm_list_modules
+static inline int lsm_list_modules(__u64 *ids, __u32 *size, __u32 flags)
+{
+ return syscall(__NR_lsm_list_modules, ids, size, flags);
+}
+#endif
+
+extern int read_proc_attr(const char *attr, char *value, size_t size);
+extern int read_sysfs_lsms(char *lsms, size_t size);
+int attr_lsm_count(void);
diff --git a/tools/testing/selftests/lsm/config b/tools/testing/selftests/lsm/config
new file mode 100644
index 000000000000..1c0c4c020f9c
--- /dev/null
+++ b/tools/testing/selftests/lsm/config
@@ -0,0 +1,3 @@
+CONFIG_SYSFS=y
+CONFIG_SECURITY=y
+CONFIG_SECURITYFS=y
diff --git a/tools/testing/selftests/lsm/lsm_get_self_attr_test.c b/tools/testing/selftests/lsm/lsm_get_self_attr_test.c
new file mode 100644
index 000000000000..df215e4aa63f
--- /dev/null
+++ b/tools/testing/selftests/lsm/lsm_get_self_attr_test.c
@@ -0,0 +1,275 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Linux Security Module infrastructure tests
+ * Tests for the lsm_get_self_attr system call
+ *
+ * Copyright © 2022 Casey Schaufler <casey@schaufler-ca.com>
+ */
+
+#define _GNU_SOURCE
+#include <linux/lsm.h>
+#include <fcntl.h>
+#include <string.h>
+#include <stdio.h>
+#include <unistd.h>
+#include <sys/types.h>
+#include "../kselftest_harness.h"
+#include "common.h"
+
+static struct lsm_ctx *next_ctx(struct lsm_ctx *ctxp)
+{
+ void *vp;
+
+ vp = (void *)ctxp + sizeof(*ctxp) + ctxp->ctx_len;
+ return (struct lsm_ctx *)vp;
+}
+
+TEST(size_null_lsm_get_self_attr)
+{
+ const long page_size = sysconf(_SC_PAGESIZE);
+ struct lsm_ctx *ctx = calloc(page_size, 1);
+
+ ASSERT_NE(NULL, ctx);
+ errno = 0;
+ ASSERT_EQ(-1, lsm_get_self_attr(LSM_ATTR_CURRENT, ctx, NULL, 0));
+ ASSERT_EQ(EINVAL, errno);
+
+ free(ctx);
+}
+
+TEST(ctx_null_lsm_get_self_attr)
+{
+ const long page_size = sysconf(_SC_PAGESIZE);
+ __u32 size = page_size;
+ int rc;
+
+ rc = lsm_get_self_attr(LSM_ATTR_CURRENT, NULL, &size, 0);
+
+ if (attr_lsm_count()) {
+ ASSERT_NE(-1, rc);
+ ASSERT_NE(1, size);
+ } else {
+ ASSERT_EQ(-1, rc);
+ }
+}
+
+TEST(size_too_small_lsm_get_self_attr)
+{
+ const long page_size = sysconf(_SC_PAGESIZE);
+ struct lsm_ctx *ctx = calloc(page_size, 1);
+ __u32 size = 1;
+
+ ASSERT_NE(NULL, ctx);
+ errno = 0;
+ ASSERT_EQ(-1, lsm_get_self_attr(LSM_ATTR_CURRENT, ctx, &size, 0));
+ if (attr_lsm_count()) {
+ ASSERT_EQ(E2BIG, errno);
+ } else {
+ ASSERT_EQ(EOPNOTSUPP, errno);
+ }
+ ASSERT_NE(1, size);
+
+ free(ctx);
+}
+
+TEST(flags_zero_lsm_get_self_attr)
+{
+ const long page_size = sysconf(_SC_PAGESIZE);
+ struct lsm_ctx *ctx = calloc(page_size, 1);
+ __u64 *syscall_lsms = calloc(page_size, 1);
+ __u32 size;
+ int lsmcount;
+ int i;
+
+ ASSERT_NE(NULL, ctx);
+ errno = 0;
+ size = page_size;
+ ASSERT_EQ(-1, lsm_get_self_attr(LSM_ATTR_CURRENT, ctx, &size,
+ LSM_FLAG_SINGLE));
+ ASSERT_EQ(EINVAL, errno);
+ ASSERT_EQ(page_size, size);
+
+ lsmcount = syscall(__NR_lsm_list_modules, syscall_lsms, &size, 0);
+ ASSERT_LE(1, lsmcount);
+ ASSERT_NE(NULL, syscall_lsms);
+
+ for (i = 0; i < lsmcount; i++) {
+ errno = 0;
+ size = page_size;
+ ctx->id = syscall_lsms[i];
+
+ if (syscall_lsms[i] == LSM_ID_SELINUX ||
+ syscall_lsms[i] == LSM_ID_SMACK ||
+ syscall_lsms[i] == LSM_ID_APPARMOR) {
+ ASSERT_EQ(1, lsm_get_self_attr(LSM_ATTR_CURRENT, ctx,
+ &size, LSM_FLAG_SINGLE));
+ } else {
+ ASSERT_EQ(-1, lsm_get_self_attr(LSM_ATTR_CURRENT, ctx,
+ &size,
+ LSM_FLAG_SINGLE));
+ }
+ }
+
+ free(ctx);
+}
+
+TEST(flags_overset_lsm_get_self_attr)
+{
+ const long page_size = sysconf(_SC_PAGESIZE);
+ struct lsm_ctx *ctx = calloc(page_size, 1);
+ __u32 size;
+
+ ASSERT_NE(NULL, ctx);
+
+ errno = 0;
+ size = page_size;
+ ASSERT_EQ(-1, lsm_get_self_attr(LSM_ATTR_CURRENT | LSM_ATTR_PREV, ctx,
+ &size, 0));
+ ASSERT_EQ(EOPNOTSUPP, errno);
+
+ errno = 0;
+ size = page_size;
+ ASSERT_EQ(-1, lsm_get_self_attr(LSM_ATTR_CURRENT, ctx, &size,
+ LSM_FLAG_SINGLE |
+ (LSM_FLAG_SINGLE << 1)));
+ ASSERT_EQ(EINVAL, errno);
+
+ free(ctx);
+}
+
+TEST(basic_lsm_get_self_attr)
+{
+ const long page_size = sysconf(_SC_PAGESIZE);
+ __u32 size = page_size;
+ struct lsm_ctx *ctx = calloc(page_size, 1);
+ struct lsm_ctx *tctx = NULL;
+ __u64 *syscall_lsms = calloc(page_size, 1);
+ char *attr = calloc(page_size, 1);
+ int cnt_current = 0;
+ int cnt_exec = 0;
+ int cnt_fscreate = 0;
+ int cnt_keycreate = 0;
+ int cnt_prev = 0;
+ int cnt_sockcreate = 0;
+ int lsmcount;
+ int count;
+ int i;
+
+ ASSERT_NE(NULL, ctx);
+ ASSERT_NE(NULL, syscall_lsms);
+
+ lsmcount = syscall(__NR_lsm_list_modules, syscall_lsms, &size, 0);
+ ASSERT_LE(1, lsmcount);
+
+ for (i = 0; i < lsmcount; i++) {
+ switch (syscall_lsms[i]) {
+ case LSM_ID_SELINUX:
+ cnt_current++;
+ cnt_exec++;
+ cnt_fscreate++;
+ cnt_keycreate++;
+ cnt_prev++;
+ cnt_sockcreate++;
+ break;
+ case LSM_ID_SMACK:
+ cnt_current++;
+ break;
+ case LSM_ID_APPARMOR:
+ cnt_current++;
+ cnt_exec++;
+ cnt_prev++;
+ break;
+ default:
+ break;
+ }
+ }
+
+ if (cnt_current) {
+ size = page_size;
+ count = lsm_get_self_attr(LSM_ATTR_CURRENT, ctx, &size, 0);
+ ASSERT_EQ(cnt_current, count);
+ tctx = ctx;
+ ASSERT_EQ(0, read_proc_attr("current", attr, page_size));
+ ASSERT_EQ(0, strcmp((char *)tctx->ctx, attr));
+ for (i = 1; i < count; i++) {
+ tctx = next_ctx(tctx);
+ ASSERT_NE(0, strcmp((char *)tctx->ctx, attr));
+ }
+ }
+ if (cnt_exec) {
+ size = page_size;
+ count = lsm_get_self_attr(LSM_ATTR_EXEC, ctx, &size, 0);
+ ASSERT_GE(cnt_exec, count);
+ if (count > 0) {
+ tctx = ctx;
+ if (read_proc_attr("exec", attr, page_size) == 0)
+ ASSERT_EQ(0, strcmp((char *)tctx->ctx, attr));
+ }
+ for (i = 1; i < count; i++) {
+ tctx = next_ctx(tctx);
+ ASSERT_NE(0, strcmp((char *)tctx->ctx, attr));
+ }
+ }
+ if (cnt_fscreate) {
+ size = page_size;
+ count = lsm_get_self_attr(LSM_ATTR_FSCREATE, ctx, &size, 0);
+ ASSERT_GE(cnt_fscreate, count);
+ if (count > 0) {
+ tctx = ctx;
+ if (read_proc_attr("fscreate", attr, page_size) == 0)
+ ASSERT_EQ(0, strcmp((char *)tctx->ctx, attr));
+ }
+ for (i = 1; i < count; i++) {
+ tctx = next_ctx(tctx);
+ ASSERT_NE(0, strcmp((char *)tctx->ctx, attr));
+ }
+ }
+ if (cnt_keycreate) {
+ size = page_size;
+ count = lsm_get_self_attr(LSM_ATTR_KEYCREATE, ctx, &size, 0);
+ ASSERT_GE(cnt_keycreate, count);
+ if (count > 0) {
+ tctx = ctx;
+ if (read_proc_attr("keycreate", attr, page_size) == 0)
+ ASSERT_EQ(0, strcmp((char *)tctx->ctx, attr));
+ }
+ for (i = 1; i < count; i++) {
+ tctx = next_ctx(tctx);
+ ASSERT_NE(0, strcmp((char *)tctx->ctx, attr));
+ }
+ }
+ if (cnt_prev) {
+ size = page_size;
+ count = lsm_get_self_attr(LSM_ATTR_PREV, ctx, &size, 0);
+ ASSERT_GE(cnt_prev, count);
+ if (count > 0) {
+ tctx = ctx;
+ ASSERT_EQ(0, read_proc_attr("prev", attr, page_size));
+ ASSERT_EQ(0, strcmp((char *)tctx->ctx, attr));
+ for (i = 1; i < count; i++) {
+ tctx = next_ctx(tctx);
+ ASSERT_NE(0, strcmp((char *)tctx->ctx, attr));
+ }
+ }
+ }
+ if (cnt_sockcreate) {
+ size = page_size;
+ count = lsm_get_self_attr(LSM_ATTR_SOCKCREATE, ctx, &size, 0);
+ ASSERT_GE(cnt_sockcreate, count);
+ if (count > 0) {
+ tctx = ctx;
+ if (read_proc_attr("sockcreate", attr, page_size) == 0)
+ ASSERT_EQ(0, strcmp((char *)tctx->ctx, attr));
+ }
+ for (i = 1; i < count; i++) {
+ tctx = next_ctx(tctx);
+ ASSERT_NE(0, strcmp((char *)tctx->ctx, attr));
+ }
+ }
+
+ free(ctx);
+ free(attr);
+ free(syscall_lsms);
+}
+
+TEST_HARNESS_MAIN
diff --git a/tools/testing/selftests/lsm/lsm_list_modules_test.c b/tools/testing/selftests/lsm/lsm_list_modules_test.c
new file mode 100644
index 000000000000..06d24d4679a6
--- /dev/null
+++ b/tools/testing/selftests/lsm/lsm_list_modules_test.c
@@ -0,0 +1,143 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Linux Security Module infrastructure tests
+ * Tests for the lsm_list_modules system call
+ *
+ * Copyright © 2022 Casey Schaufler <casey@schaufler-ca.com>
+ */
+
+#define _GNU_SOURCE
+#include <linux/lsm.h>
+#include <string.h>
+#include <stdio.h>
+#include <unistd.h>
+#include <sys/types.h>
+#include "../kselftest_harness.h"
+#include "common.h"
+
+TEST(size_null_lsm_list_modules)
+{
+ const long page_size = sysconf(_SC_PAGESIZE);
+ __u64 *syscall_lsms = calloc(page_size, 1);
+
+ ASSERT_NE(NULL, syscall_lsms);
+ errno = 0;
+ ASSERT_EQ(-1, lsm_list_modules(syscall_lsms, NULL, 0));
+ ASSERT_EQ(EFAULT, errno);
+
+ free(syscall_lsms);
+}
+
+TEST(ids_null_lsm_list_modules)
+{
+ const long page_size = sysconf(_SC_PAGESIZE);
+ __u32 size = page_size;
+
+ errno = 0;
+ ASSERT_EQ(-1, lsm_list_modules(NULL, &size, 0));
+ ASSERT_EQ(EFAULT, errno);
+ ASSERT_NE(1, size);
+}
+
+TEST(size_too_small_lsm_list_modules)
+{
+ const long page_size = sysconf(_SC_PAGESIZE);
+ __u64 *syscall_lsms = calloc(page_size, 1);
+ __u32 size = 1;
+
+ ASSERT_NE(NULL, syscall_lsms);
+ errno = 0;
+ ASSERT_EQ(-1, lsm_list_modules(syscall_lsms, &size, 0));
+ ASSERT_EQ(E2BIG, errno);
+ ASSERT_NE(1, size);
+
+ free(syscall_lsms);
+}
+
+TEST(flags_set_lsm_list_modules)
+{
+ const long page_size = sysconf(_SC_PAGESIZE);
+ __u64 *syscall_lsms = calloc(page_size, 1);
+ __u32 size = page_size;
+
+ ASSERT_NE(NULL, syscall_lsms);
+ errno = 0;
+ ASSERT_EQ(-1, lsm_list_modules(syscall_lsms, &size, 7));
+ ASSERT_EQ(EINVAL, errno);
+ ASSERT_EQ(page_size, size);
+
+ free(syscall_lsms);
+}
+
+TEST(correct_lsm_list_modules)
+{
+ const long page_size = sysconf(_SC_PAGESIZE);
+ __u32 size = page_size;
+ __u64 *syscall_lsms = calloc(page_size, 1);
+ char *sysfs_lsms = calloc(page_size, 1);
+ char *name;
+ char *cp;
+ int count;
+ int i;
+
+ ASSERT_NE(NULL, sysfs_lsms);
+ ASSERT_NE(NULL, syscall_lsms);
+ ASSERT_EQ(0, read_sysfs_lsms(sysfs_lsms, page_size));
+
+ count = lsm_list_modules(syscall_lsms, &size, 0);
+ ASSERT_LE(1, count);
+ cp = sysfs_lsms;
+ for (i = 0; i < count; i++) {
+ switch (syscall_lsms[i]) {
+ case LSM_ID_CAPABILITY:
+ name = "capability";
+ break;
+ case LSM_ID_SELINUX:
+ name = "selinux";
+ break;
+ case LSM_ID_SMACK:
+ name = "smack";
+ break;
+ case LSM_ID_TOMOYO:
+ name = "tomoyo";
+ break;
+ case LSM_ID_APPARMOR:
+ name = "apparmor";
+ break;
+ case LSM_ID_YAMA:
+ name = "yama";
+ break;
+ case LSM_ID_LOADPIN:
+ name = "loadpin";
+ break;
+ case LSM_ID_SAFESETID:
+ name = "safesetid";
+ break;
+ case LSM_ID_LOCKDOWN:
+ name = "lockdown";
+ break;
+ case LSM_ID_BPF:
+ name = "bpf";
+ break;
+ case LSM_ID_LANDLOCK:
+ name = "landlock";
+ break;
+ case LSM_ID_IMA:
+ name = "ima";
+ break;
+ case LSM_ID_EVM:
+ name = "evm";
+ break;
+ default:
+ name = "INVALID";
+ break;
+ }
+ ASSERT_EQ(0, strncmp(cp, name, strlen(name)));
+ cp += strlen(name) + 1;
+ }
+
+ free(sysfs_lsms);
+ free(syscall_lsms);
+}
+
+TEST_HARNESS_MAIN
diff --git a/tools/testing/selftests/lsm/lsm_set_self_attr_test.c b/tools/testing/selftests/lsm/lsm_set_self_attr_test.c
new file mode 100644
index 000000000000..66dec47e3ca3
--- /dev/null
+++ b/tools/testing/selftests/lsm/lsm_set_self_attr_test.c
@@ -0,0 +1,74 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Linux Security Module infrastructure tests
+ * Tests for the lsm_set_self_attr system call
+ *
+ * Copyright © 2022 Casey Schaufler <casey@schaufler-ca.com>
+ */
+
+#define _GNU_SOURCE
+#include <linux/lsm.h>
+#include <string.h>
+#include <stdio.h>
+#include <unistd.h>
+#include <sys/types.h>
+#include "../kselftest_harness.h"
+#include "common.h"
+
+TEST(ctx_null_lsm_set_self_attr)
+{
+ ASSERT_EQ(-1, lsm_set_self_attr(LSM_ATTR_CURRENT, NULL,
+ sizeof(struct lsm_ctx), 0));
+}
+
+TEST(size_too_small_lsm_set_self_attr)
+{
+ const long page_size = sysconf(_SC_PAGESIZE);
+ struct lsm_ctx *ctx = calloc(page_size, 1);
+ __u32 size = page_size;
+
+ ASSERT_NE(NULL, ctx);
+ if (attr_lsm_count()) {
+ ASSERT_LE(1, lsm_get_self_attr(LSM_ATTR_CURRENT, ctx, &size,
+ 0));
+ }
+ ASSERT_EQ(-1, lsm_set_self_attr(LSM_ATTR_CURRENT, ctx, 1, 0));
+
+ free(ctx);
+}
+
+TEST(flags_zero_lsm_set_self_attr)
+{
+ const long page_size = sysconf(_SC_PAGESIZE);
+ struct lsm_ctx *ctx = calloc(page_size, 1);
+ __u32 size = page_size;
+
+ ASSERT_NE(NULL, ctx);
+ if (attr_lsm_count()) {
+ ASSERT_LE(1, lsm_get_self_attr(LSM_ATTR_CURRENT, ctx, &size,
+ 0));
+ }
+ ASSERT_EQ(-1, lsm_set_self_attr(LSM_ATTR_CURRENT, ctx, size, 1));
+
+ free(ctx);
+}
+
+TEST(flags_overset_lsm_set_self_attr)
+{
+ const long page_size = sysconf(_SC_PAGESIZE);
+ char *ctx = calloc(page_size, 1);
+ __u32 size = page_size;
+ struct lsm_ctx *tctx = (struct lsm_ctx *)ctx;
+
+ ASSERT_NE(NULL, ctx);
+ if (attr_lsm_count()) {
+ ASSERT_LE(1, lsm_get_self_attr(LSM_ATTR_CURRENT, tctx, &size,
+ 0));
+ }
+ ASSERT_EQ(-1, lsm_set_self_attr(LSM_ATTR_CURRENT | LSM_ATTR_PREV, tctx,
+ size, 0));
+
+ free(ctx);
+}
+
+TEST_HARNESS_MAIN
diff --git a/tools/testing/selftests/media_tests/Makefile b/tools/testing/selftests/media_tests/Makefile
index 60826d7d37d4..471d83e61d95 100644
--- a/tools/testing/selftests/media_tests/Makefile
+++ b/tools/testing/selftests/media_tests/Makefile
@@ -1,6 +1,6 @@
# SPDX-License-Identifier: GPL-2.0
#
-CFLAGS += -I../ -I../../../../usr/include/
+CFLAGS += -I../ $(KHDR_INCLUDES)
TEST_GEN_PROGS := media_device_test media_device_open video_device_test
include ../lib.mk
diff --git a/tools/testing/selftests/media_tests/video_device_test.c b/tools/testing/selftests/media_tests/video_device_test.c
index 0f6aef2e2593..2c44e115f2f0 100644
--- a/tools/testing/selftests/media_tests/video_device_test.c
+++ b/tools/testing/selftests/media_tests/video_device_test.c
@@ -37,45 +37,58 @@
#include <time.h>
#include <linux/videodev2.h>
-int main(int argc, char **argv)
+#define PRIORITY_MAX 4
+
+int priority_test(int fd)
{
- int opt;
- char video_dev[256];
- int count;
- struct v4l2_tuner vtuner;
- struct v4l2_capability vcap;
+ /* This test will try to update the priority associated with a file descriptor */
+
+ enum v4l2_priority old_priority, new_priority, priority_to_compare;
int ret;
- int fd;
+ int result = 0;
- if (argc < 2) {
- printf("Usage: %s [-d </dev/videoX>]\n", argv[0]);
- exit(-1);
+ ret = ioctl(fd, VIDIOC_G_PRIORITY, &old_priority);
+ if (ret < 0) {
+ printf("Failed to get priority: %s\n", strerror(errno));
+ return -1;
+ }
+ new_priority = (old_priority + 1) % PRIORITY_MAX;
+ ret = ioctl(fd, VIDIOC_S_PRIORITY, &new_priority);
+ if (ret < 0) {
+ printf("Failed to set priority: %s\n", strerror(errno));
+ return -1;
+ }
+ ret = ioctl(fd, VIDIOC_G_PRIORITY, &priority_to_compare);
+ if (ret < 0) {
+ printf("Failed to get new priority: %s\n", strerror(errno));
+ result = -1;
+ goto cleanup;
+ }
+ if (priority_to_compare != new_priority) {
+ printf("Priority wasn't set - test failed\n");
+ result = -1;
}
- /* Process arguments */
- while ((opt = getopt(argc, argv, "d:")) != -1) {
- switch (opt) {
- case 'd':
- strncpy(video_dev, optarg, sizeof(video_dev) - 1);
- video_dev[sizeof(video_dev)-1] = '\0';
- break;
- default:
- printf("Usage: %s [-d </dev/videoX>]\n", argv[0]);
- exit(-1);
- }
+cleanup:
+ ret = ioctl(fd, VIDIOC_S_PRIORITY, &old_priority);
+ if (ret < 0) {
+ printf("Failed to restore priority: %s\n", strerror(errno));
+ return -1;
}
+ return result;
+}
+
+int loop_test(int fd)
+{
+ int count;
+ struct v4l2_tuner vtuner;
+ struct v4l2_capability vcap;
+ int ret;
/* Generate random number of interations */
srand((unsigned int) time(NULL));
count = rand();
- /* Open Video device and keep it open */
- fd = open(video_dev, O_RDWR);
- if (fd == -1) {
- printf("Video Device open errno %s\n", strerror(errno));
- exit(-1);
- }
-
printf("\nNote:\n"
"While test is running, remove the device or unbind\n"
"driver and ensure there are no use after free errors\n"
@@ -98,4 +111,46 @@ int main(int argc, char **argv)
sleep(10);
count--;
}
+ return 0;
+}
+
+int main(int argc, char **argv)
+{
+ int opt;
+ char video_dev[256];
+ int fd;
+ int test_result;
+
+ if (argc < 2) {
+ printf("Usage: %s [-d </dev/videoX>]\n", argv[0]);
+ exit(-1);
+ }
+
+ /* Process arguments */
+ while ((opt = getopt(argc, argv, "d:")) != -1) {
+ switch (opt) {
+ case 'd':
+ strncpy(video_dev, optarg, sizeof(video_dev) - 1);
+ video_dev[sizeof(video_dev)-1] = '\0';
+ break;
+ default:
+ printf("Usage: %s [-d </dev/videoX>]\n", argv[0]);
+ exit(-1);
+ }
+ }
+
+ /* Open Video device and keep it open */
+ fd = open(video_dev, O_RDWR);
+ if (fd == -1) {
+ printf("Video Device open errno %s\n", strerror(errno));
+ exit(-1);
+ }
+
+ test_result = priority_test(fd);
+ if (!test_result)
+ printf("Priority test - PASSED\n");
+ else
+ printf("Priority test - FAILED\n");
+
+ loop_test(fd);
}
diff --git a/tools/testing/selftests/membarrier/Makefile b/tools/testing/selftests/membarrier/Makefile
index 34d1c81a2324..fc840e06ff56 100644
--- a/tools/testing/selftests/membarrier/Makefile
+++ b/tools/testing/selftests/membarrier/Makefile
@@ -1,5 +1,5 @@
# SPDX-License-Identifier: GPL-2.0-only
-CFLAGS += -g -I../../../../usr/include/
+CFLAGS += -g $(KHDR_INCLUDES)
LDLIBS += -lpthread
TEST_GEN_PROGS := membarrier_test_single_thread \
diff --git a/tools/testing/selftests/membarrier/membarrier_test_impl.h b/tools/testing/selftests/membarrier/membarrier_test_impl.h
index 186be69f0a59..af89855adb7b 100644
--- a/tools/testing/selftests/membarrier/membarrier_test_impl.h
+++ b/tools/testing/selftests/membarrier/membarrier_test_impl.h
@@ -9,11 +9,38 @@
#include "../kselftest.h"
+static int registrations;
+
static int sys_membarrier(int cmd, int flags)
{
return syscall(__NR_membarrier, cmd, flags);
}
+static int test_membarrier_get_registrations(int cmd)
+{
+ int ret, flags = 0;
+ const char *test_name =
+ "sys membarrier MEMBARRIER_CMD_GET_REGISTRATIONS";
+
+ registrations |= cmd;
+
+ ret = sys_membarrier(MEMBARRIER_CMD_GET_REGISTRATIONS, 0);
+ if (ret < 0) {
+ ksft_exit_fail_msg(
+ "%s test: flags = %d, errno = %d\n",
+ test_name, flags, errno);
+ } else if (ret != registrations) {
+ ksft_exit_fail_msg(
+ "%s test: flags = %d, ret = %d, registrations = %d\n",
+ test_name, flags, ret, registrations);
+ }
+ ksft_test_result_pass(
+ "%s test: flags = %d, ret = %d, registrations = %d\n",
+ test_name, flags, ret, registrations);
+
+ return 0;
+}
+
static int test_membarrier_cmd_fail(void)
{
int cmd = -1, flags = 0;
@@ -113,6 +140,8 @@ static int test_membarrier_register_private_expedited_success(void)
ksft_test_result_pass(
"%s test: flags = %d\n",
test_name, flags);
+
+ test_membarrier_get_registrations(cmd);
return 0;
}
@@ -170,6 +199,8 @@ static int test_membarrier_register_private_expedited_sync_core_success(void)
ksft_test_result_pass(
"%s test: flags = %d\n",
test_name, flags);
+
+ test_membarrier_get_registrations(cmd);
return 0;
}
@@ -204,6 +235,8 @@ static int test_membarrier_register_global_expedited_success(void)
ksft_test_result_pass(
"%s test: flags = %d\n",
test_name, flags);
+
+ test_membarrier_get_registrations(cmd);
return 0;
}
diff --git a/tools/testing/selftests/membarrier/membarrier_test_multi_thread.c b/tools/testing/selftests/membarrier/membarrier_test_multi_thread.c
index ac5613e5b0eb..a9cc17facfb3 100644
--- a/tools/testing/selftests/membarrier/membarrier_test_multi_thread.c
+++ b/tools/testing/selftests/membarrier/membarrier_test_multi_thread.c
@@ -62,7 +62,7 @@ static int test_mt_membarrier(void)
int main(int argc, char **argv)
{
ksft_print_header();
- ksft_set_plan(13);
+ ksft_set_plan(16);
test_membarrier_query();
diff --git a/tools/testing/selftests/membarrier/membarrier_test_single_thread.c b/tools/testing/selftests/membarrier/membarrier_test_single_thread.c
index c1c963902854..4cdc8b1d124c 100644
--- a/tools/testing/selftests/membarrier/membarrier_test_single_thread.c
+++ b/tools/testing/selftests/membarrier/membarrier_test_single_thread.c
@@ -12,7 +12,9 @@
int main(int argc, char **argv)
{
ksft_print_header();
- ksft_set_plan(13);
+ ksft_set_plan(18);
+
+ test_membarrier_get_registrations(/*cmd=*/0);
test_membarrier_query();
@@ -20,5 +22,7 @@ int main(int argc, char **argv)
test_membarrier_success();
+ test_membarrier_get_registrations(/*cmd=*/0);
+
return ksft_exit_pass();
}
diff --git a/tools/testing/selftests/memfd/Makefile b/tools/testing/selftests/memfd/Makefile
index 4da8b565fa32..163b6f68631c 100644
--- a/tools/testing/selftests/memfd/Makefile
+++ b/tools/testing/selftests/memfd/Makefile
@@ -1,8 +1,6 @@
# SPDX-License-Identifier: GPL-2.0
CFLAGS += -D_FILE_OFFSET_BITS=64
-CFLAGS += -I../../../../include/uapi/
-CFLAGS += -I../../../../include/
-CFLAGS += -I../../../../usr/include/
+CFLAGS += $(KHDR_INCLUDES)
TEST_GEN_PROGS := memfd_test
TEST_PROGS := run_fuse_test.sh run_hugetlbfs_test.sh
diff --git a/tools/testing/selftests/memfd/fuse_test.c b/tools/testing/selftests/memfd/fuse_test.c
index b018e835737d..93798c8c5d54 100644
--- a/tools/testing/selftests/memfd/fuse_test.c
+++ b/tools/testing/selftests/memfd/fuse_test.c
@@ -20,8 +20,9 @@
#include <inttypes.h>
#include <limits.h>
#include <linux/falloc.h>
-#include <linux/fcntl.h>
+#include <fcntl.h>
#include <linux/memfd.h>
+#include <linux/types.h>
#include <sched.h>
#include <stdio.h>
#include <stdlib.h>
diff --git a/tools/testing/selftests/memfd/memfd_test.c b/tools/testing/selftests/memfd/memfd_test.c
index 334a7eea2004..18f585684e20 100644
--- a/tools/testing/selftests/memfd/memfd_test.c
+++ b/tools/testing/selftests/memfd/memfd_test.c
@@ -6,7 +6,7 @@
#include <inttypes.h>
#include <limits.h>
#include <linux/falloc.h>
-#include <linux/fcntl.h>
+#include <fcntl.h>
#include <linux/memfd.h>
#include <sched.h>
#include <stdio.h>
@@ -18,6 +18,7 @@
#include <sys/syscall.h>
#include <sys/wait.h>
#include <unistd.h>
+#include <ctype.h>
#include "common.h"
@@ -28,12 +29,46 @@
#define MFD_DEF_SIZE 8192
#define STACK_SIZE 65536
+#define F_SEAL_EXEC 0x0020
+
+#define F_WX_SEALS (F_SEAL_SHRINK | \
+ F_SEAL_GROW | \
+ F_SEAL_WRITE | \
+ F_SEAL_FUTURE_WRITE | \
+ F_SEAL_EXEC)
+
+#define MFD_NOEXEC_SEAL 0x0008U
+
/*
* Default is not to test hugetlbfs
*/
static size_t mfd_def_size = MFD_DEF_SIZE;
static const char *memfd_str = MEMFD_STR;
+static ssize_t fd2name(int fd, char *buf, size_t bufsize)
+{
+ char buf1[PATH_MAX];
+ int size;
+ ssize_t nbytes;
+
+ size = snprintf(buf1, PATH_MAX, "/proc/self/fd/%d", fd);
+ if (size < 0) {
+ printf("snprintf(%d) failed on %m\n", fd);
+ abort();
+ }
+
+ /*
+ * reserver one byte for string termination.
+ */
+ nbytes = readlink(buf1, buf, bufsize-1);
+ if (nbytes == -1) {
+ printf("readlink(%s) failed %m\n", buf1);
+ abort();
+ }
+ buf[nbytes] = '\0';
+ return nbytes;
+}
+
static int mfd_assert_new(const char *name, loff_t sz, unsigned int flags)
{
int r, fd;
@@ -54,9 +89,67 @@ static int mfd_assert_new(const char *name, loff_t sz, unsigned int flags)
return fd;
}
+static void sysctl_assert_write(const char *val)
+{
+ int fd = open("/proc/sys/vm/memfd_noexec", O_WRONLY | O_CLOEXEC);
+
+ if (fd < 0) {
+ printf("open sysctl failed: %m\n");
+ abort();
+ }
+
+ if (write(fd, val, strlen(val)) < 0) {
+ printf("write sysctl %s failed: %m\n", val);
+ abort();
+ }
+}
+
+static void sysctl_fail_write(const char *val)
+{
+ int fd = open("/proc/sys/vm/memfd_noexec", O_WRONLY | O_CLOEXEC);
+
+ if (fd < 0) {
+ printf("open sysctl failed: %m\n");
+ abort();
+ }
+
+ if (write(fd, val, strlen(val)) >= 0) {
+ printf("write sysctl %s succeeded, but failure expected\n",
+ val);
+ abort();
+ }
+}
+
+static void sysctl_assert_equal(const char *val)
+{
+ char *p, buf[128] = {};
+ int fd = open("/proc/sys/vm/memfd_noexec", O_RDONLY | O_CLOEXEC);
+
+ if (fd < 0) {
+ printf("open sysctl failed: %m\n");
+ abort();
+ }
+
+ if (read(fd, buf, sizeof(buf)) < 0) {
+ printf("read sysctl failed: %m\n");
+ abort();
+ }
+
+ /* Strip trailing whitespace. */
+ p = buf;
+ while (!isspace(*p))
+ p++;
+ *p = '\0';
+
+ if (strcmp(buf, val) != 0) {
+ printf("unexpected sysctl value: expected %s, got %s\n", val, buf);
+ abort();
+ }
+}
+
static int mfd_assert_reopen_fd(int fd_in)
{
- int r, fd;
+ int fd;
char path[100];
sprintf(path, "/proc/self/fd/%d", fd_in);
@@ -98,11 +191,13 @@ static unsigned int mfd_assert_get_seals(int fd)
static void mfd_assert_has_seals(int fd, unsigned int seals)
{
+ char buf[PATH_MAX];
unsigned int s;
+ fd2name(fd, buf, PATH_MAX);
s = mfd_assert_get_seals(fd);
if (s != seals) {
- printf("%u != %u = GET_SEALS(%d)\n", seals, s, fd);
+ printf("%u != %u = GET_SEALS(%s)\n", seals, s, buf);
abort();
}
}
@@ -455,6 +550,7 @@ static void mfd_fail_write(int fd)
printf("mmap()+mprotect() didn't fail as expected\n");
abort();
}
+ munmap(p, mfd_def_size);
}
/* verify PUNCH_HOLE fails */
@@ -593,6 +689,61 @@ static void mfd_fail_grow_write(int fd)
}
}
+static void mfd_assert_mode(int fd, int mode)
+{
+ struct stat st;
+ char buf[PATH_MAX];
+
+ fd2name(fd, buf, PATH_MAX);
+
+ if (fstat(fd, &st) < 0) {
+ printf("fstat(%s) failed: %m\n", buf);
+ abort();
+ }
+
+ if ((st.st_mode & 07777) != mode) {
+ printf("fstat(%s) wrong file mode 0%04o, but expected 0%04o\n",
+ buf, (int)st.st_mode & 07777, mode);
+ abort();
+ }
+}
+
+static void mfd_assert_chmod(int fd, int mode)
+{
+ char buf[PATH_MAX];
+
+ fd2name(fd, buf, PATH_MAX);
+
+ if (fchmod(fd, mode) < 0) {
+ printf("fchmod(%s, 0%04o) failed: %m\n", buf, mode);
+ abort();
+ }
+
+ mfd_assert_mode(fd, mode);
+}
+
+static void mfd_fail_chmod(int fd, int mode)
+{
+ struct stat st;
+ char buf[PATH_MAX];
+
+ fd2name(fd, buf, PATH_MAX);
+
+ if (fstat(fd, &st) < 0) {
+ printf("fstat(%s) failed: %m\n", buf);
+ abort();
+ }
+
+ if (fchmod(fd, mode) == 0) {
+ printf("fchmod(%s, 0%04o) didn't fail as expected\n",
+ buf, mode);
+ abort();
+ }
+
+ /* verify that file mode bits did not change */
+ mfd_assert_mode(fd, st.st_mode & 07777);
+}
+
static int idle_thread_fn(void *arg)
{
sigset_t set;
@@ -606,7 +757,7 @@ static int idle_thread_fn(void *arg)
return 0;
}
-static pid_t spawn_idle_thread(unsigned int flags)
+static pid_t spawn_thread(unsigned int flags, int (*fn)(void *), void *arg)
{
uint8_t *stack;
pid_t pid;
@@ -617,10 +768,7 @@ static pid_t spawn_idle_thread(unsigned int flags)
abort();
}
- pid = clone(idle_thread_fn,
- stack + STACK_SIZE,
- SIGCHLD | flags,
- NULL);
+ pid = clone(fn, stack + STACK_SIZE, SIGCHLD | flags, arg);
if (pid < 0) {
printf("clone() failed: %m\n");
abort();
@@ -629,6 +777,33 @@ static pid_t spawn_idle_thread(unsigned int flags)
return pid;
}
+static void join_thread(pid_t pid)
+{
+ int wstatus;
+
+ if (waitpid(pid, &wstatus, 0) < 0) {
+ printf("newpid thread: waitpid() failed: %m\n");
+ abort();
+ }
+
+ if (WIFEXITED(wstatus) && WEXITSTATUS(wstatus) != 0) {
+ printf("newpid thread: exited with non-zero error code %d\n",
+ WEXITSTATUS(wstatus));
+ abort();
+ }
+
+ if (WIFSIGNALED(wstatus)) {
+ printf("newpid thread: killed by signal %d\n",
+ WTERMSIG(wstatus));
+ abort();
+ }
+}
+
+static pid_t spawn_idle_thread(unsigned int flags)
+{
+ return spawn_thread(flags, idle_thread_fn, NULL);
+}
+
static void join_idle_thread(pid_t pid)
{
kill(pid, SIGTERM);
@@ -670,6 +845,9 @@ static void test_create(void)
mfd_fail_new("", ~0);
mfd_fail_new("", 0x80000000U);
+ /* verify EXEC and NOEXEC_SEAL can't both be set */
+ mfd_fail_new("", MFD_EXEC | MFD_NOEXEC_SEAL);
+
/* verify MFD_CLOEXEC is allowed */
fd = mfd_assert_new("", 0, MFD_CLOEXEC);
close(fd);
@@ -880,6 +1058,357 @@ static void test_seal_resize(void)
}
/*
+ * Test SEAL_EXEC
+ * Test fd is created with exec and allow sealing.
+ * chmod() cannot change x bits after sealing.
+ */
+static void test_exec_seal(void)
+{
+ int fd;
+
+ printf("%s SEAL-EXEC\n", memfd_str);
+
+ printf("%s Apply SEAL_EXEC\n", memfd_str);
+ fd = mfd_assert_new("kern_memfd_seal_exec",
+ mfd_def_size,
+ MFD_CLOEXEC | MFD_ALLOW_SEALING | MFD_EXEC);
+
+ mfd_assert_mode(fd, 0777);
+ mfd_assert_chmod(fd, 0644);
+
+ mfd_assert_has_seals(fd, 0);
+ mfd_assert_add_seals(fd, F_SEAL_EXEC);
+ mfd_assert_has_seals(fd, F_SEAL_EXEC);
+
+ mfd_assert_chmod(fd, 0600);
+ mfd_fail_chmod(fd, 0777);
+ mfd_fail_chmod(fd, 0670);
+ mfd_fail_chmod(fd, 0605);
+ mfd_fail_chmod(fd, 0700);
+ mfd_fail_chmod(fd, 0100);
+ mfd_assert_chmod(fd, 0666);
+ mfd_assert_write(fd);
+ close(fd);
+
+ printf("%s Apply ALL_SEALS\n", memfd_str);
+ fd = mfd_assert_new("kern_memfd_seal_exec",
+ mfd_def_size,
+ MFD_CLOEXEC | MFD_ALLOW_SEALING | MFD_EXEC);
+
+ mfd_assert_mode(fd, 0777);
+ mfd_assert_chmod(fd, 0700);
+
+ mfd_assert_has_seals(fd, 0);
+ mfd_assert_add_seals(fd, F_SEAL_EXEC);
+ mfd_assert_has_seals(fd, F_WX_SEALS);
+
+ mfd_fail_chmod(fd, 0711);
+ mfd_fail_chmod(fd, 0600);
+ mfd_fail_write(fd);
+ close(fd);
+}
+
+/*
+ * Test EXEC_NO_SEAL
+ * Test fd is created with exec and not allow sealing.
+ */
+static void test_exec_no_seal(void)
+{
+ int fd;
+
+ printf("%s EXEC_NO_SEAL\n", memfd_str);
+
+ /* Create with EXEC but without ALLOW_SEALING */
+ fd = mfd_assert_new("kern_memfd_exec_no_sealing",
+ mfd_def_size,
+ MFD_CLOEXEC | MFD_EXEC);
+ mfd_assert_mode(fd, 0777);
+ mfd_assert_has_seals(fd, F_SEAL_SEAL);
+ mfd_assert_chmod(fd, 0666);
+ close(fd);
+}
+
+/*
+ * Test memfd_create with MFD_NOEXEC flag
+ */
+static void test_noexec_seal(void)
+{
+ int fd;
+
+ printf("%s NOEXEC_SEAL\n", memfd_str);
+
+ /* Create with NOEXEC and ALLOW_SEALING */
+ fd = mfd_assert_new("kern_memfd_noexec",
+ mfd_def_size,
+ MFD_CLOEXEC | MFD_ALLOW_SEALING | MFD_NOEXEC_SEAL);
+ mfd_assert_mode(fd, 0666);
+ mfd_assert_has_seals(fd, F_SEAL_EXEC);
+ mfd_fail_chmod(fd, 0777);
+ close(fd);
+
+ /* Create with NOEXEC but without ALLOW_SEALING */
+ fd = mfd_assert_new("kern_memfd_noexec",
+ mfd_def_size,
+ MFD_CLOEXEC | MFD_NOEXEC_SEAL);
+ mfd_assert_mode(fd, 0666);
+ mfd_assert_has_seals(fd, F_SEAL_EXEC);
+ mfd_fail_chmod(fd, 0777);
+ close(fd);
+}
+
+static void test_sysctl_sysctl0(void)
+{
+ int fd;
+
+ sysctl_assert_equal("0");
+
+ fd = mfd_assert_new("kern_memfd_sysctl_0_dfl",
+ mfd_def_size,
+ MFD_CLOEXEC | MFD_ALLOW_SEALING);
+ mfd_assert_mode(fd, 0777);
+ mfd_assert_has_seals(fd, 0);
+ mfd_assert_chmod(fd, 0644);
+ close(fd);
+}
+
+static void test_sysctl_set_sysctl0(void)
+{
+ sysctl_assert_write("0");
+ test_sysctl_sysctl0();
+}
+
+static void test_sysctl_sysctl1(void)
+{
+ int fd;
+
+ sysctl_assert_equal("1");
+
+ fd = mfd_assert_new("kern_memfd_sysctl_1_dfl",
+ mfd_def_size,
+ MFD_CLOEXEC | MFD_ALLOW_SEALING);
+ mfd_assert_mode(fd, 0666);
+ mfd_assert_has_seals(fd, F_SEAL_EXEC);
+ mfd_fail_chmod(fd, 0777);
+ close(fd);
+
+ fd = mfd_assert_new("kern_memfd_sysctl_1_exec",
+ mfd_def_size,
+ MFD_CLOEXEC | MFD_EXEC | MFD_ALLOW_SEALING);
+ mfd_assert_mode(fd, 0777);
+ mfd_assert_has_seals(fd, 0);
+ mfd_assert_chmod(fd, 0644);
+ close(fd);
+
+ fd = mfd_assert_new("kern_memfd_sysctl_1_noexec",
+ mfd_def_size,
+ MFD_CLOEXEC | MFD_NOEXEC_SEAL | MFD_ALLOW_SEALING);
+ mfd_assert_mode(fd, 0666);
+ mfd_assert_has_seals(fd, F_SEAL_EXEC);
+ mfd_fail_chmod(fd, 0777);
+ close(fd);
+}
+
+static void test_sysctl_set_sysctl1(void)
+{
+ sysctl_assert_write("1");
+ test_sysctl_sysctl1();
+}
+
+static void test_sysctl_sysctl2(void)
+{
+ int fd;
+
+ sysctl_assert_equal("2");
+
+ fd = mfd_assert_new("kern_memfd_sysctl_2_dfl",
+ mfd_def_size,
+ MFD_CLOEXEC | MFD_ALLOW_SEALING);
+ mfd_assert_mode(fd, 0666);
+ mfd_assert_has_seals(fd, F_SEAL_EXEC);
+ mfd_fail_chmod(fd, 0777);
+ close(fd);
+
+ mfd_fail_new("kern_memfd_sysctl_2_exec",
+ MFD_CLOEXEC | MFD_EXEC | MFD_ALLOW_SEALING);
+
+ fd = mfd_assert_new("kern_memfd_sysctl_2_noexec",
+ mfd_def_size,
+ MFD_CLOEXEC | MFD_NOEXEC_SEAL | MFD_ALLOW_SEALING);
+ mfd_assert_mode(fd, 0666);
+ mfd_assert_has_seals(fd, F_SEAL_EXEC);
+ mfd_fail_chmod(fd, 0777);
+ close(fd);
+}
+
+static void test_sysctl_set_sysctl2(void)
+{
+ sysctl_assert_write("2");
+ test_sysctl_sysctl2();
+}
+
+static int sysctl_simple_child(void *arg)
+{
+ printf("%s sysctl 0\n", memfd_str);
+ test_sysctl_set_sysctl0();
+
+ printf("%s sysctl 1\n", memfd_str);
+ test_sysctl_set_sysctl1();
+
+ printf("%s sysctl 0\n", memfd_str);
+ test_sysctl_set_sysctl0();
+
+ printf("%s sysctl 2\n", memfd_str);
+ test_sysctl_set_sysctl2();
+
+ printf("%s sysctl 1\n", memfd_str);
+ test_sysctl_set_sysctl1();
+
+ printf("%s sysctl 0\n", memfd_str);
+ test_sysctl_set_sysctl0();
+
+ return 0;
+}
+
+/*
+ * Test sysctl
+ * A very basic test to make sure the core sysctl semantics work.
+ */
+static void test_sysctl_simple(void)
+{
+ int pid = spawn_thread(CLONE_NEWPID, sysctl_simple_child, NULL);
+
+ join_thread(pid);
+}
+
+static int sysctl_nested(void *arg)
+{
+ void (*fn)(void) = arg;
+
+ fn();
+ return 0;
+}
+
+static int sysctl_nested_wait(void *arg)
+{
+ /* Wait for a SIGCONT. */
+ kill(getpid(), SIGSTOP);
+ return sysctl_nested(arg);
+}
+
+static void test_sysctl_sysctl1_failset(void)
+{
+ sysctl_fail_write("0");
+ test_sysctl_sysctl1();
+}
+
+static void test_sysctl_sysctl2_failset(void)
+{
+ sysctl_fail_write("1");
+ test_sysctl_sysctl2();
+
+ sysctl_fail_write("0");
+ test_sysctl_sysctl2();
+}
+
+static int sysctl_nested_child(void *arg)
+{
+ int pid;
+
+ printf("%s nested sysctl 0\n", memfd_str);
+ sysctl_assert_write("0");
+ /* A further nested pidns works the same. */
+ pid = spawn_thread(CLONE_NEWPID, sysctl_simple_child, NULL);
+ join_thread(pid);
+
+ printf("%s nested sysctl 1\n", memfd_str);
+ sysctl_assert_write("1");
+ /* Child inherits our setting. */
+ pid = spawn_thread(CLONE_NEWPID, sysctl_nested, test_sysctl_sysctl1);
+ join_thread(pid);
+ /* Child cannot raise the setting. */
+ pid = spawn_thread(CLONE_NEWPID, sysctl_nested,
+ test_sysctl_sysctl1_failset);
+ join_thread(pid);
+ /* Child can lower the setting. */
+ pid = spawn_thread(CLONE_NEWPID, sysctl_nested,
+ test_sysctl_set_sysctl2);
+ join_thread(pid);
+ /* Child lowering the setting has no effect on our setting. */
+ test_sysctl_sysctl1();
+
+ printf("%s nested sysctl 2\n", memfd_str);
+ sysctl_assert_write("2");
+ /* Child inherits our setting. */
+ pid = spawn_thread(CLONE_NEWPID, sysctl_nested, test_sysctl_sysctl2);
+ join_thread(pid);
+ /* Child cannot raise the setting. */
+ pid = spawn_thread(CLONE_NEWPID, sysctl_nested,
+ test_sysctl_sysctl2_failset);
+ join_thread(pid);
+
+ /* Verify that the rules are actually inherited after fork. */
+ printf("%s nested sysctl 0 -> 1 after fork\n", memfd_str);
+ sysctl_assert_write("0");
+
+ pid = spawn_thread(CLONE_NEWPID, sysctl_nested_wait,
+ test_sysctl_sysctl1_failset);
+ sysctl_assert_write("1");
+ kill(pid, SIGCONT);
+ join_thread(pid);
+
+ printf("%s nested sysctl 0 -> 2 after fork\n", memfd_str);
+ sysctl_assert_write("0");
+
+ pid = spawn_thread(CLONE_NEWPID, sysctl_nested_wait,
+ test_sysctl_sysctl2_failset);
+ sysctl_assert_write("2");
+ kill(pid, SIGCONT);
+ join_thread(pid);
+
+ /*
+ * Verify that the current effective setting is saved on fork, meaning
+ * that the parent lowering the sysctl doesn't affect already-forked
+ * children.
+ */
+ printf("%s nested sysctl 2 -> 1 after fork\n", memfd_str);
+ sysctl_assert_write("2");
+ pid = spawn_thread(CLONE_NEWPID, sysctl_nested_wait,
+ test_sysctl_sysctl2);
+ sysctl_assert_write("1");
+ kill(pid, SIGCONT);
+ join_thread(pid);
+
+ printf("%s nested sysctl 2 -> 0 after fork\n", memfd_str);
+ sysctl_assert_write("2");
+ pid = spawn_thread(CLONE_NEWPID, sysctl_nested_wait,
+ test_sysctl_sysctl2);
+ sysctl_assert_write("0");
+ kill(pid, SIGCONT);
+ join_thread(pid);
+
+ printf("%s nested sysctl 1 -> 0 after fork\n", memfd_str);
+ sysctl_assert_write("1");
+ pid = spawn_thread(CLONE_NEWPID, sysctl_nested_wait,
+ test_sysctl_sysctl1);
+ sysctl_assert_write("0");
+ kill(pid, SIGCONT);
+ join_thread(pid);
+
+ return 0;
+}
+
+/*
+ * Test sysctl with nested pid namespaces
+ * Make sure that the sysctl nesting semantics work correctly.
+ */
+static void test_sysctl_nested(void)
+{
+ int pid = spawn_thread(CLONE_NEWPID, sysctl_nested_child, NULL);
+
+ join_thread(pid);
+}
+
+/*
* Test sharing via dup()
* Test that seals are shared between dupped FDs and they're all equal.
*/
@@ -1052,6 +1581,9 @@ int main(int argc, char **argv)
test_create();
test_basic();
+ test_exec_seal();
+ test_exec_no_seal();
+ test_noexec_seal();
test_seal_write();
test_seal_future_write();
@@ -1059,6 +1591,9 @@ int main(int argc, char **argv)
test_seal_grow();
test_seal_resize();
+ test_sysctl_simple();
+ test_sysctl_nested();
+
test_share_dup("SHARE-DUP", "");
test_share_mmap("SHARE-MMAP", "");
test_share_open("SHARE-OPEN", "");
diff --git a/tools/testing/selftests/memory-hotplug/config b/tools/testing/selftests/memory-hotplug/config
index a7e8cd5bb265..1eef042a31e1 100644
--- a/tools/testing/selftests/memory-hotplug/config
+++ b/tools/testing/selftests/memory-hotplug/config
@@ -1,5 +1,4 @@
CONFIG_MEMORY_HOTPLUG=y
-CONFIG_MEMORY_HOTPLUG_SPARSE=y
CONFIG_NOTIFIER_ERROR_INJECTION=y
CONFIG_MEMORY_NOTIFIER_ERROR_INJECT=m
CONFIG_MEMORY_HOTREMOVE=y
diff --git a/tools/testing/selftests/memory-hotplug/mem-on-off-test.sh b/tools/testing/selftests/memory-hotplug/mem-on-off-test.sh
index b37585e6aa38..611be86eaf3d 100755
--- a/tools/testing/selftests/memory-hotplug/mem-on-off-test.sh
+++ b/tools/testing/selftests/memory-hotplug/mem-on-off-test.sh
@@ -134,6 +134,15 @@ offline_memory_expect_fail()
return 0
}
+online_all_offline_memory()
+{
+ for memory in `hotpluggable_offline_memory`; do
+ if ! online_memory_expect_success $memory; then
+ retval=1
+ fi
+ done
+}
+
error=-12
priority=0
# Run with default of ratio=2 for Kselftest run
@@ -197,8 +206,11 @@ echo -e "\t trying to offline $target out of $hotpluggable_num memory block(s):"
for memory in `hotpluggable_online_memory`; do
if [ "$target" -gt 0 ]; then
echo "online->offline memory$memory"
- if offline_memory_expect_success $memory; then
+ if offline_memory_expect_success $memory &>/dev/null; then
target=$(($target - 1))
+ echo "-> Success"
+ else
+ echo "-> Failure"
fi
fi
done
@@ -257,7 +269,7 @@ prerequisite_extra
echo 0 > $NOTIFIER_ERR_INJECT_DIR/actions/MEM_GOING_OFFLINE/error
for memory in `hotpluggable_online_memory`; do
if [ $((RANDOM % 100)) -lt $ratio ]; then
- offline_memory_expect_success $memory
+ offline_memory_expect_success $memory &>/dev/null
fi
done
@@ -266,26 +278,35 @@ done
#
echo $error > $NOTIFIER_ERR_INJECT_DIR/actions/MEM_GOING_ONLINE/error
for memory in `hotpluggable_offline_memory`; do
- online_memory_expect_fail $memory
+ if ! online_memory_expect_fail $memory; then
+ retval=1
+ fi
done
#
# Online all hot-pluggable memory
#
echo 0 > $NOTIFIER_ERR_INJECT_DIR/actions/MEM_GOING_ONLINE/error
-for memory in `hotpluggable_offline_memory`; do
- online_memory_expect_success $memory
-done
+online_all_offline_memory
#
# Test memory hot-remove error handling (online => offline)
#
echo $error > $NOTIFIER_ERR_INJECT_DIR/actions/MEM_GOING_OFFLINE/error
for memory in `hotpluggable_online_memory`; do
- offline_memory_expect_fail $memory
+ if [ $((RANDOM % 100)) -lt $ratio ]; then
+ if ! offline_memory_expect_fail $memory; then
+ retval=1
+ fi
+ fi
done
echo 0 > $NOTIFIER_ERR_INJECT_DIR/actions/MEM_GOING_OFFLINE/error
/sbin/modprobe -q -r memory-notifier-error-inject
+#
+# Restore memory before exit
+#
+online_all_offline_memory
+
exit $retval
diff --git a/tools/testing/selftests/mincore/.gitignore b/tools/testing/selftests/mincore/.gitignore
new file mode 100644
index 000000000000..15c4dfc2df00
--- /dev/null
+++ b/tools/testing/selftests/mincore/.gitignore
@@ -0,0 +1,2 @@
+# SPDX-License-Identifier: GPL-2.0+
+mincore_selftest
diff --git a/tools/testing/selftests/mincore/Makefile b/tools/testing/selftests/mincore/Makefile
new file mode 100644
index 000000000000..38c7db1e8926
--- /dev/null
+++ b/tools/testing/selftests/mincore/Makefile
@@ -0,0 +1,6 @@
+# SPDX-License-Identifier: GPL-2.0+
+
+CFLAGS += -Wall
+
+TEST_GEN_PROGS := mincore_selftest
+include ../lib.mk
diff --git a/tools/testing/selftests/mincore/mincore_selftest.c b/tools/testing/selftests/mincore/mincore_selftest.c
new file mode 100644
index 000000000000..e949a43a6145
--- /dev/null
+++ b/tools/testing/selftests/mincore/mincore_selftest.c
@@ -0,0 +1,368 @@
+// SPDX-License-Identifier: GPL-2.0+
+/*
+ * kselftest suite for mincore().
+ *
+ * Copyright (C) 2020 Collabora, Ltd.
+ */
+
+#define _GNU_SOURCE
+
+#include <stdio.h>
+#include <errno.h>
+#include <unistd.h>
+#include <stdlib.h>
+#include <sys/mman.h>
+#include <string.h>
+#include <fcntl.h>
+
+#include "../kselftest.h"
+#include "../kselftest_harness.h"
+
+/* Default test file size: 4MB */
+#define MB (1UL << 20)
+#define FILE_SIZE (4 * MB)
+
+
+/*
+ * Tests the user interface. This test triggers most of the documented
+ * error conditions in mincore().
+ */
+TEST(basic_interface)
+{
+ int retval;
+ int page_size;
+ unsigned char vec[1];
+ char *addr;
+
+ page_size = sysconf(_SC_PAGESIZE);
+
+ /* Query a 0 byte sized range */
+ retval = mincore(0, 0, vec);
+ EXPECT_EQ(0, retval);
+
+ /* Addresses in the specified range are invalid or unmapped */
+ errno = 0;
+ retval = mincore(NULL, page_size, vec);
+ EXPECT_EQ(-1, retval);
+ EXPECT_EQ(ENOMEM, errno);
+
+ errno = 0;
+ addr = mmap(NULL, page_size, PROT_READ | PROT_WRITE,
+ MAP_SHARED | MAP_ANONYMOUS, -1, 0);
+ ASSERT_NE(MAP_FAILED, addr) {
+ TH_LOG("mmap error: %s", strerror(errno));
+ }
+
+ /* <addr> argument is not page-aligned */
+ errno = 0;
+ retval = mincore(addr + 1, page_size, vec);
+ EXPECT_EQ(-1, retval);
+ EXPECT_EQ(EINVAL, errno);
+
+ /* <length> argument is too large */
+ errno = 0;
+ retval = mincore(addr, -1, vec);
+ EXPECT_EQ(-1, retval);
+ EXPECT_EQ(ENOMEM, errno);
+
+ /* <vec> argument points to an illegal address */
+ errno = 0;
+ retval = mincore(addr, page_size, NULL);
+ EXPECT_EQ(-1, retval);
+ EXPECT_EQ(EFAULT, errno);
+ munmap(addr, page_size);
+}
+
+
+/*
+ * Test mincore() behavior on a private anonymous page mapping.
+ * Check that the page is not loaded into memory right after the mapping
+ * but after accessing it (on-demand allocation).
+ * Then free the page and check that it's not memory-resident.
+ */
+TEST(check_anonymous_locked_pages)
+{
+ unsigned char vec[1];
+ char *addr;
+ int retval;
+ int page_size;
+
+ page_size = sysconf(_SC_PAGESIZE);
+
+ /* Map one page and check it's not memory-resident */
+ errno = 0;
+ addr = mmap(NULL, page_size, PROT_READ | PROT_WRITE,
+ MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
+ ASSERT_NE(MAP_FAILED, addr) {
+ TH_LOG("mmap error: %s", strerror(errno));
+ }
+ retval = mincore(addr, page_size, vec);
+ ASSERT_EQ(0, retval);
+ ASSERT_EQ(0, vec[0]) {
+ TH_LOG("Page found in memory before use");
+ }
+
+ /* Touch the page and check again. It should now be in memory */
+ addr[0] = 1;
+ mlock(addr, page_size);
+ retval = mincore(addr, page_size, vec);
+ ASSERT_EQ(0, retval);
+ ASSERT_EQ(1, vec[0]) {
+ TH_LOG("Page not found in memory after use");
+ }
+
+ /*
+ * It shouldn't be memory-resident after unlocking it and
+ * marking it as unneeded.
+ */
+ munlock(addr, page_size);
+ madvise(addr, page_size, MADV_DONTNEED);
+ retval = mincore(addr, page_size, vec);
+ ASSERT_EQ(0, retval);
+ ASSERT_EQ(0, vec[0]) {
+ TH_LOG("Page in memory after being zapped");
+ }
+ munmap(addr, page_size);
+}
+
+
+/*
+ * Check mincore() behavior on huge pages.
+ * This test will be skipped if the mapping fails (ie. if there are no
+ * huge pages available).
+ *
+ * Make sure the system has at least one free huge page, check
+ * "HugePages_Free" in /proc/meminfo.
+ * Increment /sys/kernel/mm/hugepages/hugepages-2048kB/nr_hugepages if
+ * needed.
+ */
+TEST(check_huge_pages)
+{
+ unsigned char vec[1];
+ char *addr;
+ int retval;
+ int page_size;
+
+ page_size = sysconf(_SC_PAGESIZE);
+
+ errno = 0;
+ addr = mmap(NULL, page_size, PROT_READ | PROT_WRITE,
+ MAP_PRIVATE | MAP_ANONYMOUS | MAP_HUGETLB,
+ -1, 0);
+ if (addr == MAP_FAILED) {
+ if (errno == ENOMEM || errno == EINVAL)
+ SKIP(return, "No huge pages available or CONFIG_HUGETLB_PAGE disabled.");
+ else
+ TH_LOG("mmap error: %s", strerror(errno));
+ }
+ retval = mincore(addr, page_size, vec);
+ ASSERT_EQ(0, retval);
+ ASSERT_EQ(0, vec[0]) {
+ TH_LOG("Page found in memory before use");
+ }
+
+ addr[0] = 1;
+ mlock(addr, page_size);
+ retval = mincore(addr, page_size, vec);
+ ASSERT_EQ(0, retval);
+ ASSERT_EQ(1, vec[0]) {
+ TH_LOG("Page not found in memory after use");
+ }
+
+ munlock(addr, page_size);
+ munmap(addr, page_size);
+}
+
+
+/*
+ * Test mincore() behavior on a file-backed page.
+ * No pages should be loaded into memory right after the mapping. Then,
+ * accessing any address in the mapping range should load the page
+ * containing the address and a number of subsequent pages (readahead).
+ *
+ * The actual readahead settings depend on the test environment, so we
+ * can't make a lot of assumptions about that. This test covers the most
+ * general cases.
+ */
+TEST(check_file_mmap)
+{
+ unsigned char *vec;
+ int vec_size;
+ char *addr;
+ int retval;
+ int page_size;
+ int fd;
+ int i;
+ int ra_pages = 0;
+
+ page_size = sysconf(_SC_PAGESIZE);
+ vec_size = FILE_SIZE / page_size;
+ if (FILE_SIZE % page_size)
+ vec_size++;
+
+ vec = calloc(vec_size, sizeof(unsigned char));
+ ASSERT_NE(NULL, vec) {
+ TH_LOG("Can't allocate array");
+ }
+
+ errno = 0;
+ fd = open(".", O_TMPFILE | O_RDWR, 0600);
+ if (fd < 0) {
+ ASSERT_EQ(errno, EOPNOTSUPP) {
+ TH_LOG("Can't create temporary file: %s",
+ strerror(errno));
+ }
+ SKIP(goto out_free, "O_TMPFILE not supported by filesystem.");
+ }
+ errno = 0;
+ retval = fallocate(fd, 0, 0, FILE_SIZE);
+ if (retval) {
+ ASSERT_EQ(errno, EOPNOTSUPP) {
+ TH_LOG("Error allocating space for the temporary file: %s",
+ strerror(errno));
+ }
+ SKIP(goto out_close, "fallocate not supported by filesystem.");
+ }
+
+ /*
+ * Map the whole file, the pages shouldn't be fetched yet.
+ */
+ errno = 0;
+ addr = mmap(NULL, FILE_SIZE, PROT_READ | PROT_WRITE,
+ MAP_SHARED, fd, 0);
+ ASSERT_NE(MAP_FAILED, addr) {
+ TH_LOG("mmap error: %s", strerror(errno));
+ }
+ retval = mincore(addr, FILE_SIZE, vec);
+ ASSERT_EQ(0, retval);
+ for (i = 0; i < vec_size; i++) {
+ ASSERT_EQ(0, vec[i]) {
+ TH_LOG("Unexpected page in memory");
+ }
+ }
+
+ /*
+ * Touch a page in the middle of the mapping. We expect the next
+ * few pages (the readahead window) to be populated too.
+ */
+ addr[FILE_SIZE / 2] = 1;
+ retval = mincore(addr, FILE_SIZE, vec);
+ ASSERT_EQ(0, retval);
+ ASSERT_EQ(1, vec[FILE_SIZE / 2 / page_size]) {
+ TH_LOG("Page not found in memory after use");
+ }
+
+ i = FILE_SIZE / 2 / page_size + 1;
+ while (i < vec_size && vec[i]) {
+ ra_pages++;
+ i++;
+ }
+ EXPECT_GT(ra_pages, 0) {
+ TH_LOG("No read-ahead pages found in memory");
+ }
+
+ EXPECT_LT(i, vec_size) {
+ TH_LOG("Read-ahead pages reached the end of the file");
+ }
+ /*
+ * End of the readahead window. The rest of the pages shouldn't
+ * be in memory.
+ */
+ if (i < vec_size) {
+ while (i < vec_size && !vec[i])
+ i++;
+ EXPECT_EQ(vec_size, i) {
+ TH_LOG("Unexpected page in memory beyond readahead window");
+ }
+ }
+
+ munmap(addr, FILE_SIZE);
+out_close:
+ close(fd);
+out_free:
+ free(vec);
+}
+
+
+/*
+ * Test mincore() behavior on a page backed by a tmpfs file. This test
+ * performs the same steps as the previous one. However, we don't expect
+ * any readahead in this case.
+ */
+TEST(check_tmpfs_mmap)
+{
+ unsigned char *vec;
+ int vec_size;
+ char *addr;
+ int retval;
+ int page_size;
+ int fd;
+ int i;
+ int ra_pages = 0;
+
+ page_size = sysconf(_SC_PAGESIZE);
+ vec_size = FILE_SIZE / page_size;
+ if (FILE_SIZE % page_size)
+ vec_size++;
+
+ vec = calloc(vec_size, sizeof(unsigned char));
+ ASSERT_NE(NULL, vec) {
+ TH_LOG("Can't allocate array");
+ }
+
+ errno = 0;
+ fd = open("/dev/shm", O_TMPFILE | O_RDWR, 0600);
+ ASSERT_NE(-1, fd) {
+ TH_LOG("Can't create temporary file: %s",
+ strerror(errno));
+ }
+ errno = 0;
+ retval = fallocate(fd, 0, 0, FILE_SIZE);
+ ASSERT_EQ(0, retval) {
+ TH_LOG("Error allocating space for the temporary file: %s",
+ strerror(errno));
+ }
+
+ /*
+ * Map the whole file, the pages shouldn't be fetched yet.
+ */
+ errno = 0;
+ addr = mmap(NULL, FILE_SIZE, PROT_READ | PROT_WRITE,
+ MAP_SHARED, fd, 0);
+ ASSERT_NE(MAP_FAILED, addr) {
+ TH_LOG("mmap error: %s", strerror(errno));
+ }
+ retval = mincore(addr, FILE_SIZE, vec);
+ ASSERT_EQ(0, retval);
+ for (i = 0; i < vec_size; i++) {
+ ASSERT_EQ(0, vec[i]) {
+ TH_LOG("Unexpected page in memory");
+ }
+ }
+
+ /*
+ * Touch a page in the middle of the mapping. We expect only
+ * that page to be fetched into memory.
+ */
+ addr[FILE_SIZE / 2] = 1;
+ retval = mincore(addr, FILE_SIZE, vec);
+ ASSERT_EQ(0, retval);
+ ASSERT_EQ(1, vec[FILE_SIZE / 2 / page_size]) {
+ TH_LOG("Page not found in memory after use");
+ }
+
+ i = FILE_SIZE / 2 / page_size + 1;
+ while (i < vec_size && vec[i]) {
+ ra_pages++;
+ i++;
+ }
+ ASSERT_EQ(ra_pages, 0) {
+ TH_LOG("Read-ahead pages found in memory");
+ }
+
+ munmap(addr, FILE_SIZE);
+ close(fd);
+ free(vec);
+}
+
+TEST_HARNESS_MAIN
diff --git a/tools/testing/selftests/mm/.gitignore b/tools/testing/selftests/mm/.gitignore
new file mode 100644
index 000000000000..d26e962f2ac4
--- /dev/null
+++ b/tools/testing/selftests/mm/.gitignore
@@ -0,0 +1,49 @@
+# SPDX-License-Identifier: GPL-2.0-only
+cow
+hugepage-mmap
+hugepage-mremap
+hugepage-shm
+hugepage-vmemmap
+hugetlb-madvise
+hugetlb-read-hwpoison
+khugepaged
+map_hugetlb
+map_populate
+thuge-gen
+compaction_test
+migration
+mlock2-tests
+mrelease_test
+mremap_dontunmap
+mremap_test
+on-fault-limit
+transhuge-stress
+pagemap_ioctl
+*.tmp*
+protection_keys
+protection_keys_32
+protection_keys_64
+madv_populate
+uffd-stress
+uffd-unit-tests
+mlock-intersect-test
+mlock-random-test
+virtual_address_range
+gup_test
+va_128TBswitch
+map_fixed_noreplace
+write_to_hugetlbfs
+hmm-tests
+memfd_secret
+soft-dirty
+split_huge_page_test
+ksm_tests
+local_config.h
+local_config.mk
+ksm_functional_tests
+mdwe_test
+gup_longterm
+mkdirty
+va_high_addr_switch
+hugetlb_fault_after_madv
+hugetlb_madv_vs_map
diff --git a/tools/testing/selftests/mm/Makefile b/tools/testing/selftests/mm/Makefile
new file mode 100644
index 000000000000..eb5f39a2668b
--- /dev/null
+++ b/tools/testing/selftests/mm/Makefile
@@ -0,0 +1,204 @@
+# SPDX-License-Identifier: GPL-2.0
+# Makefile for mm selftests
+
+LOCAL_HDRS += $(selfdir)/mm/local_config.h $(top_srcdir)/mm/gup_test.h
+
+include local_config.mk
+
+ifeq ($(ARCH),)
+
+ifeq ($(CROSS_COMPILE),)
+uname_M := $(shell uname -m 2>/dev/null || echo not)
+else
+uname_M := $(shell echo $(CROSS_COMPILE) | grep -o '^[a-z0-9]\+')
+endif
+ARCH ?= $(shell echo $(uname_M) | sed -e 's/aarch64.*/arm64/' -e 's/ppc64.*/ppc64/')
+endif
+
+# Without this, failed build products remain, with up-to-date timestamps,
+# thus tricking Make (and you!) into believing that All Is Well, in subsequent
+# make invocations:
+.DELETE_ON_ERROR:
+
+# Avoid accidental wrong builds, due to built-in rules working just a little
+# bit too well--but not quite as well as required for our situation here.
+#
+# In other words, "make $SOME_TEST" is supposed to fail to build at all,
+# because this Makefile only supports either "make" (all), or "make /full/path".
+# However, the built-in rules, if not suppressed, will pick up CFLAGS and the
+# initial LDLIBS (but not the target-specific LDLIBS, because those are only
+# set for the full path target!). This causes it to get pretty far into building
+# things despite using incorrect values such as an *occasionally* incomplete
+# LDLIBS.
+MAKEFLAGS += --no-builtin-rules
+
+CFLAGS = -Wall -I $(top_srcdir) $(EXTRA_CFLAGS) $(KHDR_INCLUDES)
+LDLIBS = -lrt -lpthread -lm
+
+TEST_GEN_FILES = cow
+TEST_GEN_FILES += compaction_test
+TEST_GEN_FILES += gup_longterm
+TEST_GEN_FILES += gup_test
+TEST_GEN_FILES += hmm-tests
+TEST_GEN_FILES += hugetlb-madvise
+TEST_GEN_FILES += hugetlb-read-hwpoison
+TEST_GEN_FILES += hugepage-mmap
+TEST_GEN_FILES += hugepage-mremap
+TEST_GEN_FILES += hugepage-shm
+TEST_GEN_FILES += hugepage-vmemmap
+TEST_GEN_FILES += khugepaged
+TEST_GEN_FILES += madv_populate
+TEST_GEN_FILES += map_fixed_noreplace
+TEST_GEN_FILES += map_hugetlb
+TEST_GEN_FILES += map_populate
+TEST_GEN_FILES += memfd_secret
+TEST_GEN_FILES += migration
+TEST_GEN_FILES += mkdirty
+TEST_GEN_FILES += mlock-random-test
+TEST_GEN_FILES += mlock2-tests
+TEST_GEN_FILES += mrelease_test
+TEST_GEN_FILES += mremap_dontunmap
+TEST_GEN_FILES += mremap_test
+TEST_GEN_FILES += on-fault-limit
+TEST_GEN_FILES += pagemap_ioctl
+TEST_GEN_FILES += thuge-gen
+TEST_GEN_FILES += transhuge-stress
+TEST_GEN_FILES += uffd-stress
+TEST_GEN_FILES += uffd-unit-tests
+TEST_GEN_FILES += split_huge_page_test
+TEST_GEN_FILES += ksm_tests
+TEST_GEN_FILES += ksm_functional_tests
+TEST_GEN_FILES += mdwe_test
+TEST_GEN_FILES += hugetlb_fault_after_madv
+TEST_GEN_FILES += hugetlb_madv_vs_map
+
+ifneq ($(ARCH),arm64)
+TEST_GEN_FILES += soft-dirty
+endif
+
+ifeq ($(ARCH),x86_64)
+CAN_BUILD_I386 := $(shell ./../x86/check_cc.sh "$(CC)" ../x86/trivial_32bit_program.c -m32)
+CAN_BUILD_X86_64 := $(shell ./../x86/check_cc.sh "$(CC)" ../x86/trivial_64bit_program.c)
+CAN_BUILD_WITH_NOPIE := $(shell ./../x86/check_cc.sh "$(CC)" ../x86/trivial_program.c -no-pie)
+
+VMTARGETS := protection_keys
+BINARIES_32 := $(VMTARGETS:%=%_32)
+BINARIES_64 := $(VMTARGETS:%=%_64)
+
+ifeq ($(CAN_BUILD_WITH_NOPIE),1)
+CFLAGS += -no-pie
+endif
+
+ifeq ($(CAN_BUILD_I386),1)
+TEST_GEN_FILES += $(BINARIES_32)
+endif
+
+ifeq ($(CAN_BUILD_X86_64),1)
+TEST_GEN_FILES += $(BINARIES_64)
+endif
+else
+
+ifneq (,$(findstring $(ARCH),ppc64))
+TEST_GEN_FILES += protection_keys
+endif
+
+endif
+
+ifneq (,$(filter $(ARCH),arm64 ia64 mips64 parisc64 ppc64 riscv64 s390x sparc64 x86_64))
+TEST_GEN_FILES += va_high_addr_switch
+TEST_GEN_FILES += virtual_address_range
+TEST_GEN_FILES += write_to_hugetlbfs
+endif
+
+TEST_PROGS := run_vmtests.sh
+
+TEST_FILES := test_vmalloc.sh
+TEST_FILES += test_hmm.sh
+TEST_FILES += va_high_addr_switch.sh
+TEST_FILES += charge_reserved_hugetlb.sh
+TEST_FILES += hugetlb_reparenting_test.sh
+
+# required by charge_reserved_hugetlb.sh
+TEST_FILES += write_hugetlb_memory.sh
+
+include ../lib.mk
+
+$(TEST_GEN_PROGS): vm_util.c thp_settings.c
+$(TEST_GEN_FILES): vm_util.c thp_settings.c
+
+$(OUTPUT)/uffd-stress: uffd-common.c
+$(OUTPUT)/uffd-unit-tests: uffd-common.c
+
+ifeq ($(ARCH),x86_64)
+BINARIES_32 := $(patsubst %,$(OUTPUT)/%,$(BINARIES_32))
+BINARIES_64 := $(patsubst %,$(OUTPUT)/%,$(BINARIES_64))
+
+define gen-target-rule-32
+$(1) $(1)_32: $(OUTPUT)/$(1)_32
+.PHONY: $(1) $(1)_32
+endef
+
+define gen-target-rule-64
+$(1) $(1)_64: $(OUTPUT)/$(1)_64
+.PHONY: $(1) $(1)_64
+endef
+
+ifeq ($(CAN_BUILD_I386),1)
+$(BINARIES_32): CFLAGS += -m32 -mxsave
+$(BINARIES_32): LDLIBS += -lrt -ldl -lm
+$(BINARIES_32): $(OUTPUT)/%_32: %.c
+ $(CC) $(CFLAGS) $(EXTRA_CFLAGS) $(notdir $^) $(LDLIBS) -o $@
+$(foreach t,$(VMTARGETS),$(eval $(call gen-target-rule-32,$(t))))
+endif
+
+ifeq ($(CAN_BUILD_X86_64),1)
+$(BINARIES_64): CFLAGS += -m64 -mxsave
+$(BINARIES_64): LDLIBS += -lrt -ldl
+$(BINARIES_64): $(OUTPUT)/%_64: %.c
+ $(CC) $(CFLAGS) $(EXTRA_CFLAGS) $(notdir $^) $(LDLIBS) -o $@
+$(foreach t,$(VMTARGETS),$(eval $(call gen-target-rule-64,$(t))))
+endif
+
+# x86_64 users should be encouraged to install 32-bit libraries
+ifeq ($(CAN_BUILD_I386)$(CAN_BUILD_X86_64),01)
+all: warn_32bit_failure
+
+warn_32bit_failure:
+ @echo "Warning: you seem to have a broken 32-bit build" 2>&1; \
+ echo "environment. This will reduce test coverage of 64-bit" 2>&1; \
+ echo "kernels. If you are using a Debian-like distribution," 2>&1; \
+ echo "try:"; 2>&1; \
+ echo ""; \
+ echo " apt-get install gcc-multilib libc6-i386 libc6-dev-i386"; \
+ echo ""; \
+ echo "If you are using a Fedora-like distribution, try:"; \
+ echo ""; \
+ echo " yum install glibc-devel.*i686"; \
+ exit 0;
+endif
+endif
+
+# IOURING_EXTRA_LIBS may get set in local_config.mk, or it may be left empty.
+$(OUTPUT)/cow: LDLIBS += $(IOURING_EXTRA_LIBS)
+
+$(OUTPUT)/gup_longterm: LDLIBS += $(IOURING_EXTRA_LIBS)
+
+$(OUTPUT)/mlock-random-test $(OUTPUT)/memfd_secret: LDLIBS += -lcap
+
+$(OUTPUT)/ksm_tests: LDLIBS += -lnuma
+
+$(OUTPUT)/migration: LDLIBS += -lnuma
+
+local_config.mk local_config.h: check_config.sh
+ /bin/sh ./check_config.sh $(CC)
+
+EXTRA_CLEAN += local_config.mk local_config.h
+
+ifeq ($(IOURING_EXTRA_LIBS),)
+all: warn_missing_liburing
+
+warn_missing_liburing:
+ @echo ; \
+ echo "Warning: missing liburing support. Some tests will be skipped." ; \
+ echo
+endif
diff --git a/tools/testing/selftests/vm/charge_reserved_hugetlb.sh b/tools/testing/selftests/mm/charge_reserved_hugetlb.sh
index 18d33684faad..d680c00d2853 100644..100755
--- a/tools/testing/selftests/vm/charge_reserved_hugetlb.sh
+++ b/tools/testing/selftests/mm/charge_reserved_hugetlb.sh
@@ -1,13 +1,18 @@
-#!/bin/sh
+#!/bin/bash
# SPDX-License-Identifier: GPL-2.0
+# Kselftest framework requirement - SKIP code is 4.
+ksft_skip=4
+
set -e
if [[ $(id -u) -ne 0 ]]; then
echo "This test must be run as root. Skipping..."
- exit 0
+ exit $ksft_skip
fi
+nr_hugepgs=$(cat /proc/sys/vm/nr_hugepages)
+
fault_limit_file=limit_in_bytes
reservation_limit_file=rsvd.limit_in_bytes
fault_usage_file=usage_in_bytes
@@ -21,19 +26,23 @@ if [[ "$1" == "-cgroup-v2" ]]; then
reservation_usage_file=rsvd.current
fi
-cgroup_path=/dev/cgroup/memory
-if [[ ! -e $cgroup_path ]]; then
- mkdir -p $cgroup_path
- if [[ $cgroup2 ]]; then
+if [[ $cgroup2 ]]; then
+ cgroup_path=$(mount -t cgroup2 | head -1 | awk '{print $3}')
+ if [[ -z "$cgroup_path" ]]; then
+ cgroup_path=/dev/cgroup/memory
mount -t cgroup2 none $cgroup_path
- else
+ do_umount=1
+ fi
+ echo "+hugetlb" >$cgroup_path/cgroup.subtree_control
+else
+ cgroup_path=$(mount -t cgroup | grep ",hugetlb" | awk '{print $3}')
+ if [[ -z "$cgroup_path" ]]; then
+ cgroup_path=/dev/cgroup/memory
mount -t cgroup memory,hugetlb $cgroup_path
+ do_umount=1
fi
fi
-
-if [[ $cgroup2 ]]; then
- echo "+hugetlb" >/dev/cgroup/memory/cgroup.subtree_control
-fi
+export cgroup_path
function cleanup() {
if [[ $cgroup2 ]]; then
@@ -105,7 +114,7 @@ function setup_cgroup() {
function wait_for_hugetlb_memory_to_get_depleted() {
local cgroup="$1"
- local path="/dev/cgroup/memory/$cgroup/hugetlb.${MB}MB.$reservation_usage_file"
+ local path="$cgroup_path/$cgroup/hugetlb.${MB}MB.$reservation_usage_file"
# Wait for hugetlbfs memory to get depleted.
while [ $(cat $path) != 0 ]; do
echo Waiting for hugetlb memory to get depleted.
@@ -118,7 +127,7 @@ function wait_for_hugetlb_memory_to_get_reserved() {
local cgroup="$1"
local size="$2"
- local path="/dev/cgroup/memory/$cgroup/hugetlb.${MB}MB.$reservation_usage_file"
+ local path="$cgroup_path/$cgroup/hugetlb.${MB}MB.$reservation_usage_file"
# Wait for hugetlbfs memory to get written.
while [ $(cat $path) != $size ]; do
echo Waiting for hugetlb memory reservation to reach size $size.
@@ -131,7 +140,7 @@ function wait_for_hugetlb_memory_to_get_written() {
local cgroup="$1"
local size="$2"
- local path="/dev/cgroup/memory/$cgroup/hugetlb.${MB}MB.$fault_usage_file"
+ local path="$cgroup_path/$cgroup/hugetlb.${MB}MB.$fault_usage_file"
# Wait for hugetlbfs memory to get written.
while [ $(cat $path) != $size ]; do
echo Waiting for hugetlb memory to reach size $size.
@@ -571,5 +580,9 @@ for populate in "" "-o"; do
done # populate
done # method
-umount $cgroup_path
-rmdir $cgroup_path
+if [[ $do_umount ]]; then
+ umount $cgroup_path
+ rmdir $cgroup_path
+fi
+
+echo "$nr_hugepgs" > /proc/sys/vm/nr_hugepages
diff --git a/tools/testing/selftests/mm/check_config.sh b/tools/testing/selftests/mm/check_config.sh
new file mode 100755
index 000000000000..3954f4746161
--- /dev/null
+++ b/tools/testing/selftests/mm/check_config.sh
@@ -0,0 +1,31 @@
+#!/bin/sh
+# SPDX-License-Identifier: GPL-2.0
+#
+# Probe for libraries and create header files to record the results. Both C
+# header files and Makefile include fragments are created.
+
+OUTPUT_H_FILE=local_config.h
+OUTPUT_MKFILE=local_config.mk
+
+tmpname=$(mktemp)
+tmpfile_c=${tmpname}.c
+tmpfile_o=${tmpname}.o
+
+# liburing
+echo "#include <sys/types.h>" > $tmpfile_c
+echo "#include <liburing.h>" >> $tmpfile_c
+echo "int func(void) { return 0; }" >> $tmpfile_c
+
+CC=${1:?"Usage: $0 <compiler> # example compiler: gcc"}
+$CC -c $tmpfile_c -o $tmpfile_o >/dev/null 2>&1
+
+if [ -f $tmpfile_o ]; then
+ echo "#define LOCAL_CONFIG_HAVE_LIBURING 1" > $OUTPUT_H_FILE
+ echo "IOURING_EXTRA_LIBS = -luring" > $OUTPUT_MKFILE
+else
+ echo "// No liburing support found" > $OUTPUT_H_FILE
+ echo "# No liburing support found, so:" > $OUTPUT_MKFILE
+ echo "IOURING_EXTRA_LIBS = " >> $OUTPUT_MKFILE
+fi
+
+rm ${tmpname}.*
diff --git a/tools/testing/selftests/vm/compaction_test.c b/tools/testing/selftests/mm/compaction_test.c
index bcec71250873..533999b6c284 100644
--- a/tools/testing/selftests/vm/compaction_test.c
+++ b/tools/testing/selftests/mm/compaction_test.c
@@ -18,7 +18,8 @@
#include "../kselftest.h"
-#define MAP_SIZE 1048576
+#define MAP_SIZE_MB 100
+#define MAP_SIZE (MAP_SIZE_MB * 1024 * 1024)
struct map_list {
void *map;
@@ -32,7 +33,7 @@ int read_memory_info(unsigned long *memfree, unsigned long *hugepagesize)
FILE *cmdfile = popen(cmd, "r");
if (!(fgets(buffer, sizeof(buffer), cmdfile))) {
- perror("Failed to read meminfo\n");
+ ksft_print_msg("Failed to read meminfo: %s\n", strerror(errno));
return -1;
}
@@ -43,7 +44,7 @@ int read_memory_info(unsigned long *memfree, unsigned long *hugepagesize)
cmdfile = popen(cmd, "r");
if (!(fgets(buffer, sizeof(buffer), cmdfile))) {
- perror("Failed to read meminfo\n");
+ ksft_print_msg("Failed to read meminfo: %s\n", strerror(errno));
return -1;
}
@@ -61,14 +62,14 @@ int prereq(void)
fd = open("/proc/sys/vm/compact_unevictable_allowed",
O_RDONLY | O_NONBLOCK);
if (fd < 0) {
- perror("Failed to open\n"
- "/proc/sys/vm/compact_unevictable_allowed\n");
+ ksft_print_msg("Failed to open /proc/sys/vm/compact_unevictable_allowed: %s\n",
+ strerror(errno));
return -1;
}
if (read(fd, &allowed, sizeof(char)) != sizeof(char)) {
- perror("Failed to read from\n"
- "/proc/sys/vm/compact_unevictable_allowed\n");
+ ksft_print_msg("Failed to read from /proc/sys/vm/compact_unevictable_allowed: %s\n",
+ strerror(errno));
close(fd);
return -1;
}
@@ -77,12 +78,13 @@ int prereq(void)
if (allowed == '1')
return 0;
+ ksft_print_msg("Compaction isn't allowed\n");
return -1;
}
int check_compaction(unsigned long mem_free, unsigned int hugepage_size)
{
- int fd;
+ int fd, ret = -1;
int compaction_index = 0;
char initial_nr_hugepages[10] = {0};
char nr_hugepages[10] = {0};
@@ -93,18 +95,22 @@ int check_compaction(unsigned long mem_free, unsigned int hugepage_size)
fd = open("/proc/sys/vm/nr_hugepages", O_RDWR | O_NONBLOCK);
if (fd < 0) {
- perror("Failed to open /proc/sys/vm/nr_hugepages");
- return -1;
+ ksft_print_msg("Failed to open /proc/sys/vm/nr_hugepages: %s\n",
+ strerror(errno));
+ ret = -1;
+ goto out;
}
if (read(fd, initial_nr_hugepages, sizeof(initial_nr_hugepages)) <= 0) {
- perror("Failed to read from /proc/sys/vm/nr_hugepages");
+ ksft_print_msg("Failed to read from /proc/sys/vm/nr_hugepages: %s\n",
+ strerror(errno));
goto close_fd;
}
/* Start with the initial condition of 0 huge pages*/
if (write(fd, "0", sizeof(char)) != sizeof(char)) {
- perror("Failed to write 0 to /proc/sys/vm/nr_hugepages\n");
+ ksft_print_msg("Failed to write 0 to /proc/sys/vm/nr_hugepages: %s\n",
+ strerror(errno));
goto close_fd;
}
@@ -113,14 +119,16 @@ int check_compaction(unsigned long mem_free, unsigned int hugepage_size)
/* Request a large number of huge pages. The Kernel will allocate
as much as it can */
if (write(fd, "100000", (6*sizeof(char))) != (6*sizeof(char))) {
- perror("Failed to write 100000 to /proc/sys/vm/nr_hugepages\n");
+ ksft_print_msg("Failed to write 100000 to /proc/sys/vm/nr_hugepages: %s\n",
+ strerror(errno));
goto close_fd;
}
lseek(fd, 0, SEEK_SET);
if (read(fd, nr_hugepages, sizeof(nr_hugepages)) <= 0) {
- perror("Failed to re-read from /proc/sys/vm/nr_hugepages\n");
+ ksft_print_msg("Failed to re-read from /proc/sys/vm/nr_hugepages: %s\n",
+ strerror(errno));
goto close_fd;
}
@@ -128,71 +136,64 @@ int check_compaction(unsigned long mem_free, unsigned int hugepage_size)
huge pages */
compaction_index = mem_free/(atoi(nr_hugepages) * hugepage_size);
- if (compaction_index > 3) {
- printf("No of huge pages allocated = %d\n",
- (atoi(nr_hugepages)));
- fprintf(stderr, "ERROR: Less that 1/%d of memory is available\n"
- "as huge pages\n", compaction_index);
- goto close_fd;
- }
-
- printf("No of huge pages allocated = %d\n",
- (atoi(nr_hugepages)));
-
lseek(fd, 0, SEEK_SET);
if (write(fd, initial_nr_hugepages, strlen(initial_nr_hugepages))
!= strlen(initial_nr_hugepages)) {
- perror("Failed to write value to /proc/sys/vm/nr_hugepages\n");
+ ksft_print_msg("Failed to write value to /proc/sys/vm/nr_hugepages: %s\n",
+ strerror(errno));
goto close_fd;
}
- close(fd);
- return 0;
+ ksft_print_msg("Number of huge pages allocated = %d\n",
+ atoi(nr_hugepages));
+
+ if (compaction_index > 3) {
+ ksft_print_msg("ERROR: Less that 1/%d of memory is available\n"
+ "as huge pages\n", compaction_index);
+ goto close_fd;
+ }
+
+ ret = 0;
close_fd:
close(fd);
- printf("Not OK. Compaction test failed.");
- return -1;
+ out:
+ ksft_test_result(ret == 0, "check_compaction\n");
+ return ret;
}
int main(int argc, char **argv)
{
struct rlimit lim;
- struct map_list *list, *entry;
+ struct map_list *list = NULL, *entry;
size_t page_size, i;
void *map = NULL;
unsigned long mem_free = 0;
unsigned long hugepage_size = 0;
- unsigned long mem_fragmentable = 0;
+ long mem_fragmentable_MB = 0;
- if (prereq() != 0) {
- printf("Either the sysctl compact_unevictable_allowed is not\n"
- "set to 1 or couldn't read the proc file.\n"
- "Skipping the test\n");
- return KSFT_SKIP;
- }
+ ksft_print_header();
+
+ if (prereq() || geteuid())
+ return ksft_exit_skip("Prerequisites unsatisfied\n");
+
+ ksft_set_plan(1);
lim.rlim_cur = RLIM_INFINITY;
lim.rlim_max = RLIM_INFINITY;
- if (setrlimit(RLIMIT_MEMLOCK, &lim)) {
- perror("Failed to set rlimit:\n");
- return -1;
- }
+ if (setrlimit(RLIMIT_MEMLOCK, &lim))
+ ksft_exit_fail_msg("Failed to set rlimit: %s\n", strerror(errno));
page_size = getpagesize();
- list = NULL;
-
- if (read_memory_info(&mem_free, &hugepage_size) != 0) {
- printf("ERROR: Cannot read meminfo\n");
- return -1;
- }
+ if (read_memory_info(&mem_free, &hugepage_size) != 0)
+ ksft_exit_fail_msg("Failed to get meminfo\n");
- mem_fragmentable = mem_free * 0.8 / 1024;
+ mem_fragmentable_MB = mem_free * 0.8 / 1024;
- while (mem_fragmentable > 0) {
+ while (mem_fragmentable_MB > 0) {
map = mmap(NULL, MAP_SIZE, PROT_READ | PROT_WRITE,
MAP_ANONYMOUS | MAP_PRIVATE | MAP_LOCKED, -1, 0);
if (map == MAP_FAILED)
@@ -213,7 +214,7 @@ int main(int argc, char **argv)
for (i = 0; i < MAP_SIZE; i += page_size)
*(unsigned long *)(map + i) = (unsigned long)map + i;
- mem_fragmentable--;
+ mem_fragmentable_MB -= MAP_SIZE_MB;
}
for (entry = list; entry != NULL; entry = entry->next) {
@@ -224,7 +225,7 @@ int main(int argc, char **argv)
}
if (check_compaction(mem_free, hugepage_size) == 0)
- return 0;
+ return ksft_exit_pass();
- return -1;
+ return ksft_exit_fail();
}
diff --git a/tools/testing/selftests/vm/config b/tools/testing/selftests/mm/config
index 3ba674b64fa9..4309916f629e 100644
--- a/tools/testing/selftests/vm/config
+++ b/tools/testing/selftests/mm/config
@@ -1,5 +1,9 @@
CONFIG_SYSVIPC=y
CONFIG_USERFAULTFD=y
+CONFIG_PTE_MARKER_UFFD_WP=y
CONFIG_TEST_VMALLOC=m
CONFIG_DEVICE_PRIVATE=y
CONFIG_TEST_HMM=m
+CONFIG_GUP_TEST=y
+CONFIG_TRANSPARENT_HUGEPAGE=y
+CONFIG_MEM_SOFT_DIRTY=y
diff --git a/tools/testing/selftests/mm/cow.c b/tools/testing/selftests/mm/cow.c
new file mode 100644
index 000000000000..363bf5f801be
--- /dev/null
+++ b/tools/testing/selftests/mm/cow.c
@@ -0,0 +1,1783 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * COW (Copy On Write) tests.
+ *
+ * Copyright 2022, Red Hat, Inc.
+ *
+ * Author(s): David Hildenbrand <david@redhat.com>
+ */
+#define _GNU_SOURCE
+#include <stdlib.h>
+#include <string.h>
+#include <stdbool.h>
+#include <stdint.h>
+#include <unistd.h>
+#include <errno.h>
+#include <fcntl.h>
+#include <assert.h>
+#include <linux/mman.h>
+#include <sys/mman.h>
+#include <sys/ioctl.h>
+#include <sys/wait.h>
+#include <linux/memfd.h>
+
+#include "local_config.h"
+#ifdef LOCAL_CONFIG_HAVE_LIBURING
+#include <liburing.h>
+#endif /* LOCAL_CONFIG_HAVE_LIBURING */
+
+#include "../../../../mm/gup_test.h"
+#include "../kselftest.h"
+#include "vm_util.h"
+#include "thp_settings.h"
+
+static size_t pagesize;
+static int pagemap_fd;
+static size_t pmdsize;
+static int nr_thpsizes;
+static size_t thpsizes[20];
+static int nr_hugetlbsizes;
+static size_t hugetlbsizes[10];
+static int gup_fd;
+static bool has_huge_zeropage;
+
+static int sz2ord(size_t size)
+{
+ return __builtin_ctzll(size / pagesize);
+}
+
+static int detect_thp_sizes(size_t sizes[], int max)
+{
+ int count = 0;
+ unsigned long orders;
+ size_t kb;
+ int i;
+
+ /* thp not supported at all. */
+ if (!pmdsize)
+ return 0;
+
+ orders = 1UL << sz2ord(pmdsize);
+ orders |= thp_supported_orders();
+
+ for (i = 0; orders && count < max; i++) {
+ if (!(orders & (1UL << i)))
+ continue;
+ orders &= ~(1UL << i);
+ kb = (pagesize >> 10) << i;
+ sizes[count++] = kb * 1024;
+ ksft_print_msg("[INFO] detected THP size: %zu KiB\n", kb);
+ }
+
+ return count;
+}
+
+static void detect_huge_zeropage(void)
+{
+ int fd = open("/sys/kernel/mm/transparent_hugepage/use_zero_page",
+ O_RDONLY);
+ size_t enabled = 0;
+ char buf[15];
+ int ret;
+
+ if (fd < 0)
+ return;
+
+ ret = pread(fd, buf, sizeof(buf), 0);
+ if (ret > 0 && ret < sizeof(buf)) {
+ buf[ret] = 0;
+
+ enabled = strtoul(buf, NULL, 10);
+ if (enabled == 1) {
+ has_huge_zeropage = true;
+ ksft_print_msg("[INFO] huge zeropage is enabled\n");
+ }
+ }
+
+ close(fd);
+}
+
+static bool range_is_swapped(void *addr, size_t size)
+{
+ for (; size; addr += pagesize, size -= pagesize)
+ if (!pagemap_is_swapped(pagemap_fd, addr))
+ return false;
+ return true;
+}
+
+struct comm_pipes {
+ int child_ready[2];
+ int parent_ready[2];
+};
+
+static int setup_comm_pipes(struct comm_pipes *comm_pipes)
+{
+ if (pipe(comm_pipes->child_ready) < 0)
+ return -errno;
+ if (pipe(comm_pipes->parent_ready) < 0) {
+ close(comm_pipes->child_ready[0]);
+ close(comm_pipes->child_ready[1]);
+ return -errno;
+ }
+
+ return 0;
+}
+
+static void close_comm_pipes(struct comm_pipes *comm_pipes)
+{
+ close(comm_pipes->child_ready[0]);
+ close(comm_pipes->child_ready[1]);
+ close(comm_pipes->parent_ready[0]);
+ close(comm_pipes->parent_ready[1]);
+}
+
+static int child_memcmp_fn(char *mem, size_t size,
+ struct comm_pipes *comm_pipes)
+{
+ char *old = malloc(size);
+ char buf;
+
+ /* Backup the original content. */
+ memcpy(old, mem, size);
+
+ /* Wait until the parent modified the page. */
+ write(comm_pipes->child_ready[1], "0", 1);
+ while (read(comm_pipes->parent_ready[0], &buf, 1) != 1)
+ ;
+
+ /* See if we still read the old values. */
+ return memcmp(old, mem, size);
+}
+
+static int child_vmsplice_memcmp_fn(char *mem, size_t size,
+ struct comm_pipes *comm_pipes)
+{
+ struct iovec iov = {
+ .iov_base = mem,
+ .iov_len = size,
+ };
+ ssize_t cur, total, transferred;
+ char *old, *new;
+ int fds[2];
+ char buf;
+
+ old = malloc(size);
+ new = malloc(size);
+
+ /* Backup the original content. */
+ memcpy(old, mem, size);
+
+ if (pipe(fds) < 0)
+ return -errno;
+
+ /* Trigger a read-only pin. */
+ transferred = vmsplice(fds[1], &iov, 1, 0);
+ if (transferred < 0)
+ return -errno;
+ if (transferred == 0)
+ return -EINVAL;
+
+ /* Unmap it from our page tables. */
+ if (munmap(mem, size) < 0)
+ return -errno;
+
+ /* Wait until the parent modified it. */
+ write(comm_pipes->child_ready[1], "0", 1);
+ while (read(comm_pipes->parent_ready[0], &buf, 1) != 1)
+ ;
+
+ /* See if we still read the old values via the pipe. */
+ for (total = 0; total < transferred; total += cur) {
+ cur = read(fds[0], new + total, transferred - total);
+ if (cur < 0)
+ return -errno;
+ }
+
+ return memcmp(old, new, transferred);
+}
+
+typedef int (*child_fn)(char *mem, size_t size, struct comm_pipes *comm_pipes);
+
+static void do_test_cow_in_parent(char *mem, size_t size, bool do_mprotect,
+ child_fn fn)
+{
+ struct comm_pipes comm_pipes;
+ char buf;
+ int ret;
+
+ ret = setup_comm_pipes(&comm_pipes);
+ if (ret) {
+ ksft_test_result_fail("pipe() failed\n");
+ return;
+ }
+
+ ret = fork();
+ if (ret < 0) {
+ ksft_test_result_fail("fork() failed\n");
+ goto close_comm_pipes;
+ } else if (!ret) {
+ exit(fn(mem, size, &comm_pipes));
+ }
+
+ while (read(comm_pipes.child_ready[0], &buf, 1) != 1)
+ ;
+
+ if (do_mprotect) {
+ /*
+ * mprotect() optimizations might try avoiding
+ * write-faults by directly mapping pages writable.
+ */
+ ret = mprotect(mem, size, PROT_READ);
+ ret |= mprotect(mem, size, PROT_READ|PROT_WRITE);
+ if (ret) {
+ ksft_test_result_fail("mprotect() failed\n");
+ write(comm_pipes.parent_ready[1], "0", 1);
+ wait(&ret);
+ goto close_comm_pipes;
+ }
+ }
+
+ /* Modify the page. */
+ memset(mem, 0xff, size);
+ write(comm_pipes.parent_ready[1], "0", 1);
+
+ wait(&ret);
+ if (WIFEXITED(ret))
+ ret = WEXITSTATUS(ret);
+ else
+ ret = -EINVAL;
+
+ ksft_test_result(!ret, "No leak from parent into child\n");
+close_comm_pipes:
+ close_comm_pipes(&comm_pipes);
+}
+
+static void test_cow_in_parent(char *mem, size_t size)
+{
+ do_test_cow_in_parent(mem, size, false, child_memcmp_fn);
+}
+
+static void test_cow_in_parent_mprotect(char *mem, size_t size)
+{
+ do_test_cow_in_parent(mem, size, true, child_memcmp_fn);
+}
+
+static void test_vmsplice_in_child(char *mem, size_t size)
+{
+ do_test_cow_in_parent(mem, size, false, child_vmsplice_memcmp_fn);
+}
+
+static void test_vmsplice_in_child_mprotect(char *mem, size_t size)
+{
+ do_test_cow_in_parent(mem, size, true, child_vmsplice_memcmp_fn);
+}
+
+static void do_test_vmsplice_in_parent(char *mem, size_t size,
+ bool before_fork)
+{
+ struct iovec iov = {
+ .iov_base = mem,
+ .iov_len = size,
+ };
+ ssize_t cur, total, transferred;
+ struct comm_pipes comm_pipes;
+ char *old, *new;
+ int ret, fds[2];
+ char buf;
+
+ old = malloc(size);
+ new = malloc(size);
+
+ memcpy(old, mem, size);
+
+ ret = setup_comm_pipes(&comm_pipes);
+ if (ret) {
+ ksft_test_result_fail("pipe() failed\n");
+ goto free;
+ }
+
+ if (pipe(fds) < 0) {
+ ksft_test_result_fail("pipe() failed\n");
+ goto close_comm_pipes;
+ }
+
+ if (before_fork) {
+ transferred = vmsplice(fds[1], &iov, 1, 0);
+ if (transferred <= 0) {
+ ksft_test_result_fail("vmsplice() failed\n");
+ goto close_pipe;
+ }
+ }
+
+ ret = fork();
+ if (ret < 0) {
+ ksft_test_result_fail("fork() failed\n");
+ goto close_pipe;
+ } else if (!ret) {
+ write(comm_pipes.child_ready[1], "0", 1);
+ while (read(comm_pipes.parent_ready[0], &buf, 1) != 1)
+ ;
+ /* Modify page content in the child. */
+ memset(mem, 0xff, size);
+ exit(0);
+ }
+
+ if (!before_fork) {
+ transferred = vmsplice(fds[1], &iov, 1, 0);
+ if (transferred <= 0) {
+ ksft_test_result_fail("vmsplice() failed\n");
+ wait(&ret);
+ goto close_pipe;
+ }
+ }
+
+ while (read(comm_pipes.child_ready[0], &buf, 1) != 1)
+ ;
+ if (munmap(mem, size) < 0) {
+ ksft_test_result_fail("munmap() failed\n");
+ goto close_pipe;
+ }
+ write(comm_pipes.parent_ready[1], "0", 1);
+
+ /* Wait until the child is done writing. */
+ wait(&ret);
+ if (!WIFEXITED(ret)) {
+ ksft_test_result_fail("wait() failed\n");
+ goto close_pipe;
+ }
+
+ /* See if we still read the old values. */
+ for (total = 0; total < transferred; total += cur) {
+ cur = read(fds[0], new + total, transferred - total);
+ if (cur < 0) {
+ ksft_test_result_fail("read() failed\n");
+ goto close_pipe;
+ }
+ }
+
+ ksft_test_result(!memcmp(old, new, transferred),
+ "No leak from child into parent\n");
+close_pipe:
+ close(fds[0]);
+ close(fds[1]);
+close_comm_pipes:
+ close_comm_pipes(&comm_pipes);
+free:
+ free(old);
+ free(new);
+}
+
+static void test_vmsplice_before_fork(char *mem, size_t size)
+{
+ do_test_vmsplice_in_parent(mem, size, true);
+}
+
+static void test_vmsplice_after_fork(char *mem, size_t size)
+{
+ do_test_vmsplice_in_parent(mem, size, false);
+}
+
+#ifdef LOCAL_CONFIG_HAVE_LIBURING
+static void do_test_iouring(char *mem, size_t size, bool use_fork)
+{
+ struct comm_pipes comm_pipes;
+ struct io_uring_cqe *cqe;
+ struct io_uring_sqe *sqe;
+ struct io_uring ring;
+ ssize_t cur, total;
+ struct iovec iov;
+ char *buf, *tmp;
+ int ret, fd;
+ FILE *file;
+
+ ret = setup_comm_pipes(&comm_pipes);
+ if (ret) {
+ ksft_test_result_fail("pipe() failed\n");
+ return;
+ }
+
+ file = tmpfile();
+ if (!file) {
+ ksft_test_result_fail("tmpfile() failed\n");
+ goto close_comm_pipes;
+ }
+ fd = fileno(file);
+ assert(fd);
+
+ tmp = malloc(size);
+ if (!tmp) {
+ ksft_test_result_fail("malloc() failed\n");
+ goto close_file;
+ }
+
+ /* Skip on errors, as we might just lack kernel support. */
+ ret = io_uring_queue_init(1, &ring, 0);
+ if (ret < 0) {
+ ksft_test_result_skip("io_uring_queue_init() failed\n");
+ goto free_tmp;
+ }
+
+ /*
+ * Register the range as a fixed buffer. This will FOLL_WRITE | FOLL_PIN
+ * | FOLL_LONGTERM the range.
+ *
+ * Skip on errors, as we might just lack kernel support or might not
+ * have sufficient MEMLOCK permissions.
+ */
+ iov.iov_base = mem;
+ iov.iov_len = size;
+ ret = io_uring_register_buffers(&ring, &iov, 1);
+ if (ret) {
+ ksft_test_result_skip("io_uring_register_buffers() failed\n");
+ goto queue_exit;
+ }
+
+ if (use_fork) {
+ /*
+ * fork() and keep the child alive until we're done. Note that
+ * we expect the pinned page to not get shared with the child.
+ */
+ ret = fork();
+ if (ret < 0) {
+ ksft_test_result_fail("fork() failed\n");
+ goto unregister_buffers;
+ } else if (!ret) {
+ write(comm_pipes.child_ready[1], "0", 1);
+ while (read(comm_pipes.parent_ready[0], &buf, 1) != 1)
+ ;
+ exit(0);
+ }
+
+ while (read(comm_pipes.child_ready[0], &buf, 1) != 1)
+ ;
+ } else {
+ /*
+ * Map the page R/O into the page table. Enable softdirty
+ * tracking to stop the page from getting mapped R/W immediately
+ * again by mprotect() optimizations. Note that we don't have an
+ * easy way to test if that worked (the pagemap does not export
+ * if the page is mapped R/O vs. R/W).
+ */
+ ret = mprotect(mem, size, PROT_READ);
+ clear_softdirty();
+ ret |= mprotect(mem, size, PROT_READ | PROT_WRITE);
+ if (ret) {
+ ksft_test_result_fail("mprotect() failed\n");
+ goto unregister_buffers;
+ }
+ }
+
+ /*
+ * Modify the page and write page content as observed by the fixed
+ * buffer pin to the file so we can verify it.
+ */
+ memset(mem, 0xff, size);
+ sqe = io_uring_get_sqe(&ring);
+ if (!sqe) {
+ ksft_test_result_fail("io_uring_get_sqe() failed\n");
+ goto quit_child;
+ }
+ io_uring_prep_write_fixed(sqe, fd, mem, size, 0, 0);
+
+ ret = io_uring_submit(&ring);
+ if (ret < 0) {
+ ksft_test_result_fail("io_uring_submit() failed\n");
+ goto quit_child;
+ }
+
+ ret = io_uring_wait_cqe(&ring, &cqe);
+ if (ret < 0) {
+ ksft_test_result_fail("io_uring_wait_cqe() failed\n");
+ goto quit_child;
+ }
+
+ if (cqe->res != size) {
+ ksft_test_result_fail("write_fixed failed\n");
+ goto quit_child;
+ }
+ io_uring_cqe_seen(&ring, cqe);
+
+ /* Read back the file content to the temporary buffer. */
+ total = 0;
+ while (total < size) {
+ cur = pread(fd, tmp + total, size - total, total);
+ if (cur < 0) {
+ ksft_test_result_fail("pread() failed\n");
+ goto quit_child;
+ }
+ total += cur;
+ }
+
+ /* Finally, check if we read what we expected. */
+ ksft_test_result(!memcmp(mem, tmp, size),
+ "Longterm R/W pin is reliable\n");
+
+quit_child:
+ if (use_fork) {
+ write(comm_pipes.parent_ready[1], "0", 1);
+ wait(&ret);
+ }
+unregister_buffers:
+ io_uring_unregister_buffers(&ring);
+queue_exit:
+ io_uring_queue_exit(&ring);
+free_tmp:
+ free(tmp);
+close_file:
+ fclose(file);
+close_comm_pipes:
+ close_comm_pipes(&comm_pipes);
+}
+
+static void test_iouring_ro(char *mem, size_t size)
+{
+ do_test_iouring(mem, size, false);
+}
+
+static void test_iouring_fork(char *mem, size_t size)
+{
+ do_test_iouring(mem, size, true);
+}
+
+#endif /* LOCAL_CONFIG_HAVE_LIBURING */
+
+enum ro_pin_test {
+ RO_PIN_TEST,
+ RO_PIN_TEST_SHARED,
+ RO_PIN_TEST_PREVIOUSLY_SHARED,
+ RO_PIN_TEST_RO_EXCLUSIVE,
+};
+
+static void do_test_ro_pin(char *mem, size_t size, enum ro_pin_test test,
+ bool fast)
+{
+ struct pin_longterm_test args;
+ struct comm_pipes comm_pipes;
+ char *tmp, buf;
+ __u64 tmp_val;
+ int ret;
+
+ if (gup_fd < 0) {
+ ksft_test_result_skip("gup_test not available\n");
+ return;
+ }
+
+ tmp = malloc(size);
+ if (!tmp) {
+ ksft_test_result_fail("malloc() failed\n");
+ return;
+ }
+
+ ret = setup_comm_pipes(&comm_pipes);
+ if (ret) {
+ ksft_test_result_fail("pipe() failed\n");
+ goto free_tmp;
+ }
+
+ switch (test) {
+ case RO_PIN_TEST:
+ break;
+ case RO_PIN_TEST_SHARED:
+ case RO_PIN_TEST_PREVIOUSLY_SHARED:
+ /*
+ * Share the pages with our child. As the pages are not pinned,
+ * this should just work.
+ */
+ ret = fork();
+ if (ret < 0) {
+ ksft_test_result_fail("fork() failed\n");
+ goto close_comm_pipes;
+ } else if (!ret) {
+ write(comm_pipes.child_ready[1], "0", 1);
+ while (read(comm_pipes.parent_ready[0], &buf, 1) != 1)
+ ;
+ exit(0);
+ }
+
+ /* Wait until our child is ready. */
+ while (read(comm_pipes.child_ready[0], &buf, 1) != 1)
+ ;
+
+ if (test == RO_PIN_TEST_PREVIOUSLY_SHARED) {
+ /*
+ * Tell the child to quit now and wait until it quit.
+ * The pages should now be mapped R/O into our page
+ * tables, but they are no longer shared.
+ */
+ write(comm_pipes.parent_ready[1], "0", 1);
+ wait(&ret);
+ if (!WIFEXITED(ret))
+ ksft_print_msg("[INFO] wait() failed\n");
+ }
+ break;
+ case RO_PIN_TEST_RO_EXCLUSIVE:
+ /*
+ * Map the page R/O into the page table. Enable softdirty
+ * tracking to stop the page from getting mapped R/W immediately
+ * again by mprotect() optimizations. Note that we don't have an
+ * easy way to test if that worked (the pagemap does not export
+ * if the page is mapped R/O vs. R/W).
+ */
+ ret = mprotect(mem, size, PROT_READ);
+ clear_softdirty();
+ ret |= mprotect(mem, size, PROT_READ | PROT_WRITE);
+ if (ret) {
+ ksft_test_result_fail("mprotect() failed\n");
+ goto close_comm_pipes;
+ }
+ break;
+ default:
+ assert(false);
+ }
+
+ /* Take a R/O pin. This should trigger unsharing. */
+ args.addr = (__u64)(uintptr_t)mem;
+ args.size = size;
+ args.flags = fast ? PIN_LONGTERM_TEST_FLAG_USE_FAST : 0;
+ ret = ioctl(gup_fd, PIN_LONGTERM_TEST_START, &args);
+ if (ret) {
+ if (errno == EINVAL)
+ ksft_test_result_skip("PIN_LONGTERM_TEST_START failed\n");
+ else
+ ksft_test_result_fail("PIN_LONGTERM_TEST_START failed\n");
+ goto wait;
+ }
+
+ /* Modify the page. */
+ memset(mem, 0xff, size);
+
+ /*
+ * Read back the content via the pin to the temporary buffer and
+ * test if we observed the modification.
+ */
+ tmp_val = (__u64)(uintptr_t)tmp;
+ ret = ioctl(gup_fd, PIN_LONGTERM_TEST_READ, &tmp_val);
+ if (ret)
+ ksft_test_result_fail("PIN_LONGTERM_TEST_READ failed\n");
+ else
+ ksft_test_result(!memcmp(mem, tmp, size),
+ "Longterm R/O pin is reliable\n");
+
+ ret = ioctl(gup_fd, PIN_LONGTERM_TEST_STOP);
+ if (ret)
+ ksft_print_msg("[INFO] PIN_LONGTERM_TEST_STOP failed\n");
+wait:
+ switch (test) {
+ case RO_PIN_TEST_SHARED:
+ write(comm_pipes.parent_ready[1], "0", 1);
+ wait(&ret);
+ if (!WIFEXITED(ret))
+ ksft_print_msg("[INFO] wait() failed\n");
+ break;
+ default:
+ break;
+ }
+close_comm_pipes:
+ close_comm_pipes(&comm_pipes);
+free_tmp:
+ free(tmp);
+}
+
+static void test_ro_pin_on_shared(char *mem, size_t size)
+{
+ do_test_ro_pin(mem, size, RO_PIN_TEST_SHARED, false);
+}
+
+static void test_ro_fast_pin_on_shared(char *mem, size_t size)
+{
+ do_test_ro_pin(mem, size, RO_PIN_TEST_SHARED, true);
+}
+
+static void test_ro_pin_on_ro_previously_shared(char *mem, size_t size)
+{
+ do_test_ro_pin(mem, size, RO_PIN_TEST_PREVIOUSLY_SHARED, false);
+}
+
+static void test_ro_fast_pin_on_ro_previously_shared(char *mem, size_t size)
+{
+ do_test_ro_pin(mem, size, RO_PIN_TEST_PREVIOUSLY_SHARED, true);
+}
+
+static void test_ro_pin_on_ro_exclusive(char *mem, size_t size)
+{
+ do_test_ro_pin(mem, size, RO_PIN_TEST_RO_EXCLUSIVE, false);
+}
+
+static void test_ro_fast_pin_on_ro_exclusive(char *mem, size_t size)
+{
+ do_test_ro_pin(mem, size, RO_PIN_TEST_RO_EXCLUSIVE, true);
+}
+
+typedef void (*test_fn)(char *mem, size_t size);
+
+static void do_run_with_base_page(test_fn fn, bool swapout)
+{
+ char *mem;
+ int ret;
+
+ mem = mmap(NULL, pagesize, PROT_READ | PROT_WRITE,
+ MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
+ if (mem == MAP_FAILED) {
+ ksft_test_result_fail("mmap() failed\n");
+ return;
+ }
+
+ ret = madvise(mem, pagesize, MADV_NOHUGEPAGE);
+ /* Ignore if not around on a kernel. */
+ if (ret && errno != EINVAL) {
+ ksft_test_result_fail("MADV_NOHUGEPAGE failed\n");
+ goto munmap;
+ }
+
+ /* Populate a base page. */
+ memset(mem, 0, pagesize);
+
+ if (swapout) {
+ madvise(mem, pagesize, MADV_PAGEOUT);
+ if (!pagemap_is_swapped(pagemap_fd, mem)) {
+ ksft_test_result_skip("MADV_PAGEOUT did not work, is swap enabled?\n");
+ goto munmap;
+ }
+ }
+
+ fn(mem, pagesize);
+munmap:
+ munmap(mem, pagesize);
+}
+
+static void run_with_base_page(test_fn fn, const char *desc)
+{
+ ksft_print_msg("[RUN] %s ... with base page\n", desc);
+ do_run_with_base_page(fn, false);
+}
+
+static void run_with_base_page_swap(test_fn fn, const char *desc)
+{
+ ksft_print_msg("[RUN] %s ... with swapped out base page\n", desc);
+ do_run_with_base_page(fn, true);
+}
+
+enum thp_run {
+ THP_RUN_PMD,
+ THP_RUN_PMD_SWAPOUT,
+ THP_RUN_PTE,
+ THP_RUN_PTE_SWAPOUT,
+ THP_RUN_SINGLE_PTE,
+ THP_RUN_SINGLE_PTE_SWAPOUT,
+ THP_RUN_PARTIAL_MREMAP,
+ THP_RUN_PARTIAL_SHARED,
+};
+
+static void do_run_with_thp(test_fn fn, enum thp_run thp_run, size_t thpsize)
+{
+ char *mem, *mmap_mem, *tmp, *mremap_mem = MAP_FAILED;
+ size_t size, mmap_size, mremap_size;
+ int ret;
+
+ /* For alignment purposes, we need twice the thp size. */
+ mmap_size = 2 * thpsize;
+ mmap_mem = mmap(NULL, mmap_size, PROT_READ | PROT_WRITE,
+ MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
+ if (mmap_mem == MAP_FAILED) {
+ ksft_test_result_fail("mmap() failed\n");
+ return;
+ }
+
+ /* We need a THP-aligned memory area. */
+ mem = (char *)(((uintptr_t)mmap_mem + thpsize) & ~(thpsize - 1));
+
+ ret = madvise(mem, thpsize, MADV_HUGEPAGE);
+ if (ret) {
+ ksft_test_result_fail("MADV_HUGEPAGE failed\n");
+ goto munmap;
+ }
+
+ /*
+ * Try to populate a THP. Touch the first sub-page and test if
+ * we get the last sub-page populated automatically.
+ */
+ mem[0] = 0;
+ if (!pagemap_is_populated(pagemap_fd, mem + thpsize - pagesize)) {
+ ksft_test_result_skip("Did not get a THP populated\n");
+ goto munmap;
+ }
+ memset(mem, 0, thpsize);
+
+ size = thpsize;
+ switch (thp_run) {
+ case THP_RUN_PMD:
+ case THP_RUN_PMD_SWAPOUT:
+ assert(thpsize == pmdsize);
+ break;
+ case THP_RUN_PTE:
+ case THP_RUN_PTE_SWAPOUT:
+ /*
+ * Trigger PTE-mapping the THP by temporarily mapping a single
+ * subpage R/O. This is a noop if the THP is not pmdsize (and
+ * therefore already PTE-mapped).
+ */
+ ret = mprotect(mem + pagesize, pagesize, PROT_READ);
+ if (ret) {
+ ksft_test_result_fail("mprotect() failed\n");
+ goto munmap;
+ }
+ ret = mprotect(mem + pagesize, pagesize, PROT_READ | PROT_WRITE);
+ if (ret) {
+ ksft_test_result_fail("mprotect() failed\n");
+ goto munmap;
+ }
+ break;
+ case THP_RUN_SINGLE_PTE:
+ case THP_RUN_SINGLE_PTE_SWAPOUT:
+ /*
+ * Discard all but a single subpage of that PTE-mapped THP. What
+ * remains is a single PTE mapping a single subpage.
+ */
+ ret = madvise(mem + pagesize, thpsize - pagesize, MADV_DONTNEED);
+ if (ret) {
+ ksft_test_result_fail("MADV_DONTNEED failed\n");
+ goto munmap;
+ }
+ size = pagesize;
+ break;
+ case THP_RUN_PARTIAL_MREMAP:
+ /*
+ * Remap half of the THP. We need some new memory location
+ * for that.
+ */
+ mremap_size = thpsize / 2;
+ mremap_mem = mmap(NULL, mremap_size, PROT_NONE,
+ MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
+ if (mem == MAP_FAILED) {
+ ksft_test_result_fail("mmap() failed\n");
+ goto munmap;
+ }
+ tmp = mremap(mem + mremap_size, mremap_size, mremap_size,
+ MREMAP_MAYMOVE | MREMAP_FIXED, mremap_mem);
+ if (tmp != mremap_mem) {
+ ksft_test_result_fail("mremap() failed\n");
+ goto munmap;
+ }
+ size = mremap_size;
+ break;
+ case THP_RUN_PARTIAL_SHARED:
+ /*
+ * Share the first page of the THP with a child and quit the
+ * child. This will result in some parts of the THP never
+ * have been shared.
+ */
+ ret = madvise(mem + pagesize, thpsize - pagesize, MADV_DONTFORK);
+ if (ret) {
+ ksft_test_result_fail("MADV_DONTFORK failed\n");
+ goto munmap;
+ }
+ ret = fork();
+ if (ret < 0) {
+ ksft_test_result_fail("fork() failed\n");
+ goto munmap;
+ } else if (!ret) {
+ exit(0);
+ }
+ wait(&ret);
+ /* Allow for sharing all pages again. */
+ ret = madvise(mem + pagesize, thpsize - pagesize, MADV_DOFORK);
+ if (ret) {
+ ksft_test_result_fail("MADV_DOFORK failed\n");
+ goto munmap;
+ }
+ break;
+ default:
+ assert(false);
+ }
+
+ switch (thp_run) {
+ case THP_RUN_PMD_SWAPOUT:
+ case THP_RUN_PTE_SWAPOUT:
+ case THP_RUN_SINGLE_PTE_SWAPOUT:
+ madvise(mem, size, MADV_PAGEOUT);
+ if (!range_is_swapped(mem, size)) {
+ ksft_test_result_skip("MADV_PAGEOUT did not work, is swap enabled?\n");
+ goto munmap;
+ }
+ break;
+ default:
+ break;
+ }
+
+ fn(mem, size);
+munmap:
+ munmap(mmap_mem, mmap_size);
+ if (mremap_mem != MAP_FAILED)
+ munmap(mremap_mem, mremap_size);
+}
+
+static void run_with_thp(test_fn fn, const char *desc, size_t size)
+{
+ ksft_print_msg("[RUN] %s ... with THP (%zu kB)\n",
+ desc, size / 1024);
+ do_run_with_thp(fn, THP_RUN_PMD, size);
+}
+
+static void run_with_thp_swap(test_fn fn, const char *desc, size_t size)
+{
+ ksft_print_msg("[RUN] %s ... with swapped-out THP (%zu kB)\n",
+ desc, size / 1024);
+ do_run_with_thp(fn, THP_RUN_PMD_SWAPOUT, size);
+}
+
+static void run_with_pte_mapped_thp(test_fn fn, const char *desc, size_t size)
+{
+ ksft_print_msg("[RUN] %s ... with PTE-mapped THP (%zu kB)\n",
+ desc, size / 1024);
+ do_run_with_thp(fn, THP_RUN_PTE, size);
+}
+
+static void run_with_pte_mapped_thp_swap(test_fn fn, const char *desc, size_t size)
+{
+ ksft_print_msg("[RUN] %s ... with swapped-out, PTE-mapped THP (%zu kB)\n",
+ desc, size / 1024);
+ do_run_with_thp(fn, THP_RUN_PTE_SWAPOUT, size);
+}
+
+static void run_with_single_pte_of_thp(test_fn fn, const char *desc, size_t size)
+{
+ ksft_print_msg("[RUN] %s ... with single PTE of THP (%zu kB)\n",
+ desc, size / 1024);
+ do_run_with_thp(fn, THP_RUN_SINGLE_PTE, size);
+}
+
+static void run_with_single_pte_of_thp_swap(test_fn fn, const char *desc, size_t size)
+{
+ ksft_print_msg("[RUN] %s ... with single PTE of swapped-out THP (%zu kB)\n",
+ desc, size / 1024);
+ do_run_with_thp(fn, THP_RUN_SINGLE_PTE_SWAPOUT, size);
+}
+
+static void run_with_partial_mremap_thp(test_fn fn, const char *desc, size_t size)
+{
+ ksft_print_msg("[RUN] %s ... with partially mremap()'ed THP (%zu kB)\n",
+ desc, size / 1024);
+ do_run_with_thp(fn, THP_RUN_PARTIAL_MREMAP, size);
+}
+
+static void run_with_partial_shared_thp(test_fn fn, const char *desc, size_t size)
+{
+ ksft_print_msg("[RUN] %s ... with partially shared THP (%zu kB)\n",
+ desc, size / 1024);
+ do_run_with_thp(fn, THP_RUN_PARTIAL_SHARED, size);
+}
+
+static void run_with_hugetlb(test_fn fn, const char *desc, size_t hugetlbsize)
+{
+ int flags = MAP_PRIVATE | MAP_ANONYMOUS | MAP_HUGETLB;
+ char *mem, *dummy;
+
+ ksft_print_msg("[RUN] %s ... with hugetlb (%zu kB)\n", desc,
+ hugetlbsize / 1024);
+
+ flags |= __builtin_ctzll(hugetlbsize) << MAP_HUGE_SHIFT;
+
+ mem = mmap(NULL, hugetlbsize, PROT_READ | PROT_WRITE, flags, -1, 0);
+ if (mem == MAP_FAILED) {
+ ksft_test_result_skip("need more free huge pages\n");
+ return;
+ }
+
+ /* Populate an huge page. */
+ memset(mem, 0, hugetlbsize);
+
+ /*
+ * We need a total of two hugetlb pages to handle COW/unsharing
+ * properly, otherwise we might get zapped by a SIGBUS.
+ */
+ dummy = mmap(NULL, hugetlbsize, PROT_READ | PROT_WRITE, flags, -1, 0);
+ if (dummy == MAP_FAILED) {
+ ksft_test_result_skip("need more free huge pages\n");
+ goto munmap;
+ }
+ munmap(dummy, hugetlbsize);
+
+ fn(mem, hugetlbsize);
+munmap:
+ munmap(mem, hugetlbsize);
+}
+
+struct test_case {
+ const char *desc;
+ test_fn fn;
+};
+
+/*
+ * Test cases that are specific to anonymous pages: pages in private mappings
+ * that may get shared via COW during fork().
+ */
+static const struct test_case anon_test_cases[] = {
+ /*
+ * Basic COW tests for fork() without any GUP. If we miss to break COW,
+ * either the child can observe modifications by the parent or the
+ * other way around.
+ */
+ {
+ "Basic COW after fork()",
+ test_cow_in_parent,
+ },
+ /*
+ * Basic test, but do an additional mprotect(PROT_READ)+
+ * mprotect(PROT_READ|PROT_WRITE) in the parent before write access.
+ */
+ {
+ "Basic COW after fork() with mprotect() optimization",
+ test_cow_in_parent_mprotect,
+ },
+ /*
+ * vmsplice() [R/O GUP] + unmap in the child; modify in the parent. If
+ * we miss to break COW, the child observes modifications by the parent.
+ * This is CVE-2020-29374 reported by Jann Horn.
+ */
+ {
+ "vmsplice() + unmap in child",
+ test_vmsplice_in_child
+ },
+ /*
+ * vmsplice() test, but do an additional mprotect(PROT_READ)+
+ * mprotect(PROT_READ|PROT_WRITE) in the parent before write access.
+ */
+ {
+ "vmsplice() + unmap in child with mprotect() optimization",
+ test_vmsplice_in_child_mprotect
+ },
+ /*
+ * vmsplice() [R/O GUP] in parent before fork(), unmap in parent after
+ * fork(); modify in the child. If we miss to break COW, the parent
+ * observes modifications by the child.
+ */
+ {
+ "vmsplice() before fork(), unmap in parent after fork()",
+ test_vmsplice_before_fork,
+ },
+ /*
+ * vmsplice() [R/O GUP] + unmap in parent after fork(); modify in the
+ * child. If we miss to break COW, the parent observes modifications by
+ * the child.
+ */
+ {
+ "vmsplice() + unmap in parent after fork()",
+ test_vmsplice_after_fork,
+ },
+#ifdef LOCAL_CONFIG_HAVE_LIBURING
+ /*
+ * Take a R/W longterm pin and then map the page R/O into the page
+ * table to trigger a write fault on next access. When modifying the
+ * page, the page content must be visible via the pin.
+ */
+ {
+ "R/O-mapping a page registered as iouring fixed buffer",
+ test_iouring_ro,
+ },
+ /*
+ * Take a R/W longterm pin and then fork() a child. When modifying the
+ * page, the page content must be visible via the pin. We expect the
+ * pinned page to not get shared with the child.
+ */
+ {
+ "fork() with an iouring fixed buffer",
+ test_iouring_fork,
+ },
+
+#endif /* LOCAL_CONFIG_HAVE_LIBURING */
+ /*
+ * Take a R/O longterm pin on a R/O-mapped shared anonymous page.
+ * When modifying the page via the page table, the page content change
+ * must be visible via the pin.
+ */
+ {
+ "R/O GUP pin on R/O-mapped shared page",
+ test_ro_pin_on_shared,
+ },
+ /* Same as above, but using GUP-fast. */
+ {
+ "R/O GUP-fast pin on R/O-mapped shared page",
+ test_ro_fast_pin_on_shared,
+ },
+ /*
+ * Take a R/O longterm pin on a R/O-mapped exclusive anonymous page that
+ * was previously shared. When modifying the page via the page table,
+ * the page content change must be visible via the pin.
+ */
+ {
+ "R/O GUP pin on R/O-mapped previously-shared page",
+ test_ro_pin_on_ro_previously_shared,
+ },
+ /* Same as above, but using GUP-fast. */
+ {
+ "R/O GUP-fast pin on R/O-mapped previously-shared page",
+ test_ro_fast_pin_on_ro_previously_shared,
+ },
+ /*
+ * Take a R/O longterm pin on a R/O-mapped exclusive anonymous page.
+ * When modifying the page via the page table, the page content change
+ * must be visible via the pin.
+ */
+ {
+ "R/O GUP pin on R/O-mapped exclusive page",
+ test_ro_pin_on_ro_exclusive,
+ },
+ /* Same as above, but using GUP-fast. */
+ {
+ "R/O GUP-fast pin on R/O-mapped exclusive page",
+ test_ro_fast_pin_on_ro_exclusive,
+ },
+};
+
+static void run_anon_test_case(struct test_case const *test_case)
+{
+ int i;
+
+ run_with_base_page(test_case->fn, test_case->desc);
+ run_with_base_page_swap(test_case->fn, test_case->desc);
+ for (i = 0; i < nr_thpsizes; i++) {
+ size_t size = thpsizes[i];
+ struct thp_settings settings = *thp_current_settings();
+
+ settings.hugepages[sz2ord(pmdsize)].enabled = THP_NEVER;
+ settings.hugepages[sz2ord(size)].enabled = THP_ALWAYS;
+ thp_push_settings(&settings);
+
+ if (size == pmdsize) {
+ run_with_thp(test_case->fn, test_case->desc, size);
+ run_with_thp_swap(test_case->fn, test_case->desc, size);
+ }
+
+ run_with_pte_mapped_thp(test_case->fn, test_case->desc, size);
+ run_with_pte_mapped_thp_swap(test_case->fn, test_case->desc, size);
+ run_with_single_pte_of_thp(test_case->fn, test_case->desc, size);
+ run_with_single_pte_of_thp_swap(test_case->fn, test_case->desc, size);
+ run_with_partial_mremap_thp(test_case->fn, test_case->desc, size);
+ run_with_partial_shared_thp(test_case->fn, test_case->desc, size);
+
+ thp_pop_settings();
+ }
+ for (i = 0; i < nr_hugetlbsizes; i++)
+ run_with_hugetlb(test_case->fn, test_case->desc,
+ hugetlbsizes[i]);
+}
+
+static void run_anon_test_cases(void)
+{
+ int i;
+
+ ksft_print_msg("[INFO] Anonymous memory tests in private mappings\n");
+
+ for (i = 0; i < ARRAY_SIZE(anon_test_cases); i++)
+ run_anon_test_case(&anon_test_cases[i]);
+}
+
+static int tests_per_anon_test_case(void)
+{
+ int tests = 2 + nr_hugetlbsizes;
+
+ tests += 6 * nr_thpsizes;
+ if (pmdsize)
+ tests += 2;
+ return tests;
+}
+
+enum anon_thp_collapse_test {
+ ANON_THP_COLLAPSE_UNSHARED,
+ ANON_THP_COLLAPSE_FULLY_SHARED,
+ ANON_THP_COLLAPSE_LOWER_SHARED,
+ ANON_THP_COLLAPSE_UPPER_SHARED,
+};
+
+static void do_test_anon_thp_collapse(char *mem, size_t size,
+ enum anon_thp_collapse_test test)
+{
+ struct comm_pipes comm_pipes;
+ char buf;
+ int ret;
+
+ ret = setup_comm_pipes(&comm_pipes);
+ if (ret) {
+ ksft_test_result_fail("pipe() failed\n");
+ return;
+ }
+
+ /*
+ * Trigger PTE-mapping the THP by temporarily mapping a single subpage
+ * R/O, such that we can try collapsing it later.
+ */
+ ret = mprotect(mem + pagesize, pagesize, PROT_READ);
+ if (ret) {
+ ksft_test_result_fail("mprotect() failed\n");
+ goto close_comm_pipes;
+ }
+ ret = mprotect(mem + pagesize, pagesize, PROT_READ | PROT_WRITE);
+ if (ret) {
+ ksft_test_result_fail("mprotect() failed\n");
+ goto close_comm_pipes;
+ }
+
+ switch (test) {
+ case ANON_THP_COLLAPSE_UNSHARED:
+ /* Collapse before actually COW-sharing the page. */
+ ret = madvise(mem, size, MADV_COLLAPSE);
+ if (ret) {
+ ksft_test_result_skip("MADV_COLLAPSE failed: %s\n",
+ strerror(errno));
+ goto close_comm_pipes;
+ }
+ break;
+ case ANON_THP_COLLAPSE_FULLY_SHARED:
+ /* COW-share the full PTE-mapped THP. */
+ break;
+ case ANON_THP_COLLAPSE_LOWER_SHARED:
+ /* Don't COW-share the upper part of the THP. */
+ ret = madvise(mem + size / 2, size / 2, MADV_DONTFORK);
+ if (ret) {
+ ksft_test_result_fail("MADV_DONTFORK failed\n");
+ goto close_comm_pipes;
+ }
+ break;
+ case ANON_THP_COLLAPSE_UPPER_SHARED:
+ /* Don't COW-share the lower part of the THP. */
+ ret = madvise(mem, size / 2, MADV_DONTFORK);
+ if (ret) {
+ ksft_test_result_fail("MADV_DONTFORK failed\n");
+ goto close_comm_pipes;
+ }
+ break;
+ default:
+ assert(false);
+ }
+
+ ret = fork();
+ if (ret < 0) {
+ ksft_test_result_fail("fork() failed\n");
+ goto close_comm_pipes;
+ } else if (!ret) {
+ switch (test) {
+ case ANON_THP_COLLAPSE_UNSHARED:
+ case ANON_THP_COLLAPSE_FULLY_SHARED:
+ exit(child_memcmp_fn(mem, size, &comm_pipes));
+ break;
+ case ANON_THP_COLLAPSE_LOWER_SHARED:
+ exit(child_memcmp_fn(mem, size / 2, &comm_pipes));
+ break;
+ case ANON_THP_COLLAPSE_UPPER_SHARED:
+ exit(child_memcmp_fn(mem + size / 2, size / 2,
+ &comm_pipes));
+ break;
+ default:
+ assert(false);
+ }
+ }
+
+ while (read(comm_pipes.child_ready[0], &buf, 1) != 1)
+ ;
+
+ switch (test) {
+ case ANON_THP_COLLAPSE_UNSHARED:
+ break;
+ case ANON_THP_COLLAPSE_UPPER_SHARED:
+ case ANON_THP_COLLAPSE_LOWER_SHARED:
+ /*
+ * Revert MADV_DONTFORK such that we merge the VMAs and are
+ * able to actually collapse.
+ */
+ ret = madvise(mem, size, MADV_DOFORK);
+ if (ret) {
+ ksft_test_result_fail("MADV_DOFORK failed\n");
+ write(comm_pipes.parent_ready[1], "0", 1);
+ wait(&ret);
+ goto close_comm_pipes;
+ }
+ /* FALLTHROUGH */
+ case ANON_THP_COLLAPSE_FULLY_SHARED:
+ /* Collapse before anyone modified the COW-shared page. */
+ ret = madvise(mem, size, MADV_COLLAPSE);
+ if (ret) {
+ ksft_test_result_skip("MADV_COLLAPSE failed: %s\n",
+ strerror(errno));
+ write(comm_pipes.parent_ready[1], "0", 1);
+ wait(&ret);
+ goto close_comm_pipes;
+ }
+ break;
+ default:
+ assert(false);
+ }
+
+ /* Modify the page. */
+ memset(mem, 0xff, size);
+ write(comm_pipes.parent_ready[1], "0", 1);
+
+ wait(&ret);
+ if (WIFEXITED(ret))
+ ret = WEXITSTATUS(ret);
+ else
+ ret = -EINVAL;
+
+ ksft_test_result(!ret, "No leak from parent into child\n");
+close_comm_pipes:
+ close_comm_pipes(&comm_pipes);
+}
+
+static void test_anon_thp_collapse_unshared(char *mem, size_t size)
+{
+ do_test_anon_thp_collapse(mem, size, ANON_THP_COLLAPSE_UNSHARED);
+}
+
+static void test_anon_thp_collapse_fully_shared(char *mem, size_t size)
+{
+ do_test_anon_thp_collapse(mem, size, ANON_THP_COLLAPSE_FULLY_SHARED);
+}
+
+static void test_anon_thp_collapse_lower_shared(char *mem, size_t size)
+{
+ do_test_anon_thp_collapse(mem, size, ANON_THP_COLLAPSE_LOWER_SHARED);
+}
+
+static void test_anon_thp_collapse_upper_shared(char *mem, size_t size)
+{
+ do_test_anon_thp_collapse(mem, size, ANON_THP_COLLAPSE_UPPER_SHARED);
+}
+
+/*
+ * Test cases that are specific to anonymous THP: pages in private mappings
+ * that may get shared via COW during fork().
+ */
+static const struct test_case anon_thp_test_cases[] = {
+ /*
+ * Basic COW test for fork() without any GUP when collapsing a THP
+ * before fork().
+ *
+ * Re-mapping a PTE-mapped anon THP using a single PMD ("in-place
+ * collapse") might easily get COW handling wrong when not collapsing
+ * exclusivity information properly.
+ */
+ {
+ "Basic COW after fork() when collapsing before fork()",
+ test_anon_thp_collapse_unshared,
+ },
+ /* Basic COW test, but collapse after COW-sharing a full THP. */
+ {
+ "Basic COW after fork() when collapsing after fork() (fully shared)",
+ test_anon_thp_collapse_fully_shared,
+ },
+ /*
+ * Basic COW test, but collapse after COW-sharing the lower half of a
+ * THP.
+ */
+ {
+ "Basic COW after fork() when collapsing after fork() (lower shared)",
+ test_anon_thp_collapse_lower_shared,
+ },
+ /*
+ * Basic COW test, but collapse after COW-sharing the upper half of a
+ * THP.
+ */
+ {
+ "Basic COW after fork() when collapsing after fork() (upper shared)",
+ test_anon_thp_collapse_upper_shared,
+ },
+};
+
+static void run_anon_thp_test_cases(void)
+{
+ int i;
+
+ if (!pmdsize)
+ return;
+
+ ksft_print_msg("[INFO] Anonymous THP tests\n");
+
+ for (i = 0; i < ARRAY_SIZE(anon_thp_test_cases); i++) {
+ struct test_case const *test_case = &anon_thp_test_cases[i];
+
+ ksft_print_msg("[RUN] %s\n", test_case->desc);
+ do_run_with_thp(test_case->fn, THP_RUN_PMD, pmdsize);
+ }
+}
+
+static int tests_per_anon_thp_test_case(void)
+{
+ return pmdsize ? 1 : 0;
+}
+
+typedef void (*non_anon_test_fn)(char *mem, const char *smem, size_t size);
+
+static void test_cow(char *mem, const char *smem, size_t size)
+{
+ char *old = malloc(size);
+
+ /* Backup the original content. */
+ memcpy(old, smem, size);
+
+ /* Modify the page. */
+ memset(mem, 0xff, size);
+
+ /* See if we still read the old values via the other mapping. */
+ ksft_test_result(!memcmp(smem, old, size),
+ "Other mapping not modified\n");
+ free(old);
+}
+
+static void test_ro_pin(char *mem, const char *smem, size_t size)
+{
+ do_test_ro_pin(mem, size, RO_PIN_TEST, false);
+}
+
+static void test_ro_fast_pin(char *mem, const char *smem, size_t size)
+{
+ do_test_ro_pin(mem, size, RO_PIN_TEST, true);
+}
+
+static void run_with_zeropage(non_anon_test_fn fn, const char *desc)
+{
+ char *mem, *smem, tmp;
+
+ ksft_print_msg("[RUN] %s ... with shared zeropage\n", desc);
+
+ mem = mmap(NULL, pagesize, PROT_READ | PROT_WRITE,
+ MAP_PRIVATE | MAP_ANON, -1, 0);
+ if (mem == MAP_FAILED) {
+ ksft_test_result_fail("mmap() failed\n");
+ return;
+ }
+
+ smem = mmap(NULL, pagesize, PROT_READ, MAP_PRIVATE | MAP_ANON, -1, 0);
+ if (mem == MAP_FAILED) {
+ ksft_test_result_fail("mmap() failed\n");
+ goto munmap;
+ }
+
+ /* Read from the page to populate the shared zeropage. */
+ tmp = *mem + *smem;
+ asm volatile("" : "+r" (tmp));
+
+ fn(mem, smem, pagesize);
+munmap:
+ munmap(mem, pagesize);
+ if (smem != MAP_FAILED)
+ munmap(smem, pagesize);
+}
+
+static void run_with_huge_zeropage(non_anon_test_fn fn, const char *desc)
+{
+ char *mem, *smem, *mmap_mem, *mmap_smem, tmp;
+ size_t mmap_size;
+ int ret;
+
+ ksft_print_msg("[RUN] %s ... with huge zeropage\n", desc);
+
+ if (!has_huge_zeropage) {
+ ksft_test_result_skip("Huge zeropage not enabled\n");
+ return;
+ }
+
+ /* For alignment purposes, we need twice the thp size. */
+ mmap_size = 2 * pmdsize;
+ mmap_mem = mmap(NULL, mmap_size, PROT_READ | PROT_WRITE,
+ MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
+ if (mmap_mem == MAP_FAILED) {
+ ksft_test_result_fail("mmap() failed\n");
+ return;
+ }
+ mmap_smem = mmap(NULL, mmap_size, PROT_READ,
+ MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
+ if (mmap_smem == MAP_FAILED) {
+ ksft_test_result_fail("mmap() failed\n");
+ goto munmap;
+ }
+
+ /* We need a THP-aligned memory area. */
+ mem = (char *)(((uintptr_t)mmap_mem + pmdsize) & ~(pmdsize - 1));
+ smem = (char *)(((uintptr_t)mmap_smem + pmdsize) & ~(pmdsize - 1));
+
+ ret = madvise(mem, pmdsize, MADV_HUGEPAGE);
+ ret |= madvise(smem, pmdsize, MADV_HUGEPAGE);
+ if (ret) {
+ ksft_test_result_fail("MADV_HUGEPAGE failed\n");
+ goto munmap;
+ }
+
+ /*
+ * Read from the memory to populate the huge shared zeropage. Read from
+ * the first sub-page and test if we get another sub-page populated
+ * automatically.
+ */
+ tmp = *mem + *smem;
+ asm volatile("" : "+r" (tmp));
+ if (!pagemap_is_populated(pagemap_fd, mem + pagesize) ||
+ !pagemap_is_populated(pagemap_fd, smem + pagesize)) {
+ ksft_test_result_skip("Did not get THPs populated\n");
+ goto munmap;
+ }
+
+ fn(mem, smem, pmdsize);
+munmap:
+ munmap(mmap_mem, mmap_size);
+ if (mmap_smem != MAP_FAILED)
+ munmap(mmap_smem, mmap_size);
+}
+
+static void run_with_memfd(non_anon_test_fn fn, const char *desc)
+{
+ char *mem, *smem, tmp;
+ int fd;
+
+ ksft_print_msg("[RUN] %s ... with memfd\n", desc);
+
+ fd = memfd_create("test", 0);
+ if (fd < 0) {
+ ksft_test_result_fail("memfd_create() failed\n");
+ return;
+ }
+
+ /* File consists of a single page filled with zeroes. */
+ if (fallocate(fd, 0, 0, pagesize)) {
+ ksft_test_result_fail("fallocate() failed\n");
+ goto close;
+ }
+
+ /* Create a private mapping of the memfd. */
+ mem = mmap(NULL, pagesize, PROT_READ | PROT_WRITE, MAP_PRIVATE, fd, 0);
+ if (mem == MAP_FAILED) {
+ ksft_test_result_fail("mmap() failed\n");
+ goto close;
+ }
+ smem = mmap(NULL, pagesize, PROT_READ, MAP_SHARED, fd, 0);
+ if (mem == MAP_FAILED) {
+ ksft_test_result_fail("mmap() failed\n");
+ goto munmap;
+ }
+
+ /* Fault the page in. */
+ tmp = *mem + *smem;
+ asm volatile("" : "+r" (tmp));
+
+ fn(mem, smem, pagesize);
+munmap:
+ munmap(mem, pagesize);
+ if (smem != MAP_FAILED)
+ munmap(smem, pagesize);
+close:
+ close(fd);
+}
+
+static void run_with_tmpfile(non_anon_test_fn fn, const char *desc)
+{
+ char *mem, *smem, tmp;
+ FILE *file;
+ int fd;
+
+ ksft_print_msg("[RUN] %s ... with tmpfile\n", desc);
+
+ file = tmpfile();
+ if (!file) {
+ ksft_test_result_fail("tmpfile() failed\n");
+ return;
+ }
+
+ fd = fileno(file);
+ if (fd < 0) {
+ ksft_test_result_skip("fileno() failed\n");
+ return;
+ }
+
+ /* File consists of a single page filled with zeroes. */
+ if (fallocate(fd, 0, 0, pagesize)) {
+ ksft_test_result_fail("fallocate() failed\n");
+ goto close;
+ }
+
+ /* Create a private mapping of the memfd. */
+ mem = mmap(NULL, pagesize, PROT_READ | PROT_WRITE, MAP_PRIVATE, fd, 0);
+ if (mem == MAP_FAILED) {
+ ksft_test_result_fail("mmap() failed\n");
+ goto close;
+ }
+ smem = mmap(NULL, pagesize, PROT_READ, MAP_SHARED, fd, 0);
+ if (mem == MAP_FAILED) {
+ ksft_test_result_fail("mmap() failed\n");
+ goto munmap;
+ }
+
+ /* Fault the page in. */
+ tmp = *mem + *smem;
+ asm volatile("" : "+r" (tmp));
+
+ fn(mem, smem, pagesize);
+munmap:
+ munmap(mem, pagesize);
+ if (smem != MAP_FAILED)
+ munmap(smem, pagesize);
+close:
+ fclose(file);
+}
+
+static void run_with_memfd_hugetlb(non_anon_test_fn fn, const char *desc,
+ size_t hugetlbsize)
+{
+ int flags = MFD_HUGETLB;
+ char *mem, *smem, tmp;
+ int fd;
+
+ ksft_print_msg("[RUN] %s ... with memfd hugetlb (%zu kB)\n", desc,
+ hugetlbsize / 1024);
+
+ flags |= __builtin_ctzll(hugetlbsize) << MFD_HUGE_SHIFT;
+
+ fd = memfd_create("test", flags);
+ if (fd < 0) {
+ ksft_test_result_skip("memfd_create() failed\n");
+ return;
+ }
+
+ /* File consists of a single page filled with zeroes. */
+ if (fallocate(fd, 0, 0, hugetlbsize)) {
+ ksft_test_result_skip("need more free huge pages\n");
+ goto close;
+ }
+
+ /* Create a private mapping of the memfd. */
+ mem = mmap(NULL, hugetlbsize, PROT_READ | PROT_WRITE, MAP_PRIVATE, fd,
+ 0);
+ if (mem == MAP_FAILED) {
+ ksft_test_result_skip("need more free huge pages\n");
+ goto close;
+ }
+ smem = mmap(NULL, hugetlbsize, PROT_READ, MAP_SHARED, fd, 0);
+ if (mem == MAP_FAILED) {
+ ksft_test_result_fail("mmap() failed\n");
+ goto munmap;
+ }
+
+ /* Fault the page in. */
+ tmp = *mem + *smem;
+ asm volatile("" : "+r" (tmp));
+
+ fn(mem, smem, hugetlbsize);
+munmap:
+ munmap(mem, hugetlbsize);
+ if (mem != MAP_FAILED)
+ munmap(smem, hugetlbsize);
+close:
+ close(fd);
+}
+
+struct non_anon_test_case {
+ const char *desc;
+ non_anon_test_fn fn;
+};
+
+/*
+ * Test cases that target any pages in private mappings that are not anonymous:
+ * pages that may get shared via COW ndependent of fork(). This includes
+ * the shared zeropage(s), pagecache pages, ...
+ */
+static const struct non_anon_test_case non_anon_test_cases[] = {
+ /*
+ * Basic COW test without any GUP. If we miss to break COW, changes are
+ * visible via other private/shared mappings.
+ */
+ {
+ "Basic COW",
+ test_cow,
+ },
+ /*
+ * Take a R/O longterm pin. When modifying the page via the page table,
+ * the page content change must be visible via the pin.
+ */
+ {
+ "R/O longterm GUP pin",
+ test_ro_pin,
+ },
+ /* Same as above, but using GUP-fast. */
+ {
+ "R/O longterm GUP-fast pin",
+ test_ro_fast_pin,
+ },
+};
+
+static void run_non_anon_test_case(struct non_anon_test_case const *test_case)
+{
+ int i;
+
+ run_with_zeropage(test_case->fn, test_case->desc);
+ run_with_memfd(test_case->fn, test_case->desc);
+ run_with_tmpfile(test_case->fn, test_case->desc);
+ if (pmdsize)
+ run_with_huge_zeropage(test_case->fn, test_case->desc);
+ for (i = 0; i < nr_hugetlbsizes; i++)
+ run_with_memfd_hugetlb(test_case->fn, test_case->desc,
+ hugetlbsizes[i]);
+}
+
+static void run_non_anon_test_cases(void)
+{
+ int i;
+
+ ksft_print_msg("[RUN] Non-anonymous memory tests in private mappings\n");
+
+ for (i = 0; i < ARRAY_SIZE(non_anon_test_cases); i++)
+ run_non_anon_test_case(&non_anon_test_cases[i]);
+}
+
+static int tests_per_non_anon_test_case(void)
+{
+ int tests = 3 + nr_hugetlbsizes;
+
+ if (pmdsize)
+ tests += 1;
+ return tests;
+}
+
+int main(int argc, char **argv)
+{
+ int err;
+ struct thp_settings default_settings;
+
+ ksft_print_header();
+
+ pagesize = getpagesize();
+ pmdsize = read_pmd_pagesize();
+ if (pmdsize) {
+ /* Only if THP is supported. */
+ thp_read_settings(&default_settings);
+ default_settings.hugepages[sz2ord(pmdsize)].enabled = THP_INHERIT;
+ thp_save_settings();
+ thp_push_settings(&default_settings);
+
+ ksft_print_msg("[INFO] detected PMD size: %zu KiB\n",
+ pmdsize / 1024);
+ nr_thpsizes = detect_thp_sizes(thpsizes, ARRAY_SIZE(thpsizes));
+ }
+ nr_hugetlbsizes = detect_hugetlb_page_sizes(hugetlbsizes,
+ ARRAY_SIZE(hugetlbsizes));
+ detect_huge_zeropage();
+
+ ksft_set_plan(ARRAY_SIZE(anon_test_cases) * tests_per_anon_test_case() +
+ ARRAY_SIZE(anon_thp_test_cases) * tests_per_anon_thp_test_case() +
+ ARRAY_SIZE(non_anon_test_cases) * tests_per_non_anon_test_case());
+
+ gup_fd = open("/sys/kernel/debug/gup_test", O_RDWR);
+ pagemap_fd = open("/proc/self/pagemap", O_RDONLY);
+ if (pagemap_fd < 0)
+ ksft_exit_fail_msg("opening pagemap failed\n");
+
+ run_anon_test_cases();
+ run_anon_thp_test_cases();
+ run_non_anon_test_cases();
+
+ if (pmdsize) {
+ /* Only if THP is supported. */
+ thp_restore_settings();
+ }
+
+ err = ksft_get_fail_cnt();
+ if (err)
+ ksft_exit_fail_msg("%d out of %d tests failed\n",
+ err, ksft_test_num());
+ return ksft_exit_pass();
+}
diff --git a/tools/testing/selftests/mm/gup_longterm.c b/tools/testing/selftests/mm/gup_longterm.c
new file mode 100644
index 000000000000..ad168d35b23b
--- /dev/null
+++ b/tools/testing/selftests/mm/gup_longterm.c
@@ -0,0 +1,460 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * GUP long-term page pinning tests.
+ *
+ * Copyright 2023, Red Hat, Inc.
+ *
+ * Author(s): David Hildenbrand <david@redhat.com>
+ */
+#define _GNU_SOURCE
+#include <stdlib.h>
+#include <string.h>
+#include <stdbool.h>
+#include <stdint.h>
+#include <unistd.h>
+#include <errno.h>
+#include <fcntl.h>
+#include <assert.h>
+#include <sys/mman.h>
+#include <sys/ioctl.h>
+#include <sys/vfs.h>
+#include <linux/magic.h>
+#include <linux/memfd.h>
+
+#include "local_config.h"
+#ifdef LOCAL_CONFIG_HAVE_LIBURING
+#include <liburing.h>
+#endif /* LOCAL_CONFIG_HAVE_LIBURING */
+
+#include "../../../../mm/gup_test.h"
+#include "../kselftest.h"
+#include "vm_util.h"
+
+static size_t pagesize;
+static int nr_hugetlbsizes;
+static size_t hugetlbsizes[10];
+static int gup_fd;
+
+static __fsword_t get_fs_type(int fd)
+{
+ struct statfs fs;
+ int ret;
+
+ do {
+ ret = fstatfs(fd, &fs);
+ } while (ret && errno == EINTR);
+
+ return ret ? 0 : fs.f_type;
+}
+
+static bool fs_is_unknown(__fsword_t fs_type)
+{
+ /*
+ * We only support some filesystems in our tests when dealing with
+ * R/W long-term pinning. For these filesystems, we can be fairly sure
+ * whether they support it or not.
+ */
+ switch (fs_type) {
+ case TMPFS_MAGIC:
+ case HUGETLBFS_MAGIC:
+ case BTRFS_SUPER_MAGIC:
+ case EXT4_SUPER_MAGIC:
+ case XFS_SUPER_MAGIC:
+ return false;
+ default:
+ return true;
+ }
+}
+
+static bool fs_supports_writable_longterm_pinning(__fsword_t fs_type)
+{
+ assert(!fs_is_unknown(fs_type));
+ switch (fs_type) {
+ case TMPFS_MAGIC:
+ case HUGETLBFS_MAGIC:
+ return true;
+ default:
+ return false;
+ }
+}
+
+enum test_type {
+ TEST_TYPE_RO,
+ TEST_TYPE_RO_FAST,
+ TEST_TYPE_RW,
+ TEST_TYPE_RW_FAST,
+#ifdef LOCAL_CONFIG_HAVE_LIBURING
+ TEST_TYPE_IOURING,
+#endif /* LOCAL_CONFIG_HAVE_LIBURING */
+};
+
+static void do_test(int fd, size_t size, enum test_type type, bool shared)
+{
+ __fsword_t fs_type = get_fs_type(fd);
+ bool should_work;
+ char *mem;
+ int ret;
+
+ if (ftruncate(fd, size)) {
+ ksft_test_result_fail("ftruncate() failed\n");
+ return;
+ }
+
+ if (fallocate(fd, 0, 0, size)) {
+ if (size == pagesize)
+ ksft_test_result_fail("fallocate() failed\n");
+ else
+ ksft_test_result_skip("need more free huge pages\n");
+ return;
+ }
+
+ mem = mmap(NULL, size, PROT_READ | PROT_WRITE,
+ shared ? MAP_SHARED : MAP_PRIVATE, fd, 0);
+ if (mem == MAP_FAILED) {
+ if (size == pagesize || shared)
+ ksft_test_result_fail("mmap() failed\n");
+ else
+ ksft_test_result_skip("need more free huge pages\n");
+ return;
+ }
+
+ /*
+ * Fault in the page writable such that GUP-fast can eventually pin
+ * it immediately.
+ */
+ memset(mem, 0, size);
+
+ switch (type) {
+ case TEST_TYPE_RO:
+ case TEST_TYPE_RO_FAST:
+ case TEST_TYPE_RW:
+ case TEST_TYPE_RW_FAST: {
+ struct pin_longterm_test args;
+ const bool fast = type == TEST_TYPE_RO_FAST ||
+ type == TEST_TYPE_RW_FAST;
+ const bool rw = type == TEST_TYPE_RW ||
+ type == TEST_TYPE_RW_FAST;
+
+ if (gup_fd < 0) {
+ ksft_test_result_skip("gup_test not available\n");
+ break;
+ }
+
+ if (rw && shared && fs_is_unknown(fs_type)) {
+ ksft_test_result_skip("Unknown filesystem\n");
+ return;
+ }
+ /*
+ * R/O pinning or pinning in a private mapping is always
+ * expected to work. Otherwise, we expect long-term R/W pinning
+ * to only succeed for special fielesystems.
+ */
+ should_work = !shared || !rw ||
+ fs_supports_writable_longterm_pinning(fs_type);
+
+ args.addr = (__u64)(uintptr_t)mem;
+ args.size = size;
+ args.flags = fast ? PIN_LONGTERM_TEST_FLAG_USE_FAST : 0;
+ args.flags |= rw ? PIN_LONGTERM_TEST_FLAG_USE_WRITE : 0;
+ ret = ioctl(gup_fd, PIN_LONGTERM_TEST_START, &args);
+ if (ret && errno == EINVAL) {
+ ksft_test_result_skip("PIN_LONGTERM_TEST_START failed\n");
+ break;
+ } else if (ret && errno == EFAULT) {
+ ksft_test_result(!should_work, "Should have failed\n");
+ break;
+ } else if (ret) {
+ ksft_test_result_fail("PIN_LONGTERM_TEST_START failed\n");
+ break;
+ }
+
+ if (ioctl(gup_fd, PIN_LONGTERM_TEST_STOP))
+ ksft_print_msg("[INFO] PIN_LONGTERM_TEST_STOP failed\n");
+
+ /*
+ * TODO: if the kernel ever supports long-term R/W pinning on
+ * some previously unsupported filesystems, we might want to
+ * perform some additional tests for possible data corruptions.
+ */
+ ksft_test_result(should_work, "Should have worked\n");
+ break;
+ }
+#ifdef LOCAL_CONFIG_HAVE_LIBURING
+ case TEST_TYPE_IOURING: {
+ struct io_uring ring;
+ struct iovec iov;
+
+ /* io_uring always pins pages writable. */
+ if (shared && fs_is_unknown(fs_type)) {
+ ksft_test_result_skip("Unknown filesystem\n");
+ return;
+ }
+ should_work = !shared ||
+ fs_supports_writable_longterm_pinning(fs_type);
+
+ /* Skip on errors, as we might just lack kernel support. */
+ ret = io_uring_queue_init(1, &ring, 0);
+ if (ret < 0) {
+ ksft_test_result_skip("io_uring_queue_init() failed\n");
+ break;
+ }
+ /*
+ * Register the range as a fixed buffer. This will FOLL_WRITE |
+ * FOLL_PIN | FOLL_LONGTERM the range.
+ */
+ iov.iov_base = mem;
+ iov.iov_len = size;
+ ret = io_uring_register_buffers(&ring, &iov, 1);
+ /* Only new kernels return EFAULT. */
+ if (ret && (errno == ENOSPC || errno == EOPNOTSUPP ||
+ errno == EFAULT)) {
+ ksft_test_result(!should_work, "Should have failed\n");
+ } else if (ret) {
+ /*
+ * We might just lack support or have insufficient
+ * MEMLOCK limits.
+ */
+ ksft_test_result_skip("io_uring_register_buffers() failed\n");
+ } else {
+ ksft_test_result(should_work, "Should have worked\n");
+ io_uring_unregister_buffers(&ring);
+ }
+
+ io_uring_queue_exit(&ring);
+ break;
+ }
+#endif /* LOCAL_CONFIG_HAVE_LIBURING */
+ default:
+ assert(false);
+ }
+
+ munmap(mem, size);
+}
+
+typedef void (*test_fn)(int fd, size_t size);
+
+static void run_with_memfd(test_fn fn, const char *desc)
+{
+ int fd;
+
+ ksft_print_msg("[RUN] %s ... with memfd\n", desc);
+
+ fd = memfd_create("test", 0);
+ if (fd < 0) {
+ ksft_test_result_fail("memfd_create() failed\n");
+ return;
+ }
+
+ fn(fd, pagesize);
+ close(fd);
+}
+
+static void run_with_tmpfile(test_fn fn, const char *desc)
+{
+ FILE *file;
+ int fd;
+
+ ksft_print_msg("[RUN] %s ... with tmpfile\n", desc);
+
+ file = tmpfile();
+ if (!file) {
+ ksft_test_result_fail("tmpfile() failed\n");
+ return;
+ }
+
+ fd = fileno(file);
+ if (fd < 0) {
+ ksft_test_result_fail("fileno() failed\n");
+ goto close;
+ }
+
+ fn(fd, pagesize);
+close:
+ fclose(file);
+}
+
+static void run_with_local_tmpfile(test_fn fn, const char *desc)
+{
+ char filename[] = __FILE__"_tmpfile_XXXXXX";
+ int fd;
+
+ ksft_print_msg("[RUN] %s ... with local tmpfile\n", desc);
+
+ fd = mkstemp(filename);
+ if (fd < 0) {
+ ksft_test_result_fail("mkstemp() failed\n");
+ return;
+ }
+
+ if (unlink(filename)) {
+ ksft_test_result_fail("unlink() failed\n");
+ goto close;
+ }
+
+ fn(fd, pagesize);
+close:
+ close(fd);
+}
+
+static void run_with_memfd_hugetlb(test_fn fn, const char *desc,
+ size_t hugetlbsize)
+{
+ int flags = MFD_HUGETLB;
+ int fd;
+
+ ksft_print_msg("[RUN] %s ... with memfd hugetlb (%zu kB)\n", desc,
+ hugetlbsize / 1024);
+
+ flags |= __builtin_ctzll(hugetlbsize) << MFD_HUGE_SHIFT;
+
+ fd = memfd_create("test", flags);
+ if (fd < 0) {
+ ksft_test_result_skip("memfd_create() failed\n");
+ return;
+ }
+
+ fn(fd, hugetlbsize);
+ close(fd);
+}
+
+struct test_case {
+ const char *desc;
+ test_fn fn;
+};
+
+static void test_shared_rw_pin(int fd, size_t size)
+{
+ do_test(fd, size, TEST_TYPE_RW, true);
+}
+
+static void test_shared_rw_fast_pin(int fd, size_t size)
+{
+ do_test(fd, size, TEST_TYPE_RW_FAST, true);
+}
+
+static void test_shared_ro_pin(int fd, size_t size)
+{
+ do_test(fd, size, TEST_TYPE_RO, true);
+}
+
+static void test_shared_ro_fast_pin(int fd, size_t size)
+{
+ do_test(fd, size, TEST_TYPE_RO_FAST, true);
+}
+
+static void test_private_rw_pin(int fd, size_t size)
+{
+ do_test(fd, size, TEST_TYPE_RW, false);
+}
+
+static void test_private_rw_fast_pin(int fd, size_t size)
+{
+ do_test(fd, size, TEST_TYPE_RW_FAST, false);
+}
+
+static void test_private_ro_pin(int fd, size_t size)
+{
+ do_test(fd, size, TEST_TYPE_RO, false);
+}
+
+static void test_private_ro_fast_pin(int fd, size_t size)
+{
+ do_test(fd, size, TEST_TYPE_RO_FAST, false);
+}
+
+#ifdef LOCAL_CONFIG_HAVE_LIBURING
+static void test_shared_iouring(int fd, size_t size)
+{
+ do_test(fd, size, TEST_TYPE_IOURING, true);
+}
+
+static void test_private_iouring(int fd, size_t size)
+{
+ do_test(fd, size, TEST_TYPE_IOURING, false);
+}
+#endif /* LOCAL_CONFIG_HAVE_LIBURING */
+
+static const struct test_case test_cases[] = {
+ {
+ "R/W longterm GUP pin in MAP_SHARED file mapping",
+ test_shared_rw_pin,
+ },
+ {
+ "R/W longterm GUP-fast pin in MAP_SHARED file mapping",
+ test_shared_rw_fast_pin,
+ },
+ {
+ "R/O longterm GUP pin in MAP_SHARED file mapping",
+ test_shared_ro_pin,
+ },
+ {
+ "R/O longterm GUP-fast pin in MAP_SHARED file mapping",
+ test_shared_ro_fast_pin,
+ },
+ {
+ "R/W longterm GUP pin in MAP_PRIVATE file mapping",
+ test_private_rw_pin,
+ },
+ {
+ "R/W longterm GUP-fast pin in MAP_PRIVATE file mapping",
+ test_private_rw_fast_pin,
+ },
+ {
+ "R/O longterm GUP pin in MAP_PRIVATE file mapping",
+ test_private_ro_pin,
+ },
+ {
+ "R/O longterm GUP-fast pin in MAP_PRIVATE file mapping",
+ test_private_ro_fast_pin,
+ },
+#ifdef LOCAL_CONFIG_HAVE_LIBURING
+ {
+ "io_uring fixed buffer with MAP_SHARED file mapping",
+ test_shared_iouring,
+ },
+ {
+ "io_uring fixed buffer with MAP_PRIVATE file mapping",
+ test_private_iouring,
+ },
+#endif /* LOCAL_CONFIG_HAVE_LIBURING */
+};
+
+static void run_test_case(struct test_case const *test_case)
+{
+ int i;
+
+ run_with_memfd(test_case->fn, test_case->desc);
+ run_with_tmpfile(test_case->fn, test_case->desc);
+ run_with_local_tmpfile(test_case->fn, test_case->desc);
+ for (i = 0; i < nr_hugetlbsizes; i++)
+ run_with_memfd_hugetlb(test_case->fn, test_case->desc,
+ hugetlbsizes[i]);
+}
+
+static int tests_per_test_case(void)
+{
+ return 3 + nr_hugetlbsizes;
+}
+
+int main(int argc, char **argv)
+{
+ int i, err;
+
+ pagesize = getpagesize();
+ nr_hugetlbsizes = detect_hugetlb_page_sizes(hugetlbsizes,
+ ARRAY_SIZE(hugetlbsizes));
+
+ ksft_print_header();
+ ksft_set_plan(ARRAY_SIZE(test_cases) * tests_per_test_case());
+
+ gup_fd = open("/sys/kernel/debug/gup_test", O_RDWR);
+
+ for (i = 0; i < ARRAY_SIZE(test_cases); i++)
+ run_test_case(&test_cases[i]);
+
+ err = ksft_get_fail_cnt();
+ if (err)
+ ksft_exit_fail_msg("%d out of %d tests failed\n",
+ err, ksft_test_num());
+ return ksft_exit_pass();
+}
diff --git a/tools/testing/selftests/mm/gup_test.c b/tools/testing/selftests/mm/gup_test.c
new file mode 100644
index 000000000000..cbe99594d319
--- /dev/null
+++ b/tools/testing/selftests/mm/gup_test.c
@@ -0,0 +1,271 @@
+#include <fcntl.h>
+#include <errno.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <unistd.h>
+#include <dirent.h>
+#include <sys/ioctl.h>
+#include <sys/mman.h>
+#include <sys/stat.h>
+#include <sys/types.h>
+#include <pthread.h>
+#include <assert.h>
+#include <mm/gup_test.h>
+#include "../kselftest.h"
+#include "vm_util.h"
+
+#define MB (1UL << 20)
+
+/* Just the flags we need, copied from mm.h: */
+#define FOLL_WRITE 0x01 /* check pte is writable */
+#define FOLL_TOUCH 0x02 /* mark page accessed */
+
+#define GUP_TEST_FILE "/sys/kernel/debug/gup_test"
+
+static unsigned long cmd = GUP_FAST_BENCHMARK;
+static int gup_fd, repeats = 1;
+static unsigned long size = 128 * MB;
+/* Serialize prints */
+static pthread_mutex_t print_mutex = PTHREAD_MUTEX_INITIALIZER;
+
+static char *cmd_to_str(unsigned long cmd)
+{
+ switch (cmd) {
+ case GUP_FAST_BENCHMARK:
+ return "GUP_FAST_BENCHMARK";
+ case PIN_FAST_BENCHMARK:
+ return "PIN_FAST_BENCHMARK";
+ case PIN_LONGTERM_BENCHMARK:
+ return "PIN_LONGTERM_BENCHMARK";
+ case GUP_BASIC_TEST:
+ return "GUP_BASIC_TEST";
+ case PIN_BASIC_TEST:
+ return "PIN_BASIC_TEST";
+ case DUMP_USER_PAGES_TEST:
+ return "DUMP_USER_PAGES_TEST";
+ }
+ return "Unknown command";
+}
+
+void *gup_thread(void *data)
+{
+ struct gup_test gup = *(struct gup_test *)data;
+ int i, status;
+
+ /* Only report timing information on the *_BENCHMARK commands: */
+ if ((cmd == PIN_FAST_BENCHMARK) || (cmd == GUP_FAST_BENCHMARK) ||
+ (cmd == PIN_LONGTERM_BENCHMARK)) {
+ for (i = 0; i < repeats; i++) {
+ gup.size = size;
+ status = ioctl(gup_fd, cmd, &gup);
+ if (status)
+ break;
+
+ pthread_mutex_lock(&print_mutex);
+ ksft_print_msg("%s: Time: get:%lld put:%lld us",
+ cmd_to_str(cmd), gup.get_delta_usec,
+ gup.put_delta_usec);
+ if (gup.size != size)
+ ksft_print_msg(", truncated (size: %lld)", gup.size);
+ ksft_print_msg("\n");
+ pthread_mutex_unlock(&print_mutex);
+ }
+ } else {
+ gup.size = size;
+ status = ioctl(gup_fd, cmd, &gup);
+ if (status)
+ goto return_;
+
+ pthread_mutex_lock(&print_mutex);
+ ksft_print_msg("%s: done\n", cmd_to_str(cmd));
+ if (gup.size != size)
+ ksft_print_msg("Truncated (size: %lld)\n", gup.size);
+ pthread_mutex_unlock(&print_mutex);
+ }
+
+return_:
+ ksft_test_result(!status, "ioctl status %d\n", status);
+ return NULL;
+}
+
+int main(int argc, char **argv)
+{
+ struct gup_test gup = { 0 };
+ int filed, i, opt, nr_pages = 1, thp = -1, write = 1, nthreads = 1, ret;
+ int flags = MAP_PRIVATE, touch = 0;
+ char *file = "/dev/zero";
+ pthread_t *tid;
+ char *p;
+
+ while ((opt = getopt(argc, argv, "m:r:n:F:f:abcj:tTLUuwWSHpz")) != -1) {
+ switch (opt) {
+ case 'a':
+ cmd = PIN_FAST_BENCHMARK;
+ break;
+ case 'b':
+ cmd = PIN_BASIC_TEST;
+ break;
+ case 'L':
+ cmd = PIN_LONGTERM_BENCHMARK;
+ break;
+ case 'c':
+ cmd = DUMP_USER_PAGES_TEST;
+ /*
+ * Dump page 0 (index 1). May be overridden later, by
+ * user's non-option arguments.
+ *
+ * .which_pages is zero-based, so that zero can mean "do
+ * nothing".
+ */
+ gup.which_pages[0] = 1;
+ break;
+ case 'p':
+ /* works only with DUMP_USER_PAGES_TEST */
+ gup.test_flags |= GUP_TEST_FLAG_DUMP_PAGES_USE_PIN;
+ break;
+ case 'F':
+ /* strtol, so you can pass flags in hex form */
+ gup.gup_flags = strtol(optarg, 0, 0);
+ break;
+ case 'j':
+ nthreads = atoi(optarg);
+ break;
+ case 'm':
+ size = atoi(optarg) * MB;
+ break;
+ case 'r':
+ repeats = atoi(optarg);
+ break;
+ case 'n':
+ nr_pages = atoi(optarg);
+ break;
+ case 't':
+ thp = 1;
+ break;
+ case 'T':
+ thp = 0;
+ break;
+ case 'U':
+ cmd = GUP_BASIC_TEST;
+ break;
+ case 'u':
+ cmd = GUP_FAST_BENCHMARK;
+ break;
+ case 'w':
+ write = 1;
+ break;
+ case 'W':
+ write = 0;
+ break;
+ case 'f':
+ file = optarg;
+ break;
+ case 'S':
+ flags &= ~MAP_PRIVATE;
+ flags |= MAP_SHARED;
+ break;
+ case 'H':
+ flags |= (MAP_HUGETLB | MAP_ANONYMOUS);
+ break;
+ case 'z':
+ /* fault pages in gup, do not fault in userland */
+ touch = 1;
+ break;
+ default:
+ ksft_exit_fail_msg("Wrong argument\n");
+ }
+ }
+
+ if (optind < argc) {
+ int extra_arg_count = 0;
+ /*
+ * For example:
+ *
+ * ./gup_test -c 0 1 0x1001
+ *
+ * ...to dump pages 0, 1, and 4097
+ */
+
+ while ((optind < argc) &&
+ (extra_arg_count < GUP_TEST_MAX_PAGES_TO_DUMP)) {
+ /*
+ * Do the 1-based indexing here, so that the user can
+ * use normal 0-based indexing on the command line.
+ */
+ long page_index = strtol(argv[optind], 0, 0) + 1;
+
+ gup.which_pages[extra_arg_count] = page_index;
+ extra_arg_count++;
+ optind++;
+ }
+ }
+
+ ksft_print_header();
+ ksft_set_plan(nthreads);
+
+ filed = open(file, O_RDWR|O_CREAT);
+ if (filed < 0)
+ ksft_exit_fail_msg("Unable to open %s: %s\n", file, strerror(errno));
+
+ gup.nr_pages_per_call = nr_pages;
+ if (write)
+ gup.gup_flags |= FOLL_WRITE;
+
+ gup_fd = open(GUP_TEST_FILE, O_RDWR);
+ if (gup_fd == -1) {
+ switch (errno) {
+ case EACCES:
+ if (getuid())
+ ksft_print_msg("Please run this test as root\n");
+ break;
+ case ENOENT:
+ if (opendir("/sys/kernel/debug") == NULL)
+ ksft_print_msg("mount debugfs at /sys/kernel/debug\n");
+ ksft_print_msg("check if CONFIG_GUP_TEST is enabled in kernel config\n");
+ break;
+ default:
+ ksft_print_msg("failed to open %s: %s\n", GUP_TEST_FILE, strerror(errno));
+ break;
+ }
+ ksft_test_result_skip("Please run this test as root\n");
+ return ksft_exit_pass();
+ }
+
+ p = mmap(NULL, size, PROT_READ | PROT_WRITE, flags, filed, 0);
+ if (p == MAP_FAILED)
+ ksft_exit_fail_msg("mmap: %s\n", strerror(errno));
+ gup.addr = (unsigned long)p;
+
+ if (thp == 1)
+ madvise(p, size, MADV_HUGEPAGE);
+ else if (thp == 0)
+ madvise(p, size, MADV_NOHUGEPAGE);
+
+ /*
+ * FOLL_TOUCH, in gup_test, is used as an either/or case: either
+ * fault pages in from the kernel via FOLL_TOUCH, or fault them
+ * in here, from user space. This allows comparison of performance
+ * between those two cases.
+ */
+ if (touch) {
+ gup.gup_flags |= FOLL_TOUCH;
+ } else {
+ for (; (unsigned long)p < gup.addr + size; p += psize())
+ p[0] = 0;
+ }
+
+ tid = malloc(sizeof(pthread_t) * nthreads);
+ assert(tid);
+ for (i = 0; i < nthreads; i++) {
+ ret = pthread_create(&tid[i], NULL, gup_thread, &gup);
+ assert(ret == 0);
+ }
+ for (i = 0; i < nthreads; i++) {
+ ret = pthread_join(tid[i], NULL);
+ assert(ret == 0);
+ }
+
+ free(tid);
+
+ return ksft_exit_pass();
+}
diff --git a/tools/testing/selftests/vm/hmm-tests.c b/tools/testing/selftests/mm/hmm-tests.c
index 79db22604019..d2cfc9b494a0 100644
--- a/tools/testing/selftests/vm/hmm-tests.c
+++ b/tools/testing/selftests/mm/hmm-tests.c
@@ -21,17 +21,18 @@
#include <strings.h>
#include <time.h>
#include <pthread.h>
-#include <hugetlbfs.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <sys/mman.h>
#include <sys/ioctl.h>
+
/*
* This is a private UAPI to the kernel test module so it isn't exported
* in the usual include/uapi/... directory.
*/
-#include "../../../../lib/test_hmm_uapi.h"
+#include <lib/test_hmm_uapi.h>
+#include <mm/gup_test.h>
struct hmm_buffer {
void *ptr;
@@ -42,13 +43,28 @@ struct hmm_buffer {
uint64_t faults;
};
+enum {
+ HMM_PRIVATE_DEVICE_ONE,
+ HMM_PRIVATE_DEVICE_TWO,
+ HMM_COHERENCE_DEVICE_ONE,
+ HMM_COHERENCE_DEVICE_TWO,
+};
+
#define TWOMEG (1 << 21)
#define HMM_BUFFER_SIZE (1024 << 12)
#define HMM_PATH_MAX 64
-#define NTIMES 256
+#define NTIMES 10
#define ALIGN(x, a) (((x) + (a - 1)) & (~((a) - 1)))
+/* Just the flags we need, copied from mm.h: */
+
+#ifndef FOLL_WRITE
+#define FOLL_WRITE 0x01 /* check pte is writable */
+#endif
+#ifndef FOLL_LONGTERM
+#define FOLL_LONGTERM 0x100 /* mapping lifetime is indefinite */
+#endif
FIXTURE(hmm)
{
int fd;
@@ -56,6 +72,21 @@ FIXTURE(hmm)
unsigned int page_shift;
};
+FIXTURE_VARIANT(hmm)
+{
+ int device_number;
+};
+
+FIXTURE_VARIANT_ADD(hmm, hmm_device_private)
+{
+ .device_number = HMM_PRIVATE_DEVICE_ONE,
+};
+
+FIXTURE_VARIANT_ADD(hmm, hmm_device_coherent)
+{
+ .device_number = HMM_COHERENCE_DEVICE_ONE,
+};
+
FIXTURE(hmm2)
{
int fd0;
@@ -64,6 +95,24 @@ FIXTURE(hmm2)
unsigned int page_shift;
};
+FIXTURE_VARIANT(hmm2)
+{
+ int device_number0;
+ int device_number1;
+};
+
+FIXTURE_VARIANT_ADD(hmm2, hmm2_device_private)
+{
+ .device_number0 = HMM_PRIVATE_DEVICE_ONE,
+ .device_number1 = HMM_PRIVATE_DEVICE_TWO,
+};
+
+FIXTURE_VARIANT_ADD(hmm2, hmm2_device_coherent)
+{
+ .device_number0 = HMM_COHERENCE_DEVICE_ONE,
+ .device_number1 = HMM_COHERENCE_DEVICE_TWO,
+};
+
static int hmm_open(int unit)
{
char pathname[HMM_PATH_MAX];
@@ -77,12 +126,19 @@ static int hmm_open(int unit)
return fd;
}
+static bool hmm_is_coherent_type(int dev_num)
+{
+ return (dev_num >= HMM_COHERENCE_DEVICE_ONE);
+}
+
FIXTURE_SETUP(hmm)
{
self->page_size = sysconf(_SC_PAGE_SIZE);
self->page_shift = ffs(self->page_size) - 1;
- self->fd = hmm_open(0);
+ self->fd = hmm_open(variant->device_number);
+ if (self->fd < 0 && hmm_is_coherent_type(variant->device_number))
+ SKIP(return, "DEVICE_COHERENT not available");
ASSERT_GE(self->fd, 0);
}
@@ -91,9 +147,11 @@ FIXTURE_SETUP(hmm2)
self->page_size = sysconf(_SC_PAGE_SIZE);
self->page_shift = ffs(self->page_size) - 1;
- self->fd0 = hmm_open(0);
+ self->fd0 = hmm_open(variant->device_number0);
+ if (self->fd0 < 0 && hmm_is_coherent_type(variant->device_number0))
+ SKIP(return, "DEVICE_COHERENT not available");
ASSERT_GE(self->fd0, 0);
- self->fd1 = hmm_open(1);
+ self->fd1 = hmm_open(variant->device_number1);
ASSERT_GE(self->fd1, 0);
}
@@ -207,6 +265,20 @@ static void hmm_nanosleep(unsigned int n)
nanosleep(&t, NULL);
}
+static int hmm_migrate_sys_to_dev(int fd,
+ struct hmm_buffer *buffer,
+ unsigned long npages)
+{
+ return hmm_dmirror_cmd(fd, HMM_DMIRROR_MIGRATE_TO_DEV, buffer, npages);
+}
+
+static int hmm_migrate_dev_to_sys(int fd,
+ struct hmm_buffer *buffer,
+ unsigned long npages)
+{
+ return hmm_dmirror_cmd(fd, HMM_DMIRROR_MIGRATE_TO_SYS, buffer, npages);
+}
+
/*
* Simple NULL test of device open/close.
*/
@@ -663,6 +735,54 @@ TEST_F(hmm, anon_write_huge)
}
/*
+ * Read numeric data from raw and tagged kernel status files. Used to read
+ * /proc and /sys data (without a tag) and from /proc/meminfo (with a tag).
+ */
+static long file_read_ulong(char *file, const char *tag)
+{
+ int fd;
+ char buf[2048];
+ int len;
+ char *p, *q;
+ long val;
+
+ fd = open(file, O_RDONLY);
+ if (fd < 0) {
+ /* Error opening the file */
+ return -1;
+ }
+
+ len = read(fd, buf, sizeof(buf));
+ close(fd);
+ if (len < 0) {
+ /* Error in reading the file */
+ return -1;
+ }
+ if (len == sizeof(buf)) {
+ /* Error file is too large */
+ return -1;
+ }
+ buf[len] = '\0';
+
+ /* Search for a tag if provided */
+ if (tag) {
+ p = strstr(buf, tag);
+ if (!p)
+ return -1; /* looks like the line we want isn't there */
+ p += strlen(tag);
+ } else
+ p = buf;
+
+ val = strtol(p, &q, 0);
+ if (*q != ' ') {
+ /* Error parsing the file */
+ return -1;
+ }
+
+ return val;
+}
+
+/*
* Write huge TLBFS page.
*/
TEST_F(hmm, anon_write_hugetlbfs)
@@ -670,31 +790,29 @@ TEST_F(hmm, anon_write_hugetlbfs)
struct hmm_buffer *buffer;
unsigned long npages;
unsigned long size;
+ unsigned long default_hsize;
unsigned long i;
int *ptr;
int ret;
- long pagesizes[4];
- int n, idx;
- /* Skip test if we can't allocate a hugetlbfs page. */
+ default_hsize = file_read_ulong("/proc/meminfo", "Hugepagesize:");
+ if (default_hsize < 0 || default_hsize*1024 < default_hsize)
+ SKIP(return, "Huge page size could not be determined");
+ default_hsize = default_hsize*1024; /* KB to B */
- n = gethugepagesizes(pagesizes, 4);
- if (n <= 0)
- return;
- for (idx = 0; --n > 0; ) {
- if (pagesizes[n] < pagesizes[idx])
- idx = n;
- }
- size = ALIGN(TWOMEG, pagesizes[idx]);
+ size = ALIGN(TWOMEG, default_hsize);
npages = size >> self->page_shift;
buffer = malloc(sizeof(*buffer));
ASSERT_NE(buffer, NULL);
- buffer->ptr = get_hugepage_region(size, GHR_STRICT);
- if (buffer->ptr == NULL) {
+ buffer->ptr = mmap(NULL, size,
+ PROT_READ | PROT_WRITE,
+ MAP_PRIVATE | MAP_ANONYMOUS | MAP_HUGETLB,
+ -1, 0);
+ if (buffer->ptr == MAP_FAILED) {
free(buffer);
- return;
+ SKIP(return, "Huge page could not be allocated");
}
buffer->fd = -1;
@@ -716,7 +834,7 @@ TEST_F(hmm, anon_write_hugetlbfs)
for (i = 0, ptr = buffer->ptr; i < size / sizeof(*ptr); ++i)
ASSERT_EQ(ptr[i], i);
- free_hugepage_region(buffer->ptr);
+ munmap(buffer->ptr, buffer->size);
buffer->ptr = NULL;
hmm_buffer_free(buffer);
}
@@ -869,7 +987,7 @@ TEST_F(hmm, migrate)
ptr[i] = i;
/* Migrate memory to device. */
- ret = hmm_dmirror_cmd(self->fd, HMM_DMIRROR_MIGRATE, buffer, npages);
+ ret = hmm_migrate_sys_to_dev(self->fd, buffer, npages);
ASSERT_EQ(ret, 0);
ASSERT_EQ(buffer->cpages, npages);
@@ -881,8 +999,9 @@ TEST_F(hmm, migrate)
}
/*
- * Migrate anonymous memory to device private memory and fault it back to system
- * memory.
+ * Migrate anonymous memory to device private memory and fault some of it back
+ * to system memory, then try migrating the resulting mix of system and device
+ * private memory to the device.
*/
TEST_F(hmm, migrate_fault)
{
@@ -916,7 +1035,7 @@ TEST_F(hmm, migrate_fault)
ptr[i] = i;
/* Migrate memory to device. */
- ret = hmm_dmirror_cmd(self->fd, HMM_DMIRROR_MIGRATE, buffer, npages);
+ ret = hmm_migrate_sys_to_dev(self->fd, buffer, npages);
ASSERT_EQ(ret, 0);
ASSERT_EQ(buffer->cpages, npages);
@@ -924,14 +1043,107 @@ TEST_F(hmm, migrate_fault)
for (i = 0, ptr = buffer->mirror; i < size / sizeof(*ptr); ++i)
ASSERT_EQ(ptr[i], i);
- /* Fault pages back to system memory and check them. */
+ /* Fault half the pages back to system memory and check them. */
+ for (i = 0, ptr = buffer->ptr; i < size / (2 * sizeof(*ptr)); ++i)
+ ASSERT_EQ(ptr[i], i);
+
+ /* Migrate memory to the device again. */
+ ret = hmm_migrate_sys_to_dev(self->fd, buffer, npages);
+ ASSERT_EQ(ret, 0);
+ ASSERT_EQ(buffer->cpages, npages);
+
+ /* Check what the device read. */
+ for (i = 0, ptr = buffer->mirror; i < size / sizeof(*ptr); ++i)
+ ASSERT_EQ(ptr[i], i);
+
+ hmm_buffer_free(buffer);
+}
+
+TEST_F(hmm, migrate_release)
+{
+ struct hmm_buffer *buffer;
+ unsigned long npages;
+ unsigned long size;
+ unsigned long i;
+ int *ptr;
+ int ret;
+
+ npages = ALIGN(HMM_BUFFER_SIZE, self->page_size) >> self->page_shift;
+ ASSERT_NE(npages, 0);
+ size = npages << self->page_shift;
+
+ buffer = malloc(sizeof(*buffer));
+ ASSERT_NE(buffer, NULL);
+
+ buffer->fd = -1;
+ buffer->size = size;
+ buffer->mirror = malloc(size);
+ ASSERT_NE(buffer->mirror, NULL);
+
+ buffer->ptr = mmap(NULL, size, PROT_READ | PROT_WRITE,
+ MAP_PRIVATE | MAP_ANONYMOUS, buffer->fd, 0);
+ ASSERT_NE(buffer->ptr, MAP_FAILED);
+
+ /* Initialize buffer in system memory. */
for (i = 0, ptr = buffer->ptr; i < size / sizeof(*ptr); ++i)
+ ptr[i] = i;
+
+ /* Migrate memory to device. */
+ ret = hmm_migrate_sys_to_dev(self->fd, buffer, npages);
+ ASSERT_EQ(ret, 0);
+ ASSERT_EQ(buffer->cpages, npages);
+
+ /* Check what the device read. */
+ for (i = 0, ptr = buffer->mirror; i < size / sizeof(*ptr); ++i)
+ ASSERT_EQ(ptr[i], i);
+
+ /* Release device memory. */
+ ret = hmm_dmirror_cmd(self->fd, HMM_DMIRROR_RELEASE, buffer, npages);
+ ASSERT_EQ(ret, 0);
+
+ /* Fault pages back to system memory and check them. */
+ for (i = 0, ptr = buffer->ptr; i < size / (2 * sizeof(*ptr)); ++i)
ASSERT_EQ(ptr[i], i);
hmm_buffer_free(buffer);
}
/*
+ * Migrate anonymous shared memory to device private memory.
+ */
+TEST_F(hmm, migrate_shared)
+{
+ struct hmm_buffer *buffer;
+ unsigned long npages;
+ unsigned long size;
+ int ret;
+
+ npages = ALIGN(HMM_BUFFER_SIZE, self->page_size) >> self->page_shift;
+ ASSERT_NE(npages, 0);
+ size = npages << self->page_shift;
+
+ buffer = malloc(sizeof(*buffer));
+ ASSERT_NE(buffer, NULL);
+
+ buffer->fd = -1;
+ buffer->size = size;
+ buffer->mirror = malloc(size);
+ ASSERT_NE(buffer->mirror, NULL);
+
+ buffer->ptr = mmap(NULL, size,
+ PROT_READ | PROT_WRITE,
+ MAP_SHARED | MAP_ANONYMOUS,
+ buffer->fd, 0);
+ ASSERT_NE(buffer->ptr, MAP_FAILED);
+
+ /* Migrate memory to device. */
+ ret = hmm_migrate_sys_to_dev(self->fd, buffer, npages);
+ ASSERT_EQ(ret, -ENOENT);
+
+ hmm_buffer_free(buffer);
+}
+
+/*
* Try to migrate various memory types to device private memory.
*/
TEST_F(hmm2, migrate_mixed)
@@ -964,7 +1176,7 @@ TEST_F(hmm2, migrate_mixed)
p = buffer->ptr;
/* Migrating a protected area should be an error. */
- ret = hmm_dmirror_cmd(self->fd1, HMM_DMIRROR_MIGRATE, buffer, npages);
+ ret = hmm_migrate_sys_to_dev(self->fd1, buffer, npages);
ASSERT_EQ(ret, -EINVAL);
/* Punch a hole after the first page address. */
@@ -972,7 +1184,7 @@ TEST_F(hmm2, migrate_mixed)
ASSERT_EQ(ret, 0);
/* We expect an error if the vma doesn't cover the range. */
- ret = hmm_dmirror_cmd(self->fd1, HMM_DMIRROR_MIGRATE, buffer, 3);
+ ret = hmm_migrate_sys_to_dev(self->fd1, buffer, 3);
ASSERT_EQ(ret, -EINVAL);
/* Page 2 will be a read-only zero page. */
@@ -1004,13 +1216,13 @@ TEST_F(hmm2, migrate_mixed)
/* Now try to migrate pages 2-5 to device 1. */
buffer->ptr = p + 2 * self->page_size;
- ret = hmm_dmirror_cmd(self->fd1, HMM_DMIRROR_MIGRATE, buffer, 4);
+ ret = hmm_migrate_sys_to_dev(self->fd1, buffer, 4);
ASSERT_EQ(ret, 0);
ASSERT_EQ(buffer->cpages, 4);
/* Page 5 won't be migrated to device 0 because it's on device 1. */
buffer->ptr = p + 5 * self->page_size;
- ret = hmm_dmirror_cmd(self->fd0, HMM_DMIRROR_MIGRATE, buffer, 1);
+ ret = hmm_migrate_sys_to_dev(self->fd0, buffer, 1);
ASSERT_EQ(ret, -ENOENT);
buffer->ptr = p;
@@ -1019,8 +1231,12 @@ TEST_F(hmm2, migrate_mixed)
}
/*
- * Migrate anonymous memory to device private memory and fault it back to system
- * memory multiple times.
+ * Migrate anonymous memory to device memory and back to system memory
+ * multiple times. In case of private zone configuration, this is done
+ * through fault pages accessed by CPU. In case of coherent zone configuration,
+ * the pages from the device should be explicitly migrated back to system memory.
+ * The reason is Coherent device zone has coherent access by CPU, therefore
+ * it will not generate any page fault.
*/
TEST_F(hmm, migrate_multiple)
{
@@ -1056,8 +1272,7 @@ TEST_F(hmm, migrate_multiple)
ptr[i] = i;
/* Migrate memory to device. */
- ret = hmm_dmirror_cmd(self->fd, HMM_DMIRROR_MIGRATE, buffer,
- npages);
+ ret = hmm_migrate_sys_to_dev(self->fd, buffer, npages);
ASSERT_EQ(ret, 0);
ASSERT_EQ(buffer->cpages, npages);
@@ -1065,7 +1280,13 @@ TEST_F(hmm, migrate_multiple)
for (i = 0, ptr = buffer->mirror; i < size / sizeof(*ptr); ++i)
ASSERT_EQ(ptr[i], i);
- /* Fault pages back to system memory and check them. */
+ /* Migrate back to system memory and check them. */
+ if (hmm_is_coherent_type(variant->device_number)) {
+ ret = hmm_migrate_dev_to_sys(self->fd, buffer, npages);
+ ASSERT_EQ(ret, 0);
+ ASSERT_EQ(buffer->cpages, npages);
+ }
+
for (i = 0, ptr = buffer->ptr; i < size / sizeof(*ptr); ++i)
ASSERT_EQ(ptr[i], i);
@@ -1200,6 +1421,48 @@ TEST_F(hmm, anon_teardown)
/*
* Test memory snapshot without faulting in pages accessed by the device.
*/
+TEST_F(hmm, mixedmap)
+{
+ struct hmm_buffer *buffer;
+ unsigned long npages;
+ unsigned long size;
+ unsigned char *m;
+ int ret;
+
+ npages = 1;
+ size = npages << self->page_shift;
+
+ buffer = malloc(sizeof(*buffer));
+ ASSERT_NE(buffer, NULL);
+
+ buffer->fd = -1;
+ buffer->size = size;
+ buffer->mirror = malloc(npages);
+ ASSERT_NE(buffer->mirror, NULL);
+
+
+ /* Reserve a range of addresses. */
+ buffer->ptr = mmap(NULL, size,
+ PROT_READ | PROT_WRITE,
+ MAP_PRIVATE,
+ self->fd, 0);
+ ASSERT_NE(buffer->ptr, MAP_FAILED);
+
+ /* Simulate a device snapshotting CPU pagetables. */
+ ret = hmm_dmirror_cmd(self->fd, HMM_DMIRROR_SNAPSHOT, buffer, npages);
+ ASSERT_EQ(ret, 0);
+ ASSERT_EQ(buffer->cpages, npages);
+
+ /* Check what the device saw. */
+ m = buffer->mirror;
+ ASSERT_EQ(m[0], HMM_DMIRROR_PROT_READ);
+
+ hmm_buffer_free(buffer);
+}
+
+/*
+ * Test memory snapshot without faulting in pages accessed by the device.
+ */
TEST_F(hmm2, snapshot)
{
struct hmm_buffer *buffer;
@@ -1261,13 +1524,13 @@ TEST_F(hmm2, snapshot)
/* Page 5 will be migrated to device 0. */
buffer->ptr = p + 5 * self->page_size;
- ret = hmm_dmirror_cmd(self->fd0, HMM_DMIRROR_MIGRATE, buffer, 1);
+ ret = hmm_migrate_sys_to_dev(self->fd0, buffer, 1);
ASSERT_EQ(ret, 0);
ASSERT_EQ(buffer->cpages, 1);
/* Page 6 will be migrated to device 1. */
buffer->ptr = p + 6 * self->page_size;
- ret = hmm_dmirror_cmd(self->fd1, HMM_DMIRROR_MIGRATE, buffer, 1);
+ ret = hmm_migrate_sys_to_dev(self->fd1, buffer, 1);
ASSERT_EQ(ret, 0);
ASSERT_EQ(buffer->cpages, 1);
@@ -1284,10 +1547,93 @@ TEST_F(hmm2, snapshot)
ASSERT_EQ(m[2], HMM_DMIRROR_PROT_ZERO | HMM_DMIRROR_PROT_READ);
ASSERT_EQ(m[3], HMM_DMIRROR_PROT_READ);
ASSERT_EQ(m[4], HMM_DMIRROR_PROT_WRITE);
- ASSERT_EQ(m[5], HMM_DMIRROR_PROT_DEV_PRIVATE_LOCAL |
- HMM_DMIRROR_PROT_WRITE);
- ASSERT_EQ(m[6], HMM_DMIRROR_PROT_NONE);
+ if (!hmm_is_coherent_type(variant->device_number0)) {
+ ASSERT_EQ(m[5], HMM_DMIRROR_PROT_DEV_PRIVATE_LOCAL |
+ HMM_DMIRROR_PROT_WRITE);
+ ASSERT_EQ(m[6], HMM_DMIRROR_PROT_NONE);
+ } else {
+ ASSERT_EQ(m[5], HMM_DMIRROR_PROT_DEV_COHERENT_LOCAL |
+ HMM_DMIRROR_PROT_WRITE);
+ ASSERT_EQ(m[6], HMM_DMIRROR_PROT_DEV_COHERENT_REMOTE |
+ HMM_DMIRROR_PROT_WRITE);
+ }
+
+ hmm_buffer_free(buffer);
+}
+
+/*
+ * Test the hmm_range_fault() HMM_PFN_PMD flag for large pages that
+ * should be mapped by a large page table entry.
+ */
+TEST_F(hmm, compound)
+{
+ struct hmm_buffer *buffer;
+ unsigned long npages;
+ unsigned long size;
+ unsigned long default_hsize;
+ int *ptr;
+ unsigned char *m;
+ int ret;
+ unsigned long i;
+
+ /* Skip test if we can't allocate a hugetlbfs page. */
+
+ default_hsize = file_read_ulong("/proc/meminfo", "Hugepagesize:");
+ if (default_hsize < 0 || default_hsize*1024 < default_hsize)
+ SKIP(return, "Huge page size could not be determined");
+ default_hsize = default_hsize*1024; /* KB to B */
+
+ size = ALIGN(TWOMEG, default_hsize);
+ npages = size >> self->page_shift;
+
+ buffer = malloc(sizeof(*buffer));
+ ASSERT_NE(buffer, NULL);
+ buffer->ptr = mmap(NULL, size,
+ PROT_READ | PROT_WRITE,
+ MAP_PRIVATE | MAP_ANONYMOUS | MAP_HUGETLB,
+ -1, 0);
+ if (buffer->ptr == MAP_FAILED) {
+ free(buffer);
+ return;
+ }
+
+ buffer->size = size;
+ buffer->mirror = malloc(npages);
+ ASSERT_NE(buffer->mirror, NULL);
+
+ /* Initialize the pages the device will snapshot in buffer->ptr. */
+ for (i = 0, ptr = buffer->ptr; i < size / sizeof(*ptr); ++i)
+ ptr[i] = i;
+
+ /* Simulate a device snapshotting CPU pagetables. */
+ ret = hmm_dmirror_cmd(self->fd, HMM_DMIRROR_SNAPSHOT, buffer, npages);
+ ASSERT_EQ(ret, 0);
+ ASSERT_EQ(buffer->cpages, npages);
+
+ /* Check what the device saw. */
+ m = buffer->mirror;
+ for (i = 0; i < npages; ++i)
+ ASSERT_EQ(m[i], HMM_DMIRROR_PROT_WRITE |
+ HMM_DMIRROR_PROT_PMD);
+
+ /* Make the region read-only. */
+ ret = mprotect(buffer->ptr, size, PROT_READ);
+ ASSERT_EQ(ret, 0);
+
+ /* Simulate a device snapshotting CPU pagetables. */
+ ret = hmm_dmirror_cmd(self->fd, HMM_DMIRROR_SNAPSHOT, buffer, npages);
+ ASSERT_EQ(ret, 0);
+ ASSERT_EQ(buffer->cpages, npages);
+
+ /* Check what the device saw. */
+ m = buffer->mirror;
+ for (i = 0; i < npages; ++i)
+ ASSERT_EQ(m[i], HMM_DMIRROR_PROT_READ |
+ HMM_DMIRROR_PROT_PMD);
+
+ munmap(buffer->ptr, buffer->size);
+ buffer->ptr = NULL;
hmm_buffer_free(buffer);
}
@@ -1349,11 +1695,365 @@ TEST_F(hmm2, double_map)
for (i = 0, ptr = buffer->mirror; i < size / sizeof(*ptr); ++i)
ASSERT_EQ(ptr[i], i);
- /* Punch a hole after the first page address. */
- ret = munmap(buffer->ptr + self->page_size, self->page_size);
+ /* Migrate pages to device 1 and try to read from device 0. */
+ ret = hmm_migrate_sys_to_dev(self->fd1, buffer, npages);
+ ASSERT_EQ(ret, 0);
+ ASSERT_EQ(buffer->cpages, npages);
+
+ ret = hmm_dmirror_cmd(self->fd0, HMM_DMIRROR_READ, buffer, npages);
ASSERT_EQ(ret, 0);
+ ASSERT_EQ(buffer->cpages, npages);
+ ASSERT_EQ(buffer->faults, 1);
+
+ /* Check what device 0 read. */
+ for (i = 0, ptr = buffer->mirror; i < size / sizeof(*ptr); ++i)
+ ASSERT_EQ(ptr[i], i);
hmm_buffer_free(buffer);
}
+/*
+ * Basic check of exclusive faulting.
+ */
+TEST_F(hmm, exclusive)
+{
+ struct hmm_buffer *buffer;
+ unsigned long npages;
+ unsigned long size;
+ unsigned long i;
+ int *ptr;
+ int ret;
+
+ npages = ALIGN(HMM_BUFFER_SIZE, self->page_size) >> self->page_shift;
+ ASSERT_NE(npages, 0);
+ size = npages << self->page_shift;
+
+ buffer = malloc(sizeof(*buffer));
+ ASSERT_NE(buffer, NULL);
+
+ buffer->fd = -1;
+ buffer->size = size;
+ buffer->mirror = malloc(size);
+ ASSERT_NE(buffer->mirror, NULL);
+
+ buffer->ptr = mmap(NULL, size,
+ PROT_READ | PROT_WRITE,
+ MAP_PRIVATE | MAP_ANONYMOUS,
+ buffer->fd, 0);
+ ASSERT_NE(buffer->ptr, MAP_FAILED);
+
+ /* Initialize buffer in system memory. */
+ for (i = 0, ptr = buffer->ptr; i < size / sizeof(*ptr); ++i)
+ ptr[i] = i;
+
+ /* Map memory exclusively for device access. */
+ ret = hmm_dmirror_cmd(self->fd, HMM_DMIRROR_EXCLUSIVE, buffer, npages);
+ ASSERT_EQ(ret, 0);
+ ASSERT_EQ(buffer->cpages, npages);
+
+ /* Check what the device read. */
+ for (i = 0, ptr = buffer->mirror; i < size / sizeof(*ptr); ++i)
+ ASSERT_EQ(ptr[i], i);
+
+ /* Fault pages back to system memory and check them. */
+ for (i = 0, ptr = buffer->ptr; i < size / sizeof(*ptr); ++i)
+ ASSERT_EQ(ptr[i]++, i);
+
+ for (i = 0, ptr = buffer->ptr; i < size / sizeof(*ptr); ++i)
+ ASSERT_EQ(ptr[i], i+1);
+
+ /* Check atomic access revoked */
+ ret = hmm_dmirror_cmd(self->fd, HMM_DMIRROR_CHECK_EXCLUSIVE, buffer, npages);
+ ASSERT_EQ(ret, 0);
+
+ hmm_buffer_free(buffer);
+}
+
+TEST_F(hmm, exclusive_mprotect)
+{
+ struct hmm_buffer *buffer;
+ unsigned long npages;
+ unsigned long size;
+ unsigned long i;
+ int *ptr;
+ int ret;
+
+ npages = ALIGN(HMM_BUFFER_SIZE, self->page_size) >> self->page_shift;
+ ASSERT_NE(npages, 0);
+ size = npages << self->page_shift;
+
+ buffer = malloc(sizeof(*buffer));
+ ASSERT_NE(buffer, NULL);
+
+ buffer->fd = -1;
+ buffer->size = size;
+ buffer->mirror = malloc(size);
+ ASSERT_NE(buffer->mirror, NULL);
+
+ buffer->ptr = mmap(NULL, size,
+ PROT_READ | PROT_WRITE,
+ MAP_PRIVATE | MAP_ANONYMOUS,
+ buffer->fd, 0);
+ ASSERT_NE(buffer->ptr, MAP_FAILED);
+
+ /* Initialize buffer in system memory. */
+ for (i = 0, ptr = buffer->ptr; i < size / sizeof(*ptr); ++i)
+ ptr[i] = i;
+
+ /* Map memory exclusively for device access. */
+ ret = hmm_dmirror_cmd(self->fd, HMM_DMIRROR_EXCLUSIVE, buffer, npages);
+ ASSERT_EQ(ret, 0);
+ ASSERT_EQ(buffer->cpages, npages);
+
+ /* Check what the device read. */
+ for (i = 0, ptr = buffer->mirror; i < size / sizeof(*ptr); ++i)
+ ASSERT_EQ(ptr[i], i);
+
+ ret = mprotect(buffer->ptr, size, PROT_READ);
+ ASSERT_EQ(ret, 0);
+
+ /* Simulate a device writing system memory. */
+ ret = hmm_dmirror_cmd(self->fd, HMM_DMIRROR_WRITE, buffer, npages);
+ ASSERT_EQ(ret, -EPERM);
+
+ hmm_buffer_free(buffer);
+}
+
+/*
+ * Check copy-on-write works.
+ */
+TEST_F(hmm, exclusive_cow)
+{
+ struct hmm_buffer *buffer;
+ unsigned long npages;
+ unsigned long size;
+ unsigned long i;
+ int *ptr;
+ int ret;
+
+ npages = ALIGN(HMM_BUFFER_SIZE, self->page_size) >> self->page_shift;
+ ASSERT_NE(npages, 0);
+ size = npages << self->page_shift;
+
+ buffer = malloc(sizeof(*buffer));
+ ASSERT_NE(buffer, NULL);
+
+ buffer->fd = -1;
+ buffer->size = size;
+ buffer->mirror = malloc(size);
+ ASSERT_NE(buffer->mirror, NULL);
+
+ buffer->ptr = mmap(NULL, size,
+ PROT_READ | PROT_WRITE,
+ MAP_PRIVATE | MAP_ANONYMOUS,
+ buffer->fd, 0);
+ ASSERT_NE(buffer->ptr, MAP_FAILED);
+
+ /* Initialize buffer in system memory. */
+ for (i = 0, ptr = buffer->ptr; i < size / sizeof(*ptr); ++i)
+ ptr[i] = i;
+
+ /* Map memory exclusively for device access. */
+ ret = hmm_dmirror_cmd(self->fd, HMM_DMIRROR_EXCLUSIVE, buffer, npages);
+ ASSERT_EQ(ret, 0);
+ ASSERT_EQ(buffer->cpages, npages);
+
+ fork();
+
+ /* Fault pages back to system memory and check them. */
+ for (i = 0, ptr = buffer->ptr; i < size / sizeof(*ptr); ++i)
+ ASSERT_EQ(ptr[i]++, i);
+
+ for (i = 0, ptr = buffer->ptr; i < size / sizeof(*ptr); ++i)
+ ASSERT_EQ(ptr[i], i+1);
+
+ hmm_buffer_free(buffer);
+}
+
+static int gup_test_exec(int gup_fd, unsigned long addr, int cmd,
+ int npages, int size, int flags)
+{
+ struct gup_test gup = {
+ .nr_pages_per_call = npages,
+ .addr = addr,
+ .gup_flags = FOLL_WRITE | flags,
+ .size = size,
+ };
+
+ if (ioctl(gup_fd, cmd, &gup)) {
+ perror("ioctl on error\n");
+ return errno;
+ }
+
+ return 0;
+}
+
+/*
+ * Test get user device pages through gup_test. Setting PIN_LONGTERM flag.
+ * This should trigger a migration back to system memory for both, private
+ * and coherent type pages.
+ * This test makes use of gup_test module. Make sure GUP_TEST_CONFIG is added
+ * to your configuration before you run it.
+ */
+TEST_F(hmm, hmm_gup_test)
+{
+ struct hmm_buffer *buffer;
+ int gup_fd;
+ unsigned long npages;
+ unsigned long size;
+ unsigned long i;
+ int *ptr;
+ int ret;
+ unsigned char *m;
+
+ gup_fd = open("/sys/kernel/debug/gup_test", O_RDWR);
+ if (gup_fd == -1)
+ SKIP(return, "Skipping test, could not find gup_test driver");
+
+ npages = 4;
+ size = npages << self->page_shift;
+
+ buffer = malloc(sizeof(*buffer));
+ ASSERT_NE(buffer, NULL);
+
+ buffer->fd = -1;
+ buffer->size = size;
+ buffer->mirror = malloc(size);
+ ASSERT_NE(buffer->mirror, NULL);
+
+ buffer->ptr = mmap(NULL, size,
+ PROT_READ | PROT_WRITE,
+ MAP_PRIVATE | MAP_ANONYMOUS,
+ buffer->fd, 0);
+ ASSERT_NE(buffer->ptr, MAP_FAILED);
+
+ /* Initialize buffer in system memory. */
+ for (i = 0, ptr = buffer->ptr; i < size / sizeof(*ptr); ++i)
+ ptr[i] = i;
+
+ /* Migrate memory to device. */
+ ret = hmm_migrate_sys_to_dev(self->fd, buffer, npages);
+ ASSERT_EQ(ret, 0);
+ ASSERT_EQ(buffer->cpages, npages);
+ /* Check what the device read. */
+ for (i = 0, ptr = buffer->mirror; i < size / sizeof(*ptr); ++i)
+ ASSERT_EQ(ptr[i], i);
+
+ ASSERT_EQ(gup_test_exec(gup_fd,
+ (unsigned long)buffer->ptr,
+ GUP_BASIC_TEST, 1, self->page_size, 0), 0);
+ ASSERT_EQ(gup_test_exec(gup_fd,
+ (unsigned long)buffer->ptr + 1 * self->page_size,
+ GUP_FAST_BENCHMARK, 1, self->page_size, 0), 0);
+ ASSERT_EQ(gup_test_exec(gup_fd,
+ (unsigned long)buffer->ptr + 2 * self->page_size,
+ PIN_FAST_BENCHMARK, 1, self->page_size, FOLL_LONGTERM), 0);
+ ASSERT_EQ(gup_test_exec(gup_fd,
+ (unsigned long)buffer->ptr + 3 * self->page_size,
+ PIN_LONGTERM_BENCHMARK, 1, self->page_size, 0), 0);
+
+ /* Take snapshot to CPU pagetables */
+ ret = hmm_dmirror_cmd(self->fd, HMM_DMIRROR_SNAPSHOT, buffer, npages);
+ ASSERT_EQ(ret, 0);
+ ASSERT_EQ(buffer->cpages, npages);
+ m = buffer->mirror;
+ if (hmm_is_coherent_type(variant->device_number)) {
+ ASSERT_EQ(HMM_DMIRROR_PROT_DEV_COHERENT_LOCAL | HMM_DMIRROR_PROT_WRITE, m[0]);
+ ASSERT_EQ(HMM_DMIRROR_PROT_DEV_COHERENT_LOCAL | HMM_DMIRROR_PROT_WRITE, m[1]);
+ } else {
+ ASSERT_EQ(HMM_DMIRROR_PROT_WRITE, m[0]);
+ ASSERT_EQ(HMM_DMIRROR_PROT_WRITE, m[1]);
+ }
+ ASSERT_EQ(HMM_DMIRROR_PROT_WRITE, m[2]);
+ ASSERT_EQ(HMM_DMIRROR_PROT_WRITE, m[3]);
+ /*
+ * Check again the content on the pages. Make sure there's no
+ * corrupted data.
+ */
+ for (i = 0, ptr = buffer->ptr; i < size / sizeof(*ptr); ++i)
+ ASSERT_EQ(ptr[i], i);
+
+ close(gup_fd);
+ hmm_buffer_free(buffer);
+}
+
+/*
+ * Test copy-on-write in device pages.
+ * In case of writing to COW private page(s), a page fault will migrate pages
+ * back to system memory first. Then, these pages will be duplicated. In case
+ * of COW device coherent type, pages are duplicated directly from device
+ * memory.
+ */
+TEST_F(hmm, hmm_cow_in_device)
+{
+ struct hmm_buffer *buffer;
+ unsigned long npages;
+ unsigned long size;
+ unsigned long i;
+ int *ptr;
+ int ret;
+ unsigned char *m;
+ pid_t pid;
+ int status;
+
+ npages = 4;
+ size = npages << self->page_shift;
+
+ buffer = malloc(sizeof(*buffer));
+ ASSERT_NE(buffer, NULL);
+
+ buffer->fd = -1;
+ buffer->size = size;
+ buffer->mirror = malloc(size);
+ ASSERT_NE(buffer->mirror, NULL);
+
+ buffer->ptr = mmap(NULL, size,
+ PROT_READ | PROT_WRITE,
+ MAP_PRIVATE | MAP_ANONYMOUS,
+ buffer->fd, 0);
+ ASSERT_NE(buffer->ptr, MAP_FAILED);
+
+ /* Initialize buffer in system memory. */
+ for (i = 0, ptr = buffer->ptr; i < size / sizeof(*ptr); ++i)
+ ptr[i] = i;
+
+ /* Migrate memory to device. */
+
+ ret = hmm_migrate_sys_to_dev(self->fd, buffer, npages);
+ ASSERT_EQ(ret, 0);
+ ASSERT_EQ(buffer->cpages, npages);
+
+ pid = fork();
+ if (pid == -1)
+ ASSERT_EQ(pid, 0);
+ if (!pid) {
+ /* Child process waitd for SIGTERM from the parent. */
+ while (1) {
+ }
+ perror("Should not reach this\n");
+ exit(0);
+ }
+ /* Parent process writes to COW pages(s) and gets a
+ * new copy in system. In case of device private pages,
+ * this write causes a migration to system mem first.
+ */
+ for (i = 0, ptr = buffer->ptr; i < size / sizeof(*ptr); ++i)
+ ptr[i] = i;
+
+ /* Terminate child and wait */
+ EXPECT_EQ(0, kill(pid, SIGTERM));
+ EXPECT_EQ(pid, waitpid(pid, &status, 0));
+ EXPECT_NE(0, WIFSIGNALED(status));
+ EXPECT_EQ(SIGTERM, WTERMSIG(status));
+
+ /* Take snapshot to CPU pagetables */
+ ret = hmm_dmirror_cmd(self->fd, HMM_DMIRROR_SNAPSHOT, buffer, npages);
+ ASSERT_EQ(ret, 0);
+ ASSERT_EQ(buffer->cpages, npages);
+ m = buffer->mirror;
+ for (i = 0; i < npages; i++)
+ ASSERT_EQ(HMM_DMIRROR_PROT_WRITE, m[i]);
+
+ hmm_buffer_free(buffer);
+}
TEST_HARNESS_MAIN
diff --git a/tools/testing/selftests/vm/hugepage-mmap.c b/tools/testing/selftests/mm/hugepage-mmap.c
index 93f9e7b81331..267eea2e0e0b 100644
--- a/tools/testing/selftests/vm/hugepage-mmap.c
+++ b/tools/testing/selftests/mm/hugepage-mmap.c
@@ -16,14 +16,14 @@
* range.
* Other architectures, such as ppc64, i386 or x86_64 are not so constrained.
*/
-
+#define _GNU_SOURCE
#include <stdlib.h>
#include <stdio.h>
#include <unistd.h>
#include <sys/mman.h>
#include <fcntl.h>
+#include "../kselftest.h"
-#define FILE_NAME "huge/hugepagefile"
#define LENGTH (256UL*1024*1024)
#define PROTECTION (PROT_READ | PROT_WRITE)
@@ -38,7 +38,7 @@
static void check_bytes(char *addr)
{
- printf("First hex is %x\n", *((unsigned int *)addr));
+ ksft_print_msg("First hex is %x\n", *((unsigned int *)addr));
}
static void write_bytes(char *addr)
@@ -56,7 +56,7 @@ static int read_bytes(char *addr)
check_bytes(addr);
for (i = 0; i < LENGTH; i++)
if (*(addr + i) != (char)i) {
- printf("Mismatch at %lu\n", i);
+ ksft_print_msg("Error: Mismatch at %lu\n", i);
return 1;
}
return 0;
@@ -67,27 +67,28 @@ int main(void)
void *addr;
int fd, ret;
- fd = open(FILE_NAME, O_CREAT | O_RDWR, 0755);
- if (fd < 0) {
- perror("Open failed");
- exit(1);
- }
+ ksft_print_header();
+ ksft_set_plan(1);
+
+ fd = memfd_create("hugepage-mmap", MFD_HUGETLB);
+ if (fd < 0)
+ ksft_exit_fail_msg("memfd_create() failed: %s\n", strerror(errno));
addr = mmap(ADDR, LENGTH, PROTECTION, FLAGS, fd, 0);
if (addr == MAP_FAILED) {
- perror("mmap");
- unlink(FILE_NAME);
- exit(1);
+ close(fd);
+ ksft_exit_fail_msg("mmap(): %s\n", strerror(errno));
}
- printf("Returned address is %p\n", addr);
+ ksft_print_msg("Returned address is %p\n", addr);
check_bytes(addr);
write_bytes(addr);
ret = read_bytes(addr);
munmap(addr, LENGTH);
close(fd);
- unlink(FILE_NAME);
- return ret;
+ ksft_test_result(!ret, "Read same data\n");
+
+ ksft_exit(!ret);
}
diff --git a/tools/testing/selftests/mm/hugepage-mremap.c b/tools/testing/selftests/mm/hugepage-mremap.c
new file mode 100644
index 000000000000..c463d1c09c9b
--- /dev/null
+++ b/tools/testing/selftests/mm/hugepage-mremap.c
@@ -0,0 +1,168 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * hugepage-mremap:
+ *
+ * Example of remapping huge page memory in a user application using the
+ * mremap system call. The path to a file in a hugetlbfs filesystem must
+ * be passed as the last argument to this test. The amount of memory used
+ * by this test in MBs can optionally be passed as an argument. If no memory
+ * amount is passed, the default amount is 10MB.
+ *
+ * To make sure the test triggers pmd sharing and goes through the 'unshare'
+ * path in the mremap code use 1GB (1024) or more.
+ */
+
+#define _GNU_SOURCE
+#include <stdlib.h>
+#include <stdio.h>
+#include <unistd.h>
+#include <sys/mman.h>
+#include <errno.h>
+#include <fcntl.h> /* Definition of O_* constants */
+#include <sys/syscall.h> /* Definition of SYS_* constants */
+#include <linux/userfaultfd.h>
+#include <sys/ioctl.h>
+#include <string.h>
+#include <stdbool.h>
+#include "../kselftest.h"
+#include "vm_util.h"
+
+#define DEFAULT_LENGTH_MB 10UL
+#define MB_TO_BYTES(x) (x * 1024 * 1024)
+
+#define PROTECTION (PROT_READ | PROT_WRITE | PROT_EXEC)
+#define FLAGS (MAP_SHARED | MAP_ANONYMOUS)
+
+static void check_bytes(char *addr)
+{
+ ksft_print_msg("First hex is %x\n", *((unsigned int *)addr));
+}
+
+static void write_bytes(char *addr, size_t len)
+{
+ unsigned long i;
+
+ for (i = 0; i < len; i++)
+ *(addr + i) = (char)i;
+}
+
+static int read_bytes(char *addr, size_t len)
+{
+ unsigned long i;
+
+ check_bytes(addr);
+ for (i = 0; i < len; i++)
+ if (*(addr + i) != (char)i) {
+ ksft_print_msg("Mismatch at %lu\n", i);
+ return 1;
+ }
+ return 0;
+}
+
+static void register_region_with_uffd(char *addr, size_t len)
+{
+ long uffd; /* userfaultfd file descriptor */
+ struct uffdio_api uffdio_api;
+
+ /* Create and enable userfaultfd object. */
+
+ uffd = syscall(__NR_userfaultfd, O_CLOEXEC | O_NONBLOCK);
+ if (uffd == -1)
+ ksft_exit_fail_msg("userfaultfd: %s\n", strerror(errno));
+
+ uffdio_api.api = UFFD_API;
+ uffdio_api.features = 0;
+ if (ioctl(uffd, UFFDIO_API, &uffdio_api) == -1)
+ ksft_exit_fail_msg("ioctl-UFFDIO_API: %s\n", strerror(errno));
+
+ /* Create a private anonymous mapping. The memory will be
+ * demand-zero paged--that is, not yet allocated. When we
+ * actually touch the memory, it will be allocated via
+ * the userfaultfd.
+ */
+
+ addr = mmap(NULL, len, PROT_READ | PROT_WRITE,
+ MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
+ if (addr == MAP_FAILED)
+ ksft_exit_fail_msg("mmap: %s\n", strerror(errno));
+
+ ksft_print_msg("Address returned by mmap() = %p\n", addr);
+
+ /* Register the memory range of the mapping we just created for
+ * handling by the userfaultfd object. In mode, we request to track
+ * missing pages (i.e., pages that have not yet been faulted in).
+ */
+ if (uffd_register(uffd, addr, len, true, false, false))
+ ksft_exit_fail_msg("ioctl-UFFDIO_REGISTER: %s\n", strerror(errno));
+}
+
+int main(int argc, char *argv[])
+{
+ size_t length = 0;
+ int ret = 0, fd;
+
+ ksft_print_header();
+ ksft_set_plan(1);
+
+ if (argc >= 2 && !strcmp(argv[1], "-h"))
+ ksft_exit_fail_msg("Usage: %s [length_in_MB]\n", argv[0]);
+
+ /* Read memory length as the first arg if valid, otherwise fallback to
+ * the default length.
+ */
+ if (argc >= 2)
+ length = (size_t)atoi(argv[1]);
+ else
+ length = DEFAULT_LENGTH_MB;
+
+ length = MB_TO_BYTES(length);
+ fd = memfd_create(argv[0], MFD_HUGETLB);
+ if (fd < 0)
+ ksft_exit_fail_msg("Open failed: %s\n", strerror(errno));
+
+ /* mmap to a PUD aligned address to hopefully trigger pmd sharing. */
+ unsigned long suggested_addr = 0x7eaa40000000;
+ void *haddr = mmap((void *)suggested_addr, length, PROTECTION,
+ MAP_HUGETLB | MAP_SHARED | MAP_POPULATE, fd, 0);
+ ksft_print_msg("Map haddr: Returned address is %p\n", haddr);
+ if (haddr == MAP_FAILED)
+ ksft_exit_fail_msg("mmap1: %s\n", strerror(errno));
+
+ /* mmap again to a dummy address to hopefully trigger pmd sharing. */
+ suggested_addr = 0x7daa40000000;
+ void *daddr = mmap((void *)suggested_addr, length, PROTECTION,
+ MAP_HUGETLB | MAP_SHARED | MAP_POPULATE, fd, 0);
+ ksft_print_msg("Map daddr: Returned address is %p\n", daddr);
+ if (daddr == MAP_FAILED)
+ ksft_exit_fail_msg("mmap3: %s\n", strerror(errno));
+
+ suggested_addr = 0x7faa40000000;
+ void *vaddr =
+ mmap((void *)suggested_addr, length, PROTECTION, FLAGS, -1, 0);
+ ksft_print_msg("Map vaddr: Returned address is %p\n", vaddr);
+ if (vaddr == MAP_FAILED)
+ ksft_exit_fail_msg("mmap2: %s\n", strerror(errno));
+
+ register_region_with_uffd(haddr, length);
+
+ void *addr = mremap(haddr, length, length,
+ MREMAP_MAYMOVE | MREMAP_FIXED, vaddr);
+ if (addr == MAP_FAILED)
+ ksft_exit_fail_msg("mremap: %s\n", strerror(errno));
+
+ ksft_print_msg("Mremap: Returned address is %p\n", addr);
+ check_bytes(addr);
+ write_bytes(addr, length);
+ ret = read_bytes(addr, length);
+
+ munmap(addr, length);
+
+ addr = mremap(addr, length, length, 0);
+ if (addr != MAP_FAILED)
+ ksft_exit_fail_msg("mremap: Expected failure, but call succeeded\n");
+
+ close(fd);
+
+ ksft_test_result(!ret, "Read same data\n");
+ ksft_exit(!ret);
+}
diff --git a/tools/testing/selftests/vm/hugepage-shm.c b/tools/testing/selftests/mm/hugepage-shm.c
index e2527f32005b..478bb1e989e9 100644
--- a/tools/testing/selftests/vm/hugepage-shm.c
+++ b/tools/testing/selftests/mm/hugepage-shm.c
@@ -35,10 +35,6 @@
#include <sys/shm.h>
#include <sys/mman.h>
-#ifndef SHM_HUGETLB
-#define SHM_HUGETLB 04000
-#endif
-
#define LENGTH (256UL*1024*1024)
#define dprintf(x) printf(x)
diff --git a/tools/testing/selftests/mm/hugepage-vmemmap.c b/tools/testing/selftests/mm/hugepage-vmemmap.c
new file mode 100644
index 000000000000..894d28c3dd47
--- /dev/null
+++ b/tools/testing/selftests/mm/hugepage-vmemmap.c
@@ -0,0 +1,147 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * A test case of using hugepage memory in a user application using the
+ * mmap system call with MAP_HUGETLB flag. Before running this program
+ * make sure the administrator has allocated enough default sized huge
+ * pages to cover the 2 MB allocation.
+ */
+#include <stdlib.h>
+#include <stdio.h>
+#include <unistd.h>
+#include <sys/mman.h>
+#include <fcntl.h>
+#include "vm_util.h"
+
+#define PAGE_COMPOUND_HEAD (1UL << 15)
+#define PAGE_COMPOUND_TAIL (1UL << 16)
+#define PAGE_HUGE (1UL << 17)
+
+#define HEAD_PAGE_FLAGS (PAGE_COMPOUND_HEAD | PAGE_HUGE)
+#define TAIL_PAGE_FLAGS (PAGE_COMPOUND_TAIL | PAGE_HUGE)
+
+#define PM_PFRAME_BITS 55
+#define PM_PFRAME_MASK ~((1UL << PM_PFRAME_BITS) - 1)
+
+/*
+ * For ia64 architecture, Linux kernel reserves Region number 4 for hugepages.
+ * That means the addresses starting with 0x800000... will need to be
+ * specified. Specifying a fixed address is not required on ppc64, i386
+ * or x86_64.
+ */
+#ifdef __ia64__
+#define MAP_ADDR (void *)(0x8000000000000000UL)
+#define MAP_FLAGS (MAP_PRIVATE | MAP_ANONYMOUS | MAP_HUGETLB | MAP_FIXED)
+#else
+#define MAP_ADDR NULL
+#define MAP_FLAGS (MAP_PRIVATE | MAP_ANONYMOUS | MAP_HUGETLB)
+#endif
+
+static size_t pagesize;
+static size_t maplength;
+
+static void write_bytes(char *addr, size_t length)
+{
+ unsigned long i;
+
+ for (i = 0; i < length; i++)
+ *(addr + i) = (char)i;
+}
+
+static unsigned long virt_to_pfn(void *addr)
+{
+ int fd;
+ unsigned long pagemap;
+
+ fd = open("/proc/self/pagemap", O_RDONLY);
+ if (fd < 0)
+ return -1UL;
+
+ lseek(fd, (unsigned long)addr / pagesize * sizeof(pagemap), SEEK_SET);
+ read(fd, &pagemap, sizeof(pagemap));
+ close(fd);
+
+ return pagemap & ~PM_PFRAME_MASK;
+}
+
+static int check_page_flags(unsigned long pfn)
+{
+ int fd, i;
+ unsigned long pageflags;
+
+ fd = open("/proc/kpageflags", O_RDONLY);
+ if (fd < 0)
+ return -1;
+
+ lseek(fd, pfn * sizeof(pageflags), SEEK_SET);
+
+ read(fd, &pageflags, sizeof(pageflags));
+ if ((pageflags & HEAD_PAGE_FLAGS) != HEAD_PAGE_FLAGS) {
+ close(fd);
+ printf("Head page flags (%lx) is invalid\n", pageflags);
+ return -1;
+ }
+
+ /*
+ * pages other than the first page must be tail and shouldn't be head;
+ * this also verifies kernel has correctly set the fake page_head to tail
+ * while hugetlb_free_vmemmap is enabled.
+ */
+ for (i = 1; i < maplength / pagesize; i++) {
+ read(fd, &pageflags, sizeof(pageflags));
+ if ((pageflags & TAIL_PAGE_FLAGS) != TAIL_PAGE_FLAGS ||
+ (pageflags & HEAD_PAGE_FLAGS) == HEAD_PAGE_FLAGS) {
+ close(fd);
+ printf("Tail page flags (%lx) is invalid\n", pageflags);
+ return -1;
+ }
+ }
+
+ close(fd);
+
+ return 0;
+}
+
+int main(int argc, char **argv)
+{
+ void *addr;
+ unsigned long pfn;
+
+ pagesize = psize();
+ maplength = default_huge_page_size();
+ if (!maplength) {
+ printf("Unable to determine huge page size\n");
+ exit(1);
+ }
+
+ addr = mmap(MAP_ADDR, maplength, PROT_READ | PROT_WRITE, MAP_FLAGS, -1, 0);
+ if (addr == MAP_FAILED) {
+ perror("mmap");
+ exit(1);
+ }
+
+ /* Trigger allocation of HugeTLB page. */
+ write_bytes(addr, maplength);
+
+ pfn = virt_to_pfn(addr);
+ if (pfn == -1UL) {
+ munmap(addr, maplength);
+ perror("virt_to_pfn");
+ exit(1);
+ }
+
+ printf("Returned address is %p whose pfn is %lx\n", addr, pfn);
+
+ if (check_page_flags(pfn) < 0) {
+ munmap(addr, maplength);
+ perror("check_page_flags");
+ exit(1);
+ }
+
+ /* munmap() length of MAP_HUGETLB memory must be hugepage aligned */
+ if (munmap(addr, maplength)) {
+ perror("munmap");
+ exit(1);
+ }
+
+ return 0;
+}
diff --git a/tools/testing/selftests/mm/hugetlb-madvise.c b/tools/testing/selftests/mm/hugetlb-madvise.c
new file mode 100644
index 000000000000..e74107185324
--- /dev/null
+++ b/tools/testing/selftests/mm/hugetlb-madvise.c
@@ -0,0 +1,368 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * hugepage-madvise:
+ *
+ * Basic functional testing of madvise MADV_DONTNEED and MADV_REMOVE
+ * on hugetlb mappings.
+ *
+ * Before running this test, make sure the administrator has pre-allocated
+ * at least MIN_FREE_PAGES hugetlb pages and they are free. In addition,
+ * the test takes an argument that is the path to a file in a hugetlbfs
+ * filesystem. Therefore, a hugetlbfs filesystem must be mounted on some
+ * directory.
+ */
+
+#define _GNU_SOURCE
+#include <stdlib.h>
+#include <stdio.h>
+#include <unistd.h>
+#include <sys/mman.h>
+#include <fcntl.h>
+#include "vm_util.h"
+#include "../kselftest.h"
+
+#define MIN_FREE_PAGES 20
+#define NR_HUGE_PAGES 10 /* common number of pages to map/allocate */
+
+#define validate_free_pages(exp_free) \
+ do { \
+ int fhp = get_free_hugepages(); \
+ if (fhp != (exp_free)) { \
+ printf("Unexpected number of free huge " \
+ "pages line %d\n", __LINE__); \
+ exit(1); \
+ } \
+ } while (0)
+
+unsigned long huge_page_size;
+unsigned long base_page_size;
+
+void write_fault_pages(void *addr, unsigned long nr_pages)
+{
+ unsigned long i;
+
+ for (i = 0; i < nr_pages; i++)
+ *((unsigned long *)(addr + (i * huge_page_size))) = i;
+}
+
+void read_fault_pages(void *addr, unsigned long nr_pages)
+{
+ volatile unsigned long dummy = 0;
+ unsigned long i;
+
+ for (i = 0; i < nr_pages; i++) {
+ dummy += *((unsigned long *)(addr + (i * huge_page_size)));
+
+ /* Prevent the compiler from optimizing out the entire loop: */
+ asm volatile("" : "+r" (dummy));
+ }
+}
+
+int main(int argc, char **argv)
+{
+ unsigned long free_hugepages;
+ void *addr, *addr2;
+ int fd;
+ int ret;
+
+ huge_page_size = default_huge_page_size();
+ if (!huge_page_size) {
+ printf("Unable to determine huge page size, exiting!\n");
+ exit(1);
+ }
+ base_page_size = sysconf(_SC_PAGE_SIZE);
+ if (!huge_page_size) {
+ printf("Unable to determine base page size, exiting!\n");
+ exit(1);
+ }
+
+ free_hugepages = get_free_hugepages();
+ if (free_hugepages < MIN_FREE_PAGES) {
+ printf("Not enough free huge pages to test, exiting!\n");
+ exit(KSFT_SKIP);
+ }
+
+ fd = memfd_create(argv[0], MFD_HUGETLB);
+ if (fd < 0) {
+ perror("memfd_create() failed");
+ exit(1);
+ }
+
+ /*
+ * Test validity of MADV_DONTNEED addr and length arguments. mmap
+ * size is NR_HUGE_PAGES + 2. One page at the beginning and end of
+ * the mapping will be unmapped so we KNOW there is nothing mapped
+ * there.
+ */
+ addr = mmap(NULL, (NR_HUGE_PAGES + 2) * huge_page_size,
+ PROT_READ | PROT_WRITE,
+ MAP_PRIVATE | MAP_ANONYMOUS | MAP_HUGETLB,
+ -1, 0);
+ if (addr == MAP_FAILED) {
+ perror("mmap");
+ exit(1);
+ }
+ if (munmap(addr, huge_page_size) ||
+ munmap(addr + (NR_HUGE_PAGES + 1) * huge_page_size,
+ huge_page_size)) {
+ perror("munmap");
+ exit(1);
+ }
+ addr = addr + huge_page_size;
+
+ write_fault_pages(addr, NR_HUGE_PAGES);
+ validate_free_pages(free_hugepages - NR_HUGE_PAGES);
+
+ /* addr before mapping should fail */
+ ret = madvise(addr - base_page_size, NR_HUGE_PAGES * huge_page_size,
+ MADV_DONTNEED);
+ if (!ret) {
+ printf("Unexpected success of madvise call with invalid addr line %d\n",
+ __LINE__);
+ exit(1);
+ }
+
+ /* addr + length after mapping should fail */
+ ret = madvise(addr, (NR_HUGE_PAGES * huge_page_size) + base_page_size,
+ MADV_DONTNEED);
+ if (!ret) {
+ printf("Unexpected success of madvise call with invalid length line %d\n",
+ __LINE__);
+ exit(1);
+ }
+
+ (void)munmap(addr, NR_HUGE_PAGES * huge_page_size);
+
+ /*
+ * Test alignment of MADV_DONTNEED addr and length arguments
+ */
+ addr = mmap(NULL, NR_HUGE_PAGES * huge_page_size,
+ PROT_READ | PROT_WRITE,
+ MAP_PRIVATE | MAP_ANONYMOUS | MAP_HUGETLB,
+ -1, 0);
+ if (addr == MAP_FAILED) {
+ perror("mmap");
+ exit(1);
+ }
+ write_fault_pages(addr, NR_HUGE_PAGES);
+ validate_free_pages(free_hugepages - NR_HUGE_PAGES);
+
+ /* addr is not huge page size aligned and should fail */
+ ret = madvise(addr + base_page_size,
+ NR_HUGE_PAGES * huge_page_size - base_page_size,
+ MADV_DONTNEED);
+ if (!ret) {
+ printf("Unexpected success of madvise call with unaligned start address %d\n",
+ __LINE__);
+ exit(1);
+ }
+
+ /* addr + length should be aligned down to huge page size */
+ if (madvise(addr,
+ ((NR_HUGE_PAGES - 1) * huge_page_size) + base_page_size,
+ MADV_DONTNEED)) {
+ perror("madvise");
+ exit(1);
+ }
+
+ /* should free all but last page in mapping */
+ validate_free_pages(free_hugepages - 1);
+
+ (void)munmap(addr, NR_HUGE_PAGES * huge_page_size);
+ validate_free_pages(free_hugepages);
+
+ /*
+ * Test MADV_DONTNEED on anonymous private mapping
+ */
+ addr = mmap(NULL, NR_HUGE_PAGES * huge_page_size,
+ PROT_READ | PROT_WRITE,
+ MAP_PRIVATE | MAP_ANONYMOUS | MAP_HUGETLB,
+ -1, 0);
+ if (addr == MAP_FAILED) {
+ perror("mmap");
+ exit(1);
+ }
+ write_fault_pages(addr, NR_HUGE_PAGES);
+ validate_free_pages(free_hugepages - NR_HUGE_PAGES);
+
+ if (madvise(addr, NR_HUGE_PAGES * huge_page_size, MADV_DONTNEED)) {
+ perror("madvise");
+ exit(1);
+ }
+
+ /* should free all pages in mapping */
+ validate_free_pages(free_hugepages);
+
+ (void)munmap(addr, NR_HUGE_PAGES * huge_page_size);
+
+ /*
+ * Test MADV_DONTNEED on private mapping of hugetlb file
+ */
+ if (fallocate(fd, 0, 0, NR_HUGE_PAGES * huge_page_size)) {
+ perror("fallocate");
+ exit(1);
+ }
+ validate_free_pages(free_hugepages - NR_HUGE_PAGES);
+
+ addr = mmap(NULL, NR_HUGE_PAGES * huge_page_size,
+ PROT_READ | PROT_WRITE,
+ MAP_PRIVATE, fd, 0);
+ if (addr == MAP_FAILED) {
+ perror("mmap");
+ exit(1);
+ }
+
+ /* read should not consume any pages */
+ read_fault_pages(addr, NR_HUGE_PAGES);
+ validate_free_pages(free_hugepages - NR_HUGE_PAGES);
+
+ /* madvise should not free any pages */
+ if (madvise(addr, NR_HUGE_PAGES * huge_page_size, MADV_DONTNEED)) {
+ perror("madvise");
+ exit(1);
+ }
+ validate_free_pages(free_hugepages - NR_HUGE_PAGES);
+
+ /* writes should allocate private pages */
+ write_fault_pages(addr, NR_HUGE_PAGES);
+ validate_free_pages(free_hugepages - (2 * NR_HUGE_PAGES));
+
+ /* madvise should free private pages */
+ if (madvise(addr, NR_HUGE_PAGES * huge_page_size, MADV_DONTNEED)) {
+ perror("madvise");
+ exit(1);
+ }
+ validate_free_pages(free_hugepages - NR_HUGE_PAGES);
+
+ /* writes should allocate private pages */
+ write_fault_pages(addr, NR_HUGE_PAGES);
+ validate_free_pages(free_hugepages - (2 * NR_HUGE_PAGES));
+
+ /*
+ * The fallocate below certainly should free the pages associated
+ * with the file. However, pages in the private mapping are also
+ * freed. This is not the 'correct' behavior, but is expected
+ * because this is how it has worked since the initial hugetlb
+ * implementation.
+ */
+ if (fallocate(fd, FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE,
+ 0, NR_HUGE_PAGES * huge_page_size)) {
+ perror("fallocate");
+ exit(1);
+ }
+ validate_free_pages(free_hugepages);
+
+ (void)munmap(addr, NR_HUGE_PAGES * huge_page_size);
+
+ /*
+ * Test MADV_DONTNEED on shared mapping of hugetlb file
+ */
+ if (fallocate(fd, 0, 0, NR_HUGE_PAGES * huge_page_size)) {
+ perror("fallocate");
+ exit(1);
+ }
+ validate_free_pages(free_hugepages - NR_HUGE_PAGES);
+
+ addr = mmap(NULL, NR_HUGE_PAGES * huge_page_size,
+ PROT_READ | PROT_WRITE,
+ MAP_SHARED, fd, 0);
+ if (addr == MAP_FAILED) {
+ perror("mmap");
+ exit(1);
+ }
+
+ /* write should not consume any pages */
+ write_fault_pages(addr, NR_HUGE_PAGES);
+ validate_free_pages(free_hugepages - NR_HUGE_PAGES);
+
+ /* madvise should not free any pages */
+ if (madvise(addr, NR_HUGE_PAGES * huge_page_size, MADV_DONTNEED)) {
+ perror("madvise");
+ exit(1);
+ }
+ validate_free_pages(free_hugepages - NR_HUGE_PAGES);
+
+ /*
+ * Test MADV_REMOVE on shared mapping of hugetlb file
+ *
+ * madvise is same as hole punch and should free all pages.
+ */
+ if (madvise(addr, NR_HUGE_PAGES * huge_page_size, MADV_REMOVE)) {
+ perror("madvise");
+ exit(1);
+ }
+ validate_free_pages(free_hugepages);
+ (void)munmap(addr, NR_HUGE_PAGES * huge_page_size);
+
+ /*
+ * Test MADV_REMOVE on shared and private mapping of hugetlb file
+ */
+ if (fallocate(fd, 0, 0, NR_HUGE_PAGES * huge_page_size)) {
+ perror("fallocate");
+ exit(1);
+ }
+ validate_free_pages(free_hugepages - NR_HUGE_PAGES);
+
+ addr = mmap(NULL, NR_HUGE_PAGES * huge_page_size,
+ PROT_READ | PROT_WRITE,
+ MAP_SHARED, fd, 0);
+ if (addr == MAP_FAILED) {
+ perror("mmap");
+ exit(1);
+ }
+
+ /* shared write should not consume any additional pages */
+ write_fault_pages(addr, NR_HUGE_PAGES);
+ validate_free_pages(free_hugepages - NR_HUGE_PAGES);
+
+ addr2 = mmap(NULL, NR_HUGE_PAGES * huge_page_size,
+ PROT_READ | PROT_WRITE,
+ MAP_PRIVATE, fd, 0);
+ if (addr2 == MAP_FAILED) {
+ perror("mmap");
+ exit(1);
+ }
+
+ /* private read should not consume any pages */
+ read_fault_pages(addr2, NR_HUGE_PAGES);
+ validate_free_pages(free_hugepages - NR_HUGE_PAGES);
+
+ /* private write should consume additional pages */
+ write_fault_pages(addr2, NR_HUGE_PAGES);
+ validate_free_pages(free_hugepages - (2 * NR_HUGE_PAGES));
+
+ /* madvise of shared mapping should not free any pages */
+ if (madvise(addr, NR_HUGE_PAGES * huge_page_size, MADV_DONTNEED)) {
+ perror("madvise");
+ exit(1);
+ }
+ validate_free_pages(free_hugepages - (2 * NR_HUGE_PAGES));
+
+ /* madvise of private mapping should free private pages */
+ if (madvise(addr2, NR_HUGE_PAGES * huge_page_size, MADV_DONTNEED)) {
+ perror("madvise");
+ exit(1);
+ }
+ validate_free_pages(free_hugepages - NR_HUGE_PAGES);
+
+ /* private write should consume additional pages again */
+ write_fault_pages(addr2, NR_HUGE_PAGES);
+ validate_free_pages(free_hugepages - (2 * NR_HUGE_PAGES));
+
+ /*
+ * madvise should free both file and private pages although this is
+ * not correct. private pages should not be freed, but this is
+ * expected. See comment associated with FALLOC_FL_PUNCH_HOLE call.
+ */
+ if (madvise(addr, NR_HUGE_PAGES * huge_page_size, MADV_REMOVE)) {
+ perror("madvise");
+ exit(1);
+ }
+ validate_free_pages(free_hugepages);
+
+ (void)munmap(addr, NR_HUGE_PAGES * huge_page_size);
+ (void)munmap(addr2, NR_HUGE_PAGES * huge_page_size);
+
+ close(fd);
+ return 0;
+}
diff --git a/tools/testing/selftests/mm/hugetlb-read-hwpoison.c b/tools/testing/selftests/mm/hugetlb-read-hwpoison.c
new file mode 100644
index 000000000000..ba6cc6f9cabc
--- /dev/null
+++ b/tools/testing/selftests/mm/hugetlb-read-hwpoison.c
@@ -0,0 +1,322 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#define _GNU_SOURCE
+#include <stdlib.h>
+#include <stdio.h>
+#include <string.h>
+
+#include <linux/magic.h>
+#include <sys/mman.h>
+#include <sys/statfs.h>
+#include <errno.h>
+#include <stdbool.h>
+
+#include "../kselftest.h"
+
+#define PREFIX " ... "
+#define ERROR_PREFIX " !!! "
+
+#define MAX_WRITE_READ_CHUNK_SIZE (getpagesize() * 16)
+#define MAX(a, b) (((a) > (b)) ? (a) : (b))
+
+enum test_status {
+ TEST_PASSED = 0,
+ TEST_FAILED = 1,
+ TEST_SKIPPED = 2,
+};
+
+static char *status_to_str(enum test_status status)
+{
+ switch (status) {
+ case TEST_PASSED:
+ return "TEST_PASSED";
+ case TEST_FAILED:
+ return "TEST_FAILED";
+ case TEST_SKIPPED:
+ return "TEST_SKIPPED";
+ default:
+ return "TEST_???";
+ }
+}
+
+static int setup_filemap(char *filemap, size_t len, size_t wr_chunk_size)
+{
+ char iter = 0;
+
+ for (size_t offset = 0; offset < len;
+ offset += wr_chunk_size) {
+ iter++;
+ memset(filemap + offset, iter, wr_chunk_size);
+ }
+
+ return 0;
+}
+
+static bool verify_chunk(char *buf, size_t len, char val)
+{
+ size_t i;
+
+ for (i = 0; i < len; ++i) {
+ if (buf[i] != val) {
+ printf(PREFIX ERROR_PREFIX "check fail: buf[%lu] = %u != %u\n",
+ i, buf[i], val);
+ return false;
+ }
+ }
+
+ return true;
+}
+
+static bool seek_read_hugepage_filemap(int fd, size_t len, size_t wr_chunk_size,
+ off_t offset, size_t expected)
+{
+ char buf[MAX_WRITE_READ_CHUNK_SIZE];
+ ssize_t ret_count = 0;
+ ssize_t total_ret_count = 0;
+ char val = offset / wr_chunk_size + offset % wr_chunk_size;
+
+ printf(PREFIX PREFIX "init val=%u with offset=0x%lx\n", val, offset);
+ printf(PREFIX PREFIX "expect to read 0x%lx bytes of data in total\n",
+ expected);
+ if (lseek(fd, offset, SEEK_SET) < 0) {
+ perror(PREFIX ERROR_PREFIX "seek failed");
+ return false;
+ }
+
+ while (offset + total_ret_count < len) {
+ ret_count = read(fd, buf, wr_chunk_size);
+ if (ret_count == 0) {
+ printf(PREFIX PREFIX "read reach end of the file\n");
+ break;
+ } else if (ret_count < 0) {
+ perror(PREFIX ERROR_PREFIX "read failed");
+ break;
+ }
+ ++val;
+ if (!verify_chunk(buf, ret_count, val))
+ return false;
+
+ total_ret_count += ret_count;
+ }
+ printf(PREFIX PREFIX "actually read 0x%lx bytes of data in total\n",
+ total_ret_count);
+
+ return total_ret_count == expected;
+}
+
+static bool read_hugepage_filemap(int fd, size_t len,
+ size_t wr_chunk_size, size_t expected)
+{
+ char buf[MAX_WRITE_READ_CHUNK_SIZE];
+ ssize_t ret_count = 0;
+ ssize_t total_ret_count = 0;
+ char val = 0;
+
+ printf(PREFIX PREFIX "expect to read 0x%lx bytes of data in total\n",
+ expected);
+ while (total_ret_count < len) {
+ ret_count = read(fd, buf, wr_chunk_size);
+ if (ret_count == 0) {
+ printf(PREFIX PREFIX "read reach end of the file\n");
+ break;
+ } else if (ret_count < 0) {
+ perror(PREFIX ERROR_PREFIX "read failed");
+ break;
+ }
+ ++val;
+ if (!verify_chunk(buf, ret_count, val))
+ return false;
+
+ total_ret_count += ret_count;
+ }
+ printf(PREFIX PREFIX "actually read 0x%lx bytes of data in total\n",
+ total_ret_count);
+
+ return total_ret_count == expected;
+}
+
+static enum test_status
+test_hugetlb_read(int fd, size_t len, size_t wr_chunk_size)
+{
+ enum test_status status = TEST_SKIPPED;
+ char *filemap = NULL;
+
+ if (ftruncate(fd, len) < 0) {
+ perror(PREFIX ERROR_PREFIX "ftruncate failed");
+ return status;
+ }
+
+ filemap = mmap(NULL, len, PROT_READ | PROT_WRITE,
+ MAP_SHARED | MAP_POPULATE, fd, 0);
+ if (filemap == MAP_FAILED) {
+ perror(PREFIX ERROR_PREFIX "mmap for primary mapping failed");
+ goto done;
+ }
+
+ setup_filemap(filemap, len, wr_chunk_size);
+ status = TEST_FAILED;
+
+ if (read_hugepage_filemap(fd, len, wr_chunk_size, len))
+ status = TEST_PASSED;
+
+ munmap(filemap, len);
+done:
+ if (ftruncate(fd, 0) < 0) {
+ perror(PREFIX ERROR_PREFIX "ftruncate back to 0 failed");
+ status = TEST_FAILED;
+ }
+
+ return status;
+}
+
+static enum test_status
+test_hugetlb_read_hwpoison(int fd, size_t len, size_t wr_chunk_size,
+ bool skip_hwpoison_page)
+{
+ enum test_status status = TEST_SKIPPED;
+ char *filemap = NULL;
+ char *hwp_addr = NULL;
+ const unsigned long pagesize = getpagesize();
+
+ if (ftruncate(fd, len) < 0) {
+ perror(PREFIX ERROR_PREFIX "ftruncate failed");
+ return status;
+ }
+
+ filemap = mmap(NULL, len, PROT_READ | PROT_WRITE,
+ MAP_SHARED | MAP_POPULATE, fd, 0);
+ if (filemap == MAP_FAILED) {
+ perror(PREFIX ERROR_PREFIX "mmap for primary mapping failed");
+ goto done;
+ }
+
+ setup_filemap(filemap, len, wr_chunk_size);
+ status = TEST_FAILED;
+
+ /*
+ * Poisoned hugetlb page layout (assume hugepagesize=2MB):
+ * |<---------------------- 1MB ---------------------->|
+ * |<---- healthy page ---->|<---- HWPOISON page ----->|
+ * |<------------------- (1MB - 8KB) ----------------->|
+ */
+ hwp_addr = filemap + len / 2 + pagesize;
+ if (madvise(hwp_addr, pagesize, MADV_HWPOISON) < 0) {
+ perror(PREFIX ERROR_PREFIX "MADV_HWPOISON failed");
+ goto unmap;
+ }
+
+ if (!skip_hwpoison_page) {
+ /*
+ * Userspace should be able to read (1MB + 1 page) from
+ * the beginning of the HWPOISONed hugepage.
+ */
+ if (read_hugepage_filemap(fd, len, wr_chunk_size,
+ len / 2 + pagesize))
+ status = TEST_PASSED;
+ } else {
+ /*
+ * Userspace should be able to read (1MB - 2 pages) from
+ * HWPOISONed hugepage.
+ */
+ if (seek_read_hugepage_filemap(fd, len, wr_chunk_size,
+ len / 2 + MAX(2 * pagesize, wr_chunk_size),
+ len / 2 - MAX(2 * pagesize, wr_chunk_size)))
+ status = TEST_PASSED;
+ }
+
+unmap:
+ munmap(filemap, len);
+done:
+ if (ftruncate(fd, 0) < 0) {
+ perror(PREFIX ERROR_PREFIX "ftruncate back to 0 failed");
+ status = TEST_FAILED;
+ }
+
+ return status;
+}
+
+static int create_hugetlbfs_file(struct statfs *file_stat)
+{
+ int fd;
+
+ fd = memfd_create("hugetlb_tmp", MFD_HUGETLB);
+ if (fd < 0) {
+ perror(PREFIX ERROR_PREFIX "could not open hugetlbfs file");
+ return -1;
+ }
+
+ memset(file_stat, 0, sizeof(*file_stat));
+ if (fstatfs(fd, file_stat)) {
+ perror(PREFIX ERROR_PREFIX "fstatfs failed");
+ goto close;
+ }
+ if (file_stat->f_type != HUGETLBFS_MAGIC) {
+ printf(PREFIX ERROR_PREFIX "not hugetlbfs file\n");
+ goto close;
+ }
+
+ return fd;
+close:
+ close(fd);
+ return -1;
+}
+
+int main(void)
+{
+ int fd;
+ struct statfs file_stat;
+ enum test_status status;
+ /* Test read() in different granularity. */
+ size_t wr_chunk_sizes[] = {
+ getpagesize() / 2, getpagesize(),
+ getpagesize() * 2, getpagesize() * 4
+ };
+ size_t i;
+
+ for (i = 0; i < ARRAY_SIZE(wr_chunk_sizes); ++i) {
+ printf("Write/read chunk size=0x%lx\n",
+ wr_chunk_sizes[i]);
+
+ fd = create_hugetlbfs_file(&file_stat);
+ if (fd < 0)
+ goto create_failure;
+ printf(PREFIX "HugeTLB read regression test...\n");
+ status = test_hugetlb_read(fd, file_stat.f_bsize,
+ wr_chunk_sizes[i]);
+ printf(PREFIX "HugeTLB read regression test...%s\n",
+ status_to_str(status));
+ close(fd);
+ if (status == TEST_FAILED)
+ return -1;
+
+ fd = create_hugetlbfs_file(&file_stat);
+ if (fd < 0)
+ goto create_failure;
+ printf(PREFIX "HugeTLB read HWPOISON test...\n");
+ status = test_hugetlb_read_hwpoison(fd, file_stat.f_bsize,
+ wr_chunk_sizes[i], false);
+ printf(PREFIX "HugeTLB read HWPOISON test...%s\n",
+ status_to_str(status));
+ close(fd);
+ if (status == TEST_FAILED)
+ return -1;
+
+ fd = create_hugetlbfs_file(&file_stat);
+ if (fd < 0)
+ goto create_failure;
+ printf(PREFIX "HugeTLB seek then read HWPOISON test...\n");
+ status = test_hugetlb_read_hwpoison(fd, file_stat.f_bsize,
+ wr_chunk_sizes[i], true);
+ printf(PREFIX "HugeTLB seek then read HWPOISON test...%s\n",
+ status_to_str(status));
+ close(fd);
+ if (status == TEST_FAILED)
+ return -1;
+ }
+
+ return 0;
+
+create_failure:
+ printf(ERROR_PREFIX "Abort test: failed to create hugetlbfs file\n");
+ return -1;
+}
diff --git a/tools/testing/selftests/mm/hugetlb_fault_after_madv.c b/tools/testing/selftests/mm/hugetlb_fault_after_madv.c
new file mode 100644
index 000000000000..73b81c632366
--- /dev/null
+++ b/tools/testing/selftests/mm/hugetlb_fault_after_madv.c
@@ -0,0 +1,73 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <pthread.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <sys/mman.h>
+#include <sys/types.h>
+#include <unistd.h>
+
+#include "vm_util.h"
+#include "../kselftest.h"
+
+#define MMAP_SIZE (1 << 21)
+#define INLOOP_ITER 100
+
+char *huge_ptr;
+
+/* Touch the memory while it is being madvised() */
+void *touch(void *unused)
+{
+ char *ptr = (char *)huge_ptr;
+
+ for (int i = 0; i < INLOOP_ITER; i++)
+ ptr[0] = '.';
+
+ return NULL;
+}
+
+void *madv(void *unused)
+{
+ usleep(rand() % 10);
+
+ for (int i = 0; i < INLOOP_ITER; i++)
+ madvise(huge_ptr, MMAP_SIZE, MADV_DONTNEED);
+
+ return NULL;
+}
+
+int main(void)
+{
+ unsigned long free_hugepages;
+ pthread_t thread1, thread2;
+ /*
+ * On kernel 6.4, we are able to reproduce the problem with ~1000
+ * interactions
+ */
+ int max = 10000;
+
+ srand(getpid());
+
+ free_hugepages = get_free_hugepages();
+ if (free_hugepages != 1) {
+ ksft_exit_skip("This test needs one and only one page to execute. Got %lu\n",
+ free_hugepages);
+ }
+
+ while (max--) {
+ huge_ptr = mmap(NULL, MMAP_SIZE, PROT_READ | PROT_WRITE,
+ MAP_PRIVATE | MAP_ANONYMOUS | MAP_HUGETLB,
+ -1, 0);
+
+ if ((unsigned long)huge_ptr == -1)
+ ksft_exit_skip("Failed to allocated huge page\n");
+
+ pthread_create(&thread1, NULL, madv, NULL);
+ pthread_create(&thread2, NULL, touch, NULL);
+
+ pthread_join(thread1, NULL);
+ pthread_join(thread2, NULL);
+ munmap(huge_ptr, MMAP_SIZE);
+ }
+
+ return KSFT_PASS;
+}
diff --git a/tools/testing/selftests/mm/hugetlb_madv_vs_map.c b/tools/testing/selftests/mm/hugetlb_madv_vs_map.c
new file mode 100644
index 000000000000..d01e8d4901d0
--- /dev/null
+++ b/tools/testing/selftests/mm/hugetlb_madv_vs_map.c
@@ -0,0 +1,124 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * A test case that must run on a system with one and only one huge page available.
+ * # echo 1 > /sys/kernel/mm/hugepages/hugepages-2048kB/nr_hugepages
+ *
+ * During setup, the test allocates the only available page, and starts three threads:
+ * - thread1:
+ * * madvise(MADV_DONTNEED) on the allocated huge page
+ * - thread 2:
+ * * Write to the allocated huge page
+ * - thread 3:
+ * * Try to allocated an extra huge page (which must not available)
+ *
+ * The test fails if thread3 is able to allocate a page.
+ *
+ * Touching the first page after thread3's allocation will raise a SIGBUS
+ *
+ * Author: Breno Leitao <leitao@debian.org>
+ */
+#include <pthread.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <sys/mman.h>
+#include <sys/types.h>
+#include <unistd.h>
+
+#include "vm_util.h"
+#include "../kselftest.h"
+
+#define MMAP_SIZE (1 << 21)
+#define INLOOP_ITER 100
+
+char *huge_ptr;
+
+/* Touch the memory while it is being madvised() */
+void *touch(void *unused)
+{
+ for (int i = 0; i < INLOOP_ITER; i++)
+ huge_ptr[0] = '.';
+
+ return NULL;
+}
+
+void *madv(void *unused)
+{
+ for (int i = 0; i < INLOOP_ITER; i++)
+ madvise(huge_ptr, MMAP_SIZE, MADV_DONTNEED);
+
+ return NULL;
+}
+
+/*
+ * We got here, and there must be no huge page available for mapping
+ * The other hugepage should be flipping from used <-> reserved, because
+ * of madvise(DONTNEED).
+ */
+void *map_extra(void *unused)
+{
+ void *ptr;
+
+ for (int i = 0; i < INLOOP_ITER; i++) {
+ ptr = mmap(NULL, MMAP_SIZE, PROT_READ | PROT_WRITE,
+ MAP_PRIVATE | MAP_ANONYMOUS | MAP_HUGETLB,
+ -1, 0);
+
+ if ((long)ptr != -1) {
+ /* Touching the other page now will cause a SIGBUG
+ * huge_ptr[0] = '1';
+ */
+ return ptr;
+ }
+ }
+
+ return NULL;
+}
+
+int main(void)
+{
+ pthread_t thread1, thread2, thread3;
+ unsigned long free_hugepages;
+ void *ret;
+
+ /*
+ * On kernel 6.7, we are able to reproduce the problem with ~10
+ * interactions
+ */
+ int max = 10;
+
+ free_hugepages = get_free_hugepages();
+
+ if (free_hugepages != 1) {
+ ksft_exit_skip("This test needs one and only one page to execute. Got %lu\n",
+ free_hugepages);
+ }
+
+ while (max--) {
+ huge_ptr = mmap(NULL, MMAP_SIZE, PROT_READ | PROT_WRITE,
+ MAP_PRIVATE | MAP_ANONYMOUS | MAP_HUGETLB,
+ -1, 0);
+
+ if ((unsigned long)huge_ptr == -1) {
+ ksft_exit_skip("Failed to allocated huge page\n");
+ return KSFT_SKIP;
+ }
+
+ pthread_create(&thread1, NULL, madv, NULL);
+ pthread_create(&thread2, NULL, touch, NULL);
+ pthread_create(&thread3, NULL, map_extra, NULL);
+
+ pthread_join(thread1, NULL);
+ pthread_join(thread2, NULL);
+ pthread_join(thread3, &ret);
+
+ if (ret) {
+ ksft_test_result_fail("Unexpected huge page allocation\n");
+ return KSFT_FAIL;
+ }
+
+ /* Unmap and restart */
+ munmap(huge_ptr, MMAP_SIZE);
+ }
+
+ return KSFT_PASS;
+}
diff --git a/tools/testing/selftests/vm/hugetlb_reparenting_test.sh b/tools/testing/selftests/mm/hugetlb_reparenting_test.sh
index d11d1febccc3..11f9bbe7dc22 100644..100755
--- a/tools/testing/selftests/vm/hugetlb_reparenting_test.sh
+++ b/tools/testing/selftests/mm/hugetlb_reparenting_test.sh
@@ -1,13 +1,17 @@
#!/bin/bash
# SPDX-License-Identifier: GPL-2.0
+# Kselftest framework requirement - SKIP code is 4.
+ksft_skip=4
+
set -e
if [[ $(id -u) -ne 0 ]]; then
echo "This test must be run as root. Skipping..."
- exit 0
+ exit $ksft_skip
fi
+nr_hugepgs=$(cat /proc/sys/vm/nr_hugepages)
usage_file=usage_in_bytes
if [[ "$1" == "-cgroup-v2" ]]; then
@@ -15,19 +19,24 @@ if [[ "$1" == "-cgroup-v2" ]]; then
usage_file=current
fi
-CGROUP_ROOT='/dev/cgroup/memory'
-MNT='/mnt/huge/'
-if [[ ! -e $CGROUP_ROOT ]]; then
- mkdir -p $CGROUP_ROOT
- if [[ $cgroup2 ]]; then
+if [[ $cgroup2 ]]; then
+ CGROUP_ROOT=$(mount -t cgroup2 | head -1 | awk '{print $3}')
+ if [[ -z "$CGROUP_ROOT" ]]; then
+ CGROUP_ROOT=/dev/cgroup/memory
mount -t cgroup2 none $CGROUP_ROOT
- sleep 1
- echo "+hugetlb +memory" >$CGROUP_ROOT/cgroup.subtree_control
- else
+ do_umount=1
+ fi
+ echo "+hugetlb +memory" >$CGROUP_ROOT/cgroup.subtree_control
+else
+ CGROUP_ROOT=$(mount -t cgroup | grep ",hugetlb" | awk '{print $3}')
+ if [[ -z "$CGROUP_ROOT" ]]; then
+ CGROUP_ROOT=/dev/cgroup/memory
mount -t cgroup memory,hugetlb $CGROUP_ROOT
+ do_umount=1
fi
fi
+MNT='/mnt/huge/'
function get_machine_hugepage_size() {
hpz=$(grep -i hugepagesize /proc/meminfo)
@@ -240,5 +249,9 @@ cleanup
echo ALL PASS
-umount $CGROUP_ROOT
-rm -rf $CGROUP_ROOT
+if [[ $do_umount ]]; then
+ umount $CGROUP_ROOT
+ rm -rf $CGROUP_ROOT
+fi
+
+echo "$nr_hugepgs" > /proc/sys/vm/nr_hugepages
diff --git a/tools/testing/selftests/mm/khugepaged.c b/tools/testing/selftests/mm/khugepaged.c
new file mode 100644
index 000000000000..829320a519e7
--- /dev/null
+++ b/tools/testing/selftests/mm/khugepaged.c
@@ -0,0 +1,1285 @@
+#define _GNU_SOURCE
+#include <ctype.h>
+#include <errno.h>
+#include <fcntl.h>
+#include <limits.h>
+#include <dirent.h>
+#include <signal.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <stdbool.h>
+#include <string.h>
+#include <unistd.h>
+
+#include <linux/mman.h>
+#include <sys/mman.h>
+#include <sys/wait.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <sys/sysmacros.h>
+#include <sys/vfs.h>
+
+#include "linux/magic.h"
+
+#include "vm_util.h"
+#include "thp_settings.h"
+
+#define BASE_ADDR ((void *)(1UL << 30))
+static unsigned long hpage_pmd_size;
+static unsigned long page_size;
+static int hpage_pmd_nr;
+static int anon_order;
+
+#define PID_SMAPS "/proc/self/smaps"
+#define TEST_FILE "collapse_test_file"
+
+#define MAX_LINE_LENGTH 500
+
+enum vma_type {
+ VMA_ANON,
+ VMA_FILE,
+ VMA_SHMEM,
+};
+
+struct mem_ops {
+ void *(*setup_area)(int nr_hpages);
+ void (*cleanup_area)(void *p, unsigned long size);
+ void (*fault)(void *p, unsigned long start, unsigned long end);
+ bool (*check_huge)(void *addr, int nr_hpages);
+ const char *name;
+};
+
+static struct mem_ops *file_ops;
+static struct mem_ops *anon_ops;
+static struct mem_ops *shmem_ops;
+
+struct collapse_context {
+ void (*collapse)(const char *msg, char *p, int nr_hpages,
+ struct mem_ops *ops, bool expect);
+ bool enforce_pte_scan_limits;
+ const char *name;
+};
+
+static struct collapse_context *khugepaged_context;
+static struct collapse_context *madvise_context;
+
+struct file_info {
+ const char *dir;
+ char path[PATH_MAX];
+ enum vma_type type;
+ int fd;
+ char dev_queue_read_ahead_path[PATH_MAX];
+};
+
+static struct file_info finfo;
+static bool skip_settings_restore;
+static int exit_status;
+
+static void success(const char *msg)
+{
+ printf(" \e[32m%s\e[0m\n", msg);
+}
+
+static void fail(const char *msg)
+{
+ printf(" \e[31m%s\e[0m\n", msg);
+ exit_status++;
+}
+
+static void skip(const char *msg)
+{
+ printf(" \e[33m%s\e[0m\n", msg);
+}
+
+static void restore_settings_atexit(void)
+{
+ if (skip_settings_restore)
+ return;
+
+ printf("Restore THP and khugepaged settings...");
+ thp_restore_settings();
+ success("OK");
+
+ skip_settings_restore = true;
+}
+
+static void restore_settings(int sig)
+{
+ /* exit() will invoke the restore_settings_atexit handler. */
+ exit(sig ? EXIT_FAILURE : exit_status);
+}
+
+static void save_settings(void)
+{
+ printf("Save THP and khugepaged settings...");
+ if (file_ops && finfo.type == VMA_FILE)
+ thp_set_read_ahead_path(finfo.dev_queue_read_ahead_path);
+ thp_save_settings();
+
+ success("OK");
+
+ atexit(restore_settings_atexit);
+ signal(SIGTERM, restore_settings);
+ signal(SIGINT, restore_settings);
+ signal(SIGHUP, restore_settings);
+ signal(SIGQUIT, restore_settings);
+}
+
+static void get_finfo(const char *dir)
+{
+ struct stat path_stat;
+ struct statfs fs;
+ char buf[1 << 10];
+ char path[PATH_MAX];
+ char *str, *end;
+
+ finfo.dir = dir;
+ stat(finfo.dir, &path_stat);
+ if (!S_ISDIR(path_stat.st_mode)) {
+ printf("%s: Not a directory (%s)\n", __func__, finfo.dir);
+ exit(EXIT_FAILURE);
+ }
+ if (snprintf(finfo.path, sizeof(finfo.path), "%s/" TEST_FILE,
+ finfo.dir) >= sizeof(finfo.path)) {
+ printf("%s: Pathname is too long\n", __func__);
+ exit(EXIT_FAILURE);
+ }
+ if (statfs(finfo.dir, &fs)) {
+ perror("statfs()");
+ exit(EXIT_FAILURE);
+ }
+ finfo.type = fs.f_type == TMPFS_MAGIC ? VMA_SHMEM : VMA_FILE;
+ if (finfo.type == VMA_SHMEM)
+ return;
+
+ /* Find owning device's queue/read_ahead_kb control */
+ if (snprintf(path, sizeof(path), "/sys/dev/block/%d:%d/uevent",
+ major(path_stat.st_dev), minor(path_stat.st_dev))
+ >= sizeof(path)) {
+ printf("%s: Pathname is too long\n", __func__);
+ exit(EXIT_FAILURE);
+ }
+ if (read_file(path, buf, sizeof(buf)) < 0) {
+ perror("read_file(read_num)");
+ exit(EXIT_FAILURE);
+ }
+ if (strstr(buf, "DEVTYPE=disk")) {
+ /* Found it */
+ if (snprintf(finfo.dev_queue_read_ahead_path,
+ sizeof(finfo.dev_queue_read_ahead_path),
+ "/sys/dev/block/%d:%d/queue/read_ahead_kb",
+ major(path_stat.st_dev), minor(path_stat.st_dev))
+ >= sizeof(finfo.dev_queue_read_ahead_path)) {
+ printf("%s: Pathname is too long\n", __func__);
+ exit(EXIT_FAILURE);
+ }
+ return;
+ }
+ if (!strstr(buf, "DEVTYPE=partition")) {
+ printf("%s: Unknown device type: %s\n", __func__, path);
+ exit(EXIT_FAILURE);
+ }
+ /*
+ * Partition of block device - need to find actual device.
+ * Using naming convention that devnameN is partition of
+ * device devname.
+ */
+ str = strstr(buf, "DEVNAME=");
+ if (!str) {
+ printf("%s: Could not read: %s", __func__, path);
+ exit(EXIT_FAILURE);
+ }
+ str += 8;
+ end = str;
+ while (*end) {
+ if (isdigit(*end)) {
+ *end = '\0';
+ if (snprintf(finfo.dev_queue_read_ahead_path,
+ sizeof(finfo.dev_queue_read_ahead_path),
+ "/sys/block/%s/queue/read_ahead_kb",
+ str) >= sizeof(finfo.dev_queue_read_ahead_path)) {
+ printf("%s: Pathname is too long\n", __func__);
+ exit(EXIT_FAILURE);
+ }
+ return;
+ }
+ ++end;
+ }
+ printf("%s: Could not read: %s\n", __func__, path);
+ exit(EXIT_FAILURE);
+}
+
+static bool check_swap(void *addr, unsigned long size)
+{
+ bool swap = false;
+ int ret;
+ FILE *fp;
+ char buffer[MAX_LINE_LENGTH];
+ char addr_pattern[MAX_LINE_LENGTH];
+
+ ret = snprintf(addr_pattern, MAX_LINE_LENGTH, "%08lx-",
+ (unsigned long) addr);
+ if (ret >= MAX_LINE_LENGTH) {
+ printf("%s: Pattern is too long\n", __func__);
+ exit(EXIT_FAILURE);
+ }
+
+
+ fp = fopen(PID_SMAPS, "r");
+ if (!fp) {
+ printf("%s: Failed to open file %s\n", __func__, PID_SMAPS);
+ exit(EXIT_FAILURE);
+ }
+ if (!check_for_pattern(fp, addr_pattern, buffer, sizeof(buffer)))
+ goto err_out;
+
+ ret = snprintf(addr_pattern, MAX_LINE_LENGTH, "Swap:%19ld kB",
+ size >> 10);
+ if (ret >= MAX_LINE_LENGTH) {
+ printf("%s: Pattern is too long\n", __func__);
+ exit(EXIT_FAILURE);
+ }
+ /*
+ * Fetch the Swap: in the same block and check whether it got
+ * the expected number of hugeepages next.
+ */
+ if (!check_for_pattern(fp, "Swap:", buffer, sizeof(buffer)))
+ goto err_out;
+
+ if (strncmp(buffer, addr_pattern, strlen(addr_pattern)))
+ goto err_out;
+
+ swap = true;
+err_out:
+ fclose(fp);
+ return swap;
+}
+
+static void *alloc_mapping(int nr)
+{
+ void *p;
+
+ p = mmap(BASE_ADDR, nr * hpage_pmd_size, PROT_READ | PROT_WRITE,
+ MAP_ANONYMOUS | MAP_PRIVATE, -1, 0);
+ if (p != BASE_ADDR) {
+ printf("Failed to allocate VMA at %p\n", BASE_ADDR);
+ exit(EXIT_FAILURE);
+ }
+
+ return p;
+}
+
+static void fill_memory(int *p, unsigned long start, unsigned long end)
+{
+ int i;
+
+ for (i = start / page_size; i < end / page_size; i++)
+ p[i * page_size / sizeof(*p)] = i + 0xdead0000;
+}
+
+/*
+ * MADV_COLLAPSE is a best-effort request and may fail if an internal
+ * resource is temporarily unavailable, in which case it will set errno to
+ * EAGAIN. In such a case, immediately reattempt the operation one more
+ * time.
+ */
+static int madvise_collapse_retry(void *p, unsigned long size)
+{
+ bool retry = true;
+ int ret;
+
+retry:
+ ret = madvise(p, size, MADV_COLLAPSE);
+ if (ret && errno == EAGAIN && retry) {
+ retry = false;
+ goto retry;
+ }
+ return ret;
+}
+
+/*
+ * Returns pmd-mapped hugepage in VMA marked VM_HUGEPAGE, filled with
+ * validate_memory()'able contents.
+ */
+static void *alloc_hpage(struct mem_ops *ops)
+{
+ void *p = ops->setup_area(1);
+
+ ops->fault(p, 0, hpage_pmd_size);
+
+ /*
+ * VMA should be neither VM_HUGEPAGE nor VM_NOHUGEPAGE.
+ * The latter is ineligible for collapse by MADV_COLLAPSE
+ * while the former might cause MADV_COLLAPSE to race with
+ * khugepaged on low-load system (like a test machine), which
+ * would cause MADV_COLLAPSE to fail with EAGAIN.
+ */
+ printf("Allocate huge page...");
+ if (madvise_collapse_retry(p, hpage_pmd_size)) {
+ perror("madvise(MADV_COLLAPSE)");
+ exit(EXIT_FAILURE);
+ }
+ if (!ops->check_huge(p, 1)) {
+ perror("madvise(MADV_COLLAPSE)");
+ exit(EXIT_FAILURE);
+ }
+ if (madvise(p, hpage_pmd_size, MADV_HUGEPAGE)) {
+ perror("madvise(MADV_HUGEPAGE)");
+ exit(EXIT_FAILURE);
+ }
+ success("OK");
+ return p;
+}
+
+static void validate_memory(int *p, unsigned long start, unsigned long end)
+{
+ int i;
+
+ for (i = start / page_size; i < end / page_size; i++) {
+ if (p[i * page_size / sizeof(*p)] != i + 0xdead0000) {
+ printf("Page %d is corrupted: %#x\n",
+ i, p[i * page_size / sizeof(*p)]);
+ exit(EXIT_FAILURE);
+ }
+ }
+}
+
+static void *anon_setup_area(int nr_hpages)
+{
+ return alloc_mapping(nr_hpages);
+}
+
+static void anon_cleanup_area(void *p, unsigned long size)
+{
+ munmap(p, size);
+}
+
+static void anon_fault(void *p, unsigned long start, unsigned long end)
+{
+ fill_memory(p, start, end);
+}
+
+static bool anon_check_huge(void *addr, int nr_hpages)
+{
+ return check_huge_anon(addr, nr_hpages, hpage_pmd_size);
+}
+
+static void *file_setup_area(int nr_hpages)
+{
+ int fd;
+ void *p;
+ unsigned long size;
+
+ unlink(finfo.path); /* Cleanup from previous failed tests */
+ printf("Creating %s for collapse%s...", finfo.path,
+ finfo.type == VMA_SHMEM ? " (tmpfs)" : "");
+ fd = open(finfo.path, O_DSYNC | O_CREAT | O_RDWR | O_TRUNC | O_EXCL,
+ 777);
+ if (fd < 0) {
+ perror("open()");
+ exit(EXIT_FAILURE);
+ }
+
+ size = nr_hpages * hpage_pmd_size;
+ p = alloc_mapping(nr_hpages);
+ fill_memory(p, 0, size);
+ write(fd, p, size);
+ close(fd);
+ munmap(p, size);
+ success("OK");
+
+ printf("Opening %s read only for collapse...", finfo.path);
+ finfo.fd = open(finfo.path, O_RDONLY, 777);
+ if (finfo.fd < 0) {
+ perror("open()");
+ exit(EXIT_FAILURE);
+ }
+ p = mmap(BASE_ADDR, size, PROT_READ | PROT_EXEC,
+ MAP_PRIVATE, finfo.fd, 0);
+ if (p == MAP_FAILED || p != BASE_ADDR) {
+ perror("mmap()");
+ exit(EXIT_FAILURE);
+ }
+
+ /* Drop page cache */
+ write_file("/proc/sys/vm/drop_caches", "3", 2);
+ success("OK");
+ return p;
+}
+
+static void file_cleanup_area(void *p, unsigned long size)
+{
+ munmap(p, size);
+ close(finfo.fd);
+ unlink(finfo.path);
+}
+
+static void file_fault(void *p, unsigned long start, unsigned long end)
+{
+ if (madvise(((char *)p) + start, end - start, MADV_POPULATE_READ)) {
+ perror("madvise(MADV_POPULATE_READ");
+ exit(EXIT_FAILURE);
+ }
+}
+
+static bool file_check_huge(void *addr, int nr_hpages)
+{
+ switch (finfo.type) {
+ case VMA_FILE:
+ return check_huge_file(addr, nr_hpages, hpage_pmd_size);
+ case VMA_SHMEM:
+ return check_huge_shmem(addr, nr_hpages, hpage_pmd_size);
+ default:
+ exit(EXIT_FAILURE);
+ return false;
+ }
+}
+
+static void *shmem_setup_area(int nr_hpages)
+{
+ void *p;
+ unsigned long size = nr_hpages * hpage_pmd_size;
+
+ finfo.fd = memfd_create("khugepaged-selftest-collapse-shmem", 0);
+ if (finfo.fd < 0) {
+ perror("memfd_create()");
+ exit(EXIT_FAILURE);
+ }
+ if (ftruncate(finfo.fd, size)) {
+ perror("ftruncate()");
+ exit(EXIT_FAILURE);
+ }
+ p = mmap(BASE_ADDR, size, PROT_READ | PROT_WRITE, MAP_SHARED, finfo.fd,
+ 0);
+ if (p != BASE_ADDR) {
+ perror("mmap()");
+ exit(EXIT_FAILURE);
+ }
+ return p;
+}
+
+static void shmem_cleanup_area(void *p, unsigned long size)
+{
+ munmap(p, size);
+ close(finfo.fd);
+}
+
+static bool shmem_check_huge(void *addr, int nr_hpages)
+{
+ return check_huge_shmem(addr, nr_hpages, hpage_pmd_size);
+}
+
+static struct mem_ops __anon_ops = {
+ .setup_area = &anon_setup_area,
+ .cleanup_area = &anon_cleanup_area,
+ .fault = &anon_fault,
+ .check_huge = &anon_check_huge,
+ .name = "anon",
+};
+
+static struct mem_ops __file_ops = {
+ .setup_area = &file_setup_area,
+ .cleanup_area = &file_cleanup_area,
+ .fault = &file_fault,
+ .check_huge = &file_check_huge,
+ .name = "file",
+};
+
+static struct mem_ops __shmem_ops = {
+ .setup_area = &shmem_setup_area,
+ .cleanup_area = &shmem_cleanup_area,
+ .fault = &anon_fault,
+ .check_huge = &shmem_check_huge,
+ .name = "shmem",
+};
+
+static void __madvise_collapse(const char *msg, char *p, int nr_hpages,
+ struct mem_ops *ops, bool expect)
+{
+ int ret;
+ struct thp_settings settings = *thp_current_settings();
+
+ printf("%s...", msg);
+
+ /*
+ * Prevent khugepaged interference and tests that MADV_COLLAPSE
+ * ignores /sys/kernel/mm/transparent_hugepage/enabled
+ */
+ settings.thp_enabled = THP_NEVER;
+ settings.shmem_enabled = SHMEM_NEVER;
+ thp_push_settings(&settings);
+
+ /* Clear VM_NOHUGEPAGE */
+ madvise(p, nr_hpages * hpage_pmd_size, MADV_HUGEPAGE);
+ ret = madvise_collapse_retry(p, nr_hpages * hpage_pmd_size);
+ if (((bool)ret) == expect)
+ fail("Fail: Bad return value");
+ else if (!ops->check_huge(p, expect ? nr_hpages : 0))
+ fail("Fail: check_huge()");
+ else
+ success("OK");
+
+ thp_pop_settings();
+}
+
+static void madvise_collapse(const char *msg, char *p, int nr_hpages,
+ struct mem_ops *ops, bool expect)
+{
+ /* Sanity check */
+ if (!ops->check_huge(p, 0)) {
+ printf("Unexpected huge page\n");
+ exit(EXIT_FAILURE);
+ }
+ __madvise_collapse(msg, p, nr_hpages, ops, expect);
+}
+
+#define TICK 500000
+static bool wait_for_scan(const char *msg, char *p, int nr_hpages,
+ struct mem_ops *ops)
+{
+ int full_scans;
+ int timeout = 6; /* 3 seconds */
+
+ /* Sanity check */
+ if (!ops->check_huge(p, 0)) {
+ printf("Unexpected huge page\n");
+ exit(EXIT_FAILURE);
+ }
+
+ madvise(p, nr_hpages * hpage_pmd_size, MADV_HUGEPAGE);
+
+ /* Wait until the second full_scan completed */
+ full_scans = thp_read_num("khugepaged/full_scans") + 2;
+
+ printf("%s...", msg);
+ while (timeout--) {
+ if (ops->check_huge(p, nr_hpages))
+ break;
+ if (thp_read_num("khugepaged/full_scans") >= full_scans)
+ break;
+ printf(".");
+ usleep(TICK);
+ }
+
+ madvise(p, nr_hpages * hpage_pmd_size, MADV_NOHUGEPAGE);
+
+ return timeout == -1;
+}
+
+static void khugepaged_collapse(const char *msg, char *p, int nr_hpages,
+ struct mem_ops *ops, bool expect)
+{
+ if (wait_for_scan(msg, p, nr_hpages, ops)) {
+ if (expect)
+ fail("Timeout");
+ else
+ success("OK");
+ return;
+ }
+
+ /*
+ * For file and shmem memory, khugepaged only retracts pte entries after
+ * putting the new hugepage in the page cache. The hugepage must be
+ * subsequently refaulted to install the pmd mapping for the mm.
+ */
+ if (ops != &__anon_ops)
+ ops->fault(p, 0, nr_hpages * hpage_pmd_size);
+
+ if (ops->check_huge(p, expect ? nr_hpages : 0))
+ success("OK");
+ else
+ fail("Fail");
+}
+
+static struct collapse_context __khugepaged_context = {
+ .collapse = &khugepaged_collapse,
+ .enforce_pte_scan_limits = true,
+ .name = "khugepaged",
+};
+
+static struct collapse_context __madvise_context = {
+ .collapse = &madvise_collapse,
+ .enforce_pte_scan_limits = false,
+ .name = "madvise",
+};
+
+static bool is_tmpfs(struct mem_ops *ops)
+{
+ return ops == &__file_ops && finfo.type == VMA_SHMEM;
+}
+
+static bool is_anon(struct mem_ops *ops)
+{
+ return ops == &__anon_ops;
+}
+
+static void alloc_at_fault(void)
+{
+ struct thp_settings settings = *thp_current_settings();
+ char *p;
+
+ settings.thp_enabled = THP_ALWAYS;
+ thp_push_settings(&settings);
+
+ p = alloc_mapping(1);
+ *p = 1;
+ printf("Allocate huge page on fault...");
+ if (check_huge_anon(p, 1, hpage_pmd_size))
+ success("OK");
+ else
+ fail("Fail");
+
+ thp_pop_settings();
+
+ madvise(p, page_size, MADV_DONTNEED);
+ printf("Split huge PMD on MADV_DONTNEED...");
+ if (check_huge_anon(p, 0, hpage_pmd_size))
+ success("OK");
+ else
+ fail("Fail");
+ munmap(p, hpage_pmd_size);
+}
+
+static void collapse_full(struct collapse_context *c, struct mem_ops *ops)
+{
+ void *p;
+ int nr_hpages = 4;
+ unsigned long size = nr_hpages * hpage_pmd_size;
+
+ p = ops->setup_area(nr_hpages);
+ ops->fault(p, 0, size);
+ c->collapse("Collapse multiple fully populated PTE table", p, nr_hpages,
+ ops, true);
+ validate_memory(p, 0, size);
+ ops->cleanup_area(p, size);
+}
+
+static void collapse_empty(struct collapse_context *c, struct mem_ops *ops)
+{
+ void *p;
+
+ p = ops->setup_area(1);
+ c->collapse("Do not collapse empty PTE table", p, 1, ops, false);
+ ops->cleanup_area(p, hpage_pmd_size);
+}
+
+static void collapse_single_pte_entry(struct collapse_context *c, struct mem_ops *ops)
+{
+ void *p;
+
+ p = ops->setup_area(1);
+ ops->fault(p, 0, page_size);
+ c->collapse("Collapse PTE table with single PTE entry present", p,
+ 1, ops, true);
+ ops->cleanup_area(p, hpage_pmd_size);
+}
+
+static void collapse_max_ptes_none(struct collapse_context *c, struct mem_ops *ops)
+{
+ int max_ptes_none = hpage_pmd_nr / 2;
+ struct thp_settings settings = *thp_current_settings();
+ void *p;
+ int fault_nr_pages = is_anon(ops) ? 1 << anon_order : 1;
+
+ settings.khugepaged.max_ptes_none = max_ptes_none;
+ thp_push_settings(&settings);
+
+ p = ops->setup_area(1);
+
+ if (is_tmpfs(ops)) {
+ /* shmem pages always in the page cache */
+ printf("tmpfs...");
+ skip("Skip");
+ goto skip;
+ }
+
+ ops->fault(p, 0, (hpage_pmd_nr - max_ptes_none - fault_nr_pages) * page_size);
+ c->collapse("Maybe collapse with max_ptes_none exceeded", p, 1,
+ ops, !c->enforce_pte_scan_limits);
+ validate_memory(p, 0, (hpage_pmd_nr - max_ptes_none - fault_nr_pages) * page_size);
+
+ if (c->enforce_pte_scan_limits) {
+ ops->fault(p, 0, (hpage_pmd_nr - max_ptes_none) * page_size);
+ c->collapse("Collapse with max_ptes_none PTEs empty", p, 1, ops,
+ true);
+ validate_memory(p, 0,
+ (hpage_pmd_nr - max_ptes_none) * page_size);
+ }
+skip:
+ ops->cleanup_area(p, hpage_pmd_size);
+ thp_pop_settings();
+}
+
+static void collapse_swapin_single_pte(struct collapse_context *c, struct mem_ops *ops)
+{
+ void *p;
+
+ p = ops->setup_area(1);
+ ops->fault(p, 0, hpage_pmd_size);
+
+ printf("Swapout one page...");
+ if (madvise(p, page_size, MADV_PAGEOUT)) {
+ perror("madvise(MADV_PAGEOUT)");
+ exit(EXIT_FAILURE);
+ }
+ if (check_swap(p, page_size)) {
+ success("OK");
+ } else {
+ fail("Fail");
+ goto out;
+ }
+
+ c->collapse("Collapse with swapping in single PTE entry", p, 1, ops,
+ true);
+ validate_memory(p, 0, hpage_pmd_size);
+out:
+ ops->cleanup_area(p, hpage_pmd_size);
+}
+
+static void collapse_max_ptes_swap(struct collapse_context *c, struct mem_ops *ops)
+{
+ int max_ptes_swap = thp_read_num("khugepaged/max_ptes_swap");
+ void *p;
+
+ p = ops->setup_area(1);
+ ops->fault(p, 0, hpage_pmd_size);
+
+ printf("Swapout %d of %d pages...", max_ptes_swap + 1, hpage_pmd_nr);
+ if (madvise(p, (max_ptes_swap + 1) * page_size, MADV_PAGEOUT)) {
+ perror("madvise(MADV_PAGEOUT)");
+ exit(EXIT_FAILURE);
+ }
+ if (check_swap(p, (max_ptes_swap + 1) * page_size)) {
+ success("OK");
+ } else {
+ fail("Fail");
+ goto out;
+ }
+
+ c->collapse("Maybe collapse with max_ptes_swap exceeded", p, 1, ops,
+ !c->enforce_pte_scan_limits);
+ validate_memory(p, 0, hpage_pmd_size);
+
+ if (c->enforce_pte_scan_limits) {
+ ops->fault(p, 0, hpage_pmd_size);
+ printf("Swapout %d of %d pages...", max_ptes_swap,
+ hpage_pmd_nr);
+ if (madvise(p, max_ptes_swap * page_size, MADV_PAGEOUT)) {
+ perror("madvise(MADV_PAGEOUT)");
+ exit(EXIT_FAILURE);
+ }
+ if (check_swap(p, max_ptes_swap * page_size)) {
+ success("OK");
+ } else {
+ fail("Fail");
+ goto out;
+ }
+
+ c->collapse("Collapse with max_ptes_swap pages swapped out", p,
+ 1, ops, true);
+ validate_memory(p, 0, hpage_pmd_size);
+ }
+out:
+ ops->cleanup_area(p, hpage_pmd_size);
+}
+
+static void collapse_single_pte_entry_compound(struct collapse_context *c, struct mem_ops *ops)
+{
+ void *p;
+
+ p = alloc_hpage(ops);
+
+ if (is_tmpfs(ops)) {
+ /* MADV_DONTNEED won't evict tmpfs pages */
+ printf("tmpfs...");
+ skip("Skip");
+ goto skip;
+ }
+
+ madvise(p, hpage_pmd_size, MADV_NOHUGEPAGE);
+ printf("Split huge page leaving single PTE mapping compound page...");
+ madvise(p + page_size, hpage_pmd_size - page_size, MADV_DONTNEED);
+ if (ops->check_huge(p, 0))
+ success("OK");
+ else
+ fail("Fail");
+
+ c->collapse("Collapse PTE table with single PTE mapping compound page",
+ p, 1, ops, true);
+ validate_memory(p, 0, page_size);
+skip:
+ ops->cleanup_area(p, hpage_pmd_size);
+}
+
+static void collapse_full_of_compound(struct collapse_context *c, struct mem_ops *ops)
+{
+ void *p;
+
+ p = alloc_hpage(ops);
+ printf("Split huge page leaving single PTE page table full of compound pages...");
+ madvise(p, page_size, MADV_NOHUGEPAGE);
+ madvise(p, hpage_pmd_size, MADV_NOHUGEPAGE);
+ if (ops->check_huge(p, 0))
+ success("OK");
+ else
+ fail("Fail");
+
+ c->collapse("Collapse PTE table full of compound pages", p, 1, ops,
+ true);
+ validate_memory(p, 0, hpage_pmd_size);
+ ops->cleanup_area(p, hpage_pmd_size);
+}
+
+static void collapse_compound_extreme(struct collapse_context *c, struct mem_ops *ops)
+{
+ void *p;
+ int i;
+
+ p = ops->setup_area(1);
+ for (i = 0; i < hpage_pmd_nr; i++) {
+ printf("\rConstruct PTE page table full of different PTE-mapped compound pages %3d/%d...",
+ i + 1, hpage_pmd_nr);
+
+ madvise(BASE_ADDR, hpage_pmd_size, MADV_HUGEPAGE);
+ ops->fault(BASE_ADDR, 0, hpage_pmd_size);
+ if (!ops->check_huge(BASE_ADDR, 1)) {
+ printf("Failed to allocate huge page\n");
+ exit(EXIT_FAILURE);
+ }
+ madvise(BASE_ADDR, hpage_pmd_size, MADV_NOHUGEPAGE);
+
+ p = mremap(BASE_ADDR - i * page_size,
+ i * page_size + hpage_pmd_size,
+ (i + 1) * page_size,
+ MREMAP_MAYMOVE | MREMAP_FIXED,
+ BASE_ADDR + 2 * hpage_pmd_size);
+ if (p == MAP_FAILED) {
+ perror("mremap+unmap");
+ exit(EXIT_FAILURE);
+ }
+
+ p = mremap(BASE_ADDR + 2 * hpage_pmd_size,
+ (i + 1) * page_size,
+ (i + 1) * page_size + hpage_pmd_size,
+ MREMAP_MAYMOVE | MREMAP_FIXED,
+ BASE_ADDR - (i + 1) * page_size);
+ if (p == MAP_FAILED) {
+ perror("mremap+alloc");
+ exit(EXIT_FAILURE);
+ }
+ }
+
+ ops->cleanup_area(BASE_ADDR, hpage_pmd_size);
+ ops->fault(p, 0, hpage_pmd_size);
+ if (!ops->check_huge(p, 1))
+ success("OK");
+ else
+ fail("Fail");
+
+ c->collapse("Collapse PTE table full of different compound pages", p, 1,
+ ops, true);
+
+ validate_memory(p, 0, hpage_pmd_size);
+ ops->cleanup_area(p, hpage_pmd_size);
+}
+
+static void collapse_fork(struct collapse_context *c, struct mem_ops *ops)
+{
+ int wstatus;
+ void *p;
+
+ p = ops->setup_area(1);
+
+ printf("Allocate small page...");
+ ops->fault(p, 0, page_size);
+ if (ops->check_huge(p, 0))
+ success("OK");
+ else
+ fail("Fail");
+
+ printf("Share small page over fork()...");
+ if (!fork()) {
+ /* Do not touch settings on child exit */
+ skip_settings_restore = true;
+ exit_status = 0;
+
+ if (ops->check_huge(p, 0))
+ success("OK");
+ else
+ fail("Fail");
+
+ ops->fault(p, page_size, 2 * page_size);
+ c->collapse("Collapse PTE table with single page shared with parent process",
+ p, 1, ops, true);
+
+ validate_memory(p, 0, page_size);
+ ops->cleanup_area(p, hpage_pmd_size);
+ exit(exit_status);
+ }
+
+ wait(&wstatus);
+ exit_status += WEXITSTATUS(wstatus);
+
+ printf("Check if parent still has small page...");
+ if (ops->check_huge(p, 0))
+ success("OK");
+ else
+ fail("Fail");
+ validate_memory(p, 0, page_size);
+ ops->cleanup_area(p, hpage_pmd_size);
+}
+
+static void collapse_fork_compound(struct collapse_context *c, struct mem_ops *ops)
+{
+ int wstatus;
+ void *p;
+
+ p = alloc_hpage(ops);
+ printf("Share huge page over fork()...");
+ if (!fork()) {
+ /* Do not touch settings on child exit */
+ skip_settings_restore = true;
+ exit_status = 0;
+
+ if (ops->check_huge(p, 1))
+ success("OK");
+ else
+ fail("Fail");
+
+ printf("Split huge page PMD in child process...");
+ madvise(p, page_size, MADV_NOHUGEPAGE);
+ madvise(p, hpage_pmd_size, MADV_NOHUGEPAGE);
+ if (ops->check_huge(p, 0))
+ success("OK");
+ else
+ fail("Fail");
+ ops->fault(p, 0, page_size);
+
+ thp_write_num("khugepaged/max_ptes_shared", hpage_pmd_nr - 1);
+ c->collapse("Collapse PTE table full of compound pages in child",
+ p, 1, ops, true);
+ thp_write_num("khugepaged/max_ptes_shared",
+ thp_current_settings()->khugepaged.max_ptes_shared);
+
+ validate_memory(p, 0, hpage_pmd_size);
+ ops->cleanup_area(p, hpage_pmd_size);
+ exit(exit_status);
+ }
+
+ wait(&wstatus);
+ exit_status += WEXITSTATUS(wstatus);
+
+ printf("Check if parent still has huge page...");
+ if (ops->check_huge(p, 1))
+ success("OK");
+ else
+ fail("Fail");
+ validate_memory(p, 0, hpage_pmd_size);
+ ops->cleanup_area(p, hpage_pmd_size);
+}
+
+static void collapse_max_ptes_shared(struct collapse_context *c, struct mem_ops *ops)
+{
+ int max_ptes_shared = thp_read_num("khugepaged/max_ptes_shared");
+ int wstatus;
+ void *p;
+
+ p = alloc_hpage(ops);
+ printf("Share huge page over fork()...");
+ if (!fork()) {
+ /* Do not touch settings on child exit */
+ skip_settings_restore = true;
+ exit_status = 0;
+
+ if (ops->check_huge(p, 1))
+ success("OK");
+ else
+ fail("Fail");
+
+ printf("Trigger CoW on page %d of %d...",
+ hpage_pmd_nr - max_ptes_shared - 1, hpage_pmd_nr);
+ ops->fault(p, 0, (hpage_pmd_nr - max_ptes_shared - 1) * page_size);
+ if (ops->check_huge(p, 0))
+ success("OK");
+ else
+ fail("Fail");
+
+ c->collapse("Maybe collapse with max_ptes_shared exceeded", p,
+ 1, ops, !c->enforce_pte_scan_limits);
+
+ if (c->enforce_pte_scan_limits) {
+ printf("Trigger CoW on page %d of %d...",
+ hpage_pmd_nr - max_ptes_shared, hpage_pmd_nr);
+ ops->fault(p, 0, (hpage_pmd_nr - max_ptes_shared) *
+ page_size);
+ if (ops->check_huge(p, 0))
+ success("OK");
+ else
+ fail("Fail");
+
+ c->collapse("Collapse with max_ptes_shared PTEs shared",
+ p, 1, ops, true);
+ }
+
+ validate_memory(p, 0, hpage_pmd_size);
+ ops->cleanup_area(p, hpage_pmd_size);
+ exit(exit_status);
+ }
+
+ wait(&wstatus);
+ exit_status += WEXITSTATUS(wstatus);
+
+ printf("Check if parent still has huge page...");
+ if (ops->check_huge(p, 1))
+ success("OK");
+ else
+ fail("Fail");
+ validate_memory(p, 0, hpage_pmd_size);
+ ops->cleanup_area(p, hpage_pmd_size);
+}
+
+static void madvise_collapse_existing_thps(struct collapse_context *c,
+ struct mem_ops *ops)
+{
+ void *p;
+
+ p = ops->setup_area(1);
+ ops->fault(p, 0, hpage_pmd_size);
+ c->collapse("Collapse fully populated PTE table...", p, 1, ops, true);
+ validate_memory(p, 0, hpage_pmd_size);
+
+ /* c->collapse() will find a hugepage and complain - call directly. */
+ __madvise_collapse("Re-collapse PMD-mapped hugepage", p, 1, ops, true);
+ validate_memory(p, 0, hpage_pmd_size);
+ ops->cleanup_area(p, hpage_pmd_size);
+}
+
+/*
+ * Test race with khugepaged where page tables have been retracted and
+ * pmd cleared.
+ */
+static void madvise_retracted_page_tables(struct collapse_context *c,
+ struct mem_ops *ops)
+{
+ void *p;
+ int nr_hpages = 1;
+ unsigned long size = nr_hpages * hpage_pmd_size;
+
+ p = ops->setup_area(nr_hpages);
+ ops->fault(p, 0, size);
+
+ /* Let khugepaged collapse and leave pmd cleared */
+ if (wait_for_scan("Collapse and leave PMD cleared", p, nr_hpages,
+ ops)) {
+ fail("Timeout");
+ return;
+ }
+ success("OK");
+ c->collapse("Install huge PMD from page cache", p, nr_hpages, ops,
+ true);
+ validate_memory(p, 0, size);
+ ops->cleanup_area(p, size);
+}
+
+static void usage(void)
+{
+ fprintf(stderr, "\nUsage: ./khugepaged [OPTIONS] <test type> [dir]\n\n");
+ fprintf(stderr, "\t<test type>\t: <context>:<mem_type>\n");
+ fprintf(stderr, "\t<context>\t: [all|khugepaged|madvise]\n");
+ fprintf(stderr, "\t<mem_type>\t: [all|anon|file|shmem]\n");
+ fprintf(stderr, "\n\t\"file,all\" mem_type requires [dir] argument\n");
+ fprintf(stderr, "\n\t\"file,all\" mem_type requires kernel built with\n");
+ fprintf(stderr, "\tCONFIG_READ_ONLY_THP_FOR_FS=y\n");
+ fprintf(stderr, "\n\tif [dir] is a (sub)directory of a tmpfs mount, tmpfs must be\n");
+ fprintf(stderr, "\tmounted with huge=madvise option for khugepaged tests to work\n");
+ fprintf(stderr, "\n\tSupported Options:\n");
+ fprintf(stderr, "\t\t-h: This help message.\n");
+ fprintf(stderr, "\t\t-s: mTHP size, expressed as page order.\n");
+ fprintf(stderr, "\t\t Defaults to 0. Use this size for anon allocations.\n");
+ exit(1);
+}
+
+static void parse_test_type(int argc, char **argv)
+{
+ int opt;
+ char *buf;
+ const char *token;
+
+ while ((opt = getopt(argc, argv, "s:h")) != -1) {
+ switch (opt) {
+ case 's':
+ anon_order = atoi(optarg);
+ break;
+ case 'h':
+ default:
+ usage();
+ }
+ }
+
+ argv += optind;
+ argc -= optind;
+
+ if (argc == 0) {
+ /* Backwards compatibility */
+ khugepaged_context = &__khugepaged_context;
+ madvise_context = &__madvise_context;
+ anon_ops = &__anon_ops;
+ return;
+ }
+
+ buf = strdup(argv[0]);
+ token = strsep(&buf, ":");
+
+ if (!strcmp(token, "all")) {
+ khugepaged_context = &__khugepaged_context;
+ madvise_context = &__madvise_context;
+ } else if (!strcmp(token, "khugepaged")) {
+ khugepaged_context = &__khugepaged_context;
+ } else if (!strcmp(token, "madvise")) {
+ madvise_context = &__madvise_context;
+ } else {
+ usage();
+ }
+
+ if (!buf)
+ usage();
+
+ if (!strcmp(buf, "all")) {
+ file_ops = &__file_ops;
+ anon_ops = &__anon_ops;
+ shmem_ops = &__shmem_ops;
+ } else if (!strcmp(buf, "anon")) {
+ anon_ops = &__anon_ops;
+ } else if (!strcmp(buf, "file")) {
+ file_ops = &__file_ops;
+ } else if (!strcmp(buf, "shmem")) {
+ shmem_ops = &__shmem_ops;
+ } else {
+ usage();
+ }
+
+ if (!file_ops)
+ return;
+
+ if (argc != 2)
+ usage();
+
+ get_finfo(argv[1]);
+}
+
+int main(int argc, char **argv)
+{
+ int hpage_pmd_order;
+ struct thp_settings default_settings = {
+ .thp_enabled = THP_MADVISE,
+ .thp_defrag = THP_DEFRAG_ALWAYS,
+ .shmem_enabled = SHMEM_ADVISE,
+ .use_zero_page = 0,
+ .khugepaged = {
+ .defrag = 1,
+ .alloc_sleep_millisecs = 10,
+ .scan_sleep_millisecs = 10,
+ },
+ /*
+ * When testing file-backed memory, the collapse path
+ * looks at how many pages are found in the page cache, not
+ * what pages are mapped. Disable read ahead optimization so
+ * pages don't find their way into the page cache unless
+ * we mem_ops->fault() them in.
+ */
+ .read_ahead_kb = 0,
+ };
+
+ parse_test_type(argc, argv);
+
+ setbuf(stdout, NULL);
+
+ page_size = getpagesize();
+ hpage_pmd_size = read_pmd_pagesize();
+ if (!hpage_pmd_size) {
+ printf("Reading PMD pagesize failed");
+ exit(EXIT_FAILURE);
+ }
+ hpage_pmd_nr = hpage_pmd_size / page_size;
+ hpage_pmd_order = __builtin_ctz(hpage_pmd_nr);
+
+ default_settings.khugepaged.max_ptes_none = hpage_pmd_nr - 1;
+ default_settings.khugepaged.max_ptes_swap = hpage_pmd_nr / 8;
+ default_settings.khugepaged.max_ptes_shared = hpage_pmd_nr / 2;
+ default_settings.khugepaged.pages_to_scan = hpage_pmd_nr * 8;
+ default_settings.hugepages[hpage_pmd_order].enabled = THP_INHERIT;
+ default_settings.hugepages[anon_order].enabled = THP_ALWAYS;
+
+ save_settings();
+ thp_push_settings(&default_settings);
+
+ alloc_at_fault();
+
+#define TEST(t, c, o) do { \
+ if (c && o) { \
+ printf("\nRun test: " #t " (%s:%s)\n", c->name, o->name); \
+ t(c, o); \
+ } \
+ } while (0)
+
+ TEST(collapse_full, khugepaged_context, anon_ops);
+ TEST(collapse_full, khugepaged_context, file_ops);
+ TEST(collapse_full, khugepaged_context, shmem_ops);
+ TEST(collapse_full, madvise_context, anon_ops);
+ TEST(collapse_full, madvise_context, file_ops);
+ TEST(collapse_full, madvise_context, shmem_ops);
+
+ TEST(collapse_empty, khugepaged_context, anon_ops);
+ TEST(collapse_empty, madvise_context, anon_ops);
+
+ TEST(collapse_single_pte_entry, khugepaged_context, anon_ops);
+ TEST(collapse_single_pte_entry, khugepaged_context, file_ops);
+ TEST(collapse_single_pte_entry, khugepaged_context, shmem_ops);
+ TEST(collapse_single_pte_entry, madvise_context, anon_ops);
+ TEST(collapse_single_pte_entry, madvise_context, file_ops);
+ TEST(collapse_single_pte_entry, madvise_context, shmem_ops);
+
+ TEST(collapse_max_ptes_none, khugepaged_context, anon_ops);
+ TEST(collapse_max_ptes_none, khugepaged_context, file_ops);
+ TEST(collapse_max_ptes_none, madvise_context, anon_ops);
+ TEST(collapse_max_ptes_none, madvise_context, file_ops);
+
+ TEST(collapse_single_pte_entry_compound, khugepaged_context, anon_ops);
+ TEST(collapse_single_pte_entry_compound, khugepaged_context, file_ops);
+ TEST(collapse_single_pte_entry_compound, madvise_context, anon_ops);
+ TEST(collapse_single_pte_entry_compound, madvise_context, file_ops);
+
+ TEST(collapse_full_of_compound, khugepaged_context, anon_ops);
+ TEST(collapse_full_of_compound, khugepaged_context, file_ops);
+ TEST(collapse_full_of_compound, khugepaged_context, shmem_ops);
+ TEST(collapse_full_of_compound, madvise_context, anon_ops);
+ TEST(collapse_full_of_compound, madvise_context, file_ops);
+ TEST(collapse_full_of_compound, madvise_context, shmem_ops);
+
+ TEST(collapse_compound_extreme, khugepaged_context, anon_ops);
+ TEST(collapse_compound_extreme, madvise_context, anon_ops);
+
+ TEST(collapse_swapin_single_pte, khugepaged_context, anon_ops);
+ TEST(collapse_swapin_single_pte, madvise_context, anon_ops);
+
+ TEST(collapse_max_ptes_swap, khugepaged_context, anon_ops);
+ TEST(collapse_max_ptes_swap, madvise_context, anon_ops);
+
+ TEST(collapse_fork, khugepaged_context, anon_ops);
+ TEST(collapse_fork, madvise_context, anon_ops);
+
+ TEST(collapse_fork_compound, khugepaged_context, anon_ops);
+ TEST(collapse_fork_compound, madvise_context, anon_ops);
+
+ TEST(collapse_max_ptes_shared, khugepaged_context, anon_ops);
+ TEST(collapse_max_ptes_shared, madvise_context, anon_ops);
+
+ TEST(madvise_collapse_existing_thps, madvise_context, anon_ops);
+ TEST(madvise_collapse_existing_thps, madvise_context, file_ops);
+ TEST(madvise_collapse_existing_thps, madvise_context, shmem_ops);
+
+ TEST(madvise_retracted_page_tables, madvise_context, file_ops);
+ TEST(madvise_retracted_page_tables, madvise_context, shmem_ops);
+
+ restore_settings(0);
+}
diff --git a/tools/testing/selftests/mm/ksm_functional_tests.c b/tools/testing/selftests/mm/ksm_functional_tests.c
new file mode 100644
index 000000000000..d615767e396b
--- /dev/null
+++ b/tools/testing/selftests/mm/ksm_functional_tests.c
@@ -0,0 +1,650 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * KSM functional tests
+ *
+ * Copyright 2022, Red Hat, Inc.
+ *
+ * Author(s): David Hildenbrand <david@redhat.com>
+ */
+#define _GNU_SOURCE
+#include <stdlib.h>
+#include <string.h>
+#include <stdbool.h>
+#include <stdint.h>
+#include <unistd.h>
+#include <errno.h>
+#include <fcntl.h>
+#include <sys/mman.h>
+#include <sys/prctl.h>
+#include <sys/syscall.h>
+#include <sys/ioctl.h>
+#include <sys/wait.h>
+#include <linux/userfaultfd.h>
+
+#include "../kselftest.h"
+#include "vm_util.h"
+
+#define KiB 1024u
+#define MiB (1024 * KiB)
+#define FORK_EXEC_CHILD_PRG_NAME "ksm_fork_exec_child"
+
+static int mem_fd;
+static int ksm_fd;
+static int ksm_full_scans_fd;
+static int proc_self_ksm_stat_fd;
+static int proc_self_ksm_merging_pages_fd;
+static int ksm_use_zero_pages_fd;
+static int pagemap_fd;
+static size_t pagesize;
+
+static bool range_maps_duplicates(char *addr, unsigned long size)
+{
+ unsigned long offs_a, offs_b, pfn_a, pfn_b;
+
+ /*
+ * There is no easy way to check if there are KSM pages mapped into
+ * this range. We only check that the range does not map the same PFN
+ * twice by comparing each pair of mapped pages.
+ */
+ for (offs_a = 0; offs_a < size; offs_a += pagesize) {
+ pfn_a = pagemap_get_pfn(pagemap_fd, addr + offs_a);
+ /* Page not present or PFN not exposed by the kernel. */
+ if (pfn_a == -1ul || !pfn_a)
+ continue;
+
+ for (offs_b = offs_a + pagesize; offs_b < size;
+ offs_b += pagesize) {
+ pfn_b = pagemap_get_pfn(pagemap_fd, addr + offs_b);
+ if (pfn_b == -1ul || !pfn_b)
+ continue;
+ if (pfn_a == pfn_b)
+ return true;
+ }
+ }
+ return false;
+}
+
+static long get_my_ksm_zero_pages(void)
+{
+ char buf[200];
+ char *substr_ksm_zero;
+ size_t value_pos;
+ ssize_t read_size;
+ unsigned long my_ksm_zero_pages;
+
+ if (!proc_self_ksm_stat_fd)
+ return 0;
+
+ read_size = pread(proc_self_ksm_stat_fd, buf, sizeof(buf) - 1, 0);
+ if (read_size < 0)
+ return -errno;
+
+ buf[read_size] = 0;
+
+ substr_ksm_zero = strstr(buf, "ksm_zero_pages");
+ if (!substr_ksm_zero)
+ return 0;
+
+ value_pos = strcspn(substr_ksm_zero, "0123456789");
+ my_ksm_zero_pages = strtol(substr_ksm_zero + value_pos, NULL, 10);
+
+ return my_ksm_zero_pages;
+}
+
+static long get_my_merging_pages(void)
+{
+ char buf[10];
+ ssize_t ret;
+
+ if (proc_self_ksm_merging_pages_fd < 0)
+ return proc_self_ksm_merging_pages_fd;
+
+ ret = pread(proc_self_ksm_merging_pages_fd, buf, sizeof(buf) - 1, 0);
+ if (ret <= 0)
+ return -errno;
+ buf[ret] = 0;
+
+ return strtol(buf, NULL, 10);
+}
+
+static long ksm_get_full_scans(void)
+{
+ char buf[10];
+ ssize_t ret;
+
+ ret = pread(ksm_full_scans_fd, buf, sizeof(buf) - 1, 0);
+ if (ret <= 0)
+ return -errno;
+ buf[ret] = 0;
+
+ return strtol(buf, NULL, 10);
+}
+
+static int ksm_merge(void)
+{
+ long start_scans, end_scans;
+
+ /* Wait for two full scans such that any possible merging happened. */
+ start_scans = ksm_get_full_scans();
+ if (start_scans < 0)
+ return start_scans;
+ if (write(ksm_fd, "1", 1) != 1)
+ return -errno;
+ do {
+ end_scans = ksm_get_full_scans();
+ if (end_scans < 0)
+ return end_scans;
+ } while (end_scans < start_scans + 2);
+
+ return 0;
+}
+
+static int ksm_unmerge(void)
+{
+ if (write(ksm_fd, "2", 1) != 1)
+ return -errno;
+ return 0;
+}
+
+static char *mmap_and_merge_range(char val, unsigned long size, int prot,
+ bool use_prctl)
+{
+ char *map;
+ int ret;
+
+ /* Stabilize accounting by disabling KSM completely. */
+ if (ksm_unmerge()) {
+ ksft_test_result_fail("Disabling (unmerging) KSM failed\n");
+ return MAP_FAILED;
+ }
+
+ if (get_my_merging_pages() > 0) {
+ ksft_test_result_fail("Still pages merged\n");
+ return MAP_FAILED;
+ }
+
+ map = mmap(NULL, size, PROT_READ|PROT_WRITE,
+ MAP_PRIVATE|MAP_ANON, -1, 0);
+ if (map == MAP_FAILED) {
+ ksft_test_result_fail("mmap() failed\n");
+ return MAP_FAILED;
+ }
+
+ /* Don't use THP. Ignore if THP are not around on a kernel. */
+ if (madvise(map, size, MADV_NOHUGEPAGE) && errno != EINVAL) {
+ ksft_test_result_fail("MADV_NOHUGEPAGE failed\n");
+ goto unmap;
+ }
+
+ /* Make sure each page contains the same values to merge them. */
+ memset(map, val, size);
+
+ if (mprotect(map, size, prot)) {
+ ksft_test_result_skip("mprotect() failed\n");
+ goto unmap;
+ }
+
+ if (use_prctl) {
+ ret = prctl(PR_SET_MEMORY_MERGE, 1, 0, 0, 0);
+ if (ret < 0 && errno == EINVAL) {
+ ksft_test_result_skip("PR_SET_MEMORY_MERGE not supported\n");
+ goto unmap;
+ } else if (ret) {
+ ksft_test_result_fail("PR_SET_MEMORY_MERGE=1 failed\n");
+ goto unmap;
+ }
+ } else if (madvise(map, size, MADV_MERGEABLE)) {
+ ksft_test_result_fail("MADV_MERGEABLE failed\n");
+ goto unmap;
+ }
+
+ /* Run KSM to trigger merging and wait. */
+ if (ksm_merge()) {
+ ksft_test_result_fail("Running KSM failed\n");
+ goto unmap;
+ }
+
+ /*
+ * Check if anything was merged at all. Ignore the zero page that is
+ * accounted differently (depending on kernel support).
+ */
+ if (val && !get_my_merging_pages()) {
+ ksft_test_result_fail("No pages got merged\n");
+ goto unmap;
+ }
+
+ return map;
+unmap:
+ munmap(map, size);
+ return MAP_FAILED;
+}
+
+static void test_unmerge(void)
+{
+ const unsigned int size = 2 * MiB;
+ char *map;
+
+ ksft_print_msg("[RUN] %s\n", __func__);
+
+ map = mmap_and_merge_range(0xcf, size, PROT_READ | PROT_WRITE, false);
+ if (map == MAP_FAILED)
+ return;
+
+ if (madvise(map, size, MADV_UNMERGEABLE)) {
+ ksft_test_result_fail("MADV_UNMERGEABLE failed\n");
+ goto unmap;
+ }
+
+ ksft_test_result(!range_maps_duplicates(map, size),
+ "Pages were unmerged\n");
+unmap:
+ munmap(map, size);
+}
+
+static void test_unmerge_zero_pages(void)
+{
+ const unsigned int size = 2 * MiB;
+ char *map;
+ unsigned int offs;
+ unsigned long pages_expected;
+
+ ksft_print_msg("[RUN] %s\n", __func__);
+
+ if (proc_self_ksm_stat_fd < 0) {
+ ksft_test_result_skip("open(\"/proc/self/ksm_stat\") failed\n");
+ return;
+ }
+ if (ksm_use_zero_pages_fd < 0) {
+ ksft_test_result_skip("open \"/sys/kernel/mm/ksm/use_zero_pages\" failed\n");
+ return;
+ }
+ if (write(ksm_use_zero_pages_fd, "1", 1) != 1) {
+ ksft_test_result_skip("write \"/sys/kernel/mm/ksm/use_zero_pages\" failed\n");
+ return;
+ }
+
+ /* Let KSM deduplicate zero pages. */
+ map = mmap_and_merge_range(0x00, size, PROT_READ | PROT_WRITE, false);
+ if (map == MAP_FAILED)
+ return;
+
+ /* Check if ksm_zero_pages is updated correctly after KSM merging */
+ pages_expected = size / pagesize;
+ if (pages_expected != get_my_ksm_zero_pages()) {
+ ksft_test_result_fail("'ksm_zero_pages' updated after merging\n");
+ goto unmap;
+ }
+
+ /* Try to unmerge half of the region */
+ if (madvise(map, size / 2, MADV_UNMERGEABLE)) {
+ ksft_test_result_fail("MADV_UNMERGEABLE failed\n");
+ goto unmap;
+ }
+
+ /* Check if ksm_zero_pages is updated correctly after unmerging */
+ pages_expected /= 2;
+ if (pages_expected != get_my_ksm_zero_pages()) {
+ ksft_test_result_fail("'ksm_zero_pages' updated after unmerging\n");
+ goto unmap;
+ }
+
+ /* Trigger unmerging of the other half by writing to the pages. */
+ for (offs = size / 2; offs < size; offs += pagesize)
+ *((unsigned int *)&map[offs]) = offs;
+
+ /* Now we should have no zeropages remaining. */
+ if (get_my_ksm_zero_pages()) {
+ ksft_test_result_fail("'ksm_zero_pages' updated after write fault\n");
+ goto unmap;
+ }
+
+ /* Check if ksm zero pages are really unmerged */
+ ksft_test_result(!range_maps_duplicates(map, size),
+ "KSM zero pages were unmerged\n");
+unmap:
+ munmap(map, size);
+}
+
+static void test_unmerge_discarded(void)
+{
+ const unsigned int size = 2 * MiB;
+ char *map;
+
+ ksft_print_msg("[RUN] %s\n", __func__);
+
+ map = mmap_and_merge_range(0xcf, size, PROT_READ | PROT_WRITE, false);
+ if (map == MAP_FAILED)
+ return;
+
+ /* Discard half of all mapped pages so we have pte_none() entries. */
+ if (madvise(map, size / 2, MADV_DONTNEED)) {
+ ksft_test_result_fail("MADV_DONTNEED failed\n");
+ goto unmap;
+ }
+
+ if (madvise(map, size, MADV_UNMERGEABLE)) {
+ ksft_test_result_fail("MADV_UNMERGEABLE failed\n");
+ goto unmap;
+ }
+
+ ksft_test_result(!range_maps_duplicates(map, size),
+ "Pages were unmerged\n");
+unmap:
+ munmap(map, size);
+}
+
+#ifdef __NR_userfaultfd
+static void test_unmerge_uffd_wp(void)
+{
+ struct uffdio_writeprotect uffd_writeprotect;
+ const unsigned int size = 2 * MiB;
+ struct uffdio_api uffdio_api;
+ char *map;
+ int uffd;
+
+ ksft_print_msg("[RUN] %s\n", __func__);
+
+ map = mmap_and_merge_range(0xcf, size, PROT_READ | PROT_WRITE, false);
+ if (map == MAP_FAILED)
+ return;
+
+ /* See if UFFD is around. */
+ uffd = syscall(__NR_userfaultfd, O_CLOEXEC | O_NONBLOCK);
+ if (uffd < 0) {
+ ksft_test_result_skip("__NR_userfaultfd failed\n");
+ goto unmap;
+ }
+
+ /* See if UFFD-WP is around. */
+ uffdio_api.api = UFFD_API;
+ uffdio_api.features = UFFD_FEATURE_PAGEFAULT_FLAG_WP;
+ if (ioctl(uffd, UFFDIO_API, &uffdio_api) < 0) {
+ ksft_test_result_fail("UFFDIO_API failed\n");
+ goto close_uffd;
+ }
+ if (!(uffdio_api.features & UFFD_FEATURE_PAGEFAULT_FLAG_WP)) {
+ ksft_test_result_skip("UFFD_FEATURE_PAGEFAULT_FLAG_WP not available\n");
+ goto close_uffd;
+ }
+
+ /* Register UFFD-WP, no need for an actual handler. */
+ if (uffd_register(uffd, map, size, false, true, false)) {
+ ksft_test_result_fail("UFFDIO_REGISTER_MODE_WP failed\n");
+ goto close_uffd;
+ }
+
+ /* Write-protect the range using UFFD-WP. */
+ uffd_writeprotect.range.start = (unsigned long) map;
+ uffd_writeprotect.range.len = size;
+ uffd_writeprotect.mode = UFFDIO_WRITEPROTECT_MODE_WP;
+ if (ioctl(uffd, UFFDIO_WRITEPROTECT, &uffd_writeprotect)) {
+ ksft_test_result_fail("UFFDIO_WRITEPROTECT failed\n");
+ goto close_uffd;
+ }
+
+ if (madvise(map, size, MADV_UNMERGEABLE)) {
+ ksft_test_result_fail("MADV_UNMERGEABLE failed\n");
+ goto close_uffd;
+ }
+
+ ksft_test_result(!range_maps_duplicates(map, size),
+ "Pages were unmerged\n");
+close_uffd:
+ close(uffd);
+unmap:
+ munmap(map, size);
+}
+#endif
+
+/* Verify that KSM can be enabled / queried with prctl. */
+static void test_prctl(void)
+{
+ int ret;
+
+ ksft_print_msg("[RUN] %s\n", __func__);
+
+ ret = prctl(PR_SET_MEMORY_MERGE, 1, 0, 0, 0);
+ if (ret < 0 && errno == EINVAL) {
+ ksft_test_result_skip("PR_SET_MEMORY_MERGE not supported\n");
+ return;
+ } else if (ret) {
+ ksft_test_result_fail("PR_SET_MEMORY_MERGE=1 failed\n");
+ return;
+ }
+
+ ret = prctl(PR_GET_MEMORY_MERGE, 0, 0, 0, 0);
+ if (ret < 0) {
+ ksft_test_result_fail("PR_GET_MEMORY_MERGE failed\n");
+ return;
+ } else if (ret != 1) {
+ ksft_test_result_fail("PR_SET_MEMORY_MERGE=1 not effective\n");
+ return;
+ }
+
+ ret = prctl(PR_SET_MEMORY_MERGE, 0, 0, 0, 0);
+ if (ret) {
+ ksft_test_result_fail("PR_SET_MEMORY_MERGE=0 failed\n");
+ return;
+ }
+
+ ret = prctl(PR_GET_MEMORY_MERGE, 0, 0, 0, 0);
+ if (ret < 0) {
+ ksft_test_result_fail("PR_GET_MEMORY_MERGE failed\n");
+ return;
+ } else if (ret != 0) {
+ ksft_test_result_fail("PR_SET_MEMORY_MERGE=0 not effective\n");
+ return;
+ }
+
+ ksft_test_result_pass("Setting/clearing PR_SET_MEMORY_MERGE works\n");
+}
+
+/* Verify that prctl ksm flag is inherited. */
+static void test_prctl_fork(void)
+{
+ int ret, status;
+ pid_t child_pid;
+
+ ksft_print_msg("[RUN] %s\n", __func__);
+
+ ret = prctl(PR_SET_MEMORY_MERGE, 1, 0, 0, 0);
+ if (ret < 0 && errno == EINVAL) {
+ ksft_test_result_skip("PR_SET_MEMORY_MERGE not supported\n");
+ return;
+ } else if (ret) {
+ ksft_test_result_fail("PR_SET_MEMORY_MERGE=1 failed\n");
+ return;
+ }
+
+ child_pid = fork();
+ if (!child_pid) {
+ exit(prctl(PR_GET_MEMORY_MERGE, 0, 0, 0, 0));
+ } else if (child_pid < 0) {
+ ksft_test_result_fail("fork() failed\n");
+ return;
+ }
+
+ if (waitpid(child_pid, &status, 0) < 0) {
+ ksft_test_result_fail("waitpid() failed\n");
+ return;
+ } else if (WEXITSTATUS(status) != 1) {
+ ksft_test_result_fail("unexpected PR_GET_MEMORY_MERGE result in child\n");
+ return;
+ }
+
+ if (prctl(PR_SET_MEMORY_MERGE, 0, 0, 0, 0)) {
+ ksft_test_result_fail("PR_SET_MEMORY_MERGE=0 failed\n");
+ return;
+ }
+
+ ksft_test_result_pass("PR_SET_MEMORY_MERGE value is inherited\n");
+}
+
+static int ksm_fork_exec_child(void)
+{
+ /* Test if KSM is enabled for the process. */
+ return prctl(PR_GET_MEMORY_MERGE, 0, 0, 0, 0) == 1;
+}
+
+static void test_prctl_fork_exec(void)
+{
+ int ret, status;
+ pid_t child_pid;
+
+ ksft_print_msg("[RUN] %s\n", __func__);
+
+ ret = prctl(PR_SET_MEMORY_MERGE, 1, 0, 0, 0);
+ if (ret < 0 && errno == EINVAL) {
+ ksft_test_result_skip("PR_SET_MEMORY_MERGE not supported\n");
+ return;
+ } else if (ret) {
+ ksft_test_result_fail("PR_SET_MEMORY_MERGE=1 failed\n");
+ return;
+ }
+
+ child_pid = fork();
+ if (child_pid == -1) {
+ ksft_test_result_skip("fork() failed\n");
+ return;
+ } else if (child_pid == 0) {
+ char *prg_name = "./ksm_functional_tests";
+ char *argv_for_program[] = { prg_name, FORK_EXEC_CHILD_PRG_NAME };
+
+ execv(prg_name, argv_for_program);
+ return;
+ }
+
+ if (waitpid(child_pid, &status, 0) > 0) {
+ if (WIFEXITED(status)) {
+ status = WEXITSTATUS(status);
+ if (status) {
+ ksft_test_result_fail("KSM not enabled\n");
+ return;
+ }
+ } else {
+ ksft_test_result_fail("program didn't terminate normally\n");
+ return;
+ }
+ } else {
+ ksft_test_result_fail("waitpid() failed\n");
+ return;
+ }
+
+ if (prctl(PR_SET_MEMORY_MERGE, 0, 0, 0, 0)) {
+ ksft_test_result_fail("PR_SET_MEMORY_MERGE=0 failed\n");
+ return;
+ }
+
+ ksft_test_result_pass("PR_SET_MEMORY_MERGE value is inherited\n");
+}
+
+static void test_prctl_unmerge(void)
+{
+ const unsigned int size = 2 * MiB;
+ char *map;
+
+ ksft_print_msg("[RUN] %s\n", __func__);
+
+ map = mmap_and_merge_range(0xcf, size, PROT_READ | PROT_WRITE, true);
+ if (map == MAP_FAILED)
+ return;
+
+ if (prctl(PR_SET_MEMORY_MERGE, 0, 0, 0, 0)) {
+ ksft_test_result_fail("PR_SET_MEMORY_MERGE=0 failed\n");
+ goto unmap;
+ }
+
+ ksft_test_result(!range_maps_duplicates(map, size),
+ "Pages were unmerged\n");
+unmap:
+ munmap(map, size);
+}
+
+static void test_prot_none(void)
+{
+ const unsigned int size = 2 * MiB;
+ char *map;
+ int i;
+
+ ksft_print_msg("[RUN] %s\n", __func__);
+
+ map = mmap_and_merge_range(0x11, size, PROT_NONE, false);
+ if (map == MAP_FAILED)
+ goto unmap;
+
+ /* Store a unique value in each page on one half using ptrace */
+ for (i = 0; i < size / 2; i += pagesize) {
+ lseek(mem_fd, (uintptr_t) map + i, SEEK_SET);
+ if (write(mem_fd, &i, sizeof(i)) != sizeof(i)) {
+ ksft_test_result_fail("ptrace write failed\n");
+ goto unmap;
+ }
+ }
+
+ /* Trigger unsharing on the other half. */
+ if (madvise(map + size / 2, size / 2, MADV_UNMERGEABLE)) {
+ ksft_test_result_fail("MADV_UNMERGEABLE failed\n");
+ goto unmap;
+ }
+
+ ksft_test_result(!range_maps_duplicates(map, size),
+ "Pages were unmerged\n");
+unmap:
+ munmap(map, size);
+}
+
+int main(int argc, char **argv)
+{
+ unsigned int tests = 8;
+ int err;
+
+ if (argc > 1 && !strcmp(argv[1], FORK_EXEC_CHILD_PRG_NAME)) {
+ exit(ksm_fork_exec_child() == 1 ? 0 : 1);
+ }
+
+#ifdef __NR_userfaultfd
+ tests++;
+#endif
+
+ ksft_print_header();
+ ksft_set_plan(tests);
+
+ pagesize = getpagesize();
+
+ mem_fd = open("/proc/self/mem", O_RDWR);
+ if (mem_fd < 0)
+ ksft_exit_fail_msg("opening /proc/self/mem failed\n");
+ ksm_fd = open("/sys/kernel/mm/ksm/run", O_RDWR);
+ if (ksm_fd < 0)
+ ksft_exit_skip("open(\"/sys/kernel/mm/ksm/run\") failed\n");
+ ksm_full_scans_fd = open("/sys/kernel/mm/ksm/full_scans", O_RDONLY);
+ if (ksm_full_scans_fd < 0)
+ ksft_exit_skip("open(\"/sys/kernel/mm/ksm/full_scans\") failed\n");
+ pagemap_fd = open("/proc/self/pagemap", O_RDONLY);
+ if (pagemap_fd < 0)
+ ksft_exit_skip("open(\"/proc/self/pagemap\") failed\n");
+ proc_self_ksm_stat_fd = open("/proc/self/ksm_stat", O_RDONLY);
+ proc_self_ksm_merging_pages_fd = open("/proc/self/ksm_merging_pages",
+ O_RDONLY);
+ ksm_use_zero_pages_fd = open("/sys/kernel/mm/ksm/use_zero_pages", O_RDWR);
+
+ test_unmerge();
+ test_unmerge_zero_pages();
+ test_unmerge_discarded();
+#ifdef __NR_userfaultfd
+ test_unmerge_uffd_wp();
+#endif
+
+ test_prot_none();
+
+ test_prctl();
+ test_prctl_fork();
+ test_prctl_fork_exec();
+ test_prctl_unmerge();
+
+ err = ksft_get_fail_cnt();
+ if (err)
+ ksft_exit_fail_msg("%d out of %d tests failed\n",
+ err, ksft_test_num());
+ return ksft_exit_pass();
+}
diff --git a/tools/testing/selftests/mm/ksm_tests.c b/tools/testing/selftests/mm/ksm_tests.c
new file mode 100644
index 000000000000..b748c48908d9
--- /dev/null
+++ b/tools/testing/selftests/mm/ksm_tests.c
@@ -0,0 +1,948 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include <sys/mman.h>
+#include <sys/prctl.h>
+#include <sys/wait.h>
+#include <stdbool.h>
+#include <time.h>
+#include <string.h>
+#include <numa.h>
+#include <unistd.h>
+#include <fcntl.h>
+#include <stdint.h>
+#include <err.h>
+
+#include "../kselftest.h"
+#include <include/vdso/time64.h>
+#include "vm_util.h"
+
+#define KSM_SYSFS_PATH "/sys/kernel/mm/ksm/"
+#define KSM_FP(s) (KSM_SYSFS_PATH s)
+#define KSM_SCAN_LIMIT_SEC_DEFAULT 120
+#define KSM_PAGE_COUNT_DEFAULT 10l
+#define KSM_PROT_STR_DEFAULT "rw"
+#define KSM_USE_ZERO_PAGES_DEFAULT false
+#define KSM_MERGE_ACROSS_NODES_DEFAULT true
+#define KSM_MERGE_TYPE_DEFAULT 0
+#define MB (1ul << 20)
+
+struct ksm_sysfs {
+ unsigned long max_page_sharing;
+ unsigned long merge_across_nodes;
+ unsigned long pages_to_scan;
+ unsigned long run;
+ unsigned long sleep_millisecs;
+ unsigned long stable_node_chains_prune_millisecs;
+ unsigned long use_zero_pages;
+};
+
+enum ksm_merge_type {
+ KSM_MERGE_MADVISE,
+ KSM_MERGE_PRCTL,
+ KSM_MERGE_LAST = KSM_MERGE_PRCTL
+};
+
+enum ksm_test_name {
+ CHECK_KSM_MERGE,
+ CHECK_KSM_UNMERGE,
+ CHECK_KSM_GET_MERGE_TYPE,
+ CHECK_KSM_ZERO_PAGE_MERGE,
+ CHECK_KSM_NUMA_MERGE,
+ KSM_MERGE_TIME,
+ KSM_MERGE_TIME_HUGE_PAGES,
+ KSM_UNMERGE_TIME,
+ KSM_COW_TIME
+};
+
+int debug;
+
+static int ksm_write_sysfs(const char *file_path, unsigned long val)
+{
+ FILE *f = fopen(file_path, "w");
+
+ if (!f) {
+ fprintf(stderr, "f %s\n", file_path);
+ perror("fopen");
+ return 1;
+ }
+ if (fprintf(f, "%lu", val) < 0) {
+ perror("fprintf");
+ fclose(f);
+ return 1;
+ }
+ fclose(f);
+
+ return 0;
+}
+
+static int ksm_read_sysfs(const char *file_path, unsigned long *val)
+{
+ FILE *f = fopen(file_path, "r");
+
+ if (!f) {
+ fprintf(stderr, "f %s\n", file_path);
+ perror("fopen");
+ return 1;
+ }
+ if (fscanf(f, "%lu", val) != 1) {
+ perror("fscanf");
+ fclose(f);
+ return 1;
+ }
+ fclose(f);
+
+ return 0;
+}
+
+static void ksm_print_sysfs(void)
+{
+ unsigned long max_page_sharing, pages_sharing, pages_shared;
+ unsigned long full_scans, pages_unshared, pages_volatile;
+ unsigned long stable_node_chains, stable_node_dups;
+ long general_profit;
+
+ if (ksm_read_sysfs(KSM_FP("pages_shared"), &pages_shared) ||
+ ksm_read_sysfs(KSM_FP("pages_sharing"), &pages_sharing) ||
+ ksm_read_sysfs(KSM_FP("max_page_sharing"), &max_page_sharing) ||
+ ksm_read_sysfs(KSM_FP("full_scans"), &full_scans) ||
+ ksm_read_sysfs(KSM_FP("pages_unshared"), &pages_unshared) ||
+ ksm_read_sysfs(KSM_FP("pages_volatile"), &pages_volatile) ||
+ ksm_read_sysfs(KSM_FP("stable_node_chains"), &stable_node_chains) ||
+ ksm_read_sysfs(KSM_FP("stable_node_dups"), &stable_node_dups) ||
+ ksm_read_sysfs(KSM_FP("general_profit"), (unsigned long *)&general_profit))
+ return;
+
+ printf("pages_shared : %lu\n", pages_shared);
+ printf("pages_sharing : %lu\n", pages_sharing);
+ printf("max_page_sharing : %lu\n", max_page_sharing);
+ printf("full_scans : %lu\n", full_scans);
+ printf("pages_unshared : %lu\n", pages_unshared);
+ printf("pages_volatile : %lu\n", pages_volatile);
+ printf("stable_node_chains: %lu\n", stable_node_chains);
+ printf("stable_node_dups : %lu\n", stable_node_dups);
+ printf("general_profit : %ld\n", general_profit);
+}
+
+static void ksm_print_procfs(void)
+{
+ const char *file_name = "/proc/self/ksm_stat";
+ char buffer[512];
+ FILE *f = fopen(file_name, "r");
+
+ if (!f) {
+ fprintf(stderr, "f %s\n", file_name);
+ perror("fopen");
+ return;
+ }
+
+ while (fgets(buffer, sizeof(buffer), f))
+ printf("%s", buffer);
+
+ fclose(f);
+}
+
+static int str_to_prot(char *prot_str)
+{
+ int prot = 0;
+
+ if ((strchr(prot_str, 'r')) != NULL)
+ prot |= PROT_READ;
+ if ((strchr(prot_str, 'w')) != NULL)
+ prot |= PROT_WRITE;
+ if ((strchr(prot_str, 'x')) != NULL)
+ prot |= PROT_EXEC;
+
+ return prot;
+}
+
+static void print_help(void)
+{
+ printf("usage: ksm_tests [-h] <test type> [-a prot] [-p page_count] [-l timeout]\n"
+ "[-z use_zero_pages] [-m merge_across_nodes] [-s size]\n");
+
+ printf("Supported <test type>:\n"
+ " -M (page merging)\n"
+ " -Z (zero pages merging)\n"
+ " -N (merging of pages in different NUMA nodes)\n"
+ " -U (page unmerging)\n"
+ " -P evaluate merging time and speed.\n"
+ " For this test, the size of duplicated memory area (in MiB)\n"
+ " must be provided using -s option\n"
+ " -H evaluate merging time and speed of area allocated mostly with huge pages\n"
+ " For this test, the size of duplicated memory area (in MiB)\n"
+ " must be provided using -s option\n"
+ " -D evaluate unmerging time and speed when disabling KSM.\n"
+ " For this test, the size of duplicated memory area (in MiB)\n"
+ " must be provided using -s option\n"
+ " -C evaluate the time required to break COW of merged pages.\n\n");
+
+ printf(" -a: specify the access protections of pages.\n"
+ " <prot> must be of the form [rwx].\n"
+ " Default: %s\n", KSM_PROT_STR_DEFAULT);
+ printf(" -p: specify the number of pages to test.\n"
+ " Default: %ld\n", KSM_PAGE_COUNT_DEFAULT);
+ printf(" -l: limit the maximum running time (in seconds) for a test.\n"
+ " Default: %d seconds\n", KSM_SCAN_LIMIT_SEC_DEFAULT);
+ printf(" -z: change use_zero_pages tunable\n"
+ " Default: %d\n", KSM_USE_ZERO_PAGES_DEFAULT);
+ printf(" -m: change merge_across_nodes tunable\n"
+ " Default: %d\n", KSM_MERGE_ACROSS_NODES_DEFAULT);
+ printf(" -d: turn debugging output on\n");
+ printf(" -s: the size of duplicated memory area (in MiB)\n");
+ printf(" -t: KSM merge type\n"
+ " Default: 0\n"
+ " 0: madvise merging\n"
+ " 1: prctl merging\n");
+
+ exit(0);
+}
+
+static void *allocate_memory(void *ptr, int prot, int mapping, char data, size_t map_size)
+{
+ void *map_ptr = mmap(ptr, map_size, PROT_WRITE, mapping, -1, 0);
+
+ if (!map_ptr) {
+ perror("mmap");
+ return NULL;
+ }
+ memset(map_ptr, data, map_size);
+ if (mprotect(map_ptr, map_size, prot)) {
+ perror("mprotect");
+ munmap(map_ptr, map_size);
+ return NULL;
+ }
+
+ return map_ptr;
+}
+
+static int ksm_do_scan(int scan_count, struct timespec start_time, int timeout)
+{
+ struct timespec cur_time;
+ unsigned long cur_scan, init_scan;
+
+ if (ksm_read_sysfs(KSM_FP("full_scans"), &init_scan))
+ return 1;
+ cur_scan = init_scan;
+
+ while (cur_scan < init_scan + scan_count) {
+ if (ksm_read_sysfs(KSM_FP("full_scans"), &cur_scan))
+ return 1;
+ if (clock_gettime(CLOCK_MONOTONIC_RAW, &cur_time)) {
+ perror("clock_gettime");
+ return 1;
+ }
+ if ((cur_time.tv_sec - start_time.tv_sec) > timeout) {
+ printf("Scan time limit exceeded\n");
+ return 1;
+ }
+ }
+
+ return 0;
+}
+
+static int ksm_merge_pages(int merge_type, void *addr, size_t size,
+ struct timespec start_time, int timeout)
+{
+ if (merge_type == KSM_MERGE_MADVISE) {
+ if (madvise(addr, size, MADV_MERGEABLE)) {
+ perror("madvise");
+ return 1;
+ }
+ } else if (merge_type == KSM_MERGE_PRCTL) {
+ if (prctl(PR_SET_MEMORY_MERGE, 1, 0, 0, 0)) {
+ perror("prctl");
+ return 1;
+ }
+ }
+
+ if (ksm_write_sysfs(KSM_FP("run"), 1))
+ return 1;
+
+ /* Since merging occurs only after 2 scans, make sure to get at least 2 full scans */
+ if (ksm_do_scan(2, start_time, timeout))
+ return 1;
+
+ return 0;
+}
+
+static int ksm_unmerge_pages(void *addr, size_t size,
+ struct timespec start_time, int timeout)
+{
+ if (madvise(addr, size, MADV_UNMERGEABLE)) {
+ perror("madvise");
+ return 1;
+ }
+ return 0;
+}
+
+static bool assert_ksm_pages_count(long dupl_page_count)
+{
+ unsigned long max_page_sharing, pages_sharing, pages_shared;
+
+ if (ksm_read_sysfs(KSM_FP("pages_shared"), &pages_shared) ||
+ ksm_read_sysfs(KSM_FP("pages_sharing"), &pages_sharing) ||
+ ksm_read_sysfs(KSM_FP("max_page_sharing"), &max_page_sharing))
+ return false;
+
+ if (debug) {
+ ksm_print_sysfs();
+ ksm_print_procfs();
+ }
+
+ /*
+ * Since there must be at least 2 pages for merging and 1 page can be
+ * shared with the limited number of pages (max_page_sharing), sometimes
+ * there are 'leftover' pages that cannot be merged. For example, if there
+ * are 11 pages and max_page_sharing = 10, then only 10 pages will be
+ * merged and the 11th page won't be affected. As a result, when the number
+ * of duplicate pages is divided by max_page_sharing and the remainder is 1,
+ * pages_shared and pages_sharing values will be equal between dupl_page_count
+ * and dupl_page_count - 1.
+ */
+ if (dupl_page_count % max_page_sharing == 1 || dupl_page_count % max_page_sharing == 0) {
+ if (pages_shared == dupl_page_count / max_page_sharing &&
+ pages_sharing == pages_shared * (max_page_sharing - 1))
+ return true;
+ } else {
+ if (pages_shared == (dupl_page_count / max_page_sharing + 1) &&
+ pages_sharing == dupl_page_count - pages_shared)
+ return true;
+ }
+
+ return false;
+}
+
+static int ksm_save_def(struct ksm_sysfs *ksm_sysfs)
+{
+ if (ksm_read_sysfs(KSM_FP("max_page_sharing"), &ksm_sysfs->max_page_sharing) ||
+ numa_available() ? 0 :
+ ksm_read_sysfs(KSM_FP("merge_across_nodes"), &ksm_sysfs->merge_across_nodes) ||
+ ksm_read_sysfs(KSM_FP("sleep_millisecs"), &ksm_sysfs->sleep_millisecs) ||
+ ksm_read_sysfs(KSM_FP("pages_to_scan"), &ksm_sysfs->pages_to_scan) ||
+ ksm_read_sysfs(KSM_FP("run"), &ksm_sysfs->run) ||
+ ksm_read_sysfs(KSM_FP("stable_node_chains_prune_millisecs"),
+ &ksm_sysfs->stable_node_chains_prune_millisecs) ||
+ ksm_read_sysfs(KSM_FP("use_zero_pages"), &ksm_sysfs->use_zero_pages))
+ return 1;
+
+ return 0;
+}
+
+static int ksm_restore(struct ksm_sysfs *ksm_sysfs)
+{
+ if (ksm_write_sysfs(KSM_FP("max_page_sharing"), ksm_sysfs->max_page_sharing) ||
+ numa_available() ? 0 :
+ ksm_write_sysfs(KSM_FP("merge_across_nodes"), ksm_sysfs->merge_across_nodes) ||
+ ksm_write_sysfs(KSM_FP("pages_to_scan"), ksm_sysfs->pages_to_scan) ||
+ ksm_write_sysfs(KSM_FP("run"), ksm_sysfs->run) ||
+ ksm_write_sysfs(KSM_FP("sleep_millisecs"), ksm_sysfs->sleep_millisecs) ||
+ ksm_write_sysfs(KSM_FP("stable_node_chains_prune_millisecs"),
+ ksm_sysfs->stable_node_chains_prune_millisecs) ||
+ ksm_write_sysfs(KSM_FP("use_zero_pages"), ksm_sysfs->use_zero_pages))
+ return 1;
+
+ return 0;
+}
+
+static int check_ksm_merge(int merge_type, int mapping, int prot,
+ long page_count, int timeout, size_t page_size)
+{
+ void *map_ptr;
+ struct timespec start_time;
+
+ if (clock_gettime(CLOCK_MONOTONIC_RAW, &start_time)) {
+ perror("clock_gettime");
+ return KSFT_FAIL;
+ }
+
+ /* fill pages with the same data and merge them */
+ map_ptr = allocate_memory(NULL, prot, mapping, '*', page_size * page_count);
+ if (!map_ptr)
+ return KSFT_FAIL;
+
+ if (ksm_merge_pages(merge_type, map_ptr, page_size * page_count, start_time, timeout))
+ goto err_out;
+
+ /* verify that the right number of pages are merged */
+ if (assert_ksm_pages_count(page_count)) {
+ printf("OK\n");
+ munmap(map_ptr, page_size * page_count);
+ if (merge_type == KSM_MERGE_PRCTL)
+ prctl(PR_SET_MEMORY_MERGE, 0, 0, 0, 0);
+ return KSFT_PASS;
+ }
+
+err_out:
+ printf("Not OK\n");
+ munmap(map_ptr, page_size * page_count);
+ return KSFT_FAIL;
+}
+
+static int check_ksm_unmerge(int merge_type, int mapping, int prot, int timeout, size_t page_size)
+{
+ void *map_ptr;
+ struct timespec start_time;
+ int page_count = 2;
+
+ if (clock_gettime(CLOCK_MONOTONIC_RAW, &start_time)) {
+ perror("clock_gettime");
+ return KSFT_FAIL;
+ }
+
+ /* fill pages with the same data and merge them */
+ map_ptr = allocate_memory(NULL, prot, mapping, '*', page_size * page_count);
+ if (!map_ptr)
+ return KSFT_FAIL;
+
+ if (ksm_merge_pages(merge_type, map_ptr, page_size * page_count, start_time, timeout))
+ goto err_out;
+
+ /* change 1 byte in each of the 2 pages -- KSM must automatically unmerge them */
+ memset(map_ptr, '-', 1);
+ memset(map_ptr + page_size, '+', 1);
+
+ /* get at least 1 scan, so KSM can detect that the pages were modified */
+ if (ksm_do_scan(1, start_time, timeout))
+ goto err_out;
+
+ /* check that unmerging was successful and 0 pages are currently merged */
+ if (assert_ksm_pages_count(0)) {
+ printf("OK\n");
+ munmap(map_ptr, page_size * page_count);
+ return KSFT_PASS;
+ }
+
+err_out:
+ printf("Not OK\n");
+ munmap(map_ptr, page_size * page_count);
+ return KSFT_FAIL;
+}
+
+static int check_ksm_zero_page_merge(int merge_type, int mapping, int prot, long page_count,
+ int timeout, bool use_zero_pages, size_t page_size)
+{
+ void *map_ptr;
+ struct timespec start_time;
+
+ if (clock_gettime(CLOCK_MONOTONIC_RAW, &start_time)) {
+ perror("clock_gettime");
+ return KSFT_FAIL;
+ }
+
+ if (ksm_write_sysfs(KSM_FP("use_zero_pages"), use_zero_pages))
+ return KSFT_FAIL;
+
+ /* fill pages with zero and try to merge them */
+ map_ptr = allocate_memory(NULL, prot, mapping, 0, page_size * page_count);
+ if (!map_ptr)
+ return KSFT_FAIL;
+
+ if (ksm_merge_pages(merge_type, map_ptr, page_size * page_count, start_time, timeout))
+ goto err_out;
+
+ /*
+ * verify that the right number of pages are merged:
+ * 1) if use_zero_pages is set to 1, empty pages are merged
+ * with the kernel zero page instead of with each other;
+ * 2) if use_zero_pages is set to 0, empty pages are not treated specially
+ * and merged as usual.
+ */
+ if (use_zero_pages && !assert_ksm_pages_count(0))
+ goto err_out;
+ else if (!use_zero_pages && !assert_ksm_pages_count(page_count))
+ goto err_out;
+
+ printf("OK\n");
+ munmap(map_ptr, page_size * page_count);
+ return KSFT_PASS;
+
+err_out:
+ printf("Not OK\n");
+ munmap(map_ptr, page_size * page_count);
+ return KSFT_FAIL;
+}
+
+static int get_next_mem_node(int node)
+{
+
+ long node_size;
+ int mem_node = 0;
+ int i, max_node = numa_max_node();
+
+ for (i = node + 1; i <= max_node + node; i++) {
+ mem_node = i % (max_node + 1);
+ node_size = numa_node_size(mem_node, NULL);
+ if (node_size > 0)
+ break;
+ }
+ return mem_node;
+}
+
+static int get_first_mem_node(void)
+{
+ return get_next_mem_node(numa_max_node());
+}
+
+static int check_ksm_numa_merge(int merge_type, int mapping, int prot, int timeout,
+ bool merge_across_nodes, size_t page_size)
+{
+ void *numa1_map_ptr, *numa2_map_ptr;
+ struct timespec start_time;
+ int page_count = 2;
+ int first_node;
+
+ if (clock_gettime(CLOCK_MONOTONIC_RAW, &start_time)) {
+ perror("clock_gettime");
+ return KSFT_FAIL;
+ }
+
+ if (numa_available() < 0) {
+ perror("NUMA support not enabled");
+ return KSFT_SKIP;
+ }
+ if (numa_num_configured_nodes() <= 1) {
+ printf("At least 2 NUMA nodes must be available\n");
+ return KSFT_SKIP;
+ }
+ if (ksm_write_sysfs(KSM_FP("merge_across_nodes"), merge_across_nodes))
+ return KSFT_FAIL;
+
+ /* allocate 2 pages in 2 different NUMA nodes and fill them with the same data */
+ first_node = get_first_mem_node();
+ numa1_map_ptr = numa_alloc_onnode(page_size, first_node);
+ numa2_map_ptr = numa_alloc_onnode(page_size, get_next_mem_node(first_node));
+ if (!numa1_map_ptr || !numa2_map_ptr) {
+ perror("numa_alloc_onnode");
+ return KSFT_FAIL;
+ }
+
+ memset(numa1_map_ptr, '*', page_size);
+ memset(numa2_map_ptr, '*', page_size);
+
+ /* try to merge the pages */
+ if (ksm_merge_pages(merge_type, numa1_map_ptr, page_size, start_time, timeout) ||
+ ksm_merge_pages(merge_type, numa2_map_ptr, page_size, start_time, timeout))
+ goto err_out;
+
+ /*
+ * verify that the right number of pages are merged:
+ * 1) if merge_across_nodes was enabled, 2 duplicate pages will be merged;
+ * 2) if merge_across_nodes = 0, there must be 0 merged pages, since there is
+ * only 1 unique page in each node and they can't be shared.
+ */
+ if (merge_across_nodes && !assert_ksm_pages_count(page_count))
+ goto err_out;
+ else if (!merge_across_nodes && !assert_ksm_pages_count(0))
+ goto err_out;
+
+ numa_free(numa1_map_ptr, page_size);
+ numa_free(numa2_map_ptr, page_size);
+ printf("OK\n");
+ return KSFT_PASS;
+
+err_out:
+ numa_free(numa1_map_ptr, page_size);
+ numa_free(numa2_map_ptr, page_size);
+ printf("Not OK\n");
+ return KSFT_FAIL;
+}
+
+static int ksm_merge_hugepages_time(int merge_type, int mapping, int prot,
+ int timeout, size_t map_size)
+{
+ void *map_ptr, *map_ptr_orig;
+ struct timespec start_time, end_time;
+ unsigned long scan_time_ns;
+ int pagemap_fd, n_normal_pages, n_huge_pages;
+
+ map_size *= MB;
+ size_t len = map_size;
+
+ len -= len % HPAGE_SIZE;
+ map_ptr_orig = mmap(NULL, len + HPAGE_SIZE, PROT_READ | PROT_WRITE,
+ MAP_ANONYMOUS | MAP_NORESERVE | MAP_PRIVATE, -1, 0);
+ map_ptr = map_ptr_orig + HPAGE_SIZE - (uintptr_t)map_ptr_orig % HPAGE_SIZE;
+
+ if (map_ptr_orig == MAP_FAILED)
+ err(2, "initial mmap");
+
+ if (madvise(map_ptr, len, MADV_HUGEPAGE))
+ err(2, "MADV_HUGEPAGE");
+
+ pagemap_fd = open("/proc/self/pagemap", O_RDONLY);
+ if (pagemap_fd < 0)
+ err(2, "open pagemap");
+
+ n_normal_pages = 0;
+ n_huge_pages = 0;
+ for (void *p = map_ptr; p < map_ptr + len; p += HPAGE_SIZE) {
+ if (allocate_transhuge(p, pagemap_fd) < 0)
+ n_normal_pages++;
+ else
+ n_huge_pages++;
+ }
+ printf("Number of normal pages: %d\n", n_normal_pages);
+ printf("Number of huge pages: %d\n", n_huge_pages);
+
+ memset(map_ptr, '*', len);
+
+ if (clock_gettime(CLOCK_MONOTONIC_RAW, &start_time)) {
+ perror("clock_gettime");
+ goto err_out;
+ }
+ if (ksm_merge_pages(merge_type, map_ptr, map_size, start_time, timeout))
+ goto err_out;
+ if (clock_gettime(CLOCK_MONOTONIC_RAW, &end_time)) {
+ perror("clock_gettime");
+ goto err_out;
+ }
+
+ scan_time_ns = (end_time.tv_sec - start_time.tv_sec) * NSEC_PER_SEC +
+ (end_time.tv_nsec - start_time.tv_nsec);
+
+ printf("Total size: %lu MiB\n", map_size / MB);
+ printf("Total time: %ld.%09ld s\n", scan_time_ns / NSEC_PER_SEC,
+ scan_time_ns % NSEC_PER_SEC);
+ printf("Average speed: %.3f MiB/s\n", (map_size / MB) /
+ ((double)scan_time_ns / NSEC_PER_SEC));
+
+ munmap(map_ptr_orig, len + HPAGE_SIZE);
+ return KSFT_PASS;
+
+err_out:
+ printf("Not OK\n");
+ munmap(map_ptr_orig, len + HPAGE_SIZE);
+ return KSFT_FAIL;
+}
+
+static int ksm_merge_time(int merge_type, int mapping, int prot, int timeout, size_t map_size)
+{
+ void *map_ptr;
+ struct timespec start_time, end_time;
+ unsigned long scan_time_ns;
+
+ map_size *= MB;
+
+ map_ptr = allocate_memory(NULL, prot, mapping, '*', map_size);
+ if (!map_ptr)
+ return KSFT_FAIL;
+
+ if (clock_gettime(CLOCK_MONOTONIC_RAW, &start_time)) {
+ perror("clock_gettime");
+ goto err_out;
+ }
+ if (ksm_merge_pages(merge_type, map_ptr, map_size, start_time, timeout))
+ goto err_out;
+ if (clock_gettime(CLOCK_MONOTONIC_RAW, &end_time)) {
+ perror("clock_gettime");
+ goto err_out;
+ }
+
+ scan_time_ns = (end_time.tv_sec - start_time.tv_sec) * NSEC_PER_SEC +
+ (end_time.tv_nsec - start_time.tv_nsec);
+
+ printf("Total size: %lu MiB\n", map_size / MB);
+ printf("Total time: %ld.%09ld s\n", scan_time_ns / NSEC_PER_SEC,
+ scan_time_ns % NSEC_PER_SEC);
+ printf("Average speed: %.3f MiB/s\n", (map_size / MB) /
+ ((double)scan_time_ns / NSEC_PER_SEC));
+
+ munmap(map_ptr, map_size);
+ return KSFT_PASS;
+
+err_out:
+ printf("Not OK\n");
+ munmap(map_ptr, map_size);
+ return KSFT_FAIL;
+}
+
+static int ksm_unmerge_time(int merge_type, int mapping, int prot, int timeout, size_t map_size)
+{
+ void *map_ptr;
+ struct timespec start_time, end_time;
+ unsigned long scan_time_ns;
+
+ map_size *= MB;
+
+ map_ptr = allocate_memory(NULL, prot, mapping, '*', map_size);
+ if (!map_ptr)
+ return KSFT_FAIL;
+ if (clock_gettime(CLOCK_MONOTONIC_RAW, &start_time)) {
+ perror("clock_gettime");
+ goto err_out;
+ }
+ if (ksm_merge_pages(merge_type, map_ptr, map_size, start_time, timeout))
+ goto err_out;
+
+ if (clock_gettime(CLOCK_MONOTONIC_RAW, &start_time)) {
+ perror("clock_gettime");
+ goto err_out;
+ }
+ if (ksm_unmerge_pages(map_ptr, map_size, start_time, timeout))
+ goto err_out;
+ if (clock_gettime(CLOCK_MONOTONIC_RAW, &end_time)) {
+ perror("clock_gettime");
+ goto err_out;
+ }
+
+ scan_time_ns = (end_time.tv_sec - start_time.tv_sec) * NSEC_PER_SEC +
+ (end_time.tv_nsec - start_time.tv_nsec);
+
+ printf("Total size: %lu MiB\n", map_size / MB);
+ printf("Total time: %ld.%09ld s\n", scan_time_ns / NSEC_PER_SEC,
+ scan_time_ns % NSEC_PER_SEC);
+ printf("Average speed: %.3f MiB/s\n", (map_size / MB) /
+ ((double)scan_time_ns / NSEC_PER_SEC));
+
+ munmap(map_ptr, map_size);
+ return KSFT_PASS;
+
+err_out:
+ printf("Not OK\n");
+ munmap(map_ptr, map_size);
+ return KSFT_FAIL;
+}
+
+static int ksm_cow_time(int merge_type, int mapping, int prot, int timeout, size_t page_size)
+{
+ void *map_ptr;
+ struct timespec start_time, end_time;
+ unsigned long cow_time_ns;
+
+ /* page_count must be less than 2*page_size */
+ size_t page_count = 4000;
+
+ map_ptr = allocate_memory(NULL, prot, mapping, '*', page_size * page_count);
+ if (!map_ptr)
+ return KSFT_FAIL;
+
+ if (clock_gettime(CLOCK_MONOTONIC_RAW, &start_time)) {
+ perror("clock_gettime");
+ return KSFT_FAIL;
+ }
+ for (size_t i = 0; i < page_count - 1; i = i + 2)
+ memset(map_ptr + page_size * i, '-', 1);
+ if (clock_gettime(CLOCK_MONOTONIC_RAW, &end_time)) {
+ perror("clock_gettime");
+ return KSFT_FAIL;
+ }
+
+ cow_time_ns = (end_time.tv_sec - start_time.tv_sec) * NSEC_PER_SEC +
+ (end_time.tv_nsec - start_time.tv_nsec);
+
+ printf("Total size: %lu MiB\n\n", (page_size * page_count) / MB);
+ printf("Not merged pages:\n");
+ printf("Total time: %ld.%09ld s\n", cow_time_ns / NSEC_PER_SEC,
+ cow_time_ns % NSEC_PER_SEC);
+ printf("Average speed: %.3f MiB/s\n\n", ((page_size * (page_count / 2)) / MB) /
+ ((double)cow_time_ns / NSEC_PER_SEC));
+
+ /* Create 2000 pairs of duplicate pages */
+ for (size_t i = 0; i < page_count - 1; i = i + 2) {
+ memset(map_ptr + page_size * i, '+', i / 2 + 1);
+ memset(map_ptr + page_size * (i + 1), '+', i / 2 + 1);
+ }
+ if (ksm_merge_pages(merge_type, map_ptr, page_size * page_count, start_time, timeout))
+ goto err_out;
+
+ if (clock_gettime(CLOCK_MONOTONIC_RAW, &start_time)) {
+ perror("clock_gettime");
+ goto err_out;
+ }
+ for (size_t i = 0; i < page_count - 1; i = i + 2)
+ memset(map_ptr + page_size * i, '-', 1);
+ if (clock_gettime(CLOCK_MONOTONIC_RAW, &end_time)) {
+ perror("clock_gettime");
+ goto err_out;
+ }
+
+ cow_time_ns = (end_time.tv_sec - start_time.tv_sec) * NSEC_PER_SEC +
+ (end_time.tv_nsec - start_time.tv_nsec);
+
+ printf("Merged pages:\n");
+ printf("Total time: %ld.%09ld s\n", cow_time_ns / NSEC_PER_SEC,
+ cow_time_ns % NSEC_PER_SEC);
+ printf("Average speed: %.3f MiB/s\n", ((page_size * (page_count / 2)) / MB) /
+ ((double)cow_time_ns / NSEC_PER_SEC));
+
+ munmap(map_ptr, page_size * page_count);
+ return KSFT_PASS;
+
+err_out:
+ printf("Not OK\n");
+ munmap(map_ptr, page_size * page_count);
+ return KSFT_FAIL;
+}
+
+int main(int argc, char *argv[])
+{
+ int ret, opt;
+ int prot = 0;
+ int ksm_scan_limit_sec = KSM_SCAN_LIMIT_SEC_DEFAULT;
+ int merge_type = KSM_MERGE_TYPE_DEFAULT;
+ long page_count = KSM_PAGE_COUNT_DEFAULT;
+ size_t page_size = sysconf(_SC_PAGESIZE);
+ struct ksm_sysfs ksm_sysfs_old;
+ int test_name = CHECK_KSM_MERGE;
+ bool use_zero_pages = KSM_USE_ZERO_PAGES_DEFAULT;
+ bool merge_across_nodes = KSM_MERGE_ACROSS_NODES_DEFAULT;
+ long size_MB = 0;
+
+ while ((opt = getopt(argc, argv, "dha:p:l:z:m:s:t:MUZNPCHD")) != -1) {
+ switch (opt) {
+ case 'a':
+ prot = str_to_prot(optarg);
+ break;
+ case 'p':
+ page_count = atol(optarg);
+ if (page_count <= 0) {
+ printf("The number of pages must be greater than 0\n");
+ return KSFT_FAIL;
+ }
+ break;
+ case 'l':
+ ksm_scan_limit_sec = atoi(optarg);
+ if (ksm_scan_limit_sec <= 0) {
+ printf("Timeout value must be greater than 0\n");
+ return KSFT_FAIL;
+ }
+ break;
+ case 'h':
+ print_help();
+ break;
+ case 'z':
+ if (strcmp(optarg, "0") == 0)
+ use_zero_pages = 0;
+ else
+ use_zero_pages = 1;
+ break;
+ case 'm':
+ if (strcmp(optarg, "0") == 0)
+ merge_across_nodes = 0;
+ else
+ merge_across_nodes = 1;
+ break;
+ case 'd':
+ debug = 1;
+ break;
+ case 's':
+ size_MB = atoi(optarg);
+ if (size_MB <= 0) {
+ printf("Size must be greater than 0\n");
+ return KSFT_FAIL;
+ }
+ break;
+ case 't':
+ {
+ int tmp = atoi(optarg);
+
+ if (tmp < 0 || tmp > KSM_MERGE_LAST) {
+ printf("Invalid merge type\n");
+ return KSFT_FAIL;
+ }
+ merge_type = tmp;
+ }
+ break;
+ case 'M':
+ break;
+ case 'U':
+ test_name = CHECK_KSM_UNMERGE;
+ break;
+ case 'Z':
+ test_name = CHECK_KSM_ZERO_PAGE_MERGE;
+ break;
+ case 'N':
+ test_name = CHECK_KSM_NUMA_MERGE;
+ break;
+ case 'P':
+ test_name = KSM_MERGE_TIME;
+ break;
+ case 'H':
+ test_name = KSM_MERGE_TIME_HUGE_PAGES;
+ break;
+ case 'D':
+ test_name = KSM_UNMERGE_TIME;
+ break;
+ case 'C':
+ test_name = KSM_COW_TIME;
+ break;
+ default:
+ return KSFT_FAIL;
+ }
+ }
+
+ if (prot == 0)
+ prot = str_to_prot(KSM_PROT_STR_DEFAULT);
+
+ if (access(KSM_SYSFS_PATH, F_OK)) {
+ printf("Config KSM not enabled\n");
+ return KSFT_SKIP;
+ }
+
+ if (ksm_save_def(&ksm_sysfs_old)) {
+ printf("Cannot save default tunables\n");
+ return KSFT_FAIL;
+ }
+
+ if (ksm_write_sysfs(KSM_FP("run"), 2) ||
+ ksm_write_sysfs(KSM_FP("sleep_millisecs"), 0) ||
+ numa_available() ? 0 :
+ ksm_write_sysfs(KSM_FP("merge_across_nodes"), 1) ||
+ ksm_write_sysfs(KSM_FP("pages_to_scan"), page_count))
+ return KSFT_FAIL;
+
+ switch (test_name) {
+ case CHECK_KSM_MERGE:
+ ret = check_ksm_merge(merge_type, MAP_PRIVATE | MAP_ANONYMOUS, prot, page_count,
+ ksm_scan_limit_sec, page_size);
+ break;
+ case CHECK_KSM_UNMERGE:
+ ret = check_ksm_unmerge(merge_type, MAP_PRIVATE | MAP_ANONYMOUS, prot,
+ ksm_scan_limit_sec, page_size);
+ break;
+ case CHECK_KSM_ZERO_PAGE_MERGE:
+ ret = check_ksm_zero_page_merge(merge_type, MAP_PRIVATE | MAP_ANONYMOUS, prot,
+ page_count, ksm_scan_limit_sec, use_zero_pages,
+ page_size);
+ break;
+ case CHECK_KSM_NUMA_MERGE:
+ ret = check_ksm_numa_merge(merge_type, MAP_PRIVATE | MAP_ANONYMOUS, prot,
+ ksm_scan_limit_sec, merge_across_nodes, page_size);
+ break;
+ case KSM_MERGE_TIME:
+ if (size_MB == 0) {
+ printf("Option '-s' is required.\n");
+ return KSFT_FAIL;
+ }
+ ret = ksm_merge_time(merge_type, MAP_PRIVATE | MAP_ANONYMOUS, prot,
+ ksm_scan_limit_sec, size_MB);
+ break;
+ case KSM_MERGE_TIME_HUGE_PAGES:
+ if (size_MB == 0) {
+ printf("Option '-s' is required.\n");
+ return KSFT_FAIL;
+ }
+ ret = ksm_merge_hugepages_time(merge_type, MAP_PRIVATE | MAP_ANONYMOUS, prot,
+ ksm_scan_limit_sec, size_MB);
+ break;
+ case KSM_UNMERGE_TIME:
+ if (size_MB == 0) {
+ printf("Option '-s' is required.\n");
+ return KSFT_FAIL;
+ }
+ ret = ksm_unmerge_time(merge_type, MAP_PRIVATE | MAP_ANONYMOUS, prot,
+ ksm_scan_limit_sec, size_MB);
+ break;
+ case KSM_COW_TIME:
+ ret = ksm_cow_time(merge_type, MAP_PRIVATE | MAP_ANONYMOUS, prot,
+ ksm_scan_limit_sec, page_size);
+ break;
+ }
+
+ if (ksm_restore(&ksm_sysfs_old)) {
+ printf("Cannot restore default tunables\n");
+ return KSFT_FAIL;
+ }
+
+ return ret;
+}
diff --git a/tools/testing/selftests/mm/madv_populate.c b/tools/testing/selftests/mm/madv_populate.c
new file mode 100644
index 000000000000..17bcb07f19f3
--- /dev/null
+++ b/tools/testing/selftests/mm/madv_populate.c
@@ -0,0 +1,311 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * MADV_POPULATE_READ and MADV_POPULATE_WRITE tests
+ *
+ * Copyright 2021, Red Hat, Inc.
+ *
+ * Author(s): David Hildenbrand <david@redhat.com>
+ */
+#define _GNU_SOURCE
+#include <stdlib.h>
+#include <string.h>
+#include <stdbool.h>
+#include <stdint.h>
+#include <unistd.h>
+#include <errno.h>
+#include <fcntl.h>
+#include <linux/mman.h>
+#include <sys/mman.h>
+
+#include "../kselftest.h"
+#include "vm_util.h"
+
+/*
+ * For now, we're using 2 MiB of private anonymous memory for all tests.
+ */
+#define SIZE (2 * 1024 * 1024)
+
+static size_t pagesize;
+
+static void sense_support(void)
+{
+ char *addr;
+ int ret;
+
+ addr = mmap(0, pagesize, PROT_READ | PROT_WRITE,
+ MAP_ANONYMOUS | MAP_PRIVATE, 0, 0);
+ if (!addr)
+ ksft_exit_fail_msg("mmap failed\n");
+
+ ret = madvise(addr, pagesize, MADV_POPULATE_READ);
+ if (ret)
+ ksft_exit_skip("MADV_POPULATE_READ is not available\n");
+
+ ret = madvise(addr, pagesize, MADV_POPULATE_WRITE);
+ if (ret)
+ ksft_exit_skip("MADV_POPULATE_WRITE is not available\n");
+
+ munmap(addr, pagesize);
+}
+
+static void test_prot_read(void)
+{
+ char *addr;
+ int ret;
+
+ ksft_print_msg("[RUN] %s\n", __func__);
+
+ addr = mmap(0, SIZE, PROT_READ, MAP_ANONYMOUS | MAP_PRIVATE, 0, 0);
+ if (addr == MAP_FAILED)
+ ksft_exit_fail_msg("mmap failed\n");
+
+ ret = madvise(addr, SIZE, MADV_POPULATE_READ);
+ ksft_test_result(!ret, "MADV_POPULATE_READ with PROT_READ\n");
+
+ ret = madvise(addr, SIZE, MADV_POPULATE_WRITE);
+ ksft_test_result(ret == -1 && errno == EINVAL,
+ "MADV_POPULATE_WRITE with PROT_READ\n");
+
+ munmap(addr, SIZE);
+}
+
+static void test_prot_write(void)
+{
+ char *addr;
+ int ret;
+
+ ksft_print_msg("[RUN] %s\n", __func__);
+
+ addr = mmap(0, SIZE, PROT_WRITE, MAP_ANONYMOUS | MAP_PRIVATE, 0, 0);
+ if (addr == MAP_FAILED)
+ ksft_exit_fail_msg("mmap failed\n");
+
+ ret = madvise(addr, SIZE, MADV_POPULATE_READ);
+ ksft_test_result(ret == -1 && errno == EINVAL,
+ "MADV_POPULATE_READ with PROT_WRITE\n");
+
+ ret = madvise(addr, SIZE, MADV_POPULATE_WRITE);
+ ksft_test_result(!ret, "MADV_POPULATE_WRITE with PROT_WRITE\n");
+
+ munmap(addr, SIZE);
+}
+
+static void test_holes(void)
+{
+ char *addr;
+ int ret;
+
+ ksft_print_msg("[RUN] %s\n", __func__);
+
+ addr = mmap(0, SIZE, PROT_READ | PROT_WRITE,
+ MAP_ANONYMOUS | MAP_PRIVATE, 0, 0);
+ if (addr == MAP_FAILED)
+ ksft_exit_fail_msg("mmap failed\n");
+ ret = munmap(addr + pagesize, pagesize);
+ if (ret)
+ ksft_exit_fail_msg("munmap failed\n");
+
+ /* Hole in the middle */
+ ret = madvise(addr, SIZE, MADV_POPULATE_READ);
+ ksft_test_result(ret == -1 && errno == ENOMEM,
+ "MADV_POPULATE_READ with holes in the middle\n");
+ ret = madvise(addr, SIZE, MADV_POPULATE_WRITE);
+ ksft_test_result(ret == -1 && errno == ENOMEM,
+ "MADV_POPULATE_WRITE with holes in the middle\n");
+
+ /* Hole at end */
+ ret = madvise(addr, 2 * pagesize, MADV_POPULATE_READ);
+ ksft_test_result(ret == -1 && errno == ENOMEM,
+ "MADV_POPULATE_READ with holes at the end\n");
+ ret = madvise(addr, 2 * pagesize, MADV_POPULATE_WRITE);
+ ksft_test_result(ret == -1 && errno == ENOMEM,
+ "MADV_POPULATE_WRITE with holes at the end\n");
+
+ /* Hole at beginning */
+ ret = madvise(addr + pagesize, pagesize, MADV_POPULATE_READ);
+ ksft_test_result(ret == -1 && errno == ENOMEM,
+ "MADV_POPULATE_READ with holes at the beginning\n");
+ ret = madvise(addr + pagesize, pagesize, MADV_POPULATE_WRITE);
+ ksft_test_result(ret == -1 && errno == ENOMEM,
+ "MADV_POPULATE_WRITE with holes at the beginning\n");
+
+ munmap(addr, SIZE);
+}
+
+static bool range_is_populated(char *start, ssize_t size)
+{
+ int fd = open("/proc/self/pagemap", O_RDONLY);
+ bool ret = true;
+
+ if (fd < 0)
+ ksft_exit_fail_msg("opening pagemap failed\n");
+ for (; size > 0 && ret; size -= pagesize, start += pagesize)
+ if (!pagemap_is_populated(fd, start))
+ ret = false;
+ close(fd);
+ return ret;
+}
+
+static bool range_is_not_populated(char *start, ssize_t size)
+{
+ int fd = open("/proc/self/pagemap", O_RDONLY);
+ bool ret = true;
+
+ if (fd < 0)
+ ksft_exit_fail_msg("opening pagemap failed\n");
+ for (; size > 0 && ret; size -= pagesize, start += pagesize)
+ if (pagemap_is_populated(fd, start))
+ ret = false;
+ close(fd);
+ return ret;
+}
+
+static void test_populate_read(void)
+{
+ char *addr;
+ int ret;
+
+ ksft_print_msg("[RUN] %s\n", __func__);
+
+ addr = mmap(0, SIZE, PROT_READ | PROT_WRITE,
+ MAP_ANONYMOUS | MAP_PRIVATE, 0, 0);
+ if (addr == MAP_FAILED)
+ ksft_exit_fail_msg("mmap failed\n");
+ ksft_test_result(range_is_not_populated(addr, SIZE),
+ "range initially not populated\n");
+
+ ret = madvise(addr, SIZE, MADV_POPULATE_READ);
+ ksft_test_result(!ret, "MADV_POPULATE_READ\n");
+ ksft_test_result(range_is_populated(addr, SIZE),
+ "range is populated\n");
+
+ munmap(addr, SIZE);
+}
+
+static void test_populate_write(void)
+{
+ char *addr;
+ int ret;
+
+ ksft_print_msg("[RUN] %s\n", __func__);
+
+ addr = mmap(0, SIZE, PROT_READ | PROT_WRITE,
+ MAP_ANONYMOUS | MAP_PRIVATE, 0, 0);
+ if (addr == MAP_FAILED)
+ ksft_exit_fail_msg("mmap failed\n");
+ ksft_test_result(range_is_not_populated(addr, SIZE),
+ "range initially not populated\n");
+
+ ret = madvise(addr, SIZE, MADV_POPULATE_WRITE);
+ ksft_test_result(!ret, "MADV_POPULATE_WRITE\n");
+ ksft_test_result(range_is_populated(addr, SIZE),
+ "range is populated\n");
+
+ munmap(addr, SIZE);
+}
+
+static bool range_is_softdirty(char *start, ssize_t size)
+{
+ int fd = open("/proc/self/pagemap", O_RDONLY);
+ bool ret = true;
+
+ if (fd < 0)
+ ksft_exit_fail_msg("opening pagemap failed\n");
+ for (; size > 0 && ret; size -= pagesize, start += pagesize)
+ if (!pagemap_is_softdirty(fd, start))
+ ret = false;
+ close(fd);
+ return ret;
+}
+
+static bool range_is_not_softdirty(char *start, ssize_t size)
+{
+ int fd = open("/proc/self/pagemap", O_RDONLY);
+ bool ret = true;
+
+ if (fd < 0)
+ ksft_exit_fail_msg("opening pagemap failed\n");
+ for (; size > 0 && ret; size -= pagesize, start += pagesize)
+ if (pagemap_is_softdirty(fd, start))
+ ret = false;
+ close(fd);
+ return ret;
+}
+
+static void test_softdirty(void)
+{
+ char *addr;
+ int ret;
+
+ ksft_print_msg("[RUN] %s\n", __func__);
+
+ addr = mmap(0, SIZE, PROT_READ | PROT_WRITE,
+ MAP_ANONYMOUS | MAP_PRIVATE, 0, 0);
+ if (addr == MAP_FAILED)
+ ksft_exit_fail_msg("mmap failed\n");
+
+ /* Clear any softdirty bits. */
+ clear_softdirty();
+ ksft_test_result(range_is_not_softdirty(addr, SIZE),
+ "range is not softdirty\n");
+
+ /* Populating READ should set softdirty. */
+ ret = madvise(addr, SIZE, MADV_POPULATE_READ);
+ ksft_test_result(!ret, "MADV_POPULATE_READ\n");
+ ksft_test_result(range_is_not_softdirty(addr, SIZE),
+ "range is not softdirty\n");
+
+ /* Populating WRITE should set softdirty. */
+ ret = madvise(addr, SIZE, MADV_POPULATE_WRITE);
+ ksft_test_result(!ret, "MADV_POPULATE_WRITE\n");
+ ksft_test_result(range_is_softdirty(addr, SIZE),
+ "range is softdirty\n");
+
+ munmap(addr, SIZE);
+}
+
+static int system_has_softdirty(void)
+{
+ /*
+ * There is no way to check if the kernel supports soft-dirty, other
+ * than by writing to a page and seeing if the bit was set. But the
+ * tests are intended to check that the bit gets set when it should, so
+ * doing that check would turn a potentially legitimate fail into a
+ * skip. Fortunately, we know for sure that arm64 does not support
+ * soft-dirty. So for now, let's just use the arch as a corse guide.
+ */
+#if defined(__aarch64__)
+ return 0;
+#else
+ return 1;
+#endif
+}
+
+int main(int argc, char **argv)
+{
+ int nr_tests = 16;
+ int err;
+
+ pagesize = getpagesize();
+
+ if (system_has_softdirty())
+ nr_tests += 5;
+
+ ksft_print_header();
+ ksft_set_plan(nr_tests);
+
+ sense_support();
+ test_prot_read();
+ test_prot_write();
+ test_holes();
+ test_populate_read();
+ test_populate_write();
+ if (system_has_softdirty())
+ test_softdirty();
+
+ err = ksft_get_fail_cnt();
+ if (err)
+ ksft_exit_fail_msg("%d out of %d tests failed\n",
+ err, ksft_test_num());
+ return ksft_exit_pass();
+}
diff --git a/tools/testing/selftests/vm/map_fixed_noreplace.c b/tools/testing/selftests/mm/map_fixed_noreplace.c
index d91bde511268..b74813fdc951 100644
--- a/tools/testing/selftests/vm/map_fixed_noreplace.c
+++ b/tools/testing/selftests/mm/map_fixed_noreplace.c
@@ -12,13 +12,7 @@
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>
-
-#ifndef MAP_FIXED_NOREPLACE
-#define MAP_FIXED_NOREPLACE 0x100000
-#endif
-
-#define BASE_ADDRESS (256ul * 1024 * 1024)
-
+#include "../kselftest.h"
static void dump_maps(void)
{
@@ -28,48 +22,61 @@ static void dump_maps(void)
system(cmd);
}
+static unsigned long find_base_addr(unsigned long size)
+{
+ void *addr;
+ unsigned long flags;
+
+ flags = MAP_PRIVATE | MAP_ANONYMOUS;
+ addr = mmap(NULL, size, PROT_NONE, flags, -1, 0);
+ if (addr == MAP_FAILED)
+ ksft_exit_fail_msg("Error: couldn't map the space we need for the test\n");
+
+ if (munmap(addr, size) != 0)
+ ksft_exit_fail_msg("Error: munmap failed\n");
+
+ return (unsigned long)addr;
+}
+
int main(void)
{
+ unsigned long base_addr;
unsigned long flags, addr, size, page_size;
char *p;
+ ksft_print_header();
+ ksft_set_plan(9);
+
page_size = sysconf(_SC_PAGE_SIZE);
+ /* let's find a base addr that is free before we start the tests */
+ size = 5 * page_size;
+ base_addr = find_base_addr(size);
+
flags = MAP_PRIVATE | MAP_ANONYMOUS | MAP_FIXED_NOREPLACE;
- // Check we can map all the areas we need below
- errno = 0;
- addr = BASE_ADDRESS;
+ /* Check we can map all the areas we need below */
+ addr = base_addr;
size = 5 * page_size;
p = mmap((void *)addr, size, PROT_NONE, flags, -1, 0);
-
- printf("mmap() @ 0x%lx-0x%lx p=%p result=%m\n", addr, addr + size, p);
-
if (p == MAP_FAILED) {
dump_maps();
- printf("Error: couldn't map the space we need for the test\n");
- return 1;
+ ksft_exit_fail_msg("Error: couldn't map the space we need for the test\n");
}
-
- errno = 0;
if (munmap((void *)addr, 5 * page_size) != 0) {
dump_maps();
- printf("Error: munmap failed!?\n");
- return 1;
+ ksft_exit_fail_msg("Error: munmap failed!?\n");
}
- printf("unmap() successful\n");
+ ksft_test_result_pass("mmap() @ 0x%lx-0x%lx p=%p result=%m\n", addr, addr + size, p);
- errno = 0;
- addr = BASE_ADDRESS + page_size;
+ addr = base_addr + page_size;
size = 3 * page_size;
p = mmap((void *)addr, size, PROT_NONE, flags, -1, 0);
- printf("mmap() @ 0x%lx-0x%lx p=%p result=%m\n", addr, addr + size, p);
-
if (p == MAP_FAILED) {
dump_maps();
- printf("Error: first mmap() failed unexpectedly\n");
- return 1;
+ ksft_exit_fail_msg("Error: first mmap() failed unexpectedly\n");
}
+ ksft_test_result_pass("mmap() @ 0x%lx-0x%lx p=%p result=%m\n", addr, addr + size, p);
/*
* Exact same mapping again:
@@ -79,17 +86,14 @@ int main(void)
* +3 | mapped | new
* +4 | free | new
*/
- errno = 0;
- addr = BASE_ADDRESS;
+ addr = base_addr;
size = 5 * page_size;
p = mmap((void *)addr, size, PROT_NONE, flags, -1, 0);
- printf("mmap() @ 0x%lx-0x%lx p=%p result=%m\n", addr, addr + size, p);
-
if (p != MAP_FAILED) {
dump_maps();
- printf("Error:1: mmap() succeeded when it shouldn't have\n");
- return 1;
+ ksft_exit_fail_msg("Error:1: mmap() succeeded when it shouldn't have\n");
}
+ ksft_test_result_pass("mmap() @ 0x%lx-0x%lx p=%p result=%m\n", addr, addr + size, p);
/*
* Second mapping contained within first:
@@ -100,17 +104,14 @@ int main(void)
* +3 | mapped |
* +4 | free |
*/
- errno = 0;
- addr = BASE_ADDRESS + (2 * page_size);
+ addr = base_addr + (2 * page_size);
size = page_size;
p = mmap((void *)addr, size, PROT_NONE, flags, -1, 0);
- printf("mmap() @ 0x%lx-0x%lx p=%p result=%m\n", addr, addr + size, p);
-
if (p != MAP_FAILED) {
dump_maps();
- printf("Error:2: mmap() succeeded when it shouldn't have\n");
- return 1;
+ ksft_exit_fail_msg("Error:2: mmap() succeeded when it shouldn't have\n");
}
+ ksft_test_result_pass("mmap() @ 0x%lx-0x%lx p=%p result=%m\n", addr, addr + size, p);
/*
* Overlap end of existing mapping:
@@ -120,17 +121,14 @@ int main(void)
* +3 | mapped | new
* +4 | free | new
*/
- errno = 0;
- addr = BASE_ADDRESS + (3 * page_size);
+ addr = base_addr + (3 * page_size);
size = 2 * page_size;
p = mmap((void *)addr, size, PROT_NONE, flags, -1, 0);
- printf("mmap() @ 0x%lx-0x%lx p=%p result=%m\n", addr, addr + size, p);
-
if (p != MAP_FAILED) {
dump_maps();
- printf("Error:3: mmap() succeeded when it shouldn't have\n");
- return 1;
+ ksft_exit_fail_msg("Error:3: mmap() succeeded when it shouldn't have\n");
}
+ ksft_test_result_pass("mmap() @ 0x%lx-0x%lx p=%p result=%m\n", addr, addr + size, p);
/*
* Overlap start of existing mapping:
@@ -140,17 +138,14 @@ int main(void)
* +3 | mapped |
* +4 | free |
*/
- errno = 0;
- addr = BASE_ADDRESS;
+ addr = base_addr;
size = 2 * page_size;
p = mmap((void *)addr, size, PROT_NONE, flags, -1, 0);
- printf("mmap() @ 0x%lx-0x%lx p=%p result=%m\n", addr, addr + size, p);
-
if (p != MAP_FAILED) {
dump_maps();
- printf("Error:4: mmap() succeeded when it shouldn't have\n");
- return 1;
+ ksft_exit_fail_msg("Error:4: mmap() succeeded when it shouldn't have\n");
}
+ ksft_test_result_pass("mmap() @ 0x%lx-0x%lx p=%p result=%m\n", addr, addr + size, p);
/*
* Adjacent to start of existing mapping:
@@ -160,17 +155,14 @@ int main(void)
* +3 | mapped |
* +4 | free |
*/
- errno = 0;
- addr = BASE_ADDRESS;
+ addr = base_addr;
size = page_size;
p = mmap((void *)addr, size, PROT_NONE, flags, -1, 0);
- printf("mmap() @ 0x%lx-0x%lx p=%p result=%m\n", addr, addr + size, p);
-
if (p == MAP_FAILED) {
dump_maps();
- printf("Error:5: mmap() failed when it shouldn't have\n");
- return 1;
+ ksft_exit_fail_msg("Error:5: mmap() failed when it shouldn't have\n");
}
+ ksft_test_result_pass("mmap() @ 0x%lx-0x%lx p=%p result=%m\n", addr, addr + size, p);
/*
* Adjacent to end of existing mapping:
@@ -180,27 +172,22 @@ int main(void)
* +3 | mapped |
* +4 | free | new
*/
- errno = 0;
- addr = BASE_ADDRESS + (4 * page_size);
+ addr = base_addr + (4 * page_size);
size = page_size;
p = mmap((void *)addr, size, PROT_NONE, flags, -1, 0);
- printf("mmap() @ 0x%lx-0x%lx p=%p result=%m\n", addr, addr + size, p);
-
if (p == MAP_FAILED) {
dump_maps();
- printf("Error:6: mmap() failed when it shouldn't have\n");
- return 1;
+ ksft_exit_fail_msg("Error:6: mmap() failed when it shouldn't have\n");
}
+ ksft_test_result_pass("mmap() @ 0x%lx-0x%lx p=%p result=%m\n", addr, addr + size, p);
- addr = BASE_ADDRESS;
+ addr = base_addr;
size = 5 * page_size;
if (munmap((void *)addr, size) != 0) {
dump_maps();
- printf("Error: munmap failed!?\n");
- return 1;
+ ksft_exit_fail_msg("Error: munmap failed!?\n");
}
- printf("unmap() successful\n");
+ ksft_test_result_pass("Base Address unmap() successful\n");
- printf("OK\n");
- return 0;
+ ksft_finished();
}
diff --git a/tools/testing/selftests/vm/map_hugetlb.c b/tools/testing/selftests/mm/map_hugetlb.c
index 6af951900aa3..a1f005a90a4f 100644
--- a/tools/testing/selftests/vm/map_hugetlb.c
+++ b/tools/testing/selftests/mm/map_hugetlb.c
@@ -15,22 +15,12 @@
#include <unistd.h>
#include <sys/mman.h>
#include <fcntl.h>
+#include "vm_util.h"
+#include "../kselftest.h"
#define LENGTH (256UL*1024*1024)
#define PROTECTION (PROT_READ | PROT_WRITE)
-#ifndef MAP_HUGETLB
-#define MAP_HUGETLB 0x40000 /* arch specific */
-#endif
-
-#ifndef MAP_HUGE_SHIFT
-#define MAP_HUGE_SHIFT 26
-#endif
-
-#ifndef MAP_HUGE_MASK
-#define MAP_HUGE_MASK 0x3f
-#endif
-
/* Only ia64 requires this */
#ifdef __ia64__
#define ADDR (void *)(0x8000000000000000UL)
@@ -42,7 +32,7 @@
static void check_bytes(char *addr)
{
- printf("First hex is %x\n", *((unsigned int *)addr));
+ ksft_print_msg("First hex is %x\n", *((unsigned int *)addr));
}
static void write_bytes(char *addr, size_t length)
@@ -53,27 +43,34 @@ static void write_bytes(char *addr, size_t length)
*(addr + i) = (char)i;
}
-static int read_bytes(char *addr, size_t length)
+static void read_bytes(char *addr, size_t length)
{
unsigned long i;
check_bytes(addr);
for (i = 0; i < length; i++)
- if (*(addr + i) != (char)i) {
- printf("Mismatch at %lu\n", i);
- return 1;
- }
- return 0;
+ if (*(addr + i) != (char)i)
+ ksft_exit_fail_msg("Mismatch at %lu\n", i);
+
+ ksft_test_result_pass("Read correct data\n");
}
int main(int argc, char **argv)
{
void *addr;
- int ret;
+ size_t hugepage_size;
size_t length = LENGTH;
int flags = FLAGS;
int shift = 0;
+ hugepage_size = default_huge_page_size();
+ /* munmap with fail if the length is not page aligned */
+ if (hugepage_size > length)
+ length = hugepage_size;
+
+ ksft_print_header();
+ ksft_set_plan(1);
+
if (argc > 1)
length = atol(argv[1]) << 20;
if (argc > 2) {
@@ -83,27 +80,23 @@ int main(int argc, char **argv)
}
if (shift)
- printf("%u kB hugepages\n", 1 << shift);
+ ksft_print_msg("%u kB hugepages\n", 1 << (shift - 10));
else
- printf("Default size hugepages\n");
- printf("Mapping %lu Mbytes\n", (unsigned long)length >> 20);
+ ksft_print_msg("Default size hugepages\n");
+ ksft_print_msg("Mapping %lu Mbytes\n", (unsigned long)length >> 20);
addr = mmap(ADDR, length, PROTECTION, flags, -1, 0);
- if (addr == MAP_FAILED) {
- perror("mmap");
- exit(1);
- }
+ if (addr == MAP_FAILED)
+ ksft_exit_fail_msg("mmap: %s\n", strerror(errno));
- printf("Returned address is %p\n", addr);
+ ksft_print_msg("Returned address is %p\n", addr);
check_bytes(addr);
write_bytes(addr, length);
- ret = read_bytes(addr, length);
+ read_bytes(addr, length);
/* munmap() length of MAP_HUGETLB memory must be hugepage aligned */
- if (munmap(addr, length)) {
- perror("munmap");
- exit(1);
- }
+ if (munmap(addr, length))
+ ksft_exit_fail_msg("munmap: %s\n", strerror(errno));
- return ret;
+ ksft_finished();
}
diff --git a/tools/testing/selftests/vm/map_populate.c b/tools/testing/selftests/mm/map_populate.c
index 6b8aeaa0bf7a..5c8a53869b1b 100644
--- a/tools/testing/selftests/vm/map_populate.c
+++ b/tools/testing/selftests/mm/map_populate.c
@@ -16,21 +16,21 @@
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
+#include "../kselftest.h"
-#ifndef MMAP_SZ
#define MMAP_SZ 4096
-#endif
-
-#define BUG_ON(condition, description) \
- do { \
- if (condition) { \
- fprintf(stderr, "[FAIL]\t%s:%d\t%s:%s\n", __func__, \
- __LINE__, (description), strerror(errno)); \
- exit(1); \
- } \
+
+#define BUG_ON(condition, description) \
+ do { \
+ if (condition) \
+ ksft_exit_fail_msg("[FAIL]\t%s:%d\t%s:%s\n", \
+ __func__, __LINE__, (description), \
+ strerror(errno)); \
} while (0)
-static int parent_f(int sock, unsigned long *smap, int child)
+#define TESTS_IN_CHILD 2
+
+static void parent_f(int sock, unsigned long *smap, int child)
{
int status, ret;
@@ -45,9 +45,10 @@ static int parent_f(int sock, unsigned long *smap, int child)
BUG_ON(ret <= 0, "write(sock)");
waitpid(child, &status, 0);
- BUG_ON(!WIFEXITED(status), "child in unexpected state");
- return WEXITSTATUS(status);
+ /* The ksft macros don't keep counters between processes */
+ ksft_cnt.ksft_pass = WEXITSTATUS(status);
+ ksft_cnt.ksft_fail = TESTS_IN_CHILD - WEXITSTATUS(status);
}
static int child_f(int sock, unsigned long *smap, int fd)
@@ -66,10 +67,11 @@ static int child_f(int sock, unsigned long *smap, int fd)
ret = read(sock, &buf, sizeof(int));
BUG_ON(ret <= 0, "read(sock)");
- BUG_ON(*smap == 0x22222BAD, "MAP_POPULATE didn't COW private page");
- BUG_ON(*smap != 0xdeadbabe, "mapping was corrupted");
+ ksft_test_result(*smap != 0x22222BAD, "MAP_POPULATE COW private page\n");
+ ksft_test_result(*smap == 0xdeadbabe, "The mapping state\n");
- return 0;
+ /* The ksft macros don't keep counters between processes */
+ return ksft_cnt.ksft_pass;
}
int main(int argc, char **argv)
@@ -78,8 +80,11 @@ int main(int argc, char **argv)
FILE *ftmp;
unsigned long *smap;
+ ksft_print_header();
+ ksft_set_plan(TESTS_IN_CHILD);
+
ftmp = tmpfile();
- BUG_ON(ftmp == 0, "tmpfile()");
+ BUG_ON(!ftmp, "tmpfile()");
ret = ftruncate(fileno(ftmp), MMAP_SZ);
BUG_ON(ret, "ftruncate()");
@@ -103,7 +108,9 @@ int main(int argc, char **argv)
ret = close(sock[0]);
BUG_ON(ret, "close()");
- return parent_f(sock[1], smap, child);
+ parent_f(sock[1], smap, child);
+
+ ksft_finished();
}
ret = close(sock[1]);
diff --git a/tools/testing/selftests/mm/mdwe_test.c b/tools/testing/selftests/mm/mdwe_test.c
new file mode 100644
index 000000000000..200bedcdc32e
--- /dev/null
+++ b/tools/testing/selftests/mm/mdwe_test.c
@@ -0,0 +1,303 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#ifdef __aarch64__
+#include <asm/hwcap.h>
+#endif
+
+#include <linux/mman.h>
+#include <linux/prctl.h>
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <sys/auxv.h>
+#include <sys/prctl.h>
+#include <sys/wait.h>
+#include <unistd.h>
+
+#include "../kselftest_harness.h"
+
+#ifndef __aarch64__
+# define PROT_BTI 0
+#endif
+
+TEST(prctl_flags)
+{
+ EXPECT_LT(prctl(PR_SET_MDWE, PR_MDWE_NO_INHERIT, 0L, 0L, 7L), 0);
+ EXPECT_EQ(errno, EINVAL);
+
+ EXPECT_LT(prctl(PR_SET_MDWE, 7L, 0L, 0L, 0L), 0);
+ EXPECT_EQ(errno, EINVAL);
+ EXPECT_LT(prctl(PR_SET_MDWE, 0L, 7L, 0L, 0L), 0);
+ EXPECT_EQ(errno, EINVAL);
+ EXPECT_LT(prctl(PR_SET_MDWE, 0L, 0L, 7L, 0L), 0);
+ EXPECT_EQ(errno, EINVAL);
+ EXPECT_LT(prctl(PR_SET_MDWE, 0L, 0L, 0L, 7L), 0);
+ EXPECT_EQ(errno, EINVAL);
+
+ EXPECT_LT(prctl(PR_GET_MDWE, 7L, 0L, 0L, 0L), 0);
+ EXPECT_EQ(errno, EINVAL);
+ EXPECT_LT(prctl(PR_GET_MDWE, 0L, 7L, 0L, 0L), 0);
+ EXPECT_EQ(errno, EINVAL);
+ EXPECT_LT(prctl(PR_GET_MDWE, 0L, 0L, 7L, 0L), 0);
+ EXPECT_EQ(errno, EINVAL);
+ EXPECT_LT(prctl(PR_GET_MDWE, 0L, 0L, 0L, 7L), 0);
+ EXPECT_EQ(errno, EINVAL);
+}
+
+FIXTURE(consecutive_prctl_flags) {};
+FIXTURE_SETUP(consecutive_prctl_flags) {}
+FIXTURE_TEARDOWN(consecutive_prctl_flags) {}
+
+FIXTURE_VARIANT(consecutive_prctl_flags)
+{
+ unsigned long first_flags;
+ unsigned long second_flags;
+ bool should_work;
+};
+
+FIXTURE_VARIANT_ADD(consecutive_prctl_flags, can_keep_no_flags)
+{
+ .first_flags = 0,
+ .second_flags = 0,
+ .should_work = true,
+};
+
+FIXTURE_VARIANT_ADD(consecutive_prctl_flags, can_keep_exec_gain)
+{
+ .first_flags = PR_MDWE_REFUSE_EXEC_GAIN,
+ .second_flags = PR_MDWE_REFUSE_EXEC_GAIN,
+ .should_work = true,
+};
+
+FIXTURE_VARIANT_ADD(consecutive_prctl_flags, can_keep_both_flags)
+{
+ .first_flags = PR_MDWE_REFUSE_EXEC_GAIN | PR_MDWE_NO_INHERIT,
+ .second_flags = PR_MDWE_REFUSE_EXEC_GAIN | PR_MDWE_NO_INHERIT,
+ .should_work = true,
+};
+
+FIXTURE_VARIANT_ADD(consecutive_prctl_flags, cant_disable_mdwe)
+{
+ .first_flags = PR_MDWE_REFUSE_EXEC_GAIN,
+ .second_flags = 0,
+ .should_work = false,
+};
+
+FIXTURE_VARIANT_ADD(consecutive_prctl_flags, cant_disable_mdwe_no_inherit)
+{
+ .first_flags = PR_MDWE_REFUSE_EXEC_GAIN | PR_MDWE_NO_INHERIT,
+ .second_flags = 0,
+ .should_work = false,
+};
+
+FIXTURE_VARIANT_ADD(consecutive_prctl_flags, cant_disable_no_inherit)
+{
+ .first_flags = PR_MDWE_REFUSE_EXEC_GAIN | PR_MDWE_NO_INHERIT,
+ .second_flags = PR_MDWE_REFUSE_EXEC_GAIN,
+ .should_work = false,
+};
+
+FIXTURE_VARIANT_ADD(consecutive_prctl_flags, cant_enable_no_inherit)
+{
+ .first_flags = PR_MDWE_REFUSE_EXEC_GAIN,
+ .second_flags = PR_MDWE_REFUSE_EXEC_GAIN | PR_MDWE_NO_INHERIT,
+ .should_work = false,
+};
+
+TEST_F(consecutive_prctl_flags, two_prctls)
+{
+ int ret;
+
+ EXPECT_EQ(prctl(PR_SET_MDWE, variant->first_flags, 0L, 0L, 0L), 0);
+
+ ret = prctl(PR_SET_MDWE, variant->second_flags, 0L, 0L, 0L);
+ if (variant->should_work) {
+ EXPECT_EQ(ret, 0);
+
+ ret = prctl(PR_GET_MDWE, 0L, 0L, 0L, 0L);
+ ASSERT_EQ(ret, variant->second_flags);
+ } else {
+ EXPECT_NE(ret, 0);
+ ASSERT_EQ(errno, EPERM);
+ }
+}
+
+FIXTURE(mdwe)
+{
+ void *p;
+ int flags;
+ size_t size;
+ pid_t pid;
+};
+
+FIXTURE_VARIANT(mdwe)
+{
+ bool enabled;
+ bool forked;
+ bool inherit;
+};
+
+FIXTURE_VARIANT_ADD(mdwe, stock)
+{
+ .enabled = false,
+ .forked = false,
+ .inherit = false,
+};
+
+FIXTURE_VARIANT_ADD(mdwe, enabled)
+{
+ .enabled = true,
+ .forked = false,
+ .inherit = true,
+};
+
+FIXTURE_VARIANT_ADD(mdwe, inherited)
+{
+ .enabled = true,
+ .forked = true,
+ .inherit = true,
+};
+
+FIXTURE_VARIANT_ADD(mdwe, not_inherited)
+{
+ .enabled = true,
+ .forked = true,
+ .inherit = false,
+};
+
+static bool executable_map_should_fail(const FIXTURE_VARIANT(mdwe) *variant)
+{
+ return variant->enabled && (!variant->forked || variant->inherit);
+}
+
+FIXTURE_SETUP(mdwe)
+{
+ unsigned long mdwe_flags;
+ int ret, status;
+
+ self->p = NULL;
+ self->flags = MAP_SHARED | MAP_ANONYMOUS;
+ self->size = getpagesize();
+
+ if (!variant->enabled)
+ return;
+
+ mdwe_flags = PR_MDWE_REFUSE_EXEC_GAIN;
+ if (!variant->inherit)
+ mdwe_flags |= PR_MDWE_NO_INHERIT;
+
+ ret = prctl(PR_SET_MDWE, mdwe_flags, 0L, 0L, 0L);
+ ASSERT_EQ(ret, 0) {
+ TH_LOG("PR_SET_MDWE failed or unsupported");
+ }
+
+ ret = prctl(PR_GET_MDWE, 0L, 0L, 0L, 0L);
+ ASSERT_EQ(ret, mdwe_flags);
+
+ if (variant->forked) {
+ self->pid = fork();
+ ASSERT_GE(self->pid, 0) {
+ TH_LOG("fork failed\n");
+ }
+
+ if (self->pid > 0) {
+ ret = waitpid(self->pid, &status, 0);
+ ASSERT_TRUE(WIFEXITED(status));
+ exit(WEXITSTATUS(status));
+ }
+ }
+}
+
+FIXTURE_TEARDOWN(mdwe)
+{
+ if (self->p && self->p != MAP_FAILED)
+ munmap(self->p, self->size);
+}
+
+TEST_F(mdwe, mmap_READ_EXEC)
+{
+ self->p = mmap(NULL, self->size, PROT_READ | PROT_EXEC, self->flags, 0, 0);
+ EXPECT_NE(self->p, MAP_FAILED);
+}
+
+TEST_F(mdwe, mmap_WRITE_EXEC)
+{
+ self->p = mmap(NULL, self->size, PROT_WRITE | PROT_EXEC, self->flags, 0, 0);
+ if (executable_map_should_fail(variant)) {
+ EXPECT_EQ(self->p, MAP_FAILED);
+ } else {
+ EXPECT_NE(self->p, MAP_FAILED);
+ }
+}
+
+TEST_F(mdwe, mprotect_stay_EXEC)
+{
+ int ret;
+
+ self->p = mmap(NULL, self->size, PROT_READ | PROT_EXEC, self->flags, 0, 0);
+ ASSERT_NE(self->p, MAP_FAILED);
+
+ ret = mprotect(self->p, self->size, PROT_READ | PROT_EXEC);
+ EXPECT_EQ(ret, 0);
+}
+
+TEST_F(mdwe, mprotect_add_EXEC)
+{
+ int ret;
+
+ self->p = mmap(NULL, self->size, PROT_READ, self->flags, 0, 0);
+ ASSERT_NE(self->p, MAP_FAILED);
+
+ ret = mprotect(self->p, self->size, PROT_READ | PROT_EXEC);
+ if (executable_map_should_fail(variant)) {
+ EXPECT_LT(ret, 0);
+ } else {
+ EXPECT_EQ(ret, 0);
+ }
+}
+
+TEST_F(mdwe, mprotect_WRITE_EXEC)
+{
+ int ret;
+
+ self->p = mmap(NULL, self->size, PROT_WRITE, self->flags, 0, 0);
+ ASSERT_NE(self->p, MAP_FAILED);
+
+ ret = mprotect(self->p, self->size, PROT_WRITE | PROT_EXEC);
+ if (executable_map_should_fail(variant)) {
+ EXPECT_LT(ret, 0);
+ } else {
+ EXPECT_EQ(ret, 0);
+ }
+}
+
+TEST_F(mdwe, mmap_FIXED)
+{
+ void *p;
+
+ self->p = mmap(NULL, self->size, PROT_READ, self->flags, 0, 0);
+ ASSERT_NE(self->p, MAP_FAILED);
+
+ /* MAP_FIXED unmaps the existing page before mapping which is allowed */
+ p = mmap(self->p, self->size, PROT_READ | PROT_EXEC,
+ self->flags | MAP_FIXED, 0, 0);
+ EXPECT_EQ(p, self->p);
+}
+
+TEST_F(mdwe, arm64_BTI)
+{
+ int ret;
+
+#ifdef __aarch64__
+ if (!(getauxval(AT_HWCAP2) & HWCAP2_BTI))
+#endif
+ SKIP(return, "HWCAP2_BTI not supported");
+
+ self->p = mmap(NULL, self->size, PROT_EXEC, self->flags, 0, 0);
+ ASSERT_NE(self->p, MAP_FAILED);
+
+ ret = mprotect(self->p, self->size, PROT_EXEC | PROT_BTI);
+ EXPECT_EQ(ret, 0);
+}
+
+TEST_HARNESS_MAIN
diff --git a/tools/testing/selftests/mm/memfd_secret.c b/tools/testing/selftests/mm/memfd_secret.c
new file mode 100644
index 000000000000..9b298f6a04b3
--- /dev/null
+++ b/tools/testing/selftests/mm/memfd_secret.c
@@ -0,0 +1,299 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright IBM Corporation, 2021
+ *
+ * Author: Mike Rapoport <rppt@linux.ibm.com>
+ */
+
+#define _GNU_SOURCE
+#include <sys/uio.h>
+#include <sys/mman.h>
+#include <sys/wait.h>
+#include <sys/types.h>
+#include <sys/ptrace.h>
+#include <sys/syscall.h>
+#include <sys/resource.h>
+#include <sys/capability.h>
+
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+#include <errno.h>
+#include <stdio.h>
+
+#include "../kselftest.h"
+
+#define fail(fmt, ...) ksft_test_result_fail(fmt, ##__VA_ARGS__)
+#define pass(fmt, ...) ksft_test_result_pass(fmt, ##__VA_ARGS__)
+#define skip(fmt, ...) ksft_test_result_skip(fmt, ##__VA_ARGS__)
+
+#ifdef __NR_memfd_secret
+
+#define PATTERN 0x55
+
+static const int prot = PROT_READ | PROT_WRITE;
+static const int mode = MAP_SHARED;
+
+static unsigned long page_size;
+static unsigned long mlock_limit_cur;
+static unsigned long mlock_limit_max;
+
+static int memfd_secret(unsigned int flags)
+{
+ return syscall(__NR_memfd_secret, flags);
+}
+
+static void test_file_apis(int fd)
+{
+ char buf[64];
+
+ if ((read(fd, buf, sizeof(buf)) >= 0) ||
+ (write(fd, buf, sizeof(buf)) >= 0) ||
+ (pread(fd, buf, sizeof(buf), 0) >= 0) ||
+ (pwrite(fd, buf, sizeof(buf), 0) >= 0))
+ fail("unexpected file IO\n");
+ else
+ pass("file IO is blocked as expected\n");
+}
+
+static void test_mlock_limit(int fd)
+{
+ size_t len;
+ char *mem;
+
+ len = mlock_limit_cur;
+ if (len % page_size != 0)
+ len = (len/page_size) * page_size;
+
+ mem = mmap(NULL, len, prot, mode, fd, 0);
+ if (mem == MAP_FAILED) {
+ fail("unable to mmap secret memory\n");
+ return;
+ }
+ munmap(mem, len);
+
+ len = mlock_limit_max * 2;
+ mem = mmap(NULL, len, prot, mode, fd, 0);
+ if (mem != MAP_FAILED) {
+ fail("unexpected mlock limit violation\n");
+ munmap(mem, len);
+ return;
+ }
+
+ pass("mlock limit is respected\n");
+}
+
+static void try_process_vm_read(int fd, int pipefd[2])
+{
+ struct iovec liov, riov;
+ char buf[64];
+ char *mem;
+
+ if (read(pipefd[0], &mem, sizeof(mem)) < 0) {
+ fail("pipe write: %s\n", strerror(errno));
+ exit(KSFT_FAIL);
+ }
+
+ liov.iov_len = riov.iov_len = sizeof(buf);
+ liov.iov_base = buf;
+ riov.iov_base = mem;
+
+ if (process_vm_readv(getppid(), &liov, 1, &riov, 1, 0) < 0) {
+ if (errno == ENOSYS)
+ exit(KSFT_SKIP);
+ exit(KSFT_PASS);
+ }
+
+ exit(KSFT_FAIL);
+}
+
+static void try_ptrace(int fd, int pipefd[2])
+{
+ pid_t ppid = getppid();
+ int status;
+ char *mem;
+ long ret;
+
+ if (read(pipefd[0], &mem, sizeof(mem)) < 0) {
+ perror("pipe write");
+ exit(KSFT_FAIL);
+ }
+
+ ret = ptrace(PTRACE_ATTACH, ppid, 0, 0);
+ if (ret) {
+ perror("ptrace_attach");
+ exit(KSFT_FAIL);
+ }
+
+ ret = waitpid(ppid, &status, WUNTRACED);
+ if ((ret != ppid) || !(WIFSTOPPED(status))) {
+ fprintf(stderr, "weird waitppid result %ld stat %x\n",
+ ret, status);
+ exit(KSFT_FAIL);
+ }
+
+ if (ptrace(PTRACE_PEEKDATA, ppid, mem, 0))
+ exit(KSFT_PASS);
+
+ exit(KSFT_FAIL);
+}
+
+static void check_child_status(pid_t pid, const char *name)
+{
+ int status;
+
+ waitpid(pid, &status, 0);
+
+ if (WIFEXITED(status) && WEXITSTATUS(status) == KSFT_SKIP) {
+ skip("%s is not supported\n", name);
+ return;
+ }
+
+ if ((WIFEXITED(status) && WEXITSTATUS(status) == KSFT_PASS) ||
+ WIFSIGNALED(status)) {
+ pass("%s is blocked as expected\n", name);
+ return;
+ }
+
+ fail("%s: unexpected memory access\n", name);
+}
+
+static void test_remote_access(int fd, const char *name,
+ void (*func)(int fd, int pipefd[2]))
+{
+ int pipefd[2];
+ pid_t pid;
+ char *mem;
+
+ if (pipe(pipefd)) {
+ fail("pipe failed: %s\n", strerror(errno));
+ return;
+ }
+
+ pid = fork();
+ if (pid < 0) {
+ fail("fork failed: %s\n", strerror(errno));
+ return;
+ }
+
+ if (pid == 0) {
+ func(fd, pipefd);
+ return;
+ }
+
+ mem = mmap(NULL, page_size, prot, mode, fd, 0);
+ if (mem == MAP_FAILED) {
+ fail("Unable to mmap secret memory\n");
+ return;
+ }
+
+ ftruncate(fd, page_size);
+ memset(mem, PATTERN, page_size);
+
+ if (write(pipefd[1], &mem, sizeof(mem)) < 0) {
+ fail("pipe write: %s\n", strerror(errno));
+ return;
+ }
+
+ check_child_status(pid, name);
+}
+
+static void test_process_vm_read(int fd)
+{
+ test_remote_access(fd, "process_vm_read", try_process_vm_read);
+}
+
+static void test_ptrace(int fd)
+{
+ test_remote_access(fd, "ptrace", try_ptrace);
+}
+
+static int set_cap_limits(rlim_t max)
+{
+ struct rlimit new;
+ cap_t cap = cap_init();
+
+ new.rlim_cur = max;
+ new.rlim_max = max;
+ if (setrlimit(RLIMIT_MEMLOCK, &new)) {
+ perror("setrlimit() returns error");
+ return -1;
+ }
+
+ /* drop capabilities including CAP_IPC_LOCK */
+ if (cap_set_proc(cap)) {
+ perror("cap_set_proc() returns error");
+ return -2;
+ }
+
+ return 0;
+}
+
+static void prepare(void)
+{
+ struct rlimit rlim;
+
+ page_size = sysconf(_SC_PAGE_SIZE);
+ if (!page_size)
+ ksft_exit_fail_msg("Failed to get page size %s\n",
+ strerror(errno));
+
+ if (getrlimit(RLIMIT_MEMLOCK, &rlim))
+ ksft_exit_fail_msg("Unable to detect mlock limit: %s\n",
+ strerror(errno));
+
+ mlock_limit_cur = rlim.rlim_cur;
+ mlock_limit_max = rlim.rlim_max;
+
+ printf("page_size: %ld, mlock.soft: %ld, mlock.hard: %ld\n",
+ page_size, mlock_limit_cur, mlock_limit_max);
+
+ if (page_size > mlock_limit_cur)
+ mlock_limit_cur = page_size;
+ if (page_size > mlock_limit_max)
+ mlock_limit_max = page_size;
+
+ if (set_cap_limits(mlock_limit_max))
+ ksft_exit_fail_msg("Unable to set mlock limit: %s\n",
+ strerror(errno));
+}
+
+#define NUM_TESTS 4
+
+int main(int argc, char *argv[])
+{
+ int fd;
+
+ prepare();
+
+ ksft_print_header();
+ ksft_set_plan(NUM_TESTS);
+
+ fd = memfd_secret(0);
+ if (fd < 0) {
+ if (errno == ENOSYS)
+ ksft_exit_skip("memfd_secret is not supported\n");
+ else
+ ksft_exit_fail_msg("memfd_secret failed: %s\n",
+ strerror(errno));
+ }
+
+ test_mlock_limit(fd);
+ test_file_apis(fd);
+ test_process_vm_read(fd);
+ test_ptrace(fd);
+
+ close(fd);
+
+ ksft_finished();
+}
+
+#else /* __NR_memfd_secret */
+
+int main(int argc, char *argv[])
+{
+ printf("skip: skipping memfd_secret test (missing __NR_memfd_secret)\n");
+ return KSFT_SKIP;
+}
+
+#endif /* __NR_memfd_secret */
diff --git a/tools/testing/selftests/mm/migration.c b/tools/testing/selftests/mm/migration.c
new file mode 100644
index 000000000000..6908569ef406
--- /dev/null
+++ b/tools/testing/selftests/mm/migration.c
@@ -0,0 +1,202 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * The main purpose of the tests here is to exercise the migration entry code
+ * paths in the kernel.
+ */
+
+#include "../kselftest_harness.h"
+#include <strings.h>
+#include <pthread.h>
+#include <numa.h>
+#include <numaif.h>
+#include <sys/mman.h>
+#include <sys/prctl.h>
+#include <sys/types.h>
+#include <signal.h>
+#include <time.h>
+
+#define TWOMEG (2<<20)
+#define RUNTIME (20)
+
+#define ALIGN(x, a) (((x) + (a - 1)) & (~((a) - 1)))
+
+FIXTURE(migration)
+{
+ pthread_t *threads;
+ pid_t *pids;
+ int nthreads;
+ int n1;
+ int n2;
+};
+
+FIXTURE_SETUP(migration)
+{
+ int n;
+
+ ASSERT_EQ(numa_available(), 0);
+ self->nthreads = numa_num_task_cpus() - 1;
+ self->n1 = -1;
+ self->n2 = -1;
+
+ for (n = 0; n < numa_max_possible_node(); n++)
+ if (numa_bitmask_isbitset(numa_all_nodes_ptr, n)) {
+ if (self->n1 == -1) {
+ self->n1 = n;
+ } else {
+ self->n2 = n;
+ break;
+ }
+ }
+
+ self->threads = malloc(self->nthreads * sizeof(*self->threads));
+ ASSERT_NE(self->threads, NULL);
+ self->pids = malloc(self->nthreads * sizeof(*self->pids));
+ ASSERT_NE(self->pids, NULL);
+};
+
+FIXTURE_TEARDOWN(migration)
+{
+ free(self->threads);
+ free(self->pids);
+}
+
+int migrate(uint64_t *ptr, int n1, int n2)
+{
+ int ret, tmp;
+ int status = 0;
+ struct timespec ts1, ts2;
+
+ if (clock_gettime(CLOCK_MONOTONIC, &ts1))
+ return -1;
+
+ while (1) {
+ if (clock_gettime(CLOCK_MONOTONIC, &ts2))
+ return -1;
+
+ if (ts2.tv_sec - ts1.tv_sec >= RUNTIME)
+ return 0;
+
+ ret = move_pages(0, 1, (void **) &ptr, &n2, &status,
+ MPOL_MF_MOVE_ALL);
+ if (ret) {
+ if (ret > 0)
+ printf("Didn't migrate %d pages\n", ret);
+ else
+ perror("Couldn't migrate pages");
+ return -2;
+ }
+
+ tmp = n2;
+ n2 = n1;
+ n1 = tmp;
+ }
+
+ return 0;
+}
+
+void *access_mem(void *ptr)
+{
+ volatile uint64_t y = 0;
+ volatile uint64_t *x = ptr;
+
+ while (1) {
+ pthread_testcancel();
+ y += *x;
+
+ /* Prevent the compiler from optimizing out the writes to y: */
+ asm volatile("" : "+r" (y));
+ }
+
+ return NULL;
+}
+
+/*
+ * Basic migration entry testing. One thread will move pages back and forth
+ * between nodes whilst other threads try and access them triggering the
+ * migration entry wait paths in the kernel.
+ */
+TEST_F_TIMEOUT(migration, private_anon, 2*RUNTIME)
+{
+ uint64_t *ptr;
+ int i;
+
+ if (self->nthreads < 2 || self->n1 < 0 || self->n2 < 0)
+ SKIP(return, "Not enough threads or NUMA nodes available");
+
+ ptr = mmap(NULL, TWOMEG, PROT_READ | PROT_WRITE,
+ MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
+ ASSERT_NE(ptr, MAP_FAILED);
+
+ memset(ptr, 0xde, TWOMEG);
+ for (i = 0; i < self->nthreads - 1; i++)
+ if (pthread_create(&self->threads[i], NULL, access_mem, ptr))
+ perror("Couldn't create thread");
+
+ ASSERT_EQ(migrate(ptr, self->n1, self->n2), 0);
+ for (i = 0; i < self->nthreads - 1; i++)
+ ASSERT_EQ(pthread_cancel(self->threads[i]), 0);
+}
+
+/*
+ * Same as the previous test but with shared memory.
+ */
+TEST_F_TIMEOUT(migration, shared_anon, 2*RUNTIME)
+{
+ pid_t pid;
+ uint64_t *ptr;
+ int i;
+
+ if (self->nthreads < 2 || self->n1 < 0 || self->n2 < 0)
+ SKIP(return, "Not enough threads or NUMA nodes available");
+
+ ptr = mmap(NULL, TWOMEG, PROT_READ | PROT_WRITE,
+ MAP_SHARED | MAP_ANONYMOUS, -1, 0);
+ ASSERT_NE(ptr, MAP_FAILED);
+
+ memset(ptr, 0xde, TWOMEG);
+ for (i = 0; i < self->nthreads - 1; i++) {
+ pid = fork();
+ if (!pid) {
+ prctl(PR_SET_PDEATHSIG, SIGHUP);
+ /* Parent may have died before prctl so check now. */
+ if (getppid() == 1)
+ kill(getpid(), SIGHUP);
+ access_mem(ptr);
+ } else {
+ self->pids[i] = pid;
+ }
+ }
+
+ ASSERT_EQ(migrate(ptr, self->n1, self->n2), 0);
+ for (i = 0; i < self->nthreads - 1; i++)
+ ASSERT_EQ(kill(self->pids[i], SIGTERM), 0);
+}
+
+/*
+ * Tests the pmd migration entry paths.
+ */
+TEST_F_TIMEOUT(migration, private_anon_thp, 2*RUNTIME)
+{
+ uint64_t *ptr;
+ int i;
+
+ if (self->nthreads < 2 || self->n1 < 0 || self->n2 < 0)
+ SKIP(return, "Not enough threads or NUMA nodes available");
+
+ ptr = mmap(NULL, 2*TWOMEG, PROT_READ | PROT_WRITE,
+ MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
+ ASSERT_NE(ptr, MAP_FAILED);
+
+ ptr = (uint64_t *) ALIGN((uintptr_t) ptr, TWOMEG);
+ ASSERT_EQ(madvise(ptr, TWOMEG, MADV_HUGEPAGE), 0);
+ memset(ptr, 0xde, TWOMEG);
+ for (i = 0; i < self->nthreads - 1; i++)
+ if (pthread_create(&self->threads[i], NULL, access_mem, ptr))
+ perror("Couldn't create thread");
+
+ ASSERT_EQ(migrate(ptr, self->n1, self->n2), 0);
+ for (i = 0; i < self->nthreads - 1; i++)
+ ASSERT_EQ(pthread_cancel(self->threads[i]), 0);
+}
+
+TEST_HARNESS_MAIN
diff --git a/tools/testing/selftests/mm/mkdirty.c b/tools/testing/selftests/mm/mkdirty.c
new file mode 100644
index 000000000000..301abb99e027
--- /dev/null
+++ b/tools/testing/selftests/mm/mkdirty.c
@@ -0,0 +1,379 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Test handling of code that might set PTE/PMD dirty in read-only VMAs.
+ * Setting a PTE/PMD dirty must not accidentally set the PTE/PMD writable.
+ *
+ * Copyright 2023, Red Hat, Inc.
+ *
+ * Author(s): David Hildenbrand <david@redhat.com>
+ */
+#include <fcntl.h>
+#include <signal.h>
+#include <unistd.h>
+#include <string.h>
+#include <errno.h>
+#include <stdlib.h>
+#include <stdbool.h>
+#include <stdint.h>
+#include <sys/mman.h>
+#include <setjmp.h>
+#include <sys/syscall.h>
+#include <sys/ioctl.h>
+#include <linux/userfaultfd.h>
+#include <linux/mempolicy.h>
+
+#include "../kselftest.h"
+#include "vm_util.h"
+
+static size_t pagesize;
+static size_t thpsize;
+static int mem_fd;
+static int pagemap_fd;
+static sigjmp_buf env;
+
+static void signal_handler(int sig)
+{
+ if (sig == SIGSEGV)
+ siglongjmp(env, 1);
+ siglongjmp(env, 2);
+}
+
+static void do_test_write_sigsegv(char *mem)
+{
+ char orig = *mem;
+ int ret;
+
+ if (signal(SIGSEGV, signal_handler) == SIG_ERR) {
+ ksft_test_result_fail("signal() failed\n");
+ return;
+ }
+
+ ret = sigsetjmp(env, 1);
+ if (!ret)
+ *mem = orig + 1;
+
+ if (signal(SIGSEGV, SIG_DFL) == SIG_ERR)
+ ksft_test_result_fail("signal() failed\n");
+
+ ksft_test_result(ret == 1 && *mem == orig,
+ "SIGSEGV generated, page not modified\n");
+}
+
+static char *mmap_thp_range(int prot, char **_mmap_mem, size_t *_mmap_size)
+{
+ const size_t mmap_size = 2 * thpsize;
+ char *mem, *mmap_mem;
+
+ mmap_mem = mmap(NULL, mmap_size, prot, MAP_PRIVATE|MAP_ANON,
+ -1, 0);
+ if (mmap_mem == MAP_FAILED) {
+ ksft_test_result_fail("mmap() failed\n");
+ return MAP_FAILED;
+ }
+ mem = (char *)(((uintptr_t)mmap_mem + thpsize) & ~(thpsize - 1));
+
+ if (madvise(mem, thpsize, MADV_HUGEPAGE)) {
+ ksft_test_result_skip("MADV_HUGEPAGE failed\n");
+ munmap(mmap_mem, mmap_size);
+ return MAP_FAILED;
+ }
+
+ *_mmap_mem = mmap_mem;
+ *_mmap_size = mmap_size;
+ return mem;
+}
+
+static void test_ptrace_write(void)
+{
+ char data = 1;
+ char *mem;
+ int ret;
+
+ ksft_print_msg("[INFO] PTRACE write access\n");
+
+ mem = mmap(NULL, pagesize, PROT_READ, MAP_PRIVATE|MAP_ANON, -1, 0);
+ if (mem == MAP_FAILED) {
+ ksft_test_result_fail("mmap() failed\n");
+ return;
+ }
+
+ /* Fault in the shared zeropage. */
+ if (*mem != 0) {
+ ksft_test_result_fail("Memory not zero\n");
+ goto munmap;
+ }
+
+ /*
+ * Unshare the page (populating a fresh anon page that might be set
+ * dirty in the PTE) in the read-only VMA using ptrace (FOLL_FORCE).
+ */
+ lseek(mem_fd, (uintptr_t) mem, SEEK_SET);
+ ret = write(mem_fd, &data, 1);
+ if (ret != 1 || *mem != data) {
+ ksft_test_result_fail("write() failed\n");
+ goto munmap;
+ }
+
+ do_test_write_sigsegv(mem);
+munmap:
+ munmap(mem, pagesize);
+}
+
+static void test_ptrace_write_thp(void)
+{
+ char *mem, *mmap_mem;
+ size_t mmap_size;
+ char data = 1;
+ int ret;
+
+ ksft_print_msg("[INFO] PTRACE write access to THP\n");
+
+ mem = mmap_thp_range(PROT_READ, &mmap_mem, &mmap_size);
+ if (mem == MAP_FAILED)
+ return;
+
+ /*
+ * Write to the first subpage in the read-only VMA using
+ * ptrace(FOLL_FORCE), eventually placing a fresh THP that is marked
+ * dirty in the PMD.
+ */
+ lseek(mem_fd, (uintptr_t) mem, SEEK_SET);
+ ret = write(mem_fd, &data, 1);
+ if (ret != 1 || *mem != data) {
+ ksft_test_result_fail("write() failed\n");
+ goto munmap;
+ }
+
+ /* MM populated a THP if we got the last subpage populated as well. */
+ if (!pagemap_is_populated(pagemap_fd, mem + thpsize - pagesize)) {
+ ksft_test_result_skip("Did not get a THP populated\n");
+ goto munmap;
+ }
+
+ do_test_write_sigsegv(mem);
+munmap:
+ munmap(mmap_mem, mmap_size);
+}
+
+static void test_page_migration(void)
+{
+ char *mem;
+
+ ksft_print_msg("[INFO] Page migration\n");
+
+ mem = mmap(NULL, pagesize, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANON,
+ -1, 0);
+ if (mem == MAP_FAILED) {
+ ksft_test_result_fail("mmap() failed\n");
+ return;
+ }
+
+ /* Populate a fresh page and dirty it. */
+ memset(mem, 1, pagesize);
+ if (mprotect(mem, pagesize, PROT_READ)) {
+ ksft_test_result_fail("mprotect() failed\n");
+ goto munmap;
+ }
+
+ /* Trigger page migration. Might not be available or fail. */
+ if (syscall(__NR_mbind, mem, pagesize, MPOL_LOCAL, NULL, 0x7fful,
+ MPOL_MF_MOVE)) {
+ ksft_test_result_skip("mbind() failed\n");
+ goto munmap;
+ }
+
+ do_test_write_sigsegv(mem);
+munmap:
+ munmap(mem, pagesize);
+}
+
+static void test_page_migration_thp(void)
+{
+ char *mem, *mmap_mem;
+ size_t mmap_size;
+
+ ksft_print_msg("[INFO] Page migration of THP\n");
+
+ mem = mmap_thp_range(PROT_READ|PROT_WRITE, &mmap_mem, &mmap_size);
+ if (mem == MAP_FAILED)
+ return;
+
+ /*
+ * Write to the first page, which might populate a fresh anon THP
+ * and dirty it.
+ */
+ memset(mem, 1, pagesize);
+ if (mprotect(mem, thpsize, PROT_READ)) {
+ ksft_test_result_fail("mprotect() failed\n");
+ goto munmap;
+ }
+
+ /* MM populated a THP if we got the last subpage populated as well. */
+ if (!pagemap_is_populated(pagemap_fd, mem + thpsize - pagesize)) {
+ ksft_test_result_skip("Did not get a THP populated\n");
+ goto munmap;
+ }
+
+ /* Trigger page migration. Might not be available or fail. */
+ if (syscall(__NR_mbind, mem, thpsize, MPOL_LOCAL, NULL, 0x7fful,
+ MPOL_MF_MOVE)) {
+ ksft_test_result_skip("mbind() failed\n");
+ goto munmap;
+ }
+
+ do_test_write_sigsegv(mem);
+munmap:
+ munmap(mmap_mem, mmap_size);
+}
+
+static void test_pte_mapped_thp(void)
+{
+ char *mem, *mmap_mem;
+ size_t mmap_size;
+
+ ksft_print_msg("[INFO] PTE-mapping a THP\n");
+
+ mem = mmap_thp_range(PROT_READ|PROT_WRITE, &mmap_mem, &mmap_size);
+ if (mem == MAP_FAILED)
+ return;
+
+ /*
+ * Write to the first page, which might populate a fresh anon THP
+ * and dirty it.
+ */
+ memset(mem, 1, pagesize);
+ if (mprotect(mem, thpsize, PROT_READ)) {
+ ksft_test_result_fail("mprotect() failed\n");
+ goto munmap;
+ }
+
+ /* MM populated a THP if we got the last subpage populated as well. */
+ if (!pagemap_is_populated(pagemap_fd, mem + thpsize - pagesize)) {
+ ksft_test_result_skip("Did not get a THP populated\n");
+ goto munmap;
+ }
+
+ /* Trigger PTE-mapping the THP by mprotect'ing the last subpage. */
+ if (mprotect(mem + thpsize - pagesize, pagesize,
+ PROT_READ|PROT_WRITE)) {
+ ksft_test_result_fail("mprotect() failed\n");
+ goto munmap;
+ }
+
+ do_test_write_sigsegv(mem);
+munmap:
+ munmap(mmap_mem, mmap_size);
+}
+
+#ifdef __NR_userfaultfd
+static void test_uffdio_copy(void)
+{
+ struct uffdio_register uffdio_register;
+ struct uffdio_copy uffdio_copy;
+ struct uffdio_api uffdio_api;
+ char *dst, *src;
+ int uffd;
+
+ ksft_print_msg("[INFO] UFFDIO_COPY\n");
+
+ src = malloc(pagesize);
+ memset(src, 1, pagesize);
+ dst = mmap(NULL, pagesize, PROT_READ, MAP_PRIVATE|MAP_ANON, -1, 0);
+ if (dst == MAP_FAILED) {
+ ksft_test_result_fail("mmap() failed\n");
+ return;
+ }
+
+ uffd = syscall(__NR_userfaultfd, O_CLOEXEC | O_NONBLOCK);
+ if (uffd < 0) {
+ ksft_test_result_skip("__NR_userfaultfd failed\n");
+ goto munmap;
+ }
+
+ uffdio_api.api = UFFD_API;
+ uffdio_api.features = 0;
+ if (ioctl(uffd, UFFDIO_API, &uffdio_api) < 0) {
+ ksft_test_result_fail("UFFDIO_API failed\n");
+ goto close_uffd;
+ }
+
+ uffdio_register.range.start = (unsigned long) dst;
+ uffdio_register.range.len = pagesize;
+ uffdio_register.mode = UFFDIO_REGISTER_MODE_MISSING;
+ if (ioctl(uffd, UFFDIO_REGISTER, &uffdio_register)) {
+ ksft_test_result_fail("UFFDIO_REGISTER failed\n");
+ goto close_uffd;
+ }
+
+ /* Place a page in a read-only VMA, which might set the PTE dirty. */
+ uffdio_copy.dst = (unsigned long) dst;
+ uffdio_copy.src = (unsigned long) src;
+ uffdio_copy.len = pagesize;
+ uffdio_copy.mode = 0;
+ if (ioctl(uffd, UFFDIO_COPY, &uffdio_copy)) {
+ ksft_test_result_fail("UFFDIO_COPY failed\n");
+ goto close_uffd;
+ }
+
+ do_test_write_sigsegv(dst);
+close_uffd:
+ close(uffd);
+munmap:
+ munmap(dst, pagesize);
+ free(src);
+}
+#endif /* __NR_userfaultfd */
+
+int main(void)
+{
+ int err, tests = 2;
+
+ pagesize = getpagesize();
+ thpsize = read_pmd_pagesize();
+ if (thpsize) {
+ ksft_print_msg("[INFO] detected THP size: %zu KiB\n",
+ thpsize / 1024);
+ tests += 3;
+ }
+#ifdef __NR_userfaultfd
+ tests += 1;
+#endif /* __NR_userfaultfd */
+
+ ksft_print_header();
+ ksft_set_plan(tests);
+
+ mem_fd = open("/proc/self/mem", O_RDWR);
+ if (mem_fd < 0)
+ ksft_exit_fail_msg("opening /proc/self/mem failed\n");
+ pagemap_fd = open("/proc/self/pagemap", O_RDONLY);
+ if (pagemap_fd < 0)
+ ksft_exit_fail_msg("opening /proc/self/pagemap failed\n");
+
+ /*
+ * On some ptrace(FOLL_FORCE) write access via /proc/self/mem in
+ * read-only VMAs, the kernel may set the PTE/PMD dirty.
+ */
+ test_ptrace_write();
+ if (thpsize)
+ test_ptrace_write_thp();
+ /*
+ * On page migration, the kernel may set the PTE/PMD dirty when
+ * remapping the page.
+ */
+ test_page_migration();
+ if (thpsize)
+ test_page_migration_thp();
+ /* PTE-mapping a THP might propagate the dirty PMD bit to the PTEs. */
+ if (thpsize)
+ test_pte_mapped_thp();
+ /* Placing a fresh page via userfaultfd may set the PTE dirty. */
+#ifdef __NR_userfaultfd
+ test_uffdio_copy();
+#endif /* __NR_userfaultfd */
+
+ err = ksft_get_fail_cnt();
+ if (err)
+ ksft_exit_fail_msg("%d out of %d tests failed\n",
+ err, ksft_test_num());
+ return ksft_exit_pass();
+}
diff --git a/tools/testing/selftests/vm/mlock-random-test.c b/tools/testing/selftests/mm/mlock-random-test.c
index ff4d72eb74b9..1cd80b0f76c3 100644
--- a/tools/testing/selftests/vm/mlock-random-test.c
+++ b/tools/testing/selftests/mm/mlock-random-test.c
@@ -7,11 +7,13 @@
#include <sys/resource.h>
#include <sys/capability.h>
#include <sys/mman.h>
+#include <linux/mman.h>
#include <fcntl.h>
#include <string.h>
#include <sys/ipc.h>
#include <sys/shm.h>
#include <time.h>
+#include "../kselftest.h"
#include "mlock2.h"
#define CHUNK_UNIT (128 * 1024)
@@ -30,14 +32,14 @@ int set_cap_limits(rlim_t max)
new.rlim_cur = max;
new.rlim_max = max;
if (setrlimit(RLIMIT_MEMLOCK, &new)) {
- perror("setrlimit() returns error\n");
+ ksft_perror("setrlimit() returns error\n");
return -1;
}
/* drop capabilities including CAP_IPC_LOCK */
if (cap_set_proc(cap)) {
- perror("cap_set_proc() returns error\n");
- return -2;
+ ksft_perror("cap_set_proc() returns error\n");
+ return -1;
}
return 0;
@@ -51,27 +53,24 @@ int get_proc_locked_vm_size(void)
unsigned long lock_size = 0;
f = fopen("/proc/self/status", "r");
- if (!f) {
- perror("fopen");
- return -1;
- }
+ if (!f)
+ ksft_exit_fail_msg("fopen: %s\n", strerror(errno));
while (fgets(line, 1024, f)) {
if (strstr(line, "VmLck")) {
ret = sscanf(line, "VmLck:\t%8lu kB", &lock_size);
if (ret <= 0) {
- printf("sscanf() on VmLck error: %s: %d\n",
- line, ret);
fclose(f);
- return -1;
+ ksft_exit_fail_msg("sscanf() on VmLck error: %s: %d\n",
+ line, ret);
}
fclose(f);
return (int)(lock_size << 10);
}
}
- perror("cann't parse VmLck in /proc/self/status\n");
fclose(f);
+ ksft_exit_fail_msg("cannot parse VmLck in /proc/self/status: %s\n", strerror(errno));
return -1;
}
@@ -90,10 +89,8 @@ int get_proc_page_size(unsigned long addr)
size_t size;
smaps = seek_to_smaps_entry(addr);
- if (!smaps) {
- printf("Unable to parse /proc/self/smaps\n");
- return 0;
- }
+ if (!smaps)
+ ksft_exit_fail_msg("Unable to parse /proc/self/smaps\n");
while (getline(&line, &size, smaps) > 0) {
if (!strstr(line, "MMUPageSize")) {
@@ -104,12 +101,9 @@ int get_proc_page_size(unsigned long addr)
}
/* found the MMUPageSize of this section */
- if (sscanf(line, "MMUPageSize: %8lu kB",
- &mmupage_size) < 1) {
- printf("Unable to parse smaps entry for Size:%s\n",
- line);
- break;
- }
+ if (sscanf(line, "MMUPageSize: %8lu kB", &mmupage_size) < 1)
+ ksft_exit_fail_msg("Unable to parse smaps entry for Size:%s\n",
+ line);
}
free(line);
@@ -135,7 +129,7 @@ int get_proc_page_size(unsigned long addr)
* return value: 0 - success
* else: failure
*/
-int test_mlock_within_limit(char *p, int alloc_size)
+static void test_mlock_within_limit(char *p, int alloc_size)
{
int i;
int ret = 0;
@@ -144,11 +138,9 @@ int test_mlock_within_limit(char *p, int alloc_size)
int page_size = 0;
getrlimit(RLIMIT_MEMLOCK, &cur);
- if (cur.rlim_cur < alloc_size) {
- printf("alloc_size[%d] < %u rlimit,lead to mlock failure\n",
- alloc_size, (unsigned int)cur.rlim_cur);
- return -1;
- }
+ if (cur.rlim_cur < alloc_size)
+ ksft_exit_fail_msg("alloc_size[%d] < %u rlimit,lead to mlock failure\n",
+ alloc_size, (unsigned int)cur.rlim_cur);
srand(time(NULL));
for (i = 0; i < TEST_LOOP; i++) {
@@ -168,13 +160,11 @@ int test_mlock_within_limit(char *p, int alloc_size)
ret = mlock2_(p + start_offset, lock_size,
MLOCK_ONFAULT);
- if (ret) {
- printf("%s() failure at |%p(%d)| mlock:|%p(%d)|\n",
- is_mlock ? "mlock" : "mlock2",
- p, alloc_size,
- p + start_offset, lock_size);
- return ret;
- }
+ if (ret)
+ ksft_exit_fail_msg("%s() failure at |%p(%d)| mlock:|%p(%d)|\n",
+ is_mlock ? "mlock" : "mlock2",
+ p, alloc_size,
+ p + start_offset, lock_size);
}
/*
@@ -182,18 +172,12 @@ int test_mlock_within_limit(char *p, int alloc_size)
*/
locked_vm_size = get_proc_locked_vm_size();
page_size = get_proc_page_size((unsigned long)p);
- if (page_size == 0) {
- printf("cannot get proc MMUPageSize\n");
- return -1;
- }
- if (locked_vm_size > PAGE_ALIGN(alloc_size, page_size) + page_size) {
- printf("test_mlock_within_limit() left VmLck:%d on %d chunk\n",
- locked_vm_size, alloc_size);
- return -1;
- }
+ if (locked_vm_size > PAGE_ALIGN(alloc_size, page_size) + page_size)
+ ksft_exit_fail_msg("%s left VmLck:%d on %d chunk\n",
+ __func__, locked_vm_size, alloc_size);
- return 0;
+ ksft_test_result_pass("%s\n", __func__);
}
@@ -212,7 +196,7 @@ int test_mlock_within_limit(char *p, int alloc_size)
* return value: 0 - success
* else: failure
*/
-int test_mlock_outof_limit(char *p, int alloc_size)
+static void test_mlock_outof_limit(char *p, int alloc_size)
{
int i;
int ret = 0;
@@ -220,11 +204,9 @@ int test_mlock_outof_limit(char *p, int alloc_size)
struct rlimit cur;
getrlimit(RLIMIT_MEMLOCK, &cur);
- if (cur.rlim_cur >= alloc_size) {
- printf("alloc_size[%d] >%u rlimit, violates test condition\n",
- alloc_size, (unsigned int)cur.rlim_cur);
- return -1;
- }
+ if (cur.rlim_cur >= alloc_size)
+ ksft_exit_fail_msg("alloc_size[%d] >%u rlimit, violates test condition\n",
+ alloc_size, (unsigned int)cur.rlim_cur);
old_locked_vm_size = get_proc_locked_vm_size();
srand(time(NULL));
@@ -239,56 +221,47 @@ int test_mlock_outof_limit(char *p, int alloc_size)
else
ret = mlock2_(p + start_offset, lock_size,
MLOCK_ONFAULT);
- if (ret == 0) {
- printf("%s() succeeds? on %p(%d) mlock%p(%d)\n",
- is_mlock ? "mlock" : "mlock2",
- p, alloc_size,
- p + start_offset, lock_size);
- return -1;
- }
+ if (ret == 0)
+ ksft_exit_fail_msg("%s() succeeds? on %p(%d) mlock%p(%d)\n",
+ is_mlock ? "mlock" : "mlock2",
+ p, alloc_size, p + start_offset, lock_size);
}
locked_vm_size = get_proc_locked_vm_size();
- if (locked_vm_size != old_locked_vm_size) {
- printf("tests leads to new mlocked page: old[%d], new[%d]\n",
- old_locked_vm_size,
- locked_vm_size);
- return -1;
- }
+ if (locked_vm_size != old_locked_vm_size)
+ ksft_exit_fail_msg("tests leads to new mlocked page: old[%d], new[%d]\n",
+ old_locked_vm_size,
+ locked_vm_size);
- return 0;
+ ksft_test_result_pass("%s\n", __func__);
}
int main(int argc, char **argv)
{
char *p = NULL;
- int ret = 0;
+
+ ksft_print_header();
if (set_cap_limits(MLOCK_RLIMIT_SIZE))
- return -1;
+ ksft_finished();
+
+ ksft_set_plan(2);
p = malloc(MLOCK_WITHIN_LIMIT_SIZE);
- if (p == NULL) {
- perror("malloc() failure\n");
- return -1;
- }
- ret = test_mlock_within_limit(p, MLOCK_WITHIN_LIMIT_SIZE);
- if (ret)
- return ret;
+ if (p == NULL)
+ ksft_exit_fail_msg("malloc() failure: %s\n", strerror(errno));
+
+ test_mlock_within_limit(p, MLOCK_WITHIN_LIMIT_SIZE);
munlock(p, MLOCK_WITHIN_LIMIT_SIZE);
free(p);
-
p = malloc(MLOCK_OUTOF_LIMIT_SIZE);
- if (p == NULL) {
- perror("malloc() failure\n");
- return -1;
- }
- ret = test_mlock_outof_limit(p, MLOCK_OUTOF_LIMIT_SIZE);
- if (ret)
- return ret;
+ if (p == NULL)
+ ksft_exit_fail_msg("malloc() failure: %s\n", strerror(errno));
+
+ test_mlock_outof_limit(p, MLOCK_OUTOF_LIMIT_SIZE);
munlock(p, MLOCK_OUTOF_LIMIT_SIZE);
free(p);
- return 0;
+ ksft_finished();
}
diff --git a/tools/testing/selftests/vm/mlock2-tests.c b/tools/testing/selftests/mm/mlock2-tests.c
index 11b2301f3aa3..26f744188ad0 100644
--- a/tools/testing/selftests/vm/mlock2-tests.c
+++ b/tools/testing/selftests/mm/mlock2-tests.c
@@ -7,9 +7,8 @@
#include <sys/time.h>
#include <sys/resource.h>
#include <stdbool.h>
-#include "mlock2.h"
-
#include "../kselftest.h"
+#include "mlock2.h"
struct vm_boundaries {
unsigned long start;
@@ -40,17 +39,16 @@ static int get_vm_area(unsigned long addr, struct vm_boundaries *area)
while(fgets(line, 1024, file)) {
end_addr = strchr(line, '-');
if (!end_addr) {
- printf("cannot parse /proc/self/maps\n");
+ ksft_print_msg("cannot parse /proc/self/maps\n");
goto out;
}
*end_addr = '\0';
end_addr++;
stop = strchr(end_addr, ' ');
if (!stop) {
- printf("cannot parse /proc/self/maps\n");
+ ksft_print_msg("cannot parse /proc/self/maps\n");
goto out;
}
- stop = '\0';
sscanf(line, "%lx", &start);
sscanf(end_addr, "%lx", &end);
@@ -79,7 +77,7 @@ static bool is_vmflag_set(unsigned long addr, const char *vmflag)
smaps = seek_to_smaps_entry(addr);
if (!smaps) {
- printf("Unable to parse /proc/self/smaps\n");
+ ksft_print_msg("Unable to parse /proc/self/smaps\n");
goto out;
}
@@ -116,7 +114,7 @@ static unsigned long get_value_for_name(unsigned long addr, const char *name)
smaps = seek_to_smaps_entry(addr);
if (!smaps) {
- printf("Unable to parse /proc/self/smaps\n");
+ ksft_print_msg("Unable to parse /proc/self/smaps\n");
goto out;
}
@@ -130,7 +128,7 @@ static unsigned long get_value_for_name(unsigned long addr, const char *name)
value_ptr = line + strlen(name);
if (sscanf(value_ptr, "%lu kB", &value) < 1) {
- printf("Unable to parse smaps entry for Size\n");
+ ksft_print_msg("Unable to parse smaps entry for Size\n");
goto out;
}
break;
@@ -181,57 +179,45 @@ static int lock_check(unsigned long addr)
static int unlock_lock_check(char *map)
{
if (is_vmflag_set((unsigned long)map, LOCKED)) {
- printf("VMA flag %s is present on page 1 after unlock\n", LOCKED);
+ ksft_print_msg("VMA flag %s is present on page 1 after unlock\n", LOCKED);
return 1;
}
return 0;
}
-static int test_mlock_lock()
+static void test_mlock_lock(void)
{
char *map;
- int ret = 1;
unsigned long page_size = getpagesize();
map = mmap(NULL, 2 * page_size, PROT_READ | PROT_WRITE,
MAP_ANONYMOUS | MAP_PRIVATE, -1, 0);
- if (map == MAP_FAILED) {
- perror("test_mlock_locked mmap");
- goto out;
- }
+ if (map == MAP_FAILED)
+ ksft_exit_fail_msg("mmap error: %s", strerror(errno));
if (mlock2_(map, 2 * page_size, 0)) {
- if (errno == ENOSYS) {
- printf("Cannot call new mlock family, skipping test\n");
- _exit(KSFT_SKIP);
- }
- perror("mlock2(0)");
- goto unmap;
+ munmap(map, 2 * page_size);
+ ksft_exit_fail_msg("mlock2(0): %s\n", strerror(errno));
}
- if (!lock_check((unsigned long)map))
- goto unmap;
+ ksft_test_result(lock_check((unsigned long)map), "%s: Locked\n", __func__);
/* Now unlock and recheck attributes */
if (munlock(map, 2 * page_size)) {
- perror("munlock()");
- goto unmap;
+ munmap(map, 2 * page_size);
+ ksft_exit_fail_msg("munlock(): %s\n", strerror(errno));
}
- ret = unlock_lock_check(map);
-
-unmap:
+ ksft_test_result(!unlock_lock_check(map), "%s: Locked\n", __func__);
munmap(map, 2 * page_size);
-out:
- return ret;
}
static int onfault_check(char *map)
{
*map = 'a';
if (!is_vma_lock_on_fault((unsigned long)map)) {
- printf("VMA is not marked for lock on fault\n");
+ ksft_print_msg("VMA is not marked for lock on fault\n");
return 1;
}
@@ -244,172 +230,131 @@ static int unlock_onfault_check(char *map)
if (is_vma_lock_on_fault((unsigned long)map) ||
is_vma_lock_on_fault((unsigned long)map + page_size)) {
- printf("VMA is still lock on fault after unlock\n");
+ ksft_print_msg("VMA is still lock on fault after unlock\n");
return 1;
}
return 0;
}
-static int test_mlock_onfault()
+static void test_mlock_onfault(void)
{
char *map;
- int ret = 1;
unsigned long page_size = getpagesize();
map = mmap(NULL, 2 * page_size, PROT_READ | PROT_WRITE,
MAP_ANONYMOUS | MAP_PRIVATE, -1, 0);
- if (map == MAP_FAILED) {
- perror("test_mlock_locked mmap");
- goto out;
- }
+ if (map == MAP_FAILED)
+ ksft_exit_fail_msg("mmap error: %s", strerror(errno));
if (mlock2_(map, 2 * page_size, MLOCK_ONFAULT)) {
- if (errno == ENOSYS) {
- printf("Cannot call new mlock family, skipping test\n");
- _exit(KSFT_SKIP);
- }
- perror("mlock2(MLOCK_ONFAULT)");
- goto unmap;
+ munmap(map, 2 * page_size);
+ ksft_exit_fail_msg("mlock2(MLOCK_ONFAULT): %s\n", strerror(errno));
}
- if (onfault_check(map))
- goto unmap;
+ ksft_test_result(!onfault_check(map), "%s: VMA marked for lock on fault\n", __func__);
/* Now unlock and recheck attributes */
if (munlock(map, 2 * page_size)) {
- if (errno == ENOSYS) {
- printf("Cannot call new mlock family, skipping test\n");
- _exit(KSFT_SKIP);
- }
- perror("munlock()");
- goto unmap;
+ munmap(map, 2 * page_size);
+ ksft_exit_fail_msg("munlock(): %s\n", strerror(errno));
}
- ret = unlock_onfault_check(map);
-unmap:
+ ksft_test_result(!unlock_onfault_check(map), "VMA open lock after fault\n");
munmap(map, 2 * page_size);
-out:
- return ret;
}
-static int test_lock_onfault_of_present()
+static void test_lock_onfault_of_present(void)
{
char *map;
- int ret = 1;
unsigned long page_size = getpagesize();
map = mmap(NULL, 2 * page_size, PROT_READ | PROT_WRITE,
MAP_ANONYMOUS | MAP_PRIVATE, -1, 0);
- if (map == MAP_FAILED) {
- perror("test_mlock_locked mmap");
- goto out;
- }
+ if (map == MAP_FAILED)
+ ksft_exit_fail_msg("mmap error: %s", strerror(errno));
*map = 'a';
if (mlock2_(map, 2 * page_size, MLOCK_ONFAULT)) {
- if (errno == ENOSYS) {
- printf("Cannot call new mlock family, skipping test\n");
- _exit(KSFT_SKIP);
- }
- perror("mlock2(MLOCK_ONFAULT)");
- goto unmap;
+ munmap(map, 2 * page_size);
+ ksft_test_result_fail("mlock2(MLOCK_ONFAULT) error: %s", strerror(errno));
}
- if (!is_vma_lock_on_fault((unsigned long)map) ||
- !is_vma_lock_on_fault((unsigned long)map + page_size)) {
- printf("VMA with present pages is not marked lock on fault\n");
- goto unmap;
- }
- ret = 0;
-unmap:
+ ksft_test_result(is_vma_lock_on_fault((unsigned long)map) ||
+ is_vma_lock_on_fault((unsigned long)map + page_size),
+ "VMA with present pages is not marked lock on fault\n");
munmap(map, 2 * page_size);
-out:
- return ret;
}
-static int test_munlockall()
+static void test_munlockall0(void)
{
char *map;
- int ret = 1;
unsigned long page_size = getpagesize();
map = mmap(NULL, 2 * page_size, PROT_READ | PROT_WRITE,
MAP_ANONYMOUS | MAP_PRIVATE, -1, 0);
-
- if (map == MAP_FAILED) {
- perror("test_munlockall mmap");
- goto out;
- }
+ if (map == MAP_FAILED)
+ ksft_exit_fail_msg("mmap error: %s\n", strerror(errno));
if (mlockall(MCL_CURRENT)) {
- perror("mlockall(MCL_CURRENT)");
- goto out;
+ munmap(map, 2 * page_size);
+ ksft_exit_fail_msg("mlockall(MCL_CURRENT): %s\n", strerror(errno));
}
- if (!lock_check((unsigned long)map))
- goto unmap;
+ ksft_test_result(lock_check((unsigned long)map), "%s: Locked memory area\n", __func__);
if (munlockall()) {
- perror("munlockall()");
- goto unmap;
+ munmap(map, 2 * page_size);
+ ksft_exit_fail_msg("munlockall(): %s\n", strerror(errno));
}
- if (unlock_lock_check(map))
- goto unmap;
-
+ ksft_test_result(!unlock_lock_check(map), "%s: No locked memory\n", __func__);
munmap(map, 2 * page_size);
+}
+
+static void test_munlockall1(void)
+{
+ char *map;
+ unsigned long page_size = getpagesize();
map = mmap(NULL, 2 * page_size, PROT_READ | PROT_WRITE,
MAP_ANONYMOUS | MAP_PRIVATE, -1, 0);
-
- if (map == MAP_FAILED) {
- perror("test_munlockall second mmap");
- goto out;
- }
+ if (map == MAP_FAILED)
+ ksft_exit_fail_msg("mmap error: %s", strerror(errno));
if (mlockall(MCL_CURRENT | MCL_ONFAULT)) {
- perror("mlockall(MCL_CURRENT | MCL_ONFAULT)");
- goto unmap;
+ munmap(map, 2 * page_size);
+ ksft_exit_fail_msg("mlockall(MCL_CURRENT | MCL_ONFAULT): %s\n", strerror(errno));
}
- if (onfault_check(map))
- goto unmap;
+ ksft_test_result(!onfault_check(map), "%s: VMA marked for lock on fault\n", __func__);
if (munlockall()) {
- perror("munlockall()");
- goto unmap;
+ munmap(map, 2 * page_size);
+ ksft_exit_fail_msg("munlockall(): %s\n", strerror(errno));
}
- if (unlock_onfault_check(map))
- goto unmap;
+ ksft_test_result(!unlock_onfault_check(map), "%s: Unlocked\n", __func__);
if (mlockall(MCL_CURRENT | MCL_FUTURE)) {
- perror("mlockall(MCL_CURRENT | MCL_FUTURE)");
- goto out;
+ munmap(map, 2 * page_size);
+ ksft_exit_fail_msg("mlockall(MCL_CURRENT | MCL_FUTURE): %s\n", strerror(errno));
}
- if (!lock_check((unsigned long)map))
- goto unmap;
+ ksft_test_result(lock_check((unsigned long)map), "%s: Locked\n", __func__);
if (munlockall()) {
- perror("munlockall()");
- goto unmap;
+ munmap(map, 2 * page_size);
+ ksft_exit_fail_msg("munlockall() %s\n", strerror(errno));
}
- ret = unlock_lock_check(map);
-
-unmap:
+ ksft_test_result(!unlock_lock_check(map), "%s: No locked memory\n", __func__);
munmap(map, 2 * page_size);
-out:
- munlockall();
- return ret;
}
-static int test_vma_management(bool call_mlock)
+static void test_vma_management(bool call_mlock)
{
- int ret = 1;
void *map;
unsigned long page_size = getpagesize();
struct vm_boundaries page1;
@@ -418,25 +363,19 @@ static int test_vma_management(bool call_mlock)
map = mmap(NULL, 3 * page_size, PROT_READ | PROT_WRITE,
MAP_ANONYMOUS | MAP_PRIVATE, -1, 0);
- if (map == MAP_FAILED) {
- perror("mmap()");
- return ret;
- }
+ if (map == MAP_FAILED)
+ ksft_exit_fail_msg("mmap error: %s", strerror(errno));
if (call_mlock && mlock2_(map, 3 * page_size, MLOCK_ONFAULT)) {
- if (errno == ENOSYS) {
- printf("Cannot call new mlock family, skipping test\n");
- _exit(KSFT_SKIP);
- }
- perror("mlock(ONFAULT)\n");
- goto out;
+ munmap(map, 3 * page_size);
+ ksft_test_result_fail("mlock error: %s", strerror(errno));
}
if (get_vm_area((unsigned long)map, &page1) ||
get_vm_area((unsigned long)map + page_size, &page2) ||
get_vm_area((unsigned long)map + page_size * 2, &page3)) {
- printf("couldn't find mapping in /proc/self/maps\n");
- goto out;
+ munmap(map, 3 * page_size);
+ ksft_test_result_fail("couldn't find mapping in /proc/self/maps");
}
/*
@@ -445,76 +384,86 @@ static int test_vma_management(bool call_mlock)
* not a failure)
*/
if (page1.start != page2.start || page2.start != page3.start) {
- printf("VMAs are not merged to start, aborting test\n");
- ret = 0;
- goto out;
+ munmap(map, 3 * page_size);
+ ksft_test_result_fail("VMAs are not merged to start, aborting test");
}
if (munlock(map + page_size, page_size)) {
- perror("munlock()");
- goto out;
+ munmap(map, 3 * page_size);
+ ksft_test_result_fail("munlock(): %s", strerror(errno));
}
if (get_vm_area((unsigned long)map, &page1) ||
get_vm_area((unsigned long)map + page_size, &page2) ||
get_vm_area((unsigned long)map + page_size * 2, &page3)) {
- printf("couldn't find mapping in /proc/self/maps\n");
- goto out;
+ munmap(map, 3 * page_size);
+ ksft_test_result_fail("couldn't find mapping in /proc/self/maps");
}
/* All three VMAs should be different */
if (page1.start == page2.start || page2.start == page3.start) {
- printf("failed to split VMA for munlock\n");
- goto out;
+ munmap(map, 3 * page_size);
+ ksft_test_result_fail("failed to split VMA for munlock");
}
/* Now unlock the first and third page and check the VMAs again */
if (munlock(map, page_size * 3)) {
- perror("munlock()");
- goto out;
+ munmap(map, 3 * page_size);
+ ksft_test_result_fail("munlock(): %s", strerror(errno));
}
if (get_vm_area((unsigned long)map, &page1) ||
get_vm_area((unsigned long)map + page_size, &page2) ||
get_vm_area((unsigned long)map + page_size * 2, &page3)) {
- printf("couldn't find mapping in /proc/self/maps\n");
- goto out;
+ munmap(map, 3 * page_size);
+ ksft_test_result_fail("couldn't find mapping in /proc/self/maps");
}
/* Now all three VMAs should be the same */
if (page1.start != page2.start || page2.start != page3.start) {
- printf("failed to merge VMAs after munlock\n");
- goto out;
+ munmap(map, 3 * page_size);
+ ksft_test_result_fail("failed to merge VMAs after munlock");
}
- ret = 0;
-out:
+ ksft_test_result_pass("%s call_mlock %d\n", __func__, call_mlock);
munmap(map, 3 * page_size);
- return ret;
}
-static int test_mlockall(int (test_function)(bool call_mlock))
+static void test_mlockall(void)
{
- int ret = 1;
+ if (mlockall(MCL_CURRENT | MCL_ONFAULT | MCL_FUTURE))
+ ksft_exit_fail_msg("mlockall failed: %s\n", strerror(errno));
- if (mlockall(MCL_CURRENT | MCL_ONFAULT | MCL_FUTURE)) {
- perror("mlockall");
- return ret;
- }
-
- ret = test_function(false);
+ test_vma_management(false);
munlockall();
- return ret;
}
int main(int argc, char **argv)
{
- int ret = 0;
- ret += test_mlock_lock();
- ret += test_mlock_onfault();
- ret += test_munlockall();
- ret += test_lock_onfault_of_present();
- ret += test_vma_management(true);
- ret += test_mlockall(test_vma_management);
- return ret;
+ int ret, size = 3 * getpagesize();
+ void *map;
+
+ ksft_print_header();
+
+ map = mmap(NULL, size, PROT_READ | PROT_WRITE, MAP_ANONYMOUS | MAP_PRIVATE, -1, 0);
+ if (map == MAP_FAILED)
+ ksft_exit_fail_msg("mmap error: %s", strerror(errno));
+
+ ret = mlock2_(map, size, MLOCK_ONFAULT);
+ if (ret && errno == ENOSYS)
+ ksft_finished();
+
+ munmap(map, size);
+
+ ksft_set_plan(13);
+
+ test_mlock_lock();
+ test_mlock_onfault();
+ test_munlockall0();
+ test_munlockall1();
+ test_lock_onfault_of_present();
+ test_vma_management(true);
+ test_mlockall();
+
+ ksft_finished();
}
diff --git a/tools/testing/selftests/vm/mlock2.h b/tools/testing/selftests/mm/mlock2.h
index 2a6e76c226bc..4417eaa5cfb7 100644
--- a/tools/testing/selftests/vm/mlock2.h
+++ b/tools/testing/selftests/mm/mlock2.h
@@ -4,22 +4,9 @@
#include <stdio.h>
#include <stdlib.h>
-#ifndef MLOCK_ONFAULT
-#define MLOCK_ONFAULT 1
-#endif
-
-#ifndef MCL_ONFAULT
-#define MCL_ONFAULT (MCL_FUTURE << 1)
-#endif
-
static int mlock2_(void *start, size_t len, int flags)
{
-#ifdef __NR_mlock2
return syscall(__NR_mlock2, start, len, flags);
-#else
- errno = ENOSYS;
- return -1;
-#endif
}
static FILE *seek_to_smaps_entry(unsigned long addr)
@@ -35,10 +22,8 @@ static FILE *seek_to_smaps_entry(unsigned long addr)
char path[BUFSIZ];
file = fopen("/proc/self/smaps", "r");
- if (!file) {
- perror("fopen smaps");
- _exit(1);
- }
+ if (!file)
+ ksft_exit_fail_msg("fopen smaps: %s\n", strerror(errno));
while (getline(&line, &size, file) > 0) {
if (sscanf(line, "%lx-%lx %s %lx %s %lu %s\n",
diff --git a/tools/testing/selftests/mm/mrelease_test.c b/tools/testing/selftests/mm/mrelease_test.c
new file mode 100644
index 000000000000..100370a7111d
--- /dev/null
+++ b/tools/testing/selftests/mm/mrelease_test.c
@@ -0,0 +1,184 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright 2022 Google LLC
+ */
+#define _GNU_SOURCE
+#include <errno.h>
+#include <stdbool.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <sys/syscall.h>
+#include <sys/wait.h>
+#include <unistd.h>
+#include <asm-generic/unistd.h>
+#include "vm_util.h"
+#include "../kselftest.h"
+
+#define MB(x) (x << 20)
+#define MAX_SIZE_MB 1024
+
+static int alloc_noexit(unsigned long nr_pages, int pipefd)
+{
+ int ppid = getppid();
+ int timeout = 10; /* 10sec timeout to get killed */
+ unsigned long i;
+ char *buf;
+
+ buf = (char *)mmap(NULL, nr_pages * psize(), PROT_READ | PROT_WRITE,
+ MAP_PRIVATE | MAP_ANON, 0, 0);
+ if (buf == MAP_FAILED)
+ ksft_exit_fail_msg("mmap failed, halting the test: %s\n", strerror(errno));
+
+ for (i = 0; i < nr_pages; i++)
+ *((unsigned long *)(buf + (i * psize()))) = i;
+
+ /* Signal the parent that the child is ready */
+ if (write(pipefd, "", 1) < 0)
+ ksft_exit_fail_msg("write: %s\n", strerror(errno));
+
+ /* Wait to be killed (when reparenting happens) */
+ while (getppid() == ppid && timeout > 0) {
+ sleep(1);
+ timeout--;
+ }
+
+ munmap(buf, nr_pages * psize());
+
+ return (timeout > 0) ? KSFT_PASS : KSFT_FAIL;
+}
+
+/* The process_mrelease calls in this test are expected to fail */
+static void run_negative_tests(int pidfd)
+{
+ /* Test invalid flags. Expect to fail with EINVAL error code. */
+ if (!syscall(__NR_process_mrelease, pidfd, (unsigned int)-1) ||
+ errno != EINVAL) {
+ ksft_exit_fail_msg("process_mrelease with wrong flags: %s\n", strerror(errno));
+ }
+ /*
+ * Test reaping while process is alive with no pending SIGKILL.
+ * Expect to fail with EINVAL error code.
+ */
+ if (!syscall(__NR_process_mrelease, pidfd, 0) || errno != EINVAL)
+ ksft_exit_fail_msg("process_mrelease on a live process: %s\n", strerror(errno));
+}
+
+static int child_main(int pipefd[], size_t size)
+{
+ int res;
+
+ /* Allocate and fault-in memory and wait to be killed */
+ close(pipefd[0]);
+ res = alloc_noexit(MB(size) / psize(), pipefd[1]);
+ close(pipefd[1]);
+ return res;
+}
+
+int main(void)
+{
+ int pipefd[2], pidfd;
+ bool success, retry;
+ size_t size;
+ pid_t pid;
+ char byte;
+ int res;
+
+ ksft_print_header();
+ ksft_set_plan(1);
+
+ /* Test a wrong pidfd */
+ if (!syscall(__NR_process_mrelease, -1, 0) || errno != EBADF) {
+ if (errno == ENOSYS) {
+ ksft_test_result_skip("process_mrelease not implemented\n");
+ ksft_finished();
+ } else {
+ ksft_exit_fail_msg("process_mrelease with wrong pidfd: %s",
+ strerror(errno));
+ }
+ }
+
+ /* Start the test with 1MB child memory allocation */
+ size = 1;
+retry:
+ /*
+ * Pipe for the child to signal when it's done allocating
+ * memory
+ */
+ if (pipe(pipefd))
+ ksft_exit_fail_msg("pipe: %s\n", strerror(errno));
+
+ pid = fork();
+ if (pid < 0) {
+ close(pipefd[0]);
+ close(pipefd[1]);
+ ksft_exit_fail_msg("fork: %s\n", strerror(errno));
+ }
+
+ if (pid == 0) {
+ /* Child main routine */
+ res = child_main(pipefd, size);
+ exit(res);
+ }
+
+ /*
+ * Parent main routine:
+ * Wait for the child to finish allocations, then kill and reap
+ */
+ close(pipefd[1]);
+ /* Block until the child is ready */
+ res = read(pipefd[0], &byte, 1);
+ close(pipefd[0]);
+ if (res < 0) {
+ if (!kill(pid, SIGKILL))
+ waitpid(pid, NULL, 0);
+ ksft_exit_fail_msg("read: %s\n", strerror(errno));
+ }
+
+ pidfd = syscall(__NR_pidfd_open, pid, 0);
+ if (pidfd < 0) {
+ if (!kill(pid, SIGKILL))
+ waitpid(pid, NULL, 0);
+ ksft_exit_fail_msg("pidfd_open: %s\n", strerror(errno));
+ }
+
+ /* Run negative tests which require a live child */
+ run_negative_tests(pidfd);
+
+ if (kill(pid, SIGKILL))
+ ksft_exit_fail_msg("kill: %s\n", strerror(errno));
+
+ success = (syscall(__NR_process_mrelease, pidfd, 0) == 0);
+ if (!success) {
+ /*
+ * If we failed to reap because the child exited too soon,
+ * before we could call process_mrelease. Double child's memory
+ * which causes it to spend more time on cleanup and increases
+ * our chances of reaping its memory before it exits.
+ * Retry until we succeed or reach MAX_SIZE_MB.
+ */
+ if (errno == ESRCH) {
+ retry = (size <= MAX_SIZE_MB);
+ } else {
+ waitpid(pid, NULL, 0);
+ ksft_exit_fail_msg("process_mrelease: %s\n", strerror(errno));
+ }
+ }
+
+ /* Cleanup to prevent zombies */
+ if (waitpid(pid, NULL, 0) < 0)
+ ksft_exit_fail_msg("waitpid: %s\n", strerror(errno));
+
+ close(pidfd);
+
+ if (!success) {
+ if (retry) {
+ size *= 2;
+ goto retry;
+ }
+ ksft_exit_fail_msg("All process_mrelease attempts failed!\n");
+ }
+
+ ksft_test_result_pass("Success reaping a child with %zuMB of memory allocations\n",
+ size);
+ ksft_finished();
+}
diff --git a/tools/testing/selftests/vm/mremap_dontunmap.c b/tools/testing/selftests/mm/mremap_dontunmap.c
index 3a7b5ef0b0c6..1d75084b9ca5 100644
--- a/tools/testing/selftests/vm/mremap_dontunmap.c
+++ b/tools/testing/selftests/mm/mremap_dontunmap.c
@@ -7,6 +7,7 @@
*/
#define _GNU_SOURCE
#include <sys/mman.h>
+#include <linux/mman.h>
#include <errno.h>
#include <stdio.h>
#include <stdlib.h>
@@ -15,10 +16,6 @@
#include "../kselftest.h"
-#ifndef MREMAP_DONTUNMAP
-#define MREMAP_DONTUNMAP 4
-#endif
-
unsigned long page_size;
char *page_buffer;
@@ -30,14 +27,14 @@ static void dump_maps(void)
system(cmd);
}
-#define BUG_ON(condition, description) \
- do { \
- if (condition) { \
- fprintf(stderr, "[FAIL]\t%s():%d\t%s:%s\n", __func__, \
- __LINE__, (description), strerror(errno)); \
- dump_maps(); \
- exit(1); \
- } \
+#define BUG_ON(condition, description) \
+ do { \
+ if (condition) { \
+ dump_maps(); \
+ ksft_exit_fail_msg("[FAIL]\t%s:%d\t%s:%s\n", \
+ __func__, __LINE__, (description), \
+ strerror(errno)); \
+ } \
} while (0)
// Try a simple operation for to "test" for kernel support this prevents
@@ -125,6 +122,59 @@ static void mremap_dontunmap_simple()
"unable to unmap destination mapping");
BUG_ON(munmap(source_mapping, num_pages * page_size) == -1,
"unable to unmap source mapping");
+ ksft_test_result_pass("%s\n", __func__);
+}
+
+// This test validates that MREMAP_DONTUNMAP on a shared mapping works as expected.
+static void mremap_dontunmap_simple_shmem()
+{
+ unsigned long num_pages = 5;
+
+ int mem_fd = memfd_create("memfd", MFD_CLOEXEC);
+ BUG_ON(mem_fd < 0, "memfd_create");
+
+ BUG_ON(ftruncate(mem_fd, num_pages * page_size) < 0,
+ "ftruncate");
+
+ void *source_mapping =
+ mmap(NULL, num_pages * page_size, PROT_READ | PROT_WRITE,
+ MAP_FILE | MAP_SHARED, mem_fd, 0);
+ BUG_ON(source_mapping == MAP_FAILED, "mmap");
+
+ BUG_ON(close(mem_fd) < 0, "close");
+
+ memset(source_mapping, 'a', num_pages * page_size);
+
+ // Try to just move the whole mapping anywhere (not fixed).
+ void *dest_mapping =
+ mremap(source_mapping, num_pages * page_size, num_pages * page_size,
+ MREMAP_DONTUNMAP | MREMAP_MAYMOVE, NULL);
+ if (dest_mapping == MAP_FAILED && errno == EINVAL) {
+ // Old kernel which doesn't support MREMAP_DONTUNMAP on shmem.
+ BUG_ON(munmap(source_mapping, num_pages * page_size) == -1,
+ "unable to unmap source mapping");
+ return;
+ }
+
+ BUG_ON(dest_mapping == MAP_FAILED, "mremap");
+
+ // Validate that the pages have been moved, we know they were moved if
+ // the dest_mapping contains a's.
+ BUG_ON(check_region_contains_byte
+ (dest_mapping, num_pages * page_size, 'a') != 0,
+ "pages did not migrate");
+
+ // Because the region is backed by shmem, we will actually see the same
+ // memory at the source location still.
+ BUG_ON(check_region_contains_byte
+ (source_mapping, num_pages * page_size, 'a') != 0,
+ "source should have no ptes");
+
+ BUG_ON(munmap(dest_mapping, num_pages * page_size) == -1,
+ "unable to unmap destination mapping");
+ BUG_ON(munmap(source_mapping, num_pages * page_size) == -1,
+ "unable to unmap source mapping");
+ ksft_test_result_pass("%s\n", __func__);
}
// This test validates MREMAP_DONTUNMAP will move page tables to a specific
@@ -171,6 +221,7 @@ static void mremap_dontunmap_simple_fixed()
"unable to unmap destination mapping");
BUG_ON(munmap(source_mapping, num_pages * page_size) == -1,
"unable to unmap source mapping");
+ ksft_test_result_pass("%s\n", __func__);
}
// This test validates that we can MREMAP_DONTUNMAP for a portion of an
@@ -221,6 +272,7 @@ static void mremap_dontunmap_partial_mapping()
"unable to unmap destination mapping");
BUG_ON(munmap(source_mapping, num_pages * page_size) == -1,
"unable to unmap source mapping");
+ ksft_test_result_pass("%s\n", __func__);
}
// This test validates that we can remap over only a portion of a mapping.
@@ -280,19 +332,24 @@ static void mremap_dontunmap_partial_mapping_overwrite(void)
"unable to unmap destination mapping");
BUG_ON(munmap(source_mapping, 5 * page_size) == -1,
"unable to unmap source mapping");
+ ksft_test_result_pass("%s\n", __func__);
}
int main(void)
{
+ ksft_print_header();
+
page_size = sysconf(_SC_PAGE_SIZE);
// test for kernel support for MREMAP_DONTUNMAP skipping the test if
// not.
if (kernel_support_for_mremap_dontunmap() != 0) {
- printf("No kernel support for MREMAP_DONTUNMAP\n");
- return KSFT_SKIP;
+ ksft_print_msg("No kernel support for MREMAP_DONTUNMAP\n");
+ ksft_finished();
}
+ ksft_set_plan(5);
+
// Keep a page sized buffer around for when we need it.
page_buffer =
mmap(NULL, page_size, PROT_READ | PROT_WRITE,
@@ -300,6 +357,7 @@ int main(void)
BUG_ON(page_buffer == MAP_FAILED, "unable to mmap a page.");
mremap_dontunmap_simple();
+ mremap_dontunmap_simple_shmem();
mremap_dontunmap_simple_fixed();
mremap_dontunmap_partial_mapping();
mremap_dontunmap_partial_mapping_overwrite();
@@ -307,6 +365,5 @@ int main(void)
BUG_ON(munmap(page_buffer, page_size) == -1,
"unable to unmap page buffer");
- printf("OK\n");
- return 0;
+ ksft_finished();
}
diff --git a/tools/testing/selftests/mm/mremap_test.c b/tools/testing/selftests/mm/mremap_test.c
new file mode 100644
index 000000000000..2f8b991f78cb
--- /dev/null
+++ b/tools/testing/selftests/mm/mremap_test.c
@@ -0,0 +1,762 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright 2020 Google LLC
+ */
+#define _GNU_SOURCE
+
+#include <errno.h>
+#include <stdlib.h>
+#include <stdio.h>
+#include <string.h>
+#include <sys/mman.h>
+#include <time.h>
+#include <stdbool.h>
+
+#include "../kselftest.h"
+
+#define EXPECT_SUCCESS 0
+#define EXPECT_FAILURE 1
+#define NON_OVERLAPPING 0
+#define OVERLAPPING 1
+#define NS_PER_SEC 1000000000ULL
+#define VALIDATION_DEFAULT_THRESHOLD 4 /* 4MB */
+#define VALIDATION_NO_THRESHOLD 0 /* Verify the entire region */
+
+#define MIN(X, Y) ((X) < (Y) ? (X) : (Y))
+#define SIZE_MB(m) ((size_t)m * (1024 * 1024))
+#define SIZE_KB(k) ((size_t)k * 1024)
+
+struct config {
+ unsigned long long src_alignment;
+ unsigned long long dest_alignment;
+ unsigned long long region_size;
+ int overlapping;
+ int dest_preamble_size;
+};
+
+struct test {
+ const char *name;
+ struct config config;
+ int expect_failure;
+};
+
+enum {
+ _1KB = 1ULL << 10, /* 1KB -> not page aligned */
+ _4KB = 4ULL << 10,
+ _8KB = 8ULL << 10,
+ _1MB = 1ULL << 20,
+ _2MB = 2ULL << 20,
+ _4MB = 4ULL << 20,
+ _5MB = 5ULL << 20,
+ _1GB = 1ULL << 30,
+ _2GB = 2ULL << 30,
+ PMD = _2MB,
+ PUD = _1GB,
+};
+
+#define PTE page_size
+
+#define MAKE_TEST(source_align, destination_align, size, \
+ overlaps, should_fail, test_name) \
+(struct test){ \
+ .name = test_name, \
+ .config = { \
+ .src_alignment = source_align, \
+ .dest_alignment = destination_align, \
+ .region_size = size, \
+ .overlapping = overlaps, \
+ }, \
+ .expect_failure = should_fail \
+}
+
+/*
+ * Returns false if the requested remap region overlaps with an
+ * existing mapping (e.g text, stack) else returns true.
+ */
+static bool is_remap_region_valid(void *addr, unsigned long long size)
+{
+ void *remap_addr = NULL;
+ bool ret = true;
+
+ /* Use MAP_FIXED_NOREPLACE flag to ensure region is not mapped */
+ remap_addr = mmap(addr, size, PROT_READ | PROT_WRITE,
+ MAP_FIXED_NOREPLACE | MAP_ANONYMOUS | MAP_SHARED,
+ -1, 0);
+
+ if (remap_addr == MAP_FAILED) {
+ if (errno == EEXIST)
+ ret = false;
+ } else {
+ munmap(remap_addr, size);
+ }
+
+ return ret;
+}
+
+/* Returns mmap_min_addr sysctl tunable from procfs */
+static unsigned long long get_mmap_min_addr(void)
+{
+ FILE *fp;
+ int n_matched;
+ static unsigned long long addr;
+
+ if (addr)
+ return addr;
+
+ fp = fopen("/proc/sys/vm/mmap_min_addr", "r");
+ if (fp == NULL) {
+ ksft_print_msg("Failed to open /proc/sys/vm/mmap_min_addr: %s\n",
+ strerror(errno));
+ exit(KSFT_SKIP);
+ }
+
+ n_matched = fscanf(fp, "%llu", &addr);
+ if (n_matched != 1) {
+ ksft_print_msg("Failed to read /proc/sys/vm/mmap_min_addr: %s\n",
+ strerror(errno));
+ fclose(fp);
+ exit(KSFT_SKIP);
+ }
+
+ fclose(fp);
+ return addr;
+}
+
+/*
+ * Using /proc/self/maps, assert that the specified address range is contained
+ * within a single mapping.
+ */
+static bool is_range_mapped(FILE *maps_fp, void *start, void *end)
+{
+ char *line = NULL;
+ size_t len = 0;
+ bool success = false;
+
+ rewind(maps_fp);
+
+ while (getline(&line, &len, maps_fp) != -1) {
+ char *first = strtok(line, "- ");
+ void *first_val = (void *)strtol(first, NULL, 16);
+ char *second = strtok(NULL, "- ");
+ void *second_val = (void *) strtol(second, NULL, 16);
+
+ if (first_val <= start && second_val >= end) {
+ success = true;
+ break;
+ }
+ }
+
+ return success;
+}
+
+/*
+ * Returns the start address of the mapping on success, else returns
+ * NULL on failure.
+ */
+static void *get_source_mapping(struct config c)
+{
+ unsigned long long addr = 0ULL;
+ void *src_addr = NULL;
+ unsigned long long mmap_min_addr;
+
+ mmap_min_addr = get_mmap_min_addr();
+ /*
+ * For some tests, we need to not have any mappings below the
+ * source mapping. Add some headroom to mmap_min_addr for this.
+ */
+ mmap_min_addr += 10 * _4MB;
+
+retry:
+ addr += c.src_alignment;
+ if (addr < mmap_min_addr)
+ goto retry;
+
+ src_addr = mmap((void *) addr, c.region_size, PROT_READ | PROT_WRITE,
+ MAP_FIXED_NOREPLACE | MAP_ANONYMOUS | MAP_SHARED,
+ -1, 0);
+ if (src_addr == MAP_FAILED) {
+ if (errno == EPERM || errno == EEXIST)
+ goto retry;
+ goto error;
+ }
+ /*
+ * Check that the address is aligned to the specified alignment.
+ * Addresses which have alignments that are multiples of that
+ * specified are not considered valid. For instance, 1GB address is
+ * 2MB-aligned, however it will not be considered valid for a
+ * requested alignment of 2MB. This is done to reduce coincidental
+ * alignment in the tests.
+ */
+ if (((unsigned long long) src_addr & (c.src_alignment - 1)) ||
+ !((unsigned long long) src_addr & c.src_alignment)) {
+ munmap(src_addr, c.region_size);
+ goto retry;
+ }
+
+ if (!src_addr)
+ goto error;
+
+ return src_addr;
+error:
+ ksft_print_msg("Failed to map source region: %s\n",
+ strerror(errno));
+ return NULL;
+}
+
+/*
+ * This test validates that merge is called when expanding a mapping.
+ * Mapping containing three pages is created, middle page is unmapped
+ * and then the mapping containing the first page is expanded so that
+ * it fills the created hole. The two parts should merge creating
+ * single mapping with three pages.
+ */
+static void mremap_expand_merge(FILE *maps_fp, unsigned long page_size)
+{
+ char *test_name = "mremap expand merge";
+ bool success = false;
+ char *remap, *start;
+
+ start = mmap(NULL, 3 * page_size, PROT_READ | PROT_WRITE,
+ MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
+
+ if (start == MAP_FAILED) {
+ ksft_print_msg("mmap failed: %s\n", strerror(errno));
+ goto out;
+ }
+
+ munmap(start + page_size, page_size);
+ remap = mremap(start, page_size, 2 * page_size, 0);
+ if (remap == MAP_FAILED) {
+ ksft_print_msg("mremap failed: %s\n", strerror(errno));
+ munmap(start, page_size);
+ munmap(start + 2 * page_size, page_size);
+ goto out;
+ }
+
+ success = is_range_mapped(maps_fp, start, start + 3 * page_size);
+ munmap(start, 3 * page_size);
+
+out:
+ if (success)
+ ksft_test_result_pass("%s\n", test_name);
+ else
+ ksft_test_result_fail("%s\n", test_name);
+}
+
+/*
+ * Similar to mremap_expand_merge() except instead of removing the middle page,
+ * we remove the last then attempt to remap offset from the second page. This
+ * should result in the mapping being restored to its former state.
+ */
+static void mremap_expand_merge_offset(FILE *maps_fp, unsigned long page_size)
+{
+
+ char *test_name = "mremap expand merge offset";
+ bool success = false;
+ char *remap, *start;
+
+ start = mmap(NULL, 3 * page_size, PROT_READ | PROT_WRITE,
+ MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
+
+ if (start == MAP_FAILED) {
+ ksft_print_msg("mmap failed: %s\n", strerror(errno));
+ goto out;
+ }
+
+ /* Unmap final page to ensure we have space to expand. */
+ munmap(start + 2 * page_size, page_size);
+ remap = mremap(start + page_size, page_size, 2 * page_size, 0);
+ if (remap == MAP_FAILED) {
+ ksft_print_msg("mremap failed: %s\n", strerror(errno));
+ munmap(start, 2 * page_size);
+ goto out;
+ }
+
+ success = is_range_mapped(maps_fp, start, start + 3 * page_size);
+ munmap(start, 3 * page_size);
+
+out:
+ if (success)
+ ksft_test_result_pass("%s\n", test_name);
+ else
+ ksft_test_result_fail("%s\n", test_name);
+}
+
+/*
+ * Verify that an mremap within a range does not cause corruption
+ * of unrelated part of range.
+ *
+ * Consider the following range which is 2MB aligned and is
+ * a part of a larger 20MB range which is not shown. Each
+ * character is 256KB below making the source and destination
+ * 2MB each. The lower case letters are moved (s to d) and the
+ * upper case letters are not moved. The below test verifies
+ * that the upper case S letters are not corrupted by the
+ * adjacent mremap.
+ *
+ * |DDDDddddSSSSssss|
+ */
+static void mremap_move_within_range(char pattern_seed)
+{
+ char *test_name = "mremap mremap move within range";
+ void *src, *dest;
+ int i, success = 1;
+
+ size_t size = SIZE_MB(20);
+ void *ptr = mmap(NULL, size, PROT_READ | PROT_WRITE,
+ MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
+ if (ptr == MAP_FAILED) {
+ perror("mmap");
+ success = 0;
+ goto out;
+ }
+ memset(ptr, 0, size);
+
+ src = ptr + SIZE_MB(6);
+ src = (void *)((unsigned long)src & ~(SIZE_MB(2) - 1));
+
+ /* Set byte pattern for source block. */
+ srand(pattern_seed);
+ for (i = 0; i < SIZE_MB(2); i++) {
+ ((char *)src)[i] = (char) rand();
+ }
+
+ dest = src - SIZE_MB(2);
+
+ void *new_ptr = mremap(src + SIZE_MB(1), SIZE_MB(1), SIZE_MB(1),
+ MREMAP_MAYMOVE | MREMAP_FIXED, dest + SIZE_MB(1));
+ if (new_ptr == MAP_FAILED) {
+ perror("mremap");
+ success = 0;
+ goto out;
+ }
+
+ /* Verify byte pattern after remapping */
+ srand(pattern_seed);
+ for (i = 0; i < SIZE_MB(1); i++) {
+ char c = (char) rand();
+
+ if (((char *)src)[i] != c) {
+ ksft_print_msg("Data at src at %d got corrupted due to unrelated mremap\n",
+ i);
+ ksft_print_msg("Expected: %#x\t Got: %#x\n", c & 0xff,
+ ((char *) src)[i] & 0xff);
+ success = 0;
+ }
+ }
+
+out:
+ if (munmap(ptr, size) == -1)
+ perror("munmap");
+
+ if (success)
+ ksft_test_result_pass("%s\n", test_name);
+ else
+ ksft_test_result_fail("%s\n", test_name);
+}
+
+/* Returns the time taken for the remap on success else returns -1. */
+static long long remap_region(struct config c, unsigned int threshold_mb,
+ char pattern_seed)
+{
+ void *addr, *src_addr, *dest_addr, *dest_preamble_addr;
+ int d;
+ unsigned long long t;
+ struct timespec t_start = {0, 0}, t_end = {0, 0};
+ long long start_ns, end_ns, align_mask, ret, offset;
+ unsigned long long threshold;
+
+ if (threshold_mb == VALIDATION_NO_THRESHOLD)
+ threshold = c.region_size;
+ else
+ threshold = MIN(threshold_mb * _1MB, c.region_size);
+
+ src_addr = get_source_mapping(c);
+ if (!src_addr) {
+ ret = -1;
+ goto out;
+ }
+
+ /* Set byte pattern for source block. */
+ srand(pattern_seed);
+ for (t = 0; t < threshold; t++)
+ memset((char *) src_addr + t, (char) rand(), 1);
+
+ /* Mask to zero out lower bits of address for alignment */
+ align_mask = ~(c.dest_alignment - 1);
+ /* Offset of destination address from the end of the source region */
+ offset = (c.overlapping) ? -c.dest_alignment : c.dest_alignment;
+ addr = (void *) (((unsigned long long) src_addr + c.region_size
+ + offset) & align_mask);
+
+ /* Remap after the destination block preamble. */
+ addr += c.dest_preamble_size;
+
+ /* See comment in get_source_mapping() */
+ if (!((unsigned long long) addr & c.dest_alignment))
+ addr = (void *) ((unsigned long long) addr | c.dest_alignment);
+
+ /* Don't destroy existing mappings unless expected to overlap */
+ while (!is_remap_region_valid(addr, c.region_size) && !c.overlapping) {
+ /* Check for unsigned overflow */
+ if (addr + c.dest_alignment < addr) {
+ ksft_print_msg("Couldn't find a valid region to remap to\n");
+ ret = -1;
+ goto clean_up_src;
+ }
+ addr += c.dest_alignment;
+ }
+
+ if (c.dest_preamble_size) {
+ dest_preamble_addr = mmap((void *) addr - c.dest_preamble_size, c.dest_preamble_size,
+ PROT_READ | PROT_WRITE,
+ MAP_FIXED_NOREPLACE | MAP_ANONYMOUS | MAP_SHARED,
+ -1, 0);
+ if (dest_preamble_addr == MAP_FAILED) {
+ ksft_print_msg("Failed to map dest preamble region: %s\n",
+ strerror(errno));
+ ret = -1;
+ goto clean_up_src;
+ }
+
+ /* Set byte pattern for the dest preamble block. */
+ srand(pattern_seed);
+ for (d = 0; d < c.dest_preamble_size; d++)
+ memset((char *) dest_preamble_addr + d, (char) rand(), 1);
+ }
+
+ clock_gettime(CLOCK_MONOTONIC, &t_start);
+ dest_addr = mremap(src_addr, c.region_size, c.region_size,
+ MREMAP_MAYMOVE|MREMAP_FIXED, (char *) addr);
+ clock_gettime(CLOCK_MONOTONIC, &t_end);
+
+ if (dest_addr == MAP_FAILED) {
+ ksft_print_msg("mremap failed: %s\n", strerror(errno));
+ ret = -1;
+ goto clean_up_dest_preamble;
+ }
+
+ /* Verify byte pattern after remapping */
+ srand(pattern_seed);
+ for (t = 0; t < threshold; t++) {
+ char c = (char) rand();
+
+ if (((char *) dest_addr)[t] != c) {
+ ksft_print_msg("Data after remap doesn't match at offset %llu\n",
+ t);
+ ksft_print_msg("Expected: %#x\t Got: %#x\n", c & 0xff,
+ ((char *) dest_addr)[t] & 0xff);
+ ret = -1;
+ goto clean_up_dest;
+ }
+ }
+
+ /* Verify the dest preamble byte pattern after remapping */
+ if (c.dest_preamble_size) {
+ srand(pattern_seed);
+ for (d = 0; d < c.dest_preamble_size; d++) {
+ char c = (char) rand();
+
+ if (((char *) dest_preamble_addr)[d] != c) {
+ ksft_print_msg("Preamble data after remap doesn't match at offset %d\n",
+ d);
+ ksft_print_msg("Expected: %#x\t Got: %#x\n", c & 0xff,
+ ((char *) dest_preamble_addr)[d] & 0xff);
+ ret = -1;
+ goto clean_up_dest;
+ }
+ }
+ }
+
+ start_ns = t_start.tv_sec * NS_PER_SEC + t_start.tv_nsec;
+ end_ns = t_end.tv_sec * NS_PER_SEC + t_end.tv_nsec;
+ ret = end_ns - start_ns;
+
+/*
+ * Since the destination address is specified using MREMAP_FIXED, subsequent
+ * mremap will unmap any previous mapping at the address range specified by
+ * dest_addr and region_size. This significantly affects the remap time of
+ * subsequent tests. So we clean up mappings after each test.
+ */
+clean_up_dest:
+ munmap(dest_addr, c.region_size);
+clean_up_dest_preamble:
+ if (c.dest_preamble_size && dest_preamble_addr)
+ munmap(dest_preamble_addr, c.dest_preamble_size);
+clean_up_src:
+ munmap(src_addr, c.region_size);
+out:
+ return ret;
+}
+
+/*
+ * Verify that an mremap aligning down does not destroy
+ * the beginning of the mapping just because the aligned
+ * down address landed on a mapping that maybe does not exist.
+ */
+static void mremap_move_1mb_from_start(char pattern_seed)
+{
+ char *test_name = "mremap move 1mb from start at 1MB+256KB aligned src";
+ void *src = NULL, *dest = NULL;
+ int i, success = 1;
+
+ /* Config to reuse get_source_mapping() to do an aligned mmap. */
+ struct config c = {
+ .src_alignment = SIZE_MB(1) + SIZE_KB(256),
+ .region_size = SIZE_MB(6)
+ };
+
+ src = get_source_mapping(c);
+ if (!src) {
+ success = 0;
+ goto out;
+ }
+
+ c.src_alignment = SIZE_MB(1) + SIZE_KB(256);
+ dest = get_source_mapping(c);
+ if (!dest) {
+ success = 0;
+ goto out;
+ }
+
+ /* Set byte pattern for source block. */
+ srand(pattern_seed);
+ for (i = 0; i < SIZE_MB(2); i++) {
+ ((char *)src)[i] = (char) rand();
+ }
+
+ /*
+ * Unmap the beginning of dest so that the aligned address
+ * falls on no mapping.
+ */
+ munmap(dest, SIZE_MB(1));
+
+ void *new_ptr = mremap(src + SIZE_MB(1), SIZE_MB(1), SIZE_MB(1),
+ MREMAP_MAYMOVE | MREMAP_FIXED, dest + SIZE_MB(1));
+ if (new_ptr == MAP_FAILED) {
+ perror("mremap");
+ success = 0;
+ goto out;
+ }
+
+ /* Verify byte pattern after remapping */
+ srand(pattern_seed);
+ for (i = 0; i < SIZE_MB(1); i++) {
+ char c = (char) rand();
+
+ if (((char *)src)[i] != c) {
+ ksft_print_msg("Data at src at %d got corrupted due to unrelated mremap\n",
+ i);
+ ksft_print_msg("Expected: %#x\t Got: %#x\n", c & 0xff,
+ ((char *) src)[i] & 0xff);
+ success = 0;
+ }
+ }
+
+out:
+ if (src && munmap(src, c.region_size) == -1)
+ perror("munmap src");
+
+ if (dest && munmap(dest, c.region_size) == -1)
+ perror("munmap dest");
+
+ if (success)
+ ksft_test_result_pass("%s\n", test_name);
+ else
+ ksft_test_result_fail("%s\n", test_name);
+}
+
+static void run_mremap_test_case(struct test test_case, int *failures,
+ unsigned int threshold_mb,
+ unsigned int pattern_seed)
+{
+ long long remap_time = remap_region(test_case.config, threshold_mb,
+ pattern_seed);
+
+ if (remap_time < 0) {
+ if (test_case.expect_failure)
+ ksft_test_result_xfail("%s\n\tExpected mremap failure\n",
+ test_case.name);
+ else {
+ ksft_test_result_fail("%s\n", test_case.name);
+ *failures += 1;
+ }
+ } else {
+ /*
+ * Comparing mremap time is only applicable if entire region
+ * was faulted in.
+ */
+ if (threshold_mb == VALIDATION_NO_THRESHOLD ||
+ test_case.config.region_size <= threshold_mb * _1MB)
+ ksft_test_result_pass("%s\n\tmremap time: %12lldns\n",
+ test_case.name, remap_time);
+ else
+ ksft_test_result_pass("%s\n", test_case.name);
+ }
+}
+
+static void usage(const char *cmd)
+{
+ fprintf(stderr,
+ "Usage: %s [[-t <threshold_mb>] [-p <pattern_seed>]]\n"
+ "-t\t only validate threshold_mb of the remapped region\n"
+ " \t if 0 is supplied no threshold is used; all tests\n"
+ " \t are run and remapped regions validated fully.\n"
+ " \t The default threshold used is 4MB.\n"
+ "-p\t provide a seed to generate the random pattern for\n"
+ " \t validating the remapped region.\n", cmd);
+}
+
+static int parse_args(int argc, char **argv, unsigned int *threshold_mb,
+ unsigned int *pattern_seed)
+{
+ const char *optstr = "t:p:";
+ int opt;
+
+ while ((opt = getopt(argc, argv, optstr)) != -1) {
+ switch (opt) {
+ case 't':
+ *threshold_mb = atoi(optarg);
+ break;
+ case 'p':
+ *pattern_seed = atoi(optarg);
+ break;
+ default:
+ usage(argv[0]);
+ return -1;
+ }
+ }
+
+ if (optind < argc) {
+ usage(argv[0]);
+ return -1;
+ }
+
+ return 0;
+}
+
+#define MAX_TEST 15
+#define MAX_PERF_TEST 3
+int main(int argc, char **argv)
+{
+ int failures = 0;
+ int i, run_perf_tests;
+ unsigned int threshold_mb = VALIDATION_DEFAULT_THRESHOLD;
+ unsigned int pattern_seed;
+ int num_expand_tests = 2;
+ int num_misc_tests = 2;
+ struct test test_cases[MAX_TEST] = {};
+ struct test perf_test_cases[MAX_PERF_TEST];
+ int page_size;
+ time_t t;
+ FILE *maps_fp;
+
+ pattern_seed = (unsigned int) time(&t);
+
+ if (parse_args(argc, argv, &threshold_mb, &pattern_seed) < 0)
+ exit(EXIT_FAILURE);
+
+ ksft_print_msg("Test configs:\n\tthreshold_mb=%u\n\tpattern_seed=%u\n\n",
+ threshold_mb, pattern_seed);
+
+ page_size = sysconf(_SC_PAGESIZE);
+
+ /* Expected mremap failures */
+ test_cases[0] = MAKE_TEST(page_size, page_size, page_size,
+ OVERLAPPING, EXPECT_FAILURE,
+ "mremap - Source and Destination Regions Overlapping");
+
+ test_cases[1] = MAKE_TEST(page_size, page_size/4, page_size,
+ NON_OVERLAPPING, EXPECT_FAILURE,
+ "mremap - Destination Address Misaligned (1KB-aligned)");
+ test_cases[2] = MAKE_TEST(page_size/4, page_size, page_size,
+ NON_OVERLAPPING, EXPECT_FAILURE,
+ "mremap - Source Address Misaligned (1KB-aligned)");
+
+ /* Src addr PTE aligned */
+ test_cases[3] = MAKE_TEST(PTE, PTE, PTE * 2,
+ NON_OVERLAPPING, EXPECT_SUCCESS,
+ "8KB mremap - Source PTE-aligned, Destination PTE-aligned");
+
+ /* Src addr 1MB aligned */
+ test_cases[4] = MAKE_TEST(_1MB, PTE, _2MB, NON_OVERLAPPING, EXPECT_SUCCESS,
+ "2MB mremap - Source 1MB-aligned, Destination PTE-aligned");
+ test_cases[5] = MAKE_TEST(_1MB, _1MB, _2MB, NON_OVERLAPPING, EXPECT_SUCCESS,
+ "2MB mremap - Source 1MB-aligned, Destination 1MB-aligned");
+
+ /* Src addr PMD aligned */
+ test_cases[6] = MAKE_TEST(PMD, PTE, _4MB, NON_OVERLAPPING, EXPECT_SUCCESS,
+ "4MB mremap - Source PMD-aligned, Destination PTE-aligned");
+ test_cases[7] = MAKE_TEST(PMD, _1MB, _4MB, NON_OVERLAPPING, EXPECT_SUCCESS,
+ "4MB mremap - Source PMD-aligned, Destination 1MB-aligned");
+ test_cases[8] = MAKE_TEST(PMD, PMD, _4MB, NON_OVERLAPPING, EXPECT_SUCCESS,
+ "4MB mremap - Source PMD-aligned, Destination PMD-aligned");
+
+ /* Src addr PUD aligned */
+ test_cases[9] = MAKE_TEST(PUD, PTE, _2GB, NON_OVERLAPPING, EXPECT_SUCCESS,
+ "2GB mremap - Source PUD-aligned, Destination PTE-aligned");
+ test_cases[10] = MAKE_TEST(PUD, _1MB, _2GB, NON_OVERLAPPING, EXPECT_SUCCESS,
+ "2GB mremap - Source PUD-aligned, Destination 1MB-aligned");
+ test_cases[11] = MAKE_TEST(PUD, PMD, _2GB, NON_OVERLAPPING, EXPECT_SUCCESS,
+ "2GB mremap - Source PUD-aligned, Destination PMD-aligned");
+ test_cases[12] = MAKE_TEST(PUD, PUD, _2GB, NON_OVERLAPPING, EXPECT_SUCCESS,
+ "2GB mremap - Source PUD-aligned, Destination PUD-aligned");
+
+ /* Src and Dest addr 1MB aligned. 5MB mremap. */
+ test_cases[13] = MAKE_TEST(_1MB, _1MB, _5MB, NON_OVERLAPPING, EXPECT_SUCCESS,
+ "5MB mremap - Source 1MB-aligned, Destination 1MB-aligned");
+
+ /* Src and Dest addr 1MB aligned. 5MB mremap. */
+ test_cases[14] = MAKE_TEST(_1MB, _1MB, _5MB, NON_OVERLAPPING, EXPECT_SUCCESS,
+ "5MB mremap - Source 1MB-aligned, Dest 1MB-aligned with 40MB Preamble");
+ test_cases[14].config.dest_preamble_size = 10 * _4MB;
+
+ perf_test_cases[0] = MAKE_TEST(page_size, page_size, _1GB, NON_OVERLAPPING, EXPECT_SUCCESS,
+ "1GB mremap - Source PTE-aligned, Destination PTE-aligned");
+ /*
+ * mremap 1GB region - Page table level aligned time
+ * comparison.
+ */
+ perf_test_cases[1] = MAKE_TEST(PMD, PMD, _1GB, NON_OVERLAPPING, EXPECT_SUCCESS,
+ "1GB mremap - Source PMD-aligned, Destination PMD-aligned");
+ perf_test_cases[2] = MAKE_TEST(PUD, PUD, _1GB, NON_OVERLAPPING, EXPECT_SUCCESS,
+ "1GB mremap - Source PUD-aligned, Destination PUD-aligned");
+
+ run_perf_tests = (threshold_mb == VALIDATION_NO_THRESHOLD) ||
+ (threshold_mb * _1MB >= _1GB);
+
+ ksft_set_plan(ARRAY_SIZE(test_cases) + (run_perf_tests ?
+ ARRAY_SIZE(perf_test_cases) : 0) + num_expand_tests + num_misc_tests);
+
+ for (i = 0; i < ARRAY_SIZE(test_cases); i++)
+ run_mremap_test_case(test_cases[i], &failures, threshold_mb,
+ pattern_seed);
+
+ maps_fp = fopen("/proc/self/maps", "r");
+
+ if (maps_fp == NULL) {
+ ksft_print_msg("Failed to read /proc/self/maps: %s\n", strerror(errno));
+ exit(KSFT_FAIL);
+ }
+
+ mremap_expand_merge(maps_fp, page_size);
+ mremap_expand_merge_offset(maps_fp, page_size);
+
+ fclose(maps_fp);
+
+ mremap_move_within_range(pattern_seed);
+ mremap_move_1mb_from_start(pattern_seed);
+
+ if (run_perf_tests) {
+ ksft_print_msg("\n%s\n",
+ "mremap HAVE_MOVE_PMD/PUD optimization time comparison for 1GB region:");
+ for (i = 0; i < ARRAY_SIZE(perf_test_cases); i++)
+ run_mremap_test_case(perf_test_cases[i], &failures,
+ threshold_mb, pattern_seed);
+ }
+
+ if (failures > 0)
+ ksft_exit_fail();
+ else
+ ksft_exit_pass();
+}
diff --git a/tools/testing/selftests/mm/on-fault-limit.c b/tools/testing/selftests/mm/on-fault-limit.c
new file mode 100644
index 000000000000..431c1277d83a
--- /dev/null
+++ b/tools/testing/selftests/mm/on-fault-limit.c
@@ -0,0 +1,42 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <sys/mman.h>
+#include <stdio.h>
+#include <unistd.h>
+#include <string.h>
+#include <sys/time.h>
+#include <sys/resource.h>
+#include "../kselftest.h"
+
+static void test_limit(void)
+{
+ struct rlimit lims;
+ void *map;
+
+ if (getrlimit(RLIMIT_MEMLOCK, &lims))
+ ksft_exit_fail_msg("getrlimit: %s\n", strerror(errno));
+
+ if (mlockall(MCL_ONFAULT | MCL_FUTURE))
+ ksft_exit_fail_msg("mlockall: %s\n", strerror(errno));
+
+ map = mmap(NULL, 2 * lims.rlim_max, PROT_READ | PROT_WRITE,
+ MAP_PRIVATE | MAP_ANONYMOUS | MAP_POPULATE, -1, 0);
+
+ ksft_test_result(map == MAP_FAILED, "The map failed respecting mlock limits\n");
+
+ if (map != MAP_FAILED)
+ munmap(map, 2 * lims.rlim_max);
+ munlockall();
+}
+
+int main(int argc, char **argv)
+{
+ ksft_print_header();
+ ksft_set_plan(1);
+
+ if (!getuid())
+ ksft_test_result_skip("The test must be run from a normal user\n");
+ else
+ test_limit();
+
+ ksft_finished();
+}
diff --git a/tools/testing/selftests/mm/pagemap_ioctl.c b/tools/testing/selftests/mm/pagemap_ioctl.c
new file mode 100644
index 000000000000..d59517ed3d48
--- /dev/null
+++ b/tools/testing/selftests/mm/pagemap_ioctl.c
@@ -0,0 +1,1664 @@
+// SPDX-License-Identifier: GPL-2.0
+#define _GNU_SOURCE
+#include <stdio.h>
+#include <fcntl.h>
+#include <string.h>
+#include <sys/mman.h>
+#include <errno.h>
+#include <malloc.h>
+#include "vm_util.h"
+#include "../kselftest.h"
+#include <linux/types.h>
+#include <linux/memfd.h>
+#include <linux/userfaultfd.h>
+#include <linux/fs.h>
+#include <sys/ioctl.h>
+#include <sys/stat.h>
+#include <math.h>
+#include <asm/unistd.h>
+#include <pthread.h>
+#include <sys/resource.h>
+#include <assert.h>
+#include <sys/ipc.h>
+#include <sys/shm.h>
+
+#define PAGEMAP_BITS_ALL (PAGE_IS_WPALLOWED | PAGE_IS_WRITTEN | \
+ PAGE_IS_FILE | PAGE_IS_PRESENT | \
+ PAGE_IS_SWAPPED | PAGE_IS_PFNZERO | \
+ PAGE_IS_HUGE)
+#define PAGEMAP_NON_WRITTEN_BITS (PAGE_IS_WPALLOWED | PAGE_IS_FILE | \
+ PAGE_IS_PRESENT | PAGE_IS_SWAPPED | \
+ PAGE_IS_PFNZERO | PAGE_IS_HUGE)
+
+#define TEST_ITERATIONS 100
+#define PAGEMAP "/proc/self/pagemap"
+int pagemap_fd;
+int uffd;
+int page_size;
+int hpage_size;
+const char *progname;
+
+#define LEN(region) ((region.end - region.start)/page_size)
+
+static long pagemap_ioctl(void *start, int len, void *vec, int vec_len, int flag,
+ int max_pages, long required_mask, long anyof_mask, long excluded_mask,
+ long return_mask)
+{
+ struct pm_scan_arg arg;
+
+ arg.start = (uintptr_t)start;
+ arg.end = (uintptr_t)(start + len);
+ arg.vec = (uintptr_t)vec;
+ arg.vec_len = vec_len;
+ arg.flags = flag;
+ arg.size = sizeof(struct pm_scan_arg);
+ arg.max_pages = max_pages;
+ arg.category_mask = required_mask;
+ arg.category_anyof_mask = anyof_mask;
+ arg.category_inverted = excluded_mask;
+ arg.return_mask = return_mask;
+
+ return ioctl(pagemap_fd, PAGEMAP_SCAN, &arg);
+}
+
+static long pagemap_ioc(void *start, int len, void *vec, int vec_len, int flag,
+ int max_pages, long required_mask, long anyof_mask, long excluded_mask,
+ long return_mask, long *walk_end)
+{
+ struct pm_scan_arg arg;
+ int ret;
+
+ arg.start = (uintptr_t)start;
+ arg.end = (uintptr_t)(start + len);
+ arg.vec = (uintptr_t)vec;
+ arg.vec_len = vec_len;
+ arg.flags = flag;
+ arg.size = sizeof(struct pm_scan_arg);
+ arg.max_pages = max_pages;
+ arg.category_mask = required_mask;
+ arg.category_anyof_mask = anyof_mask;
+ arg.category_inverted = excluded_mask;
+ arg.return_mask = return_mask;
+
+ ret = ioctl(pagemap_fd, PAGEMAP_SCAN, &arg);
+
+ if (walk_end)
+ *walk_end = arg.walk_end;
+
+ return ret;
+}
+
+
+int init_uffd(void)
+{
+ struct uffdio_api uffdio_api;
+
+ uffd = syscall(__NR_userfaultfd, O_CLOEXEC | O_NONBLOCK | UFFD_USER_MODE_ONLY);
+ if (uffd == -1)
+ return uffd;
+
+ uffdio_api.api = UFFD_API;
+ uffdio_api.features = UFFD_FEATURE_WP_UNPOPULATED | UFFD_FEATURE_WP_ASYNC |
+ UFFD_FEATURE_WP_HUGETLBFS_SHMEM;
+ if (ioctl(uffd, UFFDIO_API, &uffdio_api))
+ return -1;
+
+ if (!(uffdio_api.api & UFFDIO_REGISTER_MODE_WP) ||
+ !(uffdio_api.features & UFFD_FEATURE_WP_UNPOPULATED) ||
+ !(uffdio_api.features & UFFD_FEATURE_WP_ASYNC) ||
+ !(uffdio_api.features & UFFD_FEATURE_WP_HUGETLBFS_SHMEM))
+ return -1;
+
+ return 0;
+}
+
+int wp_init(void *lpBaseAddress, int dwRegionSize)
+{
+ struct uffdio_register uffdio_register;
+ struct uffdio_writeprotect wp;
+
+ uffdio_register.range.start = (unsigned long)lpBaseAddress;
+ uffdio_register.range.len = dwRegionSize;
+ uffdio_register.mode = UFFDIO_REGISTER_MODE_WP;
+ if (ioctl(uffd, UFFDIO_REGISTER, &uffdio_register))
+ ksft_exit_fail_msg("ioctl(UFFDIO_REGISTER) %d %s\n", errno, strerror(errno));
+
+ if (!(uffdio_register.ioctls & UFFDIO_WRITEPROTECT))
+ ksft_exit_fail_msg("ioctl set is incorrect\n");
+
+ wp.range.start = (unsigned long)lpBaseAddress;
+ wp.range.len = dwRegionSize;
+ wp.mode = UFFDIO_WRITEPROTECT_MODE_WP;
+
+ if (ioctl(uffd, UFFDIO_WRITEPROTECT, &wp))
+ ksft_exit_fail_msg("ioctl(UFFDIO_WRITEPROTECT)\n");
+
+ return 0;
+}
+
+int wp_free(void *lpBaseAddress, int dwRegionSize)
+{
+ struct uffdio_register uffdio_register;
+
+ uffdio_register.range.start = (unsigned long)lpBaseAddress;
+ uffdio_register.range.len = dwRegionSize;
+ uffdio_register.mode = UFFDIO_REGISTER_MODE_WP;
+ if (ioctl(uffd, UFFDIO_UNREGISTER, &uffdio_register.range))
+ ksft_exit_fail_msg("ioctl unregister failure\n");
+ return 0;
+}
+
+int wp_addr_range(void *lpBaseAddress, int dwRegionSize)
+{
+ if (pagemap_ioctl(lpBaseAddress, dwRegionSize, NULL, 0,
+ PM_SCAN_WP_MATCHING | PM_SCAN_CHECK_WPASYNC,
+ 0, PAGE_IS_WRITTEN, 0, 0, PAGE_IS_WRITTEN) < 0)
+ ksft_exit_fail_msg("error %d %d %s\n", 1, errno, strerror(errno));
+
+ return 0;
+}
+
+void *gethugetlb_mem(int size, int *shmid)
+{
+ char *mem;
+
+ if (shmid) {
+ *shmid = shmget(2, size, SHM_HUGETLB | IPC_CREAT | SHM_R | SHM_W);
+ if (*shmid < 0)
+ return NULL;
+
+ mem = shmat(*shmid, 0, 0);
+ if (mem == (char *)-1) {
+ shmctl(*shmid, IPC_RMID, NULL);
+ ksft_exit_fail_msg("Shared memory attach failure\n");
+ }
+ } else {
+ mem = mmap(NULL, size, PROT_READ | PROT_WRITE,
+ MAP_ANONYMOUS | MAP_HUGETLB | MAP_PRIVATE, -1, 0);
+ if (mem == MAP_FAILED)
+ return NULL;
+ }
+
+ return mem;
+}
+
+int userfaultfd_tests(void)
+{
+ int mem_size, vec_size, written, num_pages = 16;
+ char *mem, *vec;
+
+ mem_size = num_pages * page_size;
+ mem = mmap(NULL, mem_size, PROT_NONE, MAP_PRIVATE | MAP_ANON, -1, 0);
+ if (mem == MAP_FAILED)
+ ksft_exit_fail_msg("error nomem\n");
+
+ wp_init(mem, mem_size);
+
+ /* Change protection of pages differently */
+ mprotect(mem, mem_size/8, PROT_READ|PROT_WRITE);
+ mprotect(mem + 1 * mem_size/8, mem_size/8, PROT_READ);
+ mprotect(mem + 2 * mem_size/8, mem_size/8, PROT_READ|PROT_WRITE);
+ mprotect(mem + 3 * mem_size/8, mem_size/8, PROT_READ);
+ mprotect(mem + 4 * mem_size/8, mem_size/8, PROT_READ|PROT_WRITE);
+ mprotect(mem + 5 * mem_size/8, mem_size/8, PROT_NONE);
+ mprotect(mem + 6 * mem_size/8, mem_size/8, PROT_READ|PROT_WRITE);
+ mprotect(mem + 7 * mem_size/8, mem_size/8, PROT_READ);
+
+ wp_addr_range(mem + (mem_size/16), mem_size - 2 * (mem_size/8));
+ wp_addr_range(mem, mem_size);
+
+ vec_size = mem_size/page_size;
+ vec = malloc(sizeof(struct page_region) * vec_size);
+
+ written = pagemap_ioctl(mem, mem_size, vec, 1, PM_SCAN_WP_MATCHING | PM_SCAN_CHECK_WPASYNC,
+ vec_size - 2, PAGE_IS_WRITTEN, 0, 0, PAGE_IS_WRITTEN);
+ if (written < 0)
+ ksft_exit_fail_msg("error %d %d %s\n", written, errno, strerror(errno));
+
+ ksft_test_result(written == 0, "%s all new pages must not be written (dirty)\n", __func__);
+
+ wp_free(mem, mem_size);
+ munmap(mem, mem_size);
+ free(vec);
+ return 0;
+}
+
+int get_reads(struct page_region *vec, int vec_size)
+{
+ int i, sum = 0;
+
+ for (i = 0; i < vec_size; i++)
+ sum += LEN(vec[i]);
+
+ return sum;
+}
+
+int sanity_tests_sd(void)
+{
+ int mem_size, vec_size, ret, ret2, ret3, i, num_pages = 1000, total_pages = 0;
+ int total_writes, total_reads, reads, count;
+ struct page_region *vec, *vec2;
+ char *mem, *m[2];
+ long walk_end;
+
+ vec_size = num_pages/2;
+ mem_size = num_pages * page_size;
+
+ vec = malloc(sizeof(struct page_region) * vec_size);
+ if (!vec)
+ ksft_exit_fail_msg("error nomem\n");
+
+ vec2 = malloc(sizeof(struct page_region) * vec_size);
+ if (!vec2)
+ ksft_exit_fail_msg("error nomem\n");
+
+ mem = mmap(NULL, mem_size, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANON, -1, 0);
+ if (mem == MAP_FAILED)
+ ksft_exit_fail_msg("error nomem\n");
+
+ wp_init(mem, mem_size);
+ wp_addr_range(mem, mem_size);
+
+ /* 1. wrong operation */
+ ksft_test_result(pagemap_ioctl(mem, 0, vec, vec_size, 0,
+ 0, PAGEMAP_BITS_ALL, 0, 0, PAGEMAP_BITS_ALL) == 0,
+ "%s Zero range size is valid\n", __func__);
+
+ ksft_test_result(pagemap_ioctl(mem, mem_size, NULL, vec_size, 0,
+ 0, PAGEMAP_BITS_ALL, 0, 0, PAGEMAP_BITS_ALL) < 0,
+ "%s output buffer must be specified with size\n", __func__);
+
+ ksft_test_result(pagemap_ioctl(mem, mem_size, vec, 0, 0,
+ 0, PAGEMAP_BITS_ALL, 0, 0, PAGEMAP_BITS_ALL) == 0,
+ "%s output buffer can be 0\n", __func__);
+
+ ksft_test_result(pagemap_ioctl(mem, mem_size, 0, 0, 0,
+ 0, PAGEMAP_BITS_ALL, 0, 0, PAGEMAP_BITS_ALL) == 0,
+ "%s output buffer can be 0\n", __func__);
+
+ ksft_test_result(pagemap_ioctl(mem, mem_size, vec, vec_size, -1,
+ 0, PAGE_IS_WRITTEN, 0, 0, PAGE_IS_WRITTEN) < 0,
+ "%s wrong flag specified\n", __func__);
+
+ ksft_test_result(pagemap_ioctl(mem, mem_size, vec, vec_size,
+ PM_SCAN_WP_MATCHING | PM_SCAN_CHECK_WPASYNC | 0xFF,
+ 0, PAGE_IS_WRITTEN, 0, 0, PAGE_IS_WRITTEN) < 0,
+ "%s flag has extra bits specified\n", __func__);
+
+ ksft_test_result(pagemap_ioctl(mem, mem_size, vec, vec_size, 0,
+ 0, 0, 0, 0, PAGE_IS_WRITTEN) >= 0,
+ "%s no selection mask is specified\n", __func__);
+
+ ksft_test_result(pagemap_ioctl(mem, mem_size, vec, vec_size, 0,
+ 0, PAGE_IS_WRITTEN, PAGE_IS_WRITTEN, 0, 0) == 0,
+ "%s no return mask is specified\n", __func__);
+
+ ksft_test_result(pagemap_ioctl(mem, mem_size, vec, vec_size, 0,
+ 0, PAGE_IS_WRITTEN, 0, 0, 0x1000) < 0,
+ "%s wrong return mask specified\n", __func__);
+
+ ksft_test_result(pagemap_ioctl(mem, mem_size, vec, vec_size,
+ PM_SCAN_WP_MATCHING | PM_SCAN_CHECK_WPASYNC,
+ 0, 0xFFF, PAGE_IS_WRITTEN, 0, PAGE_IS_WRITTEN) < 0,
+ "%s mixture of correct and wrong flag\n", __func__);
+
+ ksft_test_result(pagemap_ioctl(mem, mem_size, vec, vec_size,
+ PM_SCAN_WP_MATCHING | PM_SCAN_CHECK_WPASYNC,
+ 0, 0, 0, PAGEMAP_BITS_ALL, PAGE_IS_WRITTEN) >= 0,
+ "%s PAGEMAP_BITS_ALL can be specified with PM_SCAN_WP_MATCHING | PM_SCAN_CHECK_WPASYNC\n",
+ __func__);
+
+ /* 2. Clear area with larger vec size */
+ ret = pagemap_ioctl(mem, mem_size, vec, vec_size,
+ PM_SCAN_WP_MATCHING | PM_SCAN_CHECK_WPASYNC, 0,
+ PAGE_IS_WRITTEN, 0, 0, PAGE_IS_WRITTEN);
+ ksft_test_result(ret >= 0, "%s Clear area with larger vec size\n", __func__);
+
+ /* 3. Repeated pattern of written and non-written pages */
+ for (i = 0; i < mem_size; i += 2 * page_size)
+ mem[i]++;
+
+ ret = pagemap_ioctl(mem, mem_size, vec, vec_size, 0, 0, PAGE_IS_WRITTEN, 0,
+ 0, PAGE_IS_WRITTEN);
+ if (ret < 0)
+ ksft_exit_fail_msg("error %d %d %s\n", ret, errno, strerror(errno));
+
+ ksft_test_result(ret == mem_size/(page_size * 2),
+ "%s Repeated pattern of written and non-written pages\n", __func__);
+
+ /* 4. Repeated pattern of written and non-written pages in parts */
+ ret = pagemap_ioctl(mem, mem_size, vec, vec_size,
+ PM_SCAN_WP_MATCHING | PM_SCAN_CHECK_WPASYNC,
+ num_pages/2 - 2, PAGE_IS_WRITTEN, 0, 0, PAGE_IS_WRITTEN);
+ if (ret < 0)
+ ksft_exit_fail_msg("error %d %d %s\n", ret, errno, strerror(errno));
+
+ ret2 = pagemap_ioctl(mem, mem_size, vec, 2, 0, 0, PAGE_IS_WRITTEN, 0, 0,
+ PAGE_IS_WRITTEN);
+ if (ret2 < 0)
+ ksft_exit_fail_msg("error %d %d %s\n", ret2, errno, strerror(errno));
+
+ ret3 = pagemap_ioctl(mem, mem_size, vec, vec_size,
+ PM_SCAN_WP_MATCHING | PM_SCAN_CHECK_WPASYNC,
+ 0, PAGE_IS_WRITTEN, 0, 0, PAGE_IS_WRITTEN);
+ if (ret3 < 0)
+ ksft_exit_fail_msg("error %d %d %s\n", ret3, errno, strerror(errno));
+
+ ksft_test_result((ret + ret3) == num_pages/2 && ret2 == 2,
+ "%s Repeated pattern of written and non-written pages in parts %d %d %d\n",
+ __func__, ret, ret3, ret2);
+
+ /* 5. Repeated pattern of written and non-written pages max_pages */
+ for (i = 0; i < mem_size; i += 2 * page_size)
+ mem[i]++;
+ mem[(mem_size/page_size - 1) * page_size]++;
+
+ ret = pagemap_ioctl(mem, mem_size, vec, vec_size,
+ PM_SCAN_WP_MATCHING | PM_SCAN_CHECK_WPASYNC,
+ num_pages/2, PAGE_IS_WRITTEN, 0, 0, PAGE_IS_WRITTEN);
+ if (ret < 0)
+ ksft_exit_fail_msg("error %d %d %s\n", ret, errno, strerror(errno));
+
+ ret2 = pagemap_ioctl(mem, mem_size, vec, vec_size,
+ PM_SCAN_WP_MATCHING | PM_SCAN_CHECK_WPASYNC,
+ 0, PAGE_IS_WRITTEN, 0, 0, PAGE_IS_WRITTEN);
+ if (ret2 < 0)
+ ksft_exit_fail_msg("error %d %d %s\n", ret2, errno, strerror(errno));
+
+ ksft_test_result(ret == num_pages/2 && ret2 == 1,
+ "%s Repeated pattern of written and non-written pages max_pages\n",
+ __func__);
+
+ /* 6. only get 2 dirty pages and clear them as well */
+ vec_size = mem_size/page_size;
+ memset(mem, -1, mem_size);
+
+ /* get and clear second and third pages */
+ ret = pagemap_ioctl(mem + page_size, 2 * page_size, vec, 1,
+ PM_SCAN_WP_MATCHING | PM_SCAN_CHECK_WPASYNC,
+ 2, PAGE_IS_WRITTEN, 0, 0, PAGE_IS_WRITTEN);
+ if (ret < 0)
+ ksft_exit_fail_msg("error %d %d %s\n", ret, errno, strerror(errno));
+
+ ret2 = pagemap_ioctl(mem, mem_size, vec2, vec_size, 0, 0,
+ PAGE_IS_WRITTEN, 0, 0, PAGE_IS_WRITTEN);
+ if (ret2 < 0)
+ ksft_exit_fail_msg("error %d %d %s\n", ret2, errno, strerror(errno));
+
+ ksft_test_result(ret == 1 && LEN(vec[0]) == 2 &&
+ vec[0].start == (uintptr_t)(mem + page_size) &&
+ ret2 == 2 && LEN(vec2[0]) == 1 && vec2[0].start == (uintptr_t)mem &&
+ LEN(vec2[1]) == vec_size - 3 &&
+ vec2[1].start == (uintptr_t)(mem + 3 * page_size),
+ "%s only get 2 written pages and clear them as well\n", __func__);
+
+ wp_free(mem, mem_size);
+ munmap(mem, mem_size);
+
+ /* 7. Two regions */
+ m[0] = mmap(NULL, mem_size, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANON, -1, 0);
+ if (m[0] == MAP_FAILED)
+ ksft_exit_fail_msg("error nomem\n");
+ m[1] = mmap(NULL, mem_size, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANON, -1, 0);
+ if (m[1] == MAP_FAILED)
+ ksft_exit_fail_msg("error nomem\n");
+
+ wp_init(m[0], mem_size);
+ wp_init(m[1], mem_size);
+ wp_addr_range(m[0], mem_size);
+ wp_addr_range(m[1], mem_size);
+
+ memset(m[0], 'a', mem_size);
+ memset(m[1], 'b', mem_size);
+
+ wp_addr_range(m[0], mem_size);
+
+ ret = pagemap_ioctl(m[1], mem_size, vec, 1, 0, 0, PAGE_IS_WRITTEN, 0, 0,
+ PAGE_IS_WRITTEN);
+ if (ret < 0)
+ ksft_exit_fail_msg("error %d %d %s\n", ret, errno, strerror(errno));
+
+ ksft_test_result(ret == 1 && LEN(vec[0]) == mem_size/page_size,
+ "%s Two regions\n", __func__);
+
+ wp_free(m[0], mem_size);
+ wp_free(m[1], mem_size);
+ munmap(m[0], mem_size);
+ munmap(m[1], mem_size);
+
+ free(vec);
+ free(vec2);
+
+ /* 8. Smaller vec */
+ mem_size = 1050 * page_size;
+ vec_size = mem_size/(page_size*2);
+
+ vec = malloc(sizeof(struct page_region) * vec_size);
+ if (!vec)
+ ksft_exit_fail_msg("error nomem\n");
+
+ mem = mmap(NULL, mem_size, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANON, -1, 0);
+ if (mem == MAP_FAILED)
+ ksft_exit_fail_msg("error nomem\n");
+
+ wp_init(mem, mem_size);
+ wp_addr_range(mem, mem_size);
+
+ ret = pagemap_ioctl(mem, mem_size, vec, vec_size,
+ PM_SCAN_WP_MATCHING | PM_SCAN_CHECK_WPASYNC, 0,
+ PAGE_IS_WRITTEN, 0, 0, PAGE_IS_WRITTEN);
+ if (ret < 0)
+ ksft_exit_fail_msg("error %d %d %s\n", ret, errno, strerror(errno));
+
+ for (i = 0; i < mem_size/page_size; i += 2)
+ mem[i * page_size]++;
+
+ ret = pagemap_ioctl(mem, mem_size, vec, vec_size,
+ PM_SCAN_WP_MATCHING | PM_SCAN_CHECK_WPASYNC,
+ mem_size/(page_size*5), PAGE_IS_WRITTEN, 0, 0, PAGE_IS_WRITTEN);
+ if (ret < 0)
+ ksft_exit_fail_msg("error %d %d %s\n", ret, errno, strerror(errno));
+
+ total_pages += ret;
+
+ ret = pagemap_ioctl(mem, mem_size, vec, vec_size,
+ PM_SCAN_WP_MATCHING | PM_SCAN_CHECK_WPASYNC,
+ mem_size/(page_size*5), PAGE_IS_WRITTEN, 0, 0, PAGE_IS_WRITTEN);
+ if (ret < 0)
+ ksft_exit_fail_msg("error %d %d %s\n", ret, errno, strerror(errno));
+
+ total_pages += ret;
+
+ ret = pagemap_ioctl(mem, mem_size, vec, vec_size,
+ PM_SCAN_WP_MATCHING | PM_SCAN_CHECK_WPASYNC,
+ mem_size/(page_size*5), PAGE_IS_WRITTEN, 0, 0, PAGE_IS_WRITTEN);
+ if (ret < 0)
+ ksft_exit_fail_msg("error %d %d %s\n", ret, errno, strerror(errno));
+
+ total_pages += ret;
+
+ ksft_test_result(total_pages == mem_size/(page_size*2), "%s Smaller max_pages\n", __func__);
+
+ free(vec);
+ wp_free(mem, mem_size);
+ munmap(mem, mem_size);
+ total_pages = 0;
+
+ /* 9. Smaller vec */
+ mem_size = 10000 * page_size;
+ vec_size = 50;
+
+ vec = malloc(sizeof(struct page_region) * vec_size);
+ if (!vec)
+ ksft_exit_fail_msg("error nomem\n");
+
+ mem = mmap(NULL, mem_size, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANON, -1, 0);
+ if (mem == MAP_FAILED)
+ ksft_exit_fail_msg("error nomem\n");
+
+ wp_init(mem, mem_size);
+ wp_addr_range(mem, mem_size);
+
+ for (count = 0; count < TEST_ITERATIONS; count++) {
+ total_writes = total_reads = 0;
+ walk_end = (long)mem;
+
+ for (i = 0; i < mem_size; i += page_size) {
+ if (rand() % 2) {
+ mem[i]++;
+ total_writes++;
+ }
+ }
+
+ while (total_reads < total_writes) {
+ ret = pagemap_ioc((void *)walk_end, mem_size-(walk_end - (long)mem), vec,
+ vec_size, PM_SCAN_WP_MATCHING | PM_SCAN_CHECK_WPASYNC,
+ 0, PAGE_IS_WRITTEN, 0, 0, PAGE_IS_WRITTEN, &walk_end);
+ if (ret < 0)
+ ksft_exit_fail_msg("error %d %d %s\n", ret, errno, strerror(errno));
+
+ if (ret > vec_size)
+ break;
+
+ reads = get_reads(vec, ret);
+ total_reads += reads;
+ }
+
+ if (total_reads != total_writes)
+ break;
+ }
+
+ ksft_test_result(count == TEST_ITERATIONS, "Smaller vec\n");
+
+ free(vec);
+ wp_free(mem, mem_size);
+ munmap(mem, mem_size);
+
+ /* 10. Walk_end tester */
+ vec_size = 1000;
+ mem_size = vec_size * page_size;
+
+ vec = malloc(sizeof(struct page_region) * vec_size);
+ if (!vec)
+ ksft_exit_fail_msg("error nomem\n");
+
+ mem = mmap(NULL, mem_size, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANON, -1, 0);
+ if (mem == MAP_FAILED)
+ ksft_exit_fail_msg("error nomem\n");
+
+ wp_init(mem, mem_size);
+ wp_addr_range(mem, mem_size);
+
+ memset(mem, 0, mem_size);
+
+ ret = pagemap_ioc(mem, 0, vec, vec_size, 0,
+ 0, PAGE_IS_WRITTEN, 0, 0, PAGE_IS_WRITTEN, &walk_end);
+ if (ret < 0)
+ ksft_exit_fail_msg("error %d %d %s\n", ret, errno, strerror(errno));
+ ksft_test_result(ret == 0 && walk_end == (long)mem,
+ "Walk_end: Same start and end address\n");
+
+ ret = pagemap_ioc(mem, 0, vec, vec_size, PM_SCAN_WP_MATCHING | PM_SCAN_CHECK_WPASYNC,
+ 0, PAGE_IS_WRITTEN, 0, 0, PAGE_IS_WRITTEN, &walk_end);
+ if (ret < 0)
+ ksft_exit_fail_msg("error %d %d %s\n", ret, errno, strerror(errno));
+ ksft_test_result(ret == 0 && walk_end == (long)mem,
+ "Walk_end: Same start and end with WP\n");
+
+ ret = pagemap_ioc(mem, 0, vec, 0, PM_SCAN_WP_MATCHING | PM_SCAN_CHECK_WPASYNC,
+ 0, PAGE_IS_WRITTEN, 0, 0, PAGE_IS_WRITTEN, &walk_end);
+ if (ret < 0)
+ ksft_exit_fail_msg("error %d %d %s\n", ret, errno, strerror(errno));
+ ksft_test_result(ret == 0 && walk_end == (long)mem,
+ "Walk_end: Same start and end with 0 output buffer\n");
+
+ ret = pagemap_ioc(mem, mem_size, vec, vec_size, 0,
+ 0, PAGE_IS_WRITTEN, 0, 0, PAGE_IS_WRITTEN, &walk_end);
+ if (ret < 0)
+ ksft_exit_fail_msg("error %d %d %s\n", ret, errno, strerror(errno));
+ ksft_test_result(ret == 1 && walk_end == (long)(mem + mem_size),
+ "Walk_end: Big vec\n");
+
+ ret = pagemap_ioc(mem, mem_size, vec, 1, 0,
+ 0, PAGE_IS_WRITTEN, 0, 0, PAGE_IS_WRITTEN, &walk_end);
+ if (ret < 0)
+ ksft_exit_fail_msg("error %d %d %s\n", ret, errno, strerror(errno));
+ ksft_test_result(ret == 1 && walk_end == (long)(mem + mem_size),
+ "Walk_end: vec of minimum length\n");
+
+ ret = pagemap_ioc(mem, mem_size, vec, 1, 0,
+ vec_size, PAGE_IS_WRITTEN, 0, 0, PAGE_IS_WRITTEN, &walk_end);
+ if (ret < 0)
+ ksft_exit_fail_msg("error %d %d %s\n", ret, errno, strerror(errno));
+ ksft_test_result(ret == 1 && walk_end == (long)(mem + mem_size),
+ "Walk_end: Max pages specified\n");
+
+ ret = pagemap_ioc(mem, mem_size, vec, vec_size, 0,
+ vec_size/2, PAGE_IS_WRITTEN, 0, 0, PAGE_IS_WRITTEN, &walk_end);
+ if (ret < 0)
+ ksft_exit_fail_msg("error %d %d %s\n", ret, errno, strerror(errno));
+ ksft_test_result(ret == 1 && walk_end == (long)(mem + mem_size/2),
+ "Walk_end: Half max pages\n");
+
+ ret = pagemap_ioc(mem, mem_size, vec, vec_size, 0,
+ 1, PAGE_IS_WRITTEN, 0, 0, PAGE_IS_WRITTEN, &walk_end);
+ if (ret < 0)
+ ksft_exit_fail_msg("error %d %d %s\n", ret, errno, strerror(errno));
+ ksft_test_result(ret == 1 && walk_end == (long)(mem + page_size),
+ "Walk_end: 1 max page\n");
+
+ ret = pagemap_ioc(mem, mem_size, vec, vec_size, 0,
+ -1, PAGE_IS_WRITTEN, 0, 0, PAGE_IS_WRITTEN, &walk_end);
+ if (ret < 0)
+ ksft_exit_fail_msg("error %d %d %s\n", ret, errno, strerror(errno));
+ ksft_test_result(ret == 1 && walk_end == (long)(mem + mem_size),
+ "Walk_end: max pages\n");
+
+ wp_addr_range(mem, mem_size);
+ for (i = 0; i < mem_size; i += 2 * page_size)
+ mem[i]++;
+
+ ret = pagemap_ioc(mem, mem_size, vec, vec_size, 0,
+ 0, PAGE_IS_WRITTEN, 0, 0, PAGE_IS_WRITTEN, &walk_end);
+ if (ret < 0)
+ ksft_exit_fail_msg("error %d %d %s\n", ret, errno, strerror(errno));
+ ksft_test_result(ret == vec_size/2 && walk_end == (long)(mem + mem_size),
+ "Walk_end sparse: Big vec\n");
+
+ ret = pagemap_ioc(mem, mem_size, vec, 1, 0,
+ 0, PAGE_IS_WRITTEN, 0, 0, PAGE_IS_WRITTEN, &walk_end);
+ if (ret < 0)
+ ksft_exit_fail_msg("error %d %d %s\n", ret, errno, strerror(errno));
+ ksft_test_result(ret == 1 && walk_end == (long)(mem + page_size * 2),
+ "Walk_end sparse: vec of minimum length\n");
+
+ ret = pagemap_ioc(mem, mem_size, vec, 1, 0,
+ vec_size, PAGE_IS_WRITTEN, 0, 0, PAGE_IS_WRITTEN, &walk_end);
+ if (ret < 0)
+ ksft_exit_fail_msg("error %d %d %s\n", ret, errno, strerror(errno));
+ ksft_test_result(ret == 1 && walk_end == (long)(mem + page_size * 2),
+ "Walk_end sparse: Max pages specified\n");
+
+ ret = pagemap_ioc(mem, mem_size, vec, vec_size/2, 0,
+ vec_size, PAGE_IS_WRITTEN, 0, 0, PAGE_IS_WRITTEN, &walk_end);
+ if (ret < 0)
+ ksft_exit_fail_msg("error %d %d %s\n", ret, errno, strerror(errno));
+ ksft_test_result(ret == vec_size/2 && walk_end == (long)(mem + mem_size),
+ "Walk_end sparse: Max pages specified\n");
+
+ ret = pagemap_ioc(mem, mem_size, vec, vec_size, 0,
+ vec_size, PAGE_IS_WRITTEN, 0, 0, PAGE_IS_WRITTEN, &walk_end);
+ if (ret < 0)
+ ksft_exit_fail_msg("error %d %d %s\n", ret, errno, strerror(errno));
+ ksft_test_result(ret == vec_size/2 && walk_end == (long)(mem + mem_size),
+ "Walk_end sparse: Max pages specified\n");
+
+ ret = pagemap_ioc(mem, mem_size, vec, vec_size, 0,
+ vec_size/2, PAGE_IS_WRITTEN, 0, 0, PAGE_IS_WRITTEN, &walk_end);
+ if (ret < 0)
+ ksft_exit_fail_msg("error %d %d %s\n", ret, errno, strerror(errno));
+ ksft_test_result(ret == vec_size/2 && walk_end == (long)(mem + mem_size),
+ "Walk_endsparse : Half max pages\n");
+
+ ret = pagemap_ioc(mem, mem_size, vec, vec_size, 0,
+ 1, PAGE_IS_WRITTEN, 0, 0, PAGE_IS_WRITTEN, &walk_end);
+ if (ret < 0)
+ ksft_exit_fail_msg("error %d %d %s\n", ret, errno, strerror(errno));
+ ksft_test_result(ret == 1 && walk_end == (long)(mem + page_size * 2),
+ "Walk_end: 1 max page\n");
+
+ free(vec);
+ wp_free(mem, mem_size);
+ munmap(mem, mem_size);
+
+ return 0;
+}
+
+int base_tests(char *prefix, char *mem, int mem_size, int skip)
+{
+ int vec_size, written;
+ struct page_region *vec, *vec2;
+
+ if (skip) {
+ ksft_test_result_skip("%s all new pages must not be written (dirty)\n", prefix);
+ ksft_test_result_skip("%s all pages must be written (dirty)\n", prefix);
+ ksft_test_result_skip("%s all pages dirty other than first and the last one\n",
+ prefix);
+ ksft_test_result_skip("%s PM_SCAN_WP_MATCHING | PM_SCAN_CHECK_WPASYNC\n", prefix);
+ ksft_test_result_skip("%s only middle page dirty\n", prefix);
+ ksft_test_result_skip("%s only two middle pages dirty\n", prefix);
+ return 0;
+ }
+
+ vec_size = mem_size/page_size;
+ vec = malloc(sizeof(struct page_region) * vec_size);
+ vec2 = malloc(sizeof(struct page_region) * vec_size);
+
+ /* 1. all new pages must be not be written (dirty) */
+ written = pagemap_ioctl(mem, mem_size, vec, 1, PM_SCAN_WP_MATCHING | PM_SCAN_CHECK_WPASYNC,
+ vec_size - 2, PAGE_IS_WRITTEN, 0, 0, PAGE_IS_WRITTEN);
+ if (written < 0)
+ ksft_exit_fail_msg("error %d %d %s\n", written, errno, strerror(errno));
+
+ ksft_test_result(written == 0, "%s all new pages must not be written (dirty)\n", prefix);
+
+ /* 2. all pages must be written */
+ memset(mem, -1, mem_size);
+
+ written = pagemap_ioctl(mem, mem_size, vec, 1, 0, 0, PAGE_IS_WRITTEN, 0, 0,
+ PAGE_IS_WRITTEN);
+ if (written < 0)
+ ksft_exit_fail_msg("error %d %d %s\n", written, errno, strerror(errno));
+
+ ksft_test_result(written == 1 && LEN(vec[0]) == mem_size/page_size,
+ "%s all pages must be written (dirty)\n", prefix);
+
+ /* 3. all pages dirty other than first and the last one */
+ written = pagemap_ioctl(mem, mem_size, vec, 1, PM_SCAN_WP_MATCHING | PM_SCAN_CHECK_WPASYNC,
+ 0, PAGE_IS_WRITTEN, 0, 0, PAGE_IS_WRITTEN);
+ if (written < 0)
+ ksft_exit_fail_msg("error %d %d %s\n", written, errno, strerror(errno));
+
+ memset(mem + page_size, 0, mem_size - (2 * page_size));
+
+ written = pagemap_ioctl(mem, mem_size, vec, 1, PM_SCAN_WP_MATCHING | PM_SCAN_CHECK_WPASYNC,
+ 0, PAGE_IS_WRITTEN, 0, 0, PAGE_IS_WRITTEN);
+ if (written < 0)
+ ksft_exit_fail_msg("error %d %d %s\n", written, errno, strerror(errno));
+
+ ksft_test_result(written == 1 && LEN(vec[0]) >= vec_size - 2 && LEN(vec[0]) <= vec_size,
+ "%s all pages dirty other than first and the last one\n", prefix);
+
+ written = pagemap_ioctl(mem, mem_size, vec, 1, 0, 0,
+ PAGE_IS_WRITTEN, 0, 0, PAGE_IS_WRITTEN);
+ if (written < 0)
+ ksft_exit_fail_msg("error %d %d %s\n", written, errno, strerror(errno));
+
+ ksft_test_result(written == 0,
+ "%s PM_SCAN_WP_MATCHING | PM_SCAN_CHECK_WPASYNC\n", prefix);
+
+ /* 4. only middle page dirty */
+ written = pagemap_ioctl(mem, mem_size, vec, 1, PM_SCAN_WP_MATCHING | PM_SCAN_CHECK_WPASYNC,
+ 0, PAGE_IS_WRITTEN, 0, 0, PAGE_IS_WRITTEN);
+ if (written < 0)
+ ksft_exit_fail_msg("error %d %d %s\n", written, errno, strerror(errno));
+
+ mem[vec_size/2 * page_size]++;
+
+ written = pagemap_ioctl(mem, mem_size, vec, vec_size, 0, 0, PAGE_IS_WRITTEN,
+ 0, 0, PAGE_IS_WRITTEN);
+ if (written < 0)
+ ksft_exit_fail_msg("error %d %d %s\n", written, errno, strerror(errno));
+
+ ksft_test_result(written == 1 && LEN(vec[0]) >= 1,
+ "%s only middle page dirty\n", prefix);
+
+ /* 5. only two middle pages dirty and walk over only middle pages */
+ written = pagemap_ioctl(mem, mem_size, vec, 1, PM_SCAN_WP_MATCHING | PM_SCAN_CHECK_WPASYNC,
+ 0, PAGE_IS_WRITTEN, 0, 0, PAGE_IS_WRITTEN | PAGE_IS_HUGE);
+ if (written < 0)
+ ksft_exit_fail_msg("error %d %d %s\n", written, errno, strerror(errno));
+
+ mem[vec_size/2 * page_size]++;
+ mem[(vec_size/2 + 1) * page_size]++;
+
+ written = pagemap_ioctl(&mem[vec_size/2 * page_size], 2 * page_size, vec, 1, 0,
+ 0, PAGE_IS_WRITTEN, 0, 0, PAGE_IS_WRITTEN | PAGE_IS_HUGE);
+ if (written < 0)
+ ksft_exit_fail_msg("error %d %d %s\n", written, errno, strerror(errno));
+
+ ksft_test_result(written == 1 && vec[0].start == (uintptr_t)(&mem[vec_size/2 * page_size])
+ && LEN(vec[0]) == 2,
+ "%s only two middle pages dirty\n", prefix);
+
+ free(vec);
+ free(vec2);
+ return 0;
+}
+
+void *gethugepage(int map_size)
+{
+ int ret;
+ char *map;
+
+ map = memalign(hpage_size, map_size);
+ if (!map)
+ ksft_exit_fail_msg("memalign failed %d %s\n", errno, strerror(errno));
+
+ ret = madvise(map, map_size, MADV_HUGEPAGE);
+ if (ret)
+ return NULL;
+
+ memset(map, 0, map_size);
+
+ return map;
+}
+
+int hpage_unit_tests(void)
+{
+ char *map;
+ int ret, ret2;
+ size_t num_pages = 10;
+ int map_size = hpage_size * num_pages;
+ int vec_size = map_size/page_size;
+ struct page_region *vec, *vec2;
+
+ vec = malloc(sizeof(struct page_region) * vec_size);
+ vec2 = malloc(sizeof(struct page_region) * vec_size);
+ if (!vec || !vec2)
+ ksft_exit_fail_msg("malloc failed\n");
+
+ map = gethugepage(map_size);
+ if (map) {
+ wp_init(map, map_size);
+ wp_addr_range(map, map_size);
+
+ /* 1. all new huge page must not be written (dirty) */
+ ret = pagemap_ioctl(map, map_size, vec, vec_size,
+ PM_SCAN_WP_MATCHING | PM_SCAN_CHECK_WPASYNC, 0,
+ PAGE_IS_WRITTEN, 0, 0, PAGE_IS_WRITTEN);
+ if (ret < 0)
+ ksft_exit_fail_msg("error %d %d %s\n", ret, errno, strerror(errno));
+
+ ksft_test_result(ret == 0, "%s all new huge page must not be written (dirty)\n",
+ __func__);
+
+ /* 2. all the huge page must not be written */
+ ret = pagemap_ioctl(map, map_size, vec, vec_size, 0, 0,
+ PAGE_IS_WRITTEN, 0, 0, PAGE_IS_WRITTEN);
+ if (ret < 0)
+ ksft_exit_fail_msg("error %d %d %s\n", ret, errno, strerror(errno));
+
+ ksft_test_result(ret == 0, "%s all the huge page must not be written\n", __func__);
+
+ /* 3. all the huge page must be written and clear dirty as well */
+ memset(map, -1, map_size);
+ ret = pagemap_ioctl(map, map_size, vec, vec_size,
+ PM_SCAN_WP_MATCHING | PM_SCAN_CHECK_WPASYNC,
+ 0, PAGE_IS_WRITTEN, 0, 0, PAGE_IS_WRITTEN);
+ if (ret < 0)
+ ksft_exit_fail_msg("error %d %d %s\n", ret, errno, strerror(errno));
+
+ ksft_test_result(ret == 1 && vec[0].start == (uintptr_t)map &&
+ LEN(vec[0]) == vec_size && vec[0].categories == PAGE_IS_WRITTEN,
+ "%s all the huge page must be written and clear\n", __func__);
+
+ /* 4. only middle page written */
+ wp_free(map, map_size);
+ free(map);
+ map = gethugepage(map_size);
+ wp_init(map, map_size);
+ wp_addr_range(map, map_size);
+ map[vec_size/2 * page_size]++;
+
+ ret = pagemap_ioctl(map, map_size, vec, vec_size, 0, 0,
+ PAGE_IS_WRITTEN, 0, 0, PAGE_IS_WRITTEN);
+ if (ret < 0)
+ ksft_exit_fail_msg("error %d %d %s\n", ret, errno, strerror(errno));
+
+ ksft_test_result(ret == 1 && LEN(vec[0]) > 0,
+ "%s only middle page written\n", __func__);
+
+ wp_free(map, map_size);
+ free(map);
+ } else {
+ ksft_test_result_skip("%s all new huge page must be written\n", __func__);
+ ksft_test_result_skip("%s all the huge page must not be written\n", __func__);
+ ksft_test_result_skip("%s all the huge page must be written and clear\n", __func__);
+ ksft_test_result_skip("%s only middle page written\n", __func__);
+ }
+
+ /* 5. clear first half of huge page */
+ map = gethugepage(map_size);
+ if (map) {
+ wp_init(map, map_size);
+ wp_addr_range(map, map_size);
+
+ memset(map, 0, map_size);
+
+ wp_addr_range(map, map_size/2);
+
+ ret = pagemap_ioctl(map, map_size, vec, vec_size, 0, 0,
+ PAGE_IS_WRITTEN, 0, 0, PAGE_IS_WRITTEN);
+ if (ret < 0)
+ ksft_exit_fail_msg("error %d %d %s\n", ret, errno, strerror(errno));
+
+ ksft_test_result(ret == 1 && LEN(vec[0]) == vec_size/2 &&
+ vec[0].start == (uintptr_t)(map + map_size/2),
+ "%s clear first half of huge page\n", __func__);
+ wp_free(map, map_size);
+ free(map);
+ } else {
+ ksft_test_result_skip("%s clear first half of huge page\n", __func__);
+ }
+
+ /* 6. clear first half of huge page with limited buffer */
+ map = gethugepage(map_size);
+ if (map) {
+ wp_init(map, map_size);
+ wp_addr_range(map, map_size);
+
+ memset(map, 0, map_size);
+
+ ret = pagemap_ioctl(map, map_size, vec, vec_size,
+ PM_SCAN_WP_MATCHING | PM_SCAN_CHECK_WPASYNC,
+ vec_size/2, PAGE_IS_WRITTEN, 0, 0, PAGE_IS_WRITTEN);
+ if (ret < 0)
+ ksft_exit_fail_msg("error %d %d %s\n", ret, errno, strerror(errno));
+
+ ret = pagemap_ioctl(map, map_size, vec, vec_size, 0, 0,
+ PAGE_IS_WRITTEN, 0, 0, PAGE_IS_WRITTEN);
+ if (ret < 0)
+ ksft_exit_fail_msg("error %d %d %s\n", ret, errno, strerror(errno));
+
+ ksft_test_result(ret == 1 && LEN(vec[0]) == vec_size/2 &&
+ vec[0].start == (uintptr_t)(map + map_size/2),
+ "%s clear first half of huge page with limited buffer\n",
+ __func__);
+ wp_free(map, map_size);
+ free(map);
+ } else {
+ ksft_test_result_skip("%s clear first half of huge page with limited buffer\n",
+ __func__);
+ }
+
+ /* 7. clear second half of huge page */
+ map = gethugepage(map_size);
+ if (map) {
+ wp_init(map, map_size);
+ wp_addr_range(map, map_size);
+
+ memset(map, -1, map_size);
+
+ ret = pagemap_ioctl(map + map_size/2, map_size/2, vec, vec_size,
+ PM_SCAN_WP_MATCHING | PM_SCAN_CHECK_WPASYNC, vec_size/2,
+ PAGE_IS_WRITTEN, 0, 0, PAGE_IS_WRITTEN);
+ if (ret < 0)
+ ksft_exit_fail_msg("error %d %d %s\n", ret, errno, strerror(errno));
+
+ ret = pagemap_ioctl(map, map_size, vec, vec_size, 0, 0,
+ PAGE_IS_WRITTEN, 0, 0, PAGE_IS_WRITTEN);
+ if (ret < 0)
+ ksft_exit_fail_msg("error %d %d %s\n", ret, errno, strerror(errno));
+
+ ksft_test_result(ret == 1 && LEN(vec[0]) == vec_size/2,
+ "%s clear second half huge page\n", __func__);
+ wp_free(map, map_size);
+ free(map);
+ } else {
+ ksft_test_result_skip("%s clear second half huge page\n", __func__);
+ }
+
+ /* 8. get half huge page */
+ map = gethugepage(map_size);
+ if (map) {
+ wp_init(map, map_size);
+ wp_addr_range(map, map_size);
+
+ memset(map, -1, map_size);
+ usleep(100);
+
+ ret = pagemap_ioctl(map, map_size, vec, 1,
+ PM_SCAN_WP_MATCHING | PM_SCAN_CHECK_WPASYNC,
+ hpage_size/(2*page_size), PAGE_IS_WRITTEN, 0, 0,
+ PAGE_IS_WRITTEN);
+ if (ret < 0)
+ ksft_exit_fail_msg("error %d %d %s\n", ret, errno, strerror(errno));
+
+ ksft_test_result(ret == 1 && LEN(vec[0]) == hpage_size/(2*page_size),
+ "%s get half huge page\n", __func__);
+
+ ret2 = pagemap_ioctl(map, map_size, vec, vec_size, 0, 0,
+ PAGE_IS_WRITTEN, 0, 0, PAGE_IS_WRITTEN);
+ if (ret2 < 0)
+ ksft_exit_fail_msg("error %d %d %s\n", ret2, errno, strerror(errno));
+
+ ksft_test_result(ret2 == 1 && LEN(vec[0]) == (map_size - hpage_size/2)/page_size,
+ "%s get half huge page\n", __func__);
+
+ wp_free(map, map_size);
+ free(map);
+ } else {
+ ksft_test_result_skip("%s get half huge page\n", __func__);
+ ksft_test_result_skip("%s get half huge page\n", __func__);
+ }
+
+ free(vec);
+ free(vec2);
+ return 0;
+}
+
+int unmapped_region_tests(void)
+{
+ void *start = (void *)0x10000000;
+ int written, len = 0x00040000;
+ int vec_size = len / page_size;
+ struct page_region *vec = malloc(sizeof(struct page_region) * vec_size);
+
+ /* 1. Get written pages */
+ written = pagemap_ioctl(start, len, vec, vec_size, 0, 0,
+ PAGEMAP_NON_WRITTEN_BITS, 0, 0, PAGEMAP_NON_WRITTEN_BITS);
+ if (written < 0)
+ ksft_exit_fail_msg("error %d %d %s\n", written, errno, strerror(errno));
+
+ ksft_test_result(written >= 0, "%s Get status of pages\n", __func__);
+
+ free(vec);
+ return 0;
+}
+
+static void test_simple(void)
+{
+ int i;
+ char *map;
+ struct page_region vec;
+
+ map = aligned_alloc(page_size, page_size);
+ if (!map)
+ ksft_exit_fail_msg("aligned_alloc failed\n");
+
+ wp_init(map, page_size);
+ wp_addr_range(map, page_size);
+
+ for (i = 0 ; i < TEST_ITERATIONS; i++) {
+ if (pagemap_ioctl(map, page_size, &vec, 1, 0, 0,
+ PAGE_IS_WRITTEN, 0, 0, PAGE_IS_WRITTEN) == 1) {
+ ksft_print_msg("written bit was 1, but should be 0 (i=%d)\n", i);
+ break;
+ }
+
+ wp_addr_range(map, page_size);
+ /* Write something to the page to get the written bit enabled on the page */
+ map[0]++;
+
+ if (pagemap_ioctl(map, page_size, &vec, 1, 0, 0,
+ PAGE_IS_WRITTEN, 0, 0, PAGE_IS_WRITTEN) == 0) {
+ ksft_print_msg("written bit was 0, but should be 1 (i=%d)\n", i);
+ break;
+ }
+
+ wp_addr_range(map, page_size);
+ }
+ wp_free(map, page_size);
+ free(map);
+
+ ksft_test_result(i == TEST_ITERATIONS, "Test %s\n", __func__);
+}
+
+int sanity_tests(void)
+{
+ int mem_size, vec_size, ret, fd, i, buf_size;
+ struct page_region *vec;
+ char *mem, *fmem;
+ struct stat sbuf;
+ char *tmp_buf;
+
+ /* 1. wrong operation */
+ mem_size = 10 * page_size;
+ vec_size = mem_size / page_size;
+
+ vec = malloc(sizeof(struct page_region) * vec_size);
+ mem = mmap(NULL, mem_size, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANON, -1, 0);
+ if (mem == MAP_FAILED || vec == MAP_FAILED)
+ ksft_exit_fail_msg("error nomem\n");
+
+ wp_init(mem, mem_size);
+ wp_addr_range(mem, mem_size);
+
+ ksft_test_result(pagemap_ioctl(mem, mem_size, vec, vec_size,
+ PM_SCAN_WP_MATCHING | PM_SCAN_CHECK_WPASYNC,
+ 0, PAGEMAP_BITS_ALL, 0, 0, PAGEMAP_BITS_ALL) >= 0,
+ "%s WP op can be specified with !PAGE_IS_WRITTEN\n", __func__);
+ ksft_test_result(pagemap_ioctl(mem, mem_size, vec, vec_size, 0, 0,
+ PAGEMAP_BITS_ALL, 0, 0, PAGEMAP_BITS_ALL) >= 0,
+ "%s required_mask specified\n", __func__);
+ ksft_test_result(pagemap_ioctl(mem, mem_size, vec, vec_size, 0, 0,
+ 0, PAGEMAP_BITS_ALL, 0, PAGEMAP_BITS_ALL) >= 0,
+ "%s anyof_mask specified\n", __func__);
+ ksft_test_result(pagemap_ioctl(mem, mem_size, vec, vec_size, 0, 0,
+ 0, 0, PAGEMAP_BITS_ALL, PAGEMAP_BITS_ALL) >= 0,
+ "%s excluded_mask specified\n", __func__);
+ ksft_test_result(pagemap_ioctl(mem, mem_size, vec, vec_size, 0, 0,
+ PAGEMAP_BITS_ALL, PAGEMAP_BITS_ALL, 0,
+ PAGEMAP_BITS_ALL) >= 0,
+ "%s required_mask and anyof_mask specified\n", __func__);
+ wp_free(mem, mem_size);
+ munmap(mem, mem_size);
+
+ /* 2. Get sd and present pages with anyof_mask */
+ mem = mmap(NULL, mem_size, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANON, -1, 0);
+ if (mem == MAP_FAILED)
+ ksft_exit_fail_msg("error nomem\n");
+ wp_init(mem, mem_size);
+ wp_addr_range(mem, mem_size);
+
+ memset(mem, 0, mem_size);
+
+ ret = pagemap_ioctl(mem, mem_size, vec, vec_size, 0, 0,
+ 0, PAGEMAP_BITS_ALL, 0, PAGEMAP_BITS_ALL);
+ ksft_test_result(ret >= 0 && vec[0].start == (uintptr_t)mem && LEN(vec[0]) == vec_size &&
+ (vec[0].categories & (PAGE_IS_WRITTEN | PAGE_IS_PRESENT)) ==
+ (PAGE_IS_WRITTEN | PAGE_IS_PRESENT),
+ "%s Get sd and present pages with anyof_mask\n", __func__);
+
+ /* 3. Get sd and present pages with required_mask */
+ ret = pagemap_ioctl(mem, mem_size, vec, vec_size, 0, 0,
+ PAGEMAP_BITS_ALL, 0, 0, PAGEMAP_BITS_ALL);
+ ksft_test_result(ret >= 0 && vec[0].start == (uintptr_t)mem && LEN(vec[0]) == vec_size &&
+ (vec[0].categories & (PAGE_IS_WRITTEN | PAGE_IS_PRESENT)) ==
+ (PAGE_IS_WRITTEN | PAGE_IS_PRESENT),
+ "%s Get all the pages with required_mask\n", __func__);
+
+ /* 4. Get sd and present pages with required_mask and anyof_mask */
+ ret = pagemap_ioctl(mem, mem_size, vec, vec_size, 0, 0,
+ PAGE_IS_WRITTEN, PAGE_IS_PRESENT, 0, PAGEMAP_BITS_ALL);
+ ksft_test_result(ret >= 0 && vec[0].start == (uintptr_t)mem && LEN(vec[0]) == vec_size &&
+ (vec[0].categories & (PAGE_IS_WRITTEN | PAGE_IS_PRESENT)) ==
+ (PAGE_IS_WRITTEN | PAGE_IS_PRESENT),
+ "%s Get sd and present pages with required_mask and anyof_mask\n",
+ __func__);
+
+ /* 5. Don't get sd pages */
+ ret = pagemap_ioctl(mem, mem_size, vec, vec_size, 0, 0,
+ PAGE_IS_WRITTEN, 0, PAGE_IS_WRITTEN, PAGEMAP_BITS_ALL);
+ ksft_test_result(ret == 0, "%s Don't get sd pages\n", __func__);
+
+ /* 6. Don't get present pages */
+ ret = pagemap_ioctl(mem, mem_size, vec, vec_size, 0, 0,
+ PAGE_IS_PRESENT, 0, PAGE_IS_PRESENT, PAGEMAP_BITS_ALL);
+ ksft_test_result(ret == 0, "%s Don't get present pages\n", __func__);
+
+ wp_free(mem, mem_size);
+ munmap(mem, mem_size);
+
+ /* 8. Find written present pages with return mask */
+ mem = mmap(NULL, mem_size, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANON, -1, 0);
+ if (mem == MAP_FAILED)
+ ksft_exit_fail_msg("error nomem\n");
+ wp_init(mem, mem_size);
+ wp_addr_range(mem, mem_size);
+
+ memset(mem, 0, mem_size);
+
+ ret = pagemap_ioctl(mem, mem_size, vec, vec_size,
+ PM_SCAN_WP_MATCHING | PM_SCAN_CHECK_WPASYNC, 0,
+ 0, PAGEMAP_BITS_ALL, 0, PAGE_IS_WRITTEN);
+ ksft_test_result(ret >= 0 && vec[0].start == (uintptr_t)mem && LEN(vec[0]) == vec_size &&
+ vec[0].categories == PAGE_IS_WRITTEN,
+ "%s Find written present pages with return mask\n", __func__);
+ wp_free(mem, mem_size);
+ munmap(mem, mem_size);
+
+ /* 9. Memory mapped file */
+ fd = open(progname, O_RDONLY);
+ if (fd < 0)
+ ksft_exit_fail_msg("%s Memory mapped file\n", __func__);
+
+ ret = stat(progname, &sbuf);
+ if (ret < 0)
+ ksft_exit_fail_msg("error %d %d %s\n", ret, errno, strerror(errno));
+
+ fmem = mmap(NULL, sbuf.st_size, PROT_READ, MAP_PRIVATE, fd, 0);
+ if (fmem == MAP_FAILED)
+ ksft_exit_fail_msg("error nomem %d %s\n", errno, strerror(errno));
+
+ tmp_buf = malloc(sbuf.st_size);
+ memcpy(tmp_buf, fmem, sbuf.st_size);
+
+ ret = pagemap_ioctl(fmem, sbuf.st_size, vec, vec_size, 0, 0,
+ 0, PAGEMAP_NON_WRITTEN_BITS, 0, PAGEMAP_NON_WRITTEN_BITS);
+
+ ksft_test_result(ret >= 0 && vec[0].start == (uintptr_t)fmem &&
+ LEN(vec[0]) == ceilf((float)sbuf.st_size/page_size) &&
+ (vec[0].categories & PAGE_IS_FILE),
+ "%s Memory mapped file\n", __func__);
+
+ munmap(fmem, sbuf.st_size);
+ close(fd);
+
+ /* 10. Create and read/write to a memory mapped file */
+ buf_size = page_size * 10;
+
+ fd = open(__FILE__".tmp2", O_RDWR | O_CREAT, 0666);
+ if (fd < 0)
+ ksft_exit_fail_msg("Read/write to memory: %s\n",
+ strerror(errno));
+
+ for (i = 0; i < buf_size; i++)
+ if (write(fd, "c", 1) < 0)
+ ksft_exit_fail_msg("Create and read/write to a memory mapped file\n");
+
+ fmem = mmap(NULL, buf_size, PROT_READ | PROT_WRITE, MAP_PRIVATE, fd, 0);
+ if (fmem == MAP_FAILED)
+ ksft_exit_fail_msg("error nomem %d %s\n", errno, strerror(errno));
+
+ wp_init(fmem, buf_size);
+ wp_addr_range(fmem, buf_size);
+
+ for (i = 0; i < buf_size; i++)
+ fmem[i] = 'z';
+
+ msync(fmem, buf_size, MS_SYNC);
+
+ ret = pagemap_ioctl(fmem, buf_size, vec, vec_size, 0, 0,
+ PAGE_IS_WRITTEN, PAGE_IS_PRESENT | PAGE_IS_SWAPPED | PAGE_IS_FILE, 0,
+ PAGEMAP_BITS_ALL);
+
+ ksft_test_result(ret >= 0 && vec[0].start == (uintptr_t)fmem &&
+ LEN(vec[0]) == (buf_size/page_size) &&
+ (vec[0].categories & PAGE_IS_WRITTEN),
+ "%s Read/write to memory\n", __func__);
+
+ wp_free(fmem, buf_size);
+ munmap(fmem, buf_size);
+ close(fd);
+
+ free(vec);
+ return 0;
+}
+
+int mprotect_tests(void)
+{
+ int ret;
+ char *mem, *mem2;
+ struct page_region vec;
+ int pagemap_fd = open("/proc/self/pagemap", O_RDONLY);
+
+ if (pagemap_fd < 0) {
+ fprintf(stderr, "open() failed\n");
+ exit(1);
+ }
+
+ /* 1. Map two pages */
+ mem = mmap(0, 2 * page_size, PROT_READ|PROT_WRITE, MAP_PRIVATE | MAP_ANON, -1, 0);
+ if (mem == MAP_FAILED)
+ ksft_exit_fail_msg("error nomem\n");
+ wp_init(mem, 2 * page_size);
+ wp_addr_range(mem, 2 * page_size);
+
+ /* Populate both pages. */
+ memset(mem, 1, 2 * page_size);
+
+ ret = pagemap_ioctl(mem, 2 * page_size, &vec, 1, 0, 0, PAGE_IS_WRITTEN,
+ 0, 0, PAGE_IS_WRITTEN);
+ if (ret < 0)
+ ksft_exit_fail_msg("error %d %d %s\n", ret, errno, strerror(errno));
+
+ ksft_test_result(ret == 1 && LEN(vec) == 2, "%s Both pages written\n", __func__);
+
+ /* 2. Start tracking */
+ wp_addr_range(mem, 2 * page_size);
+
+ ksft_test_result(pagemap_ioctl(mem, 2 * page_size, &vec, 1, 0, 0,
+ PAGE_IS_WRITTEN, 0, 0, PAGE_IS_WRITTEN) == 0,
+ "%s Both pages are not written (dirty)\n", __func__);
+
+ /* 3. Remap the second page */
+ mem2 = mmap(mem + page_size, page_size, PROT_READ|PROT_WRITE,
+ MAP_PRIVATE|MAP_ANON|MAP_FIXED, -1, 0);
+ if (mem2 == MAP_FAILED)
+ ksft_exit_fail_msg("error nomem\n");
+ wp_init(mem2, page_size);
+ wp_addr_range(mem2, page_size);
+
+ /* Protect + unprotect. */
+ mprotect(mem, page_size, PROT_NONE);
+ mprotect(mem, 2 * page_size, PROT_READ);
+ mprotect(mem, 2 * page_size, PROT_READ|PROT_WRITE);
+
+ /* Modify both pages. */
+ memset(mem, 2, 2 * page_size);
+
+ /* Protect + unprotect. */
+ mprotect(mem, page_size, PROT_NONE);
+ mprotect(mem, page_size, PROT_READ);
+ mprotect(mem, page_size, PROT_READ|PROT_WRITE);
+
+ ret = pagemap_ioctl(mem, 2 * page_size, &vec, 1, 0, 0, PAGE_IS_WRITTEN,
+ 0, 0, PAGE_IS_WRITTEN);
+ if (ret < 0)
+ ksft_exit_fail_msg("error %d %d %s\n", ret, errno, strerror(errno));
+
+ ksft_test_result(ret == 1 && LEN(vec) == 2,
+ "%s Both pages written after remap and mprotect\n", __func__);
+
+ /* 4. Clear and make the pages written */
+ wp_addr_range(mem, 2 * page_size);
+
+ memset(mem, 'A', 2 * page_size);
+
+ ret = pagemap_ioctl(mem, 2 * page_size, &vec, 1, 0, 0, PAGE_IS_WRITTEN,
+ 0, 0, PAGE_IS_WRITTEN);
+ if (ret < 0)
+ ksft_exit_fail_msg("error %d %d %s\n", ret, errno, strerror(errno));
+
+ ksft_test_result(ret == 1 && LEN(vec) == 2,
+ "%s Clear and make the pages written\n", __func__);
+
+ wp_free(mem, 2 * page_size);
+ munmap(mem, 2 * page_size);
+ return 0;
+}
+
+/* transact test */
+static const unsigned int nthreads = 6, pages_per_thread = 32, access_per_thread = 8;
+static pthread_barrier_t start_barrier, end_barrier;
+static unsigned int extra_thread_faults;
+static unsigned int iter_count = 1000;
+static volatile int finish;
+
+static ssize_t get_dirty_pages_reset(char *mem, unsigned int count,
+ int reset, int page_size)
+{
+ struct pm_scan_arg arg = {0};
+ struct page_region rgns[256];
+ int i, j, cnt, ret;
+
+ arg.size = sizeof(struct pm_scan_arg);
+ arg.start = (uintptr_t)mem;
+ arg.max_pages = count;
+ arg.end = (uintptr_t)(mem + count * page_size);
+ arg.vec = (uintptr_t)rgns;
+ arg.vec_len = sizeof(rgns) / sizeof(*rgns);
+ if (reset)
+ arg.flags |= PM_SCAN_WP_MATCHING | PM_SCAN_CHECK_WPASYNC;
+ arg.category_mask = PAGE_IS_WRITTEN;
+ arg.return_mask = PAGE_IS_WRITTEN;
+
+ ret = ioctl(pagemap_fd, PAGEMAP_SCAN, &arg);
+ if (ret < 0)
+ ksft_exit_fail_msg("ioctl failed\n");
+
+ cnt = 0;
+ for (i = 0; i < ret; ++i) {
+ if (rgns[i].categories != PAGE_IS_WRITTEN)
+ ksft_exit_fail_msg("wrong flags\n");
+
+ for (j = 0; j < LEN(rgns[i]); ++j)
+ cnt++;
+ }
+
+ return cnt;
+}
+
+void *thread_proc(void *mem)
+{
+ int *m = mem;
+ long curr_faults, faults;
+ struct rusage r;
+ unsigned int i;
+ int ret;
+
+ if (getrusage(RUSAGE_THREAD, &r))
+ ksft_exit_fail_msg("getrusage\n");
+
+ curr_faults = r.ru_minflt;
+
+ while (!finish) {
+ ret = pthread_barrier_wait(&start_barrier);
+ if (ret && ret != PTHREAD_BARRIER_SERIAL_THREAD)
+ ksft_exit_fail_msg("pthread_barrier_wait\n");
+
+ for (i = 0; i < access_per_thread; ++i)
+ __atomic_add_fetch(m + i * (0x1000 / sizeof(*m)), 1, __ATOMIC_SEQ_CST);
+
+ ret = pthread_barrier_wait(&end_barrier);
+ if (ret && ret != PTHREAD_BARRIER_SERIAL_THREAD)
+ ksft_exit_fail_msg("pthread_barrier_wait\n");
+
+ if (getrusage(RUSAGE_THREAD, &r))
+ ksft_exit_fail_msg("getrusage\n");
+
+ faults = r.ru_minflt - curr_faults;
+ if (faults < access_per_thread)
+ ksft_exit_fail_msg("faults < access_per_thread");
+
+ __atomic_add_fetch(&extra_thread_faults, faults - access_per_thread,
+ __ATOMIC_SEQ_CST);
+ curr_faults = r.ru_minflt;
+ }
+
+ return NULL;
+}
+
+static void transact_test(int page_size)
+{
+ unsigned int i, count, extra_pages;
+ pthread_t th;
+ char *mem;
+ int ret, c;
+
+ if (pthread_barrier_init(&start_barrier, NULL, nthreads + 1))
+ ksft_exit_fail_msg("pthread_barrier_init\n");
+
+ if (pthread_barrier_init(&end_barrier, NULL, nthreads + 1))
+ ksft_exit_fail_msg("pthread_barrier_init\n");
+
+ mem = mmap(NULL, 0x1000 * nthreads * pages_per_thread, PROT_READ | PROT_WRITE,
+ MAP_ANONYMOUS | MAP_PRIVATE, -1, 0);
+ if (mem == MAP_FAILED)
+ ksft_exit_fail_msg("Error mmap %s.\n", strerror(errno));
+
+ wp_init(mem, 0x1000 * nthreads * pages_per_thread);
+ wp_addr_range(mem, 0x1000 * nthreads * pages_per_thread);
+
+ memset(mem, 0, 0x1000 * nthreads * pages_per_thread);
+
+ count = get_dirty_pages_reset(mem, nthreads * pages_per_thread, 1, page_size);
+ ksft_test_result(count > 0, "%s count %d\n", __func__, count);
+ count = get_dirty_pages_reset(mem, nthreads * pages_per_thread, 1, page_size);
+ ksft_test_result(count == 0, "%s count %d\n", __func__, count);
+
+ finish = 0;
+ for (i = 0; i < nthreads; ++i)
+ pthread_create(&th, NULL, thread_proc, mem + 0x1000 * i * pages_per_thread);
+
+ extra_pages = 0;
+ for (i = 0; i < iter_count; ++i) {
+ count = 0;
+
+ ret = pthread_barrier_wait(&start_barrier);
+ if (ret && ret != PTHREAD_BARRIER_SERIAL_THREAD)
+ ksft_exit_fail_msg("pthread_barrier_wait\n");
+
+ count = get_dirty_pages_reset(mem, nthreads * pages_per_thread, 1,
+ page_size);
+
+ ret = pthread_barrier_wait(&end_barrier);
+ if (ret && ret != PTHREAD_BARRIER_SERIAL_THREAD)
+ ksft_exit_fail_msg("pthread_barrier_wait\n");
+
+ if (count > nthreads * access_per_thread)
+ ksft_exit_fail_msg("Too big count %d expected %d, iter %d\n",
+ count, nthreads * access_per_thread, i);
+
+ c = get_dirty_pages_reset(mem, nthreads * pages_per_thread, 1, page_size);
+ count += c;
+
+ if (c > nthreads * access_per_thread) {
+ ksft_test_result_fail(" %s count > nthreads\n", __func__);
+ return;
+ }
+
+ if (count != nthreads * access_per_thread) {
+ /*
+ * The purpose of the test is to make sure that no page updates are lost
+ * when the page updates and read-resetting soft dirty flags are performed
+ * in parallel. However, it is possible that the application will get the
+ * soft dirty flags twice on the two consecutive read-resets. This seems
+ * unavoidable as soft dirty flag is handled in software through page faults
+ * in kernel. While the updating the flags is supposed to be synchronized
+ * between page fault handling and read-reset, it is possible that
+ * read-reset happens after page fault PTE update but before the application
+ * re-executes write instruction. So read-reset gets the flag, clears write
+ * access and application gets page fault again for the same write.
+ */
+ if (count < nthreads * access_per_thread) {
+ ksft_test_result_fail("Lost update, iter %d, %d vs %d.\n", i, count,
+ nthreads * access_per_thread);
+ return;
+ }
+
+ extra_pages += count - nthreads * access_per_thread;
+ }
+ }
+
+ pthread_barrier_wait(&start_barrier);
+ finish = 1;
+ pthread_barrier_wait(&end_barrier);
+
+ ksft_test_result_pass("%s Extra pages %u (%.1lf%%), extra thread faults %d.\n", __func__,
+ extra_pages,
+ 100.0 * extra_pages / (iter_count * nthreads * access_per_thread),
+ extra_thread_faults);
+}
+
+int main(int argc, char *argv[])
+{
+ int mem_size, shmid, buf_size, fd, i, ret;
+ char *mem, *map, *fmem;
+ struct stat sbuf;
+
+ progname = argv[0];
+
+ ksft_print_header();
+
+ if (init_uffd())
+ return ksft_exit_pass();
+
+ ksft_set_plan(115);
+
+ page_size = getpagesize();
+ hpage_size = read_pmd_pagesize();
+
+ pagemap_fd = open(PAGEMAP, O_RDONLY);
+ if (pagemap_fd < 0)
+ return -EINVAL;
+
+ /* 1. Sanity testing */
+ sanity_tests_sd();
+
+ /* 2. Normal page testing */
+ mem_size = 10 * page_size;
+ mem = mmap(NULL, mem_size, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANON, -1, 0);
+ if (mem == MAP_FAILED)
+ ksft_exit_fail_msg("error nomem\n");
+ wp_init(mem, mem_size);
+ wp_addr_range(mem, mem_size);
+
+ base_tests("Page testing:", mem, mem_size, 0);
+
+ wp_free(mem, mem_size);
+ munmap(mem, mem_size);
+
+ /* 3. Large page testing */
+ mem_size = 512 * 10 * page_size;
+ mem = mmap(NULL, mem_size, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANON, -1, 0);
+ if (mem == MAP_FAILED)
+ ksft_exit_fail_msg("error nomem\n");
+ wp_init(mem, mem_size);
+ wp_addr_range(mem, mem_size);
+
+ base_tests("Large Page testing:", mem, mem_size, 0);
+
+ wp_free(mem, mem_size);
+ munmap(mem, mem_size);
+
+ /* 4. Huge page testing */
+ map = gethugepage(hpage_size);
+ if (map) {
+ wp_init(map, hpage_size);
+ wp_addr_range(map, hpage_size);
+ base_tests("Huge page testing:", map, hpage_size, 0);
+ wp_free(map, hpage_size);
+ free(map);
+ } else {
+ base_tests("Huge page testing:", NULL, 0, 1);
+ }
+
+ /* 5. SHM Hugetlb page testing */
+ mem_size = 2*1024*1024;
+ mem = gethugetlb_mem(mem_size, &shmid);
+ if (mem) {
+ wp_init(mem, mem_size);
+ wp_addr_range(mem, mem_size);
+
+ base_tests("Hugetlb shmem testing:", mem, mem_size, 0);
+
+ wp_free(mem, mem_size);
+ shmctl(shmid, IPC_RMID, NULL);
+ } else {
+ base_tests("Hugetlb shmem testing:", NULL, 0, 1);
+ }
+
+ /* 6. Hugetlb page testing */
+ mem = gethugetlb_mem(mem_size, NULL);
+ if (mem) {
+ wp_init(mem, mem_size);
+ wp_addr_range(mem, mem_size);
+
+ base_tests("Hugetlb mem testing:", mem, mem_size, 0);
+
+ wp_free(mem, mem_size);
+ } else {
+ base_tests("Hugetlb mem testing:", NULL, 0, 1);
+ }
+
+ /* 7. File Hugetlb testing */
+ mem_size = 2*1024*1024;
+ fd = memfd_create("uffd-test", MFD_HUGETLB | MFD_NOEXEC_SEAL);
+ mem = mmap(NULL, mem_size, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0);
+ if (mem) {
+ wp_init(mem, mem_size);
+ wp_addr_range(mem, mem_size);
+
+ base_tests("Hugetlb shmem testing:", mem, mem_size, 0);
+
+ wp_free(mem, mem_size);
+ shmctl(shmid, IPC_RMID, NULL);
+ } else {
+ base_tests("Hugetlb shmem testing:", NULL, 0, 1);
+ }
+ close(fd);
+
+ /* 8. File memory testing */
+ buf_size = page_size * 10;
+
+ fd = open(__FILE__".tmp0", O_RDWR | O_CREAT, 0777);
+ if (fd < 0)
+ ksft_exit_fail_msg("Create and read/write to a memory mapped file: %s\n",
+ strerror(errno));
+
+ for (i = 0; i < buf_size; i++)
+ if (write(fd, "c", 1) < 0)
+ ksft_exit_fail_msg("Create and read/write to a memory mapped file\n");
+
+ ret = stat(__FILE__".tmp0", &sbuf);
+ if (ret < 0)
+ ksft_exit_fail_msg("error %d %d %s\n", ret, errno, strerror(errno));
+
+ fmem = mmap(NULL, sbuf.st_size, PROT_READ | PROT_WRITE, MAP_PRIVATE, fd, 0);
+ if (fmem == MAP_FAILED)
+ ksft_exit_fail_msg("error nomem %d %s\n", errno, strerror(errno));
+
+ wp_init(fmem, sbuf.st_size);
+ wp_addr_range(fmem, sbuf.st_size);
+
+ base_tests("File memory testing:", fmem, sbuf.st_size, 0);
+
+ wp_free(fmem, sbuf.st_size);
+ munmap(fmem, sbuf.st_size);
+ close(fd);
+
+ /* 9. File memory testing */
+ buf_size = page_size * 10;
+
+ fd = memfd_create(__FILE__".tmp00", MFD_NOEXEC_SEAL);
+ if (fd < 0)
+ ksft_exit_fail_msg("Create and read/write to a memory mapped file: %s\n",
+ strerror(errno));
+
+ if (ftruncate(fd, buf_size))
+ ksft_exit_fail_msg("Error ftruncate\n");
+
+ for (i = 0; i < buf_size; i++)
+ if (write(fd, "c", 1) < 0)
+ ksft_exit_fail_msg("Create and read/write to a memory mapped file\n");
+
+ fmem = mmap(NULL, buf_size, PROT_READ | PROT_WRITE, MAP_PRIVATE, fd, 0);
+ if (fmem == MAP_FAILED)
+ ksft_exit_fail_msg("error nomem %d %s\n", errno, strerror(errno));
+
+ wp_init(fmem, buf_size);
+ wp_addr_range(fmem, buf_size);
+
+ base_tests("File anonymous memory testing:", fmem, buf_size, 0);
+
+ wp_free(fmem, buf_size);
+ munmap(fmem, buf_size);
+ close(fd);
+
+ /* 10. Huge page tests */
+ hpage_unit_tests();
+
+ /* 11. Iterative test */
+ test_simple();
+
+ /* 12. Mprotect test */
+ mprotect_tests();
+
+ /* 13. Transact test */
+ transact_test(page_size);
+
+ /* 14. Sanity testing */
+ sanity_tests();
+
+ /*15. Unmapped address test */
+ unmapped_region_tests();
+
+ /* 16. Userfaultfd tests */
+ userfaultfd_tests();
+
+ close(pagemap_fd);
+ return ksft_exit_pass();
+}
diff --git a/tools/testing/selftests/vm/pkey-helpers.h b/tools/testing/selftests/mm/pkey-helpers.h
index 622a85848f61..1af3156a9db8 100644
--- a/tools/testing/selftests/vm/pkey-helpers.h
+++ b/tools/testing/selftests/mm/pkey-helpers.h
@@ -13,6 +13,8 @@
#include <ucontext.h>
#include <sys/mman.h>
+#include "../kselftest.h"
+
/* Define some kernel-like types */
#define u8 __u8
#define u16 __u16
@@ -32,7 +34,7 @@ extern int test_nr;
extern int iteration_nr;
#ifdef __GNUC__
-__attribute__((format(printf, 1, 2)))
+__printf(1, 2)
#endif
static inline void sigsafe_printf(const char *format, ...)
{
@@ -175,7 +177,6 @@ static inline void __pkey_write_allow(int pkey, int do_allow_write)
dprintf4("pkey_reg now: %016llx\n", read_pkey_reg());
}
-#define ARRAY_SIZE(x) (sizeof(x) / sizeof(*(x)))
#define ALIGN_UP(x, align_to) (((x) + ((align_to)-1)) & ~((align_to)-1))
#define ALIGN_DOWN(x, align_to) ((x) & ~((align_to)-1))
#define ALIGN_PTR_UP(p, ptr_align_to) \
diff --git a/tools/testing/selftests/vm/pkey-powerpc.h b/tools/testing/selftests/mm/pkey-powerpc.h
index 1ebb586b2fbc..ae5df26104e5 100644
--- a/tools/testing/selftests/vm/pkey-powerpc.h
+++ b/tools/testing/selftests/mm/pkey-powerpc.h
@@ -3,9 +3,6 @@
#ifndef _PKEYS_POWERPC_H
#define _PKEYS_POWERPC_H
-#ifndef SYS_mprotect_key
-# define SYS_mprotect_key 386
-#endif
#ifndef SYS_pkey_alloc
# define SYS_pkey_alloc 384
# define SYS_pkey_free 385
diff --git a/tools/testing/selftests/vm/pkey-x86.h b/tools/testing/selftests/mm/pkey-x86.h
index 3be20f5d5275..814758e109c0 100644
--- a/tools/testing/selftests/vm/pkey-x86.h
+++ b/tools/testing/selftests/mm/pkey-x86.h
@@ -5,29 +5,11 @@
#ifdef __i386__
-#ifndef SYS_mprotect_key
-# define SYS_mprotect_key 380
-#endif
-
-#ifndef SYS_pkey_alloc
-# define SYS_pkey_alloc 381
-# define SYS_pkey_free 382
-#endif
-
#define REG_IP_IDX REG_EIP
#define si_pkey_offset 0x14
#else
-#ifndef SYS_mprotect_key
-# define SYS_mprotect_key 329
-#endif
-
-#ifndef SYS_pkey_alloc
-# define SYS_pkey_alloc 330
-# define SYS_pkey_free 331
-#endif
-
#define REG_IP_IDX REG_RIP
#define si_pkey_offset 0x20
@@ -80,19 +62,6 @@ static inline void __write_pkey_reg(u64 pkey_reg)
assert(pkey_reg == __read_pkey_reg());
}
-static inline void __cpuid(unsigned int *eax, unsigned int *ebx,
- unsigned int *ecx, unsigned int *edx)
-{
- /* ecx is often an input as well as an output. */
- asm volatile(
- "cpuid;"
- : "=a" (*eax),
- "=b" (*ebx),
- "=c" (*ecx),
- "=d" (*edx)
- : "0" (*eax), "2" (*ecx));
-}
-
/* Intel-defined CPU features, CPUID level 0x00000007:0 (ecx) */
#define X86_FEATURE_PKU (1<<3) /* Protection Keys for Userspace */
#define X86_FEATURE_OSPKE (1<<4) /* OS Protection Keys Enable */
@@ -104,9 +73,7 @@ static inline int cpu_has_pkeys(void)
unsigned int ecx;
unsigned int edx;
- eax = 0x7;
- ecx = 0x0;
- __cpuid(&eax, &ebx, &ecx, &edx);
+ __cpuid_count(0x7, 0x0, eax, ebx, ecx, edx);
if (!(ecx & X86_FEATURE_PKU)) {
dprintf2("cpu does not have PKU\n");
@@ -119,6 +86,18 @@ static inline int cpu_has_pkeys(void)
return 1;
}
+static inline int cpu_max_xsave_size(void)
+{
+ unsigned long XSTATE_CPUID = 0xd;
+ unsigned int eax;
+ unsigned int ebx;
+ unsigned int ecx;
+ unsigned int edx;
+
+ __cpuid_count(XSTATE_CPUID, 0, eax, ebx, ecx, edx);
+ return ecx;
+}
+
static inline u32 pkey_bit_position(int pkey)
{
return pkey * PKEY_BITS_PER_PKEY;
@@ -126,6 +105,7 @@ static inline u32 pkey_bit_position(int pkey)
#define XSTATE_PKEY_BIT (9)
#define XSTATE_PKEY 0x200
+#define XSTATE_BV_OFFSET 512
int pkey_reg_xstate_offset(void)
{
@@ -134,16 +114,14 @@ int pkey_reg_xstate_offset(void)
unsigned int ecx;
unsigned int edx;
int xstate_offset;
- int xstate_size;
+ int xstate_size = 0;
unsigned long XSTATE_CPUID = 0xd;
int leaf;
/* assume that XSTATE_PKEY is set in XCR0 */
leaf = XSTATE_PKEY_BIT;
{
- eax = XSTATE_CPUID;
- ecx = leaf;
- __cpuid(&eax, &ebx, &ecx, &edx);
+ __cpuid_count(XSTATE_CPUID, leaf, eax, ebx, ecx, edx);
if (leaf == XSTATE_PKEY_BIT) {
xstate_offset = ebx;
diff --git a/tools/testing/selftests/vm/protection_keys.c b/tools/testing/selftests/mm/protection_keys.c
index fc19addcb5c8..f822ae31af22 100644
--- a/tools/testing/selftests/vm/protection_keys.c
+++ b/tools/testing/selftests/mm/protection_keys.c
@@ -1,6 +1,6 @@
// SPDX-License-Identifier: GPL-2.0
/*
- * Tests Memory Protection Keys (see Documentation/vm/protection-keys.txt)
+ * Tests Memory Protection Keys (see Documentation/core-api/protection-keys.rst)
*
* There are examples in here of:
* * how to set protection keys on memory
@@ -18,12 +18,13 @@
* do a plain mprotect() to a mprotect_pkey() area and make sure the pkey sticks
*
* Compile like this:
- * gcc -o protection_keys -O2 -g -std=gnu99 -pthread -Wall protection_keys.c -lrt -ldl -lm
- * gcc -m32 -o protection_keys_32 -O2 -g -std=gnu99 -pthread -Wall protection_keys.c -lrt -ldl -lm
+ * gcc -mxsave -o protection_keys -O2 -g -std=gnu99 -pthread -Wall protection_keys.c -lrt -ldl -lm
+ * gcc -mxsave -m32 -o protection_keys_32 -O2 -g -std=gnu99 -pthread -Wall protection_keys.c -lrt -ldl -lm
*/
#define _GNU_SOURCE
#define __SANE_USERSPACE_TYPES__
#include <errno.h>
+#include <linux/elf.h>
#include <linux/futex.h>
#include <time.h>
#include <sys/time.h>
@@ -53,6 +54,7 @@ int test_nr;
u64 shadow_pkey_reg;
int dprint_in_signal;
char dprint_in_signal_buffer[DPRINT_IN_SIGNAL_BUF_SIZE];
+char buf[256];
void cat_into_file(char *str, char *file)
{
@@ -97,7 +99,7 @@ int tracing_root_ok(void)
void tracing_on(void)
{
#if CONTROL_TRACING > 0
-#define TRACEDIR "/sys/kernel/debug/tracing"
+#define TRACEDIR "/sys/kernel/tracing"
char pidstr[32];
if (!tracing_root_ok())
@@ -123,7 +125,7 @@ void tracing_off(void)
#if CONTROL_TRACING > 0
if (!tracing_root_ok())
return;
- cat_into_file("0", "/sys/kernel/debug/tracing/tracing_on");
+ cat_into_file("0", "/sys/kernel/tracing/tracing_on");
#endif
}
@@ -293,15 +295,6 @@ void pkey_access_deny(int pkey)
pkey_disable_set(pkey, PKEY_DISABLE_ACCESS);
}
-/* Failed address bound checks: */
-#ifndef SEGV_BNDERR
-# define SEGV_BNDERR 3
-#endif
-
-#ifndef SEGV_PKUERR
-# define SEGV_PKUERR 4
-#endif
-
static char *si_code_str(int si_code)
{
if (si_code == SEGV_MAPERR)
@@ -475,7 +468,7 @@ int sys_mprotect_pkey(void *ptr, size_t size, unsigned long orig_prot,
ptr, size, orig_prot, pkey);
errno = 0;
- sret = syscall(SYS_mprotect_key, ptr, size, orig_prot, pkey);
+ sret = syscall(__NR_pkey_mprotect, ptr, size, orig_prot, pkey);
if (errno) {
dprintf2("SYS_mprotect_key sret: %d\n", sret);
dprintf2("SYS_mprotect_key prot: 0x%lx\n", orig_prot);
@@ -510,7 +503,7 @@ int alloc_pkey(void)
" shadow: 0x%016llx\n",
__func__, __LINE__, ret, __read_pkey_reg(),
shadow_pkey_reg);
- if (ret) {
+ if (ret > 0) {
/* clear both the bits: */
shadow_pkey_reg = set_pkey_bits(shadow_pkey_reg, ret,
~PKEY_MASK);
@@ -561,7 +554,6 @@ int alloc_random_pkey(void)
int nr_alloced = 0;
int random_index;
memset(alloced_pkeys, 0, sizeof(alloced_pkeys));
- srand((unsigned int)time(NULL));
/* allocate every possible key and make a note of which ones we got */
max_nr_pkey_allocs = NR_PKEYS;
@@ -1278,6 +1270,78 @@ void test_pkey_alloc_exhaust(int *ptr, u16 pkey)
}
}
+void arch_force_pkey_reg_init(void)
+{
+#if defined(__i386__) || defined(__x86_64__) /* arch */
+ u64 *buf;
+
+ /*
+ * All keys should be allocated and set to allow reads and
+ * writes, so the register should be all 0. If not, just
+ * skip the test.
+ */
+ if (read_pkey_reg())
+ return;
+
+ /*
+ * Just allocate an absurd about of memory rather than
+ * doing the XSAVE size enumeration dance.
+ */
+ buf = mmap(NULL, 1*MB, PROT_READ|PROT_WRITE, MAP_ANONYMOUS|MAP_PRIVATE, -1, 0);
+
+ /* These __builtins require compiling with -mxsave */
+
+ /* XSAVE to build a valid buffer: */
+ __builtin_ia32_xsave(buf, XSTATE_PKEY);
+ /* Clear XSTATE_BV[PKRU]: */
+ buf[XSTATE_BV_OFFSET/sizeof(u64)] &= ~XSTATE_PKEY;
+ /* XRSTOR will likely get PKRU back to the init state: */
+ __builtin_ia32_xrstor(buf, XSTATE_PKEY);
+
+ munmap(buf, 1*MB);
+#endif
+}
+
+
+/*
+ * This is mostly useless on ppc for now. But it will not
+ * hurt anything and should give some better coverage as
+ * a long-running test that continually checks the pkey
+ * register.
+ */
+void test_pkey_init_state(int *ptr, u16 pkey)
+{
+ int err;
+ int allocated_pkeys[NR_PKEYS] = {0};
+ int nr_allocated_pkeys = 0;
+ int i;
+
+ for (i = 0; i < NR_PKEYS; i++) {
+ int new_pkey = alloc_pkey();
+
+ if (new_pkey < 0)
+ continue;
+ allocated_pkeys[nr_allocated_pkeys++] = new_pkey;
+ }
+
+ dprintf3("%s()::%d\n", __func__, __LINE__);
+
+ arch_force_pkey_reg_init();
+
+ /*
+ * Loop for a bit, hoping to get exercise the kernel
+ * context switch code.
+ */
+ for (i = 0; i < 1000000; i++)
+ read_pkey_reg();
+
+ for (i = 0; i < nr_allocated_pkeys; i++) {
+ err = sys_pkey_free(allocated_pkeys[i]);
+ pkey_assert(!err);
+ read_pkey_reg(); /* for shadow checking */
+ }
+}
+
/*
* pkey 0 is special. It is allocated by default, so you do not
* have to call pkey_alloc() to use it first. Make sure that it
@@ -1449,6 +1513,13 @@ void test_implicit_mprotect_exec_only_memory(int *ptr, u16 pkey)
ret = mprotect(p1, PAGE_SIZE, PROT_EXEC);
pkey_assert(!ret);
+ /*
+ * Reset the shadow, assuming that the above mprotect()
+ * correctly changed PKRU, but to an unknown value since
+ * the actual allocated pkey is unknown.
+ */
+ shadow_pkey_reg = __read_pkey_reg();
+
dprintf2("pkey_reg: %016llx\n", read_pkey_reg());
/* Make sure this is an *instruction* fault */
@@ -1472,6 +1543,129 @@ void test_implicit_mprotect_exec_only_memory(int *ptr, u16 pkey)
do_not_expect_pkey_fault("plain read on recently PROT_EXEC area");
}
+#if defined(__i386__) || defined(__x86_64__)
+void test_ptrace_modifies_pkru(int *ptr, u16 pkey)
+{
+ u32 new_pkru;
+ pid_t child;
+ int status, ret;
+ int pkey_offset = pkey_reg_xstate_offset();
+ size_t xsave_size = cpu_max_xsave_size();
+ void *xsave;
+ u32 *pkey_register;
+ u64 *xstate_bv;
+ struct iovec iov;
+
+ new_pkru = ~read_pkey_reg();
+ /* Don't make PROT_EXEC mappings inaccessible */
+ new_pkru &= ~3;
+
+ child = fork();
+ pkey_assert(child >= 0);
+ dprintf3("[%d] fork() ret: %d\n", getpid(), child);
+ if (!child) {
+ ptrace(PTRACE_TRACEME, 0, 0, 0);
+ /* Stop and allow the tracer to modify PKRU directly */
+ raise(SIGSTOP);
+
+ /*
+ * need __read_pkey_reg() version so we do not do shadow_pkey_reg
+ * checking
+ */
+ if (__read_pkey_reg() != new_pkru)
+ exit(1);
+
+ /* Stop and allow the tracer to clear XSTATE_BV for PKRU */
+ raise(SIGSTOP);
+
+ if (__read_pkey_reg() != 0)
+ exit(1);
+
+ /* Stop and allow the tracer to examine PKRU */
+ raise(SIGSTOP);
+
+ exit(0);
+ }
+
+ pkey_assert(child == waitpid(child, &status, 0));
+ dprintf3("[%d] waitpid(%d) status: %x\n", getpid(), child, status);
+ pkey_assert(WIFSTOPPED(status) && WSTOPSIG(status) == SIGSTOP);
+
+ xsave = (void *)malloc(xsave_size);
+ pkey_assert(xsave > 0);
+
+ /* Modify the PKRU register directly */
+ iov.iov_base = xsave;
+ iov.iov_len = xsave_size;
+ ret = ptrace(PTRACE_GETREGSET, child, (void *)NT_X86_XSTATE, &iov);
+ pkey_assert(ret == 0);
+
+ pkey_register = (u32 *)(xsave + pkey_offset);
+ pkey_assert(*pkey_register == read_pkey_reg());
+
+ *pkey_register = new_pkru;
+
+ ret = ptrace(PTRACE_SETREGSET, child, (void *)NT_X86_XSTATE, &iov);
+ pkey_assert(ret == 0);
+
+ /* Test that the modification is visible in ptrace before any execution */
+ memset(xsave, 0xCC, xsave_size);
+ ret = ptrace(PTRACE_GETREGSET, child, (void *)NT_X86_XSTATE, &iov);
+ pkey_assert(ret == 0);
+ pkey_assert(*pkey_register == new_pkru);
+
+ /* Execute the tracee */
+ ret = ptrace(PTRACE_CONT, child, 0, 0);
+ pkey_assert(ret == 0);
+
+ /* Test that the tracee saw the PKRU value change */
+ pkey_assert(child == waitpid(child, &status, 0));
+ dprintf3("[%d] waitpid(%d) status: %x\n", getpid(), child, status);
+ pkey_assert(WIFSTOPPED(status) && WSTOPSIG(status) == SIGSTOP);
+
+ /* Test that the modification is visible in ptrace after execution */
+ memset(xsave, 0xCC, xsave_size);
+ ret = ptrace(PTRACE_GETREGSET, child, (void *)NT_X86_XSTATE, &iov);
+ pkey_assert(ret == 0);
+ pkey_assert(*pkey_register == new_pkru);
+
+ /* Clear the PKRU bit from XSTATE_BV */
+ xstate_bv = (u64 *)(xsave + 512);
+ *xstate_bv &= ~(1 << 9);
+
+ ret = ptrace(PTRACE_SETREGSET, child, (void *)NT_X86_XSTATE, &iov);
+ pkey_assert(ret == 0);
+
+ /* Test that the modification is visible in ptrace before any execution */
+ memset(xsave, 0xCC, xsave_size);
+ ret = ptrace(PTRACE_GETREGSET, child, (void *)NT_X86_XSTATE, &iov);
+ pkey_assert(ret == 0);
+ pkey_assert(*pkey_register == 0);
+
+ ret = ptrace(PTRACE_CONT, child, 0, 0);
+ pkey_assert(ret == 0);
+
+ /* Test that the tracee saw the PKRU value go to 0 */
+ pkey_assert(child == waitpid(child, &status, 0));
+ dprintf3("[%d] waitpid(%d) status: %x\n", getpid(), child, status);
+ pkey_assert(WIFSTOPPED(status) && WSTOPSIG(status) == SIGSTOP);
+
+ /* Test that the modification is visible in ptrace after execution */
+ memset(xsave, 0xCC, xsave_size);
+ ret = ptrace(PTRACE_GETREGSET, child, (void *)NT_X86_XSTATE, &iov);
+ pkey_assert(ret == 0);
+ pkey_assert(*pkey_register == 0);
+
+ ret = ptrace(PTRACE_CONT, child, 0, 0);
+ pkey_assert(ret == 0);
+ pkey_assert(child == waitpid(child, &status, 0));
+ dprintf3("[%d] waitpid(%d) status: %x\n", getpid(), child, status);
+ pkey_assert(WIFEXITED(status));
+ pkey_assert(WEXITSTATUS(status) == 0);
+ free(xsave);
+}
+#endif
+
void test_mprotect_pkey_on_unsupported_cpu(int *ptr, u16 pkey)
{
int size = PAGE_SIZE;
@@ -1482,7 +1676,7 @@ void test_mprotect_pkey_on_unsupported_cpu(int *ptr, u16 pkey)
return;
}
- sret = syscall(SYS_mprotect_key, ptr, size, PROT_READ, pkey);
+ sret = syscall(__NR_pkey_mprotect, ptr, size, PROT_READ, pkey);
pkey_assert(sret < 0);
}
@@ -1502,10 +1696,14 @@ void (*pkey_tests[])(int *ptr, u16 pkey) = {
test_implicit_mprotect_exec_only_memory,
test_mprotect_with_pkey_0,
test_ptrace_of_child,
+ test_pkey_init_state,
test_pkey_syscalls_on_non_allocated_pkey,
test_pkey_syscalls_bad_args,
test_pkey_alloc_exhaust,
test_pkey_alloc_free_attach_pkey0,
+#if defined(__i386__) || defined(__x86_64__)
+ test_ptrace_modifies_pkru,
+#endif
};
void run_tests_once(void)
@@ -1547,11 +1745,46 @@ void pkey_setup_shadow(void)
shadow_pkey_reg = __read_pkey_reg();
}
+void restore_settings_atexit(void)
+{
+ cat_into_file(buf, "/proc/sys/vm/nr_hugepages");
+}
+
+void save_settings(void)
+{
+ int fd;
+ int err;
+
+ if (geteuid())
+ return;
+
+ fd = open("/proc/sys/vm/nr_hugepages", O_RDONLY);
+ if (fd < 0) {
+ fprintf(stderr, "error opening\n");
+ perror("error: ");
+ exit(__LINE__);
+ }
+
+ /* -1 to guarantee leaving the trailing \0 */
+ err = read(fd, buf, sizeof(buf)-1);
+ if (err < 0) {
+ fprintf(stderr, "error reading\n");
+ perror("error: ");
+ exit(__LINE__);
+ }
+
+ atexit(restore_settings_atexit);
+ close(fd);
+}
+
int main(void)
{
int nr_iterations = 22;
int pkeys_supported = is_pkeys_supported();
+ srand((unsigned int)time(NULL));
+
+ save_settings();
setup_handlers();
printf("has pkeys: %d\n", pkeys_supported);
diff --git a/tools/testing/selftests/mm/run_vmtests.sh b/tools/testing/selftests/mm/run_vmtests.sh
new file mode 100755
index 000000000000..c2c542fe7b17
--- /dev/null
+++ b/tools/testing/selftests/mm/run_vmtests.sh
@@ -0,0 +1,445 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+# Please run as root
+
+# Kselftest framework requirement - SKIP code is 4.
+ksft_skip=4
+
+count_total=0
+count_pass=0
+count_fail=0
+count_skip=0
+exitcode=0
+
+usage() {
+ cat <<EOF
+usage: ${BASH_SOURCE[0]:-$0} [ options ]
+
+ -a: run all tests, including extra ones (other than destructive ones)
+ -t: specify specific categories to tests to run
+ -h: display this message
+ -n: disable TAP output
+ -d: run destructive tests
+
+The default behavior is to run required tests only. If -a is specified,
+will run all tests.
+
+Alternatively, specific groups tests can be run by passing a string
+to the -t argument containing one or more of the following categories
+separated by spaces:
+- mmap
+ tests for mmap(2)
+- gup_test
+ tests for gup
+- userfaultfd
+ tests for userfaultfd(2)
+- compaction
+ a test for the patch "Allow compaction of unevictable pages"
+- mlock
+ tests for mlock(2)
+- mremap
+ tests for mremap(2)
+- hugevm
+ tests for very large virtual address space
+- vmalloc
+ vmalloc smoke tests
+- hmm
+ hmm smoke tests
+- madv_populate
+ test memadvise(2) MADV_POPULATE_{READ,WRITE} options
+- memfd_secret
+ test memfd_secret(2)
+- process_mrelease
+ test process_mrelease(2)
+- ksm
+ ksm tests that do not require >=2 NUMA nodes
+- ksm_numa
+ ksm tests that require >=2 NUMA nodes
+- pkey
+ memory protection key tests
+- soft_dirty
+ test soft dirty page bit semantics
+- pagemap
+ test pagemap_scan IOCTL
+- cow
+ test copy-on-write semantics
+- thp
+ test transparent huge pages
+- hugetlb
+ test hugetlbfs huge pages
+- migration
+ invoke move_pages(2) to exercise the migration entry code
+ paths in the kernel
+- mkdirty
+ test handling of code that might set PTE/PMD dirty in
+ read-only VMAs
+- mdwe
+ test prctl(PR_SET_MDWE, ...)
+
+example: ./run_vmtests.sh -t "hmm mmap ksm"
+EOF
+ exit 0
+}
+
+RUN_ALL=false
+RUN_DESTRUCTIVE=false
+TAP_PREFIX="# "
+
+while getopts "aht:n" OPT; do
+ case ${OPT} in
+ "a") RUN_ALL=true ;;
+ "h") usage ;;
+ "t") VM_SELFTEST_ITEMS=${OPTARG} ;;
+ "n") TAP_PREFIX= ;;
+ "d") RUN_DESTRUCTIVE=true ;;
+ esac
+done
+shift $((OPTIND -1))
+
+# default behavior: run all tests
+VM_SELFTEST_ITEMS=${VM_SELFTEST_ITEMS:-default}
+
+test_selected() {
+ if [ "$VM_SELFTEST_ITEMS" == "default" ]; then
+ # If no VM_SELFTEST_ITEMS are specified, run all tests
+ return 0
+ fi
+ # If test selected argument is one of the test items
+ if [[ " ${VM_SELFTEST_ITEMS[*]} " =~ " ${1} " ]]; then
+ return 0
+ else
+ return 1
+ fi
+}
+
+run_gup_matrix() {
+ # -t: thp=on, -T: thp=off, -H: hugetlb=on
+ local hugetlb_mb=$(( needmem_KB / 1024 ))
+
+ for huge in -t -T "-H -m $hugetlb_mb"; do
+ # -u: gup-fast, -U: gup-basic, -a: pin-fast, -b: pin-basic, -L: pin-longterm
+ for test_cmd in -u -U -a -b -L; do
+ # -w: write=1, -W: write=0
+ for write in -w -W; do
+ # -S: shared
+ for share in -S " "; do
+ # -n: How many pages to fetch together? 512 is special
+ # because it's default thp size (or 2M on x86), 123 to
+ # just test partial gup when hit a huge in whatever form
+ for num in "-n 1" "-n 512" "-n 123"; do
+ CATEGORY="gup_test" run_test ./gup_test \
+ $huge $test_cmd $write $share $num
+ done
+ done
+ done
+ done
+ done
+}
+
+# get huge pagesize and freepages from /proc/meminfo
+while read -r name size unit; do
+ if [ "$name" = "HugePages_Free:" ]; then
+ freepgs="$size"
+ fi
+ if [ "$name" = "Hugepagesize:" ]; then
+ hpgsize_KB="$size"
+ fi
+done < /proc/meminfo
+
+# Simple hugetlbfs tests have a hardcoded minimum requirement of
+# huge pages totaling 256MB (262144KB) in size. The userfaultfd
+# hugetlb test requires a minimum of 2 * nr_cpus huge pages. Take
+# both of these requirements into account and attempt to increase
+# number of huge pages available.
+nr_cpus=$(nproc)
+hpgsize_MB=$((hpgsize_KB / 1024))
+half_ufd_size_MB=$((((nr_cpus * hpgsize_MB + 127) / 128) * 128))
+needmem_KB=$((half_ufd_size_MB * 2 * 1024))
+
+# set proper nr_hugepages
+if [ -n "$freepgs" ] && [ -n "$hpgsize_KB" ]; then
+ nr_hugepgs=$(cat /proc/sys/vm/nr_hugepages)
+ needpgs=$((needmem_KB / hpgsize_KB))
+ tries=2
+ while [ "$tries" -gt 0 ] && [ "$freepgs" -lt "$needpgs" ]; do
+ lackpgs=$((needpgs - freepgs))
+ echo 3 > /proc/sys/vm/drop_caches
+ if ! echo $((lackpgs + nr_hugepgs)) > /proc/sys/vm/nr_hugepages; then
+ echo "Please run this test as root"
+ exit $ksft_skip
+ fi
+ while read -r name size unit; do
+ if [ "$name" = "HugePages_Free:" ]; then
+ freepgs=$size
+ fi
+ done < /proc/meminfo
+ tries=$((tries - 1))
+ done
+ if [ "$freepgs" -lt "$needpgs" ]; then
+ printf "Not enough huge pages available (%d < %d)\n" \
+ "$freepgs" "$needpgs"
+ fi
+else
+ echo "no hugetlbfs support in kernel?"
+ exit 1
+fi
+
+# filter 64bit architectures
+ARCH64STR="arm64 ia64 mips64 parisc64 ppc64 ppc64le riscv64 s390x sparc64 x86_64"
+if [ -z "$ARCH" ]; then
+ ARCH=$(uname -m 2>/dev/null | sed -e 's/aarch64.*/arm64/')
+fi
+VADDR64=0
+echo "$ARCH64STR" | grep "$ARCH" &>/dev/null && VADDR64=1
+
+tap_prefix() {
+ sed -e "s/^/${TAP_PREFIX}/"
+}
+
+tap_output() {
+ if [[ ! -z "$TAP_PREFIX" ]]; then
+ read str
+ echo $str
+ fi
+}
+
+pretty_name() {
+ echo "$*" | sed -e 's/^\(bash \)\?\.\///'
+}
+
+# Usage: run_test [test binary] [arbitrary test arguments...]
+run_test() {
+ if test_selected ${CATEGORY}; then
+ # On memory constrainted systems some tests can fail to allocate hugepages.
+ # perform some cleanup before the test for a higher success rate.
+ if [ ${CATEGORY} == "thp" ] | [ ${CATEGORY} == "hugetlb" ]; then
+ echo 3 > /proc/sys/vm/drop_caches
+ sleep 2
+ echo 1 > /proc/sys/vm/compact_memory
+ sleep 2
+ fi
+
+ local test=$(pretty_name "$*")
+ local title="running $*"
+ local sep=$(echo -n "$title" | tr "[:graph:][:space:]" -)
+ printf "%s\n%s\n%s\n" "$sep" "$title" "$sep" | tap_prefix
+
+ ("$@" 2>&1) | tap_prefix
+ local ret=${PIPESTATUS[0]}
+ count_total=$(( count_total + 1 ))
+ if [ $ret -eq 0 ]; then
+ count_pass=$(( count_pass + 1 ))
+ echo "[PASS]" | tap_prefix
+ echo "ok ${count_total} ${test}" | tap_output
+ elif [ $ret -eq $ksft_skip ]; then
+ count_skip=$(( count_skip + 1 ))
+ echo "[SKIP]" | tap_prefix
+ echo "ok ${count_total} ${test} # SKIP" | tap_output
+ exitcode=$ksft_skip
+ else
+ count_fail=$(( count_fail + 1 ))
+ echo "[FAIL]" | tap_prefix
+ echo "not ok ${count_total} ${test} # exit=$ret" | tap_output
+ exitcode=1
+ fi
+ fi # test_selected
+}
+
+echo "TAP version 13" | tap_output
+
+CATEGORY="hugetlb" run_test ./hugepage-mmap
+
+shmmax=$(cat /proc/sys/kernel/shmmax)
+shmall=$(cat /proc/sys/kernel/shmall)
+echo 268435456 > /proc/sys/kernel/shmmax
+echo 4194304 > /proc/sys/kernel/shmall
+CATEGORY="hugetlb" run_test ./hugepage-shm
+echo "$shmmax" > /proc/sys/kernel/shmmax
+echo "$shmall" > /proc/sys/kernel/shmall
+
+CATEGORY="hugetlb" run_test ./map_hugetlb
+CATEGORY="hugetlb" run_test ./hugepage-mremap
+CATEGORY="hugetlb" run_test ./hugepage-vmemmap
+CATEGORY="hugetlb" run_test ./hugetlb-madvise
+
+nr_hugepages_tmp=$(cat /proc/sys/vm/nr_hugepages)
+# For this test, we need one and just one huge page
+echo 1 > /proc/sys/vm/nr_hugepages
+CATEGORY="hugetlb" run_test ./hugetlb_fault_after_madv
+CATEGORY="hugetlb" run_test ./hugetlb_madv_vs_map
+# Restore the previous number of huge pages, since further tests rely on it
+echo "$nr_hugepages_tmp" > /proc/sys/vm/nr_hugepages
+
+if test_selected "hugetlb"; then
+ echo "NOTE: These hugetlb tests provide minimal coverage. Use" | tap_prefix
+ echo " https://github.com/libhugetlbfs/libhugetlbfs.git for" | tap_prefix
+ echo " hugetlb regression testing." | tap_prefix
+fi
+
+CATEGORY="mmap" run_test ./map_fixed_noreplace
+
+if $RUN_ALL; then
+ run_gup_matrix
+else
+ # get_user_pages_fast() benchmark
+ CATEGORY="gup_test" run_test ./gup_test -u
+ # pin_user_pages_fast() benchmark
+ CATEGORY="gup_test" run_test ./gup_test -a
+fi
+# Dump pages 0, 19, and 4096, using pin_user_pages:
+CATEGORY="gup_test" run_test ./gup_test -ct -F 0x1 0 19 0x1000
+CATEGORY="gup_test" run_test ./gup_longterm
+
+CATEGORY="userfaultfd" run_test ./uffd-unit-tests
+uffd_stress_bin=./uffd-stress
+CATEGORY="userfaultfd" run_test ${uffd_stress_bin} anon 20 16
+# Hugetlb tests require source and destination huge pages. Pass in half
+# the size ($half_ufd_size_MB), which is used for *each*.
+CATEGORY="userfaultfd" run_test ${uffd_stress_bin} hugetlb "$half_ufd_size_MB" 32
+CATEGORY="userfaultfd" run_test ${uffd_stress_bin} hugetlb-private "$half_ufd_size_MB" 32
+CATEGORY="userfaultfd" run_test ${uffd_stress_bin} shmem 20 16
+CATEGORY="userfaultfd" run_test ${uffd_stress_bin} shmem-private 20 16
+
+#cleanup
+echo "$nr_hugepgs" > /proc/sys/vm/nr_hugepages
+
+CATEGORY="compaction" run_test ./compaction_test
+
+if command -v sudo &> /dev/null;
+then
+ CATEGORY="mlock" run_test sudo -u nobody ./on-fault-limit
+else
+ echo "# SKIP ./on-fault-limit"
+fi
+
+CATEGORY="mmap" run_test ./map_populate
+
+CATEGORY="mlock" run_test ./mlock-random-test
+
+CATEGORY="mlock" run_test ./mlock2-tests
+
+CATEGORY="process_mrelease" run_test ./mrelease_test
+
+CATEGORY="mremap" run_test ./mremap_test
+
+CATEGORY="hugetlb" run_test ./thuge-gen
+CATEGORY="hugetlb" run_test ./charge_reserved_hugetlb.sh -cgroup-v2
+CATEGORY="hugetlb" run_test ./hugetlb_reparenting_test.sh -cgroup-v2
+if $RUN_DESTRUCTIVE; then
+CATEGORY="hugetlb" run_test ./hugetlb-read-hwpoison
+fi
+
+if [ $VADDR64 -ne 0 ]; then
+
+ # set overcommit_policy as OVERCOMMIT_ALWAYS so that kernel
+ # allows high virtual address allocation requests independent
+ # of platform's physical memory.
+
+ prev_policy=$(cat /proc/sys/vm/overcommit_memory)
+ echo 1 > /proc/sys/vm/overcommit_memory
+ CATEGORY="hugevm" run_test ./virtual_address_range
+ echo $prev_policy > /proc/sys/vm/overcommit_memory
+
+ # va high address boundary switch test
+ ARCH_ARM64="arm64"
+ prev_nr_hugepages=$(cat /proc/sys/vm/nr_hugepages)
+ if [ "$ARCH" == "$ARCH_ARM64" ]; then
+ echo 6 > /proc/sys/vm/nr_hugepages
+ fi
+ CATEGORY="hugevm" run_test bash ./va_high_addr_switch.sh
+ if [ "$ARCH" == "$ARCH_ARM64" ]; then
+ echo $prev_nr_hugepages > /proc/sys/vm/nr_hugepages
+ fi
+fi # VADDR64
+
+# vmalloc stability smoke test
+CATEGORY="vmalloc" run_test bash ./test_vmalloc.sh smoke
+
+CATEGORY="mremap" run_test ./mremap_dontunmap
+
+CATEGORY="hmm" run_test bash ./test_hmm.sh smoke
+
+# MADV_POPULATE_READ and MADV_POPULATE_WRITE tests
+CATEGORY="madv_populate" run_test ./madv_populate
+
+(echo 0 | sudo tee /proc/sys/kernel/yama/ptrace_scope 2>&1) | tap_prefix
+CATEGORY="memfd_secret" run_test ./memfd_secret
+
+# KSM KSM_MERGE_TIME_HUGE_PAGES test with size of 100
+CATEGORY="ksm" run_test ./ksm_tests -H -s 100
+# KSM KSM_MERGE_TIME test with size of 100
+CATEGORY="ksm" run_test ./ksm_tests -P -s 100
+# KSM MADV_MERGEABLE test with 10 identical pages
+CATEGORY="ksm" run_test ./ksm_tests -M -p 10
+# KSM unmerge test
+CATEGORY="ksm" run_test ./ksm_tests -U
+# KSM test with 10 zero pages and use_zero_pages = 0
+CATEGORY="ksm" run_test ./ksm_tests -Z -p 10 -z 0
+# KSM test with 10 zero pages and use_zero_pages = 1
+CATEGORY="ksm" run_test ./ksm_tests -Z -p 10 -z 1
+# KSM test with 2 NUMA nodes and merge_across_nodes = 1
+CATEGORY="ksm_numa" run_test ./ksm_tests -N -m 1
+# KSM test with 2 NUMA nodes and merge_across_nodes = 0
+CATEGORY="ksm_numa" run_test ./ksm_tests -N -m 0
+
+CATEGORY="ksm" run_test ./ksm_functional_tests
+
+# protection_keys tests
+if [ -x ./protection_keys_32 ]
+then
+ CATEGORY="pkey" run_test ./protection_keys_32
+fi
+
+if [ -x ./protection_keys_64 ]
+then
+ CATEGORY="pkey" run_test ./protection_keys_64
+fi
+
+if [ -x ./soft-dirty ]
+then
+ CATEGORY="soft_dirty" run_test ./soft-dirty
+fi
+
+CATEGORY="pagemap" run_test ./pagemap_ioctl
+
+# COW tests
+CATEGORY="cow" run_test ./cow
+
+CATEGORY="thp" run_test ./khugepaged
+
+CATEGORY="thp" run_test ./khugepaged -s 2
+
+CATEGORY="thp" run_test ./transhuge-stress -d 20
+
+# Try to create XFS if not provided
+if [ -z "${SPLIT_HUGE_PAGE_TEST_XFS_PATH}" ]; then
+ if test_selected "thp"; then
+ if grep xfs /proc/filesystems &>/dev/null; then
+ XFS_IMG=$(mktemp /tmp/xfs_img_XXXXXX)
+ SPLIT_HUGE_PAGE_TEST_XFS_PATH=$(mktemp -d /tmp/xfs_dir_XXXXXX)
+ truncate -s 314572800 ${XFS_IMG}
+ mkfs.xfs -q ${XFS_IMG}
+ mount -o loop ${XFS_IMG} ${SPLIT_HUGE_PAGE_TEST_XFS_PATH}
+ MOUNTED_XFS=1
+ fi
+ fi
+fi
+
+CATEGORY="thp" run_test ./split_huge_page_test ${SPLIT_HUGE_PAGE_TEST_XFS_PATH}
+
+if [ -n "${MOUNTED_XFS}" ]; then
+ umount ${SPLIT_HUGE_PAGE_TEST_XFS_PATH}
+ rmdir ${SPLIT_HUGE_PAGE_TEST_XFS_PATH}
+ rm -f ${XFS_IMG}
+fi
+
+CATEGORY="migration" run_test ./migration
+
+CATEGORY="mkdirty" run_test ./mkdirty
+
+CATEGORY="mdwe" run_test ./mdwe_test
+
+echo "SUMMARY: PASS=${count_pass} SKIP=${count_skip} FAIL=${count_fail}" | tap_prefix
+echo "1..${count_total}" | tap_output
+
+exit $exitcode
diff --git a/tools/testing/selftests/mm/settings b/tools/testing/selftests/mm/settings
new file mode 100644
index 000000000000..a953c96aa16e
--- /dev/null
+++ b/tools/testing/selftests/mm/settings
@@ -0,0 +1 @@
+timeout=180
diff --git a/tools/testing/selftests/mm/soft-dirty.c b/tools/testing/selftests/mm/soft-dirty.c
new file mode 100644
index 000000000000..cc5f144430d4
--- /dev/null
+++ b/tools/testing/selftests/mm/soft-dirty.c
@@ -0,0 +1,213 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <stdio.h>
+#include <string.h>
+#include <stdbool.h>
+#include <fcntl.h>
+#include <stdint.h>
+#include <malloc.h>
+#include <sys/mman.h>
+#include "../kselftest.h"
+#include "vm_util.h"
+
+#define PAGEMAP_FILE_PATH "/proc/self/pagemap"
+#define TEST_ITERATIONS 10000
+
+static void test_simple(int pagemap_fd, int pagesize)
+{
+ int i;
+ char *map;
+
+ map = aligned_alloc(pagesize, pagesize);
+ if (!map)
+ ksft_exit_fail_msg("mmap failed\n");
+
+ clear_softdirty();
+
+ for (i = 0 ; i < TEST_ITERATIONS; i++) {
+ if (pagemap_is_softdirty(pagemap_fd, map) == 1) {
+ ksft_print_msg("dirty bit was 1, but should be 0 (i=%d)\n", i);
+ break;
+ }
+
+ clear_softdirty();
+ // Write something to the page to get the dirty bit enabled on the page
+ map[0]++;
+
+ if (pagemap_is_softdirty(pagemap_fd, map) == 0) {
+ ksft_print_msg("dirty bit was 0, but should be 1 (i=%d)\n", i);
+ break;
+ }
+
+ clear_softdirty();
+ }
+ free(map);
+
+ ksft_test_result(i == TEST_ITERATIONS, "Test %s\n", __func__);
+}
+
+static void test_vma_reuse(int pagemap_fd, int pagesize)
+{
+ char *map, *map2;
+
+ map = mmap(NULL, pagesize, (PROT_READ | PROT_WRITE), (MAP_PRIVATE | MAP_ANON), -1, 0);
+ if (map == MAP_FAILED)
+ ksft_exit_fail_msg("mmap failed");
+
+ // The kernel always marks new regions as soft dirty
+ ksft_test_result(pagemap_is_softdirty(pagemap_fd, map) == 1,
+ "Test %s dirty bit of allocated page\n", __func__);
+
+ clear_softdirty();
+ munmap(map, pagesize);
+
+ map2 = mmap(NULL, pagesize, (PROT_READ | PROT_WRITE), (MAP_PRIVATE | MAP_ANON), -1, 0);
+ if (map2 == MAP_FAILED)
+ ksft_exit_fail_msg("mmap failed");
+
+ // Dirty bit is set for new regions even if they are reused
+ if (map == map2)
+ ksft_test_result(pagemap_is_softdirty(pagemap_fd, map2) == 1,
+ "Test %s dirty bit of reused address page\n", __func__);
+ else
+ ksft_test_result_skip("Test %s dirty bit of reused address page\n", __func__);
+
+ munmap(map2, pagesize);
+}
+
+static void test_hugepage(int pagemap_fd, int pagesize)
+{
+ char *map;
+ int i, ret;
+ size_t hpage_len = read_pmd_pagesize();
+
+ if (!hpage_len)
+ ksft_exit_fail_msg("Reading PMD pagesize failed");
+
+ map = memalign(hpage_len, hpage_len);
+ if (!map)
+ ksft_exit_fail_msg("memalign failed\n");
+
+ ret = madvise(map, hpage_len, MADV_HUGEPAGE);
+ if (ret)
+ ksft_exit_fail_msg("madvise failed %d\n", ret);
+
+ for (i = 0; i < hpage_len; i++)
+ map[i] = (char)i;
+
+ if (check_huge_anon(map, 1, hpage_len)) {
+ ksft_test_result_pass("Test %s huge page allocation\n", __func__);
+
+ clear_softdirty();
+ for (i = 0 ; i < TEST_ITERATIONS ; i++) {
+ if (pagemap_is_softdirty(pagemap_fd, map) == 1) {
+ ksft_print_msg("dirty bit was 1, but should be 0 (i=%d)\n", i);
+ break;
+ }
+
+ clear_softdirty();
+ // Write something to the page to get the dirty bit enabled on the page
+ map[0]++;
+
+ if (pagemap_is_softdirty(pagemap_fd, map) == 0) {
+ ksft_print_msg("dirty bit was 0, but should be 1 (i=%d)\n", i);
+ break;
+ }
+ clear_softdirty();
+ }
+
+ ksft_test_result(i == TEST_ITERATIONS, "Test %s huge page dirty bit\n", __func__);
+ } else {
+ // hugepage allocation failed. skip these tests
+ ksft_test_result_skip("Test %s huge page allocation\n", __func__);
+ ksft_test_result_skip("Test %s huge page dirty bit\n", __func__);
+ }
+ free(map);
+}
+
+static void test_mprotect(int pagemap_fd, int pagesize, bool anon)
+{
+ const char *type[] = {"file", "anon"};
+ const char *fname = "./soft-dirty-test-file";
+ int test_fd;
+ char *map;
+
+ if (anon) {
+ map = mmap(NULL, pagesize, PROT_READ|PROT_WRITE,
+ MAP_ANONYMOUS|MAP_PRIVATE, -1, 0);
+ if (!map)
+ ksft_exit_fail_msg("anon mmap failed\n");
+ } else {
+ test_fd = open(fname, O_RDWR | O_CREAT);
+ if (test_fd < 0) {
+ ksft_test_result_skip("Test %s open() file failed\n", __func__);
+ return;
+ }
+ unlink(fname);
+ ftruncate(test_fd, pagesize);
+ map = mmap(NULL, pagesize, PROT_READ|PROT_WRITE,
+ MAP_SHARED, test_fd, 0);
+ if (!map)
+ ksft_exit_fail_msg("file mmap failed\n");
+ }
+
+ *map = 1;
+ ksft_test_result(pagemap_is_softdirty(pagemap_fd, map) == 1,
+ "Test %s-%s dirty bit of new written page\n",
+ __func__, type[anon]);
+ clear_softdirty();
+ ksft_test_result(pagemap_is_softdirty(pagemap_fd, map) == 0,
+ "Test %s-%s soft-dirty clear after clear_refs\n",
+ __func__, type[anon]);
+ mprotect(map, pagesize, PROT_READ);
+ ksft_test_result(pagemap_is_softdirty(pagemap_fd, map) == 0,
+ "Test %s-%s soft-dirty clear after marking RO\n",
+ __func__, type[anon]);
+ mprotect(map, pagesize, PROT_READ|PROT_WRITE);
+ ksft_test_result(pagemap_is_softdirty(pagemap_fd, map) == 0,
+ "Test %s-%s soft-dirty clear after marking RW\n",
+ __func__, type[anon]);
+ *map = 2;
+ ksft_test_result(pagemap_is_softdirty(pagemap_fd, map) == 1,
+ "Test %s-%s soft-dirty after rewritten\n",
+ __func__, type[anon]);
+
+ munmap(map, pagesize);
+
+ if (!anon)
+ close(test_fd);
+}
+
+static void test_mprotect_anon(int pagemap_fd, int pagesize)
+{
+ test_mprotect(pagemap_fd, pagesize, true);
+}
+
+static void test_mprotect_file(int pagemap_fd, int pagesize)
+{
+ test_mprotect(pagemap_fd, pagesize, false);
+}
+
+int main(int argc, char **argv)
+{
+ int pagemap_fd;
+ int pagesize;
+
+ ksft_print_header();
+ ksft_set_plan(15);
+
+ pagemap_fd = open(PAGEMAP_FILE_PATH, O_RDONLY);
+ if (pagemap_fd < 0)
+ ksft_exit_fail_msg("Failed to open %s\n", PAGEMAP_FILE_PATH);
+
+ pagesize = getpagesize();
+
+ test_simple(pagemap_fd, pagesize);
+ test_vma_reuse(pagemap_fd, pagesize);
+ test_hugepage(pagemap_fd, pagesize);
+ test_mprotect_anon(pagemap_fd, pagesize);
+ test_mprotect_file(pagemap_fd, pagesize);
+
+ close(pagemap_fd);
+
+ return ksft_exit_pass();
+}
diff --git a/tools/testing/selftests/mm/split_huge_page_test.c b/tools/testing/selftests/mm/split_huge_page_test.c
new file mode 100644
index 000000000000..856662d2f87a
--- /dev/null
+++ b/tools/testing/selftests/mm/split_huge_page_test.c
@@ -0,0 +1,446 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * A test of splitting PMD THPs and PTE-mapped THPs from a specified virtual
+ * address range in a process via <debugfs>/split_huge_pages interface.
+ */
+
+#define _GNU_SOURCE
+#include <stdio.h>
+#include <stdlib.h>
+#include <stdarg.h>
+#include <unistd.h>
+#include <inttypes.h>
+#include <string.h>
+#include <fcntl.h>
+#include <sys/mman.h>
+#include <sys/mount.h>
+#include <malloc.h>
+#include <stdbool.h>
+#include <time.h>
+#include "vm_util.h"
+#include "../kselftest.h"
+
+uint64_t pagesize;
+unsigned int pageshift;
+uint64_t pmd_pagesize;
+
+#define SPLIT_DEBUGFS "/sys/kernel/debug/split_huge_pages"
+#define SMAP_PATH "/proc/self/smaps"
+#define INPUT_MAX 80
+
+#define PID_FMT "%d,0x%lx,0x%lx,%d"
+#define PATH_FMT "%s,0x%lx,0x%lx,%d"
+
+#define PFN_MASK ((1UL<<55)-1)
+#define KPF_THP (1UL<<22)
+
+int is_backed_by_thp(char *vaddr, int pagemap_file, int kpageflags_file)
+{
+ uint64_t paddr;
+ uint64_t page_flags;
+
+ if (pagemap_file) {
+ pread(pagemap_file, &paddr, sizeof(paddr),
+ ((long)vaddr >> pageshift) * sizeof(paddr));
+
+ if (kpageflags_file) {
+ pread(kpageflags_file, &page_flags, sizeof(page_flags),
+ (paddr & PFN_MASK) * sizeof(page_flags));
+
+ return !!(page_flags & KPF_THP);
+ }
+ }
+ return 0;
+}
+
+static void write_file(const char *path, const char *buf, size_t buflen)
+{
+ int fd;
+ ssize_t numwritten;
+
+ fd = open(path, O_WRONLY);
+ if (fd == -1)
+ ksft_exit_fail_msg("%s open failed: %s\n", path, strerror(errno));
+
+ numwritten = write(fd, buf, buflen - 1);
+ close(fd);
+ if (numwritten < 1)
+ ksft_exit_fail_msg("Write failed\n");
+}
+
+static void write_debugfs(const char *fmt, ...)
+{
+ char input[INPUT_MAX];
+ int ret;
+ va_list argp;
+
+ va_start(argp, fmt);
+ ret = vsnprintf(input, INPUT_MAX, fmt, argp);
+ va_end(argp);
+
+ if (ret >= INPUT_MAX)
+ ksft_exit_fail_msg("%s: Debugfs input is too long\n", __func__);
+
+ write_file(SPLIT_DEBUGFS, input, ret + 1);
+}
+
+void split_pmd_thp(void)
+{
+ char *one_page;
+ size_t len = 4 * pmd_pagesize;
+ size_t i;
+
+ one_page = memalign(pmd_pagesize, len);
+ if (!one_page)
+ ksft_exit_fail_msg("Fail to allocate memory: %s\n", strerror(errno));
+
+ madvise(one_page, len, MADV_HUGEPAGE);
+
+ for (i = 0; i < len; i++)
+ one_page[i] = (char)i;
+
+ if (!check_huge_anon(one_page, 4, pmd_pagesize))
+ ksft_exit_fail_msg("No THP is allocated\n");
+
+ /* split all THPs */
+ write_debugfs(PID_FMT, getpid(), (uint64_t)one_page,
+ (uint64_t)one_page + len, 0);
+
+ for (i = 0; i < len; i++)
+ if (one_page[i] != (char)i)
+ ksft_exit_fail_msg("%ld byte corrupted\n", i);
+
+
+ if (!check_huge_anon(one_page, 0, pmd_pagesize))
+ ksft_exit_fail_msg("Still AnonHugePages not split\n");
+
+ ksft_test_result_pass("Split huge pages successful\n");
+ free(one_page);
+}
+
+void split_pte_mapped_thp(void)
+{
+ char *one_page, *pte_mapped, *pte_mapped2;
+ size_t len = 4 * pmd_pagesize;
+ uint64_t thp_size;
+ size_t i;
+ const char *pagemap_template = "/proc/%d/pagemap";
+ const char *kpageflags_proc = "/proc/kpageflags";
+ char pagemap_proc[255];
+ int pagemap_fd;
+ int kpageflags_fd;
+
+ if (snprintf(pagemap_proc, 255, pagemap_template, getpid()) < 0)
+ ksft_exit_fail_msg("get pagemap proc error: %s\n", strerror(errno));
+
+ pagemap_fd = open(pagemap_proc, O_RDONLY);
+ if (pagemap_fd == -1)
+ ksft_exit_fail_msg("read pagemap: %s\n", strerror(errno));
+
+ kpageflags_fd = open(kpageflags_proc, O_RDONLY);
+ if (kpageflags_fd == -1)
+ ksft_exit_fail_msg("read kpageflags: %s\n", strerror(errno));
+
+ one_page = mmap((void *)(1UL << 30), len, PROT_READ | PROT_WRITE,
+ MAP_ANONYMOUS | MAP_PRIVATE, -1, 0);
+ if (one_page == MAP_FAILED)
+ ksft_exit_fail_msg("Fail to allocate memory: %s\n", strerror(errno));
+
+ madvise(one_page, len, MADV_HUGEPAGE);
+
+ for (i = 0; i < len; i++)
+ one_page[i] = (char)i;
+
+ if (!check_huge_anon(one_page, 4, pmd_pagesize))
+ ksft_exit_fail_msg("No THP is allocated\n");
+
+ /* remap the first pagesize of first THP */
+ pte_mapped = mremap(one_page, pagesize, pagesize, MREMAP_MAYMOVE);
+
+ /* remap the Nth pagesize of Nth THP */
+ for (i = 1; i < 4; i++) {
+ pte_mapped2 = mremap(one_page + pmd_pagesize * i + pagesize * i,
+ pagesize, pagesize,
+ MREMAP_MAYMOVE|MREMAP_FIXED,
+ pte_mapped + pagesize * i);
+ if (pte_mapped2 == MAP_FAILED)
+ ksft_exit_fail_msg("mremap failed: %s\n", strerror(errno));
+ }
+
+ /* smap does not show THPs after mremap, use kpageflags instead */
+ thp_size = 0;
+ for (i = 0; i < pagesize * 4; i++)
+ if (i % pagesize == 0 &&
+ is_backed_by_thp(&pte_mapped[i], pagemap_fd, kpageflags_fd))
+ thp_size++;
+
+ if (thp_size != 4)
+ ksft_exit_fail_msg("Some THPs are missing during mremap\n");
+
+ /* split all remapped THPs */
+ write_debugfs(PID_FMT, getpid(), (uint64_t)pte_mapped,
+ (uint64_t)pte_mapped + pagesize * 4, 0);
+
+ /* smap does not show THPs after mremap, use kpageflags instead */
+ thp_size = 0;
+ for (i = 0; i < pagesize * 4; i++) {
+ if (pte_mapped[i] != (char)i)
+ ksft_exit_fail_msg("%ld byte corrupted\n", i);
+
+ if (i % pagesize == 0 &&
+ is_backed_by_thp(&pte_mapped[i], pagemap_fd, kpageflags_fd))
+ thp_size++;
+ }
+
+ if (thp_size)
+ ksft_exit_fail_msg("Still %ld THPs not split\n", thp_size);
+
+ ksft_test_result_pass("Split PTE-mapped huge pages successful\n");
+ munmap(one_page, len);
+ close(pagemap_fd);
+ close(kpageflags_fd);
+}
+
+void split_file_backed_thp(void)
+{
+ int status;
+ int fd;
+ ssize_t num_written;
+ char tmpfs_template[] = "/tmp/thp_split_XXXXXX";
+ const char *tmpfs_loc = mkdtemp(tmpfs_template);
+ char testfile[INPUT_MAX];
+ uint64_t pgoff_start = 0, pgoff_end = 1024;
+
+ ksft_print_msg("Please enable pr_debug in split_huge_pages_in_file() for more info.\n");
+
+ status = mount("tmpfs", tmpfs_loc, "tmpfs", 0, "huge=always,size=4m");
+
+ if (status)
+ ksft_exit_fail_msg("Unable to create a tmpfs for testing\n");
+
+ status = snprintf(testfile, INPUT_MAX, "%s/thp_file", tmpfs_loc);
+ if (status >= INPUT_MAX) {
+ ksft_exit_fail_msg("Fail to create file-backed THP split testing file\n");
+ }
+
+ fd = open(testfile, O_CREAT|O_WRONLY);
+ if (fd == -1) {
+ ksft_perror("Cannot open testing file");
+ goto cleanup;
+ }
+
+ /* write something to the file, so a file-backed THP can be allocated */
+ num_written = write(fd, tmpfs_loc, strlen(tmpfs_loc) + 1);
+ close(fd);
+
+ if (num_written < 1) {
+ ksft_perror("Fail to write data to testing file");
+ goto cleanup;
+ }
+
+ /* split the file-backed THP */
+ write_debugfs(PATH_FMT, testfile, pgoff_start, pgoff_end, 0);
+
+ status = unlink(testfile);
+ if (status) {
+ ksft_perror("Cannot remove testing file");
+ goto cleanup;
+ }
+
+ status = umount(tmpfs_loc);
+ if (status) {
+ rmdir(tmpfs_loc);
+ ksft_exit_fail_msg("Unable to umount %s\n", tmpfs_loc);
+ }
+
+ status = rmdir(tmpfs_loc);
+ if (status)
+ ksft_exit_fail_msg("cannot remove tmp dir: %s\n", strerror(errno));
+
+ ksft_print_msg("Please check dmesg for more information\n");
+ ksft_test_result_pass("File-backed THP split test done\n");
+ return;
+
+cleanup:
+ umount(tmpfs_loc);
+ rmdir(tmpfs_loc);
+ ksft_exit_fail_msg("Error occurred\n");
+}
+
+bool prepare_thp_fs(const char *xfs_path, char *thp_fs_template,
+ const char **thp_fs_loc)
+{
+ if (xfs_path) {
+ *thp_fs_loc = xfs_path;
+ return false;
+ }
+
+ *thp_fs_loc = mkdtemp(thp_fs_template);
+
+ if (!*thp_fs_loc)
+ ksft_exit_fail_msg("cannot create temp folder\n");
+
+ return true;
+}
+
+void cleanup_thp_fs(const char *thp_fs_loc, bool created_tmp)
+{
+ int status;
+
+ if (!created_tmp)
+ return;
+
+ status = rmdir(thp_fs_loc);
+ if (status)
+ ksft_exit_fail_msg("cannot remove tmp dir: %s\n",
+ strerror(errno));
+}
+
+int create_pagecache_thp_and_fd(const char *testfile, size_t fd_size, int *fd,
+ char **addr)
+{
+ size_t i;
+ int dummy;
+
+ srand(time(NULL));
+
+ *fd = open(testfile, O_CREAT | O_RDWR, 0664);
+ if (*fd == -1)
+ ksft_exit_fail_msg("Failed to create a file at %s\n", testfile);
+
+ for (i = 0; i < fd_size; i++) {
+ unsigned char byte = (unsigned char)i;
+
+ write(*fd, &byte, sizeof(byte));
+ }
+ close(*fd);
+ sync();
+ *fd = open("/proc/sys/vm/drop_caches", O_WRONLY);
+ if (*fd == -1) {
+ ksft_perror("open drop_caches");
+ goto err_out_unlink;
+ }
+ if (write(*fd, "3", 1) != 1) {
+ ksft_perror("write to drop_caches");
+ goto err_out_unlink;
+ }
+ close(*fd);
+
+ *fd = open(testfile, O_RDWR);
+ if (*fd == -1) {
+ ksft_perror("Failed to open testfile\n");
+ goto err_out_unlink;
+ }
+
+ *addr = mmap(NULL, fd_size, PROT_READ|PROT_WRITE, MAP_SHARED, *fd, 0);
+ if (*addr == (char *)-1) {
+ ksft_perror("cannot mmap");
+ goto err_out_close;
+ }
+ madvise(*addr, fd_size, MADV_HUGEPAGE);
+
+ for (size_t i = 0; i < fd_size; i++)
+ dummy += *(*addr + i);
+
+ if (!check_huge_file(*addr, fd_size / pmd_pagesize, pmd_pagesize)) {
+ ksft_print_msg("No large pagecache folio generated, please provide a filesystem supporting large folio\n");
+ munmap(*addr, fd_size);
+ close(*fd);
+ unlink(testfile);
+ ksft_test_result_skip("Pagecache folio split skipped\n");
+ return -2;
+ }
+ return 0;
+err_out_close:
+ close(*fd);
+err_out_unlink:
+ unlink(testfile);
+ ksft_exit_fail_msg("Failed to create large pagecache folios\n");
+ return -1;
+}
+
+void split_thp_in_pagecache_to_order(size_t fd_size, int order, const char *fs_loc)
+{
+ int fd;
+ char *addr;
+ size_t i;
+ char testfile[INPUT_MAX];
+ int err = 0;
+
+ err = snprintf(testfile, INPUT_MAX, "%s/test", fs_loc);
+
+ if (err < 0)
+ ksft_exit_fail_msg("cannot generate right test file name\n");
+
+ err = create_pagecache_thp_and_fd(testfile, fd_size, &fd, &addr);
+ if (err)
+ return;
+ err = 0;
+
+ write_debugfs(PID_FMT, getpid(), (uint64_t)addr, (uint64_t)addr + fd_size, order);
+
+ for (i = 0; i < fd_size; i++)
+ if (*(addr + i) != (char)i) {
+ ksft_print_msg("%lu byte corrupted in the file\n", i);
+ err = EXIT_FAILURE;
+ goto out;
+ }
+
+ if (!check_huge_file(addr, 0, pmd_pagesize)) {
+ ksft_print_msg("Still FilePmdMapped not split\n");
+ err = EXIT_FAILURE;
+ goto out;
+ }
+
+out:
+ munmap(addr, fd_size);
+ close(fd);
+ unlink(testfile);
+ if (err)
+ ksft_exit_fail_msg("Split PMD-mapped pagecache folio to order %d failed\n", order);
+ ksft_test_result_pass("Split PMD-mapped pagecache folio to order %d passed\n", order);
+}
+
+int main(int argc, char **argv)
+{
+ int i;
+ size_t fd_size;
+ char *optional_xfs_path = NULL;
+ char fs_loc_template[] = "/tmp/thp_fs_XXXXXX";
+ const char *fs_loc;
+ bool created_tmp;
+
+ ksft_print_header();
+
+ if (geteuid() != 0) {
+ ksft_print_msg("Please run the benchmark as root\n");
+ ksft_finished();
+ }
+
+ if (argc > 1)
+ optional_xfs_path = argv[1];
+
+ ksft_set_plan(3+9);
+
+ pagesize = getpagesize();
+ pageshift = ffs(pagesize) - 1;
+ pmd_pagesize = read_pmd_pagesize();
+ if (!pmd_pagesize)
+ ksft_exit_fail_msg("Reading PMD pagesize failed\n");
+
+ fd_size = 2 * pmd_pagesize;
+
+ split_pmd_thp();
+ split_pte_mapped_thp();
+ split_file_backed_thp();
+
+ created_tmp = prepare_thp_fs(optional_xfs_path, fs_loc_template,
+ &fs_loc);
+ for (i = 8; i >= 0; i--)
+ split_thp_in_pagecache_to_order(fd_size, i, fs_loc);
+ cleanup_thp_fs(fs_loc, created_tmp);
+
+ ksft_finished();
+
+ return 0;
+}
diff --git a/tools/testing/selftests/vm/test_hmm.sh b/tools/testing/selftests/mm/test_hmm.sh
index 0647b525a625..46e19b5d648d 100755
--- a/tools/testing/selftests/vm/test_hmm.sh
+++ b/tools/testing/selftests/mm/test_hmm.sh
@@ -40,25 +40,30 @@ check_test_requirements()
load_driver()
{
- modprobe $DRIVER > /dev/null 2>&1
- if [ $? == 0 ]; then
- major=$(awk "\$2==\"HMM_DMIRROR\" {print \$1}" /proc/devices)
- mknod /dev/hmm_dmirror0 c $major 0
- mknod /dev/hmm_dmirror1 c $major 1
+ if [ $# -eq 0 ]; then
+ modprobe $DRIVER > /dev/null 2>&1
+ else
+ if [ $# -eq 2 ]; then
+ modprobe $DRIVER spm_addr_dev0=$1 spm_addr_dev1=$2
+ > /dev/null 2>&1
+ else
+ echo "Missing module parameters. Make sure pass"\
+ "spm_addr_dev0 and spm_addr_dev1"
+ usage
+ fi
fi
}
unload_driver()
{
modprobe -r $DRIVER > /dev/null 2>&1
- rm -f /dev/hmm_dmirror?
}
run_smoke()
{
echo "Running smoke test. Note, this test provides basic coverage."
- load_driver
+ load_driver $1 $2
$(dirname "${BASH_SOURCE[0]}")/hmm-tests
unload_driver
}
@@ -75,6 +80,9 @@ usage()
echo "# Smoke testing"
echo "./${TEST_NAME}.sh smoke"
echo
+ echo "# Smoke testing with SPM enabled"
+ echo "./${TEST_NAME}.sh smoke <spm_addr_dev0> <spm_addr_dev1>"
+ echo
exit 0
}
@@ -84,7 +92,7 @@ function run_test()
usage
else
if [ "$1" = "smoke" ]; then
- run_smoke
+ run_smoke $2 $3
else
usage
fi
diff --git a/tools/testing/selftests/vm/test_vmalloc.sh b/tools/testing/selftests/mm/test_vmalloc.sh
index 06d2bb109f06..d73b846736f1 100755
--- a/tools/testing/selftests/vm/test_vmalloc.sh
+++ b/tools/testing/selftests/mm/test_vmalloc.sh
@@ -11,6 +11,7 @@
TEST_NAME="vmalloc"
DRIVER="test_${TEST_NAME}"
+NUM_CPUS=`grep -c ^processor /proc/cpuinfo`
# 1 if fails
exitcode=1
@@ -22,9 +23,9 @@ ksft_skip=4
# Static templates for performance, stressing and smoke tests.
# Also it is possible to pass any supported parameters manualy.
#
-PERF_PARAM="single_cpu_test=1 sequential_test_order=1 test_repeat_count=3"
-SMOKE_PARAM="single_cpu_test=1 test_loop_count=10000 test_repeat_count=10"
-STRESS_PARAM="test_repeat_count=20"
+PERF_PARAM="sequential_test_order=1 test_repeat_count=3"
+SMOKE_PARAM="test_loop_count=10000 test_repeat_count=10"
+STRESS_PARAM="nr_threads=$NUM_CPUS test_repeat_count=20"
check_test_requirements()
{
@@ -58,8 +59,8 @@ run_perfformance_check()
run_stability_check()
{
- echo "Run stability tests. In order to stress vmalloc subsystem we run"
- echo "all available test cases on all available CPUs simultaneously."
+ echo "Run stability tests. In order to stress vmalloc subsystem all"
+ echo "available test cases are run by NUM_CPUS workers simultaneously."
echo "It will take time, so be patient."
modprobe $DRIVER $STRESS_PARAM > /dev/null 2>&1
@@ -92,17 +93,17 @@ usage()
echo "# Shows help message"
echo "./${DRIVER}.sh"
echo
- echo "# Runs 1 test(id_1), repeats it 5 times on all online CPUs"
- echo "./${DRIVER}.sh run_test_mask=1 test_repeat_count=5"
+ echo "# Runs 1 test(id_1), repeats it 5 times by NUM_CPUS workers"
+ echo "./${DRIVER}.sh nr_threads=$NUM_CPUS run_test_mask=1 test_repeat_count=5"
echo
echo -n "# Runs 4 tests(id_1|id_2|id_4|id_16) on one CPU with "
echo "sequential order"
- echo -n "./${DRIVER}.sh single_cpu_test=1 sequential_test_order=1 "
+ echo -n "./${DRIVER}.sh sequential_test_order=1 "
echo "run_test_mask=23"
echo
- echo -n "# Runs all tests on all online CPUs, shuffled order, repeats "
+ echo -n "# Runs all tests by NUM_CPUS workers, shuffled order, repeats "
echo "20 times"
- echo "./${DRIVER}.sh test_repeat_count=20"
+ echo "./${DRIVER}.sh nr_threads=$NUM_CPUS test_repeat_count=20"
echo
echo "# Performance analysis"
echo "./${DRIVER}.sh performance"
diff --git a/tools/testing/selftests/mm/thp_settings.c b/tools/testing/selftests/mm/thp_settings.c
new file mode 100644
index 000000000000..a4163438108e
--- /dev/null
+++ b/tools/testing/selftests/mm/thp_settings.c
@@ -0,0 +1,349 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <fcntl.h>
+#include <limits.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+
+#include "thp_settings.h"
+
+#define THP_SYSFS "/sys/kernel/mm/transparent_hugepage/"
+#define MAX_SETTINGS_DEPTH 4
+static struct thp_settings settings_stack[MAX_SETTINGS_DEPTH];
+static int settings_index;
+static struct thp_settings saved_settings;
+static char dev_queue_read_ahead_path[PATH_MAX];
+
+static const char * const thp_enabled_strings[] = {
+ "never",
+ "always",
+ "inherit",
+ "madvise",
+ NULL
+};
+
+static const char * const thp_defrag_strings[] = {
+ "always",
+ "defer",
+ "defer+madvise",
+ "madvise",
+ "never",
+ NULL
+};
+
+static const char * const shmem_enabled_strings[] = {
+ "always",
+ "within_size",
+ "advise",
+ "never",
+ "deny",
+ "force",
+ NULL
+};
+
+int read_file(const char *path, char *buf, size_t buflen)
+{
+ int fd;
+ ssize_t numread;
+
+ fd = open(path, O_RDONLY);
+ if (fd == -1)
+ return 0;
+
+ numread = read(fd, buf, buflen - 1);
+ if (numread < 1) {
+ close(fd);
+ return 0;
+ }
+
+ buf[numread] = '\0';
+ close(fd);
+
+ return (unsigned int) numread;
+}
+
+int write_file(const char *path, const char *buf, size_t buflen)
+{
+ int fd;
+ ssize_t numwritten;
+
+ fd = open(path, O_WRONLY);
+ if (fd == -1) {
+ printf("open(%s)\n", path);
+ exit(EXIT_FAILURE);
+ return 0;
+ }
+
+ numwritten = write(fd, buf, buflen - 1);
+ close(fd);
+ if (numwritten < 1) {
+ printf("write(%s)\n", buf);
+ exit(EXIT_FAILURE);
+ return 0;
+ }
+
+ return (unsigned int) numwritten;
+}
+
+const unsigned long read_num(const char *path)
+{
+ char buf[21];
+
+ if (read_file(path, buf, sizeof(buf)) < 0) {
+ perror("read_file()");
+ exit(EXIT_FAILURE);
+ }
+
+ return strtoul(buf, NULL, 10);
+}
+
+void write_num(const char *path, unsigned long num)
+{
+ char buf[21];
+
+ sprintf(buf, "%ld", num);
+ if (!write_file(path, buf, strlen(buf) + 1)) {
+ perror(path);
+ exit(EXIT_FAILURE);
+ }
+}
+
+int thp_read_string(const char *name, const char * const strings[])
+{
+ char path[PATH_MAX];
+ char buf[256];
+ char *c;
+ int ret;
+
+ ret = snprintf(path, PATH_MAX, THP_SYSFS "%s", name);
+ if (ret >= PATH_MAX) {
+ printf("%s: Pathname is too long\n", __func__);
+ exit(EXIT_FAILURE);
+ }
+
+ if (!read_file(path, buf, sizeof(buf))) {
+ perror(path);
+ exit(EXIT_FAILURE);
+ }
+
+ c = strchr(buf, '[');
+ if (!c) {
+ printf("%s: Parse failure\n", __func__);
+ exit(EXIT_FAILURE);
+ }
+
+ c++;
+ memmove(buf, c, sizeof(buf) - (c - buf));
+
+ c = strchr(buf, ']');
+ if (!c) {
+ printf("%s: Parse failure\n", __func__);
+ exit(EXIT_FAILURE);
+ }
+ *c = '\0';
+
+ ret = 0;
+ while (strings[ret]) {
+ if (!strcmp(strings[ret], buf))
+ return ret;
+ ret++;
+ }
+
+ printf("Failed to parse %s\n", name);
+ exit(EXIT_FAILURE);
+}
+
+void thp_write_string(const char *name, const char *val)
+{
+ char path[PATH_MAX];
+ int ret;
+
+ ret = snprintf(path, PATH_MAX, THP_SYSFS "%s", name);
+ if (ret >= PATH_MAX) {
+ printf("%s: Pathname is too long\n", __func__);
+ exit(EXIT_FAILURE);
+ }
+
+ if (!write_file(path, val, strlen(val) + 1)) {
+ perror(path);
+ exit(EXIT_FAILURE);
+ }
+}
+
+const unsigned long thp_read_num(const char *name)
+{
+ char path[PATH_MAX];
+ int ret;
+
+ ret = snprintf(path, PATH_MAX, THP_SYSFS "%s", name);
+ if (ret >= PATH_MAX) {
+ printf("%s: Pathname is too long\n", __func__);
+ exit(EXIT_FAILURE);
+ }
+ return read_num(path);
+}
+
+void thp_write_num(const char *name, unsigned long num)
+{
+ char path[PATH_MAX];
+ int ret;
+
+ ret = snprintf(path, PATH_MAX, THP_SYSFS "%s", name);
+ if (ret >= PATH_MAX) {
+ printf("%s: Pathname is too long\n", __func__);
+ exit(EXIT_FAILURE);
+ }
+ write_num(path, num);
+}
+
+void thp_read_settings(struct thp_settings *settings)
+{
+ unsigned long orders = thp_supported_orders();
+ char path[PATH_MAX];
+ int i;
+
+ *settings = (struct thp_settings) {
+ .thp_enabled = thp_read_string("enabled", thp_enabled_strings),
+ .thp_defrag = thp_read_string("defrag", thp_defrag_strings),
+ .shmem_enabled =
+ thp_read_string("shmem_enabled", shmem_enabled_strings),
+ .use_zero_page = thp_read_num("use_zero_page"),
+ };
+ settings->khugepaged = (struct khugepaged_settings) {
+ .defrag = thp_read_num("khugepaged/defrag"),
+ .alloc_sleep_millisecs =
+ thp_read_num("khugepaged/alloc_sleep_millisecs"),
+ .scan_sleep_millisecs =
+ thp_read_num("khugepaged/scan_sleep_millisecs"),
+ .max_ptes_none = thp_read_num("khugepaged/max_ptes_none"),
+ .max_ptes_swap = thp_read_num("khugepaged/max_ptes_swap"),
+ .max_ptes_shared = thp_read_num("khugepaged/max_ptes_shared"),
+ .pages_to_scan = thp_read_num("khugepaged/pages_to_scan"),
+ };
+ if (dev_queue_read_ahead_path[0])
+ settings->read_ahead_kb = read_num(dev_queue_read_ahead_path);
+
+ for (i = 0; i < NR_ORDERS; i++) {
+ if (!((1 << i) & orders)) {
+ settings->hugepages[i].enabled = THP_NEVER;
+ continue;
+ }
+ snprintf(path, PATH_MAX, "hugepages-%ukB/enabled",
+ (getpagesize() >> 10) << i);
+ settings->hugepages[i].enabled =
+ thp_read_string(path, thp_enabled_strings);
+ }
+}
+
+void thp_write_settings(struct thp_settings *settings)
+{
+ struct khugepaged_settings *khugepaged = &settings->khugepaged;
+ unsigned long orders = thp_supported_orders();
+ char path[PATH_MAX];
+ int enabled;
+ int i;
+
+ thp_write_string("enabled", thp_enabled_strings[settings->thp_enabled]);
+ thp_write_string("defrag", thp_defrag_strings[settings->thp_defrag]);
+ thp_write_string("shmem_enabled",
+ shmem_enabled_strings[settings->shmem_enabled]);
+ thp_write_num("use_zero_page", settings->use_zero_page);
+
+ thp_write_num("khugepaged/defrag", khugepaged->defrag);
+ thp_write_num("khugepaged/alloc_sleep_millisecs",
+ khugepaged->alloc_sleep_millisecs);
+ thp_write_num("khugepaged/scan_sleep_millisecs",
+ khugepaged->scan_sleep_millisecs);
+ thp_write_num("khugepaged/max_ptes_none", khugepaged->max_ptes_none);
+ thp_write_num("khugepaged/max_ptes_swap", khugepaged->max_ptes_swap);
+ thp_write_num("khugepaged/max_ptes_shared", khugepaged->max_ptes_shared);
+ thp_write_num("khugepaged/pages_to_scan", khugepaged->pages_to_scan);
+
+ if (dev_queue_read_ahead_path[0])
+ write_num(dev_queue_read_ahead_path, settings->read_ahead_kb);
+
+ for (i = 0; i < NR_ORDERS; i++) {
+ if (!((1 << i) & orders))
+ continue;
+ snprintf(path, PATH_MAX, "hugepages-%ukB/enabled",
+ (getpagesize() >> 10) << i);
+ enabled = settings->hugepages[i].enabled;
+ thp_write_string(path, thp_enabled_strings[enabled]);
+ }
+}
+
+struct thp_settings *thp_current_settings(void)
+{
+ if (!settings_index) {
+ printf("Fail: No settings set");
+ exit(EXIT_FAILURE);
+ }
+ return settings_stack + settings_index - 1;
+}
+
+void thp_push_settings(struct thp_settings *settings)
+{
+ if (settings_index >= MAX_SETTINGS_DEPTH) {
+ printf("Fail: Settings stack exceeded");
+ exit(EXIT_FAILURE);
+ }
+ settings_stack[settings_index++] = *settings;
+ thp_write_settings(thp_current_settings());
+}
+
+void thp_pop_settings(void)
+{
+ if (settings_index <= 0) {
+ printf("Fail: Settings stack empty");
+ exit(EXIT_FAILURE);
+ }
+ --settings_index;
+ thp_write_settings(thp_current_settings());
+}
+
+void thp_restore_settings(void)
+{
+ thp_write_settings(&saved_settings);
+}
+
+void thp_save_settings(void)
+{
+ thp_read_settings(&saved_settings);
+}
+
+void thp_set_read_ahead_path(char *path)
+{
+ if (!path) {
+ dev_queue_read_ahead_path[0] = '\0';
+ return;
+ }
+
+ strncpy(dev_queue_read_ahead_path, path,
+ sizeof(dev_queue_read_ahead_path));
+ dev_queue_read_ahead_path[sizeof(dev_queue_read_ahead_path) - 1] = '\0';
+}
+
+unsigned long thp_supported_orders(void)
+{
+ unsigned long orders = 0;
+ char path[PATH_MAX];
+ char buf[256];
+ int ret;
+ int i;
+
+ for (i = 0; i < NR_ORDERS; i++) {
+ ret = snprintf(path, PATH_MAX, THP_SYSFS "hugepages-%ukB/enabled",
+ (getpagesize() >> 10) << i);
+ if (ret >= PATH_MAX) {
+ printf("%s: Pathname is too long\n", __func__);
+ exit(EXIT_FAILURE);
+ }
+
+ ret = read_file(path, buf, sizeof(buf));
+ if (ret)
+ orders |= 1UL << i;
+ }
+
+ return orders;
+}
diff --git a/tools/testing/selftests/mm/thp_settings.h b/tools/testing/selftests/mm/thp_settings.h
new file mode 100644
index 000000000000..71cbff05f4c7
--- /dev/null
+++ b/tools/testing/selftests/mm/thp_settings.h
@@ -0,0 +1,80 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef __THP_SETTINGS_H__
+#define __THP_SETTINGS_H__
+
+#include <stdbool.h>
+#include <stddef.h>
+#include <stdint.h>
+
+enum thp_enabled {
+ THP_NEVER,
+ THP_ALWAYS,
+ THP_INHERIT,
+ THP_MADVISE,
+};
+
+enum thp_defrag {
+ THP_DEFRAG_ALWAYS,
+ THP_DEFRAG_DEFER,
+ THP_DEFRAG_DEFER_MADVISE,
+ THP_DEFRAG_MADVISE,
+ THP_DEFRAG_NEVER,
+};
+
+enum shmem_enabled {
+ SHMEM_ALWAYS,
+ SHMEM_WITHIN_SIZE,
+ SHMEM_ADVISE,
+ SHMEM_NEVER,
+ SHMEM_DENY,
+ SHMEM_FORCE,
+};
+
+#define NR_ORDERS 20
+
+struct hugepages_settings {
+ enum thp_enabled enabled;
+};
+
+struct khugepaged_settings {
+ bool defrag;
+ unsigned int alloc_sleep_millisecs;
+ unsigned int scan_sleep_millisecs;
+ unsigned int max_ptes_none;
+ unsigned int max_ptes_swap;
+ unsigned int max_ptes_shared;
+ unsigned long pages_to_scan;
+};
+
+struct thp_settings {
+ enum thp_enabled thp_enabled;
+ enum thp_defrag thp_defrag;
+ enum shmem_enabled shmem_enabled;
+ bool use_zero_page;
+ struct khugepaged_settings khugepaged;
+ unsigned long read_ahead_kb;
+ struct hugepages_settings hugepages[NR_ORDERS];
+};
+
+int read_file(const char *path, char *buf, size_t buflen);
+int write_file(const char *path, const char *buf, size_t buflen);
+const unsigned long read_num(const char *path);
+void write_num(const char *path, unsigned long num);
+
+int thp_read_string(const char *name, const char * const strings[]);
+void thp_write_string(const char *name, const char *val);
+const unsigned long thp_read_num(const char *name);
+void thp_write_num(const char *name, unsigned long num);
+
+void thp_write_settings(struct thp_settings *settings);
+void thp_read_settings(struct thp_settings *settings);
+struct thp_settings *thp_current_settings(void);
+void thp_push_settings(struct thp_settings *settings);
+void thp_pop_settings(void);
+void thp_restore_settings(void);
+void thp_save_settings(void);
+
+void thp_set_read_ahead_path(char *path);
+unsigned long thp_supported_orders(void);
+
+#endif /* __THP_SETTINGS_H__ */
diff --git a/tools/testing/selftests/vm/thuge-gen.c b/tools/testing/selftests/mm/thuge-gen.c
index 361ef7192cc6..ea7fd8fe2876 100644
--- a/tools/testing/selftests/vm/thuge-gen.c
+++ b/tools/testing/selftests/mm/thuge-gen.c
@@ -3,7 +3,8 @@
Before running this huge pages for each huge page size must have been
reserved.
- For large pages beyond MAX_ORDER (like 1GB on x86) boot options must be used.
+ For large pages beyond MAX_PAGE_ORDER (like 1GB on x86) boot options must
+ be used. 1GB wouldn't be tested if it isn't available.
Also shmmax must be increased.
And you need to run as root to work around some weird permissions in shm.
And nothing using huge pages should run in parallel.
@@ -24,8 +25,8 @@
#include <unistd.h>
#include <stdarg.h>
#include <string.h>
-
-#define err(x) perror(x), exit(1)
+#include "vm_util.h"
+#include "../kselftest.h"
#define MAP_HUGE_2MB (21 << MAP_HUGE_SHIFT)
#define MAP_HUGE_1GB (30 << MAP_HUGE_SHIFT)
@@ -42,11 +43,8 @@
#define SHM_HUGE_1GB (30 << SHM_HUGE_SHIFT)
#define NUM_PAGESIZES 5
-
#define NUM_PAGES 4
-#define Dprintf(fmt...) // printf(fmt)
-
unsigned long page_sizes[NUM_PAGESIZES];
int num_page_sizes;
@@ -58,46 +56,15 @@ int ilog2(unsigned long v)
return l;
}
-void find_pagesizes(void)
-{
- glob_t g;
- int i;
- glob("/sys/kernel/mm/hugepages/hugepages-*kB", 0, NULL, &g);
- assert(g.gl_pathc <= NUM_PAGESIZES);
- for (i = 0; i < g.gl_pathc; i++) {
- sscanf(g.gl_pathv[i], "/sys/kernel/mm/hugepages/hugepages-%lukB",
- &page_sizes[i]);
- page_sizes[i] <<= 10;
- printf("Found %luMB\n", page_sizes[i] >> 20);
- }
- num_page_sizes = g.gl_pathc;
- globfree(&g);
-}
-
-unsigned long default_huge_page_size(void)
-{
- unsigned long hps = 0;
- char *line = NULL;
- size_t linelen = 0;
- FILE *f = fopen("/proc/meminfo", "r");
- if (!f)
- return 0;
- while (getline(&line, &linelen, f) > 0) {
- if (sscanf(line, "Hugepagesize: %lu kB", &hps) == 1) {
- hps <<= 10;
- break;
- }
- }
- free(line);
- return hps;
-}
-
void show(unsigned long ps)
{
char buf[100];
+
if (ps == getpagesize())
return;
- printf("%luMB: ", ps >> 20);
+
+ ksft_print_msg("%luMB: ", ps >> 20);
+
fflush(stdout);
snprintf(buf, sizeof buf,
"cat /sys/kernel/mm/hugepages/hugepages-%lukB/free_hugepages",
@@ -121,7 +88,7 @@ unsigned long read_sysfs(int warn, char *fmt, ...)
f = fopen(buf, "r");
if (!f) {
if (warn)
- printf("missing %s\n", buf);
+ ksft_print_msg("missing %s\n", buf);
return 0;
}
if (getline(&line, &linelen, f) > 0) {
@@ -135,123 +102,143 @@ unsigned long read_sysfs(int warn, char *fmt, ...)
unsigned long read_free(unsigned long ps)
{
return read_sysfs(ps != getpagesize(),
- "/sys/kernel/mm/hugepages/hugepages-%lukB/free_hugepages",
- ps >> 10);
+ "/sys/kernel/mm/hugepages/hugepages-%lukB/free_hugepages",
+ ps >> 10);
}
void test_mmap(unsigned long size, unsigned flags)
{
char *map;
unsigned long before, after;
- int err;
before = read_free(size);
map = mmap(NULL, size*NUM_PAGES, PROT_READ|PROT_WRITE,
MAP_PRIVATE|MAP_ANONYMOUS|MAP_HUGETLB|flags, -1, 0);
+ if (map == MAP_FAILED)
+ ksft_exit_fail_msg("mmap: %s\n", strerror(errno));
- if (map == (char *)-1) err("mmap");
memset(map, 0xff, size*NUM_PAGES);
after = read_free(size);
- Dprintf("before %lu after %lu diff %ld size %lu\n",
- before, after, before - after, size);
- assert(size == getpagesize() || (before - after) == NUM_PAGES);
+
show(size);
- err = munmap(map, size);
- assert(!err);
+ ksft_test_result(size == getpagesize() || (before - after) == NUM_PAGES,
+ "%s mmap\n", __func__);
+
+ if (munmap(map, size * NUM_PAGES))
+ ksft_exit_fail_msg("%s: unmap %s\n", __func__, strerror(errno));
}
void test_shmget(unsigned long size, unsigned flags)
{
int id;
unsigned long before, after;
- int err;
+ struct shm_info i;
+ char *map;
before = read_free(size);
id = shmget(IPC_PRIVATE, size * NUM_PAGES, IPC_CREAT|0600|flags);
- if (id < 0) err("shmget");
-
- struct shm_info i;
- if (shmctl(id, SHM_INFO, (void *)&i) < 0) err("shmctl");
- Dprintf("alloc %lu res %lu\n", i.shm_tot, i.shm_rss);
+ if (id < 0) {
+ if (errno == EPERM) {
+ ksft_test_result_skip("shmget requires root privileges: %s\n",
+ strerror(errno));
+ return;
+ }
+ ksft_exit_fail_msg("shmget: %s\n", strerror(errno));
+ }
+ if (shmctl(id, SHM_INFO, (void *)&i) < 0)
+ ksft_exit_fail_msg("shmctl: %s\n", strerror(errno));
- Dprintf("id %d\n", id);
- char *map = shmat(id, NULL, 0600);
- if (map == (char*)-1) err("shmat");
+ map = shmat(id, NULL, 0600);
+ if (map == MAP_FAILED)
+ ksft_exit_fail_msg("shmat: %s\n", strerror(errno));
shmctl(id, IPC_RMID, NULL);
memset(map, 0xff, size*NUM_PAGES);
after = read_free(size);
- Dprintf("before %lu after %lu diff %ld size %lu\n",
- before, after, before - after, size);
- assert(size == getpagesize() || (before - after) == NUM_PAGES);
show(size);
- err = shmdt(map);
- assert(!err);
+ ksft_test_result(size == getpagesize() || (before - after) == NUM_PAGES,
+ "%s: mmap\n", __func__);
+ if (shmdt(map))
+ ksft_exit_fail_msg("%s: shmdt: %s\n", __func__, strerror(errno));
}
-void sanity_checks(void)
+void find_pagesizes(void)
{
- int i;
unsigned long largest = getpagesize();
+ int i;
+ glob_t g;
- for (i = 0; i < num_page_sizes; i++) {
- if (page_sizes[i] > largest)
+ glob("/sys/kernel/mm/hugepages/hugepages-*kB", 0, NULL, &g);
+ assert(g.gl_pathc <= NUM_PAGESIZES);
+ for (i = 0; (i < g.gl_pathc) && (num_page_sizes < NUM_PAGESIZES); i++) {
+ sscanf(g.gl_pathv[i], "/sys/kernel/mm/hugepages/hugepages-%lukB",
+ &page_sizes[num_page_sizes]);
+ page_sizes[num_page_sizes] <<= 10;
+ ksft_print_msg("Found %luMB\n", page_sizes[i] >> 20);
+
+ if (page_sizes[num_page_sizes] > largest)
largest = page_sizes[i];
- if (read_free(page_sizes[i]) < NUM_PAGES) {
- printf("Not enough huge pages for page size %lu MB, need %u\n",
- page_sizes[i] >> 20,
- NUM_PAGES);
- exit(0);
- }
+ if (read_free(page_sizes[num_page_sizes]) >= NUM_PAGES)
+ num_page_sizes++;
+ else
+ ksft_print_msg("SKIP for size %lu MB as not enough huge pages, need %u\n",
+ page_sizes[num_page_sizes] >> 20, NUM_PAGES);
}
+ globfree(&g);
- if (read_sysfs(0, "/proc/sys/kernel/shmmax") < NUM_PAGES * largest) {
- printf("Please do echo %lu > /proc/sys/kernel/shmmax", largest * NUM_PAGES);
- exit(0);
- }
+ if (read_sysfs(0, "/proc/sys/kernel/shmmax") < NUM_PAGES * largest)
+ ksft_exit_fail_msg("Please do echo %lu > /proc/sys/kernel/shmmax",
+ largest * NUM_PAGES);
#if defined(__x86_64__)
if (largest != 1U<<30) {
- printf("No GB pages available on x86-64\n"
- "Please boot with hugepagesz=1G hugepages=%d\n", NUM_PAGES);
- exit(0);
+ ksft_exit_fail_msg("No GB pages available on x86-64\n"
+ "Please boot with hugepagesz=1G hugepages=%d\n", NUM_PAGES);
}
#endif
}
int main(void)
{
- int i;
unsigned default_hps = default_huge_page_size();
+ int i;
+
+ ksft_print_header();
find_pagesizes();
- sanity_checks();
+ if (!num_page_sizes)
+ ksft_finished();
+
+ ksft_set_plan(2 * num_page_sizes + 3);
for (i = 0; i < num_page_sizes; i++) {
unsigned long ps = page_sizes[i];
int arg = ilog2(ps) << MAP_HUGE_SHIFT;
- printf("Testing %luMB mmap with shift %x\n", ps >> 20, arg);
+
+ ksft_print_msg("Testing %luMB mmap with shift %x\n", ps >> 20, arg);
test_mmap(ps, MAP_HUGETLB | arg);
}
- printf("Testing default huge mmap\n");
- test_mmap(default_hps, SHM_HUGETLB);
- puts("Testing non-huge shmget");
+ ksft_print_msg("Testing default huge mmap\n");
+ test_mmap(default_hps, MAP_HUGETLB);
+
+ ksft_print_msg("Testing non-huge shmget\n");
test_shmget(getpagesize(), 0);
for (i = 0; i < num_page_sizes; i++) {
unsigned long ps = page_sizes[i];
int arg = ilog2(ps) << SHM_HUGE_SHIFT;
- printf("Testing %luMB shmget with shift %x\n", ps >> 20, arg);
+ ksft_print_msg("Testing %luMB shmget with shift %x\n", ps >> 20, arg);
test_shmget(ps, SHM_HUGETLB | arg);
}
- puts("default huge shmget");
+
+ ksft_print_msg("default huge shmget\n");
test_shmget(default_hps, SHM_HUGETLB);
- return 0;
+ ksft_finished();
}
diff --git a/tools/testing/selftests/mm/transhuge-stress.c b/tools/testing/selftests/mm/transhuge-stress.c
new file mode 100644
index 000000000000..68201192e37c
--- /dev/null
+++ b/tools/testing/selftests/mm/transhuge-stress.c
@@ -0,0 +1,138 @@
+/*
+ * Stress test for transparent huge pages, memory compaction and migration.
+ *
+ * Authors: Konstantin Khlebnikov <koct9i@gmail.com>
+ *
+ * This is free and unencumbered software released into the public domain.
+ */
+
+#include <stdlib.h>
+#include <stdio.h>
+#include <stdint.h>
+#include <err.h>
+#include <time.h>
+#include <unistd.h>
+#include <fcntl.h>
+#include <string.h>
+#include <sys/mman.h>
+#include "vm_util.h"
+#include "../kselftest.h"
+
+int backing_fd = -1;
+int mmap_flags = MAP_ANONYMOUS | MAP_NORESERVE | MAP_PRIVATE;
+#define PROT_RW (PROT_READ | PROT_WRITE)
+
+int main(int argc, char **argv)
+{
+ size_t ram, len;
+ void *ptr, *p;
+ struct timespec start, a, b;
+ int i = 0;
+ char *name = NULL;
+ double s;
+ uint8_t *map;
+ size_t map_len;
+ int pagemap_fd;
+ int duration = 0;
+
+ ksft_print_header();
+
+ ram = sysconf(_SC_PHYS_PAGES);
+ if (ram > SIZE_MAX / psize() / 4)
+ ram = SIZE_MAX / 4;
+ else
+ ram *= psize();
+ len = ram;
+
+ while (++i < argc) {
+ if (!strcmp(argv[i], "-h"))
+ ksft_exit_fail_msg("usage: %s [-f <filename>] [-d <duration>] [size in MiB]\n",
+ argv[0]);
+ else if (!strcmp(argv[i], "-f"))
+ name = argv[++i];
+ else if (!strcmp(argv[i], "-d"))
+ duration = atoi(argv[++i]);
+ else
+ len = atoll(argv[i]) << 20;
+ }
+
+ ksft_set_plan(1);
+
+ if (name) {
+ backing_fd = open(name, O_RDWR);
+ if (backing_fd == -1)
+ ksft_exit_fail_msg("open %s\n", name);
+ mmap_flags = MAP_SHARED;
+ }
+
+ warnx("allocate %zd transhuge pages, using %zd MiB virtual memory"
+ " and %zd MiB of ram", len >> HPAGE_SHIFT, len >> 20,
+ ram >> (20 + HPAGE_SHIFT - pshift() - 1));
+
+ pagemap_fd = open("/proc/self/pagemap", O_RDONLY);
+ if (pagemap_fd < 0)
+ ksft_exit_fail_msg("open pagemap\n");
+
+ len -= len % HPAGE_SIZE;
+ ptr = mmap(NULL, len + HPAGE_SIZE, PROT_RW, mmap_flags, backing_fd, 0);
+ if (ptr == MAP_FAILED)
+ ksft_exit_fail_msg("initial mmap");
+ ptr += HPAGE_SIZE - (uintptr_t)ptr % HPAGE_SIZE;
+
+ if (madvise(ptr, len, MADV_HUGEPAGE))
+ ksft_exit_fail_msg("MADV_HUGEPAGE");
+
+ map_len = ram >> (HPAGE_SHIFT - 1);
+ map = malloc(map_len);
+ if (!map)
+ ksft_exit_fail_msg("map malloc\n");
+
+ clock_gettime(CLOCK_MONOTONIC, &start);
+
+ while (1) {
+ int nr_succeed = 0, nr_failed = 0, nr_pages = 0;
+
+ memset(map, 0, map_len);
+
+ clock_gettime(CLOCK_MONOTONIC, &a);
+ for (p = ptr; p < ptr + len; p += HPAGE_SIZE) {
+ int64_t pfn;
+
+ pfn = allocate_transhuge(p, pagemap_fd);
+
+ if (pfn < 0) {
+ nr_failed++;
+ } else {
+ size_t idx = pfn >> (HPAGE_SHIFT - pshift());
+
+ nr_succeed++;
+ if (idx >= map_len) {
+ map = realloc(map, idx + 1);
+ if (!map)
+ ksft_exit_fail_msg("map realloc\n");
+ memset(map + map_len, 0, idx + 1 - map_len);
+ map_len = idx + 1;
+ }
+ if (!map[idx])
+ nr_pages++;
+ map[idx] = 1;
+ }
+
+ /* split transhuge page, keep last page */
+ if (madvise(p, HPAGE_SIZE - psize(), MADV_DONTNEED))
+ ksft_exit_fail_msg("MADV_DONTNEED");
+ }
+ clock_gettime(CLOCK_MONOTONIC, &b);
+ s = b.tv_sec - a.tv_sec + (b.tv_nsec - a.tv_nsec) / 1000000000.;
+
+ ksft_print_msg("%.3f s/loop, %.3f ms/page, %10.3f MiB/s\t"
+ "%4d succeed, %4d failed, %4d different pages\n",
+ s, s * 1000 / (len >> HPAGE_SHIFT), len / s / (1 << 20),
+ nr_succeed, nr_failed, nr_pages);
+
+ if (duration > 0 && b.tv_sec - start.tv_sec >= duration) {
+ ksft_test_result_pass("Completed\n");
+ ksft_finished();
+ }
+ }
+}
diff --git a/tools/testing/selftests/mm/uffd-common.c b/tools/testing/selftests/mm/uffd-common.c
new file mode 100644
index 000000000000..b0ac0ec2356d
--- /dev/null
+++ b/tools/testing/selftests/mm/uffd-common.c
@@ -0,0 +1,715 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Userfaultfd tests util functions
+ *
+ * Copyright (C) 2015-2023 Red Hat, Inc.
+ */
+
+#include "uffd-common.h"
+
+#define BASE_PMD_ADDR ((void *)(1UL << 30))
+
+volatile bool test_uffdio_copy_eexist = true;
+unsigned long nr_cpus, nr_pages, nr_pages_per_cpu, page_size;
+char *area_src, *area_src_alias, *area_dst, *area_dst_alias, *area_remap;
+int uffd = -1, uffd_flags, finished, *pipefd, test_type;
+bool map_shared;
+bool test_uffdio_wp = true;
+unsigned long long *count_verify;
+uffd_test_ops_t *uffd_test_ops;
+uffd_test_case_ops_t *uffd_test_case_ops;
+
+static int uffd_mem_fd_create(off_t mem_size, bool hugetlb)
+{
+ unsigned int memfd_flags = 0;
+ int mem_fd;
+
+ if (hugetlb)
+ memfd_flags = MFD_HUGETLB;
+ mem_fd = memfd_create("uffd-test", memfd_flags);
+ if (mem_fd < 0)
+ err("memfd_create");
+ if (ftruncate(mem_fd, mem_size))
+ err("ftruncate");
+ if (fallocate(mem_fd,
+ FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE, 0,
+ mem_size))
+ err("fallocate");
+
+ return mem_fd;
+}
+
+static void anon_release_pages(char *rel_area)
+{
+ if (madvise(rel_area, nr_pages * page_size, MADV_DONTNEED))
+ err("madvise(MADV_DONTNEED) failed");
+}
+
+static int anon_allocate_area(void **alloc_area, bool is_src)
+{
+ *alloc_area = mmap(NULL, nr_pages * page_size, PROT_READ | PROT_WRITE,
+ MAP_ANONYMOUS | MAP_PRIVATE, -1, 0);
+ if (*alloc_area == MAP_FAILED) {
+ *alloc_area = NULL;
+ return -errno;
+ }
+ return 0;
+}
+
+static void noop_alias_mapping(__u64 *start, size_t len, unsigned long offset)
+{
+}
+
+static void hugetlb_release_pages(char *rel_area)
+{
+ if (!map_shared) {
+ if (madvise(rel_area, nr_pages * page_size, MADV_DONTNEED))
+ err("madvise(MADV_DONTNEED) failed");
+ } else {
+ if (madvise(rel_area, nr_pages * page_size, MADV_REMOVE))
+ err("madvise(MADV_REMOVE) failed");
+ }
+}
+
+static int hugetlb_allocate_area(void **alloc_area, bool is_src)
+{
+ off_t size = nr_pages * page_size;
+ off_t offset = is_src ? 0 : size;
+ void *area_alias = NULL;
+ char **alloc_area_alias;
+ int mem_fd = uffd_mem_fd_create(size * 2, true);
+
+ *alloc_area = mmap(NULL, size, PROT_READ | PROT_WRITE,
+ (map_shared ? MAP_SHARED : MAP_PRIVATE) |
+ (is_src ? 0 : MAP_NORESERVE),
+ mem_fd, offset);
+ if (*alloc_area == MAP_FAILED) {
+ *alloc_area = NULL;
+ return -errno;
+ }
+
+ if (map_shared) {
+ area_alias = mmap(NULL, size, PROT_READ | PROT_WRITE,
+ MAP_SHARED, mem_fd, offset);
+ if (area_alias == MAP_FAILED)
+ return -errno;
+ }
+
+ if (is_src) {
+ alloc_area_alias = &area_src_alias;
+ } else {
+ alloc_area_alias = &area_dst_alias;
+ }
+ if (area_alias)
+ *alloc_area_alias = area_alias;
+
+ close(mem_fd);
+ return 0;
+}
+
+static void hugetlb_alias_mapping(__u64 *start, size_t len, unsigned long offset)
+{
+ if (!map_shared)
+ return;
+
+ *start = (unsigned long) area_dst_alias + offset;
+}
+
+static void shmem_release_pages(char *rel_area)
+{
+ if (madvise(rel_area, nr_pages * page_size, MADV_REMOVE))
+ err("madvise(MADV_REMOVE) failed");
+}
+
+static int shmem_allocate_area(void **alloc_area, bool is_src)
+{
+ void *area_alias = NULL;
+ size_t bytes = nr_pages * page_size, hpage_size = read_pmd_pagesize();
+ unsigned long offset = is_src ? 0 : bytes;
+ char *p = NULL, *p_alias = NULL;
+ int mem_fd = uffd_mem_fd_create(bytes * 2, false);
+
+ /* TODO: clean this up. Use a static addr is ugly */
+ p = BASE_PMD_ADDR;
+ if (!is_src)
+ /* src map + alias + interleaved hpages */
+ p += 2 * (bytes + hpage_size);
+ p_alias = p;
+ p_alias += bytes;
+ p_alias += hpage_size; /* Prevent src/dst VMA merge */
+
+ *alloc_area = mmap(p, bytes, PROT_READ | PROT_WRITE, MAP_SHARED,
+ mem_fd, offset);
+ if (*alloc_area == MAP_FAILED) {
+ *alloc_area = NULL;
+ return -errno;
+ }
+ if (*alloc_area != p)
+ err("mmap of memfd failed at %p", p);
+
+ area_alias = mmap(p_alias, bytes, PROT_READ | PROT_WRITE, MAP_SHARED,
+ mem_fd, offset);
+ if (area_alias == MAP_FAILED) {
+ munmap(*alloc_area, bytes);
+ *alloc_area = NULL;
+ return -errno;
+ }
+ if (area_alias != p_alias)
+ err("mmap of anonymous memory failed at %p", p_alias);
+
+ if (is_src)
+ area_src_alias = area_alias;
+ else
+ area_dst_alias = area_alias;
+
+ close(mem_fd);
+ return 0;
+}
+
+static void shmem_alias_mapping(__u64 *start, size_t len, unsigned long offset)
+{
+ *start = (unsigned long)area_dst_alias + offset;
+}
+
+static void shmem_check_pmd_mapping(void *p, int expect_nr_hpages)
+{
+ if (!check_huge_shmem(area_dst_alias, expect_nr_hpages,
+ read_pmd_pagesize()))
+ err("Did not find expected %d number of hugepages",
+ expect_nr_hpages);
+}
+
+struct uffd_test_ops anon_uffd_test_ops = {
+ .allocate_area = anon_allocate_area,
+ .release_pages = anon_release_pages,
+ .alias_mapping = noop_alias_mapping,
+ .check_pmd_mapping = NULL,
+};
+
+struct uffd_test_ops shmem_uffd_test_ops = {
+ .allocate_area = shmem_allocate_area,
+ .release_pages = shmem_release_pages,
+ .alias_mapping = shmem_alias_mapping,
+ .check_pmd_mapping = shmem_check_pmd_mapping,
+};
+
+struct uffd_test_ops hugetlb_uffd_test_ops = {
+ .allocate_area = hugetlb_allocate_area,
+ .release_pages = hugetlb_release_pages,
+ .alias_mapping = hugetlb_alias_mapping,
+ .check_pmd_mapping = NULL,
+};
+
+void uffd_stats_report(struct uffd_args *args, int n_cpus)
+{
+ int i;
+ unsigned long long miss_total = 0, wp_total = 0, minor_total = 0;
+
+ for (i = 0; i < n_cpus; i++) {
+ miss_total += args[i].missing_faults;
+ wp_total += args[i].wp_faults;
+ minor_total += args[i].minor_faults;
+ }
+
+ printf("userfaults: ");
+ if (miss_total) {
+ printf("%llu missing (", miss_total);
+ for (i = 0; i < n_cpus; i++)
+ printf("%lu+", args[i].missing_faults);
+ printf("\b) ");
+ }
+ if (wp_total) {
+ printf("%llu wp (", wp_total);
+ for (i = 0; i < n_cpus; i++)
+ printf("%lu+", args[i].wp_faults);
+ printf("\b) ");
+ }
+ if (minor_total) {
+ printf("%llu minor (", minor_total);
+ for (i = 0; i < n_cpus; i++)
+ printf("%lu+", args[i].minor_faults);
+ printf("\b)");
+ }
+ printf("\n");
+}
+
+int userfaultfd_open(uint64_t *features)
+{
+ struct uffdio_api uffdio_api;
+
+ uffd = uffd_open(UFFD_FLAGS);
+ if (uffd < 0)
+ return -1;
+ uffd_flags = fcntl(uffd, F_GETFD, NULL);
+
+ uffdio_api.api = UFFD_API;
+ uffdio_api.features = *features;
+ if (ioctl(uffd, UFFDIO_API, &uffdio_api))
+ /* Probably lack of CAP_PTRACE? */
+ return -1;
+ if (uffdio_api.api != UFFD_API)
+ err("UFFDIO_API error: %" PRIu64, (uint64_t)uffdio_api.api);
+
+ *features = uffdio_api.features;
+ return 0;
+}
+
+static inline void munmap_area(void **area)
+{
+ if (*area)
+ if (munmap(*area, nr_pages * page_size))
+ err("munmap");
+
+ *area = NULL;
+}
+
+void uffd_test_ctx_clear(void)
+{
+ size_t i;
+
+ if (pipefd) {
+ for (i = 0; i < nr_cpus * 2; ++i) {
+ if (close(pipefd[i]))
+ err("close pipefd");
+ }
+ free(pipefd);
+ pipefd = NULL;
+ }
+
+ if (count_verify) {
+ free(count_verify);
+ count_verify = NULL;
+ }
+
+ if (uffd != -1) {
+ if (close(uffd))
+ err("close uffd");
+ uffd = -1;
+ }
+
+ munmap_area((void **)&area_src);
+ munmap_area((void **)&area_src_alias);
+ munmap_area((void **)&area_dst);
+ munmap_area((void **)&area_dst_alias);
+ munmap_area((void **)&area_remap);
+}
+
+int uffd_test_ctx_init(uint64_t features, const char **errmsg)
+{
+ unsigned long nr, cpu;
+ int ret;
+
+ if (uffd_test_case_ops && uffd_test_case_ops->pre_alloc) {
+ ret = uffd_test_case_ops->pre_alloc(errmsg);
+ if (ret)
+ return ret;
+ }
+
+ ret = uffd_test_ops->allocate_area((void **)&area_src, true);
+ ret |= uffd_test_ops->allocate_area((void **)&area_dst, false);
+ if (ret) {
+ if (errmsg)
+ *errmsg = "memory allocation failed";
+ return ret;
+ }
+
+ if (uffd_test_case_ops && uffd_test_case_ops->post_alloc) {
+ ret = uffd_test_case_ops->post_alloc(errmsg);
+ if (ret)
+ return ret;
+ }
+
+ ret = userfaultfd_open(&features);
+ if (ret) {
+ if (errmsg)
+ *errmsg = "possible lack of priviledge";
+ return ret;
+ }
+
+ count_verify = malloc(nr_pages * sizeof(unsigned long long));
+ if (!count_verify)
+ err("count_verify");
+
+ for (nr = 0; nr < nr_pages; nr++) {
+ *area_mutex(area_src, nr) =
+ (pthread_mutex_t)PTHREAD_MUTEX_INITIALIZER;
+ count_verify[nr] = *area_count(area_src, nr) = 1;
+ /*
+ * In the transition between 255 to 256, powerpc will
+ * read out of order in my_bcmp and see both bytes as
+ * zero, so leave a placeholder below always non-zero
+ * after the count, to avoid my_bcmp to trigger false
+ * positives.
+ */
+ *(area_count(area_src, nr) + 1) = 1;
+ }
+
+ /*
+ * After initialization of area_src, we must explicitly release pages
+ * for area_dst to make sure it's fully empty. Otherwise we could have
+ * some area_dst pages be errornously initialized with zero pages,
+ * hence we could hit memory corruption later in the test.
+ *
+ * One example is when THP is globally enabled, above allocate_area()
+ * calls could have the two areas merged into a single VMA (as they
+ * will have the same VMA flags so they're mergeable). When we
+ * initialize the area_src above, it's possible that some part of
+ * area_dst could have been faulted in via one huge THP that will be
+ * shared between area_src and area_dst. It could cause some of the
+ * area_dst won't be trapped by missing userfaults.
+ *
+ * This release_pages() will guarantee even if that happened, we'll
+ * proactively split the thp and drop any accidentally initialized
+ * pages within area_dst.
+ */
+ uffd_test_ops->release_pages(area_dst);
+
+ pipefd = malloc(sizeof(int) * nr_cpus * 2);
+ if (!pipefd)
+ err("pipefd");
+ for (cpu = 0; cpu < nr_cpus; cpu++)
+ if (pipe2(&pipefd[cpu * 2], O_CLOEXEC | O_NONBLOCK))
+ err("pipe");
+
+ return 0;
+}
+
+void wp_range(int ufd, __u64 start, __u64 len, bool wp)
+{
+ struct uffdio_writeprotect prms;
+
+ /* Write protection page faults */
+ prms.range.start = start;
+ prms.range.len = len;
+ /* Undo write-protect, do wakeup after that */
+ prms.mode = wp ? UFFDIO_WRITEPROTECT_MODE_WP : 0;
+
+ if (ioctl(ufd, UFFDIO_WRITEPROTECT, &prms))
+ err("clear WP failed: address=0x%"PRIx64, (uint64_t)start);
+}
+
+static void continue_range(int ufd, __u64 start, __u64 len, bool wp)
+{
+ struct uffdio_continue req;
+ int ret;
+
+ req.range.start = start;
+ req.range.len = len;
+ req.mode = 0;
+ if (wp)
+ req.mode |= UFFDIO_CONTINUE_MODE_WP;
+
+ if (ioctl(ufd, UFFDIO_CONTINUE, &req))
+ err("UFFDIO_CONTINUE failed for address 0x%" PRIx64,
+ (uint64_t)start);
+
+ /*
+ * Error handling within the kernel for continue is subtly different
+ * from copy or zeropage, so it may be a source of bugs. Trigger an
+ * error (-EEXIST) on purpose, to verify doing so doesn't cause a BUG.
+ */
+ req.mapped = 0;
+ ret = ioctl(ufd, UFFDIO_CONTINUE, &req);
+ if (ret >= 0 || req.mapped != -EEXIST)
+ err("failed to exercise UFFDIO_CONTINUE error handling, ret=%d, mapped=%" PRId64,
+ ret, (int64_t) req.mapped);
+}
+
+int uffd_read_msg(int ufd, struct uffd_msg *msg)
+{
+ int ret = read(uffd, msg, sizeof(*msg));
+
+ if (ret != sizeof(*msg)) {
+ if (ret < 0) {
+ if (errno == EAGAIN || errno == EINTR)
+ return 1;
+ err("blocking read error");
+ } else {
+ err("short read");
+ }
+ }
+
+ return 0;
+}
+
+void uffd_handle_page_fault(struct uffd_msg *msg, struct uffd_args *args)
+{
+ unsigned long offset;
+
+ if (msg->event != UFFD_EVENT_PAGEFAULT)
+ err("unexpected msg event %u", msg->event);
+
+ if (msg->arg.pagefault.flags & UFFD_PAGEFAULT_FLAG_WP) {
+ /* Write protect page faults */
+ wp_range(uffd, msg->arg.pagefault.address, page_size, false);
+ args->wp_faults++;
+ } else if (msg->arg.pagefault.flags & UFFD_PAGEFAULT_FLAG_MINOR) {
+ uint8_t *area;
+ int b;
+
+ /*
+ * Minor page faults
+ *
+ * To prove we can modify the original range for testing
+ * purposes, we're going to bit flip this range before
+ * continuing.
+ *
+ * Note that this requires all minor page fault tests operate on
+ * area_dst (non-UFFD-registered) and area_dst_alias
+ * (UFFD-registered).
+ */
+
+ area = (uint8_t *)(area_dst +
+ ((char *)msg->arg.pagefault.address -
+ area_dst_alias));
+ for (b = 0; b < page_size; ++b)
+ area[b] = ~area[b];
+ continue_range(uffd, msg->arg.pagefault.address, page_size,
+ args->apply_wp);
+ args->minor_faults++;
+ } else {
+ /*
+ * Missing page faults.
+ *
+ * Here we force a write check for each of the missing mode
+ * faults. It's guaranteed because the only threads that
+ * will trigger uffd faults are the locking threads, and
+ * their first instruction to touch the missing page will
+ * always be pthread_mutex_lock().
+ *
+ * Note that here we relied on an NPTL glibc impl detail to
+ * always read the lock type at the entry of the lock op
+ * (pthread_mutex_t.__data.__type, offset 0x10) before
+ * doing any locking operations to guarantee that. It's
+ * actually not good to rely on this impl detail because
+ * logically a pthread-compatible lib can implement the
+ * locks without types and we can fail when linking with
+ * them. However since we used to find bugs with this
+ * strict check we still keep it around. Hopefully this
+ * could be a good hint when it fails again. If one day
+ * it'll break on some other impl of glibc we'll revisit.
+ */
+ if (msg->arg.pagefault.flags & UFFD_PAGEFAULT_FLAG_WRITE)
+ err("unexpected write fault");
+
+ offset = (char *)(unsigned long)msg->arg.pagefault.address - area_dst;
+ offset &= ~(page_size-1);
+
+ if (copy_page(uffd, offset, args->apply_wp))
+ args->missing_faults++;
+ }
+}
+
+void *uffd_poll_thread(void *arg)
+{
+ struct uffd_args *args = (struct uffd_args *)arg;
+ unsigned long cpu = args->cpu;
+ struct pollfd pollfd[2];
+ struct uffd_msg msg;
+ struct uffdio_register uffd_reg;
+ int ret;
+ char tmp_chr;
+
+ if (!args->handle_fault)
+ args->handle_fault = uffd_handle_page_fault;
+
+ pollfd[0].fd = uffd;
+ pollfd[0].events = POLLIN;
+ pollfd[1].fd = pipefd[cpu*2];
+ pollfd[1].events = POLLIN;
+
+ for (;;) {
+ ret = poll(pollfd, 2, -1);
+ if (ret <= 0) {
+ if (errno == EINTR || errno == EAGAIN)
+ continue;
+ err("poll error: %d", ret);
+ }
+ if (pollfd[1].revents) {
+ if (!(pollfd[1].revents & POLLIN))
+ err("pollfd[1].revents %d", pollfd[1].revents);
+ if (read(pollfd[1].fd, &tmp_chr, 1) != 1)
+ err("read pipefd error");
+ break;
+ }
+ if (!(pollfd[0].revents & POLLIN))
+ err("pollfd[0].revents %d", pollfd[0].revents);
+ if (uffd_read_msg(uffd, &msg))
+ continue;
+ switch (msg.event) {
+ default:
+ err("unexpected msg event %u\n", msg.event);
+ break;
+ case UFFD_EVENT_PAGEFAULT:
+ args->handle_fault(&msg, args);
+ break;
+ case UFFD_EVENT_FORK:
+ close(uffd);
+ uffd = msg.arg.fork.ufd;
+ pollfd[0].fd = uffd;
+ break;
+ case UFFD_EVENT_REMOVE:
+ uffd_reg.range.start = msg.arg.remove.start;
+ uffd_reg.range.len = msg.arg.remove.end -
+ msg.arg.remove.start;
+ if (ioctl(uffd, UFFDIO_UNREGISTER, &uffd_reg.range))
+ err("remove failure");
+ break;
+ case UFFD_EVENT_REMAP:
+ area_remap = area_dst; /* save for later unmap */
+ area_dst = (char *)(unsigned long)msg.arg.remap.to;
+ break;
+ }
+ }
+
+ return NULL;
+}
+
+static void retry_copy_page(int ufd, struct uffdio_copy *uffdio_copy,
+ unsigned long offset)
+{
+ uffd_test_ops->alias_mapping(&uffdio_copy->dst,
+ uffdio_copy->len,
+ offset);
+ if (ioctl(ufd, UFFDIO_COPY, uffdio_copy)) {
+ /* real retval in ufdio_copy.copy */
+ if (uffdio_copy->copy != -EEXIST)
+ err("UFFDIO_COPY retry error: %"PRId64,
+ (int64_t)uffdio_copy->copy);
+ } else {
+ err("UFFDIO_COPY retry unexpected: %"PRId64,
+ (int64_t)uffdio_copy->copy);
+ }
+}
+
+static void wake_range(int ufd, unsigned long addr, unsigned long len)
+{
+ struct uffdio_range uffdio_wake;
+
+ uffdio_wake.start = addr;
+ uffdio_wake.len = len;
+
+ if (ioctl(ufd, UFFDIO_WAKE, &uffdio_wake))
+ fprintf(stderr, "error waking %lu\n",
+ addr), exit(1);
+}
+
+int __copy_page(int ufd, unsigned long offset, bool retry, bool wp)
+{
+ struct uffdio_copy uffdio_copy;
+
+ if (offset >= nr_pages * page_size)
+ err("unexpected offset %lu\n", offset);
+ uffdio_copy.dst = (unsigned long) area_dst + offset;
+ uffdio_copy.src = (unsigned long) area_src + offset;
+ uffdio_copy.len = page_size;
+ if (wp)
+ uffdio_copy.mode = UFFDIO_COPY_MODE_WP;
+ else
+ uffdio_copy.mode = 0;
+ uffdio_copy.copy = 0;
+ if (ioctl(ufd, UFFDIO_COPY, &uffdio_copy)) {
+ /* real retval in ufdio_copy.copy */
+ if (uffdio_copy.copy != -EEXIST)
+ err("UFFDIO_COPY error: %"PRId64,
+ (int64_t)uffdio_copy.copy);
+ wake_range(ufd, uffdio_copy.dst, page_size);
+ } else if (uffdio_copy.copy != page_size) {
+ err("UFFDIO_COPY error: %"PRId64, (int64_t)uffdio_copy.copy);
+ } else {
+ if (test_uffdio_copy_eexist && retry) {
+ test_uffdio_copy_eexist = false;
+ retry_copy_page(ufd, &uffdio_copy, offset);
+ }
+ return 1;
+ }
+ return 0;
+}
+
+int copy_page(int ufd, unsigned long offset, bool wp)
+{
+ return __copy_page(ufd, offset, false, wp);
+}
+
+int move_page(int ufd, unsigned long offset, unsigned long len)
+{
+ struct uffdio_move uffdio_move;
+
+ if (offset + len > nr_pages * page_size)
+ err("unexpected offset %lu and length %lu\n", offset, len);
+ uffdio_move.dst = (unsigned long) area_dst + offset;
+ uffdio_move.src = (unsigned long) area_src + offset;
+ uffdio_move.len = len;
+ uffdio_move.mode = UFFDIO_MOVE_MODE_ALLOW_SRC_HOLES;
+ uffdio_move.move = 0;
+ if (ioctl(ufd, UFFDIO_MOVE, &uffdio_move)) {
+ /* real retval in uffdio_move.move */
+ if (uffdio_move.move != -EEXIST)
+ err("UFFDIO_MOVE error: %"PRId64,
+ (int64_t)uffdio_move.move);
+ wake_range(ufd, uffdio_move.dst, len);
+ } else if (uffdio_move.move != len) {
+ err("UFFDIO_MOVE error: %"PRId64, (int64_t)uffdio_move.move);
+ } else
+ return 1;
+ return 0;
+}
+
+int uffd_open_dev(unsigned int flags)
+{
+ int fd, uffd;
+
+ fd = open("/dev/userfaultfd", O_RDWR | O_CLOEXEC);
+ if (fd < 0)
+ return fd;
+ uffd = ioctl(fd, USERFAULTFD_IOC_NEW, flags);
+ close(fd);
+
+ return uffd;
+}
+
+int uffd_open_sys(unsigned int flags)
+{
+#ifdef __NR_userfaultfd
+ return syscall(__NR_userfaultfd, flags);
+#else
+ return -1;
+#endif
+}
+
+int uffd_open(unsigned int flags)
+{
+ int uffd = uffd_open_sys(flags);
+
+ if (uffd < 0)
+ uffd = uffd_open_dev(flags);
+
+ return uffd;
+}
+
+int uffd_get_features(uint64_t *features)
+{
+ struct uffdio_api uffdio_api = { .api = UFFD_API, .features = 0 };
+ /*
+ * This should by default work in most kernels; the feature list
+ * will be the same no matter what we pass in here.
+ */
+ int fd = uffd_open(UFFD_USER_MODE_ONLY);
+
+ if (fd < 0)
+ /* Maybe the kernel is older than user-only mode? */
+ fd = uffd_open(0);
+
+ if (fd < 0)
+ return fd;
+
+ if (ioctl(fd, UFFDIO_API, &uffdio_api)) {
+ close(fd);
+ return -errno;
+ }
+
+ *features = uffdio_api.features;
+ close(fd);
+
+ return 0;
+}
diff --git a/tools/testing/selftests/mm/uffd-common.h b/tools/testing/selftests/mm/uffd-common.h
new file mode 100644
index 000000000000..cb055282c89c
--- /dev/null
+++ b/tools/testing/selftests/mm/uffd-common.h
@@ -0,0 +1,134 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Userfaultfd tests common header
+ *
+ * Copyright (C) 2015-2023 Red Hat, Inc.
+ */
+#ifndef __UFFD_COMMON_H__
+#define __UFFD_COMMON_H__
+
+#define _GNU_SOURCE
+#include <stdio.h>
+#include <errno.h>
+#include <unistd.h>
+#include <stdlib.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <fcntl.h>
+#include <time.h>
+#include <signal.h>
+#include <poll.h>
+#include <string.h>
+#include <linux/mman.h>
+#include <sys/mman.h>
+#include <sys/syscall.h>
+#include <sys/ioctl.h>
+#include <sys/wait.h>
+#include <pthread.h>
+#include <linux/userfaultfd.h>
+#include <setjmp.h>
+#include <stdbool.h>
+#include <assert.h>
+#include <inttypes.h>
+#include <stdint.h>
+#include <sys/random.h>
+
+#include "../kselftest.h"
+#include "vm_util.h"
+
+#define UFFD_FLAGS (O_CLOEXEC | O_NONBLOCK | UFFD_USER_MODE_ONLY)
+
+#define _err(fmt, ...) \
+ do { \
+ int ret = errno; \
+ fprintf(stderr, "ERROR: " fmt, ##__VA_ARGS__); \
+ fprintf(stderr, " (errno=%d, @%s:%d)\n", \
+ ret, __FILE__, __LINE__); \
+ } while (0)
+
+#define errexit(exitcode, fmt, ...) \
+ do { \
+ _err(fmt, ##__VA_ARGS__); \
+ exit(exitcode); \
+ } while (0)
+
+#define err(fmt, ...) errexit(1, fmt, ##__VA_ARGS__)
+
+/* pthread_mutex_t starts at page offset 0 */
+#define area_mutex(___area, ___nr) \
+ ((pthread_mutex_t *) ((___area) + (___nr)*page_size))
+/*
+ * count is placed in the page after pthread_mutex_t naturally aligned
+ * to avoid non alignment faults on non-x86 archs.
+ */
+#define area_count(___area, ___nr) \
+ ((volatile unsigned long long *) ((unsigned long) \
+ ((___area) + (___nr)*page_size + \
+ sizeof(pthread_mutex_t) + \
+ sizeof(unsigned long long) - 1) & \
+ ~(unsigned long)(sizeof(unsigned long long) \
+ - 1)))
+
+/* Userfaultfd test statistics */
+struct uffd_args {
+ int cpu;
+ /* Whether apply wr-protects when installing pages */
+ bool apply_wp;
+ unsigned long missing_faults;
+ unsigned long wp_faults;
+ unsigned long minor_faults;
+
+ /* A custom fault handler; defaults to uffd_handle_page_fault. */
+ void (*handle_fault)(struct uffd_msg *msg, struct uffd_args *args);
+};
+
+struct uffd_test_ops {
+ int (*allocate_area)(void **alloc_area, bool is_src);
+ void (*release_pages)(char *rel_area);
+ void (*alias_mapping)(__u64 *start, size_t len, unsigned long offset);
+ void (*check_pmd_mapping)(void *p, int expect_nr_hpages);
+};
+typedef struct uffd_test_ops uffd_test_ops_t;
+
+struct uffd_test_case_ops {
+ int (*pre_alloc)(const char **errmsg);
+ int (*post_alloc)(const char **errmsg);
+};
+typedef struct uffd_test_case_ops uffd_test_case_ops_t;
+
+extern unsigned long nr_cpus, nr_pages, nr_pages_per_cpu, page_size;
+extern char *area_src, *area_src_alias, *area_dst, *area_dst_alias, *area_remap;
+extern int uffd, uffd_flags, finished, *pipefd, test_type;
+extern bool map_shared;
+extern bool test_uffdio_wp;
+extern unsigned long long *count_verify;
+extern volatile bool test_uffdio_copy_eexist;
+
+extern uffd_test_ops_t anon_uffd_test_ops;
+extern uffd_test_ops_t shmem_uffd_test_ops;
+extern uffd_test_ops_t hugetlb_uffd_test_ops;
+extern uffd_test_ops_t *uffd_test_ops;
+extern uffd_test_case_ops_t *uffd_test_case_ops;
+
+void uffd_stats_report(struct uffd_args *args, int n_cpus);
+int uffd_test_ctx_init(uint64_t features, const char **errmsg);
+void uffd_test_ctx_clear(void);
+int userfaultfd_open(uint64_t *features);
+int uffd_read_msg(int ufd, struct uffd_msg *msg);
+void wp_range(int ufd, __u64 start, __u64 len, bool wp);
+void uffd_handle_page_fault(struct uffd_msg *msg, struct uffd_args *args);
+int __copy_page(int ufd, unsigned long offset, bool retry, bool wp);
+int copy_page(int ufd, unsigned long offset, bool wp);
+int move_page(int ufd, unsigned long offset, unsigned long len);
+void *uffd_poll_thread(void *arg);
+
+int uffd_open_dev(unsigned int flags);
+int uffd_open_sys(unsigned int flags);
+int uffd_open(unsigned int flags);
+int uffd_get_features(uint64_t *features);
+
+#define TEST_ANON 1
+#define TEST_HUGETLB 2
+#define TEST_SHMEM 3
+
+#endif
diff --git a/tools/testing/selftests/mm/uffd-stress.c b/tools/testing/selftests/mm/uffd-stress.c
new file mode 100644
index 000000000000..f78bab0f3d45
--- /dev/null
+++ b/tools/testing/selftests/mm/uffd-stress.c
@@ -0,0 +1,480 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Stress userfaultfd syscall.
+ *
+ * Copyright (C) 2015 Red Hat, Inc.
+ *
+ * This test allocates two virtual areas and bounces the physical
+ * memory across the two virtual areas (from area_src to area_dst)
+ * using userfaultfd.
+ *
+ * There are three threads running per CPU:
+ *
+ * 1) one per-CPU thread takes a per-page pthread_mutex in a random
+ * page of the area_dst (while the physical page may still be in
+ * area_src), and increments a per-page counter in the same page,
+ * and checks its value against a verification region.
+ *
+ * 2) another per-CPU thread handles the userfaults generated by
+ * thread 1 above. userfaultfd blocking reads or poll() modes are
+ * exercised interleaved.
+ *
+ * 3) one last per-CPU thread transfers the memory in the background
+ * at maximum bandwidth (if not already transferred by thread
+ * 2). Each cpu thread takes cares of transferring a portion of the
+ * area.
+ *
+ * When all threads of type 3 completed the transfer, one bounce is
+ * complete. area_src and area_dst are then swapped. All threads are
+ * respawned and so the bounce is immediately restarted in the
+ * opposite direction.
+ *
+ * per-CPU threads 1 by triggering userfaults inside
+ * pthread_mutex_lock will also verify the atomicity of the memory
+ * transfer (UFFDIO_COPY).
+ */
+
+#include "uffd-common.h"
+
+#ifdef __NR_userfaultfd
+
+#define BOUNCE_RANDOM (1<<0)
+#define BOUNCE_RACINGFAULTS (1<<1)
+#define BOUNCE_VERIFY (1<<2)
+#define BOUNCE_POLL (1<<3)
+static int bounces;
+
+/* exercise the test_uffdio_*_eexist every ALARM_INTERVAL_SECS */
+#define ALARM_INTERVAL_SECS 10
+static char *zeropage;
+pthread_attr_t attr;
+
+#define swap(a, b) \
+ do { typeof(a) __tmp = (a); (a) = (b); (b) = __tmp; } while (0)
+
+const char *examples =
+ "# Run anonymous memory test on 100MiB region with 99999 bounces:\n"
+ "./uffd-stress anon 100 99999\n\n"
+ "# Run share memory test on 1GiB region with 99 bounces:\n"
+ "./uffd-stress shmem 1000 99\n\n"
+ "# Run hugetlb memory test on 256MiB region with 50 bounces:\n"
+ "./uffd-stress hugetlb 256 50\n\n"
+ "# Run the same hugetlb test but using private file:\n"
+ "./uffd-stress hugetlb-private 256 50\n\n"
+ "# 10MiB-~6GiB 999 bounces anonymous test, "
+ "continue forever unless an error triggers\n"
+ "while ./uffd-stress anon $[RANDOM % 6000 + 10] 999; do true; done\n\n";
+
+static void usage(void)
+{
+ fprintf(stderr, "\nUsage: ./uffd-stress <test type> <MiB> <bounces>\n\n");
+ fprintf(stderr, "Supported <test type>: anon, hugetlb, "
+ "hugetlb-private, shmem, shmem-private\n\n");
+ fprintf(stderr, "Examples:\n\n");
+ fprintf(stderr, "%s", examples);
+ exit(1);
+}
+
+static void uffd_stats_reset(struct uffd_args *args, unsigned long n_cpus)
+{
+ int i;
+
+ for (i = 0; i < n_cpus; i++) {
+ args[i].cpu = i;
+ args[i].apply_wp = test_uffdio_wp;
+ args[i].missing_faults = 0;
+ args[i].wp_faults = 0;
+ args[i].minor_faults = 0;
+ }
+}
+
+static void *locking_thread(void *arg)
+{
+ unsigned long cpu = (unsigned long) arg;
+ unsigned long page_nr;
+ unsigned long long count;
+
+ if (!(bounces & BOUNCE_RANDOM)) {
+ page_nr = -bounces;
+ if (!(bounces & BOUNCE_RACINGFAULTS))
+ page_nr += cpu * nr_pages_per_cpu;
+ }
+
+ while (!finished) {
+ if (bounces & BOUNCE_RANDOM) {
+ if (getrandom(&page_nr, sizeof(page_nr), 0) != sizeof(page_nr))
+ err("getrandom failed");
+ } else
+ page_nr += 1;
+ page_nr %= nr_pages;
+ pthread_mutex_lock(area_mutex(area_dst, page_nr));
+ count = *area_count(area_dst, page_nr);
+ if (count != count_verify[page_nr])
+ err("page_nr %lu memory corruption %llu %llu",
+ page_nr, count, count_verify[page_nr]);
+ count++;
+ *area_count(area_dst, page_nr) = count_verify[page_nr] = count;
+ pthread_mutex_unlock(area_mutex(area_dst, page_nr));
+ }
+
+ return NULL;
+}
+
+static int copy_page_retry(int ufd, unsigned long offset)
+{
+ return __copy_page(ufd, offset, true, test_uffdio_wp);
+}
+
+pthread_mutex_t uffd_read_mutex = PTHREAD_MUTEX_INITIALIZER;
+
+static void *uffd_read_thread(void *arg)
+{
+ struct uffd_args *args = (struct uffd_args *)arg;
+ struct uffd_msg msg;
+
+ pthread_mutex_unlock(&uffd_read_mutex);
+ /* from here cancellation is ok */
+
+ for (;;) {
+ if (uffd_read_msg(uffd, &msg))
+ continue;
+ uffd_handle_page_fault(&msg, args);
+ }
+
+ return NULL;
+}
+
+static void *background_thread(void *arg)
+{
+ unsigned long cpu = (unsigned long) arg;
+ unsigned long page_nr, start_nr, mid_nr, end_nr;
+
+ start_nr = cpu * nr_pages_per_cpu;
+ end_nr = (cpu+1) * nr_pages_per_cpu;
+ mid_nr = (start_nr + end_nr) / 2;
+
+ /* Copy the first half of the pages */
+ for (page_nr = start_nr; page_nr < mid_nr; page_nr++)
+ copy_page_retry(uffd, page_nr * page_size);
+
+ /*
+ * If we need to test uffd-wp, set it up now. Then we'll have
+ * at least the first half of the pages mapped already which
+ * can be write-protected for testing
+ */
+ if (test_uffdio_wp)
+ wp_range(uffd, (unsigned long)area_dst + start_nr * page_size,
+ nr_pages_per_cpu * page_size, true);
+
+ /*
+ * Continue the 2nd half of the page copying, handling write
+ * protection faults if any
+ */
+ for (page_nr = mid_nr; page_nr < end_nr; page_nr++)
+ copy_page_retry(uffd, page_nr * page_size);
+
+ return NULL;
+}
+
+static int stress(struct uffd_args *args)
+{
+ unsigned long cpu;
+ pthread_t locking_threads[nr_cpus];
+ pthread_t uffd_threads[nr_cpus];
+ pthread_t background_threads[nr_cpus];
+
+ finished = 0;
+ for (cpu = 0; cpu < nr_cpus; cpu++) {
+ if (pthread_create(&locking_threads[cpu], &attr,
+ locking_thread, (void *)cpu))
+ return 1;
+ if (bounces & BOUNCE_POLL) {
+ if (pthread_create(&uffd_threads[cpu], &attr, uffd_poll_thread, &args[cpu]))
+ err("uffd_poll_thread create");
+ } else {
+ if (pthread_create(&uffd_threads[cpu], &attr,
+ uffd_read_thread,
+ (void *)&args[cpu]))
+ return 1;
+ pthread_mutex_lock(&uffd_read_mutex);
+ }
+ if (pthread_create(&background_threads[cpu], &attr,
+ background_thread, (void *)cpu))
+ return 1;
+ }
+ for (cpu = 0; cpu < nr_cpus; cpu++)
+ if (pthread_join(background_threads[cpu], NULL))
+ return 1;
+
+ /*
+ * Be strict and immediately zap area_src, the whole area has
+ * been transferred already by the background treads. The
+ * area_src could then be faulted in a racy way by still
+ * running uffdio_threads reading zeropages after we zapped
+ * area_src (but they're guaranteed to get -EEXIST from
+ * UFFDIO_COPY without writing zero pages into area_dst
+ * because the background threads already completed).
+ */
+ uffd_test_ops->release_pages(area_src);
+
+ finished = 1;
+ for (cpu = 0; cpu < nr_cpus; cpu++)
+ if (pthread_join(locking_threads[cpu], NULL))
+ return 1;
+
+ for (cpu = 0; cpu < nr_cpus; cpu++) {
+ char c;
+ if (bounces & BOUNCE_POLL) {
+ if (write(pipefd[cpu*2+1], &c, 1) != 1)
+ err("pipefd write error");
+ if (pthread_join(uffd_threads[cpu],
+ (void *)&args[cpu]))
+ return 1;
+ } else {
+ if (pthread_cancel(uffd_threads[cpu]))
+ return 1;
+ if (pthread_join(uffd_threads[cpu], NULL))
+ return 1;
+ }
+ }
+
+ return 0;
+}
+
+static int userfaultfd_stress(void)
+{
+ void *area;
+ unsigned long nr;
+ struct uffd_args args[nr_cpus];
+ uint64_t mem_size = nr_pages * page_size;
+
+ memset(args, 0, sizeof(struct uffd_args) * nr_cpus);
+
+ if (uffd_test_ctx_init(UFFD_FEATURE_WP_UNPOPULATED, NULL))
+ err("context init failed");
+
+ if (posix_memalign(&area, page_size, page_size))
+ err("out of memory");
+ zeropage = area;
+ bzero(zeropage, page_size);
+
+ pthread_mutex_lock(&uffd_read_mutex);
+
+ pthread_attr_init(&attr);
+ pthread_attr_setstacksize(&attr, 16*1024*1024);
+
+ while (bounces--) {
+ printf("bounces: %d, mode:", bounces);
+ if (bounces & BOUNCE_RANDOM)
+ printf(" rnd");
+ if (bounces & BOUNCE_RACINGFAULTS)
+ printf(" racing");
+ if (bounces & BOUNCE_VERIFY)
+ printf(" ver");
+ if (bounces & BOUNCE_POLL)
+ printf(" poll");
+ else
+ printf(" read");
+ printf(", ");
+ fflush(stdout);
+
+ if (bounces & BOUNCE_POLL)
+ fcntl(uffd, F_SETFL, uffd_flags | O_NONBLOCK);
+ else
+ fcntl(uffd, F_SETFL, uffd_flags & ~O_NONBLOCK);
+
+ /* register */
+ if (uffd_register(uffd, area_dst, mem_size,
+ true, test_uffdio_wp, false))
+ err("register failure");
+
+ if (area_dst_alias) {
+ if (uffd_register(uffd, area_dst_alias, mem_size,
+ true, test_uffdio_wp, false))
+ err("register failure alias");
+ }
+
+ /*
+ * The madvise done previously isn't enough: some
+ * uffd_thread could have read userfaults (one of
+ * those already resolved by the background thread)
+ * and it may be in the process of calling
+ * UFFDIO_COPY. UFFDIO_COPY will read the zapped
+ * area_src and it would map a zero page in it (of
+ * course such a UFFDIO_COPY is perfectly safe as it'd
+ * return -EEXIST). The problem comes at the next
+ * bounce though: that racing UFFDIO_COPY would
+ * generate zeropages in the area_src, so invalidating
+ * the previous MADV_DONTNEED. Without this additional
+ * MADV_DONTNEED those zeropages leftovers in the
+ * area_src would lead to -EEXIST failure during the
+ * next bounce, effectively leaving a zeropage in the
+ * area_dst.
+ *
+ * Try to comment this out madvise to see the memory
+ * corruption being caught pretty quick.
+ *
+ * khugepaged is also inhibited to collapse THP after
+ * MADV_DONTNEED only after the UFFDIO_REGISTER, so it's
+ * required to MADV_DONTNEED here.
+ */
+ uffd_test_ops->release_pages(area_dst);
+
+ uffd_stats_reset(args, nr_cpus);
+
+ /* bounce pass */
+ if (stress(args)) {
+ uffd_test_ctx_clear();
+ return 1;
+ }
+
+ /* Clear all the write protections if there is any */
+ if (test_uffdio_wp)
+ wp_range(uffd, (unsigned long)area_dst,
+ nr_pages * page_size, false);
+
+ /* unregister */
+ if (uffd_unregister(uffd, area_dst, mem_size))
+ err("unregister failure");
+ if (area_dst_alias) {
+ if (uffd_unregister(uffd, area_dst_alias, mem_size))
+ err("unregister failure alias");
+ }
+
+ /* verification */
+ if (bounces & BOUNCE_VERIFY)
+ for (nr = 0; nr < nr_pages; nr++)
+ if (*area_count(area_dst, nr) != count_verify[nr])
+ err("error area_count %llu %llu %lu\n",
+ *area_count(area_src, nr),
+ count_verify[nr], nr);
+
+ /* prepare next bounce */
+ swap(area_src, area_dst);
+
+ swap(area_src_alias, area_dst_alias);
+
+ uffd_stats_report(args, nr_cpus);
+ }
+ uffd_test_ctx_clear();
+
+ return 0;
+}
+
+static void set_test_type(const char *type)
+{
+ if (!strcmp(type, "anon")) {
+ test_type = TEST_ANON;
+ uffd_test_ops = &anon_uffd_test_ops;
+ } else if (!strcmp(type, "hugetlb")) {
+ test_type = TEST_HUGETLB;
+ uffd_test_ops = &hugetlb_uffd_test_ops;
+ map_shared = true;
+ } else if (!strcmp(type, "hugetlb-private")) {
+ test_type = TEST_HUGETLB;
+ uffd_test_ops = &hugetlb_uffd_test_ops;
+ } else if (!strcmp(type, "shmem")) {
+ map_shared = true;
+ test_type = TEST_SHMEM;
+ uffd_test_ops = &shmem_uffd_test_ops;
+ } else if (!strcmp(type, "shmem-private")) {
+ test_type = TEST_SHMEM;
+ uffd_test_ops = &shmem_uffd_test_ops;
+ }
+}
+
+static void parse_test_type_arg(const char *raw_type)
+{
+ uint64_t features = UFFD_API_FEATURES;
+
+ set_test_type(raw_type);
+
+ if (!test_type)
+ err("failed to parse test type argument: '%s'", raw_type);
+
+ if (test_type == TEST_HUGETLB)
+ page_size = default_huge_page_size();
+ else
+ page_size = sysconf(_SC_PAGE_SIZE);
+
+ if (!page_size)
+ err("Unable to determine page size");
+ if ((unsigned long) area_count(NULL, 0) + sizeof(unsigned long long) * 2
+ > page_size)
+ err("Impossible to run this test");
+
+ /*
+ * Whether we can test certain features depends not just on test type,
+ * but also on whether or not this particular kernel supports the
+ * feature.
+ */
+
+ if (userfaultfd_open(&features))
+ err("Userfaultfd open failed");
+
+ test_uffdio_wp = test_uffdio_wp &&
+ (features & UFFD_FEATURE_PAGEFAULT_FLAG_WP);
+
+ close(uffd);
+ uffd = -1;
+}
+
+static void sigalrm(int sig)
+{
+ if (sig != SIGALRM)
+ abort();
+ test_uffdio_copy_eexist = true;
+ alarm(ALARM_INTERVAL_SECS);
+}
+
+int main(int argc, char **argv)
+{
+ size_t bytes;
+
+ if (argc < 4)
+ usage();
+
+ if (signal(SIGALRM, sigalrm) == SIG_ERR)
+ err("failed to arm SIGALRM");
+ alarm(ALARM_INTERVAL_SECS);
+
+ parse_test_type_arg(argv[1]);
+ bytes = atol(argv[2]) * 1024 * 1024;
+
+ if (test_type == TEST_HUGETLB &&
+ get_free_hugepages() < bytes / page_size) {
+ printf("skip: Skipping userfaultfd... not enough hugepages\n");
+ return KSFT_SKIP;
+ }
+
+ nr_cpus = sysconf(_SC_NPROCESSORS_ONLN);
+
+ nr_pages_per_cpu = bytes / page_size / nr_cpus;
+ if (!nr_pages_per_cpu) {
+ _err("invalid MiB");
+ usage();
+ }
+
+ bounces = atoi(argv[3]);
+ if (bounces <= 0) {
+ _err("invalid bounces");
+ usage();
+ }
+ nr_pages = nr_pages_per_cpu * nr_cpus;
+
+ printf("nr_pages: %lu, nr_pages_per_cpu: %lu\n",
+ nr_pages, nr_pages_per_cpu);
+ return userfaultfd_stress();
+}
+
+#else /* __NR_userfaultfd */
+
+#warning "missing __NR_userfaultfd definition"
+
+int main(void)
+{
+ printf("skip: Skipping userfaultfd test (missing __NR_userfaultfd)\n");
+ return KSFT_SKIP;
+}
+
+#endif /* __NR_userfaultfd */
diff --git a/tools/testing/selftests/mm/uffd-unit-tests.c b/tools/testing/selftests/mm/uffd-unit-tests.c
new file mode 100644
index 000000000000..2b9f8cc52639
--- /dev/null
+++ b/tools/testing/selftests/mm/uffd-unit-tests.c
@@ -0,0 +1,1556 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Userfaultfd unit tests.
+ *
+ * Copyright (C) 2015-2023 Red Hat, Inc.
+ */
+
+#include "uffd-common.h"
+
+#include "../../../../mm/gup_test.h"
+
+#ifdef __NR_userfaultfd
+
+/* The unit test doesn't need a large or random size, make it 32MB for now */
+#define UFFD_TEST_MEM_SIZE (32UL << 20)
+
+#define MEM_ANON BIT_ULL(0)
+#define MEM_SHMEM BIT_ULL(1)
+#define MEM_SHMEM_PRIVATE BIT_ULL(2)
+#define MEM_HUGETLB BIT_ULL(3)
+#define MEM_HUGETLB_PRIVATE BIT_ULL(4)
+
+#define MEM_ALL (MEM_ANON | MEM_SHMEM | MEM_SHMEM_PRIVATE | \
+ MEM_HUGETLB | MEM_HUGETLB_PRIVATE)
+
+#define ALIGN_UP(x, align_to) \
+ ((__typeof__(x))((((unsigned long)(x)) + ((align_to)-1)) & ~((align_to)-1)))
+
+struct mem_type {
+ const char *name;
+ unsigned int mem_flag;
+ uffd_test_ops_t *mem_ops;
+ bool shared;
+};
+typedef struct mem_type mem_type_t;
+
+mem_type_t mem_types[] = {
+ {
+ .name = "anon",
+ .mem_flag = MEM_ANON,
+ .mem_ops = &anon_uffd_test_ops,
+ .shared = false,
+ },
+ {
+ .name = "shmem",
+ .mem_flag = MEM_SHMEM,
+ .mem_ops = &shmem_uffd_test_ops,
+ .shared = true,
+ },
+ {
+ .name = "shmem-private",
+ .mem_flag = MEM_SHMEM_PRIVATE,
+ .mem_ops = &shmem_uffd_test_ops,
+ .shared = false,
+ },
+ {
+ .name = "hugetlb",
+ .mem_flag = MEM_HUGETLB,
+ .mem_ops = &hugetlb_uffd_test_ops,
+ .shared = true,
+ },
+ {
+ .name = "hugetlb-private",
+ .mem_flag = MEM_HUGETLB_PRIVATE,
+ .mem_ops = &hugetlb_uffd_test_ops,
+ .shared = false,
+ },
+};
+
+/* Arguments to be passed over to each uffd unit test */
+struct uffd_test_args {
+ mem_type_t *mem_type;
+};
+typedef struct uffd_test_args uffd_test_args_t;
+
+/* Returns: UFFD_TEST_* */
+typedef void (*uffd_test_fn)(uffd_test_args_t *);
+
+typedef struct {
+ const char *name;
+ uffd_test_fn uffd_fn;
+ unsigned int mem_targets;
+ uint64_t uffd_feature_required;
+ uffd_test_case_ops_t *test_case_ops;
+} uffd_test_case_t;
+
+static void uffd_test_report(void)
+{
+ printf("Userfaults unit tests: pass=%u, skip=%u, fail=%u (total=%u)\n",
+ ksft_get_pass_cnt(),
+ ksft_get_xskip_cnt(),
+ ksft_get_fail_cnt(),
+ ksft_test_num());
+}
+
+static void uffd_test_pass(void)
+{
+ printf("done\n");
+ ksft_inc_pass_cnt();
+}
+
+#define uffd_test_start(...) do { \
+ printf("Testing "); \
+ printf(__VA_ARGS__); \
+ printf("... "); \
+ fflush(stdout); \
+ } while (0)
+
+#define uffd_test_fail(...) do { \
+ printf("failed [reason: "); \
+ printf(__VA_ARGS__); \
+ printf("]\n"); \
+ ksft_inc_fail_cnt(); \
+ } while (0)
+
+static void uffd_test_skip(const char *message)
+{
+ printf("skipped [reason: %s]\n", message);
+ ksft_inc_xskip_cnt();
+}
+
+/*
+ * Returns 1 if specific userfaultfd supported, 0 otherwise. Note, we'll
+ * return 1 even if some test failed as long as uffd supported, because in
+ * that case we still want to proceed with the rest uffd unit tests.
+ */
+static int test_uffd_api(bool use_dev)
+{
+ struct uffdio_api uffdio_api;
+ int uffd;
+
+ uffd_test_start("UFFDIO_API (with %s)",
+ use_dev ? "/dev/userfaultfd" : "syscall");
+
+ if (use_dev)
+ uffd = uffd_open_dev(UFFD_FLAGS);
+ else
+ uffd = uffd_open_sys(UFFD_FLAGS);
+ if (uffd < 0) {
+ uffd_test_skip("cannot open userfaultfd handle");
+ return 0;
+ }
+
+ /* Test wrong UFFD_API */
+ uffdio_api.api = 0xab;
+ uffdio_api.features = 0;
+ if (ioctl(uffd, UFFDIO_API, &uffdio_api) == 0) {
+ uffd_test_fail("UFFDIO_API should fail with wrong api but didn't");
+ goto out;
+ }
+
+ /* Test wrong feature bit */
+ uffdio_api.api = UFFD_API;
+ uffdio_api.features = BIT_ULL(63);
+ if (ioctl(uffd, UFFDIO_API, &uffdio_api) == 0) {
+ uffd_test_fail("UFFDIO_API should fail with wrong feature but didn't");
+ goto out;
+ }
+
+ /* Test normal UFFDIO_API */
+ uffdio_api.api = UFFD_API;
+ uffdio_api.features = 0;
+ if (ioctl(uffd, UFFDIO_API, &uffdio_api)) {
+ uffd_test_fail("UFFDIO_API should succeed but failed");
+ goto out;
+ }
+
+ /* Test double requests of UFFDIO_API with a random feature set */
+ uffdio_api.features = BIT_ULL(0);
+ if (ioctl(uffd, UFFDIO_API, &uffdio_api) == 0) {
+ uffd_test_fail("UFFDIO_API should reject initialized uffd");
+ goto out;
+ }
+
+ uffd_test_pass();
+out:
+ close(uffd);
+ /* We have a valid uffd handle */
+ return 1;
+}
+
+/*
+ * This function initializes the global variables. TODO: remove global
+ * vars and then remove this.
+ */
+static int
+uffd_setup_environment(uffd_test_args_t *args, uffd_test_case_t *test,
+ mem_type_t *mem_type, const char **errmsg)
+{
+ map_shared = mem_type->shared;
+ uffd_test_ops = mem_type->mem_ops;
+ uffd_test_case_ops = test->test_case_ops;
+
+ if (mem_type->mem_flag & (MEM_HUGETLB_PRIVATE | MEM_HUGETLB))
+ page_size = default_huge_page_size();
+ else
+ page_size = psize();
+
+ nr_pages = UFFD_TEST_MEM_SIZE / page_size;
+ /* TODO: remove this global var.. it's so ugly */
+ nr_cpus = 1;
+
+ /* Initialize test arguments */
+ args->mem_type = mem_type;
+
+ return uffd_test_ctx_init(test->uffd_feature_required, errmsg);
+}
+
+static bool uffd_feature_supported(uffd_test_case_t *test)
+{
+ uint64_t features;
+
+ if (uffd_get_features(&features))
+ return false;
+
+ return (features & test->uffd_feature_required) ==
+ test->uffd_feature_required;
+}
+
+static int pagemap_open(void)
+{
+ int fd = open("/proc/self/pagemap", O_RDONLY);
+
+ if (fd < 0)
+ err("open pagemap");
+
+ return fd;
+}
+
+/* This macro let __LINE__ works in err() */
+#define pagemap_check_wp(value, wp) do { \
+ if (!!(value & PM_UFFD_WP) != wp) \
+ err("pagemap uffd-wp bit error: 0x%"PRIx64, value); \
+ } while (0)
+
+typedef struct {
+ int parent_uffd, child_uffd;
+} fork_event_args;
+
+static void *fork_event_consumer(void *data)
+{
+ fork_event_args *args = data;
+ struct uffd_msg msg = { 0 };
+
+ /* Read until a full msg received */
+ while (uffd_read_msg(args->parent_uffd, &msg));
+
+ if (msg.event != UFFD_EVENT_FORK)
+ err("wrong message: %u\n", msg.event);
+
+ /* Just to be properly freed later */
+ args->child_uffd = msg.arg.fork.ufd;
+ return NULL;
+}
+
+typedef struct {
+ int gup_fd;
+ bool pinned;
+} pin_args;
+
+/*
+ * Returns 0 if succeed, <0 for errors. pin_pages() needs to be paired
+ * with unpin_pages(). Currently it needs to be RO longterm pin to satisfy
+ * all needs of the test cases (e.g., trigger unshare, trigger fork() early
+ * CoW, etc.).
+ */
+static int pin_pages(pin_args *args, void *buffer, size_t size)
+{
+ struct pin_longterm_test test = {
+ .addr = (uintptr_t)buffer,
+ .size = size,
+ /* Read-only pins */
+ .flags = 0,
+ };
+
+ if (args->pinned)
+ err("already pinned");
+
+ args->gup_fd = open("/sys/kernel/debug/gup_test", O_RDWR);
+ if (args->gup_fd < 0)
+ return -errno;
+
+ if (ioctl(args->gup_fd, PIN_LONGTERM_TEST_START, &test)) {
+ /* Even if gup_test existed, can be an old gup_test / kernel */
+ close(args->gup_fd);
+ return -errno;
+ }
+ args->pinned = true;
+ return 0;
+}
+
+static void unpin_pages(pin_args *args)
+{
+ if (!args->pinned)
+ err("unpin without pin first");
+ if (ioctl(args->gup_fd, PIN_LONGTERM_TEST_STOP))
+ err("PIN_LONGTERM_TEST_STOP");
+ close(args->gup_fd);
+ args->pinned = false;
+}
+
+static int pagemap_test_fork(int uffd, bool with_event, bool test_pin)
+{
+ fork_event_args args = { .parent_uffd = uffd, .child_uffd = -1 };
+ pthread_t thread;
+ pid_t child;
+ uint64_t value;
+ int fd, result;
+
+ /* Prepare a thread to resolve EVENT_FORK */
+ if (with_event) {
+ if (pthread_create(&thread, NULL, fork_event_consumer, &args))
+ err("pthread_create()");
+ }
+
+ child = fork();
+ if (!child) {
+ /* Open the pagemap fd of the child itself */
+ pin_args args = {};
+
+ fd = pagemap_open();
+
+ if (test_pin && pin_pages(&args, area_dst, page_size))
+ /*
+ * Normally when reach here we have pinned in
+ * previous tests, so shouldn't fail anymore
+ */
+ err("pin page failed in child");
+
+ value = pagemap_get_entry(fd, area_dst);
+ /*
+ * After fork(), we should handle uffd-wp bit differently:
+ *
+ * (1) when with EVENT_FORK, it should persist
+ * (2) when without EVENT_FORK, it should be dropped
+ */
+ pagemap_check_wp(value, with_event);
+ if (test_pin)
+ unpin_pages(&args);
+ /* Succeed */
+ exit(0);
+ }
+ waitpid(child, &result, 0);
+
+ if (with_event) {
+ if (pthread_join(thread, NULL))
+ err("pthread_join()");
+ if (args.child_uffd < 0)
+ err("Didn't receive child uffd");
+ close(args.child_uffd);
+ }
+
+ return result;
+}
+
+static void uffd_wp_unpopulated_test(uffd_test_args_t *args)
+{
+ uint64_t value;
+ int pagemap_fd;
+
+ if (uffd_register(uffd, area_dst, nr_pages * page_size,
+ false, true, false))
+ err("register failed");
+
+ pagemap_fd = pagemap_open();
+
+ /* Test applying pte marker to anon unpopulated */
+ wp_range(uffd, (uint64_t)area_dst, page_size, true);
+ value = pagemap_get_entry(pagemap_fd, area_dst);
+ pagemap_check_wp(value, true);
+
+ /* Test unprotect on anon pte marker */
+ wp_range(uffd, (uint64_t)area_dst, page_size, false);
+ value = pagemap_get_entry(pagemap_fd, area_dst);
+ pagemap_check_wp(value, false);
+
+ /* Test zap on anon marker */
+ wp_range(uffd, (uint64_t)area_dst, page_size, true);
+ if (madvise(area_dst, page_size, MADV_DONTNEED))
+ err("madvise(MADV_DONTNEED) failed");
+ value = pagemap_get_entry(pagemap_fd, area_dst);
+ pagemap_check_wp(value, false);
+
+ /* Test fault in after marker removed */
+ *area_dst = 1;
+ value = pagemap_get_entry(pagemap_fd, area_dst);
+ pagemap_check_wp(value, false);
+ /* Drop it to make pte none again */
+ if (madvise(area_dst, page_size, MADV_DONTNEED))
+ err("madvise(MADV_DONTNEED) failed");
+
+ /* Test read-zero-page upon pte marker */
+ wp_range(uffd, (uint64_t)area_dst, page_size, true);
+ *(volatile char *)area_dst;
+ /* Drop it to make pte none again */
+ if (madvise(area_dst, page_size, MADV_DONTNEED))
+ err("madvise(MADV_DONTNEED) failed");
+
+ uffd_test_pass();
+}
+
+static void uffd_wp_fork_test_common(uffd_test_args_t *args,
+ bool with_event)
+{
+ int pagemap_fd;
+ uint64_t value;
+
+ if (uffd_register(uffd, area_dst, nr_pages * page_size,
+ false, true, false))
+ err("register failed");
+
+ pagemap_fd = pagemap_open();
+
+ /* Touch the page */
+ *area_dst = 1;
+ wp_range(uffd, (uint64_t)area_dst, page_size, true);
+ value = pagemap_get_entry(pagemap_fd, area_dst);
+ pagemap_check_wp(value, true);
+ if (pagemap_test_fork(uffd, with_event, false)) {
+ uffd_test_fail("Detected %s uffd-wp bit in child in present pte",
+ with_event ? "missing" : "stall");
+ goto out;
+ }
+
+ /*
+ * This is an attempt for zapping the pgtable so as to test the
+ * markers.
+ *
+ * For private mappings, PAGEOUT will only work on exclusive ptes
+ * (PM_MMAP_EXCLUSIVE) which we should satisfy.
+ *
+ * For shared, PAGEOUT may not work. Use DONTNEED instead which
+ * plays a similar role of zapping (rather than freeing the page)
+ * to expose pte markers.
+ */
+ if (args->mem_type->shared) {
+ if (madvise(area_dst, page_size, MADV_DONTNEED))
+ err("MADV_DONTNEED");
+ } else {
+ /*
+ * NOTE: ignore retval because private-hugetlb doesn't yet
+ * support swapping, so it could fail.
+ */
+ madvise(area_dst, page_size, MADV_PAGEOUT);
+ }
+
+ /* Uffd-wp should persist even swapped out */
+ value = pagemap_get_entry(pagemap_fd, area_dst);
+ pagemap_check_wp(value, true);
+ if (pagemap_test_fork(uffd, with_event, false)) {
+ uffd_test_fail("Detected %s uffd-wp bit in child in zapped pte",
+ with_event ? "missing" : "stall");
+ goto out;
+ }
+
+ /* Unprotect; this tests swap pte modifications */
+ wp_range(uffd, (uint64_t)area_dst, page_size, false);
+ value = pagemap_get_entry(pagemap_fd, area_dst);
+ pagemap_check_wp(value, false);
+
+ /* Fault in the page from disk */
+ *area_dst = 2;
+ value = pagemap_get_entry(pagemap_fd, area_dst);
+ pagemap_check_wp(value, false);
+ uffd_test_pass();
+out:
+ if (uffd_unregister(uffd, area_dst, nr_pages * page_size))
+ err("unregister failed");
+ close(pagemap_fd);
+}
+
+static void uffd_wp_fork_test(uffd_test_args_t *args)
+{
+ uffd_wp_fork_test_common(args, false);
+}
+
+static void uffd_wp_fork_with_event_test(uffd_test_args_t *args)
+{
+ uffd_wp_fork_test_common(args, true);
+}
+
+static void uffd_wp_fork_pin_test_common(uffd_test_args_t *args,
+ bool with_event)
+{
+ int pagemap_fd;
+ pin_args pin_args = {};
+
+ if (uffd_register(uffd, area_dst, page_size, false, true, false))
+ err("register failed");
+
+ pagemap_fd = pagemap_open();
+
+ /* Touch the page */
+ *area_dst = 1;
+ wp_range(uffd, (uint64_t)area_dst, page_size, true);
+
+ /*
+ * 1. First pin, then fork(). This tests fork() special path when
+ * doing early CoW if the page is private.
+ */
+ if (pin_pages(&pin_args, area_dst, page_size)) {
+ uffd_test_skip("Possibly CONFIG_GUP_TEST missing "
+ "or unprivileged");
+ close(pagemap_fd);
+ uffd_unregister(uffd, area_dst, page_size);
+ return;
+ }
+
+ if (pagemap_test_fork(uffd, with_event, false)) {
+ uffd_test_fail("Detected %s uffd-wp bit in early CoW of fork()",
+ with_event ? "missing" : "stall");
+ unpin_pages(&pin_args);
+ goto out;
+ }
+
+ unpin_pages(&pin_args);
+
+ /*
+ * 2. First fork(), then pin (in the child, where test_pin==true).
+ * This tests COR, aka, page unsharing on private memories.
+ */
+ if (pagemap_test_fork(uffd, with_event, true)) {
+ uffd_test_fail("Detected %s uffd-wp bit when RO pin",
+ with_event ? "missing" : "stall");
+ goto out;
+ }
+ uffd_test_pass();
+out:
+ if (uffd_unregister(uffd, area_dst, page_size))
+ err("register failed");
+ close(pagemap_fd);
+}
+
+static void uffd_wp_fork_pin_test(uffd_test_args_t *args)
+{
+ uffd_wp_fork_pin_test_common(args, false);
+}
+
+static void uffd_wp_fork_pin_with_event_test(uffd_test_args_t *args)
+{
+ uffd_wp_fork_pin_test_common(args, true);
+}
+
+static void check_memory_contents(char *p)
+{
+ unsigned long i, j;
+ uint8_t expected_byte;
+
+ for (i = 0; i < nr_pages; ++i) {
+ expected_byte = ~((uint8_t)(i % ((uint8_t)-1)));
+ for (j = 0; j < page_size; j++) {
+ uint8_t v = *(uint8_t *)(p + (i * page_size) + j);
+ if (v != expected_byte)
+ err("unexpected page contents");
+ }
+ }
+}
+
+static void uffd_minor_test_common(bool test_collapse, bool test_wp)
+{
+ unsigned long p;
+ pthread_t uffd_mon;
+ char c;
+ struct uffd_args args = { 0 };
+
+ /*
+ * NOTE: MADV_COLLAPSE is not yet compatible with WP, so testing
+ * both do not make much sense.
+ */
+ assert(!(test_collapse && test_wp));
+
+ if (uffd_register(uffd, area_dst_alias, nr_pages * page_size,
+ /* NOTE! MADV_COLLAPSE may not work with uffd-wp */
+ false, test_wp, true))
+ err("register failure");
+
+ /*
+ * After registering with UFFD, populate the non-UFFD-registered side of
+ * the shared mapping. This should *not* trigger any UFFD minor faults.
+ */
+ for (p = 0; p < nr_pages; ++p)
+ memset(area_dst + (p * page_size), p % ((uint8_t)-1),
+ page_size);
+
+ args.apply_wp = test_wp;
+ if (pthread_create(&uffd_mon, NULL, uffd_poll_thread, &args))
+ err("uffd_poll_thread create");
+
+ /*
+ * Read each of the pages back using the UFFD-registered mapping. We
+ * expect that the first time we touch a page, it will result in a minor
+ * fault. uffd_poll_thread will resolve the fault by bit-flipping the
+ * page's contents, and then issuing a CONTINUE ioctl.
+ */
+ check_memory_contents(area_dst_alias);
+
+ if (write(pipefd[1], &c, sizeof(c)) != sizeof(c))
+ err("pipe write");
+ if (pthread_join(uffd_mon, NULL))
+ err("join() failed");
+
+ if (test_collapse) {
+ if (madvise(area_dst_alias, nr_pages * page_size,
+ MADV_COLLAPSE)) {
+ /* It's fine to fail for this one... */
+ uffd_test_skip("MADV_COLLAPSE failed");
+ return;
+ }
+
+ uffd_test_ops->check_pmd_mapping(area_dst,
+ nr_pages * page_size /
+ read_pmd_pagesize());
+ /*
+ * This won't cause uffd-fault - it purely just makes sure there
+ * was no corruption.
+ */
+ check_memory_contents(area_dst_alias);
+ }
+
+ if (args.missing_faults != 0 || args.minor_faults != nr_pages)
+ uffd_test_fail("stats check error");
+ else
+ uffd_test_pass();
+}
+
+void uffd_minor_test(uffd_test_args_t *args)
+{
+ uffd_minor_test_common(false, false);
+}
+
+void uffd_minor_wp_test(uffd_test_args_t *args)
+{
+ uffd_minor_test_common(false, true);
+}
+
+void uffd_minor_collapse_test(uffd_test_args_t *args)
+{
+ uffd_minor_test_common(true, false);
+}
+
+static sigjmp_buf jbuf, *sigbuf;
+
+static void sighndl(int sig, siginfo_t *siginfo, void *ptr)
+{
+ if (sig == SIGBUS) {
+ if (sigbuf)
+ siglongjmp(*sigbuf, 1);
+ abort();
+ }
+}
+
+/*
+ * For non-cooperative userfaultfd test we fork() a process that will
+ * generate pagefaults, will mremap the area monitored by the
+ * userfaultfd and at last this process will release the monitored
+ * area.
+ * For the anonymous and shared memory the area is divided into two
+ * parts, the first part is accessed before mremap, and the second
+ * part is accessed after mremap. Since hugetlbfs does not support
+ * mremap, the entire monitored area is accessed in a single pass for
+ * HUGETLB_TEST.
+ * The release of the pages currently generates event for shmem and
+ * anonymous memory (UFFD_EVENT_REMOVE), hence it is not checked
+ * for hugetlb.
+ * For signal test(UFFD_FEATURE_SIGBUS), signal_test = 1, we register
+ * monitored area, generate pagefaults and test that signal is delivered.
+ * Use UFFDIO_COPY to allocate missing page and retry. For signal_test = 2
+ * test robustness use case - we release monitored area, fork a process
+ * that will generate pagefaults and verify signal is generated.
+ * This also tests UFFD_FEATURE_EVENT_FORK event along with the signal
+ * feature. Using monitor thread, verify no userfault events are generated.
+ */
+static int faulting_process(int signal_test, bool wp)
+{
+ unsigned long nr, i;
+ unsigned long long count;
+ unsigned long split_nr_pages;
+ unsigned long lastnr;
+ struct sigaction act;
+ volatile unsigned long signalled = 0;
+
+ split_nr_pages = (nr_pages + 1) / 2;
+
+ if (signal_test) {
+ sigbuf = &jbuf;
+ memset(&act, 0, sizeof(act));
+ act.sa_sigaction = sighndl;
+ act.sa_flags = SA_SIGINFO;
+ if (sigaction(SIGBUS, &act, 0))
+ err("sigaction");
+ lastnr = (unsigned long)-1;
+ }
+
+ for (nr = 0; nr < split_nr_pages; nr++) {
+ volatile int steps = 1;
+ unsigned long offset = nr * page_size;
+
+ if (signal_test) {
+ if (sigsetjmp(*sigbuf, 1) != 0) {
+ if (steps == 1 && nr == lastnr)
+ err("Signal repeated");
+
+ lastnr = nr;
+ if (signal_test == 1) {
+ if (steps == 1) {
+ /* This is a MISSING request */
+ steps++;
+ if (copy_page(uffd, offset, wp))
+ signalled++;
+ } else {
+ /* This is a WP request */
+ assert(steps == 2);
+ wp_range(uffd,
+ (__u64)area_dst +
+ offset,
+ page_size, false);
+ }
+ } else {
+ signalled++;
+ continue;
+ }
+ }
+ }
+
+ count = *area_count(area_dst, nr);
+ if (count != count_verify[nr])
+ err("nr %lu memory corruption %llu %llu\n",
+ nr, count, count_verify[nr]);
+ /*
+ * Trigger write protection if there is by writing
+ * the same value back.
+ */
+ *area_count(area_dst, nr) = count;
+ }
+
+ if (signal_test)
+ return signalled != split_nr_pages;
+
+ area_dst = mremap(area_dst, nr_pages * page_size, nr_pages * page_size,
+ MREMAP_MAYMOVE | MREMAP_FIXED, area_src);
+ if (area_dst == MAP_FAILED)
+ err("mremap");
+ /* Reset area_src since we just clobbered it */
+ area_src = NULL;
+
+ for (; nr < nr_pages; nr++) {
+ count = *area_count(area_dst, nr);
+ if (count != count_verify[nr]) {
+ err("nr %lu memory corruption %llu %llu\n",
+ nr, count, count_verify[nr]);
+ }
+ /*
+ * Trigger write protection if there is by writing
+ * the same value back.
+ */
+ *area_count(area_dst, nr) = count;
+ }
+
+ uffd_test_ops->release_pages(area_dst);
+
+ for (nr = 0; nr < nr_pages; nr++)
+ for (i = 0; i < page_size; i++)
+ if (*(area_dst + nr * page_size + i) != 0)
+ err("page %lu offset %lu is not zero", nr, i);
+
+ return 0;
+}
+
+static void uffd_sigbus_test_common(bool wp)
+{
+ unsigned long userfaults;
+ pthread_t uffd_mon;
+ pid_t pid;
+ int err;
+ char c;
+ struct uffd_args args = { 0 };
+
+ fcntl(uffd, F_SETFL, uffd_flags | O_NONBLOCK);
+
+ if (uffd_register(uffd, area_dst, nr_pages * page_size,
+ true, wp, false))
+ err("register failure");
+
+ if (faulting_process(1, wp))
+ err("faulting process failed");
+
+ uffd_test_ops->release_pages(area_dst);
+
+ args.apply_wp = wp;
+ if (pthread_create(&uffd_mon, NULL, uffd_poll_thread, &args))
+ err("uffd_poll_thread create");
+
+ pid = fork();
+ if (pid < 0)
+ err("fork");
+
+ if (!pid)
+ exit(faulting_process(2, wp));
+
+ waitpid(pid, &err, 0);
+ if (err)
+ err("faulting process failed");
+ if (write(pipefd[1], &c, sizeof(c)) != sizeof(c))
+ err("pipe write");
+ if (pthread_join(uffd_mon, (void **)&userfaults))
+ err("pthread_join()");
+
+ if (userfaults)
+ uffd_test_fail("Signal test failed, userfaults: %ld", userfaults);
+ else
+ uffd_test_pass();
+}
+
+static void uffd_sigbus_test(uffd_test_args_t *args)
+{
+ uffd_sigbus_test_common(false);
+}
+
+static void uffd_sigbus_wp_test(uffd_test_args_t *args)
+{
+ uffd_sigbus_test_common(true);
+}
+
+static void uffd_events_test_common(bool wp)
+{
+ pthread_t uffd_mon;
+ pid_t pid;
+ int err;
+ char c;
+ struct uffd_args args = { 0 };
+
+ fcntl(uffd, F_SETFL, uffd_flags | O_NONBLOCK);
+ if (uffd_register(uffd, area_dst, nr_pages * page_size,
+ true, wp, false))
+ err("register failure");
+
+ args.apply_wp = wp;
+ if (pthread_create(&uffd_mon, NULL, uffd_poll_thread, &args))
+ err("uffd_poll_thread create");
+
+ pid = fork();
+ if (pid < 0)
+ err("fork");
+
+ if (!pid)
+ exit(faulting_process(0, wp));
+
+ waitpid(pid, &err, 0);
+ if (err)
+ err("faulting process failed");
+ if (write(pipefd[1], &c, sizeof(c)) != sizeof(c))
+ err("pipe write");
+ if (pthread_join(uffd_mon, NULL))
+ err("pthread_join()");
+
+ if (args.missing_faults != nr_pages)
+ uffd_test_fail("Fault counts wrong");
+ else
+ uffd_test_pass();
+}
+
+static void uffd_events_test(uffd_test_args_t *args)
+{
+ uffd_events_test_common(false);
+}
+
+static void uffd_events_wp_test(uffd_test_args_t *args)
+{
+ uffd_events_test_common(true);
+}
+
+static void retry_uffdio_zeropage(int ufd,
+ struct uffdio_zeropage *uffdio_zeropage)
+{
+ uffd_test_ops->alias_mapping(&uffdio_zeropage->range.start,
+ uffdio_zeropage->range.len,
+ 0);
+ if (ioctl(ufd, UFFDIO_ZEROPAGE, uffdio_zeropage)) {
+ if (uffdio_zeropage->zeropage != -EEXIST)
+ err("UFFDIO_ZEROPAGE error: %"PRId64,
+ (int64_t)uffdio_zeropage->zeropage);
+ } else {
+ err("UFFDIO_ZEROPAGE error: %"PRId64,
+ (int64_t)uffdio_zeropage->zeropage);
+ }
+}
+
+static bool do_uffdio_zeropage(int ufd, bool has_zeropage)
+{
+ struct uffdio_zeropage uffdio_zeropage = { 0 };
+ int ret;
+ __s64 res;
+
+ uffdio_zeropage.range.start = (unsigned long) area_dst;
+ uffdio_zeropage.range.len = page_size;
+ uffdio_zeropage.mode = 0;
+ ret = ioctl(ufd, UFFDIO_ZEROPAGE, &uffdio_zeropage);
+ res = uffdio_zeropage.zeropage;
+ if (ret) {
+ /* real retval in ufdio_zeropage.zeropage */
+ if (has_zeropage)
+ err("UFFDIO_ZEROPAGE error: %"PRId64, (int64_t)res);
+ else if (res != -EINVAL)
+ err("UFFDIO_ZEROPAGE not -EINVAL");
+ } else if (has_zeropage) {
+ if (res != page_size)
+ err("UFFDIO_ZEROPAGE unexpected size");
+ else
+ retry_uffdio_zeropage(ufd, &uffdio_zeropage);
+ return true;
+ } else
+ err("UFFDIO_ZEROPAGE succeeded");
+
+ return false;
+}
+
+/*
+ * Registers a range with MISSING mode only for zeropage test. Return true
+ * if UFFDIO_ZEROPAGE supported, false otherwise. Can't use uffd_register()
+ * because we want to detect .ioctls along the way.
+ */
+static bool
+uffd_register_detect_zeropage(int uffd, void *addr, uint64_t len)
+{
+ uint64_t ioctls = 0;
+
+ if (uffd_register_with_ioctls(uffd, addr, len, true,
+ false, false, &ioctls))
+ err("zeropage register fail");
+
+ return ioctls & (1 << _UFFDIO_ZEROPAGE);
+}
+
+/* exercise UFFDIO_ZEROPAGE */
+static void uffd_zeropage_test(uffd_test_args_t *args)
+{
+ bool has_zeropage;
+ int i;
+
+ has_zeropage = uffd_register_detect_zeropage(uffd, area_dst, page_size);
+ if (area_dst_alias)
+ /* Ignore the retval; we already have it */
+ uffd_register_detect_zeropage(uffd, area_dst_alias, page_size);
+
+ if (do_uffdio_zeropage(uffd, has_zeropage))
+ for (i = 0; i < page_size; i++)
+ if (area_dst[i] != 0)
+ err("data non-zero at offset %d\n", i);
+
+ if (uffd_unregister(uffd, area_dst, page_size))
+ err("unregister");
+
+ if (area_dst_alias && uffd_unregister(uffd, area_dst_alias, page_size))
+ err("unregister");
+
+ uffd_test_pass();
+}
+
+static void uffd_register_poison(int uffd, void *addr, uint64_t len)
+{
+ uint64_t ioctls = 0;
+ uint64_t expected = (1 << _UFFDIO_COPY) | (1 << _UFFDIO_POISON);
+
+ if (uffd_register_with_ioctls(uffd, addr, len, true,
+ false, false, &ioctls))
+ err("poison register fail");
+
+ if ((ioctls & expected) != expected)
+ err("registered area doesn't support COPY and POISON ioctls");
+}
+
+static void do_uffdio_poison(int uffd, unsigned long offset)
+{
+ struct uffdio_poison uffdio_poison = { 0 };
+ int ret;
+ __s64 res;
+
+ uffdio_poison.range.start = (unsigned long) area_dst + offset;
+ uffdio_poison.range.len = page_size;
+ uffdio_poison.mode = 0;
+ ret = ioctl(uffd, UFFDIO_POISON, &uffdio_poison);
+ res = uffdio_poison.updated;
+
+ if (ret)
+ err("UFFDIO_POISON error: %"PRId64, (int64_t)res);
+ else if (res != page_size)
+ err("UFFDIO_POISON unexpected size: %"PRId64, (int64_t)res);
+}
+
+static void uffd_poison_handle_fault(
+ struct uffd_msg *msg, struct uffd_args *args)
+{
+ unsigned long offset;
+
+ if (msg->event != UFFD_EVENT_PAGEFAULT)
+ err("unexpected msg event %u", msg->event);
+
+ if (msg->arg.pagefault.flags &
+ (UFFD_PAGEFAULT_FLAG_WP | UFFD_PAGEFAULT_FLAG_MINOR))
+ err("unexpected fault type %llu", msg->arg.pagefault.flags);
+
+ offset = (char *)(unsigned long)msg->arg.pagefault.address - area_dst;
+ offset &= ~(page_size-1);
+
+ /* Odd pages -> copy zeroed page; even pages -> poison. */
+ if (offset & page_size)
+ copy_page(uffd, offset, false);
+ else
+ do_uffdio_poison(uffd, offset);
+}
+
+static void uffd_poison_test(uffd_test_args_t *targs)
+{
+ pthread_t uffd_mon;
+ char c;
+ struct uffd_args args = { 0 };
+ struct sigaction act = { 0 };
+ unsigned long nr_sigbus = 0;
+ unsigned long nr;
+
+ fcntl(uffd, F_SETFL, uffd_flags | O_NONBLOCK);
+
+ uffd_register_poison(uffd, area_dst, nr_pages * page_size);
+ memset(area_src, 0, nr_pages * page_size);
+
+ args.handle_fault = uffd_poison_handle_fault;
+ if (pthread_create(&uffd_mon, NULL, uffd_poll_thread, &args))
+ err("uffd_poll_thread create");
+
+ sigbuf = &jbuf;
+ act.sa_sigaction = sighndl;
+ act.sa_flags = SA_SIGINFO;
+ if (sigaction(SIGBUS, &act, 0))
+ err("sigaction");
+
+ for (nr = 0; nr < nr_pages; ++nr) {
+ unsigned long offset = nr * page_size;
+ const char *bytes = (const char *) area_dst + offset;
+ const char *i;
+
+ if (sigsetjmp(*sigbuf, 1)) {
+ /*
+ * Access below triggered a SIGBUS, which was caught by
+ * sighndl, which then jumped here. Count this SIGBUS,
+ * and move on to next page.
+ */
+ ++nr_sigbus;
+ continue;
+ }
+
+ for (i = bytes; i < bytes + page_size; ++i) {
+ if (*i)
+ err("nonzero byte in area_dst (%p) at %p: %u",
+ area_dst, i, *i);
+ }
+ }
+
+ if (write(pipefd[1], &c, sizeof(c)) != sizeof(c))
+ err("pipe write");
+ if (pthread_join(uffd_mon, NULL))
+ err("pthread_join()");
+
+ if (nr_sigbus != nr_pages / 2)
+ err("expected to receive %lu SIGBUS, actually received %lu",
+ nr_pages / 2, nr_sigbus);
+
+ uffd_test_pass();
+}
+
+static void
+uffd_move_handle_fault_common(struct uffd_msg *msg, struct uffd_args *args,
+ unsigned long len)
+{
+ unsigned long offset;
+
+ if (msg->event != UFFD_EVENT_PAGEFAULT)
+ err("unexpected msg event %u", msg->event);
+
+ if (msg->arg.pagefault.flags &
+ (UFFD_PAGEFAULT_FLAG_WP | UFFD_PAGEFAULT_FLAG_MINOR | UFFD_PAGEFAULT_FLAG_WRITE))
+ err("unexpected fault type %llu", msg->arg.pagefault.flags);
+
+ offset = (char *)(unsigned long)msg->arg.pagefault.address - area_dst;
+ offset &= ~(len-1);
+
+ if (move_page(uffd, offset, len))
+ args->missing_faults++;
+}
+
+static void uffd_move_handle_fault(struct uffd_msg *msg,
+ struct uffd_args *args)
+{
+ uffd_move_handle_fault_common(msg, args, page_size);
+}
+
+static void uffd_move_pmd_handle_fault(struct uffd_msg *msg,
+ struct uffd_args *args)
+{
+ uffd_move_handle_fault_common(msg, args, read_pmd_pagesize());
+}
+
+static void
+uffd_move_test_common(uffd_test_args_t *targs, unsigned long chunk_size,
+ void (*handle_fault)(struct uffd_msg *msg, struct uffd_args *args))
+{
+ unsigned long nr;
+ pthread_t uffd_mon;
+ char c;
+ unsigned long long count;
+ struct uffd_args args = { 0 };
+ char *orig_area_src, *orig_area_dst;
+ unsigned long step_size, step_count;
+ unsigned long src_offs = 0;
+ unsigned long dst_offs = 0;
+
+ /* Prevent source pages from being mapped more than once */
+ if (madvise(area_src, nr_pages * page_size, MADV_DONTFORK))
+ err("madvise(MADV_DONTFORK) failure");
+
+ if (uffd_register(uffd, area_dst, nr_pages * page_size,
+ true, false, false))
+ err("register failure");
+
+ args.handle_fault = handle_fault;
+ if (pthread_create(&uffd_mon, NULL, uffd_poll_thread, &args))
+ err("uffd_poll_thread create");
+
+ step_size = chunk_size / page_size;
+ step_count = nr_pages / step_size;
+
+ if (chunk_size > page_size) {
+ char *aligned_src = ALIGN_UP(area_src, chunk_size);
+ char *aligned_dst = ALIGN_UP(area_dst, chunk_size);
+
+ if (aligned_src != area_src || aligned_dst != area_dst) {
+ src_offs = (aligned_src - area_src) / page_size;
+ dst_offs = (aligned_dst - area_dst) / page_size;
+ step_count--;
+ }
+ orig_area_src = area_src;
+ orig_area_dst = area_dst;
+ area_src = aligned_src;
+ area_dst = aligned_dst;
+ }
+
+ /*
+ * Read each of the pages back using the UFFD-registered mapping. We
+ * expect that the first time we touch a page, it will result in a missing
+ * fault. uffd_poll_thread will resolve the fault by moving source
+ * page to destination.
+ */
+ for (nr = 0; nr < step_count * step_size; nr += step_size) {
+ unsigned long i;
+
+ /* Check area_src content */
+ for (i = 0; i < step_size; i++) {
+ count = *area_count(area_src, nr + i);
+ if (count != count_verify[src_offs + nr + i])
+ err("nr %lu source memory invalid %llu %llu\n",
+ nr + i, count, count_verify[src_offs + nr + i]);
+ }
+
+ /* Faulting into area_dst should move the page or the huge page */
+ for (i = 0; i < step_size; i++) {
+ count = *area_count(area_dst, nr + i);
+ if (count != count_verify[dst_offs + nr + i])
+ err("nr %lu memory corruption %llu %llu\n",
+ nr, count, count_verify[dst_offs + nr + i]);
+ }
+
+ /* Re-check area_src content which should be empty */
+ for (i = 0; i < step_size; i++) {
+ count = *area_count(area_src, nr + i);
+ if (count != 0)
+ err("nr %lu move failed %llu %llu\n",
+ nr, count, count_verify[src_offs + nr + i]);
+ }
+ }
+ if (step_size > page_size) {
+ area_src = orig_area_src;
+ area_dst = orig_area_dst;
+ }
+
+ if (write(pipefd[1], &c, sizeof(c)) != sizeof(c))
+ err("pipe write");
+ if (pthread_join(uffd_mon, NULL))
+ err("join() failed");
+
+ if (args.missing_faults != step_count || args.minor_faults != 0)
+ uffd_test_fail("stats check error");
+ else
+ uffd_test_pass();
+}
+
+static void uffd_move_test(uffd_test_args_t *targs)
+{
+ uffd_move_test_common(targs, page_size, uffd_move_handle_fault);
+}
+
+static void uffd_move_pmd_test(uffd_test_args_t *targs)
+{
+ if (madvise(area_dst, nr_pages * page_size, MADV_HUGEPAGE))
+ err("madvise(MADV_HUGEPAGE) failure");
+ uffd_move_test_common(targs, read_pmd_pagesize(),
+ uffd_move_pmd_handle_fault);
+}
+
+static void uffd_move_pmd_split_test(uffd_test_args_t *targs)
+{
+ if (madvise(area_dst, nr_pages * page_size, MADV_NOHUGEPAGE))
+ err("madvise(MADV_NOHUGEPAGE) failure");
+ uffd_move_test_common(targs, read_pmd_pagesize(),
+ uffd_move_pmd_handle_fault);
+}
+
+static int prevent_hugepages(const char **errmsg)
+{
+ /* This should be done before source area is populated */
+ if (madvise(area_src, nr_pages * page_size, MADV_NOHUGEPAGE)) {
+ /* Ignore only if CONFIG_TRANSPARENT_HUGEPAGE=n */
+ if (errno != EINVAL) {
+ if (errmsg)
+ *errmsg = "madvise(MADV_NOHUGEPAGE) failed";
+ return -errno;
+ }
+ }
+ return 0;
+}
+
+static int request_hugepages(const char **errmsg)
+{
+ /* This should be done before source area is populated */
+ if (madvise(area_src, nr_pages * page_size, MADV_HUGEPAGE)) {
+ if (errmsg) {
+ *errmsg = (errno == EINVAL) ?
+ "CONFIG_TRANSPARENT_HUGEPAGE is not set" :
+ "madvise(MADV_HUGEPAGE) failed";
+ }
+ return -errno;
+ }
+ return 0;
+}
+
+struct uffd_test_case_ops uffd_move_test_case_ops = {
+ .post_alloc = prevent_hugepages,
+};
+
+struct uffd_test_case_ops uffd_move_test_pmd_case_ops = {
+ .post_alloc = request_hugepages,
+};
+
+/*
+ * Test the returned uffdio_register.ioctls with different register modes.
+ * Note that _UFFDIO_ZEROPAGE is tested separately in the zeropage test.
+ */
+static void
+do_register_ioctls_test(uffd_test_args_t *args, bool miss, bool wp, bool minor)
+{
+ uint64_t ioctls = 0, expected = BIT_ULL(_UFFDIO_WAKE);
+ mem_type_t *mem_type = args->mem_type;
+ int ret;
+
+ ret = uffd_register_with_ioctls(uffd, area_dst, page_size,
+ miss, wp, minor, &ioctls);
+
+ /*
+ * Handle special cases of UFFDIO_REGISTER here where it should
+ * just fail with -EINVAL first..
+ *
+ * Case 1: register MINOR on anon
+ * Case 2: register with no mode selected
+ */
+ if ((minor && (mem_type->mem_flag == MEM_ANON)) ||
+ (!miss && !wp && !minor)) {
+ if (ret != -EINVAL)
+ err("register (miss=%d, wp=%d, minor=%d) failed "
+ "with wrong errno=%d", miss, wp, minor, ret);
+ return;
+ }
+
+ /* UFFDIO_REGISTER should succeed, then check ioctls returned */
+ if (miss)
+ expected |= BIT_ULL(_UFFDIO_COPY);
+ if (wp)
+ expected |= BIT_ULL(_UFFDIO_WRITEPROTECT);
+ if (minor)
+ expected |= BIT_ULL(_UFFDIO_CONTINUE);
+
+ if ((ioctls & expected) != expected)
+ err("unexpected uffdio_register.ioctls "
+ "(miss=%d, wp=%d, minor=%d): expected=0x%"PRIx64", "
+ "returned=0x%"PRIx64, miss, wp, minor, expected, ioctls);
+
+ if (uffd_unregister(uffd, area_dst, page_size))
+ err("unregister");
+}
+
+static void uffd_register_ioctls_test(uffd_test_args_t *args)
+{
+ int miss, wp, minor;
+
+ for (miss = 0; miss <= 1; miss++)
+ for (wp = 0; wp <= 1; wp++)
+ for (minor = 0; minor <= 1; minor++)
+ do_register_ioctls_test(args, miss, wp, minor);
+
+ uffd_test_pass();
+}
+
+uffd_test_case_t uffd_tests[] = {
+ {
+ /* Test returned uffdio_register.ioctls. */
+ .name = "register-ioctls",
+ .uffd_fn = uffd_register_ioctls_test,
+ .mem_targets = MEM_ALL,
+ .uffd_feature_required = UFFD_FEATURE_MISSING_HUGETLBFS |
+ UFFD_FEATURE_MISSING_SHMEM |
+ UFFD_FEATURE_PAGEFAULT_FLAG_WP |
+ UFFD_FEATURE_WP_HUGETLBFS_SHMEM |
+ UFFD_FEATURE_MINOR_HUGETLBFS |
+ UFFD_FEATURE_MINOR_SHMEM,
+ },
+ {
+ .name = "zeropage",
+ .uffd_fn = uffd_zeropage_test,
+ .mem_targets = MEM_ALL,
+ .uffd_feature_required = 0,
+ },
+ {
+ .name = "move",
+ .uffd_fn = uffd_move_test,
+ .mem_targets = MEM_ANON,
+ .uffd_feature_required = UFFD_FEATURE_MOVE,
+ .test_case_ops = &uffd_move_test_case_ops,
+ },
+ {
+ .name = "move-pmd",
+ .uffd_fn = uffd_move_pmd_test,
+ .mem_targets = MEM_ANON,
+ .uffd_feature_required = UFFD_FEATURE_MOVE,
+ .test_case_ops = &uffd_move_test_pmd_case_ops,
+ },
+ {
+ .name = "move-pmd-split",
+ .uffd_fn = uffd_move_pmd_split_test,
+ .mem_targets = MEM_ANON,
+ .uffd_feature_required = UFFD_FEATURE_MOVE,
+ .test_case_ops = &uffd_move_test_pmd_case_ops,
+ },
+ {
+ .name = "wp-fork",
+ .uffd_fn = uffd_wp_fork_test,
+ .mem_targets = MEM_ALL,
+ .uffd_feature_required = UFFD_FEATURE_PAGEFAULT_FLAG_WP |
+ UFFD_FEATURE_WP_HUGETLBFS_SHMEM,
+ },
+ {
+ .name = "wp-fork-with-event",
+ .uffd_fn = uffd_wp_fork_with_event_test,
+ .mem_targets = MEM_ALL,
+ .uffd_feature_required = UFFD_FEATURE_PAGEFAULT_FLAG_WP |
+ UFFD_FEATURE_WP_HUGETLBFS_SHMEM |
+ /* when set, child process should inherit uffd-wp bits */
+ UFFD_FEATURE_EVENT_FORK,
+ },
+ {
+ .name = "wp-fork-pin",
+ .uffd_fn = uffd_wp_fork_pin_test,
+ .mem_targets = MEM_ALL,
+ .uffd_feature_required = UFFD_FEATURE_PAGEFAULT_FLAG_WP |
+ UFFD_FEATURE_WP_HUGETLBFS_SHMEM,
+ },
+ {
+ .name = "wp-fork-pin-with-event",
+ .uffd_fn = uffd_wp_fork_pin_with_event_test,
+ .mem_targets = MEM_ALL,
+ .uffd_feature_required = UFFD_FEATURE_PAGEFAULT_FLAG_WP |
+ UFFD_FEATURE_WP_HUGETLBFS_SHMEM |
+ /* when set, child process should inherit uffd-wp bits */
+ UFFD_FEATURE_EVENT_FORK,
+ },
+ {
+ .name = "wp-unpopulated",
+ .uffd_fn = uffd_wp_unpopulated_test,
+ .mem_targets = MEM_ANON,
+ .uffd_feature_required =
+ UFFD_FEATURE_PAGEFAULT_FLAG_WP | UFFD_FEATURE_WP_UNPOPULATED,
+ },
+ {
+ .name = "minor",
+ .uffd_fn = uffd_minor_test,
+ .mem_targets = MEM_SHMEM | MEM_HUGETLB,
+ .uffd_feature_required =
+ UFFD_FEATURE_MINOR_HUGETLBFS | UFFD_FEATURE_MINOR_SHMEM,
+ },
+ {
+ .name = "minor-wp",
+ .uffd_fn = uffd_minor_wp_test,
+ .mem_targets = MEM_SHMEM | MEM_HUGETLB,
+ .uffd_feature_required =
+ UFFD_FEATURE_MINOR_HUGETLBFS | UFFD_FEATURE_MINOR_SHMEM |
+ UFFD_FEATURE_PAGEFAULT_FLAG_WP |
+ /*
+ * HACK: here we leveraged WP_UNPOPULATED to detect whether
+ * minor mode supports wr-protect. There's no feature flag
+ * for it so this is the best we can test against.
+ */
+ UFFD_FEATURE_WP_UNPOPULATED,
+ },
+ {
+ .name = "minor-collapse",
+ .uffd_fn = uffd_minor_collapse_test,
+ /* MADV_COLLAPSE only works with shmem */
+ .mem_targets = MEM_SHMEM,
+ /* We can't test MADV_COLLAPSE, so try our luck */
+ .uffd_feature_required = UFFD_FEATURE_MINOR_SHMEM,
+ },
+ {
+ .name = "sigbus",
+ .uffd_fn = uffd_sigbus_test,
+ .mem_targets = MEM_ALL,
+ .uffd_feature_required = UFFD_FEATURE_SIGBUS |
+ UFFD_FEATURE_EVENT_FORK,
+ },
+ {
+ .name = "sigbus-wp",
+ .uffd_fn = uffd_sigbus_wp_test,
+ .mem_targets = MEM_ALL,
+ .uffd_feature_required = UFFD_FEATURE_SIGBUS |
+ UFFD_FEATURE_EVENT_FORK | UFFD_FEATURE_PAGEFAULT_FLAG_WP,
+ },
+ {
+ .name = "events",
+ .uffd_fn = uffd_events_test,
+ .mem_targets = MEM_ALL,
+ .uffd_feature_required = UFFD_FEATURE_EVENT_FORK |
+ UFFD_FEATURE_EVENT_REMAP | UFFD_FEATURE_EVENT_REMOVE,
+ },
+ {
+ .name = "events-wp",
+ .uffd_fn = uffd_events_wp_test,
+ .mem_targets = MEM_ALL,
+ .uffd_feature_required = UFFD_FEATURE_EVENT_FORK |
+ UFFD_FEATURE_EVENT_REMAP | UFFD_FEATURE_EVENT_REMOVE |
+ UFFD_FEATURE_PAGEFAULT_FLAG_WP |
+ UFFD_FEATURE_WP_HUGETLBFS_SHMEM,
+ },
+ {
+ .name = "poison",
+ .uffd_fn = uffd_poison_test,
+ .mem_targets = MEM_ALL,
+ .uffd_feature_required = UFFD_FEATURE_POISON,
+ },
+};
+
+static void usage(const char *prog)
+{
+ printf("usage: %s [-f TESTNAME]\n", prog);
+ puts("");
+ puts(" -f: test name to filter (e.g., event)");
+ puts(" -h: show the help msg");
+ puts(" -l: list tests only");
+ puts("");
+ exit(KSFT_FAIL);
+}
+
+int main(int argc, char *argv[])
+{
+ int n_tests = sizeof(uffd_tests) / sizeof(uffd_test_case_t);
+ int n_mems = sizeof(mem_types) / sizeof(mem_type_t);
+ const char *test_filter = NULL;
+ bool list_only = false;
+ uffd_test_case_t *test;
+ mem_type_t *mem_type;
+ uffd_test_args_t args;
+ const char *errmsg;
+ int has_uffd, opt;
+ int i, j;
+
+ while ((opt = getopt(argc, argv, "f:hl")) != -1) {
+ switch (opt) {
+ case 'f':
+ test_filter = optarg;
+ break;
+ case 'l':
+ list_only = true;
+ break;
+ case 'h':
+ default:
+ /* Unknown */
+ usage(argv[0]);
+ break;
+ }
+ }
+
+ if (!test_filter && !list_only) {
+ has_uffd = test_uffd_api(false);
+ has_uffd |= test_uffd_api(true);
+
+ if (!has_uffd) {
+ printf("Userfaultfd not supported or unprivileged, skip all tests\n");
+ exit(KSFT_SKIP);
+ }
+ }
+
+ for (i = 0; i < n_tests; i++) {
+ test = &uffd_tests[i];
+ if (test_filter && !strstr(test->name, test_filter))
+ continue;
+ if (list_only) {
+ printf("%s\n", test->name);
+ continue;
+ }
+ for (j = 0; j < n_mems; j++) {
+ mem_type = &mem_types[j];
+ if (!(test->mem_targets & mem_type->mem_flag))
+ continue;
+
+ uffd_test_start("%s on %s", test->name, mem_type->name);
+ if ((mem_type->mem_flag == MEM_HUGETLB ||
+ mem_type->mem_flag == MEM_HUGETLB_PRIVATE) &&
+ (default_huge_page_size() == 0)) {
+ uffd_test_skip("huge page size is 0, feature missing?");
+ continue;
+ }
+ if (!uffd_feature_supported(test)) {
+ uffd_test_skip("feature missing");
+ continue;
+ }
+ if (uffd_setup_environment(&args, test, mem_type,
+ &errmsg)) {
+ uffd_test_skip(errmsg);
+ continue;
+ }
+ test->uffd_fn(&args);
+ uffd_test_ctx_clear();
+ }
+ }
+
+ if (!list_only)
+ uffd_test_report();
+
+ return ksft_get_fail_cnt() ? KSFT_FAIL : KSFT_PASS;
+}
+
+#else /* __NR_userfaultfd */
+
+#warning "missing __NR_userfaultfd definition"
+
+int main(void)
+{
+ printf("Skipping %s (missing __NR_userfaultfd)\n", __file__);
+ return KSFT_SKIP;
+}
+
+#endif /* __NR_userfaultfd */
diff --git a/tools/testing/selftests/vm/va_128TBswitch.c b/tools/testing/selftests/mm/va_high_addr_switch.c
index 83acdff26a13..cfbc501290d3 100644
--- a/tools/testing/selftests/vm/va_128TBswitch.c
+++ b/tools/testing/selftests/mm/va_high_addr_switch.c
@@ -9,7 +9,7 @@
#include <sys/mman.h>
#include <string.h>
-#define ARRAY_SIZE(arr) (sizeof(arr) / sizeof((arr)[0]))
+#include "../kselftest.h"
#ifdef __powerpc64__
#define PAGE_SIZE (64 << 10)
@@ -17,18 +17,38 @@
* This will work with 16M and 2M hugepage size
*/
#define HUGETLB_SIZE (16 << 20)
+#elif __aarch64__
+/*
+ * The default hugepage size for 64k base pagesize
+ * is 512MB.
+ */
+#define PAGE_SIZE (64 << 10)
+#define HUGETLB_SIZE (512 << 20)
#else
#define PAGE_SIZE (4 << 10)
#define HUGETLB_SIZE (2 << 20)
#endif
/*
- * >= 128TB is the hint addr value we used to select
- * large address space.
+ * The hint addr value is used to allocate addresses
+ * beyond the high address switch boundary.
*/
-#define ADDR_SWITCH_HINT (1UL << 47)
+
+#define ADDR_MARK_128TB (1UL << 47)
+#define ADDR_MARK_256TB (1UL << 48)
+
+#define HIGH_ADDR_128TB ((void *) (1UL << 48))
+#define HIGH_ADDR_256TB ((void *) (1UL << 49))
+
#define LOW_ADDR ((void *) (1UL << 30))
-#define HIGH_ADDR ((void *) (1UL << 48))
+
+#ifdef __aarch64__
+#define ADDR_SWITCH_HINT ADDR_MARK_256TB
+#define HIGH_ADDR HIGH_ADDR_256TB
+#else
+#define ADDR_SWITCH_HINT ADDR_MARK_128TB
+#define HIGH_ADDR HIGH_ADDR_128TB
+#endif
struct testcase {
void *addr;
@@ -53,9 +73,10 @@ static struct testcase testcases[] = {
},
{
/*
- * We should never allocate at the requested address or above it
- * The len cross the 128TB boundary. Without MAP_FIXED
- * we will always search in the lower address space.
+ * Unless MAP_FIXED is specified, allocation based on hint
+ * addr is never at requested address or above it, which is
+ * beyond high address switch boundary in this case. Instead,
+ * a suitable allocation is found in lower address space.
*/
.addr = ((void *)(ADDR_SWITCH_HINT - PAGE_SIZE)),
.size = 2 * PAGE_SIZE,
@@ -65,8 +86,8 @@ static struct testcase testcases[] = {
},
{
/*
- * Exact mapping at 128TB, the area is free we should get that
- * even without MAP_FIXED.
+ * Exact mapping at high address switch boundary, should
+ * be obtained even without MAP_FIXED as area is free.
*/
.addr = ((void *)(ADDR_SWITCH_HINT)),
.size = PAGE_SIZE,
@@ -231,7 +252,7 @@ static struct testcase hugetlb_testcases[] = {
static int run_test(struct testcase *test, int count)
{
void *p;
- int i, ret = 0;
+ int i, ret = KSFT_PASS;
for (i = 0; i < count; i++) {
struct testcase *t = test + i;
@@ -242,13 +263,13 @@ static int run_test(struct testcase *test, int count)
if (p == MAP_FAILED) {
printf("FAILED\n");
- ret = 1;
+ ret = KSFT_FAIL;
continue;
}
if (t->low_addr_required && p >= (void *)(ADDR_SWITCH_HINT)) {
printf("FAILED\n");
- ret = 1;
+ ret = KSFT_FAIL;
} else {
/*
* Do a dereference of the address returned so that we catch
@@ -270,6 +291,8 @@ static int supported_arch(void)
return 1;
#elif defined(__x86_64__)
return 1;
+#elif defined(__aarch64__)
+ return getpagesize() == PAGE_SIZE;
#else
return 0;
#endif
@@ -280,7 +303,7 @@ int main(int argc, char **argv)
int ret;
if (!supported_arch())
- return 0;
+ return KSFT_SKIP;
ret = run_test(testcases, ARRAY_SIZE(testcases));
if (argc == 2 && !strcmp(argv[1], "--run-hugetlb"))
diff --git a/tools/testing/selftests/mm/va_high_addr_switch.sh b/tools/testing/selftests/mm/va_high_addr_switch.sh
new file mode 100755
index 000000000000..a0a75f302904
--- /dev/null
+++ b/tools/testing/selftests/mm/va_high_addr_switch.sh
@@ -0,0 +1,64 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+#
+# Copyright (C) 2022 Adam Sindelar (Meta) <adam@wowsignal.io>
+#
+# This is a test for mmap behavior with 5-level paging. This script wraps the
+# real test to check that the kernel is configured to support at least 5
+# pagetable levels.
+
+# 1 means the test failed
+exitcode=1
+
+# Kselftest framework requirement - SKIP code is 4.
+ksft_skip=4
+
+fail()
+{
+ echo "$1"
+ exit $exitcode
+}
+
+check_supported_x86_64()
+{
+ local config="/proc/config.gz"
+ [[ -f "${config}" ]] || config="/boot/config-$(uname -r)"
+ [[ -f "${config}" ]] || fail "Cannot find kernel config in /proc or /boot"
+
+ # gzip -dcfq automatically handles both compressed and plaintext input.
+ # See man 1 gzip under '-f'.
+ local pg_table_levels=$(gzip -dcfq "${config}" | grep PGTABLE_LEVELS | cut -d'=' -f 2)
+
+ local cpu_supports_pl5=$(awk '/^flags/ {if (/la57/) {print 0;}
+ else {print 1}; exit}' /proc/cpuinfo 2>/dev/null)
+
+ if [[ "${pg_table_levels}" -lt 5 ]]; then
+ echo "$0: PGTABLE_LEVELS=${pg_table_levels}, must be >= 5 to run this test"
+ exit $ksft_skip
+ elif [[ "${cpu_supports_pl5}" -ne 0 ]]; then
+ echo "$0: CPU does not have the necessary la57 flag to support page table level 5"
+ exit $ksft_skip
+ fi
+}
+
+check_test_requirements()
+{
+ # The test supports x86_64 and powerpc64. We currently have no useful
+ # eligibility check for powerpc64, and the test itself will reject other
+ # architectures.
+ case `uname -m` in
+ "x86_64")
+ check_supported_x86_64
+ ;;
+ *)
+ return 0
+ ;;
+ esac
+}
+
+check_test_requirements
+./va_high_addr_switch
+
+# In order to run hugetlb testcases, "--run-hugetlb" must be appended
+# to the binary.
+./va_high_addr_switch --run-hugetlb
diff --git a/tools/testing/selftests/vm/virtual_address_range.c b/tools/testing/selftests/mm/virtual_address_range.c
index c0592646ed93..7bcf8d48256a 100644
--- a/tools/testing/selftests/vm/virtual_address_range.c
+++ b/tools/testing/selftests/mm/virtual_address_range.c
@@ -12,14 +12,19 @@
#include <errno.h>
#include <sys/mman.h>
#include <sys/time.h>
+#include "../kselftest.h"
/*
* Maximum address range mapped with a single mmap()
- * call is little bit more than 16GB. Hence 16GB is
+ * call is little bit more than 1GB. Hence 1GB is
* chosen as the single chunk size for address space
* mapping.
*/
-#define MAP_CHUNK_SIZE 17179869184UL /* 16GB */
+
+#define SZ_1GB (1024 * 1024 * 1024UL)
+#define SZ_1TB (1024 * 1024 * 1024 * 1024UL)
+
+#define MAP_CHUNK_SIZE SZ_1GB
/*
* Address space till 128TB is mapped without any hint
@@ -32,13 +37,15 @@
* till it reaches 512TB. One with size 128TB and the
* other being 384TB.
*
- * On Arm64 the address space is 256TB and no high mappings
- * are supported so far.
+ * On Arm64 the address space is 256TB and support for
+ * high mappings up to 4PB virtual address space has
+ * been added.
*/
-#define NR_CHUNKS_128TB 8192UL /* Number of 16GB chunks for 128TB */
+#define NR_CHUNKS_128TB ((128 * SZ_1TB) / MAP_CHUNK_SIZE) /* Number of chunks for 128TB */
#define NR_CHUNKS_256TB (NR_CHUNKS_128TB * 2UL)
#define NR_CHUNKS_384TB (NR_CHUNKS_128TB * 3UL)
+#define NR_CHUNKS_3840TB (NR_CHUNKS_128TB * 30UL)
#define ADDR_MARK_128TB (1UL << 47) /* First address beyond 128TB */
#define ADDR_MARK_256TB (1UL << 48) /* First address beyond 256TB */
@@ -47,7 +54,7 @@
#define HIGH_ADDR_MARK ADDR_MARK_256TB
#define HIGH_ADDR_SHIFT 49
#define NR_CHUNKS_LOW NR_CHUNKS_256TB
-#define NR_CHUNKS_HIGH 0
+#define NR_CHUNKS_HIGH NR_CHUNKS_3840TB
#else
#define HIGH_ADDR_MARK ADDR_MARK_128TB
#define HIGH_ADDR_SHIFT 48
@@ -62,23 +69,15 @@ static char *hind_addr(void)
return (char *) (1UL << bits);
}
-static int validate_addr(char *ptr, int high_addr)
+static void validate_addr(char *ptr, int high_addr)
{
unsigned long addr = (unsigned long) ptr;
- if (high_addr) {
- if (addr < HIGH_ADDR_MARK) {
- printf("Bad address %lx\n", addr);
- return 1;
- }
- return 0;
- }
+ if (high_addr && addr < HIGH_ADDR_MARK)
+ ksft_exit_fail_msg("Bad address %lx\n", addr);
- if (addr > HIGH_ADDR_MARK) {
- printf("Bad address %lx\n", addr);
- return 1;
- }
- return 0;
+ if (addr > HIGH_ADDR_MARK)
+ ksft_exit_fail_msg("Bad address %lx\n", addr);
}
static int validate_lower_address_hint(void)
@@ -97,24 +96,33 @@ static int validate_lower_address_hint(void)
int main(int argc, char *argv[])
{
char *ptr[NR_CHUNKS_LOW];
- char *hptr[NR_CHUNKS_HIGH];
+ char **hptr;
char *hint;
unsigned long i, lchunks, hchunks;
+ ksft_print_header();
+ ksft_set_plan(1);
+
for (i = 0; i < NR_CHUNKS_LOW; i++) {
ptr[i] = mmap(NULL, MAP_CHUNK_SIZE, PROT_READ | PROT_WRITE,
MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
if (ptr[i] == MAP_FAILED) {
- if (validate_lower_address_hint())
- return 1;
+ if (validate_lower_address_hint()) {
+ ksft_test_result_skip("Memory constraint not fulfilled\n");
+ ksft_finished();
+ }
break;
}
- if (validate_addr(ptr[i], 0))
- return 1;
+ validate_addr(ptr[i], 0);
}
lchunks = i;
+ hptr = (char **) calloc(NR_CHUNKS_HIGH, sizeof(char *));
+ if (hptr == NULL) {
+ ksft_test_result_skip("Memory constraint not fulfilled\n");
+ ksft_finished();
+ }
for (i = 0; i < NR_CHUNKS_HIGH; i++) {
hint = hind_addr();
@@ -124,8 +132,7 @@ int main(int argc, char *argv[])
if (hptr[i] == MAP_FAILED)
break;
- if (validate_addr(hptr[i], 1))
- return 1;
+ validate_addr(hptr[i], 1);
}
hchunks = i;
@@ -135,5 +142,8 @@ int main(int argc, char *argv[])
for (i = 0; i < hchunks; i++)
munmap(hptr[i], MAP_CHUNK_SIZE);
- return 0;
+ free(hptr);
+
+ ksft_test_result_pass("Test\n");
+ ksft_finished();
}
diff --git a/tools/testing/selftests/mm/vm_util.c b/tools/testing/selftests/mm/vm_util.c
new file mode 100644
index 000000000000..5a62530da3b5
--- /dev/null
+++ b/tools/testing/selftests/mm/vm_util.c
@@ -0,0 +1,364 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <string.h>
+#include <fcntl.h>
+#include <dirent.h>
+#include <sys/ioctl.h>
+#include <linux/userfaultfd.h>
+#include <linux/fs.h>
+#include <sys/syscall.h>
+#include <unistd.h>
+#include "../kselftest.h"
+#include "vm_util.h"
+
+#define PMD_SIZE_FILE_PATH "/sys/kernel/mm/transparent_hugepage/hpage_pmd_size"
+#define SMAP_FILE_PATH "/proc/self/smaps"
+#define MAX_LINE_LENGTH 500
+
+unsigned int __page_size;
+unsigned int __page_shift;
+
+uint64_t pagemap_get_entry(int fd, char *start)
+{
+ const unsigned long pfn = (unsigned long)start / getpagesize();
+ uint64_t entry;
+ int ret;
+
+ ret = pread(fd, &entry, sizeof(entry), pfn * sizeof(entry));
+ if (ret != sizeof(entry))
+ ksft_exit_fail_msg("reading pagemap failed\n");
+ return entry;
+}
+
+static uint64_t __pagemap_scan_get_categories(int fd, char *start, struct page_region *r)
+{
+ struct pm_scan_arg arg;
+
+ arg.start = (uintptr_t)start;
+ arg.end = (uintptr_t)(start + psize());
+ arg.vec = (uintptr_t)r;
+ arg.vec_len = 1;
+ arg.flags = 0;
+ arg.size = sizeof(struct pm_scan_arg);
+ arg.max_pages = 0;
+ arg.category_inverted = 0;
+ arg.category_mask = 0;
+ arg.category_anyof_mask = PAGE_IS_WPALLOWED | PAGE_IS_WRITTEN | PAGE_IS_FILE |
+ PAGE_IS_PRESENT | PAGE_IS_SWAPPED | PAGE_IS_PFNZERO |
+ PAGE_IS_HUGE | PAGE_IS_SOFT_DIRTY;
+ arg.return_mask = arg.category_anyof_mask;
+
+ return ioctl(fd, PAGEMAP_SCAN, &arg);
+}
+
+static uint64_t pagemap_scan_get_categories(int fd, char *start)
+{
+ struct page_region r;
+ long ret;
+
+ ret = __pagemap_scan_get_categories(fd, start, &r);
+ if (ret < 0)
+ ksft_exit_fail_msg("PAGEMAP_SCAN failed: %s\n", strerror(errno));
+ if (ret == 0)
+ return 0;
+ return r.categories;
+}
+
+/* `start` is any valid address. */
+static bool pagemap_scan_supported(int fd, char *start)
+{
+ static int supported = -1;
+ int ret;
+
+ if (supported != -1)
+ return supported;
+
+ /* Provide an invalid address in order to trigger EFAULT. */
+ ret = __pagemap_scan_get_categories(fd, start, (struct page_region *) ~0UL);
+ if (ret == 0)
+ ksft_exit_fail_msg("PAGEMAP_SCAN succeeded unexpectedly\n");
+
+ supported = errno == EFAULT;
+
+ return supported;
+}
+
+static bool page_entry_is(int fd, char *start, char *desc,
+ uint64_t pagemap_flags, uint64_t pagescan_flags)
+{
+ bool m = pagemap_get_entry(fd, start) & pagemap_flags;
+
+ if (pagemap_scan_supported(fd, start)) {
+ bool s = pagemap_scan_get_categories(fd, start) & pagescan_flags;
+
+ if (m == s)
+ return m;
+
+ ksft_exit_fail_msg(
+ "read and ioctl return unmatched results for %s: %d %d", desc, m, s);
+ }
+ return m;
+}
+
+bool pagemap_is_softdirty(int fd, char *start)
+{
+ return page_entry_is(fd, start, "soft-dirty",
+ PM_SOFT_DIRTY, PAGE_IS_SOFT_DIRTY);
+}
+
+bool pagemap_is_swapped(int fd, char *start)
+{
+ return page_entry_is(fd, start, "swap", PM_SWAP, PAGE_IS_SWAPPED);
+}
+
+bool pagemap_is_populated(int fd, char *start)
+{
+ return page_entry_is(fd, start, "populated",
+ PM_PRESENT | PM_SWAP,
+ PAGE_IS_PRESENT | PAGE_IS_SWAPPED);
+}
+
+unsigned long pagemap_get_pfn(int fd, char *start)
+{
+ uint64_t entry = pagemap_get_entry(fd, start);
+
+ /* If present (63th bit), PFN is at bit 0 -- 54. */
+ if (entry & PM_PRESENT)
+ return entry & 0x007fffffffffffffull;
+ return -1ul;
+}
+
+void clear_softdirty(void)
+{
+ int ret;
+ const char *ctrl = "4";
+ int fd = open("/proc/self/clear_refs", O_WRONLY);
+
+ if (fd < 0)
+ ksft_exit_fail_msg("opening clear_refs failed\n");
+ ret = write(fd, ctrl, strlen(ctrl));
+ close(fd);
+ if (ret != strlen(ctrl))
+ ksft_exit_fail_msg("writing clear_refs failed\n");
+}
+
+bool check_for_pattern(FILE *fp, const char *pattern, char *buf, size_t len)
+{
+ while (fgets(buf, len, fp)) {
+ if (!strncmp(buf, pattern, strlen(pattern)))
+ return true;
+ }
+ return false;
+}
+
+uint64_t read_pmd_pagesize(void)
+{
+ int fd;
+ char buf[20];
+ ssize_t num_read;
+
+ fd = open(PMD_SIZE_FILE_PATH, O_RDONLY);
+ if (fd == -1)
+ return 0;
+
+ num_read = read(fd, buf, 19);
+ if (num_read < 1) {
+ close(fd);
+ return 0;
+ }
+ buf[num_read] = '\0';
+ close(fd);
+
+ return strtoul(buf, NULL, 10);
+}
+
+bool __check_huge(void *addr, char *pattern, int nr_hpages,
+ uint64_t hpage_size)
+{
+ uint64_t thp = -1;
+ int ret;
+ FILE *fp;
+ char buffer[MAX_LINE_LENGTH];
+ char addr_pattern[MAX_LINE_LENGTH];
+
+ ret = snprintf(addr_pattern, MAX_LINE_LENGTH, "%08lx-",
+ (unsigned long) addr);
+ if (ret >= MAX_LINE_LENGTH)
+ ksft_exit_fail_msg("%s: Pattern is too long\n", __func__);
+
+ fp = fopen(SMAP_FILE_PATH, "r");
+ if (!fp)
+ ksft_exit_fail_msg("%s: Failed to open file %s\n", __func__, SMAP_FILE_PATH);
+
+ if (!check_for_pattern(fp, addr_pattern, buffer, sizeof(buffer)))
+ goto err_out;
+
+ /*
+ * Fetch the pattern in the same block and check the number of
+ * hugepages.
+ */
+ if (!check_for_pattern(fp, pattern, buffer, sizeof(buffer)))
+ goto err_out;
+
+ snprintf(addr_pattern, MAX_LINE_LENGTH, "%s%%9ld kB", pattern);
+
+ if (sscanf(buffer, addr_pattern, &thp) != 1)
+ ksft_exit_fail_msg("Reading smap error\n");
+
+err_out:
+ fclose(fp);
+ return thp == (nr_hpages * (hpage_size >> 10));
+}
+
+bool check_huge_anon(void *addr, int nr_hpages, uint64_t hpage_size)
+{
+ return __check_huge(addr, "AnonHugePages: ", nr_hpages, hpage_size);
+}
+
+bool check_huge_file(void *addr, int nr_hpages, uint64_t hpage_size)
+{
+ return __check_huge(addr, "FilePmdMapped:", nr_hpages, hpage_size);
+}
+
+bool check_huge_shmem(void *addr, int nr_hpages, uint64_t hpage_size)
+{
+ return __check_huge(addr, "ShmemPmdMapped:", nr_hpages, hpage_size);
+}
+
+int64_t allocate_transhuge(void *ptr, int pagemap_fd)
+{
+ uint64_t ent[2];
+
+ /* drop pmd */
+ if (mmap(ptr, HPAGE_SIZE, PROT_READ | PROT_WRITE,
+ MAP_FIXED | MAP_ANONYMOUS |
+ MAP_NORESERVE | MAP_PRIVATE, -1, 0) != ptr)
+ ksft_exit_fail_msg("mmap transhuge\n");
+
+ if (madvise(ptr, HPAGE_SIZE, MADV_HUGEPAGE))
+ ksft_exit_fail_msg("MADV_HUGEPAGE\n");
+
+ /* allocate transparent huge page */
+ *(volatile void **)ptr = ptr;
+
+ if (pread(pagemap_fd, ent, sizeof(ent),
+ (uintptr_t)ptr >> (pshift() - 3)) != sizeof(ent))
+ ksft_exit_fail_msg("read pagemap\n");
+
+ if (PAGEMAP_PRESENT(ent[0]) && PAGEMAP_PRESENT(ent[1]) &&
+ PAGEMAP_PFN(ent[0]) + 1 == PAGEMAP_PFN(ent[1]) &&
+ !(PAGEMAP_PFN(ent[0]) & ((1 << (HPAGE_SHIFT - pshift())) - 1)))
+ return PAGEMAP_PFN(ent[0]);
+
+ return -1;
+}
+
+unsigned long default_huge_page_size(void)
+{
+ unsigned long hps = 0;
+ char *line = NULL;
+ size_t linelen = 0;
+ FILE *f = fopen("/proc/meminfo", "r");
+
+ if (!f)
+ return 0;
+ while (getline(&line, &linelen, f) > 0) {
+ if (sscanf(line, "Hugepagesize: %lu kB", &hps) == 1) {
+ hps <<= 10;
+ break;
+ }
+ }
+
+ free(line);
+ fclose(f);
+ return hps;
+}
+
+int detect_hugetlb_page_sizes(size_t sizes[], int max)
+{
+ DIR *dir = opendir("/sys/kernel/mm/hugepages/");
+ int count = 0;
+
+ if (!dir)
+ return 0;
+
+ while (count < max) {
+ struct dirent *entry = readdir(dir);
+ size_t kb;
+
+ if (!entry)
+ break;
+ if (entry->d_type != DT_DIR)
+ continue;
+ if (sscanf(entry->d_name, "hugepages-%zukB", &kb) != 1)
+ continue;
+ sizes[count++] = kb * 1024;
+ ksft_print_msg("[INFO] detected hugetlb page size: %zu KiB\n",
+ kb);
+ }
+ closedir(dir);
+ return count;
+}
+
+/* If `ioctls' non-NULL, the allowed ioctls will be returned into the var */
+int uffd_register_with_ioctls(int uffd, void *addr, uint64_t len,
+ bool miss, bool wp, bool minor, uint64_t *ioctls)
+{
+ struct uffdio_register uffdio_register = { 0 };
+ uint64_t mode = 0;
+ int ret = 0;
+
+ if (miss)
+ mode |= UFFDIO_REGISTER_MODE_MISSING;
+ if (wp)
+ mode |= UFFDIO_REGISTER_MODE_WP;
+ if (minor)
+ mode |= UFFDIO_REGISTER_MODE_MINOR;
+
+ uffdio_register.range.start = (unsigned long)addr;
+ uffdio_register.range.len = len;
+ uffdio_register.mode = mode;
+
+ if (ioctl(uffd, UFFDIO_REGISTER, &uffdio_register) == -1)
+ ret = -errno;
+ else if (ioctls)
+ *ioctls = uffdio_register.ioctls;
+
+ return ret;
+}
+
+int uffd_register(int uffd, void *addr, uint64_t len,
+ bool miss, bool wp, bool minor)
+{
+ return uffd_register_with_ioctls(uffd, addr, len,
+ miss, wp, minor, NULL);
+}
+
+int uffd_unregister(int uffd, void *addr, uint64_t len)
+{
+ struct uffdio_range range = { .start = (uintptr_t)addr, .len = len };
+ int ret = 0;
+
+ if (ioctl(uffd, UFFDIO_UNREGISTER, &range) == -1)
+ ret = -errno;
+
+ return ret;
+}
+
+unsigned long get_free_hugepages(void)
+{
+ unsigned long fhp = 0;
+ char *line = NULL;
+ size_t linelen = 0;
+ FILE *f = fopen("/proc/meminfo", "r");
+
+ if (!f)
+ return fhp;
+ while (getline(&line, &linelen, f) > 0) {
+ if (sscanf(line, "HugePages_Free: %lu", &fhp) == 1)
+ break;
+ }
+
+ free(line);
+ fclose(f);
+ return fhp;
+}
diff --git a/tools/testing/selftests/mm/vm_util.h b/tools/testing/selftests/mm/vm_util.h
new file mode 100644
index 000000000000..c02990bbd56f
--- /dev/null
+++ b/tools/testing/selftests/mm/vm_util.h
@@ -0,0 +1,63 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#include <stdint.h>
+#include <stdbool.h>
+#include <sys/mman.h>
+#include <err.h>
+#include <string.h> /* ffsl() */
+#include <unistd.h> /* _SC_PAGESIZE */
+
+#define BIT_ULL(nr) (1ULL << (nr))
+#define PM_SOFT_DIRTY BIT_ULL(55)
+#define PM_MMAP_EXCLUSIVE BIT_ULL(56)
+#define PM_UFFD_WP BIT_ULL(57)
+#define PM_FILE BIT_ULL(61)
+#define PM_SWAP BIT_ULL(62)
+#define PM_PRESENT BIT_ULL(63)
+
+extern unsigned int __page_size;
+extern unsigned int __page_shift;
+
+static inline unsigned int psize(void)
+{
+ if (!__page_size)
+ __page_size = sysconf(_SC_PAGESIZE);
+ return __page_size;
+}
+
+static inline unsigned int pshift(void)
+{
+ if (!__page_shift)
+ __page_shift = (ffsl(psize()) - 1);
+ return __page_shift;
+}
+
+uint64_t pagemap_get_entry(int fd, char *start);
+bool pagemap_is_softdirty(int fd, char *start);
+bool pagemap_is_swapped(int fd, char *start);
+bool pagemap_is_populated(int fd, char *start);
+unsigned long pagemap_get_pfn(int fd, char *start);
+void clear_softdirty(void);
+bool check_for_pattern(FILE *fp, const char *pattern, char *buf, size_t len);
+uint64_t read_pmd_pagesize(void);
+bool check_huge_anon(void *addr, int nr_hpages, uint64_t hpage_size);
+bool check_huge_file(void *addr, int nr_hpages, uint64_t hpage_size);
+bool check_huge_shmem(void *addr, int nr_hpages, uint64_t hpage_size);
+int64_t allocate_transhuge(void *ptr, int pagemap_fd);
+unsigned long default_huge_page_size(void);
+int detect_hugetlb_page_sizes(size_t sizes[], int max);
+
+int uffd_register(int uffd, void *addr, uint64_t len,
+ bool miss, bool wp, bool minor);
+int uffd_unregister(int uffd, void *addr, uint64_t len);
+int uffd_register_with_ioctls(int uffd, void *addr, uint64_t len,
+ bool miss, bool wp, bool minor, uint64_t *ioctls);
+unsigned long get_free_hugepages(void);
+
+/*
+ * On ppc64 this will only work with radix 2M hugepage size
+ */
+#define HPAGE_SHIFT 21
+#define HPAGE_SIZE (1 << HPAGE_SHIFT)
+
+#define PAGEMAP_PRESENT(ent) (((ent) & (1ull << 63)) != 0)
+#define PAGEMAP_PFN(ent) ((ent) & ((1ull << 55) - 1))
diff --git a/tools/testing/selftests/vm/write_hugetlb_memory.sh b/tools/testing/selftests/mm/write_hugetlb_memory.sh
index d3d0d108924d..3d2d2eb9d6ff 100644..100755
--- a/tools/testing/selftests/vm/write_hugetlb_memory.sh
+++ b/tools/testing/selftests/mm/write_hugetlb_memory.sh
@@ -1,4 +1,4 @@
-#!/bin/sh
+#!/bin/bash
# SPDX-License-Identifier: GPL-2.0
set -e
@@ -14,7 +14,7 @@ want_sleep=$8
reserve=$9
echo "Putting task in cgroup '$cgroup'"
-echo $$ > /dev/cgroup/memory/"$cgroup"/cgroup.procs
+echo $$ > ${cgroup_path:-/dev/cgroup/memory}/"$cgroup"/cgroup.procs
echo "Method is $method"
diff --git a/tools/testing/selftests/vm/write_to_hugetlbfs.c b/tools/testing/selftests/mm/write_to_hugetlbfs.c
index 6a2caba19ee1..6a2caba19ee1 100644
--- a/tools/testing/selftests/vm/write_to_hugetlbfs.c
+++ b/tools/testing/selftests/mm/write_to_hugetlbfs.c
diff --git a/tools/testing/selftests/mount/.gitignore b/tools/testing/selftests/mount/.gitignore
index 0bc64a6d4c18..17f2d8415162 100644
--- a/tools/testing/selftests/mount/.gitignore
+++ b/tools/testing/selftests/mount/.gitignore
@@ -1,2 +1,3 @@
# SPDX-License-Identifier: GPL-2.0-only
unprivileged-remount-test
+nosymfollow-test
diff --git a/tools/testing/selftests/mount/Makefile b/tools/testing/selftests/mount/Makefile
index 026890744215..2d9454841644 100644
--- a/tools/testing/selftests/mount/Makefile
+++ b/tools/testing/selftests/mount/Makefile
@@ -3,7 +3,7 @@
CFLAGS = -Wall \
-O2
-TEST_PROGS := run_tests.sh
-TEST_GEN_FILES := unprivileged-remount-test
+TEST_PROGS := run_unprivileged_remount.sh run_nosymfollow.sh
+TEST_GEN_FILES := unprivileged-remount-test nosymfollow-test
include ../lib.mk
diff --git a/tools/testing/selftests/mount/nosymfollow-test.c b/tools/testing/selftests/mount/nosymfollow-test.c
new file mode 100644
index 000000000000..650d6d80a1d2
--- /dev/null
+++ b/tools/testing/selftests/mount/nosymfollow-test.c
@@ -0,0 +1,218 @@
+// SPDX-License-Identifier: GPL-2.0
+#define _GNU_SOURCE
+#include <errno.h>
+#include <fcntl.h>
+#include <limits.h>
+#include <sched.h>
+#include <stdarg.h>
+#include <stdbool.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/mount.h>
+#include <sys/stat.h>
+#include <sys/types.h>
+#include <sys/vfs.h>
+#include <unistd.h>
+
+#ifndef MS_NOSYMFOLLOW
+# define MS_NOSYMFOLLOW 256 /* Do not follow symlinks */
+#endif
+
+#ifndef ST_NOSYMFOLLOW
+# define ST_NOSYMFOLLOW 0x2000 /* Do not follow symlinks */
+#endif
+
+#define DATA "/tmp/data"
+#define LINK "/tmp/symlink"
+#define TMP "/tmp"
+
+static void die(char *fmt, ...)
+{
+ va_list ap;
+
+ va_start(ap, fmt);
+ vfprintf(stderr, fmt, ap);
+ va_end(ap);
+ exit(EXIT_FAILURE);
+}
+
+static void vmaybe_write_file(bool enoent_ok, char *filename, char *fmt,
+ va_list ap)
+{
+ ssize_t written;
+ char buf[4096];
+ int buf_len;
+ int fd;
+
+ buf_len = vsnprintf(buf, sizeof(buf), fmt, ap);
+ if (buf_len < 0)
+ die("vsnprintf failed: %s\n", strerror(errno));
+
+ if (buf_len >= sizeof(buf))
+ die("vsnprintf output truncated\n");
+
+ fd = open(filename, O_WRONLY);
+ if (fd < 0) {
+ if ((errno == ENOENT) && enoent_ok)
+ return;
+ die("open of %s failed: %s\n", filename, strerror(errno));
+ }
+
+ written = write(fd, buf, buf_len);
+ if (written != buf_len) {
+ if (written >= 0) {
+ die("short write to %s\n", filename);
+ } else {
+ die("write to %s failed: %s\n",
+ filename, strerror(errno));
+ }
+ }
+
+ if (close(fd) != 0)
+ die("close of %s failed: %s\n", filename, strerror(errno));
+}
+
+static void maybe_write_file(char *filename, char *fmt, ...)
+{
+ va_list ap;
+
+ va_start(ap, fmt);
+ vmaybe_write_file(true, filename, fmt, ap);
+ va_end(ap);
+}
+
+static void write_file(char *filename, char *fmt, ...)
+{
+ va_list ap;
+
+ va_start(ap, fmt);
+ vmaybe_write_file(false, filename, fmt, ap);
+ va_end(ap);
+}
+
+static void create_and_enter_ns(void)
+{
+ uid_t uid = getuid();
+ gid_t gid = getgid();
+
+ if (unshare(CLONE_NEWUSER) != 0)
+ die("unshare(CLONE_NEWUSER) failed: %s\n", strerror(errno));
+
+ maybe_write_file("/proc/self/setgroups", "deny");
+ write_file("/proc/self/uid_map", "0 %d 1", uid);
+ write_file("/proc/self/gid_map", "0 %d 1", gid);
+
+ if (setgid(0) != 0)
+ die("setgid(0) failed %s\n", strerror(errno));
+ if (setuid(0) != 0)
+ die("setuid(0) failed %s\n", strerror(errno));
+
+ if (unshare(CLONE_NEWNS) != 0)
+ die("unshare(CLONE_NEWNS) failed: %s\n", strerror(errno));
+}
+
+static void setup_symlink(void)
+{
+ int data, err;
+
+ data = creat(DATA, O_RDWR);
+ if (data < 0)
+ die("creat failed: %s\n", strerror(errno));
+
+ err = symlink(DATA, LINK);
+ if (err < 0)
+ die("symlink failed: %s\n", strerror(errno));
+
+ if (close(data) != 0)
+ die("close of %s failed: %s\n", DATA, strerror(errno));
+}
+
+static void test_link_traversal(bool nosymfollow)
+{
+ int link;
+
+ link = open(LINK, 0, O_RDWR);
+ if (nosymfollow) {
+ if ((link != -1 || errno != ELOOP)) {
+ die("link traversal unexpected result: %d, %s\n",
+ link, strerror(errno));
+ }
+ } else {
+ if (link < 0)
+ die("link traversal failed: %s\n", strerror(errno));
+
+ if (close(link) != 0)
+ die("close of link failed: %s\n", strerror(errno));
+ }
+}
+
+static void test_readlink(void)
+{
+ char buf[4096];
+ ssize_t ret;
+
+ bzero(buf, sizeof(buf));
+
+ ret = readlink(LINK, buf, sizeof(buf));
+ if (ret < 0)
+ die("readlink failed: %s\n", strerror(errno));
+ if (strcmp(buf, DATA) != 0)
+ die("readlink strcmp failed: '%s' '%s'\n", buf, DATA);
+}
+
+static void test_realpath(void)
+{
+ char *path = realpath(LINK, NULL);
+
+ if (!path)
+ die("realpath failed: %s\n", strerror(errno));
+ if (strcmp(path, DATA) != 0)
+ die("realpath strcmp failed\n");
+
+ free(path);
+}
+
+static void test_statfs(bool nosymfollow)
+{
+ struct statfs buf;
+ int ret;
+
+ ret = statfs(TMP, &buf);
+ if (ret)
+ die("statfs failed: %s\n", strerror(errno));
+
+ if (nosymfollow) {
+ if ((buf.f_flags & ST_NOSYMFOLLOW) == 0)
+ die("ST_NOSYMFOLLOW not set on %s\n", TMP);
+ } else {
+ if ((buf.f_flags & ST_NOSYMFOLLOW) != 0)
+ die("ST_NOSYMFOLLOW set on %s\n", TMP);
+ }
+}
+
+static void run_tests(bool nosymfollow)
+{
+ test_link_traversal(nosymfollow);
+ test_readlink();
+ test_realpath();
+ test_statfs(nosymfollow);
+}
+
+int main(int argc, char **argv)
+{
+ create_and_enter_ns();
+
+ if (mount("testing", TMP, "ramfs", 0, NULL) != 0)
+ die("mount failed: %s\n", strerror(errno));
+
+ setup_symlink();
+ run_tests(false);
+
+ if (mount("testing", TMP, "ramfs", MS_REMOUNT|MS_NOSYMFOLLOW, NULL) != 0)
+ die("remount failed: %s\n", strerror(errno));
+
+ run_tests(true);
+
+ return EXIT_SUCCESS;
+}
diff --git a/tools/testing/selftests/mount/run_nosymfollow.sh b/tools/testing/selftests/mount/run_nosymfollow.sh
new file mode 100755
index 000000000000..5fbbf03043a2
--- /dev/null
+++ b/tools/testing/selftests/mount/run_nosymfollow.sh
@@ -0,0 +1,4 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+./nosymfollow-test
diff --git a/tools/testing/selftests/mount/run_tests.sh b/tools/testing/selftests/mount/run_unprivileged_remount.sh
index 4ab8f507dcba..4ab8f507dcba 100755
--- a/tools/testing/selftests/mount/run_tests.sh
+++ b/tools/testing/selftests/mount/run_unprivileged_remount.sh
diff --git a/tools/testing/selftests/mount/unprivileged-remount-test.c b/tools/testing/selftests/mount/unprivileged-remount-test.c
index 584dc6bc3b06..d2917054fe3a 100644
--- a/tools/testing/selftests/mount/unprivileged-remount-test.c
+++ b/tools/testing/selftests/mount/unprivileged-remount-test.c
@@ -204,7 +204,7 @@ bool test_unpriv_remount(const char *fstype, const char *mount_options,
if (!WIFEXITED(status)) {
die("child did not terminate cleanly\n");
}
- return WEXITSTATUS(status) == EXIT_SUCCESS ? true : false;
+ return WEXITSTATUS(status) == EXIT_SUCCESS;
}
create_and_enter_userns();
@@ -282,7 +282,7 @@ static bool test_priv_mount_unpriv_remount(void)
if (!WIFEXITED(status)) {
die("child did not terminate cleanly\n");
}
- return WEXITSTATUS(status) == EXIT_SUCCESS ? true : false;
+ return WEXITSTATUS(status) == EXIT_SUCCESS;
}
orig_mnt_flags = read_mnt_flags(orig_path);
diff --git a/tools/testing/selftests/mount_setattr/.gitignore b/tools/testing/selftests/mount_setattr/.gitignore
new file mode 100644
index 000000000000..5f74d8488472
--- /dev/null
+++ b/tools/testing/selftests/mount_setattr/.gitignore
@@ -0,0 +1 @@
+mount_setattr_test
diff --git a/tools/testing/selftests/mount_setattr/Makefile b/tools/testing/selftests/mount_setattr/Makefile
new file mode 100644
index 000000000000..0c0d7b1234c1
--- /dev/null
+++ b/tools/testing/selftests/mount_setattr/Makefile
@@ -0,0 +1,7 @@
+# SPDX-License-Identifier: GPL-2.0
+# Makefile for mount selftests.
+CFLAGS = -g $(KHDR_INCLUDES) -Wall -O2 -pthread
+
+TEST_GEN_PROGS := mount_setattr_test
+
+include ../lib.mk
diff --git a/tools/testing/selftests/mount_setattr/config b/tools/testing/selftests/mount_setattr/config
new file mode 100644
index 000000000000..416bd53ce982
--- /dev/null
+++ b/tools/testing/selftests/mount_setattr/config
@@ -0,0 +1 @@
+CONFIG_USER_NS=y
diff --git a/tools/testing/selftests/mount_setattr/mount_setattr_test.c b/tools/testing/selftests/mount_setattr/mount_setattr_test.c
new file mode 100644
index 000000000000..c6a8c732b802
--- /dev/null
+++ b/tools/testing/selftests/mount_setattr/mount_setattr_test.c
@@ -0,0 +1,1500 @@
+// SPDX-License-Identifier: GPL-2.0
+#define _GNU_SOURCE
+#include <sched.h>
+#include <stdio.h>
+#include <errno.h>
+#include <pthread.h>
+#include <string.h>
+#include <sys/stat.h>
+#include <sys/types.h>
+#include <sys/mount.h>
+#include <sys/wait.h>
+#include <sys/vfs.h>
+#include <sys/statvfs.h>
+#include <sys/sysinfo.h>
+#include <stdlib.h>
+#include <unistd.h>
+#include <fcntl.h>
+#include <grp.h>
+#include <stdbool.h>
+#include <stdarg.h>
+#include <linux/mount.h>
+
+#include "../kselftest_harness.h"
+
+#ifndef CLONE_NEWNS
+#define CLONE_NEWNS 0x00020000
+#endif
+
+#ifndef CLONE_NEWUSER
+#define CLONE_NEWUSER 0x10000000
+#endif
+
+#ifndef MS_REC
+#define MS_REC 16384
+#endif
+
+#ifndef MS_RELATIME
+#define MS_RELATIME (1 << 21)
+#endif
+
+#ifndef MS_STRICTATIME
+#define MS_STRICTATIME (1 << 24)
+#endif
+
+#ifndef MOUNT_ATTR_RDONLY
+#define MOUNT_ATTR_RDONLY 0x00000001
+#endif
+
+#ifndef MOUNT_ATTR_NOSUID
+#define MOUNT_ATTR_NOSUID 0x00000002
+#endif
+
+#ifndef MOUNT_ATTR_NOEXEC
+#define MOUNT_ATTR_NOEXEC 0x00000008
+#endif
+
+#ifndef MOUNT_ATTR_NODIRATIME
+#define MOUNT_ATTR_NODIRATIME 0x00000080
+#endif
+
+#ifndef MOUNT_ATTR__ATIME
+#define MOUNT_ATTR__ATIME 0x00000070
+#endif
+
+#ifndef MOUNT_ATTR_RELATIME
+#define MOUNT_ATTR_RELATIME 0x00000000
+#endif
+
+#ifndef MOUNT_ATTR_NOATIME
+#define MOUNT_ATTR_NOATIME 0x00000010
+#endif
+
+#ifndef MOUNT_ATTR_STRICTATIME
+#define MOUNT_ATTR_STRICTATIME 0x00000020
+#endif
+
+#ifndef AT_RECURSIVE
+#define AT_RECURSIVE 0x8000
+#endif
+
+#ifndef MS_SHARED
+#define MS_SHARED (1 << 20)
+#endif
+
+#define DEFAULT_THREADS 4
+#define ptr_to_int(p) ((int)((intptr_t)(p)))
+#define int_to_ptr(u) ((void *)((intptr_t)(u)))
+
+#ifndef __NR_mount_setattr
+ #if defined __alpha__
+ #define __NR_mount_setattr 552
+ #elif defined _MIPS_SIM
+ #if _MIPS_SIM == _MIPS_SIM_ABI32 /* o32 */
+ #define __NR_mount_setattr (442 + 4000)
+ #endif
+ #if _MIPS_SIM == _MIPS_SIM_NABI32 /* n32 */
+ #define __NR_mount_setattr (442 + 6000)
+ #endif
+ #if _MIPS_SIM == _MIPS_SIM_ABI64 /* n64 */
+ #define __NR_mount_setattr (442 + 5000)
+ #endif
+ #elif defined __ia64__
+ #define __NR_mount_setattr (442 + 1024)
+ #else
+ #define __NR_mount_setattr 442
+ #endif
+#endif
+
+#ifndef __NR_open_tree
+ #if defined __alpha__
+ #define __NR_open_tree 538
+ #elif defined _MIPS_SIM
+ #if _MIPS_SIM == _MIPS_SIM_ABI32 /* o32 */
+ #define __NR_open_tree 4428
+ #endif
+ #if _MIPS_SIM == _MIPS_SIM_NABI32 /* n32 */
+ #define __NR_open_tree 6428
+ #endif
+ #if _MIPS_SIM == _MIPS_SIM_ABI64 /* n64 */
+ #define __NR_open_tree 5428
+ #endif
+ #elif defined __ia64__
+ #define __NR_open_tree (428 + 1024)
+ #else
+ #define __NR_open_tree 428
+ #endif
+#endif
+
+#ifndef MOUNT_ATTR_IDMAP
+#define MOUNT_ATTR_IDMAP 0x00100000
+#endif
+
+#ifndef MOUNT_ATTR_NOSYMFOLLOW
+#define MOUNT_ATTR_NOSYMFOLLOW 0x00200000
+#endif
+
+static inline int sys_mount_setattr(int dfd, const char *path, unsigned int flags,
+ struct mount_attr *attr, size_t size)
+{
+ return syscall(__NR_mount_setattr, dfd, path, flags, attr, size);
+}
+
+#ifndef OPEN_TREE_CLONE
+#define OPEN_TREE_CLONE 1
+#endif
+
+#ifndef OPEN_TREE_CLOEXEC
+#define OPEN_TREE_CLOEXEC O_CLOEXEC
+#endif
+
+#ifndef AT_RECURSIVE
+#define AT_RECURSIVE 0x8000 /* Apply to the entire subtree */
+#endif
+
+static inline int sys_open_tree(int dfd, const char *filename, unsigned int flags)
+{
+ return syscall(__NR_open_tree, dfd, filename, flags);
+}
+
+static ssize_t write_nointr(int fd, const void *buf, size_t count)
+{
+ ssize_t ret;
+
+ do {
+ ret = write(fd, buf, count);
+ } while (ret < 0 && errno == EINTR);
+
+ return ret;
+}
+
+static int write_file(const char *path, const void *buf, size_t count)
+{
+ int fd;
+ ssize_t ret;
+
+ fd = open(path, O_WRONLY | O_CLOEXEC | O_NOCTTY | O_NOFOLLOW);
+ if (fd < 0)
+ return -1;
+
+ ret = write_nointr(fd, buf, count);
+ close(fd);
+ if (ret < 0 || (size_t)ret != count)
+ return -1;
+
+ return 0;
+}
+
+static int create_and_enter_userns(void)
+{
+ uid_t uid;
+ gid_t gid;
+ char map[100];
+
+ uid = getuid();
+ gid = getgid();
+
+ if (unshare(CLONE_NEWUSER))
+ return -1;
+
+ if (write_file("/proc/self/setgroups", "deny", sizeof("deny") - 1) &&
+ errno != ENOENT)
+ return -1;
+
+ snprintf(map, sizeof(map), "0 %d 1", uid);
+ if (write_file("/proc/self/uid_map", map, strlen(map)))
+ return -1;
+
+
+ snprintf(map, sizeof(map), "0 %d 1", gid);
+ if (write_file("/proc/self/gid_map", map, strlen(map)))
+ return -1;
+
+ if (setgid(0))
+ return -1;
+
+ if (setuid(0))
+ return -1;
+
+ return 0;
+}
+
+static int prepare_unpriv_mountns(void)
+{
+ if (create_and_enter_userns())
+ return -1;
+
+ if (unshare(CLONE_NEWNS))
+ return -1;
+
+ if (mount(NULL, "/", NULL, MS_REC | MS_PRIVATE, 0))
+ return -1;
+
+ return 0;
+}
+
+#ifndef ST_NOSYMFOLLOW
+#define ST_NOSYMFOLLOW 0x2000 /* do not follow symlinks */
+#endif
+
+static int read_mnt_flags(const char *path)
+{
+ int ret;
+ struct statvfs stat;
+ unsigned int mnt_flags;
+
+ ret = statvfs(path, &stat);
+ if (ret != 0)
+ return -EINVAL;
+
+ if (stat.f_flag & ~(ST_RDONLY | ST_NOSUID | ST_NODEV | ST_NOEXEC |
+ ST_NOATIME | ST_NODIRATIME | ST_RELATIME |
+ ST_SYNCHRONOUS | ST_MANDLOCK | ST_NOSYMFOLLOW))
+ return -EINVAL;
+
+ mnt_flags = 0;
+ if (stat.f_flag & ST_RDONLY)
+ mnt_flags |= MS_RDONLY;
+ if (stat.f_flag & ST_NOSUID)
+ mnt_flags |= MS_NOSUID;
+ if (stat.f_flag & ST_NODEV)
+ mnt_flags |= MS_NODEV;
+ if (stat.f_flag & ST_NOEXEC)
+ mnt_flags |= MS_NOEXEC;
+ if (stat.f_flag & ST_NOATIME)
+ mnt_flags |= MS_NOATIME;
+ if (stat.f_flag & ST_NODIRATIME)
+ mnt_flags |= MS_NODIRATIME;
+ if (stat.f_flag & ST_RELATIME)
+ mnt_flags |= MS_RELATIME;
+ if (stat.f_flag & ST_SYNCHRONOUS)
+ mnt_flags |= MS_SYNCHRONOUS;
+ if (stat.f_flag & ST_MANDLOCK)
+ mnt_flags |= ST_MANDLOCK;
+ if (stat.f_flag & ST_NOSYMFOLLOW)
+ mnt_flags |= ST_NOSYMFOLLOW;
+
+ return mnt_flags;
+}
+
+static char *get_field(char *src, int nfields)
+{
+ int i;
+ char *p = src;
+
+ for (i = 0; i < nfields; i++) {
+ while (*p && *p != ' ' && *p != '\t')
+ p++;
+
+ if (!*p)
+ break;
+
+ p++;
+ }
+
+ return p;
+}
+
+static void null_endofword(char *word)
+{
+ while (*word && *word != ' ' && *word != '\t')
+ word++;
+ *word = '\0';
+}
+
+static bool is_shared_mount(const char *path)
+{
+ size_t len = 0;
+ char *line = NULL;
+ FILE *f = NULL;
+
+ f = fopen("/proc/self/mountinfo", "re");
+ if (!f)
+ return false;
+
+ while (getline(&line, &len, f) != -1) {
+ char *opts, *target;
+
+ target = get_field(line, 4);
+ if (!target)
+ continue;
+
+ opts = get_field(target, 2);
+ if (!opts)
+ continue;
+
+ null_endofword(target);
+
+ if (strcmp(target, path) != 0)
+ continue;
+
+ null_endofword(opts);
+ if (strstr(opts, "shared:"))
+ return true;
+ }
+
+ free(line);
+ fclose(f);
+
+ return false;
+}
+
+static void *mount_setattr_thread(void *data)
+{
+ struct mount_attr attr = {
+ .attr_set = MOUNT_ATTR_RDONLY | MOUNT_ATTR_NOSUID,
+ .attr_clr = 0,
+ .propagation = MS_SHARED,
+ };
+
+ if (sys_mount_setattr(-1, "/mnt/A", AT_RECURSIVE, &attr, sizeof(attr)))
+ pthread_exit(int_to_ptr(-1));
+
+ pthread_exit(int_to_ptr(0));
+}
+
+/* Attempt to de-conflict with the selftests tree. */
+#ifndef SKIP
+#define SKIP(s, ...) XFAIL(s, ##__VA_ARGS__)
+#endif
+
+static bool mount_setattr_supported(void)
+{
+ int ret;
+
+ ret = sys_mount_setattr(-EBADF, "", AT_EMPTY_PATH, NULL, 0);
+ if (ret < 0 && errno == ENOSYS)
+ return false;
+
+ return true;
+}
+
+FIXTURE(mount_setattr) {
+};
+
+#define NOSYMFOLLOW_TARGET "/mnt/A/AA/data"
+#define NOSYMFOLLOW_SYMLINK "/mnt/A/AA/symlink"
+
+FIXTURE_SETUP(mount_setattr)
+{
+ int fd = -EBADF;
+
+ if (!mount_setattr_supported())
+ SKIP(return, "mount_setattr syscall not supported");
+
+ ASSERT_EQ(prepare_unpriv_mountns(), 0);
+
+ (void)umount2("/mnt", MNT_DETACH);
+ (void)umount2("/tmp", MNT_DETACH);
+
+ ASSERT_EQ(mount("testing", "/tmp", "tmpfs", MS_NOATIME | MS_NODEV,
+ "size=100000,mode=700"), 0);
+
+ ASSERT_EQ(mkdir("/tmp/B", 0777), 0);
+
+ ASSERT_EQ(mount("testing", "/tmp/B", "tmpfs", MS_NOATIME | MS_NODEV,
+ "size=100000,mode=700"), 0);
+
+ ASSERT_EQ(mkdir("/tmp/B/BB", 0777), 0);
+
+ ASSERT_EQ(mount("testing", "/tmp/B/BB", "tmpfs", MS_NOATIME | MS_NODEV,
+ "size=100000,mode=700"), 0);
+
+ ASSERT_EQ(mount("testing", "/mnt", "tmpfs", MS_NOATIME | MS_NODEV,
+ "size=100000,mode=700"), 0);
+
+ ASSERT_EQ(mkdir("/mnt/A", 0777), 0);
+
+ ASSERT_EQ(mount("testing", "/mnt/A", "tmpfs", MS_NOATIME | MS_NODEV,
+ "size=100000,mode=700"), 0);
+
+ ASSERT_EQ(mkdir("/mnt/A/AA", 0777), 0);
+
+ ASSERT_EQ(mount("/tmp", "/mnt/A/AA", NULL, MS_BIND | MS_REC, NULL), 0);
+
+ ASSERT_EQ(mkdir("/mnt/B", 0777), 0);
+
+ ASSERT_EQ(mount("testing", "/mnt/B", "ramfs",
+ MS_NOATIME | MS_NODEV | MS_NOSUID, 0), 0);
+
+ ASSERT_EQ(mkdir("/mnt/B/BB", 0777), 0);
+
+ ASSERT_EQ(mount("testing", "/tmp/B/BB", "devpts",
+ MS_RELATIME | MS_NOEXEC | MS_RDONLY, 0), 0);
+
+ fd = creat(NOSYMFOLLOW_TARGET, O_RDWR | O_CLOEXEC);
+ ASSERT_GT(fd, 0);
+ ASSERT_EQ(symlink(NOSYMFOLLOW_TARGET, NOSYMFOLLOW_SYMLINK), 0);
+ ASSERT_EQ(close(fd), 0);
+}
+
+FIXTURE_TEARDOWN(mount_setattr)
+{
+ if (!mount_setattr_supported())
+ SKIP(return, "mount_setattr syscall not supported");
+
+ (void)umount2("/mnt/A", MNT_DETACH);
+ (void)umount2("/tmp", MNT_DETACH);
+}
+
+TEST_F(mount_setattr, invalid_attributes)
+{
+ struct mount_attr invalid_attr = {
+ .attr_set = (1U << 31),
+ };
+
+ if (!mount_setattr_supported())
+ SKIP(return, "mount_setattr syscall not supported");
+
+ ASSERT_NE(sys_mount_setattr(-1, "/mnt/A", AT_RECURSIVE, &invalid_attr,
+ sizeof(invalid_attr)), 0);
+
+ invalid_attr.attr_set = 0;
+ invalid_attr.attr_clr = (1U << 31);
+ ASSERT_NE(sys_mount_setattr(-1, "/mnt/A", AT_RECURSIVE, &invalid_attr,
+ sizeof(invalid_attr)), 0);
+
+ invalid_attr.attr_clr = 0;
+ invalid_attr.propagation = (1U << 31);
+ ASSERT_NE(sys_mount_setattr(-1, "/mnt/A", AT_RECURSIVE, &invalid_attr,
+ sizeof(invalid_attr)), 0);
+
+ invalid_attr.attr_set = (1U << 31);
+ invalid_attr.attr_clr = (1U << 31);
+ invalid_attr.propagation = (1U << 31);
+ ASSERT_NE(sys_mount_setattr(-1, "/mnt/A", AT_RECURSIVE, &invalid_attr,
+ sizeof(invalid_attr)), 0);
+
+ ASSERT_NE(sys_mount_setattr(-1, "mnt/A", AT_RECURSIVE, &invalid_attr,
+ sizeof(invalid_attr)), 0);
+}
+
+TEST_F(mount_setattr, extensibility)
+{
+ unsigned int old_flags = 0, new_flags = 0, expected_flags = 0;
+ char *s = "dummy";
+ struct mount_attr invalid_attr = {};
+ struct mount_attr_large {
+ struct mount_attr attr1;
+ struct mount_attr attr2;
+ struct mount_attr attr3;
+ } large_attr = {};
+
+ if (!mount_setattr_supported())
+ SKIP(return, "mount_setattr syscall not supported");
+
+ old_flags = read_mnt_flags("/mnt/A");
+ ASSERT_GT(old_flags, 0);
+
+ ASSERT_NE(sys_mount_setattr(-1, "/mnt/A", AT_RECURSIVE, NULL,
+ sizeof(invalid_attr)), 0);
+ ASSERT_EQ(errno, EFAULT);
+
+ ASSERT_NE(sys_mount_setattr(-1, "/mnt/A", AT_RECURSIVE, (void *)s,
+ sizeof(invalid_attr)), 0);
+ ASSERT_EQ(errno, EINVAL);
+
+ ASSERT_NE(sys_mount_setattr(-1, "/mnt/A", AT_RECURSIVE, &invalid_attr, 0), 0);
+ ASSERT_EQ(errno, EINVAL);
+
+ ASSERT_NE(sys_mount_setattr(-1, "/mnt/A", AT_RECURSIVE, &invalid_attr,
+ sizeof(invalid_attr) / 2), 0);
+ ASSERT_EQ(errno, EINVAL);
+
+ ASSERT_NE(sys_mount_setattr(-1, "/mnt/A", AT_RECURSIVE, &invalid_attr,
+ sizeof(invalid_attr) / 2), 0);
+ ASSERT_EQ(errno, EINVAL);
+
+ ASSERT_EQ(sys_mount_setattr(-1, "/mnt/A", AT_RECURSIVE,
+ (void *)&large_attr, sizeof(large_attr)), 0);
+
+ large_attr.attr3.attr_set = MOUNT_ATTR_RDONLY;
+ ASSERT_NE(sys_mount_setattr(-1, "/mnt/A", AT_RECURSIVE,
+ (void *)&large_attr, sizeof(large_attr)), 0);
+
+ large_attr.attr3.attr_set = 0;
+ large_attr.attr1.attr_set = MOUNT_ATTR_RDONLY;
+ ASSERT_EQ(sys_mount_setattr(-1, "/mnt/A", AT_RECURSIVE,
+ (void *)&large_attr, sizeof(large_attr)), 0);
+
+ expected_flags = old_flags;
+ expected_flags |= MS_RDONLY;
+
+ new_flags = read_mnt_flags("/mnt/A");
+ ASSERT_EQ(new_flags, expected_flags);
+
+ new_flags = read_mnt_flags("/mnt/A/AA");
+ ASSERT_EQ(new_flags, expected_flags);
+
+ new_flags = read_mnt_flags("/mnt/A/AA/B");
+ ASSERT_EQ(new_flags, expected_flags);
+
+ new_flags = read_mnt_flags("/mnt/A/AA/B/BB");
+ ASSERT_EQ(new_flags, expected_flags);
+}
+
+TEST_F(mount_setattr, basic)
+{
+ unsigned int old_flags = 0, new_flags = 0, expected_flags = 0;
+ struct mount_attr attr = {
+ .attr_set = MOUNT_ATTR_RDONLY | MOUNT_ATTR_NOEXEC | MOUNT_ATTR_RELATIME,
+ .attr_clr = MOUNT_ATTR__ATIME,
+ };
+
+ if (!mount_setattr_supported())
+ SKIP(return, "mount_setattr syscall not supported");
+
+ old_flags = read_mnt_flags("/mnt/A");
+ ASSERT_GT(old_flags, 0);
+
+ ASSERT_EQ(sys_mount_setattr(-1, "/mnt/A", 0, &attr, sizeof(attr)), 0);
+
+ expected_flags = old_flags;
+ expected_flags |= MS_RDONLY;
+ expected_flags |= MS_NOEXEC;
+ expected_flags &= ~MS_NOATIME;
+ expected_flags |= MS_RELATIME;
+
+ new_flags = read_mnt_flags("/mnt/A");
+ ASSERT_EQ(new_flags, expected_flags);
+
+ new_flags = read_mnt_flags("/mnt/A/AA");
+ ASSERT_EQ(new_flags, old_flags);
+
+ new_flags = read_mnt_flags("/mnt/A/AA/B");
+ ASSERT_EQ(new_flags, old_flags);
+
+ new_flags = read_mnt_flags("/mnt/A/AA/B/BB");
+ ASSERT_EQ(new_flags, old_flags);
+}
+
+TEST_F(mount_setattr, basic_recursive)
+{
+ int fd;
+ unsigned int old_flags = 0, new_flags = 0, expected_flags = 0;
+ struct mount_attr attr = {
+ .attr_set = MOUNT_ATTR_RDONLY | MOUNT_ATTR_NOEXEC | MOUNT_ATTR_RELATIME,
+ .attr_clr = MOUNT_ATTR__ATIME,
+ };
+
+ if (!mount_setattr_supported())
+ SKIP(return, "mount_setattr syscall not supported");
+
+ old_flags = read_mnt_flags("/mnt/A");
+ ASSERT_GT(old_flags, 0);
+
+ ASSERT_EQ(sys_mount_setattr(-1, "/mnt/A", AT_RECURSIVE, &attr, sizeof(attr)), 0);
+
+ expected_flags = old_flags;
+ expected_flags |= MS_RDONLY;
+ expected_flags |= MS_NOEXEC;
+ expected_flags &= ~MS_NOATIME;
+ expected_flags |= MS_RELATIME;
+
+ new_flags = read_mnt_flags("/mnt/A");
+ ASSERT_EQ(new_flags, expected_flags);
+
+ new_flags = read_mnt_flags("/mnt/A/AA");
+ ASSERT_EQ(new_flags, expected_flags);
+
+ new_flags = read_mnt_flags("/mnt/A/AA/B");
+ ASSERT_EQ(new_flags, expected_flags);
+
+ new_flags = read_mnt_flags("/mnt/A/AA/B/BB");
+ ASSERT_EQ(new_flags, expected_flags);
+
+ memset(&attr, 0, sizeof(attr));
+ attr.attr_clr = MOUNT_ATTR_RDONLY;
+ attr.propagation = MS_SHARED;
+ ASSERT_EQ(sys_mount_setattr(-1, "/mnt/A", AT_RECURSIVE, &attr, sizeof(attr)), 0);
+
+ expected_flags &= ~MS_RDONLY;
+ new_flags = read_mnt_flags("/mnt/A");
+ ASSERT_EQ(new_flags, expected_flags);
+
+ ASSERT_EQ(is_shared_mount("/mnt/A"), true);
+
+ new_flags = read_mnt_flags("/mnt/A/AA");
+ ASSERT_EQ(new_flags, expected_flags);
+
+ ASSERT_EQ(is_shared_mount("/mnt/A/AA"), true);
+
+ new_flags = read_mnt_flags("/mnt/A/AA/B");
+ ASSERT_EQ(new_flags, expected_flags);
+
+ ASSERT_EQ(is_shared_mount("/mnt/A/AA/B"), true);
+
+ new_flags = read_mnt_flags("/mnt/A/AA/B/BB");
+ ASSERT_EQ(new_flags, expected_flags);
+
+ ASSERT_EQ(is_shared_mount("/mnt/A/AA/B/BB"), true);
+
+ fd = open("/mnt/A/AA/B/b", O_RDWR | O_CLOEXEC | O_CREAT | O_EXCL, 0777);
+ ASSERT_GE(fd, 0);
+
+ /*
+ * We're holding a fd open for writing so this needs to fail somewhere
+ * in the middle and the mount options need to be unchanged.
+ */
+ attr.attr_set = MOUNT_ATTR_RDONLY;
+ ASSERT_LT(sys_mount_setattr(-1, "/mnt/A", AT_RECURSIVE, &attr, sizeof(attr)), 0);
+
+ new_flags = read_mnt_flags("/mnt/A");
+ ASSERT_EQ(new_flags, expected_flags);
+
+ ASSERT_EQ(is_shared_mount("/mnt/A"), true);
+
+ new_flags = read_mnt_flags("/mnt/A/AA");
+ ASSERT_EQ(new_flags, expected_flags);
+
+ ASSERT_EQ(is_shared_mount("/mnt/A/AA"), true);
+
+ new_flags = read_mnt_flags("/mnt/A/AA/B");
+ ASSERT_EQ(new_flags, expected_flags);
+
+ ASSERT_EQ(is_shared_mount("/mnt/A/AA/B"), true);
+
+ new_flags = read_mnt_flags("/mnt/A/AA/B/BB");
+ ASSERT_EQ(new_flags, expected_flags);
+
+ ASSERT_EQ(is_shared_mount("/mnt/A/AA/B/BB"), true);
+
+ EXPECT_EQ(close(fd), 0);
+}
+
+TEST_F(mount_setattr, mount_has_writers)
+{
+ int fd, dfd;
+ unsigned int old_flags = 0, new_flags = 0;
+ struct mount_attr attr = {
+ .attr_set = MOUNT_ATTR_RDONLY | MOUNT_ATTR_NOEXEC | MOUNT_ATTR_RELATIME,
+ .attr_clr = MOUNT_ATTR__ATIME,
+ .propagation = MS_SHARED,
+ };
+
+ if (!mount_setattr_supported())
+ SKIP(return, "mount_setattr syscall not supported");
+
+ old_flags = read_mnt_flags("/mnt/A");
+ ASSERT_GT(old_flags, 0);
+
+ fd = open("/mnt/A/AA/B/b", O_RDWR | O_CLOEXEC | O_CREAT | O_EXCL, 0777);
+ ASSERT_GE(fd, 0);
+
+ /*
+ * We're holding a fd open to a mount somwhere in the middle so this
+ * needs to fail somewhere in the middle. After this the mount options
+ * need to be unchanged.
+ */
+ ASSERT_LT(sys_mount_setattr(-1, "/mnt/A", AT_RECURSIVE, &attr, sizeof(attr)), 0);
+
+ new_flags = read_mnt_flags("/mnt/A");
+ ASSERT_EQ(new_flags, old_flags);
+
+ ASSERT_EQ(is_shared_mount("/mnt/A"), false);
+
+ new_flags = read_mnt_flags("/mnt/A/AA");
+ ASSERT_EQ(new_flags, old_flags);
+
+ ASSERT_EQ(is_shared_mount("/mnt/A/AA"), false);
+
+ new_flags = read_mnt_flags("/mnt/A/AA/B");
+ ASSERT_EQ(new_flags, old_flags);
+
+ ASSERT_EQ(is_shared_mount("/mnt/A/AA/B"), false);
+
+ new_flags = read_mnt_flags("/mnt/A/AA/B/BB");
+ ASSERT_EQ(new_flags, old_flags);
+
+ ASSERT_EQ(is_shared_mount("/mnt/A/AA/B/BB"), false);
+
+ dfd = open("/mnt/A/AA/B", O_DIRECTORY | O_CLOEXEC);
+ ASSERT_GE(dfd, 0);
+ EXPECT_EQ(fsync(dfd), 0);
+ EXPECT_EQ(close(dfd), 0);
+
+ EXPECT_EQ(fsync(fd), 0);
+ EXPECT_EQ(close(fd), 0);
+
+ /* All writers are gone so this should succeed. */
+ ASSERT_EQ(sys_mount_setattr(-1, "/mnt/A", AT_RECURSIVE, &attr, sizeof(attr)), 0);
+}
+
+TEST_F(mount_setattr, mixed_mount_options)
+{
+ unsigned int old_flags1 = 0, old_flags2 = 0, new_flags = 0, expected_flags = 0;
+ struct mount_attr attr = {
+ .attr_clr = MOUNT_ATTR_RDONLY | MOUNT_ATTR_NOSUID | MOUNT_ATTR_NOEXEC | MOUNT_ATTR__ATIME,
+ .attr_set = MOUNT_ATTR_RELATIME,
+ };
+
+ if (!mount_setattr_supported())
+ SKIP(return, "mount_setattr syscall not supported");
+
+ old_flags1 = read_mnt_flags("/mnt/B");
+ ASSERT_GT(old_flags1, 0);
+
+ old_flags2 = read_mnt_flags("/mnt/B/BB");
+ ASSERT_GT(old_flags2, 0);
+
+ ASSERT_EQ(sys_mount_setattr(-1, "/mnt/B", AT_RECURSIVE, &attr, sizeof(attr)), 0);
+
+ expected_flags = old_flags2;
+ expected_flags &= ~(MS_RDONLY | MS_NOEXEC | MS_NOATIME | MS_NOSUID);
+ expected_flags |= MS_RELATIME;
+
+ new_flags = read_mnt_flags("/mnt/B");
+ ASSERT_EQ(new_flags, expected_flags);
+
+ expected_flags = old_flags2;
+ expected_flags &= ~(MS_RDONLY | MS_NOEXEC | MS_NOATIME | MS_NOSUID);
+ expected_flags |= MS_RELATIME;
+
+ new_flags = read_mnt_flags("/mnt/B/BB");
+ ASSERT_EQ(new_flags, expected_flags);
+}
+
+TEST_F(mount_setattr, time_changes)
+{
+ unsigned int old_flags = 0, new_flags = 0, expected_flags = 0;
+ struct mount_attr attr = {
+ .attr_set = MOUNT_ATTR_NODIRATIME | MOUNT_ATTR_NOATIME,
+ };
+
+ if (!mount_setattr_supported())
+ SKIP(return, "mount_setattr syscall not supported");
+
+ ASSERT_NE(sys_mount_setattr(-1, "/mnt/A", AT_RECURSIVE, &attr, sizeof(attr)), 0);
+
+ attr.attr_set = MOUNT_ATTR_STRICTATIME;
+ ASSERT_NE(sys_mount_setattr(-1, "/mnt/A", AT_RECURSIVE, &attr, sizeof(attr)), 0);
+
+ attr.attr_set = MOUNT_ATTR_STRICTATIME | MOUNT_ATTR_NOATIME;
+ ASSERT_NE(sys_mount_setattr(-1, "/mnt/A", AT_RECURSIVE, &attr, sizeof(attr)), 0);
+
+ attr.attr_set = MOUNT_ATTR_STRICTATIME | MOUNT_ATTR_NOATIME;
+ attr.attr_clr = MOUNT_ATTR__ATIME;
+ ASSERT_NE(sys_mount_setattr(-1, "/mnt/A", AT_RECURSIVE, &attr, sizeof(attr)), 0);
+
+ attr.attr_set = 0;
+ attr.attr_clr = MOUNT_ATTR_STRICTATIME;
+ ASSERT_NE(sys_mount_setattr(-1, "/mnt/A", AT_RECURSIVE, &attr, sizeof(attr)), 0);
+
+ attr.attr_clr = MOUNT_ATTR_NOATIME;
+ ASSERT_NE(sys_mount_setattr(-1, "/mnt/A", AT_RECURSIVE, &attr, sizeof(attr)), 0);
+
+ old_flags = read_mnt_flags("/mnt/A");
+ ASSERT_GT(old_flags, 0);
+
+ attr.attr_set = MOUNT_ATTR_NODIRATIME | MOUNT_ATTR_NOATIME;
+ attr.attr_clr = MOUNT_ATTR__ATIME;
+ ASSERT_EQ(sys_mount_setattr(-1, "/mnt/A", AT_RECURSIVE, &attr, sizeof(attr)), 0);
+
+ expected_flags = old_flags;
+ expected_flags |= MS_NOATIME;
+ expected_flags |= MS_NODIRATIME;
+
+ new_flags = read_mnt_flags("/mnt/A");
+ ASSERT_EQ(new_flags, expected_flags);
+
+ new_flags = read_mnt_flags("/mnt/A/AA");
+ ASSERT_EQ(new_flags, expected_flags);
+
+ new_flags = read_mnt_flags("/mnt/A/AA/B");
+ ASSERT_EQ(new_flags, expected_flags);
+
+ new_flags = read_mnt_flags("/mnt/A/AA/B/BB");
+ ASSERT_EQ(new_flags, expected_flags);
+
+ memset(&attr, 0, sizeof(attr));
+ attr.attr_set &= ~MOUNT_ATTR_NOATIME;
+ attr.attr_set |= MOUNT_ATTR_RELATIME;
+ attr.attr_clr |= MOUNT_ATTR__ATIME;
+ ASSERT_EQ(sys_mount_setattr(-1, "/mnt/A", AT_RECURSIVE, &attr, sizeof(attr)), 0);
+
+ expected_flags &= ~MS_NOATIME;
+ expected_flags |= MS_RELATIME;
+
+ new_flags = read_mnt_flags("/mnt/A");
+ ASSERT_EQ(new_flags, expected_flags);
+
+ new_flags = read_mnt_flags("/mnt/A/AA");
+ ASSERT_EQ(new_flags, expected_flags);
+
+ new_flags = read_mnt_flags("/mnt/A/AA/B");
+ ASSERT_EQ(new_flags, expected_flags);
+
+ new_flags = read_mnt_flags("/mnt/A/AA/B/BB");
+ ASSERT_EQ(new_flags, expected_flags);
+
+ memset(&attr, 0, sizeof(attr));
+ attr.attr_set &= ~MOUNT_ATTR_RELATIME;
+ attr.attr_set |= MOUNT_ATTR_STRICTATIME;
+ attr.attr_clr |= MOUNT_ATTR__ATIME;
+ ASSERT_EQ(sys_mount_setattr(-1, "/mnt/A", AT_RECURSIVE, &attr, sizeof(attr)), 0);
+
+ expected_flags &= ~MS_RELATIME;
+
+ new_flags = read_mnt_flags("/mnt/A");
+ ASSERT_EQ(new_flags, expected_flags);
+
+ new_flags = read_mnt_flags("/mnt/A/AA");
+ ASSERT_EQ(new_flags, expected_flags);
+
+ new_flags = read_mnt_flags("/mnt/A/AA/B");
+ ASSERT_EQ(new_flags, expected_flags);
+
+ new_flags = read_mnt_flags("/mnt/A/AA/B/BB");
+ ASSERT_EQ(new_flags, expected_flags);
+
+ memset(&attr, 0, sizeof(attr));
+ attr.attr_set &= ~MOUNT_ATTR_STRICTATIME;
+ attr.attr_set |= MOUNT_ATTR_NOATIME;
+ attr.attr_clr |= MOUNT_ATTR__ATIME;
+ ASSERT_EQ(sys_mount_setattr(-1, "/mnt/A", AT_RECURSIVE, &attr, sizeof(attr)), 0);
+
+ expected_flags |= MS_NOATIME;
+ new_flags = read_mnt_flags("/mnt/A");
+ ASSERT_EQ(new_flags, expected_flags);
+
+ new_flags = read_mnt_flags("/mnt/A/AA");
+ ASSERT_EQ(new_flags, expected_flags);
+
+ new_flags = read_mnt_flags("/mnt/A/AA/B");
+ ASSERT_EQ(new_flags, expected_flags);
+
+ new_flags = read_mnt_flags("/mnt/A/AA/B/BB");
+ ASSERT_EQ(new_flags, expected_flags);
+
+ memset(&attr, 0, sizeof(attr));
+ ASSERT_EQ(sys_mount_setattr(-1, "/mnt/A", AT_RECURSIVE, &attr, sizeof(attr)), 0);
+
+ new_flags = read_mnt_flags("/mnt/A");
+ ASSERT_EQ(new_flags, expected_flags);
+
+ new_flags = read_mnt_flags("/mnt/A/AA");
+ ASSERT_EQ(new_flags, expected_flags);
+
+ new_flags = read_mnt_flags("/mnt/A/AA/B");
+ ASSERT_EQ(new_flags, expected_flags);
+
+ new_flags = read_mnt_flags("/mnt/A/AA/B/BB");
+ ASSERT_EQ(new_flags, expected_flags);
+
+ memset(&attr, 0, sizeof(attr));
+ attr.attr_clr = MOUNT_ATTR_NODIRATIME;
+ ASSERT_EQ(sys_mount_setattr(-1, "/mnt/A", AT_RECURSIVE, &attr, sizeof(attr)), 0);
+
+ expected_flags &= ~MS_NODIRATIME;
+
+ new_flags = read_mnt_flags("/mnt/A");
+ ASSERT_EQ(new_flags, expected_flags);
+
+ new_flags = read_mnt_flags("/mnt/A/AA");
+ ASSERT_EQ(new_flags, expected_flags);
+
+ new_flags = read_mnt_flags("/mnt/A/AA/B");
+ ASSERT_EQ(new_flags, expected_flags);
+
+ new_flags = read_mnt_flags("/mnt/A/AA/B/BB");
+ ASSERT_EQ(new_flags, expected_flags);
+}
+
+TEST_F(mount_setattr, multi_threaded)
+{
+ int i, j, nthreads, ret = 0;
+ unsigned int old_flags = 0, new_flags = 0, expected_flags = 0;
+ pthread_attr_t pattr;
+ pthread_t threads[DEFAULT_THREADS];
+
+ if (!mount_setattr_supported())
+ SKIP(return, "mount_setattr syscall not supported");
+
+ old_flags = read_mnt_flags("/mnt/A");
+ ASSERT_GT(old_flags, 0);
+
+ /* Try to change mount options from multiple threads. */
+ nthreads = get_nprocs_conf();
+ if (nthreads > DEFAULT_THREADS)
+ nthreads = DEFAULT_THREADS;
+
+ pthread_attr_init(&pattr);
+ for (i = 0; i < nthreads; i++)
+ ASSERT_EQ(pthread_create(&threads[i], &pattr, mount_setattr_thread, NULL), 0);
+
+ for (j = 0; j < i; j++) {
+ void *retptr = NULL;
+
+ EXPECT_EQ(pthread_join(threads[j], &retptr), 0);
+
+ ret += ptr_to_int(retptr);
+ EXPECT_EQ(ret, 0);
+ }
+ pthread_attr_destroy(&pattr);
+
+ ASSERT_EQ(ret, 0);
+
+ expected_flags = old_flags;
+ expected_flags |= MS_RDONLY;
+ expected_flags |= MS_NOSUID;
+ new_flags = read_mnt_flags("/mnt/A");
+ ASSERT_EQ(new_flags, expected_flags);
+
+ ASSERT_EQ(is_shared_mount("/mnt/A"), true);
+
+ new_flags = read_mnt_flags("/mnt/A/AA");
+ ASSERT_EQ(new_flags, expected_flags);
+
+ ASSERT_EQ(is_shared_mount("/mnt/A/AA"), true);
+
+ new_flags = read_mnt_flags("/mnt/A/AA/B");
+ ASSERT_EQ(new_flags, expected_flags);
+
+ ASSERT_EQ(is_shared_mount("/mnt/A/AA/B"), true);
+
+ new_flags = read_mnt_flags("/mnt/A/AA/B/BB");
+ ASSERT_EQ(new_flags, expected_flags);
+
+ ASSERT_EQ(is_shared_mount("/mnt/A/AA/B/BB"), true);
+}
+
+TEST_F(mount_setattr, wrong_user_namespace)
+{
+ int ret;
+ struct mount_attr attr = {
+ .attr_set = MOUNT_ATTR_RDONLY,
+ };
+
+ if (!mount_setattr_supported())
+ SKIP(return, "mount_setattr syscall not supported");
+
+ EXPECT_EQ(create_and_enter_userns(), 0);
+ ret = sys_mount_setattr(-1, "/mnt/A", AT_RECURSIVE, &attr, sizeof(attr));
+ ASSERT_LT(ret, 0);
+ ASSERT_EQ(errno, EPERM);
+}
+
+TEST_F(mount_setattr, wrong_mount_namespace)
+{
+ int fd, ret;
+ struct mount_attr attr = {
+ .attr_set = MOUNT_ATTR_RDONLY,
+ };
+
+ if (!mount_setattr_supported())
+ SKIP(return, "mount_setattr syscall not supported");
+
+ fd = open("/mnt/A", O_DIRECTORY | O_CLOEXEC);
+ ASSERT_GE(fd, 0);
+
+ ASSERT_EQ(unshare(CLONE_NEWNS), 0);
+
+ ret = sys_mount_setattr(fd, "", AT_EMPTY_PATH | AT_RECURSIVE, &attr, sizeof(attr));
+ ASSERT_LT(ret, 0);
+ ASSERT_EQ(errno, EINVAL);
+}
+
+FIXTURE(mount_setattr_idmapped) {
+};
+
+FIXTURE_SETUP(mount_setattr_idmapped)
+{
+ int img_fd = -EBADF;
+
+ ASSERT_EQ(unshare(CLONE_NEWNS), 0);
+
+ ASSERT_EQ(mount(NULL, "/", NULL, MS_REC | MS_PRIVATE, 0), 0);
+
+ (void)umount2("/mnt", MNT_DETACH);
+ (void)umount2("/tmp", MNT_DETACH);
+
+ ASSERT_EQ(mount("testing", "/tmp", "tmpfs", MS_NOATIME | MS_NODEV,
+ "size=100000,mode=700"), 0);
+
+ ASSERT_EQ(mkdir("/tmp/B", 0777), 0);
+ ASSERT_EQ(mknodat(-EBADF, "/tmp/B/b", S_IFREG | 0644, 0), 0);
+ ASSERT_EQ(chown("/tmp/B/b", 0, 0), 0);
+
+ ASSERT_EQ(mount("testing", "/tmp/B", "tmpfs", MS_NOATIME | MS_NODEV,
+ "size=100000,mode=700"), 0);
+
+ ASSERT_EQ(mkdir("/tmp/B/BB", 0777), 0);
+ ASSERT_EQ(mknodat(-EBADF, "/tmp/B/BB/b", S_IFREG | 0644, 0), 0);
+ ASSERT_EQ(chown("/tmp/B/BB/b", 0, 0), 0);
+
+ ASSERT_EQ(mount("testing", "/tmp/B/BB", "tmpfs", MS_NOATIME | MS_NODEV,
+ "size=100000,mode=700"), 0);
+
+ ASSERT_EQ(mount("testing", "/mnt", "tmpfs", MS_NOATIME | MS_NODEV,
+ "size=100000,mode=700"), 0);
+
+ ASSERT_EQ(mkdir("/mnt/A", 0777), 0);
+
+ ASSERT_EQ(mount("testing", "/mnt/A", "tmpfs", MS_NOATIME | MS_NODEV,
+ "size=100000,mode=700"), 0);
+
+ ASSERT_EQ(mkdir("/mnt/A/AA", 0777), 0);
+
+ ASSERT_EQ(mount("/tmp", "/mnt/A/AA", NULL, MS_BIND | MS_REC, NULL), 0);
+
+ ASSERT_EQ(mkdir("/mnt/B", 0777), 0);
+
+ ASSERT_EQ(mount("testing", "/mnt/B", "ramfs",
+ MS_NOATIME | MS_NODEV | MS_NOSUID, 0), 0);
+
+ ASSERT_EQ(mkdir("/mnt/B/BB", 0777), 0);
+
+ ASSERT_EQ(mount("testing", "/tmp/B/BB", "devpts",
+ MS_RELATIME | MS_NOEXEC | MS_RDONLY, 0), 0);
+
+ ASSERT_EQ(mkdir("/mnt/C", 0777), 0);
+ ASSERT_EQ(mkdir("/mnt/D", 0777), 0);
+ img_fd = openat(-EBADF, "/mnt/C/ext4.img", O_CREAT | O_WRONLY, 0600);
+ ASSERT_GE(img_fd, 0);
+ ASSERT_EQ(ftruncate(img_fd, 1024 * 2048), 0);
+ ASSERT_EQ(system("mkfs.ext4 -q /mnt/C/ext4.img"), 0);
+ ASSERT_EQ(system("mount -o loop -t ext4 /mnt/C/ext4.img /mnt/D/"), 0);
+ ASSERT_EQ(close(img_fd), 0);
+}
+
+FIXTURE_TEARDOWN(mount_setattr_idmapped)
+{
+ (void)umount2("/mnt/A", MNT_DETACH);
+ (void)umount2("/tmp", MNT_DETACH);
+}
+
+/**
+ * Validate that negative fd values are rejected.
+ */
+TEST_F(mount_setattr_idmapped, invalid_fd_negative)
+{
+ struct mount_attr attr = {
+ .attr_set = MOUNT_ATTR_IDMAP,
+ .userns_fd = -EBADF,
+ };
+
+ if (!mount_setattr_supported())
+ SKIP(return, "mount_setattr syscall not supported");
+
+ ASSERT_NE(sys_mount_setattr(-1, "/", 0, &attr, sizeof(attr)), 0) {
+ TH_LOG("failure: created idmapped mount with negative fd");
+ }
+}
+
+/**
+ * Validate that excessively large fd values are rejected.
+ */
+TEST_F(mount_setattr_idmapped, invalid_fd_large)
+{
+ struct mount_attr attr = {
+ .attr_set = MOUNT_ATTR_IDMAP,
+ .userns_fd = INT64_MAX,
+ };
+
+ if (!mount_setattr_supported())
+ SKIP(return, "mount_setattr syscall not supported");
+
+ ASSERT_NE(sys_mount_setattr(-1, "/", 0, &attr, sizeof(attr)), 0) {
+ TH_LOG("failure: created idmapped mount with too large fd value");
+ }
+}
+
+/**
+ * Validate that closed fd values are rejected.
+ */
+TEST_F(mount_setattr_idmapped, invalid_fd_closed)
+{
+ int fd;
+ struct mount_attr attr = {
+ .attr_set = MOUNT_ATTR_IDMAP,
+ };
+
+ if (!mount_setattr_supported())
+ SKIP(return, "mount_setattr syscall not supported");
+
+ fd = open("/dev/null", O_RDONLY | O_CLOEXEC);
+ ASSERT_GE(fd, 0);
+ ASSERT_GE(close(fd), 0);
+
+ attr.userns_fd = fd;
+ ASSERT_NE(sys_mount_setattr(-1, "/", 0, &attr, sizeof(attr)), 0) {
+ TH_LOG("failure: created idmapped mount with closed fd");
+ }
+}
+
+/**
+ * Validate that the initial user namespace is rejected.
+ */
+TEST_F(mount_setattr_idmapped, invalid_fd_initial_userns)
+{
+ int open_tree_fd = -EBADF;
+ struct mount_attr attr = {
+ .attr_set = MOUNT_ATTR_IDMAP,
+ };
+
+ if (!mount_setattr_supported())
+ SKIP(return, "mount_setattr syscall not supported");
+
+ open_tree_fd = sys_open_tree(-EBADF, "/mnt/D",
+ AT_NO_AUTOMOUNT |
+ AT_SYMLINK_NOFOLLOW |
+ OPEN_TREE_CLOEXEC | OPEN_TREE_CLONE);
+ ASSERT_GE(open_tree_fd, 0);
+
+ attr.userns_fd = open("/proc/1/ns/user", O_RDONLY | O_CLOEXEC);
+ ASSERT_GE(attr.userns_fd, 0);
+ ASSERT_NE(sys_mount_setattr(open_tree_fd, "", AT_EMPTY_PATH, &attr, sizeof(attr)), 0);
+ ASSERT_EQ(errno, EPERM);
+ ASSERT_EQ(close(attr.userns_fd), 0);
+ ASSERT_EQ(close(open_tree_fd), 0);
+}
+
+static int map_ids(pid_t pid, unsigned long nsid, unsigned long hostid,
+ unsigned long range)
+{
+ char map[100], procfile[256];
+
+ snprintf(procfile, sizeof(procfile), "/proc/%d/uid_map", pid);
+ snprintf(map, sizeof(map), "%lu %lu %lu", nsid, hostid, range);
+ if (write_file(procfile, map, strlen(map)))
+ return -1;
+
+
+ snprintf(procfile, sizeof(procfile), "/proc/%d/gid_map", pid);
+ snprintf(map, sizeof(map), "%lu %lu %lu", nsid, hostid, range);
+ if (write_file(procfile, map, strlen(map)))
+ return -1;
+
+ return 0;
+}
+
+#define __STACK_SIZE (8 * 1024 * 1024)
+static pid_t do_clone(int (*fn)(void *), void *arg, int flags)
+{
+ void *stack;
+
+ stack = malloc(__STACK_SIZE);
+ if (!stack)
+ return -ENOMEM;
+
+#ifdef __ia64__
+ return __clone2(fn, stack, __STACK_SIZE, flags | SIGCHLD, arg, NULL);
+#else
+ return clone(fn, stack + __STACK_SIZE, flags | SIGCHLD, arg, NULL);
+#endif
+}
+
+static int get_userns_fd_cb(void *data)
+{
+ return kill(getpid(), SIGSTOP);
+}
+
+static int wait_for_pid(pid_t pid)
+{
+ int status, ret;
+
+again:
+ ret = waitpid(pid, &status, 0);
+ if (ret == -1) {
+ if (errno == EINTR)
+ goto again;
+
+ return -1;
+ }
+
+ if (!WIFEXITED(status))
+ return -1;
+
+ return WEXITSTATUS(status);
+}
+
+static int get_userns_fd(unsigned long nsid, unsigned long hostid, unsigned long range)
+{
+ int ret;
+ pid_t pid;
+ char path[256];
+
+ pid = do_clone(get_userns_fd_cb, NULL, CLONE_NEWUSER);
+ if (pid < 0)
+ return -errno;
+
+ ret = map_ids(pid, nsid, hostid, range);
+ if (ret < 0)
+ return ret;
+
+ snprintf(path, sizeof(path), "/proc/%d/ns/user", pid);
+ ret = open(path, O_RDONLY | O_CLOEXEC);
+ kill(pid, SIGKILL);
+ wait_for_pid(pid);
+ return ret;
+}
+
+/**
+ * Validate that an attached mount in our mount namespace cannot be idmapped.
+ * (The kernel enforces that the mount's mount namespace and the caller's mount
+ * namespace match.)
+ */
+TEST_F(mount_setattr_idmapped, attached_mount_inside_current_mount_namespace)
+{
+ int open_tree_fd = -EBADF;
+ struct mount_attr attr = {
+ .attr_set = MOUNT_ATTR_IDMAP,
+ };
+
+ if (!mount_setattr_supported())
+ SKIP(return, "mount_setattr syscall not supported");
+
+ open_tree_fd = sys_open_tree(-EBADF, "/mnt/D",
+ AT_EMPTY_PATH |
+ AT_NO_AUTOMOUNT |
+ AT_SYMLINK_NOFOLLOW |
+ OPEN_TREE_CLOEXEC);
+ ASSERT_GE(open_tree_fd, 0);
+
+ attr.userns_fd = get_userns_fd(0, 10000, 10000);
+ ASSERT_GE(attr.userns_fd, 0);
+ ASSERT_NE(sys_mount_setattr(open_tree_fd, "", AT_EMPTY_PATH, &attr, sizeof(attr)), 0);
+ ASSERT_EQ(close(attr.userns_fd), 0);
+ ASSERT_EQ(close(open_tree_fd), 0);
+}
+
+/**
+ * Validate that idmapping a mount is rejected if the mount's mount namespace
+ * and our mount namespace don't match.
+ * (The kernel enforces that the mount's mount namespace and the caller's mount
+ * namespace match.)
+ */
+TEST_F(mount_setattr_idmapped, attached_mount_outside_current_mount_namespace)
+{
+ int open_tree_fd = -EBADF;
+ struct mount_attr attr = {
+ .attr_set = MOUNT_ATTR_IDMAP,
+ };
+
+ if (!mount_setattr_supported())
+ SKIP(return, "mount_setattr syscall not supported");
+
+ open_tree_fd = sys_open_tree(-EBADF, "/mnt/D",
+ AT_EMPTY_PATH |
+ AT_NO_AUTOMOUNT |
+ AT_SYMLINK_NOFOLLOW |
+ OPEN_TREE_CLOEXEC);
+ ASSERT_GE(open_tree_fd, 0);
+
+ ASSERT_EQ(unshare(CLONE_NEWNS), 0);
+
+ attr.userns_fd = get_userns_fd(0, 10000, 10000);
+ ASSERT_GE(attr.userns_fd, 0);
+ ASSERT_NE(sys_mount_setattr(open_tree_fd, "", AT_EMPTY_PATH, &attr,
+ sizeof(attr)), 0);
+ ASSERT_EQ(close(attr.userns_fd), 0);
+ ASSERT_EQ(close(open_tree_fd), 0);
+}
+
+/**
+ * Validate that an attached mount in our mount namespace can be idmapped.
+ */
+TEST_F(mount_setattr_idmapped, detached_mount_inside_current_mount_namespace)
+{
+ int open_tree_fd = -EBADF;
+ struct mount_attr attr = {
+ .attr_set = MOUNT_ATTR_IDMAP,
+ };
+
+ if (!mount_setattr_supported())
+ SKIP(return, "mount_setattr syscall not supported");
+
+ open_tree_fd = sys_open_tree(-EBADF, "/mnt/D",
+ AT_EMPTY_PATH |
+ AT_NO_AUTOMOUNT |
+ AT_SYMLINK_NOFOLLOW |
+ OPEN_TREE_CLOEXEC |
+ OPEN_TREE_CLONE);
+ ASSERT_GE(open_tree_fd, 0);
+
+ /* Changing mount properties on a detached mount. */
+ attr.userns_fd = get_userns_fd(0, 10000, 10000);
+ ASSERT_GE(attr.userns_fd, 0);
+ ASSERT_EQ(sys_mount_setattr(open_tree_fd, "",
+ AT_EMPTY_PATH, &attr, sizeof(attr)), 0);
+ ASSERT_EQ(close(attr.userns_fd), 0);
+ ASSERT_EQ(close(open_tree_fd), 0);
+}
+
+/**
+ * Validate that a detached mount not in our mount namespace can be idmapped.
+ */
+TEST_F(mount_setattr_idmapped, detached_mount_outside_current_mount_namespace)
+{
+ int open_tree_fd = -EBADF;
+ struct mount_attr attr = {
+ .attr_set = MOUNT_ATTR_IDMAP,
+ };
+
+ if (!mount_setattr_supported())
+ SKIP(return, "mount_setattr syscall not supported");
+
+ open_tree_fd = sys_open_tree(-EBADF, "/mnt/D",
+ AT_EMPTY_PATH |
+ AT_NO_AUTOMOUNT |
+ AT_SYMLINK_NOFOLLOW |
+ OPEN_TREE_CLOEXEC |
+ OPEN_TREE_CLONE);
+ ASSERT_GE(open_tree_fd, 0);
+
+ ASSERT_EQ(unshare(CLONE_NEWNS), 0);
+
+ /* Changing mount properties on a detached mount. */
+ attr.userns_fd = get_userns_fd(0, 10000, 10000);
+ ASSERT_GE(attr.userns_fd, 0);
+ ASSERT_EQ(sys_mount_setattr(open_tree_fd, "",
+ AT_EMPTY_PATH, &attr, sizeof(attr)), 0);
+ ASSERT_EQ(close(attr.userns_fd), 0);
+ ASSERT_EQ(close(open_tree_fd), 0);
+}
+
+/**
+ * Validate that currently changing the idmapping of an idmapped mount fails.
+ */
+TEST_F(mount_setattr_idmapped, change_idmapping)
+{
+ int open_tree_fd = -EBADF;
+ struct mount_attr attr = {
+ .attr_set = MOUNT_ATTR_IDMAP,
+ };
+
+ if (!mount_setattr_supported())
+ SKIP(return, "mount_setattr syscall not supported");
+
+ open_tree_fd = sys_open_tree(-EBADF, "/mnt/D",
+ AT_EMPTY_PATH |
+ AT_NO_AUTOMOUNT |
+ AT_SYMLINK_NOFOLLOW |
+ OPEN_TREE_CLOEXEC |
+ OPEN_TREE_CLONE);
+ ASSERT_GE(open_tree_fd, 0);
+
+ attr.userns_fd = get_userns_fd(0, 10000, 10000);
+ ASSERT_GE(attr.userns_fd, 0);
+ ASSERT_EQ(sys_mount_setattr(open_tree_fd, "",
+ AT_EMPTY_PATH, &attr, sizeof(attr)), 0);
+ ASSERT_EQ(close(attr.userns_fd), 0);
+
+ /* Change idmapping on a detached mount that is already idmapped. */
+ attr.userns_fd = get_userns_fd(0, 20000, 10000);
+ ASSERT_GE(attr.userns_fd, 0);
+ ASSERT_NE(sys_mount_setattr(open_tree_fd, "", AT_EMPTY_PATH, &attr, sizeof(attr)), 0);
+ ASSERT_EQ(close(attr.userns_fd), 0);
+ ASSERT_EQ(close(open_tree_fd), 0);
+}
+
+static bool expected_uid_gid(int dfd, const char *path, int flags,
+ uid_t expected_uid, gid_t expected_gid)
+{
+ int ret;
+ struct stat st;
+
+ ret = fstatat(dfd, path, &st, flags);
+ if (ret < 0)
+ return false;
+
+ return st.st_uid == expected_uid && st.st_gid == expected_gid;
+}
+
+TEST_F(mount_setattr_idmapped, idmap_mount_tree_invalid)
+{
+ int open_tree_fd = -EBADF;
+ struct mount_attr attr = {
+ .attr_set = MOUNT_ATTR_IDMAP,
+ };
+
+ if (!mount_setattr_supported())
+ SKIP(return, "mount_setattr syscall not supported");
+
+ ASSERT_EQ(expected_uid_gid(-EBADF, "/tmp/B/b", 0, 0, 0), 0);
+ ASSERT_EQ(expected_uid_gid(-EBADF, "/tmp/B/BB/b", 0, 0, 0), 0);
+
+ open_tree_fd = sys_open_tree(-EBADF, "/mnt/A",
+ AT_RECURSIVE |
+ AT_EMPTY_PATH |
+ AT_NO_AUTOMOUNT |
+ AT_SYMLINK_NOFOLLOW |
+ OPEN_TREE_CLOEXEC |
+ OPEN_TREE_CLONE);
+ ASSERT_GE(open_tree_fd, 0);
+
+ attr.userns_fd = get_userns_fd(0, 10000, 10000);
+ ASSERT_GE(attr.userns_fd, 0);
+ ASSERT_NE(sys_mount_setattr(open_tree_fd, "", AT_EMPTY_PATH, &attr, sizeof(attr)), 0);
+ ASSERT_EQ(close(attr.userns_fd), 0);
+ ASSERT_EQ(close(open_tree_fd), 0);
+
+ ASSERT_EQ(expected_uid_gid(-EBADF, "/tmp/B/b", 0, 0, 0), 0);
+ ASSERT_EQ(expected_uid_gid(-EBADF, "/tmp/B/BB/b", 0, 0, 0), 0);
+ ASSERT_EQ(expected_uid_gid(open_tree_fd, "B/b", 0, 0, 0), 0);
+ ASSERT_EQ(expected_uid_gid(open_tree_fd, "B/BB/b", 0, 0, 0), 0);
+}
+
+TEST_F(mount_setattr, mount_attr_nosymfollow)
+{
+ int fd;
+ unsigned int old_flags = 0, new_flags = 0, expected_flags = 0;
+ struct mount_attr attr = {
+ .attr_set = MOUNT_ATTR_NOSYMFOLLOW,
+ };
+
+ if (!mount_setattr_supported())
+ SKIP(return, "mount_setattr syscall not supported");
+
+ fd = open(NOSYMFOLLOW_SYMLINK, O_RDWR | O_CLOEXEC);
+ ASSERT_GT(fd, 0);
+ ASSERT_EQ(close(fd), 0);
+
+ old_flags = read_mnt_flags("/mnt/A");
+ ASSERT_GT(old_flags, 0);
+
+ ASSERT_EQ(sys_mount_setattr(-1, "/mnt/A", AT_RECURSIVE, &attr, sizeof(attr)), 0);
+
+ expected_flags = old_flags;
+ expected_flags |= ST_NOSYMFOLLOW;
+
+ new_flags = read_mnt_flags("/mnt/A");
+ ASSERT_EQ(new_flags, expected_flags);
+
+ new_flags = read_mnt_flags("/mnt/A/AA");
+ ASSERT_EQ(new_flags, expected_flags);
+
+ new_flags = read_mnt_flags("/mnt/A/AA/B");
+ ASSERT_EQ(new_flags, expected_flags);
+
+ new_flags = read_mnt_flags("/mnt/A/AA/B/BB");
+ ASSERT_EQ(new_flags, expected_flags);
+
+ fd = open(NOSYMFOLLOW_SYMLINK, O_RDWR | O_CLOEXEC);
+ ASSERT_LT(fd, 0);
+ ASSERT_EQ(errno, ELOOP);
+
+ attr.attr_set &= ~MOUNT_ATTR_NOSYMFOLLOW;
+ attr.attr_clr |= MOUNT_ATTR_NOSYMFOLLOW;
+
+ ASSERT_EQ(sys_mount_setattr(-1, "/mnt/A", AT_RECURSIVE, &attr, sizeof(attr)), 0);
+
+ expected_flags &= ~ST_NOSYMFOLLOW;
+ new_flags = read_mnt_flags("/mnt/A");
+ ASSERT_EQ(new_flags, expected_flags);
+
+ new_flags = read_mnt_flags("/mnt/A/AA");
+ ASSERT_EQ(new_flags, expected_flags);
+
+ new_flags = read_mnt_flags("/mnt/A/AA/B");
+ ASSERT_EQ(new_flags, expected_flags);
+
+ new_flags = read_mnt_flags("/mnt/A/AA/B/BB");
+ ASSERT_EQ(new_flags, expected_flags);
+
+ fd = open(NOSYMFOLLOW_SYMLINK, O_RDWR | O_CLOEXEC);
+ ASSERT_GT(fd, 0);
+ ASSERT_EQ(close(fd), 0);
+}
+
+TEST_HARNESS_MAIN
diff --git a/tools/testing/selftests/move_mount_set_group/.gitignore b/tools/testing/selftests/move_mount_set_group/.gitignore
new file mode 100644
index 000000000000..f5e339268720
--- /dev/null
+++ b/tools/testing/selftests/move_mount_set_group/.gitignore
@@ -0,0 +1 @@
+move_mount_set_group_test
diff --git a/tools/testing/selftests/move_mount_set_group/Makefile b/tools/testing/selftests/move_mount_set_group/Makefile
new file mode 100644
index 000000000000..94235846b6f9
--- /dev/null
+++ b/tools/testing/selftests/move_mount_set_group/Makefile
@@ -0,0 +1,7 @@
+# SPDX-License-Identifier: GPL-2.0
+# Makefile for mount selftests.
+CFLAGS = -g $(KHDR_INCLUDES) -Wall -O2
+
+TEST_GEN_FILES += move_mount_set_group_test
+
+include ../lib.mk
diff --git a/tools/testing/selftests/move_mount_set_group/config b/tools/testing/selftests/move_mount_set_group/config
new file mode 100644
index 000000000000..416bd53ce982
--- /dev/null
+++ b/tools/testing/selftests/move_mount_set_group/config
@@ -0,0 +1 @@
+CONFIG_USER_NS=y
diff --git a/tools/testing/selftests/move_mount_set_group/move_mount_set_group_test.c b/tools/testing/selftests/move_mount_set_group/move_mount_set_group_test.c
new file mode 100644
index 000000000000..bcf51d785a37
--- /dev/null
+++ b/tools/testing/selftests/move_mount_set_group/move_mount_set_group_test.c
@@ -0,0 +1,375 @@
+// SPDX-License-Identifier: GPL-2.0
+#define _GNU_SOURCE
+#include <sched.h>
+#include <stdio.h>
+#include <errno.h>
+#include <string.h>
+#include <sys/stat.h>
+#include <sys/types.h>
+#include <sys/mount.h>
+#include <sys/wait.h>
+#include <stdlib.h>
+#include <unistd.h>
+#include <fcntl.h>
+#include <stdbool.h>
+#include <stdarg.h>
+#include <sys/syscall.h>
+
+#include "../kselftest_harness.h"
+
+#ifndef CLONE_NEWNS
+#define CLONE_NEWNS 0x00020000
+#endif
+
+#ifndef CLONE_NEWUSER
+#define CLONE_NEWUSER 0x10000000
+#endif
+
+#ifndef MS_SHARED
+#define MS_SHARED (1 << 20)
+#endif
+
+#ifndef MS_PRIVATE
+#define MS_PRIVATE (1<<18)
+#endif
+
+#ifndef MOVE_MOUNT_SET_GROUP
+#define MOVE_MOUNT_SET_GROUP 0x00000100
+#endif
+
+#ifndef MOVE_MOUNT_F_EMPTY_PATH
+#define MOVE_MOUNT_F_EMPTY_PATH 0x00000004
+#endif
+
+#ifndef MOVE_MOUNT_T_EMPTY_PATH
+#define MOVE_MOUNT_T_EMPTY_PATH 0x00000040
+#endif
+
+static ssize_t write_nointr(int fd, const void *buf, size_t count)
+{
+ ssize_t ret;
+
+ do {
+ ret = write(fd, buf, count);
+ } while (ret < 0 && errno == EINTR);
+
+ return ret;
+}
+
+static int write_file(const char *path, const void *buf, size_t count)
+{
+ int fd;
+ ssize_t ret;
+
+ fd = open(path, O_WRONLY | O_CLOEXEC | O_NOCTTY | O_NOFOLLOW);
+ if (fd < 0)
+ return -1;
+
+ ret = write_nointr(fd, buf, count);
+ close(fd);
+ if (ret < 0 || (size_t)ret != count)
+ return -1;
+
+ return 0;
+}
+
+static int create_and_enter_userns(void)
+{
+ uid_t uid;
+ gid_t gid;
+ char map[100];
+
+ uid = getuid();
+ gid = getgid();
+
+ if (unshare(CLONE_NEWUSER))
+ return -1;
+
+ if (write_file("/proc/self/setgroups", "deny", sizeof("deny") - 1) &&
+ errno != ENOENT)
+ return -1;
+
+ snprintf(map, sizeof(map), "0 %d 1", uid);
+ if (write_file("/proc/self/uid_map", map, strlen(map)))
+ return -1;
+
+
+ snprintf(map, sizeof(map), "0 %d 1", gid);
+ if (write_file("/proc/self/gid_map", map, strlen(map)))
+ return -1;
+
+ if (setgid(0))
+ return -1;
+
+ if (setuid(0))
+ return -1;
+
+ return 0;
+}
+
+static int prepare_unpriv_mountns(void)
+{
+ if (create_and_enter_userns())
+ return -1;
+
+ if (unshare(CLONE_NEWNS))
+ return -1;
+
+ if (mount(NULL, "/", NULL, MS_REC | MS_PRIVATE, 0))
+ return -1;
+
+ return 0;
+}
+
+static char *get_field(char *src, int nfields)
+{
+ int i;
+ char *p = src;
+
+ for (i = 0; i < nfields; i++) {
+ while (*p && *p != ' ' && *p != '\t')
+ p++;
+
+ if (!*p)
+ break;
+
+ p++;
+ }
+
+ return p;
+}
+
+static void null_endofword(char *word)
+{
+ while (*word && *word != ' ' && *word != '\t')
+ word++;
+ *word = '\0';
+}
+
+static bool is_shared_mount(const char *path)
+{
+ size_t len = 0;
+ char *line = NULL;
+ FILE *f = NULL;
+
+ f = fopen("/proc/self/mountinfo", "re");
+ if (!f)
+ return false;
+
+ while (getline(&line, &len, f) != -1) {
+ char *opts, *target;
+
+ target = get_field(line, 4);
+ if (!target)
+ continue;
+
+ opts = get_field(target, 2);
+ if (!opts)
+ continue;
+
+ null_endofword(target);
+
+ if (strcmp(target, path) != 0)
+ continue;
+
+ null_endofword(opts);
+ if (strstr(opts, "shared:"))
+ return true;
+ }
+
+ free(line);
+ fclose(f);
+
+ return false;
+}
+
+/* Attempt to de-conflict with the selftests tree. */
+#ifndef SKIP
+#define SKIP(s, ...) XFAIL(s, ##__VA_ARGS__)
+#endif
+
+#define SET_GROUP_FROM "/tmp/move_mount_set_group_supported_from"
+#define SET_GROUP_TO "/tmp/move_mount_set_group_supported_to"
+
+static bool move_mount_set_group_supported(void)
+{
+ int ret;
+
+ if (mount("testing", "/tmp", "tmpfs", MS_NOATIME | MS_NODEV,
+ "size=100000,mode=700"))
+ return -1;
+
+ if (mount(NULL, "/tmp", NULL, MS_PRIVATE, 0))
+ return -1;
+
+ if (mkdir(SET_GROUP_FROM, 0777))
+ return -1;
+
+ if (mkdir(SET_GROUP_TO, 0777))
+ return -1;
+
+ if (mount("testing", SET_GROUP_FROM, "tmpfs", MS_NOATIME | MS_NODEV,
+ "size=100000,mode=700"))
+ return -1;
+
+ if (mount(SET_GROUP_FROM, SET_GROUP_TO, NULL, MS_BIND, NULL))
+ return -1;
+
+ if (mount(NULL, SET_GROUP_FROM, NULL, MS_SHARED, 0))
+ return -1;
+
+ ret = syscall(__NR_move_mount, AT_FDCWD, SET_GROUP_FROM,
+ AT_FDCWD, SET_GROUP_TO, MOVE_MOUNT_SET_GROUP);
+ umount2("/tmp", MNT_DETACH);
+
+ return ret >= 0;
+}
+
+FIXTURE(move_mount_set_group) {
+};
+
+#define SET_GROUP_A "/tmp/A"
+
+FIXTURE_SETUP(move_mount_set_group)
+{
+ bool ret;
+
+ ASSERT_EQ(prepare_unpriv_mountns(), 0);
+
+ ret = move_mount_set_group_supported();
+ ASSERT_GE(ret, 0);
+ if (!ret)
+ SKIP(return, "move_mount(MOVE_MOUNT_SET_GROUP) is not supported");
+
+ umount2("/tmp", MNT_DETACH);
+
+ ASSERT_EQ(mount("testing", "/tmp", "tmpfs", MS_NOATIME | MS_NODEV,
+ "size=100000,mode=700"), 0);
+
+ ASSERT_EQ(mkdir(SET_GROUP_A, 0777), 0);
+
+ ASSERT_EQ(mount("testing", SET_GROUP_A, "tmpfs", MS_NOATIME | MS_NODEV,
+ "size=100000,mode=700"), 0);
+}
+
+FIXTURE_TEARDOWN(move_mount_set_group)
+{
+ bool ret;
+
+ ret = move_mount_set_group_supported();
+ ASSERT_GE(ret, 0);
+ if (!ret)
+ SKIP(return, "move_mount(MOVE_MOUNT_SET_GROUP) is not supported");
+
+ umount2("/tmp", MNT_DETACH);
+}
+
+#define __STACK_SIZE (8 * 1024 * 1024)
+static pid_t do_clone(int (*fn)(void *), void *arg, int flags)
+{
+ void *stack;
+
+ stack = malloc(__STACK_SIZE);
+ if (!stack)
+ return -ENOMEM;
+
+#ifdef __ia64__
+ return __clone2(fn, stack, __STACK_SIZE, flags | SIGCHLD, arg, NULL);
+#else
+ return clone(fn, stack + __STACK_SIZE, flags | SIGCHLD, arg, NULL);
+#endif
+}
+
+static int wait_for_pid(pid_t pid)
+{
+ int status, ret;
+
+again:
+ ret = waitpid(pid, &status, 0);
+ if (ret == -1) {
+ if (errno == EINTR)
+ goto again;
+
+ return -1;
+ }
+
+ if (!WIFEXITED(status))
+ return -1;
+
+ return WEXITSTATUS(status);
+}
+
+struct child_args {
+ int unsfd;
+ int mntnsfd;
+ bool shared;
+ int mntfd;
+};
+
+static int get_nestedns_mount_cb(void *data)
+{
+ struct child_args *ca = (struct child_args *)data;
+ int ret;
+
+ ret = prepare_unpriv_mountns();
+ if (ret)
+ return 1;
+
+ if (ca->shared) {
+ ret = mount(NULL, SET_GROUP_A, NULL, MS_SHARED, 0);
+ if (ret)
+ return 1;
+ }
+
+ ret = open("/proc/self/ns/user", O_RDONLY);
+ if (ret < 0)
+ return 1;
+ ca->unsfd = ret;
+
+ ret = open("/proc/self/ns/mnt", O_RDONLY);
+ if (ret < 0)
+ return 1;
+ ca->mntnsfd = ret;
+
+ ret = open(SET_GROUP_A, O_RDONLY);
+ if (ret < 0)
+ return 1;
+ ca->mntfd = ret;
+
+ return 0;
+}
+
+TEST_F(move_mount_set_group, complex_sharing_copying)
+{
+ struct child_args ca_from = {
+ .shared = true,
+ };
+ struct child_args ca_to = {
+ .shared = false,
+ };
+ pid_t pid;
+ bool ret;
+
+ ret = move_mount_set_group_supported();
+ ASSERT_GE(ret, 0);
+ if (!ret)
+ SKIP(return, "move_mount(MOVE_MOUNT_SET_GROUP) is not supported");
+
+ pid = do_clone(get_nestedns_mount_cb, (void *)&ca_from, CLONE_VFORK |
+ CLONE_VM | CLONE_FILES); ASSERT_GT(pid, 0);
+ ASSERT_EQ(wait_for_pid(pid), 0);
+
+ pid = do_clone(get_nestedns_mount_cb, (void *)&ca_to, CLONE_VFORK |
+ CLONE_VM | CLONE_FILES); ASSERT_GT(pid, 0);
+ ASSERT_EQ(wait_for_pid(pid), 0);
+
+ ASSERT_EQ(syscall(__NR_move_mount, ca_from.mntfd, "",
+ ca_to.mntfd, "", MOVE_MOUNT_SET_GROUP
+ | MOVE_MOUNT_F_EMPTY_PATH | MOVE_MOUNT_T_EMPTY_PATH),
+ 0);
+
+ ASSERT_EQ(setns(ca_to.mntnsfd, CLONE_NEWNS), 0);
+ ASSERT_EQ(is_shared_mount(SET_GROUP_A), 1);
+}
+
+TEST_HARNESS_MAIN
diff --git a/tools/testing/selftests/mqueue/mq_perf_tests.c b/tools/testing/selftests/mqueue/mq_perf_tests.c
index b019e0b8221c..5c16159d0bcd 100644
--- a/tools/testing/selftests/mqueue/mq_perf_tests.c
+++ b/tools/testing/selftests/mqueue/mq_perf_tests.c
@@ -35,6 +35,7 @@
#include <sys/time.h>
#include <sys/resource.h>
#include <sys/stat.h>
+#include <sys/param.h>
#include <mqueue.h>
#include <popt.h>
#include <error.h>
@@ -73,7 +74,6 @@ static char *usage =
char *MAX_MSGS = "/proc/sys/fs/mqueue/msg_max";
char *MAX_MSGSIZE = "/proc/sys/fs/mqueue/msgsize_max";
-#define min(a, b) ((a) < (b) ? (a) : (b))
#define MAX_CPUS 64
char *cpu_option_string;
int cpus_to_pin[MAX_CPUS];
@@ -180,6 +180,9 @@ void shutdown(int exit_val, char *err_cause, int line_no)
if (in_shutdown++)
return;
+ /* Free the cpu_set allocated using CPU_ALLOC in main function */
+ CPU_FREE(cpu_set);
+
for (i = 0; i < num_cpus_to_pin; i++)
if (cpu_threads[i]) {
pthread_kill(cpu_threads[i], SIGUSR1);
@@ -551,7 +554,13 @@ int main(int argc, char *argv[])
perror("sysconf(_SC_NPROCESSORS_ONLN)");
exit(1);
}
- cpus_online = min(MAX_CPUS, sysconf(_SC_NPROCESSORS_ONLN));
+
+ if (getuid() != 0)
+ ksft_exit_skip("Not running as root, but almost all tests "
+ "require root in order to modify\nsystem settings. "
+ "Exiting.\n");
+
+ cpus_online = MIN(MAX_CPUS, sysconf(_SC_NPROCESSORS_ONLN));
cpu_set = CPU_ALLOC(cpus_online);
if (cpu_set == NULL) {
perror("CPU_ALLOC()");
@@ -589,7 +598,7 @@ int main(int argc, char *argv[])
cpu_set)) {
fprintf(stderr, "Any given CPU may "
"only be given once.\n");
- exit(1);
+ goto err_code;
} else
CPU_SET_S(cpus_to_pin[cpu],
cpu_set_size, cpu_set);
@@ -607,7 +616,7 @@ int main(int argc, char *argv[])
queue_path = malloc(strlen(option) + 2);
if (!queue_path) {
perror("malloc()");
- exit(1);
+ goto err_code;
}
queue_path[0] = '/';
queue_path[1] = 0;
@@ -622,17 +631,12 @@ int main(int argc, char *argv[])
fprintf(stderr, "Must pass at least one CPU to continuous "
"mode.\n");
poptPrintUsage(popt_context, stderr, 0);
- exit(1);
+ goto err_code;
} else if (!continuous_mode) {
num_cpus_to_pin = 1;
cpus_to_pin[0] = cpus_online - 1;
}
- if (getuid() != 0)
- ksft_exit_skip("Not running as root, but almost all tests "
- "require root in order to modify\nsystem settings. "
- "Exiting.\n");
-
max_msgs = fopen(MAX_MSGS, "r+");
max_msgsize = fopen(MAX_MSGSIZE, "r+");
if (!max_msgs)
@@ -740,4 +744,9 @@ int main(int argc, char *argv[])
sleep(1);
}
shutdown(0, "", 0);
+
+err_code:
+ CPU_FREE(cpu_set);
+ exit(1);
+
}
diff --git a/tools/testing/selftests/mqueue/setting b/tools/testing/selftests/mqueue/setting
new file mode 100644
index 000000000000..a953c96aa16e
--- /dev/null
+++ b/tools/testing/selftests/mqueue/setting
@@ -0,0 +1 @@
+timeout=180
diff --git a/tools/testing/selftests/nci/.gitignore b/tools/testing/selftests/nci/.gitignore
new file mode 100644
index 000000000000..448eeb4590fc
--- /dev/null
+++ b/tools/testing/selftests/nci/.gitignore
@@ -0,0 +1 @@
+/nci_dev
diff --git a/tools/testing/selftests/nci/Makefile b/tools/testing/selftests/nci/Makefile
new file mode 100644
index 000000000000..47669a1d6a59
--- /dev/null
+++ b/tools/testing/selftests/nci/Makefile
@@ -0,0 +1,6 @@
+# SPDX-License-Identifier: GPL-2.0
+CFLAGS += -Wl,-no-as-needed -Wall
+LDFLAGS += -lpthread
+
+TEST_GEN_PROGS := nci_dev
+include ../lib.mk
diff --git a/tools/testing/selftests/nci/config b/tools/testing/selftests/nci/config
new file mode 100644
index 000000000000..b084e78276be
--- /dev/null
+++ b/tools/testing/selftests/nci/config
@@ -0,0 +1,3 @@
+CONFIG_NFC=y
+CONFIG_NFC_NCI=y
+CONFIG_NFC_VIRTUAL_NCI=y
diff --git a/tools/testing/selftests/nci/nci_dev.c b/tools/testing/selftests/nci/nci_dev.c
new file mode 100644
index 000000000000..1562aa7d60b0
--- /dev/null
+++ b/tools/testing/selftests/nci/nci_dev.c
@@ -0,0 +1,904 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright (C) 2021 Samsung Electrnoics
+ * Bongsu Jeon <bongsu.jeon@samsung.com>
+ *
+ * Test code for nci
+ */
+
+#include <stdlib.h>
+#include <errno.h>
+#include <string.h>
+#include <sys/ioctl.h>
+#include <fcntl.h>
+#include <pthread.h>
+#include <linux/genetlink.h>
+#include <sys/socket.h>
+#include <linux/nfc.h>
+
+#include "../kselftest_harness.h"
+
+#define GENLMSG_DATA(glh) ((void *)(NLMSG_DATA(glh) + GENL_HDRLEN))
+#define GENLMSG_PAYLOAD(glh) (NLMSG_PAYLOAD(glh, 0) - GENL_HDRLEN)
+#define NLA_DATA(na) ((void *)((char *)(na) + NLA_HDRLEN))
+#define NLA_PAYLOAD(len) ((len) - NLA_HDRLEN)
+
+#define MAX_MSG_SIZE 1024
+
+#define IOCTL_GET_NCIDEV_IDX 0
+#define VIRTUAL_NFC_PROTOCOLS (NFC_PROTO_JEWEL_MASK | \
+ NFC_PROTO_MIFARE_MASK | \
+ NFC_PROTO_FELICA_MASK | \
+ NFC_PROTO_ISO14443_MASK | \
+ NFC_PROTO_ISO14443_B_MASK | \
+ NFC_PROTO_ISO15693_MASK)
+
+const __u8 nci_reset_cmd[] = {0x20, 0x00, 0x01, 0x01};
+const __u8 nci_init_cmd[] = {0x20, 0x01, 0x00};
+const __u8 nci_rf_discovery_cmd[] = {0x21, 0x03, 0x09, 0x04, 0x00, 0x01,
+ 0x01, 0x01, 0x02, 0x01, 0x06, 0x01};
+const __u8 nci_init_cmd_v2[] = {0x20, 0x01, 0x02, 0x00, 0x00};
+const __u8 nci_rf_disc_map_cmd[] = {0x21, 0x00, 0x07, 0x02, 0x04, 0x03,
+ 0x02, 0x05, 0x03, 0x03};
+const __u8 nci_rf_deact_cmd[] = {0x21, 0x06, 0x01, 0x00};
+const __u8 nci_reset_rsp[] = {0x40, 0x00, 0x03, 0x00, 0x10, 0x01};
+const __u8 nci_reset_rsp_v2[] = {0x40, 0x00, 0x01, 0x00};
+const __u8 nci_reset_ntf[] = {0x60, 0x00, 0x09, 0x02, 0x01, 0x20, 0x0e,
+ 0x04, 0x61, 0x00, 0x04, 0x02};
+const __u8 nci_init_rsp[] = {0x40, 0x01, 0x14, 0x00, 0x02, 0x0e, 0x02,
+ 0x00, 0x03, 0x01, 0x02, 0x03, 0x02, 0xc8,
+ 0x00, 0xff, 0x10, 0x00, 0x0e, 0x12, 0x00,
+ 0x00, 0x04};
+const __u8 nci_init_rsp_v2[] = {0x40, 0x01, 0x1c, 0x00, 0x1a, 0x7e, 0x06,
+ 0x00, 0x02, 0x92, 0x04, 0xff, 0xff, 0x01,
+ 0x00, 0x40, 0x06, 0x00, 0x00, 0x01, 0x01,
+ 0x00, 0x02, 0x00, 0x03, 0x01, 0x01, 0x06,
+ 0x00, 0x80, 0x00};
+const __u8 nci_rf_disc_map_rsp[] = {0x41, 0x00, 0x01, 0x00};
+const __u8 nci_rf_disc_rsp[] = {0x41, 0x03, 0x01, 0x00};
+const __u8 nci_rf_deact_rsp[] = {0x41, 0x06, 0x01, 0x00};
+const __u8 nci_rf_deact_ntf[] = {0x61, 0x06, 0x02, 0x00, 0x00};
+const __u8 nci_rf_activate_ntf[] = {0x61, 0x05, 0x1D, 0x01, 0x02, 0x04, 0x00,
+ 0xFF, 0xFF, 0x0C, 0x44, 0x03, 0x07, 0x04,
+ 0x62, 0x26, 0x11, 0x80, 0x1D, 0x80, 0x01,
+ 0x20, 0x00, 0x00, 0x00, 0x06, 0x05, 0x75,
+ 0x77, 0x81, 0x02, 0x80};
+const __u8 nci_t4t_select_cmd[] = {0x00, 0x00, 0x0C, 0x00, 0xA4, 0x04, 0x00,
+ 0x07, 0xD2, 0x76, 0x00, 0x00, 0x85, 0x01, 0x01};
+const __u8 nci_t4t_select_cmd2[] = {0x00, 0x00, 0x07, 0x00, 0xA4, 0x00, 0x0C, 0x02,
+ 0xE1, 0x03};
+const __u8 nci_t4t_select_cmd3[] = {0x00, 0x00, 0x07, 0x00, 0xA4, 0x00, 0x0C, 0x02,
+ 0xE1, 0x04};
+const __u8 nci_t4t_read_cmd[] = {0x00, 0x00, 0x05, 0x00, 0xB0, 0x00, 0x00, 0x0F};
+const __u8 nci_t4t_read_rsp[] = {0x00, 0x00, 0x11, 0x00, 0x0F, 0x20, 0x00, 0x3B,
+ 0x00, 0x34, 0x04, 0x06, 0xE1, 0x04, 0x08, 0x00,
+ 0x00, 0x00, 0x90, 0x00};
+const __u8 nci_t4t_read_cmd2[] = {0x00, 0x00, 0x05, 0x00, 0xB0, 0x00, 0x00, 0x02};
+const __u8 nci_t4t_read_rsp2[] = {0x00, 0x00, 0x04, 0x00, 0x0F, 0x90, 0x00};
+const __u8 nci_t4t_read_cmd3[] = {0x00, 0x00, 0x05, 0x00, 0xB0, 0x00, 0x02, 0x0F};
+const __u8 nci_t4t_read_rsp3[] = {0x00, 0x00, 0x11, 0xD1, 0x01, 0x0B, 0x54, 0x02,
+ 0x65, 0x6E, 0x4E, 0x46, 0x43, 0x20, 0x54, 0x45,
+ 0x53, 0x54, 0x90, 0x00};
+const __u8 nci_t4t_rsp_ok[] = {0x00, 0x00, 0x02, 0x90, 0x00};
+
+struct msgtemplate {
+ struct nlmsghdr n;
+ struct genlmsghdr g;
+ char buf[MAX_MSG_SIZE];
+};
+
+static int create_nl_socket(void)
+{
+ int fd;
+ struct sockaddr_nl local;
+
+ fd = socket(AF_NETLINK, SOCK_RAW, NETLINK_GENERIC);
+ if (fd < 0)
+ return -1;
+
+ memset(&local, 0, sizeof(local));
+ local.nl_family = AF_NETLINK;
+
+ if (bind(fd, (struct sockaddr *)&local, sizeof(local)) < 0)
+ goto error;
+
+ return fd;
+error:
+ close(fd);
+ return -1;
+}
+
+static int send_cmd_mt_nla(int sd, __u16 nlmsg_type, __u32 nlmsg_pid,
+ __u8 genl_cmd, int nla_num, __u16 nla_type[],
+ void *nla_data[], int nla_len[], __u16 flags)
+{
+ struct sockaddr_nl nladdr;
+ struct msgtemplate msg;
+ struct nlattr *na;
+ int cnt, prv_len;
+ int r, buflen;
+ char *buf;
+
+ msg.n.nlmsg_len = NLMSG_LENGTH(GENL_HDRLEN);
+ msg.n.nlmsg_type = nlmsg_type;
+ msg.n.nlmsg_flags = flags;
+ msg.n.nlmsg_seq = 0;
+ msg.n.nlmsg_pid = nlmsg_pid;
+ msg.g.cmd = genl_cmd;
+ msg.g.version = 0x1;
+
+ prv_len = 0;
+ for (cnt = 0; cnt < nla_num; cnt++) {
+ na = (struct nlattr *)(GENLMSG_DATA(&msg) + prv_len);
+ na->nla_type = nla_type[cnt];
+ na->nla_len = nla_len[cnt] + NLA_HDRLEN;
+
+ if (nla_len[cnt] > 0)
+ memcpy(NLA_DATA(na), nla_data[cnt], nla_len[cnt]);
+
+ prv_len = NLA_ALIGN(nla_len[cnt]) + NLA_HDRLEN;
+ msg.n.nlmsg_len += prv_len;
+ }
+
+ buf = (char *)&msg;
+ buflen = msg.n.nlmsg_len;
+ memset(&nladdr, 0, sizeof(nladdr));
+ nladdr.nl_family = AF_NETLINK;
+
+ while ((r = sendto(sd, buf, buflen, 0, (struct sockaddr *)&nladdr,
+ sizeof(nladdr))) < buflen) {
+ if (r > 0) {
+ buf += r;
+ buflen -= r;
+ } else if (errno != EAGAIN) {
+ return -1;
+ }
+ }
+ return 0;
+}
+
+static int send_get_nfc_family(int sd, __u32 pid)
+{
+ __u16 nla_get_family_type = CTRL_ATTR_FAMILY_NAME;
+ void *nla_get_family_data;
+ int nla_get_family_len;
+ char family_name[100];
+
+ nla_get_family_len = strlen(NFC_GENL_NAME) + 1;
+ strcpy(family_name, NFC_GENL_NAME);
+ nla_get_family_data = family_name;
+
+ return send_cmd_mt_nla(sd, GENL_ID_CTRL, pid, CTRL_CMD_GETFAMILY,
+ 1, &nla_get_family_type, &nla_get_family_data,
+ &nla_get_family_len, NLM_F_REQUEST);
+}
+
+static int get_family_id(int sd, __u32 pid, __u32 *event_group)
+{
+ struct {
+ struct nlmsghdr n;
+ struct genlmsghdr g;
+ char buf[512];
+ } ans;
+ struct nlattr *na;
+ int resp_len;
+ __u16 id;
+ int len;
+ int rc;
+
+ rc = send_get_nfc_family(sd, pid);
+
+ if (rc < 0)
+ return 0;
+
+ resp_len = recv(sd, &ans, sizeof(ans), 0);
+
+ if (ans.n.nlmsg_type == NLMSG_ERROR || resp_len < 0 ||
+ !NLMSG_OK(&ans.n, resp_len))
+ return 0;
+
+ len = 0;
+ resp_len = GENLMSG_PAYLOAD(&ans.n);
+ na = (struct nlattr *)GENLMSG_DATA(&ans);
+
+ while (len < resp_len) {
+ len += NLA_ALIGN(na->nla_len);
+ if (na->nla_type == CTRL_ATTR_FAMILY_ID) {
+ id = *(__u16 *)NLA_DATA(na);
+ } else if (na->nla_type == CTRL_ATTR_MCAST_GROUPS) {
+ struct nlattr *nested_na;
+ struct nlattr *group_na;
+ int group_attr_len;
+ int group_attr;
+
+ nested_na = (struct nlattr *)((char *)na + NLA_HDRLEN);
+ group_na = (struct nlattr *)((char *)nested_na + NLA_HDRLEN);
+ group_attr_len = 0;
+
+ for (group_attr = CTRL_ATTR_MCAST_GRP_UNSPEC;
+ group_attr < CTRL_ATTR_MCAST_GRP_MAX; group_attr++) {
+ if (group_na->nla_type == CTRL_ATTR_MCAST_GRP_ID) {
+ *event_group = *(__u32 *)((char *)group_na +
+ NLA_HDRLEN);
+ break;
+ }
+
+ group_attr_len += NLA_ALIGN(group_na->nla_len) +
+ NLA_HDRLEN;
+ if (group_attr_len >= nested_na->nla_len)
+ break;
+
+ group_na = (struct nlattr *)((char *)group_na +
+ NLA_ALIGN(group_na->nla_len));
+ }
+ }
+ na = (struct nlattr *)(GENLMSG_DATA(&ans) + len);
+ }
+ return id;
+}
+
+static int send_cmd_with_idx(int sd, __u16 nlmsg_type, __u32 nlmsg_pid,
+ __u8 genl_cmd, int dev_id)
+{
+ __u16 nla_type = NFC_ATTR_DEVICE_INDEX;
+ void *nla_data = &dev_id;
+ int nla_len = 4;
+
+ return send_cmd_mt_nla(sd, nlmsg_type, nlmsg_pid, genl_cmd, 1,
+ &nla_type, &nla_data, &nla_len, NLM_F_REQUEST);
+}
+
+static int get_nci_devid(int sd, __u16 fid, __u32 pid, int dev_id, struct msgtemplate *msg)
+{
+ int rc, resp_len;
+
+ rc = send_cmd_with_idx(sd, fid, pid, NFC_CMD_GET_DEVICE, dev_id);
+ if (rc < 0) {
+ rc = -1;
+ goto error;
+ }
+
+ resp_len = recv(sd, msg, sizeof(*msg), 0);
+ if (resp_len < 0) {
+ rc = -2;
+ goto error;
+ }
+
+ if (msg->n.nlmsg_type == NLMSG_ERROR ||
+ !NLMSG_OK(&msg->n, resp_len)) {
+ rc = -3;
+ goto error;
+ }
+
+ return 0;
+error:
+ return rc;
+}
+
+static __u8 get_dev_enable_state(struct msgtemplate *msg)
+{
+ struct nlattr *na;
+ int resp_len;
+ int len;
+
+ resp_len = GENLMSG_PAYLOAD(&msg->n);
+ na = (struct nlattr *)GENLMSG_DATA(msg);
+ len = 0;
+
+ while (len < resp_len) {
+ len += NLA_ALIGN(na->nla_len);
+ if (na->nla_type == NFC_ATTR_DEVICE_POWERED)
+ return *(char *)NLA_DATA(na);
+ na = (struct nlattr *)(GENLMSG_DATA(msg) + len);
+ }
+
+ return resp_len;
+}
+
+FIXTURE(NCI) {
+ int virtual_nci_fd;
+ bool open_state;
+ int dev_idex;
+ bool isNCI2;
+ int proto;
+ __u32 pid;
+ __u16 fid;
+ int sd;
+};
+
+FIXTURE_VARIANT(NCI) {
+ bool isNCI2;
+};
+
+FIXTURE_VARIANT_ADD(NCI, NCI1_0) {
+ .isNCI2 = false,
+};
+
+FIXTURE_VARIANT_ADD(NCI, NCI2_0) {
+ .isNCI2 = true,
+};
+
+static void *virtual_dev_open(void *data)
+{
+ char buf[258];
+ int dev_fd;
+ int len;
+
+ dev_fd = *(int *)data;
+
+ len = read(dev_fd, buf, 258);
+ if (len <= 0)
+ goto error;
+ if (len != sizeof(nci_reset_cmd))
+ goto error;
+ if (memcmp(nci_reset_cmd, buf, len))
+ goto error;
+ write(dev_fd, nci_reset_rsp, sizeof(nci_reset_rsp));
+
+ len = read(dev_fd, buf, 258);
+ if (len <= 0)
+ goto error;
+ if (len != sizeof(nci_init_cmd))
+ goto error;
+ if (memcmp(nci_init_cmd, buf, len))
+ goto error;
+ write(dev_fd, nci_init_rsp, sizeof(nci_init_rsp));
+
+ len = read(dev_fd, buf, 258);
+ if (len <= 0)
+ goto error;
+ if (len != sizeof(nci_rf_disc_map_cmd))
+ goto error;
+ if (memcmp(nci_rf_disc_map_cmd, buf, len))
+ goto error;
+ write(dev_fd, nci_rf_disc_map_rsp, sizeof(nci_rf_disc_map_rsp));
+
+ return (void *)0;
+error:
+ return (void *)-1;
+}
+
+static void *virtual_dev_open_v2(void *data)
+{
+ char buf[258];
+ int dev_fd;
+ int len;
+
+ dev_fd = *(int *)data;
+
+ len = read(dev_fd, buf, 258);
+ if (len <= 0)
+ goto error;
+ if (len != sizeof(nci_reset_cmd))
+ goto error;
+ if (memcmp(nci_reset_cmd, buf, len))
+ goto error;
+ write(dev_fd, nci_reset_rsp_v2, sizeof(nci_reset_rsp_v2));
+ write(dev_fd, nci_reset_ntf, sizeof(nci_reset_ntf));
+
+ len = read(dev_fd, buf, 258);
+ if (len <= 0)
+ goto error;
+ if (len != sizeof(nci_init_cmd_v2))
+ goto error;
+ if (memcmp(nci_init_cmd_v2, buf, len))
+ goto error;
+ write(dev_fd, nci_init_rsp_v2, sizeof(nci_init_rsp_v2));
+
+ len = read(dev_fd, buf, 258);
+ if (len <= 0)
+ goto error;
+ if (len != sizeof(nci_rf_disc_map_cmd))
+ goto error;
+ if (memcmp(nci_rf_disc_map_cmd, buf, len))
+ goto error;
+ write(dev_fd, nci_rf_disc_map_rsp, sizeof(nci_rf_disc_map_rsp));
+
+ return (void *)0;
+error:
+ return (void *)-1;
+}
+
+FIXTURE_SETUP(NCI)
+{
+ struct msgtemplate msg;
+ pthread_t thread_t;
+ __u32 event_group;
+ int status;
+ int rc;
+
+ self->open_state = false;
+ self->proto = VIRTUAL_NFC_PROTOCOLS;
+ self->isNCI2 = variant->isNCI2;
+
+ self->sd = create_nl_socket();
+ ASSERT_NE(self->sd, -1);
+
+ self->pid = getpid();
+ self->fid = get_family_id(self->sd, self->pid, &event_group);
+ ASSERT_NE(self->fid, -1);
+
+ self->virtual_nci_fd = open("/dev/virtual_nci", O_RDWR);
+ ASSERT_GT(self->virtual_nci_fd, -1);
+
+ rc = setsockopt(self->sd, SOL_NETLINK, NETLINK_ADD_MEMBERSHIP, &event_group,
+ sizeof(event_group));
+ ASSERT_NE(rc, -1);
+
+ rc = ioctl(self->virtual_nci_fd, IOCTL_GET_NCIDEV_IDX, &self->dev_idex);
+ ASSERT_EQ(rc, 0);
+
+ rc = get_nci_devid(self->sd, self->fid, self->pid, self->dev_idex, &msg);
+ ASSERT_EQ(rc, 0);
+ EXPECT_EQ(get_dev_enable_state(&msg), 0);
+
+ if (self->isNCI2)
+ rc = pthread_create(&thread_t, NULL, virtual_dev_open_v2,
+ (void *)&self->virtual_nci_fd);
+ else
+ rc = pthread_create(&thread_t, NULL, virtual_dev_open,
+ (void *)&self->virtual_nci_fd);
+ ASSERT_GT(rc, -1);
+
+ rc = send_cmd_with_idx(self->sd, self->fid, self->pid,
+ NFC_CMD_DEV_UP, self->dev_idex);
+ EXPECT_EQ(rc, 0);
+
+ pthread_join(thread_t, (void **)&status);
+ ASSERT_EQ(status, 0);
+ self->open_state = true;
+}
+
+static void *virtual_deinit(void *data)
+{
+ char buf[258];
+ int dev_fd;
+ int len;
+
+ dev_fd = *(int *)data;
+
+ len = read(dev_fd, buf, 258);
+ if (len <= 0)
+ goto error;
+ if (len != sizeof(nci_reset_cmd))
+ goto error;
+ if (memcmp(nci_reset_cmd, buf, len))
+ goto error;
+ write(dev_fd, nci_reset_rsp, sizeof(nci_reset_rsp));
+
+ return (void *)0;
+error:
+ return (void *)-1;
+}
+
+static void *virtual_deinit_v2(void *data)
+{
+ char buf[258];
+ int dev_fd;
+ int len;
+
+ dev_fd = *(int *)data;
+
+ len = read(dev_fd, buf, 258);
+ if (len <= 0)
+ goto error;
+ if (len != sizeof(nci_reset_cmd))
+ goto error;
+ if (memcmp(nci_reset_cmd, buf, len))
+ goto error;
+ write(dev_fd, nci_reset_rsp_v2, sizeof(nci_reset_rsp_v2));
+ write(dev_fd, nci_reset_ntf, sizeof(nci_reset_ntf));
+
+ return (void *)0;
+error:
+ return (void *)-1;
+}
+
+FIXTURE_TEARDOWN(NCI)
+{
+ pthread_t thread_t;
+ int status;
+ int rc;
+
+ if (self->open_state) {
+ if (self->isNCI2)
+ rc = pthread_create(&thread_t, NULL,
+ virtual_deinit_v2,
+ (void *)&self->virtual_nci_fd);
+ else
+ rc = pthread_create(&thread_t, NULL, virtual_deinit,
+ (void *)&self->virtual_nci_fd);
+
+ ASSERT_GT(rc, -1);
+ rc = send_cmd_with_idx(self->sd, self->fid, self->pid,
+ NFC_CMD_DEV_DOWN, self->dev_idex);
+ EXPECT_EQ(rc, 0);
+
+ pthread_join(thread_t, (void **)&status);
+ ASSERT_EQ(status, 0);
+ }
+
+ close(self->sd);
+ close(self->virtual_nci_fd);
+ self->open_state = false;
+}
+
+TEST_F(NCI, init)
+{
+ struct msgtemplate msg;
+ int rc;
+
+ rc = get_nci_devid(self->sd, self->fid, self->pid, self->dev_idex,
+ &msg);
+ ASSERT_EQ(rc, 0);
+ EXPECT_EQ(get_dev_enable_state(&msg), 1);
+}
+
+static void *virtual_poll_start(void *data)
+{
+ char buf[258];
+ int dev_fd;
+ int len;
+
+ dev_fd = *(int *)data;
+
+ len = read(dev_fd, buf, 258);
+ if (len <= 0)
+ goto error;
+ if (len != sizeof(nci_rf_discovery_cmd))
+ goto error;
+ if (memcmp(nci_rf_discovery_cmd, buf, len))
+ goto error;
+ write(dev_fd, nci_rf_disc_rsp, sizeof(nci_rf_disc_rsp));
+
+ return (void *)0;
+error:
+ return (void *)-1;
+}
+
+static void *virtual_poll_stop(void *data)
+{
+ char buf[258];
+ int dev_fd;
+ int len;
+
+ dev_fd = *(int *)data;
+
+ len = read(dev_fd, buf, 258);
+ if (len <= 0)
+ goto error;
+ if (len != sizeof(nci_rf_deact_cmd))
+ goto error;
+ if (memcmp(nci_rf_deact_cmd, buf, len))
+ goto error;
+ write(dev_fd, nci_rf_deact_rsp, sizeof(nci_rf_deact_rsp));
+
+ return (void *)0;
+error:
+ return (void *)-1;
+}
+
+int start_polling(int dev_idx, int proto, int virtual_fd, int sd, int fid, int pid)
+{
+ __u16 nla_start_poll_type[2] = {NFC_ATTR_DEVICE_INDEX,
+ NFC_ATTR_PROTOCOLS};
+ void *nla_start_poll_data[2] = {&dev_idx, &proto};
+ int nla_start_poll_len[2] = {4, 4};
+ pthread_t thread_t;
+ int status;
+ int rc;
+
+ rc = pthread_create(&thread_t, NULL, virtual_poll_start,
+ (void *)&virtual_fd);
+ if (rc < 0)
+ return rc;
+
+ rc = send_cmd_mt_nla(sd, fid, pid, NFC_CMD_START_POLL, 2, nla_start_poll_type,
+ nla_start_poll_data, nla_start_poll_len, NLM_F_REQUEST);
+ if (rc != 0)
+ return rc;
+
+ pthread_join(thread_t, (void **)&status);
+ return status;
+}
+
+int stop_polling(int dev_idx, int virtual_fd, int sd, int fid, int pid)
+{
+ pthread_t thread_t;
+ int status;
+ int rc;
+
+ rc = pthread_create(&thread_t, NULL, virtual_poll_stop,
+ (void *)&virtual_fd);
+ if (rc < 0)
+ return rc;
+
+ rc = send_cmd_with_idx(sd, fid, pid,
+ NFC_CMD_STOP_POLL, dev_idx);
+ if (rc != 0)
+ return rc;
+
+ pthread_join(thread_t, (void **)&status);
+ return status;
+}
+
+TEST_F(NCI, start_poll)
+{
+ int status;
+
+ status = start_polling(self->dev_idex, self->proto, self->virtual_nci_fd,
+ self->sd, self->fid, self->pid);
+ EXPECT_EQ(status, 0);
+
+ status = stop_polling(self->dev_idex, self->virtual_nci_fd, self->sd,
+ self->fid, self->pid);
+ EXPECT_EQ(status, 0);
+}
+
+int get_taginfo(int dev_idx, int sd, int fid, int pid)
+{
+ struct {
+ struct nlmsghdr n;
+ struct genlmsghdr g;
+ char buf[512];
+ } ans;
+
+ struct nlattr *na;
+ __u32 protocol;
+ int targetidx;
+ __u8 sel_res;
+ int resp_len;
+ int len;
+
+ __u16 tagid_type;
+ void *tagid_type_data;
+ int tagid_len;
+
+ tagid_type = NFC_ATTR_DEVICE_INDEX;
+ tagid_type_data = &dev_idx;
+ tagid_len = 4;
+
+ send_cmd_mt_nla(sd, fid, pid, NFC_CMD_GET_TARGET, 1, &tagid_type,
+ &tagid_type_data, &tagid_len, NLM_F_REQUEST | NLM_F_DUMP);
+ resp_len = recv(sd, &ans, sizeof(ans), 0);
+ if (ans.n.nlmsg_type == NLMSG_ERROR || resp_len < 0 ||
+ !NLMSG_OK(&ans.n, resp_len))
+ return -1;
+
+ resp_len = GENLMSG_PAYLOAD(&ans.n);
+ na = (struct nlattr *)GENLMSG_DATA(&ans);
+
+ len = 0;
+ targetidx = -1;
+ protocol = -1;
+ sel_res = -1;
+
+ while (len < resp_len) {
+ len += NLA_ALIGN(na->nla_len);
+
+ if (na->nla_type == NFC_ATTR_TARGET_INDEX)
+ targetidx = *(int *)((char *)na + NLA_HDRLEN);
+ else if (na->nla_type == NFC_ATTR_TARGET_SEL_RES)
+ sel_res = *(__u8 *)((char *)na + NLA_HDRLEN);
+ else if (na->nla_type == NFC_ATTR_PROTOCOLS)
+ protocol = *(__u32 *)((char *)na + NLA_HDRLEN);
+
+ na = (struct nlattr *)(GENLMSG_DATA(&ans) + len);
+ }
+
+ if (targetidx == -1 || sel_res != 0x20 || protocol != NFC_PROTO_ISO14443_MASK)
+ return -1;
+
+ return targetidx;
+}
+
+int connect_socket(int dev_idx, int target_idx)
+{
+ struct sockaddr_nfc addr;
+ int sock;
+ int err = 0;
+
+ sock = socket(AF_NFC, SOCK_SEQPACKET, NFC_SOCKPROTO_RAW);
+ if (sock == -1)
+ return -1;
+
+ addr.sa_family = AF_NFC;
+ addr.dev_idx = dev_idx;
+ addr.target_idx = target_idx;
+ addr.nfc_protocol = NFC_PROTO_ISO14443;
+
+ err = connect(sock, (struct sockaddr *)&addr, sizeof(addr));
+ if (err) {
+ close(sock);
+ return -1;
+ }
+
+ return sock;
+}
+
+int connect_tag(int dev_idx, int virtual_fd, int sd, int fid, int pid)
+{
+ struct genlmsghdr *genlhdr;
+ struct nlattr *na;
+ char evt_data[255];
+ int target_idx;
+ int resp_len;
+ int evt_dev;
+
+ write(virtual_fd, nci_rf_activate_ntf, sizeof(nci_rf_activate_ntf));
+ resp_len = recv(sd, evt_data, sizeof(evt_data), 0);
+ if (resp_len < 0)
+ return -1;
+
+ genlhdr = (struct genlmsghdr *)((struct nlmsghdr *)evt_data + 1);
+ na = (struct nlattr *)(genlhdr + 1);
+ evt_dev = *(int *)((char *)na + NLA_HDRLEN);
+ if (dev_idx != evt_dev)
+ return -1;
+
+ target_idx = get_taginfo(dev_idx, sd, fid, pid);
+ if (target_idx == -1)
+ return -1;
+ return connect_socket(dev_idx, target_idx);
+}
+
+int read_write_nci_cmd(int nfc_sock, int virtual_fd, const __u8 *cmd, __u32 cmd_len,
+ const __u8 *rsp, __u32 rsp_len)
+{
+ char buf[256];
+ int len;
+
+ send(nfc_sock, &cmd[3], cmd_len - 3, 0);
+ len = read(virtual_fd, buf, cmd_len);
+ if (len < 0 || memcmp(buf, cmd, cmd_len))
+ return -1;
+
+ write(virtual_fd, rsp, rsp_len);
+ len = recv(nfc_sock, buf, rsp_len - 2, 0);
+ if (len < 0 || memcmp(&buf[1], &rsp[3], rsp_len - 3))
+ return -1;
+
+ return 0;
+}
+
+int read_tag(int nfc_sock, int virtual_fd)
+{
+ if (read_write_nci_cmd(nfc_sock, virtual_fd, nci_t4t_select_cmd,
+ sizeof(nci_t4t_select_cmd), nci_t4t_rsp_ok,
+ sizeof(nci_t4t_rsp_ok)))
+ return -1;
+
+ if (read_write_nci_cmd(nfc_sock, virtual_fd, nci_t4t_select_cmd2,
+ sizeof(nci_t4t_select_cmd2), nci_t4t_rsp_ok,
+ sizeof(nci_t4t_rsp_ok)))
+ return -1;
+
+ if (read_write_nci_cmd(nfc_sock, virtual_fd, nci_t4t_read_cmd,
+ sizeof(nci_t4t_read_cmd), nci_t4t_read_rsp,
+ sizeof(nci_t4t_read_rsp)))
+ return -1;
+
+ if (read_write_nci_cmd(nfc_sock, virtual_fd, nci_t4t_select_cmd3,
+ sizeof(nci_t4t_select_cmd3), nci_t4t_rsp_ok,
+ sizeof(nci_t4t_rsp_ok)))
+ return -1;
+
+ if (read_write_nci_cmd(nfc_sock, virtual_fd, nci_t4t_read_cmd2,
+ sizeof(nci_t4t_read_cmd2), nci_t4t_read_rsp2,
+ sizeof(nci_t4t_read_rsp2)))
+ return -1;
+
+ return read_write_nci_cmd(nfc_sock, virtual_fd, nci_t4t_read_cmd3,
+ sizeof(nci_t4t_read_cmd3), nci_t4t_read_rsp3,
+ sizeof(nci_t4t_read_rsp3));
+}
+
+static void *virtual_deactivate_proc(void *data)
+{
+ int virtual_fd;
+ char buf[256];
+ int deactcmd_len;
+ int len;
+
+ virtual_fd = *(int *)data;
+ deactcmd_len = sizeof(nci_rf_deact_cmd);
+ len = read(virtual_fd, buf, deactcmd_len);
+ if (len != deactcmd_len || memcmp(buf, nci_rf_deact_cmd, deactcmd_len))
+ return (void *)-1;
+
+ write(virtual_fd, nci_rf_deact_rsp, sizeof(nci_rf_deact_rsp));
+ write(virtual_fd, nci_rf_deact_ntf, sizeof(nci_rf_deact_ntf));
+
+ return (void *)0;
+}
+
+int disconnect_tag(int nfc_sock, int virtual_fd)
+{
+ pthread_t thread_t;
+ char buf[256];
+ int status;
+ int len;
+
+ send(nfc_sock, &nci_t4t_select_cmd3[3], sizeof(nci_t4t_select_cmd3) - 3, 0);
+ len = read(virtual_fd, buf, sizeof(nci_t4t_select_cmd3));
+ if (len < 0 || memcmp(buf, nci_t4t_select_cmd3, sizeof(nci_t4t_select_cmd3)))
+ return -1;
+
+ len = recv(nfc_sock, buf, sizeof(nci_t4t_rsp_ok), 0);
+ if (len != -1)
+ return -1;
+
+ status = pthread_create(&thread_t, NULL, virtual_deactivate_proc,
+ (void *)&virtual_fd);
+
+ close(nfc_sock);
+ pthread_join(thread_t, (void **)&status);
+ return status;
+}
+
+TEST_F(NCI, t4t_tag_read)
+{
+ int nfc_sock;
+ int status;
+
+ status = start_polling(self->dev_idex, self->proto, self->virtual_nci_fd,
+ self->sd, self->fid, self->pid);
+ EXPECT_EQ(status, 0);
+
+ nfc_sock = connect_tag(self->dev_idex, self->virtual_nci_fd, self->sd,
+ self->fid, self->pid);
+ ASSERT_GT(nfc_sock, -1);
+
+ status = read_tag(nfc_sock, self->virtual_nci_fd);
+ ASSERT_EQ(status, 0);
+
+ status = disconnect_tag(nfc_sock, self->virtual_nci_fd);
+ EXPECT_EQ(status, 0);
+}
+
+TEST_F(NCI, deinit)
+{
+ struct msgtemplate msg;
+ pthread_t thread_t;
+ int status;
+ int rc;
+
+ rc = get_nci_devid(self->sd, self->fid, self->pid, self->dev_idex,
+ &msg);
+ ASSERT_EQ(rc, 0);
+ EXPECT_EQ(get_dev_enable_state(&msg), 1);
+
+ if (self->isNCI2)
+ rc = pthread_create(&thread_t, NULL, virtual_deinit_v2,
+ (void *)&self->virtual_nci_fd);
+ else
+ rc = pthread_create(&thread_t, NULL, virtual_deinit,
+ (void *)&self->virtual_nci_fd);
+ ASSERT_GT(rc, -1);
+
+ rc = send_cmd_with_idx(self->sd, self->fid, self->pid,
+ NFC_CMD_DEV_DOWN, self->dev_idex);
+ EXPECT_EQ(rc, 0);
+
+ pthread_join(thread_t, (void **)&status);
+ self->open_state = 0;
+ ASSERT_EQ(status, 0);
+
+ rc = get_nci_devid(self->sd, self->fid, self->pid, self->dev_idex,
+ &msg);
+ ASSERT_EQ(rc, 0);
+ EXPECT_EQ(get_dev_enable_state(&msg), 0);
+
+ /* Test that operations that normally send packets to the driver
+ * don't cause issues when the device is already closed.
+ * Note: the send of NFC_CMD_DEV_UP itself still succeeds it's just
+ * that the device won't actually be up.
+ */
+ close(self->virtual_nci_fd);
+ self->virtual_nci_fd = -1;
+ rc = send_cmd_with_idx(self->sd, self->fid, self->pid,
+ NFC_CMD_DEV_UP, self->dev_idex);
+ EXPECT_EQ(rc, 0);
+}
+
+TEST_HARNESS_MAIN
diff --git a/tools/testing/selftests/net/.gitignore b/tools/testing/selftests/net/.gitignore
index 742c499328b2..2f9d378edec3 100644
--- a/tools/testing/selftests/net/.gitignore
+++ b/tools/testing/selftests/net/.gitignore
@@ -1,31 +1,56 @@
# SPDX-License-Identifier: GPL-2.0-only
+bind_bhash
+bind_timewait
+bind_wildcard
+csum
+cmsg_sender
+diag_uid
+fin_ack_lat
+gro
+hwtstamp_config
+io_uring_zerocopy_tx
+ioam6_parser
+ip_defrag
+ip_local_port_range
+ipsec
+ipv6_flowlabel
+ipv6_flowlabel_mgr
+log.txt
msg_zerocopy
-socket
+nettest
psock_fanout
psock_snd
psock_tpacket
+reuseaddr_conflict
+reuseaddr_ports_exhausted
reuseport_addr_any
reuseport_bpf
reuseport_bpf_cpu
reuseport_bpf_numa
reuseport_dualstack
-reuseaddr_conflict
-tcp_mmap
-udpgso
-udpgso_bench_rx
-udpgso_bench_tx
-tcp_inq
-tls
-txring_overwrite
-ip_defrag
-ipv6_flowlabel
-ipv6_flowlabel_mgr
+rxtimestamp
+sctp_hello
+scm_pidfd
+sk_bind_sendto_listen
+sk_connect_zero_addr
+socket
+so_incoming_cpu
+so_netns_cookie
so_txtime
+stress_reuseport_listen
+tap
tcp_fastopen_backup_key
-nettest
-fin_ack_lat
-reuseaddr_ports_exhausted
-hwtstamp_config
-rxtimestamp
+tcp_inq
+tcp_mmap
+test_unix_oob
timestamping
+tls
+toeplitz
+tools
+tun
+txring_overwrite
txtimestamp
+udpgso
+udpgso_bench_rx
+udpgso_bench_tx
+unix_connect
diff --git a/tools/testing/selftests/net/Makefile b/tools/testing/selftests/net/Makefile
index 895ec992b2f1..7b6918d5f4af 100644
--- a/tools/testing/selftests/net/Makefile
+++ b/tools/testing/selftests/net/Makefile
@@ -2,7 +2,9 @@
# Makefile for net selftests
CFLAGS = -Wall -Wl,--no-as-needed -O2 -g
-CFLAGS += -I../../../../usr/include/
+CFLAGS += -I../../../../usr/include/ $(KHDR_INCLUDES)
+# Additional include paths needed by kselftest.h
+CFLAGS += -I../
TEST_PROGS := run_netsocktests run_afpackettests test_bpf.sh netdevice.sh \
rtnetlink.sh xfrm_policy.sh test_blackhole_dev.sh
@@ -11,28 +13,143 @@ TEST_PROGS += udpgso_bench.sh fib_rule_tests.sh msg_zerocopy.sh psock_snd.sh
TEST_PROGS += udpgro_bench.sh udpgro.sh test_vxlan_under_vrf.sh reuseport_addr_any.sh
TEST_PROGS += test_vxlan_fdb_changelink.sh so_txtime.sh ipv6_flowlabel.sh
TEST_PROGS += tcp_fastopen_backup_key.sh fcnal-test.sh l2tp.sh traceroute.sh
-TEST_PROGS += fin_ack_lat.sh fib_nexthop_multiprefix.sh fib_nexthops.sh
-TEST_PROGS += altnames.sh icmp_redirect.sh ip6_gre_headroom.sh
+TEST_PROGS += fin_ack_lat.sh fib_nexthop_multiprefix.sh fib_nexthops.sh fib_nexthop_nongw.sh
+TEST_PROGS += altnames.sh icmp.sh icmp_redirect.sh ip6_gre_headroom.sh
TEST_PROGS += route_localnet.sh
TEST_PROGS += reuseaddr_ports_exhausted.sh
TEST_PROGS += txtimestamp.sh
TEST_PROGS += vrf-xfrm-tests.sh
-TEST_PROGS_EXTENDED := in_netns.sh
+TEST_PROGS += rxtimestamp.sh
+TEST_PROGS += devlink_port_split.py
+TEST_PROGS += drop_monitor_tests.sh
+TEST_PROGS += vrf_route_leaking.sh
+TEST_PROGS += bareudp.sh
+TEST_PROGS += amt.sh
+TEST_PROGS += unicast_extensions.sh
+TEST_PROGS += udpgro_fwd.sh
+TEST_PROGS += udpgro_frglist.sh
+TEST_PROGS += veth.sh
+TEST_PROGS += ioam6.sh
+TEST_PROGS += gro.sh
+TEST_PROGS += gre_gso.sh
+TEST_PROGS += cmsg_so_mark.sh
+TEST_PROGS += cmsg_time.sh cmsg_ipv6.sh
+TEST_PROGS += netns-name.sh
+TEST_PROGS += srv6_end_dt46_l3vpn_test.sh
+TEST_PROGS += srv6_end_dt4_l3vpn_test.sh
+TEST_PROGS += srv6_end_dt6_l3vpn_test.sh
+TEST_PROGS += srv6_hencap_red_l3vpn_test.sh
+TEST_PROGS += srv6_hl2encap_red_l2vpn_test.sh
+TEST_PROGS += srv6_end_next_csid_l3vpn_test.sh
+TEST_PROGS += srv6_end_x_next_csid_l3vpn_test.sh
+TEST_PROGS += srv6_end_flavors_test.sh
+TEST_PROGS += vrf_strict_mode_test.sh
+TEST_PROGS += arp_ndisc_evict_nocarrier.sh
+TEST_PROGS += ndisc_unsolicited_na_test.sh
+TEST_PROGS += arp_ndisc_untracked_subnets.sh
+TEST_PROGS += stress_reuseport_listen.sh
+TEST_PROGS += l2_tos_ttl_inherit.sh
+TEST_PROGS += bind_bhash.sh
+TEST_PROGS += ip_local_port_range.sh
+TEST_PROGS += rps_default_mask.sh
+TEST_PROGS += big_tcp.sh
+TEST_PROGS_EXTENDED := toeplitz_client.sh toeplitz.sh
TEST_GEN_FILES = socket nettest
TEST_GEN_FILES += psock_fanout psock_tpacket msg_zerocopy reuseport_addr_any
TEST_GEN_FILES += tcp_mmap tcp_inq psock_snd txring_overwrite
TEST_GEN_FILES += udpgso udpgso_bench_tx udpgso_bench_rx ip_defrag
-TEST_GEN_FILES += so_txtime ipv6_flowlabel ipv6_flowlabel_mgr
+TEST_GEN_FILES += so_txtime ipv6_flowlabel ipv6_flowlabel_mgr so_netns_cookie
TEST_GEN_FILES += tcp_fastopen_backup_key
TEST_GEN_FILES += fin_ack_lat
TEST_GEN_FILES += reuseaddr_ports_exhausted
TEST_GEN_FILES += hwtstamp_config rxtimestamp timestamping txtimestamp
+TEST_GEN_FILES += ipsec
+TEST_GEN_FILES += ioam6_parser
+TEST_GEN_FILES += gro
TEST_GEN_PROGS = reuseport_bpf reuseport_bpf_cpu reuseport_bpf_numa
-TEST_GEN_PROGS += reuseport_dualstack reuseaddr_conflict tls
+TEST_GEN_PROGS += reuseport_dualstack reuseaddr_conflict tls tun tap
+TEST_GEN_FILES += toeplitz
+TEST_GEN_FILES += cmsg_sender
+TEST_GEN_FILES += stress_reuseport_listen
+TEST_PROGS += test_vxlan_vnifiltering.sh
+TEST_GEN_FILES += io_uring_zerocopy_tx
+TEST_PROGS += io_uring_zerocopy_tx.sh
+TEST_GEN_FILES += bind_bhash
+TEST_GEN_PROGS += sk_bind_sendto_listen
+TEST_GEN_PROGS += sk_connect_zero_addr
+TEST_PROGS += test_ingress_egress_chaining.sh
+TEST_GEN_PROGS += so_incoming_cpu
+TEST_PROGS += sctp_vrf.sh
+TEST_GEN_FILES += sctp_hello
+TEST_GEN_FILES += csum
+TEST_GEN_FILES += nat6to4.o
+TEST_GEN_FILES += xdp_dummy.o
+TEST_GEN_FILES += ip_local_port_range
+TEST_GEN_FILES += bind_wildcard
+TEST_PROGS += test_vxlan_mdb.sh
+TEST_PROGS += test_bridge_neigh_suppress.sh
+TEST_PROGS += test_vxlan_nolocalbypass.sh
+TEST_PROGS += test_bridge_backup_port.sh
+TEST_PROGS += fdb_flush.sh
+TEST_PROGS += fq_band_pktlimit.sh
+TEST_PROGS += vlan_hw_filter.sh
+
+TEST_FILES := settings
+TEST_FILES += in_netns.sh lib.sh net_helper.sh setup_loopback.sh setup_veth.sh
+
+TEST_INCLUDES := forwarding/lib.sh
-KSFT_KHDR_INSTALL := 1
include ../lib.mk
$(OUTPUT)/reuseport_bpf_numa: LDLIBS += -lnuma
-$(OUTPUT)/tcp_mmap: LDLIBS += -lpthread
+$(OUTPUT)/tcp_mmap: LDLIBS += -lpthread -lcrypto
$(OUTPUT)/tcp_inq: LDLIBS += -lpthread
+$(OUTPUT)/bind_bhash: LDLIBS += -lpthread
+$(OUTPUT)/io_uring_zerocopy_tx: CFLAGS += -I../../../include/
+
+# Rules to generate bpf objs
+CLANG ?= clang
+SCRATCH_DIR := $(OUTPUT)/tools
+BUILD_DIR := $(SCRATCH_DIR)/build
+BPFDIR := $(abspath ../../../lib/bpf)
+APIDIR := $(abspath ../../../include/uapi)
+
+CCINCLUDE += -I../bpf
+CCINCLUDE += -I../../../../usr/include/
+CCINCLUDE += -I$(SCRATCH_DIR)/include
+
+BPFOBJ := $(BUILD_DIR)/libbpf/libbpf.a
+
+MAKE_DIRS := $(BUILD_DIR)/libbpf
+$(MAKE_DIRS):
+ mkdir -p $@
+
+# Get Clang's default includes on this system, as opposed to those seen by
+# '--target=bpf'. This fixes "missing" files on some architectures/distros,
+# such as asm/byteorder.h, asm/socket.h, asm/sockios.h, sys/cdefs.h etc.
+#
+# Use '-idirafter': Don't interfere with include mechanics except where the
+# build would have failed anyways.
+define get_sys_includes
+$(shell $(1) $(2) -v -E - </dev/null 2>&1 \
+ | sed -n '/<...> search starts here:/,/End of search list./{ s| \(/.*\)|-idirafter \1|p }') \
+$(shell $(1) $(2) -dM -E - </dev/null | grep '__riscv_xlen ' | awk '{printf("-D__riscv_xlen=%d -D__BITS_PER_LONG=%d", $$3, $$3)}')
+endef
+
+ifneq ($(CROSS_COMPILE),)
+CLANG_TARGET_ARCH = --target=$(notdir $(CROSS_COMPILE:%-=%))
+endif
+
+CLANG_SYS_INCLUDES = $(call get_sys_includes,$(CLANG),$(CLANG_TARGET_ARCH))
+
+$(OUTPUT)/nat6to4.o $(OUTPUT)/xdp_dummy.o: $(OUTPUT)/%.o : %.c $(BPFOBJ) | $(MAKE_DIRS)
+ $(CLANG) -O2 --target=bpf -c $< $(CCINCLUDE) $(CLANG_SYS_INCLUDES) -o $@
+
+$(BPFOBJ): $(wildcard $(BPFDIR)/*.[ch] $(BPFDIR)/Makefile) \
+ $(APIDIR)/linux/bpf.h \
+ | $(BUILD_DIR)/libbpf
+ $(MAKE) $(submake_extras) -C $(BPFDIR) OUTPUT=$(BUILD_DIR)/libbpf/ \
+ EXTRA_CFLAGS='-g -O0' \
+ DESTDIR=$(SCRATCH_DIR) prefix= all install_headers
+
+EXTRA_CLEAN := $(SCRATCH_DIR)
diff --git a/tools/testing/selftests/net/af_unix/Makefile b/tools/testing/selftests/net/af_unix/Makefile
new file mode 100644
index 000000000000..221c387a7d7f
--- /dev/null
+++ b/tools/testing/selftests/net/af_unix/Makefile
@@ -0,0 +1,4 @@
+CFLAGS += $(KHDR_INCLUDES)
+TEST_GEN_PROGS := diag_uid test_unix_oob unix_connect scm_pidfd
+
+include ../../lib.mk
diff --git a/tools/testing/selftests/net/af_unix/diag_uid.c b/tools/testing/selftests/net/af_unix/diag_uid.c
new file mode 100644
index 000000000000..79a3dd75590e
--- /dev/null
+++ b/tools/testing/selftests/net/af_unix/diag_uid.c
@@ -0,0 +1,177 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright Amazon.com Inc. or its affiliates. */
+
+#define _GNU_SOURCE
+#include <sched.h>
+
+#include <unistd.h>
+#include <linux/netlink.h>
+#include <linux/rtnetlink.h>
+#include <linux/sock_diag.h>
+#include <linux/unix_diag.h>
+#include <sys/socket.h>
+#include <sys/stat.h>
+#include <sys/types.h>
+#include <sys/un.h>
+
+#include "../../kselftest_harness.h"
+
+FIXTURE(diag_uid)
+{
+ int netlink_fd;
+ int unix_fd;
+ __u32 inode;
+ __u64 cookie;
+};
+
+FIXTURE_VARIANT(diag_uid)
+{
+ int unshare;
+ int udiag_show;
+};
+
+FIXTURE_VARIANT_ADD(diag_uid, uid)
+{
+ .unshare = 0,
+ .udiag_show = UDIAG_SHOW_UID
+};
+
+FIXTURE_VARIANT_ADD(diag_uid, uid_unshare)
+{
+ .unshare = CLONE_NEWUSER,
+ .udiag_show = UDIAG_SHOW_UID
+};
+
+FIXTURE_SETUP(diag_uid)
+{
+ struct stat file_stat;
+ socklen_t optlen;
+ int ret;
+
+ if (variant->unshare)
+ ASSERT_EQ(unshare(variant->unshare), 0);
+
+ self->netlink_fd = socket(AF_NETLINK, SOCK_RAW, NETLINK_SOCK_DIAG);
+ ASSERT_NE(self->netlink_fd, -1);
+
+ self->unix_fd = socket(AF_UNIX, SOCK_STREAM, 0);
+ ASSERT_NE(self->unix_fd, -1);
+
+ ret = fstat(self->unix_fd, &file_stat);
+ ASSERT_EQ(ret, 0);
+
+ self->inode = file_stat.st_ino;
+
+ optlen = sizeof(self->cookie);
+ ret = getsockopt(self->unix_fd, SOL_SOCKET, SO_COOKIE, &self->cookie, &optlen);
+ ASSERT_EQ(ret, 0);
+}
+
+FIXTURE_TEARDOWN(diag_uid)
+{
+ close(self->netlink_fd);
+ close(self->unix_fd);
+}
+
+int send_request(struct __test_metadata *_metadata,
+ FIXTURE_DATA(diag_uid) *self,
+ const FIXTURE_VARIANT(diag_uid) *variant)
+{
+ struct {
+ struct nlmsghdr nlh;
+ struct unix_diag_req udr;
+ } req = {
+ .nlh = {
+ .nlmsg_len = sizeof(req),
+ .nlmsg_type = SOCK_DIAG_BY_FAMILY,
+ .nlmsg_flags = NLM_F_REQUEST
+ },
+ .udr = {
+ .sdiag_family = AF_UNIX,
+ .udiag_ino = self->inode,
+ .udiag_cookie = {
+ (__u32)self->cookie,
+ (__u32)(self->cookie >> 32)
+ },
+ .udiag_show = variant->udiag_show
+ }
+ };
+ struct sockaddr_nl nladdr = {
+ .nl_family = AF_NETLINK
+ };
+ struct iovec iov = {
+ .iov_base = &req,
+ .iov_len = sizeof(req)
+ };
+ struct msghdr msg = {
+ .msg_name = &nladdr,
+ .msg_namelen = sizeof(nladdr),
+ .msg_iov = &iov,
+ .msg_iovlen = 1
+ };
+
+ return sendmsg(self->netlink_fd, &msg, 0);
+}
+
+void render_response(struct __test_metadata *_metadata,
+ struct unix_diag_req *udr, __u32 len)
+{
+ unsigned int rta_len = len - NLMSG_LENGTH(sizeof(*udr));
+ struct rtattr *attr;
+ uid_t uid;
+
+ ASSERT_GT(len, sizeof(*udr));
+ ASSERT_EQ(udr->sdiag_family, AF_UNIX);
+
+ attr = (struct rtattr *)(udr + 1);
+ ASSERT_NE(RTA_OK(attr, rta_len), 0);
+ ASSERT_EQ(attr->rta_type, UNIX_DIAG_UID);
+
+ uid = *(uid_t *)RTA_DATA(attr);
+ ASSERT_EQ(uid, getuid());
+}
+
+void receive_response(struct __test_metadata *_metadata,
+ FIXTURE_DATA(diag_uid) *self)
+{
+ long buf[8192 / sizeof(long)];
+ struct sockaddr_nl nladdr = {
+ .nl_family = AF_NETLINK
+ };
+ struct iovec iov = {
+ .iov_base = buf,
+ .iov_len = sizeof(buf)
+ };
+ struct msghdr msg = {
+ .msg_name = &nladdr,
+ .msg_namelen = sizeof(nladdr),
+ .msg_iov = &iov,
+ .msg_iovlen = 1
+ };
+ struct nlmsghdr *nlh;
+ int ret;
+
+ ret = recvmsg(self->netlink_fd, &msg, 0);
+ ASSERT_GT(ret, 0);
+
+ nlh = (struct nlmsghdr *)buf;
+ ASSERT_NE(NLMSG_OK(nlh, ret), 0);
+ ASSERT_EQ(nlh->nlmsg_type, SOCK_DIAG_BY_FAMILY);
+
+ render_response(_metadata, NLMSG_DATA(nlh), nlh->nlmsg_len);
+
+ nlh = NLMSG_NEXT(nlh, ret);
+ ASSERT_EQ(NLMSG_OK(nlh, ret), 0);
+}
+
+TEST_F(diag_uid, 1)
+{
+ int ret;
+
+ ret = send_request(_metadata, self, variant);
+ ASSERT_GT(ret, 0);
+
+ receive_response(_metadata, self);
+}
+
+TEST_HARNESS_MAIN
diff --git a/tools/testing/selftests/net/af_unix/scm_pidfd.c b/tools/testing/selftests/net/af_unix/scm_pidfd.c
new file mode 100644
index 000000000000..7e534594167e
--- /dev/null
+++ b/tools/testing/selftests/net/af_unix/scm_pidfd.c
@@ -0,0 +1,429 @@
+// SPDX-License-Identifier: GPL-2.0 OR MIT
+#define _GNU_SOURCE
+#include <error.h>
+#include <limits.h>
+#include <stddef.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <sys/socket.h>
+#include <linux/socket.h>
+#include <unistd.h>
+#include <string.h>
+#include <errno.h>
+#include <sys/un.h>
+#include <sys/signal.h>
+#include <sys/types.h>
+#include <sys/wait.h>
+
+#include "../../kselftest_harness.h"
+
+#define clean_errno() (errno == 0 ? "None" : strerror(errno))
+#define log_err(MSG, ...) \
+ fprintf(stderr, "(%s:%d: errno: %s) " MSG "\n", __FILE__, __LINE__, \
+ clean_errno(), ##__VA_ARGS__)
+
+#ifndef SCM_PIDFD
+#define SCM_PIDFD 0x04
+#endif
+
+static void child_die()
+{
+ exit(1);
+}
+
+static int safe_int(const char *numstr, int *converted)
+{
+ char *err = NULL;
+ long sli;
+
+ errno = 0;
+ sli = strtol(numstr, &err, 0);
+ if (errno == ERANGE && (sli == LONG_MAX || sli == LONG_MIN))
+ return -ERANGE;
+
+ if (errno != 0 && sli == 0)
+ return -EINVAL;
+
+ if (err == numstr || *err != '\0')
+ return -EINVAL;
+
+ if (sli > INT_MAX || sli < INT_MIN)
+ return -ERANGE;
+
+ *converted = (int)sli;
+ return 0;
+}
+
+static int char_left_gc(const char *buffer, size_t len)
+{
+ size_t i;
+
+ for (i = 0; i < len; i++) {
+ if (buffer[i] == ' ' || buffer[i] == '\t')
+ continue;
+
+ return i;
+ }
+
+ return 0;
+}
+
+static int char_right_gc(const char *buffer, size_t len)
+{
+ int i;
+
+ for (i = len - 1; i >= 0; i--) {
+ if (buffer[i] == ' ' || buffer[i] == '\t' ||
+ buffer[i] == '\n' || buffer[i] == '\0')
+ continue;
+
+ return i + 1;
+ }
+
+ return 0;
+}
+
+static char *trim_whitespace_in_place(char *buffer)
+{
+ buffer += char_left_gc(buffer, strlen(buffer));
+ buffer[char_right_gc(buffer, strlen(buffer))] = '\0';
+ return buffer;
+}
+
+/* borrowed (with all helpers) from pidfd/pidfd_open_test.c */
+static pid_t get_pid_from_fdinfo_file(int pidfd, const char *key, size_t keylen)
+{
+ int ret;
+ char path[512];
+ FILE *f;
+ size_t n = 0;
+ pid_t result = -1;
+ char *line = NULL;
+
+ snprintf(path, sizeof(path), "/proc/self/fdinfo/%d", pidfd);
+
+ f = fopen(path, "re");
+ if (!f)
+ return -1;
+
+ while (getline(&line, &n, f) != -1) {
+ char *numstr;
+
+ if (strncmp(line, key, keylen))
+ continue;
+
+ numstr = trim_whitespace_in_place(line + 4);
+ ret = safe_int(numstr, &result);
+ if (ret < 0)
+ goto out;
+
+ break;
+ }
+
+out:
+ free(line);
+ fclose(f);
+ return result;
+}
+
+static int cmsg_check(int fd)
+{
+ struct msghdr msg = { 0 };
+ struct cmsghdr *cmsg;
+ struct iovec iov;
+ struct ucred *ucred = NULL;
+ int data = 0;
+ char control[CMSG_SPACE(sizeof(struct ucred)) +
+ CMSG_SPACE(sizeof(int))] = { 0 };
+ int *pidfd = NULL;
+ pid_t parent_pid;
+ int err;
+
+ iov.iov_base = &data;
+ iov.iov_len = sizeof(data);
+
+ msg.msg_iov = &iov;
+ msg.msg_iovlen = 1;
+ msg.msg_control = control;
+ msg.msg_controllen = sizeof(control);
+
+ err = recvmsg(fd, &msg, 0);
+ if (err < 0) {
+ log_err("recvmsg");
+ return 1;
+ }
+
+ if (msg.msg_flags & (MSG_TRUNC | MSG_CTRUNC)) {
+ log_err("recvmsg: truncated");
+ return 1;
+ }
+
+ for (cmsg = CMSG_FIRSTHDR(&msg); cmsg != NULL;
+ cmsg = CMSG_NXTHDR(&msg, cmsg)) {
+ if (cmsg->cmsg_level == SOL_SOCKET &&
+ cmsg->cmsg_type == SCM_PIDFD) {
+ if (cmsg->cmsg_len < sizeof(*pidfd)) {
+ log_err("CMSG parse: SCM_PIDFD wrong len");
+ return 1;
+ }
+
+ pidfd = (void *)CMSG_DATA(cmsg);
+ }
+
+ if (cmsg->cmsg_level == SOL_SOCKET &&
+ cmsg->cmsg_type == SCM_CREDENTIALS) {
+ if (cmsg->cmsg_len < sizeof(*ucred)) {
+ log_err("CMSG parse: SCM_CREDENTIALS wrong len");
+ return 1;
+ }
+
+ ucred = (void *)CMSG_DATA(cmsg);
+ }
+ }
+
+ /* send(pfd, "x", sizeof(char), 0) */
+ if (data != 'x') {
+ log_err("recvmsg: data corruption");
+ return 1;
+ }
+
+ if (!pidfd) {
+ log_err("CMSG parse: SCM_PIDFD not found");
+ return 1;
+ }
+
+ if (!ucred) {
+ log_err("CMSG parse: SCM_CREDENTIALS not found");
+ return 1;
+ }
+
+ /* pidfd from SCM_PIDFD should point to the parent process PID */
+ parent_pid =
+ get_pid_from_fdinfo_file(*pidfd, "Pid:", sizeof("Pid:") - 1);
+ if (parent_pid != getppid()) {
+ log_err("wrong SCM_PIDFD %d != %d", parent_pid, getppid());
+ return 1;
+ }
+
+ return 0;
+}
+
+struct sock_addr {
+ char sock_name[32];
+ struct sockaddr_un listen_addr;
+ socklen_t addrlen;
+};
+
+FIXTURE(scm_pidfd)
+{
+ int server;
+ pid_t client_pid;
+ int startup_pipe[2];
+ struct sock_addr server_addr;
+ struct sock_addr *client_addr;
+};
+
+FIXTURE_VARIANT(scm_pidfd)
+{
+ int type;
+ bool abstract;
+};
+
+FIXTURE_VARIANT_ADD(scm_pidfd, stream_pathname)
+{
+ .type = SOCK_STREAM,
+ .abstract = 0,
+};
+
+FIXTURE_VARIANT_ADD(scm_pidfd, stream_abstract)
+{
+ .type = SOCK_STREAM,
+ .abstract = 1,
+};
+
+FIXTURE_VARIANT_ADD(scm_pidfd, dgram_pathname)
+{
+ .type = SOCK_DGRAM,
+ .abstract = 0,
+};
+
+FIXTURE_VARIANT_ADD(scm_pidfd, dgram_abstract)
+{
+ .type = SOCK_DGRAM,
+ .abstract = 1,
+};
+
+FIXTURE_SETUP(scm_pidfd)
+{
+ self->client_addr = mmap(NULL, sizeof(*self->client_addr), PROT_READ | PROT_WRITE,
+ MAP_SHARED | MAP_ANONYMOUS, -1, 0);
+ ASSERT_NE(MAP_FAILED, self->client_addr);
+}
+
+FIXTURE_TEARDOWN(scm_pidfd)
+{
+ close(self->server);
+
+ kill(self->client_pid, SIGKILL);
+ waitpid(self->client_pid, NULL, 0);
+
+ if (!variant->abstract) {
+ unlink(self->server_addr.sock_name);
+ unlink(self->client_addr->sock_name);
+ }
+}
+
+static void fill_sockaddr(struct sock_addr *addr, bool abstract)
+{
+ char *sun_path_buf = (char *)&addr->listen_addr.sun_path;
+
+ addr->listen_addr.sun_family = AF_UNIX;
+ addr->addrlen = offsetof(struct sockaddr_un, sun_path);
+ snprintf(addr->sock_name, sizeof(addr->sock_name), "scm_pidfd_%d", getpid());
+ addr->addrlen += strlen(addr->sock_name);
+ if (abstract) {
+ *sun_path_buf = '\0';
+ addr->addrlen++;
+ sun_path_buf++;
+ } else {
+ unlink(addr->sock_name);
+ }
+ memcpy(sun_path_buf, addr->sock_name, strlen(addr->sock_name));
+}
+
+static void client(FIXTURE_DATA(scm_pidfd) *self,
+ const FIXTURE_VARIANT(scm_pidfd) *variant)
+{
+ int cfd;
+ socklen_t len;
+ struct ucred peer_cred;
+ int peer_pidfd;
+ pid_t peer_pid;
+ int on = 0;
+
+ cfd = socket(AF_UNIX, variant->type, 0);
+ if (cfd < 0) {
+ log_err("socket");
+ child_die();
+ }
+
+ if (variant->type == SOCK_DGRAM) {
+ fill_sockaddr(self->client_addr, variant->abstract);
+
+ if (bind(cfd, (struct sockaddr *)&self->client_addr->listen_addr, self->client_addr->addrlen)) {
+ log_err("bind");
+ child_die();
+ }
+ }
+
+ if (connect(cfd, (struct sockaddr *)&self->server_addr.listen_addr,
+ self->server_addr.addrlen) != 0) {
+ log_err("connect");
+ child_die();
+ }
+
+ on = 1;
+ if (setsockopt(cfd, SOL_SOCKET, SO_PASSCRED, &on, sizeof(on))) {
+ log_err("Failed to set SO_PASSCRED");
+ child_die();
+ }
+
+ if (setsockopt(cfd, SOL_SOCKET, SO_PASSPIDFD, &on, sizeof(on))) {
+ log_err("Failed to set SO_PASSPIDFD");
+ child_die();
+ }
+
+ close(self->startup_pipe[1]);
+
+ if (cmsg_check(cfd)) {
+ log_err("cmsg_check failed");
+ child_die();
+ }
+
+ /* skip further for SOCK_DGRAM as it's not applicable */
+ if (variant->type == SOCK_DGRAM)
+ return;
+
+ len = sizeof(peer_cred);
+ if (getsockopt(cfd, SOL_SOCKET, SO_PEERCRED, &peer_cred, &len)) {
+ log_err("Failed to get SO_PEERCRED");
+ child_die();
+ }
+
+ len = sizeof(peer_pidfd);
+ if (getsockopt(cfd, SOL_SOCKET, SO_PEERPIDFD, &peer_pidfd, &len)) {
+ log_err("Failed to get SO_PEERPIDFD");
+ child_die();
+ }
+
+ /* pid from SO_PEERCRED should point to the parent process PID */
+ if (peer_cred.pid != getppid()) {
+ log_err("peer_cred.pid != getppid(): %d != %d", peer_cred.pid, getppid());
+ child_die();
+ }
+
+ peer_pid = get_pid_from_fdinfo_file(peer_pidfd,
+ "Pid:", sizeof("Pid:") - 1);
+ if (peer_pid != peer_cred.pid) {
+ log_err("peer_pid != peer_cred.pid: %d != %d", peer_pid, peer_cred.pid);
+ child_die();
+ }
+}
+
+TEST_F(scm_pidfd, test)
+{
+ int err;
+ int pfd;
+ int child_status = 0;
+
+ self->server = socket(AF_UNIX, variant->type, 0);
+ ASSERT_NE(-1, self->server);
+
+ fill_sockaddr(&self->server_addr, variant->abstract);
+
+ err = bind(self->server, (struct sockaddr *)&self->server_addr.listen_addr, self->server_addr.addrlen);
+ ASSERT_EQ(0, err);
+
+ if (variant->type == SOCK_STREAM) {
+ err = listen(self->server, 1);
+ ASSERT_EQ(0, err);
+ }
+
+ err = pipe(self->startup_pipe);
+ ASSERT_NE(-1, err);
+
+ self->client_pid = fork();
+ ASSERT_NE(-1, self->client_pid);
+ if (self->client_pid == 0) {
+ close(self->server);
+ close(self->startup_pipe[0]);
+ client(self, variant);
+ exit(0);
+ }
+ close(self->startup_pipe[1]);
+
+ if (variant->type == SOCK_STREAM) {
+ pfd = accept(self->server, NULL, NULL);
+ ASSERT_NE(-1, pfd);
+ } else {
+ pfd = self->server;
+ }
+
+ /* wait until the child arrives at checkpoint */
+ read(self->startup_pipe[0], &err, sizeof(int));
+ close(self->startup_pipe[0]);
+
+ if (variant->type == SOCK_DGRAM) {
+ err = sendto(pfd, "x", sizeof(char), 0, (struct sockaddr *)&self->client_addr->listen_addr, self->client_addr->addrlen);
+ ASSERT_NE(-1, err);
+ } else {
+ err = send(pfd, "x", sizeof(char), 0);
+ ASSERT_NE(-1, err);
+ }
+
+ close(pfd);
+ waitpid(self->client_pid, &child_status, 0);
+ ASSERT_EQ(0, WIFEXITED(child_status) ? WEXITSTATUS(child_status) : 1);
+}
+
+TEST_HARNESS_MAIN
diff --git a/tools/testing/selftests/net/af_unix/test_unix_oob.c b/tools/testing/selftests/net/af_unix/test_unix_oob.c
new file mode 100644
index 000000000000..a7c51889acd5
--- /dev/null
+++ b/tools/testing/selftests/net/af_unix/test_unix_oob.c
@@ -0,0 +1,436 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+#include <stdio.h>
+#include <stdlib.h>
+#include <sys/socket.h>
+#include <arpa/inet.h>
+#include <unistd.h>
+#include <string.h>
+#include <fcntl.h>
+#include <sys/ioctl.h>
+#include <errno.h>
+#include <netinet/tcp.h>
+#include <sys/un.h>
+#include <sys/signal.h>
+#include <sys/poll.h>
+
+static int pipefd[2];
+static int signal_recvd;
+static pid_t producer_id;
+static char sock_name[32];
+
+static void sig_hand(int sn, siginfo_t *si, void *p)
+{
+ signal_recvd = sn;
+}
+
+static int set_sig_handler(int signal)
+{
+ struct sigaction sa;
+
+ sa.sa_sigaction = sig_hand;
+ sigemptyset(&sa.sa_mask);
+ sa.sa_flags = SA_SIGINFO | SA_RESTART;
+
+ return sigaction(signal, &sa, NULL);
+}
+
+static void set_filemode(int fd, int set)
+{
+ int flags = fcntl(fd, F_GETFL, 0);
+
+ if (set)
+ flags &= ~O_NONBLOCK;
+ else
+ flags |= O_NONBLOCK;
+ fcntl(fd, F_SETFL, flags);
+}
+
+static void signal_producer(int fd)
+{
+ char cmd;
+
+ cmd = 'S';
+ write(fd, &cmd, sizeof(cmd));
+}
+
+static void wait_for_signal(int fd)
+{
+ char buf[5];
+
+ read(fd, buf, 5);
+}
+
+static void die(int status)
+{
+ fflush(NULL);
+ unlink(sock_name);
+ kill(producer_id, SIGTERM);
+ exit(status);
+}
+
+int is_sioctatmark(int fd)
+{
+ int ans = -1;
+
+ if (ioctl(fd, SIOCATMARK, &ans, sizeof(ans)) < 0) {
+#ifdef DEBUG
+ perror("SIOCATMARK Failed");
+#endif
+ }
+ return ans;
+}
+
+void read_oob(int fd, char *c)
+{
+
+ *c = ' ';
+ if (recv(fd, c, sizeof(*c), MSG_OOB) < 0) {
+#ifdef DEBUG
+ perror("Reading MSG_OOB Failed");
+#endif
+ }
+}
+
+int read_data(int pfd, char *buf, int size)
+{
+ int len = 0;
+
+ memset(buf, size, '0');
+ len = read(pfd, buf, size);
+#ifdef DEBUG
+ if (len < 0)
+ perror("read failed");
+#endif
+ return len;
+}
+
+static void wait_for_data(int pfd, int event)
+{
+ struct pollfd pfds[1];
+
+ pfds[0].fd = pfd;
+ pfds[0].events = event;
+ poll(pfds, 1, -1);
+}
+
+void producer(struct sockaddr_un *consumer_addr)
+{
+ int cfd;
+ char buf[64];
+ int i;
+
+ memset(buf, 'x', sizeof(buf));
+ cfd = socket(AF_UNIX, SOCK_STREAM, 0);
+
+ wait_for_signal(pipefd[0]);
+ if (connect(cfd, (struct sockaddr *)consumer_addr,
+ sizeof(*consumer_addr)) != 0) {
+ perror("Connect failed");
+ kill(0, SIGTERM);
+ exit(1);
+ }
+
+ for (i = 0; i < 2; i++) {
+ /* Test 1: Test for SIGURG and OOB */
+ wait_for_signal(pipefd[0]);
+ memset(buf, 'x', sizeof(buf));
+ buf[63] = '@';
+ send(cfd, buf, sizeof(buf), MSG_OOB);
+
+ wait_for_signal(pipefd[0]);
+
+ /* Test 2: Test for OOB being overwitten */
+ memset(buf, 'x', sizeof(buf));
+ buf[63] = '%';
+ send(cfd, buf, sizeof(buf), MSG_OOB);
+
+ memset(buf, 'x', sizeof(buf));
+ buf[63] = '#';
+ send(cfd, buf, sizeof(buf), MSG_OOB);
+
+ wait_for_signal(pipefd[0]);
+
+ /* Test 3: Test for SIOCATMARK */
+ memset(buf, 'x', sizeof(buf));
+ buf[63] = '@';
+ send(cfd, buf, sizeof(buf), MSG_OOB);
+
+ memset(buf, 'x', sizeof(buf));
+ buf[63] = '%';
+ send(cfd, buf, sizeof(buf), MSG_OOB);
+
+ memset(buf, 'x', sizeof(buf));
+ send(cfd, buf, sizeof(buf), 0);
+
+ wait_for_signal(pipefd[0]);
+
+ /* Test 4: Test for 1byte OOB msg */
+ memset(buf, 'x', sizeof(buf));
+ buf[0] = '@';
+ send(cfd, buf, 1, MSG_OOB);
+ }
+}
+
+int
+main(int argc, char **argv)
+{
+ int lfd, pfd;
+ struct sockaddr_un consumer_addr, paddr;
+ socklen_t len = sizeof(consumer_addr);
+ char buf[1024];
+ int on = 0;
+ char oob;
+ int atmark;
+
+ lfd = socket(AF_UNIX, SOCK_STREAM, 0);
+ memset(&consumer_addr, 0, sizeof(consumer_addr));
+ consumer_addr.sun_family = AF_UNIX;
+ sprintf(sock_name, "unix_oob_%d", getpid());
+ unlink(sock_name);
+ strcpy(consumer_addr.sun_path, sock_name);
+
+ if ((bind(lfd, (struct sockaddr *)&consumer_addr,
+ sizeof(consumer_addr))) != 0) {
+ perror("socket bind failed");
+ exit(1);
+ }
+
+ pipe(pipefd);
+
+ listen(lfd, 1);
+
+ producer_id = fork();
+ if (producer_id == 0) {
+ producer(&consumer_addr);
+ exit(0);
+ }
+
+ set_sig_handler(SIGURG);
+ signal_producer(pipefd[1]);
+
+ pfd = accept(lfd, (struct sockaddr *) &paddr, &len);
+ fcntl(pfd, F_SETOWN, getpid());
+
+ signal_recvd = 0;
+ signal_producer(pipefd[1]);
+
+ /* Test 1:
+ * veriyf that SIGURG is
+ * delivered, 63 bytes are
+ * read, oob is '@', and POLLPRI works.
+ */
+ wait_for_data(pfd, POLLPRI);
+ read_oob(pfd, &oob);
+ len = read_data(pfd, buf, 1024);
+ if (!signal_recvd || len != 63 || oob != '@') {
+ fprintf(stderr, "Test 1 failed sigurg %d len %d %c\n",
+ signal_recvd, len, oob);
+ die(1);
+ }
+
+ signal_recvd = 0;
+ signal_producer(pipefd[1]);
+
+ /* Test 2:
+ * Verify that the first OOB is over written by
+ * the 2nd one and the first OOB is returned as
+ * part of the read, and sigurg is received.
+ */
+ wait_for_data(pfd, POLLIN | POLLPRI);
+ len = 0;
+ while (len < 70)
+ len = recv(pfd, buf, 1024, MSG_PEEK);
+ len = read_data(pfd, buf, 1024);
+ read_oob(pfd, &oob);
+ if (!signal_recvd || len != 127 || oob != '#') {
+ fprintf(stderr, "Test 2 failed, sigurg %d len %d OOB %c\n",
+ signal_recvd, len, oob);
+ die(1);
+ }
+
+ signal_recvd = 0;
+ signal_producer(pipefd[1]);
+
+ /* Test 3:
+ * verify that 2nd oob over writes
+ * the first one and read breaks at
+ * oob boundary returning 127 bytes
+ * and sigurg is received and atmark
+ * is set.
+ * oob is '%' and second read returns
+ * 64 bytes.
+ */
+ len = 0;
+ wait_for_data(pfd, POLLIN | POLLPRI);
+ while (len < 150)
+ len = recv(pfd, buf, 1024, MSG_PEEK);
+ len = read_data(pfd, buf, 1024);
+ atmark = is_sioctatmark(pfd);
+ read_oob(pfd, &oob);
+
+ if (!signal_recvd || len != 127 || oob != '%' || atmark != 1) {
+ fprintf(stderr,
+ "Test 3 failed, sigurg %d len %d OOB %c atmark %d\n",
+ signal_recvd, len, oob, atmark);
+ die(1);
+ }
+
+ signal_recvd = 0;
+
+ len = read_data(pfd, buf, 1024);
+ if (len != 64) {
+ fprintf(stderr, "Test 3.1 failed, sigurg %d len %d OOB %c\n",
+ signal_recvd, len, oob);
+ die(1);
+ }
+
+ signal_recvd = 0;
+ signal_producer(pipefd[1]);
+
+ /* Test 4:
+ * verify that a single byte
+ * oob message is delivered.
+ * set non blocking mode and
+ * check proper error is
+ * returned and sigurg is
+ * received and correct
+ * oob is read.
+ */
+
+ set_filemode(pfd, 0);
+
+ wait_for_data(pfd, POLLIN | POLLPRI);
+ len = read_data(pfd, buf, 1024);
+ if ((len == -1) && (errno == 11))
+ len = 0;
+
+ read_oob(pfd, &oob);
+
+ if (!signal_recvd || len != 0 || oob != '@') {
+ fprintf(stderr, "Test 4 failed, sigurg %d len %d OOB %c\n",
+ signal_recvd, len, oob);
+ die(1);
+ }
+
+ set_filemode(pfd, 1);
+
+ /* Inline Testing */
+
+ on = 1;
+ if (setsockopt(pfd, SOL_SOCKET, SO_OOBINLINE, &on, sizeof(on))) {
+ perror("SO_OOBINLINE");
+ die(1);
+ }
+
+ signal_recvd = 0;
+ signal_producer(pipefd[1]);
+
+ /* Test 1 -- Inline:
+ * Check that SIGURG is
+ * delivered and 63 bytes are
+ * read and oob is '@'
+ */
+
+ wait_for_data(pfd, POLLIN | POLLPRI);
+ len = read_data(pfd, buf, 1024);
+
+ if (!signal_recvd || len != 63) {
+ fprintf(stderr, "Test 1 Inline failed, sigurg %d len %d\n",
+ signal_recvd, len);
+ die(1);
+ }
+
+ len = read_data(pfd, buf, 1024);
+
+ if (len != 1) {
+ fprintf(stderr,
+ "Test 1.1 Inline failed, sigurg %d len %d oob %c\n",
+ signal_recvd, len, oob);
+ die(1);
+ }
+
+ signal_recvd = 0;
+ signal_producer(pipefd[1]);
+
+ /* Test 2 -- Inline:
+ * Verify that the first OOB is over written by
+ * the 2nd one and read breaks correctly on
+ * 2nd OOB boundary with the first OOB returned as
+ * part of the read, and sigurg is delivered and
+ * siocatmark returns true.
+ * next read returns one byte, the oob byte
+ * and siocatmark returns false.
+ */
+ len = 0;
+ wait_for_data(pfd, POLLIN | POLLPRI);
+ while (len < 70)
+ len = recv(pfd, buf, 1024, MSG_PEEK);
+ len = read_data(pfd, buf, 1024);
+ atmark = is_sioctatmark(pfd);
+ if (len != 127 || atmark != 1 || !signal_recvd) {
+ fprintf(stderr, "Test 2 Inline failed, len %d atmark %d\n",
+ len, atmark);
+ die(1);
+ }
+
+ len = read_data(pfd, buf, 1024);
+ atmark = is_sioctatmark(pfd);
+ if (len != 1 || buf[0] != '#' || atmark == 1) {
+ fprintf(stderr, "Test 2.1 Inline failed, len %d data %c atmark %d\n",
+ len, buf[0], atmark);
+ die(1);
+ }
+
+ signal_recvd = 0;
+ signal_producer(pipefd[1]);
+
+ /* Test 3 -- Inline:
+ * verify that 2nd oob over writes
+ * the first one and read breaks at
+ * oob boundary returning 127 bytes
+ * and sigurg is received and siocatmark
+ * is true after the read.
+ * subsequent read returns 65 bytes
+ * because of oob which should be '%'.
+ */
+ len = 0;
+ wait_for_data(pfd, POLLIN | POLLPRI);
+ while (len < 126)
+ len = recv(pfd, buf, 1024, MSG_PEEK);
+ len = read_data(pfd, buf, 1024);
+ atmark = is_sioctatmark(pfd);
+ if (!signal_recvd || len != 127 || !atmark) {
+ fprintf(stderr,
+ "Test 3 Inline failed, sigurg %d len %d data %c\n",
+ signal_recvd, len, buf[0]);
+ die(1);
+ }
+
+ len = read_data(pfd, buf, 1024);
+ atmark = is_sioctatmark(pfd);
+ if (len != 65 || buf[0] != '%' || atmark != 0) {
+ fprintf(stderr,
+ "Test 3.1 Inline failed, len %d oob %c atmark %d\n",
+ len, buf[0], atmark);
+ die(1);
+ }
+
+ signal_recvd = 0;
+ signal_producer(pipefd[1]);
+
+ /* Test 4 -- Inline:
+ * verify that a single
+ * byte oob message is delivered
+ * and read returns one byte, the oob
+ * byte and sigurg is received
+ */
+ wait_for_data(pfd, POLLIN | POLLPRI);
+ len = read_data(pfd, buf, 1024);
+ if (!signal_recvd || len != 1 || buf[0] != '@') {
+ fprintf(stderr,
+ "Test 4 Inline failed, signal %d len %d data %c\n",
+ signal_recvd, len, buf[0]);
+ die(1);
+ }
+ die(0);
+}
diff --git a/tools/testing/selftests/net/af_unix/unix_connect.c b/tools/testing/selftests/net/af_unix/unix_connect.c
new file mode 100644
index 000000000000..d799fd8f5c7c
--- /dev/null
+++ b/tools/testing/selftests/net/af_unix/unix_connect.c
@@ -0,0 +1,148 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#define _GNU_SOURCE
+#include <sched.h>
+
+#include <stddef.h>
+#include <stdio.h>
+#include <unistd.h>
+
+#include <sys/socket.h>
+#include <sys/un.h>
+
+#include "../../kselftest_harness.h"
+
+FIXTURE(unix_connect)
+{
+ int server, client;
+ int family;
+};
+
+FIXTURE_VARIANT(unix_connect)
+{
+ int type;
+ char sun_path[8];
+ int len;
+ int flags;
+ int err;
+};
+
+FIXTURE_VARIANT_ADD(unix_connect, stream_pathname)
+{
+ .type = SOCK_STREAM,
+ .sun_path = "test",
+ .len = 4 + 1,
+ .flags = 0,
+ .err = 0,
+};
+
+FIXTURE_VARIANT_ADD(unix_connect, stream_abstract)
+{
+ .type = SOCK_STREAM,
+ .sun_path = "\0test",
+ .len = 5,
+ .flags = 0,
+ .err = 0,
+};
+
+FIXTURE_VARIANT_ADD(unix_connect, stream_pathname_netns)
+{
+ .type = SOCK_STREAM,
+ .sun_path = "test",
+ .len = 4 + 1,
+ .flags = CLONE_NEWNET,
+ .err = 0,
+};
+
+FIXTURE_VARIANT_ADD(unix_connect, stream_abstract_netns)
+{
+ .type = SOCK_STREAM,
+ .sun_path = "\0test",
+ .len = 5,
+ .flags = CLONE_NEWNET,
+ .err = ECONNREFUSED,
+};
+
+FIXTURE_VARIANT_ADD(unix_connect, dgram_pathname)
+{
+ .type = SOCK_DGRAM,
+ .sun_path = "test",
+ .len = 4 + 1,
+ .flags = 0,
+ .err = 0,
+};
+
+FIXTURE_VARIANT_ADD(unix_connect, dgram_abstract)
+{
+ .type = SOCK_DGRAM,
+ .sun_path = "\0test",
+ .len = 5,
+ .flags = 0,
+ .err = 0,
+};
+
+FIXTURE_VARIANT_ADD(unix_connect, dgram_pathname_netns)
+{
+ .type = SOCK_DGRAM,
+ .sun_path = "test",
+ .len = 4 + 1,
+ .flags = CLONE_NEWNET,
+ .err = 0,
+};
+
+FIXTURE_VARIANT_ADD(unix_connect, dgram_abstract_netns)
+{
+ .type = SOCK_DGRAM,
+ .sun_path = "\0test",
+ .len = 5,
+ .flags = CLONE_NEWNET,
+ .err = ECONNREFUSED,
+};
+
+FIXTURE_SETUP(unix_connect)
+{
+ self->family = AF_UNIX;
+}
+
+FIXTURE_TEARDOWN(unix_connect)
+{
+ close(self->server);
+ close(self->client);
+
+ if (variant->sun_path[0])
+ remove("test");
+}
+
+TEST_F(unix_connect, test)
+{
+ socklen_t addrlen;
+ struct sockaddr_un addr = {
+ .sun_family = self->family,
+ };
+ int err;
+
+ self->server = socket(self->family, variant->type, 0);
+ ASSERT_NE(-1, self->server);
+
+ addrlen = offsetof(struct sockaddr_un, sun_path) + variant->len;
+ memcpy(&addr.sun_path, variant->sun_path, variant->len);
+
+ err = bind(self->server, (struct sockaddr *)&addr, addrlen);
+ ASSERT_EQ(0, err);
+
+ if (variant->type == SOCK_STREAM) {
+ err = listen(self->server, 32);
+ ASSERT_EQ(0, err);
+ }
+
+ err = unshare(variant->flags);
+ ASSERT_EQ(0, err);
+
+ self->client = socket(self->family, variant->type, 0);
+ ASSERT_LT(0, self->client);
+
+ err = connect(self->client, (struct sockaddr *)&addr, addrlen);
+ ASSERT_EQ(variant->err, err == -1 ? errno : 0);
+}
+
+TEST_HARNESS_MAIN
diff --git a/tools/testing/selftests/net/altnames.sh b/tools/testing/selftests/net/altnames.sh
index 4254ddc3f70b..1ef9e4159bba 100755
--- a/tools/testing/selftests/net/altnames.sh
+++ b/tools/testing/selftests/net/altnames.sh
@@ -45,7 +45,7 @@ altnames_test()
check_err $? "Got unexpected long alternative name from link show JSON"
ip link property del $DUMMY_DEV altname $SHORT_NAME
- check_err $? "Failed to add short alternative name"
+ check_err $? "Failed to delete short alternative name"
ip -j -p link show $SHORT_NAME &>/dev/null
check_fail $? "Unexpected success while trying to do link show with deleted short alternative name"
diff --git a/tools/testing/selftests/net/amt.sh b/tools/testing/selftests/net/amt.sh
new file mode 100755
index 000000000000..75528788cb95
--- /dev/null
+++ b/tools/testing/selftests/net/amt.sh
@@ -0,0 +1,284 @@
+#!/bin/sh
+# SPDX-License-Identifier: GPL-2.0
+
+# Author: Taehee Yoo <ap420073@gmail.com>
+#
+# This script evaluates the AMT driver.
+# There are four network-namespaces, LISTENER, SOURCE, GATEWAY, RELAY.
+# The role of LISTENER is to listen multicast traffic.
+# In order to do that, it send IGMP group join message.
+# The role of SOURCE is to send multicast traffic to listener.
+# The role of GATEWAY is to work Gateway role of AMT interface.
+# The role of RELAY is to work Relay role of AMT interface.
+#
+#
+# +------------------------+
+# | LISTENER netns |
+# | |
+# | +------------------+ |
+# | | l_gw | |
+# | | 192.168.0.2/24 | |
+# | | 2001:db8::2/64 | |
+# | +------------------+ |
+# | . |
+# +------------------------+
+# .
+# .
+# +-----------------------------------------------------+
+# | . GATEWAY netns |
+# | . |
+# |+---------------------------------------------------+|
+# || . br0 ||
+# || +------------------+ +------------------+ ||
+# || | gw_l | | amtg | ||
+# || | 192.168.0.1/24 | +--------+---------+ ||
+# || | 2001:db8::1/64 | | ||
+# || +------------------+ | ||
+# |+-------------------------------------|-------------+|
+# | | |
+# | +--------+---------+ |
+# | | gw_relay | |
+# | | 10.0.0.1/24 | |
+# | +------------------+ |
+# | . |
+# +-----------------------------------------------------+
+# .
+# .
+# +-----------------------------------------------------+
+# | RELAY netns . |
+# | +------------------+ |
+# | | relay_gw | |
+# | | 10.0.0.2/24 | |
+# | +--------+---------+ |
+# | | |
+# | | |
+# | +------------------+ +--------+---------+ |
+# | | relay_src | | amtr | |
+# | | 172.17.0.1/24 | +------------------+ |
+# | | 2001:db8:3::1/64 | |
+# | +------------------+ |
+# | . |
+# | . |
+# +-----------------------------------------------------+
+# .
+# .
+# +------------------------+
+# | . |
+# | +------------------+ |
+# | | src_relay | |
+# | | 172.17.0.2/24 | |
+# | | 2001:db8:3::2/64 | |
+# | +------------------+ |
+# | SOURCE netns |
+# +------------------------+
+#==============================================================================
+
+readonly LISTENER=$(mktemp -u listener-XXXXXXXX)
+readonly GATEWAY=$(mktemp -u gateway-XXXXXXXX)
+readonly RELAY=$(mktemp -u relay-XXXXXXXX)
+readonly SOURCE=$(mktemp -u source-XXXXXXXX)
+ERR=4
+err=0
+
+exit_cleanup()
+{
+ for ns in "$@"; do
+ ip netns delete "${ns}" 2>/dev/null || true
+ done
+
+ exit $ERR
+}
+
+create_namespaces()
+{
+ ip netns add "${LISTENER}" || exit_cleanup
+ ip netns add "${GATEWAY}" || exit_cleanup "${LISTENER}"
+ ip netns add "${RELAY}" || exit_cleanup "${LISTENER}" "${GATEWAY}"
+ ip netns add "${SOURCE}" || exit_cleanup "${LISTENER}" "${GATEWAY}" \
+ "${RELAY}"
+}
+
+# The trap function handler
+#
+exit_cleanup_all()
+{
+ exit_cleanup "${LISTENER}" "${GATEWAY}" "${RELAY}" "${SOURCE}"
+}
+
+setup_interface()
+{
+ for ns in "${LISTENER}" "${GATEWAY}" "${RELAY}" "${SOURCE}"; do
+ ip -netns "${ns}" link set dev lo up
+ done;
+
+ ip link add l_gw type veth peer name gw_l
+ ip link add gw_relay type veth peer name relay_gw
+ ip link add relay_src type veth peer name src_relay
+
+ ip link set l_gw netns "${LISTENER}" up
+ ip link set gw_l netns "${GATEWAY}" up
+ ip link set gw_relay netns "${GATEWAY}" up
+ ip link set relay_gw netns "${RELAY}" up
+ ip link set relay_src netns "${RELAY}" up
+ ip link set src_relay netns "${SOURCE}" up mtu 1400
+
+ ip netns exec "${LISTENER}" ip a a 192.168.0.2/24 dev l_gw
+ ip netns exec "${LISTENER}" ip r a default via 192.168.0.1 dev l_gw
+ ip netns exec "${LISTENER}" ip a a 2001:db8::2/64 dev l_gw
+ ip netns exec "${LISTENER}" ip r a default via 2001:db8::1 dev l_gw
+ ip netns exec "${LISTENER}" ip a a 239.0.0.1/32 dev l_gw autojoin
+ ip netns exec "${LISTENER}" ip a a ff0e::5:6/128 dev l_gw autojoin
+
+ ip netns exec "${GATEWAY}" ip a a 192.168.0.1/24 dev gw_l
+ ip netns exec "${GATEWAY}" ip a a 2001:db8::1/64 dev gw_l
+ ip netns exec "${GATEWAY}" ip a a 10.0.0.1/24 dev gw_relay
+ ip netns exec "${GATEWAY}" ip link add br0 type bridge
+ ip netns exec "${GATEWAY}" ip link set br0 up
+ ip netns exec "${GATEWAY}" ip link set gw_l master br0
+ ip netns exec "${GATEWAY}" ip link set gw_l up
+ ip netns exec "${GATEWAY}" ip link add amtg master br0 type amt \
+ mode gateway local 10.0.0.1 discovery 10.0.0.2 dev gw_relay \
+ gateway_port 2268 relay_port 2268
+ ip netns exec "${RELAY}" ip a a 10.0.0.2/24 dev relay_gw
+ ip netns exec "${RELAY}" ip link add amtr type amt mode relay \
+ local 10.0.0.2 dev relay_gw relay_port 2268 max_tunnels 4
+ ip netns exec "${RELAY}" ip a a 172.17.0.1/24 dev relay_src
+ ip netns exec "${RELAY}" ip a a 2001:db8:3::1/64 dev relay_src
+ ip netns exec "${SOURCE}" ip a a 172.17.0.2/24 dev src_relay
+ ip netns exec "${SOURCE}" ip a a 2001:db8:3::2/64 dev src_relay
+ ip netns exec "${SOURCE}" ip r a default via 172.17.0.1 dev src_relay
+ ip netns exec "${SOURCE}" ip r a default via 2001:db8:3::1 dev src_relay
+ ip netns exec "${RELAY}" ip link set amtr up
+ ip netns exec "${GATEWAY}" ip link set amtg up
+}
+
+setup_sysctl()
+{
+ ip netns exec "${RELAY}" sysctl net.ipv4.ip_forward=1 -w -q
+}
+
+setup_iptables()
+{
+ ip netns exec "${RELAY}" iptables -t mangle -I PREROUTING \
+ -d 239.0.0.1 -j TTL --ttl-set 2
+ ip netns exec "${RELAY}" ip6tables -t mangle -I PREROUTING \
+ -j HL --hl-set 2
+}
+
+setup_mcast_routing()
+{
+ ip netns exec "${RELAY}" smcrouted
+ ip netns exec "${RELAY}" smcroutectl a relay_src \
+ 172.17.0.2 239.0.0.1 amtr
+ ip netns exec "${RELAY}" smcroutectl a relay_src \
+ 2001:db8:3::2 ff0e::5:6 amtr
+}
+
+test_remote_ip()
+{
+ REMOTE=$(ip netns exec "${GATEWAY}" \
+ ip -d -j link show amtg | jq .[0].linkinfo.info_data.remote)
+ if [ $REMOTE == "\"10.0.0.2\"" ]; then
+ printf "TEST: %-60s [ OK ]\n" "amt discovery"
+ else
+ printf "TEST: %-60s [FAIL]\n" "amt discovery"
+ ERR=1
+ fi
+}
+
+send_mcast_torture4()
+{
+ ip netns exec "${SOURCE}" bash -c \
+ 'cat /dev/urandom | head -c 1G | nc -w 1 -u 239.0.0.1 4001'
+}
+
+
+send_mcast_torture6()
+{
+ ip netns exec "${SOURCE}" bash -c \
+ 'cat /dev/urandom | head -c 1G | nc -w 1 -u ff0e::5:6 6001'
+}
+
+check_features()
+{
+ ip link help 2>&1 | grep -q amt
+ if [ $? -ne 0 ]; then
+ echo "Missing amt support in iproute2" >&2
+ exit_cleanup
+ fi
+}
+
+test_ipv4_forward()
+{
+ RESULT4=$(ip netns exec "${LISTENER}" nc -w 1 -l -u 239.0.0.1 4000)
+ if [ "$RESULT4" == "172.17.0.2" ]; then
+ printf "TEST: %-60s [ OK ]\n" "IPv4 amt multicast forwarding"
+ exit 0
+ else
+ printf "TEST: %-60s [FAIL]\n" "IPv4 amt multicast forwarding"
+ exit 1
+ fi
+}
+
+test_ipv6_forward()
+{
+ RESULT6=$(ip netns exec "${LISTENER}" nc -w 1 -l -u ff0e::5:6 6000)
+ if [ "$RESULT6" == "2001:db8:3::2" ]; then
+ printf "TEST: %-60s [ OK ]\n" "IPv6 amt multicast forwarding"
+ exit 0
+ else
+ printf "TEST: %-60s [FAIL]\n" "IPv6 amt multicast forwarding"
+ exit 1
+ fi
+}
+
+send_mcast4()
+{
+ sleep 2
+ ip netns exec "${SOURCE}" bash -c \
+ 'echo 172.17.0.2 | nc -w 1 -u 239.0.0.1 4000' &
+}
+
+send_mcast6()
+{
+ sleep 2
+ ip netns exec "${SOURCE}" bash -c \
+ 'echo 2001:db8:3::2 | nc -w 1 -u ff0e::5:6 6000' &
+}
+
+check_features
+
+create_namespaces
+
+set -e
+trap exit_cleanup_all EXIT
+
+setup_interface
+setup_sysctl
+setup_iptables
+setup_mcast_routing
+test_remote_ip
+test_ipv4_forward &
+pid=$!
+send_mcast4
+wait $pid || err=$?
+if [ $err -eq 1 ]; then
+ ERR=1
+fi
+test_ipv6_forward &
+pid=$!
+send_mcast6
+wait $pid || err=$?
+if [ $err -eq 1 ]; then
+ ERR=1
+fi
+send_mcast_torture4
+printf "TEST: %-60s [ OK ]\n" "IPv4 amt traffic forwarding torture"
+send_mcast_torture6
+printf "TEST: %-60s [ OK ]\n" "IPv6 amt traffic forwarding torture"
+sleep 5
+if [ "${ERR}" -eq 1 ]; then
+ echo "Some tests failed." >&2
+else
+ ERR=0
+fi
diff --git a/tools/testing/selftests/net/arp_ndisc_evict_nocarrier.sh b/tools/testing/selftests/net/arp_ndisc_evict_nocarrier.sh
new file mode 100755
index 000000000000..92eb880c52f2
--- /dev/null
+++ b/tools/testing/selftests/net/arp_ndisc_evict_nocarrier.sh
@@ -0,0 +1,213 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+#
+# Tests sysctl options {arp,ndisc}_evict_nocarrier={0,1}
+#
+# Create a veth pair and set IPs/routes on both. Then ping to establish
+# an entry in the ARP/ND table. Depending on the test set sysctl option to
+# 1 or 0. Set remote veth down which will cause local veth to go into a no
+# carrier state. Depending on the test check the ARP/ND table:
+#
+# {arp,ndisc}_evict_nocarrier=1 should contain no ARP/ND after no carrier
+# {arp,ndisc}_evict_nocarrer=0 should still contain the single ARP/ND entry
+#
+
+source lib.sh
+
+readonly V4_ADDR0=10.0.10.1
+readonly V4_ADDR1=10.0.10.2
+readonly V6_ADDR0=2001:db8:91::1
+readonly V6_ADDR1=2001:db8:91::2
+nsid=100
+ret=0
+
+cleanup_v6()
+{
+ cleanup_ns ${me} ${peer}
+
+ sysctl -w net.ipv6.conf.veth1.ndisc_evict_nocarrier=1 >/dev/null 2>&1
+ sysctl -w net.ipv6.conf.all.ndisc_evict_nocarrier=1 >/dev/null 2>&1
+}
+
+setup_v6() {
+ setup_ns me peer
+
+ IP="ip -netns ${me}"
+
+ $IP li add veth1 type veth peer name veth2
+ $IP li set veth1 up
+ $IP -6 addr add $V6_ADDR0/64 dev veth1 nodad
+ $IP li set veth2 netns ${peer} up
+ ip -netns ${peer} -6 addr add $V6_ADDR1/64 dev veth2 nodad
+
+ ip netns exec ${me} sysctl -w $1 >/dev/null 2>&1
+
+ # Establish an ND cache entry
+ ip netns exec ${me} ping -6 -c1 -Iveth1 $V6_ADDR1 >/dev/null 2>&1
+ # Should have the veth1 entry in ND table
+ ip netns exec ${me} ip -6 neigh get $V6_ADDR1 dev veth1 >/dev/null 2>&1
+ if [ $? -ne 0 ]; then
+ cleanup_v6
+ echo "failed"
+ exit 1
+ fi
+
+ # Set veth2 down, which will put veth1 in NOCARRIER state
+ ip netns exec ${peer} ip link set veth2 down
+}
+
+setup_v4() {
+ setup_ns PEER_NS
+ ip link add name veth0 type veth peer name veth1
+ ip link set dev veth0 up
+ ip link set dev veth1 netns "${PEER_NS}"
+ ip netns exec "${PEER_NS}" ip link set dev veth1 up
+ ip addr add $V4_ADDR0/24 dev veth0
+ ip netns exec "${PEER_NS}" ip addr add $V4_ADDR1/24 dev veth1
+ ip netns exec ${PEER_NS} ip route add default via $V4_ADDR1 dev veth1
+ ip route add default via $V4_ADDR0 dev veth0
+
+ sysctl -w "$1" >/dev/null 2>&1
+
+ # Establish an ARP cache entry
+ ping -c1 -I veth0 $V4_ADDR1 -q >/dev/null 2>&1
+ # Should have the veth1 entry in ARP table
+ ip neigh get $V4_ADDR1 dev veth0 >/dev/null 2>&1
+ if [ $? -ne 0 ]; then
+ cleanup_v4
+ echo "failed"
+ exit 1
+ fi
+
+ # Set veth1 down, which will put veth0 in NOCARRIER state
+ ip netns exec "${PEER_NS}" ip link set veth1 down
+}
+
+cleanup_v4() {
+ ip neigh flush dev veth0
+ ip link del veth0
+ cleanup_ns $PEER_NS
+
+ sysctl -w net.ipv4.conf.veth0.arp_evict_nocarrier=1 >/dev/null 2>&1
+ sysctl -w net.ipv4.conf.all.arp_evict_nocarrier=1 >/dev/null 2>&1
+}
+
+# Run test when arp_evict_nocarrier = 1 (default).
+run_arp_evict_nocarrier_enabled() {
+ echo "run arp_evict_nocarrier=1 test"
+ setup_v4 "net.ipv4.conf.veth0.arp_evict_nocarrier=1"
+
+ # ARP table should be empty
+ ip neigh get $V4_ADDR1 dev veth0 >/dev/null 2>&1
+
+ if [ $? -eq 0 ];then
+ echo "failed"
+ ret=1
+ else
+ echo "ok"
+ fi
+
+ cleanup_v4
+}
+
+# Run test when arp_evict_nocarrier = 0
+run_arp_evict_nocarrier_disabled() {
+ echo "run arp_evict_nocarrier=0 test"
+ setup_v4 "net.ipv4.conf.veth0.arp_evict_nocarrier=0"
+
+ # ARP table should still contain the entry
+ ip neigh get $V4_ADDR1 dev veth0 >/dev/null 2>&1
+
+ if [ $? -eq 0 ];then
+ echo "ok"
+ else
+ echo "failed"
+ ret=1
+ fi
+
+ cleanup_v4
+}
+
+run_arp_evict_nocarrier_disabled_all() {
+ echo "run all.arp_evict_nocarrier=0 test"
+ setup_v4 "net.ipv4.conf.all.arp_evict_nocarrier=0"
+
+ # ARP table should still contain the entry
+ ip neigh get $V4_ADDR1 dev veth0 >/dev/null 2>&1
+
+ if [ $? -eq 0 ];then
+ echo "ok"
+ else
+ echo "failed"
+ fi
+
+ cleanup_v4
+}
+
+run_ndisc_evict_nocarrier_enabled() {
+ echo "run ndisc_evict_nocarrier=1 test"
+
+ setup_v6 "net.ipv6.conf.veth1.ndisc_evict_nocarrier=1"
+
+ ip netns exec ${me} ip -6 neigh get $V6_ADDR1 dev veth1 >/dev/null 2>&1
+
+ if [ $? -eq 0 ];then
+ echo "failed"
+ ret=1
+ else
+ echo "ok"
+ fi
+
+ cleanup_v6
+}
+
+run_ndisc_evict_nocarrier_disabled() {
+ echo "run ndisc_evict_nocarrier=0 test"
+
+ setup_v6 "net.ipv6.conf.veth1.ndisc_evict_nocarrier=0"
+
+ ip netns exec ${me} ip -6 neigh get $V6_ADDR1 dev veth1 >/dev/null 2>&1
+
+ if [ $? -eq 0 ];then
+ echo "ok"
+ else
+ echo "failed"
+ ret=1
+ fi
+
+ cleanup_v6
+}
+
+run_ndisc_evict_nocarrier_disabled_all() {
+ echo "run all.ndisc_evict_nocarrier=0 test"
+
+ setup_v6 "net.ipv6.conf.all.ndisc_evict_nocarrier=0"
+
+ ip netns exec ${me} ip -6 neigh get $V6_ADDR1 dev veth1 >/dev/null 2>&1
+
+ if [ $? -eq 0 ];then
+ echo "ok"
+ else
+ echo "failed"
+ ret=1
+ fi
+
+ cleanup_v6
+}
+
+run_all_tests() {
+ run_arp_evict_nocarrier_enabled
+ run_arp_evict_nocarrier_disabled
+ run_arp_evict_nocarrier_disabled_all
+ run_ndisc_evict_nocarrier_enabled
+ run_ndisc_evict_nocarrier_disabled
+ run_ndisc_evict_nocarrier_disabled_all
+}
+
+if [ "$(id -u)" -ne 0 ];then
+ echo "SKIP: Need root privileges"
+ exit $ksft_skip;
+fi
+
+run_all_tests
+exit $ret
diff --git a/tools/testing/selftests/net/arp_ndisc_untracked_subnets.sh b/tools/testing/selftests/net/arp_ndisc_untracked_subnets.sh
new file mode 100755
index 000000000000..a40c0e9bd023
--- /dev/null
+++ b/tools/testing/selftests/net/arp_ndisc_untracked_subnets.sh
@@ -0,0 +1,302 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+#
+# 2 namespaces: one host and one router. Use arping from the host to send a
+# garp to the router. Router accepts or ignores based on its arp_accept
+# or accept_untracked_na configuration.
+
+source lib.sh
+
+TESTS="arp ndisc"
+
+ROUTER_INTF="veth-router"
+ROUTER_ADDR="10.0.10.1"
+ROUTER_ADDR_V6="2001:db8:abcd:0012::1"
+
+HOST_INTF="veth-host"
+HOST_ADDR="10.0.10.2"
+HOST_ADDR_V6="2001:db8:abcd:0012::2"
+
+SUBNET_WIDTH=24
+PREFIX_WIDTH_V6=64
+
+cleanup() {
+ cleanup_ns ${HOST_NS} ${ROUTER_NS}
+}
+
+cleanup_v6() {
+ cleanup_ns ${HOST_NS_V6} ${ROUTER_NS_V6}
+}
+
+setup() {
+ set -e
+ local arp_accept=$1
+
+ # Set up two namespaces
+ setup_ns HOST_NS ROUTER_NS
+
+ # Set up interfaces veth0 and veth1, which are pairs in separate
+ # namespaces. veth0 is veth-router, veth1 is veth-host.
+ # first, set up the inteface's link to the namespace
+ # then, set the interface "up"
+ ip netns exec ${ROUTER_NS} ip link add name ${ROUTER_INTF} \
+ type veth peer name ${HOST_INTF}
+
+ ip netns exec ${ROUTER_NS} ip link set dev ${ROUTER_INTF} up
+ ip netns exec ${ROUTER_NS} ip link set dev ${HOST_INTF} netns ${HOST_NS}
+
+ ip netns exec ${HOST_NS} ip link set dev ${HOST_INTF} up
+ ip netns exec ${ROUTER_NS} ip addr add ${ROUTER_ADDR}/${SUBNET_WIDTH} \
+ dev ${ROUTER_INTF}
+
+ ip netns exec ${HOST_NS} ip addr add ${HOST_ADDR}/${SUBNET_WIDTH} \
+ dev ${HOST_INTF}
+ ip netns exec ${HOST_NS} ip route add default via ${HOST_ADDR} \
+ dev ${HOST_INTF}
+ ip netns exec ${ROUTER_NS} ip route add default via ${ROUTER_ADDR} \
+ dev ${ROUTER_INTF}
+
+ ROUTER_CONF=net.ipv4.conf.${ROUTER_INTF}
+ ip netns exec ${ROUTER_NS} sysctl -w \
+ ${ROUTER_CONF}.arp_accept=${arp_accept} >/dev/null 2>&1
+ set +e
+}
+
+setup_v6() {
+ set -e
+ local accept_untracked_na=$1
+
+ # Set up two namespaces
+ setup_ns HOST_NS_V6 ROUTER_NS_V6
+
+ # Set up interfaces veth0 and veth1, which are pairs in separate
+ # namespaces. veth0 is veth-router, veth1 is veth-host.
+ # first, set up the inteface's link to the namespace
+ # then, set the interface "up"
+ ip -6 -netns ${ROUTER_NS_V6} link add name ${ROUTER_INTF} \
+ type veth peer name ${HOST_INTF}
+
+ ip -6 -netns ${ROUTER_NS_V6} link set dev ${ROUTER_INTF} up
+ ip -6 -netns ${ROUTER_NS_V6} link set dev ${HOST_INTF} netns \
+ ${HOST_NS_V6}
+
+ ip -6 -netns ${HOST_NS_V6} link set dev ${HOST_INTF} up
+ ip -6 -netns ${ROUTER_NS_V6} addr add \
+ ${ROUTER_ADDR_V6}/${PREFIX_WIDTH_V6} dev ${ROUTER_INTF} nodad
+
+ HOST_CONF=net.ipv6.conf.${HOST_INTF}
+ ip netns exec ${HOST_NS_V6} sysctl -qw ${HOST_CONF}.ndisc_notify=1
+ ip netns exec ${HOST_NS_V6} sysctl -qw ${HOST_CONF}.disable_ipv6=0
+ ip -6 -netns ${HOST_NS_V6} addr add ${HOST_ADDR_V6}/${PREFIX_WIDTH_V6} \
+ dev ${HOST_INTF}
+
+ ROUTER_CONF=net.ipv6.conf.${ROUTER_INTF}
+
+ ip netns exec ${ROUTER_NS_V6} sysctl -w \
+ ${ROUTER_CONF}.forwarding=1 >/dev/null 2>&1
+ ip netns exec ${ROUTER_NS_V6} sysctl -w \
+ ${ROUTER_CONF}.drop_unsolicited_na=0 >/dev/null 2>&1
+ ip netns exec ${ROUTER_NS_V6} sysctl -w \
+ ${ROUTER_CONF}.accept_untracked_na=${accept_untracked_na} \
+ >/dev/null 2>&1
+ set +e
+}
+
+verify_arp() {
+ local arp_accept=$1
+ local same_subnet=$2
+
+ neigh_show_output=$(ip netns exec ${ROUTER_NS} ip neigh get \
+ ${HOST_ADDR} dev ${ROUTER_INTF} 2>/dev/null)
+
+ if [ ${arp_accept} -eq 1 ]; then
+ # Neighbor entries expected
+ [[ ${neigh_show_output} ]]
+ elif [ ${arp_accept} -eq 2 ]; then
+ if [ ${same_subnet} -eq 1 ]; then
+ # Neighbor entries expected
+ [[ ${neigh_show_output} ]]
+ else
+ [[ -z "${neigh_show_output}" ]]
+ fi
+ else
+ [[ -z "${neigh_show_output}" ]]
+ fi
+ }
+
+arp_test_gratuitous() {
+ set -e
+ local arp_accept=$1
+ local same_subnet=$2
+
+ if [ ${arp_accept} -eq 2 ]; then
+ test_msg=("test_arp: "
+ "accept_arp=$1 "
+ "same_subnet=$2")
+ if [ ${same_subnet} -eq 0 ]; then
+ HOST_ADDR=10.0.11.3
+ else
+ HOST_ADDR=10.0.10.3
+ fi
+ else
+ test_msg=("test_arp: "
+ "accept_arp=$1")
+ fi
+ # Supply arp_accept option to set up which sets it in sysctl
+ setup ${arp_accept}
+ ip netns exec ${HOST_NS} arping -A -I ${HOST_INTF} -U ${HOST_ADDR} -c1 2>&1 >/dev/null
+
+ if verify_arp $1 $2; then
+ printf " TEST: %-60s [ OK ]\n" "${test_msg[*]}"
+ else
+ printf " TEST: %-60s [FAIL]\n" "${test_msg[*]}"
+ fi
+ cleanup
+ set +e
+}
+
+arp_test_gratuitous_combinations() {
+ arp_test_gratuitous 0
+ arp_test_gratuitous 1
+ arp_test_gratuitous 2 0 # Second entry indicates subnet or not
+ arp_test_gratuitous 2 1
+}
+
+cleanup_tcpdump() {
+ set -e
+ [[ ! -z ${tcpdump_stdout} ]] && rm -f ${tcpdump_stdout}
+ [[ ! -z ${tcpdump_stderr} ]] && rm -f ${tcpdump_stderr}
+ tcpdump_stdout=
+ tcpdump_stderr=
+ set +e
+}
+
+start_tcpdump() {
+ set -e
+ tcpdump_stdout=`mktemp`
+ tcpdump_stderr=`mktemp`
+ ip netns exec ${ROUTER_NS_V6} timeout 15s \
+ tcpdump --immediate-mode -tpni ${ROUTER_INTF} -c 1 \
+ "icmp6 && icmp6[0] == 136 && src ${HOST_ADDR_V6}" \
+ > ${tcpdump_stdout} 2> /dev/null
+ set +e
+}
+
+verify_ndisc() {
+ local accept_untracked_na=$1
+ local same_subnet=$2
+
+ neigh_show_output=$(ip -6 -netns ${ROUTER_NS_V6} neigh show \
+ to ${HOST_ADDR_V6} dev ${ROUTER_INTF} nud stale)
+
+ if [ ${accept_untracked_na} -eq 1 ]; then
+ # Neighbour entry expected to be present
+ [[ ${neigh_show_output} ]]
+ elif [ ${accept_untracked_na} -eq 2 ]; then
+ if [ ${same_subnet} -eq 1 ]; then
+ [[ ${neigh_show_output} ]]
+ else
+ [[ -z "${neigh_show_output}" ]]
+ fi
+ else
+ # Neighbour entry expected to be absent for all other cases
+ [[ -z "${neigh_show_output}" ]]
+ fi
+}
+
+ndisc_test_untracked_advertisements() {
+ set -e
+ test_msg=("test_ndisc: "
+ "accept_untracked_na=$1")
+
+ local accept_untracked_na=$1
+ local same_subnet=$2
+ if [ ${accept_untracked_na} -eq 2 ]; then
+ test_msg=("test_ndisc: "
+ "accept_untracked_na=$1 "
+ "same_subnet=$2")
+ if [ ${same_subnet} -eq 0 ]; then
+ # Not same subnet
+ HOST_ADDR_V6=2000:db8:abcd:0013::4
+ else
+ HOST_ADDR_V6=2001:db8:abcd:0012::3
+ fi
+ fi
+ setup_v6 $1 $2
+ start_tcpdump
+
+ if verify_ndisc $1 $2; then
+ printf " TEST: %-60s [ OK ]\n" "${test_msg[*]}"
+ else
+ printf " TEST: %-60s [FAIL]\n" "${test_msg[*]}"
+ fi
+
+ cleanup_tcpdump
+ cleanup_v6
+ set +e
+}
+
+ndisc_test_untracked_combinations() {
+ ndisc_test_untracked_advertisements 0
+ ndisc_test_untracked_advertisements 1
+ ndisc_test_untracked_advertisements 2 0
+ ndisc_test_untracked_advertisements 2 1
+}
+
+################################################################################
+# usage
+
+usage()
+{
+ cat <<EOF
+usage: ${0##*/} OPTS
+
+ -t <test> Test(s) to run (default: all)
+ (options: $TESTS)
+EOF
+}
+
+################################################################################
+# main
+
+while getopts ":t:h" opt; do
+ case $opt in
+ t) TESTS=$OPTARG;;
+ h) usage; exit 0;;
+ *) usage; exit 1;;
+ esac
+done
+
+if [ "$(id -u)" -ne 0 ];then
+ echo "SKIP: Need root privileges"
+ exit $ksft_skip;
+fi
+
+if [ ! -x "$(command -v ip)" ]; then
+ echo "SKIP: Could not run test without ip tool"
+ exit $ksft_skip
+fi
+
+if [ ! -x "$(command -v tcpdump)" ]; then
+ echo "SKIP: Could not run test without tcpdump tool"
+ exit $ksft_skip
+fi
+
+if [ ! -x "$(command -v arping)" ]; then
+ echo "SKIP: Could not run test without arping tool"
+ exit $ksft_skip
+fi
+
+# start clean
+cleanup &> /dev/null
+cleanup_v6 &> /dev/null
+
+for t in $TESTS
+do
+ case $t in
+ arp_test_gratuitous_combinations|arp) arp_test_gratuitous_combinations;;
+ ndisc_test_untracked_combinations|ndisc) \
+ ndisc_test_untracked_combinations;;
+ help) echo "Test names: $TESTS"; exit 0;;
+esac
+done
diff --git a/tools/testing/selftests/net/bareudp.sh b/tools/testing/selftests/net/bareudp.sh
new file mode 100755
index 000000000000..f366cadbc5e8
--- /dev/null
+++ b/tools/testing/selftests/net/bareudp.sh
@@ -0,0 +1,546 @@
+#!/bin/sh
+# SPDX-License-Identifier: GPL-2.0
+
+# Test various bareudp tunnel configurations.
+#
+# The bareudp module allows to tunnel network protocols like IP or MPLS over
+# UDP, without adding any intermediate header. This scripts tests several
+# configurations of bareudp (using IPv4 or IPv6 as underlay and transporting
+# IPv4, IPv6 or MPLS packets on the overlay).
+#
+# Network topology:
+#
+# * A chain of 4 network namespaces, connected with veth pairs. Each veth
+# is assigned an IPv4 and an IPv6 address. A host-route allows a veth to
+# join its peer.
+#
+# * NS0 and NS3 are at the extremities of the chain. They have additional
+# IPv4 and IPv6 addresses on their loopback device. Routes are added in NS0
+# and NS3, so that they can communicate using these overlay IP addresses.
+# For IPv4 and IPv6 reachability tests, the route simply sets the peer's
+# veth address as gateway. For MPLS reachability tests, an MPLS header is
+# also pushed before the IP header.
+#
+# * NS1 and NS2 are the intermediate namespaces. They use a bareudp device to
+# encapsulate the traffic into UDP.
+#
+# +-----------------------------------------------------------------------+
+# | NS0 |
+# | |
+# | lo: |
+# | * IPv4 address: 192.0.2.100/32 |
+# | * IPv6 address: 2001:db8::100/128 |
+# | * IPv6 address: 2001:db8::200/128 |
+# | * IPv4 route: 192.0.2.103/32 reachable via 192.0.2.11 |
+# | * IPv6 route: 2001:db8::103/128 reachable via 2001:db8::11 |
+# | * IPv6 route: 2001:db8::203/128 reachable via 2001:db8::11 |
+# | (encapsulated with MPLS label 203) |
+# | |
+# | veth01: |
+# | ^ * IPv4 address: 192.0.2.10, peer 192.0.2.11/32 |
+# | | * IPv6 address: 2001:db8::10, peer 2001:db8::11/128 |
+# | | |
+# +---+-------------------------------------------------------------------+
+# |
+# | Traffic type: IP or MPLS (depending on test)
+# |
+# +---+-------------------------------------------------------------------+
+# | | NS1 |
+# | | |
+# | v |
+# | veth10: |
+# | * IPv4 address: 192.0.2.11, peer 192.0.2.10/32 |
+# | * IPv6 address: 2001:db8::11, peer 2001:db8::10/128 |
+# | |
+# | bareudp_ns1: |
+# | * Encapsulate IP or MPLS packets received on veth10 into UDP |
+# | and send the resulting packets through veth12. |
+# | * Decapsulate bareudp packets (either IP or MPLS, over UDP) |
+# | received on veth12 and send the inner packets through veth10. |
+# | |
+# | veth12: |
+# | ^ * IPv4 address: 192.0.2.21, peer 192.0.2.22/32 |
+# | | * IPv6 address: 2001:db8::21, peer 2001:db8::22/128 |
+# | | |
+# +---+-------------------------------------------------------------------+
+# |
+# | Traffic type: IP or MPLS (depending on test), over UDP
+# |
+# +---+-------------------------------------------------------------------+
+# | | NS2 |
+# | | |
+# | v |
+# | veth21: |
+# | * IPv4 address: 192.0.2.22, peer 192.0.2.21/32 |
+# | * IPv6 address: 2001:db8::22, peer 2001:db8::21/128 |
+# | |
+# | bareudp_ns2: |
+# | * Decapsulate bareudp packets (either IP or MPLS, over UDP) |
+# | received on veth21 and send the inner packets through veth23. |
+# | * Encapsulate IP or MPLS packets received on veth23 into UDP |
+# | and send the resulting packets through veth21. |
+# | |
+# | veth23: |
+# | ^ * IPv4 address: 192.0.2.32, peer 192.0.2.33/32 |
+# | | * IPv6 address: 2001:db8::32, peer 2001:db8::33/128 |
+# | | |
+# +---+-------------------------------------------------------------------+
+# |
+# | Traffic type: IP or MPLS (depending on test)
+# |
+# +---+-------------------------------------------------------------------+
+# | | NS3 |
+# | v |
+# | veth32: |
+# | * IPv4 address: 192.0.2.33, peer 192.0.2.32/32 |
+# | * IPv6 address: 2001:db8::33, peer 2001:db8::32/128 |
+# | |
+# | lo: |
+# | * IPv4 address: 192.0.2.103/32 |
+# | * IPv6 address: 2001:db8::103/128 |
+# | * IPv6 address: 2001:db8::203/128 |
+# | * IPv4 route: 192.0.2.100/32 reachable via 192.0.2.32 |
+# | * IPv6 route: 2001:db8::100/128 reachable via 2001:db8::32 |
+# | * IPv6 route: 2001:db8::200/128 reachable via 2001:db8::32 |
+# | (encapsulated with MPLS label 200) |
+# | |
+# +-----------------------------------------------------------------------+
+
+ERR=4 # Return 4 by default, which is the SKIP code for kselftest
+PING6="ping"
+PAUSE_ON_FAIL="no"
+
+readonly NS0=$(mktemp -u ns0-XXXXXXXX)
+readonly NS1=$(mktemp -u ns1-XXXXXXXX)
+readonly NS2=$(mktemp -u ns2-XXXXXXXX)
+readonly NS3=$(mktemp -u ns3-XXXXXXXX)
+
+# Exit the script after having removed the network namespaces it created
+#
+# Parameters:
+#
+# * The list of network namespaces to delete before exiting.
+#
+exit_cleanup()
+{
+ for ns in "$@"; do
+ ip netns delete "${ns}" 2>/dev/null || true
+ done
+
+ if [ "${ERR}" -eq 4 ]; then
+ echo "Error: Setting up the testing environment failed." >&2
+ fi
+
+ exit "${ERR}"
+}
+
+# Create the four network namespaces used by the script (NS0, NS1, NS2 and NS3)
+#
+# New namespaces are cleaned up manually in case of error, to ensure that only
+# namespaces created by this script are deleted.
+create_namespaces()
+{
+ ip netns add "${NS0}" || exit_cleanup
+ ip netns add "${NS1}" || exit_cleanup "${NS0}"
+ ip netns add "${NS2}" || exit_cleanup "${NS0}" "${NS1}"
+ ip netns add "${NS3}" || exit_cleanup "${NS0}" "${NS1}" "${NS2}"
+}
+
+# The trap function handler
+#
+exit_cleanup_all()
+{
+ exit_cleanup "${NS0}" "${NS1}" "${NS2}" "${NS3}"
+}
+
+# Configure a network interface using a host route
+#
+# Parameters
+#
+# * $1: the netns the network interface resides in,
+# * $2: the network interface name,
+# * $3: the local IPv4 address to assign to this interface,
+# * $4: the IPv4 address of the remote network interface,
+# * $5: the local IPv6 address to assign to this interface,
+# * $6: the IPv6 address of the remote network interface.
+#
+iface_config()
+{
+ local NS="${1}"; readonly NS
+ local DEV="${2}"; readonly DEV
+ local LOCAL_IP4="${3}"; readonly LOCAL_IP4
+ local PEER_IP4="${4}"; readonly PEER_IP4
+ local LOCAL_IP6="${5}"; readonly LOCAL_IP6
+ local PEER_IP6="${6}"; readonly PEER_IP6
+
+ ip -netns "${NS}" link set dev "${DEV}" up
+ ip -netns "${NS}" address add dev "${DEV}" "${LOCAL_IP4}" peer "${PEER_IP4}"
+ ip -netns "${NS}" address add dev "${DEV}" "${LOCAL_IP6}" peer "${PEER_IP6}" nodad
+}
+
+# Create base networking topology:
+#
+# * set up the loopback device in all network namespaces (NS0..NS3),
+# * set up a veth pair to connect each netns in sequence (NS0 with NS1,
+# NS1 with NS2, etc.),
+# * add and IPv4 and an IPv6 address on each veth interface,
+# * prepare the ingress qdiscs in the intermediate namespaces.
+#
+setup_underlay()
+{
+ for ns in "${NS0}" "${NS1}" "${NS2}" "${NS3}"; do
+ ip -netns "${ns}" link set dev lo up
+ done;
+
+ ip link add name veth01 netns "${NS0}" type veth peer name veth10 netns "${NS1}"
+ ip link add name veth12 netns "${NS1}" type veth peer name veth21 netns "${NS2}"
+ ip link add name veth23 netns "${NS2}" type veth peer name veth32 netns "${NS3}"
+ iface_config "${NS0}" veth01 192.0.2.10 192.0.2.11/32 2001:db8::10 2001:db8::11/128
+ iface_config "${NS1}" veth10 192.0.2.11 192.0.2.10/32 2001:db8::11 2001:db8::10/128
+ iface_config "${NS1}" veth12 192.0.2.21 192.0.2.22/32 2001:db8::21 2001:db8::22/128
+ iface_config "${NS2}" veth21 192.0.2.22 192.0.2.21/32 2001:db8::22 2001:db8::21/128
+ iface_config "${NS2}" veth23 192.0.2.32 192.0.2.33/32 2001:db8::32 2001:db8::33/128
+ iface_config "${NS3}" veth32 192.0.2.33 192.0.2.32/32 2001:db8::33 2001:db8::32/128
+
+ tc -netns "${NS1}" qdisc add dev veth10 ingress
+ tc -netns "${NS2}" qdisc add dev veth23 ingress
+}
+
+# Set up the IPv4, IPv6 and MPLS overlays.
+#
+# Configuration is similar for all protocols:
+#
+# * add an overlay IP address on the loopback interface of each edge
+# namespace,
+# * route these IP addresses via the intermediate namespaces (for the MPLS
+# tests, this is also where MPLS encapsulation is done),
+# * add routes for these IP addresses (or MPLS labels) in the intermediate
+# namespaces.
+#
+# The bareudp encapsulation isn't configured in setup_overlay_*(). That will be
+# done just before running the reachability tests.
+
+setup_overlay_ipv4()
+{
+ # Add the overlay IP addresses and route them through the veth devices
+ ip -netns "${NS0}" address add 192.0.2.100/32 dev lo
+ ip -netns "${NS3}" address add 192.0.2.103/32 dev lo
+ ip -netns "${NS0}" route add 192.0.2.103/32 src 192.0.2.100 via 192.0.2.11
+ ip -netns "${NS3}" route add 192.0.2.100/32 src 192.0.2.103 via 192.0.2.32
+
+ # Route the overlay addresses in the intermediate namespaces
+ # (used after bareudp decapsulation)
+ ip netns exec "${NS1}" sysctl -qw net.ipv4.ip_forward=1
+ ip netns exec "${NS2}" sysctl -qw net.ipv4.ip_forward=1
+ ip -netns "${NS1}" route add 192.0.2.100/32 via 192.0.2.10
+ ip -netns "${NS2}" route add 192.0.2.103/32 via 192.0.2.33
+
+ # The intermediate namespaces don't have routes for the reverse path,
+ # as it will be handled by tc. So we need to ensure that rp_filter is
+ # not going to block the traffic.
+ ip netns exec "${NS1}" sysctl -qw net.ipv4.conf.all.rp_filter=0
+ ip netns exec "${NS2}" sysctl -qw net.ipv4.conf.all.rp_filter=0
+ ip netns exec "${NS1}" sysctl -qw net.ipv4.conf.default.rp_filter=0
+ ip netns exec "${NS2}" sysctl -qw net.ipv4.conf.default.rp_filter=0
+}
+
+setup_overlay_ipv6()
+{
+ # Add the overlay IP addresses and route them through the veth devices
+ ip -netns "${NS0}" address add 2001:db8::100/128 dev lo
+ ip -netns "${NS3}" address add 2001:db8::103/128 dev lo
+ ip -netns "${NS0}" route add 2001:db8::103/128 src 2001:db8::100 via 2001:db8::11
+ ip -netns "${NS3}" route add 2001:db8::100/128 src 2001:db8::103 via 2001:db8::32
+
+ # Route the overlay addresses in the intermediate namespaces
+ # (used after bareudp decapsulation)
+ ip netns exec "${NS1}" sysctl -qw net.ipv6.conf.all.forwarding=1
+ ip netns exec "${NS2}" sysctl -qw net.ipv6.conf.all.forwarding=1
+ ip -netns "${NS1}" route add 2001:db8::100/128 via 2001:db8::10
+ ip -netns "${NS2}" route add 2001:db8::103/128 via 2001:db8::33
+}
+
+setup_overlay_mpls()
+{
+ # Add specific overlay IP addresses, routed over MPLS
+ ip -netns "${NS0}" address add 2001:db8::200/128 dev lo
+ ip -netns "${NS3}" address add 2001:db8::203/128 dev lo
+ ip -netns "${NS0}" route add 2001:db8::203/128 src 2001:db8::200 encap mpls 203 via 2001:db8::11
+ ip -netns "${NS3}" route add 2001:db8::200/128 src 2001:db8::203 encap mpls 200 via 2001:db8::32
+
+ # Route the MPLS packets in the intermediate namespaces
+ # (used after bareudp decapsulation)
+ ip netns exec "${NS1}" sysctl -qw net.mpls.platform_labels=256
+ ip netns exec "${NS2}" sysctl -qw net.mpls.platform_labels=256
+ ip -netns "${NS1}" -family mpls route add 200 via inet6 2001:db8::10
+ ip -netns "${NS2}" -family mpls route add 203 via inet6 2001:db8::33
+}
+
+# Run "ping" from NS0 and print the result
+#
+# Parameters:
+#
+# * $1: the variant of ping to use (normally either "ping" or "ping6"),
+# * $2: the IP address to ping,
+# * $3: a human readable description of the purpose of the test.
+#
+# If the test fails and PAUSE_ON_FAIL is active, the user is given the
+# possibility to continue with the next test or to quit immediately.
+#
+ping_test_one()
+{
+ local PING="$1"; readonly PING
+ local IP="$2"; readonly IP
+ local MSG="$3"; readonly MSG
+ local RET
+
+ printf "TEST: %-60s " "${MSG}"
+
+ set +e
+ ip netns exec "${NS0}" "${PING}" -w 5 -c 1 "${IP}" > /dev/null 2>&1
+ RET=$?
+ set -e
+
+ if [ "${RET}" -eq 0 ]; then
+ printf "[ OK ]\n"
+ else
+ ERR=1
+ printf "[FAIL]\n"
+ if [ "${PAUSE_ON_FAIL}" = "yes" ]; then
+ printf "\nHit enter to continue, 'q' to quit\n"
+ read a
+ if [ "$a" = "q" ]; then
+ exit 1
+ fi
+ fi
+ fi
+}
+
+# Run reachability tests
+#
+# Parameters:
+#
+# * $1: human readable string describing the underlay protocol.
+#
+# $IPV4, $IPV6, $MPLS_UC and $MULTIPROTO are inherited from the calling
+# function.
+#
+ping_test()
+{
+ local UNDERLAY="$1"; readonly UNDERLAY
+ local MODE
+ local MSG
+
+ if [ "${MULTIPROTO}" = "multiproto" ]; then
+ MODE=" (multiproto mode)"
+ else
+ MODE=""
+ fi
+
+ if [ $IPV4 ]; then
+ ping_test_one "ping" "192.0.2.103" "IPv4 packets over ${UNDERLAY}${MODE}"
+ fi
+ if [ $IPV6 ]; then
+ ping_test_one "${PING6}" "2001:db8::103" "IPv6 packets over ${UNDERLAY}${MODE}"
+ fi
+ if [ $MPLS_UC ]; then
+ ping_test_one "${PING6}" "2001:db8::203" "Unicast MPLS packets over ${UNDERLAY}${MODE}"
+ fi
+}
+
+# Set up a bareudp overlay and run reachability tests over IPv4 and IPv6
+#
+# Parameters:
+#
+# * $1: the packet type (protocol) to be handled by bareudp,
+# * $2: a flag to activate or deactivate bareudp's "multiproto" mode.
+#
+test_overlay()
+{
+ local ETHERTYPE="$1"; readonly ETHERTYPE
+ local MULTIPROTO="$2"; readonly MULTIPROTO
+ local IPV4
+ local IPV6
+ local MPLS_UC
+
+ case "${ETHERTYPE}" in
+ "ipv4")
+ IPV4="ipv4"
+ if [ "${MULTIPROTO}" = "multiproto" ]; then
+ IPV6="ipv6"
+ else
+ IPV6=""
+ fi
+ MPLS_UC=""
+ ;;
+ "ipv6")
+ IPV6="ipv6"
+ IPV4=""
+ MPLS_UC=""
+ ;;
+ "mpls_uc")
+ MPLS_UC="mpls_uc"
+ IPV4=""
+ IPV6=""
+ ;;
+ *)
+ exit 1
+ ;;
+ esac
+ readonly IPV4
+ readonly IPV6
+ readonly MPLS_UC
+
+ # Create the bareudp devices in the intermediate namespaces
+ ip -netns "${NS1}" link add name bareudp_ns1 up type bareudp dstport 6635 ethertype "${ETHERTYPE}" "${MULTIPROTO}"
+ ip -netns "${NS2}" link add name bareudp_ns2 up type bareudp dstport 6635 ethertype "${ETHERTYPE}" "${MULTIPROTO}"
+
+ # IPv4 over UDPv4
+ if [ $IPV4 ]; then
+ # Encapsulation instructions for bareudp over IPv4
+ tc -netns "${NS1}" filter add dev veth10 ingress protocol ipv4 \
+ flower dst_ip 192.0.2.103/32 \
+ action tunnel_key set src_ip 192.0.2.21 dst_ip 192.0.2.22 id 0 \
+ action mirred egress redirect dev bareudp_ns1
+ tc -netns "${NS2}" filter add dev veth23 ingress protocol ipv4 \
+ flower dst_ip 192.0.2.100/32 \
+ action tunnel_key set src_ip 192.0.2.22 dst_ip 192.0.2.21 id 0 \
+ action mirred egress redirect dev bareudp_ns2
+ fi
+
+ # IPv6 over UDPv4
+ if [ $IPV6 ]; then
+ # Encapsulation instructions for bareudp over IPv4
+ tc -netns "${NS1}" filter add dev veth10 ingress protocol ipv6 \
+ flower dst_ip 2001:db8::103/128 \
+ action tunnel_key set src_ip 192.0.2.21 dst_ip 192.0.2.22 id 0 \
+ action mirred egress redirect dev bareudp_ns1
+ tc -netns "${NS2}" filter add dev veth23 ingress protocol ipv6 \
+ flower dst_ip 2001:db8::100/128 \
+ action tunnel_key set src_ip 192.0.2.22 dst_ip 192.0.2.21 id 0 \
+ action mirred egress redirect dev bareudp_ns2
+ fi
+
+ # MPLS (unicast) over UDPv4
+ if [ $MPLS_UC ]; then
+ ip netns exec "${NS1}" sysctl -qw net.mpls.conf.bareudp_ns1.input=1
+ ip netns exec "${NS2}" sysctl -qw net.mpls.conf.bareudp_ns2.input=1
+
+ # Encapsulation instructions for bareudp over IPv4
+ tc -netns "${NS1}" filter add dev veth10 ingress protocol mpls_uc \
+ flower mpls_label 203 \
+ action tunnel_key set src_ip 192.0.2.21 dst_ip 192.0.2.22 id 0 \
+ action mirred egress redirect dev bareudp_ns1
+ tc -netns "${NS2}" filter add dev veth23 ingress protocol mpls_uc \
+ flower mpls_label 200 \
+ action tunnel_key set src_ip 192.0.2.22 dst_ip 192.0.2.21 id 0 \
+ action mirred egress redirect dev bareudp_ns2
+ fi
+
+ # Test IPv4 underlay
+ ping_test "UDPv4"
+
+ # Cleanup bareudp encapsulation instructions, as they were specific to
+ # the IPv4 underlay, before setting up and testing the IPv6 underlay
+ tc -netns "${NS1}" filter delete dev veth10 ingress
+ tc -netns "${NS2}" filter delete dev veth23 ingress
+
+ # IPv4 over UDPv6
+ if [ $IPV4 ]; then
+ # New encapsulation instructions for bareudp over IPv6
+ tc -netns "${NS1}" filter add dev veth10 ingress protocol ipv4 \
+ flower dst_ip 192.0.2.103/32 \
+ action tunnel_key set src_ip 2001:db8::21 dst_ip 2001:db8::22 id 0 \
+ action mirred egress redirect dev bareudp_ns1
+ tc -netns "${NS2}" filter add dev veth23 ingress protocol ipv4 \
+ flower dst_ip 192.0.2.100/32 \
+ action tunnel_key set src_ip 2001:db8::22 dst_ip 2001:db8::21 id 0 \
+ action mirred egress redirect dev bareudp_ns2
+ fi
+
+ # IPv6 over UDPv6
+ if [ $IPV6 ]; then
+ # New encapsulation instructions for bareudp over IPv6
+ tc -netns "${NS1}" filter add dev veth10 ingress protocol ipv6 \
+ flower dst_ip 2001:db8::103/128 \
+ action tunnel_key set src_ip 2001:db8::21 dst_ip 2001:db8::22 id 0 \
+ action mirred egress redirect dev bareudp_ns1
+ tc -netns "${NS2}" filter add dev veth23 ingress protocol ipv6 \
+ flower dst_ip 2001:db8::100/128 \
+ action tunnel_key set src_ip 2001:db8::22 dst_ip 2001:db8::21 id 0 \
+ action mirred egress redirect dev bareudp_ns2
+ fi
+
+ # MPLS (unicast) over UDPv6
+ if [ $MPLS_UC ]; then
+ # New encapsulation instructions for bareudp over IPv6
+ tc -netns "${NS1}" filter add dev veth10 ingress protocol mpls_uc \
+ flower mpls_label 203 \
+ action tunnel_key set src_ip 2001:db8::21 dst_ip 2001:db8::22 id 0 \
+ action mirred egress redirect dev bareudp_ns1
+ tc -netns "${NS2}" filter add dev veth23 ingress protocol mpls_uc \
+ flower mpls_label 200 \
+ action tunnel_key set src_ip 2001:db8::22 dst_ip 2001:db8::21 id 0 \
+ action mirred egress redirect dev bareudp_ns2
+ fi
+
+ # Test IPv6 underlay
+ ping_test "UDPv6"
+
+ tc -netns "${NS1}" filter delete dev veth10 ingress
+ tc -netns "${NS2}" filter delete dev veth23 ingress
+ ip -netns "${NS1}" link delete bareudp_ns1
+ ip -netns "${NS2}" link delete bareudp_ns2
+}
+
+check_features()
+{
+ ip link help 2>&1 | grep -q bareudp
+ if [ $? -ne 0 ]; then
+ echo "Missing bareudp support in iproute2" >&2
+ exit_cleanup
+ fi
+
+ # Use ping6 on systems where ping doesn't handle IPv6
+ ping -w 1 -c 1 ::1 > /dev/null 2>&1 || PING6="ping6"
+}
+
+usage()
+{
+ echo "Usage: $0 [-p]"
+ exit 1
+}
+
+while getopts :p o
+do
+ case $o in
+ p) PAUSE_ON_FAIL="yes";;
+ *) usage;;
+ esac
+done
+
+check_features
+
+# Create namespaces before setting up the exit trap.
+# Otherwise, exit_cleanup_all() could delete namespaces that were not created
+# by this script.
+create_namespaces
+
+set -e
+trap exit_cleanup_all EXIT
+
+setup_underlay
+setup_overlay_ipv4
+setup_overlay_ipv6
+setup_overlay_mpls
+
+test_overlay ipv4 nomultiproto
+test_overlay ipv6 nomultiproto
+test_overlay ipv4 multiproto
+test_overlay mpls_uc nomultiproto
+
+if [ "${ERR}" -eq 1 ]; then
+ echo "Some tests failed." >&2
+else
+ ERR=0
+fi
diff --git a/tools/testing/selftests/net/big_tcp.sh b/tools/testing/selftests/net/big_tcp.sh
new file mode 100755
index 000000000000..2db9d15cd45f
--- /dev/null
+++ b/tools/testing/selftests/net/big_tcp.sh
@@ -0,0 +1,182 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+#
+# Testing For IPv4 and IPv6 BIG TCP.
+# TOPO: CLIENT_NS (link0)<--->(link1) ROUTER_NS (link2)<--->(link3) SERVER_NS
+
+CLIENT_NS=$(mktemp -u client-XXXXXXXX)
+CLIENT_IP4="198.51.100.1"
+CLIENT_IP6="2001:db8:1::1"
+
+SERVER_NS=$(mktemp -u server-XXXXXXXX)
+SERVER_IP4="203.0.113.1"
+SERVER_IP6="2001:db8:2::1"
+
+ROUTER_NS=$(mktemp -u router-XXXXXXXX)
+SERVER_GW4="203.0.113.2"
+CLIENT_GW4="198.51.100.2"
+SERVER_GW6="2001:db8:2::2"
+CLIENT_GW6="2001:db8:1::2"
+
+MAX_SIZE=128000
+CHK_SIZE=65535
+
+# Kselftest framework requirement - SKIP code is 4.
+ksft_skip=4
+
+setup() {
+ ip netns add $CLIENT_NS
+ ip netns add $SERVER_NS
+ ip netns add $ROUTER_NS
+ ip -net $ROUTER_NS link add link1 type veth peer name link0 netns $CLIENT_NS
+ ip -net $ROUTER_NS link add link2 type veth peer name link3 netns $SERVER_NS
+
+ ip -net $CLIENT_NS link set link0 up
+ ip -net $CLIENT_NS link set link0 mtu 1442
+ ip -net $CLIENT_NS addr add $CLIENT_IP4/24 dev link0
+ ip -net $CLIENT_NS addr add $CLIENT_IP6/64 dev link0 nodad
+ ip -net $CLIENT_NS route add $SERVER_IP4 dev link0 via $CLIENT_GW4
+ ip -net $CLIENT_NS route add $SERVER_IP6 dev link0 via $CLIENT_GW6
+ ip -net $CLIENT_NS link set dev link0 \
+ gro_ipv4_max_size $MAX_SIZE gso_ipv4_max_size $MAX_SIZE
+ ip -net $CLIENT_NS link set dev link0 \
+ gro_max_size $MAX_SIZE gso_max_size $MAX_SIZE
+ ip net exec $CLIENT_NS sysctl -wq net.ipv4.tcp_window_scaling=10
+
+ ip -net $ROUTER_NS link set link1 up
+ ip -net $ROUTER_NS link set link2 up
+ ip -net $ROUTER_NS addr add $CLIENT_GW4/24 dev link1
+ ip -net $ROUTER_NS addr add $CLIENT_GW6/64 dev link1 nodad
+ ip -net $ROUTER_NS addr add $SERVER_GW4/24 dev link2
+ ip -net $ROUTER_NS addr add $SERVER_GW6/64 dev link2 nodad
+ ip -net $ROUTER_NS link set dev link1 \
+ gro_ipv4_max_size $MAX_SIZE gso_ipv4_max_size $MAX_SIZE
+ ip -net $ROUTER_NS link set dev link2 \
+ gro_ipv4_max_size $MAX_SIZE gso_ipv4_max_size $MAX_SIZE
+ ip -net $ROUTER_NS link set dev link1 \
+ gro_max_size $MAX_SIZE gso_max_size $MAX_SIZE
+ ip -net $ROUTER_NS link set dev link2 \
+ gro_max_size $MAX_SIZE gso_max_size $MAX_SIZE
+ # test for nf_ct_skb_network_trim in nf_conntrack_ovs used by TC ct action.
+ ip net exec $ROUTER_NS tc qdisc add dev link1 ingress
+ ip net exec $ROUTER_NS tc filter add dev link1 ingress \
+ proto ip flower ip_proto tcp action ct
+ ip net exec $ROUTER_NS tc filter add dev link1 ingress \
+ proto ipv6 flower ip_proto tcp action ct
+ ip net exec $ROUTER_NS sysctl -wq net.ipv4.ip_forward=1
+ ip net exec $ROUTER_NS sysctl -wq net.ipv6.conf.all.forwarding=1
+
+ ip -net $SERVER_NS link set link3 up
+ ip -net $SERVER_NS addr add $SERVER_IP4/24 dev link3
+ ip -net $SERVER_NS addr add $SERVER_IP6/64 dev link3 nodad
+ ip -net $SERVER_NS route add $CLIENT_IP4 dev link3 via $SERVER_GW4
+ ip -net $SERVER_NS route add $CLIENT_IP6 dev link3 via $SERVER_GW6
+ ip -net $SERVER_NS link set dev link3 \
+ gro_ipv4_max_size $MAX_SIZE gso_ipv4_max_size $MAX_SIZE
+ ip -net $SERVER_NS link set dev link3 \
+ gro_max_size $MAX_SIZE gso_max_size $MAX_SIZE
+ ip net exec $SERVER_NS sysctl -wq net.ipv4.tcp_window_scaling=10
+ ip net exec $SERVER_NS netserver 2>&1 >/dev/null
+}
+
+cleanup() {
+ ip net exec $SERVER_NS pkill netserver
+ ip -net $ROUTER_NS link del link1
+ ip -net $ROUTER_NS link del link2
+ ip netns del "$CLIENT_NS"
+ ip netns del "$SERVER_NS"
+ ip netns del "$ROUTER_NS"
+}
+
+start_counter() {
+ local ipt="iptables"
+ local iface=$1
+ local netns=$2
+
+ [ "$NF" = "6" ] && ipt="ip6tables"
+ ip net exec $netns $ipt -t raw -A PREROUTING -i $iface \
+ -m length ! --length 0:$CHK_SIZE -j ACCEPT
+}
+
+check_counter() {
+ local ipt="iptables"
+ local iface=$1
+ local netns=$2
+
+ [ "$NF" = "6" ] && ipt="ip6tables"
+ test `ip net exec $netns $ipt -t raw -L -v |grep $iface | awk '{print $1}'` != "0"
+}
+
+stop_counter() {
+ local ipt="iptables"
+ local iface=$1
+ local netns=$2
+
+ [ "$NF" = "6" ] && ipt="ip6tables"
+ ip net exec $netns $ipt -t raw -D PREROUTING -i $iface \
+ -m length ! --length 0:$CHK_SIZE -j ACCEPT
+}
+
+do_netperf() {
+ local serip=$SERVER_IP4
+ local netns=$1
+
+ [ "$NF" = "6" ] && serip=$SERVER_IP6
+
+ # use large write to be sure to generate big tcp packets
+ ip net exec $netns netperf -$NF -t TCP_STREAM -l 1 -H $serip -- -m 262144 2>&1 >/dev/null
+}
+
+do_test() {
+ local cli_tso=$1
+ local gw_gro=$2
+ local gw_tso=$3
+ local ser_gro=$4
+ local ret="PASS"
+
+ ip net exec $CLIENT_NS ethtool -K link0 tso $cli_tso
+ ip net exec $ROUTER_NS ethtool -K link1 gro $gw_gro
+ ip net exec $ROUTER_NS ethtool -K link2 tso $gw_tso
+ ip net exec $SERVER_NS ethtool -K link3 gro $ser_gro
+
+ start_counter link1 $ROUTER_NS
+ start_counter link3 $SERVER_NS
+ do_netperf $CLIENT_NS
+
+ if check_counter link1 $ROUTER_NS; then
+ check_counter link3 $SERVER_NS || ret="FAIL_on_link3"
+ else
+ ret="FAIL_on_link1"
+ fi
+
+ stop_counter link1 $ROUTER_NS
+ stop_counter link3 $SERVER_NS
+ printf "%-9s %-8s %-8s %-8s: [%s]\n" \
+ $cli_tso $gw_gro $gw_tso $ser_gro $ret
+ test $ret = "PASS"
+}
+
+testup() {
+ echo "CLI GSO | GW GRO | GW GSO | SER GRO" && \
+ do_test "on" "on" "on" "on" && \
+ do_test "on" "off" "on" "off" && \
+ do_test "off" "on" "on" "on" && \
+ do_test "on" "on" "off" "on" && \
+ do_test "off" "on" "off" "on"
+}
+
+if ! netperf -V &> /dev/null; then
+ echo "SKIP: Could not run test without netperf tool"
+ exit $ksft_skip
+fi
+
+if ! ip link help 2>&1 | grep gso_ipv4_max_size &> /dev/null; then
+ echo "SKIP: Could not run test without gso/gro_ipv4_max_size supported in ip-link"
+ exit $ksft_skip
+fi
+
+trap cleanup EXIT
+setup && echo "Testing for BIG TCP:" && \
+NF=4 testup && echo "***v4 Tests Done***" && \
+NF=6 testup && echo "***v6 Tests Done***"
+exit $?
diff --git a/tools/testing/selftests/net/bind_bhash.c b/tools/testing/selftests/net/bind_bhash.c
new file mode 100644
index 000000000000..57ff67a3751e
--- /dev/null
+++ b/tools/testing/selftests/net/bind_bhash.c
@@ -0,0 +1,144 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * This times how long it takes to bind to a port when the port already
+ * has multiple sockets in its bhash table.
+ *
+ * In the setup(), we populate the port's bhash table with
+ * MAX_THREADS * MAX_CONNECTIONS number of entries.
+ */
+
+#include <unistd.h>
+#include <stdio.h>
+#include <netdb.h>
+#include <pthread.h>
+#include <string.h>
+#include <stdbool.h>
+
+#define MAX_THREADS 600
+#define MAX_CONNECTIONS 40
+
+static const char *setup_addr_v6 = "::1";
+static const char *setup_addr_v4 = "127.0.0.1";
+static const char *setup_addr;
+static const char *bind_addr;
+static const char *port;
+bool use_v6;
+int ret;
+
+static int fd_array[MAX_THREADS][MAX_CONNECTIONS];
+
+static int bind_socket(int opt, const char *addr)
+{
+ struct addrinfo *res, hint = {};
+ int sock_fd, reuse = 1, err;
+ int domain = use_v6 ? AF_INET6 : AF_INET;
+
+ sock_fd = socket(domain, SOCK_STREAM, 0);
+ if (sock_fd < 0) {
+ perror("socket fd err");
+ return sock_fd;
+ }
+
+ hint.ai_family = domain;
+ hint.ai_socktype = SOCK_STREAM;
+
+ err = getaddrinfo(addr, port, &hint, &res);
+ if (err) {
+ perror("getaddrinfo failed");
+ goto cleanup;
+ }
+
+ if (opt) {
+ err = setsockopt(sock_fd, SOL_SOCKET, opt, &reuse, sizeof(reuse));
+ if (err) {
+ perror("setsockopt failed");
+ goto cleanup;
+ }
+ }
+
+ err = bind(sock_fd, res->ai_addr, res->ai_addrlen);
+ if (err) {
+ perror("failed to bind to port");
+ goto cleanup;
+ }
+
+ return sock_fd;
+
+cleanup:
+ close(sock_fd);
+ return err;
+}
+
+static void *setup(void *arg)
+{
+ int sock_fd, i;
+ int *array = (int *)arg;
+
+ for (i = 0; i < MAX_CONNECTIONS; i++) {
+ sock_fd = bind_socket(SO_REUSEADDR | SO_REUSEPORT, setup_addr);
+ if (sock_fd < 0) {
+ ret = sock_fd;
+ pthread_exit(&ret);
+ }
+ array[i] = sock_fd;
+ }
+
+ return NULL;
+}
+
+int main(int argc, const char *argv[])
+{
+ int listener_fd, sock_fd, i, j;
+ pthread_t tid[MAX_THREADS];
+ clock_t begin, end;
+
+ if (argc != 4) {
+ printf("Usage: listener <port> <ipv6 | ipv4> <bind-addr>\n");
+ return -1;
+ }
+
+ port = argv[1];
+ use_v6 = strcmp(argv[2], "ipv6") == 0;
+ bind_addr = argv[3];
+
+ setup_addr = use_v6 ? setup_addr_v6 : setup_addr_v4;
+
+ listener_fd = bind_socket(SO_REUSEADDR | SO_REUSEPORT, setup_addr);
+ if (listen(listener_fd, 100) < 0) {
+ perror("listen failed");
+ return -1;
+ }
+
+ /* Set up threads to populate the bhash table entry for the port */
+ for (i = 0; i < MAX_THREADS; i++)
+ pthread_create(&tid[i], NULL, setup, fd_array[i]);
+
+ for (i = 0; i < MAX_THREADS; i++)
+ pthread_join(tid[i], NULL);
+
+ if (ret)
+ goto done;
+
+ begin = clock();
+
+ /* Bind to the same port on a different address */
+ sock_fd = bind_socket(0, bind_addr);
+ if (sock_fd < 0)
+ goto done;
+
+ end = clock();
+
+ printf("time spent = %f\n", (double)(end - begin) / CLOCKS_PER_SEC);
+
+ /* clean up */
+ close(sock_fd);
+
+done:
+ close(listener_fd);
+ for (i = 0; i < MAX_THREADS; i++) {
+ for (j = 0; i < MAX_THREADS; i++)
+ close(fd_array[i][j]);
+ }
+
+ return 0;
+}
diff --git a/tools/testing/selftests/net/bind_bhash.sh b/tools/testing/selftests/net/bind_bhash.sh
new file mode 100755
index 000000000000..a28563bdaae0
--- /dev/null
+++ b/tools/testing/selftests/net/bind_bhash.sh
@@ -0,0 +1,68 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+NR_FILES=32768
+readonly NETNS="ns-$(mktemp -u XXXXXX)"
+
+# default values
+port=443
+addr_v6="2001:0db8:0:f101::1"
+addr_v4="10.8.8.8"
+use_v6=true
+addr=""
+
+usage() {
+ echo "Usage: $0 [-6 | -4] [-p port] [-a address]"
+ echo -e "\t6: use ipv6"
+ echo -e "\t4: use ipv4"
+ echo -e "\tport: Port number"
+ echo -e "\taddress: ip address"
+}
+
+while getopts "ha:p:64" opt; do
+ case ${opt} in
+ h)
+ usage $0
+ exit 0
+ ;;
+ a) addr=$OPTARG;;
+ p)
+ port=$OPTARG;;
+ 6)
+ use_v6=true;;
+ 4)
+ use_v6=false;;
+ esac
+done
+
+setup() {
+ ip netns add "${NETNS}"
+ ip -netns "${NETNS}" link add veth0 type veth peer name veth1
+ ip -netns "${NETNS}" link set lo up
+ ip -netns "${NETNS}" link set veth0 up
+ ip -netns "${NETNS}" link set veth1 up
+
+ if [[ "$use_v6" == true ]]; then
+ ip -netns "${NETNS}" addr add $addr_v6 nodad dev veth0
+ else
+ ip -netns "${NETNS}" addr add $addr_v4 dev lo
+ fi
+}
+
+cleanup() {
+ ip netns del "${NETNS}"
+}
+
+if [[ "$addr" != "" ]]; then
+ addr_v4=$addr;
+ addr_v6=$addr;
+fi
+setup
+if [[ "$use_v6" == true ]] ; then
+ ip netns exec "${NETNS}" sh -c \
+ "ulimit -n ${NR_FILES};./bind_bhash ${port} ipv6 ${addr_v6}"
+else
+ ip netns exec "${NETNS}" sh -c \
+ "ulimit -n ${NR_FILES};./bind_bhash ${port} ipv4 ${addr_v4}"
+fi
+cleanup
diff --git a/tools/testing/selftests/net/bind_timewait.c b/tools/testing/selftests/net/bind_timewait.c
new file mode 100644
index 000000000000..cb9fdf51ea59
--- /dev/null
+++ b/tools/testing/selftests/net/bind_timewait.c
@@ -0,0 +1,92 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright Amazon.com Inc. or its affiliates. */
+
+#include <sys/socket.h>
+#include <netinet/in.h>
+
+#include "../kselftest_harness.h"
+
+FIXTURE(bind_timewait)
+{
+ struct sockaddr_in addr;
+ socklen_t addrlen;
+};
+
+FIXTURE_VARIANT(bind_timewait)
+{
+ __u32 addr_const;
+};
+
+FIXTURE_VARIANT_ADD(bind_timewait, localhost)
+{
+ .addr_const = INADDR_LOOPBACK
+};
+
+FIXTURE_VARIANT_ADD(bind_timewait, addrany)
+{
+ .addr_const = INADDR_ANY
+};
+
+FIXTURE_SETUP(bind_timewait)
+{
+ self->addr.sin_family = AF_INET;
+ self->addr.sin_port = 0;
+ self->addr.sin_addr.s_addr = htonl(variant->addr_const);
+ self->addrlen = sizeof(self->addr);
+}
+
+FIXTURE_TEARDOWN(bind_timewait)
+{
+}
+
+void create_timewait_socket(struct __test_metadata *_metadata,
+ FIXTURE_DATA(bind_timewait) *self)
+{
+ int server_fd, client_fd, child_fd, ret;
+ struct sockaddr_in addr;
+ socklen_t addrlen;
+
+ server_fd = socket(AF_INET, SOCK_STREAM, 0);
+ ASSERT_GT(server_fd, 0);
+
+ ret = bind(server_fd, (struct sockaddr *)&self->addr, self->addrlen);
+ ASSERT_EQ(ret, 0);
+
+ ret = listen(server_fd, 1);
+ ASSERT_EQ(ret, 0);
+
+ ret = getsockname(server_fd, (struct sockaddr *)&self->addr, &self->addrlen);
+ ASSERT_EQ(ret, 0);
+
+ client_fd = socket(AF_INET, SOCK_STREAM, 0);
+ ASSERT_GT(client_fd, 0);
+
+ ret = connect(client_fd, (struct sockaddr *)&self->addr, self->addrlen);
+ ASSERT_EQ(ret, 0);
+
+ addrlen = sizeof(addr);
+ child_fd = accept(server_fd, (struct sockaddr *)&addr, &addrlen);
+ ASSERT_GT(child_fd, 0);
+
+ close(child_fd);
+ close(client_fd);
+ close(server_fd);
+}
+
+TEST_F(bind_timewait, 1)
+{
+ int fd, ret;
+
+ create_timewait_socket(_metadata, self);
+
+ fd = socket(AF_INET, SOCK_STREAM, 0);
+ ASSERT_GT(fd, 0);
+
+ ret = bind(fd, (struct sockaddr *)&self->addr, self->addrlen);
+ ASSERT_EQ(ret, -1);
+ ASSERT_EQ(errno, EADDRINUSE);
+
+ close(fd);
+}
+
+TEST_HARNESS_MAIN
diff --git a/tools/testing/selftests/net/bind_wildcard.c b/tools/testing/selftests/net/bind_wildcard.c
new file mode 100644
index 000000000000..a2662348cdb1
--- /dev/null
+++ b/tools/testing/selftests/net/bind_wildcard.c
@@ -0,0 +1,160 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright Amazon.com Inc. or its affiliates. */
+
+#include <sys/socket.h>
+#include <netinet/in.h>
+
+#include "../kselftest_harness.h"
+
+struct in6_addr in6addr_v4mapped_any = {
+ .s6_addr = {
+ 0, 0, 0, 0,
+ 0, 0, 0, 0,
+ 0, 0, 255, 255,
+ 0, 0, 0, 0
+ }
+};
+
+struct in6_addr in6addr_v4mapped_loopback = {
+ .s6_addr = {
+ 0, 0, 0, 0,
+ 0, 0, 0, 0,
+ 0, 0, 255, 255,
+ 127, 0, 0, 1
+ }
+};
+
+FIXTURE(bind_wildcard)
+{
+ struct sockaddr_in addr4;
+ struct sockaddr_in6 addr6;
+};
+
+FIXTURE_VARIANT(bind_wildcard)
+{
+ const __u32 addr4_const;
+ const struct in6_addr *addr6_const;
+ int expected_errno;
+};
+
+FIXTURE_VARIANT_ADD(bind_wildcard, v4_any_v6_any)
+{
+ .addr4_const = INADDR_ANY,
+ .addr6_const = &in6addr_any,
+ .expected_errno = EADDRINUSE,
+};
+
+FIXTURE_VARIANT_ADD(bind_wildcard, v4_any_v6_local)
+{
+ .addr4_const = INADDR_ANY,
+ .addr6_const = &in6addr_loopback,
+ .expected_errno = 0,
+};
+
+FIXTURE_VARIANT_ADD(bind_wildcard, v4_any_v6_v4mapped_any)
+{
+ .addr4_const = INADDR_ANY,
+ .addr6_const = &in6addr_v4mapped_any,
+ .expected_errno = EADDRINUSE,
+};
+
+FIXTURE_VARIANT_ADD(bind_wildcard, v4_any_v6_v4mapped_local)
+{
+ .addr4_const = INADDR_ANY,
+ .addr6_const = &in6addr_v4mapped_loopback,
+ .expected_errno = EADDRINUSE,
+};
+
+FIXTURE_VARIANT_ADD(bind_wildcard, v4_local_v6_any)
+{
+ .addr4_const = INADDR_LOOPBACK,
+ .addr6_const = &in6addr_any,
+ .expected_errno = EADDRINUSE,
+};
+
+FIXTURE_VARIANT_ADD(bind_wildcard, v4_local_v6_local)
+{
+ .addr4_const = INADDR_LOOPBACK,
+ .addr6_const = &in6addr_loopback,
+ .expected_errno = 0,
+};
+
+FIXTURE_VARIANT_ADD(bind_wildcard, v4_local_v6_v4mapped_any)
+{
+ .addr4_const = INADDR_LOOPBACK,
+ .addr6_const = &in6addr_v4mapped_any,
+ .expected_errno = EADDRINUSE,
+};
+
+FIXTURE_VARIANT_ADD(bind_wildcard, v4_local_v6_v4mapped_local)
+{
+ .addr4_const = INADDR_LOOPBACK,
+ .addr6_const = &in6addr_v4mapped_loopback,
+ .expected_errno = EADDRINUSE,
+};
+
+FIXTURE_SETUP(bind_wildcard)
+{
+ self->addr4.sin_family = AF_INET;
+ self->addr4.sin_port = htons(0);
+ self->addr4.sin_addr.s_addr = htonl(variant->addr4_const);
+
+ self->addr6.sin6_family = AF_INET6;
+ self->addr6.sin6_port = htons(0);
+ self->addr6.sin6_addr = *variant->addr6_const;
+}
+
+FIXTURE_TEARDOWN(bind_wildcard)
+{
+}
+
+void bind_sockets(struct __test_metadata *_metadata,
+ FIXTURE_DATA(bind_wildcard) *self,
+ int expected_errno,
+ struct sockaddr *addr1, socklen_t addrlen1,
+ struct sockaddr *addr2, socklen_t addrlen2)
+{
+ int fd[2];
+ int ret;
+
+ fd[0] = socket(addr1->sa_family, SOCK_STREAM, 0);
+ ASSERT_GT(fd[0], 0);
+
+ ret = bind(fd[0], addr1, addrlen1);
+ ASSERT_EQ(ret, 0);
+
+ ret = getsockname(fd[0], addr1, &addrlen1);
+ ASSERT_EQ(ret, 0);
+
+ ((struct sockaddr_in *)addr2)->sin_port = ((struct sockaddr_in *)addr1)->sin_port;
+
+ fd[1] = socket(addr2->sa_family, SOCK_STREAM, 0);
+ ASSERT_GT(fd[1], 0);
+
+ ret = bind(fd[1], addr2, addrlen2);
+ if (expected_errno) {
+ ASSERT_EQ(ret, -1);
+ ASSERT_EQ(errno, expected_errno);
+ } else {
+ ASSERT_EQ(ret, 0);
+ }
+
+ close(fd[1]);
+ close(fd[0]);
+}
+
+TEST_F(bind_wildcard, v4_v6)
+{
+ bind_sockets(_metadata, self, variant->expected_errno,
+ (struct sockaddr *)&self->addr4, sizeof(self->addr4),
+ (struct sockaddr *)&self->addr6, sizeof(self->addr6));
+}
+
+TEST_F(bind_wildcard, v6_v4)
+{
+ bind_sockets(_metadata, self, variant->expected_errno,
+ (struct sockaddr *)&self->addr6, sizeof(self->addr6),
+ (struct sockaddr *)&self->addr4, sizeof(self->addr4));
+}
+
+TEST_HARNESS_MAIN
diff --git a/tools/testing/selftests/net/cmsg_ipv6.sh b/tools/testing/selftests/net/cmsg_ipv6.sh
new file mode 100755
index 000000000000..8bc23fb4c82b
--- /dev/null
+++ b/tools/testing/selftests/net/cmsg_ipv6.sh
@@ -0,0 +1,154 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+source lib.sh
+
+IP6=2001:db8:1::1/64
+TGT6=2001:db8:1::2
+TMPF=$(mktemp --suffix ".pcap")
+
+cleanup()
+{
+ rm -f $TMPF
+ cleanup_ns $NS
+}
+
+trap cleanup EXIT
+
+tcpdump -h | grep immediate-mode >> /dev/null
+if [ $? -ne 0 ]; then
+ echo "SKIP - tcpdump with --immediate-mode option required"
+ exit $ksft_skip
+fi
+
+# Namespaces
+setup_ns NS
+NSEXE="ip netns exec $NS"
+
+$NSEXE sysctl -w net.ipv4.ping_group_range='0 2147483647' > /dev/null
+
+# Connectivity
+ip -netns $NS link add type dummy
+ip -netns $NS link set dev dummy0 up
+ip -netns $NS addr add $IP6 dev dummy0
+
+# Test
+BAD=0
+TOTAL=0
+
+check_result() {
+ ((TOTAL++))
+ if [ $1 -ne $2 ]; then
+ echo " Case $3 returned $1, expected $2"
+ ((BAD++))
+ fi
+}
+
+# IPV6_DONTFRAG
+for ovr in setsock cmsg both diff; do
+ for df in 0 1; do
+ for p in u i r; do
+ [ $p == "u" ] && prot=UDP
+ [ $p == "i" ] && prot=ICMP
+ [ $p == "r" ] && prot=RAW
+
+ [ $ovr == "setsock" ] && m="-F $df"
+ [ $ovr == "cmsg" ] && m="-f $df"
+ [ $ovr == "both" ] && m="-F $df -f $df"
+ [ $ovr == "diff" ] && m="-F $((1 - df)) -f $df"
+
+ $NSEXE ./cmsg_sender -s -S 2000 -6 -p $p $m $TGT6 1234
+ check_result $? $df "DONTFRAG $prot $ovr"
+ done
+ done
+done
+
+# IPV6_TCLASS
+TOS=0x10
+TOS2=0x20
+
+ip -6 -netns $NS rule add tos $TOS lookup 300
+ip -6 -netns $NS route add table 300 prohibit any
+
+for ovr in setsock cmsg both diff; do
+ for p in u i r; do
+ [ $p == "u" ] && prot=UDP
+ [ $p == "i" ] && prot=ICMP
+ [ $p == "r" ] && prot=RAW
+
+ [ $ovr == "setsock" ] && m="-C"
+ [ $ovr == "cmsg" ] && m="-c"
+ [ $ovr == "both" ] && m="-C $((TOS2)) -c"
+ [ $ovr == "diff" ] && m="-C $((TOS )) -c"
+
+ $NSEXE nohup tcpdump --immediate-mode -p -ni dummy0 -w $TMPF -c 4 2> /dev/null &
+ BG=$!
+ sleep 0.05
+
+ $NSEXE ./cmsg_sender -6 -p $p $m $((TOS2)) $TGT6 1234
+ check_result $? 0 "TCLASS $prot $ovr - pass"
+
+ while [ -d /proc/$BG ]; do
+ $NSEXE ./cmsg_sender -6 -p $p $m $((TOS2)) $TGT6 1234
+ done
+
+ tcpdump -r $TMPF -v 2>&1 | grep "class $TOS2" >> /dev/null
+ check_result $? 0 "TCLASS $prot $ovr - packet data"
+ rm $TMPF
+
+ [ $ovr == "both" ] && m="-C $((TOS )) -c"
+ [ $ovr == "diff" ] && m="-C $((TOS2)) -c"
+
+ $NSEXE ./cmsg_sender -6 -p $p $m $((TOS)) -s $TGT6 1234
+ check_result $? 1 "TCLASS $prot $ovr - rejection"
+ done
+done
+
+# IPV6_HOPLIMIT
+LIM=4
+
+for ovr in setsock cmsg both diff; do
+ for p in u i r; do
+ [ $p == "u" ] && prot=UDP
+ [ $p == "i" ] && prot=ICMP
+ [ $p == "r" ] && prot=RAW
+
+ [ $ovr == "setsock" ] && m="-L"
+ [ $ovr == "cmsg" ] && m="-l"
+ [ $ovr == "both" ] && m="-L $LIM -l"
+ [ $ovr == "diff" ] && m="-L $((LIM + 1)) -l"
+
+ $NSEXE nohup tcpdump --immediate-mode -p -ni dummy0 -w $TMPF -c 4 2> /dev/null &
+ BG=$!
+ sleep 0.05
+
+ $NSEXE ./cmsg_sender -6 -p $p $m $LIM $TGT6 1234
+ check_result $? 0 "HOPLIMIT $prot $ovr - pass"
+
+ while [ -d /proc/$BG ]; do
+ $NSEXE ./cmsg_sender -6 -p $p $m $LIM $TGT6 1234
+ done
+
+ tcpdump -r $TMPF -v 2>&1 | grep "hlim $LIM[^0-9]" >> /dev/null
+ check_result $? 0 "HOPLIMIT $prot $ovr - packet data"
+ rm $TMPF
+ done
+done
+
+# IPV6 exthdr
+for p in u i r; do
+ # Very basic "does it crash" test
+ for h in h d r; do
+ $NSEXE ./cmsg_sender -p $p -6 -H $h $TGT6 1234
+ check_result $? 0 "ExtHdr $prot $ovr - pass"
+ done
+done
+
+# Summary
+if [ $BAD -ne 0 ]; then
+ echo "FAIL - $BAD/$TOTAL cases failed"
+ exit 1
+else
+ echo "OK"
+ exit 0
+fi
diff --git a/tools/testing/selftests/net/cmsg_sender.c b/tools/testing/selftests/net/cmsg_sender.c
new file mode 100644
index 000000000000..c79e65581dc3
--- /dev/null
+++ b/tools/testing/selftests/net/cmsg_sender.c
@@ -0,0 +1,522 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+#include <errno.h>
+#include <error.h>
+#include <netdb.h>
+#include <stdbool.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <time.h>
+#include <unistd.h>
+#include <linux/errqueue.h>
+#include <linux/icmp.h>
+#include <linux/icmpv6.h>
+#include <linux/net_tstamp.h>
+#include <linux/types.h>
+#include <linux/udp.h>
+#include <sys/socket.h>
+
+#include "../kselftest.h"
+
+enum {
+ ERN_SUCCESS = 0,
+ /* Well defined errors, callers may depend on these */
+ ERN_SEND = 1,
+ /* Informational, can reorder */
+ ERN_HELP,
+ ERN_SEND_SHORT,
+ ERN_SOCK_CREATE,
+ ERN_RESOLVE,
+ ERN_CMSG_WR,
+ ERN_SOCKOPT,
+ ERN_GETTIME,
+ ERN_RECVERR,
+ ERN_CMSG_RD,
+ ERN_CMSG_RCV,
+};
+
+struct option_cmsg_u32 {
+ bool ena;
+ unsigned int val;
+};
+
+struct options {
+ bool silent_send;
+ const char *host;
+ const char *service;
+ unsigned int size;
+ unsigned int num_pkt;
+ struct {
+ unsigned int mark;
+ unsigned int dontfrag;
+ unsigned int tclass;
+ unsigned int hlimit;
+ unsigned int priority;
+ } sockopt;
+ struct {
+ unsigned int family;
+ unsigned int type;
+ unsigned int proto;
+ } sock;
+ struct option_cmsg_u32 mark;
+ struct {
+ bool ena;
+ unsigned int delay;
+ } txtime;
+ struct {
+ bool ena;
+ } ts;
+ struct {
+ struct option_cmsg_u32 dontfrag;
+ struct option_cmsg_u32 tclass;
+ struct option_cmsg_u32 hlimit;
+ struct option_cmsg_u32 exthdr;
+ } v6;
+} opt = {
+ .size = 13,
+ .num_pkt = 1,
+ .sock = {
+ .family = AF_UNSPEC,
+ .type = SOCK_DGRAM,
+ .proto = IPPROTO_UDP,
+ },
+};
+
+static struct timespec time_start_real;
+static struct timespec time_start_mono;
+
+static void __attribute__((noreturn)) cs_usage(const char *bin)
+{
+ printf("Usage: %s [opts] <dst host> <dst port / service>\n", bin);
+ printf("Options:\n"
+ "\t\t-s Silent send() failures\n"
+ "\t\t-S send() size\n"
+ "\t\t-4/-6 Force IPv4 / IPv6 only\n"
+ "\t\t-p prot Socket protocol\n"
+ "\t\t (u = UDP (default); i = ICMP; r = RAW)\n"
+ "\n"
+ "\t\t-m val Set SO_MARK with given value\n"
+ "\t\t-M val Set SO_MARK via setsockopt\n"
+ "\t\t-d val Set SO_TXTIME with given delay (usec)\n"
+ "\t\t-t Enable time stamp reporting\n"
+ "\t\t-f val Set don't fragment via cmsg\n"
+ "\t\t-F val Set don't fragment via setsockopt\n"
+ "\t\t-c val Set TCLASS via cmsg\n"
+ "\t\t-C val Set TCLASS via setsockopt\n"
+ "\t\t-l val Set HOPLIMIT via cmsg\n"
+ "\t\t-L val Set HOPLIMIT via setsockopt\n"
+ "\t\t-H type Add an IPv6 header option\n"
+ "\t\t (h = HOP; d = DST; r = RTDST)"
+ "");
+ exit(ERN_HELP);
+}
+
+static void cs_parse_args(int argc, char *argv[])
+{
+ int o;
+
+ while ((o = getopt(argc, argv, "46sS:p:P:m:M:n:d:tf:F:c:C:l:L:H:")) != -1) {
+ switch (o) {
+ case 's':
+ opt.silent_send = true;
+ break;
+ case 'S':
+ opt.size = atoi(optarg);
+ break;
+ case '4':
+ opt.sock.family = AF_INET;
+ break;
+ case '6':
+ opt.sock.family = AF_INET6;
+ break;
+ case 'p':
+ if (*optarg == 'u' || *optarg == 'U') {
+ opt.sock.proto = IPPROTO_UDP;
+ } else if (*optarg == 'i' || *optarg == 'I') {
+ opt.sock.proto = IPPROTO_ICMP;
+ } else if (*optarg == 'r') {
+ opt.sock.type = SOCK_RAW;
+ } else {
+ printf("Error: unknown protocol: %s\n", optarg);
+ cs_usage(argv[0]);
+ }
+ break;
+ case 'P':
+ opt.sockopt.priority = atoi(optarg);
+ break;
+ case 'm':
+ opt.mark.ena = true;
+ opt.mark.val = atoi(optarg);
+ break;
+ case 'M':
+ opt.sockopt.mark = atoi(optarg);
+ break;
+ case 'n':
+ opt.num_pkt = atoi(optarg);
+ break;
+ case 'd':
+ opt.txtime.ena = true;
+ opt.txtime.delay = atoi(optarg);
+ break;
+ case 't':
+ opt.ts.ena = true;
+ break;
+ case 'f':
+ opt.v6.dontfrag.ena = true;
+ opt.v6.dontfrag.val = atoi(optarg);
+ break;
+ case 'F':
+ opt.sockopt.dontfrag = atoi(optarg);
+ break;
+ case 'c':
+ opt.v6.tclass.ena = true;
+ opt.v6.tclass.val = atoi(optarg);
+ break;
+ case 'C':
+ opt.sockopt.tclass = atoi(optarg);
+ break;
+ case 'l':
+ opt.v6.hlimit.ena = true;
+ opt.v6.hlimit.val = atoi(optarg);
+ break;
+ case 'L':
+ opt.sockopt.hlimit = atoi(optarg);
+ break;
+ case 'H':
+ opt.v6.exthdr.ena = true;
+ switch (optarg[0]) {
+ case 'h':
+ opt.v6.exthdr.val = IPV6_HOPOPTS;
+ break;
+ case 'd':
+ opt.v6.exthdr.val = IPV6_DSTOPTS;
+ break;
+ case 'r':
+ opt.v6.exthdr.val = IPV6_RTHDRDSTOPTS;
+ break;
+ default:
+ printf("Error: hdr type: %s\n", optarg);
+ break;
+ }
+ break;
+ }
+ }
+
+ if (optind != argc - 2)
+ cs_usage(argv[0]);
+
+ opt.host = argv[optind];
+ opt.service = argv[optind + 1];
+}
+
+static void memrnd(void *s, size_t n)
+{
+ int *dword = s;
+ char *byte;
+
+ for (; n >= 4; n -= 4)
+ *dword++ = rand();
+ byte = (void *)dword;
+ while (n--)
+ *byte++ = rand();
+}
+
+static void
+ca_write_cmsg_u32(char *cbuf, size_t cbuf_sz, size_t *cmsg_len,
+ int level, int optname, struct option_cmsg_u32 *uopt)
+{
+ struct cmsghdr *cmsg;
+
+ if (!uopt->ena)
+ return;
+
+ cmsg = (struct cmsghdr *)(cbuf + *cmsg_len);
+ *cmsg_len += CMSG_SPACE(sizeof(__u32));
+ if (cbuf_sz < *cmsg_len)
+ error(ERN_CMSG_WR, EFAULT, "cmsg buffer too small");
+
+ cmsg->cmsg_level = level;
+ cmsg->cmsg_type = optname;
+ cmsg->cmsg_len = CMSG_LEN(sizeof(__u32));
+ *(__u32 *)CMSG_DATA(cmsg) = uopt->val;
+}
+
+static void
+cs_write_cmsg(int fd, struct msghdr *msg, char *cbuf, size_t cbuf_sz)
+{
+ struct cmsghdr *cmsg;
+ size_t cmsg_len;
+
+ msg->msg_control = cbuf;
+ cmsg_len = 0;
+
+ ca_write_cmsg_u32(cbuf, cbuf_sz, &cmsg_len,
+ SOL_SOCKET, SO_MARK, &opt.mark);
+ ca_write_cmsg_u32(cbuf, cbuf_sz, &cmsg_len,
+ SOL_IPV6, IPV6_DONTFRAG, &opt.v6.dontfrag);
+ ca_write_cmsg_u32(cbuf, cbuf_sz, &cmsg_len,
+ SOL_IPV6, IPV6_TCLASS, &opt.v6.tclass);
+ ca_write_cmsg_u32(cbuf, cbuf_sz, &cmsg_len,
+ SOL_IPV6, IPV6_HOPLIMIT, &opt.v6.hlimit);
+
+ if (opt.txtime.ena) {
+ struct sock_txtime so_txtime = {
+ .clockid = CLOCK_MONOTONIC,
+ };
+ __u64 txtime;
+
+ if (setsockopt(fd, SOL_SOCKET, SO_TXTIME,
+ &so_txtime, sizeof(so_txtime)))
+ error(ERN_SOCKOPT, errno, "setsockopt TXTIME");
+
+ txtime = time_start_mono.tv_sec * (1000ULL * 1000 * 1000) +
+ time_start_mono.tv_nsec +
+ opt.txtime.delay * 1000;
+
+ cmsg = (struct cmsghdr *)(cbuf + cmsg_len);
+ cmsg_len += CMSG_SPACE(sizeof(txtime));
+ if (cbuf_sz < cmsg_len)
+ error(ERN_CMSG_WR, EFAULT, "cmsg buffer too small");
+
+ cmsg->cmsg_level = SOL_SOCKET;
+ cmsg->cmsg_type = SCM_TXTIME;
+ cmsg->cmsg_len = CMSG_LEN(sizeof(txtime));
+ memcpy(CMSG_DATA(cmsg), &txtime, sizeof(txtime));
+ }
+ if (opt.ts.ena) {
+ __u32 val = SOF_TIMESTAMPING_SOFTWARE |
+ SOF_TIMESTAMPING_OPT_TSONLY;
+
+ if (setsockopt(fd, SOL_SOCKET, SO_TIMESTAMPING,
+ &val, sizeof(val)))
+ error(ERN_SOCKOPT, errno, "setsockopt TIMESTAMPING");
+
+ cmsg = (struct cmsghdr *)(cbuf + cmsg_len);
+ cmsg_len += CMSG_SPACE(sizeof(__u32));
+ if (cbuf_sz < cmsg_len)
+ error(ERN_CMSG_WR, EFAULT, "cmsg buffer too small");
+
+ cmsg->cmsg_level = SOL_SOCKET;
+ cmsg->cmsg_type = SO_TIMESTAMPING;
+ cmsg->cmsg_len = CMSG_LEN(sizeof(__u32));
+ *(__u32 *)CMSG_DATA(cmsg) = SOF_TIMESTAMPING_TX_SCHED |
+ SOF_TIMESTAMPING_TX_SOFTWARE;
+ }
+ if (opt.v6.exthdr.ena) {
+ cmsg = (struct cmsghdr *)(cbuf + cmsg_len);
+ cmsg_len += CMSG_SPACE(8);
+ if (cbuf_sz < cmsg_len)
+ error(ERN_CMSG_WR, EFAULT, "cmsg buffer too small");
+
+ cmsg->cmsg_level = SOL_IPV6;
+ cmsg->cmsg_type = opt.v6.exthdr.val;
+ cmsg->cmsg_len = CMSG_LEN(8);
+ *(__u64 *)CMSG_DATA(cmsg) = 0;
+ }
+
+ if (cmsg_len)
+ msg->msg_controllen = cmsg_len;
+ else
+ msg->msg_control = NULL;
+}
+
+static const char *cs_ts_info2str(unsigned int info)
+{
+ static const char *names[] = {
+ [SCM_TSTAMP_SND] = "SND",
+ [SCM_TSTAMP_SCHED] = "SCHED",
+ [SCM_TSTAMP_ACK] = "ACK",
+ };
+
+ if (info < ARRAY_SIZE(names))
+ return names[info];
+ return "unknown";
+}
+
+static void
+cs_read_cmsg(int fd, struct msghdr *msg, char *cbuf, size_t cbuf_sz)
+{
+ struct sock_extended_err *see;
+ struct scm_timestamping *ts;
+ struct cmsghdr *cmsg;
+ int i, err;
+
+ if (!opt.ts.ena)
+ return;
+ msg->msg_control = cbuf;
+ msg->msg_controllen = cbuf_sz;
+
+ while (true) {
+ ts = NULL;
+ see = NULL;
+ memset(cbuf, 0, cbuf_sz);
+
+ err = recvmsg(fd, msg, MSG_ERRQUEUE);
+ if (err < 0) {
+ if (errno == EAGAIN)
+ break;
+ error(ERN_RECVERR, errno, "recvmsg ERRQ");
+ }
+
+ for (cmsg = CMSG_FIRSTHDR(msg); cmsg != NULL;
+ cmsg = CMSG_NXTHDR(msg, cmsg)) {
+ if (cmsg->cmsg_level == SOL_SOCKET &&
+ cmsg->cmsg_type == SO_TIMESTAMPING_OLD) {
+ if (cmsg->cmsg_len < sizeof(*ts))
+ error(ERN_CMSG_RD, EINVAL, "TS cmsg");
+
+ ts = (void *)CMSG_DATA(cmsg);
+ }
+ if ((cmsg->cmsg_level == SOL_IP &&
+ cmsg->cmsg_type == IP_RECVERR) ||
+ (cmsg->cmsg_level == SOL_IPV6 &&
+ cmsg->cmsg_type == IPV6_RECVERR)) {
+ if (cmsg->cmsg_len < sizeof(*see))
+ error(ERN_CMSG_RD, EINVAL, "sock_err cmsg");
+
+ see = (void *)CMSG_DATA(cmsg);
+ }
+ }
+
+ if (!ts)
+ error(ERN_CMSG_RCV, ENOENT, "TS cmsg not found");
+ if (!see)
+ error(ERN_CMSG_RCV, ENOENT, "sock_err cmsg not found");
+
+ for (i = 0; i < 3; i++) {
+ unsigned long long rel_time;
+
+ if (!ts->ts[i].tv_sec && !ts->ts[i].tv_nsec)
+ continue;
+
+ rel_time = (ts->ts[i].tv_sec - time_start_real.tv_sec) *
+ (1000ULL * 1000) +
+ (ts->ts[i].tv_nsec - time_start_real.tv_nsec) /
+ 1000;
+ printf(" %5s ts%d %lluus\n",
+ cs_ts_info2str(see->ee_info),
+ i, rel_time);
+ }
+ }
+}
+
+static void ca_set_sockopts(int fd)
+{
+ if (opt.sockopt.mark &&
+ setsockopt(fd, SOL_SOCKET, SO_MARK,
+ &opt.sockopt.mark, sizeof(opt.sockopt.mark)))
+ error(ERN_SOCKOPT, errno, "setsockopt SO_MARK");
+ if (opt.sockopt.dontfrag &&
+ setsockopt(fd, SOL_IPV6, IPV6_DONTFRAG,
+ &opt.sockopt.dontfrag, sizeof(opt.sockopt.dontfrag)))
+ error(ERN_SOCKOPT, errno, "setsockopt IPV6_DONTFRAG");
+ if (opt.sockopt.tclass &&
+ setsockopt(fd, SOL_IPV6, IPV6_TCLASS,
+ &opt.sockopt.tclass, sizeof(opt.sockopt.tclass)))
+ error(ERN_SOCKOPT, errno, "setsockopt IPV6_TCLASS");
+ if (opt.sockopt.hlimit &&
+ setsockopt(fd, SOL_IPV6, IPV6_UNICAST_HOPS,
+ &opt.sockopt.hlimit, sizeof(opt.sockopt.hlimit)))
+ error(ERN_SOCKOPT, errno, "setsockopt IPV6_HOPLIMIT");
+ if (opt.sockopt.priority &&
+ setsockopt(fd, SOL_SOCKET, SO_PRIORITY,
+ &opt.sockopt.priority, sizeof(opt.sockopt.priority)))
+ error(ERN_SOCKOPT, errno, "setsockopt SO_PRIORITY");
+}
+
+int main(int argc, char *argv[])
+{
+ struct addrinfo hints, *ai;
+ struct iovec iov[1];
+ unsigned char *buf;
+ struct msghdr msg;
+ char cbuf[1024];
+ int err;
+ int fd;
+ int i;
+
+ cs_parse_args(argc, argv);
+
+ buf = malloc(opt.size);
+ memrnd(buf, opt.size);
+
+ memset(&hints, 0, sizeof(hints));
+ hints.ai_family = opt.sock.family;
+
+ ai = NULL;
+ err = getaddrinfo(opt.host, opt.service, &hints, &ai);
+ if (err) {
+ fprintf(stderr, "Can't resolve address [%s]:%s\n",
+ opt.host, opt.service);
+ return ERN_SOCK_CREATE;
+ }
+
+ if (ai->ai_family == AF_INET6 && opt.sock.proto == IPPROTO_ICMP)
+ opt.sock.proto = IPPROTO_ICMPV6;
+
+ fd = socket(ai->ai_family, opt.sock.type, opt.sock.proto);
+ if (fd < 0) {
+ fprintf(stderr, "Can't open socket: %s\n", strerror(errno));
+ freeaddrinfo(ai);
+ return ERN_RESOLVE;
+ }
+
+ if (opt.sock.proto == IPPROTO_ICMP) {
+ buf[0] = ICMP_ECHO;
+ buf[1] = 0;
+ } else if (opt.sock.proto == IPPROTO_ICMPV6) {
+ buf[0] = ICMPV6_ECHO_REQUEST;
+ buf[1] = 0;
+ } else if (opt.sock.type == SOCK_RAW) {
+ struct udphdr hdr = { 1, 2, htons(opt.size), 0 };
+ struct sockaddr_in6 *sin6 = (void *)ai->ai_addr;
+
+ memcpy(buf, &hdr, sizeof(hdr));
+ sin6->sin6_port = htons(opt.sock.proto);
+ }
+
+ ca_set_sockopts(fd);
+
+ if (clock_gettime(CLOCK_REALTIME, &time_start_real))
+ error(ERN_GETTIME, errno, "gettime REALTIME");
+ if (clock_gettime(CLOCK_MONOTONIC, &time_start_mono))
+ error(ERN_GETTIME, errno, "gettime MONOTONIC");
+
+ iov[0].iov_base = buf;
+ iov[0].iov_len = opt.size;
+
+ memset(&msg, 0, sizeof(msg));
+ msg.msg_name = ai->ai_addr;
+ msg.msg_namelen = ai->ai_addrlen;
+ msg.msg_iov = iov;
+ msg.msg_iovlen = 1;
+
+ cs_write_cmsg(fd, &msg, cbuf, sizeof(cbuf));
+
+ for (i = 0; i < opt.num_pkt; i++) {
+ err = sendmsg(fd, &msg, 0);
+ if (err < 0) {
+ if (!opt.silent_send)
+ fprintf(stderr, "send failed: %s\n", strerror(errno));
+ err = ERN_SEND;
+ goto err_out;
+ } else if (err != (int)opt.size) {
+ fprintf(stderr, "short send\n");
+ err = ERN_SEND_SHORT;
+ goto err_out;
+ }
+ }
+ err = ERN_SUCCESS;
+
+ if (opt.ts.ena) {
+ /* Make sure all timestamps have time to loop back */
+ usleep(opt.txtime.delay);
+
+ cs_read_cmsg(fd, &msg, cbuf, sizeof(cbuf));
+ }
+
+err_out:
+ close(fd);
+ freeaddrinfo(ai);
+ return err;
+}
diff --git a/tools/testing/selftests/net/cmsg_so_mark.sh b/tools/testing/selftests/net/cmsg_so_mark.sh
new file mode 100755
index 000000000000..772ad0cc2630
--- /dev/null
+++ b/tools/testing/selftests/net/cmsg_so_mark.sh
@@ -0,0 +1,78 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+source lib.sh
+
+IP4=172.16.0.1/24
+TGT4=172.16.0.2
+IP6=2001:db8:1::1/64
+TGT6=2001:db8:1::2
+MARK=1000
+
+cleanup()
+{
+ cleanup_ns $NS
+}
+
+trap cleanup EXIT
+
+# Namespaces
+setup_ns NS
+
+ip netns exec $NS sysctl -w net.ipv4.ping_group_range='0 2147483647' > /dev/null
+
+# Connectivity
+ip -netns $NS link add type dummy
+ip -netns $NS link set dev dummy0 up
+ip -netns $NS addr add $IP4 dev dummy0
+ip -netns $NS addr add $IP6 dev dummy0
+
+ip -netns $NS rule add fwmark $MARK lookup 300
+ip -6 -netns $NS rule add fwmark $MARK lookup 300
+ip -netns $NS route add prohibit any table 300
+ip -6 -netns $NS route add prohibit any table 300
+
+# Test
+BAD=0
+TOTAL=0
+
+check_result() {
+ ((TOTAL++))
+ if [ $1 -ne $2 ]; then
+ echo " Case $3 returned $1, expected $2"
+ ((BAD++))
+ fi
+}
+
+for ovr in setsock cmsg both; do
+ for i in 4 6; do
+ [ $i == 4 ] && TGT=$TGT4 || TGT=$TGT6
+
+ for p in u i r; do
+ [ $p == "u" ] && prot=UDP
+ [ $p == "i" ] && prot=ICMP
+ [ $p == "r" ] && prot=RAW
+
+ [ $ovr == "setsock" ] && m="-M"
+ [ $ovr == "cmsg" ] && m="-m"
+ [ $ovr == "both" ] && m="-M $MARK -m"
+
+ ip netns exec $NS ./cmsg_sender -$i -p $p $m $((MARK + 1)) $TGT 1234
+ check_result $? 0 "$prot $ovr - pass"
+
+ [ $ovr == "diff" ] && m="-M $((MARK + 1)) -m"
+
+ ip netns exec $NS ./cmsg_sender -$i -p $p $m $MARK -s $TGT 1234
+ check_result $? 1 "$prot $ovr - rejection"
+ done
+ done
+done
+
+# Summary
+if [ $BAD -ne 0 ]; then
+ echo "FAIL - $BAD/$TOTAL cases failed"
+ exit 1
+else
+ echo "OK"
+ exit 0
+fi
diff --git a/tools/testing/selftests/net/cmsg_time.sh b/tools/testing/selftests/net/cmsg_time.sh
new file mode 100755
index 000000000000..af85267ad1e3
--- /dev/null
+++ b/tools/testing/selftests/net/cmsg_time.sh
@@ -0,0 +1,84 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+source lib.sh
+
+IP4=172.16.0.1/24
+TGT4=172.16.0.2
+IP6=2001:db8:1::1/64
+TGT6=2001:db8:1::2
+
+cleanup()
+{
+ cleanup_ns $NS
+}
+
+trap cleanup EXIT
+
+# Namespaces
+setup_ns NS
+
+ip netns exec $NS sysctl -w net.ipv4.ping_group_range='0 2147483647' > /dev/null
+
+# Connectivity
+ip -netns $NS link add type dummy
+ip -netns $NS link set dev dummy0 up
+ip -netns $NS addr add $IP4 dev dummy0
+ip -netns $NS addr add $IP6 dev dummy0
+
+# Need FQ for TXTIME
+ip netns exec $NS tc qdisc replace dev dummy0 root fq
+
+# Test
+BAD=0
+TOTAL=0
+
+check_result() {
+ ((TOTAL++))
+ if [ $1 -ne 0 ]; then
+ echo " Case $4 returned $1, expected 0"
+ ((BAD++))
+ elif [ "$2" != "$3" ]; then
+ echo " Case $4 returned '$2', expected '$3'"
+ ((BAD++))
+ fi
+}
+
+for i in "-4 $TGT4" "-6 $TGT6"; do
+ for p in u i r; do
+ [ $p == "u" ] && prot=UDPv${i:1:2}
+ [ $p == "i" ] && prot=ICMPv${i:1:2}
+ [ $p == "r" ] && prot=RAWv${i:1:2}
+
+ ts=$(ip netns exec $NS ./cmsg_sender -p $p $i 1234)
+ check_result $? "$ts" "" "$prot - no options"
+
+ ts=$(ip netns exec $NS ./cmsg_sender -p $p $i 1234 -t | wc -l)
+ check_result $? "$ts" "2" "$prot - ts cnt"
+ ts=$(ip netns exec $NS ./cmsg_sender -p $p $i 1234 -t |
+ sed -n "s/.*SCHED ts0 [0-9].*/OK/p")
+ check_result $? "$ts" "OK" "$prot - ts0 SCHED"
+ ts=$(ip netns exec $NS ./cmsg_sender -p $p $i 1234 -t |
+ sed -n "s/.*SND ts0 [0-9].*/OK/p")
+ check_result $? "$ts" "OK" "$prot - ts0 SND"
+
+ ts=$(ip netns exec $NS ./cmsg_sender -p $p $i 1234 -t -d 1000 |
+ awk '/SND/ { if ($3 > 1000) print "OK"; }')
+ check_result $? "$ts" "OK" "$prot - TXTIME abs"
+
+ ts=$(ip netns exec $NS ./cmsg_sender -p $p $i 1234 -t -d 1000 |
+ awk '/SND/ {snd=$3}
+ /SCHED/ {sch=$3}
+ END { if (snd - sch > 500) print "OK"; }')
+ check_result $? "$ts" "OK" "$prot - TXTIME rel"
+ done
+done
+
+# Summary
+if [ $BAD -ne 0 ]; then
+ echo "FAIL - $BAD/$TOTAL cases failed"
+ exit 1
+else
+ echo "OK"
+ exit 0
+fi
diff --git a/tools/testing/selftests/net/config b/tools/testing/selftests/net/config
index 3b42c06b5985..5e4390cac17e 100644
--- a/tools/testing/selftests/net/config
+++ b/tools/testing/selftests/net/config
@@ -1,7 +1,12 @@
CONFIG_USER_NS=y
+CONFIG_NET_NS=y
+CONFIG_BONDING=m
CONFIG_BPF_SYSCALL=y
CONFIG_TEST_BPF=m
CONFIG_NUMA=y
+CONFIG_RPS=y
+CONFIG_SYSFS=y
+CONFIG_PROC_SYSCTL=y
CONFIG_NET_VRF=y
CONFIG_NET_L3_MASTER_DEV=y
CONFIG_IPV6=y
@@ -10,24 +15,88 @@ CONFIG_VETH=y
CONFIG_NET_IPVTI=y
CONFIG_IPV6_VTI=y
CONFIG_DUMMY=y
+CONFIG_BRIDGE_VLAN_FILTERING=y
CONFIG_BRIDGE=y
+CONFIG_CRYPTO_CHACHA20POLY1305=m
CONFIG_VLAN_8021Q=y
+CONFIG_GENEVE=m
CONFIG_IFB=y
+CONFIG_INET_DIAG=y
+CONFIG_INET_ESP=y
+CONFIG_INET_ESP_OFFLOAD=y
+CONFIG_NET_FOU=y
+CONFIG_NET_FOU_IP_TUNNELS=y
+CONFIG_IP_GRE=m
CONFIG_NETFILTER=y
CONFIG_NETFILTER_ADVANCED=y
CONFIG_NF_CONNTRACK=m
+CONFIG_IPV6_SIT=y
+CONFIG_IP_DCCP=m
CONFIG_NF_NAT=m
CONFIG_IP6_NF_IPTABLES=m
CONFIG_IP_NF_IPTABLES=m
CONFIG_IP6_NF_NAT=m
+CONFIG_IP6_NF_RAW=m
CONFIG_IP_NF_NAT=m
+CONFIG_IP_NF_RAW=m
+CONFIG_IP_NF_TARGET_TTL=m
+CONFIG_IPV6_GRE=m
+CONFIG_IPV6_SEG6_LWTUNNEL=y
+CONFIG_L2TP_ETH=m
+CONFIG_L2TP_IP=m
+CONFIG_L2TP=m
+CONFIG_L2TP_V3=y
+CONFIG_MACSEC=m
+CONFIG_MACVLAN=y
+CONFIG_MACVTAP=y
+CONFIG_MPLS=y
+CONFIG_MPTCP=y
CONFIG_NF_TABLES=m
CONFIG_NF_TABLES_IPV6=y
CONFIG_NF_TABLES_IPV4=y
-CONFIG_NFT_CHAIN_NAT_IPV6=m
-CONFIG_NFT_CHAIN_NAT_IPV4=m
+CONFIG_NFT_NAT=m
+CONFIG_NETFILTER_XT_MATCH_LENGTH=m
+CONFIG_NET_ACT_CSUM=m
+CONFIG_NET_ACT_CT=m
+CONFIG_NET_ACT_GACT=m
+CONFIG_NET_ACT_PEDIT=m
+CONFIG_NET_CLS_BASIC=m
+CONFIG_NET_CLS_BPF=m
+CONFIG_NET_CLS_MATCHALL=m
+CONFIG_NET_CLS_U32=m
+CONFIG_NET_IPGRE_DEMUX=m
+CONFIG_NET_IPGRE=m
+CONFIG_NET_IPIP=y
+CONFIG_NET_SCH_FQ_CODEL=m
+CONFIG_NET_SCH_HTB=m
CONFIG_NET_SCH_FQ=m
CONFIG_NET_SCH_ETF=m
CONFIG_NET_SCH_NETEM=y
+CONFIG_NET_SCH_PRIO=m
+CONFIG_NFT_COMPAT=m
+CONFIG_NF_FLOW_TABLE=m
+CONFIG_PSAMPLE=m
+CONFIG_TCP_MD5SIG=y
CONFIG_TEST_BLACKHOLE_DEV=m
CONFIG_KALLSYMS=y
+CONFIG_TLS=m
+CONFIG_TRACEPOINTS=y
+CONFIG_NET_DROP_MONITOR=m
+CONFIG_NETDEVSIM=m
+CONFIG_MPLS_ROUTING=m
+CONFIG_MPLS_IPTUNNEL=m
+CONFIG_NET_SCH_INGRESS=m
+CONFIG_NET_CLS_FLOWER=m
+CONFIG_NET_ACT_TUNNEL_KEY=m
+CONFIG_NET_ACT_MIRRED=m
+CONFIG_BAREUDP=m
+CONFIG_IPV6_IOAM6_LWTUNNEL=y
+CONFIG_CRYPTO_SM4_GENERIC=y
+CONFIG_AMT=m
+CONFIG_TUN=y
+CONFIG_VXLAN=m
+CONFIG_IP_SCTP=m
+CONFIG_NETFILTER_XT_MATCH_POLICY=m
+CONFIG_CRYPTO_ARIA=y
+CONFIG_XFRM_INTERFACE=m
+CONFIG_XFRM_USER=m
diff --git a/tools/testing/selftests/net/csum.c b/tools/testing/selftests/net/csum.c
new file mode 100644
index 000000000000..90eb06fefa59
--- /dev/null
+++ b/tools/testing/selftests/net/csum.c
@@ -0,0 +1,988 @@
+// SPDX-License-Identifier: GPL-2.0
+
+/* Test hardware checksum offload: Rx + Tx, IPv4 + IPv6, TCP + UDP.
+ *
+ * The test runs on two machines to exercise the NIC. For this reason it
+ * is not integrated in kselftests.
+ *
+ * CMD=$((./csum -[46] -[tu] -S $SADDR -D $DADDR -[RT] -r 1 $EXTRA_ARGS))
+ *
+ * Rx:
+ *
+ * The sender sends packets with a known checksum field using PF_INET(6)
+ * SOCK_RAW sockets.
+ *
+ * good packet: $CMD [-t]
+ * bad packet: $CMD [-t] -E
+ *
+ * The receiver reads UDP packets with a UDP socket. This is not an
+ * option for TCP packets ('-t'). Optionally insert an iptables filter
+ * to avoid these entering the real protocol stack.
+ *
+ * The receiver also reads all packets with a PF_PACKET socket, to
+ * observe whether both good and bad packets arrive on the host. And to
+ * read the optional TP_STATUS_CSUM_VALID bit. This requires setting
+ * option PACKET_AUXDATA, and works only for CHECKSUM_UNNECESSARY.
+ *
+ * Tx:
+ *
+ * The sender needs to build CHECKSUM_PARTIAL packets to exercise tx
+ * checksum offload.
+ *
+ * The sender can sends packets with a UDP socket.
+ *
+ * Optionally crafts a packet that sums up to zero to verify that the
+ * device writes negative zero 0xFFFF in this case to distinguish from
+ * 0x0000 (checksum disabled), as required by RFC 768. Hit this case
+ * by choosing a specific source port.
+ *
+ * good packet: $CMD -U
+ * zero csum: $CMD -U -Z
+ *
+ * The sender can also build packets with PF_PACKET with PACKET_VNET_HDR,
+ * to cover more protocols. PF_PACKET requires passing src and dst mac
+ * addresses.
+ *
+ * good packet: $CMD -s $smac -d $dmac -p [-t]
+ *
+ * Argument '-z' sends UDP packets with a 0x000 checksum disabled field,
+ * to verify that the NIC passes these packets unmodified.
+ *
+ * Argument '-e' adds a transport mode encapsulation header between
+ * network and transport header. This will fail for devices that parse
+ * headers. Should work on devices that implement protocol agnostic tx
+ * checksum offload (NETIF_F_HW_CSUM).
+ *
+ * Argument '-r $SEED' optionally randomizes header, payload and length
+ * to increase coverage between packets sent. SEED 1 further chooses a
+ * different seed for each run (and logs this for reproducibility). It
+ * is advised to enable this for extra coverage in continuous testing.
+ */
+
+#define _GNU_SOURCE
+
+#include <arpa/inet.h>
+#include <asm/byteorder.h>
+#include <errno.h>
+#include <error.h>
+#include <linux/filter.h>
+#include <linux/if_packet.h>
+#include <linux/ipv6.h>
+#include <linux/virtio_net.h>
+#include <net/ethernet.h>
+#include <net/if.h>
+#include <netinet/if_ether.h>
+#include <netinet/in.h>
+#include <netinet/ip.h>
+#include <netinet/ip6.h>
+#include <netinet/tcp.h>
+#include <netinet/udp.h>
+#include <poll.h>
+#include <sched.h>
+#include <stdbool.h>
+#include <stddef.h>
+#include <stdint.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/socket.h>
+#include <sys/stat.h>
+#include <sys/time.h>
+#include <sys/types.h>
+#include <unistd.h>
+
+#include "kselftest.h"
+
+static bool cfg_bad_csum;
+static int cfg_family = PF_INET6;
+static int cfg_num_pkt = 4;
+static bool cfg_do_rx = true;
+static bool cfg_do_tx = true;
+static bool cfg_encap;
+static char *cfg_ifname = "eth0";
+static char *cfg_mac_dst;
+static char *cfg_mac_src;
+static int cfg_proto = IPPROTO_UDP;
+static int cfg_payload_char = 'a';
+static int cfg_payload_len = 100;
+static uint16_t cfg_port_dst = 34000;
+static uint16_t cfg_port_src = 33000;
+static uint16_t cfg_port_src_encap = 33001;
+static unsigned int cfg_random_seed;
+static int cfg_rcvbuf = 1 << 22; /* be able to queue large cfg_num_pkt */
+static bool cfg_send_pfpacket;
+static bool cfg_send_udp;
+static int cfg_timeout_ms = 2000;
+static bool cfg_zero_disable; /* skip checksum: set to zero (udp only) */
+static bool cfg_zero_sum; /* create packet that adds up to zero */
+
+static struct sockaddr_in cfg_daddr4 = {.sin_family = AF_INET};
+static struct sockaddr_in cfg_saddr4 = {.sin_family = AF_INET};
+static struct sockaddr_in6 cfg_daddr6 = {.sin6_family = AF_INET6};
+static struct sockaddr_in6 cfg_saddr6 = {.sin6_family = AF_INET6};
+
+#define ENC_HEADER_LEN (sizeof(struct udphdr) + sizeof(struct udp_encap_hdr))
+#define MAX_HEADER_LEN (sizeof(struct ipv6hdr) + ENC_HEADER_LEN + sizeof(struct tcphdr))
+#define MAX_PAYLOAD_LEN 1024
+
+/* Trivial demo encap. Stand-in for transport layer protocols like ESP or PSP */
+struct udp_encap_hdr {
+ uint8_t nexthdr;
+ uint8_t padding[3];
+};
+
+/* Ipaddrs, for pseudo csum. Global var is ugly, pass through funcs was worse */
+static void *iph_addr_p;
+
+static unsigned long gettimeofday_ms(void)
+{
+ struct timeval tv;
+
+ gettimeofday(&tv, NULL);
+ return (tv.tv_sec * 1000UL) + (tv.tv_usec / 1000UL);
+}
+
+static uint32_t checksum_nofold(char *data, size_t len, uint32_t sum)
+{
+ uint16_t *words = (uint16_t *)data;
+ int i;
+
+ for (i = 0; i < len / 2; i++)
+ sum += words[i];
+
+ if (len & 1)
+ sum += ((unsigned char *)data)[len - 1];
+
+ return sum;
+}
+
+static uint16_t checksum_fold(void *data, size_t len, uint32_t sum)
+{
+ sum = checksum_nofold(data, len, sum);
+
+ while (sum > 0xFFFF)
+ sum = (sum & 0xFFFF) + (sum >> 16);
+
+ return ~sum;
+}
+
+static uint16_t checksum(void *th, uint16_t proto, size_t len)
+{
+ uint32_t sum;
+ int alen;
+
+ alen = cfg_family == PF_INET6 ? 32 : 8;
+
+ sum = checksum_nofold(iph_addr_p, alen, 0);
+ sum += htons(proto);
+ sum += htons(len);
+
+ /* With CHECKSUM_PARTIAL kernel expects non-inverted pseudo csum */
+ if (cfg_do_tx && cfg_send_pfpacket)
+ return ~checksum_fold(NULL, 0, sum);
+ else
+ return checksum_fold(th, len, sum);
+}
+
+static void *build_packet_ipv4(void *_iph, uint8_t proto, unsigned int len)
+{
+ struct iphdr *iph = _iph;
+
+ memset(iph, 0, sizeof(*iph));
+
+ iph->version = 4;
+ iph->ihl = 5;
+ iph->ttl = 8;
+ iph->protocol = proto;
+ iph->saddr = cfg_saddr4.sin_addr.s_addr;
+ iph->daddr = cfg_daddr4.sin_addr.s_addr;
+ iph->tot_len = htons(sizeof(*iph) + len);
+ iph->check = checksum_fold(iph, sizeof(*iph), 0);
+
+ iph_addr_p = &iph->saddr;
+
+ return iph + 1;
+}
+
+static void *build_packet_ipv6(void *_ip6h, uint8_t proto, unsigned int len)
+{
+ struct ipv6hdr *ip6h = _ip6h;
+
+ memset(ip6h, 0, sizeof(*ip6h));
+
+ ip6h->version = 6;
+ ip6h->payload_len = htons(len);
+ ip6h->nexthdr = proto;
+ ip6h->hop_limit = 64;
+ ip6h->saddr = cfg_saddr6.sin6_addr;
+ ip6h->daddr = cfg_daddr6.sin6_addr;
+
+ iph_addr_p = &ip6h->saddr;
+
+ return ip6h + 1;
+}
+
+static void *build_packet_udp(void *_uh)
+{
+ struct udphdr *uh = _uh;
+
+ uh->source = htons(cfg_port_src);
+ uh->dest = htons(cfg_port_dst);
+ uh->len = htons(sizeof(*uh) + cfg_payload_len);
+ uh->check = 0;
+
+ /* choose source port so that uh->check adds up to zero */
+ if (cfg_zero_sum) {
+ uh->source = 0;
+ uh->source = checksum(uh, IPPROTO_UDP, sizeof(*uh) + cfg_payload_len);
+
+ fprintf(stderr, "tx: changing sport: %hu -> %hu\n",
+ cfg_port_src, ntohs(uh->source));
+ cfg_port_src = ntohs(uh->source);
+ }
+
+ if (cfg_zero_disable)
+ uh->check = 0;
+ else
+ uh->check = checksum(uh, IPPROTO_UDP, sizeof(*uh) + cfg_payload_len);
+
+ if (cfg_bad_csum)
+ uh->check = ~uh->check;
+
+ fprintf(stderr, "tx: sending checksum: 0x%x\n", uh->check);
+ return uh + 1;
+}
+
+static void *build_packet_tcp(void *_th)
+{
+ struct tcphdr *th = _th;
+
+ th->source = htons(cfg_port_src);
+ th->dest = htons(cfg_port_dst);
+ th->doff = 5;
+ th->check = 0;
+
+ th->check = checksum(th, IPPROTO_TCP, sizeof(*th) + cfg_payload_len);
+
+ if (cfg_bad_csum)
+ th->check = ~th->check;
+
+ fprintf(stderr, "tx: sending checksum: 0x%x\n", th->check);
+ return th + 1;
+}
+
+static char *build_packet_udp_encap(void *_uh)
+{
+ struct udphdr *uh = _uh;
+ struct udp_encap_hdr *eh = _uh + sizeof(*uh);
+
+ /* outer dst == inner dst, to simplify BPF filter
+ * outer src != inner src, to demultiplex on recv
+ */
+ uh->dest = htons(cfg_port_dst);
+ uh->source = htons(cfg_port_src_encap);
+ uh->check = 0;
+ uh->len = htons(sizeof(*uh) +
+ sizeof(*eh) +
+ sizeof(struct tcphdr) +
+ cfg_payload_len);
+
+ eh->nexthdr = IPPROTO_TCP;
+
+ return build_packet_tcp(eh + 1);
+}
+
+static char *build_packet(char *buf, int max_len, int *len)
+{
+ uint8_t proto;
+ char *off;
+ int tlen;
+
+ if (cfg_random_seed) {
+ int *buf32 = (void *)buf;
+ int i;
+
+ for (i = 0; i < (max_len / sizeof(int)); i++)
+ buf32[i] = rand();
+ } else {
+ memset(buf, cfg_payload_char, max_len);
+ }
+
+ if (cfg_proto == IPPROTO_UDP)
+ tlen = sizeof(struct udphdr) + cfg_payload_len;
+ else
+ tlen = sizeof(struct tcphdr) + cfg_payload_len;
+
+ if (cfg_encap) {
+ proto = IPPROTO_UDP;
+ tlen += ENC_HEADER_LEN;
+ } else {
+ proto = cfg_proto;
+ }
+
+ if (cfg_family == PF_INET)
+ off = build_packet_ipv4(buf, proto, tlen);
+ else
+ off = build_packet_ipv6(buf, proto, tlen);
+
+ if (cfg_encap)
+ off = build_packet_udp_encap(off);
+ else if (cfg_proto == IPPROTO_UDP)
+ off = build_packet_udp(off);
+ else
+ off = build_packet_tcp(off);
+
+ /* only pass the payload, but still compute headers for cfg_zero_sum */
+ if (cfg_send_udp) {
+ *len = cfg_payload_len;
+ return off;
+ }
+
+ *len = off - buf + cfg_payload_len;
+ return buf;
+}
+
+static int open_inet(int ipproto, int protocol)
+{
+ int fd;
+
+ fd = socket(cfg_family, ipproto, protocol);
+ if (fd == -1)
+ error(1, errno, "socket inet");
+
+ if (cfg_family == PF_INET6) {
+ /* may have been updated by cfg_zero_sum */
+ cfg_saddr6.sin6_port = htons(cfg_port_src);
+
+ if (bind(fd, (void *)&cfg_saddr6, sizeof(cfg_saddr6)))
+ error(1, errno, "bind dgram 6");
+ if (connect(fd, (void *)&cfg_daddr6, sizeof(cfg_daddr6)))
+ error(1, errno, "connect dgram 6");
+ } else {
+ /* may have been updated by cfg_zero_sum */
+ cfg_saddr4.sin_port = htons(cfg_port_src);
+
+ if (bind(fd, (void *)&cfg_saddr4, sizeof(cfg_saddr4)))
+ error(1, errno, "bind dgram 4");
+ if (connect(fd, (void *)&cfg_daddr4, sizeof(cfg_daddr4)))
+ error(1, errno, "connect dgram 4");
+ }
+
+ return fd;
+}
+
+static int open_packet(void)
+{
+ int fd, one = 1;
+
+ fd = socket(PF_PACKET, SOCK_RAW, 0);
+ if (fd == -1)
+ error(1, errno, "socket packet");
+
+ if (setsockopt(fd, SOL_PACKET, PACKET_VNET_HDR, &one, sizeof(one)))
+ error(1, errno, "setsockopt packet_vnet_ndr");
+
+ return fd;
+}
+
+static void send_inet(int fd, const char *buf, int len)
+{
+ int ret;
+
+ ret = write(fd, buf, len);
+ if (ret == -1)
+ error(1, errno, "write");
+ if (ret != len)
+ error(1, 0, "write: %d", ret);
+}
+
+static void eth_str_to_addr(const char *str, unsigned char *eth)
+{
+ if (sscanf(str, "%hhx:%hhx:%hhx:%hhx:%hhx:%hhx",
+ &eth[0], &eth[1], &eth[2], &eth[3], &eth[4], &eth[5]) != 6)
+ error(1, 0, "cannot parse mac addr %s", str);
+}
+
+static void send_packet(int fd, const char *buf, int len)
+{
+ struct virtio_net_hdr vh = {0};
+ struct sockaddr_ll addr = {0};
+ struct msghdr msg = {0};
+ struct ethhdr eth;
+ struct iovec iov[3];
+ int ret;
+
+ addr.sll_family = AF_PACKET;
+ addr.sll_halen = ETH_ALEN;
+ addr.sll_ifindex = if_nametoindex(cfg_ifname);
+ if (!addr.sll_ifindex)
+ error(1, errno, "if_nametoindex %s", cfg_ifname);
+
+ vh.flags = VIRTIO_NET_HDR_F_NEEDS_CSUM;
+ if (cfg_family == PF_INET6) {
+ vh.csum_start = sizeof(struct ethhdr) + sizeof(struct ipv6hdr);
+ addr.sll_protocol = htons(ETH_P_IPV6);
+ } else {
+ vh.csum_start = sizeof(struct ethhdr) + sizeof(struct iphdr);
+ addr.sll_protocol = htons(ETH_P_IP);
+ }
+
+ if (cfg_encap)
+ vh.csum_start += ENC_HEADER_LEN;
+
+ if (cfg_proto == IPPROTO_TCP) {
+ vh.csum_offset = __builtin_offsetof(struct tcphdr, check);
+ vh.hdr_len = vh.csum_start + sizeof(struct tcphdr);
+ } else {
+ vh.csum_offset = __builtin_offsetof(struct udphdr, check);
+ vh.hdr_len = vh.csum_start + sizeof(struct udphdr);
+ }
+
+ eth_str_to_addr(cfg_mac_src, eth.h_source);
+ eth_str_to_addr(cfg_mac_dst, eth.h_dest);
+ eth.h_proto = addr.sll_protocol;
+
+ iov[0].iov_base = &vh;
+ iov[0].iov_len = sizeof(vh);
+
+ iov[1].iov_base = &eth;
+ iov[1].iov_len = sizeof(eth);
+
+ iov[2].iov_base = (void *)buf;
+ iov[2].iov_len = len;
+
+ msg.msg_iov = iov;
+ msg.msg_iovlen = ARRAY_SIZE(iov);
+
+ msg.msg_name = &addr;
+ msg.msg_namelen = sizeof(addr);
+
+ ret = sendmsg(fd, &msg, 0);
+ if (ret == -1)
+ error(1, errno, "sendmsg packet");
+ if (ret != sizeof(vh) + sizeof(eth) + len)
+ error(1, errno, "sendmsg packet: %u", ret);
+}
+
+static int recv_prepare_udp(void)
+{
+ int fd;
+
+ fd = socket(cfg_family, SOCK_DGRAM, 0);
+ if (fd == -1)
+ error(1, errno, "socket r");
+
+ if (setsockopt(fd, SOL_SOCKET, SO_RCVBUF,
+ &cfg_rcvbuf, sizeof(cfg_rcvbuf)))
+ error(1, errno, "setsockopt SO_RCVBUF r");
+
+ if (cfg_family == PF_INET6) {
+ if (bind(fd, (void *)&cfg_daddr6, sizeof(cfg_daddr6)))
+ error(1, errno, "bind r");
+ } else {
+ if (bind(fd, (void *)&cfg_daddr4, sizeof(cfg_daddr4)))
+ error(1, errno, "bind r");
+ }
+
+ return fd;
+}
+
+/* Filter out all traffic that is not cfg_proto with our destination port.
+ *
+ * Otherwise background noise may cause PF_PACKET receive queue overflow,
+ * dropping the expected packets and failing the test.
+ */
+static void __recv_prepare_packet_filter(int fd, int off_nexthdr, int off_dport)
+{
+ struct sock_filter filter[] = {
+ BPF_STMT(BPF_LD + BPF_B + BPF_ABS, SKF_AD_OFF + SKF_AD_PKTTYPE),
+ BPF_JUMP(BPF_JMP + BPF_JEQ + BPF_K, PACKET_HOST, 0, 4),
+ BPF_STMT(BPF_LD + BPF_B + BPF_ABS, off_nexthdr),
+ BPF_JUMP(BPF_JMP + BPF_JEQ + BPF_K, cfg_encap ? IPPROTO_UDP : cfg_proto, 0, 2),
+ BPF_STMT(BPF_LD + BPF_H + BPF_ABS, off_dport),
+ BPF_JUMP(BPF_JMP + BPF_JEQ + BPF_K, cfg_port_dst, 1, 0),
+ BPF_STMT(BPF_RET + BPF_K, 0),
+ BPF_STMT(BPF_RET + BPF_K, 0xFFFF),
+ };
+ struct sock_fprog prog = {};
+
+ prog.filter = filter;
+ prog.len = ARRAY_SIZE(filter);
+ if (setsockopt(fd, SOL_SOCKET, SO_ATTACH_FILTER, &prog, sizeof(prog)))
+ error(1, errno, "setsockopt filter");
+}
+
+static void recv_prepare_packet_filter(int fd)
+{
+ const int off_dport = offsetof(struct tcphdr, dest); /* same for udp */
+
+ if (cfg_family == AF_INET)
+ __recv_prepare_packet_filter(fd, offsetof(struct iphdr, protocol),
+ sizeof(struct iphdr) + off_dport);
+ else
+ __recv_prepare_packet_filter(fd, offsetof(struct ipv6hdr, nexthdr),
+ sizeof(struct ipv6hdr) + off_dport);
+}
+
+static void recv_prepare_packet_bind(int fd)
+{
+ struct sockaddr_ll laddr = {0};
+
+ laddr.sll_family = AF_PACKET;
+
+ if (cfg_family == PF_INET)
+ laddr.sll_protocol = htons(ETH_P_IP);
+ else
+ laddr.sll_protocol = htons(ETH_P_IPV6);
+
+ laddr.sll_ifindex = if_nametoindex(cfg_ifname);
+ if (!laddr.sll_ifindex)
+ error(1, 0, "if_nametoindex %s", cfg_ifname);
+
+ if (bind(fd, (void *)&laddr, sizeof(laddr)))
+ error(1, errno, "bind pf_packet");
+}
+
+static int recv_prepare_packet(void)
+{
+ int fd, one = 1;
+
+ fd = socket(PF_PACKET, SOCK_DGRAM, 0);
+ if (fd == -1)
+ error(1, errno, "socket p");
+
+ if (setsockopt(fd, SOL_SOCKET, SO_RCVBUF,
+ &cfg_rcvbuf, sizeof(cfg_rcvbuf)))
+ error(1, errno, "setsockopt SO_RCVBUF p");
+
+ /* enable auxdata to recv checksum status (valid vs unknown) */
+ if (setsockopt(fd, SOL_PACKET, PACKET_AUXDATA, &one, sizeof(one)))
+ error(1, errno, "setsockopt auxdata");
+
+ /* install filter to restrict packet flow to match */
+ recv_prepare_packet_filter(fd);
+
+ /* bind to address family to start packet flow */
+ recv_prepare_packet_bind(fd);
+
+ return fd;
+}
+
+static int recv_udp(int fd)
+{
+ static char buf[MAX_PAYLOAD_LEN];
+ int ret, count = 0;
+
+ while (1) {
+ ret = recv(fd, buf, sizeof(buf), MSG_DONTWAIT);
+ if (ret == -1 && errno == EAGAIN)
+ break;
+ if (ret == -1)
+ error(1, errno, "recv r");
+
+ fprintf(stderr, "rx: udp: len=%u\n", ret);
+ count++;
+ }
+
+ return count;
+}
+
+static int recv_verify_csum(void *th, int len, uint16_t sport, uint16_t csum_field)
+{
+ uint16_t csum;
+
+ csum = checksum(th, cfg_proto, len);
+
+ fprintf(stderr, "rx: pkt: sport=%hu len=%u csum=0x%hx verify=0x%hx\n",
+ sport, len, csum_field, csum);
+
+ /* csum must be zero unless cfg_bad_csum indicates bad csum */
+ if (csum && !cfg_bad_csum) {
+ fprintf(stderr, "pkt: bad csum\n");
+ return 1;
+ } else if (cfg_bad_csum && !csum) {
+ fprintf(stderr, "pkt: good csum, while bad expected\n");
+ return 1;
+ }
+
+ if (cfg_zero_sum && csum_field != 0xFFFF) {
+ fprintf(stderr, "pkt: zero csum: field should be 0xFFFF, is 0x%hx\n", csum_field);
+ return 1;
+ }
+
+ return 0;
+}
+
+static int recv_verify_packet_tcp(void *th, int len)
+{
+ struct tcphdr *tcph = th;
+
+ if (len < sizeof(*tcph) || tcph->dest != htons(cfg_port_dst))
+ return -1;
+
+ return recv_verify_csum(th, len, ntohs(tcph->source), tcph->check);
+}
+
+static int recv_verify_packet_udp_encap(void *th, int len)
+{
+ struct udp_encap_hdr *eh = th;
+
+ if (len < sizeof(*eh) || eh->nexthdr != IPPROTO_TCP)
+ return -1;
+
+ return recv_verify_packet_tcp(eh + 1, len - sizeof(*eh));
+}
+
+static int recv_verify_packet_udp(void *th, int len)
+{
+ struct udphdr *udph = th;
+
+ if (len < sizeof(*udph))
+ return -1;
+
+ if (udph->dest != htons(cfg_port_dst))
+ return -1;
+
+ if (udph->source == htons(cfg_port_src_encap))
+ return recv_verify_packet_udp_encap(udph + 1,
+ len - sizeof(*udph));
+
+ return recv_verify_csum(th, len, ntohs(udph->source), udph->check);
+}
+
+static int recv_verify_packet_ipv4(void *nh, int len)
+{
+ struct iphdr *iph = nh;
+ uint16_t proto = cfg_encap ? IPPROTO_UDP : cfg_proto;
+
+ if (len < sizeof(*iph) || iph->protocol != proto)
+ return -1;
+
+ iph_addr_p = &iph->saddr;
+ if (proto == IPPROTO_TCP)
+ return recv_verify_packet_tcp(iph + 1, len - sizeof(*iph));
+ else
+ return recv_verify_packet_udp(iph + 1, len - sizeof(*iph));
+}
+
+static int recv_verify_packet_ipv6(void *nh, int len)
+{
+ struct ipv6hdr *ip6h = nh;
+ uint16_t proto = cfg_encap ? IPPROTO_UDP : cfg_proto;
+
+ if (len < sizeof(*ip6h) || ip6h->nexthdr != proto)
+ return -1;
+
+ iph_addr_p = &ip6h->saddr;
+
+ if (proto == IPPROTO_TCP)
+ return recv_verify_packet_tcp(ip6h + 1, len - sizeof(*ip6h));
+ else
+ return recv_verify_packet_udp(ip6h + 1, len - sizeof(*ip6h));
+}
+
+/* return whether auxdata includes TP_STATUS_CSUM_VALID */
+static bool recv_verify_packet_csum(struct msghdr *msg)
+{
+ struct tpacket_auxdata *aux = NULL;
+ struct cmsghdr *cm;
+
+ if (msg->msg_flags & MSG_CTRUNC)
+ error(1, 0, "cmsg: truncated");
+
+ for (cm = CMSG_FIRSTHDR(msg); cm; cm = CMSG_NXTHDR(msg, cm)) {
+ if (cm->cmsg_level != SOL_PACKET ||
+ cm->cmsg_type != PACKET_AUXDATA)
+ error(1, 0, "cmsg: level=%d type=%d\n",
+ cm->cmsg_level, cm->cmsg_type);
+
+ if (cm->cmsg_len != CMSG_LEN(sizeof(struct tpacket_auxdata)))
+ error(1, 0, "cmsg: len=%lu expected=%lu",
+ cm->cmsg_len, CMSG_LEN(sizeof(struct tpacket_auxdata)));
+
+ aux = (void *)CMSG_DATA(cm);
+ }
+
+ if (!aux)
+ error(1, 0, "cmsg: no auxdata");
+
+ return aux->tp_status & TP_STATUS_CSUM_VALID;
+}
+
+static int recv_packet(int fd)
+{
+ static char _buf[MAX_HEADER_LEN + MAX_PAYLOAD_LEN];
+ unsigned long total = 0, bad_csums = 0, bad_validations = 0;
+ char ctrl[CMSG_SPACE(sizeof(struct tpacket_auxdata))];
+ struct pkt *buf = (void *)_buf;
+ struct msghdr msg = {0};
+ struct iovec iov;
+ int len, ret;
+
+ iov.iov_base = _buf;
+ iov.iov_len = sizeof(_buf);
+
+ msg.msg_iov = &iov;
+ msg.msg_iovlen = 1;
+
+ msg.msg_control = ctrl;
+ msg.msg_controllen = sizeof(ctrl);
+
+ while (1) {
+ msg.msg_flags = 0;
+
+ len = recvmsg(fd, &msg, MSG_DONTWAIT);
+ if (len == -1 && errno == EAGAIN)
+ break;
+ if (len == -1)
+ error(1, errno, "recv p");
+
+ if (cfg_family == PF_INET6)
+ ret = recv_verify_packet_ipv6(buf, len);
+ else
+ ret = recv_verify_packet_ipv4(buf, len);
+
+ if (ret == -1 /* skip: non-matching */)
+ continue;
+
+ total++;
+ if (ret == 1)
+ bad_csums++;
+
+ /* Fail if kernel returns valid for known bad csum.
+ * Do not fail if kernel does not validate a good csum:
+ * Absence of validation does not imply invalid.
+ */
+ if (recv_verify_packet_csum(&msg) && cfg_bad_csum) {
+ fprintf(stderr, "cmsg: expected bad csum, pf_packet returns valid\n");
+ bad_validations++;
+ }
+ }
+
+ if (bad_csums || bad_validations)
+ error(1, 0, "rx: errors at pf_packet: total=%lu bad_csums=%lu bad_valids=%lu\n",
+ total, bad_csums, bad_validations);
+
+ return total;
+}
+
+static void parse_args(int argc, char *const argv[])
+{
+ const char *daddr = NULL, *saddr = NULL;
+ int c;
+
+ while ((c = getopt(argc, argv, "46d:D:eEi:l:L:n:r:PRs:S:tTuUzZ")) != -1) {
+ switch (c) {
+ case '4':
+ cfg_family = PF_INET;
+ break;
+ case '6':
+ cfg_family = PF_INET6;
+ break;
+ case 'd':
+ cfg_mac_dst = optarg;
+ break;
+ case 'D':
+ daddr = optarg;
+ break;
+ case 'e':
+ cfg_encap = true;
+ break;
+ case 'E':
+ cfg_bad_csum = true;
+ break;
+ case 'i':
+ cfg_ifname = optarg;
+ break;
+ case 'l':
+ cfg_payload_len = strtol(optarg, NULL, 0);
+ break;
+ case 'L':
+ cfg_timeout_ms = strtol(optarg, NULL, 0) * 1000;
+ break;
+ case 'n':
+ cfg_num_pkt = strtol(optarg, NULL, 0);
+ break;
+ case 'r':
+ cfg_random_seed = strtol(optarg, NULL, 0);
+ break;
+ case 'P':
+ cfg_send_pfpacket = true;
+ break;
+ case 'R':
+ /* only Rx: used with two machine tests */
+ cfg_do_tx = false;
+ break;
+ case 's':
+ cfg_mac_src = optarg;
+ break;
+ case 'S':
+ saddr = optarg;
+ break;
+ case 't':
+ cfg_proto = IPPROTO_TCP;
+ break;
+ case 'T':
+ /* only Tx: used with two machine tests */
+ cfg_do_rx = false;
+ break;
+ case 'u':
+ cfg_proto = IPPROTO_UDP;
+ break;
+ case 'U':
+ /* send using real udp socket,
+ * to exercise tx checksum offload
+ */
+ cfg_send_udp = true;
+ break;
+ case 'z':
+ cfg_zero_disable = true;
+ break;
+ case 'Z':
+ cfg_zero_sum = true;
+ break;
+ default:
+ error(1, 0, "unknown arg %c", c);
+ }
+ }
+
+ if (!daddr || !saddr)
+ error(1, 0, "Must pass -D <daddr> and -S <saddr>");
+
+ if (cfg_do_tx && cfg_send_pfpacket && (!cfg_mac_src || !cfg_mac_dst))
+ error(1, 0, "Transmit with pf_packet requires mac addresses");
+
+ if (cfg_payload_len > MAX_PAYLOAD_LEN)
+ error(1, 0, "Payload length exceeds max");
+
+ if (cfg_proto != IPPROTO_UDP && (cfg_zero_sum || cfg_zero_disable))
+ error(1, 0, "Only UDP supports zero csum");
+
+ if (cfg_zero_sum && !cfg_send_udp)
+ error(1, 0, "Zero checksum conversion requires -U for tx csum offload");
+ if (cfg_zero_sum && cfg_bad_csum)
+ error(1, 0, "Cannot combine zero checksum conversion and invalid checksum");
+ if (cfg_zero_sum && cfg_random_seed)
+ error(1, 0, "Cannot combine zero checksum conversion with randomization");
+
+ if (cfg_family == PF_INET6) {
+ cfg_saddr6.sin6_port = htons(cfg_port_src);
+ cfg_daddr6.sin6_port = htons(cfg_port_dst);
+
+ if (inet_pton(cfg_family, daddr, &cfg_daddr6.sin6_addr) != 1)
+ error(1, errno, "Cannot parse ipv6 -D");
+ if (inet_pton(cfg_family, saddr, &cfg_saddr6.sin6_addr) != 1)
+ error(1, errno, "Cannot parse ipv6 -S");
+ } else {
+ cfg_saddr4.sin_port = htons(cfg_port_src);
+ cfg_daddr4.sin_port = htons(cfg_port_dst);
+
+ if (inet_pton(cfg_family, daddr, &cfg_daddr4.sin_addr) != 1)
+ error(1, errno, "Cannot parse ipv4 -D");
+ if (inet_pton(cfg_family, saddr, &cfg_saddr4.sin_addr) != 1)
+ error(1, errno, "Cannot parse ipv4 -S");
+ }
+
+ if (cfg_do_tx && cfg_random_seed) {
+ /* special case: time-based seed */
+ if (cfg_random_seed == 1)
+ cfg_random_seed = (unsigned int)gettimeofday_ms();
+ srand(cfg_random_seed);
+ fprintf(stderr, "randomization seed: %u\n", cfg_random_seed);
+ }
+}
+
+static void do_tx(void)
+{
+ static char _buf[MAX_HEADER_LEN + MAX_PAYLOAD_LEN];
+ char *buf;
+ int fd, len, i;
+
+ buf = build_packet(_buf, sizeof(_buf), &len);
+
+ if (cfg_send_pfpacket)
+ fd = open_packet();
+ else if (cfg_send_udp)
+ fd = open_inet(SOCK_DGRAM, 0);
+ else
+ fd = open_inet(SOCK_RAW, IPPROTO_RAW);
+
+ for (i = 0; i < cfg_num_pkt; i++) {
+ if (cfg_send_pfpacket)
+ send_packet(fd, buf, len);
+ else
+ send_inet(fd, buf, len);
+
+ /* randomize each packet individually to increase coverage */
+ if (cfg_random_seed) {
+ cfg_payload_len = rand() % MAX_PAYLOAD_LEN;
+ buf = build_packet(_buf, sizeof(_buf), &len);
+ }
+ }
+
+ if (close(fd))
+ error(1, errno, "close tx");
+}
+
+static void do_rx(int fdp, int fdr)
+{
+ unsigned long count_udp = 0, count_pkt = 0;
+ long tleft, tstop;
+ struct pollfd pfd;
+
+ tstop = gettimeofday_ms() + cfg_timeout_ms;
+ tleft = cfg_timeout_ms;
+
+ do {
+ pfd.events = POLLIN;
+ pfd.fd = fdp;
+ if (poll(&pfd, 1, tleft) == -1)
+ error(1, errno, "poll");
+
+ if (pfd.revents & POLLIN)
+ count_pkt += recv_packet(fdp);
+
+ if (cfg_proto == IPPROTO_UDP)
+ count_udp += recv_udp(fdr);
+
+ tleft = tstop - gettimeofday_ms();
+ } while (tleft > 0);
+
+ if (close(fdr))
+ error(1, errno, "close r");
+ if (close(fdp))
+ error(1, errno, "close p");
+
+ if (count_pkt < cfg_num_pkt)
+ error(1, 0, "rx: missing packets at pf_packet: %lu < %u",
+ count_pkt, cfg_num_pkt);
+
+ if (cfg_proto == IPPROTO_UDP) {
+ if (cfg_bad_csum && count_udp)
+ error(1, 0, "rx: unexpected packets at udp");
+ if (!cfg_bad_csum && !count_udp)
+ error(1, 0, "rx: missing packets at udp");
+ }
+}
+
+int main(int argc, char *const argv[])
+{
+ int fdp = -1, fdr = -1; /* -1 to silence -Wmaybe-uninitialized */
+
+ parse_args(argc, argv);
+
+ /* open receive sockets before transmitting */
+ if (cfg_do_rx) {
+ fdp = recv_prepare_packet();
+ fdr = recv_prepare_udp();
+ }
+
+ if (cfg_do_tx)
+ do_tx();
+
+ if (cfg_do_rx)
+ do_rx(fdp, fdr);
+
+ fprintf(stderr, "OK\n");
+ return 0;
+}
diff --git a/tools/testing/selftests/net/devlink_port_split.py b/tools/testing/selftests/net/devlink_port_split.py
new file mode 100755
index 000000000000..2d84c7a0be6b
--- /dev/null
+++ b/tools/testing/selftests/net/devlink_port_split.py
@@ -0,0 +1,309 @@
+#!/usr/bin/env python3
+# SPDX-License-Identifier: GPL-2.0
+
+from subprocess import PIPE, Popen
+import json
+import time
+import argparse
+import collections
+import sys
+
+#
+# Test port split configuration using devlink-port lanes attribute.
+# The test is skipped in case the attribute is not available.
+#
+# First, check that all the ports with 1 lane fail to split.
+# Second, check that all the ports with more than 1 lane can be split
+# to all valid configurations (e.g., split to 2, split to 4 etc.)
+#
+
+
+# Kselftest framework requirement - SKIP code is 4
+KSFT_SKIP=4
+Port = collections.namedtuple('Port', 'bus_info name')
+
+
+def run_command(cmd, should_fail=False):
+ """
+ Run a command in subprocess.
+ Return: Tuple of (stdout, stderr).
+ """
+
+ p = Popen(cmd, stdout=PIPE, stderr=PIPE, shell=True)
+ stdout, stderr = p.communicate()
+ stdout, stderr = stdout.decode(), stderr.decode()
+
+ if stderr != "" and not should_fail:
+ print("Error sending command: %s" % cmd)
+ print(stdout)
+ print(stderr)
+ return stdout, stderr
+
+
+class devlink_ports(object):
+ """
+ Class that holds information on the devlink ports, required to the tests;
+ if_names: A list of interfaces in the devlink ports.
+ """
+
+ def get_if_names(dev):
+ """
+ Get a list of physical devlink ports.
+ Return: Array of tuples (bus_info/port, if_name).
+ """
+
+ arr = []
+
+ cmd = "devlink -j port show"
+ stdout, stderr = run_command(cmd)
+ assert stderr == ""
+ ports = json.loads(stdout)['port']
+
+ validate_devlink_output(ports, 'flavour')
+
+ for port in ports:
+ if dev in port:
+ if ports[port]['flavour'] == 'physical':
+ arr.append(Port(bus_info=port, name=ports[port]['netdev']))
+
+ return arr
+
+ def __init__(self, dev):
+ self.if_names = devlink_ports.get_if_names(dev)
+
+
+def get_max_lanes(port):
+ """
+ Get the $port's maximum number of lanes.
+ Return: number of lanes, e.g. 1, 2, 4 and 8.
+ """
+
+ cmd = "devlink -j port show %s" % port
+ stdout, stderr = run_command(cmd)
+ assert stderr == ""
+ values = list(json.loads(stdout)['port'].values())[0]
+
+ if 'lanes' in values:
+ lanes = values['lanes']
+ else:
+ lanes = 0
+ return lanes
+
+
+def get_split_ability(port):
+ """
+ Get the $port split ability.
+ Return: split ability, true or false.
+ """
+
+ cmd = "devlink -j port show %s" % port.name
+ stdout, stderr = run_command(cmd)
+ assert stderr == ""
+ values = list(json.loads(stdout)['port'].values())[0]
+
+ return values['splittable']
+
+
+def split(k, port, should_fail=False):
+ """
+ Split $port into $k ports.
+ If should_fail == True, the split should fail. Otherwise, should pass.
+ Return: Array of sub ports after splitting.
+ If the $port wasn't split, the array will be empty.
+ """
+
+ cmd = "devlink port split %s count %s" % (port.bus_info, k)
+ stdout, stderr = run_command(cmd, should_fail=should_fail)
+
+ if should_fail:
+ if not test(stderr != "", "%s is unsplittable" % port.name):
+ print("split an unsplittable port %s" % port.name)
+ return create_split_group(port, k)
+ else:
+ if stderr == "":
+ return create_split_group(port, k)
+ print("didn't split a splittable port %s" % port.name)
+
+ return []
+
+
+def unsplit(port):
+ """
+ Unsplit $port.
+ """
+
+ cmd = "devlink port unsplit %s" % port
+ stdout, stderr = run_command(cmd)
+ test(stderr == "", "Unsplit port %s" % port)
+
+
+def exists(port, dev):
+ """
+ Check if $port exists in the devlink ports.
+ Return: True is so, False otherwise.
+ """
+
+ return any(dev_port.name == port
+ for dev_port in devlink_ports.get_if_names(dev))
+
+
+def exists_and_lanes(ports, lanes, dev):
+ """
+ Check if every port in the list $ports exists in the devlink ports and has
+ $lanes number of lanes after splitting.
+ Return: True if both are True, False otherwise.
+ """
+
+ for port in ports:
+ max_lanes = get_max_lanes(port)
+ if not exists(port, dev):
+ print("port %s doesn't exist in devlink ports" % port)
+ return False
+ if max_lanes != lanes:
+ print("port %s has %d lanes, but %s were expected"
+ % (port, lanes, max_lanes))
+ return False
+ return True
+
+
+def test(cond, msg):
+ """
+ Check $cond and print a message accordingly.
+ Return: True is pass, False otherwise.
+ """
+
+ if cond:
+ print("TEST: %-60s [ OK ]" % msg)
+ else:
+ print("TEST: %-60s [FAIL]" % msg)
+
+ return cond
+
+
+def create_split_group(port, k):
+ """
+ Create the split group for $port.
+ Return: Array with $k elements, which are the split port group.
+ """
+
+ return list(port.name + "s" + str(i) for i in range(k))
+
+
+def split_unsplittable_port(port, k):
+ """
+ Test that splitting of unsplittable port fails.
+ """
+
+ # split to max
+ new_split_group = split(k, port, should_fail=True)
+
+ if new_split_group != []:
+ unsplit(port.bus_info)
+
+
+def split_splittable_port(port, k, lanes, dev):
+ """
+ Test that splitting of splittable port passes correctly.
+ """
+
+ new_split_group = split(k, port)
+
+ # Once the split command ends, it takes some time to the sub ifaces'
+ # to get their names. Use udevadm to continue only when all current udev
+ # events are handled.
+ cmd = "udevadm settle"
+ stdout, stderr = run_command(cmd)
+ assert stderr == ""
+
+ if new_split_group != []:
+ test(exists_and_lanes(new_split_group, lanes/k, dev),
+ "split port %s into %s" % (port.name, k))
+
+ unsplit(port.bus_info)
+
+
+def validate_devlink_output(devlink_data, target_property=None):
+ """
+ Determine if test should be skipped by checking:
+ 1. devlink_data contains values
+ 2. The target_property exist in devlink_data
+ """
+ skip_reason = None
+ if any(devlink_data.values()):
+ if target_property:
+ skip_reason = "{} not found in devlink output, test skipped".format(target_property)
+ for key in devlink_data:
+ if target_property in devlink_data[key]:
+ skip_reason = None
+ else:
+ skip_reason = 'devlink output is empty, test skipped'
+
+ if skip_reason:
+ print(skip_reason)
+ sys.exit(KSFT_SKIP)
+
+
+def make_parser():
+ parser = argparse.ArgumentParser(description='A test for port splitting.')
+ parser.add_argument('--dev',
+ help='The devlink handle of the device under test. ' +
+ 'The default is the first registered devlink ' +
+ 'handle.')
+
+ return parser
+
+
+def main(cmdline=None):
+ parser = make_parser()
+ args = parser.parse_args(cmdline)
+
+ dev = args.dev
+ if not dev:
+ cmd = "devlink -j dev show"
+ stdout, stderr = run_command(cmd)
+ assert stderr == ""
+
+ validate_devlink_output(json.loads(stdout))
+ devs = json.loads(stdout)['dev']
+ dev = list(devs.keys())[0]
+
+ cmd = "devlink dev show %s" % dev
+ stdout, stderr = run_command(cmd)
+ if stderr != "":
+ print("devlink device %s can not be found" % dev)
+ sys.exit(1)
+
+ ports = devlink_ports(dev)
+
+ found_max_lanes = False
+ for port in ports.if_names:
+ max_lanes = get_max_lanes(port.name)
+
+ # If max lanes is 0, do not test port splitting at all
+ if max_lanes == 0:
+ continue
+
+ # If 1 lane, shouldn't be able to split
+ elif max_lanes == 1:
+ test(not get_split_ability(port),
+ "%s should not be able to split" % port.name)
+ split_unsplittable_port(port, max_lanes)
+
+ # Else, splitting should pass and all the split ports should exist.
+ else:
+ lane = max_lanes
+ test(get_split_ability(port),
+ "%s should be able to split" % port.name)
+ while lane > 1:
+ split_splittable_port(port, lane, max_lanes, dev)
+
+ lane //= 2
+ found_max_lanes = True
+
+ if not found_max_lanes:
+ print(f"Test not started, no port of device {dev} reports max_lanes")
+ sys.exit(KSFT_SKIP)
+
+
+if __name__ == "__main__":
+ main()
diff --git a/tools/testing/selftests/net/drop_monitor_tests.sh b/tools/testing/selftests/net/drop_monitor_tests.sh
new file mode 100755
index 000000000000..7c4818c971fc
--- /dev/null
+++ b/tools/testing/selftests/net/drop_monitor_tests.sh
@@ -0,0 +1,216 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+# This test is for checking drop monitor functionality.
+source lib.sh
+ret=0
+
+# all tests in this script. Can be overridden with -t option
+TESTS="
+ sw_drops
+ hw_drops
+"
+
+NETDEVSIM_PATH=/sys/bus/netdevsim/
+DEV_ADDR=1337
+DEV=netdevsim${DEV_ADDR}
+DEVLINK_DEV=netdevsim/${DEV}
+
+log_test()
+{
+ local rc=$1
+ local expected=$2
+ local msg="$3"
+
+ if [ ${rc} -eq ${expected} ]; then
+ printf " TEST: %-60s [ OK ]\n" "${msg}"
+ nsuccess=$((nsuccess+1))
+ else
+ ret=1
+ nfail=$((nfail+1))
+ printf " TEST: %-60s [FAIL]\n" "${msg}"
+ fi
+}
+
+setup()
+{
+ modprobe netdevsim &> /dev/null
+
+ set -e
+ setup_ns NS1
+ $IP link add dummy10 up type dummy
+
+ $NS_EXEC echo "$DEV_ADDR 1" > ${NETDEVSIM_PATH}/new_device
+ udevadm settle
+ local netdev=$($NS_EXEC ls ${NETDEVSIM_PATH}/devices/${DEV}/net/)
+ $IP link set dev $netdev up
+
+ set +e
+}
+
+cleanup()
+{
+ $NS_EXEC echo "$DEV_ADDR" > ${NETDEVSIM_PATH}/del_device
+ cleanup_ns ${NS1}
+}
+
+sw_drops_test()
+{
+ echo
+ echo "Software drops test"
+
+ setup
+
+ local dir=$(mktemp -d)
+
+ $TC qdisc add dev dummy10 clsact
+ $TC filter add dev dummy10 egress pref 1 handle 101 proto ip \
+ flower dst_ip 192.0.2.10 action drop
+
+ $NS_EXEC mausezahn dummy10 -a 00:11:22:33:44:55 -b 00:aa:bb:cc:dd:ee \
+ -A 192.0.2.1 -B 192.0.2.10 -t udp sp=12345,dp=54321 -c 0 -q \
+ -d 100msec &
+ timeout 5 dwdump -o sw -w ${dir}/packets.pcap
+ (( $(tshark -r ${dir}/packets.pcap \
+ -Y 'ip.dst == 192.0.2.10' 2> /dev/null | wc -l) != 0))
+ log_test $? 0 "Capturing active software drops"
+
+ rm ${dir}/packets.pcap
+
+ { kill %% && wait %%; } 2>/dev/null
+ timeout 5 dwdump -o sw -w ${dir}/packets.pcap
+ (( $(tshark -r ${dir}/packets.pcap \
+ -Y 'ip.dst == 192.0.2.10' 2> /dev/null | wc -l) == 0))
+ log_test $? 0 "Capturing inactive software drops"
+
+ rm -r $dir
+
+ cleanup
+}
+
+hw_drops_test()
+{
+ echo
+ echo "Hardware drops test"
+
+ setup
+
+ local dir=$(mktemp -d)
+
+ $DEVLINK trap set $DEVLINK_DEV trap blackhole_route action trap
+ timeout 5 dwdump -o hw -w ${dir}/packets.pcap
+ (( $(tshark -r ${dir}/packets.pcap \
+ -Y 'net_dm.hw_trap_name== blackhole_route' 2> /dev/null \
+ | wc -l) != 0))
+ log_test $? 0 "Capturing active hardware drops"
+
+ rm ${dir}/packets.pcap
+
+ $DEVLINK trap set $DEVLINK_DEV trap blackhole_route action drop
+ timeout 5 dwdump -o hw -w ${dir}/packets.pcap
+ (( $(tshark -r ${dir}/packets.pcap \
+ -Y 'net_dm.hw_trap_name== blackhole_route' 2> /dev/null \
+ | wc -l) == 0))
+ log_test $? 0 "Capturing inactive hardware drops"
+
+ rm -r $dir
+
+ cleanup
+}
+
+################################################################################
+# usage
+
+usage()
+{
+ cat <<EOF
+usage: ${0##*/} OPTS
+
+ -t <test> Test(s) to run (default: all)
+ (options: $TESTS)
+EOF
+}
+
+################################################################################
+# main
+
+while getopts ":t:h" opt; do
+ case $opt in
+ t) TESTS=$OPTARG;;
+ h) usage; exit 0;;
+ *) usage; exit 1;;
+ esac
+done
+
+if [ "$(id -u)" -ne 0 ];then
+ echo "SKIP: Need root privileges"
+ exit $ksft_skip;
+fi
+
+if [ ! -x "$(command -v ip)" ]; then
+ echo "SKIP: Could not run test without ip tool"
+ exit $ksft_skip
+fi
+
+if [ ! -x "$(command -v devlink)" ]; then
+ echo "SKIP: Could not run test without devlink tool"
+ exit $ksft_skip
+fi
+
+if [ ! -x "$(command -v tshark)" ]; then
+ echo "SKIP: Could not run test without tshark tool"
+ exit $ksft_skip
+fi
+
+if [ ! -x "$(command -v dwdump)" ]; then
+ echo "SKIP: Could not run test without dwdump tool"
+ exit $ksft_skip
+fi
+
+if [ ! -x "$(command -v udevadm)" ]; then
+ echo "SKIP: Could not run test without udevadm tool"
+ exit $ksft_skip
+fi
+
+if [ ! -x "$(command -v timeout)" ]; then
+ echo "SKIP: Could not run test without timeout tool"
+ exit $ksft_skip
+fi
+
+if [ ! -x "$(command -v mausezahn)" ]; then
+ echo "SKIP: Could not run test without mausezahn tool"
+ exit $ksft_skip
+fi
+
+tshark -G fields 2> /dev/null | grep -q net_dm
+if [ $? -ne 0 ]; then
+ echo "SKIP: tshark too old, missing net_dm dissector"
+ exit $ksft_skip
+fi
+
+# create netns first so we can get the namespace name
+setup_ns NS1
+cleanup &> /dev/null
+trap cleanup EXIT
+
+IP="ip -netns ${NS1}"
+TC="tc -netns ${NS1}"
+DEVLINK="devlink -N ${NS1}"
+NS_EXEC="ip netns exec ${NS1}"
+
+for t in $TESTS
+do
+ case $t in
+ sw_drops|sw) sw_drops_test;;
+ hw_drops|hw) hw_drops_test;;
+
+ help) echo "Test names: $TESTS"; exit 0;;
+ esac
+done
+
+if [ "$TESTS" != "none" ]; then
+ printf "\nTests passed: %3d\n" ${nsuccess}
+ printf "Tests failed: %3d\n" ${nfail}
+fi
+
+exit $ret
diff --git a/tools/testing/selftests/net/fcnal-test.sh b/tools/testing/selftests/net/fcnal-test.sh
index fb5c55dd6df8..386ebd829df5 100755
--- a/tools/testing/selftests/net/fcnal-test.sh
+++ b/tools/testing/selftests/net/fcnal-test.sh
@@ -37,6 +37,10 @@
#
# server / client nomenclature relative to ns-A
+source lib.sh
+
+PATH=$PWD:$PWD/tools/testing/selftests/net:$PATH
+
VERBOSE=0
NSA_DEV=eth1
@@ -63,6 +67,14 @@ NSB_LO_IP=172.16.2.2
NSA_LO_IP6=2001:db8:2::1
NSB_LO_IP6=2001:db8:2::2
+# non-local addresses for freebind tests
+NL_IP=172.17.1.1
+NL_IP6=2001:db8:4::1
+
+# multicast and broadcast addresses
+MCAST_IP=224.0.0.1
+BCAST_IP=255.255.255.255
+
MD5_PW=abc123
MD5_WRONG_PW=abc1234
@@ -71,16 +83,15 @@ MCAST=ff02::1
NSA_LINKIP6=
NSB_LINKIP6=
-NSA=ns-A
-NSB=ns-B
-NSC=ns-C
-
-NSA_CMD="ip netns exec ${NSA}"
-NSB_CMD="ip netns exec ${NSB}"
-NSC_CMD="ip netns exec ${NSC}"
-
which ping6 > /dev/null 2>&1 && ping6=$(which ping6) || ping6=$(which ping)
+# Check if FIPS mode is enabled
+if [ -f /proc/sys/crypto/fips_enabled ]; then
+ fips_enabled=`cat /proc/sys/crypto/fips_enabled`
+else
+ fips_enabled=0
+fi
+
################################################################################
# utilities
@@ -89,6 +100,7 @@ log_test()
local rc=$1
local expected=$2
local msg="$3"
+ local ans
[ "${VERBOSE}" = "1" ] && echo
@@ -98,19 +110,20 @@ log_test()
else
nfail=$((nfail+1))
printf "TEST: %-70s [FAIL]\n" "${msg}"
+ echo " expected rc $expected; actual rc $rc"
if [ "${PAUSE_ON_FAIL}" = "yes" ]; then
echo
echo "hit enter to continue, 'q' to quit"
- read a
- [ "$a" = "q" ] && exit 1
+ read ans
+ [ "$ans" = "q" ] && exit 1
fi
fi
if [ "${PAUSE}" = "yes" ]; then
echo
echo "hit enter to continue, 'q' to quit"
- read a
- [ "$a" = "q" ] && exit 1
+ read ans
+ [ "$ans" = "q" ] && exit 1
fi
kill_procs
@@ -179,6 +192,15 @@ kill_procs()
sleep 1
}
+set_ping_group()
+{
+ if [ "$VERBOSE" = "1" ]; then
+ echo "COMMAND: ${NSA_CMD} sysctl -q -w net.ipv4.ping_group_range='0 2147483647'"
+ fi
+
+ ${NSA_CMD} sysctl -q -w net.ipv4.ping_group_range='0 2147483647'
+}
+
do_run_cmd()
{
local cmd="$*"
@@ -256,6 +278,28 @@ setup_cmd_nsb()
fi
}
+setup_cmd_nsc()
+{
+ local cmd="$*"
+ local rc
+
+ run_cmd_nsc ${cmd}
+ rc=$?
+ if [ $rc -ne 0 ]; then
+ # show user the command if not done so already
+ if [ "$VERBOSE" = "0" ]; then
+ echo "setup command: $cmd"
+ fi
+ echo "failed. stopping tests"
+ if [ "${PAUSE_ON_FAIL}" = "yes" ]; then
+ echo
+ echo "hit enter to continue"
+ read a
+ fi
+ exit $rc
+ fi
+}
+
# set sysctl values in NS-A
set_sysctl()
{
@@ -264,6 +308,12 @@ set_sysctl()
run_cmd sysctl -q -w $*
}
+# get sysctl values in NS-A
+get_sysctl()
+{
+ ${NSA_CMD} sysctl -n $*
+}
+
################################################################################
# Setup for tests
@@ -273,6 +323,9 @@ addr2str()
127.0.0.1) echo "loopback";;
::1) echo "IPv6 loopback";;
+ ${BCAST_IP}) echo "broadcast";;
+ ${MCAST_IP}) echo "multicast";;
+
${NSA_IP}) echo "ns-A IP";;
${NSA_IP6}) echo "ns-A IPv6";;
${NSA_LO_IP}) echo "ns-A loopback IP";;
@@ -285,6 +338,9 @@ addr2str()
${NSB_LO_IP6}) echo "ns-B loopback IPv6";;
${NSB_LINKIP6}|${NSB_LINKIP6}%*) echo "ns-B IPv6 LLA";;
+ ${NL_IP}) echo "nonlocal IP";;
+ ${NL_IP6}) echo "nonlocal IPv6";;
+
${VRF_IP}) echo "VRF IP";;
${VRF_IP6}) echo "VRF IPv6";;
@@ -354,9 +410,6 @@ create_ns()
local addr=$2
local addr6=$3
- ip netns add ${ns}
-
- ip -netns ${ns} link set lo up
if [ "${addr}" != "-" ]; then
ip -netns ${ns} addr add dev lo ${addr}
fi
@@ -414,13 +467,33 @@ cleanup()
ip -netns ${NSA} link set dev ${NSA_DEV} down
ip -netns ${NSA} link del dev ${NSA_DEV}
- ip netns del ${NSA}
+ ip netns pids ${NSA} | xargs kill 2>/dev/null
+ cleanup_ns ${NSA}
fi
- ip netns del ${NSB}
+ ip netns pids ${NSB} | xargs kill 2>/dev/null
+ ip netns pids ${NSC} | xargs kill 2>/dev/null
+ cleanup_ns ${NSB} ${NSC}
+}
+
+cleanup_vrf_dup()
+{
+ ip link del ${NSA_DEV2} >/dev/null 2>&1
+ ip netns pids ${NSC} | xargs kill 2>/dev/null
ip netns del ${NSC} >/dev/null 2>&1
}
+setup_vrf_dup()
+{
+ # some VRF tests use ns-C which has the same config as
+ # ns-B but for a device NOT in the VRF
+ setup_ns NSC
+ NSC_CMD="ip netns exec ${NSC}"
+ create_ns ${NSC} "-" "-"
+ connect_ns ${NSA} ${NSA_DEV2} ${NSA_IP}/24 ${NSA_IP6}/64 \
+ ${NSC} ${NSC_DEV} ${NSB_IP}/24 ${NSB_IP6}/64
+}
+
setup()
{
local with_vrf=${1}
@@ -432,6 +505,10 @@ setup()
log_debug "Configuring network namespaces"
set -e
+ setup_ns NSA NSB
+ NSA_CMD="ip netns exec ${NSA}"
+ NSB_CMD="ip netns exec ${NSB}"
+
create_ns ${NSA} ${NSA_LO_IP}/32 ${NSA_LO_IP6}/128
create_ns ${NSB} ${NSB_LO_IP}/32 ${NSB_LO_IP6}/128
connect_ns ${NSA} ${NSA_DEV} ${NSA_IP}/24 ${NSA_IP6}/64 \
@@ -450,12 +527,6 @@ setup()
ip -netns ${NSB} ro add ${VRF_IP}/32 via ${NSA_IP} dev ${NSB_DEV}
ip -netns ${NSB} -6 ro add ${VRF_IP6}/128 via ${NSA_IP6} dev ${NSB_DEV}
-
- # some VRF tests use ns-C which has the same config as
- # ns-B but for a device NOT in the VRF
- create_ns ${NSC} "-" "-"
- connect_ns ${NSA} ${NSA_DEV2} ${NSA_IP}/24 ${NSA_IP6}/64 \
- ${NSC} ${NSC_DEV} ${NSB_IP}/24 ${NSB_IP6}/64
else
ip -netns ${NSA} ro add ${NSB_LO_IP}/32 via ${NSB_IP} dev ${NSA_DEV}
ip -netns ${NSA} ro add ${NSB_LO_IP6}/128 via ${NSB_IP6} dev ${NSA_DEV}
@@ -471,6 +542,40 @@ setup()
sleep 1
}
+setup_lla_only()
+{
+ # make sure we are starting with a clean slate
+ kill_procs
+ cleanup 2>/dev/null
+
+ log_debug "Configuring network namespaces"
+ set -e
+
+ setup_ns NSA NSB NSC
+ NSA_CMD="ip netns exec ${NSA}"
+ NSB_CMD="ip netns exec ${NSB}"
+ NSC_CMD="ip netns exec ${NSC}"
+ create_ns ${NSA} "-" "-"
+ create_ns ${NSB} "-" "-"
+ create_ns ${NSC} "-" "-"
+ connect_ns ${NSA} ${NSA_DEV} "-" "-" \
+ ${NSB} ${NSB_DEV} "-" "-"
+ connect_ns ${NSA} ${NSA_DEV2} "-" "-" \
+ ${NSC} ${NSC_DEV} "-" "-"
+
+ NSA_LINKIP6=$(get_linklocal ${NSA} ${NSA_DEV})
+ NSB_LINKIP6=$(get_linklocal ${NSB} ${NSB_DEV})
+ NSC_LINKIP6=$(get_linklocal ${NSC} ${NSC_DEV})
+
+ create_vrf ${NSA} ${VRF} ${VRF_TABLE} "-" "-"
+ ip -netns ${NSA} link set dev ${NSA_DEV} vrf ${VRF}
+ ip -netns ${NSA} link set dev ${NSA_DEV2} vrf ${VRF}
+
+ set +e
+
+ sleep 1
+}
+
################################################################################
# IPv4
@@ -497,6 +602,20 @@ ipv4_ping_novrf()
done
#
+ # out, but don't use gateway if peer is not on link
+ #
+ a=${NSB_IP}
+ log_start
+ run_cmd ping -c 1 -w 1 -r ${a}
+ log_test_addr ${a} $? 0 "ping out (don't route), peer on link"
+
+ a=${NSB_LO_IP}
+ log_start
+ show_hint "Fails since peer is not on link"
+ run_cmd ping -c 1 -w 1 -r ${a}
+ log_test_addr ${a} $? 1 "ping out (don't route), peer not on link"
+
+ #
# in
#
for a in ${NSA_IP} ${NSA_LO_IP}
@@ -669,7 +788,7 @@ ipv4_ping_vrf()
log_start
show_hint "Fails since address on vrf device is out of device scope"
run_cmd ping -c1 -w1 -I ${NSA_DEV} ${a}
- log_test_addr ${a} $? 1 "ping local, device bind"
+ log_test_addr ${a} $? 2 "ping local, device bind"
done
#
@@ -729,10 +848,16 @@ ipv4_ping()
setup
set_sysctl net.ipv4.raw_l3mdev_accept=1 2>/dev/null
ipv4_ping_novrf
+ setup
+ set_ping_group
+ ipv4_ping_novrf
log_subsection "With VRF"
setup "yes"
ipv4_ping_vrf
+ setup "yes"
+ set_ping_group
+ ipv4_ping_vrf
}
################################################################################
@@ -749,9 +874,9 @@ ipv4_tcp_md5_novrf()
# basic use case
log_start
- run_cmd nettest -s -M ${MD5_PW} -r ${NSB_IP} &
+ run_cmd nettest -s -M ${MD5_PW} -m ${NSB_IP} &
sleep 1
- run_cmd_nsb nettest -r ${NSA_IP} -M ${MD5_PW}
+ run_cmd_nsb nettest -r ${NSA_IP} -X ${MD5_PW}
log_test $? 0 "MD5: Single address config"
# client sends MD5, server not configured
@@ -759,23 +884,23 @@ ipv4_tcp_md5_novrf()
show_hint "Should timeout due to MD5 mismatch"
run_cmd nettest -s &
sleep 1
- run_cmd_nsb nettest -r ${NSA_IP} -M ${MD5_PW}
+ run_cmd_nsb nettest -r ${NSA_IP} -X ${MD5_PW}
log_test $? 2 "MD5: Server no config, client uses password"
# wrong password
log_start
show_hint "Should timeout since client uses wrong password"
- run_cmd nettest -s -M ${MD5_PW} -r ${NSB_IP} &
+ run_cmd nettest -s -M ${MD5_PW} -m ${NSB_IP} &
sleep 1
- run_cmd_nsb nettest -r ${NSA_IP} -M ${MD5_WRONG_PW}
+ run_cmd_nsb nettest -r ${NSA_IP} -X ${MD5_WRONG_PW}
log_test $? 2 "MD5: Client uses wrong password"
# client from different address
log_start
show_hint "Should timeout due to MD5 mismatch"
- run_cmd nettest -s -M ${MD5_PW} -r ${NSB_LO_IP} &
+ run_cmd nettest -s -M ${MD5_PW} -m ${NSB_LO_IP} &
sleep 1
- run_cmd_nsb nettest -r ${NSA_IP} -M ${MD5_PW}
+ run_cmd_nsb nettest -r ${NSA_IP} -X ${MD5_PW}
log_test $? 2 "MD5: Client address does not match address configured with password"
#
@@ -786,7 +911,7 @@ ipv4_tcp_md5_novrf()
log_start
run_cmd nettest -s -M ${MD5_PW} -m ${NS_NET} &
sleep 1
- run_cmd_nsb nettest -r ${NSA_IP} -M ${MD5_PW}
+ run_cmd_nsb nettest -r ${NSA_IP} -X ${MD5_PW}
log_test $? 0 "MD5: Prefix config"
# client in prefix, wrong password
@@ -794,7 +919,7 @@ ipv4_tcp_md5_novrf()
show_hint "Should timeout since client uses wrong password"
run_cmd nettest -s -M ${MD5_PW} -m ${NS_NET} &
sleep 1
- run_cmd_nsb nettest -r ${NSA_IP} -M ${MD5_WRONG_PW}
+ run_cmd_nsb nettest -r ${NSA_IP} -X ${MD5_WRONG_PW}
log_test $? 2 "MD5: Prefix config, client uses wrong password"
# client outside of prefix
@@ -802,7 +927,7 @@ ipv4_tcp_md5_novrf()
show_hint "Should timeout due to MD5 mismatch"
run_cmd nettest -s -M ${MD5_PW} -m ${NS_NET} &
sleep 1
- run_cmd_nsb nettest -l ${NSB_LO_IP} -r ${NSA_IP} -M ${MD5_PW}
+ run_cmd_nsb nettest -c ${NSB_LO_IP} -r ${NSA_IP} -X ${MD5_PW}
log_test $? 2 "MD5: Prefix config, client address not in configured prefix"
}
@@ -817,33 +942,33 @@ ipv4_tcp_md5()
# basic use case
log_start
- run_cmd nettest -s -d ${VRF} -M ${MD5_PW} -r ${NSB_IP} &
+ run_cmd nettest -s -I ${VRF} -M ${MD5_PW} -m ${NSB_IP} &
sleep 1
- run_cmd_nsb nettest -r ${NSA_IP} -M ${MD5_PW}
+ run_cmd_nsb nettest -r ${NSA_IP} -X ${MD5_PW}
log_test $? 0 "MD5: VRF: Single address config"
# client sends MD5, server not configured
log_start
show_hint "Should timeout since server does not have MD5 auth"
- run_cmd nettest -s -d ${VRF} &
+ run_cmd nettest -s -I ${VRF} &
sleep 1
- run_cmd_nsb nettest -r ${NSA_IP} -M ${MD5_PW}
+ run_cmd_nsb nettest -r ${NSA_IP} -X ${MD5_PW}
log_test $? 2 "MD5: VRF: Server no config, client uses password"
# wrong password
log_start
show_hint "Should timeout since client uses wrong password"
- run_cmd nettest -s -d ${VRF} -M ${MD5_PW} -r ${NSB_IP} &
+ run_cmd nettest -s -I ${VRF} -M ${MD5_PW} -m ${NSB_IP} &
sleep 1
- run_cmd_nsb nettest -r ${NSA_IP} -M ${MD5_WRONG_PW}
+ run_cmd_nsb nettest -r ${NSA_IP} -X ${MD5_WRONG_PW}
log_test $? 2 "MD5: VRF: Client uses wrong password"
# client from different address
log_start
show_hint "Should timeout since server config differs from client"
- run_cmd nettest -s -d ${VRF} -M ${MD5_PW} -r ${NSB_LO_IP} &
+ run_cmd nettest -s -I ${VRF} -M ${MD5_PW} -m ${NSB_LO_IP} &
sleep 1
- run_cmd_nsb nettest -r ${NSA_IP} -M ${MD5_PW}
+ run_cmd_nsb nettest -r ${NSA_IP} -X ${MD5_PW}
log_test $? 2 "MD5: VRF: Client address does not match address configured with password"
#
@@ -852,25 +977,25 @@ ipv4_tcp_md5()
# client in prefix
log_start
- run_cmd nettest -s -d ${VRF} -M ${MD5_PW} -m ${NS_NET} &
+ run_cmd nettest -s -I ${VRF} -M ${MD5_PW} -m ${NS_NET} &
sleep 1
- run_cmd_nsb nettest -r ${NSA_IP} -M ${MD5_PW}
+ run_cmd_nsb nettest -r ${NSA_IP} -X ${MD5_PW}
log_test $? 0 "MD5: VRF: Prefix config"
# client in prefix, wrong password
log_start
show_hint "Should timeout since client uses wrong password"
- run_cmd nettest -s -d ${VRF} -M ${MD5_PW} -m ${NS_NET} &
+ run_cmd nettest -s -I ${VRF} -M ${MD5_PW} -m ${NS_NET} &
sleep 1
- run_cmd_nsb nettest -r ${NSA_IP} -M ${MD5_WRONG_PW}
+ run_cmd_nsb nettest -r ${NSA_IP} -X ${MD5_WRONG_PW}
log_test $? 2 "MD5: VRF: Prefix config, client uses wrong password"
# client outside of prefix
log_start
show_hint "Should timeout since client address is outside of prefix"
- run_cmd nettest -s -d ${VRF} -M ${MD5_PW} -m ${NS_NET} &
+ run_cmd nettest -s -I ${VRF} -M ${MD5_PW} -m ${NS_NET} &
sleep 1
- run_cmd_nsb nettest -l ${NSB_LO_IP} -r ${NSA_IP} -M ${MD5_PW}
+ run_cmd_nsb nettest -c ${NSB_LO_IP} -r ${NSA_IP} -X ${MD5_PW}
log_test $? 2 "MD5: VRF: Prefix config, client address not in configured prefix"
#
@@ -878,76 +1003,183 @@ ipv4_tcp_md5()
#
log_start
- run_cmd nettest -s -d ${VRF} -M ${MD5_PW} -r ${NSB_IP} &
- run_cmd nettest -s -M ${MD5_WRONG_PW} -r ${NSB_IP} &
+ run_cmd nettest -s -I ${VRF} -M ${MD5_PW} -m ${NSB_IP} &
+ run_cmd nettest -s -M ${MD5_WRONG_PW} -m ${NSB_IP} &
sleep 1
- run_cmd_nsb nettest -r ${NSA_IP} -M ${MD5_PW}
+ run_cmd_nsb nettest -r ${NSA_IP} -X ${MD5_PW}
log_test $? 0 "MD5: VRF: Single address config in default VRF and VRF, conn in VRF"
log_start
- run_cmd nettest -s -d ${VRF} -M ${MD5_PW} -r ${NSB_IP} &
- run_cmd nettest -s -M ${MD5_WRONG_PW} -r ${NSB_IP} &
+ run_cmd nettest -s -I ${VRF} -M ${MD5_PW} -m ${NSB_IP} &
+ run_cmd nettest -s -M ${MD5_WRONG_PW} -m ${NSB_IP} &
sleep 1
- run_cmd_nsc nettest -r ${NSA_IP} -M ${MD5_WRONG_PW}
+ run_cmd_nsc nettest -r ${NSA_IP} -X ${MD5_WRONG_PW}
log_test $? 0 "MD5: VRF: Single address config in default VRF and VRF, conn in default VRF"
log_start
show_hint "Should timeout since client in default VRF uses VRF password"
- run_cmd nettest -s -d ${VRF} -M ${MD5_PW} -r ${NSB_IP} &
- run_cmd nettest -s -M ${MD5_WRONG_PW} -r ${NSB_IP} &
+ run_cmd nettest -s -I ${VRF} -M ${MD5_PW} -m ${NSB_IP} &
+ run_cmd nettest -s -M ${MD5_WRONG_PW} -m ${NSB_IP} &
sleep 1
- run_cmd_nsc nettest -r ${NSA_IP} -M ${MD5_PW}
+ run_cmd_nsc nettest -r ${NSA_IP} -X ${MD5_PW}
log_test $? 2 "MD5: VRF: Single address config in default VRF and VRF, conn in default VRF with VRF pw"
log_start
show_hint "Should timeout since client in VRF uses default VRF password"
- run_cmd nettest -s -d ${VRF} -M ${MD5_PW} -r ${NSB_IP} &
- run_cmd nettest -s -M ${MD5_WRONG_PW} -r ${NSB_IP} &
+ run_cmd nettest -s -I ${VRF} -M ${MD5_PW} -m ${NSB_IP} &
+ run_cmd nettest -s -M ${MD5_WRONG_PW} -m ${NSB_IP} &
sleep 1
- run_cmd_nsb nettest -r ${NSA_IP} -M ${MD5_WRONG_PW}
+ run_cmd_nsb nettest -r ${NSA_IP} -X ${MD5_WRONG_PW}
log_test $? 2 "MD5: VRF: Single address config in default VRF and VRF, conn in VRF with default VRF pw"
log_start
- run_cmd nettest -s -d ${VRF} -M ${MD5_PW} -m ${NS_NET} &
+ run_cmd nettest -s -I ${VRF} -M ${MD5_PW} -m ${NS_NET} &
run_cmd nettest -s -M ${MD5_WRONG_PW} -m ${NS_NET} &
sleep 1
- run_cmd_nsb nettest -r ${NSA_IP} -M ${MD5_PW}
+ run_cmd_nsb nettest -r ${NSA_IP} -X ${MD5_PW}
log_test $? 0 "MD5: VRF: Prefix config in default VRF and VRF, conn in VRF"
log_start
- run_cmd nettest -s -d ${VRF} -M ${MD5_PW} -m ${NS_NET} &
+ run_cmd nettest -s -I ${VRF} -M ${MD5_PW} -m ${NS_NET} &
run_cmd nettest -s -M ${MD5_WRONG_PW} -m ${NS_NET} &
sleep 1
- run_cmd_nsc nettest -r ${NSA_IP} -M ${MD5_WRONG_PW}
+ run_cmd_nsc nettest -r ${NSA_IP} -X ${MD5_WRONG_PW}
log_test $? 0 "MD5: VRF: Prefix config in default VRF and VRF, conn in default VRF"
log_start
show_hint "Should timeout since client in default VRF uses VRF password"
- run_cmd nettest -s -d ${VRF} -M ${MD5_PW} -m ${NS_NET} &
+ run_cmd nettest -s -I ${VRF} -M ${MD5_PW} -m ${NS_NET} &
run_cmd nettest -s -M ${MD5_WRONG_PW} -m ${NS_NET} &
sleep 1
- run_cmd_nsc nettest -r ${NSA_IP} -M ${MD5_PW}
+ run_cmd_nsc nettest -r ${NSA_IP} -X ${MD5_PW}
log_test $? 2 "MD5: VRF: Prefix config in default VRF and VRF, conn in default VRF with VRF pw"
log_start
show_hint "Should timeout since client in VRF uses default VRF password"
- run_cmd nettest -s -d ${VRF} -M ${MD5_PW} -m ${NS_NET} &
+ run_cmd nettest -s -I ${VRF} -M ${MD5_PW} -m ${NS_NET} &
run_cmd nettest -s -M ${MD5_WRONG_PW} -m ${NS_NET} &
sleep 1
- run_cmd_nsb nettest -r ${NSA_IP} -M ${MD5_WRONG_PW}
+ run_cmd_nsb nettest -r ${NSA_IP} -X ${MD5_WRONG_PW}
log_test $? 2 "MD5: VRF: Prefix config in default VRF and VRF, conn in VRF with default VRF pw"
#
# negative tests
#
log_start
- run_cmd nettest -s -d ${NSA_DEV} -M ${MD5_PW} -r ${NSB_IP}
+ run_cmd nettest -s -I ${NSA_DEV} -M ${MD5_PW} -m ${NSB_IP}
log_test $? 1 "MD5: VRF: Device must be a VRF - single address"
log_start
- run_cmd nettest -s -d ${NSA_DEV} -M ${MD5_PW} -m ${NS_NET}
+ run_cmd nettest -s -I ${NSA_DEV} -M ${MD5_PW} -m ${NS_NET}
log_test $? 1 "MD5: VRF: Device must be a VRF - prefix"
+ test_ipv4_md5_vrf__vrf_server__no_bind_ifindex
+ test_ipv4_md5_vrf__global_server__bind_ifindex0
+}
+
+test_ipv4_md5_vrf__vrf_server__no_bind_ifindex()
+{
+ log_start
+ show_hint "Simulates applications using VRF without TCP_MD5SIG_FLAG_IFINDEX"
+ run_cmd nettest -s -I ${VRF} -M ${MD5_PW} -m ${NS_NET} --no-bind-key-ifindex &
+ sleep 1
+ run_cmd_nsb nettest -r ${NSA_IP} -X ${MD5_PW}
+ log_test $? 0 "MD5: VRF: VRF-bound server, unbound key accepts connection"
+
+ log_start
+ show_hint "Binding both the socket and the key is not required but it works"
+ run_cmd nettest -s -I ${VRF} -M ${MD5_PW} -m ${NS_NET} --force-bind-key-ifindex &
+ sleep 1
+ run_cmd_nsb nettest -r ${NSA_IP} -X ${MD5_PW}
+ log_test $? 0 "MD5: VRF: VRF-bound server, bound key accepts connection"
+}
+
+test_ipv4_md5_vrf__global_server__bind_ifindex0()
+{
+ # This particular test needs tcp_l3mdev_accept=1 for Global server to accept VRF connections
+ local old_tcp_l3mdev_accept
+ old_tcp_l3mdev_accept=$(get_sysctl net.ipv4.tcp_l3mdev_accept)
+ set_sysctl net.ipv4.tcp_l3mdev_accept=1
+
+ log_start
+ run_cmd nettest -s -M ${MD5_PW} -m ${NS_NET} --force-bind-key-ifindex &
+ sleep 1
+ run_cmd_nsb nettest -r ${NSA_IP} -X ${MD5_PW}
+ log_test $? 2 "MD5: VRF: Global server, Key bound to ifindex=0 rejects VRF connection"
+
+ log_start
+ run_cmd nettest -s -M ${MD5_PW} -m ${NS_NET} --force-bind-key-ifindex &
+ sleep 1
+ run_cmd_nsc nettest -r ${NSA_IP} -X ${MD5_PW}
+ log_test $? 0 "MD5: VRF: Global server, key bound to ifindex=0 accepts non-VRF connection"
+ log_start
+
+ run_cmd nettest -s -M ${MD5_PW} -m ${NS_NET} --no-bind-key-ifindex &
+ sleep 1
+ run_cmd_nsb nettest -r ${NSA_IP} -X ${MD5_PW}
+ log_test $? 0 "MD5: VRF: Global server, key not bound to ifindex accepts VRF connection"
+
+ log_start
+ run_cmd nettest -s -M ${MD5_PW} -m ${NS_NET} --no-bind-key-ifindex &
+ sleep 1
+ run_cmd_nsc nettest -r ${NSA_IP} -X ${MD5_PW}
+ log_test $? 0 "MD5: VRF: Global server, key not bound to ifindex accepts non-VRF connection"
+
+ # restore value
+ set_sysctl net.ipv4.tcp_l3mdev_accept="$old_tcp_l3mdev_accept"
+}
+
+ipv4_tcp_dontroute()
+{
+ local syncookies=$1
+ local nsa_syncookies
+ local nsb_syncookies
+ local a
+
+ #
+ # Link local connection tests (SO_DONTROUTE).
+ # Connections should succeed only when the remote IP address is
+ # on link (doesn't need to be routed through a gateway).
+ #
+
+ nsa_syncookies=$(ip netns exec "${NSA}" sysctl -n net.ipv4.tcp_syncookies)
+ nsb_syncookies=$(ip netns exec "${NSB}" sysctl -n net.ipv4.tcp_syncookies)
+ ip netns exec "${NSA}" sysctl -wq net.ipv4.tcp_syncookies=${syncookies}
+ ip netns exec "${NSB}" sysctl -wq net.ipv4.tcp_syncookies=${syncookies}
+
+ # Test with eth1 address (on link).
+
+ a=${NSB_IP}
+ log_start
+ do_run_cmd nettest -B -N "${NSA}" -O "${NSB}" -r ${a} --client-dontroute
+ log_test_addr ${a} $? 0 "SO_DONTROUTE client, syncookies=${syncookies}"
+
+ a=${NSB_IP}
+ log_start
+ do_run_cmd nettest -B -N "${NSA}" -O "${NSB}" -r ${a} --server-dontroute
+ log_test_addr ${a} $? 0 "SO_DONTROUTE server, syncookies=${syncookies}"
+
+ # Test with loopback address (routed).
+ #
+ # The client would use the eth1 address as source IP by default.
+ # Therefore, we need to use the -c option here, to force the use of the
+ # routed (loopback) address as source IP (so that the server will try
+ # to respond to a routed address and not a link local one).
+
+ a=${NSB_LO_IP}
+ log_start
+ show_hint "Should fail 'Network is unreachable' since server is not on link"
+ do_run_cmd nettest -B -N "${NSA}" -O "${NSB}" -c "${NSA_LO_IP}" -r ${a} --client-dontroute
+ log_test_addr ${a} $? 1 "SO_DONTROUTE client, syncookies=${syncookies}"
+
+ a=${NSB_LO_IP}
+ log_start
+ show_hint "Should timeout since server cannot respond (client is not on link)"
+ do_run_cmd nettest -B -N "${NSA}" -O "${NSB}" -c "${NSA_LO_IP}" -r ${a} --server-dontroute
+ log_test_addr ${a} $? 2 "SO_DONTROUTE server, syncookies=${syncookies}"
+
+ ip netns exec "${NSB}" sysctl -wq net.ipv4.tcp_syncookies=${nsb_syncookies}
+ ip netns exec "${NSA}" sysctl -wq net.ipv4.tcp_syncookies=${nsa_syncookies}
}
ipv4_tcp_novrf()
@@ -968,7 +1200,7 @@ ipv4_tcp_novrf()
a=${NSA_IP}
log_start
- run_cmd nettest -s -d ${NSA_DEV} &
+ run_cmd nettest -s -I ${NSA_DEV} &
sleep 1
run_cmd_nsb nettest -r ${a}
log_test_addr ${a} $? 0 "Device server"
@@ -1024,7 +1256,7 @@ ipv4_tcp_novrf()
a=${NSA_IP}
log_start
- run_cmd nettest -s -d ${NSA_DEV} &
+ run_cmd nettest -s -I ${NSA_DEV} &
sleep 1
run_cmd nettest -r ${a} -0 ${a}
log_test_addr ${a} $? 0 "Device server, unbound client, local connection"
@@ -1033,7 +1265,7 @@ ipv4_tcp_novrf()
do
log_start
show_hint "Should fail 'Connection refused' since addresses on loopback are out of device scope"
- run_cmd nettest -s -d ${NSA_DEV} &
+ run_cmd nettest -s -I ${NSA_DEV} &
sleep 1
run_cmd nettest -r ${a}
log_test_addr ${a} $? 1 "Device server, unbound client, local connection"
@@ -1058,7 +1290,7 @@ ipv4_tcp_novrf()
a=${NSA_IP}
log_start
- run_cmd nettest -s -d ${NSA_DEV} -2 ${NSA_DEV} &
+ run_cmd nettest -s -I ${NSA_DEV} -3 ${NSA_DEV} &
sleep 1
run_cmd nettest -d ${NSA_DEV} -r ${a} -0 ${a}
log_test_addr ${a} $? 0 "Device server, device client, local connection"
@@ -1068,7 +1300,10 @@ ipv4_tcp_novrf()
run_cmd nettest -d ${NSA_DEV} -r ${a}
log_test_addr ${a} $? 1 "No server, device client, local conn"
- ipv4_tcp_md5_novrf
+ [ "$fips_enabled" = "1" ] || ipv4_tcp_md5_novrf
+
+ ipv4_tcp_dontroute 0
+ ipv4_tcp_dontroute 2
}
ipv4_tcp_vrf()
@@ -1093,13 +1328,13 @@ ipv4_tcp_vrf()
log_test_addr ${a} $? 1 "Global server"
log_start
- run_cmd nettest -s -d ${VRF} -2 ${VRF} &
+ run_cmd nettest -s -I ${VRF} -3 ${VRF} &
sleep 1
run_cmd_nsb nettest -r ${a}
log_test_addr ${a} $? 0 "VRF server"
log_start
- run_cmd nettest -s -d ${NSA_DEV} -2 ${NSA_DEV} &
+ run_cmd nettest -s -I ${NSA_DEV} -3 ${NSA_DEV} &
sleep 1
run_cmd_nsb nettest -r ${a}
log_test_addr ${a} $? 0 "Device server"
@@ -1122,7 +1357,11 @@ ipv4_tcp_vrf()
log_test_addr ${a} $? 1 "Global server, local connection"
# run MD5 tests
- ipv4_tcp_md5
+ if [ "$fips_enabled" = "0" ]; then
+ setup_vrf_dup
+ ipv4_tcp_md5
+ cleanup_vrf_dup
+ fi
#
# enable VRF global server
@@ -1134,14 +1373,14 @@ ipv4_tcp_vrf()
do
log_start
show_hint "client socket should be bound to VRF"
- run_cmd nettest -s -2 ${VRF} &
+ run_cmd nettest -s -3 ${VRF} &
sleep 1
run_cmd_nsb nettest -r ${a}
log_test_addr ${a} $? 0 "Global server"
log_start
show_hint "client socket should be bound to VRF"
- run_cmd nettest -s -d ${VRF} -2 ${VRF} &
+ run_cmd nettest -s -I ${VRF} -3 ${VRF} &
sleep 1
run_cmd_nsb nettest -r ${a}
log_test_addr ${a} $? 0 "VRF server"
@@ -1156,7 +1395,7 @@ ipv4_tcp_vrf()
a=${NSA_IP}
log_start
show_hint "client socket should be bound to device"
- run_cmd nettest -s -d ${NSA_DEV} -2 ${NSA_DEV} &
+ run_cmd nettest -s -I ${NSA_DEV} -3 ${NSA_DEV} &
sleep 1
run_cmd_nsb nettest -r ${a}
log_test_addr ${a} $? 0 "Device server"
@@ -1166,7 +1405,7 @@ ipv4_tcp_vrf()
do
log_start
show_hint "Should fail 'Connection refused' since client is not bound to VRF"
- run_cmd nettest -s -d ${VRF} &
+ run_cmd nettest -s -I ${VRF} &
sleep 1
run_cmd nettest -r ${a}
log_test_addr ${a} $? 1 "Global server, local connection"
@@ -1203,7 +1442,7 @@ ipv4_tcp_vrf()
for a in ${NSA_IP} ${VRF_IP} 127.0.0.1
do
log_start
- run_cmd nettest -s -d ${VRF} -2 ${VRF} &
+ run_cmd nettest -s -I ${VRF} -3 ${VRF} &
sleep 1
run_cmd nettest -r ${a} -d ${VRF} -0 ${a}
log_test_addr ${a} $? 0 "VRF server, VRF client, local connection"
@@ -1211,26 +1450,26 @@ ipv4_tcp_vrf()
a=${NSA_IP}
log_start
- run_cmd nettest -s -d ${VRF} -2 ${VRF} &
+ run_cmd nettest -s -I ${VRF} -3 ${VRF} &
sleep 1
run_cmd nettest -r ${a} -d ${NSA_DEV} -0 ${a}
log_test_addr ${a} $? 0 "VRF server, device client, local connection"
log_start
show_hint "Should fail 'No route to host' since client is out of VRF scope"
- run_cmd nettest -s -d ${VRF} &
+ run_cmd nettest -s -I ${VRF} &
sleep 1
run_cmd nettest -r ${a}
log_test_addr ${a} $? 1 "VRF server, unbound client, local connection"
log_start
- run_cmd nettest -s -d ${NSA_DEV} -2 ${NSA_DEV} &
+ run_cmd nettest -s -I ${NSA_DEV} -3 ${NSA_DEV} &
sleep 1
run_cmd nettest -r ${a} -d ${VRF} -0 ${a}
log_test_addr ${a} $? 0 "Device server, VRF client, local connection"
log_start
- run_cmd nettest -s -d ${NSA_DEV} -2 ${NSA_DEV} &
+ run_cmd nettest -s -I ${NSA_DEV} -3 ${NSA_DEV} &
sleep 1
run_cmd nettest -r ${a} -d ${NSA_DEV} -0 ${a}
log_test_addr ${a} $? 0 "Device server, device client, local connection"
@@ -1269,7 +1508,7 @@ ipv4_udp_novrf()
for a in ${NSA_IP} ${NSA_LO_IP}
do
log_start
- run_cmd nettest -D -s -2 ${NSA_DEV} &
+ run_cmd nettest -D -s -3 ${NSA_DEV} &
sleep 1
run_cmd_nsb nettest -D -r ${a}
log_test_addr ${a} $? 0 "Global server"
@@ -1282,7 +1521,7 @@ ipv4_udp_novrf()
a=${NSA_IP}
log_start
- run_cmd nettest -D -d ${NSA_DEV} -s -2 ${NSA_DEV} &
+ run_cmd nettest -D -I ${NSA_DEV} -s -3 ${NSA_DEV} &
sleep 1
run_cmd_nsb nettest -D -r ${a}
log_test_addr ${a} $? 0 "Device server"
@@ -1317,6 +1556,13 @@ ipv4_udp_novrf()
log_test_addr ${a} $? 0 "Client, device bind via IP_UNICAST_IF"
log_start
+ run_cmd_nsb nettest -D -s &
+ sleep 1
+ run_cmd nettest -D -r ${a} -d ${NSA_DEV} -S -0 ${NSA_IP} -U
+ log_test_addr ${a} $? 0 "Client, device bind via IP_UNICAST_IF, with connect()"
+
+
+ log_start
show_hint "Should fail 'Connection refused'"
run_cmd nettest -D -r ${a}
log_test_addr ${a} $? 1 "No server, unbound client"
@@ -1341,7 +1587,7 @@ ipv4_udp_novrf()
a=${NSA_IP}
log_start
- run_cmd nettest -s -D -d ${NSA_DEV} -2 ${NSA_DEV} &
+ run_cmd nettest -s -D -I ${NSA_DEV} -3 ${NSA_DEV} &
sleep 1
run_cmd nettest -D -r ${a}
log_test_addr ${a} $? 0 "Device server, unbound client, local connection"
@@ -1350,7 +1596,7 @@ ipv4_udp_novrf()
do
log_start
show_hint "Should fail 'Connection refused' since address is out of device scope"
- run_cmd nettest -s -D -d ${NSA_DEV} &
+ run_cmd nettest -s -D -I ${NSA_DEV} &
sleep 1
run_cmd nettest -D -r ${a}
log_test_addr ${a} $? 1 "Device server, unbound client, local connection"
@@ -1375,6 +1621,13 @@ ipv4_udp_novrf()
run_cmd nettest -D -d ${NSA_DEV} -S -r ${a}
log_test_addr ${a} $? 0 "Global server, device client via IP_UNICAST_IF, local connection"
+ log_start
+ run_cmd nettest -s -D &
+ sleep 1
+ run_cmd nettest -D -d ${NSA_DEV} -S -r ${a} -U
+ log_test_addr ${a} $? 0 "Global server, device client via IP_UNICAST_IF, local connection, with connect()"
+
+
# IPv4 with device bind has really weird behavior - it overrides the
# fib lookup, generates an rtable and tries to send the packet. This
# causes failures for local traffic at different places
@@ -1400,11 +1653,20 @@ ipv4_udp_novrf()
sleep 1
run_cmd nettest -D -r ${a} -d ${NSA_DEV} -S
log_test_addr ${a} $? 1 "Global server, device client via IP_UNICAST_IF, local connection"
+
+ log_start
+ show_hint "Should fail since addresses on loopback are out of device scope"
+ run_cmd nettest -D -s &
+ sleep 1
+ run_cmd nettest -D -r ${a} -d ${NSA_DEV} -S -U
+ log_test_addr ${a} $? 1 "Global server, device client via IP_UNICAST_IF, local connection, with connect()"
+
+
done
a=${NSA_IP}
log_start
- run_cmd nettest -D -s -d ${NSA_DEV} -2 ${NSA_DEV} &
+ run_cmd nettest -D -s -I ${NSA_DEV} -3 ${NSA_DEV} &
sleep 1
run_cmd nettest -D -d ${NSA_DEV} -r ${a} -0 ${a}
log_test_addr ${a} $? 0 "Device server, device client, local conn"
@@ -1412,6 +1674,23 @@ ipv4_udp_novrf()
log_start
run_cmd nettest -D -d ${NSA_DEV} -r ${a}
log_test_addr ${a} $? 2 "No server, device client, local conn"
+
+ #
+ # Link local connection tests (SO_DONTROUTE).
+ # Connections should succeed only when the remote IP address is
+ # on link (doesn't need to be routed through a gateway).
+ #
+
+ a=${NSB_IP}
+ log_start
+ do_run_cmd nettest -B -D -N "${NSA}" -O "${NSB}" -r ${a} --client-dontroute
+ log_test_addr ${a} $? 0 "SO_DONTROUTE client"
+
+ a=${NSB_LO_IP}
+ log_start
+ show_hint "Should fail 'Network is unreachable' since server is not on link"
+ do_run_cmd nettest -B -D -N "${NSA}" -O "${NSB}" -r ${a} --client-dontroute
+ log_test_addr ${a} $? 1 "SO_DONTROUTE client"
}
ipv4_udp_vrf()
@@ -1435,13 +1714,13 @@ ipv4_udp_vrf()
log_test_addr ${a} $? 1 "Global server"
log_start
- run_cmd nettest -D -d ${VRF} -s -2 ${NSA_DEV} &
+ run_cmd nettest -D -I ${VRF} -s -3 ${NSA_DEV} &
sleep 1
run_cmd_nsb nettest -D -r ${a}
log_test_addr ${a} $? 0 "VRF server"
log_start
- run_cmd nettest -D -d ${NSA_DEV} -s -2 ${NSA_DEV} &
+ run_cmd nettest -D -I ${NSA_DEV} -s -3 ${NSA_DEV} &
sleep 1
run_cmd_nsb nettest -D -r ${a}
log_test_addr ${a} $? 0 "Enslaved device server"
@@ -1461,26 +1740,26 @@ ipv4_udp_vrf()
a=${NSA_IP}
log_start
- run_cmd nettest -s -D -d ${VRF} -2 ${NSA_DEV} &
+ run_cmd nettest -s -D -I ${VRF} -3 ${NSA_DEV} &
sleep 1
run_cmd nettest -D -d ${VRF} -r ${a}
log_test_addr ${a} $? 0 "VRF server, VRF client, local conn"
log_start
- run_cmd nettest -s -D -d ${VRF} -2 ${NSA_DEV} &
+ run_cmd nettest -s -D -I ${VRF} -3 ${NSA_DEV} &
sleep 1
run_cmd nettest -D -d ${NSA_DEV} -r ${a}
log_test_addr ${a} $? 0 "VRF server, enslaved device client, local connection"
a=${NSA_IP}
log_start
- run_cmd nettest -s -D -d ${NSA_DEV} -2 ${NSA_DEV} &
+ run_cmd nettest -s -D -I ${NSA_DEV} -3 ${NSA_DEV} &
sleep 1
run_cmd nettest -D -d ${VRF} -r ${a}
log_test_addr ${a} $? 0 "Enslaved device server, VRF client, local conn"
log_start
- run_cmd nettest -s -D -d ${NSA_DEV} -2 ${NSA_DEV} &
+ run_cmd nettest -s -D -I ${NSA_DEV} -3 ${NSA_DEV} &
sleep 1
run_cmd nettest -D -d ${NSA_DEV} -r ${a}
log_test_addr ${a} $? 0 "Enslaved device server, device client, local conn"
@@ -1495,19 +1774,19 @@ ipv4_udp_vrf()
for a in ${NSA_IP} ${VRF_IP}
do
log_start
- run_cmd nettest -D -s -2 ${NSA_DEV} &
+ run_cmd nettest -D -s -3 ${NSA_DEV} &
sleep 1
run_cmd_nsb nettest -D -r ${a}
log_test_addr ${a} $? 0 "Global server"
log_start
- run_cmd nettest -D -d ${VRF} -s -2 ${NSA_DEV} &
+ run_cmd nettest -D -I ${VRF} -s -3 ${NSA_DEV} &
sleep 1
run_cmd_nsb nettest -D -r ${a}
log_test_addr ${a} $? 0 "VRF server"
log_start
- run_cmd nettest -D -d ${NSA_DEV} -s -2 ${NSA_DEV} &
+ run_cmd nettest -D -I ${NSA_DEV} -s -3 ${NSA_DEV} &
sleep 1
run_cmd_nsb nettest -D -r ${a}
log_test_addr ${a} $? 0 "Enslaved device server"
@@ -1549,31 +1828,31 @@ ipv4_udp_vrf()
#
a=${NSA_IP}
log_start
- run_cmd nettest -D -s -2 ${NSA_DEV} &
+ run_cmd nettest -D -s -3 ${NSA_DEV} &
sleep 1
run_cmd nettest -D -d ${VRF} -r ${a}
log_test_addr ${a} $? 0 "Global server, VRF client, local conn"
log_start
- run_cmd nettest -s -D -d ${VRF} -2 ${NSA_DEV} &
+ run_cmd nettest -s -D -I ${VRF} -3 ${NSA_DEV} &
sleep 1
run_cmd nettest -D -d ${VRF} -r ${a}
log_test_addr ${a} $? 0 "VRF server, VRF client, local conn"
log_start
- run_cmd nettest -s -D -d ${VRF} -2 ${NSA_DEV} &
+ run_cmd nettest -s -D -I ${VRF} -3 ${NSA_DEV} &
sleep 1
run_cmd nettest -D -d ${NSA_DEV} -r ${a}
log_test_addr ${a} $? 0 "VRF server, device client, local conn"
log_start
- run_cmd nettest -s -D -d ${NSA_DEV} -2 ${NSA_DEV} &
+ run_cmd nettest -s -D -I ${NSA_DEV} -3 ${NSA_DEV} &
sleep 1
run_cmd nettest -D -d ${VRF} -r ${a}
log_test_addr ${a} $? 0 "Enslaved device server, VRF client, local conn"
log_start
- run_cmd nettest -s -D -d ${NSA_DEV} -2 ${NSA_DEV} &
+ run_cmd nettest -s -D -I ${NSA_DEV} -3 ${NSA_DEV} &
sleep 1
run_cmd nettest -D -d ${NSA_DEV} -r ${a}
log_test_addr ${a} $? 0 "Enslaved device server, device client, local conn"
@@ -1581,7 +1860,7 @@ ipv4_udp_vrf()
for a in ${VRF_IP} 127.0.0.1
do
log_start
- run_cmd nettest -D -s -2 ${VRF} &
+ run_cmd nettest -D -s -3 ${VRF} &
sleep 1
run_cmd nettest -D -d ${VRF} -r ${a}
log_test_addr ${a} $? 0 "Global server, VRF client, local conn"
@@ -1590,7 +1869,7 @@ ipv4_udp_vrf()
for a in ${VRF_IP} 127.0.0.1
do
log_start
- run_cmd nettest -s -D -d ${VRF} -2 ${VRF} &
+ run_cmd nettest -s -D -I ${VRF} -3 ${VRF} &
sleep 1
run_cmd nettest -D -d ${VRF} -r ${a}
log_test_addr ${a} $? 0 "VRF server, VRF client, local conn"
@@ -1645,20 +1924,49 @@ ipv4_addr_bind_novrf()
log_test_addr ${a} $? 0 "Raw socket bind to local address"
log_start
- run_cmd nettest -s -R -P icmp -l ${a} -d ${NSA_DEV} -b
+ run_cmd nettest -s -R -P icmp -l ${a} -I ${NSA_DEV} -b
log_test_addr ${a} $? 0 "Raw socket bind to local address after device bind"
done
#
+ # tests for nonlocal bind
+ #
+ a=${NL_IP}
+ log_start
+ run_cmd nettest -s -R -f -l ${a} -b
+ log_test_addr ${a} $? 0 "Raw socket bind to nonlocal address"
+
+ log_start
+ run_cmd nettest -s -f -l ${a} -b
+ log_test_addr ${a} $? 0 "TCP socket bind to nonlocal address"
+
+ log_start
+ run_cmd nettest -s -D -P icmp -f -l ${a} -b
+ log_test_addr ${a} $? 0 "ICMP socket bind to nonlocal address"
+
+ #
+ # check that ICMP sockets cannot bind to broadcast and multicast addresses
+ #
+ a=${BCAST_IP}
+ log_start
+ run_cmd nettest -s -D -P icmp -l ${a} -b
+ log_test_addr ${a} $? 1 "ICMP socket bind to broadcast address"
+
+ a=${MCAST_IP}
+ log_start
+ run_cmd nettest -s -D -P icmp -l ${a} -b
+ log_test_addr ${a} $? 1 "ICMP socket bind to multicast address"
+
+ #
# tcp sockets
#
a=${NSA_IP}
log_start
- run_cmd nettest -l ${a} -r ${NSB_IP} -t1 -b
+ run_cmd nettest -c ${a} -r ${NSB_IP} -t1 -b
log_test_addr ${a} $? 0 "TCP socket bind to local address"
log_start
- run_cmd nettest -l ${a} -r ${NSB_IP} -d ${NSA_DEV} -t1 -b
+ run_cmd nettest -c ${a} -r ${NSB_IP} -d ${NSA_DEV} -t1 -b
log_test_addr ${a} $? 0 "TCP socket bind to local address after device bind"
# Sadly, the kernel allows binding a socket to a device and then
@@ -1668,7 +1976,7 @@ ipv4_addr_bind_novrf()
#a=${NSA_LO_IP}
#log_start
#show_hint "Should fail with 'Cannot assign requested address'"
- #run_cmd nettest -s -l ${a} -d ${NSA_DEV} -t1 -b
+ #run_cmd nettest -s -l ${a} -I ${NSA_DEV} -t1 -b
#log_test_addr ${a} $? 1 "TCP socket bind to out of scope local address"
}
@@ -1680,46 +1988,76 @@ ipv4_addr_bind_vrf()
for a in ${NSA_IP} ${VRF_IP}
do
log_start
+ show_hint "Socket not bound to VRF, but address is in VRF"
run_cmd nettest -s -R -P icmp -l ${a} -b
- log_test_addr ${a} $? 0 "Raw socket bind to local address"
+ log_test_addr ${a} $? 1 "Raw socket bind to local address"
log_start
- run_cmd nettest -s -R -P icmp -l ${a} -d ${NSA_DEV} -b
+ run_cmd nettest -s -R -P icmp -l ${a} -I ${NSA_DEV} -b
log_test_addr ${a} $? 0 "Raw socket bind to local address after device bind"
log_start
- run_cmd nettest -s -R -P icmp -l ${a} -d ${VRF} -b
+ run_cmd nettest -s -R -P icmp -l ${a} -I ${VRF} -b
log_test_addr ${a} $? 0 "Raw socket bind to local address after VRF bind"
done
a=${NSA_LO_IP}
log_start
show_hint "Address on loopback is out of VRF scope"
- run_cmd nettest -s -R -P icmp -l ${a} -d ${VRF} -b
+ run_cmd nettest -s -R -P icmp -l ${a} -I ${VRF} -b
log_test_addr ${a} $? 1 "Raw socket bind to out of scope address after VRF bind"
#
+ # tests for nonlocal bind
+ #
+ a=${NL_IP}
+ log_start
+ run_cmd nettest -s -R -f -l ${a} -I ${VRF} -b
+ log_test_addr ${a} $? 0 "Raw socket bind to nonlocal address after VRF bind"
+
+ log_start
+ run_cmd nettest -s -f -l ${a} -I ${VRF} -b
+ log_test_addr ${a} $? 0 "TCP socket bind to nonlocal address after VRF bind"
+
+ log_start
+ run_cmd nettest -s -D -P icmp -f -l ${a} -I ${VRF} -b
+ log_test_addr ${a} $? 0 "ICMP socket bind to nonlocal address after VRF bind"
+
+ #
+ # check that ICMP sockets cannot bind to broadcast and multicast addresses
+ #
+ a=${BCAST_IP}
+ log_start
+ run_cmd nettest -s -D -P icmp -l ${a} -I ${VRF} -b
+ log_test_addr ${a} $? 1 "ICMP socket bind to broadcast address after VRF bind"
+
+ a=${MCAST_IP}
+ log_start
+ run_cmd nettest -s -D -P icmp -l ${a} -I ${VRF} -b
+ log_test_addr ${a} $? 1 "ICMP socket bind to multicast address after VRF bind"
+
+ #
# tcp sockets
#
for a in ${NSA_IP} ${VRF_IP}
do
log_start
- run_cmd nettest -s -l ${a} -d ${VRF} -t1 -b
+ run_cmd nettest -s -l ${a} -I ${VRF} -t1 -b
log_test_addr ${a} $? 0 "TCP socket bind to local address"
log_start
- run_cmd nettest -s -l ${a} -d ${NSA_DEV} -t1 -b
+ run_cmd nettest -s -l ${a} -I ${NSA_DEV} -t1 -b
log_test_addr ${a} $? 0 "TCP socket bind to local address after device bind"
done
a=${NSA_LO_IP}
log_start
show_hint "Address on loopback out of scope for VRF"
- run_cmd nettest -s -l ${a} -d ${VRF} -t1 -b
+ run_cmd nettest -s -l ${a} -I ${VRF} -t1 -b
log_test_addr ${a} $? 1 "TCP socket bind to invalid local address for VRF"
log_start
show_hint "Address on loopback out of scope for device in VRF"
- run_cmd nettest -s -l ${a} -d ${NSA_DEV} -t1 -b
+ run_cmd nettest -s -l ${a} -I ${NSA_DEV} -t1 -b
log_test_addr ${a} $? 1 "TCP socket bind to invalid local address for device bind"
}
@@ -1729,10 +2067,12 @@ ipv4_addr_bind()
log_subsection "No VRF"
setup
+ set_ping_group
ipv4_addr_bind_novrf
log_subsection "With VRF"
setup "yes"
+ set_ping_group
ipv4_addr_bind_vrf
}
@@ -1766,7 +2106,7 @@ ipv4_rt()
for a in ${NSA_IP} ${VRF_IP}
do
log_start
- run_cmd nettest ${varg} -s -d ${VRF} &
+ run_cmd nettest ${varg} -s -I ${VRF} &
sleep 1
run_cmd_nsb nettest ${varg} -r ${a} &
sleep 3
@@ -1779,7 +2119,7 @@ ipv4_rt()
a=${NSA_IP}
log_start
- run_cmd nettest ${varg} -s -d ${NSA_DEV} &
+ run_cmd nettest ${varg} -s -I ${NSA_DEV} &
sleep 1
run_cmd_nsb nettest ${varg} -r ${a} &
sleep 3
@@ -1834,7 +2174,7 @@ ipv4_rt()
for a in ${NSA_IP} ${VRF_IP}
do
log_start
- run_cmd nettest ${varg} -d ${VRF} -s &
+ run_cmd nettest ${varg} -I ${VRF} -s &
sleep 1
run_cmd nettest ${varg} -d ${VRF} -r ${a} &
sleep 3
@@ -1847,6 +2187,7 @@ ipv4_rt()
a=${NSA_IP}
log_start
+
run_cmd nettest ${varg} -s &
sleep 1
run_cmd nettest ${varg} -d ${NSA_DEV} -r ${a} &
@@ -1858,7 +2199,7 @@ ipv4_rt()
setup ${with_vrf}
log_start
- run_cmd nettest ${varg} -d ${VRF} -s &
+ run_cmd nettest ${varg} -I ${VRF} -s &
sleep 1
run_cmd nettest ${varg} -d ${NSA_DEV} -r ${a} &
sleep 3
@@ -1869,7 +2210,7 @@ ipv4_rt()
setup ${with_vrf}
log_start
- run_cmd nettest ${varg} -d ${NSA_DEV} -s &
+ run_cmd nettest ${varg} -I ${NSA_DEV} -s &
sleep 1
run_cmd nettest ${varg} -d ${NSA_DEV} -r ${a} &
sleep 3
@@ -2073,7 +2414,7 @@ ipv6_ping_vrf()
log_start
show_hint "Fails since VRF device does not support linklocal or multicast"
run_cmd ${ping6} -c1 -w1 ${a}
- log_test_addr ${a} $? 2 "ping out, VRF bind"
+ log_test_addr ${a} $? 1 "ping out, VRF bind"
done
for a in ${NSB_IP6} ${NSB_LO_IP6} ${NSB_LINKIP6}%${NSA_DEV} ${MCAST}%${NSA_DEV}
@@ -2193,10 +2534,16 @@ ipv6_ping()
log_subsection "No VRF"
setup
ipv6_ping_novrf
+ setup
+ set_ping_group
+ ipv6_ping_novrf
log_subsection "With VRF"
setup "yes"
ipv6_ping_vrf
+ setup "yes"
+ set_ping_group
+ ipv6_ping_vrf
}
################################################################################
@@ -2213,9 +2560,9 @@ ipv6_tcp_md5_novrf()
# basic use case
log_start
- run_cmd nettest -6 -s -M ${MD5_PW} -r ${NSB_IP6} &
+ run_cmd nettest -6 -s -M ${MD5_PW} -m ${NSB_IP6} &
sleep 1
- run_cmd_nsb nettest -6 -r ${NSA_IP6} -M ${MD5_PW}
+ run_cmd_nsb nettest -6 -r ${NSA_IP6} -X ${MD5_PW}
log_test $? 0 "MD5: Single address config"
# client sends MD5, server not configured
@@ -2223,23 +2570,23 @@ ipv6_tcp_md5_novrf()
show_hint "Should timeout due to MD5 mismatch"
run_cmd nettest -6 -s &
sleep 1
- run_cmd_nsb nettest -6 -r ${NSA_IP6} -M ${MD5_PW}
+ run_cmd_nsb nettest -6 -r ${NSA_IP6} -X ${MD5_PW}
log_test $? 2 "MD5: Server no config, client uses password"
# wrong password
log_start
show_hint "Should timeout since client uses wrong password"
- run_cmd nettest -6 -s -M ${MD5_PW} -r ${NSB_IP6} &
+ run_cmd nettest -6 -s -M ${MD5_PW} -m ${NSB_IP6} &
sleep 1
- run_cmd_nsb nettest -6 -r ${NSA_IP6} -M ${MD5_WRONG_PW}
+ run_cmd_nsb nettest -6 -r ${NSA_IP6} -X ${MD5_WRONG_PW}
log_test $? 2 "MD5: Client uses wrong password"
# client from different address
log_start
show_hint "Should timeout due to MD5 mismatch"
- run_cmd nettest -6 -s -M ${MD5_PW} -r ${NSB_LO_IP6} &
+ run_cmd nettest -6 -s -M ${MD5_PW} -m ${NSB_LO_IP6} &
sleep 1
- run_cmd_nsb nettest -6 -r ${NSA_IP6} -M ${MD5_PW}
+ run_cmd_nsb nettest -6 -r ${NSA_IP6} -X ${MD5_PW}
log_test $? 2 "MD5: Client address does not match address configured with password"
#
@@ -2250,7 +2597,7 @@ ipv6_tcp_md5_novrf()
log_start
run_cmd nettest -6 -s -M ${MD5_PW} -m ${NS_NET6} &
sleep 1
- run_cmd_nsb nettest -6 -r ${NSA_IP6} -M ${MD5_PW}
+ run_cmd_nsb nettest -6 -r ${NSA_IP6} -X ${MD5_PW}
log_test $? 0 "MD5: Prefix config"
# client in prefix, wrong password
@@ -2258,7 +2605,7 @@ ipv6_tcp_md5_novrf()
show_hint "Should timeout since client uses wrong password"
run_cmd nettest -6 -s -M ${MD5_PW} -m ${NS_NET6} &
sleep 1
- run_cmd_nsb nettest -6 -r ${NSA_IP6} -M ${MD5_WRONG_PW}
+ run_cmd_nsb nettest -6 -r ${NSA_IP6} -X ${MD5_WRONG_PW}
log_test $? 2 "MD5: Prefix config, client uses wrong password"
# client outside of prefix
@@ -2266,7 +2613,7 @@ ipv6_tcp_md5_novrf()
show_hint "Should timeout due to MD5 mismatch"
run_cmd nettest -6 -s -M ${MD5_PW} -m ${NS_NET6} &
sleep 1
- run_cmd_nsb nettest -6 -l ${NSB_LO_IP6} -r ${NSA_IP6} -M ${MD5_PW}
+ run_cmd_nsb nettest -6 -c ${NSB_LO_IP6} -r ${NSA_IP6} -X ${MD5_PW}
log_test $? 2 "MD5: Prefix config, client address not in configured prefix"
}
@@ -2281,33 +2628,33 @@ ipv6_tcp_md5()
# basic use case
log_start
- run_cmd nettest -6 -s -d ${VRF} -M ${MD5_PW} -r ${NSB_IP6} &
+ run_cmd nettest -6 -s -I ${VRF} -M ${MD5_PW} -m ${NSB_IP6} &
sleep 1
- run_cmd_nsb nettest -6 -r ${NSA_IP6} -M ${MD5_PW}
+ run_cmd_nsb nettest -6 -r ${NSA_IP6} -X ${MD5_PW}
log_test $? 0 "MD5: VRF: Single address config"
# client sends MD5, server not configured
log_start
show_hint "Should timeout since server does not have MD5 auth"
- run_cmd nettest -6 -s -d ${VRF} &
+ run_cmd nettest -6 -s -I ${VRF} &
sleep 1
- run_cmd_nsb nettest -6 -r ${NSA_IP6} -M ${MD5_PW}
+ run_cmd_nsb nettest -6 -r ${NSA_IP6} -X ${MD5_PW}
log_test $? 2 "MD5: VRF: Server no config, client uses password"
# wrong password
log_start
show_hint "Should timeout since client uses wrong password"
- run_cmd nettest -6 -s -d ${VRF} -M ${MD5_PW} -r ${NSB_IP6} &
+ run_cmd nettest -6 -s -I ${VRF} -M ${MD5_PW} -m ${NSB_IP6} &
sleep 1
- run_cmd_nsb nettest -6 -r ${NSA_IP6} -M ${MD5_WRONG_PW}
+ run_cmd_nsb nettest -6 -r ${NSA_IP6} -X ${MD5_WRONG_PW}
log_test $? 2 "MD5: VRF: Client uses wrong password"
# client from different address
log_start
show_hint "Should timeout since server config differs from client"
- run_cmd nettest -6 -s -d ${VRF} -M ${MD5_PW} -r ${NSB_LO_IP6} &
+ run_cmd nettest -6 -s -I ${VRF} -M ${MD5_PW} -m ${NSB_LO_IP6} &
sleep 1
- run_cmd_nsb nettest -6 -r ${NSA_IP6} -M ${MD5_PW}
+ run_cmd_nsb nettest -6 -r ${NSA_IP6} -X ${MD5_PW}
log_test $? 2 "MD5: VRF: Client address does not match address configured with password"
#
@@ -2316,25 +2663,25 @@ ipv6_tcp_md5()
# client in prefix
log_start
- run_cmd nettest -6 -s -d ${VRF} -M ${MD5_PW} -m ${NS_NET6} &
+ run_cmd nettest -6 -s -I ${VRF} -M ${MD5_PW} -m ${NS_NET6} &
sleep 1
- run_cmd_nsb nettest -6 -r ${NSA_IP6} -M ${MD5_PW}
+ run_cmd_nsb nettest -6 -r ${NSA_IP6} -X ${MD5_PW}
log_test $? 0 "MD5: VRF: Prefix config"
# client in prefix, wrong password
log_start
show_hint "Should timeout since client uses wrong password"
- run_cmd nettest -6 -s -d ${VRF} -M ${MD5_PW} -m ${NS_NET6} &
+ run_cmd nettest -6 -s -I ${VRF} -M ${MD5_PW} -m ${NS_NET6} &
sleep 1
- run_cmd_nsb nettest -6 -r ${NSA_IP6} -M ${MD5_WRONG_PW}
+ run_cmd_nsb nettest -6 -r ${NSA_IP6} -X ${MD5_WRONG_PW}
log_test $? 2 "MD5: VRF: Prefix config, client uses wrong password"
# client outside of prefix
log_start
show_hint "Should timeout since client address is outside of prefix"
- run_cmd nettest -6 -s -d ${VRF} -M ${MD5_PW} -m ${NS_NET6} &
+ run_cmd nettest -6 -s -I ${VRF} -M ${MD5_PW} -m ${NS_NET6} &
sleep 1
- run_cmd_nsb nettest -6 -l ${NSB_LO_IP6} -r ${NSA_IP6} -M ${MD5_PW}
+ run_cmd_nsb nettest -6 -c ${NSB_LO_IP6} -r ${NSA_IP6} -X ${MD5_PW}
log_test $? 2 "MD5: VRF: Prefix config, client address not in configured prefix"
#
@@ -2342,74 +2689,74 @@ ipv6_tcp_md5()
#
log_start
- run_cmd nettest -6 -s -d ${VRF} -M ${MD5_PW} -r ${NSB_IP6} &
- run_cmd nettest -6 -s -M ${MD5_WRONG_PW} -r ${NSB_IP6} &
+ run_cmd nettest -6 -s -I ${VRF} -M ${MD5_PW} -m ${NSB_IP6} &
+ run_cmd nettest -6 -s -M ${MD5_WRONG_PW} -m ${NSB_IP6} &
sleep 1
- run_cmd_nsb nettest -6 -r ${NSA_IP6} -M ${MD5_PW}
+ run_cmd_nsb nettest -6 -r ${NSA_IP6} -X ${MD5_PW}
log_test $? 0 "MD5: VRF: Single address config in default VRF and VRF, conn in VRF"
log_start
- run_cmd nettest -6 -s -d ${VRF} -M ${MD5_PW} -r ${NSB_IP6} &
- run_cmd nettest -6 -s -M ${MD5_WRONG_PW} -r ${NSB_IP6} &
+ run_cmd nettest -6 -s -I ${VRF} -M ${MD5_PW} -m ${NSB_IP6} &
+ run_cmd nettest -6 -s -M ${MD5_WRONG_PW} -m ${NSB_IP6} &
sleep 1
- run_cmd_nsc nettest -6 -r ${NSA_IP6} -M ${MD5_WRONG_PW}
+ run_cmd_nsc nettest -6 -r ${NSA_IP6} -X ${MD5_WRONG_PW}
log_test $? 0 "MD5: VRF: Single address config in default VRF and VRF, conn in default VRF"
log_start
show_hint "Should timeout since client in default VRF uses VRF password"
- run_cmd nettest -6 -s -d ${VRF} -M ${MD5_PW} -r ${NSB_IP6} &
- run_cmd nettest -6 -s -M ${MD5_WRONG_PW} -r ${NSB_IP6} &
+ run_cmd nettest -6 -s -I ${VRF} -M ${MD5_PW} -m ${NSB_IP6} &
+ run_cmd nettest -6 -s -M ${MD5_WRONG_PW} -m ${NSB_IP6} &
sleep 1
- run_cmd_nsc nettest -6 -r ${NSA_IP6} -M ${MD5_PW}
+ run_cmd_nsc nettest -6 -r ${NSA_IP6} -X ${MD5_PW}
log_test $? 2 "MD5: VRF: Single address config in default VRF and VRF, conn in default VRF with VRF pw"
log_start
show_hint "Should timeout since client in VRF uses default VRF password"
- run_cmd nettest -6 -s -d ${VRF} -M ${MD5_PW} -r ${NSB_IP6} &
- run_cmd nettest -6 -s -M ${MD5_WRONG_PW} -r ${NSB_IP6} &
+ run_cmd nettest -6 -s -I ${VRF} -M ${MD5_PW} -m ${NSB_IP6} &
+ run_cmd nettest -6 -s -M ${MD5_WRONG_PW} -m ${NSB_IP6} &
sleep 1
- run_cmd_nsb nettest -6 -r ${NSA_IP6} -M ${MD5_WRONG_PW}
+ run_cmd_nsb nettest -6 -r ${NSA_IP6} -X ${MD5_WRONG_PW}
log_test $? 2 "MD5: VRF: Single address config in default VRF and VRF, conn in VRF with default VRF pw"
log_start
- run_cmd nettest -6 -s -d ${VRF} -M ${MD5_PW} -m ${NS_NET6} &
+ run_cmd nettest -6 -s -I ${VRF} -M ${MD5_PW} -m ${NS_NET6} &
run_cmd nettest -6 -s -M ${MD5_WRONG_PW} -m ${NS_NET6} &
sleep 1
- run_cmd_nsb nettest -6 -r ${NSA_IP6} -M ${MD5_PW}
+ run_cmd_nsb nettest -6 -r ${NSA_IP6} -X ${MD5_PW}
log_test $? 0 "MD5: VRF: Prefix config in default VRF and VRF, conn in VRF"
log_start
- run_cmd nettest -6 -s -d ${VRF} -M ${MD5_PW} -m ${NS_NET6} &
+ run_cmd nettest -6 -s -I ${VRF} -M ${MD5_PW} -m ${NS_NET6} &
run_cmd nettest -6 -s -M ${MD5_WRONG_PW} -m ${NS_NET6} &
sleep 1
- run_cmd_nsc nettest -6 -r ${NSA_IP6} -M ${MD5_WRONG_PW}
+ run_cmd_nsc nettest -6 -r ${NSA_IP6} -X ${MD5_WRONG_PW}
log_test $? 0 "MD5: VRF: Prefix config in default VRF and VRF, conn in default VRF"
log_start
show_hint "Should timeout since client in default VRF uses VRF password"
- run_cmd nettest -6 -s -d ${VRF} -M ${MD5_PW} -m ${NS_NET6} &
+ run_cmd nettest -6 -s -I ${VRF} -M ${MD5_PW} -m ${NS_NET6} &
run_cmd nettest -6 -s -M ${MD5_WRONG_PW} -m ${NS_NET6} &
sleep 1
- run_cmd_nsc nettest -6 -r ${NSA_IP6} -M ${MD5_PW}
+ run_cmd_nsc nettest -6 -r ${NSA_IP6} -X ${MD5_PW}
log_test $? 2 "MD5: VRF: Prefix config in default VRF and VRF, conn in default VRF with VRF pw"
log_start
show_hint "Should timeout since client in VRF uses default VRF password"
- run_cmd nettest -6 -s -d ${VRF} -M ${MD5_PW} -m ${NS_NET6} &
+ run_cmd nettest -6 -s -I ${VRF} -M ${MD5_PW} -m ${NS_NET6} &
run_cmd nettest -6 -s -M ${MD5_WRONG_PW} -m ${NS_NET6} &
sleep 1
- run_cmd_nsb nettest -6 -r ${NSA_IP6} -M ${MD5_WRONG_PW}
+ run_cmd_nsb nettest -6 -r ${NSA_IP6} -X ${MD5_WRONG_PW}
log_test $? 2 "MD5: VRF: Prefix config in default VRF and VRF, conn in VRF with default VRF pw"
#
# negative tests
#
log_start
- run_cmd nettest -6 -s -d ${NSA_DEV} -M ${MD5_PW} -r ${NSB_IP6}
+ run_cmd nettest -6 -s -I ${NSA_DEV} -M ${MD5_PW} -m ${NSB_IP6}
log_test $? 1 "MD5: VRF: Device must be a VRF - single address"
log_start
- run_cmd nettest -6 -s -d ${NSA_DEV} -M ${MD5_PW} -m ${NS_NET6}
+ run_cmd nettest -6 -s -I ${NSA_DEV} -M ${MD5_PW} -m ${NS_NET6}
log_test $? 1 "MD5: VRF: Device must be a VRF - prefix"
}
@@ -2482,7 +2829,7 @@ ipv6_tcp_novrf()
a=${NSA_IP6}
log_start
- run_cmd nettest -6 -s -d ${NSA_DEV} -2 ${NSA_DEV} &
+ run_cmd nettest -6 -s -I ${NSA_DEV} -3 ${NSA_DEV} &
sleep 1
run_cmd nettest -6 -r ${a} -0 ${a}
log_test_addr ${a} $? 0 "Device server, unbound client, local connection"
@@ -2491,7 +2838,7 @@ ipv6_tcp_novrf()
do
log_start
show_hint "Should fail 'Connection refused' since addresses on loopback are out of device scope"
- run_cmd nettest -6 -s -d ${NSA_DEV} &
+ run_cmd nettest -6 -s -I ${NSA_DEV} &
sleep 1
run_cmd nettest -6 -r ${a}
log_test_addr ${a} $? 1 "Device server, unbound client, local connection"
@@ -2517,7 +2864,7 @@ ipv6_tcp_novrf()
for a in ${NSA_IP6} ${NSA_LINKIP6}
do
log_start
- run_cmd nettest -6 -s -d ${NSA_DEV} -2 ${NSA_DEV} &
+ run_cmd nettest -6 -s -I ${NSA_DEV} -3 ${NSA_DEV} &
sleep 1
run_cmd nettest -6 -d ${NSA_DEV} -r ${a}
log_test_addr ${a} $? 0 "Device server, device client, local conn"
@@ -2531,7 +2878,7 @@ ipv6_tcp_novrf()
log_test_addr ${a} $? 1 "No server, device client, local conn"
done
- ipv6_tcp_md5_novrf
+ [ "$fips_enabled" = "1" ] || ipv6_tcp_md5_novrf
}
ipv6_tcp_vrf()
@@ -2559,7 +2906,7 @@ ipv6_tcp_vrf()
for a in ${NSA_IP6} ${VRF_IP6}
do
log_start
- run_cmd nettest -6 -s -d ${VRF} -2 ${VRF} &
+ run_cmd nettest -6 -s -I ${VRF} -3 ${VRF} &
sleep 1
run_cmd_nsb nettest -6 -r ${a}
log_test_addr ${a} $? 0 "VRF server"
@@ -2568,7 +2915,7 @@ ipv6_tcp_vrf()
# link local is always bound to ingress device
a=${NSA_LINKIP6}%${NSB_DEV}
log_start
- run_cmd nettest -6 -s -d ${VRF} -2 ${NSA_DEV} &
+ run_cmd nettest -6 -s -I ${VRF} -3 ${NSA_DEV} &
sleep 1
run_cmd_nsb nettest -6 -r ${a}
log_test_addr ${a} $? 0 "VRF server"
@@ -2576,7 +2923,7 @@ ipv6_tcp_vrf()
for a in ${NSA_IP6} ${VRF_IP6} ${NSA_LINKIP6}%${NSB_DEV}
do
log_start
- run_cmd nettest -6 -s -d ${NSA_DEV} -2 ${NSA_DEV} &
+ run_cmd nettest -6 -s -I ${NSA_DEV} -3 ${NSA_DEV} &
sleep 1
run_cmd_nsb nettest -6 -r ${a}
log_test_addr ${a} $? 0 "Device server"
@@ -2601,7 +2948,11 @@ ipv6_tcp_vrf()
log_test_addr ${a} $? 1 "Global server, local connection"
# run MD5 tests
- ipv6_tcp_md5
+ if [ "$fips_enabled" = "0" ]; then
+ setup_vrf_dup
+ ipv6_tcp_md5
+ cleanup_vrf_dup
+ fi
#
# enable VRF global server
@@ -2612,7 +2963,7 @@ ipv6_tcp_vrf()
for a in ${NSA_IP6} ${VRF_IP6}
do
log_start
- run_cmd nettest -6 -s -2 ${VRF} &
+ run_cmd nettest -6 -s -3 ${VRF} &
sleep 1
run_cmd_nsb nettest -6 -r ${a}
log_test_addr ${a} $? 0 "Global server"
@@ -2621,7 +2972,7 @@ ipv6_tcp_vrf()
for a in ${NSA_IP6} ${VRF_IP6}
do
log_start
- run_cmd nettest -6 -s -d ${VRF} -2 ${VRF} &
+ run_cmd nettest -6 -s -I ${VRF} -3 ${VRF} &
sleep 1
run_cmd_nsb nettest -6 -r ${a}
log_test_addr ${a} $? 0 "VRF server"
@@ -2630,13 +2981,13 @@ ipv6_tcp_vrf()
# For LLA, child socket is bound to device
a=${NSA_LINKIP6}%${NSB_DEV}
log_start
- run_cmd nettest -6 -s -2 ${NSA_DEV} &
+ run_cmd nettest -6 -s -3 ${NSA_DEV} &
sleep 1
run_cmd_nsb nettest -6 -r ${a}
log_test_addr ${a} $? 0 "Global server"
log_start
- run_cmd nettest -6 -s -d ${VRF} -2 ${NSA_DEV} &
+ run_cmd nettest -6 -s -I ${VRF} -3 ${NSA_DEV} &
sleep 1
run_cmd_nsb nettest -6 -r ${a}
log_test_addr ${a} $? 0 "VRF server"
@@ -2644,7 +2995,7 @@ ipv6_tcp_vrf()
for a in ${NSA_IP6} ${NSA_LINKIP6}%${NSB_DEV}
do
log_start
- run_cmd nettest -6 -s -d ${NSA_DEV} -2 ${NSA_DEV} &
+ run_cmd nettest -6 -s -I ${NSA_DEV} -3 ${NSA_DEV} &
sleep 1
run_cmd_nsb nettest -6 -r ${a}
log_test_addr ${a} $? 0 "Device server"
@@ -2664,7 +3015,7 @@ ipv6_tcp_vrf()
do
log_start
show_hint "Fails 'Connection refused' since client is not in VRF"
- run_cmd nettest -6 -s -d ${VRF} &
+ run_cmd nettest -6 -s -I ${VRF} &
sleep 1
run_cmd nettest -6 -r ${a}
log_test_addr ${a} $? 1 "Global server, local connection"
@@ -2719,7 +3070,7 @@ ipv6_tcp_vrf()
for a in ${NSA_IP6} ${VRF_IP6} ::1
do
log_start
- run_cmd nettest -6 -s -d ${VRF} -2 ${VRF} &
+ run_cmd nettest -6 -s -I ${VRF} -3 ${VRF} &
sleep 1
run_cmd nettest -6 -r ${a} -d ${VRF} -0 ${a}
log_test_addr ${a} $? 0 "VRF server, VRF client, local connection"
@@ -2727,7 +3078,7 @@ ipv6_tcp_vrf()
a=${NSA_IP6}
log_start
- run_cmd nettest -6 -s -d ${VRF} -2 ${VRF} &
+ run_cmd nettest -6 -s -I ${VRF} -3 ${VRF} &
sleep 1
run_cmd nettest -6 -r ${a} -d ${NSA_DEV} -0 ${a}
log_test_addr ${a} $? 0 "VRF server, device client, local connection"
@@ -2735,13 +3086,13 @@ ipv6_tcp_vrf()
a=${NSA_IP6}
log_start
show_hint "Should fail since unbound client is out of VRF scope"
- run_cmd nettest -6 -s -d ${VRF} &
+ run_cmd nettest -6 -s -I ${VRF} &
sleep 1
run_cmd nettest -6 -r ${a}
log_test_addr ${a} $? 1 "VRF server, unbound client, local connection"
log_start
- run_cmd nettest -6 -s -d ${NSA_DEV} -2 ${NSA_DEV} &
+ run_cmd nettest -6 -s -I ${NSA_DEV} -3 ${NSA_DEV} &
sleep 1
run_cmd nettest -6 -r ${a} -d ${VRF} -0 ${a}
log_test_addr ${a} $? 0 "Device server, VRF client, local connection"
@@ -2749,7 +3100,7 @@ ipv6_tcp_vrf()
for a in ${NSA_IP6} ${NSA_LINKIP6}
do
log_start
- run_cmd nettest -6 -s -d ${NSA_DEV} -2 ${NSA_DEV} &
+ run_cmd nettest -6 -s -I ${NSA_DEV} -3 ${NSA_DEV} &
sleep 1
run_cmd nettest -6 -r ${a} -d ${NSA_DEV} -0 ${a}
log_test_addr ${a} $? 0 "Device server, device client, local connection"
@@ -2789,13 +3140,13 @@ ipv6_udp_novrf()
for a in ${NSA_IP6} ${NSA_LINKIP6}%${NSB_DEV}
do
log_start
- run_cmd nettest -6 -D -s -2 ${NSA_DEV} &
+ run_cmd nettest -6 -D -s -3 ${NSA_DEV} &
sleep 1
run_cmd_nsb nettest -6 -D -r ${a}
log_test_addr ${a} $? 0 "Global server"
log_start
- run_cmd nettest -6 -D -d ${NSA_DEV} -s -2 ${NSA_DEV} &
+ run_cmd nettest -6 -D -I ${NSA_DEV} -s -3 ${NSA_DEV} &
sleep 1
run_cmd_nsb nettest -6 -D -r ${a}
log_test_addr ${a} $? 0 "Device server"
@@ -2803,7 +3154,7 @@ ipv6_udp_novrf()
a=${NSA_LO_IP6}
log_start
- run_cmd nettest -6 -D -s -2 ${NSA_DEV} &
+ run_cmd nettest -6 -D -s -3 ${NSA_DEV} &
sleep 1
run_cmd_nsb nettest -6 -D -r ${a}
log_test_addr ${a} $? 0 "Global server"
@@ -2813,7 +3164,7 @@ ipv6_udp_novrf()
# behavior.
#log_start
#show_hint "Should fail since loopback address is out of scope"
- #run_cmd nettest -6 -D -d ${NSA_DEV} -s -2 ${NSA_DEV} &
+ #run_cmd nettest -6 -D -I ${NSA_DEV} -s -3 ${NSA_DEV} &
#sleep 1
#run_cmd_nsb nettest -6 -D -r ${a}
#log_test_addr ${a} $? 1 "Device server"
@@ -2881,7 +3232,7 @@ ipv6_udp_novrf()
a=${NSA_IP6}
log_start
- run_cmd nettest -6 -s -D -d ${NSA_DEV} -2 ${NSA_DEV} &
+ run_cmd nettest -6 -s -D -I ${NSA_DEV} -3 ${NSA_DEV} &
sleep 1
run_cmd nettest -6 -D -r ${a}
log_test_addr ${a} $? 0 "Device server, unbound client, local connection"
@@ -2890,7 +3241,7 @@ ipv6_udp_novrf()
do
log_start
show_hint "Should fail 'Connection refused' since address is out of device scope"
- run_cmd nettest -6 -s -D -d ${NSA_DEV} &
+ run_cmd nettest -6 -s -D -I ${NSA_DEV} &
sleep 1
run_cmd nettest -6 -D -r ${a}
log_test_addr ${a} $? 1 "Device server, local connection"
@@ -2937,11 +3288,18 @@ ipv6_udp_novrf()
sleep 1
run_cmd nettest -6 -D -r ${a} -d ${NSA_DEV} -S
log_test_addr ${a} $? 1 "Global server, device client via IP_UNICAST_IF, local connection"
+
+ log_start
+ show_hint "Should fail 'No route to host' since addresses on loopback are out of device scope"
+ run_cmd nettest -6 -D -s &
+ sleep 1
+ run_cmd nettest -6 -D -r ${a} -d ${NSA_DEV} -S -U
+ log_test_addr ${a} $? 1 "Global server, device client via IP_UNICAST_IF, local connection, with connect()"
done
a=${NSA_IP6}
log_start
- run_cmd nettest -6 -D -s -d ${NSA_DEV} -2 ${NSA_DEV} &
+ run_cmd nettest -6 -D -s -I ${NSA_DEV} -3 ${NSA_DEV} &
sleep 1
run_cmd nettest -6 -D -d ${NSA_DEV} -r ${a} -0 ${a}
log_test_addr ${a} $? 0 "Device server, device client, local conn"
@@ -2988,7 +3346,7 @@ ipv6_udp_vrf()
for a in ${NSA_IP6} ${VRF_IP6}
do
log_start
- run_cmd nettest -6 -D -d ${VRF} -s -2 ${NSA_DEV} &
+ run_cmd nettest -6 -D -I ${VRF} -s -3 ${NSA_DEV} &
sleep 1
run_cmd_nsb nettest -6 -D -r ${a}
log_test_addr ${a} $? 0 "VRF server"
@@ -2997,7 +3355,7 @@ ipv6_udp_vrf()
for a in ${NSA_IP6} ${VRF_IP6}
do
log_start
- run_cmd nettest -6 -D -d ${NSA_DEV} -s -2 ${NSA_DEV} &
+ run_cmd nettest -6 -D -I ${NSA_DEV} -s -3 ${NSA_DEV} &
sleep 1
run_cmd_nsb nettest -6 -D -r ${a}
log_test_addr ${a} $? 0 "Enslaved device server"
@@ -3028,7 +3386,7 @@ ipv6_udp_vrf()
for a in ${NSA_IP6} ${VRF_IP6}
do
log_start
- run_cmd nettest -6 -D -d ${VRF} -s &
+ run_cmd nettest -6 -D -I ${VRF} -s &
sleep 1
run_cmd nettest -6 -D -d ${VRF} -r ${a}
log_test_addr ${a} $? 0 "VRF server, VRF client, local conn"
@@ -3043,19 +3401,19 @@ ipv6_udp_vrf()
log_test_addr ${a} $? 1 "Global server, device client, local conn"
log_start
- run_cmd nettest -6 -D -d ${VRF} -s -2 ${NSA_DEV} &
+ run_cmd nettest -6 -D -I ${VRF} -s -3 ${NSA_DEV} &
sleep 1
run_cmd nettest -6 -D -d ${NSA_DEV} -r ${a}
log_test_addr ${a} $? 0 "VRF server, device client, local conn"
log_start
- run_cmd nettest -6 -D -d ${NSA_DEV} -s -2 ${NSA_DEV} &
+ run_cmd nettest -6 -D -I ${NSA_DEV} -s -3 ${NSA_DEV} &
sleep 1
run_cmd nettest -6 -D -d ${VRF} -r ${a}
log_test_addr ${a} $? 0 "Enslaved device server, VRF client, local conn"
log_start
- run_cmd nettest -6 -D -d ${NSA_DEV} -s -2 ${NSA_DEV} &
+ run_cmd nettest -6 -D -I ${NSA_DEV} -s -3 ${NSA_DEV} &
sleep 1
run_cmd nettest -6 -D -d ${NSA_DEV} -r ${a}
log_test_addr ${a} $? 0 "Enslaved device server, device client, local conn"
@@ -3070,7 +3428,7 @@ ipv6_udp_vrf()
for a in ${NSA_IP6} ${VRF_IP6}
do
log_start
- run_cmd nettest -6 -D -s -2 ${NSA_DEV} &
+ run_cmd nettest -6 -D -s -3 ${NSA_DEV} &
sleep 1
run_cmd_nsb nettest -6 -D -r ${a}
log_test_addr ${a} $? 0 "Global server"
@@ -3079,7 +3437,7 @@ ipv6_udp_vrf()
for a in ${NSA_IP6} ${VRF_IP6}
do
log_start
- run_cmd nettest -6 -D -d ${VRF} -s -2 ${NSA_DEV} &
+ run_cmd nettest -6 -D -I ${VRF} -s -3 ${NSA_DEV} &
sleep 1
run_cmd_nsb nettest -6 -D -r ${a}
log_test_addr ${a} $? 0 "VRF server"
@@ -3088,7 +3446,7 @@ ipv6_udp_vrf()
for a in ${NSA_IP6} ${VRF_IP6}
do
log_start
- run_cmd nettest -6 -D -d ${NSA_DEV} -s -2 ${NSA_DEV} &
+ run_cmd nettest -6 -D -I ${NSA_DEV} -s -3 ${NSA_DEV} &
sleep 1
run_cmd_nsb nettest -6 -D -r ${a}
log_test_addr ${a} $? 0 "Enslaved device server"
@@ -3132,13 +3490,13 @@ ipv6_udp_vrf()
#
a=${NSA_IP6}
log_start
- run_cmd nettest -6 -D -s -2 ${NSA_DEV} &
+ run_cmd nettest -6 -D -s -3 ${NSA_DEV} &
sleep 1
run_cmd nettest -6 -D -d ${VRF} -r ${a}
log_test_addr ${a} $? 0 "Global server, VRF client, local conn"
#log_start
- run_cmd nettest -6 -D -d ${VRF} -s -2 ${NSA_DEV} &
+ run_cmd nettest -6 -D -I ${VRF} -s -3 ${NSA_DEV} &
sleep 1
run_cmd nettest -6 -D -d ${VRF} -r ${a}
log_test_addr ${a} $? 0 "VRF server, VRF client, local conn"
@@ -3146,13 +3504,13 @@ ipv6_udp_vrf()
a=${VRF_IP6}
log_start
- run_cmd nettest -6 -D -s -2 ${VRF} &
+ run_cmd nettest -6 -D -s -3 ${VRF} &
sleep 1
run_cmd nettest -6 -D -d ${VRF} -r ${a}
log_test_addr ${a} $? 0 "Global server, VRF client, local conn"
log_start
- run_cmd nettest -6 -D -d ${VRF} -s -2 ${VRF} &
+ run_cmd nettest -6 -D -I ${VRF} -s -3 ${VRF} &
sleep 1
run_cmd nettest -6 -D -d ${VRF} -r ${a}
log_test_addr ${a} $? 0 "VRF server, VRF client, local conn"
@@ -3168,25 +3526,25 @@ ipv6_udp_vrf()
# device to global IP
a=${NSA_IP6}
log_start
- run_cmd nettest -6 -D -s -2 ${NSA_DEV} &
+ run_cmd nettest -6 -D -s -3 ${NSA_DEV} &
sleep 1
run_cmd nettest -6 -D -d ${NSA_DEV} -r ${a}
log_test_addr ${a} $? 0 "Global server, device client, local conn"
log_start
- run_cmd nettest -6 -D -d ${VRF} -s -2 ${NSA_DEV} &
+ run_cmd nettest -6 -D -I ${VRF} -s -3 ${NSA_DEV} &
sleep 1
run_cmd nettest -6 -D -d ${NSA_DEV} -r ${a}
log_test_addr ${a} $? 0 "VRF server, device client, local conn"
log_start
- run_cmd nettest -6 -D -d ${NSA_DEV} -s -2 ${NSA_DEV} &
+ run_cmd nettest -6 -D -I ${NSA_DEV} -s -3 ${NSA_DEV} &
sleep 1
run_cmd nettest -6 -D -d ${VRF} -r ${a}
log_test_addr ${a} $? 0 "Device server, VRF client, local conn"
log_start
- run_cmd nettest -6 -D -d ${NSA_DEV} -s -2 ${NSA_DEV} &
+ run_cmd nettest -6 -D -I ${NSA_DEV} -s -3 ${NSA_DEV} &
sleep 1
run_cmd nettest -6 -D -d ${NSA_DEV} -r ${a}
log_test_addr ${a} $? 0 "Device server, device client, local conn"
@@ -3280,11 +3638,19 @@ ipv6_addr_bind_novrf()
log_test_addr ${a} $? 0 "Raw socket bind to local address"
log_start
- run_cmd nettest -6 -s -R -P ipv6-icmp -l ${a} -d ${NSA_DEV} -b
+ run_cmd nettest -6 -s -R -P ipv6-icmp -l ${a} -I ${NSA_DEV} -b
log_test_addr ${a} $? 0 "Raw socket bind to local address after device bind"
done
#
+ # raw socket with nonlocal bind
+ #
+ a=${NL_IP6}
+ log_start
+ run_cmd nettest -6 -s -R -P icmp -f -l ${a} -I ${NSA_DEV} -b
+ log_test_addr ${a} $? 0 "Raw socket bind to nonlocal address"
+
+ #
# tcp sockets
#
a=${NSA_IP6}
@@ -3293,14 +3659,17 @@ ipv6_addr_bind_novrf()
log_test_addr ${a} $? 0 "TCP socket bind to local address"
log_start
- run_cmd nettest -6 -s -l ${a} -d ${NSA_DEV} -t1 -b
+ run_cmd nettest -6 -s -l ${a} -I ${NSA_DEV} -t1 -b
log_test_addr ${a} $? 0 "TCP socket bind to local address after device bind"
+ # Sadly, the kernel allows binding a socket to a device and then
+ # binding to an address not on the device. So this test passes
+ # when it really should not
a=${NSA_LO_IP6}
log_start
- show_hint "Should fail with 'Cannot assign requested address'"
- run_cmd nettest -6 -s -l ${a} -d ${NSA_DEV} -t1 -b
- log_test_addr ${a} $? 1 "TCP socket bind to out of scope local address"
+ show_hint "Tecnically should fail since address is not on device but kernel allows"
+ run_cmd nettest -6 -s -l ${a} -I ${NSA_DEV} -t1 -b
+ log_test_addr ${a} $? 0 "TCP socket bind to out of scope local address"
}
ipv6_addr_bind_vrf()
@@ -3311,50 +3680,63 @@ ipv6_addr_bind_vrf()
for a in ${NSA_IP6} ${VRF_IP6}
do
log_start
- run_cmd nettest -6 -s -R -P ipv6-icmp -l ${a} -d ${VRF} -b
+ run_cmd nettest -6 -s -R -P ipv6-icmp -l ${a} -I ${VRF} -b
log_test_addr ${a} $? 0 "Raw socket bind to local address after vrf bind"
log_start
- run_cmd nettest -6 -s -R -P ipv6-icmp -l ${a} -d ${NSA_DEV} -b
+ run_cmd nettest -6 -s -R -P ipv6-icmp -l ${a} -I ${NSA_DEV} -b
log_test_addr ${a} $? 0 "Raw socket bind to local address after device bind"
done
a=${NSA_LO_IP6}
log_start
show_hint "Address on loopback is out of VRF scope"
- run_cmd nettest -6 -s -R -P ipv6-icmp -l ${a} -d ${VRF} -b
+ run_cmd nettest -6 -s -R -P ipv6-icmp -l ${a} -I ${VRF} -b
log_test_addr ${a} $? 1 "Raw socket bind to invalid local address after vrf bind"
#
+ # raw socket with nonlocal bind
+ #
+ a=${NL_IP6}
+ log_start
+ run_cmd nettest -6 -s -R -P icmp -f -l ${a} -I ${VRF} -b
+ log_test_addr ${a} $? 0 "Raw socket bind to nonlocal address after VRF bind"
+
+ #
# tcp sockets
#
# address on enslaved device is valid for the VRF or device in a VRF
for a in ${NSA_IP6} ${VRF_IP6}
do
log_start
- run_cmd nettest -6 -s -l ${a} -d ${VRF} -t1 -b
+ run_cmd nettest -6 -s -l ${a} -I ${VRF} -t1 -b
log_test_addr ${a} $? 0 "TCP socket bind to local address with VRF bind"
done
a=${NSA_IP6}
log_start
- run_cmd nettest -6 -s -l ${a} -d ${NSA_DEV} -t1 -b
+ run_cmd nettest -6 -s -l ${a} -I ${NSA_DEV} -t1 -b
log_test_addr ${a} $? 0 "TCP socket bind to local address with device bind"
+ # Sadly, the kernel allows binding a socket to a device and then
+ # binding to an address not on the device. The only restriction
+ # is that the address is valid in the L3 domain. So this test
+ # passes when it really should not
a=${VRF_IP6}
log_start
- run_cmd nettest -6 -s -l ${a} -d ${NSA_DEV} -t1 -b
- log_test_addr ${a} $? 1 "TCP socket bind to VRF address with device bind"
+ show_hint "Tecnically should fail since address is not on device but kernel allows"
+ run_cmd nettest -6 -s -l ${a} -I ${NSA_DEV} -t1 -b
+ log_test_addr ${a} $? 0 "TCP socket bind to VRF address with device bind"
a=${NSA_LO_IP6}
log_start
show_hint "Address on loopback out of scope for VRF"
- run_cmd nettest -6 -s -l ${a} -d ${VRF} -t1 -b
+ run_cmd nettest -6 -s -l ${a} -I ${VRF} -t1 -b
log_test_addr ${a} $? 1 "TCP socket bind to invalid local address for VRF"
log_start
show_hint "Address on loopback out of scope for device in VRF"
- run_cmd nettest -6 -s -l ${a} -d ${NSA_DEV} -t1 -b
+ run_cmd nettest -6 -s -l ${a} -I ${NSA_DEV} -t1 -b
log_test_addr ${a} $? 1 "TCP socket bind to invalid local address for device bind"
}
@@ -3402,7 +3784,7 @@ ipv6_rt()
for a in ${NSA_IP6} ${VRF_IP6}
do
log_start
- run_cmd nettest ${varg} -d ${VRF} -s &
+ run_cmd nettest ${varg} -I ${VRF} -s &
sleep 1
run_cmd_nsb nettest ${varg} -r ${a} &
sleep 3
@@ -3416,7 +3798,7 @@ ipv6_rt()
for a in ${NSA_IP6} ${VRF_IP6}
do
log_start
- run_cmd nettest ${varg} -d ${NSA_DEV} -s &
+ run_cmd nettest ${varg} -I ${NSA_DEV} -s &
sleep 1
run_cmd_nsb nettest ${varg} -r ${a} &
sleep 3
@@ -3473,7 +3855,7 @@ ipv6_rt()
for a in ${NSA_IP6} ${VRF_IP6}
do
log_start
- run_cmd nettest ${varg} -d ${VRF} -s &
+ run_cmd nettest ${varg} -I ${VRF} -s &
sleep 1
run_cmd nettest ${varg} -d ${VRF} -r ${a} &
sleep 3
@@ -3497,7 +3879,7 @@ ipv6_rt()
setup ${with_vrf}
log_start
- run_cmd nettest ${varg} -d ${VRF} -s &
+ run_cmd nettest ${varg} -I ${VRF} -s &
sleep 1
run_cmd nettest ${varg} -d ${NSA_DEV} -r ${a} &
sleep 3
@@ -3508,7 +3890,7 @@ ipv6_rt()
setup ${with_vrf}
log_start
- run_cmd nettest ${varg} -d ${NSA_DEV} -s &
+ run_cmd nettest ${varg} -I ${NSA_DEV} -s &
sleep 1
run_cmd nettest ${varg} -d ${NSA_DEV} -r ${a} &
sleep 3
@@ -3787,10 +4169,81 @@ use_case_br()
setup_cmd_nsb ip li del vlan100 2>/dev/null
}
+# VRF only.
+# ns-A device is connected to both ns-B and ns-C on a single VRF but only has
+# LLA on the interfaces
+use_case_ping_lla_multi()
+{
+ setup_lla_only
+ # only want reply from ns-A
+ setup_cmd_nsb sysctl -qw net.ipv6.icmp.echo_ignore_multicast=1
+ setup_cmd_nsc sysctl -qw net.ipv6.icmp.echo_ignore_multicast=1
+
+ log_start
+ run_cmd_nsb ping -c1 -w1 ${MCAST}%${NSB_DEV}
+ log_test_addr ${MCAST}%${NSB_DEV} $? 0 "Pre cycle, ping out ns-B"
+
+ run_cmd_nsc ping -c1 -w1 ${MCAST}%${NSC_DEV}
+ log_test_addr ${MCAST}%${NSC_DEV} $? 0 "Pre cycle, ping out ns-C"
+
+ # cycle/flap the first ns-A interface
+ setup_cmd ip link set ${NSA_DEV} down
+ setup_cmd ip link set ${NSA_DEV} up
+ sleep 1
+
+ log_start
+ run_cmd_nsb ping -c1 -w1 ${MCAST}%${NSB_DEV}
+ log_test_addr ${MCAST}%${NSB_DEV} $? 0 "Post cycle ${NSA} ${NSA_DEV}, ping out ns-B"
+ run_cmd_nsc ping -c1 -w1 ${MCAST}%${NSC_DEV}
+ log_test_addr ${MCAST}%${NSC_DEV} $? 0 "Post cycle ${NSA} ${NSA_DEV}, ping out ns-C"
+
+ # cycle/flap the second ns-A interface
+ setup_cmd ip link set ${NSA_DEV2} down
+ setup_cmd ip link set ${NSA_DEV2} up
+ sleep 1
+
+ log_start
+ run_cmd_nsb ping -c1 -w1 ${MCAST}%${NSB_DEV}
+ log_test_addr ${MCAST}%${NSB_DEV} $? 0 "Post cycle ${NSA} ${NSA_DEV2}, ping out ns-B"
+ run_cmd_nsc ping -c1 -w1 ${MCAST}%${NSC_DEV}
+ log_test_addr ${MCAST}%${NSC_DEV} $? 0 "Post cycle ${NSA} ${NSA_DEV2}, ping out ns-C"
+}
+
+# Perform IPv{4,6} SNAT on ns-A, and verify TCP connection is successfully
+# established with ns-B.
+use_case_snat_on_vrf()
+{
+ setup "yes"
+
+ local port="12345"
+
+ run_cmd iptables -t nat -A POSTROUTING -p tcp -m tcp --dport ${port} -j SNAT --to-source ${NSA_LO_IP} -o ${VRF}
+ run_cmd ip6tables -t nat -A POSTROUTING -p tcp -m tcp --dport ${port} -j SNAT --to-source ${NSA_LO_IP6} -o ${VRF}
+
+ run_cmd_nsb nettest -s -l ${NSB_IP} -p ${port} &
+ sleep 1
+ run_cmd nettest -d ${VRF} -r ${NSB_IP} -p ${port}
+ log_test $? 0 "IPv4 TCP connection over VRF with SNAT"
+
+ run_cmd_nsb nettest -6 -s -l ${NSB_IP6} -p ${port} &
+ sleep 1
+ run_cmd nettest -6 -d ${VRF} -r ${NSB_IP6} -p ${port}
+ log_test $? 0 "IPv6 TCP connection over VRF with SNAT"
+
+ # Cleanup
+ run_cmd iptables -t nat -D POSTROUTING -p tcp -m tcp --dport ${port} -j SNAT --to-source ${NSA_LO_IP} -o ${VRF}
+ run_cmd ip6tables -t nat -D POSTROUTING -p tcp -m tcp --dport ${port} -j SNAT --to-source ${NSA_LO_IP6} -o ${VRF}
+}
+
use_cases()
{
log_section "Use cases"
+ log_subsection "Device enslaved to bridge"
use_case_br
+ log_subsection "Ping LLA with multiple interfaces"
+ use_case_ping_lla_multi
+ log_subsection "SNAT on VRF"
+ use_case_snat_on_vrf
}
################################################################################
@@ -3807,14 +4260,17 @@ usage: ${0##*/} OPTS
-p Pause on fail
-P Pause after each test
-v Be verbose
+
+Tests:
+ $TESTS_IPV4 $TESTS_IPV6 $TESTS_OTHER
EOF
}
################################################################################
# main
-TESTS_IPV4="ipv4_ping ipv4_tcp ipv4_udp ipv4_addr_bind ipv4_runtime ipv4_netfilter"
-TESTS_IPV6="ipv6_ping ipv6_tcp ipv6_udp ipv6_addr_bind ipv6_runtime ipv6_netfilter"
+TESTS_IPV4="ipv4_ping ipv4_tcp ipv4_udp ipv4_bind ipv4_runtime ipv4_netfilter"
+TESTS_IPV6="ipv6_ping ipv6_tcp ipv6_udp ipv6_bind ipv6_runtime ipv6_netfilter"
TESTS_OTHER="use_cases"
PAUSE_ON_FAIL=no
@@ -3848,10 +4304,13 @@ elif [ "$TESTS" = "ipv6" ]; then
TESTS="$TESTS_IPV6"
fi
-which nettest >/dev/null
-if [ $? -ne 0 ]; then
- echo "'nettest' command not found; skipping tests"
- exit 0
+# nettest can be run from PATH or from same directory as this selftest
+if ! which nettest >/dev/null; then
+ PATH=$PWD:$PATH
+ if ! which nettest >/dev/null; then
+ echo "'nettest' command not found; skipping tests"
+ exit $ksft_skip
+ fi
fi
declare -i nfail=0
@@ -3879,8 +4338,6 @@ do
# setup namespaces and config, but do not run any tests
setup) setup; exit 0;;
vrf_setup) setup "yes"; exit 0;;
-
- help) echo "Test names: $TESTS"; exit 0;;
esac
done
@@ -3888,3 +4345,11 @@ cleanup 2>/dev/null
printf "\nTests passed: %3d\n" ${nsuccess}
printf "Tests failed: %3d\n" ${nfail}
+
+if [ $nfail -ne 0 ]; then
+ exit 1 # KSFT_FAIL
+elif [ $nsuccess -eq 0 ]; then
+ exit $ksft_skip
+fi
+
+exit 0 # KSFT_PASS
diff --git a/tools/testing/selftests/net/fdb_flush.sh b/tools/testing/selftests/net/fdb_flush.sh
new file mode 100755
index 000000000000..d5e3abb8658c
--- /dev/null
+++ b/tools/testing/selftests/net/fdb_flush.sh
@@ -0,0 +1,813 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+#
+# This test is for checking functionality of flushing FDB entries.
+# Check that flush works as expected with all the supported arguments and verify
+# some combinations of arguments.
+
+source lib.sh
+
+FLUSH_BY_STATE_TESTS="
+ vxlan_test_flush_by_permanent
+ vxlan_test_flush_by_nopermanent
+ vxlan_test_flush_by_static
+ vxlan_test_flush_by_nostatic
+ vxlan_test_flush_by_dynamic
+ vxlan_test_flush_by_nodynamic
+"
+
+FLUSH_BY_FLAG_TESTS="
+ vxlan_test_flush_by_extern_learn
+ vxlan_test_flush_by_noextern_learn
+ vxlan_test_flush_by_router
+ vxlan_test_flush_by_norouter
+"
+
+TESTS="
+ vxlan_test_flush_by_dev
+ vxlan_test_flush_by_vni
+ vxlan_test_flush_by_src_vni
+ vxlan_test_flush_by_port
+ vxlan_test_flush_by_dst_ip
+ vxlan_test_flush_by_nhid
+ $FLUSH_BY_STATE_TESTS
+ $FLUSH_BY_FLAG_TESTS
+ vxlan_test_flush_by_several_args
+ vxlan_test_flush_by_remote_attributes
+ bridge_test_flush_by_dev
+ bridge_test_flush_by_vlan
+ bridge_vxlan_test_flush
+"
+
+: ${VERBOSE:=0}
+: ${PAUSE_ON_FAIL:=no}
+: ${PAUSE:=no}
+: ${VXPORT:=4789}
+
+run_cmd()
+{
+ local cmd="$1"
+ local out
+ local rc
+ local stderr="2>/dev/null"
+
+ if [ "$VERBOSE" = "1" ]; then
+ printf "COMMAND: $cmd\n"
+ stderr=
+ fi
+
+ out=$(eval $cmd $stderr)
+ rc=$?
+ if [ "$VERBOSE" = "1" -a -n "$out" ]; then
+ echo " $out"
+ fi
+
+ return $rc
+}
+
+log_test()
+{
+ local rc=$1
+ local expected=$2
+ local msg="$3"
+ local nsuccess
+ local nfail
+ local ret
+
+ if [ ${rc} -eq ${expected} ]; then
+ printf "TEST: %-60s [ OK ]\n" "${msg}"
+ nsuccess=$((nsuccess+1))
+ else
+ ret=1
+ nfail=$((nfail+1))
+ printf "TEST: %-60s [FAIL]\n" "${msg}"
+ if [ "$VERBOSE" = "1" ]; then
+ echo " rc=$rc, expected $expected"
+ fi
+
+ if [ "${PAUSE_ON_FAIL}" = "yes" ]; then
+ echo
+ echo "hit enter to continue, 'q' to quit"
+ read a
+ [ "$a" = "q" ] && exit 1
+ fi
+ fi
+
+ if [ "${PAUSE}" = "yes" ]; then
+ echo
+ echo "hit enter to continue, 'q' to quit"
+ read a
+ [ "$a" = "q" ] && exit 1
+ fi
+
+ [ "$VERBOSE" = "1" ] && echo
+}
+
+MAC_POOL_1="
+ de:ad:be:ef:13:10
+ de:ad:be:ef:13:11
+ de:ad:be:ef:13:12
+ de:ad:be:ef:13:13
+ de:ad:be:ef:13:14
+"
+mac_pool_1_len=$(echo "$MAC_POOL_1" | grep -c .)
+
+MAC_POOL_2="
+ ca:fe:be:ef:13:10
+ ca:fe:be:ef:13:11
+ ca:fe:be:ef:13:12
+ ca:fe:be:ef:13:13
+ ca:fe:be:ef:13:14
+"
+mac_pool_2_len=$(echo "$MAC_POOL_2" | grep -c .)
+
+fdb_add_mac_pool_1()
+{
+ local dev=$1; shift
+ local args="$@"
+
+ for mac in $MAC_POOL_1
+ do
+ $BRIDGE fdb add $mac dev $dev $args
+ done
+}
+
+fdb_add_mac_pool_2()
+{
+ local dev=$1; shift
+ local args="$@"
+
+ for mac in $MAC_POOL_2
+ do
+ $BRIDGE fdb add $mac dev $dev $args
+ done
+}
+
+fdb_check_n_entries_by_dev_filter()
+{
+ local dev=$1; shift
+ local exp_entries=$1; shift
+ local filter="$@"
+
+ local entries=$($BRIDGE fdb show dev $dev | grep "$filter" | wc -l)
+
+ [[ $entries -eq $exp_entries ]]
+ rc=$?
+
+ log_test $rc 0 "$dev: Expected $exp_entries FDB entries, got $entries"
+ return $rc
+}
+
+vxlan_test_flush_by_dev()
+{
+ local vni=3000
+ local dst_ip=192.0.2.1
+
+ fdb_add_mac_pool_1 vx10 vni $vni dst $dst_ip
+ fdb_add_mac_pool_2 vx20 vni $vni dst $dst_ip
+
+ fdb_check_n_entries_by_dev_filter vx10 $mac_pool_1_len
+ fdb_check_n_entries_by_dev_filter vx20 $mac_pool_2_len
+
+ run_cmd "$BRIDGE fdb flush dev vx10"
+ log_test $? 0 "Flush FDB by dev vx10"
+
+ fdb_check_n_entries_by_dev_filter vx10 0
+ log_test $? 0 "Flush FDB by dev vx10 - test vx10 entries"
+
+ fdb_check_n_entries_by_dev_filter vx20 $mac_pool_2_len
+ log_test $? 0 "Flush FDB by dev vx10 - test vx20 entries"
+}
+
+vxlan_test_flush_by_vni()
+{
+ local vni_1=3000
+ local vni_2=4000
+ local dst_ip=192.0.2.1
+
+ fdb_add_mac_pool_1 vx10 vni $vni_1 dst $dst_ip
+ fdb_add_mac_pool_2 vx10 vni $vni_2 dst $dst_ip
+
+ fdb_check_n_entries_by_dev_filter vx10 $mac_pool_1_len vni $vni_1
+ fdb_check_n_entries_by_dev_filter vx10 $mac_pool_2_len vni $vni_2
+
+ run_cmd "$BRIDGE fdb flush dev vx10 vni $vni_2"
+ log_test $? 0 "Flush FDB by dev vx10 and vni $vni_2"
+
+ fdb_check_n_entries_by_dev_filter vx10 $mac_pool_1_len vni $vni_1
+ log_test $? 0 "Test entries with vni $vni_1"
+
+ fdb_check_n_entries_by_dev_filter vx10 0 vni $vni_2
+ log_test $? 0 "Test entries with vni $vni_2"
+}
+
+vxlan_test_flush_by_src_vni()
+{
+ # Set some entries with {vni=x,src_vni=y} and some with the opposite -
+ # {vni=y,src_vni=x}, to verify that when we flush by src_vni=x, entries
+ # with vni=x are not flused.
+ local vni_1=3000
+ local vni_2=4000
+ local src_vni_1=4000
+ local src_vni_2=3000
+ local dst_ip=192.0.2.1
+
+ # Reconfigure vx10 with 'external' to get 'src_vni' details in
+ # 'bridge fdb' output
+ $IP link del dev vx10
+ $IP link add name vx10 type vxlan dstport "$VXPORT" external
+
+ fdb_add_mac_pool_1 vx10 vni $vni_1 src_vni $src_vni_1 dst $dst_ip
+ fdb_add_mac_pool_2 vx10 vni $vni_2 src_vni $src_vni_2 dst $dst_ip
+
+ fdb_check_n_entries_by_dev_filter vx10 $mac_pool_1_len \
+ src_vni $src_vni_1
+ fdb_check_n_entries_by_dev_filter vx10 $mac_pool_2_len \
+ src_vni $src_vni_2
+
+ run_cmd "$BRIDGE fdb flush dev vx10 src_vni $src_vni_2"
+ log_test $? 0 "Flush FDB by dev vx10 and src_vni $src_vni_2"
+
+ fdb_check_n_entries_by_dev_filter vx10 $mac_pool_1_len \
+ src_vni $src_vni_1
+ log_test $? 0 "Test entries with src_vni $src_vni_1"
+
+ fdb_check_n_entries_by_dev_filter vx10 0 src_vni $src_vni_2
+ log_test $? 0 "Test entries with src_vni $src_vni_2"
+}
+
+vxlan_test_flush_by_port()
+{
+ local port_1=1234
+ local port_2=4321
+ local dst_ip=192.0.2.1
+
+ fdb_add_mac_pool_1 vx10 port $port_1 dst $dst_ip
+ fdb_add_mac_pool_2 vx10 port $port_2 dst $dst_ip
+
+ fdb_check_n_entries_by_dev_filter vx10 $mac_pool_1_len port $port_1
+ fdb_check_n_entries_by_dev_filter vx10 $mac_pool_2_len port $port_2
+
+ run_cmd "$BRIDGE fdb flush dev vx10 port $port_2"
+ log_test $? 0 "Flush FDB by dev vx10 and port $port_2"
+
+ fdb_check_n_entries_by_dev_filter vx10 $mac_pool_1_len port $port_1
+ log_test $? 0 "Test entries with port $port_1"
+
+ fdb_check_n_entries_by_dev_filter vx10 0 port $port_2
+ log_test $? 0 "Test entries with port $port_2"
+}
+
+vxlan_test_flush_by_dst_ip()
+{
+ local dst_ip_1=192.0.2.1
+ local dst_ip_2=192.0.2.2
+
+ fdb_add_mac_pool_1 vx10 dst $dst_ip_1
+ fdb_add_mac_pool_2 vx10 dst $dst_ip_2
+
+ fdb_check_n_entries_by_dev_filter vx10 $mac_pool_1_len dst $dst_ip_1
+ fdb_check_n_entries_by_dev_filter vx10 $mac_pool_2_len dst $dst_ip_2
+
+ run_cmd "$BRIDGE fdb flush dev vx10 dst $dst_ip_2"
+ log_test $? 0 "Flush FDB by dev vx10 and dst $dst_ip_2"
+
+ fdb_check_n_entries_by_dev_filter vx10 $mac_pool_1_len dst $dst_ip_1
+ log_test $? 0 "Test entries with dst $dst_ip_1"
+
+ fdb_check_n_entries_by_dev_filter vx10 0 dst $dst_ip_2
+ log_test $? 0 "Test entries with dst $dst_ip_2"
+}
+
+nexthops_add()
+{
+ local nhid_1=$1; shift
+ local nhid_2=$1; shift
+
+ $IP nexthop add id 10 via 192.0.2.1 fdb
+ $IP nexthop add id $nhid_1 group 10 fdb
+
+ $IP nexthop add id 20 via 192.0.2.2 fdb
+ $IP nexthop add id $nhid_2 group 20 fdb
+}
+
+vxlan_test_flush_by_nhid()
+{
+ local nhid_1=100
+ local nhid_2=200
+
+ nexthops_add $nhid_1 $nhid_2
+
+ fdb_add_mac_pool_1 vx10 nhid $nhid_1
+ fdb_add_mac_pool_2 vx10 nhid $nhid_2
+
+ fdb_check_n_entries_by_dev_filter vx10 $mac_pool_1_len nhid $nhid_1
+ fdb_check_n_entries_by_dev_filter vx10 $mac_pool_2_len nhid $nhid_2
+
+ run_cmd "$BRIDGE fdb flush dev vx10 nhid $nhid_2"
+ log_test $? 0 "Flush FDB by dev vx10 and nhid $nhid_2"
+
+ fdb_check_n_entries_by_dev_filter vx10 $mac_pool_1_len nhid $nhid_1
+ log_test $? 0 "Test entries with nhid $nhid_1"
+
+ fdb_check_n_entries_by_dev_filter vx10 0 nhid $nhid_2
+ log_test $? 0 "Test entries with nhid $nhid_2"
+
+ # Flush also entries with $nhid_1, and then verify that flushing by
+ # 'nhid' does not return an error when there are no entries with
+ # nexthops.
+ run_cmd "$BRIDGE fdb flush dev vx10 nhid $nhid_1"
+ log_test $? 0 "Flush FDB by dev vx10 and nhid $nhid_1"
+
+ fdb_check_n_entries_by_dev_filter vx10 0 nhid
+ log_test $? 0 "Test entries with 'nhid' keyword"
+
+ run_cmd "$BRIDGE fdb flush dev vx10 nhid $nhid_1"
+ log_test $? 0 "Flush FDB by nhid when there are no entries with nexthop"
+}
+
+vxlan_test_flush_by_state()
+{
+ local flush_by_state=$1; shift
+ local state_1=$1; shift
+ local exp_state_1=$1; shift
+ local state_2=$1; shift
+ local exp_state_2=$1; shift
+
+ local dst_ip_1=192.0.2.1
+ local dst_ip_2=192.0.2.2
+
+ fdb_add_mac_pool_1 vx10 dst $dst_ip_1 $state_1
+ fdb_add_mac_pool_2 vx10 dst $dst_ip_2 $state_2
+
+ # Check the entries by dst_ip as not all states appear in 'bridge fdb'
+ # output.
+ fdb_check_n_entries_by_dev_filter vx10 $mac_pool_1_len dst $dst_ip_1
+ fdb_check_n_entries_by_dev_filter vx10 $mac_pool_2_len dst $dst_ip_2
+
+ run_cmd "$BRIDGE fdb flush dev vx10 $flush_by_state"
+ log_test $? 0 "Flush FDB by dev vx10 and state $flush_by_state"
+
+ fdb_check_n_entries_by_dev_filter vx10 $exp_state_1 dst $dst_ip_1
+ log_test $? 0 "Test entries with state $state_1"
+
+ fdb_check_n_entries_by_dev_filter vx10 $exp_state_2 dst $dst_ip_2
+ log_test $? 0 "Test entries with state $state_2"
+}
+
+vxlan_test_flush_by_permanent()
+{
+ # Entries that are added without state get 'permanent' state by
+ # default, add some entries with flag 'extern_learn' instead of state,
+ # so they will be added with 'permanent' and should be flushed also.
+ local flush_by_state="permanent"
+ local state_1="permanent"
+ local exp_state_1=0
+ local state_2="extern_learn"
+ local exp_state_2=0
+
+ vxlan_test_flush_by_state $flush_by_state $state_1 $exp_state_1 \
+ $state_2 $exp_state_2
+}
+
+vxlan_test_flush_by_nopermanent()
+{
+ local flush_by_state="nopermanent"
+ local state_1="permanent"
+ local exp_state_1=$mac_pool_1_len
+ local state_2="static"
+ local exp_state_2=0
+
+ vxlan_test_flush_by_state $flush_by_state $state_1 $exp_state_1 \
+ $state_2 $exp_state_2
+}
+
+vxlan_test_flush_by_static()
+{
+ local flush_by_state="static"
+ local state_1="static"
+ local exp_state_1=0
+ local state_2="dynamic"
+ local exp_state_2=$mac_pool_2_len
+
+ vxlan_test_flush_by_state $flush_by_state $state_1 $exp_state_1 \
+ $state_2 $exp_state_2
+}
+
+vxlan_test_flush_by_nostatic()
+{
+ local flush_by_state="nostatic"
+ local state_1="permanent"
+ local exp_state_1=$mac_pool_1_len
+ local state_2="dynamic"
+ local exp_state_2=0
+
+ vxlan_test_flush_by_state $flush_by_state $state_1 $exp_state_1 \
+ $state_2 $exp_state_2
+}
+
+vxlan_test_flush_by_dynamic()
+{
+ local flush_by_state="dynamic"
+ local state_1="dynamic"
+ local exp_state_1=0
+ local state_2="static"
+ local exp_state_2=$mac_pool_2_len
+
+ vxlan_test_flush_by_state $flush_by_state $state_1 $exp_state_1 \
+ $state_2 $exp_state_2
+}
+
+vxlan_test_flush_by_nodynamic()
+{
+ local flush_by_state="nodynamic"
+ local state_1="permanent"
+ local exp_state_1=0
+ local state_2="dynamic"
+ local exp_state_2=$mac_pool_2_len
+
+ vxlan_test_flush_by_state $flush_by_state $state_1 $exp_state_1 \
+ $state_2 $exp_state_2
+}
+
+vxlan_test_flush_by_flag()
+{
+ local flush_by_flag=$1; shift
+ local flag_1=$1; shift
+ local exp_flag_1=$1; shift
+ local flag_2=$1; shift
+ local exp_flag_2=$1; shift
+
+ local dst_ip_1=192.0.2.1
+ local dst_ip_2=192.0.2.2
+
+ fdb_add_mac_pool_1 vx10 dst $dst_ip_1 $flag_1
+ fdb_add_mac_pool_2 vx10 dst $dst_ip_2 $flag_2
+
+ fdb_check_n_entries_by_dev_filter vx10 $mac_pool_1_len $flag_1
+ fdb_check_n_entries_by_dev_filter vx10 $mac_pool_2_len $flag_2
+
+ run_cmd "$BRIDGE fdb flush dev vx10 $flush_by_flag"
+ log_test $? 0 "Flush FDB by dev vx10 and flag $flush_by_flag"
+
+ fdb_check_n_entries_by_dev_filter vx10 $exp_flag_1 dst $dst_ip_1
+ log_test $? 0 "Test entries with flag $flag_1"
+
+ fdb_check_n_entries_by_dev_filter vx10 $exp_flag_2 dst $dst_ip_2
+ log_test $? 0 "Test entries with flag $flag_2"
+}
+
+vxlan_test_flush_by_extern_learn()
+{
+ local flush_by_flag="extern_learn"
+ local flag_1="extern_learn"
+ local exp_flag_1=0
+ local flag_2="router"
+ local exp_flag_2=$mac_pool_2_len
+
+ vxlan_test_flush_by_flag $flush_by_flag $flag_1 $exp_flag_1 \
+ $flag_2 $exp_flag_2
+}
+
+vxlan_test_flush_by_noextern_learn()
+{
+ local flush_by_flag="noextern_learn"
+ local flag_1="extern_learn"
+ local exp_flag_1=$mac_pool_1_len
+ local flag_2="router"
+ local exp_flag_2=0
+
+ vxlan_test_flush_by_flag $flush_by_flag $flag_1 $exp_flag_1 \
+ $flag_2 $exp_flag_2
+}
+
+vxlan_test_flush_by_router()
+{
+ local flush_by_flag="router"
+ local flag_1="router"
+ local exp_flag_1=0
+ local flag_2="extern_learn"
+ local exp_flag_2=$mac_pool_2_len
+
+ vxlan_test_flush_by_flag $flush_by_flag $flag_1 $exp_flag_1 \
+ $flag_2 $exp_flag_2
+}
+
+vxlan_test_flush_by_norouter()
+{
+
+ local flush_by_flag="norouter"
+ local flag_1="router"
+ local exp_flag_1=$mac_pool_1_len
+ local flag_2="extern_learn"
+ local exp_flag_2=0
+
+ vxlan_test_flush_by_flag $flush_by_flag $flag_1 $exp_flag_1 \
+ $flag_2 $exp_flag_2
+}
+
+vxlan_test_flush_by_several_args()
+{
+ local dst_ip_1=192.0.2.1
+ local dst_ip_2=192.0.2.2
+ local state_1=permanent
+ local state_2=static
+ local vni=3000
+ local port=1234
+ local nhid=100
+ local flag=router
+ local flush_args
+
+ ################### Flush by 2 args - nhid and flag ####################
+ $IP nexthop add id 10 via 192.0.2.1 fdb
+ $IP nexthop add id $nhid group 10 fdb
+
+ fdb_add_mac_pool_1 vx10 nhid $nhid $flag $state_1
+ fdb_add_mac_pool_2 vx10 nhid $nhid $flag $state_2
+
+ fdb_check_n_entries_by_dev_filter vx10 $mac_pool_1_len $state_1
+ fdb_check_n_entries_by_dev_filter vx10 $mac_pool_2_len $state_2
+
+ run_cmd "$BRIDGE fdb flush dev vx10 nhid $nhid $flag"
+ log_test $? 0 "Flush FDB by dev vx10 nhid $nhid $flag"
+
+ # All entries should be flushed as 'state' is not an argument for flush
+ # filtering.
+ fdb_check_n_entries_by_dev_filter vx10 0 $state_1
+ log_test $? 0 "Test entries with state $state_1"
+
+ fdb_check_n_entries_by_dev_filter vx10 0 $state_2
+ log_test $? 0 "Test entries with state $state_2"
+
+ ################ Flush by 3 args - VNI, port and dst_ip ################
+ fdb_add_mac_pool_1 vx10 vni $vni port $port dst $dst_ip_1
+ fdb_add_mac_pool_2 vx10 vni $vni port $port dst $dst_ip_2
+
+ fdb_check_n_entries_by_dev_filter vx10 $mac_pool_1_len dst $dst_ip_1
+ fdb_check_n_entries_by_dev_filter vx10 $mac_pool_2_len dst $dst_ip_2
+
+ flush_args="vni $vni port $port dst $dst_ip_2"
+ run_cmd "$BRIDGE fdb flush dev vx10 $flush_args"
+ log_test $? 0 "Flush FDB by dev vx10 $flush_args"
+
+ # Only entries with $dst_ip_2 should be flushed, even the rest arguments
+ # match the filter, the flush should be AND of all the arguments.
+ fdb_check_n_entries_by_dev_filter vx10 $mac_pool_1_len dst $dst_ip_1
+ log_test $? 0 "Test entries with dst $dst_ip_1"
+
+ fdb_check_n_entries_by_dev_filter vx10 0 dst $dst_ip_2
+ log_test $? 0 "Test entries with dst $dst_ip_2"
+}
+
+multicast_fdb_entries_add()
+{
+ mac=00:00:00:00:00:00
+ vnis=(2000 3000)
+
+ for vni in "${vnis[@]}"; do
+ $BRIDGE fdb append $mac dev vx10 dst 192.0.2.1 vni $vni \
+ src_vni 5000
+ $BRIDGE fdb append $mac dev vx10 dst 192.0.2.1 vni $vni \
+ port 1111
+ $BRIDGE fdb append $mac dev vx10 dst 192.0.2.2 vni $vni \
+ port 2222
+ done
+}
+
+vxlan_test_flush_by_remote_attributes()
+{
+ local flush_args
+
+ # Reconfigure vx10 with 'external' to get 'src_vni' details in
+ # 'bridge fdb' output
+ $IP link del dev vx10
+ $IP link add name vx10 type vxlan dstport "$VXPORT" external
+
+ # For multicat FDB entries, the VXLAN driver stores a linked list of
+ # remotes for a given key. Verify that only the expected remotes are
+ # flushed.
+ multicast_fdb_entries_add
+
+ ## Flush by 3 remote's attributes - destination IP, port and VNI ##
+ flush_args="dst 192.0.2.1 port 1111 vni 2000"
+ fdb_check_n_entries_by_dev_filter vx10 1 $flush_args
+
+ t0_n_entries=$($BRIDGE fdb show dev vx10 | wc -l)
+ run_cmd "$BRIDGE fdb flush dev vx10 $flush_args"
+ log_test $? 0 "Flush FDB by dev vx10 $flush_args"
+
+ fdb_check_n_entries_by_dev_filter vx10 0 $flush_args
+
+ exp_n_entries=$((t0_n_entries - 1))
+ t1_n_entries=$($BRIDGE fdb show dev vx10 | wc -l)
+ [[ $t1_n_entries -eq $exp_n_entries ]]
+ log_test $? 0 "Check how many entries were flushed"
+
+ ## Flush by 2 remote's attributes - destination IP and port ##
+ flush_args="dst 192.0.2.2 port 2222"
+
+ fdb_check_n_entries_by_dev_filter vx10 2 $flush_args
+
+ t0_n_entries=$($BRIDGE fdb show dev vx10 | wc -l)
+ run_cmd "$BRIDGE fdb flush dev vx10 $flush_args"
+ log_test $? 0 "Flush FDB by dev vx10 $flush_args"
+
+ fdb_check_n_entries_by_dev_filter vx10 0 $flush_args
+
+ exp_n_entries=$((t0_n_entries - 2))
+ t1_n_entries=$($BRIDGE fdb show dev vx10 | wc -l)
+ [[ $t1_n_entries -eq $exp_n_entries ]]
+ log_test $? 0 "Check how many entries were flushed"
+
+ ## Flush by source VNI, which is not remote's attribute and VNI ##
+ flush_args="vni 3000 src_vni 5000"
+
+ fdb_check_n_entries_by_dev_filter vx10 1 $flush_args
+
+ t0_n_entries=$($BRIDGE fdb show dev vx10 | wc -l)
+ run_cmd "$BRIDGE fdb flush dev vx10 $flush_args"
+ log_test $? 0 "Flush FDB by dev vx10 $flush_args"
+
+ fdb_check_n_entries_by_dev_filter vx10 0 $flush_args
+
+ exp_n_entries=$((t0_n_entries -1))
+ t1_n_entries=$($BRIDGE fdb show dev vx10 | wc -l)
+ [[ $t1_n_entries -eq $exp_n_entries ]]
+ log_test $? 0 "Check how many entries were flushed"
+
+ # Flush by 1 remote's attribute - destination IP ##
+ flush_args="dst 192.0.2.1"
+
+ fdb_check_n_entries_by_dev_filter vx10 2 $flush_args
+
+ t0_n_entries=$($BRIDGE fdb show dev vx10 | wc -l)
+ run_cmd "$BRIDGE fdb flush dev vx10 $flush_args"
+ log_test $? 0 "Flush FDB by dev vx10 $flush_args"
+
+ fdb_check_n_entries_by_dev_filter vx10 0 $flush_args
+
+ exp_n_entries=$((t0_n_entries -2))
+ t1_n_entries=$($BRIDGE fdb show dev vx10 | wc -l)
+ [[ $t1_n_entries -eq $exp_n_entries ]]
+ log_test $? 0 "Check how many entries were flushed"
+}
+
+bridge_test_flush_by_dev()
+{
+ local dst_ip=192.0.2.1
+ local br0_n_ent_t0=$($BRIDGE fdb show dev br0 | wc -l)
+ local br1_n_ent_t0=$($BRIDGE fdb show dev br1 | wc -l)
+
+ fdb_add_mac_pool_1 br0 dst $dst_ip
+ fdb_add_mac_pool_2 br1 dst $dst_ip
+
+ # Each 'fdb add' command adds one extra entry in the bridge with the
+ # default vlan.
+ local exp_br0_n_ent=$(($br0_n_ent_t0 + 2 * $mac_pool_1_len))
+ local exp_br1_n_ent=$(($br1_n_ent_t0 + 2 * $mac_pool_2_len))
+
+ fdb_check_n_entries_by_dev_filter br0 $exp_br0_n_ent
+ fdb_check_n_entries_by_dev_filter br1 $exp_br1_n_ent
+
+ run_cmd "$BRIDGE fdb flush dev br0"
+ log_test $? 0 "Flush FDB by dev br0"
+
+ # The default entry should not be flushed
+ fdb_check_n_entries_by_dev_filter br0 1
+ log_test $? 0 "Flush FDB by dev br0 - test br0 entries"
+
+ fdb_check_n_entries_by_dev_filter br1 $exp_br1_n_ent
+ log_test $? 0 "Flush FDB by dev br0 - test br1 entries"
+}
+
+bridge_test_flush_by_vlan()
+{
+ local vlan_1=10
+ local vlan_2=20
+ local vlan_1_ent_t0
+ local vlan_2_ent_t0
+
+ $BRIDGE vlan add vid $vlan_1 dev br0 self
+ $BRIDGE vlan add vid $vlan_2 dev br0 self
+
+ vlan_1_ent_t0=$($BRIDGE fdb show dev br0 | grep "vlan $vlan_1" | wc -l)
+ vlan_2_ent_t0=$($BRIDGE fdb show dev br0 | grep "vlan $vlan_2" | wc -l)
+
+ fdb_add_mac_pool_1 br0 vlan $vlan_1
+ fdb_add_mac_pool_2 br0 vlan $vlan_2
+
+ local exp_vlan_1_ent=$(($vlan_1_ent_t0 + $mac_pool_1_len))
+ local exp_vlan_2_ent=$(($vlan_2_ent_t0 + $mac_pool_2_len))
+
+ fdb_check_n_entries_by_dev_filter br0 $exp_vlan_1_ent vlan $vlan_1
+ fdb_check_n_entries_by_dev_filter br0 $exp_vlan_2_ent vlan $vlan_2
+
+ run_cmd "$BRIDGE fdb flush dev br0 vlan $vlan_1"
+ log_test $? 0 "Flush FDB by dev br0 and vlan $vlan_1"
+
+ fdb_check_n_entries_by_dev_filter br0 0 vlan $vlan_1
+ log_test $? 0 "Test entries with vlan $vlan_1"
+
+ fdb_check_n_entries_by_dev_filter br0 $exp_vlan_2_ent vlan $vlan_2
+ log_test $? 0 "Test entries with vlan $vlan_2"
+}
+
+bridge_vxlan_test_flush()
+{
+ local vlan_1=10
+ local dst_ip=192.0.2.1
+
+ $IP link set dev vx10 master br0
+ $BRIDGE vlan add vid $vlan_1 dev br0 self
+ $BRIDGE vlan add vid $vlan_1 dev vx10
+
+ fdb_add_mac_pool_1 vx10 vni 3000 dst $dst_ip self master
+
+ fdb_check_n_entries_by_dev_filter vx10 $mac_pool_1_len vlan $vlan_1
+ fdb_check_n_entries_by_dev_filter vx10 $mac_pool_1_len vni 3000
+
+ # Such command should fail in VXLAN driver as vlan is not supported,
+ # but the command should flush the entries in the bridge
+ run_cmd "$BRIDGE fdb flush dev vx10 vlan $vlan_1 master self"
+ log_test $? 255 \
+ "Flush FDB by dev vx10, vlan $vlan_1, master and self"
+
+ fdb_check_n_entries_by_dev_filter vx10 0 vlan $vlan_1
+ log_test $? 0 "Test entries with vlan $vlan_1"
+
+ fdb_check_n_entries_by_dev_filter vx10 $mac_pool_1_len dst $dst_ip
+ log_test $? 0 "Test entries with dst $dst_ip"
+}
+
+setup()
+{
+ setup_ns NS
+ IP="ip -netns ${NS}"
+ BRIDGE="bridge -netns ${NS}"
+
+ $IP link add name vx10 type vxlan id 1000 dstport "$VXPORT"
+ $IP link add name vx20 type vxlan id 2000 dstport "$VXPORT"
+
+ $IP link add br0 type bridge vlan_filtering 1
+ $IP link add br1 type bridge vlan_filtering 1
+}
+
+cleanup()
+{
+ $IP link del dev br1
+ $IP link del dev br0
+
+ $IP link del dev vx20
+ $IP link del dev vx10
+
+ cleanup_ns ${NS}
+}
+
+################################################################################
+# main
+
+while getopts :t:pPhvw: o
+do
+ case $o in
+ t) TESTS=$OPTARG;;
+ p) PAUSE_ON_FAIL=yes;;
+ P) PAUSE=yes;;
+ v) VERBOSE=$(($VERBOSE + 1));;
+ w) PING_TIMEOUT=$OPTARG;;
+ h) usage; exit 0;;
+ *) usage; exit 1;;
+ esac
+done
+
+# make sure we don't pause twice
+[ "${PAUSE}" = "yes" ] && PAUSE_ON_FAIL=no
+
+if [ "$(id -u)" -ne 0 ];then
+ echo "SKIP: Need root privileges"
+ exit $ksft_skip;
+fi
+
+if [ ! -x "$(command -v ip)" ]; then
+ echo "SKIP: Could not run test without ip tool"
+ exit $ksft_skip
+fi
+
+# Check a flag that is added to flush command as part of VXLAN flush support
+bridge fdb help 2>&1 | grep -q "\[no\]router"
+if [ $? -ne 0 ]; then
+ echo "SKIP: iproute2 too old, missing flush command for VXLAN"
+ exit $ksft_skip
+fi
+
+ip link add dev vx10 type vxlan id 1000 2> /dev/null
+out=$(bridge fdb flush dev vx10 2>&1 | grep -q "Operation not supported")
+if [ $? -eq 0 ]; then
+ echo "SKIP: kernel lacks vxlan flush support"
+ exit $ksft_skip
+fi
+ip link del dev vx10
+
+for t in $TESTS
+do
+ setup; $t; cleanup;
+done
diff --git a/tools/testing/selftests/net/fib-onlink-tests.sh b/tools/testing/selftests/net/fib-onlink-tests.sh
index c287b90b8af8..ec2d6ceb1f08 100755
--- a/tools/testing/selftests/net/fib-onlink-tests.sh
+++ b/tools/testing/selftests/net/fib-onlink-tests.sh
@@ -3,6 +3,7 @@
# IPv4 and IPv6 onlink tests
+source lib.sh
PAUSE_ON_FAIL=${PAUSE_ON_FAIL:=no}
VERBOSE=0
@@ -74,9 +75,6 @@ TEST_NET4IN6[2]=10.2.1.254
# mcast address
MCAST6=ff02::1
-
-PEER_NS=bart
-PEER_CMD="ip netns exec ${PEER_NS}"
VRF=lisa
VRF_TABLE=1101
PBR_TABLE=101
@@ -176,8 +174,7 @@ setup()
set -e
# create namespace
- ip netns add ${PEER_NS}
- ip -netns ${PEER_NS} li set lo up
+ setup_ns PEER_NS
# add vrf table
ip li add ${VRF} type vrf table ${VRF_TABLE}
@@ -219,7 +216,7 @@ setup()
cleanup()
{
# make sure we start from a clean slate
- ip netns del ${PEER_NS} 2>/dev/null
+ cleanup_ns ${PEER_NS} 2>/dev/null
for n in 1 3 5 7; do
ip link del ${NETIFS[p${n}]} 2>/dev/null
done
diff --git a/tools/testing/selftests/net/fib_nexthop_multiprefix.sh b/tools/testing/selftests/net/fib_nexthop_multiprefix.sh
index 9dc35a16e415..e85248609af4 100755
--- a/tools/testing/selftests/net/fib_nexthop_multiprefix.sh
+++ b/tools/testing/selftests/net/fib_nexthop_multiprefix.sh
@@ -12,6 +12,7 @@
#
# routing in h0 to hN is done with nexthop objects.
+source lib.sh
PAUSE_ON_FAIL=no
VERBOSE=0
@@ -72,12 +73,6 @@ create_ns()
{
local ns=${1}
- ip netns del ${ns} 2>/dev/null
-
- ip netns add ${ns}
- ip -netns ${ns} addr add 127.0.0.1/8 dev lo
- ip -netns ${ns} link set lo up
-
ip netns exec ${ns} sysctl -q -w net.ipv6.conf.all.keep_addr_on_down=1
case ${ns} in
h*)
@@ -97,7 +92,13 @@ setup()
#set -e
- for ns in h0 r1 h1 h2 h3
+ setup_ns h0 r1 h1 h2 h3
+ h[0]=$h0
+ h[1]=$h1
+ h[2]=$h2
+ h[3]=$h3
+ r[1]=$r1
+ for ns in ${h[0]} ${r[1]} ${h[1]} ${h[2]} ${h[3]}
do
create_ns ${ns}
done
@@ -108,35 +109,35 @@ setup()
for i in 0 1 2 3
do
- ip -netns h${i} li add eth0 type veth peer name r1h${i}
- ip -netns h${i} li set eth0 up
- ip -netns h${i} li set r1h${i} netns r1 name eth${i} up
-
- ip -netns h${i} addr add dev eth0 172.16.10${i}.1/24
- ip -netns h${i} -6 addr add dev eth0 2001:db8:10${i}::1/64
- ip -netns r1 addr add dev eth${i} 172.16.10${i}.254/24
- ip -netns r1 -6 addr add dev eth${i} 2001:db8:10${i}::64/64
+ ip -netns ${h[$i]} li add eth0 type veth peer name r1h${i}
+ ip -netns ${h[$i]} li set eth0 up
+ ip -netns ${h[$i]} li set r1h${i} netns ${r[1]} name eth${i} up
+
+ ip -netns ${h[$i]} addr add dev eth0 172.16.10${i}.1/24
+ ip -netns ${h[$i]} -6 addr add dev eth0 2001:db8:10${i}::1/64
+ ip -netns ${r[1]} addr add dev eth${i} 172.16.10${i}.254/24
+ ip -netns ${r[1]} -6 addr add dev eth${i} 2001:db8:10${i}::64/64
done
- ip -netns h0 nexthop add id 4 via 172.16.100.254 dev eth0
- ip -netns h0 nexthop add id 6 via 2001:db8:100::64 dev eth0
+ ip -netns ${h[0]} nexthop add id 4 via 172.16.100.254 dev eth0
+ ip -netns ${h[0]} nexthop add id 6 via 2001:db8:100::64 dev eth0
- # routing from h0 to h1-h3 and back
+ # routing from ${h[0]} to h1-h3 and back
for i in 1 2 3
do
- ip -netns h0 ro add 172.16.10${i}.0/24 nhid 4
- ip -netns h${i} ro add 172.16.100.0/24 via 172.16.10${i}.254
+ ip -netns ${h[0]} ro add 172.16.10${i}.0/24 nhid 4
+ ip -netns ${h[$i]} ro add 172.16.100.0/24 via 172.16.10${i}.254
- ip -netns h0 -6 ro add 2001:db8:10${i}::/64 nhid 6
- ip -netns h${i} -6 ro add 2001:db8:100::/64 via 2001:db8:10${i}::64
+ ip -netns ${h[0]} -6 ro add 2001:db8:10${i}::/64 nhid 6
+ ip -netns ${h[$i]} -6 ro add 2001:db8:100::/64 via 2001:db8:10${i}::64
done
if [ "$VERBOSE" = "1" ]; then
echo
echo "host 1 config"
- ip -netns h0 li sh
- ip -netns h0 ro sh
- ip -netns h0 -6 ro sh
+ ip -netns ${h[0]} li sh
+ ip -netns ${h[0]} ro sh
+ ip -netns ${h[0]} -6 ro sh
fi
#set +e
@@ -144,10 +145,7 @@ setup()
cleanup()
{
- for n in h1 r1 h2 h3 h4
- do
- ip netns del ${n} 2>/dev/null
- done
+ cleanup_all_ns
}
change_mtu()
@@ -156,7 +154,7 @@ change_mtu()
local mtu=$2
run_cmd ip -netns h${hostid} li set eth0 mtu ${mtu}
- run_cmd ip -netns r1 li set eth${hostid} mtu ${mtu}
+ run_cmd ip -netns ${r1} li set eth${hostid} mtu ${mtu}
}
################################################################################
@@ -168,23 +166,23 @@ validate_v4_exception()
local mtu=$2
local ping_sz=$3
local dst="172.16.10${i}.1"
- local h0=172.16.100.1
- local r1=172.16.100.254
+ local h0_ip=172.16.100.1
+ local r1_ip=172.16.100.254
local rc
if [ ${ping_sz} != "0" ]; then
- run_cmd ip netns exec h0 ping -s ${ping_sz} -c5 -w5 ${dst}
+ run_cmd ip netns exec ${h0} ping -s ${ping_sz} -c5 -w5 ${dst}
fi
if [ "$VERBOSE" = "1" ]; then
echo "Route get"
- ip -netns h0 ro get ${dst}
+ ip -netns ${h0} ro get ${dst}
echo "Searching for:"
echo " cache .* mtu ${mtu}"
echo
fi
- ip -netns h0 ro get ${dst} | \
+ ip -netns ${h0} ro get ${dst} | \
grep -q "cache .* mtu ${mtu}"
rc=$?
@@ -197,24 +195,24 @@ validate_v6_exception()
local mtu=$2
local ping_sz=$3
local dst="2001:db8:10${i}::1"
- local h0=2001:db8:100::1
- local r1=2001:db8:100::64
+ local h0_ip=2001:db8:100::1
+ local r1_ip=2001:db8:100::64
local rc
if [ ${ping_sz} != "0" ]; then
- run_cmd ip netns exec h0 ${ping6} -s ${ping_sz} -c5 -w5 ${dst}
+ run_cmd ip netns exec ${h0} ${ping6} -s ${ping_sz} -c5 -w5 ${dst}
fi
if [ "$VERBOSE" = "1" ]; then
echo "Route get"
- ip -netns h0 -6 ro get ${dst}
+ ip -netns ${h0} -6 ro get ${dst}
echo "Searching for:"
- echo " ${dst} from :: via ${r1} dev eth0 src ${h0} .* mtu ${mtu}"
+ echo " ${dst}.* via ${r1_ip} dev eth0 src ${h0_ip} .* mtu ${mtu}"
echo
fi
- ip -netns h0 -6 ro get ${dst} | \
- grep -q "${dst} from :: via ${r1} dev eth0 src ${h0} .* mtu ${mtu}"
+ ip -netns ${h0} -6 ro get ${dst} | \
+ grep -q "${dst}.* via ${r1_ip} dev eth0 src ${h0_ip} .* mtu ${mtu}"
rc=$?
log_test $rc 0 "IPv6: host 0 to host ${i}, mtu ${mtu}"
@@ -242,11 +240,11 @@ for i in 1 2 3
do
# generate a cached route per-cpu
for c in ${cpus}; do
- run_cmd taskset -c ${c} ip netns exec h0 ping -c1 -w1 172.16.10${i}.1
- [ $? -ne 0 ] && printf "\nERROR: ping to h${i} failed\n" && ret=1
+ run_cmd taskset -c ${c} ip netns exec ${h0} ping -c1 -w1 172.16.10${i}.1
+ [ $? -ne 0 ] && printf "\nERROR: ping to ${h[$i]} failed\n" && ret=1
- run_cmd taskset -c ${c} ip netns exec h0 ${ping6} -c1 -w1 2001:db8:10${i}::1
- [ $? -ne 0 ] && printf "\nERROR: ping6 to h${i} failed\n" && ret=1
+ run_cmd taskset -c ${c} ip netns exec ${h0} ${ping6} -c1 -w1 2001:db8:10${i}::1
+ [ $? -ne 0 ] && printf "\nERROR: ping6 to ${h[$i]} failed\n" && ret=1
[ $ret -ne 0 ] && break
done
@@ -282,11 +280,11 @@ if [ $ret -eq 0 ]; then
validate_v6_exception 3 1400 0
# targeted deletes to trigger cleanup paths in kernel
- ip -netns h0 ro del 172.16.102.0/24 nhid 4
- ip -netns h0 -6 ro del 2001:db8:102::/64 nhid 6
+ ip -netns ${h0} ro del 172.16.102.0/24 nhid 4
+ ip -netns ${h0} -6 ro del 2001:db8:102::/64 nhid 6
- ip -netns h0 nexthop del id 4
- ip -netns h0 nexthop del id 6
+ ip -netns ${h0} nexthop del id 4
+ ip -netns ${h0} nexthop del id 6
fi
cleanup
diff --git a/tools/testing/selftests/net/fib_nexthop_nongw.sh b/tools/testing/selftests/net/fib_nexthop_nongw.sh
new file mode 100755
index 000000000000..1ccf56f10171
--- /dev/null
+++ b/tools/testing/selftests/net/fib_nexthop_nongw.sh
@@ -0,0 +1,115 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+#
+# ns: h1 | ns: h2
+# 192.168.0.1/24 |
+# eth0 |
+# | 192.168.1.1/32
+# veth0 <---|---> veth1
+# Validate source address selection for route without gateway
+
+source lib.sh
+PAUSE_ON_FAIL=no
+VERBOSE=0
+ret=0
+
+################################################################################
+# helpers
+
+log_test()
+{
+ local rc=$1
+ local expected=$2
+ local msg="$3"
+
+ if [ ${rc} -eq ${expected} ]; then
+ printf "TEST: %-60s [ OK ]\n" "${msg}"
+ nsuccess=$((nsuccess+1))
+ else
+ ret=1
+ nfail=$((nfail+1))
+ printf "TEST: %-60s [FAIL]\n" "${msg}"
+ if [ "${PAUSE_ON_FAIL}" = "yes" ]; then
+ echo
+ echo "hit enter to continue, 'q' to quit"
+ read a
+ [ "$a" = "q" ] && exit 1
+ fi
+ fi
+
+ [ "$VERBOSE" = "1" ] && echo
+}
+
+run_cmd()
+{
+ local cmd="$*"
+ local out
+ local rc
+
+ if [ "$VERBOSE" = "1" ]; then
+ echo "COMMAND: $cmd"
+ fi
+
+ out=$(eval $cmd 2>&1)
+ rc=$?
+ if [ "$VERBOSE" = "1" -a -n "$out" ]; then
+ echo "$out"
+ fi
+
+ [ "$VERBOSE" = "1" ] && echo
+
+ return $rc
+}
+
+################################################################################
+# config
+setup()
+{
+ setup_ns h1 h2
+
+ # Add a fake eth0 to support an ip address
+ ip -n $h1 link add name eth0 type dummy
+ ip -n $h1 link set eth0 up
+ ip -n $h1 address add 192.168.0.1/24 dev eth0
+
+ # Configure veths (same @mac, arp off)
+ ip -n $h1 link add name veth0 type veth peer name veth1 netns $h2
+ ip -n $h1 link set veth0 up
+
+ ip -n $h2 link set veth1 up
+
+ # Configure @IP in the peer netns
+ ip -n $h2 address add 192.168.1.1/32 dev veth1
+ ip -n $h2 route add default dev veth1
+
+ # Add a nexthop without @gw and use it in a route
+ ip -n $h1 nexthop add id 1 dev veth0
+ ip -n $h1 route add 192.168.1.1 nhid 1
+}
+
+cleanup()
+{
+ cleanup_ns $h1 $h2
+}
+
+trap cleanup EXIT
+
+################################################################################
+# main
+
+while getopts :pv o
+do
+ case $o in
+ p) PAUSE_ON_FAIL=yes;;
+ v) VERBOSE=1;;
+ esac
+done
+
+setup
+
+run_cmd ip -netns $h1 route get 192.168.1.1
+log_test $? 0 "nexthop: get route with nexthop without gw"
+run_cmd ip netns exec $h1 ping -c1 192.168.1.1
+log_test $? 0 "nexthop: ping through nexthop without gw"
+
+exit $ret
diff --git a/tools/testing/selftests/net/fib_nexthops.sh b/tools/testing/selftests/net/fib_nexthops.sh
index dee567f7576a..ac0b2c6a5761 100755
--- a/tools/testing/selftests/net/fib_nexthops.sh
+++ b/tools/testing/selftests/net/fib_nexthops.sh
@@ -14,19 +14,52 @@
# objects. Device reference counts and network namespace cleanup tested
# by use of network namespace for peer.
+source lib.sh
ret=0
# Kselftest framework requirement - SKIP code is 4.
ksft_skip=4
# all tests in this script. Can be overridden with -t option
-IPV4_TESTS="ipv4_fcnal ipv4_grp_fcnal ipv4_withv6_fcnal ipv4_fcnal_runtime ipv4_large_grp ipv4_compat_mode ipv4_fdb_grp_fcnal ipv4_torture"
-IPV6_TESTS="ipv6_fcnal ipv6_grp_fcnal ipv6_fcnal_runtime ipv6_large_grp ipv6_compat_mode ipv6_fdb_grp_fcnal ipv6_torture"
-
-ALL_TESTS="basic ${IPV4_TESTS} ${IPV6_TESTS}"
+IPV4_TESTS="
+ ipv4_fcnal
+ ipv4_grp_fcnal
+ ipv4_res_grp_fcnal
+ ipv4_withv6_fcnal
+ ipv4_fcnal_runtime
+ ipv4_large_grp
+ ipv4_large_res_grp
+ ipv4_compat_mode
+ ipv4_fdb_grp_fcnal
+ ipv4_mpath_select
+ ipv4_torture
+ ipv4_res_torture
+"
+
+IPV6_TESTS="
+ ipv6_fcnal
+ ipv6_grp_fcnal
+ ipv6_res_grp_fcnal
+ ipv6_fcnal_runtime
+ ipv6_large_grp
+ ipv6_large_res_grp
+ ipv6_compat_mode
+ ipv6_fdb_grp_fcnal
+ ipv6_mpath_select
+ ipv6_torture
+ ipv6_res_torture
+"
+
+ALL_TESTS="
+ basic
+ basic_res
+ ${IPV4_TESTS}
+ ${IPV6_TESTS}
+"
TESTS="${ALL_TESTS}"
VERBOSE=0
PAUSE_ON_FAIL=no
PAUSE=no
+PING_TIMEOUT=5
nsid=100
@@ -116,13 +149,7 @@ create_ns()
{
local n=${1}
- ip netns del ${n} 2>/dev/null
-
set -e
- ip netns add ${n}
- ip netns set ${n} $((nsid++))
- ip -netns ${n} addr add 127.0.0.1/8 dev lo
- ip -netns ${n} link set lo up
ip netns exec ${n} sysctl -qw net.ipv4.ip_forward=1
ip netns exec ${n} sysctl -qw net.ipv4.fib_multipath_use_neigh=1
@@ -141,12 +168,13 @@ setup()
{
cleanup
- create_ns me
- create_ns peer
- create_ns remote
+ setup_ns me peer remote
+ create_ns $me
+ create_ns $peer
+ create_ns $remote
- IP="ip -netns me"
- BRIDGE="bridge -netns me"
+ IP="ip -netns $me"
+ BRIDGE="bridge -netns $me"
set -e
$IP li add veth1 type veth peer name veth2
$IP li set veth1 up
@@ -158,24 +186,24 @@ setup()
$IP addr add 172.16.2.1/24 dev veth3
$IP -6 addr add 2001:db8:92::1/64 dev veth3 nodad
- $IP li set veth2 netns peer up
- ip -netns peer addr add 172.16.1.2/24 dev veth2
- ip -netns peer -6 addr add 2001:db8:91::2/64 dev veth2 nodad
+ $IP li set veth2 netns $peer up
+ ip -netns $peer addr add 172.16.1.2/24 dev veth2
+ ip -netns $peer -6 addr add 2001:db8:91::2/64 dev veth2 nodad
- $IP li set veth4 netns peer up
- ip -netns peer addr add 172.16.2.2/24 dev veth4
- ip -netns peer -6 addr add 2001:db8:92::2/64 dev veth4 nodad
+ $IP li set veth4 netns $peer up
+ ip -netns $peer addr add 172.16.2.2/24 dev veth4
+ ip -netns $peer -6 addr add 2001:db8:92::2/64 dev veth4 nodad
- ip -netns remote li add veth5 type veth peer name veth6
- ip -netns remote li set veth5 up
- ip -netns remote addr add dev veth5 172.16.101.1/24
- ip -netns remote -6 addr add dev veth5 2001:db8:101::1/64 nodad
- ip -netns remote ro add 172.16.0.0/22 via 172.16.101.2
- ip -netns remote -6 ro add 2001:db8:90::/40 via 2001:db8:101::2
+ ip -netns $remote li add veth5 type veth peer name veth6
+ ip -netns $remote li set veth5 up
+ ip -netns $remote addr add dev veth5 172.16.101.1/24
+ ip -netns $remote -6 addr add dev veth5 2001:db8:101::1/64 nodad
+ ip -netns $remote ro add 172.16.0.0/22 via 172.16.101.2
+ ip -netns $remote -6 ro add 2001:db8:90::/40 via 2001:db8:101::2
- ip -netns remote li set veth6 netns peer up
- ip -netns peer addr add dev veth6 172.16.101.2/24
- ip -netns peer -6 addr add dev veth6 2001:db8:101::2/64 nodad
+ ip -netns $remote li set veth6 netns $peer up
+ ip -netns $peer addr add dev veth6 172.16.101.2/24
+ ip -netns $peer -6 addr add dev veth6 2001:db8:101::2/64 nodad
set +e
}
@@ -183,7 +211,7 @@ cleanup()
{
local ns
- for ns in me peer remote; do
+ for ns in $me $peer $remote; do
ip netns del ${ns} 2>/dev/null
done
}
@@ -232,6 +260,19 @@ check_nexthop()
check_output "${out}" "${expected}"
}
+check_nexthop_bucket()
+{
+ local nharg="$1"
+ local expected="$2"
+ local out
+
+ # remove the idle time since we cannot match it
+ out=$($IP nexthop bucket ${nharg} \
+ | sed s/idle_time\ [0-9.]*\ // 2>/dev/null)
+
+ check_output "${out}" "${expected}"
+}
+
check_route()
{
local pfx="$1"
@@ -308,6 +349,46 @@ check_large_grp()
log_test $? 0 "Dump large (x$ecmp) ecmp groups"
}
+check_large_res_grp()
+{
+ local ipv=$1
+ local buckets=$2
+ local ipstr=""
+
+ if [ $ipv -eq 4 ]; then
+ ipstr="172.16.1.2"
+ else
+ ipstr="2001:db8:91::2"
+ fi
+
+ # create a resilient group with $buckets buckets and dump them
+ run_cmd "$IP nexthop add id 100 via $ipstr dev veth1"
+ run_cmd "$IP nexthop add id 1000 group 100 type resilient buckets $buckets"
+ run_cmd "$IP nexthop bucket list"
+ log_test $? 0 "Dump large (x$buckets) nexthop buckets"
+}
+
+get_route_dev()
+{
+ local pfx="$1"
+ local out
+
+ if out=$($IP -j route get "$pfx" | jq -re ".[0].dev"); then
+ echo "$out"
+ fi
+}
+
+check_route_dev()
+{
+ local pfx="$1"
+ local expected="$2"
+ local out
+
+ out=$(get_route_dev "$pfx")
+
+ check_output "$out" "$expected"
+}
+
start_ip_monitor()
{
local mtype=$1
@@ -344,6 +425,15 @@ check_nexthop_fdb_support()
fi
}
+check_nexthop_res_support()
+{
+ $IP nexthop help 2>&1 | grep -q resilient
+ if [ $? -ne 0 ]; then
+ echo "SKIP: iproute2 too old, missing resilient nexthop group support"
+ return $ksft_skip
+ fi
+}
+
ipv6_fdb_grp_fcnal()
{
local rc
@@ -411,9 +501,16 @@ ipv6_fdb_grp_fcnal()
run_cmd "$IP -6 ro add 2001:db8:101::1/128 nhid 103"
log_test $? 2 "Route add with fdb nexthop group"
+ run_cmd "$IP nexthop del id 61"
+ run_cmd "$BRIDGE fdb get to 02:02:00:00:00:13 dev vx10 self"
+ log_test $? 0 "Fdb entry after deleting a single nexthop"
+
run_cmd "$IP nexthop del id 102"
log_test $? 0 "Fdb nexthop delete"
+ run_cmd "$BRIDGE fdb get to 02:02:00:00:00:13 dev vx10 self"
+ log_test $? 254 "Fdb entry after deleting a nexthop group"
+
$IP link del dev vx10
}
@@ -484,12 +581,125 @@ ipv4_fdb_grp_fcnal()
run_cmd "$IP ro add 172.16.0.0/22 nhid 103"
log_test $? 2 "Route add with fdb nexthop group"
+ run_cmd "$IP nexthop del id 12"
+ run_cmd "$BRIDGE fdb get to 02:02:00:00:00:13 dev vx10 self"
+ log_test $? 0 "Fdb entry after deleting a single nexthop"
+
run_cmd "$IP nexthop del id 102"
log_test $? 0 "Fdb nexthop delete"
+ run_cmd "$BRIDGE fdb get to 02:02:00:00:00:13 dev vx10 self"
+ log_test $? 254 "Fdb entry after deleting a nexthop group"
+
$IP link del dev vx10
}
+ipv4_mpath_select()
+{
+ local rc dev match h addr
+
+ echo
+ echo "IPv4 multipath selection"
+ echo "------------------------"
+ if [ ! -x "$(command -v jq)" ]; then
+ echo "SKIP: Could not run test; need jq tool"
+ return $ksft_skip
+ fi
+
+ # Use status of existing neighbor entry when determining nexthop for
+ # multipath routes.
+ local -A gws
+ gws=([veth1]=172.16.1.2 [veth3]=172.16.2.2)
+ local -A other_dev
+ other_dev=([veth1]=veth3 [veth3]=veth1)
+
+ run_cmd "$IP nexthop add id 1 via ${gws["veth1"]} dev veth1"
+ run_cmd "$IP nexthop add id 2 via ${gws["veth3"]} dev veth3"
+ run_cmd "$IP nexthop add id 1001 group 1/2"
+ run_cmd "$IP ro add 172.16.101.0/24 nhid 1001"
+ rc=0
+ for dev in veth1 veth3; do
+ match=0
+ for h in {1..254}; do
+ addr="172.16.101.$h"
+ if [ "$(get_route_dev "$addr")" = "$dev" ]; then
+ match=1
+ break
+ fi
+ done
+ if (( match == 0 )); then
+ echo "SKIP: Did not find a route using device $dev"
+ return $ksft_skip
+ fi
+ run_cmd "$IP neigh add ${gws[$dev]} dev $dev nud failed"
+ if ! check_route_dev "$addr" "${other_dev[$dev]}"; then
+ rc=1
+ break
+ fi
+ run_cmd "$IP neigh del ${gws[$dev]} dev $dev"
+ done
+ log_test $rc 0 "Use valid neighbor during multipath selection"
+
+ run_cmd "$IP neigh add 172.16.1.2 dev veth1 nud incomplete"
+ run_cmd "$IP neigh add 172.16.2.2 dev veth3 nud incomplete"
+ run_cmd "$IP route get 172.16.101.1"
+ # if we did not crash, success
+ log_test $rc 0 "Multipath selection with no valid neighbor"
+}
+
+ipv6_mpath_select()
+{
+ local rc dev match h addr
+
+ echo
+ echo "IPv6 multipath selection"
+ echo "------------------------"
+ if [ ! -x "$(command -v jq)" ]; then
+ echo "SKIP: Could not run test; need jq tool"
+ return $ksft_skip
+ fi
+
+ # Use status of existing neighbor entry when determining nexthop for
+ # multipath routes.
+ local -A gws
+ gws=([veth1]=2001:db8:91::2 [veth3]=2001:db8:92::2)
+ local -A other_dev
+ other_dev=([veth1]=veth3 [veth3]=veth1)
+
+ run_cmd "$IP nexthop add id 1 via ${gws["veth1"]} dev veth1"
+ run_cmd "$IP nexthop add id 2 via ${gws["veth3"]} dev veth3"
+ run_cmd "$IP nexthop add id 1001 group 1/2"
+ run_cmd "$IP ro add 2001:db8:101::/64 nhid 1001"
+ rc=0
+ for dev in veth1 veth3; do
+ match=0
+ for h in {1..65535}; do
+ addr=$(printf "2001:db8:101::%x" $h)
+ if [ "$(get_route_dev "$addr")" = "$dev" ]; then
+ match=1
+ break
+ fi
+ done
+ if (( match == 0 )); then
+ echo "SKIP: Did not find a route using device $dev"
+ return $ksft_skip
+ fi
+ run_cmd "$IP neigh add ${gws[$dev]} dev $dev nud failed"
+ if ! check_route_dev "$addr" "${other_dev[$dev]}"; then
+ rc=1
+ break
+ fi
+ run_cmd "$IP neigh del ${gws[$dev]} dev $dev"
+ done
+ log_test $rc 0 "Use valid neighbor during multipath selection"
+
+ run_cmd "$IP neigh add 2001:db8:91::2 dev veth1 nud incomplete"
+ run_cmd "$IP neigh add 2001:db8:92::2 dev veth3 nud incomplete"
+ run_cmd "$IP route get 2001:db8:101::1"
+ # if we did not crash, success
+ log_test $rc 0 "Multipath selection with no valid neighbor"
+}
+
################################################################################
# basic operations (add, delete, replace) on nexthops and nexthop groups
#
@@ -545,6 +755,66 @@ ipv6_fcnal()
log_test $? 0 "Nexthops removed on admin down"
}
+ipv6_grp_refs()
+{
+ if [ ! -x "$(command -v mausezahn)" ]; then
+ echo "SKIP: Could not run test; need mausezahn tool"
+ return
+ fi
+
+ run_cmd "$IP link set dev veth1 up"
+ run_cmd "$IP link add veth1.10 link veth1 up type vlan id 10"
+ run_cmd "$IP link add veth1.20 link veth1 up type vlan id 20"
+ run_cmd "$IP -6 addr add 2001:db8:91::1/64 dev veth1.10"
+ run_cmd "$IP -6 addr add 2001:db8:92::1/64 dev veth1.20"
+ run_cmd "$IP -6 neigh add 2001:db8:91::2 lladdr 00:11:22:33:44:55 dev veth1.10"
+ run_cmd "$IP -6 neigh add 2001:db8:92::2 lladdr 00:11:22:33:44:55 dev veth1.20"
+ run_cmd "$IP nexthop add id 100 via 2001:db8:91::2 dev veth1.10"
+ run_cmd "$IP nexthop add id 101 via 2001:db8:92::2 dev veth1.20"
+ run_cmd "$IP nexthop add id 102 group 100"
+ run_cmd "$IP route add 2001:db8:101::1/128 nhid 102"
+
+ # create per-cpu dsts through nh 100
+ run_cmd "ip netns exec $me mausezahn -6 veth1.10 -B 2001:db8:101::1 -A 2001:db8:91::1 -c 5 -t tcp "dp=1-1023, flags=syn" >/dev/null 2>&1"
+
+ # remove nh 100 from the group to delete the route potentially leaving
+ # a stale per-cpu dst which holds a reference to the nexthop's net
+ # device and to the IPv6 route
+ run_cmd "$IP nexthop replace id 102 group 101"
+ run_cmd "$IP route del 2001:db8:101::1/128"
+
+ # add both nexthops to the group so a reference is taken on them
+ run_cmd "$IP nexthop replace id 102 group 100/101"
+
+ # if the bug described in commit "net: nexthop: release IPv6 per-cpu
+ # dsts when replacing a nexthop group" exists at this point we have
+ # an unlinked IPv6 route (but not freed due to stale dst) with a
+ # reference over the group so we delete the group which will again
+ # only unlink it due to the route reference
+ run_cmd "$IP nexthop del id 102"
+
+ # delete the nexthop with stale dst, since we have an unlinked
+ # group with a ref to it and an unlinked IPv6 route with ref to the
+ # group, the nh will only be unlinked and not freed so the stale dst
+ # remains forever and we get a net device refcount imbalance
+ run_cmd "$IP nexthop del id 100"
+
+ # if a reference was lost this command will hang because the net device
+ # cannot be removed
+ timeout -s KILL 5 ip netns exec $me ip link del veth1.10 >/dev/null 2>&1
+
+ # we can't cleanup if the command is hung trying to delete the netdev
+ if [ $? -eq 137 ]; then
+ return 1
+ fi
+
+ # cleanup
+ run_cmd "$IP link del veth1.20"
+ run_cmd "$IP nexthop flush"
+
+ return 0
+}
+
ipv6_grp_fcnal()
{
local rc
@@ -650,6 +920,73 @@ ipv6_grp_fcnal()
run_cmd "$IP nexthop add id 108 group 31/24"
log_test $? 2 "Nexthop group can not have a blackhole and another nexthop"
+
+ ipv6_grp_refs
+ log_test $? 0 "Nexthop group replace refcounts"
+}
+
+ipv6_res_grp_fcnal()
+{
+ local rc
+
+ echo
+ echo "IPv6 resilient groups functional"
+ echo "--------------------------------"
+
+ check_nexthop_res_support
+ if [ $? -eq $ksft_skip ]; then
+ return $ksft_skip
+ fi
+
+ #
+ # migration of nexthop buckets - equal weights
+ #
+ run_cmd "$IP nexthop add id 62 via 2001:db8:91::2 dev veth1"
+ run_cmd "$IP nexthop add id 63 via 2001:db8:91::3 dev veth1"
+ run_cmd "$IP nexthop add id 102 group 62/63 type resilient buckets 2 idle_timer 0"
+
+ run_cmd "$IP nexthop del id 63"
+ check_nexthop "id 102" \
+ "id 102 group 62 type resilient buckets 2 idle_timer 0 unbalanced_timer 0 unbalanced_time 0"
+ log_test $? 0 "Nexthop group updated when entry is deleted"
+ check_nexthop_bucket "list id 102" \
+ "id 102 index 0 nhid 62 id 102 index 1 nhid 62"
+ log_test $? 0 "Nexthop buckets updated when entry is deleted"
+
+ run_cmd "$IP nexthop add id 63 via 2001:db8:91::3 dev veth1"
+ run_cmd "$IP nexthop replace id 102 group 62/63 type resilient buckets 2 idle_timer 0"
+ check_nexthop "id 102" \
+ "id 102 group 62/63 type resilient buckets 2 idle_timer 0 unbalanced_timer 0 unbalanced_time 0"
+ log_test $? 0 "Nexthop group updated after replace"
+ check_nexthop_bucket "list id 102" \
+ "id 102 index 0 nhid 63 id 102 index 1 nhid 62"
+ log_test $? 0 "Nexthop buckets updated after replace"
+
+ $IP nexthop flush >/dev/null 2>&1
+
+ #
+ # migration of nexthop buckets - unequal weights
+ #
+ run_cmd "$IP nexthop add id 62 via 2001:db8:91::2 dev veth1"
+ run_cmd "$IP nexthop add id 63 via 2001:db8:91::3 dev veth1"
+ run_cmd "$IP nexthop add id 102 group 62,3/63,1 type resilient buckets 4 idle_timer 0"
+
+ run_cmd "$IP nexthop del id 63"
+ check_nexthop "id 102" \
+ "id 102 group 62,3 type resilient buckets 4 idle_timer 0 unbalanced_timer 0 unbalanced_time 0"
+ log_test $? 0 "Nexthop group updated when entry is deleted - nECMP"
+ check_nexthop_bucket "list id 102" \
+ "id 102 index 0 nhid 62 id 102 index 1 nhid 62 id 102 index 2 nhid 62 id 102 index 3 nhid 62"
+ log_test $? 0 "Nexthop buckets updated when entry is deleted - nECMP"
+
+ run_cmd "$IP nexthop add id 63 via 2001:db8:91::3 dev veth1"
+ run_cmd "$IP nexthop replace id 102 group 62,3/63,1 type resilient buckets 4 idle_timer 0"
+ check_nexthop "id 102" \
+ "id 102 group 62,3/63 type resilient buckets 4 idle_timer 0 unbalanced_timer 0 unbalanced_time 0"
+ log_test $? 0 "Nexthop group updated after replace - nECMP"
+ check_nexthop_bucket "list id 102" \
+ "id 102 index 0 nhid 63 id 102 index 1 nhid 62 id 102 index 2 nhid 62 id 102 index 3 nhid 62"
+ log_test $? 0 "Nexthop buckets updated after replace - nECMP"
}
ipv6_fcnal_runtime()
@@ -671,13 +1008,13 @@ ipv6_fcnal_runtime()
log_test $? 0 "Route delete"
run_cmd "$IP ro add 2001:db8:101::1/128 nhid 81"
- run_cmd "ip netns exec me ping -c1 -w1 2001:db8:101::1"
+ run_cmd "ip netns exec $me ping -c1 -w$PING_TIMEOUT 2001:db8:101::1"
log_test $? 0 "Ping with nexthop"
run_cmd "$IP nexthop add id 82 via 2001:db8:92::2 dev veth3"
run_cmd "$IP nexthop add id 122 group 81/82"
run_cmd "$IP ro replace 2001:db8:101::1/128 nhid 122"
- run_cmd "ip netns exec me ping -c1 -w1 2001:db8:101::1"
+ run_cmd "ip netns exec $me ping -c1 -w$PING_TIMEOUT 2001:db8:101::1"
log_test $? 0 "Ping - multipath"
#
@@ -685,26 +1022,26 @@ ipv6_fcnal_runtime()
#
run_cmd "$IP -6 nexthop add id 83 blackhole"
run_cmd "$IP ro replace 2001:db8:101::1/128 nhid 83"
- run_cmd "ip netns exec me ping -c1 -w1 2001:db8:101::1"
+ run_cmd "ip netns exec $me ping -c1 -w$PING_TIMEOUT 2001:db8:101::1"
log_test $? 2 "Ping - blackhole"
run_cmd "$IP nexthop replace id 83 via 2001:db8:91::2 dev veth1"
- run_cmd "ip netns exec me ping -c1 -w1 2001:db8:101::1"
+ run_cmd "ip netns exec $me ping -c1 -w$PING_TIMEOUT 2001:db8:101::1"
log_test $? 0 "Ping - blackhole replaced with gateway"
run_cmd "$IP -6 nexthop replace id 83 blackhole"
- run_cmd "ip netns exec me ping -c1 -w1 2001:db8:101::1"
+ run_cmd "ip netns exec $me ping -c1 -w$PING_TIMEOUT 2001:db8:101::1"
log_test $? 2 "Ping - gateway replaced by blackhole"
run_cmd "$IP ro replace 2001:db8:101::1/128 nhid 122"
- run_cmd "ip netns exec me ping -c1 -w1 2001:db8:101::1"
+ run_cmd "ip netns exec $me ping -c1 -w$PING_TIMEOUT 2001:db8:101::1"
if [ $? -eq 0 ]; then
run_cmd "$IP nexthop replace id 122 group 83"
- run_cmd "ip netns exec me ping -c1 -w1 2001:db8:101::1"
+ run_cmd "ip netns exec $me ping -c1 -w$PING_TIMEOUT 2001:db8:101::1"
log_test $? 2 "Ping - group with blackhole"
run_cmd "$IP nexthop replace id 122 group 81/82"
- run_cmd "ip netns exec me ping -c1 -w1 2001:db8:101::1"
+ run_cmd "ip netns exec $me ping -c1 -w$PING_TIMEOUT 2001:db8:101::1"
log_test $? 0 "Ping - group blackhole replaced with gateways"
else
log_test 2 0 "Ping - multipath failed"
@@ -739,6 +1076,36 @@ ipv6_fcnal_runtime()
run_cmd "$IP nexthop replace id 81 via 172.16.1.1 dev veth1"
log_test $? 2 "Nexthop replace of group entry - v6 route, v4 nexthop"
+ run_cmd "$IP nexthop add id 86 via 2001:db8:92::2 dev veth3"
+ run_cmd "$IP nexthop add id 87 via 172.16.1.1 dev veth1"
+ run_cmd "$IP nexthop add id 88 via 172.16.1.1 dev veth1"
+ run_cmd "$IP nexthop add id 124 group 86/87/88"
+ run_cmd "$IP ro replace 2001:db8:101::1/128 nhid 124"
+ log_test $? 2 "IPv6 route can not have a group with v4 and v6 gateways"
+
+ run_cmd "$IP nexthop del id 88"
+ run_cmd "$IP ro replace 2001:db8:101::1/128 nhid 124"
+ log_test $? 2 "IPv6 route can not have a group with v4 and v6 gateways"
+
+ run_cmd "$IP nexthop del id 87"
+ run_cmd "$IP ro replace 2001:db8:101::1/128 nhid 124"
+ log_test $? 0 "IPv6 route using a group after removing v4 gateways"
+
+ run_cmd "$IP ro delete 2001:db8:101::1/128"
+ run_cmd "$IP nexthop add id 87 via 172.16.1.1 dev veth1"
+ run_cmd "$IP nexthop add id 88 via 172.16.1.1 dev veth1"
+ run_cmd "$IP nexthop replace id 124 group 86/87/88"
+ run_cmd "$IP ro replace 2001:db8:101::1/128 nhid 124"
+ log_test $? 2 "IPv6 route can not have a group with v4 and v6 gateways"
+
+ run_cmd "$IP nexthop replace id 88 via 2001:db8:92::2 dev veth3"
+ run_cmd "$IP ro replace 2001:db8:101::1/128 nhid 124"
+ log_test $? 2 "IPv6 route can not have a group with v4 and v6 gateways"
+
+ run_cmd "$IP nexthop replace id 87 via 2001:db8:92::2 dev veth3"
+ run_cmd "$IP ro replace 2001:db8:101::1/128 nhid 124"
+ log_test $? 0 "IPv6 route using a group after replacing v4 gateways"
+
$IP nexthop flush >/dev/null 2>&1
#
@@ -747,6 +1114,27 @@ ipv6_fcnal_runtime()
run_cmd "$IP nexthop add id 86 via 2001:db8:91::2 dev veth1"
run_cmd "$IP ro add 2001:db8:101::1/128 nhid 81"
+ # route can not use prefsrc with nexthops
+ run_cmd "$IP ro add 2001:db8:101::2/128 nhid 86 from 2001:db8:91::1"
+ log_test $? 2 "IPv6 route can not use src routing with external nexthop"
+
+ # check cleanup path on invalid metric
+ run_cmd "$IP ro add 2001:db8:101::2/128 nhid 86 congctl lock foo"
+ log_test $? 2 "IPv6 route with invalid metric"
+
+ # rpfilter and default route
+ $IP nexthop flush >/dev/null 2>&1
+ run_cmd "ip netns exec $me ip6tables -t mangle -I PREROUTING 1 -m rpfilter --invert -j DROP"
+ run_cmd "$IP nexthop add id 91 via 2001:db8:91::2 dev veth1"
+ run_cmd "$IP nexthop add id 92 via 2001:db8:92::2 dev veth3"
+ run_cmd "$IP nexthop add id 93 group 91/92"
+ run_cmd "$IP -6 ro add default nhid 91"
+ run_cmd "ip netns exec $me ping -c1 -w$PING_TIMEOUT 2001:db8:101::1"
+ log_test $? 0 "Nexthop with default route and rpfilter"
+ run_cmd "$IP -6 ro replace default nhid 93"
+ run_cmd "ip netns exec $me ping -c1 -w$PING_TIMEOUT 2001:db8:101::1"
+ log_test $? 0 "Nexthop with multipath default route and rpfilter"
+
# TO-DO:
# existing route with old nexthop; append route with new nexthop
# existing route with old nexthop; replace route with new
@@ -767,6 +1155,22 @@ ipv6_large_grp()
$IP nexthop flush >/dev/null 2>&1
}
+ipv6_large_res_grp()
+{
+ echo
+ echo "IPv6 large resilient group (128k buckets)"
+ echo "-----------------------------------------"
+
+ check_nexthop_res_support
+ if [ $? -eq $ksft_skip ]; then
+ return $ksft_skip
+ fi
+
+ check_large_res_grp 6 $((128 * 1024))
+
+ $IP nexthop flush >/dev/null 2>&1
+}
+
ipv6_del_add_loop1()
{
while :; do
@@ -808,20 +1212,76 @@ ipv6_torture()
pid1=$!
ipv6_grp_replace_loop &
pid2=$!
- ip netns exec me ping -f 2001:db8:101::1 >/dev/null 2>&1 &
+ ip netns exec $me ping -f 2001:db8:101::1 >/dev/null 2>&1 &
pid3=$!
- ip netns exec me ping -f 2001:db8:101::2 >/dev/null 2>&1 &
+ ip netns exec $me ping -f 2001:db8:101::2 >/dev/null 2>&1 &
pid4=$!
- ip netns exec me mausezahn veth1 -B 2001:db8:101::2 -A 2001:db8:91::1 -c 0 -t tcp "dp=1-1023, flags=syn" >/dev/null 2>&1 &
+ ip netns exec $me mausezahn -6 veth1 -B 2001:db8:101::2 -A 2001:db8:91::1 -c 0 -t tcp "dp=1-1023, flags=syn" >/dev/null 2>&1 &
pid5=$!
sleep 300
kill -9 $pid1 $pid2 $pid3 $pid4 $pid5
+ wait $pid1 $pid2 $pid3 $pid4 $pid5 2>/dev/null
# if we did not crash, success
log_test 0 0 "IPv6 torture test"
}
+ipv6_res_grp_replace_loop()
+{
+ while :; do
+ $IP nexthop replace id 102 group 100/101 type resilient
+ done >/dev/null 2>&1
+}
+
+ipv6_res_torture()
+{
+ local pid1
+ local pid2
+ local pid3
+ local pid4
+ local pid5
+
+ echo
+ echo "IPv6 runtime resilient nexthop group torture"
+ echo "--------------------------------------------"
+
+ check_nexthop_res_support
+ if [ $? -eq $ksft_skip ]; then
+ return $ksft_skip
+ fi
+
+ if [ ! -x "$(command -v mausezahn)" ]; then
+ echo "SKIP: Could not run test; need mausezahn tool"
+ return
+ fi
+
+ run_cmd "$IP nexthop add id 100 via 2001:db8:91::2 dev veth1"
+ run_cmd "$IP nexthop add id 101 via 2001:db8:92::2 dev veth3"
+ run_cmd "$IP nexthop add id 102 group 100/101 type resilient buckets 512 idle_timer 0"
+ run_cmd "$IP route add 2001:db8:101::1 nhid 102"
+ run_cmd "$IP route add 2001:db8:101::2 nhid 102"
+
+ ipv6_del_add_loop1 &
+ pid1=$!
+ ipv6_res_grp_replace_loop &
+ pid2=$!
+ ip netns exec $me ping -f 2001:db8:101::1 >/dev/null 2>&1 &
+ pid3=$!
+ ip netns exec $me ping -f 2001:db8:101::2 >/dev/null 2>&1 &
+ pid4=$!
+ ip netns exec $me mausezahn -6 veth1 \
+ -B 2001:db8:101::2 -A 2001:db8:91::1 -c 0 \
+ -t tcp "dp=1-1023, flags=syn" >/dev/null 2>&1 &
+ pid5=$!
+
+ sleep 300
+ kill -9 $pid1 $pid2 $pid3 $pid4 $pid5
+ wait $pid1 $pid2 $pid3 $pid4 $pid5 2>/dev/null
+
+ # if we did not crash, success
+ log_test 0 0 "IPv6 resilient nexthop group torture test"
+}
ipv4_fcnal()
{
@@ -874,6 +1334,36 @@ ipv4_fcnal()
set +e
check_nexthop "dev veth1" ""
log_test $? 0 "Nexthops removed on admin down"
+
+ # nexthop route delete warning: route add with nhid and delete
+ # using device
+ run_cmd "$IP li set dev veth1 up"
+ run_cmd "$IP nexthop add id 12 via 172.16.1.3 dev veth1"
+ out1=`dmesg | grep "WARNING:.*fib_nh_match.*" | wc -l`
+ run_cmd "$IP route add 172.16.101.1/32 nhid 12"
+ run_cmd "$IP route delete 172.16.101.1/32 dev veth1"
+ out2=`dmesg | grep "WARNING:.*fib_nh_match.*" | wc -l`
+ [ $out1 -eq $out2 ]
+ rc=$?
+ log_test $rc 0 "Delete nexthop route warning"
+ run_cmd "$IP route delete 172.16.101.1/32 nhid 12"
+ run_cmd "$IP nexthop del id 12"
+
+ run_cmd "$IP nexthop add id 21 via 172.16.1.6 dev veth1"
+ run_cmd "$IP ro add 172.16.101.0/24 nhid 21"
+ run_cmd "$IP ro del 172.16.101.0/24 nexthop via 172.16.1.7 dev veth1 nexthop via 172.16.1.8 dev veth1"
+ log_test $? 2 "Delete multipath route with only nh id based entry"
+
+ run_cmd "$IP nexthop add id 22 via 172.16.1.6 dev veth1"
+ run_cmd "$IP ro add 172.16.102.0/24 nhid 22"
+ run_cmd "$IP ro del 172.16.102.0/24 dev veth1"
+ log_test $? 2 "Delete route when specifying only nexthop device"
+
+ run_cmd "$IP ro del 172.16.102.0/24 via 172.16.1.6"
+ log_test $? 2 "Delete route when specifying only gateway"
+
+ run_cmd "$IP ro del 172.16.102.0/24"
+ log_test $? 0 "Delete route when not specifying nexthop attributes"
}
ipv4_grp_fcnal()
@@ -981,12 +1471,76 @@ ipv4_grp_fcnal()
log_test $? 2 "Nexthop group can not have a blackhole and another nexthop"
}
+ipv4_res_grp_fcnal()
+{
+ local rc
+
+ echo
+ echo "IPv4 resilient groups functional"
+ echo "--------------------------------"
+
+ check_nexthop_res_support
+ if [ $? -eq $ksft_skip ]; then
+ return $ksft_skip
+ fi
+
+ #
+ # migration of nexthop buckets - equal weights
+ #
+ run_cmd "$IP nexthop add id 12 via 172.16.1.2 dev veth1"
+ run_cmd "$IP nexthop add id 13 via 172.16.1.3 dev veth1"
+ run_cmd "$IP nexthop add id 102 group 12/13 type resilient buckets 2 idle_timer 0"
+
+ run_cmd "$IP nexthop del id 13"
+ check_nexthop "id 102" \
+ "id 102 group 12 type resilient buckets 2 idle_timer 0 unbalanced_timer 0 unbalanced_time 0"
+ log_test $? 0 "Nexthop group updated when entry is deleted"
+ check_nexthop_bucket "list id 102" \
+ "id 102 index 0 nhid 12 id 102 index 1 nhid 12"
+ log_test $? 0 "Nexthop buckets updated when entry is deleted"
+
+ run_cmd "$IP nexthop add id 13 via 172.16.1.3 dev veth1"
+ run_cmd "$IP nexthop replace id 102 group 12/13 type resilient buckets 2 idle_timer 0"
+ check_nexthop "id 102" \
+ "id 102 group 12/13 type resilient buckets 2 idle_timer 0 unbalanced_timer 0 unbalanced_time 0"
+ log_test $? 0 "Nexthop group updated after replace"
+ check_nexthop_bucket "list id 102" \
+ "id 102 index 0 nhid 13 id 102 index 1 nhid 12"
+ log_test $? 0 "Nexthop buckets updated after replace"
+
+ $IP nexthop flush >/dev/null 2>&1
+
+ #
+ # migration of nexthop buckets - unequal weights
+ #
+ run_cmd "$IP nexthop add id 12 via 172.16.1.2 dev veth1"
+ run_cmd "$IP nexthop add id 13 via 172.16.1.3 dev veth1"
+ run_cmd "$IP nexthop add id 102 group 12,3/13,1 type resilient buckets 4 idle_timer 0"
+
+ run_cmd "$IP nexthop del id 13"
+ check_nexthop "id 102" \
+ "id 102 group 12,3 type resilient buckets 4 idle_timer 0 unbalanced_timer 0 unbalanced_time 0"
+ log_test $? 0 "Nexthop group updated when entry is deleted - nECMP"
+ check_nexthop_bucket "list id 102" \
+ "id 102 index 0 nhid 12 id 102 index 1 nhid 12 id 102 index 2 nhid 12 id 102 index 3 nhid 12"
+ log_test $? 0 "Nexthop buckets updated when entry is deleted - nECMP"
+
+ run_cmd "$IP nexthop add id 13 via 172.16.1.3 dev veth1"
+ run_cmd "$IP nexthop replace id 102 group 12,3/13,1 type resilient buckets 4 idle_timer 0"
+ check_nexthop "id 102" \
+ "id 102 group 12,3/13 type resilient buckets 4 idle_timer 0 unbalanced_timer 0 unbalanced_time 0"
+ log_test $? 0 "Nexthop group updated after replace - nECMP"
+ check_nexthop_bucket "list id 102" \
+ "id 102 index 0 nhid 13 id 102 index 1 nhid 12 id 102 index 2 nhid 12 id 102 index 3 nhid 12"
+ log_test $? 0 "Nexthop buckets updated after replace - nECMP"
+}
+
ipv4_withv6_fcnal()
{
local lladdr
set -e
- lladdr=$(get_linklocal veth2 peer)
+ lladdr=$(get_linklocal veth2 $peer)
run_cmd "$IP nexthop add id 11 via ${lladdr} dev veth1"
set +e
run_cmd "$IP ro add 172.16.101.1/32 nhid 11"
@@ -1039,18 +1593,22 @@ ipv4_fcnal_runtime()
run_cmd "$IP nexthop replace id 22 via 172.16.2.2 dev veth3"
log_test $? 2 "Nexthop replace with invalid scope for existing route"
+ # check cleanup path on invalid metric
+ run_cmd "$IP ro add 172.16.101.2/32 nhid 22 congctl lock foo"
+ log_test $? 2 "IPv4 route with invalid metric"
+
#
# add route with nexthop and check traffic
#
run_cmd "$IP nexthop replace id 21 via 172.16.1.2 dev veth1"
run_cmd "$IP ro replace 172.16.101.1/32 nhid 21"
- run_cmd "ip netns exec me ping -c1 -w1 172.16.101.1"
+ run_cmd "ip netns exec $me ping -c1 -w$PING_TIMEOUT 172.16.101.1"
log_test $? 0 "Basic ping"
run_cmd "$IP nexthop replace id 22 via 172.16.2.2 dev veth3"
run_cmd "$IP nexthop add id 122 group 21/22"
run_cmd "$IP ro replace 172.16.101.1/32 nhid 122"
- run_cmd "ip netns exec me ping -c1 -w1 172.16.101.1"
+ run_cmd "ip netns exec $me ping -c1 -w$PING_TIMEOUT 172.16.101.1"
log_test $? 0 "Ping - multipath"
run_cmd "$IP ro delete 172.16.101.1/32 nhid 122"
@@ -1061,7 +1619,7 @@ ipv4_fcnal_runtime()
run_cmd "$IP nexthop add id 501 via 172.16.1.2 dev veth1"
run_cmd "$IP ro add default nhid 501"
run_cmd "$IP ro add default via 172.16.1.3 dev veth1 metric 20"
- run_cmd "ip netns exec me ping -c1 -w1 172.16.101.1"
+ run_cmd "ip netns exec $me ping -c1 -w$PING_TIMEOUT 172.16.101.1"
log_test $? 0 "Ping - multiple default routes, nh first"
# flip the order
@@ -1070,7 +1628,7 @@ ipv4_fcnal_runtime()
run_cmd "$IP ro add default via 172.16.1.2 dev veth1 metric 20"
run_cmd "$IP nexthop replace id 501 via 172.16.1.3 dev veth1"
run_cmd "$IP ro add default nhid 501 metric 20"
- run_cmd "ip netns exec me ping -c1 -w1 172.16.101.1"
+ run_cmd "ip netns exec $me ping -c1 -w$PING_TIMEOUT 172.16.101.1"
log_test $? 0 "Ping - multiple default routes, nh second"
run_cmd "$IP nexthop delete nhid 501"
@@ -1081,26 +1639,26 @@ ipv4_fcnal_runtime()
#
run_cmd "$IP nexthop add id 23 blackhole"
run_cmd "$IP ro replace 172.16.101.1/32 nhid 23"
- run_cmd "ip netns exec me ping -c1 -w1 172.16.101.1"
+ run_cmd "ip netns exec $me ping -c1 -w$PING_TIMEOUT 172.16.101.1"
log_test $? 2 "Ping - blackhole"
run_cmd "$IP nexthop replace id 23 via 172.16.1.2 dev veth1"
- run_cmd "ip netns exec me ping -c1 -w1 172.16.101.1"
+ run_cmd "ip netns exec $me ping -c1 -w$PING_TIMEOUT 172.16.101.1"
log_test $? 0 "Ping - blackhole replaced with gateway"
run_cmd "$IP nexthop replace id 23 blackhole"
- run_cmd "ip netns exec me ping -c1 -w1 172.16.101.1"
+ run_cmd "ip netns exec $me ping -c1 -w$PING_TIMEOUT 172.16.101.1"
log_test $? 2 "Ping - gateway replaced by blackhole"
run_cmd "$IP ro replace 172.16.101.1/32 nhid 122"
- run_cmd "ip netns exec me ping -c1 -w1 172.16.101.1"
+ run_cmd "ip netns exec $me ping -c1 -w$PING_TIMEOUT 172.16.101.1"
if [ $? -eq 0 ]; then
run_cmd "$IP nexthop replace id 122 group 23"
- run_cmd "ip netns exec me ping -c1 -w1 172.16.101.1"
+ run_cmd "ip netns exec $me ping -c1 -w$PING_TIMEOUT 172.16.101.1"
log_test $? 2 "Ping - group with blackhole"
run_cmd "$IP nexthop replace id 122 group 21/22"
- run_cmd "ip netns exec me ping -c1 -w1 172.16.101.1"
+ run_cmd "ip netns exec $me ping -c1 -w$PING_TIMEOUT 172.16.101.1"
log_test $? 0 "Ping - group blackhole replaced with gateways"
else
log_test 2 0 "Ping - multipath failed"
@@ -1123,11 +1681,11 @@ ipv4_fcnal_runtime()
# IPv4 with IPv6
#
set -e
- lladdr=$(get_linklocal veth2 peer)
+ lladdr=$(get_linklocal veth2 $peer)
run_cmd "$IP nexthop add id 24 via ${lladdr} dev veth1"
set +e
run_cmd "$IP ro replace 172.16.101.1/32 nhid 24"
- run_cmd "ip netns exec me ping -c1 -w1 172.16.101.1"
+ run_cmd "ip netns exec $me ping -c1 -w$PING_TIMEOUT 172.16.101.1"
log_test $? 0 "IPv6 nexthop with IPv4 route"
$IP neigh sh | grep -q "${lladdr} dev veth1"
@@ -1151,11 +1709,11 @@ ipv4_fcnal_runtime()
check_route "172.16.101.1" "172.16.101.1 nhid 101 nexthop via inet6 ${lladdr} dev veth1 weight 1 nexthop via 172.16.1.2 dev veth1 weight 1"
- run_cmd "ip netns exec me ping -c1 -w1 172.16.101.1"
+ run_cmd "ip netns exec $me ping -c1 -w$PING_TIMEOUT 172.16.101.1"
log_test $? 0 "IPv6 nexthop with IPv4 route"
run_cmd "$IP ro replace 172.16.101.1/32 via inet6 ${lladdr} dev veth1"
- run_cmd "ip netns exec me ping -c1 -w1 172.16.101.1"
+ run_cmd "ip netns exec $me ping -c1 -w$PING_TIMEOUT 172.16.101.1"
log_test $? 0 "IPv4 route with IPv6 gateway"
$IP neigh sh | grep -q "${lladdr} dev veth1"
@@ -1172,7 +1730,7 @@ ipv4_fcnal_runtime()
run_cmd "$IP ro del 172.16.101.1/32 via inet6 ${lladdr} dev veth1"
run_cmd "$IP -4 ro add default via inet6 ${lladdr} dev veth1"
- run_cmd "ip netns exec me ping -c1 -w1 172.16.101.1"
+ run_cmd "ip netns exec $me ping -c1 -w$PING_TIMEOUT 172.16.101.1"
log_test $? 0 "IPv4 default route with IPv6 gateway"
#
@@ -1202,12 +1760,28 @@ ipv4_large_grp()
$IP nexthop flush >/dev/null 2>&1
}
+ipv4_large_res_grp()
+{
+ echo
+ echo "IPv4 large resilient group (128k buckets)"
+ echo "-----------------------------------------"
+
+ check_nexthop_res_support
+ if [ $? -eq $ksft_skip ]; then
+ return $ksft_skip
+ fi
+
+ check_large_res_grp 4 $((128 * 1024))
+
+ $IP nexthop flush >/dev/null 2>&1
+}
+
sysctl_nexthop_compat_mode_check()
{
local sysctlname="net.ipv4.nexthop_compat_mode"
local lprefix=$1
- IPE="ip netns exec me"
+ IPE="ip netns exec $me"
$IPE sysctl -q $sysctlname 2>&1 >/dev/null
if [ $? -ne 0 ]; then
@@ -1226,7 +1800,7 @@ sysctl_nexthop_compat_mode_set()
local mode=$1
local lprefix=$2
- IPE="ip netns exec me"
+ IPE="ip netns exec $me"
out=$($IPE sysctl -w $sysctlname=$mode)
log_test $? 0 "$lprefix set compat mode - $mode"
@@ -1410,20 +1984,77 @@ ipv4_torture()
pid1=$!
ipv4_grp_replace_loop &
pid2=$!
- ip netns exec me ping -f 172.16.101.1 >/dev/null 2>&1 &
+ ip netns exec $me ping -f 172.16.101.1 >/dev/null 2>&1 &
pid3=$!
- ip netns exec me ping -f 172.16.101.2 >/dev/null 2>&1 &
+ ip netns exec $me ping -f 172.16.101.2 >/dev/null 2>&1 &
pid4=$!
- ip netns exec me mausezahn veth1 -B 172.16.101.2 -A 172.16.1.1 -c 0 -t tcp "dp=1-1023, flags=syn" >/dev/null 2>&1 &
+ ip netns exec $me mausezahn veth1 -B 172.16.101.2 -A 172.16.1.1 -c 0 -t tcp "dp=1-1023, flags=syn" >/dev/null 2>&1 &
pid5=$!
sleep 300
kill -9 $pid1 $pid2 $pid3 $pid4 $pid5
+ wait $pid1 $pid2 $pid3 $pid4 $pid5 2>/dev/null
# if we did not crash, success
log_test 0 0 "IPv4 torture test"
}
+ipv4_res_grp_replace_loop()
+{
+ while :; do
+ $IP nexthop replace id 102 group 100/101 type resilient
+ done >/dev/null 2>&1
+}
+
+ipv4_res_torture()
+{
+ local pid1
+ local pid2
+ local pid3
+ local pid4
+ local pid5
+
+ echo
+ echo "IPv4 runtime resilient nexthop group torture"
+ echo "--------------------------------------------"
+
+ check_nexthop_res_support
+ if [ $? -eq $ksft_skip ]; then
+ return $ksft_skip
+ fi
+
+ if [ ! -x "$(command -v mausezahn)" ]; then
+ echo "SKIP: Could not run test; need mausezahn tool"
+ return
+ fi
+
+ run_cmd "$IP nexthop add id 100 via 172.16.1.2 dev veth1"
+ run_cmd "$IP nexthop add id 101 via 172.16.2.2 dev veth3"
+ run_cmd "$IP nexthop add id 102 group 100/101 type resilient buckets 512 idle_timer 0"
+ run_cmd "$IP route add 172.16.101.1 nhid 102"
+ run_cmd "$IP route add 172.16.101.2 nhid 102"
+
+ ipv4_del_add_loop1 &
+ pid1=$!
+ ipv4_res_grp_replace_loop &
+ pid2=$!
+ ip netns exec $me ping -f 172.16.101.1 >/dev/null 2>&1 &
+ pid3=$!
+ ip netns exec $me ping -f 172.16.101.2 >/dev/null 2>&1 &
+ pid4=$!
+ ip netns exec $me mausezahn veth1 \
+ -B 172.16.101.2 -A 172.16.1.1 -c 0 \
+ -t tcp "dp=1-1023, flags=syn" >/dev/null 2>&1 &
+ pid5=$!
+
+ sleep 300
+ kill -9 $pid1 $pid2 $pid3 $pid4 $pid5
+ wait $pid1 $pid2 $pid3 $pid4 $pid5 2>/dev/null
+
+ # if we did not crash, success
+ log_test 0 0 "IPv4 resilient nexthop group torture test"
+}
+
basic()
{
echo
@@ -1435,6 +2066,12 @@ basic()
run_cmd "$IP nexthop get id 1"
log_test $? 2 "Nexthop get on non-existent id"
+ run_cmd "$IP nexthop del id 1"
+ log_test $? 2 "Nexthop del with non-existent id"
+
+ run_cmd "$IP nexthop del id 1 group 1/2/3/4/5/6/7/8"
+ log_test $? 2 "Nexthop del with non-existent id and extra attributes"
+
# attempt to create nh without a device or gw - fails
run_cmd "$IP nexthop add id 1"
log_test $? 2 "Nexthop with no device or gateway"
@@ -1446,10 +2083,10 @@ basic()
# create nh with linkdown device - fails
$IP li set veth1 up
- ip -netns peer li set veth2 down
+ ip -netns $peer li set veth2 down
run_cmd "$IP nexthop add id 1 dev veth1"
log_test $? 2 "Nexthop with device that is linkdown"
- ip -netns peer li set veth2 up
+ ip -netns $peer li set veth2 up
# device only
run_cmd "$IP nexthop add id 1 dev veth1"
@@ -1467,6 +2104,19 @@ basic()
run_cmd "$IP nexthop replace id 2 blackhole dev veth1"
log_test $? 2 "Blackhole nexthop with other attributes"
+ # blackhole nexthop should not be affected by the state of the loopback
+ # device
+ run_cmd "$IP link set dev lo down"
+ check_nexthop "id 2" "id 2 blackhole"
+ log_test $? 0 "Blackhole nexthop with loopback device down"
+
+ run_cmd "$IP link set dev lo up"
+
+ # Dump should not loop endlessly when maximum nexthop ID is configured.
+ run_cmd "$IP nexthop add id $((2**32-1)) blackhole"
+ run_cmd "timeout 5 $IP nexthop"
+ log_test $? 0 "Maximum nexthop ID dump"
+
#
# groups
#
@@ -1525,6 +2175,225 @@ basic()
log_test $? 2 "Nexthop group and blackhole"
$IP nexthop flush >/dev/null 2>&1
+
+ # Test to ensure that flushing with a multi-part nexthop dump works as
+ # expected.
+ local batch_file=$(mktemp)
+
+ for i in $(seq 1 $((64 * 1024))); do
+ echo "nexthop add id $i blackhole" >> $batch_file
+ done
+
+ $IP -b $batch_file
+ $IP nexthop flush >/dev/null 2>&1
+ [[ $($IP nexthop | wc -l) -eq 0 ]]
+ log_test $? 0 "Large scale nexthop flushing"
+
+ rm $batch_file
+}
+
+check_nexthop_buckets_balance()
+{
+ local nharg=$1; shift
+ local ret
+
+ while (($# > 0)); do
+ local selector=$1; shift
+ local condition=$1; shift
+ local count
+
+ count=$($IP -j nexthop bucket ${nharg} ${selector} | jq length)
+ (( $count $condition ))
+ ret=$?
+ if ((ret != 0)); then
+ return $ret
+ fi
+ done
+
+ return 0
+}
+
+basic_res()
+{
+ echo
+ echo "Basic resilient nexthop group functional tests"
+ echo "----------------------------------------------"
+
+ check_nexthop_res_support
+ if [ $? -eq $ksft_skip ]; then
+ return $ksft_skip
+ fi
+
+ run_cmd "$IP nexthop add id 1 dev veth1"
+
+ #
+ # resilient nexthop group addition
+ #
+
+ run_cmd "$IP nexthop add id 101 group 1 type resilient buckets 8"
+ log_test $? 0 "Add a nexthop group with default parameters"
+
+ run_cmd "$IP nexthop get id 101"
+ check_nexthop "id 101" \
+ "id 101 group 1 type resilient buckets 8 idle_timer 120 unbalanced_timer 0 unbalanced_time 0"
+ log_test $? 0 "Get a nexthop group with default parameters"
+
+ run_cmd "$IP nexthop add id 102 group 1 type resilient
+ buckets 4 idle_timer 100 unbalanced_timer 5"
+ run_cmd "$IP nexthop get id 102"
+ check_nexthop "id 102" \
+ "id 102 group 1 type resilient buckets 4 idle_timer 100 unbalanced_timer 5 unbalanced_time 0"
+ log_test $? 0 "Get a nexthop group with non-default parameters"
+
+ run_cmd "$IP nexthop add id 103 group 1 type resilient buckets 0"
+ log_test $? 2 "Add a nexthop group with 0 buckets"
+
+ #
+ # resilient nexthop group replacement
+ #
+
+ run_cmd "$IP nexthop replace id 101 group 1 type resilient
+ buckets 8 idle_timer 240 unbalanced_timer 80"
+ log_test $? 0 "Replace nexthop group parameters"
+ check_nexthop "id 101" \
+ "id 101 group 1 type resilient buckets 8 idle_timer 240 unbalanced_timer 80 unbalanced_time 0"
+ log_test $? 0 "Get a nexthop group after replacing parameters"
+
+ run_cmd "$IP nexthop replace id 101 group 1 type resilient idle_timer 512"
+ log_test $? 0 "Replace idle timer"
+ check_nexthop "id 101" \
+ "id 101 group 1 type resilient buckets 8 idle_timer 512 unbalanced_timer 80 unbalanced_time 0"
+ log_test $? 0 "Get a nexthop group after replacing idle timer"
+
+ run_cmd "$IP nexthop replace id 101 group 1 type resilient unbalanced_timer 256"
+ log_test $? 0 "Replace unbalanced timer"
+ check_nexthop "id 101" \
+ "id 101 group 1 type resilient buckets 8 idle_timer 512 unbalanced_timer 256 unbalanced_time 0"
+ log_test $? 0 "Get a nexthop group after replacing unbalanced timer"
+
+ run_cmd "$IP nexthop replace id 101 group 1 type resilient"
+ log_test $? 0 "Replace with no parameters"
+ check_nexthop "id 101" \
+ "id 101 group 1 type resilient buckets 8 idle_timer 512 unbalanced_timer 256 unbalanced_time 0"
+ log_test $? 0 "Get a nexthop group after replacing no parameters"
+
+ run_cmd "$IP nexthop replace id 101 group 1"
+ log_test $? 2 "Replace nexthop group type - implicit"
+
+ run_cmd "$IP nexthop replace id 101 group 1 type mpath"
+ log_test $? 2 "Replace nexthop group type - explicit"
+
+ run_cmd "$IP nexthop replace id 101 group 1 type resilient buckets 1024"
+ log_test $? 2 "Replace number of nexthop buckets"
+
+ check_nexthop "id 101" \
+ "id 101 group 1 type resilient buckets 8 idle_timer 512 unbalanced_timer 256 unbalanced_time 0"
+ log_test $? 0 "Get a nexthop group after replacing with invalid parameters"
+
+ #
+ # resilient nexthop buckets dump
+ #
+
+ $IP nexthop flush >/dev/null 2>&1
+ run_cmd "$IP nexthop add id 1 dev veth1"
+ run_cmd "$IP nexthop add id 2 dev veth3"
+ run_cmd "$IP nexthop add id 101 group 1/2 type resilient buckets 4"
+ run_cmd "$IP nexthop add id 201 group 1/2"
+
+ check_nexthop_bucket "" \
+ "id 101 index 0 nhid 2 id 101 index 1 nhid 2 id 101 index 2 nhid 1 id 101 index 3 nhid 1"
+ log_test $? 0 "Dump all nexthop buckets"
+
+ check_nexthop_bucket "list id 101" \
+ "id 101 index 0 nhid 2 id 101 index 1 nhid 2 id 101 index 2 nhid 1 id 101 index 3 nhid 1"
+ log_test $? 0 "Dump all nexthop buckets in a group"
+
+ sleep 0.1
+ (( $($IP -j nexthop bucket list id 101 |
+ jq '[.[] | select(.bucket.idle_time > 0 and
+ .bucket.idle_time < 2)] | length') == 4 ))
+ log_test $? 0 "All nexthop buckets report a positive near-zero idle time"
+
+ check_nexthop_bucket "list dev veth1" \
+ "id 101 index 2 nhid 1 id 101 index 3 nhid 1"
+ log_test $? 0 "Dump all nexthop buckets with a specific nexthop device"
+
+ check_nexthop_bucket "list nhid 2" \
+ "id 101 index 0 nhid 2 id 101 index 1 nhid 2"
+ log_test $? 0 "Dump all nexthop buckets with a specific nexthop identifier"
+
+ run_cmd "$IP nexthop bucket list id 111"
+ log_test $? 2 "Dump all nexthop buckets in a non-existent group"
+
+ run_cmd "$IP nexthop bucket list id 201"
+ log_test $? 2 "Dump all nexthop buckets in a non-resilient group"
+
+ run_cmd "$IP nexthop bucket list dev bla"
+ log_test $? 255 "Dump all nexthop buckets using a non-existent device"
+
+ run_cmd "$IP nexthop bucket list groups"
+ log_test $? 255 "Dump all nexthop buckets with invalid 'groups' keyword"
+
+ run_cmd "$IP nexthop bucket list fdb"
+ log_test $? 255 "Dump all nexthop buckets with invalid 'fdb' keyword"
+
+ # Dump should not loop endlessly when maximum nexthop ID is configured.
+ run_cmd "$IP nexthop add id $((2**32-1)) group 1/2 type resilient buckets 4"
+ run_cmd "timeout 5 $IP nexthop bucket"
+ log_test $? 0 "Maximum nexthop ID dump"
+
+ #
+ # resilient nexthop buckets get requests
+ #
+
+ check_nexthop_bucket "get id 101 index 0" "id 101 index 0 nhid 2"
+ log_test $? 0 "Get a valid nexthop bucket"
+
+ run_cmd "$IP nexthop bucket get id 101 index 999"
+ log_test $? 2 "Get a nexthop bucket with valid group, but invalid index"
+
+ run_cmd "$IP nexthop bucket get id 201 index 0"
+ log_test $? 2 "Get a nexthop bucket from a non-resilient group"
+
+ run_cmd "$IP nexthop bucket get id 999 index 0"
+ log_test $? 2 "Get a nexthop bucket from a non-existent group"
+
+ #
+ # tests for bucket migration
+ #
+
+ $IP nexthop flush >/dev/null 2>&1
+
+ run_cmd "$IP nexthop add id 1 dev veth1"
+ run_cmd "$IP nexthop add id 2 dev veth3"
+ run_cmd "$IP nexthop add id 101
+ group 1/2 type resilient buckets 10
+ idle_timer 1 unbalanced_timer 20"
+
+ check_nexthop_buckets_balance "list id 101" \
+ "nhid 1" "== 5" \
+ "nhid 2" "== 5"
+ log_test $? 0 "Initial bucket allocation"
+
+ run_cmd "$IP nexthop replace id 101
+ group 1,2/2,3 type resilient"
+ check_nexthop_buckets_balance "list id 101" \
+ "nhid 1" "== 4" \
+ "nhid 2" "== 6"
+ log_test $? 0 "Bucket allocation after replace"
+
+ # Check that increase in idle timer does not make buckets appear busy.
+ run_cmd "$IP nexthop replace id 101
+ group 1,2/2,3 type resilient
+ idle_timer 10"
+ run_cmd "$IP nexthop replace id 101
+ group 1/2 type resilient"
+ check_nexthop_buckets_balance "list id 101" \
+ "nhid 1" "== 5" \
+ "nhid 2" "== 5"
+ log_test $? 0 "Buckets migrated after idle timer change"
+
+ $IP nexthop flush >/dev/null 2>&1
}
################################################################################
@@ -1542,6 +2411,7 @@ usage: ${0##*/} OPTS
-p Pause on fail
-P Pause after each test before cleanup
-v verbose mode (show commands and output)
+ -w Timeout for ping
Runtime test
-n num Number of nexthops to target
@@ -1554,7 +2424,7 @@ EOF
################################################################################
# main
-while getopts :t:pP46hv o
+while getopts :t:pP46hvw: o
do
case $o in
t) TESTS=$OPTARG;;
@@ -1563,6 +2433,7 @@ do
p) PAUSE_ON_FAIL=yes;;
P) PAUSE=yes;;
v) VERBOSE=$(($VERBOSE + 1));;
+ w) PING_TIMEOUT=$OPTARG;;
h) usage; exit 0;;
*) usage; exit 1;;
esac
@@ -1596,7 +2467,7 @@ fi
for t in $TESTS
do
case $t in
- none) IP="ip -netns peer"; setup; exit 0;;
+ none) IP="ip -netns $peer"; setup; exit 0;;
*) setup; $t; cleanup;;
esac
done
diff --git a/tools/testing/selftests/net/fib_rule_tests.sh b/tools/testing/selftests/net/fib_rule_tests.sh
index a93e6b690e06..51157a5559b7 100755
--- a/tools/testing/selftests/net/fib_rule_tests.sh
+++ b/tools/testing/selftests/net/fib_rule_tests.sh
@@ -3,12 +3,12 @@
# This test is for checking IPv4 and IPv6 FIB rules API
+source lib.sh
ret=0
-
PAUSE_ON_FAIL=${PAUSE_ON_FAIL:=no}
-IP="ip -netns testns"
RTABLE=100
+RTABLE_PEER=101
GW_IP4=192.51.100.2
SRC_IP=192.51.100.3
GW_IP6=2001:db8:1::2
@@ -17,6 +17,9 @@ SRC_IP6=2001:db8:1::3
DEV_ADDR=192.51.100.1
DEV_ADDR6=2001:db8:1::1
DEV=dummy0
+TESTS="fib_rule6 fib_rule4 fib_rule6_connect fib_rule4_connect"
+
+SELFTEST_PATH=""
log_test()
{
@@ -48,11 +51,36 @@ log_section()
echo "######################################################################"
}
+check_nettest()
+{
+ if which nettest > /dev/null 2>&1; then
+ return 0
+ fi
+
+ # Add the selftest directory to PATH if not already done
+ if [ "${SELFTEST_PATH}" = "" ]; then
+ SELFTEST_PATH="$(dirname $0)"
+ PATH="${PATH}:${SELFTEST_PATH}"
+
+ # Now retry with the new path
+ if which nettest > /dev/null 2>&1; then
+ return 0
+ fi
+
+ if [ "${ret}" -eq 0 ]; then
+ ret="${ksft_skip}"
+ fi
+ echo "nettest not found (try 'make -C ${SELFTEST_PATH} nettest')"
+ fi
+
+ return 1
+}
+
setup()
{
set -e
- ip netns add testns
- $IP link set dev lo up
+ setup_ns testns
+ IP="ip -netns $testns"
$IP link add dummy0 type dummy
$IP link set dev dummy0 up
@@ -65,7 +93,41 @@ setup()
cleanup()
{
$IP link del dev dummy0 &> /dev/null
- ip netns del testns
+ cleanup_ns $testns
+}
+
+setup_peer()
+{
+ set -e
+
+ setup_ns peerns
+ IP_PEER="ip -netns $peerns"
+ $IP_PEER link set dev lo up
+
+ ip link add name veth0 netns $testns type veth \
+ peer name veth1 netns $peerns
+ $IP link set dev veth0 up
+ $IP_PEER link set dev veth1 up
+
+ $IP address add 192.0.2.10 peer 192.0.2.11/32 dev veth0
+ $IP_PEER address add 192.0.2.11 peer 192.0.2.10/32 dev veth1
+
+ $IP address add 2001:db8::10 peer 2001:db8::11/128 dev veth0 nodad
+ $IP_PEER address add 2001:db8::11 peer 2001:db8::10/128 dev veth1 nodad
+
+ $IP_PEER address add 198.51.100.11/32 dev lo
+ $IP route add table $RTABLE_PEER 198.51.100.11/32 via 192.0.2.11
+
+ $IP_PEER address add 2001:db8::1:11/128 dev lo
+ $IP route add table $RTABLE_PEER 2001:db8::1:11/128 via 2001:db8::11
+
+ set +e
+}
+
+cleanup_peer()
+{
+ $IP link del dev veth0
+ ip netns del $peerns
}
fib_check_iproute_support()
@@ -93,7 +155,7 @@ fib_rule6_del()
fib_rule6_del_by_pref()
{
- pref=$($IP -6 rule show | grep "$1 lookup $TABLE" | cut -d ":" -f 1)
+ pref=$($IP -6 rule show $1 table $RTABLE | cut -d ":" -f 1)
$IP -6 rule del pref $pref
}
@@ -101,17 +163,36 @@ fib_rule6_test_match_n_redirect()
{
local match="$1"
local getmatch="$2"
+ local description="$3"
$IP -6 rule add $match table $RTABLE
$IP -6 route get $GW_IP6 $getmatch | grep -q "table $RTABLE"
- log_test $? 0 "rule6 check: $1"
+ log_test $? 0 "rule6 check: $description"
fib_rule6_del_by_pref "$match"
- log_test $? 0 "rule6 del by pref: $match"
+ log_test $? 0 "rule6 del by pref: $description"
+}
+
+fib_rule6_test_reject()
+{
+ local match="$1"
+ local rc
+
+ $IP -6 rule add $match table $RTABLE 2>/dev/null
+ rc=$?
+ log_test $rc 2 "rule6 check: $match"
+
+ if [ $rc -eq 0 ]; then
+ $IP -6 rule del $match table $RTABLE
+ fi
}
fib_rule6_test()
{
+ local getmatch
+ local match
+ local cnt
+
# setup the fib rule redirect route
$IP -6 route add table $RTABLE default via $GW_IP6 dev $DEV onlink
@@ -121,8 +202,21 @@ fib_rule6_test()
match="from $SRC_IP6 iif $DEV"
fib_rule6_test_match_n_redirect "$match" "$match" "iif redirect to table"
+ # Reject dsfield (tos) options which have ECN bits set
+ for cnt in $(seq 1 3); do
+ match="dsfield $cnt"
+ fib_rule6_test_reject "$match"
+ done
+
+ # Don't take ECN bits into account when matching on dsfield
match="tos 0x10"
- fib_rule6_test_match_n_redirect "$match" "$match" "tos redirect to table"
+ for cnt in "0x10" "0x11" "0x12" "0x13"; do
+ # Using option 'tos' instead of 'dsfield' as old iproute2
+ # versions don't support 'dsfield' in ip rule show.
+ getmatch="tos $cnt"
+ fib_rule6_test_match_n_redirect "$match" "$getmatch" \
+ "$getmatch redirect to table"
+ done
match="fwmark 0x64"
getmatch="mark 0x64"
@@ -154,6 +248,37 @@ fib_rule6_test()
fi
}
+# Verify that the IPV6_TCLASS option of UDPv6 and TCPv6 sockets is properly
+# taken into account when connecting the socket and when sending packets.
+fib_rule6_connect_test()
+{
+ local dsfield
+
+ if ! check_nettest; then
+ echo "SKIP: Could not run test without nettest tool"
+ return
+ fi
+
+ setup_peer
+ $IP -6 rule add dsfield 0x04 table $RTABLE_PEER
+
+ # Combine the base DS Field value (0x04) with all possible ECN values
+ # (Not-ECT: 0, ECT(1): 1, ECT(0): 2, CE: 3).
+ # The ECN bits shouldn't influence the result of the test.
+ for dsfield in 0x04 0x05 0x06 0x07; do
+ nettest -q -6 -B -t 5 -N $testns -O $peerns -U -D \
+ -Q "${dsfield}" -l 2001:db8::1:11 -r 2001:db8::1:11
+ log_test $? 0 "rule6 dsfield udp connect (dsfield ${dsfield})"
+
+ nettest -q -6 -B -t 5 -N $testns -O $peerns -Q "${dsfield}" \
+ -l 2001:db8::1:11 -r 2001:db8::1:11
+ log_test $? 0 "rule6 dsfield tcp connect (dsfield ${dsfield})"
+ done
+
+ $IP -6 rule del dsfield 0x04 table $RTABLE_PEER
+ cleanup_peer
+}
+
fib_rule4_del()
{
$IP rule del $1
@@ -162,7 +287,7 @@ fib_rule4_del()
fib_rule4_del_by_pref()
{
- pref=$($IP rule show | grep "$1 lookup $TABLE" | cut -d ":" -f 1)
+ pref=$($IP rule show $1 table $RTABLE | cut -d ":" -f 1)
$IP rule del pref $pref
}
@@ -170,17 +295,36 @@ fib_rule4_test_match_n_redirect()
{
local match="$1"
local getmatch="$2"
+ local description="$3"
$IP rule add $match table $RTABLE
$IP route get $GW_IP4 $getmatch | grep -q "table $RTABLE"
- log_test $? 0 "rule4 check: $1"
+ log_test $? 0 "rule4 check: $description"
fib_rule4_del_by_pref "$match"
- log_test $? 0 "rule4 del by pref: $match"
+ log_test $? 0 "rule4 del by pref: $description"
+}
+
+fib_rule4_test_reject()
+{
+ local match="$1"
+ local rc
+
+ $IP rule add $match table $RTABLE 2>/dev/null
+ rc=$?
+ log_test $rc 2 "rule4 check: $match"
+
+ if [ $rc -eq 0 ]; then
+ $IP rule del $match table $RTABLE
+ fi
}
fib_rule4_test()
{
+ local getmatch
+ local match
+ local cnt
+
# setup the fib rule redirect route
$IP route add table $RTABLE default via $GW_IP4 dev $DEV onlink
@@ -189,14 +333,27 @@ fib_rule4_test()
# need enable forwarding and disable rp_filter temporarily as all the
# addresses are in the same subnet and egress device == ingress device.
- ip netns exec testns sysctl -w net.ipv4.ip_forward=1
- ip netns exec testns sysctl -w net.ipv4.conf.$DEV.rp_filter=0
+ ip netns exec $testns sysctl -qw net.ipv4.ip_forward=1
+ ip netns exec $testns sysctl -qw net.ipv4.conf.$DEV.rp_filter=0
match="from $SRC_IP iif $DEV"
fib_rule4_test_match_n_redirect "$match" "$match" "iif redirect to table"
- ip netns exec testns sysctl -w net.ipv4.ip_forward=0
+ ip netns exec $testns sysctl -qw net.ipv4.ip_forward=0
+
+ # Reject dsfield (tos) options which have ECN bits set
+ for cnt in $(seq 1 3); do
+ match="dsfield $cnt"
+ fib_rule4_test_reject "$match"
+ done
+ # Don't take ECN bits into account when matching on dsfield
match="tos 0x10"
- fib_rule4_test_match_n_redirect "$match" "$match" "tos redirect to table"
+ for cnt in "0x10" "0x11" "0x12" "0x13"; do
+ # Using option 'tos' instead of 'dsfield' as old iproute2
+ # versions don't support 'dsfield' in ip rule show.
+ getmatch="tos $cnt"
+ fib_rule4_test_match_n_redirect "$match" "$getmatch" \
+ "$getmatch redirect to table"
+ done
match="fwmark 0x64"
getmatch="mark 0x64"
@@ -228,6 +385,37 @@ fib_rule4_test()
fi
}
+# Verify that the IP_TOS option of UDPv4 and TCPv4 sockets is properly taken
+# into account when connecting the socket and when sending packets.
+fib_rule4_connect_test()
+{
+ local dsfield
+
+ if ! check_nettest; then
+ echo "SKIP: Could not run test without nettest tool"
+ return
+ fi
+
+ setup_peer
+ $IP -4 rule add dsfield 0x04 table $RTABLE_PEER
+
+ # Combine the base DS Field value (0x04) with all possible ECN values
+ # (Not-ECT: 0, ECT(1): 1, ECT(0): 2, CE: 3).
+ # The ECN bits shouldn't influence the result of the test.
+ for dsfield in 0x04 0x05 0x06 0x07; do
+ nettest -q -B -t 5 -N $testns -O $peerns -D -U -Q "${dsfield}" \
+ -l 198.51.100.11 -r 198.51.100.11
+ log_test $? 0 "rule4 dsfield udp connect (dsfield ${dsfield})"
+
+ nettest -q -B -t 5 -N $testns -O $peerns -Q "${dsfield}" \
+ -l 198.51.100.11 -r 198.51.100.11
+ log_test $? 0 "rule4 dsfield tcp connect (dsfield ${dsfield})"
+ done
+
+ $IP -4 rule del dsfield 0x04 table $RTABLE_PEER
+ cleanup_peer
+}
+
run_fibrule_tests()
{
log_section "IPv4 fib rule"
@@ -235,21 +423,55 @@ run_fibrule_tests()
log_section "IPv6 fib rule"
fib_rule6_test
}
+################################################################################
+# usage
+
+usage()
+{
+ cat <<EOF
+usage: ${0##*/} OPTS
+
+ -t <test> Test(s) to run (default: all)
+ (options: $TESTS)
+EOF
+}
+
+################################################################################
+# main
+
+while getopts ":t:h" opt; do
+ case $opt in
+ t) TESTS=$OPTARG;;
+ h) usage; exit 0;;
+ *) usage; exit 1;;
+ esac
+done
if [ "$(id -u)" -ne 0 ];then
echo "SKIP: Need root privileges"
- exit 0
+ exit $ksft_skip
fi
if [ ! -x "$(command -v ip)" ]; then
echo "SKIP: Could not run test without ip tool"
- exit 0
+ exit $ksft_skip
fi
# start clean
cleanup &> /dev/null
setup
-run_fibrule_tests
+for t in $TESTS
+do
+ case $t in
+ fib_rule6_test|fib_rule6) fib_rule6_test;;
+ fib_rule4_test|fib_rule4) fib_rule4_test;;
+ fib_rule6_connect_test|fib_rule6_connect) fib_rule6_connect_test;;
+ fib_rule4_connect_test|fib_rule4_connect) fib_rule4_connect_test;;
+
+ help) echo "Test names: $TESTS"; exit 0;;
+
+ esac
+done
cleanup
if [ "$TESTS" != "none" ]; then
diff --git a/tools/testing/selftests/net/fib_tests.sh b/tools/testing/selftests/net/fib_tests.sh
index 84205c3a55eb..73895711cdf4 100755
--- a/tools/testing/selftests/net/fib_tests.sh
+++ b/tools/testing/selftests/net/fib_tests.sh
@@ -3,19 +3,19 @@
# This test is for checking IPv4 and IPv6 FIB behavior in response to
# different events.
-
+source lib.sh
ret=0
-# Kselftest framework requirement - SKIP code is 4.
-ksft_skip=4
# all tests in this script. Can be overridden with -t option
-TESTS="unregister down carrier nexthop suppress ipv6_rt ipv4_rt ipv6_addr_metric ipv4_addr_metric ipv6_route_metrics ipv4_route_metrics ipv4_route_v6_gw rp_filter ipv4_del_addr"
+TESTS="unregister down carrier nexthop suppress ipv6_notify ipv4_notify \
+ ipv6_rt ipv4_rt ipv6_addr_metric ipv4_addr_metric ipv6_route_metrics \
+ ipv4_route_metrics ipv4_route_v6_gw rp_filter ipv4_del_addr \
+ ipv6_del_addr ipv4_mangle ipv6_mangle ipv4_bcast_neigh fib6_gc_test \
+ ipv4_mpath_list ipv6_mpath_list"
VERBOSE=0
PAUSE_ON_FAIL=no
PAUSE=no
-IP="ip -netns ns1"
-NS_EXEC="ip netns exec ns1"
which ping6 > /dev/null 2>&1 && ping6=$(which ping6) || ping6=$(which ping)
@@ -51,11 +51,11 @@ log_test()
setup()
{
set -e
- ip netns add ns1
- ip netns set ns1 auto
- $IP link set dev lo up
- ip netns exec ns1 sysctl -qw net.ipv4.ip_forward=1
- ip netns exec ns1 sysctl -qw net.ipv6.conf.all.forwarding=1
+ setup_ns ns1
+ IP="$(which ip) -netns $ns1"
+ NS_EXEC="$(which ip) netns exec $ns1"
+ ip netns exec $ns1 sysctl -qw net.ipv4.ip_forward=1
+ ip netns exec $ns1 sysctl -qw net.ipv6.conf.all.forwarding=1
$IP link add dummy0 type dummy
$IP link set dev dummy0 up
@@ -68,8 +68,7 @@ setup()
cleanup()
{
$IP link del dev dummy0 &> /dev/null
- ip netns del ns1
- ip netns del ns2 &> /dev/null
+ cleanup_ns $ns1 $ns2
}
get_linklocal()
@@ -444,24 +443,60 @@ fib_rp_filter_test()
setup
set -e
+ setup_ns ns2
+
+ $IP link add name veth1 type veth peer name veth2
+ $IP link set dev veth2 netns $ns2
+ $IP address add 192.0.2.1/24 dev veth1
+ ip -netns $ns2 address add 192.0.2.1/24 dev veth2
+ $IP link set dev veth1 up
+ ip -netns $ns2 link set dev veth2 up
+
$IP link set dev lo address 52:54:00:6a:c7:5e
- $IP link set dummy0 address 52:54:00:6a:c7:5e
- $IP link add dummy1 type dummy
- $IP link set dummy1 address 52:54:00:6a:c7:5e
- $IP link set dev dummy1 up
+ $IP link set dev veth1 address 52:54:00:6a:c7:5e
+ ip -netns $ns2 link set dev lo address 52:54:00:6a:c7:5e
+ ip -netns $ns2 link set dev veth2 address 52:54:00:6a:c7:5e
+
+ # 1. (ns2) redirect lo's egress to veth2's egress
+ ip netns exec $ns2 tc qdisc add dev lo parent root handle 1: fq_codel
+ ip netns exec $ns2 tc filter add dev lo parent 1: protocol arp basic \
+ action mirred egress redirect dev veth2
+ ip netns exec $ns2 tc filter add dev lo parent 1: protocol ip basic \
+ action mirred egress redirect dev veth2
+
+ # 2. (ns1) redirect veth1's ingress to lo's ingress
+ $NS_EXEC tc qdisc add dev veth1 ingress
+ $NS_EXEC tc filter add dev veth1 ingress protocol arp basic \
+ action mirred ingress redirect dev lo
+ $NS_EXEC tc filter add dev veth1 ingress protocol ip basic \
+ action mirred ingress redirect dev lo
+
+ # 3. (ns1) redirect lo's egress to veth1's egress
+ $NS_EXEC tc qdisc add dev lo parent root handle 1: fq_codel
+ $NS_EXEC tc filter add dev lo parent 1: protocol arp basic \
+ action mirred egress redirect dev veth1
+ $NS_EXEC tc filter add dev lo parent 1: protocol ip basic \
+ action mirred egress redirect dev veth1
+
+ # 4. (ns2) redirect veth2's ingress to lo's ingress
+ ip netns exec $ns2 tc qdisc add dev veth2 ingress
+ ip netns exec $ns2 tc filter add dev veth2 ingress protocol arp basic \
+ action mirred ingress redirect dev lo
+ ip netns exec $ns2 tc filter add dev veth2 ingress protocol ip basic \
+ action mirred ingress redirect dev lo
+
$NS_EXEC sysctl -qw net.ipv4.conf.all.rp_filter=1
$NS_EXEC sysctl -qw net.ipv4.conf.all.accept_local=1
$NS_EXEC sysctl -qw net.ipv4.conf.all.route_localnet=1
-
- $NS_EXEC tc qd add dev dummy1 parent root handle 1: fq_codel
- $NS_EXEC tc filter add dev dummy1 parent 1: protocol arp basic action mirred egress redirect dev lo
- $NS_EXEC tc filter add dev dummy1 parent 1: protocol ip basic action mirred egress redirect dev lo
+ ip netns exec $ns2 sysctl -qw net.ipv4.conf.all.rp_filter=1
+ ip netns exec $ns2 sysctl -qw net.ipv4.conf.all.accept_local=1
+ ip netns exec $ns2 sysctl -qw net.ipv4.conf.all.route_localnet=1
set +e
- run_cmd "ip netns exec ns1 ping -I dummy1 -w1 -c1 198.51.100.1"
+ run_cmd "ip netns exec $ns2 ping -w1 -c1 192.0.2.1"
log_test $? 0 "rp_filter passes local packets"
- run_cmd "ip netns exec ns1 ping -I dummy1 -w1 -c1 127.0.0.1"
+ run_cmd "ip netns exec $ns2 ping -w1 -c1 127.0.0.1"
log_test $? 0 "rp_filter passes loopback packets"
cleanup
@@ -616,6 +651,264 @@ fib_nexthop_test()
cleanup
}
+fib6_notify_test()
+{
+ setup
+
+ echo
+ echo "Fib6 info length calculation in route notify test"
+ set -e
+
+ for i in 10 20 30 40 50 60 70;
+ do
+ $IP link add dummy_$i type dummy
+ $IP link set dev dummy_$i up
+ $IP -6 address add 2001:$i::1/64 dev dummy_$i
+ done
+
+ $NS_EXEC ip monitor route &> errors.txt &
+ sleep 2
+
+ $IP -6 route add 2001::/64 \
+ nexthop via 2001:10::2 dev dummy_10 \
+ nexthop encap ip6 dst 2002::20 via 2001:20::2 dev dummy_20 \
+ nexthop encap ip6 dst 2002::30 via 2001:30::2 dev dummy_30 \
+ nexthop encap ip6 dst 2002::40 via 2001:40::2 dev dummy_40 \
+ nexthop encap ip6 dst 2002::50 via 2001:50::2 dev dummy_50 \
+ nexthop encap ip6 dst 2002::60 via 2001:60::2 dev dummy_60 \
+ nexthop encap ip6 dst 2002::70 via 2001:70::2 dev dummy_70
+
+ set +e
+
+ err=`cat errors.txt |grep "Message too long"`
+ if [ -z "$err" ];then
+ ret=0
+ else
+ ret=1
+ fi
+
+ log_test $ret 0 "ipv6 route add notify"
+
+ { kill %% && wait %%; } 2>/dev/null
+
+ #rm errors.txt
+
+ cleanup &> /dev/null
+}
+
+
+fib_notify_test()
+{
+ setup
+
+ echo
+ echo "Fib4 info length calculation in route notify test"
+
+ set -e
+
+ for i in 10 20 30 40 50 60 70;
+ do
+ $IP link add dummy_$i type dummy
+ $IP link set dev dummy_$i up
+ $IP address add 20.20.$i.2/24 dev dummy_$i
+ done
+
+ $NS_EXEC ip monitor route &> errors.txt &
+ sleep 2
+
+ $IP route add 10.0.0.0/24 \
+ nexthop via 20.20.10.1 dev dummy_10 \
+ nexthop encap ip dst 192.168.10.20 via 20.20.20.1 dev dummy_20 \
+ nexthop encap ip dst 192.168.10.30 via 20.20.30.1 dev dummy_30 \
+ nexthop encap ip dst 192.168.10.40 via 20.20.40.1 dev dummy_40 \
+ nexthop encap ip dst 192.168.10.50 via 20.20.50.1 dev dummy_50 \
+ nexthop encap ip dst 192.168.10.60 via 20.20.60.1 dev dummy_60 \
+ nexthop encap ip dst 192.168.10.70 via 20.20.70.1 dev dummy_70
+
+ set +e
+
+ err=`cat errors.txt |grep "Message too long"`
+ if [ -z "$err" ];then
+ ret=0
+ else
+ ret=1
+ fi
+
+ log_test $ret 0 "ipv4 route add notify"
+
+ { kill %% && wait %%; } 2>/dev/null
+
+ rm errors.txt
+
+ cleanup &> /dev/null
+}
+
+# Create a new dummy_10 to remove all associated routes.
+reset_dummy_10()
+{
+ $IP link del dev dummy_10
+
+ $IP link add dummy_10 type dummy
+ $IP link set dev dummy_10 up
+ $IP -6 address add 2001:10::1/64 dev dummy_10
+}
+
+check_rt_num()
+{
+ local expected=$1
+ local num=$2
+
+ if [ $num -ne $expected ]; then
+ echo "FAIL: Expected $expected routes, got $num"
+ ret=1
+ else
+ ret=0
+ fi
+}
+
+check_rt_num_clean()
+{
+ local expected=$1
+ local num=$2
+
+ if [ $num -ne $expected ]; then
+ log_test 1 0 "expected $expected routes, got $num"
+ set +e
+ cleanup &> /dev/null
+ return 1
+ fi
+ return 0
+}
+
+fib6_gc_test()
+{
+ setup
+
+ echo
+ echo "Fib6 garbage collection test"
+ set -e
+
+ EXPIRE=5
+ GC_WAIT_TIME=$((EXPIRE * 2 + 2))
+
+ # Check expiration of routes every $EXPIRE seconds (GC)
+ $NS_EXEC sysctl -wq net.ipv6.route.gc_interval=$EXPIRE
+
+ $IP link add dummy_10 type dummy
+ $IP link set dev dummy_10 up
+ $IP -6 address add 2001:10::1/64 dev dummy_10
+
+ $NS_EXEC sysctl -wq net.ipv6.route.flush=1
+
+ # Temporary routes
+ for i in $(seq 1 5); do
+ # Expire route after $EXPIRE seconds
+ $IP -6 route add 2001:20::$i \
+ via 2001:10::2 dev dummy_10 expires $EXPIRE
+ done
+ sleep $GC_WAIT_TIME
+ $NS_EXEC sysctl -wq net.ipv6.route.flush=1
+ check_rt_num 0 $($IP -6 route list |grep expires|wc -l)
+ log_test $ret 0 "ipv6 route garbage collection"
+
+ reset_dummy_10
+
+ # Permanent routes
+ for i in $(seq 1 5); do
+ $IP -6 route add 2001:30::$i \
+ via 2001:10::2 dev dummy_10
+ done
+ # Temporary routes
+ for i in $(seq 1 5); do
+ # Expire route after $EXPIRE seconds
+ $IP -6 route add 2001:20::$i \
+ via 2001:10::2 dev dummy_10 expires $EXPIRE
+ done
+ # Wait for GC
+ sleep $GC_WAIT_TIME
+ check_rt_num 0 $($IP -6 route list |grep expires|wc -l)
+ log_test $ret 0 "ipv6 route garbage collection (with permanent routes)"
+
+ reset_dummy_10
+
+ # Permanent routes
+ for i in $(seq 1 5); do
+ $IP -6 route add 2001:20::$i \
+ via 2001:10::2 dev dummy_10
+ done
+ # Replace with temporary routes
+ for i in $(seq 1 5); do
+ # Expire route after $EXPIRE seconds
+ $IP -6 route replace 2001:20::$i \
+ via 2001:10::2 dev dummy_10 expires $EXPIRE
+ done
+ # Wait for GC
+ sleep $GC_WAIT_TIME
+ check_rt_num 0 $($IP -6 route list |grep expires|wc -l)
+ log_test $ret 0 "ipv6 route garbage collection (replace with expires)"
+
+ reset_dummy_10
+
+ # Temporary routes
+ for i in $(seq 1 5); do
+ # Expire route after $EXPIRE seconds
+ $IP -6 route add 2001:20::$i \
+ via 2001:10::2 dev dummy_10 expires $EXPIRE
+ done
+ # Replace with permanent routes
+ for i in $(seq 1 5); do
+ $IP -6 route replace 2001:20::$i \
+ via 2001:10::2 dev dummy_10
+ done
+ check_rt_num_clean 0 $($IP -6 route list |grep expires|wc -l) || return
+
+ # Wait for GC
+ sleep $GC_WAIT_TIME
+ check_rt_num 5 $($IP -6 route list |grep -v expires|grep 2001:20::|wc -l)
+ log_test $ret 0 "ipv6 route garbage collection (replace with permanent)"
+
+ # ra6 is required for the next test. (ipv6toolkit)
+ if [ ! -x "$(command -v ra6)" ]; then
+ echo "SKIP: ra6 not found."
+ set +e
+ cleanup &> /dev/null
+ return
+ fi
+
+ # Delete dummy_10 and remove all routes
+ $IP link del dev dummy_10
+
+ # Create a pair of veth devices to send a RA message from one
+ # device to another.
+ $IP link add veth1 type veth peer name veth2
+ $IP link set dev veth1 up
+ $IP link set dev veth2 up
+ $IP -6 address add 2001:10::1/64 dev veth1 nodad
+ $IP -6 address add 2001:10::2/64 dev veth2 nodad
+
+ # Make veth1 ready to receive RA messages.
+ $NS_EXEC sysctl -wq net.ipv6.conf.veth1.accept_ra=2
+
+ # Send a RA message with a route from veth2 to veth1.
+ $NS_EXEC ra6 -i veth2 -d 2001:10::1 -t $EXPIRE
+
+ # Wait for the RA message.
+ sleep 1
+
+ # systemd may mess up the test. You syould make sure that
+ # systemd-networkd.service and systemd-networkd.socket are stopped.
+ check_rt_num_clean 1 $($IP -6 route list|grep expires|wc -l) || return
+
+ # Wait for GC
+ sleep $GC_WAIT_TIME
+ check_rt_num 0 $($IP -6 route list |grep expires|wc -l)
+ log_test $ret 0 "ipv6 route garbage collection (RA message)"
+
+ set +e
+
+ cleanup &> /dev/null
+}
+
fib_suppress_test()
{
echo
@@ -762,34 +1055,32 @@ route_setup()
[ "${VERBOSE}" = "1" ] && set -x
set -e
- ip netns add ns2
- ip netns set ns2 auto
- ip -netns ns2 link set dev lo up
- ip netns exec ns2 sysctl -qw net.ipv4.ip_forward=1
- ip netns exec ns2 sysctl -qw net.ipv6.conf.all.forwarding=1
+ setup_ns ns2
+ ip netns exec $ns2 sysctl -qw net.ipv4.ip_forward=1
+ ip netns exec $ns2 sysctl -qw net.ipv6.conf.all.forwarding=1
$IP li add veth1 type veth peer name veth2
$IP li add veth3 type veth peer name veth4
$IP li set veth1 up
$IP li set veth3 up
- $IP li set veth2 netns ns2 up
- $IP li set veth4 netns ns2 up
- ip -netns ns2 li add dummy1 type dummy
- ip -netns ns2 li set dummy1 up
+ $IP li set veth2 netns $ns2 up
+ $IP li set veth4 netns $ns2 up
+ ip -netns $ns2 li add dummy1 type dummy
+ ip -netns $ns2 li set dummy1 up
$IP -6 addr add 2001:db8:101::1/64 dev veth1 nodad
$IP -6 addr add 2001:db8:103::1/64 dev veth3 nodad
$IP addr add 172.16.101.1/24 dev veth1
$IP addr add 172.16.103.1/24 dev veth3
- ip -netns ns2 -6 addr add 2001:db8:101::2/64 dev veth2 nodad
- ip -netns ns2 -6 addr add 2001:db8:103::2/64 dev veth4 nodad
- ip -netns ns2 -6 addr add 2001:db8:104::1/64 dev dummy1 nodad
+ ip -netns $ns2 -6 addr add 2001:db8:101::2/64 dev veth2 nodad
+ ip -netns $ns2 -6 addr add 2001:db8:103::2/64 dev veth4 nodad
+ ip -netns $ns2 -6 addr add 2001:db8:104::1/64 dev dummy1 nodad
- ip -netns ns2 addr add 172.16.101.2/24 dev veth2
- ip -netns ns2 addr add 172.16.103.2/24 dev veth4
- ip -netns ns2 addr add 172.16.104.1/24 dev dummy1
+ ip -netns $ns2 addr add 172.16.101.2/24 dev veth2
+ ip -netns $ns2 addr add 172.16.103.2/24 dev veth4
+ ip -netns $ns2 addr add 172.16.104.1/24 dev dummy1
set +e
}
@@ -949,12 +1240,25 @@ ipv6_rt_replace()
ipv6_rt_replace_mpath
}
+ipv6_rt_dsfield()
+{
+ echo
+ echo "IPv6 route with dsfield tests"
+
+ run_cmd "$IP -6 route flush 2001:db8:102::/64"
+
+ # IPv6 doesn't support routing based on dsfield
+ run_cmd "$IP -6 route add 2001:db8:102::/64 dsfield 0x04 via 2001:db8:101::2"
+ log_test $? 2 "Reject route with dsfield"
+}
+
ipv6_route_test()
{
route_setup
ipv6_rt_add
ipv6_rt_replace
+ ipv6_rt_dsfield
route_cleanup
}
@@ -1028,7 +1332,7 @@ ipv6_addr_metric_test()
log_test $rc 0 "Modify metric of address"
# verify prefix route removed on down
- run_cmd "ip netns exec ns1 sysctl -qw net.ipv6.conf.all.keep_addr_on_down=1"
+ run_cmd "ip netns exec $ns1 sysctl -qw net.ipv6.conf.all.keep_addr_on_down=1"
run_cmd "$IP li set dev dummy2 down"
rc=$?
if [ $rc -eq 0 ]; then
@@ -1055,7 +1359,6 @@ ipv6_addr_metric_test()
check_route6 "2001:db8:104::1 dev dummy2 proto kernel metric 260"
log_test $? 0 "Set metric with peer route on local side"
- log_test $? 0 "User specified metric on local address"
check_route6 "2001:db8:104::2 dev dummy2 proto kernel metric 260"
log_test $? 0 "Set metric with peer route on peer side"
@@ -1135,7 +1438,7 @@ ipv6_route_metrics_test()
log_test $rc 0 "Multipath route with mtu metric"
$IP -6 ro add 2001:db8:104::/64 via 2001:db8:101::2 mtu 1300
- run_cmd "ip netns exec ns1 ${ping6} -w1 -c1 -s 1500 2001:db8:104::1"
+ run_cmd "ip netns exec $ns1 ${ping6} -w1 -c1 -s 1500 2001:db8:104::1"
log_test $? 0 "Using route with mtu metric"
run_cmd "$IP -6 ro add 2001:db8:114::/64 via 2001:db8:101::2 congctl lock foo"
@@ -1385,12 +1688,113 @@ ipv4_rt_replace()
ipv4_rt_replace_mpath
}
+# checks that cached input route on VRF port is deleted
+# when VRF is deleted
+ipv4_local_rt_cache()
+{
+ run_cmd "ip addr add 10.0.0.1/32 dev lo"
+ run_cmd "setup_ns test-ns"
+ run_cmd "ip link add veth-outside type veth peer name veth-inside"
+ run_cmd "ip link add vrf-100 type vrf table 1100"
+ run_cmd "ip link set veth-outside master vrf-100"
+ run_cmd "ip link set veth-inside netns $test-ns"
+ run_cmd "ip link set veth-outside up"
+ run_cmd "ip link set vrf-100 up"
+ run_cmd "ip route add 10.1.1.1/32 dev veth-outside table 1100"
+ run_cmd "ip netns exec $test-ns ip link set veth-inside up"
+ run_cmd "ip netns exec $test-ns ip addr add 10.1.1.1/32 dev veth-inside"
+ run_cmd "ip netns exec $test-ns ip route add 10.0.0.1/32 dev veth-inside"
+ run_cmd "ip netns exec $test-ns ip route add default via 10.0.0.1"
+ run_cmd "ip netns exec $test-ns ping 10.0.0.1 -c 1 -i 1"
+ run_cmd "ip link delete vrf-100"
+
+ # if we do not hang test is a success
+ log_test $? 0 "Cached route removed from VRF port device"
+}
+
+ipv4_rt_dsfield()
+{
+ echo
+ echo "IPv4 route with dsfield tests"
+
+ run_cmd "$IP route flush 172.16.102.0/24"
+
+ # New routes should reject dsfield options that interfere with ECN
+ run_cmd "$IP route add 172.16.102.0/24 dsfield 0x01 via 172.16.101.2"
+ log_test $? 2 "Reject route with dsfield 0x01"
+
+ run_cmd "$IP route add 172.16.102.0/24 dsfield 0x02 via 172.16.101.2"
+ log_test $? 2 "Reject route with dsfield 0x02"
+
+ run_cmd "$IP route add 172.16.102.0/24 dsfield 0x03 via 172.16.101.2"
+ log_test $? 2 "Reject route with dsfield 0x03"
+
+ # A generic route that doesn't take DSCP into account
+ run_cmd "$IP route add 172.16.102.0/24 via 172.16.101.2"
+
+ # A more specific route for DSCP 0x10
+ run_cmd "$IP route add 172.16.102.0/24 dsfield 0x10 via 172.16.103.2"
+
+ # DSCP 0x10 should match the specific route, no matter the ECN bits
+ $IP route get fibmatch 172.16.102.1 dsfield 0x10 | \
+ grep -q "via 172.16.103.2"
+ log_test $? 0 "IPv4 route with DSCP and ECN:Not-ECT"
+
+ $IP route get fibmatch 172.16.102.1 dsfield 0x11 | \
+ grep -q "via 172.16.103.2"
+ log_test $? 0 "IPv4 route with DSCP and ECN:ECT(1)"
+
+ $IP route get fibmatch 172.16.102.1 dsfield 0x12 | \
+ grep -q "via 172.16.103.2"
+ log_test $? 0 "IPv4 route with DSCP and ECN:ECT(0)"
+
+ $IP route get fibmatch 172.16.102.1 dsfield 0x13 | \
+ grep -q "via 172.16.103.2"
+ log_test $? 0 "IPv4 route with DSCP and ECN:CE"
+
+ # Unknown DSCP should match the generic route, no matter the ECN bits
+ $IP route get fibmatch 172.16.102.1 dsfield 0x14 | \
+ grep -q "via 172.16.101.2"
+ log_test $? 0 "IPv4 route with unknown DSCP and ECN:Not-ECT"
+
+ $IP route get fibmatch 172.16.102.1 dsfield 0x15 | \
+ grep -q "via 172.16.101.2"
+ log_test $? 0 "IPv4 route with unknown DSCP and ECN:ECT(1)"
+
+ $IP route get fibmatch 172.16.102.1 dsfield 0x16 | \
+ grep -q "via 172.16.101.2"
+ log_test $? 0 "IPv4 route with unknown DSCP and ECN:ECT(0)"
+
+ $IP route get fibmatch 172.16.102.1 dsfield 0x17 | \
+ grep -q "via 172.16.101.2"
+ log_test $? 0 "IPv4 route with unknown DSCP and ECN:CE"
+
+ # Null DSCP should match the generic route, no matter the ECN bits
+ $IP route get fibmatch 172.16.102.1 dsfield 0x00 | \
+ grep -q "via 172.16.101.2"
+ log_test $? 0 "IPv4 route with no DSCP and ECN:Not-ECT"
+
+ $IP route get fibmatch 172.16.102.1 dsfield 0x01 | \
+ grep -q "via 172.16.101.2"
+ log_test $? 0 "IPv4 route with no DSCP and ECN:ECT(1)"
+
+ $IP route get fibmatch 172.16.102.1 dsfield 0x02 | \
+ grep -q "via 172.16.101.2"
+ log_test $? 0 "IPv4 route with no DSCP and ECN:ECT(0)"
+
+ $IP route get fibmatch 172.16.102.1 dsfield 0x03 | \
+ grep -q "via 172.16.101.2"
+ log_test $? 0 "IPv4 route with no DSCP and ECN:CE"
+}
+
ipv4_route_test()
{
route_setup
ipv4_rt_add
ipv4_rt_replace
+ ipv4_local_rt_cache
+ ipv4_rt_dsfield
route_cleanup
}
@@ -1531,7 +1935,7 @@ ipv4_route_metrics_test()
log_test $rc 0 "Multipath route with mtu metric"
$IP ro add 172.16.104.0/24 via 172.16.101.2 mtu 1300
- run_cmd "ip netns exec ns1 ping -w1 -c1 -s 1500 172.16.104.1"
+ run_cmd "ip netns exec $ns1 ping -w1 -c1 -s 1500 172.16.104.1"
log_test $? 0 "Using route with mtu metric"
run_cmd "$IP ro add 172.16.111.0/24 via 172.16.101.2 congctl lock foo"
@@ -1559,13 +1963,21 @@ ipv4_del_addr_test()
$IP addr add dev dummy1 172.16.104.1/24
$IP addr add dev dummy1 172.16.104.11/24
+ $IP addr add dev dummy1 172.16.104.12/24
+ $IP addr add dev dummy1 172.16.104.13/24
$IP addr add dev dummy2 172.16.104.1/24
$IP addr add dev dummy2 172.16.104.11/24
+ $IP addr add dev dummy2 172.16.104.12/24
$IP route add 172.16.105.0/24 via 172.16.104.2 src 172.16.104.11
+ $IP route add 172.16.106.0/24 dev lo src 172.16.104.12
+ $IP route add table 0 172.16.107.0/24 via 172.16.104.2 src 172.16.104.13
$IP route add vrf red 172.16.105.0/24 via 172.16.104.2 src 172.16.104.11
+ $IP route add vrf red 172.16.106.0/24 dev lo src 172.16.104.12
set +e
# removing address from device in vrf should only remove route from vrf table
+ echo " Regular FIB info"
+
$IP addr del dev dummy2 172.16.104.11/24
$IP ro ls vrf red | grep -q 172.16.105.0/24
log_test $? 1 "Route removed from VRF when source address deleted"
@@ -1583,11 +1995,189 @@ ipv4_del_addr_test()
$IP ro ls vrf red | grep -q 172.16.105.0/24
log_test $? 0 "Route in VRF is not removed by address delete"
+ # removing address from device in vrf should only remove route from vrf
+ # table even when the associated fib info only differs in table ID
+ echo " Identical FIB info with different table ID"
+
+ $IP addr del dev dummy2 172.16.104.12/24
+ $IP ro ls vrf red | grep -q 172.16.106.0/24
+ log_test $? 1 "Route removed from VRF when source address deleted"
+
+ $IP ro ls | grep -q 172.16.106.0/24
+ log_test $? 0 "Route in default VRF not removed"
+
+ $IP addr add dev dummy2 172.16.104.12/24
+ $IP route add vrf red 172.16.106.0/24 dev lo src 172.16.104.12
+
+ $IP addr del dev dummy1 172.16.104.12/24
+ $IP ro ls | grep -q 172.16.106.0/24
+ log_test $? 1 "Route removed in default VRF when source address deleted"
+
+ $IP ro ls vrf red | grep -q 172.16.106.0/24
+ log_test $? 0 "Route in VRF is not removed by address delete"
+
+ # removing address from device in default vrf should remove route from
+ # the default vrf even when route was inserted with a table ID of 0.
+ echo " Table ID 0"
+
+ $IP addr del dev dummy1 172.16.104.13/24
+ $IP ro ls | grep -q 172.16.107.0/24
+ log_test $? 1 "Route removed in default VRF when source address deleted"
+
$IP li del dummy1
$IP li del dummy2
cleanup
}
+ipv6_del_addr_test()
+{
+ echo
+ echo "IPv6 delete address route tests"
+
+ setup
+
+ set -e
+ for i in $(seq 6); do
+ $IP li add dummy${i} up type dummy
+ done
+
+ $IP li add red up type vrf table 1111
+ $IP ro add vrf red unreachable default
+ for i in $(seq 4 6); do
+ $IP li set dummy${i} vrf red
+ done
+
+ $IP addr add dev dummy1 fe80::1/128
+ $IP addr add dev dummy1 2001:db8:101::1/64
+ $IP addr add dev dummy1 2001:db8:101::10/64
+ $IP addr add dev dummy1 2001:db8:101::11/64
+ $IP addr add dev dummy1 2001:db8:101::12/64
+ $IP addr add dev dummy1 2001:db8:101::13/64
+ $IP addr add dev dummy1 2001:db8:101::14/64
+ $IP addr add dev dummy1 2001:db8:101::15/64
+ $IP addr add dev dummy2 fe80::1/128
+ $IP addr add dev dummy2 2001:db8:101::1/64
+ $IP addr add dev dummy2 2001:db8:101::11/64
+ $IP addr add dev dummy3 fe80::1/128
+
+ $IP addr add dev dummy4 2001:db8:101::1/64
+ $IP addr add dev dummy4 2001:db8:101::10/64
+ $IP addr add dev dummy4 2001:db8:101::11/64
+ $IP addr add dev dummy4 2001:db8:101::12/64
+ $IP addr add dev dummy4 2001:db8:101::13/64
+ $IP addr add dev dummy4 2001:db8:101::14/64
+ $IP addr add dev dummy5 2001:db8:101::1/64
+ $IP addr add dev dummy5 2001:db8:101::11/64
+
+ # Single device using src address
+ $IP route add 2001:db8:110::/64 dev dummy3 src 2001:db8:101::10
+ # Two devices with the same source address
+ $IP route add 2001:db8:111::/64 dev dummy3 src 2001:db8:101::11
+ # VRF with single device using src address
+ $IP route add vrf red 2001:db8:110::/64 dev dummy6 src 2001:db8:101::10
+ # VRF with two devices using src address
+ $IP route add vrf red 2001:db8:111::/64 dev dummy6 src 2001:db8:101::11
+ # src address and nexthop dev in same VRF
+ $IP route add 2001:db8:112::/64 dev dummy3 src 2001:db8:101::12
+ $IP route add vrf red 2001:db8:112::/64 dev dummy6 src 2001:db8:101::12
+ # src address and nexthop device in different VRF
+ $IP route add 2001:db8:113::/64 dev lo src 2001:db8:101::13
+ $IP route add vrf red 2001:db8:113::/64 dev lo src 2001:db8:101::13
+ # table ID 0
+ $IP route add table 0 2001:db8:115::/64 via 2001:db8:101::2 src 2001:db8:101::15
+ # Link local source route
+ $IP route add 2001:db8:116::/64 dev dummy2 src fe80::1
+ $IP route add 2001:db8:117::/64 dev dummy3 src fe80::1
+ set +e
+
+ echo " Single device using src address"
+
+ $IP addr del dev dummy1 2001:db8:101::10/64
+ $IP -6 route show | grep -q "src 2001:db8:101::10 "
+ log_test $? 1 "Prefsrc removed when src address removed on other device"
+
+ echo " Two devices with the same source address"
+
+ $IP addr del dev dummy1 2001:db8:101::11/64
+ $IP -6 route show | grep -q "src 2001:db8:101::11 "
+ log_test $? 0 "Prefsrc not removed when src address exist on other device"
+
+ $IP addr del dev dummy2 2001:db8:101::11/64
+ $IP -6 route show | grep -q "src 2001:db8:101::11 "
+ log_test $? 1 "Prefsrc removed when src address removed on all devices"
+
+ echo " VRF with single device using src address"
+
+ $IP addr del dev dummy4 2001:db8:101::10/64
+ $IP -6 route show vrf red | grep -q "src 2001:db8:101::10 "
+ log_test $? 1 "Prefsrc removed when src address removed on other device"
+
+ echo " VRF with two devices using src address"
+
+ $IP addr del dev dummy4 2001:db8:101::11/64
+ $IP -6 route show vrf red | grep -q "src 2001:db8:101::11 "
+ log_test $? 0 "Prefsrc not removed when src address exist on other device"
+
+ $IP addr del dev dummy5 2001:db8:101::11/64
+ $IP -6 route show vrf red | grep -q "src 2001:db8:101::11 "
+ log_test $? 1 "Prefsrc removed when src address removed on all devices"
+
+ echo " src address and nexthop dev in same VRF"
+
+ $IP addr del dev dummy4 2001:db8:101::12/64
+ $IP -6 route show vrf red | grep -q "src 2001:db8:101::12 "
+ log_test $? 1 "Prefsrc removed from VRF when source address deleted"
+ $IP -6 route show | grep -q " src 2001:db8:101::12 "
+ log_test $? 0 "Prefsrc in default VRF not removed"
+
+ $IP addr add dev dummy4 2001:db8:101::12/64
+ $IP route replace vrf red 2001:db8:112::/64 dev dummy6 src 2001:db8:101::12
+ $IP addr del dev dummy1 2001:db8:101::12/64
+ $IP -6 route show vrf red | grep -q "src 2001:db8:101::12 "
+ log_test $? 0 "Prefsrc not removed from VRF when source address exist"
+ $IP -6 route show | grep -q " src 2001:db8:101::12 "
+ log_test $? 1 "Prefsrc in default VRF removed"
+
+ echo " src address and nexthop device in different VRF"
+
+ $IP addr del dev dummy4 2001:db8:101::13/64
+ $IP -6 route show vrf red | grep -q "src 2001:db8:101::13 "
+ log_test $? 0 "Prefsrc not removed from VRF when nexthop dev in diff VRF"
+ $IP -6 route show | grep -q "src 2001:db8:101::13 "
+ log_test $? 0 "Prefsrc not removed in default VRF"
+
+ $IP addr add dev dummy4 2001:db8:101::13/64
+ $IP addr del dev dummy1 2001:db8:101::13/64
+ $IP -6 route show vrf red | grep -q "src 2001:db8:101::13 "
+ log_test $? 1 "Prefsrc removed from VRF when nexthop dev in diff VRF"
+ $IP -6 route show | grep -q "src 2001:db8:101::13 "
+ log_test $? 1 "Prefsrc removed in default VRF"
+
+ echo " Table ID 0"
+
+ $IP addr del dev dummy1 2001:db8:101::15/64
+ $IP -6 route show | grep -q "src 2001:db8:101::15"
+ log_test $? 1 "Prefsrc removed from default VRF when source address deleted"
+
+ echo " Link local source route"
+ $IP addr del dev dummy1 fe80::1/128
+ $IP -6 route show | grep -q "2001:db8:116::/64 dev dummy2 src fe80::1"
+ log_test $? 0 "Prefsrc not removed when delete ll addr from other dev"
+ $IP addr del dev dummy2 fe80::1/128
+ $IP -6 route show | grep -q "2001:db8:116::/64 dev dummy2 src fe80::1"
+ log_test $? 1 "Prefsrc removed when delete ll addr"
+ $IP -6 route show | grep -q "2001:db8:117::/64 dev dummy3 src fe80::1"
+ log_test $? 0 "Prefsrc not removed when delete ll addr from other dev"
+ $IP addr add dev dummy1 fe80::1/128
+ $IP addr del dev dummy3 fe80::1/128
+ $IP -6 route show | grep -q "2001:db8:117::/64 dev dummy3 src fe80::1"
+ log_test $? 1 "Prefsrc removed even ll addr still exist on other dev"
+
+ for i in $(seq 6); do
+ $IP li del dummy${i}
+ done
+ cleanup
+}
ipv4_route_v6_gw_test()
{
@@ -1609,7 +2199,7 @@ ipv4_route_v6_gw_test()
check_route "172.16.104.0/24 via inet6 2001:db8:101::2 dev veth1"
fi
- run_cmd "ip netns exec ns1 ping -w1 -c1 172.16.104.1"
+ run_cmd "ip netns exec $ns1 ping -w1 -c1 172.16.104.1"
log_test $rc 0 "Single path route with IPv6 gateway - ping"
run_cmd "$IP ro del 172.16.104.0/24 via inet6 2001:db8:101::2"
@@ -1654,6 +2244,362 @@ ipv4_route_v6_gw_test()
route_cleanup
}
+socat_check()
+{
+ if [ ! -x "$(command -v socat)" ]; then
+ echo "socat command not found. Skipping test"
+ return 1
+ fi
+
+ return 0
+}
+
+iptables_check()
+{
+ iptables -t mangle -L OUTPUT &> /dev/null
+ if [ $? -ne 0 ]; then
+ echo "iptables configuration not supported. Skipping test"
+ return 1
+ fi
+
+ return 0
+}
+
+ip6tables_check()
+{
+ ip6tables -t mangle -L OUTPUT &> /dev/null
+ if [ $? -ne 0 ]; then
+ echo "ip6tables configuration not supported. Skipping test"
+ return 1
+ fi
+
+ return 0
+}
+
+ipv4_mangle_test()
+{
+ local rc
+
+ echo
+ echo "IPv4 mangling tests"
+
+ socat_check || return 1
+ iptables_check || return 1
+
+ route_setup
+ sleep 2
+
+ local tmp_file=$(mktemp)
+ ip netns exec $ns2 socat UDP4-LISTEN:54321,fork $tmp_file &
+
+ # Add a FIB rule and a route that will direct our connection to the
+ # listening server.
+ $IP rule add pref 100 ipproto udp sport 12345 dport 54321 table 123
+ $IP route add table 123 172.16.101.0/24 dev veth1
+
+ # Add an unreachable route to the main table that will block our
+ # connection in case the FIB rule is not hit.
+ $IP route add unreachable 172.16.101.2/32
+
+ run_cmd "echo a | $NS_EXEC socat STDIN UDP4:172.16.101.2:54321,sourceport=12345"
+ log_test $? 0 " Connection with correct parameters"
+
+ run_cmd "echo a | $NS_EXEC socat STDIN UDP4:172.16.101.2:54321,sourceport=11111"
+ log_test $? 1 " Connection with incorrect parameters"
+
+ # Add a mangling rule and make sure connection is still successful.
+ $NS_EXEC iptables -t mangle -A OUTPUT -j MARK --set-mark 1
+
+ run_cmd "echo a | $NS_EXEC socat STDIN UDP4:172.16.101.2:54321,sourceport=12345"
+ log_test $? 0 " Connection with correct parameters - mangling"
+
+ # Delete the mangling rule and make sure connection is still
+ # successful.
+ $NS_EXEC iptables -t mangle -D OUTPUT -j MARK --set-mark 1
+
+ run_cmd "echo a | $NS_EXEC socat STDIN UDP4:172.16.101.2:54321,sourceport=12345"
+ log_test $? 0 " Connection with correct parameters - no mangling"
+
+ # Verify connections were indeed successful on server side.
+ [[ $(cat $tmp_file | wc -l) -eq 3 ]]
+ log_test $? 0 " Connection check - server side"
+
+ $IP route del unreachable 172.16.101.2/32
+ $IP route del table 123 172.16.101.0/24 dev veth1
+ $IP rule del pref 100
+
+ { kill %% && wait %%; } 2>/dev/null
+ rm $tmp_file
+
+ route_cleanup
+}
+
+ipv6_mangle_test()
+{
+ local rc
+
+ echo
+ echo "IPv6 mangling tests"
+
+ socat_check || return 1
+ ip6tables_check || return 1
+
+ route_setup
+ sleep 2
+
+ local tmp_file=$(mktemp)
+ ip netns exec $ns2 socat UDP6-LISTEN:54321,fork $tmp_file &
+
+ # Add a FIB rule and a route that will direct our connection to the
+ # listening server.
+ $IP -6 rule add pref 100 ipproto udp sport 12345 dport 54321 table 123
+ $IP -6 route add table 123 2001:db8:101::/64 dev veth1
+
+ # Add an unreachable route to the main table that will block our
+ # connection in case the FIB rule is not hit.
+ $IP -6 route add unreachable 2001:db8:101::2/128
+
+ run_cmd "echo a | $NS_EXEC socat STDIN UDP6:[2001:db8:101::2]:54321,sourceport=12345"
+ log_test $? 0 " Connection with correct parameters"
+
+ run_cmd "echo a | $NS_EXEC socat STDIN UDP6:[2001:db8:101::2]:54321,sourceport=11111"
+ log_test $? 1 " Connection with incorrect parameters"
+
+ # Add a mangling rule and make sure connection is still successful.
+ $NS_EXEC ip6tables -t mangle -A OUTPUT -j MARK --set-mark 1
+
+ run_cmd "echo a | $NS_EXEC socat STDIN UDP6:[2001:db8:101::2]:54321,sourceport=12345"
+ log_test $? 0 " Connection with correct parameters - mangling"
+
+ # Delete the mangling rule and make sure connection is still
+ # successful.
+ $NS_EXEC ip6tables -t mangle -D OUTPUT -j MARK --set-mark 1
+
+ run_cmd "echo a | $NS_EXEC socat STDIN UDP6:[2001:db8:101::2]:54321,sourceport=12345"
+ log_test $? 0 " Connection with correct parameters - no mangling"
+
+ # Verify connections were indeed successful on server side.
+ [[ $(cat $tmp_file | wc -l) -eq 3 ]]
+ log_test $? 0 " Connection check - server side"
+
+ $IP -6 route del unreachable 2001:db8:101::2/128
+ $IP -6 route del table 123 2001:db8:101::/64 dev veth1
+ $IP -6 rule del pref 100
+
+ { kill %% && wait %%; } 2>/dev/null
+ rm $tmp_file
+
+ route_cleanup
+}
+
+ip_neigh_get_check()
+{
+ ip neigh help 2>&1 | grep -q 'ip neigh get'
+ if [ $? -ne 0 ]; then
+ echo "iproute2 command does not support neigh get. Skipping test"
+ return 1
+ fi
+
+ return 0
+}
+
+ipv4_bcast_neigh_test()
+{
+ local rc
+
+ echo
+ echo "IPv4 broadcast neighbour tests"
+
+ ip_neigh_get_check || return 1
+
+ setup
+
+ set -e
+ run_cmd "$IP neigh add 192.0.2.111 lladdr 00:11:22:33:44:55 nud perm dev dummy0"
+ run_cmd "$IP neigh add 192.0.2.255 lladdr 00:11:22:33:44:55 nud perm dev dummy0"
+
+ run_cmd "$IP neigh get 192.0.2.111 dev dummy0"
+ run_cmd "$IP neigh get 192.0.2.255 dev dummy0"
+
+ run_cmd "$IP address add 192.0.2.1/24 broadcast 192.0.2.111 dev dummy0"
+
+ run_cmd "$IP neigh add 203.0.113.111 nud failed dev dummy0"
+ run_cmd "$IP neigh add 203.0.113.255 nud failed dev dummy0"
+
+ run_cmd "$IP neigh get 203.0.113.111 dev dummy0"
+ run_cmd "$IP neigh get 203.0.113.255 dev dummy0"
+
+ run_cmd "$IP address add 203.0.113.1/24 broadcast 203.0.113.111 dev dummy0"
+ set +e
+
+ run_cmd "$IP neigh get 192.0.2.111 dev dummy0"
+ log_test $? 0 "Resolved neighbour for broadcast address"
+
+ run_cmd "$IP neigh get 192.0.2.255 dev dummy0"
+ log_test $? 0 "Resolved neighbour for network broadcast address"
+
+ run_cmd "$IP neigh get 203.0.113.111 dev dummy0"
+ log_test $? 2 "Unresolved neighbour for broadcast address"
+
+ run_cmd "$IP neigh get 203.0.113.255 dev dummy0"
+ log_test $? 2 "Unresolved neighbour for network broadcast address"
+
+ cleanup
+}
+
+mpath_dep_check()
+{
+ if [ ! -x "$(command -v mausezahn)" ]; then
+ echo "mausezahn command not found. Skipping test"
+ return 1
+ fi
+
+ if [ ! -x "$(command -v jq)" ]; then
+ echo "jq command not found. Skipping test"
+ return 1
+ fi
+
+ if [ ! -x "$(command -v bc)" ]; then
+ echo "bc command not found. Skipping test"
+ return 1
+ fi
+
+ if [ ! -x "$(command -v perf)" ]; then
+ echo "perf command not found. Skipping test"
+ return 1
+ fi
+
+ perf list fib:* | grep -q fib_table_lookup
+ if [ $? -ne 0 ]; then
+ echo "IPv4 FIB tracepoint not found. Skipping test"
+ return 1
+ fi
+
+ perf list fib6:* | grep -q fib6_table_lookup
+ if [ $? -ne 0 ]; then
+ echo "IPv6 FIB tracepoint not found. Skipping test"
+ return 1
+ fi
+
+ return 0
+}
+
+link_stats_get()
+{
+ local ns=$1; shift
+ local dev=$1; shift
+ local dir=$1; shift
+ local stat=$1; shift
+
+ ip -n $ns -j -s link show dev $dev \
+ | jq '.[]["stats64"]["'$dir'"]["'$stat'"]'
+}
+
+list_rcv_eval()
+{
+ local file=$1; shift
+ local expected=$1; shift
+
+ local count=$(tail -n 1 $file | jq '.["counter-value"] | tonumber | floor')
+ local ratio=$(echo "scale=2; $count / $expected" | bc -l)
+ local res=$(echo "$ratio >= 0.95" | bc)
+ [[ $res -eq 1 ]]
+ log_test $? 0 "Multipath route hit ratio ($ratio)"
+}
+
+ipv4_mpath_list_test()
+{
+ echo
+ echo "IPv4 multipath list receive tests"
+
+ mpath_dep_check || return 1
+
+ route_setup
+
+ set -e
+ run_cmd "ip netns exec $ns1 ethtool -K veth1 tcp-segmentation-offload off"
+
+ run_cmd "ip netns exec $ns2 bash -c \"echo 20000 > /sys/class/net/veth2/gro_flush_timeout\""
+ run_cmd "ip netns exec $ns2 bash -c \"echo 1 > /sys/class/net/veth2/napi_defer_hard_irqs\""
+ run_cmd "ip netns exec $ns2 ethtool -K veth2 generic-receive-offload on"
+ run_cmd "ip -n $ns2 link add name nh1 up type dummy"
+ run_cmd "ip -n $ns2 link add name nh2 up type dummy"
+ run_cmd "ip -n $ns2 address add 172.16.201.1/24 dev nh1"
+ run_cmd "ip -n $ns2 address add 172.16.202.1/24 dev nh2"
+ run_cmd "ip -n $ns2 neigh add 172.16.201.2 lladdr 00:11:22:33:44:55 nud perm dev nh1"
+ run_cmd "ip -n $ns2 neigh add 172.16.202.2 lladdr 00:aa:bb:cc:dd:ee nud perm dev nh2"
+ run_cmd "ip -n $ns2 route add 203.0.113.0/24
+ nexthop via 172.16.201.2 nexthop via 172.16.202.2"
+ run_cmd "ip netns exec $ns2 sysctl -qw net.ipv4.fib_multipath_hash_policy=1"
+ run_cmd "ip netns exec $ns2 sysctl -qw net.ipv4.conf.veth2.rp_filter=0"
+ run_cmd "ip netns exec $ns2 sysctl -qw net.ipv4.conf.all.rp_filter=0"
+ run_cmd "ip netns exec $ns2 sysctl -qw net.ipv4.conf.default.rp_filter=0"
+ set +e
+
+ local dmac=$(ip -n $ns2 -j link show dev veth2 | jq -r '.[]["address"]')
+ local tmp_file=$(mktemp)
+ local cmd="ip netns exec $ns1 mausezahn veth1 -a own -b $dmac
+ -A 172.16.101.1 -B 203.0.113.1 -t udp 'sp=12345,dp=0-65535' -q"
+
+ # Packets forwarded in a list using a multipath route must not reuse a
+ # cached result so that a flow always hits the same nexthop. In other
+ # words, the FIB lookup tracepoint needs to be triggered for every
+ # packet.
+ local t0_rx_pkts=$(link_stats_get $ns2 veth2 rx packets)
+ run_cmd "perf stat -a -e fib:fib_table_lookup --filter 'err == 0' -j -o $tmp_file -- $cmd"
+ local t1_rx_pkts=$(link_stats_get $ns2 veth2 rx packets)
+ local diff=$(echo $t1_rx_pkts - $t0_rx_pkts | bc -l)
+ list_rcv_eval $tmp_file $diff
+
+ rm $tmp_file
+ route_cleanup
+}
+
+ipv6_mpath_list_test()
+{
+ echo
+ echo "IPv6 multipath list receive tests"
+
+ mpath_dep_check || return 1
+
+ route_setup
+
+ set -e
+ run_cmd "ip netns exec $ns1 ethtool -K veth1 tcp-segmentation-offload off"
+
+ run_cmd "ip netns exec $ns2 bash -c \"echo 20000 > /sys/class/net/veth2/gro_flush_timeout\""
+ run_cmd "ip netns exec $ns2 bash -c \"echo 1 > /sys/class/net/veth2/napi_defer_hard_irqs\""
+ run_cmd "ip netns exec $ns2 ethtool -K veth2 generic-receive-offload on"
+ run_cmd "ip -n $ns2 link add name nh1 up type dummy"
+ run_cmd "ip -n $ns2 link add name nh2 up type dummy"
+ run_cmd "ip -n $ns2 -6 address add 2001:db8:201::1/64 dev nh1"
+ run_cmd "ip -n $ns2 -6 address add 2001:db8:202::1/64 dev nh2"
+ run_cmd "ip -n $ns2 -6 neigh add 2001:db8:201::2 lladdr 00:11:22:33:44:55 nud perm dev nh1"
+ run_cmd "ip -n $ns2 -6 neigh add 2001:db8:202::2 lladdr 00:aa:bb:cc:dd:ee nud perm dev nh2"
+ run_cmd "ip -n $ns2 -6 route add 2001:db8:301::/64
+ nexthop via 2001:db8:201::2 nexthop via 2001:db8:202::2"
+ run_cmd "ip netns exec $ns2 sysctl -qw net.ipv6.fib_multipath_hash_policy=1"
+ set +e
+
+ local dmac=$(ip -n $ns2 -j link show dev veth2 | jq -r '.[]["address"]')
+ local tmp_file=$(mktemp)
+ local cmd="ip netns exec $ns1 mausezahn -6 veth1 -a own -b $dmac
+ -A 2001:db8:101::1 -B 2001:db8:301::1 -t udp 'sp=12345,dp=0-65535' -q"
+
+ # Packets forwarded in a list using a multipath route must not reuse a
+ # cached result so that a flow always hits the same nexthop. In other
+ # words, the FIB lookup tracepoint needs to be triggered for every
+ # packet.
+ local t0_rx_pkts=$(link_stats_get $ns2 veth2 rx packets)
+ run_cmd "perf stat -a -e fib6:fib6_table_lookup --filter 'err == 0' -j -o $tmp_file -- $cmd"
+ local t1_rx_pkts=$(link_stats_get $ns2 veth2 rx packets)
+ local diff=$(echo $t1_rx_pkts - $t0_rx_pkts | bc -l)
+ list_rcv_eval $tmp_file $diff
+
+ rm $tmp_file
+ route_cleanup
+}
+
################################################################################
# usage
@@ -1673,6 +2619,8 @@ EOF
################################################################################
# main
+trap cleanup EXIT
+
while getopts :t:pPhv o
do
case $o in
@@ -1717,15 +2665,24 @@ do
fib_carrier_test|carrier) fib_carrier_test;;
fib_rp_filter_test|rp_filter) fib_rp_filter_test;;
fib_nexthop_test|nexthop) fib_nexthop_test;;
+ fib_notify_test|ipv4_notify) fib_notify_test;;
+ fib6_notify_test|ipv6_notify) fib6_notify_test;;
fib_suppress_test|suppress) fib_suppress_test;;
ipv6_route_test|ipv6_rt) ipv6_route_test;;
ipv4_route_test|ipv4_rt) ipv4_route_test;;
ipv6_addr_metric) ipv6_addr_metric_test;;
ipv4_addr_metric) ipv4_addr_metric_test;;
ipv4_del_addr) ipv4_del_addr_test;;
+ ipv6_del_addr) ipv6_del_addr_test;;
ipv6_route_metrics) ipv6_route_metrics_test;;
ipv4_route_metrics) ipv4_route_metrics_test;;
ipv4_route_v6_gw) ipv4_route_v6_gw_test;;
+ ipv4_mangle) ipv4_mangle_test;;
+ ipv6_mangle) ipv6_mangle_test;;
+ ipv4_bcast_neigh) ipv4_bcast_neigh_test;;
+ fib6_gc_test|ipv6_gc) fib6_gc_test;;
+ ipv4_mpath_list) ipv4_mpath_list_test;;
+ ipv6_mpath_list) ipv6_mpath_list_test;;
help) echo "Test names: $TESTS"; exit 0;;
esac
diff --git a/tools/testing/selftests/net/forwarding/Makefile b/tools/testing/selftests/net/forwarding/Makefile
index 250fbb2d1625..535865b3d1d6 100644
--- a/tools/testing/selftests/net/forwarding/Makefile
+++ b/tools/testing/selftests/net/forwarding/Makefile
@@ -1,14 +1,40 @@
# SPDX-License-Identifier: GPL-2.0+ OR MIT
-TEST_PROGS = bridge_igmp.sh \
+TEST_PROGS = bridge_fdb_learning_limit.sh \
+ bridge_igmp.sh \
+ bridge_locked_port.sh \
+ bridge_mdb.sh \
+ bridge_mdb_host.sh \
+ bridge_mdb_max.sh \
+ bridge_mdb_port_down.sh \
+ bridge_mld.sh \
bridge_port_isolation.sh \
bridge_sticky_fdb.sh \
bridge_vlan_aware.sh \
+ bridge_vlan_mcast.sh \
bridge_vlan_unaware.sh \
+ custom_multipath_hash.sh \
+ dual_vxlan_bridge.sh \
+ ethtool_extended_state.sh \
+ ethtool_mm.sh \
+ ethtool_rmon.sh \
ethtool.sh \
+ gre_custom_multipath_hash.sh \
gre_inner_v4_multipath.sh \
gre_inner_v6_multipath.sh \
+ gre_multipath_nh_res.sh \
+ gre_multipath_nh.sh \
gre_multipath.sh \
+ hw_stats_l3.sh \
+ hw_stats_l3_gre.sh \
+ ip6_forward_instats_vrf.sh \
+ ip6gre_custom_multipath_hash.sh \
+ ip6gre_flat_key.sh \
+ ip6gre_flat_keys.sh \
+ ip6gre_flat.sh \
+ ip6gre_hier_key.sh \
+ ip6gre_hier_keys.sh \
+ ip6gre_hier.sh \
ip6gre_inner_v4_multipath.sh \
ip6gre_inner_v6_multipath.sh \
ipip_flat_gre_key.sh \
@@ -17,6 +43,7 @@ TEST_PROGS = bridge_igmp.sh \
ipip_hier_gre_key.sh \
ipip_hier_gre_keys.sh \
ipip_hier_gre.sh \
+ local_termination.sh \
loopback.sh \
mirror_gre_bound.sh \
mirror_gre_bridge_1d.sh \
@@ -32,45 +59,78 @@ TEST_PROGS = bridge_igmp.sh \
mirror_gre_vlan_bridge_1q.sh \
mirror_gre_vlan.sh \
mirror_vlan.sh \
+ no_forwarding.sh \
+ pedit_dsfield.sh \
+ pedit_ip.sh \
+ pedit_l4port.sh \
+ q_in_vni_ipv6.sh \
+ q_in_vni.sh \
router_bridge.sh \
+ router_bridge_1d.sh \
+ router_bridge_1d_lag.sh \
+ router_bridge_lag.sh \
router_bridge_vlan.sh \
+ router_bridge_vlan_upper.sh \
+ router_bridge_pvid_vlan_upper.sh \
+ router_bridge_vlan_upper_pvid.sh \
router_broadcast.sh \
+ router_mpath_nh_res.sh \
router_mpath_nh.sh \
router_multicast.sh \
router_multipath.sh \
+ router_nh.sh \
router.sh \
router_vid_1.sh \
sch_ets.sh \
+ sch_red.sh \
sch_tbf_ets.sh \
sch_tbf_prio.sh \
sch_tbf_root.sh \
+ skbedit_priority.sh \
tc_actions.sh \
tc_chains.sh \
tc_flower_router.sh \
tc_flower.sh \
+ tc_flower_l2_miss.sh \
+ tc_flower_cfm.sh \
+ tc_flower_port_range.sh \
+ tc_mpls_l2vpn.sh \
+ tc_police.sh \
tc_shblocks.sh \
+ tc_tunnel_key.sh \
tc_vlan_modify.sh \
+ vxlan_asymmetric_ipv6.sh \
vxlan_asymmetric.sh \
+ vxlan_bridge_1d_ipv6.sh \
+ vxlan_bridge_1d_port_8472_ipv6.sh \
vxlan_bridge_1d_port_8472.sh \
vxlan_bridge_1d.sh \
+ vxlan_bridge_1q_ipv6.sh \
+ vxlan_bridge_1q_port_8472_ipv6.sh \
vxlan_bridge_1q_port_8472.sh \
vxlan_bridge_1q.sh \
+ vxlan_symmetric_ipv6.sh \
vxlan_symmetric.sh
-TEST_PROGS_EXTENDED := devlink_lib.sh \
+TEST_FILES := devlink_lib.sh \
ethtool_lib.sh \
fib_offload_lib.sh \
forwarding.config.sample \
+ ip6gre_lib.sh \
ipip_lib.sh \
lib.sh \
mirror_gre_lib.sh \
mirror_gre_topo_lib.sh \
mirror_lib.sh \
mirror_topo_lib.sh \
+ router_mpath_nh_lib.sh \
sch_ets_core.sh \
sch_ets_tests.sh \
sch_tbf_core.sh \
sch_tbf_etsprio.sh \
tc_common.sh
+TEST_INCLUDES := \
+ ../lib.sh
+
include ../../lib.mk
diff --git a/tools/testing/selftests/net/forwarding/bridge_fdb_learning_limit.sh b/tools/testing/selftests/net/forwarding/bridge_fdb_learning_limit.sh
new file mode 100755
index 000000000000..0760a34b7114
--- /dev/null
+++ b/tools/testing/selftests/net/forwarding/bridge_fdb_learning_limit.sh
@@ -0,0 +1,283 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+# ShellCheck incorrectly believes that most of the code here is unreachable
+# because it's invoked by variable name following ALL_TESTS.
+#
+# shellcheck disable=SC2317
+
+ALL_TESTS="check_accounting check_limit"
+NUM_NETIFS=6
+source lib.sh
+
+TEST_MAC_BASE=de:ad:be:ef:42:
+
+NUM_PKTS=16
+FDB_LIMIT=8
+
+FDB_TYPES=(
+ # name is counted? overrides learned?
+ 'learned 1 0'
+ 'static 0 1'
+ 'user 0 1'
+ 'extern_learn 0 1'
+ 'local 0 1'
+)
+
+mac()
+{
+ printf "${TEST_MAC_BASE}%02x" "$1"
+}
+
+H1_DEFAULT_MAC=$(mac 42)
+
+switch_create()
+{
+ ip link add dev br0 type bridge
+
+ ip link set dev "$swp1" master br0
+ ip link set dev "$swp2" master br0
+ # swp3 is used to add local MACs, so do not add it to the bridge yet.
+
+ # swp2 is only used for replying when learning on swp1, its MAC should not be learned.
+ ip link set dev "$swp2" type bridge_slave learning off
+
+ ip link set dev br0 up
+
+ ip link set dev "$swp1" up
+ ip link set dev "$swp2" up
+ ip link set dev "$swp3" up
+}
+
+switch_destroy()
+{
+ ip link set dev "$swp3" down
+ ip link set dev "$swp2" down
+ ip link set dev "$swp1" down
+
+ ip link del dev br0
+}
+
+h_create()
+{
+ ip link set "$h1" addr "$H1_DEFAULT_MAC"
+
+ simple_if_init "$h1" 192.0.2.1/24
+ simple_if_init "$h2" 192.0.2.2/24
+}
+
+h_destroy()
+{
+ simple_if_fini "$h1" 192.0.2.1/24
+ simple_if_fini "$h2" 192.0.2.2/24
+}
+
+setup_prepare()
+{
+ h1=${NETIFS[p1]}
+ swp1=${NETIFS[p2]}
+
+ h2=${NETIFS[p3]}
+ swp2=${NETIFS[p4]}
+
+ swp3=${NETIFS[p6]}
+
+ vrf_prepare
+
+ h_create
+
+ switch_create
+}
+
+cleanup()
+{
+ pre_cleanup
+
+ switch_destroy
+
+ h_destroy
+
+ vrf_cleanup
+}
+
+fdb_get_n_learned()
+{
+ ip -d -j link show dev br0 type bridge | \
+ jq '.[]["linkinfo"]["info_data"]["fdb_n_learned"]'
+}
+
+fdb_get_n_mac()
+{
+ local mac=${1}
+
+ bridge -j fdb show br br0 | \
+ jq "map(select(.mac == \"${mac}\" and (has(\"vlan\") | not))) | length"
+}
+
+fdb_fill_learned()
+{
+ local i
+
+ for i in $(seq 1 "$NUM_PKTS"); do
+ fdb_add learned "$(mac "$i")"
+ done
+}
+
+fdb_reset()
+{
+ bridge fdb flush dev br0
+
+ # Keep the default MAC address of h1 in the table. We set it to a different one when
+ # testing dynamic learning.
+ bridge fdb add "$H1_DEFAULT_MAC" dev "$swp1" master static use
+}
+
+fdb_add()
+{
+ local type=$1 mac=$2
+
+ case "$type" in
+ learned)
+ ip link set "$h1" addr "$mac"
+ # Wait for a reply so we implicitly wait until after the forwarding
+ # code finished and the FDB entry was created.
+ PING_COUNT=1 ping_do "$h1" 192.0.2.2
+ check_err $? "Failed to ping another bridge port"
+ ip link set "$h1" addr "$H1_DEFAULT_MAC"
+ ;;
+ local)
+ ip link set dev "$swp3" addr "$mac" && ip link set "$swp3" master br0
+ ;;
+ static)
+ bridge fdb replace "$mac" dev "$swp1" master static
+ ;;
+ user)
+ bridge fdb replace "$mac" dev "$swp1" master static use
+ ;;
+ extern_learn)
+ bridge fdb replace "$mac" dev "$swp1" master extern_learn
+ ;;
+ esac
+
+ check_err $? "Failed to add a FDB entry of type ${type}"
+}
+
+fdb_del()
+{
+ local type=$1 mac=$2
+
+ case "$type" in
+ local)
+ ip link set "$swp3" nomaster
+ ;;
+ *)
+ bridge fdb del "$mac" dev "$swp1" master
+ ;;
+ esac
+
+ check_err $? "Failed to remove a FDB entry of type ${type}"
+}
+
+check_accounting_one_type()
+{
+ local type=$1 is_counted=$2 overrides_learned=$3
+ shift 3
+ RET=0
+
+ fdb_reset
+ fdb_add "$type" "$(mac 0)"
+ learned=$(fdb_get_n_learned)
+ [ "$learned" -ne "$is_counted" ]
+ check_fail $? "Inserted FDB type ${type}: Expected the count ${is_counted}, but got ${learned}"
+
+ fdb_del "$type" "$(mac 0)"
+ learned=$(fdb_get_n_learned)
+ [ "$learned" -ne 0 ]
+ check_fail $? "Removed FDB type ${type}: Expected the count 0, but got ${learned}"
+
+ if [ "$overrides_learned" -eq 1 ]; then
+ fdb_reset
+ fdb_add learned "$(mac 0)"
+ fdb_add "$type" "$(mac 0)"
+ learned=$(fdb_get_n_learned)
+ [ "$learned" -ne "$is_counted" ]
+ check_fail $? "Set a learned entry to FDB type ${type}: Expected the count ${is_counted}, but got ${learned}"
+ fdb_del "$type" "$(mac 0)"
+ fi
+
+ log_test "FDB accounting interacting with FDB type ${type}"
+}
+
+check_accounting()
+{
+ local type_args learned
+ RET=0
+
+ fdb_reset
+ learned=$(fdb_get_n_learned)
+ [ "$learned" -ne 0 ]
+ check_fail $? "Flushed the FDB table: Expected the count 0, but got ${learned}"
+
+ fdb_fill_learned
+ sleep 1
+
+ learned=$(fdb_get_n_learned)
+ [ "$learned" -ne "$NUM_PKTS" ]
+ check_fail $? "Filled the FDB table: Expected the count ${NUM_PKTS}, but got ${learned}"
+
+ log_test "FDB accounting"
+
+ for type_args in "${FDB_TYPES[@]}"; do
+ # This is intentional use of word splitting.
+ # shellcheck disable=SC2086
+ check_accounting_one_type $type_args
+ done
+}
+
+check_limit_one_type()
+{
+ local type=$1 is_counted=$2
+ local n_mac expected=$((1 - is_counted))
+ RET=0
+
+ fdb_reset
+ fdb_fill_learned
+
+ fdb_add "$type" "$(mac 0)"
+ n_mac=$(fdb_get_n_mac "$(mac 0)")
+ [ "$n_mac" -ne "$expected" ]
+ check_fail $? "Inserted FDB type ${type} at limit: Expected the count ${expected}, but got ${n_mac}"
+
+ log_test "FDB limits interacting with FDB type ${type}"
+}
+
+check_limit()
+{
+ local learned
+ RET=0
+
+ ip link set br0 type bridge fdb_max_learned "$FDB_LIMIT"
+
+ fdb_reset
+ fdb_fill_learned
+
+ learned=$(fdb_get_n_learned)
+ [ "$learned" -ne "$FDB_LIMIT" ]
+ check_fail $? "Filled the limited FDB table: Expected the count ${FDB_LIMIT}, but got ${learned}"
+
+ log_test "FDB limits"
+
+ for type_args in "${FDB_TYPES[@]}"; do
+ # This is intentional use of word splitting.
+ # shellcheck disable=SC2086
+ check_limit_one_type $type_args
+ done
+}
+
+trap cleanup EXIT
+
+setup_prepare
+
+tests_run
+
+exit $EXIT_STATUS
diff --git a/tools/testing/selftests/net/forwarding/bridge_igmp.sh b/tools/testing/selftests/net/forwarding/bridge_igmp.sh
index 88d2472ba151..2aa66d2a1702 100755
--- a/tools/testing/selftests/net/forwarding/bridge_igmp.sh
+++ b/tools/testing/selftests/net/forwarding/bridge_igmp.sh
@@ -1,11 +1,37 @@
#!/bin/bash
# SPDX-License-Identifier: GPL-2.0
-ALL_TESTS="reportleave_test"
+ALL_TESTS="v2reportleave_test v3include_test v3inc_allow_test v3inc_is_include_test \
+ v3inc_is_exclude_test v3inc_to_exclude_test v3exc_allow_test v3exc_is_include_test \
+ v3exc_is_exclude_test v3exc_to_exclude_test v3inc_block_test v3exc_block_test \
+ v3exc_timeout_test v3star_ex_auto_add_test"
NUM_NETIFS=4
CHECK_TC="yes"
TEST_GROUP="239.10.10.10"
TEST_GROUP_MAC="01:00:5e:0a:0a:0a"
+
+ALL_GROUP="224.0.0.1"
+ALL_MAC="01:00:5e:00:00:01"
+
+# IGMPv3 is_in report: grp 239.10.10.10 is_include 192.0.2.1,192.0.2.2,192.0.2.3
+MZPKT_IS_INC="22:00:9d:de:00:00:00:01:01:00:00:03:ef:0a:0a:0a:c0:00:02:01:c0:00:02:02:c0:00:02:03"
+# IGMPv3 is_in report: grp 239.10.10.10 is_include 192.0.2.10,192.0.2.11,192.0.2.12
+MZPKT_IS_INC2="22:00:9d:c3:00:00:00:01:01:00:00:03:ef:0a:0a:0a:c0:00:02:0a:c0:00:02:0b:c0:00:02:0c"
+# IGMPv3 is_in report: grp 239.10.10.10 is_include 192.0.2.20,192.0.2.30
+MZPKT_IS_INC3="22:00:5f:b4:00:00:00:01:01:00:00:02:ef:0a:0a:0a:c0:00:02:14:c0:00:02:1e"
+# IGMPv3 allow report: grp 239.10.10.10 allow 192.0.2.10,192.0.2.11,192.0.2.12
+MZPKT_ALLOW="22:00:99:c3:00:00:00:01:05:00:00:03:ef:0a:0a:0a:c0:00:02:0a:c0:00:02:0b:c0:00:02:0c"
+# IGMPv3 allow report: grp 239.10.10.10 allow 192.0.2.20,192.0.2.30
+MZPKT_ALLOW2="22:00:5b:b4:00:00:00:01:05:00:00:02:ef:0a:0a:0a:c0:00:02:14:c0:00:02:1e"
+# IGMPv3 is_ex report: grp 239.10.10.10 is_exclude 192.0.2.1,192.0.2.2,192.0.2.20,192.0.2.21
+MZPKT_IS_EXC="22:00:da:b6:00:00:00:01:02:00:00:04:ef:0a:0a:0a:c0:00:02:01:c0:00:02:02:c0:00:02:14:c0:00:02:15"
+# IGMPv3 is_ex report: grp 239.10.10.10 is_exclude 192.0.2.20,192.0.2.30
+MZPKT_IS_EXC2="22:00:5e:b4:00:00:00:01:02:00:00:02:ef:0a:0a:0a:c0:00:02:14:c0:00:02:1e"
+# IGMPv3 to_ex report: grp 239.10.10.10 to_exclude 192.0.2.1,192.0.2.20,192.0.2.30
+MZPKT_TO_EXC="22:00:9a:b1:00:00:00:01:04:00:00:03:ef:0a:0a:0a:c0:00:02:01:c0:00:02:14:c0:00:02:1e"
+# IGMPv3 block report: grp 239.10.10.10 block 192.0.2.1,192.0.2.20,192.0.2.30
+MZPKT_BLOCK="22:00:98:b1:00:00:00:01:06:00:00:03:ef:0a:0a:0a:c0:00:02:01:c0:00:02:14:c0:00:02:1e"
+
source lib.sh
h1_create()
@@ -70,47 +96,13 @@ cleanup()
switch_destroy
- # Always cleanup the mcast group
- ip address del dev $h2 $TEST_GROUP/32 2>&1 1>/dev/null
-
h2_destroy
h1_destroy
vrf_cleanup
}
-# return 0 if the packet wasn't seen on host2_if or 1 if it was
-mcast_packet_test()
-{
- local mac=$1
- local ip=$2
- local host1_if=$3
- local host2_if=$4
- local seen=0
-
- # Add an ACL on `host2_if` which will tell us whether the packet
- # was received by it or not.
- tc qdisc add dev $host2_if ingress
- tc filter add dev $host2_if ingress protocol ip pref 1 handle 101 \
- flower dst_mac $mac action drop
-
- $MZ $host1_if -c 1 -p 64 -b $mac -B $ip -t udp "dp=4096,sp=2048" -q
- sleep 1
-
- tc -j -s filter show dev $host2_if ingress \
- | jq -e ".[] | select(.options.handle == 101) \
- | select(.options.actions[0].stats.packets == 1)" &> /dev/null
- if [[ $? -eq 0 ]]; then
- seen=1
- fi
-
- tc filter del dev $host2_if ingress protocol ip pref 1 handle 101 flower
- tc qdisc del dev $host2_if ingress
-
- return $seen
-}
-
-reportleave_test()
+v2reportleave_test()
{
RET=0
ip address add dev $h2 $TEST_GROUP/32 autojoin
@@ -118,12 +110,12 @@ reportleave_test()
sleep 5
bridge mdb show dev br0 | grep $TEST_GROUP 1>/dev/null
- check_err $? "Report didn't create mdb entry for $TEST_GROUP"
+ check_err $? "IGMPv2 report didn't create mdb entry for $TEST_GROUP"
- mcast_packet_test $TEST_GROUP_MAC $TEST_GROUP $h1 $h2
+ mcast_packet_test $TEST_GROUP_MAC 192.0.2.1 $TEST_GROUP $h1 $h2
check_fail $? "Traffic to $TEST_GROUP wasn't forwarded"
- log_test "IGMP report $TEST_GROUP"
+ log_test "IGMPv2 report $TEST_GROUP"
RET=0
bridge mdb show dev br0 | grep $TEST_GROUP 1>/dev/null
@@ -136,10 +128,430 @@ reportleave_test()
bridge mdb show dev br0 | grep $TEST_GROUP 1>/dev/null
check_fail $? "Leave didn't delete mdb entry for $TEST_GROUP"
- mcast_packet_test $TEST_GROUP_MAC $TEST_GROUP $h1 $h2
+ mcast_packet_test $TEST_GROUP_MAC 192.0.2.1 $TEST_GROUP $h1 $h2
check_err $? "Traffic to $TEST_GROUP was forwarded without mdb entry"
- log_test "IGMP leave $TEST_GROUP"
+ log_test "IGMPv2 leave $TEST_GROUP"
+}
+
+v3include_prepare()
+{
+ local host1_if=$1
+ local mac=$2
+ local group=$3
+ local X=("192.0.2.1" "192.0.2.2" "192.0.2.3")
+
+ ip link set dev br0 type bridge mcast_igmp_version 3
+ check_err $? "Could not change bridge IGMP version to 3"
+
+ $MZ $host1_if -b $mac -c 1 -B $group -t ip "proto=2,p=$MZPKT_IS_INC" -q
+ sleep 1
+ bridge -j -d -s mdb show dev br0 \
+ | jq -e ".[].mdb[] | \
+ select(.grp == \"$TEST_GROUP\" and .source_list != null)" &>/dev/null
+ check_err $? "Missing *,G entry with source list"
+ bridge -j -d -s mdb show dev br0 \
+ | jq -e ".[].mdb[] | \
+ select(.grp == \"$TEST_GROUP\" and \
+ .source_list != null and .filter_mode == \"include\")" &>/dev/null
+ check_err $? "Wrong *,G entry filter mode"
+ brmcast_check_sg_entries "is_include" "${X[@]}"
+}
+
+v3exclude_prepare()
+{
+ local host1_if=$1
+ local mac=$2
+ local group=$3
+ local pkt=$4
+ local X=("192.0.2.1" "192.0.2.2")
+ local Y=("192.0.2.20" "192.0.2.21")
+
+ v3include_prepare $host1_if $mac $group
+
+ $MZ $host1_if -c 1 -b $mac -B $group -t ip "proto=2,p=$MZPKT_IS_EXC" -q
+ sleep 1
+ bridge -j -d -s mdb show dev br0 \
+ | jq -e ".[].mdb[] | \
+ select(.grp == \"$TEST_GROUP\" and \
+ .source_list != null and .filter_mode == \"exclude\")" &>/dev/null
+ check_err $? "Wrong *,G entry filter mode"
+
+ brmcast_check_sg_entries "is_exclude" "${X[@]}" "${Y[@]}"
+
+ brmcast_check_sg_state 0 "${X[@]}"
+ brmcast_check_sg_state 1 "${Y[@]}"
+
+ bridge -j -d -s mdb show dev br0 \
+ | jq -e ".[].mdb[] | \
+ select(.grp == \"$TEST_GROUP\" and \
+ .source_list != null and
+ .source_list[].address == \"192.0.2.3\")" &>/dev/null
+ check_fail $? "Wrong *,G entry source list, 192.0.2.3 entry still exists"
+}
+
+v3cleanup()
+{
+ local port=$1
+ local group=$2
+
+ bridge mdb del dev br0 port $port grp $group
+ ip link set dev br0 type bridge mcast_igmp_version 2
+}
+
+v3include_test()
+{
+ RET=0
+ local X=("192.0.2.1" "192.0.2.2" "192.0.2.3")
+
+ v3include_prepare $h1 $ALL_MAC $ALL_GROUP
+
+ brmcast_check_sg_state 0 "${X[@]}"
+
+ brmcast_check_sg_fwding 1 "${X[@]}"
+ brmcast_check_sg_fwding 0 "192.0.2.100"
+
+ log_test "IGMPv3 report $TEST_GROUP is_include"
+
+ v3cleanup $swp1 $TEST_GROUP
+}
+
+v3inc_allow_test()
+{
+ RET=0
+ local X=("192.0.2.10" "192.0.2.11" "192.0.2.12")
+
+ v3include_prepare $h1 $ALL_MAC $ALL_GROUP
+
+ $MZ $h1 -c 1 -b $ALL_MAC -B $ALL_GROUP -t ip "proto=2,p=$MZPKT_ALLOW" -q
+ sleep 1
+ brmcast_check_sg_entries "allow" "${X[@]}"
+
+ brmcast_check_sg_state 0 "${X[@]}"
+
+ brmcast_check_sg_fwding 1 "${X[@]}"
+ brmcast_check_sg_fwding 0 "192.0.2.100"
+
+ log_test "IGMPv3 report $TEST_GROUP include -> allow"
+
+ v3cleanup $swp1 $TEST_GROUP
+}
+
+v3inc_is_include_test()
+{
+ RET=0
+ local X=("192.0.2.10" "192.0.2.11" "192.0.2.12")
+
+ v3include_prepare $h1 $ALL_MAC $ALL_GROUP
+
+ $MZ $h1 -c 1 -b $ALL_MAC -B $ALL_GROUP -t ip "proto=2,p=$MZPKT_IS_INC2" -q
+ sleep 1
+ brmcast_check_sg_entries "is_include" "${X[@]}"
+
+ brmcast_check_sg_state 0 "${X[@]}"
+
+ brmcast_check_sg_fwding 1 "${X[@]}"
+ brmcast_check_sg_fwding 0 "192.0.2.100"
+
+ log_test "IGMPv3 report $TEST_GROUP include -> is_include"
+
+ v3cleanup $swp1 $TEST_GROUP
+}
+
+v3inc_is_exclude_test()
+{
+ RET=0
+
+ v3exclude_prepare $h1 $ALL_MAC $ALL_GROUP
+
+ brmcast_check_sg_fwding 1 "${X[@]}" 192.0.2.100
+ brmcast_check_sg_fwding 0 "${Y[@]}"
+
+ log_test "IGMPv3 report $TEST_GROUP include -> is_exclude"
+
+ v3cleanup $swp1 $TEST_GROUP
+}
+
+v3inc_to_exclude_test()
+{
+ RET=0
+ local X=("192.0.2.1")
+ local Y=("192.0.2.20" "192.0.2.30")
+
+ v3include_prepare $h1 $ALL_MAC $ALL_GROUP
+
+ ip link set dev br0 type bridge mcast_last_member_interval 500
+ check_err $? "Could not change mcast_last_member_interval to 5s"
+
+ $MZ $h1 -c 1 -b $ALL_MAC -B $ALL_GROUP -t ip "proto=2,p=$MZPKT_TO_EXC" -q
+ sleep 1
+ bridge -j -d -s mdb show dev br0 \
+ | jq -e ".[].mdb[] | \
+ select(.grp == \"$TEST_GROUP\" and \
+ .source_list != null and .filter_mode == \"exclude\")" &>/dev/null
+ check_err $? "Wrong *,G entry filter mode"
+
+ brmcast_check_sg_entries "to_exclude" "${X[@]}" "${Y[@]}"
+
+ brmcast_check_sg_state 0 "${X[@]}"
+ brmcast_check_sg_state 1 "${Y[@]}"
+
+ bridge -j -d -s mdb show dev br0 \
+ | jq -e ".[].mdb[] | \
+ select(.grp == \"$TEST_GROUP\" and \
+ .source_list != null and
+ .source_list[].address == \"192.0.2.2\")" &>/dev/null
+ check_fail $? "Wrong *,G entry source list, 192.0.2.2 entry still exists"
+ bridge -j -d -s mdb show dev br0 \
+ | jq -e ".[].mdb[] | \
+ select(.grp == \"$TEST_GROUP\" and \
+ .source_list != null and
+ .source_list[].address == \"192.0.2.21\")" &>/dev/null
+ check_fail $? "Wrong *,G entry source list, 192.0.2.21 entry still exists"
+
+ brmcast_check_sg_fwding 1 "${X[@]}" 192.0.2.100
+ brmcast_check_sg_fwding 0 "${Y[@]}"
+
+ log_test "IGMPv3 report $TEST_GROUP include -> to_exclude"
+
+ ip link set dev br0 type bridge mcast_last_member_interval 100
+
+ v3cleanup $swp1 $TEST_GROUP
+}
+
+v3exc_allow_test()
+{
+ RET=0
+ local X=("192.0.2.1" "192.0.2.2" "192.0.2.20" "192.0.2.30")
+ local Y=("192.0.2.21")
+
+ v3exclude_prepare $h1 $ALL_MAC $ALL_GROUP
+
+ $MZ $h1 -c 1 -b $ALL_MAC -B $ALL_GROUP -t ip "proto=2,p=$MZPKT_ALLOW2" -q
+ sleep 1
+ brmcast_check_sg_entries "allow" "${X[@]}" "${Y[@]}"
+
+ brmcast_check_sg_state 0 "${X[@]}"
+ brmcast_check_sg_state 1 "${Y[@]}"
+
+ brmcast_check_sg_fwding 1 "${X[@]}" 192.0.2.100
+ brmcast_check_sg_fwding 0 "${Y[@]}"
+
+ log_test "IGMPv3 report $TEST_GROUP exclude -> allow"
+
+ v3cleanup $swp1 $TEST_GROUP
+}
+
+v3exc_is_include_test()
+{
+ RET=0
+ local X=("192.0.2.1" "192.0.2.2" "192.0.2.20" "192.0.2.30")
+ local Y=("192.0.2.21")
+
+ v3exclude_prepare $h1 $ALL_MAC $ALL_GROUP
+
+ $MZ $h1 -c 1 -b $ALL_MAC -B $ALL_GROUP -t ip "proto=2,p=$MZPKT_IS_INC3" -q
+ sleep 1
+ brmcast_check_sg_entries "is_include" "${X[@]}" "${Y[@]}"
+
+ brmcast_check_sg_state 0 "${X[@]}"
+ brmcast_check_sg_state 1 "${Y[@]}"
+
+ brmcast_check_sg_fwding 1 "${X[@]}" 192.0.2.100
+ brmcast_check_sg_fwding 0 "${Y[@]}"
+
+ log_test "IGMPv3 report $TEST_GROUP exclude -> is_include"
+
+ v3cleanup $swp1 $TEST_GROUP
+}
+
+v3exc_is_exclude_test()
+{
+ RET=0
+ local X=("192.0.2.30")
+ local Y=("192.0.2.20")
+
+ v3exclude_prepare $h1 $ALL_MAC $ALL_GROUP
+
+ $MZ $h1 -c 1 -b $ALL_MAC -B $ALL_GROUP -t ip "proto=2,p=$MZPKT_IS_EXC2" -q
+ sleep 1
+ brmcast_check_sg_entries "is_exclude" "${X[@]}" "${Y[@]}"
+
+ brmcast_check_sg_state 0 "${X[@]}"
+ brmcast_check_sg_state 1 "${Y[@]}"
+
+ brmcast_check_sg_fwding 1 "${X[@]}" 192.0.2.100
+ brmcast_check_sg_fwding 0 "${Y[@]}"
+
+ log_test "IGMPv3 report $TEST_GROUP exclude -> is_exclude"
+
+ v3cleanup $swp1 $TEST_GROUP
+}
+
+v3exc_to_exclude_test()
+{
+ RET=0
+ local X=("192.0.2.1" "192.0.2.30")
+ local Y=("192.0.2.20")
+
+ v3exclude_prepare $h1 $ALL_MAC $ALL_GROUP
+
+ ip link set dev br0 type bridge mcast_last_member_interval 500
+ check_err $? "Could not change mcast_last_member_interval to 5s"
+
+ $MZ $h1 -c 1 -b $ALL_MAC -B $ALL_GROUP -t ip "proto=2,p=$MZPKT_TO_EXC" -q
+ sleep 1
+ brmcast_check_sg_entries "to_exclude" "${X[@]}" "${Y[@]}"
+
+ brmcast_check_sg_state 0 "${X[@]}"
+ brmcast_check_sg_state 1 "${Y[@]}"
+
+ brmcast_check_sg_fwding 1 "${X[@]}" 192.0.2.100
+ brmcast_check_sg_fwding 0 "${Y[@]}"
+
+ log_test "IGMPv3 report $TEST_GROUP exclude -> to_exclude"
+
+ ip link set dev br0 type bridge mcast_last_member_interval 100
+
+ v3cleanup $swp1 $TEST_GROUP
+}
+
+v3inc_block_test()
+{
+ RET=0
+ local X=("192.0.2.2" "192.0.2.3")
+
+ v3include_prepare $h1 $ALL_MAC $ALL_GROUP
+
+ $MZ $h1 -c 1 -b $ALL_MAC -B $ALL_GROUP -t ip "proto=2,p=$MZPKT_BLOCK" -q
+ # make sure the lowered timers have expired (by default 2 seconds)
+ sleep 3
+ brmcast_check_sg_entries "block" "${X[@]}"
+
+ brmcast_check_sg_state 0 "${X[@]}"
+
+ bridge -j -d -s mdb show dev br0 \
+ | jq -e ".[].mdb[] | \
+ select(.grp == \"$TEST_GROUP\" and \
+ .source_list != null and
+ .source_list[].address == \"192.0.2.1\")" &>/dev/null
+ check_fail $? "Wrong *,G entry source list, 192.0.2.1 entry still exists"
+
+ brmcast_check_sg_fwding 1 "${X[@]}"
+ brmcast_check_sg_fwding 0 "192.0.2.100"
+
+ log_test "IGMPv3 report $TEST_GROUP include -> block"
+
+ v3cleanup $swp1 $TEST_GROUP
+}
+
+v3exc_block_test()
+{
+ RET=0
+ local X=("192.0.2.1" "192.0.2.2" "192.0.2.30")
+ local Y=("192.0.2.20" "192.0.2.21")
+
+ v3exclude_prepare $h1 $ALL_MAC $ALL_GROUP
+
+ ip link set dev br0 type bridge mcast_last_member_interval 500
+ check_err $? "Could not change mcast_last_member_interval to 5s"
+
+ $MZ $h1 -c 1 -b $ALL_MAC -B $ALL_GROUP -t ip "proto=2,p=$MZPKT_BLOCK" -q
+ sleep 1
+ brmcast_check_sg_entries "block" "${X[@]}" "${Y[@]}"
+
+ brmcast_check_sg_state 0 "${X[@]}"
+ brmcast_check_sg_state 1 "${Y[@]}"
+
+ brmcast_check_sg_fwding 1 "${X[@]}" 192.0.2.100
+ brmcast_check_sg_fwding 0 "${Y[@]}"
+
+ log_test "IGMPv3 report $TEST_GROUP exclude -> block"
+
+ ip link set dev br0 type bridge mcast_last_member_interval 100
+
+ v3cleanup $swp1 $TEST_GROUP
+}
+
+v3exc_timeout_test()
+{
+ RET=0
+ local X=("192.0.2.20" "192.0.2.30")
+
+ # GMI should be 3 seconds
+ ip link set dev br0 type bridge mcast_query_interval 100 \
+ mcast_query_response_interval 100 \
+ mcast_membership_interval 300
+
+ v3exclude_prepare $h1 $ALL_MAC $ALL_GROUP
+ ip link set dev br0 type bridge mcast_query_interval 500 \
+ mcast_query_response_interval 500 \
+ mcast_membership_interval 1500
+
+ $MZ $h1 -c 1 -b $ALL_MAC -B $ALL_GROUP -t ip "proto=2,p=$MZPKT_ALLOW2" -q
+ sleep 3
+ bridge -j -d -s mdb show dev br0 \
+ | jq -e ".[].mdb[] | \
+ select(.grp == \"$TEST_GROUP\" and \
+ .source_list != null and .filter_mode == \"include\")" &>/dev/null
+ check_err $? "Wrong *,G entry filter mode"
+
+ bridge -j -d -s mdb show dev br0 \
+ | jq -e ".[].mdb[] | \
+ select(.grp == \"$TEST_GROUP\" and \
+ .source_list != null and
+ .source_list[].address == \"192.0.2.1\")" &>/dev/null
+ check_fail $? "Wrong *,G entry source list, 192.0.2.1 entry still exists"
+ bridge -j -d -s mdb show dev br0 \
+ | jq -e ".[].mdb[] | \
+ select(.grp == \"$TEST_GROUP\" and \
+ .source_list != null and
+ .source_list[].address == \"192.0.2.2\")" &>/dev/null
+ check_fail $? "Wrong *,G entry source list, 192.0.2.2 entry still exists"
+
+ brmcast_check_sg_entries "allow" "${X[@]}"
+
+ brmcast_check_sg_state 0 "${X[@]}"
+
+ brmcast_check_sg_fwding 1 "${X[@]}"
+ brmcast_check_sg_fwding 0 192.0.2.100
+
+ log_test "IGMPv3 group $TEST_GROUP exclude timeout"
+
+ ip link set dev br0 type bridge mcast_query_interval 12500 \
+ mcast_query_response_interval 1000 \
+ mcast_membership_interval 26000
+
+ v3cleanup $swp1 $TEST_GROUP
+}
+
+v3star_ex_auto_add_test()
+{
+ RET=0
+
+ v3exclude_prepare $h1 $ALL_MAC $ALL_GROUP
+
+ $MZ $h2 -c 1 -b $ALL_MAC -B $ALL_GROUP -t ip "proto=2,p=$MZPKT_IS_INC" -q
+ sleep 1
+ bridge -j -d -s mdb show dev br0 \
+ | jq -e ".[].mdb[] | \
+ select(.grp == \"$TEST_GROUP\" and .src == \"192.0.2.3\" and \
+ .port == \"$swp1\")" &>/dev/null
+ check_err $? "S,G entry for *,G port doesn't exist"
+
+ bridge -j -d -s mdb show dev br0 \
+ | jq -e ".[].mdb[] | \
+ select(.grp == \"$TEST_GROUP\" and .src == \"192.0.2.3\" and \
+ .port == \"$swp1\" and \
+ .flags[] == \"added_by_star_ex\")" &>/dev/null
+ check_err $? "Auto-added S,G entry doesn't have added_by_star_ex flag"
+
+ brmcast_check_sg_fwding 1 192.0.2.3
+
+ log_test "IGMPv3 S,G port entry automatic add to a *,G port"
+
+ v3cleanup $swp1 $TEST_GROUP
+ v3cleanup $swp2 $TEST_GROUP
}
trap cleanup EXIT
diff --git a/tools/testing/selftests/net/forwarding/bridge_locked_port.sh b/tools/testing/selftests/net/forwarding/bridge_locked_port.sh
new file mode 100755
index 000000000000..c62331b2e006
--- /dev/null
+++ b/tools/testing/selftests/net/forwarding/bridge_locked_port.sh
@@ -0,0 +1,365 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+ALL_TESTS="
+ locked_port_ipv4
+ locked_port_ipv6
+ locked_port_vlan
+ locked_port_mab
+ locked_port_mab_roam
+ locked_port_mab_config
+ locked_port_mab_flush
+ locked_port_mab_redirect
+"
+
+NUM_NETIFS=4
+CHECK_TC="no"
+source lib.sh
+
+h1_create()
+{
+ simple_if_init $h1 192.0.2.1/24 2001:db8:1::1/64
+ vlan_create $h1 100 v$h1 198.51.100.1/24
+}
+
+h1_destroy()
+{
+ vlan_destroy $h1 100
+ simple_if_fini $h1 192.0.2.1/24 2001:db8:1::1/64
+}
+
+h2_create()
+{
+ simple_if_init $h2 192.0.2.2/24 2001:db8:1::2/64
+ vlan_create $h2 100 v$h2 198.51.100.2/24
+}
+
+h2_destroy()
+{
+ vlan_destroy $h2 100
+ simple_if_fini $h2 192.0.2.2/24 2001:db8:1::2/64
+}
+
+switch_create()
+{
+ ip link add dev br0 type bridge vlan_filtering 1
+
+ ip link set dev $swp1 master br0
+ ip link set dev $swp2 master br0
+
+ bridge link set dev $swp1 learning off
+
+ ip link set dev br0 up
+ ip link set dev $swp1 up
+ ip link set dev $swp2 up
+}
+
+switch_destroy()
+{
+ ip link set dev $swp2 down
+ ip link set dev $swp1 down
+
+ ip link del dev br0
+}
+
+setup_prepare()
+{
+ h1=${NETIFS[p1]}
+ swp1=${NETIFS[p2]}
+
+ swp2=${NETIFS[p3]}
+ h2=${NETIFS[p4]}
+
+ vrf_prepare
+
+ h1_create
+ h2_create
+
+ switch_create
+}
+
+cleanup()
+{
+ pre_cleanup
+
+ switch_destroy
+
+ h2_destroy
+ h1_destroy
+
+ vrf_cleanup
+}
+
+locked_port_ipv4()
+{
+ RET=0
+
+ check_locked_port_support || return 0
+
+ ping_do $h1 192.0.2.2
+ check_err $? "Ping did not work before locking port"
+
+ bridge link set dev $swp1 locked on
+
+ ping_do $h1 192.0.2.2
+ check_fail $? "Ping worked after locking port, but before adding FDB entry"
+
+ bridge fdb add `mac_get $h1` dev $swp1 master static
+
+ ping_do $h1 192.0.2.2
+ check_err $? "Ping did not work after locking port and adding FDB entry"
+
+ bridge link set dev $swp1 locked off
+ bridge fdb del `mac_get $h1` dev $swp1 master static
+
+ ping_do $h1 192.0.2.2
+ check_err $? "Ping did not work after unlocking port and removing FDB entry."
+
+ log_test "Locked port ipv4"
+}
+
+locked_port_vlan()
+{
+ RET=0
+
+ check_locked_port_support || return 0
+
+ bridge vlan add vid 100 dev $swp1
+ bridge vlan add vid 100 dev $swp2
+
+ ping_do $h1.100 198.51.100.2
+ check_err $? "Ping through vlan did not work before locking port"
+
+ bridge link set dev $swp1 locked on
+ ping_do $h1.100 198.51.100.2
+ check_fail $? "Ping through vlan worked after locking port, but before adding FDB entry"
+
+ bridge fdb add `mac_get $h1` dev $swp1 vlan 100 master static
+
+ ping_do $h1.100 198.51.100.2
+ check_err $? "Ping through vlan did not work after locking port and adding FDB entry"
+
+ bridge link set dev $swp1 locked off
+ bridge fdb del `mac_get $h1` dev $swp1 vlan 100 master static
+
+ ping_do $h1.100 198.51.100.2
+ check_err $? "Ping through vlan did not work after unlocking port and removing FDB entry"
+
+ bridge vlan del vid 100 dev $swp1
+ bridge vlan del vid 100 dev $swp2
+ log_test "Locked port vlan"
+}
+
+locked_port_ipv6()
+{
+ RET=0
+ check_locked_port_support || return 0
+
+ ping6_do $h1 2001:db8:1::2
+ check_err $? "Ping6 did not work before locking port"
+
+ bridge link set dev $swp1 locked on
+
+ ping6_do $h1 2001:db8:1::2
+ check_fail $? "Ping6 worked after locking port, but before adding FDB entry"
+
+ bridge fdb add `mac_get $h1` dev $swp1 master static
+ ping6_do $h1 2001:db8:1::2
+ check_err $? "Ping6 did not work after locking port and adding FDB entry"
+
+ bridge link set dev $swp1 locked off
+ bridge fdb del `mac_get $h1` dev $swp1 master static
+
+ ping6_do $h1 2001:db8:1::2
+ check_err $? "Ping6 did not work after unlocking port and removing FDB entry"
+
+ log_test "Locked port ipv6"
+}
+
+locked_port_mab()
+{
+ RET=0
+ check_port_mab_support || return 0
+
+ ping_do $h1 192.0.2.2
+ check_err $? "Ping did not work before locking port"
+
+ bridge link set dev $swp1 learning on locked on
+
+ ping_do $h1 192.0.2.2
+ check_fail $? "Ping worked on a locked port without an FDB entry"
+
+ bridge fdb get `mac_get $h1` br br0 vlan 1 &> /dev/null
+ check_fail $? "FDB entry created before enabling MAB"
+
+ bridge link set dev $swp1 learning on locked on mab on
+
+ ping_do $h1 192.0.2.2
+ check_fail $? "Ping worked on MAB enabled port without an FDB entry"
+
+ bridge fdb get `mac_get $h1` br br0 vlan 1 | grep "dev $swp1" | grep -q "locked"
+ check_err $? "Locked FDB entry not created"
+
+ bridge fdb replace `mac_get $h1` dev $swp1 master static
+
+ ping_do $h1 192.0.2.2
+ check_err $? "Ping did not work after replacing FDB entry"
+
+ bridge fdb get `mac_get $h1` br br0 vlan 1 | grep "dev $swp1" | grep -q "locked"
+ check_fail $? "FDB entry marked as locked after replacement"
+
+ bridge fdb del `mac_get $h1` dev $swp1 master
+ bridge link set dev $swp1 learning off locked off mab off
+
+ log_test "Locked port MAB"
+}
+
+# Check that entries cannot roam to a locked port, but that entries can roam
+# to an unlocked port.
+locked_port_mab_roam()
+{
+ local mac=a0:b0:c0:c0:b0:a0
+
+ RET=0
+ check_port_mab_support || return 0
+
+ bridge link set dev $swp1 learning on locked on mab on
+
+ $MZ $h1 -q -c 5 -d 100msec -t udp -a $mac -b rand
+ bridge fdb get $mac br br0 vlan 1 | grep "dev $swp1" | grep -q "locked"
+ check_err $? "No locked entry on first injection"
+
+ $MZ $h2 -q -c 5 -d 100msec -t udp -a $mac -b rand
+ bridge fdb get $mac br br0 vlan 1 | grep -q "dev $swp2"
+ check_err $? "Entry did not roam to an unlocked port"
+
+ bridge fdb get $mac br br0 vlan 1 | grep -q "locked"
+ check_fail $? "Entry roamed with locked flag on"
+
+ $MZ $h1 -q -c 5 -d 100msec -t udp -a $mac -b rand
+ bridge fdb get $mac br br0 vlan 1 | grep -q "dev $swp1"
+ check_fail $? "Entry roamed back to locked port"
+
+ bridge fdb del $mac vlan 1 dev $swp2 master
+ bridge link set dev $swp1 learning off locked off mab off
+
+ log_test "Locked port MAB roam"
+}
+
+# Check that MAB can only be enabled on a port that is both locked and has
+# learning enabled.
+locked_port_mab_config()
+{
+ RET=0
+ check_port_mab_support || return 0
+
+ bridge link set dev $swp1 learning on locked off mab on &> /dev/null
+ check_fail $? "MAB enabled while port is unlocked"
+
+ bridge link set dev $swp1 learning off locked on mab on &> /dev/null
+ check_fail $? "MAB enabled while port has learning disabled"
+
+ bridge link set dev $swp1 learning on locked on mab on
+ check_err $? "Failed to enable MAB when port is locked and has learning enabled"
+
+ bridge link set dev $swp1 learning off locked off mab off
+
+ log_test "Locked port MAB configuration"
+}
+
+# Check that locked FDB entries are flushed from a port when MAB is disabled.
+locked_port_mab_flush()
+{
+ local locked_mac1=00:01:02:03:04:05
+ local unlocked_mac1=00:01:02:03:04:06
+ local locked_mac2=00:01:02:03:04:07
+ local unlocked_mac2=00:01:02:03:04:08
+
+ RET=0
+ check_port_mab_support || return 0
+
+ bridge link set dev $swp1 learning on locked on mab on
+ bridge link set dev $swp2 learning on locked on mab on
+
+ # Create regular and locked FDB entries on each port.
+ bridge fdb add $unlocked_mac1 dev $swp1 vlan 1 master static
+ bridge fdb add $unlocked_mac2 dev $swp2 vlan 1 master static
+
+ $MZ $h1 -q -c 5 -d 100msec -t udp -a $locked_mac1 -b rand
+ bridge fdb get $locked_mac1 br br0 vlan 1 | grep "dev $swp1" | \
+ grep -q "locked"
+ check_err $? "Failed to create locked FDB entry on first port"
+
+ $MZ $h2 -q -c 5 -d 100msec -t udp -a $locked_mac2 -b rand
+ bridge fdb get $locked_mac2 br br0 vlan 1 | grep "dev $swp2" | \
+ grep -q "locked"
+ check_err $? "Failed to create locked FDB entry on second port"
+
+ # Disable MAB on the first port and check that only the first locked
+ # FDB entry was flushed.
+ bridge link set dev $swp1 mab off
+
+ bridge fdb get $unlocked_mac1 br br0 vlan 1 &> /dev/null
+ check_err $? "Regular FDB entry on first port was flushed after disabling MAB"
+
+ bridge fdb get $unlocked_mac2 br br0 vlan 1 &> /dev/null
+ check_err $? "Regular FDB entry on second port was flushed after disabling MAB"
+
+ bridge fdb get $locked_mac1 br br0 vlan 1 &> /dev/null
+ check_fail $? "Locked FDB entry on first port was not flushed after disabling MAB"
+
+ bridge fdb get $locked_mac2 br br0 vlan 1 &> /dev/null
+ check_err $? "Locked FDB entry on second port was flushed after disabling MAB"
+
+ bridge fdb del $unlocked_mac2 dev $swp2 vlan 1 master static
+ bridge fdb del $unlocked_mac1 dev $swp1 vlan 1 master static
+
+ bridge link set dev $swp2 learning on locked off mab off
+ bridge link set dev $swp1 learning off locked off mab off
+
+ log_test "Locked port MAB FDB flush"
+}
+
+# Check that traffic can be redirected from a locked bridge port and that it
+# does not create locked FDB entries.
+locked_port_mab_redirect()
+{
+ RET=0
+ check_port_mab_support || return 0
+
+ tc qdisc add dev $swp1 clsact
+ tc filter add dev $swp1 ingress protocol all pref 1 handle 101 flower \
+ action mirred egress redirect dev $swp2
+ bridge link set dev $swp1 learning on locked on mab on
+
+ ping_do $h1 192.0.2.2
+ check_err $? "Ping did not work with redirection"
+
+ bridge fdb get `mac_get $h1` br br0 vlan 1 2> /dev/null | \
+ grep "dev $swp1" | grep -q "locked"
+ check_fail $? "Locked entry created for redirected traffic"
+
+ tc filter del dev $swp1 ingress protocol all pref 1 handle 101 flower
+
+ ping_do $h1 192.0.2.2
+ check_fail $? "Ping worked without redirection"
+
+ bridge fdb get `mac_get $h1` br br0 vlan 1 2> /dev/null | \
+ grep "dev $swp1" | grep -q "locked"
+ check_err $? "Locked entry not created after deleting filter"
+
+ bridge fdb del `mac_get $h1` vlan 1 dev $swp1 master
+ bridge link set dev $swp1 learning off locked off mab off
+ tc qdisc del dev $swp1 clsact
+
+ log_test "Locked port MAB redirect"
+}
+
+trap cleanup EXIT
+
+setup_prepare
+setup_wait
+
+tests_run
+
+exit $EXIT_STATUS
diff --git a/tools/testing/selftests/net/forwarding/bridge_mdb.sh b/tools/testing/selftests/net/forwarding/bridge_mdb.sh
new file mode 100755
index 000000000000..d9d587454d20
--- /dev/null
+++ b/tools/testing/selftests/net/forwarding/bridge_mdb.sh
@@ -0,0 +1,1371 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+# +-----------------------+ +------------------------+
+# | H1 (vrf) | | H2 (vrf) |
+# | + $h1.10 | | + $h2.10 |
+# | | 192.0.2.1/28 | | | 192.0.2.2/28 |
+# | | 2001:db8:1::1/64 | | | 2001:db8:1::2/64 |
+# | | | | | |
+# | | + $h1.20 | | | + $h2.20 |
+# | \ | 198.51.100.1/24 | | \ | 198.51.100.2/24 |
+# | \ | 2001:db8:2::1/64 | | \ | 2001:db8:2::2/64 |
+# | \| | | \| |
+# | + $h1 | | + $h2 |
+# +----|------------------+ +----|-------------------+
+# | |
+# +----|--------------------------------------------------|-------------------+
+# | SW | | |
+# | +--|--------------------------------------------------|-----------------+ |
+# | | + $swp1 BR0 (802.1q) + $swp2 | |
+# | | vid 10 vid 10 | |
+# | | vid 20 vid 20 | |
+# | | | |
+# | +-----------------------------------------------------------------------+ |
+# +---------------------------------------------------------------------------+
+
+ALL_TESTS="
+ cfg_test
+ fwd_test
+ ctrl_test
+"
+
+NUM_NETIFS=4
+source lib.sh
+source tc_common.sh
+
+h1_create()
+{
+ simple_if_init $h1
+ vlan_create $h1 10 v$h1 192.0.2.1/28 2001:db8:1::1/64
+ vlan_create $h1 20 v$h1 198.51.100.1/24 2001:db8:2::1/64
+}
+
+h1_destroy()
+{
+ vlan_destroy $h1 20
+ vlan_destroy $h1 10
+ simple_if_fini $h1
+}
+
+h2_create()
+{
+ simple_if_init $h2
+ vlan_create $h2 10 v$h2 192.0.2.2/28
+ vlan_create $h2 20 v$h2 198.51.100.2/24
+}
+
+h2_destroy()
+{
+ vlan_destroy $h2 20
+ vlan_destroy $h2 10
+ simple_if_fini $h2
+}
+
+switch_create()
+{
+ ip link add name br0 type bridge vlan_filtering 1 vlan_default_pvid 0 \
+ mcast_snooping 1 mcast_igmp_version 3 mcast_mld_version 2
+ bridge vlan add vid 10 dev br0 self
+ bridge vlan add vid 20 dev br0 self
+ ip link set dev br0 up
+
+ ip link set dev $swp1 master br0
+ ip link set dev $swp1 up
+ bridge vlan add vid 10 dev $swp1
+ bridge vlan add vid 20 dev $swp1
+
+ ip link set dev $swp2 master br0
+ ip link set dev $swp2 up
+ bridge vlan add vid 10 dev $swp2
+ bridge vlan add vid 20 dev $swp2
+
+ tc qdisc add dev br0 clsact
+ tc qdisc add dev $h2 clsact
+}
+
+switch_destroy()
+{
+ tc qdisc del dev $h2 clsact
+ tc qdisc del dev br0 clsact
+
+ bridge vlan del vid 20 dev $swp2
+ bridge vlan del vid 10 dev $swp2
+ ip link set dev $swp2 down
+ ip link set dev $swp2 nomaster
+
+ bridge vlan del vid 20 dev $swp1
+ bridge vlan del vid 10 dev $swp1
+ ip link set dev $swp1 down
+ ip link set dev $swp1 nomaster
+
+ ip link set dev br0 down
+ bridge vlan del vid 20 dev br0 self
+ bridge vlan del vid 10 dev br0 self
+ ip link del dev br0
+}
+
+setup_prepare()
+{
+ h1=${NETIFS[p1]}
+ swp1=${NETIFS[p2]}
+
+ swp2=${NETIFS[p3]}
+ h2=${NETIFS[p4]}
+
+ vrf_prepare
+ forwarding_enable
+
+ h1_create
+ h2_create
+ switch_create
+}
+
+cleanup()
+{
+ pre_cleanup
+
+ switch_destroy
+ h2_destroy
+ h1_destroy
+
+ forwarding_restore
+ vrf_cleanup
+}
+
+cfg_test_host_common()
+{
+ local name=$1; shift
+ local grp=$1; shift
+ local src=$1; shift
+ local state=$1; shift
+ local invalid_state=$1; shift
+
+ RET=0
+
+ # Check basic add, replace and delete behavior.
+ bridge mdb add dev br0 port br0 grp $grp $state vid 10
+ bridge mdb get dev br0 grp $grp vid 10 &> /dev/null
+ check_err $? "Failed to add $name host entry"
+
+ bridge mdb replace dev br0 port br0 grp $grp $state vid 10 &> /dev/null
+ check_fail $? "Managed to replace $name host entry"
+
+ bridge mdb del dev br0 port br0 grp $grp $state vid 10
+ bridge mdb get dev br0 grp $grp vid 10 &> /dev/null
+ check_fail $? "Failed to delete $name host entry"
+
+ # Check error cases.
+ bridge mdb add dev br0 port br0 grp $grp $invalid_state vid 10 \
+ &> /dev/null
+ check_fail $? "Managed to add $name host entry with a $invalid_state state"
+
+ bridge mdb add dev br0 port br0 grp $grp src $src $state vid 10 \
+ &> /dev/null
+ check_fail $? "Managed to add $name host entry with a source"
+
+ bridge mdb add dev br0 port br0 grp $grp $state vid 10 \
+ filter_mode exclude &> /dev/null
+ check_fail $? "Managed to add $name host entry with a filter mode"
+
+ bridge mdb add dev br0 port br0 grp $grp $state vid 10 \
+ source_list $src &> /dev/null
+ check_fail $? "Managed to add $name host entry with a source list"
+
+ bridge mdb add dev br0 port br0 grp $grp $state vid 10 \
+ proto 123 &> /dev/null
+ check_fail $? "Managed to add $name host entry with a protocol"
+
+ log_test "Common host entries configuration tests ($name)"
+}
+
+# Check configuration of host entries from all types.
+cfg_test_host()
+{
+ echo
+ log_info "# Host entries configuration tests"
+
+ cfg_test_host_common "IPv4" "239.1.1.1" "192.0.2.1" "temp" "permanent"
+ cfg_test_host_common "IPv6" "ff0e::1" "2001:db8:1::1" "temp" "permanent"
+ cfg_test_host_common "L2" "01:02:03:04:05:06" "00:00:00:00:00:01" \
+ "permanent" "temp"
+}
+
+cfg_test_port_common()
+{
+ local name=$1;shift
+ local grp_key=$1; shift
+
+ RET=0
+
+ # Check basic add, replace and delete behavior.
+ bridge mdb add dev br0 port $swp1 $grp_key permanent vid 10
+ bridge mdb get dev br0 $grp_key vid 10 &> /dev/null
+ check_err $? "Failed to add $name entry"
+
+ bridge mdb replace dev br0 port $swp1 $grp_key permanent vid 10 \
+ &> /dev/null
+ check_err $? "Failed to replace $name entry"
+
+ bridge mdb del dev br0 port $swp1 $grp_key permanent vid 10
+ bridge mdb get dev br0 $grp_key vid 10 &> /dev/null
+ check_fail $? "Failed to delete $name entry"
+
+ # Check default protocol and replacement.
+ bridge mdb add dev br0 port $swp1 $grp_key permanent vid 10
+ bridge -d mdb get dev br0 $grp_key vid 10 | grep -q "static"
+ check_err $? "$name entry not added with default \"static\" protocol"
+
+ bridge mdb replace dev br0 port $swp1 $grp_key permanent vid 10 \
+ proto 123
+ bridge -d mdb get dev br0 $grp_key vid 10 | grep -q "123"
+ check_err $? "Failed to replace protocol of $name entry"
+ bridge mdb del dev br0 port $swp1 $grp_key permanent vid 10
+
+ # Check behavior when VLAN is not specified.
+ bridge mdb add dev br0 port $swp1 $grp_key permanent
+ bridge mdb get dev br0 $grp_key vid 10 &> /dev/null
+ check_err $? "$name entry with VLAN 10 not added when VLAN was not specified"
+ bridge mdb get dev br0 $grp_key vid 20 &> /dev/null
+ check_err $? "$name entry with VLAN 20 not added when VLAN was not specified"
+
+ bridge mdb del dev br0 port $swp1 $grp_key permanent
+ bridge mdb get dev br0 $grp_key vid 10 &> /dev/null
+ check_fail $? "$name entry with VLAN 10 not deleted when VLAN was not specified"
+ bridge mdb get dev br0 $grp_key vid 20 &> /dev/null
+ check_fail $? "$name entry with VLAN 20 not deleted when VLAN was not specified"
+
+ # Check behavior when bridge port is down.
+ ip link set dev $swp1 down
+
+ bridge mdb add dev br0 port $swp1 $grp_key permanent vid 10
+ check_err $? "Failed to add $name permanent entry when bridge port is down"
+
+ bridge mdb del dev br0 port $swp1 $grp_key permanent vid 10
+
+ bridge mdb add dev br0 port $swp1 $grp_key temp vid 10 &> /dev/null
+ check_fail $? "Managed to add $name temporary entry when bridge port is down"
+
+ ip link set dev $swp1 up
+ setup_wait_dev $swp1
+
+ # Check error cases.
+ ip link set dev br0 down
+ bridge mdb add dev br0 port $swp1 $grp_key permanent vid 10 \
+ &> /dev/null
+ check_fail $? "Managed to add $name entry when bridge is down"
+ ip link set dev br0 up
+
+ ip link set dev br0 type bridge mcast_snooping 0
+ bridge mdb add dev br0 port $swp1 $grp_key permanent vid \
+ 10 &> /dev/null
+ check_fail $? "Managed to add $name entry when multicast snooping is disabled"
+ ip link set dev br0 type bridge mcast_snooping 1
+
+ bridge mdb add dev br0 port $swp1 $grp_key permanent vid 5000 \
+ &> /dev/null
+ check_fail $? "Managed to add $name entry with an invalid VLAN"
+
+ log_test "Common port group entries configuration tests ($name)"
+}
+
+src_list_create()
+{
+ local src_prefix=$1; shift
+ local num_srcs=$1; shift
+ local src_list
+ local i
+
+ for i in $(seq 1 $num_srcs); do
+ src_list=${src_list},${src_prefix}${i}
+ done
+
+ echo $src_list | cut -c 2-
+}
+
+__cfg_test_port_ip_star_g()
+{
+ local name=$1; shift
+ local grp=$1; shift
+ local invalid_grp=$1; shift
+ local src_prefix=$1; shift
+ local src1=${src_prefix}1
+ local src2=${src_prefix}2
+ local src3=${src_prefix}3
+ local max_srcs=31
+ local num_srcs
+
+ RET=0
+
+ bridge mdb add dev br0 port $swp1 grp $grp vid 10
+ bridge -d mdb get dev br0 grp $grp vid 10 | grep -q "exclude"
+ check_err $? "Default filter mode is not \"exclude\""
+ bridge mdb del dev br0 port $swp1 grp $grp vid 10
+
+ # Check basic add and delete behavior.
+ bridge mdb add dev br0 port $swp1 grp $grp vid 10 filter_mode exclude \
+ source_list $src1
+ bridge -d mdb get dev br0 grp $grp vid 10 &> /dev/null
+ check_err $? "(*, G) entry not created"
+ bridge -d mdb get dev br0 grp $grp src $src1 vid 10 &> /dev/null
+ check_err $? "(S, G) entry not created"
+ bridge mdb del dev br0 port $swp1 grp $grp vid 10
+ bridge -d mdb get dev br0 grp $grp vid 10 &> /dev/null
+ check_fail $? "(*, G) entry not deleted"
+ bridge -d mdb get dev br0 grp $grp src $src1 vid 10 &> /dev/null
+ check_fail $? "(S, G) entry not deleted"
+
+ ## State (permanent / temp) tests.
+
+ # Check that group and source timer are not set for permanent entries.
+ bridge mdb add dev br0 port $swp1 grp $grp permanent vid 10 \
+ filter_mode exclude source_list $src1
+
+ bridge -d mdb get dev br0 grp $grp vid 10 | grep -q "permanent"
+ check_err $? "(*, G) entry not added as \"permanent\" when should"
+ bridge -d mdb get dev br0 grp $grp src $src1 vid 10 | \
+ grep -q "permanent"
+ check_err $? "(S, G) entry not added as \"permanent\" when should"
+
+ bridge -d -s mdb get dev br0 grp $grp vid 10 | grep -q " 0.00"
+ check_err $? "(*, G) \"permanent\" entry has a pending group timer"
+ bridge -d -s mdb get dev br0 grp $grp vid 10 | grep -q "/0.00"
+ check_err $? "\"permanent\" source entry has a pending source timer"
+
+ bridge mdb del dev br0 port $swp1 grp $grp vid 10
+
+ # Check that group timer is set for temporary (*, G) EXCLUDE, but not
+ # the source timer.
+ bridge mdb add dev br0 port $swp1 grp $grp temp vid 10 \
+ filter_mode exclude source_list $src1
+
+ bridge -d mdb get dev br0 grp $grp vid 10 | grep -q "temp"
+ check_err $? "(*, G) EXCLUDE entry not added as \"temp\" when should"
+ bridge -d mdb get dev br0 grp $grp src $src1 vid 10 | grep -q "temp"
+ check_err $? "(S, G) \"blocked\" entry not added as \"temp\" when should"
+
+ bridge -d -s mdb get dev br0 grp $grp vid 10 | grep -q " 0.00"
+ check_fail $? "(*, G) EXCLUDE entry does not have a pending group timer"
+ bridge -d -s mdb get dev br0 grp $grp vid 10 | grep -q "/0.00"
+ check_err $? "\"blocked\" source entry has a pending source timer"
+
+ bridge mdb del dev br0 port $swp1 grp $grp vid 10
+
+ # Check that group timer is not set for temporary (*, G) INCLUDE, but
+ # that the source timer is set.
+ bridge mdb add dev br0 port $swp1 grp $grp temp vid 10 \
+ filter_mode include source_list $src1
+
+ bridge -d mdb get dev br0 grp $grp vid 10 | grep -q "temp"
+ check_err $? "(*, G) INCLUDE entry not added as \"temp\" when should"
+ bridge -d mdb get dev br0 grp $grp src $src1 vid 10 | grep -q "temp"
+ check_err $? "(S, G) entry not added as \"temp\" when should"
+
+ bridge -d -s mdb get dev br0 grp $grp vid 10 | grep -q " 0.00"
+ check_err $? "(*, G) INCLUDE entry has a pending group timer"
+ bridge -d -s mdb get dev br0 grp $grp vid 10 | grep -q "/0.00"
+ check_fail $? "Source entry does not have a pending source timer"
+
+ bridge mdb del dev br0 port $swp1 grp $grp vid 10
+
+ # Check that group timer is never set for (S, G) entries.
+ bridge mdb add dev br0 port $swp1 grp $grp temp vid 10 \
+ filter_mode include source_list $src1
+
+ bridge -d -s mdb get dev br0 grp $grp src $src1 vid 10 | grep -q " 0.00"
+ check_err $? "(S, G) entry has a pending group timer"
+
+ bridge mdb del dev br0 port $swp1 grp $grp vid 10
+
+ ## Filter mode (include / exclude) tests.
+
+ # Check that (*, G) INCLUDE entries are added with correct filter mode
+ # and that (S, G) entries are not marked as "blocked".
+ bridge mdb add dev br0 port $swp1 grp $grp vid 10 \
+ filter_mode include source_list $src1
+
+ bridge -d mdb get dev br0 grp $grp vid 10 | grep -q "include"
+ check_err $? "(*, G) INCLUDE not added with \"include\" filter mode"
+ bridge -d mdb get dev br0 grp $grp src $src1 vid 10 | grep -q "blocked"
+ check_fail $? "(S, G) entry marked as \"blocked\" when should not"
+
+ bridge mdb del dev br0 port $swp1 grp $grp vid 10
+
+ # Check that (*, G) EXCLUDE entries are added with correct filter mode
+ # and that (S, G) entries are marked as "blocked".
+ bridge mdb add dev br0 port $swp1 grp $grp vid 10 \
+ filter_mode exclude source_list $src1
+
+ bridge -d mdb get dev br0 grp $grp vid 10 | grep -q "exclude"
+ check_err $? "(*, G) EXCLUDE not added with \"exclude\" filter mode"
+ bridge -d mdb get dev br0 grp $grp src $src1 vid 10 | grep -q "blocked"
+ check_err $? "(S, G) entry not marked as \"blocked\" when should"
+
+ bridge mdb del dev br0 port $swp1 grp $grp vid 10
+
+ ## Protocol tests.
+
+ # Check that (*, G) and (S, G) entries are added with the specified
+ # protocol.
+ bridge mdb add dev br0 port $swp1 grp $grp vid 10 \
+ filter_mode exclude source_list $src1 proto zebra
+
+ bridge -d mdb get dev br0 grp $grp vid 10 | grep -q "zebra"
+ check_err $? "(*, G) entry not added with \"zebra\" protocol"
+ bridge -d mdb get dev br0 grp $grp src $src1 vid 10 | grep -q "zebra"
+ check_err $? "(S, G) entry not marked added with \"zebra\" protocol"
+
+ bridge mdb del dev br0 port $swp1 grp $grp vid 10
+
+ ## Replace tests.
+
+ # Check that state can be modified.
+ bridge mdb add dev br0 port $swp1 grp $grp temp vid 10 \
+ filter_mode exclude source_list $src1
+
+ bridge mdb replace dev br0 port $swp1 grp $grp permanent vid 10 \
+ filter_mode exclude source_list $src1
+ bridge -d mdb get dev br0 grp $grp vid 10 | grep -q "permanent"
+ check_err $? "(*, G) entry not marked as \"permanent\" after replace"
+ bridge -d mdb get dev br0 grp $grp src $src1 vid 10 | grep -q "permanent"
+ check_err $? "(S, G) entry not marked as \"permanent\" after replace"
+
+ bridge mdb replace dev br0 port $swp1 grp $grp temp vid 10 \
+ filter_mode exclude source_list $src1
+ bridge -d mdb get dev br0 grp $grp vid 10 | grep -q "temp"
+ check_err $? "(*, G) entry not marked as \"temp\" after replace"
+ bridge -d mdb get dev br0 grp $grp src $src1 vid 10 | grep -q "temp"
+ check_err $? "(S, G) entry not marked as \"temp\" after replace"
+
+ bridge mdb del dev br0 port $swp1 grp $grp vid 10
+
+ # Check that filter mode can be modified.
+ bridge mdb add dev br0 port $swp1 grp $grp temp vid 10 \
+ filter_mode exclude source_list $src1
+
+ bridge mdb replace dev br0 port $swp1 grp $grp temp vid 10 \
+ filter_mode include source_list $src1
+ bridge -d mdb get dev br0 grp $grp vid 10 | grep -q "include"
+ check_err $? "(*, G) not marked with \"include\" filter mode after replace"
+ bridge -d mdb get dev br0 grp $grp src $src1 vid 10 | grep -q "blocked"
+ check_fail $? "(S, G) marked as \"blocked\" after replace"
+
+ bridge mdb replace dev br0 port $swp1 grp $grp temp vid 10 \
+ filter_mode exclude source_list $src1
+ bridge -d mdb get dev br0 grp $grp vid 10 | grep -q "exclude"
+ check_err $? "(*, G) not marked with \"exclude\" filter mode after replace"
+ bridge -d mdb get dev br0 grp $grp src $src1 vid 10 | grep -q "blocked"
+ check_err $? "(S, G) not marked as \"blocked\" after replace"
+
+ bridge mdb del dev br0 port $swp1 grp $grp vid 10
+
+ # Check that sources can be added to and removed from the source list.
+ bridge mdb add dev br0 port $swp1 grp $grp temp vid 10 \
+ filter_mode exclude source_list $src1
+
+ bridge mdb replace dev br0 port $swp1 grp $grp temp vid 10 \
+ filter_mode exclude source_list $src1,$src2,$src3
+ bridge -d mdb get dev br0 grp $grp src $src1 vid 10 &> /dev/null
+ check_err $? "(S, G) entry for source $src1 not created after replace"
+ bridge -d mdb get dev br0 grp $grp src $src2 vid 10 &> /dev/null
+ check_err $? "(S, G) entry for source $src2 not created after replace"
+ bridge -d mdb get dev br0 grp $grp src $src3 vid 10 &> /dev/null
+ check_err $? "(S, G) entry for source $src3 not created after replace"
+
+ bridge mdb replace dev br0 port $swp1 grp $grp temp vid 10 \
+ filter_mode exclude source_list $src1,$src3
+ bridge -d mdb get dev br0 grp $grp src $src1 vid 10 &> /dev/null
+ check_err $? "(S, G) entry for source $src1 not created after second replace"
+ bridge -d mdb get dev br0 grp $grp src $src2 vid 10 &> /dev/null
+ check_fail $? "(S, G) entry for source $src2 created after second replace"
+ bridge -d mdb get dev br0 grp $grp src $src3 vid 10 &> /dev/null
+ check_err $? "(S, G) entry for source $src3 not created after second replace"
+
+ bridge mdb del dev br0 port $swp1 grp $grp vid 10
+
+ # Check that protocol can be modified.
+ bridge mdb add dev br0 port $swp1 grp $grp temp vid 10 \
+ filter_mode exclude source_list $src1 proto zebra
+
+ bridge mdb replace dev br0 port $swp1 grp $grp temp vid 10 \
+ filter_mode exclude source_list $src1 proto bgp
+ bridge -d mdb get dev br0 grp $grp vid 10 | grep -q "bgp"
+ check_err $? "(*, G) protocol not changed to \"bgp\" after replace"
+ bridge -d mdb get dev br0 grp $grp src $src1 vid 10 | grep -q "bgp"
+ check_err $? "(S, G) protocol not changed to \"bgp\" after replace"
+
+ bridge mdb del dev br0 port $swp1 grp $grp vid 10
+
+ ## Star exclude tests.
+
+ # Check star exclude functionality. When adding a new EXCLUDE (*, G),
+ # it needs to be also added to all (S, G) entries for proper
+ # replication.
+ bridge mdb add dev br0 port $swp2 grp $grp vid 10 \
+ filter_mode include source_list $src1
+ bridge mdb add dev br0 port $swp1 grp $grp vid 10
+ bridge -d mdb get dev br0 grp $grp src $src1 vid 10 | grep "$swp1" | \
+ grep -q "added_by_star_ex"
+ check_err $? "\"added_by_star_ex\" entry not created after adding (*, G) entry"
+ bridge mdb del dev br0 port $swp1 grp $grp vid 10
+ bridge mdb del dev br0 port $swp2 grp $grp src $src1 vid 10
+
+ ## Error cases tests.
+
+ bridge mdb add dev br0 port $swp1 grp $invalid_grp vid 10 &> /dev/null
+ check_fail $? "Managed to add an entry with an invalid group"
+
+ bridge mdb add dev br0 port $swp1 grp $grp vid 10 filter_mode include \
+ &> /dev/null
+ check_fail $? "Managed to add an INCLUDE entry with an empty source list"
+
+ bridge mdb add dev br0 port $swp1 grp $grp vid 10 filter_mode include \
+ source_list $grp &> /dev/null
+ check_fail $? "Managed to add an entry with an invalid source in source list"
+
+ bridge mdb add dev br0 port $swp1 grp $grp vid 10 \
+ source_list $src &> /dev/null
+ check_fail $? "Managed to add an entry with a source list and no filter mode"
+
+ bridge mdb add dev br0 port $swp1 grp $grp vid 10 filter_mode include \
+ source_list $src1
+ bridge mdb add dev br0 port $swp1 grp $grp vid 10 filter_mode exclude \
+ source_list $src1 &> /dev/null
+ check_fail $? "Managed to replace an entry without using replace"
+ bridge mdb del dev br0 port $swp1 grp $grp vid 10
+
+ bridge mdb add dev br0 port $swp1 grp $grp src $src2 vid 10
+ bridge mdb add dev br0 port $swp1 grp $grp vid 10 filter_mode include \
+ source_list $src1,$src2,$src3 &> /dev/null
+ check_fail $? "Managed to add a source that already has a forwarding entry"
+ bridge mdb del dev br0 port $swp1 grp $grp src $src2 vid 10
+
+ # Check maximum number of sources.
+ bridge mdb add dev br0 port $swp1 grp $grp vid 10 filter_mode exclude \
+ source_list $(src_list_create $src_prefix $max_srcs)
+ num_srcs=$(bridge -d mdb show dev br0 vid 10 | grep "$grp" | \
+ grep "src" | wc -l)
+ [[ $num_srcs -eq $max_srcs ]]
+ check_err $? "Failed to configure maximum number of sources ($max_srcs)"
+ bridge mdb del dev br0 port $swp1 grp $grp vid 10
+
+ bridge mdb add dev br0 port $swp1 grp $grp vid 10 filter_mode exclude \
+ source_list $(src_list_create $src_prefix $((max_srcs + 1))) \
+ &> /dev/null
+ check_fail $? "Managed to exceed maximum number of sources ($max_srcs)"
+
+ log_test "$name (*, G) port group entries configuration tests"
+}
+
+cfg_test_port_ip_star_g()
+{
+ echo
+ log_info "# Port group entries configuration tests - (*, G)"
+
+ cfg_test_port_common "IPv4 (*, G)" "grp 239.1.1.1"
+ cfg_test_port_common "IPv6 (*, G)" "grp ff0e::1"
+ __cfg_test_port_ip_star_g "IPv4" "239.1.1.1" "224.0.0.1" "192.0.2."
+ __cfg_test_port_ip_star_g "IPv6" "ff0e::1" "ff02::1" "2001:db8:1::"
+}
+
+__cfg_test_port_ip_sg()
+{
+ local name=$1; shift
+ local grp=$1; shift
+ local src=$1; shift
+ local grp_key="grp $grp src $src"
+
+ RET=0
+
+ bridge mdb add dev br0 port $swp1 $grp_key vid 10
+ bridge -d mdb get dev br0 $grp_key vid 10 | grep -q "include"
+ check_err $? "Default filter mode is not \"include\""
+ bridge mdb del dev br0 port $swp1 $grp_key vid 10
+
+ # Check that entries can be added as both permanent and temp and that
+ # group timer is set correctly.
+ bridge mdb add dev br0 port $swp1 $grp_key permanent vid 10
+ bridge -d mdb get dev br0 $grp_key vid 10 | grep -q "permanent"
+ check_err $? "Entry not added as \"permanent\" when should"
+ bridge -d -s mdb get dev br0 $grp_key vid 10 | grep -q " 0.00"
+ check_err $? "\"permanent\" entry has a pending group timer"
+ bridge mdb del dev br0 port $swp1 $grp_key vid 10
+
+ bridge mdb add dev br0 port $swp1 $grp_key temp vid 10
+ bridge -d mdb get dev br0 $grp_key vid 10 | grep -q "temp"
+ check_err $? "Entry not added as \"temp\" when should"
+ bridge -d -s mdb get dev br0 $grp_key vid 10 | grep -q " 0.00"
+ check_fail $? "\"temp\" entry has an unpending group timer"
+ bridge mdb del dev br0 port $swp1 $grp_key vid 10
+
+ # Check error cases.
+ bridge mdb add dev br0 port $swp1 $grp_key vid 10 \
+ filter_mode include &> /dev/null
+ check_fail $? "Managed to add an entry with a filter mode"
+
+ bridge mdb add dev br0 port $swp1 $grp_key vid 10 \
+ filter_mode include source_list $src &> /dev/null
+ check_fail $? "Managed to add an entry with a source list"
+
+ bridge mdb add dev br0 port $swp1 grp $grp src $grp vid 10 &> /dev/null
+ check_fail $? "Managed to add an entry with an invalid source"
+
+ bridge mdb add dev br0 port $swp1 $grp_key vid 10 temp
+ bridge mdb add dev br0 port $swp1 $grp_key vid 10 permanent &> /dev/null
+ check_fail $? "Managed to replace an entry without using replace"
+ bridge mdb del dev br0 port $swp1 $grp_key vid 10
+
+ # Check that we can replace available attributes.
+ bridge mdb add dev br0 port $swp1 $grp_key vid 10 proto 123
+ bridge mdb replace dev br0 port $swp1 $grp_key vid 10 proto 111
+ bridge -d mdb get dev br0 $grp_key vid 10 | grep -q "111"
+ check_err $? "Failed to replace protocol"
+
+ bridge mdb replace dev br0 port $swp1 $grp_key vid 10 permanent
+ bridge -d mdb get dev br0 $grp_key vid 10 | grep -q "permanent"
+ check_err $? "Entry not marked as \"permanent\" after replace"
+ bridge -d -s mdb get dev br0 $grp_key vid 10 | grep -q " 0.00"
+ check_err $? "Entry has a pending group timer after replace"
+
+ bridge mdb replace dev br0 port $swp1 $grp_key vid 10 temp
+ bridge -d mdb get dev br0 $grp_key vid 10 | grep -q "temp"
+ check_err $? "Entry not marked as \"temp\" after replace"
+ bridge -d -s mdb get dev br0 $grp_key vid 10 | grep -q " 0.00"
+ check_fail $? "Entry has an unpending group timer after replace"
+ bridge mdb del dev br0 port $swp1 $grp_key vid 10
+
+ # Check star exclude functionality. When adding a (S, G), all matching
+ # (*, G) ports need to be added to it.
+ bridge mdb add dev br0 port $swp2 grp $grp vid 10
+ bridge mdb add dev br0 port $swp1 $grp_key vid 10
+ bridge mdb get dev br0 $grp_key vid 10 | grep $swp2 | \
+ grep -q "added_by_star_ex"
+ check_err $? "\"added_by_star_ex\" entry not created after adding (S, G) entry"
+ bridge mdb del dev br0 port $swp1 $grp_key vid 10
+ bridge mdb del dev br0 port $swp2 grp $grp vid 10
+
+ log_test "$name (S, G) port group entries configuration tests"
+}
+
+cfg_test_port_ip_sg()
+{
+ echo
+ log_info "# Port group entries configuration tests - (S, G)"
+
+ cfg_test_port_common "IPv4 (S, G)" "grp 239.1.1.1 src 192.0.2.1"
+ cfg_test_port_common "IPv6 (S, G)" "grp ff0e::1 src 2001:db8:1::1"
+ __cfg_test_port_ip_sg "IPv4" "239.1.1.1" "192.0.2.1"
+ __cfg_test_port_ip_sg "IPv6" "ff0e::1" "2001:db8:1::1"
+}
+
+cfg_test_port_ip()
+{
+ cfg_test_port_ip_star_g
+ cfg_test_port_ip_sg
+}
+
+__cfg_test_port_l2()
+{
+ local grp="01:02:03:04:05:06"
+
+ RET=0
+
+ bridge meb add dev br0 port $swp grp 00:01:02:03:04:05 \
+ permanent vid 10 &> /dev/null
+ check_fail $? "Managed to add an entry with unicast MAC"
+
+ bridge mdb add dev br0 port $swp grp $grp src 00:01:02:03:04:05 \
+ permanent vid 10 &> /dev/null
+ check_fail $? "Managed to add an entry with a source"
+
+ bridge mdb add dev br0 port $swp1 grp $grp permanent vid 10 \
+ filter_mode include &> /dev/null
+ check_fail $? "Managed to add an entry with a filter mode"
+
+ bridge mdb add dev br0 port $swp1 grp $grp permanent vid 10 \
+ source_list 00:01:02:03:04:05 &> /dev/null
+ check_fail $? "Managed to add an entry with a source list"
+
+ log_test "L2 (*, G) port group entries configuration tests"
+}
+
+cfg_test_port_l2()
+{
+ echo
+ log_info "# Port group entries configuration tests - L2"
+
+ cfg_test_port_common "L2 (*, G)" "grp 01:02:03:04:05:06"
+ __cfg_test_port_l2
+}
+
+# Check configuration of regular (port) entries of all types.
+cfg_test_port()
+{
+ cfg_test_port_ip
+ cfg_test_port_l2
+}
+
+ipv4_grps_get()
+{
+ local max_grps=$1; shift
+ local i
+
+ for i in $(seq 0 $((max_grps - 1))); do
+ echo "239.1.1.$i"
+ done
+}
+
+ipv6_grps_get()
+{
+ local max_grps=$1; shift
+ local i
+
+ for i in $(seq 0 $((max_grps - 1))); do
+ echo "ff0e::$(printf %x $i)"
+ done
+}
+
+l2_grps_get()
+{
+ local max_grps=$1; shift
+ local i
+
+ for i in $(seq 0 $((max_grps - 1))); do
+ echo "01:00:00:00:00:$(printf %02x $i)"
+ done
+}
+
+cfg_test_dump_common()
+{
+ local name=$1; shift
+ local fn=$1; shift
+ local max_bridges=2
+ local max_grps=256
+ local max_ports=32
+ local num_entries
+ local batch_file
+ local grp
+ local i j
+
+ RET=0
+
+ # Create net devices.
+ for i in $(seq 1 $max_bridges); do
+ ip link add name br-test${i} up type bridge vlan_filtering 1 \
+ mcast_snooping 1
+ for j in $(seq 1 $max_ports); do
+ ip link add name br-test${i}-du${j} up \
+ master br-test${i} type dummy
+ done
+ done
+
+ # Create batch file with MDB entries.
+ batch_file=$(mktemp)
+ for i in $(seq 1 $max_bridges); do
+ for j in $(seq 1 $max_ports); do
+ for grp in $($fn $max_grps); do
+ echo "mdb add dev br-test${i} \
+ port br-test${i}-du${j} grp $grp \
+ permanent vid 1" >> $batch_file
+ done
+ done
+ done
+
+ # Program the batch file and check for expected number of entries.
+ bridge -b $batch_file
+ for i in $(seq 1 $max_bridges); do
+ num_entries=$(bridge mdb show dev br-test${i} | \
+ grep "permanent" | wc -l)
+ [[ $num_entries -eq $((max_grps * max_ports)) ]]
+ check_err $? "Wrong number of entries in br-test${i}"
+ done
+
+ # Cleanup.
+ rm $batch_file
+ for i in $(seq 1 $max_bridges); do
+ ip link del dev br-test${i}
+ for j in $(seq $max_ports); do
+ ip link del dev br-test${i}-du${j}
+ done
+ done
+
+ log_test "$name large scale dump tests"
+}
+
+# Check large scale dump.
+cfg_test_dump()
+{
+ echo
+ log_info "# Large scale dump tests"
+
+ cfg_test_dump_common "IPv4" ipv4_grps_get
+ cfg_test_dump_common "IPv6" ipv6_grps_get
+ cfg_test_dump_common "L2" l2_grps_get
+}
+
+# Check flush functionality with different parameters.
+cfg_test_flush()
+{
+ local num_entries
+
+ # Add entries with different attributes and check that they are all
+ # flushed when the flush command is given with no parameters.
+
+ # Different port.
+ bridge mdb add dev br0 port $swp1 grp 239.1.1.1 vid 10
+ bridge mdb add dev br0 port $swp2 grp 239.1.1.2 vid 10
+
+ # Different VLAN ID.
+ bridge mdb add dev br0 port $swp1 grp 239.1.1.3 vid 10
+ bridge mdb add dev br0 port $swp1 grp 239.1.1.4 vid 20
+
+ # Different routing protocol.
+ bridge mdb add dev br0 port $swp1 grp 239.1.1.5 vid 10 proto bgp
+ bridge mdb add dev br0 port $swp1 grp 239.1.1.6 vid 10 proto zebra
+
+ # Different state.
+ bridge mdb add dev br0 port $swp1 grp 239.1.1.7 vid 10 permanent
+ bridge mdb add dev br0 port $swp1 grp 239.1.1.8 vid 10 temp
+
+ bridge mdb flush dev br0
+ num_entries=$(bridge mdb show dev br0 | wc -l)
+ [[ $num_entries -eq 0 ]]
+ check_err $? 0 "Not all entries flushed after flush all"
+
+ # Check that when flushing by port only entries programmed with the
+ # specified port are flushed and the rest are not.
+
+ bridge mdb add dev br0 port $swp1 grp 239.1.1.1 vid 10
+ bridge mdb add dev br0 port $swp2 grp 239.1.1.1 vid 10
+ bridge mdb add dev br0 port br0 grp 239.1.1.1 vid 10
+
+ bridge mdb flush dev br0 port $swp1
+
+ bridge mdb get dev br0 grp 239.1.1.1 vid 10 | grep -q "port $swp1"
+ check_fail $? "Entry not flushed by specified port"
+ bridge mdb get dev br0 grp 239.1.1.1 vid 10 | grep -q "port $swp2"
+ check_err $? "Entry flushed by wrong port"
+ bridge mdb get dev br0 grp 239.1.1.1 vid 10 | grep -q "port br0"
+ check_err $? "Host entry flushed by wrong port"
+
+ bridge mdb flush dev br0 port br0
+
+ bridge mdb get dev br0 grp 239.1.1.1 vid 10 | grep -q "port br0"
+ check_fail $? "Host entry not flushed by specified port"
+
+ bridge mdb flush dev br0
+
+ # Check that when flushing by VLAN ID only entries programmed with the
+ # specified VLAN ID are flushed and the rest are not.
+
+ bridge mdb add dev br0 port $swp1 grp 239.1.1.1 vid 10
+ bridge mdb add dev br0 port $swp2 grp 239.1.1.1 vid 10
+ bridge mdb add dev br0 port $swp1 grp 239.1.1.1 vid 20
+ bridge mdb add dev br0 port $swp2 grp 239.1.1.1 vid 20
+
+ bridge mdb flush dev br0 vid 10
+
+ bridge mdb get dev br0 grp 239.1.1.1 vid 10 &> /dev/null
+ check_fail $? "Entry not flushed by specified VLAN ID"
+ bridge mdb get dev br0 grp 239.1.1.1 vid 20 &> /dev/null
+ check_err $? "Entry flushed by wrong VLAN ID"
+
+ bridge mdb flush dev br0
+
+ # Check that all permanent entries are flushed when "permanent" is
+ # specified and that temporary entries are not.
+
+ bridge mdb add dev br0 port $swp1 grp 239.1.1.1 permanent vid 10
+ bridge mdb add dev br0 port $swp2 grp 239.1.1.1 temp vid 10
+
+ bridge mdb flush dev br0 permanent
+
+ bridge mdb get dev br0 grp 239.1.1.1 vid 10 | grep -q "port $swp1"
+ check_fail $? "Entry not flushed by \"permanent\" state"
+ bridge mdb get dev br0 grp 239.1.1.1 vid 10 | grep -q "port $swp2"
+ check_err $? "Entry flushed by wrong state (\"permanent\")"
+
+ bridge mdb flush dev br0
+
+ # Check that all temporary entries are flushed when "nopermanent" is
+ # specified and that permanent entries are not.
+
+ bridge mdb add dev br0 port $swp1 grp 239.1.1.1 permanent vid 10
+ bridge mdb add dev br0 port $swp2 grp 239.1.1.1 temp vid 10
+
+ bridge mdb flush dev br0 nopermanent
+
+ bridge mdb get dev br0 grp 239.1.1.1 vid 10 | grep -q "port $swp1"
+ check_err $? "Entry flushed by wrong state (\"nopermanent\")"
+ bridge mdb get dev br0 grp 239.1.1.1 vid 10 | grep -q "port $swp2"
+ check_fail $? "Entry not flushed by \"nopermanent\" state"
+
+ bridge mdb flush dev br0
+
+ # Check that L2 host entries are not flushed when "nopermanent" is
+ # specified, but flushed when "permanent" is specified.
+
+ bridge mdb add dev br0 port br0 grp 01:02:03:04:05:06 permanent vid 10
+
+ bridge mdb flush dev br0 nopermanent
+
+ bridge mdb get dev br0 grp 01:02:03:04:05:06 vid 10 &> /dev/null
+ check_err $? "L2 host entry flushed by wrong state (\"nopermanent\")"
+
+ bridge mdb flush dev br0 permanent
+
+ bridge mdb get dev br0 grp 01:02:03:04:05:06 vid 10 &> /dev/null
+ check_fail $? "L2 host entry not flushed by \"permanent\" state"
+
+ bridge mdb flush dev br0
+
+ # Check that IPv4 host entries are not flushed when "permanent" is
+ # specified, but flushed when "nopermanent" is specified.
+
+ bridge mdb add dev br0 port br0 grp 239.1.1.1 temp vid 10
+
+ bridge mdb flush dev br0 permanent
+
+ bridge mdb get dev br0 grp 239.1.1.1 vid 10 &> /dev/null
+ check_err $? "IPv4 host entry flushed by wrong state (\"permanent\")"
+
+ bridge mdb flush dev br0 nopermanent
+
+ bridge mdb get dev br0 grp 239.1.1.1 vid 10 &> /dev/null
+ check_fail $? "IPv4 host entry not flushed by \"nopermanent\" state"
+
+ bridge mdb flush dev br0
+
+ # Check that IPv6 host entries are not flushed when "permanent" is
+ # specified, but flushed when "nopermanent" is specified.
+
+ bridge mdb add dev br0 port br0 grp ff0e::1 temp vid 10
+
+ bridge mdb flush dev br0 permanent
+
+ bridge mdb get dev br0 grp ff0e::1 vid 10 &> /dev/null
+ check_err $? "IPv6 host entry flushed by wrong state (\"permanent\")"
+
+ bridge mdb flush dev br0 nopermanent
+
+ bridge mdb get dev br0 grp ff0e::1 vid 10 &> /dev/null
+ check_fail $? "IPv6 host entry not flushed by \"nopermanent\" state"
+
+ bridge mdb flush dev br0
+
+ # Check that when flushing by routing protocol only entries programmed
+ # with the specified routing protocol are flushed and the rest are not.
+
+ bridge mdb add dev br0 port $swp1 grp 239.1.1.1 vid 10 proto bgp
+ bridge mdb add dev br0 port $swp2 grp 239.1.1.1 vid 10 proto zebra
+ bridge mdb add dev br0 port br0 grp 239.1.1.1 vid 10
+
+ bridge mdb flush dev br0 proto bgp
+
+ bridge mdb get dev br0 grp 239.1.1.1 vid 10 | grep -q "port $swp1"
+ check_fail $? "Entry not flushed by specified routing protocol"
+ bridge mdb get dev br0 grp 239.1.1.1 vid 10 | grep -q "port $swp2"
+ check_err $? "Entry flushed by wrong routing protocol"
+ bridge mdb get dev br0 grp 239.1.1.1 vid 10 | grep -q "port br0"
+ check_err $? "Host entry flushed by wrong routing protocol"
+
+ bridge mdb flush dev br0
+
+ # Test that an error is returned when trying to flush using unsupported
+ # parameters.
+
+ bridge mdb flush dev br0 src_vni 10 &> /dev/null
+ check_fail $? "Managed to flush by source VNI"
+
+ bridge mdb flush dev br0 dst 198.51.100.1 &> /dev/null
+ check_fail $? "Managed to flush by destination IP"
+
+ bridge mdb flush dev br0 dst_port 4789 &> /dev/null
+ check_fail $? "Managed to flush by UDP destination port"
+
+ bridge mdb flush dev br0 vni 10 &> /dev/null
+ check_fail $? "Managed to flush by destination VNI"
+
+ log_test "Flush tests"
+}
+
+cfg_test()
+{
+ cfg_test_host
+ cfg_test_port
+ cfg_test_dump
+ cfg_test_flush
+}
+
+__fwd_test_host_ip()
+{
+ local grp=$1; shift
+ local dmac=$1; shift
+ local src=$1; shift
+ local mode=$1; shift
+ local name
+ local eth_type
+
+ RET=0
+
+ if [[ $mode == "-4" ]]; then
+ name="IPv4"
+ eth_type="ipv4"
+ else
+ name="IPv6"
+ eth_type="ipv6"
+ fi
+
+ tc filter add dev br0 ingress protocol 802.1q pref 1 handle 1 flower \
+ vlan_ethtype $eth_type vlan_id 10 dst_ip $grp src_ip $src \
+ action drop
+
+ # Packet should only be flooded to multicast router ports when there is
+ # no matching MDB entry. The bridge is not configured as a multicast
+ # router port.
+ $MZ $mode $h1.10 -a own -b $dmac -c 1 -p 128 -A $src -B $grp -t udp -q
+ tc_check_packets "dev br0 ingress" 1 0
+ check_err $? "Packet locally received after flood"
+
+ # Install a regular port group entry and expect the packet to not be
+ # locally received.
+ bridge mdb add dev br0 port $swp2 grp $grp temp vid 10
+ $MZ $mode $h1.10 -a own -b $dmac -c 1 -p 128 -A $src -B $grp -t udp -q
+ tc_check_packets "dev br0 ingress" 1 0
+ check_err $? "Packet locally received after installing a regular entry"
+
+ # Add a host entry and expect the packet to be locally received.
+ bridge mdb add dev br0 port br0 grp $grp temp vid 10
+ $MZ $mode $h1.10 -a own -b $dmac -c 1 -p 128 -A $src -B $grp -t udp -q
+ tc_check_packets "dev br0 ingress" 1 1
+ check_err $? "Packet not locally received after adding a host entry"
+
+ # Remove the host entry and expect the packet to not be locally
+ # received.
+ bridge mdb del dev br0 port br0 grp $grp vid 10
+ $MZ $mode $h1.10 -a own -b $dmac -c 1 -p 128 -A $src -B $grp -t udp -q
+ tc_check_packets "dev br0 ingress" 1 1
+ check_err $? "Packet locally received after removing a host entry"
+
+ bridge mdb del dev br0 port $swp2 grp $grp vid 10
+
+ tc filter del dev br0 ingress protocol 802.1q pref 1 handle 1 flower
+
+ log_test "$name host entries forwarding tests"
+}
+
+fwd_test_host_ip()
+{
+ __fwd_test_host_ip "239.1.1.1" "01:00:5e:01:01:01" "192.0.2.1" "-4"
+ __fwd_test_host_ip "ff0e::1" "33:33:00:00:00:01" "2001:db8:1::1" "-6"
+}
+
+fwd_test_host_l2()
+{
+ local dmac=01:02:03:04:05:06
+
+ RET=0
+
+ tc filter add dev br0 ingress protocol all pref 1 handle 1 flower \
+ dst_mac $dmac action drop
+
+ # Packet should be flooded and locally received when there is no
+ # matching MDB entry.
+ $MZ $h1.10 -c 1 -p 128 -a own -b $dmac -q
+ tc_check_packets "dev br0 ingress" 1 1
+ check_err $? "Packet not locally received after flood"
+
+ # Install a regular port group entry and expect the packet to not be
+ # locally received.
+ bridge mdb add dev br0 port $swp2 grp $dmac permanent vid 10
+ $MZ $h1.10 -c 1 -p 128 -a own -b $dmac -q
+ tc_check_packets "dev br0 ingress" 1 1
+ check_err $? "Packet locally received after installing a regular entry"
+
+ # Add a host entry and expect the packet to be locally received.
+ bridge mdb add dev br0 port br0 grp $dmac permanent vid 10
+ $MZ $h1.10 -c 1 -p 128 -a own -b $dmac -q
+ tc_check_packets "dev br0 ingress" 1 2
+ check_err $? "Packet not locally received after adding a host entry"
+
+ # Remove the host entry and expect the packet to not be locally
+ # received.
+ bridge mdb del dev br0 port br0 grp $dmac permanent vid 10
+ $MZ $h1.10 -c 1 -p 128 -a own -b $dmac -q
+ tc_check_packets "dev br0 ingress" 1 2
+ check_err $? "Packet locally received after removing a host entry"
+
+ bridge mdb del dev br0 port $swp2 grp $dmac permanent vid 10
+
+ tc filter del dev br0 ingress protocol all pref 1 handle 1 flower
+
+ log_test "L2 host entries forwarding tests"
+}
+
+fwd_test_host()
+{
+ # Disable multicast router on the bridge to ensure that packets are
+ # only locally received when a matching host entry is present.
+ ip link set dev br0 type bridge mcast_router 0
+
+ fwd_test_host_ip
+ fwd_test_host_l2
+
+ ip link set dev br0 type bridge mcast_router 1
+}
+
+__fwd_test_port_ip()
+{
+ local grp=$1; shift
+ local dmac=$1; shift
+ local valid_src=$1; shift
+ local invalid_src=$1; shift
+ local mode=$1; shift
+ local filter_mode=$1; shift
+ local name
+ local eth_type
+ local src_list
+
+ RET=0
+
+ if [[ $mode == "-4" ]]; then
+ name="IPv4"
+ eth_type="ipv4"
+ else
+ name="IPv6"
+ eth_type="ipv6"
+ fi
+
+ # The valid source is the one we expect to get packets from after
+ # adding the entry.
+ if [[ $filter_mode == "include" ]]; then
+ src_list=$valid_src
+ else
+ src_list=$invalid_src
+ fi
+
+ tc filter add dev $h2 ingress protocol 802.1q pref 1 handle 1 flower \
+ vlan_ethtype $eth_type vlan_id 10 dst_ip $grp \
+ src_ip $valid_src action drop
+ tc filter add dev $h2 ingress protocol 802.1q pref 1 handle 2 flower \
+ vlan_ethtype $eth_type vlan_id 10 dst_ip $grp \
+ src_ip $invalid_src action drop
+
+ $MZ $mode $h1.10 -a own -b $dmac -c 1 -p 128 -A $valid_src -B $grp -t udp -q
+ tc_check_packets "dev $h2 ingress" 1 0
+ check_err $? "Packet from valid source received on H2 before adding entry"
+
+ $MZ $mode $h1.10 -a own -b $dmac -c 1 -p 128 -A $invalid_src -B $grp -t udp -q
+ tc_check_packets "dev $h2 ingress" 2 0
+ check_err $? "Packet from invalid source received on H2 before adding entry"
+
+ bridge mdb add dev br0 port $swp2 grp $grp vid 10 \
+ filter_mode $filter_mode source_list $src_list
+
+ $MZ $mode $h1.10 -a own -b $dmac -c 1 -p 128 -A $valid_src -B $grp -t udp -q
+ tc_check_packets "dev $h2 ingress" 1 1
+ check_err $? "Packet from valid source not received on H2 after adding entry"
+
+ $MZ $mode $h1.10 -a own -b $dmac -c 1 -p 128 -A $invalid_src -B $grp -t udp -q
+ tc_check_packets "dev $h2 ingress" 2 0
+ check_err $? "Packet from invalid source received on H2 after adding entry"
+
+ bridge mdb replace dev br0 port $swp2 grp $grp vid 10 \
+ filter_mode exclude
+
+ $MZ $mode $h1.10 -a own -b $dmac -c 1 -p 128 -A $valid_src -B $grp -t udp -q
+ tc_check_packets "dev $h2 ingress" 1 2
+ check_err $? "Packet from valid source not received on H2 after allowing all sources"
+
+ $MZ $mode $h1.10 -a own -b $dmac -c 1 -p 128 -A $invalid_src -B $grp -t udp -q
+ tc_check_packets "dev $h2 ingress" 2 1
+ check_err $? "Packet from invalid source not received on H2 after allowing all sources"
+
+ bridge mdb del dev br0 port $swp2 grp $grp vid 10
+
+ $MZ $mode $h1.10 -a own -b $dmac -c 1 -p 128 -A $valid_src -B $grp -t udp -q
+ tc_check_packets "dev $h2 ingress" 1 2
+ check_err $? "Packet from valid source received on H2 after deleting entry"
+
+ $MZ $mode $h1.10 -a own -b $dmac -c 1 -p 128 -A $invalid_src -B $grp -t udp -q
+ tc_check_packets "dev $h2 ingress" 2 1
+ check_err $? "Packet from invalid source received on H2 after deleting entry"
+
+ tc filter del dev $h2 ingress protocol 802.1q pref 1 handle 2 flower
+ tc filter del dev $h2 ingress protocol 802.1q pref 1 handle 1 flower
+
+ log_test "$name port group \"$filter_mode\" entries forwarding tests"
+}
+
+fwd_test_port_ip()
+{
+ __fwd_test_port_ip "239.1.1.1" "01:00:5e:01:01:01" "192.0.2.1" "192.0.2.2" "-4" "exclude"
+ __fwd_test_port_ip "ff0e::1" "33:33:00:00:00:01" "2001:db8:1::1" "2001:db8:1::2" "-6" \
+ "exclude"
+ __fwd_test_port_ip "239.1.1.1" "01:00:5e:01:01:01" "192.0.2.1" "192.0.2.2" "-4" "include"
+ __fwd_test_port_ip "ff0e::1" "33:33:00:00:00:01" "2001:db8:1::1" "2001:db8:1::2" "-6" \
+ "include"
+}
+
+fwd_test_port_l2()
+{
+ local dmac=01:02:03:04:05:06
+
+ RET=0
+
+ tc filter add dev $h2 ingress protocol all pref 1 handle 1 flower \
+ dst_mac $dmac action drop
+
+ $MZ $h1.10 -c 1 -p 128 -a own -b $dmac -q
+ tc_check_packets "dev $h2 ingress" 1 0
+ check_err $? "Packet received on H2 before adding entry"
+
+ bridge mdb add dev br0 port $swp2 grp $dmac permanent vid 10
+ $MZ $h1.10 -c 1 -p 128 -a own -b $dmac -q
+ tc_check_packets "dev $h2 ingress" 1 1
+ check_err $? "Packet not received on H2 after adding entry"
+
+ bridge mdb del dev br0 port $swp2 grp $dmac permanent vid 10
+ $MZ $h1.10 -c 1 -p 128 -a own -b $dmac -q
+ tc_check_packets "dev $h2 ingress" 1 1
+ check_err $? "Packet received on H2 after deleting entry"
+
+ tc filter del dev $h2 ingress protocol all pref 1 handle 1 flower
+
+ log_test "L2 port entries forwarding tests"
+}
+
+fwd_test_port()
+{
+ # Disable multicast flooding to ensure that packets are only forwarded
+ # out of a port when a matching port group entry is present.
+ bridge link set dev $swp2 mcast_flood off
+
+ fwd_test_port_ip
+ fwd_test_port_l2
+
+ bridge link set dev $swp2 mcast_flood on
+}
+
+fwd_test()
+{
+ echo
+ log_info "# Forwarding tests"
+
+ # Set the Max Response Delay to 100 centiseconds (1 second) so that the
+ # bridge will start forwarding according to its MDB soon after a
+ # multicast querier is enabled.
+ ip link set dev br0 type bridge mcast_query_response_interval 100
+
+ # Forwarding according to MDB entries only takes place when the bridge
+ # detects that there is a valid querier in the network. Set the bridge
+ # as the querier and assign it a valid IPv6 link-local address to be
+ # used as the source address for MLD queries.
+ ip -6 address add fe80::1/64 nodad dev br0
+ ip link set dev br0 type bridge mcast_querier 1
+ sleep 10
+
+ fwd_test_host
+ fwd_test_port
+
+ ip link set dev br0 type bridge mcast_querier 0
+ ip -6 address del fe80::1/64 dev br0
+ ip link set dev br0 type bridge mcast_query_response_interval 1000
+}
+
+ctrl_igmpv3_is_in_test()
+{
+ RET=0
+
+ # Add a permanent entry and check that it is not affected by the
+ # received IGMP packet.
+ bridge mdb add dev br0 port $swp1 grp 239.1.1.1 permanent vid 10 \
+ filter_mode include source_list 192.0.2.1
+
+ # IS_IN ( 192.0.2.2 )
+ $MZ $h1.10 -c 1 -a own -b 01:00:5e:01:01:01 -A 192.0.2.1 -B 239.1.1.1 \
+ -t ip proto=2,p=$(igmpv3_is_in_get 239.1.1.1 192.0.2.2) -q
+
+ bridge mdb get dev br0 grp 239.1.1.1 src 192.0.2.2 vid 10 &> /dev/null
+ check_fail $? "Permanent entry affected by IGMP packet"
+
+ # Replace the permanent entry with a temporary one and check that after
+ # processing the IGMP packet, a new source is added to the list along
+ # with a new forwarding entry.
+ bridge mdb replace dev br0 port $swp1 grp 239.1.1.1 temp vid 10 \
+ filter_mode include source_list 192.0.2.1
+
+ # IS_IN ( 192.0.2.2 )
+ $MZ $h1.10 -a own -b 01:00:5e:01:01:01 -c 1 -A 192.0.2.1 -B 239.1.1.1 \
+ -t ip proto=2,p=$(igmpv3_is_in_get 239.1.1.1 192.0.2.2) -q
+
+ bridge -d mdb get dev br0 grp 239.1.1.1 vid 10 | grep -q 192.0.2.2
+ check_err $? "Source not add to source list"
+
+ bridge mdb get dev br0 grp 239.1.1.1 src 192.0.2.2 vid 10 &> /dev/null
+ check_err $? "(S, G) entry not created for new source"
+
+ bridge mdb del dev br0 port $swp1 grp 239.1.1.1 vid 10
+
+ log_test "IGMPv3 MODE_IS_INCLUDE tests"
+}
+
+ctrl_mldv2_is_in_test()
+{
+ RET=0
+
+ # Add a permanent entry and check that it is not affected by the
+ # received MLD packet.
+ bridge mdb add dev br0 port $swp1 grp ff0e::1 permanent vid 10 \
+ filter_mode include source_list 2001:db8:1::1
+
+ # IS_IN ( 2001:db8:1::2 )
+ local p=$(mldv2_is_in_get fe80::1 ff0e::1 2001:db8:1::2)
+ $MZ -6 $h1.10 -a own -b 33:33:00:00:00:01 -c 1 -A fe80::1 -B ff0e::1 \
+ -t ip hop=1,next=0,p="$p" -q
+
+ bridge mdb get dev br0 grp ff0e::1 src 2001:db8:1::2 vid 10 &> /dev/null
+ check_fail $? "Permanent entry affected by MLD packet"
+
+ # Replace the permanent entry with a temporary one and check that after
+ # processing the MLD packet, a new source is added to the list along
+ # with a new forwarding entry.
+ bridge mdb replace dev br0 port $swp1 grp ff0e::1 temp vid 10 \
+ filter_mode include source_list 2001:db8:1::1
+
+ # IS_IN ( 2001:db8:1::2 )
+ $MZ -6 $h1.10 -a own -b 33:33:00:00:00:01 -c 1 -A fe80::1 -B ff0e::1 \
+ -t ip hop=1,next=0,p="$p" -q
+
+ bridge -d mdb get dev br0 grp ff0e::1 vid 10 | grep -q 2001:db8:1::2
+ check_err $? "Source not add to source list"
+
+ bridge mdb get dev br0 grp ff0e::1 src 2001:db8:1::2 vid 10 &> /dev/null
+ check_err $? "(S, G) entry not created for new source"
+
+ bridge mdb del dev br0 port $swp1 grp ff0e::1 vid 10
+
+ log_test "MLDv2 MODE_IS_INCLUDE tests"
+}
+
+ctrl_test()
+{
+ echo
+ log_info "# Control packets tests"
+
+ ctrl_igmpv3_is_in_test
+ ctrl_mldv2_is_in_test
+}
+
+if ! bridge mdb help 2>&1 | grep -q "flush"; then
+ echo "SKIP: iproute2 too old, missing bridge mdb flush support"
+ exit $ksft_skip
+fi
+
+trap cleanup EXIT
+
+setup_prepare
+setup_wait
+tests_run
+
+exit $EXIT_STATUS
diff --git a/tools/testing/selftests/net/forwarding/bridge_mdb_host.sh b/tools/testing/selftests/net/forwarding/bridge_mdb_host.sh
new file mode 100755
index 000000000000..b1ba6876dd86
--- /dev/null
+++ b/tools/testing/selftests/net/forwarding/bridge_mdb_host.sh
@@ -0,0 +1,103 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+#
+# Verify that adding host mdb entries work as intended for all types of
+# multicast filters: ipv4, ipv6, and mac
+
+ALL_TESTS="mdb_add_del_test"
+NUM_NETIFS=2
+
+TEST_GROUP_IP4="225.1.2.3"
+TEST_GROUP_IP6="ff02::42"
+TEST_GROUP_MAC="01:00:01:c0:ff:ee"
+
+source lib.sh
+
+h1_create()
+{
+ simple_if_init $h1 192.0.2.1/24 2001:db8:1::1/64
+}
+
+h1_destroy()
+{
+ simple_if_fini $h1 192.0.2.1/24 2001:db8:1::1/64
+}
+
+switch_create()
+{
+ # Enable multicast filtering
+ ip link add dev br0 type bridge mcast_snooping 1
+
+ ip link set dev $swp1 master br0
+
+ ip link set dev br0 up
+ ip link set dev $swp1 up
+}
+
+switch_destroy()
+{
+ ip link set dev $swp1 down
+ ip link del dev br0
+}
+
+setup_prepare()
+{
+ h1=${NETIFS[p1]}
+ swp1=${NETIFS[p2]}
+
+ vrf_prepare
+
+ h1_create
+ switch_create
+}
+
+cleanup()
+{
+ pre_cleanup
+
+ switch_destroy
+ h1_destroy
+
+ vrf_cleanup
+}
+
+do_mdb_add_del()
+{
+ local group=$1
+ local flag=$2
+
+ RET=0
+ bridge mdb add dev br0 port br0 grp $group $flag 2>/dev/null
+ check_err $? "Failed adding $group to br0, port br0"
+
+ if [ -z "$flag" ]; then
+ flag="temp"
+ fi
+
+ bridge mdb show dev br0 | grep $group | grep -q $flag 2>/dev/null
+ check_err $? "$group not added with $flag flag"
+
+ bridge mdb del dev br0 port br0 grp $group 2>/dev/null
+ check_err $? "Failed deleting $group from br0, port br0"
+
+ bridge mdb show dev br0 | grep -q $group >/dev/null
+ check_err_fail 1 $? "$group still in mdb after delete"
+
+ log_test "MDB add/del group $group to bridge port br0"
+}
+
+mdb_add_del_test()
+{
+ do_mdb_add_del $TEST_GROUP_MAC permanent
+ do_mdb_add_del $TEST_GROUP_IP4
+ do_mdb_add_del $TEST_GROUP_IP6
+}
+
+trap cleanup EXIT
+
+setup_prepare
+setup_wait
+
+tests_run
+
+exit $EXIT_STATUS
diff --git a/tools/testing/selftests/net/forwarding/bridge_mdb_max.sh b/tools/testing/selftests/net/forwarding/bridge_mdb_max.sh
new file mode 100755
index 000000000000..3da9d93ab36f
--- /dev/null
+++ b/tools/testing/selftests/net/forwarding/bridge_mdb_max.sh
@@ -0,0 +1,1347 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+# +-----------------------+ +------------------------+
+# | H1 (vrf) | | H2 (vrf) |
+# | + $h1.10 | | + $h2.10 |
+# | | 192.0.2.1/28 | | | 192.0.2.2/28 |
+# | | 2001:db8:1::1/64 | | | 2001:db8:1::2/64 |
+# | | | | | |
+# | | + $h1.20 | | | + $h2.20 |
+# | \ | 198.51.100.1/24 | | \ | 198.51.100.2/24 |
+# | \ | 2001:db8:2::1/64 | | \ | 2001:db8:2::2/64 |
+# | \| | | \| |
+# | + $h1 | | + $h2 |
+# +----|------------------+ +----|-------------------+
+# | |
+# +----|--------------------------------------------------|-------------------+
+# | SW | | |
+# | +--|--------------------------------------------------|-----------------+ |
+# | | + $swp1 BR0 (802.1q) + $swp2 | |
+# | | vid 10 vid 10 | |
+# | | vid 20 vid 20 | |
+# | | | |
+# | +-----------------------------------------------------------------------+ |
+# +---------------------------------------------------------------------------+
+
+ALL_TESTS="
+ test_8021d
+ test_8021q
+ test_8021qvs
+"
+
+NUM_NETIFS=4
+source lib.sh
+source tc_common.sh
+
+h1_create()
+{
+ simple_if_init $h1
+ vlan_create $h1 10 v$h1 192.0.2.1/28 2001:db8:1::1/64
+ vlan_create $h1 20 v$h1 198.51.100.1/24 2001:db8:2::1/64
+}
+
+h1_destroy()
+{
+ vlan_destroy $h1 20
+ vlan_destroy $h1 10
+ simple_if_fini $h1
+}
+
+h2_create()
+{
+ simple_if_init $h2
+ vlan_create $h2 10 v$h2 192.0.2.2/28
+ vlan_create $h2 20 v$h2 198.51.100.2/24
+}
+
+h2_destroy()
+{
+ vlan_destroy $h2 20
+ vlan_destroy $h2 10
+ simple_if_fini $h2
+}
+
+switch_create_8021d()
+{
+ log_info "802.1d tests"
+
+ ip link add name br0 type bridge vlan_filtering 0 \
+ mcast_snooping 1 \
+ mcast_igmp_version 3 mcast_mld_version 2
+ ip link set dev br0 up
+
+ ip link set dev $swp1 master br0
+ ip link set dev $swp1 up
+ bridge link set dev $swp1 fastleave on
+
+ ip link set dev $swp2 master br0
+ ip link set dev $swp2 up
+}
+
+switch_create_8021q()
+{
+ local br_flags=$1; shift
+
+ log_info "802.1q $br_flags${br_flags:+ }tests"
+
+ ip link add name br0 type bridge vlan_filtering 1 vlan_default_pvid 0 \
+ mcast_snooping 1 $br_flags \
+ mcast_igmp_version 3 mcast_mld_version 2
+ bridge vlan add vid 10 dev br0 self
+ bridge vlan add vid 20 dev br0 self
+ ip link set dev br0 up
+
+ ip link set dev $swp1 master br0
+ ip link set dev $swp1 up
+ bridge link set dev $swp1 fastleave on
+ bridge vlan add vid 10 dev $swp1
+ bridge vlan add vid 20 dev $swp1
+
+ ip link set dev $swp2 master br0
+ ip link set dev $swp2 up
+ bridge vlan add vid 10 dev $swp2
+ bridge vlan add vid 20 dev $swp2
+}
+
+switch_create_8021qvs()
+{
+ switch_create_8021q "mcast_vlan_snooping 1"
+ bridge vlan global set dev br0 vid 10 mcast_igmp_version 3
+ bridge vlan global set dev br0 vid 10 mcast_mld_version 2
+ bridge vlan global set dev br0 vid 20 mcast_igmp_version 3
+ bridge vlan global set dev br0 vid 20 mcast_mld_version 2
+}
+
+switch_destroy()
+{
+ ip link set dev $swp2 down
+ ip link set dev $swp2 nomaster
+
+ ip link set dev $swp1 down
+ ip link set dev $swp1 nomaster
+
+ ip link set dev br0 down
+ ip link del dev br0
+}
+
+setup_prepare()
+{
+ h1=${NETIFS[p1]}
+ swp1=${NETIFS[p2]}
+
+ swp2=${NETIFS[p3]}
+ h2=${NETIFS[p4]}
+
+ vrf_prepare
+ forwarding_enable
+
+ h1_create
+ h2_create
+}
+
+cleanup()
+{
+ pre_cleanup
+
+ switch_destroy 2>/dev/null
+ h2_destroy
+ h1_destroy
+
+ forwarding_restore
+ vrf_cleanup
+}
+
+cfg_src_list()
+{
+ local IPs=("$@")
+ local IPstr=$(echo ${IPs[@]} | tr '[:space:]' , | sed 's/,$//')
+
+ echo ${IPstr:+source_list }${IPstr}
+}
+
+cfg_group_op()
+{
+ local op=$1; shift
+ local locus=$1; shift
+ local GRP=$1; shift
+ local state=$1; shift
+ local IPs=("$@")
+
+ local source_list=$(cfg_src_list ${IPs[@]})
+
+ # Everything besides `bridge mdb' uses the "dev X vid Y" syntax,
+ # so we use it here as well and convert.
+ local br_locus=$(echo "$locus" | sed 's/^dev /port /')
+
+ bridge mdb $op dev br0 $br_locus grp $GRP $state \
+ filter_mode include $source_list
+}
+
+cfg4_entries_op()
+{
+ local op=$1; shift
+ local locus=$1; shift
+ local state=$1; shift
+ local n=$1; shift
+ local grp=${1:-1}; shift
+
+ local GRP=239.1.1.${grp}
+ local IPs=$(seq -f 192.0.2.%g 1 $((n - 1)))
+ cfg_group_op "$op" "$locus" "$GRP" "$state" ${IPs[@]}
+}
+
+cfg4_entries_add()
+{
+ cfg4_entries_op add "$@"
+}
+
+cfg4_entries_del()
+{
+ cfg4_entries_op del "$@"
+}
+
+cfg6_entries_op()
+{
+ local op=$1; shift
+ local locus=$1; shift
+ local state=$1; shift
+ local n=$1; shift
+ local grp=${1:-1}; shift
+
+ local GRP=ff0e::${grp}
+ local IPs=$(printf "2001:db8:1::%x\n" $(seq 1 $((n - 1))))
+ cfg_group_op "$op" "$locus" "$GRP" "$state" ${IPs[@]}
+}
+
+cfg6_entries_add()
+{
+ cfg6_entries_op add "$@"
+}
+
+cfg6_entries_del()
+{
+ cfg6_entries_op del "$@"
+}
+
+locus_dev_peer()
+{
+ local dev_kw=$1; shift
+ local dev=$1; shift
+ local vid_kw=$1; shift
+ local vid=$1; shift
+
+ echo "$h1.${vid:-10}"
+}
+
+locus_dev()
+{
+ local dev_kw=$1; shift
+ local dev=$1; shift
+
+ echo $dev
+}
+
+ctl4_entries_add()
+{
+ local locus=$1; shift
+ local state=$1; shift
+ local n=$1; shift
+ local grp=${1:-1}; shift
+
+ local IPs=$(seq -f 192.0.2.%g 1 $((n - 1)))
+ local peer=$(locus_dev_peer $locus)
+ local GRP=239.1.1.${grp}
+ local dmac=01:00:5e:01:01:$(printf "%02x" $grp)
+ $MZ $peer -a own -b $dmac -c 1 -A 192.0.2.1 -B $GRP \
+ -t ip proto=2,p=$(igmpv3_is_in_get $GRP $IPs) -q
+ sleep 1
+
+ local nn=$(bridge mdb show dev br0 | grep $GRP | wc -l)
+ if ((nn != n)); then
+ echo mcast_max_groups > /dev/stderr
+ false
+ fi
+}
+
+ctl4_entries_del()
+{
+ local locus=$1; shift
+ local state=$1; shift
+ local n=$1; shift
+ local grp=${1:-1}; shift
+
+ local peer=$(locus_dev_peer $locus)
+ local GRP=239.1.1.${grp}
+ local dmac=01:00:5e:00:00:02
+ $MZ $peer -a own -b $dmac -c 1 -A 192.0.2.1 -B 224.0.0.2 \
+ -t ip proto=2,p=$(igmpv2_leave_get $GRP) -q
+ sleep 1
+ ! bridge mdb show dev br0 | grep -q $GRP
+}
+
+ctl6_entries_add()
+{
+ local locus=$1; shift
+ local state=$1; shift
+ local n=$1; shift
+ local grp=${1:-1}; shift
+
+ local IPs=$(printf "2001:db8:1::%x\n" $(seq 1 $((n - 1))))
+ local peer=$(locus_dev_peer $locus)
+ local SIP=fe80::1
+ local GRP=ff0e::${grp}
+ local dmac=33:33:00:00:00:$(printf "%02x" $grp)
+ local p=$(mldv2_is_in_get $SIP $GRP $IPs)
+ $MZ -6 $peer -a own -b $dmac -c 1 -A $SIP -B $GRP \
+ -t ip hop=1,next=0,p="$p" -q
+ sleep 1
+
+ local nn=$(bridge mdb show dev br0 | grep $GRP | wc -l)
+ if ((nn != n)); then
+ echo mcast_max_groups > /dev/stderr
+ false
+ fi
+}
+
+ctl6_entries_del()
+{
+ local locus=$1; shift
+ local state=$1; shift
+ local n=$1; shift
+ local grp=${1:-1}; shift
+
+ local peer=$(locus_dev_peer $locus)
+ local SIP=fe80::1
+ local GRP=ff0e::${grp}
+ local dmac=33:33:00:00:00:$(printf "%02x" $grp)
+ local p=$(mldv1_done_get $SIP $GRP)
+ $MZ -6 $peer -a own -b $dmac -c 1 -A $SIP -B $GRP \
+ -t ip hop=1,next=0,p="$p" -q
+ sleep 1
+ ! bridge mdb show dev br0 | grep -q $GRP
+}
+
+bridge_maxgroups_errmsg_check_cfg()
+{
+ local msg=$1; shift
+ local needle=$1; shift
+
+ echo "$msg" | grep -q mcast_max_groups
+ check_err $? "Adding MDB entries failed for the wrong reason: $msg"
+}
+
+bridge_maxgroups_errmsg_check_cfg4()
+{
+ bridge_maxgroups_errmsg_check_cfg "$@"
+}
+
+bridge_maxgroups_errmsg_check_cfg6()
+{
+ bridge_maxgroups_errmsg_check_cfg "$@"
+}
+
+bridge_maxgroups_errmsg_check_ctl4()
+{
+ :
+}
+
+bridge_maxgroups_errmsg_check_ctl6()
+{
+ :
+}
+
+bridge_port_ngroups_get()
+{
+ local locus=$1; shift
+
+ bridge -j -d link show $locus |
+ jq '.[].mcast_n_groups'
+}
+
+bridge_port_maxgroups_get()
+{
+ local locus=$1; shift
+
+ bridge -j -d link show $locus |
+ jq '.[].mcast_max_groups'
+}
+
+bridge_port_maxgroups_set()
+{
+ local locus=$1; shift
+ local max=$1; shift
+
+ bridge link set dev $(locus_dev $locus) mcast_max_groups $max
+}
+
+bridge_port_vlan_ngroups_get()
+{
+ local locus=$1; shift
+
+ bridge -j -d vlan show $locus |
+ jq '.[].vlans[].mcast_n_groups'
+}
+
+bridge_port_vlan_maxgroups_get()
+{
+ local locus=$1; shift
+
+ bridge -j -d vlan show $locus |
+ jq '.[].vlans[].mcast_max_groups'
+}
+
+bridge_port_vlan_maxgroups_set()
+{
+ local locus=$1; shift
+ local max=$1; shift
+
+ bridge vlan set $locus mcast_max_groups $max
+}
+
+test_ngroups_reporting()
+{
+ local CFG=$1; shift
+ local context=$1; shift
+ local locus=$1; shift
+
+ RET=0
+
+ local n0=$(bridge_${context}_ngroups_get "$locus")
+ ${CFG}_entries_add "$locus" temp 5
+ check_err $? "Couldn't add MDB entries"
+ local n1=$(bridge_${context}_ngroups_get "$locus")
+
+ ((n1 == n0 + 5))
+ check_err $? "Number of groups was $n0, now is $n1, but $((n0 + 5)) expected"
+
+ ${CFG}_entries_del "$locus" temp 5
+ check_err $? "Couldn't delete MDB entries"
+ local n2=$(bridge_${context}_ngroups_get "$locus")
+
+ ((n2 == n0))
+ check_err $? "Number of groups was $n0, now is $n2, but should be back to $n0"
+
+ log_test "$CFG: $context: ngroups reporting"
+}
+
+test_8021d_ngroups_reporting_cfg4()
+{
+ test_ngroups_reporting cfg4 port "dev $swp1"
+}
+
+test_8021d_ngroups_reporting_ctl4()
+{
+ test_ngroups_reporting ctl4 port "dev $swp1"
+}
+
+test_8021d_ngroups_reporting_cfg6()
+{
+ test_ngroups_reporting cfg6 port "dev $swp1"
+}
+
+test_8021d_ngroups_reporting_ctl6()
+{
+ test_ngroups_reporting ctl6 port "dev $swp1"
+}
+
+test_8021q_ngroups_reporting_cfg4()
+{
+ test_ngroups_reporting cfg4 port "dev $swp1 vid 10"
+}
+
+test_8021q_ngroups_reporting_ctl4()
+{
+ test_ngroups_reporting ctl4 port "dev $swp1 vid 10"
+}
+
+test_8021q_ngroups_reporting_cfg6()
+{
+ test_ngroups_reporting cfg6 port "dev $swp1 vid 10"
+}
+
+test_8021q_ngroups_reporting_ctl6()
+{
+ test_ngroups_reporting ctl6 port "dev $swp1 vid 10"
+}
+
+test_8021qvs_ngroups_reporting_cfg4()
+{
+ test_ngroups_reporting cfg4 port_vlan "dev $swp1 vid 10"
+}
+
+test_8021qvs_ngroups_reporting_ctl4()
+{
+ test_ngroups_reporting ctl4 port_vlan "dev $swp1 vid 10"
+}
+
+test_8021qvs_ngroups_reporting_cfg6()
+{
+ test_ngroups_reporting cfg6 port_vlan "dev $swp1 vid 10"
+}
+
+test_8021qvs_ngroups_reporting_ctl6()
+{
+ test_ngroups_reporting ctl6 port_vlan "dev $swp1 vid 10"
+}
+
+test_ngroups_cross_vlan()
+{
+ local CFG=$1; shift
+
+ local locus1="dev $swp1 vid 10"
+ local locus2="dev $swp1 vid 20"
+
+ RET=0
+
+ local n10=$(bridge_port_vlan_ngroups_get "$locus1")
+ local n20=$(bridge_port_vlan_ngroups_get "$locus2")
+ ${CFG}_entries_add "$locus1" temp 5 111
+ check_err $? "Couldn't add MDB entries to VLAN 10"
+ local n11=$(bridge_port_vlan_ngroups_get "$locus1")
+ local n21=$(bridge_port_vlan_ngroups_get "$locus2")
+
+ ((n11 == n10 + 5))
+ check_err $? "Number of groups at VLAN 10 was $n10, now is $n11, but 5 entries added on VLAN 10, $((n10 + 5)) expected"
+
+ ((n21 == n20))
+ check_err $? "Number of groups at VLAN 20 was $n20, now is $n21, but no change expected on VLAN 20"
+
+ ${CFG}_entries_add "$locus2" temp 5 112
+ check_err $? "Couldn't add MDB entries to VLAN 20"
+ local n12=$(bridge_port_vlan_ngroups_get "$locus1")
+ local n22=$(bridge_port_vlan_ngroups_get "$locus2")
+
+ ((n12 == n11))
+ check_err $? "Number of groups at VLAN 10 was $n11, now is $n12, but no change expected on VLAN 10"
+
+ ((n22 == n21 + 5))
+ check_err $? "Number of groups at VLAN 20 was $n21, now is $n22, but 5 entries added on VLAN 20, $((n21 + 5)) expected"
+
+ ${CFG}_entries_del "$locus1" temp 5 111
+ check_err $? "Couldn't delete MDB entries from VLAN 10"
+ ${CFG}_entries_del "$locus2" temp 5 112
+ check_err $? "Couldn't delete MDB entries from VLAN 20"
+ local n13=$(bridge_port_vlan_ngroups_get "$locus1")
+ local n23=$(bridge_port_vlan_ngroups_get "$locus2")
+
+ ((n13 == n10))
+ check_err $? "Number of groups at VLAN 10 was $n10, now is $n13, but should be back to $n10"
+
+ ((n23 == n20))
+ check_err $? "Number of groups at VLAN 20 was $n20, now is $n23, but should be back to $n20"
+
+ log_test "$CFG: port_vlan: isolation of port and per-VLAN ngroups"
+}
+
+test_8021qvs_ngroups_cross_vlan_cfg4()
+{
+ test_ngroups_cross_vlan cfg4
+}
+
+test_8021qvs_ngroups_cross_vlan_ctl4()
+{
+ test_ngroups_cross_vlan ctl4
+}
+
+test_8021qvs_ngroups_cross_vlan_cfg6()
+{
+ test_ngroups_cross_vlan cfg6
+}
+
+test_8021qvs_ngroups_cross_vlan_ctl6()
+{
+ test_ngroups_cross_vlan ctl6
+}
+
+test_maxgroups_zero()
+{
+ local CFG=$1; shift
+ local context=$1; shift
+ local locus=$1; shift
+
+ RET=0
+ local max
+
+ max=$(bridge_${context}_maxgroups_get "$locus")
+ ((max == 0))
+ check_err $? "Max groups on $locus should be 0, but $max reported"
+
+ bridge_${context}_maxgroups_set "$locus" 100
+ check_err $? "Failed to set max to 100"
+ max=$(bridge_${context}_maxgroups_get "$locus")
+ ((max == 100))
+ check_err $? "Max groups expected to be 100, but $max reported"
+
+ bridge_${context}_maxgroups_set "$locus" 0
+ check_err $? "Couldn't set maximum to 0"
+
+ # Test that setting 0 explicitly still serves as infinity.
+ ${CFG}_entries_add "$locus" temp 5
+ check_err $? "Adding 5 MDB entries failed but should have passed"
+ ${CFG}_entries_del "$locus" temp 5
+ check_err $? "Couldn't delete MDB entries"
+
+ log_test "$CFG: $context maxgroups: reporting and treatment of 0"
+}
+
+test_8021d_maxgroups_zero_cfg4()
+{
+ test_maxgroups_zero cfg4 port "dev $swp1"
+}
+
+test_8021d_maxgroups_zero_ctl4()
+{
+ test_maxgroups_zero ctl4 port "dev $swp1"
+}
+
+test_8021d_maxgroups_zero_cfg6()
+{
+ test_maxgroups_zero cfg6 port "dev $swp1"
+}
+
+test_8021d_maxgroups_zero_ctl6()
+{
+ test_maxgroups_zero ctl6 port "dev $swp1"
+}
+
+test_8021q_maxgroups_zero_cfg4()
+{
+ test_maxgroups_zero cfg4 port "dev $swp1 vid 10"
+}
+
+test_8021q_maxgroups_zero_ctl4()
+{
+ test_maxgroups_zero ctl4 port "dev $swp1 vid 10"
+}
+
+test_8021q_maxgroups_zero_cfg6()
+{
+ test_maxgroups_zero cfg6 port "dev $swp1 vid 10"
+}
+
+test_8021q_maxgroups_zero_ctl6()
+{
+ test_maxgroups_zero ctl6 port "dev $swp1 vid 10"
+}
+
+test_8021qvs_maxgroups_zero_cfg4()
+{
+ test_maxgroups_zero cfg4 port_vlan "dev $swp1 vid 10"
+}
+
+test_8021qvs_maxgroups_zero_ctl4()
+{
+ test_maxgroups_zero ctl4 port_vlan "dev $swp1 vid 10"
+}
+
+test_8021qvs_maxgroups_zero_cfg6()
+{
+ test_maxgroups_zero cfg6 port_vlan "dev $swp1 vid 10"
+}
+
+test_8021qvs_maxgroups_zero_ctl6()
+{
+ test_maxgroups_zero ctl6 port_vlan "dev $swp1 vid 10"
+}
+
+test_maxgroups_zero_cross_vlan()
+{
+ local CFG=$1; shift
+
+ local locus0="dev $swp1"
+ local locus1="dev $swp1 vid 10"
+ local locus2="dev $swp1 vid 20"
+ local max
+
+ RET=0
+
+ bridge_port_vlan_maxgroups_set "$locus1" 100
+ check_err $? "$locus1: Failed to set max to 100"
+
+ max=$(bridge_port_maxgroups_get "$locus0")
+ ((max == 0))
+ check_err $? "$locus0: Max groups expected to be 0, but $max reported"
+
+ max=$(bridge_port_vlan_maxgroups_get "$locus2")
+ ((max == 0))
+ check_err $? "$locus2: Max groups expected to be 0, but $max reported"
+
+ bridge_port_vlan_maxgroups_set "$locus2" 100
+ check_err $? "$locus2: Failed to set max to 100"
+
+ max=$(bridge_port_maxgroups_get "$locus0")
+ ((max == 0))
+ check_err $? "$locus0: Max groups expected to be 0, but $max reported"
+
+ max=$(bridge_port_vlan_maxgroups_get "$locus2")
+ ((max == 100))
+ check_err $? "$locus2: Max groups expected to be 100, but $max reported"
+
+ bridge_port_maxgroups_set "$locus0" 100
+ check_err $? "$locus0: Failed to set max to 100"
+
+ max=$(bridge_port_maxgroups_get "$locus0")
+ ((max == 100))
+ check_err $? "$locus0: Max groups expected to be 100, but $max reported"
+
+ max=$(bridge_port_vlan_maxgroups_get "$locus2")
+ ((max == 100))
+ check_err $? "$locus2: Max groups expected to be 100, but $max reported"
+
+ bridge_port_vlan_maxgroups_set "$locus1" 0
+ check_err $? "$locus1: Failed to set max to 0"
+
+ max=$(bridge_port_maxgroups_get "$locus0")
+ ((max == 100))
+ check_err $? "$locus0: Max groups expected to be 100, but $max reported"
+
+ max=$(bridge_port_vlan_maxgroups_get "$locus2")
+ ((max == 100))
+ check_err $? "$locus2: Max groups expected to be 100, but $max reported"
+
+ bridge_port_vlan_maxgroups_set "$locus2" 0
+ check_err $? "$locus2: Failed to set max to 0"
+
+ max=$(bridge_port_maxgroups_get "$locus0")
+ ((max == 100))
+ check_err $? "$locus0: Max groups expected to be 100, but $max reported"
+
+ max=$(bridge_port_vlan_maxgroups_get "$locus2")
+ ((max == 0))
+ check_err $? "$locus2: Max groups expected to be 0 but $max reported"
+
+ bridge_port_maxgroups_set "$locus0" 0
+ check_err $? "$locus0: Failed to set max to 0"
+
+ max=$(bridge_port_maxgroups_get "$locus0")
+ ((max == 0))
+ check_err $? "$locus0: Max groups expected to be 0, but $max reported"
+
+ max=$(bridge_port_vlan_maxgroups_get "$locus2")
+ ((max == 0))
+ check_err $? "$locus2: Max groups expected to be 0, but $max reported"
+
+ log_test "$CFG: port_vlan maxgroups: isolation of port and per-VLAN maximums"
+}
+
+test_8021qvs_maxgroups_zero_cross_vlan_cfg4()
+{
+ test_maxgroups_zero_cross_vlan cfg4
+}
+
+test_8021qvs_maxgroups_zero_cross_vlan_ctl4()
+{
+ test_maxgroups_zero_cross_vlan ctl4
+}
+
+test_8021qvs_maxgroups_zero_cross_vlan_cfg6()
+{
+ test_maxgroups_zero_cross_vlan cfg6
+}
+
+test_8021qvs_maxgroups_zero_cross_vlan_ctl6()
+{
+ test_maxgroups_zero_cross_vlan ctl6
+}
+
+test_maxgroups_too_low()
+{
+ local CFG=$1; shift
+ local context=$1; shift
+ local locus=$1; shift
+
+ RET=0
+
+ local n=$(bridge_${context}_ngroups_get "$locus")
+ local msg
+
+ ${CFG}_entries_add "$locus" temp 5 111
+ check_err $? "$locus: Couldn't add MDB entries"
+
+ bridge_${context}_maxgroups_set "$locus" $((n+2))
+ check_err $? "$locus: Setting maxgroups to $((n+2)) failed"
+
+ msg=$(${CFG}_entries_add "$locus" temp 2 112 2>&1)
+ check_fail $? "$locus: Adding more entries passed when max<n"
+ bridge_maxgroups_errmsg_check_cfg "$msg"
+
+ ${CFG}_entries_del "$locus" temp 5 111
+ check_err $? "$locus: Couldn't delete MDB entries"
+
+ ${CFG}_entries_add "$locus" temp 2 112
+ check_err $? "$locus: Adding more entries failed"
+
+ ${CFG}_entries_del "$locus" temp 2 112
+ check_err $? "$locus: Deleting more entries failed"
+
+ bridge_${context}_maxgroups_set "$locus" 0
+ check_err $? "$locus: Couldn't set maximum to 0"
+
+ log_test "$CFG: $context maxgroups: configure below ngroups"
+}
+
+test_8021d_maxgroups_too_low_cfg4()
+{
+ test_maxgroups_too_low cfg4 port "dev $swp1"
+}
+
+test_8021d_maxgroups_too_low_ctl4()
+{
+ test_maxgroups_too_low ctl4 port "dev $swp1"
+}
+
+test_8021d_maxgroups_too_low_cfg6()
+{
+ test_maxgroups_too_low cfg6 port "dev $swp1"
+}
+
+test_8021d_maxgroups_too_low_ctl6()
+{
+ test_maxgroups_too_low ctl6 port "dev $swp1"
+}
+
+test_8021q_maxgroups_too_low_cfg4()
+{
+ test_maxgroups_too_low cfg4 port "dev $swp1 vid 10"
+}
+
+test_8021q_maxgroups_too_low_ctl4()
+{
+ test_maxgroups_too_low ctl4 port "dev $swp1 vid 10"
+}
+
+test_8021q_maxgroups_too_low_cfg6()
+{
+ test_maxgroups_too_low cfg6 port "dev $swp1 vid 10"
+}
+
+test_8021q_maxgroups_too_low_ctl6()
+{
+ test_maxgroups_too_low ctl6 port "dev $swp1 vid 10"
+}
+
+test_8021qvs_maxgroups_too_low_cfg4()
+{
+ test_maxgroups_too_low cfg4 port_vlan "dev $swp1 vid 10"
+}
+
+test_8021qvs_maxgroups_too_low_ctl4()
+{
+ test_maxgroups_too_low ctl4 port_vlan "dev $swp1 vid 10"
+}
+
+test_8021qvs_maxgroups_too_low_cfg6()
+{
+ test_maxgroups_too_low cfg6 port_vlan "dev $swp1 vid 10"
+}
+
+test_8021qvs_maxgroups_too_low_ctl6()
+{
+ test_maxgroups_too_low ctl6 port_vlan "dev $swp1 vid 10"
+}
+
+test_maxgroups_too_many_entries()
+{
+ local CFG=$1; shift
+ local context=$1; shift
+ local locus=$1; shift
+
+ RET=0
+
+ local n=$(bridge_${context}_ngroups_get "$locus")
+ local msg
+
+ # Configure a low maximum
+ bridge_${context}_maxgroups_set "$locus" $((n+1))
+ check_err $? "$locus: Couldn't set maximum"
+
+ # Try to add more entries than the configured maximum
+ msg=$(${CFG}_entries_add "$locus" temp 5 2>&1)
+ check_fail $? "Adding 5 MDB entries passed, but should have failed"
+ bridge_maxgroups_errmsg_check_${CFG} "$msg"
+
+ # When adding entries through the control path, as many as possible
+ # get created. That's consistent with the mcast_hash_max behavior.
+ # So there, drop the entries explicitly.
+ if [[ ${CFG%[46]} == ctl ]]; then
+ ${CFG}_entries_del "$locus" temp 17 2>&1
+ fi
+
+ local n2=$(bridge_${context}_ngroups_get "$locus")
+ ((n2 == n))
+ check_err $? "Number of groups was $n, but after a failed attempt to add MDB entries it changed to $n2"
+
+ bridge_${context}_maxgroups_set "$locus" 0
+ check_err $? "$locus: Couldn't set maximum to 0"
+
+ log_test "$CFG: $context maxgroups: add too many MDB entries"
+}
+
+test_8021d_maxgroups_too_many_entries_cfg4()
+{
+ test_maxgroups_too_many_entries cfg4 port "dev $swp1"
+}
+
+test_8021d_maxgroups_too_many_entries_ctl4()
+{
+ test_maxgroups_too_many_entries ctl4 port "dev $swp1"
+}
+
+test_8021d_maxgroups_too_many_entries_cfg6()
+{
+ test_maxgroups_too_many_entries cfg6 port "dev $swp1"
+}
+
+test_8021d_maxgroups_too_many_entries_ctl6()
+{
+ test_maxgroups_too_many_entries ctl6 port "dev $swp1"
+}
+
+test_8021q_maxgroups_too_many_entries_cfg4()
+{
+ test_maxgroups_too_many_entries cfg4 port "dev $swp1 vid 10"
+}
+
+test_8021q_maxgroups_too_many_entries_ctl4()
+{
+ test_maxgroups_too_many_entries ctl4 port "dev $swp1 vid 10"
+}
+
+test_8021q_maxgroups_too_many_entries_cfg6()
+{
+ test_maxgroups_too_many_entries cfg6 port "dev $swp1 vid 10"
+}
+
+test_8021q_maxgroups_too_many_entries_ctl6()
+{
+ test_maxgroups_too_many_entries ctl6 port "dev $swp1 vid 10"
+}
+
+test_8021qvs_maxgroups_too_many_entries_cfg4()
+{
+ test_maxgroups_too_many_entries cfg4 port_vlan "dev $swp1 vid 10"
+}
+
+test_8021qvs_maxgroups_too_many_entries_ctl4()
+{
+ test_maxgroups_too_many_entries ctl4 port_vlan "dev $swp1 vid 10"
+}
+
+test_8021qvs_maxgroups_too_many_entries_cfg6()
+{
+ test_maxgroups_too_many_entries cfg6 port_vlan "dev $swp1 vid 10"
+}
+
+test_8021qvs_maxgroups_too_many_entries_ctl6()
+{
+ test_maxgroups_too_many_entries ctl6 port_vlan "dev $swp1 vid 10"
+}
+
+test_maxgroups_too_many_cross_vlan()
+{
+ local CFG=$1; shift
+
+ RET=0
+
+ local locus0="dev $swp1"
+ local locus1="dev $swp1 vid 10"
+ local locus2="dev $swp1 vid 20"
+ local n1=$(bridge_port_vlan_ngroups_get "$locus1")
+ local n2=$(bridge_port_vlan_ngroups_get "$locus2")
+ local msg
+
+ if ((n1 > n2)); then
+ local tmp=$n1
+ n1=$n2
+ n2=$tmp
+
+ tmp="$locus1"
+ locus1="$locus2"
+ locus2="$tmp"
+ fi
+
+ # Now 0 <= n1 <= n2.
+ ${CFG}_entries_add "$locus2" temp 5 112
+ check_err $? "Couldn't add 5 entries"
+
+ n2=$(bridge_port_vlan_ngroups_get "$locus2")
+ # Now 0 <= n1 < n2-1.
+
+ # Setting locus1'maxgroups to n2-1 should pass. The number is
+ # smaller than both the absolute number of MDB entries, and in
+ # particular than number of locus2's number of entries, but it is
+ # large enough to cover locus1's entries. Thus we check that
+ # individual VLAN's ngroups are independent.
+ bridge_port_vlan_maxgroups_set "$locus1" $((n2-1))
+ check_err $? "Setting ${locus1}'s maxgroups to $((n2-1)) failed"
+
+ msg=$(${CFG}_entries_add "$locus1" temp $n2 111 2>&1)
+ check_fail $? "$locus1: Adding $n2 MDB entries passed, but should have failed"
+ bridge_maxgroups_errmsg_check_${CFG} "$msg"
+
+ bridge_port_maxgroups_set "$locus0" $((n1 + n2 + 2))
+ check_err $? "$locus0: Couldn't set maximum"
+
+ msg=$(${CFG}_entries_add "$locus1" temp 5 111 2>&1)
+ check_fail $? "$locus1: Adding 5 MDB entries passed, but should have failed"
+ bridge_maxgroups_errmsg_check_${CFG} "$msg"
+
+ # IGMP/MLD packets can cause several entries to be added, before
+ # the maximum is hit and the rest is then bounced. Remove what was
+ # committed, if anything.
+ ${CFG}_entries_del "$locus1" temp 5 111 2>/dev/null
+
+ ${CFG}_entries_add "$locus1" temp 2 111
+ check_err $? "$locus1: Adding 2 MDB entries failed, but should have passed"
+
+ ${CFG}_entries_del "$locus1" temp 2 111
+ check_err $? "Couldn't delete MDB entries"
+
+ ${CFG}_entries_del "$locus2" temp 5 112
+ check_err $? "Couldn't delete MDB entries"
+
+ bridge_port_vlan_maxgroups_set "$locus1" 0
+ check_err $? "$locus1: Couldn't set maximum to 0"
+
+ bridge_port_maxgroups_set "$locus0" 0
+ check_err $? "$locus0: Couldn't set maximum to 0"
+
+ log_test "$CFG: port_vlan maxgroups: isolation of port and per-VLAN ngroups"
+}
+
+test_8021qvs_maxgroups_too_many_cross_vlan_cfg4()
+{
+ test_maxgroups_too_many_cross_vlan cfg4
+}
+
+test_8021qvs_maxgroups_too_many_cross_vlan_ctl4()
+{
+ test_maxgroups_too_many_cross_vlan ctl4
+}
+
+test_8021qvs_maxgroups_too_many_cross_vlan_cfg6()
+{
+ test_maxgroups_too_many_cross_vlan cfg6
+}
+
+test_8021qvs_maxgroups_too_many_cross_vlan_ctl6()
+{
+ test_maxgroups_too_many_cross_vlan ctl6
+}
+
+test_vlan_attributes()
+{
+ local locus=$1; shift
+ local expect=$1; shift
+
+ RET=0
+
+ local max=$(bridge_port_vlan_maxgroups_get "$locus")
+ local n=$(bridge_port_vlan_ngroups_get "$locus")
+
+ eval "[[ $max $expect ]]"
+ check_err $? "$locus: maxgroups attribute expected to be $expect, but was $max"
+
+ eval "[[ $n $expect ]]"
+ check_err $? "$locus: ngroups attribute expected to be $expect, but was $n"
+
+ log_test "port_vlan: presence of ngroups and maxgroups attributes"
+}
+
+test_8021q_vlan_attributes()
+{
+ test_vlan_attributes "dev $swp1 vid 10" "== null"
+}
+
+test_8021qvs_vlan_attributes()
+{
+ test_vlan_attributes "dev $swp1 vid 10" "-ge 0"
+}
+
+test_toggle_vlan_snooping()
+{
+ local mode=$1; shift
+
+ RET=0
+
+ local CFG=cfg4
+ local context=port_vlan
+ local locus="dev $swp1 vid 10"
+
+ ${CFG}_entries_add "$locus" $mode 5
+ check_err $? "Couldn't add MDB entries"
+
+ bridge_${context}_maxgroups_set "$locus" 100
+ check_err $? "Failed to set max to 100"
+
+ ip link set dev br0 type bridge mcast_vlan_snooping 0
+ sleep 1
+ ip link set dev br0 type bridge mcast_vlan_snooping 1
+
+ local n=$(bridge_${context}_ngroups_get "$locus")
+ local nn=$(bridge mdb show dev br0 | grep $swp1 | wc -l)
+ ((nn == n))
+ check_err $? "mcast_n_groups expected to be $nn, but $n reported"
+
+ local max=$(bridge_${context}_maxgroups_get "$locus")
+ ((max == 100))
+ check_err $? "Max groups expected to be 100 but $max reported"
+
+ bridge_${context}_maxgroups_set "$locus" 0
+ check_err $? "Failed to set max to 0"
+
+ log_test "$CFG: $context: $mode: mcast_vlan_snooping toggle"
+}
+
+test_toggle_vlan_snooping_temp()
+{
+ test_toggle_vlan_snooping temp
+}
+
+test_toggle_vlan_snooping_permanent()
+{
+ test_toggle_vlan_snooping permanent
+}
+
+# ngroup test suites
+
+test_8021d_ngroups_cfg4()
+{
+ test_8021d_ngroups_reporting_cfg4
+}
+
+test_8021d_ngroups_ctl4()
+{
+ test_8021d_ngroups_reporting_ctl4
+}
+
+test_8021d_ngroups_cfg6()
+{
+ test_8021d_ngroups_reporting_cfg6
+}
+
+test_8021d_ngroups_ctl6()
+{
+ test_8021d_ngroups_reporting_ctl6
+}
+
+test_8021q_ngroups_cfg4()
+{
+ test_8021q_ngroups_reporting_cfg4
+}
+
+test_8021q_ngroups_ctl4()
+{
+ test_8021q_ngroups_reporting_ctl4
+}
+
+test_8021q_ngroups_cfg6()
+{
+ test_8021q_ngroups_reporting_cfg6
+}
+
+test_8021q_ngroups_ctl6()
+{
+ test_8021q_ngroups_reporting_ctl6
+}
+
+test_8021qvs_ngroups_cfg4()
+{
+ test_8021qvs_ngroups_reporting_cfg4
+ test_8021qvs_ngroups_cross_vlan_cfg4
+}
+
+test_8021qvs_ngroups_ctl4()
+{
+ test_8021qvs_ngroups_reporting_ctl4
+ test_8021qvs_ngroups_cross_vlan_ctl4
+}
+
+test_8021qvs_ngroups_cfg6()
+{
+ test_8021qvs_ngroups_reporting_cfg6
+ test_8021qvs_ngroups_cross_vlan_cfg6
+}
+
+test_8021qvs_ngroups_ctl6()
+{
+ test_8021qvs_ngroups_reporting_ctl6
+ test_8021qvs_ngroups_cross_vlan_ctl6
+}
+
+# maxgroups test suites
+
+test_8021d_maxgroups_cfg4()
+{
+ test_8021d_maxgroups_zero_cfg4
+ test_8021d_maxgroups_too_low_cfg4
+ test_8021d_maxgroups_too_many_entries_cfg4
+}
+
+test_8021d_maxgroups_ctl4()
+{
+ test_8021d_maxgroups_zero_ctl4
+ test_8021d_maxgroups_too_low_ctl4
+ test_8021d_maxgroups_too_many_entries_ctl4
+}
+
+test_8021d_maxgroups_cfg6()
+{
+ test_8021d_maxgroups_zero_cfg6
+ test_8021d_maxgroups_too_low_cfg6
+ test_8021d_maxgroups_too_many_entries_cfg6
+}
+
+test_8021d_maxgroups_ctl6()
+{
+ test_8021d_maxgroups_zero_ctl6
+ test_8021d_maxgroups_too_low_ctl6
+ test_8021d_maxgroups_too_many_entries_ctl6
+}
+
+test_8021q_maxgroups_cfg4()
+{
+ test_8021q_maxgroups_zero_cfg4
+ test_8021q_maxgroups_too_low_cfg4
+ test_8021q_maxgroups_too_many_entries_cfg4
+}
+
+test_8021q_maxgroups_ctl4()
+{
+ test_8021q_maxgroups_zero_ctl4
+ test_8021q_maxgroups_too_low_ctl4
+ test_8021q_maxgroups_too_many_entries_ctl4
+}
+
+test_8021q_maxgroups_cfg6()
+{
+ test_8021q_maxgroups_zero_cfg6
+ test_8021q_maxgroups_too_low_cfg6
+ test_8021q_maxgroups_too_many_entries_cfg6
+}
+
+test_8021q_maxgroups_ctl6()
+{
+ test_8021q_maxgroups_zero_ctl6
+ test_8021q_maxgroups_too_low_ctl6
+ test_8021q_maxgroups_too_many_entries_ctl6
+}
+
+test_8021qvs_maxgroups_cfg4()
+{
+ test_8021qvs_maxgroups_zero_cfg4
+ test_8021qvs_maxgroups_zero_cross_vlan_cfg4
+ test_8021qvs_maxgroups_too_low_cfg4
+ test_8021qvs_maxgroups_too_many_entries_cfg4
+ test_8021qvs_maxgroups_too_many_cross_vlan_cfg4
+}
+
+test_8021qvs_maxgroups_ctl4()
+{
+ test_8021qvs_maxgroups_zero_ctl4
+ test_8021qvs_maxgroups_zero_cross_vlan_ctl4
+ test_8021qvs_maxgroups_too_low_ctl4
+ test_8021qvs_maxgroups_too_many_entries_ctl4
+ test_8021qvs_maxgroups_too_many_cross_vlan_ctl4
+}
+
+test_8021qvs_maxgroups_cfg6()
+{
+ test_8021qvs_maxgroups_zero_cfg6
+ test_8021qvs_maxgroups_zero_cross_vlan_cfg6
+ test_8021qvs_maxgroups_too_low_cfg6
+ test_8021qvs_maxgroups_too_many_entries_cfg6
+ test_8021qvs_maxgroups_too_many_cross_vlan_cfg6
+}
+
+test_8021qvs_maxgroups_ctl6()
+{
+ test_8021qvs_maxgroups_zero_ctl6
+ test_8021qvs_maxgroups_zero_cross_vlan_ctl6
+ test_8021qvs_maxgroups_too_low_ctl6
+ test_8021qvs_maxgroups_too_many_entries_ctl6
+ test_8021qvs_maxgroups_too_many_cross_vlan_ctl6
+}
+
+# other test suites
+
+test_8021qvs_toggle_vlan_snooping()
+{
+ test_toggle_vlan_snooping_temp
+ test_toggle_vlan_snooping_permanent
+}
+
+# test groups
+
+test_8021d()
+{
+ # Tests for vlan_filtering 0 mcast_vlan_snooping 0.
+
+ switch_create_8021d
+ setup_wait
+
+ test_8021d_ngroups_cfg4
+ test_8021d_ngroups_ctl4
+ test_8021d_ngroups_cfg6
+ test_8021d_ngroups_ctl6
+ test_8021d_maxgroups_cfg4
+ test_8021d_maxgroups_ctl4
+ test_8021d_maxgroups_cfg6
+ test_8021d_maxgroups_ctl6
+
+ switch_destroy
+}
+
+test_8021q()
+{
+ # Tests for vlan_filtering 1 mcast_vlan_snooping 0.
+
+ switch_create_8021q
+ setup_wait
+
+ test_8021q_vlan_attributes
+ test_8021q_ngroups_cfg4
+ test_8021q_ngroups_ctl4
+ test_8021q_ngroups_cfg6
+ test_8021q_ngroups_ctl6
+ test_8021q_maxgroups_cfg4
+ test_8021q_maxgroups_ctl4
+ test_8021q_maxgroups_cfg6
+ test_8021q_maxgroups_ctl6
+
+ switch_destroy
+}
+
+test_8021qvs()
+{
+ # Tests for vlan_filtering 1 mcast_vlan_snooping 1.
+
+ switch_create_8021qvs
+ setup_wait
+
+ test_8021qvs_vlan_attributes
+ test_8021qvs_ngroups_cfg4
+ test_8021qvs_ngroups_ctl4
+ test_8021qvs_ngroups_cfg6
+ test_8021qvs_ngroups_ctl6
+ test_8021qvs_maxgroups_cfg4
+ test_8021qvs_maxgroups_ctl4
+ test_8021qvs_maxgroups_cfg6
+ test_8021qvs_maxgroups_ctl6
+ test_8021qvs_toggle_vlan_snooping
+
+ switch_destroy
+}
+
+if ! bridge link help 2>&1 | grep -q "mcast_max_groups"; then
+ echo "SKIP: iproute2 too old, missing bridge \"mcast_max_groups\" support"
+ exit $ksft_skip
+fi
+
+trap cleanup EXIT
+
+setup_prepare
+tests_run
+
+exit $EXIT_STATUS
diff --git a/tools/testing/selftests/net/forwarding/bridge_mdb_port_down.sh b/tools/testing/selftests/net/forwarding/bridge_mdb_port_down.sh
new file mode 100755
index 000000000000..1a0480e71d83
--- /dev/null
+++ b/tools/testing/selftests/net/forwarding/bridge_mdb_port_down.sh
@@ -0,0 +1,118 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+#
+# Verify that permanent mdb entries can be added to and deleted from bridge
+# interfaces that are down, and works correctly when done so.
+
+ALL_TESTS="add_del_to_port_down"
+NUM_NETIFS=4
+
+TEST_GROUP="239.10.10.10"
+TEST_GROUP_MAC="01:00:5e:0a:0a:0a"
+
+source lib.sh
+
+
+add_del_to_port_down() {
+ RET=0
+
+ ip link set dev $swp2 down
+ bridge mdb add dev br0 port "$swp2" grp $TEST_GROUP permanent 2>/dev/null
+ check_err $? "Failed adding mdb entry"
+
+ ip link set dev $swp2 up
+ setup_wait_dev $swp2
+ mcast_packet_test $TEST_GROUP_MAC 192.0.2.1 $TEST_GROUP $h1 $h2
+ check_fail $? "Traffic to $TEST_GROUP wasn't forwarded"
+
+ ip link set dev $swp2 down
+ bridge mdb show dev br0 | grep -q "$TEST_GROUP permanent" 2>/dev/null
+ check_err $? "MDB entry did not persist after link up/down"
+
+ bridge mdb del dev br0 port "$swp2" grp $TEST_GROUP 2>/dev/null
+ check_err $? "Failed deleting mdb entry"
+
+ ip link set dev $swp2 up
+ setup_wait_dev $swp2
+ mcast_packet_test $TEST_GROUP_MAC 192.0.2.1 $TEST_GROUP $h1 $h2
+ check_err $? "Traffic to $TEST_GROUP was forwarded after entry removed"
+
+ log_test "MDB add/del entry to port with state down "
+}
+
+h1_create()
+{
+ simple_if_init $h1 192.0.2.1/24 2001:db8:1::1/64
+}
+
+h1_destroy()
+{
+ simple_if_fini $h1 192.0.2.1/24 2001:db8:1::1/64
+}
+
+h2_create()
+{
+ simple_if_init $h2 192.0.2.2/24 2001:db8:1::2/64
+}
+
+h2_destroy()
+{
+ simple_if_fini $h2 192.0.2.2/24 2001:db8:1::2/64
+}
+
+switch_create()
+{
+ # Enable multicast filtering
+ ip link add dev br0 type bridge mcast_snooping 1 mcast_querier 1
+
+ ip link set dev $swp1 master br0
+ ip link set dev $swp2 master br0
+
+ ip link set dev br0 up
+ ip link set dev $swp1 up
+
+ bridge link set dev $swp2 mcast_flood off
+ # Bridge currently has a "grace time" at creation time before it
+ # forwards multicast according to the mdb. Since we disable the
+ # mcast_flood setting per port
+ sleep 10
+}
+
+switch_destroy()
+{
+ ip link set dev $swp1 down
+ ip link set dev $swp2 down
+ ip link del dev br0
+}
+
+setup_prepare()
+{
+ h1=${NETIFS[p1]}
+ swp1=${NETIFS[p2]}
+
+ swp2=${NETIFS[p3]}
+ h2=${NETIFS[p4]}
+
+ vrf_prepare
+
+ h1_create
+ h2_create
+ switch_create
+}
+
+cleanup()
+{
+ pre_cleanup
+
+ switch_destroy
+ h1_destroy
+ h2_destroy
+
+ vrf_cleanup
+}
+
+trap cleanup EXIT
+
+setup_prepare
+tests_run
+exit $EXIT_STATUS
diff --git a/tools/testing/selftests/net/forwarding/bridge_mld.sh b/tools/testing/selftests/net/forwarding/bridge_mld.sh
new file mode 100755
index 000000000000..e2b9ff773c6b
--- /dev/null
+++ b/tools/testing/selftests/net/forwarding/bridge_mld.sh
@@ -0,0 +1,564 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+ALL_TESTS="mldv2include_test mldv2inc_allow_test mldv2inc_is_include_test mldv2inc_is_exclude_test \
+ mldv2inc_to_exclude_test mldv2exc_allow_test mldv2exc_is_include_test \
+ mldv2exc_is_exclude_test mldv2exc_to_exclude_test mldv2inc_block_test \
+ mldv2exc_block_test mldv2exc_timeout_test mldv2star_ex_auto_add_test"
+NUM_NETIFS=4
+CHECK_TC="yes"
+TEST_GROUP="ff02::cc"
+TEST_GROUP_MAC="33:33:00:00:00:cc"
+
+# MLDv2 is_in report: grp ff02::cc is_include 2001:db8:1::1,2001:db8:1::2,2001:db8:1::3
+MZPKT_IS_INC="33:33:00:00:00:01:fe:54:00:04:5e:ba:86:dd:60:0a:2d:ae:00:54:00:01:fe:80:00:\
+00:00:00:00:00:fc:54:00:ff:fe:04:5e:ba:ff:02:00:00:00:00:00:00:00:00:00:00:00:00:00:01:3a:\
+00:05:02:00:00:00:00:8f:00:8e:d9:00:00:00:01:01:00:00:03:ff:02:00:00:00:00:00:00:00:00:00:\
+00:00:00:00:cc:20:01:0d:b8:00:01:00:00:00:00:00:00:00:00:00:01:20:01:0d:b8:00:01:00:00:00:\
+00:00:00:00:00:00:02:20:01:0d:b8:00:01:00:00:00:00:00:00:00:00:00:03"
+# MLDv2 is_in report: grp ff02::cc is_include 2001:db8:1::10,2001:db8:1::11,2001:db8:1::12
+MZPKT_IS_INC2="33:33:00:00:00:01:fe:54:00:04:5e:ba:86:dd:60:0a:2d:ae:00:54:00:01:fe:80:00:\
+00:00:00:00:00:fc:54:00:ff:fe:04:5e:ba:ff:02:00:00:00:00:00:00:00:00:00:00:00:00:00:01:3a:00:\
+05:02:00:00:00:00:8f:00:8e:ac:00:00:00:01:01:00:00:03:ff:02:00:00:00:00:00:00:00:00:00:00:00:\
+00:00:cc:20:01:0d:b8:00:01:00:00:00:00:00:00:00:00:00:10:20:01:0d:b8:00:01:00:00:00:00:00:00:\
+00:00:00:11:20:01:0d:b8:00:01:00:00:00:00:00:00:00:00:00:12"
+# MLDv2 is_in report: grp ff02::cc is_include 2001:db8:1::20,2001:db8:1::30
+MZPKT_IS_INC3="33:33:00:00:00:01:fe:54:00:04:5e:ba:86:dd:60:0a:2d:ae:00:44:00:01:fe:80:00:00:00:\
+00:00:00:fc:54:00:ff:fe:04:5e:ba:ff:02:00:00:00:00:00:00:00:00:00:00:00:00:00:01:3a:00:05:02:00:\
+00:00:00:8f:00:bc:5a:00:00:00:01:01:00:00:02:ff:02:00:00:00:00:00:00:00:00:00:00:00:00:00:cc:20:\
+01:0d:b8:00:01:00:00:00:00:00:00:00:00:00:20:20:01:0d:b8:00:01:00:00:00:00:00:00:00:00:00:30"
+# MLDv2 allow report: grp ff02::cc allow 2001:db8:1::10,2001:db8:1::11,2001:db8:1::12
+MZPKT_ALLOW="33:33:00:00:00:01:fe:54:00:04:5e:ba:86:dd:60:0a:2d:ae:00:54:00:01:fe:80:00:00:\
+00:00:00:00:fc:54:00:ff:fe:04:5e:ba:ff:02:00:00:00:00:00:00:00:00:00:00:00:00:00:01:3a:00:05:\
+02:00:00:00:00:8f:00:8a:ac:00:00:00:01:05:00:00:03:ff:02:00:00:00:00:00:00:00:00:00:00:00:00:\
+00:cc:20:01:0d:b8:00:01:00:00:00:00:00:00:00:00:00:10:20:01:0d:b8:00:01:00:00:00:00:00:00:00:\
+00:00:11:20:01:0d:b8:00:01:00:00:00:00:00:00:00:00:00:12"
+# MLDv2 allow report: grp ff02::cc allow 2001:db8:1::20,2001:db8:1::30
+MZPKT_ALLOW2="33:33:00:00:00:01:fe:54:00:04:5e:ba:86:dd:60:0a:2d:ae:00:44:00:01:fe:80:00:00:00:\
+00:00:00:fc:54:00:ff:fe:04:5e:ba:ff:02:00:00:00:00:00:00:00:00:00:00:00:00:00:01:3a:00:05:02:00:\
+00:00:00:8f:00:b8:5a:00:00:00:01:05:00:00:02:ff:02:00:00:00:00:00:00:00:00:00:00:00:00:00:cc:20:\
+01:0d:b8:00:01:00:00:00:00:00:00:00:00:00:20:20:01:0d:b8:00:01:00:00:00:00:00:00:00:00:00:30"
+# MLDv2 is_ex report: grp ff02::cc is_exclude 2001:db8:1::1,2001:db8:1::2,2001:db8:1::20,2001:db8:1::21
+MZPKT_IS_EXC="33:33:00:00:00:01:fe:54:00:04:5e:ba:86:dd:60:0a:2d:ae:00:64:00:01:fe:80:00:00:00:\
+00:00:00:fc:54:00:ff:fe:04:5e:ba:ff:02:00:00:00:00:00:00:00:00:00:00:00:00:00:01:3a:00:05:02:00:\
+00:00:00:8f:00:5f:d0:00:00:00:01:02:00:00:04:ff:02:00:00:00:00:00:00:00:00:00:00:00:00:00:cc:20:\
+01:0d:b8:00:01:00:00:00:00:00:00:00:00:00:01:20:01:0d:b8:00:01:00:00:00:00:00:00:00:00:00:02:20:\
+01:0d:b8:00:01:00:00:00:00:00:00:00:00:00:20:20:01:0d:b8:00:01:00:00:00:00:00:00:00:00:00:21"
+# MLDv2 is_ex report: grp ff02::cc is_exclude 2001:db8:1::20,2001:db8:1::30
+MZPKT_IS_EXC2="33:33:00:00:00:01:fe:54:00:04:5e:ba:86:dd:60:0a:2d:ae:00:44:00:01:fe:80:00:00:00:\
+00:00:00:fc:54:00:ff:fe:04:5e:ba:ff:02:00:00:00:00:00:00:00:00:00:00:00:00:00:01:3a:00:05:02:00:\
+00:00:00:8f:00:bb:5a:00:00:00:01:02:00:00:02:ff:02:00:00:00:00:00:00:00:00:00:00:00:00:00:cc:20:\
+01:0d:b8:00:01:00:00:00:00:00:00:00:00:00:20:20:01:0d:b8:00:01:00:00:00:00:00:00:00:00:00:30"
+# MLDv2 to_ex report: grp ff02::cc to_exclude 2001:db8:1::1,2001:db8:1::20,2001:db8:1::30
+MZPKT_TO_EXC="33:33:00:00:00:01:fe:54:00:04:5e:ba:86:dd:60:0a:2d:ae:00:54:00:01:fe:80:00:00:00:\
+00:00:00:fc:54:00:ff:fe:04:5e:ba:ff:02:00:00:00:00:00:00:00:00:00:00:00:00:00:01:3a:00:05:02:00:\
+00:00:00:8f:00:8b:8e:00:00:00:01:04:00:00:03:ff:02:00:00:00:00:00:00:00:00:00:00:00:00:00:cc:20:\
+01:0d:b8:00:01:00:00:00:00:00:00:00:00:00:01:20:01:0d:b8:00:01:00:00:00:00:00:00:00:00:00:20:20:\
+01:0d:b8:00:01:00:00:00:00:00:00:00:00:00:30"
+# MLDv2 block report: grp ff02::cc block 2001:db8:1::1,2001:db8:1::20,2001:db8:1::30
+MZPKT_BLOCK="33:33:00:00:00:01:fe:54:00:04:5e:ba:86:dd:60:0a:2d:ae:00:54:00:01:fe:80:00:00:00:00:\
+00:00:fc:54:00:ff:fe:04:5e:ba:ff:02:00:00:00:00:00:00:00:00:00:00:00:00:00:01:3a:00:05:02:00:00:\
+00:00:8f:00:89:8e:00:00:00:01:06:00:00:03:ff:02:00:00:00:00:00:00:00:00:00:00:00:00:00:cc:20:01:\
+0d:b8:00:01:00:00:00:00:00:00:00:00:00:01:20:01:0d:b8:00:01:00:00:00:00:00:00:00:00:00:20:20:01:\
+0d:b8:00:01:00:00:00:00:00:00:00:00:00:30"
+
+source lib.sh
+
+h1_create()
+{
+ simple_if_init $h1 2001:db8:1::1/64
+}
+
+h1_destroy()
+{
+ simple_if_fini $h1 2001:db8:1::1/64
+}
+
+h2_create()
+{
+ simple_if_init $h2 2001:db8:1::2/64
+}
+
+h2_destroy()
+{
+ simple_if_fini $h2 2001:db8:1::2/64
+}
+
+switch_create()
+{
+ ip link add dev br0 type bridge mcast_snooping 1 mcast_query_response_interval 100 \
+ mcast_mld_version 2 mcast_startup_query_interval 300 \
+ mcast_querier 1
+
+ ip link set dev $swp1 master br0
+ ip link set dev $swp2 master br0
+
+ ip link set dev br0 up
+ ip link set dev $swp1 up
+ ip link set dev $swp2 up
+
+ # make sure a query has been generated
+ sleep 5
+}
+
+switch_destroy()
+{
+ ip link set dev $swp2 down
+ ip link set dev $swp1 down
+
+ ip link del dev br0
+}
+
+setup_prepare()
+{
+ h1=${NETIFS[p1]}
+ swp1=${NETIFS[p2]}
+
+ swp2=${NETIFS[p3]}
+ h2=${NETIFS[p4]}
+
+ vrf_prepare
+
+ h1_create
+ h2_create
+
+ switch_create
+}
+
+cleanup()
+{
+ pre_cleanup
+
+ switch_destroy
+
+ h2_destroy
+ h1_destroy
+
+ vrf_cleanup
+}
+
+mldv2include_prepare()
+{
+ local host1_if=$1
+ local X=("2001:db8:1::1" "2001:db8:1::2" "2001:db8:1::3")
+
+ ip link set dev br0 type bridge mcast_mld_version 2
+ check_err $? "Could not change bridge MLD version to 2"
+
+ $MZ $host1_if $MZPKT_IS_INC -q
+ sleep 1
+ bridge -j -d -s mdb show dev br0 \
+ | jq -e ".[].mdb[] | \
+ select(.grp == \"$TEST_GROUP\" and .source_list != null)" &>/dev/null
+ check_err $? "Missing *,G entry with source list"
+ bridge -j -d -s mdb show dev br0 \
+ | jq -e ".[].mdb[] | \
+ select(.grp == \"$TEST_GROUP\" and \
+ .source_list != null and .filter_mode == \"include\")" &>/dev/null
+ check_err $? "Wrong *,G entry filter mode"
+ brmcast_check_sg_entries "is_include" "${X[@]}"
+}
+
+mldv2exclude_prepare()
+{
+ local host1_if=$1
+ local mac=$2
+ local group=$3
+ local pkt=$4
+ local X=("2001:db8:1::1" "2001:db8:1::2")
+ local Y=("2001:db8:1::20" "2001:db8:1::21")
+
+ mldv2include_prepare $h1
+
+ $MZ $host1_if -c 1 $MZPKT_IS_EXC -q
+ sleep 1
+ bridge -j -d -s mdb show dev br0 \
+ | jq -e ".[].mdb[] | \
+ select(.grp == \"$TEST_GROUP\" and \
+ .source_list != null and .filter_mode == \"exclude\")" &>/dev/null
+ check_err $? "Wrong *,G entry filter mode"
+
+ brmcast_check_sg_entries "is_exclude" "${X[@]}" "${Y[@]}"
+
+ brmcast_check_sg_state 0 "${X[@]}"
+ brmcast_check_sg_state 1 "${Y[@]}"
+
+ bridge -j -d -s mdb show dev br0 \
+ | jq -e ".[].mdb[] | \
+ select(.grp == \"$TEST_GROUP\" and \
+ .source_list != null and
+ .source_list[].address == \"2001:db8:1::3\")" &>/dev/null
+ check_fail $? "Wrong *,G entry source list, 2001:db8:1::3 entry still exists"
+}
+
+mldv2cleanup()
+{
+ local port=$1
+
+ bridge mdb del dev br0 port $port grp $TEST_GROUP
+ ip link set dev br0 type bridge mcast_mld_version 1
+}
+
+mldv2include_test()
+{
+ RET=0
+ local X=("2001:db8:1::1" "2001:db8:1::2" "2001:db8:1::3")
+
+ mldv2include_prepare $h1
+
+ brmcast_check_sg_state 0 "${X[@]}"
+
+ brmcast_check_sg_fwding 1 "${X[@]}"
+ brmcast_check_sg_fwding 0 "2001:db8:1::100"
+
+ log_test "MLDv2 report $TEST_GROUP is_include"
+
+ mldv2cleanup $swp1
+}
+
+mldv2inc_allow_test()
+{
+ RET=0
+ local X=("2001:db8:1::10" "2001:db8:1::11" "2001:db8:1::12")
+
+ mldv2include_prepare $h1
+
+ $MZ $h1 -c 1 $MZPKT_ALLOW -q
+ sleep 1
+ brmcast_check_sg_entries "allow" "${X[@]}"
+
+ brmcast_check_sg_state 0 "${X[@]}"
+
+ brmcast_check_sg_fwding 1 "${X[@]}"
+ brmcast_check_sg_fwding 0 "2001:db8:1::100"
+
+ log_test "MLDv2 report $TEST_GROUP include -> allow"
+
+ mldv2cleanup $swp1
+}
+
+mldv2inc_is_include_test()
+{
+ RET=0
+ local X=("2001:db8:1::10" "2001:db8:1::11" "2001:db8:1::12")
+
+ mldv2include_prepare $h1
+
+ $MZ $h1 -c 1 $MZPKT_IS_INC2 -q
+ sleep 1
+ brmcast_check_sg_entries "is_include" "${X[@]}"
+
+ brmcast_check_sg_state 0 "${X[@]}"
+
+ brmcast_check_sg_fwding 1 "${X[@]}"
+ brmcast_check_sg_fwding 0 "2001:db8:1::100"
+
+ log_test "MLDv2 report $TEST_GROUP include -> is_include"
+
+ mldv2cleanup $swp1
+}
+
+mldv2inc_is_exclude_test()
+{
+ RET=0
+
+ mldv2exclude_prepare $h1
+
+ brmcast_check_sg_fwding 1 "${X[@]}" 2001:db8:1::100
+ brmcast_check_sg_fwding 0 "${Y[@]}"
+
+ log_test "MLDv2 report $TEST_GROUP include -> is_exclude"
+
+ mldv2cleanup $swp1
+}
+
+mldv2inc_to_exclude_test()
+{
+ RET=0
+ local X=("2001:db8:1::1")
+ local Y=("2001:db8:1::20" "2001:db8:1::30")
+
+ mldv2include_prepare $h1
+
+ ip link set dev br0 type bridge mcast_last_member_interval 500
+ check_err $? "Could not change mcast_last_member_interval to 5s"
+
+ $MZ $h1 -c 1 $MZPKT_TO_EXC -q
+ sleep 1
+ bridge -j -d -s mdb show dev br0 \
+ | jq -e ".[].mdb[] | \
+ select(.grp == \"$TEST_GROUP\" and \
+ .source_list != null and .filter_mode == \"exclude\")" &>/dev/null
+ check_err $? "Wrong *,G entry filter mode"
+
+ brmcast_check_sg_entries "to_exclude" "${X[@]}" "${Y[@]}"
+
+ brmcast_check_sg_state 0 "${X[@]}"
+ brmcast_check_sg_state 1 "${Y[@]}"
+
+ bridge -j -d -s mdb show dev br0 \
+ | jq -e ".[].mdb[] | \
+ select(.grp == \"$TEST_GROUP\" and \
+ .source_list != null and
+ .source_list[].address == \"2001:db8:1::2\")" &>/dev/null
+ check_fail $? "Wrong *,G entry source list, 2001:db8:1::2 entry still exists"
+ bridge -j -d -s mdb show dev br0 \
+ | jq -e ".[].mdb[] | \
+ select(.grp == \"$TEST_GROUP\" and \
+ .source_list != null and
+ .source_list[].address == \"2001:db8:1::21\")" &>/dev/null
+ check_fail $? "Wrong *,G entry source list, 2001:db8:1::21 entry still exists"
+
+ brmcast_check_sg_fwding 1 "${X[@]}" 2001:db8:1::100
+ brmcast_check_sg_fwding 0 "${Y[@]}"
+
+ log_test "MLDv2 report $TEST_GROUP include -> to_exclude"
+
+ ip link set dev br0 type bridge mcast_last_member_interval 100
+
+ mldv2cleanup $swp1
+}
+
+mldv2exc_allow_test()
+{
+ RET=0
+ local X=("2001:db8:1::1" "2001:db8:1::2" "2001:db8:1::20" "2001:db8:1::30")
+ local Y=("2001:db8:1::21")
+
+ mldv2exclude_prepare $h1
+
+ $MZ $h1 -c 1 $MZPKT_ALLOW2 -q
+ sleep 1
+ brmcast_check_sg_entries "allow" "${X[@]}" "${Y[@]}"
+
+ brmcast_check_sg_state 0 "${X[@]}"
+ brmcast_check_sg_state 1 "${Y[@]}"
+
+ brmcast_check_sg_fwding 1 "${X[@]}" 2001:db8:1::100
+ brmcast_check_sg_fwding 0 "${Y[@]}"
+
+ log_test "MLDv2 report $TEST_GROUP exclude -> allow"
+
+ mldv2cleanup $swp1
+}
+
+mldv2exc_is_include_test()
+{
+ RET=0
+ local X=("2001:db8:1::1" "2001:db8:1::2" "2001:db8:1::20" "2001:db8:1::30")
+ local Y=("2001:db8:1::21")
+
+ mldv2exclude_prepare $h1
+
+ $MZ $h1 -c 1 $MZPKT_IS_INC3 -q
+ sleep 1
+ brmcast_check_sg_entries "is_include" "${X[@]}" "${Y[@]}"
+
+ brmcast_check_sg_state 0 "${X[@]}"
+ brmcast_check_sg_state 1 "${Y[@]}"
+
+ brmcast_check_sg_fwding 1 "${X[@]}" 2001:db8:1::100
+ brmcast_check_sg_fwding 0 "${Y[@]}"
+
+ log_test "MLDv2 report $TEST_GROUP exclude -> is_include"
+
+ mldv2cleanup $swp1
+}
+
+mldv2exc_is_exclude_test()
+{
+ RET=0
+ local X=("2001:db8:1::30")
+ local Y=("2001:db8:1::20")
+
+ mldv2exclude_prepare $h1
+
+ $MZ $h1 -c 1 $MZPKT_IS_EXC2 -q
+ sleep 1
+ brmcast_check_sg_entries "is_exclude" "${X[@]}" "${Y[@]}"
+
+ brmcast_check_sg_state 0 "${X[@]}"
+ brmcast_check_sg_state 1 "${Y[@]}"
+
+ brmcast_check_sg_fwding 1 "${X[@]}" 2001:db8:1::100
+ brmcast_check_sg_fwding 0 "${Y[@]}"
+
+ log_test "MLDv2 report $TEST_GROUP exclude -> is_exclude"
+
+ mldv2cleanup $swp1
+}
+
+mldv2exc_to_exclude_test()
+{
+ RET=0
+ local X=("2001:db8:1::1" "2001:db8:1::30")
+ local Y=("2001:db8:1::20")
+
+ mldv2exclude_prepare $h1
+
+ ip link set dev br0 type bridge mcast_last_member_interval 500
+ check_err $? "Could not change mcast_last_member_interval to 5s"
+
+ $MZ $h1 -c 1 $MZPKT_TO_EXC -q
+ sleep 1
+ brmcast_check_sg_entries "to_exclude" "${X[@]}" "${Y[@]}"
+
+ brmcast_check_sg_state 0 "${X[@]}"
+ brmcast_check_sg_state 1 "${Y[@]}"
+
+ brmcast_check_sg_fwding 1 "${X[@]}" 2001:db8:1::100
+ brmcast_check_sg_fwding 0 "${Y[@]}"
+
+ log_test "MLDv2 report $TEST_GROUP exclude -> to_exclude"
+
+ ip link set dev br0 type bridge mcast_last_member_interval 100
+
+ mldv2cleanup $swp1
+}
+
+mldv2inc_block_test()
+{
+ RET=0
+ local X=("2001:db8:1::2" "2001:db8:1::3")
+
+ mldv2include_prepare $h1
+
+ $MZ $h1 -c 1 $MZPKT_BLOCK -q
+ # make sure the lowered timers have expired (by default 2 seconds)
+ sleep 3
+ brmcast_check_sg_entries "block" "${X[@]}"
+
+ brmcast_check_sg_state 0 "${X[@]}"
+
+ bridge -j -d -s mdb show dev br0 \
+ | jq -e ".[].mdb[] | \
+ select(.grp == \"$TEST_GROUP\" and \
+ .source_list != null and
+ .source_list[].address == \"2001:db8:1::1\")" &>/dev/null
+ check_fail $? "Wrong *,G entry source list, 2001:db8:1::1 entry still exists"
+
+ brmcast_check_sg_fwding 1 "${X[@]}"
+ brmcast_check_sg_fwding 0 2001:db8:1::100
+
+ log_test "MLDv2 report $TEST_GROUP include -> block"
+
+ mldv2cleanup $swp1
+}
+
+mldv2exc_block_test()
+{
+ RET=0
+ local X=("2001:db8:1::1" "2001:db8:1::2" "2001:db8:1::30")
+ local Y=("2001:db8:1::20" "2001:db8:1::21")
+
+ mldv2exclude_prepare $h1
+
+ ip link set dev br0 type bridge mcast_last_member_interval 500
+ check_err $? "Could not change mcast_last_member_interval to 5s"
+
+ $MZ $h1 -c 1 $MZPKT_BLOCK -q
+ sleep 1
+ brmcast_check_sg_entries "block" "${X[@]}" "${Y[@]}"
+
+ brmcast_check_sg_state 0 "${X[@]}"
+ brmcast_check_sg_state 1 "${Y[@]}"
+
+ brmcast_check_sg_fwding 1 "${X[@]}" 2001:db8:1::100
+ brmcast_check_sg_fwding 0 "${Y[@]}"
+
+ log_test "MLDv2 report $TEST_GROUP exclude -> block"
+
+ ip link set dev br0 type bridge mcast_last_member_interval 100
+
+ mldv2cleanup $swp1
+}
+
+mldv2exc_timeout_test()
+{
+ RET=0
+ local X=("2001:db8:1::20" "2001:db8:1::30")
+
+ # GMI should be 3 seconds
+ ip link set dev br0 type bridge mcast_query_interval 100 \
+ mcast_query_response_interval 100 \
+ mcast_membership_interval 300
+
+ mldv2exclude_prepare $h1
+ ip link set dev br0 type bridge mcast_query_interval 500 \
+ mcast_query_response_interval 500 \
+ mcast_membership_interval 1500
+
+ $MZ $h1 -c 1 $MZPKT_ALLOW2 -q
+ sleep 3
+ bridge -j -d -s mdb show dev br0 \
+ | jq -e ".[].mdb[] | \
+ select(.grp == \"$TEST_GROUP\" and \
+ .source_list != null and .filter_mode == \"include\")" &>/dev/null
+ check_err $? "Wrong *,G entry filter mode"
+
+ bridge -j -d -s mdb show dev br0 \
+ | jq -e ".[].mdb[] | \
+ select(.grp == \"$TEST_GROUP\" and \
+ .source_list != null and
+ .source_list[].address == \"2001:db8:1::1\")" &>/dev/null
+ check_fail $? "Wrong *,G entry source list, 2001:db8:1::1 entry still exists"
+ bridge -j -d -s mdb show dev br0 \
+ | jq -e ".[].mdb[] | \
+ select(.grp == \"$TEST_GROUP\" and \
+ .source_list != null and
+ .source_list[].address == \"2001:db8:1::2\")" &>/dev/null
+ check_fail $? "Wrong *,G entry source list, 2001:db8:1::2 entry still exists"
+
+ brmcast_check_sg_entries "allow" "${X[@]}"
+
+ brmcast_check_sg_state 0 "${X[@]}"
+
+ brmcast_check_sg_fwding 1 "${X[@]}"
+ brmcast_check_sg_fwding 0 2001:db8:1::100
+
+ log_test "MLDv2 group $TEST_GROUP exclude timeout"
+
+ ip link set dev br0 type bridge mcast_query_interval 12500 \
+ mcast_query_response_interval 1000 \
+ mcast_membership_interval 26000
+
+ mldv2cleanup $swp1
+}
+
+mldv2star_ex_auto_add_test()
+{
+ RET=0
+
+ mldv2exclude_prepare $h1
+
+ $MZ $h2 -c 1 $MZPKT_IS_INC -q
+ sleep 1
+ bridge -j -d -s mdb show dev br0 \
+ | jq -e ".[].mdb[] | \
+ select(.grp == \"$TEST_GROUP\" and .src == \"2001:db8:1::3\" and \
+ .port == \"$swp1\")" &>/dev/null
+ check_err $? "S,G entry for *,G port doesn't exist"
+
+ bridge -j -d -s mdb show dev br0 \
+ | jq -e ".[].mdb[] | \
+ select(.grp == \"$TEST_GROUP\" and .src == \"2001:db8:1::3\" and \
+ .port == \"$swp1\" and \
+ .flags[] == \"added_by_star_ex\")" &>/dev/null
+ check_err $? "Auto-added S,G entry doesn't have added_by_star_ex flag"
+
+ brmcast_check_sg_fwding 1 2001:db8:1::3
+
+ log_test "MLDv2 S,G port entry automatic add to a *,G port"
+
+ mldv2cleanup $swp1
+ mldv2cleanup $swp2
+}
+
+trap cleanup EXIT
+
+setup_prepare
+setup_wait
+
+tests_run
+
+exit $EXIT_STATUS
diff --git a/tools/testing/selftests/net/forwarding/bridge_vlan_aware.sh b/tools/testing/selftests/net/forwarding/bridge_vlan_aware.sh
index b90dff8d3a94..64bd00fe9a4f 100755
--- a/tools/testing/selftests/net/forwarding/bridge_vlan_aware.sh
+++ b/tools/testing/selftests/net/forwarding/bridge_vlan_aware.sh
@@ -28,8 +28,9 @@ h2_destroy()
switch_create()
{
- # 10 Seconds ageing time.
- ip link add dev br0 type bridge vlan_filtering 1 ageing_time 1000 \
+ ip link add dev br0 type bridge \
+ vlan_filtering 1 \
+ ageing_time $LOW_AGEING_TIME \
mcast_snooping 0
ip link set dev $swp1 master br0
diff --git a/tools/testing/selftests/net/forwarding/bridge_vlan_mcast.sh b/tools/testing/selftests/net/forwarding/bridge_vlan_mcast.sh
new file mode 100755
index 000000000000..72dfbeaf56b9
--- /dev/null
+++ b/tools/testing/selftests/net/forwarding/bridge_vlan_mcast.sh
@@ -0,0 +1,546 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+ALL_TESTS="vlmc_control_test vlmc_querier_test vlmc_igmp_mld_version_test \
+ vlmc_last_member_test vlmc_startup_query_test vlmc_membership_test \
+ vlmc_querier_intvl_test vlmc_query_intvl_test vlmc_query_response_intvl_test \
+ vlmc_router_port_test vlmc_filtering_test"
+NUM_NETIFS=4
+CHECK_TC="yes"
+TEST_GROUP="239.10.10.10"
+
+source lib.sh
+
+h1_create()
+{
+ simple_if_init $h1 192.0.2.1/24 2001:db8:1::1/64
+ ip link add l $h1 $h1.10 up type vlan id 10
+}
+
+h1_destroy()
+{
+ ip link del $h1.10
+ simple_if_fini $h1 192.0.2.1/24 2001:db8:1::1/64
+}
+
+h2_create()
+{
+ simple_if_init $h2 192.0.2.2/24 2001:db8:1::2/64
+ ip link add l $h2 $h2.10 up type vlan id 10
+}
+
+h2_destroy()
+{
+ ip link del $h2.10
+ simple_if_fini $h2 192.0.2.2/24 2001:db8:1::2/64
+}
+
+switch_create()
+{
+ ip link add dev br0 type bridge mcast_snooping 1 mcast_querier 1 vlan_filtering 1
+
+ ip link set dev $swp1 master br0
+ ip link set dev $swp2 master br0
+
+ ip link set dev br0 up
+ ip link set dev $swp1 up
+ ip link set dev $swp2 up
+
+ tc qdisc add dev $swp1 clsact
+ tc qdisc add dev $swp2 clsact
+
+ bridge vlan add vid 10-11 dev $swp1 master
+ bridge vlan add vid 10-11 dev $swp2 master
+
+ ip link set dev br0 type bridge mcast_vlan_snooping 1
+ check_err $? "Could not enable global vlan multicast snooping"
+ log_test "Vlan multicast snooping enable"
+}
+
+switch_destroy()
+{
+ tc qdisc del dev $swp2 clsact
+ tc qdisc del dev $swp1 clsact
+
+ ip link set dev $swp2 down
+ ip link set dev $swp1 down
+
+ ip link del dev br0
+}
+
+setup_prepare()
+{
+ h1=${NETIFS[p1]}
+ swp1=${NETIFS[p2]}
+
+ swp2=${NETIFS[p3]}
+ h2=${NETIFS[p4]}
+
+ vrf_prepare
+
+ h1_create
+ h2_create
+
+ switch_create
+}
+
+cleanup()
+{
+ pre_cleanup
+
+ switch_destroy
+
+ h2_destroy
+ h1_destroy
+
+ vrf_cleanup
+}
+
+vlmc_v2join_test()
+{
+ local expect=$1
+
+ RET=0
+ ip address add dev $h2.10 $TEST_GROUP/32 autojoin
+ check_err $? "Could not join $TEST_GROUP"
+
+ sleep 5
+ bridge -j mdb show dev br0 |
+ jq -e ".[].mdb[] | select(.grp == \"$TEST_GROUP\" and .vid == 10)" &>/dev/null
+ if [ $expect -eq 0 ]; then
+ check_err $? "IGMPv2 report didn't create mdb entry for $TEST_GROUP"
+ else
+ check_fail $? "IGMPv2 report shouldn't have created mdb entry for $TEST_GROUP"
+ fi
+
+ # check if we need to cleanup
+ if [ $RET -eq 0 ]; then
+ ip address del dev $h2.10 $TEST_GROUP/32 2>&1 1>/dev/null
+ sleep 5
+ bridge -j mdb show dev br0 |
+ jq -e ".[].mdb[] | select(.grp == \"$TEST_GROUP\" and \
+ .vid == 10)" &>/dev/null
+ check_fail $? "IGMPv2 leave didn't remove mdb entry for $TEST_GROUP"
+ fi
+}
+
+vlmc_control_test()
+{
+ RET=0
+ local goutput=`bridge -j vlan global show`
+ echo -n $goutput |
+ jq -e ".[].vlans[] | select(.vlan == 10)" &>/dev/null
+ check_err $? "Could not find vlan 10's global options"
+ log_test "Vlan global options existence"
+
+ RET=0
+ echo -n $goutput |
+ jq -e ".[].vlans[] | select(.vlan == 10 and .mcast_snooping == 1) " &>/dev/null
+ check_err $? "Wrong default mcast_snooping global option value"
+ log_test "Vlan mcast_snooping global option default value"
+
+ RET=0
+ vlmc_v2join_test 0
+ bridge vlan global set vid 10 dev br0 mcast_snooping 0
+ check_err $? "Could not disable multicast snooping in vlan 10"
+ vlmc_v2join_test 1
+ log_test "Vlan 10 multicast snooping control"
+}
+
+# setup for general query counting
+vlmc_query_cnt_xstats()
+{
+ local type=$1
+ local version=$2
+ local dev=$3
+
+ ip -j link xstats type bridge_slave dev $dev | \
+ jq -e ".[].multicast.${type}_queries.tx_v${version}"
+}
+
+vlmc_query_cnt_setup()
+{
+ local type=$1
+ local dev=$2
+
+ if [[ $type == "igmp" ]]; then
+ tc filter add dev $dev egress pref 10 prot 802.1Q \
+ flower vlan_id 10 vlan_ethtype ipv4 dst_ip 224.0.0.1 ip_proto 2 \
+ action pass
+ else
+ tc filter add dev $dev egress pref 10 prot 802.1Q \
+ flower vlan_id 10 vlan_ethtype ipv6 dst_ip ff02::1 ip_proto icmpv6 \
+ action pass
+ fi
+
+ ip link set dev br0 type bridge mcast_stats_enabled 1
+}
+
+vlmc_query_cnt_cleanup()
+{
+ local dev=$1
+
+ ip link set dev br0 type bridge mcast_stats_enabled 0
+ tc filter del dev $dev egress pref 10
+}
+
+vlmc_check_query()
+{
+ local type=$1
+ local version=$2
+ local dev=$3
+ local expect=$4
+ local time=$5
+ local ret=0
+
+ vlmc_query_cnt_setup $type $dev
+
+ local pre_tx_xstats=$(vlmc_query_cnt_xstats $type $version $dev)
+ bridge vlan global set vid 10 dev br0 mcast_snooping 1 mcast_querier 1
+ ret=$?
+ if [[ $ret -eq 0 ]]; then
+ sleep $time
+
+ local tcstats=$(tc_rule_stats_get $dev 10 egress)
+ local post_tx_xstats=$(vlmc_query_cnt_xstats $type $version $dev)
+
+ if [[ $tcstats != $expect || \
+ $(($post_tx_xstats-$pre_tx_xstats)) != $expect || \
+ $tcstats != $(($post_tx_xstats-$pre_tx_xstats)) ]]; then
+ ret=1
+ fi
+ fi
+
+ bridge vlan global set vid 10 dev br0 mcast_snooping 1 mcast_querier 0
+ vlmc_query_cnt_cleanup $dev
+
+ return $ret
+}
+
+vlmc_querier_test()
+{
+ RET=0
+ local goutput=`bridge -j vlan global show`
+ echo -n $goutput |
+ jq -e ".[].vlans[] | select(.vlan == 10)" &>/dev/null
+ check_err $? "Could not find vlan 10's global options"
+
+ echo -n $goutput |
+ jq -e ".[].vlans[] | select(.vlan == 10 and .mcast_querier == 0) " &>/dev/null
+ check_err $? "Wrong default mcast_querier global vlan option value"
+ log_test "Vlan mcast_querier global option default value"
+
+ RET=0
+ bridge vlan global set vid 10 dev br0 mcast_snooping 1 mcast_querier 1
+ check_err $? "Could not enable querier in vlan 10"
+ log_test "Vlan 10 multicast querier enable"
+ bridge vlan global set vid 10 dev br0 mcast_snooping 1 mcast_querier 0
+
+ RET=0
+ vlmc_check_query igmp 2 $swp1 1 1
+ check_err $? "No vlan tagged IGMPv2 general query packets sent"
+ log_test "Vlan 10 tagged IGMPv2 general query sent"
+
+ RET=0
+ vlmc_check_query mld 1 $swp1 1 1
+ check_err $? "No vlan tagged MLD general query packets sent"
+ log_test "Vlan 10 tagged MLD general query sent"
+}
+
+vlmc_igmp_mld_version_test()
+{
+ RET=0
+ local goutput=`bridge -j vlan global show`
+ echo -n $goutput |
+ jq -e ".[].vlans[] | select(.vlan == 10)" &>/dev/null
+ check_err $? "Could not find vlan 10's global options"
+
+ echo -n $goutput |
+ jq -e ".[].vlans[] | select(.vlan == 10 and .mcast_igmp_version == 2) " &>/dev/null
+ check_err $? "Wrong default mcast_igmp_version global vlan option value"
+ log_test "Vlan mcast_igmp_version global option default value"
+
+ RET=0
+ echo -n $goutput |
+ jq -e ".[].vlans[] | select(.vlan == 10 and .mcast_mld_version == 1) " &>/dev/null
+ check_err $? "Wrong default mcast_mld_version global vlan option value"
+ log_test "Vlan mcast_mld_version global option default value"
+
+ RET=0
+ bridge vlan global set vid 10 dev br0 mcast_snooping 1 mcast_igmp_version 3
+ check_err $? "Could not set mcast_igmp_version in vlan 10"
+ log_test "Vlan 10 mcast_igmp_version option changed to 3"
+
+ RET=0
+ vlmc_check_query igmp 3 $swp1 1 1
+ check_err $? "No vlan tagged IGMPv3 general query packets sent"
+ log_test "Vlan 10 tagged IGMPv3 general query sent"
+
+ RET=0
+ bridge vlan global set vid 10 dev br0 mcast_snooping 1 mcast_mld_version 2
+ check_err $? "Could not set mcast_mld_version in vlan 10"
+ log_test "Vlan 10 mcast_mld_version option changed to 2"
+
+ RET=0
+ vlmc_check_query mld 2 $swp1 1 1
+ check_err $? "No vlan tagged MLDv2 general query packets sent"
+ log_test "Vlan 10 tagged MLDv2 general query sent"
+
+ bridge vlan global set vid 10 dev br0 mcast_snooping 1 mcast_igmp_version 2
+ bridge vlan global set vid 10 dev br0 mcast_snooping 1 mcast_mld_version 1
+}
+
+vlmc_last_member_test()
+{
+ RET=0
+ local goutput=`bridge -j vlan global show`
+ echo -n $goutput |
+ jq -e ".[].vlans[] | select(.vlan == 10)" &>/dev/null
+ check_err $? "Could not find vlan 10's global options"
+
+ echo -n $goutput |
+ jq -e ".[].vlans[] | select(.vlan == 10 and \
+ .mcast_last_member_count == 2) " &>/dev/null
+ check_err $? "Wrong default mcast_last_member_count global vlan option value"
+ log_test "Vlan mcast_last_member_count global option default value"
+
+ RET=0
+ echo -n $goutput |
+ jq -e ".[].vlans[] | select(.vlan == 10 and \
+ .mcast_last_member_interval == 100) " &>/dev/null
+ check_err $? "Wrong default mcast_last_member_interval global vlan option value"
+ log_test "Vlan mcast_last_member_interval global option default value"
+
+ RET=0
+ bridge vlan global set vid 10 dev br0 mcast_snooping 1 mcast_last_member_count 3
+ check_err $? "Could not set mcast_last_member_count in vlan 10"
+ log_test "Vlan 10 mcast_last_member_count option changed to 3"
+ bridge vlan global set vid 10 dev br0 mcast_snooping 1 mcast_last_member_count 2
+
+ RET=0
+ bridge vlan global set vid 10 dev br0 mcast_snooping 1 mcast_last_member_interval 200
+ check_err $? "Could not set mcast_last_member_interval in vlan 10"
+ log_test "Vlan 10 mcast_last_member_interval option changed to 200"
+ bridge vlan global set vid 10 dev br0 mcast_snooping 1 mcast_last_member_interval 100
+}
+
+vlmc_startup_query_test()
+{
+ RET=0
+ local goutput=`bridge -j vlan global show`
+ echo -n $goutput |
+ jq -e ".[].vlans[] | select(.vlan == 10)" &>/dev/null
+ check_err $? "Could not find vlan 10's global options"
+
+ echo -n $goutput |
+ jq -e ".[].vlans[] | select(.vlan == 10 and \
+ .mcast_startup_query_interval == 3125) " &>/dev/null
+ check_err $? "Wrong default mcast_startup_query_interval global vlan option value"
+ log_test "Vlan mcast_startup_query_interval global option default value"
+
+ RET=0
+ echo -n $goutput |
+ jq -e ".[].vlans[] | select(.vlan == 10 and \
+ .mcast_startup_query_count == 2) " &>/dev/null
+ check_err $? "Wrong default mcast_startup_query_count global vlan option value"
+ log_test "Vlan mcast_startup_query_count global option default value"
+
+ RET=0
+ bridge vlan global set vid 10 dev br0 mcast_snooping 1 mcast_startup_query_interval 100
+ check_err $? "Could not set mcast_startup_query_interval in vlan 10"
+ vlmc_check_query igmp 2 $swp1 2 3
+ check_err $? "Wrong number of tagged IGMPv2 general queries sent"
+ log_test "Vlan 10 mcast_startup_query_interval option changed to 100"
+
+ RET=0
+ bridge vlan global set vid 10 dev br0 mcast_snooping 1 mcast_startup_query_count 3
+ check_err $? "Could not set mcast_startup_query_count in vlan 10"
+ vlmc_check_query igmp 2 $swp1 3 4
+ check_err $? "Wrong number of tagged IGMPv2 general queries sent"
+ log_test "Vlan 10 mcast_startup_query_count option changed to 3"
+
+ bridge vlan global set vid 10 dev br0 mcast_snooping 1 mcast_startup_query_interval 3125
+ bridge vlan global set vid 10 dev br0 mcast_snooping 1 mcast_startup_query_count 2
+}
+
+vlmc_membership_test()
+{
+ RET=0
+ local goutput=`bridge -j vlan global show`
+ echo -n $goutput |
+ jq -e ".[].vlans[] | select(.vlan == 10)" &>/dev/null
+ check_err $? "Could not find vlan 10's global options"
+
+ echo -n $goutput |
+ jq -e ".[].vlans[] | select(.vlan == 10 and \
+ .mcast_membership_interval == 26000) " &>/dev/null
+ check_err $? "Wrong default mcast_membership_interval global vlan option value"
+ log_test "Vlan mcast_membership_interval global option default value"
+
+ RET=0
+ bridge vlan global set vid 10 dev br0 mcast_snooping 1 mcast_membership_interval 200
+ check_err $? "Could not set mcast_membership_interval in vlan 10"
+ log_test "Vlan 10 mcast_membership_interval option changed to 200"
+
+ RET=0
+ vlmc_v2join_test 1
+ log_test "Vlan 10 mcast_membership_interval mdb entry expire"
+
+ bridge vlan global set vid 10 dev br0 mcast_snooping 1 mcast_membership_interval 26000
+}
+
+vlmc_querier_intvl_test()
+{
+ RET=0
+ local goutput=`bridge -j vlan global show`
+ echo -n $goutput |
+ jq -e ".[].vlans[] | select(.vlan == 10)" &>/dev/null
+ check_err $? "Could not find vlan 10's global options"
+
+ echo -n $goutput |
+ jq -e ".[].vlans[] | select(.vlan == 10 and \
+ .mcast_querier_interval == 25500) " &>/dev/null
+ check_err $? "Wrong default mcast_querier_interval global vlan option value"
+ log_test "Vlan mcast_querier_interval global option default value"
+
+ RET=0
+ bridge vlan global set vid 10 dev br0 mcast_snooping 1 mcast_querier_interval 100
+ check_err $? "Could not set mcast_querier_interval in vlan 10"
+ log_test "Vlan 10 mcast_querier_interval option changed to 100"
+
+ RET=0
+ ip link add dev br1 type bridge mcast_snooping 1 mcast_querier 1 vlan_filtering 1 \
+ mcast_vlan_snooping 1
+ bridge vlan add vid 10 dev br1 self pvid untagged
+ ip link set dev $h1 master br1
+ ip link set dev br1 up
+ bridge vlan add vid 10 dev $h1 master
+ bridge vlan global set vid 10 dev br1 mcast_snooping 1 mcast_querier 1
+ sleep 2
+ ip link del dev br1
+ ip addr replace 2001:db8:1::1/64 dev $h1
+ vlmc_check_query igmp 2 $swp1 1 1
+ check_err $? "Wrong number of IGMPv2 general queries after querier interval"
+ log_test "Vlan 10 mcast_querier_interval expire after outside query"
+
+ bridge vlan global set vid 10 dev br0 mcast_snooping 1 mcast_querier_interval 25500
+}
+
+vlmc_query_intvl_test()
+{
+ RET=0
+ local goutput=`bridge -j vlan global show`
+ echo -n $goutput |
+ jq -e ".[].vlans[] | select(.vlan == 10)" &>/dev/null
+ check_err $? "Could not find vlan 10's global options"
+
+ echo -n $goutput |
+ jq -e ".[].vlans[] | select(.vlan == 10 and \
+ .mcast_query_interval == 12500) " &>/dev/null
+ check_err $? "Wrong default mcast_query_interval global vlan option value"
+ log_test "Vlan mcast_query_interval global option default value"
+
+ RET=0
+ bridge vlan global set vid 10 dev br0 mcast_snooping 1 mcast_startup_query_count 0
+ bridge vlan global set vid 10 dev br0 mcast_snooping 1 mcast_query_interval 200
+ check_err $? "Could not set mcast_query_interval in vlan 10"
+ # 1 is sent immediately, then 2 more in the next 5 seconds
+ vlmc_check_query igmp 2 $swp1 3 5
+ check_err $? "Wrong number of tagged IGMPv2 general queries sent"
+ log_test "Vlan 10 mcast_query_interval option changed to 200"
+
+ bridge vlan global set vid 10 dev br0 mcast_snooping 1 mcast_startup_query_count 2
+ bridge vlan global set vid 10 dev br0 mcast_snooping 1 mcast_query_interval 12500
+}
+
+vlmc_query_response_intvl_test()
+{
+ RET=0
+ local goutput=`bridge -j vlan global show`
+ echo -n $goutput |
+ jq -e ".[].vlans[] | select(.vlan == 10)" &>/dev/null
+ check_err $? "Could not find vlan 10's global options"
+
+ echo -n $goutput |
+ jq -e ".[].vlans[] | select(.vlan == 10 and \
+ .mcast_query_response_interval == 1000) " &>/dev/null
+ check_err $? "Wrong default mcast_query_response_interval global vlan option value"
+ log_test "Vlan mcast_query_response_interval global option default value"
+
+ RET=0
+ bridge vlan global set vid 10 dev br0 mcast_snooping 1 mcast_query_response_interval 200
+ check_err $? "Could not set mcast_query_response_interval in vlan 10"
+ log_test "Vlan 10 mcast_query_response_interval option changed to 200"
+
+ bridge vlan global set vid 10 dev br0 mcast_snooping 1 mcast_query_response_interval 1000
+}
+
+vlmc_router_port_test()
+{
+ RET=0
+ local goutput=`bridge -j -d vlan show`
+ echo -n $goutput |
+ jq -e ".[] | select(.ifname == \"$swp1\" and \
+ .vlans[].vlan == 10)" &>/dev/null
+ check_err $? "Could not find port vlan 10's options"
+
+ echo -n $goutput |
+ jq -e ".[] | select(.ifname == \"$swp1\" and \
+ .vlans[].vlan == 10 and \
+ .vlans[].mcast_router == 1)" &>/dev/null
+ check_err $? "Wrong default port mcast_router option value"
+ log_test "Port vlan 10 option mcast_router default value"
+
+ RET=0
+ bridge vlan set vid 10 dev $swp1 mcast_router 2
+ check_err $? "Could not set port vlan 10's mcast_router option"
+ log_test "Port vlan 10 mcast_router option changed to 2"
+
+ RET=0
+ tc filter add dev $swp1 egress pref 10 prot 802.1Q \
+ flower vlan_id 10 vlan_ethtype ipv4 dst_ip 239.1.1.1 ip_proto udp action pass
+ tc filter add dev $swp2 egress pref 10 prot 802.1Q \
+ flower vlan_id 10 vlan_ethtype ipv4 dst_ip 239.1.1.1 ip_proto udp action pass
+ bridge vlan set vid 10 dev $swp2 mcast_router 0
+ # we need to enable querier and disable query response interval to
+ # make sure packets are flooded only to router ports
+ bridge vlan global set vid 10 dev br0 mcast_snooping 1 mcast_querier 1 \
+ mcast_query_response_interval 0
+ bridge vlan add vid 10 dev br0 self
+ sleep 1
+ mausezahn br0 -Q 10 -c 10 -p 128 -b 01:00:5e:01:01:01 -B 239.1.1.1 \
+ -t udp "dp=1024" &>/dev/null
+ local swp1_tcstats=$(tc_rule_stats_get $swp1 10 egress)
+ if [[ $swp1_tcstats != 10 ]]; then
+ check_err 1 "Wrong number of vlan 10 multicast packets flooded"
+ fi
+ local swp2_tcstats=$(tc_rule_stats_get $swp2 10 egress)
+ check_err $swp2_tcstats "Vlan 10 multicast packets flooded to non-router port"
+ log_test "Flood unknown vlan multicast packets to router port only"
+
+ tc filter del dev $swp2 egress pref 10
+ tc filter del dev $swp1 egress pref 10
+ bridge vlan del vid 10 dev br0 self
+ bridge vlan global set vid 10 dev br0 mcast_snooping 1 mcast_query_response_interval 1000
+ bridge vlan set vid 10 dev $swp2 mcast_router 1
+ bridge vlan set vid 10 dev $swp1 mcast_router 1
+}
+
+vlmc_filtering_test()
+{
+ RET=0
+ ip link set dev br0 type bridge vlan_filtering 0
+ ip -j -d link show dev br0 | \
+ jq -e "select(.[0].linkinfo.info_data.mcast_vlan_snooping == 1)" &>/dev/null
+ check_fail $? "Vlan filtering is disabled but multicast vlan snooping is still enabled"
+ log_test "Disable multicast vlan snooping when vlan filtering is disabled"
+}
+
+trap cleanup EXIT
+
+setup_prepare
+setup_wait
+
+tests_run
+
+exit $EXIT_STATUS
diff --git a/tools/testing/selftests/net/forwarding/bridge_vlan_unaware.sh b/tools/testing/selftests/net/forwarding/bridge_vlan_unaware.sh
index c15c6c85c984..1c8a26046589 100755
--- a/tools/testing/selftests/net/forwarding/bridge_vlan_unaware.sh
+++ b/tools/testing/selftests/net/forwarding/bridge_vlan_unaware.sh
@@ -27,8 +27,9 @@ h2_destroy()
switch_create()
{
- # 10 Seconds ageing time.
- ip link add dev br0 type bridge ageing_time 1000 mcast_snooping 0
+ ip link add dev br0 type bridge \
+ ageing_time $LOW_AGEING_TIME \
+ mcast_snooping 0
ip link set dev $swp1 master br0
ip link set dev $swp2 master br0
diff --git a/tools/testing/selftests/net/forwarding/config b/tools/testing/selftests/net/forwarding/config
index da96eff72a8e..8d7a1a004b7c 100644
--- a/tools/testing/selftests/net/forwarding/config
+++ b/tools/testing/selftests/net/forwarding/config
@@ -6,9 +6,49 @@ CONFIG_IPV6_MULTIPLE_TABLES=y
CONFIG_NET_VRF=m
CONFIG_BPF_SYSCALL=y
CONFIG_CGROUP_BPF=y
+CONFIG_DUMMY=m
+CONFIG_IPV6=y
+CONFIG_IPV6_GRE=m
+CONFIG_IPV6_MROUTE=y
+CONFIG_IPV6_MROUTE_MULTIPLE_TABLES=y
+CONFIG_IPV6_PIMSM_V2=y
+CONFIG_IP_MROUTE=y
+CONFIG_IP_MROUTE_MULTIPLE_TABLES=y
+CONFIG_IP_PIMSM_V1=y
+CONFIG_IP_PIMSM_V2=y
+CONFIG_MACVLAN=m
+CONFIG_NET_ACT_CT=m
+CONFIG_NET_ACT_MIRRED=m
+CONFIG_NET_ACT_MPLS=m
+CONFIG_NET_ACT_PEDIT=m
+CONFIG_NET_ACT_POLICE=m
+CONFIG_NET_ACT_SAMPLE=m
+CONFIG_NET_ACT_SKBEDIT=m
+CONFIG_NET_ACT_TUNNEL_KEY=m
+CONFIG_NET_ACT_VLAN=m
CONFIG_NET_CLS_FLOWER=m
+CONFIG_NET_CLS_MATCHALL=m
+CONFIG_NET_CLS_BASIC=m
+CONFIG_NET_EMATCH=y
+CONFIG_NET_EMATCH_META=m
+CONFIG_NET_IPGRE=m
+CONFIG_NET_IPGRE_DEMUX=m
+CONFIG_NET_IPIP=m
+CONFIG_NET_SCH_ETS=m
CONFIG_NET_SCH_INGRESS=m
CONFIG_NET_ACT_GACT=m
+CONFIG_NET_SCH_PRIO=m
+CONFIG_NET_SCH_RED=m
+CONFIG_NET_SCH_TBF=m
+CONFIG_NET_TC_SKB_EXT=y
+CONFIG_NET_TEAM=y
+CONFIG_NET_TEAM_MODE_LOADBALANCE=y
+CONFIG_NETFILTER=y
+CONFIG_NF_CONNTRACK=m
+CONFIG_NF_FLOW_TABLE=m
+CONFIG_NF_TABLES=m
CONFIG_VETH=m
CONFIG_NAMESPACES=y
CONFIG_NET_NS=y
+CONFIG_VXLAN=m
+CONFIG_XFRM_USER=m
diff --git a/tools/testing/selftests/net/forwarding/custom_multipath_hash.sh b/tools/testing/selftests/net/forwarding/custom_multipath_hash.sh
new file mode 100755
index 000000000000..1783c10215e5
--- /dev/null
+++ b/tools/testing/selftests/net/forwarding/custom_multipath_hash.sh
@@ -0,0 +1,372 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+#
+# Test traffic distribution between two paths when using custom hash policy.
+#
+# +--------------------------------+
+# | H1 |
+# | $h1 + |
+# | 198.51.100.{2-253}/24 | |
+# | 2001:db8:1::{2-fd}/64 | |
+# +-------------------------|------+
+# |
+# +-------------------------|-------------------------+
+# | SW1 | |
+# | $rp1 + |
+# | 198.51.100.1/24 |
+# | 2001:db8:1::1/64 |
+# | |
+# | |
+# | $rp11 + + $rp12 |
+# | 192.0.2.1/28 | | 192.0.2.17/28 |
+# | 2001:db8:2::1/64 | | 2001:db8:3::1/64 |
+# +------------------|-------------|------------------+
+# | |
+# +------------------|-------------|------------------+
+# | SW2 | | |
+# | | | |
+# | $rp21 + + $rp22 |
+# | 192.0.2.2/28 192.0.2.18/28 |
+# | 2001:db8:2::2/64 2001:db8:3::2/64 |
+# | |
+# | |
+# | $rp2 + |
+# | 203.0.113.1/24 | |
+# | 2001:db8:4::1/64 | |
+# +-------------------------|-------------------------+
+# |
+# +-------------------------|------+
+# | H2 | |
+# | $h2 + |
+# | 203.0.113.{2-253}/24 |
+# | 2001:db8:4::{2-fd}/64 |
+# +--------------------------------+
+
+ALL_TESTS="
+ ping_ipv4
+ ping_ipv6
+ custom_hash
+"
+
+NUM_NETIFS=8
+source lib.sh
+
+h1_create()
+{
+ simple_if_init $h1 198.51.100.2/24 2001:db8:1::2/64
+ ip route add vrf v$h1 default via 198.51.100.1 dev $h1
+ ip -6 route add vrf v$h1 default via 2001:db8:1::1 dev $h1
+}
+
+h1_destroy()
+{
+ ip -6 route del vrf v$h1 default
+ ip route del vrf v$h1 default
+ simple_if_fini $h1 198.51.100.2/24 2001:db8:1::2/64
+}
+
+sw1_create()
+{
+ simple_if_init $rp1 198.51.100.1/24 2001:db8:1::1/64
+ __simple_if_init $rp11 v$rp1 192.0.2.1/28 2001:db8:2::1/64
+ __simple_if_init $rp12 v$rp1 192.0.2.17/28 2001:db8:3::1/64
+
+ ip route add vrf v$rp1 203.0.113.0/24 \
+ nexthop via 192.0.2.2 dev $rp11 \
+ nexthop via 192.0.2.18 dev $rp12
+
+ ip -6 route add vrf v$rp1 2001:db8:4::/64 \
+ nexthop via 2001:db8:2::2 dev $rp11 \
+ nexthop via 2001:db8:3::2 dev $rp12
+}
+
+sw1_destroy()
+{
+ ip -6 route del vrf v$rp1 2001:db8:4::/64
+
+ ip route del vrf v$rp1 203.0.113.0/24
+
+ __simple_if_fini $rp12 192.0.2.17/28 2001:db8:3::1/64
+ __simple_if_fini $rp11 192.0.2.1/28 2001:db8:2::1/64
+ simple_if_fini $rp1 198.51.100.1/24 2001:db8:1::1/64
+}
+
+sw2_create()
+{
+ simple_if_init $rp2 203.0.113.1/24 2001:db8:4::1/64
+ __simple_if_init $rp21 v$rp2 192.0.2.2/28 2001:db8:2::2/64
+ __simple_if_init $rp22 v$rp2 192.0.2.18/28 2001:db8:3::2/64
+
+ ip route add vrf v$rp2 198.51.100.0/24 \
+ nexthop via 192.0.2.1 dev $rp21 \
+ nexthop via 192.0.2.17 dev $rp22
+
+ ip -6 route add vrf v$rp2 2001:db8:1::/64 \
+ nexthop via 2001:db8:2::1 dev $rp21 \
+ nexthop via 2001:db8:3::1 dev $rp22
+}
+
+sw2_destroy()
+{
+ ip -6 route del vrf v$rp2 2001:db8:1::/64
+
+ ip route del vrf v$rp2 198.51.100.0/24
+
+ __simple_if_fini $rp22 192.0.2.18/28 2001:db8:3::2/64
+ __simple_if_fini $rp21 192.0.2.2/28 2001:db8:2::2/64
+ simple_if_fini $rp2 203.0.113.1/24 2001:db8:4::1/64
+}
+
+h2_create()
+{
+ simple_if_init $h2 203.0.113.2/24 2001:db8:4::2/64
+ ip route add vrf v$h2 default via 203.0.113.1 dev $h2
+ ip -6 route add vrf v$h2 default via 2001:db8:4::1 dev $h2
+}
+
+h2_destroy()
+{
+ ip -6 route del vrf v$h2 default
+ ip route del vrf v$h2 default
+ simple_if_fini $h2 203.0.113.2/24 2001:db8:4::2/64
+}
+
+setup_prepare()
+{
+ h1=${NETIFS[p1]}
+
+ rp1=${NETIFS[p2]}
+
+ rp11=${NETIFS[p3]}
+ rp21=${NETIFS[p4]}
+
+ rp12=${NETIFS[p5]}
+ rp22=${NETIFS[p6]}
+
+ rp2=${NETIFS[p7]}
+
+ h2=${NETIFS[p8]}
+
+ vrf_prepare
+ h1_create
+ sw1_create
+ sw2_create
+ h2_create
+
+ forwarding_enable
+}
+
+cleanup()
+{
+ pre_cleanup
+
+ forwarding_restore
+
+ h2_destroy
+ sw2_destroy
+ sw1_destroy
+ h1_destroy
+ vrf_cleanup
+}
+
+ping_ipv4()
+{
+ ping_test $h1 203.0.113.2
+}
+
+ping_ipv6()
+{
+ ping6_test $h1 2001:db8:4::2
+}
+
+send_src_ipv4()
+{
+ ip vrf exec v$h1 $MZ $h1 -q -p 64 \
+ -A "198.51.100.2-198.51.100.253" -B 203.0.113.2 \
+ -d $MZ_DELAY -c 50 -t udp "sp=20000,dp=30000"
+}
+
+send_dst_ipv4()
+{
+ ip vrf exec v$h1 $MZ $h1 -q -p 64 \
+ -A 198.51.100.2 -B "203.0.113.2-203.0.113.253" \
+ -d $MZ_DELAY -c 50 -t udp "sp=20000,dp=30000"
+}
+
+send_src_udp4()
+{
+ ip vrf exec v$h1 $MZ $h1 -q -p 64 \
+ -A 198.51.100.2 -B 203.0.113.2 \
+ -d $MZ_DELAY -t udp "sp=0-32768,dp=30000"
+}
+
+send_dst_udp4()
+{
+ ip vrf exec v$h1 $MZ $h1 -q -p 64 \
+ -A 198.51.100.2 -B 203.0.113.2 \
+ -d $MZ_DELAY -t udp "sp=20000,dp=0-32768"
+}
+
+send_src_ipv6()
+{
+ ip vrf exec v$h1 $MZ -6 $h1 -q -p 64 \
+ -A "2001:db8:1::2-2001:db8:1::fd" -B 2001:db8:4::2 \
+ -d $MZ_DELAY -c 50 -t udp "sp=20000,dp=30000"
+}
+
+send_dst_ipv6()
+{
+ ip vrf exec v$h1 $MZ -6 $h1 -q -p 64 \
+ -A 2001:db8:1::2 -B "2001:db8:4::2-2001:db8:4::fd" \
+ -d $MZ_DELAY -c 50 -t udp "sp=20000,dp=30000"
+}
+
+send_flowlabel()
+{
+ # Generate 16384 echo requests, each with a random flow label.
+ for _ in $(seq 1 16384); do
+ ip vrf exec v$h1 \
+ $PING6 2001:db8:4::2 -F 0 -c 1 -q >/dev/null 2>&1
+ done
+}
+
+send_src_udp6()
+{
+ ip vrf exec v$h1 $MZ -6 $h1 -q -p 64 \
+ -A 2001:db8:1::2 -B 2001:db8:4::2 \
+ -d $MZ_DELAY -t udp "sp=0-32768,dp=30000"
+}
+
+send_dst_udp6()
+{
+ ip vrf exec v$h1 $MZ -6 $h1 -q -p 64 \
+ -A 2001:db8:1::2 -B 2001:db8:4::2 \
+ -d $MZ_DELAY -t udp "sp=20000,dp=0-32768"
+}
+
+custom_hash_test()
+{
+ local field="$1"; shift
+ local balanced="$1"; shift
+ local send_flows="$@"
+
+ RET=0
+
+ local t0_rp11=$(link_stats_tx_packets_get $rp11)
+ local t0_rp12=$(link_stats_tx_packets_get $rp12)
+
+ $send_flows
+
+ local t1_rp11=$(link_stats_tx_packets_get $rp11)
+ local t1_rp12=$(link_stats_tx_packets_get $rp12)
+
+ local d_rp11=$((t1_rp11 - t0_rp11))
+ local d_rp12=$((t1_rp12 - t0_rp12))
+
+ local diff=$((d_rp12 - d_rp11))
+ local sum=$((d_rp11 + d_rp12))
+
+ local pct=$(echo "$diff / $sum * 100" | bc -l)
+ local is_balanced=$(echo "-20 <= $pct && $pct <= 20" | bc)
+
+ [[ ( $is_balanced -eq 1 && $balanced == "balanced" ) ||
+ ( $is_balanced -eq 0 && $balanced == "unbalanced" ) ]]
+ check_err $? "Expected traffic to be $balanced, but it is not"
+
+ log_test "Multipath hash field: $field ($balanced)"
+ log_info "Packets sent on path1 / path2: $d_rp11 / $d_rp12"
+}
+
+custom_hash_v4()
+{
+ log_info "Running IPv4 custom multipath hash tests"
+
+ sysctl_set net.ipv4.fib_multipath_hash_policy 3
+
+ # Prevent the neighbour table from overflowing, as different neighbour
+ # entries will be created on $ol4 when using different destination IPs.
+ sysctl_set net.ipv4.neigh.default.gc_thresh1 1024
+ sysctl_set net.ipv4.neigh.default.gc_thresh2 1024
+ sysctl_set net.ipv4.neigh.default.gc_thresh3 1024
+
+ sysctl_set net.ipv4.fib_multipath_hash_fields 0x0001
+ custom_hash_test "Source IP" "balanced" send_src_ipv4
+ custom_hash_test "Source IP" "unbalanced" send_dst_ipv4
+
+ sysctl_set net.ipv4.fib_multipath_hash_fields 0x0002
+ custom_hash_test "Destination IP" "balanced" send_dst_ipv4
+ custom_hash_test "Destination IP" "unbalanced" send_src_ipv4
+
+ sysctl_set net.ipv4.fib_multipath_hash_fields 0x0010
+ custom_hash_test "Source port" "balanced" send_src_udp4
+ custom_hash_test "Source port" "unbalanced" send_dst_udp4
+
+ sysctl_set net.ipv4.fib_multipath_hash_fields 0x0020
+ custom_hash_test "Destination port" "balanced" send_dst_udp4
+ custom_hash_test "Destination port" "unbalanced" send_src_udp4
+
+ sysctl_restore net.ipv4.neigh.default.gc_thresh3
+ sysctl_restore net.ipv4.neigh.default.gc_thresh2
+ sysctl_restore net.ipv4.neigh.default.gc_thresh1
+
+ sysctl_restore net.ipv4.fib_multipath_hash_policy
+}
+
+custom_hash_v6()
+{
+ log_info "Running IPv6 custom multipath hash tests"
+
+ sysctl_set net.ipv6.fib_multipath_hash_policy 3
+
+ # Prevent the neighbour table from overflowing, as different neighbour
+ # entries will be created on $ol4 when using different destination IPs.
+ sysctl_set net.ipv6.neigh.default.gc_thresh1 1024
+ sysctl_set net.ipv6.neigh.default.gc_thresh2 1024
+ sysctl_set net.ipv6.neigh.default.gc_thresh3 1024
+
+ sysctl_set net.ipv6.fib_multipath_hash_fields 0x0001
+ custom_hash_test "Source IP" "balanced" send_src_ipv6
+ custom_hash_test "Source IP" "unbalanced" send_dst_ipv6
+
+ sysctl_set net.ipv6.fib_multipath_hash_fields 0x0002
+ custom_hash_test "Destination IP" "balanced" send_dst_ipv6
+ custom_hash_test "Destination IP" "unbalanced" send_src_ipv6
+
+ sysctl_set net.ipv6.fib_multipath_hash_fields 0x0008
+ custom_hash_test "Flowlabel" "balanced" send_flowlabel
+ custom_hash_test "Flowlabel" "unbalanced" send_src_ipv6
+
+ sysctl_set net.ipv6.fib_multipath_hash_fields 0x0010
+ custom_hash_test "Source port" "balanced" send_src_udp6
+ custom_hash_test "Source port" "unbalanced" send_dst_udp6
+
+ sysctl_set net.ipv6.fib_multipath_hash_fields 0x0020
+ custom_hash_test "Destination port" "balanced" send_dst_udp6
+ custom_hash_test "Destination port" "unbalanced" send_src_udp6
+
+ sysctl_restore net.ipv6.neigh.default.gc_thresh3
+ sysctl_restore net.ipv6.neigh.default.gc_thresh2
+ sysctl_restore net.ipv6.neigh.default.gc_thresh1
+
+ sysctl_restore net.ipv6.fib_multipath_hash_policy
+}
+
+custom_hash()
+{
+ # Test that when the hash policy is set to custom, traffic is
+ # distributed only according to the fields set in the
+ # fib_multipath_hash_fields sysctl.
+ #
+ # Each time set a different field and make sure traffic is only
+ # distributed when the field is changed in the packet stream.
+ custom_hash_v4
+ custom_hash_v6
+}
+
+trap cleanup EXIT
+
+setup_prepare
+setup_wait
+tests_run
+
+exit $EXIT_STATUS
diff --git a/tools/testing/selftests/net/forwarding/devlink_lib.sh b/tools/testing/selftests/net/forwarding/devlink_lib.sh
index f0e6be4c09e9..f1de525cfa55 100644
--- a/tools/testing/selftests/net/forwarding/devlink_lib.sh
+++ b/tools/testing/selftests/net/forwarding/devlink_lib.sh
@@ -1,23 +1,32 @@
#!/bin/bash
# SPDX-License-Identifier: GPL-2.0
+# Kselftest framework requirement - SKIP code is 4.
+ksft_skip=4
+
##############################################################################
# Defines
if [[ ! -v DEVLINK_DEV ]]; then
- DEVLINK_DEV=$(devlink port show "${NETIFS[p1]}" -j \
+ DEVLINK_DEV=$(devlink port show "${NETIFS[p1]:-$NETIF_NO_CABLE}" -j \
| jq -r '.port | keys[]' | cut -d/ -f-2)
if [ -z "$DEVLINK_DEV" ]; then
echo "SKIP: ${NETIFS[p1]} has no devlink device registered for it"
- exit 1
+ exit $ksft_skip
fi
if [[ "$(echo $DEVLINK_DEV | grep -c pci)" -eq 0 ]]; then
echo "SKIP: devlink device's bus is not PCI"
- exit 1
+ exit $ksft_skip
fi
DEVLINK_VIDDID=$(lspci -s $(echo $DEVLINK_DEV | cut -d"/" -f2) \
-n | cut -d" " -f3)
+elif [[ ! -z "$DEVLINK_DEV" ]]; then
+ devlink dev show $DEVLINK_DEV &> /dev/null
+ if [ $? -ne 0 ]; then
+ echo "SKIP: devlink device \"$DEVLINK_DEV\" not found"
+ exit $ksft_skip
+ fi
fi
##############################################################################
@@ -26,19 +35,19 @@ fi
devlink help 2>&1 | grep resource &> /dev/null
if [ $? -ne 0 ]; then
echo "SKIP: iproute2 too old, missing devlink resource support"
- exit 1
+ exit $ksft_skip
fi
devlink help 2>&1 | grep trap &> /dev/null
if [ $? -ne 0 ]; then
echo "SKIP: iproute2 too old, missing devlink trap support"
- exit 1
+ exit $ksft_skip
fi
devlink dev help 2>&1 | grep info &> /dev/null
if [ $? -ne 0 ]; then
echo "SKIP: iproute2 too old, missing devlink dev info support"
- exit 1
+ exit $ksft_skip
fi
##############################################################################
@@ -98,6 +107,11 @@ devlink_resource_size_set()
check_err $? "Failed setting path $path to size $size"
}
+devlink_resource_occ_get()
+{
+ devlink_resource_get "$@" | jq '.["occ"]'
+}
+
devlink_reload()
{
local still_pending
@@ -112,6 +126,12 @@ devlink_reload()
declare -A DEVLINK_ORIG
+# Changing pool type from static to dynamic causes reinterpretation of threshold
+# values. They therefore need to be saved before pool type is changed, then the
+# pool type can be changed, and then the new values need to be set up. Therefore
+# instead of saving the current state implicitly in the _set call, provide
+# functions for all three primitives: save, set, and restore.
+
devlink_port_pool_threshold()
{
local port=$1; shift
@@ -121,14 +141,21 @@ devlink_port_pool_threshold()
| jq '.port_pool."'"$port"'"[].threshold'
}
-devlink_port_pool_th_set()
+devlink_port_pool_th_save()
{
local port=$1; shift
local pool=$1; shift
- local th=$1; shift
local key="port_pool($port,$pool).threshold"
DEVLINK_ORIG[$key]=$(devlink_port_pool_threshold $port $pool)
+}
+
+devlink_port_pool_th_set()
+{
+ local port=$1; shift
+ local pool=$1; shift
+ local th=$1; shift
+
devlink sb port pool set $port pool $pool th $th
}
@@ -137,8 +164,13 @@ devlink_port_pool_th_restore()
local port=$1; shift
local pool=$1; shift
local key="port_pool($port,$pool).threshold"
+ local -a orig=(${DEVLINK_ORIG[$key]})
- devlink sb port pool set $port pool $pool th ${DEVLINK_ORIG[$key]}
+ if [[ -z $orig ]]; then
+ echo "WARNING: Mismatched devlink_port_pool_th_restore"
+ else
+ devlink sb port pool set $port pool $pool th $orig
+ fi
}
devlink_pool_size_thtype()
@@ -149,14 +181,20 @@ devlink_pool_size_thtype()
| jq -r '.pool[][] | (.size, .thtype)'
}
+devlink_pool_size_thtype_save()
+{
+ local pool=$1; shift
+ local key="pool($pool).size_thtype"
+
+ DEVLINK_ORIG[$key]=$(devlink_pool_size_thtype $pool)
+}
+
devlink_pool_size_thtype_set()
{
local pool=$1; shift
local thtype=$1; shift
local size=$1; shift
- local key="pool($pool).size_thtype"
- DEVLINK_ORIG[$key]=$(devlink_pool_size_thtype $pool)
devlink sb pool set "$DEVLINK_DEV" pool $pool size $size thtype $thtype
}
@@ -166,8 +204,12 @@ devlink_pool_size_thtype_restore()
local key="pool($pool).size_thtype"
local -a orig=(${DEVLINK_ORIG[$key]})
- devlink sb pool set "$DEVLINK_DEV" pool $pool \
- size ${orig[0]} thtype ${orig[1]}
+ if [[ -z ${orig[0]} ]]; then
+ echo "WARNING: Mismatched devlink_pool_size_thtype_restore"
+ else
+ devlink sb pool set "$DEVLINK_DEV" pool $pool \
+ size ${orig[0]} thtype ${orig[1]}
+ fi
}
devlink_tc_bind_pool_th()
@@ -180,6 +222,16 @@ devlink_tc_bind_pool_th()
| jq -r '.tc_bind[][] | (.pool, .threshold)'
}
+devlink_tc_bind_pool_th_save()
+{
+ local port=$1; shift
+ local tc=$1; shift
+ local dir=$1; shift
+ local key="tc_bind($port,$dir,$tc).pool_th"
+
+ DEVLINK_ORIG[$key]=$(devlink_tc_bind_pool_th $port $tc $dir)
+}
+
devlink_tc_bind_pool_th_set()
{
local port=$1; shift
@@ -187,9 +239,7 @@ devlink_tc_bind_pool_th_set()
local dir=$1; shift
local pool=$1; shift
local th=$1; shift
- local key="tc_bind($port,$dir,$tc).pool_th"
- DEVLINK_ORIG[$key]=$(devlink_tc_bind_pool_th $port $tc $dir)
devlink sb tc bind set $port tc $tc type $dir pool $pool th $th
}
@@ -201,8 +251,12 @@ devlink_tc_bind_pool_th_restore()
local key="tc_bind($port,$dir,$tc).pool_th"
local -a orig=(${DEVLINK_ORIG[$key]})
- devlink sb tc bind set $port tc $tc type $dir \
- pool ${orig[0]} th ${orig[1]}
+ if [[ -z ${orig[0]} ]]; then
+ echo "WARNING: Mismatched devlink_tc_bind_pool_th_restore"
+ else
+ devlink sb tc bind set $port tc $tc type $dir \
+ pool ${orig[0]} th ${orig[1]}
+ fi
}
devlink_traps_num_get()
@@ -273,6 +327,14 @@ devlink_trap_rx_bytes_get()
| jq '.[][][]["stats"]["rx"]["bytes"]'
}
+devlink_trap_drop_packets_get()
+{
+ local trap_name=$1; shift
+
+ devlink -js trap show $DEVLINK_DEV trap $trap_name \
+ | jq '.[][][]["stats"]["rx"]["dropped"]'
+}
+
devlink_trap_stats_idle_test()
{
local trap_name=$1; shift
@@ -294,6 +356,24 @@ devlink_trap_stats_idle_test()
fi
}
+devlink_trap_drop_stats_idle_test()
+{
+ local trap_name=$1; shift
+ local t0_packets t0_bytes
+
+ t0_packets=$(devlink_trap_drop_packets_get $trap_name)
+
+ sleep 1
+
+ t1_packets=$(devlink_trap_drop_packets_get $trap_name)
+
+ if [[ $t0_packets -eq $t1_packets ]]; then
+ return 0
+ else
+ return 1
+ fi
+}
+
devlink_traps_enable_all()
{
local trap_name
@@ -423,25 +503,30 @@ devlink_trap_drop_cleanup()
tc filter del dev $dev egress protocol $proto pref $pref handle $handle flower
}
-devlink_trap_stats_test()
+devlink_trap_stats_check()
{
- local test_name=$1; shift
local trap_name=$1; shift
local send_one="$@"
local t0_packets
local t1_packets
- RET=0
-
t0_packets=$(devlink_trap_rx_packets_get $trap_name)
$send_one && sleep 1
t1_packets=$(devlink_trap_rx_packets_get $trap_name)
- if [[ $t1_packets -eq $t0_packets ]]; then
- check_err 1 "Trap stats did not increase"
- fi
+ [[ $t1_packets -ne $t0_packets ]]
+}
+
+devlink_trap_stats_test()
+{
+ local test_name=$1; shift
+
+ RET=0
+
+ devlink_trap_stats_check "$@"
+ check_err $? "Trap stats did not increase"
log_test "$test_name"
}
@@ -483,12 +568,6 @@ devlink_trap_group_policer_get()
| jq '.[][][]["policer"]'
}
-devlink_trap_policer_ids_get()
-{
- devlink -j -p trap policer show \
- | jq '.[]["'$DEVLINK_DEV'"][]["policer"]'
-}
-
devlink_port_by_netdev()
{
local if_name=$1
@@ -504,3 +583,14 @@ devlink_cpu_port_get()
echo "$DEVLINK_DEV/$cpu_dl_port_num"
}
+
+devlink_cell_size_get()
+{
+ devlink sb pool show "$DEVLINK_DEV" pool 0 -j \
+ | jq '.pool[][].cell_size'
+}
+
+devlink_pool_size_get()
+{
+ devlink sb show "$DEVLINK_DEV" -j | jq '.[][][]["size"]'
+}
diff --git a/tools/testing/selftests/net/forwarding/dual_vxlan_bridge.sh b/tools/testing/selftests/net/forwarding/dual_vxlan_bridge.sh
new file mode 100755
index 000000000000..68ee92df3e07
--- /dev/null
+++ b/tools/testing/selftests/net/forwarding/dual_vxlan_bridge.sh
@@ -0,0 +1,367 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+# +--------------------+ +----------------------+
+# | H1 (vrf) | | H2 (vrf) |
+# | + h1.10 | | + h2.20 |
+# | | 192.0.2.1/28 | | | 192.0.2.2/28 |
+# | | | | | |
+# | + $h1 | | + $h2 |
+# | | | | | |
+# +----|---------------+ +--|-------------------+
+# | |
+# +----|--------------------------------------------------|--------------------+
+# | SW | | |
+# | +--|-------------------------------+ +----------------|------------------+ |
+# | | + $swp1 BR1 (802.1ad) | | BR2 (802.1d) + $swp2 | |
+# | | vid 100 pvid untagged | | | | |
+# | | | | + $swp2.20 | |
+# | | | | | |
+# | | + vx100 (vxlan) | | + vx200 (vxlan) | |
+# | | local 192.0.2.17 | | local 192.0.2.17 | |
+# | | remote 192.0.2.34 | | remote 192.0.2.50 | |
+# | | id 1000 dstport $VXPORT | | id 2000 dstport $VXPORT | |
+# | | vid 100 pvid untagged | | | |
+# | +--------------------------------- + +-----------------------------------+ |
+# | |
+# | 192.0.2.32/28 via 192.0.2.18 |
+# | 192.0.2.48/28 via 192.0.2.18 |
+# | |
+# | + $rp1 |
+# | | 192.0.2.17/28 |
+# +----|-----------------------------------------------------------------------+
+# |
+# +----|--------------------------------------------------------+
+# | | VRP2 (vrf) |
+# | + $rp2 |
+# | 192.0.2.18/28 |
+# | | (maybe) HW
+# =============================================================================
+# | | (likely) SW
+# | + v1 (veth) + v3 (veth) |
+# | | 192.0.2.33/28 | 192.0.2.49/28 |
+# +----|---------------------------------------|----------------+
+# | |
+# +----|------------------------------+ +----|------------------------------+
+# | + v2 (veth) NS1 (netns) | | + v4 (veth) NS2 (netns) |
+# | 192.0.2.34/28 | | 192.0.2.50/28 |
+# | | | |
+# | 192.0.2.16/28 via 192.0.2.33 | | 192.0.2.16/28 via 192.0.2.49 |
+# | 192.0.2.50/32 via 192.0.2.33 | | 192.0.2.34/32 via 192.0.2.49 |
+# | | | |
+# | +-------------------------------+ | | +-------------------------------+ |
+# | | BR3 (802.1ad) | | | | BR3 (802.1d) | |
+# | | + vx100 (vxlan) | | | | + vx200 (vxlan) | |
+# | | local 192.0.2.34 | | | | local 192.0.2.50 | |
+# | | remote 192.0.2.17 | | | | remote 192.0.2.17 | |
+# | | remote 192.0.2.50 | | | | remote 192.0.2.34 | |
+# | | id 1000 dstport $VXPORT | | | | id 2000 dstport $VXPORT | |
+# | | vid 100 pvid untagged | | | | | |
+# | | | | | | + w1.20 | |
+# | | | | | | | | |
+# | | + w1 (veth) | | | | + w1 (veth) | |
+# | | | vid 100 pvid untagged | | | | | | |
+# | +--|----------------------------+ | | +--|----------------------------+ |
+# | | | | | |
+# | +--|----------------------------+ | | +--|----------------------------+ |
+# | | | VW2 (vrf) | | | | | VW2 (vrf) | |
+# | | + w2 (veth) | | | | + w2 (veth) | |
+# | | | | | | | | | |
+# | | | | | | | | | |
+# | | + w2.10 | | | | + w2.20 | |
+# | | 192.0.2.3/28 | | | | 192.0.2.4/28 | |
+# | +-------------------------------+ | | +-------------------------------+ |
+# +-----------------------------------+ +-----------------------------------+
+
+: ${VXPORT:=4789}
+export VXPORT
+
+: ${ALL_TESTS:="
+ ping_ipv4
+ "}
+
+NUM_NETIFS=6
+source lib.sh
+
+h1_create()
+{
+ simple_if_init $h1
+ tc qdisc add dev $h1 clsact
+ vlan_create $h1 10 v$h1 192.0.2.1/28
+}
+
+h1_destroy()
+{
+ vlan_destroy $h1 10
+ tc qdisc del dev $h1 clsact
+ simple_if_fini $h1
+}
+
+h2_create()
+{
+ simple_if_init $h2
+ tc qdisc add dev $h2 clsact
+ vlan_create $h2 20 v$h2 192.0.2.2/28
+}
+
+h2_destroy()
+{
+ vlan_destroy $h2 20
+ tc qdisc del dev $h2 clsact
+ simple_if_fini $h2
+}
+
+rp1_set_addr()
+{
+ ip address add dev $rp1 192.0.2.17/28
+
+ ip route add 192.0.2.32/28 nexthop via 192.0.2.18
+ ip route add 192.0.2.48/28 nexthop via 192.0.2.18
+}
+
+rp1_unset_addr()
+{
+ ip route del 192.0.2.48/28 nexthop via 192.0.2.18
+ ip route del 192.0.2.32/28 nexthop via 192.0.2.18
+
+ ip address del dev $rp1 192.0.2.17/28
+}
+
+switch_create()
+{
+ #### BR1 ####
+ ip link add name br1 type bridge vlan_filtering 1 \
+ vlan_protocol 802.1ad vlan_default_pvid 0 mcast_snooping 0
+ ip link set dev br1 addrgenmode none
+ # Make sure the bridge uses the MAC address of the local port and not
+ # that of the VxLAN's device.
+ ip link set dev br1 address $(mac_get $swp1)
+ ip link set dev br1 up
+
+ #### BR2 ####
+ ip link add name br2 type bridge vlan_filtering 0 mcast_snooping 0
+ # Make sure the bridge uses the MAC address of the local port and not
+ # that of the VxLAN's device.
+ ip link set dev br2 address $(mac_get $swp2)
+ ip link set dev br2 up
+
+ ip link set dev $rp1 up
+ rp1_set_addr
+
+ #### VX100 ####
+ ip link add name vx100 type vxlan id 1000 local 192.0.2.17 \
+ dstport "$VXPORT" nolearning noudpcsum tos inherit ttl 100
+ ip link set dev vx100 up
+
+ ip link set dev vx100 master br1
+ bridge vlan add vid 100 dev vx100 pvid untagged
+
+ ip link set dev $swp1 master br1
+ ip link set dev $swp1 up
+ bridge vlan add vid 100 dev $swp1 pvid untagged
+
+ #### VX200 ####
+ ip link add name vx200 type vxlan id 2000 local 192.0.2.17 \
+ dstport "$VXPORT" nolearning noudpcsum tos inherit ttl 100
+ ip link set dev vx200 up
+
+ ip link set dev vx200 master br2
+
+ ip link set dev $swp2 up
+ ip link add name $swp2.20 link $swp2 type vlan id 20
+ ip link set dev $swp2.20 master br2
+ ip link set dev $swp2.20 up
+
+ bridge fdb append dev vx100 00:00:00:00:00:00 dst 192.0.2.34 self
+ bridge fdb append dev vx200 00:00:00:00:00:00 dst 192.0.2.50 self
+}
+
+switch_destroy()
+{
+ bridge fdb del dev vx200 00:00:00:00:00:00 dst 192.0.2.50 self
+ bridge fdb del dev vx100 00:00:00:00:00:00 dst 192.0.2.34 self
+
+ ip link set dev vx200 nomaster
+ ip link set dev vx200 down
+ ip link del dev vx200
+
+ ip link del dev $swp2.20
+ ip link set dev $swp2 down
+ ip link set dev $swp2 nomaster
+
+ bridge vlan del vid 100 dev $swp1
+ ip link set dev $swp1 down
+ ip link set dev $swp1 nomaster
+
+ ip link set dev vx100 nomaster
+ ip link set dev vx100 down
+ ip link del dev vx100
+
+ rp1_unset_addr
+ ip link set dev $rp1 down
+
+ ip link set dev br2 down
+ ip link del dev br2
+
+ ip link set dev br1 down
+ ip link del dev br1
+}
+
+vrp2_create()
+{
+ simple_if_init $rp2 192.0.2.18/28
+ __simple_if_init v1 v$rp2 192.0.2.33/28
+ __simple_if_init v3 v$rp2 192.0.2.49/28
+ tc qdisc add dev v1 clsact
+}
+
+vrp2_destroy()
+{
+ tc qdisc del dev v1 clsact
+ __simple_if_fini v3 192.0.2.49/28
+ __simple_if_fini v1 192.0.2.33/28
+ simple_if_fini $rp2 192.0.2.18/28
+}
+
+ns_init_common()
+{
+ local in_if=$1; shift
+ local in_addr=$1; shift
+ local other_in_addr=$1; shift
+ local vxlan_name=$1; shift
+ local vxlan_id=$1; shift
+ local vlan_id=$1; shift
+ local host_addr=$1; shift
+ local nh_addr=$1; shift
+
+ ip link set dev $in_if up
+ ip address add dev $in_if $in_addr/28
+ tc qdisc add dev $in_if clsact
+
+ ip link add name br3 type bridge vlan_filtering 0
+ ip link set dev br3 up
+
+ ip link add name w1 type veth peer name w2
+
+ ip link set dev w1 master br3
+ ip link set dev w1 up
+
+ ip link add name $vxlan_name type vxlan id $vxlan_id local $in_addr \
+ dstport "$VXPORT"
+ ip link set dev $vxlan_name up
+ bridge fdb append dev $vxlan_name 00:00:00:00:00:00 dst 192.0.2.17 self
+ bridge fdb append dev $vxlan_name 00:00:00:00:00:00 dst $other_in_addr self
+
+ ip link set dev $vxlan_name master br3
+ tc qdisc add dev $vxlan_name clsact
+
+ simple_if_init w2
+ vlan_create w2 $vlan_id vw2 $host_addr/28
+
+ ip route add 192.0.2.16/28 nexthop via $nh_addr
+ ip route add $other_in_addr/32 nexthop via $nh_addr
+}
+export -f ns_init_common
+
+ns1_create()
+{
+ ip netns add ns1
+ ip link set dev v2 netns ns1
+ in_ns ns1 \
+ ns_init_common v2 192.0.2.34 192.0.2.50 vx100 1000 10 192.0.2.3 \
+ 192.0.2.33
+
+ in_ns ns1 bridge vlan add vid 100 dev vx100 pvid untagged
+}
+
+ns1_destroy()
+{
+ ip netns exec ns1 ip link set dev v2 netns 1
+ ip netns del ns1
+}
+
+ns2_create()
+{
+ ip netns add ns2
+ ip link set dev v4 netns ns2
+ in_ns ns2 \
+ ns_init_common v4 192.0.2.50 192.0.2.34 vx200 2000 20 192.0.2.4 \
+ 192.0.2.49
+
+ in_ns ns2 ip link add name w1.20 link w1 type vlan id 20
+ in_ns ns2 ip link set dev w1.20 master br3
+ in_ns ns2 ip link set dev w1.20 up
+}
+
+ns2_destroy()
+{
+ ip netns exec ns2 ip link set dev v4 netns 1
+ ip netns del ns2
+}
+
+setup_prepare()
+{
+ h1=${NETIFS[p1]}
+ swp1=${NETIFS[p2]}
+
+ swp2=${NETIFS[p3]}
+ h2=${NETIFS[p4]}
+
+ rp1=${NETIFS[p5]}
+ rp2=${NETIFS[p6]}
+
+ vrf_prepare
+ forwarding_enable
+
+ h1_create
+ h2_create
+ switch_create
+
+ ip link add name v1 type veth peer name v2
+ ip link add name v3 type veth peer name v4
+ vrp2_create
+ ns1_create
+ ns2_create
+
+ r1_mac=$(in_ns ns1 mac_get w2)
+ r2_mac=$(in_ns ns2 mac_get w2)
+ h2_mac=$(mac_get $h2)
+}
+
+cleanup()
+{
+ pre_cleanup
+
+ ns2_destroy
+ ns1_destroy
+ vrp2_destroy
+ ip link del dev v3
+ ip link del dev v1
+
+ switch_destroy
+ h2_destroy
+ h1_destroy
+
+ forwarding_restore
+ vrf_cleanup
+}
+
+ping_ipv4()
+{
+ ping_test $h1 192.0.2.3 ": local->remote 1 through VxLAN with an 802.1ad bridge"
+ ping_test $h2 192.0.2.4 ": local->remote 2 through VxLAN with an 802.1d bridge"
+}
+
+test_all()
+{
+ echo "Running tests with UDP port $VXPORT"
+ tests_run
+}
+
+trap cleanup EXIT
+
+setup_prepare
+setup_wait
+test_all
+
+exit $EXIT_STATUS
diff --git a/tools/testing/selftests/net/forwarding/ethtool.sh b/tools/testing/selftests/net/forwarding/ethtool.sh
index eb8e2a23bbb4..aa2eafb7b243 100755
--- a/tools/testing/selftests/net/forwarding/ethtool.sh
+++ b/tools/testing/selftests/net/forwarding/ethtool.sh
@@ -50,23 +50,6 @@ cleanup()
h1_destroy
}
-different_speeds_get()
-{
- local dev1=$1; shift
- local dev2=$1; shift
- local with_mode=$1; shift
- local adver=$1; shift
-
- local -a speeds_arr
-
- speeds_arr=($(common_speeds_get $dev1 $dev2 $with_mode $adver))
- if [[ ${#speeds_arr[@]} < 2 ]]; then
- check_err 1 "cannot check different speeds. There are not enough speeds"
- fi
-
- echo ${speeds_arr[0]} ${speeds_arr[1]}
-}
-
same_speeds_autoneg_off()
{
# Check that when each of the reported speeds is forced, the links come
@@ -252,8 +235,6 @@ check_highest_speed_is_chosen()
fi
local -a speeds_arr=($(common_speeds_get $h1 $h2 0 1))
- # Remove the first speed, h1 does not advertise this speed.
- unset speeds_arr[0]
max_speed=${speeds_arr[0]}
for current in ${speeds_arr[@]}; do
@@ -305,6 +286,8 @@ different_speeds_autoneg_on()
ethtool -s $h1 autoneg on
}
+skip_on_veth
+
trap cleanup EXIT
setup_prepare
diff --git a/tools/testing/selftests/net/forwarding/ethtool_extended_state.sh b/tools/testing/selftests/net/forwarding/ethtool_extended_state.sh
new file mode 100755
index 000000000000..17f89c3b7c02
--- /dev/null
+++ b/tools/testing/selftests/net/forwarding/ethtool_extended_state.sh
@@ -0,0 +1,117 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+ALL_TESTS="
+ autoneg
+ autoneg_force_mode
+ no_cable
+"
+
+NUM_NETIFS=2
+source lib.sh
+source ethtool_lib.sh
+
+TIMEOUT=$((WAIT_TIMEOUT * 1000)) # ms
+
+setup_prepare()
+{
+ swp1=${NETIFS[p1]}
+ swp2=${NETIFS[p2]}
+ swp3=$NETIF_NO_CABLE
+}
+
+ethtool_ext_state()
+{
+ local dev=$1; shift
+ local expected_ext_state=$1; shift
+ local expected_ext_substate=${1:-""}; shift
+
+ local ext_state=$(ethtool $dev | grep "Link detected" \
+ | cut -d "(" -f2 | cut -d ")" -f1)
+ local ext_substate=$(echo $ext_state | cut -sd "," -f2 \
+ | sed -e 's/^[[:space:]]*//')
+ ext_state=$(echo $ext_state | cut -d "," -f1)
+
+ if [[ $ext_state != $expected_ext_state ]]; then
+ echo "Expected \"$expected_ext_state\", got \"$ext_state\""
+ return 1
+ fi
+ if [[ $ext_substate != $expected_ext_substate ]]; then
+ echo "Expected \"$expected_ext_substate\", got \"$ext_substate\""
+ return 1
+ fi
+}
+
+autoneg()
+{
+ local msg
+
+ RET=0
+
+ ip link set dev $swp1 up
+
+ msg=$(busywait $TIMEOUT ethtool_ext_state $swp1 \
+ "Autoneg" "No partner detected")
+ check_err $? "$msg"
+
+ log_test "Autoneg, No partner detected"
+
+ ip link set dev $swp1 down
+}
+
+autoneg_force_mode()
+{
+ local msg
+
+ RET=0
+
+ ip link set dev $swp1 up
+ ip link set dev $swp2 up
+
+ local -a speeds_arr=($(different_speeds_get $swp1 $swp2 0 0))
+ local speed1=${speeds_arr[0]}
+ local speed2=${speeds_arr[1]}
+
+ ethtool_set $swp1 speed $speed1 autoneg off
+ ethtool_set $swp2 speed $speed2 autoneg off
+
+ msg=$(busywait $TIMEOUT ethtool_ext_state $swp1 \
+ "Autoneg" "No partner detected during force mode")
+ check_err $? "$msg"
+
+ msg=$(busywait $TIMEOUT ethtool_ext_state $swp2 \
+ "Autoneg" "No partner detected during force mode")
+ check_err $? "$msg"
+
+ log_test "Autoneg, No partner detected during force mode"
+
+ ethtool -s $swp2 autoneg on
+ ethtool -s $swp1 autoneg on
+
+ ip link set dev $swp2 down
+ ip link set dev $swp1 down
+}
+
+no_cable()
+{
+ local msg
+
+ RET=0
+
+ ip link set dev $swp3 up
+
+ msg=$(busywait $TIMEOUT ethtool_ext_state $swp3 "No cable")
+ check_err $? "$msg"
+
+ log_test "No cable"
+
+ ip link set dev $swp3 down
+}
+
+skip_on_veth
+
+setup_prepare
+
+tests_run
+
+exit $EXIT_STATUS
diff --git a/tools/testing/selftests/net/forwarding/ethtool_lib.sh b/tools/testing/selftests/net/forwarding/ethtool_lib.sh
index 925d229a59d8..b9bfb45085af 100644
--- a/tools/testing/selftests/net/forwarding/ethtool_lib.sh
+++ b/tools/testing/selftests/net/forwarding/ethtool_lib.sh
@@ -22,6 +22,40 @@ ethtool_set()
check_err $out "error in configuration. $cmd"
}
+dev_linkmodes_params_get()
+{
+ local dev=$1; shift
+ local adver=$1; shift
+ local -a linkmodes_params
+ local param_count
+ local arr
+
+ if (($adver)); then
+ mode="Advertised link modes"
+ else
+ mode="Supported link modes"
+ fi
+
+ local -a dev_linkmodes=($(dev_speeds_get $dev 1 $adver))
+ for ((i=0; i<${#dev_linkmodes[@]}; i++)); do
+ linkmodes_params[$i]=$(echo -e "${dev_linkmodes[$i]}" | \
+ # Replaces all non numbers with spaces
+ sed -e 's/[^0-9]/ /g' | \
+ # Squeeze spaces in sequence to 1 space
+ tr -s ' ')
+ # Count how many numbers were found in the linkmode
+ param_count=$(echo "${linkmodes_params[$i]}" | wc -w)
+ if [[ $param_count -eq 1 ]]; then
+ linkmodes_params[$i]="${linkmodes_params[$i]} 1"
+ elif [[ $param_count -ge 3 ]]; then
+ arr=(${linkmodes_params[$i]})
+ # Take only first two params
+ linkmodes_params[$i]=$(echo "${arr[@]:0:2}")
+ fi
+ done
+ echo ${linkmodes_params[@]}
+}
+
dev_speeds_get()
{
local dev=$1; shift
@@ -67,3 +101,20 @@ common_speeds_get()
<(printf '%s\n' "${dev1_speeds[@]}" | sort -u) \
<(printf '%s\n' "${dev2_speeds[@]}" | sort -u)
}
+
+different_speeds_get()
+{
+ local dev1=$1; shift
+ local dev2=$1; shift
+ local with_mode=$1; shift
+ local adver=$1; shift
+
+ local -a speeds_arr
+
+ speeds_arr=($(common_speeds_get $dev1 $dev2 $with_mode $adver))
+ if [[ ${#speeds_arr[@]} < 2 ]]; then
+ check_err 1 "cannot check different speeds. There are not enough speeds"
+ fi
+
+ echo ${speeds_arr[0]} ${speeds_arr[1]}
+}
diff --git a/tools/testing/selftests/net/forwarding/ethtool_mm.sh b/tools/testing/selftests/net/forwarding/ethtool_mm.sh
new file mode 100755
index 000000000000..50d5bfb17ef1
--- /dev/null
+++ b/tools/testing/selftests/net/forwarding/ethtool_mm.sh
@@ -0,0 +1,340 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+ALL_TESTS="
+ manual_with_verification_h1_to_h2
+ manual_with_verification_h2_to_h1
+ manual_without_verification_h1_to_h2
+ manual_without_verification_h2_to_h1
+ manual_failed_verification_h1_to_h2
+ manual_failed_verification_h2_to_h1
+ lldp
+"
+
+NUM_NETIFS=2
+REQUIRE_MZ=no
+PREEMPTIBLE_PRIO=0
+source lib.sh
+
+traffic_test()
+{
+ local if=$1; shift
+ local src=$1; shift
+ local num_pkts=10000
+ local before=
+ local after=
+ local delta=
+
+ if [ ${has_pmac_stats[$if]} = false ]; then
+ src="aggregate"
+ fi
+
+ before=$(ethtool_std_stats_get $if "eth-mac" "FramesTransmittedOK" $src)
+
+ $MZ $if -q -c $num_pkts -p 64 -b bcast -t ip -R $PREEMPTIBLE_PRIO
+
+ after=$(ethtool_std_stats_get $if "eth-mac" "FramesTransmittedOK" $src)
+
+ delta=$((after - before))
+
+ # Allow an extra 1% tolerance for random packets sent by the stack
+ [ $delta -ge $num_pkts ] && [ $delta -le $((num_pkts + 100)) ]
+}
+
+manual_with_verification()
+{
+ local tx=$1; shift
+ local rx=$1; shift
+
+ RET=0
+
+ # It isn't completely clear from IEEE 802.3-2018 Figure 99-5: Transmit
+ # Processing state diagram whether the "send_r" variable (send response
+ # to verification frame) should be taken into consideration while the
+ # MAC Merge TX direction is disabled. That being said, at least the
+ # NXP ENETC does not, and requires tx-enabled on in order to respond to
+ # the link partner's verification frames.
+ ethtool --set-mm $rx tx-enabled on
+ ethtool --set-mm $tx verify-enabled on tx-enabled on
+
+ # Wait for verification to finish
+ sleep 1
+
+ ethtool --json --show-mm $tx | jq -r '.[]."verify-status"' | \
+ grep -q 'SUCCEEDED'
+ check_err "$?" "Verification did not succeed"
+
+ ethtool --json --show-mm $tx | jq -r '.[]."tx-active"' | grep -q 'true'
+ check_err "$?" "pMAC TX is not active"
+
+ traffic_test $tx "pmac"
+ check_err "$?" "Traffic did not get sent through $tx's pMAC"
+
+ ethtool --set-mm $tx verify-enabled off tx-enabled off
+ ethtool --set-mm $rx tx-enabled off
+
+ log_test "Manual configuration with verification: $tx to $rx"
+}
+
+manual_with_verification_h1_to_h2()
+{
+ manual_with_verification $h1 $h2
+}
+
+manual_with_verification_h2_to_h1()
+{
+ manual_with_verification $h2 $h1
+}
+
+manual_without_verification()
+{
+ local tx=$1; shift
+ local rx=$1; shift
+
+ RET=0
+
+ ethtool --set-mm $tx verify-enabled off tx-enabled on
+
+ ethtool --json --show-mm $tx | jq -r '.[]."verify-status"' | \
+ grep -q 'DISABLED'
+ check_err "$?" "Verification is not disabled"
+
+ ethtool --json --show-mm $tx | jq -r '.[]."tx-active"' | grep -q 'true'
+ check_err "$?" "pMAC TX is not active"
+
+ traffic_test $tx "pmac"
+ check_err "$?" "Traffic did not get sent through $tx's pMAC"
+
+ ethtool --set-mm $tx verify-enabled off tx-enabled off
+
+ log_test "Manual configuration without verification: $tx to $rx"
+}
+
+manual_without_verification_h1_to_h2()
+{
+ manual_without_verification $h1 $h2
+}
+
+manual_without_verification_h2_to_h1()
+{
+ manual_without_verification $h2 $h1
+}
+
+manual_failed_verification()
+{
+ local tx=$1; shift
+ local rx=$1; shift
+
+ RET=0
+
+ ethtool --set-mm $rx pmac-enabled off
+ ethtool --set-mm $tx verify-enabled on tx-enabled on
+
+ # Wait for verification to time out
+ sleep 1
+
+ ethtool --json --show-mm $tx | jq -r '.[]."verify-status"' | \
+ grep -q 'SUCCEEDED'
+ check_fail "$?" "Verification succeeded when it shouldn't have"
+
+ ethtool --json --show-mm $tx | jq -r '.[]."tx-active"' | grep -q 'true'
+ check_fail "$?" "pMAC TX is active when it shouldn't have"
+
+ traffic_test $tx "emac"
+ check_err "$?" "Traffic did not get sent through $tx's eMAC"
+
+ ethtool --set-mm $tx verify-enabled off tx-enabled off
+ ethtool --set-mm $rx pmac-enabled on
+
+ log_test "Manual configuration with failed verification: $tx to $rx"
+}
+
+manual_failed_verification_h1_to_h2()
+{
+ manual_failed_verification $h1 $h2
+}
+
+manual_failed_verification_h2_to_h1()
+{
+ manual_failed_verification $h2 $h1
+}
+
+smallest_supported_add_frag_size()
+{
+ local iface=$1
+ local rx_min_frag_size=
+
+ rx_min_frag_size=$(ethtool --json --show-mm $iface | \
+ jq '.[]."rx-min-frag-size"')
+
+ if [ $rx_min_frag_size -le 60 ]; then
+ echo 0
+ elif [ $rx_min_frag_size -le 124 ]; then
+ echo 1
+ elif [ $rx_min_frag_size -le 188 ]; then
+ echo 2
+ elif [ $rx_min_frag_size -le 252 ]; then
+ echo 3
+ else
+ echo "$iface: RX min frag size $rx_min_frag_size cannot be advertised over LLDP"
+ exit 1
+ fi
+}
+
+expected_add_frag_size()
+{
+ local iface=$1
+ local requested=$2
+ local min=$(smallest_supported_add_frag_size $iface)
+
+ [ $requested -le $min ] && echo $min || echo $requested
+}
+
+lldp_change_add_frag_size()
+{
+ local add_frag_size=$1
+ local pattern=
+
+ lldptool -T -i $h1 -V addEthCaps addFragSize=$add_frag_size >/dev/null
+ # Wait for TLVs to be received
+ sleep 2
+ pattern=$(printf "Additional fragment size: %d" \
+ $(expected_add_frag_size $h1 $add_frag_size))
+ lldptool -i $h2 -t -n -V addEthCaps | grep -q "$pattern"
+}
+
+lldp()
+{
+ RET=0
+
+ systemctl start lldpad
+
+ # Configure the interfaces to receive and transmit LLDPDUs
+ lldptool -L -i $h1 adminStatus=rxtx >/dev/null
+ lldptool -L -i $h2 adminStatus=rxtx >/dev/null
+
+ # Enable the transmission of Additional Ethernet Capabilities TLV
+ lldptool -T -i $h1 -V addEthCaps enableTx=yes >/dev/null
+ lldptool -T -i $h2 -V addEthCaps enableTx=yes >/dev/null
+
+ # Wait for TLVs to be received
+ sleep 2
+
+ lldptool -i $h1 -t -n -V addEthCaps | \
+ grep -q "Preemption capability active"
+ check_err "$?" "$h1 pMAC TX is not active"
+
+ lldptool -i $h2 -t -n -V addEthCaps | \
+ grep -q "Preemption capability active"
+ check_err "$?" "$h2 pMAC TX is not active"
+
+ lldp_change_add_frag_size 3
+ check_err "$?" "addFragSize 3"
+
+ lldp_change_add_frag_size 2
+ check_err "$?" "addFragSize 2"
+
+ lldp_change_add_frag_size 1
+ check_err "$?" "addFragSize 1"
+
+ lldp_change_add_frag_size 0
+ check_err "$?" "addFragSize 0"
+
+ traffic_test $h1 "pmac"
+ check_err "$?" "Traffic did not get sent through $h1's pMAC"
+
+ traffic_test $h2 "pmac"
+ check_err "$?" "Traffic did not get sent through $h2's pMAC"
+
+ systemctl stop lldpad
+
+ log_test "LLDP"
+}
+
+h1_create()
+{
+ ip link set dev $h1 up
+
+ tc qdisc add dev $h1 root mqprio num_tc 4 map 0 1 2 3 \
+ queues 1@0 1@1 1@2 1@3 \
+ fp P E E E \
+ hw 1
+
+ ethtool --set-mm $h1 pmac-enabled on tx-enabled off verify-enabled off
+}
+
+h2_create()
+{
+ ip link set dev $h2 up
+
+ ethtool --set-mm $h2 pmac-enabled on tx-enabled off verify-enabled off
+
+ tc qdisc add dev $h2 root mqprio num_tc 4 map 0 1 2 3 \
+ queues 1@0 1@1 1@2 1@3 \
+ fp P E E E \
+ hw 1
+}
+
+h1_destroy()
+{
+ ethtool --set-mm $h1 pmac-enabled off tx-enabled off verify-enabled off
+
+ tc qdisc del dev $h1 root
+
+ ip link set dev $h1 down
+}
+
+h2_destroy()
+{
+ tc qdisc del dev $h2 root
+
+ ethtool --set-mm $h2 pmac-enabled off tx-enabled off verify-enabled off
+
+ ip link set dev $h2 down
+}
+
+setup_prepare()
+{
+ h1=${NETIFS[p1]}
+ h2=${NETIFS[p2]}
+
+ h1_create
+ h2_create
+}
+
+cleanup()
+{
+ pre_cleanup
+
+ h2_destroy
+ h1_destroy
+}
+
+check_ethtool_mm_support
+check_tc_fp_support
+require_command lldptool
+bail_on_lldpad "autoconfigure the MAC Merge layer" "configure it manually"
+
+for netif in ${NETIFS[@]}; do
+ ethtool --show-mm $netif 2>&1 &> /dev/null
+ if [[ $? -ne 0 ]]; then
+ echo "SKIP: $netif does not support MAC Merge"
+ exit $ksft_skip
+ fi
+
+ if check_ethtool_pmac_std_stats_support $netif eth-mac; then
+ has_pmac_stats[$netif]=true
+ else
+ has_pmac_stats[$netif]=false
+ echo "$netif does not report pMAC statistics, falling back to aggregate"
+ fi
+done
+
+trap cleanup EXIT
+
+setup_prepare
+setup_wait
+
+tests_run
+
+exit $EXIT_STATUS
diff --git a/tools/testing/selftests/net/forwarding/ethtool_rmon.sh b/tools/testing/selftests/net/forwarding/ethtool_rmon.sh
new file mode 100755
index 000000000000..41a34a61f763
--- /dev/null
+++ b/tools/testing/selftests/net/forwarding/ethtool_rmon.sh
@@ -0,0 +1,143 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+ALL_TESTS="
+ rmon_rx_histogram
+ rmon_tx_histogram
+"
+
+NUM_NETIFS=2
+source lib.sh
+
+ETH_FCS_LEN=4
+ETH_HLEN=$((6+6+2))
+
+declare -A netif_mtu
+
+ensure_mtu()
+{
+ local iface=$1; shift
+ local len=$1; shift
+ local current=$(ip -j link show dev $iface | jq -r '.[0].mtu')
+ local required=$((len - ETH_HLEN - ETH_FCS_LEN))
+
+ if [ $current -lt $required ]; then
+ ip link set dev $iface mtu $required || return 1
+ fi
+}
+
+bucket_test()
+{
+ local iface=$1; shift
+ local neigh=$1; shift
+ local set=$1; shift
+ local bucket=$1; shift
+ local len=$1; shift
+ local num_rx=10000
+ local num_tx=20000
+ local expected=
+ local before=
+ local after=
+ local delta=
+
+ # Mausezahn does not include FCS bytes in its length - but the
+ # histogram counters do
+ len=$((len - ETH_FCS_LEN))
+
+ before=$(ethtool --json -S $iface --groups rmon | \
+ jq -r ".[0].rmon[\"${set}-pktsNtoM\"][$bucket].val")
+
+ # Send 10k one way and 20k in the other, to detect counters
+ # mapped to the wrong direction
+ $MZ $neigh -q -c $num_rx -p $len -a own -b bcast -d 10us
+ $MZ $iface -q -c $num_tx -p $len -a own -b bcast -d 10us
+
+ after=$(ethtool --json -S $iface --groups rmon | \
+ jq -r ".[0].rmon[\"${set}-pktsNtoM\"][$bucket].val")
+
+ delta=$((after - before))
+
+ expected=$([ $set = rx ] && echo $num_rx || echo $num_tx)
+
+ # Allow some extra tolerance for other packets sent by the stack
+ [ $delta -ge $expected ] && [ $delta -le $((expected + 100)) ]
+}
+
+rmon_histogram()
+{
+ local iface=$1; shift
+ local neigh=$1; shift
+ local set=$1; shift
+ local nbuckets=0
+ local step=
+
+ RET=0
+
+ while read -r -a bucket; do
+ step="$set-pkts${bucket[0]}to${bucket[1]} on $iface"
+
+ for if in $iface $neigh; do
+ if ! ensure_mtu $if ${bucket[0]}; then
+ log_test_skip "$if does not support the required MTU for $step"
+ return
+ fi
+ done
+
+ if ! bucket_test $iface $neigh $set $nbuckets ${bucket[0]}; then
+ check_err 1 "$step failed"
+ return 1
+ fi
+ log_test "$step"
+ nbuckets=$((nbuckets + 1))
+ done < <(ethtool --json -S $iface --groups rmon | \
+ jq -r ".[0].rmon[\"${set}-pktsNtoM\"][]|[.low, .high]|@tsv" 2>/dev/null)
+
+ if [ $nbuckets -eq 0 ]; then
+ log_test_skip "$iface does not support $set histogram counters"
+ return
+ fi
+}
+
+rmon_rx_histogram()
+{
+ rmon_histogram $h1 $h2 rx
+ rmon_histogram $h2 $h1 rx
+}
+
+rmon_tx_histogram()
+{
+ rmon_histogram $h1 $h2 tx
+ rmon_histogram $h2 $h1 tx
+}
+
+setup_prepare()
+{
+ h1=${NETIFS[p1]}
+ h2=${NETIFS[p2]}
+
+ for iface in $h1 $h2; do
+ netif_mtu[$iface]=$(ip -j link show dev $iface | jq -r '.[0].mtu')
+ ip link set dev $iface up
+ done
+}
+
+cleanup()
+{
+ pre_cleanup
+
+ for iface in $h2 $h1; do
+ ip link set dev $iface \
+ mtu ${netif_mtu[$iface]} \
+ down
+ done
+}
+
+check_ethtool_counter_group_support
+trap cleanup EXIT
+
+setup_prepare
+setup_wait
+
+tests_run
+
+exit $EXIT_STATUS
diff --git a/tools/testing/selftests/net/forwarding/fib_offload_lib.sh b/tools/testing/selftests/net/forwarding/fib_offload_lib.sh
index 66496659bea7..1b3b46292179 100644
--- a/tools/testing/selftests/net/forwarding/fib_offload_lib.sh
+++ b/tools/testing/selftests/net/forwarding/fib_offload_lib.sh
@@ -99,15 +99,15 @@ fib_ipv4_tos_test()
fib4_trap_check $ns "192.0.2.0/24 dev dummy1 tos 0 metric 1024" false
check_err $? "Route not in hardware when should"
- ip -n $ns route add 192.0.2.0/24 dev dummy1 tos 2 metric 1024
- fib4_trap_check $ns "192.0.2.0/24 dev dummy1 tos 2 metric 1024" false
+ ip -n $ns route add 192.0.2.0/24 dev dummy1 tos 8 metric 1024
+ fib4_trap_check $ns "192.0.2.0/24 dev dummy1 tos 8 metric 1024" false
check_err $? "Highest TOS route not in hardware when should"
fib4_trap_check $ns "192.0.2.0/24 dev dummy1 tos 0 metric 1024" true
check_err $? "Lowest TOS route still in hardware when should not"
- ip -n $ns route add 192.0.2.0/24 dev dummy1 tos 1 metric 1024
- fib4_trap_check $ns "192.0.2.0/24 dev dummy1 tos 1 metric 1024" true
+ ip -n $ns route add 192.0.2.0/24 dev dummy1 tos 4 metric 1024
+ fib4_trap_check $ns "192.0.2.0/24 dev dummy1 tos 4 metric 1024" true
check_err $? "Middle TOS route in hardware when should not"
log_test "IPv4 routes with TOS"
@@ -224,7 +224,7 @@ fib_ipv4_plen_test()
ip -n $ns link set dev dummy1 up
# Add two routes with the same key and different prefix length and
- # make sure both are in hardware. It can be verfied that both are
+ # make sure both are in hardware. It can be verified that both are
# sharing the same leaf by checking the /proc/net/fib_trie
ip -n $ns route add 192.0.2.0/24 dev dummy1
ip -n $ns route add 192.0.2.0/25 dev dummy1
@@ -277,11 +277,11 @@ fib_ipv4_replay_tos_test()
ip -n $ns link set dev dummy1 up
ip -n $ns route add 192.0.2.0/24 dev dummy1 tos 0
- ip -n $ns route add 192.0.2.0/24 dev dummy1 tos 1
+ ip -n $ns route add 192.0.2.0/24 dev dummy1 tos 4
devlink -N $ns dev reload $devlink_dev
- fib4_trap_check $ns "192.0.2.0/24 dev dummy1 tos 1" false
+ fib4_trap_check $ns "192.0.2.0/24 dev dummy1 tos 4" false
check_err $? "Highest TOS route not in hardware when should"
fib4_trap_check $ns "192.0.2.0/24 dev dummy1 tos 0" true
diff --git a/tools/testing/selftests/net/forwarding/forwarding.config.sample b/tools/testing/selftests/net/forwarding/forwarding.config.sample
index e2adb533c8fc..1fc4f0242fc5 100644
--- a/tools/testing/selftests/net/forwarding/forwarding.config.sample
+++ b/tools/testing/selftests/net/forwarding/forwarding.config.sample
@@ -13,6 +13,11 @@ NETIFS[p5]=veth4
NETIFS[p6]=veth5
NETIFS[p7]=veth6
NETIFS[p8]=veth7
+NETIFS[p9]=veth8
+NETIFS[p10]=veth9
+
+# Port that does not have a cable connected.
+NETIF_NO_CABLE=eth8
##############################################################################
# Defines
@@ -23,6 +28,8 @@ PING=ping
PING6=ping6
# Packet generator. Some distributions use 'mz'.
MZ=mausezahn
+# mausezahn delay between transmissions in microseconds.
+MZ_DELAY=0
# Time to wait after interfaces participating in the test are all UP
WAIT_TIME=5
# Whether to pause on failure or not.
@@ -36,3 +43,11 @@ NETIF_CREATE=yes
# Timeout (in seconds) before ping exits regardless of how many packets have
# been sent or received
PING_TIMEOUT=5
+# Minimum ageing_time (in centiseconds) supported by hardware
+LOW_AGEING_TIME=1000
+# Flag for tc match, supposed to be skip_sw/skip_hw which means do not process
+# filter by software/hardware
+TC_FLAG=skip_hw
+# IPv6 traceroute utility name.
+TROUTE6=traceroute6
+
diff --git a/tools/testing/selftests/net/forwarding/gre_custom_multipath_hash.sh b/tools/testing/selftests/net/forwarding/gre_custom_multipath_hash.sh
new file mode 100755
index 000000000000..9788bd0f6e8b
--- /dev/null
+++ b/tools/testing/selftests/net/forwarding/gre_custom_multipath_hash.sh
@@ -0,0 +1,464 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+#
+# Test traffic distribution when there are multiple paths between an IPv4 GRE
+# tunnel. The tunnel carries IPv4 and IPv6 traffic between multiple hosts.
+# Multiple routes are in the underlay network. With the default multipath
+# policy, SW2 will only look at the outer IP addresses, hence only a single
+# route would be used.
+#
+# +--------------------------------+
+# | H1 |
+# | $h1 + |
+# | 198.51.100.{2-253}/24 | |
+# | 2001:db8:1::{2-fd}/64 | |
+# +-------------------------|------+
+# |
+# +-------------------------|------------------+
+# | SW1 | |
+# | $ol1 + |
+# | 198.51.100.1/24 |
+# | 2001:db8:1::1/64 |
+# | |
+# | + g1 (gre) |
+# | loc=192.0.2.1 |
+# | rem=192.0.2.2 --. |
+# | tos=inherit | |
+# | v |
+# | + $ul1 |
+# | | 192.0.2.17/28 |
+# +---------------------|----------------------+
+# |
+# +---------------------|----------------------+
+# | SW2 | |
+# | $ul21 + |
+# | 192.0.2.18/28 | |
+# | | |
+# ! __________________+___ |
+# | / \ |
+# | | | |
+# | + $ul22.111 (vlan) + $ul22.222 (vlan) |
+# | | 192.0.2.33/28 | 192.0.2.49/28 |
+# | | | |
+# +--|----------------------|------------------+
+# | |
+# +--|----------------------|------------------+
+# | | | |
+# | + $ul32.111 (vlan) + $ul32.222 (vlan) |
+# | | 192.0.2.34/28 | 192.0.2.50/28 |
+# | | | |
+# | \__________________+___/ |
+# | | |
+# | | |
+# | $ul31 + |
+# | 192.0.2.65/28 | SW3 |
+# +---------------------|----------------------+
+# |
+# +---------------------|----------------------+
+# | + $ul4 |
+# | ^ 192.0.2.66/28 |
+# | | |
+# | + g2 (gre) | |
+# | loc=192.0.2.2 | |
+# | rem=192.0.2.1 --' |
+# | tos=inherit |
+# | |
+# | $ol4 + |
+# | 203.0.113.1/24 | |
+# | 2001:db8:2::1/64 | SW4 |
+# +-------------------------|------------------+
+# |
+# +-------------------------|------+
+# | | |
+# | $h2 + |
+# | 203.0.113.{2-253}/24 |
+# | 2001:db8:2::{2-fd}/64 H2 |
+# +--------------------------------+
+
+ALL_TESTS="
+ ping_ipv4
+ ping_ipv6
+ custom_hash
+"
+
+NUM_NETIFS=10
+source lib.sh
+
+h1_create()
+{
+ simple_if_init $h1 198.51.100.2/24 2001:db8:1::2/64
+ ip route add vrf v$h1 default via 198.51.100.1 dev $h1
+ ip -6 route add vrf v$h1 default via 2001:db8:1::1 dev $h1
+}
+
+h1_destroy()
+{
+ ip -6 route del vrf v$h1 default
+ ip route del vrf v$h1 default
+ simple_if_fini $h1 198.51.100.2/24 2001:db8:1::2/64
+}
+
+sw1_create()
+{
+ simple_if_init $ol1 198.51.100.1/24 2001:db8:1::1/64
+ __simple_if_init $ul1 v$ol1 192.0.2.17/28
+
+ tunnel_create g1 gre 192.0.2.1 192.0.2.2 tos inherit dev v$ol1
+ __simple_if_init g1 v$ol1 192.0.2.1/32
+ ip route add vrf v$ol1 192.0.2.2/32 via 192.0.2.18
+
+ ip route add vrf v$ol1 203.0.113.0/24 dev g1
+ ip -6 route add vrf v$ol1 2001:db8:2::/64 dev g1
+}
+
+sw1_destroy()
+{
+ ip -6 route del vrf v$ol1 2001:db8:2::/64
+ ip route del vrf v$ol1 203.0.113.0/24
+
+ ip route del vrf v$ol1 192.0.2.2/32
+ __simple_if_fini g1 192.0.2.1/32
+ tunnel_destroy g1
+
+ __simple_if_fini $ul1 192.0.2.17/28
+ simple_if_fini $ol1 198.51.100.1/24 2001:db8:1::1/64
+}
+
+sw2_create()
+{
+ simple_if_init $ul21 192.0.2.18/28
+ __simple_if_init $ul22 v$ul21
+ vlan_create $ul22 111 v$ul21 192.0.2.33/28
+ vlan_create $ul22 222 v$ul21 192.0.2.49/28
+
+ ip route add vrf v$ul21 192.0.2.1/32 via 192.0.2.17
+ ip route add vrf v$ul21 192.0.2.2/32 \
+ nexthop via 192.0.2.34 \
+ nexthop via 192.0.2.50
+}
+
+sw2_destroy()
+{
+ ip route del vrf v$ul21 192.0.2.2/32
+ ip route del vrf v$ul21 192.0.2.1/32
+
+ vlan_destroy $ul22 222
+ vlan_destroy $ul22 111
+ __simple_if_fini $ul22
+ simple_if_fini $ul21 192.0.2.18/28
+}
+
+sw3_create()
+{
+ simple_if_init $ul31 192.0.2.65/28
+ __simple_if_init $ul32 v$ul31
+ vlan_create $ul32 111 v$ul31 192.0.2.34/28
+ vlan_create $ul32 222 v$ul31 192.0.2.50/28
+
+ ip route add vrf v$ul31 192.0.2.2/32 via 192.0.2.66
+ ip route add vrf v$ul31 192.0.2.1/32 \
+ nexthop via 192.0.2.33 \
+ nexthop via 192.0.2.49
+
+ tc qdisc add dev $ul32 clsact
+ tc filter add dev $ul32 ingress pref 111 prot 802.1Q \
+ flower vlan_id 111 action pass
+ tc filter add dev $ul32 ingress pref 222 prot 802.1Q \
+ flower vlan_id 222 action pass
+}
+
+sw3_destroy()
+{
+ tc qdisc del dev $ul32 clsact
+
+ ip route del vrf v$ul31 192.0.2.1/32
+ ip route del vrf v$ul31 192.0.2.2/32
+
+ vlan_destroy $ul32 222
+ vlan_destroy $ul32 111
+ __simple_if_fini $ul32
+ simple_if_fini $ul31 192.0.2.65/28
+}
+
+sw4_create()
+{
+ simple_if_init $ol4 203.0.113.1/24 2001:db8:2::1/64
+ __simple_if_init $ul4 v$ol4 192.0.2.66/28
+
+ tunnel_create g2 gre 192.0.2.2 192.0.2.1 tos inherit dev v$ol4
+ __simple_if_init g2 v$ol4 192.0.2.2/32
+ ip route add vrf v$ol4 192.0.2.1/32 via 192.0.2.65
+
+ ip route add vrf v$ol4 198.51.100.0/24 dev g2
+ ip -6 route add vrf v$ol4 2001:db8:1::/64 dev g2
+}
+
+sw4_destroy()
+{
+ ip -6 route del vrf v$ol4 2001:db8:1::/64
+ ip route del vrf v$ol4 198.51.100.0/24
+
+ ip route del vrf v$ol4 192.0.2.1/32
+ __simple_if_fini g2 192.0.2.2/32
+ tunnel_destroy g2
+
+ __simple_if_fini $ul4 192.0.2.66/28
+ simple_if_fini $ol4 203.0.113.1/24 2001:db8:2::1/64
+}
+
+h2_create()
+{
+ simple_if_init $h2 203.0.113.2/24 2001:db8:2::2/64
+ ip route add vrf v$h2 default via 203.0.113.1 dev $h2
+ ip -6 route add vrf v$h2 default via 2001:db8:2::1 dev $h2
+}
+
+h2_destroy()
+{
+ ip -6 route del vrf v$h2 default
+ ip route del vrf v$h2 default
+ simple_if_fini $h2 203.0.113.2/24 2001:db8:2::2/64
+}
+
+setup_prepare()
+{
+ h1=${NETIFS[p1]}
+
+ ol1=${NETIFS[p2]}
+ ul1=${NETIFS[p3]}
+
+ ul21=${NETIFS[p4]}
+ ul22=${NETIFS[p5]}
+
+ ul32=${NETIFS[p6]}
+ ul31=${NETIFS[p7]}
+
+ ul4=${NETIFS[p8]}
+ ol4=${NETIFS[p9]}
+
+ h2=${NETIFS[p10]}
+
+ vrf_prepare
+ h1_create
+ sw1_create
+ sw2_create
+ sw3_create
+ sw4_create
+ h2_create
+
+ forwarding_enable
+}
+
+cleanup()
+{
+ pre_cleanup
+
+ forwarding_restore
+
+ h2_destroy
+ sw4_destroy
+ sw3_destroy
+ sw2_destroy
+ sw1_destroy
+ h1_destroy
+ vrf_cleanup
+}
+
+ping_ipv4()
+{
+ ping_test $h1 203.0.113.2
+}
+
+ping_ipv6()
+{
+ ping6_test $h1 2001:db8:2::2
+}
+
+send_src_ipv4()
+{
+ ip vrf exec v$h1 $MZ $h1 -q -p 64 \
+ -A "198.51.100.2-198.51.100.253" -B 203.0.113.2 \
+ -d $MZ_DELAY -c 50 -t udp "sp=20000,dp=30000"
+}
+
+send_dst_ipv4()
+{
+ ip vrf exec v$h1 $MZ $h1 -q -p 64 \
+ -A 198.51.100.2 -B "203.0.113.2-203.0.113.253" \
+ -d $MZ_DELAY -c 50 -t udp "sp=20000,dp=30000"
+}
+
+send_src_udp4()
+{
+ ip vrf exec v$h1 $MZ $h1 -q -p 64 \
+ -A 198.51.100.2 -B 203.0.113.2 \
+ -d $MZ_DELAY -t udp "sp=0-32768,dp=30000"
+}
+
+send_dst_udp4()
+{
+ ip vrf exec v$h1 $MZ $h1 -q -p 64 \
+ -A 198.51.100.2 -B 203.0.113.2 \
+ -d $MZ_DELAY -t udp "sp=20000,dp=0-32768"
+}
+
+send_src_ipv6()
+{
+ ip vrf exec v$h1 $MZ -6 $h1 -q -p 64 \
+ -A "2001:db8:1::2-2001:db8:1::fd" -B 2001:db8:2::2 \
+ -d $MZ_DELAY -c 50 -t udp "sp=20000,dp=30000"
+}
+
+send_dst_ipv6()
+{
+ ip vrf exec v$h1 $MZ -6 $h1 -q -p 64 \
+ -A 2001:db8:1::2 -B "2001:db8:2::2-2001:db8:2::fd" \
+ -d $MZ_DELAY -c 50 -t udp "sp=20000,dp=30000"
+}
+
+send_flowlabel()
+{
+ # Generate 16384 echo requests, each with a random flow label.
+ for _ in $(seq 1 16384); do
+ ip vrf exec v$h1 \
+ $PING6 2001:db8:2::2 -F 0 -c 1 -q >/dev/null 2>&1
+ done
+}
+
+send_src_udp6()
+{
+ ip vrf exec v$h1 $MZ -6 $h1 -q -p 64 \
+ -A 2001:db8:1::2 -B 2001:db8:2::2 \
+ -d $MZ_DELAY -t udp "sp=0-32768,dp=30000"
+}
+
+send_dst_udp6()
+{
+ ip vrf exec v$h1 $MZ -6 $h1 -q -p 64 \
+ -A 2001:db8:1::2 -B 2001:db8:2::2 \
+ -d $MZ_DELAY -t udp "sp=20000,dp=0-32768"
+}
+
+custom_hash_test()
+{
+ local field="$1"; shift
+ local balanced="$1"; shift
+ local send_flows="$@"
+
+ RET=0
+
+ local t0_111=$(tc_rule_stats_get $ul32 111 ingress)
+ local t0_222=$(tc_rule_stats_get $ul32 222 ingress)
+
+ $send_flows
+
+ local t1_111=$(tc_rule_stats_get $ul32 111 ingress)
+ local t1_222=$(tc_rule_stats_get $ul32 222 ingress)
+
+ local d111=$((t1_111 - t0_111))
+ local d222=$((t1_222 - t0_222))
+
+ local diff=$((d222 - d111))
+ local sum=$((d111 + d222))
+
+ local pct=$(echo "$diff / $sum * 100" | bc -l)
+ local is_balanced=$(echo "-20 <= $pct && $pct <= 20" | bc)
+
+ [[ ( $is_balanced -eq 1 && $balanced == "balanced" ) ||
+ ( $is_balanced -eq 0 && $balanced == "unbalanced" ) ]]
+ check_err $? "Expected traffic to be $balanced, but it is not"
+
+ log_test "Multipath hash field: $field ($balanced)"
+ log_info "Packets sent on path1 / path2: $d111 / $d222"
+}
+
+custom_hash_v4()
+{
+ log_info "Running IPv4 overlay custom multipath hash tests"
+
+ # Prevent the neighbour table from overflowing, as different neighbour
+ # entries will be created on $ol4 when using different destination IPs.
+ sysctl_set net.ipv4.neigh.default.gc_thresh1 1024
+ sysctl_set net.ipv4.neigh.default.gc_thresh2 1024
+ sysctl_set net.ipv4.neigh.default.gc_thresh3 1024
+
+ sysctl_set net.ipv4.fib_multipath_hash_fields 0x0040
+ custom_hash_test "Inner source IP" "balanced" send_src_ipv4
+ custom_hash_test "Inner source IP" "unbalanced" send_dst_ipv4
+
+ sysctl_set net.ipv4.fib_multipath_hash_fields 0x0080
+ custom_hash_test "Inner destination IP" "balanced" send_dst_ipv4
+ custom_hash_test "Inner destination IP" "unbalanced" send_src_ipv4
+
+ sysctl_set net.ipv4.fib_multipath_hash_fields 0x0400
+ custom_hash_test "Inner source port" "balanced" send_src_udp4
+ custom_hash_test "Inner source port" "unbalanced" send_dst_udp4
+
+ sysctl_set net.ipv4.fib_multipath_hash_fields 0x0800
+ custom_hash_test "Inner destination port" "balanced" send_dst_udp4
+ custom_hash_test "Inner destination port" "unbalanced" send_src_udp4
+
+ sysctl_restore net.ipv4.neigh.default.gc_thresh3
+ sysctl_restore net.ipv4.neigh.default.gc_thresh2
+ sysctl_restore net.ipv4.neigh.default.gc_thresh1
+}
+
+custom_hash_v6()
+{
+ log_info "Running IPv6 overlay custom multipath hash tests"
+
+ # Prevent the neighbour table from overflowing, as different neighbour
+ # entries will be created on $ol4 when using different destination IPs.
+ sysctl_set net.ipv6.neigh.default.gc_thresh1 1024
+ sysctl_set net.ipv6.neigh.default.gc_thresh2 1024
+ sysctl_set net.ipv6.neigh.default.gc_thresh3 1024
+
+ sysctl_set net.ipv4.fib_multipath_hash_fields 0x0040
+ custom_hash_test "Inner source IP" "balanced" send_src_ipv6
+ custom_hash_test "Inner source IP" "unbalanced" send_dst_ipv6
+
+ sysctl_set net.ipv4.fib_multipath_hash_fields 0x0080
+ custom_hash_test "Inner destination IP" "balanced" send_dst_ipv6
+ custom_hash_test "Inner destination IP" "unbalanced" send_src_ipv6
+
+ sysctl_set net.ipv4.fib_multipath_hash_fields 0x0200
+ custom_hash_test "Inner flowlabel" "balanced" send_flowlabel
+ custom_hash_test "Inner flowlabel" "unbalanced" send_src_ipv6
+
+ sysctl_set net.ipv4.fib_multipath_hash_fields 0x0400
+ custom_hash_test "Inner source port" "balanced" send_src_udp6
+ custom_hash_test "Inner source port" "unbalanced" send_dst_udp6
+
+ sysctl_set net.ipv4.fib_multipath_hash_fields 0x0800
+ custom_hash_test "Inner destination port" "balanced" send_dst_udp6
+ custom_hash_test "Inner destination port" "unbalanced" send_src_udp6
+
+ sysctl_restore net.ipv6.neigh.default.gc_thresh3
+ sysctl_restore net.ipv6.neigh.default.gc_thresh2
+ sysctl_restore net.ipv6.neigh.default.gc_thresh1
+}
+
+custom_hash()
+{
+ # Test that when the hash policy is set to custom, traffic is
+ # distributed only according to the fields set in the
+ # fib_multipath_hash_fields sysctl.
+ #
+ # Each time set a different field and make sure traffic is only
+ # distributed when the field is changed in the packet stream.
+
+ sysctl_set net.ipv4.fib_multipath_hash_policy 3
+
+ custom_hash_v4
+ custom_hash_v6
+
+ sysctl_restore net.ipv4.fib_multipath_hash_policy
+}
+
+trap cleanup EXIT
+
+setup_prepare
+setup_wait
+tests_run
+
+exit $EXIT_STATUS
diff --git a/tools/testing/selftests/net/forwarding/gre_inner_v4_multipath.sh b/tools/testing/selftests/net/forwarding/gre_inner_v4_multipath.sh
index e4009f658003..efca6114a3ce 100755
--- a/tools/testing/selftests/net/forwarding/gre_inner_v4_multipath.sh
+++ b/tools/testing/selftests/net/forwarding/gre_inner_v4_multipath.sh
@@ -267,7 +267,7 @@ multipath4_test()
ip vrf exec v$h1 \
$MZ $h1 -q -p 64 -A "192.0.3.2-192.0.3.62" -B "192.0.4.2-192.0.4.62" \
- -d 1msec -c 50 -t udp "sp=1024,dp=1024"
+ -d $MZ_DELAY -c 50 -t udp "sp=1024,dp=1024"
sleep 1
local t1_111=$(tc_rule_stats_get $ul32 111 ingress)
diff --git a/tools/testing/selftests/net/forwarding/gre_inner_v6_multipath.sh b/tools/testing/selftests/net/forwarding/gre_inner_v6_multipath.sh
index e449475c4d3e..a71ad39fc0c3 100755
--- a/tools/testing/selftests/net/forwarding/gre_inner_v6_multipath.sh
+++ b/tools/testing/selftests/net/forwarding/gre_inner_v6_multipath.sh
@@ -266,9 +266,9 @@ multipath6_test()
local t0_222=$(tc_rule_stats_get $ul32 222 ingress)
ip vrf exec v$h1 \
- $MZ $h1 -6 -q -p 64 -A "2001:db8:1::2-2001:db8:1::1e" \
- -B "2001:db8:2::2-2001:db8:2::1e" \
- -d 1msec -c 50 -t udp "sp=1024,dp=1024"
+ $MZ $h1 -6 -q -p 64 -A "2001:db8:1::2-2001:db8:1::3e" \
+ -B "2001:db8:2::2-2001:db8:2::3e" \
+ -d $MZ_DELAY -c 50 -t udp "sp=1024,dp=1024"
sleep 1
local t1_111=$(tc_rule_stats_get $ul32 111 ingress)
diff --git a/tools/testing/selftests/net/forwarding/gre_multipath.sh b/tools/testing/selftests/net/forwarding/gre_multipath.sh
index a8d8e8b3dc81..57531c1d884d 100755
--- a/tools/testing/selftests/net/forwarding/gre_multipath.sh
+++ b/tools/testing/selftests/net/forwarding/gre_multipath.sh
@@ -220,7 +220,7 @@ multipath4_test()
ip vrf exec v$h1 \
$MZ $h1 -q -p 64 -A 192.0.2.1 -B 192.0.2.18 \
- -d 1msec -t udp "sp=1024,dp=0-32768"
+ -d $MZ_DELAY -t udp "sp=1024,dp=0-32768"
local t1_111=$(tc_rule_stats_get $ul2 111 ingress)
local t1_222=$(tc_rule_stats_get $ul2 222 ingress)
diff --git a/tools/testing/selftests/net/forwarding/gre_multipath_nh.sh b/tools/testing/selftests/net/forwarding/gre_multipath_nh.sh
new file mode 100755
index 000000000000..7d5b2b9cc133
--- /dev/null
+++ b/tools/testing/selftests/net/forwarding/gre_multipath_nh.sh
@@ -0,0 +1,319 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+# Test traffic distribution when a wECMP route forwards traffic to two GRE
+# tunnels.
+#
+# +-------------------------+
+# | H1 |
+# | $h1 + |
+# | 192.0.2.1/28 | |
+# | 2001:db8:1::1/64 | |
+# +-------------------|-----+
+# |
+# +-------------------|------------------------+
+# | SW1 | |
+# | $ol1 + |
+# | 192.0.2.2/28 |
+# | 2001:db8:1::2/64 |
+# | |
+# | + g1a (gre) + g1b (gre) |
+# | loc=192.0.2.65 loc=192.0.2.81 |
+# | rem=192.0.2.66 --. rem=192.0.2.82 --. |
+# | tos=inherit | tos=inherit | |
+# | .------------------' | |
+# | | .------------------' |
+# | v v |
+# | + $ul1.111 (vlan) + $ul1.222 (vlan) |
+# | | 192.0.2.129/28 | 192.0.2.145/28 |
+# | \ / |
+# | \________________/ |
+# | | |
+# | + $ul1 |
+# +------------|-------------------------------+
+# |
+# +------------|-------------------------------+
+# | SW2 + $ul2 |
+# | _______|________ |
+# | / \ |
+# | / \ |
+# | + $ul2.111 (vlan) + $ul2.222 (vlan) |
+# | ^ 192.0.2.130/28 ^ 192.0.2.146/28 |
+# | | | |
+# | | '------------------. |
+# | '------------------. | |
+# | + g2a (gre) | + g2b (gre) | |
+# | loc=192.0.2.66 | loc=192.0.2.82 | |
+# | rem=192.0.2.65 --' rem=192.0.2.81 --' |
+# | tos=inherit tos=inherit |
+# | |
+# | $ol2 + |
+# | 192.0.2.17/28 | |
+# | 2001:db8:2::1/64 | |
+# +-------------------|------------------------+
+# |
+# +-------------------|-----+
+# | H2 | |
+# | $h2 + |
+# | 192.0.2.18/28 |
+# | 2001:db8:2::2/64 |
+# +-------------------------+
+
+ALL_TESTS="
+ ping_ipv4
+ ping_ipv6
+ multipath_ipv4
+ multipath_ipv6
+"
+
+NUM_NETIFS=6
+source lib.sh
+
+h1_create()
+{
+ simple_if_init $h1 192.0.2.1/28 2001:db8:1::1/64
+ ip route add vrf v$h1 192.0.2.16/28 via 192.0.2.2
+ ip route add vrf v$h1 2001:db8:2::/64 via 2001:db8:1::2
+}
+
+h1_destroy()
+{
+ ip route del vrf v$h1 2001:db8:2::/64 via 2001:db8:1::2
+ ip route del vrf v$h1 192.0.2.16/28 via 192.0.2.2
+ simple_if_fini $h1 192.0.2.1/28
+}
+
+sw1_create()
+{
+ simple_if_init $ol1 192.0.2.2/28 2001:db8:1::2/64
+ __simple_if_init $ul1 v$ol1
+ vlan_create $ul1 111 v$ol1 192.0.2.129/28
+ vlan_create $ul1 222 v$ol1 192.0.2.145/28
+
+ tunnel_create g1a gre 192.0.2.65 192.0.2.66 tos inherit dev v$ol1
+ __simple_if_init g1a v$ol1 192.0.2.65/32
+ ip route add vrf v$ol1 192.0.2.66/32 via 192.0.2.130
+
+ tunnel_create g1b gre 192.0.2.81 192.0.2.82 tos inherit dev v$ol1
+ __simple_if_init g1b v$ol1 192.0.2.81/32
+ ip route add vrf v$ol1 192.0.2.82/32 via 192.0.2.146
+
+ ip -6 nexthop add id 101 dev g1a
+ ip -6 nexthop add id 102 dev g1b
+ ip nexthop add id 103 group 101/102
+
+ ip route add vrf v$ol1 192.0.2.16/28 nhid 103
+ ip route add vrf v$ol1 2001:db8:2::/64 nhid 103
+}
+
+sw1_destroy()
+{
+ ip route del vrf v$ol1 2001:db8:2::/64
+ ip route del vrf v$ol1 192.0.2.16/28
+
+ ip nexthop del id 103
+ ip -6 nexthop del id 102
+ ip -6 nexthop del id 101
+
+ ip route del vrf v$ol1 192.0.2.82/32 via 192.0.2.146
+ __simple_if_fini g1b 192.0.2.81/32
+ tunnel_destroy g1b
+
+ ip route del vrf v$ol1 192.0.2.66/32 via 192.0.2.130
+ __simple_if_fini g1a 192.0.2.65/32
+ tunnel_destroy g1a
+
+ vlan_destroy $ul1 222
+ vlan_destroy $ul1 111
+ __simple_if_fini $ul1
+ simple_if_fini $ol1 192.0.2.2/28 2001:db8:1::2/64
+}
+
+sw2_create()
+{
+ simple_if_init $ol2 192.0.2.17/28 2001:db8:2::1/64
+ __simple_if_init $ul2 v$ol2
+ vlan_create $ul2 111 v$ol2 192.0.2.130/28
+ vlan_create $ul2 222 v$ol2 192.0.2.146/28
+
+ tunnel_create g2a gre 192.0.2.66 192.0.2.65 tos inherit dev v$ol2
+ __simple_if_init g2a v$ol2 192.0.2.66/32
+ ip route add vrf v$ol2 192.0.2.65/32 via 192.0.2.129
+
+ tunnel_create g2b gre 192.0.2.82 192.0.2.81 tos inherit dev v$ol2
+ __simple_if_init g2b v$ol2 192.0.2.82/32
+ ip route add vrf v$ol2 192.0.2.81/32 via 192.0.2.145
+
+ ip -6 nexthop add id 201 dev g2a
+ ip -6 nexthop add id 202 dev g2b
+ ip nexthop add id 203 group 201/202
+
+ ip route add vrf v$ol2 192.0.2.0/28 nhid 203
+ ip route add vrf v$ol2 2001:db8:1::/64 nhid 203
+
+ tc qdisc add dev $ul2 clsact
+ tc filter add dev $ul2 ingress pref 111 prot 802.1Q \
+ flower vlan_id 111 action pass
+ tc filter add dev $ul2 ingress pref 222 prot 802.1Q \
+ flower vlan_id 222 action pass
+}
+
+sw2_destroy()
+{
+ tc qdisc del dev $ul2 clsact
+
+ ip route del vrf v$ol2 2001:db8:1::/64
+ ip route del vrf v$ol2 192.0.2.0/28
+
+ ip nexthop del id 203
+ ip -6 nexthop del id 202
+ ip -6 nexthop del id 201
+
+ ip route del vrf v$ol2 192.0.2.81/32 via 192.0.2.145
+ __simple_if_fini g2b 192.0.2.82/32
+ tunnel_destroy g2b
+
+ ip route del vrf v$ol2 192.0.2.65/32 via 192.0.2.129
+ __simple_if_fini g2a 192.0.2.66/32
+ tunnel_destroy g2a
+
+ vlan_destroy $ul2 222
+ vlan_destroy $ul2 111
+ __simple_if_fini $ul2
+ simple_if_fini $ol2 192.0.2.17/28 2001:db8:2::1/64
+}
+
+h2_create()
+{
+ simple_if_init $h2 192.0.2.18/28 2001:db8:2::2/64
+ ip route add vrf v$h2 192.0.2.0/28 via 192.0.2.17
+ ip route add vrf v$h2 2001:db8:1::/64 via 2001:db8:2::1
+}
+
+h2_destroy()
+{
+ ip route del vrf v$h2 2001:db8:1::/64 via 2001:db8:2::1
+ ip route del vrf v$h2 192.0.2.0/28 via 192.0.2.17
+ simple_if_fini $h2 192.0.2.18/28 2001:db8:2::2/64
+}
+
+setup_prepare()
+{
+ h1=${NETIFS[p1]}
+ ol1=${NETIFS[p2]}
+
+ ul1=${NETIFS[p3]}
+ ul2=${NETIFS[p4]}
+
+ ol2=${NETIFS[p5]}
+ h2=${NETIFS[p6]}
+
+ vrf_prepare
+ h1_create
+ sw1_create
+ sw2_create
+ h2_create
+
+ forwarding_enable
+}
+
+cleanup()
+{
+ pre_cleanup
+
+ forwarding_restore
+
+ h2_destroy
+ sw2_destroy
+ sw1_destroy
+ h1_destroy
+ vrf_cleanup
+}
+
+multipath4_test()
+{
+ local what=$1; shift
+ local weight1=$1; shift
+ local weight2=$1; shift
+
+ sysctl_set net.ipv4.fib_multipath_hash_policy 1
+ ip nexthop replace id 103 group 101,$weight1/102,$weight2
+
+ local t0_111=$(tc_rule_stats_get $ul2 111 ingress)
+ local t0_222=$(tc_rule_stats_get $ul2 222 ingress)
+
+ ip vrf exec v$h1 \
+ $MZ $h1 -q -p 64 -A 192.0.2.1 -B 192.0.2.18 \
+ -d $MZ_DELAY -t udp "sp=1024,dp=0-32768"
+
+ local t1_111=$(tc_rule_stats_get $ul2 111 ingress)
+ local t1_222=$(tc_rule_stats_get $ul2 222 ingress)
+
+ local d111=$((t1_111 - t0_111))
+ local d222=$((t1_222 - t0_222))
+ multipath_eval "$what" $weight1 $weight2 $d111 $d222
+
+ ip nexthop replace id 103 group 101/102
+ sysctl_restore net.ipv4.fib_multipath_hash_policy
+}
+
+multipath6_test()
+{
+ local what=$1; shift
+ local weight1=$1; shift
+ local weight2=$1; shift
+
+ sysctl_set net.ipv6.fib_multipath_hash_policy 1
+ ip nexthop replace id 103 group 101,$weight1/102,$weight2
+
+ local t0_111=$(tc_rule_stats_get $ul2 111 ingress)
+ local t0_222=$(tc_rule_stats_get $ul2 222 ingress)
+
+ ip vrf exec v$h1 \
+ $MZ $h1 -6 -q -p 64 -A 2001:db8:1::1 -B 2001:db8:2::2 \
+ -d $MZ_DELAY -t udp "sp=1024,dp=0-32768"
+
+ local t1_111=$(tc_rule_stats_get $ul2 111 ingress)
+ local t1_222=$(tc_rule_stats_get $ul2 222 ingress)
+
+ local d111=$((t1_111 - t0_111))
+ local d222=$((t1_222 - t0_222))
+ multipath_eval "$what" $weight1 $weight2 $d111 $d222
+
+ ip nexthop replace id 103 group 101/102
+ sysctl_restore net.ipv6.fib_multipath_hash_policy
+}
+
+ping_ipv4()
+{
+ ping_test $h1 192.0.2.18
+}
+
+ping_ipv6()
+{
+ ping6_test $h1 2001:db8:2::2
+}
+
+multipath_ipv4()
+{
+ log_info "Running IPv4 multipath tests"
+ multipath4_test "ECMP" 1 1
+ multipath4_test "Weighted MP 2:1" 2 1
+ multipath4_test "Weighted MP 11:45" 11 45
+}
+
+multipath_ipv6()
+{
+ log_info "Running IPv6 multipath tests"
+ multipath6_test "ECMP" 1 1
+ multipath6_test "Weighted MP 2:1" 2 1
+ multipath6_test "Weighted MP 11:45" 11 45
+}
+
+trap cleanup EXIT
+
+setup_prepare
+setup_wait
+tests_run
+
+exit $EXIT_STATUS
diff --git a/tools/testing/selftests/net/forwarding/gre_multipath_nh_res.sh b/tools/testing/selftests/net/forwarding/gre_multipath_nh_res.sh
new file mode 100755
index 000000000000..370f9925302d
--- /dev/null
+++ b/tools/testing/selftests/net/forwarding/gre_multipath_nh_res.sh
@@ -0,0 +1,323 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+# Test traffic distribution when a wECMP route forwards traffic to two GRE
+# tunnels.
+#
+# +-------------------------+
+# | H1 |
+# | $h1 + |
+# | 192.0.2.1/28 | |
+# | 2001:db8:1::1/64 | |
+# +-------------------|-----+
+# |
+# +-------------------|------------------------+
+# | SW1 | |
+# | $ol1 + |
+# | 192.0.2.2/28 |
+# | 2001:db8:1::2/64 |
+# | |
+# | + g1a (gre) + g1b (gre) |
+# | loc=192.0.2.65 loc=192.0.2.81 |
+# | rem=192.0.2.66 --. rem=192.0.2.82 --. |
+# | tos=inherit | tos=inherit | |
+# | .------------------' | |
+# | | .------------------' |
+# | v v |
+# | + $ul1.111 (vlan) + $ul1.222 (vlan) |
+# | | 192.0.2.129/28 | 192.0.2.145/28 |
+# | \ / |
+# | \________________/ |
+# | | |
+# | + $ul1 |
+# +------------|-------------------------------+
+# |
+# +------------|-------------------------------+
+# | SW2 + $ul2 |
+# | _______|________ |
+# | / \ |
+# | / \ |
+# | + $ul2.111 (vlan) + $ul2.222 (vlan) |
+# | ^ 192.0.2.130/28 ^ 192.0.2.146/28 |
+# | | | |
+# | | '------------------. |
+# | '------------------. | |
+# | + g2a (gre) | + g2b (gre) | |
+# | loc=192.0.2.66 | loc=192.0.2.82 | |
+# | rem=192.0.2.65 --' rem=192.0.2.81 --' |
+# | tos=inherit tos=inherit |
+# | |
+# | $ol2 + |
+# | 192.0.2.17/28 | |
+# | 2001:db8:2::1/64 | |
+# +-------------------|------------------------+
+# |
+# +-------------------|-----+
+# | H2 | |
+# | $h2 + |
+# | 192.0.2.18/28 |
+# | 2001:db8:2::2/64 |
+# +-------------------------+
+
+ALL_TESTS="
+ ping_ipv4
+ ping_ipv6
+ multipath_ipv4
+ multipath_ipv6
+"
+
+NUM_NETIFS=6
+source lib.sh
+
+h1_create()
+{
+ simple_if_init $h1 192.0.2.1/28 2001:db8:1::1/64
+ ip route add vrf v$h1 192.0.2.16/28 via 192.0.2.2
+ ip route add vrf v$h1 2001:db8:2::/64 via 2001:db8:1::2
+}
+
+h1_destroy()
+{
+ ip route del vrf v$h1 2001:db8:2::/64 via 2001:db8:1::2
+ ip route del vrf v$h1 192.0.2.16/28 via 192.0.2.2
+ simple_if_fini $h1 192.0.2.1/28
+}
+
+sw1_create()
+{
+ simple_if_init $ol1 192.0.2.2/28 2001:db8:1::2/64
+ __simple_if_init $ul1 v$ol1
+ vlan_create $ul1 111 v$ol1 192.0.2.129/28
+ vlan_create $ul1 222 v$ol1 192.0.2.145/28
+
+ tunnel_create g1a gre 192.0.2.65 192.0.2.66 tos inherit dev v$ol1
+ __simple_if_init g1a v$ol1 192.0.2.65/32
+ ip route add vrf v$ol1 192.0.2.66/32 via 192.0.2.130
+
+ tunnel_create g1b gre 192.0.2.81 192.0.2.82 tos inherit dev v$ol1
+ __simple_if_init g1b v$ol1 192.0.2.81/32
+ ip route add vrf v$ol1 192.0.2.82/32 via 192.0.2.146
+
+ ip -6 nexthop add id 101 dev g1a
+ ip -6 nexthop add id 102 dev g1b
+ ip nexthop add id 103 group 101/102 type resilient buckets 512 \
+ idle_timer 0
+
+ ip route add vrf v$ol1 192.0.2.16/28 nhid 103
+ ip route add vrf v$ol1 2001:db8:2::/64 nhid 103
+}
+
+sw1_destroy()
+{
+ ip route del vrf v$ol1 2001:db8:2::/64
+ ip route del vrf v$ol1 192.0.2.16/28
+
+ ip nexthop del id 103
+ ip -6 nexthop del id 102
+ ip -6 nexthop del id 101
+
+ ip route del vrf v$ol1 192.0.2.82/32 via 192.0.2.146
+ __simple_if_fini g1b 192.0.2.81/32
+ tunnel_destroy g1b
+
+ ip route del vrf v$ol1 192.0.2.66/32 via 192.0.2.130
+ __simple_if_fini g1a 192.0.2.65/32
+ tunnel_destroy g1a
+
+ vlan_destroy $ul1 222
+ vlan_destroy $ul1 111
+ __simple_if_fini $ul1
+ simple_if_fini $ol1 192.0.2.2/28 2001:db8:1::2/64
+}
+
+sw2_create()
+{
+ simple_if_init $ol2 192.0.2.17/28 2001:db8:2::1/64
+ __simple_if_init $ul2 v$ol2
+ vlan_create $ul2 111 v$ol2 192.0.2.130/28
+ vlan_create $ul2 222 v$ol2 192.0.2.146/28
+
+ tunnel_create g2a gre 192.0.2.66 192.0.2.65 tos inherit dev v$ol2
+ __simple_if_init g2a v$ol2 192.0.2.66/32
+ ip route add vrf v$ol2 192.0.2.65/32 via 192.0.2.129
+
+ tunnel_create g2b gre 192.0.2.82 192.0.2.81 tos inherit dev v$ol2
+ __simple_if_init g2b v$ol2 192.0.2.82/32
+ ip route add vrf v$ol2 192.0.2.81/32 via 192.0.2.145
+
+ ip -6 nexthop add id 201 dev g2a
+ ip -6 nexthop add id 202 dev g2b
+ ip nexthop add id 203 group 201/202 type resilient buckets 512 \
+ idle_timer 0
+
+ ip route add vrf v$ol2 192.0.2.0/28 nhid 203
+ ip route add vrf v$ol2 2001:db8:1::/64 nhid 203
+
+ tc qdisc add dev $ul2 clsact
+ tc filter add dev $ul2 ingress pref 111 prot 802.1Q \
+ flower vlan_id 111 action pass
+ tc filter add dev $ul2 ingress pref 222 prot 802.1Q \
+ flower vlan_id 222 action pass
+}
+
+sw2_destroy()
+{
+ tc qdisc del dev $ul2 clsact
+
+ ip route del vrf v$ol2 2001:db8:1::/64
+ ip route del vrf v$ol2 192.0.2.0/28
+
+ ip nexthop del id 203
+ ip -6 nexthop del id 202
+ ip -6 nexthop del id 201
+
+ ip route del vrf v$ol2 192.0.2.81/32 via 192.0.2.145
+ __simple_if_fini g2b 192.0.2.82/32
+ tunnel_destroy g2b
+
+ ip route del vrf v$ol2 192.0.2.65/32 via 192.0.2.129
+ __simple_if_fini g2a 192.0.2.66/32
+ tunnel_destroy g2a
+
+ vlan_destroy $ul2 222
+ vlan_destroy $ul2 111
+ __simple_if_fini $ul2
+ simple_if_fini $ol2 192.0.2.17/28 2001:db8:2::1/64
+}
+
+h2_create()
+{
+ simple_if_init $h2 192.0.2.18/28 2001:db8:2::2/64
+ ip route add vrf v$h2 192.0.2.0/28 via 192.0.2.17
+ ip route add vrf v$h2 2001:db8:1::/64 via 2001:db8:2::1
+}
+
+h2_destroy()
+{
+ ip route del vrf v$h2 2001:db8:1::/64 via 2001:db8:2::1
+ ip route del vrf v$h2 192.0.2.0/28 via 192.0.2.17
+ simple_if_fini $h2 192.0.2.18/28 2001:db8:2::2/64
+}
+
+setup_prepare()
+{
+ h1=${NETIFS[p1]}
+ ol1=${NETIFS[p2]}
+
+ ul1=${NETIFS[p3]}
+ ul2=${NETIFS[p4]}
+
+ ol2=${NETIFS[p5]}
+ h2=${NETIFS[p6]}
+
+ vrf_prepare
+ h1_create
+ sw1_create
+ sw2_create
+ h2_create
+
+ forwarding_enable
+}
+
+cleanup()
+{
+ pre_cleanup
+
+ forwarding_restore
+
+ h2_destroy
+ sw2_destroy
+ sw1_destroy
+ h1_destroy
+ vrf_cleanup
+}
+
+multipath4_test()
+{
+ local what=$1; shift
+ local weight1=$1; shift
+ local weight2=$1; shift
+
+ sysctl_set net.ipv4.fib_multipath_hash_policy 1
+ ip nexthop replace id 103 group 101,$weight1/102,$weight2 \
+ type resilient
+
+ local t0_111=$(tc_rule_stats_get $ul2 111 ingress)
+ local t0_222=$(tc_rule_stats_get $ul2 222 ingress)
+
+ ip vrf exec v$h1 \
+ $MZ $h1 -q -p 64 -A 192.0.2.1 -B 192.0.2.18 \
+ -d $MZ_DELAY -t udp "sp=1024,dp=0-32768"
+
+ local t1_111=$(tc_rule_stats_get $ul2 111 ingress)
+ local t1_222=$(tc_rule_stats_get $ul2 222 ingress)
+
+ local d111=$((t1_111 - t0_111))
+ local d222=$((t1_222 - t0_222))
+ multipath_eval "$what" $weight1 $weight2 $d111 $d222
+
+ ip nexthop replace id 103 group 101/102 type resilient
+ sysctl_restore net.ipv4.fib_multipath_hash_policy
+}
+
+multipath6_test()
+{
+ local what=$1; shift
+ local weight1=$1; shift
+ local weight2=$1; shift
+
+ sysctl_set net.ipv6.fib_multipath_hash_policy 1
+ ip nexthop replace id 103 group 101,$weight1/102,$weight2 \
+ type resilient
+
+ local t0_111=$(tc_rule_stats_get $ul2 111 ingress)
+ local t0_222=$(tc_rule_stats_get $ul2 222 ingress)
+
+ ip vrf exec v$h1 \
+ $MZ $h1 -6 -q -p 64 -A 2001:db8:1::1 -B 2001:db8:2::2 \
+ -d $MZ_DELAY -t udp "sp=1024,dp=0-32768"
+
+ local t1_111=$(tc_rule_stats_get $ul2 111 ingress)
+ local t1_222=$(tc_rule_stats_get $ul2 222 ingress)
+
+ local d111=$((t1_111 - t0_111))
+ local d222=$((t1_222 - t0_222))
+ multipath_eval "$what" $weight1 $weight2 $d111 $d222
+
+ ip nexthop replace id 103 group 101/102 type resilient
+ sysctl_restore net.ipv6.fib_multipath_hash_policy
+}
+
+ping_ipv4()
+{
+ ping_test $h1 192.0.2.18
+}
+
+ping_ipv6()
+{
+ ping6_test $h1 2001:db8:2::2
+}
+
+multipath_ipv4()
+{
+ log_info "Running IPv4 multipath tests"
+ multipath4_test "ECMP" 1 1
+ multipath4_test "Weighted MP 2:1" 2 1
+ multipath4_test "Weighted MP 11:45" 11 45
+}
+
+multipath_ipv6()
+{
+ log_info "Running IPv6 multipath tests"
+ multipath6_test "ECMP" 1 1
+ multipath6_test "Weighted MP 2:1" 2 1
+ multipath6_test "Weighted MP 11:45" 11 45
+}
+
+trap cleanup EXIT
+
+setup_prepare
+setup_wait
+tests_run
+
+exit $EXIT_STATUS
diff --git a/tools/testing/selftests/net/forwarding/hw_stats_l3.sh b/tools/testing/selftests/net/forwarding/hw_stats_l3.sh
new file mode 100755
index 000000000000..48584a51388f
--- /dev/null
+++ b/tools/testing/selftests/net/forwarding/hw_stats_l3.sh
@@ -0,0 +1,340 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+# +--------------------+ +----------------------+
+# | H1 | | H2 |
+# | | | |
+# | $h1.200 + | | + $h2.200 |
+# | 192.0.2.1/28 | | | | 192.0.2.18/28 |
+# | 2001:db8:1::1/64 | | | | 2001:db8:2::1/64 |
+# | | | | | |
+# | $h1 + | | + $h2 |
+# | | | | | |
+# +------------------|-+ +-|--------------------+
+# | |
+# +------------------|-------------------------|--------------------+
+# | SW | | |
+# | | | |
+# | $rp1 + + $rp2 |
+# | | | |
+# | $rp1.200 + + $rp2.200 |
+# | 192.0.2.2/28 192.0.2.17/28 |
+# | 2001:db8:1::2/64 2001:db8:2::2/64 |
+# | |
+# +-----------------------------------------------------------------+
+
+ALL_TESTS="
+ ping_ipv4
+ ping_ipv6
+ test_stats_rx_ipv4
+ test_stats_tx_ipv4
+ test_stats_rx_ipv6
+ test_stats_tx_ipv6
+ respin_enablement
+ test_stats_rx_ipv4
+ test_stats_tx_ipv4
+ test_stats_rx_ipv6
+ test_stats_tx_ipv6
+ reapply_config
+ ping_ipv4
+ ping_ipv6
+ test_stats_rx_ipv4
+ test_stats_tx_ipv4
+ test_stats_rx_ipv6
+ test_stats_tx_ipv6
+ test_stats_report_rx
+ test_stats_report_tx
+ test_destroy_enabled
+ test_double_enable
+"
+NUM_NETIFS=4
+source lib.sh
+
+h1_create()
+{
+ simple_if_init $h1
+ vlan_create $h1 200 v$h1 192.0.2.1/28 2001:db8:1::1/64
+ ip route add 192.0.2.16/28 vrf v$h1 nexthop via 192.0.2.2
+ ip -6 route add 2001:db8:2::/64 vrf v$h1 nexthop via 2001:db8:1::2
+}
+
+h1_destroy()
+{
+ ip -6 route del 2001:db8:2::/64 vrf v$h1 nexthop via 2001:db8:1::2
+ ip route del 192.0.2.16/28 vrf v$h1 nexthop via 192.0.2.2
+ vlan_destroy $h1 200
+ simple_if_fini $h1
+}
+
+h2_create()
+{
+ simple_if_init $h2
+ vlan_create $h2 200 v$h2 192.0.2.18/28 2001:db8:2::1/64
+ ip route add 192.0.2.0/28 vrf v$h2 nexthop via 192.0.2.17
+ ip -6 route add 2001:db8:1::/64 vrf v$h2 nexthop via 2001:db8:2::2
+}
+
+h2_destroy()
+{
+ ip -6 route del 2001:db8:1::/64 vrf v$h2 nexthop via 2001:db8:2::2
+ ip route del 192.0.2.0/28 vrf v$h2 nexthop via 192.0.2.17
+ vlan_destroy $h2 200
+ simple_if_fini $h2
+}
+
+router_rp1_200_create()
+{
+ ip link add name $rp1.200 link $rp1 type vlan id 200
+ ip link set dev $rp1.200 addrgenmode eui64
+ ip link set dev $rp1.200 up
+ ip address add dev $rp1.200 192.0.2.2/28
+ ip address add dev $rp1.200 2001:db8:1::2/64
+ ip stats set dev $rp1.200 l3_stats on
+}
+
+router_rp1_200_destroy()
+{
+ ip stats set dev $rp1.200 l3_stats off
+ ip address del dev $rp1.200 2001:db8:1::2/64
+ ip address del dev $rp1.200 192.0.2.2/28
+ ip link del dev $rp1.200
+}
+
+router_create()
+{
+ ip link set dev $rp1 up
+ router_rp1_200_create
+
+ ip link set dev $rp2 up
+ vlan_create $rp2 200 "" 192.0.2.17/28 2001:db8:2::2/64
+}
+
+router_destroy()
+{
+ vlan_destroy $rp2 200
+ ip link set dev $rp2 down
+
+ router_rp1_200_destroy
+ ip link set dev $rp1 down
+}
+
+setup_prepare()
+{
+ h1=${NETIFS[p1]}
+ rp1=${NETIFS[p2]}
+
+ rp2=${NETIFS[p3]}
+ h2=${NETIFS[p4]}
+
+ rp1mac=$(mac_get $rp1)
+ rp2mac=$(mac_get $rp2)
+
+ vrf_prepare
+
+ h1_create
+ h2_create
+
+ router_create
+
+ forwarding_enable
+}
+
+cleanup()
+{
+ pre_cleanup
+
+ forwarding_restore
+
+ router_destroy
+
+ h2_destroy
+ h1_destroy
+
+ vrf_cleanup
+}
+
+ping_ipv4()
+{
+ ping_test $h1.200 192.0.2.18 " IPv4"
+}
+
+ping_ipv6()
+{
+ ping_test $h1.200 2001:db8:2::1 " IPv6"
+}
+
+send_packets_rx_ipv4()
+{
+ # Send 21 packets instead of 20, because the first one might trap and go
+ # through the SW datapath, which might not bump the HW counter.
+ $MZ $h1.200 -c 21 -d 20msec -p 100 \
+ -a own -b $rp1mac -A 192.0.2.1 -B 192.0.2.18 \
+ -q -t udp sp=54321,dp=12345
+}
+
+send_packets_rx_ipv6()
+{
+ $MZ $h1.200 -6 -c 21 -d 20msec -p 100 \
+ -a own -b $rp1mac -A 2001:db8:1::1 -B 2001:db8:2::1 \
+ -q -t udp sp=54321,dp=12345
+}
+
+send_packets_tx_ipv4()
+{
+ $MZ $h2.200 -c 21 -d 20msec -p 100 \
+ -a own -b $rp2mac -A 192.0.2.18 -B 192.0.2.1 \
+ -q -t udp sp=54321,dp=12345
+}
+
+send_packets_tx_ipv6()
+{
+ $MZ $h2.200 -6 -c 21 -d 20msec -p 100 \
+ -a own -b $rp2mac -A 2001:db8:2::1 -B 2001:db8:1::1 \
+ -q -t udp sp=54321,dp=12345
+}
+
+___test_stats()
+{
+ local dir=$1; shift
+ local prot=$1; shift
+
+ local a
+ local b
+
+ a=$(hw_stats_get l3_stats $rp1.200 ${dir} packets)
+ send_packets_${dir}_${prot}
+ "$@"
+ b=$(busywait "$TC_HIT_TIMEOUT" until_counter_is ">= $a + 20" \
+ hw_stats_get l3_stats $rp1.200 ${dir} packets)
+ check_err $? "Traffic not reflected in the counter: $a -> $b"
+}
+
+__test_stats()
+{
+ local dir=$1; shift
+ local prot=$1; shift
+
+ RET=0
+ ___test_stats "$dir" "$prot"
+ log_test "Test $dir packets: $prot"
+}
+
+test_stats_rx_ipv4()
+{
+ __test_stats rx ipv4
+}
+
+test_stats_tx_ipv4()
+{
+ __test_stats tx ipv4
+}
+
+test_stats_rx_ipv6()
+{
+ __test_stats rx ipv6
+}
+
+test_stats_tx_ipv6()
+{
+ __test_stats tx ipv6
+}
+
+# Make sure everything works well even after stats have been disabled and
+# reenabled on the same device without touching the L3 configuration.
+respin_enablement()
+{
+ log_info "Turning stats off and on again"
+ ip stats set dev $rp1.200 l3_stats off
+ ip stats set dev $rp1.200 l3_stats on
+}
+
+# For the initial run, l3_stats is enabled on a completely set up netdevice. Now
+# do it the other way around: enabling the L3 stats on an L2 netdevice, and only
+# then apply the L3 configuration.
+reapply_config()
+{
+ log_info "Reapplying configuration"
+
+ router_rp1_200_destroy
+
+ ip link add name $rp1.200 link $rp1 type vlan id 200
+ ip link set dev $rp1.200 addrgenmode none
+ ip stats set dev $rp1.200 l3_stats on
+ ip link set dev $rp1.200 addrgenmode eui64
+ ip link set dev $rp1.200 up
+ ip address add dev $rp1.200 192.0.2.2/28
+ ip address add dev $rp1.200 2001:db8:1::2/64
+}
+
+__test_stats_report()
+{
+ local dir=$1; shift
+ local prot=$1; shift
+
+ local a
+ local b
+
+ RET=0
+
+ a=$(hw_stats_get l3_stats $rp1.200 ${dir} packets)
+ send_packets_${dir}_${prot}
+ ip address flush dev $rp1.200
+ b=$(busywait "$TC_HIT_TIMEOUT" until_counter_is ">= $a + 20" \
+ hw_stats_get l3_stats $rp1.200 ${dir} packets)
+ check_err $? "Traffic not reflected in the counter: $a -> $b"
+ log_test "Test ${dir} packets: stats pushed on loss of L3"
+
+ ip stats set dev $rp1.200 l3_stats off
+ ip link del dev $rp1.200
+ router_rp1_200_create
+}
+
+test_stats_report_rx()
+{
+ __test_stats_report rx ipv4
+}
+
+test_stats_report_tx()
+{
+ __test_stats_report tx ipv4
+}
+
+test_destroy_enabled()
+{
+ RET=0
+
+ ip link del dev $rp1.200
+ router_rp1_200_create
+
+ log_test "Destroy l3_stats-enabled netdev"
+}
+
+test_double_enable()
+{
+ RET=0
+ ___test_stats rx ipv4 \
+ ip stats set dev $rp1.200 l3_stats on
+ log_test "Test stat retention across a spurious enablement"
+}
+
+trap cleanup EXIT
+
+setup_prepare
+setup_wait
+
+used=$(ip -j stats show dev $rp1.200 group offload subgroup hw_stats_info |
+ jq '.[].info.l3_stats.used')
+kind=$(ip -j -d link show dev $rp1 |
+ jq -r '.[].linkinfo.info_kind')
+if [[ $used != true ]]; then
+ if [[ $kind == veth ]]; then
+ log_test_skip "l3_stats not offloaded on veth interface"
+ EXIT_STATUS=$ksft_skip
+ else
+ RET=1 log_test "l3_stats not offloaded"
+ fi
+else
+ tests_run
+fi
+
+exit $EXIT_STATUS
diff --git a/tools/testing/selftests/net/forwarding/hw_stats_l3_gre.sh b/tools/testing/selftests/net/forwarding/hw_stats_l3_gre.sh
new file mode 100755
index 000000000000..7594bbb49029
--- /dev/null
+++ b/tools/testing/selftests/net/forwarding/hw_stats_l3_gre.sh
@@ -0,0 +1,111 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+# Test L3 stats on IP-in-IP GRE tunnel without key.
+
+# This test uses flat topology for IP tunneling tests. See ipip_lib.sh for more
+# details.
+
+ALL_TESTS="
+ ping_ipv4
+ test_stats_rx
+ test_stats_tx
+"
+NUM_NETIFS=6
+source lib.sh
+source ipip_lib.sh
+
+setup_prepare()
+{
+ h1=${NETIFS[p1]}
+ ol1=${NETIFS[p2]}
+
+ ul1=${NETIFS[p3]}
+ ul2=${NETIFS[p4]}
+
+ ol2=${NETIFS[p5]}
+ h2=${NETIFS[p6]}
+
+ ol1mac=$(mac_get $ol1)
+
+ forwarding_enable
+ vrf_prepare
+ h1_create
+ h2_create
+ sw1_flat_create gre $ol1 $ul1
+ sw2_flat_create gre $ol2 $ul2
+ ip stats set dev g1a l3_stats on
+ ip stats set dev g2a l3_stats on
+}
+
+cleanup()
+{
+ pre_cleanup
+
+ ip stats set dev g1a l3_stats off
+ ip stats set dev g2a l3_stats off
+
+ sw2_flat_destroy $ol2 $ul2
+ sw1_flat_destroy $ol1 $ul1
+ h2_destroy
+ h1_destroy
+
+ vrf_cleanup
+ forwarding_restore
+}
+
+ping_ipv4()
+{
+ RET=0
+
+ ping_test $h1 192.0.2.18 " gre flat"
+}
+
+send_packets_ipv4()
+{
+ # Send 21 packets instead of 20, because the first one might trap and go
+ # through the SW datapath, which might not bump the HW counter.
+ $MZ $h1 -c 21 -d 20msec -p 100 \
+ -a own -b $ol1mac -A 192.0.2.1 -B 192.0.2.18 \
+ -q -t udp sp=54321,dp=12345
+}
+
+test_stats()
+{
+ local dev=$1; shift
+ local dir=$1; shift
+
+ local a
+ local b
+
+ RET=0
+
+ a=$(hw_stats_get l3_stats $dev $dir packets)
+ send_packets_ipv4
+ b=$(busywait "$TC_HIT_TIMEOUT" until_counter_is ">= $a + 20" \
+ hw_stats_get l3_stats $dev $dir packets)
+ check_err $? "Traffic not reflected in the counter: $a -> $b"
+
+ log_test "Test $dir packets: $prot"
+}
+
+test_stats_tx()
+{
+ test_stats g1a tx
+}
+
+test_stats_rx()
+{
+ test_stats g2a rx
+}
+
+skip_on_veth
+
+trap cleanup EXIT
+
+setup_prepare
+setup_wait
+
+tests_run
+
+exit $EXIT_STATUS
diff --git a/tools/testing/selftests/net/forwarding/ip6_forward_instats_vrf.sh b/tools/testing/selftests/net/forwarding/ip6_forward_instats_vrf.sh
new file mode 100755
index 000000000000..49fa94b53a1c
--- /dev/null
+++ b/tools/testing/selftests/net/forwarding/ip6_forward_instats_vrf.sh
@@ -0,0 +1,174 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+# Test ipv6 stats on the incoming if when forwarding with VRF
+
+ALL_TESTS="
+ ipv6_ping
+ ipv6_in_too_big_err
+ ipv6_in_hdr_err
+ ipv6_in_addr_err
+ ipv6_in_discard
+"
+
+NUM_NETIFS=4
+source lib.sh
+
+require_command $TROUTE6
+
+h1_create()
+{
+ simple_if_init $h1 2001:1:1::2/64
+ ip -6 route add vrf v$h1 2001:1:2::/64 via 2001:1:1::1
+}
+
+h1_destroy()
+{
+ ip -6 route del vrf v$h1 2001:1:2::/64 via 2001:1:1::1
+ simple_if_fini $h1 2001:1:1::2/64
+}
+
+router_create()
+{
+ vrf_create router
+ __simple_if_init $rtr1 router 2001:1:1::1/64
+ __simple_if_init $rtr2 router 2001:1:2::1/64
+ mtu_set $rtr2 1280
+}
+
+router_destroy()
+{
+ mtu_restore $rtr2
+ __simple_if_fini $rtr2 2001:1:2::1/64
+ __simple_if_fini $rtr1 2001:1:1::1/64
+ vrf_destroy router
+}
+
+h2_create()
+{
+ simple_if_init $h2 2001:1:2::2/64
+ ip -6 route add vrf v$h2 2001:1:1::/64 via 2001:1:2::1
+ mtu_set $h2 1280
+}
+
+h2_destroy()
+{
+ mtu_restore $h2
+ ip -6 route del vrf v$h2 2001:1:1::/64 via 2001:1:2::1
+ simple_if_fini $h2 2001:1:2::2/64
+}
+
+setup_prepare()
+{
+ h1=${NETIFS[p1]}
+ rtr1=${NETIFS[p2]}
+
+ rtr2=${NETIFS[p3]}
+ h2=${NETIFS[p4]}
+
+ vrf_prepare
+ h1_create
+ router_create
+ h2_create
+
+ forwarding_enable
+}
+
+cleanup()
+{
+ pre_cleanup
+
+ forwarding_restore
+
+ h2_destroy
+ router_destroy
+ h1_destroy
+ vrf_cleanup
+}
+
+ipv6_in_too_big_err()
+{
+ RET=0
+
+ local t0=$(ipv6_stats_get $rtr1 Ip6InTooBigErrors)
+ local vrf_name=$(master_name_get $h1)
+
+ # Send too big packets
+ ip vrf exec $vrf_name \
+ $PING6 -s 1300 2001:1:2::2 -c 1 -w $PING_TIMEOUT &> /dev/null
+
+ local t1=$(ipv6_stats_get $rtr1 Ip6InTooBigErrors)
+ test "$((t1 - t0))" -ne 0
+ check_err $?
+ log_test "Ip6InTooBigErrors"
+}
+
+ipv6_in_hdr_err()
+{
+ RET=0
+
+ local t0=$(ipv6_stats_get $rtr1 Ip6InHdrErrors)
+ local vrf_name=$(master_name_get $h1)
+
+ # Send packets with hop limit 1, easiest with traceroute6 as some ping6
+ # doesn't allow hop limit to be specified
+ ip vrf exec $vrf_name \
+ $TROUTE6 2001:1:2::2 &> /dev/null
+
+ local t1=$(ipv6_stats_get $rtr1 Ip6InHdrErrors)
+ test "$((t1 - t0))" -ne 0
+ check_err $?
+ log_test "Ip6InHdrErrors"
+}
+
+ipv6_in_addr_err()
+{
+ RET=0
+
+ local t0=$(ipv6_stats_get $rtr1 Ip6InAddrErrors)
+ local vrf_name=$(master_name_get $h1)
+
+ # Disable forwarding temporary while sending the packet
+ sysctl -qw net.ipv6.conf.all.forwarding=0
+ ip vrf exec $vrf_name \
+ $PING6 2001:1:2::2 -c 1 -w $PING_TIMEOUT &> /dev/null
+ sysctl -qw net.ipv6.conf.all.forwarding=1
+
+ local t1=$(ipv6_stats_get $rtr1 Ip6InAddrErrors)
+ test "$((t1 - t0))" -ne 0
+ check_err $?
+ log_test "Ip6InAddrErrors"
+}
+
+ipv6_in_discard()
+{
+ RET=0
+
+ local t0=$(ipv6_stats_get $rtr1 Ip6InDiscards)
+ local vrf_name=$(master_name_get $h1)
+
+ # Add a policy to discard
+ ip xfrm policy add dst 2001:1:2::2/128 dir fwd action block
+ ip vrf exec $vrf_name \
+ $PING6 2001:1:2::2 -c 1 -w $PING_TIMEOUT &> /dev/null
+ ip xfrm policy del dst 2001:1:2::2/128 dir fwd
+
+ local t1=$(ipv6_stats_get $rtr1 Ip6InDiscards)
+ test "$((t1 - t0))" -ne 0
+ check_err $?
+ log_test "Ip6InDiscards"
+}
+ipv6_ping()
+{
+ RET=0
+
+ ping6_test $h1 2001:1:2::2
+}
+
+trap cleanup EXIT
+
+setup_prepare
+setup_wait
+tests_run
+
+exit $EXIT_STATUS
diff --git a/tools/testing/selftests/net/forwarding/ip6gre_custom_multipath_hash.sh b/tools/testing/selftests/net/forwarding/ip6gre_custom_multipath_hash.sh
new file mode 100755
index 000000000000..2ab9eaaa5532
--- /dev/null
+++ b/tools/testing/selftests/net/forwarding/ip6gre_custom_multipath_hash.sh
@@ -0,0 +1,466 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+#
+# Test traffic distribution when there are multiple paths between an IPv6 GRE
+# tunnel. The tunnel carries IPv4 and IPv6 traffic between multiple hosts.
+# Multiple routes are in the underlay network. With the default multipath
+# policy, SW2 will only look at the outer IP addresses, hence only a single
+# route would be used.
+#
+# +--------------------------------+
+# | H1 |
+# | $h1 + |
+# | 198.51.100.{2-253}/24 | |
+# | 2001:db8:1::{2-fd}/64 | |
+# +-------------------------|------+
+# |
+# +-------------------------|-------------------+
+# | SW1 | |
+# | $ol1 + |
+# | 198.51.100.1/24 |
+# | 2001:db8:1::1/64 |
+# | |
+# |+ g1 (ip6gre) |
+# | loc=2001:db8:3::1 |
+# | rem=2001:db8:3::2 -. |
+# | tos=inherit | |
+# | v |
+# | + $ul1 |
+# | | 2001:db8:10::1/64 |
+# +---------------------|-----------------------+
+# |
+# +---------------------|-----------------------+
+# | SW2 | |
+# | $ul21 + |
+# | 2001:db8:10::2/64 | |
+# | | |
+# ! __________________+___ |
+# | / \ |
+# | | | |
+# | + $ul22.111 (vlan) + $ul22.222 (vlan) |
+# | | 2001:db8:11::1/64 | 2001:db8:12::1/64 |
+# | | | |
+# +--|----------------------|-------------------+
+# | |
+# +--|----------------------|-------------------+
+# | | | |
+# | + $ul32.111 (vlan) + $ul32.222 (vlan) |
+# | | 2001:db8:11::2/64 | 2001:db8:12::2/64 |
+# | | | |
+# | \__________________+___/ |
+# | | |
+# | | |
+# | $ul31 + |
+# | 2001:db8:13::1/64 | SW3 |
+# +---------------------|-----------------------+
+# |
+# +---------------------|-----------------------+
+# | + $ul4 |
+# | ^ 2001:db8:13::2/64 |
+# | | |
+# |+ g2 (ip6gre) | |
+# | loc=2001:db8:3::2 | |
+# | rem=2001:db8:3::1 -' |
+# | tos=inherit |
+# | |
+# | $ol4 + |
+# | 203.0.113.1/24 | |
+# | 2001:db8:2::1/64 | SW4 |
+# +-------------------------|-------------------+
+# |
+# +-------------------------|------+
+# | | |
+# | $h2 + |
+# | 203.0.113.{2-253}/24 |
+# | 2001:db8:2::{2-fd}/64 H2 |
+# +--------------------------------+
+
+ALL_TESTS="
+ ping_ipv4
+ ping_ipv6
+ custom_hash
+"
+
+NUM_NETIFS=10
+source lib.sh
+
+h1_create()
+{
+ simple_if_init $h1 198.51.100.2/24 2001:db8:1::2/64
+ ip route add vrf v$h1 default via 198.51.100.1 dev $h1
+ ip -6 route add vrf v$h1 default via 2001:db8:1::1 dev $h1
+}
+
+h1_destroy()
+{
+ ip -6 route del vrf v$h1 default
+ ip route del vrf v$h1 default
+ simple_if_fini $h1 198.51.100.2/24 2001:db8:1::2/64
+}
+
+sw1_create()
+{
+ simple_if_init $ol1 198.51.100.1/24 2001:db8:1::1/64
+ __simple_if_init $ul1 v$ol1 2001:db8:10::1/64
+
+ tunnel_create g1 ip6gre 2001:db8:3::1 2001:db8:3::2 tos inherit \
+ dev v$ol1
+ __simple_if_init g1 v$ol1 2001:db8:3::1/128
+ ip route add vrf v$ol1 2001:db8:3::2/128 via 2001:db8:10::2
+
+ ip route add vrf v$ol1 203.0.113.0/24 dev g1
+ ip -6 route add vrf v$ol1 2001:db8:2::/64 dev g1
+}
+
+sw1_destroy()
+{
+ ip -6 route del vrf v$ol1 2001:db8:2::/64
+ ip route del vrf v$ol1 203.0.113.0/24
+
+ ip route del vrf v$ol1 2001:db8:3::2/128
+ __simple_if_fini g1 2001:db8:3::1/128
+ tunnel_destroy g1
+
+ __simple_if_fini $ul1 2001:db8:10::1/64
+ simple_if_fini $ol1 198.51.100.1/24 2001:db8:1::1/64
+}
+
+sw2_create()
+{
+ simple_if_init $ul21 2001:db8:10::2/64
+ __simple_if_init $ul22 v$ul21
+ vlan_create $ul22 111 v$ul21 2001:db8:11::1/64
+ vlan_create $ul22 222 v$ul21 2001:db8:12::1/64
+
+ ip -6 route add vrf v$ul21 2001:db8:3::1/128 via 2001:db8:10::1
+ ip -6 route add vrf v$ul21 2001:db8:3::2/128 \
+ nexthop via 2001:db8:11::2 \
+ nexthop via 2001:db8:12::2
+}
+
+sw2_destroy()
+{
+ ip -6 route del vrf v$ul21 2001:db8:3::2/128
+ ip -6 route del vrf v$ul21 2001:db8:3::1/128
+
+ vlan_destroy $ul22 222
+ vlan_destroy $ul22 111
+ __simple_if_fini $ul22
+ simple_if_fini $ul21 2001:db8:10::2/64
+}
+
+sw3_create()
+{
+ simple_if_init $ul31 2001:db8:13::1/64
+ __simple_if_init $ul32 v$ul31
+ vlan_create $ul32 111 v$ul31 2001:db8:11::2/64
+ vlan_create $ul32 222 v$ul31 2001:db8:12::2/64
+
+ ip -6 route add vrf v$ul31 2001:db8:3::2/128 via 2001:db8:13::2
+ ip -6 route add vrf v$ul31 2001:db8:3::1/128 \
+ nexthop via 2001:db8:11::1 \
+ nexthop via 2001:db8:12::1
+
+ tc qdisc add dev $ul32 clsact
+ tc filter add dev $ul32 ingress pref 111 prot 802.1Q \
+ flower vlan_id 111 action pass
+ tc filter add dev $ul32 ingress pref 222 prot 802.1Q \
+ flower vlan_id 222 action pass
+}
+
+sw3_destroy()
+{
+ tc qdisc del dev $ul32 clsact
+
+ ip -6 route del vrf v$ul31 2001:db8:3::1/128
+ ip -6 route del vrf v$ul31 2001:db8:3::2/128
+
+ vlan_destroy $ul32 222
+ vlan_destroy $ul32 111
+ __simple_if_fini $ul32
+ simple_if_fini $ul31 2001:db8:13::1/64
+}
+
+sw4_create()
+{
+ simple_if_init $ol4 203.0.113.1/24 2001:db8:2::1/64
+ __simple_if_init $ul4 v$ol4 2001:db8:13::2/64
+
+ tunnel_create g2 ip6gre 2001:db8:3::2 2001:db8:3::1 tos inherit \
+ dev v$ol4
+ __simple_if_init g2 v$ol4 2001:db8:3::2/128
+ ip -6 route add vrf v$ol4 2001:db8:3::1/128 via 2001:db8:13::1
+
+ ip route add vrf v$ol4 198.51.100.0/24 dev g2
+ ip -6 route add vrf v$ol4 2001:db8:1::/64 dev g2
+}
+
+sw4_destroy()
+{
+ ip -6 route del vrf v$ol4 2001:db8:1::/64
+ ip route del vrf v$ol4 198.51.100.0/24
+
+ ip -6 route del vrf v$ol4 2001:db8:3::1/128
+ __simple_if_fini g2 2001:db8:3::2/128
+ tunnel_destroy g2
+
+ __simple_if_fini $ul4 2001:db8:13::2/64
+ simple_if_fini $ol4 203.0.113.1/24 2001:db8:2::1/64
+}
+
+h2_create()
+{
+ simple_if_init $h2 203.0.113.2/24 2001:db8:2::2/64
+ ip route add vrf v$h2 default via 203.0.113.1 dev $h2
+ ip -6 route add vrf v$h2 default via 2001:db8:2::1 dev $h2
+}
+
+h2_destroy()
+{
+ ip -6 route del vrf v$h2 default
+ ip route del vrf v$h2 default
+ simple_if_fini $h2 203.0.113.2/24 2001:db8:2::2/64
+}
+
+setup_prepare()
+{
+ h1=${NETIFS[p1]}
+
+ ol1=${NETIFS[p2]}
+ ul1=${NETIFS[p3]}
+
+ ul21=${NETIFS[p4]}
+ ul22=${NETIFS[p5]}
+
+ ul32=${NETIFS[p6]}
+ ul31=${NETIFS[p7]}
+
+ ul4=${NETIFS[p8]}
+ ol4=${NETIFS[p9]}
+
+ h2=${NETIFS[p10]}
+
+ vrf_prepare
+ h1_create
+ sw1_create
+ sw2_create
+ sw3_create
+ sw4_create
+ h2_create
+
+ forwarding_enable
+}
+
+cleanup()
+{
+ pre_cleanup
+
+ forwarding_restore
+
+ h2_destroy
+ sw4_destroy
+ sw3_destroy
+ sw2_destroy
+ sw1_destroy
+ h1_destroy
+ vrf_cleanup
+}
+
+ping_ipv4()
+{
+ ping_test $h1 203.0.113.2
+}
+
+ping_ipv6()
+{
+ ping6_test $h1 2001:db8:2::2
+}
+
+send_src_ipv4()
+{
+ ip vrf exec v$h1 $MZ $h1 -q -p 64 \
+ -A "198.51.100.2-198.51.100.253" -B 203.0.113.2 \
+ -d $MZ_DELAY -c 50 -t udp "sp=20000,dp=30000"
+}
+
+send_dst_ipv4()
+{
+ ip vrf exec v$h1 $MZ $h1 -q -p 64 \
+ -A 198.51.100.2 -B "203.0.113.2-203.0.113.253" \
+ -d $MZ_DELAY -c 50 -t udp "sp=20000,dp=30000"
+}
+
+send_src_udp4()
+{
+ ip vrf exec v$h1 $MZ $h1 -q -p 64 \
+ -A 198.51.100.2 -B 203.0.113.2 \
+ -d $MZ_DELAY -t udp "sp=0-32768,dp=30000"
+}
+
+send_dst_udp4()
+{
+ ip vrf exec v$h1 $MZ $h1 -q -p 64 \
+ -A 198.51.100.2 -B 203.0.113.2 \
+ -d $MZ_DELAY -t udp "sp=20000,dp=0-32768"
+}
+
+send_src_ipv6()
+{
+ ip vrf exec v$h1 $MZ -6 $h1 -q -p 64 \
+ -A "2001:db8:1::2-2001:db8:1::fd" -B 2001:db8:2::2 \
+ -d $MZ_DELAY -c 50 -t udp "sp=20000,dp=30000"
+}
+
+send_dst_ipv6()
+{
+ ip vrf exec v$h1 $MZ -6 $h1 -q -p 64 \
+ -A 2001:db8:1::2 -B "2001:db8:2::2-2001:db8:2::fd" \
+ -d $MZ_DELAY -c 50 -t udp "sp=20000,dp=30000"
+}
+
+send_flowlabel()
+{
+ # Generate 16384 echo requests, each with a random flow label.
+ for _ in $(seq 1 16384); do
+ ip vrf exec v$h1 \
+ $PING6 2001:db8:2::2 -F 0 -c 1 -q >/dev/null 2>&1
+ done
+}
+
+send_src_udp6()
+{
+ ip vrf exec v$h1 $MZ -6 $h1 -q -p 64 \
+ -A 2001:db8:1::2 -B 2001:db8:2::2 \
+ -d $MZ_DELAY -t udp "sp=0-32768,dp=30000"
+}
+
+send_dst_udp6()
+{
+ ip vrf exec v$h1 $MZ -6 $h1 -q -p 64 \
+ -A 2001:db8:1::2 -B 2001:db8:2::2 \
+ -d $MZ_DELAY -t udp "sp=20000,dp=0-32768"
+}
+
+custom_hash_test()
+{
+ local field="$1"; shift
+ local balanced="$1"; shift
+ local send_flows="$@"
+
+ RET=0
+
+ local t0_111=$(tc_rule_stats_get $ul32 111 ingress)
+ local t0_222=$(tc_rule_stats_get $ul32 222 ingress)
+
+ $send_flows
+
+ local t1_111=$(tc_rule_stats_get $ul32 111 ingress)
+ local t1_222=$(tc_rule_stats_get $ul32 222 ingress)
+
+ local d111=$((t1_111 - t0_111))
+ local d222=$((t1_222 - t0_222))
+
+ local diff=$((d222 - d111))
+ local sum=$((d111 + d222))
+
+ local pct=$(echo "$diff / $sum * 100" | bc -l)
+ local is_balanced=$(echo "-20 <= $pct && $pct <= 20" | bc)
+
+ [[ ( $is_balanced -eq 1 && $balanced == "balanced" ) ||
+ ( $is_balanced -eq 0 && $balanced == "unbalanced" ) ]]
+ check_err $? "Expected traffic to be $balanced, but it is not"
+
+ log_test "Multipath hash field: $field ($balanced)"
+ log_info "Packets sent on path1 / path2: $d111 / $d222"
+}
+
+custom_hash_v4()
+{
+ log_info "Running IPv4 overlay custom multipath hash tests"
+
+ # Prevent the neighbour table from overflowing, as different neighbour
+ # entries will be created on $ol4 when using different destination IPs.
+ sysctl_set net.ipv4.neigh.default.gc_thresh1 1024
+ sysctl_set net.ipv4.neigh.default.gc_thresh2 1024
+ sysctl_set net.ipv4.neigh.default.gc_thresh3 1024
+
+ sysctl_set net.ipv6.fib_multipath_hash_fields 0x0040
+ custom_hash_test "Inner source IP" "balanced" send_src_ipv4
+ custom_hash_test "Inner source IP" "unbalanced" send_dst_ipv4
+
+ sysctl_set net.ipv6.fib_multipath_hash_fields 0x0080
+ custom_hash_test "Inner destination IP" "balanced" send_dst_ipv4
+ custom_hash_test "Inner destination IP" "unbalanced" send_src_ipv4
+
+ sysctl_set net.ipv6.fib_multipath_hash_fields 0x0400
+ custom_hash_test "Inner source port" "balanced" send_src_udp4
+ custom_hash_test "Inner source port" "unbalanced" send_dst_udp4
+
+ sysctl_set net.ipv6.fib_multipath_hash_fields 0x0800
+ custom_hash_test "Inner destination port" "balanced" send_dst_udp4
+ custom_hash_test "Inner destination port" "unbalanced" send_src_udp4
+
+ sysctl_restore net.ipv4.neigh.default.gc_thresh3
+ sysctl_restore net.ipv4.neigh.default.gc_thresh2
+ sysctl_restore net.ipv4.neigh.default.gc_thresh1
+}
+
+custom_hash_v6()
+{
+ log_info "Running IPv6 overlay custom multipath hash tests"
+
+ # Prevent the neighbour table from overflowing, as different neighbour
+ # entries will be created on $ol4 when using different destination IPs.
+ sysctl_set net.ipv6.neigh.default.gc_thresh1 1024
+ sysctl_set net.ipv6.neigh.default.gc_thresh2 1024
+ sysctl_set net.ipv6.neigh.default.gc_thresh3 1024
+
+ sysctl_set net.ipv6.fib_multipath_hash_fields 0x0040
+ custom_hash_test "Inner source IP" "balanced" send_src_ipv6
+ custom_hash_test "Inner source IP" "unbalanced" send_dst_ipv6
+
+ sysctl_set net.ipv6.fib_multipath_hash_fields 0x0080
+ custom_hash_test "Inner destination IP" "balanced" send_dst_ipv6
+ custom_hash_test "Inner destination IP" "unbalanced" send_src_ipv6
+
+ sysctl_set net.ipv6.fib_multipath_hash_fields 0x0200
+ custom_hash_test "Inner flowlabel" "balanced" send_flowlabel
+ custom_hash_test "Inner flowlabel" "unbalanced" send_src_ipv6
+
+ sysctl_set net.ipv6.fib_multipath_hash_fields 0x0400
+ custom_hash_test "Inner source port" "balanced" send_src_udp6
+ custom_hash_test "Inner source port" "unbalanced" send_dst_udp6
+
+ sysctl_set net.ipv6.fib_multipath_hash_fields 0x0800
+ custom_hash_test "Inner destination port" "balanced" send_dst_udp6
+ custom_hash_test "Inner destination port" "unbalanced" send_src_udp6
+
+ sysctl_restore net.ipv6.neigh.default.gc_thresh3
+ sysctl_restore net.ipv6.neigh.default.gc_thresh2
+ sysctl_restore net.ipv6.neigh.default.gc_thresh1
+}
+
+custom_hash()
+{
+ # Test that when the hash policy is set to custom, traffic is
+ # distributed only according to the fields set in the
+ # fib_multipath_hash_fields sysctl.
+ #
+ # Each time set a different field and make sure traffic is only
+ # distributed when the field is changed in the packet stream.
+
+ sysctl_set net.ipv6.fib_multipath_hash_policy 3
+
+ custom_hash_v4
+ custom_hash_v6
+
+ sysctl_restore net.ipv6.fib_multipath_hash_policy
+}
+
+trap cleanup EXIT
+
+setup_prepare
+setup_wait
+tests_run
+
+exit $EXIT_STATUS
diff --git a/tools/testing/selftests/net/forwarding/ip6gre_flat.sh b/tools/testing/selftests/net/forwarding/ip6gre_flat.sh
new file mode 100755
index 000000000000..96c97064f2d3
--- /dev/null
+++ b/tools/testing/selftests/net/forwarding/ip6gre_flat.sh
@@ -0,0 +1,65 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+# Test IP-in-IP GRE tunnel without key.
+# This test uses flat topology for IP tunneling tests. See ip6gre_lib.sh for
+# more details.
+
+ALL_TESTS="
+ gre_flat
+ gre_mtu_change
+"
+
+NUM_NETIFS=6
+source lib.sh
+source ip6gre_lib.sh
+
+setup_prepare()
+{
+ h1=${NETIFS[p1]}
+ ol1=${NETIFS[p2]}
+
+ ul1=${NETIFS[p3]}
+ ul2=${NETIFS[p4]}
+
+ ol2=${NETIFS[p5]}
+ h2=${NETIFS[p6]}
+
+ forwarding_enable
+ vrf_prepare
+ h1_create
+ h2_create
+ sw1_flat_create $ol1 $ul1
+ sw2_flat_create $ol2 $ul2
+}
+
+gre_flat()
+{
+ test_traffic_ip4ip6 "GRE flat IPv4-in-IPv6"
+ test_traffic_ip6ip6 "GRE flat IPv6-in-IPv6"
+}
+
+gre_mtu_change()
+{
+ test_mtu_change
+}
+
+cleanup()
+{
+ pre_cleanup
+
+ sw2_flat_destroy $ol2 $ul2
+ sw1_flat_destroy $ol1 $ul1
+ h2_destroy
+ h1_destroy
+ vrf_cleanup
+ forwarding_restore
+}
+
+trap cleanup EXIT
+
+setup_prepare
+setup_wait
+tests_run
+
+exit $EXIT_STATUS
diff --git a/tools/testing/selftests/net/forwarding/ip6gre_flat_key.sh b/tools/testing/selftests/net/forwarding/ip6gre_flat_key.sh
new file mode 100755
index 000000000000..ff9fb0db9bd1
--- /dev/null
+++ b/tools/testing/selftests/net/forwarding/ip6gre_flat_key.sh
@@ -0,0 +1,65 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+# Test IP-in-IP GRE tunnel with key.
+# This test uses flat topology for IP tunneling tests. See ip6gre_lib.sh for
+# more details.
+
+ALL_TESTS="
+ gre_flat
+ gre_mtu_change
+"
+
+NUM_NETIFS=6
+source lib.sh
+source ip6gre_lib.sh
+
+setup_prepare()
+{
+ h1=${NETIFS[p1]}
+ ol1=${NETIFS[p2]}
+
+ ul1=${NETIFS[p3]}
+ ul2=${NETIFS[p4]}
+
+ ol2=${NETIFS[p5]}
+ h2=${NETIFS[p6]}
+
+ forwarding_enable
+ vrf_prepare
+ h1_create
+ h2_create
+ sw1_flat_create $ol1 $ul1 key 233
+ sw2_flat_create $ol2 $ul2 key 233
+}
+
+gre_flat()
+{
+ test_traffic_ip4ip6 "GRE flat IPv4-in-IPv6 with key"
+ test_traffic_ip6ip6 "GRE flat IPv6-in-IPv6 with key"
+}
+
+gre_mtu_change()
+{
+ test_mtu_change
+}
+
+cleanup()
+{
+ pre_cleanup
+
+ sw2_flat_destroy $ol2 $ul2
+ sw1_flat_destroy $ol1 $ul1
+ h2_destroy
+ h1_destroy
+ vrf_cleanup
+ forwarding_restore
+}
+
+trap cleanup EXIT
+
+setup_prepare
+setup_wait
+tests_run
+
+exit $EXIT_STATUS
diff --git a/tools/testing/selftests/net/forwarding/ip6gre_flat_keys.sh b/tools/testing/selftests/net/forwarding/ip6gre_flat_keys.sh
new file mode 100755
index 000000000000..12c138785242
--- /dev/null
+++ b/tools/testing/selftests/net/forwarding/ip6gre_flat_keys.sh
@@ -0,0 +1,65 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+# Test IP-in-IP GRE tunnel with keys.
+# This test uses flat topology for IP tunneling tests. See ip6gre_lib.sh for
+# more details.
+
+ALL_TESTS="
+ gre_flat
+ gre_mtu_change
+"
+
+NUM_NETIFS=6
+source lib.sh
+source ip6gre_lib.sh
+
+setup_prepare()
+{
+ h1=${NETIFS[p1]}
+ ol1=${NETIFS[p2]}
+
+ ul1=${NETIFS[p3]}
+ ul2=${NETIFS[p4]}
+
+ ol2=${NETIFS[p5]}
+ h2=${NETIFS[p6]}
+
+ forwarding_enable
+ vrf_prepare
+ h1_create
+ h2_create
+ sw1_flat_create $ol1 $ul1 ikey 111 okey 222
+ sw2_flat_create $ol2 $ul2 ikey 222 okey 111
+}
+
+gre_flat()
+{
+ test_traffic_ip4ip6 "GRE flat IPv4-in-IPv6 with ikey/okey"
+ test_traffic_ip6ip6 "GRE flat IPv6-in-IPv6 with ikey/okey"
+}
+
+gre_mtu_change()
+{
+ test_mtu_change gre
+}
+
+cleanup()
+{
+ pre_cleanup
+
+ sw2_flat_destroy $ol2 $ul2
+ sw1_flat_destroy $ol1 $ul1
+ h2_destroy
+ h1_destroy
+ vrf_cleanup
+ forwarding_restore
+}
+
+trap cleanup EXIT
+
+setup_prepare
+setup_wait
+tests_run
+
+exit $EXIT_STATUS
diff --git a/tools/testing/selftests/net/forwarding/ip6gre_hier.sh b/tools/testing/selftests/net/forwarding/ip6gre_hier.sh
new file mode 100755
index 000000000000..83b55c30a5c3
--- /dev/null
+++ b/tools/testing/selftests/net/forwarding/ip6gre_hier.sh
@@ -0,0 +1,65 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+# Test IP-in-IP GRE tunnels without key.
+# This test uses hierarchical topology for IP tunneling tests. See
+# ip6gre_lib.sh for more details.
+
+ALL_TESTS="
+ gre_hier
+ gre_mtu_change
+"
+
+NUM_NETIFS=6
+source lib.sh
+source ip6gre_lib.sh
+
+setup_prepare()
+{
+ h1=${NETIFS[p1]}
+ ol1=${NETIFS[p2]}
+
+ ul1=${NETIFS[p3]}
+ ul2=${NETIFS[p4]}
+
+ ol2=${NETIFS[p5]}
+ h2=${NETIFS[p6]}
+
+ forwarding_enable
+ vrf_prepare
+ h1_create
+ h2_create
+ sw1_hierarchical_create $ol1 $ul1
+ sw2_hierarchical_create $ol2 $ul2
+}
+
+gre_hier()
+{
+ test_traffic_ip4ip6 "GRE hierarchical IPv4-in-IPv6"
+ test_traffic_ip6ip6 "GRE hierarchical IPv6-in-IPv6"
+}
+
+gre_mtu_change()
+{
+ test_mtu_change gre
+}
+
+cleanup()
+{
+ pre_cleanup
+
+ sw2_hierarchical_destroy $ol2 $ul2
+ sw1_hierarchical_destroy $ol1 $ul1
+ h2_destroy
+ h1_destroy
+ vrf_cleanup
+ forwarding_restore
+}
+
+trap cleanup EXIT
+
+setup_prepare
+setup_wait
+tests_run
+
+exit $EXIT_STATUS
diff --git a/tools/testing/selftests/net/forwarding/ip6gre_hier_key.sh b/tools/testing/selftests/net/forwarding/ip6gre_hier_key.sh
new file mode 100755
index 000000000000..256607916d92
--- /dev/null
+++ b/tools/testing/selftests/net/forwarding/ip6gre_hier_key.sh
@@ -0,0 +1,65 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+# Test IP-in-IP GRE tunnels without key.
+# This test uses hierarchical topology for IP tunneling tests. See
+# ip6gre_lib.sh for more details.
+
+ALL_TESTS="
+ gre_hier
+ gre_mtu_change
+"
+
+NUM_NETIFS=6
+source lib.sh
+source ip6gre_lib.sh
+
+setup_prepare()
+{
+ h1=${NETIFS[p1]}
+ ol1=${NETIFS[p2]}
+
+ ul1=${NETIFS[p3]}
+ ul2=${NETIFS[p4]}
+
+ ol2=${NETIFS[p5]}
+ h2=${NETIFS[p6]}
+
+ forwarding_enable
+ vrf_prepare
+ h1_create
+ h2_create
+ sw1_hierarchical_create $ol1 $ul1 key 22
+ sw2_hierarchical_create $ol2 $ul2 key 22
+}
+
+gre_hier()
+{
+ test_traffic_ip4ip6 "GRE hierarchical IPv4-in-IPv6 with key"
+ test_traffic_ip6ip6 "GRE hierarchical IPv6-in-IPv6 with key"
+}
+
+gre_mtu_change()
+{
+ test_mtu_change gre
+}
+
+cleanup()
+{
+ pre_cleanup
+
+ sw2_hierarchical_destroy $ol2 $ul2
+ sw1_hierarchical_destroy $ol1 $ul1
+ h2_destroy
+ h1_destroy
+ vrf_cleanup
+ forwarding_restore
+}
+
+trap cleanup EXIT
+
+setup_prepare
+setup_wait
+tests_run
+
+exit $EXIT_STATUS
diff --git a/tools/testing/selftests/net/forwarding/ip6gre_hier_keys.sh b/tools/testing/selftests/net/forwarding/ip6gre_hier_keys.sh
new file mode 100755
index 000000000000..ad1bcd6334a8
--- /dev/null
+++ b/tools/testing/selftests/net/forwarding/ip6gre_hier_keys.sh
@@ -0,0 +1,65 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+# Test IP-in-IP GRE tunnels without key.
+# This test uses hierarchical topology for IP tunneling tests. See
+# ip6gre_lib.sh for more details.
+
+ALL_TESTS="
+ gre_hier
+ gre_mtu_change
+"
+
+NUM_NETIFS=6
+source lib.sh
+source ip6gre_lib.sh
+
+setup_prepare()
+{
+ h1=${NETIFS[p1]}
+ ol1=${NETIFS[p2]}
+
+ ul1=${NETIFS[p3]}
+ ul2=${NETIFS[p4]}
+
+ ol2=${NETIFS[p5]}
+ h2=${NETIFS[p6]}
+
+ forwarding_enable
+ vrf_prepare
+ h1_create
+ h2_create
+ sw1_hierarchical_create $ol1 $ul1 ikey 111 okey 222
+ sw2_hierarchical_create $ol2 $ul2 ikey 222 okey 111
+}
+
+gre_hier()
+{
+ test_traffic_ip4ip6 "GRE hierarchical IPv4-in-IPv6 with ikey/okey"
+ test_traffic_ip6ip6 "GRE hierarchical IPv6-in-IPv6 with ikey/okey"
+}
+
+gre_mtu_change()
+{
+ test_mtu_change gre
+}
+
+cleanup()
+{
+ pre_cleanup
+
+ sw2_hierarchical_destroy $ol2 $ul2
+ sw1_hierarchical_destroy $ol1 $ul1
+ h2_destroy
+ h1_destroy
+ vrf_cleanup
+ forwarding_restore
+}
+
+trap cleanup EXIT
+
+setup_prepare
+setup_wait
+tests_run
+
+exit $EXIT_STATUS
diff --git a/tools/testing/selftests/net/forwarding/ip6gre_inner_v4_multipath.sh b/tools/testing/selftests/net/forwarding/ip6gre_inner_v4_multipath.sh
index a257979d3fc5..32d1461f37b7 100755
--- a/tools/testing/selftests/net/forwarding/ip6gre_inner_v4_multipath.sh
+++ b/tools/testing/selftests/net/forwarding/ip6gre_inner_v4_multipath.sh
@@ -266,7 +266,7 @@ multipath4_test()
ip vrf exec v$h1 \
$MZ $h1 -q -p 64 -A "192.0.3.2-192.0.3.62" -B "192.0.4.2-192.0.4.62" \
- -d 1msec -c 50 -t udp "sp=1024,dp=1024"
+ -d $MZ_DELAY -c 50 -t udp "sp=1024,dp=1024"
sleep 1
local t1_111=$(tc_rule_stats_get $ul32 111 ingress)
diff --git a/tools/testing/selftests/net/forwarding/ip6gre_inner_v6_multipath.sh b/tools/testing/selftests/net/forwarding/ip6gre_inner_v6_multipath.sh
index d208f5243ade..e1a4b50505f5 100755
--- a/tools/testing/selftests/net/forwarding/ip6gre_inner_v6_multipath.sh
+++ b/tools/testing/selftests/net/forwarding/ip6gre_inner_v6_multipath.sh
@@ -265,9 +265,9 @@ multipath6_test()
local t0_222=$(tc_rule_stats_get $ul32 222 ingress)
ip vrf exec v$h1 \
- $MZ $h1 -6 -q -p 64 -A "2001:db8:1::2-2001:db8:1::1e" \
- -B "2001:db8:2::2-2001:db8:2::1e" \
- -d 1msec -c 50 -t udp "sp=1024,dp=1024"
+ $MZ $h1 -6 -q -p 64 -A "2001:db8:1::2-2001:db8:1::3e" \
+ -B "2001:db8:2::2-2001:db8:2::3e" \
+ -d $MZ_DELAY -c 50 -t udp "sp=1024,dp=1024"
sleep 1
local t1_111=$(tc_rule_stats_get $ul32 111 ingress)
diff --git a/tools/testing/selftests/net/forwarding/ip6gre_lib.sh b/tools/testing/selftests/net/forwarding/ip6gre_lib.sh
new file mode 100644
index 000000000000..24f4ab328bd2
--- /dev/null
+++ b/tools/testing/selftests/net/forwarding/ip6gre_lib.sh
@@ -0,0 +1,438 @@
+# SPDX-License-Identifier: GPL-2.0
+#!/bin/bash
+
+# Handles creation and destruction of IP-in-IP or GRE tunnels over the given
+# topology. Supports both flat and hierarchical models.
+#
+# Flat Model:
+# Overlay and underlay share the same VRF.
+# SW1 uses default VRF so tunnel has no bound dev.
+# SW2 uses non-default VRF tunnel has a bound dev.
+# +--------------------------------+
+# | H1 |
+# | $h1 + |
+# | 198.51.100.1/24 | |
+# | 2001:db8:1::1/64 | |
+# +-------------------------|------+
+# |
+# +-------------------------|-------------------+
+# | SW1 | |
+# | $ol1 + |
+# | 198.51.100.2/24 |
+# | 2001:db8:1::2/64 |
+# | |
+# | + g1a (ip6gre) |
+# | loc=2001:db8:3::1 |
+# | rem=2001:db8:3::2 --. |
+# | tos=inherit | |
+# | . |
+# | .--------------------- |
+# | | |
+# | v |
+# | + $ul1.111 (vlan) |
+# | | 2001:db8:10::1/64 |
+# | \ |
+# | \____________ |
+# | | |
+# | VRF default + $ul1 |
+# +---------------------|-----------------------+
+# |
+# +---------------------|-----------------------+
+# | SW2 | |
+# | $ul2 + |
+# | ___________| |
+# | / |
+# | / |
+# | + $ul2.111 (vlan) |
+# | ^ 2001:db8:10::2/64 |
+# | | |
+# | | |
+# | '----------------------. |
+# | + g2a (ip6gre) | |
+# | loc=2001:db8:3::2 | |
+# | rem=2001:db8:3::1 --' |
+# | tos=inherit |
+# | |
+# | + $ol2 |
+# | | 203.0.113.2/24 |
+# | VRF v$ol2 | 2001:db8:2::2/64 |
+# +---------------------|-----------------------+
+# +---------------------|----------+
+# | H2 | |
+# | $h2 + |
+# | 203.0.113.1/24 |
+# | 2001:db8:2::1/64 |
+# +--------------------------------+
+#
+# Hierarchical model:
+# The tunnel is bound to a device in a different VRF
+#
+# +--------------------------------+
+# | H1 |
+# | $h1 + |
+# | 198.51.100.1/24 | |
+# | 2001:db8:1::1/64 | |
+# +-------------------------|------+
+# |
+# +-------------------------|-------------------+
+# | SW1 | |
+# | +-----------------------|-----------------+ |
+# | | $ol1 + | |
+# | | 198.51.100.2/24 | |
+# | | 2001:db8:1::2/64 | |
+# | | | |
+# | | + g1a (ip6gre) | |
+# | | loc=2001:db8:3::1 | |
+# | | rem=2001:db8:3::2 | |
+# | | tos=inherit | |
+# | | ^ | |
+# | | VRF v$ol1 | | |
+# | +--------------------|--------------------+ |
+# | | |
+# | +--------------------|--------------------+ |
+# | | VRF v$ul1 | | |
+# | | | | |
+# | | v | |
+# | | dummy1 + | |
+# | | 2001:db8:3::1/64 | |
+# | | .-----------' | |
+# | | | | |
+# | | v | |
+# | | + $ul1.111 (vlan) | |
+# | | | 2001:db8:10::1/64 | |
+# | | \ | |
+# | | \__________ | |
+# | | | | |
+# | | + $ul1 | |
+# | +---------------------|-------------------+ |
+# +-----------------------|---------------------+
+# |
+# +-----------------------|---------------------+
+# | SW2 | |
+# | +---------------------|-------------------+ |
+# | | + $ul2 | |
+# | | _____| | |
+# | | / | |
+# | | / | |
+# | | | $ul2.111 (vlan) | |
+# | | + 2001:db8:10::2/64 | |
+# | | ^ | |
+# | | | | |
+# | | '------. | |
+# | | dummy2 + | |
+# | | 2001:db8:3::2/64 | |
+# | | ^ | |
+# | | | | |
+# | | | | |
+# | | VRF v$ul2 | | |
+# | +---------------------|-------------------+ |
+# | | |
+# | +---------------------|-------------------+ |
+# | | VRF v$ol2 | | |
+# | | | | |
+# | | v | |
+# | | g2a (ip6gre) + | |
+# | | loc=2001:db8:3::2 | |
+# | | rem=2001:db8:3::1 | |
+# | | tos=inherit | |
+# | | | |
+# | | $ol2 + | |
+# | | 203.0.113.2/24 | | |
+# | | 2001:db8:2::2/64 | | |
+# | +---------------------|-------------------+ |
+# +-----------------------|---------------------+
+# |
+# +-----------------------|--------+
+# | H2 | |
+# | $h2 + |
+# | 203.0.113.1/24 |
+# | 2001:db8:2::1/64 |
+# +--------------------------------+
+
+source lib.sh
+source tc_common.sh
+
+h1_create()
+{
+ simple_if_init $h1 198.51.100.1/24 2001:db8:1::1/64
+ ip route add vrf v$h1 203.0.113.0/24 via 198.51.100.2
+ ip -6 route add vrf v$h1 2001:db8:2::/64 via 2001:db8:1::2
+}
+
+h1_destroy()
+{
+ ip -6 route del vrf v$h1 2001:db8:2::/64 via 2001:db8:1::2
+ ip route del vrf v$h1 203.0.113.0/24 via 198.51.100.2
+ simple_if_fini $h1 198.51.100.1/24 2001:db8:1::1/64
+}
+
+h2_create()
+{
+ simple_if_init $h2 203.0.113.1/24 2001:db8:2::1/64
+ ip route add vrf v$h2 198.51.100.0/24 via 203.0.113.2
+ ip -6 route add vrf v$h2 2001:db8:1::/64 via 2001:db8:2::2
+}
+
+h2_destroy()
+{
+ ip -6 route del vrf v$h2 2001:db8:1::/64 via 2001:db8:2::2
+ ip route del vrf v$h2 198.51.100.0/24 via 203.0.113.2
+ simple_if_fini $h2 203.0.113.1/24 2001:db8:2::1/64
+}
+
+sw1_flat_create()
+{
+ local ol1=$1; shift
+ local ul1=$1; shift
+
+ ip link set dev $ol1 up
+ __addr_add_del $ol1 add 198.51.100.2/24 2001:db8:1::2/64
+
+ ip link set dev $ul1 up
+ vlan_create $ul1 111 "" 2001:db8:10::1/64
+
+ tunnel_create g1a ip6gre 2001:db8:3::1 2001:db8:3::2 tos inherit \
+ ttl inherit "$@"
+ ip link set dev g1a up
+ __addr_add_del g1a add "2001:db8:3::1/128"
+
+ ip -6 route add 2001:db8:3::2/128 via 2001:db8:10::2
+ ip route add 203.0.113.0/24 dev g1a
+ ip -6 route add 2001:db8:2::/64 dev g1a
+}
+
+sw1_flat_destroy()
+{
+ local ol1=$1; shift
+ local ul1=$1; shift
+
+ ip -6 route del 2001:db8:2::/64
+ ip route del 203.0.113.0/24
+ ip -6 route del 2001:db8:3::2/128 via 2001:db8:10::2
+
+ __simple_if_fini g1a 2001:db8:3::1/128
+ tunnel_destroy g1a
+
+ vlan_destroy $ul1 111
+ __simple_if_fini $ul1
+ __simple_if_fini $ol1 198.51.100.2/24 2001:db8:1::2/64
+}
+
+sw2_flat_create()
+{
+ local ol2=$1; shift
+ local ul2=$1; shift
+
+ simple_if_init $ol2 203.0.113.2/24 2001:db8:2::2/64
+ __simple_if_init $ul2 v$ol2
+ vlan_create $ul2 111 v$ol2 2001:db8:10::2/64
+
+ tunnel_create g2a ip6gre 2001:db8:3::2 2001:db8:3::1 tos inherit \
+ ttl inherit dev v$ol2 "$@"
+ __simple_if_init g2a v$ol2 2001:db8:3::2/128
+
+ # Replace neighbor to avoid 1 dropped packet due to "unresolved neigh"
+ ip neigh replace dev $ol2 203.0.113.1 lladdr $(mac_get $h2)
+ ip -6 neigh replace dev $ol2 2001:db8:2::1 lladdr $(mac_get $h2)
+
+ ip -6 route add vrf v$ol2 2001:db8:3::1/128 via 2001:db8:10::1
+ ip route add vrf v$ol2 198.51.100.0/24 dev g2a
+ ip -6 route add vrf v$ol2 2001:db8:1::/64 dev g2a
+}
+
+sw2_flat_destroy()
+{
+ local ol2=$1; shift
+ local ul2=$1; shift
+
+ ip -6 route del vrf v$ol2 2001:db8:2::/64
+ ip route del vrf v$ol2 198.51.100.0/24
+ ip -6 route del vrf v$ol2 2001:db8:3::1/128 via 2001:db8:10::1
+
+ __simple_if_fini g2a 2001:db8:3::2/128
+ tunnel_destroy g2a
+
+ vlan_destroy $ul2 111
+ __simple_if_fini $ul2
+ simple_if_fini $ol2 203.0.113.2/24 2001:db8:2::2/64
+}
+
+sw1_hierarchical_create()
+{
+ local ol1=$1; shift
+ local ul1=$1; shift
+
+ simple_if_init $ol1 198.51.100.2/24 2001:db8:1::2/64
+ simple_if_init $ul1
+ ip link add name dummy1 type dummy
+ __simple_if_init dummy1 v$ul1 2001:db8:3::1/64
+
+ vlan_create $ul1 111 v$ul1 2001:db8:10::1/64
+ tunnel_create g1a ip6gre 2001:db8:3::1 2001:db8:3::2 tos inherit \
+ ttl inherit dev dummy1 "$@"
+ ip link set dev g1a master v$ol1
+
+ ip -6 route add vrf v$ul1 2001:db8:3::2/128 via 2001:db8:10::2
+ ip route add vrf v$ol1 203.0.113.0/24 dev g1a
+ ip -6 route add vrf v$ol1 2001:db8:2::/64 dev g1a
+}
+
+sw1_hierarchical_destroy()
+{
+ local ol1=$1; shift
+ local ul1=$1; shift
+
+ ip -6 route del vrf v$ol1 2001:db8:2::/64
+ ip route del vrf v$ol1 203.0.113.0/24
+ ip -6 route del vrf v$ul1 2001:db8:3::2/128
+
+ tunnel_destroy g1a
+ vlan_destroy $ul1 111
+
+ __simple_if_fini dummy1 2001:db8:3::1/64
+ ip link del dev dummy1
+
+ simple_if_fini $ul1
+ simple_if_fini $ol1 198.51.100.2/24 2001:db8:1::2/64
+}
+
+sw2_hierarchical_create()
+{
+ local ol2=$1; shift
+ local ul2=$1; shift
+
+ simple_if_init $ol2 203.0.113.2/24 2001:db8:2::2/64
+ simple_if_init $ul2
+
+ ip link add name dummy2 type dummy
+ __simple_if_init dummy2 v$ul2 2001:db8:3::2/64
+
+ vlan_create $ul2 111 v$ul2 2001:db8:10::2/64
+ tunnel_create g2a ip6gre 2001:db8:3::2 2001:db8:3::1 tos inherit \
+ ttl inherit dev dummy2 "$@"
+ ip link set dev g2a master v$ol2
+
+ # Replace neighbor to avoid 1 dropped packet due to "unresolved neigh"
+ ip neigh replace dev $ol2 203.0.113.1 lladdr $(mac_get $h2)
+ ip -6 neigh replace dev $ol2 2001:db8:2::1 lladdr $(mac_get $h2)
+
+ ip -6 route add vrf v$ul2 2001:db8:3::1/128 via 2001:db8:10::1
+ ip route add vrf v$ol2 198.51.100.0/24 dev g2a
+ ip -6 route add vrf v$ol2 2001:db8:1::/64 dev g2a
+}
+
+sw2_hierarchical_destroy()
+{
+ local ol2=$1; shift
+ local ul2=$1; shift
+
+ ip -6 route del vrf v$ol2 2001:db8:2::/64
+ ip route del vrf v$ol2 198.51.100.0/24
+ ip -6 route del vrf v$ul2 2001:db8:3::1/128
+
+ tunnel_destroy g2a
+ vlan_destroy $ul2 111
+
+ __simple_if_fini dummy2 2001:db8:3::2/64
+ ip link del dev dummy2
+
+ simple_if_fini $ul2
+ simple_if_fini $ol2 203.0.113.2/24 2001:db8:2::2/64
+}
+
+test_traffic_ip4ip6()
+{
+ RET=0
+
+ h1mac=$(mac_get $h1)
+ ol1mac=$(mac_get $ol1)
+
+ tc qdisc add dev $ul1 clsact
+ tc filter add dev $ul1 egress proto all pref 1 handle 101 \
+ flower $TC_FLAG action pass
+
+ tc qdisc add dev $ol2 clsact
+ tc filter add dev $ol2 egress protocol ipv4 pref 1 handle 101 \
+ flower $TC_FLAG dst_ip 203.0.113.1 action pass
+
+ $MZ $h1 -c 1000 -p 64 -a $h1mac -b $ol1mac -A 198.51.100.1 \
+ -B 203.0.113.1 -t ip -q -d $MZ_DELAY
+
+ # Check ports after encap and after decap.
+ tc_check_at_least_x_packets "dev $ul1 egress" 101 1000
+ check_err $? "Packets did not go through $ul1, tc_flag = $TC_FLAG"
+
+ tc_check_at_least_x_packets "dev $ol2 egress" 101 1000
+ check_err $? "Packets did not go through $ol2, tc_flag = $TC_FLAG"
+
+ log_test "$@"
+
+ tc filter del dev $ol2 egress protocol ipv4 pref 1 handle 101 flower
+ tc qdisc del dev $ol2 clsact
+ tc filter del dev $ul1 egress proto all pref 1 handle 101 flower
+ tc qdisc del dev $ul1 clsact
+}
+
+test_traffic_ip6ip6()
+{
+ RET=0
+
+ h1mac=$(mac_get $h1)
+ ol1mac=$(mac_get $ol1)
+
+ tc qdisc add dev $ul1 clsact
+ tc filter add dev $ul1 egress proto all pref 1 handle 101 \
+ flower $TC_FLAG action pass
+
+ tc qdisc add dev $ol2 clsact
+ tc filter add dev $ol2 egress protocol ipv6 pref 1 handle 101 \
+ flower $TC_FLAG dst_ip 2001:db8:2::1 action pass
+
+ $MZ -6 $h1 -c 1000 -p 64 -a $h1mac -b $ol1mac -A 2001:db8:1::1 \
+ -B 2001:db8:2::1 -t ip -q -d $MZ_DELAY
+
+ # Check ports after encap and after decap.
+ tc_check_at_least_x_packets "dev $ul1 egress" 101 1000
+ check_err $? "Packets did not go through $ul1, tc_flag = $TC_FLAG"
+
+ tc_check_at_least_x_packets "dev $ol2 egress" 101 1000
+ check_err $? "Packets did not go through $ol2, tc_flag = $TC_FLAG"
+
+ log_test "$@"
+
+ tc filter del dev $ol2 egress protocol ipv6 pref 1 handle 101 flower
+ tc qdisc del dev $ol2 clsact
+ tc filter del dev $ul1 egress proto all pref 1 handle 101 flower
+ tc qdisc del dev $ul1 clsact
+}
+
+topo_mtu_change()
+{
+ local mtu=$1
+
+ ip link set mtu $mtu dev $h1
+ ip link set mtu $mtu dev $ol1
+ ip link set mtu $mtu dev g1a
+ ip link set mtu $mtu dev $ul1
+ ip link set mtu $mtu dev $ul1.111
+ ip link set mtu $mtu dev $h2
+ ip link set mtu $mtu dev $ol2
+ ip link set mtu $mtu dev g2a
+ ip link set mtu $mtu dev $ul2
+ ip link set mtu $mtu dev $ul2.111
+}
+
+test_mtu_change()
+{
+ RET=0
+
+ ping6_do $h1 2001:db8:2::1 "-s 1800 -w 3"
+ check_fail $? "ping GRE IPv6 should not pass with packet size 1800"
+
+ RET=0
+
+ topo_mtu_change 2000
+ ping6_do $h1 2001:db8:2::1 "-s 1800 -w 3"
+ check_err $?
+ log_test "ping GRE IPv6, packet size 1800 after MTU change"
+}
diff --git a/tools/testing/selftests/net/forwarding/lib.sh b/tools/testing/selftests/net/forwarding/lib.sh
index 977fc2b326a2..e579c2e0c462 100644
--- a/tools/testing/selftests/net/forwarding/lib.sh
+++ b/tools/testing/selftests/net/forwarding/lib.sh
@@ -8,6 +8,7 @@
PING=${PING:=ping}
PING6=${PING6:=ping6}
MZ=${MZ:=mausezahn}
+MZ_DELAY=${MZ_DELAY:=0}
ARPING=${ARPING:=arping}
TEAMD=${TEAMD:=teamd}
WAIT_TIME=${WAIT_TIME:=5}
@@ -17,18 +18,51 @@ NETIF_TYPE=${NETIF_TYPE:=veth}
NETIF_CREATE=${NETIF_CREATE:=yes}
MCD=${MCD:=smcrouted}
MC_CLI=${MC_CLI:=smcroutectl}
+PING_COUNT=${PING_COUNT:=10}
PING_TIMEOUT=${PING_TIMEOUT:=5}
WAIT_TIMEOUT=${WAIT_TIMEOUT:=20}
INTERFACE_TIMEOUT=${INTERFACE_TIMEOUT:=600}
-
-relative_path="${BASH_SOURCE%/*}"
-if [[ "$relative_path" == "${BASH_SOURCE}" ]]; then
- relative_path="."
+LOW_AGEING_TIME=${LOW_AGEING_TIME:=1000}
+REQUIRE_JQ=${REQUIRE_JQ:=yes}
+REQUIRE_MZ=${REQUIRE_MZ:=yes}
+REQUIRE_MTOOLS=${REQUIRE_MTOOLS:=no}
+STABLE_MAC_ADDRS=${STABLE_MAC_ADDRS:=no}
+TCPDUMP_EXTRA_FLAGS=${TCPDUMP_EXTRA_FLAGS:=}
+TROUTE6=${TROUTE6:=traceroute6}
+
+net_forwarding_dir=$(dirname "$(readlink -e "${BASH_SOURCE[0]}")")
+
+if [[ -f $net_forwarding_dir/forwarding.config ]]; then
+ source "$net_forwarding_dir/forwarding.config"
fi
-if [[ -f $relative_path/forwarding.config ]]; then
- source "$relative_path/forwarding.config"
-fi
+source "$net_forwarding_dir/../lib.sh"
+
+# timeout in seconds
+slowwait()
+{
+ local timeout=$1; shift
+
+ local start_time="$(date -u +%s)"
+ while true
+ do
+ local out
+ out=$("$@")
+ local ret=$?
+ if ((!ret)); then
+ echo -n "$out"
+ return 0
+ fi
+
+ local current_time="$(date -u +%s)"
+ if ((current_time - start_time > timeout)); then
+ echo -n "$out"
+ return 1
+ fi
+
+ sleep 0.1
+ done
+}
##############################################################################
# Sanity checks
@@ -38,7 +72,48 @@ check_tc_version()
tc -j &> /dev/null
if [[ $? -ne 0 ]]; then
echo "SKIP: iproute2 too old; tc is missing JSON support"
- exit 1
+ exit $ksft_skip
+ fi
+}
+
+# Old versions of tc don't understand "mpls_uc"
+check_tc_mpls_support()
+{
+ local dev=$1; shift
+
+ tc filter add dev $dev ingress protocol mpls_uc pref 1 handle 1 \
+ matchall action pipe &> /dev/null
+ if [[ $? -ne 0 ]]; then
+ echo "SKIP: iproute2 too old; tc is missing MPLS support"
+ return $ksft_skip
+ fi
+ tc filter del dev $dev ingress protocol mpls_uc pref 1 handle 1 \
+ matchall
+}
+
+# Old versions of tc produce invalid json output for mpls lse statistics
+check_tc_mpls_lse_stats()
+{
+ local dev=$1; shift
+ local ret;
+
+ tc filter add dev $dev ingress protocol mpls_uc pref 1 handle 1 \
+ flower mpls lse depth 2 \
+ action continue &> /dev/null
+
+ if [[ $? -ne 0 ]]; then
+ echo "SKIP: iproute2 too old; tc-flower is missing extended MPLS support"
+ return $ksft_skip
+ fi
+
+ tc -j filter show dev $dev ingress protocol mpls_uc | jq . &> /dev/null
+ ret=$?
+ tc filter del dev $dev ingress protocol mpls_uc pref 1 handle 1 \
+ flower
+
+ if [[ $ret -ne 0 ]]; then
+ echo "SKIP: iproute2 too old; tc-flower produces invalid json output for extended MPLS filters"
+ return $ksft_skip
fi
}
@@ -47,7 +122,7 @@ check_tc_shblock_support()
tc filter help 2>&1 | grep block &> /dev/null
if [[ $? -ne 0 ]]; then
echo "SKIP: iproute2 too old; tc is missing shared block support"
- exit 1
+ exit $ksft_skip
fi
}
@@ -56,7 +131,7 @@ check_tc_chain_support()
tc help 2>&1|grep chain &> /dev/null
if [[ $? -ne 0 ]]; then
echo "SKIP: iproute2 too old; tc is missing chain support"
- exit 1
+ exit $ksft_skip
fi
}
@@ -65,13 +140,85 @@ check_tc_action_hw_stats_support()
tc actions help 2>&1 | grep -q hw_stats
if [[ $? -ne 0 ]]; then
echo "SKIP: iproute2 too old; tc is missing action hw_stats support"
- exit 1
+ exit $ksft_skip
+ fi
+}
+
+check_tc_fp_support()
+{
+ tc qdisc add dev lo mqprio help 2>&1 | grep -q "fp "
+ if [[ $? -ne 0 ]]; then
+ echo "SKIP: iproute2 too old; tc is missing frame preemption support"
+ exit $ksft_skip
+ fi
+}
+
+check_ethtool_lanes_support()
+{
+ ethtool --help 2>&1| grep lanes &> /dev/null
+ if [[ $? -ne 0 ]]; then
+ echo "SKIP: ethtool too old; it is missing lanes support"
+ exit $ksft_skip
+ fi
+}
+
+check_ethtool_mm_support()
+{
+ ethtool --help 2>&1| grep -- '--show-mm' &> /dev/null
+ if [[ $? -ne 0 ]]; then
+ echo "SKIP: ethtool too old; it is missing MAC Merge layer support"
+ exit $ksft_skip
+ fi
+}
+
+check_ethtool_counter_group_support()
+{
+ ethtool --help 2>&1| grep -- '--all-groups' &> /dev/null
+ if [[ $? -ne 0 ]]; then
+ echo "SKIP: ethtool too old; it is missing standard counter group support"
+ exit $ksft_skip
+ fi
+}
+
+check_ethtool_pmac_std_stats_support()
+{
+ local dev=$1; shift
+ local grp=$1; shift
+
+ [ 0 -ne $(ethtool --json -S $dev --all-groups --src pmac 2>/dev/null \
+ | jq ".[].\"$grp\" | length") ]
+}
+
+check_locked_port_support()
+{
+ if ! bridge -d link show | grep -q " locked"; then
+ echo "SKIP: iproute2 too old; Locked port feature not supported."
+ return $ksft_skip
+ fi
+}
+
+check_port_mab_support()
+{
+ if ! bridge -d link show | grep -q "mab"; then
+ echo "SKIP: iproute2 too old; MacAuth feature not supported."
+ return $ksft_skip
+ fi
+}
+
+skip_on_veth()
+{
+ local kind=$(ip -j -d link show dev ${NETIFS[p1]} |
+ jq -r '.[].linkinfo.info_kind')
+
+ if [[ $kind == veth ]]; then
+ echo "SKIP: Test cannot be run with veth pairs"
+ exit $ksft_skip
fi
}
if [[ "$(id -u)" -ne 0 ]]; then
echo "SKIP: need root privileges"
- exit 0
+ exit $ksft_skip
fi
if [[ "$CHECK_TC" = "yes" ]]; then
@@ -84,16 +231,26 @@ require_command()
if [[ ! -x "$(command -v "$cmd")" ]]; then
echo "SKIP: $cmd not installed"
- exit 1
+ exit $ksft_skip
fi
}
-require_command jq
-require_command $MZ
+if [[ "$REQUIRE_JQ" = "yes" ]]; then
+ require_command jq
+fi
+if [[ "$REQUIRE_MZ" = "yes" ]]; then
+ require_command $MZ
+fi
+if [[ "$REQUIRE_MTOOLS" = "yes" ]]; then
+ # https://github.com/vladimiroltean/mtools/
+ # patched for IPv6 support
+ require_command msend
+ require_command mreceive
+fi
if [[ ! -v NUM_NETIFS ]]; then
echo "SKIP: importer does not define \"NUM_NETIFS\""
- exit 1
+ exit $ksft_skip
fi
##############################################################################
@@ -121,6 +278,11 @@ create_netif_veth()
for ((i = 1; i <= NUM_NETIFS; ++i)); do
local j=$((i+1))
+ if [ -z ${NETIFS[p$i]} ]; then
+ echo "SKIP: Cannot create interface. Name not specified"
+ exit $ksft_skip
+ fi
+
ip link show dev ${NETIFS[p$i]} &> /dev/null
if [[ $? -ne 0 ]]; then
ip link add ${NETIFS[p$i]} type veth \
@@ -145,15 +307,46 @@ create_netif()
esac
}
+declare -A MAC_ADDR_ORIG
+mac_addr_prepare()
+{
+ local new_addr=
+ local dev=
+
+ for ((i = 1; i <= NUM_NETIFS; ++i)); do
+ dev=${NETIFS[p$i]}
+ new_addr=$(printf "00:01:02:03:04:%02x" $i)
+
+ MAC_ADDR_ORIG["$dev"]=$(ip -j link show dev $dev | jq -e '.[].address')
+ # Strip quotes
+ MAC_ADDR_ORIG["$dev"]=${MAC_ADDR_ORIG["$dev"]//\"/}
+ ip link set dev $dev address $new_addr
+ done
+}
+
+mac_addr_restore()
+{
+ local dev=
+
+ for ((i = 1; i <= NUM_NETIFS; ++i)); do
+ dev=${NETIFS[p$i]}
+ ip link set dev $dev address ${MAC_ADDR_ORIG["$dev"]}
+ done
+}
+
if [[ "$NETIF_CREATE" = "yes" ]]; then
create_netif
fi
+if [[ "$STABLE_MAC_ADDRS" = "yes" ]]; then
+ mac_addr_prepare
+fi
+
for ((i = 1; i <= NUM_NETIFS; ++i)); do
ip link show dev ${NETIFS[p$i]} &> /dev/null
if [[ $? -ne 0 ]]; then
echo "SKIP: could not find all required interfaces"
- exit 1
+ exit $ksft_skip
fi
done
@@ -227,6 +420,15 @@ log_test()
return 0
}
+log_test_skip()
+{
+ local test_name=$1
+ local opt_str=$2
+
+ printf "TEST: %-60s [SKIP]\n" "$test_name $opt_str"
+ return 0
+}
+
log_info()
{
local msg=$1
@@ -234,33 +436,24 @@ log_info()
echo "INFO: $msg"
}
-busywait()
+not()
{
- local timeout=$1; shift
+ "$@"
+ [[ $? != 0 ]]
+}
- local start_time="$(date -u +%s%3N)"
- while true
- do
- local out
- out=$("$@")
- local ret=$?
- if ((!ret)); then
- echo -n "$out"
- return 0
- fi
+get_max()
+{
+ local arr=("$@")
- local current_time="$(date -u +%s%3N)"
- if ((current_time - start_time > timeout)); then
- echo -n "$out"
- return 1
+ max=${arr[0]}
+ for cur in ${arr[@]}; do
+ if [[ $cur -gt $max ]]; then
+ max=$cur
fi
done
-}
-not()
-{
- "$@"
- [[ $? != 0 ]]
+ echo $max
}
grep_bridge_fdb()
@@ -279,11 +472,21 @@ grep_bridge_fdb()
$@ | grep $addr | grep $flag "$word"
}
+wait_for_port_up()
+{
+ "$@" | grep -q "Link detected: yes"
+}
+
wait_for_offload()
{
"$@" | grep -q offload
}
+wait_for_trap()
+{
+ "$@" | grep -q trap
+}
+
until_counter_is()
{
local expr=$1; shift
@@ -302,6 +505,15 @@ busywait_for_counter()
busywait "$timeout" until_counter_is ">= $((base + delta))" "$@"
}
+slowwait_for_counter()
+{
+ local timeout=$1; shift
+ local delta=$1; shift
+
+ local base=$("$@")
+ slowwait "$timeout" until_counter_is ">= $((base + delta))" "$@"
+}
+
setup_wait_dev()
{
local dev=$1; shift
@@ -374,33 +586,16 @@ cmd_jq()
[ ! -z "$output" ]
}
-lldpad_app_wait_set()
-{
- local dev=$1; shift
-
- while lldptool -t -i $dev -V APP -c app | grep -Eq "pending|unknown"; do
- echo "$dev: waiting for lldpad to push pending APP updates"
- sleep 5
- done
-}
-
-lldpad_app_wait_del()
-{
- # Give lldpad a chance to push down the changes. If the device is downed
- # too soon, the updates will be left pending. However, they will have
- # been struck off the lldpad's DB already, so we won't be able to tell
- # they are pending. Then on next test iteration this would cause
- # weirdness as newly-added APP rules conflict with the old ones,
- # sometimes getting stuck in an "unknown" state.
- sleep 5
-}
-
pre_cleanup()
{
if [ "${PAUSE_ON_CLEANUP}" = "yes" ]; then
echo "Pausing before cleanup, hit any key to continue"
read
fi
+
+ if [[ "$STABLE_MAC_ADDRS" = "yes" ]]; then
+ mac_addr_restore
+ fi
}
vrf_prepare()
@@ -640,8 +835,9 @@ tc_rule_handle_stats_get()
local id=$1; shift
local handle=$1; shift
local selector=${1:-.packets}; shift
+ local netns=${1:-""}; shift
- tc -j -s filter show $id \
+ tc $netns -j -s filter show $id \
| jq ".[] | select(.options.handle == $handle) | \
.options.actions[0].stats$selector"
}
@@ -654,6 +850,17 @@ ethtool_stats_get()
ethtool -S $dev | grep "^ *$stat:" | head -n 1 | cut -d: -f2
}
+ethtool_std_stats_get()
+{
+ local dev=$1; shift
+ local grp=$1; shift
+ local name=$1; shift
+ local src=$1; shift
+
+ ethtool --json -S $dev --groups $grp -- --src $src | \
+ jq '.[]."'"$grp"'"."'$name'"'
+}
+
qdisc_stats_get()
{
local dev=$1; shift
@@ -674,6 +881,52 @@ qdisc_parent_stats_get()
| jq '.[] | select(.parent == "'"$parent"'") | '"$selector"
}
+ipv6_stats_get()
+{
+ local dev=$1; shift
+ local stat=$1; shift
+
+ cat /proc/net/dev_snmp6/$dev | grep "^$stat" | cut -f2
+}
+
+hw_stats_get()
+{
+ local suite=$1; shift
+ local if_name=$1; shift
+ local dir=$1; shift
+ local stat=$1; shift
+
+ ip -j stats show dev $if_name group offload subgroup $suite |
+ jq ".[0].stats64.$dir.$stat"
+}
+
+__nh_stats_get()
+{
+ local key=$1; shift
+ local group_id=$1; shift
+ local member_id=$1; shift
+
+ ip -j -s -s nexthop show id $group_id |
+ jq --argjson member_id "$member_id" --arg key "$key" \
+ '.[].group_stats[] | select(.id == $member_id) | .[$key]'
+}
+
+nh_stats_get()
+{
+ local group_id=$1; shift
+ local member_id=$1; shift
+
+ __nh_stats_get packets "$group_id" "$member_id"
+}
+
+nh_stats_get_hw()
+{
+ local group_id=$1; shift
+ local member_id=$1; shift
+
+ __nh_stats_get packets_hw "$group_id" "$member_id"
+}
+
humanize()
{
local speed=$1; shift
@@ -698,6 +951,15 @@ rate()
echo $((8 * (t1 - t0) / interval))
}
+packets_rate()
+{
+ local t0=$1; shift
+ local t1=$1; shift
+ local interval=$1; shift
+
+ echo $(((t1 - t0) / interval))
+}
+
mac_get()
{
local if_name=$1
@@ -705,6 +967,15 @@ mac_get()
ip -j link show dev $if_name | jq -r '.[]["address"]'
}
+ipv6_lladdr_get()
+{
+ local if_name=$1
+
+ ip -j addr show dev $if_name | \
+ jq -r '.[]["addr_info"][] | select(.scope == "link").local' | \
+ head -1
+}
+
bridge_ageing_time_get()
{
local bridge=$1
@@ -723,14 +994,14 @@ sysctl_set()
local value=$1; shift
SYSCTL_ORIG[$key]=$(sysctl -n $key)
- sysctl -qw $key=$value
+ sysctl -qw $key="$value"
}
sysctl_restore()
{
local key=$1; shift
- sysctl -qw $key=${SYSCTL_ORIG["$key"]}
+ sysctl -qw $key="${SYSCTL_ORIG[$key]}"
}
forwarding_enable()
@@ -1002,7 +1273,8 @@ ping_do()
vrf_name=$(master_name_get $if_name)
ip vrf exec $vrf_name \
- $PING $args $dip -c 10 -i 0.1 -w $PING_TIMEOUT &> /dev/null
+ $PING $args $dip -c $PING_COUNT -i 0.1 \
+ -w $PING_TIMEOUT &> /dev/null
}
ping_test()
@@ -1014,6 +1286,15 @@ ping_test()
log_test "ping$3"
}
+ping_test_fails()
+{
+ RET=0
+
+ ping_do $1 $2
+ check_fail $?
+ log_test "ping fails$3"
+}
+
ping6_do()
{
local if_name=$1
@@ -1023,7 +1304,8 @@ ping6_do()
vrf_name=$(master_name_get $if_name)
ip vrf exec $vrf_name \
- $PING6 $args $dip -c 10 -i 0.1 -w $PING_TIMEOUT &> /dev/null
+ $PING6 $args $dip -c $PING_COUNT -i 0.1 \
+ -w $PING_TIMEOUT &> /dev/null
}
ping6_test()
@@ -1035,6 +1317,15 @@ ping6_test()
log_test "ping6$3"
}
+ping6_test_fails()
+{
+ RET=0
+
+ ping6_do $1 $2
+ check_fail $?
+ log_test "ping6 fails$3"
+}
+
learning_test()
{
local bridge=$1
@@ -1055,6 +1346,7 @@ learning_test()
# FDB entry was installed.
bridge link set dev $br_port1 flood off
+ ip link set $host1_if promisc on
tc qdisc add dev $host1_if ingress
tc filter add dev $host1_if ingress protocol ip pref 1 handle 101 \
flower dst_mac $mac action drop
@@ -1065,7 +1357,7 @@ learning_test()
tc -j -s filter show dev $host1_if ingress \
| jq -e ".[] | select(.options.handle == 101) \
| select(.options.actions[0].stats.packets == 1)" &> /dev/null
- check_fail $? "Packet reached second host when should not"
+ check_fail $? "Packet reached first host when should not"
$MZ $host1_if -c 1 -p 64 -a $mac -t ip -q
sleep 1
@@ -1104,6 +1396,7 @@ learning_test()
tc filter del dev $host1_if ingress protocol ip pref 1 handle 101 flower
tc qdisc del dev $host1_if ingress
+ ip link set $host1_if promisc off
bridge link set dev $br_port1 flood on
@@ -1121,6 +1414,7 @@ flood_test_do()
# Add an ACL on `host2_if` which will tell us whether the packet
# was flooded to it or not.
+ ip link set $host2_if promisc on
tc qdisc add dev $host2_if ingress
tc filter add dev $host2_if ingress protocol ip pref 1 handle 101 \
flower dst_mac $mac action drop
@@ -1138,6 +1432,7 @@ flood_test_do()
tc filter del dev $host2_if ingress protocol ip pref 1 handle 101 flower
tc qdisc del dev $host2_if ingress
+ ip link set $host2_if promisc off
return $err
}
@@ -1201,25 +1496,40 @@ flood_test()
__start_traffic()
{
+ local pktsize=$1; shift
local proto=$1; shift
local h_in=$1; shift # Where the traffic egresses the host
local sip=$1; shift
local dip=$1; shift
local dmac=$1; shift
- $MZ $h_in -p 8000 -A $sip -B $dip -c 0 \
+ $MZ $h_in -p $pktsize -A $sip -B $dip -c 0 \
-a own -b $dmac -t "$proto" -q "$@" &
sleep 1
}
+start_traffic_pktsize()
+{
+ local pktsize=$1; shift
+
+ __start_traffic $pktsize udp "$@"
+}
+
+start_tcp_traffic_pktsize()
+{
+ local pktsize=$1; shift
+
+ __start_traffic $pktsize tcp "$@"
+}
+
start_traffic()
{
- __start_traffic udp "$@"
+ start_traffic_pktsize 8000 "$@"
}
start_tcp_traffic()
{
- __start_traffic tcp "$@"
+ start_tcp_traffic_pktsize 8000 "$@"
}
stop_traffic()
@@ -1227,3 +1537,510 @@ stop_traffic()
# Suppress noise from killing mausezahn.
{ kill %% && wait %%; } 2>/dev/null
}
+
+declare -A cappid
+declare -A capfile
+declare -A capout
+
+tcpdump_start()
+{
+ local if_name=$1; shift
+ local ns=$1; shift
+
+ capfile[$if_name]=$(mktemp)
+ capout[$if_name]=$(mktemp)
+
+ if [ -z $ns ]; then
+ ns_cmd=""
+ else
+ ns_cmd="ip netns exec ${ns}"
+ fi
+
+ if [ -z $SUDO_USER ] ; then
+ capuser=""
+ else
+ capuser="-Z $SUDO_USER"
+ fi
+
+ $ns_cmd tcpdump $TCPDUMP_EXTRA_FLAGS -e -n -Q in -i $if_name \
+ -s 65535 -B 32768 $capuser -w ${capfile[$if_name]} \
+ > "${capout[$if_name]}" 2>&1 &
+ cappid[$if_name]=$!
+
+ sleep 1
+}
+
+tcpdump_stop()
+{
+ local if_name=$1
+ local pid=${cappid[$if_name]}
+
+ $ns_cmd kill "$pid" && wait "$pid"
+ sleep 1
+}
+
+tcpdump_cleanup()
+{
+ local if_name=$1
+
+ rm ${capfile[$if_name]} ${capout[$if_name]}
+}
+
+tcpdump_show()
+{
+ local if_name=$1
+
+ tcpdump -e -n -r ${capfile[$if_name]} 2>&1
+}
+
+# return 0 if the packet wasn't seen on host2_if or 1 if it was
+mcast_packet_test()
+{
+ local mac=$1
+ local src_ip=$2
+ local ip=$3
+ local host1_if=$4
+ local host2_if=$5
+ local seen=0
+ local tc_proto="ip"
+ local mz_v6arg=""
+
+ # basic check to see if we were passed an IPv4 address, if not assume IPv6
+ if [[ ! $ip =~ ^[0-9]{1,3}\.[0-9]{1,3}\.[0-9]{1,3}\.[0-9]{1,3}$ ]]; then
+ tc_proto="ipv6"
+ mz_v6arg="-6"
+ fi
+
+ # Add an ACL on `host2_if` which will tell us whether the packet
+ # was received by it or not.
+ tc qdisc add dev $host2_if ingress
+ tc filter add dev $host2_if ingress protocol $tc_proto pref 1 handle 101 \
+ flower ip_proto udp dst_mac $mac action drop
+
+ $MZ $host1_if $mz_v6arg -c 1 -p 64 -b $mac -A $src_ip -B $ip -t udp "dp=4096,sp=2048" -q
+ sleep 1
+
+ tc -j -s filter show dev $host2_if ingress \
+ | jq -e ".[] | select(.options.handle == 101) \
+ | select(.options.actions[0].stats.packets == 1)" &> /dev/null
+ if [[ $? -eq 0 ]]; then
+ seen=1
+ fi
+
+ tc filter del dev $host2_if ingress protocol $tc_proto pref 1 handle 101 flower
+ tc qdisc del dev $host2_if ingress
+
+ return $seen
+}
+
+brmcast_check_sg_entries()
+{
+ local report=$1; shift
+ local slist=("$@")
+ local sarg=""
+
+ for src in "${slist[@]}"; do
+ sarg="${sarg} and .source_list[].address == \"$src\""
+ done
+ bridge -j -d -s mdb show dev br0 \
+ | jq -e ".[].mdb[] | \
+ select(.grp == \"$TEST_GROUP\" and .source_list != null $sarg)" &>/dev/null
+ check_err $? "Wrong *,G entry source list after $report report"
+
+ for sgent in "${slist[@]}"; do
+ bridge -j -d -s mdb show dev br0 \
+ | jq -e ".[].mdb[] | \
+ select(.grp == \"$TEST_GROUP\" and .src == \"$sgent\")" &>/dev/null
+ check_err $? "Missing S,G entry ($sgent, $TEST_GROUP)"
+ done
+}
+
+brmcast_check_sg_fwding()
+{
+ local should_fwd=$1; shift
+ local sources=("$@")
+
+ for src in "${sources[@]}"; do
+ local retval=0
+
+ mcast_packet_test $TEST_GROUP_MAC $src $TEST_GROUP $h2 $h1
+ retval=$?
+ if [ $should_fwd -eq 1 ]; then
+ check_fail $retval "Didn't forward traffic from S,G ($src, $TEST_GROUP)"
+ else
+ check_err $retval "Forwarded traffic for blocked S,G ($src, $TEST_GROUP)"
+ fi
+ done
+}
+
+brmcast_check_sg_state()
+{
+ local is_blocked=$1; shift
+ local sources=("$@")
+ local should_fail=1
+
+ if [ $is_blocked -eq 1 ]; then
+ should_fail=0
+ fi
+
+ for src in "${sources[@]}"; do
+ bridge -j -d -s mdb show dev br0 \
+ | jq -e ".[].mdb[] | \
+ select(.grp == \"$TEST_GROUP\" and .source_list != null) |
+ .source_list[] |
+ select(.address == \"$src\") |
+ select(.timer == \"0.00\")" &>/dev/null
+ check_err_fail $should_fail $? "Entry $src has zero timer"
+
+ bridge -j -d -s mdb show dev br0 \
+ | jq -e ".[].mdb[] | \
+ select(.grp == \"$TEST_GROUP\" and .src == \"$src\" and \
+ .flags[] == \"blocked\")" &>/dev/null
+ check_err_fail $should_fail $? "Entry $src has blocked flag"
+ done
+}
+
+mc_join()
+{
+ local if_name=$1
+ local group=$2
+ local vrf_name=$(master_name_get $if_name)
+
+ # We don't care about actual reception, just about joining the
+ # IP multicast group and adding the L2 address to the device's
+ # MAC filtering table
+ ip vrf exec $vrf_name \
+ mreceive -g $group -I $if_name > /dev/null 2>&1 &
+ mreceive_pid=$!
+
+ sleep 1
+}
+
+mc_leave()
+{
+ kill "$mreceive_pid" && wait "$mreceive_pid"
+}
+
+mc_send()
+{
+ local if_name=$1
+ local groups=$2
+ local vrf_name=$(master_name_get $if_name)
+
+ ip vrf exec $vrf_name \
+ msend -g $groups -I $if_name -c 1 > /dev/null 2>&1
+}
+
+start_ip_monitor()
+{
+ local mtype=$1; shift
+ local ip=${1-ip}; shift
+
+ # start the monitor in the background
+ tmpfile=`mktemp /var/run/nexthoptestXXX`
+ mpid=`($ip monitor $mtype > $tmpfile & echo $!) 2>/dev/null`
+ sleep 0.2
+ echo "$mpid $tmpfile"
+}
+
+stop_ip_monitor()
+{
+ local mpid=$1; shift
+ local tmpfile=$1; shift
+ local el=$1; shift
+ local what=$1; shift
+
+ sleep 0.2
+ kill $mpid
+ local lines=`grep '^\w' $tmpfile | wc -l`
+ test $lines -eq $el
+ check_err $? "$what: $lines lines of events, expected $el"
+ rm -rf $tmpfile
+}
+
+hw_stats_monitor_test()
+{
+ local dev=$1; shift
+ local type=$1; shift
+ local make_suitable=$1; shift
+ local make_unsuitable=$1; shift
+ local ip=${1-ip}; shift
+
+ RET=0
+
+ # Expect a notification about enablement.
+ local ipmout=$(start_ip_monitor stats "$ip")
+ $ip stats set dev $dev ${type}_stats on
+ stop_ip_monitor $ipmout 1 "${type}_stats enablement"
+
+ # Expect a notification about offload.
+ local ipmout=$(start_ip_monitor stats "$ip")
+ $make_suitable
+ stop_ip_monitor $ipmout 1 "${type}_stats installation"
+
+ # Expect a notification about loss of offload.
+ local ipmout=$(start_ip_monitor stats "$ip")
+ $make_unsuitable
+ stop_ip_monitor $ipmout 1 "${type}_stats deinstallation"
+
+ # Expect a notification about disablement
+ local ipmout=$(start_ip_monitor stats "$ip")
+ $ip stats set dev $dev ${type}_stats off
+ stop_ip_monitor $ipmout 1 "${type}_stats disablement"
+
+ log_test "${type}_stats notifications"
+}
+
+ipv4_to_bytes()
+{
+ local IP=$1; shift
+
+ printf '%02x:' ${IP//./ } |
+ sed 's/:$//'
+}
+
+# Convert a given IPv6 address, `IP' such that the :: token, if present, is
+# expanded, and each 16-bit group is padded with zeroes to be 4 hexadecimal
+# digits. An optional `BYTESEP' parameter can be given to further separate
+# individual bytes of each 16-bit group.
+expand_ipv6()
+{
+ local IP=$1; shift
+ local bytesep=$1; shift
+
+ local cvt_ip=${IP/::/_}
+ local colons=${cvt_ip//[^:]/}
+ local allcol=:::::::
+ # IP where :: -> the appropriate number of colons:
+ local allcol_ip=${cvt_ip/_/${allcol:${#colons}}}
+
+ echo $allcol_ip | tr : '\n' |
+ sed s/^/0000/ |
+ sed 's/.*\(..\)\(..\)/\1'"$bytesep"'\2/' |
+ tr '\n' : |
+ sed 's/:$//'
+}
+
+ipv6_to_bytes()
+{
+ local IP=$1; shift
+
+ expand_ipv6 "$IP" :
+}
+
+u16_to_bytes()
+{
+ local u16=$1; shift
+
+ printf "%04x" $u16 | sed 's/^/000/;s/^.*\(..\)\(..\)$/\1:\2/'
+}
+
+# Given a mausezahn-formatted payload (colon-separated bytes given as %02x),
+# possibly with a keyword CHECKSUM stashed where a 16-bit checksum should be,
+# calculate checksum as per RFC 1071, assuming the CHECKSUM field (if any)
+# stands for 00:00.
+payload_template_calc_checksum()
+{
+ local payload=$1; shift
+
+ (
+ # Set input radix.
+ echo "16i"
+ # Push zero for the initial checksum.
+ echo 0
+
+ # Pad the payload with a terminating 00: in case we get an odd
+ # number of bytes.
+ echo "${payload%:}:00:" |
+ sed 's/CHECKSUM/00:00/g' |
+ tr '[:lower:]' '[:upper:]' |
+ # Add the word to the checksum.
+ sed 's/\(..\):\(..\):/\1\2+\n/g' |
+ # Strip the extra odd byte we pushed if left unconverted.
+ sed 's/\(..\):$//'
+
+ echo "10000 ~ +" # Calculate and add carry.
+ echo "FFFF r - p" # Bit-flip and print.
+ ) |
+ dc |
+ tr '[:upper:]' '[:lower:]'
+}
+
+payload_template_expand_checksum()
+{
+ local payload=$1; shift
+ local checksum=$1; shift
+
+ local ckbytes=$(u16_to_bytes $checksum)
+
+ echo "$payload" | sed "s/CHECKSUM/$ckbytes/g"
+}
+
+payload_template_nbytes()
+{
+ local payload=$1; shift
+
+ payload_template_expand_checksum "${payload%:}" 0 |
+ sed 's/:/\n/g' | wc -l
+}
+
+igmpv3_is_in_get()
+{
+ local GRP=$1; shift
+ local sources=("$@")
+
+ local igmpv3
+ local nsources=$(u16_to_bytes ${#sources[@]})
+
+ # IS_IN ( $sources )
+ igmpv3=$(:
+ )"22:"$( : Type - Membership Report
+ )"00:"$( : Reserved
+ )"CHECKSUM:"$( : Checksum
+ )"00:00:"$( : Reserved
+ )"00:01:"$( : Number of Group Records
+ )"01:"$( : Record Type - IS_IN
+ )"00:"$( : Aux Data Len
+ )"${nsources}:"$( : Number of Sources
+ )"$(ipv4_to_bytes $GRP):"$( : Multicast Address
+ )"$(for src in "${sources[@]}"; do
+ ipv4_to_bytes $src
+ echo -n :
+ done)"$( : Source Addresses
+ )
+ local checksum=$(payload_template_calc_checksum "$igmpv3")
+
+ payload_template_expand_checksum "$igmpv3" $checksum
+}
+
+igmpv2_leave_get()
+{
+ local GRP=$1; shift
+
+ local payload=$(:
+ )"17:"$( : Type - Leave Group
+ )"00:"$( : Max Resp Time - not meaningful
+ )"CHECKSUM:"$( : Checksum
+ )"$(ipv4_to_bytes $GRP)"$( : Group Address
+ )
+ local checksum=$(payload_template_calc_checksum "$payload")
+
+ payload_template_expand_checksum "$payload" $checksum
+}
+
+mldv2_is_in_get()
+{
+ local SIP=$1; shift
+ local GRP=$1; shift
+ local sources=("$@")
+
+ local hbh
+ local icmpv6
+ local nsources=$(u16_to_bytes ${#sources[@]})
+
+ hbh=$(:
+ )"3a:"$( : Next Header - ICMPv6
+ )"00:"$( : Hdr Ext Len
+ )"00:00:00:00:00:00:"$( : Options and Padding
+ )
+
+ icmpv6=$(:
+ )"8f:"$( : Type - MLDv2 Report
+ )"00:"$( : Code
+ )"CHECKSUM:"$( : Checksum
+ )"00:00:"$( : Reserved
+ )"00:01:"$( : Number of Group Records
+ )"01:"$( : Record Type - IS_IN
+ )"00:"$( : Aux Data Len
+ )"${nsources}:"$( : Number of Sources
+ )"$(ipv6_to_bytes $GRP):"$( : Multicast address
+ )"$(for src in "${sources[@]}"; do
+ ipv6_to_bytes $src
+ echo -n :
+ done)"$( : Source Addresses
+ )
+
+ local len=$(u16_to_bytes $(payload_template_nbytes $icmpv6))
+ local sudohdr=$(:
+ )"$(ipv6_to_bytes $SIP):"$( : SIP
+ )"$(ipv6_to_bytes $GRP):"$( : DIP is multicast address
+ )"${len}:"$( : Upper-layer length
+ )"00:3a:"$( : Zero and next-header
+ )
+ local checksum=$(payload_template_calc_checksum ${sudohdr}${icmpv6})
+
+ payload_template_expand_checksum "$hbh$icmpv6" $checksum
+}
+
+mldv1_done_get()
+{
+ local SIP=$1; shift
+ local GRP=$1; shift
+
+ local hbh
+ local icmpv6
+
+ hbh=$(:
+ )"3a:"$( : Next Header - ICMPv6
+ )"00:"$( : Hdr Ext Len
+ )"00:00:00:00:00:00:"$( : Options and Padding
+ )
+
+ icmpv6=$(:
+ )"84:"$( : Type - MLDv1 Done
+ )"00:"$( : Code
+ )"CHECKSUM:"$( : Checksum
+ )"00:00:"$( : Max Resp Delay - not meaningful
+ )"00:00:"$( : Reserved
+ )"$(ipv6_to_bytes $GRP):"$( : Multicast address
+ )
+
+ local len=$(u16_to_bytes $(payload_template_nbytes $icmpv6))
+ local sudohdr=$(:
+ )"$(ipv6_to_bytes $SIP):"$( : SIP
+ )"$(ipv6_to_bytes $GRP):"$( : DIP is multicast address
+ )"${len}:"$( : Upper-layer length
+ )"00:3a:"$( : Zero and next-header
+ )
+ local checksum=$(payload_template_calc_checksum ${sudohdr}${icmpv6})
+
+ payload_template_expand_checksum "$hbh$icmpv6" $checksum
+}
+
+bail_on_lldpad()
+{
+ local reason1="$1"; shift
+ local reason2="$1"; shift
+
+ if systemctl is-active --quiet lldpad; then
+
+ cat >/dev/stderr <<-EOF
+ WARNING: lldpad is running
+
+ lldpad will likely $reason1, and this test will
+ $reason2. Both are not supported at the same time,
+ one of them is arbitrarily going to overwrite the
+ other. That will cause spurious failures (or, unlikely,
+ passes) of this test.
+ EOF
+
+ if [[ -z $ALLOW_LLDPAD ]]; then
+ cat >/dev/stderr <<-EOF
+
+ If you want to run the test anyway, please set
+ an environment variable ALLOW_LLDPAD to a
+ non-empty string.
+ EOF
+ exit 1
+ else
+ return
+ fi
+ fi
+}
+
+absval()
+{
+ local v=$1; shift
+
+ echo $((v > 0 ? v : -v))
+}
diff --git a/tools/testing/selftests/net/forwarding/local_termination.sh b/tools/testing/selftests/net/forwarding/local_termination.sh
new file mode 100755
index 000000000000..c5b0cbc85b3e
--- /dev/null
+++ b/tools/testing/selftests/net/forwarding/local_termination.sh
@@ -0,0 +1,299 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+ALL_TESTS="standalone bridge"
+NUM_NETIFS=2
+PING_COUNT=1
+REQUIRE_MTOOLS=yes
+REQUIRE_MZ=no
+
+source lib.sh
+
+H1_IPV4="192.0.2.1"
+H2_IPV4="192.0.2.2"
+H1_IPV6="2001:db8:1::1"
+H2_IPV6="2001:db8:1::2"
+
+BRIDGE_ADDR="00:00:de:ad:be:ee"
+MACVLAN_ADDR="00:00:de:ad:be:ef"
+UNKNOWN_UC_ADDR1="de:ad:be:ef:ee:03"
+UNKNOWN_UC_ADDR2="de:ad:be:ef:ee:04"
+UNKNOWN_UC_ADDR3="de:ad:be:ef:ee:05"
+JOINED_IPV4_MC_ADDR="225.1.2.3"
+UNKNOWN_IPV4_MC_ADDR1="225.1.2.4"
+UNKNOWN_IPV4_MC_ADDR2="225.1.2.5"
+UNKNOWN_IPV4_MC_ADDR3="225.1.2.6"
+JOINED_IPV6_MC_ADDR="ff2e::0102:0304"
+UNKNOWN_IPV6_MC_ADDR1="ff2e::0102:0305"
+UNKNOWN_IPV6_MC_ADDR2="ff2e::0102:0306"
+UNKNOWN_IPV6_MC_ADDR3="ff2e::0102:0307"
+
+JOINED_MACV4_MC_ADDR="01:00:5e:01:02:03"
+UNKNOWN_MACV4_MC_ADDR1="01:00:5e:01:02:04"
+UNKNOWN_MACV4_MC_ADDR2="01:00:5e:01:02:05"
+UNKNOWN_MACV4_MC_ADDR3="01:00:5e:01:02:06"
+JOINED_MACV6_MC_ADDR="33:33:01:02:03:04"
+UNKNOWN_MACV6_MC_ADDR1="33:33:01:02:03:05"
+UNKNOWN_MACV6_MC_ADDR2="33:33:01:02:03:06"
+UNKNOWN_MACV6_MC_ADDR3="33:33:01:02:03:07"
+
+NON_IP_MC="01:02:03:04:05:06"
+NON_IP_PKT="00:04 48:45:4c:4f"
+BC="ff:ff:ff:ff:ff:ff"
+
+# Disable promisc to ensure we don't receive unknown MAC DA packets
+export TCPDUMP_EXTRA_FLAGS="-pl"
+
+h1=${NETIFS[p1]}
+h2=${NETIFS[p2]}
+
+send_non_ip()
+{
+ local if_name=$1
+ local smac=$2
+ local dmac=$3
+
+ $MZ -q $if_name "$dmac $smac $NON_IP_PKT"
+}
+
+send_uc_ipv4()
+{
+ local if_name=$1
+ local dmac=$2
+
+ ip neigh add $H2_IPV4 lladdr $dmac dev $if_name
+ ping_do $if_name $H2_IPV4
+ ip neigh del $H2_IPV4 dev $if_name
+}
+
+check_rcv()
+{
+ local if_name=$1
+ local type=$2
+ local pattern=$3
+ local should_receive=$4
+ local should_fail=
+
+ [ $should_receive = true ] && should_fail=0 || should_fail=1
+ RET=0
+
+ tcpdump_show $if_name | grep -q "$pattern"
+
+ check_err_fail "$should_fail" "$?" "reception"
+
+ log_test "$if_name: $type"
+}
+
+mc_route_prepare()
+{
+ local if_name=$1
+ local vrf_name=$(master_name_get $if_name)
+
+ ip route add 225.100.1.0/24 dev $if_name vrf $vrf_name
+ ip -6 route add ff2e::/64 dev $if_name vrf $vrf_name
+}
+
+mc_route_destroy()
+{
+ local if_name=$1
+ local vrf_name=$(master_name_get $if_name)
+
+ ip route del 225.100.1.0/24 dev $if_name vrf $vrf_name
+ ip -6 route del ff2e::/64 dev $if_name vrf $vrf_name
+}
+
+run_test()
+{
+ local rcv_if_name=$1
+ local smac=$(mac_get $h1)
+ local rcv_dmac=$(mac_get $rcv_if_name)
+
+ tcpdump_start $rcv_if_name
+
+ mc_route_prepare $h1
+ mc_route_prepare $rcv_if_name
+
+ send_uc_ipv4 $h1 $rcv_dmac
+ send_uc_ipv4 $h1 $MACVLAN_ADDR
+ send_uc_ipv4 $h1 $UNKNOWN_UC_ADDR1
+
+ ip link set dev $rcv_if_name promisc on
+ send_uc_ipv4 $h1 $UNKNOWN_UC_ADDR2
+ mc_send $h1 $UNKNOWN_IPV4_MC_ADDR2
+ mc_send $h1 $UNKNOWN_IPV6_MC_ADDR2
+ ip link set dev $rcv_if_name promisc off
+
+ mc_join $rcv_if_name $JOINED_IPV4_MC_ADDR
+ mc_send $h1 $JOINED_IPV4_MC_ADDR
+ mc_leave
+
+ mc_join $rcv_if_name $JOINED_IPV6_MC_ADDR
+ mc_send $h1 $JOINED_IPV6_MC_ADDR
+ mc_leave
+
+ mc_send $h1 $UNKNOWN_IPV4_MC_ADDR1
+ mc_send $h1 $UNKNOWN_IPV6_MC_ADDR1
+
+ ip link set dev $rcv_if_name allmulticast on
+ send_uc_ipv4 $h1 $UNKNOWN_UC_ADDR3
+ mc_send $h1 $UNKNOWN_IPV4_MC_ADDR3
+ mc_send $h1 $UNKNOWN_IPV6_MC_ADDR3
+ ip link set dev $rcv_if_name allmulticast off
+
+ mc_route_destroy $rcv_if_name
+ mc_route_destroy $h1
+
+ sleep 1
+
+ tcpdump_stop $rcv_if_name
+
+ check_rcv $rcv_if_name "Unicast IPv4 to primary MAC address" \
+ "$smac > $rcv_dmac, ethertype IPv4 (0x0800)" \
+ true
+
+ check_rcv $rcv_if_name "Unicast IPv4 to macvlan MAC address" \
+ "$smac > $MACVLAN_ADDR, ethertype IPv4 (0x0800)" \
+ true
+
+ check_rcv $rcv_if_name "Unicast IPv4 to unknown MAC address" \
+ "$smac > $UNKNOWN_UC_ADDR1, ethertype IPv4 (0x0800)" \
+ false
+
+ check_rcv $rcv_if_name "Unicast IPv4 to unknown MAC address, promisc" \
+ "$smac > $UNKNOWN_UC_ADDR2, ethertype IPv4 (0x0800)" \
+ true
+
+ check_rcv $rcv_if_name "Unicast IPv4 to unknown MAC address, allmulti" \
+ "$smac > $UNKNOWN_UC_ADDR3, ethertype IPv4 (0x0800)" \
+ false
+
+ check_rcv $rcv_if_name "Multicast IPv4 to joined group" \
+ "$smac > $JOINED_MACV4_MC_ADDR, ethertype IPv4 (0x0800)" \
+ true
+
+ check_rcv $rcv_if_name "Multicast IPv4 to unknown group" \
+ "$smac > $UNKNOWN_MACV4_MC_ADDR1, ethertype IPv4 (0x0800)" \
+ false
+
+ check_rcv $rcv_if_name "Multicast IPv4 to unknown group, promisc" \
+ "$smac > $UNKNOWN_MACV4_MC_ADDR2, ethertype IPv4 (0x0800)" \
+ true
+
+ check_rcv $rcv_if_name "Multicast IPv4 to unknown group, allmulti" \
+ "$smac > $UNKNOWN_MACV4_MC_ADDR3, ethertype IPv4 (0x0800)" \
+ true
+
+ check_rcv $rcv_if_name "Multicast IPv6 to joined group" \
+ "$smac > $JOINED_MACV6_MC_ADDR, ethertype IPv6 (0x86dd)" \
+ true
+
+ check_rcv $rcv_if_name "Multicast IPv6 to unknown group" \
+ "$smac > $UNKNOWN_MACV6_MC_ADDR1, ethertype IPv6 (0x86dd)" \
+ false
+
+ check_rcv $rcv_if_name "Multicast IPv6 to unknown group, promisc" \
+ "$smac > $UNKNOWN_MACV6_MC_ADDR2, ethertype IPv6 (0x86dd)" \
+ true
+
+ check_rcv $rcv_if_name "Multicast IPv6 to unknown group, allmulti" \
+ "$smac > $UNKNOWN_MACV6_MC_ADDR3, ethertype IPv6 (0x86dd)" \
+ true
+
+ tcpdump_cleanup $rcv_if_name
+}
+
+h1_create()
+{
+ simple_if_init $h1 $H1_IPV4/24 $H1_IPV6/64
+}
+
+h1_destroy()
+{
+ simple_if_fini $h1 $H1_IPV4/24 $H1_IPV6/64
+}
+
+h2_create()
+{
+ simple_if_init $h2 $H2_IPV4/24 $H2_IPV6/64
+}
+
+h2_destroy()
+{
+ simple_if_fini $h2 $H2_IPV4/24 $H2_IPV6/64
+}
+
+bridge_create()
+{
+ ip link add br0 type bridge
+ ip link set br0 address $BRIDGE_ADDR
+ ip link set br0 up
+
+ ip link set $h2 master br0
+ ip link set $h2 up
+
+ simple_if_init br0 $H2_IPV4/24 $H2_IPV6/64
+}
+
+bridge_destroy()
+{
+ simple_if_fini br0 $H2_IPV4/24 $H2_IPV6/64
+
+ ip link del br0
+}
+
+standalone()
+{
+ h1_create
+ h2_create
+
+ ip link add link $h2 name macvlan0 type macvlan mode private
+ ip link set macvlan0 address $MACVLAN_ADDR
+ ip link set macvlan0 up
+
+ run_test $h2
+
+ ip link del macvlan0
+
+ h2_destroy
+ h1_destroy
+}
+
+bridge()
+{
+ h1_create
+ bridge_create
+
+ ip link add link br0 name macvlan0 type macvlan mode private
+ ip link set macvlan0 address $MACVLAN_ADDR
+ ip link set macvlan0 up
+
+ run_test br0
+
+ ip link del macvlan0
+
+ bridge_destroy
+ h1_destroy
+}
+
+cleanup()
+{
+ pre_cleanup
+ vrf_cleanup
+}
+
+setup_prepare()
+{
+ vrf_prepare
+ # setup_wait() needs this
+ ip link set $h1 up
+ ip link set $h2 up
+}
+
+trap cleanup EXIT
+
+setup_prepare
+setup_wait
+
+tests_run
+
+exit $EXIT_STATUS
diff --git a/tools/testing/selftests/net/forwarding/mirror_gre_bound.sh b/tools/testing/selftests/net/forwarding/mirror_gre_bound.sh
index 360ca133bead..6c257ec03756 100755
--- a/tools/testing/selftests/net/forwarding/mirror_gre_bound.sh
+++ b/tools/testing/selftests/net/forwarding/mirror_gre_bound.sh
@@ -98,6 +98,7 @@ switch_create()
# Bridge between H1 and H2.
ip link add name br1 type bridge vlan_filtering 1
+ ip link set dev br1 addrgenmode none
ip link set dev br1 up
ip link set dev $swp1 master br1
diff --git a/tools/testing/selftests/net/forwarding/mirror_gre_bridge_1d.sh b/tools/testing/selftests/net/forwarding/mirror_gre_bridge_1d.sh
index c5095da7f6bf..04fd14b0a9b7 100755
--- a/tools/testing/selftests/net/forwarding/mirror_gre_bridge_1d.sh
+++ b/tools/testing/selftests/net/forwarding/mirror_gre_bridge_1d.sh
@@ -65,7 +65,8 @@ setup_prepare()
vrf_prepare
mirror_gre_topo_create
- ip link add name br2 type bridge vlan_filtering 0
+ ip link add name br2 address $(mac_get $swp3) \
+ type bridge vlan_filtering 0
ip link set dev br2 up
ip link set dev $swp3 master br2
@@ -93,12 +94,16 @@ cleanup()
test_gretap()
{
+ ip neigh replace 192.0.2.130 lladdr $(mac_get $h3) \
+ nud permanent dev br2
full_test_span_gre_dir gt4 ingress 8 0 "mirror to gretap"
full_test_span_gre_dir gt4 egress 0 8 "mirror to gretap"
}
test_ip6gretap()
{
+ ip neigh replace 2001:db8:2::2 lladdr $(mac_get $h3) \
+ nud permanent dev br2
full_test_span_gre_dir gt6 ingress 8 0 "mirror to ip6gretap"
full_test_span_gre_dir gt6 egress 0 8 "mirror to ip6gretap"
}
diff --git a/tools/testing/selftests/net/forwarding/mirror_gre_bridge_1d_vlan.sh b/tools/testing/selftests/net/forwarding/mirror_gre_bridge_1d_vlan.sh
index 197e769c2ed1..f35313c76fac 100755
--- a/tools/testing/selftests/net/forwarding/mirror_gre_bridge_1d_vlan.sh
+++ b/tools/testing/selftests/net/forwarding/mirror_gre_bridge_1d_vlan.sh
@@ -35,7 +35,8 @@ setup_prepare()
vrf_prepare
mirror_gre_topo_create
- ip link add name br2 type bridge vlan_filtering 0
+ ip link add name br2 address $(mac_get $swp3) \
+ type bridge vlan_filtering 0
ip link set dev br2 up
vlan_create $swp3 555
@@ -80,17 +81,26 @@ test_gretap()
test_ip6gretap()
{
- test_vlan_match gt6 'skip_hw vlan_id 555 vlan_ethtype ip' \
+ test_vlan_match gt6 'skip_hw vlan_id 555 vlan_ethtype ipv6' \
"mirror to ip6gretap"
}
test_gretap_stp()
{
+ # Sometimes after mirror installation, the neighbor's state is not valid.
+ # The reason is that there is no SW datapath activity related to the
+ # neighbor for the remote GRE address. Therefore whether the corresponding
+ # neighbor will be valid is a matter of luck, and the test is thus racy.
+ # Set the neighbor's state to permanent, so it would be always valid.
+ ip neigh replace 192.0.2.130 lladdr $(mac_get $h3) \
+ nud permanent dev br2
full_test_span_gre_stp gt4 $swp3.555 "mirror to gretap"
}
test_ip6gretap_stp()
{
+ ip neigh replace 2001:db8:2::2 lladdr $(mac_get $h3) \
+ nud permanent dev br2
full_test_span_gre_stp gt6 $swp3.555 "mirror to ip6gretap"
}
diff --git a/tools/testing/selftests/net/forwarding/mirror_gre_bridge_1q.sh b/tools/testing/selftests/net/forwarding/mirror_gre_bridge_1q.sh
index a3402cd8d5b6..0cf4c47a46f9 100755
--- a/tools/testing/selftests/net/forwarding/mirror_gre_bridge_1q.sh
+++ b/tools/testing/selftests/net/forwarding/mirror_gre_bridge_1q.sh
@@ -61,9 +61,12 @@ setup_prepare()
vrf_prepare
mirror_gre_topo_create
+ # Avoid changing br1's PVID while it is operational as a L3 interface.
+ ip link set dev br1 down
ip link set dev $swp3 master br1
bridge vlan add dev br1 vid 555 pvid untagged self
+ ip link set dev br1 up
ip address add dev br1 192.0.2.129/28
ip address add dev br1 2001:db8:2::1/64
@@ -87,12 +90,16 @@ cleanup()
test_gretap()
{
+ ip neigh replace 192.0.2.130 lladdr $(mac_get $h3) \
+ nud permanent dev br1
full_test_span_gre_dir gt4 ingress 8 0 "mirror to gretap"
full_test_span_gre_dir gt4 egress 0 8 "mirror to gretap"
}
test_ip6gretap()
{
+ ip neigh replace 2001:db8:2::2 lladdr $(mac_get $h3) \
+ nud permanent dev br1
full_test_span_gre_dir gt6 ingress 8 0 "mirror to ip6gretap"
full_test_span_gre_dir gt6 egress 0 8 "mirror to ip6gretap"
}
diff --git a/tools/testing/selftests/net/forwarding/mirror_gre_bridge_1q_lag.sh b/tools/testing/selftests/net/forwarding/mirror_gre_bridge_1q_lag.sh
index 28d568c48a73..c53148b1dc63 100755
--- a/tools/testing/selftests/net/forwarding/mirror_gre_bridge_1q_lag.sh
+++ b/tools/testing/selftests/net/forwarding/mirror_gre_bridge_1q_lag.sh
@@ -140,13 +140,15 @@ switch_create()
ip link set dev $swp3 up
ip link set dev $swp4 up
- ip link add name br1 type bridge vlan_filtering 1
- ip link set dev br1 up
- __addr_add_del br1 add 192.0.2.129/32
- ip -4 route add 192.0.2.130/32 dev br1
+ ip link add name br1 address $(mac_get $swp3) \
+ type bridge vlan_filtering 1
team_create lag loadbalance $swp3 $swp4
ip link set dev lag master br1
+
+ ip link set dev br1 up
+ __addr_add_del br1 add 192.0.2.129/32
+ ip -4 route add 192.0.2.130/32 dev br1
}
switch_destroy()
diff --git a/tools/testing/selftests/net/forwarding/mirror_gre_changes.sh b/tools/testing/selftests/net/forwarding/mirror_gre_changes.sh
index 472bd023e2a5..5ea9d63915f7 100755
--- a/tools/testing/selftests/net/forwarding/mirror_gre_changes.sh
+++ b/tools/testing/selftests/net/forwarding/mirror_gre_changes.sh
@@ -72,9 +72,10 @@ test_span_gre_ttl()
RET=0
- mirror_install $swp1 ingress $tundev "matchall $tcflags"
+ mirror_install $swp1 ingress $tundev \
+ "prot ip flower $tcflags ip_prot icmp"
tc filter add dev $h3 ingress pref 77 prot $prot \
- flower ip_ttl 50 action pass
+ flower skip_hw ip_ttl 50 action pass
mirror_test v$h1 192.0.2.1 192.0.2.2 $h3 77 0
diff --git a/tools/testing/selftests/net/forwarding/mirror_gre_lib.sh b/tools/testing/selftests/net/forwarding/mirror_gre_lib.sh
index fac486178ef7..0c36546e131e 100644
--- a/tools/testing/selftests/net/forwarding/mirror_gre_lib.sh
+++ b/tools/testing/selftests/net/forwarding/mirror_gre_lib.sh
@@ -1,6 +1,6 @@
# SPDX-License-Identifier: GPL-2.0
-source "$relative_path/mirror_lib.sh"
+source "$net_forwarding_dir/mirror_lib.sh"
quick_test_span_gre_dir_ips()
{
diff --git a/tools/testing/selftests/net/forwarding/mirror_gre_topo_lib.sh b/tools/testing/selftests/net/forwarding/mirror_gre_topo_lib.sh
index 39c03e2867f4..6e615fffa4ef 100644
--- a/tools/testing/selftests/net/forwarding/mirror_gre_topo_lib.sh
+++ b/tools/testing/selftests/net/forwarding/mirror_gre_topo_lib.sh
@@ -33,7 +33,7 @@
# | |
# +-------------------------------------------------------------------------+
-source "$relative_path/mirror_topo_lib.sh"
+source "$net_forwarding_dir/mirror_topo_lib.sh"
mirror_gre_topo_h3_create()
{
diff --git a/tools/testing/selftests/net/forwarding/mirror_gre_vlan_bridge_1q.sh b/tools/testing/selftests/net/forwarding/mirror_gre_vlan_bridge_1q.sh
index c02291e9841e..c8a9b5bd841f 100755
--- a/tools/testing/selftests/net/forwarding/mirror_gre_vlan_bridge_1q.sh
+++ b/tools/testing/selftests/net/forwarding/mirror_gre_vlan_bridge_1q.sh
@@ -141,7 +141,7 @@ test_gretap()
test_ip6gretap()
{
- test_vlan_match gt6 'skip_hw vlan_id 555 vlan_ethtype ip' \
+ test_vlan_match gt6 'skip_hw vlan_id 555 vlan_ethtype ipv6' \
"mirror to ip6gretap"
}
@@ -218,6 +218,7 @@ test_ip6gretap_forbidden_egress()
test_span_gre_untagged_egress()
{
local tundev=$1; shift
+ local ul_proto=$1; shift
local what=$1; shift
RET=0
@@ -225,7 +226,7 @@ test_span_gre_untagged_egress()
mirror_install $swp1 ingress $tundev "matchall $tcflags"
quick_test_span_gre_dir $tundev ingress
- quick_test_span_vlan_dir $h3 555 ingress
+ quick_test_span_vlan_dir $h3 555 ingress "$ul_proto"
h3_addr_add_del del $h3.555
bridge vlan add dev $swp3 vid 555 pvid untagged
@@ -233,7 +234,7 @@ test_span_gre_untagged_egress()
sleep 5
quick_test_span_gre_dir $tundev ingress
- fail_test_span_vlan_dir $h3 555 ingress
+ fail_test_span_vlan_dir $h3 555 ingress "$ul_proto"
h3_addr_add_del del $h3
bridge vlan add dev $swp3 vid 555
@@ -241,7 +242,7 @@ test_span_gre_untagged_egress()
sleep 5
quick_test_span_gre_dir $tundev ingress
- quick_test_span_vlan_dir $h3 555 ingress
+ quick_test_span_vlan_dir $h3 555 ingress "$ul_proto"
mirror_uninstall $swp1 ingress
@@ -250,12 +251,12 @@ test_span_gre_untagged_egress()
test_gretap_untagged_egress()
{
- test_span_gre_untagged_egress gt4 "mirror to gretap"
+ test_span_gre_untagged_egress gt4 ip "mirror to gretap"
}
test_ip6gretap_untagged_egress()
{
- test_span_gre_untagged_egress gt6 "mirror to ip6gretap"
+ test_span_gre_untagged_egress gt6 ipv6 "mirror to ip6gretap"
}
test_span_gre_fdb_roaming()
@@ -271,7 +272,7 @@ test_span_gre_fdb_roaming()
while ((RET == 0)); do
bridge fdb del dev $swp3 $h3mac vlan 555 master 2>/dev/null
- bridge fdb add dev $swp2 $h3mac vlan 555 master
+ bridge fdb add dev $swp2 $h3mac vlan 555 master static
sleep 1
fail_test_span_gre_dir $tundev ingress
diff --git a/tools/testing/selftests/net/forwarding/mirror_lib.sh b/tools/testing/selftests/net/forwarding/mirror_lib.sh
index c33bfd7ba214..3e8ebeff3019 100644
--- a/tools/testing/selftests/net/forwarding/mirror_lib.sh
+++ b/tools/testing/selftests/net/forwarding/mirror_lib.sh
@@ -20,6 +20,13 @@ mirror_uninstall()
tc filter del dev $swp1 $direction pref 1000
}
+is_ipv6()
+{
+ local addr=$1; shift
+
+ [[ -z ${addr//[0-9a-fA-F:]/} ]]
+}
+
mirror_test()
{
local vrf_name=$1; shift
@@ -29,9 +36,17 @@ mirror_test()
local pref=$1; shift
local expect=$1; shift
+ if is_ipv6 $dip; then
+ local proto=-6
+ local type="icmp6 type=128" # Echo request.
+ else
+ local proto=
+ local type="icmp echoreq"
+ fi
+
local t0=$(tc_rule_stats_get $dev $pref)
- $MZ $vrf_name ${sip:+-A $sip} -B $dip -a own -b bc -q \
- -c 10 -d 100ms -t icmp type=8
+ $MZ $proto $vrf_name ${sip:+-A $sip} -B $dip -a own -b bc -q \
+ -c 10 -d 100msec -t $type
sleep 0.5
local t1=$(tc_rule_stats_get $dev $pref)
local delta=$((t1 - t0))
@@ -100,13 +115,14 @@ do_test_span_vlan_dir_ips()
local dev=$1; shift
local vid=$1; shift
local direction=$1; shift
+ local ul_proto=$1; shift
local ip1=$1; shift
local ip2=$1; shift
# Install the capture as skip_hw to avoid double-counting of packets.
# The traffic is meant for local box anyway, so will be trapped to
# kernel.
- vlan_capture_install $dev "skip_hw vlan_id $vid vlan_ethtype ip"
+ vlan_capture_install $dev "skip_hw vlan_id $vid vlan_ethtype $ul_proto"
mirror_test v$h1 $ip1 $ip2 $dev 100 $expect
mirror_test v$h2 $ip2 $ip1 $dev 100 $expect
vlan_capture_uninstall $dev
diff --git a/tools/testing/selftests/net/forwarding/mirror_topo_lib.sh b/tools/testing/selftests/net/forwarding/mirror_topo_lib.sh
index 04979e5962e7..bb1adbb7b98a 100644
--- a/tools/testing/selftests/net/forwarding/mirror_topo_lib.sh
+++ b/tools/testing/selftests/net/forwarding/mirror_topo_lib.sh
@@ -60,6 +60,7 @@ mirror_topo_switch_create()
ip link set dev $swp3 up
ip link add name br1 type bridge vlan_filtering 1
+ ip link set dev br1 addrgenmode none
ip link set dev br1 up
ip link set dev $swp1 master br1
diff --git a/tools/testing/selftests/net/forwarding/mirror_vlan.sh b/tools/testing/selftests/net/forwarding/mirror_vlan.sh
index 9ab2ce77b332..0b44e148235e 100755
--- a/tools/testing/selftests/net/forwarding/mirror_vlan.sh
+++ b/tools/testing/selftests/net/forwarding/mirror_vlan.sh
@@ -85,9 +85,9 @@ test_tagged_vlan_dir()
RET=0
mirror_install $swp1 $direction $swp3.555 "matchall $tcflags"
- do_test_span_vlan_dir_ips 10 "$h3.555" 111 "$direction" \
+ do_test_span_vlan_dir_ips 10 "$h3.555" 111 "$direction" ip \
192.0.2.17 192.0.2.18
- do_test_span_vlan_dir_ips 0 "$h3.555" 555 "$direction" \
+ do_test_span_vlan_dir_ips 0 "$h3.555" 555 "$direction" ip \
192.0.2.17 192.0.2.18
mirror_uninstall $swp1 $direction
diff --git a/tools/testing/selftests/net/forwarding/no_forwarding.sh b/tools/testing/selftests/net/forwarding/no_forwarding.sh
new file mode 100755
index 000000000000..af3b398d13f0
--- /dev/null
+++ b/tools/testing/selftests/net/forwarding/no_forwarding.sh
@@ -0,0 +1,261 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+ALL_TESTS="standalone two_bridges one_bridge_two_pvids"
+NUM_NETIFS=4
+
+source lib.sh
+
+h1=${NETIFS[p1]}
+h2=${NETIFS[p3]}
+swp1=${NETIFS[p2]}
+swp2=${NETIFS[p4]}
+
+H1_IPV4="192.0.2.1"
+H2_IPV4="192.0.2.2"
+H1_IPV6="2001:db8:1::1"
+H2_IPV6="2001:db8:1::2"
+
+IPV4_ALLNODES="224.0.0.1"
+IPV6_ALLNODES="ff02::1"
+MACV4_ALLNODES="01:00:5e:00:00:01"
+MACV6_ALLNODES="33:33:00:00:00:01"
+NON_IP_MC="01:02:03:04:05:06"
+NON_IP_PKT="00:04 48:45:4c:4f"
+BC="ff:ff:ff:ff:ff:ff"
+
+# The full 4K VLAN space is too much to check, so strategically pick some
+# values which should provide reasonable coverage
+vids=(0 1 2 5 10 20 50 100 200 500 1000 1000 2000 4000 4094)
+
+send_non_ip()
+{
+ local if_name=$1
+ local smac=$2
+ local dmac=$3
+
+ $MZ -q $if_name "$dmac $smac $NON_IP_PKT"
+}
+
+send_uc_ipv4()
+{
+ local if_name=$1
+ local dmac=$2
+
+ ip neigh add $H2_IPV4 lladdr $dmac dev $if_name
+ ping_do $if_name $H2_IPV4
+ ip neigh del $H2_IPV4 dev $if_name
+}
+
+send_mc_ipv4()
+{
+ local if_name=$1
+
+ ping_do $if_name $IPV4_ALLNODES "-I $if_name"
+}
+
+send_uc_ipv6()
+{
+ local if_name=$1
+ local dmac=$2
+
+ ip -6 neigh add $H2_IPV6 lladdr $dmac dev $if_name
+ ping6_do $if_name $H2_IPV6
+ ip -6 neigh del $H2_IPV6 dev $if_name
+}
+
+send_mc_ipv6()
+{
+ local if_name=$1
+
+ ping6_do $if_name $IPV6_ALLNODES%$if_name
+}
+
+check_rcv()
+{
+ local if_name=$1
+ local type=$2
+ local pattern=$3
+ local should_fail=1
+
+ RET=0
+
+ tcpdump_show $if_name | grep -q "$pattern"
+
+ check_err_fail "$should_fail" "$?" "reception"
+
+ log_test "$type"
+}
+
+run_test()
+{
+ local test_name="$1"
+ local smac=$(mac_get $h1)
+ local dmac=$(mac_get $h2)
+ local h1_ipv6_lladdr=$(ipv6_lladdr_get $h1)
+ local vid=
+
+ echo "$test_name: Sending packets"
+
+ tcpdump_start $h2
+
+ send_non_ip $h1 $smac $dmac
+ send_non_ip $h1 $smac $NON_IP_MC
+ send_non_ip $h1 $smac $BC
+ send_uc_ipv4 $h1 $dmac
+ send_mc_ipv4 $h1
+ send_uc_ipv6 $h1 $dmac
+ send_mc_ipv6 $h1
+
+ for vid in "${vids[@]}"; do
+ vlan_create $h1 $vid
+ simple_if_init $h1.$vid $H1_IPV4/24 $H1_IPV6/64
+
+ send_non_ip $h1.$vid $smac $dmac
+ send_non_ip $h1.$vid $smac $NON_IP_MC
+ send_non_ip $h1.$vid $smac $BC
+ send_uc_ipv4 $h1.$vid $dmac
+ send_mc_ipv4 $h1.$vid
+ send_uc_ipv6 $h1.$vid $dmac
+ send_mc_ipv6 $h1.$vid
+
+ simple_if_fini $h1.$vid $H1_IPV4/24 $H1_IPV6/64
+ vlan_destroy $h1 $vid
+ done
+
+ sleep 1
+
+ echo "$test_name: Checking which packets were received"
+
+ tcpdump_stop $h2
+
+ check_rcv $h2 "$test_name: Unicast non-IP untagged" \
+ "$smac > $dmac, 802.3, length 4:"
+
+ check_rcv $h2 "$test_name: Multicast non-IP untagged" \
+ "$smac > $NON_IP_MC, 802.3, length 4:"
+
+ check_rcv $h2 "$test_name: Broadcast non-IP untagged" \
+ "$smac > $BC, 802.3, length 4:"
+
+ check_rcv $h2 "$test_name: Unicast IPv4 untagged" \
+ "$smac > $dmac, ethertype IPv4 (0x0800)"
+
+ check_rcv $h2 "$test_name: Multicast IPv4 untagged" \
+ "$smac > $MACV4_ALLNODES, ethertype IPv4 (0x0800).*: $H1_IPV4 > $IPV4_ALLNODES"
+
+ check_rcv $h2 "$test_name: Unicast IPv6 untagged" \
+ "$smac > $dmac, ethertype IPv6 (0x86dd).*8: $H1_IPV6 > $H2_IPV6"
+
+ check_rcv $h2 "$test_name: Multicast IPv6 untagged" \
+ "$smac > $MACV6_ALLNODES, ethertype IPv6 (0x86dd).*: $h1_ipv6_lladdr > $IPV6_ALLNODES"
+
+ for vid in "${vids[@]}"; do
+ check_rcv $h2 "$test_name: Unicast non-IP VID $vid" \
+ "$smac > $dmac, ethertype 802.1Q (0x8100).*vlan $vid,.*length 4"
+
+ check_rcv $h2 "$test_name: Multicast non-IP VID $vid" \
+ "$smac > $NON_IP_MC, ethertype 802.1Q (0x8100).*vlan $vid,.*length 4"
+
+ check_rcv $h2 "$test_name: Broadcast non-IP VID $vid" \
+ "$smac > $BC, ethertype 802.1Q (0x8100).*vlan $vid,.*length 4"
+
+ check_rcv $h2 "$test_name: Unicast IPv4 VID $vid" \
+ "$smac > $dmac, ethertype 802.1Q (0x8100).*vlan $vid,.*ethertype IPv4 (0x0800), $H1_IPV4 > $H2_IPV4"
+
+ check_rcv $h2 "$test_name: Multicast IPv4 VID $vid" \
+ "$smac > $MACV4_ALLNODES, ethertype 802.1Q (0x8100).*vlan $vid,.*ethertype IPv4 (0x0800), $H1_IPV4 > $IPV4_ALLNODES"
+
+ check_rcv $h2 "$test_name: Unicast IPv6 VID $vid" \
+ "$smac > $dmac, ethertype 802.1Q (0x8100).*vlan $vid,.*ethertype IPv6 (0x86dd), $H1_IPV6 > $H2_IPV6"
+
+ check_rcv $h2 "$test_name: Multicast IPv6 VID $vid" \
+ "$smac > $MACV6_ALLNODES, ethertype 802.1Q (0x8100).*vlan $vid,.*ethertype IPv6 (0x86dd), $h1_ipv6_lladdr > $IPV6_ALLNODES"
+ done
+
+ tcpdump_cleanup $h2
+}
+
+standalone()
+{
+ run_test "Standalone switch ports"
+}
+
+two_bridges()
+{
+ ip link add br0 type bridge && ip link set br0 up
+ ip link add br1 type bridge && ip link set br1 up
+ ip link set $swp1 master br0
+ ip link set $swp2 master br1
+
+ run_test "Switch ports in different bridges"
+
+ ip link del br1
+ ip link del br0
+}
+
+one_bridge_two_pvids()
+{
+ ip link add br0 type bridge vlan_filtering 1 vlan_default_pvid 0
+ ip link set br0 up
+ ip link set $swp1 master br0
+ ip link set $swp2 master br0
+
+ bridge vlan add dev $swp1 vid 1 pvid untagged
+ bridge vlan add dev $swp1 vid 2 pvid untagged
+
+ run_test "Switch ports in VLAN-aware bridge with different PVIDs"
+
+ ip link del br0
+}
+
+h1_create()
+{
+ simple_if_init $h1 $H1_IPV4/24 $H1_IPV6/64
+}
+
+h1_destroy()
+{
+ simple_if_fini $h1 $H1_IPV4/24 $H1_IPV6/64
+}
+
+h2_create()
+{
+ simple_if_init $h2 $H2_IPV4/24 $H2_IPV6/64
+}
+
+h2_destroy()
+{
+ simple_if_fini $h2 $H2_IPV4/24 $H2_IPV6/64
+}
+
+cleanup()
+{
+ pre_cleanup
+
+ h2_destroy
+ h1_destroy
+
+ vrf_cleanup
+}
+
+setup_prepare()
+{
+ vrf_prepare
+
+ h1_create
+ h2_create
+ # we call simple_if_init from the test itself, but setup_wait expects
+ # that we call it from here, and waits until the interfaces are up
+ ip link set dev $swp1 up
+ ip link set dev $swp2 up
+}
+
+trap cleanup EXIT
+
+setup_prepare
+setup_wait
+
+tests_run
+
+exit $EXIT_STATUS
diff --git a/tools/testing/selftests/net/forwarding/pedit_dsfield.sh b/tools/testing/selftests/net/forwarding/pedit_dsfield.sh
index 55eeacf59241..af008fbf2725 100755
--- a/tools/testing/selftests/net/forwarding/pedit_dsfield.sh
+++ b/tools/testing/selftests/net/forwarding/pedit_dsfield.sh
@@ -60,7 +60,9 @@ h2_destroy()
switch_create()
{
- ip link add name br1 up type bridge vlan_filtering 1
+ ip link add name br1 type bridge vlan_filtering 1
+ ip link set dev br1 addrgenmode none
+ ip link set dev br1 up
ip link set dev $swp1 master br1
ip link set dev $swp1 up
ip link set dev $swp2 master br1
@@ -75,7 +77,9 @@ switch_destroy()
tc qdisc del dev $swp2 clsact
tc qdisc del dev $swp1 clsact
+ ip link set dev $swp2 down
ip link set dev $swp2 nomaster
+ ip link set dev $swp1 down
ip link set dev $swp1 nomaster
ip link del dev br1
}
diff --git a/tools/testing/selftests/net/forwarding/pedit_ip.sh b/tools/testing/selftests/net/forwarding/pedit_ip.sh
new file mode 100755
index 000000000000..d14efb2d23b2
--- /dev/null
+++ b/tools/testing/selftests/net/forwarding/pedit_ip.sh
@@ -0,0 +1,201 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+# This test sends traffic from H1 to H2. Either on ingress of $swp1, or on
+# egress of $swp2, the traffic is acted upon by a pedit action. An ingress
+# filter installed on $h2 verifies that the packet looks like expected.
+#
+# +----------------------+ +----------------------+
+# | H1 | | H2 |
+# | + $h1 | | $h2 + |
+# | | 192.0.2.1/28 | | 192.0.2.2/28 | |
+# +----|-----------------+ +----------------|-----+
+# | |
+# +----|----------------------------------------------------------------|-----+
+# | SW | | |
+# | +-|----------------------------------------------------------------|-+ |
+# | | + $swp1 BR $swp2 + | |
+# | +--------------------------------------------------------------------+ |
+# +---------------------------------------------------------------------------+
+
+ALL_TESTS="
+ ping_ipv4
+ ping_ipv6
+ test_ip4_src
+ test_ip4_dst
+ test_ip6_src
+ test_ip6_dst
+"
+
+NUM_NETIFS=4
+source lib.sh
+source tc_common.sh
+
+h1_create()
+{
+ simple_if_init $h1 192.0.2.1/28 2001:db8:1::1/64
+}
+
+h1_destroy()
+{
+ simple_if_fini $h1 192.0.2.1/28 2001:db8:1::1/64
+}
+
+h2_create()
+{
+ simple_if_init $h2 192.0.2.2/28 2001:db8:1::2/64
+ tc qdisc add dev $h2 clsact
+}
+
+h2_destroy()
+{
+ tc qdisc del dev $h2 clsact
+ simple_if_fini $h2 192.0.2.2/28 2001:db8:1::2/64
+}
+
+switch_create()
+{
+ ip link add name br1 up type bridge vlan_filtering 1
+ ip link set dev $swp1 master br1
+ ip link set dev $swp1 up
+ ip link set dev $swp2 master br1
+ ip link set dev $swp2 up
+
+ tc qdisc add dev $swp1 clsact
+ tc qdisc add dev $swp2 clsact
+}
+
+switch_destroy()
+{
+ tc qdisc del dev $swp2 clsact
+ tc qdisc del dev $swp1 clsact
+
+ ip link set dev $swp2 down
+ ip link set dev $swp2 nomaster
+ ip link set dev $swp1 down
+ ip link set dev $swp1 nomaster
+ ip link del dev br1
+}
+
+setup_prepare()
+{
+ h1=${NETIFS[p1]}
+ swp1=${NETIFS[p2]}
+
+ swp2=${NETIFS[p3]}
+ h2=${NETIFS[p4]}
+
+ h2mac=$(mac_get $h2)
+
+ vrf_prepare
+ h1_create
+ h2_create
+ switch_create
+}
+
+cleanup()
+{
+ pre_cleanup
+
+ switch_destroy
+ h2_destroy
+ h1_destroy
+ vrf_cleanup
+}
+
+ping_ipv4()
+{
+ ping_test $h1 192.0.2.2
+}
+
+ping_ipv6()
+{
+ ping6_test $h1 2001:db8:1::2
+}
+
+do_test_pedit_ip()
+{
+ local pedit_locus=$1; shift
+ local pedit_action=$1; shift
+ local match_prot=$1; shift
+ local match_flower=$1; shift
+ local mz_flags=$1; shift
+
+ tc filter add $pedit_locus handle 101 pref 1 \
+ flower action pedit ex munge $pedit_action
+ tc filter add dev $h2 ingress handle 101 pref 1 prot $match_prot \
+ flower skip_hw $match_flower action pass
+
+ RET=0
+
+ $MZ $mz_flags $h1 -c 10 -d 20msec -p 100 -a own -b $h2mac -q -t ip
+
+ local pkts
+ pkts=$(busywait "$TC_HIT_TIMEOUT" until_counter_is ">= 10" \
+ tc_rule_handle_stats_get "dev $h2 ingress" 101)
+ check_err $? "Expected to get 10 packets, but got $pkts."
+
+ pkts=$(tc_rule_handle_stats_get "$pedit_locus" 101)
+ ((pkts >= 10))
+ check_err $? "Expected to get 10 packets on pedit rule, but got $pkts."
+
+ log_test "$pedit_locus pedit $pedit_action"
+
+ tc filter del dev $h2 ingress pref 1
+ tc filter del $pedit_locus pref 1
+}
+
+do_test_pedit_ip6()
+{
+ local locus=$1; shift
+ local pedit_addr=$1; shift
+ local flower_addr=$1; shift
+
+ do_test_pedit_ip "$locus" "$pedit_addr set 2001:db8:2::1" ipv6 \
+ "$flower_addr 2001:db8:2::1" \
+ "-6 -A 2001:db8:1::1 -B 2001:db8:1::2"
+}
+
+do_test_pedit_ip4()
+{
+ local locus=$1; shift
+ local pedit_addr=$1; shift
+ local flower_addr=$1; shift
+
+ do_test_pedit_ip "$locus" "$pedit_addr set 198.51.100.1" ip \
+ "$flower_addr 198.51.100.1" \
+ "-A 192.0.2.1 -B 192.0.2.2"
+}
+
+test_ip4_src()
+{
+ do_test_pedit_ip4 "dev $swp1 ingress" "ip src" src_ip
+ do_test_pedit_ip4 "dev $swp2 egress" "ip src" src_ip
+}
+
+test_ip4_dst()
+{
+ do_test_pedit_ip4 "dev $swp1 ingress" "ip dst" dst_ip
+ do_test_pedit_ip4 "dev $swp2 egress" "ip dst" dst_ip
+}
+
+test_ip6_src()
+{
+ do_test_pedit_ip6 "dev $swp1 ingress" "ip6 src" src_ip
+ do_test_pedit_ip6 "dev $swp2 egress" "ip6 src" src_ip
+}
+
+test_ip6_dst()
+{
+ do_test_pedit_ip6 "dev $swp1 ingress" "ip6 dst" dst_ip
+ do_test_pedit_ip6 "dev $swp2 egress" "ip6 dst" dst_ip
+}
+
+trap cleanup EXIT
+
+setup_prepare
+setup_wait
+
+tests_run
+
+exit $EXIT_STATUS
diff --git a/tools/testing/selftests/net/forwarding/pedit_l4port.sh b/tools/testing/selftests/net/forwarding/pedit_l4port.sh
new file mode 100755
index 000000000000..10e594c55117
--- /dev/null
+++ b/tools/testing/selftests/net/forwarding/pedit_l4port.sh
@@ -0,0 +1,200 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+# This test sends traffic from H1 to H2. Either on ingress of $swp1, or on egress of $swp2, the
+# traffic is acted upon by a pedit action. An ingress filter installed on $h2 verifies that the
+# packet looks like expected.
+#
+# +----------------------+ +----------------------+
+# | H1 | | H2 |
+# | + $h1 | | $h2 + |
+# | | 192.0.2.1/28 | | 192.0.2.2/28 | |
+# +----|-----------------+ +----------------|-----+
+# | |
+# +----|----------------------------------------------------------------|-----+
+# | SW | | |
+# | +-|----------------------------------------------------------------|-+ |
+# | | + $swp1 BR $swp2 + | |
+# | +--------------------------------------------------------------------+ |
+# +---------------------------------------------------------------------------+
+
+ALL_TESTS="
+ ping_ipv4
+ test_udp_sport
+ test_udp_dport
+ test_tcp_sport
+ test_tcp_dport
+"
+
+NUM_NETIFS=4
+source lib.sh
+source tc_common.sh
+
+: ${HIT_TIMEOUT:=2000} # ms
+
+h1_create()
+{
+ simple_if_init $h1 192.0.2.1/28 2001:db8:1::1/64
+}
+
+h1_destroy()
+{
+ simple_if_fini $h1 192.0.2.1/28 2001:db8:1::1/64
+}
+
+h2_create()
+{
+ simple_if_init $h2 192.0.2.2/28 2001:db8:1::2/64
+ tc qdisc add dev $h2 clsact
+}
+
+h2_destroy()
+{
+ tc qdisc del dev $h2 clsact
+ simple_if_fini $h2 192.0.2.2/28 2001:db8:1::2/64
+}
+
+switch_create()
+{
+ ip link add name br1 up type bridge vlan_filtering 1
+ ip link set dev $swp1 master br1
+ ip link set dev $swp1 up
+ ip link set dev $swp2 master br1
+ ip link set dev $swp2 up
+
+ tc qdisc add dev $swp1 clsact
+ tc qdisc add dev $swp2 clsact
+}
+
+switch_destroy()
+{
+ tc qdisc del dev $swp2 clsact
+ tc qdisc del dev $swp1 clsact
+
+ ip link set dev $swp2 down
+ ip link set dev $swp2 nomaster
+ ip link set dev $swp1 down
+ ip link set dev $swp1 nomaster
+ ip link del dev br1
+}
+
+setup_prepare()
+{
+ h1=${NETIFS[p1]}
+ swp1=${NETIFS[p2]}
+
+ swp2=${NETIFS[p3]}
+ h2=${NETIFS[p4]}
+
+ h2mac=$(mac_get $h2)
+
+ vrf_prepare
+ h1_create
+ h2_create
+ switch_create
+}
+
+cleanup()
+{
+ pre_cleanup
+
+ switch_destroy
+ h2_destroy
+ h1_destroy
+ vrf_cleanup
+}
+
+ping_ipv4()
+{
+ ping_test $h1 192.0.2.2
+}
+
+ping_ipv6()
+{
+ ping6_test $h1 2001:db8:1::2
+}
+
+do_test_pedit_l4port_one()
+{
+ local pedit_locus=$1; shift
+ local pedit_prot=$1; shift
+ local pedit_action=$1; shift
+ local match_prot=$1; shift
+ local match_flower=$1; shift
+ local mz_flags=$1; shift
+ local saddr=$1; shift
+ local daddr=$1; shift
+
+ tc filter add $pedit_locus handle 101 pref 1 \
+ flower action pedit ex munge $pedit_action
+ tc filter add dev $h2 ingress handle 101 pref 1 prot $match_prot \
+ flower skip_hw $match_flower action pass
+
+ RET=0
+
+ $MZ $mz_flags $h1 -c 10 -d 20msec -p 100 \
+ -a own -b $h2mac -q -t $pedit_prot sp=54321,dp=12345
+
+ local pkts
+ pkts=$(busywait "$TC_HIT_TIMEOUT" until_counter_is ">= 10" \
+ tc_rule_handle_stats_get "dev $h2 ingress" 101)
+ check_err $? "Expected to get 10 packets, but got $pkts."
+
+ pkts=$(tc_rule_handle_stats_get "$pedit_locus" 101)
+ ((pkts >= 10))
+ check_err $? "Expected to get 10 packets on pedit rule, but got $pkts."
+
+ log_test "$pedit_locus pedit $pedit_action"
+
+ tc filter del dev $h2 ingress pref 1
+ tc filter del $pedit_locus pref 1
+}
+
+do_test_pedit_l4port()
+{
+ local locus=$1; shift
+ local prot=$1; shift
+ local pedit_port=$1; shift
+ local flower_port=$1; shift
+ local port
+
+ for port in 1 11111 65535; do
+ do_test_pedit_l4port_one "$locus" "$prot" \
+ "$prot $pedit_port set $port" \
+ ip "ip_proto $prot $flower_port $port" \
+ "-A 192.0.2.1 -B 192.0.2.2"
+ done
+}
+
+test_udp_sport()
+{
+ do_test_pedit_l4port "dev $swp1 ingress" udp sport src_port
+ do_test_pedit_l4port "dev $swp2 egress" udp sport src_port
+}
+
+test_udp_dport()
+{
+ do_test_pedit_l4port "dev $swp1 ingress" udp dport dst_port
+ do_test_pedit_l4port "dev $swp2 egress" udp dport dst_port
+}
+
+test_tcp_sport()
+{
+ do_test_pedit_l4port "dev $swp1 ingress" tcp sport src_port
+ do_test_pedit_l4port "dev $swp2 egress" tcp sport src_port
+}
+
+test_tcp_dport()
+{
+ do_test_pedit_l4port "dev $swp1 ingress" tcp dport dst_port
+ do_test_pedit_l4port "dev $swp2 egress" tcp dport dst_port
+}
+
+trap cleanup EXIT
+
+setup_prepare
+setup_wait
+
+tests_run
+
+exit $EXIT_STATUS
diff --git a/tools/testing/selftests/net/forwarding/q_in_vni.sh b/tools/testing/selftests/net/forwarding/q_in_vni.sh
new file mode 100755
index 000000000000..798b13525c02
--- /dev/null
+++ b/tools/testing/selftests/net/forwarding/q_in_vni.sh
@@ -0,0 +1,348 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+# +-----------------------+ +------------------------+
+# | H1 (vrf) | | H2 (vrf) |
+# | + $h1.10 | | + $h2.10 |
+# | | 192.0.2.1/28 | | | 192.0.2.2/28 |
+# | | | | | |
+# | | + $h1.20 | | | + $h2.20 |
+# | \ | 198.51.100.1/24 | | \ | 198.51.100.2/24 |
+# | \| | | \| |
+# | + $h1 | | + $h2 |
+# +----|------------------+ +----|-------------------+
+# | |
+# +----|--------------------------------------------------|-------------------+
+# | SW | | |
+# | +--|--------------------------------------------------|-----------------+ |
+# | | + $swp1 BR1 (802.1ad) + $swp2 | |
+# | | vid 100 pvid untagged vid 100 pvid | |
+# | | untagged | |
+# | | + vx100 (vxlan) | |
+# | | local 192.0.2.17 | |
+# | | remote 192.0.2.34 192.0.2.50 | |
+# | | id 1000 dstport $VXPORT | |
+# | | vid 100 pvid untagged | |
+# | +-----------------------------------------------------------------------+ |
+# | |
+# | 192.0.2.32/28 via 192.0.2.18 |
+# | 192.0.2.48/28 via 192.0.2.18 |
+# | |
+# | + $rp1 |
+# | | 192.0.2.17/28 |
+# +----|----------------------------------------------------------------------+
+# |
+# +----|--------------------------------------------------------+
+# | | VRP2 (vrf) |
+# | + $rp2 |
+# | 192.0.2.18/28 |
+# | | (maybe) HW
+# =============================================================================
+# | | (likely) SW
+# | + v1 (veth) + v3 (veth) |
+# | | 192.0.2.33/28 | 192.0.2.49/28 |
+# +----|---------------------------------------|----------------+
+# | |
+# +----|------------------------------+ +----|------------------------------+
+# | + v2 (veth) NS1 (netns) | | + v4 (veth) NS2 (netns) |
+# | 192.0.2.34/28 | | 192.0.2.50/28 |
+# | | | |
+# | 192.0.2.16/28 via 192.0.2.33 | | 192.0.2.16/28 via 192.0.2.49 |
+# | 192.0.2.50/32 via 192.0.2.33 | | 192.0.2.34/32 via 192.0.2.49 |
+# | | | |
+# | +-------------------------------+ | | +-------------------------------+ |
+# | | BR2 (802.1ad) | | | | BR2 (802.1ad) | |
+# | | + vx100 (vxlan) | | | | + vx100 (vxlan) | |
+# | | local 192.0.2.34 | | | | local 192.0.2.50 | |
+# | | remote 192.0.2.17 | | | | remote 192.0.2.17 | |
+# | | remote 192.0.2.50 | | | | remote 192.0.2.34 | |
+# | | id 1000 dstport $VXPORT | | | | id 1000 dstport $VXPORT | |
+# | | vid 100 pvid untagged | | | | vid 100 pvid untagged | |
+# | | | | | | | |
+# | | + w1 (veth) | | | | + w1 (veth) | |
+# | | | vid 100 pvid untagged | | | | | vid 100 pvid untagged | |
+# | +--|----------------------------+ | | +--|----------------------------+ |
+# | | | | | |
+# | +--|----------------------------+ | | +--|----------------------------+ |
+# | | | VW2 (vrf) | | | | | VW2 (vrf) | |
+# | | + w2 (veth) | | | | + w2 (veth) | |
+# | | |\ | | | | |\ | |
+# | | | + w2.10 | | | | | + w2.10 | |
+# | | | 192.0.2.3/28 | | | | | 192.0.2.4/28 | |
+# | | | | | | | | | |
+# | | + w2.20 | | | | + w2.20 | |
+# | | 198.51.100.3/24 | | | | 198.51.100.4/24 | |
+# | +-------------------------------+ | | +-------------------------------+ |
+# +-----------------------------------+ +-----------------------------------+
+
+: ${VXPORT:=4789}
+export VXPORT
+
+: ${ALL_TESTS:="
+ ping_ipv4
+ "}
+
+NUM_NETIFS=6
+source lib.sh
+
+h1_create()
+{
+ simple_if_init $h1
+ tc qdisc add dev $h1 clsact
+ vlan_create $h1 10 v$h1 192.0.2.1/28
+ vlan_create $h1 20 v$h1 198.51.100.1/24
+}
+
+h1_destroy()
+{
+ vlan_destroy $h1 20
+ vlan_destroy $h1 10
+ tc qdisc del dev $h1 clsact
+ simple_if_fini $h1
+}
+
+h2_create()
+{
+ simple_if_init $h2
+ tc qdisc add dev $h2 clsact
+ vlan_create $h2 10 v$h2 192.0.2.2/28
+ vlan_create $h2 20 v$h2 198.51.100.2/24
+}
+
+h2_destroy()
+{
+ vlan_destroy $h2 20
+ vlan_destroy $h2 10
+ tc qdisc del dev $h2 clsact
+ simple_if_fini $h2
+}
+
+rp1_set_addr()
+{
+ ip address add dev $rp1 192.0.2.17/28
+
+ ip route add 192.0.2.32/28 nexthop via 192.0.2.18
+ ip route add 192.0.2.48/28 nexthop via 192.0.2.18
+}
+
+rp1_unset_addr()
+{
+ ip route del 192.0.2.48/28 nexthop via 192.0.2.18
+ ip route del 192.0.2.32/28 nexthop via 192.0.2.18
+
+ ip address del dev $rp1 192.0.2.17/28
+}
+
+switch_create()
+{
+ ip link add name br1 type bridge vlan_filtering 1 vlan_protocol 802.1ad \
+ vlan_default_pvid 0 mcast_snooping 0
+ ip link set dev br1 addrgenmode none
+ # Make sure the bridge uses the MAC address of the local port and not
+ # that of the VxLAN's device.
+ ip link set dev br1 address $(mac_get $swp1)
+ ip link set dev br1 up
+
+ ip link set dev $rp1 up
+ rp1_set_addr
+
+ ip link add name vx100 type vxlan id 1000 \
+ local 192.0.2.17 dstport "$VXPORT" \
+ nolearning noudpcsum tos inherit ttl 100
+ ip link set dev vx100 up
+
+ ip link set dev vx100 master br1
+ bridge vlan add vid 100 dev vx100 pvid untagged
+
+ ip link set dev $swp1 master br1
+ ip link set dev $swp1 up
+ bridge vlan add vid 100 dev $swp1 pvid untagged
+
+ ip link set dev $swp2 master br1
+ ip link set dev $swp2 up
+ bridge vlan add vid 100 dev $swp2 pvid untagged
+
+ bridge fdb append dev vx100 00:00:00:00:00:00 dst 192.0.2.34 self
+ bridge fdb append dev vx100 00:00:00:00:00:00 dst 192.0.2.50 self
+}
+
+switch_destroy()
+{
+ bridge fdb del dev vx100 00:00:00:00:00:00 dst 192.0.2.50 self
+ bridge fdb del dev vx100 00:00:00:00:00:00 dst 192.0.2.34 self
+
+ bridge vlan del vid 100 dev $swp2
+ ip link set dev $swp2 down
+ ip link set dev $swp2 nomaster
+
+ bridge vlan del vid 100 dev $swp1
+ ip link set dev $swp1 down
+ ip link set dev $swp1 nomaster
+
+ ip link set dev vx100 nomaster
+ ip link set dev vx100 down
+ ip link del dev vx100
+
+ rp1_unset_addr
+ ip link set dev $rp1 down
+
+ ip link set dev br1 down
+ ip link del dev br1
+}
+
+vrp2_create()
+{
+ simple_if_init $rp2 192.0.2.18/28
+ __simple_if_init v1 v$rp2 192.0.2.33/28
+ __simple_if_init v3 v$rp2 192.0.2.49/28
+ tc qdisc add dev v1 clsact
+}
+
+vrp2_destroy()
+{
+ tc qdisc del dev v1 clsact
+ __simple_if_fini v3 192.0.2.49/28
+ __simple_if_fini v1 192.0.2.33/28
+ simple_if_fini $rp2 192.0.2.18/28
+}
+
+ns_init_common()
+{
+ local in_if=$1; shift
+ local in_addr=$1; shift
+ local other_in_addr=$1; shift
+ local nh_addr=$1; shift
+ local host_addr1=$1; shift
+ local host_addr2=$1; shift
+
+ ip link set dev $in_if up
+ ip address add dev $in_if $in_addr/28
+ tc qdisc add dev $in_if clsact
+
+ ip link add name br2 type bridge vlan_filtering 1 vlan_protocol 802.1ad \
+ vlan_default_pvid 0
+ ip link set dev br2 up
+
+ ip link add name w1 type veth peer name w2
+
+ ip link set dev w1 master br2
+ ip link set dev w1 up
+ bridge vlan add vid 100 dev w1 pvid untagged
+
+ ip link add name vx100 type vxlan id 1000 local $in_addr \
+ dstport "$VXPORT"
+ ip link set dev vx100 up
+ bridge fdb append dev vx100 00:00:00:00:00:00 dst 192.0.2.17 self
+ bridge fdb append dev vx100 00:00:00:00:00:00 dst $other_in_addr self
+
+ ip link set dev vx100 master br2
+ tc qdisc add dev vx100 clsact
+
+ bridge vlan add vid 100 dev vx100 pvid untagged
+
+ simple_if_init w2
+ vlan_create w2 10 vw2 $host_addr1/28
+ vlan_create w2 20 vw2 $host_addr2/24
+
+ ip route add 192.0.2.16/28 nexthop via $nh_addr
+ ip route add $other_in_addr/32 nexthop via $nh_addr
+}
+export -f ns_init_common
+
+ns1_create()
+{
+ ip netns add ns1
+ ip link set dev v2 netns ns1
+ in_ns ns1 \
+ ns_init_common v2 192.0.2.34 192.0.2.50 192.0.2.33 \
+ 192.0.2.3 198.51.100.3
+}
+
+ns1_destroy()
+{
+ ip netns exec ns1 ip link set dev v2 netns 1
+ ip netns del ns1
+}
+
+ns2_create()
+{
+ ip netns add ns2
+ ip link set dev v4 netns ns2
+ in_ns ns2 \
+ ns_init_common v4 192.0.2.50 192.0.2.34 192.0.2.49 \
+ 192.0.2.4 198.51.100.4
+}
+
+ns2_destroy()
+{
+ ip netns exec ns2 ip link set dev v4 netns 1
+ ip netns del ns2
+}
+
+setup_prepare()
+{
+ h1=${NETIFS[p1]}
+ swp1=${NETIFS[p2]}
+
+ swp2=${NETIFS[p3]}
+ h2=${NETIFS[p4]}
+
+ rp1=${NETIFS[p5]}
+ rp2=${NETIFS[p6]}
+
+ vrf_prepare
+ forwarding_enable
+
+ h1_create
+ h2_create
+ switch_create
+
+ ip link add name v1 type veth peer name v2
+ ip link add name v3 type veth peer name v4
+ vrp2_create
+ ns1_create
+ ns2_create
+
+ r1_mac=$(in_ns ns1 mac_get w2)
+ r2_mac=$(in_ns ns2 mac_get w2)
+ h2_mac=$(mac_get $h2)
+}
+
+cleanup()
+{
+ pre_cleanup
+
+ ns2_destroy
+ ns1_destroy
+ vrp2_destroy
+ ip link del dev v3
+ ip link del dev v1
+
+ switch_destroy
+ h2_destroy
+ h1_destroy
+
+ forwarding_restore
+ vrf_cleanup
+}
+
+ping_ipv4()
+{
+ ping_test $h1 192.0.2.2 ": local->local"
+ ping_test $h1 192.0.2.3 ": local->remote 1"
+ ping_test $h1 192.0.2.4 ": local->remote 2"
+}
+
+test_all()
+{
+ echo "Running tests with UDP port $VXPORT"
+ tests_run
+}
+
+trap cleanup EXIT
+
+setup_prepare
+setup_wait
+test_all
+
+exit $EXIT_STATUS
diff --git a/tools/testing/selftests/net/forwarding/q_in_vni_ipv6.sh b/tools/testing/selftests/net/forwarding/q_in_vni_ipv6.sh
new file mode 100755
index 000000000000..0548b2b0d416
--- /dev/null
+++ b/tools/testing/selftests/net/forwarding/q_in_vni_ipv6.sh
@@ -0,0 +1,347 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+# +-----------------------+ +------------------------+
+# | H1 (vrf) | | H2 (vrf) |
+# | + $h1.10 | | + $h2.10 |
+# | | 2001:db8:1::1/64 | | | 2001:db8:1::2/64 |
+# | | | | | |
+# | | + $h1.20 | | | + $h2.20 |
+# | \ | 2001:db8:2::1/64 | | \ | 2001:db8:2::2/64 |
+# | \| | | \| |
+# | + $h1 | | + $h2 |
+# +----|------------------+ +----|-------------------+
+# | |
+# +----|--------------------------------------------------|-------------------+
+# | SW | | |
+# | +--|--------------------------------------------------|-----------------+ |
+# | | + $swp1 BR1 (802.1ad) + $swp2 | |
+# | | vid 100 pvid untagged vid 100 pvid | |
+# | | untagged | |
+# | | + vx100 (vxlan) | |
+# | | local 2001:db8:3::1 | |
+# | | remote 2001:db8:4::1 2001:db8:5::1 | |
+# | | id 1000 dstport $VXPORT | |
+# | | vid 100 pvid untagged | |
+# | +-----------------------------------------------------------------------+ |
+# | |
+# | 2001:db8:4::0/64 via 2001:db8:3::2 |
+# | 2001:db8:5::0/64 via 2001:db8:3::2 |
+# | |
+# | + $rp1 |
+# | | 2001:db8:3::1/64 |
+# +----|----------------------------------------------------------------------+
+# |
+# +----|----------------------------------------------------------+
+# | | VRP2 (vrf) |
+# | + $rp2 |
+# | 2001:db8:3::2/64 |
+# | | (maybe) HW
+# =============================================================================
+# | | (likely) SW
+# | + v1 (veth) + v3 (veth) |
+# | | 2001:db8:4::2/64 | 2001:db8:5::2/64 |
+# +----|---------------------------------------|------------------+
+# | |
+# +----|--------------------------------+ +----|-------------------------------+
+# | + v2 (veth) NS1 (netns) | | + v4 (veth) NS2 (netns) |
+# | 2001:db8:4::1/64 | | 2001:db8:5::1/64 |
+# | | | |
+# | 2001:db8:3::0/64 via 2001:db8:4::2 | | 2001:db8:3::0/64 via 2001:db8:5::2 |
+# | 2001:db8:5::1/128 via 2001:db8:4::2 | | 2001:db8:4::1/128 via |
+# | | | 2001:db8:5::2 |
+# | +-------------------------------+ | | +-------------------------------+ |
+# | | BR2 (802.1ad) | | | | BR2 (802.1ad) | |
+# | | + vx100 (vxlan) | | | | + vx100 (vxlan) | |
+# | | local 2001:db8:4::1 | | | | local 2001:db8:5::1 | |
+# | | remote 2001:db8:3::1 | | | | remote 2001:db8:3::1 | |
+# | | remote 2001:db8:5::1 | | | | remote 2001:db8:4::1 | |
+# | | id 1000 dstport $VXPORT | | | | id 1000 dstport $VXPORT | |
+# | | vid 100 pvid untagged | | | | vid 100 pvid untagged | |
+# | | | | | | | |
+# | | + w1 (veth) | | | | + w1 (veth) | |
+# | | | vid 100 pvid untagged | | | | | vid 100 pvid untagged | |
+# | +--|----------------------------+ | | +--|----------------------------+ |
+# | | | | | |
+# | +--|----------------------------+ | | +--|----------------------------+ |
+# | | | VW2 (vrf) | | | | | VW2 (vrf) | |
+# | | + w2 (veth) | | | | + w2 (veth) | |
+# | | |\ | | | | |\ | |
+# | | | + w2.10 | | | | | + w2.10 | |
+# | | | 2001:db8:1::3/64 | | | | | 2001:db8:1::4/64 | |
+# | | | | | | | | | |
+# | | + w2.20 | | | | + w2.20 | |
+# | | 2001:db8:2::3/64 | | | | 2001:db8:2::4/64 | |
+# | +-------------------------------+ | | +-------------------------------+ |
+# +-------------------------------------+ +------------------------------------+
+
+: ${VXPORT:=4789}
+export VXPORT
+
+: ${ALL_TESTS:="
+ ping_ipv6
+ "}
+
+NUM_NETIFS=6
+source lib.sh
+
+h1_create()
+{
+ simple_if_init $h1
+ tc qdisc add dev $h1 clsact
+ vlan_create $h1 10 v$h1 2001:db8:1::1/64
+ vlan_create $h1 20 v$h1 2001:db8:2::1/64
+}
+
+h1_destroy()
+{
+ vlan_destroy $h1 20
+ vlan_destroy $h1 10
+ tc qdisc del dev $h1 clsact
+ simple_if_fini $h1
+}
+
+h2_create()
+{
+ simple_if_init $h2
+ tc qdisc add dev $h2 clsact
+ vlan_create $h2 10 v$h2 2001:db8:1::2/64
+ vlan_create $h2 20 v$h2 2001:db8:2::2/64
+}
+
+h2_destroy()
+{
+ vlan_destroy $h2 20
+ vlan_destroy $h2 10
+ tc qdisc del dev $h2 clsact
+ simple_if_fini $h2
+}
+
+rp1_set_addr()
+{
+ ip address add dev $rp1 2001:db8:3::1/64
+
+ ip route add 2001:db8:4::0/64 nexthop via 2001:db8:3::2
+ ip route add 2001:db8:5::0/64 nexthop via 2001:db8:3::2
+}
+
+rp1_unset_addr()
+{
+ ip route del 2001:db8:5::0/64 nexthop via 2001:db8:3::2
+ ip route del 2001:db8:4::0/64 nexthop via 2001:db8:3::2
+
+ ip address del dev $rp1 2001:db8:3::1/64
+}
+
+switch_create()
+{
+ ip link add name br1 type bridge vlan_filtering 1 vlan_protocol 802.1ad \
+ vlan_default_pvid 0 mcast_snooping 0
+ # Make sure the bridge uses the MAC address of the local port and not
+ # that of the VxLAN's device.
+ ip link set dev br1 address $(mac_get $swp1)
+ ip link set dev br1 up
+
+ ip link set dev $rp1 up
+ rp1_set_addr
+
+ ip link add name vx100 type vxlan id 1000 \
+ local 2001:db8:3::1 dstport "$VXPORT" \
+ nolearning udp6zerocsumrx udp6zerocsumtx tos inherit ttl 100
+ ip link set dev vx100 up
+
+ ip link set dev vx100 master br1
+ bridge vlan add vid 100 dev vx100 pvid untagged
+
+ ip link set dev $swp1 master br1
+ ip link set dev $swp1 up
+ bridge vlan add vid 100 dev $swp1 pvid untagged
+
+ ip link set dev $swp2 master br1
+ ip link set dev $swp2 up
+ bridge vlan add vid 100 dev $swp2 pvid untagged
+
+ bridge fdb append dev vx100 00:00:00:00:00:00 dst 2001:db8:4::1 self
+ bridge fdb append dev vx100 00:00:00:00:00:00 dst 2001:db8:5::1 self
+}
+
+switch_destroy()
+{
+ bridge fdb del dev vx100 00:00:00:00:00:00 dst 2001:db8:5::1 self
+ bridge fdb del dev vx100 00:00:00:00:00:00 dst 2001:db8:4::1 self
+
+ bridge vlan del vid 100 dev $swp2
+ ip link set dev $swp2 down
+ ip link set dev $swp2 nomaster
+
+ bridge vlan del vid 100 dev $swp1
+ ip link set dev $swp1 down
+ ip link set dev $swp1 nomaster
+
+ ip link set dev vx100 nomaster
+ ip link set dev vx100 down
+ ip link del dev vx100
+
+ rp1_unset_addr
+ ip link set dev $rp1 down
+
+ ip link set dev br1 down
+ ip link del dev br1
+}
+
+vrp2_create()
+{
+ simple_if_init $rp2 2001:db8:3::2/64
+ __simple_if_init v1 v$rp2 2001:db8:4::2/64
+ __simple_if_init v3 v$rp2 2001:db8:5::2/64
+ tc qdisc add dev v1 clsact
+}
+
+vrp2_destroy()
+{
+ tc qdisc del dev v1 clsact
+ __simple_if_fini v3 2001:db8:5::2/64
+ __simple_if_fini v1 2001:db8:4::2/64
+ simple_if_fini $rp2 2001:db8:3::2/64
+}
+
+ns_init_common()
+{
+ local in_if=$1; shift
+ local in_addr=$1; shift
+ local other_in_addr=$1; shift
+ local nh_addr=$1; shift
+ local host_addr1=$1; shift
+ local host_addr2=$1; shift
+
+ ip link set dev $in_if up
+ ip address add dev $in_if $in_addr/64
+ tc qdisc add dev $in_if clsact
+
+ ip link add name br2 type bridge vlan_filtering 1 vlan_protocol 802.1ad \
+ vlan_default_pvid 0
+ ip link set dev br2 up
+
+ ip link add name w1 type veth peer name w2
+
+ ip link set dev w1 master br2
+ ip link set dev w1 up
+ bridge vlan add vid 100 dev w1 pvid untagged
+
+ ip link add name vx100 type vxlan id 1000 local $in_addr \
+ dstport "$VXPORT" udp6zerocsumrx
+ ip link set dev vx100 up
+ bridge fdb append dev vx100 00:00:00:00:00:00 dst 2001:db8:3::1 self
+ bridge fdb append dev vx100 00:00:00:00:00:00 dst $other_in_addr self
+
+ ip link set dev vx100 master br2
+ tc qdisc add dev vx100 clsact
+
+ bridge vlan add vid 100 dev vx100 pvid untagged
+
+ simple_if_init w2
+ vlan_create w2 10 vw2 $host_addr1/64
+ vlan_create w2 20 vw2 $host_addr2/64
+
+ ip route add 2001:db8:3::0/64 nexthop via $nh_addr
+ ip route add $other_in_addr/128 nexthop via $nh_addr
+}
+export -f ns_init_common
+
+ns1_create()
+{
+ ip netns add ns1
+ ip link set dev v2 netns ns1
+ in_ns ns1 \
+ ns_init_common v2 2001:db8:4::1 2001:db8:5::1 2001:db8:4::2 \
+ 2001:db8:1::3 2001:db8:2::3
+}
+
+ns1_destroy()
+{
+ ip netns exec ns1 ip link set dev v2 netns 1
+ ip netns del ns1
+}
+
+ns2_create()
+{
+ ip netns add ns2
+ ip link set dev v4 netns ns2
+ in_ns ns2 \
+ ns_init_common v4 2001:db8:5::1 2001:db8:4::1 2001:db8:5::2 \
+ 2001:db8:1::4 2001:db8:2::4
+}
+
+ns2_destroy()
+{
+ ip netns exec ns2 ip link set dev v4 netns 1
+ ip netns del ns2
+}
+
+setup_prepare()
+{
+ h1=${NETIFS[p1]}
+ swp1=${NETIFS[p2]}
+
+ swp2=${NETIFS[p3]}
+ h2=${NETIFS[p4]}
+
+ rp1=${NETIFS[p5]}
+ rp2=${NETIFS[p6]}
+
+ vrf_prepare
+ forwarding_enable
+
+ h1_create
+ h2_create
+ switch_create
+
+ ip link add name v1 type veth peer name v2
+ ip link add name v3 type veth peer name v4
+ vrp2_create
+ ns1_create
+ ns2_create
+
+ r1_mac=$(in_ns ns1 mac_get w2)
+ r2_mac=$(in_ns ns2 mac_get w2)
+ h2_mac=$(mac_get $h2)
+}
+
+cleanup()
+{
+ pre_cleanup
+
+ ns2_destroy
+ ns1_destroy
+ vrp2_destroy
+ ip link del dev v3
+ ip link del dev v1
+
+ switch_destroy
+ h2_destroy
+ h1_destroy
+
+ forwarding_restore
+ vrf_cleanup
+}
+
+ping_ipv6()
+{
+ ping6_test $h1 2001:db8:1::2 ": local->local"
+ ping6_test $h1 2001:db8:1::3 ": local->remote 1"
+ ping6_test $h1 2001:db8:1::4 ": local->remote 2"
+}
+
+test_all()
+{
+ echo "Running tests with UDP port $VXPORT"
+ tests_run
+}
+
+trap cleanup EXIT
+
+setup_prepare
+setup_wait
+test_all
+
+exit $EXIT_STATUS
diff --git a/tools/testing/selftests/net/forwarding/router.sh b/tools/testing/selftests/net/forwarding/router.sh
index 057f91b05098..b98ea9449b8b 100755
--- a/tools/testing/selftests/net/forwarding/router.sh
+++ b/tools/testing/selftests/net/forwarding/router.sh
@@ -1,6 +1,24 @@
#!/bin/bash
# SPDX-License-Identifier: GPL-2.0
+# +--------------------+ +----------------------+
+# | H1 | | H2 |
+# | | | |
+# | $h1 + | | + $h2 |
+# | 192.0.2.2/24 | | | | 198.51.100.2/24 |
+# | 2001:db8:1::2/64 | | | | 2001:db8:2::2/64 |
+# | | | | | |
+# +------------------|-+ +-|--------------------+
+# | |
+# +------------------|-------------------------|--------------------+
+# | SW | | |
+# | | | |
+# | $rp1 + + $rp2 |
+# | 192.0.2.1/24 198.51.100.1/24 |
+# | 2001:db8:1::1/64 2001:db8:2::1/64 |
+# | |
+# +-----------------------------------------------------------------+
+
ALL_TESTS="
ping_ipv4
ping_ipv6
diff --git a/tools/testing/selftests/net/forwarding/router_bridge.sh b/tools/testing/selftests/net/forwarding/router_bridge.sh
index ebc596a272f7..0182eb2abfa6 100755
--- a/tools/testing/selftests/net/forwarding/router_bridge.sh
+++ b/tools/testing/selftests/net/forwarding/router_bridge.sh
@@ -1,9 +1,39 @@
#!/bin/bash
# SPDX-License-Identifier: GPL-2.0
+# +------------------------+ +----------------------+
+# | H1 (vrf) | | H2 (vrf) |
+# | + $h1 | | + $h2 |
+# | | 192.0.2.1/28 | | | 192.0.2.130/28 |
+# | | 2001:db8:1::1/64 | | | 2001:db8:2::2/64 |
+# | | | | | |
+# +----|-------------------+ +--|-------------------+
+# | |
+# +----|--------------------------------------------------|-------------------+
+# | SW | | |
+# | +--|-----------------------------+ + $swp2 |
+# | | + $swp1 BR1 (802.1q) | 192.0.2.129/28 |
+# | | 192.0.2.2/28 | 2001:db8:2::1/64 |
+# | | 2001:db8:1::1/64 | |
+# | | | |
+# | +--------------------------------+ |
+# +---------------------------------------------------------------------------+
+
ALL_TESTS="
ping_ipv4
ping_ipv6
+ config_remaster
+ ping_ipv4
+ ping_ipv6
+ config_remove_pvid
+ ping_ipv4_fails
+ ping_ipv6_fails
+ config_add_pvid
+ ping_ipv4
+ ping_ipv6
+ config_late_pvid
+ ping_ipv4
+ ping_ipv6
"
NUM_NETIFS=4
source lib.sh
@@ -38,7 +68,8 @@ h2_destroy()
router_create()
{
- ip link add name br1 type bridge vlan_filtering 1
+ ip link add name br1 address $(mac_get $swp1) \
+ type bridge vlan_filtering 1
ip link set dev br1 up
ip link set dev $swp1 master br1
@@ -61,6 +92,42 @@ router_destroy()
ip link del dev br1
}
+config_remaster()
+{
+ log_info "Remaster bridge slave"
+
+ ip link set dev $swp1 nomaster
+ sleep 2
+ ip link set dev $swp1 master br1
+}
+
+config_remove_pvid()
+{
+ log_info "Remove PVID from the bridge"
+
+ bridge vlan add dev br1 vid 1 self
+ sleep 2
+}
+
+config_add_pvid()
+{
+ log_info "Add PVID to the bridge"
+
+ bridge vlan add dev br1 vid 1 self pvid untagged
+ sleep 2
+}
+
+config_late_pvid()
+{
+ log_info "Add bridge PVID after enslaving port"
+
+ ip link set dev $swp1 nomaster
+ ip link set dev br1 type bridge vlan_default_pvid 0
+ sleep 2
+ ip link set dev $swp1 master br1
+ ip link set dev br1 type bridge vlan_default_pvid 1
+}
+
setup_prepare()
{
h1=${NETIFS[p1]}
@@ -103,6 +170,16 @@ ping_ipv6()
ping6_test $h1 2001:db8:2::2
}
+ping_ipv4_fails()
+{
+ ping_test_fails $h1 192.0.2.130
+}
+
+ping_ipv6_fails()
+{
+ ping6_test_fails $h1 2001:db8:2::2
+}
+
trap cleanup EXIT
setup_prepare
diff --git a/tools/testing/selftests/net/forwarding/router_bridge_1d.sh b/tools/testing/selftests/net/forwarding/router_bridge_1d.sh
new file mode 100755
index 000000000000..6d51f2ca72a2
--- /dev/null
+++ b/tools/testing/selftests/net/forwarding/router_bridge_1d.sh
@@ -0,0 +1,185 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+# +---------------------------------------------+ +----------------------+
+# | H1 (vrf) | | H2 (vrf) |
+# | + $h1.100 + $h1.200 | | + $h2 |
+# | | 192.0.2.1/28 | 192.0.2.17/28 | | | 192.0.2.130/28 |
+# | | 2001:db8:1::1/64 | 2001:db8:3::1/64 | | | 192.0.2.146/28 |
+# | \_________ __________/ | | | 2001:db8:2::2/64 |
+# | V | | | 2001:db8:4::2/64 |
+# | + $h1 | | | |
+# +--------------|------------------------------+ +--|-------------------+
+# | |
+# +--------------|----------------------------------------|-------------------+
+# | SW + $swp1 + $swp2 |
+# | | 192.0.2.129/28 |
+# | | 192.0.2.145/28 |
+# | | 2001:db8:2::1/64 |
+# | ________^___________________________ 2001:db8:4::1/64 |
+# | / \ |
+# | +---|------------------------------+ +---|------------------------------+ |
+# | | + $swp1.100 BR1 (802.1d) | | + $swp1.200 BR2 (802.1d) | |
+# | | 192.0.2.2/28 | | 192.0.2.18/28 | |
+# | | 2001:db8:1::2/64 | | 2001:db8:3::2/64 | |
+# | | | | | |
+# | +----------------------------------+ +----------------------------------+ |
+# +---------------------------------------------------------------------------+
+
+ALL_TESTS="
+ ping_ipv4
+ ping_ipv6
+ config_remaster
+ ping_ipv4
+ ping_ipv6
+"
+NUM_NETIFS=4
+source lib.sh
+
+h1_create()
+{
+ simple_if_init $h1
+ vlan_create $h1 100 v$h1 192.0.2.1/28 2001:db8:1::1/64
+ vlan_create $h1 200 v$h1 192.0.2.17/28 2001:db8:3::1/64
+ ip -4 route add 192.0.2.128/28 vrf v$h1 nexthop via 192.0.2.2
+ ip -4 route add 192.0.2.144/28 vrf v$h1 nexthop via 192.0.2.18
+ ip -6 route add 2001:db8:2::/64 vrf v$h1 nexthop via 2001:db8:1::2
+ ip -6 route add 2001:db8:4::/64 vrf v$h1 nexthop via 2001:db8:3::2
+}
+
+h1_destroy()
+{
+ ip -6 route del 2001:db8:4::/64 vrf v$h1
+ ip -6 route del 2001:db8:2::/64 vrf v$h1
+ ip -4 route del 192.0.2.144/28 vrf v$h1
+ ip -4 route del 192.0.2.128/28 vrf v$h1
+ vlan_destroy $h1 200
+ vlan_destroy $h1 100
+ simple_if_fini $h1
+}
+
+h2_create()
+{
+ simple_if_init $h2 192.0.2.130/28 2001:db8:2::2/64 \
+ 192.0.2.146/28 2001:db8:4::2/64
+ ip -4 route add 192.0.2.0/28 vrf v$h2 nexthop via 192.0.2.129
+ ip -4 route add 192.0.2.16/28 vrf v$h2 nexthop via 192.0.2.145
+ ip -6 route add 2001:db8:1::/64 vrf v$h2 nexthop via 2001:db8:2::1
+ ip -6 route add 2001:db8:3::/64 vrf v$h2 nexthop via 2001:db8:4::1
+}
+
+h2_destroy()
+{
+ ip -6 route del 2001:db8:3::/64 vrf v$h2
+ ip -6 route del 2001:db8:1::/64 vrf v$h2
+ ip -4 route del 192.0.2.16/28 vrf v$h2
+ ip -4 route del 192.0.2.0/28 vrf v$h2
+ simple_if_fini $h2 192.0.2.130/28 2001:db8:2::2/64 \
+ 192.0.2.146/28 2001:db8:4::2/64
+}
+
+router_create()
+{
+ ip link set dev $swp1 up
+
+ vlan_create $swp1 100
+ ip link add name br1 type bridge vlan_filtering 0
+ ip link set dev br1 address $(mac_get $swp1.100)
+ ip link set dev $swp1.100 master br1
+ __addr_add_del br1 add 192.0.2.2/28 2001:db8:1::2/64
+ ip link set dev br1 up
+
+ vlan_create $swp1 200
+ ip link add name br2 type bridge vlan_filtering 0
+ ip link set dev br2 address $(mac_get $swp1.200)
+ ip link set dev $swp1.200 master br2
+ __addr_add_del br2 add 192.0.2.18/28 2001:db8:3::2/64
+ ip link set dev br2 up
+
+ ip link set dev $swp2 up
+ __addr_add_del $swp2 add 192.0.2.129/28 2001:db8:2::1/64 \
+ 192.0.2.145/28 2001:db8:4::1/64
+}
+
+router_destroy()
+{
+ __addr_add_del $swp2 del 192.0.2.129/28 2001:db8:2::1/64 \
+ 192.0.2.145/28 2001:db8:4::1/64
+ ip link set dev $swp2 down
+
+ __addr_add_del br2 del 192.0.2.18/28 2001:db8:3::2/64
+ ip link set dev $swp1.200 nomaster
+ ip link del dev br2
+ vlan_destroy $swp1 200
+
+ __addr_add_del br1 del 192.0.2.2/28 2001:db8:1::2/64
+ ip link set dev $swp1.100 nomaster
+ ip link del dev br1
+ vlan_destroy $swp1 100
+
+ ip link set dev $swp1 down
+}
+
+config_remaster()
+{
+ log_info "Remaster bridge slaves"
+
+ ip link set dev $swp1.100 nomaster
+ ip link set dev $swp1.200 nomaster
+ sleep 2
+ ip link set dev $swp1.200 master br2
+ ip link set dev $swp1.100 master br1
+}
+
+setup_prepare()
+{
+ h1=${NETIFS[p1]}
+ swp1=${NETIFS[p2]}
+
+ swp2=${NETIFS[p3]}
+ h2=${NETIFS[p4]}
+
+ vrf_prepare
+
+ h1_create
+ h2_create
+
+ router_create
+
+ forwarding_enable
+}
+
+cleanup()
+{
+ pre_cleanup
+
+ forwarding_restore
+
+ router_destroy
+
+ h2_destroy
+ h1_destroy
+
+ vrf_cleanup
+}
+
+ping_ipv4()
+{
+ ping_test $h1 192.0.2.130 ": via 100"
+ ping_test $h1 192.0.2.146 ": via 200"
+}
+
+ping_ipv6()
+{
+ ping6_test $h1 2001:db8:2::2 ": via 100"
+ ping6_test $h1 2001:db8:4::2 ": via 200"
+}
+
+trap cleanup EXIT
+
+setup_prepare
+setup_wait
+
+tests_run
+
+exit $EXIT_STATUS
diff --git a/tools/testing/selftests/net/forwarding/router_bridge_1d_lag.sh b/tools/testing/selftests/net/forwarding/router_bridge_1d_lag.sh
new file mode 100755
index 000000000000..e064b946e821
--- /dev/null
+++ b/tools/testing/selftests/net/forwarding/router_bridge_1d_lag.sh
@@ -0,0 +1,408 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+# +--------------------------------------------+
+# | H1 (vrf) |
+# | |
+# | + LAG1.100 + LAG1.200 |
+# | | 192.0.2.1/28 | 192.0.2.17/28 |
+# | | 2001:db8:1::1/64 | 2001:db8:3:1/64 |
+# | \___________ _______/ |
+# | v |
+# | + LAG1 (team) |
+# | | |
+# | ____^____ |
+# | / \ |
+# | + $h1 + $h4 |
+# | | | |
+# +----------|-----------|---------------------+
+# | |
+# +----------|-----------|---------------------+
+# | SW | | |
+# | + $swp1 + $swp4 |
+# | \____ ____/ |
+# | v |
+# | LAG2 (team) + |
+# | | |
+# | _______^______________ |
+# | / \ |
+# | +------|------------+ +-------|----------+ |
+# | | + LAG2.100 | | + LAG2.200 | |
+# | | | | | |
+# | | BR1 (802.1d) | | BR2 (802.1d) | |
+# | | 192.0.2.2/28 | | 192.0.2.18/28 | |
+# | | 2001:db8:1::2/64 | | 2001:db8:3:2/64 | |
+# | | | | | |
+# | +-------------------+ +------------------+ |
+# | |
+# | + LAG3.100 + LAG3.200 |
+# | | 192.0.2.129/28 | 192.0.2.145/28 |
+# | | 2001:db8:2::1/64 | 2001:db8:4::1/64 |
+# | | | |
+# | \_________ ___________/ |
+# | v |
+# | + LAG3 (team) |
+# | ____|____ |
+# | / \ |
+# | + $swp2 + $swp3 |
+# | | | |
+# +-------|---------|--------------------------+
+# | |
+# +-------|---------|--------------------------+
+# | | | |
+# | + $h2 + $h3 |
+# | \____ ___/ |
+# | | |
+# | + LAG4 (team) |
+# | | |
+# | __________^__________ |
+# | / \ |
+# | | | |
+# | + LAG4.100 + LAG4.200 |
+# | 192.0.2.130/28 192.0.2.146/28 |
+# | 2001:db8:2::2/64 2001:db8:4::2/64 |
+# | |
+# | H2 (vrf) |
+# +--------------------------------------------+
+
+ALL_TESTS="
+ ping_ipv4
+ ping_ipv6
+
+ $(: exercise remastering of LAG2 slaves )
+ config_deslave_swp4
+ config_wait
+ ping_ipv4
+ ping_ipv6
+ config_enslave_swp4
+ config_deslave_swp1
+ config_wait
+ ping_ipv4
+ ping_ipv6
+ config_deslave_swp4
+ config_enslave_swp1
+ config_enslave_swp4
+ config_wait
+ ping_ipv4
+ ping_ipv6
+
+ $(: exercise remastering of LAG2 itself )
+ config_remaster_lag2
+ config_wait
+ ping_ipv4
+ ping_ipv6
+
+ $(: exercise remastering of LAG3 slaves )
+ config_deslave_swp2
+ config_wait
+ ping_ipv4
+ ping_ipv6
+ config_enslave_swp2
+ config_deslave_swp3
+ config_wait
+ ping_ipv4
+ ping_ipv6
+ config_deslave_swp2
+ config_enslave_swp3
+ config_enslave_swp2
+ config_wait
+ ping_ipv4
+ ping_ipv6
+"
+NUM_NETIFS=8
+source lib.sh
+
+h1_create()
+{
+ team_create lag1 lacp
+ ip link set dev lag1 addrgenmode none
+ ip link set dev lag1 address $(mac_get $h1)
+ ip link set dev $h1 master lag1
+ ip link set dev $h4 master lag1
+ simple_if_init lag1
+ ip link set dev $h1 up
+ ip link set dev $h4 up
+
+ vlan_create lag1 100 vlag1 192.0.2.1/28 2001:db8:1::1/64
+ vlan_create lag1 200 vlag1 192.0.2.17/28 2001:db8:3::1/64
+
+ ip -4 route add 192.0.2.128/28 vrf vlag1 nexthop via 192.0.2.2
+ ip -6 route add 2001:db8:2::/64 vrf vlag1 nexthop via 2001:db8:1::2
+
+ ip -4 route add 192.0.2.144/28 vrf vlag1 nexthop via 192.0.2.18
+ ip -6 route add 2001:db8:4::/64 vrf vlag1 nexthop via 2001:db8:3::2
+}
+
+h1_destroy()
+{
+ ip -6 route del 2001:db8:4::/64 vrf vlag1
+ ip -4 route del 192.0.2.144/28 vrf vlag1
+
+ ip -6 route del 2001:db8:2::/64 vrf vlag1
+ ip -4 route del 192.0.2.128/28 vrf vlag1
+
+ vlan_destroy lag1 200
+ vlan_destroy lag1 100
+
+ ip link set dev $h4 down
+ ip link set dev $h1 down
+ simple_if_fini lag1
+ ip link set dev $h4 nomaster
+ ip link set dev $h1 nomaster
+ team_destroy lag1
+}
+
+h2_create()
+{
+ team_create lag4 lacp
+ ip link set dev lag4 addrgenmode none
+ ip link set dev lag4 address $(mac_get $h2)
+ ip link set dev $h2 master lag4
+ ip link set dev $h3 master lag4
+ simple_if_init lag4
+ ip link set dev $h2 up
+ ip link set dev $h3 up
+
+ vlan_create lag4 100 vlag4 192.0.2.130/28 2001:db8:2::2/64
+ vlan_create lag4 200 vlag4 192.0.2.146/28 2001:db8:4::2/64
+
+ ip -4 route add 192.0.2.0/28 vrf vlag4 nexthop via 192.0.2.129
+ ip -6 route add 2001:db8:1::/64 vrf vlag4 nexthop via 2001:db8:2::1
+
+ ip -4 route add 192.0.2.16/28 vrf vlag4 nexthop via 192.0.2.145
+ ip -6 route add 2001:db8:3::/64 vrf vlag4 nexthop via 2001:db8:4::1
+}
+
+h2_destroy()
+{
+ ip -6 route del 2001:db8:3::/64 vrf vlag4
+ ip -4 route del 192.0.2.16/28 vrf vlag4
+
+ ip -6 route del 2001:db8:1::/64 vrf vlag4
+ ip -4 route del 192.0.2.0/28 vrf vlag4
+
+ vlan_destroy lag4 200
+ vlan_destroy lag4 100
+
+ ip link set dev $h3 down
+ ip link set dev $h2 down
+ simple_if_fini lag4
+ ip link set dev $h3 nomaster
+ ip link set dev $h2 nomaster
+ team_destroy lag4
+}
+
+router_create()
+{
+ team_create lag2 lacp
+ ip link set dev lag2 addrgenmode none
+ ip link set dev lag2 address $(mac_get $swp1)
+ ip link set dev $swp1 master lag2
+ ip link set dev $swp4 master lag2
+
+ vlan_create lag2 100
+ vlan_create lag2 200
+
+ ip link add name br1 type bridge vlan_filtering 0
+ ip link set dev br1 address $(mac_get lag2.100)
+ ip link set dev lag2.100 master br1
+
+ ip link add name br2 type bridge vlan_filtering 0
+ ip link set dev br2 address $(mac_get lag2.200)
+ ip link set dev lag2.200 master br2
+
+ ip link set dev $swp1 up
+ ip link set dev $swp4 up
+ ip link set dev br1 up
+ ip link set dev br2 up
+
+ __addr_add_del br1 add 192.0.2.2/28 2001:db8:1::2/64
+ __addr_add_del br2 add 192.0.2.18/28 2001:db8:3::2/64
+
+ team_create lag3 lacp
+ ip link set dev lag3 addrgenmode none
+ ip link set dev lag3 address $(mac_get $swp2)
+ ip link set dev $swp2 master lag3
+ ip link set dev $swp3 master lag3
+ ip link set dev $swp2 up
+ ip link set dev $swp3 up
+
+ vlan_create lag3 100
+ vlan_create lag3 200
+
+ __addr_add_del lag3.100 add 192.0.2.129/28 2001:db8:2::1/64
+ __addr_add_del lag3.200 add 192.0.2.145/28 2001:db8:4::1/64
+}
+
+router_destroy()
+{
+ __addr_add_del lag3.200 del 192.0.2.145/28 2001:db8:4::1/64
+ __addr_add_del lag3.100 del 192.0.2.129/28 2001:db8:2::1/64
+
+ vlan_destroy lag3 200
+ vlan_destroy lag3 100
+
+ ip link set dev $swp3 down
+ ip link set dev $swp2 down
+ ip link set dev $swp3 nomaster
+ ip link set dev $swp2 nomaster
+ team_destroy lag3
+
+ __addr_add_del br2 del 192.0.2.18/28 2001:db8:3::2/64
+ __addr_add_del br1 del 192.0.2.2/28 2001:db8:1::2/64
+
+ ip link set dev br2 down
+ ip link set dev br1 down
+ ip link set dev $swp4 down
+ ip link set dev $swp1 down
+
+ ip link set dev lag2.200 nomaster
+ ip link del dev br2
+
+ ip link set dev lag2.100 nomaster
+ ip link del dev br1
+
+ vlan_destroy lag2 200
+ vlan_destroy lag2 100
+
+ ip link set dev $swp4 nomaster
+ ip link set dev $swp1 nomaster
+ team_destroy lag2
+}
+
+config_remaster_lag2()
+{
+ log_info "Remaster bridge slaves"
+
+ ip link set dev lag2.200 nomaster
+ ip link set dev lag2.100 nomaster
+ sleep 2
+ ip link set dev lag2.100 master br1
+ ip link set dev lag2.200 master br2
+}
+
+config_deslave()
+{
+ local netdev=$1; shift
+
+ log_info "Deslave $netdev"
+ ip link set dev $netdev down
+ ip link set dev $netdev nomaster
+ ip link set dev $netdev up
+}
+
+config_deslave_swp1()
+{
+ config_deslave $swp1
+}
+
+config_deslave_swp2()
+{
+ config_deslave $swp2
+}
+
+config_deslave_swp3()
+{
+ config_deslave $swp3
+}
+
+config_deslave_swp4()
+{
+ config_deslave $swp4
+}
+
+config_enslave()
+{
+ local netdev=$1; shift
+ local master=$1; shift
+
+ log_info "Enslave $netdev to $master"
+ ip link set dev $netdev down
+ ip link set dev $netdev master $master
+ ip link set dev $netdev up
+}
+
+config_enslave_swp1()
+{
+ config_enslave $swp1 lag2
+}
+
+config_enslave_swp2()
+{
+ config_enslave $swp2 lag3
+}
+
+config_enslave_swp3()
+{
+ config_enslave $swp3 lag3
+}
+
+config_enslave_swp4()
+{
+ config_enslave $swp4 lag2
+}
+
+config_wait()
+{
+ setup_wait_dev lag2
+ setup_wait_dev lag3
+}
+
+setup_prepare()
+{
+ h1=${NETIFS[p1]}
+ swp1=${NETIFS[p2]}
+
+ swp2=${NETIFS[p3]}
+ h2=${NETIFS[p4]}
+
+ swp3=${NETIFS[p5]}
+ h3=${NETIFS[p6]}
+
+ h4=${NETIFS[p7]}
+ swp4=${NETIFS[p8]}
+
+ vrf_prepare
+
+ h1_create
+ h2_create
+
+ router_create
+
+ forwarding_enable
+}
+
+cleanup()
+{
+ pre_cleanup
+
+ forwarding_restore
+
+ router_destroy
+
+ h2_destroy
+ h1_destroy
+
+ vrf_cleanup
+}
+
+ping_ipv4()
+{
+ ping_test lag1.100 192.0.2.130 ": via 100"
+ ping_test lag1.200 192.0.2.146 ": via 200"
+}
+
+ping_ipv6()
+{
+ ping6_test lag1.100 2001:db8:2::2 ": via 100"
+ ping6_test lag1.200 2001:db8:4::2 ": via 200"
+}
+
+trap cleanup EXIT
+
+setup_prepare
+setup_wait
+
+tests_run
+
+exit $EXIT_STATUS
diff --git a/tools/testing/selftests/net/forwarding/router_bridge_lag.sh b/tools/testing/selftests/net/forwarding/router_bridge_lag.sh
new file mode 100755
index 000000000000..f05ffe213c46
--- /dev/null
+++ b/tools/testing/selftests/net/forwarding/router_bridge_lag.sh
@@ -0,0 +1,323 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+# +----------------------------+ +--------------------------+
+# | H1 (vrf) | | H2 (vrf) |
+# | | | |
+# | + LAG1 (team) | | + LAG4 (team) |
+# | | 192.0.2.1/28 | | | 192.0.2.130/28 |
+# | | 2001:db8:1::1/64 | | | 2001:db8:2::2/64 |
+# | __^___ | | __^_____ |
+# | / \ | | / \ |
+# | + $h1 + $h4 | | + $h2 + $h3 |
+# | | | | | | | |
+# +----|--------|--------------+ +-|----------|-------------+
+# | | | |
+# +----|--------|------------------------------------|----------|-------------+
+# | SW | | | | |
+# | + $swp1 + $swp4 + $swp2 + $swp3 |
+# | \__ ___/ \__ _____/ |
+# | v v |
+# | +------|-------------------------------+ | |
+# | | + LAG2 BR1 (802.1q) | + LAG3 (team) |
+# | | (team) 192.0.2.2/28 | 192.0.2.129/28 |
+# | | 2001:db8:1::2/64 | 2001:db8:2::1/64 |
+# | | | |
+# | +--------------------------------------+ |
+# +---------------------------------------------------------------------------+
+
+: ${ALL_TESTS:="
+ ping_ipv4
+ ping_ipv6
+
+ $(: exercise remastering of LAG2 slaves )
+ config_deslave_swp4
+ config_wait
+ ping_ipv4
+ ping_ipv6
+ config_enslave_swp4
+ config_deslave_swp1
+ config_wait
+ ping_ipv4
+ ping_ipv6
+ config_deslave_swp4
+ config_enslave_swp1
+ config_enslave_swp4
+ config_wait
+ ping_ipv4
+ ping_ipv6
+
+ $(: exercise remastering of LAG2 itself )
+ config_remaster_lag2
+ config_wait
+ ping_ipv4
+ ping_ipv6
+
+ $(: exercise remastering of LAG3 slaves )
+ config_deslave_swp2
+ config_wait
+ ping_ipv4
+ ping_ipv6
+ config_enslave_swp2
+ config_deslave_swp3
+ config_wait
+ ping_ipv4
+ ping_ipv6
+ config_deslave_swp2
+ config_enslave_swp3
+ config_enslave_swp2
+ config_wait
+ ping_ipv4
+ ping_ipv6
+
+ $(: move LAG3 to a bridge and then out )
+ config_remaster_lag3
+ config_wait
+ ping_ipv4
+ ping_ipv6
+ "}
+NUM_NETIFS=8
+: ${lib_dir:=.}
+source $lib_dir/lib.sh
+$EXTRA_SOURCE
+
+h1_create()
+{
+ team_create lag1 lacp
+ ip link set dev lag1 address $(mac_get $h1)
+ ip link set dev $h1 master lag1
+ ip link set dev $h4 master lag1
+ simple_if_init lag1 192.0.2.1/28 2001:db8:1::1/64
+ ip link set dev $h1 up
+ ip link set dev $h4 up
+ ip -4 route add 192.0.2.128/28 vrf vlag1 nexthop via 192.0.2.2
+ ip -6 route add 2001:db8:2::/64 vrf vlag1 nexthop via 2001:db8:1::2
+}
+
+h1_destroy()
+{
+ ip -6 route del 2001:db8:2::/64 vrf vlag1
+ ip -4 route del 192.0.2.128/28 vrf vlag1
+ ip link set dev $h4 down
+ ip link set dev $h1 down
+ simple_if_fini lag1 192.0.2.1/28 2001:db8:1::1/64
+ ip link set dev $h4 nomaster
+ ip link set dev $h1 nomaster
+ team_destroy lag1
+}
+
+h2_create()
+{
+ team_create lag4 lacp
+ ip link set dev lag4 address $(mac_get $h2)
+ ip link set dev $h2 master lag4
+ ip link set dev $h3 master lag4
+ simple_if_init lag4 192.0.2.130/28 2001:db8:2::2/64
+ ip link set dev $h2 up
+ ip link set dev $h3 up
+ ip -4 route add 192.0.2.0/28 vrf vlag4 nexthop via 192.0.2.129
+ ip -6 route add 2001:db8:1::/64 vrf vlag4 nexthop via 2001:db8:2::1
+}
+
+h2_destroy()
+{
+ ip -6 route del 2001:db8:1::/64 vrf vlag4
+ ip -4 route del 192.0.2.0/28 vrf vlag4
+ ip link set dev $h3 down
+ ip link set dev $h2 down
+ simple_if_fini lag4 192.0.2.130/28 2001:db8:2::2/64
+ ip link set dev $h3 nomaster
+ ip link set dev $h2 nomaster
+ team_destroy lag4
+}
+
+router_create()
+{
+ team_create lag2 lacp
+ ip link set dev lag2 address $(mac_get $swp1)
+ ip link set dev $swp1 master lag2
+ ip link set dev $swp4 master lag2
+
+ ip link add name br1 address $(mac_get lag2) \
+ type bridge vlan_filtering 1
+ ip link set dev lag2 master br1
+
+ ip link set dev $swp1 up
+ ip link set dev $swp4 up
+ ip link set dev br1 up
+
+ __addr_add_del br1 add 192.0.2.2/28 2001:db8:1::2/64
+
+ team_create lag3 lacp
+ ip link set dev lag3 address $(mac_get $swp2)
+ ip link set dev $swp2 master lag3
+ ip link set dev $swp3 master lag3
+ ip link set dev $swp2 up
+ ip link set dev $swp3 up
+ __addr_add_del lag3 add 192.0.2.129/28 2001:db8:2::1/64
+}
+
+router_destroy()
+{
+ __addr_add_del lag3 del 192.0.2.129/28 2001:db8:2::1/64
+ ip link set dev $swp3 down
+ ip link set dev $swp2 down
+ ip link set dev $swp3 nomaster
+ ip link set dev $swp2 nomaster
+ team_destroy lag3
+
+ __addr_add_del br1 del 192.0.2.2/28 2001:db8:1::2/64
+
+ ip link set dev $swp4 down
+ ip link set dev $swp1 down
+ ip link set dev br1 down
+
+ ip link set dev lag2 nomaster
+ ip link del dev br1
+
+ ip link set dev $swp4 nomaster
+ ip link set dev $swp1 nomaster
+ team_destroy lag2
+}
+
+config_remaster_lag2()
+{
+ log_info "Remaster bridge slave"
+
+ ip link set dev lag2 nomaster
+ sleep 2
+ ip link set dev lag2 master br1
+}
+
+config_remaster_lag3()
+{
+ log_info "Move lag3 to the bridge, then out again"
+
+ ip link set dev lag3 master br1
+ sleep 2
+ ip link set dev lag3 nomaster
+}
+
+config_deslave()
+{
+ local netdev=$1; shift
+
+ log_info "Deslave $netdev"
+ ip link set dev $netdev down
+ ip link set dev $netdev nomaster
+ ip link set dev $netdev up
+}
+
+config_deslave_swp1()
+{
+ config_deslave $swp1
+}
+
+config_deslave_swp2()
+{
+ config_deslave $swp2
+}
+
+config_deslave_swp3()
+{
+ config_deslave $swp3
+}
+
+config_deslave_swp4()
+{
+ config_deslave $swp4
+}
+
+config_enslave()
+{
+ local netdev=$1; shift
+ local master=$1; shift
+
+ log_info "Enslave $netdev to $master"
+ ip link set dev $netdev down
+ ip link set dev $netdev master $master
+ ip link set dev $netdev up
+}
+
+config_enslave_swp1()
+{
+ config_enslave $swp1 lag2
+}
+
+config_enslave_swp2()
+{
+ config_enslave $swp2 lag3
+}
+
+config_enslave_swp3()
+{
+ config_enslave $swp3 lag3
+}
+
+config_enslave_swp4()
+{
+ config_enslave $swp4 lag2
+}
+
+config_wait()
+{
+ setup_wait_dev lag2
+ setup_wait_dev lag3
+}
+
+setup_prepare()
+{
+ h1=${NETIFS[p1]}
+ swp1=${NETIFS[p2]}
+
+ swp2=${NETIFS[p3]}
+ h2=${NETIFS[p4]}
+
+ swp3=${NETIFS[p5]}
+ h3=${NETIFS[p6]}
+
+ h4=${NETIFS[p7]}
+ swp4=${NETIFS[p8]}
+
+ vrf_prepare
+
+ h1_create
+ h2_create
+
+ router_create
+
+ forwarding_enable
+}
+
+cleanup()
+{
+ pre_cleanup
+
+ forwarding_restore
+
+ router_destroy
+
+ h2_destroy
+ h1_destroy
+
+ vrf_cleanup
+}
+
+ping_ipv4()
+{
+ ping_test lag1 192.0.2.130
+}
+
+ping_ipv6()
+{
+ ping6_test lag1 2001:db8:2::2
+}
+
+trap cleanup EXIT
+
+setup_prepare
+setup_wait
+
+tests_run
+
+exit $EXIT_STATUS
diff --git a/tools/testing/selftests/net/forwarding/router_bridge_pvid_vlan_upper.sh b/tools/testing/selftests/net/forwarding/router_bridge_pvid_vlan_upper.sh
new file mode 100755
index 000000000000..76e4941fef73
--- /dev/null
+++ b/tools/testing/selftests/net/forwarding/router_bridge_pvid_vlan_upper.sh
@@ -0,0 +1,155 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+# +----------------------------+
+# | H1 (vrf) |
+# | + $h1.10 | +----------------------+
+# | | 192.0.2.1/28 | | H2 (vrf) |
+# | | 2001:db8:1::1/64 | | + $h2 |
+# | | | | | 192.0.2.130/28 |
+# | + $h1 | | | 2001:db8:2::2/64 |
+# +---|------------------------+ +--|-------------------+
+# | |
+# +---|--------------------------------------------------|-------------------+
+# | | router (main VRF) | |
+# | +-|----------------------------------+ + $swp2 |
+# | | + $swp1 BR1 (802.1q, pvid=10) | 192.0.2.129/28 |
+# | | 192.0.2.2/28 | 2001:db8:2::1/64 |
+# | | 2001:db8:1::2/64 | |
+# | +------------------------------------+ |
+# +--------------------------------------------------------------------------+
+
+ALL_TESTS="
+ ping_ipv4
+ ping_ipv6
+ shuffle_pvid
+ ping_ipv4
+ ping_ipv6
+"
+NUM_NETIFS=4
+source lib.sh
+
+h1_create()
+{
+ simple_if_init $h1
+ vlan_create $h1 10 v$h1 192.0.2.1/28 2001:db8:1::1/64
+ ip -4 route add 192.0.2.128/28 vrf v$h1 nexthop via 192.0.2.2
+ ip -6 route add 2001:db8:2::/64 vrf v$h1 nexthop via 2001:db8:1::2
+}
+
+h1_destroy()
+{
+ ip -6 route del 2001:db8:2::/64 vrf v$h1
+ ip -4 route del 192.0.2.128/28 vrf v$h1
+ vlan_destroy $h1 10
+ simple_if_fini $h1
+}
+
+h2_create()
+{
+ simple_if_init $h2 192.0.2.130/28 2001:db8:2::2/64
+ ip -4 route add 192.0.2.0/28 vrf v$h2 nexthop via 192.0.2.129
+ ip -6 route add 2001:db8:1::/64 vrf v$h2 nexthop via 2001:db8:2::1
+}
+
+h2_destroy()
+{
+ ip -6 route del 2001:db8:1::/64 vrf v$h2
+ ip -4 route del 192.0.2.0/28 vrf v$h2
+ simple_if_fini $h2 192.0.2.130/28 2001:db8:2::2/64
+}
+
+router_create()
+{
+ ip link add name br1 address $(mac_get $swp1) \
+ type bridge vlan_filtering 1 vlan_default_pvid 0
+ ip link set dev br1 up
+ __addr_add_del br1 add 192.0.2.2/28 2001:db8:1::2/64
+
+ ip link set dev $swp1 master br1
+ ip link set dev $swp1 up
+
+ ip link set dev $swp2 up
+ __addr_add_del $swp2 add 192.0.2.129/28 2001:db8:2::1/64
+
+ bridge vlan add dev br1 vid 10 pvid untagged self
+ bridge vlan add dev $swp1 vid 10
+}
+
+router_destroy()
+{
+ bridge vlan del dev $swp1 vid 10
+ bridge vlan del dev br1 vid 10 self
+
+ __addr_add_del $swp2 del 192.0.2.129/28 2001:db8:2::1/64
+ ip link set dev $swp2 down
+
+ ip link set dev $swp1 down
+ ip link set dev $swp1 nomaster
+
+ __addr_add_del br1 del 192.0.2.2/28 2001:db8:1::2/64
+ ip link del dev br1
+}
+
+setup_prepare()
+{
+ h1=${NETIFS[p1]}
+ swp1=${NETIFS[p2]}
+
+ swp2=${NETIFS[p3]}
+ h2=${NETIFS[p4]}
+
+ vrf_prepare
+
+ h1_create
+ h2_create
+
+ router_create
+
+ forwarding_enable
+}
+
+shuffle_pvid()
+{
+ log_info "Add and remove VLAN upper for PVID VLAN"
+
+ # Adding and removing a VLAN upper for the PVID VLAN shouldn't change
+ # anything. The address is arbitrary, just to make sure it will be an L3
+ # netdevice.
+ vlan_create br1 10 "" 192.0.2.33/28
+ sleep 1
+ vlan_destroy br1 10
+}
+
+cleanup()
+{
+ pre_cleanup
+
+ forwarding_restore
+
+ router_destroy
+
+ h2_destroy
+ h1_destroy
+
+ vrf_cleanup
+}
+
+ping_ipv4()
+{
+ ping_test $h1 192.0.2.130
+}
+
+ping_ipv6()
+{
+ ping6_test $h1 2001:db8:2::2
+}
+
+trap cleanup EXIT
+
+setup_prepare
+setup_wait
+
+tests_run
+
+exit $EXIT_STATUS
diff --git a/tools/testing/selftests/net/forwarding/router_bridge_vlan.sh b/tools/testing/selftests/net/forwarding/router_bridge_vlan.sh
index fa6a88c50750..b76a4a707a5b 100755
--- a/tools/testing/selftests/net/forwarding/router_bridge_vlan.sh
+++ b/tools/testing/selftests/net/forwarding/router_bridge_vlan.sh
@@ -1,10 +1,43 @@
#!/bin/bash
# SPDX-License-Identifier: GPL-2.0
+# +------------------------------------------------+ +----------------------+
+# | H1 (vrf) | | H2 (vrf) |
+# | + $h1.555 + $h1.777 | | + $h2 |
+# | | 192.0.2.1/28 | 192.0.2.17/28 | | | 192.0.2.130/28 |
+# | | 2001:db8:1::1/64 | 2001:db8:3::1/64 | | | 192.0.2.146/28 |
+# | | .-----------------' | | | 2001:db8:2::2/64 |
+# | |/ | | | 2001:db8:4::2/64 |
+# | + $h1 | | | |
+# +----|-------------------------------------------+ +--|-------------------+
+# | |
+# +----|--------------------------------------------------|-------------------+
+# | SW | | |
+# | +--|-------------------------------+ + $swp2 |
+# | | + $swp1 | 192.0.2.129/28 |
+# | | vid 555 777 | 192.0.2.145/28 |
+# | | | 2001:db8:2::1/64 |
+# | | + BR1 (802.1q) | 2001:db8:4::1/64 |
+# | | vid 555 pvid untagged | |
+# | | 192.0.2.2/28 | |
+# | | 192.0.2.18/28 | |
+# | | 2001:db8:1::2/64 | |
+# | | 2001:db8:3::2/64 | |
+# | +----------------------------------+ |
+# +---------------------------------------------------------------------------+
+
ALL_TESTS="
ping_ipv4
ping_ipv6
vlan
+ config_777
+ ping_ipv4_fails
+ ping_ipv6_fails
+ ping_ipv4_777
+ ping_ipv6_777
+ config_555
+ ping_ipv4
+ ping_ipv6
"
NUM_NETIFS=4
source lib.sh
@@ -12,36 +45,52 @@ source lib.sh
h1_create()
{
simple_if_init $h1
+
vlan_create $h1 555 v$h1 192.0.2.1/28 2001:db8:1::1/64
ip -4 route add 192.0.2.128/28 vrf v$h1 nexthop via 192.0.2.2
ip -6 route add 2001:db8:2::/64 vrf v$h1 nexthop via 2001:db8:1::2
+
+ vlan_create $h1 777 v$h1 192.0.2.17/28 2001:db8:3::1/64
+ ip -4 route add 192.0.2.144/28 vrf v$h1 nexthop via 192.0.2.18
+ ip -6 route add 2001:db8:4::/64 vrf v$h1 nexthop via 2001:db8:3::2
}
h1_destroy()
{
+ ip -6 route del 2001:db8:4::/64 vrf v$h1
+ ip -4 route del 192.0.2.144/28 vrf v$h1
+ vlan_destroy $h1 777
+
ip -6 route del 2001:db8:2::/64 vrf v$h1
ip -4 route del 192.0.2.128/28 vrf v$h1
vlan_destroy $h1 555
+
simple_if_fini $h1
}
h2_create()
{
- simple_if_init $h2 192.0.2.130/28 2001:db8:2::2/64
+ simple_if_init $h2 192.0.2.130/28 2001:db8:2::2/64 \
+ 192.0.2.146/28 2001:db8:4::2/64
ip -4 route add 192.0.2.0/28 vrf v$h2 nexthop via 192.0.2.129
+ ip -4 route add 192.0.2.16/28 vrf v$h2 nexthop via 192.0.2.145
ip -6 route add 2001:db8:1::/64 vrf v$h2 nexthop via 2001:db8:2::1
+ ip -6 route add 2001:db8:3::/64 vrf v$h2 nexthop via 2001:db8:4::1
}
h2_destroy()
{
+ ip -6 route del 2001:db8:3::/64 vrf v$h2
ip -6 route del 2001:db8:1::/64 vrf v$h2
+ ip -4 route del 192.0.2.16/28 vrf v$h2
ip -4 route del 192.0.2.0/28 vrf v$h2
- simple_if_fini $h2 192.0.2.130/28 2001:db8:2::2/64
+ simple_if_fini $h2 192.0.2.146/28 2001:db8:4::2/64 \
+ 192.0.2.130/28 2001:db8:2::2/64
}
router_create()
{
- ip link add name br1 type bridge vlan_filtering 1
+ ip link add name br1 type bridge vlan_filtering 1 vlan_default_pvid 0
ip link set dev br1 up
ip link set dev $swp1 master br1
@@ -49,18 +98,23 @@ router_create()
bridge vlan add dev br1 vid 555 self pvid untagged
bridge vlan add dev $swp1 vid 555
+ bridge vlan add dev $swp1 vid 777
__addr_add_del br1 add 192.0.2.2/28 2001:db8:1::2/64
+ __addr_add_del br1 add 192.0.2.18/28 2001:db8:3::2/64
ip link set dev $swp2 up
__addr_add_del $swp2 add 192.0.2.129/28 2001:db8:2::1/64
+ __addr_add_del $swp2 add 192.0.2.145/28 2001:db8:4::1/64
}
router_destroy()
{
+ __addr_add_del $swp2 del 192.0.2.145/28 2001:db8:4::1/64
__addr_add_del $swp2 del 192.0.2.129/28 2001:db8:2::1/64
ip link set dev $swp2 down
+ __addr_add_del br1 del 192.0.2.18/28 2001:db8:3::2/64
__addr_add_del br1 del 192.0.2.2/28 2001:db8:1::2/64
ip link set dev $swp1 down
ip link set dev $swp1 nomaster
@@ -86,6 +140,24 @@ setup_prepare()
forwarding_enable
}
+config_555()
+{
+ log_info "Configure VLAN 555 as PVID"
+
+ bridge vlan add dev br1 vid 555 self pvid untagged
+ bridge vlan del dev br1 vid 777 self
+ sleep 2
+}
+
+config_777()
+{
+ log_info "Configure VLAN 777 as PVID"
+
+ bridge vlan add dev br1 vid 777 self pvid untagged
+ bridge vlan del dev br1 vid 555 self
+ sleep 2
+}
+
cleanup()
{
pre_cleanup
@@ -114,12 +186,32 @@ vlan()
ping_ipv4()
{
- ping_test $h1 192.0.2.130
+ ping_test $h1.555 192.0.2.130
}
ping_ipv6()
{
- ping6_test $h1 2001:db8:2::2
+ ping6_test $h1.555 2001:db8:2::2
+}
+
+ping_ipv4_fails()
+{
+ ping_test_fails $h1.555 192.0.2.130 ": via 555"
+}
+
+ping_ipv6_fails()
+{
+ ping6_test_fails $h1.555 2001:db8:2::2 ": via 555"
+}
+
+ping_ipv4_777()
+{
+ ping_test $h1.777 192.0.2.146 ": via 777"
+}
+
+ping_ipv6_777()
+{
+ ping6_test $h1.777 2001:db8:4::2 ": via 777"
}
trap cleanup EXIT
diff --git a/tools/testing/selftests/net/forwarding/router_bridge_vlan_upper.sh b/tools/testing/selftests/net/forwarding/router_bridge_vlan_upper.sh
new file mode 100755
index 000000000000..215309ea1c8c
--- /dev/null
+++ b/tools/testing/selftests/net/forwarding/router_bridge_vlan_upper.sh
@@ -0,0 +1,169 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+# +------------------------+ +----------------------+
+# | H1 (vrf) | | H2 (vrf) |
+# | + $h1.555 | | + $h2.777 |
+# | | 192.0.2.1/28 | | | 192.0.2.18/28 |
+# | | 2001:db8:1::1/64 | | | 2001:db8:2::2/64 |
+# | | | | | |
+# | + $h1 | | + $h2 |
+# +----|-------------------+ +--|-------------------+
+# | |
+# +----|--------------------------------------------------|-------------------+
+# | SW | | |
+# | +--|--------------------------------------------------|-----------------+ |
+# | | + $swp1 BR1 (802.1q) + $swp2 | |
+# | | | |
+# | +------+------------------------------------------+---------------------+ |
+# | | | |
+# | + br1.555 + br1.777 |
+# | 192.0.2.2/28 192.0.2.17/28 |
+# | 2001:db8:1::2/64 2001:db8:2::1/64 |
+# +---------------------------------------------------------------------------+
+
+ALL_TESTS="
+ ping_ipv4
+ ping_ipv6
+ respin_config
+ ping_ipv4
+ ping_ipv6
+"
+NUM_NETIFS=4
+source lib.sh
+
+h1_create()
+{
+ simple_if_init $h1
+ vlan_create $h1 555 v$h1 192.0.2.1/28 2001:db8:1::1/64
+ ip -4 route add 192.0.2.16/28 vrf v$h1 nexthop via 192.0.2.2
+ ip -6 route add 2001:db8:2::/64 vrf v$h1 nexthop via 2001:db8:1::2
+}
+
+h1_destroy()
+{
+ ip -6 route del 2001:db8:2::/64 vrf v$h1
+ ip -4 route del 192.0.2.16/28 vrf v$h1
+ vlan_destroy $h1 555
+ simple_if_fini $h1
+}
+
+h2_create()
+{
+ simple_if_init $h2
+ vlan_create $h2 777 v$h2 192.0.2.18/28 2001:db8:2::2/64
+ ip -4 route add 192.0.2.0/28 vrf v$h2 nexthop via 192.0.2.17
+ ip -6 route add 2001:db8:1::/64 vrf v$h2 nexthop via 2001:db8:2::1
+}
+
+h2_destroy()
+{
+ ip -6 route del 2001:db8:1::/64 vrf v$h2
+ ip -4 route del 192.0.2.0/28 vrf v$h2
+ vlan_destroy $h2 777
+ simple_if_fini $h2
+}
+
+router_create()
+{
+ ip link add name br1 address $(mac_get $swp1) \
+ type bridge vlan_filtering 1
+ ip link set dev br1 up
+
+ ip link set dev $swp1 master br1
+ ip link set dev $swp2 master br1
+ ip link set dev $swp1 up
+ ip link set dev $swp2 up
+
+ bridge vlan add dev br1 vid 555 self
+ bridge vlan add dev br1 vid 777 self
+ bridge vlan add dev $swp1 vid 555
+ bridge vlan add dev $swp2 vid 777
+
+ vlan_create br1 555 "" 192.0.2.2/28 2001:db8:1::2/64
+ vlan_create br1 777 "" 192.0.2.17/28 2001:db8:2::1/64
+}
+
+router_destroy()
+{
+ vlan_destroy br1 777
+ vlan_destroy br1 555
+
+ bridge vlan del dev $swp2 vid 777
+ bridge vlan del dev $swp1 vid 555
+ bridge vlan del dev br1 vid 777 self
+ bridge vlan del dev br1 vid 555 self
+
+ ip link set dev $swp2 down nomaster
+ ip link set dev $swp1 down nomaster
+
+ ip link set dev br1 down
+ ip link del dev br1
+}
+
+setup_prepare()
+{
+ h1=${NETIFS[p1]}
+ swp1=${NETIFS[p2]}
+
+ swp2=${NETIFS[p3]}
+ h2=${NETIFS[p4]}
+
+ vrf_prepare
+
+ h1_create
+ h2_create
+
+ router_create
+
+ forwarding_enable
+}
+
+cleanup()
+{
+ pre_cleanup
+
+ forwarding_restore
+
+ router_destroy
+
+ h2_destroy
+ h1_destroy
+
+ vrf_cleanup
+}
+
+ping_ipv4()
+{
+ ping_test $h1 192.0.2.18
+}
+
+ping_ipv6()
+{
+ ping6_test $h1 2001:db8:2::2
+}
+
+respin_config()
+{
+ log_info "Remaster bridge slave"
+
+ ip link set dev $swp2 nomaster
+ ip link set dev $swp1 nomaster
+
+ sleep 2
+
+ ip link set dev $swp1 master br1
+ ip link set dev $swp2 master br1
+
+ bridge vlan add dev $swp1 vid 555
+ bridge vlan add dev $swp2 vid 777
+}
+
+trap cleanup EXIT
+
+setup_prepare
+setup_wait
+
+tests_run
+
+exit $EXIT_STATUS
diff --git a/tools/testing/selftests/net/forwarding/router_bridge_vlan_upper_pvid.sh b/tools/testing/selftests/net/forwarding/router_bridge_vlan_upper_pvid.sh
new file mode 100755
index 000000000000..138558452402
--- /dev/null
+++ b/tools/testing/selftests/net/forwarding/router_bridge_vlan_upper_pvid.sh
@@ -0,0 +1,171 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+# +----------------------------+
+# | H1 (vrf) |
+# | + $h1.10 | +----------------------+
+# | | 192.0.2.1/28 | | H2 (vrf) |
+# | | 2001:db8:1::1/64 | | + $h2 |
+# | | | | | 192.0.2.130/28 |
+# | + $h1 | | | 2001:db8:2::2/64 |
+# +---|------------------------+ +--|-------------------+
+# | |
+# +---|--------------------------------------------------|-------------------+
+# | | router (main VRF) | |
+# | +-|--------------------------+ + $swp2 |
+# | | + $swp1 BR1 (802.1q) | 192.0.2.129/28 |
+# | +-----+----------------------+ 2001:db8:2::1/64 |
+# | | |
+# | + br1.10 |
+# | 192.0.2.2/28 |
+# | 2001:db8:1::2/64 |
+# +--------------------------------------------------------------------------+
+
+ALL_TESTS="
+ ping_ipv4
+ ping_ipv6
+ pvid_set_unset
+ ping_ipv4
+ ping_ipv6
+ pvid_set_move
+ ping_ipv4
+ ping_ipv6
+"
+NUM_NETIFS=4
+source lib.sh
+
+h1_create()
+{
+ simple_if_init $h1
+ vlan_create $h1 10 v$h1 192.0.2.1/28 2001:db8:1::1/64
+ ip -4 route add 192.0.2.128/28 vrf v$h1 nexthop via 192.0.2.2
+ ip -6 route add 2001:db8:2::/64 vrf v$h1 nexthop via 2001:db8:1::2
+}
+
+h1_destroy()
+{
+ ip -6 route del 2001:db8:2::/64 vrf v$h1
+ ip -4 route del 192.0.2.128/28 vrf v$h1
+ vlan_destroy $h1 10
+ simple_if_fini $h1
+}
+
+h2_create()
+{
+ simple_if_init $h2 192.0.2.130/28 2001:db8:2::2/64
+ ip -4 route add 192.0.2.0/28 vrf v$h2 nexthop via 192.0.2.129
+ ip -6 route add 2001:db8:1::/64 vrf v$h2 nexthop via 2001:db8:2::1
+}
+
+h2_destroy()
+{
+ ip -6 route del 2001:db8:1::/64 vrf v$h2
+ ip -4 route del 192.0.2.0/28 vrf v$h2
+ simple_if_fini $h2 192.0.2.130/28 2001:db8:2::2/64
+}
+
+router_create()
+{
+ ip link add name br1 address $(mac_get $swp1) \
+ type bridge vlan_filtering 1 vlan_default_pvid 0
+ ip link set dev br1 up
+
+ ip link set dev $swp1 master br1
+ ip link set dev $swp1 up
+
+ ip link set dev $swp2 up
+ __addr_add_del $swp2 add 192.0.2.129/28 2001:db8:2::1/64
+
+ bridge vlan add dev br1 vid 10 self
+ bridge vlan add dev $swp1 vid 10
+ vlan_create br1 10 "" 192.0.2.2/28 2001:db8:1::2/64
+}
+
+router_destroy()
+{
+ vlan_destroy br1 10
+ bridge vlan del dev $swp1 vid 10
+ bridge vlan del dev br1 vid 10 self
+
+ __addr_add_del $swp2 del 192.0.2.129/28 2001:db8:2::1/64
+ ip link set dev $swp2 down
+
+ ip link set dev $swp1 down
+ ip link set dev $swp1 nomaster
+
+ ip link del dev br1
+}
+
+setup_prepare()
+{
+ h1=${NETIFS[p1]}
+ swp1=${NETIFS[p2]}
+
+ swp2=${NETIFS[p3]}
+ h2=${NETIFS[p4]}
+
+ vrf_prepare
+
+ h1_create
+ h2_create
+
+ router_create
+
+ forwarding_enable
+}
+
+pvid_set_unset()
+{
+ log_info "Set and unset PVID on VLAN 10"
+
+ bridge vlan add dev br1 vid 10 pvid self
+ sleep 1
+ bridge vlan add dev br1 vid 10 self
+}
+
+pvid_set_move()
+{
+ log_info "Set PVID on VLAN 10, then move it to VLAN 20"
+
+ bridge vlan add dev br1 vid 10 pvid self
+ sleep 1
+ bridge vlan add dev br1 vid 20 pvid self
+}
+
+shuffle_vlan()
+{
+ log_info ""
+}
+
+cleanup()
+{
+ pre_cleanup
+
+ forwarding_restore
+
+ router_destroy
+
+ h2_destroy
+ h1_destroy
+
+ vrf_cleanup
+}
+
+ping_ipv4()
+{
+ ping_test $h1 192.0.2.130
+}
+
+ping_ipv6()
+{
+ ping6_test $h1 2001:db8:2::2
+}
+
+trap cleanup EXIT
+
+setup_prepare
+setup_wait
+
+tests_run
+
+exit $EXIT_STATUS
diff --git a/tools/testing/selftests/net/forwarding/router_mpath_nh.sh b/tools/testing/selftests/net/forwarding/router_mpath_nh.sh
index cf3d26c233e8..3f0f5dc95542 100755
--- a/tools/testing/selftests/net/forwarding/router_mpath_nh.sh
+++ b/tools/testing/selftests/net/forwarding/router_mpath_nh.sh
@@ -1,9 +1,18 @@
#!/bin/bash
# SPDX-License-Identifier: GPL-2.0
-ALL_TESTS="ping_ipv4 ping_ipv6 multipath_test"
+ALL_TESTS="
+ ping_ipv4
+ ping_ipv6
+ multipath_test
+ ping_ipv4_blackhole
+ ping_ipv6_blackhole
+ nh_stats_test_v4
+ nh_stats_test_v6
+"
NUM_NETIFS=8
source lib.sh
+source router_mpath_nh_lib.sh
h1_create()
{
@@ -197,8 +206,8 @@ multipath4_test()
t0_rp12=$(link_stats_tx_packets_get $rp12)
t0_rp13=$(link_stats_tx_packets_get $rp13)
- ip vrf exec vrf-h1 $MZ -q -p 64 -A 192.0.2.2 -B 198.51.100.2 \
- -d 1msec -t udp "sp=1024,dp=0-32768"
+ ip vrf exec vrf-h1 $MZ $h1 -q -p 64 -A 192.0.2.2 -B 198.51.100.2 \
+ -d $MZ_DELAY -t udp "sp=1024,dp=0-32768"
t1_rp12=$(link_stats_tx_packets_get $rp12)
t1_rp13=$(link_stats_tx_packets_get $rp13)
@@ -212,7 +221,7 @@ multipath4_test()
sysctl_restore net.ipv4.fib_multipath_hash_policy
}
-multipath6_l4_test()
+multipath6_test()
{
local desc="$1"
local weight_rp12=$2
@@ -231,7 +240,7 @@ multipath6_l4_test()
t0_rp13=$(link_stats_tx_packets_get $rp13)
$MZ $h1 -6 -q -p 64 -A 2001:db8:1::2 -B 2001:db8:2::2 \
- -d 1msec -t udp "sp=1024,dp=0-32768"
+ -d $MZ_DELAY -t udp "sp=1024,dp=0-32768"
t1_rp12=$(link_stats_tx_packets_get $rp12)
t1_rp13=$(link_stats_tx_packets_get $rp13)
@@ -245,34 +254,6 @@ multipath6_l4_test()
sysctl_restore net.ipv6.fib_multipath_hash_policy
}
-multipath6_test()
-{
- local desc="$1"
- local weight_rp12=$2
- local weight_rp13=$3
- local t0_rp12 t0_rp13 t1_rp12 t1_rp13
- local packets_rp12 packets_rp13
-
- ip nexthop replace id 106 group 104,$weight_rp12/105,$weight_rp13
-
- t0_rp12=$(link_stats_tx_packets_get $rp12)
- t0_rp13=$(link_stats_tx_packets_get $rp13)
-
- # Generate 16384 echo requests, each with a random flow label.
- for _ in $(seq 1 16384); do
- ip vrf exec vrf-h1 $PING6 2001:db8:2::2 -F 0 -c 1 -q >/dev/null 2>&1
- done
-
- t1_rp12=$(link_stats_tx_packets_get $rp12)
- t1_rp13=$(link_stats_tx_packets_get $rp13)
-
- let "packets_rp12 = $t1_rp12 - $t0_rp12"
- let "packets_rp13 = $t1_rp13 - $t0_rp13"
- multipath_eval "$desc" $weight_rp12 $weight_rp13 $packets_rp12 $packets_rp13
-
- ip nexthop replace id 106 group 104/105
-}
-
multipath_test()
{
log_info "Running IPv4 multipath tests"
@@ -280,15 +261,81 @@ multipath_test()
multipath4_test "Weighted MP 2:1" 2 1
multipath4_test "Weighted MP 11:45" 11 45
+ log_info "Running IPv4 multipath tests with IPv6 link-local nexthops"
+ ip nexthop replace id 101 via fe80:2::22 dev $rp12
+ ip nexthop replace id 102 via fe80:3::23 dev $rp13
+
+ multipath4_test "ECMP" 1 1
+ multipath4_test "Weighted MP 2:1" 2 1
+ multipath4_test "Weighted MP 11:45" 11 45
+
+ ip nexthop replace id 102 via 169.254.3.23 dev $rp13
+ ip nexthop replace id 101 via 169.254.2.22 dev $rp12
+
log_info "Running IPv6 multipath tests"
multipath6_test "ECMP" 1 1
multipath6_test "Weighted MP 2:1" 2 1
multipath6_test "Weighted MP 11:45" 11 45
+}
+
+ping_ipv4_blackhole()
+{
+ RET=0
+
+ ip nexthop add id 1001 blackhole
+ ip nexthop add id 1002 group 1001
+
+ ip route replace 198.51.100.0/24 vrf vrf-r1 nhid 1001
+ ping_do $h1 198.51.100.2
+ check_fail $? "ping did not fail when using a blackhole nexthop"
+
+ ip route replace 198.51.100.0/24 vrf vrf-r1 nhid 1002
+ ping_do $h1 198.51.100.2
+ check_fail $? "ping did not fail when using a blackhole nexthop group"
+
+ ip route replace 198.51.100.0/24 vrf vrf-r1 nhid 103
+ ping_do $h1 198.51.100.2
+ check_err $? "ping failed with a valid nexthop"
+
+ log_test "IPv4 blackhole ping"
+
+ ip nexthop del id 1002
+ ip nexthop del id 1001
+}
- log_info "Running IPv6 L4 hash multipath tests"
- multipath6_l4_test "ECMP" 1 1
- multipath6_l4_test "Weighted MP 2:1" 2 1
- multipath6_l4_test "Weighted MP 11:45" 11 45
+ping_ipv6_blackhole()
+{
+ RET=0
+
+ ip -6 nexthop add id 1001 blackhole
+ ip nexthop add id 1002 group 1001
+
+ ip route replace 2001:db8:2::/64 vrf vrf-r1 nhid 1001
+ ping6_do $h1 2001:db8:2::2
+ check_fail $? "ping did not fail when using a blackhole nexthop"
+
+ ip route replace 2001:db8:2::/64 vrf vrf-r1 nhid 1002
+ ping6_do $h1 2001:db8:2::2
+ check_fail $? "ping did not fail when using a blackhole nexthop group"
+
+ ip route replace 2001:db8:2::/64 vrf vrf-r1 nhid 106
+ ping6_do $h1 2001:db8:2::2
+ check_err $? "ping failed with a valid nexthop"
+
+ log_test "IPv6 blackhole ping"
+
+ ip nexthop del id 1002
+ ip -6 nexthop del id 1001
+}
+
+nh_stats_test_v4()
+{
+ __nh_stats_test_v4 mpath
+}
+
+nh_stats_test_v6()
+{
+ __nh_stats_test_v6 mpath
}
setup_prepare()
@@ -312,7 +359,6 @@ setup_prepare()
router1_create
router2_create
- routing_nh_obj
forwarding_enable
}
@@ -345,7 +391,7 @@ ping_ipv6()
ip nexthop ls >/dev/null 2>&1
if [ $? -ne 0 ]; then
echo "Nexthop objects not supported; skipping tests"
- exit 0
+ exit $ksft_skip
fi
trap cleanup EXIT
diff --git a/tools/testing/selftests/net/forwarding/router_mpath_nh_lib.sh b/tools/testing/selftests/net/forwarding/router_mpath_nh_lib.sh
new file mode 100644
index 000000000000..7e7d62161c34
--- /dev/null
+++ b/tools/testing/selftests/net/forwarding/router_mpath_nh_lib.sh
@@ -0,0 +1,129 @@
+# SPDX-License-Identifier: GPL-2.0
+
+nh_stats_do_test()
+{
+ local what=$1; shift
+ local nh1_id=$1; shift
+ local nh2_id=$1; shift
+ local group_id=$1; shift
+ local stats_get=$1; shift
+ local mz="$@"
+
+ local dp
+
+ RET=0
+
+ sleep 2
+ for ((dp=0; dp < 60000; dp += 10000)); do
+ local dd
+ local t0_rp12=$(link_stats_tx_packets_get $rp12)
+ local t0_rp13=$(link_stats_tx_packets_get $rp13)
+ local t0_nh1=$($stats_get $group_id $nh1_id)
+ local t0_nh2=$($stats_get $group_id $nh2_id)
+
+ ip vrf exec vrf-h1 \
+ $mz -q -p 64 -d 0 -t udp \
+ "sp=1024,dp=$((dp))-$((dp + 10000))"
+ sleep 2
+
+ local t1_rp12=$(link_stats_tx_packets_get $rp12)
+ local t1_rp13=$(link_stats_tx_packets_get $rp13)
+ local t1_nh1=$($stats_get $group_id $nh1_id)
+ local t1_nh2=$($stats_get $group_id $nh2_id)
+
+ local d_rp12=$((t1_rp12 - t0_rp12))
+ local d_rp13=$((t1_rp13 - t0_rp13))
+ local d_nh1=$((t1_nh1 - t0_nh1))
+ local d_nh2=$((t1_nh2 - t0_nh2))
+
+ dd=$(absval $((d_rp12 - d_nh1)))
+ ((dd < 10))
+ check_err $? "Discrepancy between link and $stats_get: d_rp12=$d_rp12 d_nh1=$d_nh1"
+
+ dd=$(absval $((d_rp13 - d_nh2)))
+ ((dd < 10))
+ check_err $? "Discrepancy between link and $stats_get: d_rp13=$d_rp13 d_nh2=$d_nh2"
+ done
+
+ log_test "NH stats test $what"
+}
+
+nh_stats_test_dispatch_swhw()
+{
+ local what=$1; shift
+ local nh1_id=$1; shift
+ local nh2_id=$1; shift
+ local group_id=$1; shift
+ local mz="$@"
+
+ local used
+
+ nh_stats_do_test "$what" "$nh1_id" "$nh2_id" "$group_id" \
+ nh_stats_get "${mz[@]}"
+
+ used=$(ip -s -j -d nexthop show id $group_id |
+ jq '.[].hw_stats.used')
+ kind=$(ip -j -d link show dev $rp11 |
+ jq -r '.[].linkinfo.info_kind')
+ if [[ $used == true ]]; then
+ nh_stats_do_test "HW $what" "$nh1_id" "$nh2_id" "$group_id" \
+ nh_stats_get_hw "${mz[@]}"
+ elif [[ $kind == veth ]]; then
+ log_test_skip "HW stats not offloaded on veth topology"
+ fi
+}
+
+nh_stats_test_dispatch()
+{
+ local nhgtype=$1; shift
+ local what=$1; shift
+ local nh1_id=$1; shift
+ local nh2_id=$1; shift
+ local group_id=$1; shift
+ local mz="$@"
+
+ local enabled
+ local kind
+
+ if ! ip nexthop help 2>&1 | grep -q hw_stats; then
+ log_test_skip "NH stats test: ip doesn't support HW stats"
+ return
+ fi
+
+ ip nexthop replace id $group_id group $nh1_id/$nh2_id \
+ hw_stats on type $nhgtype
+ enabled=$(ip -s -j -d nexthop show id $group_id |
+ jq '.[].hw_stats.enabled')
+ if [[ $enabled == true ]]; then
+ nh_stats_test_dispatch_swhw "$what" "$nh1_id" "$nh2_id" \
+ "$group_id" "${mz[@]}"
+ elif [[ $enabled == false ]]; then
+ check_err 1 "HW stats still disabled after enabling"
+ log_test "NH stats test"
+ else
+ log_test_skip "NH stats test: ip doesn't report hw_stats info"
+ fi
+
+ ip nexthop replace id $group_id group $nh1_id/$nh2_id \
+ hw_stats off type $nhgtype
+}
+
+__nh_stats_test_v4()
+{
+ local nhgtype=$1; shift
+
+ sysctl_set net.ipv4.fib_multipath_hash_policy 1
+ nh_stats_test_dispatch $nhgtype "IPv4" 101 102 103 \
+ $MZ $h1 -A 192.0.2.2 -B 198.51.100.2
+ sysctl_restore net.ipv4.fib_multipath_hash_policy
+}
+
+__nh_stats_test_v6()
+{
+ local nhgtype=$1; shift
+
+ sysctl_set net.ipv6.fib_multipath_hash_policy 1
+ nh_stats_test_dispatch $nhgtype "IPv6" 104 105 106 \
+ $MZ -6 $h1 -A 2001:db8:1::2 -B 2001:db8:2::2
+ sysctl_restore net.ipv6.fib_multipath_hash_policy
+}
diff --git a/tools/testing/selftests/net/forwarding/router_mpath_nh_res.sh b/tools/testing/selftests/net/forwarding/router_mpath_nh_res.sh
new file mode 100755
index 000000000000..4b483d24ad00
--- /dev/null
+++ b/tools/testing/selftests/net/forwarding/router_mpath_nh_res.sh
@@ -0,0 +1,413 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+ALL_TESTS="
+ ping_ipv4
+ ping_ipv6
+ multipath_test
+ nh_stats_test_v4
+ nh_stats_test_v6
+"
+NUM_NETIFS=8
+source lib.sh
+source router_mpath_nh_lib.sh
+
+h1_create()
+{
+ vrf_create "vrf-h1"
+ ip link set dev $h1 master vrf-h1
+
+ ip link set dev vrf-h1 up
+ ip link set dev $h1 up
+
+ ip address add 192.0.2.2/24 dev $h1
+ ip address add 2001:db8:1::2/64 dev $h1
+
+ ip route add 198.51.100.0/24 vrf vrf-h1 nexthop via 192.0.2.1
+ ip route add 2001:db8:2::/64 vrf vrf-h1 nexthop via 2001:db8:1::1
+}
+
+h1_destroy()
+{
+ ip route del 2001:db8:2::/64 vrf vrf-h1
+ ip route del 198.51.100.0/24 vrf vrf-h1
+
+ ip address del 2001:db8:1::2/64 dev $h1
+ ip address del 192.0.2.2/24 dev $h1
+
+ ip link set dev $h1 down
+ vrf_destroy "vrf-h1"
+}
+
+h2_create()
+{
+ vrf_create "vrf-h2"
+ ip link set dev $h2 master vrf-h2
+
+ ip link set dev vrf-h2 up
+ ip link set dev $h2 up
+
+ ip address add 198.51.100.2/24 dev $h2
+ ip address add 2001:db8:2::2/64 dev $h2
+
+ ip route add 192.0.2.0/24 vrf vrf-h2 nexthop via 198.51.100.1
+ ip route add 2001:db8:1::/64 vrf vrf-h2 nexthop via 2001:db8:2::1
+}
+
+h2_destroy()
+{
+ ip route del 2001:db8:1::/64 vrf vrf-h2
+ ip route del 192.0.2.0/24 vrf vrf-h2
+
+ ip address del 2001:db8:2::2/64 dev $h2
+ ip address del 198.51.100.2/24 dev $h2
+
+ ip link set dev $h2 down
+ vrf_destroy "vrf-h2"
+}
+
+router1_create()
+{
+ vrf_create "vrf-r1"
+ ip link set dev $rp11 master vrf-r1
+ ip link set dev $rp12 master vrf-r1
+ ip link set dev $rp13 master vrf-r1
+
+ ip link set dev vrf-r1 up
+ ip link set dev $rp11 up
+ ip link set dev $rp12 up
+ ip link set dev $rp13 up
+
+ ip address add 192.0.2.1/24 dev $rp11
+ ip address add 2001:db8:1::1/64 dev $rp11
+
+ ip address add 169.254.2.12/24 dev $rp12
+ ip address add fe80:2::12/64 dev $rp12
+
+ ip address add 169.254.3.13/24 dev $rp13
+ ip address add fe80:3::13/64 dev $rp13
+}
+
+router1_destroy()
+{
+ ip route del 2001:db8:2::/64 vrf vrf-r1
+ ip route del 198.51.100.0/24 vrf vrf-r1
+
+ ip address del fe80:3::13/64 dev $rp13
+ ip address del 169.254.3.13/24 dev $rp13
+
+ ip address del fe80:2::12/64 dev $rp12
+ ip address del 169.254.2.12/24 dev $rp12
+
+ ip address del 2001:db8:1::1/64 dev $rp11
+ ip address del 192.0.2.1/24 dev $rp11
+
+ ip nexthop del id 103
+ ip nexthop del id 101
+ ip nexthop del id 102
+ ip nexthop del id 106
+ ip nexthop del id 104
+ ip nexthop del id 105
+
+ ip link set dev $rp13 down
+ ip link set dev $rp12 down
+ ip link set dev $rp11 down
+
+ vrf_destroy "vrf-r1"
+}
+
+router2_create()
+{
+ vrf_create "vrf-r2"
+ ip link set dev $rp21 master vrf-r2
+ ip link set dev $rp22 master vrf-r2
+ ip link set dev $rp23 master vrf-r2
+
+ ip link set dev vrf-r2 up
+ ip link set dev $rp21 up
+ ip link set dev $rp22 up
+ ip link set dev $rp23 up
+
+ ip address add 198.51.100.1/24 dev $rp21
+ ip address add 2001:db8:2::1/64 dev $rp21
+
+ ip address add 169.254.2.22/24 dev $rp22
+ ip address add fe80:2::22/64 dev $rp22
+
+ ip address add 169.254.3.23/24 dev $rp23
+ ip address add fe80:3::23/64 dev $rp23
+}
+
+router2_destroy()
+{
+ ip route del 2001:db8:1::/64 vrf vrf-r2
+ ip route del 192.0.2.0/24 vrf vrf-r2
+
+ ip address del fe80:3::23/64 dev $rp23
+ ip address del 169.254.3.23/24 dev $rp23
+
+ ip address del fe80:2::22/64 dev $rp22
+ ip address del 169.254.2.22/24 dev $rp22
+
+ ip address del 2001:db8:2::1/64 dev $rp21
+ ip address del 198.51.100.1/24 dev $rp21
+
+ ip nexthop del id 201
+ ip nexthop del id 202
+ ip nexthop del id 204
+ ip nexthop del id 205
+
+ ip link set dev $rp23 down
+ ip link set dev $rp22 down
+ ip link set dev $rp21 down
+
+ vrf_destroy "vrf-r2"
+}
+
+routing_nh_obj()
+{
+ ip nexthop add id 101 via 169.254.2.22 dev $rp12
+ ip nexthop add id 102 via 169.254.3.23 dev $rp13
+ ip nexthop add id 103 group 101/102 type resilient buckets 512 \
+ idle_timer 0
+ ip route add 198.51.100.0/24 vrf vrf-r1 nhid 103
+
+ ip nexthop add id 104 via fe80:2::22 dev $rp12
+ ip nexthop add id 105 via fe80:3::23 dev $rp13
+ ip nexthop add id 106 group 104/105 type resilient buckets 512 \
+ idle_timer 0
+ ip route add 2001:db8:2::/64 vrf vrf-r1 nhid 106
+
+ ip nexthop add id 201 via 169.254.2.12 dev $rp22
+ ip nexthop add id 202 via 169.254.3.13 dev $rp23
+ ip nexthop add id 203 group 201/202 type resilient buckets 512 \
+ idle_timer 0
+ ip route add 192.0.2.0/24 vrf vrf-r2 nhid 203
+
+ ip nexthop add id 204 via fe80:2::12 dev $rp22
+ ip nexthop add id 205 via fe80:3::13 dev $rp23
+ ip nexthop add id 206 group 204/205 type resilient buckets 512 \
+ idle_timer 0
+ ip route add 2001:db8:1::/64 vrf vrf-r2 nhid 206
+}
+
+multipath4_test()
+{
+ local desc="$1"
+ local weight_rp12=$2
+ local weight_rp13=$3
+ local t0_rp12 t0_rp13 t1_rp12 t1_rp13
+ local packets_rp12 packets_rp13
+
+ # Transmit multiple flows from h1 to h2 and make sure they are
+ # distributed between both multipath links (rp12 and rp13)
+ # according to the provided weights.
+ sysctl_set net.ipv4.fib_multipath_hash_policy 1
+
+ t0_rp12=$(link_stats_tx_packets_get $rp12)
+ t0_rp13=$(link_stats_tx_packets_get $rp13)
+
+ ip vrf exec vrf-h1 $MZ $h1 -q -p 64 -A 192.0.2.2 -B 198.51.100.2 \
+ -d $MZ_DELAY -t udp "sp=1024,dp=0-32768"
+
+ t1_rp12=$(link_stats_tx_packets_get $rp12)
+ t1_rp13=$(link_stats_tx_packets_get $rp13)
+
+ let "packets_rp12 = $t1_rp12 - $t0_rp12"
+ let "packets_rp13 = $t1_rp13 - $t0_rp13"
+ multipath_eval "$desc" $weight_rp12 $weight_rp13 $packets_rp12 $packets_rp13
+
+ # Restore settings.
+ sysctl_restore net.ipv4.fib_multipath_hash_policy
+}
+
+multipath6_l4_test()
+{
+ local desc="$1"
+ local weight_rp12=$2
+ local weight_rp13=$3
+ local t0_rp12 t0_rp13 t1_rp12 t1_rp13
+ local packets_rp12 packets_rp13
+
+ # Transmit multiple flows from h1 to h2 and make sure they are
+ # distributed between both multipath links (rp12 and rp13)
+ # according to the provided weights.
+ sysctl_set net.ipv6.fib_multipath_hash_policy 1
+
+ t0_rp12=$(link_stats_tx_packets_get $rp12)
+ t0_rp13=$(link_stats_tx_packets_get $rp13)
+
+ $MZ $h1 -6 -q -p 64 -A 2001:db8:1::2 -B 2001:db8:2::2 \
+ -d $MZ_DELAY -t udp "sp=1024,dp=0-32768"
+
+ t1_rp12=$(link_stats_tx_packets_get $rp12)
+ t1_rp13=$(link_stats_tx_packets_get $rp13)
+
+ let "packets_rp12 = $t1_rp12 - $t0_rp12"
+ let "packets_rp13 = $t1_rp13 - $t0_rp13"
+ multipath_eval "$desc" $weight_rp12 $weight_rp13 $packets_rp12 $packets_rp13
+
+ sysctl_restore net.ipv6.fib_multipath_hash_policy
+}
+
+multipath_test()
+{
+ # Without an idle timer, weight replacement should happen immediately.
+ log_info "Running multipath tests without an idle timer"
+ ip nexthop replace id 103 group 101/102 type resilient idle_timer 0
+ ip nexthop replace id 106 group 104/105 type resilient idle_timer 0
+
+ log_info "Running IPv4 multipath tests"
+ ip nexthop replace id 103 group 101,1/102,1 type resilient
+ multipath4_test "ECMP" 1 1
+ ip nexthop replace id 103 group 101,2/102,1 type resilient
+ multipath4_test "Weighted MP 2:1" 2 1
+ ip nexthop replace id 103 group 101,11/102,45 type resilient
+ multipath4_test "Weighted MP 11:45" 11 45
+
+ ip nexthop replace id 103 group 101,1/102,1 type resilient
+
+ log_info "Running IPv6 L4 hash multipath tests"
+ ip nexthop replace id 106 group 104,1/105,1 type resilient
+ multipath6_l4_test "ECMP" 1 1
+ ip nexthop replace id 106 group 104,2/105,1 type resilient
+ multipath6_l4_test "Weighted MP 2:1" 2 1
+ ip nexthop replace id 106 group 104,11/105,45 type resilient
+ multipath6_l4_test "Weighted MP 11:45" 11 45
+
+ ip nexthop replace id 106 group 104,1/105,1 type resilient
+
+ # With an idle timer, weight replacement should not happen, so the
+ # expected ratio should always be the initial one (1:1).
+ log_info "Running multipath tests with an idle timer of 120 seconds"
+ ip nexthop replace id 103 group 101/102 type resilient idle_timer 120
+ ip nexthop replace id 106 group 104/105 type resilient idle_timer 120
+
+ log_info "Running IPv4 multipath tests"
+ ip nexthop replace id 103 group 101,1/102,1 type resilient
+ multipath4_test "ECMP" 1 1
+ ip nexthop replace id 103 group 101,2/102,1 type resilient
+ multipath4_test "Weighted MP 2:1" 1 1
+ ip nexthop replace id 103 group 101,11/102,45 type resilient
+ multipath4_test "Weighted MP 11:45" 1 1
+
+ ip nexthop replace id 103 group 101,1/102,1 type resilient
+
+ log_info "Running IPv6 L4 hash multipath tests"
+ ip nexthop replace id 106 group 104,1/105,1 type resilient
+ multipath6_l4_test "ECMP" 1 1
+ ip nexthop replace id 106 group 104,2/105,1 type resilient
+ multipath6_l4_test "Weighted MP 2:1" 1 1
+ ip nexthop replace id 106 group 104,11/105,45 type resilient
+ multipath6_l4_test "Weighted MP 11:45" 1 1
+
+ ip nexthop replace id 106 group 104,1/105,1 type resilient
+
+ # With a short idle timer and enough idle time, weight replacement
+ # should happen.
+ log_info "Running multipath tests with an idle timer of 5 seconds"
+ ip nexthop replace id 103 group 101/102 type resilient idle_timer 5
+ ip nexthop replace id 106 group 104/105 type resilient idle_timer 5
+
+ log_info "Running IPv4 multipath tests"
+ sleep 10
+ ip nexthop replace id 103 group 101,1/102,1 type resilient
+ multipath4_test "ECMP" 1 1
+ sleep 10
+ ip nexthop replace id 103 group 101,2/102,1 type resilient
+ multipath4_test "Weighted MP 2:1" 2 1
+ sleep 10
+ ip nexthop replace id 103 group 101,11/102,45 type resilient
+ multipath4_test "Weighted MP 11:45" 11 45
+
+ ip nexthop replace id 103 group 101,1/102,1 type resilient
+
+ log_info "Running IPv6 L4 hash multipath tests"
+ sleep 10
+ ip nexthop replace id 106 group 104,1/105,1 type resilient
+ multipath6_l4_test "ECMP" 1 1
+ sleep 10
+ ip nexthop replace id 106 group 104,2/105,1 type resilient
+ multipath6_l4_test "Weighted MP 2:1" 2 1
+ sleep 10
+ ip nexthop replace id 106 group 104,11/105,45 type resilient
+ multipath6_l4_test "Weighted MP 11:45" 11 45
+
+ ip nexthop replace id 106 group 104,1/105,1 type resilient
+}
+
+nh_stats_test_v4()
+{
+ __nh_stats_test_v4 resilient
+}
+
+nh_stats_test_v6()
+{
+ __nh_stats_test_v6 resilient
+}
+
+setup_prepare()
+{
+ h1=${NETIFS[p1]}
+ rp11=${NETIFS[p2]}
+
+ rp12=${NETIFS[p3]}
+ rp22=${NETIFS[p4]}
+
+ rp13=${NETIFS[p5]}
+ rp23=${NETIFS[p6]}
+
+ rp21=${NETIFS[p7]}
+ h2=${NETIFS[p8]}
+
+ vrf_prepare
+
+ h1_create
+ h2_create
+
+ router1_create
+ router2_create
+
+ forwarding_enable
+}
+
+cleanup()
+{
+ pre_cleanup
+
+ forwarding_restore
+
+ router2_destroy
+ router1_destroy
+
+ h2_destroy
+ h1_destroy
+
+ vrf_cleanup
+}
+
+ping_ipv4()
+{
+ ping_test $h1 198.51.100.2
+}
+
+ping_ipv6()
+{
+ ping6_test $h1 2001:db8:2::2
+}
+
+ip nexthop ls >/dev/null 2>&1
+if [ $? -ne 0 ]; then
+ echo "Nexthop objects not supported; skipping tests"
+ exit $ksft_skip
+fi
+
+trap cleanup EXIT
+
+setup_prepare
+setup_wait
+routing_nh_obj
+
+tests_run
+
+exit $EXIT_STATUS
diff --git a/tools/testing/selftests/net/forwarding/router_multicast.sh b/tools/testing/selftests/net/forwarding/router_multicast.sh
index 57e90c873a2c..5a58b1ec8aef 100755
--- a/tools/testing/selftests/net/forwarding/router_multicast.sh
+++ b/tools/testing/selftests/net/forwarding/router_multicast.sh
@@ -28,7 +28,7 @@
# +------------------+ +------------------+
#
-ALL_TESTS="mcast_v4 mcast_v6 rpf_v4 rpf_v6"
+ALL_TESTS="mcast_v4 mcast_v6 rpf_v4 rpf_v6 unres_v4 unres_v6"
NUM_NETIFS=6
source lib.sh
source tc_common.sh
@@ -406,6 +406,96 @@ rpf_v6()
log_test "RPF IPv6"
}
+unres_v4()
+{
+ # Send a multicast packet not corresponding to an installed route,
+ # causing the kernel to queue the packet for resolution and emit an
+ # IGMPMSG_NOCACHE notification. smcrouted will react to this
+ # notification by consulting its (*, G) list and installing an (S, G)
+ # route, which will be used to forward the queued packet.
+
+ RET=0
+
+ tc filter add dev $h2 ingress protocol ip pref 1 handle 1 flower \
+ dst_ip 225.1.2.3 ip_proto udp dst_port 12345 action drop
+ tc filter add dev $h3 ingress protocol ip pref 1 handle 1 flower \
+ dst_ip 225.1.2.3 ip_proto udp dst_port 12345 action drop
+
+ # Forwarding should fail before installing a matching (*, G).
+ $MZ $h1 -c 1 -p 128 -t udp "ttl=10,sp=54321,dp=12345" \
+ -a 00:11:22:33:44:55 -b 01:00:5e:01:02:03 \
+ -A 198.51.100.2 -B 225.1.2.3 -q
+
+ tc_check_packets "dev $h2 ingress" 1 0
+ check_err $? "Multicast received on first host when should not"
+ tc_check_packets "dev $h3 ingress" 1 0
+ check_err $? "Multicast received on second host when should not"
+
+ # Create (*, G). Will not be installed in the kernel.
+ create_mcast_sg $rp1 0.0.0.0 225.1.2.3 $rp2 $rp3
+
+ $MZ $h1 -c 1 -p 128 -t udp "ttl=10,sp=54321,dp=12345" \
+ -a 00:11:22:33:44:55 -b 01:00:5e:01:02:03 \
+ -A 198.51.100.2 -B 225.1.2.3 -q
+
+ tc_check_packets "dev $h2 ingress" 1 1
+ check_err $? "Multicast not received on first host"
+ tc_check_packets "dev $h3 ingress" 1 1
+ check_err $? "Multicast not received on second host"
+
+ delete_mcast_sg $rp1 0.0.0.0 225.1.2.3 $rp2 $rp3
+
+ tc filter del dev $h3 ingress protocol ip pref 1 handle 1 flower
+ tc filter del dev $h2 ingress protocol ip pref 1 handle 1 flower
+
+ log_test "Unresolved queue IPv4"
+}
+
+unres_v6()
+{
+ # Send a multicast packet not corresponding to an installed route,
+ # causing the kernel to queue the packet for resolution and emit an
+ # MRT6MSG_NOCACHE notification. smcrouted will react to this
+ # notification by consulting its (*, G) list and installing an (S, G)
+ # route, which will be used to forward the queued packet.
+
+ RET=0
+
+ tc filter add dev $h2 ingress protocol ipv6 pref 1 handle 1 flower \
+ dst_ip ff0e::3 ip_proto udp dst_port 12345 action drop
+ tc filter add dev $h3 ingress protocol ipv6 pref 1 handle 1 flower \
+ dst_ip ff0e::3 ip_proto udp dst_port 12345 action drop
+
+ # Forwarding should fail before installing a matching (*, G).
+ $MZ $h1 -6 -c 1 -p 128 -t udp "ttl=10,sp=54321,dp=12345" \
+ -a 00:11:22:33:44:55 -b 33:33:00:00:00:03 \
+ -A 2001:db8:1::2 -B ff0e::3 -q
+
+ tc_check_packets "dev $h2 ingress" 1 0
+ check_err $? "Multicast received on first host when should not"
+ tc_check_packets "dev $h3 ingress" 1 0
+ check_err $? "Multicast received on second host when should not"
+
+ # Create (*, G). Will not be installed in the kernel.
+ create_mcast_sg $rp1 :: ff0e::3 $rp2 $rp3
+
+ $MZ $h1 -6 -c 1 -p 128 -t udp "ttl=10,sp=54321,dp=12345" \
+ -a 00:11:22:33:44:55 -b 33:33:00:00:00:03 \
+ -A 2001:db8:1::2 -B ff0e::3 -q
+
+ tc_check_packets "dev $h2 ingress" 1 1
+ check_err $? "Multicast not received on first host"
+ tc_check_packets "dev $h3 ingress" 1 1
+ check_err $? "Multicast not received on second host"
+
+ delete_mcast_sg $rp1 :: ff0e::3 $rp2 $rp3
+
+ tc filter del dev $h3 ingress protocol ipv6 pref 1 handle 1 flower
+ tc filter del dev $h2 ingress protocol ipv6 pref 1 handle 1 flower
+
+ log_test "Unresolved queue IPv6"
+}
+
trap cleanup EXIT
setup_prepare
diff --git a/tools/testing/selftests/net/forwarding/router_multipath.sh b/tools/testing/selftests/net/forwarding/router_multipath.sh
index 79a209927962..e2be354167a1 100755
--- a/tools/testing/selftests/net/forwarding/router_multipath.sh
+++ b/tools/testing/selftests/net/forwarding/router_multipath.sh
@@ -178,8 +178,8 @@ multipath4_test()
t0_rp12=$(link_stats_tx_packets_get $rp12)
t0_rp13=$(link_stats_tx_packets_get $rp13)
- ip vrf exec vrf-h1 $MZ -q -p 64 -A 192.0.2.2 -B 198.51.100.2 \
- -d 1msec -t udp "sp=1024,dp=0-32768"
+ ip vrf exec vrf-h1 $MZ $h1 -q -p 64 -A 192.0.2.2 -B 198.51.100.2 \
+ -d $MZ_DELAY -t udp "sp=1024,dp=0-32768"
t1_rp12=$(link_stats_tx_packets_get $rp12)
t1_rp13=$(link_stats_tx_packets_get $rp13)
@@ -195,7 +195,7 @@ multipath4_test()
sysctl_restore net.ipv4.fib_multipath_hash_policy
}
-multipath6_l4_test()
+multipath6_test()
{
local desc="$1"
local weight_rp12=$2
@@ -216,7 +216,7 @@ multipath6_l4_test()
t0_rp13=$(link_stats_tx_packets_get $rp13)
$MZ $h1 -6 -q -p 64 -A 2001:db8:1::2 -B 2001:db8:2::2 \
- -d 1msec -t udp "sp=1024,dp=0-32768"
+ -d $MZ_DELAY -t udp "sp=1024,dp=0-32768"
t1_rp12=$(link_stats_tx_packets_get $rp12)
t1_rp13=$(link_stats_tx_packets_get $rp13)
@@ -232,38 +232,6 @@ multipath6_l4_test()
sysctl_restore net.ipv6.fib_multipath_hash_policy
}
-multipath6_test()
-{
- local desc="$1"
- local weight_rp12=$2
- local weight_rp13=$3
- local t0_rp12 t0_rp13 t1_rp12 t1_rp13
- local packets_rp12 packets_rp13
-
- ip route replace 2001:db8:2::/64 vrf vrf-r1 \
- nexthop via fe80:2::22 dev $rp12 weight $weight_rp12 \
- nexthop via fe80:3::23 dev $rp13 weight $weight_rp13
-
- t0_rp12=$(link_stats_tx_packets_get $rp12)
- t0_rp13=$(link_stats_tx_packets_get $rp13)
-
- # Generate 16384 echo requests, each with a random flow label.
- for _ in $(seq 1 16384); do
- ip vrf exec vrf-h1 $PING6 2001:db8:2::2 -F 0 -c 1 -q &> /dev/null
- done
-
- t1_rp12=$(link_stats_tx_packets_get $rp12)
- t1_rp13=$(link_stats_tx_packets_get $rp13)
-
- let "packets_rp12 = $t1_rp12 - $t0_rp12"
- let "packets_rp13 = $t1_rp13 - $t0_rp13"
- multipath_eval "$desc" $weight_rp12 $weight_rp13 $packets_rp12 $packets_rp13
-
- ip route replace 2001:db8:2::/64 vrf vrf-r1 \
- nexthop via fe80:2::22 dev $rp12 \
- nexthop via fe80:3::23 dev $rp13
-}
-
multipath_test()
{
log_info "Running IPv4 multipath tests"
@@ -275,11 +243,6 @@ multipath_test()
multipath6_test "ECMP" 1 1
multipath6_test "Weighted MP 2:1" 2 1
multipath6_test "Weighted MP 11:45" 11 45
-
- log_info "Running IPv6 L4 hash multipath tests"
- multipath6_l4_test "ECMP" 1 1
- multipath6_l4_test "Weighted MP 2:1" 2 1
- multipath6_l4_test "Weighted MP 11:45" 11 45
}
setup_prepare()
diff --git a/tools/testing/selftests/net/forwarding/router_nh.sh b/tools/testing/selftests/net/forwarding/router_nh.sh
new file mode 100755
index 000000000000..f3a53738bdcc
--- /dev/null
+++ b/tools/testing/selftests/net/forwarding/router_nh.sh
@@ -0,0 +1,160 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+ALL_TESTS="
+ ping_ipv4
+ ping_ipv6
+"
+
+NUM_NETIFS=4
+source lib.sh
+source tc_common.sh
+
+h1_create()
+{
+ vrf_create "vrf-h1"
+ ip link set dev $h1 master vrf-h1
+
+ ip link set dev vrf-h1 up
+ ip link set dev $h1 up
+
+ ip address add 192.0.2.2/24 dev $h1
+ ip address add 2001:db8:1::2/64 dev $h1
+
+ ip route add 198.51.100.0/24 vrf vrf-h1 nexthop via 192.0.2.1
+ ip route add 2001:db8:2::/64 vrf vrf-h1 nexthop via 2001:db8:1::1
+}
+
+h1_destroy()
+{
+ ip route del 2001:db8:2::/64 vrf vrf-h1
+ ip route del 198.51.100.0/24 vrf vrf-h1
+
+ ip address del 2001:db8:1::2/64 dev $h1
+ ip address del 192.0.2.2/24 dev $h1
+
+ ip link set dev $h1 down
+ vrf_destroy "vrf-h1"
+}
+
+h2_create()
+{
+ vrf_create "vrf-h2"
+ ip link set dev $h2 master vrf-h2
+
+ ip link set dev vrf-h2 up
+ ip link set dev $h2 up
+
+ ip address add 198.51.100.2/24 dev $h2
+ ip address add 2001:db8:2::2/64 dev $h2
+
+ ip route add 192.0.2.0/24 vrf vrf-h2 nexthop via 198.51.100.1
+ ip route add 2001:db8:1::/64 vrf vrf-h2 nexthop via 2001:db8:2::1
+}
+
+h2_destroy()
+{
+ ip route del 2001:db8:1::/64 vrf vrf-h2
+ ip route del 192.0.2.0/24 vrf vrf-h2
+
+ ip address del 2001:db8:2::2/64 dev $h2
+ ip address del 198.51.100.2/24 dev $h2
+
+ ip link set dev $h2 down
+ vrf_destroy "vrf-h2"
+}
+
+router_create()
+{
+ ip link set dev $rp1 up
+ ip link set dev $rp2 up
+
+ tc qdisc add dev $rp2 clsact
+
+ ip address add 192.0.2.1/24 dev $rp1
+ ip address add 2001:db8:1::1/64 dev $rp1
+
+ ip address add 198.51.100.1/24 dev $rp2
+ ip address add 2001:db8:2::1/64 dev $rp2
+}
+
+router_destroy()
+{
+ ip address del 2001:db8:2::1/64 dev $rp2
+ ip address del 198.51.100.1/24 dev $rp2
+
+ ip address del 2001:db8:1::1/64 dev $rp1
+ ip address del 192.0.2.1/24 dev $rp1
+
+ tc qdisc del dev $rp2 clsact
+
+ ip link set dev $rp2 down
+ ip link set dev $rp1 down
+}
+
+routing_nh_obj()
+{
+ # Create the nexthops as AF_INET6, so that IPv4 and IPv6 routes could
+ # use them.
+ ip -6 nexthop add id 101 dev $rp1
+ ip -6 nexthop add id 102 dev $rp2
+
+ ip route replace 192.0.2.0/24 nhid 101
+ ip route replace 2001:db8:1::/64 nhid 101
+ ip route replace 198.51.100.0/24 nhid 102
+ ip route replace 2001:db8:2::/64 nhid 102
+}
+
+setup_prepare()
+{
+ h1=${NETIFS[p1]}
+ rp1=${NETIFS[p2]}
+
+ rp2=${NETIFS[p3]}
+ h2=${NETIFS[p4]}
+
+ rp1mac=$(mac_get $rp1)
+
+ vrf_prepare
+
+ h1_create
+ h2_create
+
+ router_create
+
+ forwarding_enable
+}
+
+cleanup()
+{
+ pre_cleanup
+
+ forwarding_restore
+
+ router_destroy
+
+ h2_destroy
+ h1_destroy
+
+ vrf_cleanup
+}
+
+ping_ipv4()
+{
+ ping_test $h1 198.51.100.2
+}
+
+ping_ipv6()
+{
+ ping6_test $h1 2001:db8:2::2
+}
+
+trap cleanup EXIT
+
+setup_prepare
+setup_wait
+routing_nh_obj
+
+tests_run
+
+exit $EXIT_STATUS
diff --git a/tools/testing/selftests/net/forwarding/router_vid_1.sh b/tools/testing/selftests/net/forwarding/router_vid_1.sh
index a7306c7ac06d..865c9f7d8143 100755
--- a/tools/testing/selftests/net/forwarding/router_vid_1.sh
+++ b/tools/testing/selftests/net/forwarding/router_vid_1.sh
@@ -1,7 +1,32 @@
#!/bin/bash
# SPDX-License-Identifier: GPL-2.0
-ALL_TESTS="ping_ipv4 ping_ipv6"
+# +--------------------+ +----------------------+
+# | H1 | | H2 |
+# | | | |
+# | $h1.1 + | | + $h2.1 |
+# | 192.0.2.2/24 | | | | 198.51.100.2/24 |
+# | 2001:db8:1::2/64 | | | | 2001:db8:2::2/64 |
+# | | | | | |
+# | $h1 + | | + $h2 |
+# | | | | | |
+# +------------------|-+ +-|--------------------+
+# | |
+# +------------------|-------------------------|--------------------+
+# | SW | | |
+# | | | |
+# | $rp1 + + $rp2 |
+# | | | |
+# | $rp1.1 + + $rp2.1 |
+# | 192.0.2.1/24 198.51.100.1/24 |
+# | 2001:db8:1::1/64 2001:db8:2::1/64 |
+# | |
+# +-----------------------------------------------------------------+
+
+ALL_TESTS="
+ ping_ipv4
+ ping_ipv6
+"
NUM_NETIFS=4
source lib.sh
diff --git a/tools/testing/selftests/net/forwarding/sch_red.sh b/tools/testing/selftests/net/forwarding/sch_red.sh
new file mode 100755
index 000000000000..81f31179ac88
--- /dev/null
+++ b/tools/testing/selftests/net/forwarding/sch_red.sh
@@ -0,0 +1,493 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+# This test sends one stream of traffic from H1 through a TBF shaper, to a RED
+# within TBF shaper on $swp3. The two shapers have the same configuration, and
+# thus the resulting stream should fill all available bandwidth on the latter
+# shaper. A second stream is sent from H2 also via $swp3, and used to inject
+# additional traffic. Since all available bandwidth is taken, this traffic has
+# to go to backlog.
+#
+# +--------------------------+ +--------------------------+
+# | H1 | | H2 |
+# | + $h1 | | + $h2 |
+# | | 192.0.2.1/28 | | | 192.0.2.2/28 |
+# | | TBF 10Mbps | | | |
+# +-----|--------------------+ +-----|--------------------+
+# | |
+# +-----|------------------------------------------------|--------------------+
+# | SW | | |
+# | +--|------------------------------------------------|----------------+ |
+# | | + $swp1 + $swp2 | |
+# | | BR | |
+# | | | |
+# | | + $swp3 | |
+# | | | TBF 10Mbps / RED | |
+# | +--------------------------------|-----------------------------------+ |
+# | | |
+# +-----------------------------------|---------------------------------------+
+# |
+# +-----|--------------------+
+# | H3 | |
+# | + $h1 |
+# | 192.0.2.3/28 |
+# | |
+# +--------------------------+
+
+ALL_TESTS="
+ ping_ipv4
+ ecn_test
+ ecn_nodrop_test
+ red_test
+ red_qevent_test
+ ecn_qevent_test
+"
+
+NUM_NETIFS=6
+CHECK_TC="yes"
+source lib.sh
+
+BACKLOG=30000
+PKTSZ=1400
+
+h1_create()
+{
+ simple_if_init $h1 192.0.2.1/28
+ mtu_set $h1 10000
+ tc qdisc replace dev $h1 root handle 1: tbf \
+ rate 10Mbit burst 10K limit 1M
+}
+
+h1_destroy()
+{
+ tc qdisc del dev $h1 root
+ mtu_restore $h1
+ simple_if_fini $h1 192.0.2.1/28
+}
+
+h2_create()
+{
+ simple_if_init $h2 192.0.2.2/28
+ mtu_set $h2 10000
+}
+
+h2_destroy()
+{
+ mtu_restore $h2
+ simple_if_fini $h2 192.0.2.2/28
+}
+
+h3_create()
+{
+ simple_if_init $h3 192.0.2.3/28
+ mtu_set $h3 10000
+}
+
+h3_destroy()
+{
+ mtu_restore $h3
+ simple_if_fini $h3 192.0.2.3/28
+}
+
+switch_create()
+{
+ ip link add dev br up type bridge
+ ip link set dev $swp1 up master br
+ ip link set dev $swp2 up master br
+ ip link set dev $swp3 up master br
+
+ mtu_set $swp1 10000
+ mtu_set $swp2 10000
+ mtu_set $swp3 10000
+
+ tc qdisc replace dev $swp3 root handle 1: tbf \
+ rate 10Mbit burst 10K limit 1M
+ ip link add name _drop_test up type dummy
+}
+
+switch_destroy()
+{
+ ip link del dev _drop_test
+ tc qdisc del dev $swp3 root
+
+ mtu_restore $h3
+ mtu_restore $h2
+ mtu_restore $h1
+
+ ip link set dev $swp3 down nomaster
+ ip link set dev $swp2 down nomaster
+ ip link set dev $swp1 down nomaster
+ ip link del dev br
+}
+
+setup_prepare()
+{
+ h1=${NETIFS[p1]}
+ swp1=${NETIFS[p2]}
+
+ h2=${NETIFS[p3]}
+ swp2=${NETIFS[p4]}
+
+ swp3=${NETIFS[p5]}
+ h3=${NETIFS[p6]}
+
+ h3_mac=$(mac_get $h3)
+
+ vrf_prepare
+
+ h1_create
+ h2_create
+ h3_create
+ switch_create
+}
+
+cleanup()
+{
+ pre_cleanup
+
+ switch_destroy
+ h3_destroy
+ h2_destroy
+ h1_destroy
+
+ vrf_cleanup
+}
+
+ping_ipv4()
+{
+ ping_test $h1 192.0.2.3 " from host 1"
+ ping_test $h2 192.0.2.3 " from host 2"
+}
+
+get_qdisc_backlog()
+{
+ qdisc_stats_get $swp3 11: .backlog
+}
+
+get_nmarked()
+{
+ qdisc_stats_get $swp3 11: .marked
+}
+
+get_qdisc_npackets()
+{
+ qdisc_stats_get $swp3 11: .packets
+}
+
+get_nmirrored()
+{
+ link_stats_get _drop_test tx packets
+}
+
+send_packets()
+{
+ local proto=$1; shift
+ local pkts=$1; shift
+
+ $MZ $h2 -p $PKTSZ -a own -b $h3_mac -A 192.0.2.2 -B 192.0.2.3 -t $proto -q -c $pkts "$@"
+}
+
+# This sends traffic in an attempt to build a backlog of $size. Returns 0 on
+# success. After 10 failed attempts it bails out and returns 1. It dumps the
+# backlog size to stdout.
+build_backlog()
+{
+ local size=$1; shift
+ local proto=$1; shift
+
+ local i=0
+
+ while :; do
+ local cur=$(get_qdisc_backlog)
+ local diff=$((size - cur))
+ local pkts=$(((diff + PKTSZ - 1) / PKTSZ))
+
+ if ((cur >= size)); then
+ echo $cur
+ return 0
+ elif ((i++ > 10)); then
+ echo $cur
+ return 1
+ fi
+
+ send_packets $proto $pkts "$@"
+ sleep 1
+ done
+}
+
+check_marking()
+{
+ local cond=$1; shift
+
+ local npackets_0=$(get_qdisc_npackets)
+ local nmarked_0=$(get_nmarked)
+ sleep 5
+ local npackets_1=$(get_qdisc_npackets)
+ local nmarked_1=$(get_nmarked)
+
+ local nmarked_d=$((nmarked_1 - nmarked_0))
+ local npackets_d=$((npackets_1 - npackets_0))
+ local pct=$((100 * nmarked_d / npackets_d))
+
+ echo $pct
+ ((pct $cond))
+}
+
+check_mirroring()
+{
+ local cond=$1; shift
+
+ local npackets_0=$(get_qdisc_npackets)
+ local nmirrored_0=$(get_nmirrored)
+ sleep 5
+ local npackets_1=$(get_qdisc_npackets)
+ local nmirrored_1=$(get_nmirrored)
+
+ local nmirrored_d=$((nmirrored_1 - nmirrored_0))
+ local npackets_d=$((npackets_1 - npackets_0))
+ local pct=$((100 * nmirrored_d / npackets_d))
+
+ echo $pct
+ ((pct $cond))
+}
+
+ecn_test_common()
+{
+ local name=$1; shift
+ local limit=$1; shift
+ local backlog
+ local pct
+
+ # Build the below-the-limit backlog using UDP. We could use TCP just
+ # fine, but this way we get a proof that UDP is accepted when queue
+ # length is below the limit. The main stream is using TCP, and if the
+ # limit is misconfigured, we would see this traffic being ECN marked.
+ RET=0
+ backlog=$(build_backlog $((2 * limit / 3)) udp)
+ check_err $? "Could not build the requested backlog"
+ pct=$(check_marking "== 0")
+ check_err $? "backlog $backlog / $limit Got $pct% marked packets, expected == 0."
+ log_test "$name backlog < limit"
+
+ # Now push TCP, because non-TCP traffic would be early-dropped after the
+ # backlog crosses the limit, and we want to make sure that the backlog
+ # is above the limit.
+ RET=0
+ backlog=$(build_backlog $((3 * limit / 2)) tcp tos=0x01)
+ check_err $? "Could not build the requested backlog"
+ pct=$(check_marking ">= 95")
+ check_err $? "backlog $backlog / $limit Got $pct% marked packets, expected >= 95."
+ log_test "$name backlog > limit"
+}
+
+do_ecn_test()
+{
+ local limit=$1; shift
+ local name=ECN
+
+ $MZ $h1 -p $PKTSZ -A 192.0.2.1 -B 192.0.2.3 -c 0 \
+ -a own -b $h3_mac -t tcp -q tos=0x01 &
+ sleep 1
+
+ ecn_test_common "$name" $limit
+
+ # Up there we saw that UDP gets accepted when backlog is below the
+ # limit. Now that it is above, it should all get dropped, and backlog
+ # building should fail.
+ RET=0
+ build_backlog $((2 * limit)) udp >/dev/null
+ check_fail $? "UDP traffic went into backlog instead of being early-dropped"
+ log_test "$name backlog > limit: UDP early-dropped"
+
+ stop_traffic
+ sleep 1
+}
+
+do_ecn_nodrop_test()
+{
+ local limit=$1; shift
+ local name="ECN nodrop"
+
+ $MZ $h1 -p $PKTSZ -A 192.0.2.1 -B 192.0.2.3 -c 0 \
+ -a own -b $h3_mac -t tcp -q tos=0x01 &
+ sleep 1
+
+ ecn_test_common "$name" $limit
+
+ # Up there we saw that UDP gets accepted when backlog is below the
+ # limit. Now that it is above, in nodrop mode, make sure it goes to
+ # backlog as well.
+ RET=0
+ build_backlog $((2 * limit)) udp >/dev/null
+ check_err $? "UDP traffic was early-dropped instead of getting into backlog"
+ log_test "$name backlog > limit: UDP not dropped"
+
+ stop_traffic
+ sleep 1
+}
+
+do_red_test()
+{
+ local limit=$1; shift
+ local backlog
+ local pct
+
+ # Use ECN-capable TCP to verify there's no marking even though the queue
+ # is above limit.
+ $MZ $h1 -p $PKTSZ -A 192.0.2.1 -B 192.0.2.3 -c 0 \
+ -a own -b $h3_mac -t tcp -q tos=0x01 &
+
+ # Pushing below the queue limit should work.
+ RET=0
+ backlog=$(build_backlog $((2 * limit / 3)) tcp tos=0x01)
+ check_err $? "Could not build the requested backlog"
+ pct=$(check_marking "== 0")
+ check_err $? "backlog $backlog / $limit Got $pct% marked packets, expected == 0."
+ log_test "RED backlog < limit"
+
+ # Pushing above should not.
+ RET=0
+ backlog=$(build_backlog $((3 * limit / 2)) tcp tos=0x01)
+ check_fail $? "Traffic went into backlog instead of being early-dropped"
+ pct=$(check_marking "== 0")
+ check_err $? "backlog $backlog / $limit Got $pct% marked packets, expected == 0."
+ log_test "RED backlog > limit"
+
+ stop_traffic
+ sleep 1
+}
+
+do_red_qevent_test()
+{
+ local limit=$1; shift
+ local backlog
+ local base
+ local now
+ local pct
+
+ RET=0
+
+ $MZ $h1 -p $PKTSZ -A 192.0.2.1 -B 192.0.2.3 -c 0 \
+ -a own -b $h3_mac -t udp -q &
+ sleep 1
+
+ tc filter add block 10 pref 1234 handle 102 matchall skip_hw \
+ action mirred egress mirror dev _drop_test
+
+ # Push to the queue until it's at the limit. The configured limit is
+ # rounded by the qdisc, so this is the best we can do to get to the real
+ # limit.
+ build_backlog $((3 * limit / 2)) udp >/dev/null
+
+ base=$(get_nmirrored)
+ send_packets udp 100
+ sleep 1
+ now=$(get_nmirrored)
+ ((now >= base + 100))
+ check_err $? "Dropped packets not observed: 100 expected, $((now - base)) seen"
+
+ tc filter del block 10 pref 1234 handle 102 matchall
+
+ base=$(get_nmirrored)
+ send_packets udp 100
+ sleep 1
+ now=$(get_nmirrored)
+ ((now == base))
+ check_err $? "Dropped packets still observed: 0 expected, $((now - base)) seen"
+
+ log_test "RED early_dropped packets mirrored"
+
+ stop_traffic
+ sleep 1
+}
+
+do_ecn_qevent_test()
+{
+ local limit=$1; shift
+ local name=ECN
+
+ RET=0
+
+ $MZ $h1 -p $PKTSZ -A 192.0.2.1 -B 192.0.2.3 -c 0 \
+ -a own -b $h3_mac -t tcp -q tos=0x01 &
+ sleep 1
+
+ tc filter add block 10 pref 1234 handle 102 matchall skip_hw \
+ action mirred egress mirror dev _drop_test
+
+ backlog=$(build_backlog $((2 * limit / 3)) tcp tos=0x01)
+ check_err $? "Could not build the requested backlog"
+ pct=$(check_mirroring "== 0")
+ check_err $? "backlog $backlog / $limit Got $pct% mirrored packets, expected == 0."
+
+ backlog=$(build_backlog $((3 * limit / 2)) tcp tos=0x01)
+ check_err $? "Could not build the requested backlog"
+ pct=$(check_mirroring ">= 95")
+ check_err $? "backlog $backlog / $limit Got $pct% mirrored packets, expected >= 95."
+
+ tc filter del block 10 pref 1234 handle 102 matchall
+
+ log_test "ECN marked packets mirrored"
+
+ stop_traffic
+ sleep 1
+}
+
+install_qdisc()
+{
+ local -a args=("$@")
+
+ tc qdisc replace dev $swp3 parent 1:1 handle 11: red \
+ limit 1M avpkt $PKTSZ probability 1 \
+ min $BACKLOG max $((BACKLOG + 1)) burst 38 "${args[@]}"
+ sleep 1
+}
+
+uninstall_qdisc()
+{
+ tc qdisc del dev $swp3 parent 1:1
+}
+
+ecn_test()
+{
+ install_qdisc ecn
+ do_ecn_test $BACKLOG
+ uninstall_qdisc
+}
+
+ecn_nodrop_test()
+{
+ install_qdisc ecn nodrop
+ do_ecn_nodrop_test $BACKLOG
+ uninstall_qdisc
+}
+
+red_test()
+{
+ install_qdisc
+ do_red_test $BACKLOG
+ uninstall_qdisc
+}
+
+red_qevent_test()
+{
+ install_qdisc qevent early_drop block 10
+ do_red_qevent_test $BACKLOG
+ uninstall_qdisc
+}
+
+ecn_qevent_test()
+{
+ install_qdisc ecn qevent mark block 10
+ do_ecn_qevent_test $BACKLOG
+ uninstall_qdisc
+}
+
+trap cleanup EXIT
+
+setup_prepare
+setup_wait
+
+tests_run
+
+exit $EXIT_STATUS
diff --git a/tools/testing/selftests/net/forwarding/sch_tbf_etsprio.sh b/tools/testing/selftests/net/forwarding/sch_tbf_etsprio.sh
index 8bd85da1905a..df9bcd6a811a 100644
--- a/tools/testing/selftests/net/forwarding/sch_tbf_etsprio.sh
+++ b/tools/testing/selftests/net/forwarding/sch_tbf_etsprio.sh
@@ -4,9 +4,12 @@
ALL_TESTS="
ping_ipv4
tbf_test
+ tbf_root_test
"
source $lib_dir/sch_tbf_core.sh
+QDISC_TYPE=${QDISC% *}
+
tbf_test_one()
{
local bs=$1; shift
@@ -22,6 +25,8 @@ tbf_test_one()
tbf_test()
{
+ log_info "Testing root-$QDISC_TYPE-tbf"
+
# This test is used for both ETS and PRIO. Even though we only need two
# bands, PRIO demands a minimum of three.
tc qdisc add dev $swp2 root handle 10: $QDISC 3 priomap 2 1 0
@@ -29,6 +34,33 @@ tbf_test()
tc qdisc del dev $swp2 root
}
+tbf_root_test()
+{
+ local bs=128K
+
+ log_info "Testing root-tbf-$QDISC_TYPE"
+
+ tc qdisc replace dev $swp2 root handle 1: \
+ tbf rate 400Mbit burst $bs limit 1M
+ tc qdisc replace dev $swp2 parent 1:1 handle 10: \
+ $QDISC 3 priomap 2 1 0
+ tc qdisc replace dev $swp2 parent 10:3 handle 103: \
+ bfifo limit 1M
+ tc qdisc replace dev $swp2 parent 10:2 handle 102: \
+ bfifo limit 1M
+ tc qdisc replace dev $swp2 parent 10:1 handle 101: \
+ bfifo limit 1M
+
+ do_tbf_test 10 400 $bs
+ do_tbf_test 11 400 $bs
+
+ tc qdisc del dev $swp2 root
+}
+
+if type -t sch_tbf_pre_hook >/dev/null; then
+ sch_tbf_pre_hook
+fi
+
trap cleanup EXIT
setup_prepare
diff --git a/tools/testing/selftests/net/forwarding/sch_tbf_root.sh b/tools/testing/selftests/net/forwarding/sch_tbf_root.sh
index 72aa21ba88c7..96c997be0d03 100755
--- a/tools/testing/selftests/net/forwarding/sch_tbf_root.sh
+++ b/tools/testing/selftests/net/forwarding/sch_tbf_root.sh
@@ -23,6 +23,10 @@ tbf_test()
tc qdisc del dev $swp2 root
}
+if type -t sch_tbf_pre_hook >/dev/null; then
+ sch_tbf_pre_hook
+fi
+
trap cleanup EXIT
setup_prepare
diff --git a/tools/testing/selftests/net/forwarding/settings b/tools/testing/selftests/net/forwarding/settings
new file mode 100644
index 000000000000..e7b9417537fb
--- /dev/null
+++ b/tools/testing/selftests/net/forwarding/settings
@@ -0,0 +1 @@
+timeout=0
diff --git a/tools/testing/selftests/net/forwarding/skbedit_priority.sh b/tools/testing/selftests/net/forwarding/skbedit_priority.sh
index e3bd8a6bb8b4..3dd5fcbd3eaa 100755
--- a/tools/testing/selftests/net/forwarding/skbedit_priority.sh
+++ b/tools/testing/selftests/net/forwarding/skbedit_priority.sh
@@ -54,7 +54,9 @@ h2_destroy()
switch_create()
{
- ip link add name br1 up type bridge vlan_filtering 1
+ ip link add name br1 type bridge vlan_filtering 1
+ ip link set dev br1 addrgenmode none
+ ip link set dev br1 up
ip link set dev $swp1 master br1
ip link set dev $swp1 up
ip link set dev $swp2 master br1
@@ -72,7 +74,9 @@ switch_destroy()
tc qdisc del dev $swp2 clsact
tc qdisc del dev $swp1 clsact
+ ip link set dev $swp2 down
ip link set dev $swp2 nomaster
+ ip link set dev $swp1 down
ip link set dev $swp1 nomaster
ip link del dev br1
}
diff --git a/tools/testing/selftests/net/forwarding/tc_actions.sh b/tools/testing/selftests/net/forwarding/tc_actions.sh
index d9eca227136b..589629636502 100755
--- a/tools/testing/selftests/net/forwarding/tc_actions.sh
+++ b/tools/testing/selftests/net/forwarding/tc_actions.sh
@@ -3,20 +3,25 @@
ALL_TESTS="gact_drop_and_ok_test mirred_egress_redirect_test \
mirred_egress_mirror_test matchall_mirred_egress_mirror_test \
- gact_trap_test"
+ gact_trap_test mirred_egress_to_ingress_test \
+ mirred_egress_to_ingress_tcp_test"
NUM_NETIFS=4
source tc_common.sh
source lib.sh
+require_command ncat
+
tcflags="skip_hw"
h1_create()
{
simple_if_init $h1 192.0.2.1/24
+ tc qdisc add dev $h1 clsact
}
h1_destroy()
{
+ tc qdisc del dev $h1 clsact
simple_if_fini $h1 192.0.2.1/24
}
@@ -58,7 +63,7 @@ mirred_egress_test()
RET=0
tc filter add dev $h2 ingress protocol ip pref 1 handle 101 flower \
- $tcflags dst_ip 192.0.2.2 action drop
+ dst_ip 192.0.2.2 action drop
$MZ $h1 -c 1 -p 64 -a $h1mac -b $h2mac -A 192.0.2.1 -B 192.0.2.2 \
-t ip -q
@@ -153,6 +158,92 @@ gact_trap_test()
log_test "trap ($tcflags)"
}
+mirred_egress_to_ingress_test()
+{
+ RET=0
+
+ tc filter add dev $h1 protocol ip pref 100 handle 100 egress flower \
+ ip_proto icmp src_ip 192.0.2.1 dst_ip 192.0.2.2 type 8 action \
+ ct commit nat src addr 192.0.2.2 pipe \
+ ct clear pipe \
+ ct commit nat dst addr 192.0.2.1 pipe \
+ mirred ingress redirect dev $h1
+
+ tc filter add dev $swp1 protocol ip pref 11 handle 111 ingress flower \
+ ip_proto icmp src_ip 192.0.2.1 dst_ip 192.0.2.2 type 8 action drop
+ tc filter add dev $swp1 protocol ip pref 12 handle 112 ingress flower \
+ ip_proto icmp src_ip 192.0.2.1 dst_ip 192.0.2.2 type 0 action pass
+
+ $MZ $h1 -c 1 -p 64 -a $h1mac -b $h2mac -A 192.0.2.1 -B 192.0.2.2 \
+ -t icmp "ping,id=42,seq=10" -q
+
+ tc_check_packets "dev $h1 egress" 100 1
+ check_err $? "didn't mirror first packet"
+
+ tc_check_packets "dev $swp1 ingress" 111 1
+ check_fail $? "didn't redirect first packet"
+ tc_check_packets "dev $swp1 ingress" 112 1
+ check_err $? "didn't receive reply to first packet"
+
+ ping 192.0.2.2 -I$h1 -c1 -w1 -q 1>/dev/null 2>&1
+
+ tc_check_packets "dev $h1 egress" 100 2
+ check_err $? "didn't mirror second packet"
+ tc_check_packets "dev $swp1 ingress" 111 1
+ check_fail $? "didn't redirect second packet"
+ tc_check_packets "dev $swp1 ingress" 112 2
+ check_err $? "didn't receive reply to second packet"
+
+ tc filter del dev $h1 egress protocol ip pref 100 handle 100 flower
+ tc filter del dev $swp1 ingress protocol ip pref 11 handle 111 flower
+ tc filter del dev $swp1 ingress protocol ip pref 12 handle 112 flower
+
+ log_test "mirred_egress_to_ingress ($tcflags)"
+}
+
+mirred_egress_to_ingress_tcp_test()
+{
+ mirred_e2i_tf1=$(mktemp) mirred_e2i_tf2=$(mktemp)
+
+ RET=0
+ dd conv=sparse status=none if=/dev/zero bs=1M count=2 of=$mirred_e2i_tf1
+ tc filter add dev $h1 protocol ip pref 100 handle 100 egress flower \
+ $tcflags ip_proto tcp src_ip 192.0.2.1 dst_ip 192.0.2.2 \
+ action ct commit nat src addr 192.0.2.2 pipe \
+ action ct clear pipe \
+ action ct commit nat dst addr 192.0.2.1 pipe \
+ action ct clear pipe \
+ action skbedit ptype host pipe \
+ action mirred ingress redirect dev $h1
+ tc filter add dev $h1 protocol ip pref 101 handle 101 egress flower \
+ $tcflags ip_proto icmp \
+ action mirred ingress redirect dev $h1
+ tc filter add dev $h1 protocol ip pref 102 handle 102 ingress flower \
+ ip_proto icmp \
+ action drop
+
+ ip vrf exec v$h1 ncat --recv-only -w10 -l -p 12345 -o $mirred_e2i_tf2 &
+ local rpid=$!
+ ip vrf exec v$h1 ncat -w1 --send-only 192.0.2.2 12345 <$mirred_e2i_tf1
+ wait -n $rpid
+ cmp -s $mirred_e2i_tf1 $mirred_e2i_tf2
+ check_err $? "server output check failed"
+
+ $MZ $h1 -c 10 -p 64 -a $h1mac -b $h1mac -A 192.0.2.1 -B 192.0.2.1 \
+ -t icmp "ping,id=42,seq=5" -q
+ tc_check_packets "dev $h1 egress" 101 10
+ check_err $? "didn't mirred redirect ICMP"
+ tc_check_packets "dev $h1 ingress" 102 10
+ check_err $? "didn't drop mirred ICMP"
+
+ tc filter del dev $h1 egress protocol ip pref 100 handle 100 flower
+ tc filter del dev $h1 egress protocol ip pref 101 handle 101 flower
+ tc filter del dev $h1 ingress protocol ip pref 102 handle 102 flower
+
+ rm -f $mirred_e2i_tf1 $mirred_e2i_tf2
+ log_test "mirred_egress_to_ingress_tcp ($tcflags)"
+}
+
setup_prepare()
{
h1=${NETIFS[p1]}
@@ -178,6 +269,8 @@ setup_prepare()
cleanup()
{
+ local tf
+
pre_cleanup
switch_destroy
@@ -188,6 +281,8 @@ cleanup()
ip link set $swp2 address $swp2origmac
ip link set $swp1 address $swp1origmac
+
+ for tf in $mirred_e2i_tf1 $mirred_e2i_tf2; do rm -f $tf; done
}
mirred_egress_redirect_test()
diff --git a/tools/testing/selftests/net/forwarding/tc_chains.sh b/tools/testing/selftests/net/forwarding/tc_chains.sh
index 2934fb5ed2a2..b95de0463ebd 100755
--- a/tools/testing/selftests/net/forwarding/tc_chains.sh
+++ b/tools/testing/selftests/net/forwarding/tc_chains.sh
@@ -136,7 +136,7 @@ template_filter_fits()
tc filter add dev $h2 ingress protocol ip pref 1 handle 1102 \
flower src_mac $h2mac action drop &> /dev/null
- check_fail $? "Incorrectly succeded to insert filter which does not template"
+ check_fail $? "Incorrectly succeeded to insert filter which does not template"
tc filter add dev $h2 ingress chain 1 protocol ip pref 1 handle 1101 \
flower src_mac $h2mac action drop
@@ -144,7 +144,7 @@ template_filter_fits()
tc filter add dev $h2 ingress chain 1 protocol ip pref 1 handle 1102 \
flower dst_mac $h2mac action drop &> /dev/null
- check_fail $? "Incorrectly succeded to insert filter which does not template"
+ check_fail $? "Incorrectly succeeded to insert filter which does not template"
tc filter del dev $h2 ingress chain 1 protocol ip pref 1 handle 1102 \
flower &> /dev/null
diff --git a/tools/testing/selftests/net/forwarding/tc_common.sh b/tools/testing/selftests/net/forwarding/tc_common.sh
index 0e18e8be6e2a..bce8bb8d2b6f 100644
--- a/tools/testing/selftests/net/forwarding/tc_common.sh
+++ b/tools/testing/selftests/net/forwarding/tc_common.sh
@@ -16,6 +16,16 @@ tc_check_packets()
tc_rule_handle_stats_get "$id" "$handle" > /dev/null
}
+tc_check_at_least_x_packets()
+{
+ local id=$1
+ local handle=$2
+ local count=$3
+
+ busywait "$TC_HIT_TIMEOUT" until_counter_is ">= $count" \
+ tc_rule_handle_stats_get "$id" "$handle" > /dev/null
+}
+
tc_check_packets_hitting()
{
local id=$1
diff --git a/tools/testing/selftests/net/forwarding/tc_flower.sh b/tools/testing/selftests/net/forwarding/tc_flower.sh
index 058c746ee300..b1daad19b01e 100755
--- a/tools/testing/selftests/net/forwarding/tc_flower.sh
+++ b/tools/testing/selftests/net/forwarding/tc_flower.sh
@@ -3,7 +3,10 @@
ALL_TESTS="match_dst_mac_test match_src_mac_test match_dst_ip_test \
match_src_ip_test match_ip_flags_test match_pcp_test match_vlan_test \
- match_ip_tos_test match_indev_test"
+ match_ip_tos_test match_indev_test match_ip_ttl_test
+ match_mpls_label_test \
+ match_mpls_tc_test match_mpls_bos_test match_mpls_ttl_test \
+ match_mpls_lse_test"
NUM_NETIFS=2
source tc_common.sh
source lib.sh
@@ -49,8 +52,8 @@ match_dst_mac_test()
tc_check_packets "dev $h2 ingress" 101 1
check_fail $? "Matched on a wrong filter"
- tc_check_packets "dev $h2 ingress" 102 1
- check_err $? "Did not match on correct filter"
+ tc_check_packets "dev $h2 ingress" 102 0
+ check_fail $? "Did not match on correct filter"
tc filter del dev $h2 ingress protocol ip pref 1 handle 101 flower
tc filter del dev $h2 ingress protocol ip pref 2 handle 102 flower
@@ -75,8 +78,8 @@ match_src_mac_test()
tc_check_packets "dev $h2 ingress" 101 1
check_fail $? "Matched on a wrong filter"
- tc_check_packets "dev $h2 ingress" 102 1
- check_err $? "Did not match on correct filter"
+ tc_check_packets "dev $h2 ingress" 102 0
+ check_fail $? "Did not match on correct filter"
tc filter del dev $h2 ingress protocol ip pref 1 handle 101 flower
tc filter del dev $h2 ingress protocol ip pref 2 handle 102 flower
@@ -310,6 +313,42 @@ match_ip_tos_test()
log_test "ip_tos match ($tcflags)"
}
+match_ip_ttl_test()
+{
+ RET=0
+
+ tc filter add dev $h2 ingress protocol ip pref 1 handle 101 flower \
+ $tcflags dst_ip 192.0.2.2 ip_ttl 63 action drop
+ tc filter add dev $h2 ingress protocol ip pref 2 handle 102 flower \
+ $tcflags dst_ip 192.0.2.2 action drop
+
+ $MZ $h1 -c 1 -p 64 -a $h1mac -b $h2mac -A 192.0.2.1 -B 192.0.2.2 \
+ -t ip "ttl=63" -q
+
+ $MZ $h1 -c 1 -p 64 -a $h1mac -b $h2mac -A 192.0.2.1 -B 192.0.2.2 \
+ -t ip "ttl=63,mf,frag=256" -q
+
+ tc_check_packets "dev $h2 ingress" 102 1
+ check_fail $? "Matched on the wrong filter (no check on ttl)"
+
+ tc_check_packets "dev $h2 ingress" 101 2
+ check_err $? "Did not match on correct filter (ttl=63)"
+
+ $MZ $h1 -c 1 -p 64 -a $h1mac -b $h2mac -A 192.0.2.1 -B 192.0.2.2 \
+ -t ip "ttl=255" -q
+
+ tc_check_packets "dev $h2 ingress" 101 3
+ check_fail $? "Matched on a wrong filter (ttl=63)"
+
+ tc_check_packets "dev $h2 ingress" 102 1
+ check_err $? "Did not match on correct filter (no check on ttl)"
+
+ tc filter del dev $h2 ingress protocol ip pref 2 handle 102 flower
+ tc filter del dev $h2 ingress protocol ip pref 1 handle 101 flower
+
+ log_test "ip_ttl match ($tcflags)"
+}
+
match_indev_test()
{
RET=0
@@ -334,6 +373,309 @@ match_indev_test()
log_test "indev match ($tcflags)"
}
+# Unfortunately, mausezahn can't build MPLS headers when used in L2
+# mode, so we have this function to build Label Stack Entries.
+mpls_lse()
+{
+ local label=$1
+ local tc=$2
+ local bos=$3
+ local ttl=$4
+
+ printf "%02x %02x %02x %02x" \
+ $((label >> 12)) \
+ $((label >> 4 & 0xff)) \
+ $((((label & 0xf) << 4) + (tc << 1) + bos)) \
+ $ttl
+}
+
+match_mpls_label_test()
+{
+ local ethtype="88 47"; readonly ethtype
+ local pkt
+
+ RET=0
+
+ check_tc_mpls_support $h2 || return 0
+
+ tc filter add dev $h2 ingress protocol mpls_uc pref 1 handle 101 \
+ flower $tcflags mpls_label 0 action drop
+ tc filter add dev $h2 ingress protocol mpls_uc pref 2 handle 102 \
+ flower $tcflags mpls_label 1048575 action drop
+
+ pkt="$ethtype $(mpls_lse 1048575 0 1 255)"
+ $MZ $h1 -c 1 -p 64 -a $h1mac -b $h2mac "$pkt" -q
+
+ tc_check_packets "dev $h2 ingress" 101 1
+ check_fail $? "Matched on a wrong filter (1048575)"
+
+ tc_check_packets "dev $h2 ingress" 102 1
+ check_err $? "Did not match on correct filter (1048575)"
+
+ pkt="$ethtype $(mpls_lse 0 0 1 255)"
+ $MZ $h1 -c 1 -p 64 -a $h1mac -b $h2mac "$pkt" -q
+
+ tc_check_packets "dev $h2 ingress" 102 2
+ check_fail $? "Matched on a wrong filter (0)"
+
+ tc_check_packets "dev $h2 ingress" 101 1
+ check_err $? "Did not match on correct filter (0)"
+
+ tc filter del dev $h2 ingress protocol mpls_uc pref 2 handle 102 flower
+ tc filter del dev $h2 ingress protocol mpls_uc pref 1 handle 101 flower
+
+ log_test "mpls_label match ($tcflags)"
+}
+
+match_mpls_tc_test()
+{
+ local ethtype="88 47"; readonly ethtype
+ local pkt
+
+ RET=0
+
+ check_tc_mpls_support $h2 || return 0
+
+ tc filter add dev $h2 ingress protocol mpls_uc pref 1 handle 101 \
+ flower $tcflags mpls_tc 0 action drop
+ tc filter add dev $h2 ingress protocol mpls_uc pref 2 handle 102 \
+ flower $tcflags mpls_tc 7 action drop
+
+ pkt="$ethtype $(mpls_lse 0 7 1 255)"
+ $MZ $h1 -c 1 -p 64 -a $h1mac -b $h2mac "$pkt" -q
+
+ tc_check_packets "dev $h2 ingress" 101 1
+ check_fail $? "Matched on a wrong filter (7)"
+
+ tc_check_packets "dev $h2 ingress" 102 1
+ check_err $? "Did not match on correct filter (7)"
+
+ pkt="$ethtype $(mpls_lse 0 0 1 255)"
+ $MZ $h1 -c 1 -p 64 -a $h1mac -b $h2mac "$pkt" -q
+
+ tc_check_packets "dev $h2 ingress" 102 2
+ check_fail $? "Matched on a wrong filter (0)"
+
+ tc_check_packets "dev $h2 ingress" 101 1
+ check_err $? "Did not match on correct filter (0)"
+
+ tc filter del dev $h2 ingress protocol mpls_uc pref 2 handle 102 flower
+ tc filter del dev $h2 ingress protocol mpls_uc pref 1 handle 101 flower
+
+ log_test "mpls_tc match ($tcflags)"
+}
+
+match_mpls_bos_test()
+{
+ local ethtype="88 47"; readonly ethtype
+ local pkt
+
+ RET=0
+
+ check_tc_mpls_support $h2 || return 0
+
+ tc filter add dev $h2 ingress protocol mpls_uc pref 1 handle 101 \
+ flower $tcflags mpls_bos 0 action drop
+ tc filter add dev $h2 ingress protocol mpls_uc pref 2 handle 102 \
+ flower $tcflags mpls_bos 1 action drop
+
+ pkt="$ethtype $(mpls_lse 0 0 1 255)"
+ $MZ $h1 -c 1 -p 64 -a $h1mac -b $h2mac "$pkt" -q
+
+ tc_check_packets "dev $h2 ingress" 101 1
+ check_fail $? "Matched on a wrong filter (1)"
+
+ tc_check_packets "dev $h2 ingress" 102 1
+ check_err $? "Did not match on correct filter (1)"
+
+ # Need to add a second label to properly mark the Bottom of Stack
+ pkt="$ethtype $(mpls_lse 0 0 0 255) $(mpls_lse 0 0 1 255)"
+ $MZ $h1 -c 1 -p 64 -a $h1mac -b $h2mac "$pkt" -q
+
+ tc_check_packets "dev $h2 ingress" 102 2
+ check_fail $? "Matched on a wrong filter (0)"
+
+ tc_check_packets "dev $h2 ingress" 101 1
+ check_err $? "Did not match on correct filter (0)"
+
+ tc filter del dev $h2 ingress protocol mpls_uc pref 2 handle 102 flower
+ tc filter del dev $h2 ingress protocol mpls_uc pref 1 handle 101 flower
+
+ log_test "mpls_bos match ($tcflags)"
+}
+
+match_mpls_ttl_test()
+{
+ local ethtype="88 47"; readonly ethtype
+ local pkt
+
+ RET=0
+
+ check_tc_mpls_support $h2 || return 0
+
+ tc filter add dev $h2 ingress protocol mpls_uc pref 1 handle 101 \
+ flower $tcflags mpls_ttl 0 action drop
+ tc filter add dev $h2 ingress protocol mpls_uc pref 2 handle 102 \
+ flower $tcflags mpls_ttl 255 action drop
+
+ pkt="$ethtype $(mpls_lse 0 0 1 255)"
+ $MZ $h1 -c 1 -p 64 -a $h1mac -b $h2mac "$pkt" -q
+
+ tc_check_packets "dev $h2 ingress" 101 1
+ check_fail $? "Matched on a wrong filter (255)"
+
+ tc_check_packets "dev $h2 ingress" 102 1
+ check_err $? "Did not match on correct filter (255)"
+
+ pkt="$ethtype $(mpls_lse 0 0 1 0)"
+ $MZ $h1 -c 1 -p 64 -a $h1mac -b $h2mac "$pkt" -q
+
+ tc_check_packets "dev $h2 ingress" 102 2
+ check_fail $? "Matched on a wrong filter (0)"
+
+ tc_check_packets "dev $h2 ingress" 101 1
+ check_err $? "Did not match on correct filter (0)"
+
+ tc filter del dev $h2 ingress protocol mpls_uc pref 2 handle 102 flower
+ tc filter del dev $h2 ingress protocol mpls_uc pref 1 handle 101 flower
+
+ log_test "mpls_ttl match ($tcflags)"
+}
+
+match_mpls_lse_test()
+{
+ local ethtype="88 47"; readonly ethtype
+ local pkt
+
+ RET=0
+
+ check_tc_mpls_lse_stats $h2 || return 0
+
+ # Match on first LSE (minimal values for each field)
+ tc filter add dev $h2 ingress protocol mpls_uc pref 1 handle 101 \
+ flower $tcflags mpls lse depth 1 label 0 action continue
+ tc filter add dev $h2 ingress protocol mpls_uc pref 2 handle 102 \
+ flower $tcflags mpls lse depth 1 tc 0 action continue
+ tc filter add dev $h2 ingress protocol mpls_uc pref 3 handle 103 \
+ flower $tcflags mpls lse depth 1 bos 0 action continue
+ tc filter add dev $h2 ingress protocol mpls_uc pref 4 handle 104 \
+ flower $tcflags mpls lse depth 1 ttl 0 action continue
+
+ # Match on second LSE (maximal values for each field)
+ tc filter add dev $h2 ingress protocol mpls_uc pref 5 handle 105 \
+ flower $tcflags mpls lse depth 2 label 1048575 action continue
+ tc filter add dev $h2 ingress protocol mpls_uc pref 6 handle 106 \
+ flower $tcflags mpls lse depth 2 tc 7 action continue
+ tc filter add dev $h2 ingress protocol mpls_uc pref 7 handle 107 \
+ flower $tcflags mpls lse depth 2 bos 1 action continue
+ tc filter add dev $h2 ingress protocol mpls_uc pref 8 handle 108 \
+ flower $tcflags mpls lse depth 2 ttl 255 action continue
+
+ # Match on LSE depth
+ tc filter add dev $h2 ingress protocol mpls_uc pref 9 handle 109 \
+ flower $tcflags mpls lse depth 1 action continue
+ tc filter add dev $h2 ingress protocol mpls_uc pref 10 handle 110 \
+ flower $tcflags mpls lse depth 2 action continue
+ tc filter add dev $h2 ingress protocol mpls_uc pref 11 handle 111 \
+ flower $tcflags mpls lse depth 3 action continue
+
+ # Base packet, matched by all filters (except for stack depth 3)
+ pkt="$ethtype $(mpls_lse 0 0 0 0) $(mpls_lse 1048575 7 1 255)"
+ $MZ $h1 -c 1 -p 64 -a $h1mac -b $h2mac "$pkt" -q
+
+ # Make a variant of the above packet, with a non-matching value
+ # for each LSE field
+
+ # Wrong label at depth 1
+ pkt="$ethtype $(mpls_lse 1 0 0 0) $(mpls_lse 1048575 7 1 255)"
+ $MZ $h1 -c 1 -p 64 -a $h1mac -b $h2mac "$pkt" -q
+
+ # Wrong TC at depth 1
+ pkt="$ethtype $(mpls_lse 0 1 0 0) $(mpls_lse 1048575 7 1 255)"
+ $MZ $h1 -c 1 -p 64 -a $h1mac -b $h2mac "$pkt" -q
+
+ # Wrong BOS at depth 1 (not adding a second LSE here since BOS is set
+ # in the first label, so anything that'd follow wouldn't be considered)
+ pkt="$ethtype $(mpls_lse 0 0 1 0)"
+ $MZ $h1 -c 1 -p 64 -a $h1mac -b $h2mac "$pkt" -q
+
+ # Wrong TTL at depth 1
+ pkt="$ethtype $(mpls_lse 0 0 0 1) $(mpls_lse 1048575 7 1 255)"
+ $MZ $h1 -c 1 -p 64 -a $h1mac -b $h2mac "$pkt" -q
+
+ # Wrong label at depth 2
+ pkt="$ethtype $(mpls_lse 0 0 0 0) $(mpls_lse 1048574 7 1 255)"
+ $MZ $h1 -c 1 -p 64 -a $h1mac -b $h2mac "$pkt" -q
+
+ # Wrong TC at depth 2
+ pkt="$ethtype $(mpls_lse 0 0 0 0) $(mpls_lse 1048575 6 1 255)"
+ $MZ $h1 -c 1 -p 64 -a $h1mac -b $h2mac "$pkt" -q
+
+ # Wrong BOS at depth 2 (adding a third LSE here since BOS isn't set in
+ # the second label)
+ pkt="$ethtype $(mpls_lse 0 0 0 0) $(mpls_lse 1048575 7 0 255)"
+ pkt="$pkt $(mpls_lse 0 0 1 255)"
+ $MZ $h1 -c 1 -p 64 -a $h1mac -b $h2mac "$pkt" -q
+
+ # Wrong TTL at depth 2
+ pkt="$ethtype $(mpls_lse 0 0 0 0) $(mpls_lse 1048575 7 1 254)"
+ $MZ $h1 -c 1 -p 64 -a $h1mac -b $h2mac "$pkt" -q
+
+ # Filters working at depth 1 should match all packets but one
+
+ tc_check_packets "dev $h2 ingress" 101 8
+ check_err $? "Did not match on correct filter"
+
+ tc_check_packets "dev $h2 ingress" 102 8
+ check_err $? "Did not match on correct filter"
+
+ tc_check_packets "dev $h2 ingress" 103 8
+ check_err $? "Did not match on correct filter"
+
+ tc_check_packets "dev $h2 ingress" 104 8
+ check_err $? "Did not match on correct filter"
+
+ # Filters working at depth 2 should match all packets but two (because
+ # of the test packet where the label stack depth is just one)
+
+ tc_check_packets "dev $h2 ingress" 105 7
+ check_err $? "Did not match on correct filter"
+
+ tc_check_packets "dev $h2 ingress" 106 7
+ check_err $? "Did not match on correct filter"
+
+ tc_check_packets "dev $h2 ingress" 107 7
+ check_err $? "Did not match on correct filter"
+
+ tc_check_packets "dev $h2 ingress" 108 7
+ check_err $? "Did not match on correct filter"
+
+ # Finally, verify the filters that only match on LSE depth
+
+ tc_check_packets "dev $h2 ingress" 109 9
+ check_err $? "Did not match on correct filter"
+
+ tc_check_packets "dev $h2 ingress" 110 8
+ check_err $? "Did not match on correct filter"
+
+ tc_check_packets "dev $h2 ingress" 111 1
+ check_err $? "Did not match on correct filter"
+
+ tc filter del dev $h2 ingress protocol mpls_uc pref 11 handle 111 flower
+ tc filter del dev $h2 ingress protocol mpls_uc pref 10 handle 110 flower
+ tc filter del dev $h2 ingress protocol mpls_uc pref 9 handle 109 flower
+ tc filter del dev $h2 ingress protocol mpls_uc pref 8 handle 108 flower
+ tc filter del dev $h2 ingress protocol mpls_uc pref 7 handle 107 flower
+ tc filter del dev $h2 ingress protocol mpls_uc pref 6 handle 106 flower
+ tc filter del dev $h2 ingress protocol mpls_uc pref 5 handle 105 flower
+ tc filter del dev $h2 ingress protocol mpls_uc pref 4 handle 104 flower
+ tc filter del dev $h2 ingress protocol mpls_uc pref 3 handle 103 flower
+ tc filter del dev $h2 ingress protocol mpls_uc pref 2 handle 102 flower
+ tc filter del dev $h2 ingress protocol mpls_uc pref 1 handle 101 flower
+
+ log_test "mpls lse match ($tcflags)"
+}
+
setup_prepare()
{
h1=${NETIFS[p1]}
diff --git a/tools/testing/selftests/net/forwarding/tc_flower_cfm.sh b/tools/testing/selftests/net/forwarding/tc_flower_cfm.sh
new file mode 100755
index 000000000000..3ca20df952eb
--- /dev/null
+++ b/tools/testing/selftests/net/forwarding/tc_flower_cfm.sh
@@ -0,0 +1,206 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+ALL_TESTS="match_cfm_opcode match_cfm_level match_cfm_level_and_opcode"
+NUM_NETIFS=2
+source tc_common.sh
+source lib.sh
+
+h1_create()
+{
+ simple_if_init $h1
+}
+
+h1_destroy()
+{
+ simple_if_fini $h1
+}
+
+h2_create()
+{
+ simple_if_init $h2
+ tc qdisc add dev $h2 clsact
+}
+
+h2_destroy()
+{
+ tc qdisc del dev $h2 clsact
+ simple_if_fini $h2
+}
+
+u8_to_hex()
+{
+ local u8=$1; shift
+
+ printf "%02x" $u8
+}
+
+generate_cfm_hdr()
+{
+ local mdl=$1; shift
+ local op=$1; shift
+ local flags=$1; shift
+ local tlv_offset=$1; shift
+
+ local cfm_hdr=$(:
+ )"$(u8_to_hex $((mdl << 5))):"$( : MD level and Version
+ )"$(u8_to_hex $op):"$( : OpCode
+ )"$(u8_to_hex $flags):"$( : Flags
+ )"$(u8_to_hex $tlv_offset)"$( : TLV offset
+ )
+
+ echo $cfm_hdr
+}
+
+match_cfm_opcode()
+{
+ local ethtype="89 02"; readonly ethtype
+ RET=0
+
+ tc filter add dev $h2 ingress protocol cfm pref 1 handle 101 \
+ flower cfm op 47 action drop
+ tc filter add dev $h2 ingress protocol cfm pref 1 handle 102 \
+ flower cfm op 43 action drop
+
+ pkt="$ethtype $(generate_cfm_hdr 7 47 0 32)"
+ $MZ $h1 -c 1 -p 64 -a $h1mac -b $h2mac "$pkt" -q
+ pkt="$ethtype $(generate_cfm_hdr 6 5 0 4)"
+ $MZ $h1 -c 1 -p 64 -a $h1mac -b $h2mac "$pkt" -q
+
+ tc_check_packets "dev $h2 ingress" 101 1
+ check_err $? "Did not match on correct opcode"
+
+ tc_check_packets "dev $h2 ingress" 102 0
+ check_err $? "Matched on the wrong opcode"
+
+ pkt="$ethtype $(generate_cfm_hdr 0 43 0 12)"
+ $MZ $h1 -c 1 -p 64 -a $h1mac -b $h2mac "$pkt" -q
+
+ tc_check_packets "dev $h2 ingress" 101 1
+ check_err $? "Matched on the wrong opcode"
+
+ tc_check_packets "dev $h2 ingress" 102 1
+ check_err $? "Did not match on correct opcode"
+
+ tc filter del dev $h2 ingress protocol cfm pref 1 handle 101 flower
+ tc filter del dev $h2 ingress protocol cfm pref 1 handle 102 flower
+
+ log_test "CFM opcode match test"
+}
+
+match_cfm_level()
+{
+ local ethtype="89 02"; readonly ethtype
+ RET=0
+
+ tc filter add dev $h2 ingress protocol cfm pref 1 handle 101 \
+ flower cfm mdl 5 action drop
+ tc filter add dev $h2 ingress protocol cfm pref 1 handle 102 \
+ flower cfm mdl 3 action drop
+ tc filter add dev $h2 ingress protocol cfm pref 1 handle 103 \
+ flower cfm mdl 0 action drop
+
+ pkt="$ethtype $(generate_cfm_hdr 5 42 0 12)"
+ $MZ $h1 -c 1 -p 64 -a $h1mac -b $h2mac "$pkt" -q
+ pkt="$ethtype $(generate_cfm_hdr 6 1 0 70)"
+ $MZ $h1 -c 1 -p 64 -a $h1mac -b $h2mac "$pkt" -q
+ pkt="$ethtype $(generate_cfm_hdr 0 1 0 70)"
+ $MZ $h1 -c 1 -p 64 -a $h1mac -b $h2mac "$pkt" -q
+
+ tc_check_packets "dev $h2 ingress" 101 1
+ check_err $? "Did not match on correct level"
+
+ tc_check_packets "dev $h2 ingress" 102 0
+ check_err $? "Matched on the wrong level"
+
+ tc_check_packets "dev $h2 ingress" 103 1
+ check_err $? "Did not match on correct level"
+
+ pkt="$ethtype $(generate_cfm_hdr 3 0 0 4)"
+ $MZ $h1 -c 1 -p 64 -a $h1mac -b $h2mac "$pkt" -q
+
+ tc_check_packets "dev $h2 ingress" 101 1
+ check_err $? "Matched on the wrong level"
+
+ tc_check_packets "dev $h2 ingress" 102 1
+ check_err $? "Did not match on correct level"
+
+ tc_check_packets "dev $h2 ingress" 103 1
+ check_err $? "Matched on the wrong level"
+
+ tc filter del dev $h2 ingress protocol cfm pref 1 handle 101 flower
+ tc filter del dev $h2 ingress protocol cfm pref 1 handle 102 flower
+ tc filter del dev $h2 ingress protocol cfm pref 1 handle 103 flower
+
+ log_test "CFM level match test"
+}
+
+match_cfm_level_and_opcode()
+{
+ local ethtype="89 02"; readonly ethtype
+ RET=0
+
+ tc filter add dev $h2 ingress protocol cfm pref 1 handle 101 \
+ flower cfm mdl 5 op 41 action drop
+ tc filter add dev $h2 ingress protocol cfm pref 1 handle 102 \
+ flower cfm mdl 7 op 42 action drop
+
+ pkt="$ethtype $(generate_cfm_hdr 5 41 0 4)"
+ $MZ $h1 -c 1 -p 64 -a $h1mac -b $h2mac "$pkt" -q
+ pkt="$ethtype $(generate_cfm_hdr 7 3 0 4)"
+ $MZ $h1 -c 1 -p 64 -a $h1mac -b $h2mac "$pkt" -q
+ pkt="$ethtype $(generate_cfm_hdr 3 42 0 12)"
+ $MZ $h1 -c 1 -p 64 -a $h1mac -b $h2mac "$pkt" -q
+
+ tc_check_packets "dev $h2 ingress" 101 1
+ check_err $? "Did not match on correct level and opcode"
+
+ tc_check_packets "dev $h2 ingress" 102 0
+ check_err $? "Matched on the wrong level and opcode"
+
+ pkt="$ethtype $(generate_cfm_hdr 7 42 0 12)"
+ $MZ $h1 -c 1 -p 64 -a $h1mac -b $h2mac "$pkt" -q
+
+ tc_check_packets "dev $h2 ingress" 101 1
+ check_err $? "Matched on the wrong level and opcode"
+
+ tc_check_packets "dev $h2 ingress" 102 1
+ check_err $? "Did not match on correct level and opcode"
+
+ tc filter del dev $h2 ingress protocol cfm pref 1 handle 101 flower
+ tc filter del dev $h2 ingress protocol cfm pref 1 handle 102 flower
+
+ log_test "CFM opcode and level match test"
+}
+
+setup_prepare()
+{
+ h1=${NETIFS[p1]}
+ h2=${NETIFS[p2]}
+ h1mac=$(mac_get $h1)
+ h2mac=$(mac_get $h2)
+
+ vrf_prepare
+
+ h1_create
+ h2_create
+}
+
+cleanup()
+{
+ pre_cleanup
+
+ h2_destroy
+ h1_destroy
+
+ vrf_cleanup
+}
+
+trap cleanup EXIT
+
+setup_prepare
+setup_wait
+
+tests_run
+
+exit $EXIT_STATUS
diff --git a/tools/testing/selftests/net/forwarding/tc_flower_l2_miss.sh b/tools/testing/selftests/net/forwarding/tc_flower_l2_miss.sh
new file mode 100755
index 000000000000..c2420bb72c12
--- /dev/null
+++ b/tools/testing/selftests/net/forwarding/tc_flower_l2_miss.sh
@@ -0,0 +1,357 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+# +-----------------------+ +----------------------+
+# | H1 (vrf) | | H2 (vrf) |
+# | + $h1 | | $h2 + |
+# | | 192.0.2.1/28 | | 192.0.2.2/28 | |
+# | | 2001:db8:1::1/64 | | 2001:db8:1::2/64 | |
+# +----|------------------+ +------------------|---+
+# | |
+# +----|-------------------------------------------------------------------|---+
+# | SW | | |
+# | +-|-------------------------------------------------------------------|-+ |
+# | | + $swp1 BR $swp2 + | |
+# | +-----------------------------------------------------------------------+ |
+# +----------------------------------------------------------------------------+
+
+ALL_TESTS="
+ test_l2_miss_unicast
+ test_l2_miss_multicast
+ test_l2_miss_ll_multicast
+ test_l2_miss_broadcast
+"
+
+NUM_NETIFS=4
+source lib.sh
+source tc_common.sh
+
+h1_create()
+{
+ simple_if_init $h1 192.0.2.1/28 2001:db8:1::1/64
+}
+
+h1_destroy()
+{
+ simple_if_fini $h1 192.0.2.1/28 2001:db8:1::1/64
+}
+
+h2_create()
+{
+ simple_if_init $h2 192.0.2.2/28 2001:db8:1::2/64
+}
+
+h2_destroy()
+{
+ simple_if_fini $h2 192.0.2.2/28 2001:db8:1::2/64
+}
+
+switch_create()
+{
+ ip link add name br1 up type bridge
+ ip link set dev $swp1 master br1
+ ip link set dev $swp1 up
+ ip link set dev $swp2 master br1
+ ip link set dev $swp2 up
+
+ tc qdisc add dev $swp2 clsact
+}
+
+switch_destroy()
+{
+ tc qdisc del dev $swp2 clsact
+
+ ip link set dev $swp2 down
+ ip link set dev $swp2 nomaster
+ ip link set dev $swp1 down
+ ip link set dev $swp1 nomaster
+ ip link del dev br1
+}
+
+test_l2_miss_unicast()
+{
+ local dmac=00:01:02:03:04:05
+ local dip=192.0.2.2
+ local sip=192.0.2.1
+
+ RET=0
+
+ # Unknown unicast.
+ tc filter add dev $swp2 egress protocol ipv4 handle 101 pref 1 \
+ flower indev $swp1 l2_miss 1 dst_mac $dmac src_ip $sip \
+ dst_ip $dip action pass
+ # Known unicast.
+ tc filter add dev $swp2 egress protocol ipv4 handle 102 pref 1 \
+ flower indev $swp1 l2_miss 0 dst_mac $dmac src_ip $sip \
+ dst_ip $dip action pass
+
+ # Before adding FDB entry.
+ $MZ $h1 -a own -b $dmac -t ip -A $sip -B $dip -c 1 -p 100 -q
+
+ tc_check_packets "dev $swp2 egress" 101 1
+ check_err $? "Unknown unicast filter was not hit before adding FDB entry"
+
+ tc_check_packets "dev $swp2 egress" 102 0
+ check_err $? "Known unicast filter was hit before adding FDB entry"
+
+ # Adding FDB entry.
+ bridge fdb replace $dmac dev $swp2 master static
+
+ $MZ $h1 -a own -b $dmac -t ip -A $sip -B $dip -c 1 -p 100 -q
+
+ tc_check_packets "dev $swp2 egress" 101 1
+ check_err $? "Unknown unicast filter was hit after adding FDB entry"
+
+ tc_check_packets "dev $swp2 egress" 102 1
+ check_err $? "Known unicast filter was not hit after adding FDB entry"
+
+ # Deleting FDB entry.
+ bridge fdb del $dmac dev $swp2 master static
+
+ $MZ $h1 -a own -b $dmac -t ip -A $sip -B $dip -c 1 -p 100 -q
+
+ tc_check_packets "dev $swp2 egress" 101 2
+ check_err $? "Unknown unicast filter was not hit after deleting FDB entry"
+
+ tc_check_packets "dev $swp2 egress" 102 1
+ check_err $? "Known unicast filter was hit after deleting FDB entry"
+
+ tc filter del dev $swp2 egress protocol ipv4 pref 1 handle 102 flower
+ tc filter del dev $swp2 egress protocol ipv4 pref 1 handle 101 flower
+
+ log_test "L2 miss - Unicast"
+}
+
+test_l2_miss_multicast_common()
+{
+ local proto=$1; shift
+ local sip=$1; shift
+ local dip=$1; shift
+ local dmac=$1; shift
+ local mode=$1; shift
+ local name=$1; shift
+
+ RET=0
+
+ # Unregistered multicast.
+ tc filter add dev $swp2 egress protocol $proto handle 101 pref 1 \
+ flower indev $swp1 l2_miss 1 src_ip $sip dst_ip $dip \
+ action pass
+ # Registered multicast.
+ tc filter add dev $swp2 egress protocol $proto handle 102 pref 1 \
+ flower indev $swp1 l2_miss 0 src_ip $sip dst_ip $dip \
+ action pass
+
+ # Before adding MDB entry.
+ $MZ $mode $h1 -a own -b $dmac -t ip -A $sip -B $dip -c 1 -p 100 -q
+
+ tc_check_packets "dev $swp2 egress" 101 1
+ check_err $? "Unregistered multicast filter was not hit before adding MDB entry"
+
+ tc_check_packets "dev $swp2 egress" 102 0
+ check_err $? "Registered multicast filter was hit before adding MDB entry"
+
+ # Adding MDB entry.
+ bridge mdb replace dev br1 port $swp2 grp $dip permanent
+
+ $MZ $mode $h1 -a own -b $dmac -t ip -A $sip -B $dip -c 1 -p 100 -q
+
+ tc_check_packets "dev $swp2 egress" 101 1
+ check_err $? "Unregistered multicast filter was hit after adding MDB entry"
+
+ tc_check_packets "dev $swp2 egress" 102 1
+ check_err $? "Registered multicast filter was not hit after adding MDB entry"
+
+ # Deleting MDB entry.
+ bridge mdb del dev br1 port $swp2 grp $dip
+
+ $MZ $mode $h1 -a own -b $dmac -t ip -A $sip -B $dip -c 1 -p 100 -q
+
+ tc_check_packets "dev $swp2 egress" 101 2
+ check_err $? "Unregistered multicast filter was not hit after deleting MDB entry"
+
+ tc_check_packets "dev $swp2 egress" 102 1
+ check_err $? "Registered multicast filter was hit after deleting MDB entry"
+
+ tc filter del dev $swp2 egress protocol $proto pref 1 handle 102 flower
+ tc filter del dev $swp2 egress protocol $proto pref 1 handle 101 flower
+
+ log_test "L2 miss - Multicast ($name)"
+}
+
+test_l2_miss_multicast_ipv4()
+{
+ local proto="ipv4"
+ local sip=192.0.2.1
+ local dip=239.1.1.1
+ local dmac=01:00:5e:01:01:01
+ local mode="-4"
+ local name="IPv4"
+
+ test_l2_miss_multicast_common $proto $sip $dip $dmac $mode $name
+}
+
+test_l2_miss_multicast_ipv6()
+{
+ local proto="ipv6"
+ local sip=2001:db8:1::1
+ local dip=ff0e::1
+ local dmac=33:33:00:00:00:01
+ local mode="-6"
+ local name="IPv6"
+
+ test_l2_miss_multicast_common $proto $sip $dip $dmac $mode $name
+}
+
+test_l2_miss_multicast()
+{
+ # Configure $swp2 as a multicast router port so that it will forward
+ # both registered and unregistered multicast traffic.
+ bridge link set dev $swp2 mcast_router 2
+
+ # Set the Max Response Delay to 100 centiseconds (1 second) so that the
+ # bridge will start forwarding according to its MDB soon after a
+ # multicast querier is enabled.
+ ip link set dev br1 type bridge mcast_query_response_interval 100
+
+ # Forwarding according to MDB entries only takes place when the bridge
+ # detects that there is a valid querier in the network. Set the bridge
+ # as the querier and assign it a valid IPv6 link-local address to be
+ # used as the source address for MLD queries.
+ ip link set dev br1 type bridge mcast_querier 1
+ ip -6 address add fe80::1/64 nodad dev br1
+ sleep 10
+
+ test_l2_miss_multicast_ipv4
+ test_l2_miss_multicast_ipv6
+
+ ip -6 address del fe80::1/64 dev br1
+ ip link set dev br1 type bridge mcast_querier 0
+ ip link set dev br1 type bridge mcast_query_response_interval 1000
+ bridge link set dev $swp2 mcast_router 1
+}
+
+test_l2_miss_multicast_common2()
+{
+ local name=$1; shift
+ local dmac=$1; shift
+ local dip=224.0.0.1
+ local sip=192.0.2.1
+
+}
+
+test_l2_miss_ll_multicast_common()
+{
+ local proto=$1; shift
+ local dmac=$1; shift
+ local sip=$1; shift
+ local dip=$1; shift
+ local mode=$1; shift
+ local name=$1; shift
+
+ RET=0
+
+ tc filter add dev $swp2 egress protocol $proto handle 101 pref 1 \
+ flower indev $swp1 l2_miss 1 dst_mac $dmac src_ip $sip \
+ dst_ip $dip action pass
+
+ $MZ $mode $h1 -a own -b $dmac -t ip -A $sip -B $dip -c 1 -p 100 -q
+
+ tc_check_packets "dev $swp2 egress" 101 1
+ check_err $? "Filter was not hit"
+
+ tc filter del dev $swp2 egress protocol $proto pref 1 handle 101 flower
+
+ log_test "L2 miss - Link-local multicast ($name)"
+}
+
+test_l2_miss_ll_multicast_ipv4()
+{
+ local proto=ipv4
+ local dmac=01:00:5e:00:00:01
+ local sip=192.0.2.1
+ local dip=224.0.0.1
+ local mode="-4"
+ local name="IPv4"
+
+ test_l2_miss_ll_multicast_common $proto $dmac $sip $dip $mode $name
+}
+
+test_l2_miss_ll_multicast_ipv6()
+{
+ local proto=ipv6
+ local dmac=33:33:00:00:00:01
+ local sip=2001:db8:1::1
+ local dip=ff02::1
+ local mode="-6"
+ local name="IPv6"
+
+ test_l2_miss_ll_multicast_common $proto $dmac $sip $dip $mode $name
+}
+
+test_l2_miss_ll_multicast()
+{
+ test_l2_miss_ll_multicast_ipv4
+ test_l2_miss_ll_multicast_ipv6
+}
+
+test_l2_miss_broadcast()
+{
+ local dmac=ff:ff:ff:ff:ff:ff
+ local smac=00:01:02:03:04:05
+
+ RET=0
+
+ tc filter add dev $swp2 egress protocol all handle 101 pref 1 \
+ flower l2_miss 1 dst_mac $dmac src_mac $smac \
+ action pass
+ tc filter add dev $swp2 egress protocol all handle 102 pref 1 \
+ flower l2_miss 0 dst_mac $dmac src_mac $smac \
+ action pass
+
+ $MZ $h1 -a $smac -b $dmac -c 1 -p 100 -q
+
+ tc_check_packets "dev $swp2 egress" 101 0
+ check_err $? "L2 miss filter was hit when should not"
+
+ tc_check_packets "dev $swp2 egress" 102 1
+ check_err $? "L2 no miss filter was not hit when should"
+
+ tc filter del dev $swp2 egress protocol all pref 1 handle 102 flower
+ tc filter del dev $swp2 egress protocol all pref 1 handle 101 flower
+
+ log_test "L2 miss - Broadcast"
+}
+
+setup_prepare()
+{
+ h1=${NETIFS[p1]}
+ swp1=${NETIFS[p2]}
+
+ swp2=${NETIFS[p3]}
+ h2=${NETIFS[p4]}
+
+ vrf_prepare
+ h1_create
+ h2_create
+ switch_create
+}
+
+cleanup()
+{
+ pre_cleanup
+
+ switch_destroy
+ h2_destroy
+ h1_destroy
+ vrf_cleanup
+}
+
+trap cleanup EXIT
+
+setup_prepare
+setup_wait
+
+tests_run
+
+exit $EXIT_STATUS
diff --git a/tools/testing/selftests/net/forwarding/tc_flower_port_range.sh b/tools/testing/selftests/net/forwarding/tc_flower_port_range.sh
new file mode 100755
index 000000000000..3885a2a91f7d
--- /dev/null
+++ b/tools/testing/selftests/net/forwarding/tc_flower_port_range.sh
@@ -0,0 +1,228 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+# +-----------------------+ +----------------------+
+# | H1 (vrf) | | H2 (vrf) |
+# | + $h1 | | $h2 + |
+# | | 192.0.2.1/28 | | 192.0.2.2/28 | |
+# | | 2001:db8:1::1/64 | | 2001:db8:1::2/64 | |
+# +----|------------------+ +------------------|---+
+# | |
+# +----|-------------------------------------------------------------------|---+
+# | SW | | |
+# | +-|-------------------------------------------------------------------|-+ |
+# | | + $swp1 BR $swp2 + | |
+# | +-----------------------------------------------------------------------+ |
+# +----------------------------------------------------------------------------+
+
+ALL_TESTS="
+ test_port_range_ipv4_udp
+ test_port_range_ipv4_tcp
+ test_port_range_ipv6_udp
+ test_port_range_ipv6_tcp
+"
+
+NUM_NETIFS=4
+source lib.sh
+source tc_common.sh
+
+h1_create()
+{
+ simple_if_init $h1 192.0.2.1/28 2001:db8:1::1/64
+}
+
+h1_destroy()
+{
+ simple_if_fini $h1 192.0.2.1/28 2001:db8:1::1/64
+}
+
+h2_create()
+{
+ simple_if_init $h2 192.0.2.2/28 2001:db8:1::2/64
+}
+
+h2_destroy()
+{
+ simple_if_fini $h2 192.0.2.2/28 2001:db8:1::2/64
+}
+
+switch_create()
+{
+ ip link add name br1 type bridge
+ ip link set dev $swp1 master br1
+ ip link set dev $swp1 up
+ ip link set dev $swp2 master br1
+ ip link set dev $swp2 up
+ ip link set dev br1 up
+
+ tc qdisc add dev $swp1 clsact
+ tc qdisc add dev $swp2 clsact
+}
+
+switch_destroy()
+{
+ tc qdisc del dev $swp2 clsact
+ tc qdisc del dev $swp1 clsact
+
+ ip link set dev br1 down
+ ip link set dev $swp2 down
+ ip link set dev $swp2 nomaster
+ ip link set dev $swp1 down
+ ip link set dev $swp1 nomaster
+ ip link del dev br1
+}
+
+__test_port_range()
+{
+ local proto=$1; shift
+ local ip_proto=$1; shift
+ local sip=$1; shift
+ local dip=$1; shift
+ local mode=$1; shift
+ local name=$1; shift
+ local dmac=$(mac_get $h2)
+ local smac=$(mac_get $h1)
+ local sport_min=100
+ local sport_max=200
+ local sport_mid=$((sport_min + (sport_max - sport_min) / 2))
+ local dport_min=300
+ local dport_max=400
+ local dport_mid=$((dport_min + (dport_max - dport_min) / 2))
+
+ RET=0
+
+ tc filter add dev $swp1 ingress protocol $proto handle 101 pref 1 \
+ flower src_ip $sip dst_ip $dip ip_proto $ip_proto \
+ src_port $sport_min-$sport_max \
+ dst_port $dport_min-$dport_max \
+ action pass
+ tc filter add dev $swp2 egress protocol $proto handle 101 pref 1 \
+ flower src_ip $sip dst_ip $dip ip_proto $ip_proto \
+ src_port $sport_min-$sport_max \
+ dst_port $dport_min-$dport_max \
+ action drop
+
+ $MZ $mode $h1 -c 1 -q -p 100 -a $smac -b $dmac -A $sip -B $dip \
+ -t $ip_proto "sp=$sport_min,dp=$dport_min"
+ tc_check_packets "dev $swp1 ingress" 101 1
+ check_err $? "Ingress filter not hit with minimum ports"
+ tc_check_packets "dev $swp2 egress" 101 1
+ check_err $? "Egress filter not hit with minimum ports"
+
+ $MZ $mode $h1 -c 1 -q -p 100 -a $smac -b $dmac -A $sip -B $dip \
+ -t $ip_proto "sp=$sport_mid,dp=$dport_mid"
+ tc_check_packets "dev $swp1 ingress" 101 2
+ check_err $? "Ingress filter not hit with middle ports"
+ tc_check_packets "dev $swp2 egress" 101 2
+ check_err $? "Egress filter not hit with middle ports"
+
+ $MZ $mode $h1 -c 1 -q -p 100 -a $smac -b $dmac -A $sip -B $dip \
+ -t $ip_proto "sp=$sport_max,dp=$dport_max"
+ tc_check_packets "dev $swp1 ingress" 101 3
+ check_err $? "Ingress filter not hit with maximum ports"
+ tc_check_packets "dev $swp2 egress" 101 3
+ check_err $? "Egress filter not hit with maximum ports"
+
+ # Send traffic when both ports are out of range and when only one port
+ # is out of range.
+ $MZ $mode $h1 -c 1 -q -p 100 -a $smac -b $dmac -A $sip -B $dip \
+ -t $ip_proto "sp=$((sport_min - 1)),dp=$dport_min"
+ $MZ $mode $h1 -c 1 -q -p 100 -a $smac -b $dmac -A $sip -B $dip \
+ -t $ip_proto "sp=$((sport_max + 1)),dp=$dport_min"
+ $MZ $mode $h1 -c 1 -q -p 100 -a $smac -b $dmac -A $sip -B $dip \
+ -t $ip_proto "sp=$sport_min,dp=$((dport_min - 1))"
+ $MZ $mode $h1 -c 1 -q -p 100 -a $smac -b $dmac -A $sip -B $dip \
+ -t $ip_proto "sp=$sport_min,dp=$((dport_max + 1))"
+ $MZ $mode $h1 -c 1 -q -p 100 -a $smac -b $dmac -A $sip -B $dip \
+ -t $ip_proto "sp=$((sport_max + 1)),dp=$((dport_max + 1))"
+ tc_check_packets "dev $swp1 ingress" 101 3
+ check_err $? "Ingress filter was hit when should not"
+ tc_check_packets "dev $swp2 egress" 101 3
+ check_err $? "Egress filter was hit when should not"
+
+ tc filter del dev $swp2 egress protocol $proto pref 1 handle 101 flower
+ tc filter del dev $swp1 ingress protocol $proto pref 1 handle 101 flower
+
+ log_test "Port range matching - $name"
+}
+
+test_port_range_ipv4_udp()
+{
+ local proto=ipv4
+ local ip_proto=udp
+ local sip=192.0.2.1
+ local dip=192.0.2.2
+ local mode="-4"
+ local name="IPv4 UDP"
+
+ __test_port_range $proto $ip_proto $sip $dip $mode "$name"
+}
+
+test_port_range_ipv4_tcp()
+{
+ local proto=ipv4
+ local ip_proto=tcp
+ local sip=192.0.2.1
+ local dip=192.0.2.2
+ local mode="-4"
+ local name="IPv4 TCP"
+
+ __test_port_range $proto $ip_proto $sip $dip $mode "$name"
+}
+
+test_port_range_ipv6_udp()
+{
+ local proto=ipv6
+ local ip_proto=udp
+ local sip=2001:db8:1::1
+ local dip=2001:db8:1::2
+ local mode="-6"
+ local name="IPv6 UDP"
+
+ __test_port_range $proto $ip_proto $sip $dip $mode "$name"
+}
+
+test_port_range_ipv6_tcp()
+{
+ local proto=ipv6
+ local ip_proto=tcp
+ local sip=2001:db8:1::1
+ local dip=2001:db8:1::2
+ local mode="-6"
+ local name="IPv6 TCP"
+
+ __test_port_range $proto $ip_proto $sip $dip $mode "$name"
+}
+
+setup_prepare()
+{
+ h1=${NETIFS[p1]}
+ swp1=${NETIFS[p2]}
+
+ swp2=${NETIFS[p3]}
+ h2=${NETIFS[p4]}
+
+ vrf_prepare
+ h1_create
+ h2_create
+ switch_create
+}
+
+cleanup()
+{
+ pre_cleanup
+
+ switch_destroy
+ h2_destroy
+ h1_destroy
+ vrf_cleanup
+}
+
+trap cleanup EXIT
+
+setup_prepare
+setup_wait
+
+tests_run
+
+exit $EXIT_STATUS
diff --git a/tools/testing/selftests/net/forwarding/tc_mpls_l2vpn.sh b/tools/testing/selftests/net/forwarding/tc_mpls_l2vpn.sh
new file mode 100755
index 000000000000..03743f04e178
--- /dev/null
+++ b/tools/testing/selftests/net/forwarding/tc_mpls_l2vpn.sh
@@ -0,0 +1,192 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+# +-----------------------+
+# | H1 (v$h1) |
+# | 192.0.2.1/24 |
+# | 2001:db8::1/124 |
+# | + $h1 |
+# +-----------------|-----+
+# |
+# | (Plain Ethernet traffic)
+# |
+# +-----------------|-----------------------------------------+
+# | LER1 + $edge1 |
+# | -ingress: |
+# | -encapsulate Ethernet into MPLS |
+# | -add outer Ethernet header |
+# | -redirect to $mpls1 (egress) |
+# | |
+# | + $mpls1 |
+# | | -ingress: |
+# | | -remove outer Ethernet header |
+# | | -remove MPLS header |
+# | | -redirect to $edge1 (egress) |
+# +-----------------|-----------------------------------------+
+# |
+# | (Ethernet over MPLS traffic)
+# |
+# +-----------------|-----------------------------------------+
+# | LER2 + $mpls2 |
+# | -ingress: |
+# | -remove outer Ethernet header |
+# | -remove MPLS header |
+# | -redirect to $edge2 (egress) |
+# | |
+# | + $edge2 |
+# | | -ingress: |
+# | | -encapsulate Ethernet into MPLS |
+# | | -add outer Ethernet header |
+# | | -redirect to $mpls2 (egress) |
+# +-----------------|-----------------------------------------|
+# |
+# | (Plain Ethernet traffic)
+# |
+# +-----------------|-----+
+# | H2 (v$h2) | |
+# | + $h2 |
+# | 192.0.2.2/24 |
+# | 2001:db8::2/124 |
+# +-----------------------+
+#
+# LER1 and LER2 logically represent two different routers. However, no VRF is
+# created for them, as they don't do any IP routing.
+
+ALL_TESTS="mpls_forward_eth"
+NUM_NETIFS=6
+source lib.sh
+
+h1_create()
+{
+ simple_if_init $h1 192.0.2.1/24 2001:db8::1/124
+}
+
+h1_destroy()
+{
+ simple_if_fini $h1 192.0.2.1/24 2001:db8::1/124
+}
+
+h2_create()
+{
+ simple_if_init $h2 192.0.2.2/24 2001:db8::2/124
+}
+
+h2_destroy()
+{
+ simple_if_fini $h2 192.0.2.2/24 2001:db8::2/124
+}
+
+ler1_create()
+{
+ tc qdisc add dev $edge1 ingress
+ tc filter add dev $edge1 ingress \
+ matchall \
+ action mpls mac_push label 102 \
+ action vlan push_eth dst_mac $mpls2mac src_mac $mpls1mac \
+ action mirred egress redirect dev $mpls1
+ ip link set dev $edge1 up
+
+ tc qdisc add dev $mpls1 ingress
+ tc filter add dev $mpls1 ingress \
+ protocol mpls_uc \
+ flower mpls_label 101 \
+ action vlan pop_eth \
+ action mpls pop protocol teb \
+ action mirred egress redirect dev $edge1
+ ip link set dev $mpls1 up
+}
+
+ler1_destroy()
+{
+ ip link set dev $mpls1 down
+ tc qdisc del dev $mpls1 ingress
+
+ ip link set dev $edge1 down
+ tc qdisc del dev $edge1 ingress
+}
+
+ler2_create()
+{
+ tc qdisc add dev $edge2 ingress
+ tc filter add dev $edge2 ingress \
+ matchall \
+ action mpls mac_push label 101 \
+ action vlan push_eth dst_mac $mpls1mac src_mac $mpls2mac \
+ action mirred egress redirect dev $mpls2
+ ip link set dev $edge2 up
+
+ tc qdisc add dev $mpls2 ingress
+ tc filter add dev $mpls2 ingress \
+ protocol mpls_uc \
+ flower mpls_label 102 \
+ action vlan pop_eth \
+ action mpls pop protocol teb \
+ action mirred egress redirect dev $edge2
+ ip link set dev $mpls2 up
+}
+
+ler2_destroy()
+{
+ ip link set dev $mpls2 down
+ tc qdisc del dev $mpls2 ingress
+
+ ip link set dev $edge2 down
+ tc qdisc del dev $edge2 ingress
+}
+
+mpls_forward_eth()
+{
+ ping_test $h1 192.0.2.2
+ ping6_test $h1 2001:db8::2
+}
+
+setup_prepare()
+{
+ h1=${NETIFS[p1]}
+ edge1=${NETIFS[p2]}
+
+ mpls1=${NETIFS[p3]}
+ mpls2=${NETIFS[p4]}
+
+ edge2=${NETIFS[p5]}
+ h2=${NETIFS[p6]}
+
+ mpls1mac=$(mac_get $mpls1)
+ mpls2mac=$(mac_get $mpls2)
+
+ vrf_prepare
+
+ h1_create
+ h2_create
+ ler1_create
+ ler2_create
+}
+
+cleanup()
+{
+ pre_cleanup
+
+ ler2_destroy
+ ler1_destroy
+ h2_destroy
+ h1_destroy
+
+ vrf_cleanup
+}
+
+trap cleanup EXIT
+
+setup_prepare
+setup_wait
+
+tests_run
+
+tc_offload_check
+if [[ $? -ne 0 ]]; then
+ log_info "Could not test offloaded functionality"
+else
+ tcflags="skip_sw"
+ tests_run
+fi
+
+exit $EXIT_STATUS
diff --git a/tools/testing/selftests/net/forwarding/tc_police.sh b/tools/testing/selftests/net/forwarding/tc_police.sh
new file mode 100755
index 000000000000..5103f64a71d6
--- /dev/null
+++ b/tools/testing/selftests/net/forwarding/tc_police.sh
@@ -0,0 +1,441 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+#
+# Test tc-police action.
+#
+# +---------------------------------+
+# | H1 (vrf) |
+# | + $h1 |
+# | | 192.0.2.1/24 |
+# | | |
+# | | default via 192.0.2.2 |
+# +----|----------------------------+
+# |
+# +----|----------------------------------------------------------------------+
+# | SW | |
+# | + $rp1 |
+# | 192.0.2.2/24 |
+# | |
+# | 198.51.100.2/24 203.0.113.2/24 |
+# | + $rp2 + $rp3 |
+# | | | |
+# +----|-----------------------------------------|----------------------------+
+# | |
+# +----|----------------------------+ +----|----------------------------+
+# | | default via 198.51.100.2 | | | default via 203.0.113.2 |
+# | | | | | |
+# | | 198.51.100.1/24 | | | 203.0.113.1/24 |
+# | + $h2 | | + $h3 |
+# | H2 (vrf) | | H3 (vrf) |
+# +---------------------------------+ +---------------------------------+
+
+ALL_TESTS="
+ police_rx_test
+ police_tx_test
+ police_shared_test
+ police_rx_mirror_test
+ police_tx_mirror_test
+ police_pps_rx_test
+ police_pps_tx_test
+ police_mtu_rx_test
+ police_mtu_tx_test
+"
+NUM_NETIFS=6
+source tc_common.sh
+source lib.sh
+
+h1_create()
+{
+ simple_if_init $h1 192.0.2.1/24
+
+ ip -4 route add default vrf v$h1 nexthop via 192.0.2.2
+}
+
+h1_destroy()
+{
+ ip -4 route del default vrf v$h1 nexthop via 192.0.2.2
+
+ simple_if_fini $h1 192.0.2.1/24
+}
+
+h2_create()
+{
+ simple_if_init $h2 198.51.100.1/24
+
+ ip -4 route add default vrf v$h2 nexthop via 198.51.100.2
+
+ tc qdisc add dev $h2 clsact
+}
+
+h2_destroy()
+{
+ tc qdisc del dev $h2 clsact
+
+ ip -4 route del default vrf v$h2 nexthop via 198.51.100.2
+
+ simple_if_fini $h2 198.51.100.1/24
+}
+
+h3_create()
+{
+ simple_if_init $h3 203.0.113.1/24
+
+ ip -4 route add default vrf v$h3 nexthop via 203.0.113.2
+
+ tc qdisc add dev $h3 clsact
+}
+
+h3_destroy()
+{
+ tc qdisc del dev $h3 clsact
+
+ ip -4 route del default vrf v$h3 nexthop via 203.0.113.2
+
+ simple_if_fini $h3 203.0.113.1/24
+}
+
+router_create()
+{
+ ip link set dev $rp1 up
+ ip link set dev $rp2 up
+ ip link set dev $rp3 up
+
+ __addr_add_del $rp1 add 192.0.2.2/24
+ __addr_add_del $rp2 add 198.51.100.2/24
+ __addr_add_del $rp3 add 203.0.113.2/24
+
+ tc qdisc add dev $rp1 clsact
+ tc qdisc add dev $rp2 clsact
+}
+
+router_destroy()
+{
+ tc qdisc del dev $rp2 clsact
+ tc qdisc del dev $rp1 clsact
+
+ __addr_add_del $rp3 del 203.0.113.2/24
+ __addr_add_del $rp2 del 198.51.100.2/24
+ __addr_add_del $rp1 del 192.0.2.2/24
+
+ ip link set dev $rp3 down
+ ip link set dev $rp2 down
+ ip link set dev $rp1 down
+}
+
+police_common_test()
+{
+ local test_name=$1; shift
+
+ RET=0
+
+ # Rule to measure bandwidth on ingress of $h2
+ tc filter add dev $h2 ingress protocol ip pref 1 handle 101 flower \
+ dst_ip 198.51.100.1 ip_proto udp dst_port 54321 \
+ action drop
+
+ mausezahn $h1 -a own -b $(mac_get $rp1) -A 192.0.2.1 -B 198.51.100.1 \
+ -t udp sp=12345,dp=54321 -p 1000 -c 0 -q &
+
+ local t0=$(tc_rule_stats_get $h2 1 ingress .bytes)
+ sleep 10
+ local t1=$(tc_rule_stats_get $h2 1 ingress .bytes)
+
+ local er=$((10 * 1000 * 1000))
+ local nr=$(rate $t0 $t1 10)
+ local nr_pct=$((100 * (nr - er) / er))
+ ((-10 <= nr_pct && nr_pct <= 10))
+ check_err $? "Expected rate $(humanize $er), got $(humanize $nr), which is $nr_pct% off. Required accuracy is +-10%."
+
+ log_test "$test_name"
+
+ { kill %% && wait %%; } 2>/dev/null
+ tc filter del dev $h2 ingress protocol ip pref 1 handle 101 flower
+}
+
+police_rx_test()
+{
+ # Rule to police traffic destined to $h2 on ingress of $rp1
+ tc filter add dev $rp1 ingress protocol ip pref 1 handle 101 flower \
+ dst_ip 198.51.100.1 ip_proto udp dst_port 54321 \
+ action police rate 10mbit burst 16k conform-exceed drop/ok
+
+ police_common_test "police on rx"
+
+ tc filter del dev $rp1 ingress protocol ip pref 1 handle 101 flower
+}
+
+police_tx_test()
+{
+ # Rule to police traffic destined to $h2 on egress of $rp2
+ tc filter add dev $rp2 egress protocol ip pref 1 handle 101 flower \
+ dst_ip 198.51.100.1 ip_proto udp dst_port 54321 \
+ action police rate 10mbit burst 16k conform-exceed drop/ok
+
+ police_common_test "police on tx"
+
+ tc filter del dev $rp2 egress protocol ip pref 1 handle 101 flower
+}
+
+police_shared_common_test()
+{
+ local dport=$1; shift
+ local test_name=$1; shift
+
+ RET=0
+
+ mausezahn $h1 -a own -b $(mac_get $rp1) -A 192.0.2.1 -B 198.51.100.1 \
+ -t udp sp=12345,dp=$dport -p 1000 -c 0 -q &
+
+ local t0=$(tc_rule_stats_get $h2 1 ingress .bytes)
+ sleep 10
+ local t1=$(tc_rule_stats_get $h2 1 ingress .bytes)
+
+ local er=$((10 * 1000 * 1000))
+ local nr=$(rate $t0 $t1 10)
+ local nr_pct=$((100 * (nr - er) / er))
+ ((-10 <= nr_pct && nr_pct <= 10))
+ check_err $? "Expected rate $(humanize $er), got $(humanize $nr), which is $nr_pct% off. Required accuracy is +-10%."
+
+ log_test "$test_name"
+
+ { kill %% && wait %%; } 2>/dev/null
+}
+
+police_shared_test()
+{
+ # Rule to measure bandwidth on ingress of $h2
+ tc filter add dev $h2 ingress protocol ip pref 1 handle 101 flower \
+ dst_ip 198.51.100.1 ip_proto udp src_port 12345 \
+ action drop
+
+ # Rule to police traffic destined to $h2 on ingress of $rp1
+ tc filter add dev $rp1 ingress protocol ip pref 1 handle 101 flower \
+ dst_ip 198.51.100.1 ip_proto udp dst_port 54321 \
+ action police rate 10mbit burst 16k conform-exceed drop/ok \
+ index 10
+
+ # Rule to police a different flow destined to $h2 on egress of $rp2
+ # using same policer
+ tc filter add dev $rp2 egress protocol ip pref 1 handle 101 flower \
+ dst_ip 198.51.100.1 ip_proto udp dst_port 22222 \
+ action police index 10
+
+ police_shared_common_test 54321 "police with shared policer - rx"
+
+ police_shared_common_test 22222 "police with shared policer - tx"
+
+ tc filter del dev $rp2 egress protocol ip pref 1 handle 101 flower
+ tc filter del dev $rp1 ingress protocol ip pref 1 handle 101 flower
+ tc filter del dev $h2 ingress protocol ip pref 1 handle 101 flower
+}
+
+police_mirror_common_test()
+{
+ local pol_if=$1; shift
+ local dir=$1; shift
+ local test_name=$1; shift
+
+ RET=0
+
+ # Rule to measure bandwidth on ingress of $h2
+ tc filter add dev $h2 ingress protocol ip pref 1 handle 101 flower \
+ dst_ip 198.51.100.1 ip_proto udp dst_port 54321 \
+ action drop
+
+ # Rule to measure bandwidth of mirrored traffic on ingress of $h3
+ tc filter add dev $h3 ingress protocol ip pref 1 handle 101 flower \
+ dst_ip 198.51.100.1 ip_proto udp dst_port 54321 \
+ action drop
+
+ # Rule to police traffic destined to $h2 and mirror to $h3
+ tc filter add dev $pol_if $dir protocol ip pref 1 handle 101 flower \
+ dst_ip 198.51.100.1 ip_proto udp dst_port 54321 \
+ action police rate 10mbit burst 16k conform-exceed drop/pipe \
+ action mirred egress mirror dev $rp3
+
+ mausezahn $h1 -a own -b $(mac_get $rp1) -A 192.0.2.1 -B 198.51.100.1 \
+ -t udp sp=12345,dp=54321 -p 1000 -c 0 -q &
+
+ local t0=$(tc_rule_stats_get $h2 1 ingress .bytes)
+ sleep 10
+ local t1=$(tc_rule_stats_get $h2 1 ingress .bytes)
+
+ local er=$((10 * 1000 * 1000))
+ local nr=$(rate $t0 $t1 10)
+ local nr_pct=$((100 * (nr - er) / er))
+ ((-10 <= nr_pct && nr_pct <= 10))
+ check_err $? "Expected rate $(humanize $er), got $(humanize $nr), which is $nr_pct% off. Required accuracy is +-10%."
+
+ local t0=$(tc_rule_stats_get $h3 1 ingress .bytes)
+ sleep 10
+ local t1=$(tc_rule_stats_get $h3 1 ingress .bytes)
+
+ local er=$((10 * 1000 * 1000))
+ local nr=$(rate $t0 $t1 10)
+ local nr_pct=$((100 * (nr - er) / er))
+ ((-10 <= nr_pct && nr_pct <= 10))
+ check_err $? "Expected rate $(humanize $er), got $(humanize $nr), which is $nr_pct% off. Required accuracy is +-10%."
+
+ log_test "$test_name"
+
+ { kill %% && wait %%; } 2>/dev/null
+ tc filter del dev $pol_if $dir protocol ip pref 1 handle 101 flower
+ tc filter del dev $h3 ingress protocol ip pref 1 handle 101 flower
+ tc filter del dev $h2 ingress protocol ip pref 1 handle 101 flower
+}
+
+police_rx_mirror_test()
+{
+ police_mirror_common_test $rp1 ingress "police rx and mirror"
+}
+
+police_tx_mirror_test()
+{
+ police_mirror_common_test $rp2 egress "police tx and mirror"
+}
+
+police_pps_common_test()
+{
+ local test_name=$1; shift
+
+ RET=0
+
+ # Rule to measure bandwidth on ingress of $h2
+ tc filter add dev $h2 ingress protocol ip pref 1 handle 101 flower \
+ dst_ip 198.51.100.1 ip_proto udp dst_port 54321 \
+ action drop
+
+ mausezahn $h1 -a own -b $(mac_get $rp1) -A 192.0.2.1 -B 198.51.100.1 \
+ -t udp sp=12345,dp=54321 -p 1000 -c 0 -q &
+
+ local t0=$(tc_rule_stats_get $h2 1 ingress .packets)
+ sleep 10
+ local t1=$(tc_rule_stats_get $h2 1 ingress .packets)
+
+ local er=$((2000))
+ local nr=$(packets_rate $t0 $t1 10)
+ local nr_pct=$((100 * (nr - er) / er))
+ ((-10 <= nr_pct && nr_pct <= 10))
+ check_err $? "Expected rate $(humanize $er), got $(humanize $nr), which is $nr_pct% off. Required accuracy is +-10%."
+
+ log_test "$test_name"
+
+ { kill %% && wait %%; } 2>/dev/null
+ tc filter del dev $h2 ingress protocol ip pref 1 handle 101 flower
+}
+
+police_pps_rx_test()
+{
+ # Rule to police traffic destined to $h2 on ingress of $rp1
+ tc filter add dev $rp1 ingress protocol ip pref 1 handle 101 flower \
+ dst_ip 198.51.100.1 ip_proto udp dst_port 54321 \
+ action police pkts_rate 2000 pkts_burst 400 conform-exceed drop/ok
+
+ police_pps_common_test "police pps on rx"
+
+ tc filter del dev $rp1 ingress protocol ip pref 1 handle 101 flower
+}
+
+police_pps_tx_test()
+{
+ # Rule to police traffic destined to $h2 on egress of $rp2
+ tc filter add dev $rp2 egress protocol ip pref 1 handle 101 flower \
+ dst_ip 198.51.100.1 ip_proto udp dst_port 54321 \
+ action police pkts_rate 2000 pkts_burst 400 conform-exceed drop/ok
+
+ police_pps_common_test "police pps on tx"
+
+ tc filter del dev $rp2 egress protocol ip pref 1 handle 101 flower
+}
+
+police_mtu_common_test() {
+ RET=0
+
+ local test_name=$1; shift
+ local dev=$1; shift
+ local direction=$1; shift
+
+ tc filter add dev $dev $direction protocol ip pref 1 handle 101 flower \
+ dst_ip 198.51.100.1 ip_proto udp dst_port 54321 \
+ action police mtu 1042 conform-exceed drop/ok
+
+ # to count "conform" packets
+ tc filter add dev $h2 ingress protocol ip pref 1 handle 101 flower \
+ dst_ip 198.51.100.1 ip_proto udp dst_port 54321 \
+ action drop
+
+ mausezahn $h1 -a own -b $(mac_get $rp1) -A 192.0.2.1 -B 198.51.100.1 \
+ -t udp sp=12345,dp=54321 -p 1001 -c 10 -q
+
+ mausezahn $h1 -a own -b $(mac_get $rp1) -A 192.0.2.1 -B 198.51.100.1 \
+ -t udp sp=12345,dp=54321 -p 1000 -c 3 -q
+
+ tc_check_packets "dev $dev $direction" 101 13
+ check_err $? "wrong packet counter"
+
+ # "exceed" packets
+ local overlimits_t0=$(tc_rule_stats_get ${dev} 1 ${direction} .overlimits)
+ test ${overlimits_t0} = 10
+ check_err $? "wrong overlimits, expected 10 got ${overlimits_t0}"
+
+ # "conform" packets
+ tc_check_packets "dev $h2 ingress" 101 3
+ check_err $? "forwarding error"
+
+ tc filter del dev $h2 ingress protocol ip pref 1 handle 101 flower
+ tc filter del dev $dev $direction protocol ip pref 1 handle 101 flower
+
+ log_test "$test_name"
+}
+
+police_mtu_rx_test()
+{
+ police_mtu_common_test "police mtu (rx)" $rp1 ingress
+}
+
+police_mtu_tx_test()
+{
+ police_mtu_common_test "police mtu (tx)" $rp2 egress
+}
+
+setup_prepare()
+{
+ h1=${NETIFS[p1]}
+ rp1=${NETIFS[p2]}
+
+ rp2=${NETIFS[p3]}
+ h2=${NETIFS[p4]}
+
+ rp3=${NETIFS[p5]}
+ h3=${NETIFS[p6]}
+
+ vrf_prepare
+ forwarding_enable
+
+ h1_create
+ h2_create
+ h3_create
+ router_create
+}
+
+cleanup()
+{
+ pre_cleanup
+
+ router_destroy
+ h3_destroy
+ h2_destroy
+ h1_destroy
+
+ forwarding_restore
+ vrf_cleanup
+}
+
+trap cleanup EXIT
+
+setup_prepare
+setup_wait
+
+tests_run
+
+exit $EXIT_STATUS
diff --git a/tools/testing/selftests/net/forwarding/tc_tunnel_key.sh b/tools/testing/selftests/net/forwarding/tc_tunnel_key.sh
new file mode 100755
index 000000000000..5a5dd9034819
--- /dev/null
+++ b/tools/testing/selftests/net/forwarding/tc_tunnel_key.sh
@@ -0,0 +1,164 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+# Kselftest framework requirement - SKIP code is 4.
+ksft_skip=4
+
+ALL_TESTS="tunnel_key_nofrag_test"
+
+NUM_NETIFS=4
+source tc_common.sh
+source lib.sh
+
+tcflags="skip_hw"
+
+h1_create()
+{
+ simple_if_init $h1 192.0.2.1/24
+ forwarding_enable
+ mtu_set $h1 1500
+ tunnel_create h1-et vxlan 192.0.2.1 192.0.2.2 dev $h1 dstport 0 external
+ tc qdisc add dev h1-et clsact
+ mtu_set h1-et 1230
+ mtu_restore $h1
+ mtu_set $h1 1000
+}
+
+h1_destroy()
+{
+ tc qdisc del dev h1-et clsact
+ tunnel_destroy h1-et
+ forwarding_restore
+ mtu_restore $h1
+ simple_if_fini $h1 192.0.2.1/24
+}
+
+h2_create()
+{
+ simple_if_init $h2 192.0.2.2/24
+}
+
+h2_destroy()
+{
+ simple_if_fini $h2 192.0.2.2/24
+}
+
+switch_create()
+{
+ simple_if_init $swp1 192.0.2.2/24
+ tc qdisc add dev $swp1 clsact
+ simple_if_init $swp2 192.0.2.1/24
+}
+
+switch_destroy()
+{
+ simple_if_fini $swp2 192.0.2.1/24
+ tc qdisc del dev $swp1 clsact
+ simple_if_fini $swp1 192.0.2.2/24
+}
+
+setup_prepare()
+{
+ h1=${NETIFS[p1]}
+ swp1=${NETIFS[p2]}
+
+ swp2=${NETIFS[p3]}
+ h2=${NETIFS[p4]}
+
+ h1mac=$(mac_get $h1)
+ h2mac=$(mac_get $h2)
+
+ swp1origmac=$(mac_get $swp1)
+ swp2origmac=$(mac_get $swp2)
+ ip link set $swp1 address $h2mac
+ ip link set $swp2 address $h1mac
+
+ vrf_prepare
+
+ h1_create
+ h2_create
+ switch_create
+
+ if ! tc action add action tunnel_key help 2>&1 | grep -q nofrag; then
+ log_test "SKIP: iproute doesn't support nofrag"
+ exit $ksft_skip
+ fi
+}
+
+cleanup()
+{
+ pre_cleanup
+
+ switch_destroy
+ h2_destroy
+ h1_destroy
+
+ vrf_cleanup
+
+ ip link set $swp2 address $swp2origmac
+ ip link set $swp1 address $swp1origmac
+}
+
+tunnel_key_nofrag_test()
+{
+ RET=0
+ local i
+
+ tc filter add dev $swp1 ingress protocol ip pref 100 handle 100 \
+ flower src_ip 192.0.2.1 dst_ip 192.0.2.2 ip_proto udp \
+ ip_flags nofrag action drop
+ tc filter add dev $swp1 ingress protocol ip pref 101 handle 101 \
+ flower src_ip 192.0.2.1 dst_ip 192.0.2.2 ip_proto udp \
+ ip_flags firstfrag action drop
+ tc filter add dev $swp1 ingress protocol ip pref 102 handle 102 \
+ flower src_ip 192.0.2.1 dst_ip 192.0.2.2 ip_proto udp \
+ ip_flags nofirstfrag action drop
+
+ # test 'nofrag' set
+ tc filter add dev h1-et egress protocol all pref 1 handle 1 matchall $tcflags \
+ action tunnel_key set src_ip 192.0.2.1 dst_ip 192.0.2.2 id 42 nofrag index 10
+ $MZ h1-et -c 1 -p 930 -a 00:aa:bb:cc:dd:ee -b 00:ee:dd:cc:bb:aa -t ip -q
+ tc_check_packets "dev $swp1 ingress" 100 1
+ check_err $? "packet smaller than MTU was not tunneled"
+
+ $MZ h1-et -c 1 -p 931 -a 00:aa:bb:cc:dd:ee -b 00:ee:dd:cc:bb:aa -t ip -q
+ tc_check_packets "dev $swp1 ingress" 100 1
+ check_err $? "packet bigger than MTU matched nofrag (nofrag was set)"
+ tc_check_packets "dev $swp1 ingress" 101 0
+ check_err $? "packet bigger than MTU matched firstfrag (nofrag was set)"
+ tc_check_packets "dev $swp1 ingress" 102 0
+ check_err $? "packet bigger than MTU matched nofirstfrag (nofrag was set)"
+
+ # test 'nofrag' cleared
+ tc actions change action tunnel_key set src_ip 192.0.2.1 dst_ip 192.0.2.2 id 42 index 10
+ $MZ h1-et -c 1 -p 931 -a 00:aa:bb:cc:dd:ee -b 00:ee:dd:cc:bb:aa -t ip -q
+ tc_check_packets "dev $swp1 ingress" 100 1
+ check_err $? "packet bigger than MTU matched nofrag (nofrag was unset)"
+ tc_check_packets "dev $swp1 ingress" 101 1
+ check_err $? "packet bigger than MTU didn't match firstfrag (nofrag was unset) "
+ tc_check_packets "dev $swp1 ingress" 102 1
+ check_err $? "packet bigger than MTU didn't match nofirstfrag (nofrag was unset) "
+
+ for i in 100 101 102; do
+ tc filter del dev $swp1 ingress protocol ip pref $i handle $i flower
+ done
+ tc filter del dev h1-et egress pref 1 handle 1 matchall
+
+ log_test "tunnel_key nofrag ($tcflags)"
+}
+
+trap cleanup EXIT
+
+setup_prepare
+setup_wait
+
+tests_run
+
+tc_offload_check
+if [[ $? -ne 0 ]]; then
+ log_info "Could not test offloaded functionality"
+else
+ tcflags="skip_sw"
+ tests_run
+fi
+
+exit $EXIT_STATUS
diff --git a/tools/testing/selftests/net/forwarding/tsn_lib.sh b/tools/testing/selftests/net/forwarding/tsn_lib.sh
new file mode 100644
index 000000000000..b91bcd8008a9
--- /dev/null
+++ b/tools/testing/selftests/net/forwarding/tsn_lib.sh
@@ -0,0 +1,249 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+# Copyright 2021-2022 NXP
+
+REQUIRE_ISOCHRON=${REQUIRE_ISOCHRON:=yes}
+REQUIRE_LINUXPTP=${REQUIRE_LINUXPTP:=yes}
+
+# Tunables
+UTC_TAI_OFFSET=37
+ISOCHRON_CPU=1
+
+if [[ "$REQUIRE_ISOCHRON" = "yes" ]]; then
+ # https://github.com/vladimiroltean/tsn-scripts
+ # WARNING: isochron versions pre-1.0 are unstable,
+ # always use the latest version
+ require_command isochron
+fi
+if [[ "$REQUIRE_LINUXPTP" = "yes" ]]; then
+ require_command phc2sys
+ require_command ptp4l
+fi
+
+phc2sys_start()
+{
+ local uds_address=$1
+ local extra_args=""
+
+ if ! [ -z "${uds_address}" ]; then
+ extra_args="${extra_args} -z ${uds_address}"
+ fi
+
+ phc2sys_log="$(mktemp)"
+
+ chrt -f 10 phc2sys -m \
+ -a -rr \
+ --step_threshold 0.00002 \
+ --first_step_threshold 0.00002 \
+ ${extra_args} \
+ > "${phc2sys_log}" 2>&1 &
+ phc2sys_pid=$!
+
+ echo "phc2sys logs to ${phc2sys_log} and has pid ${phc2sys_pid}"
+
+ sleep 1
+}
+
+phc2sys_stop()
+{
+ { kill ${phc2sys_pid} && wait ${phc2sys_pid}; } 2> /dev/null
+ rm "${phc2sys_log}" 2> /dev/null
+}
+
+# Replace space separators from interface list with underscores
+if_names_to_label()
+{
+ local if_name_list="$1"
+
+ echo "${if_name_list/ /_}"
+}
+
+ptp4l_start()
+{
+ local if_names="$1"
+ local slave_only=$2
+ local uds_address=$3
+ local log="ptp4l_log_$(if_names_to_label ${if_names})"
+ local pid="ptp4l_pid_$(if_names_to_label ${if_names})"
+ local extra_args=""
+
+ for if_name in ${if_names}; do
+ extra_args="${extra_args} -i ${if_name}"
+ done
+
+ if [ "${slave_only}" = true ]; then
+ extra_args="${extra_args} -s"
+ fi
+
+ # declare dynamic variables ptp4l_log_${if_name} and ptp4l_pid_${if_name}
+ # as global, so that they can be referenced later
+ declare -g "${log}=$(mktemp)"
+
+ chrt -f 10 ptp4l -m -2 -P \
+ --step_threshold 0.00002 \
+ --first_step_threshold 0.00002 \
+ --tx_timestamp_timeout 100 \
+ --uds_address="${uds_address}" \
+ ${extra_args} \
+ > "${!log}" 2>&1 &
+ declare -g "${pid}=$!"
+
+ echo "ptp4l for interfaces ${if_names} logs to ${!log} and has pid ${!pid}"
+
+ sleep 1
+}
+
+ptp4l_stop()
+{
+ local if_names="$1"
+ local log="ptp4l_log_$(if_names_to_label ${if_names})"
+ local pid="ptp4l_pid_$(if_names_to_label ${if_names})"
+
+ { kill ${!pid} && wait ${!pid}; } 2> /dev/null
+ rm "${!log}" 2> /dev/null
+}
+
+cpufreq_max()
+{
+ local cpu=$1
+ local freq="cpu${cpu}_freq"
+ local governor="cpu${cpu}_governor"
+
+ # Kernel may be compiled with CONFIG_CPU_FREQ disabled
+ if ! [ -d /sys/bus/cpu/devices/cpu${cpu}/cpufreq ]; then
+ return
+ fi
+
+ # declare dynamic variables cpu${cpu}_freq and cpu${cpu}_governor as
+ # global, so they can be referenced later
+ declare -g "${freq}=$(cat /sys/bus/cpu/devices/cpu${cpu}/cpufreq/scaling_min_freq)"
+ declare -g "${governor}=$(cat /sys/bus/cpu/devices/cpu${cpu}/cpufreq/scaling_governor)"
+
+ cat /sys/bus/cpu/devices/cpu${cpu}/cpufreq/scaling_max_freq > \
+ /sys/bus/cpu/devices/cpu${cpu}/cpufreq/scaling_min_freq
+ echo -n "performance" > \
+ /sys/bus/cpu/devices/cpu${cpu}/cpufreq/scaling_governor
+}
+
+cpufreq_restore()
+{
+ local cpu=$1
+ local freq="cpu${cpu}_freq"
+ local governor="cpu${cpu}_governor"
+
+ if ! [ -d /sys/bus/cpu/devices/cpu${cpu}/cpufreq ]; then
+ return
+ fi
+
+ echo "${!freq}" > /sys/bus/cpu/devices/cpu${cpu}/cpufreq/scaling_min_freq
+ echo -n "${!governor}" > \
+ /sys/bus/cpu/devices/cpu${cpu}/cpufreq/scaling_governor
+}
+
+isochron_recv_start()
+{
+ local if_name=$1
+ local uds=$2
+ local stats_port=$3
+ local extra_args=$4
+ local pid="isochron_pid_${stats_port}"
+
+ if ! [ -z "${uds}" ]; then
+ extra_args="${extra_args} --unix-domain-socket ${uds}"
+ fi
+
+ isochron rcv \
+ --interface ${if_name} \
+ --sched-priority 98 \
+ --sched-fifo \
+ --utc-tai-offset ${UTC_TAI_OFFSET} \
+ --stats-port ${stats_port} \
+ --quiet \
+ ${extra_args} & \
+ declare -g "${pid}=$!"
+
+ sleep 1
+}
+
+isochron_recv_stop()
+{
+ local stats_port=$1
+ local pid="isochron_pid_${stats_port}"
+
+ { kill ${!pid} && wait ${!pid}; } 2> /dev/null
+}
+
+isochron_do()
+{
+ local sender_if_name=$1; shift
+ local receiver_if_name=$1; shift
+ local sender_uds=$1; shift
+ local receiver_uds=$1; shift
+ local base_time=$1; shift
+ local cycle_time=$1; shift
+ local shift_time=$1; shift
+ local num_pkts=$1; shift
+ local vid=$1; shift
+ local priority=$1; shift
+ local dst_ip=$1; shift
+ local isochron_dat=$1; shift
+ local extra_args=""
+ local receiver_extra_args=""
+ local vrf="$(master_name_get ${sender_if_name})"
+ local use_l2="true"
+
+ if ! [ -z "${dst_ip}" ]; then
+ use_l2="false"
+ fi
+
+ if ! [ -z "${vrf}" ]; then
+ dst_ip="${dst_ip}%${vrf}"
+ fi
+
+ if ! [ -z "${vid}" ]; then
+ vid="--vid=${vid}"
+ fi
+
+ if [ -z "${receiver_uds}" ]; then
+ extra_args="${extra_args} --omit-remote-sync"
+ fi
+
+ if ! [ -z "${shift_time}" ]; then
+ extra_args="${extra_args} --shift-time=${shift_time}"
+ fi
+
+ if [ "${use_l2}" = "true" ]; then
+ extra_args="${extra_args} --l2 --etype=0xdead ${vid}"
+ receiver_extra_args="--l2 --etype=0xdead"
+ else
+ extra_args="${extra_args} --l4 --ip-destination=${dst_ip}"
+ receiver_extra_args="--l4"
+ fi
+
+ cpufreq_max ${ISOCHRON_CPU}
+
+ isochron_recv_start "${h2}" "${receiver_uds}" 5000 "${receiver_extra_args}"
+
+ isochron send \
+ --interface ${sender_if_name} \
+ --unix-domain-socket ${sender_uds} \
+ --priority ${priority} \
+ --base-time ${base_time} \
+ --cycle-time ${cycle_time} \
+ --num-frames ${num_pkts} \
+ --frame-size 64 \
+ --txtime \
+ --utc-tai-offset ${UTC_TAI_OFFSET} \
+ --cpu-mask $((1 << ${ISOCHRON_CPU})) \
+ --sched-fifo \
+ --sched-priority 98 \
+ --client 127.0.0.1 \
+ --sync-threshold 5000 \
+ --output-file ${isochron_dat} \
+ ${extra_args} \
+ --quiet
+
+ isochron_recv_stop 5000
+
+ cpufreq_restore ${ISOCHRON_CPU}
+}
diff --git a/tools/testing/selftests/net/forwarding/vxlan_asymmetric.sh b/tools/testing/selftests/net/forwarding/vxlan_asymmetric.sh
index a0b5f57d6bd3..43469c7de118 100755
--- a/tools/testing/selftests/net/forwarding/vxlan_asymmetric.sh
+++ b/tools/testing/selftests/net/forwarding/vxlan_asymmetric.sh
@@ -215,10 +215,16 @@ switch_create()
bridge fdb add 00:00:5e:00:01:01 dev br1 self local vlan 10
bridge fdb add 00:00:5e:00:01:01 dev br1 self local vlan 20
+
+ sysctl_set net.ipv4.conf.all.rp_filter 0
+ sysctl_set net.ipv4.conf.vlan10-v.rp_filter 0
+ sysctl_set net.ipv4.conf.vlan20-v.rp_filter 0
}
switch_destroy()
{
+ sysctl_restore net.ipv4.conf.all.rp_filter
+
bridge fdb del 00:00:5e:00:01:01 dev br1 self local vlan 20
bridge fdb del 00:00:5e:00:01:01 dev br1 self local vlan 10
@@ -359,6 +365,10 @@ ns_switch_create()
bridge fdb add 00:00:5e:00:01:01 dev br1 self local vlan 10
bridge fdb add 00:00:5e:00:01:01 dev br1 self local vlan 20
+
+ sysctl_set net.ipv4.conf.all.rp_filter 0
+ sysctl_set net.ipv4.conf.vlan10-v.rp_filter 0
+ sysctl_set net.ipv4.conf.vlan20-v.rp_filter 0
}
export -f ns_switch_create
@@ -515,7 +525,7 @@ arp_suppression()
log_test "neigh_suppress: on / neigh exists: yes"
- # Delete the neighbour from the the SVI. A single ARP request should be
+ # Delete the neighbour from the SVI. A single ARP request should be
# received by the remote VTEP
RET=0
diff --git a/tools/testing/selftests/net/forwarding/vxlan_asymmetric_ipv6.sh b/tools/testing/selftests/net/forwarding/vxlan_asymmetric_ipv6.sh
new file mode 100755
index 000000000000..f4930098974f
--- /dev/null
+++ b/tools/testing/selftests/net/forwarding/vxlan_asymmetric_ipv6.sh
@@ -0,0 +1,504 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+# +--------------------------------+ +-----------------------------+
+# | vrf-h1 | | vrf-h2 |
+# | + $h1 | | + $h2 |
+# | | 2001:db8:1::1/64 | | | 2001:db8:2::1/64 |
+# | | default via 2001:db8:1::3 | | | default via 2001:db8:2::3 |
+# +----|---------------------------+ +-|---------------------------+
+# | |
+# +----|------------------------------------------|---------------------------+
+# | SW | | |
+# | +--|------------------------------------------|-------------------------+ |
+# | | + $swp1 br1 + $swp2 | |
+# | | vid 10 pvid untagged vid 20 pvid untagged | |
+# | | | |
+# | | + vx10 + vx20 | |
+# | | local 2001:db8:3::1 local 2001:db8:3::1 | |
+# | | remote 2001:db8:3::2 remote 2001:db8:3::2 | |
+# | | id 1000 id 2000 | |
+# | | dstport 4789 dstport 4789 | |
+# | | vid 10 pvid untagged vid 20 pvid untagged | |
+# | | | |
+# | +-----------------------------------+-----------------------------------+ |
+# | | |
+# | +-----------------------------------|-----------------------------------+ |
+# | | | | |
+# | | +--------------------------------+--------------------------------+ | |
+# | | | | | |
+# | | + vlan10 vlan20 + | |
+# | | | 2001:db8:1::2/64 2001:db8:2::2/64 | | |
+# | | | | | |
+# | | + vlan10-v (macvlan) vlan20-v (macvlan) + | |
+# | | 2001:db8:1::3/64 2001:db8:2::3/64 | |
+# | | 00:00:5e:00:01:01 00:00:5e:00:01:01 | |
+# | | vrf-green | |
+# | +-----------------------------------------------------------------------+ |
+# | |
+# | + $rp1 +lo |
+# | | 2001:db8:4::1/64 2001:db8:3::1/128 |
+# +----|----------------------------------------------------------------------+
+# |
+# +----|--------------------------------------------------------+
+# | | vrf-spine |
+# | + $rp2 |
+# | 2001:db8:4::2/64 |
+# | | (maybe) HW
+# =============================================================================
+# | | (likely) SW
+# | |
+# | + v1 (veth) |
+# | | 2001:db8:5::2/64 |
+# +----|--------------------------------------------------------+
+# |
+# +----|----------------------------------------------------------------------+
+# | + v2 (veth) +lo NS1 (netns) |
+# | 2001:db8:5::1/64 2001:db8:3::2/128 |
+# | |
+# | +-----------------------------------------------------------------------+ |
+# | | vrf-green | |
+# | | + vlan10-v (macvlan) vlan20-v (macvlan) + | |
+# | | | 2001:db8:1::3/64 2001:db8:2::3/64 | | |
+# | | | 00:00:5e:00:01:01 00:00:5e:00:01:01 | | |
+# | | | | | |
+# | | + vlan10 vlan20 + | |
+# | | | 2001:db8:1::3/64 2001:db8:2::3/64 | | |
+# | | | | | |
+# | | +--------------------------------+--------------------------------+ | |
+# | | | | |
+# | +-----------------------------------|-----------------------------------+ |
+# | | |
+# | +-----------------------------------+-----------------------------------+ |
+# | | | |
+# | | + vx10 + vx20 | |
+# | | local 2001:db8:3::2 local 2001:db8:3::2 | |
+# | | remote 2001:db8:3::1 remote 2001:db8:3::1 | |
+# | | id 1000 id 2000 | |
+# | | dstport 4789 dstport 4789 | |
+# | | vid 10 pvid untagged vid 20 pvid untagged | |
+# | | | |
+# | | + w1 (veth) + w3 (veth) | |
+# | | | vid 10 pvid untagged br1 | vid 20 pvid untagged | |
+# | +--|------------------------------------------|-------------------------+ |
+# | | | |
+# | | | |
+# | +--|----------------------+ +--|-------------------------+ |
+# | | | vrf-h1 | | | vrf-h2 | |
+# | | + w2 (veth) | | + w4 (veth) | |
+# | | 2001:db8:1::4/64 | | 2001:db8:2::4/64 | |
+# | | default via | | default via | |
+# | | 2001:db8:1::3/64 | | 2001:db8:2::3/64 | |
+# | +-------------------------+ +----------------------------+ |
+# +---------------------------------------------------------------------------+
+
+ALL_TESTS="
+ ping_ipv6
+ arp_decap
+"
+NUM_NETIFS=6
+source lib.sh
+
+require_command $ARPING
+
+hx_create()
+{
+ local vrf_name=$1; shift
+ local if_name=$1; shift
+ local ip_addr=$1; shift
+ local gw_ip=$1; shift
+
+ vrf_create $vrf_name
+ ip link set dev $if_name master $vrf_name
+ ip link set dev $vrf_name up
+ ip link set dev $if_name up
+
+ ip address add $ip_addr/64 dev $if_name
+ ip neigh replace $gw_ip lladdr 00:00:5e:00:01:01 nud permanent \
+ dev $if_name
+ ip route add default vrf $vrf_name nexthop via $gw_ip
+}
+export -f hx_create
+
+hx_destroy()
+{
+ local vrf_name=$1; shift
+ local if_name=$1; shift
+ local ip_addr=$1; shift
+ local gw_ip=$1; shift
+
+ ip route del default vrf $vrf_name nexthop via $gw_ip
+ ip neigh del $gw_ip dev $if_name
+ ip address del $ip_addr/64 dev $if_name
+
+ ip link set dev $if_name down
+ vrf_destroy $vrf_name
+}
+
+h1_create()
+{
+ hx_create "vrf-h1" $h1 2001:db8:1::1 2001:db8:1::3
+}
+
+h1_destroy()
+{
+ hx_destroy "vrf-h1" $h1 2001:db8:1::1 2001:db8:1::3
+}
+
+h2_create()
+{
+ hx_create "vrf-h2" $h2 2001:db8:2::1 2001:db8:2::3
+}
+
+h2_destroy()
+{
+ hx_destroy "vrf-h2" $h2 2001:db8:2::1 2001:db8:2::3
+}
+
+switch_create()
+{
+ ip link add name br1 type bridge vlan_filtering 1 vlan_default_pvid 0 \
+ mcast_snooping 0
+ # Make sure the bridge uses the MAC address of the local port and not
+ # that of the VxLAN's device.
+ ip link set dev br1 address $(mac_get $swp1)
+ ip link set dev br1 up
+
+ ip link set dev $rp1 up
+ ip address add dev $rp1 2001:db8:4::1/64
+ ip route add 2001:db8:3::2/128 nexthop via 2001:db8:4::2
+
+ ip link add name vx10 type vxlan id 1000 \
+ local 2001:db8:3::1 remote 2001:db8:3::2 dstport 4789 \
+ nolearning udp6zerocsumrx udp6zerocsumtx tos inherit ttl 100
+ ip link set dev vx10 up
+
+ ip link set dev vx10 master br1
+ bridge vlan add vid 10 dev vx10 pvid untagged
+
+ ip link add name vx20 type vxlan id 2000 \
+ local 2001:db8:3::1 remote 2001:db8:3::2 dstport 4789 \
+ nolearning udp6zerocsumrx udp6zerocsumtx tos inherit ttl 100
+ ip link set dev vx20 up
+
+ ip link set dev vx20 master br1
+ bridge vlan add vid 20 dev vx20 pvid untagged
+
+ ip link set dev $swp1 master br1
+ ip link set dev $swp1 up
+ bridge vlan add vid 10 dev $swp1 pvid untagged
+
+ ip link set dev $swp2 master br1
+ ip link set dev $swp2 up
+ bridge vlan add vid 20 dev $swp2 pvid untagged
+
+ ip address add 2001:db8:3::1/128 dev lo
+
+ # Create SVIs
+ vrf_create "vrf-green"
+ ip link set dev vrf-green up
+
+ ip link add link br1 name vlan10 up master vrf-green type vlan id 10
+ ip address add 2001:db8:1::2/64 dev vlan10
+ ip link add link vlan10 name vlan10-v up master vrf-green \
+ address 00:00:5e:00:01:01 type macvlan mode private
+ ip address add 2001:db8:1::3/64 dev vlan10-v
+
+ ip link add link br1 name vlan20 up master vrf-green type vlan id 20
+ ip address add 2001:db8:2::2/64 dev vlan20
+ ip link add link vlan20 name vlan20-v up master vrf-green \
+ address 00:00:5e:00:01:01 type macvlan mode private
+ ip address add 2001:db8:2::3/64 dev vlan20-v
+
+ bridge vlan add vid 10 dev br1 self
+ bridge vlan add vid 20 dev br1 self
+
+ bridge fdb add 00:00:5e:00:01:01 dev br1 self local vlan 10
+ bridge fdb add 00:00:5e:00:01:01 dev br1 self local vlan 20
+
+}
+
+switch_destroy()
+{
+ bridge fdb del 00:00:5e:00:01:01 dev br1 self local vlan 20
+ bridge fdb del 00:00:5e:00:01:01 dev br1 self local vlan 10
+
+ bridge vlan del vid 20 dev br1 self
+ bridge vlan del vid 10 dev br1 self
+
+ ip link del dev vlan20
+
+ ip link del dev vlan10
+
+ vrf_destroy "vrf-green"
+
+ ip address del 2001:db8:3::1/128 dev lo
+
+ bridge vlan del vid 20 dev $swp2
+ ip link set dev $swp2 down
+ ip link set dev $swp2 nomaster
+
+ bridge vlan del vid 10 dev $swp1
+ ip link set dev $swp1 down
+ ip link set dev $swp1 nomaster
+
+ bridge vlan del vid 20 dev vx20
+ ip link set dev vx20 nomaster
+
+ ip link set dev vx20 down
+ ip link del dev vx20
+
+ bridge vlan del vid 10 dev vx10
+ ip link set dev vx10 nomaster
+
+ ip link set dev vx10 down
+ ip link del dev vx10
+
+ ip route del 2001:db8:3::2 nexthop via 2001:db8:4::2
+ ip address del dev $rp1 2001:db8:4::1/64
+ ip link set dev $rp1 down
+
+ ip link set dev br1 down
+ ip link del dev br1
+}
+
+spine_create()
+{
+ vrf_create "vrf-spine"
+ ip link set dev $rp2 master vrf-spine
+ ip link set dev v1 master vrf-spine
+ ip link set dev vrf-spine up
+ ip link set dev $rp2 up
+ ip link set dev v1 up
+
+ ip address add 2001:db8:4::2/64 dev $rp2
+ ip address add 2001:db8:5::2/64 dev v1
+
+ ip route add 2001:db8:3::1/128 vrf vrf-spine nexthop via \
+ 2001:db8:4::1
+ ip route add 2001:db8:3::2/128 vrf vrf-spine nexthop via \
+ 2001:db8:5::1
+}
+
+spine_destroy()
+{
+ ip route del 2001:db8:3::2/128 vrf vrf-spine nexthop via \
+ 2001:db8:5::1
+ ip route del 2001:db8:3::1/128 vrf vrf-spine nexthop via \
+ 2001:db8:4::1
+
+ ip address del 2001:db8:5::2/64 dev v1
+ ip address del 2001:db8:4::2/64 dev $rp2
+
+ ip link set dev v1 down
+ ip link set dev $rp2 down
+ vrf_destroy "vrf-spine"
+}
+
+ns_h1_create()
+{
+ hx_create "vrf-h1" w2 2001:db8:1::4 2001:db8:1::3
+}
+export -f ns_h1_create
+
+ns_h2_create()
+{
+ hx_create "vrf-h2" w4 2001:db8:2::4 2001:db8:2::3
+}
+export -f ns_h2_create
+
+ns_switch_create()
+{
+ ip link add name br1 type bridge vlan_filtering 1 vlan_default_pvid 0 \
+ mcast_snooping 0
+ ip link set dev br1 up
+
+ ip link set dev v2 up
+ ip address add dev v2 2001:db8:5::1/64
+ ip route add 2001:db8:3::1 nexthop via 2001:db8:5::2
+
+ ip link add name vx10 type vxlan id 1000 \
+ local 2001:db8:3::2 remote 2001:db8:3::1 dstport 4789 \
+ nolearning udp6zerocsumrx udp6zerocsumtx tos inherit ttl 100
+ ip link set dev vx10 up
+
+ ip link set dev vx10 master br1
+ bridge vlan add vid 10 dev vx10 pvid untagged
+
+ ip link add name vx20 type vxlan id 2000 \
+ local 2001:db8:3::2 remote 2001:db8:3::1 dstport 4789 \
+ nolearning udp6zerocsumrx udp6zerocsumtx tos inherit ttl 100
+ ip link set dev vx20 up
+
+ ip link set dev vx20 master br1
+ bridge vlan add vid 20 dev vx20 pvid untagged
+
+ ip link set dev w1 master br1
+ ip link set dev w1 up
+ bridge vlan add vid 10 dev w1 pvid untagged
+
+ ip link set dev w3 master br1
+ ip link set dev w3 up
+ bridge vlan add vid 20 dev w3 pvid untagged
+
+ ip address add 2001:db8:3::2/128 dev lo
+
+ # Create SVIs
+ vrf_create "vrf-green"
+ ip link set dev vrf-green up
+
+ ip link add link br1 name vlan10 up master vrf-green type vlan id 10
+ ip address add 2001:db8:1::3/64 dev vlan10
+ ip link add link vlan10 name vlan10-v up master vrf-green \
+ address 00:00:5e:00:01:01 type macvlan mode private
+ ip address add 2001:db8:1::3/64 dev vlan10-v
+
+ ip link add link br1 name vlan20 up master vrf-green type vlan id 20
+ ip address add 2001:db8:2::3/64 dev vlan20
+ ip link add link vlan20 name vlan20-v up master vrf-green \
+ address 00:00:5e:00:01:01 type macvlan mode private
+ ip address add 2001:db8:2::3/64 dev vlan20-v
+
+ bridge vlan add vid 10 dev br1 self
+ bridge vlan add vid 20 dev br1 self
+
+ bridge fdb add 00:00:5e:00:01:01 dev br1 self local vlan 10
+ bridge fdb add 00:00:5e:00:01:01 dev br1 self local vlan 20
+}
+export -f ns_switch_create
+
+ns_init()
+{
+ ip link add name w1 type veth peer name w2
+ ip link add name w3 type veth peer name w4
+
+ ip link set dev lo up
+
+ ns_h1_create
+ ns_h2_create
+ ns_switch_create
+}
+export -f ns_init
+
+ns1_create()
+{
+ ip netns add ns1
+ ip link set dev v2 netns ns1
+ in_ns ns1 ns_init
+}
+
+ns1_destroy()
+{
+ ip netns exec ns1 ip link set dev v2 netns 1
+ ip netns del ns1
+}
+
+macs_populate()
+{
+ local mac1=$1; shift
+ local mac2=$1; shift
+ local ip1=$1; shift
+ local ip2=$1; shift
+ local dst=$1; shift
+
+ bridge fdb add $mac1 dev vx10 self master extern_learn static \
+ dst $dst vlan 10
+ bridge fdb add $mac2 dev vx20 self master extern_learn static \
+ dst $dst vlan 20
+
+ ip neigh add $ip1 lladdr $mac1 nud noarp dev vlan10 \
+ extern_learn
+ ip neigh add $ip2 lladdr $mac2 nud noarp dev vlan20 \
+ extern_learn
+}
+export -f macs_populate
+
+macs_initialize()
+{
+ local h1_ns_mac=$(in_ns ns1 mac_get w2)
+ local h2_ns_mac=$(in_ns ns1 mac_get w4)
+ local h1_mac=$(mac_get $h1)
+ local h2_mac=$(mac_get $h2)
+
+ macs_populate $h1_ns_mac $h2_ns_mac 2001:db8:1::4 2001:db8:2::4 \
+ 2001:db8:3::2
+ in_ns ns1 macs_populate $h1_mac $h2_mac 2001:db8:1::1 2001:db8:2::1 \
+ 2001:db8:3::1
+}
+
+setup_prepare()
+{
+ h1=${NETIFS[p1]}
+ swp1=${NETIFS[p2]}
+
+ swp2=${NETIFS[p3]}
+ h2=${NETIFS[p4]}
+
+ rp1=${NETIFS[p5]}
+ rp2=${NETIFS[p6]}
+
+ vrf_prepare
+ forwarding_enable
+
+ h1_create
+ h2_create
+ switch_create
+
+ ip link add name v1 type veth peer name v2
+ spine_create
+ ns1_create
+ in_ns ns1 forwarding_enable
+
+ macs_initialize
+}
+
+cleanup()
+{
+ pre_cleanup
+
+ ns1_destroy
+ spine_destroy
+ ip link del dev v1
+
+ switch_destroy
+ h2_destroy
+ h1_destroy
+
+ forwarding_restore
+ vrf_cleanup
+}
+
+ping_ipv6()
+{
+ ping6_test $h1 2001:db8:2::1 ": local->local vid 10->vid 20"
+ ping6_test $h1 2001:db8:1::4 ": local->remote vid 10->vid 10"
+ ping6_test $h2 2001:db8:2::4 ": local->remote vid 20->vid 20"
+ ping6_test $h1 2001:db8:2::4 ": local->remote vid 10->vid 20"
+ ping6_test $h2 2001:db8:1::4 ": local->remote vid 20->vid 10"
+}
+
+arp_decap()
+{
+ # Repeat the ping tests, but without populating the neighbours. This
+ # makes sure we correctly decapsulate ARP packets
+ log_info "deleting neighbours from vlan interfaces"
+
+ ip neigh del 2001:db8:1::4 dev vlan10
+ ip neigh del 2001:db8:2::4 dev vlan20
+
+ ping_ipv6
+
+ ip neigh replace 2001:db8:1::4 lladdr $(in_ns ns1 mac_get w2) \
+ nud noarp dev vlan10 extern_learn
+ ip neigh replace 2001:db8:2::4 lladdr $(in_ns ns1 mac_get w4) \
+ nud noarp dev vlan20 extern_learn
+}
+
+trap cleanup EXIT
+
+setup_prepare
+setup_wait
+
+tests_run
+
+exit $EXIT_STATUS
diff --git a/tools/testing/selftests/net/forwarding/vxlan_bridge_1d.sh b/tools/testing/selftests/net/forwarding/vxlan_bridge_1d.sh
index ce6bea9675c0..6f0a2e452ba1 100755
--- a/tools/testing/selftests/net/forwarding/vxlan_bridge_1d.sh
+++ b/tools/testing/selftests/net/forwarding/vxlan_bridge_1d.sh
@@ -495,7 +495,7 @@ vxlan_ping_test()
local delta=$((t1 - t0))
# Tolerate a couple stray extra packets.
- ((expect <= delta && delta <= expect + 2))
+ ((expect <= delta && delta <= expect + 5))
check_err $? "$capture_dev: Expected to capture $expect packets, got $delta."
}
@@ -532,7 +532,7 @@ __test_ecn_encap()
RET=0
tc filter add dev v1 egress pref 77 prot ip \
- flower ip_tos $tos action pass
+ flower ip_tos $tos ip_proto udp dst_port $VXPORT action pass
sleep 1
vxlan_ping_test $h1 192.0.2.3 "-Q $q" v1 egress 77 10
tc filter del dev v1 egress pref 77 prot ip
@@ -657,10 +657,21 @@ test_ecn_decap()
{
# In accordance with INET_ECN_decapsulate()
__test_ecn_decap 00 00 0x00
+ __test_ecn_decap 00 01 0x00
+ __test_ecn_decap 00 02 0x00
+ # 00 03 is tested in test_ecn_decap_error()
+ __test_ecn_decap 01 00 0x01
__test_ecn_decap 01 01 0x01
- __test_ecn_decap 02 01 0x02
+ __test_ecn_decap 01 02 0x01
__test_ecn_decap 01 03 0x03
+ __test_ecn_decap 02 00 0x02
+ __test_ecn_decap 02 01 0x01
+ __test_ecn_decap 02 02 0x02
__test_ecn_decap 02 03 0x03
+ __test_ecn_decap 03 00 0x03
+ __test_ecn_decap 03 01 0x03
+ __test_ecn_decap 03 02 0x03
+ __test_ecn_decap 03 03 0x03
test_ecn_decap_error
}
diff --git a/tools/testing/selftests/net/forwarding/vxlan_bridge_1d_ipv6.sh b/tools/testing/selftests/net/forwarding/vxlan_bridge_1d_ipv6.sh
new file mode 100755
index 000000000000..a603f7b0a08f
--- /dev/null
+++ b/tools/testing/selftests/net/forwarding/vxlan_bridge_1d_ipv6.sh
@@ -0,0 +1,804 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+# +-----------------------+ +------------------------+
+# | H1 (vrf) | | H2 (vrf) |
+# | + $h1 | | + $h2 |
+# | | 192.0.2.1/28 | | | 192.0.2.2/28 |
+# | | 2001:db8:1::1/64 | | | 2001:db8:1::2/64 |
+# +----|------------------+ +----|-------------------+
+# | |
+# +----|--------------------------------------------------|-------------------+
+# | SW | | |
+# | +--|--------------------------------------------------|-----------------+ |
+# | | + $swp1 BR1 (802.1d) + $swp2 | |
+# | | | |
+# | | + vx1 (vxlan) | |
+# | | local 2001:db8:3::1 | |
+# | | remote 2001:db8:4::1 2001:db8:5::1 | |
+# | | id 1000 dstport $VXPORT | |
+# | +-----------------------------------------------------------------------+ |
+# | |
+# | 2001:db8:4::0/64 via 2001:db8:3::2 |
+# | 2001:db8:5::0/64 via 2001:db8:3::2 |
+# | |
+# | + $rp1 |
+# | | 2001:db8:3::1/64 |
+# +----|----------------------------------------------------------------------+
+# |
+# +----|----------------------------------------------------------+
+# | | VRP2 (vrf) |
+# | + $rp2 |
+# | 2001:db8:3::2/64 |
+# | | (maybe) HW
+# =============================================================================
+# | | (likely) SW
+# | + v1 (veth) + v3 (veth) |
+# | | 2001:db8:4::2/64 | 2001:db8:5::2/64 |
+# +----|---------------------------------------|------------------+
+# | |
+# +----|--------------------------------+ +----|-------------------------------+
+# | + v2 (veth) NS1 (netns) | | + v4 (veth) NS2 (netns) |
+# | 2001:db8:4::1/64 | | 2001:db8:5::1/64 |
+# | | | |
+# | 2001:db8:3::0/64 via 2001:db8:4::2 | | 2001:db8:3::0/64 via 2001:db8:5::2 |
+# | 2001:db8:5::1/128 via 2001:db8:4::2 | | 2001:db8:4::1/128 via |
+# | | | 2001:db8:5::2 |
+# | | | |
+# | +-------------------------------+ | | +-------------------------------+ |
+# | | BR2 (802.1d) | | | | BR2 (802.1d) | |
+# | | + vx2 (vxlan) | | | | + vx2 (vxlan) | |
+# | | local 2001:db8:4::1 | | | | local 2001:db8:5::1 | |
+# | | remote 2001:db8:3::1 | | | | remote 2001:db8:3::1 | |
+# | | remote 2001:db8:5::1 | | | | remote 2001:db8:4::1 | |
+# | | id 1000 dstport $VXPORT | | | | id 1000 dstport $VXPORT | |
+# | | | | | | | |
+# | | + w1 (veth) | | | | + w1 (veth) | |
+# | +--|----------------------------+ | | +--|----------------------------+ |
+# | | | | | |
+# | +--|----------------------------+ | | +--|----------------------------+ |
+# | | + w2 (veth) VW2 (vrf) | | | | + w2 (veth) VW2 (vrf) | |
+# | | 192.0.2.3/28 | | | | 192.0.2.4/28 | |
+# | | 2001:db8:1::3/64 | | | | 2001:db8:1::4/64 | |
+# | +-------------------------------+ | | +-------------------------------+ |
+# +-------------------------------------+ +------------------------------------+
+
+: ${VXPORT:=4789}
+export VXPORT
+
+: ${ALL_TESTS:="
+ ping_ipv4
+ ping_ipv6
+ test_flood
+ test_unicast
+ test_ttl
+ test_tos
+ test_ecn_encap
+ test_ecn_decap
+ reapply_config
+ ping_ipv4
+ ping_ipv6
+ test_flood
+ test_unicast
+"}
+
+NUM_NETIFS=6
+source lib.sh
+source tc_common.sh
+
+h1_create()
+{
+ simple_if_init $h1 192.0.2.1/28 2001:db8:1::1/64
+ tc qdisc add dev $h1 clsact
+}
+
+h1_destroy()
+{
+ tc qdisc del dev $h1 clsact
+ simple_if_fini $h1 192.0.2.1/28 2001:db8:1::1/64
+}
+
+h2_create()
+{
+ simple_if_init $h2 192.0.2.2/28 2001:db8:1::2/64
+ tc qdisc add dev $h2 clsact
+}
+
+h2_destroy()
+{
+ tc qdisc del dev $h2 clsact
+ simple_if_fini $h2 192.0.2.2/28 2001:db8:1::2/64
+}
+
+rp1_set_addr()
+{
+ ip address add dev $rp1 2001:db8:3::1/64
+
+ ip route add 2001:db8:4::0/64 nexthop via 2001:db8:3::2
+ ip route add 2001:db8:5::0/64 nexthop via 2001:db8:3::2
+}
+
+rp1_unset_addr()
+{
+ ip route del 2001:db8:5::0/64 nexthop via 2001:db8:3::2
+ ip route del 2001:db8:4::0/64 nexthop via 2001:db8:3::2
+
+ ip address del dev $rp1 2001:db8:3::1/64
+}
+
+switch_create()
+{
+ ip link add name br1 type bridge vlan_filtering 0 mcast_snooping 0
+ # Make sure the bridge uses the MAC address of the local port and not
+ # that of the VxLAN's device.
+ ip link set dev br1 address $(mac_get $swp1)
+ ip link set dev br1 up
+
+ ip link set dev $rp1 up
+ rp1_set_addr
+ tc qdisc add dev $rp1 clsact
+
+ ip link add name vx1 type vxlan id 1000 local 2001:db8:3::1 \
+ dstport "$VXPORT" nolearning udp6zerocsumrx udp6zerocsumtx \
+ tos inherit ttl 100
+ ip link set dev vx1 up
+
+ ip link set dev vx1 master br1
+ ip link set dev $swp1 master br1
+ ip link set dev $swp1 up
+ tc qdisc add dev $swp1 clsact
+
+ ip link set dev $swp2 master br1
+ ip link set dev $swp2 up
+
+ bridge fdb append dev vx1 00:00:00:00:00:00 dst 2001:db8:4::1 self
+ bridge fdb append dev vx1 00:00:00:00:00:00 dst 2001:db8:5::1 self
+}
+
+switch_destroy()
+{
+ bridge fdb del dev vx1 00:00:00:00:00:00 dst 2001:db8:5::1 self
+ bridge fdb del dev vx1 00:00:00:00:00:00 dst 2001:db8:4::1 self
+
+ ip link set dev $swp2 down
+ ip link set dev $swp2 nomaster
+
+ tc qdisc del dev $swp1 clsact
+ ip link set dev $swp1 down
+ ip link set dev $swp1 nomaster
+
+ ip link set dev vx1 nomaster
+ ip link set dev vx1 down
+ ip link del dev vx1
+
+ tc qdisc del dev $rp1 clsact
+ rp1_unset_addr
+ ip link set dev $rp1 down
+
+ ip link set dev br1 down
+ ip link del dev br1
+}
+
+vrp2_create()
+{
+ simple_if_init $rp2 2001:db8:3::2/64
+ __simple_if_init v1 v$rp2 2001:db8:4::2/64
+ __simple_if_init v3 v$rp2 2001:db8:5::2/64
+ tc qdisc add dev v1 clsact
+}
+
+vrp2_destroy()
+{
+ tc qdisc del dev v1 clsact
+ __simple_if_fini v3 2001:db8:5::2/64
+ __simple_if_fini v1 2001:db8:4::2/64
+ simple_if_fini $rp2 2001:db8:3::2/64
+}
+
+ns_init_common()
+{
+ local in_if=$1; shift
+ local in_addr=$1; shift
+ local other_in_addr=$1; shift
+ local nh_addr=$1; shift
+ local host_addr_ipv4=$1; shift
+ local host_addr_ipv6=$1; shift
+
+ ip link set dev $in_if up
+ ip address add dev $in_if $in_addr/64
+ tc qdisc add dev $in_if clsact
+
+ ip link add name br2 type bridge vlan_filtering 0
+ ip link set dev br2 up
+
+ ip link add name w1 type veth peer name w2
+
+ ip link set dev w1 master br2
+ ip link set dev w1 up
+
+ ip link add name vx2 type vxlan id 1000 local $in_addr \
+ dstport "$VXPORT" udp6zerocsumrx
+ ip link set dev vx2 up
+ bridge fdb append dev vx2 00:00:00:00:00:00 dst 2001:db8:3::1 self
+ bridge fdb append dev vx2 00:00:00:00:00:00 dst $other_in_addr self
+
+ ip link set dev vx2 master br2
+ tc qdisc add dev vx2 clsact
+
+ simple_if_init w2 $host_addr_ipv4/28 $host_addr_ipv6/64
+
+ ip route add 2001:db8:3::0/64 nexthop via $nh_addr
+ ip route add $other_in_addr/128 nexthop via $nh_addr
+}
+export -f ns_init_common
+
+ns1_create()
+{
+ ip netns add ns1
+ ip link set dev v2 netns ns1
+ in_ns ns1 \
+ ns_init_common v2 2001:db8:4::1 2001:db8:5::1 2001:db8:4::2 \
+ 192.0.2.3 2001:db8:1::3
+}
+
+ns1_destroy()
+{
+ ip netns exec ns1 ip link set dev v2 netns 1
+ ip netns del ns1
+}
+
+ns2_create()
+{
+ ip netns add ns2
+ ip link set dev v4 netns ns2
+ in_ns ns2 \
+ ns_init_common v4 2001:db8:5::1 2001:db8:4::1 2001:db8:5::2 \
+ 192.0.2.4 2001:db8:1::4
+}
+
+ns2_destroy()
+{
+ ip netns exec ns2 ip link set dev v4 netns 1
+ ip netns del ns2
+}
+
+setup_prepare()
+{
+ h1=${NETIFS[p1]}
+ swp1=${NETIFS[p2]}
+
+ swp2=${NETIFS[p3]}
+ h2=${NETIFS[p4]}
+
+ rp1=${NETIFS[p5]}
+ rp2=${NETIFS[p6]}
+
+ vrf_prepare
+ forwarding_enable
+
+ h1_create
+ h2_create
+ switch_create
+
+ ip link add name v1 type veth peer name v2
+ ip link add name v3 type veth peer name v4
+ vrp2_create
+ ns1_create
+ ns2_create
+
+ r1_mac=$(in_ns ns1 mac_get w2)
+ r2_mac=$(in_ns ns2 mac_get w2)
+ h2_mac=$(mac_get $h2)
+}
+
+cleanup()
+{
+ pre_cleanup
+
+ ns2_destroy
+ ns1_destroy
+ vrp2_destroy
+ ip link del dev v3
+ ip link del dev v1
+
+ switch_destroy
+ h2_destroy
+ h1_destroy
+
+ forwarding_restore
+ vrf_cleanup
+}
+
+# For the first round of tests, vx1 is the first device to get
+# attached to the bridge, and at that point the local IP is already
+# configured. Try the other scenario of attaching the devices to a an
+# already-offloaded bridge, and only then assign the local IP.
+reapply_config()
+{
+ log_info "Reapplying configuration"
+
+ bridge fdb del dev vx1 00:00:00:00:00:00 dst 2001:db8:5::1 self
+ bridge fdb del dev vx1 00:00:00:00:00:00 dst 2001:db8:4::1 self
+ ip link set dev vx1 nomaster
+ rp1_unset_addr
+ sleep 5
+
+ ip link set dev vx1 master br1
+ bridge fdb append dev vx1 00:00:00:00:00:00 dst 2001:db8:4::1 self
+ bridge fdb append dev vx1 00:00:00:00:00:00 dst 2001:db8:5::1 self
+ sleep 1
+ rp1_set_addr
+ sleep 5
+}
+
+__ping_ipv4()
+{
+ local vxlan_local_ip=$1; shift
+ local vxlan_remote_ip=$1; shift
+ local src_ip=$1; shift
+ local dst_ip=$1; shift
+ local dev=$1; shift
+ local info=$1; shift
+
+ RET=0
+
+ tc filter add dev $rp1 egress protocol ipv6 pref 1 handle 101 \
+ flower ip_proto udp src_ip $vxlan_local_ip \
+ dst_ip $vxlan_remote_ip dst_port $VXPORT $TC_FLAG action pass
+ # Match ICMP-reply packets after decapsulation, so source IP is
+ # destination IP of the ping and destination IP is source IP of the
+ # ping.
+ tc filter add dev $swp1 egress protocol ip pref 1 handle 101 \
+ flower src_ip $dst_ip dst_ip $src_ip \
+ $TC_FLAG action pass
+
+ # Send 100 packets and verify that at least 100 packets hit the rule,
+ # to overcome ARP noise.
+ PING_COUNT=100 PING_TIMEOUT=20 ping_do $dev $dst_ip
+ check_err $? "Ping failed"
+
+ tc_check_at_least_x_packets "dev $rp1 egress" 101 10 100
+ check_err $? "Encapsulated packets did not go through router"
+
+ tc_check_at_least_x_packets "dev $swp1 egress" 101 10 100
+ check_err $? "Decapsulated packets did not go through switch"
+
+ log_test "ping: $info"
+
+ tc filter del dev $swp1 egress
+ tc filter del dev $rp1 egress
+}
+
+ping_ipv4()
+{
+ RET=0
+
+ local local_sw_ip=2001:db8:3::1
+ local remote_ns1_ip=2001:db8:4::1
+ local remote_ns2_ip=2001:db8:5::1
+ local h1_ip=192.0.2.1
+ local w2_ns1_ip=192.0.2.3
+ local w2_ns2_ip=192.0.2.4
+
+ ping_test $h1 192.0.2.2 ": local->local"
+
+ __ping_ipv4 $local_sw_ip $remote_ns1_ip $h1_ip $w2_ns1_ip $h1 \
+ "local->remote 1"
+ __ping_ipv4 $local_sw_ip $remote_ns2_ip $h1_ip $w2_ns2_ip $h1 \
+ "local->remote 2"
+}
+
+__ping_ipv6()
+{
+ local vxlan_local_ip=$1; shift
+ local vxlan_remote_ip=$1; shift
+ local src_ip=$1; shift
+ local dst_ip=$1; shift
+ local dev=$1; shift
+ local info=$1; shift
+
+ RET=0
+
+ tc filter add dev $rp1 egress protocol ipv6 pref 1 handle 101 \
+ flower ip_proto udp src_ip $vxlan_local_ip \
+ dst_ip $vxlan_remote_ip dst_port $VXPORT $TC_FLAG action pass
+ # Match ICMP-reply packets after decapsulation, so source IP is
+ # destination IP of the ping and destination IP is source IP of the
+ # ping.
+ tc filter add dev $swp1 egress protocol ipv6 pref 1 handle 101 \
+ flower src_ip $dst_ip dst_ip $src_ip $TC_FLAG action pass
+
+ # Send 100 packets and verify that at least 100 packets hit the rule,
+ # to overcome neighbor discovery noise.
+ PING_COUNT=100 PING_TIMEOUT=20 ping6_do $dev $dst_ip
+ check_err $? "Ping failed"
+
+ tc_check_at_least_x_packets "dev $rp1 egress" 101 100
+ check_err $? "Encapsulated packets did not go through router"
+
+ tc_check_at_least_x_packets "dev $swp1 egress" 101 100
+ check_err $? "Decapsulated packets did not go through switch"
+
+ log_test "ping6: $info"
+
+ tc filter del dev $swp1 egress
+ tc filter del dev $rp1 egress
+}
+
+ping_ipv6()
+{
+ RET=0
+
+ local local_sw_ip=2001:db8:3::1
+ local remote_ns1_ip=2001:db8:4::1
+ local remote_ns2_ip=2001:db8:5::1
+ local h1_ip=2001:db8:1::1
+ local w2_ns1_ip=2001:db8:1::3
+ local w2_ns2_ip=2001:db8:1::4
+
+ ping6_test $h1 2001:db8:1::2 ": local->local"
+
+ __ping_ipv6 $local_sw_ip $remote_ns1_ip $h1_ip $w2_ns1_ip $h1 \
+ "local->remote 1"
+ __ping_ipv6 $local_sw_ip $remote_ns2_ip $h1_ip $w2_ns2_ip $h1 \
+ "local->remote 2"
+}
+
+maybe_in_ns()
+{
+ echo ${1:+in_ns} $1
+}
+
+__flood_counter_add_del()
+{
+ local add_del=$1; shift
+ local dst_ip=$1; shift
+ local dev=$1; shift
+ local ns=$1; shift
+
+ # Putting the ICMP capture both to HW and to SW will end up
+ # double-counting the packets that are trapped to slow path, such as for
+ # the unicast test. Adding either skip_hw or skip_sw fixes this problem,
+ # but with skip_hw, the flooded packets are not counted at all, because
+ # those are dropped due to MAC address mismatch; and skip_sw is a no-go
+ # for veth-based topologies.
+ #
+ # So try to install with skip_sw and fall back to skip_sw if that fails.
+
+ $(maybe_in_ns $ns) tc filter $add_del dev "$dev" ingress \
+ proto ipv6 pref 100 flower dst_ip $dst_ip ip_proto \
+ icmpv6 skip_sw action pass 2>/dev/null || \
+ $(maybe_in_ns $ns) tc filter $add_del dev "$dev" ingress \
+ proto ipv6 pref 100 flower dst_ip $dst_ip ip_proto \
+ icmpv6 skip_hw action pass
+}
+
+flood_counter_install()
+{
+ __flood_counter_add_del add "$@"
+}
+
+flood_counter_uninstall()
+{
+ __flood_counter_add_del del "$@"
+}
+
+flood_fetch_stat()
+{
+ local dev=$1; shift
+ local ns=$1; shift
+
+ $(maybe_in_ns $ns) tc_rule_stats_get $dev 100 ingress
+}
+
+flood_fetch_stats()
+{
+ local counters=("${@}")
+ local counter
+
+ for counter in "${counters[@]}"; do
+ flood_fetch_stat $counter
+ done
+}
+
+vxlan_flood_test()
+{
+ local mac=$1; shift
+ local dst=$1; shift
+ local -a expects=("${@}")
+
+ local -a counters=($h2 "vx2 ns1" "vx2 ns2")
+ local counter
+ local key
+
+ for counter in "${counters[@]}"; do
+ flood_counter_install $dst $counter
+ done
+
+ local -a t0s=($(flood_fetch_stats "${counters[@]}"))
+ $MZ -6 $h1 -c 10 -d 100msec -p 64 -b $mac -B $dst -t icmp6 type=128 -q
+ sleep 1
+ local -a t1s=($(flood_fetch_stats "${counters[@]}"))
+
+ for key in ${!t0s[@]}; do
+ local delta=$((t1s[$key] - t0s[$key]))
+ local expect=${expects[$key]}
+
+ ((expect == delta))
+ check_err $? "${counters[$key]}: Expected to capture $expect packets, got $delta."
+ done
+
+ for counter in "${counters[@]}"; do
+ flood_counter_uninstall $dst $counter
+ done
+}
+
+__test_flood()
+{
+ local mac=$1; shift
+ local dst=$1; shift
+ local what=$1; shift
+
+ RET=0
+
+ vxlan_flood_test $mac $dst 10 10 10
+
+ log_test "VXLAN: $what"
+}
+
+test_flood()
+{
+ __test_flood de:ad:be:ef:13:37 2001:db8:1::100 "flood"
+}
+
+vxlan_fdb_add_del()
+{
+ local add_del=$1; shift
+ local mac=$1; shift
+ local dev=$1; shift
+ local dst=$1; shift
+
+ bridge fdb $add_del dev $dev $mac self static permanent \
+ ${dst:+dst} $dst 2>/dev/null
+ bridge fdb $add_del dev $dev $mac master static 2>/dev/null
+}
+
+__test_unicast()
+{
+ local mac=$1; shift
+ local dst=$1; shift
+ local hit_idx=$1; shift
+ local what=$1; shift
+
+ RET=0
+
+ local -a expects=(0 0 0)
+ expects[$hit_idx]=10
+
+ vxlan_flood_test $mac $dst "${expects[@]}"
+
+ log_test "VXLAN: $what"
+}
+
+test_unicast()
+{
+ local -a targets=("$h2_mac $h2"
+ "$r1_mac vx1 2001:db8:4::1"
+ "$r2_mac vx1 2001:db8:5::1")
+ local target
+
+ for target in "${targets[@]}"; do
+ vxlan_fdb_add_del add $target
+ done
+
+ __test_unicast $h2_mac 2001:db8:1::2 0 "local MAC unicast"
+ __test_unicast $r1_mac 2001:db8:1::3 1 "remote MAC 1 unicast"
+ __test_unicast $r2_mac 2001:db8:1::4 2 "remote MAC 2 unicast"
+
+ for target in "${targets[@]}"; do
+ vxlan_fdb_add_del del $target
+ done
+}
+
+vxlan_ping_test()
+{
+ local ping_dev=$1; shift
+ local ping_dip=$1; shift
+ local ping_args=$1; shift
+ local capture_dev=$1; shift
+ local capture_dir=$1; shift
+ local capture_pref=$1; shift
+ local expect=$1; shift
+
+ local t0=$(tc_rule_stats_get $capture_dev $capture_pref $capture_dir)
+ ping6_do $ping_dev $ping_dip "$ping_args"
+ local t1=$(tc_rule_stats_get $capture_dev $capture_pref $capture_dir)
+ local delta=$((t1 - t0))
+
+ # Tolerate a couple stray extra packets.
+ ((expect <= delta && delta <= expect + 5))
+ check_err $? "$capture_dev: Expected to capture $expect packets, got $delta."
+}
+
+test_ttl()
+{
+ RET=0
+
+ tc filter add dev v1 egress pref 77 protocol ipv6 \
+ flower ip_ttl 99 action pass
+ vxlan_ping_test $h1 2001:db8:1::3 "" v1 egress 77 10
+ tc filter del dev v1 egress pref 77 protocol ipv6
+
+ log_test "VXLAN: envelope TTL"
+}
+
+test_tos()
+{
+ RET=0
+
+ tc filter add dev v1 egress pref 77 protocol ipv6 \
+ flower ip_tos 0x14 action pass
+ vxlan_ping_test $h1 2001:db8:1::3 "-Q 0x14" v1 egress 77 10
+ vxlan_ping_test $h1 2001:db8:1::3 "-Q 0x18" v1 egress 77 0
+ tc filter del dev v1 egress pref 77 protocol ipv6
+
+ log_test "VXLAN: envelope TOS inheritance"
+}
+
+__test_ecn_encap()
+{
+ local q=$1; shift
+ local tos=$1; shift
+
+ RET=0
+
+ tc filter add dev v1 egress pref 77 protocol ipv6 \
+ flower ip_tos $tos ip_proto udp dst_port $VXPORT action pass
+ sleep 1
+ vxlan_ping_test $h1 2001:db8:1::3 "-Q $q" v1 egress 77 10
+ tc filter del dev v1 egress pref 77 protocol ipv6
+
+ log_test "VXLAN: ECN encap: $q->$tos"
+}
+
+test_ecn_encap()
+{
+ # In accordance with INET_ECN_encapsulate()
+ __test_ecn_encap 0x00 0x00
+ __test_ecn_encap 0x01 0x01
+ __test_ecn_encap 0x02 0x02
+ __test_ecn_encap 0x03 0x02
+}
+
+vxlan_encapped_ping_do()
+{
+ local count=$1; shift
+ local dev=$1; shift
+ local next_hop_mac=$1; shift
+ local dest_ip=$1; shift
+ local dest_mac=$1; shift
+ local inner_tos=$1; shift
+ local outer_tos=$1; shift
+ local saddr="20:01:0d:b8:00:01:00:00:00:00:00:00:00:00:00:03"
+ local daddr="20:01:0d:b8:00:01:00:00:00:00:00:00:00:00:00:01"
+
+ $MZ -6 $dev -c $count -d 100msec -q \
+ -b $next_hop_mac -B $dest_ip \
+ -t udp tos=$outer_tos,sp=23456,dp=$VXPORT,p=$(:
+ )"08:"$( : VXLAN flags
+ )"00:00:00:"$( : VXLAN reserved
+ )"00:03:e8:"$( : VXLAN VNI
+ )"00:"$( : VXLAN reserved
+ )"$dest_mac:"$( : ETH daddr
+ )"$(mac_get w2):"$( : ETH saddr
+ )"86:dd:"$( : ETH type
+ )"6"$( : IP version
+ )"$inner_tos"$( : Traffic class
+ )"0:00:00:"$( : Flow label
+ )"00:08:"$( : Payload length
+ )"3a:"$( : Next header
+ )"04:"$( : Hop limit
+ )"$saddr:"$( : IP saddr
+ )"$daddr:"$( : IP daddr
+ )"80:"$( : ICMPv6.type
+ )"00:"$( : ICMPv6.code
+ )"00:"$( : ICMPv6.checksum
+ )
+}
+export -f vxlan_encapped_ping_do
+
+vxlan_encapped_ping_test()
+{
+ local ping_dev=$1; shift
+ local nh_dev=$1; shift
+ local ping_dip=$1; shift
+ local inner_tos=$1; shift
+ local outer_tos=$1; shift
+ local stat_get=$1; shift
+ local expect=$1; shift
+
+ local t0=$($stat_get)
+
+ in_ns ns1 \
+ vxlan_encapped_ping_do 10 $ping_dev $(mac_get $nh_dev) \
+ $ping_dip $(mac_get $h1) \
+ $inner_tos $outer_tos
+ sleep 1
+ local t1=$($stat_get)
+ local delta=$((t1 - t0))
+
+ # Tolerate a couple stray extra packets.
+ ((expect <= delta && delta <= expect + 2))
+ check_err $? "Expected to capture $expect packets, got $delta."
+}
+export -f vxlan_encapped_ping_test
+
+__test_ecn_decap()
+{
+ local orig_inner_tos=$1; shift
+ local orig_outer_tos=$1; shift
+ local decapped_tos=$1; shift
+
+ RET=0
+
+ tc filter add dev $h1 ingress pref 77 protocol ipv6 \
+ flower src_ip 2001:db8:1::3 dst_ip 2001:db8:1::1 \
+ ip_tos $decapped_tos action drop
+ sleep 1
+ vxlan_encapped_ping_test v2 v1 2001:db8:3::1 \
+ $orig_inner_tos $orig_outer_tos \
+ "tc_rule_stats_get $h1 77 ingress" 10
+ tc filter del dev $h1 ingress pref 77
+
+ log_test "VXLAN: ECN decap: $orig_outer_tos/$orig_inner_tos->$decapped_tos"
+}
+
+test_ecn_decap_error()
+{
+ local orig_inner_tos="0:0"
+ local orig_outer_tos=03
+
+ RET=0
+
+ vxlan_encapped_ping_test v2 v1 2001:db8:3::1 \
+ $orig_inner_tos $orig_outer_tos \
+ "link_stats_rx_errors_get vx1" 10
+
+ log_test "VXLAN: ECN decap: $orig_outer_tos/$orig_inner_tos->error"
+}
+
+test_ecn_decap()
+{
+ # In accordance with INET_ECN_decapsulate()
+ __test_ecn_decap "0:0" 00 0x00
+ __test_ecn_decap "0:0" 01 0x00
+ __test_ecn_decap "0:0" 02 0x00
+ # 00 03 is tested in test_ecn_decap_error()
+ __test_ecn_decap "0:1" 00 0x01
+ __test_ecn_decap "0:1" 01 0x01
+ __test_ecn_decap "0:1" 02 0x01
+ __test_ecn_decap "0:1" 03 0x03
+ __test_ecn_decap "0:2" 00 0x02
+ __test_ecn_decap "0:2" 01 0x01
+ __test_ecn_decap "0:2" 02 0x02
+ __test_ecn_decap "0:2" 03 0x03
+ __test_ecn_decap "0:3" 00 0x03
+ __test_ecn_decap "0:3" 01 0x03
+ __test_ecn_decap "0:3" 02 0x03
+ __test_ecn_decap "0:3" 03 0x03
+ test_ecn_decap_error
+}
+
+test_all()
+{
+ log_info "Running tests with UDP port $VXPORT"
+ tests_run
+}
+
+trap cleanup EXIT
+
+setup_prepare
+setup_wait
+test_all
+
+exit $EXIT_STATUS
diff --git a/tools/testing/selftests/net/forwarding/vxlan_bridge_1d_port_8472_ipv6.sh b/tools/testing/selftests/net/forwarding/vxlan_bridge_1d_port_8472_ipv6.sh
new file mode 100755
index 000000000000..00540317737a
--- /dev/null
+++ b/tools/testing/selftests/net/forwarding/vxlan_bridge_1d_port_8472_ipv6.sh
@@ -0,0 +1,11 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+# A wrapper to run VXLAN tests with an unusual port number.
+
+VXPORT=8472
+ALL_TESTS="
+ ping_ipv4
+ ping_ipv6
+"
+source vxlan_bridge_1d_ipv6.sh
diff --git a/tools/testing/selftests/net/forwarding/vxlan_bridge_1q.sh b/tools/testing/selftests/net/forwarding/vxlan_bridge_1q.sh
index a5789721ba92..fb9a34cb50c6 100755
--- a/tools/testing/selftests/net/forwarding/vxlan_bridge_1q.sh
+++ b/tools/testing/selftests/net/forwarding/vxlan_bridge_1q.sh
@@ -680,26 +680,6 @@ test_pvid()
log_test "VXLAN: flood after vlan re-add"
}
-vxlan_ping_test()
-{
- local ping_dev=$1; shift
- local ping_dip=$1; shift
- local ping_args=$1; shift
- local capture_dev=$1; shift
- local capture_dir=$1; shift
- local capture_pref=$1; shift
- local expect=$1; shift
-
- local t0=$(tc_rule_stats_get $capture_dev $capture_pref $capture_dir)
- ping_do $ping_dev $ping_dip "$ping_args"
- local t1=$(tc_rule_stats_get $capture_dev $capture_pref $capture_dir)
- local delta=$((t1 - t0))
-
- # Tolerate a couple stray extra packets.
- ((expect <= delta && delta <= expect + 2))
- check_err $? "$capture_dev: Expected to capture $expect packets, got $delta."
-}
-
__test_learning()
{
local -a expects=(0 0 0 0 0)
@@ -770,7 +750,7 @@ __test_learning()
expects[0]=0; expects[$idx1]=10; expects[$idx2]=0
vxlan_flood_test $mac $dst $vid "${expects[@]}"
- sleep 20
+ sleep 60
bridge fdb show brport $vx | grep $mac | grep -q self
check_fail $?
@@ -816,11 +796,11 @@ test_learning()
local dst=192.0.2.100
local vid=10
- # Enable learning on the VxLAN devices and set ageing time to 10 seconds
- ip link set dev br1 type bridge ageing_time 1000
- ip link set dev vx10 type vxlan ageing 10
+ # Enable learning on the VxLAN devices and set ageing time to 30 seconds
+ ip link set dev br1 type bridge ageing_time 3000
+ ip link set dev vx10 type vxlan ageing 30
ip link set dev vx10 type vxlan learning
- ip link set dev vx20 type vxlan ageing 10
+ ip link set dev vx20 type vxlan ageing 30
ip link set dev vx20 type vxlan learning
reapply_config
diff --git a/tools/testing/selftests/net/forwarding/vxlan_bridge_1q_ipv6.sh b/tools/testing/selftests/net/forwarding/vxlan_bridge_1q_ipv6.sh
new file mode 100755
index 000000000000..e83fde79f40d
--- /dev/null
+++ b/tools/testing/selftests/net/forwarding/vxlan_bridge_1q_ipv6.sh
@@ -0,0 +1,837 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+# +-----------------------+ +------------------------+
+# | H1 (vrf) | | H2 (vrf) |
+# | + $h1.10 | | + $h2.10 |
+# | | 192.0.2.1/28 | | | 192.0.2.2/28 |
+# | | 2001:db8:1::1/64 | | | 2001:db8:1::2/64 |
+# | | | | | |
+# | | + $h1.20 | | | + $h2.20 |
+# | \ | 198.51.100.1/24 | | \ | 198.51.100.2/24 |
+# | \ | 2001:db8:2::1/64 | | \ | 2001:db8:2::2/64 |
+# | \| | | \| |
+# | + $h1 | | + $h2 |
+# +----|------------------+ +----|-------------------+
+# | |
+# +----|--------------------------------------------------|-------------------+
+# | SW | | |
+# | +--|--------------------------------------------------|-----------------+ |
+# | | + $swp1 BR1 (802.1q) + $swp2 | |
+# | | vid 10 vid 10 | |
+# | | vid 20 vid 20 | |
+# | | | |
+# | | + vx10 (vxlan) + vx20 (vxlan) | |
+# | | local: local: | |
+# | | 2001:db8:3::1 2001:db8:3::1 | |
+# | | remote: remote: | |
+# | | 2001:db8:4::1 2001:db8:5::1 2001:db8:4::1 2001:db8:5::1 | |
+# | | id 1000 dstport $VXPORT id 2000 dstport $VXPORT | |
+# | | vid 10 pvid untagged vid 20 pvid untagged | |
+# | +-----------------------------------------------------------------------+ |
+# | |
+# | 2001:db8:4::0/64 via 2001:db8:3::2 |
+# | 2001:db8:5::0/64 via 2001:db8:3::2 |
+# | |
+# | + $rp1 |
+# | | 2001:db8:3::1/64 |
+# +----|----------------------------------------------------------------------+
+# |
+# +----|----------------------------------------------------------+
+# | | VRP2 (vrf) |
+# | + $rp2 |
+# | 2001:db8:3::2/64 |
+# | | (maybe) HW
+# =============================================================================
+# | | (likely) SW
+# | + v1 (veth) + v3 (veth) |
+# | | 2001:db8:4::2/64 | 2001:db8:5::2/64 |
+# +----|---------------------------------------|------------------+
+# | |
+# +----|--------------------------------+ +----|-------------------------------+
+# | + v2 (veth) NS1 (netns) | | + v4 (veth) NS2 (netns) |
+# | 2001:db8:4::1/64 | | 2001:db8:5::1/64 |
+# | | | |
+# | 2001:db8:3::0/64 via 2001:db8:4::2 | | 2001:db8:3::0/64 via 2001:db8:5::2 |
+# | 2001:db8:5::1/128 via 2001:db8:4::2 | | 2001:db8:4::1/128 via |
+# | | | 2001:db8:5::2 |
+# | | | |
+# | +-------------------------------+ | | +-------------------------------+ |
+# | | BR2 (802.1q) | | | | BR2 (802.1q) | |
+# | | + vx10 (vxlan) | | | | + vx10 (vxlan) | |
+# | | local 2001:db8:4::1 | | | | local 2001:db8:5::1 | |
+# | | remote 2001:db8:3::1 | | | | remote 2001:db8:3::1 | |
+# | | remote 2001:db8:5::1 | | | | remote 2001:db8:4::1 | |
+# | | id 1000 dstport $VXPORT | | | | id 1000 dstport $VXPORT | |
+# | | vid 10 pvid untagged | | | | vid 10 pvid untagged | |
+# | | | | | | | |
+# | | + vx20 (vxlan) | | | | + vx20 (vxlan) | |
+# | | local 2001:db8:4::1 | | | | local 2001:db8:5::1 | |
+# | | remote 2001:db8:3::1 | | | | remote 2001:db8:3::1 | |
+# | | remote 2001:db8:5::1 | | | | remote 2001:db8:4::1 | |
+# | | id 2000 dstport $VXPORT | | | | id 2000 dstport $VXPORT | |
+# | | vid 20 pvid untagged | | | | vid 20 pvid untagged | |
+# | | | | | | | |
+# | | + w1 (veth) | | | | + w1 (veth) | |
+# | | | vid 10 | | | | | vid 10 | |
+# | | | vid 20 | | | | | vid 20 | |
+# | +--|----------------------------+ | | +--|----------------------------+ |
+# | | | | | |
+# | +--|----------------------------+ | | +--|----------------------------+ |
+# | | + w2 (veth) VW2 (vrf) | | | | + w2 (veth) VW2 (vrf) | |
+# | | |\ | | | | |\ | |
+# | | | + w2.10 | | | | | + w2.10 | |
+# | | | 192.0.2.3/28 | | | | | 192.0.2.4/28 | |
+# | | | 2001:db8:1::3/64 | | | | | 2001:db8:1::4/64 | |
+# | | | | | | | | | |
+# | | + w2.20 | | | | + w2.20 | |
+# | | 198.51.100.3/24 | | | | 198.51.100.4/24 | |
+# | | 2001:db8:2::3/64 | | | | 2001:db8:2::4/64 | |
+# | +-------------------------------+ | | +-------------------------------+ |
+# +-------------------------------------+ +------------------------------------+
+
+: ${VXPORT:=4789}
+export VXPORT
+
+: ${ALL_TESTS:="
+ ping_ipv4
+ ping_ipv6
+ test_flood
+ test_unicast
+ reapply_config
+ ping_ipv4
+ ping_ipv6
+ test_flood
+ test_unicast
+ test_pvid
+ ping_ipv4
+ ping_ipv6
+ test_flood
+ test_pvid
+"}
+
+NUM_NETIFS=6
+source lib.sh
+source tc_common.sh
+
+h1_create()
+{
+ simple_if_init $h1
+ tc qdisc add dev $h1 clsact
+ vlan_create $h1 10 v$h1 192.0.2.1/28 2001:db8:1::1/64
+ vlan_create $h1 20 v$h1 198.51.100.1/24 2001:db8:2::1/64
+}
+
+h1_destroy()
+{
+ vlan_destroy $h1 20
+ vlan_destroy $h1 10
+ tc qdisc del dev $h1 clsact
+ simple_if_fini $h1
+}
+
+h2_create()
+{
+ simple_if_init $h2
+ tc qdisc add dev $h2 clsact
+ vlan_create $h2 10 v$h2 192.0.2.2/28 2001:db8:1::2/64
+ vlan_create $h2 20 v$h2 198.51.100.2/24 2001:db8:2::2/64
+}
+
+h2_destroy()
+{
+ vlan_destroy $h2 20
+ vlan_destroy $h2 10
+ tc qdisc del dev $h2 clsact
+ simple_if_fini $h2
+}
+
+rp1_set_addr()
+{
+ ip address add dev $rp1 2001:db8:3::1/64
+
+ ip route add 2001:db8:4::0/64 nexthop via 2001:db8:3::2
+ ip route add 2001:db8:5::0/64 nexthop via 2001:db8:3::2
+}
+
+rp1_unset_addr()
+{
+ ip route del 2001:db8:5::0/64 nexthop via 2001:db8:3::2
+ ip route del 2001:db8:4::0/64 nexthop via 2001:db8:3::2
+
+ ip address del dev $rp1 2001:db8:3::1/64
+}
+
+switch_create()
+{
+ ip link add name br1 type bridge vlan_filtering 1 vlan_default_pvid 0 \
+ mcast_snooping 0
+ # Make sure the bridge uses the MAC address of the local port and not
+ # that of the VxLAN's device.
+ ip link set dev br1 address $(mac_get $swp1)
+ ip link set dev br1 up
+
+ ip link set dev $rp1 up
+ rp1_set_addr
+ tc qdisc add dev $rp1 clsact
+
+ ip link add name vx10 type vxlan id 1000 local 2001:db8:3::1 \
+ dstport "$VXPORT" nolearning udp6zerocsumrx udp6zerocsumtx \
+ tos inherit ttl 100
+ ip link set dev vx10 up
+
+ ip link set dev vx10 master br1
+ bridge vlan add vid 10 dev vx10 pvid untagged
+
+ ip link add name vx20 type vxlan id 2000 local 2001:db8:3::1 \
+ dstport "$VXPORT" nolearning udp6zerocsumrx udp6zerocsumtx \
+ tos inherit ttl 100
+ ip link set dev vx20 up
+
+ ip link set dev vx20 master br1
+ bridge vlan add vid 20 dev vx20 pvid untagged
+
+ ip link set dev $swp1 master br1
+ ip link set dev $swp1 up
+ tc qdisc add dev $swp1 clsact
+ bridge vlan add vid 10 dev $swp1
+ bridge vlan add vid 20 dev $swp1
+
+ ip link set dev $swp2 master br1
+ ip link set dev $swp2 up
+ bridge vlan add vid 10 dev $swp2
+ bridge vlan add vid 20 dev $swp2
+
+ bridge fdb append dev vx10 00:00:00:00:00:00 dst 2001:db8:4::1 self
+ bridge fdb append dev vx10 00:00:00:00:00:00 dst 2001:db8:5::1 self
+
+ bridge fdb append dev vx20 00:00:00:00:00:00 dst 2001:db8:4::1 self
+ bridge fdb append dev vx20 00:00:00:00:00:00 dst 2001:db8:5::1 self
+}
+
+switch_destroy()
+{
+ bridge fdb del dev vx20 00:00:00:00:00:00 dst 2001:db8:5::1 self
+ bridge fdb del dev vx20 00:00:00:00:00:00 dst 2001:db8:4::1 self
+
+ bridge fdb del dev vx10 00:00:00:00:00:00 dst 2001:db8:5::1 self
+ bridge fdb del dev vx10 00:00:00:00:00:00 dst 2001:db8:4::1 self
+
+ bridge vlan del vid 20 dev $swp2
+ bridge vlan del vid 10 dev $swp2
+ ip link set dev $swp2 down
+ ip link set dev $swp2 nomaster
+
+ bridge vlan del vid 20 dev $swp1
+ bridge vlan del vid 10 dev $swp1
+ tc qdisc del dev $swp1 clsact
+ ip link set dev $swp1 down
+ ip link set dev $swp1 nomaster
+
+ bridge vlan del vid 20 dev vx20
+ ip link set dev vx20 nomaster
+
+ ip link set dev vx20 down
+ ip link del dev vx20
+
+ bridge vlan del vid 10 dev vx10
+ ip link set dev vx10 nomaster
+
+ ip link set dev vx10 down
+ ip link del dev vx10
+
+ tc qdisc del dev $rp1 clsact
+ rp1_unset_addr
+ ip link set dev $rp1 down
+
+ ip link set dev br1 down
+ ip link del dev br1
+}
+
+vrp2_create()
+{
+ simple_if_init $rp2 2001:db8:3::2/64
+ __simple_if_init v1 v$rp2 2001:db8:4::2/64
+ __simple_if_init v3 v$rp2 2001:db8:5::2/64
+ tc qdisc add dev v1 clsact
+}
+
+vrp2_destroy()
+{
+ tc qdisc del dev v1 clsact
+ __simple_if_fini v3 2001:db8:5::2/64
+ __simple_if_fini v1 2001:db8:4::2/64
+ simple_if_fini $rp2 2001:db8:3::2/64
+}
+
+ns_init_common()
+{
+ local in_if=$1; shift
+ local in_addr=$1; shift
+ local other_in_addr=$1; shift
+ local nh_addr=$1; shift
+ local host_addr1_ipv4=$1; shift
+ local host_addr1_ipv6=$1; shift
+ local host_addr2_ipv4=$1; shift
+ local host_addr2_ipv6=$1; shift
+
+ ip link set dev $in_if up
+ ip address add dev $in_if $in_addr/64
+ tc qdisc add dev $in_if clsact
+
+ ip link add name br2 type bridge vlan_filtering 1 vlan_default_pvid 0
+ ip link set dev br2 up
+
+ ip link add name w1 type veth peer name w2
+
+ ip link set dev w1 master br2
+ ip link set dev w1 up
+
+ bridge vlan add vid 10 dev w1
+ bridge vlan add vid 20 dev w1
+
+ ip link add name vx10 type vxlan id 1000 local $in_addr \
+ dstport "$VXPORT" udp6zerocsumrx
+ ip link set dev vx10 up
+ bridge fdb append dev vx10 00:00:00:00:00:00 dst 2001:db8:3::1 self
+ bridge fdb append dev vx10 00:00:00:00:00:00 dst $other_in_addr self
+
+ ip link set dev vx10 master br2
+ tc qdisc add dev vx10 clsact
+
+ bridge vlan add vid 10 dev vx10 pvid untagged
+
+ ip link add name vx20 type vxlan id 2000 local $in_addr \
+ dstport "$VXPORT" udp6zerocsumrx
+ ip link set dev vx20 up
+ bridge fdb append dev vx20 00:00:00:00:00:00 dst 2001:db8:3::1 self
+ bridge fdb append dev vx20 00:00:00:00:00:00 dst $other_in_addr self
+
+ ip link set dev vx20 master br2
+ tc qdisc add dev vx20 clsact
+
+ bridge vlan add vid 20 dev vx20 pvid untagged
+
+ simple_if_init w2
+ vlan_create w2 10 vw2 $host_addr1_ipv4/28 $host_addr1_ipv6/64
+ vlan_create w2 20 vw2 $host_addr2_ipv4/24 $host_addr2_ipv6/64
+
+ ip route add 2001:db8:3::0/64 nexthop via $nh_addr
+ ip route add $other_in_addr/128 nexthop via $nh_addr
+}
+export -f ns_init_common
+
+ns1_create()
+{
+ ip netns add ns1
+ ip link set dev v2 netns ns1
+ in_ns ns1 \
+ ns_init_common v2 2001:db8:4::1 2001:db8:5::1 2001:db8:4::2 \
+ 192.0.2.3 2001:db8:1::3 198.51.100.3 2001:db8:2::3
+}
+
+ns1_destroy()
+{
+ ip netns exec ns1 ip link set dev v2 netns 1
+ ip netns del ns1
+}
+
+ns2_create()
+{
+ ip netns add ns2
+ ip link set dev v4 netns ns2
+ in_ns ns2 \
+ ns_init_common v4 2001:db8:5::1 2001:db8:4::1 2001:db8:5::2 \
+ 192.0.2.4 2001:db8:1::4 198.51.100.4 2001:db8:2::4
+}
+
+ns2_destroy()
+{
+ ip netns exec ns2 ip link set dev v4 netns 1
+ ip netns del ns2
+}
+
+setup_prepare()
+{
+ h1=${NETIFS[p1]}
+ swp1=${NETIFS[p2]}
+
+ swp2=${NETIFS[p3]}
+ h2=${NETIFS[p4]}
+
+ rp1=${NETIFS[p5]}
+ rp2=${NETIFS[p6]}
+
+ vrf_prepare
+ forwarding_enable
+
+ h1_create
+ h2_create
+ switch_create
+
+ ip link add name v1 type veth peer name v2
+ ip link add name v3 type veth peer name v4
+ vrp2_create
+ ns1_create
+ ns2_create
+
+ r1_mac=$(in_ns ns1 mac_get w2)
+ r2_mac=$(in_ns ns2 mac_get w2)
+ h2_mac=$(mac_get $h2)
+}
+
+cleanup()
+{
+ pre_cleanup
+
+ ns2_destroy
+ ns1_destroy
+ vrp2_destroy
+ ip link del dev v3
+ ip link del dev v1
+
+ switch_destroy
+ h2_destroy
+ h1_destroy
+
+ forwarding_restore
+ vrf_cleanup
+}
+
+# For the first round of tests, vx10 and vx20 were the first devices to get
+# attached to the bridge, and at that point the local IP is already
+# configured. Try the other scenario of attaching these devices to a bridge
+# that already has local ports members, and only then assign the local IP.
+reapply_config()
+{
+ log_info "Reapplying configuration"
+
+ bridge fdb del dev vx20 00:00:00:00:00:00 dst 2001:db8:5::1 self
+ bridge fdb del dev vx20 00:00:00:00:00:00 dst 2001:db8:4::1 self
+
+ bridge fdb del dev vx10 00:00:00:00:00:00 dst 2001:db8:5::1 self
+ bridge fdb del dev vx10 00:00:00:00:00:00 dst 2001:db8:4::1 self
+
+ ip link set dev vx20 nomaster
+ ip link set dev vx10 nomaster
+
+ rp1_unset_addr
+ sleep 5
+
+ ip link set dev vx10 master br1
+ bridge vlan add vid 10 dev vx10 pvid untagged
+
+ ip link set dev vx20 master br1
+ bridge vlan add vid 20 dev vx20 pvid untagged
+
+ bridge fdb append dev vx10 00:00:00:00:00:00 dst 2001:db8:4::1 self
+ bridge fdb append dev vx10 00:00:00:00:00:00 dst 2001:db8:5::1 self
+
+ bridge fdb append dev vx20 00:00:00:00:00:00 dst 2001:db8:4::1 self
+ bridge fdb append dev vx20 00:00:00:00:00:00 dst 2001:db8:5::1 self
+
+ rp1_set_addr
+ sleep 5
+}
+
+__ping_ipv4()
+{
+ local vxlan_local_ip=$1; shift
+ local vxlan_remote_ip=$1; shift
+ local src_ip=$1; shift
+ local dst_ip=$1; shift
+ local dev=$1; shift
+ local info=$1; shift
+
+ RET=0
+
+ tc filter add dev $rp1 egress protocol ipv6 pref 1 handle 101 \
+ flower ip_proto udp src_ip $vxlan_local_ip \
+ dst_ip $vxlan_remote_ip dst_port $VXPORT $TC_FLAG action pass
+ # Match ICMP-reply packets after decapsulation, so source IP is
+ # destination IP of the ping and destination IP is source IP of the
+ # ping.
+ tc filter add dev $swp1 egress protocol 802.1q pref 1 handle 101 \
+ flower vlan_ethtype ipv4 src_ip $dst_ip dst_ip $src_ip \
+ $TC_FLAG action pass
+
+ # Send 100 packets and verify that at least 100 packets hit the rule,
+ # to overcome ARP noise.
+ PING_COUNT=100 PING_TIMEOUT=20 ping_do $dev $dst_ip
+ check_err $? "Ping failed"
+
+ tc_check_at_least_x_packets "dev $rp1 egress" 101 10 100
+ check_err $? "Encapsulated packets did not go through router"
+
+ tc_check_at_least_x_packets "dev $swp1 egress" 101 10 100
+ check_err $? "Decapsulated packets did not go through switch"
+
+ log_test "ping: $info"
+
+ tc filter del dev $swp1 egress
+ tc filter del dev $rp1 egress
+}
+
+ping_ipv4()
+{
+ RET=0
+
+ local local_sw_ip=2001:db8:3::1
+ local remote_ns1_ip=2001:db8:4::1
+ local remote_ns2_ip=2001:db8:5::1
+ local h1_10_ip=192.0.2.1
+ local h1_20_ip=198.51.100.1
+ local w2_10_ns1_ip=192.0.2.3
+ local w2_10_ns2_ip=192.0.2.4
+ local w2_20_ns1_ip=198.51.100.3
+ local w2_20_ns2_ip=198.51.100.4
+
+ ping_test $h1.10 192.0.2.2 ": local->local vid 10"
+ ping_test $h1.20 198.51.100.2 ": local->local vid 20"
+
+ __ping_ipv4 $local_sw_ip $remote_ns1_ip $h1_10_ip $w2_10_ns1_ip $h1.10 \
+ "local->remote 1 vid 10"
+ __ping_ipv4 $local_sw_ip $remote_ns2_ip $h1_10_ip $w2_10_ns2_ip $h1.10 \
+ "local->remote 2 vid 10"
+ __ping_ipv4 $local_sw_ip $remote_ns1_ip $h1_20_ip $w2_20_ns1_ip $h1.20 \
+ "local->remote 1 vid 20"
+ __ping_ipv4 $local_sw_ip $remote_ns2_ip $h1_20_ip $w2_20_ns2_ip $h1.20 \
+ "local->remote 2 vid 20"
+}
+
+__ping_ipv6()
+{
+ local vxlan_local_ip=$1; shift
+ local vxlan_remote_ip=$1; shift
+ local src_ip=$1; shift
+ local dst_ip=$1; shift
+ local dev=$1; shift
+ local info=$1; shift
+
+ RET=0
+
+ tc filter add dev $rp1 egress protocol ipv6 pref 1 handle 101 \
+ flower ip_proto udp src_ip $vxlan_local_ip \
+ dst_ip $vxlan_remote_ip dst_port $VXPORT $TC_FLAG action pass
+ # Match ICMP-reply packets after decapsulation, so source IP is
+ # destination IP of the ping and destination IP is source IP of the
+ # ping.
+ tc filter add dev $swp1 egress protocol 802.1q pref 1 handle 101 \
+ flower vlan_ethtype ipv6 src_ip $dst_ip dst_ip $src_ip \
+ $TC_FLAG action pass
+
+ # Send 100 packets and verify that at least 100 packets hit the rule,
+ # to overcome neighbor discovery noise.
+ PING_COUNT=100 PING_TIMEOUT=20 ping6_do $dev $dst_ip
+ check_err $? "Ping failed"
+
+ tc_check_at_least_x_packets "dev $rp1 egress" 101 100
+ check_err $? "Encapsulated packets did not go through router"
+
+ tc_check_at_least_x_packets "dev $swp1 egress" 101 100
+ check_err $? "Decapsulated packets did not go through switch"
+
+ log_test "ping6: $info"
+
+ tc filter del dev $swp1 egress
+ tc filter del dev $rp1 egress
+}
+
+ping_ipv6()
+{
+ RET=0
+
+ local local_sw_ip=2001:db8:3::1
+ local remote_ns1_ip=2001:db8:4::1
+ local remote_ns2_ip=2001:db8:5::1
+ local h1_10_ip=2001:db8:1::1
+ local h1_20_ip=2001:db8:2::1
+ local w2_10_ns1_ip=2001:db8:1::3
+ local w2_10_ns2_ip=2001:db8:1::4
+ local w2_20_ns1_ip=2001:db8:2::3
+ local w2_20_ns2_ip=2001:db8:2::4
+
+ ping6_test $h1.10 2001:db8:1::2 ": local->local vid 10"
+ ping6_test $h1.20 2001:db8:2::2 ": local->local vid 20"
+
+ __ping_ipv6 $local_sw_ip $remote_ns1_ip $h1_10_ip $w2_10_ns1_ip $h1.10 \
+ "local->remote 1 vid 10"
+ __ping_ipv6 $local_sw_ip $remote_ns2_ip $h1_10_ip $w2_10_ns2_ip $h1.10 \
+ "local->remote 2 vid 10"
+ __ping_ipv6 $local_sw_ip $remote_ns1_ip $h1_20_ip $w2_20_ns1_ip $h1.20 \
+ "local->remote 1 vid 20"
+ __ping_ipv6 $local_sw_ip $remote_ns2_ip $h1_20_ip $w2_20_ns2_ip $h1.20 \
+ "local->remote 2 vid 20"
+}
+
+maybe_in_ns()
+{
+ echo ${1:+in_ns} $1
+}
+
+__flood_counter_add_del()
+{
+ local add_del=$1; shift
+ local dst_ip=$1; shift
+ local dev=$1; shift
+ local ns=$1; shift
+
+ # Putting the ICMP capture both to HW and to SW will end up
+ # double-counting the packets that are trapped to slow path, such as for
+ # the unicast test. Adding either skip_hw or skip_sw fixes this problem,
+ # but with skip_hw, the flooded packets are not counted at all, because
+ # those are dropped due to MAC address mismatch; and skip_sw is a no-go
+ # for veth-based topologies.
+ #
+ # So try to install with skip_sw and fall back to skip_sw if that fails.
+
+ $(maybe_in_ns $ns) tc filter $add_del dev "$dev" ingress \
+ proto ipv6 pref 100 flower dst_ip $dst_ip ip_proto \
+ icmpv6 skip_sw action pass 2>/dev/null || \
+ $(maybe_in_ns $ns) tc filter $add_del dev "$dev" ingress \
+ proto ipv6 pref 100 flower dst_ip $dst_ip ip_proto \
+ icmpv6 skip_hw action pass
+}
+
+flood_counter_install()
+{
+ __flood_counter_add_del add "$@"
+}
+
+flood_counter_uninstall()
+{
+ __flood_counter_add_del del "$@"
+}
+
+flood_fetch_stat()
+{
+ local dev=$1; shift
+ local ns=$1; shift
+
+ $(maybe_in_ns $ns) tc_rule_stats_get $dev 100 ingress
+}
+
+flood_fetch_stats()
+{
+ local counters=("${@}")
+ local counter
+
+ for counter in "${counters[@]}"; do
+ flood_fetch_stat $counter
+ done
+}
+
+vxlan_flood_test()
+{
+ local mac=$1; shift
+ local dst=$1; shift
+ local vid=$1; shift
+ local -a expects=("${@}")
+
+ local -a counters=($h2 "vx10 ns1" "vx20 ns1" "vx10 ns2" "vx20 ns2")
+ local counter
+ local key
+
+ # Packets reach the local host tagged whereas they reach the VxLAN
+ # devices untagged. In order to be able to use the same filter for
+ # all counters, make sure the packets also reach the local host
+ # untagged
+ bridge vlan add vid $vid dev $swp2 untagged
+ for counter in "${counters[@]}"; do
+ flood_counter_install $dst $counter
+ done
+
+ local -a t0s=($(flood_fetch_stats "${counters[@]}"))
+ $MZ -6 $h1 -Q $vid -c 10 -d 100msec -p 64 -b $mac -B $dst -t icmp6 type=128 -q
+ sleep 1
+ local -a t1s=($(flood_fetch_stats "${counters[@]}"))
+
+ for key in ${!t0s[@]}; do
+ local delta=$((t1s[$key] - t0s[$key]))
+ local expect=${expects[$key]}
+
+ ((expect == delta))
+ check_err $? "${counters[$key]}: Expected to capture $expect packets, got $delta."
+ done
+
+ for counter in "${counters[@]}"; do
+ flood_counter_uninstall $dst $counter
+ done
+ bridge vlan add vid $vid dev $swp2
+}
+
+__test_flood()
+{
+ local mac=$1; shift
+ local dst=$1; shift
+ local vid=$1; shift
+ local what=$1; shift
+ local -a expects=("${@}")
+
+ RET=0
+
+ vxlan_flood_test $mac $dst $vid "${expects[@]}"
+
+ log_test "VXLAN: $what"
+}
+
+test_flood()
+{
+ __test_flood de:ad:be:ef:13:37 2001:db8:1::100 10 "flood vlan 10" \
+ 10 10 0 10 0
+ __test_flood ca:fe:be:ef:13:37 2001:db8:2::100 20 "flood vlan 20" \
+ 10 0 10 0 10
+}
+
+vxlan_fdb_add_del()
+{
+ local add_del=$1; shift
+ local vid=$1; shift
+ local mac=$1; shift
+ local dev=$1; shift
+ local dst=$1; shift
+
+ bridge fdb $add_del dev $dev $mac self static permanent \
+ ${dst:+dst} $dst 2>/dev/null
+ bridge fdb $add_del dev $dev $mac master static vlan $vid 2>/dev/null
+}
+
+__test_unicast()
+{
+ local mac=$1; shift
+ local dst=$1; shift
+ local hit_idx=$1; shift
+ local vid=$1; shift
+ local what=$1; shift
+
+ RET=0
+
+ local -a expects=(0 0 0 0 0)
+ expects[$hit_idx]=10
+
+ vxlan_flood_test $mac $dst $vid "${expects[@]}"
+
+ log_test "VXLAN: $what"
+}
+
+test_unicast()
+{
+ local -a targets=("$h2_mac $h2"
+ "$r1_mac vx10 2001:db8:4::1"
+ "$r2_mac vx10 2001:db8:5::1")
+ local target
+
+ log_info "unicast vlan 10"
+
+ for target in "${targets[@]}"; do
+ vxlan_fdb_add_del add 10 $target
+ done
+
+ __test_unicast $h2_mac 2001:db8:1::2 0 10 "local MAC unicast"
+ __test_unicast $r1_mac 2001:db8:1::3 1 10 "remote MAC 1 unicast"
+ __test_unicast $r2_mac 2001:db8:1::4 3 10 "remote MAC 2 unicast"
+
+ for target in "${targets[@]}"; do
+ vxlan_fdb_add_del del 10 $target
+ done
+
+ log_info "unicast vlan 20"
+
+ targets=("$h2_mac $h2" "$r1_mac vx20 2001:db8:4::1" \
+ "$r2_mac vx20 2001:db8:5::1")
+
+ for target in "${targets[@]}"; do
+ vxlan_fdb_add_del add 20 $target
+ done
+
+ __test_unicast $h2_mac 2001:db8:2::2 0 20 "local MAC unicast"
+ __test_unicast $r1_mac 2001:db8:2::3 2 20 "remote MAC 1 unicast"
+ __test_unicast $r2_mac 2001:db8:2::4 4 20 "remote MAC 2 unicast"
+
+ for target in "${targets[@]}"; do
+ vxlan_fdb_add_del del 20 $target
+ done
+}
+
+test_pvid()
+{
+ local -a expects=(0 0 0 0 0)
+ local mac=de:ad:be:ef:13:37
+ local dst=2001:db8:1::100
+ local vid=10
+
+ # Check that flooding works
+ RET=0
+
+ expects[0]=10; expects[1]=10; expects[3]=10
+ vxlan_flood_test $mac $dst $vid "${expects[@]}"
+
+ log_test "VXLAN: flood before pvid off"
+
+ # Toggle PVID off and test that flood to remote hosts does not work
+ RET=0
+
+ bridge vlan add vid 10 dev vx10
+
+ expects[0]=10; expects[1]=0; expects[3]=0
+ vxlan_flood_test $mac $dst $vid "${expects[@]}"
+
+ log_test "VXLAN: flood after pvid off"
+
+ # Toggle PVID on and test that flood to remote hosts does work
+ RET=0
+
+ bridge vlan add vid 10 dev vx10 pvid untagged
+
+ expects[0]=10; expects[1]=10; expects[3]=10
+ vxlan_flood_test $mac $dst $vid "${expects[@]}"
+
+ log_test "VXLAN: flood after pvid on"
+
+ # Add a new VLAN and test that it does not affect flooding
+ RET=0
+
+ bridge vlan add vid 30 dev vx10
+
+ expects[0]=10; expects[1]=10; expects[3]=10
+ vxlan_flood_test $mac $dst $vid "${expects[@]}"
+
+ bridge vlan del vid 30 dev vx10
+
+ log_test "VXLAN: flood after vlan add"
+
+ # Remove currently mapped VLAN and test that flood to remote hosts does
+ # not work
+ RET=0
+
+ bridge vlan del vid 10 dev vx10
+
+ expects[0]=10; expects[1]=0; expects[3]=0
+ vxlan_flood_test $mac $dst $vid "${expects[@]}"
+
+ log_test "VXLAN: flood after vlan delete"
+
+ # Re-add the VLAN and test that flood to remote hosts does work
+ RET=0
+
+ bridge vlan add vid 10 dev vx10 pvid untagged
+
+ expects[0]=10; expects[1]=10; expects[3]=10
+ vxlan_flood_test $mac $dst $vid "${expects[@]}"
+
+ log_test "VXLAN: flood after vlan re-add"
+}
+
+test_all()
+{
+ log_info "Running tests with UDP port $VXPORT"
+ tests_run
+}
+
+trap cleanup EXIT
+
+setup_prepare
+setup_wait
+test_all
+
+exit $EXIT_STATUS
diff --git a/tools/testing/selftests/net/forwarding/vxlan_bridge_1q_port_8472_ipv6.sh b/tools/testing/selftests/net/forwarding/vxlan_bridge_1q_port_8472_ipv6.sh
new file mode 100755
index 000000000000..344f43ccb755
--- /dev/null
+++ b/tools/testing/selftests/net/forwarding/vxlan_bridge_1q_port_8472_ipv6.sh
@@ -0,0 +1,11 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+# A wrapper to run VXLAN tests with an unusual port number.
+
+VXPORT=8472
+ALL_TESTS="
+ ping_ipv4
+ ping_ipv6
+"
+source vxlan_bridge_1q_ipv6.sh
diff --git a/tools/testing/selftests/net/forwarding/vxlan_symmetric.sh b/tools/testing/selftests/net/forwarding/vxlan_symmetric.sh
index 1209031bc794..5d97fa347d75 100755
--- a/tools/testing/selftests/net/forwarding/vxlan_symmetric.sh
+++ b/tools/testing/selftests/net/forwarding/vxlan_symmetric.sh
@@ -237,10 +237,16 @@ switch_create()
bridge fdb add 00:00:5e:00:01:01 dev br1 self local vlan 10
bridge fdb add 00:00:5e:00:01:01 dev br1 self local vlan 20
+
+ sysctl_set net.ipv4.conf.all.rp_filter 0
+ sysctl_set net.ipv4.conf.vlan10-v.rp_filter 0
+ sysctl_set net.ipv4.conf.vlan20-v.rp_filter 0
}
switch_destroy()
{
+ sysctl_restore net.ipv4.conf.all.rp_filter
+
bridge fdb del 00:00:5e:00:01:01 dev br1 self local vlan 20
bridge fdb del 00:00:5e:00:01:01 dev br1 self local vlan 10
@@ -402,6 +408,10 @@ ns_switch_create()
bridge fdb add 00:00:5e:00:01:01 dev br1 self local vlan 10
bridge fdb add 00:00:5e:00:01:01 dev br1 self local vlan 20
+
+ sysctl_set net.ipv4.conf.all.rp_filter 0
+ sysctl_set net.ipv4.conf.vlan10-v.rp_filter 0
+ sysctl_set net.ipv4.conf.vlan20-v.rp_filter 0
}
export -f ns_switch_create
diff --git a/tools/testing/selftests/net/forwarding/vxlan_symmetric_ipv6.sh b/tools/testing/selftests/net/forwarding/vxlan_symmetric_ipv6.sh
new file mode 100755
index 000000000000..904633427fd0
--- /dev/null
+++ b/tools/testing/selftests/net/forwarding/vxlan_symmetric_ipv6.sh
@@ -0,0 +1,563 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+
+# +--------------------------------+ +-----------------------------+
+# | vrf-h1 | | vrf-h2 |
+# | + $h1 | | + $h2 |
+# | | 2001:db8:1::1/64 | | | 2001:db8:2::1/64 |
+# | | default via 2001:db8:1::3 | | | default via 2001:db8:2::3 |
+# +----|---------------------------+ +-|---------------------------+
+# | |
+# +----|------------------------------------------|---------------------------+
+# | SW | | |
+# | +--|------------------------------------------|-------------------------+ |
+# | | + $swp1 br1 + $swp2 | |
+# | | vid 10 pvid untagged vid 20 pvid untagged | |
+# | | | |
+# | | + vx10 + vx20 | |
+# | | local 2001:db8:3::1 local 2001:db8:3::1 | |
+# | | remote 2001:db8:3::2 remote 2001:db8:3::2 | |
+# | | id 1010 id 1020 | |
+# | | dstport 4789 dstport 4789 | |
+# | | vid 10 pvid untagged vid 20 pvid untagged | |
+# | | | |
+# | | + vx4001 | |
+# | | local 2001:db8:3::1 | |
+# | | remote 2001:db8:3::2 | |
+# | | id 104001 | |
+# | | dstport 4789 | |
+# | | vid 4001 pvid untagged | |
+# | | | |
+# | +-----------------------------------+-----------------------------------+ |
+# | | |
+# | +-----------------------------------|-----------------------------------+ |
+# | | | | |
+# | | +--------------------------------+--------------------------------+ | |
+# | | | | | | |
+# | | + vlan10 | vlan20 + | |
+# | | | 2001:db8:1::2/64 | 2001:db8:2::2/64 | | |
+# | | | | | | |
+# | | + vlan10-v (macvlan) + vlan20-v (macvlan) + | |
+# | | 2001:db8:1::3/64 vlan4001 2001:db8:2::3/64 | |
+# | | 00:00:5e:00:01:01 00:00:5e:00:01:01 | |
+# | | vrf-green | |
+# | +-----------------------------------------------------------------------+ |
+# | |
+# | + $rp1 +lo |
+# | | 2001:db8:4::1/64 2001:db8:3::1 |
+# +----|----------------------------------------------------------------------+
+# |
+# +----|--------------------------------------------------------+
+# | | vrf-spine |
+# | + $rp2 |
+# | 2001:db8:4::2/64 |
+# | | (maybe) HW
+# =============================================================================
+# | | (likely) SW
+# | |
+# | + v1 (veth) |
+# | | 2001:db8:5::2/64 |
+# +----|--------------------------------------------------------+
+# |
+# +----|----------------------------------------------------------------------+
+# | + v2 (veth) +lo NS1 (netns) |
+# | 2001:db8:5::1/64 2001:db8:3::2/128 |
+# | |
+# | +-----------------------------------------------------------------------+ |
+# | | vrf-green | |
+# | | + vlan10-v (macvlan) vlan20-v (macvlan) + | |
+# | | | 2001:db8:1::3/64 2001:db8:2::3/64 | | |
+# | | | 00:00:5e:00:01:01 00:00:5e:00:01:01 | | |
+# | | | vlan4001 | | |
+# | | + vlan10 + vlan20 + | |
+# | | | 2001:db8:1::3/64 | 2001:db8:2::3/64 | | |
+# | | | | | | |
+# | | +--------------------------------+--------------------------------+ | |
+# | | | | |
+# | +-----------------------------------|-----------------------------------+ |
+# | | |
+# | +-----------------------------------+-----------------------------------+ |
+# | | | |
+# | | + vx10 + vx20 | |
+# | | local 2001:db8:3::2 local 2001:db8:3::2 | |
+# | | remote 2001:db8:3::1 remote 2001:db8:3::1 | |
+# | | id 1010 id 1020 | |
+# | | dstport 4789 dstport 4789 | |
+# | | vid 10 pvid untagged vid 20 pvid untagged | |
+# | | | |
+# | | + vx4001 | |
+# | | local 2001:db8:3::2 | |
+# | | remote 2001:db8:3::1 | |
+# | | id 104001 | |
+# | | dstport 4789 | |
+# | | vid 4001 pvid untagged | |
+# | | | |
+# | | + w1 (veth) + w3 (veth) | |
+# | | | vid 10 pvid untagged br1 | vid 20 pvid untagged | |
+# | +--|------------------------------------------|-------------------------+ |
+# | | | |
+# | | | |
+# | +--|----------------------+ +--|-------------------------+ |
+# | | | vrf-h1 | | | vrf-h2 | |
+# | | + w2 (veth) | | + w4 (veth) | |
+# | | 2001:db8:1::4/64 | | 2001:db8:2::4/64 | |
+# | | default via | | default via | |
+# | | 2001:db8:1::3/64 | | 2001:db8:2::3/64 | |
+# | +-------------------------+ +----------------------------+ |
+# +---------------------------------------------------------------------------+
+
+ALL_TESTS="
+ ping_ipv6
+"
+NUM_NETIFS=6
+source lib.sh
+
+hx_create()
+{
+ local vrf_name=$1; shift
+ local if_name=$1; shift
+ local ip_addr=$1; shift
+ local gw_ip=$1; shift
+
+ vrf_create $vrf_name
+ ip link set dev $if_name master $vrf_name
+ ip link set dev $vrf_name up
+ ip link set dev $if_name up
+
+ ip address add $ip_addr/64 dev $if_name
+ ip neigh replace $gw_ip lladdr 00:00:5e:00:01:01 nud permanent \
+ dev $if_name
+ ip route add default vrf $vrf_name nexthop via $gw_ip
+}
+export -f hx_create
+
+hx_destroy()
+{
+ local vrf_name=$1; shift
+ local if_name=$1; shift
+ local ip_addr=$1; shift
+ local gw_ip=$1; shift
+
+ ip route del default vrf $vrf_name nexthop via $gw_ip
+ ip neigh del $gw_ip dev $if_name
+ ip address del $ip_addr/64 dev $if_name
+
+ ip link set dev $if_name down
+ vrf_destroy $vrf_name
+}
+
+h1_create()
+{
+ hx_create "vrf-h1" $h1 2001:db8:1::1 2001:db8:1::3
+}
+
+h1_destroy()
+{
+ hx_destroy "vrf-h1" $h1 2001:db8:1::1 2001:db8:1::3
+}
+
+h2_create()
+{
+ hx_create "vrf-h2" $h2 2001:db8:2::1 2001:db8:2::3
+}
+
+h2_destroy()
+{
+ hx_destroy "vrf-h2" $h2 2001:db8:2::1 2001:db8:2::3
+}
+
+switch_create()
+{
+ ip link add name br1 type bridge vlan_filtering 1 vlan_default_pvid 0 \
+ mcast_snooping 0
+ # Make sure the bridge uses the MAC address of the local port and not
+ # that of the VxLAN's device.
+ ip link set dev br1 address $(mac_get $swp1)
+ ip link set dev br1 up
+
+ ip link set dev $rp1 up
+ ip address add dev $rp1 2001:db8:4::1/64
+ ip route add 2001:db8:3::2/128 nexthop via 2001:db8:4::2
+
+ ip link add name vx10 type vxlan id 1010 \
+ local 2001:db8:3::1 remote 2001:db8:3::2 dstport 4789 \
+ nolearning udp6zerocsumrx udp6zerocsumtx tos inherit ttl 100
+ ip link set dev vx10 up
+
+ ip link set dev vx10 master br1
+ bridge vlan add vid 10 dev vx10 pvid untagged
+
+ ip link add name vx20 type vxlan id 1020 \
+ local 2001:db8:3::1 remote 2001:db8:3::2 dstport 4789 \
+ nolearning udp6zerocsumrx udp6zerocsumtx tos inherit ttl 100
+ ip link set dev vx20 up
+
+ ip link set dev vx20 master br1
+ bridge vlan add vid 20 dev vx20 pvid untagged
+
+ ip link set dev $swp1 master br1
+ ip link set dev $swp1 up
+
+ ip link set dev $swp2 master br1
+ ip link set dev $swp2 up
+
+ ip link add name vx4001 type vxlan id 104001 \
+ local 2001:db8:3::1 dstport 4789 \
+ nolearning udp6zerocsumrx udp6zerocsumtx tos inherit ttl 100
+ ip link set dev vx4001 up
+
+ ip link set dev vx4001 master br1
+ bridge vlan add vid 4001 dev vx4001 pvid untagged
+
+ ip address add 2001:db8:3::1/128 dev lo
+
+ # Create SVIs
+ vrf_create "vrf-green"
+ ip link set dev vrf-green up
+
+ ip link add link br1 name vlan10 up master vrf-green type vlan id 10
+ ip address add 2001:db8:1::2/64 dev vlan10
+ ip link add link vlan10 name vlan10-v up master vrf-green \
+ address 00:00:5e:00:01:01 type macvlan mode private
+ ip address add 2001:db8:1::3/64 dev vlan10-v
+
+ ip link add link br1 name vlan20 up master vrf-green type vlan id 20
+ ip address add 2001:db8:2::2/64 dev vlan20
+ ip link add link vlan20 name vlan20-v up master vrf-green \
+ address 00:00:5e:00:01:01 type macvlan mode private
+ ip address add 2001:db8:2::3/64 dev vlan20-v
+
+ ip link add link br1 name vlan4001 up master vrf-green \
+ type vlan id 4001
+
+ bridge vlan add vid 10 dev br1 self
+ bridge vlan add vid 20 dev br1 self
+ bridge vlan add vid 4001 dev br1 self
+
+ bridge fdb add 00:00:5e:00:01:01 dev br1 self local vlan 10
+ bridge fdb add 00:00:5e:00:01:01 dev br1 self local vlan 20
+
+ bridge vlan add vid 10 dev $swp1 pvid untagged
+ bridge vlan add vid 20 dev $swp2 pvid untagged
+}
+
+switch_destroy()
+{
+ bridge vlan del vid 20 dev br1 self
+ bridge vlan del vid 10 dev br1 self
+
+ bridge fdb del 00:00:5e:00:01:01 dev br1 self local vlan 20
+ bridge fdb del 00:00:5e:00:01:01 dev br1 self local vlan 10
+
+ bridge vlan del vid 4001 dev br1 self
+ ip link del dev vlan4001
+
+ ip link del dev vlan20
+
+ ip link del dev vlan10
+
+ vrf_destroy "vrf-green"
+
+ ip address del 2001:db8:3::1/128 dev lo
+
+ bridge vlan del vid 20 dev $swp2
+ ip link set dev $swp2 down
+ ip link set dev $swp2 nomaster
+
+ bridge vlan del vid 10 dev $swp1
+ ip link set dev $swp1 down
+ ip link set dev $swp1 nomaster
+
+ bridge vlan del vid 4001 dev vx4001
+ ip link set dev vx4001 nomaster
+
+ ip link set dev vx4001 down
+ ip link del dev vx4001
+
+ bridge vlan del vid 20 dev vx20
+ ip link set dev vx20 nomaster
+
+ ip link set dev vx20 down
+ ip link del dev vx20
+
+ bridge vlan del vid 10 dev vx10
+ ip link set dev vx10 nomaster
+
+ ip link set dev vx10 down
+ ip link del dev vx10
+
+ ip route del 2001:db8:3::2 nexthop via 2001:db8:4::2
+ ip address del dev $rp1 2001:db8:4::1/64
+ ip link set dev $rp1 down
+
+ ip link set dev br1 down
+ ip link del dev br1
+}
+
+spine_create()
+{
+ vrf_create "vrf-spine"
+ ip link set dev $rp2 master vrf-spine
+ ip link set dev v1 master vrf-spine
+ ip link set dev vrf-spine up
+ ip link set dev $rp2 up
+ ip link set dev v1 up
+
+ ip address add 2001:db8:4::2/64 dev $rp2
+ ip address add 2001:db8:5::2/64 dev v1
+
+ ip route add 2001:db8:3::1/128 vrf vrf-spine nexthop via \
+ 2001:db8:4::1
+ ip route add 2001:db8:3::2/128 vrf vrf-spine nexthop via \
+ 2001:db8:5::1
+}
+
+spine_destroy()
+{
+ ip route del 2001:db8:3::2/128 vrf vrf-spine nexthop via \
+ 2001:db8:5::1
+ ip route del 2001:db8:3::1/128 vrf vrf-spine nexthop via \
+ 2001:db8:4::1
+
+ ip address del 2001:db8:5::2/64 dev v1
+ ip address del 2001:db8:4::2/64 dev $rp2
+
+ ip link set dev v1 down
+ ip link set dev $rp2 down
+ vrf_destroy "vrf-spine"
+}
+
+ns_h1_create()
+{
+ hx_create "vrf-h1" w2 2001:db8:1::4 2001:db8:1::3
+}
+export -f ns_h1_create
+
+ns_h2_create()
+{
+ hx_create "vrf-h2" w4 2001:db8:2::4 2001:db8:2::3
+}
+export -f ns_h2_create
+
+ns_switch_create()
+{
+ ip link add name br1 type bridge vlan_filtering 1 vlan_default_pvid 0 \
+ mcast_snooping 0
+ ip link set dev br1 up
+
+ ip link set dev v2 up
+ ip address add dev v2 2001:db8:5::1/64
+ ip route add 2001:db8:3::1 nexthop via 2001:db8:5::2
+
+ ip link add name vx10 type vxlan id 1010 \
+ local 2001:db8:3::2 remote 2001:db8:3::1 dstport 4789 \
+ nolearning udp6zerocsumrx udp6zerocsumtx tos inherit ttl 100
+ ip link set dev vx10 up
+
+ ip link set dev vx10 master br1
+ bridge vlan add vid 10 dev vx10 pvid untagged
+
+ ip link add name vx20 type vxlan id 1020 \
+ local 2001:db8:3::2 remote 2001:db8:3::1 dstport 4789 \
+ nolearning udp6zerocsumrx udp6zerocsumtx tos inherit ttl 100
+ ip link set dev vx20 up
+
+ ip link set dev vx20 master br1
+ bridge vlan add vid 20 dev vx20 pvid untagged
+
+ ip link add name vx4001 type vxlan id 104001 \
+ local 2001:db8:3::2 dstport 4789 \
+ nolearning udp6zerocsumrx udp6zerocsumtx tos inherit ttl 100
+ ip link set dev vx4001 up
+
+ ip link set dev vx4001 master br1
+ bridge vlan add vid 4001 dev vx4001 pvid untagged
+
+ ip link set dev w1 master br1
+ ip link set dev w1 up
+ bridge vlan add vid 10 dev w1 pvid untagged
+
+ ip link set dev w3 master br1
+ ip link set dev w3 up
+ bridge vlan add vid 20 dev w3 pvid untagged
+
+ ip address add 2001:db8:3::2/128 dev lo
+
+ # Create SVIs
+ vrf_create "vrf-green"
+ ip link set dev vrf-green up
+
+ ip link add link br1 name vlan10 up master vrf-green type vlan id 10
+ ip address add 2001:db8:1::3/64 dev vlan10
+ ip link add link vlan10 name vlan10-v up master vrf-green \
+ address 00:00:5e:00:01:01 type macvlan mode private
+ ip address add 2001:db8:1::3/64 dev vlan10-v
+
+ ip link add link br1 name vlan20 up master vrf-green type vlan id 20
+ ip address add 2001:db8:2::3/64 dev vlan20
+ ip link add link vlan20 name vlan20-v up master vrf-green \
+ address 00:00:5e:00:01:01 type macvlan mode private
+ ip address add 2001:db8:2::3/64 dev vlan20-v
+
+ ip link add link br1 name vlan4001 up master vrf-green \
+ type vlan id 4001
+
+ bridge vlan add vid 10 dev br1 self
+ bridge vlan add vid 20 dev br1 self
+ bridge vlan add vid 4001 dev br1 self
+
+ bridge fdb add 00:00:5e:00:01:01 dev br1 self local vlan 10
+ bridge fdb add 00:00:5e:00:01:01 dev br1 self local vlan 20
+}
+export -f ns_switch_create
+
+ns_init()
+{
+ ip link add name w1 type veth peer name w2
+ ip link add name w3 type veth peer name w4
+
+ ip link set dev lo up
+
+ ns_h1_create
+ ns_h2_create
+ ns_switch_create
+}
+export -f ns_init
+
+ns1_create()
+{
+ ip netns add ns1
+ ip link set dev v2 netns ns1
+ in_ns ns1 ns_init
+}
+
+ns1_destroy()
+{
+ ip netns exec ns1 ip link set dev v2 netns 1
+ ip netns del ns1
+}
+
+__l2_vni_init()
+{
+ local mac1=$1; shift
+ local mac2=$1; shift
+ local ip1=$1; shift
+ local ip2=$1; shift
+ local dst=$1; shift
+
+ bridge fdb add $mac1 dev vx10 self master extern_learn static \
+ dst $dst vlan 10
+ bridge fdb add $mac2 dev vx20 self master extern_learn static \
+ dst $dst vlan 20
+
+ ip neigh add $ip1 lladdr $mac1 nud noarp dev vlan10 \
+ extern_learn
+ ip neigh add $ip2 lladdr $mac2 nud noarp dev vlan20 \
+ extern_learn
+}
+export -f __l2_vni_init
+
+l2_vni_init()
+{
+ local h1_ns_mac=$(in_ns ns1 mac_get w2)
+ local h2_ns_mac=$(in_ns ns1 mac_get w4)
+ local h1_mac=$(mac_get $h1)
+ local h2_mac=$(mac_get $h2)
+
+ __l2_vni_init $h1_ns_mac $h2_ns_mac 2001:db8:1::4 2001:db8:2::4 \
+ 2001:db8:3::2
+ in_ns ns1 __l2_vni_init $h1_mac $h2_mac 2001:db8:1::1 2001:db8:2::1 \
+ 2001:db8:3::1
+}
+
+__l3_vni_init()
+{
+ local mac=$1; shift
+ local vtep_ip=$1; shift
+ local host1_ip=$1; shift
+ local host2_ip=$1; shift
+
+ bridge fdb add $mac dev vx4001 self master extern_learn static \
+ dst $vtep_ip vlan 4001
+
+ ip neigh add $vtep_ip lladdr $mac nud noarp dev vlan4001 extern_learn
+
+ ip route add $host1_ip/128 vrf vrf-green nexthop via $vtep_ip \
+ dev vlan4001 onlink
+ ip route add $host2_ip/128 vrf vrf-green nexthop via $vtep_ip \
+ dev vlan4001 onlink
+}
+export -f __l3_vni_init
+
+l3_vni_init()
+{
+ local vlan4001_ns_mac=$(in_ns ns1 mac_get vlan4001)
+ local vlan4001_mac=$(mac_get vlan4001)
+
+ __l3_vni_init $vlan4001_ns_mac 2001:db8:3::2 2001:db8:1::4 \
+ 2001:db8:2::4
+ in_ns ns1 __l3_vni_init $vlan4001_mac 2001:db8:3::1 2001:db8:1::1 \
+ 2001:db8:2::1
+}
+
+setup_prepare()
+{
+ h1=${NETIFS[p1]}
+ swp1=${NETIFS[p2]}
+
+ swp2=${NETIFS[p3]}
+ h2=${NETIFS[p4]}
+
+ rp1=${NETIFS[p5]}
+ rp2=${NETIFS[p6]}
+
+ vrf_prepare
+ forwarding_enable
+
+ h1_create
+ h2_create
+ switch_create
+
+ ip link add name v1 type veth peer name v2
+ spine_create
+ ns1_create
+ in_ns ns1 forwarding_enable
+
+ l2_vni_init
+ l3_vni_init
+}
+
+cleanup()
+{
+ pre_cleanup
+
+ ns1_destroy
+ spine_destroy
+ ip link del dev v1
+
+ switch_destroy
+ h2_destroy
+ h1_destroy
+
+ forwarding_restore
+ vrf_cleanup
+}
+
+ping_ipv6()
+{
+ ping6_test $h1 2001:db8:2::1 ": local->local vid 10->vid 20"
+ ping6_test $h1 2001:db8:1::4 ": local->remote vid 10->vid 10"
+ ping6_test $h2 2001:db8:2::4 ": local->remote vid 20->vid 20"
+ ping6_test $h1 2001:db8:2::4 ": local->remote vid 10->vid 20"
+ ping6_test $h2 2001:db8:1::4 ": local->remote vid 20->vid 10"
+}
+
+trap cleanup EXIT
+
+setup_prepare
+setup_wait
+
+tests_run
+
+exit $EXIT_STATUS
diff --git a/tools/testing/selftests/net/fq_band_pktlimit.sh b/tools/testing/selftests/net/fq_band_pktlimit.sh
new file mode 100755
index 000000000000..977070ed42b3
--- /dev/null
+++ b/tools/testing/selftests/net/fq_band_pktlimit.sh
@@ -0,0 +1,59 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+#
+# Verify that FQ has a packet limit per band:
+#
+# 1. set the limit to 10 per band
+# 2. send 20 pkts on band A: verify that 10 are queued, 10 dropped
+# 3. send 20 pkts on band A: verify that 0 are queued, 20 dropped
+# 4. send 20 pkts on band B: verify that 10 are queued, 10 dropped
+#
+# Send packets with a delay to ensure that previously sent
+# packets are still queued when later ones are sent.
+# Use SO_TXTIME for this.
+
+die() {
+ echo "$1"
+ exit 1
+}
+
+# run inside private netns
+if [[ $# -eq 0 ]]; then
+ ./in_netns.sh "$0" __subprocess
+ exit
+fi
+
+ip link add type dummy
+ip link set dev dummy0 up
+ip -6 addr add fdaa::1/128 dev dummy0
+ip -6 route add fdaa::/64 dev dummy0
+tc qdisc replace dev dummy0 root handle 1: fq quantum 1514 initial_quantum 1514 limit 10
+
+DELAY=400000
+
+./cmsg_sender -6 -p u -d "${DELAY}" -n 20 fdaa::2 8000
+OUT1="$(tc -s qdisc show dev dummy0 | grep '^\ Sent')"
+
+./cmsg_sender -6 -p u -d "${DELAY}" -n 20 fdaa::2 8000
+OUT2="$(tc -s qdisc show dev dummy0 | grep '^\ Sent')"
+
+./cmsg_sender -6 -p u -d "${DELAY}" -n 20 -P 7 fdaa::2 8000
+OUT3="$(tc -s qdisc show dev dummy0 | grep '^\ Sent')"
+
+# Initial stats will report zero sent, as all packets are still
+# queued in FQ. Sleep for at least the delay period and see that
+# twenty are now sent.
+sleep 0.6
+OUT4="$(tc -s qdisc show dev dummy0 | grep '^\ Sent')"
+
+# Log the output after the test
+echo "${OUT1}"
+echo "${OUT2}"
+echo "${OUT3}"
+echo "${OUT4}"
+
+# Test the output for expected values
+echo "${OUT1}" | grep -q '0\ pkt\ (dropped\ 10' || die "unexpected drop count at 1"
+echo "${OUT2}" | grep -q '0\ pkt\ (dropped\ 30' || die "unexpected drop count at 2"
+echo "${OUT3}" | grep -q '0\ pkt\ (dropped\ 40' || die "unexpected drop count at 3"
+echo "${OUT4}" | grep -q '20\ pkt\ (dropped\ 40' || die "unexpected accept count at 4"
diff --git a/tools/testing/selftests/net/gre_gso.sh b/tools/testing/selftests/net/gre_gso.sh
new file mode 100755
index 000000000000..5100d90f92d2
--- /dev/null
+++ b/tools/testing/selftests/net/gre_gso.sh
@@ -0,0 +1,235 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+# This test is for checking GRE GSO.
+source lib.sh
+ret=0
+
+# all tests in this script. Can be overridden with -t option
+TESTS="gre_gso"
+
+VERBOSE=0
+PAUSE_ON_FAIL=no
+PAUSE=no
+TMPFILE=`mktemp`
+PID=
+
+log_test()
+{
+ local rc=$1
+ local expected=$2
+ local msg="$3"
+
+ if [ ${rc} -eq ${expected} ]; then
+ printf " TEST: %-60s [ OK ]\n" "${msg}"
+ nsuccess=$((nsuccess+1))
+ else
+ ret=1
+ nfail=$((nfail+1))
+ printf " TEST: %-60s [FAIL]\n" "${msg}"
+ if [ "${PAUSE_ON_FAIL}" = "yes" ]; then
+ echo
+ echo "hit enter to continue, 'q' to quit"
+ read a
+ [ "$a" = "q" ] && exit 1
+ fi
+ fi
+
+ if [ "${PAUSE}" = "yes" ]; then
+ echo
+ echo "hit enter to continue, 'q' to quit"
+ read a
+ [ "$a" = "q" ] && exit 1
+ fi
+}
+
+setup()
+{
+ set -e
+ setup_ns ns1
+ IP="ip -netns $ns1"
+ NS_EXEC="ip netns exec $ns1"
+
+ ip link add veth0 type veth peer name veth1
+ ip link set veth0 up
+ ip link set veth1 netns $ns1
+ $IP link set veth1 name veth0
+ $IP link set veth0 up
+
+ dd if=/dev/urandom of=$TMPFILE bs=1024 count=2048 &>/dev/null
+ set +e
+}
+
+cleanup()
+{
+ rm -rf $TMPFILE
+ [ -n "$PID" ] && kill $PID
+ ip link del dev gre1 &> /dev/null
+ ip link del dev veth0 &> /dev/null
+ cleanup_ns $ns1
+}
+
+get_linklocal()
+{
+ local dev=$1
+ local ns=$2
+ local addr
+
+ [ -n "$ns" ] && ns="-netns $ns"
+
+ addr=$(ip -6 -br $ns addr show dev ${dev} | \
+ awk '{
+ for (i = 3; i <= NF; ++i) {
+ if ($i ~ /^fe80/)
+ print $i
+ }
+ }'
+ )
+ addr=${addr/\/*}
+
+ [ -z "$addr" ] && return 1
+
+ echo $addr
+
+ return 0
+}
+
+gre_create_tun()
+{
+ local a1=$1
+ local a2=$2
+ local mode
+
+ [[ $a1 =~ ^[0-9.]*$ ]] && mode=gre || mode=ip6gre
+
+ ip tunnel add gre1 mode $mode local $a1 remote $a2 dev veth0
+ ip link set gre1 up
+ $IP tunnel add gre1 mode $mode local $a2 remote $a1 dev veth0
+ $IP link set gre1 up
+}
+
+gre_gst_test_checks()
+{
+ local name=$1
+ local addr=$2
+ local proto=$3
+
+ [ "$proto" == 6 ] && addr="[$addr]"
+
+ $NS_EXEC socat - tcp${proto}-listen:$port,reuseaddr,fork >/dev/null &
+ PID=$!
+ while ! $NS_EXEC ss -ltn | grep -q $port; do ((i++)); sleep 0.01; done
+
+ cat $TMPFILE | timeout 1 socat -u STDIN TCP:$addr:$port
+ log_test $? 0 "$name - copy file w/ TSO"
+
+ ethtool -K veth0 tso off
+
+ cat $TMPFILE | timeout 1 socat -u STDIN TCP:$addr:$port
+ log_test $? 0 "$name - copy file w/ GSO"
+
+ ethtool -K veth0 tso on
+
+ kill $PID
+ PID=
+}
+
+gre6_gso_test()
+{
+ local port=7777
+
+ setup
+
+ a1=$(get_linklocal veth0)
+ a2=$(get_linklocal veth0 $ns1)
+
+ gre_create_tun $a1 $a2
+
+ ip addr add 172.16.2.1/24 dev gre1
+ $IP addr add 172.16.2.2/24 dev gre1
+
+ ip -6 addr add 2001:db8:1::1/64 dev gre1 nodad
+ $IP -6 addr add 2001:db8:1::2/64 dev gre1 nodad
+
+ sleep 2
+
+ gre_gst_test_checks GREv6/v4 172.16.2.2 4
+ gre_gst_test_checks GREv6/v6 2001:db8:1::2 6
+
+ cleanup
+}
+
+gre_gso_test()
+{
+ gre6_gso_test
+}
+
+################################################################################
+# usage
+
+usage()
+{
+ cat <<EOF
+usage: ${0##*/} OPTS
+
+ -t <test> Test(s) to run (default: all)
+ (options: $TESTS)
+ -p Pause on fail
+ -P Pause after each test before cleanup
+ -v verbose mode (show commands and output)
+EOF
+}
+
+################################################################################
+# main
+
+while getopts :t:pPhv o
+do
+ case $o in
+ t) TESTS=$OPTARG;;
+ p) PAUSE_ON_FAIL=yes;;
+ P) PAUSE=yes;;
+ v) VERBOSE=$(($VERBOSE + 1));;
+ h) usage; exit 0;;
+ *) usage; exit 1;;
+ esac
+done
+
+PEER_CMD="ip netns exec ${PEER_NS}"
+
+# make sure we don't pause twice
+[ "${PAUSE}" = "yes" ] && PAUSE_ON_FAIL=no
+
+if [ "$(id -u)" -ne 0 ];then
+ echo "SKIP: Need root privileges"
+ exit $ksft_skip;
+fi
+
+if [ ! -x "$(command -v ip)" ]; then
+ echo "SKIP: Could not run test without ip tool"
+ exit $ksft_skip
+fi
+
+if [ ! -x "$(command -v socat)" ]; then
+ echo "SKIP: Could not run test without socat tool"
+ exit $ksft_skip
+fi
+
+# start clean
+cleanup &> /dev/null
+
+for t in $TESTS
+do
+ case $t in
+ gre_gso) gre_gso_test;;
+
+ help) echo "Test names: $TESTS"; exit 0;;
+ esac
+done
+
+if [ "$TESTS" != "none" ]; then
+ printf "\nTests passed: %3d\n" ${nsuccess}
+ printf "Tests failed: %3d\n" ${nfail}
+fi
+
+exit $ret
diff --git a/tools/testing/selftests/net/gro.c b/tools/testing/selftests/net/gro.c
new file mode 100644
index 000000000000..353e1e867fbb
--- /dev/null
+++ b/tools/testing/selftests/net/gro.c
@@ -0,0 +1,1187 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * This testsuite provides conformance testing for GRO coalescing.
+ *
+ * Test cases:
+ * 1.data
+ * Data packets of the same size and same header setup with correct
+ * sequence numbers coalesce. The one exception being the last data
+ * packet coalesced: it can be smaller than the rest and coalesced
+ * as long as it is in the same flow.
+ * 2.ack
+ * Pure ACK does not coalesce.
+ * 3.flags
+ * Specific test cases: no packets with PSH, SYN, URG, RST set will
+ * be coalesced.
+ * 4.tcp
+ * Packets with incorrect checksum, non-consecutive seqno and
+ * different TCP header options shouldn't coalesce. Nit: given that
+ * some extension headers have paddings, such as timestamp, headers
+ * that are padding differently would not be coalesced.
+ * 5.ip:
+ * Packets with different (ECN, TTL, TOS) header, ip options or
+ * ip fragments (ipv6) shouldn't coalesce.
+ * 6.large:
+ * Packets larger than GRO_MAX_SIZE packets shouldn't coalesce.
+ *
+ * MSS is defined as 4096 - header because if it is too small
+ * (i.e. 1500 MTU - header), it will result in many packets,
+ * increasing the "large" test case's flakiness. This is because
+ * due to time sensitivity in the coalescing window, the receiver
+ * may not coalesce all of the packets.
+ *
+ * Note the timing issue applies to all of the test cases, so some
+ * flakiness is to be expected.
+ *
+ */
+
+#define _GNU_SOURCE
+
+#include <arpa/inet.h>
+#include <errno.h>
+#include <error.h>
+#include <getopt.h>
+#include <linux/filter.h>
+#include <linux/if_packet.h>
+#include <linux/ipv6.h>
+#include <net/ethernet.h>
+#include <net/if.h>
+#include <netinet/in.h>
+#include <netinet/ip.h>
+#include <netinet/ip6.h>
+#include <netinet/tcp.h>
+#include <stdbool.h>
+#include <stddef.h>
+#include <stdio.h>
+#include <stdarg.h>
+#include <string.h>
+#include <unistd.h>
+
+#include "../kselftest.h"
+
+#define DPORT 8000
+#define SPORT 1500
+#define PAYLOAD_LEN 100
+#define NUM_PACKETS 4
+#define START_SEQ 100
+#define START_ACK 100
+#define ETH_P_NONE 0
+#define TOTAL_HDR_LEN (ETH_HLEN + sizeof(struct ipv6hdr) + sizeof(struct tcphdr))
+#define MSS (4096 - sizeof(struct tcphdr) - sizeof(struct ipv6hdr))
+#define MAX_PAYLOAD (IP_MAXPACKET - sizeof(struct tcphdr) - sizeof(struct ipv6hdr))
+#define NUM_LARGE_PKT (MAX_PAYLOAD / MSS)
+#define MAX_HDR_LEN (ETH_HLEN + sizeof(struct ipv6hdr) + sizeof(struct tcphdr))
+#define MIN_EXTHDR_SIZE 8
+#define EXT_PAYLOAD_1 "\x00\x00\x00\x00\x00\x00"
+#define EXT_PAYLOAD_2 "\x11\x11\x11\x11\x11\x11"
+
+#define ipv6_optlen(p) (((p)->hdrlen+1) << 3) /* calculate IPv6 extension header len */
+#define BUILD_BUG_ON(condition) ((void)sizeof(char[1 - 2*!!(condition)]))
+
+static const char *addr6_src = "fdaa::2";
+static const char *addr6_dst = "fdaa::1";
+static const char *addr4_src = "192.168.1.200";
+static const char *addr4_dst = "192.168.1.100";
+static int proto = -1;
+static uint8_t src_mac[ETH_ALEN], dst_mac[ETH_ALEN];
+static char *testname = "data";
+static char *ifname = "eth0";
+static char *smac = "aa:00:00:00:00:02";
+static char *dmac = "aa:00:00:00:00:01";
+static bool verbose;
+static bool tx_socket = true;
+static int tcp_offset = -1;
+static int total_hdr_len = -1;
+static int ethhdr_proto = -1;
+
+static void vlog(const char *fmt, ...)
+{
+ va_list args;
+
+ if (verbose) {
+ va_start(args, fmt);
+ vfprintf(stderr, fmt, args);
+ va_end(args);
+ }
+}
+
+static void setup_sock_filter(int fd)
+{
+ const int dport_off = tcp_offset + offsetof(struct tcphdr, dest);
+ const int ethproto_off = offsetof(struct ethhdr, h_proto);
+ int optlen = 0;
+ int ipproto_off, opt_ipproto_off;
+ int next_off;
+
+ if (proto == PF_INET)
+ next_off = offsetof(struct iphdr, protocol);
+ else
+ next_off = offsetof(struct ipv6hdr, nexthdr);
+ ipproto_off = ETH_HLEN + next_off;
+
+ if (strcmp(testname, "ip") == 0) {
+ if (proto == PF_INET)
+ optlen = sizeof(struct ip_timestamp);
+ else {
+ BUILD_BUG_ON(sizeof(struct ip6_hbh) > MIN_EXTHDR_SIZE);
+ BUILD_BUG_ON(sizeof(struct ip6_dest) > MIN_EXTHDR_SIZE);
+ BUILD_BUG_ON(sizeof(struct ip6_frag) > MIN_EXTHDR_SIZE);
+
+ /* same size for HBH and Fragment extension header types */
+ optlen = MIN_EXTHDR_SIZE;
+ opt_ipproto_off = ETH_HLEN + sizeof(struct ipv6hdr)
+ + offsetof(struct ip6_ext, ip6e_nxt);
+ }
+ }
+
+ /* this filter validates the following:
+ * - packet is IPv4/IPv6 according to the running test.
+ * - packet is TCP. Also handles the case of one extension header and then TCP.
+ * - checks the packet tcp dport equals to DPORT. Also handles the case of one
+ * extension header and then TCP.
+ */
+ struct sock_filter filter[] = {
+ BPF_STMT(BPF_LD + BPF_H + BPF_ABS, ethproto_off),
+ BPF_JUMP(BPF_JMP + BPF_JEQ + BPF_K, ntohs(ethhdr_proto), 0, 9),
+ BPF_STMT(BPF_LD + BPF_B + BPF_ABS, ipproto_off),
+ BPF_JUMP(BPF_JMP + BPF_JEQ + BPF_K, IPPROTO_TCP, 2, 0),
+ BPF_STMT(BPF_LD + BPF_B + BPF_ABS, opt_ipproto_off),
+ BPF_JUMP(BPF_JMP + BPF_JEQ + BPF_K, IPPROTO_TCP, 0, 5),
+ BPF_STMT(BPF_LD + BPF_H + BPF_ABS, dport_off),
+ BPF_JUMP(BPF_JMP + BPF_JEQ + BPF_K, DPORT, 2, 0),
+ BPF_STMT(BPF_LD + BPF_H + BPF_ABS, dport_off + optlen),
+ BPF_JUMP(BPF_JMP + BPF_JEQ + BPF_K, DPORT, 0, 1),
+ BPF_STMT(BPF_RET + BPF_K, 0xFFFFFFFF),
+ BPF_STMT(BPF_RET + BPF_K, 0),
+ };
+
+ struct sock_fprog bpf = {
+ .len = ARRAY_SIZE(filter),
+ .filter = filter,
+ };
+
+ if (setsockopt(fd, SOL_SOCKET, SO_ATTACH_FILTER, &bpf, sizeof(bpf)) < 0)
+ error(1, errno, "error setting filter");
+}
+
+static uint32_t checksum_nofold(void *data, size_t len, uint32_t sum)
+{
+ uint16_t *words = data;
+ int i;
+
+ for (i = 0; i < len / 2; i++)
+ sum += words[i];
+ if (len & 1)
+ sum += ((char *)data)[len - 1];
+ return sum;
+}
+
+static uint16_t checksum_fold(void *data, size_t len, uint32_t sum)
+{
+ sum = checksum_nofold(data, len, sum);
+ while (sum > 0xFFFF)
+ sum = (sum & 0xFFFF) + (sum >> 16);
+ return ~sum;
+}
+
+static uint16_t tcp_checksum(void *buf, int payload_len)
+{
+ struct pseudo_header6 {
+ struct in6_addr saddr;
+ struct in6_addr daddr;
+ uint16_t protocol;
+ uint16_t payload_len;
+ } ph6;
+ struct pseudo_header4 {
+ struct in_addr saddr;
+ struct in_addr daddr;
+ uint16_t protocol;
+ uint16_t payload_len;
+ } ph4;
+ uint32_t sum = 0;
+
+ if (proto == PF_INET6) {
+ if (inet_pton(AF_INET6, addr6_src, &ph6.saddr) != 1)
+ error(1, errno, "inet_pton6 source ip pseudo");
+ if (inet_pton(AF_INET6, addr6_dst, &ph6.daddr) != 1)
+ error(1, errno, "inet_pton6 dest ip pseudo");
+ ph6.protocol = htons(IPPROTO_TCP);
+ ph6.payload_len = htons(sizeof(struct tcphdr) + payload_len);
+
+ sum = checksum_nofold(&ph6, sizeof(ph6), 0);
+ } else if (proto == PF_INET) {
+ if (inet_pton(AF_INET, addr4_src, &ph4.saddr) != 1)
+ error(1, errno, "inet_pton source ip pseudo");
+ if (inet_pton(AF_INET, addr4_dst, &ph4.daddr) != 1)
+ error(1, errno, "inet_pton dest ip pseudo");
+ ph4.protocol = htons(IPPROTO_TCP);
+ ph4.payload_len = htons(sizeof(struct tcphdr) + payload_len);
+
+ sum = checksum_nofold(&ph4, sizeof(ph4), 0);
+ }
+
+ return checksum_fold(buf, sizeof(struct tcphdr) + payload_len, sum);
+}
+
+static void read_MAC(uint8_t *mac_addr, char *mac)
+{
+ if (sscanf(mac, "%hhx:%hhx:%hhx:%hhx:%hhx:%hhx",
+ &mac_addr[0], &mac_addr[1], &mac_addr[2],
+ &mac_addr[3], &mac_addr[4], &mac_addr[5]) != 6)
+ error(1, 0, "sscanf");
+}
+
+static void fill_datalinklayer(void *buf)
+{
+ struct ethhdr *eth = buf;
+
+ memcpy(eth->h_dest, dst_mac, ETH_ALEN);
+ memcpy(eth->h_source, src_mac, ETH_ALEN);
+ eth->h_proto = ethhdr_proto;
+}
+
+static void fill_networklayer(void *buf, int payload_len)
+{
+ struct ipv6hdr *ip6h = buf;
+ struct iphdr *iph = buf;
+
+ if (proto == PF_INET6) {
+ memset(ip6h, 0, sizeof(*ip6h));
+
+ ip6h->version = 6;
+ ip6h->payload_len = htons(sizeof(struct tcphdr) + payload_len);
+ ip6h->nexthdr = IPPROTO_TCP;
+ ip6h->hop_limit = 8;
+ if (inet_pton(AF_INET6, addr6_src, &ip6h->saddr) != 1)
+ error(1, errno, "inet_pton source ip6");
+ if (inet_pton(AF_INET6, addr6_dst, &ip6h->daddr) != 1)
+ error(1, errno, "inet_pton dest ip6");
+ } else if (proto == PF_INET) {
+ memset(iph, 0, sizeof(*iph));
+
+ iph->version = 4;
+ iph->ihl = 5;
+ iph->ttl = 8;
+ iph->protocol = IPPROTO_TCP;
+ iph->tot_len = htons(sizeof(struct tcphdr) +
+ payload_len + sizeof(struct iphdr));
+ iph->frag_off = htons(0x4000); /* DF = 1, MF = 0 */
+ if (inet_pton(AF_INET, addr4_src, &iph->saddr) != 1)
+ error(1, errno, "inet_pton source ip");
+ if (inet_pton(AF_INET, addr4_dst, &iph->daddr) != 1)
+ error(1, errno, "inet_pton dest ip");
+ iph->check = checksum_fold(buf, sizeof(struct iphdr), 0);
+ }
+}
+
+static void fill_transportlayer(void *buf, int seq_offset, int ack_offset,
+ int payload_len, int fin)
+{
+ struct tcphdr *tcph = buf;
+
+ memset(tcph, 0, sizeof(*tcph));
+
+ tcph->source = htons(SPORT);
+ tcph->dest = htons(DPORT);
+ tcph->seq = ntohl(START_SEQ + seq_offset);
+ tcph->ack_seq = ntohl(START_ACK + ack_offset);
+ tcph->ack = 1;
+ tcph->fin = fin;
+ tcph->doff = 5;
+ tcph->window = htons(TCP_MAXWIN);
+ tcph->urg_ptr = 0;
+ tcph->check = tcp_checksum(tcph, payload_len);
+}
+
+static void write_packet(int fd, char *buf, int len, struct sockaddr_ll *daddr)
+{
+ int ret = -1;
+
+ ret = sendto(fd, buf, len, 0, (struct sockaddr *)daddr, sizeof(*daddr));
+ if (ret == -1)
+ error(1, errno, "sendto failure");
+ if (ret != len)
+ error(1, errno, "sendto wrong length");
+}
+
+static void create_packet(void *buf, int seq_offset, int ack_offset,
+ int payload_len, int fin)
+{
+ memset(buf, 0, total_hdr_len);
+ memset(buf + total_hdr_len, 'a', payload_len);
+ fill_transportlayer(buf + tcp_offset, seq_offset, ack_offset,
+ payload_len, fin);
+ fill_networklayer(buf + ETH_HLEN, payload_len);
+ fill_datalinklayer(buf);
+}
+
+/* send one extra flag, not first and not last pkt */
+static void send_flags(int fd, struct sockaddr_ll *daddr, int psh, int syn,
+ int rst, int urg)
+{
+ static char flag_buf[MAX_HDR_LEN + PAYLOAD_LEN];
+ static char buf[MAX_HDR_LEN + PAYLOAD_LEN];
+ int payload_len, pkt_size, flag, i;
+ struct tcphdr *tcph;
+
+ payload_len = PAYLOAD_LEN * psh;
+ pkt_size = total_hdr_len + payload_len;
+ flag = NUM_PACKETS / 2;
+
+ create_packet(flag_buf, flag * payload_len, 0, payload_len, 0);
+
+ tcph = (struct tcphdr *)(flag_buf + tcp_offset);
+ tcph->psh = psh;
+ tcph->syn = syn;
+ tcph->rst = rst;
+ tcph->urg = urg;
+ tcph->check = 0;
+ tcph->check = tcp_checksum(tcph, payload_len);
+
+ for (i = 0; i < NUM_PACKETS + 1; i++) {
+ if (i == flag) {
+ write_packet(fd, flag_buf, pkt_size, daddr);
+ continue;
+ }
+ create_packet(buf, i * PAYLOAD_LEN, 0, PAYLOAD_LEN, 0);
+ write_packet(fd, buf, total_hdr_len + PAYLOAD_LEN, daddr);
+ }
+}
+
+/* Test for data of same length, smaller than previous
+ * and of different lengths
+ */
+static void send_data_pkts(int fd, struct sockaddr_ll *daddr,
+ int payload_len1, int payload_len2)
+{
+ static char buf[ETH_HLEN + IP_MAXPACKET];
+
+ create_packet(buf, 0, 0, payload_len1, 0);
+ write_packet(fd, buf, total_hdr_len + payload_len1, daddr);
+ create_packet(buf, payload_len1, 0, payload_len2, 0);
+ write_packet(fd, buf, total_hdr_len + payload_len2, daddr);
+}
+
+/* If incoming segments make tracked segment length exceed
+ * legal IP datagram length, do not coalesce
+ */
+static void send_large(int fd, struct sockaddr_ll *daddr, int remainder)
+{
+ static char pkts[NUM_LARGE_PKT][TOTAL_HDR_LEN + MSS];
+ static char last[TOTAL_HDR_LEN + MSS];
+ static char new_seg[TOTAL_HDR_LEN + MSS];
+ int i;
+
+ for (i = 0; i < NUM_LARGE_PKT; i++)
+ create_packet(pkts[i], i * MSS, 0, MSS, 0);
+ create_packet(last, NUM_LARGE_PKT * MSS, 0, remainder, 0);
+ create_packet(new_seg, (NUM_LARGE_PKT + 1) * MSS, 0, remainder, 0);
+
+ for (i = 0; i < NUM_LARGE_PKT; i++)
+ write_packet(fd, pkts[i], total_hdr_len + MSS, daddr);
+ write_packet(fd, last, total_hdr_len + remainder, daddr);
+ write_packet(fd, new_seg, total_hdr_len + remainder, daddr);
+}
+
+/* Pure acks and dup acks don't coalesce */
+static void send_ack(int fd, struct sockaddr_ll *daddr)
+{
+ static char buf[MAX_HDR_LEN];
+
+ create_packet(buf, 0, 0, 0, 0);
+ write_packet(fd, buf, total_hdr_len, daddr);
+ write_packet(fd, buf, total_hdr_len, daddr);
+ create_packet(buf, 0, 1, 0, 0);
+ write_packet(fd, buf, total_hdr_len, daddr);
+}
+
+static void recompute_packet(char *buf, char *no_ext, int extlen)
+{
+ struct tcphdr *tcphdr = (struct tcphdr *)(buf + tcp_offset);
+ struct ipv6hdr *ip6h = (struct ipv6hdr *)(buf + ETH_HLEN);
+ struct iphdr *iph = (struct iphdr *)(buf + ETH_HLEN);
+
+ memmove(buf, no_ext, total_hdr_len);
+ memmove(buf + total_hdr_len + extlen,
+ no_ext + total_hdr_len, PAYLOAD_LEN);
+
+ tcphdr->doff = tcphdr->doff + (extlen / 4);
+ tcphdr->check = 0;
+ tcphdr->check = tcp_checksum(tcphdr, PAYLOAD_LEN + extlen);
+ if (proto == PF_INET) {
+ iph->tot_len = htons(ntohs(iph->tot_len) + extlen);
+ iph->check = 0;
+ iph->check = checksum_fold(iph, sizeof(struct iphdr), 0);
+ } else {
+ ip6h->payload_len = htons(ntohs(ip6h->payload_len) + extlen);
+ }
+}
+
+static void tcp_write_options(char *buf, int kind, int ts)
+{
+ struct tcp_option_ts {
+ uint8_t kind;
+ uint8_t len;
+ uint32_t tsval;
+ uint32_t tsecr;
+ } *opt_ts = (void *)buf;
+ struct tcp_option_window {
+ uint8_t kind;
+ uint8_t len;
+ uint8_t shift;
+ } *opt_window = (void *)buf;
+
+ switch (kind) {
+ case TCPOPT_NOP:
+ buf[0] = TCPOPT_NOP;
+ break;
+ case TCPOPT_WINDOW:
+ memset(opt_window, 0, sizeof(struct tcp_option_window));
+ opt_window->kind = TCPOPT_WINDOW;
+ opt_window->len = TCPOLEN_WINDOW;
+ opt_window->shift = 0;
+ break;
+ case TCPOPT_TIMESTAMP:
+ memset(opt_ts, 0, sizeof(struct tcp_option_ts));
+ opt_ts->kind = TCPOPT_TIMESTAMP;
+ opt_ts->len = TCPOLEN_TIMESTAMP;
+ opt_ts->tsval = ts;
+ opt_ts->tsecr = 0;
+ break;
+ default:
+ error(1, 0, "unimplemented TCP option");
+ break;
+ }
+}
+
+/* TCP with options is always a permutation of {TS, NOP, NOP}.
+ * Implement different orders to verify coalescing stops.
+ */
+static void add_standard_tcp_options(char *buf, char *no_ext, int ts, int order)
+{
+ switch (order) {
+ case 0:
+ tcp_write_options(buf + total_hdr_len, TCPOPT_NOP, 0);
+ tcp_write_options(buf + total_hdr_len + 1, TCPOPT_NOP, 0);
+ tcp_write_options(buf + total_hdr_len + 2 /* two NOP opts */,
+ TCPOPT_TIMESTAMP, ts);
+ break;
+ case 1:
+ tcp_write_options(buf + total_hdr_len, TCPOPT_NOP, 0);
+ tcp_write_options(buf + total_hdr_len + 1,
+ TCPOPT_TIMESTAMP, ts);
+ tcp_write_options(buf + total_hdr_len + 1 + TCPOLEN_TIMESTAMP,
+ TCPOPT_NOP, 0);
+ break;
+ case 2:
+ tcp_write_options(buf + total_hdr_len, TCPOPT_TIMESTAMP, ts);
+ tcp_write_options(buf + total_hdr_len + TCPOLEN_TIMESTAMP + 1,
+ TCPOPT_NOP, 0);
+ tcp_write_options(buf + total_hdr_len + TCPOLEN_TIMESTAMP + 2,
+ TCPOPT_NOP, 0);
+ break;
+ default:
+ error(1, 0, "unknown order");
+ break;
+ }
+ recompute_packet(buf, no_ext, TCPOLEN_TSTAMP_APPA);
+}
+
+/* Packets with invalid checksum don't coalesce. */
+static void send_changed_checksum(int fd, struct sockaddr_ll *daddr)
+{
+ static char buf[MAX_HDR_LEN + PAYLOAD_LEN];
+ struct tcphdr *tcph = (struct tcphdr *)(buf + tcp_offset);
+ int pkt_size = total_hdr_len + PAYLOAD_LEN;
+
+ create_packet(buf, 0, 0, PAYLOAD_LEN, 0);
+ write_packet(fd, buf, pkt_size, daddr);
+
+ create_packet(buf, PAYLOAD_LEN, 0, PAYLOAD_LEN, 0);
+ tcph->check = tcph->check - 1;
+ write_packet(fd, buf, pkt_size, daddr);
+}
+
+ /* Packets with non-consecutive sequence number don't coalesce.*/
+static void send_changed_seq(int fd, struct sockaddr_ll *daddr)
+{
+ static char buf[MAX_HDR_LEN + PAYLOAD_LEN];
+ struct tcphdr *tcph = (struct tcphdr *)(buf + tcp_offset);
+ int pkt_size = total_hdr_len + PAYLOAD_LEN;
+
+ create_packet(buf, 0, 0, PAYLOAD_LEN, 0);
+ write_packet(fd, buf, pkt_size, daddr);
+
+ create_packet(buf, PAYLOAD_LEN, 0, PAYLOAD_LEN, 0);
+ tcph->seq = ntohl(htonl(tcph->seq) + 1);
+ tcph->check = 0;
+ tcph->check = tcp_checksum(tcph, PAYLOAD_LEN);
+ write_packet(fd, buf, pkt_size, daddr);
+}
+
+ /* Packet with different timestamp option or different timestamps
+ * don't coalesce.
+ */
+static void send_changed_ts(int fd, struct sockaddr_ll *daddr)
+{
+ static char buf[MAX_HDR_LEN + PAYLOAD_LEN];
+ static char extpkt[sizeof(buf) + TCPOLEN_TSTAMP_APPA];
+ int pkt_size = total_hdr_len + PAYLOAD_LEN + TCPOLEN_TSTAMP_APPA;
+
+ create_packet(buf, 0, 0, PAYLOAD_LEN, 0);
+ add_standard_tcp_options(extpkt, buf, 0, 0);
+ write_packet(fd, extpkt, pkt_size, daddr);
+
+ create_packet(buf, PAYLOAD_LEN, 0, PAYLOAD_LEN, 0);
+ add_standard_tcp_options(extpkt, buf, 0, 0);
+ write_packet(fd, extpkt, pkt_size, daddr);
+
+ create_packet(buf, PAYLOAD_LEN * 2, 0, PAYLOAD_LEN, 0);
+ add_standard_tcp_options(extpkt, buf, 100, 0);
+ write_packet(fd, extpkt, pkt_size, daddr);
+
+ create_packet(buf, PAYLOAD_LEN * 3, 0, PAYLOAD_LEN, 0);
+ add_standard_tcp_options(extpkt, buf, 100, 1);
+ write_packet(fd, extpkt, pkt_size, daddr);
+
+ create_packet(buf, PAYLOAD_LEN * 4, 0, PAYLOAD_LEN, 0);
+ add_standard_tcp_options(extpkt, buf, 100, 2);
+ write_packet(fd, extpkt, pkt_size, daddr);
+}
+
+/* Packet with different tcp options don't coalesce. */
+static void send_diff_opt(int fd, struct sockaddr_ll *daddr)
+{
+ static char buf[MAX_HDR_LEN + PAYLOAD_LEN];
+ static char extpkt1[sizeof(buf) + TCPOLEN_TSTAMP_APPA];
+ static char extpkt2[sizeof(buf) + TCPOLEN_MAXSEG];
+ int extpkt1_size = total_hdr_len + PAYLOAD_LEN + TCPOLEN_TSTAMP_APPA;
+ int extpkt2_size = total_hdr_len + PAYLOAD_LEN + TCPOLEN_MAXSEG;
+
+ create_packet(buf, 0, 0, PAYLOAD_LEN, 0);
+ add_standard_tcp_options(extpkt1, buf, 0, 0);
+ write_packet(fd, extpkt1, extpkt1_size, daddr);
+
+ create_packet(buf, PAYLOAD_LEN, 0, PAYLOAD_LEN, 0);
+ add_standard_tcp_options(extpkt1, buf, 0, 0);
+ write_packet(fd, extpkt1, extpkt1_size, daddr);
+
+ create_packet(buf, PAYLOAD_LEN * 2, 0, PAYLOAD_LEN, 0);
+ tcp_write_options(extpkt2 + MAX_HDR_LEN, TCPOPT_NOP, 0);
+ tcp_write_options(extpkt2 + MAX_HDR_LEN + 1, TCPOPT_WINDOW, 0);
+ recompute_packet(extpkt2, buf, TCPOLEN_WINDOW + 1);
+ write_packet(fd, extpkt2, extpkt2_size, daddr);
+}
+
+static void add_ipv4_ts_option(void *buf, void *optpkt)
+{
+ struct ip_timestamp *ts = (struct ip_timestamp *)(optpkt + tcp_offset);
+ int optlen = sizeof(struct ip_timestamp);
+ struct iphdr *iph;
+
+ if (optlen % 4)
+ error(1, 0, "ipv4 timestamp length is not a multiple of 4B");
+
+ ts->ipt_code = IPOPT_TS;
+ ts->ipt_len = optlen;
+ ts->ipt_ptr = 5;
+ ts->ipt_flg = IPOPT_TS_TSONLY;
+
+ memcpy(optpkt, buf, tcp_offset);
+ memcpy(optpkt + tcp_offset + optlen, buf + tcp_offset,
+ sizeof(struct tcphdr) + PAYLOAD_LEN);
+
+ iph = (struct iphdr *)(optpkt + ETH_HLEN);
+ iph->ihl = 5 + (optlen / 4);
+ iph->tot_len = htons(ntohs(iph->tot_len) + optlen);
+ iph->check = 0;
+ iph->check = checksum_fold(iph, sizeof(struct iphdr) + optlen, 0);
+}
+
+static void add_ipv6_exthdr(void *buf, void *optpkt, __u8 exthdr_type, char *ext_payload)
+{
+ struct ipv6_opt_hdr *exthdr = (struct ipv6_opt_hdr *)(optpkt + tcp_offset);
+ struct ipv6hdr *iph = (struct ipv6hdr *)(optpkt + ETH_HLEN);
+ char *exthdr_payload_start = (char *)(exthdr + 1);
+
+ exthdr->hdrlen = 0;
+ exthdr->nexthdr = IPPROTO_TCP;
+
+ memcpy(exthdr_payload_start, ext_payload, MIN_EXTHDR_SIZE - sizeof(*exthdr));
+
+ memcpy(optpkt, buf, tcp_offset);
+ memcpy(optpkt + tcp_offset + MIN_EXTHDR_SIZE, buf + tcp_offset,
+ sizeof(struct tcphdr) + PAYLOAD_LEN);
+
+ iph->nexthdr = exthdr_type;
+ iph->payload_len = htons(ntohs(iph->payload_len) + MIN_EXTHDR_SIZE);
+}
+
+static void send_ipv6_exthdr(int fd, struct sockaddr_ll *daddr, char *ext_data1, char *ext_data2)
+{
+ static char buf[MAX_HDR_LEN + PAYLOAD_LEN];
+ static char exthdr_pck[sizeof(buf) + MIN_EXTHDR_SIZE];
+
+ create_packet(buf, 0, 0, PAYLOAD_LEN, 0);
+ add_ipv6_exthdr(buf, exthdr_pck, IPPROTO_HOPOPTS, ext_data1);
+ write_packet(fd, exthdr_pck, total_hdr_len + PAYLOAD_LEN + MIN_EXTHDR_SIZE, daddr);
+
+ create_packet(buf, PAYLOAD_LEN * 1, 0, PAYLOAD_LEN, 0);
+ add_ipv6_exthdr(buf, exthdr_pck, IPPROTO_HOPOPTS, ext_data2);
+ write_packet(fd, exthdr_pck, total_hdr_len + PAYLOAD_LEN + MIN_EXTHDR_SIZE, daddr);
+}
+
+/* IPv4 options shouldn't coalesce */
+static void send_ip_options(int fd, struct sockaddr_ll *daddr)
+{
+ static char buf[MAX_HDR_LEN + PAYLOAD_LEN];
+ static char optpkt[sizeof(buf) + sizeof(struct ip_timestamp)];
+ int optlen = sizeof(struct ip_timestamp);
+ int pkt_size = total_hdr_len + PAYLOAD_LEN + optlen;
+
+ create_packet(buf, 0, 0, PAYLOAD_LEN, 0);
+ write_packet(fd, buf, total_hdr_len + PAYLOAD_LEN, daddr);
+
+ create_packet(buf, PAYLOAD_LEN * 1, 0, PAYLOAD_LEN, 0);
+ add_ipv4_ts_option(buf, optpkt);
+ write_packet(fd, optpkt, pkt_size, daddr);
+
+ create_packet(buf, PAYLOAD_LEN * 2, 0, PAYLOAD_LEN, 0);
+ write_packet(fd, buf, total_hdr_len + PAYLOAD_LEN, daddr);
+}
+
+/* IPv4 fragments shouldn't coalesce */
+static void send_fragment4(int fd, struct sockaddr_ll *daddr)
+{
+ static char buf[IP_MAXPACKET];
+ struct iphdr *iph = (struct iphdr *)(buf + ETH_HLEN);
+ int pkt_size = total_hdr_len + PAYLOAD_LEN;
+
+ create_packet(buf, 0, 0, PAYLOAD_LEN, 0);
+ write_packet(fd, buf, pkt_size, daddr);
+
+ /* Once fragmented, packet would retain the total_len.
+ * Tcp header is prepared as if rest of data is in follow-up frags,
+ * but follow up frags aren't actually sent.
+ */
+ memset(buf + total_hdr_len, 'a', PAYLOAD_LEN * 2);
+ fill_transportlayer(buf + tcp_offset, PAYLOAD_LEN, 0, PAYLOAD_LEN * 2, 0);
+ fill_networklayer(buf + ETH_HLEN, PAYLOAD_LEN);
+ fill_datalinklayer(buf);
+
+ iph->frag_off = htons(0x6000); // DF = 1, MF = 1
+ iph->check = 0;
+ iph->check = checksum_fold(iph, sizeof(struct iphdr), 0);
+ write_packet(fd, buf, pkt_size, daddr);
+}
+
+/* IPv4 packets with different ttl don't coalesce.*/
+static void send_changed_ttl(int fd, struct sockaddr_ll *daddr)
+{
+ int pkt_size = total_hdr_len + PAYLOAD_LEN;
+ static char buf[MAX_HDR_LEN + PAYLOAD_LEN];
+ struct iphdr *iph = (struct iphdr *)(buf + ETH_HLEN);
+
+ create_packet(buf, 0, 0, PAYLOAD_LEN, 0);
+ write_packet(fd, buf, pkt_size, daddr);
+
+ create_packet(buf, PAYLOAD_LEN, 0, PAYLOAD_LEN, 0);
+ iph->ttl = 7;
+ iph->check = 0;
+ iph->check = checksum_fold(iph, sizeof(struct iphdr), 0);
+ write_packet(fd, buf, pkt_size, daddr);
+}
+
+/* Packets with different tos don't coalesce.*/
+static void send_changed_tos(int fd, struct sockaddr_ll *daddr)
+{
+ int pkt_size = total_hdr_len + PAYLOAD_LEN;
+ static char buf[MAX_HDR_LEN + PAYLOAD_LEN];
+ struct iphdr *iph = (struct iphdr *)(buf + ETH_HLEN);
+ struct ipv6hdr *ip6h = (struct ipv6hdr *)(buf + ETH_HLEN);
+
+ create_packet(buf, 0, 0, PAYLOAD_LEN, 0);
+ write_packet(fd, buf, pkt_size, daddr);
+
+ create_packet(buf, PAYLOAD_LEN, 0, PAYLOAD_LEN, 0);
+ if (proto == PF_INET) {
+ iph->tos = 1;
+ iph->check = 0;
+ iph->check = checksum_fold(iph, sizeof(struct iphdr), 0);
+ } else if (proto == PF_INET6) {
+ ip6h->priority = 0xf;
+ }
+ write_packet(fd, buf, pkt_size, daddr);
+}
+
+/* Packets with different ECN don't coalesce.*/
+static void send_changed_ECN(int fd, struct sockaddr_ll *daddr)
+{
+ int pkt_size = total_hdr_len + PAYLOAD_LEN;
+ static char buf[MAX_HDR_LEN + PAYLOAD_LEN];
+ struct iphdr *iph = (struct iphdr *)(buf + ETH_HLEN);
+
+ create_packet(buf, 0, 0, PAYLOAD_LEN, 0);
+ write_packet(fd, buf, pkt_size, daddr);
+
+ create_packet(buf, PAYLOAD_LEN, 0, PAYLOAD_LEN, 0);
+ if (proto == PF_INET) {
+ buf[ETH_HLEN + 1] ^= 0x2; // ECN set to 10
+ iph->check = 0;
+ iph->check = checksum_fold(iph, sizeof(struct iphdr), 0);
+ } else {
+ buf[ETH_HLEN + 1] ^= 0x20; // ECN set to 10
+ }
+ write_packet(fd, buf, pkt_size, daddr);
+}
+
+/* IPv6 fragments and packets with extensions don't coalesce.*/
+static void send_fragment6(int fd, struct sockaddr_ll *daddr)
+{
+ static char buf[MAX_HDR_LEN + PAYLOAD_LEN];
+ static char extpkt[MAX_HDR_LEN + PAYLOAD_LEN +
+ sizeof(struct ip6_frag)];
+ struct ipv6hdr *ip6h = (struct ipv6hdr *)(buf + ETH_HLEN);
+ struct ip6_frag *frag = (void *)(extpkt + tcp_offset);
+ int extlen = sizeof(struct ip6_frag);
+ int bufpkt_len = total_hdr_len + PAYLOAD_LEN;
+ int extpkt_len = bufpkt_len + extlen;
+ int i;
+
+ for (i = 0; i < 2; i++) {
+ create_packet(buf, PAYLOAD_LEN * i, 0, PAYLOAD_LEN, 0);
+ write_packet(fd, buf, bufpkt_len, daddr);
+ }
+ sleep(1);
+ create_packet(buf, PAYLOAD_LEN * 2, 0, PAYLOAD_LEN, 0);
+ memset(extpkt, 0, extpkt_len);
+
+ ip6h->nexthdr = IPPROTO_FRAGMENT;
+ ip6h->payload_len = htons(ntohs(ip6h->payload_len) + extlen);
+ frag->ip6f_nxt = IPPROTO_TCP;
+
+ memcpy(extpkt, buf, tcp_offset);
+ memcpy(extpkt + tcp_offset + extlen, buf + tcp_offset,
+ sizeof(struct tcphdr) + PAYLOAD_LEN);
+ write_packet(fd, extpkt, extpkt_len, daddr);
+
+ create_packet(buf, PAYLOAD_LEN * 3, 0, PAYLOAD_LEN, 0);
+ write_packet(fd, buf, bufpkt_len, daddr);
+}
+
+static void bind_packetsocket(int fd)
+{
+ struct sockaddr_ll daddr = {};
+
+ daddr.sll_family = AF_PACKET;
+ daddr.sll_protocol = ethhdr_proto;
+ daddr.sll_ifindex = if_nametoindex(ifname);
+ if (daddr.sll_ifindex == 0)
+ error(1, errno, "if_nametoindex");
+
+ if (bind(fd, (void *)&daddr, sizeof(daddr)) < 0)
+ error(1, errno, "could not bind socket");
+}
+
+static void set_timeout(int fd)
+{
+ struct timeval timeout;
+
+ timeout.tv_sec = 3;
+ timeout.tv_usec = 0;
+ if (setsockopt(fd, SOL_SOCKET, SO_RCVTIMEO, (char *)&timeout,
+ sizeof(timeout)) < 0)
+ error(1, errno, "cannot set timeout, setsockopt failed");
+}
+
+static void check_recv_pkts(int fd, int *correct_payload,
+ int correct_num_pkts)
+{
+ static char buffer[IP_MAXPACKET + ETH_HLEN + 1];
+ struct iphdr *iph = (struct iphdr *)(buffer + ETH_HLEN);
+ struct ipv6hdr *ip6h = (struct ipv6hdr *)(buffer + ETH_HLEN);
+ struct tcphdr *tcph;
+ bool bad_packet = false;
+ int tcp_ext_len = 0;
+ int ip_ext_len = 0;
+ int pkt_size = -1;
+ int data_len = 0;
+ int num_pkt = 0;
+ int i;
+
+ vlog("Expected {");
+ for (i = 0; i < correct_num_pkts; i++)
+ vlog("%d ", correct_payload[i]);
+ vlog("}, Total %d packets\nReceived {", correct_num_pkts);
+
+ while (1) {
+ ip_ext_len = 0;
+ pkt_size = recv(fd, buffer, IP_MAXPACKET + ETH_HLEN + 1, 0);
+ if (pkt_size < 0)
+ error(1, errno, "could not receive");
+
+ if (iph->version == 4)
+ ip_ext_len = (iph->ihl - 5) * 4;
+ else if (ip6h->version == 6 && ip6h->nexthdr != IPPROTO_TCP)
+ ip_ext_len = MIN_EXTHDR_SIZE;
+
+ tcph = (struct tcphdr *)(buffer + tcp_offset + ip_ext_len);
+
+ if (tcph->fin)
+ break;
+
+ tcp_ext_len = (tcph->doff - 5) * 4;
+ data_len = pkt_size - total_hdr_len - tcp_ext_len - ip_ext_len;
+ /* Min ethernet frame payload is 46(ETH_ZLEN - ETH_HLEN) by RFC 802.3.
+ * Ipv4/tcp packets without at least 6 bytes of data will be padded.
+ * Packet sockets are protocol agnostic, and will not trim the padding.
+ */
+ if (pkt_size == ETH_ZLEN && iph->version == 4) {
+ data_len = ntohs(iph->tot_len)
+ - sizeof(struct tcphdr) - sizeof(struct iphdr);
+ }
+ vlog("%d ", data_len);
+ if (data_len != correct_payload[num_pkt]) {
+ vlog("[!=%d]", correct_payload[num_pkt]);
+ bad_packet = true;
+ }
+ num_pkt++;
+ }
+ vlog("}, Total %d packets.\n", num_pkt);
+ if (num_pkt != correct_num_pkts)
+ error(1, 0, "incorrect number of packets");
+ if (bad_packet)
+ error(1, 0, "incorrect packet geometry");
+
+ printf("Test succeeded\n\n");
+}
+
+static void gro_sender(void)
+{
+ static char fin_pkt[MAX_HDR_LEN];
+ struct sockaddr_ll daddr = {};
+ int txfd = -1;
+
+ txfd = socket(PF_PACKET, SOCK_RAW, IPPROTO_RAW);
+ if (txfd < 0)
+ error(1, errno, "socket creation");
+
+ memset(&daddr, 0, sizeof(daddr));
+ daddr.sll_ifindex = if_nametoindex(ifname);
+ if (daddr.sll_ifindex == 0)
+ error(1, errno, "if_nametoindex");
+ daddr.sll_family = AF_PACKET;
+ memcpy(daddr.sll_addr, dst_mac, ETH_ALEN);
+ daddr.sll_halen = ETH_ALEN;
+ create_packet(fin_pkt, PAYLOAD_LEN * 2, 0, 0, 1);
+
+ if (strcmp(testname, "data") == 0) {
+ send_data_pkts(txfd, &daddr, PAYLOAD_LEN, PAYLOAD_LEN);
+ write_packet(txfd, fin_pkt, total_hdr_len, &daddr);
+
+ send_data_pkts(txfd, &daddr, PAYLOAD_LEN, PAYLOAD_LEN / 2);
+ write_packet(txfd, fin_pkt, total_hdr_len, &daddr);
+
+ send_data_pkts(txfd, &daddr, PAYLOAD_LEN / 2, PAYLOAD_LEN);
+ write_packet(txfd, fin_pkt, total_hdr_len, &daddr);
+ } else if (strcmp(testname, "ack") == 0) {
+ send_ack(txfd, &daddr);
+ write_packet(txfd, fin_pkt, total_hdr_len, &daddr);
+ } else if (strcmp(testname, "flags") == 0) {
+ send_flags(txfd, &daddr, 1, 0, 0, 0);
+ write_packet(txfd, fin_pkt, total_hdr_len, &daddr);
+
+ send_flags(txfd, &daddr, 0, 1, 0, 0);
+ write_packet(txfd, fin_pkt, total_hdr_len, &daddr);
+
+ send_flags(txfd, &daddr, 0, 0, 1, 0);
+ write_packet(txfd, fin_pkt, total_hdr_len, &daddr);
+
+ send_flags(txfd, &daddr, 0, 0, 0, 1);
+ write_packet(txfd, fin_pkt, total_hdr_len, &daddr);
+ } else if (strcmp(testname, "tcp") == 0) {
+ send_changed_checksum(txfd, &daddr);
+ write_packet(txfd, fin_pkt, total_hdr_len, &daddr);
+
+ send_changed_seq(txfd, &daddr);
+ write_packet(txfd, fin_pkt, total_hdr_len, &daddr);
+
+ send_changed_ts(txfd, &daddr);
+ write_packet(txfd, fin_pkt, total_hdr_len, &daddr);
+
+ send_diff_opt(txfd, &daddr);
+ write_packet(txfd, fin_pkt, total_hdr_len, &daddr);
+ } else if (strcmp(testname, "ip") == 0) {
+ send_changed_ECN(txfd, &daddr);
+ write_packet(txfd, fin_pkt, total_hdr_len, &daddr);
+
+ send_changed_tos(txfd, &daddr);
+ write_packet(txfd, fin_pkt, total_hdr_len, &daddr);
+ if (proto == PF_INET) {
+ /* Modified packets may be received out of order.
+ * Sleep function added to enforce test boundaries
+ * so that fin pkts are not received prior to other pkts.
+ */
+ sleep(1);
+ send_changed_ttl(txfd, &daddr);
+ write_packet(txfd, fin_pkt, total_hdr_len, &daddr);
+
+ sleep(1);
+ send_ip_options(txfd, &daddr);
+ sleep(1);
+ write_packet(txfd, fin_pkt, total_hdr_len, &daddr);
+
+ sleep(1);
+ send_fragment4(txfd, &daddr);
+ sleep(1);
+ write_packet(txfd, fin_pkt, total_hdr_len, &daddr);
+ } else if (proto == PF_INET6) {
+ sleep(1);
+ send_fragment6(txfd, &daddr);
+ sleep(1);
+ write_packet(txfd, fin_pkt, total_hdr_len, &daddr);
+
+ sleep(1);
+ /* send IPv6 packets with ext header with same payload */
+ send_ipv6_exthdr(txfd, &daddr, EXT_PAYLOAD_1, EXT_PAYLOAD_1);
+ sleep(1);
+ write_packet(txfd, fin_pkt, total_hdr_len, &daddr);
+
+ sleep(1);
+ /* send IPv6 packets with ext header with different payload */
+ send_ipv6_exthdr(txfd, &daddr, EXT_PAYLOAD_1, EXT_PAYLOAD_2);
+ sleep(1);
+ write_packet(txfd, fin_pkt, total_hdr_len, &daddr);
+ }
+ } else if (strcmp(testname, "large") == 0) {
+ /* 20 is the difference between min iphdr size
+ * and min ipv6hdr size. Like MAX_HDR_SIZE,
+ * MAX_PAYLOAD is defined with the larger header of the two.
+ */
+ int offset = proto == PF_INET ? 20 : 0;
+ int remainder = (MAX_PAYLOAD + offset) % MSS;
+
+ send_large(txfd, &daddr, remainder);
+ write_packet(txfd, fin_pkt, total_hdr_len, &daddr);
+
+ send_large(txfd, &daddr, remainder + 1);
+ write_packet(txfd, fin_pkt, total_hdr_len, &daddr);
+ } else {
+ error(1, 0, "Unknown testcase");
+ }
+
+ if (close(txfd))
+ error(1, errno, "socket close");
+}
+
+static void gro_receiver(void)
+{
+ static int correct_payload[NUM_PACKETS];
+ int rxfd = -1;
+
+ rxfd = socket(PF_PACKET, SOCK_RAW, htons(ETH_P_NONE));
+ if (rxfd < 0)
+ error(1, 0, "socket creation");
+ setup_sock_filter(rxfd);
+ set_timeout(rxfd);
+ bind_packetsocket(rxfd);
+
+ memset(correct_payload, 0, sizeof(correct_payload));
+
+ if (strcmp(testname, "data") == 0) {
+ printf("pure data packet of same size: ");
+ correct_payload[0] = PAYLOAD_LEN * 2;
+ check_recv_pkts(rxfd, correct_payload, 1);
+
+ printf("large data packets followed by a smaller one: ");
+ correct_payload[0] = PAYLOAD_LEN * 1.5;
+ check_recv_pkts(rxfd, correct_payload, 1);
+
+ printf("small data packets followed by a larger one: ");
+ correct_payload[0] = PAYLOAD_LEN / 2;
+ correct_payload[1] = PAYLOAD_LEN;
+ check_recv_pkts(rxfd, correct_payload, 2);
+ } else if (strcmp(testname, "ack") == 0) {
+ printf("duplicate ack and pure ack: ");
+ check_recv_pkts(rxfd, correct_payload, 3);
+ } else if (strcmp(testname, "flags") == 0) {
+ correct_payload[0] = PAYLOAD_LEN * 3;
+ correct_payload[1] = PAYLOAD_LEN * 2;
+
+ printf("psh flag ends coalescing: ");
+ check_recv_pkts(rxfd, correct_payload, 2);
+
+ correct_payload[0] = PAYLOAD_LEN * 2;
+ correct_payload[1] = 0;
+ correct_payload[2] = PAYLOAD_LEN * 2;
+ printf("syn flag ends coalescing: ");
+ check_recv_pkts(rxfd, correct_payload, 3);
+
+ printf("rst flag ends coalescing: ");
+ check_recv_pkts(rxfd, correct_payload, 3);
+
+ printf("urg flag ends coalescing: ");
+ check_recv_pkts(rxfd, correct_payload, 3);
+ } else if (strcmp(testname, "tcp") == 0) {
+ correct_payload[0] = PAYLOAD_LEN;
+ correct_payload[1] = PAYLOAD_LEN;
+ correct_payload[2] = PAYLOAD_LEN;
+ correct_payload[3] = PAYLOAD_LEN;
+
+ printf("changed checksum does not coalesce: ");
+ check_recv_pkts(rxfd, correct_payload, 2);
+
+ printf("Wrong Seq number doesn't coalesce: ");
+ check_recv_pkts(rxfd, correct_payload, 2);
+
+ printf("Different timestamp doesn't coalesce: ");
+ correct_payload[0] = PAYLOAD_LEN * 2;
+ check_recv_pkts(rxfd, correct_payload, 4);
+
+ printf("Different options doesn't coalesce: ");
+ correct_payload[0] = PAYLOAD_LEN * 2;
+ check_recv_pkts(rxfd, correct_payload, 2);
+ } else if (strcmp(testname, "ip") == 0) {
+ correct_payload[0] = PAYLOAD_LEN;
+ correct_payload[1] = PAYLOAD_LEN;
+
+ printf("different ECN doesn't coalesce: ");
+ check_recv_pkts(rxfd, correct_payload, 2);
+
+ printf("different tos doesn't coalesce: ");
+ check_recv_pkts(rxfd, correct_payload, 2);
+
+ if (proto == PF_INET) {
+ printf("different ttl doesn't coalesce: ");
+ check_recv_pkts(rxfd, correct_payload, 2);
+
+ printf("ip options doesn't coalesce: ");
+ correct_payload[2] = PAYLOAD_LEN;
+ check_recv_pkts(rxfd, correct_payload, 3);
+
+ printf("fragmented ip4 doesn't coalesce: ");
+ check_recv_pkts(rxfd, correct_payload, 2);
+ } else if (proto == PF_INET6) {
+ /* GRO doesn't check for ipv6 hop limit when flushing.
+ * Hence no corresponding test to the ipv4 case.
+ */
+ printf("fragmented ip6 doesn't coalesce: ");
+ correct_payload[0] = PAYLOAD_LEN * 2;
+ correct_payload[1] = PAYLOAD_LEN;
+ correct_payload[2] = PAYLOAD_LEN;
+ check_recv_pkts(rxfd, correct_payload, 3);
+
+ printf("ipv6 with ext header does coalesce: ");
+ correct_payload[0] = PAYLOAD_LEN * 2;
+ check_recv_pkts(rxfd, correct_payload, 1);
+
+ printf("ipv6 with ext header with different payloads doesn't coalesce: ");
+ correct_payload[0] = PAYLOAD_LEN;
+ correct_payload[1] = PAYLOAD_LEN;
+ check_recv_pkts(rxfd, correct_payload, 2);
+ }
+ } else if (strcmp(testname, "large") == 0) {
+ int offset = proto == PF_INET ? 20 : 0;
+ int remainder = (MAX_PAYLOAD + offset) % MSS;
+
+ correct_payload[0] = (MAX_PAYLOAD + offset);
+ correct_payload[1] = remainder;
+ printf("Shouldn't coalesce if exceed IP max pkt size: ");
+ check_recv_pkts(rxfd, correct_payload, 2);
+
+ /* last segment sent individually, doesn't start new segment */
+ correct_payload[0] = correct_payload[0] - remainder;
+ correct_payload[1] = remainder + 1;
+ correct_payload[2] = remainder + 1;
+ check_recv_pkts(rxfd, correct_payload, 3);
+ } else {
+ error(1, 0, "Test case error, should never trigger");
+ }
+
+ if (close(rxfd))
+ error(1, 0, "socket close");
+}
+
+static void parse_args(int argc, char **argv)
+{
+ static const struct option opts[] = {
+ { "daddr", required_argument, NULL, 'd' },
+ { "dmac", required_argument, NULL, 'D' },
+ { "iface", required_argument, NULL, 'i' },
+ { "ipv4", no_argument, NULL, '4' },
+ { "ipv6", no_argument, NULL, '6' },
+ { "rx", no_argument, NULL, 'r' },
+ { "saddr", required_argument, NULL, 's' },
+ { "smac", required_argument, NULL, 'S' },
+ { "test", required_argument, NULL, 't' },
+ { "verbose", no_argument, NULL, 'v' },
+ { 0, 0, 0, 0 }
+ };
+ int c;
+
+ while ((c = getopt_long(argc, argv, "46d:D:i:rs:S:t:v", opts, NULL)) != -1) {
+ switch (c) {
+ case '4':
+ proto = PF_INET;
+ ethhdr_proto = htons(ETH_P_IP);
+ break;
+ case '6':
+ proto = PF_INET6;
+ ethhdr_proto = htons(ETH_P_IPV6);
+ break;
+ case 'd':
+ addr4_dst = addr6_dst = optarg;
+ break;
+ case 'D':
+ dmac = optarg;
+ break;
+ case 'i':
+ ifname = optarg;
+ break;
+ case 'r':
+ tx_socket = false;
+ break;
+ case 's':
+ addr4_src = addr6_src = optarg;
+ break;
+ case 'S':
+ smac = optarg;
+ break;
+ case 't':
+ testname = optarg;
+ break;
+ case 'v':
+ verbose = true;
+ break;
+ default:
+ error(1, 0, "%s invalid option %c\n", __func__, c);
+ break;
+ }
+ }
+}
+
+int main(int argc, char **argv)
+{
+ parse_args(argc, argv);
+
+ if (proto == PF_INET) {
+ tcp_offset = ETH_HLEN + sizeof(struct iphdr);
+ total_hdr_len = tcp_offset + sizeof(struct tcphdr);
+ } else if (proto == PF_INET6) {
+ tcp_offset = ETH_HLEN + sizeof(struct ipv6hdr);
+ total_hdr_len = MAX_HDR_LEN;
+ } else {
+ error(1, 0, "Protocol family is not ipv4 or ipv6");
+ }
+
+ read_MAC(src_mac, smac);
+ read_MAC(dst_mac, dmac);
+
+ if (tx_socket)
+ gro_sender();
+ else
+ gro_receiver();
+
+ fprintf(stderr, "Gro::%s test passed.\n", testname);
+ return 0;
+}
diff --git a/tools/testing/selftests/net/gro.sh b/tools/testing/selftests/net/gro.sh
new file mode 100755
index 000000000000..02c21ff4ca81
--- /dev/null
+++ b/tools/testing/selftests/net/gro.sh
@@ -0,0 +1,104 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+readonly SERVER_MAC="aa:00:00:00:00:02"
+readonly CLIENT_MAC="aa:00:00:00:00:01"
+readonly TESTS=("data" "ack" "flags" "tcp" "ip" "large")
+readonly PROTOS=("ipv4" "ipv6")
+dev=""
+test="all"
+proto="ipv4"
+
+run_test() {
+ local server_pid=0
+ local exit_code=0
+ local protocol=$1
+ local test=$2
+ local ARGS=( "--${protocol}" "--dmac" "${SERVER_MAC}" \
+ "--smac" "${CLIENT_MAC}" "--test" "${test}" "--verbose" )
+
+ setup_ns
+ # Each test is run 3 times to deflake, because given the receive timing,
+ # not all packets that should coalesce will be considered in the same flow
+ # on every try.
+ for tries in {1..3}; do
+ # Actual test starts here
+ ip netns exec $server_ns ./gro "${ARGS[@]}" "--rx" "--iface" "server" \
+ 1>>log.txt &
+ server_pid=$!
+ sleep 0.5 # to allow for socket init
+ ip netns exec $client_ns ./gro "${ARGS[@]}" "--iface" "client" \
+ 1>>log.txt
+ wait "${server_pid}"
+ exit_code=$?
+ if [[ ${test} == "large" && -n "${KSFT_MACHINE_SLOW}" && \
+ ${exit_code} -ne 0 ]]; then
+ echo "Ignoring errors due to slow environment" 1>&2
+ exit_code=0
+ fi
+ if [[ "${exit_code}" -eq 0 ]]; then
+ break;
+ fi
+ done
+ cleanup_ns
+ echo ${exit_code}
+}
+
+run_all_tests() {
+ local failed_tests=()
+ for proto in "${PROTOS[@]}"; do
+ for test in "${TESTS[@]}"; do
+ echo "running test ${proto} ${test}" >&2
+ exit_code=$(run_test $proto $test)
+ if [[ "${exit_code}" -ne 0 ]]; then
+ failed_tests+=("${proto}_${test}")
+ fi;
+ done;
+ done
+ if [[ ${#failed_tests[@]} -ne 0 ]]; then
+ echo "failed tests: ${failed_tests[*]}. \
+ Please see log.txt for more logs"
+ exit 1
+ else
+ echo "All Tests Succeeded!"
+ fi;
+}
+
+usage() {
+ echo "Usage: $0 \
+ [-i <DEV>] \
+ [-t data|ack|flags|tcp|ip|large] \
+ [-p <ipv4|ipv6>]" 1>&2;
+ exit 1;
+}
+
+while getopts "i:t:p:" opt; do
+ case "${opt}" in
+ i)
+ dev="${OPTARG}"
+ ;;
+ t)
+ test="${OPTARG}"
+ ;;
+ p)
+ proto="${OPTARG}"
+ ;;
+ *)
+ usage
+ ;;
+ esac
+done
+
+if [ -n "$dev" ]; then
+ source setup_loopback.sh
+else
+ source setup_veth.sh
+fi
+
+setup
+trap cleanup EXIT
+if [[ "${test}" == "all" ]]; then
+ run_all_tests
+else
+ run_test "${proto}" "${test}"
+fi;
diff --git a/tools/testing/selftests/net/hsr/Makefile b/tools/testing/selftests/net/hsr/Makefile
new file mode 100644
index 000000000000..92c1d9d080cd
--- /dev/null
+++ b/tools/testing/selftests/net/hsr/Makefile
@@ -0,0 +1,7 @@
+# SPDX-License-Identifier: GPL-2.0
+
+top_srcdir = ../../../../..
+
+TEST_PROGS := hsr_ping.sh
+
+include ../../lib.mk
diff --git a/tools/testing/selftests/net/hsr/config b/tools/testing/selftests/net/hsr/config
new file mode 100644
index 000000000000..22061204fb69
--- /dev/null
+++ b/tools/testing/selftests/net/hsr/config
@@ -0,0 +1,4 @@
+CONFIG_IPV6=y
+CONFIG_NET_SCH_NETEM=m
+CONFIG_HSR=y
+CONFIG_VETH=y
diff --git a/tools/testing/selftests/net/hsr/hsr_ping.sh b/tools/testing/selftests/net/hsr/hsr_ping.sh
new file mode 100755
index 000000000000..1c6457e54625
--- /dev/null
+++ b/tools/testing/selftests/net/hsr/hsr_ping.sh
@@ -0,0 +1,276 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+ret=0
+ksft_skip=4
+ipv6=true
+
+optstring="h4"
+usage() {
+ echo "Usage: $0 [OPTION]"
+ echo -e "\t-4: IPv4 only: disable IPv6 tests (default: test both IPv4 and IPv6)"
+}
+
+while getopts "$optstring" option;do
+ case "$option" in
+ "h")
+ usage $0
+ exit 0
+ ;;
+ "4")
+ ipv6=false
+ ;;
+ "?")
+ usage $0
+ exit 1
+ ;;
+esac
+done
+
+sec=$(date +%s)
+rndh=$(printf %x $sec)-$(mktemp -u XXXXXX)
+ns1="ns1-$rndh"
+ns2="ns2-$rndh"
+ns3="ns3-$rndh"
+
+cleanup()
+{
+ local netns
+ for netns in "$ns1" "$ns2" "$ns3" ;do
+ ip netns del $netns
+ done
+}
+
+# $1: IP address
+is_v6()
+{
+ [ -z "${1##*:*}" ]
+}
+
+do_ping()
+{
+ local netns="$1"
+ local connect_addr="$2"
+ local ping_args="-q -c 2"
+
+ if is_v6 "${connect_addr}"; then
+ $ipv6 || return 0
+ ping_args="${ping_args} -6"
+ fi
+
+ ip netns exec ${netns} ping ${ping_args} $connect_addr >/dev/null
+ if [ $? -ne 0 ] ; then
+ echo "$netns -> $connect_addr connectivity [ FAIL ]" 1>&2
+ ret=1
+ return 1
+ fi
+
+ return 0
+}
+
+do_ping_long()
+{
+ local netns="$1"
+ local connect_addr="$2"
+ local ping_args="-q -c 10"
+
+ if is_v6 "${connect_addr}"; then
+ $ipv6 || return 0
+ ping_args="${ping_args} -6"
+ fi
+
+ OUT="$(LANG=C ip netns exec ${netns} ping ${ping_args} $connect_addr | grep received)"
+ if [ $? -ne 0 ] ; then
+ echo "$netns -> $connect_addr ping [ FAIL ]" 1>&2
+ ret=1
+ return 1
+ fi
+
+ VAL="$(echo $OUT | cut -d' ' -f1-8)"
+ if [ "$VAL" != "10 packets transmitted, 10 received, 0% packet loss," ]
+ then
+ echo "$netns -> $connect_addr ping TEST [ FAIL ]"
+ echo "Expect to send and receive 10 packets and no duplicates."
+ echo "Full message: ${OUT}."
+ ret=1
+ return 1
+ fi
+
+ return 0
+}
+
+stop_if_error()
+{
+ local msg="$1"
+
+ if [ ${ret} -ne 0 ]; then
+ echo "FAIL: ${msg}" 1>&2
+ exit ${ret}
+ fi
+}
+
+do_complete_ping_test()
+{
+ echo "INFO: Initial validation ping."
+ # Each node has to be able each one.
+ do_ping "$ns1" 100.64.0.2
+ do_ping "$ns2" 100.64.0.1
+ do_ping "$ns3" 100.64.0.1
+ stop_if_error "Initial validation failed."
+
+ do_ping "$ns1" 100.64.0.3
+ do_ping "$ns2" 100.64.0.3
+ do_ping "$ns3" 100.64.0.2
+
+ do_ping "$ns1" dead:beef:1::2
+ do_ping "$ns1" dead:beef:1::3
+ do_ping "$ns2" dead:beef:1::1
+ do_ping "$ns2" dead:beef:1::2
+ do_ping "$ns3" dead:beef:1::1
+ do_ping "$ns3" dead:beef:1::2
+
+ stop_if_error "Initial validation failed."
+
+# Wait until supervisor all supervision frames have been processed and the node
+# entries have been merged. Otherwise duplicate frames will be observed which is
+# valid at this stage.
+ WAIT=5
+ while [ ${WAIT} -gt 0 ]
+ do
+ grep 00:00:00:00:00:00 /sys/kernel/debug/hsr/hsr*/node_table
+ if [ $? -ne 0 ]
+ then
+ break
+ fi
+ sleep 1
+ let "WAIT = WAIT - 1"
+ done
+
+# Just a safety delay in case the above check didn't handle it.
+ sleep 1
+
+ echo "INFO: Longer ping test."
+ do_ping_long "$ns1" 100.64.0.2
+ do_ping_long "$ns1" dead:beef:1::2
+ do_ping_long "$ns1" 100.64.0.3
+ do_ping_long "$ns1" dead:beef:1::3
+
+ stop_if_error "Longer ping test failed."
+
+ do_ping_long "$ns2" 100.64.0.1
+ do_ping_long "$ns2" dead:beef:1::1
+ do_ping_long "$ns2" 100.64.0.3
+ do_ping_long "$ns2" dead:beef:1::2
+ stop_if_error "Longer ping test failed."
+
+ do_ping_long "$ns3" 100.64.0.1
+ do_ping_long "$ns3" dead:beef:1::1
+ do_ping_long "$ns3" 100.64.0.2
+ do_ping_long "$ns3" dead:beef:1::2
+ stop_if_error "Longer ping test failed."
+
+ echo "INFO: Cutting one link."
+ do_ping_long "$ns1" 100.64.0.3 &
+
+ sleep 3
+ ip -net "$ns3" link set ns3eth1 down
+ wait
+
+ ip -net "$ns3" link set ns3eth1 up
+
+ stop_if_error "Failed with one link down."
+
+ echo "INFO: Delay the link and drop a few packages."
+ tc -net "$ns3" qdisc add dev ns3eth1 root netem delay 50ms
+ tc -net "$ns2" qdisc add dev ns2eth1 root netem delay 5ms loss 25%
+
+ do_ping_long "$ns1" 100.64.0.2
+ do_ping_long "$ns1" 100.64.0.3
+
+ stop_if_error "Failed with delay and packetloss."
+
+ do_ping_long "$ns2" 100.64.0.1
+ do_ping_long "$ns2" 100.64.0.3
+
+ stop_if_error "Failed with delay and packetloss."
+
+ do_ping_long "$ns3" 100.64.0.1
+ do_ping_long "$ns3" 100.64.0.2
+ stop_if_error "Failed with delay and packetloss."
+
+ echo "INFO: All good."
+}
+
+setup_hsr_interfaces()
+{
+ local HSRv="$1"
+
+ echo "INFO: preparing interfaces for HSRv${HSRv}."
+# Three HSR nodes. Each node has one link to each of its neighbour, two links in total.
+#
+# ns1eth1 ----- ns2eth1
+# hsr1 hsr2
+# ns1eth2 ns2eth2
+# | |
+# ns3eth1 ns3eth2
+# \ /
+# hsr3
+#
+ # Interfaces
+ ip link add ns1eth1 netns "$ns1" type veth peer name ns2eth1 netns "$ns2"
+ ip link add ns1eth2 netns "$ns1" type veth peer name ns3eth1 netns "$ns3"
+ ip link add ns3eth2 netns "$ns3" type veth peer name ns2eth2 netns "$ns2"
+
+ # HSRv0/1
+ ip -net "$ns1" link add name hsr1 type hsr slave1 ns1eth1 slave2 ns1eth2 supervision 45 version $HSRv proto 0
+ ip -net "$ns2" link add name hsr2 type hsr slave1 ns2eth1 slave2 ns2eth2 supervision 45 version $HSRv proto 0
+ ip -net "$ns3" link add name hsr3 type hsr slave1 ns3eth1 slave2 ns3eth2 supervision 45 version $HSRv proto 0
+
+ # IP for HSR
+ ip -net "$ns1" addr add 100.64.0.1/24 dev hsr1
+ ip -net "$ns1" addr add dead:beef:1::1/64 dev hsr1 nodad
+ ip -net "$ns2" addr add 100.64.0.2/24 dev hsr2
+ ip -net "$ns2" addr add dead:beef:1::2/64 dev hsr2 nodad
+ ip -net "$ns3" addr add 100.64.0.3/24 dev hsr3
+ ip -net "$ns3" addr add dead:beef:1::3/64 dev hsr3 nodad
+
+ # All Links up
+ ip -net "$ns1" link set ns1eth1 up
+ ip -net "$ns1" link set ns1eth2 up
+ ip -net "$ns1" link set hsr1 up
+
+ ip -net "$ns2" link set ns2eth1 up
+ ip -net "$ns2" link set ns2eth2 up
+ ip -net "$ns2" link set hsr2 up
+
+ ip -net "$ns3" link set ns3eth1 up
+ ip -net "$ns3" link set ns3eth2 up
+ ip -net "$ns3" link set hsr3 up
+}
+
+ip -Version > /dev/null 2>&1
+if [ $? -ne 0 ];then
+ echo "SKIP: Could not run test without ip tool"
+ exit $ksft_skip
+fi
+
+trap cleanup EXIT
+
+for i in "$ns1" "$ns2" "$ns3" ;do
+ ip netns add $i || exit $ksft_skip
+ ip -net $i link set lo up
+done
+
+setup_hsr_interfaces 0
+do_complete_ping_test
+cleanup
+
+for i in "$ns1" "$ns2" "$ns3" ;do
+ ip netns add $i || exit $ksft_skip
+ ip -net $i link set lo up
+done
+
+setup_hsr_interfaces 1
+do_complete_ping_test
+
+exit $ret
diff --git a/tools/testing/selftests/net/hwtstamp_config.c b/tools/testing/selftests/net/hwtstamp_config.c
index e1fdee841021..170728c96c46 100644
--- a/tools/testing/selftests/net/hwtstamp_config.c
+++ b/tools/testing/selftests/net/hwtstamp_config.c
@@ -16,6 +16,8 @@
#include <linux/net_tstamp.h>
#include <linux/sockios.h>
+#include "kselftest.h"
+
static int
lookup_value(const char **names, int size, const char *name)
{
@@ -50,7 +52,7 @@ static const char *tx_types[] = {
TX_TYPE(ONESTEP_SYNC)
#undef TX_TYPE
};
-#define N_TX_TYPES ((int)(sizeof(tx_types) / sizeof(tx_types[0])))
+#define N_TX_TYPES ((int)(ARRAY_SIZE(tx_types)))
static const char *rx_filters[] = {
#define RX_FILTER(name) [HWTSTAMP_FILTER_ ## name] = #name
@@ -71,7 +73,7 @@ static const char *rx_filters[] = {
RX_FILTER(PTP_V2_DELAY_REQ),
#undef RX_FILTER
};
-#define N_RX_FILTERS ((int)(sizeof(rx_filters) / sizeof(rx_filters[0])))
+#define N_RX_FILTERS ((int)(ARRAY_SIZE(rx_filters)))
static void usage(void)
{
diff --git a/tools/testing/selftests/net/icmp.sh b/tools/testing/selftests/net/icmp.sh
new file mode 100755
index 000000000000..824cb0e35eff
--- /dev/null
+++ b/tools/testing/selftests/net/icmp.sh
@@ -0,0 +1,72 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+# Test for checking ICMP response with dummy address instead of 0.0.0.0.
+# Sets up two namespaces like:
+# +----------------------+ +--------------------+
+# | ns1 | v4-via-v6 routes: | ns2 |
+# | | ' | |
+# | +--------+ -> 172.16.1.0/24 -> +--------+ |
+# | | veth0 +--------------------------+ veth0 | |
+# | +--------+ <- 172.16.0.0/24 <- +--------+ |
+# | 172.16.0.1 | | 2001:db8:1::2/64 |
+# | 2001:db8:1::2/64 | | |
+# +----------------------+ +--------------------+
+#
+# And then tries to ping 172.16.1.1 from ns1. This results in a "net
+# unreachable" message being sent from ns2, but there is no IPv4 address set in
+# that address space, so the kernel should substitute the dummy address
+# 192.0.0.8 defined in RFC7600.
+
+source lib.sh
+
+H1_IP=172.16.0.1/32
+H1_IP6=2001:db8:1::1
+RT1=172.16.1.0/24
+PINGADDR=172.16.1.1
+RT2=172.16.0.0/24
+H2_IP6=2001:db8:1::2
+
+TMPFILE=$(mktemp)
+
+cleanup()
+{
+ rm -f "$TMPFILE"
+ cleanup_ns $NS1 $NS2
+}
+
+trap cleanup EXIT
+
+# Namespaces
+setup_ns NS1 NS2
+
+# Connectivity
+ip -netns $NS1 link add veth0 type veth peer name veth0 netns $NS2
+ip -netns $NS1 link set dev veth0 up
+ip -netns $NS2 link set dev veth0 up
+ip -netns $NS1 addr add $H1_IP dev veth0
+ip -netns $NS1 addr add $H1_IP6/64 dev veth0 nodad
+ip -netns $NS2 addr add $H2_IP6/64 dev veth0 nodad
+ip -netns $NS1 route add $RT1 via inet6 $H2_IP6
+ip -netns $NS2 route add $RT2 via inet6 $H1_IP6
+
+# Make sure ns2 will respond with ICMP unreachable
+ip netns exec $NS2 sysctl -qw net.ipv4.icmp_ratelimit=0 net.ipv4.ip_forward=1
+
+# Run the test - a ping runs in the background, and we capture ICMP responses
+# with tcpdump; -c 1 means it should exit on the first ping, but add a timeout
+# in case something goes wrong
+ip netns exec $NS1 ping -w 3 -i 0.5 $PINGADDR >/dev/null &
+ip netns exec $NS1 timeout 10 tcpdump -tpni veth0 -c 1 'icmp and icmp[icmptype] != icmp-echo' > $TMPFILE 2>/dev/null
+
+# Parse response and check for dummy address
+# tcpdump output looks like:
+# IP 192.0.0.8 > 172.16.0.1: ICMP net 172.16.1.1 unreachable, length 92
+RESP_IP=$(awk '{print $2}' < $TMPFILE)
+if [[ "$RESP_IP" != "192.0.0.8" ]]; then
+ echo "FAIL - got ICMP response from $RESP_IP, should be 192.0.0.8"
+ exit 1
+else
+ echo "OK"
+ exit 0
+fi
diff --git a/tools/testing/selftests/net/icmp_redirect.sh b/tools/testing/selftests/net/icmp_redirect.sh
index 18c5de53558a..d6f0e449c029 100755
--- a/tools/testing/selftests/net/icmp_redirect.sh
+++ b/tools/testing/selftests/net/icmp_redirect.sh
@@ -19,6 +19,7 @@
# Route on r1 changed to go to r2 via eth0. This causes a redirect to be sent
# from r1 to h1 telling h1 to use r2 when talking to h2.
+source lib.sh
VERBOSE=0
PAUSE_ON_FAIL=no
@@ -63,10 +64,14 @@ log_test()
local rc=$1
local expected=$2
local msg="$3"
+ local xfail=$4
if [ ${rc} -eq ${expected} ]; then
printf "TEST: %-60s [ OK ]\n" "${msg}"
nsuccess=$((nsuccess+1))
+ elif [ ${rc} -eq ${xfail} ]; then
+ printf "TEST: %-60s [XFAIL]\n" "${msg}"
+ nxfail=$((nxfail+1))
else
ret=1
nfail=$((nfail+1))
@@ -136,11 +141,7 @@ get_linklocal()
cleanup()
{
- local ns
-
- for ns in h1 h2 r1 r2; do
- ip netns del $ns 2>/dev/null
- done
+ cleanup_ns $h1 $h2 $r1 $r2
}
create_vrf()
@@ -167,100 +168,99 @@ setup()
#
# create nodes as namespaces
- #
- for ns in h1 h2 r1 r2; do
- ip netns add $ns
- ip -netns $ns li set lo up
-
- case "${ns}" in
- h[12]) ip netns exec $ns sysctl -q -w net.ipv4.conf.all.accept_redirects=1
- ip netns exec $ns sysctl -q -w net.ipv6.conf.all.forwarding=0
- ip netns exec $ns sysctl -q -w net.ipv6.conf.all.accept_redirects=1
- ip netns exec $ns sysctl -q -w net.ipv6.conf.all.keep_addr_on_down=1
- ;;
- r[12]) ip netns exec $ns sysctl -q -w net.ipv4.ip_forward=1
- ip netns exec $ns sysctl -q -w net.ipv4.conf.all.send_redirects=1
-
- ip netns exec $ns sysctl -q -w net.ipv6.conf.all.forwarding=1
- ip netns exec $ns sysctl -q -w net.ipv6.route.mtu_expires=10
- esac
+ setup_ns h1 h2 r1 r2
+ for ns in $h1 $h2 $r1 $r2; do
+ if echo $ns | grep -q h[12]-; then
+ ip netns exec $ns sysctl -q -w net.ipv4.conf.all.accept_redirects=1
+ ip netns exec $ns sysctl -q -w net.ipv6.conf.all.forwarding=0
+ ip netns exec $ns sysctl -q -w net.ipv6.conf.all.accept_redirects=1
+ ip netns exec $ns sysctl -q -w net.ipv6.conf.all.keep_addr_on_down=1
+ else
+ ip netns exec $ns sysctl -q -w net.ipv4.ip_forward=1
+ ip netns exec $ns sysctl -q -w net.ipv4.conf.all.send_redirects=1
+ ip netns exec $ns sysctl -q -w net.ipv4.conf.default.rp_filter=0
+ ip netns exec $ns sysctl -q -w net.ipv4.conf.all.rp_filter=0
+
+ ip netns exec $ns sysctl -q -w net.ipv6.conf.all.forwarding=1
+ ip netns exec $ns sysctl -q -w net.ipv6.route.mtu_expires=10
+ fi
done
#
# create interconnects
#
- ip -netns h1 li add eth0 type veth peer name r1h1
- ip -netns h1 li set r1h1 netns r1 name eth0 up
+ ip -netns $h1 li add eth0 type veth peer name r1h1
+ ip -netns $h1 li set r1h1 netns $r1 name eth0 up
- ip -netns h1 li add eth1 type veth peer name r2h1
- ip -netns h1 li set r2h1 netns r2 name eth0 up
+ ip -netns $h1 li add eth1 type veth peer name r2h1
+ ip -netns $h1 li set r2h1 netns $r2 name eth0 up
- ip -netns h2 li add eth0 type veth peer name r2h2
- ip -netns h2 li set eth0 up
- ip -netns h2 li set r2h2 netns r2 name eth2 up
+ ip -netns $h2 li add eth0 type veth peer name r2h2
+ ip -netns $h2 li set eth0 up
+ ip -netns $h2 li set r2h2 netns $r2 name eth2 up
- ip -netns r1 li add eth1 type veth peer name r2r1
- ip -netns r1 li set eth1 up
- ip -netns r1 li set r2r1 netns r2 name eth1 up
+ ip -netns $r1 li add eth1 type veth peer name r2r1
+ ip -netns $r1 li set eth1 up
+ ip -netns $r1 li set r2r1 netns $r2 name eth1 up
#
# h1
#
if [ "${WITH_VRF}" = "yes" ]; then
- create_vrf "h1"
+ create_vrf "$h1"
H1_VRF_ARG="vrf ${VRF}"
H1_PING_ARG="-I ${VRF}"
else
H1_VRF_ARG=
H1_PING_ARG=
fi
- ip -netns h1 li add br0 type bridge
+ ip -netns $h1 li add br0 type bridge
if [ "${WITH_VRF}" = "yes" ]; then
- ip -netns h1 li set br0 vrf ${VRF} up
+ ip -netns $h1 li set br0 vrf ${VRF} up
else
- ip -netns h1 li set br0 up
+ ip -netns $h1 li set br0 up
fi
- ip -netns h1 addr add dev br0 ${H1_N1_IP}/24
- ip -netns h1 -6 addr add dev br0 ${H1_N1_IP6}/64 nodad
- ip -netns h1 li set eth0 master br0 up
- ip -netns h1 li set eth1 master br0 up
+ ip -netns $h1 addr add dev br0 ${H1_N1_IP}/24
+ ip -netns $h1 -6 addr add dev br0 ${H1_N1_IP6}/64 nodad
+ ip -netns $h1 li set eth0 master br0 up
+ ip -netns $h1 li set eth1 master br0 up
#
# h2
#
- ip -netns h2 addr add dev eth0 ${H2_N2_IP}/24
- ip -netns h2 ro add default via ${R2_N2_IP} dev eth0
- ip -netns h2 -6 addr add dev eth0 ${H2_N2_IP6}/64 nodad
- ip -netns h2 -6 ro add default via ${R2_N2_IP6} dev eth0
+ ip -netns $h2 addr add dev eth0 ${H2_N2_IP}/24
+ ip -netns $h2 ro add default via ${R2_N2_IP} dev eth0
+ ip -netns $h2 -6 addr add dev eth0 ${H2_N2_IP6}/64 nodad
+ ip -netns $h2 -6 ro add default via ${R2_N2_IP6} dev eth0
#
# r1
#
- ip -netns r1 addr add dev eth0 ${R1_N1_IP}/24
- ip -netns r1 -6 addr add dev eth0 ${R1_N1_IP6}/64 nodad
- ip -netns r1 addr add dev eth1 ${R1_R2_N1_IP}/30
- ip -netns r1 -6 addr add dev eth1 ${R1_R2_N1_IP6}/126 nodad
+ ip -netns $r1 addr add dev eth0 ${R1_N1_IP}/24
+ ip -netns $r1 -6 addr add dev eth0 ${R1_N1_IP6}/64 nodad
+ ip -netns $r1 addr add dev eth1 ${R1_R2_N1_IP}/30
+ ip -netns $r1 -6 addr add dev eth1 ${R1_R2_N1_IP6}/126 nodad
#
# r2
#
- ip -netns r2 addr add dev eth0 ${R2_N1_IP}/24
- ip -netns r2 -6 addr add dev eth0 ${R2_N1_IP6}/64 nodad
- ip -netns r2 addr add dev eth1 ${R2_R1_N1_IP}/30
- ip -netns r2 -6 addr add dev eth1 ${R2_R1_N1_IP6}/126 nodad
- ip -netns r2 addr add dev eth2 ${R2_N2_IP}/24
- ip -netns r2 -6 addr add dev eth2 ${R2_N2_IP6}/64 nodad
+ ip -netns $r2 addr add dev eth0 ${R2_N1_IP}/24
+ ip -netns $r2 -6 addr add dev eth0 ${R2_N1_IP6}/64 nodad
+ ip -netns $r2 addr add dev eth1 ${R2_R1_N1_IP}/30
+ ip -netns $r2 -6 addr add dev eth1 ${R2_R1_N1_IP6}/126 nodad
+ ip -netns $r2 addr add dev eth2 ${R2_N2_IP}/24
+ ip -netns $r2 -6 addr add dev eth2 ${R2_N2_IP6}/64 nodad
sleep 2
- R1_LLADDR=$(get_linklocal r1 eth0)
+ R1_LLADDR=$(get_linklocal $r1 eth0)
if [ $? -ne 0 ]; then
echo "Error: Failed to get link-local address of r1's eth0"
exit 1
fi
log_debug "initial gateway is R1's lladdr = ${R1_LLADDR}"
- R2_LLADDR=$(get_linklocal r2 eth0)
+ R2_LLADDR=$(get_linklocal $r2 eth0)
if [ $? -ne 0 ]; then
echo "Error: Failed to get link-local address of r2's eth0"
exit 1
@@ -272,8 +272,8 @@ change_h2_mtu()
{
local mtu=$1
- run_cmd ip -netns h2 li set eth0 mtu ${mtu}
- run_cmd ip -netns r2 li set eth2 mtu ${mtu}
+ run_cmd ip -netns $h2 li set eth0 mtu ${mtu}
+ run_cmd ip -netns $r2 li set eth2 mtu ${mtu}
}
check_exception()
@@ -285,63 +285,64 @@ check_exception()
# From 172.16.1.101: icmp_seq=1 Redirect Host(New nexthop: 172.16.1.102)
if [ "$VERBOSE" = "1" ]; then
echo "Commands to check for exception:"
- run_cmd ip -netns h1 ro get ${H1_VRF_ARG} ${H2_N2_IP}
- run_cmd ip -netns h1 -6 ro get ${H1_VRF_ARG} ${H2_N2_IP6}
+ run_cmd ip -netns $h1 ro get ${H1_VRF_ARG} ${H2_N2_IP}
+ run_cmd ip -netns $h1 -6 ro get ${H1_VRF_ARG} ${H2_N2_IP6}
fi
if [ -n "${mtu}" ]; then
mtu=" mtu ${mtu}"
fi
if [ "$with_redirect" = "yes" ]; then
- ip -netns h1 ro get ${H1_VRF_ARG} ${H2_N2_IP} | \
+ ip -netns $h1 ro get ${H1_VRF_ARG} ${H2_N2_IP} | \
grep -q "cache <redirected> expires [0-9]*sec${mtu}"
elif [ -n "${mtu}" ]; then
- ip -netns h1 ro get ${H1_VRF_ARG} ${H2_N2_IP} | \
+ ip -netns $h1 ro get ${H1_VRF_ARG} ${H2_N2_IP} | \
grep -q "cache expires [0-9]*sec${mtu}"
else
# want to verify that neither mtu nor redirected appears in
# the route get output. The -v will wipe out the cache line
# if either are set so the last grep -q will not find a match
- ip -netns h1 ro get ${H1_VRF_ARG} ${H2_N2_IP} | \
+ ip -netns $h1 ro get ${H1_VRF_ARG} ${H2_N2_IP} | \
grep -E -v 'mtu|redirected' | grep -q "cache"
fi
- log_test $? 0 "IPv4: ${desc}"
+ log_test $? 0 "IPv4: ${desc}" 0
- if [ "$with_redirect" = "yes" ]; then
- ip -netns h1 -6 ro get ${H1_VRF_ARG} ${H2_N2_IP6} | \
- grep -q "${H2_N2_IP6} from :: via ${R2_LLADDR} dev br0.*${mtu}"
+ # No PMTU info for test "redirect" and "mtu exception plus redirect"
+ if [ "$with_redirect" = "yes" ] && [ "$desc" != "redirect exception plus mtu" ]; then
+ ip -netns $h1 -6 ro get ${H1_VRF_ARG} ${H2_N2_IP6} | \
+ grep -v "mtu" | grep -q "${H2_N2_IP6} .*via ${R2_LLADDR} dev br0"
elif [ -n "${mtu}" ]; then
- ip -netns h1 -6 ro get ${H1_VRF_ARG} ${H2_N2_IP6} | \
+ ip -netns $h1 -6 ro get ${H1_VRF_ARG} ${H2_N2_IP6} | \
grep -q "${mtu}"
else
# IPv6 is a bit harder. First strip out the match if it
# contains an mtu exception and then look for the first
# gateway - R1's lladdr
- ip -netns h1 -6 ro get ${H1_VRF_ARG} ${H2_N2_IP6} | \
+ ip -netns $h1 -6 ro get ${H1_VRF_ARG} ${H2_N2_IP6} | \
grep -v "mtu" | grep -q "${R1_LLADDR}"
fi
- log_test $? 0 "IPv6: ${desc}"
+ log_test $? 0 "IPv6: ${desc}" 1
}
run_ping()
{
local sz=$1
- run_cmd ip netns exec h1 ping -q -M want -i 0.5 -c 10 -w 2 -s ${sz} ${H1_PING_ARG} ${H2_N2_IP}
- run_cmd ip netns exec h1 ${ping6} -q -M want -i 0.5 -c 10 -w 2 -s ${sz} ${H1_PING_ARG} ${H2_N2_IP6}
+ run_cmd ip netns exec $h1 ping -q -M want -i 0.5 -c 10 -w 2 -s ${sz} ${H1_PING_ARG} ${H2_N2_IP}
+ run_cmd ip netns exec $h1 ${ping6} -q -M want -i 0.5 -c 10 -w 2 -s ${sz} ${H1_PING_ARG} ${H2_N2_IP6}
}
replace_route_new()
{
# r1 to h2 via r2 and eth0
- run_cmd ip -netns r1 nexthop replace id 1 via ${R2_N1_IP} dev eth0
- run_cmd ip -netns r1 nexthop replace id 2 via ${R2_LLADDR} dev eth0
+ run_cmd ip -netns $r1 nexthop replace id 1 via ${R2_N1_IP} dev eth0
+ run_cmd ip -netns $r1 nexthop replace id 2 via ${R2_LLADDR} dev eth0
}
reset_route_new()
{
- run_cmd ip -netns r1 nexthop flush
- run_cmd ip -netns h1 nexthop flush
+ run_cmd ip -netns $r1 nexthop flush
+ run_cmd ip -netns $h1 nexthop flush
initial_route_new
}
@@ -349,34 +350,34 @@ reset_route_new()
initial_route_new()
{
# r1 to h2 via r2 and eth1
- run_cmd ip -netns r1 nexthop add id 1 via ${R2_R1_N1_IP} dev eth1
- run_cmd ip -netns r1 ro add ${H2_N2} nhid 1
+ run_cmd ip -netns $r1 nexthop add id 1 via ${R2_R1_N1_IP} dev eth1
+ run_cmd ip -netns $r1 ro add ${H2_N2} nhid 1
- run_cmd ip -netns r1 nexthop add id 2 via ${R2_R1_N1_IP6} dev eth1
- run_cmd ip -netns r1 -6 ro add ${H2_N2_6} nhid 2
+ run_cmd ip -netns $r1 nexthop add id 2 via ${R2_R1_N1_IP6} dev eth1
+ run_cmd ip -netns $r1 -6 ro add ${H2_N2_6} nhid 2
# h1 to h2 via r1
- run_cmd ip -netns h1 nexthop add id 1 via ${R1_N1_IP} dev br0
- run_cmd ip -netns h1 ro add ${H1_VRF_ARG} ${H2_N2} nhid 1
+ run_cmd ip -netns $h1 nexthop add id 1 via ${R1_N1_IP} dev br0
+ run_cmd ip -netns $h1 ro add ${H1_VRF_ARG} ${H2_N2} nhid 1
- run_cmd ip -netns h1 nexthop add id 2 via ${R1_LLADDR} dev br0
- run_cmd ip -netns h1 -6 ro add ${H1_VRF_ARG} ${H2_N2_6} nhid 2
+ run_cmd ip -netns $h1 nexthop add id 2 via ${R1_LLADDR} dev br0
+ run_cmd ip -netns $h1 -6 ro add ${H1_VRF_ARG} ${H2_N2_6} nhid 2
}
replace_route_legacy()
{
# r1 to h2 via r2 and eth0
- run_cmd ip -netns r1 ro replace ${H2_N2} via ${R2_N1_IP} dev eth0
- run_cmd ip -netns r1 -6 ro replace ${H2_N2_6} via ${R2_LLADDR} dev eth0
+ run_cmd ip -netns $r1 ro replace ${H2_N2} via ${R2_N1_IP} dev eth0
+ run_cmd ip -netns $r1 -6 ro replace ${H2_N2_6} via ${R2_LLADDR} dev eth0
}
reset_route_legacy()
{
- run_cmd ip -netns r1 ro del ${H2_N2}
- run_cmd ip -netns r1 -6 ro del ${H2_N2_6}
+ run_cmd ip -netns $r1 ro del ${H2_N2}
+ run_cmd ip -netns $r1 -6 ro del ${H2_N2_6}
- run_cmd ip -netns h1 ro del ${H1_VRF_ARG} ${H2_N2}
- run_cmd ip -netns h1 -6 ro del ${H1_VRF_ARG} ${H2_N2_6}
+ run_cmd ip -netns $h1 ro del ${H1_VRF_ARG} ${H2_N2}
+ run_cmd ip -netns $h1 -6 ro del ${H1_VRF_ARG} ${H2_N2_6}
initial_route_legacy
}
@@ -384,22 +385,22 @@ reset_route_legacy()
initial_route_legacy()
{
# r1 to h2 via r2 and eth1
- run_cmd ip -netns r1 ro add ${H2_N2} via ${R2_R1_N1_IP} dev eth1
- run_cmd ip -netns r1 -6 ro add ${H2_N2_6} via ${R2_R1_N1_IP6} dev eth1
+ run_cmd ip -netns $r1 ro add ${H2_N2} via ${R2_R1_N1_IP} dev eth1
+ run_cmd ip -netns $r1 -6 ro add ${H2_N2_6} via ${R2_R1_N1_IP6} dev eth1
# h1 to h2 via r1
# - IPv6 redirect only works if gateway is the LLA
- run_cmd ip -netns h1 ro add ${H1_VRF_ARG} ${H2_N2} via ${R1_N1_IP} dev br0
- run_cmd ip -netns h1 -6 ro add ${H1_VRF_ARG} ${H2_N2_6} via ${R1_LLADDR} dev br0
+ run_cmd ip -netns $h1 ro add ${H1_VRF_ARG} ${H2_N2} via ${R1_N1_IP} dev br0
+ run_cmd ip -netns $h1 -6 ro add ${H1_VRF_ARG} ${H2_N2_6} via ${R1_LLADDR} dev br0
}
check_connectivity()
{
local rc
- run_cmd ip netns exec h1 ping -c1 -w1 ${H1_PING_ARG} ${H2_N2_IP}
+ run_cmd ip netns exec $h1 ping -c1 -w1 ${H1_PING_ARG} ${H2_N2_IP}
rc=$?
- run_cmd ip netns exec h1 ${ping6} -c1 -w1 ${H1_PING_ARG} ${H2_N2_IP6}
+ run_cmd ip netns exec $h1 ${ping6} -c1 -w1 ${H1_PING_ARG} ${H2_N2_IP6}
[ $? -ne 0 ] && rc=$?
return $rc
@@ -486,6 +487,7 @@ which ping6 > /dev/null 2>&1 && ping6=$(which ping6) || ping6=$(which ping)
ret=0
nsuccess=0
nfail=0
+nxfail=0
while getopts :pv o
do
@@ -530,5 +532,6 @@ fi
printf "\nTests passed: %3d\n" ${nsuccess}
printf "Tests failed: %3d\n" ${nfail}
+printf "Tests xfailed: %3d\n" ${nxfail}
exit $ret
diff --git a/tools/testing/selftests/net/io_uring_zerocopy_tx.c b/tools/testing/selftests/net/io_uring_zerocopy_tx.c
new file mode 100644
index 000000000000..76e604e4810e
--- /dev/null
+++ b/tools/testing/selftests/net/io_uring_zerocopy_tx.c
@@ -0,0 +1,320 @@
+/* SPDX-License-Identifier: MIT */
+/* based on linux-kernel/tools/testing/selftests/net/msg_zerocopy.c */
+#include <assert.h>
+#include <errno.h>
+#include <error.h>
+#include <fcntl.h>
+#include <limits.h>
+#include <stdbool.h>
+#include <stdint.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+
+#include <arpa/inet.h>
+#include <linux/errqueue.h>
+#include <linux/if_packet.h>
+#include <linux/io_uring.h>
+#include <linux/ipv6.h>
+#include <linux/socket.h>
+#include <linux/sockios.h>
+#include <net/ethernet.h>
+#include <net/if.h>
+#include <netinet/in.h>
+#include <netinet/ip.h>
+#include <netinet/ip6.h>
+#include <netinet/tcp.h>
+#include <netinet/udp.h>
+#include <sys/ioctl.h>
+#include <sys/mman.h>
+#include <sys/resource.h>
+#include <sys/socket.h>
+#include <sys/stat.h>
+#include <sys/time.h>
+#include <sys/types.h>
+#include <sys/un.h>
+#include <sys/wait.h>
+
+#include <io_uring/mini_liburing.h>
+
+#define NOTIF_TAG 0xfffffffULL
+#define NONZC_TAG 0
+#define ZC_TAG 1
+
+enum {
+ MODE_NONZC = 0,
+ MODE_ZC = 1,
+ MODE_ZC_FIXED = 2,
+ MODE_MIXED = 3,
+};
+
+static bool cfg_cork = false;
+static int cfg_mode = MODE_ZC_FIXED;
+static int cfg_nr_reqs = 8;
+static int cfg_family = PF_UNSPEC;
+static int cfg_payload_len;
+static int cfg_port = 8000;
+static int cfg_runtime_ms = 4200;
+
+static socklen_t cfg_alen;
+static struct sockaddr_storage cfg_dst_addr;
+
+static char payload[IP_MAXPACKET] __attribute__((aligned(4096)));
+
+static unsigned long gettimeofday_ms(void)
+{
+ struct timeval tv;
+
+ gettimeofday(&tv, NULL);
+ return (tv.tv_sec * 1000) + (tv.tv_usec / 1000);
+}
+
+static void do_setsockopt(int fd, int level, int optname, int val)
+{
+ if (setsockopt(fd, level, optname, &val, sizeof(val)))
+ error(1, errno, "setsockopt %d.%d: %d", level, optname, val);
+}
+
+static int do_setup_tx(int domain, int type, int protocol)
+{
+ int fd;
+
+ fd = socket(domain, type, protocol);
+ if (fd == -1)
+ error(1, errno, "socket t");
+
+ do_setsockopt(fd, SOL_SOCKET, SO_SNDBUF, 1 << 21);
+
+ if (connect(fd, (void *) &cfg_dst_addr, cfg_alen))
+ error(1, errno, "connect");
+ return fd;
+}
+
+static void do_tx(int domain, int type, int protocol)
+{
+ struct io_uring_sqe *sqe;
+ struct io_uring_cqe *cqe;
+ unsigned long packets = 0, bytes = 0;
+ struct io_uring ring;
+ struct iovec iov;
+ uint64_t tstop;
+ int i, fd, ret;
+ int compl_cqes = 0;
+
+ fd = do_setup_tx(domain, type, protocol);
+
+ ret = io_uring_queue_init(512, &ring, 0);
+ if (ret)
+ error(1, ret, "io_uring: queue init");
+
+ iov.iov_base = payload;
+ iov.iov_len = cfg_payload_len;
+
+ ret = io_uring_register_buffers(&ring, &iov, 1);
+ if (ret)
+ error(1, ret, "io_uring: buffer registration");
+
+ tstop = gettimeofday_ms() + cfg_runtime_ms;
+ do {
+ if (cfg_cork)
+ do_setsockopt(fd, IPPROTO_UDP, UDP_CORK, 1);
+
+ for (i = 0; i < cfg_nr_reqs; i++) {
+ unsigned zc_flags = 0;
+ unsigned buf_idx = 0;
+ unsigned mode = cfg_mode;
+ unsigned msg_flags = MSG_WAITALL;
+
+ if (cfg_mode == MODE_MIXED)
+ mode = rand() % 3;
+
+ sqe = io_uring_get_sqe(&ring);
+
+ if (mode == MODE_NONZC) {
+ io_uring_prep_send(sqe, fd, payload,
+ cfg_payload_len, msg_flags);
+ sqe->user_data = NONZC_TAG;
+ } else {
+ io_uring_prep_sendzc(sqe, fd, payload,
+ cfg_payload_len,
+ msg_flags, zc_flags);
+ if (mode == MODE_ZC_FIXED) {
+ sqe->ioprio |= IORING_RECVSEND_FIXED_BUF;
+ sqe->buf_index = buf_idx;
+ }
+ sqe->user_data = ZC_TAG;
+ }
+ }
+
+ ret = io_uring_submit(&ring);
+ if (ret != cfg_nr_reqs)
+ error(1, ret, "submit");
+
+ if (cfg_cork)
+ do_setsockopt(fd, IPPROTO_UDP, UDP_CORK, 0);
+ for (i = 0; i < cfg_nr_reqs; i++) {
+ ret = io_uring_wait_cqe(&ring, &cqe);
+ if (ret)
+ error(1, ret, "wait cqe");
+
+ if (cqe->user_data != NONZC_TAG &&
+ cqe->user_data != ZC_TAG)
+ error(1, -EINVAL, "invalid cqe->user_data");
+
+ if (cqe->flags & IORING_CQE_F_NOTIF) {
+ if (cqe->flags & IORING_CQE_F_MORE)
+ error(1, -EINVAL, "invalid notif flags");
+ if (compl_cqes <= 0)
+ error(1, -EINVAL, "notification mismatch");
+ compl_cqes--;
+ i--;
+ io_uring_cqe_seen(&ring);
+ continue;
+ }
+ if (cqe->flags & IORING_CQE_F_MORE) {
+ if (cqe->user_data != ZC_TAG)
+ error(1, cqe->res, "unexpected F_MORE");
+ compl_cqes++;
+ }
+ if (cqe->res >= 0) {
+ packets++;
+ bytes += cqe->res;
+ } else if (cqe->res != -EAGAIN) {
+ error(1, cqe->res, "send failed");
+ }
+ io_uring_cqe_seen(&ring);
+ }
+ } while (gettimeofday_ms() < tstop);
+
+ while (compl_cqes) {
+ ret = io_uring_wait_cqe(&ring, &cqe);
+ if (ret)
+ error(1, ret, "wait cqe");
+ if (cqe->flags & IORING_CQE_F_MORE)
+ error(1, -EINVAL, "invalid notif flags");
+ if (!(cqe->flags & IORING_CQE_F_NOTIF))
+ error(1, -EINVAL, "missing notif flag");
+
+ io_uring_cqe_seen(&ring);
+ compl_cqes--;
+ }
+
+ fprintf(stderr, "tx=%lu (MB=%lu), tx/s=%lu (MB/s=%lu)\n",
+ packets, bytes >> 20,
+ packets / (cfg_runtime_ms / 1000),
+ (bytes >> 20) / (cfg_runtime_ms / 1000));
+
+ if (close(fd))
+ error(1, errno, "close");
+}
+
+static void do_test(int domain, int type, int protocol)
+{
+ int i;
+
+ for (i = 0; i < IP_MAXPACKET; i++)
+ payload[i] = 'a' + (i % 26);
+ do_tx(domain, type, protocol);
+}
+
+static void usage(const char *filepath)
+{
+ error(1, 0, "Usage: %s (-4|-6) (udp|tcp) -D<dst_ip> [-s<payload size>] "
+ "[-t<time s>] [-n<batch>] [-p<port>] [-m<mode>]", filepath);
+}
+
+static void parse_opts(int argc, char **argv)
+{
+ const int max_payload_len = sizeof(payload) -
+ sizeof(struct ipv6hdr) -
+ sizeof(struct tcphdr) -
+ 40 /* max tcp options */;
+ struct sockaddr_in6 *addr6 = (void *) &cfg_dst_addr;
+ struct sockaddr_in *addr4 = (void *) &cfg_dst_addr;
+ char *daddr = NULL;
+ int c;
+
+ if (argc <= 1)
+ usage(argv[0]);
+ cfg_payload_len = max_payload_len;
+
+ while ((c = getopt(argc, argv, "46D:p:s:t:n:c:m:")) != -1) {
+ switch (c) {
+ case '4':
+ if (cfg_family != PF_UNSPEC)
+ error(1, 0, "Pass one of -4 or -6");
+ cfg_family = PF_INET;
+ cfg_alen = sizeof(struct sockaddr_in);
+ break;
+ case '6':
+ if (cfg_family != PF_UNSPEC)
+ error(1, 0, "Pass one of -4 or -6");
+ cfg_family = PF_INET6;
+ cfg_alen = sizeof(struct sockaddr_in6);
+ break;
+ case 'D':
+ daddr = optarg;
+ break;
+ case 'p':
+ cfg_port = strtoul(optarg, NULL, 0);
+ break;
+ case 's':
+ cfg_payload_len = strtoul(optarg, NULL, 0);
+ break;
+ case 't':
+ cfg_runtime_ms = 200 + strtoul(optarg, NULL, 10) * 1000;
+ break;
+ case 'n':
+ cfg_nr_reqs = strtoul(optarg, NULL, 0);
+ break;
+ case 'c':
+ cfg_cork = strtol(optarg, NULL, 0);
+ break;
+ case 'm':
+ cfg_mode = strtol(optarg, NULL, 0);
+ break;
+ }
+ }
+
+ switch (cfg_family) {
+ case PF_INET:
+ memset(addr4, 0, sizeof(*addr4));
+ addr4->sin_family = AF_INET;
+ addr4->sin_port = htons(cfg_port);
+ if (daddr &&
+ inet_pton(AF_INET, daddr, &(addr4->sin_addr)) != 1)
+ error(1, 0, "ipv4 parse error: %s", daddr);
+ break;
+ case PF_INET6:
+ memset(addr6, 0, sizeof(*addr6));
+ addr6->sin6_family = AF_INET6;
+ addr6->sin6_port = htons(cfg_port);
+ if (daddr &&
+ inet_pton(AF_INET6, daddr, &(addr6->sin6_addr)) != 1)
+ error(1, 0, "ipv6 parse error: %s", daddr);
+ break;
+ default:
+ error(1, 0, "illegal domain");
+ }
+
+ if (cfg_payload_len > max_payload_len)
+ error(1, 0, "-s: payload exceeds max (%d)", max_payload_len);
+ if (optind != argc - 1)
+ usage(argv[0]);
+}
+
+int main(int argc, char **argv)
+{
+ const char *cfg_test = argv[argc - 1];
+
+ parse_opts(argc, argv);
+
+ if (!strcmp(cfg_test, "tcp"))
+ do_test(cfg_family, SOCK_STREAM, 0);
+ else if (!strcmp(cfg_test, "udp"))
+ do_test(cfg_family, SOCK_DGRAM, 0);
+ else
+ error(1, 0, "unknown cfg_test %s", cfg_test);
+ return 0;
+}
diff --git a/tools/testing/selftests/net/io_uring_zerocopy_tx.sh b/tools/testing/selftests/net/io_uring_zerocopy_tx.sh
new file mode 100755
index 000000000000..123439545013
--- /dev/null
+++ b/tools/testing/selftests/net/io_uring_zerocopy_tx.sh
@@ -0,0 +1,126 @@
+#!/bin/bash
+#
+# Send data between two processes across namespaces
+# Run twice: once without and once with zerocopy
+
+set -e
+
+readonly DEV="veth0"
+readonly DEV_MTU=65535
+readonly BIN_TX="./io_uring_zerocopy_tx"
+readonly BIN_RX="./msg_zerocopy"
+
+readonly RAND="$(mktemp -u XXXXXX)"
+readonly NSPREFIX="ns-${RAND}"
+readonly NS1="${NSPREFIX}1"
+readonly NS2="${NSPREFIX}2"
+
+readonly SADDR4='192.168.1.1'
+readonly DADDR4='192.168.1.2'
+readonly SADDR6='fd::1'
+readonly DADDR6='fd::2'
+
+readonly path_sysctl_mem="net.core.optmem_max"
+
+# No arguments: automated test
+if [[ "$#" -eq "0" ]]; then
+ IPs=( "4" "6" )
+
+ for IP in "${IPs[@]}"; do
+ for mode in $(seq 1 3); do
+ $0 "$IP" udp -m "$mode" -t 1 -n 32
+ $0 "$IP" tcp -m "$mode" -t 1 -n 1
+ done
+ done
+
+ echo "OK. All tests passed"
+ exit 0
+fi
+
+# Argument parsing
+if [[ "$#" -lt "2" ]]; then
+ echo "Usage: $0 [4|6] [tcp|udp|raw|raw_hdrincl|packet|packet_dgram] <args>"
+ exit 1
+fi
+
+readonly IP="$1"
+shift
+readonly TXMODE="$1"
+shift
+readonly EXTRA_ARGS="$@"
+
+# Argument parsing: configure addresses
+if [[ "${IP}" == "4" ]]; then
+ readonly SADDR="${SADDR4}"
+ readonly DADDR="${DADDR4}"
+elif [[ "${IP}" == "6" ]]; then
+ readonly SADDR="${SADDR6}"
+ readonly DADDR="${DADDR6}"
+else
+ echo "Invalid IP version ${IP}"
+ exit 1
+fi
+
+# Argument parsing: select receive mode
+#
+# This differs from send mode for
+# - packet: use raw recv, because packet receives skb clones
+# - raw_hdrinc: use raw recv, because hdrincl is a tx-only option
+case "${TXMODE}" in
+'packet' | 'packet_dgram' | 'raw_hdrincl')
+ RXMODE='raw'
+ ;;
+*)
+ RXMODE="${TXMODE}"
+ ;;
+esac
+
+# Start of state changes: install cleanup handler
+
+cleanup() {
+ ip netns del "${NS2}"
+ ip netns del "${NS1}"
+}
+
+trap cleanup EXIT
+
+# Create virtual ethernet pair between network namespaces
+ip netns add "${NS1}"
+ip netns add "${NS2}"
+
+# Configure system settings
+ip netns exec "${NS1}" sysctl -w -q "${path_sysctl_mem}=1000000"
+ip netns exec "${NS2}" sysctl -w -q "${path_sysctl_mem}=1000000"
+
+ip link add "${DEV}" mtu "${DEV_MTU}" netns "${NS1}" type veth \
+ peer name "${DEV}" mtu "${DEV_MTU}" netns "${NS2}"
+
+# Bring the devices up
+ip -netns "${NS1}" link set "${DEV}" up
+ip -netns "${NS2}" link set "${DEV}" up
+
+# Set fixed MAC addresses on the devices
+ip -netns "${NS1}" link set dev "${DEV}" address 02:02:02:02:02:02
+ip -netns "${NS2}" link set dev "${DEV}" address 06:06:06:06:06:06
+
+# Add fixed IP addresses to the devices
+ip -netns "${NS1}" addr add 192.168.1.1/24 dev "${DEV}"
+ip -netns "${NS2}" addr add 192.168.1.2/24 dev "${DEV}"
+ip -netns "${NS1}" addr add fd::1/64 dev "${DEV}" nodad
+ip -netns "${NS2}" addr add fd::2/64 dev "${DEV}" nodad
+
+# Optionally disable sg or csum offload to test edge cases
+# ip netns exec "${NS1}" ethtool -K "${DEV}" sg off
+
+do_test() {
+ local readonly ARGS="$1"
+
+ echo "ipv${IP} ${TXMODE} ${ARGS}"
+ ip netns exec "${NS2}" "${BIN_RX}" "-${IP}" -t 2 -C 2 -S "${SADDR}" -D "${DADDR}" -r "${RXMODE}" &
+ sleep 0.2
+ ip netns exec "${NS1}" "${BIN_TX}" "-${IP}" -t 1 -D "${DADDR}" ${ARGS} "${TXMODE}"
+ wait
+}
+
+do_test "${EXTRA_ARGS}"
+echo ok
diff --git a/tools/testing/selftests/net/ioam6.sh b/tools/testing/selftests/net/ioam6.sh
new file mode 100755
index 000000000000..12491850ae98
--- /dev/null
+++ b/tools/testing/selftests/net/ioam6.sh
@@ -0,0 +1,771 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0+
+#
+# Author: Justin Iurman <justin.iurman@uliege.be>
+#
+# This script evaluates the IOAM insertion for IPv6 by checking the IOAM data
+# consistency directly inside packets on the receiver side. Tests are divided
+# into three categories: OUTPUT (evaluates the IOAM processing by the sender),
+# INPUT (evaluates the IOAM processing by a receiver) and GLOBAL (evaluates
+# wider use cases that do not fall into the other two categories). Both OUTPUT
+# and INPUT tests only use a two-node topology (alpha and beta), while GLOBAL
+# tests use the entire three-node topology (alpha, beta, gamma). Each test is
+# documented inside its own handler in the code below.
+#
+# An IOAM domain is configured from Alpha to Gamma but not on the reverse path.
+# When either Beta or Gamma is the destination (depending on the test category),
+# Alpha adds an IOAM option (Pre-allocated Trace) inside a Hop-by-hop.
+#
+#
+# +-------------------+ +-------------------+
+# | | | |
+# | Alpha netns | | Gamma netns |
+# | | | |
+# | +-------------+ | | +-------------+ |
+# | | veth0 | | | | veth0 | |
+# | | db01::2/64 | | | | db02::2/64 | |
+# | +-------------+ | | +-------------+ |
+# | . | | . |
+# +-------------------+ +-------------------+
+# . .
+# . .
+# . .
+# +----------------------------------------------------+
+# | . . |
+# | +-------------+ +-------------+ |
+# | | veth0 | | veth1 | |
+# | | db01::1/64 | ................ | db02::1/64 | |
+# | +-------------+ +-------------+ |
+# | |
+# | Beta netns |
+# | |
+# +----------------------------------------------------+
+#
+#
+#
+# =============================================================
+# | Alpha - IOAM configuration |
+# +===========================================================+
+# | Node ID | 1 |
+# +-----------------------------------------------------------+
+# | Node Wide ID | 11111111 |
+# +-----------------------------------------------------------+
+# | Ingress ID | 0xffff (default value) |
+# +-----------------------------------------------------------+
+# | Ingress Wide ID | 0xffffffff (default value) |
+# +-----------------------------------------------------------+
+# | Egress ID | 101 |
+# +-----------------------------------------------------------+
+# | Egress Wide ID | 101101 |
+# +-----------------------------------------------------------+
+# | Namespace Data | 0xdeadbee0 |
+# +-----------------------------------------------------------+
+# | Namespace Wide Data | 0xcafec0caf00dc0de |
+# +-----------------------------------------------------------+
+# | Schema ID | 777 |
+# +-----------------------------------------------------------+
+# | Schema Data | something that will be 4n-aligned |
+# +-----------------------------------------------------------+
+#
+#
+# =============================================================
+# | Beta - IOAM configuration |
+# +===========================================================+
+# | Node ID | 2 |
+# +-----------------------------------------------------------+
+# | Node Wide ID | 22222222 |
+# +-----------------------------------------------------------+
+# | Ingress ID | 201 |
+# +-----------------------------------------------------------+
+# | Ingress Wide ID | 201201 |
+# +-----------------------------------------------------------+
+# | Egress ID | 202 |
+# +-----------------------------------------------------------+
+# | Egress Wide ID | 202202 |
+# +-----------------------------------------------------------+
+# | Namespace Data | 0xdeadbee1 |
+# +-----------------------------------------------------------+
+# | Namespace Wide Data | 0xcafec0caf11dc0de |
+# +-----------------------------------------------------------+
+# | Schema ID | 666 |
+# +-----------------------------------------------------------+
+# | Schema Data | Hello there -Obi |
+# +-----------------------------------------------------------+
+#
+#
+# =============================================================
+# | Gamma - IOAM configuration |
+# +===========================================================+
+# | Node ID | 3 |
+# +-----------------------------------------------------------+
+# | Node Wide ID | 33333333 |
+# +-----------------------------------------------------------+
+# | Ingress ID | 301 |
+# +-----------------------------------------------------------+
+# | Ingress Wide ID | 301301 |
+# +-----------------------------------------------------------+
+# | Egress ID | 0xffff (default value) |
+# +-----------------------------------------------------------+
+# | Egress Wide ID | 0xffffffff (default value) |
+# +-----------------------------------------------------------+
+# | Namespace Data | 0xdeadbee2 |
+# +-----------------------------------------------------------+
+# | Namespace Wide Data | 0xcafec0caf22dc0de |
+# +-----------------------------------------------------------+
+# | Schema ID | 0xffffff (= None) |
+# +-----------------------------------------------------------+
+# | Schema Data | |
+# +-----------------------------------------------------------+
+
+source lib.sh
+
+################################################################################
+# #
+# WARNING: Be careful if you modify the block below - it MUST be kept #
+# synchronized with configurations inside ioam6_parser.c and always #
+# reflect the same. #
+# #
+################################################################################
+
+ALPHA=(
+ 1 # ID
+ 11111111 # Wide ID
+ 0xffff # Ingress ID
+ 0xffffffff # Ingress Wide ID
+ 101 # Egress ID
+ 101101 # Egress Wide ID
+ 0xdeadbee0 # Namespace Data
+ 0xcafec0caf00dc0de # Namespace Wide Data
+ 777 # Schema ID (0xffffff = None)
+ "something that will be 4n-aligned" # Schema Data
+)
+
+BETA=(
+ 2
+ 22222222
+ 201
+ 201201
+ 202
+ 202202
+ 0xdeadbee1
+ 0xcafec0caf11dc0de
+ 666
+ "Hello there -Obi"
+)
+
+GAMMA=(
+ 3
+ 33333333
+ 301
+ 301301
+ 0xffff
+ 0xffffffff
+ 0xdeadbee2
+ 0xcafec0caf22dc0de
+ 0xffffff
+ ""
+)
+
+TESTS_OUTPUT="
+ out_undef_ns
+ out_no_room
+ out_bits
+ out_full_supp_trace
+"
+
+TESTS_INPUT="
+ in_undef_ns
+ in_no_room
+ in_oflag
+ in_bits
+ in_full_supp_trace
+"
+
+TESTS_GLOBAL="
+ fwd_full_supp_trace
+"
+
+
+################################################################################
+# #
+# LIBRARY #
+# #
+################################################################################
+
+check_kernel_compatibility()
+{
+ setup_ns ioam_tmp_node
+ ip link add name veth0 netns $ioam_tmp_node type veth \
+ peer name veth1 netns $ioam_tmp_node
+
+ ip -netns $ioam_tmp_node link set veth0 up
+ ip -netns $ioam_tmp_node link set veth1 up
+
+ ip -netns $ioam_tmp_node ioam namespace add 0
+ ns_ad=$?
+
+ ip -netns $ioam_tmp_node ioam namespace show | grep -q "namespace 0"
+ ns_sh=$?
+
+ if [[ $ns_ad != 0 || $ns_sh != 0 ]]
+ then
+ echo "SKIP: kernel version probably too old, missing ioam support"
+ ip link del veth0 2>/dev/null || true
+ cleanup_ns $ioam_tmp_node || true
+ exit $ksft_skip
+ fi
+
+ ip -netns $ioam_tmp_node route add db02::/64 encap ioam6 mode inline \
+ trace prealloc type 0x800000 ns 0 size 4 dev veth0
+ tr_ad=$?
+
+ ip -netns $ioam_tmp_node -6 route | grep -q "encap ioam6"
+ tr_sh=$?
+
+ if [[ $tr_ad != 0 || $tr_sh != 0 ]]
+ then
+ echo "SKIP: cannot attach an ioam trace to a route, did you compile" \
+ "without CONFIG_IPV6_IOAM6_LWTUNNEL?"
+ ip link del veth0 2>/dev/null || true
+ cleanup_ns $ioam_tmp_node || true
+ exit $ksft_skip
+ fi
+
+ ip link del veth0 2>/dev/null || true
+ cleanup_ns $ioam_tmp_node || true
+
+ lsmod | grep -q "ip6_tunnel"
+ ip6tnl_loaded=$?
+
+ if [ $ip6tnl_loaded = 0 ]
+ then
+ encap_tests=0
+ else
+ modprobe ip6_tunnel &>/dev/null
+ lsmod | grep -q "ip6_tunnel"
+ encap_tests=$?
+
+ if [ $encap_tests != 0 ]
+ then
+ ip a | grep -q "ip6tnl0"
+ encap_tests=$?
+
+ if [ $encap_tests != 0 ]
+ then
+ echo "Note: ip6_tunnel not found neither as a module nor inside the" \
+ "kernel, tests that require it (encap mode) will be omitted"
+ fi
+ fi
+ fi
+}
+
+cleanup()
+{
+ ip link del ioam-veth-alpha 2>/dev/null || true
+ ip link del ioam-veth-gamma 2>/dev/null || true
+
+ cleanup_ns $ioam_node_alpha $ioam_node_beta $ioam_node_gamma || true
+
+ if [ $ip6tnl_loaded != 0 ]
+ then
+ modprobe -r ip6_tunnel 2>/dev/null || true
+ fi
+}
+
+setup()
+{
+ setup_ns ioam_node_alpha ioam_node_beta ioam_node_gamma
+
+ ip link add name ioam-veth-alpha netns $ioam_node_alpha type veth \
+ peer name ioam-veth-betaL netns $ioam_node_beta
+ ip link add name ioam-veth-betaR netns $ioam_node_beta type veth \
+ peer name ioam-veth-gamma netns $ioam_node_gamma
+
+ ip -netns $ioam_node_alpha link set ioam-veth-alpha name veth0
+ ip -netns $ioam_node_beta link set ioam-veth-betaL name veth0
+ ip -netns $ioam_node_beta link set ioam-veth-betaR name veth1
+ ip -netns $ioam_node_gamma link set ioam-veth-gamma name veth0
+
+ ip -netns $ioam_node_alpha addr add db01::2/64 dev veth0
+ ip -netns $ioam_node_alpha link set veth0 up
+ ip -netns $ioam_node_alpha link set lo up
+ ip -netns $ioam_node_alpha route add db02::/64 via db01::1 dev veth0
+ ip -netns $ioam_node_alpha route del db01::/64
+ ip -netns $ioam_node_alpha route add db01::/64 dev veth0
+
+ ip -netns $ioam_node_beta addr add db01::1/64 dev veth0
+ ip -netns $ioam_node_beta addr add db02::1/64 dev veth1
+ ip -netns $ioam_node_beta link set veth0 up
+ ip -netns $ioam_node_beta link set veth1 up
+ ip -netns $ioam_node_beta link set lo up
+
+ ip -netns $ioam_node_gamma addr add db02::2/64 dev veth0
+ ip -netns $ioam_node_gamma link set veth0 up
+ ip -netns $ioam_node_gamma link set lo up
+ ip -netns $ioam_node_gamma route add db01::/64 via db02::1 dev veth0
+
+ # - IOAM config -
+ ip netns exec $ioam_node_alpha sysctl -wq net.ipv6.ioam6_id=${ALPHA[0]}
+ ip netns exec $ioam_node_alpha sysctl -wq net.ipv6.ioam6_id_wide=${ALPHA[1]}
+ ip netns exec $ioam_node_alpha sysctl -wq net.ipv6.conf.veth0.ioam6_id=${ALPHA[4]}
+ ip netns exec $ioam_node_alpha sysctl -wq net.ipv6.conf.veth0.ioam6_id_wide=${ALPHA[5]}
+ ip -netns $ioam_node_alpha ioam namespace add 123 data ${ALPHA[6]} wide ${ALPHA[7]}
+ ip -netns $ioam_node_alpha ioam schema add ${ALPHA[8]} "${ALPHA[9]}"
+ ip -netns $ioam_node_alpha ioam namespace set 123 schema ${ALPHA[8]}
+
+ ip netns exec $ioam_node_beta sysctl -wq net.ipv6.conf.all.forwarding=1
+ ip netns exec $ioam_node_beta sysctl -wq net.ipv6.ioam6_id=${BETA[0]}
+ ip netns exec $ioam_node_beta sysctl -wq net.ipv6.ioam6_id_wide=${BETA[1]}
+ ip netns exec $ioam_node_beta sysctl -wq net.ipv6.conf.veth0.ioam6_enabled=1
+ ip netns exec $ioam_node_beta sysctl -wq net.ipv6.conf.veth0.ioam6_id=${BETA[2]}
+ ip netns exec $ioam_node_beta sysctl -wq net.ipv6.conf.veth0.ioam6_id_wide=${BETA[3]}
+ ip netns exec $ioam_node_beta sysctl -wq net.ipv6.conf.veth1.ioam6_id=${BETA[4]}
+ ip netns exec $ioam_node_beta sysctl -wq net.ipv6.conf.veth1.ioam6_id_wide=${BETA[5]}
+ ip -netns $ioam_node_beta ioam namespace add 123 data ${BETA[6]} wide ${BETA[7]}
+ ip -netns $ioam_node_beta ioam schema add ${BETA[8]} "${BETA[9]}"
+ ip -netns $ioam_node_beta ioam namespace set 123 schema ${BETA[8]}
+
+ ip netns exec $ioam_node_gamma sysctl -wq net.ipv6.ioam6_id=${GAMMA[0]}
+ ip netns exec $ioam_node_gamma sysctl -wq net.ipv6.ioam6_id_wide=${GAMMA[1]}
+ ip netns exec $ioam_node_gamma sysctl -wq net.ipv6.conf.veth0.ioam6_enabled=1
+ ip netns exec $ioam_node_gamma sysctl -wq net.ipv6.conf.veth0.ioam6_id=${GAMMA[2]}
+ ip netns exec $ioam_node_gamma sysctl -wq net.ipv6.conf.veth0.ioam6_id_wide=${GAMMA[3]}
+ ip -netns $ioam_node_gamma ioam namespace add 123 data ${GAMMA[6]} wide ${GAMMA[7]}
+
+ sleep 1
+
+ ip netns exec $ioam_node_alpha ping6 -c 5 -W 1 db02::2 &>/dev/null
+ if [ $? != 0 ]
+ then
+ echo "Setup FAILED"
+ cleanup &>/dev/null
+ exit 0
+ fi
+}
+
+log_test_passed()
+{
+ local desc=$1
+ printf "TEST: %-60s [ OK ]\n" "${desc}"
+}
+
+log_test_failed()
+{
+ local desc=$1
+ printf "TEST: %-60s [FAIL]\n" "${desc}"
+}
+
+log_results()
+{
+ echo "- Tests passed: ${npassed}"
+ echo "- Tests failed: ${nfailed}"
+}
+
+run_test()
+{
+ local name=$1
+ local desc=$2
+ local node_src=$3
+ local node_dst=$4
+ local ip6_dst=$5
+ local trace_type=$6
+ local ioam_ns=$7
+ local type=$8
+
+ ip netns exec $node_dst ./ioam6_parser $name $trace_type $ioam_ns $type &
+ local spid=$!
+ sleep 0.1
+
+ ip netns exec $node_src ping6 -t 64 -c 1 -W 1 $ip6_dst &>/dev/null
+ if [ $? != 0 ]
+ then
+ nfailed=$((nfailed+1))
+ log_test_failed "${desc}"
+ kill -2 $spid &>/dev/null
+ else
+ wait $spid
+ if [ $? = 0 ]
+ then
+ npassed=$((npassed+1))
+ log_test_passed "${desc}"
+ else
+ nfailed=$((nfailed+1))
+ log_test_failed "${desc}"
+ fi
+ fi
+}
+
+run()
+{
+ echo
+ printf "%0.s-" {1..74}
+ echo
+ echo "OUTPUT tests"
+ printf "%0.s-" {1..74}
+ echo
+
+ # set OUTPUT settings
+ ip netns exec $ioam_node_beta sysctl -wq net.ipv6.conf.veth0.ioam6_enabled=0
+
+ for t in $TESTS_OUTPUT
+ do
+ $t "inline"
+ [ $encap_tests = 0 ] && $t "encap"
+ done
+
+ # clean OUTPUT settings
+ ip netns exec $ioam_node_beta sysctl -wq net.ipv6.conf.veth0.ioam6_enabled=1
+ ip -netns $ioam_node_alpha route change db01::/64 dev veth0
+
+
+ echo
+ printf "%0.s-" {1..74}
+ echo
+ echo "INPUT tests"
+ printf "%0.s-" {1..74}
+ echo
+
+ # set INPUT settings
+ ip -netns $ioam_node_alpha ioam namespace del 123
+
+ for t in $TESTS_INPUT
+ do
+ $t "inline"
+ [ $encap_tests = 0 ] && $t "encap"
+ done
+
+ # clean INPUT settings
+ ip -netns $ioam_node_alpha ioam namespace add 123 \
+ data ${ALPHA[6]} wide ${ALPHA[7]}
+ ip -netns $ioam_node_alpha ioam namespace set 123 schema ${ALPHA[8]}
+ ip -netns $ioam_node_alpha route change db01::/64 dev veth0
+
+ echo
+ printf "%0.s-" {1..74}
+ echo
+ echo "GLOBAL tests"
+ printf "%0.s-" {1..74}
+ echo
+
+ for t in $TESTS_GLOBAL
+ do
+ $t "inline"
+ [ $encap_tests = 0 ] && $t "encap"
+ done
+
+ echo
+ log_results
+}
+
+bit2type=(
+ 0x800000 0x400000 0x200000 0x100000 0x080000 0x040000 0x020000 0x010000
+ 0x008000 0x004000 0x002000 0x001000 0x000800 0x000400 0x000200 0x000100
+ 0x000080 0x000040 0x000020 0x000010 0x000008 0x000004 0x000002
+)
+bit2size=( 4 4 4 4 4 4 4 4 8 8 8 4 4 4 4 4 4 4 4 4 4 4 4 )
+
+
+################################################################################
+# #
+# OUTPUT tests #
+# #
+# Two nodes (sender/receiver), IOAM disabled on ingress for the receiver. #
+################################################################################
+
+out_undef_ns()
+{
+ ##############################################################################
+ # Make sure that the encap node won't fill the trace if the chosen IOAM #
+ # namespace is not configured locally. #
+ ##############################################################################
+ local desc="Unknown IOAM namespace"
+
+ [ "$1" = "encap" ] && mode="$1 tundst db01::1" || mode="$1"
+ [ "$1" = "encap" ] && ip -netns $ioam_node_beta link set ip6tnl0 up
+
+ ip -netns $ioam_node_alpha route change db01::/64 encap ioam6 mode $mode \
+ trace prealloc type 0x800000 ns 0 size 4 dev veth0
+
+ run_test ${FUNCNAME[0]} "${desc} ($1 mode)" $ioam_node_alpha $ioam_node_beta \
+ db01::1 0x800000 0 $1
+
+ [ "$1" = "encap" ] && ip -netns $ioam_node_beta link set ip6tnl0 down
+}
+
+out_no_room()
+{
+ ##############################################################################
+ # Make sure that the encap node won't fill the trace and will set the #
+ # Overflow flag since there is no room enough for its data. #
+ ##############################################################################
+ local desc="Missing trace room"
+
+ [ "$1" = "encap" ] && mode="$1 tundst db01::1" || mode="$1"
+ [ "$1" = "encap" ] && ip -netns $ioam_node_beta link set ip6tnl0 up
+
+ ip -netns $ioam_node_alpha route change db01::/64 encap ioam6 mode $mode \
+ trace prealloc type 0xc00000 ns 123 size 4 dev veth0
+
+ run_test ${FUNCNAME[0]} "${desc} ($1 mode)" $ioam_node_alpha $ioam_node_beta \
+ db01::1 0xc00000 123 $1
+
+ [ "$1" = "encap" ] && ip -netns $ioam_node_beta link set ip6tnl0 down
+}
+
+out_bits()
+{
+ ##############################################################################
+ # Make sure that, for each trace type bit, the encap node will either: #
+ # (i) fill the trace with its data when it is a supported bit #
+ # (ii) not fill the trace with its data when it is an unsupported bit #
+ ##############################################################################
+ local desc="Trace type with bit <n> only"
+
+ local tmp=${bit2size[22]}
+ bit2size[22]=$(( $tmp + ${#ALPHA[9]} + ((4 - (${#ALPHA[9]} % 4)) % 4) ))
+
+ [ "$1" = "encap" ] && mode="$1 tundst db01::1" || mode="$1"
+ [ "$1" = "encap" ] && ip -netns $ioam_node_beta link set ip6tnl0 up
+
+ for i in {0..22}
+ do
+ ip -netns $ioam_node_alpha route change db01::/64 encap ioam6 mode $mode \
+ trace prealloc type ${bit2type[$i]} ns 123 size ${bit2size[$i]} \
+ dev veth0 &>/dev/null
+
+ local cmd_res=$?
+ local descr="${desc/<n>/$i}"
+
+ if [[ $i -ge 12 && $i -le 21 ]]
+ then
+ if [ $cmd_res != 0 ]
+ then
+ npassed=$((npassed+1))
+ log_test_passed "$descr ($1 mode)"
+ else
+ nfailed=$((nfailed+1))
+ log_test_failed "$descr ($1 mode)"
+ fi
+ else
+ run_test "out_bit$i" "$descr ($1 mode)" $ioam_node_alpha \
+ $ioam_node_beta db01::1 ${bit2type[$i]} 123 $1
+ fi
+ done
+
+ [ "$1" = "encap" ] && ip -netns $ioam_node_beta link set ip6tnl0 down
+
+ bit2size[22]=$tmp
+}
+
+out_full_supp_trace()
+{
+ ##############################################################################
+ # Make sure that the encap node will correctly fill a full trace. Be careful,#
+ # "full trace" here does NOT mean all bits (only supported ones). #
+ ##############################################################################
+ local desc="Full supported trace"
+
+ [ "$1" = "encap" ] && mode="$1 tundst db01::1" || mode="$1"
+ [ "$1" = "encap" ] && ip -netns $ioam_node_beta link set ip6tnl0 up
+
+ ip -netns $ioam_node_alpha route change db01::/64 encap ioam6 mode $mode \
+ trace prealloc type 0xfff002 ns 123 size 100 dev veth0
+
+ run_test ${FUNCNAME[0]} "${desc} ($1 mode)" $ioam_node_alpha $ioam_node_beta \
+ db01::1 0xfff002 123 $1
+
+ [ "$1" = "encap" ] && ip -netns $ioam_node_beta link set ip6tnl0 down
+}
+
+
+################################################################################
+# #
+# INPUT tests #
+# #
+# Two nodes (sender/receiver), the sender MUST NOT fill the trace upon #
+# insertion -> the IOAM namespace configured on the sender is removed #
+# and is used in the inserted trace to force the sender not to fill it. #
+################################################################################
+
+in_undef_ns()
+{
+ ##############################################################################
+ # Make sure that the receiving node won't fill the trace if the related IOAM #
+ # namespace is not configured locally. #
+ ##############################################################################
+ local desc="Unknown IOAM namespace"
+
+ [ "$1" = "encap" ] && mode="$1 tundst db01::1" || mode="$1"
+ [ "$1" = "encap" ] && ip -netns $ioam_node_beta link set ip6tnl0 up
+
+ ip -netns $ioam_node_alpha route change db01::/64 encap ioam6 mode $mode \
+ trace prealloc type 0x800000 ns 0 size 4 dev veth0
+
+ run_test ${FUNCNAME[0]} "${desc} ($1 mode)" $ioam_node_alpha $ioam_node_beta \
+ db01::1 0x800000 0 $1
+
+ [ "$1" = "encap" ] && ip -netns $ioam_node_beta link set ip6tnl0 down
+}
+
+in_no_room()
+{
+ ##############################################################################
+ # Make sure that the receiving node won't fill the trace and will set the #
+ # Overflow flag if there is no room enough for its data. #
+ ##############################################################################
+ local desc="Missing trace room"
+
+ [ "$1" = "encap" ] && mode="$1 tundst db01::1" || mode="$1"
+ [ "$1" = "encap" ] && ip -netns $ioam_node_beta link set ip6tnl0 up
+
+ ip -netns $ioam_node_alpha route change db01::/64 encap ioam6 mode $mode \
+ trace prealloc type 0xc00000 ns 123 size 4 dev veth0
+
+ run_test ${FUNCNAME[0]} "${desc} ($1 mode)" $ioam_node_alpha $ioam_node_beta \
+ db01::1 0xc00000 123 $1
+
+ [ "$1" = "encap" ] && ip -netns $ioam_node_beta link set ip6tnl0 down
+}
+
+in_bits()
+{
+ ##############################################################################
+ # Make sure that, for each trace type bit, the receiving node will either: #
+ # (i) fill the trace with its data when it is a supported bit #
+ # (ii) not fill the trace with its data when it is an unsupported bit #
+ ##############################################################################
+ local desc="Trace type with bit <n> only"
+
+ local tmp=${bit2size[22]}
+ bit2size[22]=$(( $tmp + ${#BETA[9]} + ((4 - (${#BETA[9]} % 4)) % 4) ))
+
+ [ "$1" = "encap" ] && mode="$1 tundst db01::1" || mode="$1"
+ [ "$1" = "encap" ] && ip -netns $ioam_node_beta link set ip6tnl0 up
+
+ for i in {0..11} {22..22}
+ do
+ ip -netns $ioam_node_alpha route change db01::/64 encap ioam6 mode $mode \
+ trace prealloc type ${bit2type[$i]} ns 123 size ${bit2size[$i]} \
+ dev veth0
+
+ run_test "in_bit$i" "${desc/<n>/$i} ($1 mode)" $ioam_node_alpha \
+ $ioam_node_beta db01::1 ${bit2type[$i]} 123 $1
+ done
+
+ [ "$1" = "encap" ] && ip -netns $ioam_node_beta link set ip6tnl0 down
+
+ bit2size[22]=$tmp
+}
+
+in_oflag()
+{
+ ##############################################################################
+ # Make sure that the receiving node won't fill the trace since the Overflow #
+ # flag is set. #
+ ##############################################################################
+ local desc="Overflow flag is set"
+
+ # Exception:
+ # Here, we need the sender to set the Overflow flag. For that, we will add
+ # back the IOAM namespace that was previously configured on the sender.
+ ip -netns $ioam_node_alpha ioam namespace add 123
+
+ [ "$1" = "encap" ] && mode="$1 tundst db01::1" || mode="$1"
+ [ "$1" = "encap" ] && ip -netns $ioam_node_beta link set ip6tnl0 up
+
+ ip -netns $ioam_node_alpha route change db01::/64 encap ioam6 mode $mode \
+ trace prealloc type 0xc00000 ns 123 size 4 dev veth0
+
+ run_test ${FUNCNAME[0]} "${desc} ($1 mode)" $ioam_node_alpha $ioam_node_beta \
+ db01::1 0xc00000 123 $1
+
+ [ "$1" = "encap" ] && ip -netns $ioam_node_beta link set ip6tnl0 down
+
+ # And we clean the exception for this test to get things back to normal for
+ # other INPUT tests
+ ip -netns $ioam_node_alpha ioam namespace del 123
+}
+
+in_full_supp_trace()
+{
+ ##############################################################################
+ # Make sure that the receiving node will correctly fill a full trace. Be #
+ # careful, "full trace" here does NOT mean all bits (only supported ones). #
+ ##############################################################################
+ local desc="Full supported trace"
+
+ [ "$1" = "encap" ] && mode="$1 tundst db01::1" || mode="$1"
+ [ "$1" = "encap" ] && ip -netns $ioam_node_beta link set ip6tnl0 up
+
+ ip -netns $ioam_node_alpha route change db01::/64 encap ioam6 mode $mode \
+ trace prealloc type 0xfff002 ns 123 size 80 dev veth0
+
+ run_test ${FUNCNAME[0]} "${desc} ($1 mode)" $ioam_node_alpha $ioam_node_beta \
+ db01::1 0xfff002 123 $1
+
+ [ "$1" = "encap" ] && ip -netns $ioam_node_beta link set ip6tnl0 down
+}
+
+
+################################################################################
+# #
+# GLOBAL tests #
+# #
+# Three nodes (sender/router/receiver), IOAM fully enabled on every node. #
+################################################################################
+
+fwd_full_supp_trace()
+{
+ ##############################################################################
+ # Make sure that all three nodes correctly filled the full supported trace #
+ # by checking that the trace data is consistent with the predefined config. #
+ ##############################################################################
+ local desc="Forward - Full supported trace"
+
+ [ "$1" = "encap" ] && mode="$1 tundst db02::2" || mode="$1"
+ [ "$1" = "encap" ] && ip -netns $ioam_node_gamma link set ip6tnl0 up
+
+ ip -netns $ioam_node_alpha route change db02::/64 encap ioam6 mode $mode \
+ trace prealloc type 0xfff002 ns 123 size 244 via db01::1 dev veth0
+
+ run_test ${FUNCNAME[0]} "${desc} ($1 mode)" $ioam_node_alpha $ioam_node_gamma \
+ db02::2 0xfff002 123 $1
+
+ [ "$1" = "encap" ] && ip -netns $ioam_node_gamma link set ip6tnl0 down
+}
+
+
+################################################################################
+# #
+# MAIN #
+# #
+################################################################################
+
+npassed=0
+nfailed=0
+
+if [ "$(id -u)" -ne 0 ]
+then
+ echo "SKIP: Need root privileges"
+ exit $ksft_skip
+fi
+
+if [ ! -x "$(command -v ip)" ]
+then
+ echo "SKIP: Could not run test without ip tool"
+ exit $ksft_skip
+fi
+
+ip ioam &>/dev/null
+if [ $? = 1 ]
+then
+ echo "SKIP: iproute2 too old, missing ioam command"
+ exit $ksft_skip
+fi
+
+check_kernel_compatibility
+
+cleanup &>/dev/null
+setup
+run
+cleanup &>/dev/null
diff --git a/tools/testing/selftests/net/ioam6_parser.c b/tools/testing/selftests/net/ioam6_parser.c
new file mode 100644
index 000000000000..895e5bb5044b
--- /dev/null
+++ b/tools/testing/selftests/net/ioam6_parser.c
@@ -0,0 +1,674 @@
+// SPDX-License-Identifier: GPL-2.0+
+/*
+ * Author: Justin Iurman (justin.iurman@uliege.be)
+ *
+ * IOAM tester for IPv6, see ioam6.sh for details on each test case.
+ */
+#include <arpa/inet.h>
+#include <errno.h>
+#include <limits.h>
+#include <linux/const.h>
+#include <linux/ioam6.h>
+#include <linux/ipv6.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+
+struct ioam_config {
+ __u32 id;
+ __u64 wide;
+ __u16 ingr_id;
+ __u16 egr_id;
+ __u32 ingr_wide;
+ __u32 egr_wide;
+ __u32 ns_data;
+ __u64 ns_wide;
+ __u32 sc_id;
+ __u8 hlim;
+ char *sc_data;
+};
+
+/*
+ * Be careful if you modify structs below - everything MUST be kept synchronized
+ * with configurations inside ioam6.sh and always reflect the same.
+ */
+
+static struct ioam_config node1 = {
+ .id = 1,
+ .wide = 11111111,
+ .ingr_id = 0xffff, /* default value */
+ .egr_id = 101,
+ .ingr_wide = 0xffffffff, /* default value */
+ .egr_wide = 101101,
+ .ns_data = 0xdeadbee0,
+ .ns_wide = 0xcafec0caf00dc0de,
+ .sc_id = 777,
+ .sc_data = "something that will be 4n-aligned",
+ .hlim = 64,
+};
+
+static struct ioam_config node2 = {
+ .id = 2,
+ .wide = 22222222,
+ .ingr_id = 201,
+ .egr_id = 202,
+ .ingr_wide = 201201,
+ .egr_wide = 202202,
+ .ns_data = 0xdeadbee1,
+ .ns_wide = 0xcafec0caf11dc0de,
+ .sc_id = 666,
+ .sc_data = "Hello there -Obi",
+ .hlim = 63,
+};
+
+static struct ioam_config node3 = {
+ .id = 3,
+ .wide = 33333333,
+ .ingr_id = 301,
+ .egr_id = 0xffff, /* default value */
+ .ingr_wide = 301301,
+ .egr_wide = 0xffffffff, /* default value */
+ .ns_data = 0xdeadbee2,
+ .ns_wide = 0xcafec0caf22dc0de,
+ .sc_id = 0xffffff, /* default value */
+ .sc_data = NULL,
+ .hlim = 62,
+};
+
+enum {
+ /**********
+ * OUTPUT *
+ **********/
+ TEST_OUT_UNDEF_NS,
+ TEST_OUT_NO_ROOM,
+ TEST_OUT_BIT0,
+ TEST_OUT_BIT1,
+ TEST_OUT_BIT2,
+ TEST_OUT_BIT3,
+ TEST_OUT_BIT4,
+ TEST_OUT_BIT5,
+ TEST_OUT_BIT6,
+ TEST_OUT_BIT7,
+ TEST_OUT_BIT8,
+ TEST_OUT_BIT9,
+ TEST_OUT_BIT10,
+ TEST_OUT_BIT11,
+ TEST_OUT_BIT22,
+ TEST_OUT_FULL_SUPP_TRACE,
+
+ /*********
+ * INPUT *
+ *********/
+ TEST_IN_UNDEF_NS,
+ TEST_IN_NO_ROOM,
+ TEST_IN_OFLAG,
+ TEST_IN_BIT0,
+ TEST_IN_BIT1,
+ TEST_IN_BIT2,
+ TEST_IN_BIT3,
+ TEST_IN_BIT4,
+ TEST_IN_BIT5,
+ TEST_IN_BIT6,
+ TEST_IN_BIT7,
+ TEST_IN_BIT8,
+ TEST_IN_BIT9,
+ TEST_IN_BIT10,
+ TEST_IN_BIT11,
+ TEST_IN_BIT22,
+ TEST_IN_FULL_SUPP_TRACE,
+
+ /**********
+ * GLOBAL *
+ **********/
+ TEST_FWD_FULL_SUPP_TRACE,
+
+ __TEST_MAX,
+};
+
+static int check_ioam_header(int tid, struct ioam6_trace_hdr *ioam6h,
+ __u32 trace_type, __u16 ioam_ns)
+{
+ if (__be16_to_cpu(ioam6h->namespace_id) != ioam_ns ||
+ __be32_to_cpu(ioam6h->type_be32) != (trace_type << 8))
+ return 1;
+
+ switch (tid) {
+ case TEST_OUT_UNDEF_NS:
+ case TEST_IN_UNDEF_NS:
+ return ioam6h->overflow ||
+ ioam6h->nodelen != 1 ||
+ ioam6h->remlen != 1;
+
+ case TEST_OUT_NO_ROOM:
+ case TEST_IN_NO_ROOM:
+ case TEST_IN_OFLAG:
+ return !ioam6h->overflow ||
+ ioam6h->nodelen != 2 ||
+ ioam6h->remlen != 1;
+
+ case TEST_OUT_BIT0:
+ case TEST_IN_BIT0:
+ case TEST_OUT_BIT1:
+ case TEST_IN_BIT1:
+ case TEST_OUT_BIT2:
+ case TEST_IN_BIT2:
+ case TEST_OUT_BIT3:
+ case TEST_IN_BIT3:
+ case TEST_OUT_BIT4:
+ case TEST_IN_BIT4:
+ case TEST_OUT_BIT5:
+ case TEST_IN_BIT5:
+ case TEST_OUT_BIT6:
+ case TEST_IN_BIT6:
+ case TEST_OUT_BIT7:
+ case TEST_IN_BIT7:
+ case TEST_OUT_BIT11:
+ case TEST_IN_BIT11:
+ return ioam6h->overflow ||
+ ioam6h->nodelen != 1 ||
+ ioam6h->remlen;
+
+ case TEST_OUT_BIT8:
+ case TEST_IN_BIT8:
+ case TEST_OUT_BIT9:
+ case TEST_IN_BIT9:
+ case TEST_OUT_BIT10:
+ case TEST_IN_BIT10:
+ return ioam6h->overflow ||
+ ioam6h->nodelen != 2 ||
+ ioam6h->remlen;
+
+ case TEST_OUT_BIT22:
+ case TEST_IN_BIT22:
+ return ioam6h->overflow ||
+ ioam6h->nodelen ||
+ ioam6h->remlen;
+
+ case TEST_OUT_FULL_SUPP_TRACE:
+ case TEST_IN_FULL_SUPP_TRACE:
+ case TEST_FWD_FULL_SUPP_TRACE:
+ return ioam6h->overflow ||
+ ioam6h->nodelen != 15 ||
+ ioam6h->remlen;
+
+ default:
+ break;
+ }
+
+ return 1;
+}
+
+static int check_ioam6_data(__u8 **p, struct ioam6_trace_hdr *ioam6h,
+ const struct ioam_config cnf)
+{
+ unsigned int len;
+ __u8 aligned;
+ __u64 raw64;
+ __u32 raw32;
+
+ if (ioam6h->type.bit0) {
+ raw32 = __be32_to_cpu(*((__u32 *)*p));
+ if (cnf.hlim != (raw32 >> 24) || cnf.id != (raw32 & 0xffffff))
+ return 1;
+ *p += sizeof(__u32);
+ }
+
+ if (ioam6h->type.bit1) {
+ raw32 = __be32_to_cpu(*((__u32 *)*p));
+ if (cnf.ingr_id != (raw32 >> 16) ||
+ cnf.egr_id != (raw32 & 0xffff))
+ return 1;
+ *p += sizeof(__u32);
+ }
+
+ if (ioam6h->type.bit2)
+ *p += sizeof(__u32);
+
+ if (ioam6h->type.bit3)
+ *p += sizeof(__u32);
+
+ if (ioam6h->type.bit4) {
+ if (__be32_to_cpu(*((__u32 *)*p)) != 0xffffffff)
+ return 1;
+ *p += sizeof(__u32);
+ }
+
+ if (ioam6h->type.bit5) {
+ if (__be32_to_cpu(*((__u32 *)*p)) != cnf.ns_data)
+ return 1;
+ *p += sizeof(__u32);
+ }
+
+ if (ioam6h->type.bit6)
+ *p += sizeof(__u32);
+
+ if (ioam6h->type.bit7) {
+ if (__be32_to_cpu(*((__u32 *)*p)) != 0xffffffff)
+ return 1;
+ *p += sizeof(__u32);
+ }
+
+ if (ioam6h->type.bit8) {
+ raw64 = __be64_to_cpu(*((__u64 *)*p));
+ if (cnf.hlim != (raw64 >> 56) ||
+ cnf.wide != (raw64 & 0xffffffffffffff))
+ return 1;
+ *p += sizeof(__u64);
+ }
+
+ if (ioam6h->type.bit9) {
+ if (__be32_to_cpu(*((__u32 *)*p)) != cnf.ingr_wide)
+ return 1;
+ *p += sizeof(__u32);
+
+ if (__be32_to_cpu(*((__u32 *)*p)) != cnf.egr_wide)
+ return 1;
+ *p += sizeof(__u32);
+ }
+
+ if (ioam6h->type.bit10) {
+ if (__be64_to_cpu(*((__u64 *)*p)) != cnf.ns_wide)
+ return 1;
+ *p += sizeof(__u64);
+ }
+
+ if (ioam6h->type.bit11) {
+ if (__be32_to_cpu(*((__u32 *)*p)) != 0xffffffff)
+ return 1;
+ *p += sizeof(__u32);
+ }
+
+ if (ioam6h->type.bit12) {
+ if (__be32_to_cpu(*((__u32 *)*p)) != 0xffffffff)
+ return 1;
+ *p += sizeof(__u32);
+ }
+
+ if (ioam6h->type.bit13) {
+ if (__be32_to_cpu(*((__u32 *)*p)) != 0xffffffff)
+ return 1;
+ *p += sizeof(__u32);
+ }
+
+ if (ioam6h->type.bit14) {
+ if (__be32_to_cpu(*((__u32 *)*p)) != 0xffffffff)
+ return 1;
+ *p += sizeof(__u32);
+ }
+
+ if (ioam6h->type.bit15) {
+ if (__be32_to_cpu(*((__u32 *)*p)) != 0xffffffff)
+ return 1;
+ *p += sizeof(__u32);
+ }
+
+ if (ioam6h->type.bit16) {
+ if (__be32_to_cpu(*((__u32 *)*p)) != 0xffffffff)
+ return 1;
+ *p += sizeof(__u32);
+ }
+
+ if (ioam6h->type.bit17) {
+ if (__be32_to_cpu(*((__u32 *)*p)) != 0xffffffff)
+ return 1;
+ *p += sizeof(__u32);
+ }
+
+ if (ioam6h->type.bit18) {
+ if (__be32_to_cpu(*((__u32 *)*p)) != 0xffffffff)
+ return 1;
+ *p += sizeof(__u32);
+ }
+
+ if (ioam6h->type.bit19) {
+ if (__be32_to_cpu(*((__u32 *)*p)) != 0xffffffff)
+ return 1;
+ *p += sizeof(__u32);
+ }
+
+ if (ioam6h->type.bit20) {
+ if (__be32_to_cpu(*((__u32 *)*p)) != 0xffffffff)
+ return 1;
+ *p += sizeof(__u32);
+ }
+
+ if (ioam6h->type.bit21) {
+ if (__be32_to_cpu(*((__u32 *)*p)) != 0xffffffff)
+ return 1;
+ *p += sizeof(__u32);
+ }
+
+ if (ioam6h->type.bit22) {
+ len = cnf.sc_data ? strlen(cnf.sc_data) : 0;
+ aligned = cnf.sc_data ? __ALIGN_KERNEL(len, 4) : 0;
+
+ raw32 = __be32_to_cpu(*((__u32 *)*p));
+ if (aligned != (raw32 >> 24) * 4 ||
+ cnf.sc_id != (raw32 & 0xffffff))
+ return 1;
+ *p += sizeof(__u32);
+
+ if (cnf.sc_data) {
+ if (strncmp((char *)*p, cnf.sc_data, len))
+ return 1;
+
+ *p += len;
+ aligned -= len;
+
+ while (aligned--) {
+ if (**p != '\0')
+ return 1;
+ *p += sizeof(__u8);
+ }
+ }
+ }
+
+ return 0;
+}
+
+static int check_ioam_header_and_data(int tid, struct ioam6_trace_hdr *ioam6h,
+ __u32 trace_type, __u16 ioam_ns)
+{
+ __u8 *p;
+
+ if (check_ioam_header(tid, ioam6h, trace_type, ioam_ns))
+ return 1;
+
+ p = ioam6h->data + ioam6h->remlen * 4;
+
+ switch (tid) {
+ case TEST_OUT_BIT0:
+ case TEST_OUT_BIT1:
+ case TEST_OUT_BIT2:
+ case TEST_OUT_BIT3:
+ case TEST_OUT_BIT4:
+ case TEST_OUT_BIT5:
+ case TEST_OUT_BIT6:
+ case TEST_OUT_BIT7:
+ case TEST_OUT_BIT8:
+ case TEST_OUT_BIT9:
+ case TEST_OUT_BIT10:
+ case TEST_OUT_BIT11:
+ case TEST_OUT_BIT22:
+ case TEST_OUT_FULL_SUPP_TRACE:
+ return check_ioam6_data(&p, ioam6h, node1);
+
+ case TEST_IN_BIT0:
+ case TEST_IN_BIT1:
+ case TEST_IN_BIT2:
+ case TEST_IN_BIT3:
+ case TEST_IN_BIT4:
+ case TEST_IN_BIT5:
+ case TEST_IN_BIT6:
+ case TEST_IN_BIT7:
+ case TEST_IN_BIT8:
+ case TEST_IN_BIT9:
+ case TEST_IN_BIT10:
+ case TEST_IN_BIT11:
+ case TEST_IN_BIT22:
+ case TEST_IN_FULL_SUPP_TRACE:
+ {
+ __u32 tmp32 = node2.egr_wide;
+ __u16 tmp16 = node2.egr_id;
+ int res;
+
+ node2.egr_id = 0xffff;
+ node2.egr_wide = 0xffffffff;
+
+ res = check_ioam6_data(&p, ioam6h, node2);
+
+ node2.egr_id = tmp16;
+ node2.egr_wide = tmp32;
+
+ return res;
+ }
+
+ case TEST_FWD_FULL_SUPP_TRACE:
+ if (check_ioam6_data(&p, ioam6h, node3))
+ return 1;
+ if (check_ioam6_data(&p, ioam6h, node2))
+ return 1;
+ return check_ioam6_data(&p, ioam6h, node1);
+
+ default:
+ break;
+ }
+
+ return 1;
+}
+
+static int str2id(const char *tname)
+{
+ if (!strcmp("out_undef_ns", tname))
+ return TEST_OUT_UNDEF_NS;
+ if (!strcmp("out_no_room", tname))
+ return TEST_OUT_NO_ROOM;
+ if (!strcmp("out_bit0", tname))
+ return TEST_OUT_BIT0;
+ if (!strcmp("out_bit1", tname))
+ return TEST_OUT_BIT1;
+ if (!strcmp("out_bit2", tname))
+ return TEST_OUT_BIT2;
+ if (!strcmp("out_bit3", tname))
+ return TEST_OUT_BIT3;
+ if (!strcmp("out_bit4", tname))
+ return TEST_OUT_BIT4;
+ if (!strcmp("out_bit5", tname))
+ return TEST_OUT_BIT5;
+ if (!strcmp("out_bit6", tname))
+ return TEST_OUT_BIT6;
+ if (!strcmp("out_bit7", tname))
+ return TEST_OUT_BIT7;
+ if (!strcmp("out_bit8", tname))
+ return TEST_OUT_BIT8;
+ if (!strcmp("out_bit9", tname))
+ return TEST_OUT_BIT9;
+ if (!strcmp("out_bit10", tname))
+ return TEST_OUT_BIT10;
+ if (!strcmp("out_bit11", tname))
+ return TEST_OUT_BIT11;
+ if (!strcmp("out_bit22", tname))
+ return TEST_OUT_BIT22;
+ if (!strcmp("out_full_supp_trace", tname))
+ return TEST_OUT_FULL_SUPP_TRACE;
+ if (!strcmp("in_undef_ns", tname))
+ return TEST_IN_UNDEF_NS;
+ if (!strcmp("in_no_room", tname))
+ return TEST_IN_NO_ROOM;
+ if (!strcmp("in_oflag", tname))
+ return TEST_IN_OFLAG;
+ if (!strcmp("in_bit0", tname))
+ return TEST_IN_BIT0;
+ if (!strcmp("in_bit1", tname))
+ return TEST_IN_BIT1;
+ if (!strcmp("in_bit2", tname))
+ return TEST_IN_BIT2;
+ if (!strcmp("in_bit3", tname))
+ return TEST_IN_BIT3;
+ if (!strcmp("in_bit4", tname))
+ return TEST_IN_BIT4;
+ if (!strcmp("in_bit5", tname))
+ return TEST_IN_BIT5;
+ if (!strcmp("in_bit6", tname))
+ return TEST_IN_BIT6;
+ if (!strcmp("in_bit7", tname))
+ return TEST_IN_BIT7;
+ if (!strcmp("in_bit8", tname))
+ return TEST_IN_BIT8;
+ if (!strcmp("in_bit9", tname))
+ return TEST_IN_BIT9;
+ if (!strcmp("in_bit10", tname))
+ return TEST_IN_BIT10;
+ if (!strcmp("in_bit11", tname))
+ return TEST_IN_BIT11;
+ if (!strcmp("in_bit22", tname))
+ return TEST_IN_BIT22;
+ if (!strcmp("in_full_supp_trace", tname))
+ return TEST_IN_FULL_SUPP_TRACE;
+ if (!strcmp("fwd_full_supp_trace", tname))
+ return TEST_FWD_FULL_SUPP_TRACE;
+
+ return -1;
+}
+
+static int get_u32(__u32 *val, const char *arg, int base)
+{
+ unsigned long res;
+ char *ptr;
+
+ if (!arg || !*arg)
+ return -1;
+ res = strtoul(arg, &ptr, base);
+
+ if (!ptr || ptr == arg || *ptr)
+ return -1;
+
+ if (res == ULONG_MAX && errno == ERANGE)
+ return -1;
+
+ if (res > 0xFFFFFFFFUL)
+ return -1;
+
+ *val = res;
+ return 0;
+}
+
+static int get_u16(__u16 *val, const char *arg, int base)
+{
+ unsigned long res;
+ char *ptr;
+
+ if (!arg || !*arg)
+ return -1;
+ res = strtoul(arg, &ptr, base);
+
+ if (!ptr || ptr == arg || *ptr)
+ return -1;
+
+ if (res == ULONG_MAX && errno == ERANGE)
+ return -1;
+
+ if (res > 0xFFFFUL)
+ return -1;
+
+ *val = res;
+ return 0;
+}
+
+static int (*func[__TEST_MAX])(int, struct ioam6_trace_hdr *, __u32, __u16) = {
+ [TEST_OUT_UNDEF_NS] = check_ioam_header,
+ [TEST_OUT_NO_ROOM] = check_ioam_header,
+ [TEST_OUT_BIT0] = check_ioam_header_and_data,
+ [TEST_OUT_BIT1] = check_ioam_header_and_data,
+ [TEST_OUT_BIT2] = check_ioam_header_and_data,
+ [TEST_OUT_BIT3] = check_ioam_header_and_data,
+ [TEST_OUT_BIT4] = check_ioam_header_and_data,
+ [TEST_OUT_BIT5] = check_ioam_header_and_data,
+ [TEST_OUT_BIT6] = check_ioam_header_and_data,
+ [TEST_OUT_BIT7] = check_ioam_header_and_data,
+ [TEST_OUT_BIT8] = check_ioam_header_and_data,
+ [TEST_OUT_BIT9] = check_ioam_header_and_data,
+ [TEST_OUT_BIT10] = check_ioam_header_and_data,
+ [TEST_OUT_BIT11] = check_ioam_header_and_data,
+ [TEST_OUT_BIT22] = check_ioam_header_and_data,
+ [TEST_OUT_FULL_SUPP_TRACE] = check_ioam_header_and_data,
+ [TEST_IN_UNDEF_NS] = check_ioam_header,
+ [TEST_IN_NO_ROOM] = check_ioam_header,
+ [TEST_IN_OFLAG] = check_ioam_header,
+ [TEST_IN_BIT0] = check_ioam_header_and_data,
+ [TEST_IN_BIT1] = check_ioam_header_and_data,
+ [TEST_IN_BIT2] = check_ioam_header_and_data,
+ [TEST_IN_BIT3] = check_ioam_header_and_data,
+ [TEST_IN_BIT4] = check_ioam_header_and_data,
+ [TEST_IN_BIT5] = check_ioam_header_and_data,
+ [TEST_IN_BIT6] = check_ioam_header_and_data,
+ [TEST_IN_BIT7] = check_ioam_header_and_data,
+ [TEST_IN_BIT8] = check_ioam_header_and_data,
+ [TEST_IN_BIT9] = check_ioam_header_and_data,
+ [TEST_IN_BIT10] = check_ioam_header_and_data,
+ [TEST_IN_BIT11] = check_ioam_header_and_data,
+ [TEST_IN_BIT22] = check_ioam_header_and_data,
+ [TEST_IN_FULL_SUPP_TRACE] = check_ioam_header_and_data,
+ [TEST_FWD_FULL_SUPP_TRACE] = check_ioam_header_and_data,
+};
+
+int main(int argc, char **argv)
+{
+ int fd, size, hoplen, tid, ret = 1, on = 1;
+ struct ioam6_hdr *opt;
+ struct cmsghdr *cmsg;
+ struct msghdr msg;
+ struct iovec iov;
+ __u8 buffer[512];
+ __u32 tr_type;
+ __u16 ioam_ns;
+ __u8 *ptr;
+
+ if (argc != 5)
+ goto out;
+
+ tid = str2id(argv[1]);
+ if (tid < 0 || !func[tid])
+ goto out;
+
+ if (get_u32(&tr_type, argv[2], 16) ||
+ get_u16(&ioam_ns, argv[3], 0))
+ goto out;
+
+ fd = socket(PF_INET6, SOCK_RAW,
+ !strcmp(argv[4], "encap") ? IPPROTO_IPV6 : IPPROTO_ICMPV6);
+ if (fd < 0)
+ goto out;
+
+ setsockopt(fd, IPPROTO_IPV6, IPV6_RECVHOPOPTS, &on, sizeof(on));
+
+ iov.iov_len = 1;
+ iov.iov_base = malloc(CMSG_SPACE(sizeof(buffer)));
+ if (!iov.iov_base)
+ goto close;
+recv:
+ memset(&msg, 0, sizeof(msg));
+ msg.msg_iov = &iov;
+ msg.msg_iovlen = 1;
+ msg.msg_control = buffer;
+ msg.msg_controllen = CMSG_SPACE(sizeof(buffer));
+
+ size = recvmsg(fd, &msg, 0);
+ if (size <= 0)
+ goto close;
+
+ for (cmsg = CMSG_FIRSTHDR(&msg); cmsg; cmsg = CMSG_NXTHDR(&msg, cmsg)) {
+ if (cmsg->cmsg_level != IPPROTO_IPV6 ||
+ cmsg->cmsg_type != IPV6_HOPOPTS ||
+ cmsg->cmsg_len < sizeof(struct ipv6_hopopt_hdr))
+ continue;
+
+ ptr = (__u8 *)CMSG_DATA(cmsg);
+
+ hoplen = (ptr[1] + 1) << 3;
+ ptr += sizeof(struct ipv6_hopopt_hdr);
+
+ while (hoplen > 0) {
+ opt = (struct ioam6_hdr *)ptr;
+
+ if (opt->opt_type == IPV6_TLV_IOAM &&
+ opt->type == IOAM6_TYPE_PREALLOC) {
+ ptr += sizeof(*opt);
+ ret = func[tid](tid,
+ (struct ioam6_trace_hdr *)ptr,
+ tr_type, ioam_ns);
+ goto close;
+ }
+
+ ptr += opt->opt_len + 2;
+ hoplen -= opt->opt_len + 2;
+ }
+ }
+
+ goto recv;
+close:
+ free(iov.iov_base);
+ close(fd);
+out:
+ return ret;
+}
diff --git a/tools/testing/selftests/net/ip_defrag.sh b/tools/testing/selftests/net/ip_defrag.sh
index 15d3489ecd9c..ceb7ad4dbd94 100755
--- a/tools/testing/selftests/net/ip_defrag.sh
+++ b/tools/testing/selftests/net/ip_defrag.sh
@@ -6,6 +6,8 @@
set +x
set -e
+modprobe -q nf_defrag_ipv6
+
readonly NETNS="ns-$(mktemp -u XXXXXX)"
setup() {
diff --git a/tools/testing/selftests/net/ip_local_port_range.c b/tools/testing/selftests/net/ip_local_port_range.c
new file mode 100644
index 000000000000..193b82745fd8
--- /dev/null
+++ b/tools/testing/selftests/net/ip_local_port_range.c
@@ -0,0 +1,463 @@
+// SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause
+// Copyright (c) 2023 Cloudflare
+
+/* Test IP_LOCAL_PORT_RANGE socket option: IPv4 + IPv6, TCP + UDP.
+ *
+ * Tests assume that net.ipv4.ip_local_port_range is [40000, 49999].
+ * Don't run these directly but with ip_local_port_range.sh script.
+ */
+
+#include <fcntl.h>
+#include <netinet/ip.h>
+
+#include "../kselftest_harness.h"
+
+#ifndef IP_LOCAL_PORT_RANGE
+#define IP_LOCAL_PORT_RANGE 51
+#endif
+
+#ifndef IPPROTO_MPTCP
+#define IPPROTO_MPTCP 262
+#endif
+
+static __u32 pack_port_range(__u16 lo, __u16 hi)
+{
+ return (hi << 16) | (lo << 0);
+}
+
+static void unpack_port_range(__u32 range, __u16 *lo, __u16 *hi)
+{
+ *lo = range & 0xffff;
+ *hi = range >> 16;
+}
+
+static int get_so_domain(int fd)
+{
+ int domain, err;
+ socklen_t len;
+
+ len = sizeof(domain);
+ err = getsockopt(fd, SOL_SOCKET, SO_DOMAIN, &domain, &len);
+ if (err)
+ return -1;
+
+ return domain;
+}
+
+static int bind_to_loopback_any_port(int fd)
+{
+ union {
+ struct sockaddr sa;
+ struct sockaddr_in v4;
+ struct sockaddr_in6 v6;
+ } addr;
+ socklen_t addr_len;
+
+ memset(&addr, 0, sizeof(addr));
+ switch (get_so_domain(fd)) {
+ case AF_INET:
+ addr.v4.sin_family = AF_INET;
+ addr.v4.sin_port = htons(0);
+ addr.v4.sin_addr.s_addr = htonl(INADDR_LOOPBACK);
+ addr_len = sizeof(addr.v4);
+ break;
+ case AF_INET6:
+ addr.v6.sin6_family = AF_INET6;
+ addr.v6.sin6_port = htons(0);
+ addr.v6.sin6_addr = in6addr_loopback;
+ addr_len = sizeof(addr.v6);
+ break;
+ default:
+ return -1;
+ }
+
+ return bind(fd, &addr.sa, addr_len);
+}
+
+static int get_sock_port(int fd)
+{
+ union {
+ struct sockaddr sa;
+ struct sockaddr_in v4;
+ struct sockaddr_in6 v6;
+ } addr;
+ socklen_t addr_len;
+ int err;
+
+ addr_len = sizeof(addr);
+ memset(&addr, 0, sizeof(addr));
+ err = getsockname(fd, &addr.sa, &addr_len);
+ if (err)
+ return -1;
+
+ switch (addr.sa.sa_family) {
+ case AF_INET:
+ return ntohs(addr.v4.sin_port);
+ case AF_INET6:
+ return ntohs(addr.v6.sin6_port);
+ default:
+ errno = EAFNOSUPPORT;
+ return -1;
+ }
+}
+
+static int get_ip_local_port_range(int fd, __u32 *range)
+{
+ socklen_t len;
+ __u32 val;
+ int err;
+
+ len = sizeof(val);
+ err = getsockopt(fd, SOL_IP, IP_LOCAL_PORT_RANGE, &val, &len);
+ if (err)
+ return -1;
+
+ *range = val;
+ return 0;
+}
+
+FIXTURE(ip_local_port_range) {};
+
+FIXTURE_SETUP(ip_local_port_range)
+{
+}
+
+FIXTURE_TEARDOWN(ip_local_port_range)
+{
+}
+
+FIXTURE_VARIANT(ip_local_port_range) {
+ int so_domain;
+ int so_type;
+ int so_protocol;
+};
+
+FIXTURE_VARIANT_ADD(ip_local_port_range, ip4_tcp) {
+ .so_domain = AF_INET,
+ .so_type = SOCK_STREAM,
+ .so_protocol = 0,
+};
+
+FIXTURE_VARIANT_ADD(ip_local_port_range, ip4_udp) {
+ .so_domain = AF_INET,
+ .so_type = SOCK_DGRAM,
+ .so_protocol = 0,
+};
+
+FIXTURE_VARIANT_ADD(ip_local_port_range, ip4_stcp) {
+ .so_domain = AF_INET,
+ .so_type = SOCK_STREAM,
+ .so_protocol = IPPROTO_SCTP,
+};
+
+FIXTURE_VARIANT_ADD(ip_local_port_range, ip4_mptcp) {
+ .so_domain = AF_INET,
+ .so_type = SOCK_STREAM,
+ .so_protocol = IPPROTO_MPTCP,
+};
+
+FIXTURE_VARIANT_ADD(ip_local_port_range, ip6_tcp) {
+ .so_domain = AF_INET6,
+ .so_type = SOCK_STREAM,
+ .so_protocol = 0,
+};
+
+FIXTURE_VARIANT_ADD(ip_local_port_range, ip6_udp) {
+ .so_domain = AF_INET6,
+ .so_type = SOCK_DGRAM,
+ .so_protocol = 0,
+};
+
+FIXTURE_VARIANT_ADD(ip_local_port_range, ip6_stcp) {
+ .so_domain = AF_INET6,
+ .so_type = SOCK_STREAM,
+ .so_protocol = IPPROTO_SCTP,
+};
+
+FIXTURE_VARIANT_ADD(ip_local_port_range, ip6_mptcp) {
+ .so_domain = AF_INET6,
+ .so_type = SOCK_STREAM,
+ .so_protocol = IPPROTO_MPTCP,
+};
+
+TEST_F(ip_local_port_range, invalid_option_value)
+{
+ __u16 val16;
+ __u32 val32;
+ __u64 val64;
+ int fd, err;
+
+ fd = socket(variant->so_domain, variant->so_type, variant->so_protocol);
+ ASSERT_GE(fd, 0) TH_LOG("socket failed");
+
+ /* Too few bytes */
+ val16 = 40000;
+ err = setsockopt(fd, SOL_IP, IP_LOCAL_PORT_RANGE, &val16, sizeof(val16));
+ EXPECT_TRUE(err) TH_LOG("expected setsockopt(IP_LOCAL_PORT_RANGE) to fail");
+ EXPECT_EQ(errno, EINVAL);
+
+ /* Empty range: low port > high port */
+ val32 = pack_port_range(40222, 40111);
+ err = setsockopt(fd, SOL_IP, IP_LOCAL_PORT_RANGE, &val32, sizeof(val32));
+ EXPECT_TRUE(err) TH_LOG("expected setsockopt(IP_LOCAL_PORT_RANGE) to fail");
+ EXPECT_EQ(errno, EINVAL);
+
+ /* Too many bytes */
+ val64 = pack_port_range(40333, 40444);
+ err = setsockopt(fd, SOL_IP, IP_LOCAL_PORT_RANGE, &val64, sizeof(val64));
+ EXPECT_TRUE(err) TH_LOG("expected setsockopt(IP_LOCAL_PORT_RANGE) to fail");
+ EXPECT_EQ(errno, EINVAL);
+
+ err = close(fd);
+ ASSERT_TRUE(!err) TH_LOG("close failed");
+}
+
+TEST_F(ip_local_port_range, port_range_out_of_netns_range)
+{
+ const struct test {
+ __u16 range_lo;
+ __u16 range_hi;
+ } tests[] = {
+ { 30000, 39999 }, /* socket range below netns range */
+ { 50000, 59999 }, /* socket range above netns range */
+ };
+ const struct test *t;
+
+ for (t = tests; t < tests + ARRAY_SIZE(tests); t++) {
+ /* Bind a couple of sockets, not just one, to check
+ * that the range wasn't clamped to a single port from
+ * the netns range. That is [40000, 40000] or [49999,
+ * 49999], respectively for each test case.
+ */
+ int fds[2], i;
+
+ TH_LOG("lo %5hu, hi %5hu", t->range_lo, t->range_hi);
+
+ for (i = 0; i < ARRAY_SIZE(fds); i++) {
+ int fd, err, port;
+ __u32 range;
+
+ fd = socket(variant->so_domain, variant->so_type, variant->so_protocol);
+ ASSERT_GE(fd, 0) TH_LOG("#%d: socket failed", i);
+
+ range = pack_port_range(t->range_lo, t->range_hi);
+ err = setsockopt(fd, SOL_IP, IP_LOCAL_PORT_RANGE, &range, sizeof(range));
+ ASSERT_TRUE(!err) TH_LOG("#%d: setsockopt(IP_LOCAL_PORT_RANGE) failed", i);
+
+ err = bind_to_loopback_any_port(fd);
+ ASSERT_TRUE(!err) TH_LOG("#%d: bind failed", i);
+
+ /* Check that socket port range outside of ephemeral range is ignored */
+ port = get_sock_port(fd);
+ ASSERT_GE(port, 40000) TH_LOG("#%d: expected port within netns range", i);
+ ASSERT_LE(port, 49999) TH_LOG("#%d: expected port within netns range", i);
+
+ fds[i] = fd;
+ }
+
+ for (i = 0; i < ARRAY_SIZE(fds); i++)
+ ASSERT_TRUE(close(fds[i]) == 0) TH_LOG("#%d: close failed", i);
+ }
+}
+
+TEST_F(ip_local_port_range, single_port_range)
+{
+ const struct test {
+ __u16 range_lo;
+ __u16 range_hi;
+ __u16 expected;
+ } tests[] = {
+ /* single port range within ephemeral range */
+ { 45000, 45000, 45000 },
+ /* first port in the ephemeral range (clamp from above) */
+ { 0, 40000, 40000 },
+ /* last port in the ephemeral range (clamp from below) */
+ { 49999, 0, 49999 },
+ };
+ const struct test *t;
+
+ for (t = tests; t < tests + ARRAY_SIZE(tests); t++) {
+ int fd, err, port;
+ __u32 range;
+
+ TH_LOG("lo %5hu, hi %5hu, expected %5hu",
+ t->range_lo, t->range_hi, t->expected);
+
+ fd = socket(variant->so_domain, variant->so_type, variant->so_protocol);
+ ASSERT_GE(fd, 0) TH_LOG("socket failed");
+
+ range = pack_port_range(t->range_lo, t->range_hi);
+ err = setsockopt(fd, SOL_IP, IP_LOCAL_PORT_RANGE, &range, sizeof(range));
+ ASSERT_TRUE(!err) TH_LOG("setsockopt(IP_LOCAL_PORT_RANGE) failed");
+
+ err = bind_to_loopback_any_port(fd);
+ ASSERT_TRUE(!err) TH_LOG("bind failed");
+
+ port = get_sock_port(fd);
+ ASSERT_EQ(port, t->expected) TH_LOG("unexpected local port");
+
+ err = close(fd);
+ ASSERT_TRUE(!err) TH_LOG("close failed");
+ }
+}
+
+TEST_F(ip_local_port_range, exhaust_8_port_range)
+{
+ __u8 port_set = 0;
+ int i, fd, err;
+ __u32 range;
+ __u16 port;
+ int fds[8];
+
+ for (i = 0; i < ARRAY_SIZE(fds); i++) {
+ fd = socket(variant->so_domain, variant->so_type, variant->so_protocol);
+ ASSERT_GE(fd, 0) TH_LOG("socket failed");
+
+ range = pack_port_range(40000, 40007);
+ err = setsockopt(fd, SOL_IP, IP_LOCAL_PORT_RANGE, &range, sizeof(range));
+ ASSERT_TRUE(!err) TH_LOG("setsockopt(IP_LOCAL_PORT_RANGE) failed");
+
+ err = bind_to_loopback_any_port(fd);
+ ASSERT_TRUE(!err) TH_LOG("bind failed");
+
+ port = get_sock_port(fd);
+ ASSERT_GE(port, 40000) TH_LOG("expected port within sockopt range");
+ ASSERT_LE(port, 40007) TH_LOG("expected port within sockopt range");
+
+ port_set |= 1 << (port - 40000);
+ fds[i] = fd;
+ }
+
+ /* Check that all every port from the test range is in use */
+ ASSERT_EQ(port_set, 0xff) TH_LOG("expected all ports to be busy");
+
+ /* Check that bind() fails because the whole range is busy */
+ fd = socket(variant->so_domain, variant->so_type, variant->so_protocol);
+ ASSERT_GE(fd, 0) TH_LOG("socket failed");
+
+ range = pack_port_range(40000, 40007);
+ err = setsockopt(fd, SOL_IP, IP_LOCAL_PORT_RANGE, &range, sizeof(range));
+ ASSERT_TRUE(!err) TH_LOG("setsockopt(IP_LOCAL_PORT_RANGE) failed");
+
+ err = bind_to_loopback_any_port(fd);
+ ASSERT_TRUE(err) TH_LOG("expected bind to fail");
+ ASSERT_EQ(errno, EADDRINUSE);
+
+ err = close(fd);
+ ASSERT_TRUE(!err) TH_LOG("close failed");
+
+ for (i = 0; i < ARRAY_SIZE(fds); i++) {
+ err = close(fds[i]);
+ ASSERT_TRUE(!err) TH_LOG("close failed");
+ }
+}
+
+TEST_F(ip_local_port_range, late_bind)
+{
+ union {
+ struct sockaddr sa;
+ struct sockaddr_in v4;
+ struct sockaddr_in6 v6;
+ } addr;
+ socklen_t addr_len;
+ const int one = 1;
+ int fd, err;
+ __u32 range;
+ __u16 port;
+
+ fd = socket(variant->so_domain, variant->so_type, 0);
+ ASSERT_GE(fd, 0) TH_LOG("socket failed");
+
+ range = pack_port_range(40100, 40199);
+ err = setsockopt(fd, SOL_IP, IP_LOCAL_PORT_RANGE, &range, sizeof(range));
+ ASSERT_TRUE(!err) TH_LOG("setsockopt(IP_LOCAL_PORT_RANGE) failed");
+
+ err = setsockopt(fd, SOL_IP, IP_BIND_ADDRESS_NO_PORT, &one, sizeof(one));
+ ASSERT_TRUE(!err) TH_LOG("setsockopt(IP_BIND_ADDRESS_NO_PORT) failed");
+
+ err = bind_to_loopback_any_port(fd);
+ ASSERT_TRUE(!err) TH_LOG("bind failed");
+
+ port = get_sock_port(fd);
+ ASSERT_EQ(port, 0) TH_LOG("getsockname failed");
+
+ /* Invalid destination */
+ memset(&addr, 0, sizeof(addr));
+ switch (variant->so_domain) {
+ case AF_INET:
+ addr.v4.sin_family = AF_INET;
+ addr.v4.sin_port = htons(0);
+ addr.v4.sin_addr.s_addr = htonl(INADDR_ANY);
+ addr_len = sizeof(addr.v4);
+ break;
+ case AF_INET6:
+ addr.v6.sin6_family = AF_INET6;
+ addr.v6.sin6_port = htons(0);
+ addr.v6.sin6_addr = in6addr_any;
+ addr_len = sizeof(addr.v6);
+ break;
+ default:
+ ASSERT_TRUE(false) TH_LOG("unsupported socket domain");
+ }
+
+ /* connect() doesn't need to succeed for late bind to happen */
+ connect(fd, &addr.sa, addr_len);
+
+ port = get_sock_port(fd);
+ ASSERT_GE(port, 40100);
+ ASSERT_LE(port, 40199);
+
+ err = close(fd);
+ ASSERT_TRUE(!err) TH_LOG("close failed");
+}
+
+XFAIL_ADD(ip_local_port_range, ip4_stcp, late_bind);
+XFAIL_ADD(ip_local_port_range, ip6_stcp, late_bind);
+
+TEST_F(ip_local_port_range, get_port_range)
+{
+ __u16 lo, hi;
+ __u32 range;
+ int fd, err;
+
+ fd = socket(variant->so_domain, variant->so_type, variant->so_protocol);
+ ASSERT_GE(fd, 0) TH_LOG("socket failed");
+
+ /* Get range before it will be set */
+ err = get_ip_local_port_range(fd, &range);
+ ASSERT_TRUE(!err) TH_LOG("getsockopt(IP_LOCAL_PORT_RANGE) failed");
+
+ unpack_port_range(range, &lo, &hi);
+ ASSERT_EQ(lo, 0) TH_LOG("unexpected low port");
+ ASSERT_EQ(hi, 0) TH_LOG("unexpected high port");
+
+ range = pack_port_range(12345, 54321);
+ err = setsockopt(fd, SOL_IP, IP_LOCAL_PORT_RANGE, &range, sizeof(range));
+ ASSERT_TRUE(!err) TH_LOG("setsockopt(IP_LOCAL_PORT_RANGE) failed");
+
+ /* Get range after it has been set */
+ err = get_ip_local_port_range(fd, &range);
+ ASSERT_TRUE(!err) TH_LOG("getsockopt(IP_LOCAL_PORT_RANGE) failed");
+
+ unpack_port_range(range, &lo, &hi);
+ ASSERT_EQ(lo, 12345) TH_LOG("unexpected low port");
+ ASSERT_EQ(hi, 54321) TH_LOG("unexpected high port");
+
+ /* Unset the port range */
+ range = pack_port_range(0, 0);
+ err = setsockopt(fd, SOL_IP, IP_LOCAL_PORT_RANGE, &range, sizeof(range));
+ ASSERT_TRUE(!err) TH_LOG("setsockopt(IP_LOCAL_PORT_RANGE) failed");
+
+ /* Get range after it has been unset */
+ err = get_ip_local_port_range(fd, &range);
+ ASSERT_TRUE(!err) TH_LOG("getsockopt(IP_LOCAL_PORT_RANGE) failed");
+
+ unpack_port_range(range, &lo, &hi);
+ ASSERT_EQ(lo, 0) TH_LOG("unexpected low port");
+ ASSERT_EQ(hi, 0) TH_LOG("unexpected high port");
+
+ err = close(fd);
+ ASSERT_TRUE(!err) TH_LOG("close failed");
+}
+
+TEST_HARNESS_MAIN
diff --git a/tools/testing/selftests/net/ip_local_port_range.sh b/tools/testing/selftests/net/ip_local_port_range.sh
new file mode 100755
index 000000000000..6c6ad346eaa0
--- /dev/null
+++ b/tools/testing/selftests/net/ip_local_port_range.sh
@@ -0,0 +1,5 @@
+#!/bin/sh
+# SPDX-License-Identifier: GPL-2.0
+
+./in_netns.sh \
+ sh -c 'sysctl -q -w net.ipv4.ip_local_port_range="40000 49999" && ./ip_local_port_range'
diff --git a/tools/testing/selftests/net/ipsec.c b/tools/testing/selftests/net/ipsec.c
new file mode 100644
index 000000000000..be4a30a0d02a
--- /dev/null
+++ b/tools/testing/selftests/net/ipsec.c
@@ -0,0 +1,2341 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * ipsec.c - Check xfrm on veth inside a net-ns.
+ * Copyright (c) 2018 Dmitry Safonov
+ */
+
+#define _GNU_SOURCE
+
+#include <arpa/inet.h>
+#include <asm/types.h>
+#include <errno.h>
+#include <fcntl.h>
+#include <limits.h>
+#include <linux/limits.h>
+#include <linux/netlink.h>
+#include <linux/random.h>
+#include <linux/rtnetlink.h>
+#include <linux/veth.h>
+#include <linux/xfrm.h>
+#include <netinet/in.h>
+#include <net/if.h>
+#include <sched.h>
+#include <stdbool.h>
+#include <stdint.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/mman.h>
+#include <sys/socket.h>
+#include <sys/stat.h>
+#include <sys/syscall.h>
+#include <sys/types.h>
+#include <sys/wait.h>
+#include <time.h>
+#include <unistd.h>
+
+#include "../kselftest.h"
+
+#define printk(fmt, ...) \
+ ksft_print_msg("%d[%u] " fmt "\n", getpid(), __LINE__, ##__VA_ARGS__)
+
+#define pr_err(fmt, ...) printk(fmt ": %m", ##__VA_ARGS__)
+
+#define BUILD_BUG_ON(condition) ((void)sizeof(char[1 - 2*!!(condition)]))
+
+#define IPV4_STR_SZ 16 /* xxx.xxx.xxx.xxx is longest + \0 */
+#define MAX_PAYLOAD 2048
+#define XFRM_ALGO_KEY_BUF_SIZE 512
+#define MAX_PROCESSES (1 << 14) /* /16 mask divided by /30 subnets */
+#define INADDR_A ((in_addr_t) 0x0a000000) /* 10.0.0.0 */
+#define INADDR_B ((in_addr_t) 0xc0a80000) /* 192.168.0.0 */
+
+/* /30 mask for one veth connection */
+#define PREFIX_LEN 30
+#define child_ip(nr) (4*nr + 1)
+#define grchild_ip(nr) (4*nr + 2)
+
+#define VETH_FMT "ktst-%d"
+#define VETH_LEN 12
+
+#define XFRM_ALGO_NR_KEYS 29
+
+static int nsfd_parent = -1;
+static int nsfd_childa = -1;
+static int nsfd_childb = -1;
+static long page_size;
+
+/*
+ * ksft_cnt is static in kselftest, so isn't shared with children.
+ * We have to send a test result back to parent and count there.
+ * results_fd is a pipe with test feedback from children.
+ */
+static int results_fd[2];
+
+const unsigned int ping_delay_nsec = 50 * 1000 * 1000;
+const unsigned int ping_timeout = 300;
+const unsigned int ping_count = 100;
+const unsigned int ping_success = 80;
+
+struct xfrm_key_entry {
+ char algo_name[35];
+ int key_len;
+};
+
+struct xfrm_key_entry xfrm_key_entries[] = {
+ {"digest_null", 0},
+ {"ecb(cipher_null)", 0},
+ {"cbc(des)", 64},
+ {"hmac(md5)", 128},
+ {"cmac(aes)", 128},
+ {"xcbc(aes)", 128},
+ {"cbc(cast5)", 128},
+ {"cbc(serpent)", 128},
+ {"hmac(sha1)", 160},
+ {"hmac(rmd160)", 160},
+ {"cbc(des3_ede)", 192},
+ {"hmac(sha256)", 256},
+ {"cbc(aes)", 256},
+ {"cbc(camellia)", 256},
+ {"cbc(twofish)", 256},
+ {"rfc3686(ctr(aes))", 288},
+ {"hmac(sha384)", 384},
+ {"cbc(blowfish)", 448},
+ {"hmac(sha512)", 512},
+ {"rfc4106(gcm(aes))-128", 160},
+ {"rfc4543(gcm(aes))-128", 160},
+ {"rfc4309(ccm(aes))-128", 152},
+ {"rfc4106(gcm(aes))-192", 224},
+ {"rfc4543(gcm(aes))-192", 224},
+ {"rfc4309(ccm(aes))-192", 216},
+ {"rfc4106(gcm(aes))-256", 288},
+ {"rfc4543(gcm(aes))-256", 288},
+ {"rfc4309(ccm(aes))-256", 280},
+ {"rfc7539(chacha20,poly1305)-128", 0}
+};
+
+static void randomize_buffer(void *buf, size_t buflen)
+{
+ int *p = (int *)buf;
+ size_t words = buflen / sizeof(int);
+ size_t leftover = buflen % sizeof(int);
+
+ if (!buflen)
+ return;
+
+ while (words--)
+ *p++ = rand();
+
+ if (leftover) {
+ int tmp = rand();
+
+ memcpy(buf + buflen - leftover, &tmp, leftover);
+ }
+
+ return;
+}
+
+static int unshare_open(void)
+{
+ const char *netns_path = "/proc/self/ns/net";
+ int fd;
+
+ if (unshare(CLONE_NEWNET) != 0) {
+ pr_err("unshare()");
+ return -1;
+ }
+
+ fd = open(netns_path, O_RDONLY);
+ if (fd <= 0) {
+ pr_err("open(%s)", netns_path);
+ return -1;
+ }
+
+ return fd;
+}
+
+static int switch_ns(int fd)
+{
+ if (setns(fd, CLONE_NEWNET)) {
+ pr_err("setns()");
+ return -1;
+ }
+ return 0;
+}
+
+/*
+ * Running the test inside a new parent net namespace to bother less
+ * about cleanup on error-path.
+ */
+static int init_namespaces(void)
+{
+ nsfd_parent = unshare_open();
+ if (nsfd_parent <= 0)
+ return -1;
+
+ nsfd_childa = unshare_open();
+ if (nsfd_childa <= 0)
+ return -1;
+
+ if (switch_ns(nsfd_parent))
+ return -1;
+
+ nsfd_childb = unshare_open();
+ if (nsfd_childb <= 0)
+ return -1;
+
+ if (switch_ns(nsfd_parent))
+ return -1;
+ return 0;
+}
+
+static int netlink_sock(int *sock, uint32_t *seq_nr, int proto)
+{
+ if (*sock > 0) {
+ seq_nr++;
+ return 0;
+ }
+
+ *sock = socket(AF_NETLINK, SOCK_RAW | SOCK_CLOEXEC, proto);
+ if (*sock <= 0) {
+ pr_err("socket(AF_NETLINK)");
+ return -1;
+ }
+
+ randomize_buffer(seq_nr, sizeof(*seq_nr));
+
+ return 0;
+}
+
+static inline struct rtattr *rtattr_hdr(struct nlmsghdr *nh)
+{
+ return (struct rtattr *)((char *)(nh) + RTA_ALIGN((nh)->nlmsg_len));
+}
+
+static int rtattr_pack(struct nlmsghdr *nh, size_t req_sz,
+ unsigned short rta_type, const void *payload, size_t size)
+{
+ /* NLMSG_ALIGNTO == RTA_ALIGNTO, nlmsg_len already aligned */
+ struct rtattr *attr = rtattr_hdr(nh);
+ size_t nl_size = RTA_ALIGN(nh->nlmsg_len) + RTA_LENGTH(size);
+
+ if (req_sz < nl_size) {
+ printk("req buf is too small: %zu < %zu", req_sz, nl_size);
+ return -1;
+ }
+ nh->nlmsg_len = nl_size;
+
+ attr->rta_len = RTA_LENGTH(size);
+ attr->rta_type = rta_type;
+ memcpy(RTA_DATA(attr), payload, size);
+
+ return 0;
+}
+
+static struct rtattr *_rtattr_begin(struct nlmsghdr *nh, size_t req_sz,
+ unsigned short rta_type, const void *payload, size_t size)
+{
+ struct rtattr *ret = rtattr_hdr(nh);
+
+ if (rtattr_pack(nh, req_sz, rta_type, payload, size))
+ return 0;
+
+ return ret;
+}
+
+static inline struct rtattr *rtattr_begin(struct nlmsghdr *nh, size_t req_sz,
+ unsigned short rta_type)
+{
+ return _rtattr_begin(nh, req_sz, rta_type, 0, 0);
+}
+
+static inline void rtattr_end(struct nlmsghdr *nh, struct rtattr *attr)
+{
+ char *nlmsg_end = (char *)nh + nh->nlmsg_len;
+
+ attr->rta_len = nlmsg_end - (char *)attr;
+}
+
+static int veth_pack_peerb(struct nlmsghdr *nh, size_t req_sz,
+ const char *peer, int ns)
+{
+ struct ifinfomsg pi;
+ struct rtattr *peer_attr;
+
+ memset(&pi, 0, sizeof(pi));
+ pi.ifi_family = AF_UNSPEC;
+ pi.ifi_change = 0xFFFFFFFF;
+
+ peer_attr = _rtattr_begin(nh, req_sz, VETH_INFO_PEER, &pi, sizeof(pi));
+ if (!peer_attr)
+ return -1;
+
+ if (rtattr_pack(nh, req_sz, IFLA_IFNAME, peer, strlen(peer)))
+ return -1;
+
+ if (rtattr_pack(nh, req_sz, IFLA_NET_NS_FD, &ns, sizeof(ns)))
+ return -1;
+
+ rtattr_end(nh, peer_attr);
+
+ return 0;
+}
+
+static int netlink_check_answer(int sock)
+{
+ struct nlmsgerror {
+ struct nlmsghdr hdr;
+ int error;
+ struct nlmsghdr orig_msg;
+ } answer;
+
+ if (recv(sock, &answer, sizeof(answer), 0) < 0) {
+ pr_err("recv()");
+ return -1;
+ } else if (answer.hdr.nlmsg_type != NLMSG_ERROR) {
+ printk("expected NLMSG_ERROR, got %d", (int)answer.hdr.nlmsg_type);
+ return -1;
+ } else if (answer.error) {
+ printk("NLMSG_ERROR: %d: %s",
+ answer.error, strerror(-answer.error));
+ return answer.error;
+ }
+
+ return 0;
+}
+
+static int veth_add(int sock, uint32_t seq, const char *peera, int ns_a,
+ const char *peerb, int ns_b)
+{
+ uint16_t flags = NLM_F_REQUEST | NLM_F_ACK | NLM_F_EXCL | NLM_F_CREATE;
+ struct {
+ struct nlmsghdr nh;
+ struct ifinfomsg info;
+ char attrbuf[MAX_PAYLOAD];
+ } req;
+ const char veth_type[] = "veth";
+ struct rtattr *link_info, *info_data;
+
+ memset(&req, 0, sizeof(req));
+ req.nh.nlmsg_len = NLMSG_LENGTH(sizeof(req.info));
+ req.nh.nlmsg_type = RTM_NEWLINK;
+ req.nh.nlmsg_flags = flags;
+ req.nh.nlmsg_seq = seq;
+ req.info.ifi_family = AF_UNSPEC;
+ req.info.ifi_change = 0xFFFFFFFF;
+
+ if (rtattr_pack(&req.nh, sizeof(req), IFLA_IFNAME, peera, strlen(peera)))
+ return -1;
+
+ if (rtattr_pack(&req.nh, sizeof(req), IFLA_NET_NS_FD, &ns_a, sizeof(ns_a)))
+ return -1;
+
+ link_info = rtattr_begin(&req.nh, sizeof(req), IFLA_LINKINFO);
+ if (!link_info)
+ return -1;
+
+ if (rtattr_pack(&req.nh, sizeof(req), IFLA_INFO_KIND, veth_type, sizeof(veth_type)))
+ return -1;
+
+ info_data = rtattr_begin(&req.nh, sizeof(req), IFLA_INFO_DATA);
+ if (!info_data)
+ return -1;
+
+ if (veth_pack_peerb(&req.nh, sizeof(req), peerb, ns_b))
+ return -1;
+
+ rtattr_end(&req.nh, info_data);
+ rtattr_end(&req.nh, link_info);
+
+ if (send(sock, &req, req.nh.nlmsg_len, 0) < 0) {
+ pr_err("send()");
+ return -1;
+ }
+ return netlink_check_answer(sock);
+}
+
+static int ip4_addr_set(int sock, uint32_t seq, const char *intf,
+ struct in_addr addr, uint8_t prefix)
+{
+ uint16_t flags = NLM_F_REQUEST | NLM_F_ACK | NLM_F_EXCL | NLM_F_CREATE;
+ struct {
+ struct nlmsghdr nh;
+ struct ifaddrmsg info;
+ char attrbuf[MAX_PAYLOAD];
+ } req;
+
+ memset(&req, 0, sizeof(req));
+ req.nh.nlmsg_len = NLMSG_LENGTH(sizeof(req.info));
+ req.nh.nlmsg_type = RTM_NEWADDR;
+ req.nh.nlmsg_flags = flags;
+ req.nh.nlmsg_seq = seq;
+ req.info.ifa_family = AF_INET;
+ req.info.ifa_prefixlen = prefix;
+ req.info.ifa_index = if_nametoindex(intf);
+
+#ifdef DEBUG
+ {
+ char addr_str[IPV4_STR_SZ] = {};
+
+ strncpy(addr_str, inet_ntoa(addr), IPV4_STR_SZ - 1);
+
+ printk("ip addr set %s", addr_str);
+ }
+#endif
+
+ if (rtattr_pack(&req.nh, sizeof(req), IFA_LOCAL, &addr, sizeof(addr)))
+ return -1;
+
+ if (rtattr_pack(&req.nh, sizeof(req), IFA_ADDRESS, &addr, sizeof(addr)))
+ return -1;
+
+ if (send(sock, &req, req.nh.nlmsg_len, 0) < 0) {
+ pr_err("send()");
+ return -1;
+ }
+ return netlink_check_answer(sock);
+}
+
+static int link_set_up(int sock, uint32_t seq, const char *intf)
+{
+ struct {
+ struct nlmsghdr nh;
+ struct ifinfomsg info;
+ char attrbuf[MAX_PAYLOAD];
+ } req;
+
+ memset(&req, 0, sizeof(req));
+ req.nh.nlmsg_len = NLMSG_LENGTH(sizeof(req.info));
+ req.nh.nlmsg_type = RTM_NEWLINK;
+ req.nh.nlmsg_flags = NLM_F_REQUEST | NLM_F_ACK;
+ req.nh.nlmsg_seq = seq;
+ req.info.ifi_family = AF_UNSPEC;
+ req.info.ifi_change = 0xFFFFFFFF;
+ req.info.ifi_index = if_nametoindex(intf);
+ req.info.ifi_flags = IFF_UP;
+ req.info.ifi_change = IFF_UP;
+
+ if (send(sock, &req, req.nh.nlmsg_len, 0) < 0) {
+ pr_err("send()");
+ return -1;
+ }
+ return netlink_check_answer(sock);
+}
+
+static int ip4_route_set(int sock, uint32_t seq, const char *intf,
+ struct in_addr src, struct in_addr dst)
+{
+ struct {
+ struct nlmsghdr nh;
+ struct rtmsg rt;
+ char attrbuf[MAX_PAYLOAD];
+ } req;
+ unsigned int index = if_nametoindex(intf);
+
+ memset(&req, 0, sizeof(req));
+ req.nh.nlmsg_len = NLMSG_LENGTH(sizeof(req.rt));
+ req.nh.nlmsg_type = RTM_NEWROUTE;
+ req.nh.nlmsg_flags = NLM_F_REQUEST | NLM_F_ACK | NLM_F_CREATE;
+ req.nh.nlmsg_seq = seq;
+ req.rt.rtm_family = AF_INET;
+ req.rt.rtm_dst_len = 32;
+ req.rt.rtm_table = RT_TABLE_MAIN;
+ req.rt.rtm_protocol = RTPROT_BOOT;
+ req.rt.rtm_scope = RT_SCOPE_LINK;
+ req.rt.rtm_type = RTN_UNICAST;
+
+ if (rtattr_pack(&req.nh, sizeof(req), RTA_DST, &dst, sizeof(dst)))
+ return -1;
+
+ if (rtattr_pack(&req.nh, sizeof(req), RTA_PREFSRC, &src, sizeof(src)))
+ return -1;
+
+ if (rtattr_pack(&req.nh, sizeof(req), RTA_OIF, &index, sizeof(index)))
+ return -1;
+
+ if (send(sock, &req, req.nh.nlmsg_len, 0) < 0) {
+ pr_err("send()");
+ return -1;
+ }
+
+ return netlink_check_answer(sock);
+}
+
+static int tunnel_set_route(int route_sock, uint32_t *route_seq, char *veth,
+ struct in_addr tunsrc, struct in_addr tundst)
+{
+ if (ip4_addr_set(route_sock, (*route_seq)++, "lo",
+ tunsrc, PREFIX_LEN)) {
+ printk("Failed to set ipv4 addr");
+ return -1;
+ }
+
+ if (ip4_route_set(route_sock, (*route_seq)++, veth, tunsrc, tundst)) {
+ printk("Failed to set ipv4 route");
+ return -1;
+ }
+
+ return 0;
+}
+
+static int init_child(int nsfd, char *veth, unsigned int src, unsigned int dst)
+{
+ struct in_addr intsrc = inet_makeaddr(INADDR_B, src);
+ struct in_addr tunsrc = inet_makeaddr(INADDR_A, src);
+ struct in_addr tundst = inet_makeaddr(INADDR_A, dst);
+ int route_sock = -1, ret = -1;
+ uint32_t route_seq;
+
+ if (switch_ns(nsfd))
+ return -1;
+
+ if (netlink_sock(&route_sock, &route_seq, NETLINK_ROUTE)) {
+ printk("Failed to open netlink route socket in child");
+ return -1;
+ }
+
+ if (ip4_addr_set(route_sock, route_seq++, veth, intsrc, PREFIX_LEN)) {
+ printk("Failed to set ipv4 addr");
+ goto err;
+ }
+
+ if (link_set_up(route_sock, route_seq++, veth)) {
+ printk("Failed to bring up %s", veth);
+ goto err;
+ }
+
+ if (tunnel_set_route(route_sock, &route_seq, veth, tunsrc, tundst)) {
+ printk("Failed to add tunnel route on %s", veth);
+ goto err;
+ }
+ ret = 0;
+
+err:
+ close(route_sock);
+ return ret;
+}
+
+#define ALGO_LEN 64
+enum desc_type {
+ CREATE_TUNNEL = 0,
+ ALLOCATE_SPI,
+ MONITOR_ACQUIRE,
+ EXPIRE_STATE,
+ EXPIRE_POLICY,
+ SPDINFO_ATTRS,
+};
+const char *desc_name[] = {
+ "create tunnel",
+ "alloc spi",
+ "monitor acquire",
+ "expire state",
+ "expire policy",
+ "spdinfo attributes",
+ ""
+};
+struct xfrm_desc {
+ enum desc_type type;
+ uint8_t proto;
+ char a_algo[ALGO_LEN];
+ char e_algo[ALGO_LEN];
+ char c_algo[ALGO_LEN];
+ char ae_algo[ALGO_LEN];
+ unsigned int icv_len;
+ /* unsigned key_len; */
+};
+
+enum msg_type {
+ MSG_ACK = 0,
+ MSG_EXIT,
+ MSG_PING,
+ MSG_XFRM_PREPARE,
+ MSG_XFRM_ADD,
+ MSG_XFRM_DEL,
+ MSG_XFRM_CLEANUP,
+};
+
+struct test_desc {
+ enum msg_type type;
+ union {
+ struct {
+ in_addr_t reply_ip;
+ unsigned int port;
+ } ping;
+ struct xfrm_desc xfrm_desc;
+ } body;
+};
+
+struct test_result {
+ struct xfrm_desc desc;
+ unsigned int res;
+};
+
+static void write_test_result(unsigned int res, struct xfrm_desc *d)
+{
+ struct test_result tr = {};
+ ssize_t ret;
+
+ tr.desc = *d;
+ tr.res = res;
+
+ ret = write(results_fd[1], &tr, sizeof(tr));
+ if (ret != sizeof(tr))
+ pr_err("Failed to write the result in pipe %zd", ret);
+}
+
+static void write_msg(int fd, struct test_desc *msg, bool exit_of_fail)
+{
+ ssize_t bytes = write(fd, msg, sizeof(*msg));
+
+ /* Make sure that write/read is atomic to a pipe */
+ BUILD_BUG_ON(sizeof(struct test_desc) > PIPE_BUF);
+
+ if (bytes < 0) {
+ pr_err("write()");
+ if (exit_of_fail)
+ exit(KSFT_FAIL);
+ }
+ if (bytes != sizeof(*msg)) {
+ pr_err("sent part of the message %zd/%zu", bytes, sizeof(*msg));
+ if (exit_of_fail)
+ exit(KSFT_FAIL);
+ }
+}
+
+static void read_msg(int fd, struct test_desc *msg, bool exit_of_fail)
+{
+ ssize_t bytes = read(fd, msg, sizeof(*msg));
+
+ if (bytes < 0) {
+ pr_err("read()");
+ if (exit_of_fail)
+ exit(KSFT_FAIL);
+ }
+ if (bytes != sizeof(*msg)) {
+ pr_err("got incomplete message %zd/%zu", bytes, sizeof(*msg));
+ if (exit_of_fail)
+ exit(KSFT_FAIL);
+ }
+}
+
+static int udp_ping_init(struct in_addr listen_ip, unsigned int u_timeout,
+ unsigned int *server_port, int sock[2])
+{
+ struct sockaddr_in server;
+ struct timeval t = { .tv_sec = 0, .tv_usec = u_timeout };
+ socklen_t s_len = sizeof(server);
+
+ sock[0] = socket(AF_INET, SOCK_DGRAM, 0);
+ if (sock[0] < 0) {
+ pr_err("socket()");
+ return -1;
+ }
+
+ server.sin_family = AF_INET;
+ server.sin_port = 0;
+ memcpy(&server.sin_addr.s_addr, &listen_ip, sizeof(struct in_addr));
+
+ if (bind(sock[0], (struct sockaddr *)&server, s_len)) {
+ pr_err("bind()");
+ goto err_close_server;
+ }
+
+ if (getsockname(sock[0], (struct sockaddr *)&server, &s_len)) {
+ pr_err("getsockname()");
+ goto err_close_server;
+ }
+
+ *server_port = ntohs(server.sin_port);
+
+ if (setsockopt(sock[0], SOL_SOCKET, SO_RCVTIMEO, (const char *)&t, sizeof t)) {
+ pr_err("setsockopt()");
+ goto err_close_server;
+ }
+
+ sock[1] = socket(AF_INET, SOCK_DGRAM, 0);
+ if (sock[1] < 0) {
+ pr_err("socket()");
+ goto err_close_server;
+ }
+
+ return 0;
+
+err_close_server:
+ close(sock[0]);
+ return -1;
+}
+
+static int udp_ping_send(int sock[2], in_addr_t dest_ip, unsigned int port,
+ char *buf, size_t buf_len)
+{
+ struct sockaddr_in server;
+ const struct sockaddr *dest_addr = (struct sockaddr *)&server;
+ char *sock_buf[buf_len];
+ ssize_t r_bytes, s_bytes;
+
+ server.sin_family = AF_INET;
+ server.sin_port = htons(port);
+ server.sin_addr.s_addr = dest_ip;
+
+ s_bytes = sendto(sock[1], buf, buf_len, 0, dest_addr, sizeof(server));
+ if (s_bytes < 0) {
+ pr_err("sendto()");
+ return -1;
+ } else if (s_bytes != buf_len) {
+ printk("send part of the message: %zd/%zu", s_bytes, sizeof(server));
+ return -1;
+ }
+
+ r_bytes = recv(sock[0], sock_buf, buf_len, 0);
+ if (r_bytes < 0) {
+ if (errno != EAGAIN)
+ pr_err("recv()");
+ return -1;
+ } else if (r_bytes == 0) { /* EOF */
+ printk("EOF on reply to ping");
+ return -1;
+ } else if (r_bytes != buf_len || memcmp(buf, sock_buf, buf_len)) {
+ printk("ping reply packet is corrupted %zd/%zu", r_bytes, buf_len);
+ return -1;
+ }
+
+ return 0;
+}
+
+static int udp_ping_reply(int sock[2], in_addr_t dest_ip, unsigned int port,
+ char *buf, size_t buf_len)
+{
+ struct sockaddr_in server;
+ const struct sockaddr *dest_addr = (struct sockaddr *)&server;
+ char *sock_buf[buf_len];
+ ssize_t r_bytes, s_bytes;
+
+ server.sin_family = AF_INET;
+ server.sin_port = htons(port);
+ server.sin_addr.s_addr = dest_ip;
+
+ r_bytes = recv(sock[0], sock_buf, buf_len, 0);
+ if (r_bytes < 0) {
+ if (errno != EAGAIN)
+ pr_err("recv()");
+ return -1;
+ }
+ if (r_bytes == 0) { /* EOF */
+ printk("EOF on reply to ping");
+ return -1;
+ }
+ if (r_bytes != buf_len || memcmp(buf, sock_buf, buf_len)) {
+ printk("ping reply packet is corrupted %zd/%zu", r_bytes, buf_len);
+ return -1;
+ }
+
+ s_bytes = sendto(sock[1], buf, buf_len, 0, dest_addr, sizeof(server));
+ if (s_bytes < 0) {
+ pr_err("sendto()");
+ return -1;
+ } else if (s_bytes != buf_len) {
+ printk("send part of the message: %zd/%zu", s_bytes, sizeof(server));
+ return -1;
+ }
+
+ return 0;
+}
+
+typedef int (*ping_f)(int sock[2], in_addr_t dest_ip, unsigned int port,
+ char *buf, size_t buf_len);
+static int do_ping(int cmd_fd, char *buf, size_t buf_len, struct in_addr from,
+ bool init_side, int d_port, in_addr_t to, ping_f func)
+{
+ struct test_desc msg;
+ unsigned int s_port, i, ping_succeeded = 0;
+ int ping_sock[2];
+ char to_str[IPV4_STR_SZ] = {}, from_str[IPV4_STR_SZ] = {};
+
+ if (udp_ping_init(from, ping_timeout, &s_port, ping_sock)) {
+ printk("Failed to init ping");
+ return -1;
+ }
+
+ memset(&msg, 0, sizeof(msg));
+ msg.type = MSG_PING;
+ msg.body.ping.port = s_port;
+ memcpy(&msg.body.ping.reply_ip, &from, sizeof(from));
+
+ write_msg(cmd_fd, &msg, 0);
+ if (init_side) {
+ /* The other end sends ip to ping */
+ read_msg(cmd_fd, &msg, 0);
+ if (msg.type != MSG_PING)
+ return -1;
+ to = msg.body.ping.reply_ip;
+ d_port = msg.body.ping.port;
+ }
+
+ for (i = 0; i < ping_count ; i++) {
+ struct timespec sleep_time = {
+ .tv_sec = 0,
+ .tv_nsec = ping_delay_nsec,
+ };
+
+ ping_succeeded += !func(ping_sock, to, d_port, buf, page_size);
+ nanosleep(&sleep_time, 0);
+ }
+
+ close(ping_sock[0]);
+ close(ping_sock[1]);
+
+ strncpy(to_str, inet_ntoa(*(struct in_addr *)&to), IPV4_STR_SZ - 1);
+ strncpy(from_str, inet_ntoa(from), IPV4_STR_SZ - 1);
+
+ if (ping_succeeded < ping_success) {
+ printk("ping (%s) %s->%s failed %u/%u times",
+ init_side ? "send" : "reply", from_str, to_str,
+ ping_count - ping_succeeded, ping_count);
+ return -1;
+ }
+
+#ifdef DEBUG
+ printk("ping (%s) %s->%s succeeded %u/%u times",
+ init_side ? "send" : "reply", from_str, to_str,
+ ping_succeeded, ping_count);
+#endif
+
+ return 0;
+}
+
+static int xfrm_fill_key(char *name, char *buf,
+ size_t buf_len, unsigned int *key_len)
+{
+ int i;
+
+ for (i = 0; i < XFRM_ALGO_NR_KEYS; i++) {
+ if (strncmp(name, xfrm_key_entries[i].algo_name, ALGO_LEN) == 0)
+ *key_len = xfrm_key_entries[i].key_len;
+ }
+
+ if (*key_len > buf_len) {
+ printk("Can't pack a key - too big for buffer");
+ return -1;
+ }
+
+ randomize_buffer(buf, *key_len);
+
+ return 0;
+}
+
+static int xfrm_state_pack_algo(struct nlmsghdr *nh, size_t req_sz,
+ struct xfrm_desc *desc)
+{
+ struct {
+ union {
+ struct xfrm_algo alg;
+ struct xfrm_algo_aead aead;
+ struct xfrm_algo_auth auth;
+ } u;
+ char buf[XFRM_ALGO_KEY_BUF_SIZE];
+ } alg = {};
+ size_t alen, elen, clen, aelen;
+ unsigned short type;
+
+ alen = strlen(desc->a_algo);
+ elen = strlen(desc->e_algo);
+ clen = strlen(desc->c_algo);
+ aelen = strlen(desc->ae_algo);
+
+ /* Verify desc */
+ switch (desc->proto) {
+ case IPPROTO_AH:
+ if (!alen || elen || clen || aelen) {
+ printk("BUG: buggy ah desc");
+ return -1;
+ }
+ strncpy(alg.u.alg.alg_name, desc->a_algo, ALGO_LEN - 1);
+ if (xfrm_fill_key(desc->a_algo, alg.u.alg.alg_key,
+ sizeof(alg.buf), &alg.u.alg.alg_key_len))
+ return -1;
+ type = XFRMA_ALG_AUTH;
+ break;
+ case IPPROTO_COMP:
+ if (!clen || elen || alen || aelen) {
+ printk("BUG: buggy comp desc");
+ return -1;
+ }
+ strncpy(alg.u.alg.alg_name, desc->c_algo, ALGO_LEN - 1);
+ if (xfrm_fill_key(desc->c_algo, alg.u.alg.alg_key,
+ sizeof(alg.buf), &alg.u.alg.alg_key_len))
+ return -1;
+ type = XFRMA_ALG_COMP;
+ break;
+ case IPPROTO_ESP:
+ if (!((alen && elen) ^ aelen) || clen) {
+ printk("BUG: buggy esp desc");
+ return -1;
+ }
+ if (aelen) {
+ alg.u.aead.alg_icv_len = desc->icv_len;
+ strncpy(alg.u.aead.alg_name, desc->ae_algo, ALGO_LEN - 1);
+ if (xfrm_fill_key(desc->ae_algo, alg.u.aead.alg_key,
+ sizeof(alg.buf), &alg.u.aead.alg_key_len))
+ return -1;
+ type = XFRMA_ALG_AEAD;
+ } else {
+
+ strncpy(alg.u.alg.alg_name, desc->e_algo, ALGO_LEN - 1);
+ type = XFRMA_ALG_CRYPT;
+ if (xfrm_fill_key(desc->e_algo, alg.u.alg.alg_key,
+ sizeof(alg.buf), &alg.u.alg.alg_key_len))
+ return -1;
+ if (rtattr_pack(nh, req_sz, type, &alg, sizeof(alg)))
+ return -1;
+
+ strncpy(alg.u.alg.alg_name, desc->a_algo, ALGO_LEN);
+ type = XFRMA_ALG_AUTH;
+ if (xfrm_fill_key(desc->a_algo, alg.u.alg.alg_key,
+ sizeof(alg.buf), &alg.u.alg.alg_key_len))
+ return -1;
+ }
+ break;
+ default:
+ printk("BUG: unknown proto in desc");
+ return -1;
+ }
+
+ if (rtattr_pack(nh, req_sz, type, &alg, sizeof(alg)))
+ return -1;
+
+ return 0;
+}
+
+static inline uint32_t gen_spi(struct in_addr src)
+{
+ return htonl(inet_lnaof(src));
+}
+
+static int xfrm_state_add(int xfrm_sock, uint32_t seq, uint32_t spi,
+ struct in_addr src, struct in_addr dst,
+ struct xfrm_desc *desc)
+{
+ struct {
+ struct nlmsghdr nh;
+ struct xfrm_usersa_info info;
+ char attrbuf[MAX_PAYLOAD];
+ } req;
+
+ memset(&req, 0, sizeof(req));
+ req.nh.nlmsg_len = NLMSG_LENGTH(sizeof(req.info));
+ req.nh.nlmsg_type = XFRM_MSG_NEWSA;
+ req.nh.nlmsg_flags = NLM_F_REQUEST | NLM_F_ACK;
+ req.nh.nlmsg_seq = seq;
+
+ /* Fill selector. */
+ memcpy(&req.info.sel.daddr, &dst, sizeof(dst));
+ memcpy(&req.info.sel.saddr, &src, sizeof(src));
+ req.info.sel.family = AF_INET;
+ req.info.sel.prefixlen_d = PREFIX_LEN;
+ req.info.sel.prefixlen_s = PREFIX_LEN;
+
+ /* Fill id */
+ memcpy(&req.info.id.daddr, &dst, sizeof(dst));
+ /* Note: zero-spi cannot be deleted */
+ req.info.id.spi = spi;
+ req.info.id.proto = desc->proto;
+
+ memcpy(&req.info.saddr, &src, sizeof(src));
+
+ /* Fill lifteme_cfg */
+ req.info.lft.soft_byte_limit = XFRM_INF;
+ req.info.lft.hard_byte_limit = XFRM_INF;
+ req.info.lft.soft_packet_limit = XFRM_INF;
+ req.info.lft.hard_packet_limit = XFRM_INF;
+
+ req.info.family = AF_INET;
+ req.info.mode = XFRM_MODE_TUNNEL;
+
+ if (xfrm_state_pack_algo(&req.nh, sizeof(req), desc))
+ return -1;
+
+ if (send(xfrm_sock, &req, req.nh.nlmsg_len, 0) < 0) {
+ pr_err("send()");
+ return -1;
+ }
+
+ return netlink_check_answer(xfrm_sock);
+}
+
+static bool xfrm_usersa_found(struct xfrm_usersa_info *info, uint32_t spi,
+ struct in_addr src, struct in_addr dst,
+ struct xfrm_desc *desc)
+{
+ if (memcmp(&info->sel.daddr, &dst, sizeof(dst)))
+ return false;
+
+ if (memcmp(&info->sel.saddr, &src, sizeof(src)))
+ return false;
+
+ if (info->sel.family != AF_INET ||
+ info->sel.prefixlen_d != PREFIX_LEN ||
+ info->sel.prefixlen_s != PREFIX_LEN)
+ return false;
+
+ if (info->id.spi != spi || info->id.proto != desc->proto)
+ return false;
+
+ if (memcmp(&info->id.daddr, &dst, sizeof(dst)))
+ return false;
+
+ if (memcmp(&info->saddr, &src, sizeof(src)))
+ return false;
+
+ if (info->lft.soft_byte_limit != XFRM_INF ||
+ info->lft.hard_byte_limit != XFRM_INF ||
+ info->lft.soft_packet_limit != XFRM_INF ||
+ info->lft.hard_packet_limit != XFRM_INF)
+ return false;
+
+ if (info->family != AF_INET || info->mode != XFRM_MODE_TUNNEL)
+ return false;
+
+ /* XXX: check xfrm algo, see xfrm_state_pack_algo(). */
+
+ return true;
+}
+
+static int xfrm_state_check(int xfrm_sock, uint32_t seq, uint32_t spi,
+ struct in_addr src, struct in_addr dst,
+ struct xfrm_desc *desc)
+{
+ struct {
+ struct nlmsghdr nh;
+ char attrbuf[MAX_PAYLOAD];
+ } req;
+ struct {
+ struct nlmsghdr nh;
+ union {
+ struct xfrm_usersa_info info;
+ int error;
+ };
+ char attrbuf[MAX_PAYLOAD];
+ } answer;
+ struct xfrm_address_filter filter = {};
+ bool found = false;
+
+
+ memset(&req, 0, sizeof(req));
+ req.nh.nlmsg_len = NLMSG_LENGTH(0);
+ req.nh.nlmsg_type = XFRM_MSG_GETSA;
+ req.nh.nlmsg_flags = NLM_F_REQUEST | NLM_F_DUMP;
+ req.nh.nlmsg_seq = seq;
+
+ /*
+ * Add dump filter by source address as there may be other tunnels
+ * in this netns (if tests run in parallel).
+ */
+ filter.family = AF_INET;
+ filter.splen = 0x1f; /* 0xffffffff mask see addr_match() */
+ memcpy(&filter.saddr, &src, sizeof(src));
+ if (rtattr_pack(&req.nh, sizeof(req), XFRMA_ADDRESS_FILTER,
+ &filter, sizeof(filter)))
+ return -1;
+
+ if (send(xfrm_sock, &req, req.nh.nlmsg_len, 0) < 0) {
+ pr_err("send()");
+ return -1;
+ }
+
+ while (1) {
+ if (recv(xfrm_sock, &answer, sizeof(answer), 0) < 0) {
+ pr_err("recv()");
+ return -1;
+ }
+ if (answer.nh.nlmsg_type == NLMSG_ERROR) {
+ printk("NLMSG_ERROR: %d: %s",
+ answer.error, strerror(-answer.error));
+ return -1;
+ } else if (answer.nh.nlmsg_type == NLMSG_DONE) {
+ if (found)
+ return 0;
+ printk("didn't find allocated xfrm state in dump");
+ return -1;
+ } else if (answer.nh.nlmsg_type == XFRM_MSG_NEWSA) {
+ if (xfrm_usersa_found(&answer.info, spi, src, dst, desc))
+ found = true;
+ }
+ }
+}
+
+static int xfrm_set(int xfrm_sock, uint32_t *seq,
+ struct in_addr src, struct in_addr dst,
+ struct in_addr tunsrc, struct in_addr tundst,
+ struct xfrm_desc *desc)
+{
+ int err;
+
+ err = xfrm_state_add(xfrm_sock, (*seq)++, gen_spi(src), src, dst, desc);
+ if (err) {
+ printk("Failed to add xfrm state");
+ return -1;
+ }
+
+ err = xfrm_state_add(xfrm_sock, (*seq)++, gen_spi(src), dst, src, desc);
+ if (err) {
+ printk("Failed to add xfrm state");
+ return -1;
+ }
+
+ /* Check dumps for XFRM_MSG_GETSA */
+ err = xfrm_state_check(xfrm_sock, (*seq)++, gen_spi(src), src, dst, desc);
+ err |= xfrm_state_check(xfrm_sock, (*seq)++, gen_spi(src), dst, src, desc);
+ if (err) {
+ printk("Failed to check xfrm state");
+ return -1;
+ }
+
+ return 0;
+}
+
+static int xfrm_policy_add(int xfrm_sock, uint32_t seq, uint32_t spi,
+ struct in_addr src, struct in_addr dst, uint8_t dir,
+ struct in_addr tunsrc, struct in_addr tundst, uint8_t proto)
+{
+ struct {
+ struct nlmsghdr nh;
+ struct xfrm_userpolicy_info info;
+ char attrbuf[MAX_PAYLOAD];
+ } req;
+ struct xfrm_user_tmpl tmpl;
+
+ memset(&req, 0, sizeof(req));
+ memset(&tmpl, 0, sizeof(tmpl));
+ req.nh.nlmsg_len = NLMSG_LENGTH(sizeof(req.info));
+ req.nh.nlmsg_type = XFRM_MSG_NEWPOLICY;
+ req.nh.nlmsg_flags = NLM_F_REQUEST | NLM_F_ACK;
+ req.nh.nlmsg_seq = seq;
+
+ /* Fill selector. */
+ memcpy(&req.info.sel.daddr, &dst, sizeof(tundst));
+ memcpy(&req.info.sel.saddr, &src, sizeof(tunsrc));
+ req.info.sel.family = AF_INET;
+ req.info.sel.prefixlen_d = PREFIX_LEN;
+ req.info.sel.prefixlen_s = PREFIX_LEN;
+
+ /* Fill lifteme_cfg */
+ req.info.lft.soft_byte_limit = XFRM_INF;
+ req.info.lft.hard_byte_limit = XFRM_INF;
+ req.info.lft.soft_packet_limit = XFRM_INF;
+ req.info.lft.hard_packet_limit = XFRM_INF;
+
+ req.info.dir = dir;
+
+ /* Fill tmpl */
+ memcpy(&tmpl.id.daddr, &dst, sizeof(dst));
+ /* Note: zero-spi cannot be deleted */
+ tmpl.id.spi = spi;
+ tmpl.id.proto = proto;
+ tmpl.family = AF_INET;
+ memcpy(&tmpl.saddr, &src, sizeof(src));
+ tmpl.mode = XFRM_MODE_TUNNEL;
+ tmpl.aalgos = (~(uint32_t)0);
+ tmpl.ealgos = (~(uint32_t)0);
+ tmpl.calgos = (~(uint32_t)0);
+
+ if (rtattr_pack(&req.nh, sizeof(req), XFRMA_TMPL, &tmpl, sizeof(tmpl)))
+ return -1;
+
+ if (send(xfrm_sock, &req, req.nh.nlmsg_len, 0) < 0) {
+ pr_err("send()");
+ return -1;
+ }
+
+ return netlink_check_answer(xfrm_sock);
+}
+
+static int xfrm_prepare(int xfrm_sock, uint32_t *seq,
+ struct in_addr src, struct in_addr dst,
+ struct in_addr tunsrc, struct in_addr tundst, uint8_t proto)
+{
+ if (xfrm_policy_add(xfrm_sock, (*seq)++, gen_spi(src), src, dst,
+ XFRM_POLICY_OUT, tunsrc, tundst, proto)) {
+ printk("Failed to add xfrm policy");
+ return -1;
+ }
+
+ if (xfrm_policy_add(xfrm_sock, (*seq)++, gen_spi(src), dst, src,
+ XFRM_POLICY_IN, tunsrc, tundst, proto)) {
+ printk("Failed to add xfrm policy");
+ return -1;
+ }
+
+ return 0;
+}
+
+static int xfrm_policy_del(int xfrm_sock, uint32_t seq,
+ struct in_addr src, struct in_addr dst, uint8_t dir,
+ struct in_addr tunsrc, struct in_addr tundst)
+{
+ struct {
+ struct nlmsghdr nh;
+ struct xfrm_userpolicy_id id;
+ char attrbuf[MAX_PAYLOAD];
+ } req;
+
+ memset(&req, 0, sizeof(req));
+ req.nh.nlmsg_len = NLMSG_LENGTH(sizeof(req.id));
+ req.nh.nlmsg_type = XFRM_MSG_DELPOLICY;
+ req.nh.nlmsg_flags = NLM_F_REQUEST | NLM_F_ACK;
+ req.nh.nlmsg_seq = seq;
+
+ /* Fill id */
+ memcpy(&req.id.sel.daddr, &dst, sizeof(tundst));
+ memcpy(&req.id.sel.saddr, &src, sizeof(tunsrc));
+ req.id.sel.family = AF_INET;
+ req.id.sel.prefixlen_d = PREFIX_LEN;
+ req.id.sel.prefixlen_s = PREFIX_LEN;
+ req.id.dir = dir;
+
+ if (send(xfrm_sock, &req, req.nh.nlmsg_len, 0) < 0) {
+ pr_err("send()");
+ return -1;
+ }
+
+ return netlink_check_answer(xfrm_sock);
+}
+
+static int xfrm_cleanup(int xfrm_sock, uint32_t *seq,
+ struct in_addr src, struct in_addr dst,
+ struct in_addr tunsrc, struct in_addr tundst)
+{
+ if (xfrm_policy_del(xfrm_sock, (*seq)++, src, dst,
+ XFRM_POLICY_OUT, tunsrc, tundst)) {
+ printk("Failed to add xfrm policy");
+ return -1;
+ }
+
+ if (xfrm_policy_del(xfrm_sock, (*seq)++, dst, src,
+ XFRM_POLICY_IN, tunsrc, tundst)) {
+ printk("Failed to add xfrm policy");
+ return -1;
+ }
+
+ return 0;
+}
+
+static int xfrm_state_del(int xfrm_sock, uint32_t seq, uint32_t spi,
+ struct in_addr src, struct in_addr dst, uint8_t proto)
+{
+ struct {
+ struct nlmsghdr nh;
+ struct xfrm_usersa_id id;
+ char attrbuf[MAX_PAYLOAD];
+ } req;
+ xfrm_address_t saddr = {};
+
+ memset(&req, 0, sizeof(req));
+ req.nh.nlmsg_len = NLMSG_LENGTH(sizeof(req.id));
+ req.nh.nlmsg_type = XFRM_MSG_DELSA;
+ req.nh.nlmsg_flags = NLM_F_REQUEST | NLM_F_ACK;
+ req.nh.nlmsg_seq = seq;
+
+ memcpy(&req.id.daddr, &dst, sizeof(dst));
+ req.id.family = AF_INET;
+ req.id.proto = proto;
+ /* Note: zero-spi cannot be deleted */
+ req.id.spi = spi;
+
+ memcpy(&saddr, &src, sizeof(src));
+ if (rtattr_pack(&req.nh, sizeof(req), XFRMA_SRCADDR, &saddr, sizeof(saddr)))
+ return -1;
+
+ if (send(xfrm_sock, &req, req.nh.nlmsg_len, 0) < 0) {
+ pr_err("send()");
+ return -1;
+ }
+
+ return netlink_check_answer(xfrm_sock);
+}
+
+static int xfrm_delete(int xfrm_sock, uint32_t *seq,
+ struct in_addr src, struct in_addr dst,
+ struct in_addr tunsrc, struct in_addr tundst, uint8_t proto)
+{
+ if (xfrm_state_del(xfrm_sock, (*seq)++, gen_spi(src), src, dst, proto)) {
+ printk("Failed to remove xfrm state");
+ return -1;
+ }
+
+ if (xfrm_state_del(xfrm_sock, (*seq)++, gen_spi(src), dst, src, proto)) {
+ printk("Failed to remove xfrm state");
+ return -1;
+ }
+
+ return 0;
+}
+
+static int xfrm_state_allocspi(int xfrm_sock, uint32_t *seq,
+ uint32_t spi, uint8_t proto)
+{
+ struct {
+ struct nlmsghdr nh;
+ struct xfrm_userspi_info spi;
+ } req;
+ struct {
+ struct nlmsghdr nh;
+ union {
+ struct xfrm_usersa_info info;
+ int error;
+ };
+ } answer;
+
+ memset(&req, 0, sizeof(req));
+ req.nh.nlmsg_len = NLMSG_LENGTH(sizeof(req.spi));
+ req.nh.nlmsg_type = XFRM_MSG_ALLOCSPI;
+ req.nh.nlmsg_flags = NLM_F_REQUEST;
+ req.nh.nlmsg_seq = (*seq)++;
+
+ req.spi.info.family = AF_INET;
+ req.spi.min = spi;
+ req.spi.max = spi;
+ req.spi.info.id.proto = proto;
+
+ if (send(xfrm_sock, &req, req.nh.nlmsg_len, 0) < 0) {
+ pr_err("send()");
+ return KSFT_FAIL;
+ }
+
+ if (recv(xfrm_sock, &answer, sizeof(answer), 0) < 0) {
+ pr_err("recv()");
+ return KSFT_FAIL;
+ } else if (answer.nh.nlmsg_type == XFRM_MSG_NEWSA) {
+ uint32_t new_spi = htonl(answer.info.id.spi);
+
+ if (new_spi != spi) {
+ printk("allocated spi is different from requested: %#x != %#x",
+ new_spi, spi);
+ return KSFT_FAIL;
+ }
+ return KSFT_PASS;
+ } else if (answer.nh.nlmsg_type != NLMSG_ERROR) {
+ printk("expected NLMSG_ERROR, got %d", (int)answer.nh.nlmsg_type);
+ return KSFT_FAIL;
+ }
+
+ printk("NLMSG_ERROR: %d: %s", answer.error, strerror(-answer.error));
+ return (answer.error) ? KSFT_FAIL : KSFT_PASS;
+}
+
+static int netlink_sock_bind(int *sock, uint32_t *seq, int proto, uint32_t groups)
+{
+ struct sockaddr_nl snl = {};
+ socklen_t addr_len;
+ int ret = -1;
+
+ snl.nl_family = AF_NETLINK;
+ snl.nl_groups = groups;
+
+ if (netlink_sock(sock, seq, proto)) {
+ printk("Failed to open xfrm netlink socket");
+ return -1;
+ }
+
+ if (bind(*sock, (struct sockaddr *)&snl, sizeof(snl)) < 0) {
+ pr_err("bind()");
+ goto out_close;
+ }
+
+ addr_len = sizeof(snl);
+ if (getsockname(*sock, (struct sockaddr *)&snl, &addr_len) < 0) {
+ pr_err("getsockname()");
+ goto out_close;
+ }
+ if (addr_len != sizeof(snl)) {
+ printk("Wrong address length %d", addr_len);
+ goto out_close;
+ }
+ if (snl.nl_family != AF_NETLINK) {
+ printk("Wrong address family %d", snl.nl_family);
+ goto out_close;
+ }
+ return 0;
+
+out_close:
+ close(*sock);
+ return ret;
+}
+
+static int xfrm_monitor_acquire(int xfrm_sock, uint32_t *seq, unsigned int nr)
+{
+ struct {
+ struct nlmsghdr nh;
+ union {
+ struct xfrm_user_acquire acq;
+ int error;
+ };
+ char attrbuf[MAX_PAYLOAD];
+ } req;
+ struct xfrm_user_tmpl xfrm_tmpl = {};
+ int xfrm_listen = -1, ret = KSFT_FAIL;
+ uint32_t seq_listen;
+
+ if (netlink_sock_bind(&xfrm_listen, &seq_listen, NETLINK_XFRM, XFRMNLGRP_ACQUIRE))
+ return KSFT_FAIL;
+
+ memset(&req, 0, sizeof(req));
+ req.nh.nlmsg_len = NLMSG_LENGTH(sizeof(req.acq));
+ req.nh.nlmsg_type = XFRM_MSG_ACQUIRE;
+ req.nh.nlmsg_flags = NLM_F_REQUEST | NLM_F_ACK;
+ req.nh.nlmsg_seq = (*seq)++;
+
+ req.acq.policy.sel.family = AF_INET;
+ req.acq.aalgos = 0xfeed;
+ req.acq.ealgos = 0xbaad;
+ req.acq.calgos = 0xbabe;
+
+ xfrm_tmpl.family = AF_INET;
+ xfrm_tmpl.id.proto = IPPROTO_ESP;
+ if (rtattr_pack(&req.nh, sizeof(req), XFRMA_TMPL, &xfrm_tmpl, sizeof(xfrm_tmpl)))
+ goto out_close;
+
+ if (send(xfrm_sock, &req, req.nh.nlmsg_len, 0) < 0) {
+ pr_err("send()");
+ goto out_close;
+ }
+
+ if (recv(xfrm_sock, &req, sizeof(req), 0) < 0) {
+ pr_err("recv()");
+ goto out_close;
+ } else if (req.nh.nlmsg_type != NLMSG_ERROR) {
+ printk("expected NLMSG_ERROR, got %d", (int)req.nh.nlmsg_type);
+ goto out_close;
+ }
+
+ if (req.error) {
+ printk("NLMSG_ERROR: %d: %s", req.error, strerror(-req.error));
+ ret = req.error;
+ goto out_close;
+ }
+
+ if (recv(xfrm_listen, &req, sizeof(req), 0) < 0) {
+ pr_err("recv()");
+ goto out_close;
+ }
+
+ if (req.acq.aalgos != 0xfeed || req.acq.ealgos != 0xbaad
+ || req.acq.calgos != 0xbabe) {
+ printk("xfrm_user_acquire has changed %x %x %x",
+ req.acq.aalgos, req.acq.ealgos, req.acq.calgos);
+ goto out_close;
+ }
+
+ ret = KSFT_PASS;
+out_close:
+ close(xfrm_listen);
+ return ret;
+}
+
+static int xfrm_expire_state(int xfrm_sock, uint32_t *seq,
+ unsigned int nr, struct xfrm_desc *desc)
+{
+ struct {
+ struct nlmsghdr nh;
+ union {
+ struct xfrm_user_expire expire;
+ int error;
+ };
+ } req;
+ struct in_addr src, dst;
+ int xfrm_listen = -1, ret = KSFT_FAIL;
+ uint32_t seq_listen;
+
+ src = inet_makeaddr(INADDR_B, child_ip(nr));
+ dst = inet_makeaddr(INADDR_B, grchild_ip(nr));
+
+ if (xfrm_state_add(xfrm_sock, (*seq)++, gen_spi(src), src, dst, desc)) {
+ printk("Failed to add xfrm state");
+ return KSFT_FAIL;
+ }
+
+ if (netlink_sock_bind(&xfrm_listen, &seq_listen, NETLINK_XFRM, XFRMNLGRP_EXPIRE))
+ return KSFT_FAIL;
+
+ memset(&req, 0, sizeof(req));
+ req.nh.nlmsg_len = NLMSG_LENGTH(sizeof(req.expire));
+ req.nh.nlmsg_type = XFRM_MSG_EXPIRE;
+ req.nh.nlmsg_flags = NLM_F_REQUEST | NLM_F_ACK;
+ req.nh.nlmsg_seq = (*seq)++;
+
+ memcpy(&req.expire.state.id.daddr, &dst, sizeof(dst));
+ req.expire.state.id.spi = gen_spi(src);
+ req.expire.state.id.proto = desc->proto;
+ req.expire.state.family = AF_INET;
+ req.expire.hard = 0xff;
+
+ if (send(xfrm_sock, &req, req.nh.nlmsg_len, 0) < 0) {
+ pr_err("send()");
+ goto out_close;
+ }
+
+ if (recv(xfrm_sock, &req, sizeof(req), 0) < 0) {
+ pr_err("recv()");
+ goto out_close;
+ } else if (req.nh.nlmsg_type != NLMSG_ERROR) {
+ printk("expected NLMSG_ERROR, got %d", (int)req.nh.nlmsg_type);
+ goto out_close;
+ }
+
+ if (req.error) {
+ printk("NLMSG_ERROR: %d: %s", req.error, strerror(-req.error));
+ ret = req.error;
+ goto out_close;
+ }
+
+ if (recv(xfrm_listen, &req, sizeof(req), 0) < 0) {
+ pr_err("recv()");
+ goto out_close;
+ }
+
+ if (req.expire.hard != 0x1) {
+ printk("expire.hard is not set: %x", req.expire.hard);
+ goto out_close;
+ }
+
+ ret = KSFT_PASS;
+out_close:
+ close(xfrm_listen);
+ return ret;
+}
+
+static int xfrm_expire_policy(int xfrm_sock, uint32_t *seq,
+ unsigned int nr, struct xfrm_desc *desc)
+{
+ struct {
+ struct nlmsghdr nh;
+ union {
+ struct xfrm_user_polexpire expire;
+ int error;
+ };
+ } req;
+ struct in_addr src, dst, tunsrc, tundst;
+ int xfrm_listen = -1, ret = KSFT_FAIL;
+ uint32_t seq_listen;
+
+ src = inet_makeaddr(INADDR_B, child_ip(nr));
+ dst = inet_makeaddr(INADDR_B, grchild_ip(nr));
+ tunsrc = inet_makeaddr(INADDR_A, child_ip(nr));
+ tundst = inet_makeaddr(INADDR_A, grchild_ip(nr));
+
+ if (xfrm_policy_add(xfrm_sock, (*seq)++, gen_spi(src), src, dst,
+ XFRM_POLICY_OUT, tunsrc, tundst, desc->proto)) {
+ printk("Failed to add xfrm policy");
+ return KSFT_FAIL;
+ }
+
+ if (netlink_sock_bind(&xfrm_listen, &seq_listen, NETLINK_XFRM, XFRMNLGRP_EXPIRE))
+ return KSFT_FAIL;
+
+ memset(&req, 0, sizeof(req));
+ req.nh.nlmsg_len = NLMSG_LENGTH(sizeof(req.expire));
+ req.nh.nlmsg_type = XFRM_MSG_POLEXPIRE;
+ req.nh.nlmsg_flags = NLM_F_REQUEST | NLM_F_ACK;
+ req.nh.nlmsg_seq = (*seq)++;
+
+ /* Fill selector. */
+ memcpy(&req.expire.pol.sel.daddr, &dst, sizeof(tundst));
+ memcpy(&req.expire.pol.sel.saddr, &src, sizeof(tunsrc));
+ req.expire.pol.sel.family = AF_INET;
+ req.expire.pol.sel.prefixlen_d = PREFIX_LEN;
+ req.expire.pol.sel.prefixlen_s = PREFIX_LEN;
+ req.expire.pol.dir = XFRM_POLICY_OUT;
+ req.expire.hard = 0xff;
+
+ if (send(xfrm_sock, &req, req.nh.nlmsg_len, 0) < 0) {
+ pr_err("send()");
+ goto out_close;
+ }
+
+ if (recv(xfrm_sock, &req, sizeof(req), 0) < 0) {
+ pr_err("recv()");
+ goto out_close;
+ } else if (req.nh.nlmsg_type != NLMSG_ERROR) {
+ printk("expected NLMSG_ERROR, got %d", (int)req.nh.nlmsg_type);
+ goto out_close;
+ }
+
+ if (req.error) {
+ printk("NLMSG_ERROR: %d: %s", req.error, strerror(-req.error));
+ ret = req.error;
+ goto out_close;
+ }
+
+ if (recv(xfrm_listen, &req, sizeof(req), 0) < 0) {
+ pr_err("recv()");
+ goto out_close;
+ }
+
+ if (req.expire.hard != 0x1) {
+ printk("expire.hard is not set: %x", req.expire.hard);
+ goto out_close;
+ }
+
+ ret = KSFT_PASS;
+out_close:
+ close(xfrm_listen);
+ return ret;
+}
+
+static int xfrm_spdinfo_set_thresh(int xfrm_sock, uint32_t *seq,
+ unsigned thresh4_l, unsigned thresh4_r,
+ unsigned thresh6_l, unsigned thresh6_r,
+ bool add_bad_attr)
+
+{
+ struct {
+ struct nlmsghdr nh;
+ union {
+ uint32_t unused;
+ int error;
+ };
+ char attrbuf[MAX_PAYLOAD];
+ } req;
+ struct xfrmu_spdhthresh thresh;
+
+ memset(&req, 0, sizeof(req));
+ req.nh.nlmsg_len = NLMSG_LENGTH(sizeof(req.unused));
+ req.nh.nlmsg_type = XFRM_MSG_NEWSPDINFO;
+ req.nh.nlmsg_flags = NLM_F_REQUEST | NLM_F_ACK;
+ req.nh.nlmsg_seq = (*seq)++;
+
+ thresh.lbits = thresh4_l;
+ thresh.rbits = thresh4_r;
+ if (rtattr_pack(&req.nh, sizeof(req), XFRMA_SPD_IPV4_HTHRESH, &thresh, sizeof(thresh)))
+ return -1;
+
+ thresh.lbits = thresh6_l;
+ thresh.rbits = thresh6_r;
+ if (rtattr_pack(&req.nh, sizeof(req), XFRMA_SPD_IPV6_HTHRESH, &thresh, sizeof(thresh)))
+ return -1;
+
+ if (add_bad_attr) {
+ BUILD_BUG_ON(XFRMA_IF_ID <= XFRMA_SPD_MAX + 1);
+ if (rtattr_pack(&req.nh, sizeof(req), XFRMA_IF_ID, NULL, 0)) {
+ pr_err("adding attribute failed: no space");
+ return -1;
+ }
+ }
+
+ if (send(xfrm_sock, &req, req.nh.nlmsg_len, 0) < 0) {
+ pr_err("send()");
+ return -1;
+ }
+
+ if (recv(xfrm_sock, &req, sizeof(req), 0) < 0) {
+ pr_err("recv()");
+ return -1;
+ } else if (req.nh.nlmsg_type != NLMSG_ERROR) {
+ printk("expected NLMSG_ERROR, got %d", (int)req.nh.nlmsg_type);
+ return -1;
+ }
+
+ if (req.error) {
+ printk("NLMSG_ERROR: %d: %s", req.error, strerror(-req.error));
+ return -1;
+ }
+
+ return 0;
+}
+
+static int xfrm_spdinfo_attrs(int xfrm_sock, uint32_t *seq)
+{
+ struct {
+ struct nlmsghdr nh;
+ union {
+ uint32_t unused;
+ int error;
+ };
+ char attrbuf[MAX_PAYLOAD];
+ } req;
+
+ if (xfrm_spdinfo_set_thresh(xfrm_sock, seq, 32, 31, 120, 16, false)) {
+ pr_err("Can't set SPD HTHRESH");
+ return KSFT_FAIL;
+ }
+
+ memset(&req, 0, sizeof(req));
+
+ req.nh.nlmsg_len = NLMSG_LENGTH(sizeof(req.unused));
+ req.nh.nlmsg_type = XFRM_MSG_GETSPDINFO;
+ req.nh.nlmsg_flags = NLM_F_REQUEST;
+ req.nh.nlmsg_seq = (*seq)++;
+ if (send(xfrm_sock, &req, req.nh.nlmsg_len, 0) < 0) {
+ pr_err("send()");
+ return KSFT_FAIL;
+ }
+
+ if (recv(xfrm_sock, &req, sizeof(req), 0) < 0) {
+ pr_err("recv()");
+ return KSFT_FAIL;
+ } else if (req.nh.nlmsg_type == XFRM_MSG_NEWSPDINFO) {
+ size_t len = NLMSG_PAYLOAD(&req.nh, sizeof(req.unused));
+ struct rtattr *attr = (void *)req.attrbuf;
+ int got_thresh = 0;
+
+ for (; RTA_OK(attr, len); attr = RTA_NEXT(attr, len)) {
+ if (attr->rta_type == XFRMA_SPD_IPV4_HTHRESH) {
+ struct xfrmu_spdhthresh *t = RTA_DATA(attr);
+
+ got_thresh++;
+ if (t->lbits != 32 || t->rbits != 31) {
+ pr_err("thresh differ: %u, %u",
+ t->lbits, t->rbits);
+ return KSFT_FAIL;
+ }
+ }
+ if (attr->rta_type == XFRMA_SPD_IPV6_HTHRESH) {
+ struct xfrmu_spdhthresh *t = RTA_DATA(attr);
+
+ got_thresh++;
+ if (t->lbits != 120 || t->rbits != 16) {
+ pr_err("thresh differ: %u, %u",
+ t->lbits, t->rbits);
+ return KSFT_FAIL;
+ }
+ }
+ }
+ if (got_thresh != 2) {
+ pr_err("only %d thresh returned by XFRM_MSG_GETSPDINFO", got_thresh);
+ return KSFT_FAIL;
+ }
+ } else if (req.nh.nlmsg_type != NLMSG_ERROR) {
+ printk("expected NLMSG_ERROR, got %d", (int)req.nh.nlmsg_type);
+ return KSFT_FAIL;
+ } else {
+ printk("NLMSG_ERROR: %d: %s", req.error, strerror(-req.error));
+ return -1;
+ }
+
+ /* Restore the default */
+ if (xfrm_spdinfo_set_thresh(xfrm_sock, seq, 32, 32, 128, 128, false)) {
+ pr_err("Can't restore SPD HTHRESH");
+ return KSFT_FAIL;
+ }
+
+ /*
+ * At this moment xfrm uses nlmsg_parse_deprecated(), which
+ * implies NL_VALIDATE_LIBERAL - ignoring attributes with
+ * (type > maxtype). nla_parse_depricated_strict() would enforce
+ * it. Or even stricter nla_parse().
+ * Right now it's not expected to fail, but to be ignored.
+ */
+ if (xfrm_spdinfo_set_thresh(xfrm_sock, seq, 32, 32, 128, 128, true))
+ return KSFT_PASS;
+
+ return KSFT_PASS;
+}
+
+static int child_serv(int xfrm_sock, uint32_t *seq,
+ unsigned int nr, int cmd_fd, void *buf, struct xfrm_desc *desc)
+{
+ struct in_addr src, dst, tunsrc, tundst;
+ struct test_desc msg;
+ int ret = KSFT_FAIL;
+
+ src = inet_makeaddr(INADDR_B, child_ip(nr));
+ dst = inet_makeaddr(INADDR_B, grchild_ip(nr));
+ tunsrc = inet_makeaddr(INADDR_A, child_ip(nr));
+ tundst = inet_makeaddr(INADDR_A, grchild_ip(nr));
+
+ /* UDP pinging without xfrm */
+ if (do_ping(cmd_fd, buf, page_size, src, true, 0, 0, udp_ping_send)) {
+ printk("ping failed before setting xfrm");
+ return KSFT_FAIL;
+ }
+
+ memset(&msg, 0, sizeof(msg));
+ msg.type = MSG_XFRM_PREPARE;
+ memcpy(&msg.body.xfrm_desc, desc, sizeof(*desc));
+ write_msg(cmd_fd, &msg, 1);
+
+ if (xfrm_prepare(xfrm_sock, seq, src, dst, tunsrc, tundst, desc->proto)) {
+ printk("failed to prepare xfrm");
+ goto cleanup;
+ }
+
+ memset(&msg, 0, sizeof(msg));
+ msg.type = MSG_XFRM_ADD;
+ memcpy(&msg.body.xfrm_desc, desc, sizeof(*desc));
+ write_msg(cmd_fd, &msg, 1);
+ if (xfrm_set(xfrm_sock, seq, src, dst, tunsrc, tundst, desc)) {
+ printk("failed to set xfrm");
+ goto delete;
+ }
+
+ /* UDP pinging with xfrm tunnel */
+ if (do_ping(cmd_fd, buf, page_size, tunsrc,
+ true, 0, 0, udp_ping_send)) {
+ printk("ping failed for xfrm");
+ goto delete;
+ }
+
+ ret = KSFT_PASS;
+delete:
+ /* xfrm delete */
+ memset(&msg, 0, sizeof(msg));
+ msg.type = MSG_XFRM_DEL;
+ memcpy(&msg.body.xfrm_desc, desc, sizeof(*desc));
+ write_msg(cmd_fd, &msg, 1);
+
+ if (xfrm_delete(xfrm_sock, seq, src, dst, tunsrc, tundst, desc->proto)) {
+ printk("failed ping to remove xfrm");
+ ret = KSFT_FAIL;
+ }
+
+cleanup:
+ memset(&msg, 0, sizeof(msg));
+ msg.type = MSG_XFRM_CLEANUP;
+ memcpy(&msg.body.xfrm_desc, desc, sizeof(*desc));
+ write_msg(cmd_fd, &msg, 1);
+ if (xfrm_cleanup(xfrm_sock, seq, src, dst, tunsrc, tundst)) {
+ printk("failed ping to cleanup xfrm");
+ ret = KSFT_FAIL;
+ }
+ return ret;
+}
+
+static int child_f(unsigned int nr, int test_desc_fd, int cmd_fd, void *buf)
+{
+ struct xfrm_desc desc;
+ struct test_desc msg;
+ int xfrm_sock = -1;
+ uint32_t seq;
+
+ if (switch_ns(nsfd_childa))
+ exit(KSFT_FAIL);
+
+ if (netlink_sock(&xfrm_sock, &seq, NETLINK_XFRM)) {
+ printk("Failed to open xfrm netlink socket");
+ exit(KSFT_FAIL);
+ }
+
+ /* Check that seq sock is ready, just for sure. */
+ memset(&msg, 0, sizeof(msg));
+ msg.type = MSG_ACK;
+ write_msg(cmd_fd, &msg, 1);
+ read_msg(cmd_fd, &msg, 1);
+ if (msg.type != MSG_ACK) {
+ printk("Ack failed");
+ exit(KSFT_FAIL);
+ }
+
+ for (;;) {
+ ssize_t received = read(test_desc_fd, &desc, sizeof(desc));
+ int ret;
+
+ if (received == 0) /* EOF */
+ break;
+
+ if (received != sizeof(desc)) {
+ pr_err("read() returned %zd", received);
+ exit(KSFT_FAIL);
+ }
+
+ switch (desc.type) {
+ case CREATE_TUNNEL:
+ ret = child_serv(xfrm_sock, &seq, nr,
+ cmd_fd, buf, &desc);
+ break;
+ case ALLOCATE_SPI:
+ ret = xfrm_state_allocspi(xfrm_sock, &seq,
+ -1, desc.proto);
+ break;
+ case MONITOR_ACQUIRE:
+ ret = xfrm_monitor_acquire(xfrm_sock, &seq, nr);
+ break;
+ case EXPIRE_STATE:
+ ret = xfrm_expire_state(xfrm_sock, &seq, nr, &desc);
+ break;
+ case EXPIRE_POLICY:
+ ret = xfrm_expire_policy(xfrm_sock, &seq, nr, &desc);
+ break;
+ case SPDINFO_ATTRS:
+ ret = xfrm_spdinfo_attrs(xfrm_sock, &seq);
+ break;
+ default:
+ printk("Unknown desc type %d", desc.type);
+ exit(KSFT_FAIL);
+ }
+ write_test_result(ret, &desc);
+ }
+
+ close(xfrm_sock);
+
+ msg.type = MSG_EXIT;
+ write_msg(cmd_fd, &msg, 1);
+ exit(KSFT_PASS);
+}
+
+static void grand_child_serv(unsigned int nr, int cmd_fd, void *buf,
+ struct test_desc *msg, int xfrm_sock, uint32_t *seq)
+{
+ struct in_addr src, dst, tunsrc, tundst;
+ bool tun_reply;
+ struct xfrm_desc *desc = &msg->body.xfrm_desc;
+
+ src = inet_makeaddr(INADDR_B, grchild_ip(nr));
+ dst = inet_makeaddr(INADDR_B, child_ip(nr));
+ tunsrc = inet_makeaddr(INADDR_A, grchild_ip(nr));
+ tundst = inet_makeaddr(INADDR_A, child_ip(nr));
+
+ switch (msg->type) {
+ case MSG_EXIT:
+ exit(KSFT_PASS);
+ case MSG_ACK:
+ write_msg(cmd_fd, msg, 1);
+ break;
+ case MSG_PING:
+ tun_reply = memcmp(&dst, &msg->body.ping.reply_ip, sizeof(in_addr_t));
+ /* UDP pinging without xfrm */
+ if (do_ping(cmd_fd, buf, page_size, tun_reply ? tunsrc : src,
+ false, msg->body.ping.port,
+ msg->body.ping.reply_ip, udp_ping_reply)) {
+ printk("ping failed before setting xfrm");
+ }
+ break;
+ case MSG_XFRM_PREPARE:
+ if (xfrm_prepare(xfrm_sock, seq, src, dst, tunsrc, tundst,
+ desc->proto)) {
+ xfrm_cleanup(xfrm_sock, seq, src, dst, tunsrc, tundst);
+ printk("failed to prepare xfrm");
+ }
+ break;
+ case MSG_XFRM_ADD:
+ if (xfrm_set(xfrm_sock, seq, src, dst, tunsrc, tundst, desc)) {
+ xfrm_cleanup(xfrm_sock, seq, src, dst, tunsrc, tundst);
+ printk("failed to set xfrm");
+ }
+ break;
+ case MSG_XFRM_DEL:
+ if (xfrm_delete(xfrm_sock, seq, src, dst, tunsrc, tundst,
+ desc->proto)) {
+ xfrm_cleanup(xfrm_sock, seq, src, dst, tunsrc, tundst);
+ printk("failed to remove xfrm");
+ }
+ break;
+ case MSG_XFRM_CLEANUP:
+ if (xfrm_cleanup(xfrm_sock, seq, src, dst, tunsrc, tundst)) {
+ printk("failed to cleanup xfrm");
+ }
+ break;
+ default:
+ printk("got unknown msg type %d", msg->type);
+ }
+}
+
+static int grand_child_f(unsigned int nr, int cmd_fd, void *buf)
+{
+ struct test_desc msg;
+ int xfrm_sock = -1;
+ uint32_t seq;
+
+ if (switch_ns(nsfd_childb))
+ exit(KSFT_FAIL);
+
+ if (netlink_sock(&xfrm_sock, &seq, NETLINK_XFRM)) {
+ printk("Failed to open xfrm netlink socket");
+ exit(KSFT_FAIL);
+ }
+
+ do {
+ read_msg(cmd_fd, &msg, 1);
+ grand_child_serv(nr, cmd_fd, buf, &msg, xfrm_sock, &seq);
+ } while (1);
+
+ close(xfrm_sock);
+ exit(KSFT_FAIL);
+}
+
+static int start_child(unsigned int nr, char *veth, int test_desc_fd[2])
+{
+ int cmd_sock[2];
+ void *data_map;
+ pid_t child;
+
+ if (init_child(nsfd_childa, veth, child_ip(nr), grchild_ip(nr)))
+ return -1;
+
+ if (init_child(nsfd_childb, veth, grchild_ip(nr), child_ip(nr)))
+ return -1;
+
+ child = fork();
+ if (child < 0) {
+ pr_err("fork()");
+ return -1;
+ } else if (child) {
+ /* in parent - selftest */
+ return switch_ns(nsfd_parent);
+ }
+
+ if (close(test_desc_fd[1])) {
+ pr_err("close()");
+ return -1;
+ }
+
+ /* child */
+ data_map = mmap(0, page_size, PROT_READ | PROT_WRITE,
+ MAP_SHARED | MAP_ANONYMOUS, -1, 0);
+ if (data_map == MAP_FAILED) {
+ pr_err("mmap()");
+ return -1;
+ }
+
+ randomize_buffer(data_map, page_size);
+
+ if (socketpair(PF_LOCAL, SOCK_SEQPACKET, 0, cmd_sock)) {
+ pr_err("socketpair()");
+ return -1;
+ }
+
+ child = fork();
+ if (child < 0) {
+ pr_err("fork()");
+ return -1;
+ } else if (child) {
+ if (close(cmd_sock[0])) {
+ pr_err("close()");
+ return -1;
+ }
+ return child_f(nr, test_desc_fd[0], cmd_sock[1], data_map);
+ }
+ if (close(cmd_sock[1])) {
+ pr_err("close()");
+ return -1;
+ }
+ return grand_child_f(nr, cmd_sock[0], data_map);
+}
+
+static void exit_usage(char **argv)
+{
+ printk("Usage: %s [nr_process]", argv[0]);
+ exit(KSFT_FAIL);
+}
+
+static int __write_desc(int test_desc_fd, struct xfrm_desc *desc)
+{
+ ssize_t ret;
+
+ ret = write(test_desc_fd, desc, sizeof(*desc));
+
+ if (ret == sizeof(*desc))
+ return 0;
+
+ pr_err("Writing test's desc failed %ld", ret);
+
+ return -1;
+}
+
+static int write_desc(int proto, int test_desc_fd,
+ char *a, char *e, char *c, char *ae)
+{
+ struct xfrm_desc desc = {};
+
+ desc.type = CREATE_TUNNEL;
+ desc.proto = proto;
+
+ if (a)
+ strncpy(desc.a_algo, a, ALGO_LEN - 1);
+ if (e)
+ strncpy(desc.e_algo, e, ALGO_LEN - 1);
+ if (c)
+ strncpy(desc.c_algo, c, ALGO_LEN - 1);
+ if (ae)
+ strncpy(desc.ae_algo, ae, ALGO_LEN - 1);
+
+ return __write_desc(test_desc_fd, &desc);
+}
+
+int proto_list[] = { IPPROTO_AH, IPPROTO_COMP, IPPROTO_ESP };
+char *ah_list[] = {
+ "digest_null", "hmac(md5)", "hmac(sha1)", "hmac(sha256)",
+ "hmac(sha384)", "hmac(sha512)", "hmac(rmd160)",
+ "xcbc(aes)", "cmac(aes)"
+};
+char *comp_list[] = {
+ "deflate",
+#if 0
+ /* No compression backend realization */
+ "lzs", "lzjh"
+#endif
+};
+char *e_list[] = {
+ "ecb(cipher_null)", "cbc(des)", "cbc(des3_ede)", "cbc(cast5)",
+ "cbc(blowfish)", "cbc(aes)", "cbc(serpent)", "cbc(camellia)",
+ "cbc(twofish)", "rfc3686(ctr(aes))"
+};
+char *ae_list[] = {
+#if 0
+ /* not implemented */
+ "rfc4106(gcm(aes))", "rfc4309(ccm(aes))", "rfc4543(gcm(aes))",
+ "rfc7539esp(chacha20,poly1305)"
+#endif
+};
+
+const unsigned int proto_plan = ARRAY_SIZE(ah_list) + ARRAY_SIZE(comp_list) \
+ + (ARRAY_SIZE(ah_list) * ARRAY_SIZE(e_list)) \
+ + ARRAY_SIZE(ae_list);
+
+static int write_proto_plan(int fd, int proto)
+{
+ unsigned int i;
+
+ switch (proto) {
+ case IPPROTO_AH:
+ for (i = 0; i < ARRAY_SIZE(ah_list); i++) {
+ if (write_desc(proto, fd, ah_list[i], 0, 0, 0))
+ return -1;
+ }
+ break;
+ case IPPROTO_COMP:
+ for (i = 0; i < ARRAY_SIZE(comp_list); i++) {
+ if (write_desc(proto, fd, 0, 0, comp_list[i], 0))
+ return -1;
+ }
+ break;
+ case IPPROTO_ESP:
+ for (i = 0; i < ARRAY_SIZE(ah_list); i++) {
+ int j;
+
+ for (j = 0; j < ARRAY_SIZE(e_list); j++) {
+ if (write_desc(proto, fd, ah_list[i],
+ e_list[j], 0, 0))
+ return -1;
+ }
+ }
+ for (i = 0; i < ARRAY_SIZE(ae_list); i++) {
+ if (write_desc(proto, fd, 0, 0, 0, ae_list[i]))
+ return -1;
+ }
+ break;
+ default:
+ printk("BUG: Specified unknown proto %d", proto);
+ return -1;
+ }
+
+ return 0;
+}
+
+/*
+ * Some structures in xfrm uapi header differ in size between
+ * 64-bit and 32-bit ABI:
+ *
+ * 32-bit UABI | 64-bit UABI
+ * -------------------------------------|-------------------------------------
+ * sizeof(xfrm_usersa_info) = 220 | sizeof(xfrm_usersa_info) = 224
+ * sizeof(xfrm_userpolicy_info) = 164 | sizeof(xfrm_userpolicy_info) = 168
+ * sizeof(xfrm_userspi_info) = 228 | sizeof(xfrm_userspi_info) = 232
+ * sizeof(xfrm_user_acquire) = 276 | sizeof(xfrm_user_acquire) = 280
+ * sizeof(xfrm_user_expire) = 224 | sizeof(xfrm_user_expire) = 232
+ * sizeof(xfrm_user_polexpire) = 168 | sizeof(xfrm_user_polexpire) = 176
+ *
+ * Check the affected by the UABI difference structures.
+ * Also, check translation for xfrm_set_spdinfo: it has it's own attributes
+ * which needs to be correctly copied, but not translated.
+ */
+const unsigned int compat_plan = 5;
+static int write_compat_struct_tests(int test_desc_fd)
+{
+ struct xfrm_desc desc = {};
+
+ desc.type = ALLOCATE_SPI;
+ desc.proto = IPPROTO_AH;
+ strncpy(desc.a_algo, ah_list[0], ALGO_LEN - 1);
+
+ if (__write_desc(test_desc_fd, &desc))
+ return -1;
+
+ desc.type = MONITOR_ACQUIRE;
+ if (__write_desc(test_desc_fd, &desc))
+ return -1;
+
+ desc.type = EXPIRE_STATE;
+ if (__write_desc(test_desc_fd, &desc))
+ return -1;
+
+ desc.type = EXPIRE_POLICY;
+ if (__write_desc(test_desc_fd, &desc))
+ return -1;
+
+ desc.type = SPDINFO_ATTRS;
+ if (__write_desc(test_desc_fd, &desc))
+ return -1;
+
+ return 0;
+}
+
+static int write_test_plan(int test_desc_fd)
+{
+ unsigned int i;
+ pid_t child;
+
+ child = fork();
+ if (child < 0) {
+ pr_err("fork()");
+ return -1;
+ }
+ if (child) {
+ if (close(test_desc_fd))
+ printk("close(): %m");
+ return 0;
+ }
+
+ if (write_compat_struct_tests(test_desc_fd))
+ exit(KSFT_FAIL);
+
+ for (i = 0; i < ARRAY_SIZE(proto_list); i++) {
+ if (write_proto_plan(test_desc_fd, proto_list[i]))
+ exit(KSFT_FAIL);
+ }
+
+ exit(KSFT_PASS);
+}
+
+static int children_cleanup(void)
+{
+ unsigned ret = KSFT_PASS;
+
+ while (1) {
+ int status;
+ pid_t p = wait(&status);
+
+ if ((p < 0) && errno == ECHILD)
+ break;
+
+ if (p < 0) {
+ pr_err("wait()");
+ return KSFT_FAIL;
+ }
+
+ if (!WIFEXITED(status)) {
+ ret = KSFT_FAIL;
+ continue;
+ }
+
+ if (WEXITSTATUS(status) == KSFT_FAIL)
+ ret = KSFT_FAIL;
+ }
+
+ return ret;
+}
+
+typedef void (*print_res)(const char *, ...);
+
+static int check_results(void)
+{
+ struct test_result tr = {};
+ struct xfrm_desc *d = &tr.desc;
+ int ret = KSFT_PASS;
+
+ while (1) {
+ ssize_t received = read(results_fd[0], &tr, sizeof(tr));
+ print_res result;
+
+ if (received == 0) /* EOF */
+ break;
+
+ if (received != sizeof(tr)) {
+ pr_err("read() returned %zd", received);
+ return KSFT_FAIL;
+ }
+
+ switch (tr.res) {
+ case KSFT_PASS:
+ result = ksft_test_result_pass;
+ break;
+ case KSFT_FAIL:
+ default:
+ result = ksft_test_result_fail;
+ ret = KSFT_FAIL;
+ }
+
+ result(" %s: [%u, '%s', '%s', '%s', '%s', %u]\n",
+ desc_name[d->type], (unsigned int)d->proto, d->a_algo,
+ d->e_algo, d->c_algo, d->ae_algo, d->icv_len);
+ }
+
+ return ret;
+}
+
+int main(int argc, char **argv)
+{
+ long nr_process = 1;
+ int route_sock = -1, ret = KSFT_SKIP;
+ int test_desc_fd[2];
+ uint32_t route_seq;
+ unsigned int i;
+
+ if (argc > 2)
+ exit_usage(argv);
+
+ if (argc > 1) {
+ char *endptr;
+
+ errno = 0;
+ nr_process = strtol(argv[1], &endptr, 10);
+ if ((errno == ERANGE && (nr_process == LONG_MAX || nr_process == LONG_MIN))
+ || (errno != 0 && nr_process == 0)
+ || (endptr == argv[1]) || (*endptr != '\0')) {
+ printk("Failed to parse [nr_process]");
+ exit_usage(argv);
+ }
+
+ if (nr_process > MAX_PROCESSES || nr_process < 1) {
+ printk("nr_process should be between [1; %u]",
+ MAX_PROCESSES);
+ exit_usage(argv);
+ }
+ }
+
+ srand(time(NULL));
+ page_size = sysconf(_SC_PAGESIZE);
+ if (page_size < 1)
+ ksft_exit_skip("sysconf(): %m\n");
+
+ if (pipe2(test_desc_fd, O_DIRECT) < 0)
+ ksft_exit_skip("pipe(): %m\n");
+
+ if (pipe2(results_fd, O_DIRECT) < 0)
+ ksft_exit_skip("pipe(): %m\n");
+
+ if (init_namespaces())
+ ksft_exit_skip("Failed to create namespaces\n");
+
+ if (netlink_sock(&route_sock, &route_seq, NETLINK_ROUTE))
+ ksft_exit_skip("Failed to open netlink route socket\n");
+
+ for (i = 0; i < nr_process; i++) {
+ char veth[VETH_LEN];
+
+ snprintf(veth, VETH_LEN, VETH_FMT, i);
+
+ if (veth_add(route_sock, route_seq++, veth, nsfd_childa, veth, nsfd_childb)) {
+ close(route_sock);
+ ksft_exit_fail_msg("Failed to create veth device");
+ }
+
+ if (start_child(i, veth, test_desc_fd)) {
+ close(route_sock);
+ ksft_exit_fail_msg("Child %u failed to start", i);
+ }
+ }
+
+ if (close(route_sock) || close(test_desc_fd[0]) || close(results_fd[1]))
+ ksft_exit_fail_msg("close(): %m");
+
+ ksft_set_plan(proto_plan + compat_plan);
+
+ if (write_test_plan(test_desc_fd[1]))
+ ksft_exit_fail_msg("Failed to write test plan to pipe");
+
+ ret = check_results();
+
+ if (children_cleanup() == KSFT_FAIL)
+ exit(KSFT_FAIL);
+
+ exit(ret);
+}
diff --git a/tools/testing/selftests/net/ipv6_flowlabel.c b/tools/testing/selftests/net/ipv6_flowlabel.c
index a7c41375374f..708a9822259d 100644
--- a/tools/testing/selftests/net/ipv6_flowlabel.c
+++ b/tools/testing/selftests/net/ipv6_flowlabel.c
@@ -9,6 +9,7 @@
#include <errno.h>
#include <fcntl.h>
#include <limits.h>
+#include <linux/icmpv6.h>
#include <linux/in6.h>
#include <stdbool.h>
#include <stdio.h>
@@ -29,26 +30,48 @@
#ifndef IPV6_FLOWLABEL_MGR
#define IPV6_FLOWLABEL_MGR 32
#endif
+#ifndef IPV6_FLOWINFO_SEND
+#define IPV6_FLOWINFO_SEND 33
+#endif
#define FLOWLABEL_WILDCARD ((uint32_t) -1)
static const char cfg_data[] = "a";
static uint32_t cfg_label = 1;
+static bool use_ping;
+static bool use_flowinfo_send;
+
+static struct icmp6hdr icmp6 = {
+ .icmp6_type = ICMPV6_ECHO_REQUEST
+};
+
+static struct sockaddr_in6 addr = {
+ .sin6_family = AF_INET6,
+ .sin6_addr = IN6ADDR_LOOPBACK_INIT,
+};
static void do_send(int fd, bool with_flowlabel, uint32_t flowlabel)
{
char control[CMSG_SPACE(sizeof(flowlabel))] = {0};
struct msghdr msg = {0};
- struct iovec iov = {0};
+ struct iovec iov = {
+ .iov_base = (char *)cfg_data,
+ .iov_len = sizeof(cfg_data)
+ };
int ret;
- iov.iov_base = (char *)cfg_data;
- iov.iov_len = sizeof(cfg_data);
+ if (use_ping) {
+ iov.iov_base = &icmp6;
+ iov.iov_len = sizeof(icmp6);
+ }
msg.msg_iov = &iov;
msg.msg_iovlen = 1;
- if (with_flowlabel) {
+ if (use_flowinfo_send) {
+ msg.msg_name = &addr;
+ msg.msg_namelen = sizeof(addr);
+ } else if (with_flowlabel) {
struct cmsghdr *cm;
cm = (void *)control;
@@ -94,6 +117,8 @@ static void do_recv(int fd, bool with_flowlabel, uint32_t expect)
ret = recvmsg(fd, &msg, 0);
if (ret == -1)
error(1, errno, "recv");
+ if (use_ping)
+ goto parse_cmsg;
if (msg.msg_flags & (MSG_TRUNC | MSG_CTRUNC))
error(1, 0, "recv: truncated");
if (ret != sizeof(cfg_data))
@@ -101,6 +126,7 @@ static void do_recv(int fd, bool with_flowlabel, uint32_t expect)
if (memcmp(data, cfg_data, sizeof(data)))
error(1, 0, "recv: data mismatch");
+parse_cmsg:
cm = CMSG_FIRSTHDR(&msg);
if (with_flowlabel) {
if (!cm)
@@ -114,9 +140,11 @@ static void do_recv(int fd, bool with_flowlabel, uint32_t expect)
flowlabel = ntohl(*(uint32_t *)CMSG_DATA(cm));
fprintf(stderr, "recv with label %u\n", flowlabel);
- if (expect != FLOWLABEL_WILDCARD && expect != flowlabel)
+ if (expect != FLOWLABEL_WILDCARD && expect != flowlabel) {
fprintf(stderr, "recv: incorrect flowlabel %u != %u\n",
flowlabel, expect);
+ error(1, 0, "recv: flowlabel is wrong");
+ }
} else {
fprintf(stderr, "recv without label\n");
@@ -165,11 +193,17 @@ static void parse_opts(int argc, char **argv)
{
int c;
- while ((c = getopt(argc, argv, "l:")) != -1) {
+ while ((c = getopt(argc, argv, "l:ps")) != -1) {
switch (c) {
case 'l':
cfg_label = strtoul(optarg, NULL, 0);
break;
+ case 'p':
+ use_ping = true;
+ break;
+ case 's':
+ use_flowinfo_send = true;
+ break;
default:
error(1, 0, "%s: parse error", argv[0]);
}
@@ -178,27 +212,30 @@ static void parse_opts(int argc, char **argv)
int main(int argc, char **argv)
{
- struct sockaddr_in6 addr = {
- .sin6_family = AF_INET6,
- .sin6_port = htons(8000),
- .sin6_addr = IN6ADDR_LOOPBACK_INIT,
- };
const int one = 1;
int fdt, fdr;
+ int prot = 0;
+
+ addr.sin6_port = htons(8000);
parse_opts(argc, argv);
- fdt = socket(PF_INET6, SOCK_DGRAM, 0);
+ if (use_ping) {
+ fprintf(stderr, "attempting to use ping sockets\n");
+ prot = IPPROTO_ICMPV6;
+ }
+
+ fdt = socket(PF_INET6, SOCK_DGRAM, prot);
if (fdt == -1)
error(1, errno, "socket t");
- fdr = socket(PF_INET6, SOCK_DGRAM, 0);
+ fdr = use_ping ? fdt : socket(PF_INET6, SOCK_DGRAM, 0);
if (fdr == -1)
error(1, errno, "socket r");
if (connect(fdt, (void *)&addr, sizeof(addr)))
error(1, errno, "connect");
- if (bind(fdr, (void *)&addr, sizeof(addr)))
+ if (!use_ping && bind(fdr, (void *)&addr, sizeof(addr)))
error(1, errno, "bind");
flowlabel_get(fdt, cfg_label, IPV6_FL_S_EXCL, IPV6_FL_F_CREATE);
@@ -216,13 +253,21 @@ int main(int argc, char **argv)
do_recv(fdr, false, 0);
}
+ if (use_flowinfo_send) {
+ fprintf(stderr, "using IPV6_FLOWINFO_SEND to send label\n");
+ addr.sin6_flowinfo = htonl(cfg_label);
+ if (setsockopt(fdt, SOL_IPV6, IPV6_FLOWINFO_SEND, &one,
+ sizeof(one)) == -1)
+ error(1, errno, "setsockopt flowinfo_send");
+ }
+
fprintf(stderr, "send label\n");
do_send(fdt, true, cfg_label);
do_recv(fdr, true, cfg_label);
if (close(fdr))
error(1, errno, "close r");
- if (close(fdt))
+ if (!use_ping && close(fdt))
error(1, errno, "close t");
return 0;
diff --git a/tools/testing/selftests/net/ipv6_flowlabel.sh b/tools/testing/selftests/net/ipv6_flowlabel.sh
index d3bc6442704e..cee95e252bee 100755
--- a/tools/testing/selftests/net/ipv6_flowlabel.sh
+++ b/tools/testing/selftests/net/ipv6_flowlabel.sh
@@ -18,4 +18,20 @@ echo "TEST datapath (with auto-flowlabels)"
./in_netns.sh \
sh -c 'sysctl -q -w net.ipv6.auto_flowlabels=1 && ./ipv6_flowlabel -l 1'
+echo "TEST datapath (with ping-sockets)"
+./in_netns.sh \
+ sh -c 'sysctl -q -w net.ipv6.flowlabel_reflect=4 && \
+ sysctl -q -w net.ipv4.ping_group_range="0 2147483647" && \
+ ./ipv6_flowlabel -l 1 -p'
+
+echo "TEST datapath (with flowinfo-send)"
+./in_netns.sh \
+ sh -c './ipv6_flowlabel -l 1 -s'
+
+echo "TEST datapath (with ping-sockets flowinfo-send)"
+./in_netns.sh \
+ sh -c 'sysctl -q -w net.ipv6.flowlabel_reflect=4 && \
+ sysctl -q -w net.ipv4.ping_group_range="0 2147483647" && \
+ ./ipv6_flowlabel -l 1 -p -s'
+
echo OK. All tests passed
diff --git a/tools/testing/selftests/net/l2_tos_ttl_inherit.sh b/tools/testing/selftests/net/l2_tos_ttl_inherit.sh
new file mode 100755
index 000000000000..f11756e7df2f
--- /dev/null
+++ b/tools/testing/selftests/net/l2_tos_ttl_inherit.sh
@@ -0,0 +1,446 @@
+#!/bin/sh
+# SPDX-License-Identifier: GPL-2.0
+
+# Author: Matthias May <matthias.may@westermo.com>
+#
+# This script evaluates ip tunnels that are capable of carrying L2 traffic
+# if they inherit or set the inheritable fields.
+# Namely these tunnels are: 'gretap', 'vxlan' and 'geneve'.
+# Checked inheritable fields are: TOS and TTL.
+# The outer tunnel protocol of 'IPv4' or 'IPv6' is verified-
+# As payload frames of type 'IPv4', 'IPv6' and 'other'(ARP) are verified.
+# In addition this script also checks if forcing a specific field in the
+# outer header is working.
+
+# Return 4 by default (Kselftest SKIP code)
+ERR=4
+
+if [ "$(id -u)" != "0" ]; then
+ echo "Please run as root."
+ exit $ERR
+fi
+if ! which tcpdump > /dev/null 2>&1; then
+ echo "No tcpdump found. Required for this test."
+ exit $ERR
+fi
+
+expected_tos="0x00"
+expected_ttl="0"
+failed=false
+
+readonly NS0=$(mktemp -u ns0-XXXXXXXX)
+readonly NS1=$(mktemp -u ns1-XXXXXXXX)
+
+RUN_NS0="ip netns exec ${NS0}"
+
+get_random_tos() {
+ # Get a random hex tos value between 0x00 and 0xfc, a multiple of 4
+ echo "0x$(tr -dc '0-9a-f' < /dev/urandom | head -c 1)\
+$(tr -dc '048c' < /dev/urandom | head -c 1)"
+}
+get_random_ttl() {
+ # Get a random dec value between 0 and 255
+ printf "%d" "0x$(tr -dc '0-9a-f' < /dev/urandom | head -c 2)"
+}
+get_field() {
+ # Expects to get the 'head -n 1' of a captured frame by tcpdump.
+ # Parses this first line and returns the specified field.
+ local field="$1"
+ local input="$2"
+ local found=false
+ input="$(echo "$input" | tr -d '(),')"
+ for input_field in $input; do
+ if $found; then
+ echo "$input_field"
+ return
+ fi
+ # The next field that we iterate over is the looked for value
+ if [ "$input_field" = "$field" ]; then
+ found=true
+ fi
+ done
+ echo "0"
+}
+setup() {
+ local type="$1"
+ local outer="$2"
+ local inner="$3"
+ local tos_ttl="$4"
+ local vlan="$5"
+ local test_tos="0x00"
+ local test_ttl="0"
+
+ # We don't want a test-tos of 0x00,
+ # because this is the value that we get when no tos is set.
+ expected_tos="$(get_random_tos)"
+ while [ "$expected_tos" = "0x00" ]; do
+ expected_tos="$(get_random_tos)"
+ done
+ if [ "$tos_ttl" = "random" ]; then
+ test_tos="$expected_tos"
+ tos="fixed $test_tos"
+ elif [ "$tos_ttl" = "inherit" ]; then
+ test_tos="$tos_ttl"
+ tos="inherit $expected_tos"
+ fi
+
+ # We don't want a test-ttl of 64 or 0,
+ # because 64 is when no ttl is set and 0 is not a valid ttl.
+ expected_ttl="$(get_random_ttl)"
+ while [ "$expected_ttl" = "64" ] || [ "$expected_ttl" = "0" ]; do
+ expected_ttl="$(get_random_ttl)"
+ done
+
+ if [ "$tos_ttl" = "random" ]; then
+ test_ttl="$expected_ttl"
+ ttl="fixed $test_ttl"
+ elif [ "$tos_ttl" = "inherit" ]; then
+ test_ttl="$tos_ttl"
+ ttl="inherit $expected_ttl"
+ fi
+ printf "│%7s │%6s │%6s │%13s │%13s │%6s │" \
+ "$type" "$outer" "$inner" "$tos" "$ttl" "$vlan"
+
+ # Create netns NS0 and NS1 and connect them with a veth pair
+ ip netns add "${NS0}"
+ ip netns add "${NS1}"
+ ip link add name veth0 netns "${NS0}" type veth \
+ peer name veth1 netns "${NS1}"
+ ip -netns "${NS0}" link set dev veth0 up
+ ip -netns "${NS1}" link set dev veth1 up
+ ip -netns "${NS0}" address flush dev veth0
+ ip -netns "${NS1}" address flush dev veth1
+
+ local local_addr1=""
+ local local_addr2=""
+ if [ "$type" = "gre" ] || [ "$type" = "vxlan" ]; then
+ if [ "$outer" = "4" ]; then
+ local_addr1="local 198.18.0.1"
+ local_addr2="local 198.18.0.2"
+ elif [ "$outer" = "6" ]; then
+ local_addr1="local fdd1:ced0:5d88:3fce::1"
+ local_addr2="local fdd1:ced0:5d88:3fce::2"
+ fi
+ fi
+ local vxlan=""
+ if [ "$type" = "vxlan" ]; then
+ vxlan="vni 100 dstport 4789"
+ fi
+ local geneve=""
+ if [ "$type" = "geneve" ]; then
+ geneve="vni 100"
+ fi
+ # Create tunnel and assign outer IPv4/IPv6 addresses
+ if [ "$outer" = "4" ]; then
+ if [ "$type" = "gre" ]; then
+ type="gretap"
+ fi
+ ip -netns "${NS0}" address add 198.18.0.1/24 dev veth0
+ ip -netns "${NS1}" address add 198.18.0.2/24 dev veth1
+ ip -netns "${NS0}" link add name tep0 type $type $local_addr1 \
+ remote 198.18.0.2 tos $test_tos ttl $test_ttl \
+ $vxlan $geneve
+ ip -netns "${NS1}" link add name tep1 type $type $local_addr2 \
+ remote 198.18.0.1 tos $test_tos ttl $test_ttl \
+ $vxlan $geneve
+ elif [ "$outer" = "6" ]; then
+ if [ "$type" = "gre" ]; then
+ type="ip6gretap"
+ fi
+ ip -netns "${NS0}" address add fdd1:ced0:5d88:3fce::1/64 \
+ dev veth0 nodad
+ ip -netns "${NS1}" address add fdd1:ced0:5d88:3fce::2/64 \
+ dev veth1 nodad
+ ip -netns "${NS0}" link add name tep0 type $type $local_addr1 \
+ remote fdd1:ced0:5d88:3fce::2 tos $test_tos \
+ ttl $test_ttl $vxlan $geneve
+ ip -netns "${NS1}" link add name tep1 type $type $local_addr2 \
+ remote fdd1:ced0:5d88:3fce::1 tos $test_tos \
+ ttl $test_ttl $vxlan $geneve
+ fi
+
+ # Bring L2-tunnel link up and create VLAN on top
+ ip -netns "${NS0}" link set tep0 up
+ ip -netns "${NS1}" link set tep1 up
+ ip -netns "${NS0}" address flush dev tep0
+ ip -netns "${NS1}" address flush dev tep1
+ local parent
+ if $vlan; then
+ parent="vlan99-"
+ ip -netns "${NS0}" link add link tep0 name ${parent}0 \
+ type vlan id 99
+ ip -netns "${NS1}" link add link tep1 name ${parent}1 \
+ type vlan id 99
+ ip -netns "${NS0}" link set dev ${parent}0 up
+ ip -netns "${NS1}" link set dev ${parent}1 up
+ ip -netns "${NS0}" address flush dev ${parent}0
+ ip -netns "${NS1}" address flush dev ${parent}1
+ else
+ parent="tep"
+ fi
+
+ # Assign inner IPv4/IPv6 addresses
+ if [ "$inner" = "4" ] || [ "$inner" = "other" ]; then
+ ip -netns "${NS0}" address add 198.19.0.1/24 brd + dev ${parent}0
+ ip -netns "${NS1}" address add 198.19.0.2/24 brd + dev ${parent}1
+ elif [ "$inner" = "6" ]; then
+ ip -netns "${NS0}" address add fdd4:96cf:4eae:443b::1/64 \
+ dev ${parent}0 nodad
+ ip -netns "${NS1}" address add fdd4:96cf:4eae:443b::2/64 \
+ dev ${parent}1 nodad
+ fi
+}
+
+verify() {
+ local outer="$1"
+ local inner="$2"
+ local tos_ttl="$3"
+ local vlan="$4"
+
+ local ping_pid out captured_tos captured_ttl result
+
+ local ping_dst
+ if [ "$inner" = "4" ]; then
+ ping_dst="198.19.0.2"
+ elif [ "$inner" = "6" ]; then
+ ping_dst="fdd4:96cf:4eae:443b::2"
+ elif [ "$inner" = "other" ]; then
+ ping_dst="198.19.0.3" # Generates ARPs which are not IPv4/IPv6
+ fi
+ if [ "$tos_ttl" = "inherit" ]; then
+ ${RUN_NS0} ping -i 0.1 $ping_dst -Q "$expected_tos" \
+ -t "$expected_ttl" 2>/dev/null 1>&2 & ping_pid="$!"
+ else
+ ${RUN_NS0} ping -i 0.1 $ping_dst 2>/dev/null 1>&2 & ping_pid="$!"
+ fi
+ local tunnel_type_offset tunnel_type_proto req_proto_offset req_offset
+ if [ "$type" = "gre" ]; then
+ tunnel_type_proto="0x2f"
+ elif [ "$type" = "vxlan" ] || [ "$type" = "geneve" ]; then
+ tunnel_type_proto="0x11"
+ fi
+ if [ "$outer" = "4" ]; then
+ tunnel_type_offset="9"
+ if [ "$inner" = "4" ]; then
+ req_proto_offset="47"
+ req_offset="58"
+ if [ "$type" = "vxlan" ] || [ "$type" = "geneve" ]; then
+ req_proto_offset="$((req_proto_offset + 12))"
+ req_offset="$((req_offset + 12))"
+ fi
+ if $vlan; then
+ req_proto_offset="$((req_proto_offset + 4))"
+ req_offset="$((req_offset + 4))"
+ fi
+ out="$(${RUN_NS0} tcpdump --immediate-mode -p -c 1 -v \
+ -i veth0 -n \
+ ip[$tunnel_type_offset] = $tunnel_type_proto and \
+ ip[$req_proto_offset] = 0x01 and \
+ ip[$req_offset] = 0x08 2>/dev/null \
+ | head -n 1)"
+ elif [ "$inner" = "6" ]; then
+ req_proto_offset="44"
+ req_offset="78"
+ if [ "$type" = "vxlan" ] || [ "$type" = "geneve" ]; then
+ req_proto_offset="$((req_proto_offset + 12))"
+ req_offset="$((req_offset + 12))"
+ fi
+ if $vlan; then
+ req_proto_offset="$((req_proto_offset + 4))"
+ req_offset="$((req_offset + 4))"
+ fi
+ out="$(${RUN_NS0} tcpdump --immediate-mode -p -c 1 -v \
+ -i veth0 -n \
+ ip[$tunnel_type_offset] = $tunnel_type_proto and \
+ ip[$req_proto_offset] = 0x3a and \
+ ip[$req_offset] = 0x80 2>/dev/null \
+ | head -n 1)"
+ elif [ "$inner" = "other" ]; then
+ req_proto_offset="36"
+ req_offset="45"
+ if [ "$type" = "vxlan" ] || [ "$type" = "geneve" ]; then
+ req_proto_offset="$((req_proto_offset + 12))"
+ req_offset="$((req_offset + 12))"
+ fi
+ if $vlan; then
+ req_proto_offset="$((req_proto_offset + 4))"
+ req_offset="$((req_offset + 4))"
+ fi
+ if [ "$tos_ttl" = "inherit" ]; then
+ expected_tos="0x00"
+ expected_ttl="64"
+ fi
+ out="$(${RUN_NS0} tcpdump --immediate-mode -p -c 1 -v \
+ -i veth0 -n \
+ ip[$tunnel_type_offset] = $tunnel_type_proto and \
+ ip[$req_proto_offset] = 0x08 and \
+ ip[$((req_proto_offset + 1))] = 0x06 and \
+ ip[$req_offset] = 0x01 2>/dev/null \
+ | head -n 1)"
+ fi
+ elif [ "$outer" = "6" ]; then
+ if [ "$type" = "gre" ]; then
+ tunnel_type_offset="40"
+ elif [ "$type" = "vxlan" ] || [ "$type" = "geneve" ]; then
+ tunnel_type_offset="6"
+ fi
+ if [ "$inner" = "4" ]; then
+ local req_proto_offset="75"
+ local req_offset="86"
+ if [ "$type" = "vxlan" ] || [ "$type" = "geneve" ]; then
+ req_proto_offset="$((req_proto_offset + 4))"
+ req_offset="$((req_offset + 4))"
+ fi
+ if $vlan; then
+ req_proto_offset="$((req_proto_offset + 4))"
+ req_offset="$((req_offset + 4))"
+ fi
+ out="$(${RUN_NS0} tcpdump --immediate-mode -p -c 1 -v \
+ -i veth0 -n \
+ ip6[$tunnel_type_offset] = $tunnel_type_proto and \
+ ip6[$req_proto_offset] = 0x01 and \
+ ip6[$req_offset] = 0x08 2>/dev/null \
+ | head -n 1)"
+ elif [ "$inner" = "6" ]; then
+ local req_proto_offset="72"
+ local req_offset="106"
+ if [ "$type" = "vxlan" ] || [ "$type" = "geneve" ]; then
+ req_proto_offset="$((req_proto_offset + 4))"
+ req_offset="$((req_offset + 4))"
+ fi
+ if $vlan; then
+ req_proto_offset="$((req_proto_offset + 4))"
+ req_offset="$((req_offset + 4))"
+ fi
+ out="$(${RUN_NS0} tcpdump --immediate-mode -p -c 1 -v \
+ -i veth0 -n \
+ ip6[$tunnel_type_offset] = $tunnel_type_proto and \
+ ip6[$req_proto_offset] = 0x3a and \
+ ip6[$req_offset] = 0x80 2>/dev/null \
+ | head -n 1)"
+ elif [ "$inner" = "other" ]; then
+ local req_proto_offset="64"
+ local req_offset="73"
+ if [ "$type" = "vxlan" ] || [ "$type" = "geneve" ]; then
+ req_proto_offset="$((req_proto_offset + 4))"
+ req_offset="$((req_offset + 4))"
+ fi
+ if $vlan; then
+ req_proto_offset="$((req_proto_offset + 4))"
+ req_offset="$((req_offset + 4))"
+ fi
+ if [ "$tos_ttl" = "inherit" ]; then
+ expected_tos="0x00"
+ expected_ttl="64"
+ fi
+ out="$(${RUN_NS0} tcpdump --immediate-mode -p -c 1 -v \
+ -i veth0 -n \
+ ip6[$tunnel_type_offset] = $tunnel_type_proto and \
+ ip6[$req_proto_offset] = 0x08 and \
+ ip6[$((req_proto_offset + 1))] = 0x06 and \
+ ip6[$req_offset] = 0x01 2>/dev/null \
+ | head -n 1)"
+ fi
+ fi
+ kill -9 $ping_pid
+ wait $ping_pid 2>/dev/null || true
+ result="FAIL"
+ if [ "$outer" = "4" ]; then
+ captured_ttl="$(get_field "ttl" "$out")"
+ captured_tos="$(printf "0x%02x" "$(get_field "tos" "$out")")"
+ if [ "$captured_tos" = "$expected_tos" ] &&
+ [ "$captured_ttl" = "$expected_ttl" ]; then
+ result="OK"
+ fi
+ elif [ "$outer" = "6" ]; then
+ captured_ttl="$(get_field "hlim" "$out")"
+ captured_tos="$(printf "0x%02x" "$(get_field "class" "$out")")"
+ if [ "$captured_tos" = "$expected_tos" ] &&
+ [ "$captured_ttl" = "$expected_ttl" ]; then
+ result="OK"
+ fi
+ fi
+
+ printf "%7s │\n" "$result"
+ if [ "$result" = "FAIL" ]; then
+ failed=true
+ if [ "$captured_tos" != "$expected_tos" ]; then
+ printf "│%43s%27s │\n" \
+ "Expected TOS value: $expected_tos" \
+ "Captured TOS value: $captured_tos"
+ fi
+ if [ "$captured_ttl" != "$expected_ttl" ]; then
+ printf "│%43s%27s │\n" \
+ "Expected TTL value: $expected_ttl" \
+ "Captured TTL value: $captured_ttl"
+ fi
+ printf "│%71s│\n" " "
+ fi
+}
+
+cleanup() {
+ ip netns del "${NS0}" 2>/dev/null
+ ip netns del "${NS1}" 2>/dev/null
+}
+
+exit_handler() {
+ # Don't exit immediately if one of the intermediate commands fails.
+ # We might be called at the end of the script, when the network
+ # namespaces have already been deleted. So cleanup() may fail, but we
+ # still need to run until 'exit $ERR' or the script won't return the
+ # correct error code.
+ set +e
+
+ cleanup
+
+ exit $ERR
+}
+
+# Restore the default SIGINT handler (just in case) and exit.
+# The exit handler will take care of cleaning everything up.
+interrupted() {
+ trap - INT
+
+ exit $ERR
+}
+
+set -e
+trap exit_handler EXIT
+trap interrupted INT
+
+printf "┌────────┬───────┬───────┬──────────────┬"
+printf "──────────────┬───────┬────────┐\n"
+for type in gre vxlan geneve; do
+ if ! $(modprobe "$type" 2>/dev/null); then
+ continue
+ fi
+ for outer in 4 6; do
+ printf "├────────┼───────┼───────┼──────────────┼"
+ printf "──────────────┼───────┼────────┤\n"
+ printf "│ Type │ outer | inner │ tos │"
+ printf " ttl │ vlan │ result │\n"
+ for inner in 4 6 other; do
+ printf "├────────┼───────┼───────┼──────────────┼"
+ printf "──────────────┼───────┼────────┤\n"
+ for tos_ttl in inherit random; do
+ for vlan in false true; do
+ setup "$type" "$outer" "$inner" \
+ "$tos_ttl" "$vlan"
+ verify "$outer" "$inner" "$tos_ttl" \
+ "$vlan"
+ cleanup
+ done
+ done
+ done
+ done
+done
+printf "└────────┴───────┴───────┴──────────────┴"
+printf "──────────────┴───────┴────────┘\n"
+
+# All tests done.
+# Set ERR appropriately: it will be returned by the exit handler.
+if $failed; then
+ ERR=1
+else
+ ERR=0
+fi
diff --git a/tools/testing/selftests/net/l2tp.sh b/tools/testing/selftests/net/l2tp.sh
index 5782433886fc..88de7166c8ae 100755
--- a/tools/testing/selftests/net/l2tp.sh
+++ b/tools/testing/selftests/net/l2tp.sh
@@ -13,6 +13,7 @@
# 10.1.1.1 | | 10.1.2.1
# 2001:db8:1::1 | | 2001:db8:2::1
+source lib.sh
VERBOSE=0
PAUSE_ON_FAIL=no
@@ -80,9 +81,6 @@ create_ns()
[ -z "${addr}" ] && addr="-"
[ -z "${addr6}" ] && addr6="-"
- ip netns add ${ns}
-
- ip -netns ${ns} link set lo up
if [ "${addr}" != "-" ]; then
ip -netns ${ns} addr add dev lo ${addr}
fi
@@ -133,12 +131,7 @@ connect_ns()
cleanup()
{
- local ns
-
- for ns in host-1 host-2 router
- do
- ip netns del ${ns} 2>/dev/null
- done
+ cleanup_ns $host_1 $host_2 $router
}
setup_l2tp_ipv4()
@@ -146,28 +139,28 @@ setup_l2tp_ipv4()
#
# configure l2tpv3 tunnel on host-1
#
- ip -netns host-1 l2tp add tunnel tunnel_id 1041 peer_tunnel_id 1042 \
+ ip -netns $host_1 l2tp add tunnel tunnel_id 1041 peer_tunnel_id 1042 \
encap ip local 10.1.1.1 remote 10.1.2.1
- ip -netns host-1 l2tp add session name l2tp4 tunnel_id 1041 \
+ ip -netns $host_1 l2tp add session name l2tp4 tunnel_id 1041 \
session_id 1041 peer_session_id 1042
- ip -netns host-1 link set dev l2tp4 up
- ip -netns host-1 addr add dev l2tp4 172.16.1.1 peer 172.16.1.2
+ ip -netns $host_1 link set dev l2tp4 up
+ ip -netns $host_1 addr add dev l2tp4 172.16.1.1 peer 172.16.1.2
#
# configure l2tpv3 tunnel on host-2
#
- ip -netns host-2 l2tp add tunnel tunnel_id 1042 peer_tunnel_id 1041 \
+ ip -netns $host_2 l2tp add tunnel tunnel_id 1042 peer_tunnel_id 1041 \
encap ip local 10.1.2.1 remote 10.1.1.1
- ip -netns host-2 l2tp add session name l2tp4 tunnel_id 1042 \
+ ip -netns $host_2 l2tp add session name l2tp4 tunnel_id 1042 \
session_id 1042 peer_session_id 1041
- ip -netns host-2 link set dev l2tp4 up
- ip -netns host-2 addr add dev l2tp4 172.16.1.2 peer 172.16.1.1
+ ip -netns $host_2 link set dev l2tp4 up
+ ip -netns $host_2 addr add dev l2tp4 172.16.1.2 peer 172.16.1.1
#
# add routes to loopback addresses
#
- ip -netns host-1 ro add 172.16.101.2/32 via 172.16.1.2
- ip -netns host-2 ro add 172.16.101.1/32 via 172.16.1.1
+ ip -netns $host_1 ro add 172.16.101.2/32 via 172.16.1.2
+ ip -netns $host_2 ro add 172.16.101.1/32 via 172.16.1.1
}
setup_l2tp_ipv6()
@@ -175,28 +168,28 @@ setup_l2tp_ipv6()
#
# configure l2tpv3 tunnel on host-1
#
- ip -netns host-1 l2tp add tunnel tunnel_id 1061 peer_tunnel_id 1062 \
+ ip -netns $host_1 l2tp add tunnel tunnel_id 1061 peer_tunnel_id 1062 \
encap ip local 2001:db8:1::1 remote 2001:db8:2::1
- ip -netns host-1 l2tp add session name l2tp6 tunnel_id 1061 \
+ ip -netns $host_1 l2tp add session name l2tp6 tunnel_id 1061 \
session_id 1061 peer_session_id 1062
- ip -netns host-1 link set dev l2tp6 up
- ip -netns host-1 addr add dev l2tp6 fc00:1::1 peer fc00:1::2
+ ip -netns $host_1 link set dev l2tp6 up
+ ip -netns $host_1 addr add dev l2tp6 fc00:1::1 peer fc00:1::2
#
# configure l2tpv3 tunnel on host-2
#
- ip -netns host-2 l2tp add tunnel tunnel_id 1062 peer_tunnel_id 1061 \
+ ip -netns $host_2 l2tp add tunnel tunnel_id 1062 peer_tunnel_id 1061 \
encap ip local 2001:db8:2::1 remote 2001:db8:1::1
- ip -netns host-2 l2tp add session name l2tp6 tunnel_id 1062 \
+ ip -netns $host_2 l2tp add session name l2tp6 tunnel_id 1062 \
session_id 1062 peer_session_id 1061
- ip -netns host-2 link set dev l2tp6 up
- ip -netns host-2 addr add dev l2tp6 fc00:1::2 peer fc00:1::1
+ ip -netns $host_2 link set dev l2tp6 up
+ ip -netns $host_2 addr add dev l2tp6 fc00:1::2 peer fc00:1::1
#
# add routes to loopback addresses
#
- ip -netns host-1 -6 ro add fc00:101::2/128 via fc00:1::2
- ip -netns host-2 -6 ro add fc00:101::1/128 via fc00:1::1
+ ip -netns $host_1 -6 ro add fc00:101::2/128 via fc00:1::2
+ ip -netns $host_2 -6 ro add fc00:101::1/128 via fc00:1::1
}
setup()
@@ -205,21 +198,22 @@ setup()
cleanup
set -e
- create_ns host-1 172.16.101.1/32 fc00:101::1/128
- create_ns host-2 172.16.101.2/32 fc00:101::2/128
- create_ns router
+ setup_ns host_1 host_2 router
+ create_ns $host_1 172.16.101.1/32 fc00:101::1/128
+ create_ns $host_2 172.16.101.2/32 fc00:101::2/128
+ create_ns $router
- connect_ns host-1 eth0 10.1.1.1/24 2001:db8:1::1/64 \
- router eth1 10.1.1.2/24 2001:db8:1::2/64
+ connect_ns $host_1 eth0 10.1.1.1/24 2001:db8:1::1/64 \
+ $router eth1 10.1.1.2/24 2001:db8:1::2/64
- connect_ns host-2 eth0 10.1.2.1/24 2001:db8:2::1/64 \
- router eth2 10.1.2.2/24 2001:db8:2::2/64
+ connect_ns $host_2 eth0 10.1.2.1/24 2001:db8:2::1/64 \
+ $router eth2 10.1.2.2/24 2001:db8:2::2/64
- ip -netns host-1 ro add 10.1.2.0/24 via 10.1.1.2
- ip -netns host-1 -6 ro add 2001:db8:2::/64 via 2001:db8:1::2
+ ip -netns $host_1 ro add 10.1.2.0/24 via 10.1.1.2
+ ip -netns $host_1 -6 ro add 2001:db8:2::/64 via 2001:db8:1::2
- ip -netns host-2 ro add 10.1.1.0/24 via 10.1.2.2
- ip -netns host-2 -6 ro add 2001:db8:1::/64 via 2001:db8:2::2
+ ip -netns $host_2 ro add 10.1.1.0/24 via 10.1.2.2
+ ip -netns $host_2 -6 ro add 2001:db8:1::/64 via 2001:db8:2::2
setup_l2tp_ipv4
setup_l2tp_ipv6
@@ -231,38 +225,38 @@ setup_ipsec()
#
# IPv4
#
- run_cmd host-1 ip xfrm policy add \
+ run_cmd $host_1 ip xfrm policy add \
src 10.1.1.1 dst 10.1.2.1 dir out \
tmpl proto esp mode transport
- run_cmd host-1 ip xfrm policy add \
+ run_cmd $host_1 ip xfrm policy add \
src 10.1.2.1 dst 10.1.1.1 dir in \
tmpl proto esp mode transport
- run_cmd host-2 ip xfrm policy add \
+ run_cmd $host_2 ip xfrm policy add \
src 10.1.1.1 dst 10.1.2.1 dir in \
tmpl proto esp mode transport
- run_cmd host-2 ip xfrm policy add \
+ run_cmd $host_2 ip xfrm policy add \
src 10.1.2.1 dst 10.1.1.1 dir out \
tmpl proto esp mode transport
- ip -netns host-1 xfrm state add \
+ ip -netns $host_1 xfrm state add \
src 10.1.1.1 dst 10.1.2.1 \
spi 0x1000 proto esp aead 'rfc4106(gcm(aes))' \
0x0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f 128 mode transport
- ip -netns host-1 xfrm state add \
+ ip -netns $host_1 xfrm state add \
src 10.1.2.1 dst 10.1.1.1 \
spi 0x1001 proto esp aead 'rfc4106(gcm(aes))' \
0x0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f 128 mode transport
- ip -netns host-2 xfrm state add \
+ ip -netns $host_2 xfrm state add \
src 10.1.1.1 dst 10.1.2.1 \
spi 0x1000 proto esp aead 'rfc4106(gcm(aes))' \
0x0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f 128 mode transport
- ip -netns host-2 xfrm state add \
+ ip -netns $host_2 xfrm state add \
src 10.1.2.1 dst 10.1.1.1 \
spi 0x1001 proto esp aead 'rfc4106(gcm(aes))' \
0x0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f 128 mode transport
@@ -270,38 +264,38 @@ setup_ipsec()
#
# IPV6
#
- run_cmd host-1 ip -6 xfrm policy add \
+ run_cmd $host_1 ip -6 xfrm policy add \
src 2001:db8:1::1 dst 2001:db8:2::1 dir out \
tmpl proto esp mode transport
- run_cmd host-1 ip -6 xfrm policy add \
+ run_cmd $host_1 ip -6 xfrm policy add \
src 2001:db8:2::1 dst 2001:db8:1::1 dir in \
tmpl proto esp mode transport
- run_cmd host-2 ip -6 xfrm policy add \
+ run_cmd $host_2 ip -6 xfrm policy add \
src 2001:db8:1::1 dst 2001:db8:2::1 dir in \
tmpl proto esp mode transport
- run_cmd host-2 ip -6 xfrm policy add \
+ run_cmd $host_2 ip -6 xfrm policy add \
src 2001:db8:2::1 dst 2001:db8:1::1 dir out \
tmpl proto esp mode transport
- ip -netns host-1 -6 xfrm state add \
+ ip -netns $host_1 -6 xfrm state add \
src 2001:db8:1::1 dst 2001:db8:2::1 \
spi 0x1000 proto esp aead 'rfc4106(gcm(aes))' \
0x0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f 128 mode transport
- ip -netns host-1 -6 xfrm state add \
+ ip -netns $host_1 -6 xfrm state add \
src 2001:db8:2::1 dst 2001:db8:1::1 \
spi 0x1001 proto esp aead 'rfc4106(gcm(aes))' \
0x0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f 128 mode transport
- ip -netns host-2 -6 xfrm state add \
+ ip -netns $host_2 -6 xfrm state add \
src 2001:db8:1::1 dst 2001:db8:2::1 \
spi 0x1000 proto esp aead 'rfc4106(gcm(aes))' \
0x0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f 128 mode transport
- ip -netns host-2 -6 xfrm state add \
+ ip -netns $host_2 -6 xfrm state add \
src 2001:db8:2::1 dst 2001:db8:1::1 \
spi 0x1001 proto esp aead 'rfc4106(gcm(aes))' \
0x0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f 128 mode transport
@@ -309,10 +303,10 @@ setup_ipsec()
teardown_ipsec()
{
- run_cmd host-1 ip xfrm state flush
- run_cmd host-1 ip xfrm policy flush
- run_cmd host-2 ip xfrm state flush
- run_cmd host-2 ip xfrm policy flush
+ run_cmd $host_1 ip xfrm state flush
+ run_cmd $host_1 ip xfrm policy flush
+ run_cmd $host_2 ip xfrm state flush
+ run_cmd $host_2 ip xfrm policy flush
}
################################################################################
@@ -322,16 +316,16 @@ run_ping()
{
local desc="$1"
- run_cmd host-1 ping -c1 -w1 172.16.1.2
+ run_cmd $host_1 ping -c1 -w1 172.16.1.2
log_test $? 0 "IPv4 basic L2TP tunnel ${desc}"
- run_cmd host-1 ping -c1 -w1 -I 172.16.101.1 172.16.101.2
+ run_cmd $host_1 ping -c1 -w1 -I 172.16.101.1 172.16.101.2
log_test $? 0 "IPv4 route through L2TP tunnel ${desc}"
- run_cmd host-1 ${ping6} -c1 -w1 fc00:1::2
+ run_cmd $host_1 ${ping6} -c1 -w1 fc00:1::2
log_test $? 0 "IPv6 basic L2TP tunnel ${desc}"
- run_cmd host-1 ${ping6} -c1 -w1 -I fc00:101::1 fc00:101::2
+ run_cmd $host_1 ${ping6} -c1 -w1 -I fc00:101::1 fc00:101::2
log_test $? 0 "IPv6 route through L2TP tunnel ${desc}"
}
@@ -344,16 +338,16 @@ run_tests()
setup_ipsec
run_ping "- with IPsec"
- run_cmd host-1 ping -c1 -w1 172.16.1.2
+ run_cmd $host_1 ping -c1 -w1 172.16.1.2
log_test $? 0 "IPv4 basic L2TP tunnel ${desc}"
- run_cmd host-1 ping -c1 -w1 -I 172.16.101.1 172.16.101.2
+ run_cmd $host_1 ping -c1 -w1 -I 172.16.101.1 172.16.101.2
log_test $? 0 "IPv4 route through L2TP tunnel ${desc}"
- run_cmd host-1 ${ping6} -c1 -w1 fc00:1::2
+ run_cmd $host_1 ${ping6} -c1 -w1 fc00:1::2
log_test $? 0 "IPv6 basic L2TP tunnel - with IPsec"
- run_cmd host-1 ${ping6} -c1 -w1 -I fc00:101::1 fc00:101::2
+ run_cmd $host_1 ${ping6} -c1 -w1 -I fc00:101::1 fc00:101::2
log_test $? 0 "IPv6 route through L2TP tunnel - with IPsec"
teardown_ipsec
diff --git a/tools/testing/selftests/net/lib.sh b/tools/testing/selftests/net/lib.sh
new file mode 100644
index 000000000000..f9fe182dfbd4
--- /dev/null
+++ b/tools/testing/selftests/net/lib.sh
@@ -0,0 +1,96 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+##############################################################################
+# Defines
+
+WAIT_TIMEOUT=${WAIT_TIMEOUT:=20}
+BUSYWAIT_TIMEOUT=$((WAIT_TIMEOUT * 1000)) # ms
+
+# Kselftest framework requirement - SKIP code is 4.
+ksft_skip=4
+# namespace list created by setup_ns
+NS_LIST=""
+
+##############################################################################
+# Helpers
+busywait()
+{
+ local timeout=$1; shift
+
+ local start_time="$(date -u +%s%3N)"
+ while true
+ do
+ local out
+ out=$("$@")
+ local ret=$?
+ if ((!ret)); then
+ echo -n "$out"
+ return 0
+ fi
+
+ local current_time="$(date -u +%s%3N)"
+ if ((current_time - start_time > timeout)); then
+ echo -n "$out"
+ return 1
+ fi
+ done
+}
+
+cleanup_ns()
+{
+ local ns=""
+ local errexit=0
+ local ret=0
+
+ # disable errexit temporary
+ if [[ $- =~ "e" ]]; then
+ errexit=1
+ set +e
+ fi
+
+ for ns in "$@"; do
+ ip netns delete "${ns}" &> /dev/null
+ if ! busywait $BUSYWAIT_TIMEOUT ip netns list \| grep -vq "^$ns$" &> /dev/null; then
+ echo "Warn: Failed to remove namespace $ns"
+ ret=1
+ fi
+ done
+
+ [ $errexit -eq 1 ] && set -e
+ return $ret
+}
+
+cleanup_all_ns()
+{
+ cleanup_ns $NS_LIST
+}
+
+# setup netns with given names as prefix. e.g
+# setup_ns local remote
+setup_ns()
+{
+ local ns=""
+ local ns_name=""
+ local ns_list=""
+ for ns_name in "$@"; do
+ # Some test may setup/remove same netns multi times
+ if unset ${ns_name} 2> /dev/null; then
+ ns="${ns_name,,}-$(mktemp -u XXXXXX)"
+ eval readonly ${ns_name}="$ns"
+ else
+ eval ns='$'${ns_name}
+ cleanup_ns "$ns"
+
+ fi
+
+ if ! ip netns add "$ns"; then
+ echo "Failed to create namespace $ns_name"
+ cleanup_ns "$ns_list"
+ return $ksft_skip
+ fi
+ ip -n "$ns" link set lo up
+ ns_list="$ns_list $ns"
+ done
+ NS_LIST="$NS_LIST $ns_list"
+}
diff --git a/tools/testing/selftests/net/mptcp/.gitignore b/tools/testing/selftests/net/mptcp/.gitignore
index 260336d5f0b1..49daae73c41e 100644
--- a/tools/testing/selftests/net/mptcp/.gitignore
+++ b/tools/testing/selftests/net/mptcp/.gitignore
@@ -1,4 +1,6 @@
# SPDX-License-Identifier: GPL-2.0-only
mptcp_connect
+mptcp_inq
+mptcp_sockopt
pm_nl_ctl
*.pcap
diff --git a/tools/testing/selftests/net/mptcp/Makefile b/tools/testing/selftests/net/mptcp/Makefile
index f50976ee7d44..7b936a926859 100644
--- a/tools/testing/selftests/net/mptcp/Makefile
+++ b/tools/testing/selftests/net/mptcp/Makefile
@@ -1,15 +1,15 @@
# SPDX-License-Identifier: GPL-2.0
top_srcdir = ../../../../..
-KSFT_KHDR_INSTALL := 1
-CFLAGS = -Wall -Wl,--no-as-needed -O2 -g -I$(top_srcdir)/usr/include
+CFLAGS = -Wall -Wl,--no-as-needed -O2 -g -I$(top_srcdir)/usr/include $(KHDR_INCLUDES)
-TEST_PROGS := mptcp_connect.sh pm_netlink.sh mptcp_join.sh
+TEST_PROGS := mptcp_connect.sh pm_netlink.sh mptcp_join.sh diag.sh \
+ simult_flows.sh mptcp_sockopt.sh userspace_pm.sh
-TEST_GEN_FILES = mptcp_connect pm_nl_ctl
+TEST_GEN_FILES = mptcp_connect pm_nl_ctl mptcp_sockopt mptcp_inq
-TEST_FILES := settings
+TEST_FILES := mptcp_lib.sh settings
EXTRA_CLEAN := *.pcap
diff --git a/tools/testing/selftests/net/mptcp/config b/tools/testing/selftests/net/mptcp/config
index 2499824d9e1c..4f80014cae49 100644
--- a/tools/testing/selftests/net/mptcp/config
+++ b/tools/testing/selftests/net/mptcp/config
@@ -1,4 +1,34 @@
+CONFIG_KALLSYMS=y
CONFIG_MPTCP=y
+CONFIG_IPV6=y
CONFIG_MPTCP_IPV6=y
+CONFIG_INET_DIAG=m
+CONFIG_INET_MPTCP_DIAG=m
CONFIG_VETH=y
CONFIG_NET_SCH_NETEM=m
+CONFIG_SYN_COOKIES=y
+CONFIG_NETFILTER=y
+CONFIG_NETFILTER_ADVANCED=y
+CONFIG_NETFILTER_NETLINK=m
+CONFIG_NF_TABLES=m
+CONFIG_NFT_COMPAT=m
+CONFIG_NETFILTER_XTABLES=m
+CONFIG_NETFILTER_XT_MATCH_BPF=m
+CONFIG_NETFILTER_XT_MATCH_LENGTH=m
+CONFIG_NETFILTER_XT_MATCH_STATISTIC=m
+CONFIG_NETFILTER_XT_TARGET_MARK=m
+CONFIG_NF_TABLES_INET=y
+CONFIG_NFT_TPROXY=m
+CONFIG_NFT_SOCKET=m
+CONFIG_IP_ADVANCED_ROUTER=y
+CONFIG_IP_MULTIPLE_TABLES=y
+CONFIG_IP_NF_FILTER=m
+CONFIG_IP_NF_MANGLE=m
+CONFIG_IP_NF_TARGET_REJECT=m
+CONFIG_IPV6_MULTIPLE_TABLES=y
+CONFIG_IP6_NF_FILTER=m
+CONFIG_NET_ACT_CSUM=m
+CONFIG_NET_ACT_PEDIT=m
+CONFIG_NET_CLS_ACT=y
+CONFIG_NET_CLS_FW=m
+CONFIG_NET_SCH_INGRESS=m
diff --git a/tools/testing/selftests/net/mptcp/diag.sh b/tools/testing/selftests/net/mptcp/diag.sh
new file mode 100755
index 000000000000..bc97ab33a00e
--- /dev/null
+++ b/tools/testing/selftests/net/mptcp/diag.sh
@@ -0,0 +1,308 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+# Double quotes to prevent globbing and word splitting is recommended in new
+# code but we accept it, especially because there were too many before having
+# address all other issues detected by shellcheck.
+#shellcheck disable=SC2086
+
+. "$(dirname "${0}")/mptcp_lib.sh"
+
+ns=""
+timeout_poll=30
+timeout_test=$((timeout_poll * 2 + 1))
+ret=0
+
+flush_pids()
+{
+ # mptcp_connect in join mode will sleep a bit before completing,
+ # give it some time
+ sleep 1.1
+
+ ip netns pids "${ns}" | xargs --no-run-if-empty kill -SIGUSR1 &>/dev/null
+
+ for _ in $(seq $((timeout_poll * 10))); do
+ [ -z "$(ip netns pids "${ns}")" ] && break
+ sleep 0.1
+ done
+}
+
+# This function is used in the cleanup trap
+#shellcheck disable=SC2317
+cleanup()
+{
+ ip netns pids "${ns}" | xargs --no-run-if-empty kill -SIGKILL &>/dev/null
+
+ mptcp_lib_ns_exit "${ns}"
+}
+
+mptcp_lib_check_mptcp
+mptcp_lib_check_tools ip ss
+
+get_msk_inuse()
+{
+ ip netns exec $ns cat /proc/net/protocols | awk '$1~/^MPTCP$/{print $3}'
+}
+
+__chk_nr()
+{
+ local command="$1"
+ local expected=$2
+ local msg="$3"
+ local skip="${4-SKIP}"
+ local nr
+
+ nr=$(eval $command)
+
+ mptcp_lib_print_title "$msg"
+ if [ "$nr" != "$expected" ]; then
+ if [ "$nr" = "$skip" ] && ! mptcp_lib_expect_all_features; then
+ mptcp_lib_pr_skip "Feature probably not supported"
+ mptcp_lib_result_skip "${msg}"
+ else
+ mptcp_lib_pr_fail "expected $expected found $nr"
+ mptcp_lib_result_fail "${msg}"
+ ret=${KSFT_FAIL}
+ fi
+ else
+ mptcp_lib_pr_ok
+ mptcp_lib_result_pass "${msg}"
+ fi
+}
+
+__chk_msk_nr()
+{
+ local condition=$1
+ shift 1
+
+ __chk_nr "ss -inmHMN $ns | $condition" "$@"
+}
+
+chk_msk_nr()
+{
+ __chk_msk_nr "grep -c token:" "$@"
+}
+
+chk_listener_nr()
+{
+ local expected=$1
+ local msg="$2"
+
+ __chk_nr "ss -nlHMON $ns | wc -l" "$expected" "$msg - mptcp" 0
+ __chk_nr "ss -nlHtON $ns | wc -l" "$expected" "$msg - subflows"
+}
+
+wait_msk_nr()
+{
+ local condition="grep -c token:"
+ local expected=$1
+ local timeout=20
+ local msg nr
+ local max=0
+ local i=0
+
+ shift 1
+ msg=$*
+
+ while [ $i -lt $timeout ]; do
+ nr=$(ss -inmHMN $ns | $condition)
+ [ $nr == $expected ] && break;
+ [ $nr -gt $max ] && max=$nr
+ i=$((i + 1))
+ sleep 1
+ done
+
+ mptcp_lib_print_title "$msg"
+ if [ $i -ge $timeout ]; then
+ mptcp_lib_pr_fail "timeout while expecting $expected max $max last $nr"
+ mptcp_lib_result_fail "${msg} # timeout"
+ ret=${KSFT_FAIL}
+ elif [ $nr != $expected ]; then
+ mptcp_lib_pr_fail "expected $expected found $nr"
+ mptcp_lib_result_fail "${msg} # unexpected result"
+ ret=${KSFT_FAIL}
+ else
+ mptcp_lib_pr_ok
+ mptcp_lib_result_pass "${msg}"
+ fi
+}
+
+chk_msk_fallback_nr()
+{
+ __chk_msk_nr "grep -c fallback" "$@"
+}
+
+chk_msk_remote_key_nr()
+{
+ __chk_msk_nr "grep -c remote_key" "$@"
+}
+
+__chk_listen()
+{
+ local filter="$1"
+ local expected=$2
+ local msg="$3"
+
+ __chk_nr "ss -N $ns -Ml '$filter' | grep -c LISTEN" "$expected" "$msg" 0
+}
+
+chk_msk_listen()
+{
+ lport=$1
+
+ # destination port search should always return empty list
+ __chk_listen "dport $lport" 0 "listen match for dport $lport"
+
+ # should return 'our' mptcp listen socket
+ __chk_listen "sport $lport" 1 "listen match for sport $lport"
+
+ __chk_listen "src inet:0.0.0.0:$lport" 1 "listen match for saddr and sport"
+
+ __chk_listen "" 1 "all listen sockets"
+
+ nr=$(ss -Ml $filter | wc -l)
+}
+
+chk_msk_inuse()
+{
+ local expected=$1
+ local msg="....chk ${2:-${expected}} msk in use"
+ local listen_nr
+
+ if [ "${expected}" -eq 0 ]; then
+ msg+=" after flush"
+ fi
+
+ listen_nr=$(ss -N "${ns}" -Ml | grep -c LISTEN)
+ expected=$((expected + listen_nr))
+
+ for _ in $(seq 10); do
+ if [ "$(get_msk_inuse)" -eq $expected ]; then
+ break
+ fi
+ sleep 0.1
+ done
+
+ __chk_nr get_msk_inuse $expected "${msg}" 0
+}
+
+# $1: cestab nr
+chk_msk_cestab()
+{
+ local expected=$1
+ local msg="....chk ${2:-${expected}} cestab"
+
+ if [ "${expected}" -eq 0 ]; then
+ msg+=" after flush"
+ fi
+
+ __chk_nr "mptcp_lib_get_counter ${ns} MPTcpExtMPCurrEstab" \
+ "${expected}" "${msg}" ""
+}
+
+wait_connected()
+{
+ local listener_ns="${1}"
+ local port="${2}"
+
+ local port_hex i
+
+ port_hex="$(printf "%04X" "${port}")"
+ for i in $(seq 10); do
+ ip netns exec ${listener_ns} grep -q " 0100007F:${port_hex} " /proc/net/tcp && break
+ sleep 0.1
+ done
+}
+
+trap cleanup EXIT
+mptcp_lib_ns_init ns
+
+echo "a" | \
+ timeout ${timeout_test} \
+ ip netns exec $ns \
+ ./mptcp_connect -p 10000 -l -t ${timeout_poll} -w 20 \
+ 0.0.0.0 >/dev/null &
+mptcp_lib_wait_local_port_listen $ns 10000
+chk_msk_nr 0 "no msk on netns creation"
+chk_msk_listen 10000
+
+echo "b" | \
+ timeout ${timeout_test} \
+ ip netns exec $ns \
+ ./mptcp_connect -p 10000 -r 0 -t ${timeout_poll} -w 20 \
+ 127.0.0.1 >/dev/null &
+wait_connected $ns 10000
+chk_msk_nr 2 "after MPC handshake "
+chk_msk_remote_key_nr 2 "....chk remote_key"
+chk_msk_fallback_nr 0 "....chk no fallback"
+chk_msk_inuse 2
+chk_msk_cestab 2
+flush_pids
+
+chk_msk_inuse 0 "2->0"
+chk_msk_cestab 0 "2->0"
+
+echo "a" | \
+ timeout ${timeout_test} \
+ ip netns exec $ns \
+ ./mptcp_connect -p 10001 -l -s TCP -t ${timeout_poll} -w 20 \
+ 0.0.0.0 >/dev/null &
+mptcp_lib_wait_local_port_listen $ns 10001
+echo "b" | \
+ timeout ${timeout_test} \
+ ip netns exec $ns \
+ ./mptcp_connect -p 10001 -r 0 -t ${timeout_poll} -w 20 \
+ 127.0.0.1 >/dev/null &
+wait_connected $ns 10001
+chk_msk_fallback_nr 1 "check fallback"
+chk_msk_inuse 1
+chk_msk_cestab 1
+flush_pids
+
+chk_msk_inuse 0 "1->0"
+chk_msk_cestab 0 "1->0"
+
+NR_CLIENTS=100
+for I in $(seq 1 $NR_CLIENTS); do
+ echo "a" | \
+ timeout ${timeout_test} \
+ ip netns exec $ns \
+ ./mptcp_connect -p $((I+10001)) -l -w 20 \
+ -t ${timeout_poll} 0.0.0.0 >/dev/null &
+done
+mptcp_lib_wait_local_port_listen $ns $((NR_CLIENTS + 10001))
+
+for I in $(seq 1 $NR_CLIENTS); do
+ echo "b" | \
+ timeout ${timeout_test} \
+ ip netns exec $ns \
+ ./mptcp_connect -p $((I+10001)) -w 20 \
+ -t ${timeout_poll} 127.0.0.1 >/dev/null &
+done
+
+wait_msk_nr $((NR_CLIENTS*2)) "many msk socket present"
+chk_msk_inuse $((NR_CLIENTS*2)) "many"
+chk_msk_cestab $((NR_CLIENTS*2)) "many"
+flush_pids
+
+chk_msk_inuse 0 "many->0"
+chk_msk_cestab 0 "many->0"
+
+chk_listener_nr 0 "no listener sockets"
+NR_SERVERS=100
+for I in $(seq 1 $NR_SERVERS); do
+ ip netns exec $ns ./mptcp_connect -p $((I + 20001)) \
+ -t ${timeout_poll} -l 0.0.0.0 >/dev/null 2>&1 &
+done
+mptcp_lib_wait_local_port_listen $ns $((NR_SERVERS + 20001))
+
+chk_listener_nr $NR_SERVERS "many listener sockets"
+
+# graceful termination
+for I in $(seq 1 $NR_SERVERS); do
+ echo a | ip netns exec $ns ./mptcp_connect -p $((I + 20001)) 127.0.0.1 >/dev/null 2>&1 &
+done
+flush_pids
+
+mptcp_lib_result_print_all_tap
+exit $ret
diff --git a/tools/testing/selftests/net/mptcp/mptcp_connect.c b/tools/testing/selftests/net/mptcp/mptcp_connect.c
index cedee5b952ba..d2043ec3bf6d 100644
--- a/tools/testing/selftests/net/mptcp/mptcp_connect.c
+++ b/tools/testing/selftests/net/mptcp/mptcp_connect.c
@@ -6,14 +6,19 @@
#include <limits.h>
#include <fcntl.h>
#include <string.h>
+#include <stdarg.h>
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <strings.h>
+#include <signal.h>
#include <unistd.h>
+#include <time.h>
+#include <sys/ioctl.h>
#include <sys/poll.h>
+#include <sys/random.h>
#include <sys/sendfile.h>
#include <sys/stat.h>
#include <sys/socket.h>
@@ -24,6 +29,8 @@
#include <netinet/in.h>
#include <linux/tcp.h>
+#include <linux/time_types.h>
+#include <linux/sockios.h>
extern int optind;
@@ -36,6 +43,7 @@ extern int optind;
static int poll_timeout = 10 * 1000;
static bool listen_mode;
+static bool quit;
enum cfg_mode {
CFG_MODE_POLL,
@@ -43,31 +51,108 @@ enum cfg_mode {
CFG_MODE_SENDFILE,
};
+enum cfg_peek {
+ CFG_NONE_PEEK,
+ CFG_WITH_PEEK,
+ CFG_AFTER_PEEK,
+};
+
static enum cfg_mode cfg_mode = CFG_MODE_POLL;
+static enum cfg_peek cfg_peek = CFG_NONE_PEEK;
static const char *cfg_host;
static const char *cfg_port = "12000";
static int cfg_sock_proto = IPPROTO_MPTCP;
-static bool tcpulp_audit;
static int pf = AF_INET;
static int cfg_sndbuf;
static int cfg_rcvbuf;
static bool cfg_join;
+static bool cfg_remove;
+static unsigned int cfg_time;
+static unsigned int cfg_do_w;
+static int cfg_wait;
+static uint32_t cfg_mark;
+static char *cfg_input;
+static int cfg_repeat = 1;
+static int cfg_truncate;
+static int cfg_rcv_trunc;
+
+struct cfg_cmsg_types {
+ unsigned int cmsg_enabled:1;
+ unsigned int timestampns:1;
+ unsigned int tcp_inq:1;
+};
+
+struct cfg_sockopt_types {
+ unsigned int transparent:1;
+ unsigned int mptfo:1;
+};
+
+struct tcp_inq_state {
+ unsigned int last;
+ bool expect_eof;
+};
+
+struct wstate {
+ char buf[8192];
+ unsigned int len;
+ unsigned int off;
+ unsigned int total_len;
+};
+
+static struct tcp_inq_state tcp_inq;
+
+static struct cfg_cmsg_types cfg_cmsg_types;
+static struct cfg_sockopt_types cfg_sockopt_types;
static void die_usage(void)
{
- fprintf(stderr, "Usage: mptcp_connect [-6] [-u] [-s MPTCP|TCP] [-p port] [-m mode]"
- "[-l] connect_address\n");
+ fprintf(stderr, "Usage: mptcp_connect [-6] [-c cmsg] [-f offset] [-i file] [-I num] [-j] [-l] "
+ "[-m mode] [-M mark] [-o option] [-p port] [-P mode] [-r num] [-R num] "
+ "[-s MPTCP|TCP] [-S num] [-t num] [-T num] [-w sec] connect_address\n");
fprintf(stderr, "\t-6 use ipv6\n");
- fprintf(stderr, "\t-t num -- set poll timeout to num\n");
- fprintf(stderr, "\t-S num -- set SO_SNDBUF to num\n");
- fprintf(stderr, "\t-R num -- set SO_RCVBUF to num\n");
+ fprintf(stderr, "\t-c cmsg -- test cmsg type <cmsg>\n");
+ fprintf(stderr, "\t-f offset -- stop the I/O after receiving and sending the specified amount "
+ "of bytes. If there are unread bytes in the receive queue, that will cause a MPTCP "
+ "fastclose at close/shutdown. If offset is negative, expect the peer to close before "
+ "all the local data as been sent, thus toleration errors on write and EPIPE signals\n");
+ fprintf(stderr, "\t-i file -- read the data to send from the given file instead of stdin");
+ fprintf(stderr, "\t-I num -- repeat the transfer 'num' times. In listen mode accepts num "
+ "incoming connections, in client mode, disconnect and reconnect to the server\n");
+ fprintf(stderr, "\t-j -- add additional sleep at connection start and tear down "
+ "-- for MPJ tests\n");
+ fprintf(stderr, "\t-l -- listens mode, accepts incoming connection\n");
+ fprintf(stderr, "\t-m [poll|mmap|sendfile] -- use poll(default)/mmap+write/sendfile\n");
+ fprintf(stderr, "\t-M mark -- set socket packet mark\n");
+ fprintf(stderr, "\t-o option -- test sockopt <option>\n");
fprintf(stderr, "\t-p num -- use port num\n");
- fprintf(stderr, "\t-m [MPTCP|TCP] -- use tcp or mptcp sockets\n");
- fprintf(stderr, "\t-s [mmap|poll] -- use poll (default) or mmap\n");
- fprintf(stderr, "\t-u -- check mptcp ulp\n");
+ fprintf(stderr,
+ "\t-P [saveWithPeek|saveAfterPeek] -- save data with/after MSG_PEEK form tcp socket\n");
+ fprintf(stderr, "\t-r num -- enable slow mode, limiting each write to num bytes "
+ "-- for remove addr tests\n");
+ fprintf(stderr, "\t-R num -- set SO_RCVBUF to num\n");
+ fprintf(stderr, "\t-s [MPTCP|TCP] -- use mptcp(default) or tcp sockets\n");
+ fprintf(stderr, "\t-S num -- set SO_SNDBUF to num\n");
+ fprintf(stderr, "\t-t num -- set poll timeout to num\n");
+ fprintf(stderr, "\t-T num -- set expected runtime to num ms\n");
+ fprintf(stderr, "\t-w num -- wait num sec before closing the socket\n");
exit(1);
}
+static void xerror(const char *fmt, ...)
+{
+ va_list ap;
+
+ va_start(ap, fmt);
+ vfprintf(stderr, fmt, ap);
+ va_end(ap);
+ exit(1);
+}
+
+static void handle_signal(int nr)
+{
+ quit = true;
+}
+
static const char *getxinfo_strerr(int err)
{
if (err == EAI_SYSTEM)
@@ -129,10 +214,81 @@ static void set_sndbuf(int fd, unsigned int size)
}
}
+static void set_mark(int fd, uint32_t mark)
+{
+ int err;
+
+ err = setsockopt(fd, SOL_SOCKET, SO_MARK, &mark, sizeof(mark));
+ if (err) {
+ perror("set SO_MARK");
+ exit(1);
+ }
+}
+
+static void set_transparent(int fd, int pf)
+{
+ int one = 1;
+
+ switch (pf) {
+ case AF_INET:
+ if (-1 == setsockopt(fd, SOL_IP, IP_TRANSPARENT, &one, sizeof(one)))
+ perror("IP_TRANSPARENT");
+ break;
+ case AF_INET6:
+ if (-1 == setsockopt(fd, IPPROTO_IPV6, IPV6_TRANSPARENT, &one, sizeof(one)))
+ perror("IPV6_TRANSPARENT");
+ break;
+ }
+}
+
+static void set_mptfo(int fd, int pf)
+{
+ int qlen = 25;
+
+ if (setsockopt(fd, IPPROTO_TCP, TCP_FASTOPEN, &qlen, sizeof(qlen)) == -1)
+ perror("TCP_FASTOPEN");
+}
+
+static int do_ulp_so(int sock, const char *name)
+{
+ return setsockopt(sock, IPPROTO_TCP, TCP_ULP, name, strlen(name));
+}
+
+#define X(m) xerror("%s:%u: %s: failed for proto %d at line %u", __FILE__, __LINE__, (m), proto, line)
+static void sock_test_tcpulp(int sock, int proto, unsigned int line)
+{
+ socklen_t buflen = 8;
+ char buf[8] = "";
+ int ret = getsockopt(sock, IPPROTO_TCP, TCP_ULP, buf, &buflen);
+
+ if (ret != 0)
+ X("getsockopt");
+
+ if (buflen > 0) {
+ if (strcmp(buf, "mptcp") != 0)
+ xerror("unexpected ULP '%s' for proto %d at line %u", buf, proto, line);
+ ret = do_ulp_so(sock, "tls");
+ if (ret == 0)
+ X("setsockopt");
+ } else if (proto == IPPROTO_MPTCP) {
+ ret = do_ulp_so(sock, "tls");
+ if (ret != -1)
+ X("setsockopt");
+ }
+
+ ret = do_ulp_so(sock, "mptcp");
+ if (ret != -1)
+ X("setsockopt");
+
+#undef X
+}
+
+#define SOCK_TEST_TCPULP(s, p) sock_test_tcpulp((s), (p), __LINE__)
+
static int sock_listen_mptcp(const char * const listenaddr,
const char * const port)
{
- int sock;
+ int sock = -1;
struct addrinfo hints = {
.ai_protocol = IPPROTO_TCP,
.ai_socktype = SOCK_STREAM,
@@ -152,10 +308,18 @@ static int sock_listen_mptcp(const char * const listenaddr,
if (sock < 0)
continue;
+ SOCK_TEST_TCPULP(sock, cfg_sock_proto);
+
if (-1 == setsockopt(sock, SOL_SOCKET, SO_REUSEADDR, &one,
sizeof(one)))
perror("setsockopt");
+ if (cfg_sockopt_types.transparent)
+ set_transparent(sock, pf);
+
+ if (cfg_sockopt_types.mptfo)
+ set_mptfo(sock, pf);
+
if (bind(sock, a->ai_addr, a->ai_addrlen) == 0)
break; /* success */
@@ -171,60 +335,30 @@ static int sock_listen_mptcp(const char * const listenaddr,
return sock;
}
+ SOCK_TEST_TCPULP(sock, cfg_sock_proto);
+
if (listen(sock, 20)) {
perror("listen");
close(sock);
return -1;
}
- return sock;
-}
+ SOCK_TEST_TCPULP(sock, cfg_sock_proto);
-static bool sock_test_tcpulp(const char * const remoteaddr,
- const char * const port)
-{
- struct addrinfo hints = {
- .ai_protocol = IPPROTO_TCP,
- .ai_socktype = SOCK_STREAM,
- };
- struct addrinfo *a, *addr;
- int sock = -1, ret = 0;
- bool test_pass = false;
-
- hints.ai_family = AF_INET;
-
- xgetaddrinfo(remoteaddr, port, &hints, &addr);
- for (a = addr; a; a = a->ai_next) {
- sock = socket(a->ai_family, a->ai_socktype, IPPROTO_TCP);
- if (sock < 0) {
- perror("socket");
- continue;
- }
- ret = setsockopt(sock, IPPROTO_TCP, TCP_ULP, "mptcp",
- sizeof("mptcp"));
- if (ret == -1 && errno == EOPNOTSUPP)
- test_pass = true;
- close(sock);
-
- if (test_pass)
- break;
- if (!ret)
- fprintf(stderr,
- "setsockopt(TCP_ULP) returned 0\n");
- else
- perror("setsockopt(TCP_ULP)");
- }
- return test_pass;
+ return sock;
}
static int sock_connect_mptcp(const char * const remoteaddr,
- const char * const port, int proto)
+ const char * const port, int proto,
+ struct addrinfo **peer,
+ int infd, struct wstate *winfo)
{
struct addrinfo hints = {
.ai_protocol = IPPROTO_TCP,
.ai_socktype = SOCK_STREAM,
};
struct addrinfo *a, *addr;
+ int syn_copied = 0;
int sock = -1;
hints.ai_family = pf;
@@ -237,15 +371,44 @@ static int sock_connect_mptcp(const char * const remoteaddr,
continue;
}
- if (connect(sock, a->ai_addr, a->ai_addrlen) == 0)
- break; /* success */
+ SOCK_TEST_TCPULP(sock, proto);
- perror("connect()");
- close(sock);
- sock = -1;
+ if (cfg_mark)
+ set_mark(sock, cfg_mark);
+
+ if (cfg_sockopt_types.mptfo) {
+ if (!winfo->total_len)
+ winfo->total_len = winfo->len = read(infd, winfo->buf,
+ sizeof(winfo->buf));
+
+ syn_copied = sendto(sock, winfo->buf, winfo->len, MSG_FASTOPEN,
+ a->ai_addr, a->ai_addrlen);
+ if (syn_copied >= 0) {
+ winfo->off = syn_copied;
+ winfo->len -= syn_copied;
+ *peer = a;
+ break; /* success */
+ }
+ } else {
+ if (connect(sock, a->ai_addr, a->ai_addrlen) == 0) {
+ *peer = a;
+ break; /* success */
+ }
+ }
+ if (cfg_sockopt_types.mptfo) {
+ perror("sendto()");
+ close(sock);
+ sock = -1;
+ } else {
+ perror("connect()");
+ close(sock);
+ sock = -1;
+ }
}
freeaddrinfo(addr);
+ if (sock != -1)
+ SOCK_TEST_TCPULP(sock, proto);
return sock;
}
@@ -262,9 +425,12 @@ static size_t do_rnd_write(const int fd, char *buf, const size_t len)
if (cfg_join && first && do_w > 100)
do_w = 100;
+ if (cfg_remove && do_w > cfg_do_w)
+ do_w = cfg_do_w;
+
bw = write(fd, buf, do_w);
if (bw < 0)
- perror("write");
+ return bw;
/* let the join handshake complete, before going on */
if (cfg_join && first) {
@@ -272,6 +438,9 @@ static size_t do_rnd_write(const int fd, char *buf, const size_t len)
first = false;
}
+ if (cfg_remove)
+ usleep(200000);
+
return bw;
}
@@ -296,8 +465,105 @@ static size_t do_write(const int fd, char *buf, const size_t len)
return offset;
}
+static void process_cmsg(struct msghdr *msgh)
+{
+ struct __kernel_timespec ts;
+ bool inq_found = false;
+ bool ts_found = false;
+ unsigned int inq = 0;
+ struct cmsghdr *cmsg;
+
+ for (cmsg = CMSG_FIRSTHDR(msgh); cmsg ; cmsg = CMSG_NXTHDR(msgh, cmsg)) {
+ if (cmsg->cmsg_level == SOL_SOCKET && cmsg->cmsg_type == SO_TIMESTAMPNS_NEW) {
+ memcpy(&ts, CMSG_DATA(cmsg), sizeof(ts));
+ ts_found = true;
+ continue;
+ }
+ if (cmsg->cmsg_level == IPPROTO_TCP && cmsg->cmsg_type == TCP_CM_INQ) {
+ memcpy(&inq, CMSG_DATA(cmsg), sizeof(inq));
+ inq_found = true;
+ continue;
+ }
+
+ }
+
+ if (cfg_cmsg_types.timestampns) {
+ if (!ts_found)
+ xerror("TIMESTAMPNS not present\n");
+ }
+
+ if (cfg_cmsg_types.tcp_inq) {
+ if (!inq_found)
+ xerror("TCP_INQ not present\n");
+
+ if (inq > 1024)
+ xerror("tcp_inq %u is larger than one kbyte\n", inq);
+ tcp_inq.last = inq;
+ }
+}
+
+static ssize_t do_recvmsg_cmsg(const int fd, char *buf, const size_t len)
+{
+ char msg_buf[8192];
+ struct iovec iov = {
+ .iov_base = buf,
+ .iov_len = len,
+ };
+ struct msghdr msg = {
+ .msg_iov = &iov,
+ .msg_iovlen = 1,
+ .msg_control = msg_buf,
+ .msg_controllen = sizeof(msg_buf),
+ };
+ int flags = 0;
+ unsigned int last_hint = tcp_inq.last;
+ int ret = recvmsg(fd, &msg, flags);
+
+ if (ret <= 0) {
+ if (ret == 0 && tcp_inq.expect_eof)
+ return ret;
+
+ if (ret == 0 && cfg_cmsg_types.tcp_inq)
+ if (last_hint != 1 && last_hint != 0)
+ xerror("EOF but last tcp_inq hint was %u\n", last_hint);
+
+ return ret;
+ }
+
+ if (tcp_inq.expect_eof)
+ xerror("expected EOF, last_hint %u, now %u\n",
+ last_hint, tcp_inq.last);
+
+ if (msg.msg_controllen && !cfg_cmsg_types.cmsg_enabled)
+ xerror("got %lu bytes of cmsg data, expected 0\n",
+ (unsigned long)msg.msg_controllen);
+
+ if (msg.msg_controllen == 0 && cfg_cmsg_types.cmsg_enabled)
+ xerror("%s\n", "got no cmsg data");
+
+ if (msg.msg_controllen)
+ process_cmsg(&msg);
+
+ if (cfg_cmsg_types.tcp_inq) {
+ if ((size_t)ret < len && last_hint > (unsigned int)ret) {
+ if (ret + 1 != (int)last_hint) {
+ int next = read(fd, msg_buf, sizeof(msg_buf));
+
+ xerror("read %u of %u, last_hint was %u tcp_inq hint now %u next_read returned %d/%m\n",
+ ret, (unsigned int)len, last_hint, tcp_inq.last, next);
+ } else {
+ tcp_inq.expect_eof = true;
+ }
+ }
+ }
+
+ return ret;
+}
+
static ssize_t do_rnd_read(const int fd, char *buf, const size_t len)
{
+ int ret = 0;
+ char tmp[16384];
size_t cap = rand();
cap &= 0xffff;
@@ -307,35 +573,62 @@ static ssize_t do_rnd_read(const int fd, char *buf, const size_t len)
else if (cap > len)
cap = len;
- return read(fd, buf, cap);
+ if (cfg_peek == CFG_WITH_PEEK) {
+ ret = recv(fd, buf, cap, MSG_PEEK);
+ ret = (ret < 0) ? ret : read(fd, tmp, ret);
+ } else if (cfg_peek == CFG_AFTER_PEEK) {
+ ret = recv(fd, buf, cap, MSG_PEEK);
+ ret = (ret < 0) ? ret : read(fd, buf, cap);
+ } else if (cfg_cmsg_types.cmsg_enabled) {
+ ret = do_recvmsg_cmsg(fd, buf, cap);
+ } else {
+ ret = read(fd, buf, cap);
+ }
+
+ return ret;
}
-static void set_nonblock(int fd)
+static void set_nonblock(int fd, bool nonblock)
{
int flags = fcntl(fd, F_GETFL);
if (flags == -1)
return;
- fcntl(fd, F_SETFL, flags | O_NONBLOCK);
+ if (nonblock)
+ fcntl(fd, F_SETFL, flags | O_NONBLOCK);
+ else
+ fcntl(fd, F_SETFL, flags & ~O_NONBLOCK);
+}
+
+static void shut_wr(int fd)
+{
+ /* Close our write side, ev. give some time
+ * for address notification and/or checking
+ * the current status
+ */
+ if (cfg_wait)
+ usleep(cfg_wait);
+
+ shutdown(fd, SHUT_WR);
}
-static int copyfd_io_poll(int infd, int peerfd, int outfd)
+static int copyfd_io_poll(int infd, int peerfd, int outfd,
+ bool *in_closed_after_out, struct wstate *winfo)
{
struct pollfd fds = {
.fd = peerfd,
.events = POLLIN | POLLOUT,
};
- unsigned int woff = 0, wlen = 0;
- char wbuf[8192];
+ unsigned int total_wlen = 0, total_rlen = 0;
- set_nonblock(peerfd);
+ set_nonblock(peerfd, true);
for (;;) {
char rbuf[8192];
ssize_t len;
- if (fds.events == 0)
+ if (fds.events == 0 || quit)
break;
switch (poll(&fds, 1, poll_timeout)) {
@@ -352,42 +645,65 @@ static int copyfd_io_poll(int infd, int peerfd, int outfd)
}
if (fds.revents & POLLIN) {
- len = do_rnd_read(peerfd, rbuf, sizeof(rbuf));
+ ssize_t rb = sizeof(rbuf);
+
+ /* limit the total amount of read data to the trunc value*/
+ if (cfg_truncate > 0) {
+ if (rb + total_rlen > cfg_truncate)
+ rb = cfg_truncate - total_rlen;
+ len = read(peerfd, rbuf, rb);
+ } else {
+ len = do_rnd_read(peerfd, rbuf, sizeof(rbuf));
+ }
if (len == 0) {
/* no more data to receive:
* peer has closed its write side
*/
fds.events &= ~POLLIN;
- if ((fds.events & POLLOUT) == 0)
+ if ((fds.events & POLLOUT) == 0) {
+ *in_closed_after_out = true;
/* and nothing more to send */
break;
+ }
/* Else, still have data to transmit */
} else if (len < 0) {
+ if (cfg_rcv_trunc)
+ return 0;
perror("read");
return 3;
}
+ total_rlen += len;
do_write(outfd, rbuf, len);
}
if (fds.revents & POLLOUT) {
- if (wlen == 0) {
- woff = 0;
- wlen = read(infd, wbuf, sizeof(wbuf));
+ if (winfo->len == 0) {
+ winfo->off = 0;
+ winfo->len = read(infd, winfo->buf, sizeof(winfo->buf));
}
- if (wlen > 0) {
+ if (winfo->len > 0) {
ssize_t bw;
- bw = do_rnd_write(peerfd, wbuf + woff, wlen);
- if (bw < 0)
+ /* limit the total amount of written data to the trunc value */
+ if (cfg_truncate > 0 && winfo->len + total_wlen > cfg_truncate)
+ winfo->len = cfg_truncate - total_wlen;
+
+ bw = do_rnd_write(peerfd, winfo->buf + winfo->off, winfo->len);
+ if (bw < 0) {
+ if (cfg_rcv_trunc)
+ return 0;
+ perror("write");
return 111;
+ }
- woff += bw;
- wlen -= bw;
- } else if (wlen == 0) {
+ winfo->off += bw;
+ winfo->len -= bw;
+ total_wlen += bw;
+ } else if (winfo->len == 0) {
/* We have no more data to send. */
fds.events &= ~POLLOUT;
@@ -395,13 +711,7 @@ static int copyfd_io_poll(int infd, int peerfd, int outfd)
/* ... and peer also closed already */
break;
- /* ... but we still receive.
- * Close our write side, ev. give some time
- * for address notification
- */
- if (cfg_join)
- usleep(400000);
- shutdown(peerfd, SHUT_WR);
+ shut_wr(peerfd);
} else {
if (errno == EINTR)
continue;
@@ -411,17 +721,22 @@ static int copyfd_io_poll(int infd, int peerfd, int outfd)
}
if (fds.revents & (POLLERR | POLLNVAL)) {
+ if (cfg_rcv_trunc)
+ return 0;
fprintf(stderr, "Unexpected revents: "
"POLLERR/POLLNVAL(%x)\n", fds.revents);
return 5;
}
+
+ if (cfg_truncate > 0 && total_wlen >= cfg_truncate &&
+ total_rlen >= cfg_truncate)
+ break;
}
/* leave some time for late join/announce */
- if (cfg_join)
- usleep(400000);
+ if (cfg_remove && !quit)
+ usleep(cfg_wait);
- close(peerfd);
return 0;
}
@@ -444,10 +759,26 @@ static int do_recvfile(int infd, int outfd)
return (int)r;
}
-static int do_mmap(int infd, int outfd, unsigned int size)
+static int spool_buf(int fd, struct wstate *winfo)
+{
+ while (winfo->len) {
+ int ret = write(fd, winfo->buf + winfo->off, winfo->len);
+
+ if (ret < 0) {
+ perror("write");
+ return 4;
+ }
+ winfo->off += ret;
+ winfo->len -= ret;
+ }
+ return 0;
+}
+
+static int do_mmap(int infd, int outfd, unsigned int size,
+ struct wstate *winfo)
{
char *inbuf = mmap(NULL, size, PROT_READ, MAP_SHARED, infd, 0);
- ssize_t ret = 0, off = 0;
+ ssize_t ret = 0, off = winfo->total_len;
size_t rem;
if (inbuf == MAP_FAILED) {
@@ -455,7 +786,11 @@ static int do_mmap(int infd, int outfd, unsigned int size)
return 1;
}
- rem = size;
+ ret = spool_buf(outfd, winfo);
+ if (ret < 0)
+ return ret;
+
+ rem = size - winfo->total_len;
while (rem > 0) {
ret = write(outfd, inbuf + off, rem);
@@ -499,8 +834,16 @@ static int get_infd_size(int fd)
return (int)count;
}
-static int do_sendfile(int infd, int outfd, unsigned int count)
+static int do_sendfile(int infd, int outfd, unsigned int count,
+ struct wstate *winfo)
{
+ int ret = spool_buf(outfd, winfo);
+
+ if (ret < 0)
+ return ret;
+
+ count -= winfo->total_len;
+
while (count > 0) {
ssize_t r;
@@ -517,7 +860,8 @@ static int do_sendfile(int infd, int outfd, unsigned int count)
}
static int copyfd_io_mmap(int infd, int peerfd, int outfd,
- unsigned int size)
+ unsigned int size, bool *in_closed_after_out,
+ struct wstate *winfo)
{
int err;
@@ -526,22 +870,23 @@ static int copyfd_io_mmap(int infd, int peerfd, int outfd,
if (err)
return err;
- err = do_mmap(infd, peerfd, size);
+ err = do_mmap(infd, peerfd, size, winfo);
} else {
- err = do_mmap(infd, peerfd, size);
+ err = do_mmap(infd, peerfd, size, winfo);
if (err)
return err;
- shutdown(peerfd, SHUT_WR);
+ shut_wr(peerfd);
err = do_recvfile(peerfd, outfd);
+ *in_closed_after_out = true;
}
return err;
}
static int copyfd_io_sendfile(int infd, int peerfd, int outfd,
- unsigned int size)
+ unsigned int size, bool *in_closed_after_out, struct wstate *winfo)
{
int err;
@@ -550,40 +895,85 @@ static int copyfd_io_sendfile(int infd, int peerfd, int outfd,
if (err)
return err;
- err = do_sendfile(infd, peerfd, size);
+ err = do_sendfile(infd, peerfd, size, winfo);
} else {
- err = do_sendfile(infd, peerfd, size);
+ err = do_sendfile(infd, peerfd, size, winfo);
if (err)
return err;
+
+ shut_wr(peerfd);
+
err = do_recvfile(peerfd, outfd);
+ *in_closed_after_out = true;
}
return err;
}
-static int copyfd_io(int infd, int peerfd, int outfd)
+static int copyfd_io(int infd, int peerfd, int outfd, bool close_peerfd, struct wstate *winfo)
{
+ bool in_closed_after_out = false;
+ struct timespec start, end;
int file_size;
+ int ret;
+
+ if (cfg_time && (clock_gettime(CLOCK_MONOTONIC, &start) < 0))
+ xerror("can not fetch start time %d", errno);
switch (cfg_mode) {
case CFG_MODE_POLL:
- return copyfd_io_poll(infd, peerfd, outfd);
+ ret = copyfd_io_poll(infd, peerfd, outfd, &in_closed_after_out,
+ winfo);
+ break;
+
case CFG_MODE_MMAP:
file_size = get_infd_size(infd);
if (file_size < 0)
return file_size;
- return copyfd_io_mmap(infd, peerfd, outfd, file_size);
+ ret = copyfd_io_mmap(infd, peerfd, outfd, file_size,
+ &in_closed_after_out, winfo);
+ break;
+
case CFG_MODE_SENDFILE:
file_size = get_infd_size(infd);
if (file_size < 0)
return file_size;
- return copyfd_io_sendfile(infd, peerfd, outfd, file_size);
+ ret = copyfd_io_sendfile(infd, peerfd, outfd, file_size,
+ &in_closed_after_out, winfo);
+ break;
+
+ default:
+ fprintf(stderr, "Invalid mode %d\n", cfg_mode);
+
+ die_usage();
+ return 1;
}
- fprintf(stderr, "Invalid mode %d\n", cfg_mode);
+ if (ret)
+ return ret;
- die_usage();
- return 1;
+ if (close_peerfd)
+ close(peerfd);
+
+ if (cfg_time) {
+ unsigned int delta_ms;
+
+ if (clock_gettime(CLOCK_MONOTONIC, &end) < 0)
+ xerror("can not fetch end time %d", errno);
+ delta_ms = (end.tv_sec - start.tv_sec) * 1000 + (end.tv_nsec - start.tv_nsec) / 1000000;
+ if (delta_ms > cfg_time) {
+ xerror("transfer slower than expected! runtime %d ms, expected %d ms",
+ delta_ms, cfg_time);
+ }
+
+ /* show the runtime only if this end shutdown(wr) before receiving the EOF,
+ * (that is, if this end got the longer runtime)
+ */
+ if (in_closed_after_out)
+ fprintf(stderr, "%d", delta_ms);
+ }
+
+ return 0;
}
static void check_sockaddr(int pf, struct sockaddr_storage *ss,
@@ -676,17 +1066,20 @@ static void maybe_close(int fd)
{
unsigned int r = rand();
- if (!cfg_join && (r & 1))
+ if (!(cfg_join || cfg_remove || cfg_repeat > 1) && (r & 1))
close(fd);
}
int main_loop_s(int listensock)
{
struct sockaddr_storage ss;
+ struct wstate winfo;
struct pollfd polls;
socklen_t salen;
int remotesock;
+ int fd = 0;
+again:
polls.fd = listensock;
polls.events = POLLIN;
@@ -707,47 +1100,206 @@ int main_loop_s(int listensock)
check_sockaddr(pf, &ss, salen);
check_getpeername(remotesock, &ss, salen);
- return copyfd_io(0, remotesock, 1);
+ if (cfg_input) {
+ fd = open(cfg_input, O_RDONLY);
+ if (fd < 0)
+ xerror("can't open %s: %d", cfg_input, errno);
+ }
+
+ SOCK_TEST_TCPULP(remotesock, 0);
+
+ memset(&winfo, 0, sizeof(winfo));
+ copyfd_io(fd, remotesock, 1, true, &winfo);
+ } else {
+ perror("accept");
+ return 1;
}
- perror("accept");
+ if (--cfg_repeat > 0) {
+ if (cfg_input)
+ close(fd);
+ goto again;
+ }
- return 1;
+ return 0;
}
static void init_rng(void)
{
- int fd = open("/dev/urandom", O_RDONLY);
unsigned int foo;
- if (fd > 0) {
- int ret = read(fd, &foo, sizeof(foo));
-
- if (ret < 0)
- srand(fd + foo);
- close(fd);
+ if (getrandom(&foo, sizeof(foo), 0) == -1) {
+ perror("getrandom");
+ exit(1);
}
srand(foo);
}
+static void xsetsockopt(int fd, int level, int optname, const void *optval, socklen_t optlen)
+{
+ int err;
+
+ err = setsockopt(fd, level, optname, optval, optlen);
+ if (err) {
+ perror("setsockopt");
+ exit(1);
+ }
+}
+
+static void apply_cmsg_types(int fd, const struct cfg_cmsg_types *cmsg)
+{
+ static const unsigned int on = 1;
+
+ if (cmsg->timestampns)
+ xsetsockopt(fd, SOL_SOCKET, SO_TIMESTAMPNS_NEW, &on, sizeof(on));
+ if (cmsg->tcp_inq)
+ xsetsockopt(fd, IPPROTO_TCP, TCP_INQ, &on, sizeof(on));
+}
+
+static void parse_cmsg_types(const char *type)
+{
+ char *next = strchr(type, ',');
+ unsigned int len = 0;
+
+ cfg_cmsg_types.cmsg_enabled = 1;
+
+ if (next) {
+ parse_cmsg_types(next + 1);
+ len = next - type;
+ } else {
+ len = strlen(type);
+ }
+
+ if (strncmp(type, "TIMESTAMPNS", len) == 0) {
+ cfg_cmsg_types.timestampns = 1;
+ return;
+ }
+
+ if (strncmp(type, "TCPINQ", len) == 0) {
+ cfg_cmsg_types.tcp_inq = 1;
+ return;
+ }
+
+ fprintf(stderr, "Unrecognized cmsg option %s\n", type);
+ exit(1);
+}
+
+static void parse_setsock_options(const char *name)
+{
+ char *next = strchr(name, ',');
+ unsigned int len = 0;
+
+ if (next) {
+ parse_setsock_options(next + 1);
+ len = next - name;
+ } else {
+ len = strlen(name);
+ }
+
+ if (strncmp(name, "TRANSPARENT", len) == 0) {
+ cfg_sockopt_types.transparent = 1;
+ return;
+ }
+
+ if (strncmp(name, "MPTFO", len) == 0) {
+ cfg_sockopt_types.mptfo = 1;
+ return;
+ }
+
+ fprintf(stderr, "Unrecognized setsockopt option %s\n", name);
+ exit(1);
+}
+
+void xdisconnect(int fd, int addrlen)
+{
+ struct sockaddr_storage empty;
+ int msec_sleep = 10;
+ int queued = 1;
+ int i;
+
+ shutdown(fd, SHUT_WR);
+
+ /* while until the pending data is completely flushed, the later
+ * disconnect will bypass/ignore/drop any pending data.
+ */
+ for (i = 0; ; i += msec_sleep) {
+ if (ioctl(fd, SIOCOUTQ, &queued) < 0)
+ xerror("can't query out socket queue: %d", errno);
+
+ if (!queued)
+ break;
+
+ if (i > poll_timeout)
+ xerror("timeout while waiting for spool to complete");
+ usleep(msec_sleep * 1000);
+ }
+
+ memset(&empty, 0, sizeof(empty));
+ empty.ss_family = AF_UNSPEC;
+ if (connect(fd, (struct sockaddr *)&empty, addrlen) < 0)
+ xerror("can't disconnect: %d", errno);
+}
+
int main_loop(void)
{
- int fd;
+ int fd = 0, ret, fd_in = 0;
+ struct addrinfo *peer;
+ struct wstate winfo;
- /* listener is ready. */
- fd = sock_connect_mptcp(cfg_host, cfg_port, cfg_sock_proto);
+ if (cfg_input && cfg_sockopt_types.mptfo) {
+ fd_in = open(cfg_input, O_RDONLY);
+ if (fd < 0)
+ xerror("can't open %s:%d", cfg_input, errno);
+ }
+
+ memset(&winfo, 0, sizeof(winfo));
+ fd = sock_connect_mptcp(cfg_host, cfg_port, cfg_sock_proto, &peer, fd_in, &winfo);
if (fd < 0)
return 2;
+again:
check_getpeername_connect(fd);
+ SOCK_TEST_TCPULP(fd, cfg_sock_proto);
+
if (cfg_rcvbuf)
set_rcvbuf(fd, cfg_rcvbuf);
if (cfg_sndbuf)
set_sndbuf(fd, cfg_sndbuf);
+ if (cfg_cmsg_types.cmsg_enabled)
+ apply_cmsg_types(fd, &cfg_cmsg_types);
- return copyfd_io(0, fd, 1);
+ if (cfg_input && !cfg_sockopt_types.mptfo) {
+ fd_in = open(cfg_input, O_RDONLY);
+ if (fd < 0)
+ xerror("can't open %s:%d", cfg_input, errno);
+ }
+
+ ret = copyfd_io(fd_in, fd, 1, 0, &winfo);
+ if (ret)
+ return ret;
+
+ if (cfg_truncate > 0) {
+ xdisconnect(fd, peer->ai_addrlen);
+ } else if (--cfg_repeat > 0) {
+ xdisconnect(fd, peer->ai_addrlen);
+
+ /* the socket could be unblocking at this point, we need the
+ * connect to be blocking
+ */
+ set_nonblock(fd, false);
+ if (connect(fd, peer->ai_addr, peer->ai_addrlen))
+ xerror("can't reconnect: %d", errno);
+ if (cfg_input)
+ close(fd_in);
+ memset(&winfo, 0, sizeof(winfo));
+ goto again;
+ } else {
+ close(fd);
+ }
+
+ return 0;
}
int parse_proto(const char *proto)
@@ -785,6 +1337,26 @@ int parse_mode(const char *mode)
return 0;
}
+int parse_peek(const char *mode)
+{
+ if (!strcasecmp(mode, "saveWithPeek"))
+ return CFG_WITH_PEEK;
+ if (!strcasecmp(mode, "saveAfterPeek"))
+ return CFG_AFTER_PEEK;
+
+ fprintf(stderr, "Unknown: %s\n", mode);
+ fprintf(stderr, "Supported MSG_PEEK mode are:\n");
+ fprintf(stderr,
+ "\t\t\"saveWithPeek\" - recv data with flags 'MSG_PEEK' and save the peek data into file\n");
+ fprintf(stderr,
+ "\t\t\"saveAfterPeek\" - read and save data into file after recv with flags 'MSG_PEEK'\n");
+
+ die_usage();
+
+ /* silence compiler warning */
+ return 0;
+}
+
static int parse_int(const char *size)
{
unsigned long s;
@@ -812,12 +1384,37 @@ static void parse_opts(int argc, char **argv)
{
int c;
- while ((c = getopt(argc, argv, "6jlp:s:hut:m:S:R:")) != -1) {
+ while ((c = getopt(argc, argv, "6c:f:hi:I:jlm:M:o:p:P:r:R:s:S:t:T:w:")) != -1) {
switch (c) {
+ case 'f':
+ cfg_truncate = atoi(optarg);
+
+ /* when receiving a fastclose, ignore PIPE signals and
+ * all the I/O errors later in the code
+ */
+ if (cfg_truncate < 0) {
+ cfg_rcv_trunc = true;
+ signal(SIGPIPE, handle_signal);
+ }
+ break;
case 'j':
cfg_join = true;
cfg_mode = CFG_MODE_POLL;
break;
+ case 'r':
+ cfg_remove = true;
+ cfg_mode = CFG_MODE_POLL;
+ cfg_wait = 400000;
+ cfg_do_w = atoi(optarg);
+ if (cfg_do_w <= 0)
+ cfg_do_w = 50;
+ break;
+ case 'i':
+ cfg_input = optarg;
+ break;
+ case 'I':
+ cfg_repeat = atoi(optarg);
+ break;
case 'l':
listen_mode = true;
break;
@@ -830,9 +1427,6 @@ static void parse_opts(int argc, char **argv)
case 'h':
die_usage();
break;
- case 'u':
- tcpulp_audit = true;
- break;
case '6':
pf = AF_INET6;
break;
@@ -841,6 +1435,9 @@ static void parse_opts(int argc, char **argv)
if (poll_timeout <= 0)
poll_timeout = -1;
break;
+ case 'T':
+ cfg_time = atoi(optarg);
+ break;
case 'm':
cfg_mode = parse_mode(optarg);
break;
@@ -850,6 +1447,21 @@ static void parse_opts(int argc, char **argv)
case 'R':
cfg_rcvbuf = parse_int(optarg);
break;
+ case 'w':
+ cfg_wait = atoi(optarg)*1000000;
+ break;
+ case 'M':
+ cfg_mark = strtol(optarg, NULL, 0);
+ break;
+ case 'P':
+ cfg_peek = parse_peek(optarg);
+ break;
+ case 'c':
+ parse_cmsg_types(optarg);
+ break;
+ case 'o':
+ parse_setsock_options(optarg);
+ break;
}
}
@@ -865,11 +1477,9 @@ int main(int argc, char *argv[])
{
init_rng();
+ signal(SIGUSR1, handle_signal);
parse_opts(argc, argv);
- if (tcpulp_audit)
- return sock_test_tcpulp(cfg_host, cfg_port) ? 0 : 1;
-
if (listen_mode) {
int fd = sock_listen_mptcp(cfg_host, cfg_port);
@@ -880,6 +1490,10 @@ int main(int argc, char *argv[])
set_rcvbuf(fd, cfg_rcvbuf);
if (cfg_sndbuf)
set_sndbuf(fd, cfg_sndbuf);
+ if (cfg_mark)
+ set_mark(fd, cfg_mark);
+ if (cfg_cmsg_types.cmsg_enabled)
+ apply_cmsg_types(fd, &cfg_cmsg_types);
return main_loop_s(fd);
}
diff --git a/tools/testing/selftests/net/mptcp/mptcp_connect.sh b/tools/testing/selftests/net/mptcp/mptcp_connect.sh
index acf02e156d20..4c4248554826 100755
--- a/tools/testing/selftests/net/mptcp/mptcp_connect.sh
+++ b/tools/testing/selftests/net/mptcp/mptcp_connect.sh
@@ -1,26 +1,39 @@
#!/bin/bash
# SPDX-License-Identifier: GPL-2.0
+# Double quotes to prevent globbing and word splitting is recommended in new
+# code but we accept it, especially because there were too many before having
+# address all other issues detected by shellcheck.
+#shellcheck disable=SC2086
+
+. "$(dirname "${0}")/mptcp_lib.sh"
+
time_start=$(date +%s)
-optstring="S:R:d:e:l:r:h4cm:"
+optstring="S:R:d:e:l:r:h4cm:f:tC"
ret=0
+final_ret=0
sin=""
sout=""
+cin_disconnect=""
cin=""
cout=""
-ksft_skip=4
capture=false
-timeout=30
+timeout_poll=30
+timeout_test=$((timeout_poll * 2 + 1))
ipv6=true
ethtool_random_on=true
-tc_delay="$((RANDOM%400))"
+tc_delay="$((RANDOM%50))"
tc_loss=$((RANDOM%101))
-tc_reorder=""
testmode=""
sndbuf=0
rcvbuf=0
options_log=true
+do_tcp=0
+checksum=false
+filesize=0
+connect_per_transfer=1
+port=$((10000 - 1))
if [ $tc_loss -eq 100 ];then
tc_loss=1%
@@ -40,23 +53,26 @@ usage() {
echo -e "\t-e: ethtool features to disable, e.g.: \"-e tso -e gso\" (default: randomly disable any of tso/gso/gro)"
echo -e "\t-4: IPv4 only: disable IPv6 tests (default: test both IPv4 and IPv6)"
echo -e "\t-c: capture packets for each test using tcpdump (default: no capture)"
+ echo -e "\t-f: size of file to transfer in bytes (default random)"
echo -e "\t-S: set sndbuf value (default: use kernel default)"
echo -e "\t-R: set rcvbuf value (default: use kernel default)"
echo -e "\t-m: test mode (poll, sendfile; default: poll)"
+ echo -e "\t-t: also run tests with TCP (use twice to non-fallback tcp)"
+ echo -e "\t-C: enable the MPTCP data checksum"
}
while getopts "$optstring" option;do
case "$option" in
"h")
usage $0
- exit 0
+ exit ${KSFT_PASS}
;;
"d")
if [ $OPTARG -ge 0 ];then
tc_delay="$OPTARG"
else
echo "-d requires numeric argument, got \"$OPTARG\"" 1>&2
- exit 1
+ exit ${KSFT_FAIL}
fi
;;
"e")
@@ -80,7 +96,7 @@ while getopts "$optstring" option;do
sndbuf="$OPTARG"
else
echo "-S requires numeric argument, got \"$OPTARG\"" 1>&2
- exit 1
+ exit ${KSFT_FAIL}
fi
;;
"R")
@@ -88,57 +104,61 @@ while getopts "$optstring" option;do
rcvbuf="$OPTARG"
else
echo "-R requires numeric argument, got \"$OPTARG\"" 1>&2
- exit 1
+ exit ${KSFT_FAIL}
fi
;;
"m")
testmode="$OPTARG"
;;
+ "f")
+ filesize="$OPTARG"
+ ;;
+ "t")
+ do_tcp=$((do_tcp+1))
+ ;;
+ "C")
+ checksum=true
+ ;;
"?")
usage $0
- exit 1
+ exit ${KSFT_FAIL}
;;
esac
done
-sec=$(date +%s)
-rndh=$(printf %x $sec)-$(mktemp -u XXXXXX)
-ns1="ns1-$rndh"
-ns2="ns2-$rndh"
-ns3="ns3-$rndh"
-ns4="ns4-$rndh"
+ns1=""
+ns2=""
+ns3=""
+ns4=""
-TEST_COUNT=0
+TEST_GROUP=""
+# This function is used in the cleanup trap
+#shellcheck disable=SC2317
cleanup()
{
+ rm -f "$cin_disconnect" "$cout_disconnect"
rm -f "$cin" "$cout"
rm -f "$sin" "$sout"
rm -f "$capout"
- local netns
- for netns in "$ns1" "$ns2" "$ns3" "$ns4";do
- ip netns del $netns
- done
+ mptcp_lib_ns_exit "${ns1}" "${ns2}" "${ns3}" "${ns4}"
}
-ip -Version > /dev/null 2>&1
-if [ $? -ne 0 ];then
- echo "SKIP: Could not run test without ip tool"
- exit $ksft_skip
-fi
+mptcp_lib_check_mptcp
+mptcp_lib_check_kallsyms
+mptcp_lib_check_tools ip
sin=$(mktemp)
sout=$(mktemp)
cin=$(mktemp)
cout=$(mktemp)
capout=$(mktemp)
+cin_disconnect="$cin".disconnect
+cout_disconnect="$cout".disconnect
trap cleanup EXIT
-for i in "$ns1" "$ns2" "$ns3" "$ns4";do
- ip netns add $i || exit $ksft_skip
- ip -net $i link set lo up
-done
+mptcp_lib_ns_init ns1 ns2 ns3 ns4
# "$ns1" ns2 ns3 ns4
# ns1eth2 ns2eth1 ns2eth3 ns3eth2 ns3eth4 ns4eth3
@@ -186,13 +206,20 @@ ip -net "$ns4" link set ns4eth3 up
ip -net "$ns4" route add default via 10.0.3.2
ip -net "$ns4" route add default via dead:beef:3::2
+if $checksum; then
+ for i in "$ns1" "$ns2" "$ns3" "$ns4";do
+ ip netns exec $i sysctl -q net.mptcp.checksum_enabled=1
+ done
+fi
+
set_ethtool_flags() {
local ns="$1"
local dev="$2"
local flags="$3"
- ip netns exec $ns ethtool -K $dev $flags 2>/dev/null
- [ $? -eq 0 ] && echo "INFO: set $ns dev $dev: ethtool -K $flags"
+ if ip netns exec $ns ethtool -K $dev $flags 2>/dev/null; then
+ mptcp_lib_pr_info "set $ns dev $dev: ethtool -K $flags"
+ fi
}
set_random_ethtool_flags() {
@@ -220,100 +247,62 @@ else
set_ethtool_flags "$ns4" ns4eth3 "$ethtool_args"
fi
-print_file_err()
-{
- ls -l "$1" 1>&2
- echo "Trailing bytes are: "
- tail -c 27 "$1"
-}
-
-check_transfer()
-{
- local in=$1
- local out=$2
- local what=$3
-
- cmp "$in" "$out" > /dev/null 2>&1
- if [ $? -ne 0 ] ;then
- echo "[ FAIL ] $what does not match (in, out):"
- print_file_err "$in"
- print_file_err "$out"
-
- return 1
- fi
-
- return 0
+print_larger_title() {
+ # here we don't have the time, a bit longer for the alignment
+ MPTCP_LIB_TEST_FORMAT="%02u %-69s" \
+ mptcp_lib_print_title "${@}"
}
check_mptcp_disabled()
{
local disabled_ns
- disabled_ns="ns_disabled-$sech-$(mktemp -u XXXXXX)"
- ip netns add ${disabled_ns} || exit $ksft_skip
+ mptcp_lib_ns_init disabled_ns
+ print_larger_title "New MPTCP socket can be blocked via sysctl"
# net.mptcp.enabled should be enabled by default
if [ "$(ip netns exec ${disabled_ns} sysctl net.mptcp.enabled | awk '{ print $3 }')" -ne 1 ]; then
- echo -e "net.mptcp.enabled sysctl is not 1 by default\t\t[ FAIL ]"
- ret=1
+ mptcp_lib_pr_fail "net.mptcp.enabled sysctl is not 1 by default"
+ mptcp_lib_result_fail "net.mptcp.enabled sysctl is not 1 by default"
+ ret=${KSFT_FAIL}
return 1
fi
ip netns exec ${disabled_ns} sysctl -q net.mptcp.enabled=0
local err=0
- LANG=C ip netns exec ${disabled_ns} ./mptcp_connect -t $timeout -p 10000 -s MPTCP 127.0.0.1 < "$cin" 2>&1 | \
+ LC_ALL=C ip netns exec ${disabled_ns} ./mptcp_connect -p 10000 -s MPTCP 127.0.0.1 < "$cin" 2>&1 | \
grep -q "^socket: Protocol not available$" && err=1
- ip netns delete ${disabled_ns}
+ mptcp_lib_ns_exit "${disabled_ns}"
if [ ${err} -eq 0 ]; then
- echo -e "New MPTCP socket cannot be blocked via sysctl\t\t[ FAIL ]"
- ret=1
+ mptcp_lib_pr_fail "New MPTCP socket cannot be blocked via sysctl"
+ mptcp_lib_result_fail "New MPTCP socket cannot be blocked via sysctl"
+ ret=${KSFT_FAIL}
return 1
fi
- echo -e "New MPTCP socket can be blocked via sysctl\t\t[ OK ]"
+ mptcp_lib_pr_ok
+ mptcp_lib_result_pass "New MPTCP socket can be blocked via sysctl"
return 0
}
-check_mptcp_ulp_setsockopt()
-{
- local t retval
- t="ns_ulp-$sech-$(mktemp -u XXXXXX)"
-
- ip netns add ${t} || exit $ksft_skip
- if ! ip netns exec ${t} ./mptcp_connect -u -p 10000 -s TCP 127.0.0.1 2>&1; then
- printf "setsockopt(..., TCP_ULP, \"mptcp\", ...) allowed\t[ FAIL ]\n"
- retval=1
- ret=$retval
- else
- printf "setsockopt(..., TCP_ULP, \"mptcp\", ...) blocked\t[ OK ]\n"
- retval=0
- fi
- ip netns del ${t}
- return $retval
-}
-
-# $1: IP address
-is_v6()
-{
- [ -z "${1##*:*}" ]
-}
-
do_ping()
{
local listener_ns="$1"
local connector_ns="$2"
local connect_addr="$3"
local ping_args="-q -c 1"
+ local rc=0
- if is_v6 "${connect_addr}"; then
+ if mptcp_lib_is_v6 "${connect_addr}"; then
$ipv6 || return 0
ping_args="${ping_args} -6"
fi
- ip netns exec ${connector_ns} ping ${ping_args} $connect_addr >/dev/null
- if [ $? -ne 0 ] ; then
- echo "$listener_ns -> $connect_addr connectivity [ FAIL ]" 1>&2
- ret=1
+ ip netns exec ${connector_ns} ping ${ping_args} $connect_addr >/dev/null || rc=1
+
+ if [ $rc -ne 0 ] ; then
+ mptcp_lib_pr_fail "$listener_ns -> $connect_addr connectivity"
+ ret=${KSFT_FAIL}
return 1
fi
@@ -321,23 +310,6 @@ do_ping()
return 0
}
-# $1: ns, $2: port
-wait_local_port_listen()
-{
- local listener_ns="${1}"
- local port="${2}"
-
- local port_hex i
-
- port_hex="$(printf "%04X" "${port}")"
- for i in $(seq 10); do
- ip netns exec "${listener_ns}" cat /proc/net/tcp* | \
- awk "BEGIN {rc=1} {if (\$2 ~ /:${port_hex}\$/ && \$4 ~ /0A/) {rc=0; exit}} END {exit rc}" &&
- break
- sleep 0.1
- done
-}
-
do_transfer()
{
local listener_ns="$1"
@@ -346,28 +318,26 @@ do_transfer()
local srv_proto="$4"
local connect_addr="$5"
local local_addr="$6"
- local extra_args=""
+ local extra_args="$7"
- local port
- port=$((10000+$TEST_COUNT))
- TEST_COUNT=$((TEST_COUNT+1))
+ port=$((port + 1))
if [ "$rcvbuf" -gt 0 ]; then
- extra_args="$extra_args -R $rcvbuf"
+ extra_args+=" -R $rcvbuf"
fi
if [ "$sndbuf" -gt 0 ]; then
- extra_args="$extra_args -S $sndbuf"
+ extra_args+=" -S $sndbuf"
fi
if [ -n "$testmode" ]; then
- extra_args="$extra_args -m $testmode"
+ extra_args+=" -m $testmode"
fi
if [ -n "$extra_args" ] && $options_log; then
- options_log=false
- echo "INFO: extra options: $extra_args"
+ mptcp_lib_pr_info "extra options: $extra_args"
fi
+ options_log=false
:> "$cout"
:> "$sout"
@@ -375,32 +345,65 @@ do_transfer()
local addr_port
addr_port=$(printf "%s:%d" ${connect_addr} ${port})
- printf "%.3s %-5s -> %.3s (%-20s) %-5s\t" ${connector_ns} ${cl_proto} ${listener_ns} ${addr_port} ${srv_proto}
+ local result_msg
+ result_msg="$(printf "%.3s %-5s -> %.3s (%-20s) %-5s" ${connector_ns} ${cl_proto} ${listener_ns} ${addr_port} ${srv_proto})"
+ mptcp_lib_print_title "${result_msg}"
if $capture; then
local capuser
+ local rndh="${connector_ns:4}"
if [ -z $SUDO_USER ] ; then
capuser=""
else
capuser="-Z $SUDO_USER"
fi
- local capfile="${listener_ns}-${connector_ns}-${cl_proto}-${srv_proto}-${connect_addr}.pcap"
+ local capfile="${rndh}-${connector_ns:0:3}-${listener_ns:0:3}-${cl_proto}-${srv_proto}-${connect_addr}-${port}"
+ local capopt="-i any -s 65535 -B 32768 ${capuser}"
- ip netns exec ${listener_ns} tcpdump -i any -s 65535 -B 32768 $capuser -w $capfile > "$capout" 2>&1 &
- local cappid=$!
+ ip netns exec ${listener_ns} tcpdump ${capopt} -w "${capfile}-listener.pcap" >> "${capout}" 2>&1 &
+ local cappid_listener=$!
+
+ ip netns exec ${connector_ns} tcpdump ${capopt} -w "${capfile}-connector.pcap" >> "${capout}" 2>&1 &
+ local cappid_connector=$!
sleep 1
fi
- ip netns exec ${listener_ns} ./mptcp_connect -t $timeout -l -p $port -s ${srv_proto} $extra_args $local_addr < "$sin" > "$sout" &
+ NSTAT_HISTORY=/tmp/${listener_ns}.nstat ip netns exec ${listener_ns} \
+ nstat -n
+ if [ ${listener_ns} != ${connector_ns} ]; then
+ NSTAT_HISTORY=/tmp/${connector_ns}.nstat ip netns exec ${connector_ns} \
+ nstat -n
+ fi
+
+ local stat_synrx_last_l
+ local stat_ackrx_last_l
+ local stat_cookietx_last
+ local stat_cookierx_last
+ local stat_csum_err_s
+ local stat_csum_err_c
+ stat_synrx_last_l=$(mptcp_lib_get_counter "${listener_ns}" "MPTcpExtMPCapableSYNRX")
+ stat_ackrx_last_l=$(mptcp_lib_get_counter "${listener_ns}" "MPTcpExtMPCapableACKRX")
+ stat_cookietx_last=$(mptcp_lib_get_counter "${listener_ns}" "TcpExtSyncookiesSent")
+ stat_cookierx_last=$(mptcp_lib_get_counter "${listener_ns}" "TcpExtSyncookiesRecv")
+ stat_csum_err_s=$(mptcp_lib_get_counter "${listener_ns}" "MPTcpExtDataCsumErr")
+ stat_csum_err_c=$(mptcp_lib_get_counter "${connector_ns}" "MPTcpExtDataCsumErr")
+
+ timeout ${timeout_test} \
+ ip netns exec ${listener_ns} \
+ ./mptcp_connect -t ${timeout_poll} -l -p $port -s ${srv_proto} \
+ $extra_args $local_addr < "$sin" > "$sout" &
local spid=$!
- wait_local_port_listen "${listener_ns}" "${port}"
+ mptcp_lib_wait_local_port_listen "${listener_ns}" "${port}"
local start
start=$(date +%s%3N)
- ip netns exec ${connector_ns} ./mptcp_connect -t $timeout -p $port -s ${cl_proto} $extra_args $connect_addr < "$cin" > "$cout" &
+ timeout ${timeout_test} \
+ ip netns exec ${connector_ns} \
+ ./mptcp_connect -t ${timeout_poll} -p $port -s ${cl_proto} \
+ $extra_args $connect_addr < "$cin" > "$cout" &
local cpid=$!
wait $cpid
@@ -413,56 +416,159 @@ do_transfer()
if $capture; then
sleep 1
- kill $cappid
+ kill ${cappid_listener}
+ kill ${cappid_connector}
+ fi
+
+ NSTAT_HISTORY=/tmp/${listener_ns}.nstat ip netns exec ${listener_ns} \
+ nstat | grep Tcp > /tmp/${listener_ns}.out
+ if [ ${listener_ns} != ${connector_ns} ]; then
+ NSTAT_HISTORY=/tmp/${connector_ns}.nstat ip netns exec ${connector_ns} \
+ nstat | grep Tcp > /tmp/${connector_ns}.out
fi
local duration
duration=$((stop-start))
- duration=$(printf "(duration %05sms)" $duration)
+ result_msg+=" # time=${duration}ms"
+ printf "(duration %05sms) " "${duration}"
if [ ${rets} -ne 0 ] || [ ${retc} -ne 0 ]; then
- echo "$duration [ FAIL ] client exit code $retc, server $rets" 1>&2
- echo "\nnetns ${listener_ns} socket stat for $port:" 1>&2
- ip netns exec ${listener_ns} ss -nita 1>&2 -o "sport = :$port"
- echo "\nnetns ${connector_ns} socket stat for $port:" 1>&2
- ip netns exec ${connector_ns} ss -nita 1>&2 -o "dport = :$port"
-
+ mptcp_lib_pr_fail "client exit code $retc, server $rets"
+ echo -e "\nnetns ${listener_ns} socket stat for ${port}:" 1>&2
+ ip netns exec ${listener_ns} ss -Menita 1>&2 -o "sport = :$port"
+ cat /tmp/${listener_ns}.out
+ echo -e "\nnetns ${connector_ns} socket stat for ${port}:" 1>&2
+ ip netns exec ${connector_ns} ss -Menita 1>&2 -o "dport = :$port"
+ [ ${listener_ns} != ${connector_ns} ] && cat /tmp/${connector_ns}.out
+
+ echo
cat "$capout"
+ mptcp_lib_result_fail "${TEST_GROUP}: ${result_msg}"
return 1
fi
- check_transfer $sin $cout "file received by client"
+ mptcp_lib_check_transfer $sin $cout "file received by client"
retc=$?
- check_transfer $cin $sout "file received by server"
+ mptcp_lib_check_transfer $cin $sout "file received by server"
rets=$?
- if [ $retc -eq 0 ] && [ $rets -eq 0 ];then
- echo "$duration [ OK ]"
- cat "$capout"
- return 0
+ local extra=""
+ local stat_synrx_now_l
+ local stat_ackrx_now_l
+ local stat_cookietx_now
+ local stat_cookierx_now
+ local stat_ooo_now
+ stat_synrx_now_l=$(mptcp_lib_get_counter "${listener_ns}" "MPTcpExtMPCapableSYNRX")
+ stat_ackrx_now_l=$(mptcp_lib_get_counter "${listener_ns}" "MPTcpExtMPCapableACKRX")
+ stat_cookietx_now=$(mptcp_lib_get_counter "${listener_ns}" "TcpExtSyncookiesSent")
+ stat_cookierx_now=$(mptcp_lib_get_counter "${listener_ns}" "TcpExtSyncookiesRecv")
+ stat_ooo_now=$(mptcp_lib_get_counter "${listener_ns}" "TcpExtTCPOFOQueue")
+
+ expect_synrx=$((stat_synrx_last_l))
+ expect_ackrx=$((stat_ackrx_last_l))
+
+ cookies=$(ip netns exec ${listener_ns} sysctl net.ipv4.tcp_syncookies)
+ cookies=${cookies##*=}
+
+ if [ ${cl_proto} = "MPTCP" ] && [ ${srv_proto} = "MPTCP" ]; then
+ expect_synrx=$((stat_synrx_last_l+connect_per_transfer))
+ expect_ackrx=$((stat_ackrx_last_l+connect_per_transfer))
+ fi
+
+ if [ ${stat_synrx_now_l} -lt ${expect_synrx} ]; then
+ mptcp_lib_pr_fail "lower MPC SYN rx (${stat_synrx_now_l})" \
+ "than expected (${expect_synrx})"
+ retc=1
+ fi
+ if [ ${stat_ackrx_now_l} -lt ${expect_ackrx} ] && [ ${stat_ooo_now} -eq 0 ]; then
+ if [ ${stat_ooo_now} -eq 0 ]; then
+ mptcp_lib_pr_fail "lower MPC ACK rx (${stat_ackrx_now_l})" \
+ "than expected (${expect_ackrx})"
+ rets=1
+ else
+ extra+=" [ Note ] fallback due to TCP OoO"
+ fi
+ fi
+
+ if $checksum; then
+ local csum_err_s
+ local csum_err_c
+ csum_err_s=$(mptcp_lib_get_counter "${listener_ns}" "MPTcpExtDataCsumErr")
+ csum_err_c=$(mptcp_lib_get_counter "${connector_ns}" "MPTcpExtDataCsumErr")
+
+ local csum_err_s_nr=$((csum_err_s - stat_csum_err_s))
+ if [ $csum_err_s_nr -gt 0 ]; then
+ mptcp_lib_pr_fail "server got ${csum_err_s_nr} data checksum error[s]"
+ rets=1
+ fi
+
+ local csum_err_c_nr=$((csum_err_c - stat_csum_err_c))
+ if [ $csum_err_c_nr -gt 0 ]; then
+ mptcp_lib_pr_fail "client got ${csum_err_c_nr} data checksum error[s]"
+ retc=1
+ fi
+ fi
+
+ if [ $cookies -eq 2 ];then
+ if [ $stat_cookietx_last -ge $stat_cookietx_now ] ;then
+ extra+=" WARN: CookieSent: did not advance"
+ fi
+ if [ $stat_cookierx_last -ge $stat_cookierx_now ] ;then
+ extra+=" WARN: CookieRecv: did not advance"
+ fi
+ else
+ if [ $stat_cookietx_last -ne $stat_cookietx_now ] ;then
+ extra+=" WARN: CookieSent: changed"
+ fi
+ if [ $stat_cookierx_last -ne $stat_cookierx_now ] ;then
+ extra+=" WARN: CookieRecv: changed"
+ fi
+ fi
+
+ if [ ${stat_synrx_now_l} -gt ${expect_synrx} ]; then
+ extra+=" WARN: SYNRX: expect ${expect_synrx},"
+ extra+=" got ${stat_synrx_now_l} (probably retransmissions)"
+ fi
+ if [ ${stat_ackrx_now_l} -gt ${expect_ackrx} ]; then
+ extra+=" WARN: ACKRX: expect ${expect_ackrx},"
+ extra+=" got ${stat_ackrx_now_l} (probably retransmissions)"
+ fi
+
+ if [ $retc -eq 0 ] && [ $rets -eq 0 ]; then
+ mptcp_lib_pr_ok "${extra:1}"
+ mptcp_lib_result_pass "${TEST_GROUP}: ${result_msg}"
+ else
+ if [ -n "${extra}" ]; then
+ mptcp_lib_print_warn "${extra:1}"
+ fi
+ mptcp_lib_result_fail "${TEST_GROUP}: ${result_msg}"
fi
cat "$capout"
- return 1
+ [ $retc -eq 0 ] && [ $rets -eq 0 ]
}
make_file()
{
local name=$1
local who=$2
+ local SIZE=$filesize
+ local ksize
+ local rem
+
+ if [ $SIZE -eq 0 ]; then
+ local MAXSIZE=$((1024 * 1024 * 8))
+ local MINSIZE=$((1024 * 256))
- local SIZE TSIZE
- SIZE=$((RANDOM % (1024 * 8)))
- TSIZE=$((SIZE * 1024))
+ SIZE=$(((RANDOM * RANDOM + MINSIZE) % MAXSIZE))
+ fi
- dd if=/dev/urandom of="$name" bs=1024 count=$SIZE 2> /dev/null
+ ksize=$((SIZE / 1024))
+ rem=$((SIZE - (ksize * 1024)))
- SIZE=$((RANDOM % 1024))
- SIZE=$((SIZE + 128))
- TSIZE=$((TSIZE + SIZE))
- dd if=/dev/urandom conv=notrunc of="$name" bs=1 count=$SIZE 2> /dev/null
- echo -e "\nMPTCP_TEST_FILE_END_MARKER" >> "$name"
+ mptcp_lib_make_file $name 1024 $ksize
+ dd if=/dev/urandom conv=notrunc of="$name" oflag=append bs=1 count=$rem 2> /dev/null
- echo "Created $name (size $TSIZE) containing data sent by $who"
+ echo "Created $name (size $(du -b "$name")) containing data sent by $who"
}
run_tests_lo()
@@ -471,6 +577,7 @@ run_tests_lo()
local connector_ns="$2"
local connect_addr="$3"
local loopback="$4"
+ local extra_args="$5"
local lret=0
# skip if test programs are running inside same netns for subsequent runs.
@@ -479,43 +586,58 @@ run_tests_lo()
fi
# skip if we don't want v6
- if ! $ipv6 && is_v6 "${connect_addr}"; then
+ if ! $ipv6 && mptcp_lib_is_v6 "${connect_addr}"; then
return 0
fi
local local_addr
- if is_v6 "${connect_addr}"; then
+ if mptcp_lib_is_v6 "${connect_addr}"; then
local_addr="::"
else
local_addr="0.0.0.0"
fi
- do_transfer ${listener_ns} ${connector_ns} MPTCP MPTCP ${connect_addr} ${local_addr}
+ do_transfer ${listener_ns} ${connector_ns} MPTCP MPTCP \
+ ${connect_addr} ${local_addr} "${extra_args}"
lret=$?
if [ $lret -ne 0 ]; then
ret=$lret
return 1
fi
- # don't bother testing fallback tcp except for loopback case.
- if [ ${listener_ns} != ${connector_ns} ]; then
- return 0
+ if [ $do_tcp -eq 0 ]; then
+ # don't bother testing fallback tcp except for loopback case.
+ if [ ${listener_ns} != ${connector_ns} ]; then
+ return 0
+ fi
fi
- do_transfer ${listener_ns} ${connector_ns} MPTCP TCP ${connect_addr} ${local_addr}
+ do_transfer ${listener_ns} ${connector_ns} MPTCP TCP \
+ ${connect_addr} ${local_addr} "${extra_args}"
lret=$?
if [ $lret -ne 0 ]; then
ret=$lret
return 1
fi
- do_transfer ${listener_ns} ${connector_ns} TCP MPTCP ${connect_addr} ${local_addr}
+ do_transfer ${listener_ns} ${connector_ns} TCP MPTCP \
+ ${connect_addr} ${local_addr} "${extra_args}"
lret=$?
if [ $lret -ne 0 ]; then
ret=$lret
return 1
fi
+ if [ $do_tcp -gt 1 ] ;then
+ do_transfer ${listener_ns} ${connector_ns} TCP TCP \
+ ${connect_addr} ${local_addr} "${extra_args}"
+ lret=$?
+ if [ $lret -ne 0 ]; then
+ ret=$lret
+ return 1
+ fi
+ fi
+
return 0
}
@@ -524,14 +646,204 @@ run_tests()
run_tests_lo $1 $2 $3 0
}
+run_test_transparent()
+{
+ local connect_addr="$1"
+ local msg="$2"
+
+ local connector_ns="$ns1"
+ local listener_ns="$ns2"
+ local lret=0
+ local r6flag=""
+
+ TEST_GROUP="${msg}"
+
+ # skip if we don't want v6
+ if ! $ipv6 && mptcp_lib_is_v6 "${connect_addr}"; then
+ return 0
+ fi
+
+ # IP(V6)_TRANSPARENT has been added after TOS support which came with
+ # the required infrastructure in MPTCP sockopt code. To support TOS, the
+ # following function has been exported (T). Not great but better than
+ # checking for a specific kernel version.
+ if ! mptcp_lib_kallsyms_has "T __ip_sock_set_tos$"; then
+ mptcp_lib_pr_skip "${msg} not supported by the kernel"
+ mptcp_lib_result_skip "${TEST_GROUP}"
+ return
+ fi
+
+ if ! ip netns exec "$listener_ns" nft -f /dev/stdin <<"EOF"
+flush ruleset
+table inet mangle {
+ chain divert {
+ type filter hook prerouting priority -150;
+
+ meta l4proto tcp socket transparent 1 meta mark set 1 accept
+ tcp dport 20000 tproxy to :20000 meta mark set 1 accept
+ }
+}
+EOF
+ then
+ mptcp_lib_pr_skip "$msg, could not load nft ruleset"
+ mptcp_lib_fail_if_expected_feature "nft rules"
+ mptcp_lib_result_skip "${TEST_GROUP}"
+ return
+ fi
+
+ local local_addr
+ if mptcp_lib_is_v6 "${connect_addr}"; then
+ local_addr="::"
+ r6flag="-6"
+ else
+ local_addr="0.0.0.0"
+ fi
+
+ if ! ip -net "$listener_ns" $r6flag rule add fwmark 1 lookup 100; then
+ ip netns exec "$listener_ns" nft flush ruleset
+ mptcp_lib_pr_skip "$msg, ip $r6flag rule failed"
+ mptcp_lib_fail_if_expected_feature "ip rule"
+ mptcp_lib_result_skip "${TEST_GROUP}"
+ return
+ fi
+
+ if ! ip -net "$listener_ns" route add local $local_addr/0 dev lo table 100; then
+ ip netns exec "$listener_ns" nft flush ruleset
+ ip -net "$listener_ns" $r6flag rule del fwmark 1 lookup 100
+ mptcp_lib_pr_skip "$msg, ip route add local $local_addr failed"
+ mptcp_lib_fail_if_expected_feature "ip route"
+ mptcp_lib_result_skip "${TEST_GROUP}"
+ return
+ fi
+
+ mptcp_lib_pr_info "test $msg"
+
+ port=$((20000 - 1))
+ local extra_args="-o TRANSPARENT"
+ do_transfer ${listener_ns} ${connector_ns} MPTCP MPTCP \
+ ${connect_addr} ${local_addr} "${extra_args}"
+ lret=$?
+
+ ip netns exec "$listener_ns" nft flush ruleset
+ ip -net "$listener_ns" $r6flag rule del fwmark 1 lookup 100
+ ip -net "$listener_ns" route del local $local_addr/0 dev lo table 100
+
+ if [ $lret -ne 0 ]; then
+ mptcp_lib_pr_fail "$msg, mptcp connection error"
+ ret=$lret
+ return 1
+ fi
+
+ mptcp_lib_pr_info "$msg pass"
+ return 0
+}
+
+run_tests_peekmode()
+{
+ local peekmode="$1"
+
+ TEST_GROUP="peek mode: ${peekmode}"
+ mptcp_lib_pr_info "with peek mode: ${peekmode}"
+ run_tests_lo "$ns1" "$ns1" 10.0.1.1 1 "-P ${peekmode}"
+ run_tests_lo "$ns1" "$ns1" dead:beef:1::1 1 "-P ${peekmode}"
+}
+
+run_tests_mptfo()
+{
+ TEST_GROUP="MPTFO"
+
+ if ! mptcp_lib_kallsyms_has "mptcp_fastopen_"; then
+ mptcp_lib_pr_skip "TFO not supported by the kernel"
+ mptcp_lib_result_skip "${TEST_GROUP}"
+ return
+ fi
+
+ mptcp_lib_pr_info "with MPTFO start"
+ ip netns exec "$ns1" sysctl -q net.ipv4.tcp_fastopen=2
+ ip netns exec "$ns2" sysctl -q net.ipv4.tcp_fastopen=1
+
+ run_tests_lo "$ns1" "$ns2" 10.0.1.1 0 "-o MPTFO"
+ run_tests_lo "$ns1" "$ns2" 10.0.1.1 0 "-o MPTFO"
+
+ run_tests_lo "$ns1" "$ns2" dead:beef:1::1 0 "-o MPTFO"
+ run_tests_lo "$ns1" "$ns2" dead:beef:1::1 0 "-o MPTFO"
+
+ ip netns exec "$ns1" sysctl -q net.ipv4.tcp_fastopen=0
+ ip netns exec "$ns2" sysctl -q net.ipv4.tcp_fastopen=0
+ mptcp_lib_pr_info "with MPTFO end"
+}
+
+run_tests_disconnect()
+{
+ local old_cin=$cin
+ local old_sin=$sin
+
+ TEST_GROUP="full disconnect"
+
+ if ! mptcp_lib_kallsyms_has "mptcp_pm_data_reset$"; then
+ mptcp_lib_pr_skip "Full disconnect not supported"
+ mptcp_lib_result_skip "${TEST_GROUP}"
+ return
+ fi
+
+ cat $cin $cin $cin > "$cin".disconnect
+
+ # force do_transfer to cope with the multiple transmissions
+ sin="$cin.disconnect"
+ cin="$cin.disconnect"
+ cin_disconnect="$old_cin"
+ connect_per_transfer=3
+
+ mptcp_lib_pr_info "disconnect"
+ run_tests_lo "$ns1" "$ns1" 10.0.1.1 1 "-I 3 -i $old_cin"
+ run_tests_lo "$ns1" "$ns1" dead:beef:1::1 1 "-I 3 -i $old_cin"
+
+ # restore previous status
+ sin=$old_sin
+ cin=$old_cin
+ cin_disconnect="$cin".disconnect
+ connect_per_transfer=1
+}
+
+display_time()
+{
+ time_end=$(date +%s)
+ time_run=$((time_end-time_start))
+
+ echo "Time: ${time_run} seconds"
+}
+
+log_if_error()
+{
+ local msg="$1"
+
+ if [ ${ret} -ne 0 ]; then
+ mptcp_lib_pr_fail "${msg}"
+
+ final_ret=${ret}
+ ret=${KSFT_PASS}
+
+ return ${final_ret}
+ fi
+}
+
+stop_if_error()
+{
+ if ! log_if_error "${@}"; then
+ display_time
+ mptcp_lib_result_print_all_tap
+ exit ${final_ret}
+ fi
+}
+
make_file "$cin" "client"
make_file "$sin" "server"
check_mptcp_disabled
-check_mptcp_ulp_setsockopt
+stop_if_error "The kernel configuration is not valid for MPTCP"
-echo "INFO: validating network environment with pings"
+print_larger_title "Validating network environment with pings"
for sender in "$ns1" "$ns2" "$ns3" "$ns4";do
do_ping "$ns1" $sender 10.0.1.1
do_ping "$ns1" $sender dead:beef:1::1
@@ -550,43 +862,59 @@ for sender in "$ns1" "$ns2" "$ns3" "$ns4";do
do_ping "$ns4" $sender dead:beef:3::1
done
-[ -n "$tc_loss" ] && tc -net "$ns2" qdisc add dev ns2eth3 root netem loss random $tc_loss
-echo -n "INFO: Using loss of $tc_loss "
-test "$tc_delay" -gt 0 && echo -n "delay $tc_delay ms "
+mptcp_lib_result_code "${ret}" "ping tests"
+
+stop_if_error "Could not even run ping tests"
+mptcp_lib_pr_ok
+
+[ -n "$tc_loss" ] && tc -net "$ns2" qdisc add dev ns2eth3 root netem loss random $tc_loss delay ${tc_delay}ms
+tc_info="loss of $tc_loss "
+test "$tc_delay" -gt 0 && tc_info+="delay $tc_delay ms "
+
+reorder_delay=$((tc_delay / 4))
if [ -z "${tc_reorder}" ]; then
reorder1=$((RANDOM%10))
reorder1=$((100 - reorder1))
reorder2=$((RANDOM%100))
- if [ $tc_delay -gt 0 ] && [ $reorder1 -lt 100 ] && [ $reorder2 -gt 0 ]; then
+ if [ $reorder_delay -gt 0 ] && [ $reorder1 -lt 100 ] && [ $reorder2 -gt 0 ]; then
tc_reorder="reorder ${reorder1}% ${reorder2}%"
- echo -n "$tc_reorder "
+ tc_info+="$tc_reorder with delay ${reorder_delay}ms "
fi
elif [ "$tc_reorder" = "0" ];then
tc_reorder=""
-elif [ "$tc_delay" -gt 0 ];then
+elif [ "$reorder_delay" -gt 0 ];then
# reordering requires some delay
tc_reorder="reorder $tc_reorder"
- echo -n "$tc_reorder "
+ tc_info+="$tc_reorder with delay ${reorder_delay}ms "
fi
-echo "on ns3eth4"
+mptcp_lib_pr_info "Using ${tc_info}on ns3eth4"
+
+tc -net "$ns3" qdisc add dev ns3eth4 root netem delay ${reorder_delay}ms $tc_reorder
-tc -net "$ns3" qdisc add dev ns3eth4 root netem delay ${tc_delay}ms $tc_reorder
+TEST_GROUP="loopback v4"
+run_tests_lo "$ns1" "$ns1" 10.0.1.1 1
+stop_if_error "Could not even run loopback test"
+TEST_GROUP="loopback v6"
+run_tests_lo "$ns1" "$ns1" dead:beef:1::1 1
+stop_if_error "Could not even run loopback v6 test"
+
+TEST_GROUP="multihosts"
for sender in $ns1 $ns2 $ns3 $ns4;do
- run_tests_lo "$ns1" "$sender" 10.0.1.1 1
- if [ $ret -ne 0 ] ;then
- echo "FAIL: Could not even run loopback test" 1>&2
- exit $ret
- fi
- run_tests_lo "$ns1" $sender dead:beef:1::1 1
- if [ $ret -ne 0 ] ;then
- echo "FAIL: Could not even run loopback v6 test" 2>&1
- exit $ret
+ # ns1<->ns2 is not subject to reordering/tc delays. Use it to test
+ # mptcp syncookie support.
+ if [ $sender = $ns1 ]; then
+ ip netns exec "$ns2" sysctl -q net.ipv4.tcp_syncookies=2
+ else
+ ip netns exec "$ns2" sysctl -q net.ipv4.tcp_syncookies=1
fi
+ run_tests "$ns1" $sender 10.0.1.1
+ run_tests "$ns1" $sender dead:beef:1::1
+
run_tests "$ns2" $sender 10.0.1.2
run_tests "$ns2" $sender dead:beef:1::2
run_tests "$ns2" $sender 10.0.2.1
@@ -599,11 +927,26 @@ for sender in $ns1 $ns2 $ns3 $ns4;do
run_tests "$ns4" $sender 10.0.3.1
run_tests "$ns4" $sender dead:beef:3::1
+
+ log_if_error "Tests with $sender as a sender have failed"
done
-time_end=$(date +%s)
-time_run=$((time_end-time_start))
+run_tests_peekmode "saveWithPeek"
+run_tests_peekmode "saveAfterPeek"
+log_if_error "Tests with peek mode have failed"
+
+# MPTFO (MultiPath TCP Fatopen tests)
+run_tests_mptfo
+log_if_error "Tests with MPTFO have failed"
+
+# connect to ns4 ip address, ns2 should intercept/proxy
+run_test_transparent 10.0.3.1 "tproxy ipv4"
+run_test_transparent dead:beef:3::1 "tproxy ipv6"
+log_if_error "Tests with tproxy have failed"
-echo "Time: ${time_run} seconds"
+run_tests_disconnect
+log_if_error "Tests of the full disconnection have failed"
-exit $ret
+display_time
+mptcp_lib_result_print_all_tap
+exit ${final_ret}
diff --git a/tools/testing/selftests/net/mptcp/mptcp_inq.c b/tools/testing/selftests/net/mptcp/mptcp_inq.c
new file mode 100644
index 000000000000..218aac467321
--- /dev/null
+++ b/tools/testing/selftests/net/mptcp/mptcp_inq.c
@@ -0,0 +1,599 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#define _GNU_SOURCE
+
+#include <assert.h>
+#include <errno.h>
+#include <fcntl.h>
+#include <limits.h>
+#include <string.h>
+#include <stdarg.h>
+#include <stdbool.h>
+#include <stdint.h>
+#include <inttypes.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <strings.h>
+#include <unistd.h>
+#include <time.h>
+
+#include <sys/ioctl.h>
+#include <sys/random.h>
+#include <sys/socket.h>
+#include <sys/types.h>
+#include <sys/wait.h>
+
+#include <netdb.h>
+#include <netinet/in.h>
+
+#include <linux/tcp.h>
+#include <linux/sockios.h>
+
+#ifndef IPPROTO_MPTCP
+#define IPPROTO_MPTCP 262
+#endif
+#ifndef SOL_MPTCP
+#define SOL_MPTCP 284
+#endif
+
+static int pf = AF_INET;
+static int proto_tx = IPPROTO_MPTCP;
+static int proto_rx = IPPROTO_MPTCP;
+
+static void die_perror(const char *msg)
+{
+ perror(msg);
+ exit(1);
+}
+
+static void die_usage(int r)
+{
+ fprintf(stderr, "Usage: mptcp_inq [-6] [ -t tcp|mptcp ] [ -r tcp|mptcp]\n");
+ exit(r);
+}
+
+static void xerror(const char *fmt, ...)
+{
+ va_list ap;
+
+ va_start(ap, fmt);
+ vfprintf(stderr, fmt, ap);
+ va_end(ap);
+ fputc('\n', stderr);
+ exit(1);
+}
+
+static const char *getxinfo_strerr(int err)
+{
+ if (err == EAI_SYSTEM)
+ return strerror(errno);
+
+ return gai_strerror(err);
+}
+
+static void xgetaddrinfo(const char *node, const char *service,
+ const struct addrinfo *hints,
+ struct addrinfo **res)
+{
+ int err = getaddrinfo(node, service, hints, res);
+
+ if (err) {
+ const char *errstr = getxinfo_strerr(err);
+
+ fprintf(stderr, "Fatal: getaddrinfo(%s:%s): %s\n",
+ node ? node : "", service ? service : "", errstr);
+ exit(1);
+ }
+}
+
+static int sock_listen_mptcp(const char * const listenaddr,
+ const char * const port)
+{
+ int sock = -1;
+ struct addrinfo hints = {
+ .ai_protocol = IPPROTO_TCP,
+ .ai_socktype = SOCK_STREAM,
+ .ai_flags = AI_PASSIVE | AI_NUMERICHOST
+ };
+
+ hints.ai_family = pf;
+
+ struct addrinfo *a, *addr;
+ int one = 1;
+
+ xgetaddrinfo(listenaddr, port, &hints, &addr);
+ hints.ai_family = pf;
+
+ for (a = addr; a; a = a->ai_next) {
+ sock = socket(a->ai_family, a->ai_socktype, proto_rx);
+ if (sock < 0)
+ continue;
+
+ if (-1 == setsockopt(sock, SOL_SOCKET, SO_REUSEADDR, &one,
+ sizeof(one)))
+ perror("setsockopt");
+
+ if (bind(sock, a->ai_addr, a->ai_addrlen) == 0)
+ break; /* success */
+
+ perror("bind");
+ close(sock);
+ sock = -1;
+ }
+
+ freeaddrinfo(addr);
+
+ if (sock < 0)
+ xerror("could not create listen socket");
+
+ if (listen(sock, 20))
+ die_perror("listen");
+
+ return sock;
+}
+
+static int sock_connect_mptcp(const char * const remoteaddr,
+ const char * const port, int proto)
+{
+ struct addrinfo hints = {
+ .ai_protocol = IPPROTO_TCP,
+ .ai_socktype = SOCK_STREAM,
+ };
+ struct addrinfo *a, *addr;
+ int sock = -1;
+
+ hints.ai_family = pf;
+
+ xgetaddrinfo(remoteaddr, port, &hints, &addr);
+ for (a = addr; a; a = a->ai_next) {
+ sock = socket(a->ai_family, a->ai_socktype, proto);
+ if (sock < 0)
+ continue;
+
+ if (connect(sock, a->ai_addr, a->ai_addrlen) == 0)
+ break; /* success */
+
+ die_perror("connect");
+ }
+
+ if (sock < 0)
+ xerror("could not create connect socket");
+
+ freeaddrinfo(addr);
+ return sock;
+}
+
+static int protostr_to_num(const char *s)
+{
+ if (strcasecmp(s, "tcp") == 0)
+ return IPPROTO_TCP;
+ if (strcasecmp(s, "mptcp") == 0)
+ return IPPROTO_MPTCP;
+
+ die_usage(1);
+ return 0;
+}
+
+static void parse_opts(int argc, char **argv)
+{
+ int c;
+
+ while ((c = getopt(argc, argv, "h6t:r:")) != -1) {
+ switch (c) {
+ case 'h':
+ die_usage(0);
+ break;
+ case '6':
+ pf = AF_INET6;
+ break;
+ case 't':
+ proto_tx = protostr_to_num(optarg);
+ break;
+ case 'r':
+ proto_rx = protostr_to_num(optarg);
+ break;
+ default:
+ die_usage(1);
+ break;
+ }
+ }
+}
+
+/* wait up to timeout milliseconds */
+static void wait_for_ack(int fd, int timeout, size_t total)
+{
+ int i;
+
+ for (i = 0; i < timeout; i++) {
+ int nsd, ret, queued = -1;
+ struct timespec req;
+
+ ret = ioctl(fd, TIOCOUTQ, &queued);
+ if (ret < 0)
+ die_perror("TIOCOUTQ");
+
+ ret = ioctl(fd, SIOCOUTQNSD, &nsd);
+ if (ret < 0)
+ die_perror("SIOCOUTQNSD");
+
+ if ((size_t)queued > total)
+ xerror("TIOCOUTQ %u, but only %zu expected\n", queued, total);
+ assert(nsd <= queued);
+
+ if (queued == 0)
+ return;
+
+ /* wait for peer to ack rx of all data */
+ req.tv_sec = 0;
+ req.tv_nsec = 1 * 1000 * 1000ul; /* 1ms */
+ nanosleep(&req, NULL);
+ }
+
+ xerror("still tx data queued after %u ms\n", timeout);
+}
+
+static void connect_one_server(int fd, int unixfd)
+{
+ size_t len, i, total, sent;
+ char buf[4096], buf2[4096];
+ ssize_t ret;
+
+ len = rand() % (sizeof(buf) - 1);
+
+ if (len < 128)
+ len = 128;
+
+ for (i = 0; i < len ; i++) {
+ buf[i] = rand() % 26;
+ buf[i] += 'A';
+ }
+
+ buf[i] = '\n';
+
+ /* un-block server */
+ ret = read(unixfd, buf2, 4);
+ assert(ret == 4);
+
+ assert(strncmp(buf2, "xmit", 4) == 0);
+
+ ret = write(unixfd, &len, sizeof(len));
+ assert(ret == (ssize_t)sizeof(len));
+
+ ret = write(fd, buf, len);
+ if (ret < 0)
+ die_perror("write");
+
+ if (ret != (ssize_t)len)
+ xerror("short write");
+
+ ret = read(unixfd, buf2, 4);
+ assert(strncmp(buf2, "huge", 4) == 0);
+
+ total = rand() % (16 * 1024 * 1024);
+ total += (1 * 1024 * 1024);
+ sent = total;
+
+ ret = write(unixfd, &total, sizeof(total));
+ assert(ret == (ssize_t)sizeof(total));
+
+ wait_for_ack(fd, 5000, len);
+
+ while (total > 0) {
+ if (total > sizeof(buf))
+ len = sizeof(buf);
+ else
+ len = total;
+
+ ret = write(fd, buf, len);
+ if (ret < 0)
+ die_perror("write");
+ total -= ret;
+
+ /* we don't have to care about buf content, only
+ * number of total bytes sent
+ */
+ }
+
+ ret = read(unixfd, buf2, 4);
+ assert(ret == 4);
+ assert(strncmp(buf2, "shut", 4) == 0);
+
+ wait_for_ack(fd, 5000, sent);
+
+ ret = write(fd, buf, 1);
+ assert(ret == 1);
+ close(fd);
+ ret = write(unixfd, "closed", 6);
+ assert(ret == 6);
+
+ close(unixfd);
+}
+
+static void get_tcp_inq(struct msghdr *msgh, unsigned int *inqv)
+{
+ struct cmsghdr *cmsg;
+
+ for (cmsg = CMSG_FIRSTHDR(msgh); cmsg ; cmsg = CMSG_NXTHDR(msgh, cmsg)) {
+ if (cmsg->cmsg_level == IPPROTO_TCP && cmsg->cmsg_type == TCP_CM_INQ) {
+ memcpy(inqv, CMSG_DATA(cmsg), sizeof(*inqv));
+ return;
+ }
+ }
+
+ xerror("could not find TCP_CM_INQ cmsg type");
+}
+
+static void process_one_client(int fd, int unixfd)
+{
+ unsigned int tcp_inq;
+ size_t expect_len;
+ char msg_buf[4096];
+ char buf[4096];
+ char tmp[16];
+ struct iovec iov = {
+ .iov_base = buf,
+ .iov_len = 1,
+ };
+ struct msghdr msg = {
+ .msg_iov = &iov,
+ .msg_iovlen = 1,
+ .msg_control = msg_buf,
+ .msg_controllen = sizeof(msg_buf),
+ };
+ ssize_t ret, tot;
+
+ ret = write(unixfd, "xmit", 4);
+ assert(ret == 4);
+
+ ret = read(unixfd, &expect_len, sizeof(expect_len));
+ assert(ret == (ssize_t)sizeof(expect_len));
+
+ if (expect_len > sizeof(buf))
+ xerror("expect len %zu exceeds buffer size", expect_len);
+
+ for (;;) {
+ struct timespec req;
+ unsigned int queued;
+
+ ret = ioctl(fd, FIONREAD, &queued);
+ if (ret < 0)
+ die_perror("FIONREAD");
+ if (queued > expect_len)
+ xerror("FIONREAD returned %u, but only %zu expected\n",
+ queued, expect_len);
+ if (queued == expect_len)
+ break;
+
+ req.tv_sec = 0;
+ req.tv_nsec = 1000 * 1000ul;
+ nanosleep(&req, NULL);
+ }
+
+ /* read one byte, expect cmsg to return expected - 1 */
+ ret = recvmsg(fd, &msg, 0);
+ if (ret < 0)
+ die_perror("recvmsg");
+
+ if (msg.msg_controllen == 0)
+ xerror("msg_controllen is 0");
+
+ get_tcp_inq(&msg, &tcp_inq);
+
+ assert((size_t)tcp_inq == (expect_len - 1));
+
+ iov.iov_len = sizeof(buf);
+ ret = recvmsg(fd, &msg, 0);
+ if (ret < 0)
+ die_perror("recvmsg");
+
+ /* should have gotten exact remainder of all pending data */
+ assert(ret == (ssize_t)tcp_inq);
+
+ /* should be 0, all drained */
+ get_tcp_inq(&msg, &tcp_inq);
+ assert(tcp_inq == 0);
+
+ /* request a large swath of data. */
+ ret = write(unixfd, "huge", 4);
+ assert(ret == 4);
+
+ ret = read(unixfd, &expect_len, sizeof(expect_len));
+ assert(ret == (ssize_t)sizeof(expect_len));
+
+ /* peer should send us a few mb of data */
+ if (expect_len <= sizeof(buf))
+ xerror("expect len %zu too small\n", expect_len);
+
+ tot = 0;
+ do {
+ iov.iov_len = sizeof(buf);
+ ret = recvmsg(fd, &msg, 0);
+ if (ret < 0)
+ die_perror("recvmsg");
+
+ tot += ret;
+
+ get_tcp_inq(&msg, &tcp_inq);
+
+ if (tcp_inq > expect_len - tot)
+ xerror("inq %d, remaining %d total_len %d\n",
+ tcp_inq, expect_len - tot, (int)expect_len);
+
+ assert(tcp_inq <= expect_len - tot);
+ } while ((size_t)tot < expect_len);
+
+ ret = write(unixfd, "shut", 4);
+ assert(ret == 4);
+
+ /* wait for hangup. Should have received one more byte of data. */
+ ret = read(unixfd, tmp, sizeof(tmp));
+ assert(ret == 6);
+ assert(strncmp(tmp, "closed", 6) == 0);
+
+ sleep(1);
+
+ iov.iov_len = 1;
+ ret = recvmsg(fd, &msg, 0);
+ if (ret < 0)
+ die_perror("recvmsg");
+ assert(ret == 1);
+
+ get_tcp_inq(&msg, &tcp_inq);
+
+ /* tcp_inq should be 1 due to received fin. */
+ assert(tcp_inq == 1);
+
+ iov.iov_len = 1;
+ ret = recvmsg(fd, &msg, 0);
+ if (ret < 0)
+ die_perror("recvmsg");
+
+ /* expect EOF */
+ assert(ret == 0);
+ get_tcp_inq(&msg, &tcp_inq);
+ assert(tcp_inq == 1);
+
+ close(fd);
+}
+
+static int xaccept(int s)
+{
+ int fd = accept(s, NULL, 0);
+
+ if (fd < 0)
+ die_perror("accept");
+
+ return fd;
+}
+
+static int server(int unixfd)
+{
+ int fd = -1, r, on = 1;
+
+ switch (pf) {
+ case AF_INET:
+ fd = sock_listen_mptcp("127.0.0.1", "15432");
+ break;
+ case AF_INET6:
+ fd = sock_listen_mptcp("::1", "15432");
+ break;
+ default:
+ xerror("Unknown pf %d\n", pf);
+ break;
+ }
+
+ r = write(unixfd, "conn", 4);
+ assert(r == 4);
+
+ alarm(15);
+ r = xaccept(fd);
+
+ if (-1 == setsockopt(r, IPPROTO_TCP, TCP_INQ, &on, sizeof(on)))
+ die_perror("setsockopt");
+
+ process_one_client(r, unixfd);
+
+ return 0;
+}
+
+static int client(int unixfd)
+{
+ int fd = -1;
+
+ alarm(15);
+
+ switch (pf) {
+ case AF_INET:
+ fd = sock_connect_mptcp("127.0.0.1", "15432", proto_tx);
+ break;
+ case AF_INET6:
+ fd = sock_connect_mptcp("::1", "15432", proto_tx);
+ break;
+ default:
+ xerror("Unknown pf %d\n", pf);
+ }
+
+ connect_one_server(fd, unixfd);
+
+ return 0;
+}
+
+static void init_rng(void)
+{
+ unsigned int foo;
+
+ if (getrandom(&foo, sizeof(foo), 0) == -1) {
+ perror("getrandom");
+ exit(1);
+ }
+
+ srand(foo);
+}
+
+static pid_t xfork(void)
+{
+ pid_t p = fork();
+
+ if (p < 0)
+ die_perror("fork");
+ else if (p == 0)
+ init_rng();
+
+ return p;
+}
+
+static int rcheck(int wstatus, const char *what)
+{
+ if (WIFEXITED(wstatus)) {
+ if (WEXITSTATUS(wstatus) == 0)
+ return 0;
+ fprintf(stderr, "%s exited, status=%d\n", what, WEXITSTATUS(wstatus));
+ return WEXITSTATUS(wstatus);
+ } else if (WIFSIGNALED(wstatus)) {
+ xerror("%s killed by signal %d\n", what, WTERMSIG(wstatus));
+ } else if (WIFSTOPPED(wstatus)) {
+ xerror("%s stopped by signal %d\n", what, WSTOPSIG(wstatus));
+ }
+
+ return 111;
+}
+
+int main(int argc, char *argv[])
+{
+ int e1, e2, wstatus;
+ pid_t s, c, ret;
+ int unixfds[2];
+
+ parse_opts(argc, argv);
+
+ e1 = socketpair(AF_UNIX, SOCK_DGRAM, 0, unixfds);
+ if (e1 < 0)
+ die_perror("pipe");
+
+ s = xfork();
+ if (s == 0)
+ return server(unixfds[1]);
+
+ close(unixfds[1]);
+
+ /* wait until server bound a socket */
+ e1 = read(unixfds[0], &e1, 4);
+ assert(e1 == 4);
+
+ c = xfork();
+ if (c == 0)
+ return client(unixfds[0]);
+
+ close(unixfds[0]);
+
+ ret = waitpid(s, &wstatus, 0);
+ if (ret == -1)
+ die_perror("waitpid");
+ e1 = rcheck(wstatus, "server");
+ ret = waitpid(c, &wstatus, 0);
+ if (ret == -1)
+ die_perror("waitpid");
+ e2 = rcheck(wstatus, "client");
+
+ return e1 ? e1 : e2;
+}
diff --git a/tools/testing/selftests/net/mptcp/mptcp_join.sh b/tools/testing/selftests/net/mptcp/mptcp_join.sh
index dd42c2f692d0..5e9211e89825 100755
--- a/tools/testing/selftests/net/mptcp/mptcp_join.sh
+++ b/tools/testing/selftests/net/mptcp/mptcp_join.sh
@@ -1,41 +1,111 @@
#!/bin/bash
# SPDX-License-Identifier: GPL-2.0
+# Double quotes to prevent globbing and word splitting is recommended in new
+# code but we accept it, especially because there were too many before having
+# address all other issues detected by shellcheck.
+#shellcheck disable=SC2086
+
+# ShellCheck incorrectly believes that most of the code here is unreachable
+# because it's invoked by variable name, see how the "tests" array is used
+#shellcheck disable=SC2317
+
+. "$(dirname "${0}")/mptcp_lib.sh"
+
ret=0
sin=""
+sinfail=""
sout=""
cin=""
+cinfail=""
+cinsent=""
+tmpfile=""
cout=""
-ksft_skip=4
-timeout=30
-capture=0
+err=""
+capout=""
+ns1=""
+ns2=""
+iptables="iptables"
+ip6tables="ip6tables"
+timeout_poll=30
+timeout_test=$((timeout_poll * 2 + 1))
+capture=false
+checksum=false
+ip_mptcp=0
+check_invert=0
+validate_checksum=false
+init=0
+evts_ns1=""
+evts_ns2=""
+evts_ns1_pid=0
+evts_ns2_pid=0
+last_test_failed=0
+last_test_skipped=0
+last_test_ignored=1
+
+declare -A all_tests
+declare -a only_tests_ids
+declare -a only_tests_names
+declare -A failed_tests
+MPTCP_LIB_TEST_FORMAT="%03u %s\n"
+TEST_NAME=""
+nr_blank=6
-TEST_COUNT=0
+# These var are used only in some tests, make sure they are not already set
+unset FAILING_LINKS
+unset test_linkfail
+unset addr_nr_ns1
+unset addr_nr_ns2
+unset cestab_ns1
+unset cestab_ns2
+unset sflags
+unset fastclose
+unset fullmesh
+unset speed
-init()
+# generated using "nfbpf_compile '(ip && (ip[54] & 0xf0) == 0x30) ||
+# (ip6 && (ip6[74] & 0xf0) == 0x30)'"
+CBPF_MPTCP_SUBOPTION_ADD_ADDR="14,
+ 48 0 0 0,
+ 84 0 0 240,
+ 21 0 3 64,
+ 48 0 0 54,
+ 84 0 0 240,
+ 21 6 7 48,
+ 48 0 0 0,
+ 84 0 0 240,
+ 21 0 4 96,
+ 48 0 0 74,
+ 84 0 0 240,
+ 21 0 1 48,
+ 6 0 0 65535,
+ 6 0 0 0"
+
+init_partial()
{
capout=$(mktemp)
- rndh=$(printf %x $sec)-$(mktemp -u XXXXXX)
-
- ns1="ns1-$rndh"
- ns2="ns2-$rndh"
+ mptcp_lib_ns_init ns1 ns2
- for netns in "$ns1" "$ns2";do
- ip netns add $netns || exit $ksft_skip
- ip -net $netns link set lo up
- ip netns exec $netns sysctl -q net.mptcp.enabled=1
- ip netns exec $netns sysctl -q net.ipv4.conf.all.rp_filter=0
- ip netns exec $netns sysctl -q net.ipv4.conf.default.rp_filter=0
+ local netns
+ for netns in "$ns1" "$ns2"; do
+ ip netns exec $netns sysctl -q net.mptcp.pm_type=0 2>/dev/null || true
+ if $checksum; then
+ ip netns exec $netns sysctl -q net.mptcp.checksum_enabled=1
+ fi
done
- # ns1 ns2
+ check_invert=0
+ validate_checksum=$checksum
+
+ # ns1 ns2
# ns1eth1 ns2eth1
# ns1eth2 ns2eth2
# ns1eth3 ns2eth3
# ns1eth4 ns2eth4
- for i in `seq 1 4`; do
+ local i
+ for i in $(seq 1 4); do
ip link add ns1eth$i netns "$ns1" type veth peer name ns2eth$i netns "$ns2"
ip -net "$ns1" addr add 10.0.$i.1/24 dev ns1eth$i
ip -net "$ns1" addr add dead:beef:$i::1/64 dev ns1eth$i nodad
@@ -47,6 +117,16 @@ init()
# let $ns2 reach any $ns1 address from any interface
ip -net "$ns2" route add default via 10.0.$i.1 dev ns2eth$i metric 10$i
+ ip -net "$ns2" route add default via dead:beef:$i::1 dev ns2eth$i metric 10$i
+ done
+}
+
+init_shapers()
+{
+ local i
+ for i in $(seq 1 4); do
+ tc -n $ns1 qdisc add dev ns1eth$i root netem rate 20mbit delay 1
+ tc -n $ns2 qdisc add dev ns2eth$i root netem rate 20mbit delay 1
done
}
@@ -54,131 +134,986 @@ cleanup_partial()
{
rm -f "$capout"
- for netns in "$ns1" "$ns2"; do
- ip netns del $netns
- done
+ mptcp_lib_ns_exit "${ns1}" "${ns2}"
+}
+
+init() {
+ init=1
+
+ mptcp_lib_check_mptcp
+ mptcp_lib_check_kallsyms
+ mptcp_lib_check_tools ip ss "${iptables}" "${ip6tables}"
+
+ sin=$(mktemp)
+ sout=$(mktemp)
+ cin=$(mktemp)
+ cinsent=$(mktemp)
+ cout=$(mktemp)
+ err=$(mktemp)
+ evts_ns1=$(mktemp)
+ evts_ns2=$(mktemp)
+
+ trap cleanup EXIT
+
+ make_file "$cin" "client" 1 >/dev/null
+ make_file "$sin" "server" 1 >/dev/null
}
cleanup()
{
- rm -f "$cin" "$cout"
- rm -f "$sin" "$sout"
+ rm -f "$cin" "$cout" "$sinfail"
+ rm -f "$sin" "$sout" "$cinsent" "$cinfail"
+ rm -f "$tmpfile"
+ rm -rf $evts_ns1 $evts_ns2
+ rm -f "$err"
cleanup_partial
}
+print_check()
+{
+ printf "%-${nr_blank}s%-36s" " " "${*}"
+}
+
+print_info()
+{
+ # It can be empty, no need to print anything then
+ [ -z "${1}" ] && return
+
+ mptcp_lib_print_info " Info: ${*}"
+}
+
+print_ok()
+{
+ mptcp_lib_pr_ok "${@}"
+}
+
+print_fail()
+{
+ mptcp_lib_pr_fail "${@}"
+}
+
+print_skip()
+{
+ mptcp_lib_pr_skip "${@}"
+}
+
+# [ $1: fail msg ]
+mark_as_skipped()
+{
+ local msg="${1:-"Feature not supported"}"
+
+ mptcp_lib_fail_if_expected_feature "${msg}"
+
+ print_check "${msg}"
+ print_skip
+
+ last_test_skipped=1
+}
+
+# $@: condition
+continue_if()
+{
+ if ! "${@}"; then
+ mark_as_skipped
+ return 1
+ fi
+}
+
+skip_test()
+{
+ if [ "${#only_tests_ids[@]}" -eq 0 ] && [ "${#only_tests_names[@]}" -eq 0 ]; then
+ return 1
+ fi
+
+ local i
+ for i in "${only_tests_ids[@]}"; do
+ if [ "$((MPTCP_LIB_TEST_COUNTER+1))" -eq "${i}" ]; then
+ return 1
+ fi
+ done
+ for i in "${only_tests_names[@]}"; do
+ if [ "${TEST_NAME}" = "${i}" ]; then
+ return 1
+ fi
+ done
+
+ return 0
+}
+
+append_prev_results()
+{
+ if [ ${last_test_failed} -eq 1 ]; then
+ mptcp_lib_result_fail "${TEST_NAME}"
+ elif [ ${last_test_skipped} -eq 1 ]; then
+ mptcp_lib_result_skip "${TEST_NAME}"
+ elif [ ${last_test_ignored} -ne 1 ]; then
+ mptcp_lib_result_pass "${TEST_NAME}"
+ fi
+
+ last_test_failed=0
+ last_test_skipped=0
+ last_test_ignored=0
+}
+
+# $1: test name
reset()
{
- cleanup_partial
- init
+ append_prev_results
+
+ TEST_NAME="${1}"
+
+ if skip_test; then
+ MPTCP_LIB_TEST_COUNTER=$((MPTCP_LIB_TEST_COUNTER+1))
+ last_test_ignored=1
+ return 1
+ fi
+
+ mptcp_lib_print_title "${TEST_NAME}"
+
+ if [ "${init}" != "1" ]; then
+ init
+ else
+ cleanup_partial
+ fi
+
+ init_partial
+
+ return 0
}
-for arg in "$@"; do
- if [ "$arg" = "-c" ]; then
- capture=1
+# $1: test name ; $2: counter to check
+reset_check_counter()
+{
+ reset "${1}" || return 1
+
+ local counter="${2}"
+
+ if ! nstat -asz "${counter}" | grep -wq "${counter}"; then
+ mark_as_skipped "counter '${counter}' is not available"
+ return 1
fi
-done
+}
-ip -Version > /dev/null 2>&1
-if [ $? -ne 0 ];then
- echo "SKIP: Could not run test without ip tool"
- exit $ksft_skip
-fi
+# $1: test name
+reset_with_cookies()
+{
+ reset "${1}" || return 1
+
+ local netns
+ for netns in "$ns1" "$ns2"; do
+ ip netns exec $netns sysctl -q net.ipv4.tcp_syncookies=2
+ done
+}
+# $1: test name
+reset_with_add_addr_timeout()
+{
+ local ip="${2:-4}"
+ local tables
-check_transfer()
+ reset "${1}" || return 1
+
+ tables="${iptables}"
+ if [ $ip -eq 6 ]; then
+ tables="${ip6tables}"
+ fi
+
+ ip netns exec $ns1 sysctl -q net.mptcp.add_addr_timeout=1
+
+ if ! ip netns exec $ns2 $tables -A OUTPUT -p tcp \
+ -m tcp --tcp-option 30 \
+ -m bpf --bytecode \
+ "$CBPF_MPTCP_SUBOPTION_ADD_ADDR" \
+ -j DROP; then
+ mark_as_skipped "unable to set the 'add addr' rule"
+ return 1
+ fi
+}
+
+# $1: test name
+reset_with_checksum()
+{
+ local ns1_enable=$1
+ local ns2_enable=$2
+
+ reset "checksum test ${1} ${2}" || return 1
+
+ ip netns exec $ns1 sysctl -q net.mptcp.checksum_enabled=$ns1_enable
+ ip netns exec $ns2 sysctl -q net.mptcp.checksum_enabled=$ns2_enable
+
+ validate_checksum=true
+}
+
+reset_with_allow_join_id0()
+{
+ local ns1_enable=$2
+ local ns2_enable=$3
+
+ reset "${1}" || return 1
+
+ ip netns exec $ns1 sysctl -q net.mptcp.allow_join_initial_addr_port=$ns1_enable
+ ip netns exec $ns2 sysctl -q net.mptcp.allow_join_initial_addr_port=$ns2_enable
+}
+
+# Modify TCP payload without corrupting the TCP packet
+#
+# This rule inverts a 8-bit word at byte offset 148 for the 2nd TCP ACK packets
+# carrying enough data.
+# Once it is done, the TCP Checksum field is updated so the packet is still
+# considered as valid at the TCP level.
+# Because the MPTCP checksum, covering the TCP options and data, has not been
+# updated, the modification will be detected and an MP_FAIL will be emitted:
+# what we want to validate here without corrupting "random" MPTCP options.
+#
+# To avoid having tc producing this pr_info() message for each TCP ACK packets
+# not carrying enough data:
+#
+# tc action pedit offset 162 out of bounds
+#
+# Netfilter is used to mark packets with enough data.
+setup_fail_rules()
+{
+ check_invert=1
+ validate_checksum=true
+ local i="$1"
+ local ip="${2:-4}"
+ local tables
+
+ tables="${iptables}"
+ if [ $ip -eq 6 ]; then
+ tables="${ip6tables}"
+ fi
+
+ ip netns exec $ns2 $tables \
+ -t mangle \
+ -A OUTPUT \
+ -o ns2eth$i \
+ -p tcp \
+ -m length --length 150:9999 \
+ -m statistic --mode nth --packet 1 --every 99999 \
+ -j MARK --set-mark 42 || return ${KSFT_SKIP}
+
+ tc -n $ns2 qdisc add dev ns2eth$i clsact || return ${KSFT_SKIP}
+ tc -n $ns2 filter add dev ns2eth$i egress \
+ protocol ip prio 1000 \
+ handle 42 fw \
+ action pedit munge offset 148 u8 invert \
+ pipe csum tcp \
+ index 100 || return ${KSFT_SKIP}
+}
+
+reset_with_fail()
{
- in=$1
- out=$2
- what=$3
+ reset_check_counter "${1}" "MPTcpExtInfiniteMapTx" || return 1
+ shift
+
+ ip netns exec $ns1 sysctl -q net.mptcp.checksum_enabled=1
+ ip netns exec $ns2 sysctl -q net.mptcp.checksum_enabled=1
- cmp "$in" "$out" > /dev/null 2>&1
- if [ $? -ne 0 ] ;then
- echo "[ FAIL ] $what does not match (in, out):"
- print_file_err "$in"
- print_file_err "$out"
+ local rc=0
+ setup_fail_rules "${@}" || rc=$?
+ if [ ${rc} -eq ${KSFT_SKIP} ]; then
+ mark_as_skipped "unable to set the 'fail' rules"
return 1
fi
+}
+
+reset_with_events()
+{
+ reset "${1}" || return 1
+
+ mptcp_lib_events "${ns1}" "${evts_ns1}" evts_ns1_pid
+ mptcp_lib_events "${ns2}" "${evts_ns2}" evts_ns2_pid
+}
+
+reset_with_tcp_filter()
+{
+ reset "${1}" || return 1
+ shift
+
+ local ns="${!1}"
+ local src="${2}"
+ local target="${3}"
+
+ if ! ip netns exec "${ns}" ${iptables} \
+ -A INPUT \
+ -s "${src}" \
+ -p tcp \
+ -j "${target}"; then
+ mark_as_skipped "unable to set the filter rules"
+ return 1
+ fi
+}
+
+# $1: err msg
+fail_test()
+{
+ ret=${KSFT_FAIL}
+
+ if [ ${#} -gt 0 ]; then
+ print_fail "${@}"
+ fi
+
+ # just in case a test is marked twice as failed
+ if [ ${last_test_failed} -eq 0 ]; then
+ failed_tests[${MPTCP_LIB_TEST_COUNTER}]="${TEST_NAME}"
+ dump_stats
+ last_test_failed=1
+ fi
+}
+
+get_failed_tests_ids()
+{
+ # sorted
+ local i
+ for i in "${!failed_tests[@]}"; do
+ echo "${i}"
+ done | sort -n
+}
+
+check_transfer()
+{
+ local in=$1
+ local out=$2
+ local what=$3
+ local bytes=$4
+ local i a b
+
+ local line
+ if [ -n "$bytes" ]; then
+ local out_size
+ # when truncating we must check the size explicitly
+ out_size=$(wc -c $out | awk '{print $1}')
+ if [ $out_size -ne $bytes ]; then
+ fail_test "$what output file has wrong size ($out_size, $bytes)"
+ return 1
+ fi
+
+ # note: BusyBox's "cmp" command doesn't support --bytes
+ tmpfile=$(mktemp)
+ head --bytes="$bytes" "$in" > "$tmpfile"
+ mv "$tmpfile" "$in"
+ head --bytes="$bytes" "$out" > "$tmpfile"
+ mv "$tmpfile" "$out"
+ tmpfile=""
+ fi
+ cmp -l "$in" "$out" | while read -r i a b; do
+ local sum=$((0${a} + 0${b}))
+ if [ $check_invert -eq 0 ] || [ $sum -ne $((0xff)) ]; then
+ fail_test "$what does not match (in, out):"
+ mptcp_lib_print_file_err "$in"
+ mptcp_lib_print_file_err "$out"
+
+ return 1
+ else
+ print_info "$what has inverted byte at ${i}"
+ fi
+ done
return 0
}
do_ping()
{
- listener_ns="$1"
- connector_ns="$2"
- connect_addr="$3"
+ local listener_ns="$1"
+ local connector_ns="$2"
+ local connect_addr="$3"
+
+ if ! ip netns exec ${connector_ns} ping -q -c 1 $connect_addr >/dev/null; then
+ fail_test "$listener_ns -> $connect_addr connectivity"
+ fi
+}
+
+link_failure()
+{
+ local ns="$1"
+
+ if [ -z "$FAILING_LINKS" ]; then
+ l=$((RANDOM%4))
+ FAILING_LINKS=$((l+1))
+ fi
+
+ local l
+ for l in $FAILING_LINKS; do
+ local veth="ns1eth$l"
+ ip -net "$ns" link set "$veth" down
+ done
+}
+
+rm_addr_count()
+{
+ mptcp_lib_get_counter "${1}" "MPTcpExtRmAddr"
+}
+
+# $1: ns, $2: old rm_addr counter in $ns
+wait_rm_addr()
+{
+ local ns="${1}"
+ local old_cnt="${2}"
+ local cnt
+
+ local i
+ for i in $(seq 10); do
+ cnt=$(rm_addr_count ${ns})
+ [ "$cnt" = "${old_cnt}" ] || break
+ sleep 0.1
+ done
+}
+
+rm_sf_count()
+{
+ mptcp_lib_get_counter "${1}" "MPTcpExtRmSubflow"
+}
+
+# $1: ns, $2: old rm_sf counter in $ns
+wait_rm_sf()
+{
+ local ns="${1}"
+ local old_cnt="${2}"
+ local cnt
- ip netns exec ${connector_ns} ping -q -c 1 $connect_addr >/dev/null
- if [ $? -ne 0 ] ; then
- echo "$listener_ns -> $connect_addr connectivity [ FAIL ]" 1>&2
- ret=1
+ local i
+ for i in $(seq 10); do
+ cnt=$(rm_sf_count ${ns})
+ [ "$cnt" = "${old_cnt}" ] || break
+ sleep 0.1
+ done
+}
+
+wait_mpj()
+{
+ local ns="${1}"
+ local cnt old_cnt
+
+ old_cnt=$(mptcp_lib_get_counter ${ns} "MPTcpExtMPJoinAckRx")
+
+ local i
+ for i in $(seq 10); do
+ cnt=$(mptcp_lib_get_counter ${ns} "MPTcpExtMPJoinAckRx")
+ [ "$cnt" = "${old_cnt}" ] || break
+ sleep 0.1
+ done
+}
+
+kill_events_pids()
+{
+ mptcp_lib_kill_wait $evts_ns1_pid
+ evts_ns1_pid=0
+ mptcp_lib_kill_wait $evts_ns2_pid
+ evts_ns2_pid=0
+}
+
+pm_nl_set_limits()
+{
+ local ns=$1
+ local addrs=$2
+ local subflows=$3
+
+ if [ $ip_mptcp -eq 1 ]; then
+ ip -n $ns mptcp limits set add_addr_accepted $addrs subflows $subflows
+ else
+ ip netns exec $ns ./pm_nl_ctl limits $addrs $subflows
+ fi
+}
+
+pm_nl_add_endpoint()
+{
+ local ns=$1
+ local addr=$2
+ local flags _flags
+ local port _port
+ local dev _dev
+ local id _id
+ local nr=2
+
+ local p
+ for p in "${@}"
+ do
+ if [ $p = "flags" ]; then
+ eval _flags=\$"$nr"
+ [ -n "$_flags" ]; flags="flags $_flags"
+ fi
+ if [ $p = "dev" ]; then
+ eval _dev=\$"$nr"
+ [ -n "$_dev" ]; dev="dev $_dev"
+ fi
+ if [ $p = "id" ]; then
+ eval _id=\$"$nr"
+ [ -n "$_id" ]; id="id $_id"
+ fi
+ if [ $p = "port" ]; then
+ eval _port=\$"$nr"
+ [ -n "$_port" ]; port="port $_port"
+ fi
+
+ nr=$((nr + 1))
+ done
+
+ if [ $ip_mptcp -eq 1 ]; then
+ ip -n $ns mptcp endpoint add $addr ${_flags//","/" "} $dev $id $port
+ else
+ ip netns exec $ns ./pm_nl_ctl add $addr $flags $dev $id $port
+ fi
+}
+
+pm_nl_del_endpoint()
+{
+ local ns=$1
+ local id=$2
+ local addr=$3
+
+ if [ $ip_mptcp -eq 1 ]; then
+ [ $id -ne 0 ] && addr=''
+ ip -n $ns mptcp endpoint delete id $id $addr
+ else
+ ip netns exec $ns ./pm_nl_ctl del $id $addr
+ fi
+}
+
+pm_nl_flush_endpoint()
+{
+ local ns=$1
+
+ if [ $ip_mptcp -eq 1 ]; then
+ ip -n $ns mptcp endpoint flush
+ else
+ ip netns exec $ns ./pm_nl_ctl flush
+ fi
+}
+
+pm_nl_show_endpoints()
+{
+ local ns=$1
+
+ if [ $ip_mptcp -eq 1 ]; then
+ ip -n $ns mptcp endpoint show
+ else
+ ip netns exec $ns ./pm_nl_ctl dump
+ fi
+}
+
+pm_nl_change_endpoint()
+{
+ local ns=$1
+ local id=$2
+ local flags=$3
+
+ if [ $ip_mptcp -eq 1 ]; then
+ ip -n $ns mptcp endpoint change id $id ${flags//","/" "}
+ else
+ ip netns exec $ns ./pm_nl_ctl set id $id flags $flags
+ fi
+}
+
+pm_nl_check_endpoint()
+{
+ local line expected_line
+ local msg="$1"
+ local ns=$2
+ local addr=$3
+ local _flags=""
+ local flags
+ local _port
+ local port
+ local dev
+ local _id
+ local id
+
+ print_check "${msg}"
+
+ shift 3
+ while [ -n "$1" ]; do
+ if [ $1 = "flags" ]; then
+ _flags=$2
+ [ -n "$_flags" ]; flags="flags $_flags"
+ shift
+ elif [ $1 = "dev" ]; then
+ [ -n "$2" ]; dev="dev $1"
+ shift
+ elif [ $1 = "id" ]; then
+ _id=$2
+ [ -n "$_id" ]; id="id $_id"
+ shift
+ elif [ $1 = "port" ]; then
+ _port=$2
+ [ -n "$_port" ]; port=" port $_port"
+ shift
+ fi
+
+ shift
+ done
+
+ if [ -z "$id" ]; then
+ test_fail "bad test - missing endpoint id"
+ return
+ fi
+
+ if [ $ip_mptcp -eq 1 ]; then
+ # get line and trim trailing whitespace
+ line=$(ip -n $ns mptcp endpoint show $id)
+ line="${line% }"
+ # the dump order is: address id flags port dev
+ [ -n "$addr" ] && expected_line="$addr"
+ expected_line+=" $id"
+ [ -n "$_flags" ] && expected_line+=" ${_flags//","/" "}"
+ [ -n "$dev" ] && expected_line+=" $dev"
+ [ -n "$port" ] && expected_line+=" $port"
+ else
+ line=$(ip netns exec $ns ./pm_nl_ctl get $_id)
+ # the dump order is: id flags dev address port
+ expected_line="$id"
+ [ -n "$flags" ] && expected_line+=" $flags"
+ [ -n "$dev" ] && expected_line+=" $dev"
+ [ -n "$addr" ] && expected_line+=" $addr"
+ [ -n "$_port" ] && expected_line+=" $_port"
+ fi
+ if [ "$line" = "$expected_line" ]; then
+ print_ok
+ else
+ fail_test "expected '$expected_line' found '$line'"
+ fi
+}
+
+pm_nl_set_endpoint()
+{
+ local listener_ns="$1"
+ local connector_ns="$2"
+ local connect_addr="$3"
+
+ local addr_nr_ns1=${addr_nr_ns1:-0}
+ local addr_nr_ns2=${addr_nr_ns2:-0}
+ local sflags=${sflags:-""}
+ local fullmesh=${fullmesh:-""}
+
+ local flags="subflow"
+ if [ -n "${fullmesh}" ]; then
+ flags="${flags},fullmesh"
+ addr_nr_ns2=${fullmesh}
+ fi
+
+ # let the mptcp subflow be established in background before
+ # do endpoint manipulation
+ if [ $addr_nr_ns1 != "0" ] || [ $addr_nr_ns2 != "0" ]; then
+ sleep 1
+ fi
+
+ if [ $addr_nr_ns1 -gt 0 ]; then
+ local counter=2
+ local add_nr_ns1=${addr_nr_ns1}
+ local id=10
+ while [ $add_nr_ns1 -gt 0 ]; do
+ local addr
+ if mptcp_lib_is_v6 "${connect_addr}"; then
+ addr="dead:beef:$counter::1"
+ else
+ addr="10.0.$counter.1"
+ fi
+ pm_nl_add_endpoint $ns1 $addr flags signal
+ counter=$((counter + 1))
+ add_nr_ns1=$((add_nr_ns1 - 1))
+ id=$((id + 1))
+ done
+ elif [ $addr_nr_ns1 -lt 0 ]; then
+ local rm_nr_ns1=$((-addr_nr_ns1))
+ if [ $rm_nr_ns1 -lt 8 ]; then
+ local counter=0
+ local line
+ pm_nl_show_endpoints ${listener_ns} | while read -r line; do
+ # shellcheck disable=SC2206 # we do want to split per word
+ local arr=($line)
+ local nr=0
+
+ local i
+ for i in "${arr[@]}"; do
+ if [ $i = "id" ]; then
+ if [ $counter -eq $rm_nr_ns1 ]; then
+ break
+ fi
+ id=${arr[$nr+1]}
+ rm_addr=$(rm_addr_count ${connector_ns})
+ pm_nl_del_endpoint ${listener_ns} $id
+ wait_rm_addr ${connector_ns} ${rm_addr}
+ counter=$((counter + 1))
+ fi
+ nr=$((nr + 1))
+ done
+ done
+ elif [ $rm_nr_ns1 -eq 8 ]; then
+ pm_nl_flush_endpoint ${listener_ns}
+ elif [ $rm_nr_ns1 -eq 9 ]; then
+ pm_nl_del_endpoint ${listener_ns} 0 ${connect_addr}
+ fi
+ fi
+
+ # if newly added endpoints must be deleted, give the background msk
+ # some time to created them
+ [ $addr_nr_ns1 -gt 0 ] && [ $addr_nr_ns2 -lt 0 ] && sleep 1
+
+ if [ $addr_nr_ns2 -gt 0 ]; then
+ local add_nr_ns2=${addr_nr_ns2}
+ local counter=3
+ local id=20
+ while [ $add_nr_ns2 -gt 0 ]; do
+ local addr
+ if mptcp_lib_is_v6 "${connect_addr}"; then
+ addr="dead:beef:$counter::2"
+ else
+ addr="10.0.$counter.2"
+ fi
+ pm_nl_add_endpoint $ns2 $addr flags $flags
+ counter=$((counter + 1))
+ add_nr_ns2=$((add_nr_ns2 - 1))
+ id=$((id + 1))
+ done
+ elif [ $addr_nr_ns2 -lt 0 ]; then
+ local rm_nr_ns2=$((-addr_nr_ns2))
+ if [ $rm_nr_ns2 -lt 8 ]; then
+ local counter=0
+ local line
+ pm_nl_show_endpoints ${connector_ns} | while read -r line; do
+ # shellcheck disable=SC2206 # we do want to split per word
+ local arr=($line)
+ local nr=0
+
+ local i
+ for i in "${arr[@]}"; do
+ if [ $i = "id" ]; then
+ if [ $counter -eq $rm_nr_ns2 ]; then
+ break
+ fi
+ local id rm_addr
+ # rm_addr are serialized, allow the previous one to
+ # complete
+ id=${arr[$nr+1]}
+ rm_addr=$(rm_addr_count ${listener_ns})
+ pm_nl_del_endpoint ${connector_ns} $id
+ wait_rm_addr ${listener_ns} ${rm_addr}
+ counter=$((counter + 1))
+ fi
+ nr=$((nr + 1))
+ done
+ done
+ elif [ $rm_nr_ns2 -eq 8 ]; then
+ pm_nl_flush_endpoint ${connector_ns}
+ elif [ $rm_nr_ns2 -eq 9 ]; then
+ local addr
+ if mptcp_lib_is_v6 "${connect_addr}"; then
+ addr="dead:beef:1::2"
+ else
+ addr="10.0.1.2"
+ fi
+ pm_nl_del_endpoint ${connector_ns} 0 $addr
+ fi
+ fi
+
+ if [ -n "${sflags}" ]; then
+ sleep 1
+
+ local netns
+ for netns in "$ns1" "$ns2"; do
+ local line
+ pm_nl_show_endpoints $netns | while read -r line; do
+ # shellcheck disable=SC2206 # we do want to split per word
+ local arr=($line)
+ local nr=0
+ local id
+
+ local i
+ for i in "${arr[@]}"; do
+ if [ $i = "id" ]; then
+ id=${arr[$nr+1]}
+ fi
+ nr=$((nr + 1))
+ done
+ pm_nl_change_endpoint $netns $id $sflags
+ done
+ done
+ fi
+}
+
+chk_cestab_nr()
+{
+ local ns=$1
+ local cestab=$2
+ local count
+
+ print_check "cestab $cestab"
+ count=$(mptcp_lib_get_counter ${ns} "MPTcpExtMPCurrEstab")
+ if [ -z "$count" ]; then
+ print_skip
+ elif [ "$count" != "$cestab" ]; then
+ fail_test "got $count current establish[s] expected $cestab"
+ else
+ print_ok
+ fi
+}
+
+# $1 namespace 1, $2 namespace 2
+check_cestab()
+{
+ if [ -n "${cestab_ns1}" ]; then
+ chk_cestab_nr ${1} ${cestab_ns1}
+ fi
+ if [ -n "${cestab_ns2}" ]; then
+ chk_cestab_nr ${2} ${cestab_ns2}
fi
}
do_transfer()
{
- listener_ns="$1"
- connector_ns="$2"
- cl_proto="$3"
- srv_proto="$4"
- connect_addr="$5"
+ local listener_ns="$1"
+ local connector_ns="$2"
+ local cl_proto="$3"
+ local srv_proto="$4"
+ local connect_addr="$5"
- port=$((10000+$TEST_COUNT))
- TEST_COUNT=$((TEST_COUNT+1))
+ local port=$((10000 + MPTCP_LIB_TEST_COUNTER - 1))
+ local cappid
+ local FAILING_LINKS=${FAILING_LINKS:-""}
+ local fastclose=${fastclose:-""}
+ local speed=${speed:-"fast"}
:> "$cout"
:> "$sout"
:> "$capout"
- if [ $capture -eq 1 ]; then
+ if $capture; then
+ local capuser
if [ -z $SUDO_USER ] ; then
capuser=""
else
capuser="-Z $SUDO_USER"
fi
- capfile="mp_join-${listener_ns}.pcap"
+ capfile=$(printf "mp_join-%02u-%s.pcap" "$MPTCP_LIB_TEST_COUNTER" "${listener_ns}")
- echo "Capturing traffic for test $TEST_COUNT into $capfile"
+ echo "Capturing traffic for test $MPTCP_LIB_TEST_COUNTER into $capfile"
ip netns exec ${listener_ns} tcpdump -i any -s 65535 -B 32768 $capuser -w $capfile > "$capout" 2>&1 &
cappid=$!
sleep 1
fi
- ip netns exec ${listener_ns} ./mptcp_connect -j -t $timeout -l -p $port -s ${srv_proto} 0.0.0.0 < "$sin" > "$sout" &
- spid=$!
+ NSTAT_HISTORY=/tmp/${listener_ns}.nstat ip netns exec ${listener_ns} \
+ nstat -n
+ NSTAT_HISTORY=/tmp/${connector_ns}.nstat ip netns exec ${connector_ns} \
+ nstat -n
- sleep 1
+ local extra_args
+ if [ $speed = "fast" ]; then
+ extra_args="-j"
+ elif [ $speed = "slow" ]; then
+ extra_args="-r 50"
+ elif [ $speed -gt 0 ]; then
+ extra_args="-r ${speed}"
+ fi
+
+ local extra_cl_args=""
+ local extra_srv_args=""
+ local trunc_size=""
+ if [ -n "${fastclose}" ]; then
+ if [ ${test_linkfail} -le 1 ]; then
+ fail_test "fastclose tests need test_linkfail argument"
+ return 1
+ fi
+
+ # disconnect
+ trunc_size=${test_linkfail}
+ local side=${fastclose}
+
+ if [ ${side} = "client" ]; then
+ extra_cl_args="-f ${test_linkfail}"
+ extra_srv_args="-f -1"
+ elif [ ${side} = "server" ]; then
+ extra_srv_args="-f ${test_linkfail}"
+ extra_cl_args="-f -1"
+ else
+ fail_test "wrong/unknown fastclose spec ${side}"
+ return 1
+ fi
+ fi
+
+ extra_srv_args="$extra_args $extra_srv_args"
+ if [ "$test_linkfail" -gt 1 ];then
+ timeout ${timeout_test} \
+ ip netns exec ${listener_ns} \
+ ./mptcp_connect -t ${timeout_poll} -l -p $port -s ${srv_proto} \
+ $extra_srv_args "::" < "$sinfail" > "$sout" &
+ else
+ timeout ${timeout_test} \
+ ip netns exec ${listener_ns} \
+ ./mptcp_connect -t ${timeout_poll} -l -p $port -s ${srv_proto} \
+ $extra_srv_args "::" < "$sin" > "$sout" &
+ fi
+ local spid=$!
- ip netns exec ${connector_ns} ./mptcp_connect -j -t $timeout -p $port -s ${cl_proto} $connect_addr < "$cin" > "$cout" &
- cpid=$!
+ mptcp_lib_wait_local_port_listen "${listener_ns}" "${port}"
+
+ extra_cl_args="$extra_args $extra_cl_args"
+ if [ "$test_linkfail" -eq 0 ];then
+ timeout ${timeout_test} \
+ ip netns exec ${connector_ns} \
+ ./mptcp_connect -t ${timeout_poll} -p $port -s ${cl_proto} \
+ $extra_cl_args $connect_addr < "$cin" > "$cout" &
+ elif [ "$test_linkfail" -eq 1 ] || [ "$test_linkfail" -eq 2 ];then
+ ( cat "$cinfail" ; sleep 2; link_failure $listener_ns ; cat "$cinfail" ) | \
+ tee "$cinsent" | \
+ timeout ${timeout_test} \
+ ip netns exec ${connector_ns} \
+ ./mptcp_connect -t ${timeout_poll} -p $port -s ${cl_proto} \
+ $extra_cl_args $connect_addr > "$cout" &
+ else
+ tee "$cinsent" < "$cinfail" | \
+ timeout ${timeout_test} \
+ ip netns exec ${connector_ns} \
+ ./mptcp_connect -t ${timeout_poll} -p $port -s ${cl_proto} \
+ $extra_cl_args $connect_addr > "$cout" &
+ fi
+ local cpid=$!
+
+ pm_nl_set_endpoint $listener_ns $connector_ns $connect_addr
+ check_cestab $listener_ns $connector_ns
wait $cpid
- retc=$?
+ local retc=$?
wait $spid
- rets=$?
+ local rets=$?
- if [ $capture -eq 1 ]; then
+ if $capture; then
sleep 1
kill $cappid
fi
+ NSTAT_HISTORY=/tmp/${listener_ns}.nstat ip netns exec ${listener_ns} \
+ nstat | grep Tcp > /tmp/${listener_ns}.out
+ NSTAT_HISTORY=/tmp/${connector_ns}.nstat ip netns exec ${connector_ns} \
+ nstat | grep Tcp > /tmp/${connector_ns}.out
+
if [ ${rets} -ne 0 ] || [ ${retc} -ne 0 ]; then
- echo " client exit code $retc, server $rets" 1>&2
- echo "\nnetns ${listener_ns} socket stat for $port:" 1>&2
- ip netns exec ${listener_ns} ss -nita 1>&2 -o "sport = :$port"
- echo "\nnetns ${connector_ns} socket stat for $port:" 1>&2
- ip netns exec ${connector_ns} ss -nita 1>&2 -o "dport = :$port"
+ fail_test "client exit code $retc, server $rets"
+ echo -e "\nnetns ${listener_ns} socket stat for ${port}:" 1>&2
+ ip netns exec ${listener_ns} ss -Menita 1>&2 -o "sport = :$port"
+ cat /tmp/${listener_ns}.out
+ echo -e "\nnetns ${connector_ns} socket stat for ${port}:" 1>&2
+ ip netns exec ${connector_ns} ss -Menita 1>&2 -o "dport = :$port"
+ cat /tmp/${connector_ns}.out
cat "$capout"
return 1
fi
- check_transfer $sin $cout "file received by client"
+ if [ "$test_linkfail" -gt 1 ];then
+ check_transfer $sinfail $cout "file received by client" $trunc_size
+ else
+ check_transfer $sin $cout "file received by client" $trunc_size
+ fi
retc=$?
- check_transfer $cin $sout "file received by server"
+ if [ "$test_linkfail" -eq 0 ];then
+ check_transfer $cin $sout "file received by server" $trunc_size
+ else
+ check_transfer $cinsent $sout "file received by server" $trunc_size
+ fi
rets=$?
if [ $retc -eq 0 ] && [ $rets -eq 0 ];then
@@ -192,166 +1127,2618 @@ do_transfer()
make_file()
{
- name=$1
- who=$2
-
- SIZE=1
+ local name=$1
+ local who=$2
+ local size=$3
- dd if=/dev/urandom of="$name" bs=1024 count=$SIZE 2> /dev/null
- echo -e "\nMPTCP_TEST_FILE_END_MARKER" >> "$name"
+ mptcp_lib_make_file $name 1024 $size
- echo "Created $name (size $SIZE KB) containing data sent by $who"
+ print_info "Test file (size $size KB) for $who"
}
run_tests()
{
- listener_ns="$1"
- connector_ns="$2"
- connect_addr="$3"
- lret=0
+ local listener_ns="$1"
+ local connector_ns="$2"
+ local connect_addr="$3"
+
+ local size
+ local test_linkfail=${test_linkfail:-0}
+
+ # The values above 2 are reused to make test files
+ # with the given sizes (KB)
+ if [ "$test_linkfail" -gt 2 ]; then
+ size=$test_linkfail
+
+ if [ -z "$cinfail" ]; then
+ cinfail=$(mktemp)
+ fi
+ make_file "$cinfail" "client" $size
+ # create the input file for the failure test when
+ # the first failure test run
+ elif [ "$test_linkfail" -ne 0 ] && [ -z "$cinfail" ]; then
+ # the client file must be considerably larger
+ # of the maximum expected cwin value, or the
+ # link utilization will be not predicable
+ size=$((RANDOM%2))
+ size=$((size+1))
+ size=$((size*8192))
+ size=$((size + ( RANDOM % 8192) ))
+
+ cinfail=$(mktemp)
+ make_file "$cinfail" "client" $size
+ fi
+
+ if [ "$test_linkfail" -gt 2 ]; then
+ size=$test_linkfail
+
+ if [ -z "$sinfail" ]; then
+ sinfail=$(mktemp)
+ fi
+ make_file "$sinfail" "server" $size
+ elif [ "$test_linkfail" -eq 2 ] && [ -z "$sinfail" ]; then
+ size=$((RANDOM%16))
+ size=$((size+1))
+ size=$((size*2048))
+
+ sinfail=$(mktemp)
+ make_file "$sinfail" "server" $size
+ fi
do_transfer ${listener_ns} ${connector_ns} MPTCP MPTCP ${connect_addr}
- lret=$?
- if [ $lret -ne 0 ]; then
- ret=$lret
- return
+}
+
+dump_stats()
+{
+ echo Server ns stats
+ ip netns exec $ns1 nstat -as | grep Tcp
+ echo Client ns stats
+ ip netns exec $ns2 nstat -as | grep Tcp
+}
+
+chk_csum_nr()
+{
+ local csum_ns1=${1:-0}
+ local csum_ns2=${2:-0}
+ local count
+ local extra_msg=""
+ local allow_multi_errors_ns1=0
+ local allow_multi_errors_ns2=0
+
+ if [[ "${csum_ns1}" = "+"* ]]; then
+ allow_multi_errors_ns1=1
+ csum_ns1=${csum_ns1:1}
+ fi
+ if [[ "${csum_ns2}" = "+"* ]]; then
+ allow_multi_errors_ns2=1
+ csum_ns2=${csum_ns2:1}
+ fi
+
+ print_check "sum"
+ count=$(mptcp_lib_get_counter ${ns1} "MPTcpExtDataCsumErr")
+ if [ "$count" != "$csum_ns1" ]; then
+ extra_msg+=" ns1=$count"
+ fi
+ if [ -z "$count" ]; then
+ print_skip
+ elif { [ "$count" != $csum_ns1 ] && [ $allow_multi_errors_ns1 -eq 0 ]; } ||
+ { [ "$count" -lt $csum_ns1 ] && [ $allow_multi_errors_ns1 -eq 1 ]; }; then
+ fail_test "got $count data checksum error[s] expected $csum_ns1"
+ else
+ print_ok
+ fi
+ print_check "csum"
+ count=$(mptcp_lib_get_counter ${ns2} "MPTcpExtDataCsumErr")
+ if [ "$count" != "$csum_ns2" ]; then
+ extra_msg+=" ns2=$count"
+ fi
+ if [ -z "$count" ]; then
+ print_skip
+ elif { [ "$count" != $csum_ns2 ] && [ $allow_multi_errors_ns2 -eq 0 ]; } ||
+ { [ "$count" -lt $csum_ns2 ] && [ $allow_multi_errors_ns2 -eq 1 ]; }; then
+ fail_test "got $count data checksum error[s] expected $csum_ns2"
+ else
+ print_ok
+ fi
+
+ print_info "$extra_msg"
+}
+
+chk_fail_nr()
+{
+ local fail_tx=$1
+ local fail_rx=$2
+ local ns_invert=${3:-""}
+ local count
+ local ns_tx=$ns1
+ local ns_rx=$ns2
+ local extra_msg=""
+ local allow_tx_lost=0
+ local allow_rx_lost=0
+
+ if [[ $ns_invert = "invert" ]]; then
+ ns_tx=$ns2
+ ns_rx=$ns1
+ extra_msg="invert"
+ fi
+
+ if [[ "${fail_tx}" = "-"* ]]; then
+ allow_tx_lost=1
+ fail_tx=${fail_tx:1}
+ fi
+ if [[ "${fail_rx}" = "-"* ]]; then
+ allow_rx_lost=1
+ fail_rx=${fail_rx:1}
+ fi
+
+ print_check "ftx"
+ count=$(mptcp_lib_get_counter ${ns_tx} "MPTcpExtMPFailTx")
+ if [ "$count" != "$fail_tx" ]; then
+ extra_msg+=",tx=$count"
+ fi
+ if [ -z "$count" ]; then
+ print_skip
+ elif { [ "$count" != "$fail_tx" ] && [ $allow_tx_lost -eq 0 ]; } ||
+ { [ "$count" -gt "$fail_tx" ] && [ $allow_tx_lost -eq 1 ]; }; then
+ fail_test "got $count MP_FAIL[s] TX expected $fail_tx"
+ else
+ print_ok
+ fi
+
+ print_check "failrx"
+ count=$(mptcp_lib_get_counter ${ns_rx} "MPTcpExtMPFailRx")
+ if [ "$count" != "$fail_rx" ]; then
+ extra_msg+=",rx=$count"
+ fi
+ if [ -z "$count" ]; then
+ print_skip
+ elif { [ "$count" != "$fail_rx" ] && [ $allow_rx_lost -eq 0 ]; } ||
+ { [ "$count" -gt "$fail_rx" ] && [ $allow_rx_lost -eq 1 ]; }; then
+ fail_test "got $count MP_FAIL[s] RX expected $fail_rx"
+ else
+ print_ok
+ fi
+
+ print_info "$extra_msg"
+}
+
+chk_fclose_nr()
+{
+ local fclose_tx=$1
+ local fclose_rx=$2
+ local ns_invert=$3
+ local count
+ local ns_tx=$ns2
+ local ns_rx=$ns1
+ local extra_msg=""
+
+ if [[ $ns_invert = "invert" ]]; then
+ ns_tx=$ns1
+ ns_rx=$ns2
+ extra_msg="invert"
+ fi
+
+ print_check "ctx"
+ count=$(mptcp_lib_get_counter ${ns_tx} "MPTcpExtMPFastcloseTx")
+ if [ -z "$count" ]; then
+ print_skip
+ elif [ "$count" != "$fclose_tx" ]; then
+ extra_msg+=",tx=$count"
+ fail_test "got $count MP_FASTCLOSE[s] TX expected $fclose_tx"
+ else
+ print_ok
+ fi
+
+ print_check "fclzrx"
+ count=$(mptcp_lib_get_counter ${ns_rx} "MPTcpExtMPFastcloseRx")
+ if [ -z "$count" ]; then
+ print_skip
+ elif [ "$count" != "$fclose_rx" ]; then
+ extra_msg+=",rx=$count"
+ fail_test "got $count MP_FASTCLOSE[s] RX expected $fclose_rx"
+ else
+ print_ok
+ fi
+
+ print_info "$extra_msg"
+}
+
+chk_rst_nr()
+{
+ local rst_tx=$1
+ local rst_rx=$2
+ local ns_invert=${3:-""}
+ local count
+ local ns_tx=$ns1
+ local ns_rx=$ns2
+ local extra_msg=""
+
+ if [[ $ns_invert = "invert" ]]; then
+ ns_tx=$ns2
+ ns_rx=$ns1
+ extra_msg="invert"
+ fi
+
+ print_check "rtx"
+ count=$(mptcp_lib_get_counter ${ns_tx} "MPTcpExtMPRstTx")
+ if [ -z "$count" ]; then
+ print_skip
+ # accept more rst than expected except if we don't expect any
+ elif { [ $rst_tx -ne 0 ] && [ $count -lt $rst_tx ]; } ||
+ { [ $rst_tx -eq 0 ] && [ $count -ne 0 ]; }; then
+ fail_test "got $count MP_RST[s] TX expected $rst_tx"
+ else
+ print_ok
+ fi
+
+ print_check "rstrx"
+ count=$(mptcp_lib_get_counter ${ns_rx} "MPTcpExtMPRstRx")
+ if [ -z "$count" ]; then
+ print_skip
+ # accept more rst than expected except if we don't expect any
+ elif { [ $rst_rx -ne 0 ] && [ $count -lt $rst_rx ]; } ||
+ { [ $rst_rx -eq 0 ] && [ $count -ne 0 ]; }; then
+ fail_test "got $count MP_RST[s] RX expected $rst_rx"
+ else
+ print_ok
+ fi
+
+ print_info "$extra_msg"
+}
+
+chk_infi_nr()
+{
+ local infi_tx=$1
+ local infi_rx=$2
+ local count
+
+ print_check "itx"
+ count=$(mptcp_lib_get_counter ${ns2} "MPTcpExtInfiniteMapTx")
+ if [ -z "$count" ]; then
+ print_skip
+ elif [ "$count" != "$infi_tx" ]; then
+ fail_test "got $count infinite map[s] TX expected $infi_tx"
+ else
+ print_ok
+ fi
+
+ print_check "infirx"
+ count=$(mptcp_lib_get_counter ${ns1} "MPTcpExtInfiniteMapRx")
+ if [ -z "$count" ]; then
+ print_skip
+ elif [ "$count" != "$infi_rx" ]; then
+ fail_test "got $count infinite map[s] RX expected $infi_rx"
+ else
+ print_ok
fi
}
chk_join_nr()
{
- local msg="$1"
- local syn_nr=$2
- local syn_ack_nr=$3
- local ack_nr=$4
+ local syn_nr=$1
+ local syn_ack_nr=$2
+ local ack_nr=$3
+ local csum_ns1=${4:-0}
+ local csum_ns2=${5:-0}
+ local fail_nr=${6:-0}
+ local rst_nr=${7:-0}
+ local infi_nr=${8:-0}
+ local corrupted_pkts=${9:-0}
local count
+ local with_cookie
+
+ if [ "${corrupted_pkts}" -gt 0 ]; then
+ print_info "${corrupted_pkts} corrupted pkts"
+ fi
+
+ print_check "syn"
+ count=$(mptcp_lib_get_counter ${ns1} "MPTcpExtMPJoinSynRx")
+ if [ -z "$count" ]; then
+ print_skip
+ elif [ "$count" != "$syn_nr" ]; then
+ fail_test "got $count JOIN[s] syn expected $syn_nr"
+ else
+ print_ok
+ fi
+
+ print_check "synack"
+ with_cookie=$(ip netns exec $ns2 sysctl -n net.ipv4.tcp_syncookies)
+ count=$(mptcp_lib_get_counter ${ns2} "MPTcpExtMPJoinSynAckRx")
+ if [ -z "$count" ]; then
+ print_skip
+ elif [ "$count" != "$syn_ack_nr" ]; then
+ # simult connections exceeding the limit with cookie enabled could go up to
+ # synack validation as the conn limit can be enforced reliably only after
+ # the subflow creation
+ if [ "$with_cookie" = 2 ] && [ "$count" -gt "$syn_ack_nr" ] && [ "$count" -le "$syn_nr" ]; then
+ print_ok
+ else
+ fail_test "got $count JOIN[s] synack expected $syn_ack_nr"
+ fi
+ else
+ print_ok
+ fi
+
+ print_check "ack"
+ count=$(mptcp_lib_get_counter ${ns1} "MPTcpExtMPJoinAckRx")
+ if [ -z "$count" ]; then
+ print_skip
+ elif [ "$count" != "$ack_nr" ]; then
+ fail_test "got $count JOIN[s] ack expected $ack_nr"
+ else
+ print_ok
+ fi
+ if $validate_checksum; then
+ chk_csum_nr $csum_ns1 $csum_ns2
+ chk_fail_nr $fail_nr $fail_nr
+ chk_rst_nr $rst_nr $rst_nr
+ chk_infi_nr $infi_nr $infi_nr
+ fi
+}
+
+# a negative value for 'stale_max' means no upper bound:
+# for bidirectional transfer, if one peer sleep for a while
+# - as these tests do - we can have a quite high number of
+# stale/recover conversions, proportional to
+# sleep duration/ MPTCP-level RTX interval.
+chk_stale_nr()
+{
+ local ns=$1
+ local stale_min=$2
+ local stale_max=$3
+ local stale_delta=$4
local dump_stats
+ local stale_nr
+ local recover_nr
- printf "%-36s %s" "$msg" "syn"
- count=`ip netns exec $ns1 nstat -as | grep MPTcpExtMPJoinSynRx | awk '{print $2}'`
- [ -z "$count" ] && count=0
- if [ "$count" != "$syn_nr" ]; then
- echo "[fail] got $count JOIN[s] syn expected $syn_nr"
- ret=1
+ print_check "stale"
+
+ stale_nr=$(mptcp_lib_get_counter ${ns} "MPTcpExtSubflowStale")
+ recover_nr=$(mptcp_lib_get_counter ${ns} "MPTcpExtSubflowRecover")
+ if [ -z "$stale_nr" ] || [ -z "$recover_nr" ]; then
+ print_skip
+ elif [ $stale_nr -lt $stale_min ] ||
+ { [ $stale_max -gt 0 ] && [ $stale_nr -gt $stale_max ]; } ||
+ [ $((stale_nr - recover_nr)) -ne $stale_delta ]; then
+ fail_test "got $stale_nr stale[s] $recover_nr recover[s], " \
+ " expected stale in range [$stale_min..$stale_max]," \
+ " stale-recover delta $stale_delta"
dump_stats=1
else
- echo -n "[ ok ]"
+ print_ok
fi
- echo -n " - synack"
- count=`ip netns exec $ns2 nstat -as | grep MPTcpExtMPJoinSynAckRx | awk '{print $2}'`
- [ -z "$count" ] && count=0
- if [ "$count" != "$syn_ack_nr" ]; then
- echo "[fail] got $count JOIN[s] synack expected $syn_ack_nr"
- ret=1
- dump_stats=1
+ if [ "${dump_stats}" = 1 ]; then
+ echo $ns stats
+ ip netns exec $ns ip -s link show
+ ip netns exec $ns nstat -as | grep MPTcp
+ fi
+}
+
+chk_add_nr()
+{
+ local add_nr=$1
+ local echo_nr=$2
+ local port_nr=${3:-0}
+ local syn_nr=${4:-$port_nr}
+ local syn_ack_nr=${5:-$port_nr}
+ local ack_nr=${6:-$port_nr}
+ local mis_syn_nr=${7:-0}
+ local mis_ack_nr=${8:-0}
+ local count
+ local timeout
+
+ timeout=$(ip netns exec $ns1 sysctl -n net.mptcp.add_addr_timeout)
+
+ print_check "add"
+ count=$(mptcp_lib_get_counter ${ns2} "MPTcpExtAddAddr")
+ if [ -z "$count" ]; then
+ print_skip
+ # if the test configured a short timeout tolerate greater then expected
+ # add addrs options, due to retransmissions
+ elif [ "$count" != "$add_nr" ] && { [ "$timeout" -gt 1 ] || [ "$count" -lt "$add_nr" ]; }; then
+ fail_test "got $count ADD_ADDR[s] expected $add_nr"
+ else
+ print_ok
+ fi
+
+ print_check "echo"
+ count=$(mptcp_lib_get_counter ${ns1} "MPTcpExtEchoAdd")
+ if [ -z "$count" ]; then
+ print_skip
+ elif [ "$count" != "$echo_nr" ]; then
+ fail_test "got $count ADD_ADDR echo[s] expected $echo_nr"
+ else
+ print_ok
+ fi
+
+ if [ $port_nr -gt 0 ]; then
+ print_check "pt"
+ count=$(mptcp_lib_get_counter ${ns2} "MPTcpExtPortAdd")
+ if [ -z "$count" ]; then
+ print_skip
+ elif [ "$count" != "$port_nr" ]; then
+ fail_test "got $count ADD_ADDR[s] with a port-number expected $port_nr"
+ else
+ print_ok
+ fi
+
+ print_check "syn"
+ count=$(mptcp_lib_get_counter ${ns1} "MPTcpExtMPJoinPortSynRx")
+ if [ -z "$count" ]; then
+ print_skip
+ elif [ "$count" != "$syn_nr" ]; then
+ fail_test "got $count JOIN[s] syn with a different \
+ port-number expected $syn_nr"
+ else
+ print_ok
+ fi
+
+ print_check "synack"
+ count=$(mptcp_lib_get_counter ${ns2} "MPTcpExtMPJoinPortSynAckRx")
+ if [ -z "$count" ]; then
+ print_skip
+ elif [ "$count" != "$syn_ack_nr" ]; then
+ fail_test "got $count JOIN[s] synack with a different \
+ port-number expected $syn_ack_nr"
+ else
+ print_ok
+ fi
+
+ print_check "ack"
+ count=$(mptcp_lib_get_counter ${ns1} "MPTcpExtMPJoinPortAckRx")
+ if [ -z "$count" ]; then
+ print_skip
+ elif [ "$count" != "$ack_nr" ]; then
+ fail_test "got $count JOIN[s] ack with a different \
+ port-number expected $ack_nr"
+ else
+ print_ok
+ fi
+
+ print_check "syn"
+ count=$(mptcp_lib_get_counter ${ns1} "MPTcpExtMismatchPortSynRx")
+ if [ -z "$count" ]; then
+ print_skip
+ elif [ "$count" != "$mis_syn_nr" ]; then
+ fail_test "got $count JOIN[s] syn with a mismatched \
+ port-number expected $mis_syn_nr"
+ else
+ print_ok
+ fi
+
+ print_check "ack"
+ count=$(mptcp_lib_get_counter ${ns1} "MPTcpExtMismatchPortAckRx")
+ if [ -z "$count" ]; then
+ print_skip
+ elif [ "$count" != "$mis_ack_nr" ]; then
+ fail_test "got $count JOIN[s] ack with a mismatched \
+ port-number expected $mis_ack_nr"
+ else
+ print_ok
+ fi
+ fi
+}
+
+chk_add_tx_nr()
+{
+ local add_tx_nr=$1
+ local echo_tx_nr=$2
+ local timeout
+ local count
+
+ timeout=$(ip netns exec $ns1 sysctl -n net.mptcp.add_addr_timeout)
+
+ print_check "add TX"
+ count=$(mptcp_lib_get_counter ${ns1} "MPTcpExtAddAddrTx")
+ if [ -z "$count" ]; then
+ print_skip
+ # if the test configured a short timeout tolerate greater then expected
+ # add addrs options, due to retransmissions
+ elif [ "$count" != "$add_tx_nr" ] && { [ "$timeout" -gt 1 ] || [ "$count" -lt "$add_tx_nr" ]; }; then
+ fail_test "got $count ADD_ADDR[s] TX, expected $add_tx_nr"
+ else
+ print_ok
+ fi
+
+ print_check "echo TX"
+ count=$(mptcp_lib_get_counter ${ns2} "MPTcpExtEchoAddTx")
+ if [ -z "$count" ]; then
+ print_skip
+ elif [ "$count" != "$echo_tx_nr" ]; then
+ fail_test "got $count ADD_ADDR echo[s] TX, expected $echo_tx_nr"
+ else
+ print_ok
+ fi
+}
+
+chk_rm_nr()
+{
+ local rm_addr_nr=$1
+ local rm_subflow_nr=$2
+ local invert
+ local simult
+ local count
+ local addr_ns=$ns1
+ local subflow_ns=$ns2
+ local extra_msg=""
+
+ shift 2
+ while [ -n "$1" ]; do
+ [ "$1" = "invert" ] && invert=true
+ [ "$1" = "simult" ] && simult=true
+ shift
+ done
+
+ if [ -z $invert ]; then
+ addr_ns=$ns1
+ subflow_ns=$ns2
+ elif [ $invert = "true" ]; then
+ addr_ns=$ns2
+ subflow_ns=$ns1
+ extra_msg="invert"
+ fi
+
+ print_check "rm"
+ count=$(mptcp_lib_get_counter ${addr_ns} "MPTcpExtRmAddr")
+ if [ -z "$count" ]; then
+ print_skip
+ elif [ "$count" != "$rm_addr_nr" ]; then
+ fail_test "got $count RM_ADDR[s] expected $rm_addr_nr"
+ else
+ print_ok
+ fi
+
+ print_check "rmsf"
+ count=$(mptcp_lib_get_counter ${subflow_ns} "MPTcpExtRmSubflow")
+ if [ -z "$count" ]; then
+ print_skip
+ elif [ -n "$simult" ]; then
+ local cnt suffix
+
+ cnt=$(mptcp_lib_get_counter ${addr_ns} "MPTcpExtRmSubflow")
+
+ # in case of simult flush, the subflow removal count on each side is
+ # unreliable
+ count=$((count + cnt))
+ if [ "$count" != "$rm_subflow_nr" ]; then
+ suffix="$count in [$rm_subflow_nr:$((rm_subflow_nr*2))]"
+ extra_msg+=" simult"
+ fi
+ if [ $count -ge "$rm_subflow_nr" ] && \
+ [ "$count" -le "$((rm_subflow_nr *2 ))" ]; then
+ print_ok "$suffix"
+ else
+ fail_test "got $count RM_SUBFLOW[s] expected in range [$rm_subflow_nr:$((rm_subflow_nr*2))]"
+ fi
+ elif [ "$count" != "$rm_subflow_nr" ]; then
+ fail_test "got $count RM_SUBFLOW[s] expected $rm_subflow_nr"
else
- echo -n "[ ok ]"
+ print_ok
fi
- echo -n " - ack"
- count=`ip netns exec $ns1 nstat -as | grep MPTcpExtMPJoinAckRx | awk '{print $2}'`
- [ -z "$count" ] && count=0
- if [ "$count" != "$ack_nr" ]; then
- echo "[fail] got $count JOIN[s] ack expected $ack_nr"
- ret=1
+ print_info "$extra_msg"
+}
+
+chk_rm_tx_nr()
+{
+ local rm_addr_tx_nr=$1
+
+ print_check "rm TX"
+ count=$(mptcp_lib_get_counter ${ns2} "MPTcpExtRmAddrTx")
+ if [ -z "$count" ]; then
+ print_skip
+ elif [ "$count" != "$rm_addr_tx_nr" ]; then
+ fail_test "got $count RM_ADDR[s] expected $rm_addr_tx_nr"
+ else
+ print_ok
+ fi
+}
+
+chk_prio_nr()
+{
+ local mp_prio_nr_tx=$1
+ local mp_prio_nr_rx=$2
+ local count
+
+ print_check "ptx"
+ count=$(mptcp_lib_get_counter ${ns1} "MPTcpExtMPPrioTx")
+ if [ -z "$count" ]; then
+ print_skip
+ elif [ "$count" != "$mp_prio_nr_tx" ]; then
+ fail_test "got $count MP_PRIO[s] TX expected $mp_prio_nr_tx"
+ else
+ print_ok
+ fi
+
+ print_check "prx"
+ count=$(mptcp_lib_get_counter ${ns1} "MPTcpExtMPPrioRx")
+ if [ -z "$count" ]; then
+ print_skip
+ elif [ "$count" != "$mp_prio_nr_rx" ]; then
+ fail_test "got $count MP_PRIO[s] RX expected $mp_prio_nr_rx"
+ else
+ print_ok
+ fi
+}
+
+chk_subflow_nr()
+{
+ local msg="$1"
+ local subflow_nr=$2
+ local cnt1
+ local cnt2
+ local dump_stats
+
+ print_check "${msg}"
+
+ cnt1=$(ss -N $ns1 -tOni | grep -c token)
+ cnt2=$(ss -N $ns2 -tOni | grep -c token)
+ if [ "$cnt1" != "$subflow_nr" ] || [ "$cnt2" != "$subflow_nr" ]; then
+ fail_test "got $cnt1:$cnt2 subflows expected $subflow_nr"
dump_stats=1
else
- echo "[ ok ]"
+ print_ok
fi
+
if [ "${dump_stats}" = 1 ]; then
- echo Server ns stats
- ip netns exec $ns1 nstat -as | grep MPTcp
- echo Client ns stats
- ip netns exec $ns2 nstat -as | grep MPTcp
- fi
-}
-
-sin=$(mktemp)
-sout=$(mktemp)
-cin=$(mktemp)
-cout=$(mktemp)
-init
-make_file "$cin" "client"
-make_file "$sin" "server"
-trap cleanup EXIT
-
-run_tests $ns1 $ns2 10.0.1.1
-chk_join_nr "no JOIN" "0" "0" "0"
-
-# subflow limted by client
-reset
-ip netns exec $ns2 ./pm_nl_ctl add 10.0.3.2 flags subflow
-run_tests $ns1 $ns2 10.0.1.1
-chk_join_nr "single subflow, limited by client" 0 0 0
-
-# subflow limted by server
-reset
-ip netns exec $ns2 ./pm_nl_ctl limits 0 1
-ip netns exec $ns2 ./pm_nl_ctl add 10.0.3.2 flags subflow
-run_tests $ns1 $ns2 10.0.1.1
-chk_join_nr "single subflow, limited by server" 1 1 0
-
-# subflow
-reset
-ip netns exec $ns1 ./pm_nl_ctl limits 0 1
-ip netns exec $ns2 ./pm_nl_ctl limits 0 1
-ip netns exec $ns2 ./pm_nl_ctl add 10.0.3.2 flags subflow
-run_tests $ns1 $ns2 10.0.1.1
-chk_join_nr "single subflow" 1 1 1
-
-# multiple subflows
-reset
-ip netns exec $ns1 ./pm_nl_ctl limits 0 2
-ip netns exec $ns2 ./pm_nl_ctl limits 0 2
-ip netns exec $ns2 ./pm_nl_ctl add 10.0.3.2 flags subflow
-ip netns exec $ns2 ./pm_nl_ctl add 10.0.2.2 flags subflow
-run_tests $ns1 $ns2 10.0.1.1
-chk_join_nr "multiple subflows" 2 2 2
-
-# multiple subflows limited by serverf
-reset
-ip netns exec $ns1 ./pm_nl_ctl limits 0 1
-ip netns exec $ns2 ./pm_nl_ctl limits 0 2
-ip netns exec $ns2 ./pm_nl_ctl add 10.0.3.2 flags subflow
-ip netns exec $ns2 ./pm_nl_ctl add 10.0.2.2 flags subflow
-run_tests $ns1 $ns2 10.0.1.1
-chk_join_nr "multiple subflows, limited by server" 2 2 1
-
-# add_address, unused
-reset
-ip netns exec $ns1 ./pm_nl_ctl add 10.0.2.1 flags signal
-run_tests $ns1 $ns2 10.0.1.1
-chk_join_nr "unused signal address" 0 0 0
-
-# accept and use add_addr
-reset
-ip netns exec $ns1 ./pm_nl_ctl limits 0 1
-ip netns exec $ns2 ./pm_nl_ctl limits 1 1
-ip netns exec $ns1 ./pm_nl_ctl add 10.0.2.1 flags signal
-run_tests $ns1 $ns2 10.0.1.1
-chk_join_nr "signal address" 1 1 1
-
-# accept and use add_addr with an additional subflow
-# note: signal address in server ns and local addresses in client ns must
-# belong to different subnets or one of the listed local address could be
-# used for 'add_addr' subflow
-reset
-ip netns exec $ns1 ./pm_nl_ctl add 10.0.2.1 flags signal
-ip netns exec $ns1 ./pm_nl_ctl limits 0 2
-ip netns exec $ns2 ./pm_nl_ctl limits 1 2
-ip netns exec $ns2 ./pm_nl_ctl add 10.0.3.2 flags subflow
-run_tests $ns1 $ns2 10.0.1.1
-chk_join_nr "subflow and signal" 2 2 2
-
-# accept and use add_addr with additional subflows
-reset
-ip netns exec $ns1 ./pm_nl_ctl limits 0 3
-ip netns exec $ns1 ./pm_nl_ctl add 10.0.2.1 flags signal
-ip netns exec $ns2 ./pm_nl_ctl limits 1 3
-ip netns exec $ns2 ./pm_nl_ctl add 10.0.3.2 flags subflow
-ip netns exec $ns2 ./pm_nl_ctl add 10.0.4.2 flags subflow
-run_tests $ns1 $ns2 10.0.1.1
-chk_join_nr "multiple subflows and signal" 3 3 3
+ ss -N $ns1 -tOni
+ ss -N $ns1 -tOni | grep token
+ ip -n $ns1 mptcp endpoint
+ fi
+}
+
+chk_mptcp_info()
+{
+ local info1=$1
+ local exp1=$2
+ local info2=$3
+ local exp2=$4
+ local cnt1
+ local cnt2
+ local dump_stats
+
+ print_check "mptcp_info ${info1:0:15}=$exp1:$exp2"
+
+ cnt1=$(ss -N $ns1 -inmHM | mptcp_lib_get_info_value "$info1" "$info1")
+ cnt2=$(ss -N $ns2 -inmHM | mptcp_lib_get_info_value "$info2" "$info2")
+ # 'ss' only display active connections and counters that are not 0.
+ [ -z "$cnt1" ] && cnt1=0
+ [ -z "$cnt2" ] && cnt2=0
+
+ if [ "$cnt1" != "$exp1" ] || [ "$cnt2" != "$exp2" ]; then
+ fail_test "got $cnt1:$cnt2 $info1:$info2 expected $exp1:$exp2"
+ dump_stats=1
+ else
+ print_ok
+ fi
+
+ if [ "$dump_stats" = 1 ]; then
+ ss -N $ns1 -inmHM
+ ss -N $ns2 -inmHM
+ fi
+}
+
+# $1: subflows in ns1 ; $2: subflows in ns2
+# number of all subflows, including the initial subflow.
+chk_subflows_total()
+{
+ local cnt1
+ local cnt2
+ local info="subflows_total"
+ local dump_stats
+
+ # if subflows_total counter is supported, use it:
+ if [ -n "$(ss -N $ns1 -inmHM | mptcp_lib_get_info_value $info $info)" ]; then
+ chk_mptcp_info $info $1 $info $2
+ return
+ fi
+
+ print_check "$info $1:$2"
+
+ # if not, count the TCP connections that are in fact MPTCP subflows
+ cnt1=$(ss -N $ns1 -ti state established state syn-sent state syn-recv |
+ grep -c tcp-ulp-mptcp)
+ cnt2=$(ss -N $ns2 -ti state established state syn-sent state syn-recv |
+ grep -c tcp-ulp-mptcp)
+
+ if [ "$1" != "$cnt1" ] || [ "$2" != "$cnt2" ]; then
+ fail_test "got subflows $cnt1:$cnt2 expected $1:$2"
+ dump_stats=1
+ else
+ print_ok
+ fi
+
+ if [ "$dump_stats" = 1 ]; then
+ ss -N $ns1 -ti
+ ss -N $ns2 -ti
+ fi
+}
+
+chk_link_usage()
+{
+ local ns=$1
+ local link=$2
+ local out=$3
+ local expected_rate=$4
+
+ local tx_link tx_total
+ tx_link=$(ip netns exec $ns cat /sys/class/net/$link/statistics/tx_bytes)
+ tx_total=$(stat --format=%s $out)
+ local tx_rate=$((tx_link * 100 / tx_total))
+ local tolerance=5
+
+ print_check "link usage"
+ if [ $tx_rate -lt $((expected_rate - tolerance)) ] || \
+ [ $tx_rate -gt $((expected_rate + tolerance)) ]; then
+ fail_test "got $tx_rate% usage, expected $expected_rate%"
+ else
+ print_ok
+ fi
+}
+
+wait_attempt_fail()
+{
+ local timeout_ms=$((timeout_poll * 1000))
+ local time=0
+ local ns=$1
+
+ while [ $time -lt $timeout_ms ]; do
+ local cnt
+
+ cnt=$(mptcp_lib_get_counter ${ns} "TcpAttemptFails")
+
+ [ "$cnt" = 1 ] && return 1
+ time=$((time + 100))
+ sleep 0.1
+ done
+ return 1
+}
+
+set_userspace_pm()
+{
+ local ns=$1
+
+ ip netns exec $ns sysctl -q net.mptcp.pm_type=1
+}
+
+subflows_tests()
+{
+ if reset "no JOIN"; then
+ run_tests $ns1 $ns2 10.0.1.1
+ chk_join_nr 0 0 0
+ fi
+
+ # subflow limited by client
+ if reset "single subflow, limited by client"; then
+ pm_nl_set_limits $ns1 0 0
+ pm_nl_set_limits $ns2 0 0
+ pm_nl_add_endpoint $ns2 10.0.3.2 flags subflow
+ run_tests $ns1 $ns2 10.0.1.1
+ chk_join_nr 0 0 0
+ fi
+
+ # subflow limited by server
+ if reset "single subflow, limited by server"; then
+ pm_nl_set_limits $ns1 0 0
+ pm_nl_set_limits $ns2 0 1
+ pm_nl_add_endpoint $ns2 10.0.3.2 flags subflow
+ run_tests $ns1 $ns2 10.0.1.1
+ chk_join_nr 1 1 0
+ fi
+
+ # subflow
+ if reset "single subflow"; then
+ pm_nl_set_limits $ns1 0 1
+ pm_nl_set_limits $ns2 0 1
+ pm_nl_add_endpoint $ns2 10.0.3.2 flags subflow
+ run_tests $ns1 $ns2 10.0.1.1
+ chk_join_nr 1 1 1
+ fi
+
+ # multiple subflows
+ if reset "multiple subflows"; then
+ pm_nl_set_limits $ns1 0 2
+ pm_nl_set_limits $ns2 0 2
+ pm_nl_add_endpoint $ns2 10.0.3.2 flags subflow
+ pm_nl_add_endpoint $ns2 10.0.2.2 flags subflow
+ run_tests $ns1 $ns2 10.0.1.1
+ chk_join_nr 2 2 2
+ fi
+
+ # multiple subflows limited by server
+ if reset "multiple subflows, limited by server"; then
+ pm_nl_set_limits $ns1 0 1
+ pm_nl_set_limits $ns2 0 2
+ pm_nl_add_endpoint $ns2 10.0.3.2 flags subflow
+ pm_nl_add_endpoint $ns2 10.0.2.2 flags subflow
+ run_tests $ns1 $ns2 10.0.1.1
+ chk_join_nr 2 2 1
+ fi
+
+ # single subflow, dev
+ if reset "single subflow, dev"; then
+ pm_nl_set_limits $ns1 0 1
+ pm_nl_set_limits $ns2 0 1
+ pm_nl_add_endpoint $ns2 10.0.3.2 flags subflow dev ns2eth3
+ run_tests $ns1 $ns2 10.0.1.1
+ chk_join_nr 1 1 1
+ fi
+}
+
+subflows_error_tests()
+{
+ # If a single subflow is configured, and matches the MPC src
+ # address, no additional subflow should be created
+ if reset "no MPC reuse with single endpoint"; then
+ pm_nl_set_limits $ns1 0 1
+ pm_nl_set_limits $ns2 0 1
+ pm_nl_add_endpoint $ns2 10.0.1.2 flags subflow
+ speed=slow \
+ run_tests $ns1 $ns2 10.0.1.1
+ chk_join_nr 0 0 0
+ fi
+
+ # multiple subflows, with subflow creation error
+ if reset_with_tcp_filter "multi subflows, with failing subflow" ns1 10.0.3.2 REJECT &&
+ continue_if mptcp_lib_kallsyms_has "mptcp_pm_subflow_check_next$"; then
+ pm_nl_set_limits $ns1 0 2
+ pm_nl_set_limits $ns2 0 2
+ pm_nl_add_endpoint $ns2 10.0.3.2 flags subflow
+ pm_nl_add_endpoint $ns2 10.0.2.2 flags subflow
+ speed=slow \
+ run_tests $ns1 $ns2 10.0.1.1
+ chk_join_nr 1 1 1
+ fi
+
+ # multiple subflows, with subflow timeout on MPJ
+ if reset_with_tcp_filter "multi subflows, with subflow timeout" ns1 10.0.3.2 DROP &&
+ continue_if mptcp_lib_kallsyms_has "mptcp_pm_subflow_check_next$"; then
+ pm_nl_set_limits $ns1 0 2
+ pm_nl_set_limits $ns2 0 2
+ pm_nl_add_endpoint $ns2 10.0.3.2 flags subflow
+ pm_nl_add_endpoint $ns2 10.0.2.2 flags subflow
+ speed=slow \
+ run_tests $ns1 $ns2 10.0.1.1
+ chk_join_nr 1 1 1
+ fi
+
+ # multiple subflows, check that the endpoint corresponding to
+ # closed subflow (due to reset) is not reused if additional
+ # subflows are added later
+ if reset_with_tcp_filter "multi subflows, fair usage on close" ns1 10.0.3.2 REJECT &&
+ continue_if mptcp_lib_kallsyms_has "mptcp_pm_subflow_check_next$"; then
+ pm_nl_set_limits $ns1 0 1
+ pm_nl_set_limits $ns2 0 1
+ pm_nl_add_endpoint $ns2 10.0.3.2 flags subflow
+ speed=slow \
+ run_tests $ns1 $ns2 10.0.1.1 &
+
+ # mpj subflow will be in TW after the reset
+ wait_attempt_fail $ns2
+ pm_nl_add_endpoint $ns2 10.0.2.2 flags subflow
+ wait
+
+ # additional subflow could be created only if the PM select
+ # the later endpoint, skipping the already used one
+ chk_join_nr 1 1 1
+ fi
+}
+
+signal_address_tests()
+{
+ # add_address, unused
+ if reset "unused signal address"; then
+ pm_nl_add_endpoint $ns1 10.0.2.1 flags signal
+ run_tests $ns1 $ns2 10.0.1.1
+ chk_join_nr 0 0 0
+ chk_add_tx_nr 1 1
+ chk_add_nr 1 1
+ fi
+
+ # accept and use add_addr
+ if reset "signal address"; then
+ pm_nl_set_limits $ns1 0 1
+ pm_nl_set_limits $ns2 1 1
+ pm_nl_add_endpoint $ns1 10.0.2.1 flags signal
+ run_tests $ns1 $ns2 10.0.1.1
+ chk_join_nr 1 1 1
+ chk_add_nr 1 1
+ fi
+
+ # accept and use add_addr with an additional subflow
+ # note: signal address in server ns and local addresses in client ns must
+ # belong to different subnets or one of the listed local address could be
+ # used for 'add_addr' subflow
+ if reset "subflow and signal"; then
+ pm_nl_add_endpoint $ns1 10.0.2.1 flags signal
+ pm_nl_set_limits $ns1 0 2
+ pm_nl_set_limits $ns2 1 2
+ pm_nl_add_endpoint $ns2 10.0.3.2 flags subflow
+ run_tests $ns1 $ns2 10.0.1.1
+ chk_join_nr 2 2 2
+ chk_add_nr 1 1
+ fi
+
+ # accept and use add_addr with additional subflows
+ if reset "multiple subflows and signal"; then
+ pm_nl_set_limits $ns1 0 3
+ pm_nl_add_endpoint $ns1 10.0.2.1 flags signal
+ pm_nl_set_limits $ns2 1 3
+ pm_nl_add_endpoint $ns2 10.0.3.2 flags subflow
+ pm_nl_add_endpoint $ns2 10.0.4.2 flags subflow
+ run_tests $ns1 $ns2 10.0.1.1
+ chk_join_nr 3 3 3
+ chk_add_nr 1 1
+ fi
+
+ # signal addresses
+ if reset "signal addresses"; then
+ pm_nl_set_limits $ns1 3 3
+ pm_nl_add_endpoint $ns1 10.0.2.1 flags signal
+ pm_nl_add_endpoint $ns1 10.0.3.1 flags signal
+ pm_nl_add_endpoint $ns1 10.0.4.1 flags signal
+ pm_nl_set_limits $ns2 3 3
+ run_tests $ns1 $ns2 10.0.1.1
+ chk_join_nr 3 3 3
+ chk_add_nr 3 3
+ fi
+
+ # signal invalid addresses
+ if reset "signal invalid addresses"; then
+ pm_nl_set_limits $ns1 3 3
+ pm_nl_add_endpoint $ns1 10.0.12.1 flags signal
+ pm_nl_add_endpoint $ns1 10.0.3.1 flags signal
+ pm_nl_add_endpoint $ns1 10.0.14.1 flags signal
+ pm_nl_set_limits $ns2 3 3
+ run_tests $ns1 $ns2 10.0.1.1
+ chk_join_nr 1 1 1
+ chk_add_nr 3 3
+ fi
+
+ # signal addresses race test
+ if reset "signal addresses race test"; then
+ pm_nl_set_limits $ns1 4 4
+ pm_nl_set_limits $ns2 4 4
+ pm_nl_add_endpoint $ns1 10.0.1.1 flags signal
+ pm_nl_add_endpoint $ns1 10.0.2.1 flags signal
+ pm_nl_add_endpoint $ns1 10.0.3.1 flags signal
+ pm_nl_add_endpoint $ns1 10.0.4.1 flags signal
+ pm_nl_add_endpoint $ns2 10.0.1.2 flags signal
+ pm_nl_add_endpoint $ns2 10.0.2.2 flags signal
+ pm_nl_add_endpoint $ns2 10.0.3.2 flags signal
+ pm_nl_add_endpoint $ns2 10.0.4.2 flags signal
+
+ # the peer could possibly miss some addr notification, allow retransmission
+ ip netns exec $ns1 sysctl -q net.mptcp.add_addr_timeout=1
+ speed=slow \
+ run_tests $ns1 $ns2 10.0.1.1
+
+ # It is not directly linked to the commit introducing this
+ # symbol but for the parent one which is linked anyway.
+ if ! mptcp_lib_kallsyms_has "mptcp_pm_subflow_check_next$"; then
+ chk_join_nr 3 3 2
+ chk_add_nr 4 4
+ else
+ chk_join_nr 3 3 3
+ # the server will not signal the address terminating
+ # the MPC subflow
+ chk_add_nr 3 3
+ fi
+ fi
+}
+
+link_failure_tests()
+{
+ # accept and use add_addr with additional subflows and link loss
+ if reset "multiple flows, signal, link failure"; then
+ # without any b/w limit each veth could spool the packets and get
+ # them acked at xmit time, so that the corresponding subflow will
+ # have almost always no outstanding pkts, the scheduler will pick
+ # always the first subflow and we will have hard time testing
+ # active backup and link switch-over.
+ # Let's set some arbitrary (low) virtual link limits.
+ init_shapers
+ pm_nl_set_limits $ns1 0 3
+ pm_nl_add_endpoint $ns1 10.0.2.1 dev ns1eth2 flags signal
+ pm_nl_set_limits $ns2 1 3
+ pm_nl_add_endpoint $ns2 10.0.3.2 dev ns2eth3 flags subflow
+ pm_nl_add_endpoint $ns2 10.0.4.2 dev ns2eth4 flags subflow
+ test_linkfail=1 \
+ run_tests $ns1 $ns2 10.0.1.1
+ chk_join_nr 3 3 3
+ chk_add_nr 1 1
+ chk_stale_nr $ns2 1 5 1
+ fi
+
+ # accept and use add_addr with additional subflows and link loss
+ # for bidirectional transfer
+ if reset "multi flows, signal, bidi, link fail"; then
+ init_shapers
+ pm_nl_set_limits $ns1 0 3
+ pm_nl_add_endpoint $ns1 10.0.2.1 dev ns1eth2 flags signal
+ pm_nl_set_limits $ns2 1 3
+ pm_nl_add_endpoint $ns2 10.0.3.2 dev ns2eth3 flags subflow
+ pm_nl_add_endpoint $ns2 10.0.4.2 dev ns2eth4 flags subflow
+ test_linkfail=2 \
+ run_tests $ns1 $ns2 10.0.1.1
+ chk_join_nr 3 3 3
+ chk_add_nr 1 1
+ chk_stale_nr $ns2 1 -1 1
+ fi
+
+ # 2 subflows plus 1 backup subflow with a lossy link, backup
+ # will never be used
+ if reset "backup subflow unused, link failure"; then
+ init_shapers
+ pm_nl_set_limits $ns1 0 2
+ pm_nl_add_endpoint $ns1 10.0.2.1 dev ns1eth2 flags signal
+ pm_nl_set_limits $ns2 1 2
+ pm_nl_add_endpoint $ns2 10.0.3.2 dev ns2eth3 flags subflow,backup
+ FAILING_LINKS="1" test_linkfail=1 \
+ run_tests $ns1 $ns2 10.0.1.1
+ chk_join_nr 2 2 2
+ chk_add_nr 1 1
+ chk_link_usage $ns2 ns2eth3 $cinsent 0
+ fi
+
+ # 2 lossy links after half transfer, backup will get half of
+ # the traffic
+ if reset "backup flow used, multi links fail"; then
+ init_shapers
+ pm_nl_set_limits $ns1 0 2
+ pm_nl_add_endpoint $ns1 10.0.2.1 dev ns1eth2 flags signal
+ pm_nl_set_limits $ns2 1 2
+ pm_nl_add_endpoint $ns2 10.0.3.2 dev ns2eth3 flags subflow,backup
+ FAILING_LINKS="1 2" test_linkfail=1 \
+ run_tests $ns1 $ns2 10.0.1.1
+ chk_join_nr 2 2 2
+ chk_add_nr 1 1
+ chk_stale_nr $ns2 2 4 2
+ chk_link_usage $ns2 ns2eth3 $cinsent 50
+ fi
+
+ # use a backup subflow with the first subflow on a lossy link
+ # for bidirectional transfer
+ if reset "backup flow used, bidi, link failure"; then
+ init_shapers
+ pm_nl_set_limits $ns1 0 2
+ pm_nl_add_endpoint $ns1 10.0.2.1 dev ns1eth2 flags signal
+ pm_nl_set_limits $ns2 1 3
+ pm_nl_add_endpoint $ns2 10.0.3.2 dev ns2eth3 flags subflow,backup
+ FAILING_LINKS="1 2" test_linkfail=2 \
+ run_tests $ns1 $ns2 10.0.1.1
+ chk_join_nr 2 2 2
+ chk_add_nr 1 1
+ chk_stale_nr $ns2 1 -1 2
+ chk_link_usage $ns2 ns2eth3 $cinsent 50
+ fi
+}
+
+add_addr_timeout_tests()
+{
+ # add_addr timeout
+ if reset_with_add_addr_timeout "signal address, ADD_ADDR timeout"; then
+ pm_nl_set_limits $ns1 0 1
+ pm_nl_set_limits $ns2 1 1
+ pm_nl_add_endpoint $ns1 10.0.2.1 flags signal
+ speed=slow \
+ run_tests $ns1 $ns2 10.0.1.1
+ chk_join_nr 1 1 1
+ chk_add_tx_nr 4 4
+ chk_add_nr 4 0
+ fi
+
+ # add_addr timeout IPv6
+ if reset_with_add_addr_timeout "signal address, ADD_ADDR6 timeout" 6; then
+ pm_nl_set_limits $ns1 0 1
+ pm_nl_set_limits $ns2 1 1
+ pm_nl_add_endpoint $ns1 dead:beef:2::1 flags signal
+ speed=slow \
+ run_tests $ns1 $ns2 dead:beef:1::1
+ chk_join_nr 1 1 1
+ chk_add_nr 4 0
+ fi
+
+ # signal addresses timeout
+ if reset_with_add_addr_timeout "signal addresses, ADD_ADDR timeout"; then
+ pm_nl_set_limits $ns1 2 2
+ pm_nl_add_endpoint $ns1 10.0.2.1 flags signal
+ pm_nl_add_endpoint $ns1 10.0.3.1 flags signal
+ pm_nl_set_limits $ns2 2 2
+ speed=10 \
+ run_tests $ns1 $ns2 10.0.1.1
+ chk_join_nr 2 2 2
+ chk_add_nr 8 0
+ fi
+
+ # signal invalid addresses timeout
+ if reset_with_add_addr_timeout "invalid address, ADD_ADDR timeout"; then
+ pm_nl_set_limits $ns1 2 2
+ pm_nl_add_endpoint $ns1 10.0.12.1 flags signal
+ pm_nl_add_endpoint $ns1 10.0.3.1 flags signal
+ pm_nl_set_limits $ns2 2 2
+ speed=10 \
+ run_tests $ns1 $ns2 10.0.1.1
+ chk_join_nr 1 1 1
+ chk_add_nr 8 0
+ fi
+}
+
+remove_tests()
+{
+ # single subflow, remove
+ if reset "remove single subflow"; then
+ pm_nl_set_limits $ns1 0 1
+ pm_nl_set_limits $ns2 0 1
+ pm_nl_add_endpoint $ns2 10.0.3.2 flags subflow
+ addr_nr_ns2=-1 speed=slow \
+ run_tests $ns1 $ns2 10.0.1.1
+ chk_join_nr 1 1 1
+ chk_rm_tx_nr 1
+ chk_rm_nr 1 1
+ chk_rst_nr 0 0
+ fi
+
+ # multiple subflows, remove
+ if reset "remove multiple subflows"; then
+ pm_nl_set_limits $ns1 0 2
+ pm_nl_set_limits $ns2 0 2
+ pm_nl_add_endpoint $ns2 10.0.2.2 flags subflow
+ pm_nl_add_endpoint $ns2 10.0.3.2 flags subflow
+ addr_nr_ns2=-2 speed=slow \
+ run_tests $ns1 $ns2 10.0.1.1
+ chk_join_nr 2 2 2
+ chk_rm_nr 2 2
+ chk_rst_nr 0 0
+ fi
+
+ # single address, remove
+ if reset "remove single address"; then
+ pm_nl_set_limits $ns1 0 1
+ pm_nl_add_endpoint $ns1 10.0.2.1 flags signal
+ pm_nl_set_limits $ns2 1 1
+ addr_nr_ns1=-1 speed=slow \
+ run_tests $ns1 $ns2 10.0.1.1
+ chk_join_nr 1 1 1
+ chk_add_nr 1 1
+ chk_rm_nr 1 1 invert
+ chk_rst_nr 0 0
+ fi
+
+ # subflow and signal, remove
+ if reset "remove subflow and signal"; then
+ pm_nl_set_limits $ns1 0 2
+ pm_nl_add_endpoint $ns1 10.0.2.1 flags signal
+ pm_nl_set_limits $ns2 1 2
+ pm_nl_add_endpoint $ns2 10.0.3.2 flags subflow
+ addr_nr_ns1=-1 addr_nr_ns2=-1 speed=slow \
+ run_tests $ns1 $ns2 10.0.1.1
+ chk_join_nr 2 2 2
+ chk_add_nr 1 1
+ chk_rm_nr 1 1
+ chk_rst_nr 0 0
+ fi
+
+ # subflows and signal, remove
+ if reset "remove subflows and signal"; then
+ pm_nl_set_limits $ns1 0 3
+ pm_nl_add_endpoint $ns1 10.0.2.1 flags signal
+ pm_nl_set_limits $ns2 1 3
+ pm_nl_add_endpoint $ns2 10.0.3.2 flags subflow
+ pm_nl_add_endpoint $ns2 10.0.4.2 flags subflow
+ addr_nr_ns1=-1 addr_nr_ns2=-2 speed=10 \
+ run_tests $ns1 $ns2 10.0.1.1
+ chk_join_nr 3 3 3
+ chk_add_nr 1 1
+ chk_rm_nr 2 2
+ chk_rst_nr 0 0
+ fi
+
+ # addresses remove
+ if reset "remove addresses"; then
+ pm_nl_set_limits $ns1 3 3
+ pm_nl_add_endpoint $ns1 10.0.2.1 flags signal id 250
+ pm_nl_add_endpoint $ns1 10.0.3.1 flags signal
+ pm_nl_add_endpoint $ns1 10.0.4.1 flags signal
+ pm_nl_set_limits $ns2 3 3
+ addr_nr_ns1=-3 speed=10 \
+ run_tests $ns1 $ns2 10.0.1.1
+ chk_join_nr 3 3 3
+ chk_add_nr 3 3
+ chk_rm_nr 3 3 invert
+ chk_rst_nr 0 0
+ fi
+
+ # invalid addresses remove
+ if reset "remove invalid addresses"; then
+ pm_nl_set_limits $ns1 3 3
+ pm_nl_add_endpoint $ns1 10.0.12.1 flags signal
+ pm_nl_add_endpoint $ns1 10.0.3.1 flags signal
+ pm_nl_add_endpoint $ns1 10.0.14.1 flags signal
+ pm_nl_set_limits $ns2 3 3
+ addr_nr_ns1=-3 speed=10 \
+ run_tests $ns1 $ns2 10.0.1.1
+ chk_join_nr 1 1 1
+ chk_add_nr 3 3
+ chk_rm_nr 3 1 invert
+ chk_rst_nr 0 0
+ fi
+
+ # subflows and signal, flush
+ if reset "flush subflows and signal"; then
+ pm_nl_set_limits $ns1 0 3
+ pm_nl_add_endpoint $ns1 10.0.2.1 flags signal
+ pm_nl_set_limits $ns2 1 3
+ pm_nl_add_endpoint $ns2 10.0.3.2 flags subflow
+ pm_nl_add_endpoint $ns2 10.0.4.2 flags subflow
+ addr_nr_ns1=-8 addr_nr_ns2=-8 speed=slow \
+ run_tests $ns1 $ns2 10.0.1.1
+ chk_join_nr 3 3 3
+ chk_add_nr 1 1
+ chk_rm_nr 1 3 invert simult
+ chk_rst_nr 0 0
+ fi
+
+ # subflows flush
+ if reset "flush subflows"; then
+ pm_nl_set_limits $ns1 3 3
+ pm_nl_set_limits $ns2 3 3
+ pm_nl_add_endpoint $ns2 10.0.2.2 flags subflow id 150
+ pm_nl_add_endpoint $ns2 10.0.3.2 flags subflow
+ pm_nl_add_endpoint $ns2 10.0.4.2 flags subflow
+ addr_nr_ns1=-8 addr_nr_ns2=-8 speed=slow \
+ run_tests $ns1 $ns2 10.0.1.1
+ chk_join_nr 3 3 3
+
+ if mptcp_lib_kversion_ge 5.18; then
+ chk_rm_tx_nr 0
+ chk_rm_nr 0 3 simult
+ else
+ chk_rm_nr 3 3
+ fi
+ chk_rst_nr 0 0
+ fi
+
+ # addresses flush
+ if reset "flush addresses"; then
+ pm_nl_set_limits $ns1 3 3
+ pm_nl_add_endpoint $ns1 10.0.2.1 flags signal id 250
+ pm_nl_add_endpoint $ns1 10.0.3.1 flags signal
+ pm_nl_add_endpoint $ns1 10.0.4.1 flags signal
+ pm_nl_set_limits $ns2 3 3
+ addr_nr_ns1=-8 addr_nr_ns2=-8 speed=slow \
+ run_tests $ns1 $ns2 10.0.1.1
+ chk_join_nr 3 3 3
+ chk_add_nr 3 3
+ chk_rm_nr 3 3 invert simult
+ chk_rst_nr 0 0
+ fi
+
+ # invalid addresses flush
+ if reset "flush invalid addresses"; then
+ pm_nl_set_limits $ns1 3 3
+ pm_nl_add_endpoint $ns1 10.0.12.1 flags signal
+ pm_nl_add_endpoint $ns1 10.0.3.1 flags signal
+ pm_nl_add_endpoint $ns1 10.0.14.1 flags signal
+ pm_nl_set_limits $ns2 3 3
+ addr_nr_ns1=-8 speed=slow \
+ run_tests $ns1 $ns2 10.0.1.1
+ chk_join_nr 1 1 1
+ chk_add_nr 3 3
+ chk_rm_nr 3 1 invert
+ chk_rst_nr 0 0
+ fi
+
+ # remove id 0 subflow
+ if reset "remove id 0 subflow"; then
+ pm_nl_set_limits $ns1 0 1
+ pm_nl_set_limits $ns2 0 1
+ pm_nl_add_endpoint $ns2 10.0.3.2 flags subflow
+ addr_nr_ns2=-9 speed=slow \
+ run_tests $ns1 $ns2 10.0.1.1
+ chk_join_nr 1 1 1
+ chk_rm_nr 1 1
+ chk_rst_nr 0 0
+ fi
+
+ # remove id 0 address
+ if reset "remove id 0 address"; then
+ pm_nl_set_limits $ns1 0 1
+ pm_nl_add_endpoint $ns1 10.0.2.1 flags signal
+ pm_nl_set_limits $ns2 1 1
+ addr_nr_ns1=-9 speed=slow \
+ run_tests $ns1 $ns2 10.0.1.1
+ chk_join_nr 1 1 1
+ chk_add_nr 1 1
+ chk_rm_nr 1 1 invert
+ chk_rst_nr 0 0 invert
+ fi
+}
+
+add_tests()
+{
+ # add single subflow
+ if reset "add single subflow"; then
+ pm_nl_set_limits $ns1 0 1
+ pm_nl_set_limits $ns2 0 1
+ addr_nr_ns2=1 speed=slow cestab_ns2=1 \
+ run_tests $ns1 $ns2 10.0.1.1
+ chk_join_nr 1 1 1
+ chk_cestab_nr $ns2 0
+ fi
+
+ # add signal address
+ if reset "add signal address"; then
+ pm_nl_set_limits $ns1 0 1
+ pm_nl_set_limits $ns2 1 1
+ addr_nr_ns1=1 speed=slow cestab_ns1=1 \
+ run_tests $ns1 $ns2 10.0.1.1
+ chk_join_nr 1 1 1
+ chk_add_nr 1 1
+ chk_cestab_nr $ns1 0
+ fi
+
+ # add multiple subflows
+ if reset "add multiple subflows"; then
+ pm_nl_set_limits $ns1 0 2
+ pm_nl_set_limits $ns2 0 2
+ addr_nr_ns2=2 speed=slow cestab_ns2=1 \
+ run_tests $ns1 $ns2 10.0.1.1
+ chk_join_nr 2 2 2
+ chk_cestab_nr $ns2 0
+ fi
+
+ # add multiple subflows IPv6
+ if reset "add multiple subflows IPv6"; then
+ pm_nl_set_limits $ns1 0 2
+ pm_nl_set_limits $ns2 0 2
+ addr_nr_ns2=2 speed=slow cestab_ns2=1 \
+ run_tests $ns1 $ns2 dead:beef:1::1
+ chk_join_nr 2 2 2
+ chk_cestab_nr $ns2 0
+ fi
+
+ # add multiple addresses IPv6
+ if reset "add multiple addresses IPv6"; then
+ pm_nl_set_limits $ns1 0 2
+ pm_nl_set_limits $ns2 2 2
+ addr_nr_ns1=2 speed=slow cestab_ns1=1 \
+ run_tests $ns1 $ns2 dead:beef:1::1
+ chk_join_nr 2 2 2
+ chk_add_nr 2 2
+ chk_cestab_nr $ns1 0
+ fi
+}
+
+ipv6_tests()
+{
+ # subflow IPv6
+ if reset "single subflow IPv6"; then
+ pm_nl_set_limits $ns1 0 1
+ pm_nl_set_limits $ns2 0 1
+ pm_nl_add_endpoint $ns2 dead:beef:3::2 dev ns2eth3 flags subflow
+ speed=slow \
+ run_tests $ns1 $ns2 dead:beef:1::1
+ chk_join_nr 1 1 1
+ fi
+
+ # add_address, unused IPv6
+ if reset "unused signal address IPv6"; then
+ pm_nl_add_endpoint $ns1 dead:beef:2::1 flags signal
+ speed=slow \
+ run_tests $ns1 $ns2 dead:beef:1::1
+ chk_join_nr 0 0 0
+ chk_add_nr 1 1
+ fi
+
+ # signal address IPv6
+ if reset "single address IPv6"; then
+ pm_nl_set_limits $ns1 0 1
+ pm_nl_add_endpoint $ns1 dead:beef:2::1 flags signal
+ pm_nl_set_limits $ns2 1 1
+ speed=slow \
+ run_tests $ns1 $ns2 dead:beef:1::1
+ chk_join_nr 1 1 1
+ chk_add_nr 1 1
+ fi
+
+ # single address IPv6, remove
+ if reset "remove single address IPv6"; then
+ pm_nl_set_limits $ns1 0 1
+ pm_nl_add_endpoint $ns1 dead:beef:2::1 flags signal
+ pm_nl_set_limits $ns2 1 1
+ addr_nr_ns1=-1 speed=slow \
+ run_tests $ns1 $ns2 dead:beef:1::1
+ chk_join_nr 1 1 1
+ chk_add_nr 1 1
+ chk_rm_nr 1 1 invert
+ fi
+
+ # subflow and signal IPv6, remove
+ if reset "remove subflow and signal IPv6"; then
+ pm_nl_set_limits $ns1 0 2
+ pm_nl_add_endpoint $ns1 dead:beef:2::1 flags signal
+ pm_nl_set_limits $ns2 1 2
+ pm_nl_add_endpoint $ns2 dead:beef:3::2 dev ns2eth3 flags subflow
+ addr_nr_ns1=-1 addr_nr_ns2=-1 speed=slow \
+ run_tests $ns1 $ns2 dead:beef:1::1
+ chk_join_nr 2 2 2
+ chk_add_nr 1 1
+ chk_rm_nr 1 1
+ fi
+}
+
+v4mapped_tests()
+{
+ # subflow IPv4-mapped to IPv4-mapped
+ if reset "single subflow IPv4-mapped"; then
+ pm_nl_set_limits $ns1 0 1
+ pm_nl_set_limits $ns2 0 1
+ pm_nl_add_endpoint $ns2 "::ffff:10.0.3.2" flags subflow
+ run_tests $ns1 $ns2 "::ffff:10.0.1.1"
+ chk_join_nr 1 1 1
+ fi
+
+ # signal address IPv4-mapped with IPv4-mapped sk
+ if reset "signal address IPv4-mapped"; then
+ pm_nl_set_limits $ns1 0 1
+ pm_nl_set_limits $ns2 1 1
+ pm_nl_add_endpoint $ns1 "::ffff:10.0.2.1" flags signal
+ run_tests $ns1 $ns2 "::ffff:10.0.1.1"
+ chk_join_nr 1 1 1
+ chk_add_nr 1 1
+ fi
+
+ # subflow v4-map-v6
+ if reset "single subflow v4-map-v6"; then
+ pm_nl_set_limits $ns1 0 1
+ pm_nl_set_limits $ns2 0 1
+ pm_nl_add_endpoint $ns2 10.0.3.2 flags subflow
+ run_tests $ns1 $ns2 "::ffff:10.0.1.1"
+ chk_join_nr 1 1 1
+ fi
+
+ # signal address v4-map-v6
+ if reset "signal address v4-map-v6"; then
+ pm_nl_set_limits $ns1 0 1
+ pm_nl_set_limits $ns2 1 1
+ pm_nl_add_endpoint $ns1 10.0.2.1 flags signal
+ run_tests $ns1 $ns2 "::ffff:10.0.1.1"
+ chk_join_nr 1 1 1
+ chk_add_nr 1 1
+ fi
+
+ # subflow v6-map-v4
+ if reset "single subflow v6-map-v4"; then
+ pm_nl_set_limits $ns1 0 1
+ pm_nl_set_limits $ns2 0 1
+ pm_nl_add_endpoint $ns2 "::ffff:10.0.3.2" flags subflow
+ run_tests $ns1 $ns2 10.0.1.1
+ chk_join_nr 1 1 1
+ fi
+
+ # signal address v6-map-v4
+ if reset "signal address v6-map-v4"; then
+ pm_nl_set_limits $ns1 0 1
+ pm_nl_set_limits $ns2 1 1
+ pm_nl_add_endpoint $ns1 "::ffff:10.0.2.1" flags signal
+ run_tests $ns1 $ns2 10.0.1.1
+ chk_join_nr 1 1 1
+ chk_add_nr 1 1
+ fi
+
+ # no subflow IPv6 to v4 address
+ if reset "no JOIN with diff families v4-v6"; then
+ pm_nl_set_limits $ns1 0 1
+ pm_nl_set_limits $ns2 0 1
+ pm_nl_add_endpoint $ns2 dead:beef:2::2 flags subflow
+ run_tests $ns1 $ns2 10.0.1.1
+ chk_join_nr 0 0 0
+ fi
+
+ # no subflow IPv6 to v4 address even if v6 has a valid v4 at the end
+ if reset "no JOIN with diff families v4-v6-2"; then
+ pm_nl_set_limits $ns1 0 1
+ pm_nl_set_limits $ns2 0 1
+ pm_nl_add_endpoint $ns2 dead:beef:2::10.0.3.2 flags subflow
+ run_tests $ns1 $ns2 10.0.1.1
+ chk_join_nr 0 0 0
+ fi
+
+ # no subflow IPv4 to v6 address, no need to slow down too then
+ if reset "no JOIN with diff families v6-v4"; then
+ pm_nl_set_limits $ns1 0 1
+ pm_nl_set_limits $ns2 0 1
+ pm_nl_add_endpoint $ns2 10.0.3.2 flags subflow
+ run_tests $ns1 $ns2 dead:beef:1::1
+ chk_join_nr 0 0 0
+ fi
+}
+
+mixed_tests()
+{
+ if reset "IPv4 sockets do not use IPv6 addresses" &&
+ continue_if mptcp_lib_kversion_ge 6.3; then
+ pm_nl_set_limits $ns1 0 1
+ pm_nl_set_limits $ns2 1 1
+ pm_nl_add_endpoint $ns1 dead:beef:2::1 flags signal
+ speed=slow \
+ run_tests $ns1 $ns2 10.0.1.1
+ chk_join_nr 0 0 0
+ fi
+
+ # Need an IPv6 mptcp socket to allow subflows of both families
+ if reset "simult IPv4 and IPv6 subflows" &&
+ continue_if mptcp_lib_kversion_ge 6.3; then
+ pm_nl_set_limits $ns1 0 1
+ pm_nl_set_limits $ns2 1 1
+ pm_nl_add_endpoint $ns1 10.0.1.1 flags signal
+ speed=slow \
+ run_tests $ns1 $ns2 dead:beef:2::1
+ chk_join_nr 1 1 1
+ fi
+
+ # cross families subflows will not be created even in fullmesh mode
+ if reset "simult IPv4 and IPv6 subflows, fullmesh 1x1" &&
+ continue_if mptcp_lib_kversion_ge 6.3; then
+ pm_nl_set_limits $ns1 0 4
+ pm_nl_set_limits $ns2 1 4
+ pm_nl_add_endpoint $ns2 dead:beef:2::2 flags subflow,fullmesh
+ pm_nl_add_endpoint $ns1 10.0.1.1 flags signal
+ speed=slow \
+ run_tests $ns1 $ns2 dead:beef:2::1
+ chk_join_nr 1 1 1
+ fi
+
+ # fullmesh still tries to create all the possibly subflows with
+ # matching family
+ if reset "simult IPv4 and IPv6 subflows, fullmesh 2x2" &&
+ continue_if mptcp_lib_kversion_ge 6.3; then
+ pm_nl_set_limits $ns1 0 4
+ pm_nl_set_limits $ns2 2 4
+ pm_nl_add_endpoint $ns1 10.0.2.1 flags signal
+ pm_nl_add_endpoint $ns1 dead:beef:2::1 flags signal
+ fullmesh=1 speed=slow \
+ run_tests $ns1 $ns2 dead:beef:1::1
+ chk_join_nr 4 4 4
+ fi
+}
+
+backup_tests()
+{
+ # single subflow, backup
+ if reset "single subflow, backup" &&
+ continue_if mptcp_lib_kallsyms_has "subflow_rebuild_header$"; then
+ pm_nl_set_limits $ns1 0 1
+ pm_nl_set_limits $ns2 0 1
+ pm_nl_add_endpoint $ns2 10.0.3.2 flags subflow,backup
+ sflags=nobackup speed=slow \
+ run_tests $ns1 $ns2 10.0.1.1
+ chk_join_nr 1 1 1
+ chk_prio_nr 0 1
+ fi
+
+ # single address, backup
+ if reset "single address, backup" &&
+ continue_if mptcp_lib_kallsyms_has "subflow_rebuild_header$"; then
+ pm_nl_set_limits $ns1 0 1
+ pm_nl_add_endpoint $ns1 10.0.2.1 flags signal
+ pm_nl_set_limits $ns2 1 1
+ sflags=backup speed=slow \
+ run_tests $ns1 $ns2 10.0.1.1
+ chk_join_nr 1 1 1
+ chk_add_nr 1 1
+ chk_prio_nr 1 1
+ fi
+
+ # single address with port, backup
+ if reset "single address with port, backup" &&
+ continue_if mptcp_lib_kallsyms_has "subflow_rebuild_header$"; then
+ pm_nl_set_limits $ns1 0 1
+ pm_nl_add_endpoint $ns1 10.0.2.1 flags signal port 10100
+ pm_nl_set_limits $ns2 1 1
+ sflags=backup speed=slow \
+ run_tests $ns1 $ns2 10.0.1.1
+ chk_join_nr 1 1 1
+ chk_add_nr 1 1
+ chk_prio_nr 1 1
+ fi
+
+ if reset "mpc backup" &&
+ continue_if mptcp_lib_kallsyms_doesnt_have "T mptcp_subflow_send_ack$"; then
+ pm_nl_add_endpoint $ns2 10.0.1.2 flags subflow,backup
+ speed=slow \
+ run_tests $ns1 $ns2 10.0.1.1
+ chk_join_nr 0 0 0
+ chk_prio_nr 0 1
+ fi
+
+ if reset "mpc backup both sides" &&
+ continue_if mptcp_lib_kallsyms_doesnt_have "T mptcp_subflow_send_ack$"; then
+ pm_nl_add_endpoint $ns1 10.0.1.1 flags subflow,backup
+ pm_nl_add_endpoint $ns2 10.0.1.2 flags subflow,backup
+ speed=slow \
+ run_tests $ns1 $ns2 10.0.1.1
+ chk_join_nr 0 0 0
+ chk_prio_nr 1 1
+ fi
+
+ if reset "mpc switch to backup" &&
+ continue_if mptcp_lib_kallsyms_doesnt_have "T mptcp_subflow_send_ack$"; then
+ pm_nl_add_endpoint $ns2 10.0.1.2 flags subflow
+ sflags=backup speed=slow \
+ run_tests $ns1 $ns2 10.0.1.1
+ chk_join_nr 0 0 0
+ chk_prio_nr 0 1
+ fi
+
+ if reset "mpc switch to backup both sides" &&
+ continue_if mptcp_lib_kallsyms_doesnt_have "T mptcp_subflow_send_ack$"; then
+ pm_nl_add_endpoint $ns1 10.0.1.1 flags subflow
+ pm_nl_add_endpoint $ns2 10.0.1.2 flags subflow
+ sflags=backup speed=slow \
+ run_tests $ns1 $ns2 10.0.1.1
+ chk_join_nr 0 0 0
+ chk_prio_nr 1 1
+ fi
+}
+
+verify_listener_events()
+{
+ local e_type=$2
+ local e_saddr=$4
+ local e_sport=$5
+ local name
+
+ if [ $e_type = $MPTCP_LIB_EVENT_LISTENER_CREATED ]; then
+ name="LISTENER_CREATED"
+ elif [ $e_type = $MPTCP_LIB_EVENT_LISTENER_CLOSED ]; then
+ name="LISTENER_CLOSED "
+ else
+ name="$e_type"
+ fi
+
+ print_check "$name $e_saddr:$e_sport"
+
+ if ! mptcp_lib_kallsyms_has "mptcp_event_pm_listener$"; then
+ print_skip "event not supported"
+ return
+ fi
+
+ if mptcp_lib_verify_listener_events "${@}"; then
+ print_ok
+ return 0
+ fi
+ fail_test
+}
+
+add_addr_ports_tests()
+{
+ # signal address with port
+ if reset "signal address with port"; then
+ pm_nl_set_limits $ns1 0 1
+ pm_nl_set_limits $ns2 1 1
+ pm_nl_add_endpoint $ns1 10.0.2.1 flags signal port 10100
+ run_tests $ns1 $ns2 10.0.1.1
+ chk_join_nr 1 1 1
+ chk_add_nr 1 1 1
+ fi
+
+ # subflow and signal with port
+ if reset "subflow and signal with port"; then
+ pm_nl_add_endpoint $ns1 10.0.2.1 flags signal port 10100
+ pm_nl_set_limits $ns1 0 2
+ pm_nl_set_limits $ns2 1 2
+ pm_nl_add_endpoint $ns2 10.0.3.2 flags subflow
+ run_tests $ns1 $ns2 10.0.1.1
+ chk_join_nr 2 2 2
+ chk_add_nr 1 1 1
+ fi
+
+ # single address with port, remove
+ # pm listener events
+ if reset_with_events "remove single address with port"; then
+ pm_nl_set_limits $ns1 0 1
+ pm_nl_add_endpoint $ns1 10.0.2.1 flags signal port 10100
+ pm_nl_set_limits $ns2 1 1
+ addr_nr_ns1=-1 speed=slow \
+ run_tests $ns1 $ns2 10.0.1.1
+ chk_join_nr 1 1 1
+ chk_add_nr 1 1 1
+ chk_rm_nr 1 1 invert
+
+ verify_listener_events $evts_ns1 $MPTCP_LIB_EVENT_LISTENER_CREATED \
+ $MPTCP_LIB_AF_INET 10.0.2.1 10100
+ verify_listener_events $evts_ns1 $MPTCP_LIB_EVENT_LISTENER_CLOSED \
+ $MPTCP_LIB_AF_INET 10.0.2.1 10100
+ kill_events_pids
+ fi
+
+ # subflow and signal with port, remove
+ if reset "remove subflow and signal with port"; then
+ pm_nl_set_limits $ns1 0 2
+ pm_nl_add_endpoint $ns1 10.0.2.1 flags signal port 10100
+ pm_nl_set_limits $ns2 1 2
+ pm_nl_add_endpoint $ns2 10.0.3.2 flags subflow
+ addr_nr_ns1=-1 addr_nr_ns2=-1 speed=slow \
+ run_tests $ns1 $ns2 10.0.1.1
+ chk_join_nr 2 2 2
+ chk_add_nr 1 1 1
+ chk_rm_nr 1 1
+ fi
+
+ # subflows and signal with port, flush
+ if reset "flush subflows and signal with port"; then
+ pm_nl_set_limits $ns1 0 3
+ pm_nl_add_endpoint $ns1 10.0.2.1 flags signal port 10100
+ pm_nl_set_limits $ns2 1 3
+ pm_nl_add_endpoint $ns2 10.0.3.2 flags subflow
+ pm_nl_add_endpoint $ns2 10.0.4.2 flags subflow
+ addr_nr_ns1=-8 addr_nr_ns2=-2 speed=slow \
+ run_tests $ns1 $ns2 10.0.1.1
+ chk_join_nr 3 3 3
+ chk_add_nr 1 1
+ chk_rm_nr 1 3 invert simult
+ fi
+
+ # multiple addresses with port
+ if reset "multiple addresses with port"; then
+ pm_nl_set_limits $ns1 2 2
+ pm_nl_add_endpoint $ns1 10.0.2.1 flags signal port 10100
+ pm_nl_add_endpoint $ns1 10.0.3.1 flags signal port 10100
+ pm_nl_set_limits $ns2 2 2
+ run_tests $ns1 $ns2 10.0.1.1
+ chk_join_nr 2 2 2
+ chk_add_nr 2 2 2
+ fi
+
+ # multiple addresses with ports
+ if reset "multiple addresses with ports"; then
+ pm_nl_set_limits $ns1 2 2
+ pm_nl_add_endpoint $ns1 10.0.2.1 flags signal port 10100
+ pm_nl_add_endpoint $ns1 10.0.3.1 flags signal port 10101
+ pm_nl_set_limits $ns2 2 2
+ run_tests $ns1 $ns2 10.0.1.1
+ chk_join_nr 2 2 2
+ chk_add_nr 2 2 2
+ fi
+}
+
+syncookies_tests()
+{
+ # single subflow, syncookies
+ if reset_with_cookies "single subflow with syn cookies"; then
+ pm_nl_set_limits $ns1 0 1
+ pm_nl_set_limits $ns2 0 1
+ pm_nl_add_endpoint $ns2 10.0.3.2 flags subflow
+ run_tests $ns1 $ns2 10.0.1.1
+ chk_join_nr 1 1 1
+ fi
+
+ # multiple subflows with syn cookies
+ if reset_with_cookies "multiple subflows with syn cookies"; then
+ pm_nl_set_limits $ns1 0 2
+ pm_nl_set_limits $ns2 0 2
+ pm_nl_add_endpoint $ns2 10.0.3.2 flags subflow
+ pm_nl_add_endpoint $ns2 10.0.2.2 flags subflow
+ run_tests $ns1 $ns2 10.0.1.1
+ chk_join_nr 2 2 2
+ fi
+
+ # multiple subflows limited by server
+ if reset_with_cookies "subflows limited by server w cookies"; then
+ pm_nl_set_limits $ns1 0 1
+ pm_nl_set_limits $ns2 0 2
+ pm_nl_add_endpoint $ns2 10.0.3.2 flags subflow
+ pm_nl_add_endpoint $ns2 10.0.2.2 flags subflow
+ run_tests $ns1 $ns2 10.0.1.1
+ chk_join_nr 2 1 1
+ fi
+
+ # test signal address with cookies
+ if reset_with_cookies "signal address with syn cookies"; then
+ pm_nl_set_limits $ns1 0 1
+ pm_nl_set_limits $ns2 1 1
+ pm_nl_add_endpoint $ns1 10.0.2.1 flags signal
+ run_tests $ns1 $ns2 10.0.1.1
+ chk_join_nr 1 1 1
+ chk_add_nr 1 1
+ fi
+
+ # test cookie with subflow and signal
+ if reset_with_cookies "subflow and signal w cookies"; then
+ pm_nl_add_endpoint $ns1 10.0.2.1 flags signal
+ pm_nl_set_limits $ns1 0 2
+ pm_nl_set_limits $ns2 1 2
+ pm_nl_add_endpoint $ns2 10.0.3.2 flags subflow
+ run_tests $ns1 $ns2 10.0.1.1
+ chk_join_nr 2 2 2
+ chk_add_nr 1 1
+ fi
+
+ # accept and use add_addr with additional subflows
+ if reset_with_cookies "subflows and signal w. cookies"; then
+ pm_nl_set_limits $ns1 0 3
+ pm_nl_add_endpoint $ns1 10.0.2.1 flags signal
+ pm_nl_set_limits $ns2 1 3
+ pm_nl_add_endpoint $ns2 10.0.3.2 flags subflow
+ pm_nl_add_endpoint $ns2 10.0.4.2 flags subflow
+ run_tests $ns1 $ns2 10.0.1.1
+ chk_join_nr 3 3 3
+ chk_add_nr 1 1
+ fi
+}
+
+checksum_tests()
+{
+ # checksum test 0 0
+ if reset_with_checksum 0 0; then
+ pm_nl_set_limits $ns1 0 1
+ pm_nl_set_limits $ns2 0 1
+ run_tests $ns1 $ns2 10.0.1.1
+ chk_join_nr 0 0 0
+ fi
+
+ # checksum test 1 1
+ if reset_with_checksum 1 1; then
+ pm_nl_set_limits $ns1 0 1
+ pm_nl_set_limits $ns2 0 1
+ run_tests $ns1 $ns2 10.0.1.1
+ chk_join_nr 0 0 0
+ fi
+
+ # checksum test 0 1
+ if reset_with_checksum 0 1; then
+ pm_nl_set_limits $ns1 0 1
+ pm_nl_set_limits $ns2 0 1
+ run_tests $ns1 $ns2 10.0.1.1
+ chk_join_nr 0 0 0
+ fi
+
+ # checksum test 1 0
+ if reset_with_checksum 1 0; then
+ pm_nl_set_limits $ns1 0 1
+ pm_nl_set_limits $ns2 0 1
+ run_tests $ns1 $ns2 10.0.1.1
+ chk_join_nr 0 0 0
+ fi
+}
+
+deny_join_id0_tests()
+{
+ # subflow allow join id0 ns1
+ if reset_with_allow_join_id0 "single subflow allow join id0 ns1" 1 0; then
+ pm_nl_set_limits $ns1 1 1
+ pm_nl_set_limits $ns2 1 1
+ pm_nl_add_endpoint $ns2 10.0.3.2 flags subflow
+ run_tests $ns1 $ns2 10.0.1.1
+ chk_join_nr 1 1 1
+ fi
+
+ # subflow allow join id0 ns2
+ if reset_with_allow_join_id0 "single subflow allow join id0 ns2" 0 1; then
+ pm_nl_set_limits $ns1 1 1
+ pm_nl_set_limits $ns2 1 1
+ pm_nl_add_endpoint $ns2 10.0.3.2 flags subflow
+ run_tests $ns1 $ns2 10.0.1.1
+ chk_join_nr 0 0 0
+ fi
+
+ # signal address allow join id0 ns1
+ # ADD_ADDRs are not affected by allow_join_id0 value.
+ if reset_with_allow_join_id0 "signal address allow join id0 ns1" 1 0; then
+ pm_nl_set_limits $ns1 1 1
+ pm_nl_set_limits $ns2 1 1
+ pm_nl_add_endpoint $ns1 10.0.2.1 flags signal
+ run_tests $ns1 $ns2 10.0.1.1
+ chk_join_nr 1 1 1
+ chk_add_nr 1 1
+ fi
+
+ # signal address allow join id0 ns2
+ # ADD_ADDRs are not affected by allow_join_id0 value.
+ if reset_with_allow_join_id0 "signal address allow join id0 ns2" 0 1; then
+ pm_nl_set_limits $ns1 1 1
+ pm_nl_set_limits $ns2 1 1
+ pm_nl_add_endpoint $ns1 10.0.2.1 flags signal
+ run_tests $ns1 $ns2 10.0.1.1
+ chk_join_nr 1 1 1
+ chk_add_nr 1 1
+ fi
+
+ # subflow and address allow join id0 ns1
+ if reset_with_allow_join_id0 "subflow and address allow join id0 1" 1 0; then
+ pm_nl_set_limits $ns1 2 2
+ pm_nl_set_limits $ns2 2 2
+ pm_nl_add_endpoint $ns1 10.0.2.1 flags signal
+ pm_nl_add_endpoint $ns2 10.0.3.2 flags subflow
+ run_tests $ns1 $ns2 10.0.1.1
+ chk_join_nr 2 2 2
+ fi
+
+ # subflow and address allow join id0 ns2
+ if reset_with_allow_join_id0 "subflow and address allow join id0 2" 0 1; then
+ pm_nl_set_limits $ns1 2 2
+ pm_nl_set_limits $ns2 2 2
+ pm_nl_add_endpoint $ns1 10.0.2.1 flags signal
+ pm_nl_add_endpoint $ns2 10.0.3.2 flags subflow
+ run_tests $ns1 $ns2 10.0.1.1
+ chk_join_nr 1 1 1
+ fi
+}
+
+fullmesh_tests()
+{
+ # fullmesh 1
+ # 2 fullmesh addrs in ns2, added before the connection,
+ # 1 non-fullmesh addr in ns1, added during the connection.
+ if reset "fullmesh test 2x1"; then
+ pm_nl_set_limits $ns1 0 4
+ pm_nl_set_limits $ns2 1 4
+ pm_nl_add_endpoint $ns2 10.0.2.2 flags subflow,fullmesh
+ pm_nl_add_endpoint $ns2 10.0.3.2 flags subflow,fullmesh
+ addr_nr_ns1=1 speed=slow \
+ run_tests $ns1 $ns2 10.0.1.1
+ chk_join_nr 4 4 4
+ chk_add_nr 1 1
+ fi
+
+ # fullmesh 2
+ # 1 non-fullmesh addr in ns1, added before the connection,
+ # 1 fullmesh addr in ns2, added during the connection.
+ if reset "fullmesh test 1x1"; then
+ pm_nl_set_limits $ns1 1 3
+ pm_nl_set_limits $ns2 1 3
+ pm_nl_add_endpoint $ns1 10.0.2.1 flags signal
+ fullmesh=1 speed=slow \
+ run_tests $ns1 $ns2 10.0.1.1
+ chk_join_nr 3 3 3
+ chk_add_nr 1 1
+ fi
+
+ # fullmesh 3
+ # 1 non-fullmesh addr in ns1, added before the connection,
+ # 2 fullmesh addrs in ns2, added during the connection.
+ if reset "fullmesh test 1x2"; then
+ pm_nl_set_limits $ns1 2 5
+ pm_nl_set_limits $ns2 1 5
+ pm_nl_add_endpoint $ns1 10.0.2.1 flags signal
+ fullmesh=2 speed=slow \
+ run_tests $ns1 $ns2 10.0.1.1
+ chk_join_nr 5 5 5
+ chk_add_nr 1 1
+ fi
+
+ # fullmesh 4
+ # 1 non-fullmesh addr in ns1, added before the connection,
+ # 2 fullmesh addrs in ns2, added during the connection,
+ # limit max_subflows to 4.
+ if reset "fullmesh test 1x2, limited"; then
+ pm_nl_set_limits $ns1 2 4
+ pm_nl_set_limits $ns2 1 4
+ pm_nl_add_endpoint $ns1 10.0.2.1 flags signal
+ fullmesh=2 speed=slow \
+ run_tests $ns1 $ns2 10.0.1.1
+ chk_join_nr 4 4 4
+ chk_add_nr 1 1
+ fi
+
+ # set fullmesh flag
+ if reset "set fullmesh flag test" &&
+ continue_if mptcp_lib_kversion_ge 5.18; then
+ pm_nl_set_limits $ns1 4 4
+ pm_nl_add_endpoint $ns1 10.0.2.1 flags subflow
+ pm_nl_set_limits $ns2 4 4
+ addr_nr_ns2=1 sflags=fullmesh speed=slow \
+ run_tests $ns1 $ns2 10.0.1.1
+ chk_join_nr 2 2 2
+ chk_rm_nr 0 1
+ fi
+
+ # set nofullmesh flag
+ if reset "set nofullmesh flag test" &&
+ continue_if mptcp_lib_kversion_ge 5.18; then
+ pm_nl_set_limits $ns1 4 4
+ pm_nl_add_endpoint $ns1 10.0.2.1 flags subflow,fullmesh
+ pm_nl_set_limits $ns2 4 4
+ fullmesh=1 sflags=nofullmesh speed=slow \
+ run_tests $ns1 $ns2 10.0.1.1
+ chk_join_nr 2 2 2
+ chk_rm_nr 0 1
+ fi
+
+ # set backup,fullmesh flags
+ if reset "set backup,fullmesh flags test" &&
+ continue_if mptcp_lib_kversion_ge 5.18; then
+ pm_nl_set_limits $ns1 4 4
+ pm_nl_add_endpoint $ns1 10.0.2.1 flags subflow
+ pm_nl_set_limits $ns2 4 4
+ addr_nr_ns2=1 sflags=backup,fullmesh speed=slow \
+ run_tests $ns1 $ns2 10.0.1.1
+ chk_join_nr 2 2 2
+ chk_prio_nr 0 1
+ chk_rm_nr 0 1
+ fi
+
+ # set nobackup,nofullmesh flags
+ if reset "set nobackup,nofullmesh flags test" &&
+ continue_if mptcp_lib_kversion_ge 5.18; then
+ pm_nl_set_limits $ns1 4 4
+ pm_nl_set_limits $ns2 4 4
+ pm_nl_add_endpoint $ns2 10.0.2.2 flags subflow,backup,fullmesh
+ sflags=nobackup,nofullmesh speed=slow \
+ run_tests $ns1 $ns2 10.0.1.1
+ chk_join_nr 2 2 2
+ chk_prio_nr 0 1
+ chk_rm_nr 0 1
+ fi
+}
+
+fastclose_tests()
+{
+ if reset_check_counter "fastclose test" "MPTcpExtMPFastcloseTx"; then
+ test_linkfail=1024 fastclose=client \
+ run_tests $ns1 $ns2 10.0.1.1
+ chk_join_nr 0 0 0
+ chk_fclose_nr 1 1
+ chk_rst_nr 1 1 invert
+ fi
+
+ if reset_check_counter "fastclose server test" "MPTcpExtMPFastcloseRx"; then
+ test_linkfail=1024 fastclose=server \
+ run_tests $ns1 $ns2 10.0.1.1
+ chk_join_nr 0 0 0 0 0 0 1
+ chk_fclose_nr 1 1 invert
+ chk_rst_nr 1 1
+ fi
+}
+
+pedit_action_pkts()
+{
+ tc -n $ns2 -j -s action show action pedit index 100 | \
+ mptcp_lib_get_info_value \"packets\" packets
+}
+
+fail_tests()
+{
+ # single subflow
+ if reset_with_fail "Infinite map" 1; then
+ test_linkfail=128 \
+ run_tests $ns1 $ns2 10.0.1.1
+ chk_join_nr 0 0 0 +1 +0 1 0 1 "$(pedit_action_pkts)"
+ chk_fail_nr 1 -1 invert
+ fi
+
+ # multiple subflows
+ if reset_with_fail "MP_FAIL MP_RST" 2; then
+ tc -n $ns2 qdisc add dev ns2eth1 root netem rate 1mbit delay 5
+ pm_nl_set_limits $ns1 0 1
+ pm_nl_set_limits $ns2 0 1
+ pm_nl_add_endpoint $ns2 10.0.2.2 dev ns2eth2 flags subflow
+ test_linkfail=1024 \
+ run_tests $ns1 $ns2 10.0.1.1
+ chk_join_nr 1 1 1 1 0 1 1 0 "$(pedit_action_pkts)"
+ fi
+}
+
+# $1: ns ; $2: addr ; $3: id
+userspace_pm_add_addr()
+{
+ local evts=$evts_ns1
+ local tk
+
+ [ "$1" == "$ns2" ] && evts=$evts_ns2
+ tk=$(mptcp_lib_evts_get_info token "$evts")
+
+ ip netns exec $1 ./pm_nl_ctl ann $2 token $tk id $3
+ sleep 1
+}
+
+# $1: ns ; $2: id
+userspace_pm_rm_addr()
+{
+ local evts=$evts_ns1
+ local tk
+ local cnt
+
+ [ "$1" == "$ns2" ] && evts=$evts_ns2
+ tk=$(mptcp_lib_evts_get_info token "$evts")
+
+ cnt=$(rm_addr_count ${1})
+ ip netns exec $1 ./pm_nl_ctl rem token $tk id $2
+ wait_rm_addr $1 "${cnt}"
+}
+
+# $1: ns ; $2: addr ; $3: id
+userspace_pm_add_sf()
+{
+ local evts=$evts_ns1
+ local tk da dp
+
+ [ "$1" == "$ns2" ] && evts=$evts_ns2
+ tk=$(mptcp_lib_evts_get_info token "$evts")
+ da=$(mptcp_lib_evts_get_info daddr4 "$evts")
+ dp=$(mptcp_lib_evts_get_info dport "$evts")
+
+ ip netns exec $1 ./pm_nl_ctl csf lip $2 lid $3 \
+ rip $da rport $dp token $tk
+ sleep 1
+}
+
+# $1: ns ; $2: addr $3: event type
+userspace_pm_rm_sf()
+{
+ local evts=$evts_ns1
+ local t=${3:-1}
+ local ip
+ local tk da dp sp
+ local cnt
+
+ [ "$1" == "$ns2" ] && evts=$evts_ns2
+ [ -n "$(mptcp_lib_evts_get_info "saddr4" "$evts" $t)" ] && ip=4
+ [ -n "$(mptcp_lib_evts_get_info "saddr6" "$evts" $t)" ] && ip=6
+ tk=$(mptcp_lib_evts_get_info token "$evts")
+ da=$(mptcp_lib_evts_get_info "daddr$ip" "$evts" $t $2)
+ dp=$(mptcp_lib_evts_get_info dport "$evts" $t $2)
+ sp=$(mptcp_lib_evts_get_info sport "$evts" $t $2)
+
+ cnt=$(rm_sf_count ${1})
+ ip netns exec $1 ./pm_nl_ctl dsf lip $2 lport $sp \
+ rip $da rport $dp token $tk
+ wait_rm_sf $1 "${cnt}"
+}
+
+check_output()
+{
+ local cmd="$1"
+ local expected="$2"
+ local msg="$3"
+ local rc=0
+
+ mptcp_lib_check_output "${err}" "${cmd}" "${expected}" || rc=${?}
+ if [ ${rc} -eq 2 ]; then
+ fail_test "fail to check output # error ${rc}"
+ elif [ ${rc} -eq 0 ]; then
+ print_ok
+ elif [ ${rc} -eq 1 ]; then
+ fail_test "fail to check output # different output"
+ fi
+}
+
+# $1: ns
+userspace_pm_dump()
+{
+ local evts=$evts_ns1
+ local tk
+
+ [ "$1" == "$ns2" ] && evts=$evts_ns2
+ tk=$(mptcp_lib_evts_get_info token "$evts")
+
+ ip netns exec $1 ./pm_nl_ctl dump token $tk
+}
+
+# $1: ns ; $2: id
+userspace_pm_get_addr()
+{
+ local evts=$evts_ns1
+ local tk
+
+ [ "$1" == "$ns2" ] && evts=$evts_ns2
+ tk=$(mptcp_lib_evts_get_info token "$evts")
+
+ ip netns exec $1 ./pm_nl_ctl get $2 token $tk
+}
+
+userspace_pm_chk_dump_addr()
+{
+ local ns="${1}"
+ local exp="${2}"
+ local check="${3}"
+
+ print_check "dump addrs ${check}"
+
+ if mptcp_lib_kallsyms_has "mptcp_userspace_pm_dump_addr$"; then
+ check_output "userspace_pm_dump ${ns}" "${exp}"
+ else
+ print_skip
+ fi
+}
+
+userspace_pm_chk_get_addr()
+{
+ local ns="${1}"
+ local id="${2}"
+ local exp="${3}"
+
+ print_check "get id ${id} addr"
+
+ if mptcp_lib_kallsyms_has "mptcp_userspace_pm_get_addr$"; then
+ check_output "userspace_pm_get_addr ${ns} ${id}" "${exp}"
+ else
+ print_skip
+ fi
+}
+
+userspace_tests()
+{
+ # userspace pm type prevents add_addr
+ if reset "userspace pm type prevents add_addr" &&
+ continue_if mptcp_lib_has_file '/proc/sys/net/mptcp/pm_type'; then
+ set_userspace_pm $ns1
+ pm_nl_set_limits $ns1 0 2
+ pm_nl_set_limits $ns2 0 2
+ pm_nl_add_endpoint $ns1 10.0.2.1 flags signal
+ run_tests $ns1 $ns2 10.0.1.1
+ chk_join_nr 0 0 0
+ chk_add_nr 0 0
+ fi
+
+ # userspace pm type does not echo add_addr without daemon
+ if reset "userspace pm no echo w/o daemon" &&
+ continue_if mptcp_lib_has_file '/proc/sys/net/mptcp/pm_type'; then
+ set_userspace_pm $ns2
+ pm_nl_set_limits $ns1 0 2
+ pm_nl_set_limits $ns2 0 2
+ pm_nl_add_endpoint $ns1 10.0.2.1 flags signal
+ run_tests $ns1 $ns2 10.0.1.1
+ chk_join_nr 0 0 0
+ chk_add_nr 1 0
+ fi
+
+ # userspace pm type rejects join
+ if reset "userspace pm type rejects join" &&
+ continue_if mptcp_lib_has_file '/proc/sys/net/mptcp/pm_type'; then
+ set_userspace_pm $ns1
+ pm_nl_set_limits $ns1 1 1
+ pm_nl_set_limits $ns2 1 1
+ pm_nl_add_endpoint $ns2 10.0.3.2 flags subflow
+ run_tests $ns1 $ns2 10.0.1.1
+ chk_join_nr 1 1 0
+ fi
+
+ # userspace pm type does not send join
+ if reset "userspace pm type does not send join" &&
+ continue_if mptcp_lib_has_file '/proc/sys/net/mptcp/pm_type'; then
+ set_userspace_pm $ns2
+ pm_nl_set_limits $ns1 1 1
+ pm_nl_set_limits $ns2 1 1
+ pm_nl_add_endpoint $ns2 10.0.3.2 flags subflow
+ run_tests $ns1 $ns2 10.0.1.1
+ chk_join_nr 0 0 0
+ fi
+
+ # userspace pm type prevents mp_prio
+ if reset "userspace pm type prevents mp_prio" &&
+ continue_if mptcp_lib_has_file '/proc/sys/net/mptcp/pm_type'; then
+ set_userspace_pm $ns1
+ pm_nl_set_limits $ns1 1 1
+ pm_nl_set_limits $ns2 1 1
+ pm_nl_add_endpoint $ns2 10.0.3.2 flags subflow
+ sflags=backup speed=slow \
+ run_tests $ns1 $ns2 10.0.1.1
+ chk_join_nr 1 1 0
+ chk_prio_nr 0 0
+ fi
+
+ # userspace pm type prevents rm_addr
+ if reset "userspace pm type prevents rm_addr" &&
+ continue_if mptcp_lib_has_file '/proc/sys/net/mptcp/pm_type'; then
+ set_userspace_pm $ns1
+ set_userspace_pm $ns2
+ pm_nl_set_limits $ns1 0 1
+ pm_nl_set_limits $ns2 0 1
+ pm_nl_add_endpoint $ns2 10.0.3.2 flags subflow
+ addr_nr_ns2=-1 speed=slow \
+ run_tests $ns1 $ns2 10.0.1.1
+ chk_join_nr 0 0 0
+ chk_rm_nr 0 0
+ fi
+
+ # userspace pm add & remove address
+ if reset_with_events "userspace pm add & remove address" &&
+ continue_if mptcp_lib_has_file '/proc/sys/net/mptcp/pm_type'; then
+ set_userspace_pm $ns1
+ pm_nl_set_limits $ns2 2 2
+ speed=5 \
+ run_tests $ns1 $ns2 10.0.1.1 &
+ local tests_pid=$!
+ wait_mpj $ns1
+ userspace_pm_add_addr $ns1 10.0.2.1 10
+ userspace_pm_add_addr $ns1 10.0.3.1 20
+ chk_join_nr 2 2 2
+ chk_add_nr 2 2
+ chk_mptcp_info subflows 2 subflows 2
+ chk_subflows_total 3 3
+ chk_mptcp_info add_addr_signal 2 add_addr_accepted 2
+ userspace_pm_chk_dump_addr "${ns1}" \
+ $'id 10 flags signal 10.0.2.1\nid 20 flags signal 10.0.3.1' \
+ "signal"
+ userspace_pm_chk_get_addr "${ns1}" "10" "id 10 flags signal 10.0.2.1"
+ userspace_pm_chk_get_addr "${ns1}" "20" "id 20 flags signal 10.0.3.1"
+ userspace_pm_rm_addr $ns1 10
+ userspace_pm_rm_sf $ns1 "::ffff:10.0.2.1" $MPTCP_LIB_EVENT_SUB_ESTABLISHED
+ userspace_pm_chk_dump_addr "${ns1}" \
+ "id 20 flags signal 10.0.3.1" "after rm_addr 10"
+ userspace_pm_rm_addr $ns1 20
+ userspace_pm_rm_sf $ns1 10.0.3.1 $MPTCP_LIB_EVENT_SUB_ESTABLISHED
+ userspace_pm_chk_dump_addr "${ns1}" "" "after rm_addr 20"
+ chk_rm_nr 2 2 invert
+ chk_mptcp_info subflows 0 subflows 0
+ chk_subflows_total 1 1
+ kill_events_pids
+ mptcp_lib_kill_wait $tests_pid
+ fi
+
+ # userspace pm create destroy subflow
+ if reset_with_events "userspace pm create destroy subflow" &&
+ continue_if mptcp_lib_has_file '/proc/sys/net/mptcp/pm_type'; then
+ set_userspace_pm $ns2
+ pm_nl_set_limits $ns1 0 1
+ speed=5 \
+ run_tests $ns1 $ns2 10.0.1.1 &
+ local tests_pid=$!
+ wait_mpj $ns2
+ userspace_pm_add_sf $ns2 10.0.3.2 20
+ chk_join_nr 1 1 1
+ chk_mptcp_info subflows 1 subflows 1
+ chk_subflows_total 2 2
+ userspace_pm_chk_dump_addr "${ns2}" \
+ "id 20 flags subflow 10.0.3.2" \
+ "subflow"
+ userspace_pm_chk_get_addr "${ns2}" "20" "id 20 flags subflow 10.0.3.2"
+ userspace_pm_rm_addr $ns2 20
+ userspace_pm_rm_sf $ns2 10.0.3.2 $MPTCP_LIB_EVENT_SUB_ESTABLISHED
+ userspace_pm_chk_dump_addr "${ns2}" \
+ "" \
+ "after rm_addr 20"
+ chk_rm_nr 1 1
+ chk_mptcp_info subflows 0 subflows 0
+ chk_subflows_total 1 1
+ kill_events_pids
+ mptcp_lib_kill_wait $tests_pid
+ fi
+
+ # userspace pm create id 0 subflow
+ if reset_with_events "userspace pm create id 0 subflow" &&
+ continue_if mptcp_lib_has_file '/proc/sys/net/mptcp/pm_type'; then
+ set_userspace_pm $ns2
+ pm_nl_set_limits $ns1 0 1
+ speed=5 \
+ run_tests $ns1 $ns2 10.0.1.1 &
+ local tests_pid=$!
+ wait_mpj $ns2
+ chk_mptcp_info subflows 0 subflows 0
+ chk_subflows_total 1 1
+ userspace_pm_add_sf $ns2 10.0.3.2 0
+ userspace_pm_chk_dump_addr "${ns2}" \
+ "id 0 flags subflow 10.0.3.2" "id 0 subflow"
+ chk_join_nr 1 1 1
+ chk_mptcp_info subflows 1 subflows 1
+ chk_subflows_total 2 2
+ kill_events_pids
+ mptcp_lib_kill_wait $tests_pid
+ fi
+
+ # userspace pm remove initial subflow
+ if reset_with_events "userspace pm remove initial subflow" &&
+ continue_if mptcp_lib_has_file '/proc/sys/net/mptcp/pm_type'; then
+ set_userspace_pm $ns2
+ pm_nl_set_limits $ns1 0 1
+ speed=5 \
+ run_tests $ns1 $ns2 10.0.1.1 &
+ local tests_pid=$!
+ wait_mpj $ns2
+ userspace_pm_add_sf $ns2 10.0.3.2 20
+ chk_join_nr 1 1 1
+ chk_mptcp_info subflows 1 subflows 1
+ chk_subflows_total 2 2
+ userspace_pm_rm_sf $ns2 10.0.1.2
+ # we don't look at the counter linked to the RM_ADDR but
+ # to the one linked to the subflows that have been removed
+ chk_rm_nr 0 1
+ chk_rst_nr 0 0 invert
+ chk_mptcp_info subflows 1 subflows 1
+ chk_subflows_total 1 1
+ kill_events_pids
+ mptcp_lib_kill_wait $tests_pid
+ fi
+
+ # userspace pm send RM_ADDR for ID 0
+ if reset_with_events "userspace pm send RM_ADDR for ID 0" &&
+ continue_if mptcp_lib_has_file '/proc/sys/net/mptcp/pm_type'; then
+ set_userspace_pm $ns1
+ pm_nl_set_limits $ns2 1 1
+ speed=5 \
+ run_tests $ns1 $ns2 10.0.1.1 &
+ local tests_pid=$!
+ wait_mpj $ns1
+ userspace_pm_add_addr $ns1 10.0.2.1 10
+ chk_join_nr 1 1 1
+ chk_add_nr 1 1
+ chk_mptcp_info subflows 1 subflows 1
+ chk_subflows_total 2 2
+ chk_mptcp_info add_addr_signal 1 add_addr_accepted 1
+ userspace_pm_rm_addr $ns1 0
+ # we don't look at the counter linked to the subflows that
+ # have been removed but to the one linked to the RM_ADDR
+ chk_rm_nr 1 0 invert
+ chk_rst_nr 0 0 invert
+ chk_mptcp_info subflows 1 subflows 1
+ chk_subflows_total 1 1
+ kill_events_pids
+ mptcp_lib_kill_wait $tests_pid
+ fi
+}
+
+endpoint_tests()
+{
+ # subflow_rebuild_header is needed to support the implicit flag
+ # userspace pm type prevents add_addr
+ if reset "implicit EP" &&
+ mptcp_lib_kallsyms_has "subflow_rebuild_header$"; then
+ pm_nl_set_limits $ns1 2 2
+ pm_nl_set_limits $ns2 2 2
+ pm_nl_add_endpoint $ns1 10.0.2.1 flags signal
+ speed=slow \
+ run_tests $ns1 $ns2 10.0.1.1 &
+ local tests_pid=$!
+
+ wait_mpj $ns1
+ pm_nl_check_endpoint "creation" \
+ $ns2 10.0.2.2 id 1 flags implicit
+ chk_mptcp_info subflows 1 subflows 1
+ chk_mptcp_info add_addr_signal 1 add_addr_accepted 1
+
+ pm_nl_add_endpoint $ns2 10.0.2.2 id 33 2>/dev/null
+ pm_nl_check_endpoint "ID change is prevented" \
+ $ns2 10.0.2.2 id 1 flags implicit
+
+ pm_nl_add_endpoint $ns2 10.0.2.2 flags signal
+ pm_nl_check_endpoint "modif is allowed" \
+ $ns2 10.0.2.2 id 1 flags signal
+ mptcp_lib_kill_wait $tests_pid
+ fi
+
+ if reset "delete and re-add" &&
+ mptcp_lib_kallsyms_has "subflow_rebuild_header$"; then
+ pm_nl_set_limits $ns1 1 1
+ pm_nl_set_limits $ns2 1 1
+ pm_nl_add_endpoint $ns2 10.0.2.2 id 2 dev ns2eth2 flags subflow
+ test_linkfail=4 speed=20 \
+ run_tests $ns1 $ns2 10.0.1.1 &
+ local tests_pid=$!
+
+ wait_mpj $ns2
+ chk_subflow_nr "before delete" 2
+ chk_mptcp_info subflows 1 subflows 1
+
+ pm_nl_del_endpoint $ns2 2 10.0.2.2
+ sleep 0.5
+ chk_subflow_nr "after delete" 1
+ chk_mptcp_info subflows 0 subflows 0
+
+ pm_nl_add_endpoint $ns2 10.0.2.2 dev ns2eth2 flags subflow
+ wait_mpj $ns2
+ chk_subflow_nr "after re-add" 2
+ chk_mptcp_info subflows 1 subflows 1
+ mptcp_lib_kill_wait $tests_pid
+ fi
+}
+
+# [$1: error message]
+usage()
+{
+ if [ -n "${1}" ]; then
+ echo "${1}"
+ ret=${KSFT_FAIL}
+ fi
+
+ echo "mptcp_join usage:"
+
+ local key
+ for key in "${!all_tests[@]}"; do
+ echo " -${key} ${all_tests[${key}]}"
+ done
+
+ echo " -c capture pcap files"
+ echo " -C enable data checksum"
+ echo " -i use ip mptcp"
+ echo " -h help"
+
+ echo "[test ids|names]"
+
+ exit ${ret}
+}
+
+
+# Use a "simple" array to force an specific order we cannot have with an associative one
+all_tests_sorted=(
+ f@subflows_tests
+ e@subflows_error_tests
+ s@signal_address_tests
+ l@link_failure_tests
+ t@add_addr_timeout_tests
+ r@remove_tests
+ a@add_tests
+ 6@ipv6_tests
+ 4@v4mapped_tests
+ M@mixed_tests
+ b@backup_tests
+ p@add_addr_ports_tests
+ k@syncookies_tests
+ S@checksum_tests
+ d@deny_join_id0_tests
+ m@fullmesh_tests
+ z@fastclose_tests
+ F@fail_tests
+ u@userspace_tests
+ I@endpoint_tests
+)
+
+all_tests_args=""
+all_tests_names=()
+for subtests in "${all_tests_sorted[@]}"; do
+ key="${subtests%@*}"
+ value="${subtests#*@}"
+
+ all_tests_args+="${key}"
+ all_tests_names+=("${value}")
+ all_tests[${key}]="${value}"
+done
+
+tests=()
+while getopts "${all_tests_args}cCih" opt; do
+ case $opt in
+ ["${all_tests_args}"])
+ tests+=("${all_tests[${opt}]}")
+ ;;
+ c)
+ capture=true
+ ;;
+ C)
+ checksum=true
+ ;;
+ i)
+ ip_mptcp=1
+ ;;
+ h)
+ usage
+ ;;
+ *)
+ usage "Unknown option: -${opt}"
+ ;;
+ esac
+done
+
+shift $((OPTIND - 1))
+
+for arg in "${@}"; do
+ if [[ "${arg}" =~ ^[0-9]+$ ]]; then
+ only_tests_ids+=("${arg}")
+ else
+ only_tests_names+=("${arg}")
+ fi
+done
+
+if [ ${#tests[@]} -eq 0 ]; then
+ tests=("${all_tests_names[@]}")
+fi
+
+for subtests in "${tests[@]}"; do
+ "${subtests}"
+done
+
+if [ ${ret} -ne 0 ]; then
+ echo
+ echo "${#failed_tests[@]} failure(s) has(ve) been detected:"
+ for i in $(get_failed_tests_ids); do
+ echo -e "\t- ${i}: ${failed_tests[${i}]}"
+ done
+ echo
+fi
+
+append_prev_results
+mptcp_lib_result_print_all_tap
exit $ret
diff --git a/tools/testing/selftests/net/mptcp/mptcp_lib.sh b/tools/testing/selftests/net/mptcp/mptcp_lib.sh
new file mode 100644
index 000000000000..d529b4b37af8
--- /dev/null
+++ b/tools/testing/selftests/net/mptcp/mptcp_lib.sh
@@ -0,0 +1,507 @@
+#! /bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+readonly KSFT_PASS=0
+readonly KSFT_FAIL=1
+readonly KSFT_SKIP=4
+
+# shellcheck disable=SC2155 # declare and assign separately
+readonly KSFT_TEST="${MPTCP_LIB_KSFT_TEST:-$(basename "${0}" .sh)}"
+
+# These variables are used in some selftests, read-only
+declare -rx MPTCP_LIB_EVENT_ANNOUNCED=6 # MPTCP_EVENT_ANNOUNCED
+declare -rx MPTCP_LIB_EVENT_REMOVED=7 # MPTCP_EVENT_REMOVED
+declare -rx MPTCP_LIB_EVENT_SUB_ESTABLISHED=10 # MPTCP_EVENT_SUB_ESTABLISHED
+declare -rx MPTCP_LIB_EVENT_SUB_CLOSED=11 # MPTCP_EVENT_SUB_CLOSED
+declare -rx MPTCP_LIB_EVENT_LISTENER_CREATED=15 # MPTCP_EVENT_LISTENER_CREATED
+declare -rx MPTCP_LIB_EVENT_LISTENER_CLOSED=16 # MPTCP_EVENT_LISTENER_CLOSED
+
+declare -rx MPTCP_LIB_AF_INET=2
+declare -rx MPTCP_LIB_AF_INET6=10
+
+MPTCP_LIB_SUBTESTS=()
+MPTCP_LIB_SUBTESTS_DUPLICATED=0
+MPTCP_LIB_TEST_COUNTER=0
+MPTCP_LIB_TEST_FORMAT="%02u %-50s"
+
+# only if supported (or forced) and not disabled, see no-color.org
+if { [ -t 1 ] || [ "${SELFTESTS_MPTCP_LIB_COLOR_FORCE:-}" = "1" ]; } &&
+ [ "${NO_COLOR:-}" != "1" ]; then
+ readonly MPTCP_LIB_COLOR_RED="\E[1;31m"
+ readonly MPTCP_LIB_COLOR_GREEN="\E[1;32m"
+ readonly MPTCP_LIB_COLOR_YELLOW="\E[1;33m"
+ readonly MPTCP_LIB_COLOR_BLUE="\E[1;34m"
+ readonly MPTCP_LIB_COLOR_RESET="\E[0m"
+else
+ readonly MPTCP_LIB_COLOR_RED=
+ readonly MPTCP_LIB_COLOR_GREEN=
+ readonly MPTCP_LIB_COLOR_YELLOW=
+ readonly MPTCP_LIB_COLOR_BLUE=
+ readonly MPTCP_LIB_COLOR_RESET=
+fi
+
+# $1: color, $2: text
+mptcp_lib_print_color() {
+ echo -e "${MPTCP_LIB_START_PRINT:-}${*}${MPTCP_LIB_COLOR_RESET}"
+}
+
+mptcp_lib_print_ok() {
+ mptcp_lib_print_color "${MPTCP_LIB_COLOR_GREEN}${*}"
+}
+
+mptcp_lib_print_warn() {
+ mptcp_lib_print_color "${MPTCP_LIB_COLOR_YELLOW}${*}"
+}
+
+mptcp_lib_print_info() {
+ mptcp_lib_print_color "${MPTCP_LIB_COLOR_BLUE}${*}"
+}
+
+mptcp_lib_print_err() {
+ mptcp_lib_print_color "${MPTCP_LIB_COLOR_RED}${*}"
+}
+
+# shellcheck disable=SC2120 # parameters are optional
+mptcp_lib_pr_ok() {
+ mptcp_lib_print_ok "[ OK ]${1:+ ${*}}"
+}
+
+mptcp_lib_pr_skip() {
+ mptcp_lib_print_warn "[SKIP]${1:+ ${*}}"
+}
+
+mptcp_lib_pr_fail() {
+ mptcp_lib_print_err "[FAIL]${1:+ ${*}}"
+}
+
+mptcp_lib_pr_info() {
+ mptcp_lib_print_info "INFO: ${*}"
+}
+
+# SELFTESTS_MPTCP_LIB_EXPECT_ALL_FEATURES env var can be set when validating all
+# features using the last version of the kernel and the selftests to make sure
+# a test is not being skipped by mistake.
+mptcp_lib_expect_all_features() {
+ [ "${SELFTESTS_MPTCP_LIB_EXPECT_ALL_FEATURES:-}" = "1" ]
+}
+
+# $1: msg
+mptcp_lib_fail_if_expected_feature() {
+ if mptcp_lib_expect_all_features; then
+ echo "ERROR: missing feature: ${*}"
+ exit ${KSFT_FAIL}
+ fi
+
+ return 1
+}
+
+# $1: file
+mptcp_lib_has_file() {
+ local f="${1}"
+
+ if [ -f "${f}" ]; then
+ return 0
+ fi
+
+ mptcp_lib_fail_if_expected_feature "${f} file not found"
+}
+
+mptcp_lib_check_mptcp() {
+ if ! mptcp_lib_has_file "/proc/sys/net/mptcp/enabled"; then
+ mptcp_lib_pr_skip "MPTCP support is not available"
+ exit ${KSFT_SKIP}
+ fi
+}
+
+mptcp_lib_check_kallsyms() {
+ if ! mptcp_lib_has_file "/proc/kallsyms"; then
+ mptcp_lib_pr_skip "CONFIG_KALLSYMS is missing"
+ exit ${KSFT_SKIP}
+ fi
+}
+
+# Internal: use mptcp_lib_kallsyms_has() instead
+__mptcp_lib_kallsyms_has() {
+ local sym="${1}"
+
+ mptcp_lib_check_kallsyms
+
+ grep -q " ${sym}" /proc/kallsyms
+}
+
+# $1: part of a symbol to look at, add '$' at the end for full name
+mptcp_lib_kallsyms_has() {
+ local sym="${1}"
+
+ if __mptcp_lib_kallsyms_has "${sym}"; then
+ return 0
+ fi
+
+ mptcp_lib_fail_if_expected_feature "${sym} symbol not found"
+}
+
+# $1: part of a symbol to look at, add '$' at the end for full name
+mptcp_lib_kallsyms_doesnt_have() {
+ local sym="${1}"
+
+ if ! __mptcp_lib_kallsyms_has "${sym}"; then
+ return 0
+ fi
+
+ mptcp_lib_fail_if_expected_feature "${sym} symbol has been found"
+}
+
+# !!!AVOID USING THIS!!!
+# Features might not land in the expected version and features can be backported
+#
+# $1: kernel version, e.g. 6.3
+mptcp_lib_kversion_ge() {
+ local exp_maj="${1%.*}"
+ local exp_min="${1#*.}"
+ local v maj min
+
+ # If the kernel has backported features, set this env var to 1:
+ if [ "${SELFTESTS_MPTCP_LIB_NO_KVERSION_CHECK:-}" = "1" ]; then
+ return 0
+ fi
+
+ v=$(uname -r | cut -d'.' -f1,2)
+ maj=${v%.*}
+ min=${v#*.}
+
+ if [ "${maj}" -gt "${exp_maj}" ] ||
+ { [ "${maj}" -eq "${exp_maj}" ] && [ "${min}" -ge "${exp_min}" ]; }; then
+ return 0
+ fi
+
+ mptcp_lib_fail_if_expected_feature "kernel version ${1} lower than ${v}"
+}
+
+__mptcp_lib_result_check_duplicated() {
+ local subtest
+
+ for subtest in "${MPTCP_LIB_SUBTESTS[@]}"; do
+ if [[ "${subtest}" == *" - ${KSFT_TEST}: ${*%% #*}" ]]; then
+ MPTCP_LIB_SUBTESTS_DUPLICATED=1
+ mptcp_lib_print_err "Duplicated entry: ${*}"
+ break
+ fi
+ done
+}
+
+__mptcp_lib_result_add() {
+ local result="${1}"
+ shift
+
+ local id=$((${#MPTCP_LIB_SUBTESTS[@]} + 1))
+
+ __mptcp_lib_result_check_duplicated "${*}"
+
+ MPTCP_LIB_SUBTESTS+=("${result} ${id} - ${KSFT_TEST}: ${*}")
+}
+
+# $1: test name
+mptcp_lib_result_pass() {
+ __mptcp_lib_result_add "ok" "${1}"
+}
+
+# $1: test name
+mptcp_lib_result_fail() {
+ __mptcp_lib_result_add "not ok" "${1}"
+}
+
+# $1: test name
+mptcp_lib_result_skip() {
+ __mptcp_lib_result_add "ok" "${1} # SKIP"
+}
+
+# $1: result code ; $2: test name
+mptcp_lib_result_code() {
+ local ret="${1}"
+ local name="${2}"
+
+ case "${ret}" in
+ "${KSFT_PASS}")
+ mptcp_lib_result_pass "${name}"
+ ;;
+ "${KSFT_FAIL}")
+ mptcp_lib_result_fail "${name}"
+ ;;
+ "${KSFT_SKIP}")
+ mptcp_lib_result_skip "${name}"
+ ;;
+ *)
+ echo "ERROR: wrong result code: ${ret}"
+ exit ${KSFT_FAIL}
+ ;;
+ esac
+}
+
+mptcp_lib_result_print_all_tap() {
+ local subtest
+
+ if [ ${#MPTCP_LIB_SUBTESTS[@]} -eq 0 ] ||
+ [ "${SELFTESTS_MPTCP_LIB_NO_TAP:-}" = "1" ]; then
+ return
+ fi
+
+ printf "\nTAP version 13\n"
+ printf "1..%d\n" "${#MPTCP_LIB_SUBTESTS[@]}"
+
+ for subtest in "${MPTCP_LIB_SUBTESTS[@]}"; do
+ printf "%s\n" "${subtest}"
+ done
+
+ if [ "${MPTCP_LIB_SUBTESTS_DUPLICATED}" = 1 ] &&
+ mptcp_lib_expect_all_features; then
+ mptcp_lib_print_err "Duplicated test entries"
+ exit ${KSFT_FAIL}
+ fi
+}
+
+# get the value of keyword $1 in the line marked by keyword $2
+mptcp_lib_get_info_value() {
+ grep "${2}" | sed -n 's/.*\('"${1}"':\)\([0-9a-f:.]*\).*$/\2/p;q'
+}
+
+# $1: info name ; $2: evts_ns ; [$3: event type; [$4: addr]]
+mptcp_lib_evts_get_info() {
+ grep "${4:-}" "${2}" | mptcp_lib_get_info_value "${1}" "^type:${3:-1},"
+}
+
+# $1: PID
+mptcp_lib_kill_wait() {
+ [ "${1}" -eq 0 ] && return 0
+
+ kill -SIGUSR1 "${1}" > /dev/null 2>&1
+ kill "${1}" > /dev/null 2>&1
+ wait "${1}" 2>/dev/null
+}
+
+# $1: IP address
+mptcp_lib_is_v6() {
+ [ -z "${1##*:*}" ]
+}
+
+# $1: ns, $2: MIB counter
+mptcp_lib_get_counter() {
+ local ns="${1}"
+ local counter="${2}"
+ local count
+
+ count=$(ip netns exec "${ns}" nstat -asz "${counter}" |
+ awk 'NR==1 {next} {print $2}')
+ if [ -z "${count}" ]; then
+ mptcp_lib_fail_if_expected_feature "${counter} counter"
+ return 1
+ fi
+
+ echo "${count}"
+}
+
+mptcp_lib_make_file() {
+ local name="${1}"
+ local bs="${2}"
+ local size="${3}"
+
+ dd if=/dev/urandom of="${name}" bs="${bs}" count="${size}" 2> /dev/null
+ echo -e "\nMPTCP_TEST_FILE_END_MARKER" >> "${name}"
+}
+
+# $1: file
+mptcp_lib_print_file_err() {
+ ls -l "${1}" 1>&2
+ echo "Trailing bytes are: "
+ tail -c 27 "${1}"
+}
+
+# $1: input file ; $2: output file ; $3: what kind of file
+mptcp_lib_check_transfer() {
+ local in="${1}"
+ local out="${2}"
+ local what="${3}"
+
+ if ! cmp "$in" "$out" > /dev/null 2>&1; then
+ mptcp_lib_pr_fail "$what does not match (in, out):"
+ mptcp_lib_print_file_err "$in"
+ mptcp_lib_print_file_err "$out"
+
+ return 1
+ fi
+
+ return 0
+}
+
+# $1: ns, $2: port
+mptcp_lib_wait_local_port_listen() {
+ local listener_ns="${1}"
+ local port="${2}"
+
+ local port_hex
+ port_hex="$(printf "%04X" "${port}")"
+
+ local _
+ for _ in $(seq 10); do
+ ip netns exec "${listener_ns}" cat /proc/net/tcp* | \
+ awk "BEGIN {rc=1} {if (\$2 ~ /:${port_hex}\$/ && \$4 ~ /0A/) \
+ {rc=0; exit}} END {exit rc}" &&
+ break
+ sleep 0.1
+ done
+}
+
+mptcp_lib_check_output() {
+ local err="${1}"
+ local cmd="${2}"
+ local expected="${3}"
+ local cmd_ret=0
+ local out
+
+ if ! out=$(${cmd} 2>"${err}"); then
+ cmd_ret=${?}
+ fi
+
+ if [ ${cmd_ret} -ne 0 ]; then
+ mptcp_lib_pr_fail "command execution '${cmd}' stderr"
+ cat "${err}"
+ return 2
+ elif [ "${out}" = "${expected}" ]; then
+ return 0
+ else
+ mptcp_lib_pr_fail "expected '${expected}' got '${out}'"
+ return 1
+ fi
+}
+
+mptcp_lib_check_tools() {
+ local tool
+
+ for tool in "${@}"; do
+ case "${tool}" in
+ "ip")
+ if ! ip -Version &> /dev/null; then
+ mptcp_lib_pr_skip "Could not run test without ip tool"
+ exit ${KSFT_SKIP}
+ fi
+ ;;
+ "ss")
+ if ! ss -h | grep -q MPTCP; then
+ mptcp_lib_pr_skip "ss tool does not support MPTCP"
+ exit ${KSFT_SKIP}
+ fi
+ ;;
+ "iptables"* | "ip6tables"*)
+ if ! "${tool}" -V &> /dev/null; then
+ mptcp_lib_pr_skip "Could not run all tests without ${tool}"
+ exit ${KSFT_SKIP}
+ fi
+ ;;
+ *)
+ mptcp_lib_pr_fail "Internal error: unsupported tool: ${tool}"
+ exit ${KSFT_FAIL}
+ ;;
+ esac
+ done
+}
+
+mptcp_lib_ns_init() {
+ local sec rndh
+
+ sec=$(date +%s)
+ rndh=$(printf %x "${sec}")-$(mktemp -u XXXXXX)
+
+ local netns
+ for netns in "${@}"; do
+ eval "${netns}=${netns}-${rndh}"
+
+ ip netns add "${!netns}" || exit ${KSFT_SKIP}
+ ip -net "${!netns}" link set lo up
+ ip netns exec "${!netns}" sysctl -q net.mptcp.enabled=1
+ ip netns exec "${!netns}" sysctl -q net.ipv4.conf.all.rp_filter=0
+ ip netns exec "${!netns}" sysctl -q net.ipv4.conf.default.rp_filter=0
+ done
+}
+
+mptcp_lib_ns_exit() {
+ local netns
+ for netns in "${@}"; do
+ ip netns del "${netns}"
+ rm -f /tmp/"${netns}".{nstat,out}
+ done
+}
+
+mptcp_lib_events() {
+ local ns="${1}"
+ local evts="${2}"
+ declare -n pid="${3}"
+
+ :>"${evts}"
+
+ mptcp_lib_kill_wait "${pid:-0}"
+ ip netns exec "${ns}" ./pm_nl_ctl events >> "${evts}" 2>&1 &
+ pid=$!
+}
+
+mptcp_lib_print_title() {
+ : "${MPTCP_LIB_TEST_COUNTER:?}"
+ : "${MPTCP_LIB_TEST_FORMAT:?}"
+
+ # shellcheck disable=SC2059 # the format is in a variable
+ printf "${MPTCP_LIB_TEST_FORMAT}" "$((++MPTCP_LIB_TEST_COUNTER))" "${*}"
+}
+
+# $1: var name ; $2: prev ret
+mptcp_lib_check_expected_one() {
+ local var="${1}"
+ local exp="e_${var}"
+ local prev_ret="${2}"
+
+ if [ "${!var}" = "${!exp}" ]; then
+ return 0
+ fi
+
+ if [ "${prev_ret}" = "0" ]; then
+ mptcp_lib_pr_fail
+ fi
+
+ mptcp_lib_print_err "Expected value for '${var}': '${!exp}', got '${!var}'."
+ return 1
+}
+
+# $@: all var names to check
+mptcp_lib_check_expected() {
+ local rc=0
+ local var
+
+ for var in "${@}"; do
+ mptcp_lib_check_expected_one "${var}" "${rc}" || rc=1
+ done
+
+ return "${rc}"
+}
+
+# shellcheck disable=SC2034 # Some variables are used below but indirectly
+mptcp_lib_verify_listener_events() {
+ local evt=${1}
+ local e_type=${2}
+ local e_family=${3}
+ local e_saddr=${4}
+ local e_sport=${5}
+ local type
+ local family
+ local saddr
+ local sport
+ local rc=0
+
+ type=$(mptcp_lib_evts_get_info type "${evt}" "${e_type}")
+ family=$(mptcp_lib_evts_get_info family "${evt}" "${e_type}")
+ if [ "${family}" ] && [ "${family}" = "${AF_INET6}" ]; then
+ saddr=$(mptcp_lib_evts_get_info saddr6 "${evt}" "${e_type}")
+ else
+ saddr=$(mptcp_lib_evts_get_info saddr4 "${evt}" "${e_type}")
+ fi
+ sport=$(mptcp_lib_evts_get_info sport "${evt}" "${e_type}")
+
+ mptcp_lib_check_expected "type" "family" "saddr" "sport" || rc="${?}"
+ return "${rc}"
+}
diff --git a/tools/testing/selftests/net/mptcp/mptcp_sockopt.c b/tools/testing/selftests/net/mptcp/mptcp_sockopt.c
new file mode 100644
index 000000000000..926b0be87c99
--- /dev/null
+++ b/tools/testing/selftests/net/mptcp/mptcp_sockopt.c
@@ -0,0 +1,866 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#define _GNU_SOURCE
+
+#include <assert.h>
+#include <errno.h>
+#include <fcntl.h>
+#include <limits.h>
+#include <string.h>
+#include <stdarg.h>
+#include <stdbool.h>
+#include <stdint.h>
+#include <inttypes.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <strings.h>
+#include <time.h>
+#include <unistd.h>
+
+#include <sys/socket.h>
+#include <sys/types.h>
+#include <sys/wait.h>
+
+#include <netdb.h>
+#include <netinet/in.h>
+
+#include <linux/tcp.h>
+
+static int pf = AF_INET;
+
+#ifndef IPPROTO_MPTCP
+#define IPPROTO_MPTCP 262
+#endif
+#ifndef SOL_MPTCP
+#define SOL_MPTCP 284
+#endif
+
+#ifndef MPTCP_INFO
+struct mptcp_info {
+ __u8 mptcpi_subflows;
+ __u8 mptcpi_add_addr_signal;
+ __u8 mptcpi_add_addr_accepted;
+ __u8 mptcpi_subflows_max;
+ __u8 mptcpi_add_addr_signal_max;
+ __u8 mptcpi_add_addr_accepted_max;
+ __u32 mptcpi_flags;
+ __u32 mptcpi_token;
+ __u64 mptcpi_write_seq;
+ __u64 mptcpi_snd_una;
+ __u64 mptcpi_rcv_nxt;
+ __u8 mptcpi_local_addr_used;
+ __u8 mptcpi_local_addr_max;
+ __u8 mptcpi_csum_enabled;
+ __u32 mptcpi_retransmits;
+ __u64 mptcpi_bytes_retrans;
+ __u64 mptcpi_bytes_sent;
+ __u64 mptcpi_bytes_received;
+ __u64 mptcpi_bytes_acked;
+};
+
+struct mptcp_subflow_data {
+ __u32 size_subflow_data; /* size of this structure in userspace */
+ __u32 num_subflows; /* must be 0, set by kernel */
+ __u32 size_kernel; /* must be 0, set by kernel */
+ __u32 size_user; /* size of one element in data[] */
+} __attribute__((aligned(8)));
+
+struct mptcp_subflow_addrs {
+ union {
+ __kernel_sa_family_t sa_family;
+ struct sockaddr sa_local;
+ struct sockaddr_in sin_local;
+ struct sockaddr_in6 sin6_local;
+ struct __kernel_sockaddr_storage ss_local;
+ };
+ union {
+ struct sockaddr sa_remote;
+ struct sockaddr_in sin_remote;
+ struct sockaddr_in6 sin6_remote;
+ struct __kernel_sockaddr_storage ss_remote;
+ };
+};
+
+#define MPTCP_INFO 1
+#define MPTCP_TCPINFO 2
+#define MPTCP_SUBFLOW_ADDRS 3
+#endif
+
+#ifndef MPTCP_FULL_INFO
+struct mptcp_subflow_info {
+ __u32 id;
+ struct mptcp_subflow_addrs addrs;
+};
+
+struct mptcp_full_info {
+ __u32 size_tcpinfo_kernel; /* must be 0, set by kernel */
+ __u32 size_tcpinfo_user;
+ __u32 size_sfinfo_kernel; /* must be 0, set by kernel */
+ __u32 size_sfinfo_user;
+ __u32 num_subflows; /* must be 0, set by kernel (real subflow count) */
+ __u32 size_arrays_user; /* max subflows that userspace is interested in;
+ * the buffers at subflow_info/tcp_info
+ * are respectively at least:
+ * size_arrays * size_sfinfo_user
+ * size_arrays * size_tcpinfo_user
+ * bytes wide
+ */
+ __aligned_u64 subflow_info;
+ __aligned_u64 tcp_info;
+ struct mptcp_info mptcp_info;
+};
+
+#define MPTCP_FULL_INFO 4
+#endif
+
+struct so_state {
+ struct mptcp_info mi;
+ struct mptcp_info last_sample;
+ struct tcp_info tcp_info;
+ struct mptcp_subflow_addrs addrs;
+ uint64_t mptcpi_rcv_delta;
+ uint64_t tcpi_rcv_delta;
+ bool pkt_stats_avail;
+};
+
+#ifndef MIN
+#define MIN(a, b) ((a) < (b) ? (a) : (b))
+#endif
+
+static void die_perror(const char *msg)
+{
+ perror(msg);
+ exit(1);
+}
+
+static void die_usage(int r)
+{
+ fprintf(stderr, "Usage: mptcp_sockopt [-6]\n");
+ exit(r);
+}
+
+static void xerror(const char *fmt, ...)
+{
+ va_list ap;
+
+ va_start(ap, fmt);
+ vfprintf(stderr, fmt, ap);
+ va_end(ap);
+ fputc('\n', stderr);
+ exit(1);
+}
+
+static const char *getxinfo_strerr(int err)
+{
+ if (err == EAI_SYSTEM)
+ return strerror(errno);
+
+ return gai_strerror(err);
+}
+
+static void xgetaddrinfo(const char *node, const char *service,
+ const struct addrinfo *hints,
+ struct addrinfo **res)
+{
+ int err = getaddrinfo(node, service, hints, res);
+
+ if (err) {
+ const char *errstr = getxinfo_strerr(err);
+
+ fprintf(stderr, "Fatal: getaddrinfo(%s:%s): %s\n",
+ node ? node : "", service ? service : "", errstr);
+ exit(1);
+ }
+}
+
+static int sock_listen_mptcp(const char * const listenaddr,
+ const char * const port)
+{
+ int sock = -1;
+ struct addrinfo hints = {
+ .ai_protocol = IPPROTO_TCP,
+ .ai_socktype = SOCK_STREAM,
+ .ai_flags = AI_PASSIVE | AI_NUMERICHOST
+ };
+
+ hints.ai_family = pf;
+
+ struct addrinfo *a, *addr;
+ int one = 1;
+
+ xgetaddrinfo(listenaddr, port, &hints, &addr);
+ hints.ai_family = pf;
+
+ for (a = addr; a; a = a->ai_next) {
+ sock = socket(a->ai_family, a->ai_socktype, IPPROTO_MPTCP);
+ if (sock < 0)
+ continue;
+
+ if (-1 == setsockopt(sock, SOL_SOCKET, SO_REUSEADDR, &one,
+ sizeof(one)))
+ perror("setsockopt");
+
+ if (bind(sock, a->ai_addr, a->ai_addrlen) == 0)
+ break; /* success */
+
+ perror("bind");
+ close(sock);
+ sock = -1;
+ }
+
+ freeaddrinfo(addr);
+
+ if (sock < 0)
+ xerror("could not create listen socket");
+
+ if (listen(sock, 20))
+ die_perror("listen");
+
+ return sock;
+}
+
+static int sock_connect_mptcp(const char * const remoteaddr,
+ const char * const port, int proto)
+{
+ struct addrinfo hints = {
+ .ai_protocol = IPPROTO_TCP,
+ .ai_socktype = SOCK_STREAM,
+ };
+ struct addrinfo *a, *addr;
+ int sock = -1;
+
+ hints.ai_family = pf;
+
+ xgetaddrinfo(remoteaddr, port, &hints, &addr);
+ for (a = addr; a; a = a->ai_next) {
+ sock = socket(a->ai_family, a->ai_socktype, proto);
+ if (sock < 0)
+ continue;
+
+ if (connect(sock, a->ai_addr, a->ai_addrlen) == 0)
+ break; /* success */
+
+ die_perror("connect");
+ }
+
+ if (sock < 0)
+ xerror("could not create connect socket");
+
+ freeaddrinfo(addr);
+ return sock;
+}
+
+static void parse_opts(int argc, char **argv)
+{
+ int c;
+
+ while ((c = getopt(argc, argv, "h6")) != -1) {
+ switch (c) {
+ case 'h':
+ die_usage(0);
+ break;
+ case '6':
+ pf = AF_INET6;
+ break;
+ default:
+ die_usage(1);
+ break;
+ }
+ }
+}
+
+static void do_getsockopt_bogus_sf_data(int fd, int optname)
+{
+ struct mptcp_subflow_data good_data;
+ struct bogus_data {
+ struct mptcp_subflow_data d;
+ char buf[2];
+ } bd;
+ socklen_t olen, _olen;
+ int ret;
+
+ memset(&bd, 0, sizeof(bd));
+ memset(&good_data, 0, sizeof(good_data));
+
+ olen = sizeof(good_data);
+ good_data.size_subflow_data = olen;
+
+ ret = getsockopt(fd, SOL_MPTCP, optname, &bd, &olen);
+ assert(ret < 0); /* 0 size_subflow_data */
+ assert(olen == sizeof(good_data));
+
+ bd.d = good_data;
+
+ ret = getsockopt(fd, SOL_MPTCP, optname, &bd, &olen);
+ assert(ret == 0);
+ assert(olen == sizeof(good_data));
+ assert(bd.d.num_subflows == 1);
+ assert(bd.d.size_kernel > 0);
+ assert(bd.d.size_user == 0);
+
+ bd.d = good_data;
+ _olen = rand() % olen;
+ olen = _olen;
+ ret = getsockopt(fd, SOL_MPTCP, optname, &bd, &olen);
+ assert(ret < 0); /* bogus olen */
+ assert(olen == _olen); /* must be unchanged */
+
+ bd.d = good_data;
+ olen = sizeof(good_data);
+ bd.d.size_kernel = 1;
+ ret = getsockopt(fd, SOL_MPTCP, optname, &bd, &olen);
+ assert(ret < 0); /* size_kernel not 0 */
+
+ bd.d = good_data;
+ olen = sizeof(good_data);
+ bd.d.num_subflows = 1;
+ ret = getsockopt(fd, SOL_MPTCP, optname, &bd, &olen);
+ assert(ret < 0); /* num_subflows not 0 */
+
+ /* forward compat check: larger struct mptcp_subflow_data on 'old' kernel */
+ bd.d = good_data;
+ olen = sizeof(bd);
+ bd.d.size_subflow_data = sizeof(bd);
+
+ ret = getsockopt(fd, SOL_MPTCP, optname, &bd, &olen);
+ assert(ret == 0);
+
+ /* olen must be truncated to real data size filled by kernel: */
+ assert(olen == sizeof(good_data));
+
+ assert(bd.d.size_subflow_data == sizeof(bd));
+
+ bd.d = good_data;
+ bd.d.size_subflow_data += 1;
+ bd.d.size_user = 1;
+ olen = bd.d.size_subflow_data + 1;
+ _olen = olen;
+
+ ret = getsockopt(fd, SOL_MPTCP, optname, &bd, &_olen);
+ assert(ret == 0);
+
+ /* no truncation, kernel should have filled 1 byte of optname payload in buf[1]: */
+ assert(olen == _olen);
+
+ assert(bd.d.size_subflow_data == sizeof(good_data) + 1);
+ assert(bd.buf[0] == 0);
+}
+
+static void do_getsockopt_mptcp_info(struct so_state *s, int fd, size_t w)
+{
+ struct mptcp_info i;
+ socklen_t olen;
+ int ret;
+
+ olen = sizeof(i);
+ ret = getsockopt(fd, SOL_MPTCP, MPTCP_INFO, &i, &olen);
+
+ if (ret < 0)
+ die_perror("getsockopt MPTCP_INFO");
+
+ s->pkt_stats_avail = olen >= sizeof(i);
+
+ s->last_sample = i;
+ if (s->mi.mptcpi_write_seq == 0)
+ s->mi = i;
+
+ assert(s->mi.mptcpi_write_seq + w == i.mptcpi_write_seq);
+
+ s->mptcpi_rcv_delta = i.mptcpi_rcv_nxt - s->mi.mptcpi_rcv_nxt;
+}
+
+static void do_getsockopt_tcp_info(struct so_state *s, int fd, size_t r, size_t w)
+{
+ struct my_tcp_info {
+ struct mptcp_subflow_data d;
+ struct tcp_info ti[2];
+ } ti;
+ int ret, tries = 5;
+ socklen_t olen;
+
+ do {
+ memset(&ti, 0, sizeof(ti));
+
+ ti.d.size_subflow_data = sizeof(struct mptcp_subflow_data);
+ ti.d.size_user = sizeof(struct tcp_info);
+ olen = sizeof(ti);
+
+ ret = getsockopt(fd, SOL_MPTCP, MPTCP_TCPINFO, &ti, &olen);
+ if (ret < 0)
+ xerror("getsockopt MPTCP_TCPINFO (tries %d, %m)");
+
+ assert(olen <= sizeof(ti));
+ assert(ti.d.size_kernel > 0);
+ assert(ti.d.size_user ==
+ MIN(ti.d.size_kernel, sizeof(struct tcp_info)));
+ assert(ti.d.num_subflows == 1);
+
+ assert(olen > (socklen_t)sizeof(struct mptcp_subflow_data));
+ olen -= sizeof(struct mptcp_subflow_data);
+ assert(olen == ti.d.size_user);
+
+ s->tcp_info = ti.ti[0];
+
+ if (ti.ti[0].tcpi_bytes_sent == w &&
+ ti.ti[0].tcpi_bytes_received == r)
+ goto done;
+
+ if (r == 0 && ti.ti[0].tcpi_bytes_sent == w &&
+ ti.ti[0].tcpi_bytes_received) {
+ s->tcpi_rcv_delta = ti.ti[0].tcpi_bytes_received;
+ goto done;
+ }
+
+ /* wait and repeat, might be that tx is still ongoing */
+ sleep(1);
+ } while (tries-- > 0);
+
+ xerror("tcpi_bytes_sent %" PRIu64 ", want %zu. tcpi_bytes_received %" PRIu64 ", want %zu",
+ ti.ti[0].tcpi_bytes_sent, w, ti.ti[0].tcpi_bytes_received, r);
+
+done:
+ do_getsockopt_bogus_sf_data(fd, MPTCP_TCPINFO);
+}
+
+static void do_getsockopt_subflow_addrs(struct so_state *s, int fd)
+{
+ struct sockaddr_storage remote, local;
+ socklen_t olen, rlen, llen;
+ int ret;
+ struct my_addrs {
+ struct mptcp_subflow_data d;
+ struct mptcp_subflow_addrs addr[2];
+ } addrs;
+
+ memset(&addrs, 0, sizeof(addrs));
+ memset(&local, 0, sizeof(local));
+ memset(&remote, 0, sizeof(remote));
+
+ addrs.d.size_subflow_data = sizeof(struct mptcp_subflow_data);
+ addrs.d.size_user = sizeof(struct mptcp_subflow_addrs);
+ olen = sizeof(addrs);
+
+ ret = getsockopt(fd, SOL_MPTCP, MPTCP_SUBFLOW_ADDRS, &addrs, &olen);
+ if (ret < 0)
+ die_perror("getsockopt MPTCP_SUBFLOW_ADDRS");
+
+ assert(olen <= sizeof(addrs));
+ assert(addrs.d.size_kernel > 0);
+ assert(addrs.d.size_user ==
+ MIN(addrs.d.size_kernel, sizeof(struct mptcp_subflow_addrs)));
+ assert(addrs.d.num_subflows == 1);
+
+ assert(olen > (socklen_t)sizeof(struct mptcp_subflow_data));
+ olen -= sizeof(struct mptcp_subflow_data);
+ assert(olen == addrs.d.size_user);
+
+ llen = sizeof(local);
+ ret = getsockname(fd, (struct sockaddr *)&local, &llen);
+ if (ret < 0)
+ die_perror("getsockname");
+ rlen = sizeof(remote);
+ ret = getpeername(fd, (struct sockaddr *)&remote, &rlen);
+ if (ret < 0)
+ die_perror("getpeername");
+
+ assert(rlen > 0);
+ assert(rlen == llen);
+
+ assert(remote.ss_family == local.ss_family);
+
+ assert(memcmp(&local, &addrs.addr[0].ss_local, sizeof(local)) == 0);
+ assert(memcmp(&remote, &addrs.addr[0].ss_remote, sizeof(remote)) == 0);
+ s->addrs = addrs.addr[0];
+
+ memset(&addrs, 0, sizeof(addrs));
+
+ addrs.d.size_subflow_data = sizeof(struct mptcp_subflow_data);
+ addrs.d.size_user = sizeof(sa_family_t);
+ olen = sizeof(addrs.d) + sizeof(sa_family_t);
+
+ ret = getsockopt(fd, SOL_MPTCP, MPTCP_SUBFLOW_ADDRS, &addrs, &olen);
+ assert(ret == 0);
+ assert(olen == sizeof(addrs.d) + sizeof(sa_family_t));
+
+ assert(addrs.addr[0].sa_family == pf);
+ assert(addrs.addr[0].sa_family == local.ss_family);
+
+ assert(memcmp(&local, &addrs.addr[0].ss_local, sizeof(local)) != 0);
+ assert(memcmp(&remote, &addrs.addr[0].ss_remote, sizeof(remote)) != 0);
+
+ do_getsockopt_bogus_sf_data(fd, MPTCP_SUBFLOW_ADDRS);
+}
+
+static void do_getsockopt_mptcp_full_info(struct so_state *s, int fd)
+{
+ size_t data_size = sizeof(struct mptcp_full_info);
+ struct mptcp_subflow_info sfinfo[2];
+ struct tcp_info tcp_info[2];
+ struct mptcp_full_info mfi;
+ socklen_t olen;
+ int ret;
+
+ memset(&mfi, 0, data_size);
+ memset(tcp_info, 0, sizeof(tcp_info));
+ memset(sfinfo, 0, sizeof(sfinfo));
+
+ mfi.size_tcpinfo_user = sizeof(struct tcp_info);
+ mfi.size_sfinfo_user = sizeof(struct mptcp_subflow_info);
+ mfi.size_arrays_user = 2;
+ mfi.subflow_info = (unsigned long)&sfinfo[0];
+ mfi.tcp_info = (unsigned long)&tcp_info[0];
+ olen = data_size;
+
+ ret = getsockopt(fd, SOL_MPTCP, MPTCP_FULL_INFO, &mfi, &olen);
+ if (ret < 0) {
+ if (errno == EOPNOTSUPP) {
+ perror("MPTCP_FULL_INFO test skipped");
+ return;
+ }
+ xerror("getsockopt MPTCP_FULL_INFO");
+ }
+
+ assert(olen <= data_size);
+ assert(mfi.size_tcpinfo_kernel > 0);
+ assert(mfi.size_tcpinfo_user ==
+ MIN(mfi.size_tcpinfo_kernel, sizeof(struct tcp_info)));
+ assert(mfi.size_sfinfo_kernel > 0);
+ assert(mfi.size_sfinfo_user ==
+ MIN(mfi.size_sfinfo_kernel, sizeof(struct mptcp_subflow_info)));
+ assert(mfi.num_subflows == 1);
+
+ /* Tolerate future extension to mptcp_info struct and running newer
+ * test on top of older kernel.
+ * Anyway any kernel supporting MPTCP_FULL_INFO must at least include
+ * the following in mptcp_info.
+ */
+ assert(olen > (socklen_t)__builtin_offsetof(struct mptcp_full_info, tcp_info));
+ assert(mfi.mptcp_info.mptcpi_subflows == 0);
+ assert(mfi.mptcp_info.mptcpi_bytes_sent == s->last_sample.mptcpi_bytes_sent);
+ assert(mfi.mptcp_info.mptcpi_bytes_received == s->last_sample.mptcpi_bytes_received);
+
+ assert(sfinfo[0].id == 1);
+ assert(tcp_info[0].tcpi_bytes_sent == s->tcp_info.tcpi_bytes_sent);
+ assert(tcp_info[0].tcpi_bytes_received == s->tcp_info.tcpi_bytes_received);
+ assert(!memcmp(&sfinfo->addrs, &s->addrs, sizeof(struct mptcp_subflow_addrs)));
+}
+
+static void do_getsockopts(struct so_state *s, int fd, size_t r, size_t w)
+{
+ do_getsockopt_mptcp_info(s, fd, w);
+
+ do_getsockopt_tcp_info(s, fd, r, w);
+
+ do_getsockopt_subflow_addrs(s, fd);
+
+ if (r)
+ do_getsockopt_mptcp_full_info(s, fd);
+}
+
+static void connect_one_server(int fd, int pipefd)
+{
+ char buf[4096], buf2[4096];
+ size_t len, i, total;
+ struct so_state s;
+ bool eof = false;
+ ssize_t ret;
+
+ memset(&s, 0, sizeof(s));
+
+ len = rand() % (sizeof(buf) - 1);
+
+ if (len < 128)
+ len = 128;
+
+ for (i = 0; i < len ; i++) {
+ buf[i] = rand() % 26;
+ buf[i] += 'A';
+ }
+
+ buf[i] = '\n';
+
+ do_getsockopts(&s, fd, 0, 0);
+
+ /* un-block server */
+ ret = read(pipefd, buf2, 4);
+ assert(ret == 4);
+ close(pipefd);
+
+ assert(strncmp(buf2, "xmit", 4) == 0);
+
+ ret = write(fd, buf, len);
+ if (ret < 0)
+ die_perror("write");
+
+ if (ret != (ssize_t)len)
+ xerror("short write");
+
+ total = 0;
+ do {
+ ret = read(fd, buf2 + total, sizeof(buf2) - total);
+ if (ret < 0)
+ die_perror("read");
+ if (ret == 0) {
+ eof = true;
+ break;
+ }
+
+ total += ret;
+ } while (total < len);
+
+ if (total != len)
+ xerror("total %lu, len %lu eof %d\n", total, len, eof);
+
+ if (memcmp(buf, buf2, len))
+ xerror("data corruption");
+
+ if (s.tcpi_rcv_delta)
+ assert(s.tcpi_rcv_delta <= total);
+
+ do_getsockopts(&s, fd, ret, ret);
+
+ if (eof)
+ total += 1; /* sequence advances due to FIN */
+
+ assert(s.mptcpi_rcv_delta == (uint64_t)total);
+ close(fd);
+}
+
+static void process_one_client(int fd, int pipefd)
+{
+ ssize_t ret, ret2, ret3;
+ struct so_state s;
+ char buf[4096];
+
+ memset(&s, 0, sizeof(s));
+ do_getsockopts(&s, fd, 0, 0);
+
+ ret = write(pipefd, "xmit", 4);
+ assert(ret == 4);
+
+ ret = read(fd, buf, sizeof(buf));
+ if (ret < 0)
+ die_perror("read");
+
+ assert(s.mptcpi_rcv_delta <= (uint64_t)ret);
+
+ if (s.tcpi_rcv_delta)
+ assert(s.tcpi_rcv_delta == (uint64_t)ret);
+
+ ret2 = write(fd, buf, ret);
+ if (ret2 < 0)
+ die_perror("write");
+
+ /* wait for hangup */
+ ret3 = read(fd, buf, 1);
+ if (ret3 != 0)
+ xerror("expected EOF, got %lu", ret3);
+
+ do_getsockopts(&s, fd, ret, ret2);
+ if (s.mptcpi_rcv_delta != (uint64_t)ret + 1)
+ xerror("mptcpi_rcv_delta %" PRIu64 ", expect %" PRIu64, s.mptcpi_rcv_delta, ret + 1, s.mptcpi_rcv_delta - ret);
+
+ /* be nice when running on top of older kernel */
+ if (s.pkt_stats_avail) {
+ if (s.last_sample.mptcpi_bytes_sent != ret2)
+ xerror("mptcpi_bytes_sent %" PRIu64 ", expect %" PRIu64,
+ s.last_sample.mptcpi_bytes_sent, ret2,
+ s.last_sample.mptcpi_bytes_sent - ret2);
+ if (s.last_sample.mptcpi_bytes_received != ret)
+ xerror("mptcpi_bytes_received %" PRIu64 ", expect %" PRIu64,
+ s.last_sample.mptcpi_bytes_received, ret,
+ s.last_sample.mptcpi_bytes_received - ret);
+ if (s.last_sample.mptcpi_bytes_acked != ret)
+ xerror("mptcpi_bytes_acked %" PRIu64 ", expect %" PRIu64,
+ s.last_sample.mptcpi_bytes_acked, ret2,
+ s.last_sample.mptcpi_bytes_acked - ret2);
+ }
+
+ close(fd);
+}
+
+static int xaccept(int s)
+{
+ int fd = accept(s, NULL, 0);
+
+ if (fd < 0)
+ die_perror("accept");
+
+ return fd;
+}
+
+static int server(int pipefd)
+{
+ int fd = -1, r;
+
+ switch (pf) {
+ case AF_INET:
+ fd = sock_listen_mptcp("127.0.0.1", "15432");
+ break;
+ case AF_INET6:
+ fd = sock_listen_mptcp("::1", "15432");
+ break;
+ default:
+ xerror("Unknown pf %d\n", pf);
+ break;
+ }
+
+ r = write(pipefd, "conn", 4);
+ assert(r == 4);
+
+ alarm(15);
+ r = xaccept(fd);
+
+ process_one_client(r, pipefd);
+
+ return 0;
+}
+
+static void test_ip_tos_sockopt(int fd)
+{
+ uint8_t tos_in, tos_out;
+ socklen_t s;
+ int r;
+
+ tos_in = rand() & 0xfc;
+ r = setsockopt(fd, SOL_IP, IP_TOS, &tos_in, sizeof(tos_out));
+ if (r != 0)
+ die_perror("setsockopt IP_TOS");
+
+ tos_out = 0;
+ s = sizeof(tos_out);
+ r = getsockopt(fd, SOL_IP, IP_TOS, &tos_out, &s);
+ if (r != 0)
+ die_perror("getsockopt IP_TOS");
+
+ if (tos_in != tos_out)
+ xerror("tos %x != %x socklen_t %d\n", tos_in, tos_out, s);
+
+ if (s != 1)
+ xerror("tos should be 1 byte");
+
+ s = 0;
+ r = getsockopt(fd, SOL_IP, IP_TOS, &tos_out, &s);
+ if (r != 0)
+ die_perror("getsockopt IP_TOS 0");
+ if (s != 0)
+ xerror("expect socklen_t == 0");
+
+ s = -1;
+ r = getsockopt(fd, SOL_IP, IP_TOS, &tos_out, &s);
+ if (r != -1 && errno != EINVAL)
+ die_perror("getsockopt IP_TOS did not indicate -EINVAL");
+ if (s != -1)
+ xerror("expect socklen_t == -1");
+}
+
+static int client(int pipefd)
+{
+ int fd = -1;
+
+ alarm(15);
+
+ switch (pf) {
+ case AF_INET:
+ fd = sock_connect_mptcp("127.0.0.1", "15432", IPPROTO_MPTCP);
+ break;
+ case AF_INET6:
+ fd = sock_connect_mptcp("::1", "15432", IPPROTO_MPTCP);
+ break;
+ default:
+ xerror("Unknown pf %d\n", pf);
+ }
+
+ test_ip_tos_sockopt(fd);
+
+ connect_one_server(fd, pipefd);
+
+ return 0;
+}
+
+static pid_t xfork(void)
+{
+ pid_t p = fork();
+
+ if (p < 0)
+ die_perror("fork");
+
+ return p;
+}
+
+static int rcheck(int wstatus, const char *what)
+{
+ if (WIFEXITED(wstatus)) {
+ if (WEXITSTATUS(wstatus) == 0)
+ return 0;
+ fprintf(stderr, "%s exited, status=%d\n", what, WEXITSTATUS(wstatus));
+ return WEXITSTATUS(wstatus);
+ } else if (WIFSIGNALED(wstatus)) {
+ xerror("%s killed by signal %d\n", what, WTERMSIG(wstatus));
+ } else if (WIFSTOPPED(wstatus)) {
+ xerror("%s stopped by signal %d\n", what, WSTOPSIG(wstatus));
+ }
+
+ return 111;
+}
+
+static void init_rng(void)
+{
+ int fd = open("/dev/urandom", O_RDONLY);
+
+ if (fd >= 0) {
+ unsigned int foo;
+ ssize_t ret;
+
+ /* can't fail */
+ ret = read(fd, &foo, sizeof(foo));
+ assert(ret == sizeof(foo));
+
+ close(fd);
+ srand(foo);
+ } else {
+ srand(time(NULL));
+ }
+}
+
+int main(int argc, char *argv[])
+{
+ int e1, e2, wstatus;
+ pid_t s, c, ret;
+ int pipefds[2];
+
+ parse_opts(argc, argv);
+
+ init_rng();
+
+ e1 = pipe(pipefds);
+ if (e1 < 0)
+ die_perror("pipe");
+
+ s = xfork();
+ if (s == 0)
+ return server(pipefds[1]);
+
+ close(pipefds[1]);
+
+ /* wait until server bound a socket */
+ e1 = read(pipefds[0], &e1, 4);
+ assert(e1 == 4);
+
+ c = xfork();
+ if (c == 0)
+ return client(pipefds[0]);
+
+ close(pipefds[0]);
+
+ ret = waitpid(s, &wstatus, 0);
+ if (ret == -1)
+ die_perror("waitpid");
+ e1 = rcheck(wstatus, "server");
+ ret = waitpid(c, &wstatus, 0);
+ if (ret == -1)
+ die_perror("waitpid");
+ e2 = rcheck(wstatus, "client");
+
+ return e1 ? e1 : e2;
+}
diff --git a/tools/testing/selftests/net/mptcp/mptcp_sockopt.sh b/tools/testing/selftests/net/mptcp/mptcp_sockopt.sh
new file mode 100755
index 000000000000..e2d70c18786e
--- /dev/null
+++ b/tools/testing/selftests/net/mptcp/mptcp_sockopt.sh
@@ -0,0 +1,338 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+# Double quotes to prevent globbing and word splitting is recommended in new
+# code but we accept it, especially because there were too many before having
+# address all other issues detected by shellcheck.
+#shellcheck disable=SC2086
+
+. "$(dirname "${0}")/mptcp_lib.sh"
+
+ret=0
+sin=""
+sout=""
+cin=""
+cout=""
+timeout_poll=30
+timeout_test=$((timeout_poll * 2 + 1))
+iptables="iptables"
+ip6tables="ip6tables"
+
+ns1=""
+ns2=""
+ns_sbox=""
+
+add_mark_rules()
+{
+ local ns=$1
+ local m=$2
+
+ local t
+ for t in ${iptables} ${ip6tables}; do
+ # just to debug: check we have multiple subflows connection requests
+ ip netns exec $ns $t -A OUTPUT -p tcp --syn -m mark --mark $m -j ACCEPT
+
+ # RST packets might be handled by a internal dummy socket
+ ip netns exec $ns $t -A OUTPUT -p tcp --tcp-flags RST RST -m mark --mark 0 -j ACCEPT
+
+ ip netns exec $ns $t -A OUTPUT -p tcp -m mark --mark $m -j ACCEPT
+ ip netns exec $ns $t -A OUTPUT -p tcp -m mark --mark 0 -j DROP
+ done
+}
+
+init()
+{
+ mptcp_lib_ns_init ns1 ns2 ns_sbox
+
+ local i
+ for i in $(seq 1 4); do
+ ip link add ns1eth$i netns "$ns1" type veth peer name ns2eth$i netns "$ns2"
+ ip -net "$ns1" addr add 10.0.$i.1/24 dev ns1eth$i
+ ip -net "$ns1" addr add dead:beef:$i::1/64 dev ns1eth$i nodad
+ ip -net "$ns1" link set ns1eth$i up
+
+ ip -net "$ns2" addr add 10.0.$i.2/24 dev ns2eth$i
+ ip -net "$ns2" addr add dead:beef:$i::2/64 dev ns2eth$i nodad
+ ip -net "$ns2" link set ns2eth$i up
+
+ # let $ns2 reach any $ns1 address from any interface
+ ip -net "$ns2" route add default via 10.0.$i.1 dev ns2eth$i metric 10$i
+
+ ip netns exec $ns1 ./pm_nl_ctl add 10.0.$i.1 flags signal
+ ip netns exec $ns1 ./pm_nl_ctl add dead:beef:$i::1 flags signal
+
+ ip netns exec $ns2 ./pm_nl_ctl add 10.0.$i.2 flags signal
+ ip netns exec $ns2 ./pm_nl_ctl add dead:beef:$i::2 flags signal
+ done
+
+ ip netns exec $ns1 ./pm_nl_ctl limits 8 8
+ ip netns exec $ns2 ./pm_nl_ctl limits 8 8
+
+ add_mark_rules $ns1 1
+ add_mark_rules $ns2 2
+}
+
+# This function is used in the cleanup trap
+#shellcheck disable=SC2317
+cleanup()
+{
+ mptcp_lib_ns_exit "${ns1}" "${ns2}" "${ns_sbox}"
+ rm -f "$cin" "$cout"
+ rm -f "$sin" "$sout"
+}
+
+mptcp_lib_check_mptcp
+mptcp_lib_check_kallsyms
+mptcp_lib_check_tools ip "${iptables}" "${ip6tables}"
+
+check_mark()
+{
+ local ns=$1
+ local af=$2
+
+ local tables=${iptables}
+
+ if [ $af -eq 6 ];then
+ tables=${ip6tables}
+ fi
+
+ local counters values
+ counters=$(ip netns exec $ns $tables -v -L OUTPUT | grep DROP)
+ values=${counters%DROP*}
+
+ local v
+ for v in $values; do
+ if [ $v -ne 0 ]; then
+ mptcp_lib_pr_fail "got $tables $values in ns $ns," \
+ "not 0 - not all expected packets marked"
+ ret=${KSFT_FAIL}
+ return 1
+ fi
+ done
+
+ return 0
+}
+
+print_title()
+{
+ mptcp_lib_print_title "${@}"
+}
+
+do_transfer()
+{
+ local listener_ns="$1"
+ local connector_ns="$2"
+ local cl_proto="$3"
+ local srv_proto="$4"
+ local connect_addr="$5"
+
+ local port=12001
+
+ :> "$cout"
+ :> "$sout"
+
+ local mptcp_connect="./mptcp_connect -r 20"
+
+ local local_addr ip
+ if mptcp_lib_is_v6 "${connect_addr}"; then
+ local_addr="::"
+ ip=ipv6
+ else
+ local_addr="0.0.0.0"
+ ip=ipv4
+ fi
+
+ cmsg="TIMESTAMPNS"
+ if mptcp_lib_kallsyms_has "mptcp_ioctl$"; then
+ cmsg+=",TCPINQ"
+ fi
+
+ timeout ${timeout_test} \
+ ip netns exec ${listener_ns} \
+ $mptcp_connect -t ${timeout_poll} -l -M 1 -p $port -s ${srv_proto} -c "${cmsg}" \
+ ${local_addr} < "$sin" > "$sout" &
+ local spid=$!
+
+ sleep 1
+
+ timeout ${timeout_test} \
+ ip netns exec ${connector_ns} \
+ $mptcp_connect -t ${timeout_poll} -M 2 -p $port -s ${cl_proto} -c "${cmsg}" \
+ $connect_addr < "$cin" > "$cout" &
+
+ local cpid=$!
+
+ wait $cpid
+ local retc=$?
+ wait $spid
+ local rets=$?
+
+ print_title "Transfer ${ip:2}"
+ if [ ${rets} -ne 0 ] || [ ${retc} -ne 0 ]; then
+ mptcp_lib_pr_fail "client exit code $retc, server $rets"
+ echo -e "\nnetns ${listener_ns} socket stat for ${port}:" 1>&2
+ ip netns exec ${listener_ns} ss -Menita 1>&2 -o "sport = :$port"
+
+ echo -e "\nnetns ${connector_ns} socket stat for ${port}:" 1>&2
+ ip netns exec ${connector_ns} ss -Menita 1>&2 -o "dport = :$port"
+
+ mptcp_lib_result_fail "transfer ${ip}"
+
+ ret=${KSFT_FAIL}
+ return 1
+ fi
+ if ! mptcp_lib_check_transfer $cin $sout "file received by server"; then
+ rets=1
+ else
+ mptcp_lib_pr_ok
+ fi
+ mptcp_lib_result_code "${rets}" "transfer ${ip}"
+
+ print_title "Mark ${ip:2}"
+ if [ $local_addr = "::" ];then
+ check_mark $listener_ns 6 || retc=1
+ check_mark $connector_ns 6 || retc=1
+ else
+ check_mark $listener_ns 4 || retc=1
+ check_mark $connector_ns 4 || retc=1
+ fi
+
+ mptcp_lib_result_code "${retc}" "mark ${ip}"
+
+ if [ $retc -eq 0 ] && [ $rets -eq 0 ];then
+ mptcp_lib_pr_ok
+ return 0
+ fi
+ mptcp_lib_pr_fail
+
+ return 1
+}
+
+make_file()
+{
+ local name=$1
+ local who=$2
+ local size=$3
+
+ mptcp_lib_make_file $name 1024 $size
+
+ echo "Created $name (size $size KB) containing data sent by $who"
+}
+
+do_mptcp_sockopt_tests()
+{
+ local lret=0
+
+ if ! mptcp_lib_kallsyms_has "mptcp_diag_fill_info$"; then
+ mptcp_lib_pr_skip "MPTCP sockopt not supported"
+ mptcp_lib_result_skip "sockopt"
+ return
+ fi
+
+ ip netns exec "$ns_sbox" ./mptcp_sockopt
+ lret=$?
+
+ print_title "SOL_MPTCP sockopt v4"
+ if [ $lret -ne 0 ]; then
+ mptcp_lib_pr_fail
+ mptcp_lib_result_fail "sockopt v4"
+ ret=$lret
+ return
+ fi
+ mptcp_lib_pr_ok
+ mptcp_lib_result_pass "sockopt v4"
+
+ ip netns exec "$ns_sbox" ./mptcp_sockopt -6
+ lret=$?
+
+ print_title "SOL_MPTCP sockopt v6"
+ if [ $lret -ne 0 ]; then
+ mptcp_lib_pr_fail
+ mptcp_lib_result_fail "sockopt v6"
+ ret=$lret
+ return
+ fi
+ mptcp_lib_pr_ok
+ mptcp_lib_result_pass "sockopt v6"
+}
+
+run_tests()
+{
+ local listener_ns="$1"
+ local connector_ns="$2"
+ local connect_addr="$3"
+ local lret=0
+
+ do_transfer ${listener_ns} ${connector_ns} MPTCP MPTCP ${connect_addr}
+
+ lret=$?
+
+ if [ $lret -ne 0 ]; then
+ ret=$lret
+ return
+ fi
+}
+
+do_tcpinq_test()
+{
+ print_title "TCP_INQ cmsg/ioctl $*"
+ ip netns exec "$ns_sbox" ./mptcp_inq "$@"
+ local lret=$?
+ if [ $lret -ne 0 ];then
+ ret=$lret
+ mptcp_lib_pr_fail
+ mptcp_lib_result_fail "TCP_INQ: $*"
+ return $lret
+ fi
+
+ mptcp_lib_pr_ok
+ mptcp_lib_result_pass "TCP_INQ: $*"
+ return $lret
+}
+
+do_tcpinq_tests()
+{
+ local lret=0
+
+ if ! mptcp_lib_kallsyms_has "mptcp_ioctl$"; then
+ mptcp_lib_pr_skip "TCP_INQ not supported"
+ mptcp_lib_result_skip "TCP_INQ"
+ return
+ fi
+
+ local args
+ for args in "-t tcp" "-r tcp"; do
+ do_tcpinq_test $args
+ lret=$?
+ if [ $lret -ne 0 ] ; then
+ return $lret
+ fi
+ do_tcpinq_test -6 $args
+ lret=$?
+ if [ $lret -ne 0 ] ; then
+ return $lret
+ fi
+ done
+
+ do_tcpinq_test -r tcp -t tcp
+
+ return $?
+}
+
+sin=$(mktemp)
+sout=$(mktemp)
+cin=$(mktemp)
+cout=$(mktemp)
+init
+make_file "$cin" "client" 1
+make_file "$sin" "server" 1
+trap cleanup EXIT
+
+run_tests $ns1 $ns2 10.0.1.1
+run_tests $ns1 $ns2 dead:beef:1::1
+
+do_mptcp_sockopt_tests
+do_tcpinq_tests
+
+mptcp_lib_result_print_all_tap
+exit $ret
diff --git a/tools/testing/selftests/net/mptcp/pm_netlink.sh b/tools/testing/selftests/net/mptcp/pm_netlink.sh
index 15f4f46ca3a9..6ab8c5d36340 100755
--- a/tools/testing/selftests/net/mptcp/pm_netlink.sh
+++ b/tools/testing/selftests/net/mptcp/pm_netlink.sh
@@ -1,76 +1,79 @@
#!/bin/bash
# SPDX-License-Identifier: GPL-2.0
-ksft_skip=4
+# Double quotes to prevent globbing and word splitting is recommended in new
+# code but we accept it, especially because there were too many before having
+# address all other issues detected by shellcheck.
+#shellcheck disable=SC2086
+
+. "$(dirname "${0}")/mptcp_lib.sh"
+
ret=0
usage() {
echo "Usage: $0 [ -h ]"
}
-
+optstring=h
while getopts "$optstring" option;do
case "$option" in
"h")
usage $0
- exit 0
+ exit ${KSFT_PASS}
;;
"?")
usage $0
- exit 1
+ exit ${KSFT_FAIL}
;;
esac
done
-sec=$(date +%s)
-rndh=$(printf %x $sec)-$(mktemp -u XXXXXX)
-ns1="ns1-$rndh"
+ns1=""
err=$(mktemp)
-ret=0
+# This function is used in the cleanup trap
+#shellcheck disable=SC2317
cleanup()
{
rm -f $err
- ip netns del $ns1
+ mptcp_lib_ns_exit "${ns1}"
}
-ip -Version > /dev/null 2>&1
-if [ $? -ne 0 ];then
- echo "SKIP: Could not run test without ip tool"
- exit $ksft_skip
-fi
+mptcp_lib_check_mptcp
+mptcp_lib_check_tools ip
trap cleanup EXIT
-ip netns add $ns1 || exit $ksft_skip
-ip -net $ns1 link set lo up
-ip netns exec $ns1 sysctl -q net.mptcp.enabled=1
+mptcp_lib_ns_init ns1
check()
{
local cmd="$1"
local expected="$2"
local msg="$3"
- local out=`$cmd 2>$err`
- local cmd_ret=$?
-
- printf "%-50s %s" "$msg"
- if [ $cmd_ret -ne 0 ]; then
- echo "[FAIL] command execution '$cmd' stderr "
- cat $err
- ret=1
- elif [ "$out" = "$expected" ]; then
- echo "[ OK ]"
- else
- echo -n "[FAIL] "
- echo "expected '$expected' got '$out'"
- ret=1
+ local rc=0
+
+ mptcp_lib_print_title "$msg"
+ mptcp_lib_check_output "${err}" "${cmd}" "${expected}" || rc=${?}
+ if [ ${rc} -eq 2 ]; then
+ mptcp_lib_result_fail "${msg} # error ${rc}"
+ ret=${KSFT_FAIL}
+ elif [ ${rc} -eq 0 ]; then
+ mptcp_lib_print_ok "[ OK ]"
+ mptcp_lib_result_pass "${msg}"
+ elif [ ${rc} -eq 1 ]; then
+ mptcp_lib_result_fail "${msg} # different output"
+ ret=${KSFT_FAIL}
fi
}
check "ip netns exec $ns1 ./pm_nl_ctl dump" "" "defaults addr list"
-check "ip netns exec $ns1 ./pm_nl_ctl limits" "accept 0
-subflows 0" "defaults limits"
+
+default_limits="$(ip netns exec $ns1 ./pm_nl_ctl limits)"
+if mptcp_lib_expect_all_features; then
+ check "ip netns exec $ns1 ./pm_nl_ctl limits" "accept 0
+subflows 2" "defaults limits"
+fi
ip netns exec $ns1 ./pm_nl_ctl add 10.0.1.1
ip netns exec $ns1 ./pm_nl_ctl add 10.0.1.2 flags subflow dev lo
@@ -88,21 +91,22 @@ check "ip netns exec $ns1 ./pm_nl_ctl dump" \
"id 1 flags 10.0.1.1
id 3 flags signal,backup 10.0.1.3" "dump addrs after del"
-ip netns exec $ns1 ./pm_nl_ctl add 10.0.1.3
+ip netns exec $ns1 ./pm_nl_ctl add 10.0.1.3 2>/dev/null
check "ip netns exec $ns1 ./pm_nl_ctl get 4" "" "duplicate addr"
-ip netns exec $ns1 ./pm_nl_ctl add 10.0.1.4 id 10 flags signal
+ip netns exec $ns1 ./pm_nl_ctl add 10.0.1.4 flags signal
check "ip netns exec $ns1 ./pm_nl_ctl get 4" "id 4 flags signal 10.0.1.4" "id addr increment"
-for i in `seq 5 9`; do
+for i in $(seq 5 9); do
ip netns exec $ns1 ./pm_nl_ctl add 10.0.1.$i flags signal >/dev/null 2>&1
done
check "ip netns exec $ns1 ./pm_nl_ctl get 9" "id 9 flags signal 10.0.1.9" "hard addr limit"
check "ip netns exec $ns1 ./pm_nl_ctl get 10" "" "above hard addr limit"
-for i in `seq 9 256`; do
+ip netns exec $ns1 ./pm_nl_ctl del 9
+for i in $(seq 10 255); do
+ ip netns exec $ns1 ./pm_nl_ctl add 10.0.0.9 id $i
ip netns exec $ns1 ./pm_nl_ctl del $i
- ip netns exec $ns1 ./pm_nl_ctl add 10.0.0.9
done
check "ip netns exec $ns1 ./pm_nl_ctl dump" "id 1 flags 10.0.1.1
id 3 flags signal,backup 10.0.1.3
@@ -115,16 +119,81 @@ id 8 flags signal 10.0.1.8" "id limit"
ip netns exec $ns1 ./pm_nl_ctl flush
check "ip netns exec $ns1 ./pm_nl_ctl dump" "" "flush addrs"
-ip netns exec $ns1 ./pm_nl_ctl limits 9 1
-check "ip netns exec $ns1 ./pm_nl_ctl limits" "accept 0
-subflows 0" "rcv addrs above hard limit"
+ip netns exec $ns1 ./pm_nl_ctl limits 9 1 2>/dev/null
+check "ip netns exec $ns1 ./pm_nl_ctl limits" "$default_limits" "rcv addrs above hard limit"
-ip netns exec $ns1 ./pm_nl_ctl limits 1 9
-check "ip netns exec $ns1 ./pm_nl_ctl limits" "accept 0
-subflows 0" "subflows above hard limit"
+ip netns exec $ns1 ./pm_nl_ctl limits 1 9 2>/dev/null
+check "ip netns exec $ns1 ./pm_nl_ctl limits" "$default_limits" "subflows above hard limit"
ip netns exec $ns1 ./pm_nl_ctl limits 8 8
check "ip netns exec $ns1 ./pm_nl_ctl limits" "accept 8
subflows 8" "set limits"
+ip netns exec $ns1 ./pm_nl_ctl flush
+ip netns exec $ns1 ./pm_nl_ctl add 10.0.1.1
+ip netns exec $ns1 ./pm_nl_ctl add 10.0.1.2
+ip netns exec $ns1 ./pm_nl_ctl add 10.0.1.3 id 100
+ip netns exec $ns1 ./pm_nl_ctl add 10.0.1.4
+ip netns exec $ns1 ./pm_nl_ctl add 10.0.1.5 id 254
+ip netns exec $ns1 ./pm_nl_ctl add 10.0.1.6
+ip netns exec $ns1 ./pm_nl_ctl add 10.0.1.7
+ip netns exec $ns1 ./pm_nl_ctl add 10.0.1.8
+check "ip netns exec $ns1 ./pm_nl_ctl dump" "id 1 flags 10.0.1.1
+id 2 flags 10.0.1.2
+id 3 flags 10.0.1.7
+id 4 flags 10.0.1.8
+id 100 flags 10.0.1.3
+id 101 flags 10.0.1.4
+id 254 flags 10.0.1.5
+id 255 flags 10.0.1.6" "set ids"
+
+ip netns exec $ns1 ./pm_nl_ctl flush
+ip netns exec $ns1 ./pm_nl_ctl add 10.0.0.1
+ip netns exec $ns1 ./pm_nl_ctl add 10.0.0.2 id 254
+ip netns exec $ns1 ./pm_nl_ctl add 10.0.0.3
+ip netns exec $ns1 ./pm_nl_ctl add 10.0.0.4
+ip netns exec $ns1 ./pm_nl_ctl add 10.0.0.5 id 253
+ip netns exec $ns1 ./pm_nl_ctl add 10.0.0.6
+ip netns exec $ns1 ./pm_nl_ctl add 10.0.0.7
+ip netns exec $ns1 ./pm_nl_ctl add 10.0.0.8
+check "ip netns exec $ns1 ./pm_nl_ctl dump" "id 1 flags 10.0.0.1
+id 2 flags 10.0.0.4
+id 3 flags 10.0.0.6
+id 4 flags 10.0.0.7
+id 5 flags 10.0.0.8
+id 253 flags 10.0.0.5
+id 254 flags 10.0.0.2
+id 255 flags 10.0.0.3" "wrap-around ids"
+
+ip netns exec $ns1 ./pm_nl_ctl flush
+ip netns exec $ns1 ./pm_nl_ctl add 10.0.1.1 flags subflow
+ip netns exec $ns1 ./pm_nl_ctl set 10.0.1.1 flags backup
+check "ip netns exec $ns1 ./pm_nl_ctl dump" "id 1 flags \
+subflow,backup 10.0.1.1" "set flags (backup)"
+ip netns exec $ns1 ./pm_nl_ctl set 10.0.1.1 flags nobackup
+check "ip netns exec $ns1 ./pm_nl_ctl dump" "id 1 flags \
+subflow 10.0.1.1" " (nobackup)"
+
+# fullmesh support has been added later
+ip netns exec $ns1 ./pm_nl_ctl set id 1 flags fullmesh 2>/dev/null
+if ip netns exec $ns1 ./pm_nl_ctl dump | grep -q "fullmesh" ||
+ mptcp_lib_expect_all_features; then
+ check "ip netns exec $ns1 ./pm_nl_ctl dump" "id 1 flags \
+subflow,fullmesh 10.0.1.1" " (fullmesh)"
+ ip netns exec $ns1 ./pm_nl_ctl set id 1 flags nofullmesh
+ check "ip netns exec $ns1 ./pm_nl_ctl dump" "id 1 flags \
+subflow 10.0.1.1" " (nofullmesh)"
+ ip netns exec $ns1 ./pm_nl_ctl set id 1 flags backup,fullmesh
+ check "ip netns exec $ns1 ./pm_nl_ctl dump" "id 1 flags \
+subflow,backup,fullmesh 10.0.1.1" " (backup,fullmesh)"
+else
+ for st in fullmesh nofullmesh backup,fullmesh; do
+ st=" (${st})"
+ mptcp_lib_print_title "${st}"
+ mptcp_lib_pr_skip
+ mptcp_lib_result_skip "${st}"
+ done
+fi
+
+mptcp_lib_result_print_all_tap
exit $ret
diff --git a/tools/testing/selftests/net/mptcp/pm_nl_ctl.c b/tools/testing/selftests/net/mptcp/pm_nl_ctl.c
index b24a2f17d415..7426a2cbd4a0 100644
--- a/tools/testing/selftests/net/mptcp/pm_nl_ctl.c
+++ b/tools/testing/selftests/net/mptcp/pm_nl_ctl.c
@@ -6,6 +6,7 @@
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
+#include <limits.h>
#include <sys/socket.h>
#include <sys/types.h>
@@ -21,16 +22,29 @@
#ifndef MPTCP_PM_NAME
#define MPTCP_PM_NAME "mptcp_pm"
#endif
+#ifndef MPTCP_PM_EVENTS
+#define MPTCP_PM_EVENTS "mptcp_pm_events"
+#endif
+#ifndef IPPROTO_MPTCP
+#define IPPROTO_MPTCP 262
+#endif
static void syntax(char *argv[])
{
- fprintf(stderr, "%s add|get|del|flush|dump|accept [<args>]\n", argv[0]);
- fprintf(stderr, "\tadd [flags signal|subflow|backup] [id <nr>] [dev <name>] <ip>\n");
- fprintf(stderr, "\tdel <id>\n");
+ fprintf(stderr, "%s add|ann|rem|csf|dsf|get|set|del|flush|dump|events|listen|accept [<args>]\n", argv[0]);
+ fprintf(stderr, "\tadd [flags signal|subflow|backup|fullmesh] [id <nr>] [dev <name>] <ip>\n");
+ fprintf(stderr, "\tann <local-ip> id <local-id> token <token> [port <local-port>] [dev <name>]\n");
+ fprintf(stderr, "\trem id <local-id> token <token>\n");
+ fprintf(stderr, "\tcsf lip <local-ip> lid <local-id> rip <remote-ip> rport <remote-port> token <token>\n");
+ fprintf(stderr, "\tdsf lip <local-ip> lport <local-port> rip <remote-ip> rport <remote-port> token <token>\n");
+ fprintf(stderr, "\tdel <id> [<ip>]\n");
fprintf(stderr, "\tget <id>\n");
+ fprintf(stderr, "\tset [<ip>] [id <nr>] flags [no]backup|[no]fullmesh [port <nr>] [token <token>] [rip <ip>] [rport <port>]\n");
fprintf(stderr, "\tflush\n");
fprintf(stderr, "\tdump\n");
fprintf(stderr, "\tlimits [<rcv addr max> <subflow max>]\n");
+ fprintf(stderr, "\tevents\n");
+ fprintf(stderr, "\tlisten <local-ip> <local-port>\n");
exit(0);
}
@@ -52,20 +66,25 @@ static int init_genl_req(char *data, int family, int cmd, int version)
return off;
}
-static void nl_error(struct nlmsghdr *nh)
+static int nl_error(struct nlmsghdr *nh)
{
struct nlmsgerr *err = (struct nlmsgerr *)NLMSG_DATA(nh);
int len = nh->nlmsg_len - sizeof(*nh);
uint32_t off;
- if (len < sizeof(struct nlmsgerr))
+ if (len < sizeof(struct nlmsgerr)) {
error(1, 0, "netlink error message truncated %d min %ld", len,
sizeof(struct nlmsgerr));
+ return -1;
+ }
- if (!err->error) {
+ if (err->error) {
/* check messages from kernel */
struct rtattr *attrs = (struct rtattr *)NLMSG_DATA(nh);
+ fprintf(stderr, "netlink error %d (%s)\n",
+ err->error, strerror(-err->error));
+
while (RTA_OK(attrs, len)) {
if (attrs->rta_type == NLMSGERR_ATTR_MSG)
fprintf(stderr, "netlink ext ack msg: %s\n",
@@ -77,12 +96,115 @@ static void nl_error(struct nlmsghdr *nh)
}
attrs = RTA_NEXT(attrs, len);
}
- } else {
- fprintf(stderr, "netlink error %d", err->error);
+ return -1;
}
+
+ return 0;
}
-/* do a netlink command and, if max > 0, fetch the reply */
+static int capture_events(int fd, int event_group)
+{
+ u_int8_t buffer[NLMSG_ALIGN(sizeof(struct nlmsghdr)) +
+ NLMSG_ALIGN(sizeof(struct genlmsghdr)) + 1024];
+ struct genlmsghdr *ghdr;
+ struct rtattr *attrs;
+ struct nlmsghdr *nh;
+ int ret = 0;
+ int res_len;
+ int msg_len;
+ fd_set rfds;
+
+ if (setsockopt(fd, SOL_NETLINK, NETLINK_ADD_MEMBERSHIP,
+ &event_group, sizeof(event_group)) < 0)
+ error(1, errno, "could not join the " MPTCP_PM_EVENTS " mcast group");
+
+ do {
+ FD_ZERO(&rfds);
+ FD_SET(fd, &rfds);
+ res_len = NLMSG_ALIGN(sizeof(struct nlmsghdr)) +
+ NLMSG_ALIGN(sizeof(struct genlmsghdr)) + 1024;
+
+ ret = select(FD_SETSIZE, &rfds, NULL, NULL, NULL);
+
+ if (ret < 0)
+ error(1, ret, "error in select() on NL socket");
+
+ res_len = recv(fd, buffer, res_len, 0);
+ if (res_len < 0)
+ error(1, res_len, "error on recv() from NL socket");
+
+ nh = (struct nlmsghdr *)buffer;
+
+ for (; NLMSG_OK(nh, res_len); nh = NLMSG_NEXT(nh, res_len)) {
+ if (nh->nlmsg_type == NLMSG_ERROR)
+ error(1, NLMSG_ERROR, "received invalid NL message");
+
+ ghdr = (struct genlmsghdr *)NLMSG_DATA(nh);
+
+ if (ghdr->cmd == 0)
+ continue;
+
+ fprintf(stderr, "type:%d", ghdr->cmd);
+
+ msg_len = nh->nlmsg_len - NLMSG_LENGTH(GENL_HDRLEN);
+
+ attrs = (struct rtattr *) ((char *) ghdr + GENL_HDRLEN);
+ while (RTA_OK(attrs, msg_len)) {
+ if (attrs->rta_type == MPTCP_ATTR_TOKEN)
+ fprintf(stderr, ",token:%u", *(__u32 *)RTA_DATA(attrs));
+ else if (attrs->rta_type == MPTCP_ATTR_FAMILY)
+ fprintf(stderr, ",family:%u", *(__u16 *)RTA_DATA(attrs));
+ else if (attrs->rta_type == MPTCP_ATTR_LOC_ID)
+ fprintf(stderr, ",loc_id:%u", *(__u8 *)RTA_DATA(attrs));
+ else if (attrs->rta_type == MPTCP_ATTR_REM_ID)
+ fprintf(stderr, ",rem_id:%u", *(__u8 *)RTA_DATA(attrs));
+ else if (attrs->rta_type == MPTCP_ATTR_SADDR4) {
+ u_int32_t saddr4 = ntohl(*(__u32 *)RTA_DATA(attrs));
+
+ fprintf(stderr, ",saddr4:%u.%u.%u.%u", saddr4 >> 24,
+ (saddr4 >> 16) & 0xFF, (saddr4 >> 8) & 0xFF,
+ (saddr4 & 0xFF));
+ } else if (attrs->rta_type == MPTCP_ATTR_SADDR6) {
+ char buf[INET6_ADDRSTRLEN];
+
+ if (inet_ntop(AF_INET6, RTA_DATA(attrs), buf,
+ sizeof(buf)) != NULL)
+ fprintf(stderr, ",saddr6:%s", buf);
+ } else if (attrs->rta_type == MPTCP_ATTR_DADDR4) {
+ u_int32_t daddr4 = ntohl(*(__u32 *)RTA_DATA(attrs));
+
+ fprintf(stderr, ",daddr4:%u.%u.%u.%u", daddr4 >> 24,
+ (daddr4 >> 16) & 0xFF, (daddr4 >> 8) & 0xFF,
+ (daddr4 & 0xFF));
+ } else if (attrs->rta_type == MPTCP_ATTR_DADDR6) {
+ char buf[INET6_ADDRSTRLEN];
+
+ if (inet_ntop(AF_INET6, RTA_DATA(attrs), buf,
+ sizeof(buf)) != NULL)
+ fprintf(stderr, ",daddr6:%s", buf);
+ } else if (attrs->rta_type == MPTCP_ATTR_SPORT)
+ fprintf(stderr, ",sport:%u",
+ ntohs(*(__u16 *)RTA_DATA(attrs)));
+ else if (attrs->rta_type == MPTCP_ATTR_DPORT)
+ fprintf(stderr, ",dport:%u",
+ ntohs(*(__u16 *)RTA_DATA(attrs)));
+ else if (attrs->rta_type == MPTCP_ATTR_BACKUP)
+ fprintf(stderr, ",backup:%u", *(__u8 *)RTA_DATA(attrs));
+ else if (attrs->rta_type == MPTCP_ATTR_ERROR)
+ fprintf(stderr, ",error:%u", *(__u8 *)RTA_DATA(attrs));
+ else if (attrs->rta_type == MPTCP_ATTR_SERVER_SIDE)
+ fprintf(stderr, ",server_side:%u", *(__u8 *)RTA_DATA(attrs));
+
+ attrs = RTA_NEXT(attrs, msg_len);
+ }
+ }
+ fprintf(stderr, "\n");
+ } while (1);
+
+ return 0;
+}
+
+/* do a netlink command and, if max > 0, fetch the reply ; nh's size >1024B */
static int do_nl_req(int fd, struct nlmsghdr *nh, int len, int max)
{
struct sockaddr_nl nladdr = { .nl_family = AF_NETLINK };
@@ -91,12 +213,16 @@ static int do_nl_req(int fd, struct nlmsghdr *nh, int len, int max)
int rem, ret;
int err = 0;
+ /* If no expected answer, ask for an ACK to look for errors if any */
+ if (max == 0) {
+ nh->nlmsg_flags |= NLM_F_ACK;
+ max = 1024;
+ }
+
nh->nlmsg_len = len;
ret = sendto(fd, data, len, 0, (void *)&nladdr, sizeof(nladdr));
if (ret != len)
error(1, errno, "send netlink: %uB != %uB\n", ret, len);
- if (max == 0)
- return 0;
addr_len = sizeof(nladdr);
rem = ret = recvfrom(fd, data, max, 0, (void *)&nladdr, &addr_len);
@@ -105,21 +231,29 @@ static int do_nl_req(int fd, struct nlmsghdr *nh, int len, int max)
/* Beware: the NLMSG_NEXT macro updates the 'rem' argument */
for (; NLMSG_OK(nh, rem); nh = NLMSG_NEXT(nh, rem)) {
- if (nh->nlmsg_type == NLMSG_ERROR) {
- nl_error(nh);
+ if (nh->nlmsg_type == NLMSG_DONE)
+ break;
+
+ if (nh->nlmsg_type == NLMSG_ERROR && nl_error(nh))
err = 1;
- }
}
if (err)
error(1, 0, "bailing out due to netlink error[s]");
return ret;
}
-static int genl_parse_getfamily(struct nlmsghdr *nlh)
+static int genl_parse_getfamily(struct nlmsghdr *nlh, int *pm_family,
+ int *events_mcast_grp)
{
struct genlmsghdr *ghdr = NLMSG_DATA(nlh);
int len = nlh->nlmsg_len;
struct rtattr *attrs;
+ struct rtattr *grps;
+ struct rtattr *grp;
+ int got_events_grp;
+ int got_family;
+ int grps_len;
+ int grp_len;
if (nlh->nlmsg_type != GENL_ID_CTRL)
error(1, errno, "Not a controller message, len=%d type=0x%x\n",
@@ -134,9 +268,42 @@ static int genl_parse_getfamily(struct nlmsghdr *nlh)
error(1, errno, "Unknown controller command %d\n", ghdr->cmd);
attrs = (struct rtattr *) ((char *) ghdr + GENL_HDRLEN);
+ got_family = 0;
+ got_events_grp = 0;
+
while (RTA_OK(attrs, len)) {
- if (attrs->rta_type == CTRL_ATTR_FAMILY_ID)
- return *(__u16 *)RTA_DATA(attrs);
+ if (attrs->rta_type == CTRL_ATTR_FAMILY_ID) {
+ *pm_family = *(__u16 *)RTA_DATA(attrs);
+ got_family = 1;
+ } else if (attrs->rta_type == CTRL_ATTR_MCAST_GROUPS) {
+ grps = RTA_DATA(attrs);
+ grps_len = RTA_PAYLOAD(attrs);
+
+ while (RTA_OK(grps, grps_len)) {
+ grp = RTA_DATA(grps);
+ grp_len = RTA_PAYLOAD(grps);
+ got_events_grp = 0;
+
+ while (RTA_OK(grp, grp_len)) {
+ if (grp->rta_type == CTRL_ATTR_MCAST_GRP_ID)
+ *events_mcast_grp = *(__u32 *)RTA_DATA(grp);
+ else if (grp->rta_type == CTRL_ATTR_MCAST_GRP_NAME &&
+ !strcmp(RTA_DATA(grp), MPTCP_PM_EVENTS))
+ got_events_grp = 1;
+
+ grp = RTA_NEXT(grp, grp_len);
+ }
+
+ if (got_events_grp)
+ break;
+
+ grps = RTA_NEXT(grps, grps_len);
+ }
+ }
+
+ if (got_family && got_events_grp)
+ return 0;
+
attrs = RTA_NEXT(attrs, len);
}
@@ -144,7 +311,7 @@ static int genl_parse_getfamily(struct nlmsghdr *nlh)
return -1;
}
-static int resolve_mptcp_pm_netlink(int fd)
+static int resolve_mptcp_pm_netlink(int fd, int *pm_family, int *events_mcast_grp)
{
char data[NLMSG_ALIGN(sizeof(struct nlmsghdr)) +
NLMSG_ALIGN(sizeof(struct genlmsghdr)) +
@@ -166,7 +333,429 @@ static int resolve_mptcp_pm_netlink(int fd)
off += NLMSG_ALIGN(rta->rta_len);
do_nl_req(fd, nh, off, sizeof(data));
- return genl_parse_getfamily((void *)data);
+ return genl_parse_getfamily((void *)data, pm_family, events_mcast_grp);
+}
+
+int dsf(int fd, int pm_family, int argc, char *argv[])
+{
+ char data[NLMSG_ALIGN(sizeof(struct nlmsghdr)) +
+ NLMSG_ALIGN(sizeof(struct genlmsghdr)) +
+ 1024];
+ struct rtattr *rta, *addr;
+ u_int16_t family, port;
+ struct nlmsghdr *nh;
+ u_int32_t token;
+ int addr_start;
+ int off = 0;
+ int arg;
+
+ const char *params[5];
+
+ memset(params, 0, 5 * sizeof(const char *));
+
+ memset(data, 0, sizeof(data));
+ nh = (void *)data;
+ off = init_genl_req(data, pm_family, MPTCP_PM_CMD_SUBFLOW_DESTROY,
+ MPTCP_PM_VER);
+
+ if (argc < 12)
+ syntax(argv);
+
+ /* Params recorded in this order:
+ * <local-ip>, <local-port>, <remote-ip>, <remote-port>, <token>
+ */
+ for (arg = 2; arg < argc; arg++) {
+ if (!strcmp(argv[arg], "lip")) {
+ if (++arg >= argc)
+ error(1, 0, " missing local IP");
+
+ params[0] = argv[arg];
+ } else if (!strcmp(argv[arg], "lport")) {
+ if (++arg >= argc)
+ error(1, 0, " missing local port");
+
+ params[1] = argv[arg];
+ } else if (!strcmp(argv[arg], "rip")) {
+ if (++arg >= argc)
+ error(1, 0, " missing remote IP");
+
+ params[2] = argv[arg];
+ } else if (!strcmp(argv[arg], "rport")) {
+ if (++arg >= argc)
+ error(1, 0, " missing remote port");
+
+ params[3] = argv[arg];
+ } else if (!strcmp(argv[arg], "token")) {
+ if (++arg >= argc)
+ error(1, 0, " missing token");
+
+ params[4] = argv[arg];
+ } else
+ error(1, 0, "unknown keyword %s", argv[arg]);
+ }
+
+ for (arg = 0; arg < 4; arg = arg + 2) {
+ /* addr header */
+ addr_start = off;
+ addr = (void *)(data + off);
+ addr->rta_type = NLA_F_NESTED |
+ ((arg == 0) ? MPTCP_PM_ATTR_ADDR : MPTCP_PM_ATTR_ADDR_REMOTE);
+ addr->rta_len = RTA_LENGTH(0);
+ off += NLMSG_ALIGN(addr->rta_len);
+
+ /* addr data */
+ rta = (void *)(data + off);
+ if (inet_pton(AF_INET, params[arg], RTA_DATA(rta))) {
+ family = AF_INET;
+ rta->rta_type = MPTCP_PM_ADDR_ATTR_ADDR4;
+ rta->rta_len = RTA_LENGTH(4);
+ } else if (inet_pton(AF_INET6, params[arg], RTA_DATA(rta))) {
+ family = AF_INET6;
+ rta->rta_type = MPTCP_PM_ADDR_ATTR_ADDR6;
+ rta->rta_len = RTA_LENGTH(16);
+ } else
+ error(1, errno, "can't parse ip %s", params[arg]);
+ off += NLMSG_ALIGN(rta->rta_len);
+
+ /* family */
+ rta = (void *)(data + off);
+ rta->rta_type = MPTCP_PM_ADDR_ATTR_FAMILY;
+ rta->rta_len = RTA_LENGTH(2);
+ memcpy(RTA_DATA(rta), &family, 2);
+ off += NLMSG_ALIGN(rta->rta_len);
+
+ /* port */
+ port = atoi(params[arg + 1]);
+ rta = (void *)(data + off);
+ rta->rta_type = MPTCP_PM_ADDR_ATTR_PORT;
+ rta->rta_len = RTA_LENGTH(2);
+ memcpy(RTA_DATA(rta), &port, 2);
+ off += NLMSG_ALIGN(rta->rta_len);
+
+ addr->rta_len = off - addr_start;
+ }
+
+ /* token */
+ token = strtoul(params[4], NULL, 10);
+ rta = (void *)(data + off);
+ rta->rta_type = MPTCP_PM_ATTR_TOKEN;
+ rta->rta_len = RTA_LENGTH(4);
+ memcpy(RTA_DATA(rta), &token, 4);
+ off += NLMSG_ALIGN(rta->rta_len);
+
+ do_nl_req(fd, nh, off, 0);
+
+ return 0;
+}
+
+int csf(int fd, int pm_family, int argc, char *argv[])
+{
+ char data[NLMSG_ALIGN(sizeof(struct nlmsghdr)) +
+ NLMSG_ALIGN(sizeof(struct genlmsghdr)) +
+ 1024];
+ u_int32_t flags = MPTCP_PM_ADDR_FLAG_SUBFLOW;
+ const char *params[5];
+ struct nlmsghdr *nh;
+ struct rtattr *addr;
+ struct rtattr *rta;
+ u_int16_t family;
+ u_int32_t token;
+ u_int16_t port;
+ int addr_start;
+ u_int8_t id;
+ int off = 0;
+ int arg;
+
+ memset(params, 0, 5 * sizeof(const char *));
+
+ memset(data, 0, sizeof(data));
+ nh = (void *)data;
+ off = init_genl_req(data, pm_family, MPTCP_PM_CMD_SUBFLOW_CREATE,
+ MPTCP_PM_VER);
+
+ if (argc < 12)
+ syntax(argv);
+
+ /* Params recorded in this order:
+ * <local-ip>, <local-id>, <remote-ip>, <remote-port>, <token>
+ */
+ for (arg = 2; arg < argc; arg++) {
+ if (!strcmp(argv[arg], "lip")) {
+ if (++arg >= argc)
+ error(1, 0, " missing local IP");
+
+ params[0] = argv[arg];
+ } else if (!strcmp(argv[arg], "lid")) {
+ if (++arg >= argc)
+ error(1, 0, " missing local id");
+
+ params[1] = argv[arg];
+ } else if (!strcmp(argv[arg], "rip")) {
+ if (++arg >= argc)
+ error(1, 0, " missing remote ip");
+
+ params[2] = argv[arg];
+ } else if (!strcmp(argv[arg], "rport")) {
+ if (++arg >= argc)
+ error(1, 0, " missing remote port");
+
+ params[3] = argv[arg];
+ } else if (!strcmp(argv[arg], "token")) {
+ if (++arg >= argc)
+ error(1, 0, " missing token");
+
+ params[4] = argv[arg];
+ } else
+ error(1, 0, "unknown param %s", argv[arg]);
+ }
+
+ for (arg = 0; arg < 4; arg = arg + 2) {
+ /* addr header */
+ addr_start = off;
+ addr = (void *)(data + off);
+ addr->rta_type = NLA_F_NESTED |
+ ((arg == 0) ? MPTCP_PM_ATTR_ADDR : MPTCP_PM_ATTR_ADDR_REMOTE);
+ addr->rta_len = RTA_LENGTH(0);
+ off += NLMSG_ALIGN(addr->rta_len);
+
+ /* addr data */
+ rta = (void *)(data + off);
+ if (inet_pton(AF_INET, params[arg], RTA_DATA(rta))) {
+ family = AF_INET;
+ rta->rta_type = MPTCP_PM_ADDR_ATTR_ADDR4;
+ rta->rta_len = RTA_LENGTH(4);
+ } else if (inet_pton(AF_INET6, params[arg], RTA_DATA(rta))) {
+ family = AF_INET6;
+ rta->rta_type = MPTCP_PM_ADDR_ATTR_ADDR6;
+ rta->rta_len = RTA_LENGTH(16);
+ } else
+ error(1, errno, "can't parse ip %s", params[arg]);
+ off += NLMSG_ALIGN(rta->rta_len);
+
+ /* family */
+ rta = (void *)(data + off);
+ rta->rta_type = MPTCP_PM_ADDR_ATTR_FAMILY;
+ rta->rta_len = RTA_LENGTH(2);
+ memcpy(RTA_DATA(rta), &family, 2);
+ off += NLMSG_ALIGN(rta->rta_len);
+
+ if (arg == 2) {
+ /* port */
+ port = atoi(params[arg + 1]);
+ rta = (void *)(data + off);
+ rta->rta_type = MPTCP_PM_ADDR_ATTR_PORT;
+ rta->rta_len = RTA_LENGTH(2);
+ memcpy(RTA_DATA(rta), &port, 2);
+ off += NLMSG_ALIGN(rta->rta_len);
+ }
+
+ if (arg == 0) {
+ /* id */
+ id = atoi(params[arg + 1]);
+ rta = (void *)(data + off);
+ rta->rta_type = MPTCP_PM_ADDR_ATTR_ID;
+ rta->rta_len = RTA_LENGTH(1);
+ memcpy(RTA_DATA(rta), &id, 1);
+ off += NLMSG_ALIGN(rta->rta_len);
+ }
+
+ /* addr flags */
+ rta = (void *)(data + off);
+ rta->rta_type = MPTCP_PM_ADDR_ATTR_FLAGS;
+ rta->rta_len = RTA_LENGTH(4);
+ memcpy(RTA_DATA(rta), &flags, 4);
+ off += NLMSG_ALIGN(rta->rta_len);
+
+ addr->rta_len = off - addr_start;
+ }
+
+ /* token */
+ token = strtoul(params[4], NULL, 10);
+ rta = (void *)(data + off);
+ rta->rta_type = MPTCP_PM_ATTR_TOKEN;
+ rta->rta_len = RTA_LENGTH(4);
+ memcpy(RTA_DATA(rta), &token, 4);
+ off += NLMSG_ALIGN(rta->rta_len);
+
+ do_nl_req(fd, nh, off, 0);
+
+ return 0;
+}
+
+int remove_addr(int fd, int pm_family, int argc, char *argv[])
+{
+ char data[NLMSG_ALIGN(sizeof(struct nlmsghdr)) +
+ NLMSG_ALIGN(sizeof(struct genlmsghdr)) +
+ 1024];
+ struct nlmsghdr *nh;
+ struct rtattr *rta;
+ u_int32_t token;
+ u_int8_t id;
+ int off = 0;
+ int arg;
+
+ memset(data, 0, sizeof(data));
+ nh = (void *)data;
+ off = init_genl_req(data, pm_family, MPTCP_PM_CMD_REMOVE,
+ MPTCP_PM_VER);
+
+ if (argc < 6)
+ syntax(argv);
+
+ for (arg = 2; arg < argc; arg++) {
+ if (!strcmp(argv[arg], "id")) {
+ if (++arg >= argc)
+ error(1, 0, " missing id value");
+
+ id = atoi(argv[arg]);
+ rta = (void *)(data + off);
+ rta->rta_type = MPTCP_PM_ATTR_LOC_ID;
+ rta->rta_len = RTA_LENGTH(1);
+ memcpy(RTA_DATA(rta), &id, 1);
+ off += NLMSG_ALIGN(rta->rta_len);
+ } else if (!strcmp(argv[arg], "token")) {
+ if (++arg >= argc)
+ error(1, 0, " missing token value");
+
+ token = strtoul(argv[arg], NULL, 10);
+ rta = (void *)(data + off);
+ rta->rta_type = MPTCP_PM_ATTR_TOKEN;
+ rta->rta_len = RTA_LENGTH(4);
+ memcpy(RTA_DATA(rta), &token, 4);
+ off += NLMSG_ALIGN(rta->rta_len);
+ } else
+ error(1, 0, "unknown keyword %s", argv[arg]);
+ }
+
+ do_nl_req(fd, nh, off, 0);
+ return 0;
+}
+
+int announce_addr(int fd, int pm_family, int argc, char *argv[])
+{
+ char data[NLMSG_ALIGN(sizeof(struct nlmsghdr)) +
+ NLMSG_ALIGN(sizeof(struct genlmsghdr)) +
+ 1024];
+ u_int32_t flags = MPTCP_PM_ADDR_FLAG_SIGNAL;
+ u_int32_t token = UINT_MAX;
+ struct rtattr *rta, *addr;
+ u_int32_t id = UINT_MAX;
+ struct nlmsghdr *nh;
+ u_int16_t family;
+ int addr_start;
+ int off = 0;
+ int arg;
+
+ memset(data, 0, sizeof(data));
+ nh = (void *)data;
+ off = init_genl_req(data, pm_family, MPTCP_PM_CMD_ANNOUNCE,
+ MPTCP_PM_VER);
+
+ if (argc < 7)
+ syntax(argv);
+
+ /* local-ip header */
+ addr_start = off;
+ addr = (void *)(data + off);
+ addr->rta_type = NLA_F_NESTED | MPTCP_PM_ATTR_ADDR;
+ addr->rta_len = RTA_LENGTH(0);
+ off += NLMSG_ALIGN(addr->rta_len);
+
+ /* local-ip data */
+ /* record addr type */
+ rta = (void *)(data + off);
+ if (inet_pton(AF_INET, argv[2], RTA_DATA(rta))) {
+ family = AF_INET;
+ rta->rta_type = MPTCP_PM_ADDR_ATTR_ADDR4;
+ rta->rta_len = RTA_LENGTH(4);
+ } else if (inet_pton(AF_INET6, argv[2], RTA_DATA(rta))) {
+ family = AF_INET6;
+ rta->rta_type = MPTCP_PM_ADDR_ATTR_ADDR6;
+ rta->rta_len = RTA_LENGTH(16);
+ } else
+ error(1, errno, "can't parse ip %s", argv[2]);
+ off += NLMSG_ALIGN(rta->rta_len);
+
+ /* addr family */
+ rta = (void *)(data + off);
+ rta->rta_type = MPTCP_PM_ADDR_ATTR_FAMILY;
+ rta->rta_len = RTA_LENGTH(2);
+ memcpy(RTA_DATA(rta), &family, 2);
+ off += NLMSG_ALIGN(rta->rta_len);
+
+ for (arg = 3; arg < argc; arg++) {
+ if (!strcmp(argv[arg], "id")) {
+ /* local-id */
+ if (++arg >= argc)
+ error(1, 0, " missing id value");
+
+ id = atoi(argv[arg]);
+ rta = (void *)(data + off);
+ rta->rta_type = MPTCP_PM_ADDR_ATTR_ID;
+ rta->rta_len = RTA_LENGTH(1);
+ memcpy(RTA_DATA(rta), &id, 1);
+ off += NLMSG_ALIGN(rta->rta_len);
+ } else if (!strcmp(argv[arg], "dev")) {
+ /* for the if_index */
+ int32_t ifindex;
+
+ if (++arg >= argc)
+ error(1, 0, " missing dev name");
+
+ ifindex = if_nametoindex(argv[arg]);
+ if (!ifindex)
+ error(1, errno, "unknown device %s", argv[arg]);
+
+ rta = (void *)(data + off);
+ rta->rta_type = MPTCP_PM_ADDR_ATTR_IF_IDX;
+ rta->rta_len = RTA_LENGTH(4);
+ memcpy(RTA_DATA(rta), &ifindex, 4);
+ off += NLMSG_ALIGN(rta->rta_len);
+ } else if (!strcmp(argv[arg], "port")) {
+ /* local-port (optional) */
+ u_int16_t port;
+
+ if (++arg >= argc)
+ error(1, 0, " missing port value");
+
+ port = atoi(argv[arg]);
+ rta = (void *)(data + off);
+ rta->rta_type = MPTCP_PM_ADDR_ATTR_PORT;
+ rta->rta_len = RTA_LENGTH(2);
+ memcpy(RTA_DATA(rta), &port, 2);
+ off += NLMSG_ALIGN(rta->rta_len);
+ } else if (!strcmp(argv[arg], "token")) {
+ /* MPTCP connection token */
+ if (++arg >= argc)
+ error(1, 0, " missing token value");
+
+ token = strtoul(argv[arg], NULL, 10);
+ } else
+ error(1, 0, "unknown keyword %s", argv[arg]);
+ }
+
+ /* addr flags */
+ rta = (void *)(data + off);
+ rta->rta_type = MPTCP_PM_ADDR_ATTR_FLAGS;
+ rta->rta_len = RTA_LENGTH(4);
+ memcpy(RTA_DATA(rta), &flags, 4);
+ off += NLMSG_ALIGN(rta->rta_len);
+
+ addr->rta_len = off - addr_start;
+
+ if (id == UINT_MAX || token == UINT_MAX)
+ error(1, 0, " missing mandatory inputs");
+
+ /* token */
+ rta = (void *)(data + off);
+ rta->rta_type = MPTCP_PM_ATTR_TOKEN;
+ rta->rta_len = RTA_LENGTH(4);
+ memcpy(RTA_DATA(rta), &token, 4);
+ off += NLMSG_ALIGN(rta->rta_len);
+
+ do_nl_req(fd, nh, off, 0);
+
+ return 0;
}
int add_addr(int fd, int pm_family, int argc, char *argv[])
@@ -176,8 +765,8 @@ int add_addr(int fd, int pm_family, int argc, char *argv[])
1024];
struct rtattr *rta, *nest;
struct nlmsghdr *nh;
+ u_int32_t flags = 0;
u_int16_t family;
- u_int32_t flags;
int nest_start;
u_int8_t id;
int off = 0;
@@ -223,7 +812,6 @@ int add_addr(int fd, int pm_family, int argc, char *argv[])
char *tok, *str;
/* flags */
- flags = 0;
if (++arg >= argc)
error(1, 0, " missing flags value");
@@ -236,11 +824,18 @@ int add_addr(int fd, int pm_family, int argc, char *argv[])
flags |= MPTCP_PM_ADDR_FLAG_SIGNAL;
else if (!strcmp(tok, "backup"))
flags |= MPTCP_PM_ADDR_FLAG_BACKUP;
+ else if (!strcmp(tok, "fullmesh"))
+ flags |= MPTCP_PM_ADDR_FLAG_FULLMESH;
else
error(1, errno,
"unknown flag %s", argv[arg]);
}
+ if (flags & MPTCP_PM_ADDR_FLAG_SIGNAL &&
+ flags & MPTCP_PM_ADDR_FLAG_FULLMESH) {
+ error(1, errno, "error flag fullmesh");
+ }
+
rta = (void *)(data + off);
rta->rta_type = MPTCP_PM_ADDR_ATTR_FLAGS;
rta->rta_len = RTA_LENGTH(4);
@@ -271,6 +866,20 @@ int add_addr(int fd, int pm_family, int argc, char *argv[])
rta->rta_len = RTA_LENGTH(4);
memcpy(RTA_DATA(rta), &ifindex, 4);
off += NLMSG_ALIGN(rta->rta_len);
+ } else if (!strcmp(argv[arg], "port")) {
+ u_int16_t port;
+
+ if (++arg >= argc)
+ error(1, 0, " missing port value");
+ if (!(flags & MPTCP_PM_ADDR_FLAG_SIGNAL))
+ error(1, 0, " flags must be signal when using port");
+
+ port = atoi(argv[arg]);
+ rta = (void *)(data + off);
+ rta->rta_type = MPTCP_PM_ADDR_ATTR_PORT;
+ rta->rta_len = RTA_LENGTH(2);
+ memcpy(RTA_DATA(rta), &port, 2);
+ off += NLMSG_ALIGN(rta->rta_len);
} else
error(1, 0, "unknown keyword %s", argv[arg]);
}
@@ -287,6 +896,7 @@ int del_addr(int fd, int pm_family, int argc, char *argv[])
1024];
struct rtattr *rta, *nest;
struct nlmsghdr *nh;
+ u_int16_t family;
int nest_start;
u_int8_t id;
int off = 0;
@@ -296,11 +906,14 @@ int del_addr(int fd, int pm_family, int argc, char *argv[])
off = init_genl_req(data, pm_family, MPTCP_PM_CMD_DEL_ADDR,
MPTCP_PM_VER);
- /* the only argument is the address id */
- if (argc != 3)
+ /* the only argument is the address id (nonzero) */
+ if (argc != 3 && argc != 4)
syntax(argv);
id = atoi(argv[2]);
+ /* zero id with the IP address */
+ if (!id && argc != 4)
+ syntax(argv);
nest_start = off;
nest = (void *)(data + off);
@@ -314,6 +927,30 @@ int del_addr(int fd, int pm_family, int argc, char *argv[])
rta->rta_len = RTA_LENGTH(1);
memcpy(RTA_DATA(rta), &id, 1);
off += NLMSG_ALIGN(rta->rta_len);
+
+ if (!id) {
+ /* addr data */
+ rta = (void *)(data + off);
+ if (inet_pton(AF_INET, argv[3], RTA_DATA(rta))) {
+ family = AF_INET;
+ rta->rta_type = MPTCP_PM_ADDR_ATTR_ADDR4;
+ rta->rta_len = RTA_LENGTH(4);
+ } else if (inet_pton(AF_INET6, argv[3], RTA_DATA(rta))) {
+ family = AF_INET6;
+ rta->rta_type = MPTCP_PM_ADDR_ATTR_ADDR6;
+ rta->rta_len = RTA_LENGTH(16);
+ } else {
+ error(1, errno, "can't parse ip %s", argv[3]);
+ }
+ off += NLMSG_ALIGN(rta->rta_len);
+
+ /* family */
+ rta = (void *)(data + off);
+ rta->rta_type = MPTCP_PM_ADDR_ATTR_FAMILY;
+ rta->rta_len = RTA_LENGTH(2);
+ memcpy(RTA_DATA(rta), &family, 2);
+ off += NLMSG_ALIGN(rta->rta_len);
+ }
nest->rta_len = off - nest_start;
do_nl_req(fd, nh, off, 0);
@@ -323,6 +960,7 @@ int del_addr(int fd, int pm_family, int argc, char *argv[])
static void print_addr(struct rtattr *attrs, int len)
{
uint16_t family = 0;
+ uint16_t port = 0;
char str[1024];
uint32_t flags;
uint8_t id;
@@ -330,12 +968,16 @@ static void print_addr(struct rtattr *attrs, int len)
while (RTA_OK(attrs, len)) {
if (attrs->rta_type == MPTCP_PM_ADDR_ATTR_FAMILY)
memcpy(&family, RTA_DATA(attrs), 2);
+ if (attrs->rta_type == MPTCP_PM_ADDR_ATTR_PORT)
+ memcpy(&port, RTA_DATA(attrs), 2);
if (attrs->rta_type == MPTCP_PM_ADDR_ATTR_ADDR4) {
if (family != AF_INET)
error(1, errno, "wrong IP (v4) for family %d",
family);
inet_ntop(AF_INET, RTA_DATA(attrs), str, sizeof(str));
printf("%s", str);
+ if (port)
+ printf(" %d", port);
}
if (attrs->rta_type == MPTCP_PM_ADDR_ATTR_ADDR6) {
if (family != AF_INET6)
@@ -343,6 +985,8 @@ static void print_addr(struct rtattr *attrs, int len)
family);
inet_ntop(AF_INET6, RTA_DATA(attrs), str, sizeof(str));
printf("%s", str);
+ if (port)
+ printf(" %d", port);
}
if (attrs->rta_type == MPTCP_PM_ADDR_ATTR_ID) {
memcpy(&id, RTA_DATA(attrs), 1);
@@ -373,6 +1017,20 @@ static void print_addr(struct rtattr *attrs, int len)
printf(",");
}
+ if (flags & MPTCP_PM_ADDR_FLAG_FULLMESH) {
+ printf("fullmesh");
+ flags &= ~MPTCP_PM_ADDR_FLAG_FULLMESH;
+ if (flags)
+ printf(",");
+ }
+
+ if (flags & MPTCP_PM_ADDR_FLAG_IMPLICIT) {
+ printf("implicit");
+ flags &= ~MPTCP_PM_ADDR_FLAG_IMPLICIT;
+ if (flags)
+ printf(",");
+ }
+
/* bump unknown flags, if any */
if (flags)
printf("0x%x", flags);
@@ -429,6 +1087,7 @@ int get_addr(int fd, int pm_family, int argc, char *argv[])
1024];
struct rtattr *rta, *nest;
struct nlmsghdr *nh;
+ u_int32_t token = 0;
int nest_start;
u_int8_t id;
int off = 0;
@@ -439,10 +1098,12 @@ int get_addr(int fd, int pm_family, int argc, char *argv[])
MPTCP_PM_VER);
/* the only argument is the address id */
- if (argc != 3)
+ if (argc != 3 && argc != 5)
syntax(argv);
id = atoi(argv[2]);
+ if (argc == 5 && !strcmp(argv[3], "token"))
+ token = strtoul(argv[4], NULL, 10);
nest_start = off;
nest = (void *)(data + off);
@@ -458,6 +1119,15 @@ int get_addr(int fd, int pm_family, int argc, char *argv[])
off += NLMSG_ALIGN(rta->rta_len);
nest->rta_len = off - nest_start;
+ /* token */
+ if (token) {
+ rta = (void *)(data + off);
+ rta->rta_type = MPTCP_PM_ATTR_TOKEN;
+ rta->rta_len = RTA_LENGTH(4);
+ memcpy(RTA_DATA(rta), &token, 4);
+ off += NLMSG_ALIGN(rta->rta_len);
+ }
+
print_addrs(nh, pm_family, do_nl_req(fd, nh, off, sizeof(data)));
return 0;
}
@@ -469,8 +1139,16 @@ int dump_addrs(int fd, int pm_family, int argc, char *argv[])
1024];
pid_t pid = getpid();
struct nlmsghdr *nh;
+ u_int32_t token = 0;
+ struct rtattr *rta;
int off = 0;
+ if (argc != 2 && argc != 4)
+ syntax(argv);
+
+ if (argc == 4 && !strcmp(argv[2], "token"))
+ token = strtoul(argv[3], NULL, 10);
+
memset(data, 0, sizeof(data));
nh = (void *)data;
off = init_genl_req(data, pm_family, MPTCP_PM_CMD_GET_ADDR,
@@ -480,6 +1158,15 @@ int dump_addrs(int fd, int pm_family, int argc, char *argv[])
nh->nlmsg_pid = pid;
nh->nlmsg_len = off;
+ /* token */
+ if (token) {
+ rta = (void *)(data + off);
+ rta->rta_type = MPTCP_PM_ATTR_TOKEN;
+ rta->rta_len = RTA_LENGTH(4);
+ memcpy(RTA_DATA(rta), &token, 4);
+ off += NLMSG_ALIGN(rta->rta_len);
+ }
+
print_addrs(nh, pm_family, do_nl_req(fd, nh, off, sizeof(data)));
return 0;
}
@@ -584,9 +1271,245 @@ int get_set_limits(int fd, int pm_family, int argc, char *argv[])
return 0;
}
+int add_listener(int argc, char *argv[])
+{
+ struct sockaddr_storage addr;
+ struct sockaddr_in6 *a6;
+ struct sockaddr_in *a4;
+ u_int16_t family;
+ int enable = 1;
+ int sock;
+ int err;
+
+ if (argc < 4)
+ syntax(argv);
+
+ memset(&addr, 0, sizeof(struct sockaddr_storage));
+ a4 = (struct sockaddr_in *)&addr;
+ a6 = (struct sockaddr_in6 *)&addr;
+
+ if (inet_pton(AF_INET, argv[2], &a4->sin_addr)) {
+ family = AF_INET;
+ a4->sin_family = family;
+ a4->sin_port = htons(atoi(argv[3]));
+ } else if (inet_pton(AF_INET6, argv[2], &a6->sin6_addr)) {
+ family = AF_INET6;
+ a6->sin6_family = family;
+ a6->sin6_port = htons(atoi(argv[3]));
+ } else
+ error(1, errno, "can't parse ip %s", argv[2]);
+
+ sock = socket(family, SOCK_STREAM, IPPROTO_MPTCP);
+ if (sock < 0)
+ error(1, errno, "can't create listener sock\n");
+
+ if (setsockopt(sock, SOL_SOCKET, SO_REUSEADDR, &enable, sizeof(enable))) {
+ close(sock);
+ error(1, errno, "can't set SO_REUSEADDR on listener sock\n");
+ }
+
+ err = bind(sock, (struct sockaddr *)&addr,
+ ((family == AF_INET) ? sizeof(struct sockaddr_in) :
+ sizeof(struct sockaddr_in6)));
+
+ if (err == 0 && listen(sock, 30) == 0)
+ pause();
+
+ close(sock);
+ return 0;
+}
+
+int set_flags(int fd, int pm_family, int argc, char *argv[])
+{
+ char data[NLMSG_ALIGN(sizeof(struct nlmsghdr)) +
+ NLMSG_ALIGN(sizeof(struct genlmsghdr)) +
+ 1024];
+ struct rtattr *rta, *nest;
+ struct nlmsghdr *nh;
+ u_int32_t flags = 0;
+ u_int32_t token = 0;
+ u_int16_t rport = 0;
+ u_int16_t family;
+ void *rip = NULL;
+ int nest_start;
+ int use_id = 0;
+ u_int8_t id;
+ int off = 0;
+ int arg = 2;
+
+ memset(data, 0, sizeof(data));
+ nh = (void *)data;
+ off = init_genl_req(data, pm_family, MPTCP_PM_CMD_SET_FLAGS,
+ MPTCP_PM_VER);
+
+ if (argc < 3)
+ syntax(argv);
+
+ nest_start = off;
+ nest = (void *)(data + off);
+ nest->rta_type = NLA_F_NESTED | MPTCP_PM_ATTR_ADDR;
+ nest->rta_len = RTA_LENGTH(0);
+ off += NLMSG_ALIGN(nest->rta_len);
+
+ if (!strcmp(argv[arg], "id")) {
+ if (++arg >= argc)
+ error(1, 0, " missing id value");
+
+ use_id = 1;
+ id = atoi(argv[arg]);
+ rta = (void *)(data + off);
+ rta->rta_type = MPTCP_PM_ADDR_ATTR_ID;
+ rta->rta_len = RTA_LENGTH(1);
+ memcpy(RTA_DATA(rta), &id, 1);
+ off += NLMSG_ALIGN(rta->rta_len);
+ } else {
+ /* addr data */
+ rta = (void *)(data + off);
+ if (inet_pton(AF_INET, argv[arg], RTA_DATA(rta))) {
+ family = AF_INET;
+ rta->rta_type = MPTCP_PM_ADDR_ATTR_ADDR4;
+ rta->rta_len = RTA_LENGTH(4);
+ } else if (inet_pton(AF_INET6, argv[arg], RTA_DATA(rta))) {
+ family = AF_INET6;
+ rta->rta_type = MPTCP_PM_ADDR_ATTR_ADDR6;
+ rta->rta_len = RTA_LENGTH(16);
+ } else {
+ error(1, errno, "can't parse ip %s", argv[arg]);
+ }
+ off += NLMSG_ALIGN(rta->rta_len);
+
+ /* family */
+ rta = (void *)(data + off);
+ rta->rta_type = MPTCP_PM_ADDR_ATTR_FAMILY;
+ rta->rta_len = RTA_LENGTH(2);
+ memcpy(RTA_DATA(rta), &family, 2);
+ off += NLMSG_ALIGN(rta->rta_len);
+ }
+
+ if (++arg >= argc)
+ error(1, 0, " missing flags keyword");
+
+ for (; arg < argc; arg++) {
+ if (!strcmp(argv[arg], "token")) {
+ if (++arg >= argc)
+ error(1, 0, " missing token value");
+
+ /* token */
+ token = strtoul(argv[arg], NULL, 10);
+ } else if (!strcmp(argv[arg], "flags")) {
+ char *tok, *str;
+
+ /* flags */
+ if (++arg >= argc)
+ error(1, 0, " missing flags value");
+
+ for (str = argv[arg]; (tok = strtok(str, ","));
+ str = NULL) {
+ if (!strcmp(tok, "backup"))
+ flags |= MPTCP_PM_ADDR_FLAG_BACKUP;
+ else if (!strcmp(tok, "fullmesh"))
+ flags |= MPTCP_PM_ADDR_FLAG_FULLMESH;
+ else if (strcmp(tok, "nobackup") &&
+ strcmp(tok, "nofullmesh"))
+ error(1, errno,
+ "unknown flag %s", argv[arg]);
+ }
+
+ rta = (void *)(data + off);
+ rta->rta_type = MPTCP_PM_ADDR_ATTR_FLAGS;
+ rta->rta_len = RTA_LENGTH(4);
+ memcpy(RTA_DATA(rta), &flags, 4);
+ off += NLMSG_ALIGN(rta->rta_len);
+ } else if (!strcmp(argv[arg], "port")) {
+ u_int16_t port;
+
+ if (use_id)
+ error(1, 0, " port can't be used with id");
+
+ if (++arg >= argc)
+ error(1, 0, " missing port value");
+
+ port = atoi(argv[arg]);
+ rta = (void *)(data + off);
+ rta->rta_type = MPTCP_PM_ADDR_ATTR_PORT;
+ rta->rta_len = RTA_LENGTH(2);
+ memcpy(RTA_DATA(rta), &port, 2);
+ off += NLMSG_ALIGN(rta->rta_len);
+ } else if (!strcmp(argv[arg], "rport")) {
+ if (++arg >= argc)
+ error(1, 0, " missing remote port");
+
+ rport = atoi(argv[arg]);
+ } else if (!strcmp(argv[arg], "rip")) {
+ if (++arg >= argc)
+ error(1, 0, " missing remote ip");
+
+ rip = argv[arg];
+ } else {
+ error(1, 0, "unknown keyword %s", argv[arg]);
+ }
+ }
+ nest->rta_len = off - nest_start;
+
+ /* token */
+ if (token) {
+ rta = (void *)(data + off);
+ rta->rta_type = MPTCP_PM_ATTR_TOKEN;
+ rta->rta_len = RTA_LENGTH(4);
+ memcpy(RTA_DATA(rta), &token, 4);
+ off += NLMSG_ALIGN(rta->rta_len);
+ }
+
+ /* remote addr/port */
+ if (rip) {
+ nest_start = off;
+ nest = (void *)(data + off);
+ nest->rta_type = NLA_F_NESTED | MPTCP_PM_ATTR_ADDR_REMOTE;
+ nest->rta_len = RTA_LENGTH(0);
+ off += NLMSG_ALIGN(nest->rta_len);
+
+ /* addr data */
+ rta = (void *)(data + off);
+ if (inet_pton(AF_INET, rip, RTA_DATA(rta))) {
+ family = AF_INET;
+ rta->rta_type = MPTCP_PM_ADDR_ATTR_ADDR4;
+ rta->rta_len = RTA_LENGTH(4);
+ } else if (inet_pton(AF_INET6, rip, RTA_DATA(rta))) {
+ family = AF_INET6;
+ rta->rta_type = MPTCP_PM_ADDR_ATTR_ADDR6;
+ rta->rta_len = RTA_LENGTH(16);
+ } else {
+ error(1, errno, "can't parse ip %s", (char *)rip);
+ }
+ off += NLMSG_ALIGN(rta->rta_len);
+
+ /* family */
+ rta = (void *)(data + off);
+ rta->rta_type = MPTCP_PM_ADDR_ATTR_FAMILY;
+ rta->rta_len = RTA_LENGTH(2);
+ memcpy(RTA_DATA(rta), &family, 2);
+ off += NLMSG_ALIGN(rta->rta_len);
+
+ if (rport) {
+ rta = (void *)(data + off);
+ rta->rta_type = MPTCP_PM_ADDR_ATTR_PORT;
+ rta->rta_len = RTA_LENGTH(2);
+ memcpy(RTA_DATA(rta), &rport, 2);
+ off += NLMSG_ALIGN(rta->rta_len);
+ }
+
+ nest->rta_len = off - nest_start;
+ }
+
+ do_nl_req(fd, nh, off, 0);
+ return 0;
+}
+
int main(int argc, char *argv[])
{
- int fd, pm_family;
+ int events_mcast_grp;
+ int pm_family;
+ int fd;
if (argc < 2)
syntax(argv);
@@ -595,10 +1518,18 @@ int main(int argc, char *argv[])
if (fd == -1)
error(1, errno, "socket netlink");
- pm_family = resolve_mptcp_pm_netlink(fd);
+ resolve_mptcp_pm_netlink(fd, &pm_family, &events_mcast_grp);
if (!strcmp(argv[1], "add"))
return add_addr(fd, pm_family, argc, argv);
+ else if (!strcmp(argv[1], "ann"))
+ return announce_addr(fd, pm_family, argc, argv);
+ else if (!strcmp(argv[1], "rem"))
+ return remove_addr(fd, pm_family, argc, argv);
+ else if (!strcmp(argv[1], "csf"))
+ return csf(fd, pm_family, argc, argv);
+ else if (!strcmp(argv[1], "dsf"))
+ return dsf(fd, pm_family, argc, argv);
else if (!strcmp(argv[1], "del"))
return del_addr(fd, pm_family, argc, argv);
else if (!strcmp(argv[1], "flush"))
@@ -609,6 +1540,12 @@ int main(int argc, char *argv[])
return dump_addrs(fd, pm_family, argc, argv);
else if (!strcmp(argv[1], "limits"))
return get_set_limits(fd, pm_family, argc, argv);
+ else if (!strcmp(argv[1], "set"))
+ return set_flags(fd, pm_family, argc, argv);
+ else if (!strcmp(argv[1], "events"))
+ return capture_events(fd, events_mcast_grp);
+ else if (!strcmp(argv[1], "listen"))
+ return add_listener(argc, argv);
fprintf(stderr, "unknown sub-command: %s", argv[1]);
syntax(argv);
diff --git a/tools/testing/selftests/net/mptcp/settings b/tools/testing/selftests/net/mptcp/settings
index 026384c189c9..abc5648b59ab 100644
--- a/tools/testing/selftests/net/mptcp/settings
+++ b/tools/testing/selftests/net/mptcp/settings
@@ -1 +1 @@
-timeout=450
+timeout=1800
diff --git a/tools/testing/selftests/net/mptcp/simult_flows.sh b/tools/testing/selftests/net/mptcp/simult_flows.sh
new file mode 100755
index 000000000000..1b2366220388
--- /dev/null
+++ b/tools/testing/selftests/net/mptcp/simult_flows.sh
@@ -0,0 +1,294 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+# Double quotes to prevent globbing and word splitting is recommended in new
+# code but we accept it, especially because there were too many before having
+# address all other issues detected by shellcheck.
+#shellcheck disable=SC2086
+
+. "$(dirname "${0}")/mptcp_lib.sh"
+
+ns1=""
+ns2=""
+ns3=""
+capture=false
+timeout_poll=30
+timeout_test=$((timeout_poll * 2 + 1))
+# a bit more space: because we have more to display
+MPTCP_LIB_TEST_FORMAT="%02u %-60s"
+ret=0
+bail=0
+slack=50
+large=""
+small=""
+sout=""
+cout=""
+capout=""
+size=0
+
+usage() {
+ echo "Usage: $0 [ -b ] [ -c ] [ -d ]"
+ echo -e "\t-b: bail out after first error, otherwise runs al testcases"
+ echo -e "\t-c: capture packets for each test using tcpdump (default: no capture)"
+ echo -e "\t-d: debug this script"
+}
+
+# This function is used in the cleanup trap
+#shellcheck disable=SC2317
+cleanup()
+{
+ rm -f "$cout" "$sout"
+ rm -f "$large" "$small"
+ rm -f "$capout"
+
+ mptcp_lib_ns_exit "${ns1}" "${ns2}" "${ns3}"
+}
+
+mptcp_lib_check_mptcp
+mptcp_lib_check_tools ip
+
+# "$ns1" ns2 ns3
+# ns1eth1 ns2eth1 ns2eth3 ns3eth1
+# netem
+# ns1eth2 ns2eth2
+# netem
+
+setup()
+{
+ large=$(mktemp)
+ small=$(mktemp)
+ sout=$(mktemp)
+ cout=$(mktemp)
+ capout=$(mktemp)
+ size=$((2 * 2048 * 4096))
+
+ dd if=/dev/zero of=$small bs=4096 count=20 >/dev/null 2>&1
+ dd if=/dev/zero of=$large bs=4096 count=$((size / 4096)) >/dev/null 2>&1
+
+ trap cleanup EXIT
+
+ mptcp_lib_ns_init ns1 ns2 ns3
+
+ ip link add ns1eth1 netns "$ns1" type veth peer name ns2eth1 netns "$ns2"
+ ip link add ns1eth2 netns "$ns1" type veth peer name ns2eth2 netns "$ns2"
+ ip link add ns2eth3 netns "$ns2" type veth peer name ns3eth1 netns "$ns3"
+
+ ip -net "$ns1" addr add 10.0.1.1/24 dev ns1eth1
+ ip -net "$ns1" addr add dead:beef:1::1/64 dev ns1eth1 nodad
+ ip -net "$ns1" link set ns1eth1 up mtu 1500
+ ip -net "$ns1" route add default via 10.0.1.2
+ ip -net "$ns1" route add default via dead:beef:1::2
+
+ ip -net "$ns1" addr add 10.0.2.1/24 dev ns1eth2
+ ip -net "$ns1" addr add dead:beef:2::1/64 dev ns1eth2 nodad
+ ip -net "$ns1" link set ns1eth2 up mtu 1500
+ ip -net "$ns1" route add default via 10.0.2.2 metric 101
+ ip -net "$ns1" route add default via dead:beef:2::2 metric 101
+
+ ip netns exec "$ns1" ./pm_nl_ctl limits 1 1
+ ip netns exec "$ns1" ./pm_nl_ctl add 10.0.2.1 dev ns1eth2 flags subflow
+
+ ip -net "$ns2" addr add 10.0.1.2/24 dev ns2eth1
+ ip -net "$ns2" addr add dead:beef:1::2/64 dev ns2eth1 nodad
+ ip -net "$ns2" link set ns2eth1 up mtu 1500
+
+ ip -net "$ns2" addr add 10.0.2.2/24 dev ns2eth2
+ ip -net "$ns2" addr add dead:beef:2::2/64 dev ns2eth2 nodad
+ ip -net "$ns2" link set ns2eth2 up mtu 1500
+
+ ip -net "$ns2" addr add 10.0.3.2/24 dev ns2eth3
+ ip -net "$ns2" addr add dead:beef:3::2/64 dev ns2eth3 nodad
+ ip -net "$ns2" link set ns2eth3 up mtu 1500
+ ip netns exec "$ns2" sysctl -q net.ipv4.ip_forward=1
+ ip netns exec "$ns2" sysctl -q net.ipv6.conf.all.forwarding=1
+
+ ip -net "$ns3" addr add 10.0.3.3/24 dev ns3eth1
+ ip -net "$ns3" addr add dead:beef:3::3/64 dev ns3eth1 nodad
+ ip -net "$ns3" link set ns3eth1 up mtu 1500
+ ip -net "$ns3" route add default via 10.0.3.2
+ ip -net "$ns3" route add default via dead:beef:3::2
+
+ ip netns exec "$ns3" ./pm_nl_ctl limits 1 1
+
+ # debug build can slow down measurably the test program
+ # we use quite tight time limit on the run-time, to ensure
+ # maximum B/W usage.
+ # Use kmemleak/lockdep/kasan/prove_locking presence as a rough
+ # estimate for this being a debug kernel and increase the
+ # maximum run-time accordingly. Observed run times for CI builds
+ # running selftests, including kbuild, were used to determine the
+ # amount of time to add.
+ grep -q ' kmemleak_init$\| lockdep_init$\| kasan_init$\| prove_locking$' /proc/kallsyms && slack=$((slack+550))
+}
+
+do_transfer()
+{
+ local cin=$1
+ local sin=$2
+ local max_time=$3
+ local port
+ port=$((10000+MPTCP_LIB_TEST_COUNTER))
+
+ :> "$cout"
+ :> "$sout"
+ :> "$capout"
+
+ if $capture; then
+ local capuser
+ local rndh="${ns1:4}"
+ if [ -z $SUDO_USER ] ; then
+ capuser=""
+ else
+ capuser="-Z $SUDO_USER"
+ fi
+
+ local capfile="${rndh}-${port}"
+ local capopt="-i any -s 65535 -B 32768 ${capuser}"
+
+ ip netns exec ${ns3} tcpdump ${capopt} -w "${capfile}-listener.pcap" >> "${capout}" 2>&1 &
+ local cappid_listener=$!
+
+ ip netns exec ${ns1} tcpdump ${capopt} -w "${capfile}-connector.pcap" >> "${capout}" 2>&1 &
+ local cappid_connector=$!
+
+ sleep 1
+ fi
+
+ timeout ${timeout_test} \
+ ip netns exec ${ns3} \
+ ./mptcp_connect -jt ${timeout_poll} -l -p $port -T $max_time \
+ 0.0.0.0 < "$sin" > "$sout" &
+ local spid=$!
+
+ mptcp_lib_wait_local_port_listen "${ns3}" "${port}"
+
+ timeout ${timeout_test} \
+ ip netns exec ${ns1} \
+ ./mptcp_connect -jt ${timeout_poll} -p $port -T $max_time \
+ 10.0.3.3 < "$cin" > "$cout" &
+ local cpid=$!
+
+ wait $cpid
+ local retc=$?
+ wait $spid
+ local rets=$?
+
+ if $capture; then
+ sleep 1
+ kill ${cappid_listener}
+ kill ${cappid_connector}
+ fi
+
+ cmp $sin $cout > /dev/null 2>&1
+ local cmps=$?
+ cmp $cin $sout > /dev/null 2>&1
+ local cmpc=$?
+
+ printf "%-16s" " max $max_time "
+ if [ $retc -eq 0 ] && [ $rets -eq 0 ] && \
+ [ $cmpc -eq 0 ] && [ $cmps -eq 0 ]; then
+ mptcp_lib_pr_ok
+ cat "$capout"
+ return 0
+ fi
+
+ mptcp_lib_pr_fail
+ echo "client exit code $retc, server $rets" 1>&2
+ echo -e "\nnetns ${ns3} socket stat for $port:" 1>&2
+ ip netns exec ${ns3} ss -nita 1>&2 -o "sport = :$port"
+ echo -e "\nnetns ${ns1} socket stat for $port:" 1>&2
+ ip netns exec ${ns1} ss -nita 1>&2 -o "dport = :$port"
+ ls -l $sin $cout
+ ls -l $cin $sout
+
+ cat "$capout"
+ return 1
+}
+
+run_test()
+{
+ local rate1=$1
+ local rate2=$2
+ local delay1=$3
+ local delay2=$4
+ local lret
+ local dev
+ shift 4
+ local msg=$*
+
+ [ $delay1 -gt 0 ] && delay1="delay $delay1" || delay1=""
+ [ $delay2 -gt 0 ] && delay2="delay $delay2" || delay2=""
+
+ for dev in ns1eth1 ns1eth2; do
+ tc -n $ns1 qdisc del dev $dev root >/dev/null 2>&1
+ done
+ for dev in ns2eth1 ns2eth2; do
+ tc -n $ns2 qdisc del dev $dev root >/dev/null 2>&1
+ done
+ tc -n $ns1 qdisc add dev ns1eth1 root netem rate ${rate1}mbit $delay1
+ tc -n $ns1 qdisc add dev ns1eth2 root netem rate ${rate2}mbit $delay2
+ tc -n $ns2 qdisc add dev ns2eth1 root netem rate ${rate1}mbit $delay1
+ tc -n $ns2 qdisc add dev ns2eth2 root netem rate ${rate2}mbit $delay2
+
+ # time is measured in ms, account for transfer size, aggregated link speed
+ # and header overhead (10%)
+ # ms byte -> bit 10% mbit -> kbit -> bit 10%
+ local time=$((1000 * size * 8 * 10 / ((rate1 + rate2) * 1000 * 1000 * 9) ))
+
+ # mptcp_connect will do some sleeps to allow the mp_join handshake
+ # completion (see mptcp_connect): 200ms on each side, add some slack
+ time=$((time + 400 + slack))
+
+ mptcp_lib_print_title "$msg"
+ do_transfer $small $large $time
+ lret=$?
+ mptcp_lib_result_code "${lret}" "${msg}"
+ if [ $lret -ne 0 ]; then
+ ret=$lret
+ [ $bail -eq 0 ] || exit $ret
+ fi
+
+ msg+=" - reverse direction"
+ mptcp_lib_print_title "${msg}"
+ do_transfer $large $small $time
+ lret=$?
+ mptcp_lib_result_code "${lret}" "${msg}"
+ if [ $lret -ne 0 ]; then
+ ret=$lret
+ [ $bail -eq 0 ] || exit $ret
+ fi
+}
+
+while getopts "bcdh" option;do
+ case "$option" in
+ "h")
+ usage $0
+ exit ${KSFT_PASS}
+ ;;
+ "b")
+ bail=1
+ ;;
+ "c")
+ capture=true
+ ;;
+ "d")
+ set -x
+ ;;
+ "?")
+ usage $0
+ exit ${KSFT_FAIL}
+ ;;
+ esac
+done
+
+setup
+run_test 10 10 0 0 "balanced bwidth"
+run_test 10 10 1 25 "balanced bwidth with unbalanced delay"
+
+# we still need some additional infrastructure to pass the following test-cases
+run_test 10 3 0 0 "unbalanced bwidth"
+run_test 10 3 1 25 "unbalanced bwidth with unbalanced delay"
+run_test 10 3 25 1 "unbalanced bwidth with opposed, unbalanced delay"
+
+mptcp_lib_result_print_all_tap
+exit $ret
diff --git a/tools/testing/selftests/net/mptcp/userspace_pm.sh b/tools/testing/selftests/net/mptcp/userspace_pm.sh
new file mode 100755
index 000000000000..9e2981f2d7f5
--- /dev/null
+++ b/tools/testing/selftests/net/mptcp/userspace_pm.sh
@@ -0,0 +1,897 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+# Double quotes to prevent globbing and word splitting is recommended in new
+# code but we accept it.
+#shellcheck disable=SC2086
+
+# Some variables are used below but indirectly, see verify_*_event()
+#shellcheck disable=SC2034
+
+. "$(dirname "${0}")/mptcp_lib.sh"
+
+mptcp_lib_check_mptcp
+mptcp_lib_check_kallsyms
+
+if ! mptcp_lib_has_file '/proc/sys/net/mptcp/pm_type'; then
+ echo "userspace pm tests are not supported by the kernel: SKIP"
+ exit ${KSFT_SKIP}
+fi
+mptcp_lib_check_tools ip
+
+ANNOUNCED=${MPTCP_LIB_EVENT_ANNOUNCED}
+REMOVED=${MPTCP_LIB_EVENT_REMOVED}
+SUB_ESTABLISHED=${MPTCP_LIB_EVENT_SUB_ESTABLISHED}
+SUB_CLOSED=${MPTCP_LIB_EVENT_SUB_CLOSED}
+LISTENER_CREATED=${MPTCP_LIB_EVENT_LISTENER_CREATED}
+LISTENER_CLOSED=${MPTCP_LIB_EVENT_LISTENER_CLOSED}
+
+AF_INET=${MPTCP_LIB_AF_INET}
+AF_INET6=${MPTCP_LIB_AF_INET6}
+
+file=""
+server_evts=""
+client_evts=""
+server_evts_pid=0
+client_evts_pid=0
+client4_pid=0
+server4_pid=0
+client6_pid=0
+server6_pid=0
+client4_token=""
+server4_token=""
+client6_token=""
+server6_token=""
+client4_port=0;
+client6_port=0;
+app4_port=50002
+new4_port=50003
+app6_port=50004
+client_addr_id=${RANDOM:0:2}
+server_addr_id=${RANDOM:0:2}
+
+ns1=""
+ns2=""
+ret=0
+test_name=""
+# a bit more space: because we have more to display
+MPTCP_LIB_TEST_FORMAT="%02u %-68s"
+
+print_title()
+{
+ mptcp_lib_pr_info "${1}"
+}
+
+# $1: test name
+print_test()
+{
+ test_name="${1}"
+
+ mptcp_lib_print_title "${test_name}"
+}
+
+test_pass()
+{
+ mptcp_lib_pr_ok
+ mptcp_lib_result_pass "${test_name}"
+}
+
+test_skip()
+{
+ mptcp_lib_pr_skip
+ mptcp_lib_result_skip "${test_name}"
+}
+
+# $1: msg
+test_fail()
+{
+ if [ ${#} -gt 0 ]
+ then
+ mptcp_lib_pr_fail "${@}"
+ fi
+ ret=${KSFT_FAIL}
+ mptcp_lib_result_fail "${test_name}"
+}
+
+# This function is used in the cleanup trap
+#shellcheck disable=SC2317
+cleanup()
+{
+ print_title "Cleanup"
+
+ # Terminate the MPTCP connection and related processes
+ local pid
+ for pid in $client4_pid $server4_pid $client6_pid $server6_pid\
+ $server_evts_pid $client_evts_pid
+ do
+ mptcp_lib_kill_wait $pid
+ done
+
+ mptcp_lib_ns_exit "${ns1}" "${ns2}"
+
+ rm -rf $file $client_evts $server_evts
+
+ mptcp_lib_pr_info "Done"
+}
+
+trap cleanup EXIT
+
+# Create and configure network namespaces for testing
+mptcp_lib_ns_init ns1 ns2
+for i in "$ns1" "$ns2" ;do
+ ip netns exec "$i" sysctl -q net.mptcp.pm_type=1
+done
+
+# "$ns1" ns2
+# ns1eth2 ns2eth1
+
+ip link add ns1eth2 netns "$ns1" type veth peer name ns2eth1 netns "$ns2"
+
+# Add IPv4/v6 addresses to the namespaces
+ip -net "$ns1" addr add 10.0.1.1/24 dev ns1eth2
+ip -net "$ns1" addr add 10.0.2.1/24 dev ns1eth2
+ip -net "$ns1" addr add dead:beef:1::1/64 dev ns1eth2 nodad
+ip -net "$ns1" addr add dead:beef:2::1/64 dev ns1eth2 nodad
+ip -net "$ns1" link set ns1eth2 up
+
+ip -net "$ns2" addr add 10.0.1.2/24 dev ns2eth1
+ip -net "$ns2" addr add 10.0.2.2/24 dev ns2eth1
+ip -net "$ns2" addr add dead:beef:1::2/64 dev ns2eth1 nodad
+ip -net "$ns2" addr add dead:beef:2::2/64 dev ns2eth1 nodad
+ip -net "$ns2" link set ns2eth1 up
+
+file=$(mktemp)
+mptcp_lib_make_file "$file" 2 1
+
+# Capture netlink events over the two network namespaces running
+# the MPTCP client and server
+client_evts=$(mktemp)
+mptcp_lib_events "${ns2}" "${client_evts}" client_evts_pid
+server_evts=$(mktemp)
+mptcp_lib_events "${ns1}" "${server_evts}" server_evts_pid
+sleep 0.5
+
+print_title "Init"
+print_test "Created network namespaces ns1, ns2"
+test_pass
+
+make_connection()
+{
+ local is_v6=$1
+ local app_port=$app4_port
+ local connect_addr="10.0.1.1"
+ local listen_addr="0.0.0.0"
+ if [ "$is_v6" = "v6" ]
+ then
+ connect_addr="dead:beef:1::1"
+ listen_addr="::"
+ app_port=$app6_port
+ else
+ is_v6="v4"
+ fi
+
+ :>"$client_evts"
+ :>"$server_evts"
+
+ # Run the server
+ ip netns exec "$ns1" \
+ ./mptcp_connect -s MPTCP -w 300 -p $app_port -l $listen_addr > /dev/null 2>&1 &
+ local server_pid=$!
+ sleep 0.5
+
+ # Run the client, transfer $file and stay connected to the server
+ # to conduct tests
+ ip netns exec "$ns2" \
+ ./mptcp_connect -s MPTCP -w 300 -m sendfile -p $app_port $connect_addr\
+ 2>&1 > /dev/null < "$file" &
+ local client_pid=$!
+ sleep 1
+
+ # Capture client/server attributes from MPTCP connection netlink events
+
+ local client_token
+ local client_port
+ local client_serverside
+ local server_token
+ local server_serverside
+
+ client_token=$(mptcp_lib_evts_get_info token "$client_evts")
+ client_port=$(mptcp_lib_evts_get_info sport "$client_evts")
+ client_serverside=$(mptcp_lib_evts_get_info server_side "$client_evts")
+ server_token=$(mptcp_lib_evts_get_info token "$server_evts")
+ server_serverside=$(mptcp_lib_evts_get_info server_side "$server_evts")
+
+ print_test "Established IP${is_v6} MPTCP Connection ns2 => ns1"
+ if [ "$client_token" != "" ] && [ "$server_token" != "" ] && [ "$client_serverside" = 0 ] &&
+ [ "$server_serverside" = 1 ]
+ then
+ test_pass
+ else
+ test_fail "Expected tokens (c:${client_token} - s:${server_token}) and server (c:${client_serverside} - s:${server_serverside})"
+ mptcp_lib_result_print_all_tap
+ exit ${KSFT_FAIL}
+ fi
+
+ if [ "$is_v6" = "v6" ]
+ then
+ client6_token=$client_token
+ server6_token=$server_token
+ client6_port=$client_port
+ client6_pid=$client_pid
+ server6_pid=$server_pid
+ else
+ client4_token=$client_token
+ server4_token=$server_token
+ client4_port=$client_port
+ client4_pid=$client_pid
+ server4_pid=$server_pid
+ fi
+}
+
+# $@: all var names to check
+check_expected()
+{
+ if mptcp_lib_check_expected "${@}"
+ then
+ test_pass
+ return 0
+ fi
+
+ test_fail
+ return 1
+}
+
+verify_announce_event()
+{
+ local evt=$1
+ local e_type=$2
+ local e_token=$3
+ local e_addr=$4
+ local e_id=$5
+ local e_dport=$6
+ local e_af=$7
+ local type
+ local token
+ local addr
+ local dport
+ local id
+
+ type=$(mptcp_lib_evts_get_info type "$evt" $e_type)
+ token=$(mptcp_lib_evts_get_info token "$evt" $e_type)
+ if [ "$e_af" = "v6" ]
+ then
+ addr=$(mptcp_lib_evts_get_info daddr6 "$evt" $e_type)
+ else
+ addr=$(mptcp_lib_evts_get_info daddr4 "$evt" $e_type)
+ fi
+ dport=$(mptcp_lib_evts_get_info dport "$evt" $e_type)
+ id=$(mptcp_lib_evts_get_info rem_id "$evt" $e_type)
+
+ check_expected "type" "token" "addr" "dport" "id"
+}
+
+test_announce()
+{
+ print_title "Announce tests"
+
+ # Capture events on the network namespace running the server
+ :>"$server_evts"
+
+ # ADD_ADDR using an invalid token should result in no action
+ local invalid_token=$(( client4_token - 1))
+ ip netns exec "$ns2" ./pm_nl_ctl ann 10.0.2.2 token $invalid_token id\
+ $client_addr_id dev ns2eth1 > /dev/null 2>&1
+
+ local type
+ type=$(mptcp_lib_evts_get_info type "$server_evts")
+ print_test "ADD_ADDR 10.0.2.2 (ns2) => ns1, invalid token"
+ if [ "$type" = "" ]
+ then
+ test_pass
+ else
+ test_fail "type defined: ${type}"
+ fi
+
+ # ADD_ADDR from the client to server machine reusing the subflow port
+ :>"$server_evts"
+ ip netns exec "$ns2"\
+ ./pm_nl_ctl ann 10.0.2.2 token "$client4_token" id $client_addr_id dev\
+ ns2eth1
+ print_test "ADD_ADDR id:${client_addr_id} 10.0.2.2 (ns2) => ns1, reuse port"
+ sleep 0.5
+ verify_announce_event $server_evts $ANNOUNCED $server4_token "10.0.2.2" $client_addr_id \
+ "$client4_port"
+
+ # ADD_ADDR6 from the client to server machine reusing the subflow port
+ :>"$server_evts"
+ ip netns exec "$ns2" ./pm_nl_ctl ann\
+ dead:beef:2::2 token "$client6_token" id $client_addr_id dev ns2eth1
+ print_test "ADD_ADDR6 id:${client_addr_id} dead:beef:2::2 (ns2) => ns1, reuse port"
+ sleep 0.5
+ verify_announce_event "$server_evts" "$ANNOUNCED" "$server6_token" "dead:beef:2::2"\
+ "$client_addr_id" "$client6_port" "v6"
+
+ # ADD_ADDR from the client to server machine using a new port
+ :>"$server_evts"
+ client_addr_id=$((client_addr_id+1))
+ ip netns exec "$ns2" ./pm_nl_ctl ann 10.0.2.2 token "$client4_token" id\
+ $client_addr_id dev ns2eth1 port $new4_port
+ print_test "ADD_ADDR id:${client_addr_id} 10.0.2.2 (ns2) => ns1, new port"
+ sleep 0.5
+ verify_announce_event "$server_evts" "$ANNOUNCED" "$server4_token" "10.0.2.2"\
+ "$client_addr_id" "$new4_port"
+
+ # Capture events on the network namespace running the client
+ :>"$client_evts"
+
+ # ADD_ADDR from the server to client machine reusing the subflow port
+ ip netns exec "$ns1" ./pm_nl_ctl ann 10.0.2.1 token "$server4_token" id\
+ $server_addr_id dev ns1eth2
+ print_test "ADD_ADDR id:${server_addr_id} 10.0.2.1 (ns1) => ns2, reuse port"
+ sleep 0.5
+ verify_announce_event "$client_evts" "$ANNOUNCED" "$client4_token" "10.0.2.1"\
+ "$server_addr_id" "$app4_port"
+
+ # ADD_ADDR6 from the server to client machine reusing the subflow port
+ :>"$client_evts"
+ ip netns exec "$ns1" ./pm_nl_ctl ann dead:beef:2::1 token "$server6_token" id\
+ $server_addr_id dev ns1eth2
+ print_test "ADD_ADDR6 id:${server_addr_id} dead:beef:2::1 (ns1) => ns2, reuse port"
+ sleep 0.5
+ verify_announce_event "$client_evts" "$ANNOUNCED" "$client6_token" "dead:beef:2::1"\
+ "$server_addr_id" "$app6_port" "v6"
+
+ # ADD_ADDR from the server to client machine using a new port
+ :>"$client_evts"
+ server_addr_id=$((server_addr_id+1))
+ ip netns exec "$ns1" ./pm_nl_ctl ann 10.0.2.1 token "$server4_token" id\
+ $server_addr_id dev ns1eth2 port $new4_port
+ print_test "ADD_ADDR id:${server_addr_id} 10.0.2.1 (ns1) => ns2, new port"
+ sleep 0.5
+ verify_announce_event "$client_evts" "$ANNOUNCED" "$client4_token" "10.0.2.1"\
+ "$server_addr_id" "$new4_port"
+}
+
+verify_remove_event()
+{
+ local evt=$1
+ local e_type=$2
+ local e_token=$3
+ local e_id=$4
+ local type
+ local token
+ local id
+
+ type=$(mptcp_lib_evts_get_info type "$evt" $e_type)
+ token=$(mptcp_lib_evts_get_info token "$evt" $e_type)
+ id=$(mptcp_lib_evts_get_info rem_id "$evt" $e_type)
+
+ check_expected "type" "token" "id"
+}
+
+test_remove()
+{
+ print_title "Remove tests"
+
+ # Capture events on the network namespace running the server
+ :>"$server_evts"
+
+ # RM_ADDR using an invalid token should result in no action
+ local invalid_token=$(( client4_token - 1 ))
+ ip netns exec "$ns2" ./pm_nl_ctl rem token $invalid_token id\
+ $client_addr_id > /dev/null 2>&1
+ print_test "RM_ADDR id:${client_addr_id} ns2 => ns1, invalid token"
+ local type
+ type=$(mptcp_lib_evts_get_info type "$server_evts")
+ if [ "$type" = "" ]
+ then
+ test_pass
+ else
+ test_fail "unexpected type: ${type}"
+ fi
+
+ # RM_ADDR using an invalid addr id should result in no action
+ local invalid_id=$(( client_addr_id + 1 ))
+ ip netns exec "$ns2" ./pm_nl_ctl rem token "$client4_token" id\
+ $invalid_id > /dev/null 2>&1
+ print_test "RM_ADDR id:${invalid_id} ns2 => ns1, invalid id"
+ type=$(mptcp_lib_evts_get_info type "$server_evts")
+ if [ "$type" = "" ]
+ then
+ test_pass
+ else
+ test_fail "unexpected type: ${type}"
+ fi
+
+ # RM_ADDR from the client to server machine
+ :>"$server_evts"
+ ip netns exec "$ns2" ./pm_nl_ctl rem token "$client4_token" id\
+ $client_addr_id
+ print_test "RM_ADDR id:${client_addr_id} ns2 => ns1"
+ sleep 0.5
+ verify_remove_event "$server_evts" "$REMOVED" "$server4_token" "$client_addr_id"
+
+ # RM_ADDR from the client to server machine
+ :>"$server_evts"
+ client_addr_id=$(( client_addr_id - 1 ))
+ ip netns exec "$ns2" ./pm_nl_ctl rem token "$client4_token" id\
+ $client_addr_id
+ print_test "RM_ADDR id:${client_addr_id} ns2 => ns1"
+ sleep 0.5
+ verify_remove_event "$server_evts" "$REMOVED" "$server4_token" "$client_addr_id"
+
+ # RM_ADDR6 from the client to server machine
+ :>"$server_evts"
+ ip netns exec "$ns2" ./pm_nl_ctl rem token "$client6_token" id\
+ $client_addr_id
+ print_test "RM_ADDR6 id:${client_addr_id} ns2 => ns1"
+ sleep 0.5
+ verify_remove_event "$server_evts" "$REMOVED" "$server6_token" "$client_addr_id"
+
+ # Capture events on the network namespace running the client
+ :>"$client_evts"
+
+ # RM_ADDR from the server to client machine
+ ip netns exec "$ns1" ./pm_nl_ctl rem token "$server4_token" id\
+ $server_addr_id
+ print_test "RM_ADDR id:${server_addr_id} ns1 => ns2"
+ sleep 0.5
+ verify_remove_event "$client_evts" "$REMOVED" "$client4_token" "$server_addr_id"
+
+ # RM_ADDR from the server to client machine
+ :>"$client_evts"
+ server_addr_id=$(( server_addr_id - 1 ))
+ ip netns exec "$ns1" ./pm_nl_ctl rem token "$server4_token" id\
+ $server_addr_id
+ print_test "RM_ADDR id:${server_addr_id} ns1 => ns2"
+ sleep 0.5
+ verify_remove_event "$client_evts" "$REMOVED" "$client4_token" "$server_addr_id"
+
+ # RM_ADDR6 from the server to client machine
+ :>"$client_evts"
+ ip netns exec "$ns1" ./pm_nl_ctl rem token "$server6_token" id\
+ $server_addr_id
+ print_test "RM_ADDR6 id:${server_addr_id} ns1 => ns2"
+ sleep 0.5
+ verify_remove_event "$client_evts" "$REMOVED" "$client6_token" "$server_addr_id"
+}
+
+verify_subflow_events()
+{
+ local evt=$1
+ local e_type=$2
+ local e_token=$3
+ local e_family=$4
+ local e_saddr=$5
+ local e_daddr=$6
+ local e_dport=$7
+ local e_locid=$8
+ local e_remid=$9
+ shift 2
+ local e_from=$8
+ local e_to=$9
+ local type
+ local token
+ local family
+ local saddr
+ local daddr
+ local dport
+ local locid
+ local remid
+ local info
+
+ info="${e_saddr} (${e_from}) => ${e_daddr}:${e_dport} (${e_to})"
+
+ if [ "$e_type" = "$SUB_ESTABLISHED" ]
+ then
+ if [ "$e_family" = "$AF_INET6" ]
+ then
+ print_test "CREATE_SUBFLOW6 ${info}"
+ else
+ print_test "CREATE_SUBFLOW ${info}"
+ fi
+ else
+ if [ "$e_family" = "$AF_INET6" ]
+ then
+ print_test "DESTROY_SUBFLOW6 ${info}"
+ else
+ print_test "DESTROY_SUBFLOW ${info}"
+ fi
+ fi
+
+ type=$(mptcp_lib_evts_get_info type "$evt" $e_type)
+ token=$(mptcp_lib_evts_get_info token "$evt" $e_type)
+ family=$(mptcp_lib_evts_get_info family "$evt" $e_type)
+ dport=$(mptcp_lib_evts_get_info dport "$evt" $e_type)
+ locid=$(mptcp_lib_evts_get_info loc_id "$evt" $e_type)
+ remid=$(mptcp_lib_evts_get_info rem_id "$evt" $e_type)
+ if [ "$family" = "$AF_INET6" ]
+ then
+ saddr=$(mptcp_lib_evts_get_info saddr6 "$evt" $e_type)
+ daddr=$(mptcp_lib_evts_get_info daddr6 "$evt" $e_type)
+ else
+ saddr=$(mptcp_lib_evts_get_info saddr4 "$evt" $e_type)
+ daddr=$(mptcp_lib_evts_get_info daddr4 "$evt" $e_type)
+ fi
+
+ check_expected "type" "token" "daddr" "dport" "family" "saddr" "locid" "remid"
+}
+
+test_subflows()
+{
+ print_title "Subflows v4 or v6 only tests"
+
+ # Capture events on the network namespace running the server
+ :>"$server_evts"
+
+ # Attempt to add a listener at 10.0.2.2:<subflow-port>
+ ip netns exec "$ns2" ./pm_nl_ctl listen 10.0.2.2\
+ "$client4_port" &
+ local listener_pid=$!
+
+ # ADD_ADDR from client to server machine reusing the subflow port
+ ip netns exec "$ns2" ./pm_nl_ctl ann 10.0.2.2 token "$client4_token" id\
+ $client_addr_id
+ sleep 0.5
+
+ # CREATE_SUBFLOW from server to client machine
+ :>"$server_evts"
+ ip netns exec "$ns1" ./pm_nl_ctl csf lip 10.0.2.1 lid 23 rip 10.0.2.2\
+ rport "$client4_port" token "$server4_token"
+ sleep 0.5
+ verify_subflow_events $server_evts $SUB_ESTABLISHED $server4_token $AF_INET "10.0.2.1" \
+ "10.0.2.2" "$client4_port" "23" "$client_addr_id" "ns1" "ns2"
+
+ # Delete the listener from the client ns, if one was created
+ mptcp_lib_kill_wait $listener_pid
+
+ local sport
+ sport=$(mptcp_lib_evts_get_info sport "$server_evts" $SUB_ESTABLISHED)
+
+ # DESTROY_SUBFLOW from server to client machine
+ :>"$server_evts"
+ ip netns exec "$ns1" ./pm_nl_ctl dsf lip 10.0.2.1 lport "$sport" rip 10.0.2.2 rport\
+ "$client4_port" token "$server4_token"
+ sleep 0.5
+ verify_subflow_events "$server_evts" "$SUB_CLOSED" "$server4_token" "$AF_INET" "10.0.2.1"\
+ "10.0.2.2" "$client4_port" "23" "$client_addr_id" "ns1" "ns2"
+
+ # RM_ADDR from client to server machine
+ ip netns exec "$ns2" ./pm_nl_ctl rem id $client_addr_id token\
+ "$client4_token"
+ sleep 0.5
+
+ # Attempt to add a listener at dead:beef:2::2:<subflow-port>
+ ip netns exec "$ns2" ./pm_nl_ctl listen dead:beef:2::2\
+ "$client6_port" &
+ listener_pid=$!
+
+ # ADD_ADDR6 from client to server machine reusing the subflow port
+ :>"$server_evts"
+ ip netns exec "$ns2" ./pm_nl_ctl ann dead:beef:2::2 token "$client6_token" id\
+ $client_addr_id
+ sleep 0.5
+
+ # CREATE_SUBFLOW6 from server to client machine
+ :>"$server_evts"
+ ip netns exec "$ns1" ./pm_nl_ctl csf lip dead:beef:2::1 lid 23 rip\
+ dead:beef:2::2 rport "$client6_port" token "$server6_token"
+ sleep 0.5
+ verify_subflow_events "$server_evts" "$SUB_ESTABLISHED" "$server6_token" "$AF_INET6"\
+ "dead:beef:2::1" "dead:beef:2::2" "$client6_port" "23"\
+ "$client_addr_id" "ns1" "ns2"
+
+ # Delete the listener from the client ns, if one was created
+ mptcp_lib_kill_wait $listener_pid
+
+ sport=$(mptcp_lib_evts_get_info sport "$server_evts" $SUB_ESTABLISHED)
+
+ # DESTROY_SUBFLOW6 from server to client machine
+ :>"$server_evts"
+ ip netns exec "$ns1" ./pm_nl_ctl dsf lip dead:beef:2::1 lport "$sport" rip\
+ dead:beef:2::2 rport "$client6_port" token "$server6_token"
+ sleep 0.5
+ verify_subflow_events "$server_evts" "$SUB_CLOSED" "$server6_token" "$AF_INET6"\
+ "dead:beef:2::1" "dead:beef:2::2" "$client6_port" "23"\
+ "$client_addr_id" "ns1" "ns2"
+
+ # RM_ADDR from client to server machine
+ ip netns exec "$ns2" ./pm_nl_ctl rem id $client_addr_id token\
+ "$client6_token"
+ sleep 0.5
+
+ # Attempt to add a listener at 10.0.2.2:<new-port>
+ ip netns exec "$ns2" ./pm_nl_ctl listen 10.0.2.2\
+ $new4_port &
+ listener_pid=$!
+
+ # ADD_ADDR from client to server machine using a new port
+ :>"$server_evts"
+ ip netns exec "$ns2" ./pm_nl_ctl ann 10.0.2.2 token "$client4_token" id\
+ $client_addr_id port $new4_port
+ sleep 0.5
+
+ # CREATE_SUBFLOW from server to client machine
+ :>"$server_evts"
+ ip netns exec "$ns1" ./pm_nl_ctl csf lip 10.0.2.1 lid 23 rip 10.0.2.2 rport\
+ $new4_port token "$server4_token"
+ sleep 0.5
+ verify_subflow_events "$server_evts" "$SUB_ESTABLISHED" "$server4_token" "$AF_INET"\
+ "10.0.2.1" "10.0.2.2" "$new4_port" "23"\
+ "$client_addr_id" "ns1" "ns2"
+
+ # Delete the listener from the client ns, if one was created
+ mptcp_lib_kill_wait $listener_pid
+
+ sport=$(mptcp_lib_evts_get_info sport "$server_evts" $SUB_ESTABLISHED)
+
+ # DESTROY_SUBFLOW from server to client machine
+ :>"$server_evts"
+ ip netns exec "$ns1" ./pm_nl_ctl dsf lip 10.0.2.1 lport "$sport" rip 10.0.2.2 rport\
+ $new4_port token "$server4_token"
+ sleep 0.5
+ verify_subflow_events "$server_evts" "$SUB_CLOSED" "$server4_token" "$AF_INET" "10.0.2.1"\
+ "10.0.2.2" "$new4_port" "23" "$client_addr_id" "ns1" "ns2"
+
+ # RM_ADDR from client to server machine
+ ip netns exec "$ns2" ./pm_nl_ctl rem id $client_addr_id token\
+ "$client4_token"
+
+ # Capture events on the network namespace running the client
+ :>"$client_evts"
+
+ # Attempt to add a listener at 10.0.2.1:<subflow-port>
+ ip netns exec "$ns1" ./pm_nl_ctl listen 10.0.2.1\
+ $app4_port &
+ listener_pid=$!
+
+ # ADD_ADDR from server to client machine reusing the subflow port
+ ip netns exec "$ns1" ./pm_nl_ctl ann 10.0.2.1 token "$server4_token" id\
+ $server_addr_id
+ sleep 0.5
+
+ # CREATE_SUBFLOW from client to server machine
+ :>"$client_evts"
+ ip netns exec "$ns2" ./pm_nl_ctl csf lip 10.0.2.2 lid 23 rip 10.0.2.1 rport\
+ $app4_port token "$client4_token"
+ sleep 0.5
+ verify_subflow_events $client_evts $SUB_ESTABLISHED $client4_token $AF_INET "10.0.2.2"\
+ "10.0.2.1" "$app4_port" "23" "$server_addr_id" "ns2" "ns1"
+
+ # Delete the listener from the server ns, if one was created
+ mptcp_lib_kill_wait $listener_pid
+
+ sport=$(mptcp_lib_evts_get_info sport "$client_evts" $SUB_ESTABLISHED)
+
+ # DESTROY_SUBFLOW from client to server machine
+ :>"$client_evts"
+ ip netns exec "$ns2" ./pm_nl_ctl dsf lip 10.0.2.2 lport "$sport" rip 10.0.2.1 rport\
+ $app4_port token "$client4_token"
+ sleep 0.5
+ verify_subflow_events "$client_evts" "$SUB_CLOSED" "$client4_token" "$AF_INET" "10.0.2.2"\
+ "10.0.2.1" "$app4_port" "23" "$server_addr_id" "ns2" "ns1"
+
+ # RM_ADDR from server to client machine
+ ip netns exec "$ns1" ./pm_nl_ctl rem id $server_addr_id token\
+ "$server4_token"
+ sleep 0.5
+
+ # Attempt to add a listener at dead:beef:2::1:<subflow-port>
+ ip netns exec "$ns1" ./pm_nl_ctl listen dead:beef:2::1\
+ $app6_port &
+ listener_pid=$!
+
+ # ADD_ADDR6 from server to client machine reusing the subflow port
+ :>"$client_evts"
+ ip netns exec "$ns1" ./pm_nl_ctl ann dead:beef:2::1 token "$server6_token" id\
+ $server_addr_id
+ sleep 0.5
+
+ # CREATE_SUBFLOW6 from client to server machine
+ :>"$client_evts"
+ ip netns exec "$ns2" ./pm_nl_ctl csf lip dead:beef:2::2 lid 23 rip\
+ dead:beef:2::1 rport $app6_port token "$client6_token"
+ sleep 0.5
+ verify_subflow_events "$client_evts" "$SUB_ESTABLISHED" "$client6_token"\
+ "$AF_INET6" "dead:beef:2::2"\
+ "dead:beef:2::1" "$app6_port" "23"\
+ "$server_addr_id" "ns2" "ns1"
+
+ # Delete the listener from the server ns, if one was created
+ mptcp_lib_kill_wait $listener_pid
+
+ sport=$(mptcp_lib_evts_get_info sport "$client_evts" $SUB_ESTABLISHED)
+
+ # DESTROY_SUBFLOW6 from client to server machine
+ :>"$client_evts"
+ ip netns exec "$ns2" ./pm_nl_ctl dsf lip dead:beef:2::2 lport "$sport" rip\
+ dead:beef:2::1 rport $app6_port token "$client6_token"
+ sleep 0.5
+ verify_subflow_events $client_evts $SUB_CLOSED $client6_token $AF_INET6 "dead:beef:2::2"\
+ "dead:beef:2::1" "$app6_port" "23" "$server_addr_id" "ns2" "ns1"
+
+ # RM_ADDR6 from server to client machine
+ ip netns exec "$ns1" ./pm_nl_ctl rem id $server_addr_id token\
+ "$server6_token"
+ sleep 0.5
+
+ # Attempt to add a listener at 10.0.2.1:<new-port>
+ ip netns exec "$ns1" ./pm_nl_ctl listen 10.0.2.1\
+ $new4_port &
+ listener_pid=$!
+
+ # ADD_ADDR from server to client machine using a new port
+ :>"$client_evts"
+ ip netns exec "$ns1" ./pm_nl_ctl ann 10.0.2.1 token "$server4_token" id\
+ $server_addr_id port $new4_port
+ sleep 0.5
+
+ # CREATE_SUBFLOW from client to server machine
+ :>"$client_evts"
+ ip netns exec "$ns2" ./pm_nl_ctl csf lip 10.0.2.2 lid 23 rip 10.0.2.1 rport\
+ $new4_port token "$client4_token"
+ sleep 0.5
+ verify_subflow_events "$client_evts" "$SUB_ESTABLISHED" "$client4_token" "$AF_INET"\
+ "10.0.2.2" "10.0.2.1" "$new4_port" "23" "$server_addr_id" "ns2" "ns1"
+
+ # Delete the listener from the server ns, if one was created
+ mptcp_lib_kill_wait $listener_pid
+
+ sport=$(mptcp_lib_evts_get_info sport "$client_evts" $SUB_ESTABLISHED)
+
+ # DESTROY_SUBFLOW from client to server machine
+ :>"$client_evts"
+ ip netns exec "$ns2" ./pm_nl_ctl dsf lip 10.0.2.2 lport "$sport" rip 10.0.2.1 rport\
+ $new4_port token "$client4_token"
+ sleep 0.5
+ verify_subflow_events "$client_evts" "$SUB_CLOSED" "$client4_token" "$AF_INET" "10.0.2.2"\
+ "10.0.2.1" "$new4_port" "23" "$server_addr_id" "ns2" "ns1"
+
+ # RM_ADDR from server to client machine
+ ip netns exec "$ns1" ./pm_nl_ctl rem id $server_addr_id token\
+ "$server4_token"
+}
+
+test_subflows_v4_v6_mix()
+{
+ print_title "Subflows v4 and v6 mix tests"
+
+ # Attempt to add a listener at 10.0.2.1:<subflow-port>
+ ip netns exec "$ns1" ./pm_nl_ctl listen 10.0.2.1\
+ $app6_port &
+ local listener_pid=$!
+
+ # ADD_ADDR4 from server to client machine reusing the subflow port on
+ # the established v6 connection
+ :>"$client_evts"
+ ip netns exec "$ns1" ./pm_nl_ctl ann 10.0.2.1 token "$server6_token" id\
+ $server_addr_id dev ns1eth2
+ print_test "ADD_ADDR4 id:${server_addr_id} 10.0.2.1 (ns1) => ns2, reuse port"
+ sleep 0.5
+ verify_announce_event "$client_evts" "$ANNOUNCED" "$client6_token" "10.0.2.1"\
+ "$server_addr_id" "$app6_port"
+
+ # CREATE_SUBFLOW from client to server machine
+ :>"$client_evts"
+ ip netns exec "$ns2" ./pm_nl_ctl csf lip 10.0.2.2 lid 23 rip 10.0.2.1 rport\
+ $app6_port token "$client6_token"
+ sleep 0.5
+ verify_subflow_events "$client_evts" "$SUB_ESTABLISHED" "$client6_token"\
+ "$AF_INET" "10.0.2.2" "10.0.2.1" "$app6_port" "23"\
+ "$server_addr_id" "ns2" "ns1"
+
+ # Delete the listener from the server ns, if one was created
+ mptcp_lib_kill_wait $listener_pid
+
+ sport=$(mptcp_lib_evts_get_info sport "$client_evts" $SUB_ESTABLISHED)
+
+ # DESTROY_SUBFLOW from client to server machine
+ :>"$client_evts"
+ ip netns exec "$ns2" ./pm_nl_ctl dsf lip 10.0.2.2 lport "$sport" rip 10.0.2.1 rport\
+ $app6_port token "$client6_token"
+ sleep 0.5
+ verify_subflow_events "$client_evts" "$SUB_CLOSED" "$client6_token" \
+ "$AF_INET" "10.0.2.2" "10.0.2.1" "$app6_port" "23"\
+ "$server_addr_id" "ns2" "ns1"
+
+ # RM_ADDR from server to client machine
+ ip netns exec "$ns1" ./pm_nl_ctl rem id $server_addr_id token\
+ "$server6_token"
+ sleep 0.5
+}
+
+test_prio()
+{
+ print_title "Prio tests"
+
+ local count
+
+ # Send MP_PRIO signal from client to server machine
+ ip netns exec "$ns2" ./pm_nl_ctl set 10.0.1.2 port "$client4_port" flags backup token "$client4_token" rip 10.0.1.1 rport "$app4_port"
+ sleep 0.5
+
+ # Check TX
+ print_test "MP_PRIO TX"
+ count=$(mptcp_lib_get_counter "$ns2" "MPTcpExtMPPrioTx")
+ if [ -z "$count" ]; then
+ test_skip
+ elif [ $count != 1 ]; then
+ test_fail "Count != 1: ${count}"
+ else
+ test_pass
+ fi
+
+ # Check RX
+ print_test "MP_PRIO RX"
+ count=$(mptcp_lib_get_counter "$ns1" "MPTcpExtMPPrioRx")
+ if [ -z "$count" ]; then
+ test_skip
+ elif [ $count != 1 ]; then
+ test_fail "Count != 1: ${count}"
+ else
+ test_pass
+ fi
+}
+
+verify_listener_events()
+{
+ if mptcp_lib_verify_listener_events "${@}"; then
+ test_pass
+ else
+ test_fail
+ fi
+}
+
+test_listener()
+{
+ print_title "Listener tests"
+
+ if ! mptcp_lib_kallsyms_has "mptcp_event_pm_listener$"; then
+ print_test "LISTENER events"
+ test_skip
+ return
+ fi
+
+ # Capture events on the network namespace running the client
+ :>$client_evts
+
+ # Attempt to add a listener at 10.0.2.2:<subflow-port>
+ ip netns exec $ns2 ./pm_nl_ctl listen 10.0.2.2\
+ $client4_port &
+ local listener_pid=$!
+
+ sleep 0.5
+ print_test "CREATE_LISTENER 10.0.2.2:$client4_port"
+ verify_listener_events $client_evts $LISTENER_CREATED $AF_INET 10.0.2.2 $client4_port
+
+ # ADD_ADDR from client to server machine reusing the subflow port
+ ip netns exec $ns2 ./pm_nl_ctl ann 10.0.2.2 token $client4_token id\
+ $client_addr_id
+ sleep 0.5
+
+ # CREATE_SUBFLOW from server to client machine
+ ip netns exec $ns1 ./pm_nl_ctl csf lip 10.0.2.1 lid 23 rip 10.0.2.2\
+ rport $client4_port token $server4_token
+ sleep 0.5
+
+ # Delete the listener from the client ns, if one was created
+ mptcp_lib_kill_wait $listener_pid
+
+ sleep 0.5
+ print_test "CLOSE_LISTENER 10.0.2.2:$client4_port"
+ verify_listener_events $client_evts $LISTENER_CLOSED $AF_INET 10.0.2.2 $client4_port
+}
+
+print_title "Make connections"
+make_connection
+make_connection "v6"
+
+test_announce
+test_remove
+test_subflows
+test_subflows_v4_v6_mix
+test_prio
+test_listener
+
+mptcp_lib_result_print_all_tap
+exit ${ret}
diff --git a/tools/testing/selftests/net/msg_zerocopy.c b/tools/testing/selftests/net/msg_zerocopy.c
index 4b02933cab8a..bdc03a2097e8 100644
--- a/tools/testing/selftests/net/msg_zerocopy.c
+++ b/tools/testing/selftests/net/msg_zerocopy.c
@@ -125,9 +125,8 @@ static int do_setcpu(int cpu)
CPU_ZERO(&mask);
CPU_SET(cpu, &mask);
if (sched_setaffinity(0, sizeof(mask), &mask))
- error(1, 0, "setaffinity %d", cpu);
-
- if (cfg_verbose)
+ fprintf(stderr, "cpu: unable to pin, may increase variance.\n");
+ else if (cfg_verbose)
fprintf(stderr, "cpu: %u\n", cpu);
return 0;
diff --git a/tools/testing/selftests/net/msg_zerocopy.sh b/tools/testing/selftests/net/msg_zerocopy.sh
index 825ffec85cea..89c22f5320e0 100755
--- a/tools/testing/selftests/net/msg_zerocopy.sh
+++ b/tools/testing/selftests/net/msg_zerocopy.sh
@@ -70,23 +70,22 @@ case "${TXMODE}" in
esac
# Start of state changes: install cleanup handler
-save_sysctl_mem="$(sysctl -n ${path_sysctl_mem})"
cleanup() {
ip netns del "${NS2}"
ip netns del "${NS1}"
- sysctl -w -q "${path_sysctl_mem}=${save_sysctl_mem}"
}
trap cleanup EXIT
-# Configure system settings
-sysctl -w -q "${path_sysctl_mem}=1000000"
-
# Create virtual ethernet pair between network namespaces
ip netns add "${NS1}"
ip netns add "${NS2}"
+# Configure system settings
+ip netns exec "${NS1}" sysctl -w -q "${path_sysctl_mem}=1000000"
+ip netns exec "${NS2}" sysctl -w -q "${path_sysctl_mem}=1000000"
+
ip link add "${DEV}" mtu "${DEV_MTU}" netns "${NS1}" type veth \
peer name "${DEV}" mtu "${DEV_MTU}" netns "${NS2}"
diff --git a/tools/testing/selftests/net/nat6to4.c b/tools/testing/selftests/net/nat6to4.c
new file mode 100644
index 000000000000..ac54c36b25fc
--- /dev/null
+++ b/tools/testing/selftests/net/nat6to4.c
@@ -0,0 +1,285 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * This code is taken from the Android Open Source Project and the author
+ * (Maciej Żenczykowski) has gave permission to relicense it under the
+ * GPLv2. Therefore this program is free software;
+ * You can redistribute it and/or modify it under the terms of the GNU
+ * General Public License version 2 as published by the Free Software
+ * Foundation
+
+ * The original headers, including the original license headers, are
+ * included below for completeness.
+ *
+ * Copyright (C) 2019 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#include <linux/bpf.h>
+#include <linux/if.h>
+#include <linux/if_ether.h>
+#include <linux/if_packet.h>
+#include <linux/in.h>
+#include <linux/in6.h>
+#include <linux/ip.h>
+#include <linux/ipv6.h>
+#include <linux/pkt_cls.h>
+#include <linux/swab.h>
+#include <stdbool.h>
+#include <stdint.h>
+
+
+#include <linux/udp.h>
+
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_endian.h>
+
+#define IP_DF 0x4000 // Flag: "Don't Fragment"
+
+SEC("schedcls/ingress6/nat_6")
+int sched_cls_ingress6_nat_6_prog(struct __sk_buff *skb)
+{
+ const int l2_header_size = sizeof(struct ethhdr);
+ void *data = (void *)(long)skb->data;
+ const void *data_end = (void *)(long)skb->data_end;
+ const struct ethhdr * const eth = data; // used iff is_ethernet
+ const struct ipv6hdr * const ip6 = (void *)(eth + 1);
+
+ // Require ethernet dst mac address to be our unicast address.
+ if (skb->pkt_type != PACKET_HOST)
+ return TC_ACT_OK;
+
+ // Must be meta-ethernet IPv6 frame
+ if (skb->protocol != bpf_htons(ETH_P_IPV6))
+ return TC_ACT_OK;
+
+ // Must have (ethernet and) ipv6 header
+ if (data + l2_header_size + sizeof(*ip6) > data_end)
+ return TC_ACT_OK;
+
+ // Ethertype - if present - must be IPv6
+ if (eth->h_proto != bpf_htons(ETH_P_IPV6))
+ return TC_ACT_OK;
+
+ // IP version must be 6
+ if (ip6->version != 6)
+ return TC_ACT_OK;
+ // Maximum IPv6 payload length that can be translated to IPv4
+ if (bpf_ntohs(ip6->payload_len) > 0xFFFF - sizeof(struct iphdr))
+ return TC_ACT_OK;
+ switch (ip6->nexthdr) {
+ case IPPROTO_TCP: // For TCP & UDP the checksum neutrality of the chosen IPv6
+ case IPPROTO_UDP: // address means there is no need to update their checksums.
+ case IPPROTO_GRE: // We do not need to bother looking at GRE/ESP headers,
+ case IPPROTO_ESP: // since there is never a checksum to update.
+ break;
+ default: // do not know how to handle anything else
+ return TC_ACT_OK;
+ }
+
+ struct ethhdr eth2; // used iff is_ethernet
+
+ eth2 = *eth; // Copy over the ethernet header (src/dst mac)
+ eth2.h_proto = bpf_htons(ETH_P_IP); // But replace the ethertype
+
+ struct iphdr ip = {
+ .version = 4, // u4
+ .ihl = sizeof(struct iphdr) / sizeof(__u32), // u4
+ .tos = (ip6->priority << 4) + (ip6->flow_lbl[0] >> 4), // u8
+ .tot_len = bpf_htons(bpf_ntohs(ip6->payload_len) + sizeof(struct iphdr)), // u16
+ .id = 0, // u16
+ .frag_off = bpf_htons(IP_DF), // u16
+ .ttl = ip6->hop_limit, // u8
+ .protocol = ip6->nexthdr, // u8
+ .check = 0, // u16
+ .saddr = 0x0201a8c0, // u32
+ .daddr = 0x0101a8c0, // u32
+ };
+
+ // Calculate the IPv4 one's complement checksum of the IPv4 header.
+ __wsum sum4 = 0;
+
+ for (int i = 0; i < sizeof(ip) / sizeof(__u16); ++i)
+ sum4 += ((__u16 *)&ip)[i];
+
+ // Note that sum4 is guaranteed to be non-zero by virtue of ip.version == 4
+ sum4 = (sum4 & 0xFFFF) + (sum4 >> 16); // collapse u32 into range 1 .. 0x1FFFE
+ sum4 = (sum4 & 0xFFFF) + (sum4 >> 16); // collapse any potential carry into u16
+ ip.check = (__u16)~sum4; // sum4 cannot be zero, so this is never 0xFFFF
+
+ // Calculate the *negative* IPv6 16-bit one's complement checksum of the IPv6 header.
+ __wsum sum6 = 0;
+ // We'll end up with a non-zero sum due to ip6->version == 6 (which has '0' bits)
+ for (int i = 0; i < sizeof(*ip6) / sizeof(__u16); ++i)
+ sum6 += ~((__u16 *)ip6)[i]; // note the bitwise negation
+
+ // Note that there is no L4 checksum update: we are relying on the checksum neutrality
+ // of the ipv6 address chosen by netd's ClatdController.
+
+ // Packet mutations begin - point of no return, but if this first modification fails
+ // the packet is probably still pristine, so let clatd handle it.
+ if (bpf_skb_change_proto(skb, bpf_htons(ETH_P_IP), 0))
+ return TC_ACT_OK;
+ bpf_csum_update(skb, sum6);
+
+ data = (void *)(long)skb->data;
+ data_end = (void *)(long)skb->data_end;
+ if (data + l2_header_size + sizeof(struct iphdr) > data_end)
+ return TC_ACT_SHOT;
+
+ struct ethhdr *new_eth = data;
+
+ // Copy over the updated ethernet header
+ *new_eth = eth2;
+
+ // Copy over the new ipv4 header.
+ *(struct iphdr *)(new_eth + 1) = ip;
+ return bpf_redirect(skb->ifindex, BPF_F_INGRESS);
+}
+
+SEC("schedcls/egress4/snat4")
+int sched_cls_egress4_snat4_prog(struct __sk_buff *skb)
+{
+ const int l2_header_size = sizeof(struct ethhdr);
+ void *data = (void *)(long)skb->data;
+ const void *data_end = (void *)(long)skb->data_end;
+ const struct ethhdr *const eth = data; // used iff is_ethernet
+ const struct iphdr *const ip4 = (void *)(eth + 1);
+
+ // Must be meta-ethernet IPv4 frame
+ if (skb->protocol != bpf_htons(ETH_P_IP))
+ return TC_ACT_OK;
+
+ // Must have ipv4 header
+ if (data + l2_header_size + sizeof(struct ipv6hdr) > data_end)
+ return TC_ACT_OK;
+
+ // Ethertype - if present - must be IPv4
+ if (eth->h_proto != bpf_htons(ETH_P_IP))
+ return TC_ACT_OK;
+
+ // IP version must be 4
+ if (ip4->version != 4)
+ return TC_ACT_OK;
+
+ // We cannot handle IP options, just standard 20 byte == 5 dword minimal IPv4 header
+ if (ip4->ihl != 5)
+ return TC_ACT_OK;
+
+ // Maximum IPv6 payload length that can be translated to IPv4
+ if (bpf_htons(ip4->tot_len) > 0xFFFF - sizeof(struct ipv6hdr))
+ return TC_ACT_OK;
+
+ // Calculate the IPv4 one's complement checksum of the IPv4 header.
+ __wsum sum4 = 0;
+
+ for (int i = 0; i < sizeof(*ip4) / sizeof(__u16); ++i)
+ sum4 += ((__u16 *)ip4)[i];
+
+ // Note that sum4 is guaranteed to be non-zero by virtue of ip4->version == 4
+ sum4 = (sum4 & 0xFFFF) + (sum4 >> 16); // collapse u32 into range 1 .. 0x1FFFE
+ sum4 = (sum4 & 0xFFFF) + (sum4 >> 16); // collapse any potential carry into u16
+ // for a correct checksum we should get *a* zero, but sum4 must be positive, ie 0xFFFF
+ if (sum4 != 0xFFFF)
+ return TC_ACT_OK;
+
+ // Minimum IPv4 total length is the size of the header
+ if (bpf_ntohs(ip4->tot_len) < sizeof(*ip4))
+ return TC_ACT_OK;
+
+ // We are incapable of dealing with IPv4 fragments
+ if (ip4->frag_off & ~bpf_htons(IP_DF))
+ return TC_ACT_OK;
+
+ switch (ip4->protocol) {
+ case IPPROTO_TCP: // For TCP & UDP the checksum neutrality of the chosen IPv6
+ case IPPROTO_GRE: // address means there is no need to update their checksums.
+ case IPPROTO_ESP: // We do not need to bother looking at GRE/ESP headers,
+ break; // since there is never a checksum to update.
+
+ case IPPROTO_UDP: // See above comment, but must also have UDP header...
+ if (data + sizeof(*ip4) + sizeof(struct udphdr) > data_end)
+ return TC_ACT_OK;
+ const struct udphdr *uh = (const struct udphdr *)(ip4 + 1);
+ // If IPv4/UDP checksum is 0 then fallback to clatd so it can calculate the
+ // checksum. Otherwise the network or more likely the NAT64 gateway might
+ // drop the packet because in most cases IPv6/UDP packets with a zero checksum
+ // are invalid. See RFC 6935. TODO: calculate checksum via bpf_csum_diff()
+ if (!uh->check)
+ return TC_ACT_OK;
+ break;
+
+ default: // do not know how to handle anything else
+ return TC_ACT_OK;
+ }
+ struct ethhdr eth2; // used iff is_ethernet
+
+ eth2 = *eth; // Copy over the ethernet header (src/dst mac)
+ eth2.h_proto = bpf_htons(ETH_P_IPV6); // But replace the ethertype
+
+ struct ipv6hdr ip6 = {
+ .version = 6, // __u8:4
+ .priority = ip4->tos >> 4, // __u8:4
+ .flow_lbl = {(ip4->tos & 0xF) << 4, 0, 0}, // __u8[3]
+ .payload_len = bpf_htons(bpf_ntohs(ip4->tot_len) - 20), // __be16
+ .nexthdr = ip4->protocol, // __u8
+ .hop_limit = ip4->ttl, // __u8
+ };
+ ip6.saddr.in6_u.u6_addr32[0] = bpf_htonl(0x20010db8);
+ ip6.saddr.in6_u.u6_addr32[1] = 0;
+ ip6.saddr.in6_u.u6_addr32[2] = 0;
+ ip6.saddr.in6_u.u6_addr32[3] = bpf_htonl(1);
+ ip6.daddr.in6_u.u6_addr32[0] = bpf_htonl(0x20010db8);
+ ip6.daddr.in6_u.u6_addr32[1] = 0;
+ ip6.daddr.in6_u.u6_addr32[2] = 0;
+ ip6.daddr.in6_u.u6_addr32[3] = bpf_htonl(2);
+
+ // Calculate the IPv6 16-bit one's complement checksum of the IPv6 header.
+ __wsum sum6 = 0;
+ // We'll end up with a non-zero sum due to ip6.version == 6
+ for (int i = 0; i < sizeof(ip6) / sizeof(__u16); ++i)
+ sum6 += ((__u16 *)&ip6)[i];
+
+ // Packet mutations begin - point of no return, but if this first modification fails
+ // the packet is probably still pristine, so let clatd handle it.
+ if (bpf_skb_change_proto(skb, bpf_htons(ETH_P_IPV6), 0))
+ return TC_ACT_OK;
+
+ // This takes care of updating the skb->csum field for a CHECKSUM_COMPLETE packet.
+ // In such a case, skb->csum is a 16-bit one's complement sum of the entire payload,
+ // thus we need to subtract out the ipv4 header's sum, and add in the ipv6 header's sum.
+ // However, we've already verified the ipv4 checksum is correct and thus 0.
+ // Thus we only need to add the ipv6 header's sum.
+ //
+ // bpf_csum_update() always succeeds if the skb is CHECKSUM_COMPLETE and returns an error
+ // (-ENOTSUPP) if it isn't. So we just ignore the return code (see above for more details).
+ bpf_csum_update(skb, sum6);
+
+ // bpf_skb_change_proto() invalidates all pointers - reload them.
+ data = (void *)(long)skb->data;
+ data_end = (void *)(long)skb->data_end;
+
+ // I cannot think of any valid way for this error condition to trigger, however I do
+ // believe the explicit check is required to keep the in kernel ebpf verifier happy.
+ if (data + l2_header_size + sizeof(ip6) > data_end)
+ return TC_ACT_SHOT;
+
+ struct ethhdr *new_eth = data;
+
+ // Copy over the updated ethernet header
+ *new_eth = eth2;
+ // Copy over the new ipv4 header.
+ *(struct ipv6hdr *)(new_eth + 1) = ip6;
+ return TC_ACT_OK;
+}
+
+char _license[] SEC("license") = ("GPL");
diff --git a/tools/testing/selftests/net/ndisc_unsolicited_na_test.sh b/tools/testing/selftests/net/ndisc_unsolicited_na_test.sh
new file mode 100755
index 000000000000..5db69dad0cfc
--- /dev/null
+++ b/tools/testing/selftests/net/ndisc_unsolicited_na_test.sh
@@ -0,0 +1,249 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+# This test is for the accept_untracked_na feature to
+# enable RFC9131 behaviour. The following is the test-matrix.
+# drop accept fwding behaviour
+# ---- ------ ------ ----------------------------------------------
+# 1 X X Don't update NC
+# 0 0 X Don't update NC
+# 0 1 0 Don't update NC
+# 0 1 1 Add a STALE NC entry
+
+source lib.sh
+ret=0
+
+PAUSE_ON_FAIL=no
+PAUSE=no
+
+HOST_INTF="veth-host"
+ROUTER_INTF="veth-router"
+
+ROUTER_ADDR="2000:20::1"
+HOST_ADDR="2000:20::2"
+SUBNET_WIDTH=64
+ROUTER_ADDR_WITH_MASK="${ROUTER_ADDR}/${SUBNET_WIDTH}"
+HOST_ADDR_WITH_MASK="${HOST_ADDR}/${SUBNET_WIDTH}"
+
+tcpdump_stdout=
+tcpdump_stderr=
+
+log_test()
+{
+ local rc=$1
+ local expected=$2
+ local msg="$3"
+
+ if [ ${rc} -eq ${expected} ]; then
+ printf " TEST: %-60s [ OK ]\n" "${msg}"
+ nsuccess=$((nsuccess+1))
+ else
+ ret=1
+ nfail=$((nfail+1))
+ printf " TEST: %-60s [FAIL]\n" "${msg}"
+ if [ "${PAUSE_ON_FAIL}" = "yes" ]; then
+ echo
+ echo "hit enter to continue, 'q' to quit"
+ read a
+ [ "$a" = "q" ] && exit 1
+ fi
+ fi
+
+ if [ "${PAUSE}" = "yes" ]; then
+ echo
+ echo "hit enter to continue, 'q' to quit"
+ read a
+ [ "$a" = "q" ] && exit 1
+ fi
+}
+
+setup()
+{
+ set -e
+
+ local drop_unsolicited_na=$1
+ local accept_untracked_na=$2
+ local forwarding=$3
+
+ # Setup two namespaces and a veth tunnel across them.
+ # On end of the tunnel is a router and the other end is a host.
+ setup_ns HOST_NS ROUTER_NS
+ IP_HOST="ip -6 -netns ${HOST_NS}"
+ IP_HOST_EXEC="ip netns exec ${HOST_NS}"
+ IP_ROUTER="ip -6 -netns ${ROUTER_NS}"
+ IP_ROUTER_EXEC="ip netns exec ${ROUTER_NS}"
+
+ ${IP_ROUTER} link add ${ROUTER_INTF} type veth \
+ peer name ${HOST_INTF} netns ${HOST_NS}
+
+ # Enable IPv6 on both router and host, and configure static addresses.
+ # The router here is the DUT
+ # Setup router configuration as specified by the arguments.
+ # forwarding=0 case is to check that a non-router
+ # doesn't add neighbour entries.
+ ROUTER_CONF=net.ipv6.conf.${ROUTER_INTF}
+ ${IP_ROUTER_EXEC} sysctl -qw \
+ ${ROUTER_CONF}.forwarding=${forwarding}
+ ${IP_ROUTER_EXEC} sysctl -qw \
+ ${ROUTER_CONF}.drop_unsolicited_na=${drop_unsolicited_na}
+ ${IP_ROUTER_EXEC} sysctl -qw \
+ ${ROUTER_CONF}.accept_untracked_na=${accept_untracked_na}
+ ${IP_ROUTER_EXEC} sysctl -qw ${ROUTER_CONF}.disable_ipv6=0
+ ${IP_ROUTER} addr add ${ROUTER_ADDR_WITH_MASK} dev ${ROUTER_INTF}
+
+ # Turn on ndisc_notify on host interface so that
+ # the host sends unsolicited NAs.
+ HOST_CONF=net.ipv6.conf.${HOST_INTF}
+ ${IP_HOST_EXEC} sysctl -qw ${HOST_CONF}.ndisc_notify=1
+ ${IP_HOST_EXEC} sysctl -qw ${HOST_CONF}.disable_ipv6=0
+ ${IP_HOST} addr add ${HOST_ADDR_WITH_MASK} dev ${HOST_INTF}
+
+ set +e
+}
+
+start_tcpdump() {
+ set -e
+ tcpdump_stdout=`mktemp`
+ tcpdump_stderr=`mktemp`
+ ${IP_ROUTER_EXEC} timeout 15s \
+ tcpdump --immediate-mode -tpni ${ROUTER_INTF} -c 1 \
+ "icmp6 && icmp6[0] == 136 && src ${HOST_ADDR}" \
+ > ${tcpdump_stdout} 2> /dev/null
+ set +e
+}
+
+cleanup_tcpdump()
+{
+ set -e
+ [[ ! -z ${tcpdump_stdout} ]] && rm -f ${tcpdump_stdout}
+ [[ ! -z ${tcpdump_stderr} ]] && rm -f ${tcpdump_stderr}
+ tcpdump_stdout=
+ tcpdump_stderr=
+ set +e
+}
+
+cleanup()
+{
+ cleanup_tcpdump
+ ip netns del ${HOST_NS}
+ ip netns del ${ROUTER_NS}
+}
+
+link_up() {
+ set -e
+ ${IP_ROUTER} link set dev ${ROUTER_INTF} up
+ ${IP_HOST} link set dev ${HOST_INTF} up
+ set +e
+}
+
+verify_ndisc() {
+ local drop_unsolicited_na=$1
+ local accept_untracked_na=$2
+ local forwarding=$3
+
+ neigh_show_output=$(${IP_ROUTER} neigh show \
+ to ${HOST_ADDR} dev ${ROUTER_INTF} nud stale)
+ if [ ${drop_unsolicited_na} -eq 0 ] && \
+ [ ${accept_untracked_na} -eq 1 ] && \
+ [ ${forwarding} -eq 1 ]; then
+ # Neighbour entry expected to be present for 011 case
+ [[ ${neigh_show_output} ]]
+ else
+ # Neighbour entry expected to be absent for all other cases
+ [[ -z ${neigh_show_output} ]]
+ fi
+}
+
+test_unsolicited_na_common()
+{
+ # Setup the test bed, but keep links down
+ setup $1 $2 $3
+
+ # Bring the link up, wait for the NA,
+ # and add a delay to ensure neighbour processing is done.
+ link_up
+ start_tcpdump
+
+ # Verify the neighbour table
+ verify_ndisc $1 $2 $3
+
+}
+
+test_unsolicited_na_combination() {
+ test_unsolicited_na_common $1 $2 $3
+ test_msg=("test_unsolicited_na: "
+ "drop_unsolicited_na=$1 "
+ "accept_untracked_na=$2 "
+ "forwarding=$3")
+ log_test $? 0 "${test_msg[*]}"
+ cleanup
+}
+
+test_unsolicited_na_combinations() {
+ # Args: drop_unsolicited_na accept_untracked_na forwarding
+
+ # Expect entry
+ test_unsolicited_na_combination 0 1 1
+
+ # Expect no entry
+ test_unsolicited_na_combination 0 0 0
+ test_unsolicited_na_combination 0 0 1
+ test_unsolicited_na_combination 0 1 0
+ test_unsolicited_na_combination 1 0 0
+ test_unsolicited_na_combination 1 0 1
+ test_unsolicited_na_combination 1 1 0
+ test_unsolicited_na_combination 1 1 1
+}
+
+###############################################################################
+# usage
+
+usage()
+{
+ cat <<EOF
+usage: ${0##*/} OPTS
+ -p Pause on fail
+ -P Pause after each test before cleanup
+EOF
+}
+
+###############################################################################
+# main
+
+while getopts :pPh o
+do
+ case $o in
+ p) PAUSE_ON_FAIL=yes;;
+ P) PAUSE=yes;;
+ h) usage; exit 0;;
+ *) usage; exit 1;;
+ esac
+done
+
+# make sure we don't pause twice
+[ "${PAUSE}" = "yes" ] && PAUSE_ON_FAIL=no
+
+if [ "$(id -u)" -ne 0 ];then
+ echo "SKIP: Need root privileges"
+ exit $ksft_skip;
+fi
+
+if [ ! -x "$(command -v ip)" ]; then
+ echo "SKIP: Could not run test without ip tool"
+ exit $ksft_skip
+fi
+
+if [ ! -x "$(command -v tcpdump)" ]; then
+ echo "SKIP: Could not run test without tcpdump tool"
+ exit $ksft_skip
+fi
+
+# start clean
+cleanup &> /dev/null
+
+test_unsolicited_na_combinations
+
+printf "\nTests passed: %3d\n" ${nsuccess}
+printf "Tests failed: %3d\n" ${nfail}
+
+exit $ret
diff --git a/tools/testing/selftests/net/net_helper.sh b/tools/testing/selftests/net/net_helper.sh
new file mode 100644
index 000000000000..6596fe03c77f
--- /dev/null
+++ b/tools/testing/selftests/net/net_helper.sh
@@ -0,0 +1,25 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+#
+# Helper functions
+
+wait_local_port_listen()
+{
+ local listener_ns="${1}"
+ local port="${2}"
+ local protocol="${3}"
+ local pattern
+ local i
+
+ pattern=":$(printf "%04X" "${port}") "
+
+ # for tcp protocol additionally check the socket state
+ [ ${protocol} = "tcp" ] && pattern="${pattern}0A"
+ for i in $(seq 10); do
+ if ip netns exec "${listener_ns}" awk '{print $2" "$4}' \
+ /proc/net/"${protocol}"* | grep -q "${pattern}"; then
+ break
+ fi
+ sleep 0.1
+ done
+}
diff --git a/tools/testing/selftests/net/netns-name.sh b/tools/testing/selftests/net/netns-name.sh
new file mode 100755
index 000000000000..6974474c26f3
--- /dev/null
+++ b/tools/testing/selftests/net/netns-name.sh
@@ -0,0 +1,87 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+source lib.sh
+set -o pipefail
+
+DEV=dummy-dev0
+DEV2=dummy-dev1
+ALT_NAME=some-alt-name
+
+RET_CODE=0
+
+cleanup() {
+ cleanup_ns $NS $test_ns
+}
+
+trap cleanup EXIT
+
+fail() {
+ echo "ERROR: ${1:-unexpected return code} (ret: $_)" >&2
+ RET_CODE=1
+}
+
+setup_ns NS test_ns
+
+#
+# Test basic move without a rename
+#
+ip -netns $NS link add name $DEV type dummy || fail
+ip -netns $NS link set dev $DEV netns $test_ns ||
+ fail "Can't perform a netns move"
+ip -netns $test_ns link show dev $DEV >> /dev/null || fail "Device not found after move"
+ip -netns $test_ns link del $DEV || fail
+
+#
+# Test move with a conflict
+#
+ip -netns $test_ns link add name $DEV type dummy
+ip -netns $NS link add name $DEV type dummy || fail
+ip -netns $NS link set dev $DEV netns $test_ns 2> /dev/null &&
+ fail "Performed a netns move with a name conflict"
+ip -netns $test_ns link show dev $DEV >> /dev/null || fail "Device not found after move"
+ip -netns $NS link del $DEV || fail
+ip -netns $test_ns link del $DEV || fail
+
+#
+# Test move with a conflict and rename
+#
+ip -netns $test_ns link add name $DEV type dummy
+ip -netns $NS link add name $DEV type dummy || fail
+ip -netns $NS link set dev $DEV netns $test_ns name $DEV2 ||
+ fail "Can't perform a netns move with rename"
+ip -netns $test_ns link del $DEV2 || fail
+ip -netns $test_ns link del $DEV || fail
+
+#
+# Test dup alt-name with netns move
+#
+ip -netns $test_ns link add name $DEV type dummy || fail
+ip -netns $test_ns link property add dev $DEV altname $ALT_NAME || fail
+ip -netns $NS link add name $DEV2 type dummy || fail
+ip -netns $NS link property add dev $DEV2 altname $ALT_NAME || fail
+
+ip -netns $NS link set dev $DEV2 netns $test_ns 2> /dev/null &&
+ fail "Moved with alt-name dup"
+
+ip -netns $test_ns link del $DEV || fail
+ip -netns $NS link del $DEV2 || fail
+
+#
+# Test creating alt-name in one net-ns and using in another
+#
+ip -netns $NS link add name $DEV type dummy || fail
+ip -netns $NS link property add dev $DEV altname $ALT_NAME || fail
+ip -netns $NS link set dev $DEV netns $test_ns || fail
+ip -netns $test_ns link show dev $ALT_NAME >> /dev/null || fail "Can't find alt-name after move"
+ip -netns $NS link show dev $ALT_NAME 2> /dev/null &&
+ fail "Can still find alt-name after move"
+ip -netns $test_ns link del $DEV || fail
+
+echo -ne "$(basename $0) \t\t\t\t"
+if [ $RET_CODE -eq 0 ]; then
+ echo "[ OK ]"
+else
+ echo "[ FAIL ]"
+fi
+exit $RET_CODE
diff --git a/tools/testing/selftests/net/nettest.c b/tools/testing/selftests/net/nettest.c
index 93208caacbe6..cd8a58097448 100644
--- a/tools/testing/selftests/net/nettest.c
+++ b/tools/testing/selftests/net/nettest.c
@@ -9,14 +9,18 @@
#include <sys/types.h>
#include <sys/ioctl.h>
#include <sys/socket.h>
+#include <sys/wait.h>
#include <linux/tcp.h>
+#include <linux/udp.h>
#include <arpa/inet.h>
#include <net/if.h>
#include <netinet/in.h>
+#include <netinet/ip.h>
#include <netdb.h>
#include <fcntl.h>
#include <libgen.h>
#include <limits.h>
+#include <sched.h>
#include <stdarg.h>
#include <stdio.h>
#include <stdlib.h>
@@ -24,6 +28,11 @@
#include <unistd.h>
#include <time.h>
#include <errno.h>
+#include <getopt.h>
+
+#include <linux/xfrm.h>
+#include <linux/ipsec.h>
+#include <linux/pfkeyv2.h>
#ifndef IPV6_UNICAST_IF
#define IPV6_UNICAST_IF 76
@@ -34,6 +43,8 @@
#define DEFAULT_PORT 12345
+#define NS_PREFIX "/run/netns/"
+
#ifndef MAX
#define MAX(a, b) ((a) > (b) ? (a) : (b))
#endif
@@ -43,12 +54,15 @@
struct sock_args {
/* local address */
+ const char *local_addr_str;
+ const char *client_local_addr_str;
union {
struct in_addr in;
struct in6_addr in6;
} local_addr;
/* remote address */
+ const char *remote_addr_str;
union {
struct in_addr in;
struct in6_addr in6;
@@ -62,7 +76,9 @@ struct sock_args {
has_grp:1,
has_expected_laddr:1,
has_expected_raddr:1,
- bind_test_only:1;
+ bind_test_only:1,
+ client_dontroute:1,
+ server_dontroute:1;
unsigned short port;
@@ -71,32 +87,52 @@ struct sock_args {
int version; /* AF_INET/AF_INET6 */
int use_setsockopt;
+ int use_freebind;
int use_cmsg;
+ uint8_t dsfield;
const char *dev;
+ const char *server_dev;
int ifindex;
+ const char *clientns;
+ const char *serverns;
+
const char *password;
+ const char *client_pw;
/* prefix for MD5 password */
+ const char *md5_prefix_str;
union {
struct sockaddr_in v4;
struct sockaddr_in6 v6;
} md5_prefix;
unsigned int prefix_len;
+ /* 0: default, -1: force off, +1: force on */
+ int bind_key_ifindex;
/* expected addresses and device index for connection */
+ const char *expected_dev;
+ const char *expected_server_dev;
int expected_ifindex;
/* local address */
+ const char *expected_laddr_str;
union {
struct in_addr in;
struct in6_addr in6;
} expected_laddr;
/* remote address */
+ const char *expected_raddr_str;
union {
struct in_addr in;
struct in6_addr in6;
} expected_raddr;
+
+ /* ESP in UDP encap test */
+ int use_xfrm;
+
+ /* use send() and connect() instead of sendto */
+ int datagram_connect;
};
static int server_mode;
@@ -186,7 +222,7 @@ static void log_address(const char *desc, struct sockaddr *sa)
if (sa->sa_family == AF_INET) {
struct sockaddr_in *s = (struct sockaddr_in *) sa;
- log_msg("%s %s:%d",
+ log_msg("%s %s:%d\n",
desc,
inet_ntop(AF_INET, &s->sin_addr, addrstr,
sizeof(addrstr)),
@@ -195,18 +231,37 @@ static void log_address(const char *desc, struct sockaddr *sa)
} else if (sa->sa_family == AF_INET6) {
struct sockaddr_in6 *s6 = (struct sockaddr_in6 *) sa;
- log_msg("%s [%s]:%d",
+ log_msg("%s [%s]:%d\n",
desc,
inet_ntop(AF_INET6, &s6->sin6_addr, addrstr,
sizeof(addrstr)),
ntohs(s6->sin6_port));
}
- printf("\n");
-
fflush(stdout);
}
+static int switch_ns(const char *ns)
+{
+ char path[PATH_MAX];
+ int fd, ret;
+
+ if (geteuid())
+ log_error("warning: likely need root to set netns %s!\n", ns);
+
+ snprintf(path, sizeof(path), "%s%s", NS_PREFIX, ns);
+ fd = open(path, 0);
+ if (fd < 0) {
+ log_err_errno("Failed to open netns path; can not switch netns");
+ return 1;
+ }
+
+ ret = setns(fd, CLONE_NEWNET);
+ close(fd);
+
+ return ret;
+}
+
static int tcp_md5sig(int sd, void *addr, socklen_t alen, struct sock_args *args)
{
int keylen = strlen(args->password);
@@ -226,11 +281,14 @@ static int tcp_md5sig(int sd, void *addr, socklen_t alen, struct sock_args *args
}
memcpy(&md5sig.tcpm_addr, addr, alen);
- if (args->ifindex) {
+ if ((args->ifindex && args->bind_key_ifindex >= 0) || args->bind_key_ifindex >= 1) {
opt = TCP_MD5SIG_EXT;
md5sig.tcpm_flags |= TCP_MD5SIG_FLAG_IFINDEX;
md5sig.tcpm_ifindex = args->ifindex;
+ log_msg("TCP_MD5SIG_FLAG_IFINDEX set tcpm_ifindex=%d\n", md5sig.tcpm_ifindex);
+ } else {
+ log_msg("TCP_MD5SIG_FLAG_IFINDEX off\n", md5sig.tcpm_ifindex);
}
rc = setsockopt(sd, IPPROTO_TCP, opt, &md5sig, sizeof(md5sig));
@@ -259,13 +317,13 @@ static int tcp_md5_remote(int sd, struct sock_args *args)
switch (args->version) {
case AF_INET:
sin.sin_port = htons(args->port);
- sin.sin_addr = args->remote_addr.in;
+ sin.sin_addr = args->md5_prefix.v4.sin_addr;
addr = &sin;
alen = sizeof(sin);
break;
case AF_INET6:
sin6.sin6_port = htons(args->port);
- sin6.sin6_addr = args->remote_addr.in6;
+ sin6.sin6_addr = args->md5_prefix.v6.sin6_addr;
addr = &sin6;
alen = sizeof(sin6);
break;
@@ -463,6 +521,29 @@ static int set_membership(int sd, uint32_t grp, uint32_t addr, int ifindex)
return 0;
}
+static int set_freebind(int sd, int version)
+{
+ unsigned int one = 1;
+ int rc = 0;
+
+ switch (version) {
+ case AF_INET:
+ if (setsockopt(sd, SOL_IP, IP_FREEBIND, &one, sizeof(one))) {
+ log_err_errno("setsockopt(IP_FREEBIND)");
+ rc = -1;
+ }
+ break;
+ case AF_INET6:
+ if (setsockopt(sd, SOL_IPV6, IPV6_FREEBIND, &one, sizeof(one))) {
+ log_err_errno("setsockopt(IPV6_FREEBIND");
+ rc = -1;
+ }
+ break;
+ }
+
+ return rc;
+}
+
static int set_broadcast(int sd)
{
unsigned int one = 1;
@@ -502,6 +583,48 @@ static int set_reuseaddr(int sd)
return rc;
}
+static int set_dsfield(int sd, int version, int dsfield)
+{
+ if (!dsfield)
+ return 0;
+
+ switch (version) {
+ case AF_INET:
+ if (setsockopt(sd, SOL_IP, IP_TOS, &dsfield,
+ sizeof(dsfield)) < 0) {
+ log_err_errno("setsockopt(IP_TOS)");
+ return -1;
+ }
+ break;
+
+ case AF_INET6:
+ if (setsockopt(sd, SOL_IPV6, IPV6_TCLASS, &dsfield,
+ sizeof(dsfield)) < 0) {
+ log_err_errno("setsockopt(IPV6_TCLASS)");
+ return -1;
+ }
+ break;
+
+ default:
+ log_error("Invalid address family\n");
+ return -1;
+ }
+
+ return 0;
+}
+
+static int set_dontroute(int sd)
+{
+ unsigned int one = 1;
+
+ if (setsockopt(sd, SOL_SOCKET, SO_DONTROUTE, &one, sizeof(one)) < 0) {
+ log_err_errno("setsockopt(SO_DONTROUTE)");
+ return -1;
+ }
+
+ return 0;
+}
+
static int str_to_uint(const char *str, int min, int max, unsigned int *value)
{
int number;
@@ -522,6 +645,33 @@ static int str_to_uint(const char *str, int min, int max, unsigned int *value)
return -1;
}
+static int resolve_devices(struct sock_args *args)
+{
+ if (args->dev) {
+ args->ifindex = get_ifidx(args->dev);
+ if (args->ifindex < 0) {
+ log_error("Invalid device name\n");
+ return 1;
+ }
+ }
+
+ if (args->expected_dev) {
+ unsigned int tmp;
+
+ if (str_to_uint(args->expected_dev, 0, INT_MAX, &tmp) == 0) {
+ args->expected_ifindex = (int)tmp;
+ } else {
+ args->expected_ifindex = get_ifidx(args->expected_dev);
+ if (args->expected_ifindex < 0) {
+ fprintf(stderr, "Invalid expected device\n");
+ return 1;
+ }
+ }
+ }
+
+ return 0;
+}
+
static int expected_addr_match(struct sockaddr *sa, void *expected,
const char *desc)
{
@@ -533,7 +683,7 @@ static int expected_addr_match(struct sockaddr *sa, void *expected,
struct in_addr *exp_in = (struct in_addr *) expected;
if (s->sin_addr.s_addr != exp_in->s_addr) {
- log_error("%s address does not match expected %s",
+ log_error("%s address does not match expected %s\n",
desc,
inet_ntop(AF_INET, exp_in,
addrstr, sizeof(addrstr)));
@@ -544,14 +694,14 @@ static int expected_addr_match(struct sockaddr *sa, void *expected,
struct in6_addr *exp_in = (struct in6_addr *) expected;
if (memcmp(&s6->sin6_addr, exp_in, sizeof(*exp_in))) {
- log_error("%s address does not match expected %s",
+ log_error("%s address does not match expected %s\n",
desc,
inet_ntop(AF_INET6, exp_in,
addrstr, sizeof(addrstr)));
rc = 1;
}
} else {
- log_error("%s address does not match expected - unknown family",
+ log_error("%s address does not match expected - unknown family\n",
desc);
rc = 1;
}
@@ -599,6 +749,160 @@ static int show_sockstat(int sd, struct sock_args *args)
return rc;
}
+enum addr_type {
+ ADDR_TYPE_LOCAL,
+ ADDR_TYPE_REMOTE,
+ ADDR_TYPE_MCAST,
+ ADDR_TYPE_EXPECTED_LOCAL,
+ ADDR_TYPE_EXPECTED_REMOTE,
+ ADDR_TYPE_MD5_PREFIX,
+};
+
+static int convert_addr(struct sock_args *args, const char *_str,
+ enum addr_type atype)
+{
+ int pfx_len_max = args->version == AF_INET6 ? 128 : 32;
+ int family = args->version;
+ char *str, *dev, *sep;
+ struct in6_addr *in6;
+ struct in_addr *in;
+ const char *desc;
+ void *addr;
+ int rc = 0;
+
+ str = strdup(_str);
+ if (!str)
+ return -ENOMEM;
+
+ switch (atype) {
+ case ADDR_TYPE_LOCAL:
+ desc = "local";
+ addr = &args->local_addr;
+ break;
+ case ADDR_TYPE_REMOTE:
+ desc = "remote";
+ addr = &args->remote_addr;
+ break;
+ case ADDR_TYPE_MCAST:
+ desc = "mcast grp";
+ addr = &args->grp;
+ break;
+ case ADDR_TYPE_EXPECTED_LOCAL:
+ desc = "expected local";
+ addr = &args->expected_laddr;
+ break;
+ case ADDR_TYPE_EXPECTED_REMOTE:
+ desc = "expected remote";
+ addr = &args->expected_raddr;
+ break;
+ case ADDR_TYPE_MD5_PREFIX:
+ desc = "md5 prefix";
+ if (family == AF_INET) {
+ args->md5_prefix.v4.sin_family = AF_INET;
+ addr = &args->md5_prefix.v4.sin_addr;
+ } else if (family == AF_INET6) {
+ args->md5_prefix.v6.sin6_family = AF_INET6;
+ addr = &args->md5_prefix.v6.sin6_addr;
+ } else
+ return 1;
+
+ sep = strchr(str, '/');
+ if (sep) {
+ *sep = '\0';
+ sep++;
+ if (str_to_uint(sep, 1, pfx_len_max,
+ &args->prefix_len) != 0) {
+ fprintf(stderr, "Invalid port\n");
+ return 1;
+ }
+ } else {
+ args->prefix_len = 0;
+ }
+ break;
+ default:
+ log_error("unknown address type\n");
+ exit(1);
+ }
+
+ switch (family) {
+ case AF_INET:
+ in = (struct in_addr *) addr;
+ if (str) {
+ if (inet_pton(AF_INET, str, in) == 0) {
+ log_error("Invalid %s IP address\n", desc);
+ rc = -1;
+ goto out;
+ }
+ } else {
+ in->s_addr = htonl(INADDR_ANY);
+ }
+ break;
+
+ case AF_INET6:
+ dev = strchr(str, '%');
+ if (dev) {
+ *dev = '\0';
+ dev++;
+ }
+
+ in6 = (struct in6_addr *) addr;
+ if (str) {
+ if (inet_pton(AF_INET6, str, in6) == 0) {
+ log_error("Invalid %s IPv6 address\n", desc);
+ rc = -1;
+ goto out;
+ }
+ } else {
+ *in6 = in6addr_any;
+ }
+ if (dev) {
+ args->scope_id = get_ifidx(dev);
+ if (args->scope_id < 0) {
+ log_error("Invalid scope on %s IPv6 address\n",
+ desc);
+ rc = -1;
+ goto out;
+ }
+ }
+ break;
+
+ default:
+ log_error("Invalid address family\n");
+ }
+
+out:
+ free(str);
+ return rc;
+}
+
+static int validate_addresses(struct sock_args *args)
+{
+ if (args->local_addr_str &&
+ convert_addr(args, args->local_addr_str, ADDR_TYPE_LOCAL) < 0)
+ return 1;
+
+ if (args->remote_addr_str &&
+ convert_addr(args, args->remote_addr_str, ADDR_TYPE_REMOTE) < 0)
+ return 1;
+
+ if (args->md5_prefix_str &&
+ convert_addr(args, args->md5_prefix_str,
+ ADDR_TYPE_MD5_PREFIX) < 0)
+ return 1;
+
+ if (args->expected_laddr_str &&
+ convert_addr(args, args->expected_laddr_str,
+ ADDR_TYPE_EXPECTED_LOCAL))
+ return 1;
+
+ if (args->expected_raddr_str &&
+ convert_addr(args, args->expected_raddr_str,
+ ADDR_TYPE_EXPECTED_REMOTE))
+ return 1;
+
+ return 0;
+}
+
static int get_index_from_cmsg(struct msghdr *m)
{
struct cmsghdr *cm;
@@ -723,6 +1027,11 @@ static int send_msg(int sd, void *addr, socklen_t alen, struct sock_args *args)
log_err_errno("write failed sending msg to peer");
return 1;
}
+ } else if (args->datagram_connect) {
+ if (send(sd, msg, msglen, 0) < 0) {
+ log_err_errno("send failed sending msg to peer");
+ return 1;
+ }
} else if (args->ifindex && args->use_cmsg) {
if (send_msg_cmsg(sd, addr, alen, args->ifindex, args->version))
return 1;
@@ -1053,6 +1362,17 @@ static int msock_init(struct sock_args *args, int server)
(char *)&one, sizeof(one)) < 0)
log_err_errno("Setting SO_BROADCAST error");
+ if (set_dsfield(sd, AF_INET, args->dsfield) != 0)
+ goto out_err;
+
+ if (server) {
+ if (args->server_dontroute && set_dontroute(sd) != 0)
+ goto out_err;
+ } else {
+ if (args->client_dontroute && set_dontroute(sd) != 0)
+ goto out_err;
+ }
+
if (args->dev && bind_to_device(sd, args->dev) != 0)
goto out_err;
else if (args->use_setsockopt &&
@@ -1129,6 +1449,41 @@ static int bind_socket(int sd, struct sock_args *args)
return 0;
}
+static int config_xfrm_policy(int sd, struct sock_args *args)
+{
+ struct xfrm_userpolicy_info policy = {};
+ int type = UDP_ENCAP_ESPINUDP;
+ int xfrm_af = IP_XFRM_POLICY;
+ int level = SOL_IP;
+
+ if (args->type != SOCK_DGRAM) {
+ log_error("Invalid socket type. Only DGRAM could be used for XFRM\n");
+ return 1;
+ }
+
+ policy.action = XFRM_POLICY_ALLOW;
+ policy.sel.family = args->version;
+ if (args->version == AF_INET6) {
+ xfrm_af = IPV6_XFRM_POLICY;
+ level = SOL_IPV6;
+ }
+
+ policy.dir = XFRM_POLICY_OUT;
+ if (setsockopt(sd, level, xfrm_af, &policy, sizeof(policy)) < 0)
+ return 1;
+
+ policy.dir = XFRM_POLICY_IN;
+ if (setsockopt(sd, level, xfrm_af, &policy, sizeof(policy)) < 0)
+ return 1;
+
+ if (setsockopt(sd, IPPROTO_UDP, UDP_ENCAP, &type, sizeof(type)) < 0) {
+ log_err_errno("Failed to set xfrm encap");
+ return 1;
+ }
+
+ return 0;
+}
+
static int lsock_init(struct sock_args *args)
{
long flags;
@@ -1146,12 +1501,21 @@ static int lsock_init(struct sock_args *args)
if (set_reuseport(sd) != 0)
goto err;
+ if (set_dsfield(sd, args->version, args->dsfield) != 0)
+ goto err;
+
+ if (args->server_dontroute && set_dontroute(sd) != 0)
+ goto err;
+
if (args->dev && bind_to_device(sd, args->dev) != 0)
goto err;
else if (args->use_setsockopt &&
set_unicast_if(sd, args->ifindex, args->version))
goto err;
+ if (args->use_freebind && set_freebind(sd, args->version))
+ goto err;
+
if (bind_socket(sd, args))
goto err;
@@ -1172,6 +1536,11 @@ static int lsock_init(struct sock_args *args)
if (fcntl(sd, F_SETFD, FD_CLOEXEC) < 0)
log_err_errno("Failed to set close-on-exec flag");
+ if (args->use_xfrm && config_xfrm_policy(sd, args)) {
+ log_err_errno("Failed to set xfrm policy");
+ goto err;
+ }
+
out:
return sd;
@@ -1180,8 +1549,19 @@ err:
return -1;
}
-static int do_server(struct sock_args *args)
+static void ipc_write(int fd, int message)
+{
+ /* Not in both_mode, so there's no process to signal */
+ if (fd < 0)
+ return;
+
+ if (write(fd, &message, sizeof(message)) < 0)
+ log_err_errno("Failed to send client status");
+}
+
+static int do_server(struct sock_args *args, int ipc_fd)
{
+ /* ipc_fd = -1 if no parent process to signal */
struct timeval timeout = { .tv_sec = prog_timeout }, *ptval = NULL;
unsigned char addr[sizeof(struct sockaddr_in6)] = {};
socklen_t alen = sizeof(addr);
@@ -1190,6 +1570,20 @@ static int do_server(struct sock_args *args)
fd_set rfds;
int rc;
+ if (args->serverns) {
+ if (switch_ns(args->serverns)) {
+ log_error("Could not set server netns to %s\n",
+ args->serverns);
+ goto err_exit;
+ }
+ log_msg("Switched server netns\n");
+ }
+
+ args->dev = args->server_dev;
+ args->expected_dev = args->expected_server_dev;
+ if (resolve_devices(args) || validate_addresses(args))
+ goto err_exit;
+
if (prog_timeout)
ptval = &timeout;
@@ -1199,14 +1593,16 @@ static int do_server(struct sock_args *args)
lsd = lsock_init(args);
if (lsd < 0)
- return 1;
+ goto err_exit;
if (args->bind_test_only) {
close(lsd);
+ ipc_write(ipc_fd, 1);
return 0;
}
if (args->type != SOCK_STREAM) {
+ ipc_write(ipc_fd, 1);
rc = msg_loop(0, lsd, (void *) addr, alen, args);
close(lsd);
return rc;
@@ -1214,11 +1610,11 @@ static int do_server(struct sock_args *args)
if (args->password && tcp_md5_remote(lsd, args)) {
close(lsd);
- return 1;
+ goto err_exit;
}
+ ipc_write(ipc_fd, 1);
while (1) {
- log_msg("\n");
log_msg("waiting for client connection.\n");
FD_ZERO(&rfds);
FD_SET(lsd, &rfds);
@@ -1264,6 +1660,9 @@ static int do_server(struct sock_args *args)
close(lsd);
return rc;
+err_exit:
+ ipc_write(ipc_fd, 0);
+ return 1;
}
static int wait_for_connect(int sd)
@@ -1321,6 +1720,12 @@ static int connectsock(void *addr, socklen_t alen, struct sock_args *args)
if (set_reuseport(sd) != 0)
goto err;
+ if (set_dsfield(sd, args->version, args->dsfield) != 0)
+ goto err;
+
+ if (args->client_dontroute && set_dontroute(sd) != 0)
+ goto err;
+
if (args->dev && bind_to_device(sd, args->dev) != 0)
goto err;
else if (args->use_setsockopt &&
@@ -1330,7 +1735,7 @@ static int connectsock(void *addr, socklen_t alen, struct sock_args *args)
if (args->has_local_ip && bind_socket(sd, args))
goto err;
- if (args->type != SOCK_STREAM)
+ if (args->type != SOCK_STREAM && !args->datagram_connect)
goto out;
if (args->password && tcp_md5sig(sd, addr, alen, args))
@@ -1375,6 +1780,26 @@ static int do_client(struct sock_args *args)
return 1;
}
+ if (args->clientns) {
+ if (switch_ns(args->clientns)) {
+ log_error("Could not set client netns to %s\n",
+ args->clientns);
+ return 1;
+ }
+ log_msg("Switched client netns\n");
+ }
+
+ args->local_addr_str = args->client_local_addr_str;
+ if (resolve_devices(args) || validate_addresses(args))
+ return 1;
+
+ if ((args->use_setsockopt || args->use_cmsg) && !args->ifindex) {
+ fprintf(stderr, "Device binding not specified\n");
+ return 1;
+ }
+ if (args->use_setsockopt || args->use_cmsg)
+ args->dev = NULL;
+
switch (args->version) {
case AF_INET:
sin.sin_port = htons(args->port);
@@ -1394,6 +1819,8 @@ static int do_client(struct sock_args *args)
break;
}
+ args->password = args->client_pw;
+
if (args->has_grp)
sd = msock_client(args);
else
@@ -1419,132 +1846,6 @@ out:
return rc;
}
-enum addr_type {
- ADDR_TYPE_LOCAL,
- ADDR_TYPE_REMOTE,
- ADDR_TYPE_MCAST,
- ADDR_TYPE_EXPECTED_LOCAL,
- ADDR_TYPE_EXPECTED_REMOTE,
- ADDR_TYPE_MD5_PREFIX,
-};
-
-static int convert_addr(struct sock_args *args, const char *_str,
- enum addr_type atype)
-{
- int pfx_len_max = args->version == AF_INET6 ? 128 : 32;
- int family = args->version;
- char *str, *dev, *sep;
- struct in6_addr *in6;
- struct in_addr *in;
- const char *desc;
- void *addr;
- int rc = 0;
-
- str = strdup(_str);
- if (!str)
- return -ENOMEM;
-
- switch (atype) {
- case ADDR_TYPE_LOCAL:
- desc = "local";
- addr = &args->local_addr;
- break;
- case ADDR_TYPE_REMOTE:
- desc = "remote";
- addr = &args->remote_addr;
- break;
- case ADDR_TYPE_MCAST:
- desc = "mcast grp";
- addr = &args->grp;
- break;
- case ADDR_TYPE_EXPECTED_LOCAL:
- desc = "expected local";
- addr = &args->expected_laddr;
- break;
- case ADDR_TYPE_EXPECTED_REMOTE:
- desc = "expected remote";
- addr = &args->expected_raddr;
- break;
- case ADDR_TYPE_MD5_PREFIX:
- desc = "md5 prefix";
- if (family == AF_INET) {
- args->md5_prefix.v4.sin_family = AF_INET;
- addr = &args->md5_prefix.v4.sin_addr;
- } else if (family == AF_INET6) {
- args->md5_prefix.v6.sin6_family = AF_INET6;
- addr = &args->md5_prefix.v6.sin6_addr;
- } else
- return 1;
-
- sep = strchr(str, '/');
- if (sep) {
- *sep = '\0';
- sep++;
- if (str_to_uint(sep, 1, pfx_len_max,
- &args->prefix_len) != 0) {
- fprintf(stderr, "Invalid port\n");
- return 1;
- }
- } else {
- args->prefix_len = pfx_len_max;
- }
- break;
- default:
- log_error("unknown address type");
- exit(1);
- }
-
- switch (family) {
- case AF_INET:
- in = (struct in_addr *) addr;
- if (str) {
- if (inet_pton(AF_INET, str, in) == 0) {
- log_error("Invalid %s IP address\n", desc);
- rc = -1;
- goto out;
- }
- } else {
- in->s_addr = htonl(INADDR_ANY);
- }
- break;
-
- case AF_INET6:
- dev = strchr(str, '%');
- if (dev) {
- *dev = '\0';
- dev++;
- }
-
- in6 = (struct in6_addr *) addr;
- if (str) {
- if (inet_pton(AF_INET6, str, in6) == 0) {
- log_error("Invalid %s IPv6 address\n", desc);
- rc = -1;
- goto out;
- }
- } else {
- *in6 = in6addr_any;
- }
- if (dev) {
- args->scope_id = get_ifidx(dev);
- if (args->scope_id < 0) {
- log_error("Invalid scope on %s IPv6 address\n",
- desc);
- rc = -1;
- goto out;
- }
- }
- break;
-
- default:
- log_error("Invalid address family\n");
- }
-
-out:
- free(str);
- return rc;
-}
-
static char *random_msg(int len)
{
int i, n = 0, olen = len + 1;
@@ -1563,12 +1864,86 @@ static char *random_msg(int len)
n += i;
len -= i;
}
- i = snprintf(m + n, olen - n, "%.*s", len,
- "abcdefghijklmnopqrstuvwxyz");
+
+ snprintf(m + n, olen - n, "%.*s", len,
+ "abcdefghijklmnopqrstuvwxyz");
return m;
}
-#define GETOPT_STR "sr:l:p:t:g:P:DRn:M:m:d:SCi6L:0:1:2:Fbq"
+static int ipc_child(int fd, struct sock_args *args)
+{
+ char *outbuf, *errbuf;
+ int rc = 1;
+
+ outbuf = malloc(4096);
+ errbuf = malloc(4096);
+ if (!outbuf || !errbuf) {
+ fprintf(stderr, "server: Failed to allocate buffers for stdout and stderr\n");
+ goto out;
+ }
+
+ setbuffer(stdout, outbuf, 4096);
+ setbuffer(stderr, errbuf, 4096);
+
+ server_mode = 1; /* to tell log_msg in case we are in both_mode */
+
+ /* when running in both mode, address validation applies
+ * solely to client side
+ */
+ args->has_expected_laddr = 0;
+ args->has_expected_raddr = 0;
+
+ rc = do_server(args, fd);
+
+out:
+ free(outbuf);
+ free(errbuf);
+
+ return rc;
+}
+
+static int ipc_parent(int cpid, int fd, struct sock_args *args)
+{
+ int client_status;
+ int status;
+ int buf;
+
+ /* do the client-side function here in the parent process,
+ * waiting to be told when to continue
+ */
+ if (read(fd, &buf, sizeof(buf)) <= 0) {
+ log_err_errno("Failed to read IPC status from status");
+ return 1;
+ }
+ if (!buf) {
+ log_error("Server failed; can not continue\n");
+ return 1;
+ }
+ log_msg("Server is ready\n");
+
+ client_status = do_client(args);
+ log_msg("parent is done!\n");
+
+ if (kill(cpid, 0) == 0)
+ kill(cpid, SIGKILL);
+
+ wait(&status);
+ return client_status;
+}
+
+#define GETOPT_STR "sr:l:c:Q:p:t:g:P:DRn:M:X:m:d:I:BN:O:SUCi6xL:0:1:2:3:Fbqf"
+#define OPT_FORCE_BIND_KEY_IFINDEX 1001
+#define OPT_NO_BIND_KEY_IFINDEX 1002
+#define OPT_CLIENT_DONTROUTE 1003
+#define OPT_SERVER_DONTROUTE 1004
+
+static struct option long_opts[] = {
+ {"force-bind-key-ifindex", 0, 0, OPT_FORCE_BIND_KEY_IFINDEX},
+ {"no-bind-key-ifindex", 0, 0, OPT_NO_BIND_KEY_IFINDEX},
+ {"client-dontroute", 0, 0, OPT_CLIENT_DONTROUTE},
+ {"server-dontroute", 0, 0, OPT_SERVER_DONTROUTE},
+ {0, 0, 0, 0}
+};
static void print_usage(char *prog)
{
@@ -1582,28 +1957,50 @@ static void print_usage(char *prog)
" -t timeout seconds (default: none)\n"
"\n"
"Optional:\n"
+ " -B do both client and server via fork and IPC\n"
+ " -N ns set client to network namespace ns (requires root)\n"
+ " -O ns set server to network namespace ns (requires root)\n"
" -F Restart server loop\n"
" -6 IPv6 (default is IPv4)\n"
" -P proto protocol for socket: icmp, ospf (default: none)\n"
" -D|R datagram (D) / raw (R) socket (default stream)\n"
- " -l addr local address to bind to\n"
+ " -l addr local address to bind to in server mode\n"
+ " -c addr local address to bind to in client mode\n"
+ " -Q dsfield DS Field value of the socket (the IP_TOS or\n"
+ " IPV6_TCLASS socket option)\n"
+ " -x configure XFRM policy on socket\n"
"\n"
" -d dev bind socket to given device name\n"
+ " -I dev bind socket to given device name - server mode\n"
" -S use setsockopt (IP_UNICAST_IF or IP_MULTICAST_IF)\n"
" to set device binding\n"
+ " -U Use connect() and send() for datagram sockets\n"
+ " -f bind socket with the IP[V6]_FREEBIND option\n"
" -C use cmsg and IP_PKTINFO to specify device binding\n"
"\n"
" -L len send random message of given length\n"
" -n num number of times to send message\n"
"\n"
" -M password use MD5 sum protection\n"
+ " -X password MD5 password for client mode\n"
" -m prefix/len prefix and length to use for MD5 key\n"
+ " --no-bind-key-ifindex: Force TCP_MD5SIG_FLAG_IFINDEX off\n"
+ " --force-bind-key-ifindex: Force TCP_MD5SIG_FLAG_IFINDEX on\n"
+ " (default: only if -I is passed)\n"
+ " --client-dontroute: don't use gateways for client socket: send\n"
+ " packets only if destination is on link (see\n"
+ " SO_DONTROUTE in socket(7))\n"
+ " --server-dontroute: don't use gateways for server socket: send\n"
+ " packets only if destination is on link (see\n"
+ " SO_DONTROUTE in socket(7))\n"
+ "\n"
" -g grp multicast group (e.g., 239.1.1.1)\n"
" -i interactive mode (default is echo and terminate)\n"
"\n"
" -0 addr Expected local address\n"
" -1 addr Expected remote address\n"
" -2 dev Expected device name (or index) to receive packet\n"
+ " -3 dev Expected device name (or index) to receive packets - server mode\n"
"\n"
" -b Bind test only.\n"
" -q Be quiet. Run test without printing anything.\n"
@@ -1618,8 +2015,11 @@ int main(int argc, char *argv[])
.port = DEFAULT_PORT,
};
struct protoent *pe;
+ int both_mode = 0;
unsigned int tmp;
int forever = 0;
+ int fd[2];
+ int cpid;
/* process inputs */
extern char *optarg;
@@ -1629,8 +2029,11 @@ int main(int argc, char *argv[])
* process input args
*/
- while ((rc = getopt(argc, argv, GETOPT_STR)) != -1) {
+ while ((rc = getopt_long(argc, argv, GETOPT_STR, long_opts, NULL)) != -1) {
switch (rc) {
+ case 'B':
+ both_mode = 1;
+ break;
case 's':
server_mode = 1;
break;
@@ -1639,13 +2042,22 @@ int main(int argc, char *argv[])
break;
case 'l':
args.has_local_ip = 1;
- if (convert_addr(&args, optarg, ADDR_TYPE_LOCAL) < 0)
- return 1;
+ args.local_addr_str = optarg;
break;
case 'r':
args.has_remote_ip = 1;
- if (convert_addr(&args, optarg, ADDR_TYPE_REMOTE) < 0)
+ args.remote_addr_str = optarg;
+ break;
+ case 'c':
+ args.has_local_ip = 1;
+ args.client_local_addr_str = optarg;
+ break;
+ case 'Q':
+ if (str_to_uint(optarg, 0, 255, &tmp) != 0) {
+ fprintf(stderr, "Invalid DS Field\n");
return 1;
+ }
+ args.dsfield = tmp;
break;
case 'p':
if (str_to_uint(optarg, 1, 65535, &tmp) != 0) {
@@ -1667,6 +2079,8 @@ int main(int argc, char *argv[])
case 'R':
args.type = SOCK_RAW;
args.port = 0;
+ if (!args.protocol)
+ args.protocol = IPPROTO_RAW;
break;
case 'P':
pe = getprotobyname(optarg);
@@ -1683,29 +2097,50 @@ int main(int argc, char *argv[])
case 'n':
iter = atoi(optarg);
break;
+ case 'N':
+ args.clientns = optarg;
+ break;
+ case 'O':
+ args.serverns = optarg;
+ break;
case 'L':
msg = random_msg(atoi(optarg));
break;
case 'M':
args.password = optarg;
break;
+ case OPT_FORCE_BIND_KEY_IFINDEX:
+ args.bind_key_ifindex = 1;
+ break;
+ case OPT_NO_BIND_KEY_IFINDEX:
+ args.bind_key_ifindex = -1;
+ break;
+ case OPT_CLIENT_DONTROUTE:
+ args.client_dontroute = 1;
+ break;
+ case OPT_SERVER_DONTROUTE:
+ args.server_dontroute = 1;
+ break;
+ case 'X':
+ args.client_pw = optarg;
+ break;
case 'm':
- if (convert_addr(&args, optarg, ADDR_TYPE_MD5_PREFIX) < 0)
- return 1;
+ args.md5_prefix_str = optarg;
break;
case 'S':
args.use_setsockopt = 1;
break;
+ case 'f':
+ args.use_freebind = 1;
+ break;
case 'C':
args.use_cmsg = 1;
break;
case 'd':
args.dev = optarg;
- args.ifindex = get_ifidx(optarg);
- if (args.ifindex < 0) {
- fprintf(stderr, "Invalid device name\n");
- return 1;
- }
+ break;
+ case 'I':
+ args.server_dev = optarg;
break;
case 'i':
interactive = 1;
@@ -1724,32 +2159,27 @@ int main(int argc, char *argv[])
break;
case '0':
args.has_expected_laddr = 1;
- if (convert_addr(&args, optarg,
- ADDR_TYPE_EXPECTED_LOCAL))
- return 1;
+ args.expected_laddr_str = optarg;
break;
case '1':
args.has_expected_raddr = 1;
- if (convert_addr(&args, optarg,
- ADDR_TYPE_EXPECTED_REMOTE))
- return 1;
-
+ args.expected_raddr_str = optarg;
break;
case '2':
- if (str_to_uint(optarg, 0, INT_MAX, &tmp) == 0) {
- args.expected_ifindex = (int)tmp;
- } else {
- args.expected_ifindex = get_ifidx(optarg);
- if (args.expected_ifindex < 0) {
- fprintf(stderr,
- "Invalid expected device\n");
- return 1;
- }
- }
+ args.expected_dev = optarg;
+ break;
+ case '3':
+ args.expected_server_dev = optarg;
break;
case 'q':
quiet = 1;
break;
+ case 'x':
+ args.use_xfrm = 1;
+ break;
+ case 'U':
+ args.datagram_connect = 1;
+ break;
default:
print_usage(argv[0]);
return 1;
@@ -1757,23 +2187,17 @@ int main(int argc, char *argv[])
}
if (args.password &&
- ((!args.has_remote_ip && !args.prefix_len) || args.type != SOCK_STREAM)) {
+ ((!args.has_remote_ip && !args.md5_prefix_str) ||
+ args.type != SOCK_STREAM)) {
log_error("MD5 passwords apply to TCP only and require a remote ip for the password\n");
return 1;
}
- if (args.prefix_len && !args.password) {
+ if (args.md5_prefix_str && !args.password) {
log_error("Prefix range for MD5 protection specified without a password\n");
return 1;
}
- if ((args.use_setsockopt || args.use_cmsg) && !args.ifindex) {
- fprintf(stderr, "Device binding not specified\n");
- return 1;
- }
- if (args.use_setsockopt || args.use_cmsg)
- args.dev = NULL;
-
if (iter == 0) {
fprintf(stderr, "Invalid number of messages to send\n");
return 1;
@@ -1790,7 +2214,7 @@ int main(int argc, char *argv[])
return 1;
}
- if (!server_mode && !args.has_grp &&
+ if ((both_mode || !server_mode) && !args.has_grp &&
!args.has_remote_ip && !args.has_local_ip) {
fprintf(stderr,
"Local (server mode) or remote IP (client IP) required\n");
@@ -1802,9 +2226,26 @@ int main(int argc, char *argv[])
msg = NULL;
}
+ if (both_mode) {
+ if (pipe(fd) < 0) {
+ perror("pipe");
+ exit(1);
+ }
+
+ cpid = fork();
+ if (cpid < 0) {
+ perror("fork");
+ exit(1);
+ }
+ if (cpid)
+ return ipc_parent(cpid, fd[0], &args);
+
+ return ipc_child(fd[1], &args);
+ }
+
if (server_mode) {
do {
- rc = do_server(&args);
+ rc = do_server(&args, -1);
} while (forever);
return rc;
diff --git a/tools/testing/selftests/net/openvswitch/Makefile b/tools/testing/selftests/net/openvswitch/Makefile
new file mode 100644
index 000000000000..2f1508abc826
--- /dev/null
+++ b/tools/testing/selftests/net/openvswitch/Makefile
@@ -0,0 +1,13 @@
+# SPDX-License-Identifier: GPL-2.0
+
+top_srcdir = ../../../../..
+
+CFLAGS = -Wall -Wl,--no-as-needed -O2 -g -I$(top_srcdir)/usr/include $(KHDR_INCLUDES)
+
+TEST_PROGS := openvswitch.sh
+
+TEST_FILES := ovs-dpctl.py
+
+EXTRA_CLEAN := test_netlink_checks
+
+include ../../lib.mk
diff --git a/tools/testing/selftests/net/openvswitch/openvswitch.sh b/tools/testing/selftests/net/openvswitch/openvswitch.sh
new file mode 100755
index 000000000000..5cae53543849
--- /dev/null
+++ b/tools/testing/selftests/net/openvswitch/openvswitch.sh
@@ -0,0 +1,722 @@
+#!/bin/sh
+# SPDX-License-Identifier: GPL-2.0
+#
+# OVS kernel module self tests
+
+trap ovs_exit_sig EXIT TERM INT ERR
+
+# Kselftest framework requirement - SKIP code is 4.
+ksft_skip=4
+
+PAUSE_ON_FAIL=no
+VERBOSE=0
+TRACING=0
+
+tests="
+ arp_ping eth-arp: Basic arp ping between two NS
+ ct_connect_v4 ip4-ct-xon: Basic ipv4 tcp connection using ct
+ connect_v4 ip4-xon: Basic ipv4 ping between two NS
+ nat_connect_v4 ip4-nat-xon: Basic ipv4 tcp connection via NAT
+ nat_related_v4 ip4-nat-related: ICMP related matches work with SNAT
+ netlink_checks ovsnl: validate netlink attrs and settings
+ upcall_interfaces ovs: test the upcall interfaces
+ drop_reason drop: test drop reasons are emitted"
+
+info() {
+ [ $VERBOSE = 0 ] || echo $*
+}
+
+ovs_base=`pwd`
+sbxs=
+sbx_add () {
+ info "adding sandbox '$1'"
+
+ sbxs="$sbxs $1"
+
+ NO_BIN=0
+
+ # Create sandbox.
+ local d="$ovs_base"/$1
+ if [ -e $d ]; then
+ info "removing $d"
+ rm -rf "$d"
+ fi
+ mkdir "$d" || return 1
+ ovs_setenv $1
+}
+
+ovs_exit_sig() {
+ [ -e ${ovs_dir}/cleanup ] && . "$ovs_dir/cleanup"
+}
+
+on_exit() {
+ echo "$1" > ${ovs_dir}/cleanup.tmp
+ cat ${ovs_dir}/cleanup >> ${ovs_dir}/cleanup.tmp
+ mv ${ovs_dir}/cleanup.tmp ${ovs_dir}/cleanup
+}
+
+ovs_setenv() {
+ sandbox=$1
+
+ ovs_dir=$ovs_base${1:+/$1}; export ovs_dir
+
+ test -e ${ovs_dir}/cleanup || : > ${ovs_dir}/cleanup
+}
+
+ovs_sbx() {
+ if test "X$2" != X; then
+ (ovs_setenv $1; shift; "$@" >> ${ovs_dir}/debug.log)
+ else
+ ovs_setenv $1
+ fi
+}
+
+ovs_add_dp () {
+ info "Adding DP/Bridge IF: sbx:$1 dp:$2 {$3, $4, $5}"
+ sbxname="$1"
+ shift
+ ovs_sbx "$sbxname" python3 $ovs_base/ovs-dpctl.py add-dp $*
+ on_exit "ovs_sbx $sbxname python3 $ovs_base/ovs-dpctl.py del-dp $1;"
+}
+
+ovs_add_if () {
+ info "Adding IF to DP: br:$2 if:$3"
+ if [ "$4" != "-u" ]; then
+ ovs_sbx "$1" python3 $ovs_base/ovs-dpctl.py add-if "$2" "$3" \
+ || return 1
+ else
+ python3 $ovs_base/ovs-dpctl.py add-if \
+ -u "$2" "$3" >$ovs_dir/$3.out 2>$ovs_dir/$3.err &
+ pid=$!
+ on_exit "ovs_sbx $1 kill -TERM $pid 2>/dev/null"
+ fi
+}
+
+ovs_del_if () {
+ info "Deleting IF from DP: br:$2 if:$3"
+ ovs_sbx "$1" python3 $ovs_base/ovs-dpctl.py del-if "$2" "$3" || return 1
+}
+
+ovs_netns_spawn_daemon() {
+ sbx=$1
+ shift
+ netns=$1
+ shift
+ info "spawning cmd: $*"
+ ip netns exec $netns $* >> $ovs_dir/stdout 2>> $ovs_dir/stderr &
+ pid=$!
+ ovs_sbx "$sbx" on_exit "kill -TERM $pid 2>/dev/null"
+}
+
+ovs_add_netns_and_veths () {
+ info "Adding netns attached: sbx:$1 dp:$2 {$3, $4, $5}"
+ ovs_sbx "$1" ip netns add "$3" || return 1
+ on_exit "ovs_sbx $1 ip netns del $3"
+ ovs_sbx "$1" ip link add "$4" type veth peer name "$5" || return 1
+ on_exit "ovs_sbx $1 ip link del $4 >/dev/null 2>&1"
+ ovs_sbx "$1" ip link set "$4" up || return 1
+ ovs_sbx "$1" ip link set "$5" netns "$3" || return 1
+ ovs_sbx "$1" ip netns exec "$3" ip link set "$5" up || return 1
+
+ if [ "$6" != "" ]; then
+ ovs_sbx "$1" ip netns exec "$3" ip addr add "$6" dev "$5" \
+ || return 1
+ fi
+
+ if [ "$7" != "-u" ]; then
+ ovs_add_if "$1" "$2" "$4" || return 1
+ else
+ ovs_add_if "$1" "$2" "$4" -u || return 1
+ fi
+
+ [ $TRACING -eq 1 ] && ovs_netns_spawn_daemon "$1" "$ns" \
+ tcpdump -i any -s 65535
+
+ return 0
+}
+
+ovs_add_flow () {
+ info "Adding flow to DP: sbx:$1 br:$2 flow:$3 act:$4"
+ ovs_sbx "$1" python3 $ovs_base/ovs-dpctl.py add-flow "$2" "$3" "$4"
+ if [ $? -ne 0 ]; then
+ echo "Flow [ $3 : $4 ] failed" >> ${ovs_dir}/debug.log
+ return 1
+ fi
+ return 0
+}
+
+ovs_del_flows () {
+ info "Deleting all flows from DP: sbx:$1 br:$2"
+ ovs_sbx "$1" python3 $ovs_base/ovs-dpctl.py del-flows "$2"
+ return 0
+}
+
+ovs_drop_record_and_run () {
+ local sbx=$1
+ shift
+
+ perf record -a -q -e skb:kfree_skb -o ${ovs_dir}/perf.data $* \
+ >> ${ovs_dir}/stdout 2>> ${ovs_dir}/stderr
+ return $?
+}
+
+ovs_drop_reason_count()
+{
+ local reason=$1
+
+ local perf_output=`perf script -i ${ovs_dir}/perf.data -F trace:event,trace`
+ local pattern="skb:kfree_skb:.*reason: $reason"
+
+ return `echo "$perf_output" | grep "$pattern" | wc -l`
+}
+
+usage() {
+ echo
+ echo "$0 [OPTIONS] [TEST]..."
+ echo "If no TEST argument is given, all tests will be run."
+ echo
+ echo "Options"
+ echo " -t: capture traffic via tcpdump"
+ echo " -v: verbose"
+ echo " -p: pause on failure"
+ echo
+ echo "Available tests${tests}"
+ exit 1
+}
+
+# drop_reason test
+# - drop packets and verify the right drop reason is reported
+test_drop_reason() {
+ which perf >/dev/null 2>&1 || return $ksft_skip
+
+ sbx_add "test_drop_reason" || return $?
+
+ ovs_add_dp "test_drop_reason" dropreason || return 1
+
+ info "create namespaces"
+ for ns in client server; do
+ ovs_add_netns_and_veths "test_drop_reason" "dropreason" "$ns" \
+ "${ns:0:1}0" "${ns:0:1}1" || return 1
+ done
+
+ # Setup client namespace
+ ip netns exec client ip addr add 172.31.110.10/24 dev c1
+ ip netns exec client ip link set c1 up
+
+ # Setup server namespace
+ ip netns exec server ip addr add 172.31.110.20/24 dev s1
+ ip netns exec server ip link set s1 up
+
+ # Check if drop reasons can be sent
+ ovs_add_flow "test_drop_reason" dropreason \
+ 'in_port(1),eth(),eth_type(0x0806),arp()' 'drop(10)' 2>/dev/null
+ if [ $? == 1 ]; then
+ info "no support for drop reasons - skipping"
+ ovs_exit_sig
+ return $ksft_skip
+ fi
+
+ ovs_del_flows "test_drop_reason" dropreason
+
+ # Allow ARP
+ ovs_add_flow "test_drop_reason" dropreason \
+ 'in_port(1),eth(),eth_type(0x0806),arp()' '2' || return 1
+ ovs_add_flow "test_drop_reason" dropreason \
+ 'in_port(2),eth(),eth_type(0x0806),arp()' '1' || return 1
+
+ # Allow client ICMP traffic but drop return path
+ ovs_add_flow "test_drop_reason" dropreason \
+ "in_port(1),eth(),eth_type(0x0800),ipv4(src=172.31.110.10,proto=1),icmp()" '2'
+ ovs_add_flow "test_drop_reason" dropreason \
+ "in_port(2),eth(),eth_type(0x0800),ipv4(src=172.31.110.20,proto=1),icmp()" 'drop'
+
+ ovs_drop_record_and_run "test_drop_reason" ip netns exec client ping -c 2 172.31.110.20
+ ovs_drop_reason_count 0x30001 # OVS_DROP_FLOW_ACTION
+ if [[ "$?" -ne "2" ]]; then
+ info "Did not detect expected drops: $?"
+ return 1
+ fi
+
+ # Drop UDP 6000 traffic with an explicit action and an error code.
+ ovs_add_flow "test_drop_reason" dropreason \
+ "in_port(1),eth(),eth_type(0x0800),ipv4(src=172.31.110.10,proto=17),udp(dst=6000)" \
+ 'drop(42)'
+ # Drop UDP 7000 traffic with an explicit action with no error code.
+ ovs_add_flow "test_drop_reason" dropreason \
+ "in_port(1),eth(),eth_type(0x0800),ipv4(src=172.31.110.10,proto=17),udp(dst=7000)" \
+ 'drop(0)'
+
+ ovs_drop_record_and_run \
+ "test_drop_reason" ip netns exec client nc -i 1 -zuv 172.31.110.20 6000
+ ovs_drop_reason_count 0x30004 # OVS_DROP_EXPLICIT_ACTION_ERROR
+ if [[ "$?" -ne "1" ]]; then
+ info "Did not detect expected explicit error drops: $?"
+ return 1
+ fi
+
+ ovs_drop_record_and_run \
+ "test_drop_reason" ip netns exec client nc -i 1 -zuv 172.31.110.20 7000
+ ovs_drop_reason_count 0x30003 # OVS_DROP_EXPLICIT_ACTION
+ if [[ "$?" -ne "1" ]]; then
+ info "Did not detect expected explicit drops: $?"
+ return 1
+ fi
+
+ return 0
+}
+
+# arp_ping test
+# - client has 1500 byte MTU
+# - server has 1500 byte MTU
+# - send ARP ping between two ns
+test_arp_ping () {
+
+ which arping >/dev/null 2>&1 || return $ksft_skip
+
+ sbx_add "test_arp_ping" || return $?
+
+ ovs_add_dp "test_arp_ping" arpping || return 1
+
+ info "create namespaces"
+ for ns in client server; do
+ ovs_add_netns_and_veths "test_arp_ping" "arpping" "$ns" \
+ "${ns:0:1}0" "${ns:0:1}1" || return 1
+ done
+
+ # Setup client namespace
+ ip netns exec client ip addr add 172.31.110.10/24 dev c1
+ ip netns exec client ip link set c1 up
+ HW_CLIENT=`ip netns exec client ip link show dev c1 | grep -E 'link/ether [0-9a-f:]+' | awk '{print $2;}'`
+ info "Client hwaddr: $HW_CLIENT"
+
+ # Setup server namespace
+ ip netns exec server ip addr add 172.31.110.20/24 dev s1
+ ip netns exec server ip link set s1 up
+ HW_SERVER=`ip netns exec server ip link show dev s1 | grep -E 'link/ether [0-9a-f:]+' | awk '{print $2;}'`
+ info "Server hwaddr: $HW_SERVER"
+
+ ovs_add_flow "test_arp_ping" arpping \
+ "in_port(1),eth(),eth_type(0x0806),arp(sip=172.31.110.10,tip=172.31.110.20,sha=$HW_CLIENT,tha=ff:ff:ff:ff:ff:ff)" '2' || return 1
+ ovs_add_flow "test_arp_ping" arpping \
+ "in_port(2),eth(),eth_type(0x0806),arp()" '1' || return 1
+
+ ovs_sbx "test_arp_ping" ip netns exec client arping -I c1 172.31.110.20 -c 1 || return 1
+
+ return 0
+}
+
+# ct_connect_v4 test
+# - client has 1500 byte MTU
+# - server has 1500 byte MTU
+# - use ICMP to ping in each direction
+# - only allow CT state stuff to pass through new in c -> s
+test_ct_connect_v4 () {
+
+ which nc >/dev/null 2>/dev/null || return $ksft_skip
+
+ sbx_add "test_ct_connect_v4" || return $?
+
+ ovs_add_dp "test_ct_connect_v4" ct4 || return 1
+ info "create namespaces"
+ for ns in client server; do
+ ovs_add_netns_and_veths "test_ct_connect_v4" "ct4" "$ns" \
+ "${ns:0:1}0" "${ns:0:1}1" || return 1
+ done
+
+ ip netns exec client ip addr add 172.31.110.10/24 dev c1
+ ip netns exec client ip link set c1 up
+ ip netns exec server ip addr add 172.31.110.20/24 dev s1
+ ip netns exec server ip link set s1 up
+
+ # Add forwarding for ARP and ip packets - completely wildcarded
+ ovs_add_flow "test_ct_connect_v4" ct4 \
+ 'in_port(1),eth(),eth_type(0x0806),arp()' '2' || return 1
+ ovs_add_flow "test_ct_connect_v4" ct4 \
+ 'in_port(2),eth(),eth_type(0x0806),arp()' '1' || return 1
+ ovs_add_flow "test_ct_connect_v4" ct4 \
+ 'ct_state(-trk),eth(),eth_type(0x0800),ipv4()' \
+ 'ct(commit),recirc(0x1)' || return 1
+ ovs_add_flow "test_ct_connect_v4" ct4 \
+ 'recirc_id(0x1),ct_state(+trk+new),in_port(1),eth(),eth_type(0x0800),ipv4(src=172.31.110.10)' \
+ '2' || return 1
+ ovs_add_flow "test_ct_connect_v4" ct4 \
+ 'recirc_id(0x1),ct_state(+trk+est),in_port(1),eth(),eth_type(0x0800),ipv4(src=172.31.110.10)' \
+ '2' || return 1
+ ovs_add_flow "test_ct_connect_v4" ct4 \
+ 'recirc_id(0x1),ct_state(+trk+est),in_port(2),eth(),eth_type(0x0800),ipv4(dst=172.31.110.10)' \
+ '1' || return 1
+ ovs_add_flow "test_ct_connect_v4" ct4 \
+ 'recirc_id(0x1),ct_state(+trk+inv),eth(),eth_type(0x0800),ipv4()' 'drop' || \
+ return 1
+
+ # do a ping
+ ovs_sbx "test_ct_connect_v4" ip netns exec client ping 172.31.110.20 -c 3 || return 1
+
+ # create an echo server in 'server'
+ echo "server" | \
+ ovs_netns_spawn_daemon "test_ct_connect_v4" "server" \
+ nc -lvnp 4443
+ ovs_sbx "test_ct_connect_v4" ip netns exec client nc -i 1 -zv 172.31.110.20 4443 || return 1
+
+ # Now test in the other direction (should fail)
+ echo "client" | \
+ ovs_netns_spawn_daemon "test_ct_connect_v4" "client" \
+ nc -lvnp 4443
+ ovs_sbx "test_ct_connect_v4" ip netns exec client nc -i 1 -zv 172.31.110.10 4443
+ if [ $? == 0 ]; then
+ info "ct connect to client was successful"
+ return 1
+ fi
+
+ info "done..."
+ return 0
+}
+
+# connect_v4 test
+# - client has 1500 byte MTU
+# - server has 1500 byte MTU
+# - use ICMP to ping in each direction
+test_connect_v4 () {
+
+ sbx_add "test_connect_v4" || return $?
+
+ ovs_add_dp "test_connect_v4" cv4 || return 1
+
+ info "create namespaces"
+ for ns in client server; do
+ ovs_add_netns_and_veths "test_connect_v4" "cv4" "$ns" \
+ "${ns:0:1}0" "${ns:0:1}1" || return 1
+ done
+
+
+ ip netns exec client ip addr add 172.31.110.10/24 dev c1
+ ip netns exec client ip link set c1 up
+ ip netns exec server ip addr add 172.31.110.20/24 dev s1
+ ip netns exec server ip link set s1 up
+
+ # Add forwarding for ARP and ip packets - completely wildcarded
+ ovs_add_flow "test_connect_v4" cv4 \
+ 'in_port(1),eth(),eth_type(0x0806),arp()' '2' || return 1
+ ovs_add_flow "test_connect_v4" cv4 \
+ 'in_port(2),eth(),eth_type(0x0806),arp()' '1' || return 1
+ ovs_add_flow "test_connect_v4" cv4 \
+ 'in_port(1),eth(),eth_type(0x0800),ipv4(src=172.31.110.10)' '2' || return 1
+ ovs_add_flow "test_connect_v4" cv4 \
+ 'in_port(2),eth(),eth_type(0x0800),ipv4(src=172.31.110.20)' '1' || return 1
+
+ # do a ping
+ ovs_sbx "test_connect_v4" ip netns exec client ping 172.31.110.20 -c 3 || return 1
+
+ info "done..."
+ return 0
+}
+
+# nat_connect_v4 test
+# - client has 1500 byte MTU
+# - server has 1500 byte MTU
+# - use ICMP to ping in each direction
+# - only allow CT state stuff to pass through new in c -> s
+test_nat_connect_v4 () {
+ which nc >/dev/null 2>/dev/null || return $ksft_skip
+
+ sbx_add "test_nat_connect_v4" || return $?
+
+ ovs_add_dp "test_nat_connect_v4" nat4 || return 1
+ info "create namespaces"
+ for ns in client server; do
+ ovs_add_netns_and_veths "test_nat_connect_v4" "nat4" "$ns" \
+ "${ns:0:1}0" "${ns:0:1}1" || return 1
+ done
+
+ ip netns exec client ip addr add 172.31.110.10/24 dev c1
+ ip netns exec client ip link set c1 up
+ ip netns exec server ip addr add 172.31.110.20/24 dev s1
+ ip netns exec server ip link set s1 up
+
+ ip netns exec client ip route add default via 172.31.110.20
+
+ ovs_add_flow "test_nat_connect_v4" nat4 \
+ 'in_port(1),eth(),eth_type(0x0806),arp()' '2' || return 1
+ ovs_add_flow "test_nat_connect_v4" nat4 \
+ 'in_port(2),eth(),eth_type(0x0806),arp()' '1' || return 1
+ ovs_add_flow "test_nat_connect_v4" nat4 \
+ "ct_state(-trk),in_port(1),eth(),eth_type(0x0800),ipv4(dst=192.168.0.20)" \
+ "ct(commit,nat(dst=172.31.110.20)),recirc(0x1)"
+ ovs_add_flow "test_nat_connect_v4" nat4 \
+ "ct_state(-trk),in_port(2),eth(),eth_type(0x0800),ipv4()" \
+ "ct(commit,nat),recirc(0x2)"
+
+ ovs_add_flow "test_nat_connect_v4" nat4 \
+ "recirc_id(0x1),ct_state(+trk-inv),in_port(1),eth(),eth_type(0x0800),ipv4()" "2"
+ ovs_add_flow "test_nat_connect_v4" nat4 \
+ "recirc_id(0x2),ct_state(+trk-inv),in_port(2),eth(),eth_type(0x0800),ipv4()" "1"
+
+ # do a ping
+ ovs_sbx "test_nat_connect_v4" ip netns exec client ping 192.168.0.20 -c 3 || return 1
+
+ # create an echo server in 'server'
+ echo "server" | \
+ ovs_netns_spawn_daemon "test_nat_connect_v4" "server" \
+ nc -lvnp 4443
+ ovs_sbx "test_nat_connect_v4" ip netns exec client nc -i 1 -zv 192.168.0.20 4443 || return 1
+
+ # Now test in the other direction (should fail)
+ echo "client" | \
+ ovs_netns_spawn_daemon "test_nat_connect_v4" "client" \
+ nc -lvnp 4443
+ ovs_sbx "test_nat_connect_v4" ip netns exec client nc -i 1 -zv 172.31.110.10 4443
+ if [ $? == 0 ]; then
+ info "connect to client was successful"
+ return 1
+ fi
+
+ info "done..."
+ return 0
+}
+
+# nat_related_v4 test
+# - client->server ip packets go via SNAT
+# - client solicits ICMP destination unreachable packet from server
+# - undo NAT for ICMP reply and test dst ip has been updated
+test_nat_related_v4 () {
+ which nc >/dev/null 2>/dev/null || return $ksft_skip
+
+ sbx_add "test_nat_related_v4" || return $?
+
+ ovs_add_dp "test_nat_related_v4" natrelated4 || return 1
+ info "create namespaces"
+ for ns in client server; do
+ ovs_add_netns_and_veths "test_nat_related_v4" "natrelated4" "$ns" \
+ "${ns:0:1}0" "${ns:0:1}1" || return 1
+ done
+
+ ip netns exec client ip addr add 172.31.110.10/24 dev c1
+ ip netns exec client ip link set c1 up
+ ip netns exec server ip addr add 172.31.110.20/24 dev s1
+ ip netns exec server ip link set s1 up
+
+ ip netns exec server ip route add 192.168.0.20/32 via 172.31.110.10
+
+ # Allow ARP
+ ovs_add_flow "test_nat_related_v4" natrelated4 \
+ "in_port(1),eth(),eth_type(0x0806),arp()" "2" || return 1
+ ovs_add_flow "test_nat_related_v4" natrelated4 \
+ "in_port(2),eth(),eth_type(0x0806),arp()" "1" || return 1
+
+ # Allow IP traffic from client->server, rewrite source IP with SNAT to 192.168.0.20
+ ovs_add_flow "test_nat_related_v4" natrelated4 \
+ "ct_state(-trk),in_port(1),eth(),eth_type(0x0800),ipv4(dst=172.31.110.20)" \
+ "ct(commit,nat(src=192.168.0.20)),recirc(0x1)" || return 1
+ ovs_add_flow "test_nat_related_v4" natrelated4 \
+ "recirc_id(0x1),ct_state(+trk-inv),in_port(1),eth(),eth_type(0x0800),ipv4()" \
+ "2" || return 1
+
+ # Allow related ICMP responses back from server and undo NAT to restore original IP
+ # Drop any ICMP related packets where dst ip hasn't been restored back to original IP
+ ovs_add_flow "test_nat_related_v4" natrelated4 \
+ "ct_state(-trk),in_port(2),eth(),eth_type(0x0800),ipv4()" \
+ "ct(commit,nat),recirc(0x2)" || return 1
+ ovs_add_flow "test_nat_related_v4" natrelated4 \
+ "recirc_id(0x2),ct_state(+rel+trk),in_port(2),eth(),eth_type(0x0800),ipv4(src=172.31.110.20,dst=172.31.110.10,proto=1),icmp()" \
+ "1" || return 1
+ ovs_add_flow "test_nat_related_v4" natrelated4 \
+ "recirc_id(0x2),ct_state(+rel+trk),in_port(2),eth(),eth_type(0x0800),ipv4(dst=192.168.0.20,proto=1),icmp()" \
+ "drop" || return 1
+
+ # Solicit destination unreachable response from server
+ ovs_sbx "test_nat_related_v4" ip netns exec client \
+ bash -c "echo a | nc -u -w 1 172.31.110.20 10000"
+
+ # Check to make sure no packets matched the drop rule with incorrect dst ip
+ python3 "$ovs_base/ovs-dpctl.py" dump-flows natrelated4 \
+ | grep "drop" | grep "packets:0" >/dev/null || return 1
+
+ info "done..."
+ return 0
+}
+
+# netlink_validation
+# - Create a dp
+# - check no warning with "old version" simulation
+test_netlink_checks () {
+ sbx_add "test_netlink_checks" || return 1
+
+ info "setting up new DP"
+ ovs_add_dp "test_netlink_checks" nv0 || return 1
+ # now try again
+ PRE_TEST=$(dmesg | grep -E "RIP: [0-9a-fA-Fx]+:ovs_dp_cmd_new\+")
+ ovs_add_dp "test_netlink_checks" nv0 -V 0 || return 1
+ POST_TEST=$(dmesg | grep -E "RIP: [0-9a-fA-Fx]+:ovs_dp_cmd_new\+")
+ if [ "$PRE_TEST" != "$POST_TEST" ]; then
+ info "failed - gen warning"
+ return 1
+ fi
+
+ ovs_add_netns_and_veths "test_netlink_checks" nv0 left left0 l0 || \
+ return 1
+ ovs_add_netns_and_veths "test_netlink_checks" nv0 right right0 r0 || \
+ return 1
+ [ $(python3 $ovs_base/ovs-dpctl.py show nv0 | grep port | \
+ wc -l) == 3 ] || \
+ return 1
+ ovs_del_if "test_netlink_checks" nv0 right0 || return 1
+ [ $(python3 $ovs_base/ovs-dpctl.py show nv0 | grep port | \
+ wc -l) == 2 ] || \
+ return 1
+
+ info "Checking clone depth"
+ ERR_MSG="Flow actions may not be safe on all matching packets"
+ PRE_TEST=$(dmesg | grep -c "${ERR_MSG}")
+ ovs_add_flow "test_netlink_checks" nv0 \
+ 'in_port(1),eth(),eth_type(0x800),ipv4()' \
+ 'clone(clone(clone(clone(clone(clone(clone(clone(clone(clone(clone(clone(clone(clone(clone(clone(clone(drop)))))))))))))))))' \
+ >/dev/null 2>&1 && return 1
+ POST_TEST=$(dmesg | grep -c "${ERR_MSG}")
+
+ if [ "$PRE_TEST" == "$POST_TEST" ]; then
+ info "failed - clone depth too large"
+ return 1
+ fi
+
+ PRE_TEST=$(dmesg | grep -c "${ERR_MSG}")
+ ovs_add_flow "test_netlink_checks" nv0 \
+ 'in_port(1),eth(),eth_type(0x0806),arp()' 'drop(0),2' \
+ &> /dev/null && return 1
+ POST_TEST=$(dmesg | grep -c "${ERR_MSG}")
+ if [ "$PRE_TEST" == "$POST_TEST" ]; then
+ info "failed - error not generated"
+ return 1
+ fi
+ return 0
+}
+
+test_upcall_interfaces() {
+ sbx_add "test_upcall_interfaces" || return 1
+
+ info "setting up new DP"
+ ovs_add_dp "test_upcall_interfaces" ui0 -V 2:1 || return 1
+
+ ovs_add_netns_and_veths "test_upcall_interfaces" ui0 upc left0 l0 \
+ 172.31.110.1/24 -u || return 1
+
+ sleep 1
+ info "sending arping"
+ ip netns exec upc arping -I l0 172.31.110.20 -c 1 \
+ >$ovs_dir/arping.stdout 2>$ovs_dir/arping.stderr
+
+ grep -E "MISS upcall\[0/yes\]: .*arp\(sip=172.31.110.1,tip=172.31.110.20,op=1,sha=" $ovs_dir/left0.out >/dev/null 2>&1 || return 1
+ return 0
+}
+
+run_test() {
+ (
+ tname="$1"
+ tdesc="$2"
+
+ if ! lsmod | grep openvswitch >/dev/null 2>&1; then
+ stdbuf -o0 printf "TEST: %-60s [NOMOD]\n" "${tdesc}"
+ return $ksft_skip
+ fi
+
+ if python3 ovs-dpctl.py -h 2>&1 | \
+ grep -E "Need to (install|upgrade) the python" >/dev/null 2>&1; then
+ stdbuf -o0 printf "TEST: %-60s [PYLIB]\n" "${tdesc}"
+ return $ksft_skip
+ fi
+ printf "TEST: %-60s [START]\n" "${tname}"
+
+ unset IFS
+
+ eval test_${tname}
+ ret=$?
+
+ if [ $ret -eq 0 ]; then
+ printf "TEST: %-60s [ OK ]\n" "${tdesc}"
+ ovs_exit_sig
+ rm -rf "$ovs_dir"
+ elif [ $ret -eq 1 ]; then
+ printf "TEST: %-60s [FAIL]\n" "${tdesc}"
+ if [ "${PAUSE_ON_FAIL}" = "yes" ]; then
+ echo
+ echo "Pausing. Logs in $ovs_dir/. Hit enter to continue"
+ read a
+ fi
+ ovs_exit_sig
+ [ "${PAUSE_ON_FAIL}" = "yes" ] || rm -rf "$ovs_dir"
+ exit 1
+ elif [ $ret -eq $ksft_skip ]; then
+ printf "TEST: %-60s [SKIP]\n" "${tdesc}"
+ elif [ $ret -eq 2 ]; then
+ rm -rf test_${tname}
+ run_test "$1" "$2"
+ fi
+
+ return $ret
+ )
+ ret=$?
+ case $ret in
+ 0)
+ [ $all_skipped = true ] && [ $exitcode=$ksft_skip ] && exitcode=0
+ all_skipped=false
+ ;;
+ $ksft_skip)
+ [ $all_skipped = true ] && exitcode=$ksft_skip
+ ;;
+ *)
+ all_skipped=false
+ exitcode=1
+ ;;
+ esac
+
+ return $ret
+}
+
+
+exitcode=0
+desc=0
+all_skipped=true
+
+while getopts :pvt o
+do
+ case $o in
+ p) PAUSE_ON_FAIL=yes;;
+ v) VERBOSE=1;;
+ t) if which tcpdump > /dev/null 2>&1; then
+ TRACING=1
+ else
+ echo "=== tcpdump not available, tracing disabled"
+ fi
+ ;;
+ *) usage;;
+ esac
+done
+shift $(($OPTIND-1))
+
+IFS="
+"
+
+for arg do
+ # Check first that all requested tests are available before running any
+ command -v > /dev/null "test_${arg}" || { echo "=== Test ${arg} not found"; usage; }
+done
+
+name=""
+desc=""
+for t in ${tests}; do
+ [ "${name}" = "" ] && name="${t}" && continue
+ [ "${desc}" = "" ] && desc="${t}"
+
+ run_this=1
+ for arg do
+ [ "${arg}" != "${arg#--*}" ] && continue
+ [ "${arg}" = "${name}" ] && run_this=1 && break
+ run_this=0
+ done
+ if [ $run_this -eq 1 ]; then
+ run_test "${name}" "${desc}"
+ fi
+ name=""
+ desc=""
+done
+
+exit ${exitcode}
diff --git a/tools/testing/selftests/net/openvswitch/ovs-dpctl.py b/tools/testing/selftests/net/openvswitch/ovs-dpctl.py
new file mode 100644
index 000000000000..5e0e539a323d
--- /dev/null
+++ b/tools/testing/selftests/net/openvswitch/ovs-dpctl.py
@@ -0,0 +1,2236 @@
+#!/usr/bin/env python3
+# SPDX-License-Identifier: GPL-2.0
+
+# Controls the openvswitch module. Part of the kselftest suite, but
+# can be used for some diagnostic purpose as well.
+
+import argparse
+import errno
+import ipaddress
+import logging
+import multiprocessing
+import re
+import struct
+import sys
+import time
+import types
+import uuid
+
+try:
+ from pyroute2 import NDB
+
+ from pyroute2.netlink import NLA_F_NESTED
+ from pyroute2.netlink import NLM_F_ACK
+ from pyroute2.netlink import NLM_F_DUMP
+ from pyroute2.netlink import NLM_F_REQUEST
+ from pyroute2.netlink import genlmsg
+ from pyroute2.netlink import nla
+ from pyroute2.netlink import nlmsg_atoms
+ from pyroute2.netlink.exceptions import NetlinkError
+ from pyroute2.netlink.generic import GenericNetlinkSocket
+ import pyroute2
+
+except ModuleNotFoundError:
+ print("Need to install the python pyroute2 package >= 0.6.")
+ sys.exit(0)
+
+
+OVS_DATAPATH_FAMILY = "ovs_datapath"
+OVS_VPORT_FAMILY = "ovs_vport"
+OVS_FLOW_FAMILY = "ovs_flow"
+OVS_PACKET_FAMILY = "ovs_packet"
+OVS_METER_FAMILY = "ovs_meter"
+OVS_CT_LIMIT_FAMILY = "ovs_ct_limit"
+
+OVS_DATAPATH_VERSION = 2
+OVS_DP_CMD_NEW = 1
+OVS_DP_CMD_DEL = 2
+OVS_DP_CMD_GET = 3
+OVS_DP_CMD_SET = 4
+
+OVS_VPORT_CMD_NEW = 1
+OVS_VPORT_CMD_DEL = 2
+OVS_VPORT_CMD_GET = 3
+OVS_VPORT_CMD_SET = 4
+
+OVS_FLOW_CMD_NEW = 1
+OVS_FLOW_CMD_DEL = 2
+OVS_FLOW_CMD_GET = 3
+OVS_FLOW_CMD_SET = 4
+
+
+def macstr(mac):
+ outstr = ":".join(["%02X" % i for i in mac])
+ return outstr
+
+
+def strcspn(str1, str2):
+ tot = 0
+ for char in str1:
+ if str2.find(char) != -1:
+ return tot
+ tot += 1
+ return tot
+
+
+def strspn(str1, str2):
+ tot = 0
+ for char in str1:
+ if str2.find(char) == -1:
+ return tot
+ tot += 1
+ return tot
+
+
+def intparse(statestr, defmask="0xffffffff"):
+ totalparse = strspn(statestr, "0123456789abcdefABCDEFx/")
+ # scan until "/"
+ count = strspn(statestr, "x0123456789abcdefABCDEF")
+
+ firstnum = statestr[:count]
+ if firstnum[-1] == "/":
+ firstnum = firstnum[:-1]
+ k = int(firstnum, 0)
+
+ m = None
+ if defmask is not None:
+ secondnum = defmask
+ if statestr[count] == "/":
+ secondnum = statestr[count + 1 :] # this is wrong...
+ m = int(secondnum, 0)
+
+ return statestr[totalparse + 1 :], k, m
+
+
+def parse_flags(flag_str, flag_vals):
+ bitResult = 0
+ maskResult = 0
+
+ if len(flag_str) == 0:
+ return flag_str, bitResult, maskResult
+
+ if flag_str[0].isdigit():
+ idx = 0
+ while flag_str[idx].isdigit() or flag_str[idx] == "x":
+ idx += 1
+ digits = flag_str[:idx]
+ flag_str = flag_str[idx:]
+
+ bitResult = int(digits, 0)
+ maskResult = int(digits, 0)
+
+ while len(flag_str) > 0 and (flag_str[0] == "+" or flag_str[0] == "-"):
+ if flag_str[0] == "+":
+ setFlag = True
+ elif flag_str[0] == "-":
+ setFlag = False
+
+ flag_str = flag_str[1:]
+
+ flag_len = 0
+ while (
+ flag_str[flag_len] != "+"
+ and flag_str[flag_len] != "-"
+ and flag_str[flag_len] != ","
+ and flag_str[flag_len] != ")"
+ ):
+ flag_len += 1
+
+ flag = flag_str[0:flag_len]
+
+ if flag in flag_vals:
+ if maskResult & flag_vals[flag]:
+ raise KeyError(
+ "Flag %s set once, cannot be set in multiples" % flag
+ )
+
+ if setFlag:
+ bitResult |= flag_vals[flag]
+
+ maskResult |= flag_vals[flag]
+ else:
+ raise KeyError("Missing flag value: %s" % flag)
+
+ flag_str = flag_str[flag_len:]
+
+ return flag_str, bitResult, maskResult
+
+
+def parse_ct_state(statestr):
+ ct_flags = {
+ "new": 1 << 0,
+ "est": 1 << 1,
+ "rel": 1 << 2,
+ "rpl": 1 << 3,
+ "inv": 1 << 4,
+ "trk": 1 << 5,
+ "snat": 1 << 6,
+ "dnat": 1 << 7,
+ }
+
+ return parse_flags(statestr, ct_flags)
+
+
+def convert_mac(data):
+ def to_bytes(mac):
+ mac_split = mac.split(":")
+ ret = bytearray([int(i, 16) for i in mac_split])
+ return bytes(ret)
+
+ mac_str, _, mask_str = data.partition('/')
+
+ if not mac_str:
+ mac_str = mask_str = "00:00:00:00:00:00"
+ elif not mask_str:
+ mask_str = "FF:FF:FF:FF:FF:FF"
+
+ return to_bytes(mac_str), to_bytes(mask_str)
+
+def convert_ipv4(data):
+ ip, _, mask = data.partition('/')
+
+ if not ip:
+ ip = mask = 0
+ elif not mask:
+ mask = 0xFFFFFFFF
+ elif mask.isdigit():
+ mask = (0xFFFFFFFF << (32 - int(mask))) & 0xFFFFFFFF
+
+ return int(ipaddress.IPv4Address(ip)), int(ipaddress.IPv4Address(mask))
+
+def convert_int(size):
+ def convert_int_sized(data):
+ value, _, mask = data.partition('/')
+
+ if not value:
+ return 0, 0
+ elif not mask:
+ return int(value, 0), pow(2, size) - 1
+ else:
+ return int(value, 0), int(mask, 0)
+
+ return convert_int_sized
+
+def parse_starts_block(block_str, scanstr, returnskipped, scanregex=False):
+ if scanregex:
+ m = re.search(scanstr, block_str)
+ if m is None:
+ if returnskipped:
+ return block_str
+ return False
+ if returnskipped:
+ block_str = block_str[len(m.group(0)) :]
+ return block_str
+ return True
+
+ if block_str.startswith(scanstr):
+ if returnskipped:
+ block_str = block_str[len(scanstr) :]
+ else:
+ return True
+
+ if returnskipped:
+ return block_str
+
+ return False
+
+
+def parse_extract_field(
+ block_str, fieldstr, scanfmt, convert, masked=False, defval=None
+):
+ if fieldstr and not block_str.startswith(fieldstr):
+ return block_str, defval
+
+ if fieldstr:
+ str_skiplen = len(fieldstr)
+ str_skipped = block_str[str_skiplen:]
+ if str_skiplen == 0:
+ return str_skipped, defval
+ else:
+ str_skiplen = 0
+ str_skipped = block_str
+
+ m = re.search(scanfmt, str_skipped)
+ if m is None:
+ raise ValueError("Bad fmt string")
+
+ data = m.group(0)
+ if convert:
+ data = convert(m.group(0))
+
+ str_skipped = str_skipped[len(m.group(0)) :]
+ if masked:
+ if str_skipped[0] == "/":
+ raise ValueError("Masking support TBD...")
+
+ str_skipped = str_skipped[strspn(str_skipped, ", ") :]
+ return str_skipped, data
+
+
+class ovs_dp_msg(genlmsg):
+ # include the OVS version
+ # We need a custom header rather than just being able to rely on
+ # genlmsg because fields ends up not expressing everything correctly
+ # if we use the canonical example of setting fields = (('customfield',),)
+ fields = genlmsg.fields + (("dpifindex", "I"),)
+
+
+class ovsactions(nla):
+ nla_flags = NLA_F_NESTED
+
+ nla_map = (
+ ("OVS_ACTION_ATTR_UNSPEC", "none"),
+ ("OVS_ACTION_ATTR_OUTPUT", "uint32"),
+ ("OVS_ACTION_ATTR_USERSPACE", "userspace"),
+ ("OVS_ACTION_ATTR_SET", "none"),
+ ("OVS_ACTION_ATTR_PUSH_VLAN", "none"),
+ ("OVS_ACTION_ATTR_POP_VLAN", "flag"),
+ ("OVS_ACTION_ATTR_SAMPLE", "none"),
+ ("OVS_ACTION_ATTR_RECIRC", "uint32"),
+ ("OVS_ACTION_ATTR_HASH", "none"),
+ ("OVS_ACTION_ATTR_PUSH_MPLS", "none"),
+ ("OVS_ACTION_ATTR_POP_MPLS", "flag"),
+ ("OVS_ACTION_ATTR_SET_MASKED", "none"),
+ ("OVS_ACTION_ATTR_CT", "ctact"),
+ ("OVS_ACTION_ATTR_TRUNC", "uint32"),
+ ("OVS_ACTION_ATTR_PUSH_ETH", "none"),
+ ("OVS_ACTION_ATTR_POP_ETH", "flag"),
+ ("OVS_ACTION_ATTR_CT_CLEAR", "flag"),
+ ("OVS_ACTION_ATTR_PUSH_NSH", "none"),
+ ("OVS_ACTION_ATTR_POP_NSH", "flag"),
+ ("OVS_ACTION_ATTR_METER", "none"),
+ ("OVS_ACTION_ATTR_CLONE", "recursive"),
+ ("OVS_ACTION_ATTR_CHECK_PKT_LEN", "none"),
+ ("OVS_ACTION_ATTR_ADD_MPLS", "none"),
+ ("OVS_ACTION_ATTR_DEC_TTL", "none"),
+ ("OVS_ACTION_ATTR_DROP", "uint32"),
+ )
+
+ class ctact(nla):
+ nla_flags = NLA_F_NESTED
+
+ nla_map = (
+ ("OVS_CT_ATTR_NONE", "none"),
+ ("OVS_CT_ATTR_COMMIT", "flag"),
+ ("OVS_CT_ATTR_ZONE", "uint16"),
+ ("OVS_CT_ATTR_MARK", "none"),
+ ("OVS_CT_ATTR_LABELS", "none"),
+ ("OVS_CT_ATTR_HELPER", "asciiz"),
+ ("OVS_CT_ATTR_NAT", "natattr"),
+ ("OVS_CT_ATTR_FORCE_COMMIT", "flag"),
+ ("OVS_CT_ATTR_EVENTMASK", "uint32"),
+ ("OVS_CT_ATTR_TIMEOUT", "asciiz"),
+ )
+
+ class natattr(nla):
+ nla_flags = NLA_F_NESTED
+
+ nla_map = (
+ ("OVS_NAT_ATTR_NONE", "none"),
+ ("OVS_NAT_ATTR_SRC", "flag"),
+ ("OVS_NAT_ATTR_DST", "flag"),
+ ("OVS_NAT_ATTR_IP_MIN", "ipaddr"),
+ ("OVS_NAT_ATTR_IP_MAX", "ipaddr"),
+ ("OVS_NAT_ATTR_PROTO_MIN", "uint16"),
+ ("OVS_NAT_ATTR_PROTO_MAX", "uint16"),
+ ("OVS_NAT_ATTR_PERSISTENT", "flag"),
+ ("OVS_NAT_ATTR_PROTO_HASH", "flag"),
+ ("OVS_NAT_ATTR_PROTO_RANDOM", "flag"),
+ )
+
+ def dpstr(self, more=False):
+ print_str = "nat("
+
+ if self.get_attr("OVS_NAT_ATTR_SRC"):
+ print_str += "src"
+ elif self.get_attr("OVS_NAT_ATTR_DST"):
+ print_str += "dst"
+ else:
+ print_str += "XXX-unknown-nat"
+
+ if self.get_attr("OVS_NAT_ATTR_IP_MIN") or self.get_attr(
+ "OVS_NAT_ATTR_IP_MAX"
+ ):
+ if self.get_attr("OVS_NAT_ATTR_IP_MIN"):
+ print_str += "=%s," % str(
+ self.get_attr("OVS_NAT_ATTR_IP_MIN")
+ )
+
+ if self.get_attr("OVS_NAT_ATTR_IP_MAX"):
+ print_str += "-%s," % str(
+ self.get_attr("OVS_NAT_ATTR_IP_MAX")
+ )
+ else:
+ print_str += ","
+
+ if self.get_attr("OVS_NAT_ATTR_PROTO_MIN"):
+ print_str += "proto_min=%d," % self.get_attr(
+ "OVS_NAT_ATTR_PROTO_MIN"
+ )
+
+ if self.get_attr("OVS_NAT_ATTR_PROTO_MAX"):
+ print_str += "proto_max=%d," % self.get_attr(
+ "OVS_NAT_ATTR_PROTO_MAX"
+ )
+
+ if self.get_attr("OVS_NAT_ATTR_PERSISTENT"):
+ print_str += "persistent,"
+ if self.get_attr("OVS_NAT_ATTR_HASH"):
+ print_str += "hash,"
+ if self.get_attr("OVS_NAT_ATTR_RANDOM"):
+ print_str += "random"
+ print_str += ")"
+ return print_str
+
+ def dpstr(self, more=False):
+ print_str = "ct("
+
+ if self.get_attr("OVS_CT_ATTR_COMMIT") is not None:
+ print_str += "commit,"
+ if self.get_attr("OVS_CT_ATTR_ZONE") is not None:
+ print_str += "zone=%d," % self.get_attr("OVS_CT_ATTR_ZONE")
+ if self.get_attr("OVS_CT_ATTR_HELPER") is not None:
+ print_str += "helper=%s," % self.get_attr("OVS_CT_ATTR_HELPER")
+ if self.get_attr("OVS_CT_ATTR_NAT") is not None:
+ print_str += self.get_attr("OVS_CT_ATTR_NAT").dpstr(more)
+ print_str += ","
+ if self.get_attr("OVS_CT_ATTR_FORCE_COMMIT") is not None:
+ print_str += "force,"
+ if self.get_attr("OVS_CT_ATTR_EVENTMASK") is not None:
+ print_str += "emask=0x%X," % self.get_attr(
+ "OVS_CT_ATTR_EVENTMASK"
+ )
+ if self.get_attr("OVS_CT_ATTR_TIMEOUT") is not None:
+ print_str += "timeout=%s" % self.get_attr(
+ "OVS_CT_ATTR_TIMEOUT"
+ )
+ print_str += ")"
+ return print_str
+
+ class userspace(nla):
+ nla_flags = NLA_F_NESTED
+
+ nla_map = (
+ ("OVS_USERSPACE_ATTR_UNUSED", "none"),
+ ("OVS_USERSPACE_ATTR_PID", "uint32"),
+ ("OVS_USERSPACE_ATTR_USERDATA", "array(uint8)"),
+ ("OVS_USERSPACE_ATTR_EGRESS_TUN_PORT", "uint32"),
+ )
+
+ def dpstr(self, more=False):
+ print_str = "userspace("
+ if self.get_attr("OVS_USERSPACE_ATTR_PID") is not None:
+ print_str += "pid=%d," % self.get_attr(
+ "OVS_USERSPACE_ATTR_PID"
+ )
+ if self.get_attr("OVS_USERSPACE_ATTR_USERDATA") is not None:
+ print_str += "userdata="
+ for f in self.get_attr("OVS_USERSPACE_ATTR_USERDATA"):
+ print_str += "%x." % f
+ if self.get_attr("OVS_USERSPACE_ATTR_TUN_PORT") is not None:
+ print_str += "egress_tun_port=%d" % self.get_attr(
+ "OVS_USERSPACE_ATTR_TUN_PORT"
+ )
+ print_str += ")"
+ return print_str
+
+ def dpstr(self, more=False):
+ print_str = ""
+
+ for field in self.nla_map:
+ if field[1] == "none" or self.get_attr(field[0]) is None:
+ continue
+ if print_str != "":
+ print_str += ","
+
+ if field[1] == "uint32":
+ if field[0] == "OVS_ACTION_ATTR_OUTPUT":
+ print_str += "%d" % int(self.get_attr(field[0]))
+ elif field[0] == "OVS_ACTION_ATTR_RECIRC":
+ print_str += "recirc(0x%x)" % int(self.get_attr(field[0]))
+ elif field[0] == "OVS_ACTION_ATTR_TRUNC":
+ print_str += "trunc(%d)" % int(self.get_attr(field[0]))
+ elif field[0] == "OVS_ACTION_ATTR_DROP":
+ print_str += "drop(%d)" % int(self.get_attr(field[0]))
+ elif field[1] == "flag":
+ if field[0] == "OVS_ACTION_ATTR_CT_CLEAR":
+ print_str += "ct_clear"
+ elif field[0] == "OVS_ACTION_ATTR_POP_VLAN":
+ print_str += "pop_vlan"
+ elif field[0] == "OVS_ACTION_ATTR_POP_ETH":
+ print_str += "pop_eth"
+ elif field[0] == "OVS_ACTION_ATTR_POP_NSH":
+ print_str += "pop_nsh"
+ elif field[0] == "OVS_ACTION_ATTR_POP_MPLS":
+ print_str += "pop_mpls"
+ else:
+ datum = self.get_attr(field[0])
+ if field[0] == "OVS_ACTION_ATTR_CLONE":
+ print_str += "clone("
+ print_str += datum.dpstr(more)
+ print_str += ")"
+ else:
+ print_str += datum.dpstr(more)
+
+ return print_str
+
+ def parse(self, actstr):
+ totallen = len(actstr)
+ while len(actstr) != 0:
+ parsed = False
+ parencount = 0
+ if actstr.startswith("drop"):
+ # If no reason is provided, the implicit drop is used (i.e no
+ # action). If some reason is given, an explicit action is used.
+ reason = None
+ if actstr.startswith("drop("):
+ parencount += 1
+
+ actstr, reason = parse_extract_field(
+ actstr,
+ "drop(",
+ "([0-9]+)",
+ lambda x: int(x, 0),
+ False,
+ None,
+ )
+
+ if reason is not None:
+ self["attrs"].append(["OVS_ACTION_ATTR_DROP", reason])
+ parsed = True
+ else:
+ actstr = actstr[len("drop"): ]
+ return (totallen - len(actstr))
+
+ elif parse_starts_block(actstr, "^(\d+)", False, True):
+ actstr, output = parse_extract_field(
+ actstr, None, "(\d+)", lambda x: int(x), False, "0"
+ )
+ self["attrs"].append(["OVS_ACTION_ATTR_OUTPUT", output])
+ parsed = True
+ elif parse_starts_block(actstr, "recirc(", False):
+ actstr, recircid = parse_extract_field(
+ actstr,
+ "recirc(",
+ "([0-9a-fA-Fx]+)",
+ lambda x: int(x, 0),
+ False,
+ 0,
+ )
+ parencount += 1
+ self["attrs"].append(["OVS_ACTION_ATTR_RECIRC", recircid])
+ parsed = True
+
+ parse_flat_map = (
+ ("ct_clear", "OVS_ACTION_ATTR_CT_CLEAR"),
+ ("pop_vlan", "OVS_ACTION_ATTR_POP_VLAN"),
+ ("pop_eth", "OVS_ACTION_ATTR_POP_ETH"),
+ ("pop_nsh", "OVS_ACTION_ATTR_POP_NSH"),
+ )
+
+ for flat_act in parse_flat_map:
+ if parse_starts_block(actstr, flat_act[0], False):
+ actstr = actstr[len(flat_act[0]):]
+ self["attrs"].append([flat_act[1]])
+ actstr = actstr[strspn(actstr, ", ") :]
+ parsed = True
+
+ if parse_starts_block(actstr, "clone(", False):
+ parencount += 1
+ subacts = ovsactions()
+ actstr = actstr[len("clone("):]
+ parsedLen = subacts.parse(actstr)
+ lst = []
+ self["attrs"].append(("OVS_ACTION_ATTR_CLONE", subacts))
+ actstr = actstr[parsedLen:]
+ parsed = True
+ elif parse_starts_block(actstr, "ct(", False):
+ parencount += 1
+ actstr = actstr[len("ct(") :]
+ ctact = ovsactions.ctact()
+
+ for scan in (
+ ("commit", "OVS_CT_ATTR_COMMIT", None),
+ ("force_commit", "OVS_CT_ATTR_FORCE_COMMIT", None),
+ ("zone", "OVS_CT_ATTR_ZONE", int),
+ ("mark", "OVS_CT_ATTR_MARK", int),
+ ("helper", "OVS_CT_ATTR_HELPER", lambda x, y: str(x)),
+ ("timeout", "OVS_CT_ATTR_TIMEOUT", lambda x, y: str(x)),
+ ):
+ if actstr.startswith(scan[0]):
+ actstr = actstr[len(scan[0]) :]
+ if scan[2] is not None:
+ if actstr[0] != "=":
+ raise ValueError("Invalid ct attr")
+ actstr = actstr[1:]
+ pos = strcspn(actstr, ",)")
+ datum = scan[2](actstr[:pos], 0)
+ ctact["attrs"].append([scan[1], datum])
+ actstr = actstr[pos:]
+ else:
+ ctact["attrs"].append([scan[1], None])
+ actstr = actstr[strspn(actstr, ", ") :]
+ # it seems strange to put this here, but nat() is a complex
+ # sub-action and this lets it sit anywhere in the ct() action
+ if actstr.startswith("nat"):
+ actstr = actstr[3:]
+ natact = ovsactions.ctact.natattr()
+
+ if actstr.startswith("("):
+ parencount += 1
+ t = None
+ actstr = actstr[1:]
+ if actstr.startswith("src"):
+ t = "OVS_NAT_ATTR_SRC"
+ actstr = actstr[3:]
+ elif actstr.startswith("dst"):
+ t = "OVS_NAT_ATTR_DST"
+ actstr = actstr[3:]
+
+ actstr, ip_block_min = parse_extract_field(
+ actstr, "=", "([0-9a-fA-F\.]+)", str, False
+ )
+ actstr, ip_block_max = parse_extract_field(
+ actstr, "-", "([0-9a-fA-F\.]+)", str, False
+ )
+
+ actstr, proto_min = parse_extract_field(
+ actstr, ":", "(\d+)", int, False
+ )
+ actstr, proto_max = parse_extract_field(
+ actstr, "-", "(\d+)", int, False
+ )
+
+ if t is not None:
+ natact["attrs"].append([t, None])
+
+ if ip_block_min is not None:
+ natact["attrs"].append(
+ ["OVS_NAT_ATTR_IP_MIN", ip_block_min]
+ )
+ if ip_block_max is not None:
+ natact["attrs"].append(
+ ["OVS_NAT_ATTR_IP_MAX", ip_block_max]
+ )
+ if proto_min is not None:
+ natact["attrs"].append(
+ ["OVS_NAT_ATTR_PROTO_MIN", proto_min]
+ )
+ if proto_max is not None:
+ natact["attrs"].append(
+ ["OVS_NAT_ATTR_PROTO_MAX", proto_max]
+ )
+
+ for natscan in (
+ ("persistent", "OVS_NAT_ATTR_PERSISTENT"),
+ ("hash", "OVS_NAT_ATTR_PROTO_HASH"),
+ ("random", "OVS_NAT_ATTR_PROTO_RANDOM"),
+ ):
+ if actstr.startswith(natscan[0]):
+ actstr = actstr[len(natscan[0]) :]
+ natact["attrs"].append([natscan[1], None])
+ actstr = actstr[strspn(actstr, ", ") :]
+
+ ctact["attrs"].append(["OVS_CT_ATTR_NAT", natact])
+ actstr = actstr[strspn(actstr, ", ") :]
+
+ self["attrs"].append(["OVS_ACTION_ATTR_CT", ctact])
+ parsed = True
+
+ actstr = actstr[strspn(actstr, ", ") :]
+ while parencount > 0:
+ parencount -= 1
+ actstr = actstr[strspn(actstr, " "):]
+ if len(actstr) and actstr[0] != ")":
+ raise ValueError("Action str: '%s' unbalanced" % actstr)
+ actstr = actstr[1:]
+
+ if len(actstr) and actstr[0] == ")":
+ return (totallen - len(actstr))
+
+ actstr = actstr[strspn(actstr, ", ") :]
+
+ if not parsed:
+ raise ValueError("Action str: '%s' not supported" % actstr)
+
+ return (totallen - len(actstr))
+
+
+class ovskey(nla):
+ nla_flags = NLA_F_NESTED
+ nla_map = (
+ ("OVS_KEY_ATTR_UNSPEC", "none"),
+ ("OVS_KEY_ATTR_ENCAP", "none"),
+ ("OVS_KEY_ATTR_PRIORITY", "uint32"),
+ ("OVS_KEY_ATTR_IN_PORT", "uint32"),
+ ("OVS_KEY_ATTR_ETHERNET", "ethaddr"),
+ ("OVS_KEY_ATTR_VLAN", "uint16"),
+ ("OVS_KEY_ATTR_ETHERTYPE", "be16"),
+ ("OVS_KEY_ATTR_IPV4", "ovs_key_ipv4"),
+ ("OVS_KEY_ATTR_IPV6", "ovs_key_ipv6"),
+ ("OVS_KEY_ATTR_TCP", "ovs_key_tcp"),
+ ("OVS_KEY_ATTR_UDP", "ovs_key_udp"),
+ ("OVS_KEY_ATTR_ICMP", "ovs_key_icmp"),
+ ("OVS_KEY_ATTR_ICMPV6", "ovs_key_icmpv6"),
+ ("OVS_KEY_ATTR_ARP", "ovs_key_arp"),
+ ("OVS_KEY_ATTR_ND", "ovs_key_nd"),
+ ("OVS_KEY_ATTR_SKB_MARK", "uint32"),
+ ("OVS_KEY_ATTR_TUNNEL", "none"),
+ ("OVS_KEY_ATTR_SCTP", "ovs_key_sctp"),
+ ("OVS_KEY_ATTR_TCP_FLAGS", "be16"),
+ ("OVS_KEY_ATTR_DP_HASH", "uint32"),
+ ("OVS_KEY_ATTR_RECIRC_ID", "uint32"),
+ ("OVS_KEY_ATTR_MPLS", "array(ovs_key_mpls)"),
+ ("OVS_KEY_ATTR_CT_STATE", "uint32"),
+ ("OVS_KEY_ATTR_CT_ZONE", "uint16"),
+ ("OVS_KEY_ATTR_CT_MARK", "uint32"),
+ ("OVS_KEY_ATTR_CT_LABELS", "none"),
+ ("OVS_KEY_ATTR_CT_ORIG_TUPLE_IPV4", "ovs_key_ct_tuple_ipv4"),
+ ("OVS_KEY_ATTR_CT_ORIG_TUPLE_IPV6", "ovs_key_ct_tuple_ipv6"),
+ ("OVS_KEY_ATTR_NSH", "none"),
+ ("OVS_KEY_ATTR_PACKET_TYPE", "none"),
+ ("OVS_KEY_ATTR_ND_EXTENSIONS", "none"),
+ ("OVS_KEY_ATTR_TUNNEL_INFO", "none"),
+ ("OVS_KEY_ATTR_IPV6_EXTENSIONS", "none"),
+ )
+
+ class ovs_key_proto(nla):
+ fields = (
+ ("src", "!H"),
+ ("dst", "!H"),
+ )
+
+ fields_map = (
+ ("src", "src", "%d", lambda x: int(x) if x else 0,
+ convert_int(16)),
+ ("dst", "dst", "%d", lambda x: int(x) if x else 0,
+ convert_int(16)),
+ )
+
+ def __init__(
+ self,
+ protostr,
+ data=None,
+ offset=None,
+ parent=None,
+ length=None,
+ init=None,
+ ):
+ self.proto_str = protostr
+ nla.__init__(
+ self,
+ data=data,
+ offset=offset,
+ parent=parent,
+ length=length,
+ init=init,
+ )
+
+ def parse(self, flowstr, typeInst):
+ if not flowstr.startswith(self.proto_str):
+ return None, None
+
+ k = typeInst()
+ m = typeInst()
+
+ flowstr = flowstr[len(self.proto_str) :]
+ if flowstr.startswith("("):
+ flowstr = flowstr[1:]
+
+ keybits = b""
+ maskbits = b""
+ for f in self.fields_map:
+ if flowstr.startswith(f[1]):
+ # the following assumes that the field looks
+ # something like 'field.' where '.' is a
+ # character that we don't exactly care about.
+ flowstr = flowstr[len(f[1]) + 1 :]
+ splitchar = 0
+ for c in flowstr:
+ if c == "," or c == ")":
+ break
+ splitchar += 1
+ data = flowstr[:splitchar]
+ flowstr = flowstr[splitchar:]
+ else:
+ data = ""
+
+ if len(f) > 4:
+ k[f[0]], m[f[0]] = f[4](data)
+ else:
+ k[f[0]] = f[3](data)
+ m[f[0]] = f[3](data)
+
+ flowstr = flowstr[strspn(flowstr, ", ") :]
+ if len(flowstr) == 0:
+ return flowstr, k, m
+
+ flowstr = flowstr[strspn(flowstr, "), ") :]
+
+ return flowstr, k, m
+
+ def dpstr(self, masked=None, more=False):
+ outstr = self.proto_str + "("
+ first = False
+ for f in self.fields_map:
+ if first:
+ outstr += ","
+ if masked is None:
+ outstr += "%s=" % f[0]
+ if isinstance(f[2], str):
+ outstr += f[2] % self[f[1]]
+ else:
+ outstr += f[2](self[f[1]])
+ first = True
+ elif more or f[3](masked[f[1]]) != 0:
+ outstr += "%s=" % f[0]
+ if isinstance(f[2], str):
+ outstr += f[2] % self[f[1]]
+ else:
+ outstr += f[2](self[f[1]])
+ outstr += "/"
+ if isinstance(f[2], str):
+ outstr += f[2] % masked[f[1]]
+ else:
+ outstr += f[2](masked[f[1]])
+ first = True
+ outstr += ")"
+ return outstr
+
+ class ethaddr(ovs_key_proto):
+ fields = (
+ ("src", "!6s"),
+ ("dst", "!6s"),
+ )
+
+ fields_map = (
+ (
+ "src",
+ "src",
+ macstr,
+ lambda x: int.from_bytes(x, "big"),
+ convert_mac,
+ ),
+ (
+ "dst",
+ "dst",
+ macstr,
+ lambda x: int.from_bytes(x, "big"),
+ convert_mac,
+ ),
+ )
+
+ def __init__(
+ self,
+ data=None,
+ offset=None,
+ parent=None,
+ length=None,
+ init=None,
+ ):
+ ovskey.ovs_key_proto.__init__(
+ self,
+ "eth",
+ data=data,
+ offset=offset,
+ parent=parent,
+ length=length,
+ init=init,
+ )
+
+ class ovs_key_ipv4(ovs_key_proto):
+ fields = (
+ ("src", "!I"),
+ ("dst", "!I"),
+ ("proto", "B"),
+ ("tos", "B"),
+ ("ttl", "B"),
+ ("frag", "B"),
+ )
+
+ fields_map = (
+ (
+ "src",
+ "src",
+ lambda x: str(ipaddress.IPv4Address(x)),
+ int,
+ convert_ipv4,
+ ),
+ (
+ "dst",
+ "dst",
+ lambda x: str(ipaddress.IPv4Address(x)),
+ int,
+ convert_ipv4,
+ ),
+ ("proto", "proto", "%d", lambda x: int(x) if x else 0,
+ convert_int(8)),
+ ("tos", "tos", "%d", lambda x: int(x) if x else 0,
+ convert_int(8)),
+ ("ttl", "ttl", "%d", lambda x: int(x) if x else 0,
+ convert_int(8)),
+ ("frag", "frag", "%d", lambda x: int(x) if x else 0,
+ convert_int(8)),
+ )
+
+ def __init__(
+ self,
+ data=None,
+ offset=None,
+ parent=None,
+ length=None,
+ init=None,
+ ):
+ ovskey.ovs_key_proto.__init__(
+ self,
+ "ipv4",
+ data=data,
+ offset=offset,
+ parent=parent,
+ length=length,
+ init=init,
+ )
+
+ class ovs_key_ipv6(ovs_key_proto):
+ fields = (
+ ("src", "!16s"),
+ ("dst", "!16s"),
+ ("label", "!I"),
+ ("proto", "B"),
+ ("tclass", "B"),
+ ("hlimit", "B"),
+ ("frag", "B"),
+ )
+
+ fields_map = (
+ (
+ "src",
+ "src",
+ lambda x: str(ipaddress.IPv6Address(x)),
+ lambda x: int.from_bytes(x, "big"),
+ lambda x: ipaddress.IPv6Address(x),
+ ),
+ (
+ "dst",
+ "dst",
+ lambda x: str(ipaddress.IPv6Address(x)),
+ lambda x: int.from_bytes(x, "big"),
+ lambda x: ipaddress.IPv6Address(x),
+ ),
+ ("label", "label", "%d", int),
+ ("proto", "proto", "%d", int),
+ ("tclass", "tclass", "%d", int),
+ ("hlimit", "hlimit", "%d", int),
+ ("frag", "frag", "%d", int),
+ )
+
+ def __init__(
+ self,
+ data=None,
+ offset=None,
+ parent=None,
+ length=None,
+ init=None,
+ ):
+ ovskey.ovs_key_proto.__init__(
+ self,
+ "ipv6",
+ data=data,
+ offset=offset,
+ parent=parent,
+ length=length,
+ init=init,
+ )
+
+ class ovs_key_tcp(ovs_key_proto):
+ def __init__(
+ self,
+ data=None,
+ offset=None,
+ parent=None,
+ length=None,
+ init=None,
+ ):
+ ovskey.ovs_key_proto.__init__(
+ self,
+ "tcp",
+ data=data,
+ offset=offset,
+ parent=parent,
+ length=length,
+ init=init,
+ )
+
+ class ovs_key_udp(ovs_key_proto):
+ def __init__(
+ self,
+ data=None,
+ offset=None,
+ parent=None,
+ length=None,
+ init=None,
+ ):
+ ovskey.ovs_key_proto.__init__(
+ self,
+ "udp",
+ data=data,
+ offset=offset,
+ parent=parent,
+ length=length,
+ init=init,
+ )
+
+ class ovs_key_sctp(ovs_key_proto):
+ def __init__(
+ self,
+ data=None,
+ offset=None,
+ parent=None,
+ length=None,
+ init=None,
+ ):
+ ovskey.ovs_key_proto.__init__(
+ self,
+ "sctp",
+ data=data,
+ offset=offset,
+ parent=parent,
+ length=length,
+ init=init,
+ )
+
+ class ovs_key_icmp(ovs_key_proto):
+ fields = (
+ ("type", "B"),
+ ("code", "B"),
+ )
+
+ fields_map = (
+ ("type", "type", "%d", lambda x: int(x) if x else 0),
+ ("code", "code", "%d", lambda x: int(x) if x else 0),
+ )
+
+ def __init__(
+ self,
+ data=None,
+ offset=None,
+ parent=None,
+ length=None,
+ init=None,
+ ):
+ ovskey.ovs_key_proto.__init__(
+ self,
+ "icmp",
+ data=data,
+ offset=offset,
+ parent=parent,
+ length=length,
+ init=init,
+ )
+
+ class ovs_key_icmpv6(ovs_key_icmp):
+ def __init__(
+ self,
+ data=None,
+ offset=None,
+ parent=None,
+ length=None,
+ init=None,
+ ):
+ ovskey.ovs_key_proto.__init__(
+ self,
+ "icmpv6",
+ data=data,
+ offset=offset,
+ parent=parent,
+ length=length,
+ init=init,
+ )
+
+ class ovs_key_arp(ovs_key_proto):
+ fields = (
+ ("sip", "!I"),
+ ("tip", "!I"),
+ ("op", "!H"),
+ ("sha", "!6s"),
+ ("tha", "!6s"),
+ ("pad", "xx"),
+ )
+
+ fields_map = (
+ (
+ "sip",
+ "sip",
+ lambda x: str(ipaddress.IPv4Address(x)),
+ int,
+ convert_ipv4,
+ ),
+ (
+ "tip",
+ "tip",
+ lambda x: str(ipaddress.IPv4Address(x)),
+ int,
+ convert_ipv4,
+ ),
+ ("op", "op", "%d", lambda x: int(x) if x else 0),
+ (
+ "sha",
+ "sha",
+ macstr,
+ lambda x: int.from_bytes(x, "big"),
+ convert_mac,
+ ),
+ (
+ "tha",
+ "tha",
+ macstr,
+ lambda x: int.from_bytes(x, "big"),
+ convert_mac,
+ ),
+ )
+
+ def __init__(
+ self,
+ data=None,
+ offset=None,
+ parent=None,
+ length=None,
+ init=None,
+ ):
+ ovskey.ovs_key_proto.__init__(
+ self,
+ "arp",
+ data=data,
+ offset=offset,
+ parent=parent,
+ length=length,
+ init=init,
+ )
+
+ class ovs_key_nd(ovs_key_proto):
+ fields = (
+ ("target", "!16s"),
+ ("sll", "!6s"),
+ ("tll", "!6s"),
+ )
+
+ fields_map = (
+ (
+ "target",
+ "target",
+ lambda x: str(ipaddress.IPv6Address(x)),
+ lambda x: int.from_bytes(x, "big"),
+ ),
+ ("sll", "sll", macstr, lambda x: int.from_bytes(x, "big")),
+ ("tll", "tll", macstr, lambda x: int.from_bytes(x, "big")),
+ )
+
+ def __init__(
+ self,
+ data=None,
+ offset=None,
+ parent=None,
+ length=None,
+ init=None,
+ ):
+ ovskey.ovs_key_proto.__init__(
+ self,
+ "nd",
+ data=data,
+ offset=offset,
+ parent=parent,
+ length=length,
+ init=init,
+ )
+
+ class ovs_key_ct_tuple_ipv4(ovs_key_proto):
+ fields = (
+ ("src", "!I"),
+ ("dst", "!I"),
+ ("tp_src", "!H"),
+ ("tp_dst", "!H"),
+ ("proto", "B"),
+ )
+
+ fields_map = (
+ (
+ "src",
+ "src",
+ lambda x: str(ipaddress.IPv4Address(x)),
+ int,
+ convert_ipv4,
+ ),
+ (
+ "dst",
+ "dst",
+ lambda x: str(ipaddress.IPv4Address(x)),
+ int,
+ convert_ipv4,
+ ),
+ ("tp_src", "tp_src", "%d", int),
+ ("tp_dst", "tp_dst", "%d", int),
+ ("proto", "proto", "%d", int),
+ )
+
+ def __init__(
+ self,
+ data=None,
+ offset=None,
+ parent=None,
+ length=None,
+ init=None,
+ ):
+ ovskey.ovs_key_proto.__init__(
+ self,
+ "ct_tuple4",
+ data=data,
+ offset=offset,
+ parent=parent,
+ length=length,
+ init=init,
+ )
+
+ class ovs_key_ct_tuple_ipv6(nla):
+ fields = (
+ ("src", "!16s"),
+ ("dst", "!16s"),
+ ("tp_src", "!H"),
+ ("tp_dst", "!H"),
+ ("proto", "B"),
+ )
+
+ fields_map = (
+ (
+ "src",
+ "src",
+ lambda x: str(ipaddress.IPv6Address(x)),
+ lambda x: int.from_bytes(x, "big", convertmac),
+ ),
+ (
+ "dst",
+ "dst",
+ lambda x: str(ipaddress.IPv6Address(x)),
+ lambda x: int.from_bytes(x, "big"),
+ ),
+ ("tp_src", "tp_src", "%d", int),
+ ("tp_dst", "tp_dst", "%d", int),
+ ("proto", "proto", "%d", int),
+ )
+
+ def __init__(
+ self,
+ data=None,
+ offset=None,
+ parent=None,
+ length=None,
+ init=None,
+ ):
+ ovskey.ovs_key_proto.__init__(
+ self,
+ "ct_tuple6",
+ data=data,
+ offset=offset,
+ parent=parent,
+ length=length,
+ init=init,
+ )
+
+ class ovs_key_mpls(nla):
+ fields = (("lse", ">I"),)
+
+ def parse(self, flowstr, mask=None):
+ for field in (
+ ("OVS_KEY_ATTR_PRIORITY", "skb_priority", intparse),
+ ("OVS_KEY_ATTR_SKB_MARK", "skb_mark", intparse),
+ ("OVS_KEY_ATTR_RECIRC_ID", "recirc_id", intparse),
+ ("OVS_KEY_ATTR_DP_HASH", "dp_hash", intparse),
+ ("OVS_KEY_ATTR_CT_STATE", "ct_state", parse_ct_state),
+ ("OVS_KEY_ATTR_CT_ZONE", "ct_zone", intparse),
+ ("OVS_KEY_ATTR_CT_MARK", "ct_mark", intparse),
+ ("OVS_KEY_ATTR_IN_PORT", "in_port", intparse),
+ (
+ "OVS_KEY_ATTR_ETHERNET",
+ "eth",
+ ovskey.ethaddr,
+ ),
+ (
+ "OVS_KEY_ATTR_ETHERTYPE",
+ "eth_type",
+ lambda x: intparse(x, "0xffff"),
+ ),
+ (
+ "OVS_KEY_ATTR_IPV4",
+ "ipv4",
+ ovskey.ovs_key_ipv4,
+ ),
+ (
+ "OVS_KEY_ATTR_IPV6",
+ "ipv6",
+ ovskey.ovs_key_ipv6,
+ ),
+ (
+ "OVS_KEY_ATTR_ARP",
+ "arp",
+ ovskey.ovs_key_arp,
+ ),
+ (
+ "OVS_KEY_ATTR_TCP",
+ "tcp",
+ ovskey.ovs_key_tcp,
+ ),
+ (
+ "OVS_KEY_ATTR_UDP",
+ "udp",
+ ovskey.ovs_key_udp,
+ ),
+ (
+ "OVS_KEY_ATTR_ICMP",
+ "icmp",
+ ovskey.ovs_key_icmp,
+ ),
+ (
+ "OVS_KEY_ATTR_TCP_FLAGS",
+ "tcp_flags",
+ lambda x: parse_flags(x, None),
+ ),
+ ):
+ fld = field[1] + "("
+ if not flowstr.startswith(fld):
+ continue
+
+ if not isinstance(field[2], types.FunctionType):
+ nk = field[2]()
+ flowstr, k, m = nk.parse(flowstr, field[2])
+ else:
+ flowstr = flowstr[len(fld) :]
+ flowstr, k, m = field[2](flowstr)
+
+ if m and mask is not None:
+ mask["attrs"].append([field[0], m])
+ self["attrs"].append([field[0], k])
+
+ flowstr = flowstr[strspn(flowstr, "),") :]
+
+ return flowstr
+
+ def dpstr(self, mask=None, more=False):
+ print_str = ""
+
+ for field in (
+ (
+ "OVS_KEY_ATTR_PRIORITY",
+ "skb_priority",
+ "%d",
+ lambda x: False,
+ True,
+ ),
+ (
+ "OVS_KEY_ATTR_SKB_MARK",
+ "skb_mark",
+ "%d",
+ lambda x: False,
+ True,
+ ),
+ (
+ "OVS_KEY_ATTR_RECIRC_ID",
+ "recirc_id",
+ "0x%08X",
+ lambda x: False,
+ True,
+ ),
+ (
+ "OVS_KEY_ATTR_DP_HASH",
+ "dp_hash",
+ "0x%08X",
+ lambda x: False,
+ True,
+ ),
+ (
+ "OVS_KEY_ATTR_CT_STATE",
+ "ct_state",
+ "0x%04x",
+ lambda x: False,
+ True,
+ ),
+ (
+ "OVS_KEY_ATTR_CT_ZONE",
+ "ct_zone",
+ "0x%04x",
+ lambda x: False,
+ True,
+ ),
+ (
+ "OVS_KEY_ATTR_CT_MARK",
+ "ct_mark",
+ "0x%08x",
+ lambda x: False,
+ True,
+ ),
+ (
+ "OVS_KEY_ATTR_CT_ORIG_TUPLE_IPV4",
+ None,
+ None,
+ False,
+ False,
+ ),
+ (
+ "OVS_KEY_ATTR_CT_ORIG_TUPLE_IPV6",
+ None,
+ None,
+ False,
+ False,
+ ),
+ (
+ "OVS_KEY_ATTR_IN_PORT",
+ "in_port",
+ "%d",
+ lambda x: True,
+ True,
+ ),
+ ("OVS_KEY_ATTR_ETHERNET", None, None, False, False),
+ (
+ "OVS_KEY_ATTR_ETHERTYPE",
+ "eth_type",
+ "0x%04x",
+ lambda x: int(x) == 0xFFFF,
+ True,
+ ),
+ ("OVS_KEY_ATTR_IPV4", None, None, False, False),
+ ("OVS_KEY_ATTR_IPV6", None, None, False, False),
+ ("OVS_KEY_ATTR_ARP", None, None, False, False),
+ ("OVS_KEY_ATTR_TCP", None, None, False, False),
+ (
+ "OVS_KEY_ATTR_TCP_FLAGS",
+ "tcp_flags",
+ "0x%04x",
+ lambda x: False,
+ True,
+ ),
+ ("OVS_KEY_ATTR_UDP", None, None, False, False),
+ ("OVS_KEY_ATTR_SCTP", None, None, False, False),
+ ("OVS_KEY_ATTR_ICMP", None, None, False, False),
+ ("OVS_KEY_ATTR_ICMPV6", None, None, False, False),
+ ("OVS_KEY_ATTR_ND", None, None, False, False),
+ ):
+ v = self.get_attr(field[0])
+ if v is not None:
+ m = None if mask is None else mask.get_attr(field[0])
+ if field[4] is False:
+ print_str += v.dpstr(m, more)
+ print_str += ","
+ else:
+ if m is None or field[3](m):
+ print_str += field[1] + "("
+ print_str += field[2] % v
+ print_str += "),"
+ elif more or m != 0:
+ print_str += field[1] + "("
+ print_str += (field[2] % v) + "/" + (field[2] % m)
+ print_str += "),"
+
+ return print_str
+
+
+class OvsPacket(GenericNetlinkSocket):
+ OVS_PACKET_CMD_MISS = 1 # Flow table miss
+ OVS_PACKET_CMD_ACTION = 2 # USERSPACE action
+ OVS_PACKET_CMD_EXECUTE = 3 # Apply actions to packet
+
+ class ovs_packet_msg(ovs_dp_msg):
+ nla_map = (
+ ("OVS_PACKET_ATTR_UNSPEC", "none"),
+ ("OVS_PACKET_ATTR_PACKET", "array(uint8)"),
+ ("OVS_PACKET_ATTR_KEY", "ovskey"),
+ ("OVS_PACKET_ATTR_ACTIONS", "ovsactions"),
+ ("OVS_PACKET_ATTR_USERDATA", "none"),
+ ("OVS_PACKET_ATTR_EGRESS_TUN_KEY", "none"),
+ ("OVS_PACKET_ATTR_UNUSED1", "none"),
+ ("OVS_PACKET_ATTR_UNUSED2", "none"),
+ ("OVS_PACKET_ATTR_PROBE", "none"),
+ ("OVS_PACKET_ATTR_MRU", "uint16"),
+ ("OVS_PACKET_ATTR_LEN", "uint32"),
+ ("OVS_PACKET_ATTR_HASH", "uint64"),
+ )
+
+ def __init__(self):
+ GenericNetlinkSocket.__init__(self)
+ self.bind(OVS_PACKET_FAMILY, OvsPacket.ovs_packet_msg)
+
+ def upcall_handler(self, up=None):
+ print("listening on upcall packet handler:", self.epid)
+ while True:
+ try:
+ msgs = self.get()
+ for msg in msgs:
+ if not up:
+ continue
+ if msg["cmd"] == OvsPacket.OVS_PACKET_CMD_MISS:
+ up.miss(msg)
+ elif msg["cmd"] == OvsPacket.OVS_PACKET_CMD_ACTION:
+ up.action(msg)
+ elif msg["cmd"] == OvsPacket.OVS_PACKET_CMD_EXECUTE:
+ up.execute(msg)
+ else:
+ print("Unkonwn cmd: %d" % msg["cmd"])
+ except NetlinkError as ne:
+ raise ne
+
+
+class OvsDatapath(GenericNetlinkSocket):
+ OVS_DP_F_VPORT_PIDS = 1 << 1
+ OVS_DP_F_DISPATCH_UPCALL_PER_CPU = 1 << 3
+
+ class dp_cmd_msg(ovs_dp_msg):
+ """
+ Message class that will be used to communicate with the kernel module.
+ """
+
+ nla_map = (
+ ("OVS_DP_ATTR_UNSPEC", "none"),
+ ("OVS_DP_ATTR_NAME", "asciiz"),
+ ("OVS_DP_ATTR_UPCALL_PID", "array(uint32)"),
+ ("OVS_DP_ATTR_STATS", "dpstats"),
+ ("OVS_DP_ATTR_MEGAFLOW_STATS", "megaflowstats"),
+ ("OVS_DP_ATTR_USER_FEATURES", "uint32"),
+ ("OVS_DP_ATTR_PAD", "none"),
+ ("OVS_DP_ATTR_MASKS_CACHE_SIZE", "uint32"),
+ ("OVS_DP_ATTR_PER_CPU_PIDS", "array(uint32)"),
+ )
+
+ class dpstats(nla):
+ fields = (
+ ("hit", "=Q"),
+ ("missed", "=Q"),
+ ("lost", "=Q"),
+ ("flows", "=Q"),
+ )
+
+ class megaflowstats(nla):
+ fields = (
+ ("mask_hit", "=Q"),
+ ("masks", "=I"),
+ ("padding", "=I"),
+ ("cache_hits", "=Q"),
+ ("pad1", "=Q"),
+ )
+
+ def __init__(self):
+ GenericNetlinkSocket.__init__(self)
+ self.bind(OVS_DATAPATH_FAMILY, OvsDatapath.dp_cmd_msg)
+
+ def info(self, dpname, ifindex=0):
+ msg = OvsDatapath.dp_cmd_msg()
+ msg["cmd"] = OVS_DP_CMD_GET
+ msg["version"] = OVS_DATAPATH_VERSION
+ msg["reserved"] = 0
+ msg["dpifindex"] = ifindex
+ msg["attrs"].append(["OVS_DP_ATTR_NAME", dpname])
+
+ try:
+ reply = self.nlm_request(
+ msg, msg_type=self.prid, msg_flags=NLM_F_REQUEST
+ )
+ reply = reply[0]
+ except NetlinkError as ne:
+ if ne.code == errno.ENODEV:
+ reply = None
+ else:
+ raise ne
+
+ return reply
+
+ def create(
+ self, dpname, shouldUpcall=False, versionStr=None, p=OvsPacket()
+ ):
+ msg = OvsDatapath.dp_cmd_msg()
+ msg["cmd"] = OVS_DP_CMD_NEW
+ if versionStr is None:
+ msg["version"] = OVS_DATAPATH_VERSION
+ else:
+ msg["version"] = int(versionStr.split(":")[0], 0)
+ msg["reserved"] = 0
+ msg["dpifindex"] = 0
+ msg["attrs"].append(["OVS_DP_ATTR_NAME", dpname])
+
+ dpfeatures = 0
+ if versionStr is not None and versionStr.find(":") != -1:
+ dpfeatures = int(versionStr.split(":")[1], 0)
+ else:
+ if versionStr is None or versionStr.find(":") == -1:
+ dpfeatures |= OvsDatapath.OVS_DP_F_DISPATCH_UPCALL_PER_CPU
+ dpfeatures &= ~OvsDatapath.OVS_DP_F_VPORT_PIDS
+
+ nproc = multiprocessing.cpu_count()
+ procarray = []
+ for i in range(1, nproc):
+ procarray += [int(p.epid)]
+ msg["attrs"].append(["OVS_DP_ATTR_UPCALL_PID", procarray])
+ msg["attrs"].append(["OVS_DP_ATTR_USER_FEATURES", dpfeatures])
+ if not shouldUpcall:
+ msg["attrs"].append(["OVS_DP_ATTR_UPCALL_PID", [0]])
+
+ try:
+ reply = self.nlm_request(
+ msg, msg_type=self.prid, msg_flags=NLM_F_REQUEST | NLM_F_ACK
+ )
+ reply = reply[0]
+ except NetlinkError as ne:
+ if ne.code == errno.EEXIST:
+ reply = None
+ else:
+ raise ne
+
+ return reply
+
+ def destroy(self, dpname):
+ msg = OvsDatapath.dp_cmd_msg()
+ msg["cmd"] = OVS_DP_CMD_DEL
+ msg["version"] = OVS_DATAPATH_VERSION
+ msg["reserved"] = 0
+ msg["dpifindex"] = 0
+ msg["attrs"].append(["OVS_DP_ATTR_NAME", dpname])
+
+ try:
+ reply = self.nlm_request(
+ msg, msg_type=self.prid, msg_flags=NLM_F_REQUEST | NLM_F_ACK
+ )
+ reply = reply[0]
+ except NetlinkError as ne:
+ if ne.code == errno.ENODEV:
+ reply = None
+ else:
+ raise ne
+
+ return reply
+
+
+class OvsVport(GenericNetlinkSocket):
+ OVS_VPORT_TYPE_NETDEV = 1
+ OVS_VPORT_TYPE_INTERNAL = 2
+ OVS_VPORT_TYPE_GRE = 3
+ OVS_VPORT_TYPE_VXLAN = 4
+ OVS_VPORT_TYPE_GENEVE = 5
+
+ class ovs_vport_msg(ovs_dp_msg):
+ nla_map = (
+ ("OVS_VPORT_ATTR_UNSPEC", "none"),
+ ("OVS_VPORT_ATTR_PORT_NO", "uint32"),
+ ("OVS_VPORT_ATTR_TYPE", "uint32"),
+ ("OVS_VPORT_ATTR_NAME", "asciiz"),
+ ("OVS_VPORT_ATTR_OPTIONS", "none"),
+ ("OVS_VPORT_ATTR_UPCALL_PID", "array(uint32)"),
+ ("OVS_VPORT_ATTR_STATS", "vportstats"),
+ ("OVS_VPORT_ATTR_PAD", "none"),
+ ("OVS_VPORT_ATTR_IFINDEX", "uint32"),
+ ("OVS_VPORT_ATTR_NETNSID", "uint32"),
+ )
+
+ class vportstats(nla):
+ fields = (
+ ("rx_packets", "=Q"),
+ ("tx_packets", "=Q"),
+ ("rx_bytes", "=Q"),
+ ("tx_bytes", "=Q"),
+ ("rx_errors", "=Q"),
+ ("tx_errors", "=Q"),
+ ("rx_dropped", "=Q"),
+ ("tx_dropped", "=Q"),
+ )
+
+ def type_to_str(vport_type):
+ if vport_type == OvsVport.OVS_VPORT_TYPE_NETDEV:
+ return "netdev"
+ elif vport_type == OvsVport.OVS_VPORT_TYPE_INTERNAL:
+ return "internal"
+ elif vport_type == OvsVport.OVS_VPORT_TYPE_GRE:
+ return "gre"
+ elif vport_type == OvsVport.OVS_VPORT_TYPE_VXLAN:
+ return "vxlan"
+ elif vport_type == OvsVport.OVS_VPORT_TYPE_GENEVE:
+ return "geneve"
+ raise ValueError("Unknown vport type:%d" % vport_type)
+
+ def str_to_type(vport_type):
+ if vport_type == "netdev":
+ return OvsVport.OVS_VPORT_TYPE_NETDEV
+ elif vport_type == "internal":
+ return OvsVport.OVS_VPORT_TYPE_INTERNAL
+ elif vport_type == "gre":
+ return OvsVport.OVS_VPORT_TYPE_INTERNAL
+ elif vport_type == "vxlan":
+ return OvsVport.OVS_VPORT_TYPE_VXLAN
+ elif vport_type == "geneve":
+ return OvsVport.OVS_VPORT_TYPE_GENEVE
+ raise ValueError("Unknown vport type: '%s'" % vport_type)
+
+ def __init__(self, packet=OvsPacket()):
+ GenericNetlinkSocket.__init__(self)
+ self.bind(OVS_VPORT_FAMILY, OvsVport.ovs_vport_msg)
+ self.upcall_packet = packet
+
+ def info(self, vport_name, dpifindex=0, portno=None):
+ msg = OvsVport.ovs_vport_msg()
+
+ msg["cmd"] = OVS_VPORT_CMD_GET
+ msg["version"] = OVS_DATAPATH_VERSION
+ msg["reserved"] = 0
+ msg["dpifindex"] = dpifindex
+
+ if portno is None:
+ msg["attrs"].append(["OVS_VPORT_ATTR_NAME", vport_name])
+ else:
+ msg["attrs"].append(["OVS_VPORT_ATTR_PORT_NO", portno])
+
+ try:
+ reply = self.nlm_request(
+ msg, msg_type=self.prid, msg_flags=NLM_F_REQUEST
+ )
+ reply = reply[0]
+ except NetlinkError as ne:
+ if ne.code == errno.ENODEV:
+ reply = None
+ else:
+ raise ne
+ return reply
+
+ def attach(self, dpindex, vport_ifname, ptype):
+ msg = OvsVport.ovs_vport_msg()
+
+ msg["cmd"] = OVS_VPORT_CMD_NEW
+ msg["version"] = OVS_DATAPATH_VERSION
+ msg["reserved"] = 0
+ msg["dpifindex"] = dpindex
+ port_type = OvsVport.str_to_type(ptype)
+
+ msg["attrs"].append(["OVS_VPORT_ATTR_TYPE", port_type])
+ msg["attrs"].append(["OVS_VPORT_ATTR_NAME", vport_ifname])
+ msg["attrs"].append(
+ ["OVS_VPORT_ATTR_UPCALL_PID", [self.upcall_packet.epid]]
+ )
+
+ try:
+ reply = self.nlm_request(
+ msg, msg_type=self.prid, msg_flags=NLM_F_REQUEST | NLM_F_ACK
+ )
+ reply = reply[0]
+ except NetlinkError as ne:
+ if ne.code == errno.EEXIST:
+ reply = None
+ else:
+ raise ne
+ return reply
+
+ def reset_upcall(self, dpindex, vport_ifname, p=None):
+ msg = OvsVport.ovs_vport_msg()
+
+ msg["cmd"] = OVS_VPORT_CMD_SET
+ msg["version"] = OVS_DATAPATH_VERSION
+ msg["reserved"] = 0
+ msg["dpifindex"] = dpindex
+ msg["attrs"].append(["OVS_VPORT_ATTR_NAME", vport_ifname])
+
+ if p == None:
+ p = self.upcall_packet
+ else:
+ self.upcall_packet = p
+
+ msg["attrs"].append(["OVS_VPORT_ATTR_UPCALL_PID", [p.epid]])
+
+ try:
+ reply = self.nlm_request(
+ msg, msg_type=self.prid, msg_flags=NLM_F_REQUEST | NLM_F_ACK
+ )
+ reply = reply[0]
+ except NetlinkError as ne:
+ raise ne
+ return reply
+
+ def detach(self, dpindex, vport_ifname):
+ msg = OvsVport.ovs_vport_msg()
+
+ msg["cmd"] = OVS_VPORT_CMD_DEL
+ msg["version"] = OVS_DATAPATH_VERSION
+ msg["reserved"] = 0
+ msg["dpifindex"] = dpindex
+ msg["attrs"].append(["OVS_VPORT_ATTR_NAME", vport_ifname])
+
+ try:
+ reply = self.nlm_request(
+ msg, msg_type=self.prid, msg_flags=NLM_F_REQUEST | NLM_F_ACK
+ )
+ reply = reply[0]
+ except NetlinkError as ne:
+ if ne.code == errno.ENODEV:
+ reply = None
+ else:
+ raise ne
+ return reply
+
+ def upcall_handler(self, handler=None):
+ self.upcall_packet.upcall_handler(handler)
+
+
+class OvsFlow(GenericNetlinkSocket):
+ class ovs_flow_msg(ovs_dp_msg):
+ nla_map = (
+ ("OVS_FLOW_ATTR_UNSPEC", "none"),
+ ("OVS_FLOW_ATTR_KEY", "ovskey"),
+ ("OVS_FLOW_ATTR_ACTIONS", "ovsactions"),
+ ("OVS_FLOW_ATTR_STATS", "flowstats"),
+ ("OVS_FLOW_ATTR_TCP_FLAGS", "uint8"),
+ ("OVS_FLOW_ATTR_USED", "uint64"),
+ ("OVS_FLOW_ATTR_CLEAR", "none"),
+ ("OVS_FLOW_ATTR_MASK", "ovskey"),
+ ("OVS_FLOW_ATTR_PROBE", "none"),
+ ("OVS_FLOW_ATTR_UFID", "array(uint32)"),
+ ("OVS_FLOW_ATTR_UFID_FLAGS", "uint32"),
+ )
+
+ class flowstats(nla):
+ fields = (
+ ("packets", "=Q"),
+ ("bytes", "=Q"),
+ )
+
+ def dpstr(self, more=False):
+ ufid = self.get_attr("OVS_FLOW_ATTR_UFID")
+ ufid_str = ""
+ if ufid is not None:
+ ufid_str = (
+ "ufid:{:08x}-{:04x}-{:04x}-{:04x}-{:04x}{:08x}".format(
+ ufid[0],
+ ufid[1] >> 16,
+ ufid[1] & 0xFFFF,
+ ufid[2] >> 16,
+ ufid[2] & 0,
+ ufid[3],
+ )
+ )
+
+ key_field = self.get_attr("OVS_FLOW_ATTR_KEY")
+ keymsg = None
+ if key_field is not None:
+ keymsg = key_field
+
+ mask_field = self.get_attr("OVS_FLOW_ATTR_MASK")
+ maskmsg = None
+ if mask_field is not None:
+ maskmsg = mask_field
+
+ acts_field = self.get_attr("OVS_FLOW_ATTR_ACTIONS")
+ actsmsg = None
+ if acts_field is not None:
+ actsmsg = acts_field
+
+ print_str = ""
+
+ if more:
+ print_str += ufid_str + ","
+
+ if keymsg is not None:
+ print_str += keymsg.dpstr(maskmsg, more)
+
+ stats = self.get_attr("OVS_FLOW_ATTR_STATS")
+ if stats is None:
+ print_str += " packets:0, bytes:0,"
+ else:
+ print_str += " packets:%d, bytes:%d," % (
+ stats["packets"],
+ stats["bytes"],
+ )
+
+ used = self.get_attr("OVS_FLOW_ATTR_USED")
+ print_str += " used:"
+ if used is None:
+ print_str += "never,"
+ else:
+ used_time = int(used)
+ cur_time_sec = time.clock_gettime(time.CLOCK_MONOTONIC)
+ used_time = (cur_time_sec * 1000) - used_time
+ print_str += "{}s,".format(used_time / 1000)
+
+ print_str += " actions:"
+ if (
+ actsmsg is None
+ or "attrs" not in actsmsg
+ or len(actsmsg["attrs"]) == 0
+ ):
+ print_str += "drop"
+ else:
+ print_str += actsmsg.dpstr(more)
+
+ return print_str
+
+ def parse(self, flowstr, actstr, dpidx=0):
+ OVS_UFID_F_OMIT_KEY = 1 << 0
+ OVS_UFID_F_OMIT_MASK = 1 << 1
+ OVS_UFID_F_OMIT_ACTIONS = 1 << 2
+
+ self["cmd"] = 0
+ self["version"] = 0
+ self["reserved"] = 0
+ self["dpifindex"] = 0
+
+ if flowstr.startswith("ufid:"):
+ count = 5
+ while flowstr[count] != ",":
+ count += 1
+ ufidstr = flowstr[5:count]
+ flowstr = flowstr[count + 1 :]
+ else:
+ ufidstr = str(uuid.uuid4())
+ uuidRawObj = uuid.UUID(ufidstr).fields
+
+ self["attrs"].append(
+ [
+ "OVS_FLOW_ATTR_UFID",
+ [
+ uuidRawObj[0],
+ uuidRawObj[1] << 16 | uuidRawObj[2],
+ uuidRawObj[3] << 24
+ | uuidRawObj[4] << 16
+ | uuidRawObj[5] & (0xFF << 32) >> 32,
+ uuidRawObj[5] & (0xFFFFFFFF),
+ ],
+ ]
+ )
+ self["attrs"].append(
+ [
+ "OVS_FLOW_ATTR_UFID_FLAGS",
+ int(
+ OVS_UFID_F_OMIT_KEY
+ | OVS_UFID_F_OMIT_MASK
+ | OVS_UFID_F_OMIT_ACTIONS
+ ),
+ ]
+ )
+
+ k = ovskey()
+ m = ovskey()
+ k.parse(flowstr, m)
+ self["attrs"].append(["OVS_FLOW_ATTR_KEY", k])
+ self["attrs"].append(["OVS_FLOW_ATTR_MASK", m])
+
+ a = ovsactions()
+ a.parse(actstr)
+ self["attrs"].append(["OVS_FLOW_ATTR_ACTIONS", a])
+
+ def __init__(self):
+ GenericNetlinkSocket.__init__(self)
+
+ self.bind(OVS_FLOW_FAMILY, OvsFlow.ovs_flow_msg)
+
+ def add_flow(self, dpifindex, flowmsg):
+ """
+ Send a new flow message to the kernel.
+
+ dpifindex should be a valid datapath obtained by calling
+ into the OvsDatapath lookup
+
+ flowmsg is a flow object obtained by calling a dpparse
+ """
+
+ flowmsg["cmd"] = OVS_FLOW_CMD_NEW
+ flowmsg["version"] = OVS_DATAPATH_VERSION
+ flowmsg["reserved"] = 0
+ flowmsg["dpifindex"] = dpifindex
+
+ try:
+ reply = self.nlm_request(
+ flowmsg,
+ msg_type=self.prid,
+ msg_flags=NLM_F_REQUEST | NLM_F_ACK,
+ )
+ reply = reply[0]
+ except NetlinkError as ne:
+ print(flowmsg)
+ raise ne
+ return reply
+
+ def del_flows(self, dpifindex):
+ """
+ Send a del message to the kernel that will drop all flows.
+
+ dpifindex should be a valid datapath obtained by calling
+ into the OvsDatapath lookup
+ """
+
+ flowmsg = OvsFlow.ovs_flow_msg()
+ flowmsg["cmd"] = OVS_FLOW_CMD_DEL
+ flowmsg["version"] = OVS_DATAPATH_VERSION
+ flowmsg["reserved"] = 0
+ flowmsg["dpifindex"] = dpifindex
+
+ try:
+ reply = self.nlm_request(
+ flowmsg,
+ msg_type=self.prid,
+ msg_flags=NLM_F_REQUEST | NLM_F_ACK,
+ )
+ reply = reply[0]
+ except NetlinkError as ne:
+ print(flowmsg)
+ raise ne
+ return reply
+
+ def dump(self, dpifindex, flowspec=None):
+ """
+ Returns a list of messages containing flows.
+
+ dpifindex should be a valid datapath obtained by calling
+ into the OvsDatapath lookup
+
+ flowpsec is a string which represents a flow in the dpctl
+ format.
+ """
+ msg = OvsFlow.ovs_flow_msg()
+
+ msg["cmd"] = OVS_FLOW_CMD_GET
+ msg["version"] = OVS_DATAPATH_VERSION
+ msg["reserved"] = 0
+ msg["dpifindex"] = dpifindex
+
+ msg_flags = NLM_F_REQUEST | NLM_F_ACK
+ if flowspec is None:
+ msg_flags |= NLM_F_DUMP
+ rep = None
+
+ try:
+ rep = self.nlm_request(
+ msg,
+ msg_type=self.prid,
+ msg_flags=msg_flags,
+ )
+ except NetlinkError as ne:
+ raise ne
+ return rep
+
+ def miss(self, packetmsg):
+ seq = packetmsg["header"]["sequence_number"]
+ keystr = "(none)"
+ key_field = packetmsg.get_attr("OVS_PACKET_ATTR_KEY")
+ if key_field is not None:
+ keystr = key_field.dpstr(None, True)
+
+ pktdata = packetmsg.get_attr("OVS_PACKET_ATTR_PACKET")
+ pktpres = "yes" if pktdata is not None else "no"
+
+ print("MISS upcall[%d/%s]: %s" % (seq, pktpres, keystr), flush=True)
+
+ def execute(self, packetmsg):
+ print("userspace execute command")
+
+ def action(self, packetmsg):
+ print("userspace action command")
+
+
+def print_ovsdp_full(dp_lookup_rep, ifindex, ndb=NDB(), vpl=OvsVport()):
+ dp_name = dp_lookup_rep.get_attr("OVS_DP_ATTR_NAME")
+ base_stats = dp_lookup_rep.get_attr("OVS_DP_ATTR_STATS")
+ megaflow_stats = dp_lookup_rep.get_attr("OVS_DP_ATTR_MEGAFLOW_STATS")
+ user_features = dp_lookup_rep.get_attr("OVS_DP_ATTR_USER_FEATURES")
+ masks_cache_size = dp_lookup_rep.get_attr("OVS_DP_ATTR_MASKS_CACHE_SIZE")
+
+ print("%s:" % dp_name)
+ print(
+ " lookups: hit:%d missed:%d lost:%d"
+ % (base_stats["hit"], base_stats["missed"], base_stats["lost"])
+ )
+ print(" flows:%d" % base_stats["flows"])
+ pkts = base_stats["hit"] + base_stats["missed"]
+ avg = (megaflow_stats["mask_hit"] / pkts) if pkts != 0 else 0.0
+ print(
+ " masks: hit:%d total:%d hit/pkt:%f"
+ % (megaflow_stats["mask_hit"], megaflow_stats["masks"], avg)
+ )
+ print(" caches:")
+ print(" masks-cache: size:%d" % masks_cache_size)
+
+ if user_features is not None:
+ print(" features: 0x%X" % user_features)
+
+ # port print out
+ for iface in ndb.interfaces:
+ rep = vpl.info(iface.ifname, ifindex)
+ if rep is not None:
+ print(
+ " port %d: %s (%s)"
+ % (
+ rep.get_attr("OVS_VPORT_ATTR_PORT_NO"),
+ rep.get_attr("OVS_VPORT_ATTR_NAME"),
+ OvsVport.type_to_str(rep.get_attr("OVS_VPORT_ATTR_TYPE")),
+ )
+ )
+
+
+def main(argv):
+ nlmsg_atoms.ovskey = ovskey
+ nlmsg_atoms.ovsactions = ovsactions
+
+ # version check for pyroute2
+ prverscheck = pyroute2.__version__.split(".")
+ if int(prverscheck[0]) == 0 and int(prverscheck[1]) < 6:
+ print("Need to upgrade the python pyroute2 package to >= 0.6.")
+ sys.exit(0)
+
+ parser = argparse.ArgumentParser()
+ parser.add_argument(
+ "-v",
+ "--verbose",
+ action="count",
+ help="Increment 'verbose' output counter.",
+ default=0,
+ )
+ subparsers = parser.add_subparsers()
+
+ showdpcmd = subparsers.add_parser("show")
+ showdpcmd.add_argument(
+ "showdp", metavar="N", type=str, nargs="?", help="Datapath Name"
+ )
+
+ adddpcmd = subparsers.add_parser("add-dp")
+ adddpcmd.add_argument("adddp", help="Datapath Name")
+ adddpcmd.add_argument(
+ "-u",
+ "--upcall",
+ action="store_true",
+ help="Leave open a reader for upcalls",
+ )
+ adddpcmd.add_argument(
+ "-V",
+ "--versioning",
+ required=False,
+ help="Specify a custom version / feature string",
+ )
+
+ deldpcmd = subparsers.add_parser("del-dp")
+ deldpcmd.add_argument("deldp", help="Datapath Name")
+
+ addifcmd = subparsers.add_parser("add-if")
+ addifcmd.add_argument("dpname", help="Datapath Name")
+ addifcmd.add_argument("addif", help="Interface name for adding")
+ addifcmd.add_argument(
+ "-u",
+ "--upcall",
+ action="store_true",
+ help="Leave open a reader for upcalls",
+ )
+ addifcmd.add_argument(
+ "-t",
+ "--ptype",
+ type=str,
+ default="netdev",
+ choices=["netdev", "internal"],
+ help="Interface type (default netdev)",
+ )
+ delifcmd = subparsers.add_parser("del-if")
+ delifcmd.add_argument("dpname", help="Datapath Name")
+ delifcmd.add_argument("delif", help="Interface name for adding")
+
+ dumpflcmd = subparsers.add_parser("dump-flows")
+ dumpflcmd.add_argument("dumpdp", help="Datapath Name")
+
+ addflcmd = subparsers.add_parser("add-flow")
+ addflcmd.add_argument("flbr", help="Datapath name")
+ addflcmd.add_argument("flow", help="Flow specification")
+ addflcmd.add_argument("acts", help="Flow actions")
+
+ delfscmd = subparsers.add_parser("del-flows")
+ delfscmd.add_argument("flsbr", help="Datapath name")
+
+ args = parser.parse_args()
+
+ if args.verbose > 0:
+ if args.verbose > 1:
+ logging.basicConfig(level=logging.DEBUG)
+
+ ovspk = OvsPacket()
+ ovsdp = OvsDatapath()
+ ovsvp = OvsVport(ovspk)
+ ovsflow = OvsFlow()
+ ndb = NDB()
+
+ sys.setrecursionlimit(100000)
+
+ if hasattr(args, "showdp"):
+ found = False
+ for iface in ndb.interfaces:
+ rep = None
+ if args.showdp is None:
+ rep = ovsdp.info(iface.ifname, 0)
+ elif args.showdp == iface.ifname:
+ rep = ovsdp.info(iface.ifname, 0)
+
+ if rep is not None:
+ found = True
+ print_ovsdp_full(rep, iface.index, ndb, ovsvp)
+
+ if not found:
+ msg = "No DP found"
+ if args.showdp is not None:
+ msg += ":'%s'" % args.showdp
+ print(msg)
+ elif hasattr(args, "adddp"):
+ rep = ovsdp.create(args.adddp, args.upcall, args.versioning, ovspk)
+ if rep is None:
+ print("DP '%s' already exists" % args.adddp)
+ else:
+ print("DP '%s' added" % args.adddp)
+ if args.upcall:
+ ovspk.upcall_handler(ovsflow)
+ elif hasattr(args, "deldp"):
+ ovsdp.destroy(args.deldp)
+ elif hasattr(args, "addif"):
+ rep = ovsdp.info(args.dpname, 0)
+ if rep is None:
+ print("DP '%s' not found." % args.dpname)
+ return 1
+ dpindex = rep["dpifindex"]
+ rep = ovsvp.attach(rep["dpifindex"], args.addif, args.ptype)
+ msg = "vport '%s'" % args.addif
+ if rep and rep["header"]["error"] is None:
+ msg += " added."
+ else:
+ msg += " failed to add."
+ if args.upcall:
+ if rep is None:
+ rep = ovsvp.reset_upcall(dpindex, args.addif, ovspk)
+ ovsvp.upcall_handler(ovsflow)
+ elif hasattr(args, "delif"):
+ rep = ovsdp.info(args.dpname, 0)
+ if rep is None:
+ print("DP '%s' not found." % args.dpname)
+ return 1
+ rep = ovsvp.detach(rep["dpifindex"], args.delif)
+ msg = "vport '%s'" % args.delif
+ if rep and rep["header"]["error"] is None:
+ msg += " removed."
+ else:
+ msg += " failed to remove."
+ elif hasattr(args, "dumpdp"):
+ rep = ovsdp.info(args.dumpdp, 0)
+ if rep is None:
+ print("DP '%s' not found." % args.dumpdp)
+ return 1
+ rep = ovsflow.dump(rep["dpifindex"])
+ for flow in rep:
+ print(flow.dpstr(True if args.verbose > 0 else False))
+ elif hasattr(args, "flbr"):
+ rep = ovsdp.info(args.flbr, 0)
+ if rep is None:
+ print("DP '%s' not found." % args.flbr)
+ return 1
+ flow = OvsFlow.ovs_flow_msg()
+ flow.parse(args.flow, args.acts, rep["dpifindex"])
+ ovsflow.add_flow(rep["dpifindex"], flow)
+ elif hasattr(args, "flsbr"):
+ rep = ovsdp.info(args.flsbr, 0)
+ if rep is None:
+ print("DP '%s' not found." % args.flsbr)
+ ovsflow.del_flows(rep["dpifindex"])
+
+ return 0
+
+
+if __name__ == "__main__":
+ sys.exit(main(sys.argv))
diff --git a/tools/testing/selftests/net/pmtu.sh b/tools/testing/selftests/net/pmtu.sh
index 77c09cd339c3..cfc84958025a 100755
--- a/tools/testing/selftests/net/pmtu.sh
+++ b/tools/testing/selftests/net/pmtu.sh
@@ -1,4 +1,4 @@
-#!/bin/sh
+#!/bin/bash
# SPDX-License-Identifier: GPL-2.0
#
# Check that route PMTU values match expectations, and that initial device MTU
@@ -26,6 +26,15 @@
# - pmtu_ipv6
# Same as pmtu_ipv4, except for locked PMTU tests, using IPv6
#
+# - pmtu_ipv4_dscp_icmp_exception
+# Set up the same network topology as pmtu_ipv4, but use non-default
+# routing table in A. A fib-rule is used to jump to this routing table
+# based on DSCP. Send ICMPv4 packets with the expected DSCP value and
+# verify that ECN doesn't interfere with the creation of PMTU exceptions.
+#
+# - pmtu_ipv4_dscp_udp_exception
+# Same as pmtu_ipv4_dscp_icmp_exception, but use UDP instead of ICMP.
+#
# - pmtu_ipv4_vxlan4_exception
# Set up the same network topology as pmtu_ipv4, create a VXLAN tunnel
# over IPv4 between A and B, routed via R1. On the link between R1 and B,
@@ -59,6 +68,45 @@
# Same as pmtu_ipv6_vxlan6_exception, but using a GENEVE tunnel instead of
# VXLAN
#
+# - pmtu_ipv{4,6}_br_vxlan{4,6}_exception
+# Set up three namespaces, A, B, and C, with routing between A and B over
+# R1. R2 is unused in these tests. A has a veth connection to C, and is
+# connected to B via a VXLAN endpoint, which is directly bridged to C.
+# MTU on the B-R1 link is lower than other MTUs.
+#
+# Check that both C and A are able to communicate with B over the VXLAN
+# tunnel, and that PMTU exceptions with the correct values are created.
+#
+# segment a_r1 segment b_r1 b_r1: 4000
+# .--------------R1--------------. everything
+# C---veth A B else: 5000
+# ' bridge |
+# '---- - - - - - VXLAN - - - - - - - '
+#
+# - pmtu_ipv{4,6}_br_geneve{4,6}_exception
+# Same as pmtu_ipv{4,6}_br_vxlan{4,6}_exception, with a GENEVE tunnel
+# instead.
+#
+# - pmtu_ipv{4,6}_ovs_vxlan{4,6}_exception
+# Set up two namespaces, B, and C, with routing between the init namespace
+# and B over R1. A and R2 are unused in these tests. The init namespace
+# has a veth connection to C, and is connected to B via a VXLAN endpoint,
+# which is handled by Open vSwitch and bridged to C. MTU on the B-R1 link
+# is lower than other MTUs.
+#
+# Check that C is able to communicate with B over the VXLAN tunnel, and
+# that PMTU exceptions with the correct values are created.
+#
+# segment a_r1 segment b_r1 b_r1: 4000
+# .--------------R1--------------. everything
+# C---veth init B else: 5000
+# '- ovs |
+# '---- - - - - - VXLAN - - - - - - - '
+#
+# - pmtu_ipv{4,6}_ovs_geneve{4,6}_exception
+# Same as pmtu_ipv{4,6}_ovs_vxlan{4,6}_exception, with a GENEVE tunnel
+# instead.
+#
# - pmtu_ipv{4,6}_fou{4,6}_exception
# Same as pmtu_ipv4_vxlan4, but using a direct IPv4/IPv6 encapsulation
# (FoU) over IPv4/IPv6, instead of VXLAN
@@ -79,6 +127,16 @@
# below for IPv6 doesn't apply here, because, on IPv4, administrative MTU
# changes alone won't affect PMTU
#
+# - pmtu_vti4_udp_exception
+# Same as pmtu_vti4_exception, but using ESP-in-UDP
+#
+# - pmtu_vti4_udp_routed_exception
+# Set up vti tunnel on top of veth connected through routing namespace and
+# add xfrm states and policies with ESP-in-UDP encapsulation. Check that
+# route exception is not created if link layer MTU is not exceeded, then
+# lower MTU on second part of routed environment and check that exception
+# is created with the expected PMTU.
+#
# - pmtu_vti6_exception
# Set up vti6 tunnel on top of veth, with xfrm states and policies, in two
# namespaces with matching endpoints. Check that route exception is
@@ -86,6 +144,13 @@
# decrease and increase MTU of tunnel, checking that route exception PMTU
# changes accordingly
#
+# - pmtu_vti6_udp_exception
+# Same as pmtu_vti6_exception, but using ESP-in-UDP
+#
+# - pmtu_vti6_udp_routed_exception
+# Same as pmtu_vti6_udp_routed_exception but with routing between vti
+# endpoints
+#
# - pmtu_vti4_default_mtu
# Set up vti4 tunnel on top of veth, in two namespaces with matching
# endpoints. Check that MTU assigned to vti interface is the MTU of the
@@ -123,10 +188,18 @@
# - list_flush_ipv6_exception
# Using the same topology as in pmtu_ipv6, create exceptions, and check
# they are shown when listing exception caches, gone after flushing them
+#
+# - pmtu_ipv4_route_change
+# Use the same topology as in pmtu_ipv4, but issue a route replacement
+# command and delete the corresponding device afterward. This tests for
+# proper cleanup of the PMTU exceptions by the route replacement path.
+# Device unregistration should complete successfully
+#
+# - pmtu_ipv6_route_change
+# Same as above but with IPv6
-
-# Kselftest framework requirement - SKIP code is 4.
-ksft_skip=4
+source lib.sh
+source net_helper.sh
PAUSE_ON_FAIL=no
VERBOSE=0
@@ -139,6 +212,8 @@ which ping6 > /dev/null 2>&1 && ping6=$(which ping6) || ping6=$(which ping)
tests="
pmtu_ipv4_exception ipv4: PMTU exceptions 1
pmtu_ipv6_exception ipv6: PMTU exceptions 1
+ pmtu_ipv4_dscp_icmp_exception ICMPv4 with DSCP and ECN: PMTU exceptions 1
+ pmtu_ipv4_dscp_udp_exception UDPv4 with DSCP and ECN: PMTU exceptions 1
pmtu_ipv4_vxlan4_exception IPv4 over vxlan4: PMTU exceptions 1
pmtu_ipv6_vxlan4_exception IPv6 over vxlan4: PMTU exceptions 1
pmtu_ipv4_vxlan6_exception IPv4 over vxlan6: PMTU exceptions 1
@@ -147,6 +222,22 @@ tests="
pmtu_ipv6_geneve4_exception IPv6 over geneve4: PMTU exceptions 1
pmtu_ipv4_geneve6_exception IPv4 over geneve6: PMTU exceptions 1
pmtu_ipv6_geneve6_exception IPv6 over geneve6: PMTU exceptions 1
+ pmtu_ipv4_br_vxlan4_exception IPv4, bridged vxlan4: PMTU exceptions 1
+ pmtu_ipv6_br_vxlan4_exception IPv6, bridged vxlan4: PMTU exceptions 1
+ pmtu_ipv4_br_vxlan6_exception IPv4, bridged vxlan6: PMTU exceptions 1
+ pmtu_ipv6_br_vxlan6_exception IPv6, bridged vxlan6: PMTU exceptions 1
+ pmtu_ipv4_br_geneve4_exception IPv4, bridged geneve4: PMTU exceptions 1
+ pmtu_ipv6_br_geneve4_exception IPv6, bridged geneve4: PMTU exceptions 1
+ pmtu_ipv4_br_geneve6_exception IPv4, bridged geneve6: PMTU exceptions 1
+ pmtu_ipv6_br_geneve6_exception IPv6, bridged geneve6: PMTU exceptions 1
+ pmtu_ipv4_ovs_vxlan4_exception IPv4, OVS vxlan4: PMTU exceptions 1
+ pmtu_ipv6_ovs_vxlan4_exception IPv6, OVS vxlan4: PMTU exceptions 1
+ pmtu_ipv4_ovs_vxlan6_exception IPv4, OVS vxlan6: PMTU exceptions 1
+ pmtu_ipv6_ovs_vxlan6_exception IPv6, OVS vxlan6: PMTU exceptions 1
+ pmtu_ipv4_ovs_geneve4_exception IPv4, OVS geneve4: PMTU exceptions 1
+ pmtu_ipv6_ovs_geneve4_exception IPv6, OVS geneve4: PMTU exceptions 1
+ pmtu_ipv4_ovs_geneve6_exception IPv4, OVS geneve6: PMTU exceptions 1
+ pmtu_ipv6_ovs_geneve6_exception IPv6, OVS geneve6: PMTU exceptions 1
pmtu_ipv4_fou4_exception IPv4 over fou4: PMTU exceptions 1
pmtu_ipv6_fou4_exception IPv6 over fou4: PMTU exceptions 1
pmtu_ipv4_fou6_exception IPv4 over fou6: PMTU exceptions 1
@@ -161,6 +252,10 @@ tests="
pmtu_ipv6_ipv6_exception IPv6 over IPv6: PMTU exceptions 1
pmtu_vti6_exception vti6: PMTU exceptions 0
pmtu_vti4_exception vti4: PMTU exceptions 0
+ pmtu_vti6_udp_exception vti6: PMTU exceptions (ESP-in-UDP) 0
+ pmtu_vti4_udp_exception vti4: PMTU exceptions (ESP-in-UDP) 0
+ pmtu_vti6_udp_routed_exception vti6: PMTU exceptions, routed (ESP-in-UDP) 0
+ pmtu_vti4_udp_routed_exception vti4: PMTU exceptions, routed (ESP-in-UDP) 0
pmtu_vti4_default_mtu vti4: default MTU assignment 0
pmtu_vti6_default_mtu vti6: default MTU assignment 0
pmtu_vti4_link_add_mtu vti4: MTU setting on link creation 0
@@ -169,16 +264,9 @@ tests="
cleanup_ipv4_exception ipv4: cleanup of cached exceptions 1
cleanup_ipv6_exception ipv6: cleanup of cached exceptions 1
list_flush_ipv4_exception ipv4: list and flush cached exceptions 1
- list_flush_ipv6_exception ipv6: list and flush cached exceptions 1"
-
-NS_A="ns-A"
-NS_B="ns-B"
-NS_R1="ns-R1"
-NS_R2="ns-R2"
-ns_a="ip netns exec ${NS_A}"
-ns_b="ip netns exec ${NS_B}"
-ns_r1="ip netns exec ${NS_R1}"
-ns_r2="ip netns exec ${NS_R2}"
+ list_flush_ipv6_exception ipv6: list and flush cached exceptions 1
+ pmtu_ipv4_route_change ipv4: PMTU exception w/route replace 1
+ pmtu_ipv6_route_change ipv6: PMTU exception w/route replace 1"
# Addressing and routing for tests with routers: four network segments, with
# index SEGMENT between 1 and 4, a common prefix (PREFIX4 or PREFIX6) and an
@@ -212,7 +300,6 @@ routes="
A ${prefix6}:${b_r2}::1 ${prefix6}:${a_r2}::2
B default ${prefix6}:${b_r1}::2
"
-
USE_NH="no"
# ns family nh id destination gateway
nexthops="
@@ -237,11 +324,16 @@ routes_nh="
B 6 default 61
"
+policy_mark=0x04
+rt_table=main
+
veth4_a_addr="192.168.1.1"
veth4_b_addr="192.168.1.2"
+veth4_c_addr="192.168.2.10"
veth4_mask="24"
veth6_a_addr="fd00:1::a"
veth6_b_addr="fd00:1::b"
+veth6_c_addr="fd00:2::c"
veth6_mask="64"
tunnel4_a_addr="192.168.2.1"
@@ -257,6 +349,9 @@ dummy6_mask="64"
err_buf=
tcpdump_pids=
+nettest_pids=
+socat_pids=
+tmpoutfile=
err() {
err_buf="${err_buf}${1}
@@ -285,6 +380,16 @@ run_cmd() {
return $rc
}
+run_cmd_bg() {
+ cmd="$*"
+
+ if [ "$VERBOSE" = "1" ]; then
+ printf " COMMAND: %s &\n" "${cmd}"
+ fi
+
+ $cmd 2>&1 &
+}
+
# Find the auto-generated name for this namespace
nsname() {
eval echo \$NS_$1
@@ -296,7 +401,7 @@ setup_fou_or_gue() {
encap="${3}"
if [ "${outer}" = "4" ]; then
- modprobe fou || return 2
+ modprobe fou || return $ksft_skip
a_addr="${prefix4}.${a_r1}.1"
b_addr="${prefix4}.${b_r1}.1"
if [ "${inner}" = "4" ]; then
@@ -307,7 +412,7 @@ setup_fou_or_gue() {
ipproto="41"
fi
else
- modprobe fou6 || return 2
+ modprobe fou6 || return $ksft_skip
a_addr="${prefix6}:${a_r1}::1"
b_addr="${prefix6}:${b_r1}::1"
if [ "${inner}" = "4" ]; then
@@ -321,8 +426,8 @@ setup_fou_or_gue() {
fi
fi
- run_cmd ${ns_a} ip fou add port 5555 ipproto ${ipproto} || return 2
- run_cmd ${ns_a} ip link add ${encap}_a type ${type} ${mode} local ${a_addr} remote ${b_addr} encap ${encap} encap-sport auto encap-dport 5556 || return 2
+ run_cmd ${ns_a} ip fou add port 5555 ipproto ${ipproto} || return $ksft_skip
+ run_cmd ${ns_a} ip link add ${encap}_a type ${type} ${mode} local ${a_addr} remote ${b_addr} encap ${encap} encap-sport auto encap-dport 5556 || return $ksft_skip
run_cmd ${ns_b} ip fou add port 5556 ipproto ${ipproto}
run_cmd ${ns_b} ip link add ${encap}_b type ${type} ${mode} local ${b_addr} remote ${a_addr} encap ${encap} encap-sport auto encap-dport 5555
@@ -396,7 +501,7 @@ setup_ipvX_over_ipvY() {
fi
fi
- run_cmd ${ns_a} ip link add ip_a type ${type} local ${a_addr} remote ${b_addr} mode ${mode} || return 2
+ run_cmd ${ns_a} ip link add ip_a type ${type} local ${a_addr} remote ${b_addr} mode ${mode} || return $ksft_skip
run_cmd ${ns_b} ip link add ip_b type ${type} local ${b_addr} remote ${a_addr} mode ${mode}
run_cmd ${ns_a} ip link set ip_a up
@@ -428,13 +533,17 @@ setup_ip6ip6() {
}
setup_namespaces() {
- for n in ${NS_A} ${NS_B} ${NS_R1} ${NS_R2}; do
- ip netns add ${n} || return 1
-
+ setup_ns NS_A NS_B NS_C NS_R1 NS_R2
+ for n in ${NS_A} ${NS_B} ${NS_C} ${NS_R1} ${NS_R2}; do
# Disable DAD, so that we don't have to wait to use the
# configured IPv6 addresses
ip netns exec ${n} sysctl -q net/ipv6/conf/default/accept_dad=0
done
+ ns_a="ip netns exec ${NS_A}"
+ ns_b="ip netns exec ${NS_B}"
+ ns_c="ip netns exec ${NS_C}"
+ ns_r1="ip netns exec ${NS_R1}"
+ ns_r2="ip netns exec ${NS_R2}"
}
setup_veth() {
@@ -479,11 +588,20 @@ setup_vti6() {
setup_vti 6 ${veth6_a_addr} ${veth6_b_addr} ${tunnel6_a_addr} ${tunnel6_b_addr} ${tunnel6_mask}
}
+setup_vti4routed() {
+ setup_vti 4 ${prefix4}.${a_r1}.1 ${prefix4}.${b_r1}.1 ${tunnel4_a_addr} ${tunnel4_b_addr} ${tunnel4_mask}
+}
+
+setup_vti6routed() {
+ setup_vti 6 ${prefix6}:${a_r1}::1 ${prefix6}:${b_r1}::1 ${tunnel6_a_addr} ${tunnel6_b_addr} ${tunnel6_mask}
+}
+
setup_vxlan_or_geneve() {
type="${1}"
a_addr="${2}"
b_addr="${3}"
opts="${4}"
+ br_if_a="${5}"
if [ "${type}" = "vxlan" ]; then
opts="${opts} ttl 64 dstport 4789"
@@ -497,10 +615,16 @@ setup_vxlan_or_geneve() {
run_cmd ${ns_a} ip link add ${type}_a type ${type} id 1 ${opts_a} remote ${b_addr} ${opts} || return 1
run_cmd ${ns_b} ip link add ${type}_b type ${type} id 1 ${opts_b} remote ${a_addr} ${opts}
- run_cmd ${ns_a} ip addr add ${tunnel4_a_addr}/${tunnel4_mask} dev ${type}_a
- run_cmd ${ns_b} ip addr add ${tunnel4_b_addr}/${tunnel4_mask} dev ${type}_b
+ if [ -n "${br_if_a}" ]; then
+ run_cmd ${ns_a} ip addr add ${tunnel4_a_addr}/${tunnel4_mask} dev ${br_if_a}
+ run_cmd ${ns_a} ip addr add ${tunnel6_a_addr}/${tunnel6_mask} dev ${br_if_a}
+ run_cmd ${ns_a} ip link set ${type}_a master ${br_if_a}
+ else
+ run_cmd ${ns_a} ip addr add ${tunnel4_a_addr}/${tunnel4_mask} dev ${type}_a
+ run_cmd ${ns_a} ip addr add ${tunnel6_a_addr}/${tunnel6_mask} dev ${type}_a
+ fi
- run_cmd ${ns_a} ip addr add ${tunnel6_a_addr}/${tunnel6_mask} dev ${type}_a
+ run_cmd ${ns_b} ip addr add ${tunnel4_b_addr}/${tunnel4_mask} dev ${type}_b
run_cmd ${ns_b} ip addr add ${tunnel6_b_addr}/${tunnel6_mask} dev ${type}_b
run_cmd ${ns_a} ip link set ${type}_a up
@@ -516,29 +640,65 @@ setup_vxlan4() {
}
setup_geneve6() {
- setup_vxlan_or_geneve geneve ${prefix6}:${a_r1}::1 ${prefix6}:${b_r1}::1
+ setup_vxlan_or_geneve geneve ${prefix6}:${a_r1}::1 ${prefix6}:${b_r1}::1 ""
}
setup_vxlan6() {
- setup_vxlan_or_geneve vxlan ${prefix6}:${a_r1}::1 ${prefix6}:${b_r1}::1
+ setup_vxlan_or_geneve vxlan ${prefix6}:${a_r1}::1 ${prefix6}:${b_r1}::1 ""
+}
+
+setup_bridged_geneve4() {
+ setup_vxlan_or_geneve geneve ${prefix4}.${a_r1}.1 ${prefix4}.${b_r1}.1 "df set" "br0"
+}
+
+setup_bridged_vxlan4() {
+ setup_vxlan_or_geneve vxlan ${prefix4}.${a_r1}.1 ${prefix4}.${b_r1}.1 "df set" "br0"
+}
+
+setup_bridged_geneve6() {
+ setup_vxlan_or_geneve geneve ${prefix6}:${a_r1}::1 ${prefix6}:${b_r1}::1 "" "br0"
+}
+
+setup_bridged_vxlan6() {
+ setup_vxlan_or_geneve vxlan ${prefix6}:${a_r1}::1 ${prefix6}:${b_r1}::1 "" "br0"
}
setup_xfrm() {
proto=${1}
veth_a_addr="${2}"
veth_b_addr="${3}"
+ encap=${4}
- run_cmd ${ns_a} ip -${proto} xfrm state add src ${veth_a_addr} dst ${veth_b_addr} spi 0x1000 proto esp aead 'rfc4106(gcm(aes))' 0x0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f 128 mode tunnel || return 1
- run_cmd ${ns_a} ip -${proto} xfrm state add src ${veth_b_addr} dst ${veth_a_addr} spi 0x1001 proto esp aead 'rfc4106(gcm(aes))' 0x0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f 128 mode tunnel
+ run_cmd ${ns_a} ip -${proto} xfrm state add src ${veth_a_addr} dst ${veth_b_addr} spi 0x1000 proto esp aead 'rfc4106(gcm(aes))' 0x0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f 128 mode tunnel ${encap} || return 1
+ run_cmd ${ns_a} ip -${proto} xfrm state add src ${veth_b_addr} dst ${veth_a_addr} spi 0x1001 proto esp aead 'rfc4106(gcm(aes))' 0x0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f 128 mode tunnel ${encap}
run_cmd ${ns_a} ip -${proto} xfrm policy add dir out mark 10 tmpl src ${veth_a_addr} dst ${veth_b_addr} proto esp mode tunnel
run_cmd ${ns_a} ip -${proto} xfrm policy add dir in mark 10 tmpl src ${veth_b_addr} dst ${veth_a_addr} proto esp mode tunnel
- run_cmd ${ns_b} ip -${proto} xfrm state add src ${veth_a_addr} dst ${veth_b_addr} spi 0x1000 proto esp aead 'rfc4106(gcm(aes))' 0x0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f 128 mode tunnel
- run_cmd ${ns_b} ip -${proto} xfrm state add src ${veth_b_addr} dst ${veth_a_addr} spi 0x1001 proto esp aead 'rfc4106(gcm(aes))' 0x0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f 128 mode tunnel
+ run_cmd ${ns_b} ip -${proto} xfrm state add src ${veth_a_addr} dst ${veth_b_addr} spi 0x1000 proto esp aead 'rfc4106(gcm(aes))' 0x0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f 128 mode tunnel ${encap}
+ run_cmd ${ns_b} ip -${proto} xfrm state add src ${veth_b_addr} dst ${veth_a_addr} spi 0x1001 proto esp aead 'rfc4106(gcm(aes))' 0x0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f 128 mode tunnel ${encap}
run_cmd ${ns_b} ip -${proto} xfrm policy add dir out mark 10 tmpl src ${veth_b_addr} dst ${veth_a_addr} proto esp mode tunnel
run_cmd ${ns_b} ip -${proto} xfrm policy add dir in mark 10 tmpl src ${veth_a_addr} dst ${veth_b_addr} proto esp mode tunnel
}
+setup_nettest_xfrm() {
+ if ! which nettest >/dev/null; then
+ PATH=$PWD:$PATH
+ if ! which nettest >/dev/null; then
+ echo "'nettest' command not found; skipping tests"
+ return 1
+ fi
+ fi
+
+ [ ${1} -eq 6 ] && proto="-6" || proto=""
+ port=${2}
+
+ run_cmd_bg "${ns_a}" nettest "${proto}" -q -D -s -x -p "${port}" -t 5
+ nettest_pids="${nettest_pids} $!"
+
+ run_cmd_bg "${ns_b}" nettest "${proto}" -q -D -s -x -p "${port}" -t 5
+ nettest_pids="${nettest_pids} $!"
+}
+
setup_xfrm4() {
setup_xfrm 4 ${veth4_a_addr} ${veth4_b_addr}
}
@@ -547,6 +707,26 @@ setup_xfrm6() {
setup_xfrm 6 ${veth6_a_addr} ${veth6_b_addr}
}
+setup_xfrm4udp() {
+ setup_xfrm 4 ${veth4_a_addr} ${veth4_b_addr} "encap espinudp 4500 4500 0.0.0.0" && \
+ setup_nettest_xfrm 4 4500
+}
+
+setup_xfrm6udp() {
+ setup_xfrm 6 ${veth6_a_addr} ${veth6_b_addr} "encap espinudp 4500 4500 0.0.0.0" && \
+ setup_nettest_xfrm 6 4500
+}
+
+setup_xfrm4udprouted() {
+ setup_xfrm 4 ${prefix4}.${a_r1}.1 ${prefix4}.${b_r1}.1 "encap espinudp 4500 4500 0.0.0.0" && \
+ setup_nettest_xfrm 4 4500
+}
+
+setup_xfrm6udprouted() {
+ setup_xfrm 6 ${prefix6}:${a_r1}::1 ${prefix6}:${b_r1}::1 "encap espinudp 4500 4500 0.0.0.0" && \
+ setup_nettest_xfrm 6 4500
+}
+
setup_routing_old() {
for i in ${routes}; do
[ "${ns}" = "" ] && ns="${i}" && continue
@@ -555,7 +735,7 @@ setup_routing_old() {
ns_name="$(nsname ${ns})"
- ip -n ${ns_name} route add ${addr} via ${gw}
+ ip -n "${ns_name}" route add "${addr}" table "${rt_table}" via "${gw}"
ns=""; addr=""; gw=""
done
@@ -585,7 +765,7 @@ setup_routing_new() {
ns_name="$(nsname ${ns})"
- ip -n ${ns_name} -${fam} route add ${addr} nhid ${nhid}
+ ip -n "${ns_name}" -"${fam}" route add "${addr}" table "${rt_table}" nhid "${nhid}"
ns=""; fam=""; addr=""; nhid=""
done
@@ -630,10 +810,101 @@ setup_routing() {
return 0
}
+setup_policy_routing() {
+ setup_routing
+
+ ip -netns "${NS_A}" -4 rule add dsfield "${policy_mark}" \
+ table "${rt_table}"
+
+ # Set the IPv4 Don't Fragment bit with tc, since socat doesn't seem to
+ # have an option do to it.
+ tc -netns "${NS_A}" qdisc replace dev veth_A-R1 root prio
+ tc -netns "${NS_A}" qdisc replace dev veth_A-R2 root prio
+ tc -netns "${NS_A}" filter add dev veth_A-R1 \
+ protocol ipv4 flower ip_proto udp \
+ action pedit ex munge ip df set 0x40 pipe csum ip and udp
+ tc -netns "${NS_A}" filter add dev veth_A-R2 \
+ protocol ipv4 flower ip_proto udp \
+ action pedit ex munge ip df set 0x40 pipe csum ip and udp
+}
+
+setup_bridge() {
+ run_cmd ${ns_a} ip link add br0 type bridge || return $ksft_skip
+ run_cmd ${ns_a} ip link set br0 up
+
+ run_cmd ${ns_c} ip link add veth_C-A type veth peer name veth_A-C
+ run_cmd ${ns_c} ip link set veth_A-C netns ${NS_A}
+
+ run_cmd ${ns_a} ip link set veth_A-C up
+ run_cmd ${ns_c} ip link set veth_C-A up
+ run_cmd ${ns_c} ip addr add ${veth4_c_addr}/${veth4_mask} dev veth_C-A
+ run_cmd ${ns_c} ip addr add ${veth6_c_addr}/${veth6_mask} dev veth_C-A
+ run_cmd ${ns_a} ip link set veth_A-C master br0
+}
+
+setup_ovs_vxlan_or_geneve() {
+ type="${1}"
+ a_addr="${2}"
+ b_addr="${3}"
+
+ if [ "${type}" = "vxlan" ]; then
+ opts="${opts} ttl 64 dstport 4789"
+ opts_b="local ${b_addr}"
+ fi
+
+ run_cmd ovs-vsctl add-port ovs_br0 ${type}_a -- \
+ set interface ${type}_a type=${type} \
+ options:remote_ip=${b_addr} options:key=1 options:csum=true || return 1
+
+ run_cmd ${ns_b} ip link add ${type}_b type ${type} id 1 ${opts_b} remote ${a_addr} ${opts} || return 1
+
+ run_cmd ${ns_b} ip addr add ${tunnel4_b_addr}/${tunnel4_mask} dev ${type}_b
+ run_cmd ${ns_b} ip addr add ${tunnel6_b_addr}/${tunnel6_mask} dev ${type}_b
+
+ run_cmd ${ns_b} ip link set ${type}_b up
+}
+
+setup_ovs_geneve4() {
+ setup_ovs_vxlan_or_geneve geneve ${prefix4}.${a_r1}.1 ${prefix4}.${b_r1}.1
+}
+
+setup_ovs_vxlan4() {
+ setup_ovs_vxlan_or_geneve vxlan ${prefix4}.${a_r1}.1 ${prefix4}.${b_r1}.1
+}
+
+setup_ovs_geneve6() {
+ setup_ovs_vxlan_or_geneve geneve ${prefix6}:${a_r1}::1 ${prefix6}:${b_r1}::1
+}
+
+setup_ovs_vxlan6() {
+ setup_ovs_vxlan_or_geneve vxlan ${prefix6}:${a_r1}::1 ${prefix6}:${b_r1}::1
+}
+
+setup_ovs_bridge() {
+ run_cmd ovs-vsctl add-br ovs_br0 || return $ksft_skip
+ run_cmd ip link set ovs_br0 up
+
+ run_cmd ${ns_c} ip link add veth_C-A type veth peer name veth_A-C
+ run_cmd ${ns_c} ip link set veth_A-C netns 1
+
+ run_cmd ip link set veth_A-C up
+ run_cmd ${ns_c} ip link set veth_C-A up
+ run_cmd ${ns_c} ip addr add ${veth4_c_addr}/${veth4_mask} dev veth_C-A
+ run_cmd ${ns_c} ip addr add ${veth6_c_addr}/${veth6_mask} dev veth_C-A
+ run_cmd ovs-vsctl add-port ovs_br0 veth_A-C
+
+ # Move veth_A-R1 to init
+ run_cmd ${ns_a} ip link set veth_A-R1 netns 1
+ run_cmd ip addr add ${prefix4}.${a_r1}.1/${veth4_mask} dev veth_A-R1
+ run_cmd ip addr add ${prefix6}:${a_r1}::1/${veth6_mask} dev veth_A-R1
+ run_cmd ip link set veth_A-R1 up
+ run_cmd ip route add ${prefix4}.${b_r1}.1 via ${prefix4}.${a_r1}.2
+ run_cmd ip route add ${prefix6}:${b_r1}::1 via ${prefix6}:${a_r1}::2
+}
+
setup() {
[ "$(id -u)" -ne 0 ] && echo " need to run as root" && return $ksft_skip
- cleanup
for arg do
eval setup_${arg} || { echo " ${arg} not supported"; return 1; }
done
@@ -644,7 +915,7 @@ trace() {
for arg do
[ "${ns_cmd}" = "" ] && ns_cmd="${arg}" && continue
- ${ns_cmd} tcpdump -s 0 -i "${arg}" -w "${name}_${arg}.pcap" 2> /dev/null &
+ ${ns_cmd} tcpdump --immediate-mode -s 0 -i "${arg}" -w "${name}_${arg}.pcap" 2> /dev/null &
tcpdump_pids="${tcpdump_pids} $!"
ns_cmd=
done
@@ -657,9 +928,23 @@ cleanup() {
done
tcpdump_pids=
- for n in ${NS_A} ${NS_B} ${NS_R1} ${NS_R2}; do
- ip netns del ${n} 2> /dev/null
+ for pid in ${nettest_pids}; do
+ kill ${pid}
done
+ nettest_pids=
+
+ for pid in ${socat_pids}; do
+ kill "${pid}"
+ done
+ socat_pids=
+
+ cleanup_all_ns
+
+ ip link del veth_A-C 2>/dev/null
+ ip link del veth_A-R1 2>/dev/null
+ ovs-vsctl --if-exists del-port vxlan_a 2>/dev/null
+ ovs-vsctl --if-exists del-br ovs_br0 2>/dev/null
+ rm -f "$tmpoutfile"
}
mtu() {
@@ -699,15 +984,21 @@ link_get_mtu() {
route_get_dst_exception() {
ns_cmd="${1}"
dst="${2}"
+ dsfield="${3}"
- ${ns_cmd} ip route get "${dst}"
+ if [ -z "${dsfield}" ]; then
+ dsfield=0
+ fi
+
+ ${ns_cmd} ip route get "${dst}" dsfield "${dsfield}"
}
route_get_dst_pmtu_from_exception() {
ns_cmd="${1}"
dst="${2}"
+ dsfield="${3}"
- mtu_parse "$(route_get_dst_exception "${ns_cmd}" ${dst})"
+ mtu_parse "$(route_get_dst_exception "${ns_cmd}" "${dst}" "${dsfield}")"
}
check_pmtu_value() {
@@ -726,7 +1017,7 @@ check_pmtu_value() {
test_pmtu_ipvX() {
family=${1}
- setup namespaces routing || return 2
+ setup namespaces routing || return $ksft_skip
trace "${ns_a}" veth_A-R1 "${ns_r1}" veth_R1-A \
"${ns_r1}" veth_R1-B "${ns_b}" veth_B-R1 \
"${ns_a}" veth_A-R2 "${ns_r2}" veth_R2-A \
@@ -817,6 +1108,95 @@ test_pmtu_ipv6_exception() {
test_pmtu_ipvX 6
}
+test_pmtu_ipv4_dscp_icmp_exception() {
+ rt_table=100
+
+ setup namespaces policy_routing || return $ksft_skip
+ trace "${ns_a}" veth_A-R1 "${ns_r1}" veth_R1-A \
+ "${ns_r1}" veth_R1-B "${ns_b}" veth_B-R1 \
+ "${ns_a}" veth_A-R2 "${ns_r2}" veth_R2-A \
+ "${ns_r2}" veth_R2-B "${ns_b}" veth_B-R2
+
+ # Set up initial MTU values
+ mtu "${ns_a}" veth_A-R1 2000
+ mtu "${ns_r1}" veth_R1-A 2000
+ mtu "${ns_r1}" veth_R1-B 1400
+ mtu "${ns_b}" veth_B-R1 1400
+
+ mtu "${ns_a}" veth_A-R2 2000
+ mtu "${ns_r2}" veth_R2-A 2000
+ mtu "${ns_r2}" veth_R2-B 1500
+ mtu "${ns_b}" veth_B-R2 1500
+
+ len=$((2000 - 20 - 8)) # Fills MTU of veth_A-R1
+
+ dst1="${prefix4}.${b_r1}.1"
+ dst2="${prefix4}.${b_r2}.1"
+
+ # Create route exceptions
+ dsfield=${policy_mark} # No ECN bit set (Not-ECT)
+ run_cmd "${ns_a}" ping -q -M want -Q "${dsfield}" -c 1 -w 1 -s "${len}" "${dst1}"
+
+ dsfield=$(printf "%#x" $((policy_mark + 0x02))) # ECN=2 (ECT(0))
+ run_cmd "${ns_a}" ping -q -M want -Q "${dsfield}" -c 1 -w 1 -s "${len}" "${dst2}"
+
+ # Check that exceptions have been created with the correct PMTU
+ pmtu_1="$(route_get_dst_pmtu_from_exception "${ns_a}" "${dst1}" "${policy_mark}")"
+ check_pmtu_value "1400" "${pmtu_1}" "exceeding MTU" || return 1
+
+ pmtu_2="$(route_get_dst_pmtu_from_exception "${ns_a}" "${dst2}" "${policy_mark}")"
+ check_pmtu_value "1500" "${pmtu_2}" "exceeding MTU" || return 1
+}
+
+test_pmtu_ipv4_dscp_udp_exception() {
+ rt_table=100
+
+ if ! which socat > /dev/null 2>&1; then
+ echo "'socat' command not found; skipping tests"
+ return $ksft_skip
+ fi
+
+ setup namespaces policy_routing || return $ksft_skip
+ trace "${ns_a}" veth_A-R1 "${ns_r1}" veth_R1-A \
+ "${ns_r1}" veth_R1-B "${ns_b}" veth_B-R1 \
+ "${ns_a}" veth_A-R2 "${ns_r2}" veth_R2-A \
+ "${ns_r2}" veth_R2-B "${ns_b}" veth_B-R2
+
+ # Set up initial MTU values
+ mtu "${ns_a}" veth_A-R1 2000
+ mtu "${ns_r1}" veth_R1-A 2000
+ mtu "${ns_r1}" veth_R1-B 1400
+ mtu "${ns_b}" veth_B-R1 1400
+
+ mtu "${ns_a}" veth_A-R2 2000
+ mtu "${ns_r2}" veth_R2-A 2000
+ mtu "${ns_r2}" veth_R2-B 1500
+ mtu "${ns_b}" veth_B-R2 1500
+
+ len=$((2000 - 20 - 8)) # Fills MTU of veth_A-R1
+
+ dst1="${prefix4}.${b_r1}.1"
+ dst2="${prefix4}.${b_r2}.1"
+
+ # Create route exceptions
+ run_cmd_bg "${ns_b}" socat UDP-LISTEN:50000 OPEN:/dev/null,wronly=1
+ socat_pids="${socat_pids} $!"
+
+ dsfield=${policy_mark} # No ECN bit set (Not-ECT)
+ run_cmd "${ns_a}" socat OPEN:/dev/zero,rdonly=1,readbytes="${len}" \
+ UDP:"${dst1}":50000,tos="${dsfield}"
+
+ dsfield=$(printf "%#x" $((policy_mark + 0x02))) # ECN=2 (ECT(0))
+ run_cmd "${ns_a}" socat OPEN:/dev/zero,rdonly=1,readbytes="${len}" \
+ UDP:"${dst2}":50000,tos="${dsfield}"
+
+ # Check that exceptions have been created with the correct PMTU
+ pmtu_1="$(route_get_dst_pmtu_from_exception "${ns_a}" "${dst1}" "${policy_mark}")"
+ check_pmtu_value "1400" "${pmtu_1}" "exceeding MTU" || return 1
+ pmtu_2="$(route_get_dst_pmtu_from_exception "${ns_a}" "${dst2}" "${policy_mark}")"
+ check_pmtu_value "1500" "${pmtu_2}" "exceeding MTU" || return 1
+}
+
test_pmtu_ipvX_over_vxlanY_or_geneveY_exception() {
type=${1}
family=${2}
@@ -824,11 +1204,11 @@ test_pmtu_ipvX_over_vxlanY_or_geneveY_exception() {
ll_mtu=4000
if [ ${outer_family} -eq 4 ]; then
- setup namespaces routing ${type}4 || return 2
+ setup namespaces routing ${type}4 || return $ksft_skip
# IPv4 header UDP header VXLAN/GENEVE header Ethernet header
exp_mtu=$((${ll_mtu} - 20 - 8 - 8 - 14))
else
- setup namespaces routing ${type}6 || return 2
+ setup namespaces routing ${type}6 || return $ksft_skip
# IPv6 header UDP header VXLAN/GENEVE header Ethernet header
exp_mtu=$((${ll_mtu} - 40 - 8 - 8 - 14))
fi
@@ -892,13 +1272,219 @@ test_pmtu_ipv6_geneve6_exception() {
test_pmtu_ipvX_over_vxlanY_or_geneveY_exception geneve 6 6
}
+test_pmtu_ipvX_over_bridged_vxlanY_or_geneveY_exception() {
+ type=${1}
+ family=${2}
+ outer_family=${3}
+ ll_mtu=4000
+
+ if [ ${outer_family} -eq 4 ]; then
+ setup namespaces routing bridge bridged_${type}4 || return $ksft_skip
+ # IPv4 header UDP header VXLAN/GENEVE header Ethernet header
+ exp_mtu=$((${ll_mtu} - 20 - 8 - 8 - 14))
+ else
+ setup namespaces routing bridge bridged_${type}6 || return $ksft_skip
+ # IPv6 header UDP header VXLAN/GENEVE header Ethernet header
+ exp_mtu=$((${ll_mtu} - 40 - 8 - 8 - 14))
+ fi
+
+ trace "${ns_a}" ${type}_a "${ns_b}" ${type}_b \
+ "${ns_a}" veth_A-R1 "${ns_r1}" veth_R1-A \
+ "${ns_b}" veth_B-R1 "${ns_r1}" veth_R1-B \
+ "${ns_a}" br0 "${ns_a}" veth-A-C \
+ "${ns_c}" veth_C-A
+
+ if [ ${family} -eq 4 ]; then
+ ping=ping
+ dst=${tunnel4_b_addr}
+ else
+ ping=${ping6}
+ dst=${tunnel6_b_addr}
+ fi
+
+ # Create route exception by exceeding link layer MTU
+ mtu "${ns_a}" veth_A-R1 $((${ll_mtu} + 1000))
+ mtu "${ns_a}" br0 $((${ll_mtu} + 1000))
+ mtu "${ns_a}" veth_A-C $((${ll_mtu} + 1000))
+ mtu "${ns_c}" veth_C-A $((${ll_mtu} + 1000))
+ mtu "${ns_r1}" veth_R1-A $((${ll_mtu} + 1000))
+ mtu "${ns_b}" veth_B-R1 ${ll_mtu}
+ mtu "${ns_r1}" veth_R1-B ${ll_mtu}
+
+ mtu "${ns_a}" ${type}_a $((${ll_mtu} + 1000))
+ mtu "${ns_b}" ${type}_b $((${ll_mtu} + 1000))
+
+ run_cmd ${ns_c} ${ping} -q -M want -i 0.1 -c 10 -s $((${ll_mtu} + 500)) ${dst} || return 1
+ run_cmd ${ns_a} ${ping} -q -M want -i 0.1 -w 1 -s $((${ll_mtu} + 500)) ${dst} || return 1
+
+ # Check that exceptions were created
+ pmtu="$(route_get_dst_pmtu_from_exception "${ns_c}" ${dst})"
+ check_pmtu_value ${exp_mtu} "${pmtu}" "exceeding link layer MTU on bridged ${type} interface"
+ pmtu="$(route_get_dst_pmtu_from_exception "${ns_a}" ${dst})"
+ check_pmtu_value ${exp_mtu} "${pmtu}" "exceeding link layer MTU on locally bridged ${type} interface"
+
+ tmpoutfile=$(mktemp)
+
+ # Flush Exceptions, retry with TCP
+ run_cmd ${ns_a} ip route flush cached ${dst}
+ run_cmd ${ns_b} ip route flush cached ${dst}
+ run_cmd ${ns_c} ip route flush cached ${dst}
+
+ for target in "${ns_a}" "${ns_c}" ; do
+ if [ ${family} -eq 4 ]; then
+ TCPDST=TCP:${dst}:50000
+ else
+ TCPDST="TCP:[${dst}]:50000"
+ fi
+ ${ns_b} socat -T 3 -u -6 TCP-LISTEN:50000,reuseaddr STDOUT > $tmpoutfile &
+ local socat_pid=$!
+
+ wait_local_port_listen ${NS_B} 50000 tcp
+
+ dd if=/dev/zero status=none bs=1M count=1 | ${target} socat -T 3 -u STDIN $TCPDST,connect-timeout=3
+
+ wait ${socat_pid}
+ size=$(du -sb $tmpoutfile)
+ size=${size%%/tmp/*}
+
+ [ $size -ne 1048576 ] && err "File size $size mismatches exepcted value in locally bridged vxlan test" && return 1
+ done
+
+ rm -f "$tmpoutfile"
+
+ # Check that exceptions were created
+ pmtu="$(route_get_dst_pmtu_from_exception "${ns_c}" ${dst})"
+ check_pmtu_value ${exp_mtu} "${pmtu}" "tcp: exceeding link layer MTU on bridged ${type} interface"
+ pmtu="$(route_get_dst_pmtu_from_exception "${ns_a}" ${dst})"
+ check_pmtu_value ${exp_mtu} "${pmtu}" "tcp exceeding link layer MTU on locally bridged ${type} interface"
+}
+
+test_pmtu_ipv4_br_vxlan4_exception() {
+ test_pmtu_ipvX_over_bridged_vxlanY_or_geneveY_exception vxlan 4 4
+}
+
+test_pmtu_ipv6_br_vxlan4_exception() {
+ test_pmtu_ipvX_over_bridged_vxlanY_or_geneveY_exception vxlan 6 4
+}
+
+test_pmtu_ipv4_br_geneve4_exception() {
+ test_pmtu_ipvX_over_bridged_vxlanY_or_geneveY_exception geneve 4 4
+}
+
+test_pmtu_ipv6_br_geneve4_exception() {
+ test_pmtu_ipvX_over_bridged_vxlanY_or_geneveY_exception geneve 6 4
+}
+
+test_pmtu_ipv4_br_vxlan6_exception() {
+ test_pmtu_ipvX_over_bridged_vxlanY_or_geneveY_exception vxlan 4 6
+}
+
+test_pmtu_ipv6_br_vxlan6_exception() {
+ test_pmtu_ipvX_over_bridged_vxlanY_or_geneveY_exception vxlan 6 6
+}
+
+test_pmtu_ipv4_br_geneve6_exception() {
+ test_pmtu_ipvX_over_bridged_vxlanY_or_geneveY_exception geneve 4 6
+}
+
+test_pmtu_ipv6_br_geneve6_exception() {
+ test_pmtu_ipvX_over_bridged_vxlanY_or_geneveY_exception geneve 6 6
+}
+
+test_pmtu_ipvX_over_ovs_vxlanY_or_geneveY_exception() {
+ type=${1}
+ family=${2}
+ outer_family=${3}
+ ll_mtu=4000
+
+ if [ ${outer_family} -eq 4 ]; then
+ setup namespaces routing ovs_bridge ovs_${type}4 || return $ksft_skip
+ # IPv4 header UDP header VXLAN/GENEVE header Ethernet header
+ exp_mtu=$((${ll_mtu} - 20 - 8 - 8 - 14))
+ else
+ setup namespaces routing ovs_bridge ovs_${type}6 || return $ksft_skip
+ # IPv6 header UDP header VXLAN/GENEVE header Ethernet header
+ exp_mtu=$((${ll_mtu} - 40 - 8 - 8 - 14))
+ fi
+
+ if [ "${type}" = "vxlan" ]; then
+ tun_a="vxlan_sys_4789"
+ elif [ "${type}" = "geneve" ]; then
+ tun_a="genev_sys_6081"
+ fi
+
+ trace "" "${tun_a}" "${ns_b}" ${type}_b \
+ "" veth_A-R1 "${ns_r1}" veth_R1-A \
+ "${ns_b}" veth_B-R1 "${ns_r1}" veth_R1-B \
+ "" ovs_br0 "" veth-A-C \
+ "${ns_c}" veth_C-A
+
+ if [ ${family} -eq 4 ]; then
+ ping=ping
+ dst=${tunnel4_b_addr}
+ else
+ ping=${ping6}
+ dst=${tunnel6_b_addr}
+ fi
+
+ # Create route exception by exceeding link layer MTU
+ mtu "" veth_A-R1 $((${ll_mtu} + 1000))
+ mtu "" ovs_br0 $((${ll_mtu} + 1000))
+ mtu "" veth_A-C $((${ll_mtu} + 1000))
+ mtu "${ns_c}" veth_C-A $((${ll_mtu} + 1000))
+ mtu "${ns_r1}" veth_R1-A $((${ll_mtu} + 1000))
+ mtu "${ns_b}" veth_B-R1 ${ll_mtu}
+ mtu "${ns_r1}" veth_R1-B ${ll_mtu}
+
+ mtu "" ${tun_a} $((${ll_mtu} + 1000))
+ mtu "${ns_b}" ${type}_b $((${ll_mtu} + 1000))
+
+ run_cmd ${ns_c} ${ping} -q -M want -i 0.1 -c 20 -s $((${ll_mtu} + 500)) ${dst} || return 1
+
+ # Check that exceptions were created
+ pmtu="$(route_get_dst_pmtu_from_exception "${ns_c}" ${dst})"
+ check_pmtu_value ${exp_mtu} "${pmtu}" "exceeding link layer MTU on Open vSwitch ${type} interface"
+}
+
+test_pmtu_ipv4_ovs_vxlan4_exception() {
+ test_pmtu_ipvX_over_ovs_vxlanY_or_geneveY_exception vxlan 4 4
+}
+
+test_pmtu_ipv6_ovs_vxlan4_exception() {
+ test_pmtu_ipvX_over_ovs_vxlanY_or_geneveY_exception vxlan 6 4
+}
+
+test_pmtu_ipv4_ovs_geneve4_exception() {
+ test_pmtu_ipvX_over_ovs_vxlanY_or_geneveY_exception geneve 4 4
+}
+
+test_pmtu_ipv6_ovs_geneve4_exception() {
+ test_pmtu_ipvX_over_ovs_vxlanY_or_geneveY_exception geneve 6 4
+}
+
+test_pmtu_ipv4_ovs_vxlan6_exception() {
+ test_pmtu_ipvX_over_ovs_vxlanY_or_geneveY_exception vxlan 4 6
+}
+
+test_pmtu_ipv6_ovs_vxlan6_exception() {
+ test_pmtu_ipvX_over_ovs_vxlanY_or_geneveY_exception vxlan 6 6
+}
+
+test_pmtu_ipv4_ovs_geneve6_exception() {
+ test_pmtu_ipvX_over_ovs_vxlanY_or_geneveY_exception geneve 4 6
+}
+
+test_pmtu_ipv6_ovs_geneve6_exception() {
+ test_pmtu_ipvX_over_ovs_vxlanY_or_geneveY_exception geneve 6 6
+}
+
test_pmtu_ipvX_over_fouY_or_gueY() {
inner_family=${1}
outer_family=${2}
encap=${3}
ll_mtu=4000
- setup namespaces routing ${encap}${outer_family}${inner_family} || return 2
+ setup namespaces routing ${encap}${outer_family}${inner_family} || return $ksft_skip
trace "${ns_a}" ${encap}_a "${ns_b}" ${encap}_b \
"${ns_a}" veth_A-R1 "${ns_r1}" veth_R1-A \
"${ns_b}" veth_B-R1 "${ns_r1}" veth_R1-B
@@ -977,7 +1563,7 @@ test_pmtu_ipvX_over_ipvY_exception() {
outer=${2}
ll_mtu=4000
- setup namespaces routing ip${inner}ip${outer} || return 2
+ setup namespaces routing ip${inner}ip${outer} || return $ksft_skip
trace "${ns_a}" ip_a "${ns_b}" ip_b \
"${ns_a}" veth_A-R1 "${ns_r1}" veth_R1-A \
@@ -1031,7 +1617,7 @@ test_pmtu_ipv6_ipv6_exception() {
}
test_pmtu_vti4_exception() {
- setup namespaces veth vti4 xfrm4 || return 2
+ setup namespaces veth vti4 xfrm4 || return $ksft_skip
trace "${ns_a}" veth_a "${ns_b}" veth_b \
"${ns_a}" vti4_a "${ns_b}" vti4_b
@@ -1061,7 +1647,67 @@ test_pmtu_vti4_exception() {
}
test_pmtu_vti6_exception() {
- setup namespaces veth vti6 xfrm6 || return 2
+ setup namespaces veth vti6 xfrm6 || return $ksft_skip
+ trace "${ns_a}" veth_a "${ns_b}" veth_b \
+ "${ns_a}" vti6_a "${ns_b}" vti6_b
+ fail=0
+
+ # Create route exception by exceeding link layer MTU
+ mtu "${ns_a}" veth_a 4000
+ mtu "${ns_b}" veth_b 4000
+ mtu "${ns_a}" vti6_a 5000
+ mtu "${ns_b}" vti6_b 5000
+ run_cmd ${ns_a} ${ping6} -q -i 0.1 -w 1 -s 60000 ${tunnel6_b_addr}
+
+ # Check that exception was created
+ pmtu="$(route_get_dst_pmtu_from_exception "${ns_a}" ${tunnel6_b_addr})"
+ check_pmtu_value any "${pmtu}" "creating tunnel exceeding link layer MTU" || return 1
+
+ # Decrease tunnel MTU, check for PMTU decrease in route exception
+ mtu "${ns_a}" vti6_a 3000
+ pmtu="$(route_get_dst_pmtu_from_exception "${ns_a}" ${tunnel6_b_addr})"
+ check_pmtu_value "3000" "${pmtu}" "decreasing tunnel MTU" || fail=1
+
+ # Increase tunnel MTU, check for PMTU increase in route exception
+ mtu "${ns_a}" vti6_a 9000
+ pmtu="$(route_get_dst_pmtu_from_exception "${ns_a}" ${tunnel6_b_addr})"
+ check_pmtu_value "9000" "${pmtu}" "increasing tunnel MTU" || fail=1
+
+ return ${fail}
+}
+
+test_pmtu_vti4_udp_exception() {
+ setup namespaces veth vti4 xfrm4udp || return $ksft_skip
+ trace "${ns_a}" veth_a "${ns_b}" veth_b \
+ "${ns_a}" vti4_a "${ns_b}" vti4_b
+
+ veth_mtu=1500
+ vti_mtu=$((veth_mtu - 20))
+
+ # UDP SPI SN IV ICV pad length next header
+ esp_payload_rfc4106=$((vti_mtu - 8 - 4 - 4 - 8 - 16 - 1 - 1))
+ ping_payload=$((esp_payload_rfc4106 - 28))
+
+ mtu "${ns_a}" veth_a ${veth_mtu}
+ mtu "${ns_b}" veth_b ${veth_mtu}
+ mtu "${ns_a}" vti4_a ${vti_mtu}
+ mtu "${ns_b}" vti4_b ${vti_mtu}
+
+ # Send DF packet without exceeding link layer MTU, check that no
+ # exception is created
+ run_cmd ${ns_a} ping -q -M want -i 0.1 -w 1 -s ${ping_payload} ${tunnel4_b_addr}
+ pmtu="$(route_get_dst_pmtu_from_exception "${ns_a}" ${tunnel4_b_addr})"
+ check_pmtu_value "" "${pmtu}" "sending packet smaller than PMTU (IP payload length ${esp_payload_rfc4106})" || return 1
+
+ # Now exceed link layer MTU by one byte, check that exception is created
+ # with the right PMTU value
+ run_cmd ${ns_a} ping -q -M want -i 0.1 -w 1 -s $((ping_payload + 1)) ${tunnel4_b_addr}
+ pmtu="$(route_get_dst_pmtu_from_exception "${ns_a}" ${tunnel4_b_addr})"
+ check_pmtu_value "${esp_payload_rfc4106}" "${pmtu}" "exceeding PMTU (IP payload length $((esp_payload_rfc4106 + 1)))"
+}
+
+test_pmtu_vti6_udp_exception() {
+ setup namespaces veth vti6 xfrm6udp || return $ksft_skip
trace "${ns_a}" veth_a "${ns_b}" veth_b \
"${ns_a}" vti6_a "${ns_b}" vti6_b
fail=0
@@ -1090,8 +1736,77 @@ test_pmtu_vti6_exception() {
return ${fail}
}
+test_pmtu_vti4_udp_routed_exception() {
+ setup namespaces routing vti4routed xfrm4udprouted || return $ksft_skip
+ trace "${ns_a}" veth_A-R1 "${ns_b}" veth_B-R1 \
+ "${ns_a}" vti4_a "${ns_b}" vti4_b
+
+ veth_mtu=1500
+ vti_mtu=$((veth_mtu - 20))
+
+ # UDP SPI SN IV ICV pad length next header
+ esp_payload_rfc4106=$((vti_mtu - 8 - 4 - 4 - 8 - 16 - 1 - 1))
+ ping_payload=$((esp_payload_rfc4106 - 28))
+
+ mtu "${ns_a}" veth_A-R1 ${veth_mtu}
+ mtu "${ns_r1}" veth_R1-A ${veth_mtu}
+ mtu "${ns_b}" veth_B-R1 ${veth_mtu}
+ mtu "${ns_r1}" veth_R1-B ${veth_mtu}
+
+ mtu "${ns_a}" vti4_a ${vti_mtu}
+ mtu "${ns_b}" vti4_b ${vti_mtu}
+
+ # Send DF packet without exceeding link layer MTU, check that no
+ # exception is created
+ run_cmd ${ns_a} ping -q -M want -i 0.1 -w 1 -s ${ping_payload} ${tunnel4_b_addr}
+ pmtu="$(route_get_dst_pmtu_from_exception "${ns_a}" ${tunnel4_b_addr})"
+ check_pmtu_value "" "${pmtu}" "sending packet smaller than PMTU (IP payload length ${esp_payload_rfc4106})" || return 1
+
+ # Now decrease link layer MTU by 8 bytes on R1, check that exception is created
+ # with the right PMTU value
+ mtu "${ns_r1}" veth_R1-B $((veth_mtu - 8))
+ run_cmd ${ns_a} ping -q -M want -i 0.1 -w 1 -s $((ping_payload)) ${tunnel4_b_addr}
+ pmtu="$(route_get_dst_pmtu_from_exception "${ns_a}" ${tunnel4_b_addr})"
+ check_pmtu_value "$((esp_payload_rfc4106 - 8))" "${pmtu}" "exceeding PMTU (IP payload length $((esp_payload_rfc4106)))"
+}
+
+test_pmtu_vti6_udp_routed_exception() {
+ setup namespaces routing vti6routed xfrm6udprouted || return $ksft_skip
+ trace "${ns_a}" veth_A-R1 "${ns_b}" veth_B-R1 \
+ "${ns_a}" vti6_a "${ns_b}" vti6_b
+
+ veth_mtu=1500
+ vti_mtu=$((veth_mtu - 40))
+
+ # UDP SPI SN IV ICV pad length next header
+ esp_payload_rfc4106=$((vti_mtu - 8 - 4 - 4 - 8 - 16 - 1 - 1))
+ ping_payload=$((esp_payload_rfc4106 - 48))
+
+ mtu "${ns_a}" veth_A-R1 ${veth_mtu}
+ mtu "${ns_r1}" veth_R1-A ${veth_mtu}
+ mtu "${ns_b}" veth_B-R1 ${veth_mtu}
+ mtu "${ns_r1}" veth_R1-B ${veth_mtu}
+
+ # mtu "${ns_a}" vti6_a ${vti_mtu}
+ # mtu "${ns_b}" vti6_b ${vti_mtu}
+
+ run_cmd ${ns_a} ${ping6} -q -M want -i 0.1 -w 1 -s ${ping_payload} ${tunnel6_b_addr}
+
+ # Check that exception was not created
+ pmtu="$(route_get_dst_pmtu_from_exception "${ns_a}" ${tunnel6_b_addr})"
+ check_pmtu_value "" "${pmtu}" "sending packet smaller than PMTU (IP payload length ${esp_payload_rfc4106})" || return 1
+
+ # Now decrease link layer MTU by 8 bytes on R1, check that exception is created
+ # with the right PMTU value
+ mtu "${ns_r1}" veth_R1-B $((veth_mtu - 8))
+ run_cmd ${ns_a} ${ping6} -q -M want -i 0.1 -w 1 -s $((ping_payload)) ${tunnel6_b_addr}
+ pmtu="$(route_get_dst_pmtu_from_exception "${ns_a}" ${tunnel6_b_addr})"
+ check_pmtu_value "$((esp_payload_rfc4106 - 8))" "${pmtu}" "exceeding PMTU (IP payload length $((esp_payload_rfc4106)))"
+
+}
+
test_pmtu_vti4_default_mtu() {
- setup namespaces veth vti4 || return 2
+ setup namespaces veth vti4 || return $ksft_skip
# Check that MTU of vti device is MTU of veth minus IPv4 header length
veth_mtu="$(link_get_mtu "${ns_a}" veth_a)"
@@ -1103,7 +1818,7 @@ test_pmtu_vti4_default_mtu() {
}
test_pmtu_vti6_default_mtu() {
- setup namespaces veth vti6 || return 2
+ setup namespaces veth vti6 || return $ksft_skip
# Check that MTU of vti device is MTU of veth minus IPv6 header length
veth_mtu="$(link_get_mtu "${ns_a}" veth_a)"
@@ -1115,10 +1830,10 @@ test_pmtu_vti6_default_mtu() {
}
test_pmtu_vti4_link_add_mtu() {
- setup namespaces || return 2
+ setup namespaces || return $ksft_skip
run_cmd ${ns_a} ip link add vti4_a type vti local ${veth4_a_addr} remote ${veth4_b_addr} key 10
- [ $? -ne 0 ] && err " vti not supported" && return 2
+ [ $? -ne 0 ] && err " vti not supported" && return $ksft_skip
run_cmd ${ns_a} ip link del vti4_a
fail=0
@@ -1153,10 +1868,10 @@ test_pmtu_vti4_link_add_mtu() {
}
test_pmtu_vti6_link_add_mtu() {
- setup namespaces || return 2
+ setup namespaces || return $ksft_skip
run_cmd ${ns_a} ip link add vti6_a type vti6 local ${veth6_a_addr} remote ${veth6_b_addr} key 10
- [ $? -ne 0 ] && err " vti6 not supported" && return 2
+ [ $? -ne 0 ] && err " vti6 not supported" && return $ksft_skip
run_cmd ${ns_a} ip link del vti6_a
fail=0
@@ -1191,10 +1906,10 @@ test_pmtu_vti6_link_add_mtu() {
}
test_pmtu_vti6_link_change_mtu() {
- setup namespaces || return 2
+ setup namespaces || return $ksft_skip
run_cmd ${ns_a} ip link add dummy0 mtu 1500 type dummy
- [ $? -ne 0 ] && err " dummy not supported" && return 2
+ [ $? -ne 0 ] && err " dummy not supported" && return $ksft_skip
run_cmd ${ns_a} ip link add dummy1 mtu 3000 type dummy
run_cmd ${ns_a} ip link set dummy0 up
run_cmd ${ns_a} ip link set dummy1 up
@@ -1242,15 +1957,22 @@ check_command() {
return 0
}
+check_running() {
+ pid=${1}
+ cmd=${2}
+
+ [ "$(cat /proc/${pid}/cmdline 2>/dev/null | tr -d '\0')" = "{cmd}" ]
+}
+
test_cleanup_vxlanX_exception() {
outer="${1}"
encap="vxlan"
ll_mtu=4000
- check_command taskset || return 2
+ check_command taskset || return $ksft_skip
cpu_list=$(grep -m 2 processor /proc/cpuinfo | cut -d ' ' -f 2)
- setup namespaces routing ${encap}${outer} || return 2
+ setup namespaces routing ${encap}${outer} || return $ksft_skip
trace "${ns_a}" ${encap}_a "${ns_b}" ${encap}_b \
"${ns_a}" veth_A-R1 "${ns_r1}" veth_R1-A \
"${ns_b}" veth_B-R1 "${ns_r1}" veth_R1-B
@@ -1272,11 +1994,12 @@ test_cleanup_vxlanX_exception() {
${ns_a} ip link del dev veth_A-R1 &
iplink_pid=$!
- sleep 1
- if [ "$(cat /proc/${iplink_pid}/cmdline 2>/dev/null | tr -d '\0')" = "iplinkdeldevveth_A-R1" ]; then
- err " can't delete veth device in a timely manner, PMTU dst likely leaked"
- return 1
- fi
+ for i in $(seq 1 20); do
+ check_running ${iplink_pid} "iplinkdeldevveth_A-R1" || return 0
+ sleep 0.1
+ done
+ err " can't delete veth device in a timely manner, PMTU dst likely leaked"
+ return 1
}
test_cleanup_ipv6_exception() {
@@ -1294,6 +2017,10 @@ run_test() {
unset IFS
+ # Since cleanup() relies on variables modified by this subshell, it
+ # has to run in this context.
+ trap cleanup EXIT
+
if [ "$VERBOSE" = "1" ]; then
printf "\n##########################################################################\n\n"
fi
@@ -1312,7 +2039,7 @@ run_test() {
fi
err_flush
exit 1
- elif [ $ret -eq 2 ]; then
+ elif [ $ret -eq $ksft_skip ]; then
printf "TEST: %-60s [SKIP]\n" "${tdesc}"
err_flush
fi
@@ -1320,7 +2047,19 @@ run_test() {
return $ret
)
ret=$?
- [ $ret -ne 0 ] && exitcode=1
+ case $ret in
+ 0)
+ all_skipped=false
+ [ $exitcode -eq $ksft_skip ] && exitcode=0
+ ;;
+ $ksft_skip)
+ [ $all_skipped = true ] && exitcode=$ksft_skip
+ ;;
+ *)
+ all_skipped=false
+ exitcode=1
+ ;;
+ esac
return $ret
}
@@ -1335,7 +2074,7 @@ run_test_nh() {
}
test_list_flush_ipv4_exception() {
- setup namespaces routing || return 2
+ setup namespaces routing || return $ksft_skip
trace "${ns_a}" veth_A-R1 "${ns_r1}" veth_R1-A \
"${ns_r1}" veth_R1-B "${ns_b}" veth_B-R1 \
"${ns_a}" veth_A-R2 "${ns_r2}" veth_R2-A \
@@ -1389,7 +2128,7 @@ test_list_flush_ipv4_exception() {
}
test_list_flush_ipv6_exception() {
- setup namespaces routing || return 2
+ setup namespaces routing || return $ksft_skip
trace "${ns_a}" veth_A-R1 "${ns_r1}" veth_R1-A \
"${ns_r1}" veth_R1-B "${ns_b}" veth_B-R1 \
"${ns_a}" veth_A-R2 "${ns_r2}" veth_R2-A \
@@ -1438,6 +2177,63 @@ test_list_flush_ipv6_exception() {
return ${fail}
}
+test_pmtu_ipvX_route_change() {
+ family=${1}
+
+ setup namespaces routing || return 2
+ trace "${ns_a}" veth_A-R1 "${ns_r1}" veth_R1-A \
+ "${ns_r1}" veth_R1-B "${ns_b}" veth_B-R1 \
+ "${ns_a}" veth_A-R2 "${ns_r2}" veth_R2-A \
+ "${ns_r2}" veth_R2-B "${ns_b}" veth_B-R2
+
+ if [ ${family} -eq 4 ]; then
+ ping=ping
+ dst1="${prefix4}.${b_r1}.1"
+ dst2="${prefix4}.${b_r2}.1"
+ gw="${prefix4}.${a_r1}.2"
+ else
+ ping=${ping6}
+ dst1="${prefix6}:${b_r1}::1"
+ dst2="${prefix6}:${b_r2}::1"
+ gw="${prefix6}:${a_r1}::2"
+ fi
+
+ # Set up initial MTU values
+ mtu "${ns_a}" veth_A-R1 2000
+ mtu "${ns_r1}" veth_R1-A 2000
+ mtu "${ns_r1}" veth_R1-B 1400
+ mtu "${ns_b}" veth_B-R1 1400
+
+ mtu "${ns_a}" veth_A-R2 2000
+ mtu "${ns_r2}" veth_R2-A 2000
+ mtu "${ns_r2}" veth_R2-B 1500
+ mtu "${ns_b}" veth_B-R2 1500
+
+ # Create route exceptions
+ run_cmd ${ns_a} ${ping} -q -M want -i 0.1 -w 1 -s 1800 ${dst1}
+ run_cmd ${ns_a} ${ping} -q -M want -i 0.1 -w 1 -s 1800 ${dst2}
+
+ # Check that exceptions have been created with the correct PMTU
+ pmtu_1="$(route_get_dst_pmtu_from_exception "${ns_a}" ${dst1})"
+ check_pmtu_value "1400" "${pmtu_1}" "exceeding MTU" || return 1
+ pmtu_2="$(route_get_dst_pmtu_from_exception "${ns_a}" ${dst2})"
+ check_pmtu_value "1500" "${pmtu_2}" "exceeding MTU" || return 1
+
+ # Replace the route from A to R1
+ run_cmd ${ns_a} ip route change default via ${gw}
+
+ # Delete the device in A
+ run_cmd ${ns_a} ip link del "veth_A-R1"
+}
+
+test_pmtu_ipv4_route_change() {
+ test_pmtu_ipvX_route_change 4
+}
+
+test_pmtu_ipv6_route_change() {
+ test_pmtu_ipvX_route_change 6
+}
+
usage() {
echo
echo "$0 [OPTIONS] [TEST]..."
@@ -1454,6 +2250,7 @@ usage() {
#
exitcode=0
desc=0
+all_skipped=true
while getopts :ptv o
do
@@ -1508,7 +2305,7 @@ for t in ${tests}; do
if [ $run_this -eq 1 ]; then
run_test "${name}" "${desc}"
# if test was skipped no need to retry with nexthop objects
- [ $? -eq 2 ] && rerun_nh=0
+ [ $? -eq $ksft_skip ] && rerun_nh=0
if [ "${rerun_nh}" = "1" ]; then
run_test_nh "${name}" "${desc}"
diff --git a/tools/testing/selftests/net/psock_fanout.c b/tools/testing/selftests/net/psock_fanout.c
index 8c8c7d79c38d..1a736f700be4 100644
--- a/tools/testing/selftests/net/psock_fanout.c
+++ b/tools/testing/selftests/net/psock_fanout.c
@@ -53,15 +53,19 @@
#include <unistd.h>
#include "psock_lib.h"
+#include "../kselftest.h"
#define RING_NUM_FRAMES 20
+static uint32_t cfg_max_num_members;
+
/* Open a socket in a given fanout mode.
* @return -1 if mode is bad, a valid socket otherwise */
static int sock_fanout_open(uint16_t typeflags, uint16_t group_id)
{
struct sockaddr_ll addr = {0};
- int fd, val;
+ struct fanout_args args;
+ int fd, val, err;
fd = socket(PF_PACKET, SOCK_RAW, 0);
if (fd < 0) {
@@ -83,8 +87,18 @@ static int sock_fanout_open(uint16_t typeflags, uint16_t group_id)
exit(1);
}
- val = (((int) typeflags) << 16) | group_id;
- if (setsockopt(fd, SOL_PACKET, PACKET_FANOUT, &val, sizeof(val))) {
+ if (cfg_max_num_members) {
+ args.id = group_id;
+ args.type_flags = typeflags;
+ args.max_num_members = cfg_max_num_members;
+ err = setsockopt(fd, SOL_PACKET, PACKET_FANOUT, &args,
+ sizeof(args));
+ } else {
+ val = (((int) typeflags) << 16) | group_id;
+ err = setsockopt(fd, SOL_PACKET, PACKET_FANOUT, &val,
+ sizeof(val));
+ }
+ if (err) {
if (close(fd)) {
perror("close packet");
exit(1);
@@ -98,13 +112,13 @@ static int sock_fanout_open(uint16_t typeflags, uint16_t group_id)
static void sock_fanout_set_cbpf(int fd)
{
struct sock_filter bpf_filter[] = {
- BPF_STMT(BPF_LD+BPF_B+BPF_ABS, 80), /* ldb [80] */
- BPF_STMT(BPF_RET+BPF_A, 0), /* ret A */
+ BPF_STMT(BPF_LD | BPF_B | BPF_ABS, 80), /* ldb [80] */
+ BPF_STMT(BPF_RET | BPF_A, 0), /* ret A */
};
struct sock_fprog bpf_prog;
bpf_prog.filter = bpf_filter;
- bpf_prog.len = sizeof(bpf_filter) / sizeof(struct sock_filter);
+ bpf_prog.len = ARRAY_SIZE(bpf_filter);
if (setsockopt(fd, SOL_PACKET, PACKET_FANOUT_DATA, &bpf_prog,
sizeof(bpf_prog))) {
@@ -149,7 +163,7 @@ static void sock_fanout_set_ebpf(int fd)
memset(&attr, 0, sizeof(attr));
attr.prog_type = BPF_PROG_TYPE_SOCKET_FILTER;
attr.insns = (unsigned long) prog;
- attr.insn_cnt = sizeof(prog) / sizeof(prog[0]);
+ attr.insn_cnt = ARRAY_SIZE(prog);
attr.license = (unsigned long) "GPL";
attr.log_buf = (unsigned long) log_buf,
attr.log_size = sizeof(log_buf),
@@ -286,6 +300,56 @@ static void test_control_group(void)
}
}
+/* Test illegal max_num_members values */
+static void test_control_group_max_num_members(void)
+{
+ int fds[3];
+
+ fprintf(stderr, "test: control multiple sockets, max_num_members\n");
+
+ /* expected failure on greater than PACKET_FANOUT_MAX */
+ cfg_max_num_members = (1 << 16) + 1;
+ if (sock_fanout_open(PACKET_FANOUT_HASH, 0) != -1) {
+ fprintf(stderr, "ERROR: max_num_members > PACKET_FANOUT_MAX\n");
+ exit(1);
+ }
+
+ cfg_max_num_members = 256;
+ fds[0] = sock_fanout_open(PACKET_FANOUT_HASH, 0);
+ if (fds[0] == -1) {
+ fprintf(stderr, "ERROR: failed open\n");
+ exit(1);
+ }
+
+ /* expected failure on joining group with different max_num_members */
+ cfg_max_num_members = 257;
+ if (sock_fanout_open(PACKET_FANOUT_HASH, 0) != -1) {
+ fprintf(stderr, "ERROR: set different max_num_members\n");
+ exit(1);
+ }
+
+ /* success on joining group with same max_num_members */
+ cfg_max_num_members = 256;
+ fds[1] = sock_fanout_open(PACKET_FANOUT_HASH, 0);
+ if (fds[1] == -1) {
+ fprintf(stderr, "ERROR: failed to join group\n");
+ exit(1);
+ }
+
+ /* success on joining group with max_num_members unspecified */
+ cfg_max_num_members = 0;
+ fds[2] = sock_fanout_open(PACKET_FANOUT_HASH, 0);
+ if (fds[2] == -1) {
+ fprintf(stderr, "ERROR: failed to join group\n");
+ exit(1);
+ }
+
+ if (close(fds[2]) || close(fds[1]) || close(fds[0])) {
+ fprintf(stderr, "ERROR: closing sockets\n");
+ exit(1);
+ }
+}
+
/* Test creating a unique fanout group ids */
static void test_unique_fanout_group_ids(void)
{
@@ -350,7 +414,8 @@ static int test_datapath(uint16_t typeflags, int port_off,
int fds[2], fds_udp[2][2], ret;
fprintf(stderr, "\ntest: datapath 0x%hx ports %hu,%hu\n",
- typeflags, PORT_BASE, PORT_BASE + port_off);
+ typeflags, (uint16_t)PORT_BASE,
+ (uint16_t)(PORT_BASE + port_off));
fds[0] = sock_fanout_open(typeflags, 0);
fds[1] = sock_fanout_open(typeflags, 0);
@@ -425,8 +490,11 @@ int main(int argc, char **argv)
test_control_single();
test_control_group();
+ test_control_group_max_num_members();
test_unique_fanout_group_ids();
+ /* PACKET_FANOUT_MAX */
+ cfg_max_num_members = 1 << 16;
/* find a set of ports that do not collide onto the same socket */
ret = test_datapath(PACKET_FANOUT_HASH, port_off,
expect_hash[0], expect_hash[1]);
diff --git a/tools/testing/selftests/net/psock_lib.h b/tools/testing/selftests/net/psock_lib.h
index faa884385c45..6e4fef560873 100644
--- a/tools/testing/selftests/net/psock_lib.h
+++ b/tools/testing/selftests/net/psock_lib.h
@@ -14,6 +14,8 @@
#include <arpa/inet.h>
#include <unistd.h>
+#include "kselftest.h"
+
#define DATA_LEN 100
#define DATA_CHAR 'a'
#define DATA_CHAR_1 'b'
@@ -63,7 +65,7 @@ static __maybe_unused void pair_udp_setfilter(int fd)
struct sock_fprog bpf_prog;
bpf_prog.filter = bpf_filter;
- bpf_prog.len = sizeof(bpf_filter) / sizeof(struct sock_filter);
+ bpf_prog.len = ARRAY_SIZE(bpf_filter);
if (setsockopt(fd, SOL_SOCKET, SO_ATTACH_FILTER, &bpf_prog,
sizeof(bpf_prog))) {
diff --git a/tools/testing/selftests/net/psock_snd.c b/tools/testing/selftests/net/psock_snd.c
index 7d15e10a9fb6..edf1e6f80d41 100644
--- a/tools/testing/selftests/net/psock_snd.c
+++ b/tools/testing/selftests/net/psock_snd.c
@@ -389,6 +389,8 @@ int main(int argc, char **argv)
error(1, errno, "ip link set mtu");
if (system("ip addr add dev lo 172.17.0.1/24"))
error(1, errno, "ip addr add");
+ if (system("sysctl -w net.ipv4.conf.lo.accept_local=1"))
+ error(1, errno, "sysctl lo.accept_local");
run_test();
diff --git a/tools/testing/selftests/net/psock_snd.sh b/tools/testing/selftests/net/psock_snd.sh
index 6331d91b86a6..1cbfeb5052ec 100755
--- a/tools/testing/selftests/net/psock_snd.sh
+++ b/tools/testing/selftests/net/psock_snd.sh
@@ -45,7 +45,7 @@ echo "raw vnet hdr"
echo "raw csum_off"
./in_netns.sh ./psock_snd -v -c
-echo "raw csum_off with bad offset (fails)"
+echo "raw csum_off with bad offset (expected to fail)"
(! ./in_netns.sh ./psock_snd -v -c -C)
@@ -57,7 +57,7 @@ echo "raw min size"
echo "raw mtu size"
./in_netns.sh ./psock_snd -l "${mss}"
-echo "raw mtu size + 1 (fails)"
+echo "raw mtu size + 1 (expected to fail)"
(! ./in_netns.sh ./psock_snd -l "${mss_exceeds}")
# fails due to ARPHRD_ETHER check in packet_extra_vlan_len_allowed
@@ -65,19 +65,19 @@ echo "raw mtu size + 1 (fails)"
# echo "raw vlan mtu size"
# ./in_netns.sh ./psock_snd -V -l "${mss}"
-echo "raw vlan mtu size + 1 (fails)"
+echo "raw vlan mtu size + 1 (expected to fail)"
(! ./in_netns.sh ./psock_snd -V -l "${mss_exceeds}")
echo "dgram mtu size"
./in_netns.sh ./psock_snd -d -l "${mss}"
-echo "dgram mtu size + 1 (fails)"
+echo "dgram mtu size + 1 (expected to fail)"
(! ./in_netns.sh ./psock_snd -d -l "${mss_exceeds}")
-echo "raw truncate hlen (fails: does not arrive)"
+echo "raw truncate hlen (expected to fail: does not arrive)"
(! ./in_netns.sh ./psock_snd -t "$((${vnet_hlen} + ${eth_hlen}))")
-echo "raw truncate hlen - 1 (fails: EINVAL)"
+echo "raw truncate hlen - 1 (expected to fail: EINVAL)"
(! ./in_netns.sh ./psock_snd -t "$((${vnet_hlen} + ${eth_hlen} - 1))")
@@ -86,13 +86,10 @@ echo "raw truncate hlen - 1 (fails: EINVAL)"
echo "raw gso min size"
./in_netns.sh ./psock_snd -v -c -g -l "${mss_exceeds}"
-echo "raw gso min size - 1 (fails)"
-(! ./in_netns.sh ./psock_snd -v -c -g -l "${mss}")
-
echo "raw gso max size"
./in_netns.sh ./psock_snd -v -c -g -l "${max_mss}"
-echo "raw gso max size + 1 (fails)"
+echo "raw gso max size + 1 (expected to fail)"
(! ./in_netns.sh ./psock_snd -v -c -g -l "${max_mss_exceeds}")
echo "OK. All tests passed"
diff --git a/tools/testing/selftests/net/reuseaddr_ports_exhausted.c b/tools/testing/selftests/net/reuseaddr_ports_exhausted.c
index 7b01b7c2ec10..066efd30e294 100644
--- a/tools/testing/selftests/net/reuseaddr_ports_exhausted.c
+++ b/tools/testing/selftests/net/reuseaddr_ports_exhausted.c
@@ -30,25 +30,25 @@ struct reuse_opts {
};
struct reuse_opts unreusable_opts[12] = {
- {0, 0, 0, 0},
- {0, 0, 0, 1},
- {0, 0, 1, 0},
- {0, 0, 1, 1},
- {0, 1, 0, 0},
- {0, 1, 0, 1},
- {0, 1, 1, 0},
- {0, 1, 1, 1},
- {1, 0, 0, 0},
- {1, 0, 0, 1},
- {1, 0, 1, 0},
- {1, 0, 1, 1},
+ {{0, 0}, {0, 0}},
+ {{0, 0}, {0, 1}},
+ {{0, 0}, {1, 0}},
+ {{0, 0}, {1, 1}},
+ {{0, 1}, {0, 0}},
+ {{0, 1}, {0, 1}},
+ {{0, 1}, {1, 0}},
+ {{0, 1}, {1, 1}},
+ {{1, 0}, {0, 0}},
+ {{1, 0}, {0, 1}},
+ {{1, 0}, {1, 0}},
+ {{1, 0}, {1, 1}},
};
struct reuse_opts reusable_opts[4] = {
- {1, 1, 0, 0},
- {1, 1, 0, 1},
- {1, 1, 1, 0},
- {1, 1, 1, 1},
+ {{1, 1}, {0, 0}},
+ {{1, 1}, {0, 1}},
+ {{1, 1}, {1, 0}},
+ {{1, 1}, {1, 1}},
};
int bind_port(struct __test_metadata *_metadata, int reuseaddr, int reuseport)
diff --git a/tools/testing/selftests/net/reuseport_bpf.c b/tools/testing/selftests/net/reuseport_bpf.c
index b5277106df1f..65aea27d761c 100644
--- a/tools/testing/selftests/net/reuseport_bpf.c
+++ b/tools/testing/selftests/net/reuseport_bpf.c
@@ -24,9 +24,7 @@
#include <sys/resource.h>
#include <unistd.h>
-#ifndef ARRAY_SIZE
-#define ARRAY_SIZE(arr) (sizeof(arr) / sizeof((arr)[0]))
-#endif
+#include "../kselftest.h"
struct test_params {
int recv_family;
@@ -330,7 +328,7 @@ static void test_extra_filter(const struct test_params p)
if (bind(fd1, addr, sockaddr_size()))
error(1, errno, "failed to bind recv socket 1");
- if (!bind(fd2, addr, sockaddr_size()) && errno != EADDRINUSE)
+ if (!bind(fd2, addr, sockaddr_size()) || errno != EADDRINUSE)
error(1, errno, "bind socket 2 should fail with EADDRINUSE");
free(addr);
diff --git a/tools/testing/selftests/net/reuseport_bpf_numa.c b/tools/testing/selftests/net/reuseport_bpf_numa.c
index c9f478b40996..c9ba36aa688e 100644
--- a/tools/testing/selftests/net/reuseport_bpf_numa.c
+++ b/tools/testing/selftests/net/reuseport_bpf_numa.c
@@ -86,7 +86,7 @@ static void attach_bpf(int fd)
memset(&attr, 0, sizeof(attr));
attr.prog_type = BPF_PROG_TYPE_SOCKET_FILTER;
- attr.insn_cnt = sizeof(prog) / sizeof(prog[0]);
+ attr.insn_cnt = ARRAY_SIZE(prog);
attr.insns = (unsigned long) &prog;
attr.license = (unsigned long) &bpf_license;
attr.log_buf = (unsigned long) &bpf_log_buf;
@@ -211,12 +211,16 @@ static void test(int *rcv_fd, int len, int family, int proto)
/* Forward iterate */
for (node = 0; node < len; ++node) {
+ if (!numa_bitmask_isbitset(numa_nodes_ptr, node))
+ continue;
send_from_node(node, family, proto);
receive_on_node(rcv_fd, len, epfd, node, proto);
}
/* Reverse iterate */
for (node = len - 1; node >= 0; --node) {
+ if (!numa_bitmask_isbitset(numa_nodes_ptr, node))
+ continue;
send_from_node(node, family, proto);
receive_on_node(rcv_fd, len, epfd, node, proto);
}
diff --git a/tools/testing/selftests/net/route_localnet.sh b/tools/testing/selftests/net/route_localnet.sh
index 116bfeab72fa..e08701c750e3 100755
--- a/tools/testing/selftests/net/route_localnet.sh
+++ b/tools/testing/selftests/net/route_localnet.sh
@@ -18,8 +18,10 @@ setup() {
ip route del 127.0.0.0/8 dev lo table local
ip netns exec "${PEER_NS}" ip route del 127.0.0.0/8 dev lo table local
- ifconfig veth0 127.25.3.4/24 up
- ip netns exec "${PEER_NS}" ifconfig veth1 127.25.3.14/24 up
+ ip address add 127.25.3.4/24 dev veth0
+ ip link set dev veth0 up
+ ip netns exec "${PEER_NS}" ip address add 127.25.3.14/24 dev veth1
+ ip netns exec "${PEER_NS}" ip link set dev veth1 up
ip route flush cache
ip netns exec "${PEER_NS}" ip route flush cache
diff --git a/tools/testing/selftests/net/rps_default_mask.sh b/tools/testing/selftests/net/rps_default_mask.sh
new file mode 100755
index 000000000000..4287a8529890
--- /dev/null
+++ b/tools/testing/selftests/net/rps_default_mask.sh
@@ -0,0 +1,79 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+readonly ksft_skip=4
+readonly cpus=$(nproc)
+ret=0
+
+[ $cpus -gt 2 ] || exit $ksft_skip
+
+readonly INITIAL_RPS_DEFAULT_MASK=$(cat /proc/sys/net/core/rps_default_mask)
+readonly TAG="$(mktemp -u XXXXXX)"
+readonly VETH="veth${TAG}"
+readonly NETNS="ns-${TAG}"
+
+setup() {
+ ip netns add "${NETNS}"
+ ip -netns "${NETNS}" link set lo up
+}
+
+cleanup() {
+ echo $INITIAL_RPS_DEFAULT_MASK > /proc/sys/net/core/rps_default_mask
+ ip netns del $NETNS
+}
+
+chk_rps() {
+ local rps_mask expected_rps_mask=$4
+ local dev_name=$3
+ local netns=$2
+ local cmd="cat"
+ local msg=$1
+
+ [ -n "$netns" ] && cmd="ip netns exec $netns $cmd"
+
+ rps_mask=$($cmd /sys/class/net/$dev_name/queues/rx-0/rps_cpus)
+ printf "%-60s" "$msg"
+
+ # In case there is more than 32 CPUs we need to remove commas from masks
+ rps_mask=${rps_mask//,}
+ expected_rps_mask=${expected_rps_mask//,}
+ if [ $rps_mask -eq $expected_rps_mask ]; then
+ echo "[ ok ]"
+ else
+ echo "[fail] expected $expected_rps_mask found $rps_mask"
+ ret=1
+ fi
+}
+
+trap cleanup EXIT
+
+echo 0 > /proc/sys/net/core/rps_default_mask
+setup
+chk_rps "empty rps_default_mask" $NETNS lo 0
+cleanup
+
+echo 1 > /proc/sys/net/core/rps_default_mask
+setup
+chk_rps "changing rps_default_mask dont affect existing devices" "" lo $INITIAL_RPS_DEFAULT_MASK
+
+echo 3 > /proc/sys/net/core/rps_default_mask
+chk_rps "changing rps_default_mask dont affect existing netns" $NETNS lo 0
+
+ip link add name $VETH type veth peer netns $NETNS name $VETH
+ip link set dev $VETH up
+ip -n $NETNS link set dev $VETH up
+chk_rps "changing rps_default_mask affect newly created devices" "" $VETH 3
+chk_rps "changing rps_default_mask don't affect newly child netns[II]" $NETNS $VETH 0
+ip link del dev $VETH
+ip netns del $NETNS
+
+setup
+chk_rps "rps_default_mask is 0 by default in child netns" "$NETNS" lo 0
+
+ip netns exec $NETNS sysctl -qw net.core.rps_default_mask=1
+ip link add name $VETH type veth peer netns $NETNS name $VETH
+chk_rps "changing rps_default_mask in child ns don't affect the main one" "" lo $INITIAL_RPS_DEFAULT_MASK
+chk_rps "changing rps_default_mask in child ns affects new childns devices" $NETNS $VETH 1
+chk_rps "changing rps_default_mask in child ns don't affect existing devices" $NETNS lo 0
+
+exit $ret
diff --git a/tools/testing/selftests/net/rtnetlink.sh b/tools/testing/selftests/net/rtnetlink.sh
index bdbf4b3125b6..bdf6f10d0558 100755
--- a/tools/testing/selftests/net/rtnetlink.sh
+++ b/tools/testing/selftests/net/rtnetlink.sh
@@ -4,11 +4,39 @@
#
# set -e
+ALL_TESTS="
+ kci_test_polrouting
+ kci_test_route_get
+ kci_test_addrlft
+ kci_test_promote_secondaries
+ kci_test_tc
+ kci_test_gre
+ kci_test_gretap
+ kci_test_ip6gretap
+ kci_test_erspan
+ kci_test_ip6erspan
+ kci_test_bridge
+ kci_test_addrlabel
+ kci_test_ifalias
+ kci_test_vrf
+ kci_test_encap
+ kci_test_macsec
+ kci_test_macsec_offload
+ kci_test_ipsec
+ kci_test_ipsec_offload
+ kci_test_fdb_get
+ kci_test_neigh_get
+ kci_test_bridge_parent_id
+ kci_test_address_proto
+ kci_test_enslave_bonding
+"
+
devdummy="test-dummy0"
-ret=0
+VERBOSE=0
+PAUSE=no
+PAUSE_ON_FAIL=no
-# Kselftest framework requirement - SKIP code is 4.
-ksft_skip=4
+source lib.sh
# set global exit status, but never reset nonzero one.
check_err()
@@ -26,35 +54,102 @@ check_fail()
fi
}
+run_cmd_common()
+{
+ local cmd="$*"
+ local out
+ if [ "$VERBOSE" = "1" ]; then
+ echo "COMMAND: ${cmd}"
+ fi
+ out=$($cmd 2>&1)
+ rc=$?
+ if [ "$VERBOSE" = "1" -a -n "$out" ]; then
+ echo " $out"
+ fi
+ return $rc
+}
+
+run_cmd() {
+ run_cmd_common "$@"
+ rc=$?
+ check_err $rc
+ return $rc
+}
+run_cmd_fail()
+{
+ run_cmd_common "$@"
+ rc=$?
+ check_fail $rc
+ return $rc
+}
+
+run_cmd_grep_common()
+{
+ local find="$1"; shift
+ local cmd="$*"
+ local out
+ if [ "$VERBOSE" = "1" ]; then
+ echo "COMMAND: ${cmd} 2>&1 | grep -q '${find}'"
+ fi
+ out=$($cmd 2>&1 | grep -q "${find}" 2>&1)
+ return $?
+}
+
+run_cmd_grep() {
+ run_cmd_grep_common "$@"
+ rc=$?
+ check_err $rc
+ return $rc
+}
+
+run_cmd_grep_fail()
+{
+ run_cmd_grep_common "$@"
+ rc=$?
+ check_fail $rc
+ return $rc
+}
+
+end_test()
+{
+ echo "$*"
+ [ "${VERBOSE}" = "1" ] && echo
+
+ if [[ $ret -ne 0 ]] && [[ "${PAUSE_ON_FAIL}" = "yes" ]]; then
+ echo "Hit enter to continue"
+ read a
+ fi;
+
+ if [ "${PAUSE}" = "yes" ]; then
+ echo "Hit enter to continue"
+ read a
+ fi
+
+}
+
+
kci_add_dummy()
{
- ip link add name "$devdummy" type dummy
- check_err $?
- ip link set "$devdummy" up
- check_err $?
+ run_cmd ip link add name "$devdummy" type dummy
+ run_cmd ip link set "$devdummy" up
}
kci_del_dummy()
{
- ip link del dev "$devdummy"
- check_err $?
+ run_cmd ip link del dev "$devdummy"
}
kci_test_netconf()
{
dev="$1"
r=$ret
-
- ip netconf show dev "$dev" > /dev/null
- check_err $?
-
+ run_cmd ip netconf show dev "$dev"
for f in 4 6; do
- ip -$f netconf show dev "$dev" > /dev/null
- check_err $?
+ run_cmd ip -$f netconf show dev "$dev"
done
if [ $ret -ne 0 ] ;then
- echo "FAIL: ip netconf show $dev"
+ end_test "FAIL: ip netconf show $dev"
test $r -eq 0 && ret=0
return 1
fi
@@ -66,44 +161,28 @@ kci_test_bridge()
devbr="test-br0"
vlandev="testbr-vlan1"
- ret=0
- ip link add name "$devbr" type bridge
- check_err $?
-
- ip link set dev "$devdummy" master "$devbr"
- check_err $?
-
- ip link set "$devbr" up
- check_err $?
-
- ip link add link "$devbr" name "$vlandev" type vlan id 1
- check_err $?
- ip addr add dev "$vlandev" 10.200.7.23/30
- check_err $?
- ip -6 addr add dev "$vlandev" dead:42::1234/64
- check_err $?
- ip -d link > /dev/null
- check_err $?
- ip r s t all > /dev/null
- check_err $?
+ local ret=0
+ run_cmd ip link add name "$devbr" type bridge
+ run_cmd ip link set dev "$devdummy" master "$devbr"
+ run_cmd ip link set "$devbr" up
+ run_cmd ip link add link "$devbr" name "$vlandev" type vlan id 1
+ run_cmd ip addr add dev "$vlandev" 10.200.7.23/30
+ run_cmd ip -6 addr add dev "$vlandev" dead:42::1234/64
+ run_cmd ip -d link
+ run_cmd ip r s t all
for name in "$devbr" "$vlandev" "$devdummy" ; do
kci_test_netconf "$name"
done
-
- ip -6 addr del dev "$vlandev" dead:42::1234/64
- check_err $?
-
- ip link del dev "$vlandev"
- check_err $?
- ip link del dev "$devbr"
- check_err $?
+ run_cmd ip -6 addr del dev "$vlandev" dead:42::1234/64
+ run_cmd ip link del dev "$vlandev"
+ run_cmd ip link del dev "$devbr"
if [ $ret -ne 0 ];then
- echo "FAIL: bridge setup"
+ end_test "FAIL: bridge setup"
return 1
fi
- echo "PASS: bridge setup"
+ end_test "PASS: bridge setup"
}
@@ -113,35 +192,24 @@ kci_test_gre()
rem=10.42.42.1
loc=10.0.0.1
- ret=0
- ip tunnel add $gredev mode gre remote $rem local $loc ttl 1
- check_err $?
- ip link set $gredev up
- check_err $?
- ip addr add 10.23.7.10 dev $gredev
- check_err $?
- ip route add 10.23.8.0/30 dev $gredev
- check_err $?
- ip addr add dev "$devdummy" 10.23.7.11/24
- check_err $?
- ip link > /dev/null
- check_err $?
- ip addr > /dev/null
- check_err $?
+ local ret=0
+ run_cmd ip tunnel add $gredev mode gre remote $rem local $loc ttl 1
+ run_cmd ip link set $gredev up
+ run_cmd ip addr add 10.23.7.10 dev $gredev
+ run_cmd ip route add 10.23.8.0/30 dev $gredev
+ run_cmd ip addr add dev "$devdummy" 10.23.7.11/24
+ run_cmd ip link
+ run_cmd ip addr
kci_test_netconf "$gredev"
-
- ip addr del dev "$devdummy" 10.23.7.11/24
- check_err $?
-
- ip link del $gredev
- check_err $?
+ run_cmd ip addr del dev "$devdummy" 10.23.7.11/24
+ run_cmd ip link del $gredev
if [ $ret -ne 0 ];then
- echo "FAIL: gre tunnel endpoint"
+ end_test "FAIL: gre tunnel endpoint"
return 1
fi
- echo "PASS: gre tunnel endpoint"
+ end_test "PASS: gre tunnel endpoint"
}
# tc uses rtnetlink too, for full tc testing
@@ -149,124 +217,94 @@ kci_test_gre()
kci_test_tc()
{
dev=lo
- ret=0
-
- tc qdisc add dev "$dev" root handle 1: htb
- check_err $?
- tc class add dev "$dev" parent 1: classid 1:10 htb rate 1mbit
- check_err $?
- tc filter add dev "$dev" parent 1:0 prio 5 handle ffe: protocol ip u32 divisor 256
- check_err $?
- tc filter add dev "$dev" parent 1:0 prio 5 handle ffd: protocol ip u32 divisor 256
- check_err $?
- tc filter add dev "$dev" parent 1:0 prio 5 handle ffc: protocol ip u32 divisor 256
- check_err $?
- tc filter add dev "$dev" protocol ip parent 1: prio 5 handle ffe:2:3 u32 ht ffe:2: match ip src 10.0.0.3 flowid 1:10
- check_err $?
- tc filter add dev "$dev" protocol ip parent 1: prio 5 handle ffe:2:2 u32 ht ffe:2: match ip src 10.0.0.2 flowid 1:10
- check_err $?
- tc filter show dev "$dev" parent 1:0 > /dev/null
- check_err $?
- tc filter del dev "$dev" protocol ip parent 1: prio 5 handle ffe:2:3 u32
- check_err $?
- tc filter show dev "$dev" parent 1:0 > /dev/null
- check_err $?
- tc qdisc del dev "$dev" root handle 1: htb
- check_err $?
+ local ret=0
+
+ run_cmd tc qdisc add dev "$dev" root handle 1: htb
+ run_cmd tc class add dev "$dev" parent 1: classid 1:10 htb rate 1mbit
+ run_cmd tc filter add dev "$dev" parent 1:0 prio 5 handle ffe: protocol ip u32 divisor 256
+ run_cmd tc filter add dev "$dev" parent 1:0 prio 5 handle ffd: protocol ip u32 divisor 256
+ run_cmd tc filter add dev "$dev" parent 1:0 prio 5 handle ffc: protocol ip u32 divisor 256
+ run_cmd tc filter add dev "$dev" protocol ip parent 1: prio 5 handle ffe:2:3 u32 ht ffe:2: match ip src 10.0.0.3 flowid 1:10
+ run_cmd tc filter add dev "$dev" protocol ip parent 1: prio 5 handle ffe:2:2 u32 ht ffe:2: match ip src 10.0.0.2 flowid 1:10
+ run_cmd tc filter show dev "$dev" parent 1:0
+ run_cmd tc filter del dev "$dev" protocol ip parent 1: prio 5 handle ffe:2:3 u32
+ run_cmd tc filter show dev "$dev" parent 1:0
+ run_cmd tc qdisc del dev "$dev" root handle 1: htb
if [ $ret -ne 0 ];then
- echo "FAIL: tc htb hierarchy"
+ end_test "FAIL: tc htb hierarchy"
return 1
fi
- echo "PASS: tc htb hierarchy"
+ end_test "PASS: tc htb hierarchy"
}
kci_test_polrouting()
{
- ret=0
- ip rule add fwmark 1 lookup 100
- check_err $?
- ip route add local 0.0.0.0/0 dev lo table 100
- check_err $?
- ip r s t all > /dev/null
- check_err $?
- ip rule del fwmark 1 lookup 100
- check_err $?
- ip route del local 0.0.0.0/0 dev lo table 100
- check_err $?
+ local ret=0
+ run_cmd ip rule add fwmark 1 lookup 100
+ run_cmd ip route add local 0.0.0.0/0 dev lo table 100
+ run_cmd ip r s t all
+ run_cmd ip rule del fwmark 1 lookup 100
+ run_cmd ip route del local 0.0.0.0/0 dev lo table 100
if [ $ret -ne 0 ];then
- echo "FAIL: policy route test"
+ end_test "FAIL: policy route test"
return 1
fi
- echo "PASS: policy routing"
+ end_test "PASS: policy routing"
}
kci_test_route_get()
{
local hash_policy=$(sysctl -n net.ipv4.fib_multipath_hash_policy)
- ret=0
-
- ip route get 127.0.0.1 > /dev/null
- check_err $?
- ip route get 127.0.0.1 dev "$devdummy" > /dev/null
- check_err $?
- ip route get ::1 > /dev/null
- check_err $?
- ip route get fe80::1 dev "$devdummy" > /dev/null
- check_err $?
- ip route get 127.0.0.1 from 127.0.0.1 oif lo tos 0x1 mark 0x1 > /dev/null
- check_err $?
- ip route get ::1 from ::1 iif lo oif lo tos 0x1 mark 0x1 > /dev/null
- check_err $?
- ip addr add dev "$devdummy" 10.23.7.11/24
- check_err $?
- ip route get 10.23.7.11 from 10.23.7.12 iif "$devdummy" > /dev/null
- check_err $?
- ip route add 10.23.8.0/24 \
+ local ret=0
+ run_cmd ip route get 127.0.0.1
+ run_cmd ip route get 127.0.0.1 dev "$devdummy"
+ run_cmd ip route get ::1
+ run_cmd ip route get fe80::1 dev "$devdummy"
+ run_cmd ip route get 127.0.0.1 from 127.0.0.1 oif lo tos 0x10 mark 0x1
+ run_cmd ip route get ::1 from ::1 iif lo oif lo tos 0x10 mark 0x1
+ run_cmd ip addr add dev "$devdummy" 10.23.7.11/24
+ run_cmd ip route get 10.23.7.11 from 10.23.7.12 iif "$devdummy"
+ run_cmd ip route add 10.23.8.0/24 \
nexthop via 10.23.7.13 dev "$devdummy" \
nexthop via 10.23.7.14 dev "$devdummy"
- check_err $?
+
sysctl -wq net.ipv4.fib_multipath_hash_policy=0
- ip route get 10.23.8.11 > /dev/null
- check_err $?
+ run_cmd ip route get 10.23.8.11
sysctl -wq net.ipv4.fib_multipath_hash_policy=1
- ip route get 10.23.8.11 > /dev/null
- check_err $?
+ run_cmd ip route get 10.23.8.11
sysctl -wq net.ipv4.fib_multipath_hash_policy="$hash_policy"
- ip route del 10.23.8.0/24
- check_err $?
- ip addr del dev "$devdummy" 10.23.7.11/24
- check_err $?
+ run_cmd ip route del 10.23.8.0/24
+ run_cmd ip addr del dev "$devdummy" 10.23.7.11/24
+
if [ $ret -ne 0 ];then
- echo "FAIL: route get"
+ end_test "FAIL: route get"
return 1
fi
- echo "PASS: route get"
+ end_test "PASS: route get"
}
kci_test_addrlft()
{
for i in $(seq 10 100) ;do
lft=$(((RANDOM%3) + 1))
- ip addr add 10.23.11.$i/32 dev "$devdummy" preferred_lft $lft valid_lft $((lft+1))
- check_err $?
+ run_cmd ip addr add 10.23.11.$i/32 dev "$devdummy" preferred_lft $lft valid_lft $((lft+1))
done
sleep 5
-
- ip addr show dev "$devdummy" | grep "10.23.11."
+ run_cmd_grep_fail "10.23.11." ip addr show dev "$devdummy"
if [ $? -eq 0 ]; then
- echo "FAIL: preferred_lft addresses remaining"
check_err 1
+ end_test "FAIL: preferred_lft addresses remaining"
return
fi
- echo "PASS: preferred_lft addresses have expired"
+ end_test "PASS: preferred_lft addresses have expired"
}
kci_test_promote_secondaries()
@@ -285,27 +323,17 @@ kci_test_promote_secondaries()
[ $promote -eq 0 ] && sysctl -q net.ipv4.conf.$devdummy.promote_secondaries=0
- echo "PASS: promote_secondaries complete"
+ end_test "PASS: promote_secondaries complete"
}
kci_test_addrlabel()
{
- ret=0
-
- ip addrlabel add prefix dead::/64 dev lo label 1
- check_err $?
-
- ip addrlabel list |grep -q "prefix dead::/64 dev lo label 1"
- check_err $?
-
- ip addrlabel del prefix dead::/64 dev lo label 1 2> /dev/null
- check_err $?
-
- ip addrlabel add prefix dead::/64 label 1 2> /dev/null
- check_err $?
-
- ip addrlabel del prefix dead::/64 label 1 2> /dev/null
- check_err $?
+ local ret=0
+ run_cmd ip addrlabel add prefix dead::/64 dev lo label 1
+ run_cmd_grep "prefix dead::/64 dev lo label 1" ip addrlabel list
+ run_cmd ip addrlabel del prefix dead::/64 dev lo label 1
+ run_cmd ip addrlabel add prefix dead::/64 label 1
+ run_cmd ip addrlabel del prefix dead::/64 label 1
# concurrent add/delete
for i in $(seq 1 1000); do
@@ -321,47 +349,40 @@ kci_test_addrlabel()
ip addrlabel del prefix 1c3::/64 label 12345 2>/dev/null
if [ $ret -ne 0 ];then
- echo "FAIL: ipv6 addrlabel"
+ end_test "FAIL: ipv6 addrlabel"
return 1
fi
- echo "PASS: ipv6 addrlabel"
+ end_test "PASS: ipv6 addrlabel"
}
kci_test_ifalias()
{
- ret=0
+ local ret=0
namewant=$(uuidgen)
syspathname="/sys/class/net/$devdummy/ifalias"
-
- ip link set dev "$devdummy" alias "$namewant"
- check_err $?
+ run_cmd ip link set dev "$devdummy" alias "$namewant"
if [ $ret -ne 0 ]; then
- echo "FAIL: cannot set interface alias of $devdummy to $namewant"
+ end_test "FAIL: cannot set interface alias of $devdummy to $namewant"
return 1
fi
-
- ip link show "$devdummy" | grep -q "alias $namewant"
- check_err $?
+ run_cmd_grep "alias $namewant" ip link show "$devdummy"
if [ -r "$syspathname" ] ; then
read namehave < "$syspathname"
if [ "$namewant" != "$namehave" ]; then
- echo "FAIL: did set ifalias $namewant but got $namehave"
+ end_test "FAIL: did set ifalias $namewant but got $namehave"
return 1
fi
namewant=$(uuidgen)
echo "$namewant" > "$syspathname"
- ip link show "$devdummy" | grep -q "alias $namewant"
- check_err $?
+ run_cmd_grep "alias $namewant" ip link show "$devdummy"
# sysfs interface allows to delete alias again
echo "" > "$syspathname"
-
- ip link show "$devdummy" | grep -q "alias $namewant"
- check_fail $?
+ run_cmd_grep_fail "alias $namewant" ip link show "$devdummy"
for i in $(seq 1 100); do
uuidgen > "$syspathname" &
@@ -370,245 +391,239 @@ kci_test_ifalias()
wait
# re-add the alias -- kernel should free mem when dummy dev is removed
- ip link set dev "$devdummy" alias "$namewant"
- check_err $?
+ run_cmd ip link set dev "$devdummy" alias "$namewant"
+
fi
if [ $ret -ne 0 ]; then
- echo "FAIL: set interface alias $devdummy to $namewant"
+ end_test "FAIL: set interface alias $devdummy to $namewant"
return 1
fi
- echo "PASS: set ifalias $namewant for $devdummy"
+ end_test "PASS: set ifalias $namewant for $devdummy"
}
kci_test_vrf()
{
vrfname="test-vrf"
- ret=0
-
- ip link show type vrf 2>/dev/null
+ local ret=0
+ run_cmd ip link show type vrf
if [ $? -ne 0 ]; then
- echo "SKIP: vrf: iproute2 too old"
+ end_test "SKIP: vrf: iproute2 too old"
return $ksft_skip
fi
-
- ip link add "$vrfname" type vrf table 10
- check_err $?
+ run_cmd ip link add "$vrfname" type vrf table 10
if [ $ret -ne 0 ];then
- echo "FAIL: can't add vrf interface, skipping test"
+ end_test "FAIL: can't add vrf interface, skipping test"
return 0
fi
-
- ip -br link show type vrf | grep -q "$vrfname"
- check_err $?
+ run_cmd_grep "$vrfname" ip -br link show type vrf
if [ $ret -ne 0 ];then
- echo "FAIL: created vrf device not found"
+ end_test "FAIL: created vrf device not found"
return 1
fi
- ip link set dev "$vrfname" up
- check_err $?
-
- ip link set dev "$devdummy" master "$vrfname"
- check_err $?
- ip link del dev "$vrfname"
- check_err $?
+ run_cmd ip link set dev "$vrfname" up
+ run_cmd ip link set dev "$devdummy" master "$vrfname"
+ run_cmd ip link del dev "$vrfname"
if [ $ret -ne 0 ];then
- echo "FAIL: vrf"
+ end_test "FAIL: vrf"
return 1
fi
- echo "PASS: vrf"
+ end_test "PASS: vrf"
}
kci_test_encap_vxlan()
{
- ret=0
+ local ret=0
vxlan="test-vxlan0"
vlan="test-vlan0"
- testns="$1"
-
- ip -netns "$testns" link add "$vxlan" type vxlan id 42 group 239.1.1.1 \
- dev "$devdummy" dstport 4789 2>/dev/null
+ run_cmd ip -netns "$testns" link add "$vxlan" type vxlan id 42 group 239.1.1.1 \
+ dev "$devdummy" dstport 4789
if [ $? -ne 0 ]; then
- echo "FAIL: can't add vxlan interface, skipping test"
+ end_test "FAIL: can't add vxlan interface, skipping test"
return 0
fi
- check_err $?
-
- ip -netns "$testns" addr add 10.2.11.49/24 dev "$vxlan"
- check_err $?
- ip -netns "$testns" link set up dev "$vxlan"
- check_err $?
-
- ip -netns "$testns" link add link "$vxlan" name "$vlan" type vlan id 1
- check_err $?
+ run_cmd ip -netns "$testns" addr add 10.2.11.49/24 dev "$vxlan"
+ run_cmd ip -netns "$testns" link set up dev "$vxlan"
+ run_cmd ip -netns "$testns" link add link "$vxlan" name "$vlan" type vlan id 1
# changelink testcases
- ip -netns "$testns" link set dev "$vxlan" type vxlan vni 43 2>/dev/null
- check_fail $?
-
- ip -netns "$testns" link set dev "$vxlan" type vxlan group ffe5::5 dev "$devdummy" 2>/dev/null
- check_fail $?
-
- ip -netns "$testns" link set dev "$vxlan" type vxlan ttl inherit 2>/dev/null
- check_fail $?
-
- ip -netns "$testns" link set dev "$vxlan" type vxlan ttl 64
- check_err $?
-
- ip -netns "$testns" link set dev "$vxlan" type vxlan nolearning
- check_err $?
-
- ip -netns "$testns" link set dev "$vxlan" type vxlan proxy 2>/dev/null
- check_fail $?
-
- ip -netns "$testns" link set dev "$vxlan" type vxlan norsc 2>/dev/null
- check_fail $?
-
- ip -netns "$testns" link set dev "$vxlan" type vxlan l2miss 2>/dev/null
- check_fail $?
-
- ip -netns "$testns" link set dev "$vxlan" type vxlan l3miss 2>/dev/null
- check_fail $?
-
- ip -netns "$testns" link set dev "$vxlan" type vxlan external 2>/dev/null
- check_fail $?
-
- ip -netns "$testns" link set dev "$vxlan" type vxlan udpcsum 2>/dev/null
- check_fail $?
-
- ip -netns "$testns" link set dev "$vxlan" type vxlan udp6zerocsumtx 2>/dev/null
- check_fail $?
-
- ip -netns "$testns" link set dev "$vxlan" type vxlan udp6zerocsumrx 2>/dev/null
- check_fail $?
-
- ip -netns "$testns" link set dev "$vxlan" type vxlan remcsumtx 2>/dev/null
- check_fail $?
-
- ip -netns "$testns" link set dev "$vxlan" type vxlan remcsumrx 2>/dev/null
- check_fail $?
-
- ip -netns "$testns" link set dev "$vxlan" type vxlan gbp 2>/dev/null
- check_fail $?
-
- ip -netns "$testns" link set dev "$vxlan" type vxlan gpe 2>/dev/null
- check_fail $?
-
- ip -netns "$testns" link del "$vxlan"
- check_err $?
+ run_cmd_fail ip -netns "$testns" link set dev "$vxlan" type vxlan vni 43
+ run_cmd_fail ip -netns "$testns" link set dev "$vxlan" type vxlan group ffe5::5 dev "$devdummy"
+ run_cmd_fail ip -netns "$testns" link set dev "$vxlan" type vxlan ttl inherit
+
+ run_cmd ip -netns "$testns" link set dev "$vxlan" type vxlan ttl 64
+ run_cmd ip -netns "$testns" link set dev "$vxlan" type vxlan nolearning
+
+ run_cmd_fail ip -netns "$testns" link set dev "$vxlan" type vxlan proxy
+ run_cmd_fail ip -netns "$testns" link set dev "$vxlan" type vxlan norsc
+ run_cmd_fail ip -netns "$testns" link set dev "$vxlan" type vxlan l2miss
+ run_cmd_fail ip -netns "$testns" link set dev "$vxlan" type vxlan l3miss
+ run_cmd_fail ip -netns "$testns" link set dev "$vxlan" type vxlan external
+ run_cmd_fail ip -netns "$testns" link set dev "$vxlan" type vxlan udpcsum
+ run_cmd_fail ip -netns "$testns" link set dev "$vxlan" type vxlan udp6zerocsumtx
+ run_cmd_fail ip -netns "$testns" link set dev "$vxlan" type vxlan udp6zerocsumrx
+ run_cmd_fail ip -netns "$testns" link set dev "$vxlan" type vxlan remcsumtx
+ run_cmd_fail ip -netns "$testns" link set dev "$vxlan" type vxlan remcsumrx
+ run_cmd_fail ip -netns "$testns" link set dev "$vxlan" type vxlan gbp
+ run_cmd_fail ip -netns "$testns" link set dev "$vxlan" type vxlan gpe
+ run_cmd ip -netns "$testns" link del "$vxlan"
if [ $ret -ne 0 ]; then
- echo "FAIL: vxlan"
+ end_test "FAIL: vxlan"
return 1
fi
- echo "PASS: vxlan"
+ end_test "PASS: vxlan"
}
kci_test_encap_fou()
{
- ret=0
+ local ret=0
name="test-fou"
- testns="$1"
-
- ip fou help 2>&1 |grep -q 'Usage: ip fou'
+ run_cmd_grep 'Usage: ip fou' ip fou help
if [ $? -ne 0 ];then
- echo "SKIP: fou: iproute2 too old"
+ end_test "SKIP: fou: iproute2 too old"
+ return $ksft_skip
+ fi
+
+ if ! /sbin/modprobe -q -n fou; then
+ end_test "SKIP: module fou is not found"
return $ksft_skip
fi
+ /sbin/modprobe -q fou
- ip -netns "$testns" fou add port 7777 ipproto 47 2>/dev/null
+ run_cmd ip -netns "$testns" fou add port 7777 ipproto 47
if [ $? -ne 0 ];then
- echo "FAIL: can't add fou port 7777, skipping test"
+ end_test "FAIL: can't add fou port 7777, skipping test"
return 1
fi
-
- ip -netns "$testns" fou add port 8888 ipproto 4
- check_err $?
-
- ip -netns "$testns" fou del port 9999 2>/dev/null
- check_fail $?
-
- ip -netns "$testns" fou del port 7777
- check_err $?
-
+ run_cmd ip -netns "$testns" fou add port 8888 ipproto 4
+ run_cmd_fail ip -netns "$testns" fou del port 9999
+ run_cmd ip -netns "$testns" fou del port 7777
if [ $ret -ne 0 ]; then
- echo "FAIL: fou"
+ end_test "FAIL: fou"s
return 1
fi
- echo "PASS: fou"
+ end_test "PASS: fou"
}
# test various encap methods, use netns to avoid unwanted interference
kci_test_encap()
{
- testns="testns"
- ret=0
-
- ip netns add "$testns"
+ local ret=0
+ setup_ns testns
if [ $? -ne 0 ]; then
- echo "SKIP encap tests: cannot add net namespace $testns"
+ end_test "SKIP encap tests: cannot add net namespace $testns"
return $ksft_skip
fi
-
- ip -netns "$testns" link set lo up
- check_err $?
-
- ip -netns "$testns" link add name "$devdummy" type dummy
- check_err $?
- ip -netns "$testns" link set "$devdummy" up
- check_err $?
-
- kci_test_encap_vxlan "$testns"
- kci_test_encap_fou "$testns"
+ run_cmd ip -netns "$testns" link set lo up
+ run_cmd ip -netns "$testns" link add name "$devdummy" type dummy
+ run_cmd ip -netns "$testns" link set "$devdummy" up
+ run_cmd kci_test_encap_vxlan
+ run_cmd kci_test_encap_fou
ip netns del "$testns"
+ return $ret
}
kci_test_macsec()
{
msname="test_macsec0"
- ret=0
-
- ip macsec help 2>&1 | grep -q "^Usage: ip macsec"
+ local ret=0
+ run_cmd_grep "^Usage: ip macsec" ip macsec help
if [ $? -ne 0 ]; then
- echo "SKIP: macsec: iproute2 too old"
+ end_test "SKIP: macsec: iproute2 too old"
return $ksft_skip
fi
+ run_cmd ip link add link "$devdummy" "$msname" type macsec port 42 encrypt on
+ if [ $ret -ne 0 ];then
+ end_test "FAIL: can't add macsec interface, skipping test"
+ return 1
+ fi
+ run_cmd ip macsec add "$msname" tx sa 0 pn 1024 on key 01 12345678901234567890123456789012
+ run_cmd ip macsec add "$msname" rx port 1234 address "1c:ed:de:ad:be:ef"
+ run_cmd ip macsec add "$msname" rx port 1234 address "1c:ed:de:ad:be:ef" sa 0 pn 1 on key 00 0123456789abcdef0123456789abcdef
+ run_cmd ip macsec show
+ run_cmd ip link del dev "$msname"
- ip link add link "$devdummy" "$msname" type macsec port 42 encrypt on
- check_err $?
if [ $ret -ne 0 ];then
- echo "FAIL: can't add macsec interface, skipping test"
+ end_test "FAIL: macsec"
return 1
fi
- ip macsec add "$msname" tx sa 0 pn 1024 on key 01 12345678901234567890123456789012
- check_err $?
+ end_test "PASS: macsec"
+}
- ip macsec add "$msname" rx port 1234 address "1c:ed:de:ad:be:ef"
- check_err $?
+kci_test_macsec_offload()
+{
+ sysfsd=/sys/kernel/debug/netdevsim/netdevsim0/ports/0/
+ sysfsnet=/sys/bus/netdevsim/devices/netdevsim0/net/
+ probed=false
+ local ret=0
+ run_cmd_grep "^Usage: ip macsec" ip macsec help
+ if [ $? -ne 0 ]; then
+ end_test "SKIP: macsec: iproute2 too old"
+ return $ksft_skip
+ fi
- ip macsec add "$msname" rx port 1234 address "1c:ed:de:ad:be:ef" sa 0 pn 1 on key 00 0123456789abcdef0123456789abcdef
- check_err $?
+ if ! mount | grep -q debugfs; then
+ mount -t debugfs none /sys/kernel/debug/ &> /dev/null
+ fi
- ip macsec show > /dev/null
- check_err $?
+ # setup netdevsim since dummydev doesn't have offload support
+ if [ ! -w /sys/bus/netdevsim/new_device ] ; then
+ run_cmd modprobe -q netdevsim
- ip link del dev "$msname"
- check_err $?
+ if [ $ret -ne 0 ]; then
+ end_test "SKIP: macsec_offload can't load netdevsim"
+ return $ksft_skip
+ fi
+ probed=true
+ fi
- if [ $ret -ne 0 ];then
- echo "FAIL: macsec"
+ echo "0" > /sys/bus/netdevsim/new_device
+ while [ ! -d $sysfsnet ] ; do :; done
+ udevadm settle
+ dev=`ls $sysfsnet`
+
+ ip link set $dev up
+ if [ ! -d $sysfsd ] ; then
+ end_test "FAIL: macsec_offload can't create device $dev"
+ return 1
+ fi
+ run_cmd_grep 'macsec-hw-offload: on' ethtool -k $dev
+ if [ $? -eq 1 ] ; then
+ end_test "FAIL: macsec_offload netdevsim doesn't support MACsec offload"
return 1
fi
+ run_cmd ip link add link $dev kci_macsec1 type macsec port 4 offload mac
+ run_cmd ip link add link $dev kci_macsec2 type macsec address "aa:bb:cc:dd:ee:ff" port 5 offload mac
+ run_cmd ip link add link $dev kci_macsec3 type macsec sci abbacdde01020304 offload mac
+ run_cmd_fail ip link add link $dev kci_macsec4 type macsec port 8 offload mac
+
+ msname=kci_macsec1
+ run_cmd ip macsec add "$msname" tx sa 0 pn 1024 on key 01 12345678901234567890123456789012
+ run_cmd ip macsec add "$msname" rx port 1234 address "1c:ed:de:ad:be:ef"
+ run_cmd ip macsec add "$msname" rx port 1234 address "1c:ed:de:ad:be:ef" sa 0 pn 1 on \
+ key 00 0123456789abcdef0123456789abcdef
+ run_cmd_fail ip macsec add "$msname" rx port 1235 address "1c:ed:de:ad:be:ef"
+ # clean up any leftovers
+ for msdev in kci_macsec{1,2,3,4} ; do
+ ip link del $msdev 2> /dev/null
+ done
+ echo 0 > /sys/bus/netdevsim/del_device
+ $probed && rmmod netdevsim
- echo "PASS: macsec"
+ if [ $ret -ne 0 ]; then
+ end_test "FAIL: macsec_offload"
+ return 1
+ fi
+ end_test "PASS: macsec_offload"
}
#-------------------------------------------------------------------
@@ -631,7 +646,7 @@ kci_test_macsec()
#-------------------------------------------------------------------
kci_test_ipsec()
{
- ret=0
+ local ret=0
algo="aead rfc4106(gcm(aes)) 0x3132333435363738393031323334353664636261 128"
srcip=192.168.123.1
dstip=192.168.123.2
@@ -640,8 +655,7 @@ kci_test_ipsec()
ip addr add $srcip dev $devdummy
# flush to be sure there's nothing configured
- ip x s flush ; ip x p flush
- check_err $?
+ run_cmd ip x s flush ; ip x p flush
# start the monitor in the background
tmpfile=`mktemp /var/run/ipsectestXXX`
@@ -649,72 +663,57 @@ kci_test_ipsec()
sleep 0.2
ipsecid="proto esp src $srcip dst $dstip spi 0x07"
- ip x s add $ipsecid \
+ run_cmd ip x s add $ipsecid \
mode transport reqid 0x07 replay-window 32 \
$algo sel src $srcip/24 dst $dstip/24
- check_err $?
- lines=`ip x s list | grep $srcip | grep $dstip | wc -l`
- test $lines -eq 2
- check_err $?
- ip x s count | grep -q "SAD count 1"
- check_err $?
+ lines=`ip x s list | grep $srcip | grep $dstip | wc -l`
+ run_cmd test $lines -eq 2
+ run_cmd_grep "SAD count 1" ip x s count
lines=`ip x s get $ipsecid | grep $srcip | grep $dstip | wc -l`
- test $lines -eq 2
- check_err $?
-
- ip x s delete $ipsecid
- check_err $?
+ run_cmd test $lines -eq 2
+ run_cmd ip x s delete $ipsecid
lines=`ip x s list | wc -l`
- test $lines -eq 0
- check_err $?
+ run_cmd test $lines -eq 0
ipsecsel="dir out src $srcip/24 dst $dstip/24"
- ip x p add $ipsecsel \
+ run_cmd ip x p add $ipsecsel \
tmpl proto esp src $srcip dst $dstip \
spi 0x07 mode transport reqid 0x07
- check_err $?
+
lines=`ip x p list | grep $srcip | grep $dstip | wc -l`
- test $lines -eq 2
- check_err $?
+ run_cmd test $lines -eq 2
- ip x p count | grep -q "SPD IN 0 OUT 1 FWD 0"
- check_err $?
+ run_cmd_grep "SPD IN 0 OUT 1 FWD 0" ip x p count
lines=`ip x p get $ipsecsel | grep $srcip | grep $dstip | wc -l`
- test $lines -eq 2
- check_err $?
+ run_cmd test $lines -eq 2
- ip x p delete $ipsecsel
- check_err $?
+ run_cmd ip x p delete $ipsecsel
lines=`ip x p list | wc -l`
- test $lines -eq 0
- check_err $?
+ run_cmd test $lines -eq 0
# check the monitor results
kill $mpid
lines=`wc -l $tmpfile | cut "-d " -f1`
- test $lines -eq 20
- check_err $?
+ run_cmd test $lines -eq 20
rm -rf $tmpfile
# clean up any leftovers
- ip x s flush
- check_err $?
- ip x p flush
- check_err $?
+ run_cmd ip x s flush
+ run_cmd ip x p flush
ip addr del $srcip/32 dev $devdummy
if [ $ret -ne 0 ]; then
- echo "FAIL: ipsec"
+ end_test "FAIL: ipsec"
return 1
fi
- echo "PASS: ipsec"
+ end_test "PASS: ipsec"
}
#-------------------------------------------------------------------
@@ -731,7 +730,7 @@ kci_test_ipsec()
#-------------------------------------------------------------------
kci_test_ipsec_offload()
{
- ret=0
+ local ret=0
algo="aead rfc4106(gcm(aes)) 0x3132333435363738393031323334353664636261 128"
srcip=192.168.123.3
dstip=192.168.123.4
@@ -740,12 +739,15 @@ kci_test_ipsec_offload()
sysfsnet=/sys/bus/netdevsim/devices/netdevsim0/net/
probed=false
+ if ! mount | grep -q debugfs; then
+ mount -t debugfs none /sys/kernel/debug/ &> /dev/null
+ fi
+
# setup netdevsim since dummydev doesn't have offload support
if [ ! -w /sys/bus/netdevsim/new_device ] ; then
- modprobe -q netdevsim
- check_err $?
+ run_cmd modprobe -q netdevsim
if [ $ret -ne 0 ]; then
- echo "SKIP: ipsec_offload can't load netdevsim"
+ end_test "SKIP: ipsec_offload can't load netdevsim"
return $ksft_skip
fi
probed=true
@@ -759,11 +761,11 @@ kci_test_ipsec_offload()
ip addr add $srcip dev $dev
ip link set $dev up
if [ ! -d $sysfsd ] ; then
- echo "FAIL: ipsec_offload can't create device $dev"
+ end_test "FAIL: ipsec_offload can't create device $dev"
return 1
fi
if [ ! -f $sysfsf ] ; then
- echo "FAIL: ipsec_offload netdevsim doesn't support IPsec offload"
+ end_test "FAIL: ipsec_offload netdevsim doesn't support IPsec offload"
return 1
fi
@@ -771,40 +773,41 @@ kci_test_ipsec_offload()
ip x s flush ; ip x p flush
# create offloaded SAs, both in and out
- ip x p add dir out src $srcip/24 dst $dstip/24 \
+ run_cmd ip x p add dir out src $srcip/24 dst $dstip/24 \
tmpl proto esp src $srcip dst $dstip spi 9 \
mode transport reqid 42
- check_err $?
- ip x p add dir out src $dstip/24 dst $srcip/24 \
+
+ run_cmd ip x p add dir in src $dstip/24 dst $srcip/24 \
tmpl proto esp src $dstip dst $srcip spi 9 \
mode transport reqid 42
- check_err $?
- ip x s add proto esp src $srcip dst $dstip spi 9 \
+ run_cmd ip x s add proto esp src $srcip dst $dstip spi 9 \
mode transport reqid 42 $algo sel src $srcip/24 dst $dstip/24 \
offload dev $dev dir out
- check_err $?
- ip x s add proto esp src $dstip dst $srcip spi 9 \
+
+ run_cmd ip x s add proto esp src $dstip dst $srcip spi 9 \
mode transport reqid 42 $algo sel src $dstip/24 dst $srcip/24 \
offload dev $dev dir in
- check_err $?
+
if [ $ret -ne 0 ]; then
- echo "FAIL: ipsec_offload can't create SA"
+ end_test "FAIL: ipsec_offload can't create SA"
return 1
fi
# does offload show up in ip output
lines=`ip x s list | grep -c "crypto offload parameters: dev $dev dir"`
if [ $lines -ne 2 ] ; then
- echo "FAIL: ipsec_offload SA offload missing from list output"
check_err 1
+ end_test "FAIL: ipsec_offload SA offload missing from list output"
fi
+ # we didn't create a peer, make sure we can Tx
+ ip neigh add $dstip dev $dev lladdr 00:11:22:33:44:55
# use ping to exercise the Tx path
ping -I $dev -c 3 -W 1 -i 0 $dstip >/dev/null
# does driver have correct offload info
- diff $sysfsf - << EOF
+ run_cmd diff $sysfsf - << EOF
SA count=2 tx=3
sa[0] tx ipaddr=0x00000000 00000000 00000000 00000000
sa[0] spi=0x00000009 proto=0x32 salt=0x61626364 crypt=1
@@ -814,7 +817,7 @@ sa[1] spi=0x00000009 proto=0x32 salt=0x61626364 crypt=1
sa[1] key=0x34333231 38373635 32313039 36353433
EOF
if [ $? -ne 0 ] ; then
- echo "FAIL: ipsec_offload incorrect driver data"
+ end_test "FAIL: ipsec_offload incorrect driver data"
check_err 1
fi
@@ -823,301 +826,243 @@ EOF
ip x p flush
lines=`grep -c "SA count=0" $sysfsf`
if [ $lines -ne 1 ] ; then
- echo "FAIL: ipsec_offload SA not removed from driver"
check_err 1
+ end_test "FAIL: ipsec_offload SA not removed from driver"
fi
# clean up any leftovers
+ echo 0 > /sys/bus/netdevsim/del_device
$probed && rmmod netdevsim
if [ $ret -ne 0 ]; then
- echo "FAIL: ipsec_offload"
+ end_test "FAIL: ipsec_offload"
return 1
fi
- echo "PASS: ipsec_offload"
+ end_test "PASS: ipsec_offload"
}
kci_test_gretap()
{
- testns="testns"
DEV_NS=gretap00
- ret=0
+ local ret=0
- ip netns add "$testns"
+ setup_ns testns
if [ $? -ne 0 ]; then
- echo "SKIP gretap tests: cannot add net namespace $testns"
+ end_test "SKIP gretap tests: cannot add net namespace $testns"
return $ksft_skip
fi
- ip link help gretap 2>&1 | grep -q "^Usage:"
+ run_cmd_grep "^Usage:" ip link help gretap
if [ $? -ne 0 ];then
- echo "SKIP: gretap: iproute2 too old"
+ end_test "SKIP: gretap: iproute2 too old"
ip netns del "$testns"
return $ksft_skip
fi
# test native tunnel
- ip -netns "$testns" link add dev "$DEV_NS" type gretap seq \
+ run_cmd ip -netns "$testns" link add dev "$DEV_NS" type gretap seq \
key 102 local 172.16.1.100 remote 172.16.1.200
- check_err $?
- ip -netns "$testns" addr add dev "$DEV_NS" 10.1.1.100/24
- check_err $?
-
- ip -netns "$testns" link set dev $DEV_NS up
- check_err $?
- ip -netns "$testns" link del "$DEV_NS"
- check_err $?
+ run_cmd ip -netns "$testns" addr add dev "$DEV_NS" 10.1.1.100/24
+ run_cmd ip -netns "$testns" link set dev $DEV_NS up
+ run_cmd ip -netns "$testns" link del "$DEV_NS"
# test external mode
- ip -netns "$testns" link add dev "$DEV_NS" type gretap external
- check_err $?
-
- ip -netns "$testns" link del "$DEV_NS"
- check_err $?
+ run_cmd ip -netns "$testns" link add dev "$DEV_NS" type gretap external
+ run_cmd ip -netns "$testns" link del "$DEV_NS"
if [ $ret -ne 0 ]; then
- echo "FAIL: gretap"
+ end_test "FAIL: gretap"
ip netns del "$testns"
return 1
fi
- echo "PASS: gretap"
+ end_test "PASS: gretap"
ip netns del "$testns"
}
kci_test_ip6gretap()
{
- testns="testns"
DEV_NS=ip6gretap00
- ret=0
+ local ret=0
- ip netns add "$testns"
+ setup_ns testns
if [ $? -ne 0 ]; then
- echo "SKIP ip6gretap tests: cannot add net namespace $testns"
+ end_test "SKIP ip6gretap tests: cannot add net namespace $testns"
return $ksft_skip
fi
- ip link help ip6gretap 2>&1 | grep -q "^Usage:"
+ run_cmd_grep "^Usage:" ip link help ip6gretap
if [ $? -ne 0 ];then
- echo "SKIP: ip6gretap: iproute2 too old"
+ end_test "SKIP: ip6gretap: iproute2 too old"
ip netns del "$testns"
return $ksft_skip
fi
# test native tunnel
- ip -netns "$testns" link add dev "$DEV_NS" type ip6gretap seq \
+ run_cmd ip -netns "$testns" link add dev "$DEV_NS" type ip6gretap seq \
key 102 local fc00:100::1 remote fc00:100::2
- check_err $?
-
- ip -netns "$testns" addr add dev "$DEV_NS" fc00:200::1/96
- check_err $?
- ip -netns "$testns" link set dev $DEV_NS up
- check_err $?
- ip -netns "$testns" link del "$DEV_NS"
- check_err $?
+ run_cmd ip -netns "$testns" addr add dev "$DEV_NS" fc00:200::1/96
+ run_cmd ip -netns "$testns" link set dev $DEV_NS up
+ run_cmd ip -netns "$testns" link del "$DEV_NS"
# test external mode
- ip -netns "$testns" link add dev "$DEV_NS" type ip6gretap external
- check_err $?
-
- ip -netns "$testns" link del "$DEV_NS"
- check_err $?
+ run_cmd ip -netns "$testns" link add dev "$DEV_NS" type ip6gretap external
+ run_cmd ip -netns "$testns" link del "$DEV_NS"
if [ $ret -ne 0 ]; then
- echo "FAIL: ip6gretap"
+ end_test "FAIL: ip6gretap"
ip netns del "$testns"
return 1
fi
- echo "PASS: ip6gretap"
+ end_test "PASS: ip6gretap"
ip netns del "$testns"
}
kci_test_erspan()
{
- testns="testns"
DEV_NS=erspan00
- ret=0
-
- ip link help erspan 2>&1 | grep -q "^Usage:"
+ local ret=0
+ run_cmd_grep "^Usage:" ip link help erspan
if [ $? -ne 0 ];then
- echo "SKIP: erspan: iproute2 too old"
+ end_test "SKIP: erspan: iproute2 too old"
return $ksft_skip
fi
-
- ip netns add "$testns"
+ setup_ns testns
if [ $? -ne 0 ]; then
- echo "SKIP erspan tests: cannot add net namespace $testns"
+ end_test "SKIP erspan tests: cannot add net namespace $testns"
return $ksft_skip
fi
# test native tunnel erspan v1
- ip -netns "$testns" link add dev "$DEV_NS" type erspan seq \
+ run_cmd ip -netns "$testns" link add dev "$DEV_NS" type erspan seq \
key 102 local 172.16.1.100 remote 172.16.1.200 \
erspan_ver 1 erspan 488
- check_err $?
- ip -netns "$testns" addr add dev "$DEV_NS" 10.1.1.100/24
- check_err $?
- ip -netns "$testns" link set dev $DEV_NS up
- check_err $?
-
- ip -netns "$testns" link del "$DEV_NS"
- check_err $?
+ run_cmd ip -netns "$testns" addr add dev "$DEV_NS" 10.1.1.100/24
+ run_cmd ip -netns "$testns" link set dev $DEV_NS up
+ run_cmd ip -netns "$testns" link del "$DEV_NS"
# test native tunnel erspan v2
- ip -netns "$testns" link add dev "$DEV_NS" type erspan seq \
+ run_cmd ip -netns "$testns" link add dev "$DEV_NS" type erspan seq \
key 102 local 172.16.1.100 remote 172.16.1.200 \
erspan_ver 2 erspan_dir ingress erspan_hwid 7
- check_err $?
- ip -netns "$testns" addr add dev "$DEV_NS" 10.1.1.100/24
- check_err $?
- ip -netns "$testns" link set dev $DEV_NS up
- check_err $?
-
- ip -netns "$testns" link del "$DEV_NS"
- check_err $?
+ run_cmd ip -netns "$testns" addr add dev "$DEV_NS" 10.1.1.100/24
+ run_cmd ip -netns "$testns" link set dev $DEV_NS up
+ run_cmd ip -netns "$testns" link del "$DEV_NS"
# test external mode
- ip -netns "$testns" link add dev "$DEV_NS" type erspan external
- check_err $?
-
- ip -netns "$testns" link del "$DEV_NS"
- check_err $?
+ run_cmd ip -netns "$testns" link add dev "$DEV_NS" type erspan external
+ run_cmd ip -netns "$testns" link del "$DEV_NS"
if [ $ret -ne 0 ]; then
- echo "FAIL: erspan"
+ end_test "FAIL: erspan"
ip netns del "$testns"
return 1
fi
- echo "PASS: erspan"
+ end_test "PASS: erspan"
ip netns del "$testns"
}
kci_test_ip6erspan()
{
- testns="testns"
DEV_NS=ip6erspan00
- ret=0
-
- ip link help ip6erspan 2>&1 | grep -q "^Usage:"
+ local ret=0
+ run_cmd_grep "^Usage:" ip link help ip6erspan
if [ $? -ne 0 ];then
- echo "SKIP: ip6erspan: iproute2 too old"
+ end_test "SKIP: ip6erspan: iproute2 too old"
return $ksft_skip
fi
-
- ip netns add "$testns"
+ setup_ns testns
if [ $? -ne 0 ]; then
- echo "SKIP ip6erspan tests: cannot add net namespace $testns"
+ end_test "SKIP ip6erspan tests: cannot add net namespace $testns"
return $ksft_skip
fi
# test native tunnel ip6erspan v1
- ip -netns "$testns" link add dev "$DEV_NS" type ip6erspan seq \
+ run_cmd ip -netns "$testns" link add dev "$DEV_NS" type ip6erspan seq \
key 102 local fc00:100::1 remote fc00:100::2 \
erspan_ver 1 erspan 488
- check_err $?
- ip -netns "$testns" addr add dev "$DEV_NS" 10.1.1.100/24
- check_err $?
- ip -netns "$testns" link set dev $DEV_NS up
- check_err $?
-
- ip -netns "$testns" link del "$DEV_NS"
- check_err $?
+ run_cmd ip -netns "$testns" addr add dev "$DEV_NS" 10.1.1.100/24
+ run_cmd ip -netns "$testns" link set dev $DEV_NS up
+ run_cmd ip -netns "$testns" link del "$DEV_NS"
# test native tunnel ip6erspan v2
- ip -netns "$testns" link add dev "$DEV_NS" type ip6erspan seq \
+ run_cmd ip -netns "$testns" link add dev "$DEV_NS" type ip6erspan seq \
key 102 local fc00:100::1 remote fc00:100::2 \
erspan_ver 2 erspan_dir ingress erspan_hwid 7
- check_err $?
-
- ip -netns "$testns" addr add dev "$DEV_NS" 10.1.1.100/24
- check_err $?
- ip -netns "$testns" link set dev $DEV_NS up
- check_err $?
- ip -netns "$testns" link del "$DEV_NS"
- check_err $?
+ run_cmd ip -netns "$testns" addr add dev "$DEV_NS" 10.1.1.100/24
+ run_cmd ip -netns "$testns" link set dev $DEV_NS up
+ run_cmd ip -netns "$testns" link del "$DEV_NS"
# test external mode
- ip -netns "$testns" link add dev "$DEV_NS" \
+ run_cmd ip -netns "$testns" link add dev "$DEV_NS" \
type ip6erspan external
- check_err $?
- ip -netns "$testns" link del "$DEV_NS"
- check_err $?
+ run_cmd ip -netns "$testns" link del "$DEV_NS"
if [ $ret -ne 0 ]; then
- echo "FAIL: ip6erspan"
+ end_test "FAIL: ip6erspan"
ip netns del "$testns"
return 1
fi
- echo "PASS: ip6erspan"
+ end_test "PASS: ip6erspan"
ip netns del "$testns"
}
kci_test_fdb_get()
{
- IP="ip -netns testns"
- BRIDGE="bridge -netns testns"
brdev="test-br0"
vxlandev="vxlan10"
test_mac=de:ad:be:ef:13:37
localip="10.0.2.2"
dstip="10.0.2.3"
- ret=0
+ local ret=0
- bridge fdb help 2>&1 |grep -q 'bridge fdb get'
+ run_cmd_grep 'bridge fdb get' bridge fdb help
if [ $? -ne 0 ];then
- echo "SKIP: fdb get tests: iproute2 too old"
+ end_test "SKIP: fdb get tests: iproute2 too old"
return $ksft_skip
fi
- ip netns add testns
+ setup_ns testns
if [ $? -ne 0 ]; then
- echo "SKIP fdb get tests: cannot add net namespace $testns"
+ end_test "SKIP fdb get tests: cannot add net namespace $testns"
return $ksft_skip
fi
-
- $IP link add "$vxlandev" type vxlan id 10 local $localip \
- dstport 4789 2>/dev/null
- check_err $?
- $IP link add name "$brdev" type bridge &>/dev/null
- check_err $?
- $IP link set dev "$vxlandev" master "$brdev" &>/dev/null
- check_err $?
- $BRIDGE fdb add $test_mac dev "$vxlandev" master &>/dev/null
- check_err $?
- $BRIDGE fdb add $test_mac dev "$vxlandev" dst $dstip self &>/dev/null
- check_err $?
-
- $BRIDGE fdb get $test_mac brport "$vxlandev" 2>/dev/null | grep -q "dev $vxlandev master $brdev"
- check_err $?
- $BRIDGE fdb get $test_mac br "$brdev" 2>/dev/null | grep -q "dev $vxlandev master $brdev"
- check_err $?
- $BRIDGE fdb get $test_mac dev "$vxlandev" self 2>/dev/null | grep -q "dev $vxlandev dst $dstip"
- check_err $?
-
- ip netns del testns &>/dev/null
+ IP="ip -netns $testns"
+ BRIDGE="bridge -netns $testns"
+ run_cmd $IP link add "$vxlandev" type vxlan id 10 local $localip \
+ dstport 4789
+ run_cmd $IP link add name "$brdev" type bridge
+ run_cmd $IP link set dev "$vxlandev" master "$brdev"
+ run_cmd $BRIDGE fdb add $test_mac dev "$vxlandev" master
+ run_cmd $BRIDGE fdb add $test_mac dev "$vxlandev" dst $dstip self
+ run_cmd_grep "dev $vxlandev master $brdev" $BRIDGE fdb get $test_mac brport "$vxlandev"
+ run_cmd_grep "dev $vxlandev master $brdev" $BRIDGE fdb get $test_mac br "$brdev"
+ run_cmd_grep "dev $vxlandev dst $dstip" $BRIDGE fdb get $test_mac dev "$vxlandev" self
+
+ ip netns del $testns &>/dev/null
if [ $ret -ne 0 ]; then
- echo "FAIL: bridge fdb get"
+ end_test "FAIL: bridge fdb get"
return 1
fi
- echo "PASS: bridge fdb get"
+ end_test "PASS: bridge fdb get"
}
kci_test_neigh_get()
@@ -1125,100 +1070,263 @@ kci_test_neigh_get()
dstmac=de:ad:be:ef:13:37
dstip=10.0.2.4
dstip6=dead::2
- ret=0
+ local ret=0
- ip neigh help 2>&1 |grep -q 'ip neigh get'
+ run_cmd_grep 'ip neigh get' ip neigh help
if [ $? -ne 0 ];then
- echo "SKIP: fdb get tests: iproute2 too old"
+ end_test "SKIP: fdb get tests: iproute2 too old"
return $ksft_skip
fi
# ipv4
- ip neigh add $dstip lladdr $dstmac dev "$devdummy" > /dev/null
+ run_cmd ip neigh add $dstip lladdr $dstmac dev "$devdummy"
+ run_cmd_grep "$dstmac" ip neigh get $dstip dev "$devdummy"
+ run_cmd ip neigh del $dstip lladdr $dstmac dev "$devdummy"
+
+ # ipv4 proxy
+ run_cmd ip neigh add proxy $dstip dev "$devdummy"
+ run_cmd_grep "$dstip" ip neigh get proxy $dstip dev "$devdummy"
+ run_cmd ip neigh del proxy $dstip dev "$devdummy"
+
+ # ipv6
+ run_cmd ip neigh add $dstip6 lladdr $dstmac dev "$devdummy"
+ run_cmd_grep "$dstmac" ip neigh get $dstip6 dev "$devdummy"
+ run_cmd ip neigh del $dstip6 lladdr $dstmac dev "$devdummy"
+
+ # ipv6 proxy
+ run_cmd ip neigh add proxy $dstip6 dev "$devdummy"
+ run_cmd_grep "$dstip6" ip neigh get proxy $dstip6 dev "$devdummy"
+ run_cmd ip neigh del proxy $dstip6 dev "$devdummy"
+
+ if [ $ret -ne 0 ];then
+ end_test "FAIL: neigh get"
+ return 1
+ fi
+
+ end_test "PASS: neigh get"
+}
+
+kci_test_bridge_parent_id()
+{
+ local ret=0
+ sysfsnet=/sys/bus/netdevsim/devices/netdevsim
+ probed=false
+
+ if [ ! -w /sys/bus/netdevsim/new_device ] ; then
+ run_cmd modprobe -q netdevsim
+ if [ $ret -ne 0 ]; then
+ end_test "SKIP: bridge_parent_id can't load netdevsim"
+ return $ksft_skip
+ fi
+ probed=true
+ fi
+
+ echo "10 1" > /sys/bus/netdevsim/new_device
+ while [ ! -d ${sysfsnet}10 ] ; do :; done
+ echo "20 1" > /sys/bus/netdevsim/new_device
+ while [ ! -d ${sysfsnet}20 ] ; do :; done
+ udevadm settle
+ dev10=`ls ${sysfsnet}10/net/`
+ dev20=`ls ${sysfsnet}20/net/`
+ run_cmd ip link add name test-bond0 type bond mode 802.3ad
+ run_cmd ip link set dev $dev10 master test-bond0
+ run_cmd ip link set dev $dev20 master test-bond0
+ run_cmd ip link add name test-br0 type bridge
+ run_cmd ip link set dev test-bond0 master test-br0
+
+ # clean up any leftovers
+ ip link del dev test-br0
+ ip link del dev test-bond0
+ echo 20 > /sys/bus/netdevsim/del_device
+ echo 10 > /sys/bus/netdevsim/del_device
+ $probed && rmmod netdevsim
+
+ if [ $ret -ne 0 ]; then
+ end_test "FAIL: bridge_parent_id"
+ return 1
+ fi
+ end_test "PASS: bridge_parent_id"
+}
+
+address_get_proto()
+{
+ local addr=$1; shift
+
+ ip -N -j address show dev "$devdummy" |
+ jq -e -r --arg addr "${addr%/*}" \
+ '.[].addr_info[] | select(.local == $addr) | .protocol'
+}
+
+address_count()
+{
+ ip -N -j address show dev "$devdummy" "$@" |
+ jq -e -r '[.[].addr_info[] | .local | select(. != null)] | length'
+}
+
+do_test_address_proto()
+{
+ local what=$1; shift
+ local addr=$1; shift
+ local addr2=${addr%/*}2/${addr#*/}
+ local addr3=${addr%/*}3/${addr#*/}
+ local proto
+ local count
+ local ret=0
+ local err
+
+ ip address add dev "$devdummy" "$addr3"
+ check_err $?
+ proto=$(address_get_proto "$addr3")
+ [[ "$proto" == null ]]
check_err $?
- ip neigh get $dstip dev "$devdummy" 2> /dev/null | grep -q "$dstmac"
+
+ ip address add dev "$devdummy" "$addr2" proto 0x99
check_err $?
- ip neigh del $dstip lladdr $dstmac dev "$devdummy" > /dev/null
+ proto=$(address_get_proto "$addr2")
+ [[ "$proto" == 0x99 ]]
check_err $?
- # ipv4 proxy
- ip neigh add proxy $dstip dev "$devdummy" > /dev/null
+ ip address add dev "$devdummy" "$addr" proto 0xab
check_err $?
- ip neigh get proxy $dstip dev "$devdummy" 2>/dev/null | grep -q "$dstip"
+ proto=$(address_get_proto "$addr")
+ [[ "$proto" == 0xab ]]
check_err $?
- ip neigh del proxy $dstip dev "$devdummy" > /dev/null
+
+ ip address replace dev "$devdummy" "$addr" proto 0x11
+ proto=$(address_get_proto "$addr")
+ check_err $?
+ [[ "$proto" == 0x11 ]]
check_err $?
- # ipv6
- ip neigh add $dstip6 lladdr $dstmac dev "$devdummy" > /dev/null
+ count=$(address_count)
check_err $?
- ip neigh get $dstip6 dev "$devdummy" 2> /dev/null | grep -q "$dstmac"
+ (( count >= 3 )) # $addr, $addr2 and $addr3 plus any kernel addresses
check_err $?
- ip neigh del $dstip6 lladdr $dstmac dev "$devdummy" > /dev/null
+
+ count=$(address_count proto 0)
+ check_err $?
+ (( count == 1 )) # just $addr3
check_err $?
- # ipv6 proxy
- ip neigh add proxy $dstip6 dev "$devdummy" > /dev/null
+ count=$(address_count proto 0x11)
check_err $?
- ip neigh get proxy $dstip6 dev "$devdummy" 2>/dev/null | grep -q "$dstip6"
+ (( count == 2 )) # $addr and $addr3
check_err $?
- ip neigh del proxy $dstip6 dev "$devdummy" > /dev/null
+
+ count=$(address_count proto 0xab)
+ check_err $?
+ (( count == 1 )) # just $addr3
check_err $?
- if [ $ret -ne 0 ];then
- echo "FAIL: neigh get"
+ ip address del dev "$devdummy" "$addr"
+ ip address del dev "$devdummy" "$addr2"
+ ip address del dev "$devdummy" "$addr3"
+
+ if [ $ret -ne 0 ]; then
+ end_test "FAIL: address proto $what"
return 1
fi
+ end_test "PASS: address proto $what"
+}
+
+kci_test_address_proto()
+{
+ local ret=0
+
+ do_test_address_proto IPv4 192.0.2.1/28
+ check_err $?
- echo "PASS: neigh get"
+ do_test_address_proto IPv6 2001:db8:1::1/64
+ check_err $?
+
+ return $ret
+}
+
+kci_test_enslave_bonding()
+{
+ local bond="bond123"
+ local ret=0
+
+ setup_ns testns
+ if [ $? -ne 0 ]; then
+ end_test "SKIP bonding tests: cannot add net namespace $testns"
+ return $ksft_skip
+ fi
+
+ run_cmd ip -netns $testns link add dev $bond type bond mode balance-rr
+ run_cmd ip -netns $testns link add dev $devdummy type dummy
+ run_cmd ip -netns $testns link set dev $devdummy up
+ run_cmd ip -netns $testns link set dev $devdummy master $bond down
+ if [ $ret -ne 0 ]; then
+ end_test "FAIL: initially up interface added to a bond and set down"
+ ip netns del "$testns"
+ return 1
+ fi
+
+ end_test "PASS: enslave interface in a bond"
+ ip netns del "$testns"
}
kci_test_rtnl()
{
+ local current_test
+ local ret=0
+
kci_add_dummy
if [ $ret -ne 0 ];then
- echo "FAIL: cannot add dummy interface"
+ end_test "FAIL: cannot add dummy interface"
return 1
fi
- kci_test_polrouting
- kci_test_route_get
- kci_test_addrlft
- kci_test_promote_secondaries
- kci_test_tc
- kci_test_gre
- kci_test_gretap
- kci_test_ip6gretap
- kci_test_erspan
- kci_test_ip6erspan
- kci_test_bridge
- kci_test_addrlabel
- kci_test_ifalias
- kci_test_vrf
- kci_test_encap
- kci_test_macsec
- kci_test_ipsec
- kci_test_ipsec_offload
- kci_test_fdb_get
- kci_test_neigh_get
+ for current_test in ${TESTS:-$ALL_TESTS}; do
+ $current_test
+ check_err $?
+ done
kci_del_dummy
+ return $ret
+}
+
+usage()
+{
+ cat <<EOF
+usage: ${0##*/} OPTS
+
+ -t <test> Test(s) to run (default: all)
+ (options: $(echo $ALL_TESTS))
+ -v Verbose mode (show commands and output)
+ -P Pause after every test
+ -p Pause after every failing test before cleanup (for debugging)
+EOF
}
#check for needed privileges
if [ "$(id -u)" -ne 0 ];then
- echo "SKIP: Need root privileges"
+ end_test "SKIP: Need root privileges"
exit $ksft_skip
fi
for x in ip tc;do
$x -Version 2>/dev/null >/dev/null
if [ $? -ne 0 ];then
- echo "SKIP: Could not run test without the $x tool"
+ end_test "SKIP: Could not run test without the $x tool"
exit $ksft_skip
fi
done
+while getopts t:hvpP o; do
+ case $o in
+ t) TESTS=$OPTARG;;
+ v) VERBOSE=1;;
+ p) PAUSE_ON_FAIL=yes;;
+ P) PAUSE=yes;;
+ h) usage; exit 0;;
+ *) usage; exit 1;;
+ esac
+done
+
+[ $PAUSE = "yes" ] && PAUSE_ON_FAIL="no"
+
kci_test_rtnl
-exit $ret
+exit $?
diff --git a/tools/testing/selftests/net/run_afpackettests b/tools/testing/selftests/net/run_afpackettests
index 8b42e8b04e0f..a59cb6a3c4f5 100755
--- a/tools/testing/selftests/net/run_afpackettests
+++ b/tools/testing/selftests/net/run_afpackettests
@@ -1,9 +1,12 @@
#!/bin/sh
# SPDX-License-Identifier: GPL-2.0
+# Kselftest framework requirement - SKIP code is 4.
+ksft_skip=4
+
if [ $(id -u) != 0 ]; then
echo $msg must be run as root >&2
- exit 0
+ exit $ksft_skip
fi
ret=0
diff --git a/tools/testing/selftests/net/rxtimestamp.c b/tools/testing/selftests/net/rxtimestamp.c
index 422e7761254d..9eb42570294d 100644
--- a/tools/testing/selftests/net/rxtimestamp.c
+++ b/tools/testing/selftests/net/rxtimestamp.c
@@ -18,7 +18,7 @@
#include <linux/net_tstamp.h>
#include <linux/errqueue.h>
-#define ARRAY_SIZE(arr) (sizeof(arr) / sizeof((arr)[0]))
+#include "../kselftest.h"
struct options {
int so_timestamp;
@@ -44,6 +44,7 @@ struct test_case {
struct options sockopt;
struct tstamps expected;
bool enabled;
+ bool warn_on_fail;
};
struct sof_flag {
@@ -67,44 +68,44 @@ static struct socket_type socket_types[] = {
static struct test_case test_cases[] = {
{ {}, {} },
{
- { so_timestamp: 1 },
- { tstamp: true }
+ { .so_timestamp = 1 },
+ { .tstamp = true }
},
{
- { so_timestampns: 1 },
- { tstampns: true }
+ { .so_timestampns = 1 },
+ { .tstampns = true }
},
{
- { so_timestamp: 1, so_timestampns: 1 },
- { tstampns: true }
+ { .so_timestamp = 1, .so_timestampns = 1 },
+ { .tstampns = true }
},
{
- { so_timestamping: SOF_TIMESTAMPING_RX_SOFTWARE },
+ { .so_timestamping = SOF_TIMESTAMPING_RX_SOFTWARE },
{}
},
{
/* Loopback device does not support hw timestamps. */
- { so_timestamping: SOF_TIMESTAMPING_RX_HARDWARE },
+ { .so_timestamping = SOF_TIMESTAMPING_RX_HARDWARE },
{}
},
{
- { so_timestamping: SOF_TIMESTAMPING_SOFTWARE },
- {}
+ { .so_timestamping = SOF_TIMESTAMPING_SOFTWARE },
+ .warn_on_fail = true
},
{
- { so_timestamping: SOF_TIMESTAMPING_RX_SOFTWARE
+ { .so_timestamping = SOF_TIMESTAMPING_RX_SOFTWARE
| SOF_TIMESTAMPING_RX_HARDWARE },
{}
},
{
- { so_timestamping: SOF_TIMESTAMPING_SOFTWARE
+ { .so_timestamping = SOF_TIMESTAMPING_SOFTWARE
| SOF_TIMESTAMPING_RX_SOFTWARE },
- { swtstamp: true }
+ { .swtstamp = true }
},
{
- { so_timestamp: 1, so_timestamping: SOF_TIMESTAMPING_SOFTWARE
+ { .so_timestamp = 1, .so_timestamping = SOF_TIMESTAMPING_SOFTWARE
| SOF_TIMESTAMPING_RX_SOFTWARE },
- { tstamp: true, swtstamp: true }
+ { .tstamp = true, .swtstamp = true }
},
};
@@ -115,6 +116,9 @@ static struct option long_options[] = {
{ "tcp", no_argument, 0, 't' },
{ "udp", no_argument, 0, 'u' },
{ "ip", no_argument, 0, 'i' },
+ { "strict", no_argument, 0, 'S' },
+ { "ipv4", no_argument, 0, '4' },
+ { "ipv6", no_argument, 0, '6' },
{ NULL, 0, NULL, 0 },
};
@@ -270,37 +274,55 @@ void config_so_flags(int rcv, struct options o)
error(1, errno, "Failed to set SO_TIMESTAMPING");
}
-bool run_test_case(struct socket_type s, struct test_case t)
+bool run_test_case(struct socket_type *s, int test_num, char ip_version,
+ bool strict)
{
- int port = (s.type == SOCK_RAW) ? 0 : next_port++;
+ union {
+ struct sockaddr_in6 addr6;
+ struct sockaddr_in addr4;
+ struct sockaddr addr_un;
+ } addr;
int read_size = op_size;
- struct sockaddr_in addr;
+ int src, dst, rcv, port;
+ socklen_t addr_size;
bool failed = false;
- int src, dst, rcv;
- src = socket(AF_INET, s.type, s.protocol);
+ port = (s->type == SOCK_RAW) ? 0 : next_port++;
+ memset(&addr, 0, sizeof(addr));
+ if (ip_version == '4') {
+ addr.addr4.sin_family = AF_INET;
+ addr.addr4.sin_addr.s_addr = htonl(INADDR_LOOPBACK);
+ addr.addr4.sin_port = htons(port);
+ addr_size = sizeof(addr.addr4);
+ if (s->type == SOCK_RAW)
+ read_size += 20; /* for IPv4 header */
+ } else {
+ addr.addr6.sin6_family = AF_INET6;
+ addr.addr6.sin6_addr = in6addr_loopback;
+ addr.addr6.sin6_port = htons(port);
+ addr_size = sizeof(addr.addr6);
+ }
+ printf("Starting testcase %d over ipv%c...\n", test_num, ip_version);
+ src = socket(addr.addr_un.sa_family, s->type,
+ s->protocol);
if (src < 0)
error(1, errno, "Failed to open src socket");
- dst = socket(AF_INET, s.type, s.protocol);
+ dst = socket(addr.addr_un.sa_family, s->type,
+ s->protocol);
if (dst < 0)
error(1, errno, "Failed to open dst socket");
- memset(&addr, 0, sizeof(addr));
- addr.sin_family = AF_INET;
- addr.sin_addr.s_addr = htonl(INADDR_LOOPBACK);
- addr.sin_port = htons(port);
-
- if (bind(dst, (struct sockaddr *)&addr, sizeof(addr)) < 0)
+ if (bind(dst, &addr.addr_un, addr_size) < 0)
error(1, errno, "Failed to bind to port %d", port);
- if (s.type == SOCK_STREAM && (listen(dst, 1) < 0))
+ if (s->type == SOCK_STREAM && (listen(dst, 1) < 0))
error(1, errno, "Failed to listen");
- if (connect(src, (struct sockaddr *)&addr, sizeof(addr)) < 0)
+ if (connect(src, &addr.addr_un, addr_size) < 0)
error(1, errno, "Failed to connect");
- if (s.type == SOCK_STREAM) {
+ if (s->type == SOCK_STREAM) {
rcv = accept(dst, NULL, NULL);
if (rcv < 0)
error(1, errno, "Failed to accept");
@@ -309,17 +331,22 @@ bool run_test_case(struct socket_type s, struct test_case t)
rcv = dst;
}
- config_so_flags(rcv, t.sockopt);
+ config_so_flags(rcv, test_cases[test_num].sockopt);
usleep(20000); /* setsockopt for SO_TIMESTAMPING is asynchronous */
do_send(src);
- if (s.type == SOCK_RAW)
- read_size += 20; /* for IP header */
- failed = do_recv(rcv, read_size, t.expected);
+ failed = do_recv(rcv, read_size, test_cases[test_num].expected);
close(rcv);
close(src);
+ if (failed) {
+ printf("FAILURE in testcase %d over ipv%c ", test_num,
+ ip_version);
+ print_test_case(&test_cases[test_num]);
+ if (!strict && test_cases[test_num].warn_on_fail)
+ failed = false;
+ }
return failed;
}
@@ -327,10 +354,12 @@ int main(int argc, char **argv)
{
bool all_protocols = true;
bool all_tests = true;
+ bool cfg_ipv4 = false;
+ bool cfg_ipv6 = false;
+ bool strict = false;
int arg_index = 0;
int failures = 0;
- int s, t;
- char opt;
+ int s, t, opt;
while ((opt = getopt_long(argc, argv, "", long_options,
&arg_index)) != -1) {
@@ -363,6 +392,15 @@ int main(int argc, char **argv)
all_protocols = false;
socket_types[0].enabled = true;
break;
+ case 'S':
+ strict = true;
+ break;
+ case '4':
+ cfg_ipv4 = true;
+ break;
+ case '6':
+ cfg_ipv6 = true;
+ break;
default:
error(1, 0, "Failed to parse parameters.");
}
@@ -376,13 +414,14 @@ int main(int argc, char **argv)
for (t = 0; t < ARRAY_SIZE(test_cases); t++) {
if (!all_tests && !test_cases[t].enabled)
continue;
-
- printf("Starting testcase %d...\n", t);
- if (run_test_case(socket_types[s], test_cases[t])) {
- failures++;
- printf("FAILURE in test case ");
- print_test_case(&test_cases[t]);
- }
+ if (cfg_ipv4 || !cfg_ipv6)
+ if (run_test_case(&socket_types[s], t, '4',
+ strict))
+ failures++;
+ if (cfg_ipv6 || !cfg_ipv4)
+ if (run_test_case(&socket_types[s], t, '6',
+ strict))
+ failures++;
}
}
if (!failures)
diff --git a/tools/testing/selftests/net/rxtimestamp.sh b/tools/testing/selftests/net/rxtimestamp.sh
new file mode 100755
index 000000000000..91631e88bf46
--- /dev/null
+++ b/tools/testing/selftests/net/rxtimestamp.sh
@@ -0,0 +1,4 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+./in_netns.sh ./rxtimestamp $@
diff --git a/tools/testing/selftests/net/sctp_hello.c b/tools/testing/selftests/net/sctp_hello.c
new file mode 100644
index 000000000000..f02f1f95d227
--- /dev/null
+++ b/tools/testing/selftests/net/sctp_hello.c
@@ -0,0 +1,137 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+#include <sys/types.h>
+#include <netinet/in.h>
+#include <arpa/inet.h>
+
+static void set_addr(struct sockaddr_storage *ss, char *ip, char *port, int *len)
+{
+ if (ss->ss_family == AF_INET) {
+ struct sockaddr_in *a = (struct sockaddr_in *)ss;
+
+ a->sin_addr.s_addr = inet_addr(ip);
+ a->sin_port = htons(atoi(port));
+ *len = sizeof(*a);
+ } else {
+ struct sockaddr_in6 *a = (struct sockaddr_in6 *)ss;
+
+ a->sin6_family = AF_INET6;
+ inet_pton(AF_INET6, ip, &a->sin6_addr);
+ a->sin6_port = htons(atoi(port));
+ *len = sizeof(*a);
+ }
+}
+
+static int do_client(int argc, char *argv[])
+{
+ struct sockaddr_storage ss;
+ char buf[] = "hello";
+ int csk, ret, len;
+
+ if (argc < 5) {
+ printf("%s client -4|6 IP PORT [IP PORT]\n", argv[0]);
+ return -1;
+ }
+
+ bzero((void *)&ss, sizeof(ss));
+ ss.ss_family = !strcmp(argv[2], "-4") ? AF_INET : AF_INET6;
+ csk = socket(ss.ss_family, SOCK_STREAM, IPPROTO_SCTP);
+ if (csk < 0) {
+ printf("failed to create socket\n");
+ return -1;
+ }
+
+ if (argc >= 7) {
+ set_addr(&ss, argv[5], argv[6], &len);
+ ret = bind(csk, (struct sockaddr *)&ss, len);
+ if (ret < 0) {
+ printf("failed to bind to address\n");
+ return -1;
+ }
+ }
+
+ set_addr(&ss, argv[3], argv[4], &len);
+ ret = connect(csk, (struct sockaddr *)&ss, len);
+ if (ret < 0) {
+ printf("failed to connect to peer\n");
+ return -1;
+ }
+
+ ret = send(csk, buf, strlen(buf) + 1, 0);
+ if (ret < 0) {
+ printf("failed to send msg %d\n", ret);
+ return -1;
+ }
+ close(csk);
+
+ return 0;
+}
+
+int main(int argc, char *argv[])
+{
+ struct sockaddr_storage ss;
+ int lsk, csk, ret, len;
+ char buf[20];
+
+ if (argc < 2 || (strcmp(argv[1], "server") && strcmp(argv[1], "client"))) {
+ printf("%s server|client ...\n", argv[0]);
+ return -1;
+ }
+
+ if (!strcmp(argv[1], "client"))
+ return do_client(argc, argv);
+
+ if (argc < 5) {
+ printf("%s server -4|6 IP PORT [IFACE]\n", argv[0]);
+ return -1;
+ }
+
+ ss.ss_family = !strcmp(argv[2], "-4") ? AF_INET : AF_INET6;
+ lsk = socket(ss.ss_family, SOCK_STREAM, IPPROTO_SCTP);
+ if (lsk < 0) {
+ printf("failed to create lsk\n");
+ return -1;
+ }
+
+ if (argc >= 6) {
+ ret = setsockopt(lsk, SOL_SOCKET, SO_BINDTODEVICE,
+ argv[5], strlen(argv[5]) + 1);
+ if (ret < 0) {
+ printf("failed to bind to device\n");
+ return -1;
+ }
+ }
+
+ set_addr(&ss, argv[3], argv[4], &len);
+ ret = bind(lsk, (struct sockaddr *)&ss, len);
+ if (ret < 0) {
+ printf("failed to bind to address\n");
+ return -1;
+ }
+
+ ret = listen(lsk, 5);
+ if (ret < 0) {
+ printf("failed to listen on port\n");
+ return -1;
+ }
+
+ csk = accept(lsk, (struct sockaddr *)NULL, (socklen_t *)NULL);
+ if (csk < 0) {
+ printf("failed to accept new client\n");
+ return -1;
+ }
+
+ ret = recv(csk, buf, sizeof(buf), 0);
+ if (ret <= 0) {
+ printf("failed to recv msg %d\n", ret);
+ return -1;
+ }
+ close(csk);
+ close(lsk);
+
+ return 0;
+}
diff --git a/tools/testing/selftests/net/sctp_vrf.sh b/tools/testing/selftests/net/sctp_vrf.sh
new file mode 100755
index 000000000000..c854034b6aa1
--- /dev/null
+++ b/tools/testing/selftests/net/sctp_vrf.sh
@@ -0,0 +1,172 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+#
+# Testing For SCTP VRF.
+# TOPO: CLIENT_NS1 (veth1) <---> (veth1) -> vrf_s1
+# SERVER_NS
+# CLIENT_NS2 (veth1) <---> (veth2) -> vrf_s2
+
+source lib.sh
+CLIENT_IP4="10.0.0.1"
+CLIENT_IP6="2000::1"
+CLIENT_PORT=1234
+
+SERVER_IP4="10.0.0.2"
+SERVER_IP6="2000::2"
+SERVER_PORT=1234
+
+setup() {
+ modprobe sctp
+ modprobe sctp_diag
+ setup_ns CLIENT_NS1 CLIENT_NS2 SERVER_NS
+
+ ip net exec $CLIENT_NS1 sysctl -w net.ipv6.conf.default.accept_dad=0 2>&1 >/dev/null
+ ip net exec $CLIENT_NS2 sysctl -w net.ipv6.conf.default.accept_dad=0 2>&1 >/dev/null
+ ip net exec $SERVER_NS sysctl -w net.ipv6.conf.default.accept_dad=0 2>&1 >/dev/null
+
+ ip -n $SERVER_NS link add veth1 type veth peer name veth1 netns $CLIENT_NS1
+ ip -n $SERVER_NS link add veth2 type veth peer name veth1 netns $CLIENT_NS2
+
+ ip -n $CLIENT_NS1 link set veth1 up
+ ip -n $CLIENT_NS1 addr add $CLIENT_IP4/24 dev veth1
+ ip -n $CLIENT_NS1 addr add $CLIENT_IP6/24 dev veth1
+
+ ip -n $CLIENT_NS2 link set veth1 up
+ ip -n $CLIENT_NS2 addr add $CLIENT_IP4/24 dev veth1
+ ip -n $CLIENT_NS2 addr add $CLIENT_IP6/24 dev veth1
+
+ ip -n $SERVER_NS link add dummy1 type dummy
+ ip -n $SERVER_NS link set dummy1 up
+ ip -n $SERVER_NS link add vrf-1 type vrf table 10
+ ip -n $SERVER_NS link add vrf-2 type vrf table 20
+ ip -n $SERVER_NS link set vrf-1 up
+ ip -n $SERVER_NS link set vrf-2 up
+ ip -n $SERVER_NS link set veth1 master vrf-1
+ ip -n $SERVER_NS link set veth2 master vrf-2
+
+ ip -n $SERVER_NS addr add $SERVER_IP4/24 dev dummy1
+ ip -n $SERVER_NS addr add $SERVER_IP4/24 dev veth1
+ ip -n $SERVER_NS addr add $SERVER_IP4/24 dev veth2
+ ip -n $SERVER_NS addr add $SERVER_IP6/24 dev dummy1
+ ip -n $SERVER_NS addr add $SERVER_IP6/24 dev veth1
+ ip -n $SERVER_NS addr add $SERVER_IP6/24 dev veth2
+
+ ip -n $SERVER_NS link set veth1 up
+ ip -n $SERVER_NS link set veth2 up
+ ip -n $SERVER_NS route add table 10 $CLIENT_IP4 dev veth1 src $SERVER_IP4
+ ip -n $SERVER_NS route add table 20 $CLIENT_IP4 dev veth2 src $SERVER_IP4
+ ip -n $SERVER_NS route add $CLIENT_IP4 dev veth1 src $SERVER_IP4
+ ip -n $SERVER_NS route add table 10 $CLIENT_IP6 dev veth1 src $SERVER_IP6
+ ip -n $SERVER_NS route add table 20 $CLIENT_IP6 dev veth2 src $SERVER_IP6
+ ip -n $SERVER_NS route add $CLIENT_IP6 dev veth1 src $SERVER_IP6
+}
+
+cleanup() {
+ ip netns exec $SERVER_NS pkill sctp_hello 2>&1 >/dev/null
+ cleanup_ns $CLIENT_NS1 $CLIENT_NS2 $SERVER_NS
+}
+
+wait_server() {
+ local IFACE=$1
+ local CNT=0
+
+ until ip netns exec $SERVER_NS ss -lS src $SERVER_IP:$SERVER_PORT | \
+ grep LISTEN | grep "$IFACE" 2>&1 >/dev/null; do
+ [ $((CNT++)) = "20" ] && { RET=3; return $RET; }
+ sleep 0.1
+ done
+}
+
+do_test() {
+ local CLIENT_NS=$1
+ local IFACE=$2
+
+ ip netns exec $SERVER_NS pkill sctp_hello 2>&1 >/dev/null
+ ip netns exec $SERVER_NS ./sctp_hello server $AF $SERVER_IP \
+ $SERVER_PORT $IFACE 2>&1 >/dev/null &
+ disown
+ wait_server $IFACE || return $RET
+ timeout 3 ip netns exec $CLIENT_NS ./sctp_hello client $AF \
+ $SERVER_IP $SERVER_PORT $CLIENT_IP $CLIENT_PORT 2>&1 >/dev/null
+ RET=$?
+ return $RET
+}
+
+do_testx() {
+ local IFACE1=$1
+ local IFACE2=$2
+
+ ip netns exec $SERVER_NS pkill sctp_hello 2>&1 >/dev/null
+ ip netns exec $SERVER_NS ./sctp_hello server $AF $SERVER_IP \
+ $SERVER_PORT $IFACE1 2>&1 >/dev/null &
+ disown
+ wait_server $IFACE1 || return $RET
+ ip netns exec $SERVER_NS ./sctp_hello server $AF $SERVER_IP \
+ $SERVER_PORT $IFACE2 2>&1 >/dev/null &
+ disown
+ wait_server $IFACE2 || return $RET
+ timeout 3 ip netns exec $CLIENT_NS1 ./sctp_hello client $AF \
+ $SERVER_IP $SERVER_PORT $CLIENT_IP $CLIENT_PORT 2>&1 >/dev/null && \
+ timeout 3 ip netns exec $CLIENT_NS2 ./sctp_hello client $AF \
+ $SERVER_IP $SERVER_PORT $CLIENT_IP $CLIENT_PORT 2>&1 >/dev/null
+ RET=$?
+ return $RET
+}
+
+testup() {
+ ip netns exec $SERVER_NS sysctl -w net.sctp.l3mdev_accept=1 2>&1 >/dev/null
+ echo -n "TEST 01: nobind, connect from client 1, l3mdev_accept=1, Y "
+ do_test $CLIENT_NS1 || { echo "[FAIL]"; return $RET; }
+ echo "[PASS]"
+
+ echo -n "TEST 02: nobind, connect from client 2, l3mdev_accept=1, N "
+ do_test $CLIENT_NS2 && { echo "[FAIL]"; return $RET; }
+ echo "[PASS]"
+
+ ip netns exec $SERVER_NS sysctl -w net.sctp.l3mdev_accept=0 2>&1 >/dev/null
+ echo -n "TEST 03: nobind, connect from client 1, l3mdev_accept=0, N "
+ do_test $CLIENT_NS1 && { echo "[FAIL]"; return $RET; }
+ echo "[PASS]"
+
+ echo -n "TEST 04: nobind, connect from client 2, l3mdev_accept=0, N "
+ do_test $CLIENT_NS2 && { echo "[FAIL]"; return $RET; }
+ echo "[PASS]"
+
+ echo -n "TEST 05: bind veth2 in server, connect from client 1, N "
+ do_test $CLIENT_NS1 veth2 && { echo "[FAIL]"; return $RET; }
+ echo "[PASS]"
+
+ echo -n "TEST 06: bind veth1 in server, connect from client 1, Y "
+ do_test $CLIENT_NS1 veth1 || { echo "[FAIL]"; return $RET; }
+ echo "[PASS]"
+
+ echo -n "TEST 07: bind vrf-1 in server, connect from client 1, Y "
+ do_test $CLIENT_NS1 vrf-1 || { echo "[FAIL]"; return $RET; }
+ echo "[PASS]"
+
+ echo -n "TEST 08: bind vrf-2 in server, connect from client 1, N "
+ do_test $CLIENT_NS1 vrf-2 && { echo "[FAIL]"; return $RET; }
+ echo "[PASS]"
+
+ echo -n "TEST 09: bind vrf-2 in server, connect from client 2, Y "
+ do_test $CLIENT_NS2 vrf-2 || { echo "[FAIL]"; return $RET; }
+ echo "[PASS]"
+
+ echo -n "TEST 10: bind vrf-1 in server, connect from client 2, N "
+ do_test $CLIENT_NS2 vrf-1 && { echo "[FAIL]"; return $RET; }
+ echo "[PASS]"
+
+ echo -n "TEST 11: bind vrf-1 & 2 in server, connect from client 1 & 2, Y "
+ do_testx vrf-1 vrf-2 || { echo "[FAIL]"; return $RET; }
+ echo "[PASS]"
+
+ echo -n "TEST 12: bind vrf-2 & 1 in server, connect from client 1 & 2, N "
+ do_testx vrf-2 vrf-1 || { echo "[FAIL]"; return $RET; }
+ echo "[PASS]"
+}
+
+trap cleanup EXIT
+setup && echo "Testing For SCTP VRF:" && \
+CLIENT_IP=$CLIENT_IP4 SERVER_IP=$SERVER_IP4 AF="-4" testup && echo "***v4 Tests Done***" &&
+CLIENT_IP=$CLIENT_IP6 SERVER_IP=$SERVER_IP6 AF="-6" testup && echo "***v6 Tests Done***"
+exit $?
diff --git a/tools/testing/selftests/net/settings b/tools/testing/selftests/net/settings
new file mode 100644
index 000000000000..ed8418e8217a
--- /dev/null
+++ b/tools/testing/selftests/net/settings
@@ -0,0 +1 @@
+timeout=3600
diff --git a/tools/testing/selftests/net/setup_loopback.sh b/tools/testing/selftests/net/setup_loopback.sh
new file mode 100644
index 000000000000..2070b57849de
--- /dev/null
+++ b/tools/testing/selftests/net/setup_loopback.sh
@@ -0,0 +1,120 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+readonly FLUSH_PATH="/sys/class/net/${dev}/gro_flush_timeout"
+readonly IRQ_PATH="/sys/class/net/${dev}/napi_defer_hard_irqs"
+readonly FLUSH_TIMEOUT="$(< ${FLUSH_PATH})"
+readonly HARD_IRQS="$(< ${IRQ_PATH})"
+readonly server_ns=$(mktemp -u server-XXXXXXXX)
+readonly client_ns=$(mktemp -u client-XXXXXXXX)
+
+netdev_check_for_carrier() {
+ local -r dev="$1"
+
+ for i in {1..5}; do
+ carrier="$(cat /sys/class/net/${dev}/carrier)"
+ if [[ "${carrier}" -ne 1 ]] ; then
+ echo "carrier not ready yet..." >&2
+ sleep 1
+ else
+ echo "carrier ready" >&2
+ break
+ fi
+ done
+ echo "${carrier}"
+}
+
+# Assumes that there is no existing ipvlan device on the physical device
+setup_loopback_environment() {
+ local dev="$1"
+
+ # Fail hard if cannot turn on loopback mode for current NIC
+ ethtool -K "${dev}" loopback on || exit 1
+ sleep 1
+
+ # Check for the carrier
+ carrier=$(netdev_check_for_carrier ${dev})
+ if [[ "${carrier}" -ne 1 ]] ; then
+ echo "setup_loopback_environment failed"
+ exit 1
+ fi
+}
+
+setup_macvlan_ns(){
+ local -r link_dev="$1"
+ local -r ns_name="$2"
+ local -r ns_dev="$3"
+ local -r ns_mac="$4"
+ local -r addr="$5"
+
+ ip link add link "${link_dev}" dev "${ns_dev}" \
+ address "${ns_mac}" type macvlan
+ exit_code=$?
+ if [[ "${exit_code}" -ne 0 ]]; then
+ echo "setup_macvlan_ns failed"
+ exit $exit_code
+ fi
+
+ [[ -e /var/run/netns/"${ns_name}" ]] || ip netns add "${ns_name}"
+ ip link set dev "${ns_dev}" netns "${ns_name}"
+ ip -netns "${ns_name}" link set dev "${ns_dev}" up
+ if [[ -n "${addr}" ]]; then
+ ip -netns "${ns_name}" addr add dev "${ns_dev}" "${addr}"
+ fi
+
+ sleep 1
+}
+
+cleanup_macvlan_ns(){
+ while (( $# >= 2 )); do
+ ns_name="$1"
+ ns_dev="$2"
+ ip -netns "${ns_name}" link del dev "${ns_dev}"
+ ip netns del "${ns_name}"
+ shift 2
+ done
+}
+
+cleanup_loopback(){
+ local -r dev="$1"
+
+ ethtool -K "${dev}" loopback off
+ sleep 1
+
+ # Check for the carrier
+ carrier=$(netdev_check_for_carrier ${dev})
+ if [[ "${carrier}" -ne 1 ]] ; then
+ echo "setup_loopback_environment failed"
+ exit 1
+ fi
+}
+
+setup_interrupt() {
+ # Use timer on host to trigger the network stack
+ # Also disable device interrupt to not depend on NIC interrupt
+ # Reduce test flakiness caused by unexpected interrupts
+ echo 100000 >"${FLUSH_PATH}"
+ echo 50 >"${IRQ_PATH}"
+}
+
+setup_ns() {
+ # Set up server_ns namespace and client_ns namespace
+ setup_macvlan_ns "${dev}" ${server_ns} server "${SERVER_MAC}"
+ setup_macvlan_ns "${dev}" ${client_ns} client "${CLIENT_MAC}"
+}
+
+cleanup_ns() {
+ cleanup_macvlan_ns ${server_ns} server ${client_ns} client
+}
+
+setup() {
+ setup_loopback_environment "${dev}"
+ setup_interrupt
+}
+
+cleanup() {
+ cleanup_loopback "${dev}"
+
+ echo "${FLUSH_TIMEOUT}" >"${FLUSH_PATH}"
+ echo "${HARD_IRQS}" >"${IRQ_PATH}"
+}
diff --git a/tools/testing/selftests/net/setup_veth.sh b/tools/testing/selftests/net/setup_veth.sh
new file mode 100644
index 000000000000..1f78a87f6f37
--- /dev/null
+++ b/tools/testing/selftests/net/setup_veth.sh
@@ -0,0 +1,44 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+readonly server_ns=$(mktemp -u server-XXXXXXXX)
+readonly client_ns=$(mktemp -u client-XXXXXXXX)
+
+setup_veth_ns() {
+ local -r link_dev="$1"
+ local -r ns_name="$2"
+ local -r ns_dev="$3"
+ local -r ns_mac="$4"
+
+ [[ -e /var/run/netns/"${ns_name}" ]] || ip netns add "${ns_name}"
+ echo 1000000 > "/sys/class/net/${ns_dev}/gro_flush_timeout"
+ ip link set dev "${ns_dev}" netns "${ns_name}" mtu 65535
+ ip -netns "${ns_name}" link set dev "${ns_dev}" up
+
+ ip netns exec "${ns_name}" ethtool -K "${ns_dev}" gro on tso off
+}
+
+setup_ns() {
+ # Set up server_ns namespace and client_ns namespace
+ ip link add name server type veth peer name client
+
+ setup_veth_ns "${dev}" ${server_ns} server "${SERVER_MAC}"
+ setup_veth_ns "${dev}" ${client_ns} client "${CLIENT_MAC}"
+}
+
+cleanup_ns() {
+ local ns_name
+
+ for ns_name in ${client_ns} ${server_ns}; do
+ [[ -e /var/run/netns/"${ns_name}" ]] && ip netns del "${ns_name}"
+ done
+}
+
+setup() {
+ # no global init setup step needed
+ :
+}
+
+cleanup() {
+ cleanup_ns
+}
diff --git a/tools/testing/selftests/net/sk_bind_sendto_listen.c b/tools/testing/selftests/net/sk_bind_sendto_listen.c
new file mode 100644
index 000000000000..b420d830f72c
--- /dev/null
+++ b/tools/testing/selftests/net/sk_bind_sendto_listen.c
@@ -0,0 +1,80 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include <arpa/inet.h>
+#include <error.h>
+#include <errno.h>
+#include <unistd.h>
+
+int main(void)
+{
+ int fd1, fd2, one = 1;
+ struct sockaddr_in6 bind_addr = {
+ .sin6_family = AF_INET6,
+ .sin6_port = htons(20000),
+ .sin6_flowinfo = htonl(0),
+ .sin6_addr = {},
+ .sin6_scope_id = 0,
+ };
+
+ inet_pton(AF_INET6, "::", &bind_addr.sin6_addr);
+
+ fd1 = socket(AF_INET6, SOCK_STREAM, IPPROTO_IP);
+ if (fd1 < 0) {
+ error(1, errno, "socket fd1");
+ return -1;
+ }
+
+ if (setsockopt(fd1, SOL_SOCKET, SO_REUSEADDR, &one, sizeof(one))) {
+ error(1, errno, "setsockopt(SO_REUSEADDR) fd1");
+ goto out_err1;
+ }
+
+ if (bind(fd1, (struct sockaddr *)&bind_addr, sizeof(bind_addr))) {
+ error(1, errno, "bind fd1");
+ goto out_err1;
+ }
+
+ if (sendto(fd1, NULL, 0, MSG_FASTOPEN, (struct sockaddr *)&bind_addr,
+ sizeof(bind_addr))) {
+ error(1, errno, "sendto fd1");
+ goto out_err1;
+ }
+
+ fd2 = socket(AF_INET6, SOCK_STREAM, IPPROTO_IP);
+ if (fd2 < 0) {
+ error(1, errno, "socket fd2");
+ goto out_err1;
+ }
+
+ if (setsockopt(fd2, SOL_SOCKET, SO_REUSEADDR, &one, sizeof(one))) {
+ error(1, errno, "setsockopt(SO_REUSEADDR) fd2");
+ goto out_err2;
+ }
+
+ if (bind(fd2, (struct sockaddr *)&bind_addr, sizeof(bind_addr))) {
+ error(1, errno, "bind fd2");
+ goto out_err2;
+ }
+
+ if (sendto(fd2, NULL, 0, MSG_FASTOPEN, (struct sockaddr *)&bind_addr,
+ sizeof(bind_addr)) != -1) {
+ error(1, errno, "sendto fd2");
+ goto out_err2;
+ }
+
+ if (listen(fd2, 0)) {
+ error(1, errno, "listen");
+ goto out_err2;
+ }
+
+ close(fd2);
+ close(fd1);
+ return 0;
+
+out_err2:
+ close(fd2);
+
+out_err1:
+ close(fd1);
+ return -1;
+}
diff --git a/tools/testing/selftests/net/sk_connect_zero_addr.c b/tools/testing/selftests/net/sk_connect_zero_addr.c
new file mode 100644
index 000000000000..4be418aefd9f
--- /dev/null
+++ b/tools/testing/selftests/net/sk_connect_zero_addr.c
@@ -0,0 +1,62 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include <arpa/inet.h>
+#include <error.h>
+#include <errno.h>
+#include <unistd.h>
+
+int main(void)
+{
+ int fd1, fd2, one = 1;
+ struct sockaddr_in6 bind_addr = {
+ .sin6_family = AF_INET6,
+ .sin6_port = htons(20000),
+ .sin6_flowinfo = htonl(0),
+ .sin6_addr = {},
+ .sin6_scope_id = 0,
+ };
+
+ inet_pton(AF_INET6, "::", &bind_addr.sin6_addr);
+
+ fd1 = socket(AF_INET6, SOCK_STREAM, IPPROTO_IP);
+ if (fd1 < 0) {
+ error(1, errno, "socket fd1");
+ return -1;
+ }
+
+ if (setsockopt(fd1, SOL_SOCKET, SO_REUSEADDR, &one, sizeof(one))) {
+ error(1, errno, "setsockopt(SO_REUSEADDR) fd1");
+ goto out_err1;
+ }
+
+ if (bind(fd1, (struct sockaddr *)&bind_addr, sizeof(bind_addr))) {
+ error(1, errno, "bind fd1");
+ goto out_err1;
+ }
+
+ if (listen(fd1, 0)) {
+ error(1, errno, "listen");
+ goto out_err1;
+ }
+
+ fd2 = socket(AF_INET6, SOCK_STREAM, IPPROTO_IP);
+ if (fd2 < 0) {
+ error(1, errno, "socket fd2");
+ goto out_err1;
+ }
+
+ if (connect(fd2, (struct sockaddr *)&bind_addr, sizeof(bind_addr))) {
+ error(1, errno, "bind fd2");
+ goto out_err2;
+ }
+
+ close(fd2);
+ close(fd1);
+ return 0;
+
+out_err2:
+ close(fd2);
+out_err1:
+ close(fd1);
+ return -1;
+}
diff --git a/tools/testing/selftests/net/so_incoming_cpu.c b/tools/testing/selftests/net/so_incoming_cpu.c
new file mode 100644
index 000000000000..e9fa14e10732
--- /dev/null
+++ b/tools/testing/selftests/net/so_incoming_cpu.c
@@ -0,0 +1,274 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright Amazon.com Inc. or its affiliates. */
+#define _GNU_SOURCE
+#include <sched.h>
+
+#include <fcntl.h>
+
+#include <netinet/in.h>
+#include <sys/socket.h>
+#include <sys/sysinfo.h>
+
+#include "../kselftest_harness.h"
+
+FIXTURE(so_incoming_cpu)
+{
+ int *servers;
+ union {
+ struct sockaddr addr;
+ struct sockaddr_in in_addr;
+ };
+ socklen_t addrlen;
+};
+
+enum when_to_set {
+ BEFORE_REUSEPORT,
+ BEFORE_LISTEN,
+ AFTER_LISTEN,
+ AFTER_ALL_LISTEN,
+};
+
+FIXTURE_VARIANT(so_incoming_cpu)
+{
+ int when_to_set;
+};
+
+FIXTURE_VARIANT_ADD(so_incoming_cpu, before_reuseport)
+{
+ .when_to_set = BEFORE_REUSEPORT,
+};
+
+FIXTURE_VARIANT_ADD(so_incoming_cpu, before_listen)
+{
+ .when_to_set = BEFORE_LISTEN,
+};
+
+FIXTURE_VARIANT_ADD(so_incoming_cpu, after_listen)
+{
+ .when_to_set = AFTER_LISTEN,
+};
+
+FIXTURE_VARIANT_ADD(so_incoming_cpu, after_all_listen)
+{
+ .when_to_set = AFTER_ALL_LISTEN,
+};
+
+static void write_sysctl(struct __test_metadata *_metadata,
+ char *filename, char *string)
+{
+ int fd, len, ret;
+
+ fd = open(filename, O_WRONLY);
+ ASSERT_NE(fd, -1);
+
+ len = strlen(string);
+ ret = write(fd, string, len);
+ ASSERT_EQ(ret, len);
+}
+
+static void setup_netns(struct __test_metadata *_metadata)
+{
+ ASSERT_EQ(unshare(CLONE_NEWNET), 0);
+ ASSERT_EQ(system("ip link set lo up"), 0);
+
+ write_sysctl(_metadata, "/proc/sys/net/ipv4/ip_local_port_range", "10000 60001");
+ write_sysctl(_metadata, "/proc/sys/net/ipv4/tcp_tw_reuse", "0");
+}
+
+#define NR_PORT (60001 - 10000 - 1)
+#define NR_CLIENT_PER_SERVER_DEFAULT 32
+static int nr_client_per_server, nr_server, nr_client;
+
+FIXTURE_SETUP(so_incoming_cpu)
+{
+ setup_netns(_metadata);
+
+ nr_server = get_nprocs();
+ ASSERT_LE(2, nr_server);
+
+ if (NR_CLIENT_PER_SERVER_DEFAULT * nr_server < NR_PORT)
+ nr_client_per_server = NR_CLIENT_PER_SERVER_DEFAULT;
+ else
+ nr_client_per_server = NR_PORT / nr_server;
+
+ nr_client = nr_client_per_server * nr_server;
+
+ self->servers = malloc(sizeof(int) * nr_server);
+ ASSERT_NE(self->servers, NULL);
+
+ self->in_addr.sin_family = AF_INET;
+ self->in_addr.sin_addr.s_addr = htonl(INADDR_LOOPBACK);
+ self->in_addr.sin_port = htons(0);
+ self->addrlen = sizeof(struct sockaddr_in);
+}
+
+FIXTURE_TEARDOWN(so_incoming_cpu)
+{
+ int i;
+
+ for (i = 0; i < nr_server; i++)
+ close(self->servers[i]);
+
+ free(self->servers);
+}
+
+void set_so_incoming_cpu(struct __test_metadata *_metadata, int fd, int cpu)
+{
+ int ret;
+
+ ret = setsockopt(fd, SOL_SOCKET, SO_INCOMING_CPU, &cpu, sizeof(int));
+ ASSERT_EQ(ret, 0);
+}
+
+int create_server(struct __test_metadata *_metadata,
+ FIXTURE_DATA(so_incoming_cpu) *self,
+ const FIXTURE_VARIANT(so_incoming_cpu) *variant,
+ int cpu)
+{
+ int fd, ret;
+
+ fd = socket(AF_INET, SOCK_STREAM | SOCK_NONBLOCK, 0);
+ ASSERT_NE(fd, -1);
+
+ if (variant->when_to_set == BEFORE_REUSEPORT)
+ set_so_incoming_cpu(_metadata, fd, cpu);
+
+ ret = setsockopt(fd, SOL_SOCKET, SO_REUSEPORT, &(int){1}, sizeof(int));
+ ASSERT_EQ(ret, 0);
+
+ ret = bind(fd, &self->addr, self->addrlen);
+ ASSERT_EQ(ret, 0);
+
+ if (variant->when_to_set == BEFORE_LISTEN)
+ set_so_incoming_cpu(_metadata, fd, cpu);
+
+ /* We don't use nr_client_per_server here not to block
+ * this test at connect() if SO_INCOMING_CPU is broken.
+ */
+ ret = listen(fd, nr_client);
+ ASSERT_EQ(ret, 0);
+
+ if (variant->when_to_set == AFTER_LISTEN)
+ set_so_incoming_cpu(_metadata, fd, cpu);
+
+ return fd;
+}
+
+void create_servers(struct __test_metadata *_metadata,
+ FIXTURE_DATA(so_incoming_cpu) *self,
+ const FIXTURE_VARIANT(so_incoming_cpu) *variant)
+{
+ int i, ret;
+
+ for (i = 0; i < nr_server; i++) {
+ self->servers[i] = create_server(_metadata, self, variant, i);
+
+ if (i == 0) {
+ ret = getsockname(self->servers[i], &self->addr, &self->addrlen);
+ ASSERT_EQ(ret, 0);
+ }
+ }
+
+ if (variant->when_to_set == AFTER_ALL_LISTEN) {
+ for (i = 0; i < nr_server; i++)
+ set_so_incoming_cpu(_metadata, self->servers[i], i);
+ }
+}
+
+void create_clients(struct __test_metadata *_metadata,
+ FIXTURE_DATA(so_incoming_cpu) *self)
+{
+ cpu_set_t cpu_set;
+ int i, j, fd, ret;
+
+ for (i = 0; i < nr_server; i++) {
+ CPU_ZERO(&cpu_set);
+
+ CPU_SET(i, &cpu_set);
+ ASSERT_EQ(CPU_COUNT(&cpu_set), 1);
+ ASSERT_NE(CPU_ISSET(i, &cpu_set), 0);
+
+ /* Make sure SYN will be processed on the i-th CPU
+ * and finally distributed to the i-th listener.
+ */
+ ret = sched_setaffinity(0, sizeof(cpu_set), &cpu_set);
+ ASSERT_EQ(ret, 0);
+
+ for (j = 0; j < nr_client_per_server; j++) {
+ fd = socket(AF_INET, SOCK_STREAM, 0);
+ ASSERT_NE(fd, -1);
+
+ ret = connect(fd, &self->addr, self->addrlen);
+ ASSERT_EQ(ret, 0);
+
+ close(fd);
+ }
+ }
+}
+
+void verify_incoming_cpu(struct __test_metadata *_metadata,
+ FIXTURE_DATA(so_incoming_cpu) *self)
+{
+ int i, j, fd, cpu, ret, total = 0;
+ socklen_t len = sizeof(int);
+
+ for (i = 0; i < nr_server; i++) {
+ for (j = 0; j < nr_client_per_server; j++) {
+ /* If we see -EAGAIN here, SO_INCOMING_CPU is broken */
+ fd = accept(self->servers[i], &self->addr, &self->addrlen);
+ ASSERT_NE(fd, -1);
+
+ ret = getsockopt(fd, SOL_SOCKET, SO_INCOMING_CPU, &cpu, &len);
+ ASSERT_EQ(ret, 0);
+ ASSERT_EQ(cpu, i);
+
+ close(fd);
+ total++;
+ }
+ }
+
+ ASSERT_EQ(total, nr_client);
+ TH_LOG("SO_INCOMING_CPU is very likely to be "
+ "working correctly with %d sockets.", total);
+}
+
+TEST_F(so_incoming_cpu, test1)
+{
+ create_servers(_metadata, self, variant);
+ create_clients(_metadata, self);
+ verify_incoming_cpu(_metadata, self);
+}
+
+TEST_F(so_incoming_cpu, test2)
+{
+ int server;
+
+ create_servers(_metadata, self, variant);
+
+ /* No CPU specified */
+ server = create_server(_metadata, self, variant, -1);
+ close(server);
+
+ create_clients(_metadata, self);
+ verify_incoming_cpu(_metadata, self);
+}
+
+TEST_F(so_incoming_cpu, test3)
+{
+ int server, client;
+
+ create_servers(_metadata, self, variant);
+
+ /* No CPU specified */
+ server = create_server(_metadata, self, variant, -1);
+
+ create_clients(_metadata, self);
+
+ /* Never receive any requests */
+ client = accept(server, &self->addr, &self->addrlen);
+ ASSERT_EQ(client, -1);
+
+ verify_incoming_cpu(_metadata, self);
+}
+
+TEST_HARNESS_MAIN
diff --git a/tools/testing/selftests/net/so_netns_cookie.c b/tools/testing/selftests/net/so_netns_cookie.c
new file mode 100644
index 000000000000..b39e87e967cd
--- /dev/null
+++ b/tools/testing/selftests/net/so_netns_cookie.c
@@ -0,0 +1,61 @@
+// SPDX-License-Identifier: GPL-2.0
+#define _GNU_SOURCE
+#include <sched.h>
+#include <unistd.h>
+#include <stdio.h>
+#include <errno.h>
+#include <string.h>
+#include <stdlib.h>
+#include <stdint.h>
+#include <sys/types.h>
+#include <sys/socket.h>
+
+#ifndef SO_NETNS_COOKIE
+#define SO_NETNS_COOKIE 71
+#endif
+
+#define pr_err(fmt, ...) \
+ ({ \
+ fprintf(stderr, "%s:%d:" fmt ": %m\n", \
+ __func__, __LINE__, ##__VA_ARGS__); \
+ 1; \
+ })
+
+int main(int argc, char *argvp[])
+{
+ uint64_t cookie1, cookie2;
+ socklen_t vallen;
+ int sock1, sock2;
+
+ sock1 = socket(AF_INET, SOCK_STREAM, 0);
+ if (sock1 < 0)
+ return pr_err("Unable to create TCP socket");
+
+ vallen = sizeof(cookie1);
+ if (getsockopt(sock1, SOL_SOCKET, SO_NETNS_COOKIE, &cookie1, &vallen) != 0)
+ return pr_err("getsockopt(SOL_SOCKET, SO_NETNS_COOKIE)");
+
+ if (!cookie1)
+ return pr_err("SO_NETNS_COOKIE returned zero cookie");
+
+ if (unshare(CLONE_NEWNET))
+ return pr_err("unshare");
+
+ sock2 = socket(AF_INET, SOCK_STREAM, 0);
+ if (sock2 < 0)
+ return pr_err("Unable to create TCP socket");
+
+ vallen = sizeof(cookie2);
+ if (getsockopt(sock2, SOL_SOCKET, SO_NETNS_COOKIE, &cookie2, &vallen) != 0)
+ return pr_err("getsockopt(SOL_SOCKET, SO_NETNS_COOKIE)");
+
+ if (!cookie2)
+ return pr_err("SO_NETNS_COOKIE returned zero cookie");
+
+ if (cookie1 == cookie2)
+ return pr_err("SO_NETNS_COOKIE returned identical cookies for distinct ns");
+
+ close(sock1);
+ close(sock2);
+ return 0;
+}
diff --git a/tools/testing/selftests/net/so_txtime.c b/tools/testing/selftests/net/so_txtime.c
index ceaad78e9667..8457b7ccbc09 100644
--- a/tools/testing/selftests/net/so_txtime.c
+++ b/tools/testing/selftests/net/so_txtime.c
@@ -2,9 +2,12 @@
/*
* Test the SO_TXTIME API
*
- * Takes two streams of { payload, delivery time }[], one input and one output.
- * Sends the input stream and verifies arrival matches the output stream.
- * The two streams can differ due to out-of-order delivery and drops.
+ * Takes a stream of { payload, delivery time }[], to be sent across two
+ * processes. Start this program on two separate network namespaces or
+ * connected hosts, one instance in transmit mode and the other in receive
+ * mode using the '-r' option. Receiver will compare arrival timestamps to
+ * the expected stream. Sender will read transmit timestamps from the error
+ * queue. The streams can differ due to out-of-order delivery and drops.
*/
#define _GNU_SOURCE
@@ -28,14 +31,17 @@
#include <sys/types.h>
#include <time.h>
#include <unistd.h>
+#include <poll.h>
static int cfg_clockid = CLOCK_TAI;
-static bool cfg_do_ipv4;
-static bool cfg_do_ipv6;
static uint16_t cfg_port = 8000;
static int cfg_variance_us = 4000;
+static uint64_t cfg_start_time_ns;
+static int cfg_mark;
+static bool cfg_rx;
static uint64_t glob_tstart;
+static uint64_t tdeliver_max;
/* encode one timed transmission (of a 1B payload) */
struct timed_send {
@@ -44,18 +50,21 @@ struct timed_send {
};
#define MAX_NUM_PKT 8
-static struct timed_send cfg_in[MAX_NUM_PKT];
-static struct timed_send cfg_out[MAX_NUM_PKT];
+static struct timed_send cfg_buf[MAX_NUM_PKT];
static int cfg_num_pkt;
static int cfg_errq_level;
static int cfg_errq_type;
-static uint64_t gettime_ns(void)
+static struct sockaddr_storage cfg_dst_addr;
+static struct sockaddr_storage cfg_src_addr;
+static socklen_t cfg_alen;
+
+static uint64_t gettime_ns(clockid_t clock)
{
struct timespec ts;
- if (clock_gettime(cfg_clockid, &ts))
+ if (clock_gettime(clock, &ts))
error(1, errno, "gettime");
return ts.tv_sec * (1000ULL * 1000 * 1000) + ts.tv_nsec;
@@ -75,6 +84,8 @@ static void do_send_one(int fdt, struct timed_send *ts)
msg.msg_iov = &iov;
msg.msg_iovlen = 1;
+ msg.msg_name = (struct sockaddr *)&cfg_dst_addr;
+ msg.msg_namelen = cfg_alen;
if (ts->delay_us >= 0) {
memset(control, 0, sizeof(control));
@@ -82,6 +93,8 @@ static void do_send_one(int fdt, struct timed_send *ts)
msg.msg_controllen = sizeof(control);
tdeliver = glob_tstart + ts->delay_us * 1000;
+ tdeliver_max = tdeliver_max > tdeliver ?
+ tdeliver_max : tdeliver;
cm = CMSG_FIRSTHDR(&msg);
cm->cmsg_level = SOL_SOCKET;
@@ -98,7 +111,7 @@ static void do_send_one(int fdt, struct timed_send *ts)
}
-static bool do_recv_one(int fdr, struct timed_send *ts)
+static void do_recv_one(int fdr, struct timed_send *ts)
{
int64_t tstop, texpect;
char rbuf[2];
@@ -106,13 +119,13 @@ static bool do_recv_one(int fdr, struct timed_send *ts)
ret = recv(fdr, rbuf, sizeof(rbuf), 0);
if (ret == -1 && errno == EAGAIN)
- return true;
+ error(1, EAGAIN, "recv: timeout");
if (ret == -1)
error(1, errno, "read");
if (ret != 1)
error(1, 0, "read: %dB", ret);
- tstop = (gettime_ns() - glob_tstart) / 1000;
+ tstop = (gettime_ns(cfg_clockid) - glob_tstart) / 1000;
texpect = ts->delay_us >= 0 ? ts->delay_us : 0;
fprintf(stderr, "payload:%c delay:%lld expected:%lld (us)\n",
@@ -121,10 +134,11 @@ static bool do_recv_one(int fdr, struct timed_send *ts)
if (rbuf[0] != ts->data)
error(1, 0, "payload mismatch. expected %c", ts->data);
- if (labs(tstop - texpect) > cfg_variance_us)
- error(1, 0, "exceeds variance (%d us)", cfg_variance_us);
-
- return false;
+ if (llabs(tstop - texpect) > cfg_variance_us) {
+ fprintf(stderr, "exceeds variance (%d us)\n", cfg_variance_us);
+ if (!getenv("KSFT_MACHINE_SLOW"))
+ exit(1);
+ }
}
static void do_recv_verify_empty(int fdr)
@@ -137,18 +151,18 @@ static void do_recv_verify_empty(int fdr)
error(1, 0, "recv: not empty as expected (%d, %d)", ret, errno);
}
-static void do_recv_errqueue_timeout(int fdt)
+static int do_recv_errqueue_timeout(int fdt)
{
char control[CMSG_SPACE(sizeof(struct sock_extended_err)) +
CMSG_SPACE(sizeof(struct sockaddr_in6))] = {0};
char data[sizeof(struct ethhdr) + sizeof(struct ipv6hdr) +
sizeof(struct udphdr) + 1];
struct sock_extended_err *err;
+ int ret, num_tstamp = 0;
struct msghdr msg = {0};
struct iovec iov = {0};
struct cmsghdr *cm;
int64_t tstamp = 0;
- int ret;
iov.iov_base = data;
iov.iov_len = sizeof(data);
@@ -196,7 +210,7 @@ static void do_recv_errqueue_timeout(int fdt)
default:
error(1, 0, "errqueue: errno %u code %u\n",
err->ee_errno, err->ee_code);
- };
+ }
tstamp = ((int64_t) err->ee_data) << 32 | err->ee_info;
tstamp -= (int64_t) glob_tstart;
@@ -206,9 +220,47 @@ static void do_recv_errqueue_timeout(int fdt)
msg.msg_flags = 0;
msg.msg_controllen = sizeof(control);
+ num_tstamp++;
}
- error(1, 0, "recv: timeout");
+ return num_tstamp;
+}
+
+static void recv_errqueue_msgs(int fdt)
+{
+ struct pollfd pfd = { .fd = fdt, .events = POLLERR };
+ const int timeout_ms = 10;
+ int ret, num_tstamp = 0;
+
+ do {
+ ret = poll(&pfd, 1, timeout_ms);
+ if (ret == -1)
+ error(1, errno, "poll");
+
+ if (ret && (pfd.revents & POLLERR))
+ num_tstamp += do_recv_errqueue_timeout(fdt);
+
+ if (num_tstamp == cfg_num_pkt)
+ break;
+
+ } while (gettime_ns(cfg_clockid) < tdeliver_max);
+}
+
+static void start_time_wait(void)
+{
+ uint64_t now;
+ int err;
+
+ if (!cfg_start_time_ns)
+ return;
+
+ now = gettime_ns(CLOCK_REALTIME);
+ if (cfg_start_time_ns < now)
+ return;
+
+ err = usleep((cfg_start_time_ns - now) / 1000);
+ if (err)
+ error(1, errno, "usleep");
}
static void setsockopt_txtime(int fd)
@@ -245,6 +297,10 @@ static int setup_tx(struct sockaddr *addr, socklen_t alen)
setsockopt_txtime(fd);
+ if (cfg_mark &&
+ setsockopt(fd, SOL_SOCKET, SO_MARK, &cfg_mark, sizeof(cfg_mark)))
+ error(1, errno, "setsockopt mark");
+
return fd;
}
@@ -266,31 +322,70 @@ static int setup_rx(struct sockaddr *addr, socklen_t alen)
return fd;
}
-static void do_test(struct sockaddr *addr, socklen_t alen)
+static void do_test_tx(struct sockaddr *addr, socklen_t alen)
{
- int fdt, fdr, i;
+ int fdt, i;
fprintf(stderr, "\nSO_TXTIME ipv%c clock %s\n",
addr->sa_family == PF_INET ? '4' : '6',
cfg_clockid == CLOCK_TAI ? "tai" : "monotonic");
fdt = setup_tx(addr, alen);
- fdr = setup_rx(addr, alen);
- glob_tstart = gettime_ns();
+ start_time_wait();
+ glob_tstart = gettime_ns(cfg_clockid);
for (i = 0; i < cfg_num_pkt; i++)
- do_send_one(fdt, &cfg_in[i]);
+ do_send_one(fdt, &cfg_buf[i]);
+
+ recv_errqueue_msgs(fdt);
+
+ if (close(fdt))
+ error(1, errno, "close t");
+}
+
+static void do_test_rx(struct sockaddr *addr, socklen_t alen)
+{
+ int fdr, i;
+
+ fdr = setup_rx(addr, alen);
+
+ start_time_wait();
+ glob_tstart = gettime_ns(cfg_clockid);
+
for (i = 0; i < cfg_num_pkt; i++)
- if (do_recv_one(fdr, &cfg_out[i]))
- do_recv_errqueue_timeout(fdt);
+ do_recv_one(fdr, &cfg_buf[i]);
do_recv_verify_empty(fdr);
if (close(fdr))
error(1, errno, "close r");
- if (close(fdt))
- error(1, errno, "close t");
+}
+
+static void setup_sockaddr(int domain, const char *str_addr,
+ struct sockaddr_storage *sockaddr)
+{
+ struct sockaddr_in6 *addr6 = (void *) sockaddr;
+ struct sockaddr_in *addr4 = (void *) sockaddr;
+
+ switch (domain) {
+ case PF_INET:
+ memset(addr4, 0, sizeof(*addr4));
+ addr4->sin_family = AF_INET;
+ addr4->sin_port = htons(cfg_port);
+ if (str_addr &&
+ inet_pton(AF_INET, str_addr, &(addr4->sin_addr)) != 1)
+ error(1, 0, "ipv4 parse error: %s", str_addr);
+ break;
+ case PF_INET6:
+ memset(addr6, 0, sizeof(*addr6));
+ addr6->sin6_family = AF_INET6;
+ addr6->sin6_port = htons(cfg_port);
+ if (str_addr &&
+ inet_pton(AF_INET6, str_addr, &(addr6->sin6_addr)) != 1)
+ error(1, 0, "ipv6 parse error: %s", str_addr);
+ break;
+ }
}
static int parse_io(const char *optarg, struct timed_send *array)
@@ -323,17 +418,46 @@ static int parse_io(const char *optarg, struct timed_send *array)
return aoff / 2;
}
+static void usage(const char *progname)
+{
+ fprintf(stderr, "\nUsage: %s [options] <payload>\n"
+ "Options:\n"
+ " -4 only IPv4\n"
+ " -6 only IPv6\n"
+ " -c <clock> monotonic or tai (default)\n"
+ " -D <addr> destination IP address (server)\n"
+ " -S <addr> source IP address (client)\n"
+ " -r run rx mode\n"
+ " -t <nsec> start time (UTC nanoseconds)\n"
+ " -m <mark> socket mark\n"
+ "\n",
+ progname);
+ exit(1);
+}
+
static void parse_opts(int argc, char **argv)
{
- int c, ilen, olen;
+ char *daddr = NULL, *saddr = NULL;
+ int domain = PF_UNSPEC;
+ int c;
- while ((c = getopt(argc, argv, "46c:")) != -1) {
+ while ((c = getopt(argc, argv, "46c:S:D:rt:m:")) != -1) {
switch (c) {
case '4':
- cfg_do_ipv4 = true;
+ if (domain != PF_UNSPEC)
+ error(1, 0, "Pass one of -4 or -6");
+ domain = PF_INET;
+ cfg_alen = sizeof(struct sockaddr_in);
+ cfg_errq_level = SOL_IP;
+ cfg_errq_type = IP_RECVERR;
break;
case '6':
- cfg_do_ipv6 = true;
+ if (domain != PF_UNSPEC)
+ error(1, 0, "Pass one of -4 or -6");
+ domain = PF_INET6;
+ cfg_alen = sizeof(struct sockaddr_in6);
+ cfg_errq_level = SOL_IPV6;
+ cfg_errq_type = IPV6_RECVERR;
break;
case 'c':
if (!strcmp(optarg, "tai"))
@@ -344,50 +468,50 @@ static void parse_opts(int argc, char **argv)
else
error(1, 0, "unknown clock id %s", optarg);
break;
+ case 'S':
+ saddr = optarg;
+ break;
+ case 'D':
+ daddr = optarg;
+ break;
+ case 'r':
+ cfg_rx = true;
+ break;
+ case 't':
+ cfg_start_time_ns = strtoll(optarg, NULL, 0);
+ break;
+ case 'm':
+ cfg_mark = strtol(optarg, NULL, 0);
+ break;
default:
- error(1, 0, "parse error at %d", optind);
+ usage(argv[0]);
}
}
- if (argc - optind != 2)
- error(1, 0, "Usage: %s [-46] -c <clock> <in> <out>", argv[0]);
+ if (argc - optind != 1)
+ usage(argv[0]);
- ilen = parse_io(argv[optind], cfg_in);
- olen = parse_io(argv[optind + 1], cfg_out);
- if (ilen != olen)
- error(1, 0, "i/o streams len mismatch (%d, %d)\n", ilen, olen);
- cfg_num_pkt = ilen;
+ if (domain == PF_UNSPEC)
+ error(1, 0, "Pass one of -4 or -6");
+ if (!daddr)
+ error(1, 0, "-D <server addr> required\n");
+ if (!cfg_rx && !saddr)
+ error(1, 0, "-S <client addr> required\n");
+
+ setup_sockaddr(domain, daddr, &cfg_dst_addr);
+ setup_sockaddr(domain, saddr, &cfg_src_addr);
+
+ cfg_num_pkt = parse_io(argv[optind], cfg_buf);
}
int main(int argc, char **argv)
{
parse_opts(argc, argv);
- if (cfg_do_ipv6) {
- struct sockaddr_in6 addr6 = {0};
-
- addr6.sin6_family = AF_INET6;
- addr6.sin6_port = htons(cfg_port);
- addr6.sin6_addr = in6addr_loopback;
-
- cfg_errq_level = SOL_IPV6;
- cfg_errq_type = IPV6_RECVERR;
-
- do_test((void *)&addr6, sizeof(addr6));
- }
-
- if (cfg_do_ipv4) {
- struct sockaddr_in addr4 = {0};
-
- addr4.sin_family = AF_INET;
- addr4.sin_port = htons(cfg_port);
- addr4.sin_addr.s_addr = htonl(INADDR_LOOPBACK);
-
- cfg_errq_level = SOL_IP;
- cfg_errq_type = IP_RECVERR;
-
- do_test((void *)&addr4, sizeof(addr4));
- }
+ if (cfg_rx)
+ do_test_rx((void *)&cfg_dst_addr, cfg_alen);
+ else
+ do_test_tx((void *)&cfg_src_addr, cfg_alen);
return 0;
}
diff --git a/tools/testing/selftests/net/so_txtime.sh b/tools/testing/selftests/net/so_txtime.sh
index 3f7800eaecb1..5e861ad32a42 100755
--- a/tools/testing/selftests/net/so_txtime.sh
+++ b/tools/testing/selftests/net/so_txtime.sh
@@ -3,34 +3,108 @@
#
# Regression tests for the SO_TXTIME interface
-# Run in network namespace
-if [[ $# -eq 0 ]]; then
- if ! ./in_netns.sh $0 __subprocess; then
- # test is time sensitive, can be flaky
- echo "test failed: retry once"
- ./in_netns.sh $0 __subprocess
+set -e
+
+readonly ksft_skip=4
+readonly DEV="veth0"
+readonly BIN="./so_txtime"
+
+readonly RAND="$(mktemp -u XXXXXX)"
+readonly NSPREFIX="ns-${RAND}"
+readonly NS1="${NSPREFIX}1"
+readonly NS2="${NSPREFIX}2"
+
+readonly SADDR4='192.168.1.1'
+readonly DADDR4='192.168.1.2'
+readonly SADDR6='fd::1'
+readonly DADDR6='fd::2'
+
+cleanup() {
+ ip netns del "${NS2}"
+ ip netns del "${NS1}"
+}
+
+trap cleanup EXIT
+
+# Create virtual ethernet pair between network namespaces
+ip netns add "${NS1}"
+ip netns add "${NS2}"
+
+ip link add "${DEV}" netns "${NS1}" type veth \
+ peer name "${DEV}" netns "${NS2}"
+
+# Bring the devices up
+ip -netns "${NS1}" link set "${DEV}" up
+ip -netns "${NS2}" link set "${DEV}" up
+
+# Set fixed MAC addresses on the devices
+ip -netns "${NS1}" link set dev "${DEV}" address 02:02:02:02:02:02
+ip -netns "${NS2}" link set dev "${DEV}" address 06:06:06:06:06:06
+
+# Add fixed IP addresses to the devices
+ip -netns "${NS1}" addr add 192.168.1.1/24 dev "${DEV}"
+ip -netns "${NS2}" addr add 192.168.1.2/24 dev "${DEV}"
+ip -netns "${NS1}" addr add fd::1/64 dev "${DEV}" nodad
+ip -netns "${NS2}" addr add fd::2/64 dev "${DEV}" nodad
+
+run_test() {
+ local readonly IP="$1"
+ local readonly CLOCK="$2"
+ local readonly TXARGS="$3"
+ local readonly RXARGS="$4"
+
+ if [[ "${IP}" == "4" ]]; then
+ local readonly SADDR="${SADDR4}"
+ local readonly DADDR="${DADDR4}"
+ elif [[ "${IP}" == "6" ]]; then
+ local readonly SADDR="${SADDR6}"
+ local readonly DADDR="${DADDR6}"
+ else
+ echo "Invalid IP version ${IP}"
+ exit 1
fi
- exit $?
-fi
+ local readonly START="$(date +%s%N --date="+ 0.1 seconds")"
-set -e
+ ip netns exec "${NS2}" "${BIN}" -"${IP}" -c "${CLOCK}" -t "${START}" -S "${SADDR}" -D "${DADDR}" "${RXARGS}" -r &
+ ip netns exec "${NS1}" "${BIN}" -"${IP}" -c "${CLOCK}" -t "${START}" -S "${SADDR}" -D "${DADDR}" "${TXARGS}"
+ wait "$!"
+}
-tc qdisc add dev lo root fq
-./so_txtime -4 -6 -c mono a,-1 a,-1
-./so_txtime -4 -6 -c mono a,0 a,0
-./so_txtime -4 -6 -c mono a,10 a,10
-./so_txtime -4 -6 -c mono a,10,b,20 a,10,b,20
-./so_txtime -4 -6 -c mono a,20,b,10 b,20,a,20
-
-if tc qdisc replace dev lo root etf clockid CLOCK_TAI delta 400000; then
- ! ./so_txtime -4 -6 -c tai a,-1 a,-1
- ! ./so_txtime -4 -6 -c tai a,0 a,0
- ./so_txtime -4 -6 -c tai a,10 a,10
- ./so_txtime -4 -6 -c tai a,10,b,20 a,10,b,20
- ./so_txtime -4 -6 -c tai a,20,b,10 b,10,a,20
+do_test() {
+ run_test $@
+ [ $? -ne 0 ] && ret=1
+}
+
+do_fail_test() {
+ run_test $@
+ [ $? -eq 0 ] && ret=1
+}
+
+ip netns exec "${NS1}" tc qdisc add dev "${DEV}" root fq
+set +e
+ret=0
+do_test 4 mono a,-1 a,-1
+do_test 6 mono a,0 a,0
+do_test 6 mono a,10 a,10
+do_test 4 mono a,10,b,20 a,10,b,20
+do_test 6 mono a,20,b,10 b,20,a,20
+
+if ip netns exec "${NS1}" tc qdisc replace dev "${DEV}" root etf clockid CLOCK_TAI delta 400000; then
+ do_fail_test 4 tai a,-1 a,-1
+ do_fail_test 6 tai a,0 a,0
+ do_test 6 tai a,10 a,10
+ do_test 4 tai a,10,b,20 a,10,b,20
+ do_test 6 tai a,20,b,10 b,10,a,20
else
echo "tc ($(tc -V)) does not support qdisc etf. skipping"
+ [ $ret -eq 0 ] && ret=$ksft_skip
fi
-echo OK. All tests passed
+if [ $ret -eq 0 ]; then
+ echo OK. All tests passed
+elif [[ $ret -ne $ksft_skip && -n "$KSFT_MACHINE_SLOW" ]]; then
+ echo "Ignoring errors due to slow environment" 1>&2
+ ret=0
+fi
+exit $ret
diff --git a/tools/testing/selftests/net/socket.c b/tools/testing/selftests/net/socket.c
index afca1ead677f..db1aeb8c5d1e 100644
--- a/tools/testing/selftests/net/socket.c
+++ b/tools/testing/selftests/net/socket.c
@@ -7,6 +7,8 @@
#include <sys/socket.h>
#include <netinet/in.h>
+#include "../kselftest.h"
+
struct socket_testcase {
int domain;
int type;
@@ -31,7 +33,6 @@ static struct socket_testcase tests[] = {
{ AF_INET, SOCK_STREAM, IPPROTO_UDP, -EPROTONOSUPPORT, 1 },
};
-#define ARRAY_SIZE(arr) (sizeof(arr) / sizeof((arr)[0]))
#define ERR_STRING_SZ 64
static int run_tests(void)
diff --git a/tools/testing/selftests/net/srv6_end_dt46_l3vpn_test.sh b/tools/testing/selftests/net/srv6_end_dt46_l3vpn_test.sh
new file mode 100755
index 000000000000..02d617040793
--- /dev/null
+++ b/tools/testing/selftests/net/srv6_end_dt46_l3vpn_test.sh
@@ -0,0 +1,573 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+#
+# author: Andrea Mayer <andrea.mayer@uniroma2.it>
+# author: Paolo Lungaroni <paolo.lungaroni@uniroma2.it>
+
+# This test is designed for evaluating the new SRv6 End.DT46 Behavior used for
+# implementing IPv4/IPv6 L3 VPN use cases.
+#
+# The current SRv6 code in the Linux kernel only implements SRv6 End.DT4 and
+# End.DT6 Behaviors which can be used respectively to support IPv4-in-IPv6 and
+# IPv6-in-IPv6 VPNs. With End.DT4 and End.DT6 it is not possible to create a
+# single SRv6 VPN tunnel to carry both IPv4 and IPv6 traffic.
+# The SRv6 End.DT46 Behavior implementation is meant to support the
+# decapsulation of IPv4 and IPv6 traffic coming from a single SRv6 tunnel.
+# Therefore, the SRv6 End.DT46 Behavior in the Linux kernel greatly simplifies
+# the setup and operations of SRv6 VPNs.
+#
+# Hereafter a network diagram is shown, where two different tenants (named 100
+# and 200) offer IPv4/IPv6 L3 VPN services allowing hosts to communicate with
+# each other across an IPv6 network.
+#
+# Only hosts belonging to the same tenant (and to the same VPN) can communicate
+# with each other. Instead, the communication among hosts of different tenants
+# is forbidden.
+# In other words, hosts hs-t100-1 and hs-t100-2 are connected through the
+# IPv4/IPv6 L3 VPN of tenant 100 while hs-t200-3 and hs-t200-4 are connected
+# using the IPv4/IPv6 L3 VPN of tenant 200. Cross connection between tenant 100
+# and tenant 200 is forbidden and thus, for example, hs-t100-1 cannot reach
+# hs-t200-3 and vice versa.
+#
+# Routers rt-1 and rt-2 implement IPv4/IPv6 L3 VPN services leveraging the SRv6
+# architecture. The key components for such VPNs are: a) SRv6 Encap behavior,
+# b) SRv6 End.DT46 Behavior and c) VRF.
+#
+# To explain how an IPv4/IPv6 L3 VPN based on SRv6 works, let us briefly
+# consider an example where, within the same domain of tenant 100, the host
+# hs-t100-1 pings the host hs-t100-2.
+#
+# First of all, L2 reachability of the host hs-t100-2 is taken into account by
+# the router rt-1 which acts as a arp/ndp proxy.
+#
+# When the host hs-t100-1 sends an IPv6 or IPv4 packet destined to hs-t100-2,
+# the router rt-1 receives the packet on the internal veth-t100 interface. Such
+# interface is enslaved to the VRF vrf-100 whose associated table contains the
+# SRv6 Encap route for encapsulating any IPv6 or IPv4 packet in a IPv6 plus the
+# Segment Routing Header (SRH) packet. This packet is sent through the (IPv6)
+# core network up to the router rt-2 that receives it on veth0 interface.
+#
+# The rt-2 router uses the 'localsid' routing table to process incoming
+# IPv6+SRH packets which belong to the VPN of the tenant 100. For each of these
+# packets, the SRv6 End.DT46 Behavior removes the outer IPv6+SRH headers and
+# performs the lookup on the vrf-100 table using the destination address of
+# the decapsulated IPv6 or IPv4 packet. Afterwards, the packet is sent to the
+# host hs-t100-2 through the veth-t100 interface.
+#
+# The ping response follows the same processing but this time the roles of rt-1
+# and rt-2 are swapped.
+#
+# Of course, the IPv4/IPv6 L3 VPN for tenant 200 works exactly as the IPv4/IPv6
+# L3 VPN for tenant 100. In this case, only hosts hs-t200-3 and hs-t200-4 are
+# able to connect with each other.
+#
+#
+# +-------------------+ +-------------------+
+# | | | |
+# | hs-t100-1 netns | | hs-t100-2 netns |
+# | | | |
+# | +-------------+ | | +-------------+ |
+# | | veth0 | | | | veth0 | |
+# | | cafe::1/64 | | | | cafe::2/64 | |
+# | | 10.0.0.1/24 | | | | 10.0.0.2/24 | |
+# | +-------------+ | | +-------------+ |
+# | . | | . |
+# +-------------------+ +-------------------+
+# . .
+# . .
+# . .
+# +-----------------------------------+ +-----------------------------------+
+# | . | | . |
+# | +---------------+ | | +---------------- |
+# | | veth-t100 | | | | veth-t100 | |
+# | | cafe::254/64 | | | | cafe::254/64 | |
+# | | 10.0.0.254/24 | +----------+ | | +----------+ | 10.0.0.254/24 | |
+# | +-------+-------+ | localsid | | | | localsid | +-------+-------- |
+# | | | table | | | | table | | |
+# | +----+----+ +----------+ | | +----------+ +----+----+ |
+# | | vrf-100 | | | | vrf-100 | |
+# | +---------+ +------------+ | | +------------+ +---------+ |
+# | | veth0 | | | | veth0 | |
+# | | fd00::1/64 |.|...|.| fd00::2/64 | |
+# | +---------+ +------------+ | | +------------+ +---------+ |
+# | | vrf-200 | | | | vrf-200 | |
+# | +----+----+ | | +----+----+ |
+# | | | | | |
+# | +-------+-------+ | | +-------+-------- |
+# | | veth-t200 | | | | veth-t200 | |
+# | | cafe::254/64 | | | | cafe::254/64 | |
+# | | 10.0.0.254/24 | | | | 10.0.0.254/24 | |
+# | +---------------+ rt-1 netns | | rt-2 netns +---------------- |
+# | . | | . |
+# +-----------------------------------+ +-----------------------------------+
+# . .
+# . .
+# . .
+# . .
+# +-------------------+ +-------------------+
+# | . | | . |
+# | +-------------+ | | +-------------+ |
+# | | veth0 | | | | veth0 | |
+# | | cafe::3/64 | | | | cafe::4/64 | |
+# | | 10.0.0.3/24 | | | | 10.0.0.4/24 | |
+# | +-------------+ | | +-------------+ |
+# | | | |
+# | hs-t200-3 netns | | hs-t200-4 netns |
+# | | | |
+# +-------------------+ +-------------------+
+#
+#
+# ~~~~~~~~~~~~~~~~~~~~~~~~~
+# | Network configuration |
+# ~~~~~~~~~~~~~~~~~~~~~~~~~
+#
+# rt-1: localsid table (table 90)
+# +--------------------------------------------------+
+# |SID |Action |
+# +--------------------------------------------------+
+# |fc00:21:100::6046|apply SRv6 End.DT46 vrftable 100|
+# +--------------------------------------------------+
+# |fc00:21:200::6046|apply SRv6 End.DT46 vrftable 200|
+# +--------------------------------------------------+
+#
+# rt-1: VRF tenant 100 (table 100)
+# +---------------------------------------------------+
+# |host |Action |
+# +---------------------------------------------------+
+# |cafe::2 |apply seg6 encap segs fc00:12:100::6046|
+# +---------------------------------------------------+
+# |cafe::/64 |forward to dev veth-t100 |
+# +---------------------------------------------------+
+# |10.0.0.2 |apply seg6 encap segs fc00:12:100::6046|
+# +---------------------------------------------------+
+# |10.0.0.0/24|forward to dev veth-t100 |
+# +---------------------------------------------------+
+#
+# rt-1: VRF tenant 200 (table 200)
+# +---------------------------------------------------+
+# |host |Action |
+# +---------------------------------------------------+
+# |cafe::4 |apply seg6 encap segs fc00:12:200::6046|
+# +---------------------------------------------------+
+# |cafe::/64 |forward to dev veth-t200 |
+# +---------------------------------------------------+
+# |10.0.0.4 |apply seg6 encap segs fc00:12:200::6046|
+# +---------------------------------------------------+
+# |10.0.0.0/24|forward to dev veth-t200 |
+# +---------------------------------------------------+
+#
+#
+# rt-2: localsid table (table 90)
+# +--------------------------------------------------+
+# |SID |Action |
+# +--------------------------------------------------+
+# |fc00:12:100::6046|apply SRv6 End.DT46 vrftable 100|
+# +--------------------------------------------------+
+# |fc00:12:200::6046|apply SRv6 End.DT46 vrftable 200|
+# +--------------------------------------------------+
+#
+# rt-2: VRF tenant 100 (table 100)
+# +---------------------------------------------------+
+# |host |Action |
+# +---------------------------------------------------+
+# |cafe::1 |apply seg6 encap segs fc00:21:100::6046|
+# +---------------------------------------------------+
+# |cafe::/64 |forward to dev veth-t100 |
+# +---------------------------------------------------+
+# |10.0.0.1 |apply seg6 encap segs fc00:21:100::6046|
+# +---------------------------------------------------+
+# |10.0.0.0/24|forward to dev veth-t100 |
+# +---------------------------------------------------+
+#
+# rt-2: VRF tenant 200 (table 200)
+# +---------------------------------------------------+
+# |host |Action |
+# +---------------------------------------------------+
+# |cafe::3 |apply seg6 encap segs fc00:21:200::6046|
+# +---------------------------------------------------+
+# |cafe::/64 |forward to dev veth-t200 |
+# +---------------------------------------------------+
+# |10.0.0.3 |apply seg6 encap segs fc00:21:200::6046|
+# +---------------------------------------------------+
+# |10.0.0.0/24|forward to dev veth-t200 |
+# +---------------------------------------------------+
+#
+
+source lib.sh
+
+readonly LOCALSID_TABLE_ID=90
+readonly IPv6_RT_NETWORK=fd00
+readonly IPv6_HS_NETWORK=cafe
+readonly IPv4_HS_NETWORK=10.0.0
+readonly VPN_LOCATOR_SERVICE=fc00
+PING_TIMEOUT_SEC=4
+
+ret=0
+
+PAUSE_ON_FAIL=${PAUSE_ON_FAIL:=no}
+
+log_test()
+{
+ local rc=$1
+ local expected=$2
+ local msg="$3"
+
+ if [ ${rc} -eq ${expected} ]; then
+ nsuccess=$((nsuccess+1))
+ printf "\n TEST: %-60s [ OK ]\n" "${msg}"
+ else
+ ret=1
+ nfail=$((nfail+1))
+ printf "\n TEST: %-60s [FAIL]\n" "${msg}"
+ if [ "${PAUSE_ON_FAIL}" = "yes" ]; then
+ echo
+ echo "hit enter to continue, 'q' to quit"
+ read a
+ [ "$a" = "q" ] && exit 1
+ fi
+ fi
+}
+
+print_log_test_results()
+{
+ if [ "$TESTS" != "none" ]; then
+ printf "\nTests passed: %3d\n" ${nsuccess}
+ printf "Tests failed: %3d\n" ${nfail}
+ fi
+}
+
+log_section()
+{
+ echo
+ echo "################################################################################"
+ echo "TEST SECTION: $*"
+ echo "################################################################################"
+}
+
+cleanup()
+{
+ ip link del veth-rt-1 2>/dev/null || true
+ ip link del veth-rt-2 2>/dev/null || true
+
+ cleanup_all_ns
+}
+
+# Setup the basic networking for the routers
+setup_rt_networking()
+{
+ local id=$1
+ eval local nsname=\${rt_${id}}
+
+ ip link set veth-rt-${id} netns ${nsname}
+ ip -netns ${nsname} link set veth-rt-${id} name veth0
+
+ ip netns exec ${nsname} sysctl -wq net.ipv6.conf.all.accept_dad=0
+ ip netns exec ${nsname} sysctl -wq net.ipv6.conf.default.accept_dad=0
+
+ ip -netns ${nsname} addr add ${IPv6_RT_NETWORK}::${id}/64 dev veth0 nodad
+ ip -netns ${nsname} link set veth0 up
+ ip -netns ${nsname} link set lo up
+
+ ip netns exec ${nsname} sysctl -wq net.ipv4.ip_forward=1
+ ip netns exec ${nsname} sysctl -wq net.ipv6.conf.all.forwarding=1
+}
+
+setup_hs()
+{
+ local hid=$1
+ local rid=$2
+ local tid=$3
+ eval local hsname=\${hs_t${tid}_${hid}}
+ eval local rtname=\${rt_${rid}}
+ local rtveth=veth-t${tid}
+
+ # set the networking for the host
+ ip netns exec ${hsname} sysctl -wq net.ipv6.conf.all.accept_dad=0
+ ip netns exec ${hsname} sysctl -wq net.ipv6.conf.default.accept_dad=0
+
+ # disable the rp_filter otherwise the kernel gets confused about how
+ # to route decap ipv4 packets.
+ ip netns exec ${rtname} sysctl -wq net.ipv4.conf.all.rp_filter=0
+ ip netns exec ${rtname} sysctl -wq net.ipv4.conf.default.rp_filter=0
+
+ ip -netns ${hsname} link add veth0 type veth peer name ${rtveth}
+ ip -netns ${hsname} link set ${rtveth} netns ${rtname}
+ ip -netns ${hsname} addr add ${IPv6_HS_NETWORK}::${hid}/64 dev veth0 nodad
+ ip -netns ${hsname} addr add ${IPv4_HS_NETWORK}.${hid}/24 dev veth0
+ ip -netns ${hsname} link set veth0 up
+ ip -netns ${hsname} link set lo up
+
+ # configure the VRF for the tenant X on the router which is directly
+ # connected to the source host.
+ ip -netns ${rtname} link add vrf-${tid} type vrf table ${tid}
+ ip -netns ${rtname} link set vrf-${tid} up
+
+ ip netns exec ${rtname} sysctl -wq net.ipv6.conf.all.accept_dad=0
+ ip netns exec ${rtname} sysctl -wq net.ipv6.conf.default.accept_dad=0
+
+ # enslave the veth-tX interface to the vrf-X in the access router
+ ip -netns ${rtname} link set ${rtveth} master vrf-${tid}
+ ip -netns ${rtname} addr add ${IPv6_HS_NETWORK}::254/64 dev ${rtveth} nodad
+ ip -netns ${rtname} addr add ${IPv4_HS_NETWORK}.254/24 dev ${rtveth}
+ ip -netns ${rtname} link set ${rtveth} up
+
+ ip netns exec ${rtname} sysctl -wq net.ipv6.conf.${rtveth}.proxy_ndp=1
+ ip netns exec ${rtname} sysctl -wq net.ipv4.conf.${rtveth}.proxy_arp=1
+
+ ip netns exec ${rtname} sh -c "echo 1 > /proc/sys/net/vrf/strict_mode"
+}
+
+setup_vpn_config()
+{
+ local hssrc=$1
+ local rtsrc=$2
+ local hsdst=$3
+ local rtdst=$4
+ local tid=$5
+
+ eval local rtsrc_name=\${rt_${rtsrc}}
+ eval local rtdst_name=\${rt_${rtdst}}
+ local rtveth=veth-t${tid}
+ local vpn_sid=${VPN_LOCATOR_SERVICE}:${hssrc}${hsdst}:${tid}::6046
+
+ ip -netns ${rtsrc_name} -6 neigh add proxy ${IPv6_HS_NETWORK}::${hsdst} dev ${rtveth}
+
+ # set the encap route for encapsulating packets which arrive from the
+ # host hssrc and destined to the access router rtsrc.
+ ip -netns ${rtsrc_name} -6 route add ${IPv6_HS_NETWORK}::${hsdst}/128 vrf vrf-${tid} \
+ encap seg6 mode encap segs ${vpn_sid} dev veth0
+ ip -netns ${rtsrc_name} -4 route add ${IPv4_HS_NETWORK}.${hsdst}/32 vrf vrf-${tid} \
+ encap seg6 mode encap segs ${vpn_sid} dev veth0
+ ip -netns ${rtsrc_name} -6 route add ${vpn_sid}/128 vrf vrf-${tid} \
+ via fd00::${rtdst} dev veth0
+
+ # set the decap route for decapsulating packets which arrive from
+ # the rtdst router and destined to the hsdst host.
+ ip -netns ${rtdst_name} -6 route add ${vpn_sid}/128 table ${LOCALSID_TABLE_ID} \
+ encap seg6local action End.DT46 vrftable ${tid} dev vrf-${tid}
+
+ # all sids for VPNs start with a common locator which is fc00::/16.
+ # Routes for handling the SRv6 End.DT46 behavior instances are grouped
+ # together in the 'localsid' table.
+ #
+ # NOTE: added only once
+ if [ -z "$(ip -netns ${rtdst_name} -6 rule show | \
+ grep "to ${VPN_LOCATOR_SERVICE}::/16 lookup ${LOCALSID_TABLE_ID}")" ]; then
+ ip -netns ${rtdst_name} -6 rule add \
+ to ${VPN_LOCATOR_SERVICE}::/16 \
+ lookup ${LOCALSID_TABLE_ID} prio 999
+ fi
+
+ # set default routes to unreachable for both ipv4 and ipv6
+ ip -netns ${rtsrc_name} -6 route add unreachable default metric 4278198272 \
+ vrf vrf-${tid}
+
+ ip -netns ${rtsrc_name} -4 route add unreachable default metric 4278198272 \
+ vrf vrf-${tid}
+}
+
+setup()
+{
+ ip link add veth-rt-1 type veth peer name veth-rt-2
+ # setup the networking for router rt-1 and router rt-2
+ setup_ns rt_1 rt_2
+ setup_rt_networking 1
+ setup_rt_networking 2
+
+ # setup two hosts for the tenant 100.
+ # - host hs-1 is directly connected to the router rt-1;
+ # - host hs-2 is directly connected to the router rt-2.
+ setup_ns hs_t100_1 hs_t100_2
+ setup_hs 1 1 100 #args: host router tenant
+ setup_hs 2 2 100
+
+ # setup two hosts for the tenant 200
+ # - host hs-3 is directly connected to the router rt-1;
+ # - host hs-4 is directly connected to the router rt-2.
+ setup_ns hs_t200_3 hs_t200_4
+ setup_hs 3 1 200
+ setup_hs 4 2 200
+
+ # setup the IPv4/IPv6 L3 VPN which connects the host hs-t100-1 and host
+ # hs-t100-2 within the same tenant 100.
+ setup_vpn_config 1 1 2 2 100 #args: src_host src_router dst_host dst_router tenant
+ setup_vpn_config 2 2 1 1 100
+
+ # setup the IPv4/IPv6 L3 VPN which connects the host hs-t200-3 and host
+ # hs-t200-4 within the same tenant 200.
+ setup_vpn_config 3 1 4 2 200
+ setup_vpn_config 4 2 3 1 200
+}
+
+check_rt_connectivity()
+{
+ local rtsrc=$1
+ local rtdst=$2
+ eval local nsname=\${rt_${rtsrc}}
+
+ ip netns exec ${nsname} ping -c 1 -W 1 ${IPv6_RT_NETWORK}::${rtdst} \
+ >/dev/null 2>&1
+}
+
+check_and_log_rt_connectivity()
+{
+ local rtsrc=$1
+ local rtdst=$2
+
+ check_rt_connectivity ${rtsrc} ${rtdst}
+ log_test $? 0 "Routers connectivity: rt-${rtsrc} -> rt-${rtdst}"
+}
+
+check_hs_ipv6_connectivity()
+{
+ local hssrc=$1
+ local hsdst=$2
+ local tid=$3
+ eval local nsname=\${hs_t${tid}_${hssrc}}
+
+ ip netns exec ${nsname} ping -c 1 -W ${PING_TIMEOUT_SEC} \
+ ${IPv6_HS_NETWORK}::${hsdst} >/dev/null 2>&1
+}
+
+check_hs_ipv4_connectivity()
+{
+ local hssrc=$1
+ local hsdst=$2
+ local tid=$3
+ eval local nsname=\${hs_t${tid}_${hssrc}}
+
+ ip netns exec ${nsname} ping -c 1 -W ${PING_TIMEOUT_SEC} \
+ ${IPv4_HS_NETWORK}.${hsdst} >/dev/null 2>&1
+}
+
+check_and_log_hs_connectivity()
+{
+ local hssrc=$1
+ local hsdst=$2
+ local tid=$3
+
+ check_hs_ipv6_connectivity ${hssrc} ${hsdst} ${tid}
+ log_test $? 0 "IPv6 Hosts connectivity: hs-t${tid}-${hssrc} -> hs-t${tid}-${hsdst} (tenant ${tid})"
+
+ check_hs_ipv4_connectivity ${hssrc} ${hsdst} ${tid}
+ log_test $? 0 "IPv4 Hosts connectivity: hs-t${tid}-${hssrc} -> hs-t${tid}-${hsdst} (tenant ${tid})"
+
+}
+
+check_and_log_hs_isolation()
+{
+ local hssrc=$1
+ local tidsrc=$2
+ local hsdst=$3
+ local tiddst=$4
+
+ check_hs_ipv6_connectivity ${hssrc} ${hsdst} ${tidsrc}
+ # NOTE: ping should fail
+ log_test $? 1 "IPv6 Hosts isolation: hs-t${tidsrc}-${hssrc} -X-> hs-t${tiddst}-${hsdst}"
+
+ check_hs_ipv4_connectivity ${hssrc} ${hsdst} ${tidsrc}
+ # NOTE: ping should fail
+ log_test $? 1 "IPv4 Hosts isolation: hs-t${tidsrc}-${hssrc} -X-> hs-t${tiddst}-${hsdst}"
+
+}
+
+
+check_and_log_hs2gw_connectivity()
+{
+ local hssrc=$1
+ local tid=$2
+
+ check_hs_ipv6_connectivity ${hssrc} 254 ${tid}
+ log_test $? 0 "IPv6 Hosts connectivity: hs-t${tid}-${hssrc} -> gw (tenant ${tid})"
+
+ check_hs_ipv4_connectivity ${hssrc} 254 ${tid}
+ log_test $? 0 "IPv4 Hosts connectivity: hs-t${tid}-${hssrc} -> gw (tenant ${tid})"
+
+}
+
+router_tests()
+{
+ log_section "IPv6 routers connectivity test"
+
+ check_and_log_rt_connectivity 1 2
+ check_and_log_rt_connectivity 2 1
+}
+
+host2gateway_tests()
+{
+ log_section "IPv4/IPv6 connectivity test among hosts and gateway"
+
+ check_and_log_hs2gw_connectivity 1 100
+ check_and_log_hs2gw_connectivity 2 100
+
+ check_and_log_hs2gw_connectivity 3 200
+ check_and_log_hs2gw_connectivity 4 200
+}
+
+host_vpn_tests()
+{
+ log_section "SRv6 VPN connectivity test among hosts in the same tenant"
+
+ check_and_log_hs_connectivity 1 2 100
+ check_and_log_hs_connectivity 2 1 100
+
+ check_and_log_hs_connectivity 3 4 200
+ check_and_log_hs_connectivity 4 3 200
+}
+
+host_vpn_isolation_tests()
+{
+ local i
+ local j
+ local k
+ local tmp
+ local l1="1 2"
+ local l2="3 4"
+ local t1=100
+ local t2=200
+
+ log_section "SRv6 VPN isolation test among hosts in different tentants"
+
+ for k in 0 1; do
+ for i in ${l1}; do
+ for j in ${l2}; do
+ check_and_log_hs_isolation ${i} ${t1} ${j} ${t2}
+ done
+ done
+
+ # let us test the reverse path
+ tmp="${l1}"; l1="${l2}"; l2="${tmp}"
+ tmp=${t1}; t1=${t2}; t2=${tmp}
+ done
+}
+
+if [ "$(id -u)" -ne 0 ];then
+ echo "SKIP: Need root privileges"
+ exit $ksft_skip
+fi
+
+if [ ! -x "$(command -v ip)" ]; then
+ echo "SKIP: Could not run test without ip tool"
+ exit $ksft_skip
+fi
+
+modprobe vrf &>/dev/null
+if [ ! -e /proc/sys/net/vrf/strict_mode ]; then
+ echo "SKIP: vrf sysctl does not exist"
+ exit $ksft_skip
+fi
+
+cleanup &>/dev/null
+
+setup
+
+router_tests
+host2gateway_tests
+host_vpn_tests
+host_vpn_isolation_tests
+
+print_log_test_results
+
+cleanup &>/dev/null
+
+exit ${ret}
diff --git a/tools/testing/selftests/net/srv6_end_dt4_l3vpn_test.sh b/tools/testing/selftests/net/srv6_end_dt4_l3vpn_test.sh
new file mode 100755
index 000000000000..79fb81e63c59
--- /dev/null
+++ b/tools/testing/selftests/net/srv6_end_dt4_l3vpn_test.sh
@@ -0,0 +1,496 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+#
+# author: Andrea Mayer <andrea.mayer@uniroma2.it>
+
+# This test is designed for evaluating the new SRv6 End.DT4 behavior used for
+# implementing IPv4 L3 VPN use cases.
+#
+# Hereafter a network diagram is shown, where two different tenants (named 100
+# and 200) offer IPv4 L3 VPN services allowing hosts to communicate with each
+# other across an IPv6 network.
+#
+# Only hosts belonging to the same tenant (and to the same VPN) can communicate
+# with each other. Instead, the communication among hosts of different tenants
+# is forbidden.
+# In other words, hosts hs-t100-1 and hs-t100-2 are connected through the IPv4
+# L3 VPN of tenant 100 while hs-t200-3 and hs-t200-4 are connected using the
+# IPv4 L3 VPN of tenant 200. Cross connection between tenant 100 and tenant 200
+# is forbidden and thus, for example, hs-t100-1 cannot reach hs-t200-3 and vice
+# versa.
+#
+# Routers rt-1 and rt-2 implement IPv4 L3 VPN services leveraging the SRv6
+# architecture. The key components for such VPNs are: a) SRv6 Encap behavior,
+# b) SRv6 End.DT4 behavior and c) VRF.
+#
+# To explain how an IPv4 L3 VPN based on SRv6 works, let us briefly consider an
+# example where, within the same domain of tenant 100, the host hs-t100-1 pings
+# the host hs-t100-2.
+#
+# First of all, L2 reachability of the host hs-t100-2 is taken into account by
+# the router rt-1 which acts as an arp proxy.
+#
+# When the host hs-t100-1 sends an IPv4 packet destined to hs-t100-2, the
+# router rt-1 receives the packet on the internal veth-t100 interface. Such
+# interface is enslaved to the VRF vrf-100 whose associated table contains the
+# SRv6 Encap route for encapsulating any IPv4 packet in a IPv6 plus the Segment
+# Routing Header (SRH) packet. This packet is sent through the (IPv6) core
+# network up to the router rt-2 that receives it on veth0 interface.
+#
+# The rt-2 router uses the 'localsid' routing table to process incoming
+# IPv6+SRH packets which belong to the VPN of the tenant 100. For each of these
+# packets, the SRv6 End.DT4 behavior removes the outer IPv6+SRH headers and
+# performs the lookup on the vrf-100 table using the destination address of
+# the decapsulated IPv4 packet. Afterwards, the packet is sent to the host
+# hs-t100-2 through the veth-t100 interface.
+#
+# The ping response follows the same processing but this time the role of rt-1
+# and rt-2 are swapped.
+#
+# Of course, the IPv4 L3 VPN for tenant 200 works exactly as the IPv4 L3 VPN
+# for tenant 100. In this case, only hosts hs-t200-3 and hs-t200-4 are able to
+# connect with each other.
+#
+#
+# +-------------------+ +-------------------+
+# | | | |
+# | hs-t100-1 netns | | hs-t100-2 netns |
+# | | | |
+# | +-------------+ | | +-------------+ |
+# | | veth0 | | | | veth0 | |
+# | | 10.0.0.1/24 | | | | 10.0.0.2/24 | |
+# | +-------------+ | | +-------------+ |
+# | . | | . |
+# +-------------------+ +-------------------+
+# . .
+# . .
+# . .
+# +-----------------------------------+ +-----------------------------------+
+# | . | | . |
+# | +---------------+ | | +---------------- |
+# | | veth-t100 | | | | veth-t100 | |
+# | | 10.0.0.254/24 | +----------+ | | +----------+ | 10.0.0.254/24 | |
+# | +-------+-------+ | localsid | | | | localsid | +-------+-------- |
+# | | | table | | | | table | | |
+# | +----+----+ +----------+ | | +----------+ +----+----+ |
+# | | vrf-100 | | | | vrf-100 | |
+# | +---------+ +------------+ | | +------------+ +---------+ |
+# | | veth0 | | | | veth0 | |
+# | | fd00::1/64 |.|...|.| fd00::2/64 | |
+# | +---------+ +------------+ | | +------------+ +---------+ |
+# | | vrf-200 | | | | vrf-200 | |
+# | +----+----+ | | +----+----+ |
+# | | | | | |
+# | +-------+-------+ | | +-------+-------- |
+# | | veth-t200 | | | | veth-t200 | |
+# | | 10.0.0.254/24 | | | | 10.0.0.254/24 | |
+# | +---------------+ rt-1 netns | | rt-2 netns +---------------- |
+# | . | | . |
+# +-----------------------------------+ +-----------------------------------+
+# . .
+# . .
+# . .
+# . .
+# +-------------------+ +-------------------+
+# | . | | . |
+# | +-------------+ | | +-------------+ |
+# | | veth0 | | | | veth0 | |
+# | | 10.0.0.3/24 | | | | 10.0.0.4/24 | |
+# | +-------------+ | | +-------------+ |
+# | | | |
+# | hs-t200-3 netns | | hs-t200-4 netns |
+# | | | |
+# +-------------------+ +-------------------+
+#
+#
+# ~~~~~~~~~~~~~~~~~~~~~~~~~
+# | Network configuration |
+# ~~~~~~~~~~~~~~~~~~~~~~~~~
+#
+# rt-1: localsid table (table 90)
+# +-------------------------------------------------+
+# |SID |Action |
+# +-------------------------------------------------+
+# |fc00:21:100::6004|apply SRv6 End.DT4 vrftable 100|
+# +-------------------------------------------------+
+# |fc00:21:200::6004|apply SRv6 End.DT4 vrftable 200|
+# +-------------------------------------------------+
+#
+# rt-1: VRF tenant 100 (table 100)
+# +---------------------------------------------------+
+# |host |Action |
+# +---------------------------------------------------+
+# |10.0.0.2 |apply seg6 encap segs fc00:12:100::6004|
+# +---------------------------------------------------+
+# |10.0.0.0/24|forward to dev veth_t100 |
+# +---------------------------------------------------+
+#
+# rt-1: VRF tenant 200 (table 200)
+# +---------------------------------------------------+
+# |host |Action |
+# +---------------------------------------------------+
+# |10.0.0.4 |apply seg6 encap segs fc00:12:200::6004|
+# +---------------------------------------------------+
+# |10.0.0.0/24|forward to dev veth_t200 |
+# +---------------------------------------------------+
+#
+#
+# rt-2: localsid table (table 90)
+# +-------------------------------------------------+
+# |SID |Action |
+# +-------------------------------------------------+
+# |fc00:12:100::6004|apply SRv6 End.DT4 vrftable 100|
+# +-------------------------------------------------+
+# |fc00:12:200::6004|apply SRv6 End.DT4 vrftable 200|
+# +-------------------------------------------------+
+#
+# rt-2: VRF tenant 100 (table 100)
+# +---------------------------------------------------+
+# |host |Action |
+# +---------------------------------------------------+
+# |10.0.0.1 |apply seg6 encap segs fc00:21:100::6004|
+# +---------------------------------------------------+
+# |10.0.0.0/24|forward to dev veth_t100 |
+# +---------------------------------------------------+
+#
+# rt-2: VRF tenant 200 (table 200)
+# +---------------------------------------------------+
+# |host |Action |
+# +---------------------------------------------------+
+# |10.0.0.3 |apply seg6 encap segs fc00:21:200::6004|
+# +---------------------------------------------------+
+# |10.0.0.0/24|forward to dev veth_t200 |
+# +---------------------------------------------------+
+#
+
+source lib.sh
+
+readonly LOCALSID_TABLE_ID=90
+readonly IPv6_RT_NETWORK=fd00
+readonly IPv4_HS_NETWORK=10.0.0
+readonly VPN_LOCATOR_SERVICE=fc00
+PING_TIMEOUT_SEC=4
+
+ret=0
+
+PAUSE_ON_FAIL=${PAUSE_ON_FAIL:=no}
+
+log_test()
+{
+ local rc=$1
+ local expected=$2
+ local msg="$3"
+
+ if [ ${rc} -eq ${expected} ]; then
+ nsuccess=$((nsuccess+1))
+ printf "\n TEST: %-60s [ OK ]\n" "${msg}"
+ else
+ ret=1
+ nfail=$((nfail+1))
+ printf "\n TEST: %-60s [FAIL]\n" "${msg}"
+ if [ "${PAUSE_ON_FAIL}" = "yes" ]; then
+ echo
+ echo "hit enter to continue, 'q' to quit"
+ read a
+ [ "$a" = "q" ] && exit 1
+ fi
+ fi
+}
+
+print_log_test_results()
+{
+ if [ "$TESTS" != "none" ]; then
+ printf "\nTests passed: %3d\n" ${nsuccess}
+ printf "Tests failed: %3d\n" ${nfail}
+ fi
+}
+
+log_section()
+{
+ echo
+ echo "################################################################################"
+ echo "TEST SECTION: $*"
+ echo "################################################################################"
+}
+
+cleanup()
+{
+ ip link del veth-rt-1 2>/dev/null || true
+ ip link del veth-rt-2 2>/dev/null || true
+
+ cleanup_all_ns
+}
+
+# Setup the basic networking for the routers
+setup_rt_networking()
+{
+ local id=$1
+ eval local nsname=\${rt_${id}}
+
+ ip netns exec ${nsname} sysctl -wq net.ipv6.conf.all.accept_dad=0
+ ip netns exec ${nsname} sysctl -wq net.ipv6.conf.default.accept_dad=0
+
+ ip link set veth-rt-${id} netns ${nsname}
+ ip -netns ${nsname} link set veth-rt-${id} name veth0
+
+ ip -netns ${nsname} addr add ${IPv6_RT_NETWORK}::${id}/64 dev veth0 nodad
+ ip -netns ${nsname} link set veth0 up
+ ip -netns ${nsname} link set lo up
+
+ ip netns exec ${nsname} sysctl -wq net.ipv4.ip_forward=1
+ ip netns exec ${nsname} sysctl -wq net.ipv6.conf.all.forwarding=1
+}
+
+setup_hs()
+{
+ local hid=$1
+ local rid=$2
+ local tid=$3
+ eval local hsname=\${hs_t${tid}_${hid}}
+ eval local rtname=\${rt_${rid}}
+ local rtveth=veth-t${tid}
+
+ # disable the rp_filter otherwise the kernel gets confused about how
+ # to route decap ipv4 packets.
+ ip netns exec ${rtname} sysctl -wq net.ipv4.conf.all.rp_filter=0
+ ip netns exec ${rtname} sysctl -wq net.ipv4.conf.default.rp_filter=0
+
+ ip -netns ${hsname} link add veth0 type veth peer name ${rtveth}
+ ip -netns ${hsname} link set ${rtveth} netns ${rtname}
+ ip -netns ${hsname} addr add ${IPv4_HS_NETWORK}.${hid}/24 dev veth0
+ ip -netns ${hsname} link set veth0 up
+ ip -netns ${hsname} link set lo up
+
+ # configure the VRF for the tenant X on the router which is directly
+ # connected to the source host.
+ ip -netns ${rtname} link add vrf-${tid} type vrf table ${tid}
+ ip -netns ${rtname} link set vrf-${tid} up
+
+ # enslave the veth-tX interface to the vrf-X in the access router
+ ip -netns ${rtname} link set ${rtveth} master vrf-${tid}
+ ip -netns ${rtname} addr add ${IPv4_HS_NETWORK}.254/24 dev ${rtveth}
+ ip -netns ${rtname} link set ${rtveth} up
+
+ ip netns exec ${rtname} sysctl -wq net.ipv4.conf.${rtveth}.proxy_arp=1
+
+ ip netns exec ${rtname} sh -c "echo 1 > /proc/sys/net/vrf/strict_mode"
+}
+
+setup_vpn_config()
+{
+ local hssrc=$1
+ local rtsrc=$2
+ local hsdst=$3
+ local rtdst=$4
+ local tid=$5
+
+ eval local rtsrc_name=\${rt_${rtsrc}}
+ eval local rtdst_name=\${rt_${rtdst}}
+ local vpn_sid=${VPN_LOCATOR_SERVICE}:${hssrc}${hsdst}:${tid}::6004
+
+ # set the encap route for encapsulating packets which arrive from the
+ # host hssrc and destined to the access router rtsrc.
+ ip -netns ${rtsrc_name} -4 route add ${IPv4_HS_NETWORK}.${hsdst}/32 vrf vrf-${tid} \
+ encap seg6 mode encap segs ${vpn_sid} dev veth0
+ ip -netns ${rtsrc_name} -6 route add ${vpn_sid}/128 vrf vrf-${tid} \
+ via fd00::${rtdst} dev veth0
+
+ # set the decap route for decapsulating packets which arrive from
+ # the rtdst router and destined to the hsdst host.
+ ip -netns ${rtdst_name} -6 route add ${vpn_sid}/128 table ${LOCALSID_TABLE_ID} \
+ encap seg6local action End.DT4 vrftable ${tid} dev vrf-${tid}
+
+ # all sids for VPNs start with a common locator which is fc00::/16.
+ # Routes for handling the SRv6 End.DT4 behavior instances are grouped
+ # together in the 'localsid' table.
+ #
+ # NOTE: added only once
+ if [ -z "$(ip -netns ${rtdst_name} -6 rule show | \
+ grep "to ${VPN_LOCATOR_SERVICE}::/16 lookup ${LOCALSID_TABLE_ID}")" ]; then
+ ip -netns ${rtdst_name} -6 rule add \
+ to ${VPN_LOCATOR_SERVICE}::/16 \
+ lookup ${LOCALSID_TABLE_ID} prio 999
+ fi
+}
+
+setup()
+{
+ ip link add veth-rt-1 type veth peer name veth-rt-2
+ # setup the networking for router rt-1 and router rt-2
+ setup_ns rt_1 rt_2
+ setup_rt_networking 1
+ setup_rt_networking 2
+
+ # setup two hosts for the tenant 100.
+ # - host hs-1 is directly connected to the router rt-1;
+ # - host hs-2 is directly connected to the router rt-2.
+ setup_ns hs_t100_1 hs_t100_2
+ setup_hs 1 1 100 #args: host router tenant
+ setup_hs 2 2 100
+
+ # setup two hosts for the tenant 200
+ # - host hs-3 is directly connected to the router rt-1;
+ # - host hs-4 is directly connected to the router rt-2.
+ setup_ns hs_t200_3 hs_t200_4
+ setup_hs 3 1 200
+ setup_hs 4 2 200
+
+ # setup the IPv4 L3 VPN which connects the host hs-t100-1 and host
+ # hs-t100-2 within the same tenant 100.
+ setup_vpn_config 1 1 2 2 100 #args: src_host src_router dst_host dst_router tenant
+ setup_vpn_config 2 2 1 1 100
+
+ # setup the IPv4 L3 VPN which connects the host hs-t200-3 and host
+ # hs-t200-4 within the same tenant 200.
+ setup_vpn_config 3 1 4 2 200
+ setup_vpn_config 4 2 3 1 200
+}
+
+check_rt_connectivity()
+{
+ local rtsrc=$1
+ local rtdst=$2
+ eval local nsname=\${rt_${rtsrc}}
+
+ ip netns exec ${nsname} ping -c 1 -W 1 ${IPv6_RT_NETWORK}::${rtdst} \
+ >/dev/null 2>&1
+}
+
+check_and_log_rt_connectivity()
+{
+ local rtsrc=$1
+ local rtdst=$2
+
+ check_rt_connectivity ${rtsrc} ${rtdst}
+ log_test $? 0 "Routers connectivity: rt-${rtsrc} -> rt-${rtdst}"
+}
+
+check_hs_connectivity()
+{
+ local hssrc=$1
+ local hsdst=$2
+ local tid=$3
+ eval local nsname=\${hs_t${tid}_${hssrc}}
+
+ ip netns exec ${nsname} ping -c 1 -W ${PING_TIMEOUT_SEC} \
+ ${IPv4_HS_NETWORK}.${hsdst} >/dev/null 2>&1
+}
+
+check_and_log_hs_connectivity()
+{
+ local hssrc=$1
+ local hsdst=$2
+ local tid=$3
+
+ check_hs_connectivity ${hssrc} ${hsdst} ${tid}
+ log_test $? 0 "Hosts connectivity: hs-t${tid}-${hssrc} -> hs-t${tid}-${hsdst} (tenant ${tid})"
+}
+
+check_and_log_hs_isolation()
+{
+ local hssrc=$1
+ local tidsrc=$2
+ local hsdst=$3
+ local tiddst=$4
+
+ check_hs_connectivity ${hssrc} ${hsdst} ${tidsrc}
+ # NOTE: ping should fail
+ log_test $? 1 "Hosts isolation: hs-t${tidsrc}-${hssrc} -X-> hs-t${tiddst}-${hsdst}"
+}
+
+
+check_and_log_hs2gw_connectivity()
+{
+ local hssrc=$1
+ local tid=$2
+
+ check_hs_connectivity ${hssrc} 254 ${tid}
+ log_test $? 0 "Hosts connectivity: hs-t${tid}-${hssrc} -> gw (tenant ${tid})"
+}
+
+router_tests()
+{
+ log_section "IPv6 routers connectivity test"
+
+ check_and_log_rt_connectivity 1 2
+ check_and_log_rt_connectivity 2 1
+}
+
+host2gateway_tests()
+{
+ log_section "IPv4 connectivity test among hosts and gateway"
+
+ check_and_log_hs2gw_connectivity 1 100
+ check_and_log_hs2gw_connectivity 2 100
+
+ check_and_log_hs2gw_connectivity 3 200
+ check_and_log_hs2gw_connectivity 4 200
+}
+
+host_vpn_tests()
+{
+ log_section "SRv6 VPN connectivity test among hosts in the same tenant"
+
+ check_and_log_hs_connectivity 1 2 100
+ check_and_log_hs_connectivity 2 1 100
+
+ check_and_log_hs_connectivity 3 4 200
+ check_and_log_hs_connectivity 4 3 200
+}
+
+host_vpn_isolation_tests()
+{
+ local i
+ local j
+ local k
+ local tmp
+ local l1="1 2"
+ local l2="3 4"
+ local t1=100
+ local t2=200
+
+ log_section "SRv6 VPN isolation test among hosts in different tentants"
+
+ for k in 0 1; do
+ for i in ${l1}; do
+ for j in ${l2}; do
+ check_and_log_hs_isolation ${i} ${t1} ${j} ${t2}
+ done
+ done
+
+ # let us test the reverse path
+ tmp="${l1}"; l1="${l2}"; l2="${tmp}"
+ tmp=${t1}; t1=${t2}; t2=${tmp}
+ done
+}
+
+if [ "$(id -u)" -ne 0 ];then
+ echo "SKIP: Need root privileges"
+ exit $ksft_skip
+fi
+
+if [ ! -x "$(command -v ip)" ]; then
+ echo "SKIP: Could not run test without ip tool"
+ exit $ksft_skip
+fi
+
+modprobe vrf &>/dev/null
+if [ ! -e /proc/sys/net/vrf/strict_mode ]; then
+ echo "SKIP: vrf sysctl does not exist"
+ exit $ksft_skip
+fi
+
+cleanup &>/dev/null
+
+setup
+
+router_tests
+host2gateway_tests
+host_vpn_tests
+host_vpn_isolation_tests
+
+print_log_test_results
+
+cleanup &>/dev/null
+
+exit ${ret}
diff --git a/tools/testing/selftests/net/srv6_end_dt6_l3vpn_test.sh b/tools/testing/selftests/net/srv6_end_dt6_l3vpn_test.sh
new file mode 100755
index 000000000000..e408406d8489
--- /dev/null
+++ b/tools/testing/selftests/net/srv6_end_dt6_l3vpn_test.sh
@@ -0,0 +1,501 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+#
+# author: Andrea Mayer <andrea.mayer@uniroma2.it>
+# author: Paolo Lungaroni <paolo.lungaroni@cnit.it>
+
+# This test is designed for evaluating the new SRv6 End.DT6 behavior used for
+# implementing IPv6 L3 VPN use cases.
+#
+# Hereafter a network diagram is shown, where two different tenants (named 100
+# and 200) offer IPv6 L3 VPN services allowing hosts to communicate with each
+# other across an IPv6 network.
+#
+# Only hosts belonging to the same tenant (and to the same VPN) can communicate
+# with each other. Instead, the communication among hosts of different tenants
+# is forbidden.
+# In other words, hosts hs-t100-1 and hs-t100-2 are connected through the IPv6
+# L3 VPN of tenant 100 while hs-t200-3 and hs-t200-4 are connected using the
+# IPv6 L3 VPN of tenant 200. Cross connection between tenant 100 and tenant 200
+# is forbidden and thus, for example, hs-t100-1 cannot reach hs-t200-3 and vice
+# versa.
+#
+# Routers rt-1 and rt-2 implement IPv6 L3 VPN services leveraging the SRv6
+# architecture. The key components for such VPNs are: a) SRv6 Encap behavior,
+# b) SRv6 End.DT6 behavior and c) VRF.
+#
+# To explain how an IPv6 L3 VPN based on SRv6 works, let us briefly consider an
+# example where, within the same domain of tenant 100, the host hs-t100-1 pings
+# the host hs-t100-2.
+#
+# First of all, L2 reachability of the host hs-t100-2 is taken into account by
+# the router rt-1 which acts as a ndp proxy.
+#
+# When the host hs-t100-1 sends an IPv6 packet destined to hs-t100-2, the
+# router rt-1 receives the packet on the internal veth-t100 interface. Such
+# interface is enslaved to the VRF vrf-100 whose associated table contains the
+# SRv6 Encap route for encapsulating any IPv6 packet in a IPv6 plus the Segment
+# Routing Header (SRH) packet. This packet is sent through the (IPv6) core
+# network up to the router rt-2 that receives it on veth0 interface.
+#
+# The rt-2 router uses the 'localsid' routing table to process incoming
+# IPv6+SRH packets which belong to the VPN of the tenant 100. For each of these
+# packets, the SRv6 End.DT6 behavior removes the outer IPv6+SRH headers and
+# performs the lookup on the vrf-100 table using the destination address of
+# the decapsulated IPv6 packet. Afterwards, the packet is sent to the host
+# hs-t100-2 through the veth-t100 interface.
+#
+# The ping response follows the same processing but this time the role of rt-1
+# and rt-2 are swapped.
+#
+# Of course, the IPv6 L3 VPN for tenant 200 works exactly as the IPv6 L3 VPN
+# for tenant 100. In this case, only hosts hs-t200-3 and hs-t200-4 are able to
+# connect with each other.
+#
+#
+# +-------------------+ +-------------------+
+# | | | |
+# | hs-t100-1 netns | | hs-t100-2 netns |
+# | | | |
+# | +-------------+ | | +-------------+ |
+# | | veth0 | | | | veth0 | |
+# | | cafe::1/64 | | | | cafe::2/64 | |
+# | +-------------+ | | +-------------+ |
+# | . | | . |
+# +-------------------+ +-------------------+
+# . .
+# . .
+# . .
+# +-----------------------------------+ +-----------------------------------+
+# | . | | . |
+# | +---------------+ | | +---------------- |
+# | | veth-t100 | | | | veth-t100 | |
+# | | cafe::254/64 | +----------+ | | +----------+ | cafe::254/64 | |
+# | +-------+-------+ | localsid | | | | localsid | +-------+-------- |
+# | | | table | | | | table | | |
+# | +----+----+ +----------+ | | +----------+ +----+----+ |
+# | | vrf-100 | | | | vrf-100 | |
+# | +---------+ +------------+ | | +------------+ +---------+ |
+# | | veth0 | | | | veth0 | |
+# | | fd00::1/64 |.|...|.| fd00::2/64 | |
+# | +---------+ +------------+ | | +------------+ +---------+ |
+# | | vrf-200 | | | | vrf-200 | |
+# | +----+----+ | | +----+----+ |
+# | | | | | |
+# | +-------+-------+ | | +-------+-------- |
+# | | veth-t200 | | | | veth-t200 | |
+# | | cafe::254/64 | | | | cafe::254/64 | |
+# | +---------------+ rt-1 netns | | rt-2 netns +---------------- |
+# | . | | . |
+# +-----------------------------------+ +-----------------------------------+
+# . .
+# . .
+# . .
+# . .
+# +-------------------+ +-------------------+
+# | . | | . |
+# | +-------------+ | | +-------------+ |
+# | | veth0 | | | | veth0 | |
+# | | cafe::3/64 | | | | cafe::4/64 | |
+# | +-------------+ | | +-------------+ |
+# | | | |
+# | hs-t200-3 netns | | hs-t200-4 netns |
+# | | | |
+# +-------------------+ +-------------------+
+#
+#
+# ~~~~~~~~~~~~~~~~~~~~~~~~~
+# | Network configuration |
+# ~~~~~~~~~~~~~~~~~~~~~~~~~
+#
+# rt-1: localsid table (table 90)
+# +-------------------------------------------------+
+# |SID |Action |
+# +-------------------------------------------------+
+# |fc00:21:100::6006|apply SRv6 End.DT6 vrftable 100|
+# +-------------------------------------------------+
+# |fc00:21:200::6006|apply SRv6 End.DT6 vrftable 200|
+# +-------------------------------------------------+
+#
+# rt-1: VRF tenant 100 (table 100)
+# +---------------------------------------------------+
+# |host |Action |
+# +---------------------------------------------------+
+# |cafe::2 |apply seg6 encap segs fc00:12:100::6006|
+# +---------------------------------------------------+
+# |cafe::/64 |forward to dev veth_t100 |
+# +---------------------------------------------------+
+#
+# rt-1: VRF tenant 200 (table 200)
+# +---------------------------------------------------+
+# |host |Action |
+# +---------------------------------------------------+
+# |cafe::4 |apply seg6 encap segs fc00:12:200::6006|
+# +---------------------------------------------------+
+# |cafe::/64 |forward to dev veth_t200 |
+# +---------------------------------------------------+
+#
+#
+# rt-2: localsid table (table 90)
+# +-------------------------------------------------+
+# |SID |Action |
+# +-------------------------------------------------+
+# |fc00:12:100::6006|apply SRv6 End.DT6 vrftable 100|
+# +-------------------------------------------------+
+# |fc00:12:200::6006|apply SRv6 End.DT6 vrftable 200|
+# +-------------------------------------------------+
+#
+# rt-2: VRF tenant 100 (table 100)
+# +---------------------------------------------------+
+# |host |Action |
+# +---------------------------------------------------+
+# |cafe::1 |apply seg6 encap segs fc00:21:100::6006|
+# +---------------------------------------------------+
+# |cafe::/64 |forward to dev veth_t100 |
+# +---------------------------------------------------+
+#
+# rt-2: VRF tenant 200 (table 200)
+# +---------------------------------------------------+
+# |host |Action |
+# +---------------------------------------------------+
+# |cafe::3 |apply seg6 encap segs fc00:21:200::6006|
+# +---------------------------------------------------+
+# |cafe::/64 |forward to dev veth_t200 |
+# +---------------------------------------------------+
+#
+
+source lib.sh
+
+readonly LOCALSID_TABLE_ID=90
+readonly IPv6_RT_NETWORK=fd00
+readonly IPv6_HS_NETWORK=cafe
+readonly VPN_LOCATOR_SERVICE=fc00
+PING_TIMEOUT_SEC=4
+
+ret=0
+
+PAUSE_ON_FAIL=${PAUSE_ON_FAIL:=no}
+
+log_test()
+{
+ local rc=$1
+ local expected=$2
+ local msg="$3"
+
+ if [ ${rc} -eq ${expected} ]; then
+ nsuccess=$((nsuccess+1))
+ printf "\n TEST: %-60s [ OK ]\n" "${msg}"
+ else
+ ret=1
+ nfail=$((nfail+1))
+ printf "\n TEST: %-60s [FAIL]\n" "${msg}"
+ if [ "${PAUSE_ON_FAIL}" = "yes" ]; then
+ echo
+ echo "hit enter to continue, 'q' to quit"
+ read a
+ [ "$a" = "q" ] && exit 1
+ fi
+ fi
+}
+
+print_log_test_results()
+{
+ if [ "$TESTS" != "none" ]; then
+ printf "\nTests passed: %3d\n" ${nsuccess}
+ printf "Tests failed: %3d\n" ${nfail}
+ fi
+}
+
+log_section()
+{
+ echo
+ echo "################################################################################"
+ echo "TEST SECTION: $*"
+ echo "################################################################################"
+}
+
+cleanup()
+{
+ ip link del veth-rt-1 2>/dev/null || true
+ ip link del veth-rt-2 2>/dev/null || true
+
+ cleanup_all_ns
+}
+
+# Setup the basic networking for the routers
+setup_rt_networking()
+{
+ local id=$1
+ eval local nsname=\${rt_${id}}
+
+ ip link set veth-rt-${id} netns ${nsname}
+ ip -netns ${nsname} link set veth-rt-${id} name veth0
+
+ ip netns exec ${nsname} sysctl -wq net.ipv6.conf.all.accept_dad=0
+ ip netns exec ${nsname} sysctl -wq net.ipv6.conf.default.accept_dad=0
+
+ ip -netns ${nsname} addr add ${IPv6_RT_NETWORK}::${id}/64 dev veth0 nodad
+ ip -netns ${nsname} link set veth0 up
+ ip -netns ${nsname} link set lo up
+
+ ip netns exec ${nsname} sysctl -wq net.ipv6.conf.all.forwarding=1
+}
+
+setup_hs()
+{
+ local hid=$1
+ local rid=$2
+ local tid=$3
+ eval local hsname=\${hs_t${tid}_${hid}}
+ eval local rtname=\${rt_${rid}}
+ local rtveth=veth-t${tid}
+
+ # set the networking for the host
+ ip netns exec ${hsname} sysctl -wq net.ipv6.conf.all.accept_dad=0
+ ip netns exec ${hsname} sysctl -wq net.ipv6.conf.default.accept_dad=0
+
+ ip -netns ${hsname} link add veth0 type veth peer name ${rtveth}
+ ip -netns ${hsname} link set ${rtveth} netns ${rtname}
+ ip -netns ${hsname} addr add ${IPv6_HS_NETWORK}::${hid}/64 dev veth0 nodad
+ ip -netns ${hsname} link set veth0 up
+ ip -netns ${hsname} link set lo up
+
+ # configure the VRF for the tenant X on the router which is directly
+ # connected to the source host.
+ ip -netns ${rtname} link add vrf-${tid} type vrf table ${tid}
+ ip -netns ${rtname} link set vrf-${tid} up
+
+ ip netns exec ${rtname} sysctl -wq net.ipv6.conf.all.accept_dad=0
+ ip netns exec ${rtname} sysctl -wq net.ipv6.conf.default.accept_dad=0
+
+ # enslave the veth-tX interface to the vrf-X in the access router
+ ip -netns ${rtname} link set ${rtveth} master vrf-${tid}
+ ip -netns ${rtname} addr add ${IPv6_HS_NETWORK}::254/64 dev ${rtveth} nodad
+ ip -netns ${rtname} link set ${rtveth} up
+
+ ip netns exec ${rtname} sysctl -wq net.ipv6.conf.${rtveth}.proxy_ndp=1
+
+ ip netns exec ${rtname} sh -c "echo 1 > /proc/sys/net/vrf/strict_mode"
+}
+
+setup_vpn_config()
+{
+ local hssrc=$1
+ local rtsrc=$2
+ local hsdst=$3
+ local rtdst=$4
+ local tid=$5
+
+ eval local rtsrc_name=\${rt_${rtsrc}}
+ eval local rtdst_name=\${rt_${rtdst}}
+ local rtveth=veth-t${tid}
+ local vpn_sid=${VPN_LOCATOR_SERVICE}:${hssrc}${hsdst}:${tid}::6006
+
+ ip -netns ${rtsrc_name} -6 neigh add proxy ${IPv6_HS_NETWORK}::${hsdst} dev ${rtveth}
+
+ # set the encap route for encapsulating packets which arrive from the
+ # host hssrc and destined to the access router rtsrc.
+ ip -netns ${rtsrc_name} -6 route add ${IPv6_HS_NETWORK}::${hsdst}/128 vrf vrf-${tid} \
+ encap seg6 mode encap segs ${vpn_sid} dev veth0
+ ip -netns ${rtsrc_name} -6 route add ${vpn_sid}/128 vrf vrf-${tid} \
+ via fd00::${rtdst} dev veth0
+
+ # set the decap route for decapsulating packets which arrive from
+ # the rtdst router and destined to the hsdst host.
+ ip -netns ${rtdst_name} -6 route add ${vpn_sid}/128 table ${LOCALSID_TABLE_ID} \
+ encap seg6local action End.DT6 vrftable ${tid} dev vrf-${tid}
+
+ # all sids for VPNs start with a common locator which is fc00::/16.
+ # Routes for handling the SRv6 End.DT6 behavior instances are grouped
+ # together in the 'localsid' table.
+ #
+ # NOTE: added only once
+ if [ -z "$(ip -netns ${rtdst_name} -6 rule show | \
+ grep "to ${VPN_LOCATOR_SERVICE}::/16 lookup ${LOCALSID_TABLE_ID}")" ]; then
+ ip -netns ${rtdst_name} -6 rule add \
+ to ${VPN_LOCATOR_SERVICE}::/16 \
+ lookup ${LOCALSID_TABLE_ID} prio 999
+ fi
+}
+
+setup()
+{
+ ip link add veth-rt-1 type veth peer name veth-rt-2
+ # setup the networking for router rt-1 and router rt-2
+ setup_ns rt_1 rt_2
+ setup_rt_networking 1
+ setup_rt_networking 2
+
+ # setup two hosts for the tenant 100.
+ # - host hs-1 is directly connected to the router rt-1;
+ # - host hs-2 is directly connected to the router rt-2.
+ setup_ns hs_t100_1 hs_t100_2
+ setup_hs 1 1 100 #args: host router tenant
+ setup_hs 2 2 100
+
+ # setup two hosts for the tenant 200
+ # - host hs-3 is directly connected to the router rt-1;
+ # - host hs-4 is directly connected to the router rt-2.
+ setup_ns hs_t200_3 hs_t200_4
+ setup_hs 3 1 200
+ setup_hs 4 2 200
+
+ # setup the IPv6 L3 VPN which connects the host hs-t100-1 and host
+ # hs-t100-2 within the same tenant 100.
+ setup_vpn_config 1 1 2 2 100 #args: src_host src_router dst_host dst_router tenant
+ setup_vpn_config 2 2 1 1 100
+
+ # setup the IPv6 L3 VPN which connects the host hs-t200-3 and host
+ # hs-t200-4 within the same tenant 200.
+ setup_vpn_config 3 1 4 2 200
+ setup_vpn_config 4 2 3 1 200
+}
+
+check_rt_connectivity()
+{
+ local rtsrc=$1
+ local rtdst=$2
+ eval local nsname=\${rt_${rtsrc}}
+
+ ip netns exec ${nsname} ping -c 1 -W 1 ${IPv6_RT_NETWORK}::${rtdst} \
+ >/dev/null 2>&1
+}
+
+check_and_log_rt_connectivity()
+{
+ local rtsrc=$1
+ local rtdst=$2
+
+ check_rt_connectivity ${rtsrc} ${rtdst}
+ log_test $? 0 "Routers connectivity: rt-${rtsrc} -> rt-${rtdst}"
+}
+
+check_hs_connectivity()
+{
+ local hssrc=$1
+ local hsdst=$2
+ local tid=$3
+ eval local nsname=\${hs_t${tid}_${hssrc}}
+
+ ip netns exec ${nsname} ping -c 1 -W ${PING_TIMEOUT_SEC} \
+ ${IPv6_HS_NETWORK}::${hsdst} >/dev/null 2>&1
+}
+
+check_and_log_hs_connectivity()
+{
+ local hssrc=$1
+ local hsdst=$2
+ local tid=$3
+
+ check_hs_connectivity ${hssrc} ${hsdst} ${tid}
+ log_test $? 0 "Hosts connectivity: hs-t${tid}-${hssrc} -> hs-t${tid}-${hsdst} (tenant ${tid})"
+}
+
+check_and_log_hs_isolation()
+{
+ local hssrc=$1
+ local tidsrc=$2
+ local hsdst=$3
+ local tiddst=$4
+
+ check_hs_connectivity ${hssrc} ${hsdst} ${tidsrc}
+ # NOTE: ping should fail
+ log_test $? 1 "Hosts isolation: hs-t${tidsrc}-${hssrc} -X-> hs-t${tiddst}-${hsdst}"
+}
+
+
+check_and_log_hs2gw_connectivity()
+{
+ local hssrc=$1
+ local tid=$2
+
+ check_hs_connectivity ${hssrc} 254 ${tid}
+ log_test $? 0 "Hosts connectivity: hs-t${tid}-${hssrc} -> gw (tenant ${tid})"
+}
+
+router_tests()
+{
+ log_section "IPv6 routers connectivity test"
+
+ check_and_log_rt_connectivity 1 2
+ check_and_log_rt_connectivity 2 1
+}
+
+host2gateway_tests()
+{
+ log_section "IPv6 connectivity test among hosts and gateway"
+
+ check_and_log_hs2gw_connectivity 1 100
+ check_and_log_hs2gw_connectivity 2 100
+
+ check_and_log_hs2gw_connectivity 3 200
+ check_and_log_hs2gw_connectivity 4 200
+}
+
+host_vpn_tests()
+{
+ log_section "SRv6 VPN connectivity test among hosts in the same tenant"
+
+ check_and_log_hs_connectivity 1 2 100
+ check_and_log_hs_connectivity 2 1 100
+
+ check_and_log_hs_connectivity 3 4 200
+ check_and_log_hs_connectivity 4 3 200
+}
+
+host_vpn_isolation_tests()
+{
+ local i
+ local j
+ local k
+ local tmp
+ local l1="1 2"
+ local l2="3 4"
+ local t1=100
+ local t2=200
+
+ log_section "SRv6 VPN isolation test among hosts in different tentants"
+
+ for k in 0 1; do
+ for i in ${l1}; do
+ for j in ${l2}; do
+ check_and_log_hs_isolation ${i} ${t1} ${j} ${t2}
+ done
+ done
+
+ # let us test the reverse path
+ tmp="${l1}"; l1="${l2}"; l2="${tmp}"
+ tmp=${t1}; t1=${t2}; t2=${tmp}
+ done
+}
+
+if [ "$(id -u)" -ne 0 ];then
+ echo "SKIP: Need root privileges"
+ exit $ksft_skip
+fi
+
+if [ ! -x "$(command -v ip)" ]; then
+ echo "SKIP: Could not run test without ip tool"
+ exit $ksft_skip
+fi
+
+modprobe vrf &>/dev/null
+if [ ! -e /proc/sys/net/vrf/strict_mode ]; then
+ echo "SKIP: vrf sysctl does not exist"
+ exit $ksft_skip
+fi
+
+cleanup &>/dev/null
+
+setup
+
+router_tests
+host2gateway_tests
+host_vpn_tests
+host_vpn_isolation_tests
+
+print_log_test_results
+
+cleanup &>/dev/null
+
+exit ${ret}
diff --git a/tools/testing/selftests/net/srv6_end_flavors_test.sh b/tools/testing/selftests/net/srv6_end_flavors_test.sh
new file mode 100755
index 000000000000..50563443a4ad
--- /dev/null
+++ b/tools/testing/selftests/net/srv6_end_flavors_test.sh
@@ -0,0 +1,869 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+#
+# author: Andrea Mayer <andrea.mayer@uniroma2.it>
+# author: Paolo Lungaroni <paolo.lungaroni@uniroma2.it>
+#
+# This script is designed to test the support for "flavors" in the SRv6 End
+# behavior.
+#
+# Flavors defined in RFC8986 [1] represent additional operations that can modify
+# or extend the existing SRv6 End, End.X and End.T behaviors. For the sake of
+# convenience, we report the list of flavors described in [1] hereafter:
+# - Penultimate Segment Pop (PSP);
+# - Ultimate Segment Pop (USP);
+# - Ultimate Segment Decapsulation (USD).
+#
+# The End, End.X, and End.T behaviors can support these flavors either
+# individually or in combinations.
+# Currently in this selftest we consider only the PSP flavor for the SRv6 End
+# behavior. However, it is possible to extend the script as soon as other
+# flavors will be supported in the kernel.
+#
+# The purpose of the PSP flavor consists in instructing the penultimate node
+# listed in the SRv6 policy to remove (i.e. pop) the outermost SRH from the IPv6
+# header.
+# A PSP enabled SRv6 End behavior instance processes the SRH by:
+# - decrementing the Segment Left (SL) value from 1 to 0;
+# - copying the last SID from the SID List into the IPv6 Destination Address
+# (DA);
+# - removing the SRH from the extension headers following the IPv6 header.
+#
+# Once the SRH is removed, the IPv6 packet is forwarded to the destination using
+# the IPv6 DA updated during the PSP operation (i.e. the IPv6 DA corresponding
+# to the last SID carried by the removed SRH).
+#
+# Although the PSP flavor can be set for any SRv6 End behavior instance on any
+# SR node, it will be active only on such behaviors bound to a penultimate SID
+# for a given SRv6 policy.
+# SL=2 SL=1 SL=0
+# | | |
+# For example, given the SRv6 policy (SID List := <X, Y, Z>):
+# - a PSP enabled SRv6 End behavior bound to SID Y will apply the PSP operation
+# as Segment Left (SL) is 1, corresponding to the Penultimate Segment of the
+# SID List;
+# - a PSP enabled SRv6 End behavior bound to SID X will *NOT* apply the PSP
+# operation as the Segment Left is 2. This behavior instance will apply the
+# "standard" End packet processing, ignoring the configured PSP flavor at
+# all.
+#
+# [1] RFC8986: https://datatracker.ietf.org/doc/html/rfc8986
+#
+# Network topology
+# ================
+#
+# The network topology used in this selftest is depicted hereafter, composed by
+# two hosts (hs-1, hs-2) and four routers (rt-1, rt-2, rt-3, rt-4).
+# Hosts hs-1 and hs-2 are connected to routers rt-1 and rt-2, respectively,
+# allowing them to communicate with each other.
+# Traffic exchanged between hs-1 and hs-2 can follow different network paths.
+# The network operator, through specific SRv6 Policies can steer traffic to one
+# path rather than another. In this selftest this is implemented as follows:
+#
+# i) The SRv6 H.Insert behavior applies SRv6 Policies on traffic received by
+# connected hosts. It pushes the Segment Routing Header (SRH) after the
+# IPv6 header. The SRH contains the SID List (i.e. SRv6 Policy) needed for
+# steering traffic across the segments/waypoints specified in that list;
+#
+# ii) The SRv6 End behavior advances the active SID in the SID List carried by
+# the SRH;
+#
+# iii) The PSP enabled SRv6 End behavior is used to remove the SRH when such
+# behavior is configured on a node bound to the Penultimate Segment carried
+# by the SID List.
+#
+# cafe::1 cafe::2
+# +--------+ +--------+
+# | | | |
+# | hs-1 | | hs-2 |
+# | | | |
+# +---+----+ +--- +---+
+# cafe::/64 | | cafe::/64
+# | |
+# +---+----+ +----+---+
+# | | fcf0:0:1:2::/64 | |
+# | rt-1 +-------------------+ rt-2 |
+# | | | |
+# +---+----+ +----+---+
+# | . . |
+# | fcf0:0:1:3::/64 . |
+# | . . |
+# | . . |
+# fcf0:0:1:4::/64 | . | fcf0:0:2:3::/64
+# | . . |
+# | . . |
+# | fcf0:0:2:4::/64 . |
+# | . . |
+# +---+----+ +----+---+
+# | | | |
+# | rt-4 +-------------------+ rt-3 |
+# | | fcf0:0:3:4::/64 | |
+# +---+----+ +----+---+
+#
+# Every fcf0:0:x:y::/64 network interconnects the SRv6 routers rt-x with rt-y in
+# the IPv6 operator network.
+#
+#
+# Local SID table
+# ===============
+#
+# Each SRv6 router is configured with a Local SID table in which SIDs are
+# stored. Considering the given SRv6 router rt-x, at least two SIDs are
+# configured in the Local SID table:
+#
+# Local SID table for SRv6 router rt-x
+# +---------------------------------------------------------------------+
+# |fcff:x::e is associated with the SRv6 End behavior |
+# |fcff:x::ef1 is associated with the SRv6 End behavior with PSP flavor |
+# +---------------------------------------------------------------------+
+#
+# The fcff::/16 prefix is reserved by the operator for the SIDs. Reachability of
+# SIDs is ensured by proper configuration of the IPv6 operator's network and
+# SRv6 routers.
+#
+#
+# SRv6 Policies
+# =============
+#
+# An SRv6 ingress router applies different SRv6 Policies to the traffic received
+# from connected hosts on the basis of the destination addresses.
+# In case of SRv6 H.Insert behavior, the SRv6 Policy enforcement consists of
+# pushing the SRH (carrying a given SID List) after the existing IPv6 header.
+# Note that in the inserting mode, there is no encapsulation at all.
+#
+# Before applying an SRv6 Policy using the SRv6 H.Insert behavior
+# +------+---------+
+# | IPv6 | Payload |
+# +------+---------+
+#
+# After applying an SRv6 Policy using the SRv6 H.Insert behavior
+# +------+-----+---------+
+# | IPv6 | SRH | Payload |
+# +------+-----+---------+
+#
+# Traffic from hs-1 to hs-2
+# -------------------------
+#
+# Packets generated from hs-1 and directed towards hs-2 are
+# handled by rt-1 which applies the following SRv6 Policy:
+#
+# i.a) IPv6 traffic, SID List=fcff:3::e,fcff:4::ef1,fcff:2::ef1,cafe::2
+#
+# Router rt-1 is configured to enforce the Policy (i.a) through the SRv6
+# H.Insert behavior which pushes the SRH after the existing IPv6 header. This
+# Policy steers the traffic from hs-1 across rt-3, rt-4, rt-2 and finally to the
+# destination hs-2.
+#
+# As the packet reaches the router rt-3, the SRv6 End behavior bound to SID
+# fcff:3::e is triggered. The behavior updates the Segment Left (from SL=3 to
+# SL=2) in the SRH, the IPv6 DA with fcff:4::ef1 and forwards the packet to the
+# next router on the path, i.e. rt-4.
+#
+# When router rt-4 receives the packet, the PSP enabled SRv6 End behavior bound
+# to SID fcff:4::ef1 is executed. Since the SL=2, the PSP operation is *NOT*
+# kicked in and the behavior applies the default End processing: the Segment
+# Left is decreased (from SL=2 to SL=1), the IPv6 DA is updated with the SID
+# fcff:2::ef1 and the packet is forwarded to router rt-2.
+#
+# The PSP enabled SRv6 End behavior on rt-2 is associated with SID fcff:2::ef1
+# and is executed as the packet is received. Because SL=1, the behavior applies
+# the PSP processing on the packet as follows: i) SL is decreased, i.e. from
+# SL=1 to SL=0; ii) last SID (cafe::2) is copied into the IPv6 DA; iii) the
+# outermost SRH is removed from the extension headers following the IPv6 header.
+# Once the PSP processing is completed, the packet is forwarded to the host hs-2
+# (destination).
+#
+# Traffic from hs-2 to hs-1
+# -------------------------
+#
+# Packets generated from hs-2 and directed to hs-1 are handled by rt-2 which
+# applies the following SRv6 Policy:
+#
+# i.b) IPv6 traffic, SID List=fcff:1::ef1,cafe::1
+#
+# Router rt-2 is configured to enforce the Policy (i.b) through the SRv6
+# H.Insert behavior which pushes the SRH after the existing IPv6 header. This
+# Policy steers the traffic from hs-2 across rt-1 and finally to the
+# destination hs-1
+#
+#
+# When the router rt-1 receives the packet, the PSP enabled SRv6 End behavior
+# associated with the SID fcff:1::ef1 is triggered. Since the SL=1,
+# the PSP operation takes place: i) the SL is decremented; ii) the IPv6 DA is
+# set with the last SID; iii) the SRH is removed from the extension headers
+# after the IPv6 header. At this point, the packet with IPv6 DA=cafe::1 is sent
+# to the destination, i.e. hs-1.
+
+# Kselftest framework requirement - SKIP code is 4.
+readonly ksft_skip=4
+
+readonly RDMSUFF="$(mktemp -u XXXXXXXX)"
+readonly DUMMY_DEVNAME="dum0"
+readonly RT2HS_DEVNAME="veth1"
+readonly LOCALSID_TABLE_ID=90
+readonly IPv6_RT_NETWORK=fcf0:0
+readonly IPv6_HS_NETWORK=cafe
+readonly IPv6_TESTS_ADDR=2001:db8::1
+readonly LOCATOR_SERVICE=fcff
+readonly END_FUNC=000e
+readonly END_PSP_FUNC=0ef1
+
+PING_TIMEOUT_SEC=4
+PAUSE_ON_FAIL=${PAUSE_ON_FAIL:=no}
+
+# IDs of routers and hosts are initialized during the setup of the testing
+# network
+ROUTERS=''
+HOSTS=''
+
+SETUP_ERR=1
+
+ret=${ksft_skip}
+nsuccess=0
+nfail=0
+
+log_test()
+{
+ local rc="$1"
+ local expected="$2"
+ local msg="$3"
+
+ if [ "${rc}" -eq "${expected}" ]; then
+ nsuccess=$((nsuccess+1))
+ printf "\n TEST: %-60s [ OK ]\n" "${msg}"
+ else
+ ret=1
+ nfail=$((nfail+1))
+ printf "\n TEST: %-60s [FAIL]\n" "${msg}"
+ if [ "${PAUSE_ON_FAIL}" = "yes" ]; then
+ echo
+ echo "hit enter to continue, 'q' to quit"
+ read a
+ [ "$a" = "q" ] && exit 1
+ fi
+ fi
+}
+
+print_log_test_results()
+{
+ printf "\nTests passed: %3d\n" "${nsuccess}"
+ printf "Tests failed: %3d\n" "${nfail}"
+
+ # when a test fails, the value of 'ret' is set to 1 (error code).
+ # Conversely, when all tests are passed successfully, the 'ret' value
+ # is set to 0 (success code).
+ if [ "${ret}" -ne 1 ]; then
+ ret=0
+ fi
+}
+
+log_section()
+{
+ echo
+ echo "################################################################################"
+ echo "TEST SECTION: $*"
+ echo "################################################################################"
+}
+
+test_command_or_ksft_skip()
+{
+ local cmd="$1"
+
+ if [ ! -x "$(command -v "${cmd}")" ]; then
+ echo "SKIP: Could not run test without \"${cmd}\" tool";
+ exit "${ksft_skip}"
+ fi
+}
+
+get_nodename()
+{
+ local name="$1"
+
+ echo "${name}-${RDMSUFF}"
+}
+
+get_rtname()
+{
+ local rtid="$1"
+
+ get_nodename "rt-${rtid}"
+}
+
+get_hsname()
+{
+ local hsid="$1"
+
+ get_nodename "hs-${hsid}"
+}
+
+__create_namespace()
+{
+ local name="$1"
+
+ ip netns add "${name}"
+}
+
+create_router()
+{
+ local rtid="$1"
+ local nsname
+
+ nsname="$(get_rtname "${rtid}")"
+
+ __create_namespace "${nsname}"
+}
+
+create_host()
+{
+ local hsid="$1"
+ local nsname
+
+ nsname="$(get_hsname "${hsid}")"
+
+ __create_namespace "${nsname}"
+}
+
+cleanup()
+{
+ local nsname
+ local i
+
+ # destroy routers
+ for i in ${ROUTERS}; do
+ nsname="$(get_rtname "${i}")"
+
+ ip netns del "${nsname}" &>/dev/null || true
+ done
+
+ # destroy hosts
+ for i in ${HOSTS}; do
+ nsname="$(get_hsname "${i}")"
+
+ ip netns del "${nsname}" &>/dev/null || true
+ done
+
+ # check whether the setup phase was completed successfully or not. In
+ # case of an error during the setup phase of the testing environment,
+ # the selftest is considered as "skipped".
+ if [ "${SETUP_ERR}" -ne 0 ]; then
+ echo "SKIP: Setting up the testing environment failed"
+ exit "${ksft_skip}"
+ fi
+
+ exit "${ret}"
+}
+
+add_link_rt_pairs()
+{
+ local rt="$1"
+ local rt_neighs="$2"
+ local neigh
+ local nsname
+ local neigh_nsname
+
+ nsname="$(get_rtname "${rt}")"
+
+ for neigh in ${rt_neighs}; do
+ neigh_nsname="$(get_rtname "${neigh}")"
+
+ ip link add "veth-rt-${rt}-${neigh}" netns "${nsname}" \
+ type veth peer name "veth-rt-${neigh}-${rt}" \
+ netns "${neigh_nsname}"
+ done
+}
+
+get_network_prefix()
+{
+ local rt="$1"
+ local neigh="$2"
+ local p="${rt}"
+ local q="${neigh}"
+
+ if [ "${p}" -gt "${q}" ]; then
+ p="${q}"; q="${rt}"
+ fi
+
+ echo "${IPv6_RT_NETWORK}:${p}:${q}"
+}
+
+# Given the description of a router <id:op> as an input, the function returns
+# the <id> token which represents the ID of the router.
+# i.e. input: "12:psp"
+# output: "12"
+__get_srv6_rtcfg_id()
+{
+ local element="$1"
+
+ echo "${element}" | cut -d':' -f1
+}
+
+# Given the description of a router <id:op> as an input, the function returns
+# the <op> token which represents the operation (e.g. End behavior with or
+# withouth flavors) configured for the node.
+
+# Note that when the operation represents an End behavior with a list of
+# flavors, the output is the ordered version of that list.
+# i.e. input: "5:usp,psp,usd"
+# output: "psp,usd,usp"
+__get_srv6_rtcfg_op()
+{
+ local element="$1"
+
+ # return the lexicographically ordered flavors
+ echo "${element}" | cut -d':' -f2 | sed 's/,/\n/g' | sort | \
+ xargs | sed 's/ /,/g'
+}
+
+# Setup the basic networking for the routers
+setup_rt_networking()
+{
+ local rt="$1"
+ local rt_neighs="$2"
+ local nsname
+ local net_prefix
+ local devname
+ local neigh
+
+ nsname="$(get_rtname "${rt}")"
+
+ for neigh in ${rt_neighs}; do
+ devname="veth-rt-${rt}-${neigh}"
+
+ net_prefix="$(get_network_prefix "${rt}" "${neigh}")"
+
+ ip -netns "${nsname}" addr \
+ add "${net_prefix}::${rt}/64" dev "${devname}" nodad
+
+ ip -netns "${nsname}" link set "${devname}" up
+ done
+
+ ip -netns "${nsname}" link set lo up
+
+ ip -netns "${nsname}" link add ${DUMMY_DEVNAME} type dummy
+ ip -netns "${nsname}" link set ${DUMMY_DEVNAME} up
+
+ ip netns exec "${nsname}" sysctl -wq net.ipv6.conf.all.accept_dad=0
+ ip netns exec "${nsname}" sysctl -wq net.ipv6.conf.default.accept_dad=0
+ ip netns exec "${nsname}" sysctl -wq net.ipv6.conf.all.forwarding=1
+}
+
+# Setup local SIDs for an SRv6 router
+setup_rt_local_sids()
+{
+ local rt="$1"
+ local rt_neighs="$2"
+ local net_prefix
+ local devname
+ local nsname
+ local neigh
+
+ nsname="$(get_rtname "${rt}")"
+
+ for neigh in ${rt_neighs}; do
+ devname="veth-rt-${rt}-${neigh}"
+
+ net_prefix="$(get_network_prefix "${rt}" "${neigh}")"
+
+ # set underlay network routes for SIDs reachability
+ ip -netns "${nsname}" -6 route \
+ add "${LOCATOR_SERVICE}:${neigh}::/32" \
+ table "${LOCALSID_TABLE_ID}" \
+ via "${net_prefix}::${neigh}" dev "${devname}"
+ done
+
+ # Local End behavior (note that "dev" is a dummy interface chosen for
+ # the sake of simplicity).
+ ip -netns "${nsname}" -6 route \
+ add "${LOCATOR_SERVICE}:${rt}::${END_FUNC}" \
+ table "${LOCALSID_TABLE_ID}" \
+ encap seg6local action End dev "${DUMMY_DEVNAME}"
+
+
+ # all SIDs start with a common locator. Routes and SRv6 Endpoint
+ # behavior instaces are grouped together in the 'localsid' table.
+ ip -netns "${nsname}" -6 rule \
+ add to "${LOCATOR_SERVICE}::/16" \
+ lookup "${LOCALSID_TABLE_ID}" prio 999
+
+ # set default routes to unreachable
+ ip -netns "${nsname}" -6 route \
+ add unreachable default metric 4278198272 \
+ dev "${DUMMY_DEVNAME}"
+}
+
+# This helper function builds and installs the SID List (i.e. SRv6 Policy)
+# to be applied on incoming packets at the ingress node. Moreover, it
+# configures the SRv6 nodes specified in the SID List to process the traffic
+# according to the operations required by the Policy itself.
+# args:
+# $1 - destination host (i.e. cafe::x host)
+# $2 - SRv6 router configured for enforcing the SRv6 Policy
+# $3 - compact way to represent a list of SRv6 routers with their operations
+# (i.e. behaviors) that each of them needs to perform. Every <nodeid:op>
+# element constructs a SID that is associated with the behavior <op> on
+# the <nodeid> node. The list of such elements forms an SRv6 Policy.
+__setup_rt_policy()
+{
+ local dst="$1"
+ local encap_rt="$2"
+ local policy_rts="$3"
+ local behavior_cfg
+ local in_nsname
+ local rt_nsname
+ local policy=''
+ local function
+ local fullsid
+ local op_type
+ local node
+ local n
+
+ in_nsname="$(get_rtname "${encap_rt}")"
+
+ for n in ${policy_rts}; do
+ node="$(__get_srv6_rtcfg_id "${n}")"
+ op_type="$(__get_srv6_rtcfg_op "${n}")"
+ rt_nsname="$(get_rtname "${node}")"
+
+ case "${op_type}" in
+ "noflv")
+ policy="${policy}${LOCATOR_SERVICE}:${node}::${END_FUNC},"
+ function="${END_FUNC}"
+ behavior_cfg="End"
+ ;;
+
+ "psp")
+ policy="${policy}${LOCATOR_SERVICE}:${node}::${END_PSP_FUNC},"
+ function="${END_PSP_FUNC}"
+ behavior_cfg="End flavors psp"
+ ;;
+
+ *)
+ break
+ ;;
+ esac
+
+ fullsid="${LOCATOR_SERVICE}:${node}::${function}"
+
+ # add SRv6 Endpoint behavior to the selected router
+ if ! ip -netns "${rt_nsname}" -6 route get "${fullsid}" \
+ &>/dev/null; then
+ ip -netns "${rt_nsname}" -6 route \
+ add "${fullsid}" \
+ table "${LOCALSID_TABLE_ID}" \
+ encap seg6local action ${behavior_cfg} \
+ dev "${DUMMY_DEVNAME}"
+ fi
+ done
+
+ # we need to remove the trailing comma to avoid inserting an empty
+ # address (::0) in the SID List.
+ policy="${policy%,}"
+
+ # add SRv6 policy to incoming traffic sent by connected hosts
+ ip -netns "${in_nsname}" -6 route \
+ add "${IPv6_HS_NETWORK}::${dst}" \
+ encap seg6 mode inline segs "${policy}" \
+ dev "${DUMMY_DEVNAME}"
+
+ ip -netns "${in_nsname}" -6 neigh \
+ add proxy "${IPv6_HS_NETWORK}::${dst}" \
+ dev "${RT2HS_DEVNAME}"
+}
+
+# see __setup_rt_policy
+setup_rt_policy_ipv6()
+{
+ __setup_rt_policy "$1" "$2" "$3"
+}
+
+setup_hs()
+{
+ local hs="$1"
+ local rt="$2"
+ local hsname
+ local rtname
+
+ hsname="$(get_hsname "${hs}")"
+ rtname="$(get_rtname "${rt}")"
+
+ ip netns exec "${hsname}" sysctl -wq net.ipv6.conf.all.accept_dad=0
+ ip netns exec "${hsname}" sysctl -wq net.ipv6.conf.default.accept_dad=0
+
+ ip -netns "${hsname}" link add veth0 type veth \
+ peer name "${RT2HS_DEVNAME}" netns "${rtname}"
+
+ ip -netns "${hsname}" addr \
+ add "${IPv6_HS_NETWORK}::${hs}/64" dev veth0 nodad
+
+ ip -netns "${hsname}" link set veth0 up
+ ip -netns "${hsname}" link set lo up
+
+ ip -netns "${rtname}" addr \
+ add "${IPv6_HS_NETWORK}::254/64" dev "${RT2HS_DEVNAME}" nodad
+
+ ip -netns "${rtname}" link set "${RT2HS_DEVNAME}" up
+
+ ip netns exec "${rtname}" \
+ sysctl -wq net.ipv6.conf."${RT2HS_DEVNAME}".proxy_ndp=1
+}
+
+setup()
+{
+ local i
+
+ # create routers
+ ROUTERS="1 2 3 4"; readonly ROUTERS
+ for i in ${ROUTERS}; do
+ create_router "${i}"
+ done
+
+ # create hosts
+ HOSTS="1 2"; readonly HOSTS
+ for i in ${HOSTS}; do
+ create_host "${i}"
+ done
+
+ # set up the links for connecting routers
+ add_link_rt_pairs 1 "2 3 4"
+ add_link_rt_pairs 2 "3 4"
+ add_link_rt_pairs 3 "4"
+
+ # set up the basic connectivity of routers and routes required for
+ # reachability of SIDs.
+ setup_rt_networking 1 "2 3 4"
+ setup_rt_networking 2 "1 3 4"
+ setup_rt_networking 3 "1 2 4"
+ setup_rt_networking 4 "1 2 3"
+
+ # set up the hosts connected to routers
+ setup_hs 1 1
+ setup_hs 2 2
+
+ # set up default SRv6 Endpoints (i.e. SRv6 End behavior)
+ setup_rt_local_sids 1 "2 3 4"
+ setup_rt_local_sids 2 "1 3 4"
+ setup_rt_local_sids 3 "1 2 4"
+ setup_rt_local_sids 4 "1 2 3"
+
+ # set up SRv6 policies
+ # create a connection between hosts hs-1 and hs-2.
+ # The path between hs-1 and hs-2 traverses SRv6 aware routers.
+ # For each direction two path are chosen:
+ #
+ # Direction hs-1 -> hs-2 (PSP flavor)
+ # - rt-1 (SRv6 H.Insert policy)
+ # - rt-3 (SRv6 End behavior)
+ # - rt-4 (SRv6 End flavor PSP with SL>1, acting as End behavior)
+ # - rt-2 (SRv6 End flavor PSP with SL=1)
+ #
+ # Direction hs-2 -> hs-1 (PSP flavor)
+ # - rt-2 (SRv6 H.Insert policy)
+ # - rt-1 (SRv6 End flavor PSP with SL=1)
+ setup_rt_policy_ipv6 2 1 "3:noflv 4:psp 2:psp"
+ setup_rt_policy_ipv6 1 2 "1:psp"
+
+ # testing environment was set up successfully
+ SETUP_ERR=0
+}
+
+check_rt_connectivity()
+{
+ local rtsrc="$1"
+ local rtdst="$2"
+ local prefix
+ local rtsrc_nsname
+
+ rtsrc_nsname="$(get_rtname "${rtsrc}")"
+
+ prefix="$(get_network_prefix "${rtsrc}" "${rtdst}")"
+
+ ip netns exec "${rtsrc_nsname}" ping -c 1 -W "${PING_TIMEOUT_SEC}" \
+ "${prefix}::${rtdst}" >/dev/null 2>&1
+}
+
+check_and_log_rt_connectivity()
+{
+ local rtsrc="$1"
+ local rtdst="$2"
+
+ check_rt_connectivity "${rtsrc}" "${rtdst}"
+ log_test $? 0 "Routers connectivity: rt-${rtsrc} -> rt-${rtdst}"
+}
+
+check_hs_ipv6_connectivity()
+{
+ local hssrc="$1"
+ local hsdst="$2"
+ local hssrc_nsname
+
+ hssrc_nsname="$(get_hsname "${hssrc}")"
+
+ ip netns exec "${hssrc_nsname}" ping -c 1 -W "${PING_TIMEOUT_SEC}" \
+ "${IPv6_HS_NETWORK}::${hsdst}" >/dev/null 2>&1
+}
+
+check_and_log_hs2gw_connectivity()
+{
+ local hssrc="$1"
+
+ check_hs_ipv6_connectivity "${hssrc}" 254
+ log_test $? 0 "IPv6 Hosts connectivity: hs-${hssrc} -> gw"
+}
+
+check_and_log_hs_ipv6_connectivity()
+{
+ local hssrc="$1"
+ local hsdst="$2"
+
+ check_hs_ipv6_connectivity "${hssrc}" "${hsdst}"
+ log_test $? 0 "IPv6 Hosts connectivity: hs-${hssrc} -> hs-${hsdst}"
+}
+
+check_and_log_hs_connectivity()
+{
+ local hssrc="$1"
+ local hsdst="$2"
+
+ check_and_log_hs_ipv6_connectivity "${hssrc}" "${hsdst}"
+}
+
+router_tests()
+{
+ local i
+ local j
+
+ log_section "IPv6 routers connectivity test"
+
+ for i in ${ROUTERS}; do
+ for j in ${ROUTERS}; do
+ if [ "${i}" -eq "${j}" ]; then
+ continue
+ fi
+
+ check_and_log_rt_connectivity "${i}" "${j}"
+ done
+ done
+}
+
+host2gateway_tests()
+{
+ local hs
+
+ log_section "IPv6 connectivity test among hosts and gateways"
+
+ for hs in ${HOSTS}; do
+ check_and_log_hs2gw_connectivity "${hs}"
+ done
+}
+
+host_srv6_end_flv_psp_tests()
+{
+ log_section "SRv6 connectivity test hosts (h1 <-> h2, PSP flavor)"
+
+ check_and_log_hs_connectivity 1 2
+ check_and_log_hs_connectivity 2 1
+}
+
+test_iproute2_supp_or_ksft_skip()
+{
+ local flavor="$1"
+
+ if ! ip route help 2>&1 | grep -qo "${flavor}"; then
+ echo "SKIP: Missing SRv6 ${flavor} flavor support in iproute2"
+ exit "${ksft_skip}"
+ fi
+}
+
+test_kernel_supp_or_ksft_skip()
+{
+ local flavor="$1"
+ local test_netns
+
+ test_netns="kflv-$(mktemp -u XXXXXXXX)"
+
+ if ! ip netns add "${test_netns}"; then
+ echo "SKIP: Cannot set up netns to test kernel support for flavors"
+ exit "${ksft_skip}"
+ fi
+
+ if ! ip -netns "${test_netns}" link \
+ add "${DUMMY_DEVNAME}" type dummy; then
+ echo "SKIP: Cannot set up dummy dev to test kernel support for flavors"
+
+ ip netns del "${test_netns}"
+ exit "${ksft_skip}"
+ fi
+
+ if ! ip -netns "${test_netns}" link \
+ set "${DUMMY_DEVNAME}" up; then
+ echo "SKIP: Cannot activate dummy dev to test kernel support for flavors"
+
+ ip netns del "${test_netns}"
+ exit "${ksft_skip}"
+ fi
+
+ if ! ip -netns "${test_netns}" -6 route \
+ add "${IPv6_TESTS_ADDR}" encap seg6local \
+ action End flavors "${flavor}" dev "${DUMMY_DEVNAME}"; then
+ echo "SKIP: ${flavor} flavor not supported in kernel"
+
+ ip netns del "${test_netns}"
+ exit "${ksft_skip}"
+ fi
+
+ ip netns del "${test_netns}"
+}
+
+test_dummy_dev_or_ksft_skip()
+{
+ local test_netns
+
+ test_netns="dummy-$(mktemp -u XXXXXXXX)"
+
+ if ! ip netns add "${test_netns}"; then
+ echo "SKIP: Cannot set up netns for testing dummy dev support"
+ exit "${ksft_skip}"
+ fi
+
+ modprobe dummy &>/dev/null || true
+ if ! ip -netns "${test_netns}" link \
+ add "${DUMMY_DEVNAME}" type dummy; then
+ echo "SKIP: dummy dev not supported"
+
+ ip netns del "${test_netns}"
+ exit "${ksft_skip}"
+ fi
+
+ ip netns del "${test_netns}"
+}
+
+if [ "$(id -u)" -ne 0 ]; then
+ echo "SKIP: Need root privileges"
+ exit "${ksft_skip}"
+fi
+
+# required programs to carry out this selftest
+test_command_or_ksft_skip ip
+test_command_or_ksft_skip ping
+test_command_or_ksft_skip sysctl
+test_command_or_ksft_skip grep
+test_command_or_ksft_skip cut
+test_command_or_ksft_skip sed
+test_command_or_ksft_skip sort
+test_command_or_ksft_skip xargs
+
+test_dummy_dev_or_ksft_skip
+test_iproute2_supp_or_ksft_skip psp
+test_kernel_supp_or_ksft_skip psp
+
+set -e
+trap cleanup EXIT
+
+setup
+set +e
+
+router_tests
+host2gateway_tests
+host_srv6_end_flv_psp_tests
+
+print_log_test_results
diff --git a/tools/testing/selftests/net/srv6_end_next_csid_l3vpn_test.sh b/tools/testing/selftests/net/srv6_end_next_csid_l3vpn_test.sh
new file mode 100755
index 000000000000..87e414cc417c
--- /dev/null
+++ b/tools/testing/selftests/net/srv6_end_next_csid_l3vpn_test.sh
@@ -0,0 +1,1145 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+#
+# author: Andrea Mayer <andrea.mayer@uniroma2.it>
+#
+# This script is designed for testing the support of NEXT-C-SID flavor for SRv6
+# End behavior.
+# A basic knowledge of SRv6 architecture [1] and of the compressed SID approach
+# [2] is assumed for the reader.
+#
+# The network topology used in the selftest is depicted hereafter, composed by
+# two hosts and four routers. Hosts hs-1 and hs-2 are connected through an
+# IPv4/IPv6 L3 VPN service, offered by routers rt-1, rt-2, rt-3 and rt-4 using
+# the NEXT-C-SID flavor. The key components for such VPNs are:
+#
+# i) The SRv6 H.Encaps/H.Encaps.Red behaviors [1] apply SRv6 Policies on
+# traffic received by connected hosts, initiating the VPN tunnel;
+#
+# ii) The SRv6 End behavior [1] advances the active SID in the SID List
+# carried by the SRH;
+#
+# iii) The NEXT-C-SID mechanism [2] offers the possibility of encoding several
+# SRv6 segments within a single 128-bit SID address, referred to as a
+# Compressed SID (C-SID) container. In this way, the length of the SID
+# List can be drastically reduced.
+# The NEXT-C-SID is provided as a "flavor" of the SRv6 End behavior
+# which advances the current C-SID (i.e. the Locator-Node Function defined
+# in [2]) with the next one carried in the Argument, if available.
+# When no more C-SIDs are available in the Argument, the SRv6 End behavior
+# will apply the End function selecting the next SID in the SID List.
+#
+# iv) The SRv6 End.DT46 behavior [1] is used for removing the SRv6 Policy and,
+# thus, it terminates the VPN tunnel. Such a behavior is capable of
+# handling, at the same time, both tunneled IPv4 and IPv6 traffic.
+#
+# [1] https://datatracker.ietf.org/doc/html/rfc8986
+# [2] https://datatracker.ietf.org/doc/html/draft-ietf-spring-srv6-srh-compression
+#
+#
+# cafe::1 cafe::2
+# 10.0.0.1 10.0.0.2
+# +--------+ +--------+
+# | | | |
+# | hs-1 | | hs-2 |
+# | | | |
+# +---+----+ +----+---+
+# cafe::/64 | | cafe::/64
+# 10.0.0.0/24 | | 10.0.0.0/24
+# +---+----+ +----+---+
+# | | fcf0:0:1:2::/64 | |
+# | rt-1 +-------------------+ rt-2 |
+# | | | |
+# +---+----+ +----+---+
+# | . . |
+# | fcf0:0:1:3::/64 . |
+# | . . |
+# | . . |
+# fcf0:0:1:4::/64 | . | fcf0:0:2:3::/64
+# | . . |
+# | . . |
+# | fcf0:0:2:4::/64 . |
+# | . . |
+# +---+----+ +----+---+
+# | | | |
+# | rt-4 +-------------------+ rt-3 |
+# | | fcf0:0:3:4::/64 | |
+# +---+----+ +----+---+
+#
+# Every fcf0:0:x:y::/64 network interconnects the SRv6 routers rt-x with rt-y in
+# the selftest network.
+#
+# Local SID/C-SID table
+# =====================
+#
+# Each SRv6 router is configured with a Local SID/C-SID table in which
+# SIDs/C-SIDs are stored. Considering an SRv6 router rt-x, SIDs/C-SIDs are
+# configured in the Local SID/C-SIDs table as follows:
+#
+# Local SID/C-SID table for SRv6 router rt-x
+# +-----------------------------------------------------------+
+# |fcff:x::d46 is associated with the non-compressed SRv6 |
+# | End.DT46 behavior |
+# +-----------------------------------------------------------+
+# |fcbb:0:0x00::/48 is associated with the NEXT-C-SID flavor |
+# | of SRv6 End behavior |
+# +-----------------------------------------------------------+
+# |fcbb:0:0x00:d46::/64 is associated with the SRv6 End.DT46 |
+# | behavior when NEXT-C-SID compression is turned on |
+# +-----------------------------------------------------------+
+#
+# The fcff::/16 prefix is reserved for implementing SRv6 services with regular
+# (non compressed) SIDs. Reachability of SIDs is ensured by proper configuration
+# of the IPv6 routing tables in the routers.
+# Similarly, the fcbb:0::/32 prefix is reserved for implementing SRv6 VPN
+# services leveraging the NEXT-C-SID compression mechanism. Indeed, the
+# fcbb:0::/32 is used for encoding the Locator-Block while the Locator-Node
+# Function is encoded with 16 bits.
+#
+# Incoming traffic classification and application of SRv6 Policies
+# ================================================================
+#
+# An SRv6 ingress router applies different SRv6 Policies to the traffic received
+# from a connected host, considering the IPv4 or IPv6 destination address.
+# SRv6 policy enforcement consists of encapsulating the received traffic into a
+# new IPv6 packet with a given SID List contained in the SRH.
+# When the SID List contains only one SID, the SRH could be omitted completely
+# and that SID is stored directly in the IPv6 Destination Address (DA) (this is
+# called "reduced" encapsulation).
+#
+# Test cases for NEXT-C-SID
+# =========================
+#
+# We consider two test cases for NEXT-C-SID: i) single SID and ii) double SID.
+#
+# In the single SID test case we have a number of segments that are all
+# contained in a single Compressed SID (C-SID) container. Therefore the
+# resulting SID List has only one SID. Using the reduced encapsulation format
+# this will result in a packet with no SRH.
+#
+# In the double SID test case we have one segment carried in a Compressed SID
+# (C-SID) container, followed by a regular (non compressed) SID. The resulting
+# SID List has two segments and it is possible to test the advance to the next
+# SID when all the C-SIDs in a C-SID container have been processed. Using the
+# reduced encapsulation format this will result in a packet with an SRH
+# containing 1 segment.
+#
+# For the single SID test case, we use the IPv4 addresses of hs-1 and hs-2, for
+# the double SID test case, we use their IPv6 addresses. This is only done to
+# simplify the test setup and avoid adding other hosts or multiple addresses on
+# the same interface of a host.
+#
+# Traffic from hs-1 to hs-2
+# -------------------------
+#
+# Packets generated from hs-1 and directed towards hs-2 are handled by rt-1
+# which applies the SRv6 Policies as follows:
+#
+# i) IPv6 DA=cafe::2, H.Encaps.Red with SID List=fcbb:0:0400:0300:0200:d46::
+# ii) IPv4 DA=10.0.0.2, H.Encaps.Red with SID List=fcbb:0:0300::,fcff:2::d46
+#
+# ### i) single SID
+#
+# The router rt-1 is configured to enforce the given Policy through the SRv6
+# H.Encaps.Red behavior which avoids the presence of the SRH at all, since it
+# pushes the single SID directly in the IPv6 DA. Such a SID encodes a whole
+# C-SID container carrying several C-SIDs (e.g. 0400, 0300, etc).
+#
+# As the packet reaches the router rt-4, the enabled NEXT-C-SID SRv6 End
+# behavior (associated with fcbb:0:0400::/48) is triggered. This behavior
+# analyzes the IPv6 DA and checks whether the Argument of the C-SID container
+# is zero or not. In this case, the Argument is *NOT* zero and the IPv6 DA is
+# updated as follows:
+#
+# +---------------------------------------------------------------+
+# | Before applying the rt-4 enabled NEXT-C-SID SRv6 End behavior |
+# +---------------------------------------------------------------+
+# | +---------- Argument |
+# | vvvvvvvvvvvvvvvv |
+# | IPv6 DA fcbb:0:0400:0300:0200:d46:: |
+# | ^^^^ <-- shifting |
+# | | |
+# | Locator-Node Function |
+# +---------------------------------------------------------------+
+# | After applying the rt-4 enabled NEXT-C-SID SRv6 End behavior |
+# +---------------------------------------------------------------+
+# | +---------- Argument |
+# | vvvvvvvvvvvv |
+# | IPv6 DA fcbb:0:0300:0200:d46:: |
+# | ^^^^ |
+# | | |
+# | Locator-Node Function |
+# +---------------------------------------------------------------+
+#
+# After having applied the enabled NEXT-C-SID SRv6 End behavior, the packet is
+# sent to the next node, i.e. rt-3.
+#
+# The enabled NEXT-C-SID SRv6 End behavior on rt-3 is executed as the packet is
+# received. This behavior processes the packet and updates the IPv6 DA with
+# fcbb:0:0200:d46::, since the Argument is *NOT* zero. Then, the packet is sent
+# to the router rt-2.
+#
+# The router rt-2 is configured for decapsulating the inner IPv6 packet and,
+# for this reason, it applies the SRv6 End.DT46 behavior on the received
+# packet. It is worth noting that the SRv6 End.DT46 behavior does not require
+# the presence of the SRH: it is fully capable to operate properly on
+# IPv4/IPv6-in-IPv6 encapsulations.
+# At the end of the decap operation, the packet is sent to the
+# host hs-2.
+#
+# ### ii) double SID
+#
+# The router rt-1 is configured to enforce the given Policy through the SRv6
+# H.Encaps.Red. As a result, the first SID fcbb:0:0300:: is stored into the
+# IPv6 DA, while the SRH pushed into the packet is made of only one SID, i.e.
+# fcff:2::d46. Hence, the packet sent by hs-1 to hs-2 is encapsulated in an
+# outer IPv6 header plus the SRH.
+#
+# As the packet reaches the node rt-3, the router applies the enabled NEXT-C-SID
+# SRv6 End behavior.
+#
+# +---------------------------------------------------------------+
+# | Before applying the rt-3 enabled NEXT-C-SID SRv6 End behavior |
+# +---------------------------------------------------------------+
+# | +---------- Argument |
+# | vvvv (Argument is all filled with zeros) |
+# | IPv6 DA fcbb:0:0300:: |
+# | ^^^^ |
+# | | |
+# | Locator-Node Function |
+# +---------------------------------------------------------------+
+# | After applying the rt-3 enabled NEXT-C-SID SRv6 End behavior |
+# +---------------------------------------------------------------+
+# | |
+# | IPv6 DA fcff:2::d46 |
+# | ^^^^^^^^^^^ |
+# | | |
+# | SID copied from the SID List contained in the SRH |
+# +---------------------------------------------------------------+
+#
+# Since the Argument of the C-SID container is zero, the behavior can not
+# update the Locator-Node function with the next C-SID carried in the Argument
+# itself. Thus, the enabled NEXT-C-SID SRv6 End behavior operates as the
+# traditional End behavior: it updates the IPv6 DA by copying the next
+# available SID in the SID List carried by the SRH. After that, the packet is
+# sent to the node rt-2.
+#
+# Once the packet is received by rt-2, the router decapsulates the inner IPv6
+# packet using the SRv6 End.DT46 behavior (associated with the SID fcff:2::d46)
+# and sends it to the host hs-2.
+#
+# Traffic from hs-2 to hs-1
+# -------------------------
+#
+# Packets generated from hs-2 and directed towards hs-1 are handled by rt-2
+# which applies the SRv6 Policies as follows:
+#
+# i) IPv6 DA=cafe::1, SID List=fcbb:0:0300:0400:0100:d46::
+# ii) IPv4 DA=10.0.0.1, SID List=fcbb:0:0300::,fcff:1::d46
+#
+# For simplicity, such SRv6 Policies were chosen so that, in both use cases (i)
+# and (ii), the network paths crossed by traffic from hs-2 to hs-1 are the same
+# as those taken by traffic from hs-1 to hs-2.
+# In this way, traffic from hs-2 to hs-1 is processed similarly to traffic from
+# hs-1 to hs-2. So, the traffic processing scheme turns out to be the same as
+# that adopted in the use cases already examined (of course, it is necessary to
+# consider the different SIDs/C-SIDs).
+
+# Kselftest framework requirement - SKIP code is 4.
+readonly ksft_skip=4
+
+readonly RDMSUFF="$(mktemp -u XXXXXXXX)"
+readonly DUMMY_DEVNAME="dum0"
+readonly VRF_TID=100
+readonly VRF_DEVNAME="vrf-${VRF_TID}"
+readonly RT2HS_DEVNAME="veth-t${VRF_TID}"
+readonly LOCALSID_TABLE_ID=90
+readonly IPv6_RT_NETWORK=fcf0:0
+readonly IPv6_HS_NETWORK=cafe
+readonly IPv4_HS_NETWORK=10.0.0
+readonly VPN_LOCATOR_SERVICE=fcff
+readonly DT46_FUNC=0d46
+readonly HEADEND_ENCAP="encap.red"
+
+# do not add ':' as separator
+readonly LCBLOCK_ADDR=fcbb0000
+readonly LCBLOCK_BLEN=32
+# do not add ':' as separator
+readonly LCNODEFUNC_FMT="0%d00"
+readonly LCNODEFUNC_BLEN=16
+
+readonly LCBLOCK_NODEFUNC_BLEN=$((LCBLOCK_BLEN + LCNODEFUNC_BLEN))
+
+readonly CSID_CNTR_PREFIX="dead:beaf::/32"
+# ID of the router used for testing the C-SID container cfgs
+readonly CSID_CNTR_RT_ID_TEST=1
+# Routing table used for testing the C-SID container cfgs
+readonly CSID_CNTR_RT_TABLE=91
+
+# C-SID container configurations to be tested
+#
+# An entry of the array is defined as "a,b,c" where:
+# - 'a' and 'b' elements represent respectively the Locator-Block length
+# (lblen) in bits and the Locator-Node Function length (nflen) in bits.
+# 'a' and 'b' can be set to default values using the placeholder "d" which
+# indicates the default kernel values (32 for lblen and 16 for nflen);
+# otherwise, any numeric value is accepted;
+# - 'c' indicates whether the C-SID configuration provided by the values 'a'
+# and 'b' should be considered valid ("y") or invalid ("n").
+declare -ra CSID_CONTAINER_CFGS=(
+ "d,d,y"
+ "d,16,y"
+ "16,d,y"
+ "16,32,y"
+ "32,16,y"
+ "48,8,y"
+ "8,48,y"
+ "d,0,n"
+ "0,d,n"
+ "32,0,n"
+ "0,32,n"
+ "17,d,n"
+ "d,17,n"
+ "120,16,n"
+ "16,120,n"
+ "0,128,n"
+ "128,0,n"
+ "130,0,n"
+ "0,130,n"
+ "0,0,n"
+)
+
+PING_TIMEOUT_SEC=4
+PAUSE_ON_FAIL=${PAUSE_ON_FAIL:=no}
+
+# IDs of routers and hosts are initialized during the setup of the testing
+# network
+ROUTERS=''
+HOSTS=''
+
+SETUP_ERR=1
+
+ret=${ksft_skip}
+nsuccess=0
+nfail=0
+
+log_test()
+{
+ local rc="$1"
+ local expected="$2"
+ local msg="$3"
+
+ if [ "${rc}" -eq "${expected}" ]; then
+ nsuccess=$((nsuccess+1))
+ printf "\n TEST: %-60s [ OK ]\n" "${msg}"
+ else
+ ret=1
+ nfail=$((nfail+1))
+ printf "\n TEST: %-60s [FAIL]\n" "${msg}"
+ if [ "${PAUSE_ON_FAIL}" = "yes" ]; then
+ echo
+ echo "hit enter to continue, 'q' to quit"
+ read a
+ [ "$a" = "q" ] && exit 1
+ fi
+ fi
+}
+
+print_log_test_results()
+{
+ printf "\nTests passed: %3d\n" "${nsuccess}"
+ printf "Tests failed: %3d\n" "${nfail}"
+
+ # when a test fails, the value of 'ret' is set to 1 (error code).
+ # Conversely, when all tests are passed successfully, the 'ret' value
+ # is set to 0 (success code).
+ if [ "${ret}" -ne 1 ]; then
+ ret=0
+ fi
+}
+
+log_section()
+{
+ echo
+ echo "################################################################################"
+ echo "TEST SECTION: $*"
+ echo "################################################################################"
+}
+
+test_command_or_ksft_skip()
+{
+ local cmd="$1"
+
+ if [ ! -x "$(command -v "${cmd}")" ]; then
+ echo "SKIP: Could not run test without \"${cmd}\" tool";
+ exit "${ksft_skip}"
+ fi
+}
+
+get_nodename()
+{
+ local name="$1"
+
+ echo "${name}-${RDMSUFF}"
+}
+
+get_rtname()
+{
+ local rtid="$1"
+
+ get_nodename "rt-${rtid}"
+}
+
+get_hsname()
+{
+ local hsid="$1"
+
+ get_nodename "hs-${hsid}"
+}
+
+__create_namespace()
+{
+ local name="$1"
+
+ ip netns add "${name}"
+}
+
+create_router()
+{
+ local rtid="$1"
+ local nsname
+
+ nsname="$(get_rtname "${rtid}")"
+
+ __create_namespace "${nsname}"
+}
+
+create_host()
+{
+ local hsid="$1"
+ local nsname
+
+ nsname="$(get_hsname "${hsid}")"
+
+ __create_namespace "${nsname}"
+}
+
+cleanup()
+{
+ local nsname
+ local i
+
+ # destroy routers
+ for i in ${ROUTERS}; do
+ nsname="$(get_rtname "${i}")"
+
+ ip netns del "${nsname}" &>/dev/null || true
+ done
+
+ # destroy hosts
+ for i in ${HOSTS}; do
+ nsname="$(get_hsname "${i}")"
+
+ ip netns del "${nsname}" &>/dev/null || true
+ done
+
+ # check whether the setup phase was completed successfully or not. In
+ # case of an error during the setup phase of the testing environment,
+ # the selftest is considered as "skipped".
+ if [ "${SETUP_ERR}" -ne 0 ]; then
+ echo "SKIP: Setting up the testing environment failed"
+ exit "${ksft_skip}"
+ fi
+
+ exit "${ret}"
+}
+
+add_link_rt_pairs()
+{
+ local rt="$1"
+ local rt_neighs="$2"
+ local neigh
+ local nsname
+ local neigh_nsname
+
+ nsname="$(get_rtname "${rt}")"
+
+ for neigh in ${rt_neighs}; do
+ neigh_nsname="$(get_rtname "${neigh}")"
+
+ ip link add "veth-rt-${rt}-${neigh}" netns "${nsname}" \
+ type veth peer name "veth-rt-${neigh}-${rt}" \
+ netns "${neigh_nsname}"
+ done
+}
+
+get_network_prefix()
+{
+ local rt="$1"
+ local neigh="$2"
+ local p="${rt}"
+ local q="${neigh}"
+
+ if [ "${p}" -gt "${q}" ]; then
+ p="${q}"; q="${rt}"
+ fi
+
+ echo "${IPv6_RT_NETWORK}:${p}:${q}"
+}
+
+# Setup the basic networking for the routers
+setup_rt_networking()
+{
+ local rt="$1"
+ local rt_neighs="$2"
+ local nsname
+ local net_prefix
+ local devname
+ local neigh
+
+ nsname="$(get_rtname "${rt}")"
+
+ for neigh in ${rt_neighs}; do
+ devname="veth-rt-${rt}-${neigh}"
+
+ net_prefix="$(get_network_prefix "${rt}" "${neigh}")"
+
+ ip -netns "${nsname}" addr \
+ add "${net_prefix}::${rt}/64" dev "${devname}" nodad
+
+ ip -netns "${nsname}" link set "${devname}" up
+ done
+
+ ip -netns "${nsname}" link add "${DUMMY_DEVNAME}" type dummy
+
+ ip -netns "${nsname}" link set "${DUMMY_DEVNAME}" up
+ ip -netns "${nsname}" link set lo up
+
+ ip netns exec "${nsname}" sysctl -wq net.ipv6.conf.all.accept_dad=0
+ ip netns exec "${nsname}" sysctl -wq net.ipv6.conf.default.accept_dad=0
+ ip netns exec "${nsname}" sysctl -wq net.ipv6.conf.all.forwarding=1
+
+ ip netns exec "${nsname}" sysctl -wq net.ipv4.conf.all.rp_filter=0
+ ip netns exec "${nsname}" sysctl -wq net.ipv4.conf.default.rp_filter=0
+ ip netns exec "${nsname}" sysctl -wq net.ipv4.ip_forward=1
+}
+
+# build an ipv6 prefix/address based on the input string
+# Note that the input string does not contain ':' and '::' which are considered
+# to be implicit.
+# e.g.:
+# - input: fbcc00000400300
+# - output: fbcc:0000:0400:0300:0000:0000:0000:0000
+# ^^^^^^^^^^^^^^^^^^^
+# fill the address with 0s
+build_ipv6_addr()
+{
+ local addr="$1"
+ local out=""
+ local strlen="${#addr}"
+ local padn
+ local i
+
+ # add ":" every 4 digits (16 bits)
+ for (( i = 0; i < strlen; i++ )); do
+ if (( i > 0 && i < 32 && (i % 4) == 0 )); then
+ out="${out}:"
+ fi
+
+ out="${out}${addr:$i:1}"
+ done
+
+ # fill the remaining bits of the address with 0s
+ padn=$((32 - strlen))
+ for (( i = padn; i > 0; i-- )); do
+ if (( i > 0 && i < 32 && (i % 4) == 0 )); then
+ out="${out}:"
+ fi
+
+ out="${out}0"
+ done
+
+ printf "${out}"
+}
+
+build_csid()
+{
+ local nodeid="$1"
+
+ printf "${LCNODEFUNC_FMT}" "${nodeid}"
+}
+
+build_lcnode_func_prefix()
+{
+ local nodeid="$1"
+ local lcnodefunc
+ local prefix
+ local out
+
+ lcnodefunc="$(build_csid "${nodeid}")"
+ prefix="$(build_ipv6_addr "${LCBLOCK_ADDR}${lcnodefunc}")"
+
+ out="${prefix}/${LCBLOCK_NODEFUNC_BLEN}"
+
+ echo "${out}"
+}
+
+# Setup local SIDs for an SRv6 router
+setup_rt_local_sids()
+{
+ local rt="$1"
+ local rt_neighs="$2"
+ local net_prefix
+ local devname
+ local nsname
+ local neigh
+ local lcnode_func_prefix
+ local lcblock_prefix
+
+ nsname="$(get_rtname "${rt}")"
+
+ for neigh in ${rt_neighs}; do
+ devname="veth-rt-${rt}-${neigh}"
+
+ net_prefix="$(get_network_prefix "${rt}" "${neigh}")"
+
+ # set underlay network routes for SIDs reachability
+ ip -netns "${nsname}" -6 route \
+ add "${VPN_LOCATOR_SERVICE}:${neigh}::/32" \
+ table "${LOCALSID_TABLE_ID}" \
+ via "${net_prefix}::${neigh}" dev "${devname}"
+
+ # set the underlay network for C-SIDs reachability
+ lcnode_func_prefix="$(build_lcnode_func_prefix "${neigh}")"
+
+ ip -netns "${nsname}" -6 route \
+ add "${lcnode_func_prefix}" \
+ table "${LOCALSID_TABLE_ID}" \
+ via "${net_prefix}::${neigh}" dev "${devname}"
+ done
+
+ lcnode_func_prefix="$(build_lcnode_func_prefix "${rt}")"
+
+ # enabled NEXT-C-SID SRv6 End behavior (note that "dev" is the dummy
+ # dum0 device chosen for the sake of simplicity).
+ ip -netns "${nsname}" -6 route \
+ add "${lcnode_func_prefix}" \
+ table "${LOCALSID_TABLE_ID}" \
+ encap seg6local action End flavors next-csid \
+ lblen "${LCBLOCK_BLEN}" nflen "${LCNODEFUNC_BLEN}" \
+ dev "${DUMMY_DEVNAME}"
+
+ # all SIDs for VPNs start with a common locator. Routes and SRv6
+ # Endpoint behavior instaces are grouped together in the 'localsid'
+ # table.
+ ip -netns "${nsname}" -6 rule \
+ add to "${VPN_LOCATOR_SERVICE}::/16" \
+ lookup "${LOCALSID_TABLE_ID}" prio 999
+
+ # common locator block for NEXT-C-SIDS compression mechanism.
+ lcblock_prefix="$(build_ipv6_addr "${LCBLOCK_ADDR}")"
+ ip -netns "${nsname}" -6 rule \
+ add to "${lcblock_prefix}/${LCBLOCK_BLEN}" \
+ lookup "${LOCALSID_TABLE_ID}" prio 999
+}
+
+# build and install the SRv6 policy into the ingress SRv6 router as well as the
+# decap SID in the egress one.
+# args:
+# $1 - src host (evaluate automatically the ingress router)
+# $2 - dst host (evaluate automatically the egress router)
+# $3 - SRv6 routers configured for steering traffic (End behaviors)
+# $4 - single SID or double SID
+# $5 - traffic type (IPv6 or IPv4)
+__setup_l3vpn()
+{
+ local src="$1"
+ local dst="$2"
+ local end_rts="$3"
+ local mode="$4"
+ local traffic="$5"
+ local nsname
+ local policy
+ local container
+ local decapsid
+ local lcnfunc
+ local dt
+ local n
+ local rtsrc_nsname
+ local rtdst_nsname
+
+ rtsrc_nsname="$(get_rtname "${src}")"
+ rtdst_nsname="$(get_rtname "${dst}")"
+
+ container="${LCBLOCK_ADDR}"
+
+ # build first SID (C-SID container)
+ for n in ${end_rts}; do
+ lcnfunc="$(build_csid "${n}")"
+
+ container="${container}${lcnfunc}"
+ done
+
+ if [ "${mode}" -eq 1 ]; then
+ # single SID policy
+ dt="$(build_csid "${dst}")${DT46_FUNC}"
+ container="${container}${dt}"
+ # build the full ipv6 address for the container
+ policy="$(build_ipv6_addr "${container}")"
+
+ # build the decap SID used in the decap node
+ container="${LCBLOCK_ADDR}${dt}"
+ decapsid="$(build_ipv6_addr "${container}")"
+ else
+ # double SID policy
+ decapsid="${VPN_LOCATOR_SERVICE}:${dst}::${DT46_FUNC}"
+
+ policy="$(build_ipv6_addr "${container}"),${decapsid}"
+ fi
+
+ # apply encap policy
+ if [ "${traffic}" -eq 6 ]; then
+ ip -netns "${rtsrc_nsname}" -6 route \
+ add "${IPv6_HS_NETWORK}::${dst}" vrf "${VRF_DEVNAME}" \
+ encap seg6 mode "${HEADEND_ENCAP}" segs "${policy}" \
+ dev "${VRF_DEVNAME}"
+
+ ip -netns "${rtsrc_nsname}" -6 neigh \
+ add proxy "${IPv6_HS_NETWORK}::${dst}" \
+ dev "${RT2HS_DEVNAME}"
+ else
+ # "dev" must be different from the one where the packet is
+ # received, otherwise the proxy arp does not work.
+ ip -netns "${rtsrc_nsname}" -4 route \
+ add "${IPv4_HS_NETWORK}.${dst}" vrf "${VRF_DEVNAME}" \
+ encap seg6 mode "${HEADEND_ENCAP}" segs "${policy}" \
+ dev "${VRF_DEVNAME}"
+ fi
+
+ # apply decap
+ # Local End.DT46 behavior (decap)
+ ip -netns "${rtdst_nsname}" -6 route \
+ add "${decapsid}" \
+ table "${LOCALSID_TABLE_ID}" \
+ encap seg6local action End.DT46 vrftable "${VRF_TID}" \
+ dev "${VRF_DEVNAME}"
+}
+
+# see __setup_l3vpn()
+setup_ipv4_vpn_2sids()
+{
+ __setup_l3vpn "$1" "$2" "$3" 2 4
+}
+
+# see __setup_l3vpn()
+setup_ipv6_vpn_1sid()
+{
+ __setup_l3vpn "$1" "$2" "$3" 1 6
+}
+
+setup_hs()
+{
+ local hs="$1"
+ local rt="$2"
+ local hsname
+ local rtname
+
+ hsname="$(get_hsname "${hs}")"
+ rtname="$(get_rtname "${rt}")"
+
+ ip netns exec "${hsname}" sysctl -wq net.ipv6.conf.all.accept_dad=0
+ ip netns exec "${hsname}" sysctl -wq net.ipv6.conf.default.accept_dad=0
+
+ ip -netns "${hsname}" link add veth0 type veth \
+ peer name "${RT2HS_DEVNAME}" netns "${rtname}"
+
+ ip -netns "${hsname}" addr \
+ add "${IPv6_HS_NETWORK}::${hs}/64" dev veth0 nodad
+ ip -netns "${hsname}" addr add "${IPv4_HS_NETWORK}.${hs}/24" dev veth0
+
+ ip -netns "${hsname}" link set veth0 up
+ ip -netns "${hsname}" link set lo up
+
+ # configure the VRF on the router which is directly connected to the
+ # source host.
+ ip -netns "${rtname}" link \
+ add "${VRF_DEVNAME}" type vrf table "${VRF_TID}"
+ ip -netns "${rtname}" link set "${VRF_DEVNAME}" up
+
+ # enslave the veth interface connecting the router with the host to the
+ # VRF in the access router
+ ip -netns "${rtname}" link \
+ set "${RT2HS_DEVNAME}" master "${VRF_DEVNAME}"
+
+ # set default routes to unreachable for both ipv6 and ipv4
+ ip -netns "${rtname}" -6 route \
+ add unreachable default metric 4278198272 \
+ vrf "${VRF_DEVNAME}"
+ ip -netns "${rtname}" -4 route \
+ add unreachable default metric 4278198272 \
+ vrf "${VRF_DEVNAME}"
+
+ ip -netns "${rtname}" addr \
+ add "${IPv6_HS_NETWORK}::254/64" dev "${RT2HS_DEVNAME}" nodad
+ ip -netns "${rtname}" addr \
+ add "${IPv4_HS_NETWORK}.254/24" dev "${RT2HS_DEVNAME}"
+
+ ip -netns "${rtname}" link set "${RT2HS_DEVNAME}" up
+
+ ip netns exec "${rtname}" \
+ sysctl -wq net.ipv6.conf."${RT2HS_DEVNAME}".proxy_ndp=1
+ ip netns exec "${rtname}" \
+ sysctl -wq net.ipv4.conf."${RT2HS_DEVNAME}".proxy_arp=1
+
+ # disable the rp_filter otherwise the kernel gets confused about how
+ # to route decap ipv4 packets.
+ ip netns exec "${rtname}" \
+ sysctl -wq net.ipv4.conf."${RT2HS_DEVNAME}".rp_filter=0
+
+ ip netns exec "${rtname}" sh -c "echo 1 > /proc/sys/net/vrf/strict_mode"
+}
+
+setup()
+{
+ local i
+
+ # create routers
+ ROUTERS="1 2 3 4"; readonly ROUTERS
+ for i in ${ROUTERS}; do
+ create_router "${i}"
+ done
+
+ # create hosts
+ HOSTS="1 2"; readonly HOSTS
+ for i in ${HOSTS}; do
+ create_host "${i}"
+ done
+
+ # set up the links for connecting routers
+ add_link_rt_pairs 1 "2 3 4"
+ add_link_rt_pairs 2 "3 4"
+ add_link_rt_pairs 3 "4"
+
+ # set up the basic connectivity of routers and routes required for
+ # reachability of SIDs.
+ setup_rt_networking 1 "2 3 4"
+ setup_rt_networking 2 "1 3 4"
+ setup_rt_networking 3 "1 2 4"
+ setup_rt_networking 4 "1 2 3"
+
+ # set up the hosts connected to routers
+ setup_hs 1 1
+ setup_hs 2 2
+
+ # set up default SRv6 Endpoints (i.e. SRv6 End and SRv6 End.DT46)
+ setup_rt_local_sids 1 "2 3 4"
+ setup_rt_local_sids 2 "1 3 4"
+ setup_rt_local_sids 3 "1 2 4"
+ setup_rt_local_sids 4 "1 2 3"
+
+ # set up SRv6 Policies
+
+ # create an IPv6 VPN between hosts hs-1 and hs-2.
+ #
+ # Direction hs-1 -> hs-2
+ # - rt-1 encap (H.Encaps.Red)
+ # - rt-4 SRv6 End behavior (NEXT-C-SID flavor)
+ # - rt-3 SRv6 End behavior (NEXT-C-SID flavor)
+ # - rt-2 SRv6 End.DT46 behavior
+ setup_ipv6_vpn_1sid 1 2 "4 3"
+
+ # Direction hs2 -> hs-1
+ # - rt-2 encap (H.Encaps.Red)
+ # - rt-3 SRv6 End behavior (NEXT-C-SID flavor)
+ # - rt-4 SRv6 End behavior (NEXT-C-SID flavor)
+ # - rt-1 SRv6 End.DT46 behavior
+ setup_ipv6_vpn_1sid 2 1 "3 4"
+
+ # create an IPv4 VPN between hosts hs-1 and hs-2
+ #
+ # Direction hs-1 -> hs-2
+ # - rt-1 encap (H.Encaps.Red)
+ # - rt-3 SRv6 End behavior (NEXT-C-SID flavor)
+ # - rt-2 SRv6 End.DT46 behavior
+ setup_ipv4_vpn_2sids 1 2 "3"
+
+ # Direction hs-2 -> hs-1
+ # - rt-2 encap (H.Encaps.Red)
+ # - rt-3 SRv6 End behavior (NEXT-C-SID flavor)
+ # - rt-1 SRv6 End.DT46 behavior
+ setup_ipv4_vpn_2sids 2 1 "3"
+
+ # testing environment was set up successfully
+ SETUP_ERR=0
+}
+
+check_rt_connectivity()
+{
+ local rtsrc="$1"
+ local rtdst="$2"
+ local prefix
+ local rtsrc_nsname
+
+ rtsrc_nsname="$(get_rtname "${rtsrc}")"
+
+ prefix="$(get_network_prefix "${rtsrc}" "${rtdst}")"
+
+ ip netns exec "${rtsrc_nsname}" ping -c 1 -W "${PING_TIMEOUT_SEC}" \
+ "${prefix}::${rtdst}" >/dev/null 2>&1
+}
+
+check_and_log_rt_connectivity()
+{
+ local rtsrc="$1"
+ local rtdst="$2"
+
+ check_rt_connectivity "${rtsrc}" "${rtdst}"
+ log_test $? 0 "Routers connectivity: rt-${rtsrc} -> rt-${rtdst}"
+}
+
+check_hs_ipv6_connectivity()
+{
+ local hssrc="$1"
+ local hsdst="$2"
+ local hssrc_nsname
+
+ hssrc_nsname="$(get_hsname "${hssrc}")"
+
+ ip netns exec "${hssrc_nsname}" ping -c 1 -W "${PING_TIMEOUT_SEC}" \
+ "${IPv6_HS_NETWORK}::${hsdst}" >/dev/null 2>&1
+}
+
+check_hs_ipv4_connectivity()
+{
+ local hssrc="$1"
+ local hsdst="$2"
+ local hssrc_nsname
+
+ hssrc_nsname="$(get_hsname "${hssrc}")"
+
+ ip netns exec "${hssrc_nsname}" ping -c 1 -W "${PING_TIMEOUT_SEC}" \
+ "${IPv4_HS_NETWORK}.${hsdst}" >/dev/null 2>&1
+}
+
+check_and_log_hs2gw_connectivity()
+{
+ local hssrc="$1"
+
+ check_hs_ipv6_connectivity "${hssrc}" 254
+ log_test $? 0 "IPv6 Hosts connectivity: hs-${hssrc} -> gw"
+
+ check_hs_ipv4_connectivity "${hssrc}" 254
+ log_test $? 0 "IPv4 Hosts connectivity: hs-${hssrc} -> gw"
+}
+
+check_and_log_hs_ipv6_connectivity()
+{
+ local hssrc="$1"
+ local hsdst="$2"
+
+ check_hs_ipv6_connectivity "${hssrc}" "${hsdst}"
+ log_test $? 0 "IPv6 Hosts connectivity: hs-${hssrc} -> hs-${hsdst}"
+}
+
+check_and_log_hs_ipv4_connectivity()
+{
+ local hssrc="$1"
+ local hsdst="$2"
+
+ check_hs_ipv4_connectivity "${hssrc}" "${hsdst}"
+ log_test $? 0 "IPv4 Hosts connectivity: hs-${hssrc} -> hs-${hsdst}"
+}
+
+router_tests()
+{
+ local i
+ local j
+
+ log_section "IPv6 routers connectivity test"
+
+ for i in ${ROUTERS}; do
+ for j in ${ROUTERS}; do
+ if [ "${i}" -eq "${j}" ]; then
+ continue
+ fi
+
+ check_and_log_rt_connectivity "${i}" "${j}"
+ done
+ done
+}
+
+host2gateway_tests()
+{
+ local hs
+
+ log_section "IPv4/IPv6 connectivity test among hosts and gateways"
+
+ for hs in ${HOSTS}; do
+ check_and_log_hs2gw_connectivity "${hs}"
+ done
+}
+
+host_vpn_tests()
+{
+ log_section "SRv6 VPN connectivity test hosts (h1 <-> h2, IPv6)"
+
+ check_and_log_hs_ipv6_connectivity 1 2
+ check_and_log_hs_ipv6_connectivity 2 1
+
+ log_section "SRv6 VPN connectivity test hosts (h1 <-> h2, IPv4)"
+
+ check_and_log_hs_ipv4_connectivity 1 2
+ check_and_log_hs_ipv4_connectivity 2 1
+}
+
+__nextcsid_end_behavior_test()
+{
+ local nsname="$1"
+ local cmd="$2"
+ local blen="$3"
+ local flen="$4"
+ local layout=""
+
+ if [ "${blen}" != "d" ]; then
+ layout="${layout} lblen ${blen}"
+ fi
+
+ if [ "${flen}" != "d" ]; then
+ layout="${layout} nflen ${flen}"
+ fi
+
+ ip -netns "${nsname}" -6 route \
+ "${cmd}" "${CSID_CNTR_PREFIX}" \
+ table "${CSID_CNTR_RT_TABLE}" \
+ encap seg6local action End flavors next-csid ${layout} \
+ dev "${DUMMY_DEVNAME}" &>/dev/null
+
+ return "$?"
+}
+
+rt_x_nextcsid_end_behavior_test()
+{
+ local rt="$1"
+ local blen="$2"
+ local flen="$3"
+ local nsname
+ local ret
+
+ nsname="$(get_rtname "${rt}")"
+
+ __nextcsid_end_behavior_test "${nsname}" "add" "${blen}" "${flen}"
+ ret="$?"
+ __nextcsid_end_behavior_test "${nsname}" "del" "${blen}" "${flen}"
+
+ return "${ret}"
+}
+
+__parse_csid_container_cfg()
+{
+ local cfg="$1"
+ local index="$2"
+ local out
+
+ echo "${cfg}" | cut -d',' -f"${index}"
+}
+
+csid_container_cfg_tests()
+{
+ local valid
+ local blen
+ local flen
+ local cfg
+ local ret
+
+ log_section "C-SID Container config tests (legend: d='kernel default')"
+
+ for cfg in "${CSID_CONTAINER_CFGS[@]}"; do
+ blen="$(__parse_csid_container_cfg "${cfg}" 1)"
+ flen="$(__parse_csid_container_cfg "${cfg}" 2)"
+ valid="$(__parse_csid_container_cfg "${cfg}" 3)"
+
+ rt_x_nextcsid_end_behavior_test \
+ "${CSID_CNTR_RT_ID_TEST}" \
+ "${blen}" \
+ "${flen}"
+ ret="$?"
+
+ if [ "${valid}" == "y" ]; then
+ log_test "${ret}" 0 \
+ "Accept valid C-SID container cfg (lblen=${blen}, nflen=${flen})"
+ else
+ log_test "${ret}" 2 \
+ "Reject invalid C-SID container cfg (lblen=${blen}, nflen=${flen})"
+ fi
+ done
+}
+
+test_iproute2_supp_or_ksft_skip()
+{
+ if ! ip route help 2>&1 | grep -qo "next-csid"; then
+ echo "SKIP: Missing SRv6 NEXT-C-SID flavor support in iproute2"
+ exit "${ksft_skip}"
+ fi
+}
+
+test_dummy_dev_or_ksft_skip()
+{
+ local test_netns
+
+ test_netns="dummy-$(mktemp -u XXXXXXXX)"
+
+ if ! ip netns add "${test_netns}"; then
+ echo "SKIP: Cannot set up netns for testing dummy dev support"
+ exit "${ksft_skip}"
+ fi
+
+ modprobe dummy &>/dev/null || true
+ if ! ip -netns "${test_netns}" link \
+ add "${DUMMY_DEVNAME}" type dummy; then
+ echo "SKIP: dummy dev not supported"
+
+ ip netns del "${test_netns}"
+ exit "${ksft_skip}"
+ fi
+
+ ip netns del "${test_netns}"
+}
+
+test_vrf_or_ksft_skip()
+{
+ modprobe vrf &>/dev/null || true
+ if [ ! -e /proc/sys/net/vrf/strict_mode ]; then
+ echo "SKIP: vrf sysctl does not exist"
+ exit "${ksft_skip}"
+ fi
+}
+
+if [ "$(id -u)" -ne 0 ]; then
+ echo "SKIP: Need root privileges"
+ exit "${ksft_skip}"
+fi
+
+# required programs to carry out this selftest
+test_command_or_ksft_skip ip
+test_command_or_ksft_skip ping
+test_command_or_ksft_skip sysctl
+test_command_or_ksft_skip grep
+test_command_or_ksft_skip cut
+
+test_iproute2_supp_or_ksft_skip
+test_dummy_dev_or_ksft_skip
+test_vrf_or_ksft_skip
+
+set -e
+trap cleanup EXIT
+
+setup
+set +e
+
+csid_container_cfg_tests
+
+router_tests
+host2gateway_tests
+host_vpn_tests
+
+print_log_test_results
diff --git a/tools/testing/selftests/net/srv6_end_x_next_csid_l3vpn_test.sh b/tools/testing/selftests/net/srv6_end_x_next_csid_l3vpn_test.sh
new file mode 100755
index 000000000000..c79cb8ede17f
--- /dev/null
+++ b/tools/testing/selftests/net/srv6_end_x_next_csid_l3vpn_test.sh
@@ -0,0 +1,1213 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+#
+# author: Andrea Mayer <andrea.mayer@uniroma2.it>
+# author: Paolo Lungaroni <paolo.lungaroni@uniroma2.it>
+#
+# This script is designed for testing the support of NEXT-C-SID flavor for SRv6
+# End.X behavior.
+# A basic knowledge of SRv6 architecture [1] and of the compressed SID approach
+# [2] is assumed for the reader.
+#
+# The network topology used in the selftest is depicted hereafter, composed of
+# two hosts and four routers. Hosts hs-1 and hs-2 are connected through an
+# IPv4/IPv6 L3 VPN service, offered by routers rt-1, rt-2, rt-3 and rt-4 using
+# the NEXT-C-SID flavor. The key components for such VPNs are:
+#
+# i) The SRv6 H.Encaps/H.Encaps.Red behaviors [1] apply SRv6 Policies on
+# traffic received by connected hosts, initiating the VPN tunnel;
+#
+# ii) The SRv6 End.X behavior [1] (Endpoint with L3 cross connect) is a
+# variant of SRv6 End behavior. It advances the active SID in the SID
+# List carried by the SRH and forwards the packet to an L3 adjacency;
+#
+# iii) The NEXT-C-SID mechanism [2] offers the possibility of encoding several
+# SRv6 segments within a single 128-bit SID address, referred to as a
+# Compressed SID (C-SID) container. In this way, the length of the SID
+# List can be drastically reduced.
+# The NEXT-C-SID is provided as a "flavor" of the SRv6 End.X behavior
+# which advances the current C-SID (i.e. the Locator-Node Function defined
+# in [2]) with the next one carried in the Argument, if available.
+# When no more C-SIDs are available in the Argument, the SRv6 End.X
+# behavior will apply the End.X function selecting the next SID in the SID
+# List;
+#
+# iv) The SRv6 End.DT46 behavior [1] is used for removing the SRv6 Policy and,
+# thus, it terminates the VPN tunnel. Such a behavior is capable of
+# handling, at the same time, both tunneled IPv4 and IPv6 traffic.
+#
+# [1] https://datatracker.ietf.org/doc/html/rfc8986
+# [2] https://datatracker.ietf.org/doc/html/draft-ietf-spring-srv6-srh-compression
+#
+#
+# cafe::1 cafe::2
+# 10.0.0.1 10.0.0.2
+# +--------+ +--------+
+# | | | |
+# | hs-1 | | hs-2 |
+# | | | |
+# +---+----+ +----+---+
+# cafe::/64 | | cafe::/64
+# 10.0.0.0/24 | | 10.0.0.0/24
+# +---+----+ +----+---+
+# | | fcf0:0:1:2::/64 | |
+# | rt-1 +-------------------+ rt-2 |
+# | | | |
+# +---+----+ +----+---+
+# | . . |
+# | fcf0:0:1:3::/64 . |
+# | . . |
+# | . . |
+# fcf0:0:1:4::/64 | . | fcf0:0:2:3::/64
+# | . . |
+# | . . |
+# | fcf0:0:2:4::/64 . |
+# | . . |
+# +---+----+ +----+---+
+# | | | |
+# | rt-4 +-------------------+ rt-3 |
+# | | fcf0:0:3:4::/64 | |
+# +---+----+ +----+---+
+#
+# Every fcf0:0:x:y::/64 network interconnects the SRv6 routers rt-x with rt-y in
+# the selftest network.
+#
+# Local SID/C-SID table
+# =====================
+#
+# Each SRv6 router is configured with a Local SID/C-SID table in which
+# SIDs/C-SIDs are stored. Considering an SRv6 router rt-x, SIDs/C-SIDs are
+# configured in the Local SID/C-SIDs table as follows:
+#
+# Local SID/C-SID table for SRv6 router rt-x
+# +-----------------------------------------------------------+
+# |fcff:x::d46 is associated with the non-compressed SRv6 |
+# | End.DT46 behavior |
+# +-----------------------------------------------------------+
+# |fcbb:0:0x00::/48 is associated with the NEXT-C-SID flavor |
+# | of SRv6 End.X behavior |
+# +-----------------------------------------------------------+
+# |fcbb:0:0x00:d46::/64 is associated with the SRv6 End.DT46 |
+# | behavior when NEXT-C-SID compression is turned on |
+# +-----------------------------------------------------------+
+#
+# The fcff::/16 prefix is reserved for implementing SRv6 services with regular
+# (non compressed) SIDs. Reachability of SIDs is ensured by proper configuration
+# of the IPv6 routing tables in the routers.
+# Similarly, the fcbb:0::/32 prefix is reserved for implementing SRv6 VPN
+# services leveraging the NEXT-C-SID compression mechanism. Indeed, the
+# fcbb:0::/32 is used for encoding the Locator-Block while the Locator-Node
+# Function is encoded with 16 bits.
+#
+# Incoming traffic classification and application of SRv6 Policies
+# ================================================================
+#
+# An SRv6 ingress router applies different SRv6 Policies to the traffic received
+# from a connected host, considering the IPv4 or IPv6 destination address.
+# SRv6 policy enforcement consists of encapsulating the received traffic into a
+# new IPv6 packet with a given SID List contained in the SRH.
+# When the SID List contains only one SID, the SRH could be omitted completely
+# and that SID is stored directly in the IPv6 Destination Address (DA) (this is
+# called "reduced" encapsulation).
+#
+# Test cases for NEXT-C-SID
+# =========================
+#
+# We consider two test cases for NEXT-C-SID: i) single SID and ii) double SID.
+#
+# In the single SID test case we have a number of segments that are all
+# contained in a single Compressed SID (C-SID) container. Therefore the
+# resulting SID List has only one SID. Using the reduced encapsulation format
+# this will result in a packet with no SRH.
+#
+# In the double SID test case we have one segment carried in a Compressed SID
+# (C-SID) container, followed by a regular (non compressed) SID. The resulting
+# SID List has two segments and it is possible to test the advance to the next
+# SID when all the C-SIDs in a C-SID container have been processed. Using the
+# reduced encapsulation format this will result in a packet with an SRH
+# containing 1 segment.
+#
+# For the single SID test case, we use the IPv6 addresses of hs-1 and hs-2, for
+# the double SID test case, we use their IPv4 addresses. This is only done to
+# simplify the test setup and avoid adding other hosts or multiple addresses on
+# the same interface of a host.
+#
+# Traffic from hs-1 to hs-2
+# -------------------------
+#
+# Packets generated from hs-1 and directed towards hs-2 are handled by rt-1
+# which applies the SRv6 Policies as follows:
+#
+# i) IPv6 DA=cafe::2, H.Encaps.Red with SID List=fcbb:0:0300:0200:d46::
+# ii) IPv4 DA=10.0.0.2, H.Encaps.Red with SID List=fcbb:0:0300::,fcff:2::d46
+#
+# ### i) single SID
+#
+# The router rt-1 is configured to enforce the given Policy through the SRv6
+# H.Encaps.Red behavior which avoids the presence of the SRH at all, since it
+# pushes the single SID directly in the IPv6 DA. Such a SID encodes a whole
+# C-SID container carrying several C-SIDs (e.g. 0300, 0200, etc).
+#
+# As the packet reaches the router rt-3, the enabled NEXT-C-SID SRv6 End.X
+# behavior (associated with fcbb:0:0300::/48) is triggered. This behavior
+# analyzes the IPv6 DA and checks whether the Argument of the C-SID container
+# is zero or not. In this case, the Argument is *NOT* zero and the IPv6 DA is
+# updated as follows:
+#
+# +-----------------------------------------------------------------+
+# | Before applying the rt-3 enabled NEXT-C-SID SRv6 End.X behavior |
+# +-----------------------------------------------------------------+
+# | +---------- Argument |
+# | vvvvvvvvvv |
+# | IPv6 DA fcbb:0:0300:0200:d46:: |
+# | ^^^^ <-- shifting |
+# | | |
+# | Locator-Node Function |
+# +-----------------------------------------------------------------+
+# | After applying the rt-3 enabled NEXT-C-SID SRv6 End.X behavior |
+# +-----------------------------------------------------------------+
+# | +---------- Argument |
+# | vvvvvv |
+# | IPv6 DA fcbb:0:0200:d46:: |
+# | ^^^^ |
+# | | |
+# | Locator-Node Function |
+# +-----------------------------------------------------------------+
+#
+# After having applied the enabled NEXT-C-SID SRv6 End.X behavior, the packet
+# is sent to rt-4 node using the L3 adjacency address fcf0:0:3:4::4.
+#
+# The node rt-4 performs a plain IPv6 forward to the rt-2 router according to
+# its Local SID table and using the IPv6 DA fcbb:0:0200:d46:: .
+#
+# The router rt-2 is configured for decapsulating the inner IPv6 packet and,
+# for this reason, it applies the SRv6 End.DT46 behavior on the received
+# packet. It is worth noting that the SRv6 End.DT46 behavior does not require
+# the presence of the SRH: it is fully capable to operate properly on
+# IPv4/IPv6-in-IPv6 encapsulations.
+# At the end of the decap operation, the packet is sent to the host hs-2.
+#
+# ### ii) double SID
+#
+# The router rt-1 is configured to enforce the given Policy through the SRv6
+# H.Encaps.Red. As a result, the first SID fcbb:0:0300:: is stored into the
+# IPv6 DA, while the SRH pushed into the packet is made of only one SID, i.e.
+# fcff:2::d46. Hence, the packet sent by hs-1 to hs-2 is encapsulated in an
+# outer IPv6 header plus the SRH.
+#
+# As the packet reaches the node rt-3, the router applies the enabled NEXT-C-SID
+# SRv6 End.X behavior.
+#
+# +-----------------------------------------------------------------+
+# | Before applying the rt-3 enabled NEXT-C-SID SRv6 End.X behavior |
+# +-----------------------------------------------------------------+
+# | +---------- Argument |
+# | vvvv (Argument is all filled with zeros) |
+# | IPv6 DA fcbb:0:0300:: |
+# | ^^^^ |
+# | | |
+# | Locator-Node Function |
+# +-----------------------------------------------------------------+
+# | After applying the rt-3 enabled NEXT-C-SID SRv6 End.X behavior |
+# +-----------------------------------------------------------------+
+# | |
+# | IPv6 DA fcff:2::d46 |
+# | ^^^^^^^^^^^ |
+# | | |
+# | SID copied from the SID List contained in the SRH |
+# +-----------------------------------------------------------------+
+#
+# Since the Argument of the C-SID container is zero, the behavior can not
+# update the Locator-Node function with the next C-SID carried in the Argument
+# itself. Thus, the enabled NEXT-C-SID SRv6 End.X behavior operates as the
+# traditional End.X behavior: it updates the IPv6 DA by copying the next
+# available SID in the SID List carried by the SRH. Next, the packet is
+# forwarded to the rt-4 node using the L3 adjacency fcf0:3:4::4 previously
+# configured for this behavior.
+#
+# The node rt-4 performs a plain IPv6 forward to the rt-2 router according to
+# its Local SID table and using the IPv6 DA fcff:2::d46.
+#
+# Once the packet is received by rt-2, the router decapsulates the inner IPv4
+# packet using the SRv6 End.DT46 behavior (associated with the SID fcff:2::d46)
+# and sends it to the host hs-2.
+#
+# Traffic from hs-2 to hs-1
+# -------------------------
+#
+# Packets generated from hs-2 and directed towards hs-1 are handled by rt-2
+# which applies the SRv6 Policies as follows:
+#
+# i) IPv6 DA=cafe::1, SID List=fcbb:0:0400:0100:d46::
+# ii) IPv4 DA=10.0.0.1, SID List=fcbb:0:0300::,fcff:1::d46
+#
+# ### i) single SID
+#
+# The node hs-2 sends an IPv6 packet directed to node hs-1. The router rt-2 is
+# directly connected to hs-2 and receives the packet. Rt-2 applies the
+# H.Encap.Red behavior with policy i) described above. Since there is only one
+# SID, the SRH header is omitted and the policy is inserted directly into the DA
+# of IPv6 packet.
+#
+# The packet reaches the router rt-4 and the enabled NEXT-C-SID SRv6 End.X
+# behavior (associated with fcbb:0:0400::/48) is triggered. This behavior
+# analyzes the IPv6 DA and checks whether the Argument of the C-SID container
+# is zero or not. The Argument is *NOT* zero and the C-SID in the IPv6 DA is
+# advanced. At this point, the current IPv6 DA is fcbb:0:0100:d46:: .
+# The enabled NEXT-C-SID SRv6 End.X behavior is configured with the L3 adjacency
+# fcf0:0:1:4::1, used to route traffic to the rt-1 node.
+#
+# The router rt-1 is configured for decapsulating the inner packet. It applies
+# the SRv6 End.DT46 behavior on the received packet. Decapsulation does not
+# require the presence of the SRH. At the end of the decap operation, the packet
+# is sent to the host hs-1.
+#
+# ### ii) double SID
+#
+# The router rt-2 is configured to enforce the given Policy through the SRv6
+# H.Encaps.Red. As a result, the first SID fcbb:0:0300:: is stored into the
+# IPv6 DA, while the SRH pushed into the packet is made of only one SID, i.e.
+# fcff:1::d46. Hence, the packet sent by hs-2 to hs-1 is encapsulated in an
+# outer IPv6 header plus the SRH.
+#
+# As the packet reaches the node rt-3, the enabled NEXT-C-SID SRv6 End.X
+# behavior bound to the SID fcbb:0:0300::/48 is triggered.
+# Since the Argument of the C-SID container is zero, the behavior can not
+# update the Locator-Node function with the next C-SID carried in the Argument
+# itself. Thus, the enabled NEXT-C-SID SRv6 End-X behavior operates as the
+# traditional End.X behavior: it updates the IPv6 DA by copying the next
+# available SID in the SID List carried by the SRH. After that, the packet is
+# forwarded to the rt-4 node using the L3 adjacency (fcf0:3:4::4) previously
+# configured for this behavior.
+#
+# The node rt-4 performs a plain IPv6 forward to the rt-1 router according to
+# its Local SID table, considering the IPv6 DA fcff:1::d46.
+#
+# Once the packet is received by rt-1, the router decapsulates the inner IPv4
+# packet using the SRv6 End.DT46 behavior (associated with the SID fcff:1::d46)
+# and sends it to the host hs-1.
+
+# Kselftest framework requirement - SKIP code is 4.
+readonly ksft_skip=4
+
+readonly RDMSUFF="$(mktemp -u XXXXXXXX)"
+readonly DUMMY_DEVNAME="dum0"
+readonly VRF_TID=100
+readonly VRF_DEVNAME="vrf-${VRF_TID}"
+readonly RT2HS_DEVNAME="veth-t${VRF_TID}"
+readonly LOCALSID_TABLE_ID=90
+readonly IPv6_RT_NETWORK=fcf0:0
+readonly IPv6_HS_NETWORK=cafe
+readonly IPv4_HS_NETWORK=10.0.0
+readonly VPN_LOCATOR_SERVICE=fcff
+readonly DT46_FUNC=0d46
+readonly HEADEND_ENCAP="encap.red"
+
+# do not add ':' as separator
+readonly LCBLOCK_ADDR=fcbb0000
+readonly LCBLOCK_BLEN=32
+# do not add ':' as separator
+readonly LCNODEFUNC_FMT="0%d00"
+readonly LCNODEFUNC_BLEN=16
+
+readonly LCBLOCK_NODEFUNC_BLEN=$((LCBLOCK_BLEN + LCNODEFUNC_BLEN))
+
+readonly CSID_CNTR_PREFIX="dead:beaf::/32"
+# ID of the router used for testing the C-SID container cfgs
+readonly CSID_CNTR_RT_ID_TEST=1
+# Routing table used for testing the C-SID container cfgs
+readonly CSID_CNTR_RT_TABLE=91
+
+# C-SID container configurations to be tested
+#
+# An entry of the array is defined as "a,b,c" where:
+# - 'a' and 'b' elements represent respectively the Locator-Block length
+# (lblen) in bits and the Locator-Node Function length (nflen) in bits.
+# 'a' and 'b' can be set to default values using the placeholder "d" which
+# indicates the default kernel values (32 for lblen and 16 for nflen);
+# otherwise, any numeric value is accepted;
+# - 'c' indicates whether the C-SID configuration provided by the values 'a'
+# and 'b' should be considered valid ("y") or invalid ("n").
+declare -ra CSID_CONTAINER_CFGS=(
+ "d,d,y"
+ "d,16,y"
+ "16,d,y"
+ "16,32,y"
+ "32,16,y"
+ "48,8,y"
+ "8,48,y"
+ "d,0,n"
+ "0,d,n"
+ "32,0,n"
+ "0,32,n"
+ "17,d,n"
+ "d,17,n"
+ "120,16,n"
+ "16,120,n"
+ "0,128,n"
+ "128,0,n"
+ "130,0,n"
+ "0,130,n"
+ "0,0,n"
+)
+
+PING_TIMEOUT_SEC=4
+PAUSE_ON_FAIL=${PAUSE_ON_FAIL:=no}
+
+# IDs of routers and hosts are initialized during the setup of the testing
+# network
+ROUTERS=''
+HOSTS=''
+
+SETUP_ERR=1
+
+ret=${ksft_skip}
+nsuccess=0
+nfail=0
+
+log_test()
+{
+ local rc="$1"
+ local expected="$2"
+ local msg="$3"
+
+ if [ "${rc}" -eq "${expected}" ]; then
+ nsuccess=$((nsuccess+1))
+ printf "\n TEST: %-60s [ OK ]\n" "${msg}"
+ else
+ ret=1
+ nfail=$((nfail+1))
+ printf "\n TEST: %-60s [FAIL]\n" "${msg}"
+ if [ "${PAUSE_ON_FAIL}" = "yes" ]; then
+ echo
+ echo "hit enter to continue, 'q' to quit"
+ read a
+ [ "$a" = "q" ] && exit 1
+ fi
+ fi
+}
+
+print_log_test_results()
+{
+ printf "\nTests passed: %3d\n" "${nsuccess}"
+ printf "Tests failed: %3d\n" "${nfail}"
+
+ # when a test fails, the value of 'ret' is set to 1 (error code).
+ # Conversely, when all tests are passed successfully, the 'ret' value
+ # is set to 0 (success code).
+ if [ "${ret}" -ne 1 ]; then
+ ret=0
+ fi
+}
+
+log_section()
+{
+ echo
+ echo "################################################################################"
+ echo "TEST SECTION: $*"
+ echo "################################################################################"
+}
+
+test_command_or_ksft_skip()
+{
+ local cmd="$1"
+
+ if [ ! -x "$(command -v "${cmd}")" ]; then
+ echo "SKIP: Could not run test without \"${cmd}\" tool";
+ exit "${ksft_skip}"
+ fi
+}
+
+get_nodename()
+{
+ local name="$1"
+
+ echo "${name}-${RDMSUFF}"
+}
+
+get_rtname()
+{
+ local rtid="$1"
+
+ get_nodename "rt-${rtid}"
+}
+
+get_hsname()
+{
+ local hsid="$1"
+
+ get_nodename "hs-${hsid}"
+}
+
+__create_namespace()
+{
+ local name="$1"
+
+ ip netns add "${name}"
+}
+
+create_router()
+{
+ local rtid="$1"
+ local nsname
+
+ nsname="$(get_rtname "${rtid}")"
+
+ __create_namespace "${nsname}"
+
+ ip netns exec "${nsname}" sysctl -wq net.ipv6.conf.all.accept_dad=0
+ ip netns exec "${nsname}" sysctl -wq net.ipv6.conf.default.accept_dad=0
+ ip netns exec "${nsname}" sysctl -wq net.ipv6.conf.all.forwarding=1
+
+ ip netns exec "${nsname}" sysctl -wq net.ipv4.conf.all.rp_filter=0
+ ip netns exec "${nsname}" sysctl -wq net.ipv4.conf.default.rp_filter=0
+ ip netns exec "${nsname}" sysctl -wq net.ipv4.ip_forward=1
+}
+
+create_host()
+{
+ local hsid="$1"
+ local nsname
+
+ nsname="$(get_hsname "${hsid}")"
+
+ __create_namespace "${nsname}"
+}
+
+cleanup()
+{
+ local nsname
+ local i
+
+ # destroy routers
+ for i in ${ROUTERS}; do
+ nsname="$(get_rtname "${i}")"
+
+ ip netns del "${nsname}" &>/dev/null || true
+ done
+
+ # destroy hosts
+ for i in ${HOSTS}; do
+ nsname="$(get_hsname "${i}")"
+
+ ip netns del "${nsname}" &>/dev/null || true
+ done
+
+ # check whether the setup phase was completed successfully or not. In
+ # case of an error during the setup phase of the testing environment,
+ # the selftest is considered as "skipped".
+ if [ "${SETUP_ERR}" -ne 0 ]; then
+ echo "SKIP: Setting up the testing environment failed"
+ exit "${ksft_skip}"
+ fi
+
+ exit "${ret}"
+}
+
+add_link_rt_pairs()
+{
+ local rt="$1"
+ local rt_neighs="$2"
+ local neigh
+ local nsname
+ local neigh_nsname
+
+ nsname="$(get_rtname "${rt}")"
+
+ for neigh in ${rt_neighs}; do
+ neigh_nsname="$(get_rtname "${neigh}")"
+
+ ip link add "veth-rt-${rt}-${neigh}" netns "${nsname}" \
+ type veth peer name "veth-rt-${neigh}-${rt}" \
+ netns "${neigh_nsname}"
+ done
+}
+
+get_network_prefix()
+{
+ local rt="$1"
+ local neigh="$2"
+ local p="${rt}"
+ local q="${neigh}"
+
+ if [ "${p}" -gt "${q}" ]; then
+ p="${q}"; q="${rt}"
+ fi
+
+ echo "${IPv6_RT_NETWORK}:${p}:${q}"
+}
+
+# Setup the basic networking for the routers
+setup_rt_networking()
+{
+ local rt="$1"
+ local rt_neighs="$2"
+ local nsname
+ local net_prefix
+ local devname
+ local neigh
+
+ nsname="$(get_rtname "${rt}")"
+
+ for neigh in ${rt_neighs}; do
+ devname="veth-rt-${rt}-${neigh}"
+
+ net_prefix="$(get_network_prefix "${rt}" "${neigh}")"
+
+ ip -netns "${nsname}" addr \
+ add "${net_prefix}::${rt}/64" dev "${devname}" nodad
+
+ ip -netns "${nsname}" link set "${devname}" up
+ done
+
+ ip -netns "${nsname}" link add "${DUMMY_DEVNAME}" type dummy
+
+ ip -netns "${nsname}" link set "${DUMMY_DEVNAME}" up
+ ip -netns "${nsname}" link set lo up
+}
+
+# build an ipv6 prefix/address based on the input string
+# Note that the input string does not contain ':' and '::' which are considered
+# to be implicit.
+# e.g.:
+# - input: fbcc00000400300
+# - output: fbcc:0000:0400:0300:0000:0000:0000:0000
+# ^^^^^^^^^^^^^^^^^^^
+# fill the address with 0s
+build_ipv6_addr()
+{
+ local addr="$1"
+ local out=""
+ local strlen="${#addr}"
+ local padn
+ local i
+
+ # add ":" every 4 digits (16 bits)
+ for (( i = 0; i < strlen; i++ )); do
+ if (( i > 0 && i < 32 && (i % 4) == 0 )); then
+ out="${out}:"
+ fi
+
+ out="${out}${addr:$i:1}"
+ done
+
+ # fill the remaining bits of the address with 0s
+ padn=$((32 - strlen))
+ for (( i = padn; i > 0; i-- )); do
+ if (( i > 0 && i < 32 && (i % 4) == 0 )); then
+ out="${out}:"
+ fi
+
+ out="${out}0"
+ done
+
+ printf "${out}"
+}
+
+build_csid()
+{
+ local nodeid="$1"
+
+ printf "${LCNODEFUNC_FMT}" "${nodeid}"
+}
+
+build_lcnode_func_prefix()
+{
+ local nodeid="$1"
+ local lcnodefunc
+ local prefix
+ local out
+
+ lcnodefunc="$(build_csid "${nodeid}")"
+ prefix="$(build_ipv6_addr "${LCBLOCK_ADDR}${lcnodefunc}")"
+
+ out="${prefix}/${LCBLOCK_NODEFUNC_BLEN}"
+
+ echo "${out}"
+}
+
+set_end_x_nextcsid()
+{
+ local rt="$1"
+ local adj="$2"
+
+ nsname="$(get_rtname "${rt}")"
+ net_prefix="$(get_network_prefix "${rt}" "${adj}")"
+ lcnode_func_prefix="$(build_lcnode_func_prefix "${rt}")"
+
+ # enabled NEXT-C-SID SRv6 End.X behavior (note that "dev" is the dummy
+ # dum0 device chosen for the sake of simplicity).
+ ip -netns "${nsname}" -6 route \
+ replace "${lcnode_func_prefix}" \
+ table "${LOCALSID_TABLE_ID}" \
+ encap seg6local action End.X nh6 "${net_prefix}::${adj}" \
+ flavors next-csid lblen "${LCBLOCK_BLEN}" \
+ nflen "${LCNODEFUNC_BLEN}" dev "${DUMMY_DEVNAME}"
+}
+
+set_underlay_sids_reachability()
+{
+ local rt="$1"
+ local rt_neighs="$2"
+
+ nsname="$(get_rtname "${rt}")"
+
+ for neigh in ${rt_neighs}; do
+ devname="veth-rt-${rt}-${neigh}"
+
+ net_prefix="$(get_network_prefix "${rt}" "${neigh}")"
+
+ # set underlay network routes for SIDs reachability
+ ip -netns "${nsname}" -6 route \
+ replace "${VPN_LOCATOR_SERVICE}:${neigh}::/32" \
+ table "${LOCALSID_TABLE_ID}" \
+ via "${net_prefix}::${neigh}" dev "${devname}"
+
+ # set the underlay network for C-SIDs reachability
+ lcnode_func_prefix="$(build_lcnode_func_prefix "${neigh}")"
+
+ ip -netns "${nsname}" -6 route \
+ replace "${lcnode_func_prefix}" \
+ table "${LOCALSID_TABLE_ID}" \
+ via "${net_prefix}::${neigh}" dev "${devname}"
+ done
+}
+
+# Setup local SIDs for an SRv6 router
+setup_rt_local_sids()
+{
+ local rt="$1"
+ local rt_neighs="$2"
+ local net_prefix
+ local devname
+ local nsname
+ local neigh
+ local lcnode_func_prefix
+ local lcblock_prefix
+
+ nsname="$(get_rtname "${rt}")"
+
+ set_underlay_sids_reachability "${rt}" "${rt_neighs}"
+
+ # all SIDs for VPNs start with a common locator. Routes and SRv6
+ # Endpoint behavior instaces are grouped together in the 'localsid'
+ # table.
+ ip -netns "${nsname}" -6 rule \
+ add to "${VPN_LOCATOR_SERVICE}::/16" \
+ lookup "${LOCALSID_TABLE_ID}" prio 999
+
+ # common locator block for NEXT-C-SIDS compression mechanism.
+ lcblock_prefix="$(build_ipv6_addr "${LCBLOCK_ADDR}")"
+ ip -netns "${nsname}" -6 rule \
+ add to "${lcblock_prefix}/${LCBLOCK_BLEN}" \
+ lookup "${LOCALSID_TABLE_ID}" prio 999
+}
+
+# build and install the SRv6 policy into the ingress SRv6 router as well as the
+# decap SID in the egress one.
+# args:
+# $1 - src host (evaluate automatically the ingress router)
+# $2 - dst host (evaluate automatically the egress router)
+# $3 - SRv6 routers configured for steering traffic (End.X behaviors)
+# $4 - single SID or double SID
+# $5 - traffic type (IPv6 or IPv4)
+__setup_l3vpn()
+{
+ local src="$1"
+ local dst="$2"
+ local end_rts="$3"
+ local mode="$4"
+ local traffic="$5"
+ local nsname
+ local policy
+ local container
+ local decapsid
+ local lcnfunc
+ local dt
+ local n
+ local rtsrc_nsname
+ local rtdst_nsname
+
+ rtsrc_nsname="$(get_rtname "${src}")"
+ rtdst_nsname="$(get_rtname "${dst}")"
+
+ container="${LCBLOCK_ADDR}"
+
+ # build first SID (C-SID container)
+ for n in ${end_rts}; do
+ lcnfunc="$(build_csid "${n}")"
+
+ container="${container}${lcnfunc}"
+ done
+
+ if [ "${mode}" -eq 1 ]; then
+ # single SID policy
+ dt="$(build_csid "${dst}")${DT46_FUNC}"
+ container="${container}${dt}"
+ # build the full ipv6 address for the container
+ policy="$(build_ipv6_addr "${container}")"
+
+ # build the decap SID used in the decap node
+ container="${LCBLOCK_ADDR}${dt}"
+ decapsid="$(build_ipv6_addr "${container}")"
+ else
+ # double SID policy
+ decapsid="${VPN_LOCATOR_SERVICE}:${dst}::${DT46_FUNC}"
+
+ policy="$(build_ipv6_addr "${container}"),${decapsid}"
+ fi
+
+ # apply encap policy
+ if [ "${traffic}" -eq 6 ]; then
+ ip -netns "${rtsrc_nsname}" -6 route \
+ add "${IPv6_HS_NETWORK}::${dst}" vrf "${VRF_DEVNAME}" \
+ encap seg6 mode "${HEADEND_ENCAP}" segs "${policy}" \
+ dev "${VRF_DEVNAME}"
+
+ ip -netns "${rtsrc_nsname}" -6 neigh \
+ add proxy "${IPv6_HS_NETWORK}::${dst}" \
+ dev "${RT2HS_DEVNAME}"
+ else
+ # "dev" must be different from the one where the packet is
+ # received, otherwise the proxy arp does not work.
+ ip -netns "${rtsrc_nsname}" -4 route \
+ add "${IPv4_HS_NETWORK}.${dst}" vrf "${VRF_DEVNAME}" \
+ encap seg6 mode "${HEADEND_ENCAP}" segs "${policy}" \
+ dev "${VRF_DEVNAME}"
+ fi
+
+ # apply decap
+ # Local End.DT46 behavior (decap)
+ ip -netns "${rtdst_nsname}" -6 route \
+ add "${decapsid}" \
+ table "${LOCALSID_TABLE_ID}" \
+ encap seg6local action End.DT46 vrftable "${VRF_TID}" \
+ dev "${VRF_DEVNAME}"
+}
+
+# see __setup_l3vpn()
+setup_ipv4_vpn_2sids()
+{
+ __setup_l3vpn "$1" "$2" "$3" 2 4
+}
+
+# see __setup_l3vpn()
+setup_ipv6_vpn_1sid()
+{
+ __setup_l3vpn "$1" "$2" "$3" 1 6
+}
+
+setup_hs()
+{
+ local hs="$1"
+ local rt="$2"
+ local hsname
+ local rtname
+
+ hsname="$(get_hsname "${hs}")"
+ rtname="$(get_rtname "${rt}")"
+
+ ip netns exec "${hsname}" sysctl -wq net.ipv6.conf.all.accept_dad=0
+ ip netns exec "${hsname}" sysctl -wq net.ipv6.conf.default.accept_dad=0
+
+ ip -netns "${hsname}" link add veth0 type veth \
+ peer name "${RT2HS_DEVNAME}" netns "${rtname}"
+
+ ip -netns "${hsname}" addr \
+ add "${IPv6_HS_NETWORK}::${hs}/64" dev veth0 nodad
+ ip -netns "${hsname}" addr add "${IPv4_HS_NETWORK}.${hs}/24" dev veth0
+
+ ip -netns "${hsname}" link set veth0 up
+ ip -netns "${hsname}" link set lo up
+
+ # configure the VRF on the router which is directly connected to the
+ # source host.
+ ip -netns "${rtname}" link \
+ add "${VRF_DEVNAME}" type vrf table "${VRF_TID}"
+ ip -netns "${rtname}" link set "${VRF_DEVNAME}" up
+
+ # enslave the veth interface connecting the router with the host to the
+ # VRF in the access router
+ ip -netns "${rtname}" link \
+ set "${RT2HS_DEVNAME}" master "${VRF_DEVNAME}"
+
+ # set default routes to unreachable for both ipv6 and ipv4
+ ip -netns "${rtname}" -6 route \
+ add unreachable default metric 4278198272 \
+ vrf "${VRF_DEVNAME}"
+ ip -netns "${rtname}" -4 route \
+ add unreachable default metric 4278198272 \
+ vrf "${VRF_DEVNAME}"
+
+ ip -netns "${rtname}" addr \
+ add "${IPv6_HS_NETWORK}::254/64" dev "${RT2HS_DEVNAME}" nodad
+ ip -netns "${rtname}" addr \
+ add "${IPv4_HS_NETWORK}.254/24" dev "${RT2HS_DEVNAME}"
+
+ ip -netns "${rtname}" link set "${RT2HS_DEVNAME}" up
+
+ ip netns exec "${rtname}" \
+ sysctl -wq net.ipv6.conf."${RT2HS_DEVNAME}".proxy_ndp=1
+ ip netns exec "${rtname}" \
+ sysctl -wq net.ipv4.conf."${RT2HS_DEVNAME}".proxy_arp=1
+
+ # disable the rp_filter otherwise the kernel gets confused about how
+ # to route decap ipv4 packets.
+ ip netns exec "${rtname}" \
+ sysctl -wq net.ipv4.conf."${RT2HS_DEVNAME}".rp_filter=0
+
+ ip netns exec "${rtname}" sh -c "echo 1 > /proc/sys/net/vrf/strict_mode"
+}
+
+setup()
+{
+ local i
+
+ # create routers
+ ROUTERS="1 2 3 4"; readonly ROUTERS
+ for i in ${ROUTERS}; do
+ create_router "${i}"
+ done
+
+ # create hosts
+ HOSTS="1 2"; readonly HOSTS
+ for i in ${HOSTS}; do
+ create_host "${i}"
+ done
+
+ # set up the links for connecting routers
+ add_link_rt_pairs 1 "2 3 4"
+ add_link_rt_pairs 2 "3 4"
+ add_link_rt_pairs 3 "4"
+
+ # set up the basic connectivity of routers and routes required for
+ # reachability of SIDs.
+ setup_rt_networking 1 "2 3 4"
+ setup_rt_networking 2 "1 3 4"
+ setup_rt_networking 3 "1 2 4"
+ setup_rt_networking 4 "1 2 3"
+
+ # set up the hosts connected to routers
+ setup_hs 1 1
+ setup_hs 2 2
+
+ # set up default SRv6 Endpoints (i.e. SRv6 End and SRv6 End.DT46)
+ setup_rt_local_sids 1 "2 3 4"
+ setup_rt_local_sids 2 "1 3 4"
+ setup_rt_local_sids 3 "1 2 4"
+ setup_rt_local_sids 4 "1 2 3"
+
+ # set up SRv6 Policies
+
+ # create an IPv6 VPN between hosts hs-1 and hs-2.
+ #
+ # Direction hs-1 -> hs-2
+ # - rt-1 encap (H.Encaps.Red)
+ # - rt-3 SRv6 End.X behavior adj rt-4 (NEXT-C-SID flavor)
+ # - rt-4 Plain IPv6 Forwarding to rt-2
+ # - rt-2 SRv6 End.DT46 behavior
+ setup_ipv6_vpn_1sid 1 2 "3"
+
+ # Direction hs2 -> hs-1
+ # - rt-2 encap (H.Encaps.Red)
+ # - rt-4 SRv6 End.X behavior adj rt-1 (NEXT-C-SID flavor)
+ # - rt-1 SRv6 End.DT46 behavior
+ setup_ipv6_vpn_1sid 2 1 "4"
+
+ # create an IPv4 VPN between hosts hs-1 and hs-2
+ #
+ # Direction hs-1 -> hs-2
+ # - rt-1 encap (H.Encaps.Red)
+ # - rt-3 SRv6 End.X behavior adj rt-4 (NEXT-C-SID flavor)
+ # - rt-4 Plain IPv6 Forwarding to rt-2
+ # - rt-2 SRv6 End.DT46 behavior
+ setup_ipv4_vpn_2sids 1 2 "3"
+
+ # Direction hs-2 -> hs-1
+ # - rt-2 encap (H.Encaps.Red)
+ # - rt-3 SRv6 End.X behavior adj rt-4 (NEXT-C-SID flavor)
+ # - rt-4 Plain IPv6 Forwarding to rt-1
+ # - rt-1 SRv6 End.DT46 behavior
+ setup_ipv4_vpn_2sids 2 1 "3"
+
+ # Setup the adjacencies in the SRv6 aware routers
+ # - rt-3 SRv6 End.X adjacency with rt-4
+ # - rt-4 SRv6 End.X adjacency with rt-1
+ set_end_x_nextcsid 3 4
+ set_end_x_nextcsid 4 1
+
+ # testing environment was set up successfully
+ SETUP_ERR=0
+}
+
+check_rt_connectivity()
+{
+ local rtsrc="$1"
+ local rtdst="$2"
+ local prefix
+ local rtsrc_nsname
+
+ rtsrc_nsname="$(get_rtname "${rtsrc}")"
+
+ prefix="$(get_network_prefix "${rtsrc}" "${rtdst}")"
+
+ ip netns exec "${rtsrc_nsname}" ping -c 1 -W "${PING_TIMEOUT_SEC}" \
+ "${prefix}::${rtdst}" >/dev/null 2>&1
+}
+
+check_and_log_rt_connectivity()
+{
+ local rtsrc="$1"
+ local rtdst="$2"
+
+ check_rt_connectivity "${rtsrc}" "${rtdst}"
+ log_test $? 0 "Routers connectivity: rt-${rtsrc} -> rt-${rtdst}"
+}
+
+check_hs_ipv6_connectivity()
+{
+ local hssrc="$1"
+ local hsdst="$2"
+ local hssrc_nsname
+
+ hssrc_nsname="$(get_hsname "${hssrc}")"
+
+ ip netns exec "${hssrc_nsname}" ping -c 1 -W "${PING_TIMEOUT_SEC}" \
+ "${IPv6_HS_NETWORK}::${hsdst}" >/dev/null 2>&1
+}
+
+check_hs_ipv4_connectivity()
+{
+ local hssrc="$1"
+ local hsdst="$2"
+ local hssrc_nsname
+
+ hssrc_nsname="$(get_hsname "${hssrc}")"
+
+ ip netns exec "${hssrc_nsname}" ping -c 1 -W "${PING_TIMEOUT_SEC}" \
+ "${IPv4_HS_NETWORK}.${hsdst}" >/dev/null 2>&1
+}
+
+check_and_log_hs2gw_connectivity()
+{
+ local hssrc="$1"
+
+ check_hs_ipv6_connectivity "${hssrc}" 254
+ log_test $? 0 "IPv6 Hosts connectivity: hs-${hssrc} -> gw"
+
+ check_hs_ipv4_connectivity "${hssrc}" 254
+ log_test $? 0 "IPv4 Hosts connectivity: hs-${hssrc} -> gw"
+}
+
+check_and_log_hs_ipv6_connectivity()
+{
+ local hssrc="$1"
+ local hsdst="$2"
+
+ check_hs_ipv6_connectivity "${hssrc}" "${hsdst}"
+ log_test $? 0 "IPv6 Hosts connectivity: hs-${hssrc} -> hs-${hsdst}"
+}
+
+check_and_log_hs_ipv4_connectivity()
+{
+ local hssrc="$1"
+ local hsdst="$2"
+
+ check_hs_ipv4_connectivity "${hssrc}" "${hsdst}"
+ log_test $? 0 "IPv4 Hosts connectivity: hs-${hssrc} -> hs-${hsdst}"
+}
+
+router_tests()
+{
+ local i
+ local j
+
+ log_section "IPv6 routers connectivity test"
+
+ for i in ${ROUTERS}; do
+ for j in ${ROUTERS}; do
+ if [ "${i}" -eq "${j}" ]; then
+ continue
+ fi
+
+ check_and_log_rt_connectivity "${i}" "${j}"
+ done
+ done
+}
+
+host2gateway_tests()
+{
+ local hs
+
+ log_section "IPv4/IPv6 connectivity test among hosts and gateways"
+
+ for hs in ${HOSTS}; do
+ check_and_log_hs2gw_connectivity "${hs}"
+ done
+}
+
+host_vpn_tests()
+{
+ log_section "SRv6 VPN connectivity test hosts (h1 <-> h2, IPv6)"
+
+ check_and_log_hs_ipv6_connectivity 1 2
+ check_and_log_hs_ipv6_connectivity 2 1
+
+ log_section "SRv6 VPN connectivity test hosts (h1 <-> h2, IPv4)"
+
+ check_and_log_hs_ipv4_connectivity 1 2
+ check_and_log_hs_ipv4_connectivity 2 1
+}
+
+__nextcsid_end_x_behavior_test()
+{
+ local nsname="$1"
+ local cmd="$2"
+ local blen="$3"
+ local flen="$4"
+ local layout=""
+
+ if [ "${blen}" != "d" ]; then
+ layout="${layout} lblen ${blen}"
+ fi
+
+ if [ "${flen}" != "d" ]; then
+ layout="${layout} nflen ${flen}"
+ fi
+
+ ip -netns "${nsname}" -6 route \
+ "${cmd}" "${CSID_CNTR_PREFIX}" \
+ table "${CSID_CNTR_RT_TABLE}" \
+ encap seg6local action End.X nh6 :: \
+ flavors next-csid ${layout} \
+ dev "${DUMMY_DEVNAME}" &>/dev/null
+
+ return "$?"
+}
+
+rt_x_nextcsid_end_x_behavior_test()
+{
+ local rt="$1"
+ local blen="$2"
+ local flen="$3"
+ local nsname
+ local ret
+
+ nsname="$(get_rtname "${rt}")"
+
+ __nextcsid_end_x_behavior_test "${nsname}" "add" "${blen}" "${flen}"
+ ret="$?"
+ __nextcsid_end_x_behavior_test "${nsname}" "del" "${blen}" "${flen}"
+
+ return "${ret}"
+}
+
+__parse_csid_container_cfg()
+{
+ local cfg="$1"
+ local index="$2"
+ local out
+
+ echo "${cfg}" | cut -d',' -f"${index}"
+}
+
+csid_container_cfg_tests()
+{
+ local valid
+ local blen
+ local flen
+ local cfg
+ local ret
+
+ log_section "C-SID Container config tests (legend: d='kernel default')"
+
+ for cfg in "${CSID_CONTAINER_CFGS[@]}"; do
+ blen="$(__parse_csid_container_cfg "${cfg}" 1)"
+ flen="$(__parse_csid_container_cfg "${cfg}" 2)"
+ valid="$(__parse_csid_container_cfg "${cfg}" 3)"
+
+ rt_x_nextcsid_end_x_behavior_test \
+ "${CSID_CNTR_RT_ID_TEST}" \
+ "${blen}" \
+ "${flen}"
+ ret="$?"
+
+ if [ "${valid}" == "y" ]; then
+ log_test "${ret}" 0 \
+ "Accept valid C-SID container cfg (lblen=${blen}, nflen=${flen})"
+ else
+ log_test "${ret}" 2 \
+ "Reject invalid C-SID container cfg (lblen=${blen}, nflen=${flen})"
+ fi
+ done
+}
+
+test_iproute2_supp_or_ksft_skip()
+{
+ if ! ip route help 2>&1 | grep -qo "next-csid"; then
+ echo "SKIP: Missing SRv6 NEXT-C-SID flavor support in iproute2"
+ exit "${ksft_skip}"
+ fi
+}
+
+test_dummy_dev_or_ksft_skip()
+{
+ local test_netns
+
+ test_netns="dummy-$(mktemp -u XXXXXXXX)"
+
+ if ! ip netns add "${test_netns}"; then
+ echo "SKIP: Cannot set up netns for testing dummy dev support"
+ exit "${ksft_skip}"
+ fi
+
+ modprobe dummy &>/dev/null || true
+ if ! ip -netns "${test_netns}" link \
+ add "${DUMMY_DEVNAME}" type dummy; then
+ echo "SKIP: dummy dev not supported"
+
+ ip netns del "${test_netns}"
+ exit "${ksft_skip}"
+ fi
+
+ ip netns del "${test_netns}"
+}
+
+test_vrf_or_ksft_skip()
+{
+ modprobe vrf &>/dev/null || true
+ if [ ! -e /proc/sys/net/vrf/strict_mode ]; then
+ echo "SKIP: vrf sysctl does not exist"
+ exit "${ksft_skip}"
+ fi
+}
+
+if [ "$(id -u)" -ne 0 ]; then
+ echo "SKIP: Need root privileges"
+ exit "${ksft_skip}"
+fi
+
+# required programs to carry out this selftest
+test_command_or_ksft_skip ip
+test_command_or_ksft_skip ping
+test_command_or_ksft_skip sysctl
+test_command_or_ksft_skip grep
+test_command_or_ksft_skip cut
+
+test_iproute2_supp_or_ksft_skip
+test_dummy_dev_or_ksft_skip
+test_vrf_or_ksft_skip
+
+set -e
+trap cleanup EXIT
+
+setup
+set +e
+
+csid_container_cfg_tests
+
+router_tests
+host2gateway_tests
+host_vpn_tests
+
+print_log_test_results
diff --git a/tools/testing/selftests/net/srv6_hencap_red_l3vpn_test.sh b/tools/testing/selftests/net/srv6_hencap_red_l3vpn_test.sh
new file mode 100755
index 000000000000..28a775654b92
--- /dev/null
+++ b/tools/testing/selftests/net/srv6_hencap_red_l3vpn_test.sh
@@ -0,0 +1,879 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+#
+# author: Andrea Mayer <andrea.mayer@uniroma2.it>
+#
+# This script is designed for testing the SRv6 H.Encaps.Red behavior.
+#
+# Below is depicted the IPv6 network of an operator which offers advanced
+# IPv4/IPv6 VPN services to hosts, enabling them to communicate with each
+# other.
+# In this example, hosts hs-1 and hs-2 are connected through an IPv4/IPv6 VPN
+# service, while hs-3 and hs-4 are connected using an IPv6 only VPN.
+#
+# Routers rt-1,rt-2,rt-3 and rt-4 implement IPv4/IPv6 L3 VPN services
+# leveraging the SRv6 architecture. The key components for such VPNs are:
+#
+# i) The SRv6 H.Encaps.Red behavior applies SRv6 Policies on traffic received
+# by connected hosts, initiating the VPN tunnel. Such a behavior is an
+# optimization of the SRv6 H.Encap aiming to reduce the length of the SID
+# List carried in the pushed SRH. Specifically, the H.Encaps.Red removes
+# the first SID contained in the SID List (i.e. SRv6 Policy) by storing it
+# into the IPv6 Destination Address. When a SRv6 Policy is made of only one
+# SID, the SRv6 H.Encaps.Red behavior omits the SRH at all and pushes that
+# SID directly into the IPv6 DA;
+#
+# ii) The SRv6 End behavior advances the active SID in the SID List carried by
+# the SRH;
+#
+# iii) The SRv6 End.DT46 behavior is used for removing the SRv6 Policy and,
+# thus, it terminates the VPN tunnel. Such a behavior is capable of
+# handling, at the same time, both tunneled IPv4 and IPv6 traffic.
+#
+#
+# cafe::1 cafe::2
+# 10.0.0.1 10.0.0.2
+# +--------+ +--------+
+# | | | |
+# | hs-1 | | hs-2 |
+# | | | |
+# +---+----+ +--- +---+
+# cafe::/64 | | cafe::/64
+# 10.0.0.0/24 | | 10.0.0.0/24
+# +---+----+ +----+---+
+# | | fcf0:0:1:2::/64 | |
+# | rt-1 +-------------------+ rt-2 |
+# | | | |
+# +---+----+ +----+---+
+# | . . |
+# | fcf0:0:1:3::/64 . |
+# | . . |
+# | . . |
+# fcf0:0:1:4::/64 | . | fcf0:0:2:3::/64
+# | . . |
+# | . . |
+# | fcf0:0:2:4::/64 . |
+# | . . |
+# +---+----+ +----+---+
+# | | | |
+# | rt-4 +-------------------+ rt-3 |
+# | | fcf0:0:3:4::/64 | |
+# +---+----+ +----+---+
+# cafe::/64 | | cafe::/64
+# 10.0.0.0/24 | | 10.0.0.0/24
+# +---+----+ +--- +---+
+# | | | |
+# | hs-4 | | hs-3 |
+# | | | |
+# +--------+ +--------+
+# cafe::4 cafe::3
+# 10.0.0.4 10.0.0.3
+#
+#
+# Every fcf0:0:x:y::/64 network interconnects the SRv6 routers rt-x with rt-y
+# in the IPv6 operator network.
+#
+# Local SID table
+# ===============
+#
+# Each SRv6 router is configured with a Local SID table in which SIDs are
+# stored. Considering the given SRv6 router rt-x, at least two SIDs are
+# configured in the Local SID table:
+#
+# Local SID table for SRv6 router rt-x
+# +----------------------------------------------------------+
+# |fcff:x::e is associated with the SRv6 End behavior |
+# |fcff:x::d46 is associated with the SRv6 End.DT46 behavior |
+# +----------------------------------------------------------+
+#
+# The fcff::/16 prefix is reserved by the operator for implementing SRv6 VPN
+# services. Reachability of SIDs is ensured by proper configuration of the IPv6
+# operator's network and SRv6 routers.
+#
+# # SRv6 Policies
+# ===============
+#
+# An SRv6 ingress router applies SRv6 policies to the traffic received from a
+# connected host. SRv6 policy enforcement consists of encapsulating the
+# received traffic into a new IPv6 packet with a given SID List contained in
+# the SRH.
+#
+# IPv4/IPv6 VPN between hs-1 and hs-2
+# -----------------------------------
+#
+# Hosts hs-1 and hs-2 are connected using dedicated IPv4/IPv6 VPNs.
+# Specifically, packets generated from hs-1 and directed towards hs-2 are
+# handled by rt-1 which applies the following SRv6 Policies:
+#
+# i.a) IPv6 traffic, SID List=fcff:3::e,fcff:4::e,fcff:2::d46
+# ii.a) IPv4 traffic, SID List=fcff:2::d46
+#
+# Policy (i.a) steers tunneled IPv6 traffic through SRv6 routers
+# rt-3,rt-4,rt-2. Instead, Policy (ii.a) steers tunneled IPv4 traffic through
+# rt-2.
+# The H.Encaps.Red reduces the SID List (i.a) carried in SRH by removing the
+# first SID (fcff:3::e) and pushing it into the IPv6 DA. In case of IPv4
+# traffic, the H.Encaps.Red omits the presence of SRH at all, since the SID
+# List (ii.a) consists of only one SID that can be stored directly in the IPv6
+# DA.
+#
+# On the reverse path (i.e. from hs-2 to hs-1), rt-2 applies the following
+# policies:
+#
+# i.b) IPv6 traffic, SID List=fcff:1::d46
+# ii.b) IPv4 traffic, SID List=fcff:4::e,fcff:3::e,fcff:1::d46
+#
+# Policy (i.b) steers tunneled IPv6 traffic through the SRv6 router rt-1.
+# Conversely, Policy (ii.b) steers tunneled IPv4 traffic through SRv6 routers
+# rt-4,rt-3,rt-1.
+# The H.Encaps.Red omits the SRH at all in case of (i.b) by pushing the single
+# SID (fcff::1::d46) inside the IPv6 DA.
+# The H.Encaps.Red reduces the SID List (ii.b) in the SRH by removing the first
+# SID (fcff:4::e) and pushing it into the IPv6 DA.
+#
+# In summary:
+# hs-1->hs-2 |IPv6 DA=fcff:3::e|SRH SIDs=fcff:4::e,fcff:2::d46|IPv6|...| (i.a)
+# hs-1->hs-2 |IPv6 DA=fcff:2::d46|IPv4|...| (ii.a)
+#
+# hs-2->hs-1 |IPv6 DA=fcff:1::d46|IPv6|...| (i.b)
+# hs-2->hs-1 |IPv6 DA=fcff:4::e|SRH SIDs=fcff:3::e,fcff:1::d46|IPv4|...| (ii.b)
+#
+#
+# IPv6 VPN between hs-3 and hs-4
+# ------------------------------
+#
+# Hosts hs-3 and hs-4 are connected using a dedicated IPv6 only VPN.
+# Specifically, packets generated from hs-3 and directed towards hs-4 are
+# handled by rt-3 which applies the following SRv6 Policy:
+#
+# i.c) IPv6 traffic, SID List=fcff:2::e,fcff:4::d46
+#
+# Policy (i.c) steers tunneled IPv6 traffic through SRv6 routers rt-2,rt-4.
+# The H.Encaps.Red reduces the SID List (i.c) carried in SRH by pushing the
+# first SID (fcff:2::e) in the IPv6 DA.
+#
+# On the reverse path (i.e. from hs-4 to hs-3) the router rt-4 applies the
+# following SRv6 Policy:
+#
+# i.d) IPv6 traffic, SID List=fcff:1::e,fcff:3::d46.
+#
+# Policy (i.d) steers tunneled IPv6 traffic through SRv6 routers rt-1,rt-3.
+# The H.Encaps.Red reduces the SID List (i.d) carried in SRH by pushing the
+# first SID (fcff:1::e) in the IPv6 DA.
+#
+# In summary:
+# hs-3->hs-4 |IPv6 DA=fcff:2::e|SRH SIDs=fcff:4::d46|IPv6|...| (i.c)
+# hs-4->hs-3 |IPv6 DA=fcff:1::e|SRH SIDs=fcff:3::d46|IPv6|...| (i.d)
+#
+
+# Kselftest framework requirement - SKIP code is 4.
+readonly ksft_skip=4
+
+readonly RDMSUFF="$(mktemp -u XXXXXXXX)"
+readonly VRF_TID=100
+readonly VRF_DEVNAME="vrf-${VRF_TID}"
+readonly RT2HS_DEVNAME="veth-t${VRF_TID}"
+readonly LOCALSID_TABLE_ID=90
+readonly IPv6_RT_NETWORK=fcf0:0
+readonly IPv6_HS_NETWORK=cafe
+readonly IPv4_HS_NETWORK=10.0.0
+readonly VPN_LOCATOR_SERVICE=fcff
+readonly END_FUNC=000e
+readonly DT46_FUNC=0d46
+
+PING_TIMEOUT_SEC=4
+PAUSE_ON_FAIL=${PAUSE_ON_FAIL:=no}
+
+# IDs of routers and hosts are initialized during the setup of the testing
+# network
+ROUTERS=''
+HOSTS=''
+
+SETUP_ERR=1
+
+ret=${ksft_skip}
+nsuccess=0
+nfail=0
+
+log_test()
+{
+ local rc="$1"
+ local expected="$2"
+ local msg="$3"
+
+ if [ "${rc}" -eq "${expected}" ]; then
+ nsuccess=$((nsuccess+1))
+ printf "\n TEST: %-60s [ OK ]\n" "${msg}"
+ else
+ ret=1
+ nfail=$((nfail+1))
+ printf "\n TEST: %-60s [FAIL]\n" "${msg}"
+ if [ "${PAUSE_ON_FAIL}" = "yes" ]; then
+ echo
+ echo "hit enter to continue, 'q' to quit"
+ read a
+ [ "$a" = "q" ] && exit 1
+ fi
+ fi
+}
+
+print_log_test_results()
+{
+ printf "\nTests passed: %3d\n" "${nsuccess}"
+ printf "Tests failed: %3d\n" "${nfail}"
+
+ # when a test fails, the value of 'ret' is set to 1 (error code).
+ # Conversely, when all tests are passed successfully, the 'ret' value
+ # is set to 0 (success code).
+ if [ "${ret}" -ne 1 ]; then
+ ret=0
+ fi
+}
+
+log_section()
+{
+ echo
+ echo "################################################################################"
+ echo "TEST SECTION: $*"
+ echo "################################################################################"
+}
+
+test_command_or_ksft_skip()
+{
+ local cmd="$1"
+
+ if [ ! -x "$(command -v "${cmd}")" ]; then
+ echo "SKIP: Could not run test without \"${cmd}\" tool";
+ exit "${ksft_skip}"
+ fi
+}
+
+get_nodename()
+{
+ local name="$1"
+
+ echo "${name}-${RDMSUFF}"
+}
+
+get_rtname()
+{
+ local rtid="$1"
+
+ get_nodename "rt-${rtid}"
+}
+
+get_hsname()
+{
+ local hsid="$1"
+
+ get_nodename "hs-${hsid}"
+}
+
+__create_namespace()
+{
+ local name="$1"
+
+ ip netns add "${name}"
+}
+
+create_router()
+{
+ local rtid="$1"
+ local nsname
+
+ nsname="$(get_rtname "${rtid}")"
+
+ __create_namespace "${nsname}"
+}
+
+create_host()
+{
+ local hsid="$1"
+ local nsname
+
+ nsname="$(get_hsname "${hsid}")"
+
+ __create_namespace "${nsname}"
+}
+
+cleanup()
+{
+ local nsname
+ local i
+
+ # destroy routers
+ for i in ${ROUTERS}; do
+ nsname="$(get_rtname "${i}")"
+
+ ip netns del "${nsname}" &>/dev/null || true
+ done
+
+ # destroy hosts
+ for i in ${HOSTS}; do
+ nsname="$(get_hsname "${i}")"
+
+ ip netns del "${nsname}" &>/dev/null || true
+ done
+
+ # check whether the setup phase was completed successfully or not. In
+ # case of an error during the setup phase of the testing environment,
+ # the selftest is considered as "skipped".
+ if [ "${SETUP_ERR}" -ne 0 ]; then
+ echo "SKIP: Setting up the testing environment failed"
+ exit "${ksft_skip}"
+ fi
+
+ exit "${ret}"
+}
+
+add_link_rt_pairs()
+{
+ local rt="$1"
+ local rt_neighs="$2"
+ local neigh
+ local nsname
+ local neigh_nsname
+
+ nsname="$(get_rtname "${rt}")"
+
+ for neigh in ${rt_neighs}; do
+ neigh_nsname="$(get_rtname "${neigh}")"
+
+ ip link add "veth-rt-${rt}-${neigh}" netns "${nsname}" \
+ type veth peer name "veth-rt-${neigh}-${rt}" \
+ netns "${neigh_nsname}"
+ done
+}
+
+get_network_prefix()
+{
+ local rt="$1"
+ local neigh="$2"
+ local p="${rt}"
+ local q="${neigh}"
+
+ if [ "${p}" -gt "${q}" ]; then
+ p="${q}"; q="${rt}"
+ fi
+
+ echo "${IPv6_RT_NETWORK}:${p}:${q}"
+}
+
+# Setup the basic networking for the routers
+setup_rt_networking()
+{
+ local rt="$1"
+ local rt_neighs="$2"
+ local nsname
+ local net_prefix
+ local devname
+ local neigh
+
+ nsname="$(get_rtname "${rt}")"
+
+ for neigh in ${rt_neighs}; do
+ devname="veth-rt-${rt}-${neigh}"
+
+ net_prefix="$(get_network_prefix "${rt}" "${neigh}")"
+
+ ip -netns "${nsname}" addr \
+ add "${net_prefix}::${rt}/64" dev "${devname}" nodad
+
+ ip -netns "${nsname}" link set "${devname}" up
+ done
+
+ ip -netns "${nsname}" link set lo up
+
+ ip netns exec "${nsname}" sysctl -wq net.ipv6.conf.all.accept_dad=0
+ ip netns exec "${nsname}" sysctl -wq net.ipv6.conf.default.accept_dad=0
+ ip netns exec "${nsname}" sysctl -wq net.ipv6.conf.all.forwarding=1
+
+ ip netns exec "${nsname}" sysctl -wq net.ipv4.conf.all.rp_filter=0
+ ip netns exec "${nsname}" sysctl -wq net.ipv4.conf.default.rp_filter=0
+ ip netns exec "${nsname}" sysctl -wq net.ipv4.ip_forward=1
+}
+
+# Setup local SIDs for an SRv6 router
+setup_rt_local_sids()
+{
+ local rt="$1"
+ local rt_neighs="$2"
+ local net_prefix
+ local devname
+ local nsname
+ local neigh
+
+ nsname="$(get_rtname "${rt}")"
+
+ for neigh in ${rt_neighs}; do
+ devname="veth-rt-${rt}-${neigh}"
+
+ net_prefix="$(get_network_prefix "${rt}" "${neigh}")"
+
+ # set underlay network routes for SIDs reachability
+ ip -netns "${nsname}" -6 route \
+ add "${VPN_LOCATOR_SERVICE}:${neigh}::/32" \
+ table "${LOCALSID_TABLE_ID}" \
+ via "${net_prefix}::${neigh}" dev "${devname}"
+ done
+
+ # Local End behavior (note that "dev" is dummy and the VRF is chosen
+ # for the sake of simplicity).
+ ip -netns "${nsname}" -6 route \
+ add "${VPN_LOCATOR_SERVICE}:${rt}::${END_FUNC}" \
+ table "${LOCALSID_TABLE_ID}" \
+ encap seg6local action End dev "${VRF_DEVNAME}"
+
+ # Local End.DT46 behavior
+ ip -netns "${nsname}" -6 route \
+ add "${VPN_LOCATOR_SERVICE}:${rt}::${DT46_FUNC}" \
+ table "${LOCALSID_TABLE_ID}" \
+ encap seg6local action End.DT46 vrftable "${VRF_TID}" \
+ dev "${VRF_DEVNAME}"
+
+ # all SIDs for VPNs start with a common locator. Routes and SRv6
+ # Endpoint behavior instaces are grouped together in the 'localsid'
+ # table.
+ ip -netns "${nsname}" -6 rule \
+ add to "${VPN_LOCATOR_SERVICE}::/16" \
+ lookup "${LOCALSID_TABLE_ID}" prio 999
+
+ # set default routes to unreachable for both ipv4 and ipv6
+ ip -netns "${nsname}" -6 route \
+ add unreachable default metric 4278198272 \
+ vrf "${VRF_DEVNAME}"
+
+ ip -netns "${nsname}" -4 route \
+ add unreachable default metric 4278198272 \
+ vrf "${VRF_DEVNAME}"
+}
+
+# build and install the SRv6 policy into the ingress SRv6 router.
+# args:
+# $1 - destination host (i.e. cafe::x host)
+# $2 - SRv6 router configured for enforcing the SRv6 Policy
+# $3 - SRv6 routers configured for steering traffic (End behaviors)
+# $4 - SRv6 router configured for removing the SRv6 Policy (router connected
+# to the destination host)
+# $5 - encap mode (full or red)
+# $6 - traffic type (IPv6 or IPv4)
+__setup_rt_policy()
+{
+ local dst="$1"
+ local encap_rt="$2"
+ local end_rts="$3"
+ local dec_rt="$4"
+ local mode="$5"
+ local traffic="$6"
+ local nsname
+ local policy=''
+ local n
+
+ nsname="$(get_rtname "${encap_rt}")"
+
+ for n in ${end_rts}; do
+ policy="${policy}${VPN_LOCATOR_SERVICE}:${n}::${END_FUNC},"
+ done
+
+ policy="${policy}${VPN_LOCATOR_SERVICE}:${dec_rt}::${DT46_FUNC}"
+
+ # add SRv6 policy to incoming traffic sent by connected hosts
+ if [ "${traffic}" -eq 6 ]; then
+ ip -netns "${nsname}" -6 route \
+ add "${IPv6_HS_NETWORK}::${dst}" vrf "${VRF_DEVNAME}" \
+ encap seg6 mode "${mode}" segs "${policy}" \
+ dev "${VRF_DEVNAME}"
+
+ ip -netns "${nsname}" -6 neigh \
+ add proxy "${IPv6_HS_NETWORK}::${dst}" \
+ dev "${RT2HS_DEVNAME}"
+ else
+ # "dev" must be different from the one where the packet is
+ # received, otherwise the proxy arp does not work.
+ ip -netns "${nsname}" -4 route \
+ add "${IPv4_HS_NETWORK}.${dst}" vrf "${VRF_DEVNAME}" \
+ encap seg6 mode "${mode}" segs "${policy}" \
+ dev "${VRF_DEVNAME}"
+ fi
+}
+
+# see __setup_rt_policy
+setup_rt_policy_ipv6()
+{
+ __setup_rt_policy "$1" "$2" "$3" "$4" "$5" 6
+}
+
+#see __setup_rt_policy
+setup_rt_policy_ipv4()
+{
+ __setup_rt_policy "$1" "$2" "$3" "$4" "$5" 4
+}
+
+setup_hs()
+{
+ local hs="$1"
+ local rt="$2"
+ local hsname
+ local rtname
+
+ hsname="$(get_hsname "${hs}")"
+ rtname="$(get_rtname "${rt}")"
+
+ ip netns exec "${hsname}" sysctl -wq net.ipv6.conf.all.accept_dad=0
+ ip netns exec "${hsname}" sysctl -wq net.ipv6.conf.default.accept_dad=0
+
+ ip -netns "${hsname}" link add veth0 type veth \
+ peer name "${RT2HS_DEVNAME}" netns "${rtname}"
+
+ ip -netns "${hsname}" addr \
+ add "${IPv6_HS_NETWORK}::${hs}/64" dev veth0 nodad
+ ip -netns "${hsname}" addr add "${IPv4_HS_NETWORK}.${hs}/24" dev veth0
+
+ ip -netns "${hsname}" link set veth0 up
+ ip -netns "${hsname}" link set lo up
+
+ # configure the VRF on the router which is directly connected to the
+ # source host.
+ ip -netns "${rtname}" link \
+ add "${VRF_DEVNAME}" type vrf table "${VRF_TID}"
+ ip -netns "${rtname}" link set "${VRF_DEVNAME}" up
+
+ # enslave the veth interface connecting the router with the host to the
+ # VRF in the access router
+ ip -netns "${rtname}" link \
+ set "${RT2HS_DEVNAME}" master "${VRF_DEVNAME}"
+
+ ip -netns "${rtname}" addr \
+ add "${IPv6_HS_NETWORK}::254/64" dev "${RT2HS_DEVNAME}" nodad
+ ip -netns "${rtname}" addr \
+ add "${IPv4_HS_NETWORK}.254/24" dev "${RT2HS_DEVNAME}"
+
+ ip -netns "${rtname}" link set "${RT2HS_DEVNAME}" up
+
+ ip netns exec "${rtname}" \
+ sysctl -wq net.ipv6.conf."${RT2HS_DEVNAME}".proxy_ndp=1
+ ip netns exec "${rtname}" \
+ sysctl -wq net.ipv4.conf."${RT2HS_DEVNAME}".proxy_arp=1
+
+ # disable the rp_filter otherwise the kernel gets confused about how
+ # to route decap ipv4 packets.
+ ip netns exec "${rtname}" \
+ sysctl -wq net.ipv4.conf."${RT2HS_DEVNAME}".rp_filter=0
+
+ ip netns exec "${rtname}" sh -c "echo 1 > /proc/sys/net/vrf/strict_mode"
+}
+
+setup()
+{
+ local i
+
+ # create routers
+ ROUTERS="1 2 3 4"; readonly ROUTERS
+ for i in ${ROUTERS}; do
+ create_router "${i}"
+ done
+
+ # create hosts
+ HOSTS="1 2 3 4"; readonly HOSTS
+ for i in ${HOSTS}; do
+ create_host "${i}"
+ done
+
+ # set up the links for connecting routers
+ add_link_rt_pairs 1 "2 3 4"
+ add_link_rt_pairs 2 "3 4"
+ add_link_rt_pairs 3 "4"
+
+ # set up the basic connectivity of routers and routes required for
+ # reachability of SIDs.
+ setup_rt_networking 1 "2 3 4"
+ setup_rt_networking 2 "1 3 4"
+ setup_rt_networking 3 "1 2 4"
+ setup_rt_networking 4 "1 2 3"
+
+ # set up the hosts connected to routers
+ setup_hs 1 1
+ setup_hs 2 2
+ setup_hs 3 3
+ setup_hs 4 4
+
+ # set up default SRv6 Endpoints (i.e. SRv6 End and SRv6 End.DT46)
+ setup_rt_local_sids 1 "2 3 4"
+ setup_rt_local_sids 2 "1 3 4"
+ setup_rt_local_sids 3 "1 2 4"
+ setup_rt_local_sids 4 "1 2 3"
+
+ # set up SRv6 policies
+
+ # create an IPv6 VPN between hosts hs-1 and hs-2.
+ # the network path between hs-1 and hs-2 traverses several routers
+ # depending on the direction of traffic.
+ #
+ # Direction hs-1 -> hs-2 (H.Encaps.Red)
+ # - rt-3,rt-4 (SRv6 End behaviors)
+ # - rt-2 (SRv6 End.DT46 behavior)
+ #
+ # Direction hs-2 -> hs-1 (H.Encaps.Red)
+ # - rt-1 (SRv6 End.DT46 behavior)
+ setup_rt_policy_ipv6 2 1 "3 4" 2 encap.red
+ setup_rt_policy_ipv6 1 2 "" 1 encap.red
+
+ # create an IPv4 VPN between hosts hs-1 and hs-2
+ # the network path between hs-1 and hs-2 traverses several routers
+ # depending on the direction of traffic.
+ #
+ # Direction hs-1 -> hs-2 (H.Encaps.Red)
+ # - rt-2 (SRv6 End.DT46 behavior)
+ #
+ # Direction hs-2 -> hs-1 (H.Encaps.Red)
+ # - rt-4,rt-3 (SRv6 End behaviors)
+ # - rt-1 (SRv6 End.DT46 behavior)
+ setup_rt_policy_ipv4 2 1 "" 2 encap.red
+ setup_rt_policy_ipv4 1 2 "4 3" 1 encap.red
+
+ # create an IPv6 VPN between hosts hs-3 and hs-4
+ # the network path between hs-3 and hs-4 traverses several routers
+ # depending on the direction of traffic.
+ #
+ # Direction hs-3 -> hs-4 (H.Encaps.Red)
+ # - rt-2 (SRv6 End Behavior)
+ # - rt-4 (SRv6 End.DT46 behavior)
+ #
+ # Direction hs-4 -> hs-3 (H.Encaps.Red)
+ # - rt-1 (SRv6 End behavior)
+ # - rt-3 (SRv6 End.DT46 behavior)
+ setup_rt_policy_ipv6 4 3 "2" 4 encap.red
+ setup_rt_policy_ipv6 3 4 "1" 3 encap.red
+
+ # testing environment was set up successfully
+ SETUP_ERR=0
+}
+
+check_rt_connectivity()
+{
+ local rtsrc="$1"
+ local rtdst="$2"
+ local prefix
+ local rtsrc_nsname
+
+ rtsrc_nsname="$(get_rtname "${rtsrc}")"
+
+ prefix="$(get_network_prefix "${rtsrc}" "${rtdst}")"
+
+ ip netns exec "${rtsrc_nsname}" ping -c 1 -W "${PING_TIMEOUT_SEC}" \
+ "${prefix}::${rtdst}" >/dev/null 2>&1
+}
+
+check_and_log_rt_connectivity()
+{
+ local rtsrc="$1"
+ local rtdst="$2"
+
+ check_rt_connectivity "${rtsrc}" "${rtdst}"
+ log_test $? 0 "Routers connectivity: rt-${rtsrc} -> rt-${rtdst}"
+}
+
+check_hs_ipv6_connectivity()
+{
+ local hssrc="$1"
+ local hsdst="$2"
+ local hssrc_nsname
+
+ hssrc_nsname="$(get_hsname "${hssrc}")"
+
+ ip netns exec "${hssrc_nsname}" ping -c 1 -W "${PING_TIMEOUT_SEC}" \
+ "${IPv6_HS_NETWORK}::${hsdst}" >/dev/null 2>&1
+}
+
+check_hs_ipv4_connectivity()
+{
+ local hssrc="$1"
+ local hsdst="$2"
+ local hssrc_nsname
+
+ hssrc_nsname="$(get_hsname "${hssrc}")"
+
+ ip netns exec "${hssrc_nsname}" ping -c 1 -W "${PING_TIMEOUT_SEC}" \
+ "${IPv4_HS_NETWORK}.${hsdst}" >/dev/null 2>&1
+}
+
+check_and_log_hs2gw_connectivity()
+{
+ local hssrc="$1"
+
+ check_hs_ipv6_connectivity "${hssrc}" 254
+ log_test $? 0 "IPv6 Hosts connectivity: hs-${hssrc} -> gw"
+
+ check_hs_ipv4_connectivity "${hssrc}" 254
+ log_test $? 0 "IPv4 Hosts connectivity: hs-${hssrc} -> gw"
+}
+
+check_and_log_hs_ipv6_connectivity()
+{
+ local hssrc="$1"
+ local hsdst="$2"
+
+ check_hs_ipv6_connectivity "${hssrc}" "${hsdst}"
+ log_test $? 0 "IPv6 Hosts connectivity: hs-${hssrc} -> hs-${hsdst}"
+}
+
+check_and_log_hs_ipv4_connectivity()
+{
+ local hssrc="$1"
+ local hsdst="$2"
+
+ check_hs_ipv4_connectivity "${hssrc}" "${hsdst}"
+ log_test $? 0 "IPv4 Hosts connectivity: hs-${hssrc} -> hs-${hsdst}"
+}
+
+check_and_log_hs_connectivity()
+{
+ local hssrc="$1"
+ local hsdst="$2"
+
+ check_and_log_hs_ipv4_connectivity "${hssrc}" "${hsdst}"
+ check_and_log_hs_ipv6_connectivity "${hssrc}" "${hsdst}"
+}
+
+check_and_log_hs_ipv6_isolation()
+{
+ local hssrc="$1"
+ local hsdst="$2"
+
+ # in this case, the connectivity test must fail
+ check_hs_ipv6_connectivity "${hssrc}" "${hsdst}"
+ log_test $? 1 "IPv6 Hosts isolation: hs-${hssrc} -X-> hs-${hsdst}"
+}
+
+check_and_log_hs_ipv4_isolation()
+{
+ local hssrc="$1"
+ local hsdst="$2"
+
+ # in this case, the connectivity test must fail
+ check_hs_ipv4_connectivity "${hssrc}" "${hsdst}"
+ log_test $? 1 "IPv4 Hosts isolation: hs-${hssrc} -X-> hs-${hsdst}"
+}
+
+check_and_log_hs_isolation()
+{
+ local hssrc="$1"
+ local hsdst="$2"
+
+ check_and_log_hs_ipv6_isolation "${hssrc}" "${hsdst}"
+ check_and_log_hs_ipv4_isolation "${hssrc}" "${hsdst}"
+}
+
+router_tests()
+{
+ local i
+ local j
+
+ log_section "IPv6 routers connectivity test"
+
+ for i in ${ROUTERS}; do
+ for j in ${ROUTERS}; do
+ if [ "${i}" -eq "${j}" ]; then
+ continue
+ fi
+
+ check_and_log_rt_connectivity "${i}" "${j}"
+ done
+ done
+}
+
+host2gateway_tests()
+{
+ local hs
+
+ log_section "IPv4/IPv6 connectivity test among hosts and gateways"
+
+ for hs in ${HOSTS}; do
+ check_and_log_hs2gw_connectivity "${hs}"
+ done
+}
+
+host_vpn_tests()
+{
+ log_section "SRv6 VPN connectivity test hosts (h1 <-> h2, IPv4/IPv6)"
+
+ check_and_log_hs_connectivity 1 2
+ check_and_log_hs_connectivity 2 1
+
+ log_section "SRv6 VPN connectivity test hosts (h3 <-> h4, IPv6 only)"
+
+ check_and_log_hs_ipv6_connectivity 3 4
+ check_and_log_hs_ipv6_connectivity 4 3
+}
+
+host_vpn_isolation_tests()
+{
+ local l1="1 2"
+ local l2="3 4"
+ local tmp
+ local i
+ local j
+ local k
+
+ log_section "SRv6 VPN isolation test among hosts"
+
+ for k in 0 1; do
+ for i in ${l1}; do
+ for j in ${l2}; do
+ check_and_log_hs_isolation "${i}" "${j}"
+ done
+ done
+
+ # let us test the reverse path
+ tmp="${l1}"; l1="${l2}"; l2="${tmp}"
+ done
+
+ log_section "SRv6 VPN isolation test among hosts (h2 <-> h4, IPv4 only)"
+
+ check_and_log_hs_ipv4_isolation 2 4
+ check_and_log_hs_ipv4_isolation 4 2
+}
+
+test_iproute2_supp_or_ksft_skip()
+{
+ if ! ip route help 2>&1 | grep -qo "encap.red"; then
+ echo "SKIP: Missing SRv6 encap.red support in iproute2"
+ exit "${ksft_skip}"
+ fi
+}
+
+test_vrf_or_ksft_skip()
+{
+ modprobe vrf &>/dev/null || true
+ if [ ! -e /proc/sys/net/vrf/strict_mode ]; then
+ echo "SKIP: vrf sysctl does not exist"
+ exit "${ksft_skip}"
+ fi
+}
+
+if [ "$(id -u)" -ne 0 ]; then
+ echo "SKIP: Need root privileges"
+ exit "${ksft_skip}"
+fi
+
+# required programs to carry out this selftest
+test_command_or_ksft_skip ip
+test_command_or_ksft_skip ping
+test_command_or_ksft_skip sysctl
+test_command_or_ksft_skip grep
+
+test_iproute2_supp_or_ksft_skip
+test_vrf_or_ksft_skip
+
+set -e
+trap cleanup EXIT
+
+setup
+set +e
+
+router_tests
+host2gateway_tests
+host_vpn_tests
+host_vpn_isolation_tests
+
+print_log_test_results
diff --git a/tools/testing/selftests/net/srv6_hl2encap_red_l2vpn_test.sh b/tools/testing/selftests/net/srv6_hl2encap_red_l2vpn_test.sh
new file mode 100755
index 000000000000..cb4177d41b21
--- /dev/null
+++ b/tools/testing/selftests/net/srv6_hl2encap_red_l2vpn_test.sh
@@ -0,0 +1,821 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+#
+# author: Andrea Mayer <andrea.mayer@uniroma2.it>
+#
+# This script is designed for testing the SRv6 H.L2Encaps.Red behavior.
+#
+# Below is depicted the IPv6 network of an operator which offers L2 VPN
+# services to hosts, enabling them to communicate with each other.
+# In this example, hosts hs-1 and hs-2 are connected through an L2 VPN service.
+# Currently, the SRv6 subsystem in Linux allows hosts hs-1 and hs-2 to exchange
+# full L2 frames as long as they carry IPv4/IPv6.
+#
+# Routers rt-1,rt-2,rt-3 and rt-4 implement L2 VPN services
+# leveraging the SRv6 architecture. The key components for such VPNs are:
+#
+# i) The SRv6 H.L2Encaps.Red behavior applies SRv6 Policies on traffic
+# received by connected hosts, initiating the VPN tunnel. Such a behavior
+# is an optimization of the SRv6 H.L2Encap aiming to reduce the
+# length of the SID List carried in the pushed SRH. Specifically, the
+# H.L2Encaps.Red removes the first SID contained in the SID List (i.e. SRv6
+# Policy) by storing it into the IPv6 Destination Address. When a SRv6
+# Policy is made of only one SID, the SRv6 H.L2Encaps.Red behavior omits
+# the SRH at all and pushes that SID directly into the IPv6 DA;
+#
+# ii) The SRv6 End behavior advances the active SID in the SID List
+# carried by the SRH;
+#
+# iii) The SRv6 End.DX2 behavior is used for removing the SRv6 Policy
+# and, thus, it terminates the VPN tunnel. The decapsulated L2 frame is
+# sent over the interface connected with the destination host.
+#
+# cafe::1 cafe::2
+# 10.0.0.1 10.0.0.2
+# +--------+ +--------+
+# | | | |
+# | hs-1 | | hs-2 |
+# | | | |
+# +---+----+ +--- +---+
+# cafe::/64 | | cafe::/64
+# 10.0.0.0/24 | | 10.0.0.0/24
+# +---+----+ +----+---+
+# | | fcf0:0:1:2::/64 | |
+# | rt-1 +-------------------+ rt-2 |
+# | | | |
+# +---+----+ +----+---+
+# | . . |
+# | fcf0:0:1:3::/64 . |
+# | . . |
+# | . . |
+# fcf0:0:1:4::/64 | . | fcf0:0:2:3::/64
+# | . . |
+# | . . |
+# | fcf0:0:2:4::/64 . |
+# | . . |
+# +---+----+ +----+---+
+# | | | |
+# | rt-4 +-------------------+ rt-3 |
+# | | fcf0:0:3:4::/64 | |
+# +---+----+ +----+---+
+#
+#
+# Every fcf0:0:x:y::/64 network interconnects the SRv6 routers rt-x with rt-y
+# in the IPv6 operator network.
+#
+# Local SID table
+# ===============
+#
+# Each SRv6 router is configured with a Local SID table in which SIDs are
+# stored. Considering the given SRv6 router rt-x, at least two SIDs are
+# configured in the Local SID table:
+#
+# Local SID table for SRv6 router rt-x
+# +----------------------------------------------------------+
+# |fcff:x::e is associated with the SRv6 End behavior |
+# |fcff:x::d2 is associated with the SRv6 End.DX2 behavior |
+# +----------------------------------------------------------+
+#
+# The fcff::/16 prefix is reserved by the operator for implementing SRv6 VPN
+# services. Reachability of SIDs is ensured by proper configuration of the IPv6
+# operator's network and SRv6 routers.
+#
+# SRv6 Policies
+# =============
+#
+# An SRv6 ingress router applies SRv6 policies to the traffic received from a
+# connected host. SRv6 policy enforcement consists of encapsulating the
+# received traffic into a new IPv6 packet with a given SID List contained in
+# the SRH.
+#
+# L2 VPN between hs-1 and hs-2
+# ----------------------------
+#
+# Hosts hs-1 and hs-2 are connected using a dedicated L2 VPN.
+# Specifically, packets generated from hs-1 and directed towards hs-2 are
+# handled by rt-1 which applies the following SRv6 Policies:
+#
+# i.a) L2 traffic, SID List=fcff:2::d2
+#
+# Policy (i.a) steers tunneled L2 traffic through SRv6 router rt-2.
+# The H.L2Encaps.Red omits the presence of SRH at all, since the SID List
+# consists of only one SID (fcff:2::d2) that can be stored directly in the IPv6
+# DA.
+#
+# On the reverse path (i.e. from hs-2 to hs-1), rt-2 applies the following
+# policies:
+#
+# i.b) L2 traffic, SID List=fcff:4::e,fcff:3::e,fcff:1::d2
+#
+# Policy (i.b) steers tunneled L2 traffic through the SRv6 routers
+# rt-4,rt-3,rt2. The H.L2Encaps.Red reduces the SID List in the SRH by removing
+# the first SID (fcff:4::e) and pushing it into the IPv6 DA.
+#
+# In summary:
+# hs-1->hs-2 |IPv6 DA=fcff:2::d2|eth|...| (i.a)
+# hs-2->hs-1 |IPv6 DA=fcff:4::e|SRH SIDs=fcff:3::e,fcff:1::d2|eth|...| (i.b)
+#
+
+# Kselftest framework requirement - SKIP code is 4.
+readonly ksft_skip=4
+
+readonly RDMSUFF="$(mktemp -u XXXXXXXX)"
+readonly DUMMY_DEVNAME="dum0"
+readonly RT2HS_DEVNAME="veth-hs"
+readonly HS_VETH_NAME="veth0"
+readonly LOCALSID_TABLE_ID=90
+readonly IPv6_RT_NETWORK=fcf0:0
+readonly IPv6_HS_NETWORK=cafe
+readonly IPv4_HS_NETWORK=10.0.0
+readonly VPN_LOCATOR_SERVICE=fcff
+readonly MAC_PREFIX=00:00:00:c0:01
+readonly END_FUNC=000e
+readonly DX2_FUNC=00d2
+
+PING_TIMEOUT_SEC=4
+PAUSE_ON_FAIL=${PAUSE_ON_FAIL:=no}
+
+# IDs of routers and hosts are initialized during the setup of the testing
+# network
+ROUTERS=''
+HOSTS=''
+
+SETUP_ERR=1
+
+ret=${ksft_skip}
+nsuccess=0
+nfail=0
+
+log_test()
+{
+ local rc="$1"
+ local expected="$2"
+ local msg="$3"
+
+ if [ "${rc}" -eq "${expected}" ]; then
+ nsuccess=$((nsuccess+1))
+ printf "\n TEST: %-60s [ OK ]\n" "${msg}"
+ else
+ ret=1
+ nfail=$((nfail+1))
+ printf "\n TEST: %-60s [FAIL]\n" "${msg}"
+ if [ "${PAUSE_ON_FAIL}" = "yes" ]; then
+ echo
+ echo "hit enter to continue, 'q' to quit"
+ read a
+ [ "$a" = "q" ] && exit 1
+ fi
+ fi
+}
+
+print_log_test_results()
+{
+ printf "\nTests passed: %3d\n" "${nsuccess}"
+ printf "Tests failed: %3d\n" "${nfail}"
+
+ # when a test fails, the value of 'ret' is set to 1 (error code).
+ # Conversely, when all tests are passed successfully, the 'ret' value
+ # is set to 0 (success code).
+ if [ "${ret}" -ne 1 ]; then
+ ret=0
+ fi
+}
+
+log_section()
+{
+ echo
+ echo "################################################################################"
+ echo "TEST SECTION: $*"
+ echo "################################################################################"
+}
+
+test_command_or_ksft_skip()
+{
+ local cmd="$1"
+
+ if [ ! -x "$(command -v "${cmd}")" ]; then
+ echo "SKIP: Could not run test without \"${cmd}\" tool";
+ exit "${ksft_skip}"
+ fi
+}
+
+get_nodename()
+{
+ local name="$1"
+
+ echo "${name}-${RDMSUFF}"
+}
+
+get_rtname()
+{
+ local rtid="$1"
+
+ get_nodename "rt-${rtid}"
+}
+
+get_hsname()
+{
+ local hsid="$1"
+
+ get_nodename "hs-${hsid}"
+}
+
+__create_namespace()
+{
+ local name="$1"
+
+ ip netns add "${name}"
+}
+
+create_router()
+{
+ local rtid="$1"
+ local nsname
+
+ nsname="$(get_rtname "${rtid}")"
+
+ __create_namespace "${nsname}"
+}
+
+create_host()
+{
+ local hsid="$1"
+ local nsname
+
+ nsname="$(get_hsname "${hsid}")"
+
+ __create_namespace "${nsname}"
+}
+
+cleanup()
+{
+ local nsname
+ local i
+
+ # destroy routers
+ for i in ${ROUTERS}; do
+ nsname="$(get_rtname "${i}")"
+
+ ip netns del "${nsname}" &>/dev/null || true
+ done
+
+ # destroy hosts
+ for i in ${HOSTS}; do
+ nsname="$(get_hsname "${i}")"
+
+ ip netns del "${nsname}" &>/dev/null || true
+ done
+
+ # check whether the setup phase was completed successfully or not. In
+ # case of an error during the setup phase of the testing environment,
+ # the selftest is considered as "skipped".
+ if [ "${SETUP_ERR}" -ne 0 ]; then
+ echo "SKIP: Setting up the testing environment failed"
+ exit "${ksft_skip}"
+ fi
+
+ exit "${ret}"
+}
+
+add_link_rt_pairs()
+{
+ local rt="$1"
+ local rt_neighs="$2"
+ local neigh
+ local nsname
+ local neigh_nsname
+
+ nsname="$(get_rtname "${rt}")"
+
+ for neigh in ${rt_neighs}; do
+ neigh_nsname="$(get_rtname "${neigh}")"
+
+ ip link add "veth-rt-${rt}-${neigh}" netns "${nsname}" \
+ type veth peer name "veth-rt-${neigh}-${rt}" \
+ netns "${neigh_nsname}"
+ done
+}
+
+get_network_prefix()
+{
+ local rt="$1"
+ local neigh="$2"
+ local p="${rt}"
+ local q="${neigh}"
+
+ if [ "${p}" -gt "${q}" ]; then
+ p="${q}"; q="${rt}"
+ fi
+
+ echo "${IPv6_RT_NETWORK}:${p}:${q}"
+}
+
+# Setup the basic networking for the routers
+setup_rt_networking()
+{
+ local rt="$1"
+ local rt_neighs="$2"
+ local nsname
+ local net_prefix
+ local devname
+ local neigh
+
+ nsname="$(get_rtname "${rt}")"
+
+ for neigh in ${rt_neighs}; do
+ devname="veth-rt-${rt}-${neigh}"
+
+ net_prefix="$(get_network_prefix "${rt}" "${neigh}")"
+
+ ip -netns "${nsname}" addr \
+ add "${net_prefix}::${rt}/64" dev "${devname}" nodad
+
+ ip -netns "${nsname}" link set "${devname}" up
+ done
+
+ ip -netns "${nsname}" link add "${DUMMY_DEVNAME}" type dummy
+
+ ip -netns "${nsname}" link set "${DUMMY_DEVNAME}" up
+ ip -netns "${nsname}" link set lo up
+
+ ip netns exec "${nsname}" sysctl -wq net.ipv6.conf.all.accept_dad=0
+ ip netns exec "${nsname}" sysctl -wq net.ipv6.conf.default.accept_dad=0
+ ip netns exec "${nsname}" sysctl -wq net.ipv6.conf.all.forwarding=1
+
+ ip netns exec "${nsname}" sysctl -wq net.ipv4.conf.all.rp_filter=0
+ ip netns exec "${nsname}" sysctl -wq net.ipv4.conf.default.rp_filter=0
+ ip netns exec "${nsname}" sysctl -wq net.ipv4.ip_forward=1
+}
+
+# Setup local SIDs for an SRv6 router
+setup_rt_local_sids()
+{
+ local rt="$1"
+ local rt_neighs="$2"
+ local net_prefix
+ local devname
+ local nsname
+ local neigh
+
+ nsname="$(get_rtname "${rt}")"
+
+ for neigh in ${rt_neighs}; do
+ devname="veth-rt-${rt}-${neigh}"
+
+ net_prefix="$(get_network_prefix "${rt}" "${neigh}")"
+
+ # set underlay network routes for SIDs reachability
+ ip -netns "${nsname}" -6 route \
+ add "${VPN_LOCATOR_SERVICE}:${neigh}::/32" \
+ table "${LOCALSID_TABLE_ID}" \
+ via "${net_prefix}::${neigh}" dev "${devname}"
+ done
+
+ # Local End behavior (note that dev "${DUMMY_DEVNAME}" is a dummy
+ # interface)
+ ip -netns "${nsname}" -6 route \
+ add "${VPN_LOCATOR_SERVICE}:${rt}::${END_FUNC}" \
+ table "${LOCALSID_TABLE_ID}" \
+ encap seg6local action End dev "${DUMMY_DEVNAME}"
+
+ # all SIDs for VPNs start with a common locator. Routes and SRv6
+ # Endpoint behaviors instaces are grouped together in the 'localsid'
+ # table.
+ ip -netns "${nsname}" -6 rule add \
+ to "${VPN_LOCATOR_SERVICE}::/16" \
+ lookup "${LOCALSID_TABLE_ID}" prio 999
+}
+
+# build and install the SRv6 policy into the ingress SRv6 router.
+# args:
+# $1 - destination host (i.e. cafe::x host)
+# $2 - SRv6 router configured for enforcing the SRv6 Policy
+# $3 - SRv6 routers configured for steering traffic (End behaviors)
+# $4 - SRv6 router configured for removing the SRv6 Policy (router connected
+# to the destination host)
+# $5 - encap mode (full or red)
+# $6 - traffic type (IPv6 or IPv4)
+__setup_rt_policy()
+{
+ local dst="$1"
+ local encap_rt="$2"
+ local end_rts="$3"
+ local dec_rt="$4"
+ local mode="$5"
+ local traffic="$6"
+ local nsname
+ local policy=''
+ local n
+
+ nsname="$(get_rtname "${encap_rt}")"
+
+ for n in ${end_rts}; do
+ policy="${policy}${VPN_LOCATOR_SERVICE}:${n}::${END_FUNC},"
+ done
+
+ policy="${policy}${VPN_LOCATOR_SERVICE}:${dec_rt}::${DX2_FUNC}"
+
+ # add SRv6 policy to incoming traffic sent by connected hosts
+ if [ "${traffic}" -eq 6 ]; then
+ ip -netns "${nsname}" -6 route \
+ add "${IPv6_HS_NETWORK}::${dst}" \
+ encap seg6 mode "${mode}" segs "${policy}" \
+ dev dum0
+ else
+ ip -netns "${nsname}" -4 route \
+ add "${IPv4_HS_NETWORK}.${dst}" \
+ encap seg6 mode "${mode}" segs "${policy}" \
+ dev dum0
+ fi
+}
+
+# see __setup_rt_policy
+setup_rt_policy_ipv6()
+{
+ __setup_rt_policy "$1" "$2" "$3" "$4" "$5" 6
+}
+
+#see __setup_rt_policy
+setup_rt_policy_ipv4()
+{
+ __setup_rt_policy "$1" "$2" "$3" "$4" "$5" 4
+}
+
+setup_decap()
+{
+ local rt="$1"
+ local nsname
+
+ nsname="$(get_rtname "${rt}")"
+
+ # Local End.DX2 behavior
+ ip -netns "${nsname}" -6 route \
+ add "${VPN_LOCATOR_SERVICE}:${rt}::${DX2_FUNC}" \
+ table "${LOCALSID_TABLE_ID}" \
+ encap seg6local action End.DX2 oif "${RT2HS_DEVNAME}" \
+ dev "${RT2HS_DEVNAME}"
+}
+
+setup_hs()
+{
+ local hs="$1"
+ local rt="$2"
+ local hsname
+ local rtname
+
+ hsname="$(get_hsname "${hs}")"
+ rtname="$(get_rtname "${rt}")"
+
+ ip netns exec "${hsname}" sysctl -wq net.ipv6.conf.all.accept_dad=0
+ ip netns exec "${hsname}" sysctl -wq net.ipv6.conf.default.accept_dad=0
+
+ ip -netns "${hsname}" link add "${HS_VETH_NAME}" type veth \
+ peer name "${RT2HS_DEVNAME}" netns "${rtname}"
+
+ ip -netns "${hsname}" addr add "${IPv6_HS_NETWORK}::${hs}/64" \
+ dev "${HS_VETH_NAME}" nodad
+ ip -netns "${hsname}" addr add "${IPv4_HS_NETWORK}.${hs}/24" \
+ dev "${HS_VETH_NAME}"
+
+ ip -netns "${hsname}" link set "${HS_VETH_NAME}" up
+ ip -netns "${hsname}" link set lo up
+
+ ip -netns "${rtname}" addr add "${IPv6_HS_NETWORK}::254/64" \
+ dev "${RT2HS_DEVNAME}" nodad
+ ip -netns "${rtname}" addr \
+ add "${IPv4_HS_NETWORK}.254/24" dev "${RT2HS_DEVNAME}"
+
+ ip -netns "${rtname}" link set "${RT2HS_DEVNAME}" up
+
+ # disable the rp_filter otherwise the kernel gets confused about how
+ # to route decap ipv4 packets.
+ ip netns exec "${rtname}" \
+ sysctl -wq net.ipv4.conf."${RT2HS_DEVNAME}".rp_filter=0
+}
+
+# set an auto-generated mac address
+# args:
+# $1 - name of the node (e.g.: hs-1, rt-3, etc)
+# $2 - id of the node (e.g.: 1 for hs-1, 3 for rt-3, etc)
+# $3 - host part of the IPv6 network address
+# $4 - name of the network interface to which the generated mac address must
+# be set.
+set_mac_address()
+{
+ local nodename="$1"
+ local nodeid="$2"
+ local host="$3"
+ local ifname="$4"
+ local nsname
+
+ nsname=$(get_nodename "${nodename}")
+
+ ip -netns "${nsname}" link set dev "${ifname}" down
+
+ ip -netns "${nsname}" link set address "${MAC_PREFIX}:${nodeid}" \
+ dev "${ifname}"
+
+ # the IPv6 address must be set once again after the MAC address has
+ # been changed.
+ ip -netns "${nsname}" addr add "${IPv6_HS_NETWORK}::${host}/64" \
+ dev "${ifname}" nodad
+
+ ip -netns "${nsname}" link set dev "${ifname}" up
+}
+
+set_host_l2peer()
+{
+ local hssrc="$1"
+ local hsdst="$2"
+ local ipprefix="$3"
+ local proto="$4"
+ local hssrc_name
+ local ipaddr
+
+ hssrc_name="$(get_hsname "${hssrc}")"
+
+ if [ "${proto}" -eq 6 ]; then
+ ipaddr="${ipprefix}::${hsdst}"
+ else
+ ipaddr="${ipprefix}.${hsdst}"
+ fi
+
+ ip -netns "${hssrc_name}" route add "${ipaddr}" dev "${HS_VETH_NAME}"
+
+ ip -netns "${hssrc_name}" neigh \
+ add "${ipaddr}" lladdr "${MAC_PREFIX}:${hsdst}" \
+ dev "${HS_VETH_NAME}"
+}
+
+# setup an SRv6 L2 VPN between host hs-x and hs-y (currently, the SRv6
+# subsystem only supports L2 frames whose layer-3 is IPv4/IPv6).
+# args:
+# $1 - source host
+# $2 - SRv6 routers configured for steering tunneled traffic
+# $3 - destination host
+setup_l2vpn()
+{
+ local hssrc="$1"
+ local end_rts="$2"
+ local hsdst="$3"
+ local rtsrc="${hssrc}"
+ local rtdst="${hsdst}"
+
+ # set fixed mac for source node and the neigh MAC address
+ set_mac_address "hs-${hssrc}" "${hssrc}" "${hssrc}" "${HS_VETH_NAME}"
+ set_host_l2peer "${hssrc}" "${hsdst}" "${IPv6_HS_NETWORK}" 6
+ set_host_l2peer "${hssrc}" "${hsdst}" "${IPv4_HS_NETWORK}" 4
+
+ # we have to set the mac address of the veth-host (on ingress router)
+ # to the mac address of the remote peer (L2 VPN destination host).
+ # Otherwise, traffic coming from the source host is dropped at the
+ # ingress router.
+ set_mac_address "rt-${rtsrc}" "${hsdst}" 254 "${RT2HS_DEVNAME}"
+
+ # set the SRv6 Policies at the ingress router
+ setup_rt_policy_ipv6 "${hsdst}" "${rtsrc}" "${end_rts}" "${rtdst}" \
+ l2encap.red 6
+ setup_rt_policy_ipv4 "${hsdst}" "${rtsrc}" "${end_rts}" "${rtdst}" \
+ l2encap.red 4
+
+ # set the decap behavior
+ setup_decap "${rtsrc}"
+}
+
+setup()
+{
+ local i
+
+ # create routers
+ ROUTERS="1 2 3 4"; readonly ROUTERS
+ for i in ${ROUTERS}; do
+ create_router "${i}"
+ done
+
+ # create hosts
+ HOSTS="1 2"; readonly HOSTS
+ for i in ${HOSTS}; do
+ create_host "${i}"
+ done
+
+ # set up the links for connecting routers
+ add_link_rt_pairs 1 "2 3 4"
+ add_link_rt_pairs 2 "3 4"
+ add_link_rt_pairs 3 "4"
+
+ # set up the basic connectivity of routers and routes required for
+ # reachability of SIDs.
+ setup_rt_networking 1 "2 3 4"
+ setup_rt_networking 2 "1 3 4"
+ setup_rt_networking 3 "1 2 4"
+ setup_rt_networking 4 "1 2 3"
+
+ # set up the hosts connected to routers
+ setup_hs 1 1
+ setup_hs 2 2
+
+ # set up default SRv6 Endpoints (i.e. SRv6 End and SRv6 End.DX2)
+ setup_rt_local_sids 1 "2 3 4"
+ setup_rt_local_sids 2 "1 3 4"
+ setup_rt_local_sids 3 "1 2 4"
+ setup_rt_local_sids 4 "1 2 3"
+
+ # create a L2 VPN between hs-1 and hs-2.
+ # NB: currently, H.L2Encap* enables tunneling of L2 frames whose
+ # layer-3 is IPv4/IPv6.
+ #
+ # the network path between hs-1 and hs-2 traverses several routers
+ # depending on the direction of traffic.
+ #
+ # Direction hs-1 -> hs-2 (H.L2Encaps.Red)
+ # - rt-2 (SRv6 End.DX2 behavior)
+ #
+ # Direction hs-2 -> hs-1 (H.L2Encaps.Red)
+ # - rt-4,rt-3 (SRv6 End behaviors)
+ # - rt-1 (SRv6 End.DX2 behavior)
+ setup_l2vpn 1 "" 2
+ setup_l2vpn 2 "4 3" 1
+
+ # testing environment was set up successfully
+ SETUP_ERR=0
+}
+
+check_rt_connectivity()
+{
+ local rtsrc="$1"
+ local rtdst="$2"
+ local prefix
+ local rtsrc_nsname
+
+ rtsrc_nsname="$(get_rtname "${rtsrc}")"
+
+ prefix="$(get_network_prefix "${rtsrc}" "${rtdst}")"
+
+ ip netns exec "${rtsrc_nsname}" ping -c 1 -W "${PING_TIMEOUT_SEC}" \
+ "${prefix}::${rtdst}" >/dev/null 2>&1
+}
+
+check_and_log_rt_connectivity()
+{
+ local rtsrc="$1"
+ local rtdst="$2"
+
+ check_rt_connectivity "${rtsrc}" "${rtdst}"
+ log_test $? 0 "Routers connectivity: rt-${rtsrc} -> rt-${rtdst}"
+}
+
+check_hs_ipv6_connectivity()
+{
+ local hssrc="$1"
+ local hsdst="$2"
+ local hssrc_nsname
+
+ hssrc_nsname="$(get_hsname "${hssrc}")"
+
+ ip netns exec "${hssrc_nsname}" ping -c 1 -W "${PING_TIMEOUT_SEC}" \
+ "${IPv6_HS_NETWORK}::${hsdst}" >/dev/null 2>&1
+}
+
+check_hs_ipv4_connectivity()
+{
+ local hssrc="$1"
+ local hsdst="$2"
+ local hssrc_nsname
+
+ hssrc_nsname="$(get_hsname "${hssrc}")"
+
+ ip netns exec "${hssrc_nsname}" ping -c 1 -W "${PING_TIMEOUT_SEC}" \
+ "${IPv4_HS_NETWORK}.${hsdst}" >/dev/null 2>&1
+}
+
+check_and_log_hs2gw_connectivity()
+{
+ local hssrc="$1"
+
+ check_hs_ipv6_connectivity "${hssrc}" 254
+ log_test $? 0 "IPv6 Hosts connectivity: hs-${hssrc} -> gw"
+
+ check_hs_ipv4_connectivity "${hssrc}" 254
+ log_test $? 0 "IPv4 Hosts connectivity: hs-${hssrc} -> gw"
+}
+
+check_and_log_hs_ipv6_connectivity()
+{
+ local hssrc="$1"
+ local hsdst="$2"
+
+ check_hs_ipv6_connectivity "${hssrc}" "${hsdst}"
+ log_test $? 0 "IPv6 Hosts connectivity: hs-${hssrc} -> hs-${hsdst}"
+}
+
+check_and_log_hs_ipv4_connectivity()
+{
+ local hssrc="$1"
+ local hsdst="$2"
+
+ check_hs_ipv4_connectivity "${hssrc}" "${hsdst}"
+ log_test $? 0 "IPv4 Hosts connectivity: hs-${hssrc} -> hs-${hsdst}"
+}
+
+check_and_log_hs_connectivity()
+{
+ local hssrc="$1"
+ local hsdst="$2"
+
+ check_and_log_hs_ipv4_connectivity "${hssrc}" "${hsdst}"
+ check_and_log_hs_ipv6_connectivity "${hssrc}" "${hsdst}"
+}
+
+router_tests()
+{
+ local i
+ local j
+
+ log_section "IPv6 routers connectivity test"
+
+ for i in ${ROUTERS}; do
+ for j in ${ROUTERS}; do
+ if [ "${i}" -eq "${j}" ]; then
+ continue
+ fi
+
+ check_and_log_rt_connectivity "${i}" "${j}"
+ done
+ done
+}
+
+host2gateway_tests()
+{
+ local hs
+
+ log_section "IPv4/IPv6 connectivity test among hosts and gateways"
+
+ for hs in ${HOSTS}; do
+ check_and_log_hs2gw_connectivity "${hs}"
+ done
+}
+
+host_vpn_tests()
+{
+ log_section "SRv6 L2 VPN connectivity test hosts (h1 <-> h2)"
+
+ check_and_log_hs_connectivity 1 2
+ check_and_log_hs_connectivity 2 1
+}
+
+test_dummy_dev_or_ksft_skip()
+{
+ local test_netns
+
+ test_netns="dummy-$(mktemp -u XXXXXXXX)"
+
+ if ! ip netns add "${test_netns}"; then
+ echo "SKIP: Cannot set up netns for testing dummy dev support"
+ exit "${ksft_skip}"
+ fi
+
+ modprobe dummy &>/dev/null || true
+ if ! ip -netns "${test_netns}" link \
+ add "${DUMMY_DEVNAME}" type dummy; then
+ echo "SKIP: dummy dev not supported"
+
+ ip netns del "${test_netns}"
+ exit "${ksft_skip}"
+ fi
+
+ ip netns del "${test_netns}"
+}
+
+test_iproute2_supp_or_ksft_skip()
+{
+ if ! ip route help 2>&1 | grep -qo "l2encap.red"; then
+ echo "SKIP: Missing SRv6 l2encap.red support in iproute2"
+ exit "${ksft_skip}"
+ fi
+}
+
+if [ "$(id -u)" -ne 0 ]; then
+ echo "SKIP: Need root privileges"
+ exit "${ksft_skip}"
+fi
+
+# required programs to carry out this selftest
+test_command_or_ksft_skip ip
+test_command_or_ksft_skip ping
+test_command_or_ksft_skip sysctl
+test_command_or_ksft_skip grep
+
+test_iproute2_supp_or_ksft_skip
+test_dummy_dev_or_ksft_skip
+
+set -e
+trap cleanup EXIT
+
+setup
+set +e
+
+router_tests
+host2gateway_tests
+host_vpn_tests
+
+print_log_test_results
diff --git a/tools/testing/selftests/net/stress_reuseport_listen.c b/tools/testing/selftests/net/stress_reuseport_listen.c
new file mode 100644
index 000000000000..ef800bb35a8e
--- /dev/null
+++ b/tools/testing/selftests/net/stress_reuseport_listen.c
@@ -0,0 +1,105 @@
+// SPDX-License-Identifier: GPL-2.0
+
+/* Copyright (c) 2022 Meta Platforms, Inc. and affiliates. */
+
+/* Test listening on the same port 443 with multiple VIPS.
+ * Each VIP:443 will have multiple sk listening on by using
+ * SO_REUSEPORT.
+ */
+
+#include <unistd.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <error.h>
+#include <errno.h>
+#include <time.h>
+#include <arpa/inet.h>
+
+#define IP6_LADDR_START "2401:dead::1"
+#define IP6_LPORT 443
+#define NSEC_PER_SEC 1000000000L
+#define NSEC_PER_USEC 1000L
+
+static unsigned int nr_socks_per_vip;
+static unsigned int nr_vips;
+
+static int *bind_reuseport_sock6(void)
+{
+ int *lfds, *cur_fd, err, optvalue = 1;
+ struct sockaddr_in6 sa6 = {};
+ unsigned int i, j;
+
+ sa6.sin6_family = AF_INET6;
+ sa6.sin6_port = htons(IP6_LPORT);
+ err = inet_pton(AF_INET6, IP6_LADDR_START, &sa6.sin6_addr);
+ if (err != 1)
+ error(1, err, "inet_pton(%s)", IP6_LADDR_START);
+
+ lfds = malloc(nr_vips * nr_socks_per_vip * sizeof(lfds[0]));
+ if (!lfds)
+ error(1, errno, "cannot alloc array of lfds");
+
+ cur_fd = lfds;
+ for (i = 0; i < nr_vips; i++) {
+ for (j = 0; j < nr_socks_per_vip; j++) {
+ *cur_fd = socket(AF_INET6, SOCK_STREAM, 0);
+ if (*cur_fd == -1)
+ error(1, errno,
+ "lfds[%u,%u] = socket(AF_INET6)", i, j);
+
+ err = setsockopt(*cur_fd, SOL_SOCKET, SO_REUSEPORT,
+ &optvalue, sizeof(optvalue));
+ if (err)
+ error(1, errno,
+ "setsockopt(lfds[%u,%u], SO_REUSEPORT)",
+ i, j);
+
+ err = bind(*cur_fd, (struct sockaddr *)&sa6,
+ sizeof(sa6));
+ if (err)
+ error(1, errno, "bind(lfds[%u,%u])", i, j);
+ cur_fd++;
+ }
+ sa6.sin6_addr.s6_addr32[3]++;
+ }
+
+ return lfds;
+}
+
+int main(int argc, const char *argv[])
+{
+ struct timespec start_ts, end_ts;
+ unsigned long start_ns, end_ns;
+ unsigned int nr_lsocks;
+ int *lfds, i, err;
+
+ if (argc != 3 || atoi(argv[1]) <= 0 || atoi(argv[2]) <= 0)
+ error(1, 0, "Usage: %s <nr_vips> <nr_socks_per_vip>\n",
+ argv[0]);
+
+ nr_vips = atoi(argv[1]);
+ nr_socks_per_vip = atoi(argv[2]);
+ nr_lsocks = nr_vips * nr_socks_per_vip;
+ lfds = bind_reuseport_sock6();
+
+ clock_gettime(CLOCK_MONOTONIC, &start_ts);
+ for (i = 0; i < nr_lsocks; i++) {
+ err = listen(lfds[i], 0);
+ if (err)
+ error(1, errno, "listen(lfds[%d])", i);
+ }
+ clock_gettime(CLOCK_MONOTONIC, &end_ts);
+
+ start_ns = start_ts.tv_sec * NSEC_PER_SEC + start_ts.tv_nsec;
+ end_ns = end_ts.tv_sec * NSEC_PER_SEC + end_ts.tv_nsec;
+
+ printf("listen %d socks took %lu.%lu\n", nr_lsocks,
+ (end_ns - start_ns) / NSEC_PER_SEC,
+ (end_ns - start_ns) / NSEC_PER_USEC);
+
+ for (i = 0; i < nr_lsocks; i++)
+ close(lfds[i]);
+
+ free(lfds);
+ return 0;
+}
diff --git a/tools/testing/selftests/net/stress_reuseport_listen.sh b/tools/testing/selftests/net/stress_reuseport_listen.sh
new file mode 100755
index 000000000000..94d5d1a1c90f
--- /dev/null
+++ b/tools/testing/selftests/net/stress_reuseport_listen.sh
@@ -0,0 +1,25 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+# Copyright (c) 2022 Meta Platforms, Inc. and affiliates.
+
+source lib.sh
+NR_FILES=24100
+SAVED_NR_FILES=$(ulimit -n)
+
+setup() {
+ setup_ns NS
+ ip netns exec $NS sysctl -q -w net.ipv6.ip_nonlocal_bind=1
+ ulimit -n $NR_FILES
+}
+
+cleanup() {
+ cleanup_ns $NS
+ ulimit -n $SAVED_NR_FILES
+}
+
+trap cleanup EXIT
+setup
+# 300 different vips listen on port 443
+# Each vip:443 sockaddr has 80 LISTEN sock by using SO_REUSEPORT
+# Total 24000 listening socks
+ip netns exec $NS ./stress_reuseport_listen 300 80
diff --git a/tools/testing/selftests/net/tap.c b/tools/testing/selftests/net/tap.c
new file mode 100644
index 000000000000..247c3b3ac1c9
--- /dev/null
+++ b/tools/testing/selftests/net/tap.c
@@ -0,0 +1,434 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#define _GNU_SOURCE
+
+#include <errno.h>
+#include <fcntl.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+#include <net/if.h>
+#include <linux/if_tun.h>
+#include <linux/netlink.h>
+#include <linux/rtnetlink.h>
+#include <sys/ioctl.h>
+#include <sys/socket.h>
+#include <linux/virtio_net.h>
+#include <netinet/ip.h>
+#include <netinet/udp.h>
+#include "../kselftest_harness.h"
+
+static const char param_dev_tap_name[] = "xmacvtap0";
+static const char param_dev_dummy_name[] = "xdummy0";
+static unsigned char param_hwaddr_src[] = { 0x00, 0xfe, 0x98, 0x14, 0x22, 0x42 };
+static unsigned char param_hwaddr_dest[] = {
+ 0x00, 0xfe, 0x98, 0x94, 0xd2, 0x43
+};
+
+#define MAX_RTNL_PAYLOAD (2048)
+#define PKT_DATA 0xCB
+#define TEST_PACKET_SZ (sizeof(struct virtio_net_hdr) + ETH_HLEN + ETH_MAX_MTU)
+
+static struct rtattr *rtattr_add(struct nlmsghdr *nh, unsigned short type,
+ unsigned short len)
+{
+ struct rtattr *rta =
+ (struct rtattr *)((uint8_t *)nh + RTA_ALIGN(nh->nlmsg_len));
+ rta->rta_type = type;
+ rta->rta_len = RTA_LENGTH(len);
+ nh->nlmsg_len = RTA_ALIGN(nh->nlmsg_len) + RTA_ALIGN(rta->rta_len);
+ return rta;
+}
+
+static struct rtattr *rtattr_begin(struct nlmsghdr *nh, unsigned short type)
+{
+ return rtattr_add(nh, type, 0);
+}
+
+static void rtattr_end(struct nlmsghdr *nh, struct rtattr *attr)
+{
+ uint8_t *end = (uint8_t *)nh + nh->nlmsg_len;
+
+ attr->rta_len = end - (uint8_t *)attr;
+}
+
+static struct rtattr *rtattr_add_str(struct nlmsghdr *nh, unsigned short type,
+ const char *s)
+{
+ struct rtattr *rta = rtattr_add(nh, type, strlen(s));
+
+ memcpy(RTA_DATA(rta), s, strlen(s));
+ return rta;
+}
+
+static struct rtattr *rtattr_add_strsz(struct nlmsghdr *nh, unsigned short type,
+ const char *s)
+{
+ struct rtattr *rta = rtattr_add(nh, type, strlen(s) + 1);
+
+ strcpy(RTA_DATA(rta), s);
+ return rta;
+}
+
+static struct rtattr *rtattr_add_any(struct nlmsghdr *nh, unsigned short type,
+ const void *arr, size_t len)
+{
+ struct rtattr *rta = rtattr_add(nh, type, len);
+
+ memcpy(RTA_DATA(rta), arr, len);
+ return rta;
+}
+
+static int dev_create(const char *dev, const char *link_type,
+ int (*fill_rtattr)(struct nlmsghdr *nh),
+ int (*fill_info_data)(struct nlmsghdr *nh))
+{
+ struct {
+ struct nlmsghdr nh;
+ struct ifinfomsg info;
+ unsigned char data[MAX_RTNL_PAYLOAD];
+ } req;
+ struct rtattr *link_info, *info_data;
+ int ret, rtnl;
+
+ rtnl = socket(AF_NETLINK, SOCK_DGRAM, NETLINK_ROUTE);
+ if (rtnl < 0) {
+ fprintf(stderr, "%s: socket %s\n", __func__, strerror(errno));
+ return 1;
+ }
+
+ memset(&req, 0, sizeof(req));
+ req.nh.nlmsg_len = NLMSG_LENGTH(sizeof(req.info));
+ req.nh.nlmsg_flags = NLM_F_REQUEST | NLM_F_CREATE;
+ req.nh.nlmsg_type = RTM_NEWLINK;
+
+ req.info.ifi_family = AF_UNSPEC;
+ req.info.ifi_type = 1;
+ req.info.ifi_index = 0;
+ req.info.ifi_flags = IFF_BROADCAST | IFF_UP;
+ req.info.ifi_change = 0xffffffff;
+
+ rtattr_add_str(&req.nh, IFLA_IFNAME, dev);
+
+ if (fill_rtattr) {
+ ret = fill_rtattr(&req.nh);
+ if (ret)
+ return ret;
+ }
+
+ link_info = rtattr_begin(&req.nh, IFLA_LINKINFO);
+
+ rtattr_add_strsz(&req.nh, IFLA_INFO_KIND, link_type);
+
+ if (fill_info_data) {
+ info_data = rtattr_begin(&req.nh, IFLA_INFO_DATA);
+ ret = fill_info_data(&req.nh);
+ if (ret)
+ return ret;
+ rtattr_end(&req.nh, info_data);
+ }
+
+ rtattr_end(&req.nh, link_info);
+
+ ret = send(rtnl, &req, req.nh.nlmsg_len, 0);
+ if (ret < 0)
+ fprintf(stderr, "%s: send %s\n", __func__, strerror(errno));
+ ret = (unsigned int)ret != req.nh.nlmsg_len;
+
+ close(rtnl);
+ return ret;
+}
+
+static int dev_delete(const char *dev)
+{
+ struct {
+ struct nlmsghdr nh;
+ struct ifinfomsg info;
+ unsigned char data[MAX_RTNL_PAYLOAD];
+ } req;
+ int ret, rtnl;
+
+ rtnl = socket(AF_NETLINK, SOCK_DGRAM, NETLINK_ROUTE);
+ if (rtnl < 0) {
+ fprintf(stderr, "%s: socket %s\n", __func__, strerror(errno));
+ return 1;
+ }
+
+ memset(&req, 0, sizeof(req));
+ req.nh.nlmsg_len = NLMSG_LENGTH(sizeof(req.info));
+ req.nh.nlmsg_flags = NLM_F_REQUEST;
+ req.nh.nlmsg_type = RTM_DELLINK;
+
+ req.info.ifi_family = AF_UNSPEC;
+
+ rtattr_add_str(&req.nh, IFLA_IFNAME, dev);
+
+ ret = send(rtnl, &req, req.nh.nlmsg_len, 0);
+ if (ret < 0)
+ fprintf(stderr, "%s: send %s\n", __func__, strerror(errno));
+
+ ret = (unsigned int)ret != req.nh.nlmsg_len;
+
+ close(rtnl);
+ return ret;
+}
+
+static int macvtap_fill_rtattr(struct nlmsghdr *nh)
+{
+ int ifindex;
+
+ ifindex = if_nametoindex(param_dev_dummy_name);
+ if (ifindex == 0) {
+ fprintf(stderr, "%s: ifindex %s\n", __func__, strerror(errno));
+ return -errno;
+ }
+
+ rtattr_add_any(nh, IFLA_LINK, &ifindex, sizeof(ifindex));
+ rtattr_add_any(nh, IFLA_ADDRESS, param_hwaddr_src, ETH_ALEN);
+
+ return 0;
+}
+
+static int opentap(const char *devname)
+{
+ int ifindex;
+ char buf[256];
+ int fd;
+ struct ifreq ifr;
+
+ ifindex = if_nametoindex(devname);
+ if (ifindex == 0) {
+ fprintf(stderr, "%s: ifindex %s\n", __func__, strerror(errno));
+ return -errno;
+ }
+
+ sprintf(buf, "/dev/tap%d", ifindex);
+ fd = open(buf, O_RDWR | O_NONBLOCK);
+ if (fd < 0) {
+ fprintf(stderr, "%s: open %s\n", __func__, strerror(errno));
+ return -errno;
+ }
+
+ memset(&ifr, 0, sizeof(ifr));
+ strcpy(ifr.ifr_name, devname);
+ ifr.ifr_flags = IFF_TAP | IFF_NO_PI | IFF_VNET_HDR | IFF_MULTI_QUEUE;
+ if (ioctl(fd, TUNSETIFF, &ifr, sizeof(ifr)) < 0)
+ return -errno;
+ return fd;
+}
+
+size_t build_eth(uint8_t *buf, uint16_t proto)
+{
+ struct ethhdr *eth = (struct ethhdr *)buf;
+
+ eth->h_proto = htons(proto);
+ memcpy(eth->h_source, param_hwaddr_src, ETH_ALEN);
+ memcpy(eth->h_dest, param_hwaddr_dest, ETH_ALEN);
+
+ return ETH_HLEN;
+}
+
+static uint32_t add_csum(const uint8_t *buf, int len)
+{
+ uint32_t sum = 0;
+ uint16_t *sbuf = (uint16_t *)buf;
+
+ while (len > 1) {
+ sum += *sbuf++;
+ len -= 2;
+ }
+
+ if (len)
+ sum += *(uint8_t *)sbuf;
+
+ return sum;
+}
+
+static uint16_t finish_ip_csum(uint32_t sum)
+{
+ uint16_t lo = sum & 0xffff;
+ uint16_t hi = sum >> 16;
+
+ return ~(lo + hi);
+
+}
+
+static uint16_t build_ip_csum(const uint8_t *buf, int len,
+ uint32_t sum)
+{
+ sum += add_csum(buf, len);
+ return finish_ip_csum(sum);
+}
+
+static int build_ipv4_header(uint8_t *buf, int payload_len)
+{
+ struct iphdr *iph = (struct iphdr *)buf;
+
+ iph->ihl = 5;
+ iph->version = 4;
+ iph->ttl = 8;
+ iph->tot_len =
+ htons(sizeof(*iph) + sizeof(struct udphdr) + payload_len);
+ iph->id = htons(1337);
+ iph->protocol = IPPROTO_UDP;
+ iph->saddr = htonl((172 << 24) | (17 << 16) | 2);
+ iph->daddr = htonl((172 << 24) | (17 << 16) | 1);
+ iph->check = build_ip_csum(buf, iph->ihl << 2, 0);
+
+ return iph->ihl << 2;
+}
+
+static int build_udp_packet(uint8_t *buf, int payload_len, bool csum_off)
+{
+ const int ip4alen = sizeof(uint32_t);
+ struct udphdr *udph = (struct udphdr *)buf;
+ int len = sizeof(*udph) + payload_len;
+ uint32_t sum = 0;
+
+ udph->source = htons(22);
+ udph->dest = htons(58822);
+ udph->len = htons(len);
+
+ memset(buf + sizeof(struct udphdr), PKT_DATA, payload_len);
+
+ sum = add_csum(buf - 2 * ip4alen, 2 * ip4alen);
+ sum += htons(IPPROTO_UDP) + udph->len;
+
+ if (!csum_off)
+ sum += add_csum(buf, len);
+
+ udph->check = finish_ip_csum(sum);
+
+ return sizeof(*udph) + payload_len;
+}
+
+size_t build_test_packet_valid_udp_gso(uint8_t *buf, size_t payload_len)
+{
+ uint8_t *cur = buf;
+ struct virtio_net_hdr *vh = (struct virtio_net_hdr *)buf;
+
+ vh->hdr_len = ETH_HLEN + sizeof(struct iphdr) + sizeof(struct udphdr);
+ vh->flags = VIRTIO_NET_HDR_F_NEEDS_CSUM;
+ vh->csum_start = ETH_HLEN + sizeof(struct iphdr);
+ vh->csum_offset = __builtin_offsetof(struct udphdr, check);
+ vh->gso_type = VIRTIO_NET_HDR_GSO_UDP;
+ vh->gso_size = ETH_DATA_LEN - sizeof(struct iphdr);
+ cur += sizeof(*vh);
+
+ cur += build_eth(cur, ETH_P_IP);
+ cur += build_ipv4_header(cur, payload_len);
+ cur += build_udp_packet(cur, payload_len, true);
+
+ return cur - buf;
+}
+
+size_t build_test_packet_valid_udp_csum(uint8_t *buf, size_t payload_len)
+{
+ uint8_t *cur = buf;
+ struct virtio_net_hdr *vh = (struct virtio_net_hdr *)buf;
+
+ vh->flags = VIRTIO_NET_HDR_F_DATA_VALID;
+ vh->gso_type = VIRTIO_NET_HDR_GSO_NONE;
+ cur += sizeof(*vh);
+
+ cur += build_eth(cur, ETH_P_IP);
+ cur += build_ipv4_header(cur, payload_len);
+ cur += build_udp_packet(cur, payload_len, false);
+
+ return cur - buf;
+}
+
+size_t build_test_packet_crash_tap_invalid_eth_proto(uint8_t *buf,
+ size_t payload_len)
+{
+ uint8_t *cur = buf;
+ struct virtio_net_hdr *vh = (struct virtio_net_hdr *)buf;
+
+ vh->hdr_len = ETH_HLEN + sizeof(struct iphdr) + sizeof(struct udphdr);
+ vh->flags = 0;
+ vh->gso_type = VIRTIO_NET_HDR_GSO_UDP;
+ vh->gso_size = ETH_DATA_LEN - sizeof(struct iphdr);
+ cur += sizeof(*vh);
+
+ cur += build_eth(cur, 0);
+ cur += sizeof(struct iphdr) + sizeof(struct udphdr);
+ cur += build_ipv4_header(cur, payload_len);
+ cur += build_udp_packet(cur, payload_len, true);
+ cur += payload_len;
+
+ return cur - buf;
+}
+
+FIXTURE(tap)
+{
+ int fd;
+};
+
+FIXTURE_SETUP(tap)
+{
+ int ret;
+
+ ret = dev_create(param_dev_dummy_name, "dummy", NULL, NULL);
+ EXPECT_EQ(ret, 0);
+
+ ret = dev_create(param_dev_tap_name, "macvtap", macvtap_fill_rtattr,
+ NULL);
+ EXPECT_EQ(ret, 0);
+
+ self->fd = opentap(param_dev_tap_name);
+ ASSERT_GE(self->fd, 0);
+}
+
+FIXTURE_TEARDOWN(tap)
+{
+ int ret;
+
+ if (self->fd != -1)
+ close(self->fd);
+
+ ret = dev_delete(param_dev_tap_name);
+ EXPECT_EQ(ret, 0);
+
+ ret = dev_delete(param_dev_dummy_name);
+ EXPECT_EQ(ret, 0);
+}
+
+TEST_F(tap, test_packet_valid_udp_gso)
+{
+ uint8_t pkt[TEST_PACKET_SZ];
+ size_t off;
+ int ret;
+
+ memset(pkt, 0, sizeof(pkt));
+ off = build_test_packet_valid_udp_gso(pkt, 1021);
+ ret = write(self->fd, pkt, off);
+ ASSERT_EQ(ret, off);
+}
+
+TEST_F(tap, test_packet_valid_udp_csum)
+{
+ uint8_t pkt[TEST_PACKET_SZ];
+ size_t off;
+ int ret;
+
+ memset(pkt, 0, sizeof(pkt));
+ off = build_test_packet_valid_udp_csum(pkt, 1024);
+ ret = write(self->fd, pkt, off);
+ ASSERT_EQ(ret, off);
+}
+
+TEST_F(tap, test_packet_crash_tap_invalid_eth_proto)
+{
+ uint8_t pkt[TEST_PACKET_SZ];
+ size_t off;
+ int ret;
+
+ memset(pkt, 0, sizeof(pkt));
+ off = build_test_packet_crash_tap_invalid_eth_proto(pkt, 1024);
+ ret = write(self->fd, pkt, off);
+ ASSERT_EQ(ret, -1);
+ ASSERT_EQ(errno, EINVAL);
+}
+
+TEST_HARNESS_MAIN
diff --git a/tools/testing/selftests/net/tcp_ao/.gitignore b/tools/testing/selftests/net/tcp_ao/.gitignore
new file mode 100644
index 000000000000..e8bb81b715b7
--- /dev/null
+++ b/tools/testing/selftests/net/tcp_ao/.gitignore
@@ -0,0 +1,2 @@
+*_ipv4
+*_ipv6
diff --git a/tools/testing/selftests/net/tcp_ao/Makefile b/tools/testing/selftests/net/tcp_ao/Makefile
new file mode 100644
index 000000000000..522d991e310e
--- /dev/null
+++ b/tools/testing/selftests/net/tcp_ao/Makefile
@@ -0,0 +1,56 @@
+# SPDX-License-Identifier: GPL-2.0
+TEST_BOTH_AF := bench-lookups
+TEST_BOTH_AF += connect
+TEST_BOTH_AF += connect-deny
+TEST_BOTH_AF += icmps-accept icmps-discard
+TEST_BOTH_AF += key-management
+TEST_BOTH_AF += restore
+TEST_BOTH_AF += rst
+TEST_BOTH_AF += self-connect
+TEST_BOTH_AF += seq-ext
+TEST_BOTH_AF += setsockopt-closed
+TEST_BOTH_AF += unsigned-md5
+
+TEST_IPV4_PROGS := $(TEST_BOTH_AF:%=%_ipv4)
+TEST_IPV6_PROGS := $(TEST_BOTH_AF:%=%_ipv6)
+
+TEST_GEN_PROGS := $(TEST_IPV4_PROGS) $(TEST_IPV6_PROGS)
+
+top_srcdir := ../../../../..
+include ../../lib.mk
+
+HOSTAR ?= ar
+
+LIBDIR := $(OUTPUT)/lib
+LIB := $(LIBDIR)/libaotst.a
+LDLIBS += $(LIB) -pthread
+LIBDEPS := lib/aolib.h Makefile
+
+CFLAGS := -Wall -O2 -g -D_GNU_SOURCE -fno-strict-aliasing
+CFLAGS += $(KHDR_INCLUDES)
+CFLAGS += -iquote ./lib/ -I ../../../../include/
+
+# Library
+LIBSRC := kconfig.c netlink.c proc.c repair.c setup.c sock.c utils.c
+LIBOBJ := $(LIBSRC:%.c=$(LIBDIR)/%.o)
+EXTRA_CLEAN += $(LIBOBJ) $(LIB)
+
+$(LIB): $(LIBOBJ)
+ $(HOSTAR) rcs $@ $^
+
+$(LIBDIR)/%.o: ./lib/%.c $(LIBDEPS)
+ mkdir -p $(LIBDIR)
+ $(CC) $< $(CFLAGS) $(CPPFLAGS) -o $@ -c
+
+$(TEST_GEN_PROGS): $(LIB)
+
+$(OUTPUT)/%_ipv4: %.c
+ $(LINK.c) $^ $(LDLIBS) -o $@
+
+$(OUTPUT)/%_ipv6: %.c
+ $(LINK.c) -DIPV6_TEST $^ $(LDLIBS) -o $@
+
+$(OUTPUT)/icmps-accept_ipv4: CFLAGS+= -DTEST_ICMPS_ACCEPT
+$(OUTPUT)/icmps-accept_ipv6: CFLAGS+= -DTEST_ICMPS_ACCEPT
+$(OUTPUT)/bench-lookups_ipv4: LDLIBS+= -lm
+$(OUTPUT)/bench-lookups_ipv6: LDLIBS+= -lm
diff --git a/tools/testing/selftests/net/tcp_ao/bench-lookups.c b/tools/testing/selftests/net/tcp_ao/bench-lookups.c
new file mode 100644
index 000000000000..a1e6e007c291
--- /dev/null
+++ b/tools/testing/selftests/net/tcp_ao/bench-lookups.c
@@ -0,0 +1,360 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Author: Dmitry Safonov <dima@arista.com> */
+#include <arpa/inet.h>
+#include <inttypes.h>
+#include <math.h>
+#include <stdlib.h>
+#include <stdio.h>
+#include <time.h>
+
+#include "../../../../include/linux/bits.h"
+#include "../../../../include/linux/kernel.h"
+#include "aolib.h"
+
+#define BENCH_NR_ITERS 100 /* number of times to run gathering statistics */
+
+static void gen_test_ips(union tcp_addr *ips, size_t ips_nr, bool use_rand)
+{
+ union tcp_addr net = {};
+ size_t i, j;
+
+ if (inet_pton(TEST_FAMILY, TEST_NETWORK, &net) != 1)
+ test_error("Can't convert ip address %s", TEST_NETWORK);
+
+ if (!use_rand) {
+ for (i = 0; i < ips_nr; i++)
+ ips[i] = gen_tcp_addr(net, 2 * i + 1);
+ return;
+ }
+ for (i = 0; i < ips_nr; i++) {
+ size_t r = (size_t)random() | 0x1;
+
+ ips[i] = gen_tcp_addr(net, r);
+
+ for (j = i - 1; j > 0 && i > 0; j--) {
+ if (!memcmp(&ips[i], &ips[j], sizeof(union tcp_addr))) {
+ i--; /* collision */
+ break;
+ }
+ }
+ }
+}
+
+static void test_add_routes(union tcp_addr *ips, size_t ips_nr)
+{
+ size_t i;
+
+ for (i = 0; i < ips_nr; i++) {
+ union tcp_addr *p = (union tcp_addr *)&ips[i];
+ int err;
+
+ err = ip_route_add(veth_name, TEST_FAMILY, this_ip_addr, *p);
+ if (err && err != -EEXIST)
+ test_error("Failed to add route");
+ }
+}
+
+static void server_apply_keys(int lsk, union tcp_addr *ips, size_t ips_nr)
+{
+ size_t i;
+
+ for (i = 0; i < ips_nr; i++) {
+ union tcp_addr *p = (union tcp_addr *)&ips[i];
+
+ if (test_add_key(lsk, DEFAULT_TEST_PASSWORD, *p, -1, 100, 100))
+ test_error("setsockopt(TCP_AO)");
+ }
+}
+
+static const size_t nr_keys[] = { 512, 1024, 2048, 4096, 8192 };
+static union tcp_addr *test_ips;
+
+struct bench_stats {
+ uint64_t min;
+ uint64_t max;
+ uint64_t nr;
+ double mean;
+ double s2;
+};
+
+static struct bench_tests {
+ struct bench_stats delete_last_key;
+ struct bench_stats add_key;
+ struct bench_stats delete_rand_key;
+ struct bench_stats connect_last_key;
+ struct bench_stats connect_rand_key;
+ struct bench_stats delete_async;
+} bench_results[ARRAY_SIZE(nr_keys)];
+
+#define NSEC_PER_SEC 1000000000ULL
+
+static void measure_call(struct bench_stats *st,
+ void (*f)(int, void *), int sk, void *arg)
+{
+ struct timespec start = {}, end = {};
+ double delta;
+ uint64_t nsec;
+
+ if (clock_gettime(CLOCK_MONOTONIC, &start))
+ test_error("clock_gettime()");
+
+ f(sk, arg);
+
+ if (clock_gettime(CLOCK_MONOTONIC, &end))
+ test_error("clock_gettime()");
+
+ nsec = (end.tv_sec - start.tv_sec) * NSEC_PER_SEC;
+ if (end.tv_nsec >= start.tv_nsec)
+ nsec += end.tv_nsec - start.tv_nsec;
+ else
+ nsec -= start.tv_nsec - end.tv_nsec;
+
+ if (st->nr == 0) {
+ st->min = st->max = nsec;
+ } else {
+ if (st->min > nsec)
+ st->min = nsec;
+ if (st->max < nsec)
+ st->max = nsec;
+ }
+
+ /* Welford-Knuth algorithm */
+ st->nr++;
+ delta = (double)nsec - st->mean;
+ st->mean += delta / st->nr;
+ st->s2 += delta * ((double)nsec - st->mean);
+}
+
+static void delete_mkt(int sk, void *arg)
+{
+ struct tcp_ao_del *ao = arg;
+
+ if (setsockopt(sk, IPPROTO_TCP, TCP_AO_DEL_KEY, ao, sizeof(*ao)))
+ test_error("setsockopt(TCP_AO_DEL_KEY)");
+}
+
+static void add_back_mkt(int sk, void *arg)
+{
+ union tcp_addr *p = arg;
+
+ if (test_add_key(sk, DEFAULT_TEST_PASSWORD, *p, -1, 100, 100))
+ test_error("setsockopt(TCP_AO)");
+}
+
+static void bench_delete(int lsk, struct bench_stats *add,
+ struct bench_stats *del,
+ union tcp_addr *ips, size_t ips_nr,
+ bool rand_order, bool async)
+{
+ struct tcp_ao_del ao_del = {};
+ union tcp_addr *p;
+ size_t i;
+
+ ao_del.sndid = 100;
+ ao_del.rcvid = 100;
+ ao_del.del_async = !!async;
+ ao_del.prefix = DEFAULT_TEST_PREFIX;
+
+ /* Remove the first added */
+ p = (union tcp_addr *)&ips[0];
+ tcp_addr_to_sockaddr_in(&ao_del.addr, p, 0);
+
+ for (i = 0; i < BENCH_NR_ITERS; i++) {
+ measure_call(del, delete_mkt, lsk, (void *)&ao_del);
+
+ /* Restore it back */
+ measure_call(add, add_back_mkt, lsk, (void *)p);
+
+ /*
+ * Slowest for FILO-linked-list:
+ * on (i) iteration removing ips[i] element. When it gets
+ * added to the list back - it becomes first to fetch, so
+ * on (i + 1) iteration go to ips[i + 1] element.
+ */
+ if (rand_order)
+ p = (union tcp_addr *)&ips[rand() % ips_nr];
+ else
+ p = (union tcp_addr *)&ips[i % ips_nr];
+ tcp_addr_to_sockaddr_in(&ao_del.addr, p, 0);
+ }
+}
+
+static void bench_connect_srv(int lsk, union tcp_addr *ips, size_t ips_nr)
+{
+ size_t i;
+
+ for (i = 0; i < BENCH_NR_ITERS; i++) {
+ int sk;
+
+ synchronize_threads();
+
+ if (test_wait_fd(lsk, TEST_TIMEOUT_SEC, 0))
+ test_error("test_wait_fd()");
+
+ sk = accept(lsk, NULL, NULL);
+ if (sk < 0)
+ test_error("accept()");
+
+ close(sk);
+ }
+}
+
+static void test_print_stats(const char *desc, size_t nr, struct bench_stats *bs)
+{
+ test_ok("%-20s\t%zu keys: min=%" PRIu64 "ms max=%" PRIu64 "ms mean=%gms stddev=%g",
+ desc, nr, bs->min / 1000000, bs->max / 1000000,
+ bs->mean / 1000000, sqrt((bs->mean / 1000000) / bs->nr));
+}
+
+static void *server_fn(void *arg)
+{
+ size_t i;
+
+ for (i = 0; i < ARRAY_SIZE(nr_keys); i++) {
+ struct bench_tests *bt = &bench_results[i];
+ int lsk;
+
+ test_ips = malloc(nr_keys[i] * sizeof(union tcp_addr));
+ if (!test_ips)
+ test_error("malloc()");
+
+ lsk = test_listen_socket(this_ip_addr, test_server_port + i, 1);
+
+ gen_test_ips(test_ips, nr_keys[i], false);
+ test_add_routes(test_ips, nr_keys[i]);
+ test_set_optmem(KERNEL_TCP_AO_KEY_SZ_ROUND_UP * nr_keys[i]);
+ server_apply_keys(lsk, test_ips, nr_keys[i]);
+
+ synchronize_threads();
+ bench_connect_srv(lsk, test_ips, nr_keys[i]);
+ bench_connect_srv(lsk, test_ips, nr_keys[i]);
+
+ /* The worst case for FILO-list */
+ bench_delete(lsk, &bt->add_key, &bt->delete_last_key,
+ test_ips, nr_keys[i], false, false);
+ test_print_stats("Add a new key",
+ nr_keys[i], &bt->add_key);
+ test_print_stats("Delete: worst case",
+ nr_keys[i], &bt->delete_last_key);
+
+ bench_delete(lsk, &bt->add_key, &bt->delete_rand_key,
+ test_ips, nr_keys[i], true, false);
+ test_print_stats("Delete: random-search",
+ nr_keys[i], &bt->delete_rand_key);
+
+ bench_delete(lsk, &bt->add_key, &bt->delete_async,
+ test_ips, nr_keys[i], false, true);
+ test_print_stats("Delete: async", nr_keys[i], &bt->delete_async);
+
+ free(test_ips);
+ close(lsk);
+ }
+
+ return NULL;
+}
+
+static void connect_client(int sk, void *arg)
+{
+ size_t *p = arg;
+
+ if (test_connect_socket(sk, this_ip_dest, test_server_port + *p) <= 0)
+ test_error("failed to connect()");
+}
+
+static void client_addr_setup(int sk, union tcp_addr taddr)
+{
+#ifdef IPV6_TEST
+ struct sockaddr_in6 addr = {
+ .sin6_family = AF_INET6,
+ .sin6_port = 0,
+ .sin6_addr = taddr.a6,
+ };
+#else
+ struct sockaddr_in addr = {
+ .sin_family = AF_INET,
+ .sin_port = 0,
+ .sin_addr = taddr.a4,
+ };
+#endif
+ int ret;
+
+ ret = ip_addr_add(veth_name, TEST_FAMILY, taddr, TEST_PREFIX);
+ if (ret && ret != -EEXIST)
+ test_error("Failed to add ip address");
+ ret = ip_route_add(veth_name, TEST_FAMILY, taddr, this_ip_dest);
+ if (ret && ret != -EEXIST)
+ test_error("Failed to add route");
+
+ if (bind(sk, &addr, sizeof(addr)))
+ test_error("bind()");
+}
+
+static void bench_connect_client(size_t port_off, struct bench_tests *bt,
+ union tcp_addr *ips, size_t ips_nr, bool rand_order)
+{
+ struct bench_stats *con;
+ union tcp_addr *p;
+ size_t i;
+
+ if (rand_order)
+ con = &bt->connect_rand_key;
+ else
+ con = &bt->connect_last_key;
+
+ p = (union tcp_addr *)&ips[0];
+
+ for (i = 0; i < BENCH_NR_ITERS; i++) {
+ int sk = socket(test_family, SOCK_STREAM, IPPROTO_TCP);
+
+ if (sk < 0)
+ test_error("socket()");
+
+ client_addr_setup(sk, *p);
+ if (test_add_key(sk, DEFAULT_TEST_PASSWORD, this_ip_dest,
+ -1, 100, 100))
+ test_error("setsockopt(TCP_AO_ADD_KEY)");
+
+ synchronize_threads();
+
+ measure_call(con, connect_client, sk, (void *)&port_off);
+
+ close(sk);
+
+ /*
+ * Slowest for FILO-linked-list:
+ * on (i) iteration removing ips[i] element. When it gets
+ * added to the list back - it becomes first to fetch, so
+ * on (i + 1) iteration go to ips[i + 1] element.
+ */
+ if (rand_order)
+ p = (union tcp_addr *)&ips[rand() % ips_nr];
+ else
+ p = (union tcp_addr *)&ips[i % ips_nr];
+ }
+}
+
+static void *client_fn(void *arg)
+{
+ size_t i;
+
+ for (i = 0; i < ARRAY_SIZE(nr_keys); i++) {
+ struct bench_tests *bt = &bench_results[i];
+
+ synchronize_threads();
+ bench_connect_client(i, bt, test_ips, nr_keys[i], false);
+ test_print_stats("Connect: worst case",
+ nr_keys[i], &bt->connect_last_key);
+
+ bench_connect_client(i, bt, test_ips, nr_keys[i], false);
+ test_print_stats("Connect: random-search",
+ nr_keys[i], &bt->connect_last_key);
+ }
+ synchronize_threads();
+ return NULL;
+}
+
+int main(int argc, char *argv[])
+{
+ test_init(30, server_fn, client_fn);
+ return 0;
+}
diff --git a/tools/testing/selftests/net/tcp_ao/config b/tools/testing/selftests/net/tcp_ao/config
new file mode 100644
index 000000000000..d3277a9de987
--- /dev/null
+++ b/tools/testing/selftests/net/tcp_ao/config
@@ -0,0 +1,10 @@
+CONFIG_CRYPTO_HMAC=y
+CONFIG_CRYPTO_RMD160=y
+CONFIG_CRYPTO_SHA1=y
+CONFIG_IPV6_MULTIPLE_TABLES=y
+CONFIG_IPV6=y
+CONFIG_NET_L3_MASTER_DEV=y
+CONFIG_NET_VRF=y
+CONFIG_TCP_AO=y
+CONFIG_TCP_MD5SIG=y
+CONFIG_VETH=m
diff --git a/tools/testing/selftests/net/tcp_ao/connect-deny.c b/tools/testing/selftests/net/tcp_ao/connect-deny.c
new file mode 100644
index 000000000000..185a2f6e5ff3
--- /dev/null
+++ b/tools/testing/selftests/net/tcp_ao/connect-deny.c
@@ -0,0 +1,264 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Author: Dmitry Safonov <dima@arista.com> */
+#include <inttypes.h>
+#include "aolib.h"
+
+#define fault(type) (inj == FAULT_ ## type)
+
+static inline int test_add_key_maclen(int sk, const char *key, uint8_t maclen,
+ union tcp_addr in_addr, uint8_t prefix,
+ uint8_t sndid, uint8_t rcvid)
+{
+ struct tcp_ao_add tmp = {};
+ int err;
+
+ if (prefix > DEFAULT_TEST_PREFIX)
+ prefix = DEFAULT_TEST_PREFIX;
+
+ err = test_prepare_key(&tmp, DEFAULT_TEST_ALGO, in_addr, false, false,
+ prefix, 0, sndid, rcvid, maclen,
+ 0, strlen(key), key);
+ if (err)
+ return err;
+
+ err = setsockopt(sk, IPPROTO_TCP, TCP_AO_ADD_KEY, &tmp, sizeof(tmp));
+ if (err < 0)
+ return -errno;
+
+ return test_verify_socket_key(sk, &tmp);
+}
+
+static void try_accept(const char *tst_name, unsigned int port, const char *pwd,
+ union tcp_addr addr, uint8_t prefix,
+ uint8_t sndid, uint8_t rcvid, uint8_t maclen,
+ const char *cnt_name, test_cnt cnt_expected,
+ fault_t inj)
+{
+ struct tcp_ao_counters ao_cnt1, ao_cnt2;
+ uint64_t before_cnt = 0, after_cnt = 0; /* silence GCC */
+ int lsk, err, sk = 0;
+ time_t timeout;
+
+ lsk = test_listen_socket(this_ip_addr, port, 1);
+
+ if (pwd && test_add_key_maclen(lsk, pwd, maclen, addr, prefix, sndid, rcvid))
+ test_error("setsockopt(TCP_AO_ADD_KEY)");
+
+ if (cnt_name)
+ before_cnt = netstat_get_one(cnt_name, NULL);
+ if (pwd && test_get_tcp_ao_counters(lsk, &ao_cnt1))
+ test_error("test_get_tcp_ao_counters()");
+
+ synchronize_threads(); /* preparations done */
+
+ timeout = fault(TIMEOUT) ? TEST_RETRANSMIT_SEC : TEST_TIMEOUT_SEC;
+ err = test_wait_fd(lsk, timeout, 0);
+ if (err == -ETIMEDOUT) {
+ if (!fault(TIMEOUT))
+ test_fail("timed out for accept()");
+ } else if (err < 0) {
+ test_error("test_wait_fd()");
+ } else {
+ if (fault(TIMEOUT))
+ test_fail("ready to accept");
+
+ sk = accept(lsk, NULL, NULL);
+ if (sk < 0) {
+ test_error("accept()");
+ } else {
+ if (fault(TIMEOUT))
+ test_fail("%s: accepted", tst_name);
+ }
+ }
+
+ if (pwd && test_get_tcp_ao_counters(lsk, &ao_cnt2))
+ test_error("test_get_tcp_ao_counters()");
+
+ close(lsk);
+ if (pwd)
+ test_tcp_ao_counters_cmp(tst_name, &ao_cnt1, &ao_cnt2, cnt_expected);
+
+ if (!cnt_name)
+ goto out;
+
+ after_cnt = netstat_get_one(cnt_name, NULL);
+
+ if (after_cnt <= before_cnt) {
+ test_fail("%s: %s counter did not increase: %zu <= %zu",
+ tst_name, cnt_name, after_cnt, before_cnt);
+ } else {
+ test_ok("%s: counter %s increased %zu => %zu",
+ tst_name, cnt_name, before_cnt, after_cnt);
+ }
+
+out:
+ synchronize_threads(); /* close() */
+ if (sk > 0)
+ close(sk);
+}
+
+static void *server_fn(void *arg)
+{
+ union tcp_addr wrong_addr, network_addr;
+ unsigned int port = test_server_port;
+
+ if (inet_pton(TEST_FAMILY, TEST_WRONG_IP, &wrong_addr) != 1)
+ test_error("Can't convert ip address %s", TEST_WRONG_IP);
+
+ try_accept("Non-AO server + AO client", port++, NULL,
+ this_ip_dest, -1, 100, 100, 0,
+ "TCPAOKeyNotFound", 0, FAULT_TIMEOUT);
+
+ try_accept("AO server + Non-AO client", port++, DEFAULT_TEST_PASSWORD,
+ this_ip_dest, -1, 100, 100, 0,
+ "TCPAORequired", TEST_CNT_AO_REQUIRED, FAULT_TIMEOUT);
+
+ try_accept("Wrong password", port++, "something that is not DEFAULT_TEST_PASSWORD",
+ this_ip_dest, -1, 100, 100, 0,
+ "TCPAOBad", TEST_CNT_BAD, FAULT_TIMEOUT);
+
+ try_accept("Wrong rcv id", port++, DEFAULT_TEST_PASSWORD,
+ this_ip_dest, -1, 100, 101, 0,
+ "TCPAOKeyNotFound", TEST_CNT_AO_KEY_NOT_FOUND, FAULT_TIMEOUT);
+
+ try_accept("Wrong snd id", port++, DEFAULT_TEST_PASSWORD,
+ this_ip_dest, -1, 101, 100, 0,
+ "TCPAOGood", TEST_CNT_GOOD, FAULT_TIMEOUT);
+
+ try_accept("Different maclen", port++, DEFAULT_TEST_PASSWORD,
+ this_ip_dest, -1, 100, 100, 8,
+ "TCPAOBad", TEST_CNT_BAD, FAULT_TIMEOUT);
+
+ try_accept("Server: Wrong addr", port++, DEFAULT_TEST_PASSWORD,
+ wrong_addr, -1, 100, 100, 0,
+ "TCPAOKeyNotFound", TEST_CNT_AO_KEY_NOT_FOUND, FAULT_TIMEOUT);
+
+ try_accept("Client: Wrong addr", port++, NULL,
+ this_ip_dest, -1, 100, 100, 0, NULL, 0, FAULT_TIMEOUT);
+
+ try_accept("rcv id != snd id", port++, DEFAULT_TEST_PASSWORD,
+ this_ip_dest, -1, 200, 100, 0,
+ "TCPAOGood", TEST_CNT_GOOD, 0);
+
+ if (inet_pton(TEST_FAMILY, TEST_NETWORK, &network_addr) != 1)
+ test_error("Can't convert ip address %s", TEST_NETWORK);
+
+ try_accept("Server: prefix match", port++, DEFAULT_TEST_PASSWORD,
+ network_addr, 16, 100, 100, 0,
+ "TCPAOGood", TEST_CNT_GOOD, 0);
+
+ try_accept("Client: prefix match", port++, DEFAULT_TEST_PASSWORD,
+ this_ip_dest, -1, 100, 100, 0,
+ "TCPAOGood", TEST_CNT_GOOD, 0);
+
+ /* client exits */
+ synchronize_threads();
+ return NULL;
+}
+
+static void try_connect(const char *tst_name, unsigned int port,
+ const char *pwd, union tcp_addr addr, uint8_t prefix,
+ uint8_t sndid, uint8_t rcvid,
+ test_cnt cnt_expected, fault_t inj)
+{
+ struct tcp_ao_counters ao_cnt1, ao_cnt2;
+ time_t timeout;
+ int sk, ret;
+
+ sk = socket(test_family, SOCK_STREAM, IPPROTO_TCP);
+ if (sk < 0)
+ test_error("socket()");
+
+ if (pwd && test_add_key(sk, pwd, addr, prefix, sndid, rcvid))
+ test_error("setsockopt(TCP_AO_ADD_KEY)");
+
+ if (pwd && test_get_tcp_ao_counters(sk, &ao_cnt1))
+ test_error("test_get_tcp_ao_counters()");
+
+ synchronize_threads(); /* preparations done */
+
+ timeout = fault(TIMEOUT) ? TEST_RETRANSMIT_SEC : TEST_TIMEOUT_SEC;
+ ret = _test_connect_socket(sk, this_ip_dest, port, timeout);
+
+ if (ret < 0) {
+ if (fault(KEYREJECT) && ret == -EKEYREJECTED) {
+ test_ok("%s: connect() was prevented", tst_name);
+ } else if (ret == -ETIMEDOUT && fault(TIMEOUT)) {
+ test_ok("%s", tst_name);
+ } else if (ret == -ECONNREFUSED &&
+ (fault(TIMEOUT) || fault(KEYREJECT))) {
+ test_ok("%s: refused to connect", tst_name);
+ } else {
+ test_error("%s: connect() returned %d", tst_name, ret);
+ }
+ goto out;
+ }
+
+ if (fault(TIMEOUT) || fault(KEYREJECT))
+ test_fail("%s: connected", tst_name);
+ else
+ test_ok("%s: connected", tst_name);
+ if (pwd && ret > 0) {
+ if (test_get_tcp_ao_counters(sk, &ao_cnt2))
+ test_error("test_get_tcp_ao_counters()");
+ test_tcp_ao_counters_cmp(tst_name, &ao_cnt1, &ao_cnt2, cnt_expected);
+ }
+out:
+ synchronize_threads(); /* close() */
+
+ if (ret > 0)
+ close(sk);
+}
+
+static void *client_fn(void *arg)
+{
+ union tcp_addr wrong_addr, network_addr;
+ unsigned int port = test_server_port;
+
+ if (inet_pton(TEST_FAMILY, TEST_WRONG_IP, &wrong_addr) != 1)
+ test_error("Can't convert ip address %s", TEST_WRONG_IP);
+
+ try_connect("Non-AO server + AO client", port++, DEFAULT_TEST_PASSWORD,
+ this_ip_dest, -1, 100, 100, 0, FAULT_TIMEOUT);
+
+ try_connect("AO server + Non-AO client", port++, NULL,
+ this_ip_dest, -1, 100, 100, 0, FAULT_TIMEOUT);
+
+ try_connect("Wrong password", port++, DEFAULT_TEST_PASSWORD,
+ this_ip_dest, -1, 100, 100, 0, FAULT_TIMEOUT);
+
+ try_connect("Wrong rcv id", port++, DEFAULT_TEST_PASSWORD,
+ this_ip_dest, -1, 100, 100, 0, FAULT_TIMEOUT);
+
+ try_connect("Wrong snd id", port++, DEFAULT_TEST_PASSWORD,
+ this_ip_dest, -1, 100, 100, 0, FAULT_TIMEOUT);
+
+ try_connect("Different maclen", port++, DEFAULT_TEST_PASSWORD,
+ this_ip_dest, -1, 100, 100, 0, FAULT_TIMEOUT);
+
+ try_connect("Server: Wrong addr", port++, DEFAULT_TEST_PASSWORD,
+ this_ip_dest, -1, 100, 100, 0, FAULT_TIMEOUT);
+
+ try_connect("Client: Wrong addr", port++, DEFAULT_TEST_PASSWORD,
+ wrong_addr, -1, 100, 100, 0, FAULT_KEYREJECT);
+
+ try_connect("rcv id != snd id", port++, DEFAULT_TEST_PASSWORD,
+ this_ip_dest, -1, 100, 200, TEST_CNT_GOOD, 0);
+
+ if (inet_pton(TEST_FAMILY, TEST_NETWORK, &network_addr) != 1)
+ test_error("Can't convert ip address %s", TEST_NETWORK);
+
+ try_connect("Server: prefix match", port++, DEFAULT_TEST_PASSWORD,
+ this_ip_dest, -1, 100, 100, TEST_CNT_GOOD, 0);
+
+ try_connect("Client: prefix match", port++, DEFAULT_TEST_PASSWORD,
+ network_addr, 16, 100, 100, TEST_CNT_GOOD, 0);
+
+ return NULL;
+}
+
+int main(int argc, char *argv[])
+{
+ test_init(21, server_fn, client_fn);
+ return 0;
+}
diff --git a/tools/testing/selftests/net/tcp_ao/connect.c b/tools/testing/selftests/net/tcp_ao/connect.c
new file mode 100644
index 000000000000..81653b47f303
--- /dev/null
+++ b/tools/testing/selftests/net/tcp_ao/connect.c
@@ -0,0 +1,90 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Author: Dmitry Safonov <dima@arista.com> */
+#include <inttypes.h>
+#include "aolib.h"
+
+static void *server_fn(void *arg)
+{
+ int sk, lsk;
+ ssize_t bytes;
+
+ lsk = test_listen_socket(this_ip_addr, test_server_port, 1);
+
+ if (test_add_key(lsk, DEFAULT_TEST_PASSWORD, this_ip_dest, -1, 100, 100))
+ test_error("setsockopt(TCP_AO_ADD_KEY)");
+ synchronize_threads();
+
+ if (test_wait_fd(lsk, TEST_TIMEOUT_SEC, 0))
+ test_error("test_wait_fd()");
+
+ sk = accept(lsk, NULL, NULL);
+ if (sk < 0)
+ test_error("accept()");
+
+ synchronize_threads();
+
+ bytes = test_server_run(sk, 0, 0);
+
+ test_fail("server served: %zd", bytes);
+ return NULL;
+}
+
+static void *client_fn(void *arg)
+{
+ int sk = socket(test_family, SOCK_STREAM, IPPROTO_TCP);
+ uint64_t before_aogood, after_aogood;
+ const size_t nr_packets = 20;
+ struct netstat *ns_before, *ns_after;
+ struct tcp_ao_counters ao1, ao2;
+
+ if (sk < 0)
+ test_error("socket()");
+
+ if (test_add_key(sk, DEFAULT_TEST_PASSWORD, this_ip_dest, -1, 100, 100))
+ test_error("setsockopt(TCP_AO_ADD_KEY)");
+
+ synchronize_threads();
+ if (test_connect_socket(sk, this_ip_dest, test_server_port) <= 0)
+ test_error("failed to connect()");
+ synchronize_threads();
+
+ ns_before = netstat_read();
+ before_aogood = netstat_get(ns_before, "TCPAOGood", NULL);
+ if (test_get_tcp_ao_counters(sk, &ao1))
+ test_error("test_get_tcp_ao_counters()");
+
+ if (test_client_verify(sk, 100, nr_packets, TEST_TIMEOUT_SEC)) {
+ test_fail("verify failed");
+ return NULL;
+ }
+
+ ns_after = netstat_read();
+ after_aogood = netstat_get(ns_after, "TCPAOGood", NULL);
+ if (test_get_tcp_ao_counters(sk, &ao2))
+ test_error("test_get_tcp_ao_counters()");
+ netstat_print_diff(ns_before, ns_after);
+ netstat_free(ns_before);
+ netstat_free(ns_after);
+
+ if (nr_packets > (after_aogood - before_aogood)) {
+ test_fail("TCPAOGood counter mismatch: %zu > (%zu - %zu)",
+ nr_packets, after_aogood, before_aogood);
+ return NULL;
+ }
+ if (test_tcp_ao_counters_cmp("connect", &ao1, &ao2, TEST_CNT_GOOD))
+ return NULL;
+
+ test_ok("connect TCPAOGood %" PRIu64 "/%" PRIu64 "/%" PRIu64 " => %" PRIu64 "/%" PRIu64 "/%" PRIu64 ", sent %" PRIu64,
+ before_aogood, ao1.ao_info_pkt_good,
+ ao1.key_cnts[0].pkt_good,
+ after_aogood, ao2.ao_info_pkt_good,
+ ao2.key_cnts[0].pkt_good,
+ nr_packets);
+ return NULL;
+}
+
+int main(int argc, char *argv[])
+{
+ test_init(1, server_fn, client_fn);
+ return 0;
+}
diff --git a/tools/testing/selftests/net/tcp_ao/icmps-accept.c b/tools/testing/selftests/net/tcp_ao/icmps-accept.c
new file mode 120000
index 000000000000..0a5bb85eb260
--- /dev/null
+++ b/tools/testing/selftests/net/tcp_ao/icmps-accept.c
@@ -0,0 +1 @@
+icmps-discard.c \ No newline at end of file
diff --git a/tools/testing/selftests/net/tcp_ao/icmps-discard.c b/tools/testing/selftests/net/tcp_ao/icmps-discard.c
new file mode 100644
index 000000000000..d69bcba3c929
--- /dev/null
+++ b/tools/testing/selftests/net/tcp_ao/icmps-discard.c
@@ -0,0 +1,449 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Selftest that verifies that incomping ICMPs are ignored,
+ * the TCP connection stays alive, no hard or soft errors get reported
+ * to the usespace and the counter for ignored ICMPs is updated.
+ *
+ * RFC5925, 7.8:
+ * >> A TCP-AO implementation MUST default to ignore incoming ICMPv4
+ * messages of Type 3 (destination unreachable), Codes 2-4 (protocol
+ * unreachable, port unreachable, and fragmentation needed -- ’hard
+ * errors’), and ICMPv6 Type 1 (destination unreachable), Code 1
+ * (administratively prohibited) and Code 4 (port unreachable) intended
+ * for connections in synchronized states (ESTABLISHED, FIN-WAIT-1, FIN-
+ * WAIT-2, CLOSE-WAIT, CLOSING, LAST-ACK, TIME-WAIT) that match MKTs.
+ *
+ * Author: Dmitry Safonov <dima@arista.com>
+ */
+#include <inttypes.h>
+#include <linux/icmp.h>
+#include <linux/icmpv6.h>
+#include <linux/ipv6.h>
+#include <netinet/in.h>
+#include <netinet/ip.h>
+#include <sys/socket.h>
+#include "aolib.h"
+#include "../../../../include/linux/compiler.h"
+
+const size_t packets_nr = 20;
+const size_t packet_size = 100;
+const char *tcpao_icmps = "TCPAODroppedIcmps";
+
+#ifdef IPV6_TEST
+const char *dst_unreach = "Icmp6InDestUnreachs";
+const int sk_ip_level = SOL_IPV6;
+const int sk_recverr = IPV6_RECVERR;
+#else
+const char *dst_unreach = "InDestUnreachs";
+const int sk_ip_level = SOL_IP;
+const int sk_recverr = IP_RECVERR;
+#endif
+
+/* Server is expected to fail with hard error if ::accept_icmp is set */
+#ifdef TEST_ICMPS_ACCEPT
+# define test_icmps_fail test_ok
+# define test_icmps_ok test_fail
+#else
+# define test_icmps_fail test_fail
+# define test_icmps_ok test_ok
+#endif
+
+static void serve_interfered(int sk)
+{
+ ssize_t test_quota = packet_size * packets_nr * 10;
+ uint64_t dest_unreach_a, dest_unreach_b;
+ uint64_t icmp_ignored_a, icmp_ignored_b;
+ struct tcp_ao_counters ao_cnt1, ao_cnt2;
+ bool counter_not_found;
+ struct netstat *ns_after, *ns_before;
+ ssize_t bytes;
+
+ ns_before = netstat_read();
+ dest_unreach_a = netstat_get(ns_before, dst_unreach, NULL);
+ icmp_ignored_a = netstat_get(ns_before, tcpao_icmps, NULL);
+ if (test_get_tcp_ao_counters(sk, &ao_cnt1))
+ test_error("test_get_tcp_ao_counters()");
+ bytes = test_server_run(sk, test_quota, 0);
+ ns_after = netstat_read();
+ netstat_print_diff(ns_before, ns_after);
+ dest_unreach_b = netstat_get(ns_after, dst_unreach, NULL);
+ icmp_ignored_b = netstat_get(ns_after, tcpao_icmps,
+ &counter_not_found);
+ if (test_get_tcp_ao_counters(sk, &ao_cnt2))
+ test_error("test_get_tcp_ao_counters()");
+
+ netstat_free(ns_before);
+ netstat_free(ns_after);
+
+ if (dest_unreach_a >= dest_unreach_b) {
+ test_fail("%s counter didn't change: %" PRIu64 " >= %" PRIu64,
+ dst_unreach, dest_unreach_a, dest_unreach_b);
+ return;
+ }
+ test_ok("%s delivered %" PRIu64,
+ dst_unreach, dest_unreach_b - dest_unreach_a);
+ if (bytes < 0)
+ test_icmps_fail("Server failed with %zd: %s", bytes, strerrordesc_np(-bytes));
+ else
+ test_icmps_ok("Server survived %zd bytes of traffic", test_quota);
+ if (counter_not_found) {
+ test_fail("Not found %s counter", tcpao_icmps);
+ return;
+ }
+#ifdef TEST_ICMPS_ACCEPT
+ test_tcp_ao_counters_cmp(NULL, &ao_cnt1, &ao_cnt2, TEST_CNT_GOOD);
+#else
+ test_tcp_ao_counters_cmp(NULL, &ao_cnt1, &ao_cnt2, TEST_CNT_GOOD | TEST_CNT_AO_DROPPED_ICMP);
+#endif
+ if (icmp_ignored_a >= icmp_ignored_b) {
+ test_icmps_fail("%s counter didn't change: %" PRIu64 " >= %" PRIu64,
+ tcpao_icmps, icmp_ignored_a, icmp_ignored_b);
+ return;
+ }
+ test_icmps_ok("ICMPs ignored %" PRIu64, icmp_ignored_b - icmp_ignored_a);
+}
+
+static void *server_fn(void *arg)
+{
+ int val, sk, lsk;
+ bool accept_icmps = false;
+
+ lsk = test_listen_socket(this_ip_addr, test_server_port, 1);
+
+#ifdef TEST_ICMPS_ACCEPT
+ accept_icmps = true;
+#endif
+
+ if (test_set_ao_flags(lsk, false, accept_icmps))
+ test_error("setsockopt(TCP_AO_INFO)");
+
+ if (test_add_key(lsk, DEFAULT_TEST_PASSWORD, this_ip_dest, -1, 100, 100))
+ test_error("setsockopt(TCP_AO_ADD_KEY)");
+ synchronize_threads();
+
+ if (test_wait_fd(lsk, TEST_TIMEOUT_SEC, 0))
+ test_error("test_wait_fd()");
+
+ sk = accept(lsk, NULL, NULL);
+ if (sk < 0)
+ test_error("accept()");
+
+ /* Fail on hard ip errors, such as dest unreachable (RFC1122) */
+ val = 1;
+ if (setsockopt(sk, sk_ip_level, sk_recverr, &val, sizeof(val)))
+ test_error("setsockopt()");
+
+ synchronize_threads();
+
+ serve_interfered(sk);
+ return NULL;
+}
+
+static size_t packets_sent;
+static size_t icmps_sent;
+
+static uint32_t checksum4_nofold(void *data, size_t len, uint32_t sum)
+{
+ uint16_t *words = data;
+ size_t i;
+
+ for (i = 0; i < len / sizeof(uint16_t); i++)
+ sum += words[i];
+ if (len & 1)
+ sum += ((char *)data)[len - 1];
+ return sum;
+}
+
+static uint16_t checksum4_fold(void *data, size_t len, uint32_t sum)
+{
+ sum = checksum4_nofold(data, len, sum);
+ while (sum > 0xFFFF)
+ sum = (sum & 0xFFFF) + (sum >> 16);
+ return ~sum;
+}
+
+static void set_ip4hdr(struct iphdr *iph, size_t packet_len, int proto,
+ struct sockaddr_in *src, struct sockaddr_in *dst)
+{
+ iph->version = 4;
+ iph->ihl = 5;
+ iph->tos = 0;
+ iph->tot_len = htons(packet_len);
+ iph->ttl = 2;
+ iph->protocol = proto;
+ iph->saddr = src->sin_addr.s_addr;
+ iph->daddr = dst->sin_addr.s_addr;
+ iph->check = checksum4_fold((void *)iph, iph->ihl << 1, 0);
+}
+
+static void icmp_interfere4(uint8_t type, uint8_t code, uint32_t rcv_nxt,
+ struct sockaddr_in *src, struct sockaddr_in *dst)
+{
+ int sk = socket(AF_INET, SOCK_RAW, IPPROTO_RAW);
+ struct {
+ struct iphdr iph;
+ struct icmphdr icmph;
+ struct iphdr iphe;
+ struct {
+ uint16_t sport;
+ uint16_t dport;
+ uint32_t seq;
+ } tcph;
+ } packet = {};
+ size_t packet_len;
+ ssize_t bytes;
+
+ if (sk < 0)
+ test_error("socket(AF_INET, SOCK_RAW, IPPROTO_RAW)");
+
+ packet_len = sizeof(packet);
+ set_ip4hdr(&packet.iph, packet_len, IPPROTO_ICMP, src, dst);
+
+ packet.icmph.type = type;
+ packet.icmph.code = code;
+ if (code == ICMP_FRAG_NEEDED) {
+ randomize_buffer(&packet.icmph.un.frag.mtu,
+ sizeof(packet.icmph.un.frag.mtu));
+ }
+
+ packet_len = sizeof(packet.iphe) + sizeof(packet.tcph);
+ set_ip4hdr(&packet.iphe, packet_len, IPPROTO_TCP, dst, src);
+
+ packet.tcph.sport = dst->sin_port;
+ packet.tcph.dport = src->sin_port;
+ packet.tcph.seq = htonl(rcv_nxt);
+
+ packet_len = sizeof(packet) - sizeof(packet.iph);
+ packet.icmph.checksum = checksum4_fold((void *)&packet.icmph,
+ packet_len, 0);
+
+ bytes = sendto(sk, &packet, sizeof(packet), 0,
+ (struct sockaddr *)dst, sizeof(*dst));
+ if (bytes != sizeof(packet))
+ test_error("send(): %zd", bytes);
+ icmps_sent++;
+
+ close(sk);
+}
+
+static void set_ip6hdr(struct ipv6hdr *iph, size_t packet_len, int proto,
+ struct sockaddr_in6 *src, struct sockaddr_in6 *dst)
+{
+ iph->version = 6;
+ iph->payload_len = htons(packet_len);
+ iph->nexthdr = proto;
+ iph->hop_limit = 2;
+ iph->saddr = src->sin6_addr;
+ iph->daddr = dst->sin6_addr;
+}
+
+static inline uint16_t csum_fold(uint32_t csum)
+{
+ uint32_t sum = csum;
+
+ sum = (sum & 0xffff) + (sum >> 16);
+ sum = (sum & 0xffff) + (sum >> 16);
+ return (uint16_t)~sum;
+}
+
+static inline uint32_t csum_add(uint32_t csum, uint32_t addend)
+{
+ uint32_t res = csum;
+
+ res += addend;
+ return res + (res < addend);
+}
+
+noinline uint32_t checksum6_nofold(void *data, size_t len, uint32_t sum)
+{
+ uint16_t *words = data;
+ size_t i;
+
+ for (i = 0; i < len / sizeof(uint16_t); i++)
+ sum = csum_add(sum, words[i]);
+ if (len & 1)
+ sum = csum_add(sum, ((char *)data)[len - 1]);
+ return sum;
+}
+
+noinline uint16_t icmp6_checksum(struct sockaddr_in6 *src,
+ struct sockaddr_in6 *dst,
+ void *ptr, size_t len, uint8_t proto)
+{
+ struct {
+ struct in6_addr saddr;
+ struct in6_addr daddr;
+ uint32_t payload_len;
+ uint8_t zero[3];
+ uint8_t nexthdr;
+ } pseudo_header = {};
+ uint32_t sum;
+
+ pseudo_header.saddr = src->sin6_addr;
+ pseudo_header.daddr = dst->sin6_addr;
+ pseudo_header.payload_len = htonl(len);
+ pseudo_header.nexthdr = proto;
+
+ sum = checksum6_nofold(&pseudo_header, sizeof(pseudo_header), 0);
+ sum = checksum6_nofold(ptr, len, sum);
+
+ return csum_fold(sum);
+}
+
+static void icmp6_interfere(int type, int code, uint32_t rcv_nxt,
+ struct sockaddr_in6 *src, struct sockaddr_in6 *dst)
+{
+ int sk = socket(AF_INET6, SOCK_RAW, IPPROTO_RAW);
+ struct sockaddr_in6 dst_raw = *dst;
+ struct {
+ struct ipv6hdr iph;
+ struct icmp6hdr icmph;
+ struct ipv6hdr iphe;
+ struct {
+ uint16_t sport;
+ uint16_t dport;
+ uint32_t seq;
+ } tcph;
+ } packet = {};
+ size_t packet_len;
+ ssize_t bytes;
+
+
+ if (sk < 0)
+ test_error("socket(AF_INET6, SOCK_RAW, IPPROTO_RAW)");
+
+ packet_len = sizeof(packet) - sizeof(packet.iph);
+ set_ip6hdr(&packet.iph, packet_len, IPPROTO_ICMPV6, src, dst);
+
+ packet.icmph.icmp6_type = type;
+ packet.icmph.icmp6_code = code;
+
+ packet_len = sizeof(packet.iphe) + sizeof(packet.tcph);
+ set_ip6hdr(&packet.iphe, packet_len, IPPROTO_TCP, dst, src);
+
+ packet.tcph.sport = dst->sin6_port;
+ packet.tcph.dport = src->sin6_port;
+ packet.tcph.seq = htonl(rcv_nxt);
+
+ packet_len = sizeof(packet) - sizeof(packet.iph);
+
+ packet.icmph.icmp6_cksum = icmp6_checksum(src, dst,
+ (void *)&packet.icmph, packet_len, IPPROTO_ICMPV6);
+
+ dst_raw.sin6_port = htons(IPPROTO_RAW);
+ bytes = sendto(sk, &packet, sizeof(packet), 0,
+ (struct sockaddr *)&dst_raw, sizeof(dst_raw));
+ if (bytes != sizeof(packet))
+ test_error("send(): %zd", bytes);
+ icmps_sent++;
+
+ close(sk);
+}
+
+static uint32_t get_rcv_nxt(int sk)
+{
+ int val = TCP_REPAIR_ON;
+ uint32_t ret;
+ socklen_t sz = sizeof(ret);
+
+ if (setsockopt(sk, SOL_TCP, TCP_REPAIR, &val, sizeof(val)))
+ test_error("setsockopt(TCP_REPAIR)");
+ val = TCP_RECV_QUEUE;
+ if (setsockopt(sk, SOL_TCP, TCP_REPAIR_QUEUE, &val, sizeof(val)))
+ test_error("setsockopt(TCP_REPAIR_QUEUE)");
+ if (getsockopt(sk, SOL_TCP, TCP_QUEUE_SEQ, &ret, &sz))
+ test_error("getsockopt(TCP_QUEUE_SEQ)");
+ val = TCP_REPAIR_OFF_NO_WP;
+ if (setsockopt(sk, SOL_TCP, TCP_REPAIR, &val, sizeof(val)))
+ test_error("setsockopt(TCP_REPAIR)");
+ return ret;
+}
+
+static void icmp_interfere(const size_t nr, uint32_t rcv_nxt, void *src, void *dst)
+{
+ struct sockaddr_in *saddr4 = src;
+ struct sockaddr_in *daddr4 = dst;
+ struct sockaddr_in6 *saddr6 = src;
+ struct sockaddr_in6 *daddr6 = dst;
+ size_t i;
+
+ if (saddr4->sin_family != daddr4->sin_family)
+ test_error("Different address families");
+
+ for (i = 0; i < nr; i++) {
+ if (saddr4->sin_family == AF_INET) {
+ icmp_interfere4(ICMP_DEST_UNREACH, ICMP_PROT_UNREACH,
+ rcv_nxt, saddr4, daddr4);
+ icmp_interfere4(ICMP_DEST_UNREACH, ICMP_PORT_UNREACH,
+ rcv_nxt, saddr4, daddr4);
+ icmp_interfere4(ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED,
+ rcv_nxt, saddr4, daddr4);
+ icmps_sent += 3;
+ } else if (saddr4->sin_family == AF_INET6) {
+ icmp6_interfere(ICMPV6_DEST_UNREACH,
+ ICMPV6_ADM_PROHIBITED,
+ rcv_nxt, saddr6, daddr6);
+ icmp6_interfere(ICMPV6_DEST_UNREACH,
+ ICMPV6_PORT_UNREACH,
+ rcv_nxt, saddr6, daddr6);
+ icmps_sent += 2;
+ } else {
+ test_error("Not ip address family");
+ }
+ }
+}
+
+static void send_interfered(int sk)
+{
+ const unsigned int timeout = TEST_TIMEOUT_SEC;
+ struct sockaddr_in6 src, dst;
+ socklen_t addr_sz;
+
+ addr_sz = sizeof(src);
+ if (getsockname(sk, &src, &addr_sz))
+ test_error("getsockname()");
+ addr_sz = sizeof(dst);
+ if (getpeername(sk, &dst, &addr_sz))
+ test_error("getpeername()");
+
+ while (1) {
+ uint32_t rcv_nxt;
+
+ if (test_client_verify(sk, packet_size, packets_nr, timeout)) {
+ test_fail("client: connection is broken");
+ return;
+ }
+ packets_sent += packets_nr;
+ rcv_nxt = get_rcv_nxt(sk);
+ icmp_interfere(packets_nr, rcv_nxt, (void *)&src, (void *)&dst);
+ }
+}
+
+static void *client_fn(void *arg)
+{
+ int sk = socket(test_family, SOCK_STREAM, IPPROTO_TCP);
+
+ if (sk < 0)
+ test_error("socket()");
+
+ if (test_add_key(sk, DEFAULT_TEST_PASSWORD, this_ip_dest, -1, 100, 100))
+ test_error("setsockopt(TCP_AO_ADD_KEY)");
+
+ synchronize_threads();
+ if (test_connect_socket(sk, this_ip_dest, test_server_port) <= 0)
+ test_error("failed to connect()");
+ synchronize_threads();
+
+ send_interfered(sk);
+
+ /* Not expecting client to quit */
+ test_fail("client disconnected");
+
+ return NULL;
+}
+
+int main(int argc, char *argv[])
+{
+ test_init(3, server_fn, client_fn);
+ return 0;
+}
diff --git a/tools/testing/selftests/net/tcp_ao/key-management.c b/tools/testing/selftests/net/tcp_ao/key-management.c
new file mode 100644
index 000000000000..24e62120b792
--- /dev/null
+++ b/tools/testing/selftests/net/tcp_ao/key-management.c
@@ -0,0 +1,1186 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Author: Dmitry Safonov <dima@arista.com> */
+#include <inttypes.h>
+#include "../../../../include/linux/kernel.h"
+#include "aolib.h"
+
+const size_t nr_packets = 20;
+const size_t msg_len = 100;
+const size_t quota = nr_packets * msg_len;
+union tcp_addr wrong_addr;
+#define SECOND_PASSWORD "at all times sincere friends of freedom have been rare"
+#define fault(type) (inj == FAULT_ ## type)
+
+static const int test_vrf_ifindex = 200;
+static const uint8_t test_vrf_tabid = 42;
+static void setup_vrfs(void)
+{
+ int err;
+
+ if (!kernel_config_has(KCONFIG_NET_VRF))
+ return;
+
+ err = add_vrf("ksft-vrf", test_vrf_tabid, test_vrf_ifindex, -1);
+ if (err)
+ test_error("Failed to add a VRF: %d", err);
+
+ err = link_set_up("ksft-vrf");
+ if (err)
+ test_error("Failed to bring up a VRF");
+
+ err = ip_route_add_vrf(veth_name, TEST_FAMILY,
+ this_ip_addr, this_ip_dest, test_vrf_tabid);
+ if (err)
+ test_error("Failed to add a route to VRF");
+}
+
+
+static int prepare_sk(union tcp_addr *addr, uint8_t sndid, uint8_t rcvid)
+{
+ int sk = socket(test_family, SOCK_STREAM, IPPROTO_TCP);
+
+ if (sk < 0)
+ test_error("socket()");
+
+ if (test_add_key(sk, DEFAULT_TEST_PASSWORD, this_ip_dest,
+ DEFAULT_TEST_PREFIX, 100, 100))
+ test_error("test_add_key()");
+
+ if (addr && test_add_key(sk, SECOND_PASSWORD, *addr,
+ DEFAULT_TEST_PREFIX, sndid, rcvid))
+ test_error("test_add_key()");
+
+ return sk;
+}
+
+static int prepare_lsk(union tcp_addr *addr, uint8_t sndid, uint8_t rcvid)
+{
+ int sk = prepare_sk(addr, sndid, rcvid);
+
+ if (listen(sk, 10))
+ test_error("listen()");
+
+ return sk;
+}
+
+static int test_del_key(int sk, uint8_t sndid, uint8_t rcvid, bool async,
+ int current_key, int rnext_key)
+{
+ struct tcp_ao_info_opt ao_info = {};
+ struct tcp_ao_getsockopt key = {};
+ struct tcp_ao_del del = {};
+ sockaddr_af sockaddr;
+ int err;
+
+ tcp_addr_to_sockaddr_in(&del.addr, &this_ip_dest, 0);
+ del.prefix = DEFAULT_TEST_PREFIX;
+ del.sndid = sndid;
+ del.rcvid = rcvid;
+
+ if (current_key >= 0) {
+ del.set_current = 1;
+ del.current_key = (uint8_t)current_key;
+ }
+ if (rnext_key >= 0) {
+ del.set_rnext = 1;
+ del.rnext = (uint8_t)rnext_key;
+ }
+
+ err = setsockopt(sk, IPPROTO_TCP, TCP_AO_DEL_KEY, &del, sizeof(del));
+ if (err < 0)
+ return -errno;
+
+ if (async)
+ return 0;
+
+ tcp_addr_to_sockaddr_in(&sockaddr, &this_ip_dest, 0);
+ err = test_get_one_ao(sk, &key, &sockaddr, sizeof(sockaddr),
+ DEFAULT_TEST_PREFIX, sndid, rcvid);
+ if (!err)
+ return -EEXIST;
+ if (err != -E2BIG)
+ test_error("getsockopt()");
+ if (current_key < 0 && rnext_key < 0)
+ return 0;
+ if (test_get_ao_info(sk, &ao_info))
+ test_error("getsockopt(TCP_AO_INFO) failed");
+ if (current_key >= 0 && ao_info.current_key != (uint8_t)current_key)
+ return -ENOTRECOVERABLE;
+ if (rnext_key >= 0 && ao_info.rnext != (uint8_t)rnext_key)
+ return -ENOTRECOVERABLE;
+ return 0;
+}
+
+static void try_delete_key(char *tst_name, int sk, uint8_t sndid, uint8_t rcvid,
+ bool async, int current_key, int rnext_key,
+ fault_t inj)
+{
+ int err;
+
+ err = test_del_key(sk, sndid, rcvid, async, current_key, rnext_key);
+ if ((err == -EBUSY && fault(BUSY)) || (err == -EINVAL && fault(CURRNEXT))) {
+ test_ok("%s: key deletion was prevented", tst_name);
+ return;
+ }
+ if (err && fault(FIXME)) {
+ test_xfail("%s: failed to delete the key %u:%u %d",
+ tst_name, sndid, rcvid, err);
+ return;
+ }
+ if (!err) {
+ if (fault(BUSY) || fault(CURRNEXT)) {
+ test_fail("%s: the key was deleted %u:%u %d", tst_name,
+ sndid, rcvid, err);
+ } else {
+ test_ok("%s: the key was deleted", tst_name);
+ }
+ return;
+ }
+ test_fail("%s: can't delete the key %u:%u %d", tst_name, sndid, rcvid, err);
+}
+
+static int test_set_key(int sk, int current_keyid, int rnext_keyid)
+{
+ struct tcp_ao_info_opt ao_info = {};
+ int err;
+
+ if (current_keyid >= 0) {
+ ao_info.set_current = 1;
+ ao_info.current_key = (uint8_t)current_keyid;
+ }
+ if (rnext_keyid >= 0) {
+ ao_info.set_rnext = 1;
+ ao_info.rnext = (uint8_t)rnext_keyid;
+ }
+
+ err = test_set_ao_info(sk, &ao_info);
+ if (err)
+ return err;
+ if (test_get_ao_info(sk, &ao_info))
+ test_error("getsockopt(TCP_AO_INFO) failed");
+ if (current_keyid >= 0 && ao_info.current_key != (uint8_t)current_keyid)
+ return -ENOTRECOVERABLE;
+ if (rnext_keyid >= 0 && ao_info.rnext != (uint8_t)rnext_keyid)
+ return -ENOTRECOVERABLE;
+ return 0;
+}
+
+static int test_add_current_rnext_key(int sk, const char *key, uint8_t keyflags,
+ union tcp_addr in_addr, uint8_t prefix,
+ bool set_current, bool set_rnext,
+ uint8_t sndid, uint8_t rcvid)
+{
+ struct tcp_ao_add tmp = {};
+ int err;
+
+ err = test_prepare_key(&tmp, DEFAULT_TEST_ALGO, in_addr,
+ set_current, set_rnext,
+ prefix, 0, sndid, rcvid, 0, keyflags,
+ strlen(key), key);
+ if (err)
+ return err;
+
+
+ err = setsockopt(sk, IPPROTO_TCP, TCP_AO_ADD_KEY, &tmp, sizeof(tmp));
+ if (err < 0)
+ return -errno;
+
+ return test_verify_socket_key(sk, &tmp);
+}
+
+static int __try_add_current_rnext_key(int sk, const char *key, uint8_t keyflags,
+ union tcp_addr in_addr, uint8_t prefix,
+ bool set_current, bool set_rnext,
+ uint8_t sndid, uint8_t rcvid)
+{
+ struct tcp_ao_info_opt ao_info = {};
+ int err;
+
+ err = test_add_current_rnext_key(sk, key, keyflags, in_addr, prefix,
+ set_current, set_rnext, sndid, rcvid);
+ if (err)
+ return err;
+
+ if (test_get_ao_info(sk, &ao_info))
+ test_error("getsockopt(TCP_AO_INFO) failed");
+ if (set_current && ao_info.current_key != sndid)
+ return -ENOTRECOVERABLE;
+ if (set_rnext && ao_info.rnext != rcvid)
+ return -ENOTRECOVERABLE;
+ return 0;
+}
+
+static void try_add_current_rnext_key(char *tst_name, int sk, const char *key,
+ uint8_t keyflags,
+ union tcp_addr in_addr, uint8_t prefix,
+ bool set_current, bool set_rnext,
+ uint8_t sndid, uint8_t rcvid, fault_t inj)
+{
+ int err;
+
+ err = __try_add_current_rnext_key(sk, key, keyflags, in_addr, prefix,
+ set_current, set_rnext, sndid, rcvid);
+ if (!err && !fault(CURRNEXT)) {
+ test_ok("%s", tst_name);
+ return;
+ }
+ if (err == -EINVAL && fault(CURRNEXT)) {
+ test_ok("%s", tst_name);
+ return;
+ }
+ test_fail("%s", tst_name);
+}
+
+static void check_closed_socket(void)
+{
+ int sk;
+
+ sk = prepare_sk(&this_ip_dest, 200, 200);
+ try_delete_key("closed socket, delete a key", sk, 200, 200, 0, -1, -1, 0);
+ try_delete_key("closed socket, delete all keys", sk, 100, 100, 0, -1, -1, 0);
+ close(sk);
+
+ sk = prepare_sk(&this_ip_dest, 200, 200);
+ if (test_set_key(sk, 100, 200))
+ test_error("failed to set current/rnext keys");
+ try_delete_key("closed socket, delete current key", sk, 100, 100, 0, -1, -1, FAULT_BUSY);
+ try_delete_key("closed socket, delete rnext key", sk, 200, 200, 0, -1, -1, FAULT_BUSY);
+ close(sk);
+
+ sk = prepare_sk(&this_ip_dest, 200, 200);
+ if (test_add_key(sk, "Glory to heros!", this_ip_dest,
+ DEFAULT_TEST_PREFIX, 10, 11))
+ test_error("test_add_key()");
+ if (test_add_key(sk, "Glory to Ukraine!", this_ip_dest,
+ DEFAULT_TEST_PREFIX, 12, 13))
+ test_error("test_add_key()");
+ try_delete_key("closed socket, delete a key + set current/rnext", sk, 100, 100, 0, 10, 13, 0);
+ try_delete_key("closed socket, force-delete current key", sk, 10, 11, 0, 200, -1, 0);
+ try_delete_key("closed socket, force-delete rnext key", sk, 12, 13, 0, -1, 200, 0);
+ try_delete_key("closed socket, delete current+rnext key", sk, 200, 200, 0, -1, -1, FAULT_BUSY);
+ close(sk);
+
+ sk = prepare_sk(&this_ip_dest, 200, 200);
+ if (test_set_key(sk, 100, 200))
+ test_error("failed to set current/rnext keys");
+ try_add_current_rnext_key("closed socket, add + change current key",
+ sk, "Laaaa! Lalala-la-la-lalala...", 0,
+ this_ip_dest, DEFAULT_TEST_PREFIX,
+ true, false, 10, 20, 0);
+ try_add_current_rnext_key("closed socket, add + change rnext key",
+ sk, "Laaaa! Lalala-la-la-lalala...", 0,
+ this_ip_dest, DEFAULT_TEST_PREFIX,
+ false, true, 20, 10, 0);
+ close(sk);
+}
+
+static void assert_no_current_rnext(const char *tst_msg, int sk)
+{
+ struct tcp_ao_info_opt ao_info = {};
+
+ if (test_get_ao_info(sk, &ao_info))
+ test_error("getsockopt(TCP_AO_INFO) failed");
+
+ errno = 0;
+ if (ao_info.set_current || ao_info.set_rnext) {
+ test_xfail("%s: the socket has current/rnext keys: %d:%d",
+ tst_msg,
+ (ao_info.set_current) ? ao_info.current_key : -1,
+ (ao_info.set_rnext) ? ao_info.rnext : -1);
+ } else {
+ test_ok("%s: the socket has no current/rnext keys", tst_msg);
+ }
+}
+
+static void assert_no_tcp_repair(void)
+{
+ struct tcp_ao_repair ao_img = {};
+ socklen_t len = sizeof(ao_img);
+ int sk, err;
+
+ sk = prepare_sk(&this_ip_dest, 200, 200);
+ test_enable_repair(sk);
+ if (listen(sk, 10))
+ test_error("listen()");
+ errno = 0;
+ err = getsockopt(sk, SOL_TCP, TCP_AO_REPAIR, &ao_img, &len);
+ if (err && errno == EPERM)
+ test_ok("listen socket, getsockopt(TCP_AO_REPAIR) is restricted");
+ else
+ test_fail("listen socket, getsockopt(TCP_AO_REPAIR) works");
+ errno = 0;
+ err = setsockopt(sk, SOL_TCP, TCP_AO_REPAIR, &ao_img, sizeof(ao_img));
+ if (err && errno == EPERM)
+ test_ok("listen socket, setsockopt(TCP_AO_REPAIR) is restricted");
+ else
+ test_fail("listen socket, setsockopt(TCP_AO_REPAIR) works");
+ close(sk);
+}
+
+static void check_listen_socket(void)
+{
+ int sk, err;
+
+ sk = prepare_lsk(&this_ip_dest, 200, 200);
+ try_delete_key("listen socket, delete a key", sk, 200, 200, 0, -1, -1, 0);
+ try_delete_key("listen socket, delete all keys", sk, 100, 100, 0, -1, -1, 0);
+ close(sk);
+
+ sk = prepare_lsk(&this_ip_dest, 200, 200);
+ err = test_set_key(sk, 100, -1);
+ if (err == -EINVAL)
+ test_ok("listen socket, setting current key not allowed");
+ else
+ test_fail("listen socket, set current key");
+ err = test_set_key(sk, -1, 200);
+ if (err == -EINVAL)
+ test_ok("listen socket, setting rnext key not allowed");
+ else
+ test_fail("listen socket, set rnext key");
+ close(sk);
+
+ sk = prepare_sk(&this_ip_dest, 200, 200);
+ if (test_set_key(sk, 100, 200))
+ test_error("failed to set current/rnext keys");
+ if (listen(sk, 10))
+ test_error("listen()");
+ assert_no_current_rnext("listen() after current/rnext keys set", sk);
+ try_delete_key("listen socket, delete current key from before listen()", sk, 100, 100, 0, -1, -1, FAULT_FIXME);
+ try_delete_key("listen socket, delete rnext key from before listen()", sk, 200, 200, 0, -1, -1, FAULT_FIXME);
+ close(sk);
+
+ assert_no_tcp_repair();
+
+ sk = prepare_lsk(&this_ip_dest, 200, 200);
+ if (test_add_key(sk, "Glory to heros!", this_ip_dest,
+ DEFAULT_TEST_PREFIX, 10, 11))
+ test_error("test_add_key()");
+ if (test_add_key(sk, "Glory to Ukraine!", this_ip_dest,
+ DEFAULT_TEST_PREFIX, 12, 13))
+ test_error("test_add_key()");
+ try_delete_key("listen socket, delete a key + set current/rnext", sk,
+ 100, 100, 0, 10, 13, FAULT_CURRNEXT);
+ try_delete_key("listen socket, force-delete current key", sk,
+ 10, 11, 0, 200, -1, FAULT_CURRNEXT);
+ try_delete_key("listen socket, force-delete rnext key", sk,
+ 12, 13, 0, -1, 200, FAULT_CURRNEXT);
+ try_delete_key("listen socket, delete a key", sk,
+ 200, 200, 0, -1, -1, 0);
+ close(sk);
+
+ sk = prepare_lsk(&this_ip_dest, 200, 200);
+ try_add_current_rnext_key("listen socket, add + change current key",
+ sk, "Laaaa! Lalala-la-la-lalala...", 0,
+ this_ip_dest, DEFAULT_TEST_PREFIX,
+ true, false, 10, 20, FAULT_CURRNEXT);
+ try_add_current_rnext_key("listen socket, add + change rnext key",
+ sk, "Laaaa! Lalala-la-la-lalala...", 0,
+ this_ip_dest, DEFAULT_TEST_PREFIX,
+ false, true, 20, 10, FAULT_CURRNEXT);
+ close(sk);
+}
+
+static const char *fips_fpath = "/proc/sys/crypto/fips_enabled";
+static bool is_fips_enabled(void)
+{
+ static int fips_checked = -1;
+ FILE *fenabled;
+ int enabled;
+
+ if (fips_checked >= 0)
+ return !!fips_checked;
+ if (access(fips_fpath, R_OK)) {
+ if (errno != ENOENT)
+ test_error("Can't open %s", fips_fpath);
+ fips_checked = 0;
+ return false;
+ }
+ fenabled = fopen(fips_fpath, "r");
+ if (!fenabled)
+ test_error("Can't open %s", fips_fpath);
+ if (fscanf(fenabled, "%d", &enabled) != 1)
+ test_error("Can't read from %s", fips_fpath);
+ fclose(fenabled);
+ fips_checked = !!enabled;
+ return !!fips_checked;
+}
+
+struct test_key {
+ char password[TCP_AO_MAXKEYLEN];
+ const char *alg;
+ unsigned int len;
+ uint8_t client_keyid;
+ uint8_t server_keyid;
+ uint8_t maclen;
+ uint8_t matches_client : 1,
+ matches_server : 1,
+ matches_vrf : 1,
+ is_current : 1,
+ is_rnext : 1,
+ used_on_server_tx : 1,
+ used_on_client_tx : 1,
+ skip_counters_checks : 1;
+};
+
+struct key_collection {
+ unsigned int nr_keys;
+ struct test_key *keys;
+};
+
+static struct key_collection collection;
+
+#define TEST_MAX_MACLEN 16
+const char *test_algos[] = {
+ "cmac(aes128)",
+ "hmac(sha1)", "hmac(sha512)", "hmac(sha384)", "hmac(sha256)",
+ "hmac(sha224)", "hmac(sha3-512)",
+ /* only if !CONFIG_FIPS */
+#define TEST_NON_FIPS_ALGOS 2
+ "hmac(rmd160)", "hmac(md5)"
+};
+const unsigned int test_maclens[] = { 1, 4, 12, 16 };
+#define MACLEN_SHIFT 2
+#define ALGOS_SHIFT 4
+
+static unsigned int make_mask(unsigned int shift, unsigned int prev_shift)
+{
+ unsigned int ret = BIT(shift) - 1;
+
+ return ret << prev_shift;
+}
+
+static void init_key_in_collection(unsigned int index, bool randomized)
+{
+ struct test_key *key = &collection.keys[index];
+ unsigned int algos_nr, algos_index;
+
+ /* Same for randomized and non-randomized test flows */
+ key->client_keyid = index;
+ key->server_keyid = 127 + index;
+ key->matches_client = 1;
+ key->matches_server = 1;
+ key->matches_vrf = 1;
+ /* not really even random, but good enough for a test */
+ key->len = rand() % (TCP_AO_MAXKEYLEN - TEST_TCP_AO_MINKEYLEN);
+ key->len += TEST_TCP_AO_MINKEYLEN;
+ randomize_buffer(key->password, key->len);
+
+ if (randomized) {
+ key->maclen = (rand() % TEST_MAX_MACLEN) + 1;
+ algos_index = rand();
+ } else {
+ unsigned int shift = MACLEN_SHIFT;
+
+ key->maclen = test_maclens[index & make_mask(shift, 0)];
+ algos_index = index & make_mask(ALGOS_SHIFT, shift);
+ }
+ algos_nr = ARRAY_SIZE(test_algos);
+ if (is_fips_enabled())
+ algos_nr -= TEST_NON_FIPS_ALGOS;
+ key->alg = test_algos[algos_index % algos_nr];
+}
+
+static int init_default_key_collection(unsigned int nr_keys, bool randomized)
+{
+ size_t key_sz = sizeof(collection.keys[0]);
+
+ if (!nr_keys) {
+ free(collection.keys);
+ collection.keys = NULL;
+ return 0;
+ }
+
+ /*
+ * All keys have uniq sndid/rcvid and sndid != rcvid in order to
+ * check for any bugs/issues for different keyids, visible to both
+ * peers. Keyid == 254 is unused.
+ */
+ if (nr_keys > 127)
+ test_error("Test requires too many keys, correct the source");
+
+ collection.keys = reallocarray(collection.keys, nr_keys, key_sz);
+ if (!collection.keys)
+ return -ENOMEM;
+
+ memset(collection.keys, 0, nr_keys * key_sz);
+ collection.nr_keys = nr_keys;
+ while (nr_keys--)
+ init_key_in_collection(nr_keys, randomized);
+
+ return 0;
+}
+
+static void test_key_error(const char *msg, struct test_key *key)
+{
+ test_error("%s: key: { %s, %u:%u, %u, %u:%u:%u:%u:%u (%u)}",
+ msg, key->alg, key->client_keyid, key->server_keyid,
+ key->maclen, key->matches_client, key->matches_server,
+ key->matches_vrf, key->is_current, key->is_rnext, key->len);
+}
+
+static int test_add_key_cr(int sk, const char *pwd, unsigned int pwd_len,
+ union tcp_addr addr, uint8_t vrf,
+ uint8_t sndid, uint8_t rcvid,
+ uint8_t maclen, const char *alg,
+ bool set_current, bool set_rnext)
+{
+ struct tcp_ao_add tmp = {};
+ uint8_t keyflags = 0;
+ int err;
+
+ if (!alg)
+ alg = DEFAULT_TEST_ALGO;
+
+ if (vrf)
+ keyflags |= TCP_AO_KEYF_IFINDEX;
+ err = test_prepare_key(&tmp, alg, addr, set_current, set_rnext,
+ DEFAULT_TEST_PREFIX, vrf, sndid, rcvid, maclen,
+ keyflags, pwd_len, pwd);
+ if (err)
+ return err;
+
+ err = setsockopt(sk, IPPROTO_TCP, TCP_AO_ADD_KEY, &tmp, sizeof(tmp));
+ if (err < 0)
+ return -errno;
+
+ return test_verify_socket_key(sk, &tmp);
+}
+
+static void verify_current_rnext(const char *tst, int sk,
+ int current_keyid, int rnext_keyid)
+{
+ struct tcp_ao_info_opt ao_info = {};
+
+ if (test_get_ao_info(sk, &ao_info))
+ test_error("getsockopt(TCP_AO_INFO) failed");
+
+ errno = 0;
+ if (current_keyid >= 0) {
+ if (!ao_info.set_current)
+ test_fail("%s: the socket doesn't have current key", tst);
+ else if (ao_info.current_key != current_keyid)
+ test_fail("%s: current key is not the expected one %d != %u",
+ tst, current_keyid, ao_info.current_key);
+ else
+ test_ok("%s: current key %u as expected",
+ tst, ao_info.current_key);
+ }
+ if (rnext_keyid >= 0) {
+ if (!ao_info.set_rnext)
+ test_fail("%s: the socket doesn't have rnext key", tst);
+ else if (ao_info.rnext != rnext_keyid)
+ test_fail("%s: rnext key is not the expected one %d != %u",
+ tst, rnext_keyid, ao_info.rnext);
+ else
+ test_ok("%s: rnext key %u as expected", tst, ao_info.rnext);
+ }
+}
+
+
+static int key_collection_socket(bool server, unsigned int port)
+{
+ unsigned int i;
+ int sk;
+
+ if (server)
+ sk = test_listen_socket(this_ip_addr, port, 1);
+ else
+ sk = socket(test_family, SOCK_STREAM, IPPROTO_TCP);
+ if (sk < 0)
+ test_error("socket()");
+
+ for (i = 0; i < collection.nr_keys; i++) {
+ struct test_key *key = &collection.keys[i];
+ union tcp_addr *addr = &wrong_addr;
+ uint8_t sndid, rcvid, vrf;
+ bool set_current = false, set_rnext = false;
+
+ if (key->matches_vrf)
+ vrf = 0;
+ else
+ vrf = test_vrf_ifindex;
+ if (server) {
+ if (key->matches_client)
+ addr = &this_ip_dest;
+ sndid = key->server_keyid;
+ rcvid = key->client_keyid;
+ } else {
+ if (key->matches_server)
+ addr = &this_ip_dest;
+ sndid = key->client_keyid;
+ rcvid = key->server_keyid;
+ key->used_on_client_tx = set_current = key->is_current;
+ key->used_on_server_tx = set_rnext = key->is_rnext;
+ }
+
+ if (test_add_key_cr(sk, key->password, key->len,
+ *addr, vrf, sndid, rcvid, key->maclen,
+ key->alg, set_current, set_rnext))
+ test_key_error("setsockopt(TCP_AO_ADD_KEY)", key);
+#ifdef DEBUG
+ test_print("%s [%u/%u] key: { %s, %u:%u, %u, %u:%u:%u:%u (%u)}",
+ server ? "server" : "client", i, collection.nr_keys,
+ key->alg, rcvid, sndid, key->maclen,
+ key->matches_client, key->matches_server,
+ key->is_current, key->is_rnext, key->len);
+#endif
+ }
+ return sk;
+}
+
+static void verify_counters(const char *tst_name, bool is_listen_sk, bool server,
+ struct tcp_ao_counters *a, struct tcp_ao_counters *b)
+{
+ unsigned int i;
+
+ __test_tcp_ao_counters_cmp(tst_name, a, b, TEST_CNT_GOOD);
+
+ for (i = 0; i < collection.nr_keys; i++) {
+ struct test_key *key = &collection.keys[i];
+ uint8_t sndid, rcvid;
+ bool rx_cnt_expected;
+
+ if (key->skip_counters_checks)
+ continue;
+ if (server) {
+ sndid = key->server_keyid;
+ rcvid = key->client_keyid;
+ rx_cnt_expected = key->used_on_client_tx;
+ } else {
+ sndid = key->client_keyid;
+ rcvid = key->server_keyid;
+ rx_cnt_expected = key->used_on_server_tx;
+ }
+
+ test_tcp_ao_key_counters_cmp(tst_name, a, b,
+ rx_cnt_expected ? TEST_CNT_KEY_GOOD : 0,
+ sndid, rcvid);
+ }
+ test_tcp_ao_counters_free(a);
+ test_tcp_ao_counters_free(b);
+ test_ok("%s: passed counters checks", tst_name);
+}
+
+static struct tcp_ao_getsockopt *lookup_key(struct tcp_ao_getsockopt *buf,
+ size_t len, int sndid, int rcvid)
+{
+ size_t i;
+
+ for (i = 0; i < len; i++) {
+ if (sndid >= 0 && buf[i].sndid != sndid)
+ continue;
+ if (rcvid >= 0 && buf[i].rcvid != rcvid)
+ continue;
+ return &buf[i];
+ }
+ return NULL;
+}
+
+static void verify_keys(const char *tst_name, int sk,
+ bool is_listen_sk, bool server)
+{
+ socklen_t len = sizeof(struct tcp_ao_getsockopt);
+ struct tcp_ao_getsockopt *keys;
+ bool passed_test = true;
+ unsigned int i;
+
+ keys = calloc(collection.nr_keys, len);
+ if (!keys)
+ test_error("calloc()");
+
+ keys->nkeys = collection.nr_keys;
+ keys->get_all = 1;
+
+ if (getsockopt(sk, IPPROTO_TCP, TCP_AO_GET_KEYS, keys, &len)) {
+ free(keys);
+ test_error("getsockopt(TCP_AO_GET_KEYS)");
+ }
+
+ for (i = 0; i < collection.nr_keys; i++) {
+ struct test_key *key = &collection.keys[i];
+ struct tcp_ao_getsockopt *dump_key;
+ bool is_kdf_aes_128_cmac = false;
+ bool is_cmac_aes = false;
+ uint8_t sndid, rcvid;
+ bool matches = false;
+
+ if (server) {
+ if (key->matches_client)
+ matches = true;
+ sndid = key->server_keyid;
+ rcvid = key->client_keyid;
+ } else {
+ if (key->matches_server)
+ matches = true;
+ sndid = key->client_keyid;
+ rcvid = key->server_keyid;
+ }
+ if (!key->matches_vrf)
+ matches = false;
+ /* no keys get removed on the original listener socket */
+ if (is_listen_sk)
+ matches = true;
+
+ dump_key = lookup_key(keys, keys->nkeys, sndid, rcvid);
+ if (matches != !!dump_key) {
+ test_fail("%s: key %u:%u %s%s on the socket",
+ tst_name, sndid, rcvid,
+ key->matches_vrf ? "" : "[vrf] ",
+ matches ? "disappeared" : "yet present");
+ passed_test = false;
+ goto out;
+ }
+ if (!dump_key)
+ continue;
+
+ if (!strcmp("cmac(aes128)", key->alg)) {
+ is_kdf_aes_128_cmac = (key->len != 16);
+ is_cmac_aes = true;
+ }
+
+ if (is_cmac_aes) {
+ if (strcmp(dump_key->alg_name, "cmac(aes)")) {
+ test_fail("%s: key %u:%u cmac(aes) has unexpected alg %s",
+ tst_name, sndid, rcvid,
+ dump_key->alg_name);
+ passed_test = false;
+ continue;
+ }
+ } else if (strcmp(dump_key->alg_name, key->alg)) {
+ test_fail("%s: key %u:%u has unexpected alg %s != %s",
+ tst_name, sndid, rcvid,
+ dump_key->alg_name, key->alg);
+ passed_test = false;
+ continue;
+ }
+ if (is_kdf_aes_128_cmac) {
+ if (dump_key->keylen != 16) {
+ test_fail("%s: key %u:%u cmac(aes128) has unexpected len %u",
+ tst_name, sndid, rcvid,
+ dump_key->keylen);
+ continue;
+ }
+ } else if (dump_key->keylen != key->len) {
+ test_fail("%s: key %u:%u changed password len %u != %u",
+ tst_name, sndid, rcvid,
+ dump_key->keylen, key->len);
+ passed_test = false;
+ continue;
+ }
+ if (!is_kdf_aes_128_cmac &&
+ memcmp(dump_key->key, key->password, key->len)) {
+ test_fail("%s: key %u:%u has different password",
+ tst_name, sndid, rcvid);
+ passed_test = false;
+ continue;
+ }
+ if (dump_key->maclen != key->maclen) {
+ test_fail("%s: key %u:%u changed maclen %u != %u",
+ tst_name, sndid, rcvid,
+ dump_key->maclen, key->maclen);
+ passed_test = false;
+ continue;
+ }
+ }
+
+ if (passed_test)
+ test_ok("%s: The socket keys are consistent with the expectations",
+ tst_name);
+out:
+ free(keys);
+}
+
+static int start_server(const char *tst_name, unsigned int port, size_t quota,
+ struct tcp_ao_counters *begin,
+ unsigned int current_index, unsigned int rnext_index)
+{
+ struct tcp_ao_counters lsk_c1, lsk_c2;
+ ssize_t bytes;
+ int sk, lsk;
+
+ synchronize_threads(); /* 1: key collection initialized */
+ lsk = key_collection_socket(true, port);
+ if (test_get_tcp_ao_counters(lsk, &lsk_c1))
+ test_error("test_get_tcp_ao_counters()");
+ synchronize_threads(); /* 2: MKTs added => connect() */
+ if (test_wait_fd(lsk, TEST_TIMEOUT_SEC, 0))
+ test_error("test_wait_fd()");
+
+ sk = accept(lsk, NULL, NULL);
+ if (sk < 0)
+ test_error("accept()");
+ if (test_get_tcp_ao_counters(sk, begin))
+ test_error("test_get_tcp_ao_counters()");
+
+ synchronize_threads(); /* 3: accepted => send data */
+ if (test_get_tcp_ao_counters(lsk, &lsk_c2))
+ test_error("test_get_tcp_ao_counters()");
+ verify_keys(tst_name, lsk, true, true);
+ close(lsk);
+
+ bytes = test_server_run(sk, quota, TEST_TIMEOUT_SEC);
+ if (bytes != quota)
+ test_fail("%s: server served: %zd", tst_name, bytes);
+ else
+ test_ok("%s: server alive", tst_name);
+
+ verify_counters(tst_name, true, true, &lsk_c1, &lsk_c2);
+
+ return sk;
+}
+
+static void end_server(const char *tst_name, int sk,
+ struct tcp_ao_counters *begin)
+{
+ struct tcp_ao_counters end;
+
+ if (test_get_tcp_ao_counters(sk, &end))
+ test_error("test_get_tcp_ao_counters()");
+ verify_keys(tst_name, sk, false, true);
+
+ synchronize_threads(); /* 4: verified => closed */
+ close(sk);
+
+ verify_counters(tst_name, false, true, begin, &end);
+ synchronize_threads(); /* 5: counters */
+}
+
+static void try_server_run(const char *tst_name, unsigned int port, size_t quota,
+ unsigned int current_index, unsigned int rnext_index)
+{
+ struct tcp_ao_counters tmp;
+ int sk;
+
+ sk = start_server(tst_name, port, quota, &tmp,
+ current_index, rnext_index);
+ end_server(tst_name, sk, &tmp);
+}
+
+static void server_rotations(const char *tst_name, unsigned int port,
+ size_t quota, unsigned int rotations,
+ unsigned int current_index, unsigned int rnext_index)
+{
+ struct tcp_ao_counters tmp;
+ unsigned int i;
+ int sk;
+
+ sk = start_server(tst_name, port, quota, &tmp,
+ current_index, rnext_index);
+
+ for (i = current_index + 1; rotations > 0; i++, rotations--) {
+ ssize_t bytes;
+
+ if (i >= collection.nr_keys)
+ i = 0;
+ bytes = test_server_run(sk, quota, TEST_TIMEOUT_SEC);
+ if (bytes != quota) {
+ test_fail("%s: server served: %zd", tst_name, bytes);
+ return;
+ }
+ verify_current_rnext(tst_name, sk,
+ collection.keys[i].server_keyid, -1);
+ synchronize_threads(); /* verify current/rnext */
+ }
+ end_server(tst_name, sk, &tmp);
+}
+
+static int run_client(const char *tst_name, unsigned int port,
+ unsigned int nr_keys, int current_index, int rnext_index,
+ struct tcp_ao_counters *before,
+ const size_t msg_sz, const size_t msg_nr)
+{
+ int sk;
+
+ synchronize_threads(); /* 1: key collection initialized */
+ sk = key_collection_socket(false, port);
+
+ if (current_index >= 0 || rnext_index >= 0) {
+ int sndid = -1, rcvid = -1;
+
+ if (current_index >= 0)
+ sndid = collection.keys[current_index].client_keyid;
+ if (rnext_index >= 0)
+ rcvid = collection.keys[rnext_index].server_keyid;
+ if (test_set_key(sk, sndid, rcvid))
+ test_error("failed to set current/rnext keys");
+ }
+ if (before && test_get_tcp_ao_counters(sk, before))
+ test_error("test_get_tcp_ao_counters()");
+
+ synchronize_threads(); /* 2: MKTs added => connect() */
+ if (test_connect_socket(sk, this_ip_dest, port++) <= 0)
+ test_error("failed to connect()");
+ if (current_index < 0)
+ current_index = nr_keys - 1;
+ if (rnext_index < 0)
+ rnext_index = nr_keys - 1;
+ collection.keys[current_index].used_on_client_tx = 1;
+ collection.keys[rnext_index].used_on_server_tx = 1;
+
+ synchronize_threads(); /* 3: accepted => send data */
+ if (test_client_verify(sk, msg_sz, msg_nr, TEST_TIMEOUT_SEC)) {
+ test_fail("verify failed");
+ close(sk);
+ if (before)
+ test_tcp_ao_counters_free(before);
+ return -1;
+ }
+
+ return sk;
+}
+
+static int start_client(const char *tst_name, unsigned int port,
+ unsigned int nr_keys, int current_index, int rnext_index,
+ struct tcp_ao_counters *before,
+ const size_t msg_sz, const size_t msg_nr)
+{
+ if (init_default_key_collection(nr_keys, true))
+ test_error("Failed to init the key collection");
+
+ return run_client(tst_name, port, nr_keys, current_index,
+ rnext_index, before, msg_sz, msg_nr);
+}
+
+static void end_client(const char *tst_name, int sk, unsigned int nr_keys,
+ int current_index, int rnext_index,
+ struct tcp_ao_counters *start)
+{
+ struct tcp_ao_counters end;
+
+ /* Some application may become dependent on this kernel choice */
+ if (current_index < 0)
+ current_index = nr_keys - 1;
+ if (rnext_index < 0)
+ rnext_index = nr_keys - 1;
+ verify_current_rnext(tst_name, sk,
+ collection.keys[current_index].client_keyid,
+ collection.keys[rnext_index].server_keyid);
+ if (start && test_get_tcp_ao_counters(sk, &end))
+ test_error("test_get_tcp_ao_counters()");
+ verify_keys(tst_name, sk, false, false);
+ synchronize_threads(); /* 4: verify => closed */
+ close(sk);
+ if (start)
+ verify_counters(tst_name, false, false, start, &end);
+ synchronize_threads(); /* 5: counters */
+}
+
+static void try_unmatched_keys(int sk, int *rnext_index)
+{
+ struct test_key *key;
+ unsigned int i = 0;
+ int err;
+
+ do {
+ key = &collection.keys[i];
+ if (!key->matches_server)
+ break;
+ } while (++i < collection.nr_keys);
+ if (key->matches_server)
+ test_error("all keys on client match the server");
+
+ err = test_add_key_cr(sk, key->password, key->len, wrong_addr,
+ 0, key->client_keyid, key->server_keyid,
+ key->maclen, key->alg, 0, 0);
+ if (!err) {
+ test_fail("Added a key with non-matching ip-address for established sk");
+ return;
+ }
+ if (err == -EINVAL)
+ test_ok("Can't add a key with non-matching ip-address for established sk");
+ else
+ test_error("Failed to add a key");
+
+ err = test_add_key_cr(sk, key->password, key->len, this_ip_dest,
+ test_vrf_ifindex,
+ key->client_keyid, key->server_keyid,
+ key->maclen, key->alg, 0, 0);
+ if (!err) {
+ test_fail("Added a key with non-matching VRF for established sk");
+ return;
+ }
+ if (err == -EINVAL)
+ test_ok("Can't add a key with non-matching VRF for established sk");
+ else
+ test_error("Failed to add a key");
+
+ for (i = 0; i < collection.nr_keys; i++) {
+ key = &collection.keys[i];
+ if (!key->matches_client)
+ break;
+ }
+ if (key->matches_client)
+ test_error("all keys on server match the client");
+ if (test_set_key(sk, -1, key->server_keyid))
+ test_error("Can't change the current key");
+ if (test_client_verify(sk, msg_len, nr_packets, TEST_TIMEOUT_SEC))
+ test_fail("verify failed");
+ *rnext_index = i;
+}
+
+static int client_non_matching(const char *tst_name, unsigned int port,
+ unsigned int nr_keys,
+ int current_index, int rnext_index,
+ const size_t msg_sz, const size_t msg_nr)
+{
+ unsigned int i;
+
+ if (init_default_key_collection(nr_keys, true))
+ test_error("Failed to init the key collection");
+
+ for (i = 0; i < nr_keys; i++) {
+ /* key (0, 0) matches */
+ collection.keys[i].matches_client = !!((i + 3) % 4);
+ collection.keys[i].matches_server = !!((i + 2) % 4);
+ if (kernel_config_has(KCONFIG_NET_VRF))
+ collection.keys[i].matches_vrf = !!((i + 1) % 4);
+ }
+
+ return run_client(tst_name, port, nr_keys, current_index,
+ rnext_index, NULL, msg_sz, msg_nr);
+}
+
+static void check_current_back(const char *tst_name, unsigned int port,
+ unsigned int nr_keys,
+ unsigned int current_index, unsigned int rnext_index,
+ unsigned int rotate_to_index)
+{
+ struct tcp_ao_counters tmp;
+ int sk;
+
+ sk = start_client(tst_name, port, nr_keys, current_index, rnext_index,
+ &tmp, msg_len, nr_packets);
+ if (sk < 0)
+ return;
+ if (test_set_key(sk, collection.keys[rotate_to_index].client_keyid, -1))
+ test_error("Can't change the current key");
+ if (test_client_verify(sk, msg_len, nr_packets, TEST_TIMEOUT_SEC))
+ test_fail("verify failed");
+ /* There is a race here: between setting the current_key with
+ * setsockopt(TCP_AO_INFO) and starting to send some data - there
+ * might have been a segment received with the desired
+ * RNext_key set. In turn that would mean that the first outgoing
+ * segment will have the desired current_key (flipped back).
+ * Which is what the user/test wants. As it's racy, skip checking
+ * the counters, yet check what are the resulting current/rnext
+ * keys on both sides.
+ */
+ collection.keys[rotate_to_index].skip_counters_checks = 1;
+
+ end_client(tst_name, sk, nr_keys, current_index, rnext_index, &tmp);
+}
+
+static void roll_over_keys(const char *tst_name, unsigned int port,
+ unsigned int nr_keys, unsigned int rotations,
+ unsigned int current_index, unsigned int rnext_index)
+{
+ struct tcp_ao_counters tmp;
+ unsigned int i;
+ int sk;
+
+ sk = start_client(tst_name, port, nr_keys, current_index, rnext_index,
+ &tmp, msg_len, nr_packets);
+ if (sk < 0)
+ return;
+ for (i = rnext_index + 1; rotations > 0; i++, rotations--) {
+ if (i >= collection.nr_keys)
+ i = 0;
+ if (test_set_key(sk, -1, collection.keys[i].server_keyid))
+ test_error("Can't change the Rnext key");
+ if (test_client_verify(sk, msg_len, nr_packets, TEST_TIMEOUT_SEC)) {
+ test_fail("verify failed");
+ close(sk);
+ test_tcp_ao_counters_free(&tmp);
+ return;
+ }
+ verify_current_rnext(tst_name, sk, -1,
+ collection.keys[i].server_keyid);
+ collection.keys[i].used_on_server_tx = 1;
+ synchronize_threads(); /* verify current/rnext */
+ }
+ end_client(tst_name, sk, nr_keys, current_index, rnext_index, &tmp);
+}
+
+static void try_client_run(const char *tst_name, unsigned int port,
+ unsigned int nr_keys, int current_index, int rnext_index)
+{
+ struct tcp_ao_counters tmp;
+ int sk;
+
+ sk = start_client(tst_name, port, nr_keys, current_index, rnext_index,
+ &tmp, msg_len, nr_packets);
+ if (sk < 0)
+ return;
+ end_client(tst_name, sk, nr_keys, current_index, rnext_index, &tmp);
+}
+
+static void try_client_match(const char *tst_name, unsigned int port,
+ unsigned int nr_keys,
+ int current_index, int rnext_index)
+{
+ int sk;
+
+ sk = client_non_matching(tst_name, port, nr_keys, current_index,
+ rnext_index, msg_len, nr_packets);
+ if (sk < 0)
+ return;
+ try_unmatched_keys(sk, &rnext_index);
+ end_client(tst_name, sk, nr_keys, current_index, rnext_index, NULL);
+}
+
+static void *server_fn(void *arg)
+{
+ unsigned int port = test_server_port;
+
+ setup_vrfs();
+ try_server_run("server: Check current/rnext keys unset before connect()",
+ port++, quota, 19, 19);
+ try_server_run("server: Check current/rnext keys set before connect()",
+ port++, quota, 10, 10);
+ try_server_run("server: Check current != rnext keys set before connect()",
+ port++, quota, 5, 10);
+ try_server_run("server: Check current flapping back on peer's RnextKey request",
+ port++, quota * 2, 5, 10);
+ server_rotations("server: Rotate over all different keys", port++,
+ quota, 20, 0, 0);
+ try_server_run("server: Check accept() => established key matching",
+ port++, quota * 2, 0, 0);
+
+ synchronize_threads(); /* don't race to exit: client exits */
+ return NULL;
+}
+
+static void check_established_socket(void)
+{
+ unsigned int port = test_server_port;
+
+ setup_vrfs();
+ try_client_run("client: Check current/rnext keys unset before connect()",
+ port++, 20, -1, -1);
+ try_client_run("client: Check current/rnext keys set before connect()",
+ port++, 20, 10, 10);
+ try_client_run("client: Check current != rnext keys set before connect()",
+ port++, 20, 10, 5);
+ check_current_back("client: Check current flapping back on peer's RnextKey request",
+ port++, 20, 10, 5, 2);
+ roll_over_keys("client: Rotate over all different keys", port++,
+ 20, 20, 0, 0);
+ try_client_match("client: Check connect() => established key matching",
+ port++, 20, 0, 0);
+}
+
+static void *client_fn(void *arg)
+{
+ if (inet_pton(TEST_FAMILY, TEST_WRONG_IP, &wrong_addr) != 1)
+ test_error("Can't convert ip address %s", TEST_WRONG_IP);
+ check_closed_socket();
+ check_listen_socket();
+ check_established_socket();
+ return NULL;
+}
+
+int main(int argc, char *argv[])
+{
+ test_init(120, server_fn, client_fn);
+ return 0;
+}
diff --git a/tools/testing/selftests/net/tcp_ao/lib/aolib.h b/tools/testing/selftests/net/tcp_ao/lib/aolib.h
new file mode 100644
index 000000000000..fbc7f6111815
--- /dev/null
+++ b/tools/testing/selftests/net/tcp_ao/lib/aolib.h
@@ -0,0 +1,605 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * TCP-AO selftest library. Provides helpers to unshare network
+ * namespaces, create veth, assign ip addresses, set routes,
+ * manipulate socket options, read network counter and etc.
+ * Author: Dmitry Safonov <dima@arista.com>
+ */
+#ifndef _AOLIB_H_
+#define _AOLIB_H_
+
+#include <arpa/inet.h>
+#include <errno.h>
+#include <linux/snmp.h>
+#include <linux/tcp.h>
+#include <netinet/in.h>
+#include <stdarg.h>
+#include <stdbool.h>
+#include <stdlib.h>
+#include <stdio.h>
+#include <string.h>
+#include <sys/syscall.h>
+#include <unistd.h>
+
+#include "../../../../../include/linux/stringify.h"
+#include "../../../../../include/linux/bits.h"
+
+#ifndef SOL_TCP
+/* can't include <netinet/tcp.h> as including <linux/tcp.h> */
+# define SOL_TCP 6 /* TCP level */
+#endif
+
+/* Working around ksft, see the comment in lib/setup.c */
+extern void __test_msg(const char *buf);
+extern void __test_ok(const char *buf);
+extern void __test_fail(const char *buf);
+extern void __test_xfail(const char *buf);
+extern void __test_error(const char *buf);
+extern void __test_skip(const char *buf);
+
+__attribute__((__format__(__printf__, 2, 3)))
+static inline void __test_print(void (*fn)(const char *), const char *fmt, ...)
+{
+#define TEST_MSG_BUFFER_SIZE 4096
+ char buf[TEST_MSG_BUFFER_SIZE];
+ va_list arg;
+
+ va_start(arg, fmt);
+ vsnprintf(buf, sizeof(buf), fmt, arg);
+ va_end(arg);
+ fn(buf);
+}
+
+#define test_print(fmt, ...) \
+ __test_print(__test_msg, "%ld[%s:%u] " fmt "\n", \
+ syscall(SYS_gettid), \
+ __FILE__, __LINE__, ##__VA_ARGS__)
+
+#define test_ok(fmt, ...) \
+ __test_print(__test_ok, fmt "\n", ##__VA_ARGS__)
+#define test_skip(fmt, ...) \
+ __test_print(__test_skip, fmt "\n", ##__VA_ARGS__)
+#define test_xfail(fmt, ...) \
+ __test_print(__test_xfail, fmt "\n", ##__VA_ARGS__)
+
+#define test_fail(fmt, ...) \
+do { \
+ if (errno) \
+ __test_print(__test_fail, fmt ": %m\n", ##__VA_ARGS__); \
+ else \
+ __test_print(__test_fail, fmt "\n", ##__VA_ARGS__); \
+ test_failed(); \
+} while (0)
+
+#define KSFT_FAIL 1
+#define test_error(fmt, ...) \
+do { \
+ if (errno) \
+ __test_print(__test_error, "%ld[%s:%u] " fmt ": %m\n", \
+ syscall(SYS_gettid), __FILE__, __LINE__, \
+ ##__VA_ARGS__); \
+ else \
+ __test_print(__test_error, "%ld[%s:%u] " fmt "\n", \
+ syscall(SYS_gettid), __FILE__, __LINE__, \
+ ##__VA_ARGS__); \
+ exit(KSFT_FAIL); \
+} while (0)
+
+enum test_fault {
+ FAULT_TIMEOUT = 1,
+ FAULT_KEYREJECT,
+ FAULT_PREINSTALL_AO,
+ FAULT_PREINSTALL_MD5,
+ FAULT_POSTINSTALL,
+ FAULT_BUSY,
+ FAULT_CURRNEXT,
+ FAULT_FIXME,
+};
+typedef enum test_fault fault_t;
+
+enum test_needs_kconfig {
+ KCONFIG_NET_NS = 0, /* required */
+ KCONFIG_VETH, /* required */
+ KCONFIG_TCP_AO, /* required */
+ KCONFIG_TCP_MD5, /* optional, for TCP-MD5 features */
+ KCONFIG_NET_VRF, /* optional, for L3/VRF testing */
+ __KCONFIG_LAST__
+};
+extern bool kernel_config_has(enum test_needs_kconfig k);
+extern const char *tests_skip_reason[__KCONFIG_LAST__];
+static inline bool should_skip_test(const char *tst_name,
+ enum test_needs_kconfig k)
+{
+ if (kernel_config_has(k))
+ return false;
+ test_skip("%s: %s", tst_name, tests_skip_reason[k]);
+ return true;
+}
+
+union tcp_addr {
+ struct in_addr a4;
+ struct in6_addr a6;
+};
+
+typedef void *(*thread_fn)(void *);
+extern void test_failed(void);
+extern void __test_init(unsigned int ntests, int family, unsigned int prefix,
+ union tcp_addr addr1, union tcp_addr addr2,
+ thread_fn peer1, thread_fn peer2);
+
+static inline void test_init2(unsigned int ntests,
+ thread_fn peer1, thread_fn peer2,
+ int family, unsigned int prefix,
+ const char *addr1, const char *addr2)
+{
+ union tcp_addr taddr1, taddr2;
+
+ if (inet_pton(family, addr1, &taddr1) != 1)
+ test_error("Can't convert ip address %s", addr1);
+ if (inet_pton(family, addr2, &taddr2) != 1)
+ test_error("Can't convert ip address %s", addr2);
+
+ __test_init(ntests, family, prefix, taddr1, taddr2, peer1, peer2);
+}
+extern void test_add_destructor(void (*d)(void));
+
+/* To adjust optmem socket limit, approximately estimate a number,
+ * that is bigger than sizeof(struct tcp_ao_key).
+ */
+#define KERNEL_TCP_AO_KEY_SZ_ROUND_UP 300
+
+extern void test_set_optmem(size_t value);
+extern size_t test_get_optmem(void);
+
+extern const struct sockaddr_in6 addr_any6;
+extern const struct sockaddr_in addr_any4;
+
+#ifdef IPV6_TEST
+# define __TEST_CLIENT_IP(n) ("2001:db8:" __stringify(n) "::1")
+# define TEST_CLIENT_IP __TEST_CLIENT_IP(1)
+# define TEST_WRONG_IP "2001:db8:253::1"
+# define TEST_SERVER_IP "2001:db8:254::1"
+# define TEST_NETWORK "2001::"
+# define TEST_PREFIX 128
+# define TEST_FAMILY AF_INET6
+# define SOCKADDR_ANY addr_any6
+# define sockaddr_af struct sockaddr_in6
+#else
+# define __TEST_CLIENT_IP(n) ("10.0." __stringify(n) ".1")
+# define TEST_CLIENT_IP __TEST_CLIENT_IP(1)
+# define TEST_WRONG_IP "10.0.253.1"
+# define TEST_SERVER_IP "10.0.254.1"
+# define TEST_NETWORK "10.0.0.0"
+# define TEST_PREFIX 32
+# define TEST_FAMILY AF_INET
+# define SOCKADDR_ANY addr_any4
+# define sockaddr_af struct sockaddr_in
+#endif
+
+static inline union tcp_addr gen_tcp_addr(union tcp_addr net, size_t n)
+{
+ union tcp_addr ret = net;
+
+#ifdef IPV6_TEST
+ ret.a6.s6_addr32[3] = htonl(n & (BIT(32) - 1));
+ ret.a6.s6_addr32[2] = htonl((n >> 32) & (BIT(32) - 1));
+#else
+ ret.a4.s_addr = htonl(ntohl(net.a4.s_addr) + n);
+#endif
+
+ return ret;
+}
+
+static inline void tcp_addr_to_sockaddr_in(void *dest,
+ const union tcp_addr *src,
+ unsigned int port)
+{
+ sockaddr_af *out = dest;
+
+ memset(out, 0, sizeof(*out));
+#ifdef IPV6_TEST
+ out->sin6_family = AF_INET6;
+ out->sin6_port = port;
+ out->sin6_addr = src->a6;
+#else
+ out->sin_family = AF_INET;
+ out->sin_port = port;
+ out->sin_addr = src->a4;
+#endif
+}
+
+static inline void test_init(unsigned int ntests,
+ thread_fn peer1, thread_fn peer2)
+{
+ test_init2(ntests, peer1, peer2, TEST_FAMILY, TEST_PREFIX,
+ TEST_SERVER_IP, TEST_CLIENT_IP);
+}
+extern void synchronize_threads(void);
+extern void switch_ns(int fd);
+
+extern __thread union tcp_addr this_ip_addr;
+extern __thread union tcp_addr this_ip_dest;
+extern int test_family;
+
+extern void randomize_buffer(void *buf, size_t buflen);
+extern int open_netns(void);
+extern int unshare_open_netns(void);
+extern const char veth_name[];
+extern int add_veth(const char *name, int nsfda, int nsfdb);
+extern int add_vrf(const char *name, uint32_t tabid, int ifindex, int nsfd);
+extern int ip_addr_add(const char *intf, int family,
+ union tcp_addr addr, uint8_t prefix);
+extern int ip_route_add(const char *intf, int family,
+ union tcp_addr src, union tcp_addr dst);
+extern int ip_route_add_vrf(const char *intf, int family,
+ union tcp_addr src, union tcp_addr dst,
+ uint8_t vrf);
+extern int link_set_up(const char *intf);
+
+extern const unsigned int test_server_port;
+extern int test_wait_fd(int sk, time_t sec, bool write);
+extern int __test_connect_socket(int sk, const char *device,
+ void *addr, size_t addr_sz, time_t timeout);
+extern int __test_listen_socket(int backlog, void *addr, size_t addr_sz);
+
+static inline int test_listen_socket(const union tcp_addr taddr,
+ unsigned int port, int backlog)
+{
+ sockaddr_af addr;
+
+ tcp_addr_to_sockaddr_in(&addr, &taddr, htons(port));
+ return __test_listen_socket(backlog, (void *)&addr, sizeof(addr));
+}
+
+/*
+ * In order for selftests to work under CONFIG_CRYPTO_FIPS=y,
+ * the password should be loger than 14 bytes, see hmac_setkey()
+ */
+#define TEST_TCP_AO_MINKEYLEN 14
+#define DEFAULT_TEST_PASSWORD "In this hour, I do not believe that any darkness will endure."
+
+#ifndef DEFAULT_TEST_ALGO
+#define DEFAULT_TEST_ALGO "cmac(aes128)"
+#endif
+
+#ifdef IPV6_TEST
+#define DEFAULT_TEST_PREFIX 128
+#else
+#define DEFAULT_TEST_PREFIX 32
+#endif
+
+/*
+ * Timeout on syscalls where failure is not expected.
+ * You may want to rise it if the test machine is very busy.
+ */
+#ifndef TEST_TIMEOUT_SEC
+#define TEST_TIMEOUT_SEC 5
+#endif
+
+/*
+ * Timeout on connect() where a failure is expected.
+ * If set to 0 - kernel will try to retransmit SYN number of times, set in
+ * /proc/sys/net/ipv4/tcp_syn_retries
+ * By default set to 1 to make tests pass faster on non-busy machine.
+ */
+#ifndef TEST_RETRANSMIT_SEC
+#define TEST_RETRANSMIT_SEC 1
+#endif
+
+static inline int _test_connect_socket(int sk, const union tcp_addr taddr,
+ unsigned int port, time_t timeout)
+{
+ sockaddr_af addr;
+
+ tcp_addr_to_sockaddr_in(&addr, &taddr, htons(port));
+ return __test_connect_socket(sk, veth_name,
+ (void *)&addr, sizeof(addr), timeout);
+}
+
+static inline int test_connect_socket(int sk, const union tcp_addr taddr,
+ unsigned int port)
+{
+ return _test_connect_socket(sk, taddr, port, TEST_TIMEOUT_SEC);
+}
+
+extern int __test_set_md5(int sk, void *addr, size_t addr_sz,
+ uint8_t prefix, int vrf, const char *password);
+static inline int test_set_md5(int sk, const union tcp_addr in_addr,
+ uint8_t prefix, int vrf, const char *password)
+{
+ sockaddr_af addr;
+
+ if (prefix > DEFAULT_TEST_PREFIX)
+ prefix = DEFAULT_TEST_PREFIX;
+
+ tcp_addr_to_sockaddr_in(&addr, &in_addr, 0);
+ return __test_set_md5(sk, (void *)&addr, sizeof(addr),
+ prefix, vrf, password);
+}
+
+extern int test_prepare_key_sockaddr(struct tcp_ao_add *ao, const char *alg,
+ void *addr, size_t addr_sz, bool set_current, bool set_rnext,
+ uint8_t prefix, uint8_t vrf,
+ uint8_t sndid, uint8_t rcvid, uint8_t maclen,
+ uint8_t keyflags, uint8_t keylen, const char *key);
+
+static inline int test_prepare_key(struct tcp_ao_add *ao,
+ const char *alg, union tcp_addr taddr,
+ bool set_current, bool set_rnext,
+ uint8_t prefix, uint8_t vrf,
+ uint8_t sndid, uint8_t rcvid, uint8_t maclen,
+ uint8_t keyflags, uint8_t keylen, const char *key)
+{
+ sockaddr_af addr;
+
+ tcp_addr_to_sockaddr_in(&addr, &taddr, 0);
+ return test_prepare_key_sockaddr(ao, alg, (void *)&addr, sizeof(addr),
+ set_current, set_rnext, prefix, vrf, sndid, rcvid,
+ maclen, keyflags, keylen, key);
+}
+
+static inline int test_prepare_def_key(struct tcp_ao_add *ao,
+ const char *key, uint8_t keyflags,
+ union tcp_addr in_addr, uint8_t prefix, uint8_t vrf,
+ uint8_t sndid, uint8_t rcvid)
+{
+ if (prefix > DEFAULT_TEST_PREFIX)
+ prefix = DEFAULT_TEST_PREFIX;
+
+ return test_prepare_key(ao, DEFAULT_TEST_ALGO, in_addr, false, false,
+ prefix, vrf, sndid, rcvid, 0, keyflags,
+ strlen(key), key);
+}
+
+extern int test_get_one_ao(int sk, struct tcp_ao_getsockopt *out,
+ void *addr, size_t addr_sz,
+ uint8_t prefix, uint8_t sndid, uint8_t rcvid);
+extern int test_get_ao_info(int sk, struct tcp_ao_info_opt *out);
+extern int test_set_ao_info(int sk, struct tcp_ao_info_opt *in);
+extern int test_cmp_getsockopt_setsockopt(const struct tcp_ao_add *a,
+ const struct tcp_ao_getsockopt *b);
+extern int test_cmp_getsockopt_setsockopt_ao(const struct tcp_ao_info_opt *a,
+ const struct tcp_ao_info_opt *b);
+
+static inline int test_verify_socket_key(int sk, struct tcp_ao_add *key)
+{
+ struct tcp_ao_getsockopt key2 = {};
+ int err;
+
+ err = test_get_one_ao(sk, &key2, &key->addr, sizeof(key->addr),
+ key->prefix, key->sndid, key->rcvid);
+ if (err)
+ return err;
+
+ return test_cmp_getsockopt_setsockopt(key, &key2);
+}
+
+static inline int test_add_key_vrf(int sk,
+ const char *key, uint8_t keyflags,
+ union tcp_addr in_addr, uint8_t prefix,
+ uint8_t vrf, uint8_t sndid, uint8_t rcvid)
+{
+ struct tcp_ao_add tmp = {};
+ int err;
+
+ err = test_prepare_def_key(&tmp, key, keyflags, in_addr, prefix,
+ vrf, sndid, rcvid);
+ if (err)
+ return err;
+
+ err = setsockopt(sk, IPPROTO_TCP, TCP_AO_ADD_KEY, &tmp, sizeof(tmp));
+ if (err < 0)
+ return -errno;
+
+ return test_verify_socket_key(sk, &tmp);
+}
+
+static inline int test_add_key(int sk, const char *key,
+ union tcp_addr in_addr, uint8_t prefix,
+ uint8_t sndid, uint8_t rcvid)
+{
+ return test_add_key_vrf(sk, key, 0, in_addr, prefix, 0, sndid, rcvid);
+}
+
+static inline int test_verify_socket_ao(int sk, struct tcp_ao_info_opt *ao)
+{
+ struct tcp_ao_info_opt ao2 = {};
+ int err;
+
+ err = test_get_ao_info(sk, &ao2);
+ if (err)
+ return err;
+
+ return test_cmp_getsockopt_setsockopt_ao(ao, &ao2);
+}
+
+static inline int test_set_ao_flags(int sk, bool ao_required, bool accept_icmps)
+{
+ struct tcp_ao_info_opt ao = {};
+ int err;
+
+ err = test_get_ao_info(sk, &ao);
+ /* Maybe ao_info wasn't allocated yet */
+ if (err && err != -ENOENT)
+ return err;
+
+ ao.ao_required = !!ao_required;
+ ao.accept_icmps = !!accept_icmps;
+ err = test_set_ao_info(sk, &ao);
+ if (err)
+ return err;
+
+ return test_verify_socket_ao(sk, &ao);
+}
+
+extern ssize_t test_server_run(int sk, ssize_t quota, time_t timeout_sec);
+extern ssize_t test_client_loop(int sk, char *buf, size_t buf_sz,
+ const size_t msg_len, time_t timeout_sec);
+extern int test_client_verify(int sk, const size_t msg_len, const size_t nr,
+ time_t timeout_sec);
+
+struct tcp_ao_key_counters {
+ uint8_t sndid;
+ uint8_t rcvid;
+ uint64_t pkt_good;
+ uint64_t pkt_bad;
+};
+
+struct tcp_ao_counters {
+ /* per-netns */
+ uint64_t netns_ao_good;
+ uint64_t netns_ao_bad;
+ uint64_t netns_ao_key_not_found;
+ uint64_t netns_ao_required;
+ uint64_t netns_ao_dropped_icmp;
+ /* per-socket */
+ uint64_t ao_info_pkt_good;
+ uint64_t ao_info_pkt_bad;
+ uint64_t ao_info_pkt_key_not_found;
+ uint64_t ao_info_pkt_ao_required;
+ uint64_t ao_info_pkt_dropped_icmp;
+ /* per-key */
+ size_t nr_keys;
+ struct tcp_ao_key_counters *key_cnts;
+};
+extern int test_get_tcp_ao_counters(int sk, struct tcp_ao_counters *out);
+
+#define TEST_CNT_KEY_GOOD BIT(0)
+#define TEST_CNT_KEY_BAD BIT(1)
+#define TEST_CNT_SOCK_GOOD BIT(2)
+#define TEST_CNT_SOCK_BAD BIT(3)
+#define TEST_CNT_SOCK_KEY_NOT_FOUND BIT(4)
+#define TEST_CNT_SOCK_AO_REQUIRED BIT(5)
+#define TEST_CNT_SOCK_DROPPED_ICMP BIT(6)
+#define TEST_CNT_NS_GOOD BIT(7)
+#define TEST_CNT_NS_BAD BIT(8)
+#define TEST_CNT_NS_KEY_NOT_FOUND BIT(9)
+#define TEST_CNT_NS_AO_REQUIRED BIT(10)
+#define TEST_CNT_NS_DROPPED_ICMP BIT(11)
+typedef uint16_t test_cnt;
+
+#define TEST_CNT_AO_GOOD (TEST_CNT_SOCK_GOOD | TEST_CNT_NS_GOOD)
+#define TEST_CNT_AO_BAD (TEST_CNT_SOCK_BAD | TEST_CNT_NS_BAD)
+#define TEST_CNT_AO_KEY_NOT_FOUND (TEST_CNT_SOCK_KEY_NOT_FOUND | \
+ TEST_CNT_NS_KEY_NOT_FOUND)
+#define TEST_CNT_AO_REQUIRED (TEST_CNT_SOCK_AO_REQUIRED | \
+ TEST_CNT_NS_AO_REQUIRED)
+#define TEST_CNT_AO_DROPPED_ICMP (TEST_CNT_SOCK_DROPPED_ICMP | \
+ TEST_CNT_NS_DROPPED_ICMP)
+#define TEST_CNT_GOOD (TEST_CNT_KEY_GOOD | TEST_CNT_AO_GOOD)
+#define TEST_CNT_BAD (TEST_CNT_KEY_BAD | TEST_CNT_AO_BAD)
+
+extern int __test_tcp_ao_counters_cmp(const char *tst_name,
+ struct tcp_ao_counters *before, struct tcp_ao_counters *after,
+ test_cnt expected);
+extern int test_tcp_ao_key_counters_cmp(const char *tst_name,
+ struct tcp_ao_counters *before, struct tcp_ao_counters *after,
+ test_cnt expected, int sndid, int rcvid);
+extern void test_tcp_ao_counters_free(struct tcp_ao_counters *cnts);
+/*
+ * Frees buffers allocated in test_get_tcp_ao_counters().
+ * The function doesn't expect new keys or keys removed between calls
+ * to test_get_tcp_ao_counters(). Check key counters manually if they
+ * may change.
+ */
+static inline int test_tcp_ao_counters_cmp(const char *tst_name,
+ struct tcp_ao_counters *before,
+ struct tcp_ao_counters *after,
+ test_cnt expected)
+{
+ int ret;
+
+ ret = __test_tcp_ao_counters_cmp(tst_name, before, after, expected);
+ if (ret)
+ goto out;
+ ret = test_tcp_ao_key_counters_cmp(tst_name, before, after,
+ expected, -1, -1);
+out:
+ test_tcp_ao_counters_free(before);
+ test_tcp_ao_counters_free(after);
+ return ret;
+}
+
+struct netstat;
+extern struct netstat *netstat_read(void);
+extern void netstat_free(struct netstat *ns);
+extern void netstat_print_diff(struct netstat *nsa, struct netstat *nsb);
+extern uint64_t netstat_get(struct netstat *ns,
+ const char *name, bool *not_found);
+
+static inline uint64_t netstat_get_one(const char *name, bool *not_found)
+{
+ struct netstat *ns = netstat_read();
+ uint64_t ret;
+
+ ret = netstat_get(ns, name, not_found);
+
+ netstat_free(ns);
+ return ret;
+}
+
+struct tcp_sock_queue {
+ uint32_t seq;
+ void *buf;
+};
+
+struct tcp_sock_state {
+ struct tcp_info info;
+ struct tcp_repair_window trw;
+ struct tcp_sock_queue out;
+ int outq_len; /* output queue size (not sent + not acked) */
+ int outq_nsd_len; /* output queue size (not sent only) */
+ struct tcp_sock_queue in;
+ int inq_len;
+ int mss;
+ int timestamp;
+};
+
+extern void __test_sock_checkpoint(int sk, struct tcp_sock_state *state,
+ void *addr, size_t addr_size);
+static inline void test_sock_checkpoint(int sk, struct tcp_sock_state *state,
+ sockaddr_af *saddr)
+{
+ __test_sock_checkpoint(sk, state, saddr, sizeof(*saddr));
+}
+extern void test_ao_checkpoint(int sk, struct tcp_ao_repair *state);
+extern void __test_sock_restore(int sk, const char *device,
+ struct tcp_sock_state *state,
+ void *saddr, void *daddr, size_t addr_size);
+static inline void test_sock_restore(int sk, struct tcp_sock_state *state,
+ sockaddr_af *saddr,
+ const union tcp_addr daddr,
+ unsigned int dport)
+{
+ sockaddr_af addr;
+
+ tcp_addr_to_sockaddr_in(&addr, &daddr, htons(dport));
+ __test_sock_restore(sk, veth_name, state, saddr, &addr, sizeof(addr));
+}
+extern void test_ao_restore(int sk, struct tcp_ao_repair *state);
+extern void test_sock_state_free(struct tcp_sock_state *state);
+extern void test_enable_repair(int sk);
+extern void test_disable_repair(int sk);
+extern void test_kill_sk(int sk);
+static inline int test_add_repaired_key(int sk,
+ const char *key, uint8_t keyflags,
+ union tcp_addr in_addr, uint8_t prefix,
+ uint8_t sndid, uint8_t rcvid)
+{
+ struct tcp_ao_add tmp = {};
+ int err;
+
+ err = test_prepare_def_key(&tmp, key, keyflags, in_addr, prefix,
+ 0, sndid, rcvid);
+ if (err)
+ return err;
+
+ tmp.set_current = 1;
+ tmp.set_rnext = 1;
+ if (setsockopt(sk, IPPROTO_TCP, TCP_AO_ADD_KEY, &tmp, sizeof(tmp)) < 0)
+ return -errno;
+
+ return test_verify_socket_key(sk, &tmp);
+}
+
+#endif /* _AOLIB_H_ */
diff --git a/tools/testing/selftests/net/tcp_ao/lib/kconfig.c b/tools/testing/selftests/net/tcp_ao/lib/kconfig.c
new file mode 100644
index 000000000000..f279ffc3843b
--- /dev/null
+++ b/tools/testing/selftests/net/tcp_ao/lib/kconfig.c
@@ -0,0 +1,148 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Check what features does the kernel support (where the selftest is running).
+ * Somewhat inspired by CRIU kerndat/kdat kernel features detector.
+ */
+#include <pthread.h>
+#include "aolib.h"
+
+struct kconfig_t {
+ int _errno; /* the returned error if not supported */
+ int (*check_kconfig)(int *error);
+};
+
+static int has_net_ns(int *err)
+{
+ if (access("/proc/self/ns/net", F_OK) < 0) {
+ *err = errno;
+ if (errno == ENOENT)
+ return 0;
+ test_print("Unable to access /proc/self/ns/net: %m");
+ return -errno;
+ }
+ return *err = errno = 0;
+}
+
+static int has_veth(int *err)
+{
+ int orig_netns, ns_a, ns_b;
+
+ orig_netns = open_netns();
+ ns_a = unshare_open_netns();
+ ns_b = unshare_open_netns();
+
+ *err = add_veth("check_veth", ns_a, ns_b);
+
+ switch_ns(orig_netns);
+ close(orig_netns);
+ close(ns_a);
+ close(ns_b);
+ return 0;
+}
+
+static int has_tcp_ao(int *err)
+{
+ struct sockaddr_in addr = {
+ .sin_family = test_family,
+ };
+ struct tcp_ao_add tmp = {};
+ const char *password = DEFAULT_TEST_PASSWORD;
+ int sk, ret = 0;
+
+ sk = socket(test_family, SOCK_STREAM, IPPROTO_TCP);
+ if (sk < 0) {
+ test_print("socket(): %m");
+ return -errno;
+ }
+
+ tmp.sndid = 100;
+ tmp.rcvid = 100;
+ tmp.keylen = strlen(password);
+ memcpy(tmp.key, password, strlen(password));
+ strcpy(tmp.alg_name, "hmac(sha1)");
+ memcpy(&tmp.addr, &addr, sizeof(addr));
+ *err = 0;
+ if (setsockopt(sk, IPPROTO_TCP, TCP_AO_ADD_KEY, &tmp, sizeof(tmp)) < 0) {
+ *err = errno;
+ if (errno != ENOPROTOOPT)
+ ret = -errno;
+ }
+ close(sk);
+ return ret;
+}
+
+static int has_tcp_md5(int *err)
+{
+ union tcp_addr addr_any = {};
+ int sk, ret = 0;
+
+ sk = socket(AF_INET, SOCK_STREAM, IPPROTO_TCP);
+ if (sk < 0) {
+ test_print("socket(): %m");
+ return -errno;
+ }
+
+ /*
+ * Under CONFIG_CRYPTO_FIPS=y it fails with ENOMEM, rather with
+ * anything more descriptive. Oh well.
+ */
+ *err = 0;
+ if (test_set_md5(sk, addr_any, 0, -1, DEFAULT_TEST_PASSWORD)) {
+ *err = errno;
+ if (errno != ENOPROTOOPT && errno == ENOMEM) {
+ test_print("setsockopt(TCP_MD5SIG_EXT): %m");
+ ret = -errno;
+ }
+ }
+ close(sk);
+ return ret;
+}
+
+static int has_vrfs(int *err)
+{
+ int orig_netns, ns_test, ret = 0;
+
+ orig_netns = open_netns();
+ ns_test = unshare_open_netns();
+
+ *err = add_vrf("ksft-check", 55, 101, ns_test);
+ if (*err && *err != -EOPNOTSUPP) {
+ test_print("Failed to add a VRF: %d", *err);
+ ret = *err;
+ }
+
+ switch_ns(orig_netns);
+ close(orig_netns);
+ close(ns_test);
+ return ret;
+}
+
+static pthread_mutex_t kconfig_lock = PTHREAD_MUTEX_INITIALIZER;
+static struct kconfig_t kconfig[__KCONFIG_LAST__] = {
+ { -1, has_net_ns },
+ { -1, has_veth },
+ { -1, has_tcp_ao },
+ { -1, has_tcp_md5 },
+ { -1, has_vrfs },
+};
+
+const char *tests_skip_reason[__KCONFIG_LAST__] = {
+ "Tests require network namespaces support (CONFIG_NET_NS)",
+ "Tests require veth support (CONFIG_VETH)",
+ "Tests require TCP-AO support (CONFIG_TCP_AO)",
+ "setsockopt(TCP_MD5SIG_EXT) is not supported (CONFIG_TCP_MD5)",
+ "VRFs are not supported (CONFIG_NET_VRF)",
+};
+
+bool kernel_config_has(enum test_needs_kconfig k)
+{
+ bool ret;
+
+ pthread_mutex_lock(&kconfig_lock);
+ if (kconfig[k]._errno == -1) {
+ if (kconfig[k].check_kconfig(&kconfig[k]._errno))
+ test_error("Failed to initialize kconfig %u", k);
+ }
+ ret = kconfig[k]._errno == 0;
+ pthread_mutex_unlock(&kconfig_lock);
+ return ret;
+}
diff --git a/tools/testing/selftests/net/tcp_ao/lib/netlink.c b/tools/testing/selftests/net/tcp_ao/lib/netlink.c
new file mode 100644
index 000000000000..7f108493a29a
--- /dev/null
+++ b/tools/testing/selftests/net/tcp_ao/lib/netlink.c
@@ -0,0 +1,413 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Original from tools/testing/selftests/net/ipsec.c */
+#include <linux/netlink.h>
+#include <linux/random.h>
+#include <linux/rtnetlink.h>
+#include <linux/veth.h>
+#include <net/if.h>
+#include <stdint.h>
+#include <string.h>
+#include <sys/socket.h>
+
+#include "aolib.h"
+
+#define MAX_PAYLOAD 2048
+
+static int netlink_sock(int *sock, uint32_t *seq_nr, int proto)
+{
+ if (*sock > 0) {
+ seq_nr++;
+ return 0;
+ }
+
+ *sock = socket(AF_NETLINK, SOCK_RAW | SOCK_CLOEXEC, proto);
+ if (*sock < 0) {
+ test_print("socket(AF_NETLINK)");
+ return -1;
+ }
+
+ randomize_buffer(seq_nr, sizeof(*seq_nr));
+
+ return 0;
+}
+
+static int netlink_check_answer(int sock, bool quite)
+{
+ struct nlmsgerror {
+ struct nlmsghdr hdr;
+ int error;
+ struct nlmsghdr orig_msg;
+ } answer;
+
+ if (recv(sock, &answer, sizeof(answer), 0) < 0) {
+ test_print("recv()");
+ return -1;
+ } else if (answer.hdr.nlmsg_type != NLMSG_ERROR) {
+ test_print("expected NLMSG_ERROR, got %d",
+ (int)answer.hdr.nlmsg_type);
+ return -1;
+ } else if (answer.error) {
+ if (!quite) {
+ test_print("NLMSG_ERROR: %d: %s",
+ answer.error, strerror(-answer.error));
+ }
+ return answer.error;
+ }
+
+ return 0;
+}
+
+static inline struct rtattr *rtattr_hdr(struct nlmsghdr *nh)
+{
+ return (struct rtattr *)((char *)(nh) + RTA_ALIGN((nh)->nlmsg_len));
+}
+
+static int rtattr_pack(struct nlmsghdr *nh, size_t req_sz,
+ unsigned short rta_type, const void *payload, size_t size)
+{
+ /* NLMSG_ALIGNTO == RTA_ALIGNTO, nlmsg_len already aligned */
+ struct rtattr *attr = rtattr_hdr(nh);
+ size_t nl_size = RTA_ALIGN(nh->nlmsg_len) + RTA_LENGTH(size);
+
+ if (req_sz < nl_size) {
+ test_print("req buf is too small: %zu < %zu", req_sz, nl_size);
+ return -1;
+ }
+ nh->nlmsg_len = nl_size;
+
+ attr->rta_len = RTA_LENGTH(size);
+ attr->rta_type = rta_type;
+ memcpy(RTA_DATA(attr), payload, size);
+
+ return 0;
+}
+
+static struct rtattr *_rtattr_begin(struct nlmsghdr *nh, size_t req_sz,
+ unsigned short rta_type, const void *payload, size_t size)
+{
+ struct rtattr *ret = rtattr_hdr(nh);
+
+ if (rtattr_pack(nh, req_sz, rta_type, payload, size))
+ return 0;
+
+ return ret;
+}
+
+static inline struct rtattr *rtattr_begin(struct nlmsghdr *nh, size_t req_sz,
+ unsigned short rta_type)
+{
+ return _rtattr_begin(nh, req_sz, rta_type, 0, 0);
+}
+
+static inline void rtattr_end(struct nlmsghdr *nh, struct rtattr *attr)
+{
+ char *nlmsg_end = (char *)nh + nh->nlmsg_len;
+
+ attr->rta_len = nlmsg_end - (char *)attr;
+}
+
+static int veth_pack_peerb(struct nlmsghdr *nh, size_t req_sz,
+ const char *peer, int ns)
+{
+ struct ifinfomsg pi;
+ struct rtattr *peer_attr;
+
+ memset(&pi, 0, sizeof(pi));
+ pi.ifi_family = AF_UNSPEC;
+ pi.ifi_change = 0xFFFFFFFF;
+
+ peer_attr = _rtattr_begin(nh, req_sz, VETH_INFO_PEER, &pi, sizeof(pi));
+ if (!peer_attr)
+ return -1;
+
+ if (rtattr_pack(nh, req_sz, IFLA_IFNAME, peer, strlen(peer)))
+ return -1;
+
+ if (rtattr_pack(nh, req_sz, IFLA_NET_NS_FD, &ns, sizeof(ns)))
+ return -1;
+
+ rtattr_end(nh, peer_attr);
+
+ return 0;
+}
+
+static int __add_veth(int sock, uint32_t seq, const char *name,
+ int ns_a, int ns_b)
+{
+ uint16_t flags = NLM_F_REQUEST | NLM_F_ACK | NLM_F_EXCL | NLM_F_CREATE;
+ struct {
+ struct nlmsghdr nh;
+ struct ifinfomsg info;
+ char attrbuf[MAX_PAYLOAD];
+ } req;
+ static const char veth_type[] = "veth";
+ struct rtattr *link_info, *info_data;
+
+ memset(&req, 0, sizeof(req));
+ req.nh.nlmsg_len = NLMSG_LENGTH(sizeof(req.info));
+ req.nh.nlmsg_type = RTM_NEWLINK;
+ req.nh.nlmsg_flags = flags;
+ req.nh.nlmsg_seq = seq;
+ req.info.ifi_family = AF_UNSPEC;
+ req.info.ifi_change = 0xFFFFFFFF;
+
+ if (rtattr_pack(&req.nh, sizeof(req), IFLA_IFNAME, name, strlen(name)))
+ return -1;
+
+ if (rtattr_pack(&req.nh, sizeof(req), IFLA_NET_NS_FD, &ns_a, sizeof(ns_a)))
+ return -1;
+
+ link_info = rtattr_begin(&req.nh, sizeof(req), IFLA_LINKINFO);
+ if (!link_info)
+ return -1;
+
+ if (rtattr_pack(&req.nh, sizeof(req), IFLA_INFO_KIND, veth_type, sizeof(veth_type)))
+ return -1;
+
+ info_data = rtattr_begin(&req.nh, sizeof(req), IFLA_INFO_DATA);
+ if (!info_data)
+ return -1;
+
+ if (veth_pack_peerb(&req.nh, sizeof(req), name, ns_b))
+ return -1;
+
+ rtattr_end(&req.nh, info_data);
+ rtattr_end(&req.nh, link_info);
+
+ if (send(sock, &req, req.nh.nlmsg_len, 0) < 0) {
+ test_print("send()");
+ return -1;
+ }
+ return netlink_check_answer(sock, false);
+}
+
+int add_veth(const char *name, int nsfda, int nsfdb)
+{
+ int route_sock = -1, ret;
+ uint32_t route_seq;
+
+ if (netlink_sock(&route_sock, &route_seq, NETLINK_ROUTE))
+ test_error("Failed to open netlink route socket\n");
+
+ ret = __add_veth(route_sock, route_seq++, name, nsfda, nsfdb);
+ close(route_sock);
+ return ret;
+}
+
+static int __ip_addr_add(int sock, uint32_t seq, const char *intf,
+ int family, union tcp_addr addr, uint8_t prefix)
+{
+ uint16_t flags = NLM_F_REQUEST | NLM_F_ACK | NLM_F_EXCL | NLM_F_CREATE;
+ struct {
+ struct nlmsghdr nh;
+ struct ifaddrmsg info;
+ char attrbuf[MAX_PAYLOAD];
+ } req;
+ size_t addr_len = (family == AF_INET) ? sizeof(struct in_addr) :
+ sizeof(struct in6_addr);
+
+ memset(&req, 0, sizeof(req));
+ req.nh.nlmsg_len = NLMSG_LENGTH(sizeof(req.info));
+ req.nh.nlmsg_type = RTM_NEWADDR;
+ req.nh.nlmsg_flags = flags;
+ req.nh.nlmsg_seq = seq;
+ req.info.ifa_family = family;
+ req.info.ifa_prefixlen = prefix;
+ req.info.ifa_index = if_nametoindex(intf);
+ req.info.ifa_flags = IFA_F_NODAD;
+
+ if (rtattr_pack(&req.nh, sizeof(req), IFA_LOCAL, &addr, addr_len))
+ return -1;
+
+ if (send(sock, &req, req.nh.nlmsg_len, 0) < 0) {
+ test_print("send()");
+ return -1;
+ }
+ return netlink_check_answer(sock, true);
+}
+
+int ip_addr_add(const char *intf, int family,
+ union tcp_addr addr, uint8_t prefix)
+{
+ int route_sock = -1, ret;
+ uint32_t route_seq;
+
+ if (netlink_sock(&route_sock, &route_seq, NETLINK_ROUTE))
+ test_error("Failed to open netlink route socket\n");
+
+ ret = __ip_addr_add(route_sock, route_seq++, intf,
+ family, addr, prefix);
+
+ close(route_sock);
+ return ret;
+}
+
+static int __ip_route_add(int sock, uint32_t seq, const char *intf, int family,
+ union tcp_addr src, union tcp_addr dst, uint8_t vrf)
+{
+ struct {
+ struct nlmsghdr nh;
+ struct rtmsg rt;
+ char attrbuf[MAX_PAYLOAD];
+ } req;
+ unsigned int index = if_nametoindex(intf);
+ size_t addr_len = (family == AF_INET) ? sizeof(struct in_addr) :
+ sizeof(struct in6_addr);
+
+ memset(&req, 0, sizeof(req));
+ req.nh.nlmsg_len = NLMSG_LENGTH(sizeof(req.rt));
+ req.nh.nlmsg_type = RTM_NEWROUTE;
+ req.nh.nlmsg_flags = NLM_F_REQUEST | NLM_F_ACK | NLM_F_CREATE;
+ req.nh.nlmsg_seq = seq;
+ req.rt.rtm_family = family;
+ req.rt.rtm_dst_len = (family == AF_INET) ? 32 : 128;
+ req.rt.rtm_table = vrf;
+ req.rt.rtm_protocol = RTPROT_BOOT;
+ req.rt.rtm_scope = RT_SCOPE_UNIVERSE;
+ req.rt.rtm_type = RTN_UNICAST;
+
+ if (rtattr_pack(&req.nh, sizeof(req), RTA_DST, &dst, addr_len))
+ return -1;
+
+ if (rtattr_pack(&req.nh, sizeof(req), RTA_PREFSRC, &src, addr_len))
+ return -1;
+
+ if (rtattr_pack(&req.nh, sizeof(req), RTA_OIF, &index, sizeof(index)))
+ return -1;
+
+ if (send(sock, &req, req.nh.nlmsg_len, 0) < 0) {
+ test_print("send()");
+ return -1;
+ }
+
+ return netlink_check_answer(sock, true);
+}
+
+int ip_route_add_vrf(const char *intf, int family,
+ union tcp_addr src, union tcp_addr dst, uint8_t vrf)
+{
+ int route_sock = -1, ret;
+ uint32_t route_seq;
+
+ if (netlink_sock(&route_sock, &route_seq, NETLINK_ROUTE))
+ test_error("Failed to open netlink route socket\n");
+
+ ret = __ip_route_add(route_sock, route_seq++, intf,
+ family, src, dst, vrf);
+
+ close(route_sock);
+ return ret;
+}
+
+int ip_route_add(const char *intf, int family,
+ union tcp_addr src, union tcp_addr dst)
+{
+ return ip_route_add_vrf(intf, family, src, dst, RT_TABLE_MAIN);
+}
+
+static int __link_set_up(int sock, uint32_t seq, const char *intf)
+{
+ struct {
+ struct nlmsghdr nh;
+ struct ifinfomsg info;
+ char attrbuf[MAX_PAYLOAD];
+ } req;
+
+ memset(&req, 0, sizeof(req));
+ req.nh.nlmsg_len = NLMSG_LENGTH(sizeof(req.info));
+ req.nh.nlmsg_type = RTM_NEWLINK;
+ req.nh.nlmsg_flags = NLM_F_REQUEST | NLM_F_ACK;
+ req.nh.nlmsg_seq = seq;
+ req.info.ifi_family = AF_UNSPEC;
+ req.info.ifi_change = 0xFFFFFFFF;
+ req.info.ifi_index = if_nametoindex(intf);
+ req.info.ifi_flags = IFF_UP;
+ req.info.ifi_change = IFF_UP;
+
+ if (send(sock, &req, req.nh.nlmsg_len, 0) < 0) {
+ test_print("send()");
+ return -1;
+ }
+ return netlink_check_answer(sock, false);
+}
+
+int link_set_up(const char *intf)
+{
+ int route_sock = -1, ret;
+ uint32_t route_seq;
+
+ if (netlink_sock(&route_sock, &route_seq, NETLINK_ROUTE))
+ test_error("Failed to open netlink route socket\n");
+
+ ret = __link_set_up(route_sock, route_seq++, intf);
+
+ close(route_sock);
+ return ret;
+}
+
+static int __add_vrf(int sock, uint32_t seq, const char *name,
+ uint32_t tabid, int ifindex, int nsfd)
+{
+ uint16_t flags = NLM_F_REQUEST | NLM_F_ACK | NLM_F_EXCL | NLM_F_CREATE;
+ struct {
+ struct nlmsghdr nh;
+ struct ifinfomsg info;
+ char attrbuf[MAX_PAYLOAD];
+ } req;
+ static const char vrf_type[] = "vrf";
+ struct rtattr *link_info, *info_data;
+
+ memset(&req, 0, sizeof(req));
+ req.nh.nlmsg_len = NLMSG_LENGTH(sizeof(req.info));
+ req.nh.nlmsg_type = RTM_NEWLINK;
+ req.nh.nlmsg_flags = flags;
+ req.nh.nlmsg_seq = seq;
+ req.info.ifi_family = AF_UNSPEC;
+ req.info.ifi_change = 0xFFFFFFFF;
+ req.info.ifi_index = ifindex;
+
+ if (rtattr_pack(&req.nh, sizeof(req), IFLA_IFNAME, name, strlen(name)))
+ return -1;
+
+ if (nsfd >= 0)
+ if (rtattr_pack(&req.nh, sizeof(req), IFLA_NET_NS_FD,
+ &nsfd, sizeof(nsfd)))
+ return -1;
+
+ link_info = rtattr_begin(&req.nh, sizeof(req), IFLA_LINKINFO);
+ if (!link_info)
+ return -1;
+
+ if (rtattr_pack(&req.nh, sizeof(req), IFLA_INFO_KIND, vrf_type, sizeof(vrf_type)))
+ return -1;
+
+ info_data = rtattr_begin(&req.nh, sizeof(req), IFLA_INFO_DATA);
+ if (!info_data)
+ return -1;
+
+ if (rtattr_pack(&req.nh, sizeof(req), IFLA_VRF_TABLE,
+ &tabid, sizeof(tabid)))
+ return -1;
+
+ rtattr_end(&req.nh, info_data);
+ rtattr_end(&req.nh, link_info);
+
+ if (send(sock, &req, req.nh.nlmsg_len, 0) < 0) {
+ test_print("send()");
+ return -1;
+ }
+ return netlink_check_answer(sock, true);
+}
+
+int add_vrf(const char *name, uint32_t tabid, int ifindex, int nsfd)
+{
+ int route_sock = -1, ret;
+ uint32_t route_seq;
+
+ if (netlink_sock(&route_sock, &route_seq, NETLINK_ROUTE))
+ test_error("Failed to open netlink route socket\n");
+
+ ret = __add_vrf(route_sock, route_seq++, name, tabid, ifindex, nsfd);
+ close(route_sock);
+ return ret;
+}
diff --git a/tools/testing/selftests/net/tcp_ao/lib/proc.c b/tools/testing/selftests/net/tcp_ao/lib/proc.c
new file mode 100644
index 000000000000..2fb6dd8adba6
--- /dev/null
+++ b/tools/testing/selftests/net/tcp_ao/lib/proc.c
@@ -0,0 +1,273 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <inttypes.h>
+#include <pthread.h>
+#include <stdio.h>
+#include "../../../../../include/linux/compiler.h"
+#include "../../../../../include/linux/kernel.h"
+#include "aolib.h"
+
+struct netstat_counter {
+ uint64_t val;
+ char *name;
+};
+
+struct netstat {
+ char *header_name;
+ struct netstat *next;
+ size_t counters_nr;
+ struct netstat_counter *counters;
+};
+
+static struct netstat *lookup_type(struct netstat *ns,
+ const char *type, size_t len)
+{
+ while (ns != NULL) {
+ size_t cmp = max(len, strlen(ns->header_name));
+
+ if (!strncmp(ns->header_name, type, cmp))
+ return ns;
+ ns = ns->next;
+ }
+ return NULL;
+}
+
+static struct netstat *lookup_get(struct netstat *ns,
+ const char *type, const size_t len)
+{
+ struct netstat *ret;
+
+ ret = lookup_type(ns, type, len);
+ if (ret != NULL)
+ return ret;
+
+ ret = malloc(sizeof(struct netstat));
+ if (!ret)
+ test_error("malloc()");
+
+ ret->header_name = strndup(type, len);
+ if (ret->header_name == NULL)
+ test_error("strndup()");
+ ret->next = ns;
+ ret->counters_nr = 0;
+ ret->counters = NULL;
+
+ return ret;
+}
+
+static struct netstat *lookup_get_column(struct netstat *ns, const char *line)
+{
+ char *column;
+
+ column = strchr(line, ':');
+ if (!column)
+ test_error("can't parse netstat file");
+
+ return lookup_get(ns, line, column - line);
+}
+
+static void netstat_read_type(FILE *fnetstat, struct netstat **dest, char *line)
+{
+ struct netstat *type = lookup_get_column(*dest, line);
+ const char *pos = line;
+ size_t i, nr_elems = 0;
+ char tmp;
+
+ while ((pos = strchr(pos, ' '))) {
+ nr_elems++;
+ pos++;
+ }
+
+ *dest = type;
+ type->counters = reallocarray(type->counters,
+ type->counters_nr + nr_elems,
+ sizeof(struct netstat_counter));
+ if (!type->counters)
+ test_error("reallocarray()");
+
+ pos = strchr(line, ' ') + 1;
+
+ if (fscanf(fnetstat, type->header_name) == EOF)
+ test_error("fscanf(%s)", type->header_name);
+ if (fread(&tmp, 1, 1, fnetstat) != 1 || tmp != ':')
+ test_error("Unexpected netstat format (%c)", tmp);
+
+ for (i = type->counters_nr; i < type->counters_nr + nr_elems; i++) {
+ struct netstat_counter *nc = &type->counters[i];
+ const char *new_pos = strchr(pos, ' ');
+ const char *fmt = " %" PRIu64;
+
+ if (new_pos == NULL)
+ new_pos = strchr(pos, '\n');
+
+ nc->name = strndup(pos, new_pos - pos);
+ if (nc->name == NULL)
+ test_error("strndup()");
+
+ if (unlikely(!strcmp(nc->name, "MaxConn")))
+ fmt = " %" PRId64; /* MaxConn is signed, RFC 2012 */
+ if (fscanf(fnetstat, fmt, &nc->val) != 1)
+ test_error("fscanf(%s)", nc->name);
+ pos = new_pos + 1;
+ }
+ type->counters_nr += nr_elems;
+
+ if (fread(&tmp, 1, 1, fnetstat) != 1 || tmp != '\n')
+ test_error("Unexpected netstat format");
+}
+
+static const char *snmp6_name = "Snmp6";
+static void snmp6_read(FILE *fnetstat, struct netstat **dest)
+{
+ struct netstat *type = lookup_get(*dest, snmp6_name, strlen(snmp6_name));
+ char *counter_name;
+ size_t i;
+
+ for (i = type->counters_nr;; i++) {
+ struct netstat_counter *nc;
+ uint64_t counter;
+
+ if (fscanf(fnetstat, "%ms", &counter_name) == EOF)
+ break;
+ if (fscanf(fnetstat, "%" PRIu64, &counter) == EOF)
+ test_error("Unexpected snmp6 format");
+ type->counters = reallocarray(type->counters, i + 1,
+ sizeof(struct netstat_counter));
+ if (!type->counters)
+ test_error("reallocarray()");
+ nc = &type->counters[i];
+ nc->name = counter_name;
+ nc->val = counter;
+ }
+ type->counters_nr = i;
+ *dest = type;
+}
+
+struct netstat *netstat_read(void)
+{
+ struct netstat *ret = 0;
+ size_t line_sz = 0;
+ char *line = NULL;
+ FILE *fnetstat;
+
+ /*
+ * Opening thread-self instead of /proc/net/... as the latter
+ * points to /proc/self/net/ which instantiates thread-leader's
+ * net-ns, see:
+ * commit 155134fef2b6 ("Revert "proc: Point /proc/{mounts,net} at..")
+ */
+ errno = 0;
+ fnetstat = fopen("/proc/thread-self/net/netstat", "r");
+ if (fnetstat == NULL)
+ test_error("failed to open /proc/net/netstat");
+
+ while (getline(&line, &line_sz, fnetstat) != -1)
+ netstat_read_type(fnetstat, &ret, line);
+ fclose(fnetstat);
+
+ errno = 0;
+ fnetstat = fopen("/proc/thread-self/net/snmp", "r");
+ if (fnetstat == NULL)
+ test_error("failed to open /proc/net/snmp");
+
+ while (getline(&line, &line_sz, fnetstat) != -1)
+ netstat_read_type(fnetstat, &ret, line);
+ fclose(fnetstat);
+
+ errno = 0;
+ fnetstat = fopen("/proc/thread-self/net/snmp6", "r");
+ if (fnetstat == NULL)
+ test_error("failed to open /proc/net/snmp6");
+
+ snmp6_read(fnetstat, &ret);
+ fclose(fnetstat);
+
+ free(line);
+ return ret;
+}
+
+void netstat_free(struct netstat *ns)
+{
+ while (ns != NULL) {
+ struct netstat *prev = ns;
+ size_t i;
+
+ free(ns->header_name);
+ for (i = 0; i < ns->counters_nr; i++)
+ free(ns->counters[i].name);
+ free(ns->counters);
+ ns = ns->next;
+ free(prev);
+ }
+}
+
+static inline void
+__netstat_print_diff(uint64_t a, struct netstat *nsb, size_t i)
+{
+ if (unlikely(!strcmp(nsb->header_name, "MaxConn"))) {
+ test_print("%8s %25s: %" PRId64 " => %" PRId64,
+ nsb->header_name, nsb->counters[i].name,
+ a, nsb->counters[i].val);
+ return;
+ }
+
+ test_print("%8s %25s: %" PRIu64 " => %" PRIu64, nsb->header_name,
+ nsb->counters[i].name, a, nsb->counters[i].val);
+}
+
+void netstat_print_diff(struct netstat *nsa, struct netstat *nsb)
+{
+ size_t i, j;
+
+ while (nsb != NULL) {
+ if (unlikely(strcmp(nsb->header_name, nsa->header_name))) {
+ for (i = 0; i < nsb->counters_nr; i++)
+ __netstat_print_diff(0, nsb, i);
+ nsb = nsb->next;
+ continue;
+ }
+
+ if (nsb->counters_nr < nsa->counters_nr)
+ test_error("Unexpected: some counters disappeared!");
+
+ for (j = 0, i = 0; i < nsb->counters_nr; i++) {
+ if (strcmp(nsb->counters[i].name, nsa->counters[j].name)) {
+ __netstat_print_diff(0, nsb, i);
+ continue;
+ }
+
+ if (nsa->counters[j].val == nsb->counters[i].val) {
+ j++;
+ continue;
+ }
+
+ __netstat_print_diff(nsa->counters[j].val, nsb, i);
+ j++;
+ }
+ if (j != nsa->counters_nr)
+ test_error("Unexpected: some counters disappeared!");
+
+ nsb = nsb->next;
+ nsa = nsa->next;
+ }
+}
+
+uint64_t netstat_get(struct netstat *ns, const char *name, bool *not_found)
+{
+ if (not_found)
+ *not_found = false;
+
+ while (ns != NULL) {
+ size_t i;
+
+ for (i = 0; i < ns->counters_nr; i++) {
+ if (!strcmp(name, ns->counters[i].name))
+ return ns->counters[i].val;
+ }
+
+ ns = ns->next;
+ }
+
+ if (not_found)
+ *not_found = true;
+ return 0;
+}
diff --git a/tools/testing/selftests/net/tcp_ao/lib/repair.c b/tools/testing/selftests/net/tcp_ao/lib/repair.c
new file mode 100644
index 000000000000..9893b3ba69f5
--- /dev/null
+++ b/tools/testing/selftests/net/tcp_ao/lib/repair.c
@@ -0,0 +1,254 @@
+// SPDX-License-Identifier: GPL-2.0
+/* This is over-simplified TCP_REPAIR for TCP_ESTABLISHED sockets
+ * It tests that TCP-AO enabled connection can be restored.
+ * For the proper socket repair see:
+ * https://github.com/checkpoint-restore/criu/blob/criu-dev/soccr/soccr.h
+ */
+#include <fcntl.h>
+#include <linux/sockios.h>
+#include <sys/ioctl.h>
+#include "aolib.h"
+
+#ifndef TCPOPT_MAXSEG
+# define TCPOPT_MAXSEG 2
+#endif
+#ifndef TCPOPT_WINDOW
+# define TCPOPT_WINDOW 3
+#endif
+#ifndef TCPOPT_SACK_PERMITTED
+# define TCPOPT_SACK_PERMITTED 4
+#endif
+#ifndef TCPOPT_TIMESTAMP
+# define TCPOPT_TIMESTAMP 8
+#endif
+
+enum {
+ TCP_ESTABLISHED = 1,
+ TCP_SYN_SENT,
+ TCP_SYN_RECV,
+ TCP_FIN_WAIT1,
+ TCP_FIN_WAIT2,
+ TCP_TIME_WAIT,
+ TCP_CLOSE,
+ TCP_CLOSE_WAIT,
+ TCP_LAST_ACK,
+ TCP_LISTEN,
+ TCP_CLOSING, /* Now a valid state */
+ TCP_NEW_SYN_RECV,
+
+ TCP_MAX_STATES /* Leave at the end! */
+};
+
+static void test_sock_checkpoint_queue(int sk, int queue, int qlen,
+ struct tcp_sock_queue *q)
+{
+ socklen_t len;
+ int ret;
+
+ if (setsockopt(sk, SOL_TCP, TCP_REPAIR_QUEUE, &queue, sizeof(queue)))
+ test_error("setsockopt(TCP_REPAIR_QUEUE)");
+
+ len = sizeof(q->seq);
+ ret = getsockopt(sk, SOL_TCP, TCP_QUEUE_SEQ, &q->seq, &len);
+ if (ret || len != sizeof(q->seq))
+ test_error("getsockopt(TCP_QUEUE_SEQ): %d", (int)len);
+
+ if (!qlen) {
+ q->buf = NULL;
+ return;
+ }
+
+ q->buf = malloc(qlen);
+ if (q->buf == NULL)
+ test_error("malloc()");
+ ret = recv(sk, q->buf, qlen, MSG_PEEK | MSG_DONTWAIT);
+ if (ret != qlen)
+ test_error("recv(%d): %d", qlen, ret);
+}
+
+void __test_sock_checkpoint(int sk, struct tcp_sock_state *state,
+ void *addr, size_t addr_size)
+{
+ socklen_t len = sizeof(state->info);
+ int ret;
+
+ memset(state, 0, sizeof(*state));
+
+ ret = getsockopt(sk, SOL_TCP, TCP_INFO, &state->info, &len);
+ if (ret || len != sizeof(state->info))
+ test_error("getsockopt(TCP_INFO): %d", (int)len);
+
+ len = addr_size;
+ if (getsockname(sk, addr, &len) || len != addr_size)
+ test_error("getsockname(): %d", (int)len);
+
+ len = sizeof(state->trw);
+ ret = getsockopt(sk, SOL_TCP, TCP_REPAIR_WINDOW, &state->trw, &len);
+ if (ret || len != sizeof(state->trw))
+ test_error("getsockopt(TCP_REPAIR_WINDOW): %d", (int)len);
+
+ if (ioctl(sk, SIOCOUTQ, &state->outq_len))
+ test_error("ioctl(SIOCOUTQ)");
+
+ if (ioctl(sk, SIOCOUTQNSD, &state->outq_nsd_len))
+ test_error("ioctl(SIOCOUTQNSD)");
+ test_sock_checkpoint_queue(sk, TCP_SEND_QUEUE, state->outq_len, &state->out);
+
+ if (ioctl(sk, SIOCINQ, &state->inq_len))
+ test_error("ioctl(SIOCINQ)");
+ test_sock_checkpoint_queue(sk, TCP_RECV_QUEUE, state->inq_len, &state->in);
+
+ if (state->info.tcpi_state == TCP_CLOSE)
+ state->outq_len = state->outq_nsd_len = 0;
+
+ len = sizeof(state->mss);
+ ret = getsockopt(sk, SOL_TCP, TCP_MAXSEG, &state->mss, &len);
+ if (ret || len != sizeof(state->mss))
+ test_error("getsockopt(TCP_MAXSEG): %d", (int)len);
+
+ len = sizeof(state->timestamp);
+ ret = getsockopt(sk, SOL_TCP, TCP_TIMESTAMP, &state->timestamp, &len);
+ if (ret || len != sizeof(state->timestamp))
+ test_error("getsockopt(TCP_TIMESTAMP): %d", (int)len);
+}
+
+void test_ao_checkpoint(int sk, struct tcp_ao_repair *state)
+{
+ socklen_t len = sizeof(*state);
+ int ret;
+
+ memset(state, 0, sizeof(*state));
+
+ ret = getsockopt(sk, SOL_TCP, TCP_AO_REPAIR, state, &len);
+ if (ret || len != sizeof(*state))
+ test_error("getsockopt(TCP_AO_REPAIR): %d", (int)len);
+}
+
+static void test_sock_restore_seq(int sk, int queue, uint32_t seq)
+{
+ if (setsockopt(sk, SOL_TCP, TCP_REPAIR_QUEUE, &queue, sizeof(queue)))
+ test_error("setsockopt(TCP_REPAIR_QUEUE)");
+
+ if (setsockopt(sk, SOL_TCP, TCP_QUEUE_SEQ, &seq, sizeof(seq)))
+ test_error("setsockopt(TCP_QUEUE_SEQ)");
+}
+
+static void test_sock_restore_queue(int sk, int queue, void *buf, int len)
+{
+ int chunk = len;
+ size_t off = 0;
+
+ if (len == 0)
+ return;
+
+ if (setsockopt(sk, SOL_TCP, TCP_REPAIR_QUEUE, &queue, sizeof(queue)))
+ test_error("setsockopt(TCP_REPAIR_QUEUE)");
+
+ do {
+ int ret;
+
+ ret = send(sk, buf + off, chunk, 0);
+ if (ret <= 0) {
+ if (chunk > 1024) {
+ chunk >>= 1;
+ continue;
+ }
+ test_error("send()");
+ }
+ off += ret;
+ len -= ret;
+ } while (len > 0);
+}
+
+void __test_sock_restore(int sk, const char *device,
+ struct tcp_sock_state *state,
+ void *saddr, void *daddr, size_t addr_size)
+{
+ struct tcp_repair_opt opts[4];
+ unsigned int opt_nr = 0;
+ long flags;
+
+ if (bind(sk, saddr, addr_size))
+ test_error("bind()");
+
+ flags = fcntl(sk, F_GETFL);
+ if ((flags < 0) || (fcntl(sk, F_SETFL, flags | O_NONBLOCK) < 0))
+ test_error("fcntl()");
+
+ test_sock_restore_seq(sk, TCP_RECV_QUEUE, state->in.seq - state->inq_len);
+ test_sock_restore_seq(sk, TCP_SEND_QUEUE, state->out.seq - state->outq_len);
+
+ if (device != NULL && setsockopt(sk, SOL_SOCKET, SO_BINDTODEVICE,
+ device, strlen(device) + 1))
+ test_error("setsockopt(SO_BINDTODEVICE, %s)", device);
+
+ if (connect(sk, daddr, addr_size))
+ test_error("connect()");
+
+ if (state->info.tcpi_options & TCPI_OPT_SACK) {
+ opts[opt_nr].opt_code = TCPOPT_SACK_PERMITTED;
+ opts[opt_nr].opt_val = 0;
+ opt_nr++;
+ }
+ if (state->info.tcpi_options & TCPI_OPT_WSCALE) {
+ opts[opt_nr].opt_code = TCPOPT_WINDOW;
+ opts[opt_nr].opt_val = state->info.tcpi_snd_wscale +
+ (state->info.tcpi_rcv_wscale << 16);
+ opt_nr++;
+ }
+ if (state->info.tcpi_options & TCPI_OPT_TIMESTAMPS) {
+ opts[opt_nr].opt_code = TCPOPT_TIMESTAMP;
+ opts[opt_nr].opt_val = 0;
+ opt_nr++;
+ }
+ opts[opt_nr].opt_code = TCPOPT_MAXSEG;
+ opts[opt_nr].opt_val = state->mss;
+ opt_nr++;
+
+ if (setsockopt(sk, SOL_TCP, TCP_REPAIR_OPTIONS, opts, opt_nr * sizeof(opts[0])))
+ test_error("setsockopt(TCP_REPAIR_OPTIONS)");
+
+ if (state->info.tcpi_options & TCPI_OPT_TIMESTAMPS) {
+ if (setsockopt(sk, SOL_TCP, TCP_TIMESTAMP,
+ &state->timestamp, opt_nr * sizeof(opts[0])))
+ test_error("setsockopt(TCP_TIMESTAMP)");
+ }
+ test_sock_restore_queue(sk, TCP_RECV_QUEUE, state->in.buf, state->inq_len);
+ test_sock_restore_queue(sk, TCP_SEND_QUEUE, state->out.buf, state->outq_len);
+ if (setsockopt(sk, SOL_TCP, TCP_REPAIR_WINDOW, &state->trw, sizeof(state->trw)))
+ test_error("setsockopt(TCP_REPAIR_WINDOW)");
+}
+
+void test_ao_restore(int sk, struct tcp_ao_repair *state)
+{
+ if (setsockopt(sk, SOL_TCP, TCP_AO_REPAIR, state, sizeof(*state)))
+ test_error("setsockopt(TCP_AO_REPAIR)");
+}
+
+void test_sock_state_free(struct tcp_sock_state *state)
+{
+ free(state->out.buf);
+ free(state->in.buf);
+}
+
+void test_enable_repair(int sk)
+{
+ int val = TCP_REPAIR_ON;
+
+ if (setsockopt(sk, SOL_TCP, TCP_REPAIR, &val, sizeof(val)))
+ test_error("setsockopt(TCP_REPAIR)");
+}
+
+void test_disable_repair(int sk)
+{
+ int val = TCP_REPAIR_OFF_NO_WP;
+
+ if (setsockopt(sk, SOL_TCP, TCP_REPAIR, &val, sizeof(val)))
+ test_error("setsockopt(TCP_REPAIR)");
+}
+
+void test_kill_sk(int sk)
+{
+ test_enable_repair(sk);
+ close(sk);
+}
diff --git a/tools/testing/selftests/net/tcp_ao/lib/setup.c b/tools/testing/selftests/net/tcp_ao/lib/setup.c
new file mode 100644
index 000000000000..92276f916f2f
--- /dev/null
+++ b/tools/testing/selftests/net/tcp_ao/lib/setup.c
@@ -0,0 +1,361 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <fcntl.h>
+#include <pthread.h>
+#include <sched.h>
+#include <signal.h>
+#include "aolib.h"
+
+/*
+ * Can't be included in the header: it defines static variables which
+ * will be unique to every object. Let's include it only once here.
+ */
+#include "../../../kselftest.h"
+
+/* Prevent overriding of one thread's output by another */
+static pthread_mutex_t ksft_print_lock = PTHREAD_MUTEX_INITIALIZER;
+
+void __test_msg(const char *buf)
+{
+ pthread_mutex_lock(&ksft_print_lock);
+ ksft_print_msg(buf);
+ pthread_mutex_unlock(&ksft_print_lock);
+}
+void __test_ok(const char *buf)
+{
+ pthread_mutex_lock(&ksft_print_lock);
+ ksft_test_result_pass(buf);
+ pthread_mutex_unlock(&ksft_print_lock);
+}
+void __test_fail(const char *buf)
+{
+ pthread_mutex_lock(&ksft_print_lock);
+ ksft_test_result_fail(buf);
+ pthread_mutex_unlock(&ksft_print_lock);
+}
+void __test_xfail(const char *buf)
+{
+ pthread_mutex_lock(&ksft_print_lock);
+ ksft_test_result_xfail(buf);
+ pthread_mutex_unlock(&ksft_print_lock);
+}
+void __test_error(const char *buf)
+{
+ pthread_mutex_lock(&ksft_print_lock);
+ ksft_test_result_error(buf);
+ pthread_mutex_unlock(&ksft_print_lock);
+}
+void __test_skip(const char *buf)
+{
+ pthread_mutex_lock(&ksft_print_lock);
+ ksft_test_result_skip(buf);
+ pthread_mutex_unlock(&ksft_print_lock);
+}
+
+static volatile int failed;
+static volatile int skipped;
+
+void test_failed(void)
+{
+ failed = 1;
+}
+
+static void test_exit(void)
+{
+ if (failed) {
+ ksft_exit_fail();
+ } else if (skipped) {
+ /* ksft_exit_skip() is different from ksft_exit_*() */
+ ksft_print_cnts();
+ exit(KSFT_SKIP);
+ } else {
+ ksft_exit_pass();
+ }
+}
+
+struct dlist_t {
+ void (*destruct)(void);
+ struct dlist_t *next;
+};
+static struct dlist_t *destructors_list;
+
+void test_add_destructor(void (*d)(void))
+{
+ struct dlist_t *p;
+
+ p = malloc(sizeof(struct dlist_t));
+ if (p == NULL)
+ test_error("malloc() failed");
+
+ p->next = destructors_list;
+ p->destruct = d;
+ destructors_list = p;
+}
+
+static void test_destructor(void) __attribute__((destructor));
+static void test_destructor(void)
+{
+ while (destructors_list) {
+ struct dlist_t *p = destructors_list->next;
+
+ destructors_list->destruct();
+ free(destructors_list);
+ destructors_list = p;
+ }
+ test_exit();
+}
+
+static void sig_int(int signo)
+{
+ test_error("Caught SIGINT - exiting");
+}
+
+int open_netns(void)
+{
+ const char *netns_path = "/proc/self/ns/net";
+ int fd;
+
+ fd = open(netns_path, O_RDONLY);
+ if (fd < 0)
+ test_error("open(%s)", netns_path);
+ return fd;
+}
+
+int unshare_open_netns(void)
+{
+ if (unshare(CLONE_NEWNET) != 0)
+ test_error("unshare()");
+
+ return open_netns();
+}
+
+void switch_ns(int fd)
+{
+ if (setns(fd, CLONE_NEWNET))
+ test_error("setns()");
+}
+
+int switch_save_ns(int new_ns)
+{
+ int ret = open_netns();
+
+ switch_ns(new_ns);
+ return ret;
+}
+
+static int nsfd_outside = -1;
+static int nsfd_parent = -1;
+static int nsfd_child = -1;
+const char veth_name[] = "ktst-veth";
+
+static void init_namespaces(void)
+{
+ nsfd_outside = open_netns();
+ nsfd_parent = unshare_open_netns();
+ nsfd_child = unshare_open_netns();
+}
+
+static void link_init(const char *veth, int family, uint8_t prefix,
+ union tcp_addr addr, union tcp_addr dest)
+{
+ if (link_set_up(veth))
+ test_error("Failed to set link up");
+ if (ip_addr_add(veth, family, addr, prefix))
+ test_error("Failed to add ip address");
+ if (ip_route_add(veth, family, addr, dest))
+ test_error("Failed to add route");
+}
+
+static unsigned int nr_threads = 1;
+
+static pthread_mutex_t sync_lock = PTHREAD_MUTEX_INITIALIZER;
+static pthread_cond_t sync_cond = PTHREAD_COND_INITIALIZER;
+static volatile unsigned int stage_threads[2];
+static volatile unsigned int stage_nr;
+
+/* synchronize all threads in the same stage */
+void synchronize_threads(void)
+{
+ unsigned int q = stage_nr;
+
+ pthread_mutex_lock(&sync_lock);
+ stage_threads[q]++;
+ if (stage_threads[q] == nr_threads) {
+ stage_nr ^= 1;
+ stage_threads[stage_nr] = 0;
+ pthread_cond_signal(&sync_cond);
+ }
+ while (stage_threads[q] < nr_threads)
+ pthread_cond_wait(&sync_cond, &sync_lock);
+ pthread_mutex_unlock(&sync_lock);
+}
+
+__thread union tcp_addr this_ip_addr;
+__thread union tcp_addr this_ip_dest;
+int test_family;
+
+struct new_pthread_arg {
+ thread_fn func;
+ union tcp_addr my_ip;
+ union tcp_addr dest_ip;
+};
+static void *new_pthread_entry(void *arg)
+{
+ struct new_pthread_arg *p = arg;
+
+ this_ip_addr = p->my_ip;
+ this_ip_dest = p->dest_ip;
+ p->func(NULL); /* shouldn't return */
+ exit(KSFT_FAIL);
+}
+
+static void __test_skip_all(const char *msg)
+{
+ ksft_set_plan(1);
+ ksft_print_header();
+ skipped = 1;
+ test_skip("%s", msg);
+ exit(KSFT_SKIP);
+}
+
+void __test_init(unsigned int ntests, int family, unsigned int prefix,
+ union tcp_addr addr1, union tcp_addr addr2,
+ thread_fn peer1, thread_fn peer2)
+{
+ struct sigaction sa = {
+ .sa_handler = sig_int,
+ .sa_flags = SA_RESTART,
+ };
+ time_t seed = time(NULL);
+
+ sigemptyset(&sa.sa_mask);
+ if (sigaction(SIGINT, &sa, NULL))
+ test_error("Can't set SIGINT handler");
+
+ test_family = family;
+ if (!kernel_config_has(KCONFIG_NET_NS))
+ __test_skip_all(tests_skip_reason[KCONFIG_NET_NS]);
+ if (!kernel_config_has(KCONFIG_VETH))
+ __test_skip_all(tests_skip_reason[KCONFIG_VETH]);
+ if (!kernel_config_has(KCONFIG_TCP_AO))
+ __test_skip_all(tests_skip_reason[KCONFIG_TCP_AO]);
+
+ ksft_set_plan(ntests);
+ test_print("rand seed %u", (unsigned int)seed);
+ srand(seed);
+
+
+ ksft_print_header();
+ init_namespaces();
+
+ if (add_veth(veth_name, nsfd_parent, nsfd_child))
+ test_error("Failed to add veth");
+
+ switch_ns(nsfd_child);
+ link_init(veth_name, family, prefix, addr2, addr1);
+ if (peer2) {
+ struct new_pthread_arg targ;
+ pthread_t t;
+
+ targ.my_ip = addr2;
+ targ.dest_ip = addr1;
+ targ.func = peer2;
+ nr_threads++;
+ if (pthread_create(&t, NULL, new_pthread_entry, &targ))
+ test_error("Failed to create pthread");
+ }
+ switch_ns(nsfd_parent);
+ link_init(veth_name, family, prefix, addr1, addr2);
+
+ this_ip_addr = addr1;
+ this_ip_dest = addr2;
+ peer1(NULL);
+ if (failed)
+ exit(KSFT_FAIL);
+ else
+ exit(KSFT_PASS);
+}
+
+/* /proc/sys/net/core/optmem_max artifically limits the amount of memory
+ * that can be allocated with sock_kmalloc() on each socket in the system.
+ * It is not virtualized in v6.7, so it has to written outside test
+ * namespaces. To be nice a test will revert optmem back to the old value.
+ * Keeping it simple without any file lock, which means the tests that
+ * need to set/increase optmem value shouldn't run in parallel.
+ * Also, not re-entrant.
+ * Since commit f5769faeec36 ("net: Namespace-ify sysctl_optmem_max")
+ * it is per-namespace, keeping logic for non-virtualized optmem_max
+ * for v6.7, which supports TCP-AO.
+ */
+static const char *optmem_file = "/proc/sys/net/core/optmem_max";
+static size_t saved_optmem;
+static int optmem_ns = -1;
+
+static bool is_optmem_namespaced(void)
+{
+ if (optmem_ns == -1) {
+ int old_ns = switch_save_ns(nsfd_child);
+
+ optmem_ns = !access(optmem_file, F_OK);
+ switch_ns(old_ns);
+ }
+ return !!optmem_ns;
+}
+
+size_t test_get_optmem(void)
+{
+ int old_ns = 0;
+ FILE *foptmem;
+ size_t ret;
+
+ if (!is_optmem_namespaced())
+ old_ns = switch_save_ns(nsfd_outside);
+ foptmem = fopen(optmem_file, "r");
+ if (!foptmem)
+ test_error("failed to open %s", optmem_file);
+
+ if (fscanf(foptmem, "%zu", &ret) != 1)
+ test_error("can't read from %s", optmem_file);
+ fclose(foptmem);
+ if (!is_optmem_namespaced())
+ switch_ns(old_ns);
+ return ret;
+}
+
+static void __test_set_optmem(size_t new, size_t *old)
+{
+ int old_ns = 0;
+ FILE *foptmem;
+
+ if (old != NULL)
+ *old = test_get_optmem();
+
+ if (!is_optmem_namespaced())
+ old_ns = switch_save_ns(nsfd_outside);
+ foptmem = fopen(optmem_file, "w");
+ if (!foptmem)
+ test_error("failed to open %s", optmem_file);
+
+ if (fprintf(foptmem, "%zu", new) <= 0)
+ test_error("can't write %zu to %s", new, optmem_file);
+ fclose(foptmem);
+ if (!is_optmem_namespaced())
+ switch_ns(old_ns);
+}
+
+static void test_revert_optmem(void)
+{
+ if (saved_optmem == 0)
+ return;
+
+ __test_set_optmem(saved_optmem, NULL);
+}
+
+void test_set_optmem(size_t value)
+{
+ if (saved_optmem == 0) {
+ __test_set_optmem(value, &saved_optmem);
+ test_add_destructor(test_revert_optmem);
+ } else {
+ __test_set_optmem(value, NULL);
+ }
+}
diff --git a/tools/testing/selftests/net/tcp_ao/lib/sock.c b/tools/testing/selftests/net/tcp_ao/lib/sock.c
new file mode 100644
index 000000000000..15aeb0963058
--- /dev/null
+++ b/tools/testing/selftests/net/tcp_ao/lib/sock.c
@@ -0,0 +1,596 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <alloca.h>
+#include <fcntl.h>
+#include <inttypes.h>
+#include <string.h>
+#include "../../../../../include/linux/kernel.h"
+#include "../../../../../include/linux/stringify.h"
+#include "aolib.h"
+
+const unsigned int test_server_port = 7010;
+int __test_listen_socket(int backlog, void *addr, size_t addr_sz)
+{
+ int err, sk = socket(test_family, SOCK_STREAM, IPPROTO_TCP);
+ long flags;
+
+ if (sk < 0)
+ test_error("socket()");
+
+ err = setsockopt(sk, SOL_SOCKET, SO_BINDTODEVICE, veth_name,
+ strlen(veth_name) + 1);
+ if (err < 0)
+ test_error("setsockopt(SO_BINDTODEVICE)");
+
+ if (bind(sk, (struct sockaddr *)addr, addr_sz) < 0)
+ test_error("bind()");
+
+ flags = fcntl(sk, F_GETFL);
+ if ((flags < 0) || (fcntl(sk, F_SETFL, flags | O_NONBLOCK) < 0))
+ test_error("fcntl()");
+
+ if (listen(sk, backlog))
+ test_error("listen()");
+
+ return sk;
+}
+
+int test_wait_fd(int sk, time_t sec, bool write)
+{
+ struct timeval tv = { .tv_sec = sec };
+ struct timeval *ptv = NULL;
+ fd_set fds, efds;
+ int ret;
+ socklen_t slen = sizeof(ret);
+
+ FD_ZERO(&fds);
+ FD_SET(sk, &fds);
+ FD_ZERO(&efds);
+ FD_SET(sk, &efds);
+
+ if (sec)
+ ptv = &tv;
+
+ errno = 0;
+ if (write)
+ ret = select(sk + 1, NULL, &fds, &efds, ptv);
+ else
+ ret = select(sk + 1, &fds, NULL, &efds, ptv);
+ if (ret < 0)
+ return -errno;
+ if (ret == 0) {
+ errno = ETIMEDOUT;
+ return -ETIMEDOUT;
+ }
+
+ if (getsockopt(sk, SOL_SOCKET, SO_ERROR, &ret, &slen))
+ return -errno;
+ if (ret)
+ return -ret;
+ return 0;
+}
+
+int __test_connect_socket(int sk, const char *device,
+ void *addr, size_t addr_sz, time_t timeout)
+{
+ long flags;
+ int err;
+
+ if (device != NULL) {
+ err = setsockopt(sk, SOL_SOCKET, SO_BINDTODEVICE, device,
+ strlen(device) + 1);
+ if (err < 0)
+ test_error("setsockopt(SO_BINDTODEVICE, %s)", device);
+ }
+
+ if (!timeout) {
+ err = connect(sk, addr, addr_sz);
+ if (err) {
+ err = -errno;
+ goto out;
+ }
+ return 0;
+ }
+
+ flags = fcntl(sk, F_GETFL);
+ if ((flags < 0) || (fcntl(sk, F_SETFL, flags | O_NONBLOCK) < 0))
+ test_error("fcntl()");
+
+ if (connect(sk, addr, addr_sz) < 0) {
+ if (errno != EINPROGRESS) {
+ err = -errno;
+ goto out;
+ }
+ if (timeout < 0)
+ return sk;
+ err = test_wait_fd(sk, timeout, 1);
+ if (err)
+ goto out;
+ }
+ return sk;
+
+out:
+ close(sk);
+ return err;
+}
+
+int __test_set_md5(int sk, void *addr, size_t addr_sz, uint8_t prefix,
+ int vrf, const char *password)
+{
+ size_t pwd_len = strlen(password);
+ struct tcp_md5sig md5sig = {};
+
+ md5sig.tcpm_keylen = pwd_len;
+ memcpy(md5sig.tcpm_key, password, pwd_len);
+ md5sig.tcpm_flags = TCP_MD5SIG_FLAG_PREFIX;
+ md5sig.tcpm_prefixlen = prefix;
+ if (vrf >= 0) {
+ md5sig.tcpm_flags |= TCP_MD5SIG_FLAG_IFINDEX;
+ md5sig.tcpm_ifindex = (uint8_t)vrf;
+ }
+ memcpy(&md5sig.tcpm_addr, addr, addr_sz);
+
+ errno = 0;
+ return setsockopt(sk, IPPROTO_TCP, TCP_MD5SIG_EXT,
+ &md5sig, sizeof(md5sig));
+}
+
+
+int test_prepare_key_sockaddr(struct tcp_ao_add *ao, const char *alg,
+ void *addr, size_t addr_sz, bool set_current, bool set_rnext,
+ uint8_t prefix, uint8_t vrf, uint8_t sndid, uint8_t rcvid,
+ uint8_t maclen, uint8_t keyflags,
+ uint8_t keylen, const char *key)
+{
+ memset(ao, 0, sizeof(struct tcp_ao_add));
+
+ ao->set_current = !!set_current;
+ ao->set_rnext = !!set_rnext;
+ ao->prefix = prefix;
+ ao->sndid = sndid;
+ ao->rcvid = rcvid;
+ ao->maclen = maclen;
+ ao->keyflags = keyflags;
+ ao->keylen = keylen;
+ ao->ifindex = vrf;
+
+ memcpy(&ao->addr, addr, addr_sz);
+
+ if (strlen(alg) > 64)
+ return -ENOBUFS;
+ strncpy(ao->alg_name, alg, 64);
+
+ memcpy(ao->key, key,
+ (keylen > TCP_AO_MAXKEYLEN) ? TCP_AO_MAXKEYLEN : keylen);
+ return 0;
+}
+
+static int test_get_ao_keys_nr(int sk)
+{
+ struct tcp_ao_getsockopt tmp = {};
+ socklen_t tmp_sz = sizeof(tmp);
+ int ret;
+
+ tmp.nkeys = 1;
+ tmp.get_all = 1;
+
+ ret = getsockopt(sk, IPPROTO_TCP, TCP_AO_GET_KEYS, &tmp, &tmp_sz);
+ if (ret)
+ return -errno;
+ return (int)tmp.nkeys;
+}
+
+int test_get_one_ao(int sk, struct tcp_ao_getsockopt *out,
+ void *addr, size_t addr_sz, uint8_t prefix,
+ uint8_t sndid, uint8_t rcvid)
+{
+ struct tcp_ao_getsockopt tmp = {};
+ socklen_t tmp_sz = sizeof(tmp);
+ int ret;
+
+ memcpy(&tmp.addr, addr, addr_sz);
+ tmp.prefix = prefix;
+ tmp.sndid = sndid;
+ tmp.rcvid = rcvid;
+ tmp.nkeys = 1;
+
+ ret = getsockopt(sk, IPPROTO_TCP, TCP_AO_GET_KEYS, &tmp, &tmp_sz);
+ if (ret)
+ return ret;
+ if (tmp.nkeys != 1)
+ return -E2BIG;
+ *out = tmp;
+ return 0;
+}
+
+int test_get_ao_info(int sk, struct tcp_ao_info_opt *out)
+{
+ socklen_t sz = sizeof(*out);
+
+ out->reserved = 0;
+ out->reserved2 = 0;
+ if (getsockopt(sk, IPPROTO_TCP, TCP_AO_INFO, out, &sz))
+ return -errno;
+ if (sz != sizeof(*out))
+ return -EMSGSIZE;
+ return 0;
+}
+
+int test_set_ao_info(int sk, struct tcp_ao_info_opt *in)
+{
+ socklen_t sz = sizeof(*in);
+
+ in->reserved = 0;
+ in->reserved2 = 0;
+ if (setsockopt(sk, IPPROTO_TCP, TCP_AO_INFO, in, sz))
+ return -errno;
+ return 0;
+}
+
+int test_cmp_getsockopt_setsockopt(const struct tcp_ao_add *a,
+ const struct tcp_ao_getsockopt *b)
+{
+ bool is_kdf_aes_128_cmac = false;
+ bool is_cmac_aes = false;
+
+ if (!strcmp("cmac(aes128)", a->alg_name)) {
+ is_kdf_aes_128_cmac = (a->keylen != 16);
+ is_cmac_aes = true;
+ }
+
+#define __cmp_ao(member) \
+do { \
+ if (b->member != a->member) { \
+ test_fail("getsockopt(): " __stringify(member) " %u != %u", \
+ b->member, a->member); \
+ return -1; \
+ } \
+} while(0)
+ __cmp_ao(sndid);
+ __cmp_ao(rcvid);
+ __cmp_ao(prefix);
+ __cmp_ao(keyflags);
+ __cmp_ao(ifindex);
+ if (a->maclen) {
+ __cmp_ao(maclen);
+ } else if (b->maclen != 12) {
+ test_fail("getsockopt(): expected default maclen 12, but it's %u",
+ b->maclen);
+ return -1;
+ }
+ if (!is_kdf_aes_128_cmac) {
+ __cmp_ao(keylen);
+ } else if (b->keylen != 16) {
+ test_fail("getsockopt(): expected keylen 16 for cmac(aes128), but it's %u",
+ b->keylen);
+ return -1;
+ }
+#undef __cmp_ao
+ if (!is_kdf_aes_128_cmac && memcmp(b->key, a->key, a->keylen)) {
+ test_fail("getsockopt(): returned key is different `%s' != `%s'",
+ b->key, a->key);
+ return -1;
+ }
+ if (memcmp(&b->addr, &a->addr, sizeof(b->addr))) {
+ test_fail("getsockopt(): returned address is different");
+ return -1;
+ }
+ if (!is_cmac_aes && strcmp(b->alg_name, a->alg_name)) {
+ test_fail("getsockopt(): returned algorithm %s is different than %s", b->alg_name, a->alg_name);
+ return -1;
+ }
+ if (is_cmac_aes && strcmp(b->alg_name, "cmac(aes)")) {
+ test_fail("getsockopt(): returned algorithm %s is different than cmac(aes)", b->alg_name);
+ return -1;
+ }
+ /* For a established key rotation test don't add a key with
+ * set_current = 1, as it's likely to change by peer's request;
+ * rather use setsockopt(TCP_AO_INFO)
+ */
+ if (a->set_current != b->is_current) {
+ test_fail("getsockopt(): returned key is not Current_key");
+ return -1;
+ }
+ if (a->set_rnext != b->is_rnext) {
+ test_fail("getsockopt(): returned key is not RNext_key");
+ return -1;
+ }
+
+ return 0;
+}
+
+int test_cmp_getsockopt_setsockopt_ao(const struct tcp_ao_info_opt *a,
+ const struct tcp_ao_info_opt *b)
+{
+ /* No check for ::current_key, as it may change by the peer */
+ if (a->ao_required != b->ao_required) {
+ test_fail("getsockopt(): returned ao doesn't have ao_required");
+ return -1;
+ }
+ if (a->accept_icmps != b->accept_icmps) {
+ test_fail("getsockopt(): returned ao doesn't accept ICMPs");
+ return -1;
+ }
+ if (a->set_rnext && a->rnext != b->rnext) {
+ test_fail("getsockopt(): RNext KeyID has changed");
+ return -1;
+ }
+#define __cmp_cnt(member) \
+do { \
+ if (b->member != a->member) { \
+ test_fail("getsockopt(): " __stringify(member) " %llu != %llu", \
+ b->member, a->member); \
+ return -1; \
+ } \
+} while(0)
+ if (a->set_counters) {
+ __cmp_cnt(pkt_good);
+ __cmp_cnt(pkt_bad);
+ __cmp_cnt(pkt_key_not_found);
+ __cmp_cnt(pkt_ao_required);
+ __cmp_cnt(pkt_dropped_icmp);
+ }
+#undef __cmp_cnt
+ return 0;
+}
+
+int test_get_tcp_ao_counters(int sk, struct tcp_ao_counters *out)
+{
+ struct tcp_ao_getsockopt *key_dump;
+ socklen_t key_dump_sz = sizeof(*key_dump);
+ struct tcp_ao_info_opt info = {};
+ bool c1, c2, c3, c4, c5;
+ struct netstat *ns;
+ int err, nr_keys;
+
+ memset(out, 0, sizeof(*out));
+
+ /* per-netns */
+ ns = netstat_read();
+ out->netns_ao_good = netstat_get(ns, "TCPAOGood", &c1);
+ out->netns_ao_bad = netstat_get(ns, "TCPAOBad", &c2);
+ out->netns_ao_key_not_found = netstat_get(ns, "TCPAOKeyNotFound", &c3);
+ out->netns_ao_required = netstat_get(ns, "TCPAORequired", &c4);
+ out->netns_ao_dropped_icmp = netstat_get(ns, "TCPAODroppedIcmps", &c5);
+ netstat_free(ns);
+ if (c1 || c2 || c3 || c4 || c5)
+ return -EOPNOTSUPP;
+
+ err = test_get_ao_info(sk, &info);
+ if (err)
+ return err;
+
+ /* per-socket */
+ out->ao_info_pkt_good = info.pkt_good;
+ out->ao_info_pkt_bad = info.pkt_bad;
+ out->ao_info_pkt_key_not_found = info.pkt_key_not_found;
+ out->ao_info_pkt_ao_required = info.pkt_ao_required;
+ out->ao_info_pkt_dropped_icmp = info.pkt_dropped_icmp;
+
+ /* per-key */
+ nr_keys = test_get_ao_keys_nr(sk);
+ if (nr_keys < 0)
+ return nr_keys;
+ if (nr_keys == 0)
+ test_error("test_get_ao_keys_nr() == 0");
+ out->nr_keys = (size_t)nr_keys;
+ key_dump = calloc(nr_keys, key_dump_sz);
+ if (!key_dump)
+ return -errno;
+
+ key_dump[0].nkeys = nr_keys;
+ key_dump[0].get_all = 1;
+ key_dump[0].get_all = 1;
+ err = getsockopt(sk, IPPROTO_TCP, TCP_AO_GET_KEYS,
+ key_dump, &key_dump_sz);
+ if (err) {
+ free(key_dump);
+ return -errno;
+ }
+
+ out->key_cnts = calloc(nr_keys, sizeof(out->key_cnts[0]));
+ if (!out->key_cnts) {
+ free(key_dump);
+ return -errno;
+ }
+
+ while (nr_keys--) {
+ out->key_cnts[nr_keys].sndid = key_dump[nr_keys].sndid;
+ out->key_cnts[nr_keys].rcvid = key_dump[nr_keys].rcvid;
+ out->key_cnts[nr_keys].pkt_good = key_dump[nr_keys].pkt_good;
+ out->key_cnts[nr_keys].pkt_bad = key_dump[nr_keys].pkt_bad;
+ }
+ free(key_dump);
+
+ return 0;
+}
+
+int __test_tcp_ao_counters_cmp(const char *tst_name,
+ struct tcp_ao_counters *before,
+ struct tcp_ao_counters *after,
+ test_cnt expected)
+{
+#define __cmp_ao(cnt, expecting_inc) \
+do { \
+ if (before->cnt > after->cnt) { \
+ test_fail("%s: Decreased counter " __stringify(cnt) " %" PRIu64 " > %" PRIu64, \
+ tst_name ?: "", before->cnt, after->cnt); \
+ return -1; \
+ } \
+ if ((before->cnt != after->cnt) != (expecting_inc)) { \
+ test_fail("%s: Counter " __stringify(cnt) " was %sexpected to increase %" PRIu64 " => %" PRIu64, \
+ tst_name ?: "", (expecting_inc) ? "" : "not ", \
+ before->cnt, after->cnt); \
+ return -1; \
+ } \
+} while(0)
+
+ errno = 0;
+ /* per-netns */
+ __cmp_ao(netns_ao_good, !!(expected & TEST_CNT_NS_GOOD));
+ __cmp_ao(netns_ao_bad, !!(expected & TEST_CNT_NS_BAD));
+ __cmp_ao(netns_ao_key_not_found,
+ !!(expected & TEST_CNT_NS_KEY_NOT_FOUND));
+ __cmp_ao(netns_ao_required, !!(expected & TEST_CNT_NS_AO_REQUIRED));
+ __cmp_ao(netns_ao_dropped_icmp,
+ !!(expected & TEST_CNT_NS_DROPPED_ICMP));
+ /* per-socket */
+ __cmp_ao(ao_info_pkt_good, !!(expected & TEST_CNT_SOCK_GOOD));
+ __cmp_ao(ao_info_pkt_bad, !!(expected & TEST_CNT_SOCK_BAD));
+ __cmp_ao(ao_info_pkt_key_not_found,
+ !!(expected & TEST_CNT_SOCK_KEY_NOT_FOUND));
+ __cmp_ao(ao_info_pkt_ao_required, !!(expected & TEST_CNT_SOCK_AO_REQUIRED));
+ __cmp_ao(ao_info_pkt_dropped_icmp,
+ !!(expected & TEST_CNT_SOCK_DROPPED_ICMP));
+ return 0;
+#undef __cmp_ao
+}
+
+int test_tcp_ao_key_counters_cmp(const char *tst_name,
+ struct tcp_ao_counters *before,
+ struct tcp_ao_counters *after,
+ test_cnt expected,
+ int sndid, int rcvid)
+{
+ size_t i;
+#define __cmp_ao(i, cnt, expecting_inc) \
+do { \
+ if (before->key_cnts[i].cnt > after->key_cnts[i].cnt) { \
+ test_fail("%s: Decreased counter " __stringify(cnt) " %" PRIu64 " > %" PRIu64 " for key %u:%u", \
+ tst_name ?: "", before->key_cnts[i].cnt, \
+ after->key_cnts[i].cnt, \
+ before->key_cnts[i].sndid, \
+ before->key_cnts[i].rcvid); \
+ return -1; \
+ } \
+ if ((before->key_cnts[i].cnt != after->key_cnts[i].cnt) != (expecting_inc)) { \
+ test_fail("%s: Counter " __stringify(cnt) " was %sexpected to increase %" PRIu64 " => %" PRIu64 " for key %u:%u", \
+ tst_name ?: "", (expecting_inc) ? "" : "not ",\
+ before->key_cnts[i].cnt, \
+ after->key_cnts[i].cnt, \
+ before->key_cnts[i].sndid, \
+ before->key_cnts[i].rcvid); \
+ return -1; \
+ } \
+} while(0)
+
+ if (before->nr_keys != after->nr_keys) {
+ test_fail("%s: Keys changed on the socket %zu != %zu",
+ tst_name, before->nr_keys, after->nr_keys);
+ return -1;
+ }
+
+ /* per-key */
+ i = before->nr_keys;
+ while (i--) {
+ if (sndid >= 0 && before->key_cnts[i].sndid != sndid)
+ continue;
+ if (rcvid >= 0 && before->key_cnts[i].rcvid != rcvid)
+ continue;
+ __cmp_ao(i, pkt_good, !!(expected & TEST_CNT_KEY_GOOD));
+ __cmp_ao(i, pkt_bad, !!(expected & TEST_CNT_KEY_BAD));
+ }
+ return 0;
+#undef __cmp_ao
+}
+
+void test_tcp_ao_counters_free(struct tcp_ao_counters *cnts)
+{
+ free(cnts->key_cnts);
+}
+
+#define TEST_BUF_SIZE 4096
+ssize_t test_server_run(int sk, ssize_t quota, time_t timeout_sec)
+{
+ ssize_t total = 0;
+
+ do {
+ char buf[TEST_BUF_SIZE];
+ ssize_t bytes, sent;
+ int ret;
+
+ ret = test_wait_fd(sk, timeout_sec, 0);
+ if (ret)
+ return ret;
+
+ bytes = recv(sk, buf, sizeof(buf), 0);
+
+ if (bytes < 0)
+ test_error("recv(): %zd", bytes);
+ if (bytes == 0)
+ break;
+
+ ret = test_wait_fd(sk, timeout_sec, 1);
+ if (ret)
+ return ret;
+
+ sent = send(sk, buf, bytes, 0);
+ if (sent == 0)
+ break;
+ if (sent != bytes)
+ test_error("send()");
+ total += bytes;
+ } while (!quota || total < quota);
+
+ return total;
+}
+
+ssize_t test_client_loop(int sk, char *buf, size_t buf_sz,
+ const size_t msg_len, time_t timeout_sec)
+{
+ char msg[msg_len];
+ int nodelay = 1;
+ size_t i;
+
+ if (setsockopt(sk, IPPROTO_TCP, TCP_NODELAY, &nodelay, sizeof(nodelay)))
+ test_error("setsockopt(TCP_NODELAY)");
+
+ for (i = 0; i < buf_sz; i += min(msg_len, buf_sz - i)) {
+ size_t sent, bytes = min(msg_len, buf_sz - i);
+ int ret;
+
+ ret = test_wait_fd(sk, timeout_sec, 1);
+ if (ret)
+ return ret;
+
+ sent = send(sk, buf + i, bytes, 0);
+ if (sent == 0)
+ break;
+ if (sent != bytes)
+ test_error("send()");
+
+ bytes = 0;
+ do {
+ ssize_t got;
+
+ ret = test_wait_fd(sk, timeout_sec, 0);
+ if (ret)
+ return ret;
+
+ got = recv(sk, msg + bytes, sizeof(msg) - bytes, 0);
+ if (got <= 0)
+ return i;
+ bytes += got;
+ } while (bytes < sent);
+ if (bytes > sent)
+ test_error("recv(): %zd > %zd", bytes, sent);
+ if (memcmp(buf + i, msg, bytes) != 0) {
+ test_fail("received message differs");
+ return -1;
+ }
+ }
+ return i;
+}
+
+int test_client_verify(int sk, const size_t msg_len, const size_t nr,
+ time_t timeout_sec)
+{
+ size_t buf_sz = msg_len * nr;
+ char *buf = alloca(buf_sz);
+ ssize_t ret;
+
+ randomize_buffer(buf, buf_sz);
+ ret = test_client_loop(sk, buf, buf_sz, msg_len, timeout_sec);
+ if (ret < 0)
+ return (int)ret;
+ return ret != buf_sz ? -1 : 0;
+}
diff --git a/tools/testing/selftests/net/tcp_ao/lib/utils.c b/tools/testing/selftests/net/tcp_ao/lib/utils.c
new file mode 100644
index 000000000000..372daca525f5
--- /dev/null
+++ b/tools/testing/selftests/net/tcp_ao/lib/utils.c
@@ -0,0 +1,30 @@
+// SPDX-License-Identifier: GPL-2.0
+#include "aolib.h"
+#include <string.h>
+
+void randomize_buffer(void *buf, size_t buflen)
+{
+ int *p = (int *)buf;
+ size_t words = buflen / sizeof(int);
+ size_t leftover = buflen % sizeof(int);
+
+ if (!buflen)
+ return;
+
+ while (words--)
+ *p++ = rand();
+
+ if (leftover) {
+ int tmp = rand();
+
+ memcpy(buf + buflen - leftover, &tmp, leftover);
+ }
+}
+
+const struct sockaddr_in6 addr_any6 = {
+ .sin6_family = AF_INET6,
+};
+
+const struct sockaddr_in addr_any4 = {
+ .sin_family = AF_INET,
+};
diff --git a/tools/testing/selftests/net/tcp_ao/restore.c b/tools/testing/selftests/net/tcp_ao/restore.c
new file mode 100644
index 000000000000..8fdc808df325
--- /dev/null
+++ b/tools/testing/selftests/net/tcp_ao/restore.c
@@ -0,0 +1,236 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Author: Dmitry Safonov <dima@arista.com> */
+/* This is over-simplified TCP_REPAIR for TCP_ESTABLISHED sockets
+ * It tests that TCP-AO enabled connection can be restored.
+ * For the proper socket repair see:
+ * https://github.com/checkpoint-restore/criu/blob/criu-dev/soccr/soccr.h
+ */
+#include <inttypes.h>
+#include "aolib.h"
+
+const size_t nr_packets = 20;
+const size_t msg_len = 100;
+const size_t quota = nr_packets * msg_len;
+#define fault(type) (inj == FAULT_ ## type)
+
+static void try_server_run(const char *tst_name, unsigned int port,
+ fault_t inj, test_cnt cnt_expected)
+{
+ const char *cnt_name = "TCPAOGood";
+ struct tcp_ao_counters ao1, ao2;
+ uint64_t before_cnt, after_cnt;
+ int sk, lsk;
+ time_t timeout;
+ ssize_t bytes;
+
+ if (fault(TIMEOUT))
+ cnt_name = "TCPAOBad";
+ lsk = test_listen_socket(this_ip_addr, port, 1);
+
+ if (test_add_key(lsk, DEFAULT_TEST_PASSWORD, this_ip_dest, -1, 100, 100))
+ test_error("setsockopt(TCP_AO_ADD_KEY)");
+ synchronize_threads(); /* 1: MKT added => connect() */
+
+ if (test_wait_fd(lsk, TEST_TIMEOUT_SEC, 0))
+ test_error("test_wait_fd()");
+
+ sk = accept(lsk, NULL, NULL);
+ if (sk < 0)
+ test_error("accept()");
+
+ synchronize_threads(); /* 2: accepted => send data */
+ close(lsk);
+
+ bytes = test_server_run(sk, quota, TEST_TIMEOUT_SEC);
+ if (bytes != quota) {
+ test_fail("%s: server served: %zd", tst_name, bytes);
+ goto out;
+ }
+
+ before_cnt = netstat_get_one(cnt_name, NULL);
+ if (test_get_tcp_ao_counters(sk, &ao1))
+ test_error("test_get_tcp_ao_counters()");
+
+ timeout = fault(TIMEOUT) ? TEST_RETRANSMIT_SEC : TEST_TIMEOUT_SEC;
+ bytes = test_server_run(sk, quota, timeout);
+ if (fault(TIMEOUT)) {
+ if (bytes > 0)
+ test_fail("%s: server served: %zd", tst_name, bytes);
+ else
+ test_ok("%s: server couldn't serve", tst_name);
+ } else {
+ if (bytes != quota)
+ test_fail("%s: server served: %zd", tst_name, bytes);
+ else
+ test_ok("%s: server alive", tst_name);
+ }
+ if (test_get_tcp_ao_counters(sk, &ao2))
+ test_error("test_get_tcp_ao_counters()");
+ after_cnt = netstat_get_one(cnt_name, NULL);
+
+ test_tcp_ao_counters_cmp(tst_name, &ao1, &ao2, cnt_expected);
+
+ if (after_cnt <= before_cnt) {
+ test_fail("%s: %s counter did not increase: %zu <= %zu",
+ tst_name, cnt_name, after_cnt, before_cnt);
+ } else {
+ test_ok("%s: counter %s increased %zu => %zu",
+ tst_name, cnt_name, before_cnt, after_cnt);
+ }
+
+ /*
+ * Before close() as that will send FIN and move the peer in TCP_CLOSE
+ * and that will prevent reading AO counters from the peer's socket.
+ */
+ synchronize_threads(); /* 3: verified => closed */
+out:
+ close(sk);
+}
+
+static void *server_fn(void *arg)
+{
+ unsigned int port = test_server_port;
+
+ try_server_run("TCP-AO migrate to another socket", port++,
+ 0, TEST_CNT_GOOD);
+ try_server_run("TCP-AO with wrong send ISN", port++,
+ FAULT_TIMEOUT, TEST_CNT_BAD);
+ try_server_run("TCP-AO with wrong receive ISN", port++,
+ FAULT_TIMEOUT, TEST_CNT_BAD);
+ try_server_run("TCP-AO with wrong send SEQ ext number", port++,
+ FAULT_TIMEOUT, TEST_CNT_BAD);
+ try_server_run("TCP-AO with wrong receive SEQ ext number", port++,
+ FAULT_TIMEOUT, TEST_CNT_NS_BAD | TEST_CNT_GOOD);
+
+ synchronize_threads(); /* don't race to exit: client exits */
+ return NULL;
+}
+
+static void test_get_sk_checkpoint(unsigned int server_port, sockaddr_af *saddr,
+ struct tcp_sock_state *img,
+ struct tcp_ao_repair *ao_img)
+{
+ int sk;
+
+ sk = socket(test_family, SOCK_STREAM, IPPROTO_TCP);
+ if (sk < 0)
+ test_error("socket()");
+
+ if (test_add_key(sk, DEFAULT_TEST_PASSWORD, this_ip_dest, -1, 100, 100))
+ test_error("setsockopt(TCP_AO_ADD_KEY)");
+
+ synchronize_threads(); /* 1: MKT added => connect() */
+ if (test_connect_socket(sk, this_ip_dest, server_port) <= 0)
+ test_error("failed to connect()");
+
+ synchronize_threads(); /* 2: accepted => send data */
+ if (test_client_verify(sk, msg_len, nr_packets, TEST_TIMEOUT_SEC))
+ test_fail("pre-migrate verify failed");
+
+ test_enable_repair(sk);
+ test_sock_checkpoint(sk, img, saddr);
+ test_ao_checkpoint(sk, ao_img);
+ test_kill_sk(sk);
+}
+
+static void test_sk_restore(const char *tst_name, unsigned int server_port,
+ sockaddr_af *saddr, struct tcp_sock_state *img,
+ struct tcp_ao_repair *ao_img,
+ fault_t inj, test_cnt cnt_expected)
+{
+ const char *cnt_name = "TCPAOGood";
+ struct tcp_ao_counters ao1, ao2;
+ uint64_t before_cnt, after_cnt;
+ time_t timeout;
+ int sk;
+
+ if (fault(TIMEOUT))
+ cnt_name = "TCPAOBad";
+
+ before_cnt = netstat_get_one(cnt_name, NULL);
+ sk = socket(test_family, SOCK_STREAM, IPPROTO_TCP);
+ if (sk < 0)
+ test_error("socket()");
+
+ test_enable_repair(sk);
+ test_sock_restore(sk, img, saddr, this_ip_dest, server_port);
+ if (test_add_repaired_key(sk, DEFAULT_TEST_PASSWORD, 0, this_ip_dest, -1, 100, 100))
+ test_error("setsockopt(TCP_AO_ADD_KEY)");
+ test_ao_restore(sk, ao_img);
+
+ if (test_get_tcp_ao_counters(sk, &ao1))
+ test_error("test_get_tcp_ao_counters()");
+
+ test_disable_repair(sk);
+ test_sock_state_free(img);
+
+ timeout = fault(TIMEOUT) ? TEST_RETRANSMIT_SEC : TEST_TIMEOUT_SEC;
+ if (test_client_verify(sk, msg_len, nr_packets, timeout)) {
+ if (fault(TIMEOUT))
+ test_ok("%s: post-migrate connection is broken", tst_name);
+ else
+ test_fail("%s: post-migrate connection is working", tst_name);
+ } else {
+ if (fault(TIMEOUT))
+ test_fail("%s: post-migrate connection still working", tst_name);
+ else
+ test_ok("%s: post-migrate connection is alive", tst_name);
+ }
+ if (test_get_tcp_ao_counters(sk, &ao2))
+ test_error("test_get_tcp_ao_counters()");
+ after_cnt = netstat_get_one(cnt_name, NULL);
+
+ test_tcp_ao_counters_cmp(tst_name, &ao1, &ao2, cnt_expected);
+
+ if (after_cnt <= before_cnt) {
+ test_fail("%s: %s counter did not increase: %zu <= %zu",
+ tst_name, cnt_name, after_cnt, before_cnt);
+ } else {
+ test_ok("%s: counter %s increased %zu => %zu",
+ tst_name, cnt_name, before_cnt, after_cnt);
+ }
+ synchronize_threads(); /* 3: verified => closed */
+ close(sk);
+}
+
+static void *client_fn(void *arg)
+{
+ unsigned int port = test_server_port;
+ struct tcp_sock_state tcp_img;
+ struct tcp_ao_repair ao_img;
+ sockaddr_af saddr;
+
+ test_get_sk_checkpoint(port, &saddr, &tcp_img, &ao_img);
+ test_sk_restore("TCP-AO migrate to another socket", port++,
+ &saddr, &tcp_img, &ao_img, 0, TEST_CNT_GOOD);
+
+ test_get_sk_checkpoint(port, &saddr, &tcp_img, &ao_img);
+ ao_img.snt_isn += 1;
+ test_sk_restore("TCP-AO with wrong send ISN", port++,
+ &saddr, &tcp_img, &ao_img, FAULT_TIMEOUT, TEST_CNT_BAD);
+
+ test_get_sk_checkpoint(port, &saddr, &tcp_img, &ao_img);
+ ao_img.rcv_isn += 1;
+ test_sk_restore("TCP-AO with wrong receive ISN", port++,
+ &saddr, &tcp_img, &ao_img, FAULT_TIMEOUT, TEST_CNT_BAD);
+
+ test_get_sk_checkpoint(port, &saddr, &tcp_img, &ao_img);
+ ao_img.snd_sne += 1;
+ test_sk_restore("TCP-AO with wrong send SEQ ext number", port++,
+ &saddr, &tcp_img, &ao_img, FAULT_TIMEOUT,
+ TEST_CNT_NS_BAD | TEST_CNT_GOOD);
+
+ test_get_sk_checkpoint(port, &saddr, &tcp_img, &ao_img);
+ ao_img.rcv_sne += 1;
+ test_sk_restore("TCP-AO with wrong receive SEQ ext number", port++,
+ &saddr, &tcp_img, &ao_img, FAULT_TIMEOUT,
+ TEST_CNT_NS_GOOD | TEST_CNT_BAD);
+
+ return NULL;
+}
+
+int main(int argc, char *argv[])
+{
+ test_init(20, server_fn, client_fn);
+ return 0;
+}
diff --git a/tools/testing/selftests/net/tcp_ao/rst.c b/tools/testing/selftests/net/tcp_ao/rst.c
new file mode 100644
index 000000000000..7df8b8700e39
--- /dev/null
+++ b/tools/testing/selftests/net/tcp_ao/rst.c
@@ -0,0 +1,457 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * The test checks that both active and passive reset have correct TCP-AO
+ * signature. An "active" reset (abort) here is procured from closing
+ * listen() socket with non-accepted connections in the queue:
+ * inet_csk_listen_stop() => inet_child_forget() =>
+ * => tcp_disconnect() => tcp_send_active_reset()
+ *
+ * The passive reset is quite hard to get on established TCP connections.
+ * It could be procured from non-established states, but the synchronization
+ * part from userspace in order to reliably get RST seems uneasy.
+ * So, instead it's procured by corrupting SEQ number on TIMED-WAIT state.
+ *
+ * It's important to test both passive and active RST as they go through
+ * different code-paths:
+ * - tcp_send_active_reset() makes no-data skb, sends it with tcp_transmit_skb()
+ * - tcp_v*_send_reset() create their reply skbs and send them with
+ * ip_send_unicast_reply()
+ *
+ * In both cases TCP-AO signatures have to be correct, which is verified by
+ * (1) checking that the TCP-AO connection was reset and (2) TCP-AO counters.
+ *
+ * Author: Dmitry Safonov <dima@arista.com>
+ */
+#include <inttypes.h>
+#include "../../../../include/linux/kernel.h"
+#include "aolib.h"
+
+const size_t quota = 1000;
+const size_t packet_sz = 100;
+/*
+ * Backlog == 0 means 1 connection in queue, see:
+ * commit 64a146513f8f ("[NET]: Revert incorrect accept queue...")
+ */
+const unsigned int backlog;
+
+static void netstats_check(struct netstat *before, struct netstat *after,
+ char *msg)
+{
+ uint64_t before_cnt, after_cnt;
+
+ before_cnt = netstat_get(before, "TCPAORequired", NULL);
+ after_cnt = netstat_get(after, "TCPAORequired", NULL);
+ if (after_cnt > before_cnt)
+ test_fail("Segments without AO sign (%s): %" PRIu64 " => %" PRIu64,
+ msg, before_cnt, after_cnt);
+ else
+ test_ok("No segments without AO sign (%s)", msg);
+
+ before_cnt = netstat_get(before, "TCPAOGood", NULL);
+ after_cnt = netstat_get(after, "TCPAOGood", NULL);
+ if (after_cnt <= before_cnt)
+ test_fail("Signed AO segments (%s): %" PRIu64 " => %" PRIu64,
+ msg, before_cnt, after_cnt);
+ else
+ test_ok("Signed AO segments (%s): %" PRIu64 " => %" PRIu64,
+ msg, before_cnt, after_cnt);
+
+ before_cnt = netstat_get(before, "TCPAOBad", NULL);
+ after_cnt = netstat_get(after, "TCPAOBad", NULL);
+ if (after_cnt > before_cnt)
+ test_fail("Segments with bad AO sign (%s): %" PRIu64 " => %" PRIu64,
+ msg, before_cnt, after_cnt);
+ else
+ test_ok("No segments with bad AO sign (%s)", msg);
+}
+
+/*
+ * Another way to send RST, but not through tcp_v{4,6}_send_reset()
+ * is tcp_send_active_reset(), that is not in reply to inbound segment,
+ * but rather active send. It uses tcp_transmit_skb(), so that should
+ * work, but as it also sends RST - nice that it can be covered as well.
+ */
+static void close_forced(int sk)
+{
+ struct linger sl;
+
+ sl.l_onoff = 1;
+ sl.l_linger = 0;
+ if (setsockopt(sk, SOL_SOCKET, SO_LINGER, &sl, sizeof(sl)))
+ test_error("setsockopt(SO_LINGER)");
+ close(sk);
+}
+
+static void test_server_active_rst(unsigned int port)
+{
+ struct tcp_ao_counters cnt1, cnt2;
+ ssize_t bytes;
+ int sk, lsk;
+
+ lsk = test_listen_socket(this_ip_addr, port, backlog);
+ if (test_add_key(lsk, DEFAULT_TEST_PASSWORD, this_ip_dest, -1, 100, 100))
+ test_error("setsockopt(TCP_AO_ADD_KEY)");
+ if (test_get_tcp_ao_counters(lsk, &cnt1))
+ test_error("test_get_tcp_ao_counters()");
+
+ synchronize_threads(); /* 1: MKT added */
+ if (test_wait_fd(lsk, TEST_TIMEOUT_SEC, 0))
+ test_error("test_wait_fd()");
+
+ sk = accept(lsk, NULL, NULL);
+ if (sk < 0)
+ test_error("accept()");
+
+ synchronize_threads(); /* 2: connection accept()ed, another queued */
+ if (test_get_tcp_ao_counters(lsk, &cnt2))
+ test_error("test_get_tcp_ao_counters()");
+
+ synchronize_threads(); /* 3: close listen socket */
+ close(lsk);
+ bytes = test_server_run(sk, quota, 0);
+ if (bytes != quota)
+ test_error("servered only %zd bytes", bytes);
+ else
+ test_ok("servered %zd bytes", bytes);
+
+ synchronize_threads(); /* 4: finishing up */
+ close_forced(sk);
+
+ synchronize_threads(); /* 5: closed active sk */
+
+ synchronize_threads(); /* 6: counters checks */
+ if (test_tcp_ao_counters_cmp("active RST server", &cnt1, &cnt2, TEST_CNT_GOOD))
+ test_fail("MKT counters (server) have not only good packets");
+ else
+ test_ok("MKT counters are good on server");
+}
+
+static void test_server_passive_rst(unsigned int port)
+{
+ struct tcp_ao_counters ao1, ao2;
+ int sk, lsk;
+ ssize_t bytes;
+
+ lsk = test_listen_socket(this_ip_addr, port, 1);
+
+ if (test_add_key(lsk, DEFAULT_TEST_PASSWORD, this_ip_dest, -1, 100, 100))
+ test_error("setsockopt(TCP_AO_ADD_KEY)");
+
+ synchronize_threads(); /* 1: MKT added => connect() */
+ if (test_wait_fd(lsk, TEST_TIMEOUT_SEC, 0))
+ test_error("test_wait_fd()");
+
+ sk = accept(lsk, NULL, NULL);
+ if (sk < 0)
+ test_error("accept()");
+
+ synchronize_threads(); /* 2: accepted => send data */
+ close(lsk);
+ if (test_get_tcp_ao_counters(sk, &ao1))
+ test_error("test_get_tcp_ao_counters()");
+
+ bytes = test_server_run(sk, quota, TEST_TIMEOUT_SEC);
+ if (bytes != quota) {
+ if (bytes > 0)
+ test_fail("server served: %zd", bytes);
+ else
+ test_fail("server returned %zd", bytes);
+ }
+
+ synchronize_threads(); /* 3: checkpoint the client */
+ synchronize_threads(); /* 4: close the server, creating twsk */
+ if (test_get_tcp_ao_counters(sk, &ao2))
+ test_error("test_get_tcp_ao_counters()");
+ close(sk);
+
+ synchronize_threads(); /* 5: restore the socket, send more data */
+ test_tcp_ao_counters_cmp("passive RST server", &ao1, &ao2, TEST_CNT_GOOD);
+
+ synchronize_threads(); /* 6: server exits */
+}
+
+static void *server_fn(void *arg)
+{
+ struct netstat *ns_before, *ns_after;
+ unsigned int port = test_server_port;
+
+ ns_before = netstat_read();
+
+ test_server_active_rst(port++);
+ test_server_passive_rst(port++);
+
+ ns_after = netstat_read();
+ netstats_check(ns_before, ns_after, "server");
+ netstat_free(ns_after);
+ netstat_free(ns_before);
+ synchronize_threads(); /* exit */
+
+ synchronize_threads(); /* don't race to exit() - client exits */
+ return NULL;
+}
+
+static int test_wait_fds(int sk[], size_t nr, bool is_writable[],
+ ssize_t wait_for, time_t sec)
+{
+ struct timeval tv = { .tv_sec = sec };
+ struct timeval *ptv = NULL;
+ fd_set left;
+ size_t i;
+ int ret;
+
+ FD_ZERO(&left);
+ for (i = 0; i < nr; i++) {
+ FD_SET(sk[i], &left);
+ if (is_writable)
+ is_writable[i] = false;
+ }
+
+ if (sec)
+ ptv = &tv;
+
+ do {
+ bool is_empty = true;
+ fd_set fds, efds;
+ int nfd = 0;
+
+ FD_ZERO(&fds);
+ FD_ZERO(&efds);
+ for (i = 0; i < nr; i++) {
+ if (!FD_ISSET(sk[i], &left))
+ continue;
+
+ if (sk[i] > nfd)
+ nfd = sk[i];
+
+ FD_SET(sk[i], &fds);
+ FD_SET(sk[i], &efds);
+ is_empty = false;
+ }
+ if (is_empty)
+ return -ENOENT;
+
+ errno = 0;
+ ret = select(nfd + 1, NULL, &fds, &efds, ptv);
+ if (ret < 0)
+ return -errno;
+ if (!ret)
+ return -ETIMEDOUT;
+ for (i = 0; i < nr; i++) {
+ if (FD_ISSET(sk[i], &fds)) {
+ if (is_writable)
+ is_writable[i] = true;
+ FD_CLR(sk[i], &left);
+ wait_for--;
+ continue;
+ }
+ if (FD_ISSET(sk[i], &efds)) {
+ FD_CLR(sk[i], &left);
+ wait_for--;
+ }
+ }
+ } while (wait_for > 0);
+
+ return 0;
+}
+
+static void test_client_active_rst(unsigned int port)
+{
+ /* one in queue, another accept()ed */
+ unsigned int wait_for = backlog + 2;
+ int i, sk[3], err;
+ bool is_writable[ARRAY_SIZE(sk)] = {false};
+ unsigned int last = ARRAY_SIZE(sk) - 1;
+
+ for (i = 0; i < ARRAY_SIZE(sk); i++) {
+ sk[i] = socket(test_family, SOCK_STREAM, IPPROTO_TCP);
+ if (sk[i] < 0)
+ test_error("socket()");
+ if (test_add_key(sk[i], DEFAULT_TEST_PASSWORD,
+ this_ip_dest, -1, 100, 100))
+ test_error("setsockopt(TCP_AO_ADD_KEY)");
+ }
+
+ synchronize_threads(); /* 1: MKT added */
+ for (i = 0; i < last; i++) {
+ err = _test_connect_socket(sk[i], this_ip_dest, port,
+ (i == 0) ? TEST_TIMEOUT_SEC : -1);
+
+ if (err < 0)
+ test_error("failed to connect()");
+ }
+
+ synchronize_threads(); /* 2: connection accept()ed, another queued */
+ err = test_wait_fds(sk, last, is_writable, wait_for, TEST_TIMEOUT_SEC);
+ if (err < 0)
+ test_error("test_wait_fds(): %d", err);
+
+ synchronize_threads(); /* 3: close listen socket */
+ if (test_client_verify(sk[0], packet_sz, quota / packet_sz, TEST_TIMEOUT_SEC))
+ test_fail("Failed to send data on connected socket");
+ else
+ test_ok("Verified established tcp connection");
+
+ synchronize_threads(); /* 4: finishing up */
+ err = _test_connect_socket(sk[last], this_ip_dest, port, -1);
+ if (err < 0)
+ test_error("failed to connect()");
+
+ synchronize_threads(); /* 5: closed active sk */
+ err = test_wait_fds(sk, ARRAY_SIZE(sk), NULL,
+ wait_for, TEST_TIMEOUT_SEC);
+ if (err < 0)
+ test_error("select(): %d", err);
+
+ for (i = 0; i < ARRAY_SIZE(sk); i++) {
+ socklen_t slen = sizeof(err);
+
+ if (getsockopt(sk[i], SOL_SOCKET, SO_ERROR, &err, &slen))
+ test_error("getsockopt()");
+ if (is_writable[i] && err != ECONNRESET) {
+ test_fail("sk[%d] = %d, err = %d, connection wasn't reset",
+ i, sk[i], err);
+ } else {
+ test_ok("sk[%d] = %d%s", i, sk[i],
+ is_writable[i] ? ", connection was reset" : "");
+ }
+ }
+ synchronize_threads(); /* 6: counters checks */
+}
+
+static void test_client_passive_rst(unsigned int port)
+{
+ struct tcp_ao_counters ao1, ao2;
+ struct tcp_ao_repair ao_img;
+ struct tcp_sock_state img;
+ sockaddr_af saddr;
+ int sk, err;
+
+ sk = socket(test_family, SOCK_STREAM, IPPROTO_TCP);
+ if (sk < 0)
+ test_error("socket()");
+
+ if (test_add_key(sk, DEFAULT_TEST_PASSWORD, this_ip_dest, -1, 100, 100))
+ test_error("setsockopt(TCP_AO_ADD_KEY)");
+
+ synchronize_threads(); /* 1: MKT added => connect() */
+ if (test_connect_socket(sk, this_ip_dest, port) <= 0)
+ test_error("failed to connect()");
+
+ synchronize_threads(); /* 2: accepted => send data */
+ if (test_client_verify(sk, packet_sz, quota / packet_sz, TEST_TIMEOUT_SEC))
+ test_fail("Failed to send data on connected socket");
+ else
+ test_ok("Verified established tcp connection");
+
+ synchronize_threads(); /* 3: checkpoint the client */
+ test_enable_repair(sk);
+ test_sock_checkpoint(sk, &img, &saddr);
+ test_ao_checkpoint(sk, &ao_img);
+ test_disable_repair(sk);
+
+ synchronize_threads(); /* 4: close the server, creating twsk */
+
+ /*
+ * The "corruption" in SEQ has to be small enough to fit into TCP
+ * window, see tcp_timewait_state_process() for out-of-window
+ * segments.
+ */
+ img.out.seq += 5; /* 5 is more noticeable in tcpdump than 1 */
+
+ /*
+ * FIXME: This is kind-of ugly and dirty, but it works.
+ *
+ * At this moment, the server has close'ed(sk).
+ * The passive RST that is being targeted here is new data after
+ * half-duplex close, see tcp_timewait_state_process() => TCP_TW_RST
+ *
+ * What is needed here is:
+ * (1) wait for FIN from the server
+ * (2) make sure that the ACK from the client went out
+ * (3) make sure that the ACK was received and processed by the server
+ *
+ * Otherwise, the data that will be sent from "repaired" socket
+ * post SEQ corruption may get to the server before it's in
+ * TCP_FIN_WAIT2.
+ *
+ * (1) is easy with select()/poll()
+ * (2) is possible by polling tcpi_state from TCP_INFO
+ * (3) is quite complex: as server's socket was already closed,
+ * probably the way to do it would be tcp-diag.
+ */
+ sleep(TEST_RETRANSMIT_SEC);
+
+ synchronize_threads(); /* 5: restore the socket, send more data */
+ test_kill_sk(sk);
+
+ sk = socket(test_family, SOCK_STREAM, IPPROTO_TCP);
+ if (sk < 0)
+ test_error("socket()");
+
+ test_enable_repair(sk);
+ test_sock_restore(sk, &img, &saddr, this_ip_dest, port);
+ if (test_add_repaired_key(sk, DEFAULT_TEST_PASSWORD, 0, this_ip_dest, -1, 100, 100))
+ test_error("setsockopt(TCP_AO_ADD_KEY)");
+ test_ao_restore(sk, &ao_img);
+
+ if (test_get_tcp_ao_counters(sk, &ao1))
+ test_error("test_get_tcp_ao_counters()");
+
+ test_disable_repair(sk);
+ test_sock_state_free(&img);
+
+ /*
+ * This is how "passive reset" is acquired in this test from TCP_TW_RST:
+ *
+ * IP 10.0.254.1.7011 > 10.0.1.1.59772: Flags [P.], seq 901:1001, ack 1001, win 249,
+ * options [tcp-ao keyid 100 rnextkeyid 100 mac 0x10217d6c36a22379086ef3b1], length 100
+ * IP 10.0.254.1.7011 > 10.0.1.1.59772: Flags [F.], seq 1001, ack 1001, win 249,
+ * options [tcp-ao keyid 100 rnextkeyid 100 mac 0x104ffc99b98c10a5298cc268], length 0
+ * IP 10.0.1.1.59772 > 10.0.254.1.7011: Flags [.], ack 1002, win 251,
+ * options [tcp-ao keyid 100 rnextkeyid 100 mac 0xe496dd4f7f5a8a66873c6f93,nop,nop,sack 1 {1001:1002}], length 0
+ * IP 10.0.1.1.59772 > 10.0.254.1.7011: Flags [P.], seq 1006:1106, ack 1001, win 251,
+ * options [tcp-ao keyid 100 rnextkeyid 100 mac 0x1b5f3330fb23fbcd0c77d0ca], length 100
+ * IP 10.0.254.1.7011 > 10.0.1.1.59772: Flags [R], seq 3215596252, win 0,
+ * options [tcp-ao keyid 100 rnextkeyid 100 mac 0x0bcfbbf497bce844312304b2], length 0
+ */
+ err = test_client_verify(sk, packet_sz, quota / packet_sz, 2 * TEST_TIMEOUT_SEC);
+ /* Make sure that the connection was reset, not timeouted */
+ if (err && err == -ECONNRESET)
+ test_ok("client sock was passively reset post-seq-adjust");
+ else if (err)
+ test_fail("client sock was not reset post-seq-adjust: %d", err);
+ else
+ test_fail("client sock is yet connected post-seq-adjust");
+
+ if (test_get_tcp_ao_counters(sk, &ao2))
+ test_error("test_get_tcp_ao_counters()");
+
+ synchronize_threads(); /* 6: server exits */
+ close(sk);
+ test_tcp_ao_counters_cmp("client passive RST", &ao1, &ao2, TEST_CNT_GOOD);
+}
+
+static void *client_fn(void *arg)
+{
+ struct netstat *ns_before, *ns_after;
+ unsigned int port = test_server_port;
+
+ ns_before = netstat_read();
+
+ test_client_active_rst(port++);
+ test_client_passive_rst(port++);
+
+ ns_after = netstat_read();
+ netstats_check(ns_before, ns_after, "client");
+ netstat_free(ns_after);
+ netstat_free(ns_before);
+
+ synchronize_threads(); /* exit */
+ return NULL;
+}
+
+int main(int argc, char *argv[])
+{
+ test_init(14, server_fn, client_fn);
+ return 0;
+}
diff --git a/tools/testing/selftests/net/tcp_ao/self-connect.c b/tools/testing/selftests/net/tcp_ao/self-connect.c
new file mode 100644
index 000000000000..e154d9e198a9
--- /dev/null
+++ b/tools/testing/selftests/net/tcp_ao/self-connect.c
@@ -0,0 +1,197 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Author: Dmitry Safonov <dima@arista.com> */
+#include <inttypes.h>
+#include "aolib.h"
+
+static union tcp_addr local_addr;
+
+static void __setup_lo_intf(const char *lo_intf,
+ const char *addr_str, uint8_t prefix)
+{
+ if (inet_pton(TEST_FAMILY, addr_str, &local_addr) != 1)
+ test_error("Can't convert local ip address");
+
+ if (ip_addr_add(lo_intf, TEST_FAMILY, local_addr, prefix))
+ test_error("Failed to add %s ip address", lo_intf);
+
+ if (link_set_up(lo_intf))
+ test_error("Failed to bring %s up", lo_intf);
+}
+
+static void setup_lo_intf(const char *lo_intf)
+{
+#ifdef IPV6_TEST
+ __setup_lo_intf(lo_intf, "::1", 128);
+#else
+ __setup_lo_intf(lo_intf, "127.0.0.1", 8);
+#endif
+}
+
+static void tcp_self_connect(const char *tst, unsigned int port,
+ bool different_keyids, bool check_restore)
+{
+ uint64_t before_challenge_ack, after_challenge_ack;
+ uint64_t before_syn_challenge, after_syn_challenge;
+ struct tcp_ao_counters before_ao, after_ao;
+ uint64_t before_aogood, after_aogood;
+ struct netstat *ns_before, *ns_after;
+ const size_t nr_packets = 20;
+ struct tcp_ao_repair ao_img;
+ struct tcp_sock_state img;
+ sockaddr_af addr;
+ int sk;
+
+ tcp_addr_to_sockaddr_in(&addr, &local_addr, htons(port));
+
+ sk = socket(test_family, SOCK_STREAM, IPPROTO_TCP);
+ if (sk < 0)
+ test_error("socket()");
+
+ if (different_keyids) {
+ if (test_add_key(sk, DEFAULT_TEST_PASSWORD, local_addr, -1, 5, 7))
+ test_error("setsockopt(TCP_AO_ADD_KEY)");
+ if (test_add_key(sk, DEFAULT_TEST_PASSWORD, local_addr, -1, 7, 5))
+ test_error("setsockopt(TCP_AO_ADD_KEY)");
+ } else {
+ if (test_add_key(sk, DEFAULT_TEST_PASSWORD, local_addr, -1, 100, 100))
+ test_error("setsockopt(TCP_AO_ADD_KEY)");
+ }
+
+ if (bind(sk, (struct sockaddr *)&addr, sizeof(addr)) < 0)
+ test_error("bind()");
+
+ ns_before = netstat_read();
+ before_aogood = netstat_get(ns_before, "TCPAOGood", NULL);
+ before_challenge_ack = netstat_get(ns_before, "TCPChallengeACK", NULL);
+ before_syn_challenge = netstat_get(ns_before, "TCPSYNChallenge", NULL);
+ if (test_get_tcp_ao_counters(sk, &before_ao))
+ test_error("test_get_tcp_ao_counters()");
+
+ if (__test_connect_socket(sk, "lo", (struct sockaddr *)&addr,
+ sizeof(addr), TEST_TIMEOUT_SEC) < 0) {
+ ns_after = netstat_read();
+ netstat_print_diff(ns_before, ns_after);
+ test_error("failed to connect()");
+ }
+
+ if (test_client_verify(sk, 100, nr_packets, TEST_TIMEOUT_SEC)) {
+ test_fail("%s: tcp connection verify failed", tst);
+ close(sk);
+ return;
+ }
+
+ ns_after = netstat_read();
+ after_aogood = netstat_get(ns_after, "TCPAOGood", NULL);
+ after_challenge_ack = netstat_get(ns_after, "TCPChallengeACK", NULL);
+ after_syn_challenge = netstat_get(ns_after, "TCPSYNChallenge", NULL);
+ if (test_get_tcp_ao_counters(sk, &after_ao))
+ test_error("test_get_tcp_ao_counters()");
+ if (!check_restore) {
+ /* to debug: netstat_print_diff(ns_before, ns_after); */
+ netstat_free(ns_before);
+ }
+ netstat_free(ns_after);
+
+ if (after_aogood <= before_aogood) {
+ test_fail("%s: TCPAOGood counter mismatch: %zu <= %zu",
+ tst, after_aogood, before_aogood);
+ close(sk);
+ return;
+ }
+ if (after_challenge_ack <= before_challenge_ack ||
+ after_syn_challenge <= before_syn_challenge) {
+ /*
+ * It's also meant to test simultaneous open, so check
+ * these counters as well.
+ */
+ test_fail("%s: Didn't challenge SYN or ACK: %zu <= %zu OR %zu <= %zu",
+ tst, after_challenge_ack, before_challenge_ack,
+ after_syn_challenge, before_syn_challenge);
+ close(sk);
+ return;
+ }
+
+ if (test_tcp_ao_counters_cmp(tst, &before_ao, &after_ao, TEST_CNT_GOOD)) {
+ close(sk);
+ return;
+ }
+
+ if (!check_restore) {
+ test_ok("%s: connect TCPAOGood %" PRIu64 " => %" PRIu64,
+ tst, before_aogood, after_aogood);
+ close(sk);
+ return;
+ }
+
+ test_enable_repair(sk);
+ test_sock_checkpoint(sk, &img, &addr);
+#ifdef IPV6_TEST
+ addr.sin6_port = htons(port + 1);
+#else
+ addr.sin_port = htons(port + 1);
+#endif
+ test_ao_checkpoint(sk, &ao_img);
+ test_kill_sk(sk);
+
+ sk = socket(test_family, SOCK_STREAM, IPPROTO_TCP);
+ if (sk < 0)
+ test_error("socket()");
+
+ test_enable_repair(sk);
+ __test_sock_restore(sk, "lo", &img, &addr, &addr, sizeof(addr));
+ if (different_keyids) {
+ if (test_add_repaired_key(sk, DEFAULT_TEST_PASSWORD, 0,
+ local_addr, -1, 7, 5))
+ test_error("setsockopt(TCP_AO_ADD_KEY)");
+ if (test_add_repaired_key(sk, DEFAULT_TEST_PASSWORD, 0,
+ local_addr, -1, 5, 7))
+ test_error("setsockopt(TCP_AO_ADD_KEY)");
+ } else {
+ if (test_add_repaired_key(sk, DEFAULT_TEST_PASSWORD, 0,
+ local_addr, -1, 100, 100))
+ test_error("setsockopt(TCP_AO_ADD_KEY)");
+ }
+ test_ao_restore(sk, &ao_img);
+ test_disable_repair(sk);
+ test_sock_state_free(&img);
+ if (test_client_verify(sk, 100, nr_packets, TEST_TIMEOUT_SEC)) {
+ test_fail("%s: tcp connection verify failed", tst);
+ close(sk);
+ return;
+ }
+ ns_after = netstat_read();
+ after_aogood = netstat_get(ns_after, "TCPAOGood", NULL);
+ /* to debug: netstat_print_diff(ns_before, ns_after); */
+ netstat_free(ns_before);
+ netstat_free(ns_after);
+ close(sk);
+ if (after_aogood <= before_aogood) {
+ test_fail("%s: TCPAOGood counter mismatch: %zu <= %zu",
+ tst, after_aogood, before_aogood);
+ return;
+ }
+ test_ok("%s: connect TCPAOGood %" PRIu64 " => %" PRIu64,
+ tst, before_aogood, after_aogood);
+}
+
+static void *client_fn(void *arg)
+{
+ unsigned int port = test_server_port;
+
+ setup_lo_intf("lo");
+
+ tcp_self_connect("self-connect(same keyids)", port++, false, false);
+ tcp_self_connect("self-connect(different keyids)", port++, true, false);
+ tcp_self_connect("self-connect(restore)", port, false, true);
+ port += 2;
+ tcp_self_connect("self-connect(restore, different keyids)", port, true, true);
+ port += 2;
+
+ return NULL;
+}
+
+int main(int argc, char *argv[])
+{
+ test_init(4, client_fn, NULL);
+ return 0;
+}
diff --git a/tools/testing/selftests/net/tcp_ao/seq-ext.c b/tools/testing/selftests/net/tcp_ao/seq-ext.c
new file mode 100644
index 000000000000..ad4e77d6823e
--- /dev/null
+++ b/tools/testing/selftests/net/tcp_ao/seq-ext.c
@@ -0,0 +1,245 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Check that after SEQ number wrap-around:
+ * 1. SEQ-extension has upper bytes set
+ * 2. TCP conneciton is alive and no TCPAOBad segments
+ * In order to test (2), the test doesn't just adjust seq number for a queue
+ * on a connected socket, but migrates it to another sk+port number, so
+ * that there won't be any delayed packets that will fail to verify
+ * with the new SEQ numbers.
+ */
+#include <inttypes.h>
+#include "aolib.h"
+
+const unsigned int nr_packets = 1000;
+const unsigned int msg_len = 1000;
+const unsigned int quota = nr_packets * msg_len;
+unsigned int client_new_port;
+
+/* Move them closer to roll-over */
+static void test_adjust_seqs(struct tcp_sock_state *img,
+ struct tcp_ao_repair *ao_img,
+ bool server)
+{
+ uint32_t new_seq1, new_seq2;
+
+ /* make them roll-over during quota, but on different segments */
+ if (server) {
+ new_seq1 = ((uint32_t)-1) - msg_len;
+ new_seq2 = ((uint32_t)-1) - (quota - 2 * msg_len);
+ } else {
+ new_seq1 = ((uint32_t)-1) - (quota - 2 * msg_len);
+ new_seq2 = ((uint32_t)-1) - msg_len;
+ }
+
+ img->in.seq = new_seq1;
+ img->trw.snd_wl1 = img->in.seq - msg_len;
+ img->out.seq = new_seq2;
+ img->trw.rcv_wup = img->in.seq;
+}
+
+static int test_sk_restore(struct tcp_sock_state *img,
+ struct tcp_ao_repair *ao_img, sockaddr_af *saddr,
+ const union tcp_addr daddr, unsigned int dport,
+ struct tcp_ao_counters *cnt)
+{
+ int sk;
+
+ sk = socket(test_family, SOCK_STREAM, IPPROTO_TCP);
+ if (sk < 0)
+ test_error("socket()");
+
+ test_enable_repair(sk);
+ test_sock_restore(sk, img, saddr, daddr, dport);
+ if (test_add_repaired_key(sk, DEFAULT_TEST_PASSWORD, 0, daddr, -1, 100, 100))
+ test_error("setsockopt(TCP_AO_ADD_KEY)");
+ test_ao_restore(sk, ao_img);
+
+ if (test_get_tcp_ao_counters(sk, cnt))
+ test_error("test_get_tcp_ao_counters()");
+
+ test_disable_repair(sk);
+ test_sock_state_free(img);
+ return sk;
+}
+
+static void *server_fn(void *arg)
+{
+ uint64_t before_good, after_good, after_bad;
+ struct tcp_ao_counters ao1, ao2;
+ struct tcp_sock_state img;
+ struct tcp_ao_repair ao_img;
+ sockaddr_af saddr;
+ ssize_t bytes;
+ int sk, lsk;
+
+ lsk = test_listen_socket(this_ip_addr, test_server_port, 1);
+
+ if (test_add_key(lsk, DEFAULT_TEST_PASSWORD, this_ip_dest, -1, 100, 100))
+ test_error("setsockopt(TCP_AO_ADD_KEY)");
+
+ synchronize_threads(); /* 1: MKT added => connect() */
+
+ if (test_wait_fd(lsk, TEST_TIMEOUT_SEC, 0))
+ test_error("test_wait_fd()");
+
+ sk = accept(lsk, NULL, NULL);
+ if (sk < 0)
+ test_error("accept()");
+
+ synchronize_threads(); /* 2: accepted => send data */
+ close(lsk);
+
+ bytes = test_server_run(sk, quota, TEST_TIMEOUT_SEC);
+ if (bytes != quota) {
+ if (bytes > 0)
+ test_fail("server served: %zd", bytes);
+ else
+ test_fail("server returned: %zd", bytes);
+ goto out;
+ }
+
+ before_good = netstat_get_one("TCPAOGood", NULL);
+
+ synchronize_threads(); /* 3: restore the connection on another port */
+
+ test_enable_repair(sk);
+ test_sock_checkpoint(sk, &img, &saddr);
+ test_ao_checkpoint(sk, &ao_img);
+ test_kill_sk(sk);
+#ifdef IPV6_TEST
+ saddr.sin6_port = htons(ntohs(saddr.sin6_port) + 1);
+#else
+ saddr.sin_port = htons(ntohs(saddr.sin_port) + 1);
+#endif
+ test_adjust_seqs(&img, &ao_img, true);
+ synchronize_threads(); /* 4: dump finished */
+ sk = test_sk_restore(&img, &ao_img, &saddr, this_ip_dest,
+ client_new_port, &ao1);
+
+ synchronize_threads(); /* 5: verify counters during SEQ-number rollover */
+ bytes = test_server_run(sk, quota, TEST_TIMEOUT_SEC);
+ if (bytes != quota) {
+ if (bytes > 0)
+ test_fail("server served: %zd", bytes);
+ else
+ test_fail("server returned: %zd", bytes);
+ } else {
+ test_ok("server alive");
+ }
+
+ if (test_get_tcp_ao_counters(sk, &ao2))
+ test_error("test_get_tcp_ao_counters()");
+ after_good = netstat_get_one("TCPAOGood", NULL);
+
+ test_tcp_ao_counters_cmp(NULL, &ao1, &ao2, TEST_CNT_GOOD);
+
+ if (after_good <= before_good) {
+ test_fail("TCPAOGood counter did not increase: %zu <= %zu",
+ after_good, before_good);
+ } else {
+ test_ok("TCPAOGood counter increased %zu => %zu",
+ before_good, after_good);
+ }
+ after_bad = netstat_get_one("TCPAOBad", NULL);
+ if (after_bad)
+ test_fail("TCPAOBad counter is non-zero: %zu", after_bad);
+ else
+ test_ok("TCPAOBad counter didn't increase");
+ test_enable_repair(sk);
+ test_ao_checkpoint(sk, &ao_img);
+ if (ao_img.snd_sne && ao_img.rcv_sne) {
+ test_ok("SEQ extension incremented: %u/%u",
+ ao_img.snd_sne, ao_img.rcv_sne);
+ } else {
+ test_fail("SEQ extension was not incremented: %u/%u",
+ ao_img.snd_sne, ao_img.rcv_sne);
+ }
+
+ synchronize_threads(); /* 6: verified => closed */
+out:
+ close(sk);
+ return NULL;
+}
+
+static void *client_fn(void *arg)
+{
+ uint64_t before_good, after_good, after_bad;
+ struct tcp_ao_counters ao1, ao2;
+ struct tcp_sock_state img;
+ struct tcp_ao_repair ao_img;
+ sockaddr_af saddr;
+ int sk;
+
+ sk = socket(test_family, SOCK_STREAM, IPPROTO_TCP);
+ if (sk < 0)
+ test_error("socket()");
+
+ if (test_add_key(sk, DEFAULT_TEST_PASSWORD, this_ip_dest, -1, 100, 100))
+ test_error("setsockopt(TCP_AO_ADD_KEY)");
+
+ synchronize_threads(); /* 1: MKT added => connect() */
+ if (test_connect_socket(sk, this_ip_dest, test_server_port) <= 0)
+ test_error("failed to connect()");
+
+ synchronize_threads(); /* 2: accepted => send data */
+ if (test_client_verify(sk, msg_len, nr_packets, TEST_TIMEOUT_SEC)) {
+ test_fail("pre-migrate verify failed");
+ return NULL;
+ }
+
+ before_good = netstat_get_one("TCPAOGood", NULL);
+
+ synchronize_threads(); /* 3: restore the connection on another port */
+ test_enable_repair(sk);
+ test_sock_checkpoint(sk, &img, &saddr);
+ test_ao_checkpoint(sk, &ao_img);
+ test_kill_sk(sk);
+#ifdef IPV6_TEST
+ client_new_port = ntohs(saddr.sin6_port) + 1;
+ saddr.sin6_port = htons(ntohs(saddr.sin6_port) + 1);
+#else
+ client_new_port = ntohs(saddr.sin_port) + 1;
+ saddr.sin_port = htons(ntohs(saddr.sin_port) + 1);
+#endif
+ test_adjust_seqs(&img, &ao_img, false);
+ synchronize_threads(); /* 4: dump finished */
+ sk = test_sk_restore(&img, &ao_img, &saddr, this_ip_dest,
+ test_server_port + 1, &ao1);
+
+ synchronize_threads(); /* 5: verify counters during SEQ-number rollover */
+ if (test_client_verify(sk, msg_len, nr_packets, TEST_TIMEOUT_SEC))
+ test_fail("post-migrate verify failed");
+ else
+ test_ok("post-migrate connection alive");
+
+ if (test_get_tcp_ao_counters(sk, &ao2))
+ test_error("test_get_tcp_ao_counters()");
+ after_good = netstat_get_one("TCPAOGood", NULL);
+
+ test_tcp_ao_counters_cmp(NULL, &ao1, &ao2, TEST_CNT_GOOD);
+
+ if (after_good <= before_good) {
+ test_fail("TCPAOGood counter did not increase: %zu <= %zu",
+ after_good, before_good);
+ } else {
+ test_ok("TCPAOGood counter increased %zu => %zu",
+ before_good, after_good);
+ }
+ after_bad = netstat_get_one("TCPAOBad", NULL);
+ if (after_bad)
+ test_fail("TCPAOBad counter is non-zero: %zu", after_bad);
+ else
+ test_ok("TCPAOBad counter didn't increase");
+
+ synchronize_threads(); /* 6: verified => closed */
+ close(sk);
+
+ synchronize_threads(); /* don't race to exit: let server exit() */
+ return NULL;
+}
+
+int main(int argc, char *argv[])
+{
+ test_init(7, server_fn, client_fn);
+ return 0;
+}
diff --git a/tools/testing/selftests/net/tcp_ao/setsockopt-closed.c b/tools/testing/selftests/net/tcp_ao/setsockopt-closed.c
new file mode 100644
index 000000000000..452de131fa3a
--- /dev/null
+++ b/tools/testing/selftests/net/tcp_ao/setsockopt-closed.c
@@ -0,0 +1,835 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Author: Dmitry Safonov <dima@arista.com> */
+#include <inttypes.h>
+#include "../../../../include/linux/kernel.h"
+#include "aolib.h"
+
+static union tcp_addr tcp_md5_client;
+
+static int test_port = 7788;
+static void make_listen(int sk)
+{
+ sockaddr_af addr;
+
+ tcp_addr_to_sockaddr_in(&addr, &this_ip_addr, htons(test_port++));
+ if (bind(sk, (struct sockaddr *)&addr, sizeof(addr)) < 0)
+ test_error("bind()");
+ if (listen(sk, 1))
+ test_error("listen()");
+}
+
+static void test_vefify_ao_info(int sk, struct tcp_ao_info_opt *info,
+ const char *tst)
+{
+ struct tcp_ao_info_opt tmp;
+ socklen_t len = sizeof(tmp);
+
+ if (getsockopt(sk, IPPROTO_TCP, TCP_AO_INFO, &tmp, &len))
+ test_error("getsockopt(TCP_AO_INFO) failed");
+
+#define __cmp_ao(member) \
+do { \
+ if (info->member != tmp.member) { \
+ test_fail("%s: getsockopt(): " __stringify(member) " %zu != %zu", \
+ tst, (size_t)info->member, (size_t)tmp.member); \
+ return; \
+ } \
+} while(0)
+ if (info->set_current)
+ __cmp_ao(current_key);
+ if (info->set_rnext)
+ __cmp_ao(rnext);
+ if (info->set_counters) {
+ __cmp_ao(pkt_good);
+ __cmp_ao(pkt_bad);
+ __cmp_ao(pkt_key_not_found);
+ __cmp_ao(pkt_ao_required);
+ __cmp_ao(pkt_dropped_icmp);
+ }
+ __cmp_ao(ao_required);
+ __cmp_ao(accept_icmps);
+
+ test_ok("AO info get: %s", tst);
+#undef __cmp_ao
+}
+
+static void __setsockopt_checked(int sk, int optname, bool get,
+ void *optval, socklen_t *len,
+ int err, const char *tst, const char *tst2)
+{
+ int ret;
+
+ if (!tst)
+ tst = "";
+ if (!tst2)
+ tst2 = "";
+
+ errno = 0;
+ if (get)
+ ret = getsockopt(sk, IPPROTO_TCP, optname, optval, len);
+ else
+ ret = setsockopt(sk, IPPROTO_TCP, optname, optval, *len);
+ if (ret == -1) {
+ if (errno == err)
+ test_ok("%s%s", tst ?: "", tst2 ?: "");
+ else
+ test_fail("%s%s: %setsockopt() failed",
+ tst, tst2, get ? "g" : "s");
+ close(sk);
+ return;
+ }
+
+ if (err) {
+ test_fail("%s%s: %setsockopt() was expected to fail with %d",
+ tst, tst2, get ? "g" : "s", err);
+ } else {
+ test_ok("%s%s", tst ?: "", tst2 ?: "");
+ if (optname == TCP_AO_ADD_KEY) {
+ test_verify_socket_key(sk, optval);
+ } else if (optname == TCP_AO_INFO && !get) {
+ test_vefify_ao_info(sk, optval, tst2);
+ } else if (optname == TCP_AO_GET_KEYS) {
+ if (*len != sizeof(struct tcp_ao_getsockopt))
+ test_fail("%s%s: get keys returned wrong tcp_ao_getsockopt size",
+ tst, tst2);
+ }
+ }
+ close(sk);
+}
+
+static void setsockopt_checked(int sk, int optname, void *optval,
+ int err, const char *tst)
+{
+ const char *cmd = NULL;
+ socklen_t len;
+
+ switch (optname) {
+ case TCP_AO_ADD_KEY:
+ cmd = "key add: ";
+ len = sizeof(struct tcp_ao_add);
+ break;
+ case TCP_AO_DEL_KEY:
+ cmd = "key del: ";
+ len = sizeof(struct tcp_ao_del);
+ break;
+ case TCP_AO_INFO:
+ cmd = "AO info set: ";
+ len = sizeof(struct tcp_ao_info_opt);
+ break;
+ default:
+ break;
+ }
+
+ __setsockopt_checked(sk, optname, false, optval, &len, err, cmd, tst);
+}
+
+static int prepare_defs(int cmd, void *optval)
+{
+ int sk = socket(test_family, SOCK_STREAM, IPPROTO_TCP);
+
+ if (sk < 0)
+ test_error("socket()");
+
+ switch (cmd) {
+ case TCP_AO_ADD_KEY: {
+ struct tcp_ao_add *add = optval;
+
+ if (test_prepare_def_key(add, DEFAULT_TEST_PASSWORD, 0, this_ip_dest,
+ -1, 0, 100, 100))
+ test_error("prepare default tcp_ao_add");
+ break;
+ }
+ case TCP_AO_DEL_KEY: {
+ struct tcp_ao_del *del = optval;
+
+ if (test_add_key(sk, DEFAULT_TEST_PASSWORD, this_ip_dest,
+ DEFAULT_TEST_PREFIX, 100, 100))
+ test_error("add default key");
+ memset(del, 0, sizeof(struct tcp_ao_del));
+ del->sndid = 100;
+ del->rcvid = 100;
+ del->prefix = DEFAULT_TEST_PREFIX;
+ tcp_addr_to_sockaddr_in(&del->addr, &this_ip_dest, 0);
+ break;
+ }
+ case TCP_AO_INFO: {
+ struct tcp_ao_info_opt *info = optval;
+
+ if (test_add_key(sk, DEFAULT_TEST_PASSWORD, this_ip_dest,
+ DEFAULT_TEST_PREFIX, 100, 100))
+ test_error("add default key");
+ memset(info, 0, sizeof(struct tcp_ao_info_opt));
+ break;
+ }
+ case TCP_AO_GET_KEYS: {
+ struct tcp_ao_getsockopt *get = optval;
+
+ if (test_add_key(sk, DEFAULT_TEST_PASSWORD, this_ip_dest,
+ DEFAULT_TEST_PREFIX, 100, 100))
+ test_error("add default key");
+ memset(get, 0, sizeof(struct tcp_ao_getsockopt));
+ get->nkeys = 1;
+ get->get_all = 1;
+ break;
+ }
+ default:
+ test_error("unknown cmd");
+ }
+
+ return sk;
+}
+
+static void test_extend(int cmd, bool get, const char *tst, socklen_t under_size)
+{
+ struct {
+ union {
+ struct tcp_ao_add add;
+ struct tcp_ao_del del;
+ struct tcp_ao_getsockopt get;
+ struct tcp_ao_info_opt info;
+ };
+ char *extend[100];
+ } tmp_opt;
+ socklen_t extended_size = sizeof(tmp_opt);
+ int sk;
+
+ memset(&tmp_opt, 0, sizeof(tmp_opt));
+ sk = prepare_defs(cmd, &tmp_opt);
+ __setsockopt_checked(sk, cmd, get, &tmp_opt, &under_size,
+ EINVAL, tst, ": minimum size");
+
+ memset(&tmp_opt, 0, sizeof(tmp_opt));
+ sk = prepare_defs(cmd, &tmp_opt);
+ __setsockopt_checked(sk, cmd, get, &tmp_opt, &extended_size,
+ 0, tst, ": extended size");
+
+ memset(&tmp_opt, 0, sizeof(tmp_opt));
+ sk = prepare_defs(cmd, &tmp_opt);
+ __setsockopt_checked(sk, cmd, get, NULL, &extended_size,
+ EFAULT, tst, ": null optval");
+
+ if (get) {
+ memset(&tmp_opt, 0, sizeof(tmp_opt));
+ sk = prepare_defs(cmd, &tmp_opt);
+ __setsockopt_checked(sk, cmd, get, &tmp_opt, NULL,
+ EFAULT, tst, ": null optlen");
+ }
+}
+
+static void extend_tests(void)
+{
+ test_extend(TCP_AO_ADD_KEY, false, "AO add",
+ offsetof(struct tcp_ao_add, key));
+ test_extend(TCP_AO_DEL_KEY, false, "AO del",
+ offsetof(struct tcp_ao_del, keyflags));
+ test_extend(TCP_AO_INFO, false, "AO set info",
+ offsetof(struct tcp_ao_info_opt, pkt_dropped_icmp));
+ test_extend(TCP_AO_INFO, true, "AO get info", -1);
+ test_extend(TCP_AO_GET_KEYS, true, "AO get keys", -1);
+}
+
+static void test_optmem_limit(void)
+{
+ size_t i, keys_limit, current_optmem = test_get_optmem();
+ struct tcp_ao_add ao;
+ union tcp_addr net = {};
+ int sk;
+
+ if (inet_pton(TEST_FAMILY, TEST_NETWORK, &net) != 1)
+ test_error("Can't convert ip address %s", TEST_NETWORK);
+
+ sk = prepare_defs(TCP_AO_ADD_KEY, &ao);
+ keys_limit = current_optmem / KERNEL_TCP_AO_KEY_SZ_ROUND_UP;
+ for (i = 0;; i++) {
+ union tcp_addr key_peer;
+ int err;
+
+ key_peer = gen_tcp_addr(net, i + 1);
+ tcp_addr_to_sockaddr_in(&ao.addr, &key_peer, 0);
+ err = setsockopt(sk, IPPROTO_TCP, TCP_AO_ADD_KEY,
+ &ao, sizeof(ao));
+ if (!err) {
+ /*
+ * TCP_AO_ADD_KEY should be the same order as the real
+ * sizeof(struct tcp_ao_key) in kernel.
+ */
+ if (i <= keys_limit * 10)
+ continue;
+ test_fail("optmem limit test failed: added %zu key", i);
+ break;
+ }
+ if (i < keys_limit) {
+ test_fail("optmem limit test failed: couldn't add %zu key", i);
+ break;
+ }
+ test_ok("optmem limit was hit on adding %zu key", i);
+ break;
+ }
+ close(sk);
+}
+
+static void test_einval_add_key(void)
+{
+ struct tcp_ao_add ao;
+ int sk;
+
+ sk = prepare_defs(TCP_AO_ADD_KEY, &ao);
+ ao.keylen = TCP_AO_MAXKEYLEN + 1;
+ setsockopt_checked(sk, TCP_AO_ADD_KEY, &ao, EINVAL, "too big keylen");
+
+ sk = prepare_defs(TCP_AO_ADD_KEY, &ao);
+ ao.reserved = 1;
+ setsockopt_checked(sk, TCP_AO_ADD_KEY, &ao, EINVAL, "using reserved padding");
+
+ sk = prepare_defs(TCP_AO_ADD_KEY, &ao);
+ ao.reserved2 = 1;
+ setsockopt_checked(sk, TCP_AO_ADD_KEY, &ao, EINVAL, "using reserved2 padding");
+
+ /* tcp_ao_verify_ipv{4,6}() checks */
+ sk = prepare_defs(TCP_AO_ADD_KEY, &ao);
+ ao.addr.ss_family = AF_UNIX;
+ memcpy(&ao.addr, &SOCKADDR_ANY, sizeof(SOCKADDR_ANY));
+ setsockopt_checked(sk, TCP_AO_ADD_KEY, &ao, EINVAL, "wrong address family");
+
+ sk = prepare_defs(TCP_AO_ADD_KEY, &ao);
+ tcp_addr_to_sockaddr_in(&ao.addr, &this_ip_dest, 1234);
+ setsockopt_checked(sk, TCP_AO_ADD_KEY, &ao, EINVAL, "port (unsupported)");
+
+ sk = prepare_defs(TCP_AO_ADD_KEY, &ao);
+ ao.prefix = 0;
+ setsockopt_checked(sk, TCP_AO_ADD_KEY, &ao, EINVAL, "no prefix, addr");
+
+ sk = prepare_defs(TCP_AO_ADD_KEY, &ao);
+ ao.prefix = 0;
+ memcpy(&ao.addr, &SOCKADDR_ANY, sizeof(SOCKADDR_ANY));
+ setsockopt_checked(sk, TCP_AO_ADD_KEY, &ao, 0, "no prefix, any addr");
+
+ sk = prepare_defs(TCP_AO_ADD_KEY, &ao);
+ ao.prefix = 32;
+ memcpy(&ao.addr, &SOCKADDR_ANY, sizeof(SOCKADDR_ANY));
+ setsockopt_checked(sk, TCP_AO_ADD_KEY, &ao, EINVAL, "prefix, any addr");
+
+ sk = prepare_defs(TCP_AO_ADD_KEY, &ao);
+ ao.prefix = 129;
+ setsockopt_checked(sk, TCP_AO_ADD_KEY, &ao, EINVAL, "too big prefix");
+
+ sk = prepare_defs(TCP_AO_ADD_KEY, &ao);
+ ao.prefix = 2;
+ setsockopt_checked(sk, TCP_AO_ADD_KEY, &ao, EINVAL, "too short prefix");
+
+ sk = prepare_defs(TCP_AO_ADD_KEY, &ao);
+ ao.keyflags = (uint8_t)(-1);
+ setsockopt_checked(sk, TCP_AO_ADD_KEY, &ao, EINVAL, "bad key flags");
+
+ sk = prepare_defs(TCP_AO_ADD_KEY, &ao);
+ make_listen(sk);
+ ao.set_current = 1;
+ setsockopt_checked(sk, TCP_AO_ADD_KEY, &ao, EINVAL, "add current key on a listen socket");
+
+ sk = prepare_defs(TCP_AO_ADD_KEY, &ao);
+ make_listen(sk);
+ ao.set_rnext = 1;
+ setsockopt_checked(sk, TCP_AO_ADD_KEY, &ao, EINVAL, "add rnext key on a listen socket");
+
+ sk = prepare_defs(TCP_AO_ADD_KEY, &ao);
+ make_listen(sk);
+ ao.set_current = 1;
+ ao.set_rnext = 1;
+ setsockopt_checked(sk, TCP_AO_ADD_KEY, &ao, EINVAL, "add current+rnext key on a listen socket");
+
+ sk = prepare_defs(TCP_AO_ADD_KEY, &ao);
+ ao.set_current = 1;
+ setsockopt_checked(sk, TCP_AO_ADD_KEY, &ao, 0, "add key and set as current");
+
+ sk = prepare_defs(TCP_AO_ADD_KEY, &ao);
+ ao.set_rnext = 1;
+ setsockopt_checked(sk, TCP_AO_ADD_KEY, &ao, 0, "add key and set as rnext");
+
+ sk = prepare_defs(TCP_AO_ADD_KEY, &ao);
+ ao.set_current = 1;
+ ao.set_rnext = 1;
+ setsockopt_checked(sk, TCP_AO_ADD_KEY, &ao, 0, "add key and set as current+rnext");
+
+ sk = prepare_defs(TCP_AO_ADD_KEY, &ao);
+ ao.ifindex = 42;
+ setsockopt_checked(sk, TCP_AO_ADD_KEY, &ao, EINVAL,
+ "ifindex without TCP_AO_KEYF_IFNINDEX");
+
+ sk = prepare_defs(TCP_AO_ADD_KEY, &ao);
+ ao.keyflags |= TCP_AO_KEYF_IFINDEX;
+ ao.ifindex = 42;
+ setsockopt_checked(sk, TCP_AO_ADD_KEY, &ao, EINVAL, "non-existent VRF");
+ /*
+ * tcp_md5_do_lookup{,_any_l3index}() are checked in unsigned-md5
+ * see client_vrf_tests().
+ */
+
+ test_optmem_limit();
+
+ /* tcp_ao_parse_crypto() */
+ sk = prepare_defs(TCP_AO_ADD_KEY, &ao);
+ ao.maclen = 100;
+ setsockopt_checked(sk, TCP_AO_ADD_KEY, &ao, EMSGSIZE, "maclen bigger than TCP hdr");
+
+ sk = prepare_defs(TCP_AO_ADD_KEY, &ao);
+ strcpy(ao.alg_name, "imaginary hash algo");
+ setsockopt_checked(sk, TCP_AO_ADD_KEY, &ao, ENOENT, "bad algo");
+}
+
+static void test_einval_del_key(void)
+{
+ struct tcp_ao_del del;
+ int sk;
+
+ sk = prepare_defs(TCP_AO_DEL_KEY, &del);
+ del.reserved = 1;
+ setsockopt_checked(sk, TCP_AO_DEL_KEY, &del, EINVAL, "using reserved padding");
+
+ sk = prepare_defs(TCP_AO_DEL_KEY, &del);
+ del.reserved2 = 1;
+ setsockopt_checked(sk, TCP_AO_DEL_KEY, &del, EINVAL, "using reserved2 padding");
+
+ sk = prepare_defs(TCP_AO_DEL_KEY, &del);
+ make_listen(sk);
+ if (test_add_key(sk, DEFAULT_TEST_PASSWORD, this_ip_dest, DEFAULT_TEST_PREFIX, 0, 0))
+ test_error("add key");
+ del.set_current = 1;
+ setsockopt_checked(sk, TCP_AO_DEL_KEY, &del, EINVAL, "del and set current key on a listen socket");
+
+ sk = prepare_defs(TCP_AO_DEL_KEY, &del);
+ make_listen(sk);
+ if (test_add_key(sk, DEFAULT_TEST_PASSWORD, this_ip_dest, DEFAULT_TEST_PREFIX, 0, 0))
+ test_error("add key");
+ del.set_rnext = 1;
+ setsockopt_checked(sk, TCP_AO_DEL_KEY, &del, EINVAL, "del and set rnext key on a listen socket");
+
+ sk = prepare_defs(TCP_AO_DEL_KEY, &del);
+ make_listen(sk);
+ if (test_add_key(sk, DEFAULT_TEST_PASSWORD, this_ip_dest, DEFAULT_TEST_PREFIX, 0, 0))
+ test_error("add key");
+ del.set_current = 1;
+ del.set_rnext = 1;
+ setsockopt_checked(sk, TCP_AO_DEL_KEY, &del, EINVAL, "del and set current+rnext key on a listen socket");
+
+ sk = prepare_defs(TCP_AO_DEL_KEY, &del);
+ del.keyflags = (uint8_t)(-1);
+ setsockopt_checked(sk, TCP_AO_DEL_KEY, &del, EINVAL, "bad key flags");
+
+ sk = prepare_defs(TCP_AO_DEL_KEY, &del);
+ del.ifindex = 42;
+ setsockopt_checked(sk, TCP_AO_DEL_KEY, &del, EINVAL,
+ "ifindex without TCP_AO_KEYF_IFNINDEX");
+
+ sk = prepare_defs(TCP_AO_DEL_KEY, &del);
+ del.keyflags |= TCP_AO_KEYF_IFINDEX;
+ del.ifindex = 42;
+ setsockopt_checked(sk, TCP_AO_DEL_KEY, &del, ENOENT, "non-existent VRF");
+
+ sk = prepare_defs(TCP_AO_DEL_KEY, &del);
+ del.set_current = 1;
+ setsockopt_checked(sk, TCP_AO_DEL_KEY, &del, ENOENT, "set non-existing current key");
+
+ sk = prepare_defs(TCP_AO_DEL_KEY, &del);
+ del.set_rnext = 1;
+ setsockopt_checked(sk, TCP_AO_DEL_KEY, &del, ENOENT, "set non-existing rnext key");
+
+ sk = prepare_defs(TCP_AO_DEL_KEY, &del);
+ del.set_current = 1;
+ del.set_rnext = 1;
+ setsockopt_checked(sk, TCP_AO_DEL_KEY, &del, ENOENT, "set non-existing current+rnext key");
+
+ sk = prepare_defs(TCP_AO_DEL_KEY, &del);
+ if (test_add_key(sk, DEFAULT_TEST_PASSWORD, this_ip_dest, DEFAULT_TEST_PREFIX, 0, 0))
+ test_error("add key");
+ del.set_current = 1;
+ setsockopt_checked(sk, TCP_AO_DEL_KEY, &del, 0, "set current key");
+
+ sk = prepare_defs(TCP_AO_DEL_KEY, &del);
+ if (test_add_key(sk, DEFAULT_TEST_PASSWORD, this_ip_dest, DEFAULT_TEST_PREFIX, 0, 0))
+ test_error("add key");
+ del.set_rnext = 1;
+ setsockopt_checked(sk, TCP_AO_DEL_KEY, &del, 0, "set rnext key");
+
+ sk = prepare_defs(TCP_AO_DEL_KEY, &del);
+ if (test_add_key(sk, DEFAULT_TEST_PASSWORD, this_ip_dest, DEFAULT_TEST_PREFIX, 0, 0))
+ test_error("add key");
+ del.set_current = 1;
+ del.set_rnext = 1;
+ setsockopt_checked(sk, TCP_AO_DEL_KEY, &del, 0, "set current+rnext key");
+
+ sk = prepare_defs(TCP_AO_DEL_KEY, &del);
+ del.set_current = 1;
+ del.current_key = 100;
+ setsockopt_checked(sk, TCP_AO_DEL_KEY, &del, ENOENT, "set as current key to be removed");
+
+ sk = prepare_defs(TCP_AO_DEL_KEY, &del);
+ del.set_rnext = 1;
+ del.rnext = 100;
+ setsockopt_checked(sk, TCP_AO_DEL_KEY, &del, ENOENT, "set as rnext key to be removed");
+
+ sk = prepare_defs(TCP_AO_DEL_KEY, &del);
+ del.set_current = 1;
+ del.current_key = 100;
+ del.set_rnext = 1;
+ del.rnext = 100;
+ setsockopt_checked(sk, TCP_AO_DEL_KEY, &del, ENOENT, "set as current+rnext key to be removed");
+
+ sk = prepare_defs(TCP_AO_DEL_KEY, &del);
+ del.del_async = 1;
+ setsockopt_checked(sk, TCP_AO_DEL_KEY, &del, EINVAL, "async on non-listen");
+
+ sk = prepare_defs(TCP_AO_DEL_KEY, &del);
+ del.sndid = 101;
+ setsockopt_checked(sk, TCP_AO_DEL_KEY, &del, ENOENT, "non-existing sndid");
+
+ sk = prepare_defs(TCP_AO_DEL_KEY, &del);
+ del.rcvid = 101;
+ setsockopt_checked(sk, TCP_AO_DEL_KEY, &del, ENOENT, "non-existing rcvid");
+
+ sk = prepare_defs(TCP_AO_DEL_KEY, &del);
+ tcp_addr_to_sockaddr_in(&del.addr, &this_ip_addr, 0);
+ setsockopt_checked(sk, TCP_AO_DEL_KEY, &del, ENOENT, "incorrect addr");
+
+ sk = prepare_defs(TCP_AO_DEL_KEY, &del);
+ setsockopt_checked(sk, TCP_AO_DEL_KEY, &del, 0, "correct key delete");
+}
+
+static void test_einval_ao_info(void)
+{
+ struct tcp_ao_info_opt info;
+ int sk;
+
+ sk = prepare_defs(TCP_AO_INFO, &info);
+ make_listen(sk);
+ info.set_current = 1;
+ setsockopt_checked(sk, TCP_AO_INFO, &info, EINVAL, "set current key on a listen socket");
+
+ sk = prepare_defs(TCP_AO_INFO, &info);
+ make_listen(sk);
+ info.set_rnext = 1;
+ setsockopt_checked(sk, TCP_AO_INFO, &info, EINVAL, "set rnext key on a listen socket");
+
+ sk = prepare_defs(TCP_AO_INFO, &info);
+ make_listen(sk);
+ info.set_current = 1;
+ info.set_rnext = 1;
+ setsockopt_checked(sk, TCP_AO_INFO, &info, EINVAL, "set current+rnext key on a listen socket");
+
+ sk = prepare_defs(TCP_AO_INFO, &info);
+ info.reserved = 1;
+ setsockopt_checked(sk, TCP_AO_INFO, &info, EINVAL, "using reserved padding");
+
+ sk = prepare_defs(TCP_AO_INFO, &info);
+ info.reserved2 = 1;
+ setsockopt_checked(sk, TCP_AO_INFO, &info, EINVAL, "using reserved2 padding");
+
+ sk = prepare_defs(TCP_AO_INFO, &info);
+ info.accept_icmps = 1;
+ setsockopt_checked(sk, TCP_AO_INFO, &info, 0, "accept_icmps");
+
+ sk = prepare_defs(TCP_AO_INFO, &info);
+ info.ao_required = 1;
+ setsockopt_checked(sk, TCP_AO_INFO, &info, 0, "ao required");
+
+ if (!should_skip_test("ao required with MD5 key", KCONFIG_TCP_MD5)) {
+ sk = prepare_defs(TCP_AO_INFO, &info);
+ info.ao_required = 1;
+ if (test_set_md5(sk, tcp_md5_client, TEST_PREFIX, -1,
+ "long long secret")) {
+ test_error("setsockopt(TCP_MD5SIG_EXT)");
+ close(sk);
+ } else {
+ setsockopt_checked(sk, TCP_AO_INFO, &info, EKEYREJECTED,
+ "ao required with MD5 key");
+ }
+ }
+
+ sk = prepare_defs(TCP_AO_INFO, &info);
+ info.set_current = 1;
+ setsockopt_checked(sk, TCP_AO_INFO, &info, ENOENT, "set non-existing current key");
+
+ sk = prepare_defs(TCP_AO_INFO, &info);
+ info.set_rnext = 1;
+ setsockopt_checked(sk, TCP_AO_INFO, &info, ENOENT, "set non-existing rnext key");
+
+ sk = prepare_defs(TCP_AO_INFO, &info);
+ info.set_current = 1;
+ info.set_rnext = 1;
+ setsockopt_checked(sk, TCP_AO_INFO, &info, ENOENT, "set non-existing current+rnext key");
+
+ sk = prepare_defs(TCP_AO_INFO, &info);
+ info.set_current = 1;
+ info.current_key = 100;
+ setsockopt_checked(sk, TCP_AO_INFO, &info, 0, "set current key");
+
+ sk = prepare_defs(TCP_AO_INFO, &info);
+ info.set_rnext = 1;
+ info.rnext = 100;
+ setsockopt_checked(sk, TCP_AO_INFO, &info, 0, "set rnext key");
+
+ sk = prepare_defs(TCP_AO_INFO, &info);
+ info.set_current = 1;
+ info.set_rnext = 1;
+ info.current_key = 100;
+ info.rnext = 100;
+ setsockopt_checked(sk, TCP_AO_INFO, &info, 0, "set current+rnext key");
+
+ sk = prepare_defs(TCP_AO_INFO, &info);
+ info.set_counters = 1;
+ info.pkt_good = 321;
+ info.pkt_bad = 888;
+ info.pkt_key_not_found = 654;
+ info.pkt_ao_required = 987654;
+ info.pkt_dropped_icmp = 10000;
+ setsockopt_checked(sk, TCP_AO_INFO, &info, 0, "set counters");
+
+ sk = prepare_defs(TCP_AO_INFO, &info);
+ setsockopt_checked(sk, TCP_AO_INFO, &info, 0, "no-op");
+}
+
+static void getsockopt_checked(int sk, struct tcp_ao_getsockopt *optval,
+ int err, const char *tst)
+{
+ socklen_t len = sizeof(struct tcp_ao_getsockopt);
+
+ __setsockopt_checked(sk, TCP_AO_GET_KEYS, true, optval, &len, err,
+ "get keys: ", tst);
+}
+
+static void test_einval_get_keys(void)
+{
+ struct tcp_ao_getsockopt out;
+ int sk;
+
+ sk = socket(test_family, SOCK_STREAM, IPPROTO_TCP);
+ if (sk < 0)
+ test_error("socket()");
+ getsockopt_checked(sk, &out, ENOENT, "no ao_info");
+
+ sk = prepare_defs(TCP_AO_GET_KEYS, &out);
+ getsockopt_checked(sk, &out, 0, "proper tcp_ao_get_mkts()");
+
+ sk = prepare_defs(TCP_AO_GET_KEYS, &out);
+ out.pkt_good = 643;
+ getsockopt_checked(sk, &out, EINVAL, "set out-only pkt_good counter");
+
+ sk = prepare_defs(TCP_AO_GET_KEYS, &out);
+ out.pkt_bad = 94;
+ getsockopt_checked(sk, &out, EINVAL, "set out-only pkt_bad counter");
+
+ sk = prepare_defs(TCP_AO_GET_KEYS, &out);
+ out.keyflags = (uint8_t)(-1);
+ getsockopt_checked(sk, &out, EINVAL, "bad keyflags");
+
+ sk = prepare_defs(TCP_AO_GET_KEYS, &out);
+ out.ifindex = 42;
+ getsockopt_checked(sk, &out, EINVAL,
+ "ifindex without TCP_AO_KEYF_IFNINDEX");
+
+ sk = prepare_defs(TCP_AO_GET_KEYS, &out);
+ out.reserved = 1;
+ getsockopt_checked(sk, &out, EINVAL, "using reserved field");
+
+ sk = prepare_defs(TCP_AO_GET_KEYS, &out);
+ out.get_all = 0;
+ out.prefix = 0;
+ tcp_addr_to_sockaddr_in(&out.addr, &this_ip_dest, 0);
+ getsockopt_checked(sk, &out, EINVAL, "no prefix, addr");
+
+ sk = prepare_defs(TCP_AO_GET_KEYS, &out);
+ out.get_all = 0;
+ out.prefix = 0;
+ memcpy(&out.addr, &SOCKADDR_ANY, sizeof(SOCKADDR_ANY));
+ getsockopt_checked(sk, &out, 0, "no prefix, any addr");
+
+ sk = prepare_defs(TCP_AO_GET_KEYS, &out);
+ out.get_all = 0;
+ out.prefix = 32;
+ memcpy(&out.addr, &SOCKADDR_ANY, sizeof(SOCKADDR_ANY));
+ getsockopt_checked(sk, &out, EINVAL, "prefix, any addr");
+
+ sk = prepare_defs(TCP_AO_GET_KEYS, &out);
+ out.get_all = 0;
+ out.prefix = 129;
+ tcp_addr_to_sockaddr_in(&out.addr, &this_ip_dest, 0);
+ getsockopt_checked(sk, &out, EINVAL, "too big prefix");
+
+ sk = prepare_defs(TCP_AO_GET_KEYS, &out);
+ out.get_all = 0;
+ out.prefix = 2;
+ tcp_addr_to_sockaddr_in(&out.addr, &this_ip_dest, 0);
+ getsockopt_checked(sk, &out, EINVAL, "too short prefix");
+
+ sk = prepare_defs(TCP_AO_GET_KEYS, &out);
+ out.get_all = 0;
+ out.prefix = DEFAULT_TEST_PREFIX;
+ tcp_addr_to_sockaddr_in(&out.addr, &this_ip_dest, 0);
+ getsockopt_checked(sk, &out, 0, "prefix + addr");
+
+ sk = prepare_defs(TCP_AO_GET_KEYS, &out);
+ out.get_all = 1;
+ out.prefix = DEFAULT_TEST_PREFIX;
+ getsockopt_checked(sk, &out, EINVAL, "get_all + prefix");
+
+ sk = prepare_defs(TCP_AO_GET_KEYS, &out);
+ out.get_all = 1;
+ tcp_addr_to_sockaddr_in(&out.addr, &this_ip_dest, 0);
+ getsockopt_checked(sk, &out, EINVAL, "get_all + addr");
+
+ sk = prepare_defs(TCP_AO_GET_KEYS, &out);
+ out.get_all = 1;
+ out.sndid = 1;
+ getsockopt_checked(sk, &out, EINVAL, "get_all + sndid");
+
+ sk = prepare_defs(TCP_AO_GET_KEYS, &out);
+ out.get_all = 1;
+ out.rcvid = 1;
+ getsockopt_checked(sk, &out, EINVAL, "get_all + rcvid");
+
+ sk = prepare_defs(TCP_AO_GET_KEYS, &out);
+ out.get_all = 0;
+ out.is_current = 1;
+ out.prefix = DEFAULT_TEST_PREFIX;
+ getsockopt_checked(sk, &out, EINVAL, "current + prefix");
+
+ sk = prepare_defs(TCP_AO_GET_KEYS, &out);
+ out.get_all = 0;
+ out.is_current = 1;
+ tcp_addr_to_sockaddr_in(&out.addr, &this_ip_dest, 0);
+ getsockopt_checked(sk, &out, EINVAL, "current + addr");
+
+ sk = prepare_defs(TCP_AO_GET_KEYS, &out);
+ out.get_all = 0;
+ out.is_current = 1;
+ out.sndid = 1;
+ getsockopt_checked(sk, &out, EINVAL, "current + sndid");
+
+ sk = prepare_defs(TCP_AO_GET_KEYS, &out);
+ out.get_all = 0;
+ out.is_current = 1;
+ out.rcvid = 1;
+ getsockopt_checked(sk, &out, EINVAL, "current + rcvid");
+
+ sk = prepare_defs(TCP_AO_GET_KEYS, &out);
+ out.get_all = 0;
+ out.is_rnext = 1;
+ out.prefix = DEFAULT_TEST_PREFIX;
+ getsockopt_checked(sk, &out, EINVAL, "rnext + prefix");
+
+ sk = prepare_defs(TCP_AO_GET_KEYS, &out);
+ out.get_all = 0;
+ out.is_rnext = 1;
+ tcp_addr_to_sockaddr_in(&out.addr, &this_ip_dest, 0);
+ getsockopt_checked(sk, &out, EINVAL, "rnext + addr");
+
+ sk = prepare_defs(TCP_AO_GET_KEYS, &out);
+ out.get_all = 0;
+ out.is_rnext = 1;
+ out.sndid = 1;
+ getsockopt_checked(sk, &out, EINVAL, "rnext + sndid");
+
+ sk = prepare_defs(TCP_AO_GET_KEYS, &out);
+ out.get_all = 0;
+ out.is_rnext = 1;
+ out.rcvid = 1;
+ getsockopt_checked(sk, &out, EINVAL, "rnext + rcvid");
+
+ sk = prepare_defs(TCP_AO_GET_KEYS, &out);
+ out.get_all = 1;
+ out.is_current = 1;
+ getsockopt_checked(sk, &out, EINVAL, "get_all + current");
+
+ sk = prepare_defs(TCP_AO_GET_KEYS, &out);
+ out.get_all = 1;
+ out.is_rnext = 1;
+ getsockopt_checked(sk, &out, EINVAL, "get_all + rnext");
+
+ sk = prepare_defs(TCP_AO_GET_KEYS, &out);
+ out.get_all = 0;
+ out.is_current = 1;
+ out.is_rnext = 1;
+ getsockopt_checked(sk, &out, 0, "current + rnext");
+}
+
+static void einval_tests(void)
+{
+ test_einval_add_key();
+ test_einval_del_key();
+ test_einval_ao_info();
+ test_einval_get_keys();
+}
+
+static void duplicate_tests(void)
+{
+ union tcp_addr network_dup;
+ struct tcp_ao_add ao, ao2;
+ int sk;
+
+ sk = prepare_defs(TCP_AO_ADD_KEY, &ao);
+ if (setsockopt(sk, IPPROTO_TCP, TCP_AO_ADD_KEY, &ao, sizeof(ao)))
+ test_error("setsockopt()");
+ setsockopt_checked(sk, TCP_AO_ADD_KEY, &ao, EEXIST, "duplicate: full copy");
+
+ sk = prepare_defs(TCP_AO_ADD_KEY, &ao);
+ ao2 = ao;
+ memcpy(&ao2.addr, &SOCKADDR_ANY, sizeof(SOCKADDR_ANY));
+ ao2.prefix = 0;
+ if (setsockopt(sk, IPPROTO_TCP, TCP_AO_ADD_KEY, &ao2, sizeof(ao)))
+ test_error("setsockopt()");
+ setsockopt_checked(sk, TCP_AO_ADD_KEY, &ao, EEXIST, "duplicate: any addr key on the socket");
+
+ sk = prepare_defs(TCP_AO_ADD_KEY, &ao);
+ if (setsockopt(sk, IPPROTO_TCP, TCP_AO_ADD_KEY, &ao, sizeof(ao)))
+ test_error("setsockopt()");
+ memcpy(&ao.addr, &SOCKADDR_ANY, sizeof(SOCKADDR_ANY));
+ ao.prefix = 0;
+ setsockopt_checked(sk, TCP_AO_ADD_KEY, &ao, EEXIST, "duplicate: add any addr key");
+
+ if (inet_pton(TEST_FAMILY, TEST_NETWORK, &network_dup) != 1)
+ test_error("Can't convert ip address %s", TEST_NETWORK);
+ sk = prepare_defs(TCP_AO_ADD_KEY, &ao);
+ if (setsockopt(sk, IPPROTO_TCP, TCP_AO_ADD_KEY, &ao, sizeof(ao)))
+ test_error("setsockopt()");
+ if (test_prepare_def_key(&ao, "password", 0, network_dup,
+ 16, 0, 100, 100))
+ test_error("prepare default tcp_ao_add");
+ setsockopt_checked(sk, TCP_AO_ADD_KEY, &ao, EEXIST, "duplicate: add any addr for the same subnet");
+
+ sk = prepare_defs(TCP_AO_ADD_KEY, &ao);
+ if (setsockopt(sk, IPPROTO_TCP, TCP_AO_ADD_KEY, &ao, sizeof(ao)))
+ test_error("setsockopt()");
+ setsockopt_checked(sk, TCP_AO_ADD_KEY, &ao, EEXIST, "duplicate: full copy of a key");
+
+ sk = prepare_defs(TCP_AO_ADD_KEY, &ao);
+ if (setsockopt(sk, IPPROTO_TCP, TCP_AO_ADD_KEY, &ao, sizeof(ao)))
+ test_error("setsockopt()");
+ ao.rcvid = 101;
+ setsockopt_checked(sk, TCP_AO_ADD_KEY, &ao, EEXIST, "duplicate: RecvID differs");
+
+ sk = prepare_defs(TCP_AO_ADD_KEY, &ao);
+ if (setsockopt(sk, IPPROTO_TCP, TCP_AO_ADD_KEY, &ao, sizeof(ao)))
+ test_error("setsockopt()");
+ ao.sndid = 101;
+ setsockopt_checked(sk, TCP_AO_ADD_KEY, &ao, EEXIST, "duplicate: SendID differs");
+}
+
+static void *client_fn(void *arg)
+{
+ if (inet_pton(TEST_FAMILY, __TEST_CLIENT_IP(2), &tcp_md5_client) != 1)
+ test_error("Can't convert ip address");
+ extend_tests();
+ einval_tests();
+ duplicate_tests();
+ /*
+ * TODO: check getsockopt(TCP_AO_GET_KEYS) with different filters
+ * returning proper nr & keys;
+ */
+
+ return NULL;
+}
+
+int main(int argc, char *argv[])
+{
+ test_init(120, client_fn, NULL);
+ return 0;
+}
diff --git a/tools/testing/selftests/net/tcp_ao/settings b/tools/testing/selftests/net/tcp_ao/settings
new file mode 100644
index 000000000000..6091b45d226b
--- /dev/null
+++ b/tools/testing/selftests/net/tcp_ao/settings
@@ -0,0 +1 @@
+timeout=120
diff --git a/tools/testing/selftests/net/tcp_ao/unsigned-md5.c b/tools/testing/selftests/net/tcp_ao/unsigned-md5.c
new file mode 100644
index 000000000000..6b59a652159f
--- /dev/null
+++ b/tools/testing/selftests/net/tcp_ao/unsigned-md5.c
@@ -0,0 +1,741 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Author: Dmitry Safonov <dima@arista.com> */
+#include <inttypes.h>
+#include "aolib.h"
+
+#define fault(type) (inj == FAULT_ ## type)
+static const char *md5_password = "Some evil genius, enemy to mankind, must have been the first contriver.";
+static const char *ao_password = DEFAULT_TEST_PASSWORD;
+
+static union tcp_addr client2;
+static union tcp_addr client3;
+
+static const int test_vrf_ifindex = 200;
+static const uint8_t test_vrf_tabid = 42;
+static void setup_vrfs(void)
+{
+ int err;
+
+ if (!kernel_config_has(KCONFIG_NET_VRF))
+ return;
+
+ err = add_vrf("ksft-vrf", test_vrf_tabid, test_vrf_ifindex, -1);
+ if (err)
+ test_error("Failed to add a VRF: %d", err);
+
+ err = link_set_up("ksft-vrf");
+ if (err)
+ test_error("Failed to bring up a VRF");
+
+ err = ip_route_add_vrf(veth_name, TEST_FAMILY,
+ this_ip_addr, this_ip_dest, test_vrf_tabid);
+ if (err)
+ test_error("Failed to add a route to VRF: %d", err);
+}
+
+static void try_accept(const char *tst_name, unsigned int port,
+ union tcp_addr *md5_addr, uint8_t md5_prefix,
+ union tcp_addr *ao_addr, uint8_t ao_prefix,
+ bool set_ao_required,
+ uint8_t sndid, uint8_t rcvid, uint8_t vrf,
+ const char *cnt_name, test_cnt cnt_expected,
+ int needs_tcp_md5, fault_t inj)
+{
+ struct tcp_ao_counters ao_cnt1, ao_cnt2;
+ uint64_t before_cnt = 0, after_cnt = 0; /* silence GCC */
+ int lsk, err, sk = 0;
+ time_t timeout;
+
+ if (needs_tcp_md5 && should_skip_test(tst_name, KCONFIG_TCP_MD5))
+ return;
+
+ lsk = test_listen_socket(this_ip_addr, port, 1);
+
+ if (md5_addr && test_set_md5(lsk, *md5_addr, md5_prefix, -1, md5_password))
+ test_error("setsockopt(TCP_MD5SIG_EXT)");
+
+ if (ao_addr && test_add_key(lsk, ao_password,
+ *ao_addr, ao_prefix, sndid, rcvid))
+ test_error("setsockopt(TCP_AO_ADD_KEY)");
+
+ if (set_ao_required && test_set_ao_flags(lsk, true, false))
+ test_error("setsockopt(TCP_AO_INFO)");
+
+ if (cnt_name)
+ before_cnt = netstat_get_one(cnt_name, NULL);
+ if (ao_addr && test_get_tcp_ao_counters(lsk, &ao_cnt1))
+ test_error("test_get_tcp_ao_counters()");
+
+ synchronize_threads(); /* preparations done */
+
+ timeout = fault(TIMEOUT) ? TEST_RETRANSMIT_SEC : TEST_TIMEOUT_SEC;
+ err = test_wait_fd(lsk, timeout, 0);
+ if (err == -ETIMEDOUT) {
+ if (!fault(TIMEOUT))
+ test_fail("timed out for accept()");
+ } else if (err < 0) {
+ test_error("test_wait_fd()");
+ } else {
+ if (fault(TIMEOUT))
+ test_fail("ready to accept");
+
+ sk = accept(lsk, NULL, NULL);
+ if (sk < 0) {
+ test_error("accept()");
+ } else {
+ if (fault(TIMEOUT))
+ test_fail("%s: accepted", tst_name);
+ }
+ }
+
+ if (ao_addr && test_get_tcp_ao_counters(lsk, &ao_cnt2))
+ test_error("test_get_tcp_ao_counters()");
+ close(lsk);
+
+ if (!cnt_name) {
+ test_ok("%s: no counter checks", tst_name);
+ goto out;
+ }
+
+ after_cnt = netstat_get_one(cnt_name, NULL);
+
+ if (after_cnt <= before_cnt) {
+ test_fail("%s: %s counter did not increase: %zu <= %zu",
+ tst_name, cnt_name, after_cnt, before_cnt);
+ } else {
+ test_ok("%s: counter %s increased %zu => %zu",
+ tst_name, cnt_name, before_cnt, after_cnt);
+ }
+ if (ao_addr)
+ test_tcp_ao_counters_cmp(tst_name, &ao_cnt1, &ao_cnt2, cnt_expected);
+
+out:
+ synchronize_threads(); /* test_kill_sk() */
+ if (sk > 0)
+ test_kill_sk(sk);
+}
+
+static void server_add_routes(void)
+{
+ int family = TEST_FAMILY;
+
+ synchronize_threads(); /* client_add_ips() */
+
+ if (ip_route_add(veth_name, family, this_ip_addr, client2))
+ test_error("Failed to add route");
+ if (ip_route_add(veth_name, family, this_ip_addr, client3))
+ test_error("Failed to add route");
+}
+
+static void server_add_fail_tests(unsigned int *port)
+{
+ union tcp_addr addr_any = {};
+
+ try_accept("TCP-AO established: add TCP-MD5 key", (*port)++, NULL, 0,
+ &addr_any, 0, 0, 100, 100, 0, "TCPAOGood", TEST_CNT_GOOD,
+ 1, 0);
+ try_accept("TCP-MD5 established: add TCP-AO key", (*port)++, &addr_any,
+ 0, NULL, 0, 0, 0, 0, 0, NULL, 0, 1, 0);
+ try_accept("non-signed established: add TCP-AO key", (*port)++, NULL, 0,
+ NULL, 0, 0, 0, 0, 0, "CurrEstab", 0, 0, 0);
+}
+
+static void server_vrf_tests(unsigned int *port)
+{
+ setup_vrfs();
+}
+
+static void *server_fn(void *arg)
+{
+ unsigned int port = test_server_port;
+ union tcp_addr addr_any = {};
+
+ server_add_routes();
+
+ try_accept("AO server (INADDR_ANY): AO client", port++, NULL, 0,
+ &addr_any, 0, 0, 100, 100, 0, "TCPAOGood",
+ TEST_CNT_GOOD, 0, 0);
+ try_accept("AO server (INADDR_ANY): MD5 client", port++, NULL, 0,
+ &addr_any, 0, 0, 100, 100, 0, "TCPMD5Unexpected",
+ 0, 1, FAULT_TIMEOUT);
+ try_accept("AO server (INADDR_ANY): no sign client", port++, NULL, 0,
+ &addr_any, 0, 0, 100, 100, 0, "TCPAORequired",
+ TEST_CNT_AO_REQUIRED, 0, FAULT_TIMEOUT);
+ try_accept("AO server (AO_REQUIRED): AO client", port++, NULL, 0,
+ &this_ip_dest, TEST_PREFIX, true,
+ 100, 100, 0, "TCPAOGood", TEST_CNT_GOOD, 0, 0);
+ try_accept("AO server (AO_REQUIRED): unsigned client", port++, NULL, 0,
+ &this_ip_dest, TEST_PREFIX, true,
+ 100, 100, 0, "TCPAORequired",
+ TEST_CNT_AO_REQUIRED, 0, FAULT_TIMEOUT);
+
+ try_accept("MD5 server (INADDR_ANY): AO client", port++, &addr_any, 0,
+ NULL, 0, 0, 0, 0, 0, "TCPAOKeyNotFound",
+ 0, 1, FAULT_TIMEOUT);
+ try_accept("MD5 server (INADDR_ANY): MD5 client", port++, &addr_any, 0,
+ NULL, 0, 0, 0, 0, 0, NULL, 0, 1, 0);
+ try_accept("MD5 server (INADDR_ANY): no sign client", port++, &addr_any,
+ 0, NULL, 0, 0, 0, 0, 0, "TCPMD5NotFound",
+ 0, 1, FAULT_TIMEOUT);
+
+ try_accept("no sign server: AO client", port++, NULL, 0,
+ NULL, 0, 0, 0, 0, 0, "TCPAOKeyNotFound",
+ TEST_CNT_AO_KEY_NOT_FOUND, 0, FAULT_TIMEOUT);
+ try_accept("no sign server: MD5 client", port++, NULL, 0,
+ NULL, 0, 0, 0, 0, 0, "TCPMD5Unexpected",
+ 0, 1, FAULT_TIMEOUT);
+ try_accept("no sign server: no sign client", port++, NULL, 0,
+ NULL, 0, 0, 0, 0, 0, "CurrEstab", 0, 0, 0);
+
+ try_accept("AO+MD5 server: AO client (matching)", port++,
+ &this_ip_dest, TEST_PREFIX, &client2, TEST_PREFIX, 0,
+ 100, 100, 0, "TCPAOGood", TEST_CNT_GOOD, 1, 0);
+ try_accept("AO+MD5 server: AO client (misconfig, matching MD5)", port++,
+ &this_ip_dest, TEST_PREFIX, &client2, TEST_PREFIX, 0,
+ 100, 100, 0, "TCPAOKeyNotFound", TEST_CNT_AO_KEY_NOT_FOUND,
+ 1, FAULT_TIMEOUT);
+ try_accept("AO+MD5 server: AO client (misconfig, non-matching)", port++,
+ &this_ip_dest, TEST_PREFIX, &client2, TEST_PREFIX, 0,
+ 100, 100, 0, "TCPAOKeyNotFound", TEST_CNT_AO_KEY_NOT_FOUND,
+ 1, FAULT_TIMEOUT);
+ try_accept("AO+MD5 server: MD5 client (matching)", port++,
+ &this_ip_dest, TEST_PREFIX, &client2, TEST_PREFIX, 0,
+ 100, 100, 0, NULL, 0, 1, 0);
+ try_accept("AO+MD5 server: MD5 client (misconfig, matching AO)", port++,
+ &this_ip_dest, TEST_PREFIX, &client2, TEST_PREFIX, 0,
+ 100, 100, 0, "TCPMD5Unexpected", 0, 1, FAULT_TIMEOUT);
+ try_accept("AO+MD5 server: MD5 client (misconfig, non-matching)", port++,
+ &this_ip_dest, TEST_PREFIX, &client2, TEST_PREFIX, 0,
+ 100, 100, 0, "TCPMD5Unexpected", 0, 1, FAULT_TIMEOUT);
+ try_accept("AO+MD5 server: no sign client (unmatched)", port++,
+ &this_ip_dest, TEST_PREFIX, &client2, TEST_PREFIX, 0,
+ 100, 100, 0, "CurrEstab", 0, 1, 0);
+ try_accept("AO+MD5 server: no sign client (misconfig, matching AO)",
+ port++, &this_ip_dest, TEST_PREFIX, &client2, TEST_PREFIX, 0,
+ 100, 100, 0, "TCPAORequired",
+ TEST_CNT_AO_REQUIRED, 1, FAULT_TIMEOUT);
+ try_accept("AO+MD5 server: no sign client (misconfig, matching MD5)",
+ port++, &this_ip_dest, TEST_PREFIX, &client2, TEST_PREFIX, 0,
+ 100, 100, 0, "TCPMD5NotFound", 0, 1, FAULT_TIMEOUT);
+
+ try_accept("AO+MD5 server: client with both [TCP-MD5] and TCP-AO keys",
+ port++, &this_ip_dest, TEST_PREFIX, &client2, TEST_PREFIX, 0,
+ 100, 100, 0, NULL, 0, 1, FAULT_TIMEOUT);
+ try_accept("AO+MD5 server: client with both TCP-MD5 and [TCP-AO] keys",
+ port++, &this_ip_dest, TEST_PREFIX, &client2, TEST_PREFIX, 0,
+ 100, 100, 0, NULL, 0, 1, FAULT_TIMEOUT);
+
+ server_add_fail_tests(&port);
+
+ server_vrf_tests(&port);
+
+ /* client exits */
+ synchronize_threads();
+ return NULL;
+}
+
+static int client_bind(int sk, union tcp_addr bind_addr)
+{
+#ifdef IPV6_TEST
+ struct sockaddr_in6 addr = {
+ .sin6_family = AF_INET6,
+ .sin6_port = 0,
+ .sin6_addr = bind_addr.a6,
+ };
+#else
+ struct sockaddr_in addr = {
+ .sin_family = AF_INET,
+ .sin_port = 0,
+ .sin_addr = bind_addr.a4,
+ };
+#endif
+ return bind(sk, &addr, sizeof(addr));
+}
+
+static void try_connect(const char *tst_name, unsigned int port,
+ union tcp_addr *md5_addr, uint8_t md5_prefix,
+ union tcp_addr *ao_addr, uint8_t ao_prefix,
+ uint8_t sndid, uint8_t rcvid, uint8_t vrf,
+ fault_t inj, int needs_tcp_md5, union tcp_addr *bind_addr)
+{
+ time_t timeout;
+ int sk, ret;
+
+ if (needs_tcp_md5 && should_skip_test(tst_name, KCONFIG_TCP_MD5))
+ return;
+
+ sk = socket(test_family, SOCK_STREAM, IPPROTO_TCP);
+ if (sk < 0)
+ test_error("socket()");
+
+ if (bind_addr && client_bind(sk, *bind_addr))
+ test_error("bind()");
+
+ if (md5_addr && test_set_md5(sk, *md5_addr, md5_prefix, -1, md5_password))
+ test_error("setsockopt(TCP_MD5SIG_EXT)");
+
+ if (ao_addr && test_add_key(sk, ao_password, *ao_addr,
+ ao_prefix, sndid, rcvid))
+ test_error("setsockopt(TCP_AO_ADD_KEY)");
+
+ synchronize_threads(); /* preparations done */
+
+ timeout = fault(TIMEOUT) ? TEST_RETRANSMIT_SEC : TEST_TIMEOUT_SEC;
+ ret = _test_connect_socket(sk, this_ip_dest, port, timeout);
+
+ if (ret < 0) {
+ if (fault(KEYREJECT) && ret == -EKEYREJECTED)
+ test_ok("%s: connect() was prevented", tst_name);
+ else if (ret == -ETIMEDOUT && fault(TIMEOUT))
+ test_ok("%s", tst_name);
+ else if (ret == -ECONNREFUSED &&
+ (fault(TIMEOUT) || fault(KEYREJECT)))
+ test_ok("%s: refused to connect", tst_name);
+ else
+ test_error("%s: connect() returned %d", tst_name, ret);
+ goto out;
+ }
+
+ if (fault(TIMEOUT) || fault(KEYREJECT))
+ test_fail("%s: connected", tst_name);
+ else
+ test_ok("%s: connected", tst_name);
+
+out:
+ synchronize_threads(); /* test_kill_sk() */
+ /* _test_connect_socket() cleans up on failure */
+ if (ret > 0)
+ test_kill_sk(sk);
+}
+
+#define PREINSTALL_MD5_FIRST BIT(0)
+#define PREINSTALL_AO BIT(1)
+#define POSTINSTALL_AO BIT(2)
+#define PREINSTALL_MD5 BIT(3)
+#define POSTINSTALL_MD5 BIT(4)
+
+static int try_add_key_vrf(int sk, union tcp_addr in_addr, uint8_t prefix,
+ int vrf, uint8_t sndid, uint8_t rcvid,
+ bool set_ao_required)
+{
+ uint8_t keyflags = 0;
+
+ if (vrf >= 0)
+ keyflags |= TCP_AO_KEYF_IFINDEX;
+ else
+ vrf = 0;
+ if (set_ao_required) {
+ int err = test_set_ao_flags(sk, true, 0);
+
+ if (err)
+ return err;
+ }
+ return test_add_key_vrf(sk, ao_password, keyflags, in_addr, prefix,
+ (uint8_t)vrf, sndid, rcvid);
+}
+
+static bool test_continue(const char *tst_name, int err,
+ fault_t inj, bool added_ao)
+{
+ bool expected_to_fail;
+
+ expected_to_fail = fault(PREINSTALL_AO) && added_ao;
+ expected_to_fail |= fault(PREINSTALL_MD5) && !added_ao;
+
+ if (!err) {
+ if (!expected_to_fail)
+ return true;
+ test_fail("%s: setsockopt()s were expected to fail", tst_name);
+ return false;
+ }
+ if (err != -EKEYREJECTED || !expected_to_fail) {
+ test_error("%s: setsockopt(%s) = %d", tst_name,
+ added_ao ? "TCP_AO_ADD_KEY" : "TCP_MD5SIG_EXT", err);
+ return false;
+ }
+ test_ok("%s: prefailed as expected: %m", tst_name);
+ return false;
+}
+
+static int open_add(const char *tst_name, unsigned int port,
+ unsigned int strategy,
+ union tcp_addr md5_addr, uint8_t md5_prefix, int md5_vrf,
+ union tcp_addr ao_addr, uint8_t ao_prefix,
+ int ao_vrf, bool set_ao_required,
+ uint8_t sndid, uint8_t rcvid,
+ fault_t inj)
+{
+ int sk;
+
+ sk = socket(test_family, SOCK_STREAM, IPPROTO_TCP);
+ if (sk < 0)
+ test_error("socket()");
+
+ if (client_bind(sk, this_ip_addr))
+ test_error("bind()");
+
+ if (strategy & PREINSTALL_MD5_FIRST) {
+ if (test_set_md5(sk, md5_addr, md5_prefix, md5_vrf, md5_password))
+ test_error("setsockopt(TCP_MD5SIG_EXT)");
+ }
+
+ if (strategy & PREINSTALL_AO) {
+ int err = try_add_key_vrf(sk, ao_addr, ao_prefix, ao_vrf,
+ sndid, rcvid, set_ao_required);
+
+ if (!test_continue(tst_name, err, inj, true)) {
+ close(sk);
+ return -1;
+ }
+ }
+
+ if (strategy & PREINSTALL_MD5) {
+ errno = 0;
+ test_set_md5(sk, md5_addr, md5_prefix, md5_vrf, md5_password);
+ if (!test_continue(tst_name, -errno, inj, false)) {
+ close(sk);
+ return -1;
+ }
+ }
+
+ return sk;
+}
+
+static void try_to_preadd(const char *tst_name, unsigned int port,
+ unsigned int strategy,
+ union tcp_addr md5_addr, uint8_t md5_prefix,
+ int md5_vrf,
+ union tcp_addr ao_addr, uint8_t ao_prefix,
+ int ao_vrf, bool set_ao_required,
+ uint8_t sndid, uint8_t rcvid,
+ int needs_tcp_md5, int needs_vrf, fault_t inj)
+{
+ int sk;
+
+ if (needs_tcp_md5 && should_skip_test(tst_name, KCONFIG_TCP_MD5))
+ return;
+ if (needs_vrf && should_skip_test(tst_name, KCONFIG_NET_VRF))
+ return;
+
+ sk = open_add(tst_name, port, strategy, md5_addr, md5_prefix, md5_vrf,
+ ao_addr, ao_prefix, ao_vrf, set_ao_required,
+ sndid, rcvid, inj);
+ if (sk < 0)
+ return;
+
+ test_ok("%s", tst_name);
+ close(sk);
+}
+
+static void try_to_add(const char *tst_name, unsigned int port,
+ unsigned int strategy,
+ union tcp_addr md5_addr, uint8_t md5_prefix,
+ int md5_vrf,
+ union tcp_addr ao_addr, uint8_t ao_prefix,
+ int ao_vrf, uint8_t sndid, uint8_t rcvid,
+ int needs_tcp_md5, fault_t inj)
+{
+ time_t timeout;
+ int sk, ret;
+
+ if (needs_tcp_md5 && should_skip_test(tst_name, KCONFIG_TCP_MD5))
+ return;
+
+ sk = open_add(tst_name, port, strategy, md5_addr, md5_prefix, md5_vrf,
+ ao_addr, ao_prefix, ao_vrf, 0, sndid, rcvid, inj);
+ if (sk < 0)
+ return;
+
+ synchronize_threads(); /* preparations done */
+
+ timeout = fault(TIMEOUT) ? TEST_RETRANSMIT_SEC : TEST_TIMEOUT_SEC;
+ ret = _test_connect_socket(sk, this_ip_dest, port, timeout);
+
+ if (ret <= 0) {
+ test_error("%s: connect() returned %d", tst_name, ret);
+ goto out;
+ }
+
+ if (strategy & POSTINSTALL_MD5) {
+ if (test_set_md5(sk, md5_addr, md5_prefix, md5_vrf, md5_password)) {
+ if (fault(POSTINSTALL)) {
+ test_ok("%s: postfailed as expected", tst_name);
+ goto out;
+ } else {
+ test_error("setsockopt(TCP_MD5SIG_EXT)");
+ }
+ } else if (fault(POSTINSTALL)) {
+ test_fail("%s: post setsockopt() was expected to fail", tst_name);
+ goto out;
+ }
+ }
+
+ if (strategy & POSTINSTALL_AO) {
+ if (try_add_key_vrf(sk, ao_addr, ao_prefix, ao_vrf,
+ sndid, rcvid, 0)) {
+ if (fault(POSTINSTALL)) {
+ test_ok("%s: postfailed as expected", tst_name);
+ goto out;
+ } else {
+ test_error("setsockopt(TCP_AO_ADD_KEY)");
+ }
+ } else if (fault(POSTINSTALL)) {
+ test_fail("%s: post setsockopt() was expected to fail", tst_name);
+ goto out;
+ }
+ }
+
+out:
+ synchronize_threads(); /* test_kill_sk() */
+ /* _test_connect_socket() cleans up on failure */
+ if (ret > 0)
+ test_kill_sk(sk);
+}
+
+static void client_add_ip(union tcp_addr *client, const char *ip)
+{
+ int err, family = TEST_FAMILY;
+
+ if (inet_pton(family, ip, client) != 1)
+ test_error("Can't convert ip address %s", ip);
+
+ err = ip_addr_add(veth_name, family, *client, TEST_PREFIX);
+ if (err)
+ test_error("Failed to add ip address: %d", err);
+}
+
+static void client_add_ips(void)
+{
+ client_add_ip(&client2, __TEST_CLIENT_IP(2));
+ client_add_ip(&client3, __TEST_CLIENT_IP(3));
+ synchronize_threads(); /* server_add_routes() */
+}
+
+static void client_add_fail_tests(unsigned int *port)
+{
+ try_to_add("TCP-AO established: add TCP-MD5 key",
+ (*port)++, POSTINSTALL_MD5 | PREINSTALL_AO,
+ this_ip_dest, TEST_PREFIX, -1, this_ip_dest, TEST_PREFIX, 0,
+ 100, 100, 1, FAULT_POSTINSTALL);
+ try_to_add("TCP-MD5 established: add TCP-AO key",
+ (*port)++, PREINSTALL_MD5 | POSTINSTALL_AO,
+ this_ip_dest, TEST_PREFIX, -1, this_ip_dest, TEST_PREFIX, 0,
+ 100, 100, 1, FAULT_POSTINSTALL);
+ try_to_add("non-signed established: add TCP-AO key",
+ (*port)++, POSTINSTALL_AO,
+ this_ip_dest, TEST_PREFIX, -1, this_ip_dest, TEST_PREFIX, 0,
+ 100, 100, 0, FAULT_POSTINSTALL);
+
+ try_to_add("TCP-AO key intersects with existing TCP-MD5 key",
+ (*port)++, PREINSTALL_MD5_FIRST | PREINSTALL_AO,
+ this_ip_addr, TEST_PREFIX, -1, this_ip_addr, TEST_PREFIX, -1,
+ 100, 100, 1, FAULT_PREINSTALL_AO);
+ try_to_add("TCP-MD5 key intersects with existing TCP-AO key",
+ (*port)++, PREINSTALL_MD5 | PREINSTALL_AO,
+ this_ip_addr, TEST_PREFIX, -1, this_ip_addr, TEST_PREFIX, -1,
+ 100, 100, 1, FAULT_PREINSTALL_MD5);
+
+ try_to_preadd("TCP-MD5 key + TCP-AO required",
+ (*port)++, PREINSTALL_MD5_FIRST | PREINSTALL_AO,
+ this_ip_addr, TEST_PREFIX, -1,
+ this_ip_addr, TEST_PREFIX, -1, true,
+ 100, 100, 1, 0, FAULT_PREINSTALL_AO);
+ try_to_preadd("TCP-AO required on socket + TCP-MD5 key",
+ (*port)++, PREINSTALL_MD5 | PREINSTALL_AO,
+ this_ip_addr, TEST_PREFIX, -1,
+ this_ip_addr, TEST_PREFIX, -1, true,
+ 100, 100, 1, 0, FAULT_PREINSTALL_MD5);
+}
+
+static void client_vrf_tests(unsigned int *port)
+{
+ setup_vrfs();
+
+ /* The following restrictions for setsockopt()s are expected:
+ *
+ * |--------------|-----------------|-------------|-------------|
+ * | | MD5 key without | MD5 key | MD5 key |
+ * | | l3index | l3index=0 | l3index=N |
+ * |--------------|-----------------|-------------|-------------|
+ * | TCP-AO key | | | |
+ * | without | reject | reject | reject |
+ * | l3index | | | |
+ * |--------------|-----------------|-------------|-------------|
+ * | TCP-AO key | | | |
+ * | l3index=0 | reject | reject | allow |
+ * |--------------|-----------------|-------------|-------------|
+ * | TCP-AO key | | | |
+ * | l3index=N | reject | allow | reject |
+ * |--------------|-----------------|-------------|-------------|
+ */
+ try_to_preadd("VRF: TCP-AO key (no l3index) + TCP-MD5 key (no l3index)",
+ (*port)++, PREINSTALL_MD5 | PREINSTALL_AO,
+ this_ip_addr, TEST_PREFIX, -1,
+ this_ip_addr, TEST_PREFIX, -1, 0, 100, 100,
+ 1, 1, FAULT_PREINSTALL_MD5);
+ try_to_preadd("VRF: TCP-MD5 key (no l3index) + TCP-AO key (no l3index)",
+ (*port)++, PREINSTALL_MD5_FIRST | PREINSTALL_AO,
+ this_ip_addr, TEST_PREFIX, -1,
+ this_ip_addr, TEST_PREFIX, -1, 0, 100, 100,
+ 1, 1, FAULT_PREINSTALL_AO);
+ try_to_preadd("VRF: TCP-AO key (no l3index) + TCP-MD5 key (l3index=0)",
+ (*port)++, PREINSTALL_MD5 | PREINSTALL_AO,
+ this_ip_addr, TEST_PREFIX, 0,
+ this_ip_addr, TEST_PREFIX, -1, 0, 100, 100,
+ 1, 1, FAULT_PREINSTALL_MD5);
+ try_to_preadd("VRF: TCP-MD5 key (l3index=0) + TCP-AO key (no l3index)",
+ (*port)++, PREINSTALL_MD5_FIRST | PREINSTALL_AO,
+ this_ip_addr, TEST_PREFIX, 0,
+ this_ip_addr, TEST_PREFIX, -1, 0, 100, 100,
+ 1, 1, FAULT_PREINSTALL_AO);
+ try_to_preadd("VRF: TCP-AO key (no l3index) + TCP-MD5 key (l3index=N)",
+ (*port)++, PREINSTALL_MD5 | PREINSTALL_AO,
+ this_ip_addr, TEST_PREFIX, test_vrf_ifindex,
+ this_ip_addr, TEST_PREFIX, -1, 0, 100, 100,
+ 1, 1, FAULT_PREINSTALL_MD5);
+ try_to_preadd("VRF: TCP-MD5 key (l3index=N) + TCP-AO key (no l3index)",
+ (*port)++, PREINSTALL_MD5_FIRST | PREINSTALL_AO,
+ this_ip_addr, TEST_PREFIX, test_vrf_ifindex,
+ this_ip_addr, TEST_PREFIX, -1, 0, 100, 100,
+ 1, 1, FAULT_PREINSTALL_AO);
+
+ try_to_preadd("VRF: TCP-AO key (l3index=0) + TCP-MD5 key (no l3index)",
+ (*port)++, PREINSTALL_MD5 | PREINSTALL_AO,
+ this_ip_addr, TEST_PREFIX, -1,
+ this_ip_addr, TEST_PREFIX, 0, 0, 100, 100,
+ 1, 1, FAULT_PREINSTALL_MD5);
+ try_to_preadd("VRF: TCP-MD5 key (no l3index) + TCP-AO key (l3index=0)",
+ (*port)++, PREINSTALL_MD5_FIRST | PREINSTALL_AO,
+ this_ip_addr, TEST_PREFIX, -1,
+ this_ip_addr, TEST_PREFIX, 0, 0, 100, 100,
+ 1, 1, FAULT_PREINSTALL_AO);
+ try_to_preadd("VRF: TCP-AO key (l3index=0) + TCP-MD5 key (l3index=0)",
+ (*port)++, PREINSTALL_MD5 | PREINSTALL_AO,
+ this_ip_addr, TEST_PREFIX, 0,
+ this_ip_addr, TEST_PREFIX, 0, 0, 100, 100,
+ 1, 1, FAULT_PREINSTALL_MD5);
+ try_to_preadd("VRF: TCP-MD5 key (l3index=0) + TCP-AO key (l3index=0)",
+ (*port)++, PREINSTALL_MD5_FIRST | PREINSTALL_AO,
+ this_ip_addr, TEST_PREFIX, 0,
+ this_ip_addr, TEST_PREFIX, 0, 0, 100, 100,
+ 1, 1, FAULT_PREINSTALL_AO);
+ try_to_preadd("VRF: TCP-AO key (l3index=0) + TCP-MD5 key (l3index=N)",
+ (*port)++, PREINSTALL_MD5 | PREINSTALL_AO,
+ this_ip_addr, TEST_PREFIX, test_vrf_ifindex,
+ this_ip_addr, TEST_PREFIX, 0, 0, 100, 100,
+ 1, 1, 0);
+ try_to_preadd("VRF: TCP-MD5 key (l3index=N) + TCP-AO key (l3index=0)",
+ (*port)++, PREINSTALL_MD5_FIRST | PREINSTALL_AO,
+ this_ip_addr, TEST_PREFIX, test_vrf_ifindex,
+ this_ip_addr, TEST_PREFIX, 0, 0, 100, 100,
+ 1, 1, 0);
+
+ try_to_preadd("VRF: TCP-AO key (l3index=N) + TCP-MD5 key (no l3index)",
+ (*port)++, PREINSTALL_MD5 | PREINSTALL_AO,
+ this_ip_addr, TEST_PREFIX, test_vrf_ifindex,
+ this_ip_addr, TEST_PREFIX, -1, 0, 100, 100,
+ 1, 1, FAULT_PREINSTALL_MD5);
+ try_to_preadd("VRF: TCP-MD5 key (no l3index) + TCP-AO key (l3index=N)",
+ (*port)++, PREINSTALL_MD5_FIRST | PREINSTALL_AO,
+ this_ip_addr, TEST_PREFIX, -1,
+ this_ip_addr, TEST_PREFIX, test_vrf_ifindex, 0, 100, 100,
+ 1, 1, FAULT_PREINSTALL_AO);
+ try_to_preadd("VRF: TCP-AO key (l3index=N) + TCP-MD5 key (l3index=0)",
+ (*port)++, PREINSTALL_MD5 | PREINSTALL_AO,
+ this_ip_addr, TEST_PREFIX, 0,
+ this_ip_addr, TEST_PREFIX, test_vrf_ifindex, 0, 100, 100,
+ 1, 1, 0);
+ try_to_preadd("VRF: TCP-MD5 key (l3index=0) + TCP-AO key (l3index=N)",
+ (*port)++, PREINSTALL_MD5_FIRST | PREINSTALL_AO,
+ this_ip_addr, TEST_PREFIX, 0,
+ this_ip_addr, TEST_PREFIX, test_vrf_ifindex, 0, 100, 100,
+ 1, 1, 0);
+ try_to_preadd("VRF: TCP-AO key (l3index=N) + TCP-MD5 key (l3index=N)",
+ (*port)++, PREINSTALL_MD5 | PREINSTALL_AO,
+ this_ip_addr, TEST_PREFIX, test_vrf_ifindex,
+ this_ip_addr, TEST_PREFIX, test_vrf_ifindex, 0, 100, 100,
+ 1, 1, FAULT_PREINSTALL_MD5);
+ try_to_preadd("VRF: TCP-MD5 key (l3index=N) + TCP-AO key (l3index=N)",
+ (*port)++, PREINSTALL_MD5_FIRST | PREINSTALL_AO,
+ this_ip_addr, TEST_PREFIX, test_vrf_ifindex,
+ this_ip_addr, TEST_PREFIX, test_vrf_ifindex, 0, 100, 100,
+ 1, 1, FAULT_PREINSTALL_AO);
+}
+
+static void *client_fn(void *arg)
+{
+ unsigned int port = test_server_port;
+ union tcp_addr addr_any = {};
+
+ client_add_ips();
+
+ try_connect("AO server (INADDR_ANY): AO client", port++, NULL, 0,
+ &addr_any, 0, 100, 100, 0, 0, 0, &this_ip_addr);
+ try_connect("AO server (INADDR_ANY): MD5 client", port++, &addr_any, 0,
+ NULL, 0, 100, 100, 0, FAULT_TIMEOUT, 1, &this_ip_addr);
+ try_connect("AO server (INADDR_ANY): unsigned client", port++, NULL, 0,
+ NULL, 0, 100, 100, 0, FAULT_TIMEOUT, 0, &this_ip_addr);
+ try_connect("AO server (AO_REQUIRED): AO client", port++, NULL, 0,
+ &addr_any, 0, 100, 100, 0, 0, 0, &this_ip_addr);
+ try_connect("AO server (AO_REQUIRED): unsigned client", port++, NULL, 0,
+ NULL, 0, 100, 100, 0, FAULT_TIMEOUT, 0, &client2);
+
+ try_connect("MD5 server (INADDR_ANY): AO client", port++, NULL, 0,
+ &addr_any, 0, 100, 100, 0, FAULT_TIMEOUT, 1, &this_ip_addr);
+ try_connect("MD5 server (INADDR_ANY): MD5 client", port++, &addr_any, 0,
+ NULL, 0, 100, 100, 0, 0, 1, &this_ip_addr);
+ try_connect("MD5 server (INADDR_ANY): no sign client", port++, NULL, 0,
+ NULL, 0, 100, 100, 0, FAULT_TIMEOUT, 1, &this_ip_addr);
+
+ try_connect("no sign server: AO client", port++, NULL, 0,
+ &addr_any, 0, 100, 100, 0, FAULT_TIMEOUT, 0, &this_ip_addr);
+ try_connect("no sign server: MD5 client", port++, &addr_any, 0,
+ NULL, 0, 100, 100, 0, FAULT_TIMEOUT, 1, &this_ip_addr);
+ try_connect("no sign server: no sign client", port++, NULL, 0,
+ NULL, 0, 100, 100, 0, 0, 0, &this_ip_addr);
+
+ try_connect("AO+MD5 server: AO client (matching)", port++, NULL, 0,
+ &addr_any, 0, 100, 100, 0, 0, 1, &client2);
+ try_connect("AO+MD5 server: AO client (misconfig, matching MD5)",
+ port++, NULL, 0, &addr_any, 0, 100, 100, 0,
+ FAULT_TIMEOUT, 1, &this_ip_addr);
+ try_connect("AO+MD5 server: AO client (misconfig, non-matching)",
+ port++, NULL, 0, &addr_any, 0, 100, 100, 0,
+ FAULT_TIMEOUT, 1, &client3);
+ try_connect("AO+MD5 server: MD5 client (matching)", port++, &addr_any, 0,
+ NULL, 0, 100, 100, 0, 0, 1, &this_ip_addr);
+ try_connect("AO+MD5 server: MD5 client (misconfig, matching AO)",
+ port++, &addr_any, 0, NULL, 0, 100, 100, 0, FAULT_TIMEOUT,
+ 1, &client2);
+ try_connect("AO+MD5 server: MD5 client (misconfig, non-matching)",
+ port++, &addr_any, 0, NULL, 0, 100, 100, 0, FAULT_TIMEOUT,
+ 1, &client3);
+ try_connect("AO+MD5 server: no sign client (unmatched)",
+ port++, NULL, 0, NULL, 0, 100, 100, 0, 0, 1, &client3);
+ try_connect("AO+MD5 server: no sign client (misconfig, matching AO)",
+ port++, NULL, 0, NULL, 0, 100, 100, 0, FAULT_TIMEOUT,
+ 1, &client2);
+ try_connect("AO+MD5 server: no sign client (misconfig, matching MD5)",
+ port++, NULL, 0, NULL, 0, 100, 100, 0, FAULT_TIMEOUT,
+ 1, &this_ip_addr);
+
+ try_connect("AO+MD5 server: client with both [TCP-MD5] and TCP-AO keys",
+ port++, &this_ip_addr, TEST_PREFIX,
+ &client2, TEST_PREFIX, 100, 100, 0, FAULT_KEYREJECT,
+ 1, &this_ip_addr);
+ try_connect("AO+MD5 server: client with both TCP-MD5 and [TCP-AO] keys",
+ port++, &this_ip_addr, TEST_PREFIX,
+ &client2, TEST_PREFIX, 100, 100, 0, FAULT_KEYREJECT,
+ 1, &client2);
+
+ client_add_fail_tests(&port);
+ client_vrf_tests(&port);
+
+ return NULL;
+}
+
+int main(int argc, char *argv[])
+{
+ test_init(72, server_fn, client_fn);
+ return 0;
+}
diff --git a/tools/testing/selftests/net/tcp_fastopen_backup_key.c b/tools/testing/selftests/net/tcp_fastopen_backup_key.c
index 9c55ec44fc43..c1cb0c75156a 100644
--- a/tools/testing/selftests/net/tcp_fastopen_backup_key.c
+++ b/tools/testing/selftests/net/tcp_fastopen_backup_key.c
@@ -26,6 +26,8 @@
#include <fcntl.h>
#include <time.h>
+#include "../kselftest.h"
+
#ifndef TCP_FASTOPEN_KEY
#define TCP_FASTOPEN_KEY 33
#endif
@@ -34,10 +36,6 @@
#define PROC_FASTOPEN_KEY "/proc/sys/net/ipv4/tcp_fastopen_key"
#define KEY_LENGTH 16
-#ifndef ARRAY_SIZE
-#define ARRAY_SIZE(arr) (sizeof(arr) / sizeof((arr)[0]))
-#endif
-
static bool do_ipv6;
static bool do_sockopt;
static bool do_rotate;
diff --git a/tools/testing/selftests/net/tcp_mmap.c b/tools/testing/selftests/net/tcp_mmap.c
index 4555f88252ba..4fcce5150850 100644
--- a/tools/testing/selftests/net/tcp_mmap.c
+++ b/tools/testing/selftests/net/tcp_mmap.c
@@ -66,11 +66,16 @@
#include <poll.h>
#include <linux/tcp.h>
#include <assert.h>
+#include <openssl/pem.h>
#ifndef MSG_ZEROCOPY
#define MSG_ZEROCOPY 0x4000000
#endif
+#ifndef min
+#define min(a, b) ((a) < (b) ? (a) : (b))
+#endif
+
#define FILE_SZ (1ULL << 35)
static int cfg_family = AF_INET6;
static socklen_t cfg_alen = sizeof(struct sockaddr_in6);
@@ -81,12 +86,14 @@ static int sndbuf; /* Default: autotuning. Can be set with -w <integer> option
static int zflg; /* zero copy option. (MSG_ZEROCOPY for sender, mmap() for receiver */
static int xflg; /* hash received data (simple xor) (-h option) */
static int keepflag; /* -k option: receiver shall keep all received file in memory (no munmap() calls) */
+static int integrity; /* -i option: sender and receiver compute sha256 over the data.*/
static size_t chunk_size = 512*1024;
static size_t map_align;
unsigned long htotal;
+unsigned int digest_len;
static inline void prefetch(const void *x)
{
@@ -123,18 +130,57 @@ void hash_zone(void *zone, unsigned int length)
#define ALIGN_UP(x, align_to) (((x) + ((align_to)-1)) & ~((align_to)-1))
#define ALIGN_PTR_UP(p, ptr_align_to) ((typeof(p))ALIGN_UP((unsigned long)(p), ptr_align_to))
+
+static void *mmap_large_buffer(size_t need, size_t *allocated)
+{
+ void *buffer;
+ size_t sz;
+
+ /* Attempt to use huge pages if possible. */
+ sz = ALIGN_UP(need, map_align);
+ buffer = mmap(NULL, sz, PROT_READ | PROT_WRITE,
+ MAP_PRIVATE | MAP_ANONYMOUS | MAP_HUGETLB, -1, 0);
+
+ if (buffer == (void *)-1) {
+ sz = need;
+ buffer = mmap(NULL, sz, PROT_READ | PROT_WRITE,
+ MAP_PRIVATE | MAP_ANONYMOUS | MAP_POPULATE,
+ -1, 0);
+ if (buffer != (void *)-1)
+ fprintf(stderr, "MAP_HUGETLB attempt failed, look at /sys/kernel/mm/hugepages for optimal performance\n");
+ }
+ *allocated = sz;
+ return buffer;
+}
+
+static uint32_t tcp_info_get_rcv_mss(int fd)
+{
+ socklen_t sz = sizeof(struct tcp_info);
+ struct tcp_info info;
+
+ if (getsockopt(fd, IPPROTO_TCP, TCP_INFO, &info, &sz)) {
+ fprintf(stderr, "Error fetching TCP_INFO\n");
+ return 0;
+ }
+
+ return info.tcpi_rcv_mss;
+}
+
void *child_thread(void *arg)
{
+ unsigned char digest[SHA256_DIGEST_LENGTH];
unsigned long total_mmap = 0, total = 0;
struct tcp_zerocopy_receive zc;
+ unsigned char *buffer = NULL;
unsigned long delta_usec;
+ EVP_MD_CTX *ctx = NULL;
int flags = MAP_SHARED;
struct timeval t0, t1;
- char *buffer = NULL;
void *raddr = NULL;
void *addr = NULL;
double throughput;
struct rusage ru;
+ size_t buffer_sz;
int lu, fd;
fd = (int)(unsigned long)arg;
@@ -142,9 +188,9 @@ void *child_thread(void *arg)
gettimeofday(&t0, NULL);
fcntl(fd, F_SETFL, O_NDELAY);
- buffer = malloc(chunk_size);
- if (!buffer) {
- perror("malloc");
+ buffer = mmap_large_buffer(chunk_size, &buffer_sz);
+ if (buffer == (void *)-1) {
+ perror("mmap");
goto error;
}
if (zflg) {
@@ -156,6 +202,14 @@ void *child_thread(void *arg)
addr = ALIGN_PTR_UP(raddr, map_align);
}
}
+ if (integrity) {
+ ctx = EVP_MD_CTX_new();
+ if (!ctx) {
+ perror("cannot enable SHA computing");
+ goto error;
+ }
+ EVP_DigestInit_ex(ctx, EVP_sha256(), NULL);
+ }
while (1) {
struct pollfd pfd = { .fd = fd, .events = POLLIN, };
int sub;
@@ -167,7 +221,7 @@ void *child_thread(void *arg)
memset(&zc, 0, sizeof(zc));
zc.address = (__u64)((unsigned long)addr);
- zc.length = chunk_size;
+ zc.length = min(chunk_size, FILE_SZ - total);
res = getsockopt(fd, IPPROTO_TCP, TCP_ZEROCOPY_RECEIVE,
&zc, &zc_len);
@@ -176,29 +230,43 @@ void *child_thread(void *arg)
if (zc.length) {
assert(zc.length <= chunk_size);
+ if (integrity)
+ EVP_DigestUpdate(ctx, addr, zc.length);
total_mmap += zc.length;
if (xflg)
hash_zone(addr, zc.length);
+ /* It is more efficient to unmap the pages right now,
+ * instead of doing this in next TCP_ZEROCOPY_RECEIVE.
+ */
+ madvise(addr, zc.length, MADV_DONTNEED);
total += zc.length;
}
if (zc.recv_skip_hint) {
assert(zc.recv_skip_hint <= chunk_size);
- lu = read(fd, buffer, zc.recv_skip_hint);
+ lu = read(fd, buffer, min(zc.recv_skip_hint,
+ FILE_SZ - total));
if (lu > 0) {
+ if (integrity)
+ EVP_DigestUpdate(ctx, buffer, lu);
if (xflg)
hash_zone(buffer, lu);
total += lu;
}
+ if (lu == 0)
+ goto end;
}
continue;
}
sub = 0;
while (sub < chunk_size) {
- lu = read(fd, buffer + sub, chunk_size - sub);
+ lu = read(fd, buffer + sub, min(chunk_size - sub,
+ FILE_SZ - total));
if (lu == 0)
goto end;
if (lu < 0)
break;
+ if (integrity)
+ EVP_DigestUpdate(ctx, buffer + sub, lu);
if (xflg)
hash_zone(buffer + sub, lu);
total += lu;
@@ -209,6 +277,20 @@ end:
gettimeofday(&t1, NULL);
delta_usec = (t1.tv_sec - t0.tv_sec) * 1000000 + t1.tv_usec - t0.tv_usec;
+ if (integrity) {
+ fcntl(fd, F_SETFL, 0);
+ EVP_DigestFinal_ex(ctx, digest, &digest_len);
+ lu = read(fd, buffer, SHA256_DIGEST_LENGTH);
+ if (lu != SHA256_DIGEST_LENGTH)
+ perror("Error: Cannot read SHA256\n");
+
+ if (memcmp(digest, buffer,
+ SHA256_DIGEST_LENGTH))
+ fprintf(stderr, "Error: SHA256 of the data is not right\n");
+ else
+ printf("\nSHA256 is correct\n");
+ }
+
throughput = 0;
if (delta_usec)
throughput = total * 8.0 / (double)delta_usec / 1000.0;
@@ -219,7 +301,7 @@ end:
total_usec = 1000000*ru.ru_utime.tv_sec + ru.ru_utime.tv_usec +
1000000*ru.ru_stime.tv_sec + ru.ru_stime.tv_usec;
printf("received %lg MB (%lg %% mmap'ed) in %lg s, %lg Gbit\n"
- " cpu usage user:%lg sys:%lg, %lg usec per MB, %lu c-switches\n",
+ " cpu usage user:%lg sys:%lg, %lg usec per MB, %lu c-switches, rcv_mss %u\n",
total / (1024.0 * 1024.0),
100.0*total_mmap/total,
(double)delta_usec / 1000000.0,
@@ -227,10 +309,11 @@ end:
(double)ru.ru_utime.tv_sec + (double)ru.ru_utime.tv_usec / 1000000.0,
(double)ru.ru_stime.tv_sec + (double)ru.ru_stime.tv_usec / 1000000.0,
(double)total_usec/mb,
- ru.ru_nvcsw);
+ ru.ru_nvcsw,
+ tcp_info_get_rcv_mss(fd));
}
error:
- free(buffer);
+ munmap(buffer, buffer_sz);
close(fd);
if (zflg)
munmap(raddr, chunk_size + map_align);
@@ -340,18 +423,38 @@ static unsigned long default_huge_page_size(void)
return hps;
}
+static void randomize(void *target, size_t count)
+{
+ static int urandom = -1;
+ ssize_t got;
+
+ urandom = open("/dev/urandom", O_RDONLY);
+ if (urandom < 0) {
+ perror("open /dev/urandom");
+ exit(1);
+ }
+ got = read(urandom, target, count);
+ if (got != count) {
+ perror("read /dev/urandom");
+ exit(1);
+ }
+}
+
int main(int argc, char *argv[])
{
+ unsigned char digest[SHA256_DIGEST_LENGTH];
struct sockaddr_storage listenaddr, addr;
unsigned int max_pacing_rate = 0;
- size_t total = 0;
+ EVP_MD_CTX *ctx = NULL;
+ unsigned char *buffer;
+ uint64_t total = 0;
char *host = NULL;
int fd, c, on = 1;
- char *buffer;
+ size_t buffer_sz;
int sflg = 0;
int mss = 0;
- while ((c = getopt(argc, argv, "46p:svr:w:H:zxkP:M:C:a:")) != -1) {
+ while ((c = getopt(argc, argv, "46p:svr:w:H:zxkP:M:C:a:i")) != -1) {
switch (c) {
case '4':
cfg_family = PF_INET;
@@ -397,6 +500,9 @@ int main(int argc, char *argv[])
case 'a':
map_align = atol(optarg);
break;
+ case 'i':
+ integrity = 1;
+ break;
default:
exit(1);
}
@@ -437,9 +543,9 @@ int main(int argc, char *argv[])
}
do_accept(fdlisten);
}
- buffer = mmap(NULL, chunk_size, PROT_READ | PROT_WRITE,
- MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
- if (buffer == (char *)-1) {
+
+ buffer = mmap_large_buffer(chunk_size, &buffer_sz);
+ if (buffer == (unsigned char *)-1) {
perror("mmap");
exit(1);
}
@@ -472,18 +578,35 @@ int main(int argc, char *argv[])
perror("setsockopt SO_ZEROCOPY, (-z option disabled)");
zflg = 0;
}
+ if (integrity) {
+ randomize(buffer, buffer_sz);
+ ctx = EVP_MD_CTX_new();
+ if (!ctx) {
+ perror("cannot enable SHA computing");
+ exit(1);
+ }
+ EVP_DigestInit_ex(ctx, EVP_sha256(), NULL);
+ }
while (total < FILE_SZ) {
- ssize_t wr = FILE_SZ - total;
-
- if (wr > chunk_size)
- wr = chunk_size;
- /* Note : we just want to fill the pipe with 0 bytes */
- wr = send(fd, buffer, wr, zflg ? MSG_ZEROCOPY : 0);
+ size_t offset = total % chunk_size;
+ int64_t wr = FILE_SZ - total;
+
+ if (wr > chunk_size - offset)
+ wr = chunk_size - offset;
+ /* Note : we just want to fill the pipe with random bytes */
+ wr = send(fd, buffer + offset,
+ (size_t)wr, zflg ? MSG_ZEROCOPY : 0);
if (wr <= 0)
break;
+ if (integrity)
+ EVP_DigestUpdate(ctx, buffer + offset, wr);
total += wr;
}
+ if (integrity && total == FILE_SZ) {
+ EVP_DigestFinal_ex(ctx, digest, &digest_len);
+ send(fd, digest, (size_t)SHA256_DIGEST_LENGTH, 0);
+ }
close(fd);
- munmap(buffer, chunk_size);
+ munmap(buffer, buffer_sz);
return 0;
}
diff --git a/tools/testing/selftests/net/test_bridge_backup_port.sh b/tools/testing/selftests/net/test_bridge_backup_port.sh
new file mode 100755
index 000000000000..1b3f89e2b86e
--- /dev/null
+++ b/tools/testing/selftests/net/test_bridge_backup_port.sh
@@ -0,0 +1,775 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+#
+# This test is for checking bridge backup port and backup nexthop ID
+# functionality. The topology consists of two bridge (VTEPs) connected using
+# VXLAN. The test checks that when the switch port (swp1) is down, traffic is
+# redirected to the VXLAN port (vx0). When a backup nexthop ID is configured,
+# the test checks that traffic is redirected with the correct nexthop
+# information.
+#
+# +------------------------------------+ +------------------------------------+
+# | + swp1 + vx0 | | + swp1 + vx0 |
+# | | | | | | | |
+# | | br0 | | | | | |
+# | +------------+-----------+ | | +------------+-----------+ |
+# | | | | | |
+# | | | | | |
+# | + | | + |
+# | br0 | | br0 |
+# | + | | + |
+# | | | | | |
+# | | | | | |
+# | + | | + |
+# | br0.10 | | br0.10 |
+# | 192.0.2.65/28 | | 192.0.2.66/28 |
+# | | | |
+# | | | |
+# | 192.0.2.33 | | 192.0.2.34 |
+# | + lo | | + lo |
+# | | | |
+# | | | |
+# | 192.0.2.49/28 | | 192.0.2.50/28 |
+# | veth0 +-------+ veth0 |
+# | | | |
+# | sw1 | | sw2 |
+# +------------------------------------+ +------------------------------------+
+
+source lib.sh
+ret=0
+
+# All tests in this script. Can be overridden with -t option.
+TESTS="
+ backup_port
+ backup_nhid
+ backup_nhid_invalid
+ backup_nhid_ping
+ backup_nhid_torture
+"
+VERBOSE=0
+PAUSE_ON_FAIL=no
+PAUSE=no
+PING_TIMEOUT=5
+
+################################################################################
+# Utilities
+
+log_test()
+{
+ local rc=$1
+ local expected=$2
+ local msg="$3"
+
+ if [ ${rc} -eq ${expected} ]; then
+ printf "TEST: %-60s [ OK ]\n" "${msg}"
+ nsuccess=$((nsuccess+1))
+ else
+ ret=1
+ nfail=$((nfail+1))
+ printf "TEST: %-60s [FAIL]\n" "${msg}"
+ if [ "$VERBOSE" = "1" ]; then
+ echo " rc=$rc, expected $expected"
+ fi
+
+ if [ "${PAUSE_ON_FAIL}" = "yes" ]; then
+ echo
+ echo "hit enter to continue, 'q' to quit"
+ read a
+ [ "$a" = "q" ] && exit 1
+ fi
+ fi
+
+ if [ "${PAUSE}" = "yes" ]; then
+ echo
+ echo "hit enter to continue, 'q' to quit"
+ read a
+ [ "$a" = "q" ] && exit 1
+ fi
+
+ [ "$VERBOSE" = "1" ] && echo
+}
+
+run_cmd()
+{
+ local cmd="$1"
+ local out
+ local stderr="2>/dev/null"
+
+ if [ "$VERBOSE" = "1" ]; then
+ printf "COMMAND: $cmd\n"
+ stderr=
+ fi
+
+ out=$(eval $cmd $stderr)
+ rc=$?
+ if [ "$VERBOSE" = "1" -a -n "$out" ]; then
+ echo " $out"
+ fi
+
+ return $rc
+}
+
+tc_check_packets()
+{
+ local ns=$1; shift
+ local id=$1; shift
+ local handle=$1; shift
+ local count=$1; shift
+ local pkts
+
+ sleep 0.1
+ pkts=$(tc -n $ns -j -s filter show $id \
+ | jq ".[] | select(.options.handle == $handle) | \
+ .options.actions[0].stats.packets")
+ [[ $pkts == $count ]]
+}
+
+bridge_link_check()
+{
+ local ns=$1; shift
+ local dev=$1; shift
+ local state=$1; shift
+
+ bridge -n $ns -d -j link show dev $dev | \
+ jq -e ".[][\"state\"] == \"$state\"" &> /dev/null
+}
+
+################################################################################
+# Setup
+
+setup_topo_ns()
+{
+ local ns=$1; shift
+
+ ip netns exec $ns sysctl -qw net.ipv6.conf.all.keep_addr_on_down=1
+ ip netns exec $ns sysctl -qw net.ipv6.conf.default.ignore_routes_with_linkdown=1
+ ip netns exec $ns sysctl -qw net.ipv6.conf.all.accept_dad=0
+ ip netns exec $ns sysctl -qw net.ipv6.conf.default.accept_dad=0
+}
+
+setup_topo()
+{
+ local ns
+
+ setup_ns sw1 sw2
+ for ns in $sw1 $sw2; do
+ setup_topo_ns $ns
+ done
+
+ ip link add name veth0 type veth peer name veth1
+ ip link set dev veth0 netns $sw1 name veth0
+ ip link set dev veth1 netns $sw2 name veth0
+}
+
+setup_sw_common()
+{
+ local ns=$1; shift
+ local local_addr=$1; shift
+ local remote_addr=$1; shift
+ local veth_addr=$1; shift
+ local gw_addr=$1; shift
+ local br_addr=$1; shift
+
+ ip -n $ns address add $local_addr/32 dev lo
+
+ ip -n $ns link set dev veth0 up
+ ip -n $ns address add $veth_addr/28 dev veth0
+ ip -n $ns route add default via $gw_addr
+
+ ip -n $ns link add name br0 up type bridge vlan_filtering 1 \
+ vlan_default_pvid 0 mcast_snooping 0
+
+ ip -n $ns link add link br0 name br0.10 up type vlan id 10
+ bridge -n $ns vlan add vid 10 dev br0 self
+ ip -n $ns address add $br_addr/28 dev br0.10
+
+ ip -n $ns link add name swp1 up type dummy
+ ip -n $ns link set dev swp1 master br0
+ bridge -n $ns vlan add vid 10 dev swp1 untagged
+
+ ip -n $ns link add name vx0 up master br0 type vxlan \
+ local $local_addr dstport 4789 nolearning external
+ bridge -n $ns link set dev vx0 vlan_tunnel on learning off
+
+ bridge -n $ns vlan add vid 10 dev vx0
+ bridge -n $ns vlan add vid 10 dev vx0 tunnel_info id 10010
+}
+
+setup_sw1()
+{
+ local ns=$sw1
+ local local_addr=192.0.2.33
+ local remote_addr=192.0.2.34
+ local veth_addr=192.0.2.49
+ local gw_addr=192.0.2.50
+ local br_addr=192.0.2.65
+
+ setup_sw_common $ns $local_addr $remote_addr $veth_addr $gw_addr \
+ $br_addr
+}
+
+setup_sw2()
+{
+ local ns=$sw2
+ local local_addr=192.0.2.34
+ local remote_addr=192.0.2.33
+ local veth_addr=192.0.2.50
+ local gw_addr=192.0.2.49
+ local br_addr=192.0.2.66
+
+ setup_sw_common $ns $local_addr $remote_addr $veth_addr $gw_addr \
+ $br_addr
+}
+
+setup()
+{
+ set -e
+
+ setup_topo
+ setup_sw1
+ setup_sw2
+
+ sleep 5
+
+ set +e
+}
+
+cleanup()
+{
+ cleanup_ns $sw1 $sw2
+}
+
+################################################################################
+# Tests
+
+backup_port()
+{
+ local dmac=00:11:22:33:44:55
+ local smac=00:aa:bb:cc:dd:ee
+
+ echo
+ echo "Backup port"
+ echo "-----------"
+
+ run_cmd "tc -n $sw1 qdisc replace dev swp1 clsact"
+ run_cmd "tc -n $sw1 filter replace dev swp1 egress pref 1 handle 101 proto ip flower src_mac $smac dst_mac $dmac action pass"
+
+ run_cmd "tc -n $sw1 qdisc replace dev vx0 clsact"
+ run_cmd "tc -n $sw1 filter replace dev vx0 egress pref 1 handle 101 proto ip flower src_mac $smac dst_mac $dmac action pass"
+
+ run_cmd "bridge -n $sw1 fdb replace $dmac dev swp1 master static vlan 10"
+
+ # Initial state - check that packets are forwarded out of swp1 when it
+ # has a carrier and not forwarded out of any port when it does not have
+ # a carrier.
+ run_cmd "ip netns exec $sw1 mausezahn br0.10 -a $smac -b $dmac -A 198.51.100.1 -B 198.51.100.2 -t ip -p 100 -q -c 1"
+ tc_check_packets $sw1 "dev swp1 egress" 101 1
+ log_test $? 0 "Forwarding out of swp1"
+ tc_check_packets $sw1 "dev vx0 egress" 101 0
+ log_test $? 0 "No forwarding out of vx0"
+
+ run_cmd "ip -n $sw1 link set dev swp1 carrier off"
+ busywait $BUSYWAIT_TIMEOUT bridge_link_check $sw1 swp1 disabled
+ log_test $? 0 "swp1 carrier off"
+
+ run_cmd "ip netns exec $sw1 mausezahn br0.10 -a $smac -b $dmac -A 198.51.100.1 -B 198.51.100.2 -t ip -p 100 -q -c 1"
+ tc_check_packets $sw1 "dev swp1 egress" 101 1
+ log_test $? 0 "No forwarding out of swp1"
+ tc_check_packets $sw1 "dev vx0 egress" 101 0
+ log_test $? 0 "No forwarding out of vx0"
+
+ run_cmd "ip -n $sw1 link set dev swp1 carrier on"
+ busywait $BUSYWAIT_TIMEOUT bridge_link_check $sw1 swp1 forwarding
+ log_test $? 0 "swp1 carrier on"
+
+ # Configure vx0 as the backup port of swp1 and check that packets are
+ # forwarded out of swp1 when it has a carrier and out of vx0 when swp1
+ # does not have a carrier.
+ run_cmd "bridge -n $sw1 link set dev swp1 backup_port vx0"
+ run_cmd "bridge -n $sw1 -d link show dev swp1 | grep \"backup_port vx0\""
+ log_test $? 0 "vx0 configured as backup port of swp1"
+
+ run_cmd "ip netns exec $sw1 mausezahn br0.10 -a $smac -b $dmac -A 198.51.100.1 -B 198.51.100.2 -t ip -p 100 -q -c 1"
+ tc_check_packets $sw1 "dev swp1 egress" 101 2
+ log_test $? 0 "Forwarding out of swp1"
+ tc_check_packets $sw1 "dev vx0 egress" 101 0
+ log_test $? 0 "No forwarding out of vx0"
+
+ run_cmd "ip -n $sw1 link set dev swp1 carrier off"
+ busywait $BUSYWAIT_TIMEOUT bridge_link_check $sw1 swp1 disabled
+ log_test $? 0 "swp1 carrier off"
+
+ run_cmd "ip netns exec $sw1 mausezahn br0.10 -a $smac -b $dmac -A 198.51.100.1 -B 198.51.100.2 -t ip -p 100 -q -c 1"
+ tc_check_packets $sw1 "dev swp1 egress" 101 2
+ log_test $? 0 "No forwarding out of swp1"
+ tc_check_packets $sw1 "dev vx0 egress" 101 1
+ log_test $? 0 "Forwarding out of vx0"
+
+ run_cmd "ip -n $sw1 link set dev swp1 carrier on"
+ busywait $BUSYWAIT_TIMEOUT bridge_link_check $sw1 swp1 forwarding
+ log_test $? 0 "swp1 carrier on"
+
+ run_cmd "ip netns exec $sw1 mausezahn br0.10 -a $smac -b $dmac -A 198.51.100.1 -B 198.51.100.2 -t ip -p 100 -q -c 1"
+ tc_check_packets $sw1 "dev swp1 egress" 101 3
+ log_test $? 0 "Forwarding out of swp1"
+ tc_check_packets $sw1 "dev vx0 egress" 101 1
+ log_test $? 0 "No forwarding out of vx0"
+
+ # Remove vx0 as the backup port of swp1 and check that packets are no
+ # longer forwarded out of vx0 when swp1 does not have a carrier.
+ run_cmd "bridge -n $sw1 link set dev swp1 nobackup_port"
+ run_cmd "bridge -n $sw1 -d link show dev swp1 | grep \"backup_port vx0\""
+ log_test $? 1 "vx0 not configured as backup port of swp1"
+
+ run_cmd "ip netns exec $sw1 mausezahn br0.10 -a $smac -b $dmac -A 198.51.100.1 -B 198.51.100.2 -t ip -p 100 -q -c 1"
+ tc_check_packets $sw1 "dev swp1 egress" 101 4
+ log_test $? 0 "Forwarding out of swp1"
+ tc_check_packets $sw1 "dev vx0 egress" 101 1
+ log_test $? 0 "No forwarding out of vx0"
+
+ run_cmd "ip -n $sw1 link set dev swp1 carrier off"
+ busywait $BUSYWAIT_TIMEOUT bridge_link_check $sw1 swp1 disabled
+ log_test $? 0 "swp1 carrier off"
+
+ run_cmd "ip netns exec $sw1 mausezahn br0.10 -a $smac -b $dmac -A 198.51.100.1 -B 198.51.100.2 -t ip -p 100 -q -c 1"
+ tc_check_packets $sw1 "dev swp1 egress" 101 4
+ log_test $? 0 "No forwarding out of swp1"
+ tc_check_packets $sw1 "dev vx0 egress" 101 1
+ log_test $? 0 "No forwarding out of vx0"
+}
+
+backup_nhid()
+{
+ local dmac=00:11:22:33:44:55
+ local smac=00:aa:bb:cc:dd:ee
+
+ echo
+ echo "Backup nexthop ID"
+ echo "-----------------"
+
+ run_cmd "tc -n $sw1 qdisc replace dev swp1 clsact"
+ run_cmd "tc -n $sw1 filter replace dev swp1 egress pref 1 handle 101 proto ip flower src_mac $smac dst_mac $dmac action pass"
+
+ run_cmd "tc -n $sw1 qdisc replace dev vx0 clsact"
+ run_cmd "tc -n $sw1 filter replace dev vx0 egress pref 1 handle 101 proto ip flower src_mac $smac dst_mac $dmac action pass"
+
+ run_cmd "ip -n $sw1 nexthop replace id 1 via 192.0.2.34 fdb"
+ run_cmd "ip -n $sw1 nexthop replace id 2 via 192.0.2.34 fdb"
+ run_cmd "ip -n $sw1 nexthop replace id 10 group 1/2 fdb"
+
+ run_cmd "bridge -n $sw1 fdb replace $dmac dev swp1 master static vlan 10"
+ run_cmd "bridge -n $sw1 fdb replace $dmac dev vx0 self static dst 192.0.2.36 src_vni 10010"
+
+ run_cmd "ip -n $sw2 address replace 192.0.2.36/32 dev lo"
+
+ # The first filter matches on packets forwarded using the backup
+ # nexthop ID and the second filter matches on packets forwarded using a
+ # regular VXLAN FDB entry.
+ run_cmd "tc -n $sw2 qdisc replace dev vx0 clsact"
+ run_cmd "tc -n $sw2 filter replace dev vx0 ingress pref 1 handle 101 proto ip flower src_mac $smac dst_mac $dmac enc_key_id 10010 enc_dst_ip 192.0.2.34 action pass"
+ run_cmd "tc -n $sw2 filter replace dev vx0 ingress pref 1 handle 102 proto ip flower src_mac $smac dst_mac $dmac enc_key_id 10010 enc_dst_ip 192.0.2.36 action pass"
+
+ # Configure vx0 as the backup port of swp1 and check that packets are
+ # forwarded out of swp1 when it has a carrier and out of vx0 when swp1
+ # does not have a carrier. When packets are forwarded out of vx0, check
+ # that they are forwarded by the VXLAN FDB entry.
+ run_cmd "bridge -n $sw1 link set dev swp1 backup_port vx0"
+ run_cmd "bridge -n $sw1 -d link show dev swp1 | grep \"backup_port vx0\""
+ log_test $? 0 "vx0 configured as backup port of swp1"
+
+ run_cmd "ip netns exec $sw1 mausezahn br0.10 -a $smac -b $dmac -A 198.51.100.1 -B 198.51.100.2 -t ip -p 100 -q -c 1"
+ tc_check_packets $sw1 "dev swp1 egress" 101 1
+ log_test $? 0 "Forwarding out of swp1"
+ tc_check_packets $sw1 "dev vx0 egress" 101 0
+ log_test $? 0 "No forwarding out of vx0"
+
+ run_cmd "ip -n $sw1 link set dev swp1 carrier off"
+ busywait $BUSYWAIT_TIMEOUT bridge_link_check $sw1 swp1 disabled
+ log_test $? 0 "swp1 carrier off"
+
+ run_cmd "ip netns exec $sw1 mausezahn br0.10 -a $smac -b $dmac -A 198.51.100.1 -B 198.51.100.2 -t ip -p 100 -q -c 1"
+ tc_check_packets $sw1 "dev swp1 egress" 101 1
+ log_test $? 0 "No forwarding out of swp1"
+ tc_check_packets $sw1 "dev vx0 egress" 101 1
+ log_test $? 0 "Forwarding out of vx0"
+ tc_check_packets $sw2 "dev vx0 ingress" 101 0
+ log_test $? 0 "No forwarding using backup nexthop ID"
+ tc_check_packets $sw2 "dev vx0 ingress" 102 1
+ log_test $? 0 "Forwarding using VXLAN FDB entry"
+
+ run_cmd "ip -n $sw1 link set dev swp1 carrier on"
+ busywait $BUSYWAIT_TIMEOUT bridge_link_check $sw1 swp1 forwarding
+ log_test $? 0 "swp1 carrier on"
+
+ # Configure nexthop ID 10 as the backup nexthop ID of swp1 and check
+ # that when packets are forwarded out of vx0, they are forwarded using
+ # the backup nexthop ID.
+ run_cmd "bridge -n $sw1 link set dev swp1 backup_nhid 10"
+ run_cmd "bridge -n $sw1 -d link show dev swp1 | grep \"backup_nhid 10\""
+ log_test $? 0 "nexthop ID 10 configured as backup nexthop ID of swp1"
+
+ run_cmd "ip netns exec $sw1 mausezahn br0.10 -a $smac -b $dmac -A 198.51.100.1 -B 198.51.100.2 -t ip -p 100 -q -c 1"
+ tc_check_packets $sw1 "dev swp1 egress" 101 2
+ log_test $? 0 "Forwarding out of swp1"
+ tc_check_packets $sw1 "dev vx0 egress" 101 1
+ log_test $? 0 "No forwarding out of vx0"
+
+ run_cmd "ip -n $sw1 link set dev swp1 carrier off"
+ busywait $BUSYWAIT_TIMEOUT bridge_link_check $sw1 swp1 disabled
+ log_test $? 0 "swp1 carrier off"
+
+ run_cmd "ip netns exec $sw1 mausezahn br0.10 -a $smac -b $dmac -A 198.51.100.1 -B 198.51.100.2 -t ip -p 100 -q -c 1"
+ tc_check_packets $sw1 "dev swp1 egress" 101 2
+ log_test $? 0 "No forwarding out of swp1"
+ tc_check_packets $sw1 "dev vx0 egress" 101 2
+ log_test $? 0 "Forwarding out of vx0"
+ tc_check_packets $sw2 "dev vx0 ingress" 101 1
+ log_test $? 0 "Forwarding using backup nexthop ID"
+ tc_check_packets $sw2 "dev vx0 ingress" 102 1
+ log_test $? 0 "No forwarding using VXLAN FDB entry"
+
+ run_cmd "ip -n $sw1 link set dev swp1 carrier on"
+ busywait $BUSYWAIT_TIMEOUT bridge_link_check $sw1 swp1 forwarding
+ log_test $? 0 "swp1 carrier on"
+
+ run_cmd "ip netns exec $sw1 mausezahn br0.10 -a $smac -b $dmac -A 198.51.100.1 -B 198.51.100.2 -t ip -p 100 -q -c 1"
+ tc_check_packets $sw1 "dev swp1 egress" 101 3
+ log_test $? 0 "Forwarding out of swp1"
+ tc_check_packets $sw1 "dev vx0 egress" 101 2
+ log_test $? 0 "No forwarding out of vx0"
+ tc_check_packets $sw2 "dev vx0 ingress" 101 1
+ log_test $? 0 "No forwarding using backup nexthop ID"
+ tc_check_packets $sw2 "dev vx0 ingress" 102 1
+ log_test $? 0 "No forwarding using VXLAN FDB entry"
+
+ # Reset the backup nexthop ID to 0 and check that packets are no longer
+ # forwarded using the backup nexthop ID when swp1 does not have a
+ # carrier and are instead forwarded by the VXLAN FDB.
+ run_cmd "bridge -n $sw1 link set dev swp1 backup_nhid 0"
+ run_cmd "bridge -n $sw1 -d link show dev swp1 | grep \"backup_nhid\""
+ log_test $? 1 "No backup nexthop ID configured for swp1"
+
+ run_cmd "ip netns exec $sw1 mausezahn br0.10 -a $smac -b $dmac -A 198.51.100.1 -B 198.51.100.2 -t ip -p 100 -q -c 1"
+ tc_check_packets $sw1 "dev swp1 egress" 101 4
+ log_test $? 0 "Forwarding out of swp1"
+ tc_check_packets $sw1 "dev vx0 egress" 101 2
+ log_test $? 0 "No forwarding out of vx0"
+ tc_check_packets $sw2 "dev vx0 ingress" 101 1
+ log_test $? 0 "No forwarding using backup nexthop ID"
+ tc_check_packets $sw2 "dev vx0 ingress" 102 1
+ log_test $? 0 "No forwarding using VXLAN FDB entry"
+
+ run_cmd "ip -n $sw1 link set dev swp1 carrier off"
+ busywait $BUSYWAIT_TIMEOUT bridge_link_check $sw1 swp1 disabled
+ log_test $? 0 "swp1 carrier off"
+
+ run_cmd "ip netns exec $sw1 mausezahn br0.10 -a $smac -b $dmac -A 198.51.100.1 -B 198.51.100.2 -t ip -p 100 -q -c 1"
+ tc_check_packets $sw1 "dev swp1 egress" 101 4
+ log_test $? 0 "No forwarding out of swp1"
+ tc_check_packets $sw1 "dev vx0 egress" 101 3
+ log_test $? 0 "Forwarding out of vx0"
+ tc_check_packets $sw2 "dev vx0 ingress" 101 1
+ log_test $? 0 "No forwarding using backup nexthop ID"
+ tc_check_packets $sw2 "dev vx0 ingress" 102 2
+ log_test $? 0 "Forwarding using VXLAN FDB entry"
+}
+
+backup_nhid_invalid()
+{
+ local dmac=00:11:22:33:44:55
+ local smac=00:aa:bb:cc:dd:ee
+ local tx_drop
+
+ echo
+ echo "Backup nexthop ID - invalid IDs"
+ echo "-------------------------------"
+
+ # Check that when traffic is redirected with an invalid nexthop ID, it
+ # is forwarded out of the VXLAN port, but dropped by the VXLAN driver
+ # and does not crash the host.
+
+ run_cmd "tc -n $sw1 qdisc replace dev swp1 clsact"
+ run_cmd "tc -n $sw1 filter replace dev swp1 egress pref 1 handle 101 proto ip flower src_mac $smac dst_mac $dmac action pass"
+
+ run_cmd "tc -n $sw1 qdisc replace dev vx0 clsact"
+ run_cmd "tc -n $sw1 filter replace dev vx0 egress pref 1 handle 101 proto ip flower src_mac $smac dst_mac $dmac action pass"
+ # Drop all other Tx traffic to avoid changes to Tx drop counter.
+ run_cmd "tc -n $sw1 filter replace dev vx0 egress pref 2 handle 102 proto all matchall action drop"
+
+ tx_drop=$(ip -n $sw1 -s -j link show dev vx0 | jq '.[]["stats64"]["tx"]["dropped"]')
+
+ run_cmd "ip -n $sw1 nexthop replace id 1 via 192.0.2.34 fdb"
+ run_cmd "ip -n $sw1 nexthop replace id 2 via 192.0.2.34 fdb"
+ run_cmd "ip -n $sw1 nexthop replace id 10 group 1/2 fdb"
+
+ run_cmd "bridge -n $sw1 fdb replace $dmac dev swp1 master static vlan 10"
+
+ run_cmd "tc -n $sw2 qdisc replace dev vx0 clsact"
+ run_cmd "tc -n $sw2 filter replace dev vx0 ingress pref 1 handle 101 proto ip flower src_mac $smac dst_mac $dmac enc_key_id 10010 enc_dst_ip 192.0.2.34 action pass"
+
+ # First, check that redirection works.
+ run_cmd "bridge -n $sw1 link set dev swp1 backup_port vx0"
+ run_cmd "bridge -n $sw1 -d link show dev swp1 | grep \"backup_port vx0\""
+ log_test $? 0 "vx0 configured as backup port of swp1"
+
+ run_cmd "bridge -n $sw1 link set dev swp1 backup_nhid 10"
+ run_cmd "bridge -n $sw1 -d link show dev swp1 | grep \"backup_nhid 10\""
+ log_test $? 0 "Valid nexthop as backup nexthop"
+
+ run_cmd "ip -n $sw1 link set dev swp1 carrier off"
+ busywait $BUSYWAIT_TIMEOUT bridge_link_check $sw1 swp1 disabled
+ log_test $? 0 "swp1 carrier off"
+
+ run_cmd "ip netns exec $sw1 mausezahn br0.10 -a $smac -b $dmac -A 198.51.100.1 -B 198.51.100.2 -t ip -p 100 -q -c 1"
+ tc_check_packets $sw1 "dev swp1 egress" 101 0
+ log_test $? 0 "No forwarding out of swp1"
+ tc_check_packets $sw1 "dev vx0 egress" 101 1
+ log_test $? 0 "Forwarding out of vx0"
+ tc_check_packets $sw2 "dev vx0 ingress" 101 1
+ log_test $? 0 "Forwarding using backup nexthop ID"
+ run_cmd "ip -n $sw1 -s -j link show dev vx0 | jq -e '.[][\"stats64\"][\"tx\"][\"dropped\"] == $tx_drop'"
+ log_test $? 0 "No Tx drop increase"
+
+ # Use a non-existent nexthop ID.
+ run_cmd "bridge -n $sw1 link set dev swp1 backup_nhid 20"
+ run_cmd "bridge -n $sw1 -d link show dev swp1 | grep \"backup_nhid 20\""
+ log_test $? 0 "Non-existent nexthop as backup nexthop"
+
+ run_cmd "ip netns exec $sw1 mausezahn br0.10 -a $smac -b $dmac -A 198.51.100.1 -B 198.51.100.2 -t ip -p 100 -q -c 1"
+ tc_check_packets $sw1 "dev swp1 egress" 101 0
+ log_test $? 0 "No forwarding out of swp1"
+ tc_check_packets $sw1 "dev vx0 egress" 101 2
+ log_test $? 0 "Forwarding out of vx0"
+ tc_check_packets $sw2 "dev vx0 ingress" 101 1
+ log_test $? 0 "No forwarding using backup nexthop ID"
+ run_cmd "ip -n $sw1 -s -j link show dev vx0 | jq -e '.[][\"stats64\"][\"tx\"][\"dropped\"] == $((tx_drop + 1))'"
+ log_test $? 0 "Tx drop increased"
+
+ # Use a blckhole nexthop.
+ run_cmd "ip -n $sw1 nexthop replace id 30 blackhole"
+ run_cmd "bridge -n $sw1 link set dev swp1 backup_nhid 30"
+ run_cmd "bridge -n $sw1 -d link show dev swp1 | grep \"backup_nhid 30\""
+ log_test $? 0 "Blackhole nexthop as backup nexthop"
+
+ run_cmd "ip netns exec $sw1 mausezahn br0.10 -a $smac -b $dmac -A 198.51.100.1 -B 198.51.100.2 -t ip -p 100 -q -c 1"
+ tc_check_packets $sw1 "dev swp1 egress" 101 0
+ log_test $? 0 "No forwarding out of swp1"
+ tc_check_packets $sw1 "dev vx0 egress" 101 3
+ log_test $? 0 "Forwarding out of vx0"
+ tc_check_packets $sw2 "dev vx0 ingress" 101 1
+ log_test $? 0 "No forwarding using backup nexthop ID"
+ run_cmd "ip -n $sw1 -s -j link show dev vx0 | jq -e '.[][\"stats64\"][\"tx\"][\"dropped\"] == $((tx_drop + 2))'"
+ log_test $? 0 "Tx drop increased"
+
+ # Non-group FDB nexthop.
+ run_cmd "bridge -n $sw1 link set dev swp1 backup_nhid 1"
+ run_cmd "bridge -n $sw1 -d link show dev swp1 | grep \"backup_nhid 1\""
+ log_test $? 0 "Non-group FDB nexthop as backup nexthop"
+
+ run_cmd "ip netns exec $sw1 mausezahn br0.10 -a $smac -b $dmac -A 198.51.100.1 -B 198.51.100.2 -t ip -p 100 -q -c 1"
+ tc_check_packets $sw1 "dev swp1 egress" 101 0
+ log_test $? 0 "No forwarding out of swp1"
+ tc_check_packets $sw1 "dev vx0 egress" 101 4
+ log_test $? 0 "Forwarding out of vx0"
+ tc_check_packets $sw2 "dev vx0 ingress" 101 1
+ log_test $? 0 "No forwarding using backup nexthop ID"
+ run_cmd "ip -n $sw1 -s -j link show dev vx0 | jq -e '.[][\"stats64\"][\"tx\"][\"dropped\"] == $((tx_drop + 3))'"
+ log_test $? 0 "Tx drop increased"
+
+ # IPv6 address family nexthop.
+ run_cmd "ip -n $sw1 nexthop replace id 100 via 2001:db8:100::1 fdb"
+ run_cmd "ip -n $sw1 nexthop replace id 200 via 2001:db8:100::1 fdb"
+ run_cmd "ip -n $sw1 nexthop replace id 300 group 100/200 fdb"
+ run_cmd "bridge -n $sw1 link set dev swp1 backup_nhid 300"
+ run_cmd "bridge -n $sw1 -d link show dev swp1 | grep \"backup_nhid 300\""
+ log_test $? 0 "IPv6 address family nexthop as backup nexthop"
+
+ run_cmd "ip netns exec $sw1 mausezahn br0.10 -a $smac -b $dmac -A 198.51.100.1 -B 198.51.100.2 -t ip -p 100 -q -c 1"
+ tc_check_packets $sw1 "dev swp1 egress" 101 0
+ log_test $? 0 "No forwarding out of swp1"
+ tc_check_packets $sw1 "dev vx0 egress" 101 5
+ log_test $? 0 "Forwarding out of vx0"
+ tc_check_packets $sw2 "dev vx0 ingress" 101 1
+ log_test $? 0 "No forwarding using backup nexthop ID"
+ run_cmd "ip -n $sw1 -s -j link show dev vx0 | jq -e '.[][\"stats64\"][\"tx\"][\"dropped\"] == $((tx_drop + 4))'"
+ log_test $? 0 "Tx drop increased"
+}
+
+backup_nhid_ping()
+{
+ local sw1_mac
+ local sw2_mac
+
+ echo
+ echo "Backup nexthop ID - ping"
+ echo "------------------------"
+
+ # Test bidirectional traffic when traffic is redirected in both VTEPs.
+ sw1_mac=$(ip -n $sw1 -j -p link show br0.10 | jq -r '.[]["address"]')
+ sw2_mac=$(ip -n $sw2 -j -p link show br0.10 | jq -r '.[]["address"]')
+
+ run_cmd "bridge -n $sw1 fdb replace $sw2_mac dev swp1 master static vlan 10"
+ run_cmd "bridge -n $sw2 fdb replace $sw1_mac dev swp1 master static vlan 10"
+
+ run_cmd "ip -n $sw1 neigh replace 192.0.2.66 lladdr $sw2_mac nud perm dev br0.10"
+ run_cmd "ip -n $sw2 neigh replace 192.0.2.65 lladdr $sw1_mac nud perm dev br0.10"
+
+ run_cmd "ip -n $sw1 nexthop replace id 1 via 192.0.2.34 fdb"
+ run_cmd "ip -n $sw2 nexthop replace id 1 via 192.0.2.33 fdb"
+ run_cmd "ip -n $sw1 nexthop replace id 10 group 1 fdb"
+ run_cmd "ip -n $sw2 nexthop replace id 10 group 1 fdb"
+
+ run_cmd "bridge -n $sw1 link set dev swp1 backup_port vx0"
+ run_cmd "bridge -n $sw2 link set dev swp1 backup_port vx0"
+ run_cmd "bridge -n $sw1 link set dev swp1 backup_nhid 10"
+ run_cmd "bridge -n $sw2 link set dev swp1 backup_nhid 10"
+
+ run_cmd "ip -n $sw1 link set dev swp1 carrier off"
+ busywait $BUSYWAIT_TIMEOUT bridge_link_check $sw1 swp1 disabled
+ run_cmd "ip -n $sw2 link set dev swp1 carrier off"
+ busywait $BUSYWAIT_TIMEOUT bridge_link_check $sw2 swp1 disabled
+
+ run_cmd "ip netns exec $sw1 ping -i 0.1 -c 10 -w $PING_TIMEOUT 192.0.2.66"
+ log_test $? 0 "Ping with backup nexthop ID"
+
+ # Reset the backup nexthop ID to 0 and check that ping fails.
+ run_cmd "bridge -n $sw1 link set dev swp1 backup_nhid 0"
+ run_cmd "bridge -n $sw2 link set dev swp1 backup_nhid 0"
+
+ run_cmd "ip netns exec $sw1 ping -i 0.1 -c 10 -w $PING_TIMEOUT 192.0.2.66"
+ log_test $? 1 "Ping after disabling backup nexthop ID"
+}
+
+backup_nhid_add_del_loop()
+{
+ while true; do
+ ip -n $sw1 nexthop del id 10
+ ip -n $sw1 nexthop replace id 10 group 1/2 fdb
+ done >/dev/null 2>&1
+}
+
+backup_nhid_torture()
+{
+ local dmac=00:11:22:33:44:55
+ local smac=00:aa:bb:cc:dd:ee
+ local pid1
+ local pid2
+ local pid3
+
+ echo
+ echo "Backup nexthop ID - torture test"
+ echo "--------------------------------"
+
+ # Continuously send traffic through the backup nexthop while adding and
+ # deleting the group. The test is considered successful if nothing
+ # crashed.
+
+ run_cmd "ip -n $sw1 nexthop replace id 1 via 192.0.2.34 fdb"
+ run_cmd "ip -n $sw1 nexthop replace id 2 via 192.0.2.34 fdb"
+ run_cmd "ip -n $sw1 nexthop replace id 10 group 1/2 fdb"
+
+ run_cmd "bridge -n $sw1 fdb replace $dmac dev swp1 master static vlan 10"
+
+ run_cmd "bridge -n $sw1 link set dev swp1 backup_port vx0"
+ run_cmd "bridge -n $sw1 link set dev swp1 backup_nhid 10"
+ run_cmd "ip -n $sw1 link set dev swp1 carrier off"
+
+ backup_nhid_add_del_loop &
+ pid1=$!
+ ip netns exec $sw1 mausezahn br0.10 -a $smac -b $dmac -A 198.51.100.1 -B 198.51.100.2 -t ip -p 100 -q -c 0 &
+ pid2=$!
+
+ sleep 30
+ kill -9 $pid1 $pid2
+ wait $pid1 $pid2 2>/dev/null
+
+ log_test 0 0 "Torture test"
+}
+
+################################################################################
+# Usage
+
+usage()
+{
+ cat <<EOF
+usage: ${0##*/} OPTS
+
+ -t <test> Test(s) to run (default: all)
+ (options: $TESTS)
+ -p Pause on fail
+ -P Pause after each test before cleanup
+ -v Verbose mode (show commands and output)
+ -w Timeout for ping
+EOF
+}
+
+################################################################################
+# Main
+
+trap cleanup EXIT
+
+while getopts ":t:pPvhw:" opt; do
+ case $opt in
+ t) TESTS=$OPTARG;;
+ p) PAUSE_ON_FAIL=yes;;
+ P) PAUSE=yes;;
+ v) VERBOSE=$(($VERBOSE + 1));;
+ w) PING_TIMEOUT=$OPTARG;;
+ h) usage; exit 0;;
+ *) usage; exit 1;;
+ esac
+done
+
+# Make sure we don't pause twice.
+[ "${PAUSE}" = "yes" ] && PAUSE_ON_FAIL=no
+
+if [ "$(id -u)" -ne 0 ];then
+ echo "SKIP: Need root privileges"
+ exit $ksft_skip;
+fi
+
+if [ ! -x "$(command -v ip)" ]; then
+ echo "SKIP: Could not run test without ip tool"
+ exit $ksft_skip
+fi
+
+if [ ! -x "$(command -v bridge)" ]; then
+ echo "SKIP: Could not run test without bridge tool"
+ exit $ksft_skip
+fi
+
+if [ ! -x "$(command -v tc)" ]; then
+ echo "SKIP: Could not run test without tc tool"
+ exit $ksft_skip
+fi
+
+if [ ! -x "$(command -v mausezahn)" ]; then
+ echo "SKIP: Could not run test without mausezahn tool"
+ exit $ksft_skip
+fi
+
+if [ ! -x "$(command -v jq)" ]; then
+ echo "SKIP: Could not run test without jq tool"
+ exit $ksft_skip
+fi
+
+bridge link help 2>&1 | grep -q "backup_nhid"
+if [ $? -ne 0 ]; then
+ echo "SKIP: iproute2 bridge too old, missing backup nexthop ID support"
+ exit $ksft_skip
+fi
+
+# Start clean.
+cleanup
+
+for t in $TESTS
+do
+ setup; $t; cleanup;
+done
+
+if [ "$TESTS" != "none" ]; then
+ printf "\nTests passed: %3d\n" ${nsuccess}
+ printf "Tests failed: %3d\n" ${nfail}
+fi
+
+exit $ret
diff --git a/tools/testing/selftests/net/test_bridge_neigh_suppress.sh b/tools/testing/selftests/net/test_bridge_neigh_suppress.sh
new file mode 100755
index 000000000000..8533393a4f18
--- /dev/null
+++ b/tools/testing/selftests/net/test_bridge_neigh_suppress.sh
@@ -0,0 +1,855 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+#
+# This test is for checking bridge neighbor suppression functionality. The
+# topology consists of two bridges (VTEPs) connected using VXLAN. A single
+# host is connected to each bridge over multiple VLANs. The test checks that
+# ARP/NS messages from the first host are suppressed on the VXLAN port when
+# should.
+#
+# +-----------------------+ +------------------------+
+# | h1 | | h2 |
+# | | | |
+# | + eth0.10 | | + eth0.10 |
+# | | 192.0.2.1/28 | | | 192.0.2.2/28 |
+# | | 2001:db8:1::1/64 | | | 2001:db8:1::2/64 |
+# | | | | | |
+# | | + eth0.20 | | | + eth0.20 |
+# | \ | 192.0.2.17/28 | | \ | 192.0.2.18/28 |
+# | \ | 2001:db8:2::1/64 | | \ | 2001:db8:2::2/64 |
+# | \| | | \| |
+# | + eth0 | | + eth0 |
+# +----|------------------+ +----|-------------------+
+# | |
+# | |
+# +----|-------------------------------+ +----|-------------------------------+
+# | + swp1 + vx0 | | + swp1 + vx0 |
+# | | | | | | | |
+# | | br0 | | | | | |
+# | +------------+-----------+ | | +------------+-----------+ |
+# | | | | | |
+# | | | | | |
+# | +---+---+ | | +---+---+ |
+# | | | | | | | |
+# | | | | | | | |
+# | + + | | + + |
+# | br0.10 br0.20 | | br0.10 br0.20 |
+# | | | |
+# | 192.0.2.33 | | 192.0.2.34 |
+# | + lo | | + lo |
+# | | | |
+# | | | |
+# | 192.0.2.49/28 | | 192.0.2.50/28 |
+# | veth0 +-------+ veth0 |
+# | | | |
+# | sw1 | | sw2 |
+# +------------------------------------+ +------------------------------------+
+
+source lib.sh
+ret=0
+
+# All tests in this script. Can be overridden with -t option.
+TESTS="
+ neigh_suppress_arp
+ neigh_suppress_ns
+ neigh_vlan_suppress_arp
+ neigh_vlan_suppress_ns
+"
+VERBOSE=0
+PAUSE_ON_FAIL=no
+PAUSE=no
+
+################################################################################
+# Utilities
+
+log_test()
+{
+ local rc=$1
+ local expected=$2
+ local msg="$3"
+
+ if [ ${rc} -eq ${expected} ]; then
+ printf "TEST: %-60s [ OK ]\n" "${msg}"
+ nsuccess=$((nsuccess+1))
+ else
+ ret=1
+ nfail=$((nfail+1))
+ printf "TEST: %-60s [FAIL]\n" "${msg}"
+ if [ "$VERBOSE" = "1" ]; then
+ echo " rc=$rc, expected $expected"
+ fi
+
+ if [ "${PAUSE_ON_FAIL}" = "yes" ]; then
+ echo
+ echo "hit enter to continue, 'q' to quit"
+ read a
+ [ "$a" = "q" ] && exit 1
+ fi
+ fi
+
+ if [ "${PAUSE}" = "yes" ]; then
+ echo
+ echo "hit enter to continue, 'q' to quit"
+ read a
+ [ "$a" = "q" ] && exit 1
+ fi
+
+ [ "$VERBOSE" = "1" ] && echo
+}
+
+run_cmd()
+{
+ local cmd="$1"
+ local out
+ local stderr="2>/dev/null"
+
+ if [ "$VERBOSE" = "1" ]; then
+ printf "COMMAND: $cmd\n"
+ stderr=
+ fi
+
+ out=$(eval $cmd $stderr)
+ rc=$?
+ if [ "$VERBOSE" = "1" -a -n "$out" ]; then
+ echo " $out"
+ fi
+
+ return $rc
+}
+
+tc_check_packets()
+{
+ local ns=$1; shift
+ local id=$1; shift
+ local handle=$1; shift
+ local count=$1; shift
+ local pkts
+
+ sleep 0.1
+ pkts=$(tc -n $ns -j -s filter show $id \
+ | jq ".[] | select(.options.handle == $handle) | \
+ .options.actions[0].stats.packets")
+ [[ $pkts == $count ]]
+}
+
+################################################################################
+# Setup
+
+setup_topo_ns()
+{
+ local ns=$1; shift
+
+ ip netns exec $ns sysctl -qw net.ipv6.conf.all.keep_addr_on_down=1
+ ip netns exec $ns sysctl -qw net.ipv6.conf.default.ignore_routes_with_linkdown=1
+ ip netns exec $ns sysctl -qw net.ipv6.conf.all.accept_dad=0
+ ip netns exec $ns sysctl -qw net.ipv6.conf.default.accept_dad=0
+}
+
+setup_topo()
+{
+ local ns
+
+ setup_ns h1 h2 sw1 sw2
+ for ns in $h1 $h2 $sw1 $sw2; do
+ setup_topo_ns $ns
+ done
+
+ ip link add name veth0 type veth peer name veth1
+ ip link set dev veth0 netns $h1 name eth0
+ ip link set dev veth1 netns $sw1 name swp1
+
+ ip link add name veth0 type veth peer name veth1
+ ip link set dev veth0 netns $sw1 name veth0
+ ip link set dev veth1 netns $sw2 name veth0
+
+ ip link add name veth0 type veth peer name veth1
+ ip link set dev veth0 netns $h2 name eth0
+ ip link set dev veth1 netns $sw2 name swp1
+}
+
+setup_host_common()
+{
+ local ns=$1; shift
+ local v4addr1=$1; shift
+ local v4addr2=$1; shift
+ local v6addr1=$1; shift
+ local v6addr2=$1; shift
+
+ ip -n $ns link set dev eth0 up
+ ip -n $ns link add link eth0 name eth0.10 up type vlan id 10
+ ip -n $ns link add link eth0 name eth0.20 up type vlan id 20
+
+ ip -n $ns address add $v4addr1 dev eth0.10
+ ip -n $ns address add $v4addr2 dev eth0.20
+ ip -n $ns address add $v6addr1 dev eth0.10
+ ip -n $ns address add $v6addr2 dev eth0.20
+}
+
+setup_h1()
+{
+ local ns=$h1
+ local v4addr1=192.0.2.1/28
+ local v4addr2=192.0.2.17/28
+ local v6addr1=2001:db8:1::1/64
+ local v6addr2=2001:db8:2::1/64
+
+ setup_host_common $ns $v4addr1 $v4addr2 $v6addr1 $v6addr2
+}
+
+setup_h2()
+{
+ local ns=$h2
+ local v4addr1=192.0.2.2/28
+ local v4addr2=192.0.2.18/28
+ local v6addr1=2001:db8:1::2/64
+ local v6addr2=2001:db8:2::2/64
+
+ setup_host_common $ns $v4addr1 $v4addr2 $v6addr1 $v6addr2
+}
+
+setup_sw_common()
+{
+ local ns=$1; shift
+ local local_addr=$1; shift
+ local remote_addr=$1; shift
+ local veth_addr=$1; shift
+ local gw_addr=$1; shift
+
+ ip -n $ns address add $local_addr/32 dev lo
+
+ ip -n $ns link set dev veth0 up
+ ip -n $ns address add $veth_addr/28 dev veth0
+ ip -n $ns route add default via $gw_addr
+
+ ip -n $ns link add name br0 up type bridge vlan_filtering 1 \
+ vlan_default_pvid 0 mcast_snooping 0
+
+ ip -n $ns link add link br0 name br0.10 up type vlan id 10
+ bridge -n $ns vlan add vid 10 dev br0 self
+
+ ip -n $ns link add link br0 name br0.20 up type vlan id 20
+ bridge -n $ns vlan add vid 20 dev br0 self
+
+ ip -n $ns link set dev swp1 up master br0
+ bridge -n $ns vlan add vid 10 dev swp1
+ bridge -n $ns vlan add vid 20 dev swp1
+
+ ip -n $ns link add name vx0 up master br0 type vxlan \
+ local $local_addr dstport 4789 nolearning external
+ bridge -n $ns fdb add 00:00:00:00:00:00 dev vx0 self static \
+ dst $remote_addr src_vni 10010
+ bridge -n $ns fdb add 00:00:00:00:00:00 dev vx0 self static \
+ dst $remote_addr src_vni 10020
+ bridge -n $ns link set dev vx0 vlan_tunnel on learning off
+
+ bridge -n $ns vlan add vid 10 dev vx0
+ bridge -n $ns vlan add vid 10 dev vx0 tunnel_info id 10010
+
+ bridge -n $ns vlan add vid 20 dev vx0
+ bridge -n $ns vlan add vid 20 dev vx0 tunnel_info id 10020
+}
+
+setup_sw1()
+{
+ local ns=$sw1
+ local local_addr=192.0.2.33
+ local remote_addr=192.0.2.34
+ local veth_addr=192.0.2.49
+ local gw_addr=192.0.2.50
+
+ setup_sw_common $ns $local_addr $remote_addr $veth_addr $gw_addr
+}
+
+setup_sw2()
+{
+ local ns=$sw2
+ local local_addr=192.0.2.34
+ local remote_addr=192.0.2.33
+ local veth_addr=192.0.2.50
+ local gw_addr=192.0.2.49
+
+ setup_sw_common $ns $local_addr $remote_addr $veth_addr $gw_addr
+}
+
+setup()
+{
+ set -e
+
+ setup_topo
+ setup_h1
+ setup_h2
+ setup_sw1
+ setup_sw2
+
+ sleep 5
+
+ set +e
+}
+
+cleanup()
+{
+ cleanup_ns $h1 $h2 $sw1 $sw2
+}
+
+################################################################################
+# Tests
+
+neigh_suppress_arp_common()
+{
+ local vid=$1; shift
+ local sip=$1; shift
+ local tip=$1; shift
+ local h2_mac
+
+ echo
+ echo "Per-port ARP suppression - VLAN $vid"
+ echo "----------------------------------"
+
+ run_cmd "tc -n $sw1 qdisc replace dev vx0 clsact"
+ run_cmd "tc -n $sw1 filter replace dev vx0 egress pref 1 handle 101 proto 0x0806 flower indev swp1 arp_tip $tip arp_sip $sip arp_op request action pass"
+
+ # Initial state - check that ARP requests are not suppressed and that
+ # ARP replies are received.
+ run_cmd "ip netns exec $h1 arping -q -b -c 1 -w 5 -s $sip -I eth0.$vid $tip"
+ log_test $? 0 "arping"
+ tc_check_packets $sw1 "dev vx0 egress" 101 1
+ log_test $? 0 "ARP suppression"
+
+ # Enable neighbor suppression and check that nothing changes compared
+ # to the initial state.
+ run_cmd "bridge -n $sw1 link set dev vx0 neigh_suppress on"
+ run_cmd "bridge -n $sw1 -d link show dev vx0 | grep \"neigh_suppress on\""
+ log_test $? 0 "\"neigh_suppress\" is on"
+
+ run_cmd "ip netns exec $h1 arping -q -b -c 1 -w 5 -s $sip -I eth0.$vid $tip"
+ log_test $? 0 "arping"
+ tc_check_packets $sw1 "dev vx0 egress" 101 2
+ log_test $? 0 "ARP suppression"
+
+ # Install an FDB entry for the remote host and check that nothing
+ # changes compared to the initial state.
+ h2_mac=$(ip -n $h2 -j -p link show eth0.$vid | jq -r '.[]["address"]')
+ run_cmd "bridge -n $sw1 fdb replace $h2_mac dev vx0 master static vlan $vid"
+ log_test $? 0 "FDB entry installation"
+
+ run_cmd "ip netns exec $h1 arping -q -b -c 1 -w 5 -s $sip -I eth0.$vid $tip"
+ log_test $? 0 "arping"
+ tc_check_packets $sw1 "dev vx0 egress" 101 3
+ log_test $? 0 "ARP suppression"
+
+ # Install a neighbor on the matching SVI interface and check that ARP
+ # requests are suppressed.
+ run_cmd "ip -n $sw1 neigh replace $tip lladdr $h2_mac nud permanent dev br0.$vid"
+ log_test $? 0 "Neighbor entry installation"
+
+ run_cmd "ip netns exec $h1 arping -q -b -c 1 -w 5 -s $sip -I eth0.$vid $tip"
+ log_test $? 0 "arping"
+ tc_check_packets $sw1 "dev vx0 egress" 101 3
+ log_test $? 0 "ARP suppression"
+
+ # Take the second host down and check that ARP requests are suppressed
+ # and that ARP replies are received.
+ run_cmd "ip -n $h2 link set dev eth0.$vid down"
+ log_test $? 0 "H2 down"
+
+ run_cmd "ip netns exec $h1 arping -q -b -c 1 -w 5 -s $sip -I eth0.$vid $tip"
+ log_test $? 0 "arping"
+ tc_check_packets $sw1 "dev vx0 egress" 101 3
+ log_test $? 0 "ARP suppression"
+
+ run_cmd "ip -n $h2 link set dev eth0.$vid up"
+ log_test $? 0 "H2 up"
+
+ # Disable neighbor suppression and check that ARP requests are no
+ # longer suppressed.
+ run_cmd "bridge -n $sw1 link set dev vx0 neigh_suppress off"
+ run_cmd "bridge -n $sw1 -d link show dev vx0 | grep \"neigh_suppress off\""
+ log_test $? 0 "\"neigh_suppress\" is off"
+
+ run_cmd "ip netns exec $h1 arping -q -b -c 1 -w 5 -s $sip -I eth0.$vid $tip"
+ log_test $? 0 "arping"
+ tc_check_packets $sw1 "dev vx0 egress" 101 4
+ log_test $? 0 "ARP suppression"
+
+ # Take the second host down and check that ARP requests are not
+ # suppressed and that ARP replies are not received.
+ run_cmd "ip -n $h2 link set dev eth0.$vid down"
+ log_test $? 0 "H2 down"
+
+ run_cmd "ip netns exec $h1 arping -q -b -c 1 -w 5 -s $sip -I eth0.$vid $tip"
+ log_test $? 1 "arping"
+ tc_check_packets $sw1 "dev vx0 egress" 101 5
+ log_test $? 0 "ARP suppression"
+}
+
+neigh_suppress_arp()
+{
+ local vid=10
+ local sip=192.0.2.1
+ local tip=192.0.2.2
+
+ neigh_suppress_arp_common $vid $sip $tip
+
+ vid=20
+ sip=192.0.2.17
+ tip=192.0.2.18
+ neigh_suppress_arp_common $vid $sip $tip
+}
+
+neigh_suppress_ns_common()
+{
+ local vid=$1; shift
+ local saddr=$1; shift
+ local daddr=$1; shift
+ local maddr=$1; shift
+ local h2_mac
+
+ echo
+ echo "Per-port NS suppression - VLAN $vid"
+ echo "---------------------------------"
+
+ run_cmd "tc -n $sw1 qdisc replace dev vx0 clsact"
+ run_cmd "tc -n $sw1 filter replace dev vx0 egress pref 1 handle 101 proto ipv6 flower indev swp1 ip_proto icmpv6 dst_ip $maddr src_ip $saddr type 135 code 0 action pass"
+
+ # Initial state - check that NS messages are not suppressed and that ND
+ # messages are received.
+ run_cmd "ip netns exec $h1 ndisc6 -q -r 1 -s $saddr -w 5000 $daddr eth0.$vid"
+ log_test $? 0 "ndisc6"
+ tc_check_packets $sw1 "dev vx0 egress" 101 1
+ log_test $? 0 "NS suppression"
+
+ # Enable neighbor suppression and check that nothing changes compared
+ # to the initial state.
+ run_cmd "bridge -n $sw1 link set dev vx0 neigh_suppress on"
+ run_cmd "bridge -n $sw1 -d link show dev vx0 | grep \"neigh_suppress on\""
+ log_test $? 0 "\"neigh_suppress\" is on"
+
+ run_cmd "ip netns exec $h1 ndisc6 -q -r 1 -s $saddr -w 5000 $daddr eth0.$vid"
+ log_test $? 0 "ndisc6"
+ tc_check_packets $sw1 "dev vx0 egress" 101 2
+ log_test $? 0 "NS suppression"
+
+ # Install an FDB entry for the remote host and check that nothing
+ # changes compared to the initial state.
+ h2_mac=$(ip -n $h2 -j -p link show eth0.$vid | jq -r '.[]["address"]')
+ run_cmd "bridge -n $sw1 fdb replace $h2_mac dev vx0 master static vlan $vid"
+ log_test $? 0 "FDB entry installation"
+
+ run_cmd "ip netns exec $h1 ndisc6 -q -r 1 -s $saddr -w 5000 $daddr eth0.$vid"
+ log_test $? 0 "ndisc6"
+ tc_check_packets $sw1 "dev vx0 egress" 101 3
+ log_test $? 0 "NS suppression"
+
+ # Install a neighbor on the matching SVI interface and check that NS
+ # messages are suppressed.
+ run_cmd "ip -n $sw1 neigh replace $daddr lladdr $h2_mac nud permanent dev br0.$vid"
+ log_test $? 0 "Neighbor entry installation"
+
+ run_cmd "ip netns exec $h1 ndisc6 -q -r 1 -s $saddr -w 5000 $daddr eth0.$vid"
+ log_test $? 0 "ndisc6"
+ tc_check_packets $sw1 "dev vx0 egress" 101 3
+ log_test $? 0 "NS suppression"
+
+ # Take the second host down and check that NS messages are suppressed
+ # and that ND messages are received.
+ run_cmd "ip -n $h2 link set dev eth0.$vid down"
+ log_test $? 0 "H2 down"
+
+ run_cmd "ip netns exec $h1 ndisc6 -q -r 1 -s $saddr -w 5000 $daddr eth0.$vid"
+ log_test $? 0 "ndisc6"
+ tc_check_packets $sw1 "dev vx0 egress" 101 3
+ log_test $? 0 "NS suppression"
+
+ run_cmd "ip -n $h2 link set dev eth0.$vid up"
+ log_test $? 0 "H2 up"
+
+ # Disable neighbor suppression and check that NS messages are no longer
+ # suppressed.
+ run_cmd "bridge -n $sw1 link set dev vx0 neigh_suppress off"
+ run_cmd "bridge -n $sw1 -d link show dev vx0 | grep \"neigh_suppress off\""
+ log_test $? 0 "\"neigh_suppress\" is off"
+
+ run_cmd "ip netns exec $h1 ndisc6 -q -r 1 -s $saddr -w 5000 $daddr eth0.$vid"
+ log_test $? 0 "ndisc6"
+ tc_check_packets $sw1 "dev vx0 egress" 101 4
+ log_test $? 0 "NS suppression"
+
+ # Take the second host down and check that NS messages are not
+ # suppressed and that ND messages are not received.
+ run_cmd "ip -n $h2 link set dev eth0.$vid down"
+ log_test $? 0 "H2 down"
+
+ run_cmd "ip netns exec $h1 ndisc6 -q -r 1 -s $saddr -w 5000 $daddr eth0.$vid"
+ log_test $? 2 "ndisc6"
+ tc_check_packets $sw1 "dev vx0 egress" 101 5
+ log_test $? 0 "NS suppression"
+}
+
+neigh_suppress_ns()
+{
+ local vid=10
+ local saddr=2001:db8:1::1
+ local daddr=2001:db8:1::2
+ local maddr=ff02::1:ff00:2
+
+ neigh_suppress_ns_common $vid $saddr $daddr $maddr
+
+ vid=20
+ saddr=2001:db8:2::1
+ daddr=2001:db8:2::2
+ maddr=ff02::1:ff00:2
+
+ neigh_suppress_ns_common $vid $saddr $daddr $maddr
+}
+
+neigh_vlan_suppress_arp()
+{
+ local vid1=10
+ local vid2=20
+ local sip1=192.0.2.1
+ local sip2=192.0.2.17
+ local tip1=192.0.2.2
+ local tip2=192.0.2.18
+ local h2_mac1
+ local h2_mac2
+
+ echo
+ echo "Per-{Port, VLAN} ARP suppression"
+ echo "--------------------------------"
+
+ run_cmd "tc -n $sw1 qdisc replace dev vx0 clsact"
+ run_cmd "tc -n $sw1 filter replace dev vx0 egress pref 1 handle 101 proto 0x0806 flower indev swp1 arp_tip $tip1 arp_sip $sip1 arp_op request action pass"
+ run_cmd "tc -n $sw1 filter replace dev vx0 egress pref 1 handle 102 proto 0x0806 flower indev swp1 arp_tip $tip2 arp_sip $sip2 arp_op request action pass"
+
+ h2_mac1=$(ip -n $h2 -j -p link show eth0.$vid1 | jq -r '.[]["address"]')
+ h2_mac2=$(ip -n $h2 -j -p link show eth0.$vid2 | jq -r '.[]["address"]')
+ run_cmd "bridge -n $sw1 fdb replace $h2_mac1 dev vx0 master static vlan $vid1"
+ run_cmd "bridge -n $sw1 fdb replace $h2_mac2 dev vx0 master static vlan $vid2"
+ run_cmd "ip -n $sw1 neigh replace $tip1 lladdr $h2_mac1 nud permanent dev br0.$vid1"
+ run_cmd "ip -n $sw1 neigh replace $tip2 lladdr $h2_mac2 nud permanent dev br0.$vid2"
+
+ # Enable per-{Port, VLAN} neighbor suppression and check that ARP
+ # requests are not suppressed and that ARP replies are received.
+ run_cmd "bridge -n $sw1 link set dev vx0 neigh_vlan_suppress on"
+ run_cmd "bridge -n $sw1 -d link show dev vx0 | grep \"neigh_vlan_suppress on\""
+ log_test $? 0 "\"neigh_vlan_suppress\" is on"
+
+ run_cmd "ip netns exec $h1 arping -q -b -c 1 -w 5 -s $sip1 -I eth0.$vid1 $tip1"
+ log_test $? 0 "arping (VLAN $vid1)"
+ run_cmd "ip netns exec $h1 arping -q -b -c 1 -w 5 -s $sip2 -I eth0.$vid2 $tip2"
+ log_test $? 0 "arping (VLAN $vid2)"
+
+ tc_check_packets $sw1 "dev vx0 egress" 101 1
+ log_test $? 0 "ARP suppression (VLAN $vid1)"
+ tc_check_packets $sw1 "dev vx0 egress" 102 1
+ log_test $? 0 "ARP suppression (VLAN $vid2)"
+
+ # Enable neighbor suppression on VLAN 10 and check that only on this
+ # VLAN ARP requests are suppressed.
+ run_cmd "bridge -n $sw1 vlan set vid $vid1 dev vx0 neigh_suppress on"
+ run_cmd "bridge -n $sw1 -d vlan show dev vx0 vid $vid1 | grep \"neigh_suppress on\""
+ log_test $? 0 "\"neigh_suppress\" is on (VLAN $vid1)"
+ run_cmd "bridge -n $sw1 -d vlan show dev vx0 vid $vid2 | grep \"neigh_suppress off\""
+ log_test $? 0 "\"neigh_suppress\" is off (VLAN $vid2)"
+
+ run_cmd "ip netns exec $h1 arping -q -b -c 1 -w 5 -s $sip1 -I eth0.$vid1 $tip1"
+ log_test $? 0 "arping (VLAN $vid1)"
+ run_cmd "ip netns exec $h1 arping -q -b -c 1 -w 5 -s $sip2 -I eth0.$vid2 $tip2"
+ log_test $? 0 "arping (VLAN $vid2)"
+
+ tc_check_packets $sw1 "dev vx0 egress" 101 1
+ log_test $? 0 "ARP suppression (VLAN $vid1)"
+ tc_check_packets $sw1 "dev vx0 egress" 102 2
+ log_test $? 0 "ARP suppression (VLAN $vid2)"
+
+ # Enable neighbor suppression on the port and check that it has no
+ # effect compared to previous state.
+ run_cmd "bridge -n $sw1 link set dev vx0 neigh_suppress on"
+ run_cmd "bridge -n $sw1 -d link show dev vx0 | grep \"neigh_suppress on\""
+ log_test $? 0 "\"neigh_suppress\" is on"
+
+ run_cmd "ip netns exec $h1 arping -q -b -c 1 -w 5 -s $sip1 -I eth0.$vid1 $tip1"
+ log_test $? 0 "arping (VLAN $vid1)"
+ run_cmd "ip netns exec $h1 arping -q -b -c 1 -w 5 -s $sip2 -I eth0.$vid2 $tip2"
+ log_test $? 0 "arping (VLAN $vid2)"
+
+ tc_check_packets $sw1 "dev vx0 egress" 101 1
+ log_test $? 0 "ARP suppression (VLAN $vid1)"
+ tc_check_packets $sw1 "dev vx0 egress" 102 3
+ log_test $? 0 "ARP suppression (VLAN $vid2)"
+
+ # Disable neighbor suppression on the port and check that it has no
+ # effect compared to previous state.
+ run_cmd "bridge -n $sw1 link set dev vx0 neigh_suppress off"
+ run_cmd "bridge -n $sw1 -d link show dev vx0 | grep \"neigh_suppress off\""
+ log_test $? 0 "\"neigh_suppress\" is off"
+
+ run_cmd "ip netns exec $h1 arping -q -b -c 1 -w 5 -s $sip1 -I eth0.$vid1 $tip1"
+ log_test $? 0 "arping (VLAN $vid1)"
+ run_cmd "ip netns exec $h1 arping -q -b -c 1 -w 5 -s $sip2 -I eth0.$vid2 $tip2"
+ log_test $? 0 "arping (VLAN $vid2)"
+
+ tc_check_packets $sw1 "dev vx0 egress" 101 1
+ log_test $? 0 "ARP suppression (VLAN $vid1)"
+ tc_check_packets $sw1 "dev vx0 egress" 102 4
+ log_test $? 0 "ARP suppression (VLAN $vid2)"
+
+ # Disable neighbor suppression on VLAN 10 and check that ARP requests
+ # are no longer suppressed on this VLAN.
+ run_cmd "bridge -n $sw1 vlan set vid $vid1 dev vx0 neigh_suppress off"
+ run_cmd "bridge -n $sw1 -d vlan show dev vx0 vid $vid1 | grep \"neigh_suppress off\""
+ log_test $? 0 "\"neigh_suppress\" is off (VLAN $vid1)"
+
+ run_cmd "ip netns exec $h1 arping -q -b -c 1 -w 5 -s $sip1 -I eth0.$vid1 $tip1"
+ log_test $? 0 "arping (VLAN $vid1)"
+ run_cmd "ip netns exec $h1 arping -q -b -c 1 -w 5 -s $sip2 -I eth0.$vid2 $tip2"
+ log_test $? 0 "arping (VLAN $vid2)"
+
+ tc_check_packets $sw1 "dev vx0 egress" 101 2
+ log_test $? 0 "ARP suppression (VLAN $vid1)"
+ tc_check_packets $sw1 "dev vx0 egress" 102 5
+ log_test $? 0 "ARP suppression (VLAN $vid2)"
+
+ # Disable per-{Port, VLAN} neighbor suppression, enable neighbor
+ # suppression on the port and check that on both VLANs ARP requests are
+ # suppressed.
+ run_cmd "bridge -n $sw1 link set dev vx0 neigh_vlan_suppress off"
+ run_cmd "bridge -n $sw1 -d link show dev vx0 | grep \"neigh_vlan_suppress off\""
+ log_test $? 0 "\"neigh_vlan_suppress\" is off"
+
+ run_cmd "bridge -n $sw1 link set dev vx0 neigh_suppress on"
+ run_cmd "bridge -n $sw1 -d link show dev vx0 | grep \"neigh_suppress on\""
+ log_test $? 0 "\"neigh_suppress\" is on"
+
+ run_cmd "ip netns exec $h1 arping -q -b -c 1 -w 5 -s $sip1 -I eth0.$vid1 $tip1"
+ log_test $? 0 "arping (VLAN $vid1)"
+ run_cmd "ip netns exec $h1 arping -q -b -c 1 -w 5 -s $sip2 -I eth0.$vid2 $tip2"
+ log_test $? 0 "arping (VLAN $vid2)"
+
+ tc_check_packets $sw1 "dev vx0 egress" 101 2
+ log_test $? 0 "ARP suppression (VLAN $vid1)"
+ tc_check_packets $sw1 "dev vx0 egress" 102 5
+ log_test $? 0 "ARP suppression (VLAN $vid2)"
+}
+
+neigh_vlan_suppress_ns()
+{
+ local vid1=10
+ local vid2=20
+ local saddr1=2001:db8:1::1
+ local saddr2=2001:db8:2::1
+ local daddr1=2001:db8:1::2
+ local daddr2=2001:db8:2::2
+ local maddr=ff02::1:ff00:2
+ local h2_mac1
+ local h2_mac2
+
+ echo
+ echo "Per-{Port, VLAN} NS suppression"
+ echo "-------------------------------"
+
+ run_cmd "tc -n $sw1 qdisc replace dev vx0 clsact"
+ run_cmd "tc -n $sw1 filter replace dev vx0 egress pref 1 handle 101 proto ipv6 flower indev swp1 ip_proto icmpv6 dst_ip $maddr src_ip $saddr1 type 135 code 0 action pass"
+ run_cmd "tc -n $sw1 filter replace dev vx0 egress pref 1 handle 102 proto ipv6 flower indev swp1 ip_proto icmpv6 dst_ip $maddr src_ip $saddr2 type 135 code 0 action pass"
+
+ h2_mac1=$(ip -n $h2 -j -p link show eth0.$vid1 | jq -r '.[]["address"]')
+ h2_mac2=$(ip -n $h2 -j -p link show eth0.$vid2 | jq -r '.[]["address"]')
+ run_cmd "bridge -n $sw1 fdb replace $h2_mac1 dev vx0 master static vlan $vid1"
+ run_cmd "bridge -n $sw1 fdb replace $h2_mac2 dev vx0 master static vlan $vid2"
+ run_cmd "ip -n $sw1 neigh replace $daddr1 lladdr $h2_mac1 nud permanent dev br0.$vid1"
+ run_cmd "ip -n $sw1 neigh replace $daddr2 lladdr $h2_mac2 nud permanent dev br0.$vid2"
+
+ # Enable per-{Port, VLAN} neighbor suppression and check that NS
+ # messages are not suppressed and that ND messages are received.
+ run_cmd "bridge -n $sw1 link set dev vx0 neigh_vlan_suppress on"
+ run_cmd "bridge -n $sw1 -d link show dev vx0 | grep \"neigh_vlan_suppress on\""
+ log_test $? 0 "\"neigh_vlan_suppress\" is on"
+
+ run_cmd "ip netns exec $h1 ndisc6 -q -r 1 -s $saddr1 -w 5000 $daddr1 eth0.$vid1"
+ log_test $? 0 "ndisc6 (VLAN $vid1)"
+ run_cmd "ip netns exec $h1 ndisc6 -q -r 1 -s $saddr2 -w 5000 $daddr2 eth0.$vid2"
+ log_test $? 0 "ndisc6 (VLAN $vid2)"
+
+ tc_check_packets $sw1 "dev vx0 egress" 101 1
+ log_test $? 0 "NS suppression (VLAN $vid1)"
+ tc_check_packets $sw1 "dev vx0 egress" 102 1
+ log_test $? 0 "NS suppression (VLAN $vid2)"
+
+ # Enable neighbor suppression on VLAN 10 and check that only on this
+ # VLAN NS messages are suppressed.
+ run_cmd "bridge -n $sw1 vlan set vid $vid1 dev vx0 neigh_suppress on"
+ run_cmd "bridge -n $sw1 -d vlan show dev vx0 vid $vid1 | grep \"neigh_suppress on\""
+ log_test $? 0 "\"neigh_suppress\" is on (VLAN $vid1)"
+ run_cmd "bridge -n $sw1 -d vlan show dev vx0 vid $vid2 | grep \"neigh_suppress off\""
+ log_test $? 0 "\"neigh_suppress\" is off (VLAN $vid2)"
+
+ run_cmd "ip netns exec $h1 ndisc6 -q -r 1 -s $saddr1 -w 5000 $daddr1 eth0.$vid1"
+ log_test $? 0 "ndisc6 (VLAN $vid1)"
+ run_cmd "ip netns exec $h1 ndisc6 -q -r 1 -s $saddr2 -w 5000 $daddr2 eth0.$vid2"
+ log_test $? 0 "ndisc6 (VLAN $vid2)"
+
+ tc_check_packets $sw1 "dev vx0 egress" 101 1
+ log_test $? 0 "NS suppression (VLAN $vid1)"
+ tc_check_packets $sw1 "dev vx0 egress" 102 2
+ log_test $? 0 "NS suppression (VLAN $vid2)"
+
+ # Enable neighbor suppression on the port and check that it has no
+ # effect compared to previous state.
+ run_cmd "bridge -n $sw1 link set dev vx0 neigh_suppress on"
+ run_cmd "bridge -n $sw1 -d link show dev vx0 | grep \"neigh_suppress on\""
+ log_test $? 0 "\"neigh_suppress\" is on"
+
+ run_cmd "ip netns exec $h1 ndisc6 -q -r 1 -s $saddr1 -w 5000 $daddr1 eth0.$vid1"
+ log_test $? 0 "ndisc6 (VLAN $vid1)"
+ run_cmd "ip netns exec $h1 ndisc6 -q -r 1 -s $saddr2 -w 5000 $daddr2 eth0.$vid2"
+ log_test $? 0 "ndisc6 (VLAN $vid2)"
+
+ tc_check_packets $sw1 "dev vx0 egress" 101 1
+ log_test $? 0 "NS suppression (VLAN $vid1)"
+ tc_check_packets $sw1 "dev vx0 egress" 102 3
+ log_test $? 0 "NS suppression (VLAN $vid2)"
+
+ # Disable neighbor suppression on the port and check that it has no
+ # effect compared to previous state.
+ run_cmd "bridge -n $sw1 link set dev vx0 neigh_suppress off"
+ run_cmd "bridge -n $sw1 -d link show dev vx0 | grep \"neigh_suppress off\""
+ log_test $? 0 "\"neigh_suppress\" is off"
+
+ run_cmd "ip netns exec $h1 ndisc6 -q -r 1 -s $saddr1 -w 5000 $daddr1 eth0.$vid1"
+ log_test $? 0 "ndisc6 (VLAN $vid1)"
+ run_cmd "ip netns exec $h1 ndisc6 -q -r 1 -s $saddr2 -w 5000 $daddr2 eth0.$vid2"
+ log_test $? 0 "ndisc6 (VLAN $vid2)"
+
+ tc_check_packets $sw1 "dev vx0 egress" 101 1
+ log_test $? 0 "NS suppression (VLAN $vid1)"
+ tc_check_packets $sw1 "dev vx0 egress" 102 4
+ log_test $? 0 "NS suppression (VLAN $vid2)"
+
+ # Disable neighbor suppression on VLAN 10 and check that NS messages
+ # are no longer suppressed on this VLAN.
+ run_cmd "bridge -n $sw1 vlan set vid $vid1 dev vx0 neigh_suppress off"
+ run_cmd "bridge -n $sw1 -d vlan show dev vx0 vid $vid1 | grep \"neigh_suppress off\""
+ log_test $? 0 "\"neigh_suppress\" is off (VLAN $vid1)"
+
+ run_cmd "ip netns exec $h1 ndisc6 -q -r 1 -s $saddr1 -w 5000 $daddr1 eth0.$vid1"
+ log_test $? 0 "ndisc6 (VLAN $vid1)"
+ run_cmd "ip netns exec $h1 ndisc6 -q -r 1 -s $saddr2 -w 5000 $daddr2 eth0.$vid2"
+ log_test $? 0 "ndisc6 (VLAN $vid2)"
+
+ tc_check_packets $sw1 "dev vx0 egress" 101 2
+ log_test $? 0 "NS suppression (VLAN $vid1)"
+ tc_check_packets $sw1 "dev vx0 egress" 102 5
+ log_test $? 0 "NS suppression (VLAN $vid2)"
+
+ # Disable per-{Port, VLAN} neighbor suppression, enable neighbor
+ # suppression on the port and check that on both VLANs NS messages are
+ # suppressed.
+ run_cmd "bridge -n $sw1 link set dev vx0 neigh_vlan_suppress off"
+ run_cmd "bridge -n $sw1 -d link show dev vx0 | grep \"neigh_vlan_suppress off\""
+ log_test $? 0 "\"neigh_vlan_suppress\" is off"
+
+ run_cmd "bridge -n $sw1 link set dev vx0 neigh_suppress on"
+ run_cmd "bridge -n $sw1 -d link show dev vx0 | grep \"neigh_suppress on\""
+ log_test $? 0 "\"neigh_suppress\" is on"
+
+ run_cmd "ip netns exec $h1 ndisc6 -q -r 1 -s $saddr1 -w 5000 $daddr1 eth0.$vid1"
+ log_test $? 0 "ndisc6 (VLAN $vid1)"
+ run_cmd "ip netns exec $h1 ndisc6 -q -r 1 -s $saddr2 -w 5000 $daddr2 eth0.$vid2"
+ log_test $? 0 "ndisc6 (VLAN $vid2)"
+
+ tc_check_packets $sw1 "dev vx0 egress" 101 2
+ log_test $? 0 "NS suppression (VLAN $vid1)"
+ tc_check_packets $sw1 "dev vx0 egress" 102 5
+ log_test $? 0 "NS suppression (VLAN $vid2)"
+}
+
+################################################################################
+# Usage
+
+usage()
+{
+ cat <<EOF
+usage: ${0##*/} OPTS
+
+ -t <test> Test(s) to run (default: all)
+ (options: $TESTS)
+ -p Pause on fail
+ -P Pause after each test before cleanup
+ -v Verbose mode (show commands and output)
+EOF
+}
+
+################################################################################
+# Main
+
+trap cleanup EXIT
+
+while getopts ":t:pPvh" opt; do
+ case $opt in
+ t) TESTS=$OPTARG;;
+ p) PAUSE_ON_FAIL=yes;;
+ P) PAUSE=yes;;
+ v) VERBOSE=$(($VERBOSE + 1));;
+ h) usage; exit 0;;
+ *) usage; exit 1;;
+ esac
+done
+
+# Make sure we don't pause twice.
+[ "${PAUSE}" = "yes" ] && PAUSE_ON_FAIL=no
+
+if [ "$(id -u)" -ne 0 ];then
+ echo "SKIP: Need root privileges"
+ exit $ksft_skip;
+fi
+
+if [ ! -x "$(command -v ip)" ]; then
+ echo "SKIP: Could not run test without ip tool"
+ exit $ksft_skip
+fi
+
+if [ ! -x "$(command -v bridge)" ]; then
+ echo "SKIP: Could not run test without bridge tool"
+ exit $ksft_skip
+fi
+
+if [ ! -x "$(command -v tc)" ]; then
+ echo "SKIP: Could not run test without tc tool"
+ exit $ksft_skip
+fi
+
+if [ ! -x "$(command -v arping)" ]; then
+ echo "SKIP: Could not run test without arping tool"
+ exit $ksft_skip
+fi
+
+if [ ! -x "$(command -v ndisc6)" ]; then
+ echo "SKIP: Could not run test without ndisc6 tool"
+ exit $ksft_skip
+fi
+
+if [ ! -x "$(command -v jq)" ]; then
+ echo "SKIP: Could not run test without jq tool"
+ exit $ksft_skip
+fi
+
+bridge link help 2>&1 | grep -q "neigh_vlan_suppress"
+if [ $? -ne 0 ]; then
+ echo "SKIP: iproute2 bridge too old, missing per-VLAN neighbor suppression support"
+ exit $ksft_skip
+fi
+
+# Start clean.
+cleanup
+
+for t in $TESTS
+do
+ setup; $t; cleanup;
+done
+
+if [ "$TESTS" != "none" ]; then
+ printf "\nTests passed: %3d\n" ${nsuccess}
+ printf "Tests failed: %3d\n" ${nfail}
+fi
+
+exit $ret
diff --git a/tools/testing/selftests/net/test_ingress_egress_chaining.sh b/tools/testing/selftests/net/test_ingress_egress_chaining.sh
new file mode 100644
index 000000000000..08adff6bb3b6
--- /dev/null
+++ b/tools/testing/selftests/net/test_ingress_egress_chaining.sh
@@ -0,0 +1,79 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+# This test runs a simple ingress tc setup between two veth pairs,
+# and chains a single egress rule to test ingress chaining to egress.
+#
+# Kselftest framework requirement - SKIP code is 4.
+ksft_skip=4
+
+if [ "$(id -u)" -ne 0 ];then
+ echo "SKIP: Need root privileges"
+ exit $ksft_skip
+fi
+
+needed_mods="act_mirred cls_flower sch_ingress"
+for mod in $needed_mods; do
+ modinfo $mod &>/dev/null || { echo "SKIP: Need act_mirred module"; exit $ksft_skip; }
+done
+
+ns="ns$((RANDOM%899+100))"
+veth1="veth1$((RANDOM%899+100))"
+veth2="veth2$((RANDOM%899+100))"
+peer1="peer1$((RANDOM%899+100))"
+peer2="peer2$((RANDOM%899+100))"
+ip_peer1=198.51.100.5
+ip_peer2=198.51.100.6
+
+function fail() {
+ echo "FAIL: $@" >> /dev/stderr
+ exit 1
+}
+
+function cleanup() {
+ killall -q -9 udpgso_bench_rx
+ ip link del $veth1 &> /dev/null
+ ip link del $veth2 &> /dev/null
+ ip netns del $ns &> /dev/null
+}
+trap cleanup EXIT
+
+function config() {
+ echo "Setup veth pairs [$veth1, $peer1], and veth pair [$veth2, $peer2]"
+ ip link add $veth1 type veth peer name $peer1
+ ip link add $veth2 type veth peer name $peer2
+ ip addr add $ip_peer1/24 dev $peer1
+ ip link set $peer1 up
+ ip netns add $ns
+ ip link set dev $peer2 netns $ns
+ ip netns exec $ns ip addr add $ip_peer2/24 dev $peer2
+ ip netns exec $ns ip link set $peer2 up
+ ip link set $veth1 up
+ ip link set $veth2 up
+
+ echo "Add tc filter ingress->egress forwarding $veth1 <-> $veth2"
+ tc qdisc add dev $veth2 ingress
+ tc qdisc add dev $veth1 ingress
+ tc filter add dev $veth2 ingress prio 1 proto all flower \
+ action mirred egress redirect dev $veth1
+ tc filter add dev $veth1 ingress prio 1 proto all flower \
+ action mirred egress redirect dev $veth2
+
+ echo "Add tc filter egress->ingress forwarding $peer1 -> $veth1, bypassing the veth pipe"
+ tc qdisc add dev $peer1 clsact
+ tc filter add dev $peer1 egress prio 20 proto ip flower \
+ action mirred ingress redirect dev $veth1
+}
+
+function test_run() {
+ echo "Run tcp traffic"
+ ./udpgso_bench_rx -t &
+ sleep 1
+ ip netns exec $ns timeout -k 2 10 ./udpgso_bench_tx -t -l 2 -4 -D $ip_peer1 || fail "traffic failed"
+ echo "Test passed"
+}
+
+config
+test_run
+trap - EXIT
+cleanup
diff --git a/tools/testing/selftests/net/test_vxlan_mdb.sh b/tools/testing/selftests/net/test_vxlan_mdb.sh
new file mode 100755
index 000000000000..74ff9fb2a6f0
--- /dev/null
+++ b/tools/testing/selftests/net/test_vxlan_mdb.sh
@@ -0,0 +1,2511 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+#
+# This test is for checking VXLAN MDB functionality. The topology consists of
+# two sets of namespaces: One for the testing of IPv4 underlay and another for
+# IPv6. In both cases, both IPv4 and IPv6 overlay traffic are tested.
+#
+# Data path functionality is tested by sending traffic from one of the upper
+# namespaces and checking using ingress tc filters that the expected traffic
+# was received by one of the lower namespaces.
+#
+# +------------------------------------+ +------------------------------------+
+# | ns1_v4 | | ns1_v6 |
+# | | | |
+# | br0.10 br0.4000 br0.20 | | br0.10 br0.4000 br0.20 |
+# | + + + | | + + + |
+# | | | | | | | | | |
+# | | | | | | | | | |
+# | +---------+---------+ | | +---------+---------+ |
+# | | | | | |
+# | | | | | |
+# | + | | + |
+# | br0 | | br0 |
+# | + | | + |
+# | | | | | |
+# | | | | | |
+# | + | | + |
+# | vx0 | | vx0 |
+# | | | |
+# | | | |
+# | veth0 | | veth0 |
+# | + | | + |
+# +-----------------|------------------+ +-----------------|------------------+
+# | |
+# +-----------------|------------------+ +-----------------|------------------+
+# | + | | + |
+# | veth0 | | veth0 |
+# | | | |
+# | | | |
+# | vx0 | | vx0 |
+# | + | | + |
+# | | | | | |
+# | | | | | |
+# | + | | + |
+# | br0 | | br0 |
+# | + | | + |
+# | | | | | |
+# | | | | | |
+# | +---------+---------+ | | +---------+---------+ |
+# | | | | | | | | | |
+# | | | | | | | | | |
+# | + + + | | + + + |
+# | br0.10 br0.4000 br0.10 | | br0.10 br0.4000 br0.20 |
+# | | | |
+# | ns2_v4 | | ns2_v6 |
+# +------------------------------------+ +------------------------------------+
+
+source lib.sh
+ret=0
+
+CONTROL_PATH_TESTS="
+ basic_star_g_ipv4_ipv4
+ basic_star_g_ipv6_ipv4
+ basic_star_g_ipv4_ipv6
+ basic_star_g_ipv6_ipv6
+ basic_sg_ipv4_ipv4
+ basic_sg_ipv6_ipv4
+ basic_sg_ipv4_ipv6
+ basic_sg_ipv6_ipv6
+ star_g_ipv4_ipv4
+ star_g_ipv6_ipv4
+ star_g_ipv4_ipv6
+ star_g_ipv6_ipv6
+ sg_ipv4_ipv4
+ sg_ipv6_ipv4
+ sg_ipv4_ipv6
+ sg_ipv6_ipv6
+ dump_ipv4_ipv4
+ dump_ipv6_ipv4
+ dump_ipv4_ipv6
+ dump_ipv6_ipv6
+ flush
+"
+
+DATA_PATH_TESTS="
+ encap_params_ipv4_ipv4
+ encap_params_ipv6_ipv4
+ encap_params_ipv4_ipv6
+ encap_params_ipv6_ipv6
+ starg_exclude_ir_ipv4_ipv4
+ starg_exclude_ir_ipv6_ipv4
+ starg_exclude_ir_ipv4_ipv6
+ starg_exclude_ir_ipv6_ipv6
+ starg_include_ir_ipv4_ipv4
+ starg_include_ir_ipv6_ipv4
+ starg_include_ir_ipv4_ipv6
+ starg_include_ir_ipv6_ipv6
+ starg_exclude_p2mp_ipv4_ipv4
+ starg_exclude_p2mp_ipv6_ipv4
+ starg_exclude_p2mp_ipv4_ipv6
+ starg_exclude_p2mp_ipv6_ipv6
+ starg_include_p2mp_ipv4_ipv4
+ starg_include_p2mp_ipv6_ipv4
+ starg_include_p2mp_ipv4_ipv6
+ starg_include_p2mp_ipv6_ipv6
+ egress_vni_translation_ipv4_ipv4
+ egress_vni_translation_ipv6_ipv4
+ egress_vni_translation_ipv4_ipv6
+ egress_vni_translation_ipv6_ipv6
+ all_zeros_mdb_ipv4
+ all_zeros_mdb_ipv6
+ mdb_fdb_ipv4_ipv4
+ mdb_fdb_ipv6_ipv4
+ mdb_fdb_ipv4_ipv6
+ mdb_fdb_ipv6_ipv6
+ mdb_torture_ipv4_ipv4
+ mdb_torture_ipv6_ipv4
+ mdb_torture_ipv4_ipv6
+ mdb_torture_ipv6_ipv6
+"
+
+# All tests in this script. Can be overridden with -t option.
+TESTS="
+ $CONTROL_PATH_TESTS
+ $DATA_PATH_TESTS
+"
+VERBOSE=0
+PAUSE_ON_FAIL=no
+PAUSE=no
+
+################################################################################
+# Utilities
+
+log_test()
+{
+ local rc=$1
+ local expected=$2
+ local msg="$3"
+
+ if [ ${rc} -eq ${expected} ]; then
+ printf "TEST: %-60s [ OK ]\n" "${msg}"
+ nsuccess=$((nsuccess+1))
+ else
+ ret=1
+ nfail=$((nfail+1))
+ printf "TEST: %-60s [FAIL]\n" "${msg}"
+ if [ "$VERBOSE" = "1" ]; then
+ echo " rc=$rc, expected $expected"
+ fi
+
+ if [ "${PAUSE_ON_FAIL}" = "yes" ]; then
+ echo
+ echo "hit enter to continue, 'q' to quit"
+ read a
+ [ "$a" = "q" ] && exit 1
+ fi
+ fi
+
+ if [ "${PAUSE}" = "yes" ]; then
+ echo
+ echo "hit enter to continue, 'q' to quit"
+ read a
+ [ "$a" = "q" ] && exit 1
+ fi
+
+ [ "$VERBOSE" = "1" ] && echo
+}
+
+run_cmd()
+{
+ local cmd="$1"
+ local out
+ local stderr="2>/dev/null"
+
+ if [ "$VERBOSE" = "1" ]; then
+ printf "COMMAND: $cmd\n"
+ stderr=
+ fi
+
+ out=$(eval $cmd $stderr)
+ rc=$?
+ if [ "$VERBOSE" = "1" -a -n "$out" ]; then
+ echo " $out"
+ fi
+
+ return $rc
+}
+
+tc_check_packets()
+{
+ local ns=$1; shift
+ local id=$1; shift
+ local handle=$1; shift
+ local count=$1; shift
+ local pkts
+
+ sleep 0.1
+ pkts=$(tc -n $ns -j -s filter show $id \
+ | jq ".[] | select(.options.handle == $handle) | \
+ .options.actions[0].stats.packets")
+ [[ $pkts == $count ]]
+}
+
+################################################################################
+# Setup
+
+setup_common_ns()
+{
+ local ns=$1; shift
+ local local_addr=$1; shift
+
+ ip netns exec $ns sysctl -qw net.ipv4.ip_forward=1
+ ip netns exec $ns sysctl -qw net.ipv4.fib_multipath_use_neigh=1
+ ip netns exec $ns sysctl -qw net.ipv4.conf.default.ignore_routes_with_linkdown=1
+ ip netns exec $ns sysctl -qw net.ipv6.conf.all.keep_addr_on_down=1
+ ip netns exec $ns sysctl -qw net.ipv6.conf.all.forwarding=1
+ ip netns exec $ns sysctl -qw net.ipv6.conf.default.forwarding=1
+ ip netns exec $ns sysctl -qw net.ipv6.conf.default.ignore_routes_with_linkdown=1
+ ip netns exec $ns sysctl -qw net.ipv6.conf.all.accept_dad=0
+ ip netns exec $ns sysctl -qw net.ipv6.conf.default.accept_dad=0
+
+ ip -n $ns link set dev lo up
+ ip -n $ns address add $local_addr dev lo
+
+ ip -n $ns link set dev veth0 up
+
+ ip -n $ns link add name br0 up type bridge vlan_filtering 1 \
+ vlan_default_pvid 0 mcast_snooping 0
+
+ ip -n $ns link add link br0 name br0.10 up type vlan id 10
+ bridge -n $ns vlan add vid 10 dev br0 self
+
+ ip -n $ns link add link br0 name br0.20 up type vlan id 20
+ bridge -n $ns vlan add vid 20 dev br0 self
+
+ ip -n $ns link add link br0 name br0.4000 up type vlan id 4000
+ bridge -n $ns vlan add vid 4000 dev br0 self
+
+ ip -n $ns link add name vx0 up master br0 type vxlan \
+ local $local_addr dstport 4789 external vnifilter
+ bridge -n $ns link set dev vx0 vlan_tunnel on
+
+ bridge -n $ns vlan add vid 10 dev vx0
+ bridge -n $ns vlan add vid 10 dev vx0 tunnel_info id 10010
+ bridge -n $ns vni add vni 10010 dev vx0
+
+ bridge -n $ns vlan add vid 20 dev vx0
+ bridge -n $ns vlan add vid 20 dev vx0 tunnel_info id 10020
+ bridge -n $ns vni add vni 10020 dev vx0
+
+ bridge -n $ns vlan add vid 4000 dev vx0 pvid
+ bridge -n $ns vlan add vid 4000 dev vx0 tunnel_info id 14000
+ bridge -n $ns vni add vni 14000 dev vx0
+}
+
+setup_common()
+{
+ local ns1=$1; shift
+ local ns2=$1; shift
+ local local_addr1=$1; shift
+ local local_addr2=$1; shift
+
+ ip link add name veth0 type veth peer name veth1
+ ip link set dev veth0 netns $ns1 name veth0
+ ip link set dev veth1 netns $ns2 name veth0
+
+ setup_common_ns $ns1 $local_addr1
+ setup_common_ns $ns2 $local_addr2
+}
+
+setup_v4()
+{
+ setup_ns ns1_v4 ns2_v4
+ setup_common $ns1_v4 $ns2_v4 192.0.2.1 192.0.2.2
+
+ ip -n $ns1_v4 address add 192.0.2.17/28 dev veth0
+ ip -n $ns2_v4 address add 192.0.2.18/28 dev veth0
+
+ ip -n $ns1_v4 route add default via 192.0.2.18
+ ip -n $ns2_v4 route add default via 192.0.2.17
+}
+
+cleanup_v4()
+{
+ cleanup_ns $ns2_v4 $ns1_v4
+}
+
+setup_v6()
+{
+ setup_ns ns1_v6 ns2_v6
+ setup_common $ns1_v6 $ns2_v6 2001:db8:1::1 2001:db8:1::2
+
+ ip -n $ns1_v6 address add 2001:db8:2::1/64 dev veth0 nodad
+ ip -n $ns2_v6 address add 2001:db8:2::2/64 dev veth0 nodad
+
+ ip -n $ns1_v6 route add default via 2001:db8:2::2
+ ip -n $ns2_v6 route add default via 2001:db8:2::1
+}
+
+cleanup_v6()
+{
+ cleanup_ns $ns2_v6 $ns1_v6
+}
+
+setup()
+{
+ set -e
+
+ setup_v4
+ setup_v6
+
+ sleep 5
+
+ set +e
+}
+
+cleanup()
+{
+ cleanup_v6 &> /dev/null
+ cleanup_v4 &> /dev/null
+}
+
+################################################################################
+# Tests - Control path
+
+basic_common()
+{
+ local ns1=$1; shift
+ local grp_key=$1; shift
+ local vtep_ip=$1; shift
+
+ # Test basic control path operations common to all MDB entry types.
+
+ # Basic add, replace and delete behavior.
+ run_cmd "bridge -n $ns1 mdb add dev vx0 port vx0 $grp_key permanent dst $vtep_ip src_vni 10010"
+ log_test $? 0 "MDB entry addition"
+ run_cmd "bridge -n $ns1 -d -s mdb get dev vx0 $grp_key src_vni 10010"
+ log_test $? 0 "MDB entry presence after addition"
+
+ run_cmd "bridge -n $ns1 mdb replace dev vx0 port vx0 $grp_key permanent dst $vtep_ip src_vni 10010"
+ log_test $? 0 "MDB entry replacement"
+ run_cmd "bridge -n $ns1 -d -s mdb get dev vx0 $grp_key src_vni 10010"
+ log_test $? 0 "MDB entry presence after replacement"
+
+ run_cmd "bridge -n $ns1 mdb del dev vx0 port vx0 $grp_key dst $vtep_ip src_vni 10010"
+ log_test $? 0 "MDB entry deletion"
+ run_cmd "bridge -n $ns1 -d -s mdb get dev vx0 $grp_key src_vni 10010"
+ log_test $? 254 "MDB entry presence after deletion"
+
+ run_cmd "bridge -n $ns1 mdb del dev vx0 port vx0 $grp_key dst $vtep_ip src_vni 10010"
+ log_test $? 255 "Non-existent MDB entry deletion"
+
+ # Default protocol and replacement.
+ run_cmd "bridge -n $ns1 mdb add dev vx0 port vx0 $grp_key permanent dst $vtep_ip src_vni 10010"
+ run_cmd "bridge -n $ns1 -d -s mdb get dev vx0 $grp_key src_vni 10010 | grep \"proto static\""
+ log_test $? 0 "MDB entry default protocol"
+
+ run_cmd "bridge -n $ns1 mdb replace dev vx0 port vx0 $grp_key permanent proto 123 dst $vtep_ip src_vni 10010"
+ run_cmd "bridge -n $ns1 -d -s mdb get dev vx0 $grp_key src_vni 10010 | grep \"proto 123\""
+ log_test $? 0 "MDB entry protocol replacement"
+
+ run_cmd "bridge -n $ns1 mdb del dev vx0 port vx0 $grp_key dst $vtep_ip src_vni 10010"
+
+ # Default destination port and replacement.
+ run_cmd "bridge -n $ns1 mdb add dev vx0 port vx0 $grp_key permanent dst $vtep_ip src_vni 10010"
+ run_cmd "bridge -n $ns1 -d -s mdb get dev vx0 $grp_key src_vni 10010 | grep \" dst_port \""
+ log_test $? 1 "MDB entry default destination port"
+
+ run_cmd "bridge -n $ns1 mdb replace dev vx0 port vx0 $grp_key permanent dst $vtep_ip dst_port 1234 src_vni 10010"
+ run_cmd "bridge -n $ns1 -d -s mdb get dev vx0 $grp_key src_vni 10010 | grep \"dst_port 1234\""
+ log_test $? 0 "MDB entry destination port replacement"
+
+ run_cmd "bridge -n $ns1 mdb del dev vx0 port vx0 $grp_key dst $vtep_ip src_vni 10010"
+
+ # Default destination VNI and replacement.
+ run_cmd "bridge -n $ns1 mdb add dev vx0 port vx0 $grp_key permanent dst $vtep_ip src_vni 10010"
+ run_cmd "bridge -n $ns1 -d -s mdb get dev vx0 $grp_key src_vni 10010 | grep \" vni \""
+ log_test $? 1 "MDB entry default destination VNI"
+
+ run_cmd "bridge -n $ns1 mdb replace dev vx0 port vx0 $grp_key permanent dst $vtep_ip vni 1234 src_vni 10010"
+ run_cmd "bridge -n $ns1 -d -s mdb get dev vx0 $grp_key src_vni 10010 | grep \"vni 1234\""
+ log_test $? 0 "MDB entry destination VNI replacement"
+
+ run_cmd "bridge -n $ns1 mdb del dev vx0 port vx0 $grp_key dst $vtep_ip src_vni 10010"
+
+ # Default outgoing interface and replacement.
+ run_cmd "bridge -n $ns1 mdb add dev vx0 port vx0 $grp_key permanent dst $vtep_ip src_vni 10010"
+ run_cmd "bridge -n $ns1 -d -s mdb get dev vx0 $grp_key src_vni 10010 | grep \" via \""
+ log_test $? 1 "MDB entry default outgoing interface"
+
+ run_cmd "bridge -n $ns1 mdb replace dev vx0 port vx0 $grp_key permanent dst $vtep_ip src_vni 10010 via veth0"
+ run_cmd "bridge -n $ns1 -d -s mdb get dev vx0 $grp_key src_vni 10010 | grep \"via veth0\""
+ log_test $? 0 "MDB entry outgoing interface replacement"
+
+ run_cmd "bridge -n $ns1 mdb del dev vx0 port vx0 $grp_key dst $vtep_ip src_vni 10010"
+
+ # Common error cases.
+ run_cmd "bridge -n $ns1 mdb add dev vx0 port veth0 $grp_key permanent dst $vtep_ip src_vni 10010"
+ log_test $? 255 "MDB entry with mismatch between device and port"
+
+ run_cmd "bridge -n $ns1 mdb add dev vx0 port vx0 $grp_key temp dst $vtep_ip src_vni 10010"
+ log_test $? 255 "MDB entry with temp state"
+
+ run_cmd "bridge -n $ns1 mdb add dev vx0 port vx0 $grp_key permanent vid 10 dst $vtep_ip src_vni 10010"
+ log_test $? 255 "MDB entry with VLAN"
+
+ run_cmd "bridge -n $ns1 mdb add dev vx0 port vx0 grp 01:02:03:04:05:06 permanent dst $vtep_ip src_vni 10010"
+ log_test $? 255 "MDB entry MAC address"
+
+ run_cmd "bridge -n $ns1 mdb add dev vx0 port vx0 $grp_key permanent"
+ log_test $? 255 "MDB entry without extended parameters"
+
+ run_cmd "bridge -n $ns1 mdb add dev vx0 port vx0 $grp_key permanent proto 3 dst $vtep_ip src_vni 10010"
+ log_test $? 255 "MDB entry with an invalid protocol"
+
+ run_cmd "bridge -n $ns1 mdb add dev vx0 port vx0 $grp_key permanent dst $vtep_ip vni $((2 ** 24)) src_vni 10010"
+ log_test $? 255 "MDB entry with an invalid destination VNI"
+
+ run_cmd "bridge -n $ns1 mdb add dev vx0 port vx0 $grp_key permanent dst $vtep_ip src_vni $((2 ** 24))"
+ log_test $? 255 "MDB entry with an invalid source VNI"
+
+ run_cmd "bridge -n $ns1 mdb add dev vx0 port vx0 $grp_key permanent src_vni 10010"
+ log_test $? 255 "MDB entry without a remote destination IP"
+
+ run_cmd "bridge -n $ns1 mdb add dev vx0 port vx0 $grp_key permanent dst $vtep_ip src_vni 10010"
+ run_cmd "bridge -n $ns1 mdb add dev vx0 port vx0 $grp_key permanent dst $vtep_ip src_vni 10010"
+ log_test $? 255 "Duplicate MDB entries"
+ run_cmd "bridge -n $ns1 mdb del dev vx0 port vx0 $grp_key dst $vtep_ip src_vni 10010"
+}
+
+basic_star_g_ipv4_ipv4()
+{
+ local ns1=$ns1_v4
+ local grp_key="grp 239.1.1.1"
+ local vtep_ip=198.51.100.100
+
+ echo
+ echo "Control path: Basic (*, G) operations - IPv4 overlay / IPv4 underlay"
+ echo "--------------------------------------------------------------------"
+
+ basic_common $ns1 "$grp_key" $vtep_ip
+}
+
+basic_star_g_ipv6_ipv4()
+{
+ local ns1=$ns1_v4
+ local grp_key="grp ff0e::1"
+ local vtep_ip=198.51.100.100
+
+ echo
+ echo "Control path: Basic (*, G) operations - IPv6 overlay / IPv4 underlay"
+ echo "--------------------------------------------------------------------"
+
+ basic_common $ns1 "$grp_key" $vtep_ip
+}
+
+basic_star_g_ipv4_ipv6()
+{
+ local ns1=$ns1_v6
+ local grp_key="grp 239.1.1.1"
+ local vtep_ip=2001:db8:1000::1
+
+ echo
+ echo "Control path: Basic (*, G) operations - IPv4 overlay / IPv6 underlay"
+ echo "--------------------------------------------------------------------"
+
+ basic_common $ns1 "$grp_key" $vtep_ip
+}
+
+basic_star_g_ipv6_ipv6()
+{
+ local ns1=$ns1_v6
+ local grp_key="grp ff0e::1"
+ local vtep_ip=2001:db8:1000::1
+
+ echo
+ echo "Control path: Basic (*, G) operations - IPv6 overlay / IPv6 underlay"
+ echo "--------------------------------------------------------------------"
+
+ basic_common $ns1 "$grp_key" $vtep_ip
+}
+
+basic_sg_ipv4_ipv4()
+{
+ local ns1=$ns1_v4
+ local grp_key="grp 239.1.1.1 src 192.0.2.129"
+ local vtep_ip=198.51.100.100
+
+ echo
+ echo "Control path: Basic (S, G) operations - IPv4 overlay / IPv4 underlay"
+ echo "--------------------------------------------------------------------"
+
+ basic_common $ns1 "$grp_key" $vtep_ip
+}
+
+basic_sg_ipv6_ipv4()
+{
+ local ns1=$ns1_v4
+ local grp_key="grp ff0e::1 src 2001:db8:100::1"
+ local vtep_ip=198.51.100.100
+
+ echo
+ echo "Control path: Basic (S, G) operations - IPv6 overlay / IPv4 underlay"
+ echo "---------------------------------------------------------------------"
+
+ basic_common $ns1 "$grp_key" $vtep_ip
+}
+
+basic_sg_ipv4_ipv6()
+{
+ local ns1=$ns1_v6
+ local grp_key="grp 239.1.1.1 src 192.0.2.129"
+ local vtep_ip=2001:db8:1000::1
+
+ echo
+ echo "Control path: Basic (S, G) operations - IPv4 overlay / IPv6 underlay"
+ echo "--------------------------------------------------------------------"
+
+ basic_common $ns1 "$grp_key" $vtep_ip
+}
+
+basic_sg_ipv6_ipv6()
+{
+ local ns1=$ns1_v6
+ local grp_key="grp ff0e::1 src 2001:db8:100::1"
+ local vtep_ip=2001:db8:1000::1
+
+ echo
+ echo "Control path: Basic (S, G) operations - IPv6 overlay / IPv6 underlay"
+ echo "--------------------------------------------------------------------"
+
+ basic_common $ns1 "$grp_key" $vtep_ip
+}
+
+star_g_common()
+{
+ local ns1=$1; shift
+ local grp=$1; shift
+ local src1=$1; shift
+ local src2=$1; shift
+ local src3=$1; shift
+ local vtep_ip=$1; shift
+ local all_zeros_grp=$1; shift
+
+ # Test control path operations specific to (*, G) entries.
+
+ # Basic add, replace and delete behavior.
+ run_cmd "bridge -n $ns1 mdb add dev vx0 port vx0 grp $grp permanent filter_mode exclude source_list $src1 dst $vtep_ip src_vni 10010"
+ log_test $? 0 "(*, G) MDB entry addition with source list"
+ run_cmd "bridge -n $ns1 -d -s mdb get dev vx0 grp $grp src_vni 10010"
+ log_test $? 0 "(*, G) MDB entry presence after addition"
+ run_cmd "bridge -n $ns1 -d -s mdb get dev vx0 grp $grp src $src1 src_vni 10010"
+ log_test $? 0 "(S, G) MDB entry presence after addition"
+
+ run_cmd "bridge -n $ns1 mdb replace dev vx0 port vx0 grp $grp permanent filter_mode exclude source_list $src1 dst $vtep_ip src_vni 10010"
+ log_test $? 0 "(*, G) MDB entry replacement with source list"
+ run_cmd "bridge -n $ns1 -d -s mdb get dev vx0 grp $grp src_vni 10010"
+ log_test $? 0 "(*, G) MDB entry presence after replacement"
+ run_cmd "bridge -n $ns1 -d -s mdb get dev vx0 grp $grp src $src1 src_vni 10010"
+ log_test $? 0 "(S, G) MDB entry presence after replacement"
+
+ run_cmd "bridge -n $ns1 mdb del dev vx0 port vx0 grp $grp dst $vtep_ip src_vni 10010"
+ log_test $? 0 "(*, G) MDB entry deletion"
+ run_cmd "bridge -n $ns1 -d -s mdb get dev vx0 grp $grp src_vni 10010"
+ log_test $? 254 "(*, G) MDB entry presence after deletion"
+ run_cmd "bridge -n $ns1 -d -s mdb get dev vx0 grp $grp src $src1 src_vni 10010"
+ log_test $? 254 "(S, G) MDB entry presence after deletion"
+
+ # Default filter mode and replacement.
+ run_cmd "bridge -n $ns1 mdb add dev vx0 port vx0 grp $grp permanent dst $vtep_ip src_vni 10010"
+ run_cmd "bridge -n $ns1 -d -s mdb get dev vx0 grp $grp src_vni 10010 | grep exclude"
+ log_test $? 0 "(*, G) MDB entry default filter mode"
+
+ run_cmd "bridge -n $ns1 mdb replace dev vx0 port vx0 grp $grp permanent filter_mode include source_list $src1 dst $vtep_ip src_vni 10010"
+ run_cmd "bridge -n $ns1 -d -s mdb get dev vx0 grp $grp src_vni 10010 | grep include"
+ log_test $? 0 "(*, G) MDB entry after replacing filter mode to \"include\""
+ run_cmd "bridge -n $ns1 -d -s mdb get dev vx0 grp $grp src $src1 src_vni 10010"
+ log_test $? 0 "(S, G) MDB entry after replacing filter mode to \"include\""
+ run_cmd "bridge -n $ns1 -d -s mdb get dev vx0 grp $grp src $src1 src_vni 10010 | grep blocked"
+ log_test $? 1 "\"blocked\" flag after replacing filter mode to \"include\""
+
+ run_cmd "bridge -n $ns1 mdb replace dev vx0 port vx0 grp $grp permanent filter_mode exclude source_list $src1 dst $vtep_ip src_vni 10010"
+ run_cmd "bridge -n $ns1 -d -s mdb get dev vx0 grp $grp src_vni 10010 | grep exclude"
+ log_test $? 0 "(*, G) MDB entry after replacing filter mode to \"exclude\""
+ run_cmd "bridge -n $ns1 -d -s mdb get dev vx0 grep grp $grp src $src1 src_vni 10010"
+ log_test $? 0 "(S, G) MDB entry after replacing filter mode to \"exclude\""
+ run_cmd "bridge -n $ns1 -d -s mdb get dev vx0 grp $grp src $src1 src_vni 10010 | grep blocked"
+ log_test $? 0 "\"blocked\" flag after replacing filter mode to \"exclude\""
+
+ run_cmd "bridge -n $ns1 mdb del dev vx0 port vx0 grp $grp dst $vtep_ip src_vni 10010"
+
+ # Default source list and replacement.
+ run_cmd "bridge -n $ns1 mdb add dev vx0 port vx0 grp $grp permanent dst $vtep_ip src_vni 10010"
+ run_cmd "bridge -n $ns1 -d -s mdb get dev vx0 grp $grp src_vni 10010 | grep source_list"
+ log_test $? 1 "(*, G) MDB entry default source list"
+
+ run_cmd "bridge -n $ns1 mdb replace dev vx0 port vx0 grp $grp permanent filter_mode exclude source_list $src1,$src2,$src3 dst $vtep_ip src_vni 10010"
+ run_cmd "bridge -n $ns1 -d -s mdb get dev vx0 grp $grp src $src1 src_vni 10010"
+ log_test $? 0 "(S, G) MDB entry of 1st source after replacing source list"
+ run_cmd "bridge -n $ns1 -d -s mdb get dev vx0 grp $grp src $src2 src_vni 10010"
+ log_test $? 0 "(S, G) MDB entry of 2nd source after replacing source list"
+ run_cmd "bridge -n $ns1 -d -s mdb get dev vx0 grp $grp src $src3 src_vni 10010"
+ log_test $? 0 "(S, G) MDB entry of 3rd source after replacing source list"
+
+ run_cmd "bridge -n $ns1 mdb replace dev vx0 port vx0 grp $grp permanent filter_mode exclude source_list $src1,$src3 dst $vtep_ip src_vni 10010"
+ run_cmd "bridge -n $ns1 -d -s mdb get dev vx0 grp $grp src $src1 src_vni 10010"
+ log_test $? 0 "(S, G) MDB entry of 1st source after removing source"
+ run_cmd "bridge -n $ns1 -d -s mdb get dev vx0 grp $grp src $src2 src_vni 10010"
+ log_test $? 254 "(S, G) MDB entry of 2nd source after removing source"
+ run_cmd "bridge -n $ns1 -d -s mdb get dev vx0 grp $grp src $src3 src_vni 10010"
+ log_test $? 0 "(S, G) MDB entry of 3rd source after removing source"
+
+ run_cmd "bridge -n $ns1 mdb del dev vx0 port vx0 grp $grp dst $vtep_ip src_vni 10010"
+
+ # Default protocol and replacement.
+ run_cmd "bridge -n $ns1 mdb add dev vx0 port vx0 grp $grp permanent filter_mode exclude source_list $src1 dst $vtep_ip src_vni 10010"
+ run_cmd "bridge -n $ns1 -d -s mdb get dev vx0 grp $grp src_vni 10010 | grep \"proto static\""
+ log_test $? 0 "(*, G) MDB entry default protocol"
+ run_cmd "bridge -n $ns1 -d -s mdb get dev vx0 grp $grp src $src1 src_vni 10010 | grep \"proto static\""
+ log_test $? 0 "(S, G) MDB entry default protocol"
+
+ run_cmd "bridge -n $ns1 mdb replace dev vx0 port vx0 grp $grp permanent filter_mode exclude source_list $src1 proto bgp dst $vtep_ip src_vni 10010"
+ run_cmd "bridge -n $ns1 -d -s mdb get dev vx0 grp $grp src_vni 10010 | grep \"proto bgp\""
+ log_test $? 0 "(*, G) MDB entry protocol after replacement"
+ run_cmd "bridge -n $ns1 -d -s mdb get dev vx0 grp $grp src $src1 src_vni 10010 | grep \"proto bgp\""
+ log_test $? 0 "(S, G) MDB entry protocol after replacement"
+
+ run_cmd "bridge -n $ns1 mdb del dev vx0 port vx0 grp $grp dst $vtep_ip src_vni 10010"
+
+ # Default destination port and replacement.
+ run_cmd "bridge -n $ns1 mdb add dev vx0 port vx0 grp $grp permanent filter_mode exclude source_list $src1 dst $vtep_ip src_vni 10010"
+ run_cmd "bridge -n $ns1 -d -s mdb get dev vx0 grp $grp src_vni 10010 | grep \" dst_port \""
+ log_test $? 1 "(*, G) MDB entry default destination port"
+ run_cmd "bridge -n $ns1 -d -s mdb get dev vx0 grp $grp src $src1 src_vni 10010 | grep \" dst_port \""
+ log_test $? 1 "(S, G) MDB entry default destination port"
+
+ run_cmd "bridge -n $ns1 mdb replace dev vx0 port vx0 grp $grp permanent filter_mode exclude source_list $src1 dst $vtep_ip dst_port 1234 src_vni 10010"
+ run_cmd "bridge -n $ns1 -d -s mdb get dev vx0 grp $grp src_vni 10010 | grep \" dst_port 1234 \""
+ log_test $? 0 "(*, G) MDB entry destination port after replacement"
+ run_cmd "bridge -n $ns1 -d -s mdb get dev vx0 grp $grp src $src1 src_vni 10010 | grep \" dst_port 1234 \""
+ log_test $? 0 "(S, G) MDB entry destination port after replacement"
+
+ run_cmd "bridge -n $ns1 mdb del dev vx0 port vx0 grp $grp dst $vtep_ip src_vni 10010"
+
+ # Default destination VNI and replacement.
+ run_cmd "bridge -n $ns1 mdb add dev vx0 port vx0 grp $grp permanent filter_mode exclude source_list $src1 dst $vtep_ip src_vni 10010"
+ run_cmd "bridge -n $ns1 -d -s mdb get dev vx0 grp $grp src_vni 10010 | grep \" vni \""
+ log_test $? 1 "(*, G) MDB entry default destination VNI"
+ run_cmd "bridge -n $ns1 -d -s mdb get dev vx0 grp $grp src $src1 src_vni 10010 | grep \" vni \""
+ log_test $? 1 "(S, G) MDB entry default destination VNI"
+
+ run_cmd "bridge -n $ns1 mdb replace dev vx0 port vx0 grp $grp permanent filter_mode exclude source_list $src1 dst $vtep_ip vni 1234 src_vni 10010"
+ run_cmd "bridge -n $ns1 -d -s mdb get dev vx0 grp $grp src_vni 10010 | grep \" vni 1234 \""
+ log_test $? 0 "(*, G) MDB entry destination VNI after replacement"
+ run_cmd "bridge -n $ns1 -d -s mdb get dev vx0 grp $grp src $src1 src_vni 10010 | grep \" vni 1234 \""
+ log_test $? 0 "(S, G) MDB entry destination VNI after replacement"
+
+ run_cmd "bridge -n $ns1 mdb del dev vx0 port vx0 grp $grp dst $vtep_ip src_vni 10010"
+
+ # Default outgoing interface and replacement.
+ run_cmd "bridge -n $ns1 mdb add dev vx0 port vx0 grp $grp permanent filter_mode exclude source_list $src1 dst $vtep_ip src_vni 10010"
+ run_cmd "bridge -n $ns1 -d -s mdb get dev vx0 grp $grp src_vni 10010 | grep \" via \""
+ log_test $? 1 "(*, G) MDB entry default outgoing interface"
+ run_cmd "bridge -n $ns1 -d -s mdb get dev vx0 grp $grp src $src1 src_vni 10010 | grep \" via \""
+ log_test $? 1 "(S, G) MDB entry default outgoing interface"
+
+ run_cmd "bridge -n $ns1 mdb replace dev vx0 port vx0 grp $grp permanent filter_mode exclude source_list $src1 dst $vtep_ip src_vni 10010 via veth0"
+ run_cmd "bridge -n $ns1 -d -s mdb get dev vx0 grp $grp src_vni 10010 | grep \" via veth0 \""
+ log_test $? 0 "(*, G) MDB entry outgoing interface after replacement"
+ run_cmd "bridge -n $ns1 -d -s mdb get dev vx0 grp $grp src $src1 src_vni 10010 | grep \" via veth0 \""
+ log_test $? 0 "(S, G) MDB entry outgoing interface after replacement"
+
+ run_cmd "bridge -n $ns1 mdb del dev vx0 port vx0 grp $grp dst $vtep_ip src_vni 10010"
+
+ # Error cases.
+ run_cmd "bridge -n $ns1 mdb add dev vx0 port vx0 grp $all_zeros_grp permanent filter_mode exclude dst $vtep_ip src_vni 10010"
+ log_test $? 255 "All-zeros group with filter mode"
+
+ run_cmd "bridge -n $ns1 mdb add dev vx0 port vx0 grp $all_zeros_grp permanent source_list $src1 dst $vtep_ip src_vni 10010"
+ log_test $? 255 "All-zeros group with source list"
+
+ run_cmd "bridge -n $ns1 mdb add dev vx0 port vx0 grp $grp permanent filter_mode include dst $vtep_ip src_vni 10010"
+ log_test $? 255 "(*, G) INCLUDE with an empty source list"
+
+ run_cmd "bridge -n $ns1 mdb add dev vx0 port vx0 grp $grp permanent filter_mode exclude source_list $grp dst $vtep_ip src_vni 10010"
+ log_test $? 255 "Invalid source in source list"
+
+ run_cmd "bridge -n $ns1 mdb add dev vx0 port vx0 grp $grp permanent source_list $src1 dst $vtep_ip src_vni 10010"
+ log_test $? 255 "Source list without filter mode"
+}
+
+star_g_ipv4_ipv4()
+{
+ local ns1=$ns1_v4
+ local grp=239.1.1.1
+ local src1=192.0.2.129
+ local src2=192.0.2.130
+ local src3=192.0.2.131
+ local vtep_ip=198.51.100.100
+ local all_zeros_grp=0.0.0.0
+
+ echo
+ echo "Control path: (*, G) operations - IPv4 overlay / IPv4 underlay"
+ echo "--------------------------------------------------------------"
+
+ star_g_common $ns1 $grp $src1 $src2 $src3 $vtep_ip $all_zeros_grp
+}
+
+star_g_ipv6_ipv4()
+{
+ local ns1=$ns1_v4
+ local grp=ff0e::1
+ local src1=2001:db8:100::1
+ local src2=2001:db8:100::2
+ local src3=2001:db8:100::3
+ local vtep_ip=198.51.100.100
+ local all_zeros_grp=::
+
+ echo
+ echo "Control path: (*, G) operations - IPv6 overlay / IPv4 underlay"
+ echo "--------------------------------------------------------------"
+
+ star_g_common $ns1 $grp $src1 $src2 $src3 $vtep_ip $all_zeros_grp
+}
+
+star_g_ipv4_ipv6()
+{
+ local ns1=$ns1_v6
+ local grp=239.1.1.1
+ local src1=192.0.2.129
+ local src2=192.0.2.130
+ local src3=192.0.2.131
+ local vtep_ip=2001:db8:1000::1
+ local all_zeros_grp=0.0.0.0
+
+ echo
+ echo "Control path: (*, G) operations - IPv4 overlay / IPv6 underlay"
+ echo "--------------------------------------------------------------"
+
+ star_g_common $ns1 $grp $src1 $src2 $src3 $vtep_ip $all_zeros_grp
+}
+
+star_g_ipv6_ipv6()
+{
+ local ns1=$ns1_v6
+ local grp=ff0e::1
+ local src1=2001:db8:100::1
+ local src2=2001:db8:100::2
+ local src3=2001:db8:100::3
+ local vtep_ip=2001:db8:1000::1
+ local all_zeros_grp=::
+
+ echo
+ echo "Control path: (*, G) operations - IPv6 overlay / IPv6 underlay"
+ echo "--------------------------------------------------------------"
+
+ star_g_common $ns1 $grp $src1 $src2 $src3 $vtep_ip $all_zeros_grp
+}
+
+sg_common()
+{
+ local ns1=$1; shift
+ local grp=$1; shift
+ local src=$1; shift
+ local vtep_ip=$1; shift
+ local all_zeros_grp=$1; shift
+
+ # Test control path operations specific to (S, G) entries.
+
+ # Default filter mode.
+ run_cmd "bridge -n $ns1 mdb add dev vx0 port vx0 grp $grp src $src permanent dst $vtep_ip src_vni 10010"
+ run_cmd "bridge -n $ns1 -d -s mdb get dev vx0 grp $grp src $src src_vni 10010 | grep include"
+ log_test $? 0 "(S, G) MDB entry default filter mode"
+
+ run_cmd "bridge -n $ns1 mdb del dev vx0 port vx0 grp $grp src $src permanent dst $vtep_ip src_vni 10010"
+
+ # Error cases.
+ run_cmd "bridge -n $ns1 mdb add dev vx0 port vx0 grp $grp src $src permanent filter_mode include dst $vtep_ip src_vni 10010"
+ log_test $? 255 "(S, G) with filter mode"
+
+ run_cmd "bridge -n $ns1 mdb add dev vx0 port vx0 grp $grp src $src permanent source_list $src dst $vtep_ip src_vni 10010"
+ log_test $? 255 "(S, G) with source list"
+
+ run_cmd "bridge -n $ns1 mdb add dev vx0 port vx0 grp $grp src $grp permanent dst $vtep_ip src_vni 10010"
+ log_test $? 255 "(S, G) with an invalid source list"
+
+ run_cmd "bridge -n $ns1 mdb add dev vx0 port vx0 grp $all_zeros_grp src $src permanent dst $vtep_ip src_vni 10010"
+ log_test $? 255 "All-zeros group with source"
+}
+
+sg_ipv4_ipv4()
+{
+ local ns1=$ns1_v4
+ local grp=239.1.1.1
+ local src=192.0.2.129
+ local vtep_ip=198.51.100.100
+ local all_zeros_grp=0.0.0.0
+
+ echo
+ echo "Control path: (S, G) operations - IPv4 overlay / IPv4 underlay"
+ echo "--------------------------------------------------------------"
+
+ sg_common $ns1 $grp $src $vtep_ip $all_zeros_grp
+}
+
+sg_ipv6_ipv4()
+{
+ local ns1=$ns1_v4
+ local grp=ff0e::1
+ local src=2001:db8:100::1
+ local vtep_ip=198.51.100.100
+ local all_zeros_grp=::
+
+ echo
+ echo "Control path: (S, G) operations - IPv6 overlay / IPv4 underlay"
+ echo "--------------------------------------------------------------"
+
+ sg_common $ns1 $grp $src $vtep_ip $all_zeros_grp
+}
+
+sg_ipv4_ipv6()
+{
+ local ns1=$ns1_v6
+ local grp=239.1.1.1
+ local src=192.0.2.129
+ local vtep_ip=2001:db8:1000::1
+ local all_zeros_grp=0.0.0.0
+
+ echo
+ echo "Control path: (S, G) operations - IPv4 overlay / IPv6 underlay"
+ echo "--------------------------------------------------------------"
+
+ sg_common $ns1 $grp $src $vtep_ip $all_zeros_grp
+}
+
+sg_ipv6_ipv6()
+{
+ local ns1=$ns1_v6
+ local grp=ff0e::1
+ local src=2001:db8:100::1
+ local vtep_ip=2001:db8:1000::1
+ local all_zeros_grp=::
+
+ echo
+ echo "Control path: (S, G) operations - IPv6 overlay / IPv6 underlay"
+ echo "--------------------------------------------------------------"
+
+ sg_common $ns1 $grp $src $vtep_ip $all_zeros_grp
+}
+
+ipv4_grps_get()
+{
+ local max_grps=$1; shift
+ local i
+
+ for i in $(seq 0 $((max_grps - 1))); do
+ echo "239.1.1.$i"
+ done
+}
+
+ipv6_grps_get()
+{
+ local max_grps=$1; shift
+ local i
+
+ for i in $(seq 0 $((max_grps - 1))); do
+ echo "ff0e::$(printf %x $i)"
+ done
+}
+
+dump_common()
+{
+ local ns1=$1; shift
+ local local_addr=$1; shift
+ local remote_prefix=$1; shift
+ local fn=$1; shift
+ local max_vxlan_devs=2
+ local max_remotes=64
+ local max_grps=256
+ local num_entries
+ local batch_file
+ local grp
+ local i j
+
+ # The kernel maintains various markers for the MDB dump. Add a test for
+ # large scale MDB dump to make sure that all the configured entries are
+ # dumped and that the markers are used correctly.
+
+ # Create net devices.
+ for i in $(seq 1 $max_vxlan_devs); do
+ ip -n $ns1 link add name vx-test${i} up type vxlan \
+ local $local_addr dstport 4789 external vnifilter
+ done
+
+ # Create batch file with MDB entries.
+ batch_file=$(mktemp)
+ for i in $(seq 1 $max_vxlan_devs); do
+ for j in $(seq 1 $max_remotes); do
+ for grp in $($fn $max_grps); do
+ echo "mdb add dev vx-test${i} port vx-test${i} grp $grp permanent dst ${remote_prefix}${j}" >> $batch_file
+ done
+ done
+ done
+
+ # Program the batch file and check for expected number of entries.
+ bridge -n $ns1 -b $batch_file
+ for i in $(seq 1 $max_vxlan_devs); do
+ num_entries=$(bridge -n $ns1 mdb show dev vx-test${i} | grep "permanent" | wc -l)
+ [[ $num_entries -eq $((max_grps * max_remotes)) ]]
+ log_test $? 0 "Large scale dump - VXLAN device #$i"
+ done
+
+ rm -rf $batch_file
+}
+
+dump_ipv4_ipv4()
+{
+ local ns1=$ns1_v4
+ local local_addr=192.0.2.1
+ local remote_prefix=198.51.100.
+ local fn=ipv4_grps_get
+
+ echo
+ echo "Control path: Large scale MDB dump - IPv4 overlay / IPv4 underlay"
+ echo "-----------------------------------------------------------------"
+
+ dump_common $ns1 $local_addr $remote_prefix $fn
+}
+
+dump_ipv6_ipv4()
+{
+ local ns1=$ns1_v4
+ local local_addr=192.0.2.1
+ local remote_prefix=198.51.100.
+ local fn=ipv6_grps_get
+
+ echo
+ echo "Control path: Large scale MDB dump - IPv6 overlay / IPv4 underlay"
+ echo "-----------------------------------------------------------------"
+
+ dump_common $ns1 $local_addr $remote_prefix $fn
+}
+
+dump_ipv4_ipv6()
+{
+ local ns1=$ns1_v6
+ local local_addr=2001:db8:1::1
+ local remote_prefix=2001:db8:1000::
+ local fn=ipv4_grps_get
+
+ echo
+ echo "Control path: Large scale MDB dump - IPv4 overlay / IPv6 underlay"
+ echo "-----------------------------------------------------------------"
+
+ dump_common $ns1 $local_addr $remote_prefix $fn
+}
+
+dump_ipv6_ipv6()
+{
+ local ns1=$ns1_v6
+ local local_addr=2001:db8:1::1
+ local remote_prefix=2001:db8:1000::
+ local fn=ipv6_grps_get
+
+ echo
+ echo "Control path: Large scale MDB dump - IPv6 overlay / IPv6 underlay"
+ echo "-----------------------------------------------------------------"
+
+ dump_common $ns1 $local_addr $remote_prefix $fn
+}
+
+flush()
+{
+ local num_entries
+
+ echo
+ echo "Control path: Flush"
+ echo "-------------------"
+
+ # Add entries with different attributes and check that they are all
+ # flushed when the flush command is given with no parameters.
+
+ # Different source VNI.
+ run_cmd "bridge -n $ns1_v4 mdb add dev vx0 port vx0 grp 239.1.1.1 permanent dst 198.51.100.1 src_vni 10010"
+ run_cmd "bridge -n $ns1_v4 mdb add dev vx0 port vx0 grp 239.1.1.2 permanent dst 198.51.100.1 src_vni 10011"
+
+ # Different routing protocol.
+ run_cmd "bridge -n $ns1_v4 mdb add dev vx0 port vx0 grp 239.1.1.3 permanent proto bgp dst 198.51.100.1 src_vni 10010"
+ run_cmd "bridge -n $ns1_v4 mdb add dev vx0 port vx0 grp 239.1.1.4 permanent proto zebra dst 198.51.100.1 src_vni 10010"
+
+ # Different destination IP.
+ run_cmd "bridge -n $ns1_v4 mdb add dev vx0 port vx0 grp 239.1.1.5 permanent dst 198.51.100.1 src_vni 10010"
+ run_cmd "bridge -n $ns1_v4 mdb add dev vx0 port vx0 grp 239.1.1.6 permanent dst 198.51.100.2 src_vni 10010"
+
+ # Different destination port.
+ run_cmd "bridge -n $ns1_v4 mdb add dev vx0 port vx0 grp 239.1.1.7 permanent dst 198.51.100.1 dst_port 11111 src_vni 10010"
+ run_cmd "bridge -n $ns1_v4 mdb add dev vx0 port vx0 grp 239.1.1.8 permanent dst 198.51.100.1 dst_port 22222 src_vni 10010"
+
+ # Different VNI.
+ run_cmd "bridge -n $ns1_v4 mdb add dev vx0 port vx0 grp 239.1.1.9 permanent dst 198.51.100.1 vni 10010 src_vni 10010"
+ run_cmd "bridge -n $ns1_v4 mdb add dev vx0 port vx0 grp 239.1.1.10 permanent dst 198.51.100.1 vni 10020 src_vni 10010"
+
+ run_cmd "bridge -n $ns1_v4 mdb flush dev vx0"
+ num_entries=$(bridge -n $ns1_v4 mdb show dev vx0 | wc -l)
+ [[ $num_entries -eq 0 ]]
+ log_test $? 0 "Flush all"
+
+ # Check that entries are flushed when port is specified as the VXLAN
+ # device and that an error is returned when port is specified as a
+ # different net device.
+
+ run_cmd "bridge -n $ns1_v4 mdb add dev vx0 port vx0 grp 239.1.1.1 permanent dst 198.51.100.1 src_vni 10010"
+ run_cmd "bridge -n $ns1_v4 mdb add dev vx0 port vx0 grp 239.1.1.1 permanent dst 198.51.100.2 src_vni 10010"
+
+ run_cmd "bridge -n $ns1_v4 mdb flush dev vx0 port vx0"
+ run_cmd "bridge -n $ns1_v4 -d -s mdb get dev vx0 grp 239.1.1.1 src_vni 10010"
+ log_test $? 254 "Flush by port - matching"
+
+ run_cmd "bridge -n $ns1_v4 mdb flush dev vx0 port veth0"
+ log_test $? 255 "Flush by port - non-matching"
+
+ # Check that when flushing by source VNI only entries programmed with
+ # the specified source VNI are flushed and the rest are not.
+
+ run_cmd "bridge -n $ns1_v4 mdb add dev vx0 port vx0 grp 239.1.1.1 permanent dst 198.51.100.1 src_vni 10010"
+ run_cmd "bridge -n $ns1_v4 mdb add dev vx0 port vx0 grp 239.1.1.1 permanent dst 198.51.100.2 src_vni 10010"
+ run_cmd "bridge -n $ns1_v4 mdb add dev vx0 port vx0 grp 239.1.1.1 permanent dst 198.51.100.1 src_vni 10011"
+ run_cmd "bridge -n $ns1_v4 mdb add dev vx0 port vx0 grp 239.1.1.1 permanent dst 198.51.100.2 src_vni 10011"
+
+ run_cmd "bridge -n $ns1_v4 mdb flush dev vx0 src_vni 10010"
+
+ run_cmd "bridge -n $ns1_v4 -d -s mdb get dev vx0 grp 239.1.1.1 src_vni 10010"
+ log_test $? 254 "Flush by source VNI - matching"
+ run_cmd "bridge -n $ns1_v4 -d -s mdb get dev vx0 grp 239.1.1.1 src_vni 10011"
+ log_test $? 0 "Flush by source VNI - non-matching"
+
+ run_cmd "bridge -n $ns1_v4 mdb flush dev vx0"
+
+ # Check that all entries are flushed when "permanent" is specified and
+ # that an error is returned when "nopermanent" is specified.
+
+ run_cmd "bridge -n $ns1_v4 mdb add dev vx0 port vx0 grp 239.1.1.1 permanent dst 198.51.100.1 src_vni 10010"
+ run_cmd "bridge -n $ns1_v4 mdb add dev vx0 port vx0 grp 239.1.1.1 permanent dst 198.51.100.2 src_vni 10010"
+
+ run_cmd "bridge -n $ns1_v4 mdb flush dev vx0 permanent"
+ run_cmd "bridge -n $ns1_v4 -d -s mdb get dev vx0 grp 239.1.1.1 src_vni 10010"
+ log_test $? 254 "Flush by \"permanent\" state"
+
+ run_cmd "bridge -n $ns1_v4 mdb flush dev vx0 nopermanent"
+ log_test $? 255 "Flush by \"nopermanent\" state"
+
+ # Check that when flushing by routing protocol only entries programmed
+ # with the specified routing protocol are flushed and the rest are not.
+
+ run_cmd "bridge -n $ns1_v4 mdb add dev vx0 port vx0 grp 239.1.1.1 permanent proto bgp dst 198.51.100.1 src_vni 10010"
+ run_cmd "bridge -n $ns1_v4 mdb add dev vx0 port vx0 grp 239.1.1.1 permanent proto zebra dst 198.51.100.2 src_vni 10010"
+
+ run_cmd "bridge -n $ns1_v4 mdb flush dev vx0 proto bgp"
+
+ run_cmd "bridge -n $ns1_v4 -d -s mdb get dev vx0 grp 239.1.1.1 src_vni 10010 | grep \"proto bgp\""
+ log_test $? 1 "Flush by routing protocol - matching"
+ run_cmd "bridge -n $ns1_v4 -d -s mdb get dev vx0 grp 239.1.1.1 src_vni 10010 | grep \"proto zebra\""
+ log_test $? 0 "Flush by routing protocol - non-matching"
+
+ run_cmd "bridge -n $ns1_v4 mdb flush dev vx0"
+
+ # Check that when flushing by destination IP only entries programmed
+ # with the specified destination IP are flushed and the rest are not.
+
+ # IPv4.
+
+ run_cmd "bridge -n $ns1_v4 mdb add dev vx0 port vx0 grp 239.1.1.1 permanent dst 198.51.100.1 src_vni 10010"
+ run_cmd "bridge -n $ns1_v4 mdb add dev vx0 port vx0 grp 239.1.1.1 permanent dst 198.51.100.2 src_vni 10010"
+
+ run_cmd "bridge -n $ns1_v4 mdb flush dev vx0 dst 198.51.100.2"
+
+ run_cmd "bridge -n $ns1_v4 -d -s mdb get dev vx0 grp 239.1.1.1 src_vni 10010 | grep 198.51.100.2"
+ log_test $? 1 "Flush by IPv4 destination IP - matching"
+ run_cmd "bridge -n $ns1_v4 -d -s mdb get dev vx0 grp 239.1.1.1 src_vni 10010 | grep 198.51.100.1"
+ log_test $? 0 "Flush by IPv4 destination IP - non-matching"
+
+ run_cmd "bridge -n $ns1_v4 mdb flush dev vx0"
+
+ # IPv6.
+
+ run_cmd "bridge -n $ns1_v4 mdb add dev vx0 port vx0 grp 239.1.1.1 permanent dst 2001:db8:1000::1 src_vni 10010"
+ run_cmd "bridge -n $ns1_v4 mdb add dev vx0 port vx0 grp 239.1.1.1 permanent dst 2001:db8:1000::2 src_vni 10010"
+
+ run_cmd "bridge -n $ns1_v4 mdb flush dev vx0 dst 2001:db8:1000::2"
+
+ run_cmd "bridge -n $ns1_v4 -d -s mdb get dev vx0 grp 239.1.1.1 src_vni 10010 | grep 2001:db8:1000::2"
+ log_test $? 1 "Flush by IPv6 destination IP - matching"
+ run_cmd "bridge -n $ns1_v4 -d -s mdb get dev vx0 grp 239.1.1.1 src_vni 10010 | grep 2001:db8:1000::1"
+ log_test $? 0 "Flush by IPv6 destination IP - non-matching"
+
+ run_cmd "bridge -n $ns1_v4 mdb flush dev vx0"
+
+ # Check that when flushing by UDP destination port only entries
+ # programmed with the specified port are flushed and the rest are not.
+
+ run_cmd "bridge -n $ns1_v4 mdb add dev vx0 port vx0 grp 239.1.1.1 permanent dst_port 11111 dst 198.51.100.1 src_vni 10010"
+ run_cmd "bridge -n $ns1_v4 mdb add dev vx0 port vx0 grp 239.1.1.1 permanent dst_port 22222 dst 198.51.100.2 src_vni 10010"
+
+ run_cmd "bridge -n $ns1_v4 mdb flush dev vx0 dst_port 11111"
+
+ run_cmd "bridge -n $ns1_v4 -d -s mdb get dev vx0 grp 239.1.1.1 src_vni 10010 | grep \"dst_port 11111\""
+ log_test $? 1 "Flush by UDP destination port - matching"
+ run_cmd "bridge -n $ns1_v4 -d -s mdb get dev vx0 grp 239.1.1.1 src_vni 10010 | grep \"dst_port 22222\""
+ log_test $? 0 "Flush by UDP destination port - non-matching"
+
+ run_cmd "bridge -n $ns1_v4 mdb flush dev vx0"
+
+ # When not specifying a UDP destination port for an entry, traffic is
+ # encapsulated with the device's UDP destination port. Check that when
+ # flushing by the device's UDP destination port only entries programmed
+ # with this port are flushed and the rest are not.
+
+ run_cmd "bridge -n $ns1_v4 mdb add dev vx0 port vx0 grp 239.1.1.1 permanent dst 198.51.100.1 src_vni 10010"
+ run_cmd "bridge -n $ns1_v4 mdb add dev vx0 port vx0 grp 239.1.1.1 permanent dst_port 22222 dst 198.51.100.2 src_vni 10010"
+
+ run_cmd "bridge -n $ns1_v4 mdb flush dev vx0 dst_port 4789"
+
+ run_cmd "bridge -n $ns1_v4 -d -s mdb get dev vx0 grp 239.1.1.1 src_vni 10010 | grep 198.51.100.1"
+ log_test $? 1 "Flush by device's UDP destination port - matching"
+ run_cmd "bridge -n $ns1_v4 -d -s mdb get dev vx0 grp 239.1.1.1 src_vni 10010 | grep 198.51.100.2"
+ log_test $? 0 "Flush by device's UDP destination port - non-matching"
+
+ run_cmd "bridge -n $ns1_v4 mdb flush dev vx0"
+
+ # Check that when flushing by destination VNI only entries programmed
+ # with the specified destination VNI are flushed and the rest are not.
+
+ run_cmd "bridge -n $ns1_v4 mdb add dev vx0 port vx0 grp 239.1.1.1 permanent vni 20010 dst 198.51.100.1 src_vni 10010"
+ run_cmd "bridge -n $ns1_v4 mdb add dev vx0 port vx0 grp 239.1.1.1 permanent vni 20011 dst 198.51.100.2 src_vni 10010"
+
+ run_cmd "bridge -n $ns1_v4 mdb flush dev vx0 vni 20010"
+
+ run_cmd "bridge -n $ns1_v4 -d -s mdb get dev vx0 grp 239.1.1.1 src_vni 10010 | grep \" vni 20010\""
+ log_test $? 1 "Flush by destination VNI - matching"
+ run_cmd "bridge -n $ns1_v4 -d -s mdb get dev vx0 grp 239.1.1.1 src_vni 10010 | grep \" vni 20011\""
+ log_test $? 0 "Flush by destination VNI - non-matching"
+
+ run_cmd "bridge -n $ns1_v4 mdb flush dev vx0"
+
+ # When not specifying a destination VNI for an entry, traffic is
+ # encapsulated with the source VNI. Check that when flushing by a
+ # destination VNI that is equal to the source VNI only such entries are
+ # flushed and the rest are not.
+
+ run_cmd "bridge -n $ns1_v4 mdb add dev vx0 port vx0 grp 239.1.1.1 permanent dst 198.51.100.1 src_vni 10010"
+ run_cmd "bridge -n $ns1_v4 mdb add dev vx0 port vx0 grp 239.1.1.1 permanent vni 20010 dst 198.51.100.2 src_vni 10010"
+
+ run_cmd "bridge -n $ns1_v4 mdb flush dev vx0 vni 10010"
+
+ run_cmd "bridge -n $ns1_v4 -d -s mdb get dev vx0 grp 239.1.1.1 src_vni 10010 | grep 198.51.100.1"
+ log_test $? 1 "Flush by destination VNI equal to source VNI - matching"
+ run_cmd "bridge -n $ns1_v4 -d -s mdb get dev vx0 grp 239.1.1.1 src_vni 10010 | grep 198.51.100.2"
+ log_test $? 0 "Flush by destination VNI equal to source VNI - non-matching"
+
+ run_cmd "bridge -n $ns1_v4 mdb flush dev vx0"
+
+ # Test that an error is returned when trying to flush using VLAN ID.
+
+ run_cmd "bridge -n $ns1_v4 mdb flush dev vx0 vid 10"
+ log_test $? 255 "Flush by VLAN ID"
+}
+
+################################################################################
+# Tests - Data path
+
+encap_params_common()
+{
+ local ns1=$1; shift
+ local ns2=$1; shift
+ local vtep1_ip=$1; shift
+ local vtep2_ip=$1; shift
+ local plen=$1; shift
+ local enc_ethtype=$1; shift
+ local grp=$1; shift
+ local src=$1; shift
+ local mz=$1; shift
+
+ # Test that packets forwarded by the VXLAN MDB are encapsulated with
+ # the correct parameters. Transmit packets from the first namespace and
+ # check that they hit the corresponding filters on the ingress of the
+ # second namespace.
+
+ run_cmd "tc -n $ns2 qdisc replace dev veth0 clsact"
+ run_cmd "tc -n $ns2 qdisc replace dev vx0 clsact"
+ run_cmd "ip -n $ns2 address replace $vtep1_ip/$plen dev lo"
+ run_cmd "ip -n $ns2 address replace $vtep2_ip/$plen dev lo"
+
+ # Check destination IP.
+ run_cmd "bridge -n $ns1 mdb replace dev vx0 port vx0 grp $grp permanent dst $vtep1_ip src_vni 10010"
+ run_cmd "bridge -n $ns1 mdb replace dev vx0 port vx0 grp $grp permanent dst $vtep2_ip src_vni 10020"
+
+ run_cmd "tc -n $ns2 filter replace dev vx0 ingress pref 1 handle 101 proto all flower enc_dst_ip $vtep1_ip action pass"
+ run_cmd "ip netns exec $ns1 $mz br0.10 -A $src -B $grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q"
+ tc_check_packets "$ns2" "dev vx0 ingress" 101 1
+ log_test $? 0 "Destination IP - match"
+
+ run_cmd "ip netns exec $ns1 $mz br0.20 -A $src -B $grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q"
+ tc_check_packets "$ns2" "dev vx0 ingress" 101 1
+ log_test $? 0 "Destination IP - no match"
+
+ run_cmd "tc -n $ns2 filter del dev vx0 ingress pref 1 handle 101 flower"
+ run_cmd "bridge -n $ns1 mdb del dev vx0 port vx0 grp $grp dst $vtep2_ip src_vni 10020"
+ run_cmd "bridge -n $ns1 mdb del dev vx0 port vx0 grp $grp dst $vtep1_ip src_vni 10010"
+
+ # Check destination port.
+ run_cmd "bridge -n $ns1 mdb replace dev vx0 port vx0 grp $grp permanent dst $vtep1_ip src_vni 10010"
+ run_cmd "bridge -n $ns1 mdb replace dev vx0 port vx0 grp $grp permanent dst $vtep1_ip dst_port 1111 src_vni 10020"
+
+ run_cmd "tc -n $ns2 filter replace dev veth0 ingress pref 1 handle 101 proto $enc_ethtype flower ip_proto udp dst_port 4789 action pass"
+ run_cmd "ip netns exec $ns1 $mz br0.10 -A $src -B $grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q"
+ tc_check_packets "$ns2" "dev veth0 ingress" 101 1
+ log_test $? 0 "Default destination port - match"
+
+ run_cmd "ip netns exec $ns1 $mz br0.20 -A $src -B $grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q"
+ tc_check_packets "$ns2" "dev veth0 ingress" 101 1
+ log_test $? 0 "Default destination port - no match"
+
+ run_cmd "tc -n $ns2 filter replace dev veth0 ingress pref 1 handle 101 proto $enc_ethtype flower ip_proto udp dst_port 1111 action pass"
+ run_cmd "ip netns exec $ns1 $mz br0.20 -A $src -B $grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q"
+ tc_check_packets "$ns2" "dev veth0 ingress" 101 1
+ log_test $? 0 "Non-default destination port - match"
+
+ run_cmd "ip netns exec $ns1 $mz br0.10 -A $src -B $grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q"
+ tc_check_packets "$ns2" "dev veth0 ingress" 101 1
+ log_test $? 0 "Non-default destination port - no match"
+
+ run_cmd "tc -n $ns2 filter del dev veth0 ingress pref 1 handle 101 flower"
+ run_cmd "bridge -n $ns1 mdb del dev vx0 port vx0 grp $grp dst $vtep1_ip src_vni 10020"
+ run_cmd "bridge -n $ns1 mdb del dev vx0 port vx0 grp $grp dst $vtep1_ip src_vni 10010"
+
+ # Check default VNI.
+ run_cmd "bridge -n $ns1 mdb replace dev vx0 port vx0 grp $grp permanent dst $vtep1_ip src_vni 10010"
+ run_cmd "bridge -n $ns1 mdb replace dev vx0 port vx0 grp $grp permanent dst $vtep1_ip src_vni 10020"
+
+ run_cmd "tc -n $ns2 filter replace dev vx0 ingress pref 1 handle 101 proto all flower enc_key_id 10010 action pass"
+ run_cmd "ip netns exec $ns1 $mz br0.10 -A $src -B $grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q"
+ tc_check_packets "$ns2" "dev vx0 ingress" 101 1
+ log_test $? 0 "Default destination VNI - match"
+
+ run_cmd "ip netns exec $ns1 $mz br0.20 -A $src -B $grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q"
+ tc_check_packets "$ns2" "dev vx0 ingress" 101 1
+ log_test $? 0 "Default destination VNI - no match"
+
+ run_cmd "bridge -n $ns1 mdb replace dev vx0 port vx0 grp $grp permanent dst $vtep1_ip vni 10020 src_vni 10010"
+ run_cmd "bridge -n $ns1 mdb replace dev vx0 port vx0 grp $grp permanent dst $vtep1_ip vni 10010 src_vni 10020"
+
+ run_cmd "tc -n $ns2 filter replace dev vx0 ingress pref 1 handle 101 proto all flower enc_key_id 10020 action pass"
+ run_cmd "ip netns exec $ns1 $mz br0.10 -A $src -B $grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q"
+ tc_check_packets "$ns2" "dev vx0 ingress" 101 1
+ log_test $? 0 "Non-default destination VNI - match"
+
+ run_cmd "ip netns exec $ns1 $mz br0.20 -A $src -B $grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q"
+ tc_check_packets "$ns2" "dev vx0 ingress" 101 1
+ log_test $? 0 "Non-default destination VNI - no match"
+
+ run_cmd "tc -n $ns2 filter del dev vx0 ingress pref 1 handle 101 flower"
+ run_cmd "bridge -n $ns1 mdb del dev vx0 port vx0 grp $grp dst $vtep1_ip src_vni 10020"
+ run_cmd "bridge -n $ns1 mdb del dev vx0 port vx0 grp $grp dst $vtep1_ip src_vni 10010"
+}
+
+encap_params_ipv4_ipv4()
+{
+ local ns1=$ns1_v4
+ local ns2=$ns2_v4
+ local vtep1_ip=198.51.100.100
+ local vtep2_ip=198.51.100.200
+ local plen=32
+ local enc_ethtype="ip"
+ local grp=239.1.1.1
+ local src=192.0.2.129
+
+ echo
+ echo "Data path: Encapsulation parameters - IPv4 overlay / IPv4 underlay"
+ echo "------------------------------------------------------------------"
+
+ encap_params_common $ns1 $ns2 $vtep1_ip $vtep2_ip $plen $enc_ethtype \
+ $grp $src "mausezahn"
+}
+
+encap_params_ipv6_ipv4()
+{
+ local ns1=$ns1_v4
+ local ns2=$ns2_v4
+ local vtep1_ip=198.51.100.100
+ local vtep2_ip=198.51.100.200
+ local plen=32
+ local enc_ethtype="ip"
+ local grp=ff0e::1
+ local src=2001:db8:100::1
+
+ echo
+ echo "Data path: Encapsulation parameters - IPv6 overlay / IPv4 underlay"
+ echo "------------------------------------------------------------------"
+
+ encap_params_common $ns1 $ns2 $vtep1_ip $vtep2_ip $plen $enc_ethtype \
+ $grp $src "mausezahn -6"
+}
+
+encap_params_ipv4_ipv6()
+{
+ local ns1=$ns1_v6
+ local ns2=$ns2_v6
+ local vtep1_ip=2001:db8:1000::1
+ local vtep2_ip=2001:db8:2000::1
+ local plen=128
+ local enc_ethtype="ipv6"
+ local grp=239.1.1.1
+ local src=192.0.2.129
+
+ echo
+ echo "Data path: Encapsulation parameters - IPv4 overlay / IPv6 underlay"
+ echo "------------------------------------------------------------------"
+
+ encap_params_common $ns1 $ns2 $vtep1_ip $vtep2_ip $plen $enc_ethtype \
+ $grp $src "mausezahn"
+}
+
+encap_params_ipv6_ipv6()
+{
+ local ns1=$ns1_v6
+ local ns2=$ns2_v6
+ local vtep1_ip=2001:db8:1000::1
+ local vtep2_ip=2001:db8:2000::1
+ local plen=128
+ local enc_ethtype="ipv6"
+ local grp=ff0e::1
+ local src=2001:db8:100::1
+
+ echo
+ echo "Data path: Encapsulation parameters - IPv6 overlay / IPv6 underlay"
+ echo "------------------------------------------------------------------"
+
+ encap_params_common $ns1 $ns2 $vtep1_ip $vtep2_ip $plen $enc_ethtype \
+ $grp $src "mausezahn -6"
+}
+
+starg_exclude_ir_common()
+{
+ local ns1=$1; shift
+ local ns2=$1; shift
+ local vtep1_ip=$1; shift
+ local vtep2_ip=$1; shift
+ local plen=$1; shift
+ local grp=$1; shift
+ local valid_src=$1; shift
+ local invalid_src=$1; shift
+ local mz=$1; shift
+
+ # Install a (*, G) EXCLUDE MDB entry with one source and two remote
+ # VTEPs. Make sure that the source in the source list is not forwarded
+ # and that a source not in the list is forwarded. Remove one of the
+ # VTEPs from the entry and make sure that packets are only forwarded to
+ # the remaining VTEP.
+
+ run_cmd "tc -n $ns2 qdisc replace dev vx0 clsact"
+ run_cmd "ip -n $ns2 address replace $vtep1_ip/$plen dev lo"
+ run_cmd "ip -n $ns2 address replace $vtep2_ip/$plen dev lo"
+
+ run_cmd "tc -n $ns2 filter replace dev vx0 ingress pref 1 handle 101 proto all flower enc_dst_ip $vtep1_ip action pass"
+ run_cmd "tc -n $ns2 filter replace dev vx0 ingress pref 1 handle 102 proto all flower enc_dst_ip $vtep2_ip action pass"
+
+ run_cmd "bridge -n $ns1 mdb replace dev vx0 port vx0 grp $grp permanent filter_mode exclude source_list $invalid_src dst $vtep1_ip src_vni 10010"
+ run_cmd "bridge -n $ns1 mdb replace dev vx0 port vx0 grp $grp permanent filter_mode exclude source_list $invalid_src dst $vtep2_ip src_vni 10010"
+
+ # Check that invalid source is not forwarded to any VTEP.
+ run_cmd "ip netns exec $ns1 $mz br0.10 -A $invalid_src -B $grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q"
+ tc_check_packets "$ns2" "dev vx0 ingress" 101 0
+ log_test $? 0 "Block excluded source - first VTEP"
+ tc_check_packets "$ns2" "dev vx0 ingress" 102 0
+ log_test $? 0 "Block excluded source - second VTEP"
+
+ # Check that valid source is forwarded to both VTEPs.
+ run_cmd "ip netns exec $ns1 $mz br0.10 -A $valid_src -B $grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q"
+ tc_check_packets "$ns2" "dev vx0 ingress" 101 1
+ log_test $? 0 "Forward valid source - first VTEP"
+ tc_check_packets "$ns2" "dev vx0 ingress" 102 1
+ log_test $? 0 "Forward valid source - second VTEP"
+
+ # Remove second VTEP.
+ run_cmd "bridge -n $ns1 mdb del dev vx0 port vx0 grp $grp dst $vtep2_ip src_vni 10010"
+
+ # Check that invalid source is not forwarded to any VTEP.
+ run_cmd "ip netns exec $ns1 $mz br0.10 -A $invalid_src -B $grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q"
+ tc_check_packets "$ns2" "dev vx0 ingress" 101 1
+ log_test $? 0 "Block excluded source after removal - first VTEP"
+ tc_check_packets "$ns2" "dev vx0 ingress" 102 1
+ log_test $? 0 "Block excluded source after removal - second VTEP"
+
+ # Check that valid source is forwarded to the remaining VTEP.
+ run_cmd "ip netns exec $ns1 $mz br0.10 -A $valid_src -B $grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q"
+ tc_check_packets "$ns2" "dev vx0 ingress" 101 2
+ log_test $? 0 "Forward valid source after removal - first VTEP"
+ tc_check_packets "$ns2" "dev vx0 ingress" 102 1
+ log_test $? 0 "Forward valid source after removal - second VTEP"
+}
+
+starg_exclude_ir_ipv4_ipv4()
+{
+ local ns1=$ns1_v4
+ local ns2=$ns2_v4
+ local vtep1_ip=198.51.100.100
+ local vtep2_ip=198.51.100.200
+ local plen=32
+ local grp=239.1.1.1
+ local valid_src=192.0.2.129
+ local invalid_src=192.0.2.145
+
+ echo
+ echo "Data path: (*, G) EXCLUDE - IR - IPv4 overlay / IPv4 underlay"
+ echo "-------------------------------------------------------------"
+
+ starg_exclude_ir_common $ns1 $ns2 $vtep1_ip $vtep2_ip $plen $grp \
+ $valid_src $invalid_src "mausezahn"
+}
+
+starg_exclude_ir_ipv6_ipv4()
+{
+ local ns1=$ns1_v4
+ local ns2=$ns2_v4
+ local vtep1_ip=198.51.100.100
+ local vtep2_ip=198.51.100.200
+ local plen=32
+ local grp=ff0e::1
+ local valid_src=2001:db8:100::1
+ local invalid_src=2001:db8:200::1
+
+ echo
+ echo "Data path: (*, G) EXCLUDE - IR - IPv6 overlay / IPv4 underlay"
+ echo "-------------------------------------------------------------"
+
+ starg_exclude_ir_common $ns1 $ns2 $vtep1_ip $vtep2_ip $plen $grp \
+ $valid_src $invalid_src "mausezahn -6"
+}
+
+starg_exclude_ir_ipv4_ipv6()
+{
+ local ns1=$ns1_v6
+ local ns2=$ns2_v6
+ local vtep1_ip=2001:db8:1000::1
+ local vtep2_ip=2001:db8:2000::1
+ local plen=128
+ local grp=239.1.1.1
+ local valid_src=192.0.2.129
+ local invalid_src=192.0.2.145
+
+ echo
+ echo "Data path: (*, G) EXCLUDE - IR - IPv4 overlay / IPv6 underlay"
+ echo "-------------------------------------------------------------"
+
+ starg_exclude_ir_common $ns1 $ns2 $vtep1_ip $vtep2_ip $plen $grp \
+ $valid_src $invalid_src "mausezahn"
+}
+
+starg_exclude_ir_ipv6_ipv6()
+{
+ local ns1=$ns1_v6
+ local ns2=$ns2_v6
+ local vtep1_ip=2001:db8:1000::1
+ local vtep2_ip=2001:db8:2000::1
+ local plen=128
+ local grp=ff0e::1
+ local valid_src=2001:db8:100::1
+ local invalid_src=2001:db8:200::1
+
+ echo
+ echo "Data path: (*, G) EXCLUDE - IR - IPv6 overlay / IPv6 underlay"
+ echo "-------------------------------------------------------------"
+
+ starg_exclude_ir_common $ns1 $ns2 $vtep1_ip $vtep2_ip $plen $grp \
+ $valid_src $invalid_src "mausezahn -6"
+}
+
+starg_include_ir_common()
+{
+ local ns1=$1; shift
+ local ns2=$1; shift
+ local vtep1_ip=$1; shift
+ local vtep2_ip=$1; shift
+ local plen=$1; shift
+ local grp=$1; shift
+ local valid_src=$1; shift
+ local invalid_src=$1; shift
+ local mz=$1; shift
+
+ # Install a (*, G) INCLUDE MDB entry with one source and two remote
+ # VTEPs. Make sure that the source in the source list is forwarded and
+ # that a source not in the list is not forwarded. Remove one of the
+ # VTEPs from the entry and make sure that packets are only forwarded to
+ # the remaining VTEP.
+
+ run_cmd "tc -n $ns2 qdisc replace dev vx0 clsact"
+ run_cmd "ip -n $ns2 address replace $vtep1_ip/$plen dev lo"
+ run_cmd "ip -n $ns2 address replace $vtep2_ip/$plen dev lo"
+
+ run_cmd "tc -n $ns2 filter replace dev vx0 ingress pref 1 handle 101 proto all flower enc_dst_ip $vtep1_ip action pass"
+ run_cmd "tc -n $ns2 filter replace dev vx0 ingress pref 1 handle 102 proto all flower enc_dst_ip $vtep2_ip action pass"
+
+ run_cmd "bridge -n $ns1 mdb replace dev vx0 port vx0 grp $grp permanent filter_mode include source_list $valid_src dst $vtep1_ip src_vni 10010"
+ run_cmd "bridge -n $ns1 mdb replace dev vx0 port vx0 grp $grp permanent filter_mode include source_list $valid_src dst $vtep2_ip src_vni 10010"
+
+ # Check that invalid source is not forwarded to any VTEP.
+ run_cmd "ip netns exec $ns1 $mz br0.10 -A $invalid_src -B $grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q"
+ tc_check_packets "$ns2" "dev vx0 ingress" 101 0
+ log_test $? 0 "Block excluded source - first VTEP"
+ tc_check_packets "$ns2" "dev vx0 ingress" 102 0
+ log_test $? 0 "Block excluded source - second VTEP"
+
+ # Check that valid source is forwarded to both VTEPs.
+ run_cmd "ip netns exec $ns1 $mz br0.10 -A $valid_src -B $grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q"
+ tc_check_packets "$ns2" "dev vx0 ingress" 101 1
+ log_test $? 0 "Forward valid source - first VTEP"
+ tc_check_packets "$ns2" "dev vx0 ingress" 102 1
+ log_test $? 0 "Forward valid source - second VTEP"
+
+ # Remove second VTEP.
+ run_cmd "bridge -n $ns1 mdb del dev vx0 port vx0 grp $grp dst $vtep2_ip src_vni 10010"
+
+ # Check that invalid source is not forwarded to any VTEP.
+ run_cmd "ip netns exec $ns1 $mz br0.10 -A $invalid_src -B $grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q"
+ tc_check_packets "$ns2" "dev vx0 ingress" 101 1
+ log_test $? 0 "Block excluded source after removal - first VTEP"
+ tc_check_packets "$ns2" "dev vx0 ingress" 102 1
+ log_test $? 0 "Block excluded source after removal - second VTEP"
+
+ # Check that valid source is forwarded to the remaining VTEP.
+ run_cmd "ip netns exec $ns1 $mz br0.10 -A $valid_src -B $grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q"
+ tc_check_packets "$ns2" "dev vx0 ingress" 101 2
+ log_test $? 0 "Forward valid source after removal - first VTEP"
+ tc_check_packets "$ns2" "dev vx0 ingress" 102 1
+ log_test $? 0 "Forward valid source after removal - second VTEP"
+}
+
+starg_include_ir_ipv4_ipv4()
+{
+ local ns1=$ns1_v4
+ local ns2=$ns2_v4
+ local vtep1_ip=198.51.100.100
+ local vtep2_ip=198.51.100.200
+ local plen=32
+ local grp=239.1.1.1
+ local valid_src=192.0.2.129
+ local invalid_src=192.0.2.145
+
+ echo
+ echo "Data path: (*, G) INCLUDE - IR - IPv4 overlay / IPv4 underlay"
+ echo "-------------------------------------------------------------"
+
+ starg_include_ir_common $ns1 $ns2 $vtep1_ip $vtep2_ip $plen $grp \
+ $valid_src $invalid_src "mausezahn"
+}
+
+starg_include_ir_ipv6_ipv4()
+{
+ local ns1=$ns1_v4
+ local ns2=$ns2_v4
+ local vtep1_ip=198.51.100.100
+ local vtep2_ip=198.51.100.200
+ local plen=32
+ local grp=ff0e::1
+ local valid_src=2001:db8:100::1
+ local invalid_src=2001:db8:200::1
+
+ echo
+ echo "Data path: (*, G) INCLUDE - IR - IPv6 overlay / IPv4 underlay"
+ echo "-------------------------------------------------------------"
+
+ starg_include_ir_common $ns1 $ns2 $vtep1_ip $vtep2_ip $plen $grp \
+ $valid_src $invalid_src "mausezahn -6"
+}
+
+starg_include_ir_ipv4_ipv6()
+{
+ local ns1=$ns1_v6
+ local ns2=$ns2_v6
+ local vtep1_ip=2001:db8:1000::1
+ local vtep2_ip=2001:db8:2000::1
+ local plen=128
+ local grp=239.1.1.1
+ local valid_src=192.0.2.129
+ local invalid_src=192.0.2.145
+
+ echo
+ echo "Data path: (*, G) INCLUDE - IR - IPv4 overlay / IPv6 underlay"
+ echo "-------------------------------------------------------------"
+
+ starg_include_ir_common $ns1 $ns2 $vtep1_ip $vtep2_ip $plen $grp \
+ $valid_src $invalid_src "mausezahn"
+}
+
+starg_include_ir_ipv6_ipv6()
+{
+ local ns1=$ns1_v6
+ local ns2=$ns2_v6
+ local vtep1_ip=2001:db8:1000::1
+ local vtep2_ip=2001:db8:2000::1
+ local plen=128
+ local grp=ff0e::1
+ local valid_src=2001:db8:100::1
+ local invalid_src=2001:db8:200::1
+
+ echo
+ echo "Data path: (*, G) INCLUDE - IR - IPv6 overlay / IPv6 underlay"
+ echo "-------------------------------------------------------------"
+
+ starg_include_ir_common $ns1 $ns2 $vtep1_ip $vtep2_ip $plen $grp \
+ $valid_src $invalid_src "mausezahn -6"
+}
+
+starg_exclude_p2mp_common()
+{
+ local ns1=$1; shift
+ local ns2=$1; shift
+ local mcast_grp=$1; shift
+ local plen=$1; shift
+ local grp=$1; shift
+ local valid_src=$1; shift
+ local invalid_src=$1; shift
+ local mz=$1; shift
+
+ # Install a (*, G) EXCLUDE MDB entry with one source and one multicast
+ # group to which packets are sent. Make sure that the source in the
+ # source list is not forwarded and that a source not in the list is
+ # forwarded.
+
+ run_cmd "tc -n $ns2 qdisc replace dev vx0 clsact"
+ run_cmd "ip -n $ns2 address replace $mcast_grp/$plen dev veth0 autojoin"
+
+ run_cmd "tc -n $ns2 filter replace dev vx0 ingress pref 1 handle 101 proto all flower enc_dst_ip $mcast_grp action pass"
+
+ run_cmd "bridge -n $ns1 mdb replace dev vx0 port vx0 grp $grp permanent filter_mode exclude source_list $invalid_src dst $mcast_grp src_vni 10010 via veth0"
+
+ # Check that invalid source is not forwarded.
+ run_cmd "ip netns exec $ns1 $mz br0.10 -A $invalid_src -B $grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q"
+ tc_check_packets "$ns2" "dev vx0 ingress" 101 0
+ log_test $? 0 "Block excluded source"
+
+ # Check that valid source is forwarded.
+ run_cmd "ip netns exec $ns1 $mz br0.10 -A $valid_src -B $grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q"
+ tc_check_packets "$ns2" "dev vx0 ingress" 101 1
+ log_test $? 0 "Forward valid source"
+
+ # Remove the VTEP from the multicast group.
+ run_cmd "ip -n $ns2 address del $mcast_grp/$plen dev veth0"
+
+ # Check that valid source is not received anymore.
+ run_cmd "ip netns exec $ns1 $mz br0.10 -A $valid_src -B $grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q"
+ tc_check_packets "$ns2" "dev vx0 ingress" 101 1
+ log_test $? 0 "Receive of valid source after removal from group"
+}
+
+starg_exclude_p2mp_ipv4_ipv4()
+{
+ local ns1=$ns1_v4
+ local ns2=$ns2_v4
+ local mcast_grp=238.1.1.1
+ local plen=32
+ local grp=239.1.1.1
+ local valid_src=192.0.2.129
+ local invalid_src=192.0.2.145
+
+ echo
+ echo "Data path: (*, G) EXCLUDE - P2MP - IPv4 overlay / IPv4 underlay"
+ echo "---------------------------------------------------------------"
+
+ starg_exclude_p2mp_common $ns1 $ns2 $mcast_grp $plen $grp \
+ $valid_src $invalid_src "mausezahn"
+}
+
+starg_exclude_p2mp_ipv6_ipv4()
+{
+ local ns1=$ns1_v4
+ local ns2=$ns2_v4
+ local mcast_grp=238.1.1.1
+ local plen=32
+ local grp=ff0e::1
+ local valid_src=2001:db8:100::1
+ local invalid_src=2001:db8:200::1
+
+ echo
+ echo "Data path: (*, G) EXCLUDE - P2MP - IPv6 overlay / IPv4 underlay"
+ echo "---------------------------------------------------------------"
+
+ starg_exclude_p2mp_common $ns1 $ns2 $mcast_grp $plen $grp \
+ $valid_src $invalid_src "mausezahn -6"
+}
+
+starg_exclude_p2mp_ipv4_ipv6()
+{
+ local ns1=$ns1_v6
+ local ns2=$ns2_v6
+ local mcast_grp=ff0e::2
+ local plen=128
+ local grp=239.1.1.1
+ local valid_src=192.0.2.129
+ local invalid_src=192.0.2.145
+
+ echo
+ echo "Data path: (*, G) EXCLUDE - P2MP - IPv4 overlay / IPv6 underlay"
+ echo "---------------------------------------------------------------"
+
+ starg_exclude_p2mp_common $ns1 $ns2 $mcast_grp $plen $grp \
+ $valid_src $invalid_src "mausezahn"
+}
+
+starg_exclude_p2mp_ipv6_ipv6()
+{
+ local ns1=$ns1_v6
+ local ns2=$ns2_v6
+ local mcast_grp=ff0e::2
+ local plen=128
+ local grp=ff0e::1
+ local valid_src=2001:db8:100::1
+ local invalid_src=2001:db8:200::1
+
+ echo
+ echo "Data path: (*, G) EXCLUDE - P2MP - IPv6 overlay / IPv6 underlay"
+ echo "---------------------------------------------------------------"
+
+ starg_exclude_p2mp_common $ns1 $ns2 $mcast_grp $plen $grp \
+ $valid_src $invalid_src "mausezahn -6"
+}
+
+starg_include_p2mp_common()
+{
+ local ns1=$1; shift
+ local ns2=$1; shift
+ local mcast_grp=$1; shift
+ local plen=$1; shift
+ local grp=$1; shift
+ local valid_src=$1; shift
+ local invalid_src=$1; shift
+ local mz=$1; shift
+
+ # Install a (*, G) INCLUDE MDB entry with one source and one multicast
+ # group to which packets are sent. Make sure that the source in the
+ # source list is forwarded and that a source not in the list is not
+ # forwarded.
+
+ run_cmd "tc -n $ns2 qdisc replace dev vx0 clsact"
+ run_cmd "ip -n $ns2 address replace $mcast_grp/$plen dev veth0 autojoin"
+
+ run_cmd "tc -n $ns2 filter replace dev vx0 ingress pref 1 handle 101 proto all flower enc_dst_ip $mcast_grp action pass"
+
+ run_cmd "bridge -n $ns1 mdb replace dev vx0 port vx0 grp $grp permanent filter_mode include source_list $valid_src dst $mcast_grp src_vni 10010 via veth0"
+
+ # Check that invalid source is not forwarded.
+ run_cmd "ip netns exec $ns1 $mz br0.10 -A $invalid_src -B $grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q"
+ tc_check_packets "$ns2" "dev vx0 ingress" 101 0
+ log_test $? 0 "Block excluded source"
+
+ # Check that valid source is forwarded.
+ run_cmd "ip netns exec $ns1 $mz br0.10 -A $valid_src -B $grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q"
+ tc_check_packets "$ns2" "dev vx0 ingress" 101 1
+ log_test $? 0 "Forward valid source"
+
+ # Remove the VTEP from the multicast group.
+ run_cmd "ip -n $ns2 address del $mcast_grp/$plen dev veth0"
+
+ # Check that valid source is not received anymore.
+ run_cmd "ip netns exec $ns1 $mz br0.10 -A $valid_src -B $grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q"
+ tc_check_packets "$ns2" "dev vx0 ingress" 101 1
+ log_test $? 0 "Receive of valid source after removal from group"
+}
+
+starg_include_p2mp_ipv4_ipv4()
+{
+ local ns1=$ns1_v4
+ local ns2=$ns2_v4
+ local mcast_grp=238.1.1.1
+ local plen=32
+ local grp=239.1.1.1
+ local valid_src=192.0.2.129
+ local invalid_src=192.0.2.145
+
+ echo
+ echo "Data path: (*, G) INCLUDE - P2MP - IPv4 overlay / IPv4 underlay"
+ echo "---------------------------------------------------------------"
+
+ starg_include_p2mp_common $ns1 $ns2 $mcast_grp $plen $grp \
+ $valid_src $invalid_src "mausezahn"
+}
+
+starg_include_p2mp_ipv6_ipv4()
+{
+ local ns1=$ns1_v4
+ local ns2=$ns2_v4
+ local mcast_grp=238.1.1.1
+ local plen=32
+ local grp=ff0e::1
+ local valid_src=2001:db8:100::1
+ local invalid_src=2001:db8:200::1
+
+ echo
+ echo "Data path: (*, G) INCLUDE - P2MP - IPv6 overlay / IPv4 underlay"
+ echo "---------------------------------------------------------------"
+
+ starg_include_p2mp_common $ns1 $ns2 $mcast_grp $plen $grp \
+ $valid_src $invalid_src "mausezahn -6"
+}
+
+starg_include_p2mp_ipv4_ipv6()
+{
+ local ns1=$ns1_v6
+ local ns2=$ns2_v6
+ local mcast_grp=ff0e::2
+ local plen=128
+ local grp=239.1.1.1
+ local valid_src=192.0.2.129
+ local invalid_src=192.0.2.145
+
+ echo
+ echo "Data path: (*, G) INCLUDE - P2MP - IPv4 overlay / IPv6 underlay"
+ echo "---------------------------------------------------------------"
+
+ starg_include_p2mp_common $ns1 $ns2 $mcast_grp $plen $grp \
+ $valid_src $invalid_src "mausezahn"
+}
+
+starg_include_p2mp_ipv6_ipv6()
+{
+ local ns1=$ns1_v6
+ local ns2=$ns2_v6
+ local mcast_grp=ff0e::2
+ local plen=128
+ local grp=ff0e::1
+ local valid_src=2001:db8:100::1
+ local invalid_src=2001:db8:200::1
+
+ echo
+ echo "Data path: (*, G) INCLUDE - P2MP - IPv6 overlay / IPv6 underlay"
+ echo "---------------------------------------------------------------"
+
+ starg_include_p2mp_common $ns1 $ns2 $mcast_grp $plen $grp \
+ $valid_src $invalid_src "mausezahn -6"
+}
+
+egress_vni_translation_common()
+{
+ local ns1=$1; shift
+ local ns2=$1; shift
+ local mcast_grp=$1; shift
+ local plen=$1; shift
+ local proto=$1; shift
+ local grp=$1; shift
+ local src=$1; shift
+ local mz=$1; shift
+
+ # When P2MP tunnels are used with optimized inter-subnet multicast
+ # (OISM) [1], the ingress VTEP does not perform VNI translation and
+ # uses the VNI of the source broadcast domain (BD). If the egress VTEP
+ # is a member in the source BD, then no VNI translation is needed.
+ # Otherwise, the egress VTEP needs to translate the VNI to the
+ # supplementary broadcast domain (SBD) VNI, which is usually the L3VNI.
+ #
+ # In this test, remove the VTEP in the second namespace from VLAN 10
+ # (VNI 10010) and make sure that a packet sent from this VLAN on the
+ # first VTEP is received by the SVI corresponding to the L3VNI (14000 /
+ # VLAN 4000) on the second VTEP.
+ #
+ # The second VTEP will be able to decapsulate the packet with VNI 10010
+ # because this VNI is configured on its shared VXLAN device. Later,
+ # when ingressing the bridge, the VNI to VLAN lookup will fail because
+ # the VTEP is not a member in VLAN 10, which will cause the packet to
+ # be tagged with VLAN 4000 since it is configured as PVID.
+ #
+ # [1] https://datatracker.ietf.org/doc/html/draft-ietf-bess-evpn-irb-mcast
+
+ run_cmd "tc -n $ns2 qdisc replace dev br0.4000 clsact"
+ run_cmd "ip -n $ns2 address replace $mcast_grp/$plen dev veth0 autojoin"
+ run_cmd "tc -n $ns2 filter replace dev br0.4000 ingress pref 1 handle 101 proto $proto flower src_ip $src dst_ip $grp action pass"
+
+ run_cmd "bridge -n $ns1 mdb replace dev vx0 port vx0 grp $grp src $src permanent dst $mcast_grp src_vni 10010 via veth0"
+
+ # Remove the second VTEP from VLAN 10.
+ run_cmd "bridge -n $ns2 vlan del vid 10 dev vx0"
+
+ # Make sure that packets sent from the first VTEP over VLAN 10 are
+ # received by the SVI corresponding to the L3VNI (14000 / VLAN 4000) on
+ # the second VTEP, since it is configured as PVID.
+ run_cmd "ip netns exec $ns1 $mz br0.10 -A $src -B $grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q"
+ tc_check_packets "$ns2" "dev br0.4000 ingress" 101 1
+ log_test $? 0 "Egress VNI translation - PVID configured"
+
+ # Remove PVID flag from VLAN 4000 on the second VTEP and make sure
+ # packets are no longer received by the SVI interface.
+ run_cmd "bridge -n $ns2 vlan add vid 4000 dev vx0"
+ run_cmd "ip netns exec $ns1 $mz br0.10 -A $src -B $grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q"
+ tc_check_packets "$ns2" "dev br0.4000 ingress" 101 1
+ log_test $? 0 "Egress VNI translation - no PVID configured"
+
+ # Reconfigure the PVID and make sure packets are received again.
+ run_cmd "bridge -n $ns2 vlan add vid 4000 dev vx0 pvid"
+ run_cmd "ip netns exec $ns1 $mz br0.10 -A $src -B $grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q"
+ tc_check_packets "$ns2" "dev br0.4000 ingress" 101 2
+ log_test $? 0 "Egress VNI translation - PVID reconfigured"
+}
+
+egress_vni_translation_ipv4_ipv4()
+{
+ local ns1=$ns1_v4
+ local ns2=$ns2_v4
+ local mcast_grp=238.1.1.1
+ local plen=32
+ local proto="ipv4"
+ local grp=239.1.1.1
+ local src=192.0.2.129
+
+ echo
+ echo "Data path: Egress VNI translation - IPv4 overlay / IPv4 underlay"
+ echo "----------------------------------------------------------------"
+
+ egress_vni_translation_common $ns1 $ns2 $mcast_grp $plen $proto $grp \
+ $src "mausezahn"
+}
+
+egress_vni_translation_ipv6_ipv4()
+{
+ local ns1=$ns1_v4
+ local ns2=$ns2_v4
+ local mcast_grp=238.1.1.1
+ local plen=32
+ local proto="ipv6"
+ local grp=ff0e::1
+ local src=2001:db8:100::1
+
+ echo
+ echo "Data path: Egress VNI translation - IPv6 overlay / IPv4 underlay"
+ echo "----------------------------------------------------------------"
+
+ egress_vni_translation_common $ns1 $ns2 $mcast_grp $plen $proto $grp \
+ $src "mausezahn -6"
+}
+
+egress_vni_translation_ipv4_ipv6()
+{
+ local ns1=$ns1_v6
+ local ns2=$ns2_v6
+ local mcast_grp=ff0e::2
+ local plen=128
+ local proto="ipv4"
+ local grp=239.1.1.1
+ local src=192.0.2.129
+
+ echo
+ echo "Data path: Egress VNI translation - IPv4 overlay / IPv6 underlay"
+ echo "----------------------------------------------------------------"
+
+ egress_vni_translation_common $ns1 $ns2 $mcast_grp $plen $proto $grp \
+ $src "mausezahn"
+}
+
+egress_vni_translation_ipv6_ipv6()
+{
+ local ns1=$ns1_v6
+ local ns2=$ns2_v6
+ local mcast_grp=ff0e::2
+ local plen=128
+ local proto="ipv6"
+ local grp=ff0e::1
+ local src=2001:db8:100::1
+
+ echo
+ echo "Data path: Egress VNI translation - IPv6 overlay / IPv6 underlay"
+ echo "----------------------------------------------------------------"
+
+ egress_vni_translation_common $ns1 $ns2 $mcast_grp $plen $proto $grp \
+ $src "mausezahn -6"
+}
+
+all_zeros_mdb_common()
+{
+ local ns1=$1; shift
+ local ns2=$1; shift
+ local vtep1_ip=$1; shift
+ local vtep2_ip=$1; shift
+ local vtep3_ip=$1; shift
+ local vtep4_ip=$1; shift
+ local plen=$1; shift
+ local ipv4_grp=239.1.1.1
+ local ipv4_unreg_grp=239.2.2.2
+ local ipv4_ll_grp=224.0.0.100
+ local ipv4_src=192.0.2.129
+ local ipv6_grp=ff0e::1
+ local ipv6_unreg_grp=ff0e::2
+ local ipv6_ll_grp=ff02::1
+ local ipv6_src=2001:db8:100::1
+
+ # Install all-zeros (catchall) MDB entries for IPv4 and IPv6 traffic
+ # and make sure they only forward unregistered IP multicast traffic
+ # which is not link-local. Also make sure that each entry only forwards
+ # traffic from the matching address family.
+
+ # Associate two different VTEPs with one all-zeros MDB entry: Two with
+ # the IPv4 entry (0.0.0.0) and another two with the IPv6 one (::).
+ run_cmd "bridge -n $ns1 mdb replace dev vx0 port vx0 grp 0.0.0.0 permanent dst $vtep1_ip src_vni 10010"
+ run_cmd "bridge -n $ns1 mdb replace dev vx0 port vx0 grp 0.0.0.0 permanent dst $vtep2_ip src_vni 10010"
+ run_cmd "bridge -n $ns1 mdb replace dev vx0 port vx0 grp :: permanent dst $vtep3_ip src_vni 10010"
+ run_cmd "bridge -n $ns1 mdb replace dev vx0 port vx0 grp :: permanent dst $vtep4_ip src_vni 10010"
+
+ # Associate one VTEP from each set with a regular MDB entry: One with
+ # an IPv4 entry and another with an IPv6 one.
+ run_cmd "bridge -n $ns1 mdb replace dev vx0 port vx0 grp $ipv4_grp permanent dst $vtep1_ip src_vni 10010"
+ run_cmd "bridge -n $ns1 mdb replace dev vx0 port vx0 grp $ipv6_grp permanent dst $vtep3_ip src_vni 10010"
+
+ # Add filters to match on decapsulated traffic in the second namespace.
+ run_cmd "tc -n $ns2 qdisc replace dev vx0 clsact"
+ run_cmd "tc -n $ns2 filter replace dev vx0 ingress pref 1 handle 101 proto all flower enc_dst_ip $vtep1_ip action pass"
+ run_cmd "tc -n $ns2 filter replace dev vx0 ingress pref 1 handle 102 proto all flower enc_dst_ip $vtep2_ip action pass"
+ run_cmd "tc -n $ns2 filter replace dev vx0 ingress pref 1 handle 103 proto all flower enc_dst_ip $vtep3_ip action pass"
+ run_cmd "tc -n $ns2 filter replace dev vx0 ingress pref 1 handle 104 proto all flower enc_dst_ip $vtep4_ip action pass"
+
+ # Configure the VTEP addresses in the second namespace to enable
+ # decapsulation.
+ run_cmd "ip -n $ns2 address replace $vtep1_ip/$plen dev lo"
+ run_cmd "ip -n $ns2 address replace $vtep2_ip/$plen dev lo"
+ run_cmd "ip -n $ns2 address replace $vtep3_ip/$plen dev lo"
+ run_cmd "ip -n $ns2 address replace $vtep4_ip/$plen dev lo"
+
+ # Send registered IPv4 multicast and make sure it only arrives to the
+ # first VTEP.
+ run_cmd "ip netns exec $ns1 mausezahn br0.10 -A $ipv4_src -B $ipv4_grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q"
+ tc_check_packets "$ns2" "dev vx0 ingress" 101 1
+ log_test $? 0 "Registered IPv4 multicast - first VTEP"
+ tc_check_packets "$ns2" "dev vx0 ingress" 102 0
+ log_test $? 0 "Registered IPv4 multicast - second VTEP"
+
+ # Send unregistered IPv4 multicast that is not link-local and make sure
+ # it arrives to the first and second VTEPs.
+ run_cmd "ip netns exec $ns1 mausezahn br0.10 -A $ipv4_src -B $ipv4_unreg_grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q"
+ tc_check_packets "$ns2" "dev vx0 ingress" 101 2
+ log_test $? 0 "Unregistered IPv4 multicast - first VTEP"
+ tc_check_packets "$ns2" "dev vx0 ingress" 102 1
+ log_test $? 0 "Unregistered IPv4 multicast - second VTEP"
+
+ # Send IPv4 link-local multicast traffic and make sure it does not
+ # arrive to any VTEP.
+ run_cmd "ip netns exec $ns1 mausezahn br0.10 -A $ipv4_src -B $ipv4_ll_grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q"
+ tc_check_packets "$ns2" "dev vx0 ingress" 101 2
+ log_test $? 0 "Link-local IPv4 multicast - first VTEP"
+ tc_check_packets "$ns2" "dev vx0 ingress" 102 1
+ log_test $? 0 "Link-local IPv4 multicast - second VTEP"
+
+ # Send registered IPv4 multicast using a unicast MAC address and make
+ # sure it does not arrive to any VTEP.
+ run_cmd "ip netns exec $ns1 mausezahn br0.10 -a own -b 00:11:22:33:44:55 -A $ipv4_src -B $ipv4_grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q"
+ tc_check_packets "$ns2" "dev vx0 ingress" 101 2
+ log_test $? 0 "Registered IPv4 multicast with a unicast MAC - first VTEP"
+ tc_check_packets "$ns2" "dev vx0 ingress" 102 1
+ log_test $? 0 "Registered IPv4 multicast with a unicast MAC - second VTEP"
+
+ # Send registered IPv4 multicast using a broadcast MAC address and make
+ # sure it does not arrive to any VTEP.
+ run_cmd "ip netns exec $ns1 mausezahn br0.10 -a own -b bcast -A $ipv4_src -B $ipv4_grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q"
+ tc_check_packets "$ns2" "dev vx0 ingress" 101 2
+ log_test $? 0 "Registered IPv4 multicast with a broadcast MAC - first VTEP"
+ tc_check_packets "$ns2" "dev vx0 ingress" 102 1
+ log_test $? 0 "Registered IPv4 multicast with a broadcast MAC - second VTEP"
+
+ # Make sure IPv4 traffic did not reach the VTEPs associated with
+ # IPv6 entries.
+ tc_check_packets "$ns2" "dev vx0 ingress" 103 0
+ log_test $? 0 "IPv4 traffic - third VTEP"
+ tc_check_packets "$ns2" "dev vx0 ingress" 104 0
+ log_test $? 0 "IPv4 traffic - fourth VTEP"
+
+ # Reset IPv4 filters before testing IPv6 traffic.
+ run_cmd "tc -n $ns2 filter replace dev vx0 ingress pref 1 handle 101 proto all flower enc_dst_ip $vtep1_ip action pass"
+ run_cmd "tc -n $ns2 filter replace dev vx0 ingress pref 1 handle 102 proto all flower enc_dst_ip $vtep2_ip action pass"
+
+ # Send registered IPv6 multicast and make sure it only arrives to the
+ # third VTEP.
+ run_cmd "ip netns exec $ns1 mausezahn -6 br0.10 -A $ipv6_src -B $ipv6_grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q"
+ tc_check_packets "$ns2" "dev vx0 ingress" 103 1
+ log_test $? 0 "Registered IPv6 multicast - third VTEP"
+ tc_check_packets "$ns2" "dev vx0 ingress" 104 0
+ log_test $? 0 "Registered IPv6 multicast - fourth VTEP"
+
+ # Send unregistered IPv6 multicast that is not link-local and make sure
+ # it arrives to the third and fourth VTEPs.
+ run_cmd "ip netns exec $ns1 mausezahn -6 br0.10 -A $ipv6_src -B $ipv6_unreg_grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q"
+ tc_check_packets "$ns2" "dev vx0 ingress" 103 2
+ log_test $? 0 "Unregistered IPv6 multicast - third VTEP"
+ tc_check_packets "$ns2" "dev vx0 ingress" 104 1
+ log_test $? 0 "Unregistered IPv6 multicast - fourth VTEP"
+
+ # Send IPv6 link-local multicast traffic and make sure it does not
+ # arrive to any VTEP.
+ run_cmd "ip netns exec $ns1 mausezahn -6 br0.10 -A $ipv6_src -B $ipv6_ll_grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q"
+ tc_check_packets "$ns2" "dev vx0 ingress" 103 2
+ log_test $? 0 "Link-local IPv6 multicast - third VTEP"
+ tc_check_packets "$ns2" "dev vx0 ingress" 104 1
+ log_test $? 0 "Link-local IPv6 multicast - fourth VTEP"
+
+ # Send registered IPv6 multicast using a unicast MAC address and make
+ # sure it does not arrive to any VTEP.
+ run_cmd "ip netns exec $ns1 mausezahn -6 br0.10 -a own -b 00:11:22:33:44:55 -A $ipv6_src -B $ipv6_grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q"
+ tc_check_packets "$ns2" "dev vx0 ingress" 103 2
+ log_test $? 0 "Registered IPv6 multicast with a unicast MAC - third VTEP"
+ tc_check_packets "$ns2" "dev vx0 ingress" 104 1
+ log_test $? 0 "Registered IPv6 multicast with a unicast MAC - fourth VTEP"
+
+ # Send registered IPv6 multicast using a broadcast MAC address and make
+ # sure it does not arrive to any VTEP.
+ run_cmd "ip netns exec $ns1 mausezahn -6 br0.10 -a own -b bcast -A $ipv6_src -B $ipv6_grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q"
+ tc_check_packets "$ns2" "dev vx0 ingress" 103 2
+ log_test $? 0 "Registered IPv6 multicast with a broadcast MAC - third VTEP"
+ tc_check_packets "$ns2" "dev vx0 ingress" 104 1
+ log_test $? 0 "Registered IPv6 multicast with a broadcast MAC - fourth VTEP"
+
+ # Make sure IPv6 traffic did not reach the VTEPs associated with
+ # IPv4 entries.
+ tc_check_packets "$ns2" "dev vx0 ingress" 101 0
+ log_test $? 0 "IPv6 traffic - first VTEP"
+ tc_check_packets "$ns2" "dev vx0 ingress" 102 0
+ log_test $? 0 "IPv6 traffic - second VTEP"
+}
+
+all_zeros_mdb_ipv4()
+{
+ local ns1=$ns1_v4
+ local ns2=$ns2_v4
+ local vtep1_ip=198.51.100.101
+ local vtep2_ip=198.51.100.102
+ local vtep3_ip=198.51.100.103
+ local vtep4_ip=198.51.100.104
+ local plen=32
+
+ echo
+ echo "Data path: All-zeros MDB entry - IPv4 underlay"
+ echo "----------------------------------------------"
+
+ all_zeros_mdb_common $ns1 $ns2 $vtep1_ip $vtep2_ip $vtep3_ip \
+ $vtep4_ip $plen
+}
+
+all_zeros_mdb_ipv6()
+{
+ local ns1=$ns1_v6
+ local ns2=$ns2_v6
+ local vtep1_ip=2001:db8:1000::1
+ local vtep2_ip=2001:db8:2000::1
+ local vtep3_ip=2001:db8:3000::1
+ local vtep4_ip=2001:db8:4000::1
+ local plen=128
+
+ echo
+ echo "Data path: All-zeros MDB entry - IPv6 underlay"
+ echo "----------------------------------------------"
+
+ all_zeros_mdb_common $ns1 $ns2 $vtep1_ip $vtep2_ip $vtep3_ip \
+ $vtep4_ip $plen
+}
+
+mdb_fdb_common()
+{
+ local ns1=$1; shift
+ local ns2=$1; shift
+ local vtep1_ip=$1; shift
+ local vtep2_ip=$1; shift
+ local plen=$1; shift
+ local proto=$1; shift
+ local grp=$1; shift
+ local src=$1; shift
+ local mz=$1; shift
+
+ # Install an MDB entry and an FDB entry and make sure that the FDB
+ # entry only forwards traffic that was not forwarded by the MDB.
+
+ # Associate the MDB entry with one VTEP and the FDB entry with another
+ # VTEP.
+ run_cmd "bridge -n $ns1 mdb replace dev vx0 port vx0 grp $grp permanent dst $vtep1_ip src_vni 10010"
+ run_cmd "bridge -n $ns1 fdb add 00:00:00:00:00:00 dev vx0 self static dst $vtep2_ip src_vni 10010"
+
+ # Add filters to match on decapsulated traffic in the second namespace.
+ run_cmd "tc -n $ns2 qdisc replace dev vx0 clsact"
+ run_cmd "tc -n $ns2 filter replace dev vx0 ingress pref 1 handle 101 proto $proto flower ip_proto udp dst_port 54321 enc_dst_ip $vtep1_ip action pass"
+ run_cmd "tc -n $ns2 filter replace dev vx0 ingress pref 1 handle 102 proto $proto flower ip_proto udp dst_port 54321 enc_dst_ip $vtep2_ip action pass"
+
+ # Configure the VTEP addresses in the second namespace to enable
+ # decapsulation.
+ run_cmd "ip -n $ns2 address replace $vtep1_ip/$plen dev lo"
+ run_cmd "ip -n $ns2 address replace $vtep2_ip/$plen dev lo"
+
+ # Send IP multicast traffic and make sure it is forwarded by the MDB
+ # and only arrives to the first VTEP.
+ run_cmd "ip netns exec $ns1 $mz br0.10 -A $src -B $grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q"
+ tc_check_packets "$ns2" "dev vx0 ingress" 101 1
+ log_test $? 0 "IP multicast - first VTEP"
+ tc_check_packets "$ns2" "dev vx0 ingress" 102 0
+ log_test $? 0 "IP multicast - second VTEP"
+
+ # Send broadcast traffic and make sure it is forwarded by the FDB and
+ # only arrives to the second VTEP.
+ run_cmd "ip netns exec $ns1 $mz br0.10 -a own -b bcast -A $src -B $grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q"
+ tc_check_packets "$ns2" "dev vx0 ingress" 101 1
+ log_test $? 0 "Broadcast - first VTEP"
+ tc_check_packets "$ns2" "dev vx0 ingress" 102 1
+ log_test $? 0 "Broadcast - second VTEP"
+
+ # Remove the MDB entry and make sure that IP multicast is now forwarded
+ # by the FDB to the second VTEP.
+ run_cmd "bridge -n $ns1 mdb del dev vx0 port vx0 grp $grp dst $vtep1_ip src_vni 10010"
+ run_cmd "ip netns exec $ns1 $mz br0.10 -A $src -B $grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q"
+ tc_check_packets "$ns2" "dev vx0 ingress" 101 1
+ log_test $? 0 "IP multicast after removal - first VTEP"
+ tc_check_packets "$ns2" "dev vx0 ingress" 102 2
+ log_test $? 0 "IP multicast after removal - second VTEP"
+}
+
+mdb_fdb_ipv4_ipv4()
+{
+ local ns1=$ns1_v4
+ local ns2=$ns2_v4
+ local vtep1_ip=198.51.100.100
+ local vtep2_ip=198.51.100.200
+ local plen=32
+ local proto="ipv4"
+ local grp=239.1.1.1
+ local src=192.0.2.129
+
+ echo
+ echo "Data path: MDB with FDB - IPv4 overlay / IPv4 underlay"
+ echo "------------------------------------------------------"
+
+ mdb_fdb_common $ns1 $ns2 $vtep1_ip $vtep2_ip $plen $proto $grp $src \
+ "mausezahn"
+}
+
+mdb_fdb_ipv6_ipv4()
+{
+ local ns1=$ns1_v4
+ local ns2=$ns2_v4
+ local vtep1_ip=198.51.100.100
+ local vtep2_ip=198.51.100.200
+ local plen=32
+ local proto="ipv6"
+ local grp=ff0e::1
+ local src=2001:db8:100::1
+
+ echo
+ echo "Data path: MDB with FDB - IPv6 overlay / IPv4 underlay"
+ echo "------------------------------------------------------"
+
+ mdb_fdb_common $ns1 $ns2 $vtep1_ip $vtep2_ip $plen $proto $grp $src \
+ "mausezahn -6"
+}
+
+mdb_fdb_ipv4_ipv6()
+{
+ local ns1=$ns1_v6
+ local ns2=$ns2_v6
+ local vtep1_ip=2001:db8:1000::1
+ local vtep2_ip=2001:db8:2000::1
+ local plen=128
+ local proto="ipv4"
+ local grp=239.1.1.1
+ local src=192.0.2.129
+
+ echo
+ echo "Data path: MDB with FDB - IPv4 overlay / IPv6 underlay"
+ echo "------------------------------------------------------"
+
+ mdb_fdb_common $ns1 $ns2 $vtep1_ip $vtep2_ip $plen $proto $grp $src \
+ "mausezahn"
+}
+
+mdb_fdb_ipv6_ipv6()
+{
+ local ns1=$ns1_v6
+ local ns2=$ns2_v6
+ local vtep1_ip=2001:db8:1000::1
+ local vtep2_ip=2001:db8:2000::1
+ local plen=128
+ local proto="ipv6"
+ local grp=ff0e::1
+ local src=2001:db8:100::1
+
+ echo
+ echo "Data path: MDB with FDB - IPv6 overlay / IPv6 underlay"
+ echo "------------------------------------------------------"
+
+ mdb_fdb_common $ns1 $ns2 $vtep1_ip $vtep2_ip $plen $proto $grp $src \
+ "mausezahn -6"
+}
+
+mdb_grp1_loop()
+{
+ local ns1=$1; shift
+ local vtep1_ip=$1; shift
+ local grp1=$1; shift
+
+ while true; do
+ bridge -n $ns1 mdb del dev vx0 port vx0 grp $grp1 dst $vtep1_ip src_vni 10010
+ bridge -n $ns1 mdb add dev vx0 port vx0 grp $grp1 permanent dst $vtep1_ip src_vni 10010
+ done >/dev/null 2>&1
+}
+
+mdb_grp2_loop()
+{
+ local ns1=$1; shift
+ local vtep1_ip=$1; shift
+ local vtep2_ip=$1; shift
+ local grp2=$1; shift
+
+ while true; do
+ bridge -n $ns1 mdb del dev vx0 port vx0 grp $grp2 dst $vtep1_ip src_vni 10010
+ bridge -n $ns1 mdb add dev vx0 port vx0 grp $grp2 permanent dst $vtep1_ip src_vni 10010
+ bridge -n $ns1 mdb replace dev vx0 port vx0 grp $grp2 permanent dst $vtep2_ip src_vni 10010
+ done >/dev/null 2>&1
+}
+
+mdb_torture_common()
+{
+ local ns1=$1; shift
+ local vtep1_ip=$1; shift
+ local vtep2_ip=$1; shift
+ local grp1=$1; shift
+ local grp2=$1; shift
+ local src=$1; shift
+ local mz=$1; shift
+ local pid1
+ local pid2
+ local pid3
+ local pid4
+
+ # Continuously send two streams that are forwarded by two different MDB
+ # entries. The first entry will be added and deleted in a loop. This
+ # allows us to test that the data path does not use freed MDB entry
+ # memory. The second entry will have two remotes, one that is added and
+ # deleted in a loop and another that is replaced in a loop. This allows
+ # us to test that the data path does not use freed remote entry memory.
+ # The test is considered successful if nothing crashed.
+
+ # Create the MDB entries that will be continuously deleted / replaced.
+ run_cmd "bridge -n $ns1 mdb replace dev vx0 port vx0 grp $grp1 permanent dst $vtep1_ip src_vni 10010"
+ run_cmd "bridge -n $ns1 mdb replace dev vx0 port vx0 grp $grp2 permanent dst $vtep1_ip src_vni 10010"
+ run_cmd "bridge -n $ns1 mdb replace dev vx0 port vx0 grp $grp2 permanent dst $vtep2_ip src_vni 10010"
+
+ mdb_grp1_loop $ns1 $vtep1_ip $grp1 &
+ pid1=$!
+ mdb_grp2_loop $ns1 $vtep1_ip $vtep2_ip $grp2 &
+ pid2=$!
+ ip netns exec $ns1 $mz br0.10 -A $src -B $grp1 -t udp sp=12345,dp=54321 -p 100 -c 0 -q &
+ pid3=$!
+ ip netns exec $ns1 $mz br0.10 -A $src -B $grp2 -t udp sp=12345,dp=54321 -p 100 -c 0 -q &
+ pid4=$!
+
+ sleep 30
+ kill -9 $pid1 $pid2 $pid3 $pid4
+ wait $pid1 $pid2 $pid3 $pid4 2>/dev/null
+
+ log_test 0 0 "Torture test"
+}
+
+mdb_torture_ipv4_ipv4()
+{
+ local ns1=$ns1_v4
+ local vtep1_ip=198.51.100.100
+ local vtep2_ip=198.51.100.200
+ local grp1=239.1.1.1
+ local grp2=239.2.2.2
+ local src=192.0.2.129
+
+ echo
+ echo "Data path: MDB torture test - IPv4 overlay / IPv4 underlay"
+ echo "----------------------------------------------------------"
+
+ mdb_torture_common $ns1 $vtep1_ip $vtep2_ip $grp1 $grp2 $src \
+ "mausezahn"
+}
+
+mdb_torture_ipv6_ipv4()
+{
+ local ns1=$ns1_v4
+ local vtep1_ip=198.51.100.100
+ local vtep2_ip=198.51.100.200
+ local grp1=ff0e::1
+ local grp2=ff0e::2
+ local src=2001:db8:100::1
+
+ echo
+ echo "Data path: MDB torture test - IPv6 overlay / IPv4 underlay"
+ echo "----------------------------------------------------------"
+
+ mdb_torture_common $ns1 $vtep1_ip $vtep2_ip $grp1 $grp2 $src \
+ "mausezahn -6"
+}
+
+mdb_torture_ipv4_ipv6()
+{
+ local ns1=$ns1_v6
+ local vtep1_ip=2001:db8:1000::1
+ local vtep2_ip=2001:db8:2000::1
+ local grp1=239.1.1.1
+ local grp2=239.2.2.2
+ local src=192.0.2.129
+
+ echo
+ echo "Data path: MDB torture test - IPv4 overlay / IPv6 underlay"
+ echo "----------------------------------------------------------"
+
+ mdb_torture_common $ns1 $vtep1_ip $vtep2_ip $grp1 $grp2 $src \
+ "mausezahn"
+}
+
+mdb_torture_ipv6_ipv6()
+{
+ local ns1=$ns1_v6
+ local vtep1_ip=2001:db8:1000::1
+ local vtep2_ip=2001:db8:2000::1
+ local grp1=ff0e::1
+ local grp2=ff0e::2
+ local src=2001:db8:100::1
+
+ echo
+ echo "Data path: MDB torture test - IPv6 overlay / IPv6 underlay"
+ echo "----------------------------------------------------------"
+
+ mdb_torture_common $ns1 $vtep1_ip $vtep2_ip $grp1 $grp2 $src \
+ "mausezahn -6"
+}
+
+################################################################################
+# Usage
+
+usage()
+{
+ cat <<EOF
+usage: ${0##*/} OPTS
+
+ -t <test> Test(s) to run (default: all)
+ (options: $TESTS)
+ -c Control path tests only
+ -d Data path tests only
+ -p Pause on fail
+ -P Pause after each test before cleanup
+ -v Verbose mode (show commands and output)
+EOF
+}
+
+################################################################################
+# Main
+
+trap cleanup EXIT
+
+while getopts ":t:cdpPvh" opt; do
+ case $opt in
+ t) TESTS=$OPTARG;;
+ c) TESTS=${CONTROL_PATH_TESTS};;
+ d) TESTS=${DATA_PATH_TESTS};;
+ p) PAUSE_ON_FAIL=yes;;
+ P) PAUSE=yes;;
+ v) VERBOSE=$(($VERBOSE + 1));;
+ h) usage; exit 0;;
+ *) usage; exit 1;;
+ esac
+done
+
+# Make sure we don't pause twice.
+[ "${PAUSE}" = "yes" ] && PAUSE_ON_FAIL=no
+
+if [ "$(id -u)" -ne 0 ];then
+ echo "SKIP: Need root privileges"
+ exit $ksft_skip;
+fi
+
+if [ ! -x "$(command -v ip)" ]; then
+ echo "SKIP: Could not run test without ip tool"
+ exit $ksft_skip
+fi
+
+if [ ! -x "$(command -v bridge)" ]; then
+ echo "SKIP: Could not run test without bridge tool"
+ exit $ksft_skip
+fi
+
+if [ ! -x "$(command -v mausezahn)" ]; then
+ echo "SKIP: Could not run test without mausezahn tool"
+ exit $ksft_skip
+fi
+
+if [ ! -x "$(command -v jq)" ]; then
+ echo "SKIP: Could not run test without jq tool"
+ exit $ksft_skip
+fi
+
+bridge mdb help 2>&1 | grep -q "flush"
+if [ $? -ne 0 ]; then
+ echo "SKIP: iproute2 bridge too old, missing VXLAN MDB flush support"
+ exit $ksft_skip
+fi
+
+# Start clean.
+cleanup
+
+for t in $TESTS
+do
+ setup; $t; cleanup;
+done
+
+if [ "$TESTS" != "none" ]; then
+ printf "\nTests passed: %3d\n" ${nsuccess}
+ printf "Tests failed: %3d\n" ${nfail}
+fi
+
+exit $ret
diff --git a/tools/testing/selftests/net/test_vxlan_nolocalbypass.sh b/tools/testing/selftests/net/test_vxlan_nolocalbypass.sh
new file mode 100755
index 000000000000..b8805983b728
--- /dev/null
+++ b/tools/testing/selftests/net/test_vxlan_nolocalbypass.sh
@@ -0,0 +1,238 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+# This test is for checking the [no]localbypass VXLAN device option. The test
+# configures two VXLAN devices in the same network namespace and a tc filter on
+# the loopback device that drops encapsulated packets. The test sends packets
+# from the first VXLAN device and verifies that by default these packets are
+# received by the second VXLAN device. The test then enables the nolocalbypass
+# option and verifies that packets are no longer received by the second VXLAN
+# device.
+
+source lib.sh
+ret=0
+
+TESTS="
+ nolocalbypass
+"
+VERBOSE=0
+PAUSE_ON_FAIL=no
+PAUSE=no
+
+################################################################################
+# Utilities
+
+log_test()
+{
+ local rc=$1
+ local expected=$2
+ local msg="$3"
+
+ if [ ${rc} -eq ${expected} ]; then
+ printf "TEST: %-60s [ OK ]\n" "${msg}"
+ nsuccess=$((nsuccess+1))
+ else
+ ret=1
+ nfail=$((nfail+1))
+ printf "TEST: %-60s [FAIL]\n" "${msg}"
+ if [ "$VERBOSE" = "1" ]; then
+ echo " rc=$rc, expected $expected"
+ fi
+
+ if [ "${PAUSE_ON_FAIL}" = "yes" ]; then
+ echo
+ echo "hit enter to continue, 'q' to quit"
+ read a
+ [ "$a" = "q" ] && exit 1
+ fi
+ fi
+
+ if [ "${PAUSE}" = "yes" ]; then
+ echo
+ echo "hit enter to continue, 'q' to quit"
+ read a
+ [ "$a" = "q" ] && exit 1
+ fi
+
+ [ "$VERBOSE" = "1" ] && echo
+}
+
+run_cmd()
+{
+ local cmd="$1"
+ local out
+ local stderr="2>/dev/null"
+
+ if [ "$VERBOSE" = "1" ]; then
+ printf "COMMAND: $cmd\n"
+ stderr=
+ fi
+
+ out=$(eval $cmd $stderr)
+ rc=$?
+ if [ "$VERBOSE" = "1" -a -n "$out" ]; then
+ echo " $out"
+ fi
+
+ return $rc
+}
+
+tc_check_packets()
+{
+ local ns=$1; shift
+ local id=$1; shift
+ local handle=$1; shift
+ local count=$1; shift
+ local pkts
+
+ sleep 0.1
+ pkts=$(tc -n $ns -j -s filter show $id \
+ | jq ".[] | select(.options.handle == $handle) | \
+ .options.actions[0].stats.packets")
+ [[ $pkts == $count ]]
+}
+
+################################################################################
+# Setup
+
+setup()
+{
+ setup_ns ns1
+
+ ip -n $ns1 address add 192.0.2.1/32 dev lo
+ ip -n $ns1 address add 198.51.100.1/32 dev lo
+
+ ip -n $ns1 link add name vx0 up type vxlan id 100 local 198.51.100.1 \
+ dstport 4789 nolearning
+ ip -n $ns1 link add name vx1 up type vxlan id 100 dstport 4790
+}
+
+cleanup()
+{
+ cleanup_ns $ns1
+}
+
+################################################################################
+# Tests
+
+nolocalbypass()
+{
+ local smac=00:01:02:03:04:05
+ local dmac=00:0a:0b:0c:0d:0e
+
+ run_cmd "bridge -n $ns1 fdb add $dmac dev vx0 self static dst 192.0.2.1 port 4790"
+
+ run_cmd "tc -n $ns1 qdisc add dev vx1 clsact"
+ run_cmd "tc -n $ns1 filter add dev vx1 ingress pref 1 handle 101 proto all flower src_mac $smac dst_mac $dmac action pass"
+
+ run_cmd "tc -n $ns1 qdisc add dev lo clsact"
+ run_cmd "tc -n $ns1 filter add dev lo ingress pref 1 handle 101 proto ip flower ip_proto udp dst_port 4790 action drop"
+
+ run_cmd "ip -n $ns1 -d -j link show dev vx0 | jq -e '.[][\"linkinfo\"][\"info_data\"][\"localbypass\"] == true'"
+ log_test $? 0 "localbypass enabled"
+
+ run_cmd "ip netns exec $ns1 mausezahn vx0 -a $smac -b $dmac -c 1 -p 100 -q"
+
+ tc_check_packets "$ns1" "dev vx1 ingress" 101 1
+ log_test $? 0 "Packet received by local VXLAN device - localbypass"
+
+ run_cmd "ip -n $ns1 link set dev vx0 type vxlan nolocalbypass"
+
+ run_cmd "ip -n $ns1 -d -j link show dev vx0 | jq -e '.[][\"linkinfo\"][\"info_data\"][\"localbypass\"] == false'"
+ log_test $? 0 "localbypass disabled"
+
+ run_cmd "ip netns exec $ns1 mausezahn vx0 -a $smac -b $dmac -c 1 -p 100 -q"
+
+ tc_check_packets "$ns1" "dev vx1 ingress" 101 1
+ log_test $? 0 "Packet not received by local VXLAN device - nolocalbypass"
+
+ run_cmd "ip -n $ns1 link set dev vx0 type vxlan localbypass"
+
+ run_cmd "ip -n $ns1 -d -j link show dev vx0 | jq -e '.[][\"linkinfo\"][\"info_data\"][\"localbypass\"] == true'"
+ log_test $? 0 "localbypass enabled"
+
+ run_cmd "ip netns exec $ns1 mausezahn vx0 -a $smac -b $dmac -c 1 -p 100 -q"
+
+ tc_check_packets "$ns1" "dev vx1 ingress" 101 2
+ log_test $? 0 "Packet received by local VXLAN device - localbypass"
+}
+
+################################################################################
+# Usage
+
+usage()
+{
+ cat <<EOF
+usage: ${0##*/} OPTS
+
+ -t <test> Test(s) to run (default: all)
+ (options: $TESTS)
+ -p Pause on fail
+ -P Pause after each test before cleanup
+ -v Verbose mode (show commands and output)
+EOF
+}
+
+################################################################################
+# Main
+
+trap cleanup EXIT
+
+while getopts ":t:pPvh" opt; do
+ case $opt in
+ t) TESTS=$OPTARG ;;
+ p) PAUSE_ON_FAIL=yes;;
+ P) PAUSE=yes;;
+ v) VERBOSE=$(($VERBOSE + 1));;
+ h) usage; exit 0;;
+ *) usage; exit 1;;
+ esac
+done
+
+# Make sure we don't pause twice.
+[ "${PAUSE}" = "yes" ] && PAUSE_ON_FAIL=no
+
+if [ "$(id -u)" -ne 0 ];then
+ echo "SKIP: Need root privileges"
+ exit $ksft_skip;
+fi
+
+if [ ! -x "$(command -v ip)" ]; then
+ echo "SKIP: Could not run test without ip tool"
+ exit $ksft_skip
+fi
+
+if [ ! -x "$(command -v bridge)" ]; then
+ echo "SKIP: Could not run test without bridge tool"
+ exit $ksft_skip
+fi
+
+if [ ! -x "$(command -v mausezahn)" ]; then
+ echo "SKIP: Could not run test without mausezahn tool"
+ exit $ksft_skip
+fi
+
+if [ ! -x "$(command -v jq)" ]; then
+ echo "SKIP: Could not run test without jq tool"
+ exit $ksft_skip
+fi
+
+ip link help vxlan 2>&1 | grep -q "localbypass"
+if [ $? -ne 0 ]; then
+ echo "SKIP: iproute2 ip too old, missing VXLAN nolocalbypass support"
+ exit $ksft_skip
+fi
+
+cleanup
+
+for t in $TESTS
+do
+ setup; $t; cleanup;
+done
+
+if [ "$TESTS" != "none" ]; then
+ printf "\nTests passed: %3d\n" ${nsuccess}
+ printf "Tests failed: %3d\n" ${nfail}
+fi
+
+exit $ret
diff --git a/tools/testing/selftests/net/test_vxlan_under_vrf.sh b/tools/testing/selftests/net/test_vxlan_under_vrf.sh
index 09f9ed92cbe4..ae8fbe3f0779 100755
--- a/tools/testing/selftests/net/test_vxlan_under_vrf.sh
+++ b/tools/testing/selftests/net/test_vxlan_under_vrf.sh
@@ -43,15 +43,14 @@
# This tests both the connectivity between vm-1 and vm-2, and that the underlay
# can be moved in and out of the vrf by unsetting and setting veth0's master.
+source lib.sh
set -e
cleanup() {
ip link del veth-hv-1 2>/dev/null || true
ip link del veth-tap 2>/dev/null || true
- for ns in hv-1 hv-2 vm-1 vm-2; do
- ip netns del $ns || true
- done
+ cleanup_ns $hv_1 $hv_2 $vm_1 $vm_2
}
# Clean start
@@ -60,70 +59,75 @@ cleanup &> /dev/null
[[ $1 == "clean" ]] && exit 0
trap cleanup EXIT
+setup_ns hv_1 hv_2 vm_1 vm_2
+hv[1]=$hv_1
+hv[2]=$hv_2
+vm[1]=$vm_1
+vm[2]=$vm_2
# Setup "Hypervisors" simulated with netns
ip link add veth-hv-1 type veth peer name veth-hv-2
setup-hv-networking() {
- hv=$1
+ id=$1
- ip netns add hv-$hv
- ip link set veth-hv-$hv netns hv-$hv
- ip -netns hv-$hv link set veth-hv-$hv name veth0
+ ip link set veth-hv-$id netns ${hv[$id]}
+ ip -netns ${hv[$id]} link set veth-hv-$id name veth0
- ip -netns hv-$hv link add vrf-underlay type vrf table 1
- ip -netns hv-$hv link set vrf-underlay up
- ip -netns hv-$hv addr add 172.16.0.$hv/24 dev veth0
- ip -netns hv-$hv link set veth0 up
+ ip -netns ${hv[$id]} link add vrf-underlay type vrf table 1
+ ip -netns ${hv[$id]} link set vrf-underlay up
+ ip -netns ${hv[$id]} addr add 172.16.0.$id/24 dev veth0
+ ip -netns ${hv[$id]} link set veth0 up
- ip -netns hv-$hv link add br0 type bridge
- ip -netns hv-$hv link set br0 up
+ ip -netns ${hv[$id]} link add br0 type bridge
+ ip -netns ${hv[$id]} link set br0 up
- ip -netns hv-$hv link add vxlan0 type vxlan id 10 local 172.16.0.$hv dev veth0 dstport 4789
- ip -netns hv-$hv link set vxlan0 master br0
- ip -netns hv-$hv link set vxlan0 up
+ ip -netns ${hv[$id]} link add vxlan0 type vxlan id 10 local 172.16.0.$id dev veth0 dstport 4789
+ ip -netns ${hv[$id]} link set vxlan0 master br0
+ ip -netns ${hv[$id]} link set vxlan0 up
}
setup-hv-networking 1
setup-hv-networking 2
# Check connectivity between HVs by pinging hv-2 from hv-1
echo -n "Checking HV connectivity "
-ip netns exec hv-1 ping -c 1 -W 1 172.16.0.2 &> /dev/null || (echo "[FAIL]"; false)
+ip netns exec $hv_1 ping -c 1 -W 1 172.16.0.2 &> /dev/null || (echo "[FAIL]"; false)
echo "[ OK ]"
# Setups a "VM" simulated by a netns an a veth pair
setup-vm() {
id=$1
- ip netns add vm-$id
ip link add veth-tap type veth peer name veth-hv
- ip link set veth-tap netns hv-$id
- ip -netns hv-$id link set veth-tap master br0
- ip -netns hv-$id link set veth-tap up
+ ip link set veth-tap netns ${hv[$id]}
+ ip -netns ${hv[$id]} link set veth-tap master br0
+ ip -netns ${hv[$id]} link set veth-tap up
+
+ ip link set veth-hv address 02:1d:8d:dd:0c:6$id
- ip link set veth-hv netns vm-$id
- ip -netns vm-$id addr add 10.0.0.$id/24 dev veth-hv
- ip -netns vm-$id link set veth-hv up
+ ip link set veth-hv netns ${vm[$id]}
+ ip -netns ${vm[$id]} addr add 10.0.0.$id/24 dev veth-hv
+ ip -netns ${vm[$id]} link set veth-hv up
}
setup-vm 1
setup-vm 2
# Setup VTEP routes to make ARP work
-bridge -netns hv-1 fdb add 00:00:00:00:00:00 dev vxlan0 dst 172.16.0.2 self permanent
-bridge -netns hv-2 fdb add 00:00:00:00:00:00 dev vxlan0 dst 172.16.0.1 self permanent
+bridge -netns $hv_1 fdb add 00:00:00:00:00:00 dev vxlan0 dst 172.16.0.2 self permanent
+bridge -netns $hv_2 fdb add 00:00:00:00:00:00 dev vxlan0 dst 172.16.0.1 self permanent
echo -n "Check VM connectivity through VXLAN (underlay in the default VRF) "
-ip netns exec vm-1 ping -c 1 -W 1 10.0.0.2 &> /dev/null || (echo "[FAIL]"; false)
+ip netns exec $vm_1 ping -c 1 -W 1 10.0.0.2 &> /dev/null || (echo "[FAIL]"; false)
echo "[ OK ]"
# Move the underlay to a non-default VRF
-ip -netns hv-1 link set veth0 vrf vrf-underlay
-ip -netns hv-1 link set veth0 down
-ip -netns hv-1 link set veth0 up
-ip -netns hv-2 link set veth0 vrf vrf-underlay
-ip -netns hv-2 link set veth0 down
-ip -netns hv-2 link set veth0 up
+ip -netns $hv_1 link set veth0 vrf vrf-underlay
+ip -netns $hv_1 link set vxlan0 down
+ip -netns $hv_1 link set vxlan0 up
+ip -netns $hv_2 link set veth0 vrf vrf-underlay
+ip -netns $hv_2 link set vxlan0 down
+ip -netns $hv_2 link set vxlan0 up
echo -n "Check VM connectivity through VXLAN (underlay in a VRF) "
-ip netns exec vm-1 ping -c 1 -W 1 10.0.0.2 &> /dev/null || (echo "[FAIL]"; false)
+ip netns exec $vm_1 ping -c 1 -W 1 10.0.0.2 &> /dev/null || (echo "[FAIL]"; false)
echo "[ OK ]"
diff --git a/tools/testing/selftests/net/test_vxlan_vnifiltering.sh b/tools/testing/selftests/net/test_vxlan_vnifiltering.sh
new file mode 100755
index 000000000000..6127a78ee988
--- /dev/null
+++ b/tools/testing/selftests/net/test_vxlan_vnifiltering.sh
@@ -0,0 +1,607 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+# This test is for checking the VXLAN vni filtering api and
+# datapath.
+# It simulates two hypervisors running two VMs each using four network
+# six namespaces: two for the HVs, four for the VMs. Each VM is
+# connected to a separate bridge. The VM's use overlapping vlans and
+# hence the separate bridge domain. Each vxlan device is a collect
+# metadata device with vni filtering and hence has the ability to
+# terminate configured vni's only.
+
+# +--------------------------------+ +------------------------------------+
+# | vm-11 netns | | vm-21 netns |
+# | | | |
+# |+------------+ +-------------+ | |+-------------+ +----------------+ |
+# ||veth-11.10 | |veth-11.20 | | ||veth-21.10 | | veth-21.20 | |
+# ||10.0.10.11/24 |10.0.20.11/24| | ||10.0.10.21/24| | 10.0.20.21/24 | |
+# |+------|-----+ +|------------+ | |+-----------|-+ +---|------------+ |
+# | | | | | | | |
+# | | | | | +------------+ |
+# | +------------+ | | | veth-21 | |
+# | | veth-11 | | | | | |
+# | | | | | +-----|------+ |
+# | +-----|------+ | | | |
+# | | | | | |
+# +------------|-------------------+ +---------------|--------------------+
+# +------------|-----------------------------------------|-------------------+
+# | +-----|------+ +-----|------+ |
+# | |vethhv-11 | |vethhv-21 | |
+# | +----|-------+ +-----|------+ |
+# | +---|---+ +---|--+ |
+# | | br1 | | br2 | |
+# | +---|---+ +---|--+ |
+# | +---|----+ +---|--+ |
+# | | vxlan1| |vxlan2| |
+# | +--|-----+ +--|---+ |
+# | | | |
+# | | +---------------------+ | |
+# | | |veth0 | | |
+# | +---------|172.16.0.1/24 -----------+ |
+# | |2002:fee1::1/64 | |
+# | hv-1 netns +--------|------------+ |
+# +-----------------------------|--------------------------------------------+
+# |
+# +-----------------------------|--------------------------------------------+
+# | hv-2 netns +--------|-------------+ |
+# | | veth0 | |
+# | +------| 172.16.0.2/24 |---+ |
+# | | | 2002:fee1::2/64 | | |
+# | | | | | |
+# | | +----------------------+ | - |
+# | | | |
+# | +-|-------+ +--------|-+ |
+# | | vxlan1 | | vxlan2 | |
+# | +----|----+ +---|------+ |
+# | +--|--+ +-|---+ |
+# | | br1 | | br2 | |
+# | +--|--+ +--|--+ |
+# | +-----|-------+ +----|-------+ |
+# | | vethhv-12 | |vethhv-22 | |
+# | +------|------+ +-------|----+ |
+# +-----------------|----------------------------|---------------------------+
+# | |
+# +-----------------|-----------------+ +--------|---------------------------+
+# | +-------|---+ | | +--|---------+ |
+# | | veth-12 | | | |veth-22 | |
+# | +-|--------|+ | | +--|--------|+ |
+# | | | | | | | |
+# |+----------|--+ +---|-----------+ | |+-------|-----+ +|---------------+ |
+# ||veth-12.10 | |veth-12.20 | | ||veth-22.10 | |veth-22.20 | |
+# ||10.0.10.12/24| |10.0.20.12/24 | | ||10.0.10.22/24| |10.0.20.22/24 | |
+# |+-------------+ +---------------+ | |+-------------+ +----------------+ |
+# | | | |
+# | | | |
+# | vm-12 netns | |vm-22 netns |
+# +-----------------------------------+ +------------------------------------+
+#
+#
+# This test tests the new vxlan vnifiltering api
+source lib.sh
+ret=0
+
+# all tests in this script. Can be overridden with -t option
+TESTS="
+ vxlan_vnifilter_api
+ vxlan_vnifilter_datapath
+ vxlan_vnifilter_datapath_pervni
+ vxlan_vnifilter_datapath_mgroup
+ vxlan_vnifilter_datapath_mgroup_pervni
+ vxlan_vnifilter_metadata_and_traditional_mix
+"
+VERBOSE=0
+PAUSE_ON_FAIL=no
+PAUSE=no
+
+which ping6 > /dev/null 2>&1 && ping6=$(which ping6) || ping6=$(which ping)
+
+log_test()
+{
+ local rc=$1
+ local expected=$2
+ local msg="$3"
+
+ if [ ${rc} -eq ${expected} ]; then
+ printf " TEST: %-60s [ OK ]\n" "${msg}"
+ nsuccess=$((nsuccess+1))
+ else
+ ret=1
+ nfail=$((nfail+1))
+ printf " TEST: %-60s [FAIL]\n" "${msg}"
+ if [ "${PAUSE_ON_FAIL}" = "yes" ]; then
+ echo
+ echo "hit enter to continue, 'q' to quit"
+ read a
+ [ "$a" = "q" ] && exit 1
+ fi
+ fi
+
+ if [ "${PAUSE}" = "yes" ]; then
+ echo
+ echo "hit enter to continue, 'q' to quit"
+ read a
+ [ "$a" = "q" ] && exit 1
+ fi
+}
+
+run_cmd()
+{
+ local cmd="$1"
+ local out
+ local stderr="2>/dev/null"
+
+ if [ "$VERBOSE" = "1" ]; then
+ printf "COMMAND: $cmd\n"
+ stderr=
+ fi
+
+ out=$(eval $cmd $stderr)
+ rc=$?
+ if [ "$VERBOSE" = "1" -a -n "$out" ]; then
+ echo " $out"
+ fi
+
+ return $rc
+}
+
+check_hv_connectivity() {
+ ip netns exec $hv_1 ping -c 1 -W 1 $1 &>/dev/null
+ sleep 1
+ ip netns exec $hv_1 ping -c 1 -W 1 $2 &>/dev/null
+
+ return $?
+}
+
+check_vm_connectivity() {
+ run_cmd "ip netns exec $vm_11 ping -c 1 -W 1 10.0.10.12"
+ log_test $? 0 "VM connectivity over $1 (ipv4 default rdst)"
+
+ run_cmd "ip netns exec $vm_21 ping -c 1 -W 1 10.0.10.22"
+ log_test $? 0 "VM connectivity over $1 (ipv6 default rdst)"
+}
+
+cleanup() {
+ ip link del veth-hv-1 2>/dev/null || true
+ ip link del vethhv-11 vethhv-12 vethhv-21 vethhv-22 2>/dev/null || true
+
+ cleanup_ns $hv_1 $hv_2 $vm_11 $vm_21 $vm_12 $vm_22 $vm_31 $vm_32
+}
+
+trap cleanup EXIT
+
+setup-hv-networking() {
+ id=$1
+ local1=$2
+ mask1=$3
+ local2=$4
+ mask2=$5
+
+ ip link set veth-hv-$id netns ${hv[$id]}
+ ip -netns ${hv[$id]} link set veth-hv-$id name veth0
+ ip -netns ${hv[$id]} addr add $local1/$mask1 dev veth0
+ ip -netns ${hv[$id]} addr add $local2/$mask2 dev veth0
+ ip -netns ${hv[$id]} link set veth0 up
+}
+
+# Setups a "VM" simulated by a netns an a veth pair
+# example: setup-vm <hvid> <vmid> <brid> <VATTRS> <mcast_for_bum>
+# VATTRS = comma separated "<vlan>-<v[46]>-<localip>-<remoteip>-<VTYPE>-<vxlandstport>"
+# VTYPE = vxlan device type. "default = traditional device, metadata = metadata device
+# vnifilter = vnifiltering device,
+# vnifilterg = vnifiltering device with per vni group/remote"
+# example:
+# setup-vm 1 11 1 \
+# 10-v4-172.16.0.1-239.1.1.100-vnifilterg,20-v4-172.16.0.1-239.1.1.100-vnifilterg 1
+#
+setup-vm() {
+ hvid=$1
+ vmid=$2
+ brid=$3
+ vattrs=$4
+ mcast=$5
+ lastvxlandev=""
+
+ # create bridge
+ ip -netns ${hv[$hvid]} link add br$brid type bridge vlan_filtering 1 vlan_default_pvid 0 \
+ mcast_snooping 0
+ ip -netns ${hv[$hvid]} link set br$brid up
+
+ # create vm namespace and interfaces and connect to hypervisor
+ # namespace
+ hvvethif="vethhv-$vmid"
+ vmvethif="veth-$vmid"
+ ip link add $hvvethif type veth peer name $vmvethif
+ ip link set $hvvethif netns ${hv[$hvid]}
+ ip link set $vmvethif netns ${vm[$vmid]}
+ ip -netns ${hv[$hvid]} link set $hvvethif up
+ ip -netns ${vm[$vmid]} link set $vmvethif up
+ ip -netns ${hv[$hvid]} link set $hvvethif master br$brid
+
+ # configure VM vlan/vni filtering on hypervisor
+ for vmap in $(echo $vattrs | cut -d "," -f1- --output-delimiter=' ')
+ do
+ local vid=$(echo $vmap | awk -F'-' '{print ($1)}')
+ local family=$(echo $vmap | awk -F'-' '{print ($2)}')
+ local localip=$(echo $vmap | awk -F'-' '{print ($3)}')
+ local group=$(echo $vmap | awk -F'-' '{print ($4)}')
+ local vtype=$(echo $vmap | awk -F'-' '{print ($5)}')
+ local port=$(echo $vmap | awk -F'-' '{print ($6)}')
+
+ ip -netns ${vm[$vmid]} link add name $vmvethif.$vid link $vmvethif type vlan id $vid
+ ip -netns ${vm[$vmid]} addr add 10.0.$vid.$vmid/24 dev $vmvethif.$vid
+ ip -netns ${vm[$vmid]} link set $vmvethif.$vid up
+
+ tid=$vid
+ vxlandev="vxlan$brid"
+ vxlandevflags=""
+
+ if [[ -n $vtype && $vtype == "metadata" ]]; then
+ vxlandevflags="$vxlandevflags external"
+ elif [[ -n $vtype && $vtype == "vnifilter" || $vtype == "vnifilterg" ]]; then
+ vxlandevflags="$vxlandevflags external vnifilter"
+ tid=$((vid+brid))
+ else
+ vxlandevflags="$vxlandevflags id $tid"
+ vxlandev="vxlan$tid"
+ fi
+
+ if [[ -n $vtype && $vtype != "vnifilterg" ]]; then
+ if [[ -n "$group" && "$group" != "null" ]]; then
+ if [ $mcast -eq 1 ]; then
+ vxlandevflags="$vxlandevflags group $group"
+ else
+ vxlandevflags="$vxlandevflags remote $group"
+ fi
+ fi
+ fi
+
+ if [[ -n "$port" && "$port" != "default" ]]; then
+ vxlandevflags="$vxlandevflags dstport $port"
+ fi
+
+ # create vxlan device
+ if [ "$vxlandev" != "$lastvxlandev" ]; then
+ ip -netns ${hv[$hvid]} link add $vxlandev type vxlan local $localip $vxlandevflags dev veth0 2>/dev/null
+ ip -netns ${hv[$hvid]} link set $vxlandev master br$brid
+ ip -netns ${hv[$hvid]} link set $vxlandev up
+ lastvxlandev=$vxlandev
+ fi
+
+ # add vlan
+ bridge -netns ${hv[$hvid]} vlan add vid $vid dev $hvvethif
+ bridge -netns ${hv[$hvid]} vlan add vid $vid pvid dev $vxlandev
+
+ # Add bridge vni filter for tx
+ if [[ -n $vtype && $vtype == "metadata" || $vtype == "vnifilter" || $vtype == "vnifilterg" ]]; then
+ bridge -netns ${hv[$hvid]} link set dev $vxlandev vlan_tunnel on
+ bridge -netns ${hv[$hvid]} vlan add dev $vxlandev vid $vid tunnel_info id $tid
+ fi
+
+ if [[ -n $vtype && $vtype == "metadata" ]]; then
+ bridge -netns ${hv[$hvid]} fdb add 00:00:00:00:00:00 dev $vxlandev \
+ src_vni $tid vni $tid dst $group self
+ elif [[ -n $vtype && $vtype == "vnifilter" ]]; then
+ # Add per vni rx filter with 'bridge vni' api
+ bridge -netns ${hv[$hvid]} vni add dev $vxlandev vni $tid
+ elif [[ -n $vtype && $vtype == "vnifilterg" ]]; then
+ # Add per vni group config with 'bridge vni' api
+ if [ -n "$group" ]; then
+ if [ $mcast -eq 1 ]; then
+ bridge -netns ${hv[$hvid]} vni add dev $vxlandev vni $tid group $group
+ else
+ bridge -netns ${hv[$hvid]} vni add dev $vxlandev vni $tid remote $group
+ fi
+ fi
+ fi
+ done
+}
+
+setup_vnifilter_api()
+{
+ ip link add veth-host type veth peer name veth-testns
+ setup_ns testns
+ ip link set veth-testns netns $testns
+}
+
+cleanup_vnifilter_api()
+{
+ ip link del veth-host 2>/dev/null || true
+ ip netns del $testns 2>/dev/null || true
+}
+
+# tests vxlan filtering api
+vxlan_vnifilter_api()
+{
+ hv1addr1="172.16.0.1"
+ hv2addr1="172.16.0.2"
+ hv1addr2="2002:fee1::1"
+ hv2addr2="2002:fee1::2"
+ localip="172.16.0.1"
+ group="239.1.1.101"
+
+ cleanup_vnifilter_api &>/dev/null
+ setup_vnifilter_api
+
+ # Duplicate vni test
+ # create non-vnifiltering traditional vni device
+ run_cmd "ip -netns $testns link add vxlan100 type vxlan id 100 local $localip dev veth-testns dstport 4789"
+ log_test $? 0 "Create traditional vxlan device"
+
+ # create vni filtering device
+ run_cmd "ip -netns $testns link add vxlan-ext1 type vxlan vnifilter local $localip dev veth-testns dstport 4789"
+ log_test $? 1 "Cannot create vnifilter device without external flag"
+
+ run_cmd "ip -netns $testns link add vxlan-ext1 type vxlan external vnifilter local $localip dev veth-testns dstport 4789"
+ log_test $? 0 "Creating external vxlan device with vnifilter flag"
+
+ run_cmd "bridge -netns $testns vni add dev vxlan-ext1 vni 100"
+ log_test $? 0 "Cannot set in-use vni id on vnifiltering device"
+
+ run_cmd "bridge -netns $testns vni add dev vxlan-ext1 vni 200"
+ log_test $? 0 "Set new vni id on vnifiltering device"
+
+ run_cmd "ip -netns $testns link add vxlan-ext2 type vxlan external vnifilter local $localip dev veth-testns dstport 4789"
+ log_test $? 0 "Create second external vxlan device with vnifilter flag"
+
+ run_cmd "bridge -netns $testns vni add dev vxlan-ext2 vni 200"
+ log_test $? 255 "Cannot set in-use vni id on vnifiltering device"
+
+ run_cmd "bridge -netns $testns vni add dev vxlan-ext2 vni 300"
+ log_test $? 0 "Set new vni id on vnifiltering device"
+
+ # check in bridge vni show
+ run_cmd "bridge -netns $testns vni add dev vxlan-ext2 vni 300"
+ log_test $? 0 "Update vni id on vnifiltering device"
+
+ run_cmd "bridge -netns $testns vni add dev vxlan-ext2 vni 400"
+ log_test $? 0 "Add new vni id on vnifiltering device"
+
+ # add multicast group per vni
+ run_cmd "bridge -netns $testns vni add dev vxlan-ext1 vni 200 group $group"
+ log_test $? 0 "Set multicast group on existing vni"
+
+ # add multicast group per vni
+ run_cmd "bridge -netns $testns vni add dev vxlan-ext2 vni 300 group $group"
+ log_test $? 0 "Set multicast group on existing vni"
+
+ # set vnifilter on an existing external vxlan device
+ run_cmd "ip -netns $testns link set dev vxlan-ext1 type vxlan external vnifilter"
+ log_test $? 2 "Cannot set vnifilter flag on a device"
+
+ # change vxlan vnifilter flag
+ run_cmd "ip -netns $testns link set dev vxlan-ext1 type vxlan external novnifilter"
+ log_test $? 2 "Cannot unset vnifilter flag on a device"
+}
+
+# Sanity test vnifilter datapath
+# vnifilter vnis inherit BUM group from
+# vxlan device
+vxlan_vnifilter_datapath()
+{
+ hv1addr1="172.16.0.1"
+ hv2addr1="172.16.0.2"
+ hv1addr2="2002:fee1::1"
+ hv2addr2="2002:fee1::2"
+
+ setup_ns hv_1 hv_2
+ hv[1]=$hv_1
+ hv[2]=$hv_2
+ ip link add veth-hv-1 type veth peer name veth-hv-2
+ setup-hv-networking 1 $hv1addr1 24 $hv1addr2 64 $hv2addr1 $hv2addr2
+ setup-hv-networking 2 $hv2addr1 24 $hv2addr2 64 $hv1addr1 $hv1addr2
+
+ check_hv_connectivity hv2addr1 hv2addr2
+
+ setup_ns vm_11 vm_21 vm_12 vm_22
+ vm[11]=$vm_11
+ vm[21]=$vm_21
+ vm[12]=$vm_12
+ vm[22]=$vm_22
+ setup-vm 1 11 1 10-v4-$hv1addr1-$hv2addr1-vnifilter,20-v4-$hv1addr1-$hv2addr1-vnifilter 0
+ setup-vm 1 21 2 10-v6-$hv1addr2-$hv2addr2-vnifilter,20-v6-$hv1addr2-$hv2addr2-vnifilter 0
+
+ setup-vm 2 12 1 10-v4-$hv2addr1-$hv1addr1-vnifilter,20-v4-$hv2addr1-$hv1addr1-vnifilter 0
+ setup-vm 2 22 2 10-v6-$hv2addr2-$hv1addr2-vnifilter,20-v6-$hv2addr2-$hv1addr2-vnifilter 0
+
+ check_vm_connectivity "vnifiltering vxlan"
+}
+
+# Sanity test vnifilter datapath
+# with vnifilter per vni configured BUM
+# group/remote
+vxlan_vnifilter_datapath_pervni()
+{
+ hv1addr1="172.16.0.1"
+ hv2addr1="172.16.0.2"
+ hv1addr2="2002:fee1::1"
+ hv2addr2="2002:fee1::2"
+
+ setup_ns hv_1 hv_2
+ hv[1]=$hv_1
+ hv[2]=$hv_2
+ ip link add veth-hv-1 type veth peer name veth-hv-2
+ setup-hv-networking 1 $hv1addr1 24 $hv1addr2 64
+ setup-hv-networking 2 $hv2addr1 24 $hv2addr2 64
+
+ check_hv_connectivity hv2addr1 hv2addr2
+
+ setup_ns vm_11 vm_21 vm_12 vm_22
+ vm[11]=$vm_11
+ vm[21]=$vm_21
+ vm[12]=$vm_12
+ vm[22]=$vm_22
+ setup-vm 1 11 1 10-v4-$hv1addr1-$hv2addr1-vnifilterg,20-v4-$hv1addr1-$hv2addr1-vnifilterg 0
+ setup-vm 1 21 2 10-v6-$hv1addr2-$hv2addr2-vnifilterg,20-v6-$hv1addr2-$hv2addr2-vnifilterg 0
+
+ setup-vm 2 12 1 10-v4-$hv2addr1-$hv1addr1-vnifilterg,20-v4-$hv2addr1-$hv1addr1-vnifilterg 0
+ setup-vm 2 22 2 10-v6-$hv2addr2-$hv1addr2-vnifilterg,20-v6-$hv2addr2-$hv1addr2-vnifilterg 0
+
+ check_vm_connectivity "vnifiltering vxlan pervni remote"
+}
+
+
+vxlan_vnifilter_datapath_mgroup()
+{
+ hv1addr1="172.16.0.1"
+ hv2addr1="172.16.0.2"
+ hv1addr2="2002:fee1::1"
+ hv2addr2="2002:fee1::2"
+ group="239.1.1.100"
+ group6="ff07::1"
+
+ setup_ns hv_1 hv_2
+ hv[1]=$hv_1
+ hv[2]=$hv_2
+ ip link add veth-hv-1 type veth peer name veth-hv-2
+ setup-hv-networking 1 $hv1addr1 24 $hv1addr2 64
+ setup-hv-networking 2 $hv2addr1 24 $hv2addr2 64
+
+ check_hv_connectivity hv2addr1 hv2addr2
+
+ setup_ns vm_11 vm_21 vm_12 vm_22
+ vm[11]=$vm_11
+ vm[21]=$vm_21
+ vm[12]=$vm_12
+ vm[22]=$vm_22
+ setup-vm 1 11 1 10-v4-$hv1addr1-$group-vnifilter,20-v4-$hv1addr1-$group-vnifilter 1
+ setup-vm 1 21 2 "10-v6-$hv1addr2-$group6-vnifilter,20-v6-$hv1addr2-$group6-vnifilter" 1
+
+ setup-vm 2 12 1 10-v4-$hv2addr1-$group-vnifilter,20-v4-$hv2addr1-$group-vnifilter 1
+ setup-vm 2 22 2 10-v6-$hv2addr2-$group6-vnifilter,20-v6-$hv2addr2-$group6-vnifilter 1
+
+ check_vm_connectivity "vnifiltering vxlan mgroup"
+}
+
+vxlan_vnifilter_datapath_mgroup_pervni()
+{
+ hv1addr1="172.16.0.1"
+ hv2addr1="172.16.0.2"
+ hv1addr2="2002:fee1::1"
+ hv2addr2="2002:fee1::2"
+ group="239.1.1.100"
+ group6="ff07::1"
+
+ setup_ns hv_1 hv_2
+ hv[1]=$hv_1
+ hv[2]=$hv_2
+ ip link add veth-hv-1 type veth peer name veth-hv-2
+ setup-hv-networking 1 $hv1addr1 24 $hv1addr2 64
+ setup-hv-networking 2 $hv2addr1 24 $hv2addr2 64
+
+ check_hv_connectivity hv2addr1 hv2addr2
+
+ setup_ns vm_11 vm_21 vm_12 vm_22
+ vm[11]=$vm_11
+ vm[21]=$vm_21
+ vm[12]=$vm_12
+ vm[22]=$vm_22
+ setup-vm 1 11 1 10-v4-$hv1addr1-$group-vnifilterg,20-v4-$hv1addr1-$group-vnifilterg 1
+ setup-vm 1 21 2 10-v6-$hv1addr2-$group6-vnifilterg,20-v6-$hv1addr2-$group6-vnifilterg 1
+
+ setup-vm 2 12 1 10-v4-$hv2addr1-$group-vnifilterg,20-v4-$hv2addr1-$group-vnifilterg 1
+ setup-vm 2 22 2 10-v6-$hv2addr2-$group6-vnifilterg,20-v6-$hv2addr2-$group6-vnifilterg 1
+
+ check_vm_connectivity "vnifiltering vxlan pervni mgroup"
+}
+
+vxlan_vnifilter_metadata_and_traditional_mix()
+{
+ hv1addr1="172.16.0.1"
+ hv2addr1="172.16.0.2"
+ hv1addr2="2002:fee1::1"
+ hv2addr2="2002:fee1::2"
+
+ setup_ns hv_1 hv_2
+ hv[1]=$hv_1
+ hv[2]=$hv_2
+ ip link add veth-hv-1 type veth peer name veth-hv-2
+ setup-hv-networking 1 $hv1addr1 24 $hv1addr2 64
+ setup-hv-networking 2 $hv2addr1 24 $hv2addr2 64
+
+ check_hv_connectivity hv2addr1 hv2addr2
+
+ setup_ns vm_11 vm_21 vm_31 vm_12 vm_22 vm_32
+ vm[11]=$vm_11
+ vm[21]=$vm_21
+ vm[31]=$vm_31
+ vm[12]=$vm_12
+ vm[22]=$vm_22
+ vm[32]=$vm_32
+ setup-vm 1 11 1 10-v4-$hv1addr1-$hv2addr1-vnifilter,20-v4-$hv1addr1-$hv2addr1-vnifilter 0
+ setup-vm 1 21 2 10-v6-$hv1addr2-$hv2addr2-vnifilter,20-v6-$hv1addr2-$hv2addr2-vnifilter 0
+ setup-vm 1 31 3 30-v4-$hv1addr1-$hv2addr1-default-4790,40-v6-$hv1addr2-$hv2addr2-default-4790,50-v4-$hv1addr1-$hv2addr1-metadata-4791 0
+
+
+ setup-vm 2 12 1 10-v4-$hv2addr1-$hv1addr1-vnifilter,20-v4-$hv2addr1-$hv1addr1-vnifilter 0
+ setup-vm 2 22 2 10-v6-$hv2addr2-$hv1addr2-vnifilter,20-v6-$hv2addr2-$hv1addr2-vnifilter 0
+ setup-vm 2 32 3 30-v4-$hv2addr1-$hv1addr1-default-4790,40-v6-$hv2addr2-$hv1addr2-default-4790,50-v4-$hv2addr1-$hv1addr1-metadata-4791 0
+
+ check_vm_connectivity "vnifiltering vxlan pervni remote mix"
+
+ # check VM connectivity over traditional/non-vxlan filtering vxlan devices
+ run_cmd "ip netns exec $vm_31 ping -c 1 -W 1 10.0.30.32"
+ log_test $? 0 "VM connectivity over traditional vxlan (ipv4 default rdst)"
+
+ run_cmd "ip netns exec $vm_31 ping -c 1 -W 1 10.0.40.32"
+ log_test $? 0 "VM connectivity over traditional vxlan (ipv6 default rdst)"
+
+ run_cmd "ip netns exec $vm_31 ping -c 1 -W 1 10.0.50.32"
+ log_test $? 0 "VM connectivity over metadata nonfiltering vxlan (ipv4 default rdst)"
+}
+
+while getopts :t:pP46hv o
+do
+ case $o in
+ t) TESTS=$OPTARG;;
+ p) PAUSE_ON_FAIL=yes;;
+ P) PAUSE=yes;;
+ v) VERBOSE=$(($VERBOSE + 1));;
+ h) usage; exit 0;;
+ *) usage; exit 1;;
+ esac
+done
+
+# make sure we don't pause twice
+[ "${PAUSE}" = "yes" ] && PAUSE_ON_FAIL=no
+
+if [ "$(id -u)" -ne 0 ];then
+ echo "SKIP: Need root privileges"
+ exit $ksft_skip;
+fi
+
+if [ ! -x "$(command -v ip)" ]; then
+ echo "SKIP: Could not run test without ip tool"
+ exit $ksft_skip
+fi
+
+ip link help vxlan 2>&1 | grep -q "vnifilter"
+if [ $? -ne 0 ]; then
+ echo "SKIP: iproute2 too old, missing vxlan dev vnifilter setting"
+ sync
+ exit $ksft_skip
+fi
+
+bridge vni help 2>&1 | grep -q "Usage: bridge vni"
+if [ $? -ne 0 ]; then
+ echo "SKIP: iproute2 bridge lacks vxlan vnifiltering support"
+ exit $ksft_skip
+fi
+
+# start clean
+cleanup &> /dev/null
+
+for t in $TESTS
+do
+ case $t in
+ none) setup; exit 0;;
+ *) $t; cleanup;;
+ esac
+done
+
+if [ "$TESTS" != "none" ]; then
+ printf "\nTests passed: %3d\n" ${nsuccess}
+ printf "Tests failed: %3d\n" ${nfail}
+fi
+
+exit $ret
diff --git a/tools/testing/selftests/net/timestamping.c b/tools/testing/selftests/net/timestamping.c
index f4bb4fef0f39..044bc0e9ed81 100644
--- a/tools/testing/selftests/net/timestamping.c
+++ b/tools/testing/selftests/net/timestamping.c
@@ -47,7 +47,7 @@ static void usage(const char *error)
{
if (error)
printf("invalid option: %s\n", error);
- printf("timestamping interface option*\n\n"
+ printf("timestamping <interface> [bind_phc_index] [option]*\n\n"
"Options:\n"
" IP_MULTICAST_LOOP - looping outgoing multicasts\n"
" SO_TIMESTAMP - normal software time stamping, ms resolution\n"
@@ -58,8 +58,10 @@ static void usage(const char *error)
" SOF_TIMESTAMPING_RX_SOFTWARE - software fallback for incoming packets\n"
" SOF_TIMESTAMPING_SOFTWARE - request reporting of software time stamps\n"
" SOF_TIMESTAMPING_RAW_HARDWARE - request reporting of raw HW time stamps\n"
+ " SOF_TIMESTAMPING_BIND_PHC - request to bind a PHC of PTP vclock\n"
" SIOCGSTAMP - check last socket time stamp\n"
- " SIOCGSTAMPNS - more accurate socket time stamp\n");
+ " SIOCGSTAMPNS - more accurate socket time stamp\n"
+ " PTPV2 - use PTPv2 messages\n");
exit(1);
}
@@ -115,13 +117,28 @@ static const unsigned char sync[] = {
0x00, 0x00, 0x00, 0x00
};
-static void sendpacket(int sock, struct sockaddr *addr, socklen_t addr_len)
+static const unsigned char sync_v2[] = {
+ 0x00, 0x02, 0x00, 0x2C,
+ 0x00, 0x00, 0x02, 0x00,
+ 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0xFF,
+ 0xFE, 0x00, 0x00, 0x00,
+ 0x00, 0x01, 0x00, 0x01,
+ 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00,
+};
+
+static void sendpacket(int sock, struct sockaddr *addr, socklen_t addr_len, int ptpv2)
{
+ size_t sync_len = ptpv2 ? sizeof(sync_v2) : sizeof(sync);
+ const void *sync_p = ptpv2 ? sync_v2 : sync;
struct timeval now;
int res;
- res = sendto(sock, sync, sizeof(sync), 0,
- addr, addr_len);
+ res = sendto(sock, sync_p, sync_len, 0, addr, addr_len);
gettimeofday(&now, 0);
if (res < 0)
printf("%s: %s\n", "send", strerror(errno));
@@ -134,9 +151,11 @@ static void sendpacket(int sock, struct sockaddr *addr, socklen_t addr_len)
static void printpacket(struct msghdr *msg, int res,
char *data,
int sock, int recvmsg_flags,
- int siocgstamp, int siocgstampns)
+ int siocgstamp, int siocgstampns, int ptpv2)
{
struct sockaddr_in *from_addr = (struct sockaddr_in *)msg->msg_name;
+ size_t sync_len = ptpv2 ? sizeof(sync_v2) : sizeof(sync);
+ const void *sync_p = ptpv2 ? sync_v2 : sync;
struct cmsghdr *cmsg;
struct timeval tv;
struct timespec ts;
@@ -210,10 +229,9 @@ static void printpacket(struct msghdr *msg, int res,
"probably SO_EE_ORIGIN_TIMESTAMPING"
#endif
);
- if (res < sizeof(sync))
+ if (res < sync_len)
printf(" => truncated data?!");
- else if (!memcmp(sync, data + res - sizeof(sync),
- sizeof(sync)))
+ else if (!memcmp(sync_p, data + res - sync_len, sync_len))
printf(" => GOT OUR DATA BACK (HURRAY!)");
break;
}
@@ -257,7 +275,7 @@ static void printpacket(struct msghdr *msg, int res,
}
static void recvpacket(int sock, int recvmsg_flags,
- int siocgstamp, int siocgstampns)
+ int siocgstamp, int siocgstampns, int ptpv2)
{
char data[256];
struct msghdr msg;
@@ -288,18 +306,18 @@ static void recvpacket(int sock, int recvmsg_flags,
} else {
printpacket(&msg, res, data,
sock, recvmsg_flags,
- siocgstamp, siocgstampns);
+ siocgstamp, siocgstampns, ptpv2);
}
}
int main(int argc, char **argv)
{
- int so_timestamping_flags = 0;
int so_timestamp = 0;
int so_timestampns = 0;
int siocgstamp = 0;
int siocgstampns = 0;
int ip_multicast_loop = 0;
+ int ptpv2 = 0;
char *interface;
int i;
int enabled = 1;
@@ -307,6 +325,8 @@ int main(int argc, char **argv)
struct ifreq device;
struct ifreq hwtstamp;
struct hwtstamp_config hwconfig, hwconfig_requested;
+ struct so_timestamping so_timestamping_get = { 0, 0 };
+ struct so_timestamping so_timestamping = { 0, 0 };
struct sockaddr_in addr;
struct ip_mreq imr;
struct in_addr iaddr;
@@ -324,7 +344,12 @@ int main(int argc, char **argv)
exit(1);
}
- for (i = 2; i < argc; i++) {
+ if (argc >= 3 && sscanf(argv[2], "%d", &so_timestamping.bind_phc) == 1)
+ val = 3;
+ else
+ val = 2;
+
+ for (i = val; i < argc; i++) {
if (!strcasecmp(argv[i], "SO_TIMESTAMP"))
so_timestamp = 1;
else if (!strcasecmp(argv[i], "SO_TIMESTAMPNS"))
@@ -335,18 +360,22 @@ int main(int argc, char **argv)
siocgstampns = 1;
else if (!strcasecmp(argv[i], "IP_MULTICAST_LOOP"))
ip_multicast_loop = 1;
+ else if (!strcasecmp(argv[i], "PTPV2"))
+ ptpv2 = 1;
else if (!strcasecmp(argv[i], "SOF_TIMESTAMPING_TX_HARDWARE"))
- so_timestamping_flags |= SOF_TIMESTAMPING_TX_HARDWARE;
+ so_timestamping.flags |= SOF_TIMESTAMPING_TX_HARDWARE;
else if (!strcasecmp(argv[i], "SOF_TIMESTAMPING_TX_SOFTWARE"))
- so_timestamping_flags |= SOF_TIMESTAMPING_TX_SOFTWARE;
+ so_timestamping.flags |= SOF_TIMESTAMPING_TX_SOFTWARE;
else if (!strcasecmp(argv[i], "SOF_TIMESTAMPING_RX_HARDWARE"))
- so_timestamping_flags |= SOF_TIMESTAMPING_RX_HARDWARE;
+ so_timestamping.flags |= SOF_TIMESTAMPING_RX_HARDWARE;
else if (!strcasecmp(argv[i], "SOF_TIMESTAMPING_RX_SOFTWARE"))
- so_timestamping_flags |= SOF_TIMESTAMPING_RX_SOFTWARE;
+ so_timestamping.flags |= SOF_TIMESTAMPING_RX_SOFTWARE;
else if (!strcasecmp(argv[i], "SOF_TIMESTAMPING_SOFTWARE"))
- so_timestamping_flags |= SOF_TIMESTAMPING_SOFTWARE;
+ so_timestamping.flags |= SOF_TIMESTAMPING_SOFTWARE;
else if (!strcasecmp(argv[i], "SOF_TIMESTAMPING_RAW_HARDWARE"))
- so_timestamping_flags |= SOF_TIMESTAMPING_RAW_HARDWARE;
+ so_timestamping.flags |= SOF_TIMESTAMPING_RAW_HARDWARE;
+ else if (!strcasecmp(argv[i], "SOF_TIMESTAMPING_BIND_PHC"))
+ so_timestamping.flags |= SOF_TIMESTAMPING_BIND_PHC;
else
usage(argv[i]);
}
@@ -365,10 +394,11 @@ int main(int argc, char **argv)
hwtstamp.ifr_data = (void *)&hwconfig;
memset(&hwconfig, 0, sizeof(hwconfig));
hwconfig.tx_type =
- (so_timestamping_flags & SOF_TIMESTAMPING_TX_HARDWARE) ?
+ (so_timestamping.flags & SOF_TIMESTAMPING_TX_HARDWARE) ?
HWTSTAMP_TX_ON : HWTSTAMP_TX_OFF;
hwconfig.rx_filter =
- (so_timestamping_flags & SOF_TIMESTAMPING_RX_HARDWARE) ?
+ (so_timestamping.flags & SOF_TIMESTAMPING_RX_HARDWARE) ?
+ ptpv2 ? HWTSTAMP_FILTER_PTP_V2_L4_SYNC :
HWTSTAMP_FILTER_PTP_V1_L4_SYNC : HWTSTAMP_FILTER_NONE;
hwconfig_requested = hwconfig;
if (ioctl(sock, SIOCSHWTSTAMP, &hwtstamp) < 0) {
@@ -392,6 +422,9 @@ int main(int argc, char **argv)
sizeof(struct sockaddr_in)) < 0)
bail("bind");
+ if (setsockopt(sock, SOL_SOCKET, SO_BINDTODEVICE, interface, if_len))
+ bail("bind device");
+
/* set multicast group for outgoing packets */
inet_aton("224.0.1.130", &iaddr); /* alternate PTP domain 1 */
addr.sin_addr = iaddr;
@@ -423,10 +456,9 @@ int main(int argc, char **argv)
&enabled, sizeof(enabled)) < 0)
bail("setsockopt SO_TIMESTAMPNS");
- if (so_timestamping_flags &&
- setsockopt(sock, SOL_SOCKET, SO_TIMESTAMPING,
- &so_timestamping_flags,
- sizeof(so_timestamping_flags)) < 0)
+ if (so_timestamping.flags &&
+ setsockopt(sock, SOL_SOCKET, SO_TIMESTAMPING, &so_timestamping,
+ sizeof(so_timestamping)) < 0)
bail("setsockopt SO_TIMESTAMPING");
/* request IP_PKTINFO for debugging purposes */
@@ -447,14 +479,18 @@ int main(int argc, char **argv)
else
printf("SO_TIMESTAMPNS %d\n", val);
- if (getsockopt(sock, SOL_SOCKET, SO_TIMESTAMPING, &val, &len) < 0) {
+ len = sizeof(so_timestamping_get);
+ if (getsockopt(sock, SOL_SOCKET, SO_TIMESTAMPING, &so_timestamping_get,
+ &len) < 0) {
printf("%s: %s\n", "getsockopt SO_TIMESTAMPING",
strerror(errno));
} else {
- printf("SO_TIMESTAMPING %d\n", val);
- if (val != so_timestamping_flags)
- printf(" not the expected value %d\n",
- so_timestamping_flags);
+ printf("SO_TIMESTAMPING flags %d, bind phc %d\n",
+ so_timestamping_get.flags, so_timestamping_get.bind_phc);
+ if (so_timestamping_get.flags != so_timestamping.flags ||
+ so_timestamping_get.bind_phc != so_timestamping.bind_phc)
+ printf(" not expected, flags %d, bind phc %d\n",
+ so_timestamping.flags, so_timestamping.bind_phc);
}
/* send packets forever every five seconds */
@@ -496,16 +532,16 @@ int main(int argc, char **argv)
printf("has error\n");
recvpacket(sock, 0,
siocgstamp,
- siocgstampns);
+ siocgstampns, ptpv2);
recvpacket(sock, MSG_ERRQUEUE,
siocgstamp,
- siocgstampns);
+ siocgstampns, ptpv2);
}
} else {
/* write one packet */
sendpacket(sock,
(struct sockaddr *)&addr,
- sizeof(addr));
+ sizeof(addr), ptpv2);
next.tv_sec += 5;
continue;
}
diff --git a/tools/testing/selftests/net/tls.c b/tools/testing/selftests/net/tls.c
index b599f1fa99b5..c6eda21cefb6 100644
--- a/tools/testing/selftests/net/tls.c
+++ b/tools/testing/selftests/net/tls.c
@@ -15,6 +15,7 @@
#include <linux/tcp.h>
#include <linux/socket.h>
+#include <sys/epoll.h>
#include <sys/types.h>
#include <sys/sendfile.h>
#include <sys/socket.h>
@@ -25,26 +26,101 @@
#define TLS_PAYLOAD_MAX_LEN 16384
#define SOL_TLS 282
-FIXTURE(tls_basic)
-{
- int fd, cfd;
- bool notls;
+static int fips_enabled;
+
+struct tls_crypto_info_keys {
+ union {
+ struct tls_crypto_info crypto_info;
+ struct tls12_crypto_info_aes_gcm_128 aes128;
+ struct tls12_crypto_info_chacha20_poly1305 chacha20;
+ struct tls12_crypto_info_sm4_gcm sm4gcm;
+ struct tls12_crypto_info_sm4_ccm sm4ccm;
+ struct tls12_crypto_info_aes_ccm_128 aesccm128;
+ struct tls12_crypto_info_aes_gcm_256 aesgcm256;
+ struct tls12_crypto_info_aria_gcm_128 ariagcm128;
+ struct tls12_crypto_info_aria_gcm_256 ariagcm256;
+ };
+ size_t len;
};
-FIXTURE_SETUP(tls_basic)
+static void tls_crypto_info_init(uint16_t tls_version, uint16_t cipher_type,
+ struct tls_crypto_info_keys *tls12)
+{
+ memset(tls12, 0, sizeof(*tls12));
+
+ switch (cipher_type) {
+ case TLS_CIPHER_CHACHA20_POLY1305:
+ tls12->len = sizeof(struct tls12_crypto_info_chacha20_poly1305);
+ tls12->chacha20.info.version = tls_version;
+ tls12->chacha20.info.cipher_type = cipher_type;
+ break;
+ case TLS_CIPHER_AES_GCM_128:
+ tls12->len = sizeof(struct tls12_crypto_info_aes_gcm_128);
+ tls12->aes128.info.version = tls_version;
+ tls12->aes128.info.cipher_type = cipher_type;
+ break;
+ case TLS_CIPHER_SM4_GCM:
+ tls12->len = sizeof(struct tls12_crypto_info_sm4_gcm);
+ tls12->sm4gcm.info.version = tls_version;
+ tls12->sm4gcm.info.cipher_type = cipher_type;
+ break;
+ case TLS_CIPHER_SM4_CCM:
+ tls12->len = sizeof(struct tls12_crypto_info_sm4_ccm);
+ tls12->sm4ccm.info.version = tls_version;
+ tls12->sm4ccm.info.cipher_type = cipher_type;
+ break;
+ case TLS_CIPHER_AES_CCM_128:
+ tls12->len = sizeof(struct tls12_crypto_info_aes_ccm_128);
+ tls12->aesccm128.info.version = tls_version;
+ tls12->aesccm128.info.cipher_type = cipher_type;
+ break;
+ case TLS_CIPHER_AES_GCM_256:
+ tls12->len = sizeof(struct tls12_crypto_info_aes_gcm_256);
+ tls12->aesgcm256.info.version = tls_version;
+ tls12->aesgcm256.info.cipher_type = cipher_type;
+ break;
+ case TLS_CIPHER_ARIA_GCM_128:
+ tls12->len = sizeof(struct tls12_crypto_info_aria_gcm_128);
+ tls12->ariagcm128.info.version = tls_version;
+ tls12->ariagcm128.info.cipher_type = cipher_type;
+ break;
+ case TLS_CIPHER_ARIA_GCM_256:
+ tls12->len = sizeof(struct tls12_crypto_info_aria_gcm_256);
+ tls12->ariagcm256.info.version = tls_version;
+ tls12->ariagcm256.info.cipher_type = cipher_type;
+ break;
+ default:
+ break;
+ }
+}
+
+static void memrnd(void *s, size_t n)
+{
+ int *dword = s;
+ char *byte;
+
+ for (; n >= 4; n -= 4)
+ *dword++ = rand();
+ byte = (void *)dword;
+ while (n--)
+ *byte++ = rand();
+}
+
+static void ulp_sock_pair(struct __test_metadata *_metadata,
+ int *fd, int *cfd, bool *notls)
{
struct sockaddr_in addr;
socklen_t len;
int sfd, ret;
- self->notls = false;
+ *notls = false;
len = sizeof(addr);
addr.sin_family = AF_INET;
addr.sin_addr.s_addr = htonl(INADDR_ANY);
addr.sin_port = 0;
- self->fd = socket(AF_INET, SOCK_STREAM, 0);
+ *fd = socket(AF_INET, SOCK_STREAM, 0);
sfd = socket(AF_INET, SOCK_STREAM, 0);
ret = bind(sfd, &addr, sizeof(addr));
@@ -55,26 +131,96 @@ FIXTURE_SETUP(tls_basic)
ret = getsockname(sfd, &addr, &len);
ASSERT_EQ(ret, 0);
- ret = connect(self->fd, &addr, sizeof(addr));
+ ret = connect(*fd, &addr, sizeof(addr));
ASSERT_EQ(ret, 0);
- self->cfd = accept(sfd, &addr, &len);
- ASSERT_GE(self->cfd, 0);
+ *cfd = accept(sfd, &addr, &len);
+ ASSERT_GE(*cfd, 0);
close(sfd);
- ret = setsockopt(self->fd, IPPROTO_TCP, TCP_ULP, "tls", sizeof("tls"));
+ ret = setsockopt(*fd, IPPROTO_TCP, TCP_ULP, "tls", sizeof("tls"));
if (ret != 0) {
ASSERT_EQ(errno, ENOENT);
- self->notls = true;
+ *notls = true;
printf("Failure setting TCP_ULP, testing without tls\n");
return;
}
- ret = setsockopt(self->cfd, IPPROTO_TCP, TCP_ULP, "tls", sizeof("tls"));
+ ret = setsockopt(*cfd, IPPROTO_TCP, TCP_ULP, "tls", sizeof("tls"));
ASSERT_EQ(ret, 0);
}
+/* Produce a basic cmsg */
+static int tls_send_cmsg(int fd, unsigned char record_type,
+ void *data, size_t len, int flags)
+{
+ char cbuf[CMSG_SPACE(sizeof(char))];
+ int cmsg_len = sizeof(char);
+ struct cmsghdr *cmsg;
+ struct msghdr msg;
+ struct iovec vec;
+
+ vec.iov_base = data;
+ vec.iov_len = len;
+ memset(&msg, 0, sizeof(struct msghdr));
+ msg.msg_iov = &vec;
+ msg.msg_iovlen = 1;
+ msg.msg_control = cbuf;
+ msg.msg_controllen = sizeof(cbuf);
+ cmsg = CMSG_FIRSTHDR(&msg);
+ cmsg->cmsg_level = SOL_TLS;
+ /* test sending non-record types. */
+ cmsg->cmsg_type = TLS_SET_RECORD_TYPE;
+ cmsg->cmsg_len = CMSG_LEN(cmsg_len);
+ *CMSG_DATA(cmsg) = record_type;
+ msg.msg_controllen = cmsg->cmsg_len;
+
+ return sendmsg(fd, &msg, flags);
+}
+
+static int tls_recv_cmsg(struct __test_metadata *_metadata,
+ int fd, unsigned char record_type,
+ void *data, size_t len, int flags)
+{
+ char cbuf[CMSG_SPACE(sizeof(char))];
+ struct cmsghdr *cmsg;
+ unsigned char ctype;
+ struct msghdr msg;
+ struct iovec vec;
+ int n;
+
+ vec.iov_base = data;
+ vec.iov_len = len;
+ memset(&msg, 0, sizeof(struct msghdr));
+ msg.msg_iov = &vec;
+ msg.msg_iovlen = 1;
+ msg.msg_control = cbuf;
+ msg.msg_controllen = sizeof(cbuf);
+
+ n = recvmsg(fd, &msg, flags);
+
+ cmsg = CMSG_FIRSTHDR(&msg);
+ EXPECT_NE(cmsg, NULL);
+ EXPECT_EQ(cmsg->cmsg_level, SOL_TLS);
+ EXPECT_EQ(cmsg->cmsg_type, TLS_GET_RECORD_TYPE);
+ ctype = *((unsigned char *)CMSG_DATA(cmsg));
+ EXPECT_EQ(ctype, record_type);
+
+ return n;
+}
+
+FIXTURE(tls_basic)
+{
+ int fd, cfd;
+ bool notls;
+};
+
+FIXTURE_SETUP(tls_basic)
+{
+ ulp_sock_pair(_metadata, &self->fd, &self->cfd, &self->notls);
+}
+
FIXTURE_TEARDOWN(tls_basic)
{
close(self->fd);
@@ -95,6 +241,31 @@ TEST_F(tls_basic, base_base)
EXPECT_EQ(memcmp(buf, test_str, send_len), 0);
};
+TEST_F(tls_basic, bad_cipher)
+{
+ struct tls_crypto_info_keys tls12;
+
+ tls12.crypto_info.version = 200;
+ tls12.crypto_info.cipher_type = TLS_CIPHER_AES_GCM_128;
+ EXPECT_EQ(setsockopt(self->fd, SOL_TLS, TLS_TX, &tls12, sizeof(struct tls12_crypto_info_aes_gcm_128)), -1);
+
+ tls12.crypto_info.version = TLS_1_2_VERSION;
+ tls12.crypto_info.cipher_type = 50;
+ EXPECT_EQ(setsockopt(self->fd, SOL_TLS, TLS_TX, &tls12, sizeof(struct tls12_crypto_info_aes_gcm_128)), -1);
+
+ tls12.crypto_info.version = TLS_1_2_VERSION;
+ tls12.crypto_info.cipher_type = 59;
+ EXPECT_EQ(setsockopt(self->fd, SOL_TLS, TLS_TX, &tls12, sizeof(struct tls12_crypto_info_aes_gcm_128)), -1);
+
+ tls12.crypto_info.version = TLS_1_2_VERSION;
+ tls12.crypto_info.cipher_type = 10;
+ EXPECT_EQ(setsockopt(self->fd, SOL_TLS, TLS_TX, &tls12, sizeof(struct tls12_crypto_info_aes_gcm_128)), -1);
+
+ tls12.crypto_info.version = TLS_1_2_VERSION;
+ tls12.crypto_info.cipher_type = 70;
+ EXPECT_EQ(setsockopt(self->fd, SOL_TLS, TLS_TX, &tls12, sizeof(struct tls12_crypto_info_aes_gcm_128)), -1);
+}
+
FIXTURE(tls)
{
int fd, cfd;
@@ -103,77 +274,122 @@ FIXTURE(tls)
FIXTURE_VARIANT(tls)
{
- unsigned int tls_version;
+ uint16_t tls_version;
+ uint16_t cipher_type;
+ bool nopad, fips_non_compliant;
};
-FIXTURE_VARIANT_ADD(tls, 12)
+FIXTURE_VARIANT_ADD(tls, 12_aes_gcm)
{
.tls_version = TLS_1_2_VERSION,
+ .cipher_type = TLS_CIPHER_AES_GCM_128,
};
-FIXTURE_VARIANT_ADD(tls, 13)
+FIXTURE_VARIANT_ADD(tls, 13_aes_gcm)
{
.tls_version = TLS_1_3_VERSION,
+ .cipher_type = TLS_CIPHER_AES_GCM_128,
};
-FIXTURE_SETUP(tls)
+FIXTURE_VARIANT_ADD(tls, 12_chacha)
{
- struct tls12_crypto_info_aes_gcm_128 tls12;
- struct sockaddr_in addr;
- socklen_t len;
- int sfd, ret;
+ .tls_version = TLS_1_2_VERSION,
+ .cipher_type = TLS_CIPHER_CHACHA20_POLY1305,
+ .fips_non_compliant = true,
+};
- self->notls = false;
- len = sizeof(addr);
+FIXTURE_VARIANT_ADD(tls, 13_chacha)
+{
+ .tls_version = TLS_1_3_VERSION,
+ .cipher_type = TLS_CIPHER_CHACHA20_POLY1305,
+ .fips_non_compliant = true,
+};
- memset(&tls12, 0, sizeof(tls12));
- tls12.info.version = variant->tls_version;
- tls12.info.cipher_type = TLS_CIPHER_AES_GCM_128;
+FIXTURE_VARIANT_ADD(tls, 13_sm4_gcm)
+{
+ .tls_version = TLS_1_3_VERSION,
+ .cipher_type = TLS_CIPHER_SM4_GCM,
+ .fips_non_compliant = true,
+};
- addr.sin_family = AF_INET;
- addr.sin_addr.s_addr = htonl(INADDR_ANY);
- addr.sin_port = 0;
+FIXTURE_VARIANT_ADD(tls, 13_sm4_ccm)
+{
+ .tls_version = TLS_1_3_VERSION,
+ .cipher_type = TLS_CIPHER_SM4_CCM,
+ .fips_non_compliant = true,
+};
- self->fd = socket(AF_INET, SOCK_STREAM, 0);
- sfd = socket(AF_INET, SOCK_STREAM, 0);
+FIXTURE_VARIANT_ADD(tls, 12_aes_ccm)
+{
+ .tls_version = TLS_1_2_VERSION,
+ .cipher_type = TLS_CIPHER_AES_CCM_128,
+};
- ret = bind(sfd, &addr, sizeof(addr));
- ASSERT_EQ(ret, 0);
- ret = listen(sfd, 10);
- ASSERT_EQ(ret, 0);
+FIXTURE_VARIANT_ADD(tls, 13_aes_ccm)
+{
+ .tls_version = TLS_1_3_VERSION,
+ .cipher_type = TLS_CIPHER_AES_CCM_128,
+};
- ret = getsockname(sfd, &addr, &len);
- ASSERT_EQ(ret, 0);
+FIXTURE_VARIANT_ADD(tls, 12_aes_gcm_256)
+{
+ .tls_version = TLS_1_2_VERSION,
+ .cipher_type = TLS_CIPHER_AES_GCM_256,
+};
- ret = connect(self->fd, &addr, sizeof(addr));
- ASSERT_EQ(ret, 0);
+FIXTURE_VARIANT_ADD(tls, 13_aes_gcm_256)
+{
+ .tls_version = TLS_1_3_VERSION,
+ .cipher_type = TLS_CIPHER_AES_GCM_256,
+};
- ret = setsockopt(self->fd, IPPROTO_TCP, TCP_ULP, "tls", sizeof("tls"));
- if (ret != 0) {
- self->notls = true;
- printf("Failure setting TCP_ULP, testing without tls\n");
- }
+FIXTURE_VARIANT_ADD(tls, 13_nopad)
+{
+ .tls_version = TLS_1_3_VERSION,
+ .cipher_type = TLS_CIPHER_AES_GCM_128,
+ .nopad = true,
+};
- if (!self->notls) {
- ret = setsockopt(self->fd, SOL_TLS, TLS_TX, &tls12,
- sizeof(tls12));
- ASSERT_EQ(ret, 0);
- }
+FIXTURE_VARIANT_ADD(tls, 12_aria_gcm)
+{
+ .tls_version = TLS_1_2_VERSION,
+ .cipher_type = TLS_CIPHER_ARIA_GCM_128,
+};
- self->cfd = accept(sfd, &addr, &len);
- ASSERT_GE(self->cfd, 0);
+FIXTURE_VARIANT_ADD(tls, 12_aria_gcm_256)
+{
+ .tls_version = TLS_1_2_VERSION,
+ .cipher_type = TLS_CIPHER_ARIA_GCM_256,
+};
- if (!self->notls) {
- ret = setsockopt(self->cfd, IPPROTO_TCP, TCP_ULP, "tls",
- sizeof("tls"));
- ASSERT_EQ(ret, 0);
+FIXTURE_SETUP(tls)
+{
+ struct tls_crypto_info_keys tls12;
+ int one = 1;
+ int ret;
- ret = setsockopt(self->cfd, SOL_TLS, TLS_RX, &tls12,
- sizeof(tls12));
+ if (fips_enabled && variant->fips_non_compliant)
+ SKIP(return, "Unsupported cipher in FIPS mode");
+
+ tls_crypto_info_init(variant->tls_version, variant->cipher_type,
+ &tls12);
+
+ ulp_sock_pair(_metadata, &self->fd, &self->cfd, &self->notls);
+
+ if (self->notls)
+ return;
+
+ ret = setsockopt(self->fd, SOL_TLS, TLS_TX, &tls12, tls12.len);
+ ASSERT_EQ(ret, 0);
+
+ ret = setsockopt(self->cfd, SOL_TLS, TLS_RX, &tls12, tls12.len);
+ ASSERT_EQ(ret, 0);
+
+ if (variant->nopad) {
+ ret = setsockopt(self->cfd, SOL_TLS, TLS_RX_EXPECT_NO_PAD,
+ (void *)&one, sizeof(one));
ASSERT_EQ(ret, 0);
}
-
- close(sfd);
}
FIXTURE_TEARDOWN(tls)
@@ -277,6 +493,8 @@ TEST_F(tls, recv_max)
char recv_mem[TLS_PAYLOAD_MAX_LEN];
char buf[TLS_PAYLOAD_MAX_LEN];
+ memrnd(buf, sizeof(buf));
+
EXPECT_GE(send(self->fd, buf, send_len, 0), 0);
EXPECT_NE(recv(self->cfd, recv_mem, send_len, 0), -1);
EXPECT_EQ(memcmp(buf, recv_mem, send_len), 0);
@@ -318,6 +536,17 @@ TEST_F(tls, msg_more_unsent)
EXPECT_EQ(recv(self->cfd, buf, send_len, MSG_DONTWAIT), -1);
}
+TEST_F(tls, msg_eor)
+{
+ char const *test_str = "test_read";
+ int send_len = 10;
+ char buf[10];
+
+ EXPECT_EQ(send(self->fd, test_str, send_len, MSG_EOR), send_len);
+ EXPECT_EQ(recv(self->cfd, buf, send_len, MSG_WAITALL), send_len);
+ EXPECT_EQ(memcmp(buf, test_str, send_len), 0);
+}
+
TEST_F(tls, sendmsg_single)
{
struct msghdr msg;
@@ -384,11 +613,12 @@ TEST_F(tls, sendmsg_large)
msg.msg_iov = &vec;
msg.msg_iovlen = 1;
- EXPECT_EQ(sendmsg(self->cfd, &msg, 0), send_len);
+ EXPECT_EQ(sendmsg(self->fd, &msg, 0), send_len);
}
- while (recvs++ < sends)
- EXPECT_NE(recv(self->fd, mem, send_len, 0), -1);
+ while (recvs++ < sends) {
+ EXPECT_NE(recv(self->cfd, mem, send_len, 0), -1);
+ }
free(mem);
}
@@ -416,9 +646,9 @@ TEST_F(tls, sendmsg_multiple)
msg.msg_iov = vec;
msg.msg_iovlen = iov_len;
- EXPECT_EQ(sendmsg(self->cfd, &msg, 0), total_len);
+ EXPECT_EQ(sendmsg(self->fd, &msg, 0), total_len);
buf = malloc(total_len);
- EXPECT_NE(recv(self->fd, buf, total_len, 0), -1);
+ EXPECT_NE(recv(self->cfd, buf, total_len, 0), -1);
for (i = 0; i < iov_len; i++) {
EXPECT_EQ(memcmp(test_strs[i], buf + len_cmp,
strlen(test_strs[i])),
@@ -477,6 +707,20 @@ TEST_F(tls, splice_from_pipe)
EXPECT_EQ(memcmp(mem_send, mem_recv, send_len), 0);
}
+TEST_F(tls, splice_more)
+{
+ unsigned int f = SPLICE_F_NONBLOCK | SPLICE_F_MORE | SPLICE_F_GIFT;
+ int send_len = TLS_PAYLOAD_MAX_LEN;
+ char mem_send[TLS_PAYLOAD_MAX_LEN];
+ int i, send_pipe = 1;
+ int p[2];
+
+ ASSERT_GE(pipe(p), 0);
+ EXPECT_GE(write(p[1], mem_send, send_len), 0);
+ for (i = 0; i < 32; i++)
+ EXPECT_EQ(splice(p[0], NULL, self->fd, NULL, send_pipe, f), 1);
+}
+
TEST_F(tls, splice_from_pipe2)
{
int send_len = 16000;
@@ -485,12 +729,14 @@ TEST_F(tls, splice_from_pipe2)
int p2[2];
int p[2];
+ memrnd(mem_send, sizeof(mem_send));
+
ASSERT_GE(pipe(p), 0);
ASSERT_GE(pipe(p2), 0);
- EXPECT_GE(write(p[1], mem_send, 8000), 0);
- EXPECT_GE(splice(p[0], NULL, self->fd, NULL, 8000, 0), 0);
- EXPECT_GE(write(p2[1], mem_send + 8000, 8000), 0);
- EXPECT_GE(splice(p2[0], NULL, self->fd, NULL, 8000, 0), 0);
+ EXPECT_EQ(write(p[1], mem_send, 8000), 8000);
+ EXPECT_EQ(splice(p[0], NULL, self->fd, NULL, 8000, 0), 8000);
+ EXPECT_EQ(write(p2[1], mem_send + 8000, 8000), 8000);
+ EXPECT_EQ(splice(p2[0], NULL, self->fd, NULL, 8000, 0), 8000);
EXPECT_EQ(recv(self->cfd, mem_recv, send_len, MSG_WAITALL), send_len);
EXPECT_EQ(memcmp(mem_send, mem_recv, send_len), 0);
}
@@ -524,10 +770,107 @@ TEST_F(tls, splice_to_pipe)
char mem_recv[TLS_PAYLOAD_MAX_LEN];
int p[2];
+ memrnd(mem_send, sizeof(mem_send));
+
+ ASSERT_GE(pipe(p), 0);
+ EXPECT_EQ(send(self->fd, mem_send, send_len, 0), send_len);
+ EXPECT_EQ(splice(self->cfd, NULL, p[1], NULL, send_len, 0), send_len);
+ EXPECT_EQ(read(p[0], mem_recv, send_len), send_len);
+ EXPECT_EQ(memcmp(mem_send, mem_recv, send_len), 0);
+}
+
+TEST_F(tls, splice_cmsg_to_pipe)
+{
+ char *test_str = "test_read";
+ char record_type = 100;
+ int send_len = 10;
+ char buf[10];
+ int p[2];
+
+ if (self->notls)
+ SKIP(return, "no TLS support");
+
+ ASSERT_GE(pipe(p), 0);
+ EXPECT_EQ(tls_send_cmsg(self->fd, 100, test_str, send_len, 0), 10);
+ EXPECT_EQ(splice(self->cfd, NULL, p[1], NULL, send_len, 0), -1);
+ EXPECT_EQ(errno, EINVAL);
+ EXPECT_EQ(recv(self->cfd, buf, send_len, 0), -1);
+ EXPECT_EQ(errno, EIO);
+ EXPECT_EQ(tls_recv_cmsg(_metadata, self->cfd, record_type,
+ buf, sizeof(buf), MSG_WAITALL),
+ send_len);
+ EXPECT_EQ(memcmp(test_str, buf, send_len), 0);
+}
+
+TEST_F(tls, splice_dec_cmsg_to_pipe)
+{
+ char *test_str = "test_read";
+ char record_type = 100;
+ int send_len = 10;
+ char buf[10];
+ int p[2];
+
+ if (self->notls)
+ SKIP(return, "no TLS support");
+
+ ASSERT_GE(pipe(p), 0);
+ EXPECT_EQ(tls_send_cmsg(self->fd, 100, test_str, send_len, 0), 10);
+ EXPECT_EQ(recv(self->cfd, buf, send_len, 0), -1);
+ EXPECT_EQ(errno, EIO);
+ EXPECT_EQ(splice(self->cfd, NULL, p[1], NULL, send_len, 0), -1);
+ EXPECT_EQ(errno, EINVAL);
+ EXPECT_EQ(tls_recv_cmsg(_metadata, self->cfd, record_type,
+ buf, sizeof(buf), MSG_WAITALL),
+ send_len);
+ EXPECT_EQ(memcmp(test_str, buf, send_len), 0);
+}
+
+TEST_F(tls, recv_and_splice)
+{
+ int send_len = TLS_PAYLOAD_MAX_LEN;
+ char mem_send[TLS_PAYLOAD_MAX_LEN];
+ char mem_recv[TLS_PAYLOAD_MAX_LEN];
+ int half = send_len / 2;
+ int p[2];
+
+ ASSERT_GE(pipe(p), 0);
+ EXPECT_EQ(send(self->fd, mem_send, send_len, 0), send_len);
+ /* Recv hald of the record, splice the other half */
+ EXPECT_EQ(recv(self->cfd, mem_recv, half, MSG_WAITALL), half);
+ EXPECT_EQ(splice(self->cfd, NULL, p[1], NULL, half, SPLICE_F_NONBLOCK),
+ half);
+ EXPECT_EQ(read(p[0], &mem_recv[half], half), half);
+ EXPECT_EQ(memcmp(mem_send, mem_recv, send_len), 0);
+}
+
+TEST_F(tls, peek_and_splice)
+{
+ int send_len = TLS_PAYLOAD_MAX_LEN;
+ char mem_send[TLS_PAYLOAD_MAX_LEN];
+ char mem_recv[TLS_PAYLOAD_MAX_LEN];
+ int chunk = TLS_PAYLOAD_MAX_LEN / 4;
+ int n, i, p[2];
+
+ memrnd(mem_send, sizeof(mem_send));
+
ASSERT_GE(pipe(p), 0);
- EXPECT_GE(send(self->fd, mem_send, send_len, 0), 0);
- EXPECT_GE(splice(self->cfd, NULL, p[1], NULL, send_len, 0), 0);
- EXPECT_GE(read(p[0], mem_recv, send_len), 0);
+ for (i = 0; i < 4; i++)
+ EXPECT_EQ(send(self->fd, &mem_send[chunk * i], chunk, 0),
+ chunk);
+
+ EXPECT_EQ(recv(self->cfd, mem_recv, chunk * 5 / 2,
+ MSG_WAITALL | MSG_PEEK),
+ chunk * 5 / 2);
+ EXPECT_EQ(memcmp(mem_send, mem_recv, chunk * 5 / 2), 0);
+
+ n = 0;
+ while (n < send_len) {
+ i = splice(self->cfd, NULL, p[1], NULL, send_len - n, 0);
+ EXPECT_GT(i, 0);
+ n += i;
+ }
+ EXPECT_EQ(n, send_len);
+ EXPECT_EQ(read(p[0], mem_recv, send_len), send_len);
EXPECT_EQ(memcmp(mem_send, mem_recv, send_len), 0);
}
@@ -557,6 +900,8 @@ TEST_F(tls, recvmsg_single_max)
struct iovec vec;
struct msghdr hdr;
+ memrnd(send_mem, sizeof(send_mem));
+
EXPECT_EQ(send(self->fd, send_mem, send_len, 0), send_len);
vec.iov_base = (char *)recv_mem;
vec.iov_len = TLS_PAYLOAD_MAX_LEN;
@@ -570,7 +915,6 @@ TEST_F(tls, recvmsg_single_max)
TEST_F(tls, recvmsg_multiple)
{
unsigned int msg_iovlen = 1024;
- unsigned int len_compared = 0;
struct iovec vec[1024];
char *iov_base[1024];
unsigned int iov_len = 16;
@@ -579,6 +923,8 @@ TEST_F(tls, recvmsg_multiple)
struct msghdr hdr;
int i;
+ memrnd(buf, sizeof(buf));
+
EXPECT_EQ(send(self->fd, buf, send_len, 0), send_len);
for (i = 0; i < msg_iovlen; i++) {
iov_base[i] = (char *)malloc(iov_len);
@@ -589,8 +935,6 @@ TEST_F(tls, recvmsg_multiple)
hdr.msg_iovlen = msg_iovlen;
hdr.msg_iov = vec;
EXPECT_NE(recvmsg(self->cfd, &hdr, 0), -1);
- for (i = 0; i < msg_iovlen; i++)
- len_compared += iov_len;
for (i = 0; i < msg_iovlen; i++)
free(iov_base[i]);
@@ -603,6 +947,8 @@ TEST_F(tls, single_send_multiple_recv)
char send_mem[TLS_PAYLOAD_MAX_LEN * 2];
char recv_mem[TLS_PAYLOAD_MAX_LEN * 2];
+ memrnd(send_mem, sizeof(send_mem));
+
EXPECT_GE(send(self->fd, send_mem, total_len, 0), 0);
memset(recv_mem, 0, total_len);
@@ -618,6 +964,8 @@ TEST_F(tls, multiple_send_single_recv)
char recv_mem[2 * 10];
char send_mem[10];
+ memrnd(send_mem, sizeof(send_mem));
+
EXPECT_GE(send(self->fd, send_mem, send_len, 0), 0);
EXPECT_GE(send(self->fd, send_mem, send_len, 0), 0);
memset(recv_mem, 0, total_len);
@@ -634,6 +982,8 @@ TEST_F(tls, single_send_multiple_recv_non_align)
char recv_mem[recv_len * 2];
char send_mem[total_len];
+ memrnd(send_mem, sizeof(send_mem));
+
EXPECT_GE(send(self->fd, send_mem, total_len, 0), 0);
memset(recv_mem, 0, total_len);
@@ -652,12 +1002,12 @@ TEST_F(tls, recv_partial)
memset(recv_mem, 0, sizeof(recv_mem));
EXPECT_EQ(send(self->fd, test_str, send_len, 0), send_len);
- EXPECT_NE(recv(self->cfd, recv_mem, strlen(test_str_first),
- MSG_WAITALL), -1);
+ EXPECT_EQ(recv(self->cfd, recv_mem, strlen(test_str_first),
+ MSG_WAITALL), strlen(test_str_first));
EXPECT_EQ(memcmp(test_str_first, recv_mem, strlen(test_str_first)), 0);
memset(recv_mem, 0, sizeof(recv_mem));
- EXPECT_NE(recv(self->cfd, recv_mem, strlen(test_str_second),
- MSG_WAITALL), -1);
+ EXPECT_EQ(recv(self->cfd, recv_mem, strlen(test_str_second),
+ MSG_WAITALL), strlen(test_str_second));
EXPECT_EQ(memcmp(test_str_second, recv_mem, strlen(test_str_second)),
0);
}
@@ -679,10 +1029,10 @@ TEST_F(tls, recv_peek)
char buf[15];
EXPECT_EQ(send(self->fd, test_str, send_len, 0), send_len);
- EXPECT_NE(recv(self->cfd, buf, send_len, MSG_PEEK), -1);
+ EXPECT_EQ(recv(self->cfd, buf, send_len, MSG_PEEK), send_len);
EXPECT_EQ(memcmp(test_str, buf, send_len), 0);
memset(buf, 0, sizeof(buf));
- EXPECT_NE(recv(self->cfd, buf, send_len, 0), -1);
+ EXPECT_EQ(recv(self->cfd, buf, send_len, 0), send_len);
EXPECT_EQ(memcmp(test_str, buf, send_len), 0);
}
@@ -803,18 +1153,17 @@ TEST_F(tls, bidir)
int ret;
if (!self->notls) {
- struct tls12_crypto_info_aes_gcm_128 tls12;
+ struct tls_crypto_info_keys tls12;
- memset(&tls12, 0, sizeof(tls12));
- tls12.info.version = variant->tls_version;
- tls12.info.cipher_type = TLS_CIPHER_AES_GCM_128;
+ tls_crypto_info_init(variant->tls_version, variant->cipher_type,
+ &tls12);
ret = setsockopt(self->fd, SOL_TLS, TLS_RX, &tls12,
- sizeof(tls12));
+ tls12.len);
ASSERT_EQ(ret, 0);
ret = setsockopt(self->cfd, SOL_TLS, TLS_TX, &tls12,
- sizeof(tls12));
+ tls12.len);
ASSERT_EQ(ret, 0);
}
@@ -1109,63 +1458,78 @@ TEST_F(tls, mutliproc_sendpage_writers)
TEST_F(tls, control_msg)
{
- if (self->notls)
- return;
-
- char cbuf[CMSG_SPACE(sizeof(char))];
- char const *test_str = "test_read";
- int cmsg_len = sizeof(char);
+ char *test_str = "test_read";
char record_type = 100;
- struct cmsghdr *cmsg;
- struct msghdr msg;
int send_len = 10;
- struct iovec vec;
char buf[10];
- vec.iov_base = (char *)test_str;
- vec.iov_len = 10;
- memset(&msg, 0, sizeof(struct msghdr));
- msg.msg_iov = &vec;
- msg.msg_iovlen = 1;
- msg.msg_control = cbuf;
- msg.msg_controllen = sizeof(cbuf);
- cmsg = CMSG_FIRSTHDR(&msg);
- cmsg->cmsg_level = SOL_TLS;
- /* test sending non-record types. */
- cmsg->cmsg_type = TLS_SET_RECORD_TYPE;
- cmsg->cmsg_len = CMSG_LEN(cmsg_len);
- *CMSG_DATA(cmsg) = record_type;
- msg.msg_controllen = cmsg->cmsg_len;
+ if (self->notls)
+ SKIP(return, "no TLS support");
- EXPECT_EQ(sendmsg(self->fd, &msg, 0), send_len);
+ EXPECT_EQ(tls_send_cmsg(self->fd, record_type, test_str, send_len, 0),
+ send_len);
/* Should fail because we didn't provide a control message */
EXPECT_EQ(recv(self->cfd, buf, send_len, 0), -1);
- vec.iov_base = buf;
- EXPECT_EQ(recvmsg(self->cfd, &msg, MSG_WAITALL | MSG_PEEK), send_len);
-
- cmsg = CMSG_FIRSTHDR(&msg);
- EXPECT_NE(cmsg, NULL);
- EXPECT_EQ(cmsg->cmsg_level, SOL_TLS);
- EXPECT_EQ(cmsg->cmsg_type, TLS_GET_RECORD_TYPE);
- record_type = *((unsigned char *)CMSG_DATA(cmsg));
- EXPECT_EQ(record_type, 100);
+ EXPECT_EQ(tls_recv_cmsg(_metadata, self->cfd, record_type,
+ buf, sizeof(buf), MSG_WAITALL | MSG_PEEK),
+ send_len);
EXPECT_EQ(memcmp(buf, test_str, send_len), 0);
/* Recv the message again without MSG_PEEK */
- record_type = 0;
memset(buf, 0, sizeof(buf));
- EXPECT_EQ(recvmsg(self->cfd, &msg, MSG_WAITALL), send_len);
- cmsg = CMSG_FIRSTHDR(&msg);
- EXPECT_NE(cmsg, NULL);
- EXPECT_EQ(cmsg->cmsg_level, SOL_TLS);
- EXPECT_EQ(cmsg->cmsg_type, TLS_GET_RECORD_TYPE);
- record_type = *((unsigned char *)CMSG_DATA(cmsg));
- EXPECT_EQ(record_type, 100);
+ EXPECT_EQ(tls_recv_cmsg(_metadata, self->cfd, record_type,
+ buf, sizeof(buf), MSG_WAITALL),
+ send_len);
EXPECT_EQ(memcmp(buf, test_str, send_len), 0);
}
+TEST_F(tls, control_msg_nomerge)
+{
+ char *rec1 = "1111";
+ char *rec2 = "2222";
+ int send_len = 5;
+ char buf[15];
+
+ if (self->notls)
+ SKIP(return, "no TLS support");
+
+ EXPECT_EQ(tls_send_cmsg(self->fd, 100, rec1, send_len, 0), send_len);
+ EXPECT_EQ(tls_send_cmsg(self->fd, 100, rec2, send_len, 0), send_len);
+
+ EXPECT_EQ(tls_recv_cmsg(_metadata, self->cfd, 100, buf, sizeof(buf), MSG_PEEK), send_len);
+ EXPECT_EQ(memcmp(buf, rec1, send_len), 0);
+
+ EXPECT_EQ(tls_recv_cmsg(_metadata, self->cfd, 100, buf, sizeof(buf), MSG_PEEK), send_len);
+ EXPECT_EQ(memcmp(buf, rec1, send_len), 0);
+
+ EXPECT_EQ(tls_recv_cmsg(_metadata, self->cfd, 100, buf, sizeof(buf), 0), send_len);
+ EXPECT_EQ(memcmp(buf, rec1, send_len), 0);
+
+ EXPECT_EQ(tls_recv_cmsg(_metadata, self->cfd, 100, buf, sizeof(buf), 0), send_len);
+ EXPECT_EQ(memcmp(buf, rec2, send_len), 0);
+}
+
+TEST_F(tls, data_control_data)
+{
+ char *rec1 = "1111";
+ char *rec2 = "2222";
+ char *rec3 = "3333";
+ int send_len = 5;
+ char buf[15];
+
+ if (self->notls)
+ SKIP(return, "no TLS support");
+
+ EXPECT_EQ(send(self->fd, rec1, send_len, 0), send_len);
+ EXPECT_EQ(tls_send_cmsg(self->fd, 100, rec2, send_len, 0), send_len);
+ EXPECT_EQ(send(self->fd, rec3, send_len, 0), send_len);
+
+ EXPECT_EQ(recv(self->cfd, buf, sizeof(buf), MSG_PEEK), send_len);
+ EXPECT_EQ(recv(self->cfd, buf, sizeof(buf), MSG_PEEK), send_len);
+}
+
TEST_F(tls, shutdown)
{
char const *test_str = "test_read";
@@ -1217,6 +1581,356 @@ TEST_F(tls, shutdown_reuse)
EXPECT_EQ(errno, EISCONN);
}
+TEST_F(tls, getsockopt)
+{
+ struct tls_crypto_info_keys expect, get;
+ socklen_t len;
+
+ /* get only the version/cipher */
+ len = sizeof(struct tls_crypto_info);
+ memrnd(&get, sizeof(get));
+ EXPECT_EQ(getsockopt(self->fd, SOL_TLS, TLS_TX, &get, &len), 0);
+ EXPECT_EQ(len, sizeof(struct tls_crypto_info));
+ EXPECT_EQ(get.crypto_info.version, variant->tls_version);
+ EXPECT_EQ(get.crypto_info.cipher_type, variant->cipher_type);
+
+ /* get the full crypto_info */
+ tls_crypto_info_init(variant->tls_version, variant->cipher_type, &expect);
+ len = expect.len;
+ memrnd(&get, sizeof(get));
+ EXPECT_EQ(getsockopt(self->fd, SOL_TLS, TLS_TX, &get, &len), 0);
+ EXPECT_EQ(len, expect.len);
+ EXPECT_EQ(get.crypto_info.version, variant->tls_version);
+ EXPECT_EQ(get.crypto_info.cipher_type, variant->cipher_type);
+ EXPECT_EQ(memcmp(&get, &expect, expect.len), 0);
+
+ /* short get should fail */
+ len = sizeof(struct tls_crypto_info) - 1;
+ EXPECT_EQ(getsockopt(self->fd, SOL_TLS, TLS_TX, &get, &len), -1);
+ EXPECT_EQ(errno, EINVAL);
+
+ /* partial get of the cipher data should fail */
+ len = expect.len - 1;
+ EXPECT_EQ(getsockopt(self->fd, SOL_TLS, TLS_TX, &get, &len), -1);
+ EXPECT_EQ(errno, EINVAL);
+}
+
+FIXTURE(tls_err)
+{
+ int fd, cfd;
+ int fd2, cfd2;
+ bool notls;
+};
+
+FIXTURE_VARIANT(tls_err)
+{
+ uint16_t tls_version;
+};
+
+FIXTURE_VARIANT_ADD(tls_err, 12_aes_gcm)
+{
+ .tls_version = TLS_1_2_VERSION,
+};
+
+FIXTURE_VARIANT_ADD(tls_err, 13_aes_gcm)
+{
+ .tls_version = TLS_1_3_VERSION,
+};
+
+FIXTURE_SETUP(tls_err)
+{
+ struct tls_crypto_info_keys tls12;
+ int ret;
+
+ tls_crypto_info_init(variant->tls_version, TLS_CIPHER_AES_GCM_128,
+ &tls12);
+
+ ulp_sock_pair(_metadata, &self->fd, &self->cfd, &self->notls);
+ ulp_sock_pair(_metadata, &self->fd2, &self->cfd2, &self->notls);
+ if (self->notls)
+ return;
+
+ ret = setsockopt(self->fd, SOL_TLS, TLS_TX, &tls12, tls12.len);
+ ASSERT_EQ(ret, 0);
+
+ ret = setsockopt(self->cfd2, SOL_TLS, TLS_RX, &tls12, tls12.len);
+ ASSERT_EQ(ret, 0);
+}
+
+FIXTURE_TEARDOWN(tls_err)
+{
+ close(self->fd);
+ close(self->cfd);
+ close(self->fd2);
+ close(self->cfd2);
+}
+
+TEST_F(tls_err, bad_rec)
+{
+ char buf[64];
+
+ if (self->notls)
+ SKIP(return, "no TLS support");
+
+ memset(buf, 0x55, sizeof(buf));
+ EXPECT_EQ(send(self->fd2, buf, sizeof(buf), 0), sizeof(buf));
+ EXPECT_EQ(recv(self->cfd2, buf, sizeof(buf), 0), -1);
+ EXPECT_EQ(errno, EMSGSIZE);
+ EXPECT_EQ(recv(self->cfd2, buf, sizeof(buf), MSG_DONTWAIT), -1);
+ EXPECT_EQ(errno, EAGAIN);
+}
+
+TEST_F(tls_err, bad_auth)
+{
+ char buf[128];
+ int n;
+
+ if (self->notls)
+ SKIP(return, "no TLS support");
+
+ memrnd(buf, sizeof(buf) / 2);
+ EXPECT_EQ(send(self->fd, buf, sizeof(buf) / 2, 0), sizeof(buf) / 2);
+ n = recv(self->cfd, buf, sizeof(buf), 0);
+ EXPECT_GT(n, sizeof(buf) / 2);
+
+ buf[n - 1]++;
+
+ EXPECT_EQ(send(self->fd2, buf, n, 0), n);
+ EXPECT_EQ(recv(self->cfd2, buf, sizeof(buf), 0), -1);
+ EXPECT_EQ(errno, EBADMSG);
+ EXPECT_EQ(recv(self->cfd2, buf, sizeof(buf), 0), -1);
+ EXPECT_EQ(errno, EBADMSG);
+}
+
+TEST_F(tls_err, bad_in_large_read)
+{
+ char txt[3][64];
+ char cip[3][128];
+ char buf[3 * 128];
+ int i, n;
+
+ if (self->notls)
+ SKIP(return, "no TLS support");
+
+ /* Put 3 records in the sockets */
+ for (i = 0; i < 3; i++) {
+ memrnd(txt[i], sizeof(txt[i]));
+ EXPECT_EQ(send(self->fd, txt[i], sizeof(txt[i]), 0),
+ sizeof(txt[i]));
+ n = recv(self->cfd, cip[i], sizeof(cip[i]), 0);
+ EXPECT_GT(n, sizeof(txt[i]));
+ /* Break the third message */
+ if (i == 2)
+ cip[2][n - 1]++;
+ EXPECT_EQ(send(self->fd2, cip[i], n, 0), n);
+ }
+
+ /* We should be able to receive the first two messages */
+ EXPECT_EQ(recv(self->cfd2, buf, sizeof(buf), 0), sizeof(txt[0]) * 2);
+ EXPECT_EQ(memcmp(buf, txt[0], sizeof(txt[0])), 0);
+ EXPECT_EQ(memcmp(buf + sizeof(txt[0]), txt[1], sizeof(txt[1])), 0);
+ /* Third mesasge is bad */
+ EXPECT_EQ(recv(self->cfd2, buf, sizeof(buf), 0), -1);
+ EXPECT_EQ(errno, EBADMSG);
+ EXPECT_EQ(recv(self->cfd2, buf, sizeof(buf), 0), -1);
+ EXPECT_EQ(errno, EBADMSG);
+}
+
+TEST_F(tls_err, bad_cmsg)
+{
+ char *test_str = "test_read";
+ int send_len = 10;
+ char cip[128];
+ char buf[128];
+ char txt[64];
+ int n;
+
+ if (self->notls)
+ SKIP(return, "no TLS support");
+
+ /* Queue up one data record */
+ memrnd(txt, sizeof(txt));
+ EXPECT_EQ(send(self->fd, txt, sizeof(txt), 0), sizeof(txt));
+ n = recv(self->cfd, cip, sizeof(cip), 0);
+ EXPECT_GT(n, sizeof(txt));
+ EXPECT_EQ(send(self->fd2, cip, n, 0), n);
+
+ EXPECT_EQ(tls_send_cmsg(self->fd, 100, test_str, send_len, 0), 10);
+ n = recv(self->cfd, cip, sizeof(cip), 0);
+ cip[n - 1]++; /* Break it */
+ EXPECT_GT(n, send_len);
+ EXPECT_EQ(send(self->fd2, cip, n, 0), n);
+
+ EXPECT_EQ(recv(self->cfd2, buf, sizeof(buf), 0), sizeof(txt));
+ EXPECT_EQ(memcmp(buf, txt, sizeof(txt)), 0);
+ EXPECT_EQ(recv(self->cfd2, buf, sizeof(buf), 0), -1);
+ EXPECT_EQ(errno, EBADMSG);
+ EXPECT_EQ(recv(self->cfd2, buf, sizeof(buf), 0), -1);
+ EXPECT_EQ(errno, EBADMSG);
+}
+
+TEST_F(tls_err, timeo)
+{
+ struct timeval tv = { .tv_usec = 10000, };
+ char buf[128];
+ int ret;
+
+ if (self->notls)
+ SKIP(return, "no TLS support");
+
+ ret = setsockopt(self->cfd2, SOL_SOCKET, SO_RCVTIMEO, &tv, sizeof(tv));
+ ASSERT_EQ(ret, 0);
+
+ ret = fork();
+ ASSERT_GE(ret, 0);
+
+ if (ret) {
+ usleep(1000); /* Give child a head start */
+
+ EXPECT_EQ(recv(self->cfd2, buf, sizeof(buf), 0), -1);
+ EXPECT_EQ(errno, EAGAIN);
+
+ EXPECT_EQ(recv(self->cfd2, buf, sizeof(buf), 0), -1);
+ EXPECT_EQ(errno, EAGAIN);
+
+ wait(&ret);
+ } else {
+ EXPECT_EQ(recv(self->cfd2, buf, sizeof(buf), 0), -1);
+ EXPECT_EQ(errno, EAGAIN);
+ exit(0);
+ }
+}
+
+TEST_F(tls_err, poll_partial_rec)
+{
+ struct pollfd pfd = { };
+ ssize_t rec_len;
+ char rec[256];
+ char buf[128];
+
+ if (self->notls)
+ SKIP(return, "no TLS support");
+
+ pfd.fd = self->cfd2;
+ pfd.events = POLLIN;
+ EXPECT_EQ(poll(&pfd, 1, 1), 0);
+
+ memrnd(buf, sizeof(buf));
+ EXPECT_EQ(send(self->fd, buf, sizeof(buf), 0), sizeof(buf));
+ rec_len = recv(self->cfd, rec, sizeof(rec), 0);
+ EXPECT_GT(rec_len, sizeof(buf));
+
+ /* Write 100B, not the full record ... */
+ EXPECT_EQ(send(self->fd2, rec, 100, 0), 100);
+ /* ... no full record should mean no POLLIN */
+ pfd.fd = self->cfd2;
+ pfd.events = POLLIN;
+ EXPECT_EQ(poll(&pfd, 1, 1), 0);
+ /* Now write the rest, and it should all pop out of the other end. */
+ EXPECT_EQ(send(self->fd2, rec + 100, rec_len - 100, 0), rec_len - 100);
+ pfd.fd = self->cfd2;
+ pfd.events = POLLIN;
+ EXPECT_EQ(poll(&pfd, 1, 1), 1);
+ EXPECT_EQ(recv(self->cfd2, rec, sizeof(rec), 0), sizeof(buf));
+ EXPECT_EQ(memcmp(buf, rec, sizeof(buf)), 0);
+}
+
+TEST_F(tls_err, epoll_partial_rec)
+{
+ struct epoll_event ev, events[10];
+ ssize_t rec_len;
+ char rec[256];
+ char buf[128];
+ int epollfd;
+
+ if (self->notls)
+ SKIP(return, "no TLS support");
+
+ epollfd = epoll_create1(0);
+ ASSERT_GE(epollfd, 0);
+
+ memset(&ev, 0, sizeof(ev));
+ ev.events = EPOLLIN;
+ ev.data.fd = self->cfd2;
+ ASSERT_GE(epoll_ctl(epollfd, EPOLL_CTL_ADD, self->cfd2, &ev), 0);
+
+ EXPECT_EQ(epoll_wait(epollfd, events, 10, 0), 0);
+
+ memrnd(buf, sizeof(buf));
+ EXPECT_EQ(send(self->fd, buf, sizeof(buf), 0), sizeof(buf));
+ rec_len = recv(self->cfd, rec, sizeof(rec), 0);
+ EXPECT_GT(rec_len, sizeof(buf));
+
+ /* Write 100B, not the full record ... */
+ EXPECT_EQ(send(self->fd2, rec, 100, 0), 100);
+ /* ... no full record should mean no POLLIN */
+ EXPECT_EQ(epoll_wait(epollfd, events, 10, 0), 0);
+ /* Now write the rest, and it should all pop out of the other end. */
+ EXPECT_EQ(send(self->fd2, rec + 100, rec_len - 100, 0), rec_len - 100);
+ EXPECT_EQ(epoll_wait(epollfd, events, 10, 0), 1);
+ EXPECT_EQ(recv(self->cfd2, rec, sizeof(rec), 0), sizeof(buf));
+ EXPECT_EQ(memcmp(buf, rec, sizeof(buf)), 0);
+
+ close(epollfd);
+}
+
+TEST_F(tls_err, poll_partial_rec_async)
+{
+ struct pollfd pfd = { };
+ ssize_t rec_len;
+ char rec[256];
+ char buf[128];
+ char token;
+ int p[2];
+ int ret;
+
+ if (self->notls)
+ SKIP(return, "no TLS support");
+
+ ASSERT_GE(pipe(p), 0);
+
+ memrnd(buf, sizeof(buf));
+ EXPECT_EQ(send(self->fd, buf, sizeof(buf), 0), sizeof(buf));
+ rec_len = recv(self->cfd, rec, sizeof(rec), 0);
+ EXPECT_GT(rec_len, sizeof(buf));
+
+ ret = fork();
+ ASSERT_GE(ret, 0);
+
+ if (ret) {
+ int status, pid2;
+
+ close(p[1]);
+ usleep(1000); /* Give child a head start */
+
+ EXPECT_EQ(send(self->fd2, rec, 100, 0), 100);
+
+ EXPECT_EQ(read(p[0], &token, 1), 1); /* Barrier #1 */
+
+ EXPECT_EQ(send(self->fd2, rec + 100, rec_len - 100, 0),
+ rec_len - 100);
+
+ pid2 = wait(&status);
+ EXPECT_EQ(pid2, ret);
+ EXPECT_EQ(status, 0);
+ } else {
+ close(p[0]);
+
+ /* Child should sleep in poll(), never get a wake */
+ pfd.fd = self->cfd2;
+ pfd.events = POLLIN;
+ EXPECT_EQ(poll(&pfd, 1, 20), 0);
+
+ EXPECT_EQ(write(p[1], &token, 1), 1); /* Barrier #1 */
+
+ pfd.fd = self->cfd2;
+ pfd.events = POLLIN;
+ EXPECT_EQ(poll(&pfd, 1, 20), 1);
+
+ exit(!__test_passed(_metadata));
+ }
+}
+
TEST(non_established) {
struct tls12_crypto_info_aes_gcm_256 tls12;
struct sockaddr_in addr;
@@ -1271,64 +1985,191 @@ TEST(non_established) {
TEST(keysizes) {
struct tls12_crypto_info_aes_gcm_256 tls12;
- struct sockaddr_in addr;
- int sfd, ret, fd, cfd;
- socklen_t len;
+ int ret, fd, cfd;
bool notls;
- notls = false;
- len = sizeof(addr);
-
memset(&tls12, 0, sizeof(tls12));
tls12.info.version = TLS_1_2_VERSION;
tls12.info.cipher_type = TLS_CIPHER_AES_GCM_256;
- addr.sin_family = AF_INET;
- addr.sin_addr.s_addr = htonl(INADDR_ANY);
- addr.sin_port = 0;
+ ulp_sock_pair(_metadata, &fd, &cfd, &notls);
- fd = socket(AF_INET, SOCK_STREAM, 0);
- sfd = socket(AF_INET, SOCK_STREAM, 0);
+ if (!notls) {
+ ret = setsockopt(fd, SOL_TLS, TLS_TX, &tls12,
+ sizeof(tls12));
+ EXPECT_EQ(ret, 0);
+
+ ret = setsockopt(cfd, SOL_TLS, TLS_RX, &tls12,
+ sizeof(tls12));
+ EXPECT_EQ(ret, 0);
+ }
+
+ close(fd);
+ close(cfd);
+}
+
+TEST(no_pad) {
+ struct tls12_crypto_info_aes_gcm_256 tls12;
+ int ret, fd, cfd, val;
+ socklen_t len;
+ bool notls;
+
+ memset(&tls12, 0, sizeof(tls12));
+ tls12.info.version = TLS_1_3_VERSION;
+ tls12.info.cipher_type = TLS_CIPHER_AES_GCM_256;
+
+ ulp_sock_pair(_metadata, &fd, &cfd, &notls);
+
+ if (notls)
+ exit(KSFT_SKIP);
+
+ ret = setsockopt(fd, SOL_TLS, TLS_TX, &tls12, sizeof(tls12));
+ EXPECT_EQ(ret, 0);
+
+ ret = setsockopt(cfd, SOL_TLS, TLS_RX, &tls12, sizeof(tls12));
+ EXPECT_EQ(ret, 0);
+
+ val = 1;
+ ret = setsockopt(cfd, SOL_TLS, TLS_RX_EXPECT_NO_PAD,
+ (void *)&val, sizeof(val));
+ EXPECT_EQ(ret, 0);
+
+ len = sizeof(val);
+ val = 2;
+ ret = getsockopt(cfd, SOL_TLS, TLS_RX_EXPECT_NO_PAD,
+ (void *)&val, &len);
+ EXPECT_EQ(ret, 0);
+ EXPECT_EQ(val, 1);
+ EXPECT_EQ(len, 4);
+
+ val = 0;
+ ret = setsockopt(cfd, SOL_TLS, TLS_RX_EXPECT_NO_PAD,
+ (void *)&val, sizeof(val));
+ EXPECT_EQ(ret, 0);
+
+ len = sizeof(val);
+ val = 2;
+ ret = getsockopt(cfd, SOL_TLS, TLS_RX_EXPECT_NO_PAD,
+ (void *)&val, &len);
+ EXPECT_EQ(ret, 0);
+ EXPECT_EQ(val, 0);
+ EXPECT_EQ(len, 4);
+
+ close(fd);
+ close(cfd);
+}
+
+TEST(tls_v6ops) {
+ struct tls_crypto_info_keys tls12;
+ struct sockaddr_in6 addr, addr2;
+ int sfd, ret, fd;
+ socklen_t len, len2;
+
+ tls_crypto_info_init(TLS_1_2_VERSION, TLS_CIPHER_AES_GCM_128, &tls12);
+
+ addr.sin6_family = AF_INET6;
+ addr.sin6_addr = in6addr_any;
+ addr.sin6_port = 0;
+
+ fd = socket(AF_INET6, SOCK_STREAM, 0);
+ sfd = socket(AF_INET6, SOCK_STREAM, 0);
ret = bind(sfd, &addr, sizeof(addr));
ASSERT_EQ(ret, 0);
ret = listen(sfd, 10);
ASSERT_EQ(ret, 0);
+ len = sizeof(addr);
ret = getsockname(sfd, &addr, &len);
ASSERT_EQ(ret, 0);
ret = connect(fd, &addr, sizeof(addr));
ASSERT_EQ(ret, 0);
+ len = sizeof(addr);
+ ret = getsockname(fd, &addr, &len);
+ ASSERT_EQ(ret, 0);
+
ret = setsockopt(fd, IPPROTO_TCP, TCP_ULP, "tls", sizeof("tls"));
- if (ret != 0) {
- notls = true;
- printf("Failure setting TCP_ULP, testing without tls\n");
+ if (ret) {
+ ASSERT_EQ(errno, ENOENT);
+ SKIP(return, "no TLS support");
}
+ ASSERT_EQ(ret, 0);
- if (!notls) {
- ret = setsockopt(fd, SOL_TLS, TLS_TX, &tls12,
- sizeof(tls12));
- EXPECT_EQ(ret, 0);
- }
+ ret = setsockopt(fd, SOL_TLS, TLS_TX, &tls12, tls12.len);
+ ASSERT_EQ(ret, 0);
- cfd = accept(sfd, &addr, &len);
- ASSERT_GE(cfd, 0);
+ ret = setsockopt(fd, SOL_TLS, TLS_RX, &tls12, tls12.len);
+ ASSERT_EQ(ret, 0);
- if (!notls) {
- ret = setsockopt(cfd, IPPROTO_TCP, TCP_ULP, "tls",
- sizeof("tls"));
- EXPECT_EQ(ret, 0);
+ len2 = sizeof(addr2);
+ ret = getsockname(fd, &addr2, &len2);
+ ASSERT_EQ(ret, 0);
- ret = setsockopt(cfd, SOL_TLS, TLS_RX, &tls12,
- sizeof(tls12));
- EXPECT_EQ(ret, 0);
- }
+ EXPECT_EQ(len2, len);
+ EXPECT_EQ(memcmp(&addr, &addr2, len), 0);
+ close(fd);
close(sfd);
+}
+
+TEST(prequeue) {
+ struct tls_crypto_info_keys tls12;
+ char buf[20000], buf2[20000];
+ struct sockaddr_in addr;
+ int sfd, cfd, ret, fd;
+ socklen_t len;
+
+ len = sizeof(addr);
+ memrnd(buf, sizeof(buf));
+
+ tls_crypto_info_init(TLS_1_2_VERSION, TLS_CIPHER_AES_GCM_256, &tls12);
+
+ addr.sin_family = AF_INET;
+ addr.sin_addr.s_addr = htonl(INADDR_ANY);
+ addr.sin_port = 0;
+
+ fd = socket(AF_INET, SOCK_STREAM, 0);
+ sfd = socket(AF_INET, SOCK_STREAM, 0);
+
+ ASSERT_EQ(bind(sfd, &addr, sizeof(addr)), 0);
+ ASSERT_EQ(listen(sfd, 10), 0);
+ ASSERT_EQ(getsockname(sfd, &addr, &len), 0);
+ ASSERT_EQ(connect(fd, &addr, sizeof(addr)), 0);
+ ASSERT_GE(cfd = accept(sfd, &addr, &len), 0);
+ close(sfd);
+
+ ret = setsockopt(fd, IPPROTO_TCP, TCP_ULP, "tls", sizeof("tls"));
+ if (ret) {
+ ASSERT_EQ(errno, ENOENT);
+ SKIP(return, "no TLS support");
+ }
+
+ ASSERT_EQ(setsockopt(fd, SOL_TLS, TLS_TX, &tls12, tls12.len), 0);
+ EXPECT_EQ(send(fd, buf, sizeof(buf), MSG_DONTWAIT), sizeof(buf));
+
+ ASSERT_EQ(setsockopt(cfd, IPPROTO_TCP, TCP_ULP, "tls", sizeof("tls")), 0);
+ ASSERT_EQ(setsockopt(cfd, SOL_TLS, TLS_RX, &tls12, tls12.len), 0);
+ EXPECT_EQ(recv(cfd, buf2, sizeof(buf2), MSG_WAITALL), sizeof(buf2));
+
+ EXPECT_EQ(memcmp(buf, buf2, sizeof(buf)), 0);
+
close(fd);
close(cfd);
}
+static void __attribute__((constructor)) fips_check(void) {
+ int res;
+ FILE *f;
+
+ f = fopen("/proc/sys/crypto/fips_enabled", "r");
+ if (f) {
+ res = fscanf(f, "%d", &fips_enabled);
+ if (res != 1)
+ ksft_print_msg("ERROR: Couldn't read /proc/sys/crypto/fips_enabled\n");
+ fclose(f);
+ }
+}
+
TEST_HARNESS_MAIN
diff --git a/tools/testing/selftests/net/toeplitz.c b/tools/testing/selftests/net/toeplitz.c
new file mode 100644
index 000000000000..9ba03164d73a
--- /dev/null
+++ b/tools/testing/selftests/net/toeplitz.c
@@ -0,0 +1,589 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Toeplitz test
+ *
+ * 1. Read packets and their rx_hash using PF_PACKET/TPACKET_V3
+ * 2. Compute the rx_hash in software based on the packet contents
+ * 3. Compare the two
+ *
+ * Optionally, either '-C $rx_irq_cpu_list' or '-r $rps_bitmap' may be given.
+ *
+ * If '-C $rx_irq_cpu_list' is given, also
+ *
+ * 4. Identify the cpu on which the packet arrived with PACKET_FANOUT_CPU
+ * 5. Compute the rxqueue that RSS would select based on this rx_hash
+ * 6. Using the $rx_irq_cpu_list map, identify the arriving cpu based on rxq irq
+ * 7. Compare the cpus from 4 and 6
+ *
+ * Else if '-r $rps_bitmap' is given, also
+ *
+ * 4. Identify the cpu on which the packet arrived with PACKET_FANOUT_CPU
+ * 5. Compute the cpu that RPS should select based on rx_hash and $rps_bitmap
+ * 6. Compare the cpus from 4 and 5
+ */
+
+#define _GNU_SOURCE
+
+#include <arpa/inet.h>
+#include <errno.h>
+#include <error.h>
+#include <fcntl.h>
+#include <getopt.h>
+#include <linux/filter.h>
+#include <linux/if_ether.h>
+#include <linux/if_packet.h>
+#include <net/if.h>
+#include <netdb.h>
+#include <netinet/ip.h>
+#include <netinet/ip6.h>
+#include <netinet/tcp.h>
+#include <netinet/udp.h>
+#include <poll.h>
+#include <stdbool.h>
+#include <stddef.h>
+#include <stdint.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/mman.h>
+#include <sys/socket.h>
+#include <sys/stat.h>
+#include <sys/sysinfo.h>
+#include <sys/time.h>
+#include <sys/types.h>
+#include <unistd.h>
+
+#include "../kselftest.h"
+
+#define TOEPLITZ_KEY_MIN_LEN 40
+#define TOEPLITZ_KEY_MAX_LEN 60
+
+#define TOEPLITZ_STR_LEN(K) (((K) * 3) - 1) /* hex encoded: AA:BB:CC:...:ZZ */
+#define TOEPLITZ_STR_MIN_LEN TOEPLITZ_STR_LEN(TOEPLITZ_KEY_MIN_LEN)
+#define TOEPLITZ_STR_MAX_LEN TOEPLITZ_STR_LEN(TOEPLITZ_KEY_MAX_LEN)
+
+#define FOUR_TUPLE_MAX_LEN ((sizeof(struct in6_addr) * 2) + (sizeof(uint16_t) * 2))
+
+#define RSS_MAX_CPUS (1 << 16) /* real constraint is PACKET_FANOUT_MAX */
+
+#define RPS_MAX_CPUS 16UL /* must be a power of 2 */
+
+/* configuration options (cmdline arguments) */
+static uint16_t cfg_dport = 8000;
+static int cfg_family = AF_INET6;
+static char *cfg_ifname = "eth0";
+static int cfg_num_queues;
+static int cfg_num_rps_cpus;
+static bool cfg_sink;
+static int cfg_type = SOCK_STREAM;
+static int cfg_timeout_msec = 1000;
+static bool cfg_verbose;
+
+/* global vars */
+static int num_cpus;
+static int ring_block_nr;
+static int ring_block_sz;
+
+/* stats */
+static int frames_received;
+static int frames_nohash;
+static int frames_error;
+
+#define log_verbose(args...) do { if (cfg_verbose) fprintf(stderr, args); } while (0)
+
+/* tpacket ring */
+struct ring_state {
+ int fd;
+ char *mmap;
+ int idx;
+ int cpu;
+};
+
+static unsigned int rx_irq_cpus[RSS_MAX_CPUS]; /* map from rxq to cpu */
+static int rps_silo_to_cpu[RPS_MAX_CPUS];
+static unsigned char toeplitz_key[TOEPLITZ_KEY_MAX_LEN];
+static struct ring_state rings[RSS_MAX_CPUS];
+
+static inline uint32_t toeplitz(const unsigned char *four_tuple,
+ const unsigned char *key)
+{
+ int i, bit, ret = 0;
+ uint32_t key32;
+
+ key32 = ntohl(*((uint32_t *)key));
+ key += 4;
+
+ for (i = 0; i < FOUR_TUPLE_MAX_LEN; i++) {
+ for (bit = 7; bit >= 0; bit--) {
+ if (four_tuple[i] & (1 << bit))
+ ret ^= key32;
+
+ key32 <<= 1;
+ key32 |= !!(key[0] & (1 << bit));
+ }
+ key++;
+ }
+
+ return ret;
+}
+
+/* Compare computed cpu with arrival cpu from packet_fanout_cpu */
+static void verify_rss(uint32_t rx_hash, int cpu)
+{
+ int queue = rx_hash % cfg_num_queues;
+
+ log_verbose(" rxq %d (cpu %d)", queue, rx_irq_cpus[queue]);
+ if (rx_irq_cpus[queue] != cpu) {
+ log_verbose(". error: rss cpu mismatch (%d)", cpu);
+ frames_error++;
+ }
+}
+
+static void verify_rps(uint64_t rx_hash, int cpu)
+{
+ int silo = (rx_hash * cfg_num_rps_cpus) >> 32;
+
+ log_verbose(" silo %d (cpu %d)", silo, rps_silo_to_cpu[silo]);
+ if (rps_silo_to_cpu[silo] != cpu) {
+ log_verbose(". error: rps cpu mismatch (%d)", cpu);
+ frames_error++;
+ }
+}
+
+static void log_rxhash(int cpu, uint32_t rx_hash,
+ const char *addrs, int addr_len)
+{
+ char saddr[INET6_ADDRSTRLEN], daddr[INET6_ADDRSTRLEN];
+ uint16_t *ports;
+
+ if (!inet_ntop(cfg_family, addrs, saddr, sizeof(saddr)) ||
+ !inet_ntop(cfg_family, addrs + addr_len, daddr, sizeof(daddr)))
+ error(1, 0, "address parse error");
+
+ ports = (void *)addrs + (addr_len * 2);
+ log_verbose("cpu %d: rx_hash 0x%08x [saddr %s daddr %s sport %02hu dport %02hu]",
+ cpu, rx_hash, saddr, daddr,
+ ntohs(ports[0]), ntohs(ports[1]));
+}
+
+/* Compare computed rxhash with rxhash received from tpacket_v3 */
+static void verify_rxhash(const char *pkt, uint32_t rx_hash, int cpu)
+{
+ unsigned char four_tuple[FOUR_TUPLE_MAX_LEN] = {0};
+ uint32_t rx_hash_sw;
+ const char *addrs;
+ int addr_len;
+
+ if (cfg_family == AF_INET) {
+ addr_len = sizeof(struct in_addr);
+ addrs = pkt + offsetof(struct iphdr, saddr);
+ } else {
+ addr_len = sizeof(struct in6_addr);
+ addrs = pkt + offsetof(struct ip6_hdr, ip6_src);
+ }
+
+ memcpy(four_tuple, addrs, (addr_len * 2) + (sizeof(uint16_t) * 2));
+ rx_hash_sw = toeplitz(four_tuple, toeplitz_key);
+
+ if (cfg_verbose)
+ log_rxhash(cpu, rx_hash, addrs, addr_len);
+
+ if (rx_hash != rx_hash_sw) {
+ log_verbose(" != expected 0x%x\n", rx_hash_sw);
+ frames_error++;
+ return;
+ }
+
+ log_verbose(" OK");
+ if (cfg_num_queues)
+ verify_rss(rx_hash, cpu);
+ else if (cfg_num_rps_cpus)
+ verify_rps(rx_hash, cpu);
+ log_verbose("\n");
+}
+
+static char *recv_frame(const struct ring_state *ring, char *frame)
+{
+ struct tpacket3_hdr *hdr = (void *)frame;
+
+ if (hdr->hv1.tp_rxhash)
+ verify_rxhash(frame + hdr->tp_net, hdr->hv1.tp_rxhash,
+ ring->cpu);
+ else
+ frames_nohash++;
+
+ return frame + hdr->tp_next_offset;
+}
+
+/* A single TPACKET_V3 block can hold multiple frames */
+static bool recv_block(struct ring_state *ring)
+{
+ struct tpacket_block_desc *block;
+ char *frame;
+ int i;
+
+ block = (void *)(ring->mmap + ring->idx * ring_block_sz);
+ if (!(block->hdr.bh1.block_status & TP_STATUS_USER))
+ return false;
+
+ frame = (char *)block;
+ frame += block->hdr.bh1.offset_to_first_pkt;
+
+ for (i = 0; i < block->hdr.bh1.num_pkts; i++) {
+ frame = recv_frame(ring, frame);
+ frames_received++;
+ }
+
+ block->hdr.bh1.block_status = TP_STATUS_KERNEL;
+ ring->idx = (ring->idx + 1) % ring_block_nr;
+
+ return true;
+}
+
+/* simple test: sleep once unconditionally and then process all rings */
+static void process_rings(void)
+{
+ int i;
+
+ usleep(1000 * cfg_timeout_msec);
+
+ for (i = 0; i < num_cpus; i++)
+ do {} while (recv_block(&rings[i]));
+
+ fprintf(stderr, "count: pass=%u nohash=%u fail=%u\n",
+ frames_received - frames_nohash - frames_error,
+ frames_nohash, frames_error);
+}
+
+static char *setup_ring(int fd)
+{
+ struct tpacket_req3 req3 = {0};
+ void *ring;
+
+ req3.tp_retire_blk_tov = cfg_timeout_msec / 8;
+ req3.tp_feature_req_word = TP_FT_REQ_FILL_RXHASH;
+
+ req3.tp_frame_size = 2048;
+ req3.tp_frame_nr = 1 << 10;
+ req3.tp_block_nr = 16;
+
+ req3.tp_block_size = req3.tp_frame_size * req3.tp_frame_nr;
+ req3.tp_block_size /= req3.tp_block_nr;
+
+ if (setsockopt(fd, SOL_PACKET, PACKET_RX_RING, &req3, sizeof(req3)))
+ error(1, errno, "setsockopt PACKET_RX_RING");
+
+ ring_block_sz = req3.tp_block_size;
+ ring_block_nr = req3.tp_block_nr;
+
+ ring = mmap(0, req3.tp_block_size * req3.tp_block_nr,
+ PROT_READ | PROT_WRITE,
+ MAP_SHARED | MAP_LOCKED | MAP_POPULATE, fd, 0);
+ if (ring == MAP_FAILED)
+ error(1, 0, "mmap failed");
+
+ return ring;
+}
+
+static void __set_filter(int fd, int off_proto, uint8_t proto, int off_dport)
+{
+ struct sock_filter filter[] = {
+ BPF_STMT(BPF_LD + BPF_B + BPF_ABS, SKF_AD_OFF + SKF_AD_PKTTYPE),
+ BPF_JUMP(BPF_JMP + BPF_JEQ + BPF_K, PACKET_HOST, 0, 4),
+ BPF_STMT(BPF_LD + BPF_B + BPF_ABS, off_proto),
+ BPF_JUMP(BPF_JMP + BPF_JEQ + BPF_K, proto, 0, 2),
+ BPF_STMT(BPF_LD + BPF_H + BPF_ABS, off_dport),
+ BPF_JUMP(BPF_JMP + BPF_JEQ + BPF_K, cfg_dport, 1, 0),
+ BPF_STMT(BPF_RET + BPF_K, 0),
+ BPF_STMT(BPF_RET + BPF_K, 0xFFFF),
+ };
+ struct sock_fprog prog = {};
+
+ prog.filter = filter;
+ prog.len = ARRAY_SIZE(filter);
+ if (setsockopt(fd, SOL_SOCKET, SO_ATTACH_FILTER, &prog, sizeof(prog)))
+ error(1, errno, "setsockopt filter");
+}
+
+/* filter on transport protocol and destination port */
+static void set_filter(int fd)
+{
+ const int off_dport = offsetof(struct tcphdr, dest); /* same for udp */
+ uint8_t proto;
+
+ proto = cfg_type == SOCK_STREAM ? IPPROTO_TCP : IPPROTO_UDP;
+ if (cfg_family == AF_INET)
+ __set_filter(fd, offsetof(struct iphdr, protocol), proto,
+ sizeof(struct iphdr) + off_dport);
+ else
+ __set_filter(fd, offsetof(struct ip6_hdr, ip6_nxt), proto,
+ sizeof(struct ip6_hdr) + off_dport);
+}
+
+/* drop everything: used temporarily during setup */
+static void set_filter_null(int fd)
+{
+ struct sock_filter filter[] = {
+ BPF_STMT(BPF_RET + BPF_K, 0),
+ };
+ struct sock_fprog prog = {};
+
+ prog.filter = filter;
+ prog.len = ARRAY_SIZE(filter);
+ if (setsockopt(fd, SOL_SOCKET, SO_ATTACH_FILTER, &prog, sizeof(prog)))
+ error(1, errno, "setsockopt filter");
+}
+
+static int create_ring(char **ring)
+{
+ struct fanout_args args = {
+ .id = 1,
+ .type_flags = PACKET_FANOUT_CPU,
+ .max_num_members = RSS_MAX_CPUS
+ };
+ struct sockaddr_ll ll = { 0 };
+ int fd, val;
+
+ fd = socket(PF_PACKET, SOCK_DGRAM, 0);
+ if (fd == -1)
+ error(1, errno, "socket creation failed");
+
+ val = TPACKET_V3;
+ if (setsockopt(fd, SOL_PACKET, PACKET_VERSION, &val, sizeof(val)))
+ error(1, errno, "setsockopt PACKET_VERSION");
+ *ring = setup_ring(fd);
+
+ /* block packets until all rings are added to the fanout group:
+ * else packets can arrive during setup and get misclassified
+ */
+ set_filter_null(fd);
+
+ ll.sll_family = AF_PACKET;
+ ll.sll_ifindex = if_nametoindex(cfg_ifname);
+ ll.sll_protocol = cfg_family == AF_INET ? htons(ETH_P_IP) :
+ htons(ETH_P_IPV6);
+ if (bind(fd, (void *)&ll, sizeof(ll)))
+ error(1, errno, "bind");
+
+ /* must come after bind: verifies all programs in group match */
+ if (setsockopt(fd, SOL_PACKET, PACKET_FANOUT, &args, sizeof(args))) {
+ /* on failure, retry using old API if that is sufficient:
+ * it has a hard limit of 256 sockets, so only try if
+ * (a) only testing rxhash, not RSS or (b) <= 256 cpus.
+ * in this API, the third argument is left implicit.
+ */
+ if (cfg_num_queues || num_cpus > 256 ||
+ setsockopt(fd, SOL_PACKET, PACKET_FANOUT,
+ &args, sizeof(uint32_t)))
+ error(1, errno, "setsockopt PACKET_FANOUT cpu");
+ }
+
+ return fd;
+}
+
+/* setup inet(6) socket to blackhole the test traffic, if arg '-s' */
+static int setup_sink(void)
+{
+ int fd, val;
+
+ fd = socket(cfg_family, cfg_type, 0);
+ if (fd == -1)
+ error(1, errno, "socket %d.%d", cfg_family, cfg_type);
+
+ val = 1 << 20;
+ if (setsockopt(fd, SOL_SOCKET, SO_RCVBUFFORCE, &val, sizeof(val)))
+ error(1, errno, "setsockopt rcvbuf");
+
+ return fd;
+}
+
+static void setup_rings(void)
+{
+ int i;
+
+ for (i = 0; i < num_cpus; i++) {
+ rings[i].cpu = i;
+ rings[i].fd = create_ring(&rings[i].mmap);
+ }
+
+ /* accept packets once all rings in the fanout group are up */
+ for (i = 0; i < num_cpus; i++)
+ set_filter(rings[i].fd);
+}
+
+static void cleanup_rings(void)
+{
+ int i;
+
+ for (i = 0; i < num_cpus; i++) {
+ if (munmap(rings[i].mmap, ring_block_nr * ring_block_sz))
+ error(1, errno, "munmap");
+ if (close(rings[i].fd))
+ error(1, errno, "close");
+ }
+}
+
+static void parse_cpulist(const char *arg)
+{
+ do {
+ rx_irq_cpus[cfg_num_queues++] = strtol(arg, NULL, 10);
+
+ arg = strchr(arg, ',');
+ if (!arg)
+ break;
+ arg++; // skip ','
+ } while (1);
+}
+
+static void show_cpulist(void)
+{
+ int i;
+
+ for (i = 0; i < cfg_num_queues; i++)
+ fprintf(stderr, "rxq %d: cpu %d\n", i, rx_irq_cpus[i]);
+}
+
+static void show_silos(void)
+{
+ int i;
+
+ for (i = 0; i < cfg_num_rps_cpus; i++)
+ fprintf(stderr, "silo %d: cpu %d\n", i, rps_silo_to_cpu[i]);
+}
+
+static void parse_toeplitz_key(const char *str, int slen, unsigned char *key)
+{
+ int i, ret, off;
+
+ if (slen < TOEPLITZ_STR_MIN_LEN ||
+ slen > TOEPLITZ_STR_MAX_LEN + 1)
+ error(1, 0, "invalid toeplitz key");
+
+ for (i = 0, off = 0; off < slen; i++, off += 3) {
+ ret = sscanf(str + off, "%hhx", &key[i]);
+ if (ret != 1)
+ error(1, 0, "key parse error at %d off %d len %d",
+ i, off, slen);
+ }
+}
+
+static void parse_rps_bitmap(const char *arg)
+{
+ unsigned long bitmap;
+ int i;
+
+ bitmap = strtoul(arg, NULL, 0);
+
+ if (bitmap & ~(RPS_MAX_CPUS - 1))
+ error(1, 0, "rps bitmap 0x%lx out of bounds 0..%lu",
+ bitmap, RPS_MAX_CPUS - 1);
+
+ for (i = 0; i < RPS_MAX_CPUS; i++)
+ if (bitmap & 1UL << i)
+ rps_silo_to_cpu[cfg_num_rps_cpus++] = i;
+}
+
+static void parse_opts(int argc, char **argv)
+{
+ static struct option long_options[] = {
+ {"dport", required_argument, 0, 'd'},
+ {"cpus", required_argument, 0, 'C'},
+ {"key", required_argument, 0, 'k'},
+ {"iface", required_argument, 0, 'i'},
+ {"ipv4", no_argument, 0, '4'},
+ {"ipv6", no_argument, 0, '6'},
+ {"sink", no_argument, 0, 's'},
+ {"tcp", no_argument, 0, 't'},
+ {"timeout", required_argument, 0, 'T'},
+ {"udp", no_argument, 0, 'u'},
+ {"verbose", no_argument, 0, 'v'},
+ {"rps", required_argument, 0, 'r'},
+ {0, 0, 0, 0}
+ };
+ bool have_toeplitz = false;
+ int index, c;
+
+ while ((c = getopt_long(argc, argv, "46C:d:i:k:r:stT:uv", long_options, &index)) != -1) {
+ switch (c) {
+ case '4':
+ cfg_family = AF_INET;
+ break;
+ case '6':
+ cfg_family = AF_INET6;
+ break;
+ case 'C':
+ parse_cpulist(optarg);
+ break;
+ case 'd':
+ cfg_dport = strtol(optarg, NULL, 0);
+ break;
+ case 'i':
+ cfg_ifname = optarg;
+ break;
+ case 'k':
+ parse_toeplitz_key(optarg, strlen(optarg),
+ toeplitz_key);
+ have_toeplitz = true;
+ break;
+ case 'r':
+ parse_rps_bitmap(optarg);
+ break;
+ case 's':
+ cfg_sink = true;
+ break;
+ case 't':
+ cfg_type = SOCK_STREAM;
+ break;
+ case 'T':
+ cfg_timeout_msec = strtol(optarg, NULL, 0);
+ break;
+ case 'u':
+ cfg_type = SOCK_DGRAM;
+ break;
+ case 'v':
+ cfg_verbose = true;
+ break;
+
+ default:
+ error(1, 0, "unknown option %c", optopt);
+ break;
+ }
+ }
+
+ if (!have_toeplitz)
+ error(1, 0, "Must supply rss key ('-k')");
+
+ num_cpus = get_nprocs();
+ if (num_cpus > RSS_MAX_CPUS)
+ error(1, 0, "increase RSS_MAX_CPUS");
+
+ if (cfg_num_queues && cfg_num_rps_cpus)
+ error(1, 0,
+ "Can't supply both RSS cpus ('-C') and RPS map ('-r')");
+ if (cfg_verbose) {
+ show_cpulist();
+ show_silos();
+ }
+}
+
+int main(int argc, char **argv)
+{
+ const int min_tests = 10;
+ int fd_sink = -1;
+
+ parse_opts(argc, argv);
+
+ if (cfg_sink)
+ fd_sink = setup_sink();
+
+ setup_rings();
+ process_rings();
+ cleanup_rings();
+
+ if (cfg_sink && close(fd_sink))
+ error(1, errno, "close sink");
+
+ if (frames_received - frames_nohash < min_tests)
+ error(1, 0, "too few frames for verification");
+
+ return frames_error;
+}
diff --git a/tools/testing/selftests/net/toeplitz.sh b/tools/testing/selftests/net/toeplitz.sh
new file mode 100755
index 000000000000..8ff172f7bb1b
--- /dev/null
+++ b/tools/testing/selftests/net/toeplitz.sh
@@ -0,0 +1,199 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+#
+# extended toeplitz test: test rxhash plus, optionally, either (1) rss mapping
+# from rxhash to rx queue ('-rss') or (2) rps mapping from rxhash to cpu
+# ('-rps <rps_map>')
+#
+# irq-pattern-prefix can be derived from /sys/kernel/irq/*/action,
+# which is a driver-specific encoding.
+#
+# invoke as ./toeplitz.sh (-i <iface>) -u|-t -4|-6 \
+# [(-rss -irq_prefix <irq-pattern-prefix>)|(-rps <rps_map>)]
+
+source setup_loopback.sh
+readonly SERVER_IP4="192.168.1.200/24"
+readonly SERVER_IP6="fda8::1/64"
+readonly SERVER_MAC="aa:00:00:00:00:02"
+
+readonly CLIENT_IP4="192.168.1.100/24"
+readonly CLIENT_IP6="fda8::2/64"
+readonly CLIENT_MAC="aa:00:00:00:00:01"
+
+PORT=8000
+KEY="$(</proc/sys/net/core/netdev_rss_key)"
+TEST_RSS=false
+RPS_MAP=""
+PROTO_FLAG=""
+IP_FLAG=""
+DEV="eth0"
+
+# Return the number of rxqs among which RSS is configured to spread packets.
+# This is determined by reading the RSS indirection table using ethtool.
+get_rss_cfg_num_rxqs() {
+ echo $(ethtool -x "${DEV}" |
+ grep -E [[:space:]]+[0-9]+:[[:space:]]+ |
+ cut -d: -f2- |
+ awk '{$1=$1};1' |
+ tr ' ' '\n' |
+ sort -u |
+ wc -l)
+}
+
+# Return a list of the receive irq handler cpus.
+# The list is ordered by the irqs, so first rxq-0 cpu, then rxq-1 cpu, etc.
+# Reads /sys/kernel/irq/ in order, so algorithm depends on
+# irq_{rxq-0} < irq_{rxq-1}, etc.
+get_rx_irq_cpus() {
+ CPUS=""
+ # sort so that irq 2 is read before irq 10
+ SORTED_IRQS=$(for i in /sys/kernel/irq/*; do echo $i; done | sort -V)
+ # Consider only as many queues as RSS actually uses. We assume that
+ # if RSS_CFG_NUM_RXQS=N, then RSS uses rxqs 0-(N-1).
+ RSS_CFG_NUM_RXQS=$(get_rss_cfg_num_rxqs)
+ RXQ_COUNT=0
+
+ for i in ${SORTED_IRQS}
+ do
+ [[ "${RXQ_COUNT}" -lt "${RSS_CFG_NUM_RXQS}" ]] || break
+ # lookup relevant IRQs by action name
+ [[ -e "$i/actions" ]] || continue
+ cat "$i/actions" | grep -q "${IRQ_PATTERN}" || continue
+ irqname=$(<"$i/actions")
+
+ # does the IRQ get called
+ irqcount=$(cat "$i/per_cpu_count" | tr -d '0,')
+ [[ -n "${irqcount}" ]] || continue
+
+ # lookup CPU
+ irq=$(basename "$i")
+ cpu=$(cat "/proc/irq/$irq/smp_affinity_list")
+
+ if [[ -z "${CPUS}" ]]; then
+ CPUS="${cpu}"
+ else
+ CPUS="${CPUS},${cpu}"
+ fi
+ RXQ_COUNT=$((RXQ_COUNT+1))
+ done
+
+ echo "${CPUS}"
+}
+
+get_disable_rfs_cmd() {
+ echo "echo 0 > /proc/sys/net/core/rps_sock_flow_entries;"
+}
+
+get_set_rps_bitmaps_cmd() {
+ CMD=""
+ for i in /sys/class/net/${DEV}/queues/rx-*/rps_cpus
+ do
+ CMD="${CMD} echo $1 > ${i};"
+ done
+
+ echo "${CMD}"
+}
+
+get_disable_rps_cmd() {
+ echo "$(get_set_rps_bitmaps_cmd 0)"
+}
+
+die() {
+ echo "$1"
+ exit 1
+}
+
+check_nic_rxhash_enabled() {
+ local -r pattern="receive-hashing:\ on"
+
+ ethtool -k "${DEV}" | grep -q "${pattern}" || die "rxhash must be enabled"
+}
+
+parse_opts() {
+ local prog=$0
+ shift 1
+
+ while [[ "$1" =~ "-" ]]; do
+ if [[ "$1" = "-irq_prefix" ]]; then
+ shift
+ IRQ_PATTERN="^$1-[0-9]*$"
+ elif [[ "$1" = "-u" || "$1" = "-t" ]]; then
+ PROTO_FLAG="$1"
+ elif [[ "$1" = "-4" ]]; then
+ IP_FLAG="$1"
+ SERVER_IP="${SERVER_IP4}"
+ CLIENT_IP="${CLIENT_IP4}"
+ elif [[ "$1" = "-6" ]]; then
+ IP_FLAG="$1"
+ SERVER_IP="${SERVER_IP6}"
+ CLIENT_IP="${CLIENT_IP6}"
+ elif [[ "$1" = "-rss" ]]; then
+ TEST_RSS=true
+ elif [[ "$1" = "-rps" ]]; then
+ shift
+ RPS_MAP="$1"
+ elif [[ "$1" = "-i" ]]; then
+ shift
+ DEV="$1"
+ else
+ die "Usage: ${prog} (-i <iface>) -u|-t -4|-6 \
+ [(-rss -irq_prefix <irq-pattern-prefix>)|(-rps <rps_map>)]"
+ fi
+ shift
+ done
+}
+
+setup() {
+ setup_loopback_environment "${DEV}"
+
+ # Set up server_ns namespace and client_ns namespace
+ setup_macvlan_ns "${DEV}" $server_ns server \
+ "${SERVER_MAC}" "${SERVER_IP}"
+ setup_macvlan_ns "${DEV}" $client_ns client \
+ "${CLIENT_MAC}" "${CLIENT_IP}"
+}
+
+cleanup() {
+ cleanup_macvlan_ns $server_ns server $client_ns client
+ cleanup_loopback "${DEV}"
+}
+
+parse_opts $0 $@
+
+setup
+trap cleanup EXIT
+
+check_nic_rxhash_enabled
+
+# Actual test starts here
+if [[ "${TEST_RSS}" = true ]]; then
+ # RPS/RFS must be disabled because they move packets between cpus,
+ # which breaks the PACKET_FANOUT_CPU identification of RSS decisions.
+ eval "$(get_disable_rfs_cmd) $(get_disable_rps_cmd)" \
+ ip netns exec $server_ns ./toeplitz "${IP_FLAG}" "${PROTO_FLAG}" \
+ -d "${PORT}" -i "${DEV}" -k "${KEY}" -T 1000 \
+ -C "$(get_rx_irq_cpus)" -s -v &
+elif [[ ! -z "${RPS_MAP}" ]]; then
+ eval "$(get_disable_rfs_cmd) $(get_set_rps_bitmaps_cmd ${RPS_MAP})" \
+ ip netns exec $server_ns ./toeplitz "${IP_FLAG}" "${PROTO_FLAG}" \
+ -d "${PORT}" -i "${DEV}" -k "${KEY}" -T 1000 \
+ -r "0x${RPS_MAP}" -s -v &
+else
+ ip netns exec $server_ns ./toeplitz "${IP_FLAG}" "${PROTO_FLAG}" \
+ -d "${PORT}" -i "${DEV}" -k "${KEY}" -T 1000 -s -v &
+fi
+
+server_pid=$!
+
+ip netns exec $client_ns ./toeplitz_client.sh "${PROTO_FLAG}" \
+ "${IP_FLAG}" "${SERVER_IP%%/*}" "${PORT}" &
+
+client_pid=$!
+
+wait "${server_pid}"
+exit_code=$?
+kill -9 "${client_pid}"
+if [[ "${exit_code}" -eq 0 ]]; then
+ echo "Test Succeeded!"
+fi
+exit "${exit_code}"
diff --git a/tools/testing/selftests/net/toeplitz_client.sh b/tools/testing/selftests/net/toeplitz_client.sh
new file mode 100755
index 000000000000..2fef34f4aba1
--- /dev/null
+++ b/tools/testing/selftests/net/toeplitz_client.sh
@@ -0,0 +1,28 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+#
+# A simple program for generating traffic for the toeplitz test.
+#
+# This program sends packets periodically for, conservatively, 20 seconds. The
+# intent is for the calling program to kill this program once it is no longer
+# needed, rather than waiting for the 20 second expiration.
+
+send_traffic() {
+ expiration=$((SECONDS+20))
+ while [[ "${SECONDS}" -lt "${expiration}" ]]
+ do
+ if [[ "${PROTO}" == "-u" ]]; then
+ echo "msg $i" | nc "${IPVER}" -u -w 0 "${ADDR}" "${PORT}"
+ else
+ echo "msg $i" | nc "${IPVER}" -w 0 "${ADDR}" "${PORT}"
+ fi
+ sleep 0.001
+ done
+}
+
+PROTO=$1
+IPVER=$2
+ADDR=$3
+PORT=$4
+
+send_traffic
diff --git a/tools/testing/selftests/net/traceroute.sh b/tools/testing/selftests/net/traceroute.sh
index de9ca97abc30..282f14760940 100755
--- a/tools/testing/selftests/net/traceroute.sh
+++ b/tools/testing/selftests/net/traceroute.sh
@@ -4,6 +4,7 @@
# Run traceroute/traceroute6 tests
#
+source lib.sh
VERBOSE=0
PAUSE_ON_FAIL=no
@@ -69,9 +70,6 @@ create_ns()
[ -z "${addr}" ] && addr="-"
[ -z "${addr6}" ] && addr6="-"
- ip netns add ${ns}
-
- ip netns exec ${ns} ip link set lo up
if [ "${addr}" != "-" ]; then
ip netns exec ${ns} ip addr add dev lo ${addr}
fi
@@ -160,12 +158,7 @@ connect_ns()
cleanup_traceroute6()
{
- local ns
-
- for ns in host-1 host-2 router-1 router-2
- do
- ip netns del ${ns} 2>/dev/null
- done
+ cleanup_ns $h1 $h2 $r1 $r2
}
setup_traceroute6()
@@ -176,33 +169,34 @@ setup_traceroute6()
cleanup_traceroute6
set -e
- create_ns host-1
- create_ns host-2
- create_ns router-1
- create_ns router-2
+ setup_ns h1 h2 r1 r2
+ create_ns $h1
+ create_ns $h2
+ create_ns $r1
+ create_ns $r2
# Setup N3
- connect_ns router-2 eth3 - 2000:103::2/64 host-2 eth3 - 2000:103::4/64
- ip netns exec host-2 ip route add default via 2000:103::2
+ connect_ns $r2 eth3 - 2000:103::2/64 $h2 eth3 - 2000:103::4/64
+ ip netns exec $h2 ip route add default via 2000:103::2
# Setup N2
- connect_ns router-1 eth2 - 2000:102::1/64 router-2 eth2 - 2000:102::2/64
- ip netns exec router-1 ip route add default via 2000:102::2
+ connect_ns $r1 eth2 - 2000:102::1/64 $r2 eth2 - 2000:102::2/64
+ ip netns exec $r1 ip route add default via 2000:102::2
# Setup N1. host-1 and router-2 connect to a bridge in router-1.
- ip netns exec router-1 ip link add name ${brdev} type bridge
- ip netns exec router-1 ip link set ${brdev} up
- ip netns exec router-1 ip addr add 2000:101::1/64 dev ${brdev}
+ ip netns exec $r1 ip link add name ${brdev} type bridge
+ ip netns exec $r1 ip link set ${brdev} up
+ ip netns exec $r1 ip addr add 2000:101::1/64 dev ${brdev}
- connect_ns host-1 eth0 - 2000:101::3/64 router-1 eth0 - -
- ip netns exec router-1 ip link set dev eth0 master ${brdev}
- ip netns exec host-1 ip route add default via 2000:101::1
+ connect_ns $h1 eth0 - 2000:101::3/64 $r1 eth0 - -
+ ip netns exec $r1 ip link set dev eth0 master ${brdev}
+ ip netns exec $h1 ip route add default via 2000:101::1
- connect_ns router-2 eth1 - 2000:101::2/64 router-1 eth1 - -
- ip netns exec router-1 ip link set dev eth1 master ${brdev}
+ connect_ns $r2 eth1 - 2000:101::2/64 $r1 eth1 - -
+ ip netns exec $r1 ip link set dev eth1 master ${brdev}
# Prime the network
- ip netns exec host-1 ping6 -c5 2000:103::4 >/dev/null 2>&1
+ ip netns exec $h1 ping6 -c5 2000:103::4 >/dev/null 2>&1
set +e
}
@@ -217,7 +211,7 @@ run_traceroute6()
setup_traceroute6
# traceroute6 host-2 from host-1 (expects 2000:102::2)
- run_cmd host-1 "traceroute6 2000:103::4 | grep -q 2000:102::2"
+ run_cmd $h1 "traceroute6 2000:103::4 | grep -q 2000:102::2"
log_test $? 0 "IPV6 traceroute"
cleanup_traceroute6
@@ -240,12 +234,7 @@ run_traceroute6()
cleanup_traceroute()
{
- local ns
-
- for ns in host-1 host-2 router
- do
- ip netns del ${ns} 2>/dev/null
- done
+ cleanup_ns $h1 $h2 $router
}
setup_traceroute()
@@ -254,24 +243,25 @@ setup_traceroute()
cleanup_traceroute
set -e
- create_ns host-1
- create_ns host-2
- create_ns router
+ setup_ns h1 h2 router
+ create_ns $h1
+ create_ns $h2
+ create_ns $router
- connect_ns host-1 eth0 1.0.1.3/24 - \
- router eth1 1.0.3.1/24 -
- ip netns exec host-1 ip route add default via 1.0.1.1
+ connect_ns $h1 eth0 1.0.1.3/24 - \
+ $router eth1 1.0.3.1/24 -
+ ip netns exec $h1 ip route add default via 1.0.1.1
- ip netns exec router ip addr add 1.0.1.1/24 dev eth1
- ip netns exec router sysctl -qw \
+ ip netns exec $router ip addr add 1.0.1.1/24 dev eth1
+ ip netns exec $router sysctl -qw \
net.ipv4.icmp_errors_use_inbound_ifaddr=1
- connect_ns host-2 eth0 1.0.2.4/24 - \
- router eth2 1.0.2.1/24 -
- ip netns exec host-2 ip route add default via 1.0.2.1
+ connect_ns $h2 eth0 1.0.2.4/24 - \
+ $router eth2 1.0.2.1/24 -
+ ip netns exec $h2 ip route add default via 1.0.2.1
# Prime the network
- ip netns exec host-1 ping -c5 1.0.2.4 >/dev/null 2>&1
+ ip netns exec $h1 ping -c5 1.0.2.4 >/dev/null 2>&1
set +e
}
@@ -286,7 +276,7 @@ run_traceroute()
setup_traceroute
# traceroute host-2 from host-1 (expects 1.0.1.1). Takes a while.
- run_cmd host-1 "traceroute 1.0.2.4 | grep -q 1.0.1.1"
+ run_cmd $h1 "traceroute 1.0.2.4 | grep -q 1.0.1.1"
log_test $? 0 "IPV4 traceroute"
cleanup_traceroute
diff --git a/tools/testing/selftests/net/tun.c b/tools/testing/selftests/net/tun.c
new file mode 100644
index 000000000000..fa83918b62d1
--- /dev/null
+++ b/tools/testing/selftests/net/tun.c
@@ -0,0 +1,162 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#define _GNU_SOURCE
+
+#include <errno.h>
+#include <fcntl.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+#include <linux/if.h>
+#include <linux/if_tun.h>
+#include <linux/netlink.h>
+#include <linux/rtnetlink.h>
+#include <sys/ioctl.h>
+#include <sys/socket.h>
+
+#include "../kselftest_harness.h"
+
+static int tun_attach(int fd, char *dev)
+{
+ struct ifreq ifr;
+
+ memset(&ifr, 0, sizeof(ifr));
+ strcpy(ifr.ifr_name, dev);
+ ifr.ifr_flags = IFF_ATTACH_QUEUE;
+
+ return ioctl(fd, TUNSETQUEUE, (void *) &ifr);
+}
+
+static int tun_detach(int fd, char *dev)
+{
+ struct ifreq ifr;
+
+ memset(&ifr, 0, sizeof(ifr));
+ strcpy(ifr.ifr_name, dev);
+ ifr.ifr_flags = IFF_DETACH_QUEUE;
+
+ return ioctl(fd, TUNSETQUEUE, (void *) &ifr);
+}
+
+static int tun_alloc(char *dev)
+{
+ struct ifreq ifr;
+ int fd, err;
+
+ fd = open("/dev/net/tun", O_RDWR);
+ if (fd < 0) {
+ fprintf(stderr, "can't open tun: %s\n", strerror(errno));
+ return fd;
+ }
+
+ memset(&ifr, 0, sizeof(ifr));
+ strcpy(ifr.ifr_name, dev);
+ ifr.ifr_flags = IFF_TAP | IFF_NAPI | IFF_MULTI_QUEUE;
+
+ err = ioctl(fd, TUNSETIFF, (void *) &ifr);
+ if (err < 0) {
+ fprintf(stderr, "can't TUNSETIFF: %s\n", strerror(errno));
+ close(fd);
+ return err;
+ }
+ strcpy(dev, ifr.ifr_name);
+ return fd;
+}
+
+static int tun_delete(char *dev)
+{
+ struct {
+ struct nlmsghdr nh;
+ struct ifinfomsg ifm;
+ unsigned char data[64];
+ } req;
+ struct rtattr *rta;
+ int ret, rtnl;
+
+ rtnl = socket(AF_NETLINK, SOCK_DGRAM, NETLINK_ROUTE);
+ if (rtnl < 0) {
+ fprintf(stderr, "can't open rtnl: %s\n", strerror(errno));
+ return 1;
+ }
+
+ memset(&req, 0, sizeof(req));
+ req.nh.nlmsg_len = NLMSG_ALIGN(NLMSG_LENGTH(sizeof(req.ifm)));
+ req.nh.nlmsg_flags = NLM_F_REQUEST;
+ req.nh.nlmsg_type = RTM_DELLINK;
+
+ req.ifm.ifi_family = AF_UNSPEC;
+
+ rta = (struct rtattr *)(((char *)&req) + NLMSG_ALIGN(req.nh.nlmsg_len));
+ rta->rta_type = IFLA_IFNAME;
+ rta->rta_len = RTA_LENGTH(IFNAMSIZ);
+ req.nh.nlmsg_len += rta->rta_len;
+ memcpy(RTA_DATA(rta), dev, IFNAMSIZ);
+
+ ret = send(rtnl, &req, req.nh.nlmsg_len, 0);
+ if (ret < 0)
+ fprintf(stderr, "can't send: %s\n", strerror(errno));
+ ret = (unsigned int)ret != req.nh.nlmsg_len;
+
+ close(rtnl);
+ return ret;
+}
+
+FIXTURE(tun)
+{
+ char ifname[IFNAMSIZ];
+ int fd, fd2;
+};
+
+FIXTURE_SETUP(tun)
+{
+ memset(self->ifname, 0, sizeof(self->ifname));
+
+ self->fd = tun_alloc(self->ifname);
+ ASSERT_GE(self->fd, 0);
+
+ self->fd2 = tun_alloc(self->ifname);
+ ASSERT_GE(self->fd2, 0);
+}
+
+FIXTURE_TEARDOWN(tun)
+{
+ if (self->fd >= 0)
+ close(self->fd);
+ if (self->fd2 >= 0)
+ close(self->fd2);
+}
+
+TEST_F(tun, delete_detach_close) {
+ EXPECT_EQ(tun_delete(self->ifname), 0);
+ EXPECT_EQ(tun_detach(self->fd, self->ifname), -1);
+ EXPECT_EQ(errno, 22);
+}
+
+TEST_F(tun, detach_delete_close) {
+ EXPECT_EQ(tun_detach(self->fd, self->ifname), 0);
+ EXPECT_EQ(tun_delete(self->ifname), 0);
+}
+
+TEST_F(tun, detach_close_delete) {
+ EXPECT_EQ(tun_detach(self->fd, self->ifname), 0);
+ close(self->fd);
+ self->fd = -1;
+ EXPECT_EQ(tun_delete(self->ifname), 0);
+}
+
+TEST_F(tun, reattach_delete_close) {
+ EXPECT_EQ(tun_detach(self->fd, self->ifname), 0);
+ EXPECT_EQ(tun_attach(self->fd, self->ifname), 0);
+ EXPECT_EQ(tun_delete(self->ifname), 0);
+}
+
+TEST_F(tun, reattach_close_delete) {
+ EXPECT_EQ(tun_detach(self->fd, self->ifname), 0);
+ EXPECT_EQ(tun_attach(self->fd, self->ifname), 0);
+ close(self->fd);
+ self->fd = -1;
+ EXPECT_EQ(tun_delete(self->ifname), 0);
+}
+
+TEST_HARNESS_MAIN
diff --git a/tools/testing/selftests/net/txtimestamp.c b/tools/testing/selftests/net/txtimestamp.c
index 011b0da6b033..ec60a16c9307 100644
--- a/tools/testing/selftests/net/txtimestamp.c
+++ b/tools/testing/selftests/net/txtimestamp.c
@@ -26,6 +26,7 @@
#include <inttypes.h>
#include <linux/errqueue.h>
#include <linux/if_ether.h>
+#include <linux/if_packet.h>
#include <linux/ipv6.h>
#include <linux/net_tstamp.h>
#include <netdb.h>
@@ -34,7 +35,6 @@
#include <netinet/ip.h>
#include <netinet/udp.h>
#include <netinet/tcp.h>
-#include <netpacket/packet.h>
#include <poll.h>
#include <stdarg.h>
#include <stdbool.h>
@@ -64,6 +64,7 @@ static int cfg_payload_len = 10;
static int cfg_poll_timeout = 100;
static int cfg_delay_snd;
static int cfg_delay_ack;
+static int cfg_delay_tolerance_usec = 500;
static bool cfg_show_payload;
static bool cfg_do_pktinfo;
static bool cfg_busy_poll;
@@ -152,25 +153,27 @@ static void validate_key(int tskey, int tstype)
static void validate_timestamp(struct timespec *cur, int min_delay)
{
- int max_delay = min_delay + 500 /* processing time upper bound */;
int64_t cur64, start64;
+ int max_delay;
cur64 = timespec_to_us64(cur);
start64 = timespec_to_us64(&ts_usr);
+ max_delay = min_delay + cfg_delay_tolerance_usec;
if (cur64 < start64 + min_delay || cur64 > start64 + max_delay) {
- fprintf(stderr, "ERROR: %lu us expected between %d and %d\n",
+ fprintf(stderr, "ERROR: %" PRId64 " us expected between %d and %d\n",
cur64 - start64, min_delay, max_delay);
- test_failed = true;
+ if (!getenv("KSFT_MACHINE_SLOW"))
+ test_failed = true;
}
}
static void __print_ts_delta_formatted(int64_t ts_delta)
{
if (cfg_print_nsec)
- fprintf(stderr, "%lu ns", ts_delta);
+ fprintf(stderr, "%" PRId64 " ns", ts_delta);
else
- fprintf(stderr, "%lu us", ts_delta / NSEC_PER_USEC);
+ fprintf(stderr, "%" PRId64 " us", ts_delta / NSEC_PER_USEC);
}
static void __print_timestamp(const char *name, struct timespec *cur,
@@ -493,12 +496,12 @@ static void do_test(int family, unsigned int report_opt)
total_len = cfg_payload_len;
if (cfg_use_pf_packet || cfg_proto == SOCK_RAW) {
total_len += sizeof(struct udphdr);
- if (cfg_use_pf_packet || cfg_ipproto == IPPROTO_RAW)
+ if (cfg_use_pf_packet || cfg_ipproto == IPPROTO_RAW) {
if (family == PF_INET)
total_len += sizeof(struct iphdr);
else
total_len += sizeof(struct ipv6hdr);
-
+ }
/* special case, only rawv6_sendmsg:
* pass proto in sin6_port if not connected
* also see ANK comment in net/ipv4/raw.c
@@ -683,6 +686,7 @@ static void __attribute__((noreturn)) usage(const char *filepath)
" -r: use raw\n"
" -R: use raw (IP_HDRINCL)\n"
" -S N: usec to sleep before reading error queue\n"
+ " -t N: tolerance (usec) for timestamp validation\n"
" -u: use udp\n"
" -v: validate SND delay (usec)\n"
" -V: validate ACK delay (usec)\n"
@@ -697,7 +701,7 @@ static void parse_opt(int argc, char **argv)
int c;
while ((c = getopt(argc, argv,
- "46bc:CeEFhIl:LnNp:PrRS:uv:V:x")) != -1) {
+ "46bc:CeEFhIl:LnNp:PrRS:t:uv:V:x")) != -1) {
switch (c) {
case '4':
do_ipv6 = 0;
@@ -760,6 +764,9 @@ static void parse_opt(int argc, char **argv)
case 'S':
cfg_sleep_usec = strtoul(optarg, NULL, 10);
break;
+ case 't':
+ cfg_delay_tolerance_usec = strtoul(optarg, NULL, 10);
+ break;
case 'u':
proto_count++;
cfg_proto = SOCK_DGRAM;
diff --git a/tools/testing/selftests/net/txtimestamp.sh b/tools/testing/selftests/net/txtimestamp.sh
index eea6f5193693..25baca4b148e 100755
--- a/tools/testing/selftests/net/txtimestamp.sh
+++ b/tools/testing/selftests/net/txtimestamp.sh
@@ -8,13 +8,13 @@ set -e
setup() {
# set 1ms delay on lo egress
- tc qdisc add dev lo root netem delay 1ms
+ tc qdisc add dev lo root netem delay 10ms
# set 2ms delay on ifb0 egress
modprobe ifb
ip link add ifb_netem0 type ifb
ip link set dev ifb_netem0 up
- tc qdisc add dev ifb_netem0 root netem delay 2ms
+ tc qdisc add dev ifb_netem0 root netem delay 20ms
# redirect lo ingress through ifb0 egress
tc qdisc add dev lo handle ffff: ingress
@@ -24,9 +24,11 @@ setup() {
}
run_test_v4v6() {
- # SND will be delayed 1000us
- # ACK will be delayed 6000us: 1 + 2 ms round-trip
- local -r args="$@ -v 1000 -V 6000"
+ # SND will be delayed 10ms
+ # ACK will be delayed 60ms: 10 + 20 ms round-trip
+ # allow +/- tolerance of 8ms
+ # wait for ACK to be queued
+ local -r args="$@ -v 10000 -V 60000 -t 8000 -S 80000"
./txtimestamp ${args} -4 -L 127.0.0.1
./txtimestamp ${args} -6 -L ::1
@@ -75,7 +77,7 @@ main() {
fi
}
-if [[ "$(ip netns identify)" == "root" ]]; then
+if [[ -z "$(ip netns identify)" ]]; then
./in_netns.sh $0 $@
else
main $@
diff --git a/tools/testing/selftests/net/udpgro.sh b/tools/testing/selftests/net/udpgro.sh
index ac2a30be9b32..8802604148dd 100755
--- a/tools/testing/selftests/net/udpgro.sh
+++ b/tools/testing/selftests/net/udpgro.sh
@@ -3,8 +3,20 @@
#
# Run a series of udpgro functional tests.
+source net_helper.sh
+
readonly PEER_NS="ns-peer-$(mktemp -u XXXXXX)"
+BPF_FILE="xdp_dummy.o"
+
+# set global exit status, but never reset nonzero one.
+check_err()
+{
+ if [ $ret -eq 0 ]; then
+ ret=$1
+ fi
+}
+
cleanup() {
local -r jobs="$(jobs -p)"
local -r ns="$(ip netns list|grep $PEER_NS)"
@@ -26,7 +38,7 @@ cfg_veth() {
ip -netns "${PEER_NS}" addr add dev veth1 192.168.1.1/24
ip -netns "${PEER_NS}" addr add dev veth1 2001:db8::1/64 nodad
ip -netns "${PEER_NS}" link set dev veth1 up
- ip -n "${PEER_NS}" link set veth1 xdp object ../bpf/xdp_dummy.o section xdp_dummy
+ ip -n "${PEER_NS}" link set veth1 xdp object ${BPF_FILE} section xdp
}
run_one() {
@@ -41,10 +53,11 @@ run_one() {
echo "ok" || \
echo "failed" &
- # Hack: let bg programs complete the startup
- sleep 0.1
+ wait_local_port_listen ${PEER_NS} 8000 udp
./udpgso_bench_tx ${tx_args}
+ ret=$?
wait $(jobs -p)
+ return $ret
}
run_test() {
@@ -85,10 +98,12 @@ run_one_nat() {
echo "ok" || \
echo "failed"&
- sleep 0.1
+ wait_local_port_listen "${PEER_NS}" 8000 udp
./udpgso_bench_tx ${tx_args}
+ ret=$?
kill -INT $pid
wait $(jobs -p)
+ return $ret
}
run_one_2sock() {
@@ -104,13 +119,13 @@ run_one_2sock() {
echo "ok" || \
echo "failed" &
- # Hack: let bg programs complete the startup
- sleep 0.1
+ wait_local_port_listen "${PEER_NS}" 12345 udp
./udpgso_bench_tx ${tx_args} -p 12345
- sleep 0.1
- # first UDP GSO socket should be closed at this point
+ wait_local_port_listen "${PEER_NS}" 8000 udp
./udpgso_bench_tx ${tx_args}
+ ret=$?
wait $(jobs -p)
+ return $ret
}
run_nat_test() {
@@ -131,40 +146,58 @@ run_all() {
local -r core_args="-l 4"
local -r ipv4_args="${core_args} -4 -D 192.168.1.1"
local -r ipv6_args="${core_args} -6 -D 2001:db8::1"
+ ret=0
echo "ipv4"
run_test "no GRO" "${ipv4_args} -M 10 -s 1400" "-4 -n 10 -l 1400"
+ check_err $?
# explicitly check we are not receiving UDP_SEGMENT cmsg (-S -1)
# when GRO does not take place
run_test "no GRO chk cmsg" "${ipv4_args} -M 10 -s 1400" "-4 -n 10 -l 1400 -S -1"
+ check_err $?
# the GSO packets are aggregated because:
# * veth schedule napi after each xmit
# * segmentation happens in BH context, veth napi poll is delayed after
# the transmission of the last segment
run_test "GRO" "${ipv4_args} -M 1 -s 14720 -S 0 " "-4 -n 1 -l 14720"
+ check_err $?
run_test "GRO chk cmsg" "${ipv4_args} -M 1 -s 14720 -S 0 " "-4 -n 1 -l 14720 -S 1472"
+ check_err $?
run_test "GRO with custom segment size" "${ipv4_args} -M 1 -s 14720 -S 500 " "-4 -n 1 -l 14720"
+ check_err $?
run_test "GRO with custom segment size cmsg" "${ipv4_args} -M 1 -s 14720 -S 500 " "-4 -n 1 -l 14720 -S 500"
+ check_err $?
run_nat_test "bad GRO lookup" "${ipv4_args} -M 1 -s 14720 -S 0" "-n 10 -l 1472"
+ check_err $?
run_2sock_test "multiple GRO socks" "${ipv4_args} -M 1 -s 14720 -S 0 " "-4 -n 1 -l 14720 -S 1472"
+ check_err $?
echo "ipv6"
run_test "no GRO" "${ipv6_args} -M 10 -s 1400" "-n 10 -l 1400"
+ check_err $?
run_test "no GRO chk cmsg" "${ipv6_args} -M 10 -s 1400" "-n 10 -l 1400 -S -1"
+ check_err $?
run_test "GRO" "${ipv6_args} -M 1 -s 14520 -S 0" "-n 1 -l 14520"
+ check_err $?
run_test "GRO chk cmsg" "${ipv6_args} -M 1 -s 14520 -S 0" "-n 1 -l 14520 -S 1452"
+ check_err $?
run_test "GRO with custom segment size" "${ipv6_args} -M 1 -s 14520 -S 500" "-n 1 -l 14520"
+ check_err $?
run_test "GRO with custom segment size cmsg" "${ipv6_args} -M 1 -s 14520 -S 500" "-n 1 -l 14520 -S 500"
+ check_err $?
run_nat_test "bad GRO lookup" "${ipv6_args} -M 1 -s 14520 -S 0" "-n 10 -l 1452"
+ check_err $?
run_2sock_test "multiple GRO socks" "${ipv6_args} -M 1 -s 14520 -S 0 " "-n 1 -l 14520 -S 1452"
+ check_err $?
+ return $ret
}
-if [ ! -f ../bpf/xdp_dummy.o ]; then
- echo "Missing xdp_dummy helper. Build bpf selftest first"
+if [ ! -f ${BPF_FILE} ]; then
+ echo "Missing ${BPF_FILE}. Run 'make' first"
exit -1
fi
@@ -180,3 +213,5 @@ elif [[ $1 == "__subprocess_2sock" ]]; then
shift
run_one_2sock $@
fi
+
+exit $?
diff --git a/tools/testing/selftests/net/udpgro_bench.sh b/tools/testing/selftests/net/udpgro_bench.sh
index 820bc50f6b68..7080eae5312b 100755
--- a/tools/testing/selftests/net/udpgro_bench.sh
+++ b/tools/testing/selftests/net/udpgro_bench.sh
@@ -3,8 +3,12 @@
#
# Run a series of udpgro benchmarks
+source net_helper.sh
+
readonly PEER_NS="ns-peer-$(mktemp -u XXXXXX)"
+BPF_FILE="xdp_dummy.o"
+
cleanup() {
local -r jobs="$(jobs -p)"
local -r ns="$(ip netns list|grep $PEER_NS)"
@@ -34,12 +38,11 @@ run_one() {
ip -netns "${PEER_NS}" addr add dev veth1 2001:db8::1/64 nodad
ip -netns "${PEER_NS}" link set dev veth1 up
- ip -n "${PEER_NS}" link set veth1 xdp object ../bpf/xdp_dummy.o section xdp_dummy
+ ip -n "${PEER_NS}" link set veth1 xdp object ${BPF_FILE} section xdp
ip netns exec "${PEER_NS}" ./udpgso_bench_rx ${rx_args} -r &
ip netns exec "${PEER_NS}" ./udpgso_bench_rx -t ${rx_args} -r &
- # Hack: let bg programs complete the startup
- sleep 0.1
+ wait_local_port_listen "${PEER_NS}" 8000 udp
./udpgso_bench_tx ${tx_args}
}
@@ -80,8 +83,8 @@ run_all() {
run_udp "${ipv6_args}"
}
-if [ ! -f ../bpf/xdp_dummy.o ]; then
- echo "Missing xdp_dummy helper. Build bpf selftest first"
+if [ ! -f ${BPF_FILE} ]; then
+ echo "Missing ${BPF_FILE}. Run 'make' first"
exit -1
fi
diff --git a/tools/testing/selftests/net/udpgro_frglist.sh b/tools/testing/selftests/net/udpgro_frglist.sh
new file mode 100755
index 000000000000..e1ff645bd3d1
--- /dev/null
+++ b/tools/testing/selftests/net/udpgro_frglist.sh
@@ -0,0 +1,104 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+#
+# Run a series of udpgro benchmarks
+
+source net_helper.sh
+
+readonly PEER_NS="ns-peer-$(mktemp -u XXXXXX)"
+
+BPF_FILE="xdp_dummy.o"
+
+cleanup() {
+ local -r jobs="$(jobs -p)"
+ local -r ns="$(ip netns list|grep $PEER_NS)"
+
+ [ -n "${jobs}" ] && kill -INT ${jobs} 2>/dev/null
+ [ -n "$ns" ] && ip netns del $ns 2>/dev/null
+}
+trap cleanup EXIT
+
+run_one() {
+ # use 'rx' as separator between sender args and receiver args
+ local -r all="$@"
+ local -r tx_args=${all%rx*}
+ local rx_args=${all#*rx}
+
+
+
+ ip netns add "${PEER_NS}"
+ ip -netns "${PEER_NS}" link set lo up
+ ip link add type veth
+ ip link set dev veth0 up
+ ip addr add dev veth0 192.168.1.2/24
+ ip addr add dev veth0 2001:db8::2/64 nodad
+
+ ip link set dev veth1 netns "${PEER_NS}"
+ ip -netns "${PEER_NS}" addr add dev veth1 192.168.1.1/24
+ ip -netns "${PEER_NS}" addr add dev veth1 2001:db8::1/64 nodad
+ ip -netns "${PEER_NS}" link set dev veth1 up
+ ip netns exec "${PEER_NS}" ethtool -K veth1 rx-gro-list on
+
+
+ ip -n "${PEER_NS}" link set veth1 xdp object ${BPF_FILE} section xdp
+ tc -n "${PEER_NS}" qdisc add dev veth1 clsact
+ tc -n "${PEER_NS}" filter add dev veth1 ingress prio 4 protocol ipv6 bpf object-file nat6to4.o section schedcls/ingress6/nat_6 direct-action
+ tc -n "${PEER_NS}" filter add dev veth1 egress prio 4 protocol ip bpf object-file nat6to4.o section schedcls/egress4/snat4 direct-action
+ echo ${rx_args}
+ ip netns exec "${PEER_NS}" ./udpgso_bench_rx ${rx_args} -r &
+
+ wait_local_port_listen "${PEER_NS}" 8000 udp
+ ./udpgso_bench_tx ${tx_args}
+}
+
+run_in_netns() {
+ local -r args=$@
+ echo ${args}
+ ./in_netns.sh $0 __subprocess ${args}
+}
+
+run_udp() {
+ local -r args=$@
+
+ echo "udp gso - over veth touching data"
+ run_in_netns ${args} -u -S 0 rx -4 -v
+
+ echo "udp gso and gro - over veth touching data"
+ run_in_netns ${args} -S 0 rx -4 -G
+}
+
+run_tcp() {
+ local -r args=$@
+
+ echo "tcp - over veth touching data"
+ run_in_netns ${args} -t rx -4 -t
+}
+
+run_all() {
+ local -r core_args="-l 4"
+ local -r ipv4_args="${core_args} -4 -D 192.168.1.1"
+ local -r ipv6_args="${core_args} -6 -D 2001:db8::1"
+
+ echo "ipv6"
+ run_tcp "${ipv6_args}"
+ run_udp "${ipv6_args}"
+}
+
+if [ ! -f ${BPF_FILE} ]; then
+ echo "Missing ${BPF_FILE}. Run 'make' first"
+ exit -1
+fi
+
+if [ ! -f nat6to4.o ]; then
+ echo "Missing nat6to4 helper. Run 'make' first"
+ exit -1
+fi
+
+if [[ $# -eq 0 ]]; then
+ run_all
+elif [[ $1 == "__subprocess" ]]; then
+ shift
+ run_one $@
+else
+ run_in_netns $@
+fi
diff --git a/tools/testing/selftests/net/udpgro_fwd.sh b/tools/testing/selftests/net/udpgro_fwd.sh
new file mode 100755
index 000000000000..380cb15e942e
--- /dev/null
+++ b/tools/testing/selftests/net/udpgro_fwd.sh
@@ -0,0 +1,271 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+source net_helper.sh
+
+BPF_FILE="xdp_dummy.o"
+readonly BASE="ns-$(mktemp -u XXXXXX)"
+readonly SRC=2
+readonly DST=1
+readonly DST_NAT=100
+readonly NS_SRC=$BASE$SRC
+readonly NS_DST=$BASE$DST
+
+# "baremetal" network used for raw UDP traffic
+readonly BM_NET_V4=192.168.1.
+readonly BM_NET_V6=2001:db8::
+
+# "overlay" network used for UDP over UDP tunnel traffic
+readonly OL_NET_V4=172.16.1.
+readonly OL_NET_V6=2001:db8:1::
+readonly NPROCS=`nproc`
+
+cleanup() {
+ local ns
+ local -r jobs="$(jobs -p)"
+ [ -n "${jobs}" ] && kill -1 ${jobs} 2>/dev/null
+
+ for ns in $NS_SRC $NS_DST; do
+ ip netns del $ns 2>/dev/null
+ done
+}
+
+trap cleanup EXIT
+
+create_ns() {
+ local net
+ local ns
+
+ for ns in $NS_SRC $NS_DST; do
+ ip netns add $ns
+ ip -n $ns link set dev lo up
+
+ # disable route solicitations to decrease 'noise' traffic
+ ip netns exec $ns sysctl -qw net.ipv6.conf.default.router_solicitations=0
+ ip netns exec $ns sysctl -qw net.ipv6.conf.all.router_solicitations=0
+ done
+
+ ip link add name veth$SRC type veth peer name veth$DST
+
+ for ns in $SRC $DST; do
+ ip link set dev veth$ns netns $BASE$ns
+ ip -n $BASE$ns link set dev veth$ns up
+ ip -n $BASE$ns addr add dev veth$ns $BM_NET_V4$ns/24
+ ip -n $BASE$ns addr add dev veth$ns $BM_NET_V6$ns/64 nodad
+ done
+ ip -n $NS_DST link set veth$DST xdp object ${BPF_FILE} section xdp 2>/dev/null
+}
+
+create_vxlan_endpoint() {
+ local -r netns=$1
+ local -r bm_dev=$2
+ local -r bm_rem_addr=$3
+ local -r vxlan_dev=$4
+ local -r vxlan_id=$5
+ local -r vxlan_port=4789
+
+ ip -n $netns link set dev $bm_dev up
+ ip -n $netns link add dev $vxlan_dev type vxlan id $vxlan_id \
+ dstport $vxlan_port remote $bm_rem_addr
+ ip -n $netns link set dev $vxlan_dev up
+}
+
+create_vxlan_pair() {
+ local ns
+
+ create_ns
+
+ for ns in $SRC $DST; do
+ # note that 3 - $SRC == $DST and 3 - $DST == $SRC
+ create_vxlan_endpoint $BASE$ns veth$ns $BM_NET_V4$((3 - $ns)) vxlan$ns 4
+ ip -n $BASE$ns addr add dev vxlan$ns $OL_NET_V4$ns/24
+ done
+ for ns in $SRC $DST; do
+ create_vxlan_endpoint $BASE$ns veth$ns $BM_NET_V6$((3 - $ns)) vxlan6$ns 6
+ ip -n $BASE$ns addr add dev vxlan6$ns $OL_NET_V6$ns/24 nodad
+ done
+
+ # preload neighbur cache, do avoid some noisy traffic
+ local addr_dst=$(ip -j -n $BASE$DST link show dev vxlan6$DST |jq -r '.[]["address"]')
+ local addr_src=$(ip -j -n $BASE$SRC link show dev vxlan6$SRC |jq -r '.[]["address"]')
+ ip -n $BASE$DST neigh add dev vxlan6$DST lladdr $addr_src $OL_NET_V6$SRC
+ ip -n $BASE$SRC neigh add dev vxlan6$SRC lladdr $addr_dst $OL_NET_V6$DST
+}
+
+is_ipv6() {
+ if [[ $1 =~ .*:.* ]]; then
+ return 0
+ fi
+ return 1
+}
+
+run_test() {
+ local -r msg=$1
+ local -r dst=$2
+ local -r pkts=$3
+ local -r vxpkts=$4
+ local bind=$5
+ local rx_args=""
+ local rx_family="-4"
+ local family=-4
+ local filter=IpInReceives
+ local ipt=iptables
+
+ printf "%-40s" "$msg"
+
+ if is_ipv6 $dst; then
+ # rx program does not support '-6' and implies ipv6 usage by default
+ rx_family=""
+ family=-6
+ filter=Ip6InReceives
+ ipt=ip6tables
+ fi
+
+ rx_args="$rx_family"
+ [ -n "$bind" ] && rx_args="$rx_args -b $bind"
+
+ # send a single GSO packet, segmented in 10 UDP frames.
+ # Always expect 10 UDP frames on RX side as rx socket does
+ # not enable GRO
+ ip netns exec $NS_DST $ipt -A INPUT -p udp --dport 4789
+ ip netns exec $NS_DST $ipt -A INPUT -p udp --dport 8000
+ ip netns exec $NS_DST ./udpgso_bench_rx -C 2000 -R 100 -n 10 -l 1300 $rx_args &
+ local spid=$!
+ wait_local_port_listen "$NS_DST" 8000 udp
+ ip netns exec $NS_SRC ./udpgso_bench_tx $family -M 1 -s 13000 -S 1300 -D $dst
+ local retc=$?
+ wait $spid
+ local rets=$?
+ if [ ${rets} -ne 0 ] || [ ${retc} -ne 0 ]; then
+ echo " fail client exit code $retc, server $rets"
+ ret=1
+ return
+ fi
+
+ local rcv=`ip netns exec $NS_DST $ipt"-save" -c | grep 'dport 8000' | \
+ sed -e 's/\[//' -e 's/:.*//'`
+ if [ $rcv != $pkts ]; then
+ echo " fail - received $rcv packets, expected $pkts"
+ ret=1
+ return
+ fi
+
+ local vxrcv=`ip netns exec $NS_DST $ipt"-save" -c | grep 'dport 4789' | \
+ sed -e 's/\[//' -e 's/:.*//'`
+
+ # upper net can generate a little noise, allow some tolerance
+ if [ $vxrcv -lt $vxpkts -o $vxrcv -gt $((vxpkts + 3)) ]; then
+ echo " fail - received $vxrcv vxlan packets, expected $vxpkts"
+ ret=1
+ return
+ fi
+ echo " ok"
+}
+
+run_bench() {
+ local -r msg=$1
+ local -r dst=$2
+ local family=-4
+
+ printf "%-40s" "$msg"
+ if [ $NPROCS -lt 2 ]; then
+ echo " skip - needed 2 CPUs found $NPROCS"
+ return
+ fi
+
+ is_ipv6 $dst && family=-6
+
+ # bind the sender and the receiver to different CPUs to try
+ # get reproducible results
+ ip netns exec $NS_DST bash -c "echo 2 > /sys/class/net/veth$DST/queues/rx-0/rps_cpus"
+ ip netns exec $NS_DST taskset 0x2 ./udpgso_bench_rx -C 2000 -R 100 &
+ local spid=$!
+ wait_local_port_listen "$NS_DST" 8000 udp
+ ip netns exec $NS_SRC taskset 0x1 ./udpgso_bench_tx $family -l 3 -S 1300 -D $dst
+ local retc=$?
+ wait $spid
+ local rets=$?
+ if [ ${rets} -ne 0 ] || [ ${retc} -ne 0 ]; then
+ echo " fail client exit code $retc, server $rets"
+ ret=1
+ return
+ fi
+}
+
+for family in 4 6; do
+ BM_NET=$BM_NET_V4
+ OL_NET=$OL_NET_V4
+ IPT=iptables
+ SUFFIX=24
+ VXDEV=vxlan
+ PING=ping
+
+ if [ $family = 6 ]; then
+ BM_NET=$BM_NET_V6
+ OL_NET=$OL_NET_V6
+ SUFFIX="64 nodad"
+ VXDEV=vxlan6
+ IPT=ip6tables
+ # Use ping6 on systems where ping doesn't handle IPv6
+ ping -w 1 -c 1 ::1 > /dev/null 2>&1 || PING="ping6"
+ fi
+
+ echo "IPv$family"
+
+ create_ns
+ run_test "No GRO" $BM_NET$DST 10 0
+ cleanup
+
+ create_ns
+ ip netns exec $NS_DST ethtool -K veth$DST generic-receive-offload on
+ ip netns exec $NS_DST ethtool -K veth$DST rx-gro-list on
+ run_test "GRO frag list" $BM_NET$DST 1 0
+ cleanup
+
+ # UDP GRO fwd skips aggregation when find an udp socket with the GRO option
+ # if there is an UDP tunnel in the running system, such lookup happen
+ # take place.
+ # use NAT to circumvent GRO FWD check
+ create_ns
+ ip -n $NS_DST addr add dev veth$DST $BM_NET$DST_NAT/$SUFFIX
+ ip netns exec $NS_DST ethtool -K veth$DST generic-receive-offload on
+ ip netns exec $NS_DST ethtool -K veth$DST rx-udp-gro-forwarding on
+ ip netns exec $NS_DST $IPT -t nat -I PREROUTING -d $BM_NET$DST_NAT \
+ -j DNAT --to-destination $BM_NET$DST
+ run_test "GRO fwd" $BM_NET$DST_NAT 1 0 $BM_NET$DST
+ cleanup
+
+ create_ns
+ run_bench "UDP fwd perf" $BM_NET$DST
+ ip netns exec $NS_DST ethtool -K veth$DST rx-udp-gro-forwarding on
+ run_bench "UDP GRO fwd perf" $BM_NET$DST
+ cleanup
+
+ create_vxlan_pair
+ ip netns exec $NS_DST ethtool -K veth$DST generic-receive-offload on
+ ip netns exec $NS_DST ethtool -K veth$DST rx-gro-list on
+ run_test "GRO frag list over UDP tunnel" $OL_NET$DST 1 1
+ cleanup
+
+ # use NAT to circumvent GRO FWD check
+ create_vxlan_pair
+ ip -n $NS_DST addr add dev $VXDEV$DST $OL_NET$DST_NAT/$SUFFIX
+ ip netns exec $NS_DST ethtool -K veth$DST generic-receive-offload on
+ ip netns exec $NS_DST ethtool -K veth$DST rx-udp-gro-forwarding on
+ ip netns exec $NS_DST $IPT -t nat -I PREROUTING -d $OL_NET$DST_NAT \
+ -j DNAT --to-destination $OL_NET$DST
+
+ # load arp cache before running the test to reduce the amount of
+ # stray traffic on top of the UDP tunnel
+ ip netns exec $NS_SRC $PING -q -c 1 $OL_NET$DST_NAT >/dev/null
+ run_test "GRO fwd over UDP tunnel" $OL_NET$DST_NAT 1 1 $OL_NET$DST
+ cleanup
+
+ create_vxlan_pair
+ run_bench "UDP tunnel fwd perf" $OL_NET$DST
+ ip netns exec $NS_DST ethtool -K veth$DST rx-udp-gro-forwarding on
+ run_bench "UDP tunnel GRO fwd perf" $OL_NET$DST
+ cleanup
+done
+
+exit $ret
diff --git a/tools/testing/selftests/net/udpgso.c b/tools/testing/selftests/net/udpgso.c
index c66da6ffd6d8..1d975bf52af3 100644
--- a/tools/testing/selftests/net/udpgso.c
+++ b/tools/testing/selftests/net/udpgso.c
@@ -56,7 +56,6 @@ static bool cfg_do_msgmore;
static bool cfg_do_setsockopt;
static int cfg_specific_test_id = -1;
-static const char cfg_ifname[] = "lo";
static unsigned short cfg_port = 9000;
static char buf[ETH_MAX_MTU];
@@ -69,8 +68,13 @@ struct testcase {
int r_len_last; /* recv(): size of last non-mss dgram, if any */
};
-const struct in6_addr addr6 = IN6ADDR_LOOPBACK_INIT;
-const struct in_addr addr4 = { .s_addr = __constant_htonl(INADDR_LOOPBACK + 2) };
+const struct in6_addr addr6 = {
+ { { 0xfd, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1 } }, /* fd00::1 */
+};
+
+const struct in_addr addr4 = {
+ __constant_htonl(0x0a000001), /* 10.0.0.1 */
+};
struct testcase testcases_v4[] = {
{
@@ -156,13 +160,13 @@ struct testcase testcases_v4[] = {
},
{
/* send max number of min sized segments */
- .tlen = UDP_MAX_SEGMENTS - CONST_HDRLEN_V4,
+ .tlen = UDP_MAX_SEGMENTS,
.gso_len = 1,
- .r_num_mss = UDP_MAX_SEGMENTS - CONST_HDRLEN_V4,
+ .r_num_mss = UDP_MAX_SEGMENTS,
},
{
/* send max number + 1 of min sized segments: fail */
- .tlen = UDP_MAX_SEGMENTS - CONST_HDRLEN_V4 + 1,
+ .tlen = UDP_MAX_SEGMENTS + 1,
.gso_len = 1,
.tfail = true,
},
@@ -259,13 +263,13 @@ struct testcase testcases_v6[] = {
},
{
/* send max number of min sized segments */
- .tlen = UDP_MAX_SEGMENTS - CONST_HDRLEN_V6,
+ .tlen = UDP_MAX_SEGMENTS,
.gso_len = 1,
- .r_num_mss = UDP_MAX_SEGMENTS - CONST_HDRLEN_V6,
+ .r_num_mss = UDP_MAX_SEGMENTS,
},
{
/* send max number + 1 of min sized segments: fail */
- .tlen = UDP_MAX_SEGMENTS - CONST_HDRLEN_V6 + 1,
+ .tlen = UDP_MAX_SEGMENTS + 1,
.gso_len = 1,
.tfail = true,
},
@@ -274,48 +278,6 @@ struct testcase testcases_v6[] = {
}
};
-static unsigned int get_device_mtu(int fd, const char *ifname)
-{
- struct ifreq ifr;
-
- memset(&ifr, 0, sizeof(ifr));
-
- strcpy(ifr.ifr_name, ifname);
-
- if (ioctl(fd, SIOCGIFMTU, &ifr))
- error(1, errno, "ioctl get mtu");
-
- return ifr.ifr_mtu;
-}
-
-static void __set_device_mtu(int fd, const char *ifname, unsigned int mtu)
-{
- struct ifreq ifr;
-
- memset(&ifr, 0, sizeof(ifr));
-
- ifr.ifr_mtu = mtu;
- strcpy(ifr.ifr_name, ifname);
-
- if (ioctl(fd, SIOCSIFMTU, &ifr))
- error(1, errno, "ioctl set mtu");
-}
-
-static void set_device_mtu(int fd, int mtu)
-{
- int val;
-
- val = get_device_mtu(fd, cfg_ifname);
- fprintf(stderr, "device mtu (orig): %u\n", val);
-
- __set_device_mtu(fd, cfg_ifname, mtu);
- val = get_device_mtu(fd, cfg_ifname);
- if (val != mtu)
- error(1, 0, "unable to set device mtu to %u\n", val);
-
- fprintf(stderr, "device mtu (test): %u\n", val);
-}
-
static void set_pmtu_discover(int fd, bool is_ipv4)
{
int level, name, val;
@@ -354,81 +316,6 @@ static unsigned int get_path_mtu(int fd, bool is_ipv4)
return mtu;
}
-/* very wordy version of system("ip route add dev lo mtu 1500 127.0.0.3/32") */
-static void set_route_mtu(int mtu, bool is_ipv4)
-{
- struct sockaddr_nl nladdr = { .nl_family = AF_NETLINK };
- struct nlmsghdr *nh;
- struct rtattr *rta;
- struct rtmsg *rt;
- char data[NLMSG_ALIGN(sizeof(*nh)) +
- NLMSG_ALIGN(sizeof(*rt)) +
- NLMSG_ALIGN(RTA_LENGTH(sizeof(addr6))) +
- NLMSG_ALIGN(RTA_LENGTH(sizeof(int))) +
- NLMSG_ALIGN(RTA_LENGTH(0) + RTA_LENGTH(sizeof(int)))];
- int fd, ret, alen, off = 0;
-
- alen = is_ipv4 ? sizeof(addr4) : sizeof(addr6);
-
- fd = socket(AF_NETLINK, SOCK_RAW, NETLINK_ROUTE);
- if (fd == -1)
- error(1, errno, "socket netlink");
-
- memset(data, 0, sizeof(data));
-
- nh = (void *)data;
- nh->nlmsg_type = RTM_NEWROUTE;
- nh->nlmsg_flags = NLM_F_REQUEST | NLM_F_CREATE;
- off += NLMSG_ALIGN(sizeof(*nh));
-
- rt = (void *)(data + off);
- rt->rtm_family = is_ipv4 ? AF_INET : AF_INET6;
- rt->rtm_table = RT_TABLE_MAIN;
- rt->rtm_dst_len = alen << 3;
- rt->rtm_protocol = RTPROT_BOOT;
- rt->rtm_scope = RT_SCOPE_UNIVERSE;
- rt->rtm_type = RTN_UNICAST;
- off += NLMSG_ALIGN(sizeof(*rt));
-
- rta = (void *)(data + off);
- rta->rta_type = RTA_DST;
- rta->rta_len = RTA_LENGTH(alen);
- if (is_ipv4)
- memcpy(RTA_DATA(rta), &addr4, alen);
- else
- memcpy(RTA_DATA(rta), &addr6, alen);
- off += NLMSG_ALIGN(rta->rta_len);
-
- rta = (void *)(data + off);
- rta->rta_type = RTA_OIF;
- rta->rta_len = RTA_LENGTH(sizeof(int));
- *((int *)(RTA_DATA(rta))) = 1; //if_nametoindex("lo");
- off += NLMSG_ALIGN(rta->rta_len);
-
- /* MTU is a subtype in a metrics type */
- rta = (void *)(data + off);
- rta->rta_type = RTA_METRICS;
- rta->rta_len = RTA_LENGTH(0) + RTA_LENGTH(sizeof(int));
- off += NLMSG_ALIGN(rta->rta_len);
-
- /* now fill MTU subtype. Note that it fits within above rta_len */
- rta = (void *)(((char *) rta) + RTA_LENGTH(0));
- rta->rta_type = RTAX_MTU;
- rta->rta_len = RTA_LENGTH(sizeof(int));
- *((int *)(RTA_DATA(rta))) = mtu;
-
- nh->nlmsg_len = off;
-
- ret = sendto(fd, data, off, 0, (void *)&nladdr, sizeof(nladdr));
- if (ret != off)
- error(1, errno, "send netlink: %uB != %uB\n", ret, off);
-
- if (close(fd))
- error(1, errno, "close netlink");
-
- fprintf(stderr, "route mtu (test): %u\n", mtu);
-}
-
static bool __send_one(int fd, struct msghdr *msg, int flags)
{
int ret;
@@ -591,15 +478,10 @@ static void run_test(struct sockaddr *addr, socklen_t alen)
/* Do not fragment these datagrams: only succeed if GSO works */
set_pmtu_discover(fdt, addr->sa_family == AF_INET);
- if (cfg_do_connectionless) {
- set_device_mtu(fdt, CONST_MTU_TEST);
+ if (cfg_do_connectionless)
run_all(fdt, fdr, addr, alen);
- }
if (cfg_do_connected) {
- set_device_mtu(fdt, CONST_MTU_TEST + 100);
- set_route_mtu(CONST_MTU_TEST, addr->sa_family == AF_INET);
-
if (connect(fdt, addr, alen))
error(1, errno, "connect");
diff --git a/tools/testing/selftests/net/udpgso.sh b/tools/testing/selftests/net/udpgso.sh
index fec24f584fe9..6c63178086b0 100755
--- a/tools/testing/selftests/net/udpgso.sh
+++ b/tools/testing/selftests/net/udpgso.sh
@@ -3,27 +3,56 @@
#
# Run a series of udpgso regression tests
+set -o errexit
+set -o nounset
+
+setup_loopback() {
+ ip addr add dev lo 10.0.0.1/32
+ ip addr add dev lo fd00::1/128 nodad noprefixroute
+}
+
+test_dev_mtu() {
+ setup_loopback
+ # Reduce loopback MTU
+ ip link set dev lo mtu 1500
+}
+
+test_route_mtu() {
+ setup_loopback
+ # Remove default local routes
+ ip route del local 10.0.0.1/32 table local dev lo
+ ip route del local fd00::1/128 table local dev lo
+ # Install local routes with reduced MTU
+ ip route add local 10.0.0.1/32 table local dev lo mtu 1500
+ ip route add local fd00::1/128 table local dev lo mtu 1500
+}
+
+if [ "$#" -gt 0 ]; then
+ "$1"
+ shift 2 # pop "test_*" arg and "--" delimiter
+ exec "$@"
+fi
+
echo "ipv4 cmsg"
-./in_netns.sh ./udpgso -4 -C
+./in_netns.sh "$0" test_dev_mtu -- ./udpgso -4 -C
echo "ipv4 setsockopt"
-./in_netns.sh ./udpgso -4 -C -s
+./in_netns.sh "$0" test_dev_mtu -- ./udpgso -4 -C -s
echo "ipv6 cmsg"
-./in_netns.sh ./udpgso -6 -C
+./in_netns.sh "$0" test_dev_mtu -- ./udpgso -6 -C
echo "ipv6 setsockopt"
-./in_netns.sh ./udpgso -6 -C -s
+./in_netns.sh "$0" test_dev_mtu -- ./udpgso -6 -C -s
echo "ipv4 connected"
-./in_netns.sh ./udpgso -4 -c
+./in_netns.sh "$0" test_route_mtu -- ./udpgso -4 -c
-# blocked on 2nd loopback address
-# echo "ipv6 connected"
-# ./in_netns.sh ./udpgso -6 -c
+echo "ipv6 connected"
+./in_netns.sh "$0" test_route_mtu -- ./udpgso -6 -c
echo "ipv4 msg_more"
-./in_netns.sh ./udpgso -4 -C -m
+./in_netns.sh "$0" test_dev_mtu -- ./udpgso -4 -C -m
echo "ipv6 msg_more"
-./in_netns.sh ./udpgso -6 -C -m
+./in_netns.sh "$0" test_dev_mtu -- ./udpgso -6 -C -m
diff --git a/tools/testing/selftests/net/udpgso_bench.sh b/tools/testing/selftests/net/udpgso_bench.sh
index 80b5d352702e..640bc43452fa 100755
--- a/tools/testing/selftests/net/udpgso_bench.sh
+++ b/tools/testing/selftests/net/udpgso_bench.sh
@@ -7,6 +7,7 @@ readonly GREEN='\033[0;92m'
readonly YELLOW='\033[0;33m'
readonly RED='\033[0;31m'
readonly NC='\033[0m' # No Color
+readonly TESTPORT=8000
readonly KSFT_PASS=0
readonly KSFT_FAIL=1
@@ -56,11 +57,26 @@ trap wake_children EXIT
run_one() {
local -r args=$@
+ local nr_socks=0
+ local i=0
+ local -r timeout=10
+
+ ./udpgso_bench_rx -p "$TESTPORT" &
+ ./udpgso_bench_rx -p "$TESTPORT" -t &
+
+ # Wait for the above test program to get ready to receive connections.
+ while [ "$i" -lt "$timeout" ]; do
+ nr_socks="$(ss -lnHi | grep -c "\*:${TESTPORT}")"
+ [ "$nr_socks" -eq 2 ] && break
+ i=$((i + 1))
+ sleep 1
+ done
+ if [ "$nr_socks" -ne 2 ]; then
+ echo "timed out while waiting for udpgso_bench_rx"
+ exit 1
+ fi
- ./udpgso_bench_rx &
- ./udpgso_bench_rx -t &
-
- ./udpgso_bench_tx ${args}
+ ./udpgso_bench_tx -p "$TESTPORT" ${args}
}
run_in_netns() {
@@ -120,7 +136,7 @@ run_all() {
run_udp "${ipv4_args}"
echo "ipv6"
- run_tcp "${ipv4_args}"
+ run_tcp "${ipv6_args}"
run_udp "${ipv6_args}"
}
diff --git a/tools/testing/selftests/net/udpgso_bench_rx.c b/tools/testing/selftests/net/udpgso_bench_rx.c
index db3d4a8b5a4c..1cbadd267c96 100644
--- a/tools/testing/selftests/net/udpgso_bench_rx.c
+++ b/tools/testing/selftests/net/udpgso_bench_rx.c
@@ -113,6 +113,9 @@ static void do_poll(int fd, int timeout_ms)
interrupted = true;
break;
}
+
+ /* no events and more time to wait, do poll again */
+ continue;
}
if (pfd.revents != POLLIN)
error(1, errno, "poll: 0x%x expected 0x%x\n",
@@ -211,11 +214,10 @@ static void do_verify_udp(const char *data, int len)
static int recv_msg(int fd, char *buf, int len, int *gso_size)
{
- char control[CMSG_SPACE(sizeof(uint16_t))] = {0};
+ char control[CMSG_SPACE(sizeof(int))] = {0};
struct msghdr msg = {0};
struct iovec iov = {0};
struct cmsghdr *cmsg;
- uint16_t *gsosizeptr;
int ret;
iov.iov_base = buf;
@@ -234,8 +236,7 @@ static int recv_msg(int fd, char *buf, int len, int *gso_size)
cmsg = CMSG_NXTHDR(&msg, cmsg)) {
if (cmsg->cmsg_level == SOL_UDP
&& cmsg->cmsg_type == UDP_GRO) {
- gsosizeptr = (uint16_t *) CMSG_DATA(cmsg);
- *gso_size = *gsosizeptr;
+ *gso_size = *(int *)CMSG_DATA(cmsg);
break;
}
}
@@ -247,7 +248,7 @@ static int recv_msg(int fd, char *buf, int len, int *gso_size)
static void do_flush_udp(int fd)
{
static char rbuf[ETH_MAX_MTU];
- int ret, len, gso_size, budget = 256;
+ int ret, len, gso_size = 0, budget = 256;
len = cfg_read_all ? sizeof(rbuf) : 0;
while (budget--) {
@@ -290,19 +291,17 @@ static void usage(const char *filepath)
static void parse_opts(int argc, char **argv)
{
+ const char *bind_addr = NULL;
int c;
- /* bind to any by default */
- setup_sockaddr(PF_INET6, "::", &cfg_bind_addr);
while ((c = getopt(argc, argv, "4b:C:Gl:n:p:rR:S:tv")) != -1) {
switch (c) {
case '4':
cfg_family = PF_INET;
cfg_alen = sizeof(struct sockaddr_in);
- setup_sockaddr(PF_INET, "0.0.0.0", &cfg_bind_addr);
break;
case 'b':
- setup_sockaddr(cfg_family, optarg, &cfg_bind_addr);
+ bind_addr = optarg;
break;
case 'C':
cfg_connect_timeout_ms = strtoul(optarg, NULL, 0);
@@ -335,9 +334,16 @@ static void parse_opts(int argc, char **argv)
cfg_verify = true;
cfg_read_all = true;
break;
+ default:
+ exit(1);
}
}
+ if (!bind_addr)
+ bind_addr = cfg_family == PF_INET6 ? "::" : "0.0.0.0";
+
+ setup_sockaddr(cfg_family, bind_addr, &cfg_bind_addr);
+
if (optind != argc)
usage(argv[0]);
@@ -369,7 +375,7 @@ static void do_recv(void)
do_flush_udp(fd);
tnow = gettimeofday_ms();
- if (tnow > treport) {
+ if (!cfg_expected_pkt_nr && tnow > treport) {
if (packets)
fprintf(stderr,
"%s rx: %6lu MB/s %8lu calls/s\n",
diff --git a/tools/testing/selftests/net/udpgso_bench_tx.c b/tools/testing/selftests/net/udpgso_bench_tx.c
index 17512a43885e..477392715a9a 100644
--- a/tools/testing/selftests/net/udpgso_bench_tx.c
+++ b/tools/testing/selftests/net/udpgso_bench_tx.c
@@ -62,6 +62,7 @@ static int cfg_payload_len = (1472 * 42);
static int cfg_port = 8000;
static int cfg_runtime_ms = -1;
static bool cfg_poll;
+static int cfg_poll_loop_timeout_ms = 2000;
static bool cfg_segment;
static bool cfg_sendmmsg;
static bool cfg_tcp;
@@ -235,16 +236,17 @@ static void flush_errqueue_recv(int fd)
}
}
-static void flush_errqueue(int fd, const bool do_poll)
+static void flush_errqueue(int fd, const bool do_poll,
+ unsigned long poll_timeout, const bool poll_err)
{
if (do_poll) {
struct pollfd fds = {0};
int ret;
fds.fd = fd;
- ret = poll(&fds, 1, 500);
+ ret = poll(&fds, 1, poll_timeout);
if (ret == 0) {
- if (cfg_verbose)
+ if ((cfg_verbose) && (poll_err))
fprintf(stderr, "poll timeout\n");
} else if (ret < 0) {
error(1, errno, "poll");
@@ -254,6 +256,20 @@ static void flush_errqueue(int fd, const bool do_poll)
flush_errqueue_recv(fd);
}
+static void flush_errqueue_retry(int fd, unsigned long num_sends)
+{
+ unsigned long tnow, tstop;
+ bool first_try = true;
+
+ tnow = gettimeofday_ms();
+ tstop = tnow + cfg_poll_loop_timeout_ms;
+ do {
+ flush_errqueue(fd, true, tstop - tnow, first_try);
+ first_try = false;
+ tnow = gettimeofday_ms();
+ } while ((stat_zcopies != num_sends) && (tnow < tstop));
+}
+
static int send_tcp(int fd, char *data)
{
int ret, done = 0, count = 0;
@@ -413,16 +429,18 @@ static int send_udp_segment(int fd, char *data)
static void usage(const char *filepath)
{
- error(1, 0, "Usage: %s [-46acmHPtTuvz] [-C cpu] [-D dst ip] [-l secs] [-M messagenr] [-p port] [-s sendsize] [-S gsosize]",
+ error(1, 0, "Usage: %s [-46acmHPtTuvz] [-C cpu] [-D dst ip] [-l secs] "
+ "[-L secs] [-M messagenr] [-p port] [-s sendsize] [-S gsosize]",
filepath);
}
static void parse_opts(int argc, char **argv)
{
+ const char *bind_addr = NULL;
int max_len, hdrlen;
int c;
- while ((c = getopt(argc, argv, "46acC:D:Hl:mM:p:s:PS:tTuvz")) != -1) {
+ while ((c = getopt(argc, argv, "46acC:D:Hl:L:mM:p:s:PS:tTuvz")) != -1) {
switch (c) {
case '4':
if (cfg_family != PF_UNSPEC)
@@ -446,11 +464,14 @@ static void parse_opts(int argc, char **argv)
cfg_cpu = strtol(optarg, NULL, 0);
break;
case 'D':
- setup_sockaddr(cfg_family, optarg, &cfg_dst_addr);
+ bind_addr = optarg;
break;
case 'l':
cfg_runtime_ms = strtoul(optarg, NULL, 10) * 1000;
break;
+ case 'L':
+ cfg_poll_loop_timeout_ms = strtoul(optarg, NULL, 10) * 1000;
+ break;
case 'm':
cfg_sendmmsg = true;
break;
@@ -489,9 +510,16 @@ static void parse_opts(int argc, char **argv)
case 'z':
cfg_zerocopy = true;
break;
+ default:
+ exit(1);
}
}
+ if (!bind_addr)
+ bind_addr = cfg_family == PF_INET6 ? "::" : "0.0.0.0";
+
+ setup_sockaddr(cfg_family, bind_addr, &cfg_dst_addr);
+
if (optind != argc)
usage(argv[0]);
@@ -671,7 +699,7 @@ int main(int argc, char **argv)
num_sends += send_udp(fd, buf[i]);
num_msgs++;
if ((cfg_zerocopy && ((num_msgs & 0xF) == 0)) || cfg_tx_tstamp)
- flush_errqueue(fd, cfg_poll);
+ flush_errqueue(fd, cfg_poll, 500, true);
if (cfg_msg_nr && num_msgs >= cfg_msg_nr)
break;
@@ -690,7 +718,7 @@ int main(int argc, char **argv)
} while (!interrupted && (cfg_runtime_ms == -1 || tnow < tstop));
if (cfg_zerocopy || cfg_tx_tstamp)
- flush_errqueue(fd, true);
+ flush_errqueue_retry(fd, num_sends);
if (close(fd))
error(1, errno, "close");
diff --git a/tools/testing/selftests/net/unicast_extensions.sh b/tools/testing/selftests/net/unicast_extensions.sh
new file mode 100755
index 000000000000..f52aa5f7da52
--- /dev/null
+++ b/tools/testing/selftests/net/unicast_extensions.sh
@@ -0,0 +1,225 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+#
+# By Seth Schoen (c) 2021, for the IPv4 Unicast Extensions Project
+# Thanks to David Ahern for help and advice on nettest modifications.
+#
+# Self-tests for IPv4 address extensions: the kernel's ability to accept
+# certain traditionally unused or unallocated IPv4 addresses. For each kind
+# of address, we test for interface assignment, ping, TCP, and forwarding.
+# Must be run as root (to manipulate network namespaces and virtual
+# interfaces).
+#
+# Things we test for here:
+#
+# * Currently the kernel accepts addresses in 0/8 and 240/4 as valid.
+#
+# * Notwithstanding that, 0.0.0.0 and 255.255.255.255 cannot be assigned.
+#
+# * Currently the kernel DOES NOT accept unicast use of the lowest
+# address in an IPv4 subnet (e.g. 192.168.100.0/32 in 192.168.100.0/24).
+# This is treated as a second broadcast address, for compatibility
+# with 4.2BSD (!).
+#
+# * Currently the kernel DOES NOT accept unicast use of any of 127/8.
+#
+# * Currently the kernel DOES NOT accept unicast use of any of 224/4.
+#
+# These tests provide an easy way to flip the expected result of any
+# of these behaviors for testing kernel patches that change them.
+
+source lib.sh
+
+# nettest can be run from PATH or from same directory as this selftest
+if ! which nettest >/dev/null; then
+ PATH=$PWD:$PATH
+ if ! which nettest >/dev/null; then
+ echo "'nettest' command not found; skipping tests"
+ exit $ksft_skip
+ fi
+fi
+
+result=0
+
+hide_output(){ exec 3>&1 4>&2 >/dev/null 2>/dev/null; }
+show_output(){ exec >&3 2>&4; }
+
+show_result(){
+ if [ $1 -eq 0 ]; then
+ printf "TEST: %-60s [ OK ]\n" "${2}"
+ else
+ printf "TEST: %-60s [FAIL]\n" "${2}"
+ result=1
+ fi
+}
+
+_do_segmenttest(){
+ # Perform a simple set of link tests between a pair of
+ # IP addresses on a shared (virtual) segment, using
+ # ping and nettest.
+ # foo --- bar
+ # Arguments: ip_a ip_b prefix_length test_description
+ #
+ # Caller must set up $foo_ns and $bar_ns namespaces
+ # containing linked veth devices foo and bar,
+ # respectively.
+
+ ip -n $foo_ns address add $1/$3 dev foo || return 1
+ ip -n $foo_ns link set foo up || return 1
+ ip -n $bar_ns address add $2/$3 dev bar || return 1
+ ip -n $bar_ns link set bar up || return 1
+
+ ip netns exec $foo_ns timeout 2 ping -c 1 $2 || return 1
+ ip netns exec $bar_ns timeout 2 ping -c 1 $1 || return 1
+
+ nettest -B -N $bar_ns -O $foo_ns -r $1 || return 1
+ nettest -B -N $foo_ns -O $bar_ns -r $2 || return 1
+
+ return 0
+}
+
+_do_route_test(){
+ # Perform a simple set of gateway tests.
+ #
+ # [foo] <---> [foo1]-[bar1] <---> [bar] /prefix
+ # host gateway host
+ #
+ # Arguments: foo_ip foo1_ip bar1_ip bar_ip prefix_len test_description
+ # Displays test result and returns success or failure.
+
+ # Caller must set up $foo_ns, $bar_ns, and $router_ns
+ # containing linked veth devices foo-foo1, bar1-bar
+ # (foo in $foo_ns, foo1 and bar1 in $router_ns, and
+ # bar in $bar_ns).
+
+ ip -n $foo_ns address add $1/$5 dev foo || return 1
+ ip -n $foo_ns link set foo up || return 1
+ ip -n $foo_ns route add default via $2 || return 1
+ ip -n $bar_ns address add $4/$5 dev bar || return 1
+ ip -n $bar_ns link set bar up || return 1
+ ip -n $bar_ns route add default via $3 || return 1
+ ip -n $router_ns address add $2/$5 dev foo1 || return 1
+ ip -n $router_ns link set foo1 up || return 1
+ ip -n $router_ns address add $3/$5 dev bar1 || return 1
+ ip -n $router_ns link set bar1 up || return 1
+
+ echo 1 | ip netns exec $router_ns tee /proc/sys/net/ipv4/ip_forward
+
+ ip netns exec $foo_ns timeout 2 ping -c 1 $2 || return 1
+ ip netns exec $foo_ns timeout 2 ping -c 1 $4 || return 1
+ ip netns exec $bar_ns timeout 2 ping -c 1 $3 || return 1
+ ip netns exec $bar_ns timeout 2 ping -c 1 $1 || return 1
+
+ nettest -B -N $bar_ns -O $foo_ns -r $1 || return 1
+ nettest -B -N $foo_ns -O $bar_ns -r $4 || return 1
+
+ return 0
+}
+
+segmenttest(){
+ # Sets up veth link and tries to connect over it.
+ # Arguments: ip_a ip_b prefix_len test_description
+ hide_output
+ setup_ns foo_ns bar_ns
+ ip link add foo netns $foo_ns type veth peer name bar netns $bar_ns
+
+ test_result=0
+ _do_segmenttest "$@" || test_result=1
+
+ ip netns pids $foo_ns | xargs -r kill -9
+ ip netns pids $bar_ns | xargs -r kill -9
+ cleanup_ns $foo_ns $bar_ns
+ show_output
+
+ # inverted tests will expect failure instead of success
+ [ -n "$expect_failure" ] && test_result=`expr 1 - $test_result`
+
+ show_result $test_result "$4"
+}
+
+route_test(){
+ # Sets up a simple gateway and tries to connect through it.
+ # [foo] <---> [foo1]-[bar1] <---> [bar] /prefix
+ # Arguments: foo_ip foo1_ip bar1_ip bar_ip prefix_len test_description
+ # Returns success or failure.
+
+ hide_output
+ setup_ns foo_ns bar_ns router_ns
+ ip link add foo netns $foo_ns type veth peer name foo1 netns $router_ns
+ ip link add bar netns $bar_ns type veth peer name bar1 netns $router_ns
+
+ test_result=0
+ _do_route_test "$@" || test_result=1
+
+ ip netns pids $foo_ns | xargs -r kill -9
+ ip netns pids $bar_ns | xargs -r kill -9
+ ip netns pids $router_ns | xargs -r kill -9
+ cleanup_ns $foo_ns $bar_ns $router_ns
+
+ show_output
+
+ # inverted tests will expect failure instead of success
+ [ -n "$expect_failure" ] && test_result=`expr 1 - $test_result`
+ show_result $test_result "$6"
+}
+
+echo "###########################################################################"
+echo "Unicast address extensions tests (behavior of reserved IPv4 addresses)"
+echo "###########################################################################"
+#
+# Test support for 240/4
+segmenttest 240.1.2.1 240.1.2.4 24 "assign and ping within 240/4 (1 of 2) (is allowed)"
+segmenttest 250.100.2.1 250.100.30.4 16 "assign and ping within 240/4 (2 of 2) (is allowed)"
+#
+# Test support for 0/8
+segmenttest 0.1.2.17 0.1.2.23 24 "assign and ping within 0/8 (1 of 2) (is allowed)"
+segmenttest 0.77.240.17 0.77.2.23 16 "assign and ping within 0/8 (2 of 2) (is allowed)"
+#
+# Even 255.255/16 is OK!
+segmenttest 255.255.3.1 255.255.50.77 16 "assign and ping inside 255.255/16 (is allowed)"
+#
+# Or 255.255.255/24
+segmenttest 255.255.255.1 255.255.255.254 24 "assign and ping inside 255.255.255/24 (is allowed)"
+#
+# Routing between different networks
+route_test 240.5.6.7 240.5.6.1 255.1.2.1 255.1.2.3 24 "route between 240.5.6/24 and 255.1.2/24 (is allowed)"
+route_test 0.200.6.7 0.200.38.1 245.99.101.1 245.99.200.111 16 "route between 0.200/16 and 245.99/16 (is allowed)"
+#
+# Test support for lowest address ending in .0
+segmenttest 5.10.15.20 5.10.15.0 24 "assign and ping lowest address (/24)"
+#
+# Test support for lowest address not ending in .0
+segmenttest 192.168.101.192 192.168.101.193 26 "assign and ping lowest address (/26)"
+#
+# Routing using lowest address as a gateway/endpoint
+route_test 192.168.42.1 192.168.42.0 9.8.7.6 9.8.7.0 24 "routing using lowest address"
+#
+# ==============================================
+# ==== TESTS THAT CURRENTLY EXPECT FAILURE =====
+# ==============================================
+expect_failure=true
+# It should still not be possible to use 0.0.0.0 or 255.255.255.255
+# as a unicast address. Thus, these tests expect failure.
+segmenttest 0.0.1.5 0.0.0.0 16 "assigning 0.0.0.0 (is forbidden)"
+segmenttest 255.255.255.1 255.255.255.255 16 "assigning 255.255.255.255 (is forbidden)"
+#
+# Test support for not having all of 127 be loopback
+# Currently Linux does not allow this, so this should fail too
+segmenttest 127.99.4.5 127.99.4.6 16 "assign and ping inside 127/8 (is forbidden)"
+#
+# Test support for unicast use of class D
+# Currently Linux does not allow this, so this should fail too
+segmenttest 225.1.2.3 225.1.2.200 24 "assign and ping class D address (is forbidden)"
+#
+# Routing using class D as a gateway
+route_test 225.1.42.1 225.1.42.2 9.8.7.6 9.8.7.1 24 "routing using class D (is forbidden)"
+#
+# Routing using 127/8
+# Currently Linux does not allow this, so this should fail too
+route_test 127.99.2.3 127.99.2.4 200.1.2.3 200.1.2.4 24 "routing using 127/8 (is forbidden)"
+#
+unset expect_failure
+# =====================================================
+# ==== END OF TESTS THAT CURRENTLY EXPECT FAILURE =====
+# =====================================================
+exit ${result}
diff --git a/tools/testing/selftests/net/veth.sh b/tools/testing/selftests/net/veth.sh
new file mode 100755
index 000000000000..3a394b43e274
--- /dev/null
+++ b/tools/testing/selftests/net/veth.sh
@@ -0,0 +1,392 @@
+#!/bin/sh
+# SPDX-License-Identifier: GPL-2.0
+
+BPF_FILE="xdp_dummy.o"
+readonly STATS="$(mktemp -p /tmp ns-XXXXXX)"
+readonly BASE=`basename $STATS`
+readonly SRC=2
+readonly DST=1
+readonly DST_NAT=100
+readonly NS_SRC=$BASE$SRC
+readonly NS_DST=$BASE$DST
+
+# "baremetal" network used for raw UDP traffic
+readonly BM_NET_V4=192.168.1.
+readonly BM_NET_V6=2001:db8::
+
+readonly CPUS=`nproc`
+ret=0
+
+cleanup() {
+ local ns
+ local jobs
+ readonly jobs="$(jobs -p)"
+ [ -n "${jobs}" ] && kill -1 ${jobs} 2>/dev/null
+ rm -f $STATS
+
+ for ns in $NS_SRC $NS_DST; do
+ ip netns del $ns 2>/dev/null
+ done
+}
+
+trap cleanup EXIT
+
+create_ns() {
+ local ns
+
+ for ns in $NS_SRC $NS_DST; do
+ ip netns add $ns
+ ip -n $ns link set dev lo up
+ done
+
+ ip link add name veth$SRC type veth peer name veth$DST
+
+ for ns in $SRC $DST; do
+ ip link set dev veth$ns netns $BASE$ns up
+ ip -n $BASE$ns addr add dev veth$ns $BM_NET_V4$ns/24
+ ip -n $BASE$ns addr add dev veth$ns $BM_NET_V6$ns/64 nodad
+ done
+ echo "#kernel" > $BASE
+ chmod go-rw $BASE
+}
+
+__chk_flag() {
+ local msg="$1"
+ local target=$2
+ local expected=$3
+ local flagname=$4
+
+ local flag=`ip netns exec $BASE$target ethtool -k veth$target |\
+ grep $flagname | awk '{print $2}'`
+
+ printf "%-60s" "$msg"
+ if [ "$flag" = "$expected" ]; then
+ echo " ok "
+ else
+ echo " fail - expected $expected found $flag"
+ ret=1
+ fi
+}
+
+chk_gro_flag() {
+ __chk_flag "$1" $2 $3 generic-receive-offload
+}
+
+chk_tso_flag() {
+ __chk_flag "$1" $2 $3 tcp-segmentation-offload
+}
+
+chk_channels() {
+ local msg="$1"
+ local target=$2
+ local rx=$3
+ local tx=$4
+
+ local dev=veth$target
+
+ local cur_rx=`ip netns exec $BASE$target ethtool -l $dev |\
+ grep RX: | tail -n 1 | awk '{print $2}' `
+ local cur_tx=`ip netns exec $BASE$target ethtool -l $dev |\
+ grep TX: | tail -n 1 | awk '{print $2}'`
+ local cur_combined=`ip netns exec $BASE$target ethtool -l $dev |\
+ grep Combined: | tail -n 1 | awk '{print $2}'`
+
+ printf "%-60s" "$msg"
+ if [ "$cur_rx" = "$rx" -a "$cur_tx" = "$tx" -a "$cur_combined" = "n/a" ]; then
+ echo " ok "
+ else
+ echo " fail rx:$rx:$cur_rx tx:$tx:$cur_tx combined:n/a:$cur_combined"
+ fi
+}
+
+chk_gro() {
+ local msg="$1"
+ local expected=$2
+
+ ip netns exec $BASE$SRC ping -qc 1 $BM_NET_V4$DST >/dev/null
+ NSTAT_HISTORY=$STATS ip netns exec $NS_DST nstat -n
+
+ printf "%-60s" "$msg"
+ ip netns exec $BASE$DST ./udpgso_bench_rx -C 1000 -R 10 &
+ local spid=$!
+ sleep 0.1
+
+ ip netns exec $NS_SRC ./udpgso_bench_tx -4 -s 13000 -S 1300 -M 1 -D $BM_NET_V4$DST
+ local retc=$?
+ wait $spid
+ local rets=$?
+ if [ ${rets} -ne 0 ] || [ ${retc} -ne 0 ]; then
+ echo " fail client exit code $retc, server $rets"
+ ret=1
+ return
+ fi
+
+ local pkts=`NSTAT_HISTORY=$STATS ip netns exec $NS_DST nstat IpInReceives | \
+ awk '{print $2}' | tail -n 1`
+ if [ "$pkts" = "$expected" ]; then
+ echo " ok "
+ else
+ echo " fail - got $pkts packets, expected $expected "
+ ret=1
+ fi
+}
+
+__change_channels()
+{
+ local cur_cpu
+ local end=$1
+ local cur
+ local i
+
+ while true; do
+ printf -v cur '%(%s)T'
+ [ $cur -le $end ] || break
+
+ for i in `seq 1 $CPUS`; do
+ ip netns exec $NS_SRC ethtool -L veth$SRC rx $i tx $i
+ ip netns exec $NS_DST ethtool -L veth$DST rx $i tx $i
+ done
+
+ for i in `seq 1 $((CPUS - 1))`; do
+ cur_cpu=$((CPUS - $i))
+ ip netns exec $NS_SRC ethtool -L veth$SRC rx $cur_cpu tx $cur_cpu
+ ip netns exec $NS_DST ethtool -L veth$DST rx $cur_cpu tx $cur_cpu
+ done
+ done
+}
+
+__send_data() {
+ local end=$1
+
+ while true; do
+ printf -v cur '%(%s)T'
+ [ $cur -le $end ] || break
+
+ ip netns exec $NS_SRC ./udpgso_bench_tx -4 -s 1000 -M 300 -D $BM_NET_V4$DST
+ done
+}
+
+do_stress() {
+ local end
+ printf -v end '%(%s)T'
+ end=$((end + $STRESS))
+
+ ip netns exec $NS_SRC ethtool -L veth$SRC rx 3 tx 3
+ ip netns exec $NS_DST ethtool -L veth$DST rx 3 tx 3
+
+ ip netns exec $NS_DST ./udpgso_bench_rx &
+ local rx_pid=$!
+
+ echo "Running stress test for $STRESS seconds..."
+ __change_channels $end &
+ local ch_pid=$!
+ __send_data $end &
+ local data_pid_1=$!
+ __send_data $end &
+ local data_pid_2=$!
+ __send_data $end &
+ local data_pid_3=$!
+ __send_data $end &
+ local data_pid_4=$!
+
+ wait $ch_pid $data_pid_1 $data_pid_2 $data_pid_3 $data_pid_4
+ kill -9 $rx_pid
+ echo "done"
+
+ # restore previous setting
+ ip netns exec $NS_SRC ethtool -L veth$SRC rx 2 tx 2
+ ip netns exec $NS_DST ethtool -L veth$DST rx 2 tx 1
+}
+
+usage() {
+ echo "Usage: $0 [-h] [-s <seconds>]"
+ echo -e "\t-h: show this help"
+ echo -e "\t-s: run optional stress tests for the given amount of seconds"
+}
+
+STRESS=0
+while getopts "hs:" option; do
+ case "$option" in
+ "h")
+ usage $0
+ exit 0
+ ;;
+ "s")
+ STRESS=$OPTARG
+ ;;
+ esac
+done
+
+if [ ! -f ${BPF_FILE} ]; then
+ echo "Missing ${BPF_FILE}. Run 'make' first"
+ exit 1
+fi
+
+[ $CPUS -lt 2 ] && echo "Only one CPU available, some tests will be skipped"
+[ $STRESS -gt 0 -a $CPUS -lt 3 ] && echo " stress test will be skipped, too"
+
+create_ns
+chk_gro_flag "default - gro flag" $SRC off
+chk_gro_flag " - peer gro flag" $DST off
+chk_tso_flag " - tso flag" $SRC on
+chk_tso_flag " - peer tso flag" $DST on
+chk_gro " - aggregation" 1
+ip netns exec $NS_SRC ethtool -K veth$SRC tx-udp-segmentation off
+chk_gro " - aggregation with TSO off" 10
+cleanup
+
+create_ns
+ip netns exec $NS_DST ethtool -K veth$DST gro on
+chk_gro_flag "with gro on - gro flag" $DST on
+chk_gro_flag " - peer gro flag" $SRC off
+chk_tso_flag " - tso flag" $SRC on
+chk_tso_flag " - peer tso flag" $DST on
+ip netns exec $NS_SRC ethtool -K veth$SRC tx-udp-segmentation off
+ip netns exec $NS_DST ethtool -K veth$DST rx-udp-gro-forwarding on
+chk_gro " - aggregation with TSO off" 1
+cleanup
+
+create_ns
+ip -n $NS_DST link set dev veth$DST up
+ip -n $NS_DST link set dev veth$DST xdp object ${BPF_FILE} section xdp
+chk_gro_flag "gro vs xdp while down - gro flag off" $DST off
+ip -n $NS_DST link set dev veth$DST down
+chk_gro_flag " - after down" $DST off
+ip -n $NS_DST link set dev veth$DST xdp off
+chk_gro_flag " - after xdp off" $DST off
+ip -n $NS_DST link set dev veth$DST up
+chk_gro_flag " - after up" $DST off
+ip -n $NS_SRC link set dev veth$SRC xdp object ${BPF_FILE} section xdp
+chk_gro_flag " - after peer xdp" $DST off
+cleanup
+
+create_ns
+ip -n $NS_DST link set dev veth$DST up
+ip -n $NS_DST link set dev veth$DST xdp object ${BPF_FILE} section xdp
+ip netns exec $NS_DST ethtool -K veth$DST generic-receive-offload on
+chk_gro_flag "gro vs xdp while down - gro flag on" $DST on
+ip -n $NS_DST link set dev veth$DST down
+chk_gro_flag " - after down" $DST on
+ip -n $NS_DST link set dev veth$DST xdp off
+chk_gro_flag " - after xdp off" $DST on
+ip -n $NS_DST link set dev veth$DST up
+chk_gro_flag " - after up" $DST on
+ip -n $NS_SRC link set dev veth$SRC xdp object ${BPF_FILE} section xdp
+chk_gro_flag " - after peer xdp" $DST on
+cleanup
+
+create_ns
+chk_channels "default channels" $DST 1 1
+
+ip -n $NS_DST link set dev veth$DST down
+ip netns exec $NS_DST ethtool -K veth$DST gro on
+chk_gro_flag "with gro enabled on link down - gro flag" $DST on
+chk_gro_flag " - peer gro flag" $SRC off
+chk_tso_flag " - tso flag" $SRC on
+chk_tso_flag " - peer tso flag" $DST on
+ip -n $NS_DST link set dev veth$DST up
+ip netns exec $NS_SRC ethtool -K veth$SRC tx-udp-segmentation off
+ip netns exec $NS_DST ethtool -K veth$DST rx-udp-gro-forwarding on
+chk_gro " - aggregation with TSO off" 1
+cleanup
+
+create_ns
+
+CUR_TX=1
+CUR_RX=1
+if [ $CPUS -gt 1 ]; then
+ ip netns exec $NS_DST ethtool -L veth$DST tx 2
+ chk_channels "setting tx channels" $DST 1 2
+ CUR_TX=2
+fi
+
+if [ $CPUS -gt 2 ]; then
+ ip netns exec $NS_DST ethtool -L veth$DST rx 3 tx 3
+ chk_channels "setting both rx and tx channels" $DST 3 3
+ CUR_RX=3
+ CUR_TX=3
+fi
+
+ip netns exec $NS_DST ethtool -L veth$DST combined 2 2>/dev/null
+chk_channels "bad setting: combined channels" $DST $CUR_RX $CUR_TX
+
+ip netns exec $NS_DST ethtool -L veth$DST tx $((CPUS + 1)) 2>/dev/null
+chk_channels "setting invalid channels nr" $DST $CUR_RX $CUR_TX
+
+if [ $CPUS -gt 1 ]; then
+ # this also tests queues nr reduction
+ ip netns exec $NS_DST ethtool -L veth$DST rx 1 tx 2 2>/dev/null
+ ip netns exec $NS_SRC ethtool -L veth$SRC rx 1 tx 2 2>/dev/null
+ printf "%-60s" "bad setting: XDP with RX nr less than TX"
+ ip -n $NS_DST link set dev veth$DST xdp object ${BPF_FILE} \
+ section xdp 2>/dev/null &&\
+ echo "fail - set operation successful ?!?" || echo " ok "
+
+ # the following tests will run with multiple channels active
+ ip netns exec $NS_SRC ethtool -L veth$SRC rx 2
+ ip netns exec $NS_DST ethtool -L veth$DST rx 2
+ ip -n $NS_DST link set dev veth$DST xdp object ${BPF_FILE} \
+ section xdp 2>/dev/null
+ printf "%-60s" "bad setting: reducing RX nr below peer TX with XDP set"
+ ip netns exec $NS_DST ethtool -L veth$DST rx 1 2>/dev/null &&\
+ echo "fail - set operation successful ?!?" || echo " ok "
+ CUR_RX=2
+ CUR_TX=2
+fi
+
+if [ $CPUS -gt 2 ]; then
+ printf "%-60s" "bad setting: increasing peer TX nr above RX with XDP set"
+ ip netns exec $NS_SRC ethtool -L veth$SRC tx 3 2>/dev/null &&\
+ echo "fail - set operation successful ?!?" || echo " ok "
+ chk_channels "setting invalid channels nr" $DST 2 2
+fi
+
+ip -n $NS_DST link set dev veth$DST xdp object ${BPF_FILE} section xdp 2>/dev/null
+chk_gro_flag "with xdp attached - gro flag" $DST off
+chk_gro_flag " - peer gro flag" $SRC off
+chk_tso_flag " - tso flag" $SRC off
+chk_tso_flag " - peer tso flag" $DST on
+ip netns exec $NS_DST ethtool -K veth$DST rx-udp-gro-forwarding on
+chk_gro " - no aggregation" 10
+ip netns exec $NS_DST ethtool -K veth$DST generic-receive-offload on
+chk_gro_flag " - gro flag with GRO on" $DST on
+chk_gro " - aggregation" 1
+
+
+ip -n $NS_DST link set dev veth$DST down
+ip -n $NS_SRC link set dev veth$SRC down
+chk_gro_flag " - after dev off, flag" $DST on
+chk_gro_flag " - peer flag" $SRC off
+
+ip netns exec $NS_DST ethtool -K veth$DST gro on
+ip -n $NS_DST link set dev veth$DST xdp off
+chk_gro_flag " - after gro on xdp off, gro flag" $DST on
+chk_gro_flag " - peer gro flag" $SRC off
+chk_tso_flag " - tso flag" $SRC on
+chk_tso_flag " - peer tso flag" $DST on
+
+if [ $CPUS -gt 1 ]; then
+ ip netns exec $NS_DST ethtool -L veth$DST tx 1
+ chk_channels "decreasing tx channels with device down" $DST 2 1
+fi
+
+ip -n $NS_DST link set dev veth$DST up
+ip -n $NS_SRC link set dev veth$SRC up
+chk_gro " - aggregation" 1
+
+if [ $CPUS -gt 1 ]; then
+ [ $STRESS -gt 0 -a $CPUS -gt 2 ] && do_stress
+
+ ip -n $NS_DST link set dev veth$DST down
+ ip -n $NS_SRC link set dev veth$SRC down
+ ip netns exec $NS_DST ethtool -L veth$DST tx 2
+ chk_channels "increasing tx channels with device down" $DST 2 2
+ ip -n $NS_DST link set dev veth$DST up
+ ip -n $NS_SRC link set dev veth$SRC up
+fi
+
+ip netns exec $NS_DST ethtool -K veth$DST gro off
+ip netns exec $NS_SRC ethtool -K veth$SRC tx-udp-segmentation off
+chk_gro "aggregation again with default and TSO off" 10
+
+exit $ret
diff --git a/tools/testing/selftests/net/vlan_hw_filter.sh b/tools/testing/selftests/net/vlan_hw_filter.sh
new file mode 100755
index 000000000000..7bc804ffaf7c
--- /dev/null
+++ b/tools/testing/selftests/net/vlan_hw_filter.sh
@@ -0,0 +1,29 @@
+#!/bin/sh
+# SPDX-License-Identifier: GPL-2.0
+
+readonly NETNS="ns-$(mktemp -u XXXXXX)"
+
+ret=0
+
+cleanup() {
+ ip netns del $NETNS
+}
+
+trap cleanup EXIT
+
+fail() {
+ echo "ERROR: ${1:-unexpected return code} (ret: $_)" >&2
+ ret=1
+}
+
+ip netns add ${NETNS}
+ip netns exec ${NETNS} ip link add bond0 type bond mode 0
+ip netns exec ${NETNS} ip link add bond_slave_1 type veth peer veth2
+ip netns exec ${NETNS} ip link set bond_slave_1 master bond0
+ip netns exec ${NETNS} ethtool -K bond0 rx-vlan-filter off
+ip netns exec ${NETNS} ip link add link bond_slave_1 name bond_slave_1.0 type vlan id 0
+ip netns exec ${NETNS} ip link add link bond0 name bond0.0 type vlan id 0
+ip netns exec ${NETNS} ip link set bond_slave_1 nomaster
+ip netns exec ${NETNS} ip link del veth2 || fail "Please check vlan HW filter function"
+
+exit $ret
diff --git a/tools/testing/selftests/net/vrf-xfrm-tests.sh b/tools/testing/selftests/net/vrf-xfrm-tests.sh
index 184da81f554f..b64dd891699d 100755
--- a/tools/testing/selftests/net/vrf-xfrm-tests.sh
+++ b/tools/testing/selftests/net/vrf-xfrm-tests.sh
@@ -3,9 +3,7 @@
#
# Various combinations of VRF with xfrms and qdisc.
-# Kselftest framework requirement - SKIP code is 4.
-ksft_skip=4
-
+source lib.sh
PAUSE_ON_FAIL=no
VERBOSE=0
ret=0
@@ -67,7 +65,7 @@ run_cmd_host1()
printf " COMMAND: $cmd\n"
fi
- out=$(eval ip netns exec host1 $cmd 2>&1)
+ out=$(eval ip netns exec $host1 $cmd 2>&1)
rc=$?
if [ "$VERBOSE" = "1" ]; then
if [ -n "$out" ]; then
@@ -116,9 +114,6 @@ create_ns()
[ -z "${addr}" ] && addr="-"
[ -z "${addr6}" ] && addr6="-"
- ip netns add ${ns}
-
- ip -netns ${ns} link set lo up
if [ "${addr}" != "-" ]; then
ip -netns ${ns} addr add dev lo ${addr}
fi
@@ -177,25 +172,25 @@ connect_ns()
cleanup()
{
- ip netns del host1
- ip netns del host2
+ cleanup_ns $host1 $host2
}
setup()
{
- create_ns "host1"
- create_ns "host2"
+ setup_ns host1 host2
+ create_ns "$host1"
+ create_ns "$host2"
- connect_ns "host1" eth0 ${HOST1_4}/24 ${HOST1_6}/64 \
- "host2" eth0 ${HOST2_4}/24 ${HOST2_6}/64
+ connect_ns "$host1" eth0 ${HOST1_4}/24 ${HOST1_6}/64 \
+ "$host2" eth0 ${HOST2_4}/24 ${HOST2_6}/64
- create_vrf "host1" ${VRF} ${TABLE}
- ip -netns host1 link set dev eth0 master ${VRF}
+ create_vrf "$host1" ${VRF} ${TABLE}
+ ip -netns $host1 link set dev eth0 master ${VRF}
}
cleanup_xfrm()
{
- for ns in host1 host2
+ for ns in $host1 $host2
do
for x in state policy
do
@@ -218,127 +213,127 @@ setup_xfrm()
#
# host1 - IPv4 out
- ip -netns host1 xfrm policy add \
+ ip -netns $host1 xfrm policy add \
src ${h1_4} dst ${h2_4} ${devarg} dir out \
tmpl src ${HOST1_4} dst ${HOST2_4} proto esp mode tunnel
# host2 - IPv4 in
- ip -netns host2 xfrm policy add \
+ ip -netns $host2 xfrm policy add \
src ${h1_4} dst ${h2_4} dir in \
tmpl src ${HOST1_4} dst ${HOST2_4} proto esp mode tunnel
# host1 - IPv4 in
- ip -netns host1 xfrm policy add \
+ ip -netns $host1 xfrm policy add \
src ${h2_4} dst ${h1_4} ${devarg} dir in \
tmpl src ${HOST2_4} dst ${HOST1_4} proto esp mode tunnel
# host2 - IPv4 out
- ip -netns host2 xfrm policy add \
+ ip -netns $host2 xfrm policy add \
src ${h2_4} dst ${h1_4} dir out \
tmpl src ${HOST2_4} dst ${HOST1_4} proto esp mode tunnel
# host1 - IPv6 out
- ip -6 -netns host1 xfrm policy add \
+ ip -6 -netns $host1 xfrm policy add \
src ${h1_6} dst ${h2_6} ${devarg} dir out \
tmpl src ${HOST1_6} dst ${HOST2_6} proto esp mode tunnel
# host2 - IPv6 in
- ip -6 -netns host2 xfrm policy add \
+ ip -6 -netns $host2 xfrm policy add \
src ${h1_6} dst ${h2_6} dir in \
tmpl src ${HOST1_6} dst ${HOST2_6} proto esp mode tunnel
# host1 - IPv6 in
- ip -6 -netns host1 xfrm policy add \
+ ip -6 -netns $host1 xfrm policy add \
src ${h2_6} dst ${h1_6} ${devarg} dir in \
tmpl src ${HOST2_6} dst ${HOST1_6} proto esp mode tunnel
# host2 - IPv6 out
- ip -6 -netns host2 xfrm policy add \
+ ip -6 -netns $host2 xfrm policy add \
src ${h2_6} dst ${h1_6} dir out \
tmpl src ${HOST2_6} dst ${HOST1_6} proto esp mode tunnel
#
# state
#
- ip -netns host1 xfrm state add src ${HOST1_4} dst ${HOST2_4} \
+ ip -netns $host1 xfrm state add src ${HOST1_4} dst ${HOST2_4} \
proto esp spi ${SPI_1} reqid 0 mode tunnel \
replay-window 4 replay-oseq 0x4 \
- auth-trunc 'hmac(md5)' ${AUTH_1} 96 \
- enc 'cbc(des3_ede)' ${ENC_1} \
+ auth-trunc 'hmac(sha1)' ${AUTH_1} 96 \
+ enc 'cbc(aes)' ${ENC_1} \
sel src ${h1_4} dst ${h2_4} ${devarg}
- ip -netns host2 xfrm state add src ${HOST1_4} dst ${HOST2_4} \
+ ip -netns $host2 xfrm state add src ${HOST1_4} dst ${HOST2_4} \
proto esp spi ${SPI_1} reqid 0 mode tunnel \
replay-window 4 replay-oseq 0x4 \
- auth-trunc 'hmac(md5)' ${AUTH_1} 96 \
- enc 'cbc(des3_ede)' ${ENC_1} \
+ auth-trunc 'hmac(sha1)' ${AUTH_1} 96 \
+ enc 'cbc(aes)' ${ENC_1} \
sel src ${h1_4} dst ${h2_4}
- ip -netns host1 xfrm state add src ${HOST2_4} dst ${HOST1_4} \
+ ip -netns $host1 xfrm state add src ${HOST2_4} dst ${HOST1_4} \
proto esp spi ${SPI_2} reqid 0 mode tunnel \
replay-window 4 replay-oseq 0x4 \
- auth-trunc 'hmac(md5)' ${AUTH_2} 96 \
- enc 'cbc(des3_ede)' ${ENC_2} \
+ auth-trunc 'hmac(sha1)' ${AUTH_2} 96 \
+ enc 'cbc(aes)' ${ENC_2} \
sel src ${h2_4} dst ${h1_4} ${devarg}
- ip -netns host2 xfrm state add src ${HOST2_4} dst ${HOST1_4} \
+ ip -netns $host2 xfrm state add src ${HOST2_4} dst ${HOST1_4} \
proto esp spi ${SPI_2} reqid 0 mode tunnel \
replay-window 4 replay-oseq 0x4 \
- auth-trunc 'hmac(md5)' ${AUTH_2} 96 \
- enc 'cbc(des3_ede)' ${ENC_2} \
+ auth-trunc 'hmac(sha1)' ${AUTH_2} 96 \
+ enc 'cbc(aes)' ${ENC_2} \
sel src ${h2_4} dst ${h1_4}
- ip -6 -netns host1 xfrm state add src ${HOST1_6} dst ${HOST2_6} \
+ ip -6 -netns $host1 xfrm state add src ${HOST1_6} dst ${HOST2_6} \
proto esp spi ${SPI_1} reqid 0 mode tunnel \
replay-window 4 replay-oseq 0x4 \
- auth-trunc 'hmac(md5)' ${AUTH_1} 96 \
- enc 'cbc(des3_ede)' ${ENC_1} \
+ auth-trunc 'hmac(sha1)' ${AUTH_1} 96 \
+ enc 'cbc(aes)' ${ENC_1} \
sel src ${h1_6} dst ${h2_6} ${devarg}
- ip -6 -netns host2 xfrm state add src ${HOST1_6} dst ${HOST2_6} \
+ ip -6 -netns $host2 xfrm state add src ${HOST1_6} dst ${HOST2_6} \
proto esp spi ${SPI_1} reqid 0 mode tunnel \
replay-window 4 replay-oseq 0x4 \
- auth-trunc 'hmac(md5)' ${AUTH_1} 96 \
- enc 'cbc(des3_ede)' ${ENC_1} \
+ auth-trunc 'hmac(sha1)' ${AUTH_1} 96 \
+ enc 'cbc(aes)' ${ENC_1} \
sel src ${h1_6} dst ${h2_6}
- ip -6 -netns host1 xfrm state add src ${HOST2_6} dst ${HOST1_6} \
+ ip -6 -netns $host1 xfrm state add src ${HOST2_6} dst ${HOST1_6} \
proto esp spi ${SPI_2} reqid 0 mode tunnel \
replay-window 4 replay-oseq 0x4 \
- auth-trunc 'hmac(md5)' ${AUTH_2} 96 \
- enc 'cbc(des3_ede)' ${ENC_2} \
+ auth-trunc 'hmac(sha1)' ${AUTH_2} 96 \
+ enc 'cbc(aes)' ${ENC_2} \
sel src ${h2_6} dst ${h1_6} ${devarg}
- ip -6 -netns host2 xfrm state add src ${HOST2_6} dst ${HOST1_6} \
+ ip -6 -netns $host2 xfrm state add src ${HOST2_6} dst ${HOST1_6} \
proto esp spi ${SPI_2} reqid 0 mode tunnel \
replay-window 4 replay-oseq 0x4 \
- auth-trunc 'hmac(md5)' ${AUTH_2} 96 \
- enc 'cbc(des3_ede)' ${ENC_2} \
+ auth-trunc 'hmac(sha1)' ${AUTH_2} 96 \
+ enc 'cbc(aes)' ${ENC_2} \
sel src ${h2_6} dst ${h1_6}
}
cleanup_xfrm_dev()
{
- ip -netns host1 li del xfrm0
- ip -netns host2 addr del ${XFRM2_4}/24 dev eth0
- ip -netns host2 addr del ${XFRM2_6}/64 dev eth0
+ ip -netns $host1 li del xfrm0
+ ip -netns $host2 addr del ${XFRM2_4}/24 dev eth0
+ ip -netns $host2 addr del ${XFRM2_6}/64 dev eth0
}
setup_xfrm_dev()
{
local vrfarg="vrf ${VRF}"
- ip -netns host1 li add type xfrm dev eth0 if_id ${IF_ID}
- ip -netns host1 li set xfrm0 ${vrfarg} up
- ip -netns host1 addr add ${XFRM1_4}/24 dev xfrm0
- ip -netns host1 addr add ${XFRM1_6}/64 dev xfrm0
+ ip -netns $host1 li add type xfrm dev eth0 if_id ${IF_ID}
+ ip -netns $host1 li set xfrm0 ${vrfarg} up
+ ip -netns $host1 addr add ${XFRM1_4}/24 dev xfrm0
+ ip -netns $host1 addr add ${XFRM1_6}/64 dev xfrm0
- ip -netns host2 addr add ${XFRM2_4}/24 dev eth0
- ip -netns host2 addr add ${XFRM2_6}/64 dev eth0
+ ip -netns $host2 addr add ${XFRM2_4}/24 dev eth0
+ ip -netns $host2 addr add ${XFRM2_6}/64 dev eth0
setup_xfrm ${XFRM1_4} ${XFRM2_4} ${XFRM1_6} ${XFRM2_6} "if_id ${IF_ID}"
}
diff --git a/tools/testing/selftests/net/vrf_route_leaking.sh b/tools/testing/selftests/net/vrf_route_leaking.sh
new file mode 100755
index 000000000000..2da32f4c479b
--- /dev/null
+++ b/tools/testing/selftests/net/vrf_route_leaking.sh
@@ -0,0 +1,617 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+#
+# Copyright (c) 2019 David Ahern <dsahern@gmail.com>. All rights reserved.
+# Copyright (c) 2020 Michael Jeanson <mjeanson@efficios.com>. All rights reserved.
+#
+# Requires CONFIG_NET_VRF, CONFIG_VETH, CONFIG_BRIDGE and CONFIG_NET_NS.
+#
+#
+# Symmetric routing topology
+#
+# blue red
+# +----+ .253 +----+ .253 +----+
+# | h1 |-------------------| r1 |-------------------| h2 |
+# +----+ .1 +----+ .2 +----+
+# 172.16.1/24 172.16.2/24
+# 2001:db8:16:1/64 2001:db8:16:2/64
+#
+#
+# Route from h1 to h2 and back goes through r1, incoming vrf blue has a route
+# to the outgoing vrf red for the n2 network and red has a route back to n1.
+# The red VRF interface has a MTU of 1400.
+#
+# The first test sends a ping with a ttl of 1 from h1 to h2 and parses the
+# output of the command to check that a ttl expired error is received.
+#
+# The second test runs traceroute from h1 to h2 and parses the output to check
+# for a hop on r1.
+#
+# The third test sends a ping with a packet size of 1450 from h1 to h2 and
+# parses the output of the command to check that a fragmentation error is
+# received.
+#
+#
+# Asymmetric routing topology
+#
+# This topology represents a customer setup where the issue with icmp errors
+# and VRF route leaking was initialy reported. The MTU test isn't done here
+# because of the lack of a return route in the red VRF.
+#
+# blue red
+# .253 +----+ .253
+# +----| r1 |----+
+# | +----+ |
+# +----+ | | +----+
+# | h1 |--------------+ +--------------| h2 |
+# +----+ .1 | | .2 +----+
+# 172.16.1/24 | +----+ | 172.16.2/24
+# 2001:db8:16:1/64 +----| r2 |----+ 2001:db8:16:2/64
+# .254 +----+ .254
+#
+#
+# Route from h1 to h2 goes through r1, incoming vrf blue has a route to the
+# outgoing vrf red for the n2 network but red doesn't have a route back to n1.
+# Route from h2 to h1 goes through r2.
+#
+# The objective is to check that the incoming vrf routing table is selected
+# to send an ICMP error back to the source when the ttl of a packet reaches 1
+# while it is forwarded between different vrfs.
+
+source lib.sh
+VERBOSE=0
+PAUSE_ON_FAIL=no
+DEFAULT_TTYPE=sym
+
+H1_N1=172.16.1.0/24
+H1_N1_6=2001:db8:16:1::/64
+
+H1_N1_IP=172.16.1.1
+R1_N1_IP=172.16.1.253
+R2_N1_IP=172.16.1.254
+
+H1_N1_IP6=2001:db8:16:1::1
+R1_N1_IP6=2001:db8:16:1::253
+R2_N1_IP6=2001:db8:16:1::254
+
+H2_N2=172.16.2.0/24
+H2_N2_6=2001:db8:16:2::/64
+
+H2_N2_IP=172.16.2.2
+R1_N2_IP=172.16.2.253
+R2_N2_IP=172.16.2.254
+
+H2_N2_IP6=2001:db8:16:2::2
+R1_N2_IP6=2001:db8:16:2::253
+R2_N2_IP6=2001:db8:16:2::254
+
+################################################################################
+# helpers
+
+log_section()
+{
+ echo
+ echo "###########################################################################"
+ echo "$*"
+ echo "###########################################################################"
+ echo
+}
+
+log_test()
+{
+ local rc=$1
+ local expected=$2
+ local msg="$3"
+
+ if [ "${rc}" -eq "${expected}" ]; then
+ printf "TEST: %-60s [ OK ]\n" "${msg}"
+ nsuccess=$((nsuccess+1))
+ else
+ ret=1
+ nfail=$((nfail+1))
+ printf "TEST: %-60s [FAIL]\n" "${msg}"
+ if [ "${PAUSE_ON_FAIL}" = "yes" ]; then
+ echo
+ echo "hit enter to continue, 'q' to quit"
+ read -r a
+ [ "$a" = "q" ] && exit 1
+ fi
+ fi
+}
+
+run_cmd()
+{
+ local cmd="$*"
+ local out
+ local rc
+
+ if [ "$VERBOSE" = "1" ]; then
+ echo "COMMAND: $cmd"
+ fi
+
+ # shellcheck disable=SC2086
+ out=$(eval $cmd 2>&1)
+ rc=$?
+ if [ "$VERBOSE" = "1" ] && [ -n "$out" ]; then
+ echo "$out"
+ fi
+
+ [ "$VERBOSE" = "1" ] && echo
+
+ return $rc
+}
+
+run_cmd_grep()
+{
+ local grep_pattern="$1"
+ shift
+ local cmd="$*"
+ local out
+ local rc
+
+ if [ "$VERBOSE" = "1" ]; then
+ echo "COMMAND: $cmd"
+ fi
+
+ # shellcheck disable=SC2086
+ out=$(eval $cmd 2>&1)
+ if [ "$VERBOSE" = "1" ] && [ -n "$out" ]; then
+ echo "$out"
+ fi
+
+ echo "$out" | grep -q "$grep_pattern"
+ rc=$?
+
+ [ "$VERBOSE" = "1" ] && echo
+
+ return $rc
+}
+
+################################################################################
+# setup and teardown
+
+cleanup()
+{
+ cleanup_ns $h1 $h2 $r1 $r2
+}
+
+setup_vrf()
+{
+ local ns=$1
+
+ ip -netns "${ns}" rule del pref 0
+ ip -netns "${ns}" rule add pref 32765 from all lookup local
+ ip -netns "${ns}" -6 rule del pref 0
+ ip -netns "${ns}" -6 rule add pref 32765 from all lookup local
+}
+
+create_vrf()
+{
+ local ns=$1
+ local vrf=$2
+ local table=$3
+
+ ip -netns "${ns}" link add "${vrf}" type vrf table "${table}"
+ ip -netns "${ns}" link set "${vrf}" up
+ ip -netns "${ns}" route add vrf "${vrf}" unreachable default metric 8192
+ ip -netns "${ns}" -6 route add vrf "${vrf}" unreachable default metric 8192
+
+ ip -netns "${ns}" addr add 127.0.0.1/8 dev "${vrf}"
+ ip -netns "${ns}" -6 addr add ::1 dev "${vrf}" nodad
+}
+
+setup_sym()
+{
+ local ns
+
+ # make sure we are starting with a clean slate
+ cleanup
+
+ #
+ # create nodes as namespaces
+ setup_ns h1 h2 r1
+ for ns in $h1 $h2 $r1; do
+ if echo $ns | grep -q h[12]-; then
+ ip netns exec $ns sysctl -q -w net.ipv6.conf.all.forwarding=0
+ ip netns exec $ns sysctl -q -w net.ipv6.conf.all.keep_addr_on_down=1
+ else
+ ip netns exec $ns sysctl -q -w net.ipv4.ip_forward=1
+ ip netns exec $ns sysctl -q -w net.ipv6.conf.all.forwarding=1
+ fi
+ done
+
+ #
+ # create interconnects
+ #
+ ip -netns $h1 link add eth0 type veth peer name r1h1
+ ip -netns $h1 link set r1h1 netns $r1 name eth0 up
+
+ ip -netns $h2 link add eth0 type veth peer name r1h2
+ ip -netns $h2 link set r1h2 netns $r1 name eth1 up
+
+ #
+ # h1
+ #
+ ip -netns $h1 addr add dev eth0 ${H1_N1_IP}/24
+ ip -netns $h1 -6 addr add dev eth0 ${H1_N1_IP6}/64 nodad
+ ip -netns $h1 link set eth0 up
+
+ # h1 to h2 via r1
+ ip -netns $h1 route add ${H2_N2} via ${R1_N1_IP} dev eth0
+ ip -netns $h1 -6 route add ${H2_N2_6} via "${R1_N1_IP6}" dev eth0
+
+ #
+ # h2
+ #
+ ip -netns $h2 addr add dev eth0 ${H2_N2_IP}/24
+ ip -netns $h2 -6 addr add dev eth0 ${H2_N2_IP6}/64 nodad
+ ip -netns $h2 link set eth0 up
+
+ # h2 to h1 via r1
+ ip -netns $h2 route add default via ${R1_N2_IP} dev eth0
+ ip -netns $h2 -6 route add default via ${R1_N2_IP6} dev eth0
+
+ #
+ # r1
+ #
+ setup_vrf $r1
+ create_vrf $r1 blue 1101
+ create_vrf $r1 red 1102
+ ip -netns $r1 link set mtu 1400 dev eth1
+ ip -netns $r1 link set eth0 vrf blue up
+ ip -netns $r1 link set eth1 vrf red up
+ ip -netns $r1 addr add dev eth0 ${R1_N1_IP}/24
+ ip -netns $r1 -6 addr add dev eth0 ${R1_N1_IP6}/64 nodad
+ ip -netns $r1 addr add dev eth1 ${R1_N2_IP}/24
+ ip -netns $r1 -6 addr add dev eth1 ${R1_N2_IP6}/64 nodad
+
+ # Route leak from blue to red
+ ip -netns $r1 route add vrf blue ${H2_N2} dev red
+ ip -netns $r1 -6 route add vrf blue ${H2_N2_6} dev red
+
+ # Route leak from red to blue
+ ip -netns $r1 route add vrf red ${H1_N1} dev blue
+ ip -netns $r1 -6 route add vrf red ${H1_N1_6} dev blue
+
+
+ # Wait for ip config to settle
+ sleep 2
+}
+
+setup_asym()
+{
+ local ns
+
+ # make sure we are starting with a clean slate
+ cleanup
+
+ #
+ # create nodes as namespaces
+ setup_ns h1 h2 r1 r2
+ for ns in $h1 $h2 $r1 $r2; do
+ if echo $ns | grep -q h[12]-; then
+ ip netns exec $ns sysctl -q -w net.ipv6.conf.all.forwarding=0
+ ip netns exec $ns sysctl -q -w net.ipv6.conf.all.keep_addr_on_down=1
+ else
+ ip netns exec $ns sysctl -q -w net.ipv4.ip_forward=1
+ ip netns exec $ns sysctl -q -w net.ipv6.conf.all.forwarding=1
+ fi
+ done
+
+ #
+ # create interconnects
+ #
+ ip -netns $h1 link add eth0 type veth peer name r1h1
+ ip -netns $h1 link set r1h1 netns $r1 name eth0 up
+
+ ip -netns $h1 link add eth1 type veth peer name r2h1
+ ip -netns $h1 link set r2h1 netns $r2 name eth0 up
+
+ ip -netns $h2 link add eth0 type veth peer name r1h2
+ ip -netns $h2 link set r1h2 netns $r1 name eth1 up
+
+ ip -netns $h2 link add eth1 type veth peer name r2h2
+ ip -netns $h2 link set r2h2 netns $r2 name eth1 up
+
+ #
+ # h1
+ #
+ ip -netns $h1 link add br0 type bridge
+ ip -netns $h1 link set br0 up
+ ip -netns $h1 addr add dev br0 ${H1_N1_IP}/24
+ ip -netns $h1 -6 addr add dev br0 ${H1_N1_IP6}/64 nodad
+ ip -netns $h1 link set eth0 master br0 up
+ ip -netns $h1 link set eth1 master br0 up
+
+ # h1 to h2 via r1
+ ip -netns $h1 route add ${H2_N2} via ${R1_N1_IP} dev br0
+ ip -netns $h1 -6 route add ${H2_N2_6} via "${R1_N1_IP6}" dev br0
+
+ #
+ # h2
+ #
+ ip -netns $h2 link add br0 type bridge
+ ip -netns $h2 link set br0 up
+ ip -netns $h2 addr add dev br0 ${H2_N2_IP}/24
+ ip -netns $h2 -6 addr add dev br0 ${H2_N2_IP6}/64 nodad
+ ip -netns $h2 link set eth0 master br0 up
+ ip -netns $h2 link set eth1 master br0 up
+
+ # h2 to h1 via r2
+ ip -netns $h2 route add default via ${R2_N2_IP} dev br0
+ ip -netns $h2 -6 route add default via ${R2_N2_IP6} dev br0
+
+ #
+ # r1
+ #
+ setup_vrf $r1
+ create_vrf $r1 blue 1101
+ create_vrf $r1 red 1102
+ ip -netns $r1 link set mtu 1400 dev eth1
+ ip -netns $r1 link set eth0 vrf blue up
+ ip -netns $r1 link set eth1 vrf red up
+ ip -netns $r1 addr add dev eth0 ${R1_N1_IP}/24
+ ip -netns $r1 -6 addr add dev eth0 ${R1_N1_IP6}/64 nodad
+ ip -netns $r1 addr add dev eth1 ${R1_N2_IP}/24
+ ip -netns $r1 -6 addr add dev eth1 ${R1_N2_IP6}/64 nodad
+
+ # Route leak from blue to red
+ ip -netns $r1 route add vrf blue ${H2_N2} dev red
+ ip -netns $r1 -6 route add vrf blue ${H2_N2_6} dev red
+
+ # No route leak from red to blue
+
+ #
+ # r2
+ #
+ ip -netns $r2 addr add dev eth0 ${R2_N1_IP}/24
+ ip -netns $r2 -6 addr add dev eth0 ${R2_N1_IP6}/64 nodad
+ ip -netns $r2 addr add dev eth1 ${R2_N2_IP}/24
+ ip -netns $r2 -6 addr add dev eth1 ${R2_N2_IP6}/64 nodad
+
+ # Wait for ip config to settle
+ sleep 2
+}
+
+check_connectivity()
+{
+ ip netns exec $h1 ping -c1 -w1 ${H2_N2_IP} >/dev/null 2>&1
+ log_test $? 0 "Basic IPv4 connectivity"
+ return $?
+}
+
+check_connectivity6()
+{
+ ip netns exec $h1 "${ping6}" -c1 -w1 ${H2_N2_IP6} >/dev/null 2>&1
+ log_test $? 0 "Basic IPv6 connectivity"
+ return $?
+}
+
+check_traceroute()
+{
+ if [ ! -x "$(command -v traceroute)" ]; then
+ echo "SKIP: Could not run IPV4 test without traceroute"
+ return 1
+ fi
+}
+
+check_traceroute6()
+{
+ if [ ! -x "$(command -v traceroute6)" ]; then
+ echo "SKIP: Could not run IPV6 test without traceroute6"
+ return 1
+ fi
+}
+
+ipv4_traceroute()
+{
+ local ttype="$1"
+
+ [ "x$ttype" = "x" ] && ttype="$DEFAULT_TTYPE"
+
+ log_section "IPv4 ($ttype route): VRF ICMP error route lookup traceroute"
+
+ check_traceroute || return
+
+ setup_"$ttype"
+
+ check_connectivity || return
+
+ run_cmd_grep "${R1_N1_IP}" ip netns exec $h1 traceroute ${H2_N2_IP}
+ log_test $? 0 "Traceroute reports a hop on r1"
+}
+
+ipv4_traceroute_asym()
+{
+ ipv4_traceroute asym
+}
+
+ipv6_traceroute()
+{
+ local ttype="$1"
+
+ [ "x$ttype" = "x" ] && ttype="$DEFAULT_TTYPE"
+
+ log_section "IPv6 ($ttype route): VRF ICMP error route lookup traceroute"
+
+ check_traceroute6 || return
+
+ setup_"$ttype"
+
+ check_connectivity6 || return
+
+ run_cmd_grep "${R1_N1_IP6}" ip netns exec $h1 traceroute6 ${H2_N2_IP6}
+ log_test $? 0 "Traceroute6 reports a hop on r1"
+}
+
+ipv6_traceroute_asym()
+{
+ ipv6_traceroute asym
+}
+
+ipv4_ping_ttl()
+{
+ local ttype="$1"
+
+ [ "x$ttype" = "x" ] && ttype="$DEFAULT_TTYPE"
+
+ log_section "IPv4 ($ttype route): VRF ICMP ttl error route lookup ping"
+
+ setup_"$ttype"
+
+ check_connectivity || return
+
+ run_cmd_grep "Time to live exceeded" ip netns exec $h1 ping -t1 -c1 -W2 ${H2_N2_IP}
+ log_test $? 0 "Ping received ICMP ttl exceeded"
+}
+
+ipv4_ping_ttl_asym()
+{
+ ipv4_ping_ttl asym
+}
+
+ipv4_ping_frag()
+{
+ local ttype="$1"
+
+ [ "x$ttype" = "x" ] && ttype="$DEFAULT_TTYPE"
+
+ log_section "IPv4 ($ttype route): VRF ICMP fragmentation error route lookup ping"
+
+ setup_"$ttype"
+
+ check_connectivity || return
+
+ run_cmd_grep "Frag needed" ip netns exec $h1 ping -s 1450 -Mdo -c1 -W2 ${H2_N2_IP}
+ log_test $? 0 "Ping received ICMP Frag needed"
+}
+
+ipv4_ping_frag_asym()
+{
+ ipv4_ping_frag asym
+}
+
+ipv6_ping_ttl()
+{
+ local ttype="$1"
+
+ [ "x$ttype" = "x" ] && ttype="$DEFAULT_TTYPE"
+
+ log_section "IPv6 ($ttype route): VRF ICMP ttl error route lookup ping"
+
+ setup_"$ttype"
+
+ check_connectivity6 || return
+
+ run_cmd_grep "Time exceeded: Hop limit" ip netns exec $h1 "${ping6}" -t1 -c1 -W2 ${H2_N2_IP6}
+ log_test $? 0 "Ping received ICMP Hop limit"
+}
+
+ipv6_ping_ttl_asym()
+{
+ ipv6_ping_ttl asym
+}
+
+ipv6_ping_frag()
+{
+ local ttype="$1"
+
+ [ "x$ttype" = "x" ] && ttype="$DEFAULT_TTYPE"
+
+ log_section "IPv6 ($ttype route): VRF ICMP fragmentation error route lookup ping"
+
+ setup_"$ttype"
+
+ check_connectivity6 || return
+
+ run_cmd_grep "Packet too big" ip netns exec $h1 "${ping6}" -s 1450 -Mdo -c1 -W2 ${H2_N2_IP6}
+ log_test $? 0 "Ping received ICMP Packet too big"
+}
+
+ipv6_ping_frag_asym()
+{
+ ipv6_ping_frag asym
+}
+
+################################################################################
+# usage
+
+usage()
+{
+ cat <<EOF
+usage: ${0##*/} OPTS
+
+ -4 Run IPv4 tests only
+ -6 Run IPv6 tests only
+ -t TEST Run only TEST
+ -p Pause on fail
+ -v verbose mode (show commands and output)
+EOF
+}
+
+################################################################################
+# main
+
+# Some systems don't have a ping6 binary anymore
+command -v ping6 > /dev/null 2>&1 && ping6=$(command -v ping6) || ping6=$(command -v ping)
+
+TESTS_IPV4="ipv4_ping_ttl ipv4_traceroute ipv4_ping_frag ipv4_ping_ttl_asym ipv4_traceroute_asym"
+TESTS_IPV6="ipv6_ping_ttl ipv6_traceroute ipv6_ping_ttl_asym ipv6_traceroute_asym"
+
+ret=0
+nsuccess=0
+nfail=0
+
+while getopts :46t:pvh o
+do
+ case $o in
+ 4) TESTS=ipv4;;
+ 6) TESTS=ipv6;;
+ t) TESTS=$OPTARG;;
+ p) PAUSE_ON_FAIL=yes;;
+ v) VERBOSE=1;;
+ h) usage; exit 0;;
+ *) usage; exit 1;;
+ esac
+done
+
+#
+# show user test config
+#
+if [ -z "$TESTS" ]; then
+ TESTS="$TESTS_IPV4 $TESTS_IPV6"
+elif [ "$TESTS" = "ipv4" ]; then
+ TESTS="$TESTS_IPV4"
+elif [ "$TESTS" = "ipv6" ]; then
+ TESTS="$TESTS_IPV6"
+fi
+
+for t in $TESTS
+do
+ case $t in
+ ipv4_ping_ttl|ping) ipv4_ping_ttl;;&
+ ipv4_ping_ttl_asym|ping) ipv4_ping_ttl_asym;;&
+ ipv4_traceroute|traceroute) ipv4_traceroute;;&
+ ipv4_traceroute_asym|traceroute) ipv4_traceroute_asym;;&
+ ipv4_ping_frag|ping) ipv4_ping_frag;;&
+
+ ipv6_ping_ttl|ping) ipv6_ping_ttl;;&
+ ipv6_ping_ttl_asym|ping) ipv6_ping_ttl_asym;;&
+ ipv6_traceroute|traceroute) ipv6_traceroute;;&
+ ipv6_traceroute_asym|traceroute) ipv6_traceroute_asym;;&
+ ipv6_ping_frag|ping) ipv6_ping_frag;;&
+
+ # setup namespaces and config, but do not run any tests
+ setup_sym|setup) setup_sym; exit 0;;
+ setup_asym) setup_asym; exit 0;;
+
+ help) echo "Test names: $TESTS"; exit 0;;
+ esac
+done
+
+cleanup
+
+printf "\nTests passed: %3d\n" ${nsuccess}
+printf "Tests failed: %3d\n" ${nfail}
+
+exit $ret
diff --git a/tools/testing/selftests/net/vrf_strict_mode_test.sh b/tools/testing/selftests/net/vrf_strict_mode_test.sh
new file mode 100755
index 000000000000..01552b542544
--- /dev/null
+++ b/tools/testing/selftests/net/vrf_strict_mode_test.sh
@@ -0,0 +1,426 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+# This test is designed for testing the new VRF strict_mode functionality.
+
+source lib.sh
+ret=0
+
+# identifies the "init" network namespace which is often called root network
+# namespace.
+INIT_NETNS_NAME="init"
+
+PAUSE_ON_FAIL=${PAUSE_ON_FAIL:=no}
+
+TESTS="init testns mix"
+
+log_test()
+{
+ local rc=$1
+ local expected=$2
+ local msg="$3"
+
+ if [ ${rc} -eq ${expected} ]; then
+ nsuccess=$((nsuccess+1))
+ printf "\n TEST: %-60s [ OK ]\n" "${msg}"
+ else
+ ret=1
+ nfail=$((nfail+1))
+ printf "\n TEST: %-60s [FAIL]\n" "${msg}"
+ if [ "${PAUSE_ON_FAIL}" = "yes" ]; then
+ echo
+ echo "hit enter to continue, 'q' to quit"
+ read a
+ [ "$a" = "q" ] && exit 1
+ fi
+ fi
+}
+
+print_log_test_results()
+{
+ if [ "$TESTS" != "none" ]; then
+ printf "\nTests passed: %3d\n" ${nsuccess}
+ printf "Tests failed: %3d\n" ${nfail}
+ fi
+}
+
+log_section()
+{
+ echo
+ echo "################################################################################"
+ echo "TEST SECTION: $*"
+ echo "################################################################################"
+}
+
+ip_expand_args()
+{
+ local nsname=$1
+ local nsarg=""
+
+ if [ "${nsname}" != "${INIT_NETNS_NAME}" ]; then
+ nsarg="-netns ${nsname}"
+ fi
+
+ echo "${nsarg}"
+}
+
+vrf_count()
+{
+ local nsname=$1
+ local nsarg="$(ip_expand_args ${nsname})"
+
+ ip ${nsarg} -o link show type vrf | wc -l
+}
+
+count_vrf_by_table_id()
+{
+ local nsname=$1
+ local tableid=$2
+ local nsarg="$(ip_expand_args ${nsname})"
+
+ ip ${nsarg} -d -o link show type vrf | grep "table ${tableid}" | wc -l
+}
+
+add_vrf()
+{
+ local nsname=$1
+ local vrfname=$2
+ local vrftable=$3
+ local nsarg="$(ip_expand_args ${nsname})"
+
+ ip ${nsarg} link add ${vrfname} type vrf table ${vrftable} &>/dev/null
+}
+
+add_vrf_and_check()
+{
+ local nsname=$1
+ local vrfname=$2
+ local vrftable=$3
+ local cnt
+ local rc
+
+ add_vrf ${nsname} ${vrfname} ${vrftable}; rc=$?
+
+ cnt=$(count_vrf_by_table_id ${nsname} ${vrftable})
+
+ log_test ${rc} 0 "${nsname}: add vrf ${vrfname}, ${cnt} vrfs for table ${vrftable}"
+}
+
+add_vrf_and_check_fail()
+{
+ local nsname=$1
+ local vrfname=$2
+ local vrftable=$3
+ local cnt
+ local rc
+
+ add_vrf ${nsname} ${vrfname} ${vrftable}; rc=$?
+
+ cnt=$(count_vrf_by_table_id ${nsname} ${vrftable})
+
+ log_test ${rc} 2 "${nsname}: CANNOT add vrf ${vrfname}, ${cnt} vrfs for table ${vrftable}"
+}
+
+del_vrf_and_check()
+{
+ local nsname=$1
+ local vrfname=$2
+ local nsarg="$(ip_expand_args ${nsname})"
+
+ ip ${nsarg} link del ${vrfname}
+ log_test $? 0 "${nsname}: remove vrf ${vrfname}"
+}
+
+config_vrf_and_check()
+{
+ local nsname=$1
+ local addr=$2
+ local vrfname=$3
+ local nsarg="$(ip_expand_args ${nsname})"
+
+ ip ${nsarg} link set dev ${vrfname} up && \
+ ip ${nsarg} addr add ${addr} dev ${vrfname}
+ log_test $? 0 "${nsname}: vrf ${vrfname} up, addr ${addr}"
+}
+
+read_strict_mode()
+{
+ local nsname=$1
+ local rval
+ local rc=0
+ local nsexec=""
+
+ if [ "${nsname}" != "${INIT_NETNS_NAME}" ]; then
+ # a custom network namespace is provided
+ nsexec="ip netns exec ${nsname}"
+ fi
+
+ rval="$(${nsexec} bash -c "cat /proc/sys/net/vrf/strict_mode" | \
+ grep -E "^[0-1]$")" &> /dev/null
+ if [ $? -ne 0 ]; then
+ # set errors
+ rval=255
+ rc=1
+ fi
+
+ # on success, rval can be only 0 or 1; on error, rval is equal to 255
+ echo ${rval}
+ return ${rc}
+}
+
+read_strict_mode_compare_and_check()
+{
+ local nsname=$1
+ local expected=$2
+ local res
+
+ res="$(read_strict_mode ${nsname})"
+ log_test ${res} ${expected} "${nsname}: check strict_mode=${res}"
+}
+
+set_strict_mode()
+{
+ local nsname=$1
+ local val=$2
+ local nsexec=""
+
+ if [ "${nsname}" != "${INIT_NETNS_NAME}" ]; then
+ # a custom network namespace is provided
+ nsexec="ip netns exec ${nsname}"
+ fi
+
+ ${nsexec} bash -c "echo ${val} >/proc/sys/net/vrf/strict_mode" &>/dev/null
+}
+
+enable_strict_mode()
+{
+ local nsname=$1
+
+ set_strict_mode ${nsname} 1
+}
+
+disable_strict_mode()
+{
+ local nsname=$1
+
+ set_strict_mode ${nsname} 0
+}
+
+disable_strict_mode_and_check()
+{
+ local nsname=$1
+
+ disable_strict_mode ${nsname}
+ log_test $? 0 "${nsname}: disable strict_mode (=0)"
+}
+
+enable_strict_mode_and_check()
+{
+ local nsname=$1
+
+ enable_strict_mode ${nsname}
+ log_test $? 0 "${nsname}: enable strict_mode (=1)"
+}
+
+enable_strict_mode_and_check_fail()
+{
+ local nsname=$1
+
+ enable_strict_mode ${nsname}
+ log_test $? 1 "${nsname}: CANNOT enable strict_mode"
+}
+
+strict_mode_check_default()
+{
+ local nsname=$1
+ local strictmode
+ local vrfcnt
+
+ vrfcnt=$(vrf_count ${nsname})
+ strictmode=$(read_strict_mode ${nsname})
+ log_test ${strictmode} 0 "${nsname}: strict_mode=0 by default, ${vrfcnt} vrfs"
+}
+
+setup()
+{
+ modprobe vrf
+
+ setup_ns testns
+}
+
+cleanup()
+{
+ ip netns del $testns 2>/dev/null
+
+ ip link del vrf100 2>/dev/null
+ ip link del vrf101 2>/dev/null
+ ip link del vrf102 2>/dev/null
+
+ echo 0 >/proc/sys/net/vrf/strict_mode 2>/dev/null
+}
+
+vrf_strict_mode_tests_init()
+{
+ log_section "VRF strict_mode test on init network namespace"
+
+ vrf_strict_mode_check_support init
+
+ strict_mode_check_default init
+
+ add_vrf_and_check init vrf100 100
+ config_vrf_and_check init 172.16.100.1/24 vrf100
+
+ enable_strict_mode_and_check init
+
+ add_vrf_and_check_fail init vrf101 100
+
+ disable_strict_mode_and_check init
+
+ add_vrf_and_check init vrf101 100
+ config_vrf_and_check init 172.16.101.1/24 vrf101
+
+ enable_strict_mode_and_check_fail init
+
+ del_vrf_and_check init vrf101
+
+ enable_strict_mode_and_check init
+
+ add_vrf_and_check init vrf102 102
+ config_vrf_and_check init 172.16.102.1/24 vrf102
+
+ # the strict_modle is enabled in the init
+}
+
+vrf_strict_mode_tests_testns()
+{
+ log_section "VRF strict_mode test on testns network namespace"
+
+ vrf_strict_mode_check_support $testns
+
+ strict_mode_check_default $testns
+
+ enable_strict_mode_and_check $testns
+
+ add_vrf_and_check $testns vrf100 100
+ config_vrf_and_check $testns 10.0.100.1/24 vrf100
+
+ add_vrf_and_check_fail $testns vrf101 100
+
+ add_vrf_and_check_fail $testns vrf102 100
+
+ add_vrf_and_check $testns vrf200 200
+
+ disable_strict_mode_and_check $testns
+
+ add_vrf_and_check $testns vrf101 100
+
+ add_vrf_and_check $testns vrf102 100
+
+ #the strict_mode is disabled in the $testns
+}
+
+vrf_strict_mode_tests_mix()
+{
+ log_section "VRF strict_mode test mixing init and testns network namespaces"
+
+ read_strict_mode_compare_and_check init 1
+
+ read_strict_mode_compare_and_check $testns 0
+
+ del_vrf_and_check $testns vrf101
+
+ del_vrf_and_check $testns vrf102
+
+ disable_strict_mode_and_check init
+
+ enable_strict_mode_and_check $testns
+
+ enable_strict_mode_and_check init
+ enable_strict_mode_and_check init
+
+ disable_strict_mode_and_check $testns
+ disable_strict_mode_and_check $testns
+
+ read_strict_mode_compare_and_check init 1
+
+ read_strict_mode_compare_and_check $testns 0
+}
+
+################################################################################
+# usage
+
+usage()
+{
+ cat <<EOF
+usage: ${0##*/} OPTS
+
+ -t <test> Test(s) to run (default: all)
+ (options: $TESTS)
+EOF
+}
+
+################################################################################
+# main
+
+while getopts ":t:h" opt; do
+ case $opt in
+ t) TESTS=$OPTARG;;
+ h) usage; exit 0;;
+ *) usage; exit 1;;
+ esac
+done
+
+vrf_strict_mode_check_support()
+{
+ local nsname=$1
+ local output
+ local rc
+
+ output="$(lsmod | grep '^vrf' | awk '{print $1}')"
+ if [ -z "${output}" ]; then
+ modinfo vrf || return $?
+ fi
+
+ # we do not care about the value of the strict_mode; we only check if
+ # the strict_mode parameter is available or not.
+ read_strict_mode ${nsname} &>/dev/null; rc=$?
+ log_test ${rc} 0 "${nsname}: net.vrf.strict_mode is available"
+
+ return ${rc}
+}
+
+if [ "$(id -u)" -ne 0 ];then
+ echo "SKIP: Need root privileges"
+ exit $ksft_skip
+fi
+
+if [ ! -x "$(command -v ip)" ]; then
+ echo "SKIP: Could not run test without ip tool"
+ exit $ksft_skip
+fi
+
+modprobe vrf &>/dev/null
+if [ ! -e /proc/sys/net/vrf/strict_mode ]; then
+ echo "SKIP: vrf sysctl does not exist"
+ exit $ksft_skip
+fi
+
+cleanup &> /dev/null
+
+setup
+for t in $TESTS
+do
+ case $t in
+ vrf_strict_mode_tests_init|init) vrf_strict_mode_tests_init;;
+ vrf_strict_mode_tests_testns|testns) vrf_strict_mode_tests_testns;;
+ vrf_strict_mode_tests_mix|mix) vrf_strict_mode_tests_mix;;
+
+ help) echo "Test names: $TESTS"; exit 0;;
+
+ esac
+done
+cleanup
+
+print_log_test_results
+
+exit $ret
diff --git a/tools/testing/selftests/net/xdp_dummy.c b/tools/testing/selftests/net/xdp_dummy.c
new file mode 100644
index 000000000000..d988b2e0cee8
--- /dev/null
+++ b/tools/testing/selftests/net/xdp_dummy.c
@@ -0,0 +1,13 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#define KBUILD_MODNAME "xdp_dummy"
+#include <linux/bpf.h>
+#include <bpf/bpf_helpers.h>
+
+SEC("xdp")
+int xdp_dummy_prog(struct xdp_md *ctx)
+{
+ return XDP_PASS;
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/net/xfrm_policy.sh b/tools/testing/selftests/net/xfrm_policy.sh
index 7a1bf94c5bd3..457789530645 100755
--- a/tools/testing/selftests/net/xfrm_policy.sh
+++ b/tools/testing/selftests/net/xfrm_policy.sh
@@ -18,8 +18,7 @@
# ns1: ping 10.0.2.254: does NOT pass via ipsec tunnel (exception)
# ns2: ping 10.0.1.254: does NOT pass via ipsec tunnel (exception)
-# Kselftest framework requirement - SKIP code is 4.
-ksft_skip=4
+source lib.sh
ret=0
policy_checks_ok=1
@@ -202,26 +201,26 @@ check_xfrm() {
# 1: iptables -m policy rule count != 0
rval=$1
ip=$2
- lret=0
+ local lret=0
- ip netns exec ns1 ping -q -c 1 10.0.2.$ip > /dev/null
+ ip netns exec ${ns[1]} ping -q -c 1 10.0.2.$ip > /dev/null
- check_ipt_policy_count ns3
+ check_ipt_policy_count ${ns[3]}
if [ $? -ne $rval ] ; then
lret=1
fi
- check_ipt_policy_count ns4
+ check_ipt_policy_count ${ns[4]}
if [ $? -ne $rval ] ; then
lret=1
fi
- ip netns exec ns2 ping -q -c 1 10.0.1.$ip > /dev/null
+ ip netns exec ${ns[2]} ping -q -c 1 10.0.1.$ip > /dev/null
- check_ipt_policy_count ns3
+ check_ipt_policy_count ${ns[3]}
if [ $? -ne $rval ] ; then
lret=1
fi
- check_ipt_policy_count ns4
+ check_ipt_policy_count ${ns[4]}
if [ $? -ne $rval ] ; then
lret=1
fi
@@ -270,11 +269,11 @@ check_hthresh_repeat()
i=0
for i in $(seq 1 10);do
- ip -net ns1 xfrm policy update src e000:0001::0000 dst ff01::0014:0000:0001 dir in tmpl src :: dst :: proto esp mode tunnel priority 100 action allow || break
- ip -net ns1 xfrm policy set hthresh6 0 28 || break
+ ip -net ${ns[1]} xfrm policy update src e000:0001::0000 dst ff01::0014:0000:0001 dir in tmpl src :: dst :: proto esp mode tunnel priority 100 action allow || break
+ ip -net ${ns[1]} xfrm policy set hthresh6 0 28 || break
- ip -net ns1 xfrm policy update src e000:0001::0000 dst ff01::01 dir in tmpl src :: dst :: proto esp mode tunnel priority 100 action allow || break
- ip -net ns1 xfrm policy set hthresh6 0 28 || break
+ ip -net ${ns[1]} xfrm policy update src e000:0001::0000 dst ff01::01 dir in tmpl src :: dst :: proto esp mode tunnel priority 100 action allow || break
+ ip -net ${ns[1]} xfrm policy set hthresh6 0 28 || break
done
if [ $i -ne 10 ] ;then
@@ -287,6 +286,47 @@ check_hthresh_repeat()
return 0
}
+# insert non-overlapping policies in a random order and check that
+# all of them can be fetched using the traffic selectors.
+check_random_order()
+{
+ local ns=$1
+ local log=$2
+
+ for i in $(seq 100); do
+ ip -net $ns xfrm policy flush
+ for j in $(seq 0 16 255 | sort -R); do
+ ip -net $ns xfrm policy add dst $j.0.0.0/24 dir out priority 10 action allow
+ done
+ for j in $(seq 0 16 255); do
+ if ! ip -net $ns xfrm policy get dst $j.0.0.0/24 dir out > /dev/null; then
+ echo "FAIL: $log" 1>&2
+ return 1
+ fi
+ done
+ done
+
+ for i in $(seq 100); do
+ ip -net $ns xfrm policy flush
+ for j in $(seq 0 16 255 | sort -R); do
+ local addr=$(printf "e000:0000:%02x00::/56" $j)
+ ip -net $ns xfrm policy add dst $addr dir out priority 10 action allow
+ done
+ for j in $(seq 0 16 255); do
+ local addr=$(printf "e000:0000:%02x00::/56" $j)
+ if ! ip -net $ns xfrm policy get dst $addr dir out > /dev/null; then
+ echo "FAIL: $log" 1>&2
+ return 1
+ fi
+ done
+ done
+
+ ip -net $ns xfrm policy flush
+
+ echo "PASS: $log"
+ return 0
+}
+
#check for needed privileges
if [ "$(id -u)" -ne 0 ];then
echo "SKIP: Need root privileges"
@@ -306,79 +346,80 @@ if [ $? -ne 0 ];then
exit $ksft_skip
fi
-for i in 1 2 3 4; do
- ip netns add ns$i
- ip -net ns$i link set lo up
-done
+setup_ns ns1 ns2 ns3 ns4
+ns[1]=$ns1
+ns[2]=$ns2
+ns[3]=$ns3
+ns[4]=$ns4
DEV=veth0
-ip link add $DEV netns ns1 type veth peer name eth1 netns ns3
-ip link add $DEV netns ns2 type veth peer name eth1 netns ns4
+ip link add $DEV netns ${ns[1]} type veth peer name eth1 netns ${ns[3]}
+ip link add $DEV netns ${ns[2]} type veth peer name eth1 netns ${ns[4]}
-ip link add $DEV netns ns3 type veth peer name veth0 netns ns4
+ip link add $DEV netns ${ns[3]} type veth peer name veth0 netns ${ns[4]}
DEV=veth0
for i in 1 2; do
- ip -net ns$i link set $DEV up
- ip -net ns$i addr add 10.0.$i.2/24 dev $DEV
- ip -net ns$i addr add dead:$i::2/64 dev $DEV
-
- ip -net ns$i addr add 10.0.$i.253 dev $DEV
- ip -net ns$i addr add 10.0.$i.254 dev $DEV
- ip -net ns$i addr add dead:$i::fd dev $DEV
- ip -net ns$i addr add dead:$i::fe dev $DEV
+ ip -net ${ns[$i]} link set $DEV up
+ ip -net ${ns[$i]} addr add 10.0.$i.2/24 dev $DEV
+ ip -net ${ns[$i]} addr add dead:$i::2/64 dev $DEV
+
+ ip -net ${ns[$i]} addr add 10.0.$i.253 dev $DEV
+ ip -net ${ns[$i]} addr add 10.0.$i.254 dev $DEV
+ ip -net ${ns[$i]} addr add dead:$i::fd dev $DEV
+ ip -net ${ns[$i]} addr add dead:$i::fe dev $DEV
done
for i in 3 4; do
-ip -net ns$i link set eth1 up
-ip -net ns$i link set veth0 up
+ ip -net ${ns[$i]} link set eth1 up
+ ip -net ${ns[$i]} link set veth0 up
done
-ip -net ns1 route add default via 10.0.1.1
-ip -net ns2 route add default via 10.0.2.1
+ip -net ${ns[1]} route add default via 10.0.1.1
+ip -net ${ns[2]} route add default via 10.0.2.1
-ip -net ns3 addr add 10.0.1.1/24 dev eth1
-ip -net ns3 addr add 10.0.3.1/24 dev veth0
-ip -net ns3 addr add 2001:1::1/64 dev eth1
-ip -net ns3 addr add 2001:3::1/64 dev veth0
+ip -net ${ns[3]} addr add 10.0.1.1/24 dev eth1
+ip -net ${ns[3]} addr add 10.0.3.1/24 dev veth0
+ip -net ${ns[3]} addr add 2001:1::1/64 dev eth1
+ip -net ${ns[3]} addr add 2001:3::1/64 dev veth0
-ip -net ns3 route add default via 10.0.3.10
+ip -net ${ns[3]} route add default via 10.0.3.10
-ip -net ns4 addr add 10.0.2.1/24 dev eth1
-ip -net ns4 addr add 10.0.3.10/24 dev veth0
-ip -net ns4 addr add 2001:2::1/64 dev eth1
-ip -net ns4 addr add 2001:3::10/64 dev veth0
-ip -net ns4 route add default via 10.0.3.1
+ip -net ${ns[4]} addr add 10.0.2.1/24 dev eth1
+ip -net ${ns[4]} addr add 10.0.3.10/24 dev veth0
+ip -net ${ns[4]} addr add 2001:2::1/64 dev eth1
+ip -net ${ns[4]} addr add 2001:3::10/64 dev veth0
+ip -net ${ns[4]} route add default via 10.0.3.1
for j in 4 6; do
for i in 3 4;do
- ip netns exec ns$i sysctl net.ipv$j.conf.eth1.forwarding=1 > /dev/null
- ip netns exec ns$i sysctl net.ipv$j.conf.veth0.forwarding=1 > /dev/null
+ ip netns exec ${ns[$i]} sysctl net.ipv$j.conf.eth1.forwarding=1 > /dev/null
+ ip netns exec ${ns[$i]} sysctl net.ipv$j.conf.veth0.forwarding=1 > /dev/null
done
done
# abuse iptables rule counter to check if ping matches a policy
-ip netns exec ns3 iptables -p icmp -A FORWARD -m policy --dir out --pol ipsec
-ip netns exec ns4 iptables -p icmp -A FORWARD -m policy --dir out --pol ipsec
+ip netns exec ${ns[3]} iptables -p icmp -A FORWARD -m policy --dir out --pol ipsec
+ip netns exec ${ns[4]} iptables -p icmp -A FORWARD -m policy --dir out --pol ipsec
if [ $? -ne 0 ];then
echo "SKIP: Could not insert iptables rule"
- for i in 1 2 3 4;do ip netns del ns$i;done
+ cleanup_ns $ns1 $ns2 $ns3 $ns4
exit $ksft_skip
fi
# localip remoteip localnet remotenet
-do_esp ns3 10.0.3.1 10.0.3.10 10.0.1.0/24 10.0.2.0/24 $SPI1 $SPI2
-do_esp ns3 dead:3::1 dead:3::10 dead:1::/64 dead:2::/64 $SPI1 $SPI2
-do_esp ns4 10.0.3.10 10.0.3.1 10.0.2.0/24 10.0.1.0/24 $SPI2 $SPI1
-do_esp ns4 dead:3::10 dead:3::1 dead:2::/64 dead:1::/64 $SPI2 $SPI1
+do_esp ${ns[3]} 10.0.3.1 10.0.3.10 10.0.1.0/24 10.0.2.0/24 $SPI1 $SPI2
+do_esp ${ns[3]} dead:3::1 dead:3::10 dead:1::/64 dead:2::/64 $SPI1 $SPI2
+do_esp ${ns[4]} 10.0.3.10 10.0.3.1 10.0.2.0/24 10.0.1.0/24 $SPI2 $SPI1
+do_esp ${ns[4]} dead:3::10 dead:3::1 dead:2::/64 dead:1::/64 $SPI2 $SPI1
-do_dummies4 ns3
-do_dummies6 ns4
+do_dummies4 ${ns[3]}
+do_dummies6 ${ns[4]}
-do_esp_policy_get_check ns3 10.0.1.0/24 10.0.2.0/24
-do_esp_policy_get_check ns4 10.0.2.0/24 10.0.1.0/24
-do_esp_policy_get_check ns3 dead:1::/64 dead:2::/64
-do_esp_policy_get_check ns4 dead:2::/64 dead:1::/64
+do_esp_policy_get_check ${ns[3]} 10.0.1.0/24 10.0.2.0/24
+do_esp_policy_get_check ${ns[4]} 10.0.2.0/24 10.0.1.0/24
+do_esp_policy_get_check ${ns[3]} dead:1::/64 dead:2::/64
+do_esp_policy_get_check ${ns[4]} dead:2::/64 dead:1::/64
# ping to .254 should use ipsec, exception is not installed.
check_xfrm 1 254
@@ -391,11 +432,11 @@ fi
# installs exceptions
# localip remoteip encryptdst plaindst
-do_exception ns3 10.0.3.1 10.0.3.10 10.0.2.253 10.0.2.240/28
-do_exception ns4 10.0.3.10 10.0.3.1 10.0.1.253 10.0.1.240/28
+do_exception ${ns[3]} 10.0.3.1 10.0.3.10 10.0.2.253 10.0.2.240/28
+do_exception ${ns[4]} 10.0.3.10 10.0.3.1 10.0.1.253 10.0.1.240/28
-do_exception ns3 dead:3::1 dead:3::10 dead:2::fd dead:2:f0::/96
-do_exception ns4 dead:3::10 dead:3::1 dead:1::fd dead:1:f0::/96
+do_exception ${ns[3]} dead:3::1 dead:3::10 dead:2::fd dead:2:f0::/96
+do_exception ${ns[4]} dead:3::10 dead:3::1 dead:1::fd dead:1:f0::/96
check_exceptions "exceptions"
if [ $? -ne 0 ]; then
@@ -403,14 +444,14 @@ if [ $? -ne 0 ]; then
fi
# insert block policies with adjacent/overlapping netmasks
-do_overlap ns3
+do_overlap ${ns[3]}
check_exceptions "exceptions and block policies"
if [ $? -ne 0 ]; then
ret=1
fi
-for n in ns3 ns4;do
+for n in ${ns[3]} ${ns[4]};do
ip -net $n xfrm policy set hthresh4 28 24 hthresh6 126 125
sleep $((RANDOM%5))
done
@@ -418,19 +459,19 @@ done
check_exceptions "exceptions and block policies after hresh changes"
# full flush of policy db, check everything gets freed incl. internal meta data
-ip -net ns3 xfrm policy flush
+ip -net ${ns[3]} xfrm policy flush
-do_esp_policy ns3 10.0.3.1 10.0.3.10 10.0.1.0/24 10.0.2.0/24
-do_exception ns3 10.0.3.1 10.0.3.10 10.0.2.253 10.0.2.240/28
+do_esp_policy ${ns[3]} 10.0.3.1 10.0.3.10 10.0.1.0/24 10.0.2.0/24
+do_exception ${ns[3]} 10.0.3.1 10.0.3.10 10.0.2.253 10.0.2.240/28
# move inexact policies to hash table
-ip -net ns3 xfrm policy set hthresh4 16 16
+ip -net ${ns[3]} xfrm policy set hthresh4 16 16
sleep $((RANDOM%5))
check_exceptions "exceptions and block policies after hthresh change in ns3"
# restore original hthresh settings -- move policies back to tables
-for n in ns3 ns4;do
+for n in ${ns[3]} ${ns[4]};do
ip -net $n xfrm policy set hthresh4 32 32 hthresh6 128 128
sleep $((RANDOM%5))
done
@@ -438,6 +479,8 @@ check_exceptions "exceptions and block policies after htresh change to normal"
check_hthresh_repeat "policies with repeated htresh change"
-for i in 1 2 3 4;do ip netns del ns$i;done
+check_random_order ${ns[3]} "policies inserted in random order"
+
+cleanup_ns $ns1 $ns2 $ns3 $ns4
exit $ret
diff --git a/tools/testing/selftests/netfilter/.gitignore b/tools/testing/selftests/netfilter/.gitignore
new file mode 100644
index 000000000000..c2229b3e40d4
--- /dev/null
+++ b/tools/testing/selftests/netfilter/.gitignore
@@ -0,0 +1,6 @@
+# SPDX-License-Identifier: GPL-2.0-only
+nf-queue
+connect_close
+audit_logread
+conntrack_dump_flush
+sctp_collision
diff --git a/tools/testing/selftests/netfilter/Makefile b/tools/testing/selftests/netfilter/Makefile
index a179f0dca8ce..936c3085bb83 100644
--- a/tools/testing/selftests/netfilter/Makefile
+++ b/tools/testing/selftests/netfilter/Makefile
@@ -1,12 +1,21 @@
# SPDX-License-Identifier: GPL-2.0
# Makefile for netfilter selftests
-TEST_PROGS := nft_trans_stress.sh nft_nat.sh bridge_brouter.sh \
+TEST_PROGS := nft_trans_stress.sh nft_fib.sh nft_nat.sh bridge_brouter.sh \
conntrack_icmp_related.sh nft_flowtable.sh ipvs.sh \
nft_concat_range.sh nft_conntrack_helper.sh \
- nft_queue.sh
+ nft_queue.sh nft_meta.sh nf_nat_edemux.sh \
+ ipip-conntrack-mtu.sh conntrack_tcp_unreplied.sh \
+ conntrack_vrf.sh nft_synproxy.sh rpath.sh nft_audit.sh \
+ conntrack_sctp_collision.sh xt_string.sh \
+ bridge_netfilter.sh
-LDLIBS = -lmnl
-TEST_GEN_FILES = nf-queue
+HOSTPKG_CONFIG := pkg-config
+
+CFLAGS += $(shell $(HOSTPKG_CONFIG) --cflags libmnl 2>/dev/null)
+LDLIBS += $(shell $(HOSTPKG_CONFIG) --libs libmnl 2>/dev/null || echo -lmnl)
+
+TEST_GEN_FILES = nf-queue connect_close audit_logread sctp_collision \
+ conntrack_dump_flush
include ../lib.mk
diff --git a/tools/testing/selftests/netfilter/audit_logread.c b/tools/testing/selftests/netfilter/audit_logread.c
new file mode 100644
index 000000000000..a0a880fc2d9d
--- /dev/null
+++ b/tools/testing/selftests/netfilter/audit_logread.c
@@ -0,0 +1,165 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#define _GNU_SOURCE
+#include <errno.h>
+#include <fcntl.h>
+#include <poll.h>
+#include <signal.h>
+#include <stdint.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/socket.h>
+#include <unistd.h>
+#include <linux/audit.h>
+#include <linux/netlink.h>
+
+static int fd;
+
+#define MAX_AUDIT_MESSAGE_LENGTH 8970
+struct audit_message {
+ struct nlmsghdr nlh;
+ union {
+ struct audit_status s;
+ char data[MAX_AUDIT_MESSAGE_LENGTH];
+ } u;
+};
+
+int audit_recv(int fd, struct audit_message *rep)
+{
+ struct sockaddr_nl addr;
+ socklen_t addrlen = sizeof(addr);
+ int ret;
+
+ do {
+ ret = recvfrom(fd, rep, sizeof(*rep), 0,
+ (struct sockaddr *)&addr, &addrlen);
+ } while (ret < 0 && errno == EINTR);
+
+ if (ret < 0 ||
+ addrlen != sizeof(addr) ||
+ addr.nl_pid != 0 ||
+ rep->nlh.nlmsg_type == NLMSG_ERROR) /* short-cut for now */
+ return -1;
+
+ return ret;
+}
+
+int audit_send(int fd, uint16_t type, uint32_t key, uint32_t val)
+{
+ static int seq = 0;
+ struct audit_message msg = {
+ .nlh = {
+ .nlmsg_len = NLMSG_SPACE(sizeof(msg.u.s)),
+ .nlmsg_type = type,
+ .nlmsg_flags = NLM_F_REQUEST | NLM_F_ACK,
+ .nlmsg_seq = ++seq,
+ },
+ .u.s = {
+ .mask = key,
+ .enabled = key == AUDIT_STATUS_ENABLED ? val : 0,
+ .pid = key == AUDIT_STATUS_PID ? val : 0,
+ }
+ };
+ struct sockaddr_nl addr = {
+ .nl_family = AF_NETLINK,
+ };
+ int ret;
+
+ do {
+ ret = sendto(fd, &msg, msg.nlh.nlmsg_len, 0,
+ (struct sockaddr *)&addr, sizeof(addr));
+ } while (ret < 0 && errno == EINTR);
+
+ if (ret != (int)msg.nlh.nlmsg_len)
+ return -1;
+ return 0;
+}
+
+int audit_set(int fd, uint32_t key, uint32_t val)
+{
+ struct audit_message rep = { 0 };
+ int ret;
+
+ ret = audit_send(fd, AUDIT_SET, key, val);
+ if (ret)
+ return ret;
+
+ ret = audit_recv(fd, &rep);
+ if (ret < 0)
+ return ret;
+ return 0;
+}
+
+int readlog(int fd)
+{
+ struct audit_message rep = { 0 };
+ int ret = audit_recv(fd, &rep);
+ const char *sep = "";
+ char *k, *v;
+
+ if (ret < 0)
+ return ret;
+
+ if (rep.nlh.nlmsg_type != AUDIT_NETFILTER_CFG)
+ return 0;
+
+ /* skip the initial "audit(...): " part */
+ strtok(rep.u.data, " ");
+
+ while ((k = strtok(NULL, "="))) {
+ v = strtok(NULL, " ");
+
+ /* these vary and/or are uninteresting, ignore */
+ if (!strcmp(k, "pid") ||
+ !strcmp(k, "comm") ||
+ !strcmp(k, "subj"))
+ continue;
+
+ /* strip the varying sequence number */
+ if (!strcmp(k, "table"))
+ *strchrnul(v, ':') = '\0';
+
+ printf("%s%s=%s", sep, k, v);
+ sep = " ";
+ }
+ if (*sep) {
+ printf("\n");
+ fflush(stdout);
+ }
+ return 0;
+}
+
+void cleanup(int sig)
+{
+ audit_set(fd, AUDIT_STATUS_ENABLED, 0);
+ close(fd);
+ if (sig)
+ exit(0);
+}
+
+int main(int argc, char **argv)
+{
+ struct sigaction act = {
+ .sa_handler = cleanup,
+ };
+
+ fd = socket(PF_NETLINK, SOCK_RAW, NETLINK_AUDIT);
+ if (fd < 0) {
+ perror("Can't open netlink socket");
+ return -1;
+ }
+
+ if (sigaction(SIGTERM, &act, NULL) < 0 ||
+ sigaction(SIGINT, &act, NULL) < 0) {
+ perror("Can't set signal handler");
+ close(fd);
+ return -1;
+ }
+
+ audit_set(fd, AUDIT_STATUS_ENABLED, 1);
+ audit_set(fd, AUDIT_STATUS_PID, getpid());
+
+ while (1)
+ readlog(fd);
+}
diff --git a/tools/testing/selftests/netfilter/bridge_netfilter.sh b/tools/testing/selftests/netfilter/bridge_netfilter.sh
new file mode 100644
index 000000000000..659b3ab02c8b
--- /dev/null
+++ b/tools/testing/selftests/netfilter/bridge_netfilter.sh
@@ -0,0 +1,188 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+#
+# Test bridge netfilter + conntrack, a combination that doesn't really work,
+# with multicast/broadcast packets racing for hash table insertion.
+
+# eth0 br0 eth0
+# setup is: ns1 <->,ns0 <-> ns3
+# ns2 <-' `'-> ns4
+
+# Kselftest framework requirement - SKIP code is 4.
+ksft_skip=4
+ret=0
+
+sfx=$(mktemp -u "XXXXXXXX")
+ns0="ns0-$sfx"
+ns1="ns1-$sfx"
+ns2="ns2-$sfx"
+ns3="ns3-$sfx"
+ns4="ns4-$sfx"
+
+ebtables -V > /dev/null 2>&1
+if [ $? -ne 0 ];then
+ echo "SKIP: Could not run test without ebtables"
+ exit $ksft_skip
+fi
+
+ip -Version > /dev/null 2>&1
+if [ $? -ne 0 ];then
+ echo "SKIP: Could not run test without ip tool"
+ exit $ksft_skip
+fi
+
+for i in $(seq 0 4); do
+ eval ip netns add \$ns$i
+done
+
+cleanup() {
+ for i in $(seq 0 4); do eval ip netns del \$ns$i;done
+}
+
+trap cleanup EXIT
+
+do_ping()
+{
+ fromns="$1"
+ dstip="$2"
+
+ ip netns exec $fromns ping -c 1 -q $dstip > /dev/null
+ if [ $? -ne 0 ]; then
+ echo "ERROR: ping from $fromns to $dstip"
+ ip netns exec ${ns0} nft list ruleset
+ ret=1
+ fi
+}
+
+bcast_ping()
+{
+ fromns="$1"
+ dstip="$2"
+
+ for i in $(seq 1 1000); do
+ ip netns exec $fromns ping -q -f -b -c 1 -q $dstip > /dev/null 2>&1
+ if [ $? -ne 0 ]; then
+ echo "ERROR: ping -b from $fromns to $dstip"
+ ip netns exec ${ns0} nft list ruleset
+ fi
+ done
+}
+
+ip link add veth1 netns ${ns0} type veth peer name eth0 netns ${ns1}
+if [ $? -ne 0 ]; then
+ echo "SKIP: Can't create veth device"
+ exit $ksft_skip
+fi
+
+ip link add veth2 netns ${ns0} type veth peer name eth0 netns $ns2
+ip link add veth3 netns ${ns0} type veth peer name eth0 netns $ns3
+ip link add veth4 netns ${ns0} type veth peer name eth0 netns $ns4
+
+ip -net ${ns0} link set lo up
+
+for i in $(seq 1 4); do
+ ip -net ${ns0} link set veth$i up
+done
+
+ip -net ${ns0} link add br0 type bridge stp_state 0 forward_delay 0 nf_call_iptables 1 nf_call_ip6tables 1 nf_call_arptables 1
+if [ $? -ne 0 ]; then
+ echo "SKIP: Can't create bridge br0"
+ exit $ksft_skip
+fi
+
+# make veth0,1,2 part of bridge.
+for i in $(seq 1 3); do
+ ip -net ${ns0} link set veth$i master br0
+done
+
+# add a macvlan on top of the bridge.
+MACVLAN_ADDR=ba:f3:13:37:42:23
+ip -net ${ns0} link add link br0 name macvlan0 type macvlan mode private
+ip -net ${ns0} link set macvlan0 address ${MACVLAN_ADDR}
+ip -net ${ns0} link set macvlan0 up
+ip -net ${ns0} addr add 10.23.0.1/24 dev macvlan0
+
+# add a macvlan on top of veth4.
+MACVLAN_ADDR=ba:f3:13:37:42:24
+ip -net ${ns0} link add link veth4 name macvlan4 type macvlan mode vepa
+ip -net ${ns0} link set macvlan4 address ${MACVLAN_ADDR}
+ip -net ${ns0} link set macvlan4 up
+
+# make the macvlan part of the bridge.
+# veth4 is not a bridge port, only the macvlan on top of it.
+ip -net ${ns0} link set macvlan4 master br0
+
+ip -net ${ns0} link set br0 up
+ip -net ${ns0} addr add 10.0.0.1/24 dev br0
+ip netns exec ${ns0} sysctl -q net.bridge.bridge-nf-call-iptables=1
+ret=$?
+if [ $ret -ne 0 ] ; then
+ echo "SKIP: bridge netfilter not available"
+ ret=$ksft_skip
+fi
+
+# for testing, so namespaces will reply to ping -b probes.
+ip netns exec ${ns0} sysctl -q net.ipv4.icmp_echo_ignore_broadcasts=0
+
+# enable conntrack in ns0 and drop broadcast packets in forward to
+# avoid them from getting confirmed in the postrouting hook before
+# the cloned skb is passed up the stack.
+ip netns exec ${ns0} nft -f - <<EOF
+table ip filter {
+ chain input {
+ type filter hook input priority 1; policy accept
+ iifname br0 counter
+ ct state new accept
+ }
+}
+
+table bridge filter {
+ chain forward {
+ type filter hook forward priority 0; policy accept
+ meta pkttype broadcast ip protocol icmp counter drop
+ }
+}
+EOF
+
+# place 1, 2 & 3 in same subnet, connected via ns0:br0.
+# ns4 is placed in same subnet as well, but its not
+# part of the bridge: the corresponding veth4 is not
+# part of the bridge, only its macvlan interface.
+for i in $(seq 1 4); do
+ eval ip -net \$ns$i link set lo up
+ eval ip -net \$ns$i link set eth0 up
+done
+for i in $(seq 1 2); do
+ eval ip -net \$ns$i addr add 10.0.0.1$i/24 dev eth0
+done
+
+ip -net ${ns3} addr add 10.23.0.13/24 dev eth0
+ip -net ${ns4} addr add 10.23.0.14/24 dev eth0
+
+# test basic connectivity
+do_ping ${ns1} 10.0.0.12
+do_ping ${ns3} 10.23.0.1
+do_ping ${ns4} 10.23.0.1
+
+if [ $ret -eq 0 ];then
+ echo "PASS: netns connectivity: ns1 can reach ns2, ns3 and ns4 can reach ns0"
+fi
+
+bcast_ping ${ns1} 10.0.0.255
+
+# This should deliver broadcast to macvlan0, which is on top of ns0:br0.
+bcast_ping ${ns3} 10.23.0.255
+
+# same, this time via veth4:macvlan4.
+bcast_ping ${ns4} 10.23.0.255
+
+read t < /proc/sys/kernel/tainted
+
+if [ $t -eq 0 ];then
+ echo PASS: kernel not tainted
+else
+ echo ERROR: kernel is tainted
+ ret=1
+fi
+
+exit $ret
diff --git a/tools/testing/selftests/netfilter/config b/tools/testing/selftests/netfilter/config
index 4faf2ce021d9..7c42b1b2c69b 100644
--- a/tools/testing/selftests/netfilter/config
+++ b/tools/testing/selftests/netfilter/config
@@ -6,3 +6,4 @@ CONFIG_NFT_REDIR=m
CONFIG_NFT_MASQ=m
CONFIG_NFT_FLOW_OFFLOAD=m
CONFIG_NF_CT_NETLINK=m
+CONFIG_AUDIT=y
diff --git a/tools/testing/selftests/netfilter/connect_close.c b/tools/testing/selftests/netfilter/connect_close.c
new file mode 100644
index 000000000000..1c3b0add54c4
--- /dev/null
+++ b/tools/testing/selftests/netfilter/connect_close.c
@@ -0,0 +1,136 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <fcntl.h>
+#include <string.h>
+#include <unistd.h>
+#include <signal.h>
+
+#include <arpa/inet.h>
+#include <sys/socket.h>
+
+#define PORT 12345
+#define RUNTIME 10
+
+static struct {
+ unsigned int timeout;
+ unsigned int port;
+} opts = {
+ .timeout = RUNTIME,
+ .port = PORT,
+};
+
+static void handler(int sig)
+{
+ _exit(sig == SIGALRM ? 0 : 1);
+}
+
+static void set_timeout(void)
+{
+ struct sigaction action = {
+ .sa_handler = handler,
+ };
+
+ sigaction(SIGALRM, &action, NULL);
+
+ alarm(opts.timeout);
+}
+
+static void do_connect(const struct sockaddr_in *dst)
+{
+ int s = socket(AF_INET, SOCK_STREAM, IPPROTO_TCP);
+
+ if (s >= 0)
+ fcntl(s, F_SETFL, O_NONBLOCK);
+
+ connect(s, (struct sockaddr *)dst, sizeof(*dst));
+ close(s);
+}
+
+static void do_accept(const struct sockaddr_in *src)
+{
+ int c, one = 1, s = socket(AF_INET, SOCK_STREAM, IPPROTO_TCP);
+
+ if (s < 0)
+ return;
+
+ setsockopt(s, SOL_SOCKET, SO_REUSEADDR, &one, sizeof(one));
+ setsockopt(s, SOL_SOCKET, SO_REUSEPORT, &one, sizeof(one));
+
+ bind(s, (struct sockaddr *)src, sizeof(*src));
+
+ listen(s, 16);
+
+ c = accept(s, NULL, NULL);
+ if (c >= 0)
+ close(c);
+
+ close(s);
+}
+
+static int accept_loop(void)
+{
+ struct sockaddr_in src = {
+ .sin_family = AF_INET,
+ .sin_port = htons(opts.port),
+ };
+
+ inet_pton(AF_INET, "127.0.0.1", &src.sin_addr);
+
+ set_timeout();
+
+ for (;;)
+ do_accept(&src);
+
+ return 1;
+}
+
+static int connect_loop(void)
+{
+ struct sockaddr_in dst = {
+ .sin_family = AF_INET,
+ .sin_port = htons(opts.port),
+ };
+
+ inet_pton(AF_INET, "127.0.0.1", &dst.sin_addr);
+
+ set_timeout();
+
+ for (;;)
+ do_connect(&dst);
+
+ return 1;
+}
+
+static void parse_opts(int argc, char **argv)
+{
+ int c;
+
+ while ((c = getopt(argc, argv, "t:p:")) != -1) {
+ switch (c) {
+ case 't':
+ opts.timeout = atoi(optarg);
+ break;
+ case 'p':
+ opts.port = atoi(optarg);
+ break;
+ }
+ }
+}
+
+int main(int argc, char *argv[])
+{
+ pid_t p;
+
+ parse_opts(argc, argv);
+
+ p = fork();
+ if (p < 0)
+ return 111;
+
+ if (p > 0)
+ return accept_loop();
+
+ return connect_loop();
+}
diff --git a/tools/testing/selftests/netfilter/conntrack_dump_flush.c b/tools/testing/selftests/netfilter/conntrack_dump_flush.c
new file mode 100644
index 000000000000..b11ea8ee6719
--- /dev/null
+++ b/tools/testing/selftests/netfilter/conntrack_dump_flush.c
@@ -0,0 +1,471 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#define _GNU_SOURCE
+
+#include <time.h>
+#include <libmnl/libmnl.h>
+#include <netinet/ip.h>
+
+#include <linux/netlink.h>
+#include <linux/netfilter/nfnetlink.h>
+#include <linux/netfilter/nfnetlink_conntrack.h>
+#include <linux/netfilter/nf_conntrack_tcp.h>
+#include "../kselftest_harness.h"
+
+#define TEST_ZONE_ID 123
+#define NF_CT_DEFAULT_ZONE_ID 0
+
+static int reply_counter;
+
+static int build_cta_tuple_v4(struct nlmsghdr *nlh, int type,
+ uint32_t src_ip, uint32_t dst_ip,
+ uint16_t src_port, uint16_t dst_port)
+{
+ struct nlattr *nest, *nest_ip, *nest_proto;
+
+ nest = mnl_attr_nest_start(nlh, type);
+ if (!nest)
+ return -1;
+
+ nest_ip = mnl_attr_nest_start(nlh, CTA_TUPLE_IP);
+ if (!nest_ip)
+ return -1;
+ mnl_attr_put_u32(nlh, CTA_IP_V4_SRC, src_ip);
+ mnl_attr_put_u32(nlh, CTA_IP_V4_DST, dst_ip);
+ mnl_attr_nest_end(nlh, nest_ip);
+
+ nest_proto = mnl_attr_nest_start(nlh, CTA_TUPLE_PROTO);
+ if (!nest_proto)
+ return -1;
+ mnl_attr_put_u8(nlh, CTA_PROTO_NUM, 6);
+ mnl_attr_put_u16(nlh, CTA_PROTO_SRC_PORT, htons(src_port));
+ mnl_attr_put_u16(nlh, CTA_PROTO_DST_PORT, htons(dst_port));
+ mnl_attr_nest_end(nlh, nest_proto);
+
+ mnl_attr_nest_end(nlh, nest);
+}
+
+static int build_cta_tuple_v6(struct nlmsghdr *nlh, int type,
+ struct in6_addr src_ip, struct in6_addr dst_ip,
+ uint16_t src_port, uint16_t dst_port)
+{
+ struct nlattr *nest, *nest_ip, *nest_proto;
+
+ nest = mnl_attr_nest_start(nlh, type);
+ if (!nest)
+ return -1;
+
+ nest_ip = mnl_attr_nest_start(nlh, CTA_TUPLE_IP);
+ if (!nest_ip)
+ return -1;
+ mnl_attr_put(nlh, CTA_IP_V6_SRC, sizeof(struct in6_addr), &src_ip);
+ mnl_attr_put(nlh, CTA_IP_V6_DST, sizeof(struct in6_addr), &dst_ip);
+ mnl_attr_nest_end(nlh, nest_ip);
+
+ nest_proto = mnl_attr_nest_start(nlh, CTA_TUPLE_PROTO);
+ if (!nest_proto)
+ return -1;
+ mnl_attr_put_u8(nlh, CTA_PROTO_NUM, 6);
+ mnl_attr_put_u16(nlh, CTA_PROTO_SRC_PORT, htons(src_port));
+ mnl_attr_put_u16(nlh, CTA_PROTO_DST_PORT, htons(dst_port));
+ mnl_attr_nest_end(nlh, nest_proto);
+
+ mnl_attr_nest_end(nlh, nest);
+}
+
+static int build_cta_proto(struct nlmsghdr *nlh)
+{
+ struct nlattr *nest, *nest_proto;
+
+ nest = mnl_attr_nest_start(nlh, CTA_PROTOINFO);
+ if (!nest)
+ return -1;
+
+ nest_proto = mnl_attr_nest_start(nlh, CTA_PROTOINFO_TCP);
+ if (!nest_proto)
+ return -1;
+ mnl_attr_put_u8(nlh, CTA_PROTOINFO_TCP_STATE, TCP_CONNTRACK_ESTABLISHED);
+ mnl_attr_put_u16(nlh, CTA_PROTOINFO_TCP_FLAGS_ORIGINAL, 0x0a0a);
+ mnl_attr_put_u16(nlh, CTA_PROTOINFO_TCP_FLAGS_REPLY, 0x0a0a);
+ mnl_attr_nest_end(nlh, nest_proto);
+
+ mnl_attr_nest_end(nlh, nest);
+}
+
+static int conntrack_data_insert(struct mnl_socket *sock, struct nlmsghdr *nlh,
+ uint16_t zone)
+{
+ char buf[MNL_SOCKET_BUFFER_SIZE];
+ struct nlmsghdr *rplnlh;
+ unsigned int portid;
+ int err, ret;
+
+ portid = mnl_socket_get_portid(sock);
+
+ ret = build_cta_proto(nlh);
+ if (ret < 0) {
+ perror("build_cta_proto");
+ return -1;
+ }
+ mnl_attr_put_u32(nlh, CTA_TIMEOUT, htonl(20000));
+ mnl_attr_put_u16(nlh, CTA_ZONE, htons(zone));
+
+ if (mnl_socket_sendto(sock, nlh, nlh->nlmsg_len) < 0) {
+ perror("mnl_socket_sendto");
+ return -1;
+ }
+
+ ret = mnl_socket_recvfrom(sock, buf, MNL_SOCKET_BUFFER_SIZE);
+ if (ret < 0) {
+ perror("mnl_socket_recvfrom");
+ return ret;
+ }
+
+ ret = mnl_cb_run(buf, ret, nlh->nlmsg_seq, portid, NULL, NULL);
+ if (ret < 0) {
+ if (errno == EEXIST) {
+ /* The entries are probably still there from a previous
+ * run. So we are good
+ */
+ return 0;
+ }
+ perror("mnl_cb_run");
+ return ret;
+ }
+
+ return 0;
+}
+
+static int conntrack_data_generate_v4(struct mnl_socket *sock, uint32_t src_ip,
+ uint32_t dst_ip, uint16_t zone)
+{
+ char buf[MNL_SOCKET_BUFFER_SIZE];
+ struct nlmsghdr *nlh;
+ struct nfgenmsg *nfh;
+ int ret;
+
+ nlh = mnl_nlmsg_put_header(buf);
+ nlh->nlmsg_type = (NFNL_SUBSYS_CTNETLINK << 8) | IPCTNL_MSG_CT_NEW;
+ nlh->nlmsg_flags = NLM_F_REQUEST | NLM_F_CREATE |
+ NLM_F_ACK | NLM_F_EXCL;
+ nlh->nlmsg_seq = time(NULL);
+
+ nfh = mnl_nlmsg_put_extra_header(nlh, sizeof(struct nfgenmsg));
+ nfh->nfgen_family = AF_INET;
+ nfh->version = NFNETLINK_V0;
+ nfh->res_id = 0;
+
+ ret = build_cta_tuple_v4(nlh, CTA_TUPLE_ORIG, src_ip, dst_ip, 12345, 443);
+ if (ret < 0) {
+ perror("build_cta_tuple_v4");
+ return ret;
+ }
+ ret = build_cta_tuple_v4(nlh, CTA_TUPLE_REPLY, dst_ip, src_ip, 443, 12345);
+ if (ret < 0) {
+ perror("build_cta_tuple_v4");
+ return ret;
+ }
+ return conntrack_data_insert(sock, nlh, zone);
+}
+
+static int conntrack_data_generate_v6(struct mnl_socket *sock,
+ struct in6_addr src_ip,
+ struct in6_addr dst_ip,
+ uint16_t zone)
+{
+ char buf[MNL_SOCKET_BUFFER_SIZE];
+ struct nlmsghdr *nlh;
+ struct nfgenmsg *nfh;
+ int ret;
+
+ nlh = mnl_nlmsg_put_header(buf);
+ nlh->nlmsg_type = (NFNL_SUBSYS_CTNETLINK << 8) | IPCTNL_MSG_CT_NEW;
+ nlh->nlmsg_flags = NLM_F_REQUEST | NLM_F_CREATE |
+ NLM_F_ACK | NLM_F_EXCL;
+ nlh->nlmsg_seq = time(NULL);
+
+ nfh = mnl_nlmsg_put_extra_header(nlh, sizeof(struct nfgenmsg));
+ nfh->nfgen_family = AF_INET6;
+ nfh->version = NFNETLINK_V0;
+ nfh->res_id = 0;
+
+ ret = build_cta_tuple_v6(nlh, CTA_TUPLE_ORIG, src_ip, dst_ip,
+ 12345, 443);
+ if (ret < 0) {
+ perror("build_cta_tuple_v6");
+ return ret;
+ }
+ ret = build_cta_tuple_v6(nlh, CTA_TUPLE_REPLY, dst_ip, src_ip,
+ 12345, 443);
+ if (ret < 0) {
+ perror("build_cta_tuple_v6");
+ return ret;
+ }
+ return conntrack_data_insert(sock, nlh, zone);
+}
+
+static int count_entries(const struct nlmsghdr *nlh, void *data)
+{
+ reply_counter++;
+}
+
+static int conntracK_count_zone(struct mnl_socket *sock, uint16_t zone)
+{
+ char buf[MNL_SOCKET_BUFFER_SIZE];
+ struct nlmsghdr *nlh, *rplnlh;
+ struct nfgenmsg *nfh;
+ struct nlattr *nest;
+ unsigned int portid;
+ int err, ret;
+
+ portid = mnl_socket_get_portid(sock);
+
+ nlh = mnl_nlmsg_put_header(buf);
+ nlh->nlmsg_type = (NFNL_SUBSYS_CTNETLINK << 8) | IPCTNL_MSG_CT_GET;
+ nlh->nlmsg_flags = NLM_F_REQUEST | NLM_F_DUMP;
+ nlh->nlmsg_seq = time(NULL);
+
+ nfh = mnl_nlmsg_put_extra_header(nlh, sizeof(struct nfgenmsg));
+ nfh->nfgen_family = AF_UNSPEC;
+ nfh->version = NFNETLINK_V0;
+ nfh->res_id = 0;
+
+ mnl_attr_put_u16(nlh, CTA_ZONE, htons(zone));
+
+ ret = mnl_socket_sendto(sock, nlh, nlh->nlmsg_len);
+ if (ret < 0) {
+ perror("mnl_socket_sendto");
+ return ret;
+ }
+
+ reply_counter = 0;
+ ret = mnl_socket_recvfrom(sock, buf, MNL_SOCKET_BUFFER_SIZE);
+ while (ret > 0) {
+ ret = mnl_cb_run(buf, ret, nlh->nlmsg_seq, portid,
+ count_entries, NULL);
+ if (ret <= MNL_CB_STOP)
+ break;
+
+ ret = mnl_socket_recvfrom(sock, buf, MNL_SOCKET_BUFFER_SIZE);
+ }
+ if (ret < 0) {
+ perror("mnl_socket_recvfrom");
+ return ret;
+ }
+
+ return reply_counter;
+}
+
+static int conntrack_flush_zone(struct mnl_socket *sock, uint16_t zone)
+{
+ char buf[MNL_SOCKET_BUFFER_SIZE];
+ struct nlmsghdr *nlh, *rplnlh;
+ struct nfgenmsg *nfh;
+ struct nlattr *nest;
+ unsigned int portid;
+ int err, ret;
+
+ portid = mnl_socket_get_portid(sock);
+
+ nlh = mnl_nlmsg_put_header(buf);
+ nlh->nlmsg_type = (NFNL_SUBSYS_CTNETLINK << 8) | IPCTNL_MSG_CT_DELETE;
+ nlh->nlmsg_flags = NLM_F_REQUEST | NLM_F_ACK;
+ nlh->nlmsg_seq = time(NULL);
+
+ nfh = mnl_nlmsg_put_extra_header(nlh, sizeof(struct nfgenmsg));
+ nfh->nfgen_family = AF_UNSPEC;
+ nfh->version = NFNETLINK_V0;
+ nfh->res_id = 0;
+
+ mnl_attr_put_u16(nlh, CTA_ZONE, htons(zone));
+
+ ret = mnl_socket_sendto(sock, nlh, nlh->nlmsg_len);
+ if (ret < 0) {
+ perror("mnl_socket_sendto");
+ return ret;
+ }
+
+ ret = mnl_socket_recvfrom(sock, buf, MNL_SOCKET_BUFFER_SIZE);
+ if (ret < 0) {
+ perror("mnl_socket_recvfrom");
+ return ret;
+ }
+
+ ret = mnl_cb_run(buf, ret, nlh->nlmsg_seq, portid, NULL, NULL);
+ if (ret < 0) {
+ perror("mnl_cb_run");
+ return ret;
+ }
+
+ return 0;
+}
+
+FIXTURE(conntrack_dump_flush)
+{
+ struct mnl_socket *sock;
+};
+
+FIXTURE_SETUP(conntrack_dump_flush)
+{
+ struct in6_addr src, dst;
+ int ret;
+
+ self->sock = mnl_socket_open(NETLINK_NETFILTER);
+ if (!self->sock) {
+ perror("mnl_socket_open");
+ exit(EXIT_FAILURE);
+ }
+
+ if (mnl_socket_bind(self->sock, 0, MNL_SOCKET_AUTOPID) < 0) {
+ perror("mnl_socket_bind");
+ exit(EXIT_FAILURE);
+ }
+
+ ret = conntracK_count_zone(self->sock, TEST_ZONE_ID);
+ if (ret < 0 && errno == EPERM)
+ SKIP(return, "Needs to be run as root");
+ else if (ret < 0 && errno == EOPNOTSUPP)
+ SKIP(return, "Kernel does not seem to support conntrack zones");
+
+ ret = conntrack_data_generate_v4(self->sock, 0xf0f0f0f0, 0xf1f1f1f1,
+ TEST_ZONE_ID);
+ EXPECT_EQ(ret, 0);
+ ret = conntrack_data_generate_v4(self->sock, 0xf2f2f2f2, 0xf3f3f3f3,
+ TEST_ZONE_ID + 1);
+ EXPECT_EQ(ret, 0);
+ ret = conntrack_data_generate_v4(self->sock, 0xf4f4f4f4, 0xf5f5f5f5,
+ TEST_ZONE_ID + 2);
+ EXPECT_EQ(ret, 0);
+ ret = conntrack_data_generate_v4(self->sock, 0xf6f6f6f6, 0xf7f7f7f7,
+ NF_CT_DEFAULT_ZONE_ID);
+ EXPECT_EQ(ret, 0);
+
+ src = (struct in6_addr) {{
+ .__u6_addr32 = {
+ 0xb80d0120,
+ 0x00000000,
+ 0x00000000,
+ 0x01000000
+ }
+ }};
+ dst = (struct in6_addr) {{
+ .__u6_addr32 = {
+ 0xb80d0120,
+ 0x00000000,
+ 0x00000000,
+ 0x02000000
+ }
+ }};
+ ret = conntrack_data_generate_v6(self->sock, src, dst,
+ TEST_ZONE_ID);
+ EXPECT_EQ(ret, 0);
+ src = (struct in6_addr) {{
+ .__u6_addr32 = {
+ 0xb80d0120,
+ 0x00000000,
+ 0x00000000,
+ 0x03000000
+ }
+ }};
+ dst = (struct in6_addr) {{
+ .__u6_addr32 = {
+ 0xb80d0120,
+ 0x00000000,
+ 0x00000000,
+ 0x04000000
+ }
+ }};
+ ret = conntrack_data_generate_v6(self->sock, src, dst,
+ TEST_ZONE_ID + 1);
+ EXPECT_EQ(ret, 0);
+ src = (struct in6_addr) {{
+ .__u6_addr32 = {
+ 0xb80d0120,
+ 0x00000000,
+ 0x00000000,
+ 0x05000000
+ }
+ }};
+ dst = (struct in6_addr) {{
+ .__u6_addr32 = {
+ 0xb80d0120,
+ 0x00000000,
+ 0x00000000,
+ 0x06000000
+ }
+ }};
+ ret = conntrack_data_generate_v6(self->sock, src, dst,
+ TEST_ZONE_ID + 2);
+ EXPECT_EQ(ret, 0);
+
+ src = (struct in6_addr) {{
+ .__u6_addr32 = {
+ 0xb80d0120,
+ 0x00000000,
+ 0x00000000,
+ 0x07000000
+ }
+ }};
+ dst = (struct in6_addr) {{
+ .__u6_addr32 = {
+ 0xb80d0120,
+ 0x00000000,
+ 0x00000000,
+ 0x08000000
+ }
+ }};
+ ret = conntrack_data_generate_v6(self->sock, src, dst,
+ NF_CT_DEFAULT_ZONE_ID);
+ EXPECT_EQ(ret, 0);
+
+ ret = conntracK_count_zone(self->sock, TEST_ZONE_ID);
+ EXPECT_GE(ret, 2);
+ if (ret > 2)
+ SKIP(return, "kernel does not support filtering by zone");
+}
+
+FIXTURE_TEARDOWN(conntrack_dump_flush)
+{
+}
+
+TEST_F(conntrack_dump_flush, test_dump_by_zone)
+{
+ int ret;
+
+ ret = conntracK_count_zone(self->sock, TEST_ZONE_ID);
+ EXPECT_EQ(ret, 2);
+}
+
+TEST_F(conntrack_dump_flush, test_flush_by_zone)
+{
+ int ret;
+
+ ret = conntrack_flush_zone(self->sock, TEST_ZONE_ID);
+ EXPECT_EQ(ret, 0);
+ ret = conntracK_count_zone(self->sock, TEST_ZONE_ID);
+ EXPECT_EQ(ret, 0);
+ ret = conntracK_count_zone(self->sock, TEST_ZONE_ID + 1);
+ EXPECT_EQ(ret, 2);
+ ret = conntracK_count_zone(self->sock, TEST_ZONE_ID + 2);
+ EXPECT_EQ(ret, 2);
+ ret = conntracK_count_zone(self->sock, NF_CT_DEFAULT_ZONE_ID);
+ EXPECT_EQ(ret, 2);
+}
+
+TEST_F(conntrack_dump_flush, test_flush_by_zone_default)
+{
+ int ret;
+
+ ret = conntrack_flush_zone(self->sock, NF_CT_DEFAULT_ZONE_ID);
+ EXPECT_EQ(ret, 0);
+ ret = conntracK_count_zone(self->sock, TEST_ZONE_ID);
+ EXPECT_EQ(ret, 2);
+ ret = conntracK_count_zone(self->sock, TEST_ZONE_ID + 1);
+ EXPECT_EQ(ret, 2);
+ ret = conntracK_count_zone(self->sock, TEST_ZONE_ID + 2);
+ EXPECT_EQ(ret, 2);
+ ret = conntracK_count_zone(self->sock, NF_CT_DEFAULT_ZONE_ID);
+ EXPECT_EQ(ret, 0);
+}
+
+TEST_HARNESS_MAIN
diff --git a/tools/testing/selftests/netfilter/conntrack_icmp_related.sh b/tools/testing/selftests/netfilter/conntrack_icmp_related.sh
index b48e1833bc89..76645aaf2b58 100755
--- a/tools/testing/selftests/netfilter/conntrack_icmp_related.sh
+++ b/tools/testing/selftests/netfilter/conntrack_icmp_related.sh
@@ -35,6 +35,8 @@ cleanup() {
for i in 1 2;do ip netns del nsrouter$i;done
}
+trap cleanup EXIT
+
ipv4() {
echo -n 192.168.$1.2
}
@@ -146,11 +148,17 @@ ip netns exec nsclient1 nft -f - <<EOF
table inet filter {
counter unknown { }
counter related { }
+ counter redir4 { }
+ counter redir6 { }
chain input {
type filter hook input priority 0; policy accept;
- meta l4proto { icmp, icmpv6 } ct state established,untracked accept
+ icmp type "redirect" ct state "related" counter name "redir4" accept
+ icmpv6 type "nd-redirect" ct state "related" counter name "redir6" accept
+
+ meta l4proto { icmp, icmpv6 } ct state established,untracked accept
meta l4proto { icmp, icmpv6 } ct state "related" counter name "related" accept
+
counter name "unknown" drop
}
}
@@ -279,5 +287,29 @@ else
echo "ERROR: icmp error RELATED state test has failed"
fi
-cleanup
+# add 'bad' route, expect icmp REDIRECT to be generated
+ip netns exec nsclient1 ip route add 192.168.1.42 via 192.168.1.1
+ip netns exec nsclient1 ip route add dead:1::42 via dead:1::1
+
+ip netns exec "nsclient1" ping -q -c 2 192.168.1.42 > /dev/null
+
+expect="packets 1 bytes 112"
+check_counter nsclient1 "redir4" "$expect"
+if [ $? -ne 0 ];then
+ ret=1
+fi
+
+ip netns exec "nsclient1" ping -c 1 dead:1::42 > /dev/null
+expect="packets 1 bytes 192"
+check_counter nsclient1 "redir6" "$expect"
+if [ $? -ne 0 ];then
+ ret=1
+fi
+
+if [ $ret -eq 0 ];then
+ echo "PASS: icmp redirects had RELATED state"
+else
+ echo "ERROR: icmp redirect RELATED state test has failed"
+fi
+
exit $ret
diff --git a/tools/testing/selftests/netfilter/conntrack_sctp_collision.sh b/tools/testing/selftests/netfilter/conntrack_sctp_collision.sh
new file mode 100755
index 000000000000..a924e595cfd8
--- /dev/null
+++ b/tools/testing/selftests/netfilter/conntrack_sctp_collision.sh
@@ -0,0 +1,89 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+#
+# Testing For SCTP COLLISION SCENARIO as Below:
+#
+# 14:35:47.655279 IP CLIENT_IP.PORT > SERVER_IP.PORT: sctp (1) [INIT] [init tag: 2017837359]
+# 14:35:48.353250 IP SERVER_IP.PORT > CLIENT_IP.PORT: sctp (1) [INIT] [init tag: 1187206187]
+# 14:35:48.353275 IP CLIENT_IP.PORT > SERVER_IP.PORT: sctp (1) [INIT ACK] [init tag: 2017837359]
+# 14:35:48.353283 IP SERVER_IP.PORT > CLIENT_IP.PORT: sctp (1) [COOKIE ECHO]
+# 14:35:48.353977 IP CLIENT_IP.PORT > SERVER_IP.PORT: sctp (1) [COOKIE ACK]
+# 14:35:48.855335 IP SERVER_IP.PORT > CLIENT_IP.PORT: sctp (1) [INIT ACK] [init tag: 164579970]
+#
+# TOPO: SERVER_NS (link0)<--->(link1) ROUTER_NS (link2)<--->(link3) CLIENT_NS
+
+CLIENT_NS=$(mktemp -u client-XXXXXXXX)
+CLIENT_IP="198.51.200.1"
+CLIENT_PORT=1234
+
+SERVER_NS=$(mktemp -u server-XXXXXXXX)
+SERVER_IP="198.51.100.1"
+SERVER_PORT=1234
+
+ROUTER_NS=$(mktemp -u router-XXXXXXXX)
+CLIENT_GW="198.51.200.2"
+SERVER_GW="198.51.100.2"
+
+# setup the topo
+setup() {
+ ip net add $CLIENT_NS
+ ip net add $SERVER_NS
+ ip net add $ROUTER_NS
+ ip -n $SERVER_NS link add link0 type veth peer name link1 netns $ROUTER_NS
+ ip -n $CLIENT_NS link add link3 type veth peer name link2 netns $ROUTER_NS
+
+ ip -n $SERVER_NS link set link0 up
+ ip -n $SERVER_NS addr add $SERVER_IP/24 dev link0
+ ip -n $SERVER_NS route add $CLIENT_IP dev link0 via $SERVER_GW
+
+ ip -n $ROUTER_NS link set link1 up
+ ip -n $ROUTER_NS link set link2 up
+ ip -n $ROUTER_NS addr add $SERVER_GW/24 dev link1
+ ip -n $ROUTER_NS addr add $CLIENT_GW/24 dev link2
+ ip net exec $ROUTER_NS sysctl -wq net.ipv4.ip_forward=1
+
+ ip -n $CLIENT_NS link set link3 up
+ ip -n $CLIENT_NS addr add $CLIENT_IP/24 dev link3
+ ip -n $CLIENT_NS route add $SERVER_IP dev link3 via $CLIENT_GW
+
+ # simulate the delay on OVS upcall by setting up a delay for INIT_ACK with
+ # tc on $SERVER_NS side
+ tc -n $SERVER_NS qdisc add dev link0 root handle 1: htb
+ tc -n $SERVER_NS class add dev link0 parent 1: classid 1:1 htb rate 100mbit
+ tc -n $SERVER_NS filter add dev link0 parent 1: protocol ip u32 match ip protocol 132 \
+ 0xff match u8 2 0xff at 32 flowid 1:1
+ tc -n $SERVER_NS qdisc add dev link0 parent 1:1 handle 10: netem delay 1200ms
+
+ # simulate the ctstate check on OVS nf_conntrack
+ ip net exec $ROUTER_NS iptables -A FORWARD -m state --state INVALID,UNTRACKED -j DROP
+ ip net exec $ROUTER_NS iptables -A INPUT -p sctp -j DROP
+
+ # use a smaller number for assoc's max_retrans to reproduce the issue
+ modprobe sctp
+ ip net exec $CLIENT_NS sysctl -wq net.sctp.association_max_retrans=3
+}
+
+cleanup() {
+ ip net exec $CLIENT_NS pkill sctp_collision 2>&1 >/dev/null
+ ip net exec $SERVER_NS pkill sctp_collision 2>&1 >/dev/null
+ ip net del "$CLIENT_NS"
+ ip net del "$SERVER_NS"
+ ip net del "$ROUTER_NS"
+}
+
+do_test() {
+ ip net exec $SERVER_NS ./sctp_collision server \
+ $SERVER_IP $SERVER_PORT $CLIENT_IP $CLIENT_PORT &
+ ip net exec $CLIENT_NS ./sctp_collision client \
+ $CLIENT_IP $CLIENT_PORT $SERVER_IP $SERVER_PORT
+}
+
+# NOTE: one way to work around the issue is set a smaller hb_interval
+# ip net exec $CLIENT_NS sysctl -wq net.sctp.hb_interval=3500
+
+# run the test case
+trap cleanup EXIT
+setup && \
+echo "Test for SCTP Collision in nf_conntrack:" && \
+do_test && echo "PASS!"
+exit $?
diff --git a/tools/testing/selftests/netfilter/conntrack_tcp_unreplied.sh b/tools/testing/selftests/netfilter/conntrack_tcp_unreplied.sh
new file mode 100755
index 000000000000..e7d7bf13cff5
--- /dev/null
+++ b/tools/testing/selftests/netfilter/conntrack_tcp_unreplied.sh
@@ -0,0 +1,167 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+#
+# Check that UNREPLIED tcp conntrack will eventually timeout.
+#
+
+# Kselftest framework requirement - SKIP code is 4.
+ksft_skip=4
+ret=0
+
+waittime=20
+sfx=$(mktemp -u "XXXXXXXX")
+ns1="ns1-$sfx"
+ns2="ns2-$sfx"
+
+nft --version > /dev/null 2>&1
+if [ $? -ne 0 ];then
+ echo "SKIP: Could not run test without nft tool"
+ exit $ksft_skip
+fi
+
+ip -Version > /dev/null 2>&1
+if [ $? -ne 0 ];then
+ echo "SKIP: Could not run test without ip tool"
+ exit $ksft_skip
+fi
+
+cleanup() {
+ ip netns pids $ns1 | xargs kill 2>/dev/null
+ ip netns pids $ns2 | xargs kill 2>/dev/null
+
+ ip netns del $ns1
+ ip netns del $ns2
+}
+
+ipv4() {
+ echo -n 192.168.$1.2
+}
+
+check_counter()
+{
+ ns=$1
+ name=$2
+ expect=$3
+ local lret=0
+
+ cnt=$(ip netns exec $ns2 nft list counter inet filter "$name" | grep -q "$expect")
+ if [ $? -ne 0 ]; then
+ echo "ERROR: counter $name in $ns2 has unexpected value (expected $expect)" 1>&2
+ ip netns exec $ns2 nft list counter inet filter "$name" 1>&2
+ lret=1
+ fi
+
+ return $lret
+}
+
+# Create test namespaces
+ip netns add $ns1 || exit 1
+
+trap cleanup EXIT
+
+ip netns add $ns2 || exit 1
+
+# Connect the namespace to the host using a veth pair
+ip -net $ns1 link add name veth1 type veth peer name veth2
+ip -net $ns1 link set netns $ns2 dev veth2
+
+ip -net $ns1 link set up dev lo
+ip -net $ns2 link set up dev lo
+ip -net $ns1 link set up dev veth1
+ip -net $ns2 link set up dev veth2
+
+ip -net $ns2 addr add 10.11.11.2/24 dev veth2
+ip -net $ns2 route add default via 10.11.11.1
+
+ip netns exec $ns2 sysctl -q net.ipv4.conf.veth2.forwarding=1
+
+# add a rule inside NS so we enable conntrack
+ip netns exec $ns1 iptables -A INPUT -m state --state established,related -j ACCEPT
+
+ip -net $ns1 addr add 10.11.11.1/24 dev veth1
+ip -net $ns1 route add 10.99.99.99 via 10.11.11.2
+
+# Check connectivity works
+ip netns exec $ns1 ping -q -c 2 10.11.11.2 >/dev/null || exit 1
+
+ip netns exec $ns2 nc -l -p 8080 < /dev/null &
+
+# however, conntrack entries are there
+
+ip netns exec $ns2 nft -f - <<EOF
+table inet filter {
+ counter connreq { }
+ counter redir { }
+ chain input {
+ type filter hook input priority 0; policy accept;
+ ct state new tcp flags syn ip daddr 10.99.99.99 tcp dport 80 counter name "connreq" accept
+ ct state new ct status dnat tcp dport 8080 counter name "redir" accept
+ }
+}
+EOF
+if [ $? -ne 0 ]; then
+ echo "ERROR: Could not load nft rules"
+ exit 1
+fi
+
+ip netns exec $ns2 sysctl -q net.netfilter.nf_conntrack_tcp_timeout_syn_sent=10
+
+echo "INFO: connect $ns1 -> $ns2 to the virtual ip"
+ip netns exec $ns1 bash -c 'while true ; do
+ nc -p 60000 10.99.99.99 80
+ sleep 1
+ done' &
+
+sleep 1
+
+ip netns exec $ns2 nft -f - <<EOF
+table inet nat {
+ chain prerouting {
+ type nat hook prerouting priority 0; policy accept;
+ ip daddr 10.99.99.99 tcp dport 80 redirect to :8080
+ }
+}
+EOF
+if [ $? -ne 0 ]; then
+ echo "ERROR: Could not load nat redirect"
+ exit 1
+fi
+
+count=$(ip netns exec $ns2 conntrack -L -p tcp --dport 80 2>/dev/null | wc -l)
+if [ $count -eq 0 ]; then
+ echo "ERROR: $ns2 did not pick up tcp connection from peer"
+ exit 1
+fi
+
+echo "INFO: NAT redirect added in ns $ns2, waiting for $waittime seconds for nat to take effect"
+for i in $(seq 1 $waittime); do
+ echo -n "."
+
+ sleep 1
+
+ count=$(ip netns exec $ns2 conntrack -L -p tcp --reply-port-src 8080 2>/dev/null | wc -l)
+ if [ $count -gt 0 ]; then
+ echo
+ echo "PASS: redirection took effect after $i seconds"
+ break
+ fi
+
+ m=$((i%20))
+ if [ $m -eq 0 ]; then
+ echo " waited for $i seconds"
+ fi
+done
+
+expect="packets 1 bytes 60"
+check_counter "$ns2" "redir" "$expect"
+if [ $? -ne 0 ]; then
+ ret=1
+fi
+
+if [ $ret -eq 0 ];then
+ echo "PASS: redirection counter has expected values"
+else
+ echo "ERROR: no tcp connection was redirected"
+fi
+
+exit $ret
diff --git a/tools/testing/selftests/netfilter/conntrack_vrf.sh b/tools/testing/selftests/netfilter/conntrack_vrf.sh
new file mode 100755
index 000000000000..8b5ea9234588
--- /dev/null
+++ b/tools/testing/selftests/netfilter/conntrack_vrf.sh
@@ -0,0 +1,241 @@
+#!/bin/sh
+
+# This script demonstrates interaction of conntrack and vrf.
+# The vrf driver calls the netfilter hooks again, with oif/iif
+# pointing at the VRF device.
+#
+# For ingress, this means first iteration has iifname of lower/real
+# device. In this script, thats veth0.
+# Second iteration is iifname set to vrf device, tvrf in this script.
+#
+# For egress, this is reversed: first iteration has the vrf device,
+# second iteration is done with the lower/real/veth0 device.
+#
+# test_ct_zone_in demonstrates unexpected change of nftables
+# behavior # caused by commit 09e856d54bda5f28 "vrf: Reset skb conntrack
+# connection on VRF rcv"
+#
+# It was possible to assign conntrack zone to a packet (or mark it for
+# `notracking`) in the prerouting chain before conntrack, based on real iif.
+#
+# After the change, the zone assignment is lost and the zone is assigned based
+# on the VRF master interface (in case such a rule exists).
+# assignment is lost. Instead, assignment based on the `iif` matching
+# Thus it is impossible to distinguish packets based on the original
+# interface.
+#
+# test_masquerade_vrf and test_masquerade_veth0 demonstrate the problem
+# that was supposed to be fixed by the commit mentioned above to make sure
+# that any fix to test case 1 won't break masquerade again.
+
+ksft_skip=4
+
+IP0=172.30.30.1
+IP1=172.30.30.2
+PFXL=30
+ret=0
+
+sfx=$(mktemp -u "XXXXXXXX")
+ns0="ns0-$sfx"
+ns1="ns1-$sfx"
+
+cleanup()
+{
+ ip netns pids $ns0 | xargs kill 2>/dev/null
+ ip netns pids $ns1 | xargs kill 2>/dev/null
+
+ ip netns del $ns0 $ns1
+}
+
+nft --version > /dev/null 2>&1
+if [ $? -ne 0 ];then
+ echo "SKIP: Could not run test without nft tool"
+ exit $ksft_skip
+fi
+
+ip -Version > /dev/null 2>&1
+if [ $? -ne 0 ];then
+ echo "SKIP: Could not run test without ip tool"
+ exit $ksft_skip
+fi
+
+ip netns add "$ns0"
+if [ $? -ne 0 ];then
+ echo "SKIP: Could not create net namespace $ns0"
+ exit $ksft_skip
+fi
+ip netns add "$ns1"
+
+trap cleanup EXIT
+
+ip netns exec $ns0 sysctl -q -w net.ipv4.conf.default.rp_filter=0
+ip netns exec $ns0 sysctl -q -w net.ipv4.conf.all.rp_filter=0
+ip netns exec $ns0 sysctl -q -w net.ipv4.conf.all.rp_filter=0
+
+ip link add veth0 netns "$ns0" type veth peer name veth0 netns "$ns1" > /dev/null 2>&1
+if [ $? -ne 0 ];then
+ echo "SKIP: Could not add veth device"
+ exit $ksft_skip
+fi
+
+ip -net $ns0 li add tvrf type vrf table 9876
+if [ $? -ne 0 ];then
+ echo "SKIP: Could not add vrf device"
+ exit $ksft_skip
+fi
+
+ip -net $ns0 li set lo up
+
+ip -net $ns0 li set veth0 master tvrf
+ip -net $ns0 li set tvrf up
+ip -net $ns0 li set veth0 up
+ip -net $ns1 li set veth0 up
+
+ip -net $ns0 addr add $IP0/$PFXL dev veth0
+ip -net $ns1 addr add $IP1/$PFXL dev veth0
+
+ip netns exec $ns1 iperf3 -s > /dev/null 2>&1&
+if [ $? -ne 0 ];then
+ echo "SKIP: Could not start iperf3"
+ exit $ksft_skip
+fi
+
+# test vrf ingress handling.
+# The incoming connection should be placed in conntrack zone 1,
+# as decided by the first iteration of the ruleset.
+test_ct_zone_in()
+{
+ip netns exec $ns0 nft -f - <<EOF
+table testct {
+ chain rawpre {
+ type filter hook prerouting priority raw;
+
+ iif { veth0, tvrf } counter meta nftrace set 1
+ iif veth0 counter ct zone set 1 counter return
+ iif tvrf counter ct zone set 2 counter return
+ ip protocol icmp counter
+ notrack counter
+ }
+
+ chain rawout {
+ type filter hook output priority raw;
+
+ oif veth0 counter ct zone set 1 counter return
+ oif tvrf counter ct zone set 2 counter return
+ notrack counter
+ }
+}
+EOF
+ ip netns exec $ns1 ping -W 1 -c 1 -I veth0 $IP0 > /dev/null
+
+ # should be in zone 1, not zone 2
+ count=$(ip netns exec $ns0 conntrack -L -s $IP1 -d $IP0 -p icmp --zone 1 2>/dev/null | wc -l)
+ if [ $count -eq 1 ]; then
+ echo "PASS: entry found in conntrack zone 1"
+ else
+ echo "FAIL: entry not found in conntrack zone 1"
+ count=$(ip netns exec $ns0 conntrack -L -s $IP1 -d $IP0 -p icmp --zone 2 2> /dev/null | wc -l)
+ if [ $count -eq 1 ]; then
+ echo "FAIL: entry found in zone 2 instead"
+ else
+ echo "FAIL: entry not in zone 1 or 2, dumping table"
+ ip netns exec $ns0 conntrack -L
+ ip netns exec $ns0 nft list ruleset
+ fi
+ fi
+}
+
+# add masq rule that gets evaluated w. outif set to vrf device.
+# This tests the first iteration of the packet through conntrack,
+# oifname is the vrf device.
+test_masquerade_vrf()
+{
+ local qdisc=$1
+
+ if [ "$qdisc" != "default" ]; then
+ tc -net $ns0 qdisc add dev tvrf root $qdisc
+ fi
+
+ ip netns exec $ns0 conntrack -F 2>/dev/null
+
+ip netns exec $ns0 nft -f - <<EOF
+flush ruleset
+table ip nat {
+ chain rawout {
+ type filter hook output priority raw;
+
+ oif tvrf ct state untracked counter
+ }
+ chain postrouting2 {
+ type filter hook postrouting priority mangle;
+
+ oif tvrf ct state untracked counter
+ }
+ chain postrouting {
+ type nat hook postrouting priority 0;
+ # NB: masquerade should always be combined with 'oif(name) bla',
+ # lack of this is intentional here, we want to exercise double-snat.
+ ip saddr 172.30.30.0/30 counter masquerade random
+ }
+}
+EOF
+ ip netns exec $ns0 ip vrf exec tvrf iperf3 -t 1 -c $IP1 >/dev/null
+ if [ $? -ne 0 ]; then
+ echo "FAIL: iperf3 connect failure with masquerade + sport rewrite on vrf device"
+ ret=1
+ return
+ fi
+
+ # must also check that nat table was evaluated on second (lower device) iteration.
+ ip netns exec $ns0 nft list table ip nat |grep -q 'counter packets 2' &&
+ ip netns exec $ns0 nft list table ip nat |grep -q 'untracked counter packets [1-9]'
+ if [ $? -eq 0 ]; then
+ echo "PASS: iperf3 connect with masquerade + sport rewrite on vrf device ($qdisc qdisc)"
+ else
+ echo "FAIL: vrf rules have unexpected counter value"
+ ret=1
+ fi
+
+ if [ "$qdisc" != "default" ]; then
+ tc -net $ns0 qdisc del dev tvrf root
+ fi
+}
+
+# add masq rule that gets evaluated w. outif set to veth device.
+# This tests the 2nd iteration of the packet through conntrack,
+# oifname is the lower device (veth0 in this case).
+test_masquerade_veth()
+{
+ ip netns exec $ns0 conntrack -F 2>/dev/null
+ip netns exec $ns0 nft -f - <<EOF
+flush ruleset
+table ip nat {
+ chain postrouting {
+ type nat hook postrouting priority 0;
+ meta oif veth0 ip saddr 172.30.30.0/30 counter masquerade random
+ }
+}
+EOF
+ ip netns exec $ns0 ip vrf exec tvrf iperf3 -t 1 -c $IP1 > /dev/null
+ if [ $? -ne 0 ]; then
+ echo "FAIL: iperf3 connect failure with masquerade + sport rewrite on veth device"
+ ret=1
+ return
+ fi
+
+ # must also check that nat table was evaluated on second (lower device) iteration.
+ ip netns exec $ns0 nft list table ip nat |grep -q 'counter packets 2'
+ if [ $? -eq 0 ]; then
+ echo "PASS: iperf3 connect with masquerade + sport rewrite on veth device"
+ else
+ echo "FAIL: vrf masq rule has unexpected counter value"
+ ret=1
+ fi
+}
+
+test_ct_zone_in
+test_masquerade_vrf "default"
+test_masquerade_vrf "pfifo"
+test_masquerade_veth
+
+exit $ret
diff --git a/tools/testing/selftests/netfilter/ipip-conntrack-mtu.sh b/tools/testing/selftests/netfilter/ipip-conntrack-mtu.sh
new file mode 100755
index 000000000000..eb9553e4986b
--- /dev/null
+++ b/tools/testing/selftests/netfilter/ipip-conntrack-mtu.sh
@@ -0,0 +1,207 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+# Kselftest framework requirement - SKIP code is 4.
+ksft_skip=4
+
+# Conntrack needs to reassemble fragments in order to have complete
+# packets for rule matching. Reassembly can lead to packet loss.
+
+# Consider the following setup:
+# +--------+ +---------+ +--------+
+# |Router A|-------|Wanrouter|-------|Router B|
+# | |.IPIP..| |..IPIP.| |
+# +--------+ +---------+ +--------+
+# / mtu 1400 \
+# / \
+#+--------+ +--------+
+#|Client A| |Client B|
+#| | | |
+#+--------+ +--------+
+
+# Router A and Router B use IPIP tunnel interfaces to tunnel traffic
+# between Client A and Client B over WAN. Wanrouter has MTU 1400 set
+# on its interfaces.
+
+rnd=$(mktemp -u XXXXXXXX)
+rx=$(mktemp)
+
+r_a="ns-ra-$rnd"
+r_b="ns-rb-$rnd"
+r_w="ns-rw-$rnd"
+c_a="ns-ca-$rnd"
+c_b="ns-cb-$rnd"
+
+checktool (){
+ if ! $1 > /dev/null 2>&1; then
+ echo "SKIP: Could not $2"
+ exit $ksft_skip
+ fi
+}
+
+checktool "iptables --version" "run test without iptables"
+checktool "ip -Version" "run test without ip tool"
+checktool "which socat" "run test without socat"
+checktool "ip netns add ${r_a}" "create net namespace"
+
+for n in ${r_b} ${r_w} ${c_a} ${c_b};do
+ ip netns add ${n}
+done
+
+cleanup() {
+ for n in ${r_a} ${r_b} ${r_w} ${c_a} ${c_b};do
+ ip netns del ${n}
+ done
+ rm -f ${rx}
+}
+
+trap cleanup EXIT
+
+test_path() {
+ msg="$1"
+
+ ip netns exec ${c_b} socat -t 3 - udp4-listen:5000,reuseaddr > ${rx} < /dev/null &
+
+ sleep 1
+ for i in 1 2 3; do
+ head -c1400 /dev/zero | tr "\000" "a" | \
+ ip netns exec ${c_a} socat -t 1 -u STDIN UDP:192.168.20.2:5000
+ done
+
+ wait
+
+ bytes=$(wc -c < ${rx})
+
+ if [ $bytes -eq 1400 ];then
+ echo "OK: PMTU $msg connection tracking"
+ else
+ echo "FAIL: PMTU $msg connection tracking: got $bytes, expected 1400"
+ exit 1
+ fi
+}
+
+# Detailed setup for Router A
+# ---------------------------
+# Interfaces:
+# eth0: 10.2.2.1/24
+# eth1: 192.168.10.1/24
+# ipip0: No IP address, local 10.2.2.1 remote 10.4.4.1
+# Routes:
+# 192.168.20.0/24 dev ipip0 (192.168.20.0/24 is subnet of Client B)
+# 10.4.4.1 via 10.2.2.254 (Router B via Wanrouter)
+# No iptables rules at all.
+
+ip link add veth0 netns ${r_a} type veth peer name veth0 netns ${r_w}
+ip link add veth1 netns ${r_a} type veth peer name veth0 netns ${c_a}
+
+l_addr="10.2.2.1"
+r_addr="10.4.4.1"
+ip netns exec ${r_a} ip link add ipip0 type ipip local ${l_addr} remote ${r_addr} mode ipip || exit $ksft_skip
+
+for dev in lo veth0 veth1 ipip0; do
+ ip -net ${r_a} link set $dev up
+done
+
+ip -net ${r_a} addr add 10.2.2.1/24 dev veth0
+ip -net ${r_a} addr add 192.168.10.1/24 dev veth1
+
+ip -net ${r_a} route add 192.168.20.0/24 dev ipip0
+ip -net ${r_a} route add 10.4.4.0/24 via 10.2.2.254
+
+ip netns exec ${r_a} sysctl -q net.ipv4.conf.all.forwarding=1 > /dev/null
+
+# Detailed setup for Router B
+# ---------------------------
+# Interfaces:
+# eth0: 10.4.4.1/24
+# eth1: 192.168.20.1/24
+# ipip0: No IP address, local 10.4.4.1 remote 10.2.2.1
+# Routes:
+# 192.168.10.0/24 dev ipip0 (192.168.10.0/24 is subnet of Client A)
+# 10.2.2.1 via 10.4.4.254 (Router A via Wanrouter)
+# No iptables rules at all.
+
+ip link add veth0 netns ${r_b} type veth peer name veth1 netns ${r_w}
+ip link add veth1 netns ${r_b} type veth peer name veth0 netns ${c_b}
+
+l_addr="10.4.4.1"
+r_addr="10.2.2.1"
+
+ip netns exec ${r_b} ip link add ipip0 type ipip local ${l_addr} remote ${r_addr} mode ipip || exit $ksft_skip
+
+for dev in lo veth0 veth1 ipip0; do
+ ip -net ${r_b} link set $dev up
+done
+
+ip -net ${r_b} addr add 10.4.4.1/24 dev veth0
+ip -net ${r_b} addr add 192.168.20.1/24 dev veth1
+
+ip -net ${r_b} route add 192.168.10.0/24 dev ipip0
+ip -net ${r_b} route add 10.2.2.0/24 via 10.4.4.254
+ip netns exec ${r_b} sysctl -q net.ipv4.conf.all.forwarding=1 > /dev/null
+
+# Client A
+ip -net ${c_a} addr add 192.168.10.2/24 dev veth0
+ip -net ${c_a} link set dev lo up
+ip -net ${c_a} link set dev veth0 up
+ip -net ${c_a} route add default via 192.168.10.1
+
+# Client A
+ip -net ${c_b} addr add 192.168.20.2/24 dev veth0
+ip -net ${c_b} link set dev veth0 up
+ip -net ${c_b} link set dev lo up
+ip -net ${c_b} route add default via 192.168.20.1
+
+# Wan
+ip -net ${r_w} addr add 10.2.2.254/24 dev veth0
+ip -net ${r_w} addr add 10.4.4.254/24 dev veth1
+
+ip -net ${r_w} link set dev lo up
+ip -net ${r_w} link set dev veth0 up mtu 1400
+ip -net ${r_w} link set dev veth1 up mtu 1400
+
+ip -net ${r_a} link set dev veth0 mtu 1400
+ip -net ${r_b} link set dev veth0 mtu 1400
+
+ip netns exec ${r_w} sysctl -q net.ipv4.conf.all.forwarding=1 > /dev/null
+
+# Path MTU discovery
+# ------------------
+# Running tracepath from Client A to Client B shows PMTU discovery is working
+# as expected:
+#
+# clienta:~# tracepath 192.168.20.2
+# 1?: [LOCALHOST] pmtu 1500
+# 1: 192.168.10.1 0.867ms
+# 1: 192.168.10.1 0.302ms
+# 2: 192.168.10.1 0.312ms pmtu 1480
+# 2: no reply
+# 3: 192.168.10.1 0.510ms pmtu 1380
+# 3: 192.168.20.2 2.320ms reached
+# Resume: pmtu 1380 hops 3 back 3
+
+# ip netns exec ${c_a} traceroute --mtu 192.168.20.2
+
+# Router A has learned PMTU (1400) to Router B from Wanrouter.
+# Client A has learned PMTU (1400 - IPIP overhead = 1380) to Client B
+# from Router A.
+
+#Send large UDP packet
+#---------------------
+#Now we send a 1400 bytes UDP packet from Client A to Client B:
+
+# clienta:~# head -c1400 /dev/zero | tr "\000" "a" | socat -u STDIN UDP:192.168.20.2:5000
+test_path "without"
+
+# The IPv4 stack on Client A already knows the PMTU to Client B, so the
+# UDP packet is sent as two fragments (1380 + 20). Router A forwards the
+# fragments between eth1 and ipip0. The fragments fit into the tunnel and
+# reach their destination.
+
+#When sending the large UDP packet again, Router A now reassembles the
+#fragments before routing the packet over ipip0. The resulting IPIP
+#packet is too big (1400) for the tunnel PMTU (1380) to Router B, it is
+#dropped on Router A before sending.
+
+ip netns exec ${r_a} iptables -A FORWARD -m conntrack --ctstate NEW
+test_path "with"
diff --git a/tools/testing/selftests/netfilter/nf-queue.c b/tools/testing/selftests/netfilter/nf-queue.c
index 29c73bce38fa..9e56b9d47037 100644
--- a/tools/testing/selftests/netfilter/nf-queue.c
+++ b/tools/testing/selftests/netfilter/nf-queue.c
@@ -17,9 +17,12 @@
struct options {
bool count_packets;
+ bool gso_enabled;
int verbose;
unsigned int queue_num;
unsigned int timeout;
+ uint32_t verdict;
+ uint32_t delay_ms;
};
static unsigned int queue_stats[5];
@@ -27,7 +30,7 @@ static struct options opts;
static void help(const char *p)
{
- printf("Usage: %s [-c|-v [-vv] ] [-t timeout] [-q queue_num]\n", p);
+ printf("Usage: %s [-c|-v [-vv] ] [-t timeout] [-q queue_num] [-Qdst_queue ] [ -d ms_delay ] [-G]\n", p);
}
static int parse_attr_cb(const struct nlattr *attr, void *data)
@@ -162,7 +165,7 @@ nfq_build_cfg_params(char *buf, uint8_t mode, int range, int queue_num)
}
static struct nlmsghdr *
-nfq_build_verdict(char *buf, int id, int queue_num, int verd)
+nfq_build_verdict(char *buf, int id, int queue_num, uint32_t verd)
{
struct nfqnl_msg_verdict_hdr vh = {
.verdict = htonl(verd),
@@ -189,9 +192,6 @@ static void print_stats(void)
unsigned int last, total;
int i;
- if (!opts.count_packets)
- return;
-
total = 0;
last = queue_stats[0];
@@ -234,7 +234,8 @@ struct mnl_socket *open_queue(void)
nlh = nfq_build_cfg_params(buf, NFQNL_COPY_PACKET, 0xFFFF, queue_num);
- flags = NFQA_CFG_F_GSO | NFQA_CFG_F_UID_GID;
+ flags = opts.gso_enabled ? NFQA_CFG_F_GSO : 0;
+ flags |= NFQA_CFG_F_UID_GID;
mnl_attr_put_u32(nlh, NFQA_CFG_FLAGS, htonl(flags));
mnl_attr_put_u32(nlh, NFQA_CFG_MASK, htonl(flags));
@@ -255,6 +256,17 @@ struct mnl_socket *open_queue(void)
return nl;
}
+static void sleep_ms(uint32_t delay)
+{
+ struct timespec ts = { .tv_sec = delay / 1000 };
+
+ delay %= 1000;
+
+ ts.tv_nsec = delay * 1000llu * 1000llu;
+
+ nanosleep(&ts, NULL);
+}
+
static int mainloop(void)
{
unsigned int buflen = 64 * 1024 + MNL_SOCKET_BUFFER_SIZE;
@@ -278,7 +290,7 @@ static int mainloop(void)
ret = mnl_socket_recvfrom(nl, buf, buflen);
if (ret == -1) {
- if (errno == ENOBUFS)
+ if (errno == ENOBUFS || errno == EINTR)
continue;
if (errno == EAGAIN) {
@@ -298,7 +310,10 @@ static int mainloop(void)
}
id = ret - MNL_CB_OK;
- nlh = nfq_build_verdict(buf, id, opts.queue_num, NF_ACCEPT);
+ if (opts.delay_ms)
+ sleep_ms(opts.delay_ms);
+
+ nlh = nfq_build_verdict(buf, id, opts.queue_num, opts.verdict);
if (mnl_socket_sendto(nl, nlh, nlh->nlmsg_len) < 0) {
perror("mnl_socket_sendto");
exit(EXIT_FAILURE);
@@ -314,7 +329,7 @@ static void parse_opts(int argc, char **argv)
{
int c;
- while ((c = getopt(argc, argv, "chvt:q:")) != -1) {
+ while ((c = getopt(argc, argv, "chvt:q:Q:d:G")) != -1) {
switch (c) {
case 'c':
opts.count_packets = true;
@@ -328,20 +343,48 @@ static void parse_opts(int argc, char **argv)
if (opts.queue_num > 0xffff)
opts.queue_num = 0;
break;
+ case 'Q':
+ opts.verdict = atoi(optarg);
+ if (opts.verdict > 0xffff) {
+ fprintf(stderr, "Expected destination queue number\n");
+ exit(1);
+ }
+
+ opts.verdict <<= 16;
+ opts.verdict |= NF_QUEUE;
+ break;
+ case 'd':
+ opts.delay_ms = atoi(optarg);
+ if (opts.delay_ms == 0) {
+ fprintf(stderr, "Expected nonzero delay (in milliseconds)\n");
+ exit(1);
+ }
+ break;
case 't':
opts.timeout = atoi(optarg);
break;
+ case 'G':
+ opts.gso_enabled = false;
+ break;
case 'v':
opts.verbose++;
break;
}
}
+
+ if (opts.verdict != NF_ACCEPT && (opts.verdict >> 16 == opts.queue_num)) {
+ fprintf(stderr, "Cannot use same destination and source queue\n");
+ exit(1);
+ }
}
int main(int argc, char *argv[])
{
int ret;
+ opts.verdict = NF_ACCEPT;
+ opts.gso_enabled = true;
+
parse_opts(argc, argv);
ret = mainloop();
diff --git a/tools/testing/selftests/netfilter/nf_nat_edemux.sh b/tools/testing/selftests/netfilter/nf_nat_edemux.sh
new file mode 100755
index 000000000000..a1aa8f4a5828
--- /dev/null
+++ b/tools/testing/selftests/netfilter/nf_nat_edemux.sh
@@ -0,0 +1,127 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+#
+# Test NAT source port clash resolution
+#
+
+# Kselftest framework requirement - SKIP code is 4.
+ksft_skip=4
+ret=0
+
+sfx=$(mktemp -u "XXXXXXXX")
+ns1="ns1-$sfx"
+ns2="ns2-$sfx"
+socatpid=0
+
+cleanup()
+{
+ [ $socatpid -gt 0 ] && kill $socatpid
+ ip netns del $ns1
+ ip netns del $ns2
+}
+
+socat -h > /dev/null 2>&1
+if [ $? -ne 0 ];then
+ echo "SKIP: Could not run test without socat"
+ exit $ksft_skip
+fi
+
+iptables --version > /dev/null 2>&1
+if [ $? -ne 0 ];then
+ echo "SKIP: Could not run test without iptables"
+ exit $ksft_skip
+fi
+
+ip -Version > /dev/null 2>&1
+if [ $? -ne 0 ];then
+ echo "SKIP: Could not run test without ip tool"
+ exit $ksft_skip
+fi
+
+ip netns add "$ns1"
+if [ $? -ne 0 ];then
+ echo "SKIP: Could not create net namespace $ns1"
+ exit $ksft_skip
+fi
+
+trap cleanup EXIT
+
+ip netns add $ns2
+
+# Connect the namespaces using a veth pair
+ip link add name veth2 type veth peer name veth1
+ip link set netns $ns1 dev veth1
+ip link set netns $ns2 dev veth2
+
+ip netns exec $ns1 ip link set up dev lo
+ip netns exec $ns1 ip link set up dev veth1
+ip netns exec $ns1 ip addr add 192.168.1.1/24 dev veth1
+
+ip netns exec $ns2 ip link set up dev lo
+ip netns exec $ns2 ip link set up dev veth2
+ip netns exec $ns2 ip addr add 192.168.1.2/24 dev veth2
+
+# Create a server in one namespace
+ip netns exec $ns1 socat -u TCP-LISTEN:5201,fork OPEN:/dev/null,wronly=1 &
+socatpid=$!
+
+# Restrict source port to just one so we don't have to exhaust
+# all others.
+ip netns exec $ns2 sysctl -q net.ipv4.ip_local_port_range="10000 10000"
+
+# add a virtual IP using DNAT
+ip netns exec $ns2 iptables -t nat -A OUTPUT -d 10.96.0.1/32 -p tcp --dport 443 -j DNAT --to-destination 192.168.1.1:5201
+
+# ... and route it to the other namespace
+ip netns exec $ns2 ip route add 10.96.0.1 via 192.168.1.1
+
+sleep 1
+
+# add a persistent connection from the other namespace
+ip netns exec $ns2 socat -t 10 - TCP:192.168.1.1:5201 > /dev/null &
+
+sleep 1
+
+# ip daddr:dport will be rewritten to 192.168.1.1 5201
+# NAT must reallocate source port 10000 because
+# 192.168.1.2:10000 -> 192.168.1.1:5201 is already in use
+echo test | ip netns exec $ns2 socat -t 3 -u STDIN TCP:10.96.0.1:443,connect-timeout=3 >/dev/null
+ret=$?
+
+# Check socat can connect to 10.96.0.1:443 (aka 192.168.1.1:5201).
+if [ $ret -eq 0 ]; then
+ echo "PASS: socat can connect via NAT'd address"
+else
+ echo "FAIL: socat cannot connect via NAT'd address"
+fi
+
+# check sport clashres.
+ip netns exec $ns1 iptables -t nat -A PREROUTING -p tcp --dport 5202 -j REDIRECT --to-ports 5201
+ip netns exec $ns1 iptables -t nat -A PREROUTING -p tcp --dport 5203 -j REDIRECT --to-ports 5201
+
+sleep 5 | ip netns exec $ns2 socat -t 5 -u STDIN TCP:192.168.1.1:5202,connect-timeout=5 >/dev/null &
+cpid1=$!
+sleep 1
+
+# if connect succeeds, client closes instantly due to EOF on stdin.
+# if connect hangs, it will time out after 5s.
+echo | ip netns exec $ns2 socat -t 3 -u STDIN TCP:192.168.1.1:5203,connect-timeout=5 >/dev/null &
+cpid2=$!
+
+time_then=$(date +%s)
+wait $cpid2
+rv=$?
+time_now=$(date +%s)
+
+# Check how much time has elapsed, expectation is for
+# 'cpid2' to connect and then exit (and no connect delay).
+delta=$((time_now - time_then))
+
+if [ $delta -lt 2 -a $rv -eq 0 ]; then
+ echo "PASS: could connect to service via redirected ports"
+else
+ echo "FAIL: socat cannot connect to service via redirect ($delta seconds elapsed, returned $rv)"
+ ret=1
+fi
+
+exit $ret
diff --git a/tools/testing/selftests/netfilter/nft_audit.sh b/tools/testing/selftests/netfilter/nft_audit.sh
new file mode 100755
index 000000000000..99ed5bd6e840
--- /dev/null
+++ b/tools/testing/selftests/netfilter/nft_audit.sh
@@ -0,0 +1,245 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+#
+# Check that audit logs generated for nft commands are as expected.
+
+SKIP_RC=4
+RC=0
+
+nft --version >/dev/null 2>&1 || {
+ echo "SKIP: missing nft tool"
+ exit $SKIP_RC
+}
+
+# Run everything in a separate network namespace
+[ "${1}" != "run" ] && { unshare -n "${0}" run; exit $?; }
+
+# give other scripts a chance to finish - audit_logread sees all activity
+sleep 1
+
+logfile=$(mktemp)
+rulefile=$(mktemp)
+echo "logging into $logfile"
+./audit_logread >"$logfile" &
+logread_pid=$!
+trap 'kill $logread_pid; rm -f $logfile $rulefile' EXIT
+exec 3<"$logfile"
+
+do_test() { # (cmd, log)
+ echo -n "testing for cmd: $1 ... "
+ cat <&3 >/dev/null
+ $1 >/dev/null || exit 1
+ sleep 0.1
+ res=$(diff -a -u <(echo "$2") - <&3)
+ [ $? -eq 0 ] && { echo "OK"; return; }
+ echo "FAIL"
+ grep -v '^\(---\|+++\|@@\)' <<< "$res"
+ ((RC--))
+}
+
+nft flush ruleset
+
+# adding tables, chains and rules
+
+for table in t1 t2; do
+ do_test "nft add table $table" \
+ "table=$table family=2 entries=1 op=nft_register_table"
+
+ do_test "nft add chain $table c1" \
+ "table=$table family=2 entries=1 op=nft_register_chain"
+
+ do_test "nft add chain $table c2; add chain $table c3" \
+ "table=$table family=2 entries=2 op=nft_register_chain"
+
+ cmd="add rule $table c1 counter"
+
+ do_test "nft $cmd" \
+ "table=$table family=2 entries=1 op=nft_register_rule"
+
+ do_test "nft $cmd; $cmd" \
+ "table=$table family=2 entries=2 op=nft_register_rule"
+
+ cmd=""
+ sep=""
+ for chain in c2 c3; do
+ for i in {1..3}; do
+ cmd+="$sep add rule $table $chain counter"
+ sep=";"
+ done
+ done
+ do_test "nft $cmd" \
+ "table=$table family=2 entries=6 op=nft_register_rule"
+done
+
+for ((i = 0; i < 500; i++)); do
+ echo "add rule t2 c3 counter accept comment \"rule $i\""
+done >$rulefile
+do_test "nft -f $rulefile" \
+'table=t2 family=2 entries=500 op=nft_register_rule'
+
+# adding sets and elements
+
+settype='type inet_service; counter'
+setelem='{ 22, 80, 443 }'
+setblock="{ $settype; elements = $setelem; }"
+do_test "nft add set t1 s $setblock" \
+"table=t1 family=2 entries=4 op=nft_register_set"
+
+do_test "nft add set t1 s2 $setblock; add set t1 s3 { $settype; }" \
+"table=t1 family=2 entries=5 op=nft_register_set"
+
+do_test "nft add element t1 s3 $setelem" \
+"table=t1 family=2 entries=3 op=nft_register_setelem"
+
+# adding counters
+
+do_test 'nft add counter t1 c1' \
+'table=t1 family=2 entries=1 op=nft_register_obj'
+
+do_test 'nft add counter t2 c1; add counter t2 c2' \
+'table=t2 family=2 entries=2 op=nft_register_obj'
+
+for ((i = 3; i <= 500; i++)); do
+ echo "add counter t2 c$i"
+done >$rulefile
+do_test "nft -f $rulefile" \
+'table=t2 family=2 entries=498 op=nft_register_obj'
+
+# adding/updating quotas
+
+do_test 'nft add quota t1 q1 { 10 bytes }' \
+'table=t1 family=2 entries=1 op=nft_register_obj'
+
+do_test 'nft add quota t2 q1 { 10 bytes }; add quota t2 q2 { 10 bytes }' \
+'table=t2 family=2 entries=2 op=nft_register_obj'
+
+for ((i = 3; i <= 500; i++)); do
+ echo "add quota t2 q$i { 10 bytes }"
+done >$rulefile
+do_test "nft -f $rulefile" \
+'table=t2 family=2 entries=498 op=nft_register_obj'
+
+# changing the quota value triggers obj update path
+do_test 'nft add quota t1 q1 { 20 bytes }' \
+'table=t1 family=2 entries=1 op=nft_register_obj'
+
+# resetting rules
+
+do_test 'nft reset rules t1 c2' \
+'table=t1 family=2 entries=3 op=nft_reset_rule'
+
+do_test 'nft reset rules table t1' \
+'table=t1 family=2 entries=3 op=nft_reset_rule
+table=t1 family=2 entries=3 op=nft_reset_rule
+table=t1 family=2 entries=3 op=nft_reset_rule'
+
+do_test 'nft reset rules t2 c3' \
+'table=t2 family=2 entries=189 op=nft_reset_rule
+table=t2 family=2 entries=188 op=nft_reset_rule
+table=t2 family=2 entries=126 op=nft_reset_rule'
+
+do_test 'nft reset rules t2' \
+'table=t2 family=2 entries=3 op=nft_reset_rule
+table=t2 family=2 entries=3 op=nft_reset_rule
+table=t2 family=2 entries=186 op=nft_reset_rule
+table=t2 family=2 entries=188 op=nft_reset_rule
+table=t2 family=2 entries=129 op=nft_reset_rule'
+
+do_test 'nft reset rules' \
+'table=t1 family=2 entries=3 op=nft_reset_rule
+table=t1 family=2 entries=3 op=nft_reset_rule
+table=t1 family=2 entries=3 op=nft_reset_rule
+table=t2 family=2 entries=3 op=nft_reset_rule
+table=t2 family=2 entries=3 op=nft_reset_rule
+table=t2 family=2 entries=180 op=nft_reset_rule
+table=t2 family=2 entries=188 op=nft_reset_rule
+table=t2 family=2 entries=135 op=nft_reset_rule'
+
+# resetting sets and elements
+
+elem=(22 ,80 ,443)
+relem=""
+for i in {1..3}; do
+ relem+="${elem[((i - 1))]}"
+ do_test "nft reset element t1 s { $relem }" \
+ "table=t1 family=2 entries=$i op=nft_reset_setelem"
+done
+
+do_test 'nft reset set t1 s' \
+'table=t1 family=2 entries=3 op=nft_reset_setelem'
+
+# resetting counters
+
+do_test 'nft reset counter t1 c1' \
+'table=t1 family=2 entries=1 op=nft_reset_obj'
+
+do_test 'nft reset counters t1' \
+'table=t1 family=2 entries=1 op=nft_reset_obj'
+
+do_test 'nft reset counters t2' \
+'table=t2 family=2 entries=342 op=nft_reset_obj
+table=t2 family=2 entries=158 op=nft_reset_obj'
+
+do_test 'nft reset counters' \
+'table=t1 family=2 entries=1 op=nft_reset_obj
+table=t2 family=2 entries=341 op=nft_reset_obj
+table=t2 family=2 entries=159 op=nft_reset_obj'
+
+# resetting quotas
+
+do_test 'nft reset quota t1 q1' \
+'table=t1 family=2 entries=1 op=nft_reset_obj'
+
+do_test 'nft reset quotas t1' \
+'table=t1 family=2 entries=1 op=nft_reset_obj'
+
+do_test 'nft reset quotas t2' \
+'table=t2 family=2 entries=315 op=nft_reset_obj
+table=t2 family=2 entries=185 op=nft_reset_obj'
+
+do_test 'nft reset quotas' \
+'table=t1 family=2 entries=1 op=nft_reset_obj
+table=t2 family=2 entries=314 op=nft_reset_obj
+table=t2 family=2 entries=186 op=nft_reset_obj'
+
+# deleting rules
+
+readarray -t handles < <(nft -a list chain t1 c1 | \
+ sed -n 's/.*counter.* handle \(.*\)$/\1/p')
+
+do_test "nft delete rule t1 c1 handle ${handles[0]}" \
+'table=t1 family=2 entries=1 op=nft_unregister_rule'
+
+cmd='delete rule t1 c1 handle'
+do_test "nft $cmd ${handles[1]}; $cmd ${handles[2]}" \
+'table=t1 family=2 entries=2 op=nft_unregister_rule'
+
+do_test 'nft flush chain t1 c2' \
+'table=t1 family=2 entries=3 op=nft_unregister_rule'
+
+do_test 'nft flush table t2' \
+'table=t2 family=2 entries=509 op=nft_unregister_rule'
+
+# deleting chains
+
+do_test 'nft delete chain t2 c2' \
+'table=t2 family=2 entries=1 op=nft_unregister_chain'
+
+# deleting sets and elements
+
+do_test 'nft delete element t1 s { 22 }' \
+'table=t1 family=2 entries=1 op=nft_unregister_setelem'
+
+do_test 'nft delete element t1 s { 80, 443 }' \
+'table=t1 family=2 entries=2 op=nft_unregister_setelem'
+
+do_test 'nft flush set t1 s2' \
+'table=t1 family=2 entries=3 op=nft_unregister_setelem'
+
+do_test 'nft delete set t1 s2' \
+'table=t1 family=2 entries=1 op=nft_unregister_set'
+
+do_test 'nft delete set t1 s3' \
+'table=t1 family=2 entries=1 op=nft_unregister_set'
+
+exit $RC
diff --git a/tools/testing/selftests/netfilter/nft_concat_range.sh b/tools/testing/selftests/netfilter/nft_concat_range.sh
index 5a4938d6dcf2..e908009576c7 100755
--- a/tools/testing/selftests/netfilter/nft_concat_range.sh
+++ b/tools/testing/selftests/netfilter/nft_concat_range.sh
@@ -23,15 +23,15 @@ TESTS="reported_issues correctness concurrency timeout"
# Set types, defined by TYPE_ variables below
TYPES="net_port port_net net6_port port_proto net6_port_mac net6_port_mac_proto
- net_port_net net_mac net_mac_icmp net6_mac_icmp net6_port_net6_port
- net_port_mac_proto_net"
+ net_port_net net_mac mac_net net_mac_icmp net6_mac_icmp
+ net6_port_net6_port net_port_mac_proto_net"
# Reported bugs, also described by TYPE_ variables below
-BUGS="flush_remove_add"
+BUGS="flush_remove_add reload"
# List of possible paths to pktgen script from kernel tree for performance tests
PKTGEN_SCRIPT_PATHS="
- ../../../samples/pktgen/pktgen_bench_xmit_mode_netif_receive.sh
+ ../../../../samples/pktgen/pktgen_bench_xmit_mode_netif_receive.sh
pktgen/pktgen_bench_xmit_mode_netif_receive.sh"
# Definition of set types:
@@ -91,7 +91,7 @@ src
start 1
count 5
src_delta 2000
-tools sendip nc bash
+tools sendip socat nc bash
proto udp
race_repeat 3
@@ -116,7 +116,7 @@ src
start 10
count 5
src_delta 2000
-tools sendip nc bash
+tools sendip socat nc bash
proto udp6
race_repeat 3
@@ -141,7 +141,7 @@ src
start 1
count 5
src_delta 2000
-tools sendip nc bash
+tools sendip socat nc bash
proto udp
race_repeat 0
@@ -163,7 +163,7 @@ src mac
start 10
count 5
src_delta 2000
-tools sendip nc bash
+tools sendip socat nc bash
proto udp6
race_repeat 0
@@ -185,7 +185,7 @@ src mac proto
start 10
count 5
src_delta 2000
-tools sendip nc bash
+tools sendip socat nc bash
proto udp6
race_repeat 0
@@ -207,7 +207,7 @@ src addr4
start 1
count 5
src_delta 2000
-tools sendip nc bash
+tools sendip socat nc bash
proto udp
race_repeat 3
@@ -227,7 +227,7 @@ src addr6 port
start 10
count 5
src_delta 2000
-tools sendip nc
+tools sendip socat nc
proto udp6
race_repeat 3
@@ -247,7 +247,7 @@ src mac proto addr4
start 1
count 5
src_delta 2000
-tools sendip nc bash
+tools sendip socat nc bash
proto udp
race_repeat 0
@@ -264,7 +264,7 @@ src mac
start 1
count 5
src_delta 2000
-tools sendip nc bash
+tools sendip socat nc bash
proto udp
race_repeat 0
@@ -277,6 +277,23 @@ perf_entries 1000
perf_proto ipv4
"
+TYPE_mac_net="
+display mac,net
+type_spec ether_addr . ipv4_addr
+chain_spec ether saddr . ip saddr
+dst
+src mac addr4
+start 1
+count 5
+src_delta 2000
+tools sendip socat nc bash
+proto udp
+
+race_repeat 0
+
+perf_duration 0
+"
+
TYPE_net_mac_icmp="
display net,mac - ICMP
type_spec ipv4_addr . ether_addr
@@ -320,7 +337,7 @@ src addr4
start 1
count 5
src_delta 2000
-tools sendip nc
+tools sendip socat nc
proto udp
race_repeat 3
@@ -337,6 +354,23 @@ TYPE_flush_remove_add="
display Add two elements, flush, re-add
"
+TYPE_reload="
+display net,mac with reload
+type_spec ipv4_addr . ether_addr
+chain_spec ip daddr . ether saddr
+dst addr4
+src mac
+start 1
+count 1
+src_delta 2000
+tools sendip socat nc bash
+proto udp
+
+race_repeat 0
+
+perf_duration 0
+"
+
# Set template for all tests, types and rules are filled in depending on test
set_template='
flush ruleset
@@ -507,6 +541,24 @@ setup_send_udp() {
dst_port=
src_addr4=
}
+ elif command -v socat -v >/dev/null; then
+ send_udp() {
+ if [ -n "${src_addr4}" ]; then
+ B ip addr add "${src_addr4}" dev veth_b
+ __socatbind=",bind=${src_addr4}"
+ if [ -n "${src_port}" ];then
+ __socatbind="${__socatbind}:${src_port}"
+ fi
+ fi
+
+ ip addr add "${dst_addr4}" dev veth_a 2>/dev/null
+ [ -z "${dst_port}" ] && dst_port=12345
+
+ echo "test4" | B socat -t 0.01 STDIN UDP4-DATAGRAM:${dst_addr4}:${dst_port}"${__socatbind}"
+
+ src_addr4=
+ src_port=
+ }
elif command -v nc >/dev/null; then
if nc -u -w0 1.1.1.1 1 2>/dev/null; then
# OpenBSD netcat
@@ -572,6 +624,29 @@ setup_send_udp6() {
dst_port=
src_addr6=
}
+ elif command -v socat -v >/dev/null; then
+ send_udp6() {
+ ip -6 addr add "${dst_addr6}" dev veth_a nodad \
+ 2>/dev/null
+
+ __socatbind6=
+
+ if [ -n "${src_addr6}" ]; then
+ if [ -n "${src_addr6} != "${src_addr6_added} ]; then
+ B ip addr add "${src_addr6}" dev veth_b nodad
+
+ src_addr6_added=${src_addr6}
+ fi
+
+ __socatbind6=",bind=[${src_addr6}]"
+
+ if [ -n "${src_port}" ] ;then
+ __socatbind6="${__socatbind6}:${src_port}"
+ fi
+ fi
+
+ echo "test6" | B socat -t 0.01 STDIN UDP6-DATAGRAM:[${dst_addr6}]:${dst_port}"${__socatbind6}"
+ }
elif command -v nc >/dev/null && nc -u -w0 1.1.1.1 1 2>/dev/null; then
# GNU netcat might not work with IPv6, try next tool
send_udp6() {
@@ -984,7 +1059,8 @@ format() {
fi
done
for f in ${src}; do
- __expr="${__expr} . "
+ [ "${__expr}" != "{ " ] && __expr="${__expr} . "
+
__start="$(eval format_"${f}" "${srcstart}")"
__end="$(eval format_"${f}" "${srcend}")"
@@ -1455,6 +1531,59 @@ test_bug_flush_remove_add() {
nft flush ruleset
}
+# - add ranged element, check that packets match it
+# - reload the set, check packets still match
+test_bug_reload() {
+ setup veth send_"${proto}" set || return ${KSELFTEST_SKIP}
+ rstart=${start}
+
+ range_size=1
+ for i in $(seq "${start}" $((start + count))); do
+ end=$((start + range_size))
+
+ # Avoid negative or zero-sized port ranges
+ if [ $((end / 65534)) -gt $((start / 65534)) ]; then
+ start=${end}
+ end=$((end + 1))
+ fi
+ srcstart=$((start + src_delta))
+ srcend=$((end + src_delta))
+
+ add "$(format)" || return 1
+ range_size=$((range_size + 1))
+ start=$((end + range_size))
+ done
+
+ # check kernel does allocate pcpu sctrach map
+ # for reload with no elemet add/delete
+ ( echo flush set inet filter test ;
+ nft list set inet filter test ) | nft -f -
+
+ start=${rstart}
+ range_size=1
+
+ for i in $(seq "${start}" $((start + count))); do
+ end=$((start + range_size))
+
+ # Avoid negative or zero-sized port ranges
+ if [ $((end / 65534)) -gt $((start / 65534)) ]; then
+ start=${end}
+ end=$((end + 1))
+ fi
+ srcstart=$((start + src_delta))
+ srcend=$((end + src_delta))
+
+ for j in $(seq ${start} $((range_size / 2 + 1)) ${end}); do
+ send_match "${j}" $((j + src_delta)) || return 1
+ done
+
+ range_size=$((range_size + 1))
+ start=$((end + range_size))
+ done
+
+ nft flush ruleset
+}
+
test_reported_issues() {
eval test_bug_"${subtest}"
}
@@ -1513,4 +1642,4 @@ for name in ${TESTS}; do
done
done
-[ ${passed} -eq 0 ] && exit ${KSELFTEST_SKIP}
+[ ${passed} -eq 0 ] && exit ${KSELFTEST_SKIP} || exit 0
diff --git a/tools/testing/selftests/netfilter/nft_conntrack_helper.sh b/tools/testing/selftests/netfilter/nft_conntrack_helper.sh
index edf0a48da6bf..faa7778d7bd1 100755
--- a/tools/testing/selftests/netfilter/nft_conntrack_helper.sh
+++ b/tools/testing/selftests/netfilter/nft_conntrack_helper.sh
@@ -94,28 +94,50 @@ check_for_helper()
local message=$2
local port=$3
- ip netns exec ${netns} conntrack -L -p tcp --dport $port 2> /dev/null |grep -q 'helper=ftp'
+ if echo $message |grep -q 'ipv6';then
+ local family="ipv6"
+ else
+ local family="ipv4"
+ fi
+
+ ip netns exec ${netns} conntrack -L -f $family -p tcp --dport $port 2> /dev/null |grep -q 'helper=ftp'
if [ $? -ne 0 ] ; then
- echo "FAIL: ${netns} did not show attached helper $message" 1>&2
- ret=1
+ if [ $autoassign -eq 0 ] ;then
+ echo "FAIL: ${netns} did not show attached helper $message" 1>&2
+ ret=1
+ else
+ echo "PASS: ${netns} did not show attached helper $message" 1>&2
+ fi
+ else
+ if [ $autoassign -eq 0 ] ;then
+ echo "PASS: ${netns} connection on port $port has ftp helper attached" 1>&2
+ else
+ echo "FAIL: ${netns} connection on port $port has ftp helper attached" 1>&2
+ ret=1
+ fi
fi
- echo "PASS: ${netns} connection on port $port has ftp helper attached" 1>&2
return 0
}
test_helper()
{
local port=$1
- local msg=$2
+ local autoassign=$2
+
+ if [ $autoassign -eq 0 ] ;then
+ msg="set via ruleset"
+ else
+ msg="auto-assign"
+ fi
sleep 3 | ip netns exec ${ns2} nc -w 2 -l -p $port > /dev/null &
- sleep 1
sleep 1 | ip netns exec ${ns1} nc -w 2 10.0.1.2 $port > /dev/null &
+ sleep 1
- check_for_helper "$ns1" "ip $msg" $port
- check_for_helper "$ns2" "ip $msg" $port
+ check_for_helper "$ns1" "ip $msg" $port $autoassign
+ check_for_helper "$ns2" "ip $msg" $port $autoassign
wait
@@ -128,8 +150,8 @@ test_helper()
sleep 3 | ip netns exec ${ns2} nc -w 2 -6 -l -p $port > /dev/null &
- sleep 1
sleep 1 | ip netns exec ${ns1} nc -w 2 -6 dead:1::2 $port > /dev/null &
+ sleep 1
check_for_helper "$ns1" "ipv6 $msg" $port
check_for_helper "$ns2" "ipv6 $msg" $port
@@ -167,9 +189,9 @@ if [ $? -ne 0 ];then
fi
fi
-test_helper 2121 "set via ruleset"
-ip netns exec ${ns1} sysctl -q 'net.netfilter.nf_conntrack_helper=1'
-ip netns exec ${ns2} sysctl -q 'net.netfilter.nf_conntrack_helper=1'
-test_helper 21 "auto-assign"
+test_helper 2121 0
+ip netns exec ${ns1} sysctl -qe 'net.netfilter.nf_conntrack_helper=1'
+ip netns exec ${ns2} sysctl -qe 'net.netfilter.nf_conntrack_helper=1'
+test_helper 21 1
exit $ret
diff --git a/tools/testing/selftests/netfilter/nft_fib.sh b/tools/testing/selftests/netfilter/nft_fib.sh
new file mode 100755
index 000000000000..dff476e45e77
--- /dev/null
+++ b/tools/testing/selftests/netfilter/nft_fib.sh
@@ -0,0 +1,273 @@
+#!/bin/bash
+#
+# This tests the fib expression.
+#
+# Kselftest framework requirement - SKIP code is 4.
+ksft_skip=4
+ret=0
+
+sfx=$(mktemp -u "XXXXXXXX")
+ns1="ns1-$sfx"
+ns2="ns2-$sfx"
+nsrouter="nsrouter-$sfx"
+timeout=4
+
+log_netns=$(sysctl -n net.netfilter.nf_log_all_netns)
+
+cleanup()
+{
+ ip netns del ${ns1}
+ ip netns del ${ns2}
+ ip netns del ${nsrouter}
+
+ [ $log_netns -eq 0 ] && sysctl -q net.netfilter.nf_log_all_netns=$log_netns
+}
+
+nft --version > /dev/null 2>&1
+if [ $? -ne 0 ];then
+ echo "SKIP: Could not run test without nft tool"
+ exit $ksft_skip
+fi
+
+ip -Version > /dev/null 2>&1
+if [ $? -ne 0 ];then
+ echo "SKIP: Could not run test without ip tool"
+ exit $ksft_skip
+fi
+
+ip netns add ${nsrouter}
+if [ $? -ne 0 ];then
+ echo "SKIP: Could not create net namespace"
+ exit $ksft_skip
+fi
+
+trap cleanup EXIT
+
+dmesg | grep -q ' nft_rpfilter: '
+if [ $? -eq 0 ]; then
+ dmesg -c | grep ' nft_rpfilter: '
+ echo "WARN: a previous test run has failed" 1>&2
+fi
+
+sysctl -q net.netfilter.nf_log_all_netns=1
+ip netns add ${ns1}
+ip netns add ${ns2}
+
+load_ruleset() {
+ local netns=$1
+
+ip netns exec ${netns} nft -f /dev/stdin <<EOF
+table inet filter {
+ chain prerouting {
+ type filter hook prerouting priority 0; policy accept;
+ fib saddr . iif oif missing counter log prefix "$netns nft_rpfilter: " drop
+ }
+}
+EOF
+}
+
+load_pbr_ruleset() {
+ local netns=$1
+
+ip netns exec ${netns} nft -f /dev/stdin <<EOF
+table inet filter {
+ chain forward {
+ type filter hook forward priority raw;
+ fib saddr . iif oif gt 0 accept
+ log drop
+ }
+}
+EOF
+}
+
+load_ruleset_count() {
+ local netns=$1
+
+ip netns exec ${netns} nft -f /dev/stdin <<EOF
+table inet filter {
+ chain prerouting {
+ type filter hook prerouting priority 0; policy accept;
+ ip daddr 1.1.1.1 fib saddr . iif oif missing counter drop
+ ip6 daddr 1c3::c01d fib saddr . iif oif missing counter drop
+ }
+}
+EOF
+}
+
+check_drops() {
+ dmesg | grep -q ' nft_rpfilter: '
+ if [ $? -eq 0 ]; then
+ dmesg | grep ' nft_rpfilter: '
+ echo "FAIL: rpfilter did drop packets"
+ return 1
+ fi
+
+ return 0
+}
+
+check_fib_counter() {
+ local want=$1
+ local ns=$2
+ local address=$3
+
+ line=$(ip netns exec ${ns} nft list table inet filter | grep 'fib saddr . iif' | grep $address | grep "packets $want" )
+ ret=$?
+
+ if [ $ret -ne 0 ];then
+ echo "Netns $ns fib counter doesn't match expected packet count of $want for $address" 1>&2
+ ip netns exec ${ns} nft list table inet filter
+ return 1
+ fi
+
+ if [ $want -gt 0 ]; then
+ echo "PASS: fib expression did drop packets for $address"
+ fi
+
+ return 0
+}
+
+load_ruleset ${nsrouter}
+load_ruleset ${ns1}
+load_ruleset ${ns2}
+
+ip link add veth0 netns ${nsrouter} type veth peer name eth0 netns ${ns1} > /dev/null 2>&1
+if [ $? -ne 0 ];then
+ echo "SKIP: No virtual ethernet pair device support in kernel"
+ exit $ksft_skip
+fi
+ip link add veth1 netns ${nsrouter} type veth peer name eth0 netns ${ns2}
+
+ip -net ${nsrouter} link set lo up
+ip -net ${nsrouter} link set veth0 up
+ip -net ${nsrouter} addr add 10.0.1.1/24 dev veth0
+ip -net ${nsrouter} addr add dead:1::1/64 dev veth0
+
+ip -net ${nsrouter} link set veth1 up
+ip -net ${nsrouter} addr add 10.0.2.1/24 dev veth1
+ip -net ${nsrouter} addr add dead:2::1/64 dev veth1
+
+ip -net ${ns1} link set lo up
+ip -net ${ns1} link set eth0 up
+
+ip -net ${ns2} link set lo up
+ip -net ${ns2} link set eth0 up
+
+ip -net ${ns1} addr add 10.0.1.99/24 dev eth0
+ip -net ${ns1} addr add dead:1::99/64 dev eth0
+ip -net ${ns1} route add default via 10.0.1.1
+ip -net ${ns1} route add default via dead:1::1
+
+ip -net ${ns2} addr add 10.0.2.99/24 dev eth0
+ip -net ${ns2} addr add dead:2::99/64 dev eth0
+ip -net ${ns2} route add default via 10.0.2.1
+ip -net ${ns2} route add default via dead:2::1
+
+test_ping() {
+ local daddr4=$1
+ local daddr6=$2
+
+ ip netns exec ${ns1} ping -c 1 -q $daddr4 > /dev/null
+ ret=$?
+ if [ $ret -ne 0 ];then
+ check_drops
+ echo "FAIL: ${ns1} cannot reach $daddr4, ret $ret" 1>&2
+ return 1
+ fi
+
+ ip netns exec ${ns1} ping -c 3 -q $daddr6 > /dev/null
+ ret=$?
+ if [ $ret -ne 0 ];then
+ check_drops
+ echo "FAIL: ${ns1} cannot reach $daddr6, ret $ret" 1>&2
+ return 1
+ fi
+
+ return 0
+}
+
+ip netns exec ${nsrouter} sysctl net.ipv6.conf.all.forwarding=1 > /dev/null
+ip netns exec ${nsrouter} sysctl net.ipv4.conf.veth0.forwarding=1 > /dev/null
+ip netns exec ${nsrouter} sysctl net.ipv4.conf.veth1.forwarding=1 > /dev/null
+ip netns exec ${nsrouter} sysctl net.ipv4.conf.all.rp_filter=0 > /dev/null
+ip netns exec ${nsrouter} sysctl net.ipv4.conf.veth0.rp_filter=0 > /dev/null
+
+sleep 3
+
+test_ping 10.0.2.1 dead:2::1 || exit 1
+check_drops || exit 1
+
+test_ping 10.0.2.99 dead:2::99 || exit 1
+check_drops || exit 1
+
+echo "PASS: fib expression did not cause unwanted packet drops"
+
+ip netns exec ${nsrouter} nft flush table inet filter
+
+ip -net ${ns1} route del default
+ip -net ${ns1} -6 route del default
+
+ip -net ${ns1} addr del 10.0.1.99/24 dev eth0
+ip -net ${ns1} addr del dead:1::99/64 dev eth0
+
+ip -net ${ns1} addr add 10.0.2.99/24 dev eth0
+ip -net ${ns1} addr add dead:2::99/64 dev eth0
+
+ip -net ${ns1} route add default via 10.0.2.1
+ip -net ${ns1} -6 route add default via dead:2::1
+
+ip -net ${nsrouter} addr add dead:2::1/64 dev veth0
+
+# switch to ruleset that doesn't log, this time
+# its expected that this does drop the packets.
+load_ruleset_count ${nsrouter}
+
+# ns1 has a default route, but nsrouter does not.
+# must not check return value, ping to 1.1.1.1 will
+# fail.
+check_fib_counter 0 ${nsrouter} 1.1.1.1 || exit 1
+check_fib_counter 0 ${nsrouter} 1c3::c01d || exit 1
+
+ip netns exec ${ns1} ping -c 1 -W 1 -q 1.1.1.1 > /dev/null
+check_fib_counter 1 ${nsrouter} 1.1.1.1 || exit 1
+
+sleep 2
+ip netns exec ${ns1} ping -c 3 -q 1c3::c01d > /dev/null
+check_fib_counter 3 ${nsrouter} 1c3::c01d || exit 1
+
+# delete all rules
+ip netns exec ${ns1} nft flush ruleset
+ip netns exec ${ns2} nft flush ruleset
+ip netns exec ${nsrouter} nft flush ruleset
+
+ip -net ${ns1} addr add 10.0.1.99/24 dev eth0
+ip -net ${ns1} addr add dead:1::99/64 dev eth0
+
+ip -net ${ns1} addr del 10.0.2.99/24 dev eth0
+ip -net ${ns1} addr del dead:2::99/64 dev eth0
+
+ip -net ${nsrouter} addr del dead:2::1/64 dev veth0
+
+# ... pbr ruleset for the router, check iif+oif.
+load_pbr_ruleset ${nsrouter}
+if [ $? -ne 0 ] ; then
+ echo "SKIP: Could not load fib forward ruleset"
+ exit $ksft_skip
+fi
+
+ip -net ${nsrouter} rule add from all table 128
+ip -net ${nsrouter} rule add from all iif veth0 table 129
+ip -net ${nsrouter} route add table 128 to 10.0.1.0/24 dev veth0
+ip -net ${nsrouter} route add table 129 to 10.0.2.0/24 dev veth1
+
+# drop main ipv4 table
+ip -net ${nsrouter} -4 rule delete table main
+
+test_ping 10.0.2.99 dead:2::99
+if [ $? -ne 0 ] ; then
+ ip -net ${nsrouter} nft list ruleset
+ echo "FAIL: fib mismatch in pbr setup"
+ exit 1
+fi
+
+echo "PASS: fib expression forward check with policy based routing"
+exit 0
diff --git a/tools/testing/selftests/netfilter/nft_flowtable.sh b/tools/testing/selftests/netfilter/nft_flowtable.sh
index d3e0809ab368..a32f490f7539 100755
--- a/tools/testing/selftests/netfilter/nft_flowtable.sh
+++ b/tools/testing/selftests/netfilter/nft_flowtable.sh
@@ -2,62 +2,57 @@
# SPDX-License-Identifier: GPL-2.0
#
# This tests basic flowtable functionality.
-# Creates following topology:
+# Creates following default topology:
#
# Originator (MTU 9000) <-Router1-> MTU 1500 <-Router2-> Responder (MTU 2000)
# Router1 is the one doing flow offloading, Router2 has no special
# purpose other than having a link that is smaller than either Originator
# and responder, i.e. TCPMSS announced values are too large and will still
# result in fragmentation and/or PMTU discovery.
+#
+# You can check with different Orgininator/Link/Responder MTU eg:
+# nft_flowtable.sh -o8000 -l1500 -r2000
+#
+
+sfx=$(mktemp -u "XXXXXXXX")
+ns1="ns1-$sfx"
+ns2="ns2-$sfx"
+nsr1="nsr1-$sfx"
+nsr2="nsr2-$sfx"
# Kselftest framework requirement - SKIP code is 4.
ksft_skip=4
ret=0
-ns1in=""
-ns2in=""
+nsin=""
ns1out=""
ns2out=""
log_netns=$(sysctl -n net.netfilter.nf_log_all_netns)
-nft --version > /dev/null 2>&1
-if [ $? -ne 0 ];then
- echo "SKIP: Could not run test without nft tool"
- exit $ksft_skip
-fi
-
-ip -Version > /dev/null 2>&1
-if [ $? -ne 0 ];then
- echo "SKIP: Could not run test without ip tool"
- exit $ksft_skip
-fi
-
-which nc > /dev/null 2>&1
-if [ $? -ne 0 ];then
- echo "SKIP: Could not run test without nc (netcat)"
- exit $ksft_skip
-fi
-
-ip netns add nsr1
-if [ $? -ne 0 ];then
- echo "SKIP: Could not create net namespace"
- exit $ksft_skip
-fi
+checktool (){
+ if ! $1 > /dev/null 2>&1; then
+ echo "SKIP: Could not $2"
+ exit $ksft_skip
+ fi
+}
-ip netns add ns1
-ip netns add ns2
+checktool "nft --version" "run test without nft tool"
+checktool "ip -Version" "run test without ip tool"
+checktool "which nc" "run test without nc (netcat)"
+checktool "ip netns add $nsr1" "create net namespace $nsr1"
-ip netns add nsr2
+ip netns add $ns1
+ip netns add $ns2
+ip netns add $nsr2
cleanup() {
- for i in 1 2; do
- ip netns del ns$i
- ip netns del nsr$i
- done
+ ip netns del $ns1
+ ip netns del $ns2
+ ip netns del $nsr1
+ ip netns del $nsr2
- rm -f "$ns1in" "$ns1out"
- rm -f "$ns2in" "$ns2out"
+ rm -f "$nsin" "$ns1out" "$ns2out"
[ $log_netns -eq 0 ] && sysctl -q net.netfilter.nf_log_all_netns=$log_netns
}
@@ -66,22 +61,21 @@ trap cleanup EXIT
sysctl -q net.netfilter.nf_log_all_netns=1
-ip link add veth0 netns nsr1 type veth peer name eth0 netns ns1
-ip link add veth1 netns nsr1 type veth peer name veth0 netns nsr2
+ip link add veth0 netns $nsr1 type veth peer name eth0 netns $ns1
+ip link add veth1 netns $nsr1 type veth peer name veth0 netns $nsr2
-ip link add veth1 netns nsr2 type veth peer name eth0 netns ns2
+ip link add veth1 netns $nsr2 type veth peer name eth0 netns $ns2
for dev in lo veth0 veth1; do
- for i in 1 2; do
- ip -net nsr$i link set $dev up
- done
+ ip -net $nsr1 link set $dev up
+ ip -net $nsr2 link set $dev up
done
-ip -net nsr1 addr add 10.0.1.1/24 dev veth0
-ip -net nsr1 addr add dead:1::1/64 dev veth0
+ip -net $nsr1 addr add 10.0.1.1/24 dev veth0
+ip -net $nsr1 addr add dead:1::1/64 dev veth0
-ip -net nsr2 addr add 10.0.2.1/24 dev veth1
-ip -net nsr2 addr add dead:2::1/64 dev veth1
+ip -net $nsr2 addr add 10.0.2.1/24 dev veth1
+ip -net $nsr2 addr add dead:2::1/64 dev veth1
# set different MTUs so we need to push packets coming from ns1 (large MTU)
# to ns2 (smaller MTU) to stack either to perform fragmentation (ip_no_pmtu_disc=1),
@@ -89,76 +83,100 @@ ip -net nsr2 addr add dead:2::1/64 dev veth1
# ns2 is going via nsr2 with a smaller mtu, so that TCPMSS announced by both peers
# is NOT the lowest link mtu.
-ip -net nsr1 link set veth0 mtu 9000
-ip -net ns1 link set eth0 mtu 9000
+omtu=9000
+lmtu=1500
+rmtu=2000
+
+usage(){
+ echo "nft_flowtable.sh [OPTIONS]"
+ echo
+ echo "MTU options"
+ echo " -o originator"
+ echo " -l link"
+ echo " -r responder"
+ exit 1
+}
+
+while getopts "o:l:r:" o
+do
+ case $o in
+ o) omtu=$OPTARG;;
+ l) lmtu=$OPTARG;;
+ r) rmtu=$OPTARG;;
+ *) usage;;
+ esac
+done
+
+if ! ip -net $nsr1 link set veth0 mtu $omtu; then
+ exit 1
+fi
+
+ip -net $ns1 link set eth0 mtu $omtu
+
+if ! ip -net $nsr2 link set veth1 mtu $rmtu; then
+ exit 1
+fi
-ip -net nsr2 link set veth1 mtu 2000
-ip -net ns2 link set eth0 mtu 2000
+ip -net $ns2 link set eth0 mtu $rmtu
# transfer-net between nsr1 and nsr2.
# these addresses are not used for connections.
-ip -net nsr1 addr add 192.168.10.1/24 dev veth1
-ip -net nsr1 addr add fee1:2::1/64 dev veth1
+ip -net $nsr1 addr add 192.168.10.1/24 dev veth1
+ip -net $nsr1 addr add fee1:2::1/64 dev veth1
-ip -net nsr2 addr add 192.168.10.2/24 dev veth0
-ip -net nsr2 addr add fee1:2::2/64 dev veth0
+ip -net $nsr2 addr add 192.168.10.2/24 dev veth0
+ip -net $nsr2 addr add fee1:2::2/64 dev veth0
-for i in 1 2; do
- ip netns exec nsr$i sysctl net.ipv4.conf.veth0.forwarding=1 > /dev/null
- ip netns exec nsr$i sysctl net.ipv4.conf.veth1.forwarding=1 > /dev/null
+for i in 0 1; do
+ ip netns exec $nsr1 sysctl net.ipv4.conf.veth$i.forwarding=1 > /dev/null
+ ip netns exec $nsr2 sysctl net.ipv4.conf.veth$i.forwarding=1 > /dev/null
+done
- ip -net ns$i link set lo up
- ip -net ns$i link set eth0 up
- ip -net ns$i addr add 10.0.$i.99/24 dev eth0
- ip -net ns$i route add default via 10.0.$i.1
- ip -net ns$i addr add dead:$i::99/64 dev eth0
- ip -net ns$i route add default via dead:$i::1
- ip netns exec ns$i sysctl net.ipv4.tcp_no_metrics_save=1 > /dev/null
+for ns in $ns1 $ns2;do
+ ip -net $ns link set lo up
+ ip -net $ns link set eth0 up
+ if ! ip netns exec $ns sysctl net.ipv4.tcp_no_metrics_save=1 > /dev/null; then
+ echo "ERROR: Check Originator/Responder values (problem during address addition)"
+ exit 1
+ fi
# don't set ip DF bit for first two tests
- ip netns exec ns$i sysctl net.ipv4.ip_no_pmtu_disc=1 > /dev/null
+ ip netns exec $ns sysctl net.ipv4.ip_no_pmtu_disc=1 > /dev/null
done
-ip -net nsr1 route add default via 192.168.10.2
-ip -net nsr2 route add default via 192.168.10.1
+ip -net $ns1 addr add 10.0.1.99/24 dev eth0
+ip -net $ns2 addr add 10.0.2.99/24 dev eth0
+ip -net $ns1 route add default via 10.0.1.1
+ip -net $ns2 route add default via 10.0.2.1
+ip -net $ns1 addr add dead:1::99/64 dev eth0
+ip -net $ns2 addr add dead:2::99/64 dev eth0
+ip -net $ns1 route add default via dead:1::1
+ip -net $ns2 route add default via dead:2::1
-ip netns exec nsr1 nft -f - <<EOF
+ip -net $nsr1 route add default via 192.168.10.2
+ip -net $nsr2 route add default via 192.168.10.1
+
+ip netns exec $nsr1 nft -f - <<EOF
table inet filter {
flowtable f1 {
hook ingress priority 0
devices = { veth0, veth1 }
}
+ counter routed_orig { }
+ counter routed_repl { }
+
chain forward {
type filter hook forward priority 0; policy drop;
# flow offloaded? Tag ct with mark 1, so we can detect when it fails.
- meta oif "veth1" tcp dport 12345 flow offload @f1 counter
-
- # use packet size to trigger 'should be offloaded by now'.
- # otherwise, if 'flow offload' expression never offloads, the
- # test will pass.
- tcp dport 12345 meta length gt 200 ct mark set 1 counter
-
- # this turns off flow offloading internally, so expect packets again
- tcp flags fin,rst ct mark set 0 accept
-
- # this allows large packets from responder, we need this as long
- # as PMTUd is off.
- # This rule is deleted for the last test, when we expect PMTUd
- # to kick in and ensure all packets meet mtu requirements.
- meta length gt 1500 accept comment something-to-grep-for
+ meta oif "veth1" tcp dport 12345 ct mark set 1 flow add @f1 counter name routed_orig accept
- # next line blocks connection w.o. working offload.
- # we only do this for reverse dir, because we expect packets to
- # enter slow path due to MTU mismatch of veth0 and veth1.
- tcp sport 12345 ct mark 1 counter log prefix "mark failure " drop
+ # count packets supposedly offloaded as per direction.
+ ct mark 1 counter name ct direction map { original : routed_orig, reply : routed_repl } accept
ct state established,related accept
- # for packets that we can't offload yet, i.e. SYN (any ct that is not confirmed)
- meta length lt 200 oif "veth1" tcp dport 12345 counter accept
-
meta nfproto ipv4 meta l4proto icmp accept
meta nfproto ipv6 meta l4proto icmpv6 accept
}
@@ -170,35 +188,51 @@ if [ $? -ne 0 ]; then
exit $ksft_skip
fi
+ip netns exec $ns2 nft -f - <<EOF
+table inet filter {
+ counter ip4dscp0 { }
+ counter ip4dscp3 { }
+
+ chain input {
+ type filter hook input priority 0; policy accept;
+ meta l4proto tcp goto {
+ ip dscp cs3 counter name ip4dscp3 accept
+ ip dscp 0 counter name ip4dscp0 accept
+ }
+ }
+}
+EOF
+
+if [ $? -ne 0 ]; then
+ echo "SKIP: Could not load nft ruleset"
+ exit $ksft_skip
+fi
+
# test basic connectivity
-ip netns exec ns1 ping -c 1 -q 10.0.2.99 > /dev/null
-if [ $? -ne 0 ];then
- echo "ERROR: ns1 cannot reach ns2" 1>&2
- bash
+if ! ip netns exec $ns1 ping -c 1 -q 10.0.2.99 > /dev/null; then
+ echo "ERROR: $ns1 cannot reach ns2" 1>&2
exit 1
fi
-ip netns exec ns2 ping -c 1 -q 10.0.1.99 > /dev/null
-if [ $? -ne 0 ];then
- echo "ERROR: ns2 cannot reach ns1" 1>&2
+if ! ip netns exec $ns2 ping -c 1 -q 10.0.1.99 > /dev/null; then
+ echo "ERROR: $ns2 cannot reach $ns1" 1>&2
exit 1
fi
if [ $ret -eq 0 ];then
- echo "PASS: netns routing/connectivity: ns1 can reach ns2"
+ echo "PASS: netns routing/connectivity: $ns1 can reach $ns2"
fi
-ns1in=$(mktemp)
+nsin=$(mktemp)
ns1out=$(mktemp)
-ns2in=$(mktemp)
ns2out=$(mktemp)
make_file()
{
name=$1
- who=$2
- SIZE=$((RANDOM % (1024 * 8)))
+ SIZE=$((RANDOM % (1024 * 128)))
+ SIZE=$((SIZE + (1024 * 8)))
TSIZE=$((SIZE * 1024))
dd if=/dev/urandom of="$name" bs=1024 count=$SIZE 2> /dev/null
@@ -209,14 +243,99 @@ make_file()
dd if=/dev/urandom conf=notrunc of="$name" bs=1 count=$SIZE 2> /dev/null
}
+check_counters()
+{
+ local what=$1
+ local ok=1
+
+ local orig=$(ip netns exec $nsr1 nft reset counter inet filter routed_orig | grep packets)
+ local repl=$(ip netns exec $nsr1 nft reset counter inet filter routed_repl | grep packets)
+
+ local orig_cnt=${orig#*bytes}
+ local repl_cnt=${repl#*bytes}
+
+ local fs=$(du -sb $nsin)
+ local max_orig=${fs%%/*}
+ local max_repl=$((max_orig/4))
+
+ if [ $orig_cnt -gt $max_orig ];then
+ echo "FAIL: $what: original counter $orig_cnt exceeds expected value $max_orig" 1>&2
+ ret=1
+ ok=0
+ fi
+
+ if [ $repl_cnt -gt $max_repl ];then
+ echo "FAIL: $what: reply counter $repl_cnt exceeds expected value $max_repl" 1>&2
+ ret=1
+ ok=0
+ fi
+
+ if [ $ok -eq 1 ]; then
+ echo "PASS: $what"
+ fi
+}
+
+check_dscp()
+{
+ local what=$1
+ local ok=1
+
+ local counter=$(ip netns exec $ns2 nft reset counter inet filter ip4dscp3 | grep packets)
+
+ local pc4=${counter%*bytes*}
+ local pc4=${pc4#*packets}
+
+ local counter=$(ip netns exec $ns2 nft reset counter inet filter ip4dscp0 | grep packets)
+ local pc4z=${counter%*bytes*}
+ local pc4z=${pc4z#*packets}
+
+ case "$what" in
+ "dscp_none")
+ if [ $pc4 -gt 0 ] || [ $pc4z -eq 0 ]; then
+ echo "FAIL: dscp counters do not match, expected dscp3 == 0, dscp0 > 0, but got $pc4,$pc4z" 1>&2
+ ret=1
+ ok=0
+ fi
+ ;;
+ "dscp_fwd")
+ if [ $pc4 -eq 0 ] || [ $pc4z -eq 0 ]; then
+ echo "FAIL: dscp counters do not match, expected dscp3 and dscp0 > 0 but got $pc4,$pc4z" 1>&2
+ ret=1
+ ok=0
+ fi
+ ;;
+ "dscp_ingress")
+ if [ $pc4 -eq 0 ] || [ $pc4z -gt 0 ]; then
+ echo "FAIL: dscp counters do not match, expected dscp3 > 0, dscp0 == 0 but got $pc4,$pc4z" 1>&2
+ ret=1
+ ok=0
+ fi
+ ;;
+ "dscp_egress")
+ if [ $pc4 -eq 0 ] || [ $pc4z -gt 0 ]; then
+ echo "FAIL: dscp counters do not match, expected dscp3 > 0, dscp0 == 0 but got $pc4,$pc4z" 1>&2
+ ret=1
+ ok=0
+ fi
+ ;;
+ *)
+ echo "FAIL: Unknown DSCP check" 1>&2
+ ret=1
+ ok=0
+ esac
+
+ if [ $ok -eq 1 ] ;then
+ echo "PASS: $what: dscp packet counters match"
+ fi
+}
+
check_transfer()
{
in=$1
out=$2
what=$3
- cmp "$in" "$out" > /dev/null 2>&1
- if [ $? -ne 0 ] ;then
+ if ! cmp "$in" "$out" > /dev/null 2>&1; then
echo "FAIL: file mismatch for $what" 1>&2
ls -l "$in"
ls -l "$out"
@@ -234,26 +353,39 @@ test_tcp_forwarding_ip()
local dstport=$4
local lret=0
- ip netns exec $nsb nc -w 5 -l -p 12345 < "$ns2in" > "$ns2out" &
+ ip netns exec $nsb nc -w 5 -l -p 12345 < "$nsin" > "$ns2out" &
lpid=$!
sleep 1
- ip netns exec $nsa nc -w 4 "$dstip" "$dstport" < "$ns1in" > "$ns1out" &
+ ip netns exec $nsa nc -w 4 "$dstip" "$dstport" < "$nsin" > "$ns1out" &
cpid=$!
- sleep 3
+ sleep 1
+
+ prev="$(ls -l $ns1out $ns2out)"
+ sleep 1
- kill $lpid
- kill $cpid
- wait
+ while [[ "$prev" != "$(ls -l $ns1out $ns2out)" ]]; do
+ sleep 1;
+ prev="$(ls -l $ns1out $ns2out)"
+ done
- check_transfer "$ns1in" "$ns2out" "ns1 -> ns2"
- if [ $? -ne 0 ];then
+ if test -d /proc/"$lpid"/; then
+ kill $lpid
+ fi
+
+ if test -d /proc/"$cpid"/; then
+ kill $cpid
+ fi
+
+ wait $lpid
+ wait $cpid
+
+ if ! check_transfer "$nsin" "$ns2out" "ns1 -> ns2"; then
lret=1
fi
- check_transfer "$ns2in" "$ns1out" "ns1 <- ns2"
- if [ $? -ne 0 ];then
+ if ! check_transfer "$nsin" "$ns1out" "ns1 <- ns2"; then
lret=1
fi
@@ -267,45 +399,107 @@ test_tcp_forwarding()
return $?
}
+test_tcp_forwarding_set_dscp()
+{
+ check_dscp "dscp_none"
+
+ip netns exec $nsr1 nft -f - <<EOF
+table netdev dscpmangle {
+ chain setdscp0 {
+ type filter hook ingress device "veth0" priority 0; policy accept
+ ip dscp set cs3
+ }
+}
+EOF
+if [ $? -eq 0 ]; then
+ test_tcp_forwarding_ip "$1" "$2" 10.0.2.99 12345
+ check_dscp "dscp_ingress"
+
+ ip netns exec $nsr1 nft delete table netdev dscpmangle
+else
+ echo "SKIP: Could not load netdev:ingress for veth0"
+fi
+
+ip netns exec $nsr1 nft -f - <<EOF
+table netdev dscpmangle {
+ chain setdscp0 {
+ type filter hook egress device "veth1" priority 0; policy accept
+ ip dscp set cs3
+ }
+}
+EOF
+if [ $? -eq 0 ]; then
+ test_tcp_forwarding_ip "$1" "$2" 10.0.2.99 12345
+ check_dscp "dscp_egress"
+
+ ip netns exec $nsr1 nft flush table netdev dscpmangle
+else
+ echo "SKIP: Could not load netdev:egress for veth1"
+fi
+
+ # partial. If flowtable really works, then both dscp-is-0 and dscp-is-cs3
+ # counters should have seen packets (before and after ft offload kicks in).
+ ip netns exec $nsr1 nft -a insert rule inet filter forward ip dscp set cs3
+ test_tcp_forwarding_ip "$1" "$2" 10.0.2.99 12345
+ check_dscp "dscp_fwd"
+}
+
test_tcp_forwarding_nat()
{
local lret
+ local pmtu
test_tcp_forwarding_ip "$1" "$2" 10.0.2.99 12345
lret=$?
+ pmtu=$3
+ what=$4
+
if [ $lret -eq 0 ] ; then
+ if [ $pmtu -eq 1 ] ;then
+ check_counters "flow offload for ns1/ns2 with masquerade and pmtu discovery $what"
+ else
+ echo "PASS: flow offload for ns1/ns2 with masquerade $what"
+ fi
+
test_tcp_forwarding_ip "$1" "$2" 10.6.6.6 1666
lret=$?
+ if [ $pmtu -eq 1 ] ;then
+ check_counters "flow offload for ns1/ns2 with dnat and pmtu discovery $what"
+ elif [ $lret -eq 0 ] ; then
+ echo "PASS: flow offload for ns1/ns2 with dnat $what"
+ fi
fi
return $lret
}
-make_file "$ns1in" "ns1"
-make_file "$ns2in" "ns2"
+make_file "$nsin"
# First test:
# No PMTU discovery, nsr1 is expected to fragment packets from ns1 to ns2 as needed.
-test_tcp_forwarding ns1 ns2
-if [ $? -eq 0 ] ;then
+# Due to MTU mismatch in both directions, all packets (except small packets like pure
+# acks) have to be handled by normal forwarding path. Therefore, packet counters
+# are not checked.
+if test_tcp_forwarding $ns1 $ns2; then
echo "PASS: flow offloaded for ns1/ns2"
else
echo "FAIL: flow offload for ns1/ns2:" 1>&2
- ip netns exec nsr1 nft list ruleset
+ ip netns exec $nsr1 nft list ruleset
ret=1
fi
# delete default route, i.e. ns2 won't be able to reach ns1 and
# will depend on ns1 being masqueraded in nsr1.
# expect ns1 has nsr1 address.
-ip -net ns2 route del default via 10.0.2.1
-ip -net ns2 route del default via dead:2::1
-ip -net ns2 route add 192.168.10.1 via 10.0.2.1
+ip -net $ns2 route del default via 10.0.2.1
+ip -net $ns2 route del default via dead:2::1
+ip -net $ns2 route add 192.168.10.1 via 10.0.2.1
# Second test:
-# Same, but with NAT enabled.
-ip netns exec nsr1 nft -f - <<EOF
+# Same, but with NAT enabled. Same as in first test: we expect normal forward path
+# to handle most packets.
+ip netns exec $nsr1 nft -f - <<EOF
table ip nat {
chain prerouting {
type nat hook prerouting priority 0; policy accept;
@@ -319,39 +513,117 @@ table ip nat {
}
EOF
-test_tcp_forwarding_nat ns1 ns2
+if ! test_tcp_forwarding_set_dscp $ns1 $ns2 0 ""; then
+ echo "FAIL: flow offload for ns1/ns2 with dscp update" 1>&2
+ exit 0
+fi
-if [ $? -eq 0 ] ;then
- echo "PASS: flow offloaded for ns1/ns2 with NAT"
-else
+if ! test_tcp_forwarding_nat $ns1 $ns2 0 ""; then
echo "FAIL: flow offload for ns1/ns2 with NAT" 1>&2
- ip netns exec nsr1 nft list ruleset
+ ip netns exec $nsr1 nft list ruleset
ret=1
fi
# Third test:
-# Same as second test, but with PMTU discovery enabled.
-handle=$(ip netns exec nsr1 nft -a list table inet filter | grep something-to-grep-for | cut -d \# -f 2)
+# Same as second test, but with PMTU discovery enabled. This
+# means that we expect the fastpath to handle packets as soon
+# as the endpoints adjust the packet size.
+ip netns exec $ns1 sysctl net.ipv4.ip_no_pmtu_disc=0 > /dev/null
+ip netns exec $ns2 sysctl net.ipv4.ip_no_pmtu_disc=0 > /dev/null
+
+# reset counters.
+# With pmtu in-place we'll also check that nft counters
+# are lower than file size and packets were forwarded via flowtable layer.
+# For earlier tests (large mtus), packets cannot be handled via flowtable
+# (except pure acks and other small packets).
+ip netns exec $nsr1 nft reset counters table inet filter >/dev/null
+
+if ! test_tcp_forwarding_nat $ns1 $ns2 1 ""; then
+ echo "FAIL: flow offload for ns1/ns2 with NAT and pmtu discovery" 1>&2
+ ip netns exec $nsr1 nft list ruleset
+fi
-ip netns exec nsr1 nft delete rule inet filter forward $handle
-if [ $? -ne 0 ] ;then
- echo "FAIL: Could not delete large-packet accept rule"
- exit 1
+# Another test:
+# Add bridge interface br0 to Router1, with NAT enabled.
+ip -net $nsr1 link add name br0 type bridge
+ip -net $nsr1 addr flush dev veth0
+ip -net $nsr1 link set up dev veth0
+ip -net $nsr1 link set veth0 master br0
+ip -net $nsr1 addr add 10.0.1.1/24 dev br0
+ip -net $nsr1 addr add dead:1::1/64 dev br0
+ip -net $nsr1 link set up dev br0
+
+ip netns exec $nsr1 sysctl net.ipv4.conf.br0.forwarding=1 > /dev/null
+
+# br0 with NAT enabled.
+ip netns exec $nsr1 nft -f - <<EOF
+flush table ip nat
+table ip nat {
+ chain prerouting {
+ type nat hook prerouting priority 0; policy accept;
+ meta iif "br0" ip daddr 10.6.6.6 tcp dport 1666 counter dnat ip to 10.0.2.99:12345
+ }
+
+ chain postrouting {
+ type nat hook postrouting priority 0; policy accept;
+ meta oifname "veth1" counter masquerade
+ }
+}
+EOF
+
+if ! test_tcp_forwarding_nat $ns1 $ns2 1 "on bridge"; then
+ echo "FAIL: flow offload for ns1/ns2 with bridge NAT" 1>&2
+ ip netns exec $nsr1 nft list ruleset
+ ret=1
fi
-ip netns exec ns1 sysctl net.ipv4.ip_no_pmtu_disc=0 > /dev/null
-ip netns exec ns2 sysctl net.ipv4.ip_no_pmtu_disc=0 > /dev/null
-test_tcp_forwarding_nat ns1 ns2
-if [ $? -eq 0 ] ;then
- echo "PASS: flow offloaded for ns1/ns2 with NAT and pmtu discovery"
-else
- echo "FAIL: flow offload for ns1/ns2 with NAT and pmtu discovery" 1>&2
- ip netns exec nsr1 nft list ruleset
+# Another test:
+# Add bridge interface br0 to Router1, with NAT and VLAN.
+ip -net $nsr1 link set veth0 nomaster
+ip -net $nsr1 link set down dev veth0
+ip -net $nsr1 link add link veth0 name veth0.10 type vlan id 10
+ip -net $nsr1 link set up dev veth0
+ip -net $nsr1 link set up dev veth0.10
+ip -net $nsr1 link set veth0.10 master br0
+
+ip -net $ns1 addr flush dev eth0
+ip -net $ns1 link add link eth0 name eth0.10 type vlan id 10
+ip -net $ns1 link set eth0 up
+ip -net $ns1 link set eth0.10 up
+ip -net $ns1 addr add 10.0.1.99/24 dev eth0.10
+ip -net $ns1 route add default via 10.0.1.1
+ip -net $ns1 addr add dead:1::99/64 dev eth0.10
+
+if ! test_tcp_forwarding_nat $ns1 $ns2 1 "bridge and VLAN"; then
+ echo "FAIL: flow offload for ns1/ns2 with bridge NAT and VLAN" 1>&2
+ ip netns exec $nsr1 nft list ruleset
+ ret=1
fi
-KEY_SHA="0x"$(ps -xaf | sha1sum | cut -d " " -f 1)
-KEY_AES="0x"$(ps -xaf | md5sum | cut -d " " -f 1)
+# restore test topology (remove bridge and VLAN)
+ip -net $nsr1 link set veth0 nomaster
+ip -net $nsr1 link set veth0 down
+ip -net $nsr1 link set veth0.10 down
+ip -net $nsr1 link delete veth0.10 type vlan
+ip -net $nsr1 link delete br0 type bridge
+ip -net $ns1 addr flush dev eth0.10
+ip -net $ns1 link set eth0.10 down
+ip -net $ns1 link set eth0 down
+ip -net $ns1 link delete eth0.10 type vlan
+
+# restore address in ns1 and nsr1
+ip -net $ns1 link set eth0 up
+ip -net $ns1 addr add 10.0.1.99/24 dev eth0
+ip -net $ns1 route add default via 10.0.1.1
+ip -net $ns1 addr add dead:1::99/64 dev eth0
+ip -net $ns1 route add default via dead:1::1
+ip -net $nsr1 addr add 10.0.1.1/24 dev veth0
+ip -net $nsr1 addr add dead:1::1/64 dev veth0
+ip -net $nsr1 link set up dev veth0
+
+KEY_SHA="0x"$(ps -af | sha1sum | cut -d " " -f 1)
+KEY_AES="0x"$(ps -af | md5sum | cut -d " " -f 1)
SPI1=$RANDOM
SPI2=$RANDOM
@@ -378,24 +650,23 @@ do_esp() {
}
-do_esp nsr1 192.168.10.1 192.168.10.2 10.0.1.0/24 10.0.2.0/24 $SPI1 $SPI2
+do_esp $nsr1 192.168.10.1 192.168.10.2 10.0.1.0/24 10.0.2.0/24 $SPI1 $SPI2
-do_esp nsr2 192.168.10.2 192.168.10.1 10.0.2.0/24 10.0.1.0/24 $SPI2 $SPI1
+do_esp $nsr2 192.168.10.2 192.168.10.1 10.0.2.0/24 10.0.1.0/24 $SPI2 $SPI1
-ip netns exec nsr1 nft delete table ip nat
+ip netns exec $nsr1 nft delete table ip nat
# restore default routes
-ip -net ns2 route del 192.168.10.1 via 10.0.2.1
-ip -net ns2 route add default via 10.0.2.1
-ip -net ns2 route add default via dead:2::1
+ip -net $ns2 route del 192.168.10.1 via 10.0.2.1
+ip -net $ns2 route add default via 10.0.2.1
+ip -net $ns2 route add default via dead:2::1
-test_tcp_forwarding ns1 ns2
-if [ $? -eq 0 ] ;then
- echo "PASS: ipsec tunnel mode for ns1/ns2"
+if test_tcp_forwarding $ns1 $ns2; then
+ check_counters "ipsec tunnel mode for ns1/ns2"
else
echo "FAIL: ipsec tunnel mode for ns1/ns2"
- ip netns exec nsr1 nft list ruleset 1>&2
- ip netns exec nsr1 cat /proc/net/xfrm_stat 1>&2
+ ip netns exec $nsr1 nft list ruleset 1>&2
+ ip netns exec $nsr1 cat /proc/net/xfrm_stat 1>&2
fi
exit $ret
diff --git a/tools/testing/selftests/netfilter/nft_meta.sh b/tools/testing/selftests/netfilter/nft_meta.sh
new file mode 100755
index 000000000000..f33154c04d34
--- /dev/null
+++ b/tools/testing/selftests/netfilter/nft_meta.sh
@@ -0,0 +1,142 @@
+#!/bin/bash
+
+# check iif/iifname/oifgroup/iiftype match.
+
+# Kselftest framework requirement - SKIP code is 4.
+ksft_skip=4
+sfx=$(mktemp -u "XXXXXXXX")
+ns0="ns0-$sfx"
+
+if ! nft --version > /dev/null 2>&1; then
+ echo "SKIP: Could not run test without nft tool"
+ exit $ksft_skip
+fi
+
+cleanup()
+{
+ ip netns del "$ns0"
+}
+
+ip netns add "$ns0"
+ip -net "$ns0" link set lo up
+ip -net "$ns0" addr add 127.0.0.1 dev lo
+
+trap cleanup EXIT
+
+currentyear=$(date +%Y)
+lastyear=$((currentyear-1))
+ip netns exec "$ns0" nft -f /dev/stdin <<EOF
+table inet filter {
+ counter iifcount {}
+ counter iifnamecount {}
+ counter iifgroupcount {}
+ counter iiftypecount {}
+ counter infproto4count {}
+ counter il4protocounter {}
+ counter imarkcounter {}
+ counter icpu0counter {}
+ counter ilastyearcounter {}
+ counter icurrentyearcounter {}
+
+ counter oifcount {}
+ counter oifnamecount {}
+ counter oifgroupcount {}
+ counter oiftypecount {}
+ counter onfproto4count {}
+ counter ol4protocounter {}
+ counter oskuidcounter {}
+ counter oskgidcounter {}
+ counter omarkcounter {}
+
+ chain input {
+ type filter hook input priority 0; policy accept;
+
+ meta iif lo counter name "iifcount"
+ meta iifname "lo" counter name "iifnamecount"
+ meta iifgroup "default" counter name "iifgroupcount"
+ meta iiftype "loopback" counter name "iiftypecount"
+ meta nfproto ipv4 counter name "infproto4count"
+ meta l4proto icmp counter name "il4protocounter"
+ meta mark 42 counter name "imarkcounter"
+ meta cpu 0 counter name "icpu0counter"
+ meta time "$lastyear-01-01" - "$lastyear-12-31" counter name ilastyearcounter
+ meta time "$currentyear-01-01" - "$currentyear-12-31" counter name icurrentyearcounter
+ }
+
+ chain output {
+ type filter hook output priority 0; policy accept;
+ meta oif lo counter name "oifcount" counter
+ meta oifname "lo" counter name "oifnamecount"
+ meta oifgroup "default" counter name "oifgroupcount"
+ meta oiftype "loopback" counter name "oiftypecount"
+ meta nfproto ipv4 counter name "onfproto4count"
+ meta l4proto icmp counter name "ol4protocounter"
+ meta skuid 0 counter name "oskuidcounter"
+ meta skgid 0 counter name "oskgidcounter"
+ meta mark 42 counter name "omarkcounter"
+ }
+}
+EOF
+
+if [ $? -ne 0 ]; then
+ echo "SKIP: Could not add test ruleset"
+ exit $ksft_skip
+fi
+
+ret=0
+
+check_one_counter()
+{
+ local cname="$1"
+ local want="packets $2"
+ local verbose="$3"
+
+ if ! ip netns exec "$ns0" nft list counter inet filter $cname | grep -q "$want"; then
+ echo "FAIL: $cname, want \"$want\", got"
+ ret=1
+ ip netns exec "$ns0" nft list counter inet filter $cname
+ fi
+}
+
+check_lo_counters()
+{
+ local want="$1"
+ local verbose="$2"
+ local counter
+
+ for counter in iifcount iifnamecount iifgroupcount iiftypecount infproto4count \
+ oifcount oifnamecount oifgroupcount oiftypecount onfproto4count \
+ il4protocounter icurrentyearcounter ol4protocounter \
+ ; do
+ check_one_counter "$counter" "$want" "$verbose"
+ done
+}
+
+check_lo_counters "0" false
+ip netns exec "$ns0" ping -q -c 1 127.0.0.1 -m 42 > /dev/null
+
+check_lo_counters "2" true
+
+check_one_counter oskuidcounter "1" true
+check_one_counter oskgidcounter "1" true
+check_one_counter imarkcounter "1" true
+check_one_counter omarkcounter "1" true
+check_one_counter ilastyearcounter "0" true
+
+if [ $ret -eq 0 ];then
+ echo "OK: nftables meta iif/oif counters at expected values"
+else
+ exit $ret
+fi
+
+#First CPU execution and counter
+taskset -p 01 $$ > /dev/null
+ip netns exec "$ns0" nft reset counters > /dev/null
+ip netns exec "$ns0" ping -q -c 1 127.0.0.1 > /dev/null
+check_one_counter icpu0counter "2" true
+
+if [ $ret -eq 0 ];then
+ echo "OK: nftables meta cpu counter at expected values"
+fi
+
+exit $ret
diff --git a/tools/testing/selftests/netfilter/nft_nat.sh b/tools/testing/selftests/netfilter/nft_nat.sh
index d7e07f4c3d7f..dd40d9f6f259 100755
--- a/tools/testing/selftests/netfilter/nft_nat.sh
+++ b/tools/testing/selftests/netfilter/nft_nat.sh
@@ -374,6 +374,47 @@ EOF
return $lret
}
+test_local_dnat_portonly()
+{
+ local family=$1
+ local daddr=$2
+ local lret=0
+ local sr_s
+ local sr_r
+
+ip netns exec "$ns0" nft -f /dev/stdin <<EOF
+table $family nat {
+ chain output {
+ type nat hook output priority 0; policy accept;
+ meta l4proto tcp dnat to :2000
+
+ }
+}
+EOF
+ if [ $? -ne 0 ]; then
+ if [ $family = "inet" ];then
+ echo "SKIP: inet port test"
+ test_inet_nat=false
+ return
+ fi
+ echo "SKIP: Could not add $family dnat hook"
+ return
+ fi
+
+ echo SERVER-$family | ip netns exec "$ns1" timeout 5 socat -u STDIN TCP-LISTEN:2000 &
+ sc_s=$!
+
+ sleep 1
+
+ result=$(ip netns exec "$ns0" timeout 1 socat TCP:$daddr:2000 STDOUT)
+
+ if [ "$result" = "SERVER-inet" ];then
+ echo "PASS: inet port rewrite without l3 address"
+ else
+ echo "ERROR: inet port rewrite"
+ ret=1
+ fi
+}
test_masquerade6()
{
@@ -741,6 +782,300 @@ EOF
return $lret
}
+# test port shadowing.
+# create two listening services, one on router (ns0), one
+# on client (ns2), which is masqueraded from ns1 point of view.
+# ns2 sends udp packet coming from service port to ns1, on a highport.
+# Later, if n1 uses same highport to connect to ns0:service, packet
+# might be port-forwarded to ns2 instead.
+
+# second argument tells if we expect the 'fake-entry' to take effect
+# (CLIENT) or not (ROUTER).
+test_port_shadow()
+{
+ local test=$1
+ local expect=$2
+ local daddrc="10.0.1.99"
+ local daddrs="10.0.1.1"
+ local result=""
+ local logmsg=""
+
+ # make shadow entry, from client (ns2), going to (ns1), port 41404, sport 1405.
+ echo "fake-entry" | ip netns exec "$ns2" timeout 1 socat -u STDIN UDP:"$daddrc":41404,sourceport=1405
+
+ echo ROUTER | ip netns exec "$ns0" timeout 5 socat -u STDIN UDP4-LISTEN:1405 &
+ sc_r=$!
+
+ echo CLIENT | ip netns exec "$ns2" timeout 5 socat -u STDIN UDP4-LISTEN:1405,reuseport &
+ sc_c=$!
+
+ sleep 0.3
+
+ # ns1 tries to connect to ns0:1405. With default settings this should connect
+ # to client, it matches the conntrack entry created above.
+
+ result=$(echo "data" | ip netns exec "$ns1" timeout 1 socat - UDP:"$daddrs":1405,sourceport=41404)
+
+ if [ "$result" = "$expect" ] ;then
+ echo "PASS: portshadow test $test: got reply from ${expect}${logmsg}"
+ else
+ echo "ERROR: portshadow test $test: got reply from \"$result\", not $expect as intended"
+ ret=1
+ fi
+
+ kill $sc_r $sc_c 2>/dev/null
+
+ # flush udp entries for next test round, if any
+ ip netns exec "$ns0" conntrack -F >/dev/null 2>&1
+}
+
+# This prevents port shadow of router service via packet filter,
+# packets claiming to originate from service port from internal
+# network are dropped.
+test_port_shadow_filter()
+{
+ local family=$1
+
+ip netns exec "$ns0" nft -f /dev/stdin <<EOF
+table $family filter {
+ chain forward {
+ type filter hook forward priority 0; policy accept;
+ meta iif veth1 udp sport 1405 drop
+ }
+}
+EOF
+ test_port_shadow "port-filter" "ROUTER"
+
+ ip netns exec "$ns0" nft delete table $family filter
+}
+
+# This prevents port shadow of router service via notrack.
+test_port_shadow_notrack()
+{
+ local family=$1
+
+ip netns exec "$ns0" nft -f /dev/stdin <<EOF
+table $family raw {
+ chain prerouting {
+ type filter hook prerouting priority -300; policy accept;
+ meta iif veth0 udp dport 1405 notrack
+ }
+ chain output {
+ type filter hook output priority -300; policy accept;
+ meta oif veth0 udp sport 1405 notrack
+ }
+}
+EOF
+ test_port_shadow "port-notrack" "ROUTER"
+
+ ip netns exec "$ns0" nft delete table $family raw
+}
+
+# This prevents port shadow of router service via sport remap.
+test_port_shadow_pat()
+{
+ local family=$1
+
+ip netns exec "$ns0" nft -f /dev/stdin <<EOF
+table $family pat {
+ chain postrouting {
+ type nat hook postrouting priority -1; policy accept;
+ meta iif veth1 udp sport <= 1405 masquerade to : 1406-65535 random
+ }
+}
+EOF
+ test_port_shadow "pat" "ROUTER"
+
+ ip netns exec "$ns0" nft delete table $family pat
+}
+
+test_port_shadowing()
+{
+ local family="ip"
+
+ conntrack -h >/dev/null 2>&1
+ if [ $? -ne 0 ];then
+ echo "SKIP: Could not run nat port shadowing test without conntrack tool"
+ return
+ fi
+
+ socat -h > /dev/null 2>&1
+ if [ $? -ne 0 ];then
+ echo "SKIP: Could not run nat port shadowing test without socat tool"
+ return
+ fi
+
+ ip netns exec "$ns0" sysctl net.ipv4.conf.veth0.forwarding=1 > /dev/null
+ ip netns exec "$ns0" sysctl net.ipv4.conf.veth1.forwarding=1 > /dev/null
+
+ ip netns exec "$ns0" nft -f /dev/stdin <<EOF
+table $family nat {
+ chain postrouting {
+ type nat hook postrouting priority 0; policy accept;
+ meta oif veth0 masquerade
+ }
+}
+EOF
+ if [ $? -ne 0 ]; then
+ echo "SKIP: Could not add add $family masquerade hook"
+ return $ksft_skip
+ fi
+
+ # test default behaviour. Packet from ns1 to ns0 is redirected to ns2.
+ test_port_shadow "default" "CLIENT"
+
+ # test packet filter based mitigation: prevent forwarding of
+ # packets claiming to come from the service port.
+ test_port_shadow_filter "$family"
+
+ # test conntrack based mitigation: connections going or coming
+ # from router:service bypass connection tracking.
+ test_port_shadow_notrack "$family"
+
+ # test nat based mitigation: fowarded packets coming from service port
+ # are masqueraded with random highport.
+ test_port_shadow_pat "$family"
+
+ ip netns exec "$ns0" nft delete table $family nat
+}
+
+test_stateless_nat_ip()
+{
+ local lret=0
+
+ ip netns exec "$ns0" sysctl net.ipv4.conf.veth0.forwarding=1 > /dev/null
+ ip netns exec "$ns0" sysctl net.ipv4.conf.veth1.forwarding=1 > /dev/null
+
+ ip netns exec "$ns2" ping -q -c 1 10.0.1.99 > /dev/null # ping ns2->ns1
+ if [ $? -ne 0 ] ; then
+ echo "ERROR: cannot ping $ns1 from $ns2 before loading stateless rules"
+ return 1
+ fi
+
+ip netns exec "$ns0" nft -f /dev/stdin <<EOF
+table ip stateless {
+ map xlate_in {
+ typeof meta iifname . ip saddr . ip daddr : ip daddr
+ elements = {
+ "veth1" . 10.0.2.99 . 10.0.1.99 : 10.0.2.2,
+ }
+ }
+ map xlate_out {
+ typeof meta iifname . ip saddr . ip daddr : ip daddr
+ elements = {
+ "veth0" . 10.0.1.99 . 10.0.2.2 : 10.0.2.99
+ }
+ }
+
+ chain prerouting {
+ type filter hook prerouting priority -400; policy accept;
+ ip saddr set meta iifname . ip saddr . ip daddr map @xlate_in
+ ip daddr set meta iifname . ip saddr . ip daddr map @xlate_out
+ }
+}
+EOF
+ if [ $? -ne 0 ]; then
+ echo "SKIP: Could not add ip statless rules"
+ return $ksft_skip
+ fi
+
+ reset_counters
+
+ ip netns exec "$ns2" ping -q -c 1 10.0.1.99 > /dev/null # ping ns2->ns1
+ if [ $? -ne 0 ] ; then
+ echo "ERROR: cannot ping $ns1 from $ns2 with stateless rules"
+ lret=1
+ fi
+
+ # ns1 should have seen packets from .2.2, due to stateless rewrite.
+ expect="packets 1 bytes 84"
+ cnt=$(ip netns exec "$ns1" nft list counter inet filter ns0insl | grep -q "$expect")
+ if [ $? -ne 0 ]; then
+ bad_counter "$ns1" ns0insl "$expect" "test_stateless 1"
+ lret=1
+ fi
+
+ for dir in "in" "out" ; do
+ cnt=$(ip netns exec "$ns2" nft list counter inet filter ns1${dir} | grep -q "$expect")
+ if [ $? -ne 0 ]; then
+ bad_counter "$ns2" ns1$dir "$expect" "test_stateless 2"
+ lret=1
+ fi
+ done
+
+ # ns1 should not have seen packets from ns2, due to masquerade
+ expect="packets 0 bytes 0"
+ for dir in "in" "out" ; do
+ cnt=$(ip netns exec "$ns1" nft list counter inet filter ns2${dir} | grep -q "$expect")
+ if [ $? -ne 0 ]; then
+ bad_counter "$ns1" ns0$dir "$expect" "test_stateless 3"
+ lret=1
+ fi
+
+ cnt=$(ip netns exec "$ns0" nft list counter inet filter ns1${dir} | grep -q "$expect")
+ if [ $? -ne 0 ]; then
+ bad_counter "$ns0" ns1$dir "$expect" "test_stateless 4"
+ lret=1
+ fi
+ done
+
+ reset_counters
+
+ socat -h > /dev/null 2>&1
+ if [ $? -ne 0 ];then
+ echo "SKIP: Could not run stateless nat frag test without socat tool"
+ if [ $lret -eq 0 ]; then
+ return $ksft_skip
+ fi
+
+ ip netns exec "$ns0" nft delete table ip stateless
+ return $lret
+ fi
+
+ local tmpfile=$(mktemp)
+ dd if=/dev/urandom of=$tmpfile bs=4096 count=1 2>/dev/null
+
+ local outfile=$(mktemp)
+ ip netns exec "$ns1" timeout 3 socat -u UDP4-RECV:4233 OPEN:$outfile < /dev/null &
+ sc_r=$!
+
+ sleep 1
+ # re-do with large ping -> ip fragmentation
+ ip netns exec "$ns2" timeout 3 socat - UDP4-SENDTO:"10.0.1.99:4233" < "$tmpfile" > /dev/null
+ if [ $? -ne 0 ] ; then
+ echo "ERROR: failed to test udp $ns1 to $ns2 with stateless ip nat" 1>&2
+ lret=1
+ fi
+
+ wait
+
+ cmp "$tmpfile" "$outfile"
+ if [ $? -ne 0 ]; then
+ ls -l "$tmpfile" "$outfile"
+ echo "ERROR: in and output file mismatch when checking udp with stateless nat" 1>&2
+ lret=1
+ fi
+
+ rm -f "$tmpfile" "$outfile"
+
+ # ns1 should have seen packets from 2.2, due to stateless rewrite.
+ expect="packets 3 bytes 4164"
+ cnt=$(ip netns exec "$ns1" nft list counter inet filter ns0insl | grep -q "$expect")
+ if [ $? -ne 0 ]; then
+ bad_counter "$ns1" ns0insl "$expect" "test_stateless 5"
+ lret=1
+ fi
+
+ ip netns exec "$ns0" nft delete table ip stateless
+ if [ $? -ne 0 ]; then
+ echo "ERROR: Could not delete table ip stateless" 1>&2
+ lret=1
+ fi
+
+ test $lret -eq 0 && echo "PASS: IP statless for $ns2"
+
+ return $lret
+}
# ip netns exec "$ns0" ping -c 1 -q 10.0.$i.99
for i in 0 1 2; do
@@ -808,6 +1143,19 @@ table inet filter {
EOF
done
+# special case for stateless nat check, counter needs to
+# be done before (input) ip defragmentation
+ip netns exec ns1-$sfx nft -f /dev/stdin <<EOF
+table inet filter {
+ counter ns0insl {}
+
+ chain pre {
+ type filter hook prerouting priority -400; policy accept;
+ ip saddr 10.0.2.2 counter name "ns0insl"
+ }
+}
+EOF
+
sleep 3
# test basic connectivity
for i in 1 2; do
@@ -841,6 +1189,10 @@ fi
reset_counters
test_local_dnat ip
test_local_dnat6 ip6
+
+reset_counters
+test_local_dnat_portonly inet 10.0.1.99
+
reset_counters
$test_inet_nat && test_local_dnat inet
$test_inet_nat && test_local_dnat6 inet
@@ -861,6 +1213,9 @@ reset_counters
$test_inet_nat && test_redirect inet
$test_inet_nat && test_redirect6 inet
+test_port_shadowing
+test_stateless_nat_ip
+
if [ $ret -ne 0 ];then
echo -n "FAIL: "
nft --version
diff --git a/tools/testing/selftests/netfilter/nft_nat_zones.sh b/tools/testing/selftests/netfilter/nft_nat_zones.sh
new file mode 100755
index 000000000000..b9ab37380f33
--- /dev/null
+++ b/tools/testing/selftests/netfilter/nft_nat_zones.sh
@@ -0,0 +1,309 @@
+#!/bin/bash
+#
+# Test connection tracking zone and NAT source port reallocation support.
+#
+
+# Kselftest framework requirement - SKIP code is 4.
+ksft_skip=4
+
+# Don't increase too much, 2000 clients should work
+# just fine but script can then take several minutes with
+# KASAN/debug builds.
+maxclients=100
+
+have_iperf=1
+ret=0
+
+# client1---.
+# veth1-.
+# |
+# NAT Gateway --veth0--> Server
+# | |
+# veth2-' |
+# client2---' |
+# .... |
+# clientX----vethX---'
+
+# All clients share identical IP address.
+# NAT Gateway uses policy routing and conntrack zones to isolate client
+# namespaces. Each client connects to Server, each with colliding tuples:
+# clientsaddr:10000 -> serveraddr:dport
+# NAT Gateway is supposed to do port reallocation for each of the
+# connections.
+
+sfx=$(mktemp -u "XXXXXXXX")
+gw="ns-gw-$sfx"
+cl1="ns-cl1-$sfx"
+cl2="ns-cl2-$sfx"
+srv="ns-srv-$sfx"
+
+v4gc1=$(sysctl -n net.ipv4.neigh.default.gc_thresh1 2>/dev/null)
+v4gc2=$(sysctl -n net.ipv4.neigh.default.gc_thresh2 2>/dev/null)
+v4gc3=$(sysctl -n net.ipv4.neigh.default.gc_thresh3 2>/dev/null)
+v6gc1=$(sysctl -n net.ipv6.neigh.default.gc_thresh1 2>/dev/null)
+v6gc2=$(sysctl -n net.ipv6.neigh.default.gc_thresh2 2>/dev/null)
+v6gc3=$(sysctl -n net.ipv6.neigh.default.gc_thresh3 2>/dev/null)
+
+cleanup()
+{
+ ip netns del $gw
+ ip netns del $srv
+ for i in $(seq 1 $maxclients); do
+ ip netns del ns-cl$i-$sfx 2>/dev/null
+ done
+
+ sysctl -q net.ipv4.neigh.default.gc_thresh1=$v4gc1 2>/dev/null
+ sysctl -q net.ipv4.neigh.default.gc_thresh2=$v4gc2 2>/dev/null
+ sysctl -q net.ipv4.neigh.default.gc_thresh3=$v4gc3 2>/dev/null
+ sysctl -q net.ipv6.neigh.default.gc_thresh1=$v6gc1 2>/dev/null
+ sysctl -q net.ipv6.neigh.default.gc_thresh2=$v6gc2 2>/dev/null
+ sysctl -q net.ipv6.neigh.default.gc_thresh3=$v6gc3 2>/dev/null
+}
+
+nft --version > /dev/null 2>&1
+if [ $? -ne 0 ];then
+ echo "SKIP: Could not run test without nft tool"
+ exit $ksft_skip
+fi
+
+ip -Version > /dev/null 2>&1
+if [ $? -ne 0 ];then
+ echo "SKIP: Could not run test without ip tool"
+ exit $ksft_skip
+fi
+
+conntrack -V > /dev/null 2>&1
+if [ $? -ne 0 ];then
+ echo "SKIP: Could not run test without conntrack tool"
+ exit $ksft_skip
+fi
+
+iperf3 -v >/dev/null 2>&1
+if [ $? -ne 0 ];then
+ have_iperf=0
+fi
+
+ip netns add "$gw"
+if [ $? -ne 0 ];then
+ echo "SKIP: Could not create net namespace $gw"
+ exit $ksft_skip
+fi
+ip -net "$gw" link set lo up
+
+trap cleanup EXIT
+
+ip netns add "$srv"
+if [ $? -ne 0 ];then
+ echo "SKIP: Could not create server netns $srv"
+ exit $ksft_skip
+fi
+
+ip link add veth0 netns "$gw" type veth peer name eth0 netns "$srv"
+ip -net "$gw" link set veth0 up
+ip -net "$srv" link set lo up
+ip -net "$srv" link set eth0 up
+
+sysctl -q net.ipv6.neigh.default.gc_thresh1=512 2>/dev/null
+sysctl -q net.ipv6.neigh.default.gc_thresh2=1024 2>/dev/null
+sysctl -q net.ipv6.neigh.default.gc_thresh3=4096 2>/dev/null
+sysctl -q net.ipv4.neigh.default.gc_thresh1=512 2>/dev/null
+sysctl -q net.ipv4.neigh.default.gc_thresh2=1024 2>/dev/null
+sysctl -q net.ipv4.neigh.default.gc_thresh3=4096 2>/dev/null
+
+for i in $(seq 1 $maxclients);do
+ cl="ns-cl$i-$sfx"
+
+ ip netns add "$cl"
+ if [ $? -ne 0 ];then
+ echo "SKIP: Could not create client netns $cl"
+ exit $ksft_skip
+ fi
+ ip link add veth$i netns "$gw" type veth peer name eth0 netns "$cl" > /dev/null 2>&1
+ if [ $? -ne 0 ];then
+ echo "SKIP: No virtual ethernet pair device support in kernel"
+ exit $ksft_skip
+ fi
+done
+
+for i in $(seq 1 $maxclients);do
+ cl="ns-cl$i-$sfx"
+ echo netns exec "$cl" ip link set lo up
+ echo netns exec "$cl" ip link set eth0 up
+ echo netns exec "$cl" sysctl -q net.ipv4.tcp_syn_retries=2
+ echo netns exec "$gw" ip link set veth$i up
+ echo netns exec "$gw" sysctl -q net.ipv4.conf.veth$i.arp_ignore=2
+ echo netns exec "$gw" sysctl -q net.ipv4.conf.veth$i.rp_filter=0
+
+ # clients have same IP addresses.
+ echo netns exec "$cl" ip addr add 10.1.0.3/24 dev eth0
+ echo netns exec "$cl" ip addr add dead:1::3/64 dev eth0
+ echo netns exec "$cl" ip route add default via 10.1.0.2 dev eth0
+ echo netns exec "$cl" ip route add default via dead:1::2 dev eth0
+
+ # NB: same addresses on client-facing interfaces.
+ echo netns exec "$gw" ip addr add 10.1.0.2/24 dev veth$i
+ echo netns exec "$gw" ip addr add dead:1::2/64 dev veth$i
+
+ # gw: policy routing
+ echo netns exec "$gw" ip route add 10.1.0.0/24 dev veth$i table $((1000+i))
+ echo netns exec "$gw" ip route add dead:1::0/64 dev veth$i table $((1000+i))
+ echo netns exec "$gw" ip route add 10.3.0.0/24 dev veth0 table $((1000+i))
+ echo netns exec "$gw" ip route add dead:3::0/64 dev veth0 table $((1000+i))
+ echo netns exec "$gw" ip rule add fwmark $i lookup $((1000+i))
+done | ip -batch /dev/stdin
+
+ip -net "$gw" addr add 10.3.0.1/24 dev veth0
+ip -net "$gw" addr add dead:3::1/64 dev veth0
+
+ip -net "$srv" addr add 10.3.0.99/24 dev eth0
+ip -net "$srv" addr add dead:3::99/64 dev eth0
+
+ip netns exec $gw nft -f /dev/stdin<<EOF
+table inet raw {
+ map iiftomark {
+ type ifname : mark
+ }
+
+ map iiftozone {
+ typeof iifname : ct zone
+ }
+
+ set inicmp {
+ flags dynamic
+ type ipv4_addr . ifname . ipv4_addr
+ }
+ set inflows {
+ flags dynamic
+ type ipv4_addr . inet_service . ifname . ipv4_addr . inet_service
+ }
+
+ set inflows6 {
+ flags dynamic
+ type ipv6_addr . inet_service . ifname . ipv6_addr . inet_service
+ }
+
+ chain prerouting {
+ type filter hook prerouting priority -64000; policy accept;
+ ct original zone set meta iifname map @iiftozone
+ meta mark set meta iifname map @iiftomark
+
+ tcp flags & (syn|ack) == ack add @inflows { ip saddr . tcp sport . meta iifname . ip daddr . tcp dport counter }
+ add @inflows6 { ip6 saddr . tcp sport . meta iifname . ip6 daddr . tcp dport counter }
+ ip protocol icmp add @inicmp { ip saddr . meta iifname . ip daddr counter }
+ }
+
+ chain nat_postrouting {
+ type nat hook postrouting priority 0; policy accept;
+ ct mark set meta mark meta oifname veth0 masquerade
+ }
+
+ chain mangle_prerouting {
+ type filter hook prerouting priority -100; policy accept;
+ ct direction reply meta mark set ct mark
+ }
+}
+EOF
+
+( echo add element inet raw iiftomark \{
+ for i in $(seq 1 $((maxclients-1))); do
+ echo \"veth$i\" : $i,
+ done
+ echo \"veth$maxclients\" : $maxclients \}
+ echo add element inet raw iiftozone \{
+ for i in $(seq 1 $((maxclients-1))); do
+ echo \"veth$i\" : $i,
+ done
+ echo \"veth$maxclients\" : $maxclients \}
+) | ip netns exec $gw nft -f /dev/stdin
+
+ip netns exec "$gw" sysctl -q net.ipv4.conf.all.forwarding=1 > /dev/null
+ip netns exec "$gw" sysctl -q net.ipv6.conf.all.forwarding=1 > /dev/null
+ip netns exec "$gw" sysctl -q net.ipv4.conf.all.rp_filter=0 >/dev/null
+
+# useful for debugging: allows to use 'ping' from clients to gateway.
+ip netns exec "$gw" sysctl -q net.ipv4.fwmark_reflect=1 > /dev/null
+ip netns exec "$gw" sysctl -q net.ipv6.fwmark_reflect=1 > /dev/null
+
+for i in $(seq 1 $maxclients); do
+ cl="ns-cl$i-$sfx"
+ ip netns exec $cl ping -i 0.5 -q -c 3 10.3.0.99 > /dev/null 2>&1 &
+ if [ $? -ne 0 ]; then
+ echo FAIL: Ping failure from $cl 1>&2
+ ret=1
+ break
+ fi
+done
+
+wait
+
+for i in $(seq 1 $maxclients); do
+ ip netns exec $gw nft get element inet raw inicmp "{ 10.1.0.3 . \"veth$i\" . 10.3.0.99 }" | grep -q "{ 10.1.0.3 . \"veth$i\" . 10.3.0.99 counter packets 3 bytes 252 }"
+ if [ $? -ne 0 ];then
+ ret=1
+ echo "FAIL: counter icmp mismatch for veth$i" 1>&2
+ ip netns exec $gw nft get element inet raw inicmp "{ 10.1.0.3 . \"veth$i\" . 10.3.0.99 }" 1>&2
+ break
+ fi
+done
+
+ip netns exec $gw nft get element inet raw inicmp "{ 10.3.0.99 . \"veth0\" . 10.3.0.1 }" | grep -q "{ 10.3.0.99 . \"veth0\" . 10.3.0.1 counter packets $((3 * $maxclients)) bytes $((252 * $maxclients)) }"
+if [ $? -ne 0 ];then
+ ret=1
+ echo "FAIL: counter icmp mismatch for veth0: { 10.3.0.99 . \"veth0\" . 10.3.0.1 counter packets $((3 * $maxclients)) bytes $((252 * $maxclients)) }"
+ ip netns exec $gw nft get element inet raw inicmp "{ 10.3.99 . \"veth0\" . 10.3.0.1 }" 1>&2
+fi
+
+if [ $ret -eq 0 ]; then
+ echo "PASS: ping test from all $maxclients namespaces"
+fi
+
+if [ $have_iperf -eq 0 ];then
+ echo "SKIP: iperf3 not installed"
+ if [ $ret -ne 0 ];then
+ exit $ret
+ fi
+ exit $ksft_skip
+fi
+
+ip netns exec $srv iperf3 -s > /dev/null 2>&1 &
+iperfpid=$!
+sleep 1
+
+for i in $(seq 1 $maxclients); do
+ if [ $ret -ne 0 ]; then
+ break
+ fi
+ cl="ns-cl$i-$sfx"
+ ip netns exec $cl iperf3 -c 10.3.0.99 --cport 10000 -n 1 > /dev/null
+ if [ $? -ne 0 ]; then
+ echo FAIL: Failure to connect for $cl 1>&2
+ ip netns exec $gw conntrack -S 1>&2
+ ret=1
+ fi
+done
+if [ $ret -eq 0 ];then
+ echo "PASS: iperf3 connections for all $maxclients net namespaces"
+fi
+
+kill $iperfpid
+wait
+
+for i in $(seq 1 $maxclients); do
+ ip netns exec $gw nft get element inet raw inflows "{ 10.1.0.3 . 10000 . \"veth$i\" . 10.3.0.99 . 5201 }" > /dev/null
+ if [ $? -ne 0 ];then
+ ret=1
+ echo "FAIL: can't find expected tcp entry for veth$i" 1>&2
+ break
+ fi
+done
+if [ $ret -eq 0 ];then
+ echo "PASS: Found client connection for all $maxclients net namespaces"
+fi
+
+ip netns exec $gw nft get element inet raw inflows "{ 10.3.0.99 . 5201 . \"veth0\" . 10.3.0.1 . 10000 }" > /dev/null
+if [ $? -ne 0 ];then
+ ret=1
+ echo "FAIL: cannot find return entry on veth0" 1>&2
+fi
+
+exit $ret
diff --git a/tools/testing/selftests/netfilter/nft_queue.sh b/tools/testing/selftests/netfilter/nft_queue.sh
index 6898448b4266..e12729753351 100755
--- a/tools/testing/selftests/netfilter/nft_queue.sh
+++ b/tools/testing/selftests/netfilter/nft_queue.sh
@@ -12,14 +12,20 @@ sfx=$(mktemp -u "XXXXXXXX")
ns1="ns1-$sfx"
ns2="ns2-$sfx"
nsrouter="nsrouter-$sfx"
+timeout=4
cleanup()
{
+ ip netns pids ${ns1} | xargs kill 2>/dev/null
+ ip netns pids ${ns2} | xargs kill 2>/dev/null
+ ip netns pids ${nsrouter} | xargs kill 2>/dev/null
+
ip netns del ${ns1}
ip netns del ${ns2}
ip netns del ${nsrouter}
rm -f "$TMPFILE0"
rm -f "$TMPFILE1"
+ rm -f "$TMPFILE2" "$TMPFILE3"
}
nft --version > /dev/null 2>&1
@@ -42,6 +48,8 @@ fi
TMPFILE0=$(mktemp)
TMPFILE1=$(mktemp)
+TMPFILE2=$(mktemp)
+TMPFILE3=$(mktemp)
trap cleanup EXIT
ip netns add ${ns1}
@@ -83,7 +91,7 @@ load_ruleset() {
local name=$1
local prio=$2
-ip netns exec ${nsrouter} nft -f - <<EOF
+ip netns exec ${nsrouter} nft -f /dev/stdin <<EOF
table inet $name {
chain nfq {
ip protocol icmp queue bypass
@@ -105,6 +113,7 @@ table inet $name {
chain output {
type filter hook output priority $prio; policy accept;
tcp dport 12345 queue num 3
+ tcp sport 23456 queue num 3
jump nfq
}
chain post {
@@ -118,7 +127,7 @@ EOF
load_counter_ruleset() {
local prio=$1
-ip netns exec ${nsrouter} nft -f - <<EOF
+ip netns exec ${nsrouter} nft -f /dev/stdin <<EOF
table inet countrules {
chain pre {
type filter hook prerouting priority $prio; policy accept;
@@ -175,7 +184,7 @@ test_ping_router() {
test_queue_blackhole() {
local proto=$1
-ip netns exec ${nsrouter} nft -f - <<EOF
+ip netns exec ${nsrouter} nft -f /dev/stdin <<EOF
table $proto blackh {
chain forward {
type filter hook forward priority 0; policy accept;
@@ -184,10 +193,10 @@ table $proto blackh {
}
EOF
if [ $proto = "ip" ] ;then
- ip netns exec ${ns1} ping -c 1 -q 10.0.2.99 > /dev/null
+ ip netns exec ${ns1} ping -W 2 -c 1 -q 10.0.2.99 > /dev/null
lret=$?
elif [ $proto = "ip6" ]; then
- ip netns exec ${ns1} ping -c 1 -q dead:2::99 > /dev/null
+ ip netns exec ${ns1} ping -W 2 -c 1 -q dead:2::99 > /dev/null
lret=$?
else
lret=111
@@ -214,8 +223,8 @@ test_queue()
local last=""
# spawn nf-queue listeners
- ip netns exec ${nsrouter} ./nf-queue -c -q 0 -t 3 > "$TMPFILE0" &
- ip netns exec ${nsrouter} ./nf-queue -c -q 1 -t 3 > "$TMPFILE1" &
+ ip netns exec ${nsrouter} ./nf-queue -c -q 0 -t $timeout > "$TMPFILE0" &
+ ip netns exec ${nsrouter} ./nf-queue -c -q 1 -t $timeout > "$TMPFILE1" &
sleep 1
test_ping
ret=$?
@@ -250,11 +259,11 @@ test_queue()
test_tcp_forward()
{
- ip netns exec ${nsrouter} ./nf-queue -q 2 -t 10 &
+ ip netns exec ${nsrouter} ./nf-queue -q 2 -t $timeout &
local nfqpid=$!
tmpfile=$(mktemp) || exit 1
- dd conv=sparse status=none if=/dev/zero bs=1M count=100 of=$tmpfile
+ dd conv=sparse status=none if=/dev/zero bs=1M count=200 of=$tmpfile
ip netns exec ${ns2} nc -w 5 -l -p 12345 <"$tmpfile" >/dev/null &
local rpid=$!
@@ -270,15 +279,13 @@ test_tcp_forward()
test_tcp_localhost()
{
- tc -net "${nsrouter}" qdisc add dev lo root netem loss random 1%
-
tmpfile=$(mktemp) || exit 1
- dd conv=sparse status=none if=/dev/zero bs=1M count=900 of=$tmpfile
+ dd conv=sparse status=none if=/dev/zero bs=1M count=200 of=$tmpfile
ip netns exec ${nsrouter} nc -w 5 -l -p 12345 <"$tmpfile" >/dev/null &
local rpid=$!
- ip netns exec ${nsrouter} ./nf-queue -q 3 -t 30 &
+ ip netns exec ${nsrouter} ./nf-queue -q 3 -t $timeout &
local nfqpid=$!
sleep 1
@@ -287,6 +294,113 @@ test_tcp_localhost()
wait $rpid
[ $? -eq 0 ] && echo "PASS: tcp via loopback"
+ wait 2>/dev/null
+}
+
+test_tcp_localhost_connectclose()
+{
+ tmpfile=$(mktemp) || exit 1
+
+ ip netns exec ${nsrouter} ./connect_close -p 23456 -t $timeout &
+
+ ip netns exec ${nsrouter} ./nf-queue -q 3 -t $timeout &
+ local nfqpid=$!
+
+ sleep 1
+ rm -f "$tmpfile"
+
+ wait $rpid
+ [ $? -eq 0 ] && echo "PASS: tcp via loopback with connect/close"
+ wait 2>/dev/null
+}
+
+test_tcp_localhost_requeue()
+{
+ip netns exec ${nsrouter} nft -f /dev/stdin <<EOF
+flush ruleset
+table inet filter {
+ chain output {
+ type filter hook output priority 0; policy accept;
+ tcp dport 12345 limit rate 1/second burst 1 packets counter queue num 0
+ }
+ chain post {
+ type filter hook postrouting priority 0; policy accept;
+ tcp dport 12345 limit rate 1/second burst 1 packets counter queue num 0
+ }
+}
+EOF
+ tmpfile=$(mktemp) || exit 1
+ dd conv=sparse status=none if=/dev/zero bs=1M count=200 of=$tmpfile
+ ip netns exec ${nsrouter} nc -w 5 -l -p 12345 <"$tmpfile" >/dev/null &
+ local rpid=$!
+
+ ip netns exec ${nsrouter} ./nf-queue -c -q 1 -t $timeout > "$TMPFILE2" &
+
+ # nfqueue 1 will be called via output hook. But this time,
+ # re-queue the packet to nfqueue program on queue 2.
+ ip netns exec ${nsrouter} ./nf-queue -G -d 150 -c -q 0 -Q 1 -t $timeout > "$TMPFILE3" &
+
+ sleep 1
+ ip netns exec ${nsrouter} nc -w 5 127.0.0.1 12345 <"$tmpfile" > /dev/null
+ rm -f "$tmpfile"
+
+ wait
+
+ if ! diff -u "$TMPFILE2" "$TMPFILE3" ; then
+ echo "FAIL: lost packets during requeue?!" 1>&2
+ return
+ fi
+
+ echo "PASS: tcp via loopback and re-queueing"
+}
+
+test_icmp_vrf() {
+ ip -net $ns1 link add tvrf type vrf table 9876
+ if [ $? -ne 0 ];then
+ echo "SKIP: Could not add vrf device"
+ return
+ fi
+
+ ip -net $ns1 li set eth0 master tvrf
+ ip -net $ns1 li set tvrf up
+
+ ip -net $ns1 route add 10.0.2.0/24 via 10.0.1.1 dev eth0 table 9876
+ip netns exec ${ns1} nft -f /dev/stdin <<EOF
+flush ruleset
+table inet filter {
+ chain output {
+ type filter hook output priority 0; policy accept;
+ meta oifname "tvrf" icmp type echo-request counter queue num 1
+ meta oifname "eth0" icmp type echo-request counter queue num 1
+ }
+ chain post {
+ type filter hook postrouting priority 0; policy accept;
+ meta oifname "tvrf" icmp type echo-request counter queue num 1
+ meta oifname "eth0" icmp type echo-request counter queue num 1
+ }
+}
+EOF
+ ip netns exec ${ns1} ./nf-queue -q 1 -t $timeout &
+ local nfqpid=$!
+
+ sleep 1
+ ip netns exec ${ns1} ip vrf exec tvrf ping -c 1 10.0.2.99 > /dev/null
+
+ for n in output post; do
+ for d in tvrf eth0; do
+ ip netns exec ${ns1} nft list chain inet filter $n | grep -q "oifname \"$d\" icmp type echo-request counter packets 1"
+ if [ $? -ne 0 ] ; then
+ echo "FAIL: chain $n: icmp packet counter mismatch for device $d" 1>&2
+ ip netns exec ${ns1} nft list ruleset
+ ret=1
+ return
+ fi
+ done
+ done
+
+ wait $nfqpid
+ [ $? -eq 0 ] && echo "PASS: icmp+nfqueue via vrf"
+ wait 2>/dev/null
}
ip netns exec ${nsrouter} sysctl net.ipv6.conf.all.forwarding=1 > /dev/null
@@ -328,5 +442,8 @@ test_queue 20
test_tcp_forward
test_tcp_localhost
+test_tcp_localhost_connectclose
+test_tcp_localhost_requeue
+test_icmp_vrf
exit $ret
diff --git a/tools/testing/selftests/netfilter/nft_synproxy.sh b/tools/testing/selftests/netfilter/nft_synproxy.sh
new file mode 100755
index 000000000000..b62933b680d6
--- /dev/null
+++ b/tools/testing/selftests/netfilter/nft_synproxy.sh
@@ -0,0 +1,117 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+#
+
+# Kselftest framework requirement - SKIP code is 4.
+ksft_skip=4
+ret=0
+
+rnd=$(mktemp -u XXXXXXXX)
+nsr="nsr-$rnd" # synproxy machine
+ns1="ns1-$rnd" # iperf client
+ns2="ns2-$rnd" # iperf server
+
+checktool (){
+ if ! $1 > /dev/null 2>&1; then
+ echo "SKIP: Could not $2"
+ exit $ksft_skip
+ fi
+}
+
+checktool "nft --version" "run test without nft tool"
+checktool "ip -Version" "run test without ip tool"
+checktool "iperf3 --version" "run test without iperf3"
+checktool "ip netns add $nsr" "create net namespace"
+
+modprobe -q nf_conntrack
+
+ip netns add $ns1
+ip netns add $ns2
+
+cleanup() {
+ ip netns pids $ns1 | xargs kill 2>/dev/null
+ ip netns pids $ns2 | xargs kill 2>/dev/null
+ ip netns del $ns1
+ ip netns del $ns2
+
+ ip netns del $nsr
+}
+
+trap cleanup EXIT
+
+ip link add veth0 netns $nsr type veth peer name eth0 netns $ns1
+ip link add veth1 netns $nsr type veth peer name eth0 netns $ns2
+
+for dev in lo veth0 veth1; do
+ip -net $nsr link set $dev up
+done
+
+ip -net $nsr addr add 10.0.1.1/24 dev veth0
+ip -net $nsr addr add 10.0.2.1/24 dev veth1
+
+ip netns exec $nsr sysctl -q net.ipv4.conf.veth0.forwarding=1
+ip netns exec $nsr sysctl -q net.ipv4.conf.veth1.forwarding=1
+ip netns exec $nsr sysctl -q net.netfilter.nf_conntrack_tcp_loose=0
+
+for n in $ns1 $ns2; do
+ ip -net $n link set lo up
+ ip -net $n link set eth0 up
+done
+ip -net $ns1 addr add 10.0.1.99/24 dev eth0
+ip -net $ns2 addr add 10.0.2.99/24 dev eth0
+ip -net $ns1 route add default via 10.0.1.1
+ip -net $ns2 route add default via 10.0.2.1
+
+# test basic connectivity
+if ! ip netns exec $ns1 ping -c 1 -q 10.0.2.99 > /dev/null; then
+ echo "ERROR: $ns1 cannot reach $ns2" 1>&2
+ exit 1
+fi
+
+if ! ip netns exec $ns2 ping -c 1 -q 10.0.1.99 > /dev/null; then
+ echo "ERROR: $ns2 cannot reach $ns1" 1>&2
+ exit 1
+fi
+
+ip netns exec $ns2 iperf3 -s > /dev/null 2>&1 &
+# ip netns exec $nsr tcpdump -vvv -n -i veth1 tcp | head -n 10 &
+
+sleep 1
+
+ip netns exec $nsr nft -f - <<EOF
+table inet filter {
+ chain prerouting {
+ type filter hook prerouting priority -300; policy accept;
+ meta iif veth0 tcp flags syn counter notrack
+ }
+
+ chain forward {
+ type filter hook forward priority 0; policy accept;
+
+ ct state new,established counter accept
+
+ meta iif veth0 meta l4proto tcp ct state untracked,invalid synproxy mss 1460 sack-perm timestamp
+
+ ct state invalid counter drop
+
+ # make ns2 unreachable w.o. tcp synproxy
+ tcp flags syn counter drop
+ }
+}
+EOF
+if [ $? -ne 0 ]; then
+ echo "SKIP: Cannot add nft synproxy"
+ exit $ksft_skip
+fi
+
+ip netns exec $ns1 timeout 5 iperf3 -c 10.0.2.99 -n $((1 * 1024 * 1024)) > /dev/null
+
+if [ $? -ne 0 ]; then
+ echo "FAIL: iperf3 returned an error" 1>&2
+ ret=$?
+ ip netns exec $nsr nft list ruleset
+else
+ echo "PASS: synproxy connection successful"
+fi
+
+exit $ret
diff --git a/tools/testing/selftests/netfilter/nft_trans_stress.sh b/tools/testing/selftests/netfilter/nft_trans_stress.sh
index f1affd12c4b1..2ffba45a78bf 100755
--- a/tools/testing/selftests/netfilter/nft_trans_stress.sh
+++ b/tools/testing/selftests/netfilter/nft_trans_stress.sh
@@ -9,8 +9,35 @@
# Kselftest framework requirement - SKIP code is 4.
ksft_skip=4
-testns=testns1
+testns=testns-$(mktemp -u "XXXXXXXX")
+tmp=""
+
tables="foo bar baz quux"
+global_ret=0
+eret=0
+lret=0
+
+cleanup() {
+ ip netns pids "$testns" | xargs kill 2>/dev/null
+ ip netns del "$testns"
+
+ rm -f "$tmp"
+}
+
+check_result()
+{
+ local r=$1
+ local OK="PASS"
+
+ if [ $r -ne 0 ] ;then
+ OK="FAIL"
+ global_ret=$r
+ fi
+
+ echo "$OK: nft $2 test returned $r"
+
+ eret=0
+}
nft --version > /dev/null 2>&1
if [ $? -ne 0 ];then
@@ -24,6 +51,7 @@ if [ $? -ne 0 ];then
exit $ksft_skip
fi
+trap cleanup EXIT
tmp=$(mktemp)
for table in $tables; do
@@ -59,20 +87,65 @@ done)
sleep 1
+ip netns exec "$testns" nft -f "$tmp"
for i in $(seq 1 10) ; do ip netns exec "$testns" nft -f "$tmp" & done
for table in $tables;do
- randsleep=$((RANDOM%10))
+ randsleep=$((RANDOM%2))
sleep $randsleep
- ip netns exec "$testns" nft delete table inet $table 2>/dev/null
+ ip netns exec "$testns" nft delete table inet $table
+ lret=$?
+ if [ $lret -ne 0 ]; then
+ eret=$lret
+ fi
+done
+
+check_result $eret "add/delete"
+
+for i in $(seq 1 10) ; do
+ (echo "flush ruleset"; cat "$tmp") | ip netns exec "$testns" nft -f /dev/stdin
+
+ lret=$?
+ if [ $lret -ne 0 ]; then
+ eret=$lret
+ fi
+done
+
+check_result $eret "reload"
+
+for i in $(seq 1 10) ; do
+ (echo "flush ruleset"; cat "$tmp"
+ echo "insert rule inet foo INPUT meta nftrace set 1"
+ echo "insert rule inet foo OUTPUT meta nftrace set 1"
+ ) | ip netns exec "$testns" nft -f /dev/stdin
+ lret=$?
+ if [ $lret -ne 0 ]; then
+ eret=$lret
+ fi
+
+ (echo "flush ruleset"; cat "$tmp"
+ ) | ip netns exec "$testns" nft -f /dev/stdin
+
+ lret=$?
+ if [ $lret -ne 0 ]; then
+ eret=$lret
+ fi
done
-randsleep=$((RANDOM%10))
-sleep $randsleep
+check_result $eret "add/delete with nftrace enabled"
+
+echo "insert rule inet foo INPUT meta nftrace set 1" >> $tmp
+echo "insert rule inet foo OUTPUT meta nftrace set 1" >> $tmp
-pkill -9 ping
+for i in $(seq 1 10) ; do
+ (echo "flush ruleset"; cat "$tmp") | ip netns exec "$testns" nft -f /dev/stdin
+
+ lret=$?
+ if [ $lret -ne 0 ]; then
+ eret=1
+ fi
+done
-wait
+check_result $lret "add/delete with nftrace enabled"
-rm -f "$tmp"
-ip netns del "$testns"
+exit $global_ret
diff --git a/tools/testing/selftests/netfilter/nft_zones_many.sh b/tools/testing/selftests/netfilter/nft_zones_many.sh
new file mode 100755
index 000000000000..5a8db0b48928
--- /dev/null
+++ b/tools/testing/selftests/netfilter/nft_zones_many.sh
@@ -0,0 +1,163 @@
+#!/bin/bash
+
+# Test insertion speed for packets with identical addresses/ports
+# that are all placed in distinct conntrack zones.
+
+sfx=$(mktemp -u "XXXXXXXX")
+ns="ns-$sfx"
+
+# Kselftest framework requirement - SKIP code is 4.
+ksft_skip=4
+
+zones=2000
+have_ct_tool=0
+ret=0
+
+cleanup()
+{
+ ip netns del $ns
+}
+
+checktool (){
+ if ! $1 > /dev/null 2>&1; then
+ echo "SKIP: Could not $2"
+ exit $ksft_skip
+ fi
+}
+
+checktool "nft --version" "run test without nft tool"
+checktool "ip -Version" "run test without ip tool"
+checktool "socat -V" "run test without socat tool"
+checktool "ip netns add $ns" "create net namespace"
+
+trap cleanup EXIT
+
+conntrack -V > /dev/null 2>&1
+if [ $? -eq 0 ];then
+ have_ct_tool=1
+fi
+
+ip -net "$ns" link set lo up
+
+test_zones() {
+ local max_zones=$1
+
+ip netns exec $ns sysctl -q net.netfilter.nf_conntrack_udp_timeout=3600
+ip netns exec $ns nft -f /dev/stdin<<EOF
+flush ruleset
+table inet raw {
+ map rndzone {
+ typeof numgen inc mod $max_zones : ct zone
+ }
+
+ chain output {
+ type filter hook output priority -64000; policy accept;
+ udp dport 12345 ct zone set numgen inc mod 65536 map @rndzone
+ }
+}
+EOF
+ (
+ echo "add element inet raw rndzone {"
+ for i in $(seq 1 $max_zones);do
+ echo -n "$i : $i"
+ if [ $i -lt $max_zones ]; then
+ echo ","
+ else
+ echo "}"
+ fi
+ done
+ ) | ip netns exec $ns nft -f /dev/stdin
+
+ local i=0
+ local j=0
+ local outerstart=$(date +%s%3N)
+ local stop=$outerstart
+
+ while [ $i -lt $max_zones ]; do
+ local start=$(date +%s%3N)
+ i=$((i + 1000))
+ j=$((j + 1))
+ # nft rule in output places each packet in a different zone.
+ dd if=/dev/zero of=/dev/stdout bs=8k count=1000 2>/dev/null | ip netns exec "$ns" socat STDIN UDP:127.0.0.1:12345,sourceport=12345
+ if [ $? -ne 0 ] ;then
+ ret=1
+ break
+ fi
+
+ stop=$(date +%s%3N)
+ local duration=$((stop-start))
+ echo "PASS: added 1000 entries in $duration ms (now $i total, loop $j)"
+ done
+
+ if [ $have_ct_tool -eq 1 ]; then
+ local count=$(ip netns exec "$ns" conntrack -C)
+ local duration=$((stop-outerstart))
+
+ if [ $count -eq $max_zones ]; then
+ echo "PASS: inserted $count entries from packet path in $duration ms total"
+ else
+ ip netns exec $ns conntrack -S 1>&2
+ echo "FAIL: inserted $count entries from packet path in $duration ms total, expected $max_zones entries"
+ ret=1
+ fi
+ fi
+
+ if [ $ret -ne 0 ];then
+ echo "FAIL: insert $max_zones entries from packet path" 1>&2
+ fi
+}
+
+test_conntrack_tool() {
+ local max_zones=$1
+
+ ip netns exec $ns conntrack -F >/dev/null 2>/dev/null
+
+ local outerstart=$(date +%s%3N)
+ local start=$(date +%s%3N)
+ local stop=$start
+ local i=0
+ while [ $i -lt $max_zones ]; do
+ i=$((i + 1))
+ ip netns exec "$ns" conntrack -I -s 1.1.1.1 -d 2.2.2.2 --protonum 6 \
+ --timeout 3600 --state ESTABLISHED --sport 12345 --dport 1000 --zone $i >/dev/null 2>&1
+ if [ $? -ne 0 ];then
+ ip netns exec "$ns" conntrack -I -s 1.1.1.1 -d 2.2.2.2 --protonum 6 \
+ --timeout 3600 --state ESTABLISHED --sport 12345 --dport 1000 --zone $i > /dev/null
+ echo "FAIL: conntrack -I returned an error"
+ ret=1
+ break
+ fi
+
+ if [ $((i%1000)) -eq 0 ];then
+ stop=$(date +%s%3N)
+
+ local duration=$((stop-start))
+ echo "PASS: added 1000 entries in $duration ms (now $i total)"
+ start=$stop
+ fi
+ done
+
+ local count=$(ip netns exec "$ns" conntrack -C)
+ local duration=$((stop-outerstart))
+
+ if [ $count -eq $max_zones ]; then
+ echo "PASS: inserted $count entries via ctnetlink in $duration ms"
+ else
+ ip netns exec $ns conntrack -S 1>&2
+ echo "FAIL: inserted $count entries via ctnetlink in $duration ms, expected $max_zones entries ($duration ms)"
+ ret=1
+ fi
+}
+
+test_zones $zones
+
+if [ $have_ct_tool -eq 1 ];then
+ test_conntrack_tool $zones
+else
+ echo "SKIP: Could not run ctnetlink insertion test without conntrack tool"
+ if [ $ret -eq 0 ];then
+ exit $ksft_skip
+ fi
+fi
+
+exit $ret
diff --git a/tools/testing/selftests/netfilter/rpath.sh b/tools/testing/selftests/netfilter/rpath.sh
new file mode 100755
index 000000000000..5289c8447a41
--- /dev/null
+++ b/tools/testing/selftests/netfilter/rpath.sh
@@ -0,0 +1,169 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+# return code to signal skipped test
+ksft_skip=4
+
+# search for legacy iptables (it uses the xtables extensions
+if iptables-legacy --version >/dev/null 2>&1; then
+ iptables='iptables-legacy'
+elif iptables --version >/dev/null 2>&1; then
+ iptables='iptables'
+else
+ iptables=''
+fi
+
+if ip6tables-legacy --version >/dev/null 2>&1; then
+ ip6tables='ip6tables-legacy'
+elif ip6tables --version >/dev/null 2>&1; then
+ ip6tables='ip6tables'
+else
+ ip6tables=''
+fi
+
+if nft --version >/dev/null 2>&1; then
+ nft='nft'
+else
+ nft=''
+fi
+
+if [ -z "$iptables$ip6tables$nft" ]; then
+ echo "SKIP: Test needs iptables, ip6tables or nft"
+ exit $ksft_skip
+fi
+
+sfx=$(mktemp -u "XXXXXXXX")
+ns1="ns1-$sfx"
+ns2="ns2-$sfx"
+trap "ip netns del $ns1; ip netns del $ns2" EXIT
+
+# create two netns, disable rp_filter in ns2 and
+# keep IPv6 address when moving into VRF
+ip netns add "$ns1"
+ip netns add "$ns2"
+ip netns exec "$ns2" sysctl -q net.ipv4.conf.all.rp_filter=0
+ip netns exec "$ns2" sysctl -q net.ipv4.conf.default.rp_filter=0
+ip netns exec "$ns2" sysctl -q net.ipv6.conf.all.keep_addr_on_down=1
+
+# a standard connection between the netns, should not trigger rp filter
+ip -net "$ns1" link add v0 type veth peer name v0 netns "$ns2"
+ip -net "$ns1" link set v0 up; ip -net "$ns2" link set v0 up
+ip -net "$ns1" a a 192.168.23.2/24 dev v0
+ip -net "$ns2" a a 192.168.23.1/24 dev v0
+ip -net "$ns1" a a fec0:23::2/64 dev v0 nodad
+ip -net "$ns2" a a fec0:23::1/64 dev v0 nodad
+
+# rp filter testing: ns1 sends packets via v0 which ns2 would route back via d0
+ip -net "$ns2" link add d0 type dummy
+ip -net "$ns2" link set d0 up
+ip -net "$ns1" a a 192.168.42.2/24 dev v0
+ip -net "$ns2" a a 192.168.42.1/24 dev d0
+ip -net "$ns1" a a fec0:42::2/64 dev v0 nodad
+ip -net "$ns2" a a fec0:42::1/64 dev d0 nodad
+
+# firewall matches to test
+[ -n "$iptables" ] && {
+ common='-t raw -A PREROUTING -s 192.168.0.0/16'
+ ip netns exec "$ns2" "$iptables" $common -m rpfilter
+ ip netns exec "$ns2" "$iptables" $common -m rpfilter --invert
+}
+[ -n "$ip6tables" ] && {
+ common='-t raw -A PREROUTING -s fec0::/16'
+ ip netns exec "$ns2" "$ip6tables" $common -m rpfilter
+ ip netns exec "$ns2" "$ip6tables" $common -m rpfilter --invert
+}
+[ -n "$nft" ] && ip netns exec "$ns2" $nft -f - <<EOF
+table inet t {
+ chain c {
+ type filter hook prerouting priority raw;
+ ip saddr 192.168.0.0/16 fib saddr . iif oif exists counter
+ ip6 saddr fec0::/16 fib saddr . iif oif exists counter
+ }
+}
+EOF
+
+die() {
+ echo "FAIL: $*"
+ #ip netns exec "$ns2" "$iptables" -t raw -vS
+ #ip netns exec "$ns2" "$ip6tables" -t raw -vS
+ #ip netns exec "$ns2" nft list ruleset
+ exit 1
+}
+
+# check rule counters, return true if rule did not match
+ipt_zero_rule() { # (command)
+ [ -n "$1" ] || return 0
+ ip netns exec "$ns2" "$1" -t raw -vS | grep -q -- "-m rpfilter -c 0 0"
+}
+ipt_zero_reverse_rule() { # (command)
+ [ -n "$1" ] || return 0
+ ip netns exec "$ns2" "$1" -t raw -vS | \
+ grep -q -- "-m rpfilter --invert -c 0 0"
+}
+nft_zero_rule() { # (family)
+ [ -n "$nft" ] || return 0
+ ip netns exec "$ns2" "$nft" list chain inet t c | \
+ grep -q "$1 saddr .* counter packets 0 bytes 0"
+}
+
+netns_ping() { # (netns, args...)
+ local netns="$1"
+ shift
+ ip netns exec "$netns" ping -q -c 1 -W 1 "$@" >/dev/null
+}
+
+clear_counters() {
+ [ -n "$iptables" ] && ip netns exec "$ns2" "$iptables" -t raw -Z
+ [ -n "$ip6tables" ] && ip netns exec "$ns2" "$ip6tables" -t raw -Z
+ if [ -n "$nft" ]; then
+ (
+ echo "delete table inet t";
+ ip netns exec "$ns2" $nft -s list table inet t;
+ ) | ip netns exec "$ns2" $nft -f -
+ fi
+}
+
+testrun() {
+ clear_counters
+
+ # test 1: martian traffic should fail rpfilter matches
+ netns_ping "$ns1" -I v0 192.168.42.1 && \
+ die "martian ping 192.168.42.1 succeeded"
+ netns_ping "$ns1" -I v0 fec0:42::1 && \
+ die "martian ping fec0:42::1 succeeded"
+
+ ipt_zero_rule "$iptables" || die "iptables matched martian"
+ ipt_zero_rule "$ip6tables" || die "ip6tables matched martian"
+ ipt_zero_reverse_rule "$iptables" && die "iptables not matched martian"
+ ipt_zero_reverse_rule "$ip6tables" && die "ip6tables not matched martian"
+ nft_zero_rule ip || die "nft IPv4 matched martian"
+ nft_zero_rule ip6 || die "nft IPv6 matched martian"
+
+ clear_counters
+
+ # test 2: rpfilter match should pass for regular traffic
+ netns_ping "$ns1" 192.168.23.1 || \
+ die "regular ping 192.168.23.1 failed"
+ netns_ping "$ns1" fec0:23::1 || \
+ die "regular ping fec0:23::1 failed"
+
+ ipt_zero_rule "$iptables" && die "iptables match not effective"
+ ipt_zero_rule "$ip6tables" && die "ip6tables match not effective"
+ ipt_zero_reverse_rule "$iptables" || die "iptables match over-effective"
+ ipt_zero_reverse_rule "$ip6tables" || die "ip6tables match over-effective"
+ nft_zero_rule ip && die "nft IPv4 match not effective"
+ nft_zero_rule ip6 && die "nft IPv6 match not effective"
+
+}
+
+testrun
+
+# repeat test with vrf device in $ns2
+ip -net "$ns2" link add vrf0 type vrf table 10
+ip -net "$ns2" link set vrf0 up
+ip -net "$ns2" link set v0 master vrf0
+
+testrun
+
+echo "PASS: netfilter reverse path match works as intended"
+exit 0
diff --git a/tools/testing/selftests/netfilter/sctp_collision.c b/tools/testing/selftests/netfilter/sctp_collision.c
new file mode 100644
index 000000000000..21bb1cfd8a85
--- /dev/null
+++ b/tools/testing/selftests/netfilter/sctp_collision.c
@@ -0,0 +1,99 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+#include <arpa/inet.h>
+
+int main(int argc, char *argv[])
+{
+ struct sockaddr_in saddr = {}, daddr = {};
+ int sd, ret, len = sizeof(daddr);
+ struct timeval tv = {25, 0};
+ char buf[] = "hello";
+
+ if (argc != 6 || (strcmp(argv[1], "server") && strcmp(argv[1], "client"))) {
+ printf("%s <server|client> <LOCAL_IP> <LOCAL_PORT> <REMOTE_IP> <REMOTE_PORT>\n",
+ argv[0]);
+ return -1;
+ }
+
+ sd = socket(AF_INET, SOCK_SEQPACKET, IPPROTO_SCTP);
+ if (sd < 0) {
+ printf("Failed to create sd\n");
+ return -1;
+ }
+
+ saddr.sin_family = AF_INET;
+ saddr.sin_addr.s_addr = inet_addr(argv[2]);
+ saddr.sin_port = htons(atoi(argv[3]));
+
+ ret = bind(sd, (struct sockaddr *)&saddr, sizeof(saddr));
+ if (ret < 0) {
+ printf("Failed to bind to address\n");
+ goto out;
+ }
+
+ ret = listen(sd, 5);
+ if (ret < 0) {
+ printf("Failed to listen on port\n");
+ goto out;
+ }
+
+ daddr.sin_family = AF_INET;
+ daddr.sin_addr.s_addr = inet_addr(argv[4]);
+ daddr.sin_port = htons(atoi(argv[5]));
+
+ /* make test shorter than 25s */
+ ret = setsockopt(sd, SOL_SOCKET, SO_RCVTIMEO, &tv, sizeof(tv));
+ if (ret < 0) {
+ printf("Failed to setsockopt SO_RCVTIMEO\n");
+ goto out;
+ }
+
+ if (!strcmp(argv[1], "server")) {
+ sleep(1); /* wait a bit for client's INIT */
+ ret = connect(sd, (struct sockaddr *)&daddr, len);
+ if (ret < 0) {
+ printf("Failed to connect to peer\n");
+ goto out;
+ }
+ ret = recvfrom(sd, buf, sizeof(buf), 0, (struct sockaddr *)&daddr, &len);
+ if (ret < 0) {
+ printf("Failed to recv msg %d\n", ret);
+ goto out;
+ }
+ ret = sendto(sd, buf, strlen(buf) + 1, 0, (struct sockaddr *)&daddr, len);
+ if (ret < 0) {
+ printf("Failed to send msg %d\n", ret);
+ goto out;
+ }
+ printf("Server: sent! %d\n", ret);
+ }
+
+ if (!strcmp(argv[1], "client")) {
+ usleep(300000); /* wait a bit for server's listening */
+ ret = connect(sd, (struct sockaddr *)&daddr, len);
+ if (ret < 0) {
+ printf("Failed to connect to peer\n");
+ goto out;
+ }
+ sleep(1); /* wait a bit for server's delayed INIT_ACK to reproduce the issue */
+ ret = sendto(sd, buf, strlen(buf) + 1, 0, (struct sockaddr *)&daddr, len);
+ if (ret < 0) {
+ printf("Failed to send msg %d\n", ret);
+ goto out;
+ }
+ ret = recvfrom(sd, buf, sizeof(buf), 0, (struct sockaddr *)&daddr, &len);
+ if (ret < 0) {
+ printf("Failed to recv msg %d\n", ret);
+ goto out;
+ }
+ printf("Client: rcvd! %d\n", ret);
+ }
+ ret = 0;
+out:
+ close(sd);
+ return ret;
+}
diff --git a/tools/testing/selftests/netfilter/settings b/tools/testing/selftests/netfilter/settings
new file mode 100644
index 000000000000..6091b45d226b
--- /dev/null
+++ b/tools/testing/selftests/netfilter/settings
@@ -0,0 +1 @@
+timeout=120
diff --git a/tools/testing/selftests/netfilter/xt_string.sh b/tools/testing/selftests/netfilter/xt_string.sh
new file mode 100755
index 000000000000..1802653a4728
--- /dev/null
+++ b/tools/testing/selftests/netfilter/xt_string.sh
@@ -0,0 +1,128 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+# return code to signal skipped test
+ksft_skip=4
+rc=0
+
+if ! iptables --version >/dev/null 2>&1; then
+ echo "SKIP: Test needs iptables"
+ exit $ksft_skip
+fi
+if ! ip -V >/dev/null 2>&1; then
+ echo "SKIP: Test needs iproute2"
+ exit $ksft_skip
+fi
+if ! nc -h >/dev/null 2>&1; then
+ echo "SKIP: Test needs netcat"
+ exit $ksft_skip
+fi
+
+pattern="foo bar baz"
+patlen=11
+hdrlen=$((20 + 8)) # IPv4 + UDP
+ns="ns-$(mktemp -u XXXXXXXX)"
+trap 'ip netns del $ns' EXIT
+ip netns add "$ns"
+ip -net "$ns" link add d0 type dummy
+ip -net "$ns" link set d0 up
+ip -net "$ns" addr add 10.1.2.1/24 dev d0
+
+#ip netns exec "$ns" tcpdump -npXi d0 &
+#tcpdump_pid=$!
+#trap 'kill $tcpdump_pid; ip netns del $ns' EXIT
+
+add_rule() { # (alg, from, to)
+ ip netns exec "$ns" \
+ iptables -A OUTPUT -o d0 -m string \
+ --string "$pattern" --algo $1 --from $2 --to $3
+}
+showrules() { # ()
+ ip netns exec "$ns" iptables -v -S OUTPUT | grep '^-A'
+}
+zerorules() {
+ ip netns exec "$ns" iptables -Z OUTPUT
+}
+countrule() { # (pattern)
+ showrules | grep -c -- "$*"
+}
+send() { # (offset)
+ ( for ((i = 0; i < $1 - $hdrlen; i++)); do
+ printf " "
+ done
+ printf "$pattern"
+ ) | ip netns exec "$ns" nc -w 1 -u 10.1.2.2 27374
+}
+
+add_rule bm 1000 1500
+add_rule bm 1400 1600
+add_rule kmp 1000 1500
+add_rule kmp 1400 1600
+
+zerorules
+send 0
+send $((1000 - $patlen))
+if [ $(countrule -c 0 0) -ne 4 ]; then
+ echo "FAIL: rules match data before --from"
+ showrules
+ ((rc--))
+fi
+
+zerorules
+send 1000
+send $((1400 - $patlen))
+if [ $(countrule -c 2) -ne 2 ]; then
+ echo "FAIL: only two rules should match at low offset"
+ showrules
+ ((rc--))
+fi
+
+zerorules
+send $((1500 - $patlen))
+if [ $(countrule -c 1) -ne 4 ]; then
+ echo "FAIL: all rules should match at end of packet"
+ showrules
+ ((rc--))
+fi
+
+zerorules
+send 1495
+if [ $(countrule -c 1) -ne 1 ]; then
+ echo "FAIL: only kmp with proper --to should match pattern spanning fragments"
+ showrules
+ ((rc--))
+fi
+
+zerorules
+send 1500
+if [ $(countrule -c 1) -ne 2 ]; then
+ echo "FAIL: two rules should match pattern at start of second fragment"
+ showrules
+ ((rc--))
+fi
+
+zerorules
+send $((1600 - $patlen))
+if [ $(countrule -c 1) -ne 2 ]; then
+ echo "FAIL: two rules should match pattern at end of largest --to"
+ showrules
+ ((rc--))
+fi
+
+zerorules
+send $((1600 - $patlen + 1))
+if [ $(countrule -c 1) -ne 0 ]; then
+ echo "FAIL: no rules should match pattern extending largest --to"
+ showrules
+ ((rc--))
+fi
+
+zerorules
+send 1600
+if [ $(countrule -c 1) -ne 0 ]; then
+ echo "FAIL: no rule should match pattern past largest --to"
+ showrules
+ ((rc--))
+fi
+
+exit $rc
diff --git a/tools/testing/selftests/nolibc/.gitignore b/tools/testing/selftests/nolibc/.gitignore
new file mode 100644
index 000000000000..35d247a0d5bd
--- /dev/null
+++ b/tools/testing/selftests/nolibc/.gitignore
@@ -0,0 +1,7 @@
+/initramfs/
+/initramfs.cpio
+/libc-test
+/nolibc-test
+/run.out
+/run.out.*
+/sysroot/
diff --git a/tools/testing/selftests/nolibc/Makefile b/tools/testing/selftests/nolibc/Makefile
new file mode 100644
index 000000000000..40dd95228051
--- /dev/null
+++ b/tools/testing/selftests/nolibc/Makefile
@@ -0,0 +1,281 @@
+# SPDX-License-Identifier: GPL-2.0
+# Makefile for nolibc tests
+include ../../../scripts/Makefile.include
+include ../../../scripts/utilities.mak
+# We need this for the "cc-option" macro.
+include ../../../build/Build.include
+
+ifneq ($(O),)
+ifneq ($(call is-absolute,$(O)),y)
+$(error Only absolute O= parameters are supported)
+endif
+endif
+
+# we're in ".../tools/testing/selftests/nolibc"
+ifeq ($(srctree),)
+srctree := $(patsubst %/tools/testing/selftests/,%,$(dir $(CURDIR)))
+endif
+
+ifeq ($(ARCH),)
+include $(srctree)/scripts/subarch.include
+ARCH = $(SUBARCH)
+endif
+
+objtree ?= $(srctree)
+
+# XARCH extends the kernel's ARCH with a few variants of the same
+# architecture that only differ by the configuration, the toolchain
+# and the Qemu program used. It is copied as-is into ARCH except for
+# a few specific values which are mapped like this:
+#
+# XARCH | ARCH | config
+# -------------|-----------|-------------------------
+# ppc | powerpc | 32 bits
+# ppc64 | powerpc | 64 bits big endian
+# ppc64le | powerpc | 64 bits little endian
+#
+# It is recommended to only use XARCH, though it does not harm if
+# ARCH is already set. For simplicity, ARCH is sufficient for all
+# architectures where both are equal.
+
+# configure default variants for target kernel supported architectures
+XARCH_powerpc = ppc
+XARCH_mips = mips32le
+XARCH = $(or $(XARCH_$(ARCH)),$(ARCH))
+
+# map from user input variants to their kernel supported architectures
+ARCH_ppc = powerpc
+ARCH_ppc64 = powerpc
+ARCH_ppc64le = powerpc
+ARCH_mips32le = mips
+ARCH_mips32be = mips
+ARCH := $(or $(ARCH_$(XARCH)),$(XARCH))
+
+# kernel image names by architecture
+IMAGE_i386 = arch/x86/boot/bzImage
+IMAGE_x86_64 = arch/x86/boot/bzImage
+IMAGE_x86 = arch/x86/boot/bzImage
+IMAGE_arm64 = arch/arm64/boot/Image
+IMAGE_arm = arch/arm/boot/zImage
+IMAGE_mips32le = vmlinuz
+IMAGE_mips32be = vmlinuz
+IMAGE_ppc = vmlinux
+IMAGE_ppc64 = vmlinux
+IMAGE_ppc64le = arch/powerpc/boot/zImage
+IMAGE_riscv = arch/riscv/boot/Image
+IMAGE_s390 = arch/s390/boot/bzImage
+IMAGE_loongarch = arch/loongarch/boot/vmlinuz.efi
+IMAGE = $(objtree)/$(IMAGE_$(XARCH))
+IMAGE_NAME = $(notdir $(IMAGE))
+
+# default kernel configurations that appear to be usable
+DEFCONFIG_i386 = defconfig
+DEFCONFIG_x86_64 = defconfig
+DEFCONFIG_x86 = defconfig
+DEFCONFIG_arm64 = defconfig
+DEFCONFIG_arm = multi_v7_defconfig
+DEFCONFIG_mips32le = malta_defconfig
+DEFCONFIG_mips32be = malta_defconfig
+DEFCONFIG_ppc = pmac32_defconfig
+DEFCONFIG_ppc64 = powernv_be_defconfig
+DEFCONFIG_ppc64le = powernv_defconfig
+DEFCONFIG_riscv = defconfig
+DEFCONFIG_s390 = defconfig
+DEFCONFIG_loongarch = defconfig
+DEFCONFIG = $(DEFCONFIG_$(XARCH))
+
+EXTRACONFIG_mips32be = -d CONFIG_CPU_LITTLE_ENDIAN -e CONFIG_CPU_BIG_ENDIAN
+EXTRACONFIG = $(EXTRACONFIG_$(XARCH))
+
+# optional tests to run (default = all)
+TEST =
+
+# QEMU_ARCH: arch names used by qemu
+QEMU_ARCH_i386 = i386
+QEMU_ARCH_x86_64 = x86_64
+QEMU_ARCH_x86 = x86_64
+QEMU_ARCH_arm64 = aarch64
+QEMU_ARCH_arm = arm
+QEMU_ARCH_mips32le = mipsel # works with malta_defconfig
+QEMU_ARCH_mips32be = mips
+QEMU_ARCH_ppc = ppc
+QEMU_ARCH_ppc64 = ppc64
+QEMU_ARCH_ppc64le = ppc64
+QEMU_ARCH_riscv = riscv64
+QEMU_ARCH_s390 = s390x
+QEMU_ARCH_loongarch = loongarch64
+QEMU_ARCH = $(QEMU_ARCH_$(XARCH))
+
+QEMU_ARCH_USER_ppc64le = ppc64le
+QEMU_ARCH_USER = $(or $(QEMU_ARCH_USER_$(XARCH)),$(QEMU_ARCH_$(XARCH)))
+
+QEMU_BIOS_DIR = /usr/share/edk2/
+QEMU_BIOS_loongarch = $(QEMU_BIOS_DIR)/loongarch64/OVMF_CODE.fd
+
+ifneq ($(QEMU_BIOS_$(XARCH)),)
+QEMU_ARGS_BIOS = -bios $(QEMU_BIOS_$(XARCH))
+endif
+
+# QEMU_ARGS : some arch-specific args to pass to qemu
+QEMU_ARGS_i386 = -M pc -append "console=ttyS0,9600 i8042.noaux panic=-1 $(TEST:%=NOLIBC_TEST=%)"
+QEMU_ARGS_x86_64 = -M pc -append "console=ttyS0,9600 i8042.noaux panic=-1 $(TEST:%=NOLIBC_TEST=%)"
+QEMU_ARGS_x86 = -M pc -append "console=ttyS0,9600 i8042.noaux panic=-1 $(TEST:%=NOLIBC_TEST=%)"
+QEMU_ARGS_arm64 = -M virt -cpu cortex-a53 -append "panic=-1 $(TEST:%=NOLIBC_TEST=%)"
+QEMU_ARGS_arm = -M virt -append "panic=-1 $(TEST:%=NOLIBC_TEST=%)"
+QEMU_ARGS_mips32le = -M malta -append "panic=-1 $(TEST:%=NOLIBC_TEST=%)"
+QEMU_ARGS_mips32be = -M malta -append "panic=-1 $(TEST:%=NOLIBC_TEST=%)"
+QEMU_ARGS_ppc = -M g3beige -append "console=ttyS0 panic=-1 $(TEST:%=NOLIBC_TEST=%)"
+QEMU_ARGS_ppc64 = -M powernv -append "console=hvc0 panic=-1 $(TEST:%=NOLIBC_TEST=%)"
+QEMU_ARGS_ppc64le = -M powernv -append "console=hvc0 panic=-1 $(TEST:%=NOLIBC_TEST=%)"
+QEMU_ARGS_riscv = -M virt -append "console=ttyS0 panic=-1 $(TEST:%=NOLIBC_TEST=%)"
+QEMU_ARGS_s390 = -M s390-ccw-virtio -m 1G -append "console=ttyS0 panic=-1 $(TEST:%=NOLIBC_TEST=%)"
+QEMU_ARGS_loongarch = -M virt -append "console=ttyS0,115200 panic=-1 $(TEST:%=NOLIBC_TEST=%)"
+QEMU_ARGS = $(QEMU_ARGS_$(XARCH)) $(QEMU_ARGS_BIOS) $(QEMU_ARGS_EXTRA)
+
+# OUTPUT is only set when run from the main makefile, otherwise
+# it defaults to this nolibc directory.
+OUTPUT ?= $(CURDIR)/
+
+ifeq ($(V),1)
+Q=
+else
+Q=@
+endif
+
+CFLAGS_i386 = $(call cc-option,-m32)
+CFLAGS_ppc = -m32 -mbig-endian -mno-vsx $(call cc-option,-mmultiple)
+CFLAGS_ppc64 = -m64 -mbig-endian -mno-vsx $(call cc-option,-mmultiple)
+CFLAGS_ppc64le = -m64 -mlittle-endian -mno-vsx $(call cc-option,-mabi=elfv2)
+CFLAGS_s390 = -m64
+CFLAGS_mips32le = -EL -mabi=32 -fPIC
+CFLAGS_mips32be = -EB -mabi=32
+CFLAGS_STACKPROTECTOR ?= $(call cc-option,-mstack-protector-guard=global $(call cc-option,-fstack-protector-all))
+CFLAGS ?= -Os -fno-ident -fno-asynchronous-unwind-tables -std=c89 -W -Wall -Wextra \
+ $(call cc-option,-fno-stack-protector) \
+ $(CFLAGS_$(XARCH)) $(CFLAGS_STACKPROTECTOR)
+LDFLAGS :=
+
+REPORT ?= awk '/\[OK\][\r]*$$/{p++} /\[FAIL\][\r]*$$/{if (!f) printf("\n"); f++; print;} /\[SKIPPED\][\r]*$$/{s++} \
+ END{ printf("\n%3d test(s): %3d passed, %3d skipped, %3d failed => status: ", p+s+f, p, s, f); \
+ if (f) printf("failure\n"); else if (s) printf("warning\n"); else printf("success\n");; \
+ printf("\nSee all results in %s\n", ARGV[1]); }'
+
+help:
+ @echo "Supported targets under selftests/nolibc:"
+ @echo " all call the \"run\" target below"
+ @echo " help this help"
+ @echo " sysroot create the nolibc sysroot here (uses \$$ARCH)"
+ @echo " nolibc-test build the executable (uses \$$CC and \$$CROSS_COMPILE)"
+ @echo " libc-test build an executable using the compiler's default libc instead"
+ @echo " run-user runs the executable under QEMU (uses \$$XARCH, \$$TEST)"
+ @echo " initramfs.cpio prepare the initramfs archive with nolibc-test"
+ @echo " initramfs prepare the initramfs tree with nolibc-test"
+ @echo " defconfig create a fresh new default config (uses \$$XARCH)"
+ @echo " kernel (re)build the kernel (uses \$$XARCH)"
+ @echo " kernel-standalone (re)build the kernel with the initramfs (uses \$$XARCH)"
+ @echo " run runs the kernel in QEMU after building it (uses \$$XARCH, \$$TEST)"
+ @echo " rerun runs a previously prebuilt kernel in QEMU (uses \$$XARCH, \$$TEST)"
+ @echo " clean clean the sysroot, initramfs, build and output files"
+ @echo ""
+ @echo "The output file is \"run.out\". Test ranges may be passed using \$$TEST."
+ @echo ""
+ @echo "Currently using the following variables:"
+ @echo " ARCH = $(ARCH)"
+ @echo " XARCH = $(XARCH)"
+ @echo " CROSS_COMPILE = $(CROSS_COMPILE)"
+ @echo " CC = $(CC)"
+ @echo " OUTPUT = $(OUTPUT)"
+ @echo " TEST = $(TEST)"
+ @echo " QEMU_ARCH = $(if $(QEMU_ARCH),$(QEMU_ARCH),UNKNOWN_ARCH) [determined from \$$XARCH]"
+ @echo " IMAGE_NAME = $(if $(IMAGE_NAME),$(IMAGE_NAME),UNKNOWN_ARCH) [determined from \$$XARCH]"
+ @echo ""
+
+all: run
+
+sysroot: sysroot/$(ARCH)/include
+
+sysroot/$(ARCH)/include:
+ $(Q)rm -rf sysroot/$(ARCH) sysroot/sysroot
+ $(QUIET_MKDIR)mkdir -p sysroot
+ $(Q)$(MAKE) -C $(srctree) outputmakefile
+ $(Q)$(MAKE) -C $(srctree)/tools/include/nolibc ARCH=$(ARCH) OUTPUT=$(CURDIR)/sysroot/ headers_standalone
+ $(Q)mv sysroot/sysroot sysroot/$(ARCH)
+
+ifneq ($(NOLIBC_SYSROOT),0)
+nolibc-test: nolibc-test.c nolibc-test-linkage.c sysroot/$(ARCH)/include
+ $(QUIET_CC)$(CC) $(CFLAGS) $(LDFLAGS) -o $@ \
+ -nostdlib -nostdinc -static -Isysroot/$(ARCH)/include nolibc-test.c nolibc-test-linkage.c -lgcc
+else
+nolibc-test: nolibc-test.c nolibc-test-linkage.c
+ $(QUIET_CC)$(CC) $(CFLAGS) $(LDFLAGS) -o $@ \
+ -nostdlib -static -include $(srctree)/tools/include/nolibc/nolibc.h nolibc-test.c nolibc-test-linkage.c -lgcc
+endif
+
+libc-test: nolibc-test.c nolibc-test-linkage.c
+ $(QUIET_CC)$(HOSTCC) -o $@ nolibc-test.c nolibc-test-linkage.c
+
+# local libc-test
+run-libc-test: libc-test
+ $(Q)./libc-test > "$(CURDIR)/run.out" || :
+ $(Q)$(REPORT) $(CURDIR)/run.out
+
+# local nolibc-test
+run-nolibc-test: nolibc-test
+ $(Q)./nolibc-test > "$(CURDIR)/run.out" || :
+ $(Q)$(REPORT) $(CURDIR)/run.out
+
+# qemu user-land test
+run-user: nolibc-test
+ $(Q)qemu-$(QEMU_ARCH_USER) ./nolibc-test > "$(CURDIR)/run.out" || :
+ $(Q)$(REPORT) $(CURDIR)/run.out
+
+initramfs.cpio: kernel nolibc-test
+ $(QUIET_GEN)echo 'file /init nolibc-test 755 0 0' | $(objtree)/usr/gen_init_cpio - > initramfs.cpio
+
+initramfs: nolibc-test
+ $(QUIET_MKDIR)mkdir -p initramfs
+ $(call QUIET_INSTALL, initramfs/init)
+ $(Q)cp nolibc-test initramfs/init
+
+defconfig:
+ $(Q)$(MAKE) -C $(srctree) ARCH=$(ARCH) CC=$(CC) CROSS_COMPILE=$(CROSS_COMPILE) mrproper $(DEFCONFIG) prepare
+ $(Q)if [ -n "$(EXTRACONFIG)" ]; then \
+ $(srctree)/scripts/config --file $(objtree)/.config $(EXTRACONFIG); \
+ $(MAKE) -C $(srctree) ARCH=$(ARCH) CC=$(CC) CROSS_COMPILE=$(CROSS_COMPILE) olddefconfig < /dev/null; \
+ fi
+
+kernel:
+ $(Q)$(MAKE) -C $(srctree) ARCH=$(ARCH) CC=$(CC) CROSS_COMPILE=$(CROSS_COMPILE) $(IMAGE_NAME) < /dev/null
+
+kernel-standalone: initramfs
+ $(Q)$(MAKE) -C $(srctree) ARCH=$(ARCH) CC=$(CC) CROSS_COMPILE=$(CROSS_COMPILE) $(IMAGE_NAME) CONFIG_INITRAMFS_SOURCE=$(CURDIR)/initramfs < /dev/null
+
+# run the tests after building the kernel
+run: kernel initramfs.cpio
+ $(Q)qemu-system-$(QEMU_ARCH) -display none -no-reboot -kernel "$(IMAGE)" -initrd initramfs.cpio -serial stdio $(QEMU_ARGS) > "$(CURDIR)/run.out"
+ $(Q)$(REPORT) $(CURDIR)/run.out
+
+# re-run the tests from an existing kernel
+rerun:
+ $(Q)qemu-system-$(QEMU_ARCH) -display none -no-reboot -kernel "$(IMAGE)" -initrd initramfs.cpio -serial stdio $(QEMU_ARGS) > "$(CURDIR)/run.out"
+ $(Q)$(REPORT) $(CURDIR)/run.out
+
+# report with existing test log
+report:
+ $(Q)$(REPORT) $(CURDIR)/run.out
+
+clean:
+ $(call QUIET_CLEAN, sysroot)
+ $(Q)rm -rf sysroot
+ $(call QUIET_CLEAN, nolibc-test)
+ $(Q)rm -f nolibc-test
+ $(call QUIET_CLEAN, libc-test)
+ $(Q)rm -f libc-test
+ $(call QUIET_CLEAN, initramfs.cpio)
+ $(Q)rm -rf initramfs.cpio
+ $(call QUIET_CLEAN, initramfs)
+ $(Q)rm -rf initramfs
+ $(call QUIET_CLEAN, run.out)
+ $(Q)rm -rf run.out
+
+.PHONY: sysroot/$(ARCH)/include
diff --git a/tools/testing/selftests/nolibc/nolibc-test-linkage.c b/tools/testing/selftests/nolibc/nolibc-test-linkage.c
new file mode 100644
index 000000000000..5ff4c8a1db2a
--- /dev/null
+++ b/tools/testing/selftests/nolibc/nolibc-test-linkage.c
@@ -0,0 +1,26 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+
+#include "nolibc-test-linkage.h"
+
+#ifndef NOLIBC
+#include <errno.h>
+#endif
+
+void *linkage_test_errno_addr(void)
+{
+ return &errno;
+}
+
+int linkage_test_constructor_test_value;
+
+__attribute__((constructor))
+static void constructor1(void)
+{
+ linkage_test_constructor_test_value = 2;
+}
+
+__attribute__((constructor))
+static void constructor2(void)
+{
+ linkage_test_constructor_test_value *= 3;
+}
diff --git a/tools/testing/selftests/nolibc/nolibc-test-linkage.h b/tools/testing/selftests/nolibc/nolibc-test-linkage.h
new file mode 100644
index 000000000000..c66473070d73
--- /dev/null
+++ b/tools/testing/selftests/nolibc/nolibc-test-linkage.h
@@ -0,0 +1,9 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+
+#ifndef _NOLIBC_TEST_LINKAGE_H
+#define _NOLIBC_TEST_LINKAGE_H
+
+void *linkage_test_errno_addr(void);
+extern int linkage_test_constructor_test_value;
+
+#endif /* _NOLIBC_TEST_LINKAGE_H */
diff --git a/tools/testing/selftests/nolibc/nolibc-test.c b/tools/testing/selftests/nolibc/nolibc-test.c
new file mode 100644
index 000000000000..6ba4f8275ac4
--- /dev/null
+++ b/tools/testing/selftests/nolibc/nolibc-test.c
@@ -0,0 +1,1438 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+
+#define _GNU_SOURCE
+#define _LARGEFILE64_SOURCE
+
+/* libc-specific include files
+ * The program may be built in 3 ways:
+ * $(CC) -nostdlib -include /path/to/nolibc.h => NOLIBC already defined
+ * $(CC) -nostdlib -I/path/to/nolibc/sysroot => _NOLIBC_* guards are present
+ * $(CC) with default libc => NOLIBC* never defined
+ */
+#ifndef NOLIBC
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#ifndef _NOLIBC_STDIO_H
+/* standard libcs need more includes */
+#include <sys/auxv.h>
+#include <sys/io.h>
+#include <sys/ioctl.h>
+#include <sys/mman.h>
+#include <sys/mount.h>
+#include <sys/prctl.h>
+#include <sys/reboot.h>
+#include <sys/resource.h>
+#include <sys/stat.h>
+#include <sys/syscall.h>
+#include <sys/sysmacros.h>
+#include <sys/time.h>
+#include <sys/wait.h>
+#include <dirent.h>
+#include <errno.h>
+#include <fcntl.h>
+#include <poll.h>
+#include <sched.h>
+#include <signal.h>
+#include <stdarg.h>
+#include <stddef.h>
+#include <stdint.h>
+#include <unistd.h>
+#include <limits.h>
+#endif
+#endif
+
+#include "nolibc-test-linkage.h"
+
+/* for the type of int_fast16_t and int_fast32_t, musl differs from glibc and nolibc */
+#define SINT_MAX_OF_TYPE(type) (((type)1 << (sizeof(type) * 8 - 2)) - (type)1 + ((type)1 << (sizeof(type) * 8 - 2)))
+#define SINT_MIN_OF_TYPE(type) (-SINT_MAX_OF_TYPE(type) - 1)
+
+/* will be used to test initialization of environ */
+static char **test_envp;
+
+/* will be used to test initialization of argv */
+static char **test_argv;
+
+/* will be used to test initialization of argc */
+static int test_argc;
+
+/* will be used by some test cases as readable file, please don't write it */
+static const char *argv0;
+
+/* will be used by constructor tests */
+static int constructor_test_value;
+
+/* definition of a series of tests */
+struct test {
+ const char *name; /* test name */
+ int (*func)(int min, int max); /* handler */
+};
+
+#ifndef _NOLIBC_STDLIB_H
+char *itoa(int i)
+{
+ static char buf[12];
+ int ret;
+
+ ret = snprintf(buf, sizeof(buf), "%d", i);
+ return (ret >= 0 && ret < sizeof(buf)) ? buf : "#err";
+}
+#endif
+
+#define CASE_ERR(err) \
+ case err: return #err
+
+/* returns the error name (e.g. "ENOENT") for common errors, "SUCCESS" for 0,
+ * or the decimal value for less common ones.
+ */
+static const char *errorname(int err)
+{
+ switch (err) {
+ case 0: return "SUCCESS";
+ CASE_ERR(EPERM);
+ CASE_ERR(ENOENT);
+ CASE_ERR(ESRCH);
+ CASE_ERR(EINTR);
+ CASE_ERR(EIO);
+ CASE_ERR(ENXIO);
+ CASE_ERR(E2BIG);
+ CASE_ERR(ENOEXEC);
+ CASE_ERR(EBADF);
+ CASE_ERR(ECHILD);
+ CASE_ERR(EAGAIN);
+ CASE_ERR(ENOMEM);
+ CASE_ERR(EACCES);
+ CASE_ERR(EFAULT);
+ CASE_ERR(ENOTBLK);
+ CASE_ERR(EBUSY);
+ CASE_ERR(EEXIST);
+ CASE_ERR(EXDEV);
+ CASE_ERR(ENODEV);
+ CASE_ERR(ENOTDIR);
+ CASE_ERR(EISDIR);
+ CASE_ERR(EINVAL);
+ CASE_ERR(ENFILE);
+ CASE_ERR(EMFILE);
+ CASE_ERR(ENOTTY);
+ CASE_ERR(ETXTBSY);
+ CASE_ERR(EFBIG);
+ CASE_ERR(ENOSPC);
+ CASE_ERR(ESPIPE);
+ CASE_ERR(EROFS);
+ CASE_ERR(EMLINK);
+ CASE_ERR(EPIPE);
+ CASE_ERR(EDOM);
+ CASE_ERR(ERANGE);
+ CASE_ERR(ENOSYS);
+ CASE_ERR(EOVERFLOW);
+ default:
+ return itoa(err);
+ }
+}
+
+static void align_result(size_t llen)
+{
+ const size_t align = 64;
+ char buf[align];
+ size_t n;
+
+ if (llen >= align)
+ return;
+
+ n = align - llen;
+ memset(buf, ' ', n);
+ buf[n] = '\0';
+ fputs(buf, stdout);
+}
+
+enum RESULT {
+ OK,
+ FAIL,
+ SKIPPED,
+};
+
+static void result(int llen, enum RESULT r)
+{
+ const char *msg;
+
+ if (r == OK)
+ msg = " [OK]";
+ else if (r == SKIPPED)
+ msg = "[SKIPPED]";
+ else
+ msg = " [FAIL]";
+
+ align_result(llen);
+ puts(msg);
+}
+
+/* The tests below are intended to be used by the macroes, which evaluate
+ * expression <expr>, print the status to stdout, and update the "ret"
+ * variable to count failures. The functions themselves return the number
+ * of failures, thus either 0 or 1.
+ */
+
+#define EXPECT_ZR(cond, expr) \
+ do { if (!(cond)) result(llen, SKIPPED); else ret += expect_zr(expr, llen); } while (0)
+
+static __attribute__((unused))
+int expect_zr(int expr, int llen)
+{
+ int ret = !(expr == 0);
+
+ llen += printf(" = %d ", expr);
+ result(llen, ret ? FAIL : OK);
+ return ret;
+}
+
+
+#define EXPECT_NZ(cond, expr, val) \
+ do { if (!(cond)) result(llen, SKIPPED); else ret += expect_nz(expr, llen; } while (0)
+
+static __attribute__((unused))
+int expect_nz(int expr, int llen)
+{
+ int ret = !(expr != 0);
+
+ llen += printf(" = %d ", expr);
+ result(llen, ret ? FAIL : OK);
+ return ret;
+}
+
+
+#define EXPECT_EQ(cond, expr, val) \
+ do { if (!(cond)) result(llen, SKIPPED); else ret += expect_eq(expr, llen, val); } while (0)
+
+static __attribute__((unused))
+int expect_eq(uint64_t expr, int llen, uint64_t val)
+{
+ int ret = !(expr == val);
+
+ llen += printf(" = %lld ", (long long)expr);
+ result(llen, ret ? FAIL : OK);
+ return ret;
+}
+
+
+#define EXPECT_NE(cond, expr, val) \
+ do { if (!(cond)) result(llen, SKIPPED); else ret += expect_ne(expr, llen, val); } while (0)
+
+static __attribute__((unused))
+int expect_ne(int expr, int llen, int val)
+{
+ int ret = !(expr != val);
+
+ llen += printf(" = %d ", expr);
+ result(llen, ret ? FAIL : OK);
+ return ret;
+}
+
+
+#define EXPECT_GE(cond, expr, val) \
+ do { if (!(cond)) result(llen, SKIPPED); else ret += expect_ge(expr, llen, val); } while (0)
+
+static __attribute__((unused))
+int expect_ge(int expr, int llen, int val)
+{
+ int ret = !(expr >= val);
+
+ llen += printf(" = %d ", expr);
+ result(llen, ret ? FAIL : OK);
+ return ret;
+}
+
+
+#define EXPECT_GT(cond, expr, val) \
+ do { if (!(cond)) result(llen, SKIPPED); else ret += expect_gt(expr, llen, val); } while (0)
+
+static __attribute__((unused))
+int expect_gt(int expr, int llen, int val)
+{
+ int ret = !(expr > val);
+
+ llen += printf(" = %d ", expr);
+ result(llen, ret ? FAIL : OK);
+ return ret;
+}
+
+
+#define EXPECT_LE(cond, expr, val) \
+ do { if (!(cond)) result(llen, SKIPPED); else ret += expect_le(expr, llen, val); } while (0)
+
+static __attribute__((unused))
+int expect_le(int expr, int llen, int val)
+{
+ int ret = !(expr <= val);
+
+ llen += printf(" = %d ", expr);
+ result(llen, ret ? FAIL : OK);
+ return ret;
+}
+
+
+#define EXPECT_LT(cond, expr, val) \
+ do { if (!(cond)) result(llen, SKIPPED); else ret += expect_lt(expr, llen, val); } while (0)
+
+static __attribute__((unused))
+int expect_lt(int expr, int llen, int val)
+{
+ int ret = !(expr < val);
+
+ llen += printf(" = %d ", expr);
+ result(llen, ret ? FAIL : OK);
+ return ret;
+}
+
+
+#define EXPECT_SYSZR(cond, expr) \
+ do { if (!(cond)) result(llen, SKIPPED); else ret += expect_syszr(expr, llen); } while (0)
+
+static __attribute__((unused))
+int expect_syszr(int expr, int llen)
+{
+ int ret = 0;
+
+ if (expr) {
+ ret = 1;
+ llen += printf(" = %d %s ", expr, errorname(errno));
+ result(llen, FAIL);
+ } else {
+ llen += printf(" = %d ", expr);
+ result(llen, OK);
+ }
+ return ret;
+}
+
+
+#define EXPECT_SYSEQ(cond, expr, val) \
+ do { if (!(cond)) result(llen, SKIPPED); else ret += expect_syseq(expr, llen, val); } while (0)
+
+static __attribute__((unused))
+int expect_syseq(int expr, int llen, int val)
+{
+ int ret = 0;
+
+ if (expr != val) {
+ ret = 1;
+ llen += printf(" = %d %s ", expr, errorname(errno));
+ result(llen, FAIL);
+ } else {
+ llen += printf(" = %d ", expr);
+ result(llen, OK);
+ }
+ return ret;
+}
+
+
+#define EXPECT_SYSNE(cond, expr, val) \
+ do { if (!(cond)) result(llen, SKIPPED); else ret += expect_sysne(expr, llen, val); } while (0)
+
+static __attribute__((unused))
+int expect_sysne(int expr, int llen, int val)
+{
+ int ret = 0;
+
+ if (expr == val) {
+ ret = 1;
+ llen += printf(" = %d %s ", expr, errorname(errno));
+ result(llen, FAIL);
+ } else {
+ llen += printf(" = %d ", expr);
+ result(llen, OK);
+ }
+ return ret;
+}
+
+
+#define EXPECT_SYSER2(cond, expr, expret, experr1, experr2) \
+ do { if (!(cond)) result(llen, SKIPPED); else ret += expect_syserr2(expr, expret, experr1, experr2, llen); } while (0)
+
+#define EXPECT_SYSER(cond, expr, expret, experr) \
+ EXPECT_SYSER2(cond, expr, expret, experr, 0)
+
+static __attribute__((unused))
+int expect_syserr2(int expr, int expret, int experr1, int experr2, int llen)
+{
+ int ret = 0;
+ int _errno = errno;
+
+ llen += printf(" = %d %s ", expr, errorname(_errno));
+ if (expr != expret || (_errno != experr1 && _errno != experr2)) {
+ ret = 1;
+ if (experr2 == 0)
+ llen += printf(" != (%d %s) ", expret, errorname(experr1));
+ else
+ llen += printf(" != (%d %s %s) ", expret, errorname(experr1), errorname(experr2));
+ result(llen, FAIL);
+ } else {
+ result(llen, OK);
+ }
+ return ret;
+}
+
+
+#define EXPECT_PTRZR(cond, expr) \
+ do { if (!(cond)) result(llen, SKIPPED); else ret += expect_ptrzr(expr, llen); } while (0)
+
+static __attribute__((unused))
+int expect_ptrzr(const void *expr, int llen)
+{
+ int ret = 0;
+
+ llen += printf(" = <%p> ", expr);
+ if (expr) {
+ ret = 1;
+ result(llen, FAIL);
+ } else {
+ result(llen, OK);
+ }
+ return ret;
+}
+
+
+#define EXPECT_PTRNZ(cond, expr) \
+ do { if (!(cond)) result(llen, SKIPPED); else ret += expect_ptrnz(expr, llen); } while (0)
+
+static __attribute__((unused))
+int expect_ptrnz(const void *expr, int llen)
+{
+ int ret = 0;
+
+ llen += printf(" = <%p> ", expr);
+ if (!expr) {
+ ret = 1;
+ result(llen, FAIL);
+ } else {
+ result(llen, OK);
+ }
+ return ret;
+}
+
+#define EXPECT_PTREQ(cond, expr, cmp) \
+ do { if (!(cond)) result(llen, SKIPPED); else ret += expect_ptreq(expr, llen, cmp); } while (0)
+
+static __attribute__((unused))
+int expect_ptreq(const void *expr, int llen, const void *cmp)
+{
+ int ret = 0;
+
+ llen += printf(" = <%p> ", expr);
+ if (expr != cmp) {
+ ret = 1;
+ result(llen, FAIL);
+ } else {
+ result(llen, OK);
+ }
+ return ret;
+}
+
+#define EXPECT_PTRNE(cond, expr, cmp) \
+ do { if (!(cond)) result(llen, SKIPPED); else ret += expect_ptrne(expr, llen, cmp); } while (0)
+
+static __attribute__((unused))
+int expect_ptrne(const void *expr, int llen, const void *cmp)
+{
+ int ret = 0;
+
+ llen += printf(" = <%p> ", expr);
+ if (expr == cmp) {
+ ret = 1;
+ result(llen, FAIL);
+ } else {
+ result(llen, OK);
+ }
+ return ret;
+}
+
+#define EXPECT_PTRGE(cond, expr, cmp) \
+ do { if (!(cond)) result(llen, SKIPPED); else ret += expect_ptrge(expr, llen, cmp); } while (0)
+
+static __attribute__((unused))
+int expect_ptrge(const void *expr, int llen, const void *cmp)
+{
+ int ret = !(expr >= cmp);
+
+ llen += printf(" = <%p> ", expr);
+ result(llen, ret ? FAIL : OK);
+ return ret;
+}
+
+#define EXPECT_PTRGT(cond, expr, cmp) \
+ do { if (!(cond)) result(llen, SKIPPED); else ret += expect_ptrgt(expr, llen, cmp); } while (0)
+
+static __attribute__((unused))
+int expect_ptrgt(const void *expr, int llen, const void *cmp)
+{
+ int ret = !(expr > cmp);
+
+ llen += printf(" = <%p> ", expr);
+ result(llen, ret ? FAIL : OK);
+ return ret;
+}
+
+
+#define EXPECT_PTRLE(cond, expr, cmp) \
+ do { if (!(cond)) result(llen, SKIPPED); else ret += expect_ptrle(expr, llen, cmp); } while (0)
+
+static __attribute__((unused))
+int expect_ptrle(const void *expr, int llen, const void *cmp)
+{
+ int ret = !(expr <= cmp);
+
+ llen += printf(" = <%p> ", expr);
+ result(llen, ret ? FAIL : OK);
+ return ret;
+}
+
+
+#define EXPECT_PTRLT(cond, expr, cmp) \
+ do { if (!(cond)) result(llen, SKIPPED); else ret += expect_ptrlt(expr, llen, cmp); } while (0)
+
+static __attribute__((unused))
+int expect_ptrlt(const void *expr, int llen, const void *cmp)
+{
+ int ret = !(expr < cmp);
+
+ llen += printf(" = <%p> ", expr);
+ result(llen, ret ? FAIL : OK);
+ return ret;
+}
+
+#define EXPECT_PTRER2(cond, expr, expret, experr1, experr2) \
+ do { if (!(cond)) result(llen, SKIPPED); else ret += expect_ptrerr2(expr, expret, experr1, experr2, llen); } while (0)
+
+#define EXPECT_PTRER(cond, expr, expret, experr) \
+ EXPECT_PTRER2(cond, expr, expret, experr, 0)
+
+static __attribute__((unused))
+int expect_ptrerr2(const void *expr, const void *expret, int experr1, int experr2, int llen)
+{
+ int ret = 0;
+ int _errno = errno;
+
+ llen += printf(" = <%p> %s ", expr, errorname(_errno));
+ if (expr != expret || (_errno != experr1 && _errno != experr2)) {
+ ret = 1;
+ if (experr2 == 0)
+ llen += printf(" != (<%p> %s) ", expret, errorname(experr1));
+ else
+ llen += printf(" != (<%p> %s %s) ", expret, errorname(experr1), errorname(experr2));
+ result(llen, FAIL);
+ } else {
+ result(llen, OK);
+ }
+ return ret;
+}
+
+#define EXPECT_STRZR(cond, expr) \
+ do { if (!(cond)) result(llen, SKIPPED); else ret += expect_strzr(expr, llen); } while (0)
+
+static __attribute__((unused))
+int expect_strzr(const char *expr, int llen)
+{
+ int ret = 0;
+
+ llen += printf(" = <%s> ", expr);
+ if (expr) {
+ ret = 1;
+ result(llen, FAIL);
+ } else {
+ result(llen, OK);
+ }
+ return ret;
+}
+
+
+#define EXPECT_STRNZ(cond, expr) \
+ do { if (!(cond)) result(llen, SKIPPED); else ret += expect_strnz(expr, llen); } while (0)
+
+static __attribute__((unused))
+int expect_strnz(const char *expr, int llen)
+{
+ int ret = 0;
+
+ llen += printf(" = <%s> ", expr);
+ if (!expr) {
+ ret = 1;
+ result(llen, FAIL);
+ } else {
+ result(llen, OK);
+ }
+ return ret;
+}
+
+
+#define EXPECT_STREQ(cond, expr, cmp) \
+ do { if (!(cond)) result(llen, SKIPPED); else ret += expect_streq(expr, llen, cmp); } while (0)
+
+static __attribute__((unused))
+int expect_streq(const char *expr, int llen, const char *cmp)
+{
+ int ret = 0;
+
+ llen += printf(" = <%s> ", expr);
+ if (strcmp(expr, cmp) != 0) {
+ ret = 1;
+ result(llen, FAIL);
+ } else {
+ result(llen, OK);
+ }
+ return ret;
+}
+
+
+#define EXPECT_STRNE(cond, expr, cmp) \
+ do { if (!(cond)) result(llen, SKIPPED); else ret += expect_strne(expr, llen, cmp); } while (0)
+
+static __attribute__((unused))
+int expect_strne(const char *expr, int llen, const char *cmp)
+{
+ int ret = 0;
+
+ llen += printf(" = <%s> ", expr);
+ if (strcmp(expr, cmp) == 0) {
+ ret = 1;
+ result(llen, FAIL);
+ } else {
+ result(llen, OK);
+ }
+ return ret;
+}
+
+
+/* declare tests based on line numbers. There must be exactly one test per line. */
+#define CASE_TEST(name) \
+ case __LINE__: llen += printf("%d %s", test, #name);
+
+/* constructors validate that they are executed in definition order */
+__attribute__((constructor))
+static void constructor1(void)
+{
+ constructor_test_value = 1;
+}
+
+__attribute__((constructor))
+static void constructor2(void)
+{
+ constructor_test_value *= 2;
+}
+
+int run_startup(int min, int max)
+{
+ int test;
+ int ret = 0;
+ /* kernel at least passes HOME and TERM, shell passes more */
+ int env_total = 2;
+ /* checking NULL for argv/argv0, environ and _auxv is not enough, let's compare with sbrk(0) or &end */
+ extern char end;
+ char *brk = sbrk(0) != (void *)-1 ? sbrk(0) : &end;
+ /* differ from nolibc, both glibc and musl have no global _auxv */
+ const unsigned long *test_auxv = (void *)-1;
+#ifdef NOLIBC
+ test_auxv = _auxv;
+#endif
+
+ for (test = min; test >= 0 && test <= max; test++) {
+ int llen = 0; /* line length */
+
+ /* avoid leaving empty lines below, this will insert holes into
+ * test numbers.
+ */
+ switch (test + __LINE__ + 1) {
+ CASE_TEST(argc); EXPECT_GE(1, test_argc, 1); break;
+ CASE_TEST(argv_addr); EXPECT_PTRGT(1, test_argv, brk); break;
+ CASE_TEST(argv_environ); EXPECT_PTRLT(1, test_argv, environ); break;
+ CASE_TEST(argv_total); EXPECT_EQ(1, environ - test_argv - 1, test_argc ?: 1); break;
+ CASE_TEST(argv0_addr); EXPECT_PTRGT(1, argv0, brk); break;
+ CASE_TEST(argv0_str); EXPECT_STRNZ(1, argv0 > brk ? argv0 : NULL); break;
+ CASE_TEST(argv0_len); EXPECT_GE(1, argv0 > brk ? strlen(argv0) : 0, 1); break;
+ CASE_TEST(environ_addr); EXPECT_PTRGT(1, environ, brk); break;
+ CASE_TEST(environ_envp); EXPECT_PTREQ(1, environ, test_envp); break;
+ CASE_TEST(environ_auxv); EXPECT_PTRLT(test_auxv != (void *)-1, environ, test_auxv); break;
+ CASE_TEST(environ_total); EXPECT_GE(test_auxv != (void *)-1, (void *)test_auxv - (void *)environ - 1, env_total); break;
+ CASE_TEST(environ_HOME); EXPECT_PTRNZ(1, getenv("HOME")); break;
+ CASE_TEST(auxv_addr); EXPECT_PTRGT(test_auxv != (void *)-1, test_auxv, brk); break;
+ CASE_TEST(auxv_AT_UID); EXPECT_EQ(1, getauxval(AT_UID), getuid()); break;
+ CASE_TEST(constructor); EXPECT_EQ(1, constructor_test_value, 2); break;
+ CASE_TEST(linkage_errno); EXPECT_PTREQ(1, linkage_test_errno_addr(), &errno); break;
+ CASE_TEST(linkage_constr); EXPECT_EQ(1, linkage_test_constructor_test_value, 6); break;
+ case __LINE__:
+ return ret; /* must be last */
+ /* note: do not set any defaults so as to permit holes above */
+ }
+ }
+ return ret;
+}
+
+
+/* used by some syscall tests below */
+int test_getdents64(const char *dir)
+{
+ char buffer[4096];
+ int fd, ret;
+ int err;
+
+ ret = fd = open(dir, O_RDONLY | O_DIRECTORY, 0);
+ if (ret < 0)
+ return ret;
+
+ ret = getdents64(fd, (void *)buffer, sizeof(buffer));
+ err = errno;
+ close(fd);
+
+ errno = err;
+ return ret;
+}
+
+int test_getpagesize(void)
+{
+ int x = getpagesize();
+ int c;
+
+ if (x < 0)
+ return x;
+
+#if defined(__x86_64__) || defined(__i386__) || defined(__i486__) || defined(__i586__) || defined(__i686__)
+ /*
+ * x86 family is always 4K page.
+ */
+ c = (x == 4096);
+#elif defined(__aarch64__)
+ /*
+ * Linux aarch64 supports three values of page size: 4K, 16K, and 64K
+ * which are selected at kernel compilation time.
+ */
+ c = (x == 4096 || x == (16 * 1024) || x == (64 * 1024));
+#else
+ /*
+ * Assuming other architectures must have at least 4K page.
+ */
+ c = (x >= 4096);
+#endif
+
+ return !c;
+}
+
+int test_fork(void)
+{
+ int status;
+ pid_t pid;
+
+ /* flush the printf buffer to avoid child flush it */
+ fflush(stdout);
+ fflush(stderr);
+
+ pid = fork();
+
+ switch (pid) {
+ case -1:
+ return 1;
+
+ case 0:
+ exit(123);
+
+ default:
+ pid = waitpid(pid, &status, 0);
+
+ return pid == -1 || !WIFEXITED(status) || WEXITSTATUS(status) != 123;
+ }
+}
+
+int test_stat_timestamps(void)
+{
+ struct stat st;
+
+ if (sizeof(st.st_atim.tv_sec) != sizeof(st.st_atime))
+ return 1;
+
+ if (stat("/proc/self/", &st) && stat(argv0, &st) && stat("/", &st))
+ return 1;
+
+ if (st.st_atim.tv_sec != st.st_atime || st.st_atim.tv_nsec > 1000000000)
+ return 1;
+
+ if (st.st_mtim.tv_sec != st.st_mtime || st.st_mtim.tv_nsec > 1000000000)
+ return 1;
+
+ if (st.st_ctim.tv_sec != st.st_ctime || st.st_ctim.tv_nsec > 1000000000)
+ return 1;
+
+ return 0;
+}
+
+int test_mmap_munmap(void)
+{
+ int ret, fd, i, page_size;
+ void *mem;
+ size_t file_size, length;
+ off_t offset, pa_offset;
+ struct stat stat_buf;
+ const char * const files[] = {
+ "/dev/zero",
+ "/proc/1/exe", "/proc/self/exe",
+ argv0,
+ NULL
+ };
+
+ page_size = getpagesize();
+ if (page_size < 0)
+ return 1;
+
+ /* find a right file to mmap, existed and accessible */
+ for (i = 0; files[i] != NULL; i++) {
+ ret = fd = open(files[i], O_RDONLY);
+ if (ret == -1)
+ continue;
+ else
+ break;
+ }
+ if (ret == -1)
+ return 1;
+
+ ret = stat(files[i], &stat_buf);
+ if (ret == -1)
+ goto end;
+
+ /* file size of the special /dev/zero is 0, let's assign one manually */
+ if (i == 0)
+ file_size = 3*page_size;
+ else
+ file_size = stat_buf.st_size;
+
+ offset = file_size - 1;
+ if (offset < 0)
+ offset = 0;
+ length = file_size - offset;
+ pa_offset = offset & ~(page_size - 1);
+
+ mem = mmap(NULL, length + offset - pa_offset, PROT_READ, MAP_SHARED, fd, pa_offset);
+ if (mem == MAP_FAILED) {
+ ret = 1;
+ goto end;
+ }
+
+ ret = munmap(mem, length + offset - pa_offset);
+
+end:
+ close(fd);
+ return !!ret;
+}
+
+int test_pipe(void)
+{
+ const char *const msg = "hello, nolibc";
+ int pipefd[2];
+ char buf[32];
+ size_t len;
+
+ if (pipe(pipefd) == -1)
+ return 1;
+
+ write(pipefd[1], msg, strlen(msg));
+ close(pipefd[1]);
+ len = read(pipefd[0], buf, sizeof(buf));
+ close(pipefd[0]);
+
+ if (len != strlen(msg))
+ return 1;
+
+ return !!memcmp(buf, msg, len);
+}
+
+int test_rlimit(void)
+{
+ struct rlimit rlim = {
+ .rlim_cur = 1 << 20,
+ .rlim_max = 1 << 21,
+ };
+ int ret;
+
+ ret = setrlimit(RLIMIT_CORE, &rlim);
+ if (ret)
+ return -1;
+
+ rlim.rlim_cur = 0;
+ rlim.rlim_max = 0;
+
+ ret = getrlimit(RLIMIT_CORE, &rlim);
+ if (ret)
+ return -1;
+
+ if (rlim.rlim_cur != 1 << 20)
+ return -1;
+ if (rlim.rlim_max != 1 << 21)
+ return -1;
+
+ return 0;
+}
+
+
+/* Run syscall tests between IDs <min> and <max>.
+ * Return 0 on success, non-zero on failure.
+ */
+int run_syscall(int min, int max)
+{
+ struct timeval tv;
+ struct timezone tz;
+ struct stat stat_buf;
+ int euid0;
+ int proc;
+ int test;
+ int tmp;
+ int ret = 0;
+ void *p1, *p2;
+ int has_gettid = 1;
+
+ /* <proc> indicates whether or not /proc is mounted */
+ proc = stat("/proc", &stat_buf) == 0;
+
+ /* this will be used to skip certain tests that can't be run unprivileged */
+ euid0 = geteuid() == 0;
+
+ /* from 2.30, glibc provides gettid() */
+#if defined(__GLIBC_MINOR__) && defined(__GLIBC__)
+ has_gettid = __GLIBC__ > 2 || (__GLIBC__ == 2 && __GLIBC_MINOR__ >= 30);
+#endif
+
+ for (test = min; test >= 0 && test <= max; test++) {
+ int llen = 0; /* line length */
+
+ /* avoid leaving empty lines below, this will insert holes into
+ * test numbers.
+ */
+ switch (test + __LINE__ + 1) {
+ CASE_TEST(getpid); EXPECT_SYSNE(1, getpid(), -1); break;
+ CASE_TEST(getppid); EXPECT_SYSNE(1, getppid(), -1); break;
+ CASE_TEST(gettid); EXPECT_SYSNE(has_gettid, gettid(), -1); break;
+ CASE_TEST(getpgid_self); EXPECT_SYSNE(1, getpgid(0), -1); break;
+ CASE_TEST(getpgid_bad); EXPECT_SYSER(1, getpgid(-1), -1, ESRCH); break;
+ CASE_TEST(kill_0); EXPECT_SYSZR(1, kill(getpid(), 0)); break;
+ CASE_TEST(kill_CONT); EXPECT_SYSZR(1, kill(getpid(), 0)); break;
+ CASE_TEST(kill_BADPID); EXPECT_SYSER(1, kill(INT_MAX, 0), -1, ESRCH); break;
+ CASE_TEST(sbrk_0); EXPECT_PTRNE(1, sbrk(0), (void *)-1); break;
+ CASE_TEST(sbrk); if ((p1 = p2 = sbrk(4096)) != (void *)-1) p2 = sbrk(-4096); EXPECT_SYSZR(1, (p2 == (void *)-1) || p2 == p1); break;
+ CASE_TEST(brk); EXPECT_SYSZR(1, brk(sbrk(0))); break;
+ CASE_TEST(chdir_root); EXPECT_SYSZR(1, chdir("/")); chdir(getenv("PWD")); break;
+ CASE_TEST(chdir_dot); EXPECT_SYSZR(1, chdir(".")); break;
+ CASE_TEST(chdir_blah); EXPECT_SYSER(1, chdir("/blah"), -1, ENOENT); break;
+ CASE_TEST(chmod_argv0); EXPECT_SYSZR(1, chmod(argv0, 0555)); break;
+ CASE_TEST(chmod_self); EXPECT_SYSER(proc, chmod("/proc/self", 0555), -1, EPERM); break;
+ CASE_TEST(chown_self); EXPECT_SYSER(proc, chown("/proc/self", 0, 0), -1, EPERM); break;
+ CASE_TEST(chroot_root); EXPECT_SYSZR(euid0, chroot("/")); break;
+ CASE_TEST(chroot_blah); EXPECT_SYSER(1, chroot("/proc/self/blah"), -1, ENOENT); break;
+ CASE_TEST(chroot_exe); EXPECT_SYSER(1, chroot(argv0), -1, ENOTDIR); break;
+ CASE_TEST(close_m1); EXPECT_SYSER(1, close(-1), -1, EBADF); break;
+ CASE_TEST(close_dup); EXPECT_SYSZR(1, close(dup(0))); break;
+ CASE_TEST(dup_0); tmp = dup(0); EXPECT_SYSNE(1, tmp, -1); close(tmp); break;
+ CASE_TEST(dup_m1); tmp = dup(-1); EXPECT_SYSER(1, tmp, -1, EBADF); if (tmp != -1) close(tmp); break;
+ CASE_TEST(dup2_0); tmp = dup2(0, 100); EXPECT_SYSNE(1, tmp, -1); close(tmp); break;
+ CASE_TEST(dup2_m1); tmp = dup2(-1, 100); EXPECT_SYSER(1, tmp, -1, EBADF); if (tmp != -1) close(tmp); break;
+ CASE_TEST(dup3_0); tmp = dup3(0, 100, 0); EXPECT_SYSNE(1, tmp, -1); close(tmp); break;
+ CASE_TEST(dup3_m1); tmp = dup3(-1, 100, 0); EXPECT_SYSER(1, tmp, -1, EBADF); if (tmp != -1) close(tmp); break;
+ CASE_TEST(execve_root); EXPECT_SYSER(1, execve("/", (char*[]){ [0] = "/", [1] = NULL }, NULL), -1, EACCES); break;
+ CASE_TEST(fork); EXPECT_SYSZR(1, test_fork()); break;
+ CASE_TEST(getdents64_root); EXPECT_SYSNE(1, test_getdents64("/"), -1); break;
+ CASE_TEST(getdents64_null); EXPECT_SYSER(1, test_getdents64("/dev/null"), -1, ENOTDIR); break;
+ CASE_TEST(gettimeofday_tv); EXPECT_SYSZR(1, gettimeofday(&tv, NULL)); break;
+ CASE_TEST(gettimeofday_tv_tz);EXPECT_SYSZR(1, gettimeofday(&tv, &tz)); break;
+ CASE_TEST(getpagesize); EXPECT_SYSZR(1, test_getpagesize()); break;
+ CASE_TEST(ioctl_tiocinq); EXPECT_SYSZR(1, ioctl(0, TIOCINQ, &tmp)); break;
+ CASE_TEST(link_root1); EXPECT_SYSER(1, link("/", "/"), -1, EEXIST); break;
+ CASE_TEST(link_blah); EXPECT_SYSER(1, link("/proc/self/blah", "/blah"), -1, ENOENT); break;
+ CASE_TEST(link_dir); EXPECT_SYSER(euid0, link("/", "/blah"), -1, EPERM); break;
+ CASE_TEST(link_cross); EXPECT_SYSER(proc, link("/proc/self/cmdline", "/blah"), -1, EXDEV); break;
+ CASE_TEST(lseek_m1); EXPECT_SYSER(1, lseek(-1, 0, SEEK_SET), -1, EBADF); break;
+ CASE_TEST(lseek_0); EXPECT_SYSER(1, lseek(0, 0, SEEK_SET), -1, ESPIPE); break;
+ CASE_TEST(mkdir_root); EXPECT_SYSER(1, mkdir("/", 0755), -1, EEXIST); break;
+ CASE_TEST(mmap_bad); EXPECT_PTRER(1, mmap(NULL, 0, PROT_READ, MAP_PRIVATE, 0, 0), MAP_FAILED, EINVAL); break;
+ CASE_TEST(munmap_bad); EXPECT_SYSER(1, munmap(NULL, 0), -1, EINVAL); break;
+ CASE_TEST(mmap_munmap_good); EXPECT_SYSZR(1, test_mmap_munmap()); break;
+ CASE_TEST(open_tty); EXPECT_SYSNE(1, tmp = open("/dev/null", 0), -1); if (tmp != -1) close(tmp); break;
+ CASE_TEST(open_blah); EXPECT_SYSER(1, tmp = open("/proc/self/blah", 0), -1, ENOENT); if (tmp != -1) close(tmp); break;
+ CASE_TEST(pipe); EXPECT_SYSZR(1, test_pipe()); break;
+ CASE_TEST(poll_null); EXPECT_SYSZR(1, poll(NULL, 0, 0)); break;
+ CASE_TEST(poll_stdout); EXPECT_SYSNE(1, ({ struct pollfd fds = { 1, POLLOUT, 0}; poll(&fds, 1, 0); }), -1); break;
+ CASE_TEST(poll_fault); EXPECT_SYSER(1, poll(NULL, 1, 0), -1, EFAULT); break;
+ CASE_TEST(prctl); EXPECT_SYSER(1, prctl(PR_SET_NAME, (unsigned long)NULL, 0, 0, 0), -1, EFAULT); break;
+ CASE_TEST(read_badf); EXPECT_SYSER(1, read(-1, &tmp, 1), -1, EBADF); break;
+ CASE_TEST(rlimit); EXPECT_SYSZR(1, test_rlimit()); break;
+ CASE_TEST(rmdir_blah); EXPECT_SYSER(1, rmdir("/blah"), -1, ENOENT); break;
+ CASE_TEST(sched_yield); EXPECT_SYSZR(1, sched_yield()); break;
+ CASE_TEST(select_null); EXPECT_SYSZR(1, ({ struct timeval tv = { 0 }; select(0, NULL, NULL, NULL, &tv); })); break;
+ CASE_TEST(select_stdout); EXPECT_SYSNE(1, ({ fd_set fds; FD_ZERO(&fds); FD_SET(1, &fds); select(2, NULL, &fds, NULL, NULL); }), -1); break;
+ CASE_TEST(select_fault); EXPECT_SYSER(1, select(1, (void *)1, NULL, NULL, 0), -1, EFAULT); break;
+ CASE_TEST(stat_blah); EXPECT_SYSER(1, stat("/proc/self/blah", &stat_buf), -1, ENOENT); break;
+ CASE_TEST(stat_fault); EXPECT_SYSER(1, stat(NULL, &stat_buf), -1, EFAULT); break;
+ CASE_TEST(stat_timestamps); EXPECT_SYSZR(1, test_stat_timestamps()); break;
+ CASE_TEST(symlink_root); EXPECT_SYSER(1, symlink("/", "/"), -1, EEXIST); break;
+ CASE_TEST(unlink_root); EXPECT_SYSER(1, unlink("/"), -1, EISDIR); break;
+ CASE_TEST(unlink_blah); EXPECT_SYSER(1, unlink("/proc/self/blah"), -1, ENOENT); break;
+ CASE_TEST(wait_child); EXPECT_SYSER(1, wait(&tmp), -1, ECHILD); break;
+ CASE_TEST(waitpid_min); EXPECT_SYSER(1, waitpid(INT_MIN, &tmp, WNOHANG), -1, ESRCH); break;
+ CASE_TEST(waitpid_child); EXPECT_SYSER(1, waitpid(getpid(), &tmp, WNOHANG), -1, ECHILD); break;
+ CASE_TEST(write_badf); EXPECT_SYSER(1, write(-1, &tmp, 1), -1, EBADF); break;
+ CASE_TEST(write_zero); EXPECT_SYSZR(1, write(1, &tmp, 0)); break;
+ CASE_TEST(syscall_noargs); EXPECT_SYSEQ(1, syscall(__NR_getpid), getpid()); break;
+ CASE_TEST(syscall_args); EXPECT_SYSER(1, syscall(__NR_statx, 0, NULL, 0, 0, NULL), -1, EFAULT); break;
+ case __LINE__:
+ return ret; /* must be last */
+ /* note: do not set any defaults so as to permit holes above */
+ }
+ }
+ return ret;
+}
+
+int run_stdlib(int min, int max)
+{
+ int test;
+ int ret = 0;
+
+ for (test = min; test >= 0 && test <= max; test++) {
+ int llen = 0; /* line length */
+
+ /* avoid leaving empty lines below, this will insert holes into
+ * test numbers.
+ */
+ switch (test + __LINE__ + 1) {
+ CASE_TEST(getenv_TERM); EXPECT_STRNZ(1, getenv("TERM")); break;
+ CASE_TEST(getenv_blah); EXPECT_STRZR(1, getenv("blah")); break;
+ CASE_TEST(setcmp_blah_blah); EXPECT_EQ(1, strcmp("blah", "blah"), 0); break;
+ CASE_TEST(setcmp_blah_blah2); EXPECT_NE(1, strcmp("blah", "blah2"), 0); break;
+ CASE_TEST(setncmp_blah_blah); EXPECT_EQ(1, strncmp("blah", "blah", 10), 0); break;
+ CASE_TEST(setncmp_blah_blah4); EXPECT_EQ(1, strncmp("blah", "blah4", 4), 0); break;
+ CASE_TEST(setncmp_blah_blah5); EXPECT_NE(1, strncmp("blah", "blah5", 5), 0); break;
+ CASE_TEST(setncmp_blah_blah6); EXPECT_NE(1, strncmp("blah", "blah6", 6), 0); break;
+ CASE_TEST(strchr_foobar_o); EXPECT_STREQ(1, strchr("foobar", 'o'), "oobar"); break;
+ CASE_TEST(strchr_foobar_z); EXPECT_STRZR(1, strchr("foobar", 'z')); break;
+ CASE_TEST(strrchr_foobar_o); EXPECT_STREQ(1, strrchr("foobar", 'o'), "obar"); break;
+ CASE_TEST(strrchr_foobar_z); EXPECT_STRZR(1, strrchr("foobar", 'z')); break;
+ CASE_TEST(memcmp_20_20); EXPECT_EQ(1, memcmp("aaa\x20", "aaa\x20", 4), 0); break;
+ CASE_TEST(memcmp_20_60); EXPECT_LT(1, memcmp("aaa\x20", "aaa\x60", 4), 0); break;
+ CASE_TEST(memcmp_60_20); EXPECT_GT(1, memcmp("aaa\x60", "aaa\x20", 4), 0); break;
+ CASE_TEST(memcmp_20_e0); EXPECT_LT(1, memcmp("aaa\x20", "aaa\xe0", 4), 0); break;
+ CASE_TEST(memcmp_e0_20); EXPECT_GT(1, memcmp("aaa\xe0", "aaa\x20", 4), 0); break;
+ CASE_TEST(memcmp_80_e0); EXPECT_LT(1, memcmp("aaa\x80", "aaa\xe0", 4), 0); break;
+ CASE_TEST(memcmp_e0_80); EXPECT_GT(1, memcmp("aaa\xe0", "aaa\x80", 4), 0); break;
+ CASE_TEST(limit_int8_max); EXPECT_EQ(1, INT8_MAX, (int8_t) 0x7f); break;
+ CASE_TEST(limit_int8_min); EXPECT_EQ(1, INT8_MIN, (int8_t) 0x80); break;
+ CASE_TEST(limit_uint8_max); EXPECT_EQ(1, UINT8_MAX, (uint8_t) 0xff); break;
+ CASE_TEST(limit_int16_max); EXPECT_EQ(1, INT16_MAX, (int16_t) 0x7fff); break;
+ CASE_TEST(limit_int16_min); EXPECT_EQ(1, INT16_MIN, (int16_t) 0x8000); break;
+ CASE_TEST(limit_uint16_max); EXPECT_EQ(1, UINT16_MAX, (uint16_t) 0xffff); break;
+ CASE_TEST(limit_int32_max); EXPECT_EQ(1, INT32_MAX, (int32_t) 0x7fffffff); break;
+ CASE_TEST(limit_int32_min); EXPECT_EQ(1, INT32_MIN, (int32_t) 0x80000000); break;
+ CASE_TEST(limit_uint32_max); EXPECT_EQ(1, UINT32_MAX, (uint32_t) 0xffffffff); break;
+ CASE_TEST(limit_int64_max); EXPECT_EQ(1, INT64_MAX, (int64_t) 0x7fffffffffffffff); break;
+ CASE_TEST(limit_int64_min); EXPECT_EQ(1, INT64_MIN, (int64_t) 0x8000000000000000); break;
+ CASE_TEST(limit_uint64_max); EXPECT_EQ(1, UINT64_MAX, (uint64_t) 0xffffffffffffffff); break;
+ CASE_TEST(limit_int_least8_max); EXPECT_EQ(1, INT_LEAST8_MAX, (int_least8_t) 0x7f); break;
+ CASE_TEST(limit_int_least8_min); EXPECT_EQ(1, INT_LEAST8_MIN, (int_least8_t) 0x80); break;
+ CASE_TEST(limit_uint_least8_max); EXPECT_EQ(1, UINT_LEAST8_MAX, (uint_least8_t) 0xff); break;
+ CASE_TEST(limit_int_least16_max); EXPECT_EQ(1, INT_LEAST16_MAX, (int_least16_t) 0x7fff); break;
+ CASE_TEST(limit_int_least16_min); EXPECT_EQ(1, INT_LEAST16_MIN, (int_least16_t) 0x8000); break;
+ CASE_TEST(limit_uint_least16_max); EXPECT_EQ(1, UINT_LEAST16_MAX, (uint_least16_t) 0xffff); break;
+ CASE_TEST(limit_int_least32_max); EXPECT_EQ(1, INT_LEAST32_MAX, (int_least32_t) 0x7fffffff); break;
+ CASE_TEST(limit_int_least32_min); EXPECT_EQ(1, INT_LEAST32_MIN, (int_least32_t) 0x80000000); break;
+ CASE_TEST(limit_uint_least32_max); EXPECT_EQ(1, UINT_LEAST32_MAX, (uint_least32_t) 0xffffffffU); break;
+ CASE_TEST(limit_int_least64_min); EXPECT_EQ(1, INT_LEAST64_MIN, (int_least64_t) 0x8000000000000000LL); break;
+ CASE_TEST(limit_int_least64_max); EXPECT_EQ(1, INT_LEAST64_MAX, (int_least64_t) 0x7fffffffffffffffLL); break;
+ CASE_TEST(limit_uint_least64_max); EXPECT_EQ(1, UINT_LEAST64_MAX, (uint_least64_t) 0xffffffffffffffffULL); break;
+ CASE_TEST(limit_int_fast8_max); EXPECT_EQ(1, INT_FAST8_MAX, (int_fast8_t) 0x7f); break;
+ CASE_TEST(limit_int_fast8_min); EXPECT_EQ(1, INT_FAST8_MIN, (int_fast8_t) 0x80); break;
+ CASE_TEST(limit_uint_fast8_max); EXPECT_EQ(1, UINT_FAST8_MAX, (uint_fast8_t) 0xff); break;
+ CASE_TEST(limit_int_fast16_min); EXPECT_EQ(1, INT_FAST16_MIN, (int_fast16_t) SINT_MIN_OF_TYPE(int_fast16_t)); break;
+ CASE_TEST(limit_int_fast16_max); EXPECT_EQ(1, INT_FAST16_MAX, (int_fast16_t) SINT_MAX_OF_TYPE(int_fast16_t)); break;
+ CASE_TEST(limit_uint_fast16_max); EXPECT_EQ(1, UINT_FAST16_MAX, (uint_fast16_t) UINTPTR_MAX); break;
+ CASE_TEST(limit_int_fast32_min); EXPECT_EQ(1, INT_FAST32_MIN, (int_fast32_t) SINT_MIN_OF_TYPE(int_fast32_t)); break;
+ CASE_TEST(limit_int_fast32_max); EXPECT_EQ(1, INT_FAST32_MAX, (int_fast32_t) SINT_MAX_OF_TYPE(int_fast32_t)); break;
+ CASE_TEST(limit_uint_fast32_max); EXPECT_EQ(1, UINT_FAST32_MAX, (uint_fast32_t) UINTPTR_MAX); break;
+ CASE_TEST(limit_int_fast64_min); EXPECT_EQ(1, INT_FAST64_MIN, (int_fast64_t) INT64_MIN); break;
+ CASE_TEST(limit_int_fast64_max); EXPECT_EQ(1, INT_FAST64_MAX, (int_fast64_t) INT64_MAX); break;
+ CASE_TEST(limit_uint_fast64_max); EXPECT_EQ(1, UINT_FAST64_MAX, (uint_fast64_t) UINT64_MAX); break;
+ CASE_TEST(sizeof_long_sane); EXPECT_EQ(1, sizeof(long) == 8 || sizeof(long) == 4, 1); break;
+ CASE_TEST(limit_intptr_min); EXPECT_EQ(1, INTPTR_MIN, sizeof(long) == 8 ? (intptr_t) 0x8000000000000000LL : (intptr_t) 0x80000000); break;
+ CASE_TEST(limit_intptr_max); EXPECT_EQ(1, INTPTR_MAX, sizeof(long) == 8 ? (intptr_t) 0x7fffffffffffffffLL : (intptr_t) 0x7fffffff); break;
+ CASE_TEST(limit_uintptr_max); EXPECT_EQ(1, UINTPTR_MAX, sizeof(long) == 8 ? (uintptr_t) 0xffffffffffffffffULL : (uintptr_t) 0xffffffffU); break;
+ CASE_TEST(limit_ptrdiff_min); EXPECT_EQ(1, PTRDIFF_MIN, sizeof(long) == 8 ? (ptrdiff_t) 0x8000000000000000LL : (ptrdiff_t) 0x80000000); break;
+ CASE_TEST(limit_ptrdiff_max); EXPECT_EQ(1, PTRDIFF_MAX, sizeof(long) == 8 ? (ptrdiff_t) 0x7fffffffffffffffLL : (ptrdiff_t) 0x7fffffff); break;
+ CASE_TEST(limit_size_max); EXPECT_EQ(1, SIZE_MAX, sizeof(long) == 8 ? (size_t) 0xffffffffffffffffULL : (size_t) 0xffffffffU); break;
+
+ case __LINE__:
+ return ret; /* must be last */
+ /* note: do not set any defaults so as to permit holes above */
+ }
+ }
+ return ret;
+}
+
+#define EXPECT_VFPRINTF(c, expected, fmt, ...) \
+ ret += expect_vfprintf(llen, c, expected, fmt, ##__VA_ARGS__)
+
+static int expect_vfprintf(int llen, int c, const char *expected, const char *fmt, ...)
+{
+ int ret, fd;
+ ssize_t w, r;
+ char buf[100];
+ FILE *memfile;
+ va_list args;
+
+ fd = open("/tmp", O_TMPFILE | O_EXCL | O_RDWR, 0600);
+ if (fd == -1) {
+ result(llen, SKIPPED);
+ return 0;
+ }
+
+ memfile = fdopen(fd, "w+");
+ if (!memfile) {
+ result(llen, FAIL);
+ return 1;
+ }
+
+ va_start(args, fmt);
+ w = vfprintf(memfile, fmt, args);
+ va_end(args);
+
+ if (w != c) {
+ llen += printf(" written(%d) != %d", (int)w, c);
+ result(llen, FAIL);
+ return 1;
+ }
+
+ fflush(memfile);
+ lseek(fd, 0, SEEK_SET);
+
+ r = read(fd, buf, sizeof(buf) - 1);
+
+ fclose(memfile);
+
+ if (r != w) {
+ llen += printf(" written(%d) != read(%d)", (int)w, (int)r);
+ result(llen, FAIL);
+ return 1;
+ }
+
+ buf[r] = '\0';
+ llen += printf(" \"%s\" = \"%s\"", expected, buf);
+ ret = strncmp(expected, buf, c);
+
+ result(llen, ret ? FAIL : OK);
+ return ret;
+}
+
+static int run_vfprintf(int min, int max)
+{
+ int test;
+ int ret = 0;
+
+ for (test = min; test >= 0 && test <= max; test++) {
+ int llen = 0; /* line length */
+
+ /* avoid leaving empty lines below, this will insert holes into
+ * test numbers.
+ */
+ switch (test + __LINE__ + 1) {
+ CASE_TEST(empty); EXPECT_VFPRINTF(0, "", ""); break;
+ CASE_TEST(simple); EXPECT_VFPRINTF(3, "foo", "foo"); break;
+ CASE_TEST(string); EXPECT_VFPRINTF(3, "foo", "%s", "foo"); break;
+ CASE_TEST(number); EXPECT_VFPRINTF(4, "1234", "%d", 1234); break;
+ CASE_TEST(negnumber); EXPECT_VFPRINTF(5, "-1234", "%d", -1234); break;
+ CASE_TEST(unsigned); EXPECT_VFPRINTF(5, "12345", "%u", 12345); break;
+ CASE_TEST(char); EXPECT_VFPRINTF(1, "c", "%c", 'c'); break;
+ CASE_TEST(hex); EXPECT_VFPRINTF(1, "f", "%x", 0xf); break;
+ CASE_TEST(pointer); EXPECT_VFPRINTF(3, "0x1", "%p", (void *) 0x1); break;
+ case __LINE__:
+ return ret; /* must be last */
+ /* note: do not set any defaults so as to permit holes above */
+ }
+ }
+ return ret;
+}
+
+static int smash_stack(void)
+{
+ char buf[100];
+ volatile char *ptr = buf;
+ size_t i;
+
+ for (i = 0; i < 200; i++)
+ ptr[i] = 'P';
+
+ return 1;
+}
+
+static int run_protection(int min __attribute__((unused)),
+ int max __attribute__((unused)))
+{
+ pid_t pid;
+ int llen = 0, status;
+ struct rlimit rlimit = { 0, 0 };
+
+ llen += printf("0 -fstackprotector ");
+
+#if !defined(_NOLIBC_STACKPROTECTOR)
+ llen += printf("not supported");
+ result(llen, SKIPPED);
+ return 0;
+#endif
+
+#if defined(_NOLIBC_STACKPROTECTOR)
+ if (!__stack_chk_guard) {
+ llen += printf("__stack_chk_guard not initialized");
+ result(llen, FAIL);
+ return 1;
+ }
+#endif
+
+ pid = -1;
+ pid = fork();
+
+ switch (pid) {
+ case -1:
+ llen += printf("fork()");
+ result(llen, FAIL);
+ return 1;
+
+ case 0:
+ close(STDOUT_FILENO);
+ close(STDERR_FILENO);
+
+ prctl(PR_SET_DUMPABLE, 0, 0, 0, 0);
+ setrlimit(RLIMIT_CORE, &rlimit);
+ smash_stack();
+ return 1;
+
+ default:
+ pid = waitpid(pid, &status, 0);
+
+ if (pid == -1 || !WIFSIGNALED(status) || WTERMSIG(status) != SIGABRT) {
+ llen += printf("waitpid()");
+ result(llen, FAIL);
+ return 1;
+ }
+ result(llen, OK);
+ return 0;
+ }
+}
+
+/* prepare what needs to be prepared for pid 1 (stdio, /dev, /proc, etc) */
+int prepare(void)
+{
+ struct stat stat_buf;
+
+ /* It's possible that /dev doesn't even exist or was not mounted, so
+ * we'll try to create it, mount it, or create minimal entries into it.
+ * We want at least /dev/null and /dev/console.
+ */
+ if (stat("/dev/.", &stat_buf) == 0 || mkdir("/dev", 0755) == 0) {
+ if (stat("/dev/console", &stat_buf) != 0 ||
+ stat("/dev/null", &stat_buf) != 0 ||
+ stat("/dev/zero", &stat_buf) != 0) {
+ /* try devtmpfs first, otherwise fall back to manual creation */
+ if (mount("/dev", "/dev", "devtmpfs", 0, 0) != 0) {
+ mknod("/dev/console", 0600 | S_IFCHR, makedev(5, 1));
+ mknod("/dev/null", 0666 | S_IFCHR, makedev(1, 3));
+ mknod("/dev/zero", 0666 | S_IFCHR, makedev(1, 5));
+ }
+ }
+ }
+
+ /* If no /dev/console was found before calling init, stdio is closed so
+ * we need to reopen it from /dev/console. If it failed above, it will
+ * still fail here and we cannot emit a message anyway.
+ */
+ if (close(dup(1)) == -1) {
+ int fd = open("/dev/console", O_RDWR);
+
+ if (fd >= 0) {
+ if (fd != 0)
+ dup2(fd, 0);
+ if (fd != 1)
+ dup2(fd, 1);
+ if (fd != 2)
+ dup2(fd, 2);
+ if (fd > 2)
+ close(fd);
+ puts("\nSuccessfully reopened /dev/console.");
+ }
+ }
+
+ /* try to mount /proc if not mounted. Silently fail otherwise */
+ if (stat("/proc/.", &stat_buf) == 0 || mkdir("/proc", 0755) == 0) {
+ if (stat("/proc/self", &stat_buf) != 0) {
+ /* If not mountable, remove /proc completely to avoid misuse */
+ if (mount("none", "/proc", "proc", 0, 0) != 0)
+ rmdir("/proc");
+ }
+ }
+
+ /* some tests rely on a writable /tmp */
+ mkdir("/tmp", 0755);
+
+ return 0;
+}
+
+/* This is the definition of known test names, with their functions */
+static const struct test test_names[] = {
+ /* add new tests here */
+ { .name = "startup", .func = run_startup },
+ { .name = "syscall", .func = run_syscall },
+ { .name = "stdlib", .func = run_stdlib },
+ { .name = "vfprintf", .func = run_vfprintf },
+ { .name = "protection", .func = run_protection },
+ { 0 }
+};
+
+static int is_setting_valid(char *test)
+{
+ int idx, len, test_len, valid = 0;
+ char delimiter;
+
+ if (!test)
+ return valid;
+
+ test_len = strlen(test);
+
+ for (idx = 0; test_names[idx].name; idx++) {
+ len = strlen(test_names[idx].name);
+ if (test_len < len)
+ continue;
+
+ if (strncmp(test, test_names[idx].name, len) != 0)
+ continue;
+
+ delimiter = test[len];
+ if (delimiter != ':' && delimiter != ',' && delimiter != '\0')
+ continue;
+
+ valid = 1;
+ break;
+ }
+
+ return valid;
+}
+
+int main(int argc, char **argv, char **envp)
+{
+ int min = 0;
+ int max = INT_MAX;
+ int ret = 0;
+ int err;
+ int idx;
+ char *test;
+
+ argv0 = argv[0];
+ test_argc = argc;
+ test_argv = argv;
+ test_envp = envp;
+
+ /* when called as init, it's possible that no console was opened, for
+ * example if no /dev file system was provided. We'll check that fd#1
+ * was opened, and if not we'll attempt to create and open /dev/console
+ * and /dev/null that we'll use for later tests.
+ */
+ if (getpid() == 1)
+ prepare();
+
+ /* the definition of a series of tests comes from either argv[1] or the
+ * "NOLIBC_TEST" environment variable. It's made of a comma-delimited
+ * series of test names and optional ranges:
+ * syscall:5-15[:.*],stdlib:8-10
+ */
+ test = argv[1];
+ if (!is_setting_valid(test))
+ test = getenv("NOLIBC_TEST");
+
+ if (is_setting_valid(test)) {
+ char *comma, *colon, *dash, *value;
+
+ do {
+ comma = strchr(test, ',');
+ if (comma)
+ *(comma++) = '\0';
+
+ colon = strchr(test, ':');
+ if (colon)
+ *(colon++) = '\0';
+
+ for (idx = 0; test_names[idx].name; idx++) {
+ if (strcmp(test, test_names[idx].name) == 0)
+ break;
+ }
+
+ if (test_names[idx].name) {
+ /* The test was named, it will be called at least
+ * once. We may have an optional range at <colon>
+ * here, which defaults to the full range.
+ */
+ do {
+ min = 0; max = INT_MAX;
+ value = colon;
+ if (value && *value) {
+ colon = strchr(value, ':');
+ if (colon)
+ *(colon++) = '\0';
+
+ dash = strchr(value, '-');
+ if (dash)
+ *(dash++) = '\0';
+
+ /* support :val: :min-max: :min-: :-max: */
+ if (*value)
+ min = atoi(value);
+ if (!dash)
+ max = min;
+ else if (*dash)
+ max = atoi(dash);
+
+ value = colon;
+ }
+
+ /* now's time to call the test */
+ printf("Running test '%s'\n", test_names[idx].name);
+ err = test_names[idx].func(min, max);
+ ret += err;
+ printf("Errors during this test: %d\n\n", err);
+ } while (colon && *colon);
+ } else
+ printf("Ignoring unknown test name '%s'\n", test);
+
+ test = comma;
+ } while (test && *test);
+ } else {
+ /* no test mentioned, run everything */
+ for (idx = 0; test_names[idx].name; idx++) {
+ printf("Running test '%s'\n", test_names[idx].name);
+ err = test_names[idx].func(min, max);
+ ret += err;
+ printf("Errors during this test: %d\n\n", err);
+ }
+ }
+
+ printf("Total number of errors: %d\n", ret);
+
+ if (getpid() == 1) {
+ /* we're running as init, there's no other process on the
+ * system, thus likely started from a VM for a quick check.
+ * Exiting will provoke a kernel panic that may be reported
+ * as an error by Qemu or the hypervisor, while stopping
+ * cleanly will often be reported as a success. This allows
+ * to use the output of this program for bisecting kernels.
+ */
+ printf("Leaving init with final status: %d\n", !!ret);
+ if (ret == 0)
+ reboot(RB_POWER_OFF);
+#if defined(__x86_64__)
+ /* QEMU started with "-device isa-debug-exit -no-reboot" will
+ * exit with status code 2N+1 when N is written to 0x501. We
+ * hard-code the syscall here as it's arch-dependent.
+ */
+ else if (syscall(__NR_ioperm, 0x501, 1, 1) == 0)
+ __asm__ volatile ("outb %%al, %%dx" :: "d"(0x501), "a"(0));
+ /* if it does nothing, fall back to the regular panic */
+#endif
+ }
+
+ printf("Exiting with status %d\n", !!ret);
+ return !!ret;
+}
diff --git a/tools/testing/selftests/nolibc/run-tests.sh b/tools/testing/selftests/nolibc/run-tests.sh
new file mode 100755
index 000000000000..c0a5a7cea9fa
--- /dev/null
+++ b/tools/testing/selftests/nolibc/run-tests.sh
@@ -0,0 +1,169 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+# Test runner for nolibc tests
+
+set -e
+
+trap 'echo Aborting...' 'ERR'
+
+crosstool_version=13.2.0
+hostarch=x86_64
+nproc=$(( $(nproc) + 2))
+cache_dir="${XDG_CACHE_HOME:-"$HOME"/.cache}"
+download_location="${cache_dir}/crosstools/"
+build_location="$(realpath "${cache_dir}"/nolibc-tests/)"
+perform_download=0
+test_mode=system
+archs="i386 x86_64 arm64 arm mips32le mips32be ppc ppc64 ppc64le riscv s390 loongarch"
+
+TEMP=$(getopt -o 'j:d:c:b:a:m:ph' -n "$0" -- "$@")
+
+eval set -- "$TEMP"
+unset TEMP
+
+print_usage() {
+ cat <<EOF
+Run nolibc testsuite for multiple architectures with crosstools
+
+Usage:
+ $0 [options] <architectures>
+
+Known architectures:
+ ${archs}
+
+Options:
+ -j [N] Allow N jobs at once (default: ${nproc})
+ -p Allow download of toolchains
+ -d [DIR] Download location for toolchains (default: ${download_location})
+ -c [VERSION] Version of toolchains to use (default: ${crosstool_version})
+ -a [ARCH] Host architecture of toolchains to use (default: ${hostarch})
+ -b [DIR] Build location (default: ${build_location})
+ -m [MODE] Test mode user/system (default: ${test_mode})
+EOF
+}
+
+while true; do
+ case "$1" in
+ '-j')
+ nproc="$2"
+ shift 2; continue ;;
+ '-p')
+ perform_download=1
+ shift; continue ;;
+ '-d')
+ download_location="$2"
+ shift 2; continue ;;
+ '-c')
+ crosstool_version="$2"
+ shift 2; continue ;;
+ '-a')
+ hostarch="$2"
+ shift 2; continue ;;
+ '-b')
+ build_location="$(realpath "$2")"
+ shift 2; continue ;;
+ '-m')
+ test_mode="$2"
+ shift 2; continue ;;
+ '-h')
+ print_usage
+ exit 0
+ ;;
+ '--')
+ shift; break ;;
+ *)
+ echo 'Internal error!' >&2; exit 1 ;;
+ esac
+done
+
+if [[ -n "$*" ]]; then
+ archs="$*"
+fi
+
+crosstool_arch() {
+ case "$1" in
+ arm64) echo aarch64;;
+ ppc) echo powerpc;;
+ ppc64) echo powerpc64;;
+ ppc64le) echo powerpc64;;
+ riscv) echo riscv64;;
+ loongarch) echo loongarch64;;
+ mips*) echo mips;;
+ *) echo "$1";;
+ esac
+}
+
+crosstool_abi() {
+ case "$1" in
+ arm) echo linux-gnueabi;;
+ *) echo linux;;
+ esac
+}
+
+download_crosstool() {
+ arch="$(crosstool_arch "$1")"
+ abi="$(crosstool_abi "$1")"
+
+ archive_name="${hostarch}-gcc-${crosstool_version}-nolibc-${arch}-${abi}.tar.gz"
+ url="https://mirrors.edge.kernel.org/pub/tools/crosstool/files/bin/${hostarch}/${crosstool_version}/${archive_name}"
+ archive="${download_location}${archive_name}"
+ stamp="${archive}.stamp"
+
+ [ -f "${stamp}" ] && return
+
+ echo "Downloading crosstools ${arch} ${crosstool_version}"
+ mkdir -p "${download_location}"
+ curl -o "${archive}" --fail --continue-at - "${url}"
+ tar -C "${download_location}" -xf "${archive}"
+ touch "${stamp}"
+}
+
+# capture command output, print it on failure
+# mimics chronic(1) from moreutils
+function swallow_output() {
+ if ! OUTPUT="$("$@" 2>&1)"; then
+ echo "$OUTPUT"
+ return 1
+ fi
+ return 0
+}
+
+test_arch() {
+ arch=$1
+ ct_arch=$(crosstool_arch "$arch")
+ ct_abi=$(crosstool_abi "$1")
+ cross_compile=$(realpath "${download_location}gcc-${crosstool_version}-nolibc/${ct_arch}-${ct_abi}/bin/${ct_arch}-${ct_abi}-")
+ build_dir="${build_location}/${arch}"
+ MAKE=(make -j"${nproc}" XARCH="${arch}" CROSS_COMPILE="${cross_compile}" O="${build_dir}")
+
+ mkdir -p "$build_dir"
+ if [ "$test_mode" = "system" ] && [ ! -f "${build_dir}/.config" ]; then
+ swallow_output "${MAKE[@]}" defconfig
+ fi
+ case "$test_mode" in
+ 'system')
+ test_target=run
+ ;;
+ 'user')
+ test_target=run-user
+ ;;
+ *)
+ echo "Unknown mode $test_mode"
+ exit 1
+ esac
+ printf '%-15s' "$arch:"
+ swallow_output "${MAKE[@]}" "$test_target" V=1
+ cp run.out run.out."${arch}"
+ "${MAKE[@]}" report | grep passed
+}
+
+if [ "$perform_download" -ne 0 ]; then
+ for arch in $archs; do
+ download_crosstool "$arch"
+ done
+fi
+
+for arch in $archs; do
+ test_arch "$arch"
+done
diff --git a/tools/testing/selftests/openat2/Makefile b/tools/testing/selftests/openat2/Makefile
index 4b93b1417b86..254d676a2689 100644
--- a/tools/testing/selftests/openat2/Makefile
+++ b/tools/testing/selftests/openat2/Makefile
@@ -1,8 +1,8 @@
# SPDX-License-Identifier: GPL-2.0-or-later
-CFLAGS += -Wall -O2 -g -fsanitize=address -fsanitize=undefined
+CFLAGS += -Wall -O2 -g -fsanitize=address -fsanitize=undefined -static-libasan
TEST_GEN_PROGS := openat2_test resolve_test rename_attack_test
include ../lib.mk
-$(TEST_GEN_PROGS): helpers.c
+$(TEST_GEN_PROGS): helpers.c helpers.h
diff --git a/tools/testing/selftests/openat2/helpers.h b/tools/testing/selftests/openat2/helpers.h
index a6ea27344db2..7056340b9339 100644
--- a/tools/testing/selftests/openat2/helpers.h
+++ b/tools/testing/selftests/openat2/helpers.h
@@ -9,6 +9,7 @@
#define _GNU_SOURCE
#include <stdint.h>
+#include <stdbool.h>
#include <errno.h>
#include <linux/types.h>
#include "../kselftest.h"
@@ -62,11 +63,12 @@ bool needs_openat2(const struct open_how *how);
(similar to chroot(2)). */
#endif /* RESOLVE_IN_ROOT */
-#define E_func(func, ...) \
- do { \
- if (func(__VA_ARGS__) < 0) \
- ksft_exit_fail_msg("%s:%d %s failed\n", \
- __FILE__, __LINE__, #func);\
+#define E_func(func, ...) \
+ do { \
+ errno = 0; \
+ if (func(__VA_ARGS__) < 0) \
+ ksft_exit_fail_msg("%s:%d %s failed - errno:%d\n", \
+ __FILE__, __LINE__, #func, errno); \
} while (0)
#define E_asprintf(...) E_func(asprintf, __VA_ARGS__)
diff --git a/tools/testing/selftests/openat2/openat2_test.c b/tools/testing/selftests/openat2/openat2_test.c
index b386367c606b..9024754530b2 100644
--- a/tools/testing/selftests/openat2/openat2_test.c
+++ b/tools/testing/selftests/openat2/openat2_test.c
@@ -22,7 +22,11 @@
* XXX: This is wrong on {mips, parisc, powerpc, sparc}.
*/
#undef O_LARGEFILE
+#ifdef __aarch64__
+#define O_LARGEFILE 0x20000
+#else
#define O_LARGEFILE 0x8000
+#endif
struct open_how_ext {
struct open_how inner;
@@ -155,7 +159,7 @@ struct flag_test {
int err;
};
-#define NUM_OPENAT2_FLAG_TESTS 23
+#define NUM_OPENAT2_FLAG_TESTS 25
void test_openat2_flags(void)
{
@@ -210,6 +214,12 @@ void test_openat2_flags(void)
.how.flags = O_TMPFILE | O_RDWR,
.how.mode = 0x0000A00000000000ULL, .err = -EINVAL },
+ /* ->resolve flags must not conflict. */
+ { .name = "incompatible resolve flags (BENEATH | IN_ROOT)",
+ .how.flags = O_RDONLY,
+ .how.resolve = RESOLVE_BENEATH | RESOLVE_IN_ROOT,
+ .err = -EINVAL },
+
/* ->resolve must only contain RESOLVE_* flags. */
{ .name = "invalid how.resolve and O_RDONLY",
.how.flags = O_RDONLY,
@@ -223,6 +233,11 @@ void test_openat2_flags(void)
{ .name = "invalid how.resolve and O_PATH",
.how.flags = O_PATH,
.how.resolve = 0x1337, .err = -EINVAL },
+
+ /* currently unknown upper 32 bit rejected. */
+ { .name = "currently unknown bit (1 << 63)",
+ .how.flags = O_RDONLY | (1ULL << 63),
+ .how.resolve = 0, .err = -EINVAL },
};
BUILD_BUG_ON(ARRAY_LEN(tests) != NUM_OPENAT2_FLAG_TESTS);
@@ -244,6 +259,16 @@ void test_openat2_flags(void)
unlink(path);
fd = sys_openat2(AT_FDCWD, path, &test->how);
+ if (fd < 0 && fd == -EOPNOTSUPP) {
+ /*
+ * Skip the testcase if it failed because not supported
+ * by FS. (e.g. a valid O_TMPFILE combination on NFS)
+ */
+ ksft_test_result_skip("openat2 with %s fails with %d (%s)\n",
+ test->name, fd, strerror(-fd));
+ goto next;
+ }
+
if (test->err >= 0)
failed = (fd < 0);
else
@@ -275,7 +300,7 @@ void test_openat2_flags(void)
ksft_print_msg("openat2 unexpectedly returned ");
if (fdpath)
- ksft_print_msg("%d['%s'] with %X (!= %X)\n",
+ ksft_print_msg("%d['%s'] with %X (!= %llX)\n",
fd, fdpath, fdflags,
test->how.flags);
else
@@ -288,7 +313,7 @@ skip:
else
resultfn("openat2 with %s fails with %d (%s)\n",
test->name, test->err, strerror(-test->err));
-
+next:
free(fdpath);
fflush(stdout);
}
diff --git a/tools/testing/selftests/perf_events/.gitignore b/tools/testing/selftests/perf_events/.gitignore
new file mode 100644
index 000000000000..790c47001e77
--- /dev/null
+++ b/tools/testing/selftests/perf_events/.gitignore
@@ -0,0 +1,3 @@
+# SPDX-License-Identifier: GPL-2.0-only
+sigtrap_threads
+remove_on_exec
diff --git a/tools/testing/selftests/perf_events/Makefile b/tools/testing/selftests/perf_events/Makefile
new file mode 100644
index 000000000000..db93c4ff081a
--- /dev/null
+++ b/tools/testing/selftests/perf_events/Makefile
@@ -0,0 +1,6 @@
+# SPDX-License-Identifier: GPL-2.0
+CFLAGS += -Wl,-no-as-needed -Wall $(KHDR_INCLUDES)
+LDFLAGS += -lpthread
+
+TEST_GEN_PROGS := sigtrap_threads remove_on_exec
+include ../lib.mk
diff --git a/tools/testing/selftests/perf_events/config b/tools/testing/selftests/perf_events/config
new file mode 100644
index 000000000000..ba58ff2203e4
--- /dev/null
+++ b/tools/testing/selftests/perf_events/config
@@ -0,0 +1 @@
+CONFIG_PERF_EVENTS=y
diff --git a/tools/testing/selftests/perf_events/remove_on_exec.c b/tools/testing/selftests/perf_events/remove_on_exec.c
new file mode 100644
index 000000000000..5814611a1dc7
--- /dev/null
+++ b/tools/testing/selftests/perf_events/remove_on_exec.c
@@ -0,0 +1,260 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Test for remove_on_exec.
+ *
+ * Copyright (C) 2021, Google LLC.
+ */
+
+#define _GNU_SOURCE
+
+/* We need the latest siginfo from the kernel repo. */
+#include <sys/types.h>
+#include <asm/siginfo.h>
+#define __have_siginfo_t 1
+#define __have_sigval_t 1
+#define __have_sigevent_t 1
+#define __siginfo_t_defined
+#define __sigval_t_defined
+#define __sigevent_t_defined
+#define _BITS_SIGINFO_CONSTS_H 1
+#define _BITS_SIGEVENT_CONSTS_H 1
+
+#include <stdbool.h>
+#include <stddef.h>
+#include <stdint.h>
+#include <stdio.h>
+#include <linux/perf_event.h>
+#include <pthread.h>
+#include <signal.h>
+#include <sys/ioctl.h>
+#include <sys/syscall.h>
+#include <unistd.h>
+
+#include "../kselftest_harness.h"
+
+static volatile int signal_count;
+
+static struct perf_event_attr make_event_attr(void)
+{
+ struct perf_event_attr attr = {
+ .type = PERF_TYPE_HARDWARE,
+ .size = sizeof(attr),
+ .config = PERF_COUNT_HW_INSTRUCTIONS,
+ .sample_period = 1000,
+ .exclude_kernel = 1,
+ .exclude_hv = 1,
+ .disabled = 1,
+ .inherit = 1,
+ /*
+ * Children normally retain their inherited event on exec; with
+ * remove_on_exec, we'll remove their event, but the parent and
+ * any other non-exec'd children will keep their events.
+ */
+ .remove_on_exec = 1,
+ .sigtrap = 1,
+ };
+ return attr;
+}
+
+static void sigtrap_handler(int signum, siginfo_t *info, void *ucontext)
+{
+ if (info->si_code != TRAP_PERF) {
+ fprintf(stderr, "%s: unexpected si_code %d\n", __func__, info->si_code);
+ return;
+ }
+
+ signal_count++;
+}
+
+FIXTURE(remove_on_exec)
+{
+ struct sigaction oldact;
+ int fd;
+};
+
+FIXTURE_SETUP(remove_on_exec)
+{
+ struct perf_event_attr attr = make_event_attr();
+ struct sigaction action = {};
+
+ signal_count = 0;
+
+ /* Initialize sigtrap handler. */
+ action.sa_flags = SA_SIGINFO | SA_NODEFER;
+ action.sa_sigaction = sigtrap_handler;
+ sigemptyset(&action.sa_mask);
+ ASSERT_EQ(sigaction(SIGTRAP, &action, &self->oldact), 0);
+
+ /* Initialize perf event. */
+ self->fd = syscall(__NR_perf_event_open, &attr, 0, -1, -1, PERF_FLAG_FD_CLOEXEC);
+ ASSERT_NE(self->fd, -1);
+}
+
+FIXTURE_TEARDOWN(remove_on_exec)
+{
+ close(self->fd);
+ sigaction(SIGTRAP, &self->oldact, NULL);
+}
+
+/* Verify event propagates to fork'd child. */
+TEST_F(remove_on_exec, fork_only)
+{
+ int status;
+ pid_t pid = fork();
+
+ if (pid == 0) {
+ ASSERT_EQ(signal_count, 0);
+ ASSERT_EQ(ioctl(self->fd, PERF_EVENT_IOC_ENABLE, 0), 0);
+ while (!signal_count);
+ _exit(42);
+ }
+
+ while (!signal_count); /* Child enables event. */
+ EXPECT_EQ(waitpid(pid, &status, 0), pid);
+ EXPECT_EQ(WEXITSTATUS(status), 42);
+}
+
+/*
+ * Verify that event does _not_ propagate to fork+exec'd child; event enabled
+ * after fork+exec.
+ */
+TEST_F(remove_on_exec, fork_exec_then_enable)
+{
+ pid_t pid_exec, pid_only_fork;
+ int pipefd[2];
+ int tmp;
+
+ /*
+ * Non-exec child, to ensure exec does not affect inherited events of
+ * other children.
+ */
+ pid_only_fork = fork();
+ if (pid_only_fork == 0) {
+ /* Block until parent enables event. */
+ while (!signal_count);
+ _exit(42);
+ }
+
+ ASSERT_NE(pipe(pipefd), -1);
+ pid_exec = fork();
+ if (pid_exec == 0) {
+ ASSERT_NE(dup2(pipefd[1], STDOUT_FILENO), -1);
+ close(pipefd[0]);
+ execl("/proc/self/exe", "exec_child", NULL);
+ _exit((perror("exec failed"), 1));
+ }
+ close(pipefd[1]);
+
+ ASSERT_EQ(waitpid(pid_exec, &tmp, WNOHANG), 0); /* Child is running. */
+ /* Wait for exec'd child to start spinning. */
+ EXPECT_EQ(read(pipefd[0], &tmp, sizeof(int)), sizeof(int));
+ EXPECT_EQ(tmp, 42);
+ close(pipefd[0]);
+ /* Now we can enable the event, knowing the child is doing work. */
+ EXPECT_EQ(ioctl(self->fd, PERF_EVENT_IOC_ENABLE, 0), 0);
+ /* If the event propagated to the exec'd child, it will exit normally... */
+ usleep(100000); /* ... give time for event to trigger (in case of bug). */
+ EXPECT_EQ(waitpid(pid_exec, &tmp, WNOHANG), 0); /* Should still be running. */
+ EXPECT_EQ(kill(pid_exec, SIGKILL), 0);
+
+ /* Verify removal from child did not affect this task's event. */
+ tmp = signal_count;
+ while (signal_count == tmp); /* Should not hang! */
+ /* Nor should it have affected the first child. */
+ EXPECT_EQ(waitpid(pid_only_fork, &tmp, 0), pid_only_fork);
+ EXPECT_EQ(WEXITSTATUS(tmp), 42);
+}
+
+/*
+ * Verify that event does _not_ propagate to fork+exec'd child; event enabled
+ * before fork+exec.
+ */
+TEST_F(remove_on_exec, enable_then_fork_exec)
+{
+ pid_t pid_exec;
+ int tmp;
+
+ EXPECT_EQ(ioctl(self->fd, PERF_EVENT_IOC_ENABLE, 0), 0);
+
+ pid_exec = fork();
+ if (pid_exec == 0) {
+ execl("/proc/self/exe", "exec_child", NULL);
+ _exit((perror("exec failed"), 1));
+ }
+
+ /*
+ * The child may exit abnormally at any time if the event propagated and
+ * a SIGTRAP is sent before the handler was set up.
+ */
+ usleep(100000); /* ... give time for event to trigger (in case of bug). */
+ EXPECT_EQ(waitpid(pid_exec, &tmp, WNOHANG), 0); /* Should still be running. */
+ EXPECT_EQ(kill(pid_exec, SIGKILL), 0);
+
+ /* Verify removal from child did not affect this task's event. */
+ tmp = signal_count;
+ while (signal_count == tmp); /* Should not hang! */
+}
+
+TEST_F(remove_on_exec, exec_stress)
+{
+ pid_t pids[30];
+ int i, tmp;
+
+ for (i = 0; i < sizeof(pids) / sizeof(pids[0]); i++) {
+ pids[i] = fork();
+ if (pids[i] == 0) {
+ execl("/proc/self/exe", "exec_child", NULL);
+ _exit((perror("exec failed"), 1));
+ }
+
+ /* Some forked with event disabled, rest with enabled. */
+ if (i > 10)
+ EXPECT_EQ(ioctl(self->fd, PERF_EVENT_IOC_ENABLE, 0), 0);
+ }
+
+ usleep(100000); /* ... give time for event to trigger (in case of bug). */
+
+ for (i = 0; i < sizeof(pids) / sizeof(pids[0]); i++) {
+ /* All children should still be running. */
+ EXPECT_EQ(waitpid(pids[i], &tmp, WNOHANG), 0);
+ EXPECT_EQ(kill(pids[i], SIGKILL), 0);
+ }
+
+ /* Verify event is still alive. */
+ tmp = signal_count;
+ while (signal_count == tmp);
+}
+
+/* For exec'd child. */
+static void exec_child(void)
+{
+ struct sigaction action = {};
+ const int val = 42;
+
+ /* Set up sigtrap handler in case we erroneously receive a trap. */
+ action.sa_flags = SA_SIGINFO | SA_NODEFER;
+ action.sa_sigaction = sigtrap_handler;
+ sigemptyset(&action.sa_mask);
+ if (sigaction(SIGTRAP, &action, NULL))
+ _exit((perror("sigaction failed"), 1));
+
+ /* Signal parent that we're starting to spin. */
+ if (write(STDOUT_FILENO, &val, sizeof(int)) == -1)
+ _exit((perror("write failed"), 1));
+
+ /* Should hang here until killed. */
+ while (!signal_count);
+}
+
+#define main test_main
+TEST_HARNESS_MAIN
+#undef main
+int main(int argc, char *argv[])
+{
+ if (!strcmp(argv[0], "exec_child")) {
+ exec_child();
+ return 1;
+ }
+
+ return test_main(argc, argv);
+}
diff --git a/tools/testing/selftests/perf_events/settings b/tools/testing/selftests/perf_events/settings
new file mode 100644
index 000000000000..6091b45d226b
--- /dev/null
+++ b/tools/testing/selftests/perf_events/settings
@@ -0,0 +1 @@
+timeout=120
diff --git a/tools/testing/selftests/perf_events/sigtrap_threads.c b/tools/testing/selftests/perf_events/sigtrap_threads.c
new file mode 100644
index 000000000000..d1d8483ac628
--- /dev/null
+++ b/tools/testing/selftests/perf_events/sigtrap_threads.c
@@ -0,0 +1,240 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Test for perf events with SIGTRAP across all threads.
+ *
+ * Copyright (C) 2021, Google LLC.
+ */
+
+#define _GNU_SOURCE
+
+/* We need the latest siginfo from the kernel repo. */
+#include <sys/types.h>
+#include <asm/siginfo.h>
+#define __have_siginfo_t 1
+#define __have_sigval_t 1
+#define __have_sigevent_t 1
+#define __siginfo_t_defined
+#define __sigval_t_defined
+#define __sigevent_t_defined
+#define _BITS_SIGINFO_CONSTS_H 1
+#define _BITS_SIGEVENT_CONSTS_H 1
+
+#include <stdbool.h>
+#include <stddef.h>
+#include <stdint.h>
+#include <stdio.h>
+#include <linux/hw_breakpoint.h>
+#include <linux/perf_event.h>
+#include <pthread.h>
+#include <signal.h>
+#include <sys/ioctl.h>
+#include <sys/syscall.h>
+#include <unistd.h>
+
+#include "../kselftest_harness.h"
+
+#define NUM_THREADS 5
+
+/* Data shared between test body, threads, and signal handler. */
+static struct {
+ int tids_want_signal; /* Which threads still want a signal. */
+ int signal_count; /* Sanity check number of signals received. */
+ volatile int iterate_on; /* Variable to set breakpoint on. */
+ siginfo_t first_siginfo; /* First observed siginfo_t. */
+} ctx;
+
+/* Unique value to check si_perf_data is correctly set from perf_event_attr::sig_data. */
+#define TEST_SIG_DATA(addr, id) (~(unsigned long)(addr) + id)
+
+static struct perf_event_attr make_event_attr(bool enabled, volatile void *addr,
+ unsigned long id)
+{
+ struct perf_event_attr attr = {
+ .type = PERF_TYPE_BREAKPOINT,
+ .size = sizeof(attr),
+ .sample_period = 1,
+ .disabled = !enabled,
+ .bp_addr = (unsigned long)addr,
+ .bp_type = HW_BREAKPOINT_RW,
+ .bp_len = HW_BREAKPOINT_LEN_1,
+ .inherit = 1, /* Children inherit events ... */
+ .inherit_thread = 1, /* ... but only cloned with CLONE_THREAD. */
+ .remove_on_exec = 1, /* Required by sigtrap. */
+ .sigtrap = 1, /* Request synchronous SIGTRAP on event. */
+ .sig_data = TEST_SIG_DATA(addr, id),
+ .exclude_kernel = 1, /* To allow */
+ .exclude_hv = 1, /* running as !root */
+ };
+ return attr;
+}
+
+static void sigtrap_handler(int signum, siginfo_t *info, void *ucontext)
+{
+ if (info->si_code != TRAP_PERF) {
+ fprintf(stderr, "%s: unexpected si_code %d\n", __func__, info->si_code);
+ return;
+ }
+
+ /*
+ * The data in siginfo_t we're interested in should all be the same
+ * across threads.
+ */
+ if (!__atomic_fetch_add(&ctx.signal_count, 1, __ATOMIC_RELAXED))
+ ctx.first_siginfo = *info;
+ __atomic_fetch_sub(&ctx.tids_want_signal, syscall(__NR_gettid), __ATOMIC_RELAXED);
+}
+
+static void *test_thread(void *arg)
+{
+ pthread_barrier_t *barrier = (pthread_barrier_t *)arg;
+ pid_t tid = syscall(__NR_gettid);
+ int iter;
+ int i;
+
+ pthread_barrier_wait(barrier);
+
+ __atomic_fetch_add(&ctx.tids_want_signal, tid, __ATOMIC_RELAXED);
+ iter = ctx.iterate_on; /* read */
+ if (iter >= 0) {
+ for (i = 0; i < iter - 1; i++) {
+ __atomic_fetch_add(&ctx.tids_want_signal, tid, __ATOMIC_RELAXED);
+ ctx.iterate_on = iter; /* idempotent write */
+ }
+ } else {
+ while (ctx.iterate_on);
+ }
+
+ return NULL;
+}
+
+FIXTURE(sigtrap_threads)
+{
+ struct sigaction oldact;
+ pthread_t threads[NUM_THREADS];
+ pthread_barrier_t barrier;
+ int fd;
+};
+
+FIXTURE_SETUP(sigtrap_threads)
+{
+ struct perf_event_attr attr = make_event_attr(false, &ctx.iterate_on, 0);
+ struct sigaction action = {};
+ int i;
+
+ memset(&ctx, 0, sizeof(ctx));
+
+ /* Initialize sigtrap handler. */
+ action.sa_flags = SA_SIGINFO | SA_NODEFER;
+ action.sa_sigaction = sigtrap_handler;
+ sigemptyset(&action.sa_mask);
+ ASSERT_EQ(sigaction(SIGTRAP, &action, &self->oldact), 0);
+
+ /* Initialize perf event. */
+ self->fd = syscall(__NR_perf_event_open, &attr, 0, -1, -1, PERF_FLAG_FD_CLOEXEC);
+ ASSERT_NE(self->fd, -1);
+
+ /* Spawn threads inheriting perf event. */
+ pthread_barrier_init(&self->barrier, NULL, NUM_THREADS + 1);
+ for (i = 0; i < NUM_THREADS; i++)
+ ASSERT_EQ(pthread_create(&self->threads[i], NULL, test_thread, &self->barrier), 0);
+}
+
+FIXTURE_TEARDOWN(sigtrap_threads)
+{
+ pthread_barrier_destroy(&self->barrier);
+ close(self->fd);
+ sigaction(SIGTRAP, &self->oldact, NULL);
+}
+
+static void run_test_threads(struct __test_metadata *_metadata,
+ FIXTURE_DATA(sigtrap_threads) *self)
+{
+ int i;
+
+ pthread_barrier_wait(&self->barrier);
+ for (i = 0; i < NUM_THREADS; i++)
+ ASSERT_EQ(pthread_join(self->threads[i], NULL), 0);
+}
+
+TEST_F(sigtrap_threads, remain_disabled)
+{
+ run_test_threads(_metadata, self);
+ EXPECT_EQ(ctx.signal_count, 0);
+ EXPECT_NE(ctx.tids_want_signal, 0);
+}
+
+TEST_F(sigtrap_threads, enable_event)
+{
+ EXPECT_EQ(ioctl(self->fd, PERF_EVENT_IOC_ENABLE, 0), 0);
+ run_test_threads(_metadata, self);
+
+ EXPECT_EQ(ctx.signal_count, NUM_THREADS);
+ EXPECT_EQ(ctx.tids_want_signal, 0);
+ EXPECT_EQ(ctx.first_siginfo.si_addr, &ctx.iterate_on);
+ EXPECT_EQ(ctx.first_siginfo.si_perf_type, PERF_TYPE_BREAKPOINT);
+ EXPECT_EQ(ctx.first_siginfo.si_perf_data, TEST_SIG_DATA(&ctx.iterate_on, 0));
+
+ /* Check enabled for parent. */
+ ctx.iterate_on = 0;
+ EXPECT_EQ(ctx.signal_count, NUM_THREADS + 1);
+}
+
+/* Test that modification propagates to all inherited events. */
+TEST_F(sigtrap_threads, modify_and_enable_event)
+{
+ struct perf_event_attr new_attr = make_event_attr(true, &ctx.iterate_on, 42);
+
+ EXPECT_EQ(ioctl(self->fd, PERF_EVENT_IOC_MODIFY_ATTRIBUTES, &new_attr), 0);
+ run_test_threads(_metadata, self);
+
+ EXPECT_EQ(ctx.signal_count, NUM_THREADS);
+ EXPECT_EQ(ctx.tids_want_signal, 0);
+ EXPECT_EQ(ctx.first_siginfo.si_addr, &ctx.iterate_on);
+ EXPECT_EQ(ctx.first_siginfo.si_perf_type, PERF_TYPE_BREAKPOINT);
+ EXPECT_EQ(ctx.first_siginfo.si_perf_data, TEST_SIG_DATA(&ctx.iterate_on, 42));
+
+ /* Check enabled for parent. */
+ ctx.iterate_on = 0;
+ EXPECT_EQ(ctx.signal_count, NUM_THREADS + 1);
+}
+
+/* Stress test event + signal handling. */
+TEST_F(sigtrap_threads, signal_stress)
+{
+ ctx.iterate_on = 3000;
+
+ EXPECT_EQ(ioctl(self->fd, PERF_EVENT_IOC_ENABLE, 0), 0);
+ run_test_threads(_metadata, self);
+ EXPECT_EQ(ioctl(self->fd, PERF_EVENT_IOC_DISABLE, 0), 0);
+
+ EXPECT_EQ(ctx.signal_count, NUM_THREADS * ctx.iterate_on);
+ EXPECT_EQ(ctx.tids_want_signal, 0);
+ EXPECT_EQ(ctx.first_siginfo.si_addr, &ctx.iterate_on);
+ EXPECT_EQ(ctx.first_siginfo.si_perf_type, PERF_TYPE_BREAKPOINT);
+ EXPECT_EQ(ctx.first_siginfo.si_perf_data, TEST_SIG_DATA(&ctx.iterate_on, 0));
+}
+
+TEST_F(sigtrap_threads, signal_stress_with_disable)
+{
+ const int target_count = NUM_THREADS * 3000;
+ int i;
+
+ ctx.iterate_on = -1;
+
+ EXPECT_EQ(ioctl(self->fd, PERF_EVENT_IOC_ENABLE, 0), 0);
+ pthread_barrier_wait(&self->barrier);
+ while (__atomic_load_n(&ctx.signal_count, __ATOMIC_RELAXED) < target_count) {
+ EXPECT_EQ(ioctl(self->fd, PERF_EVENT_IOC_DISABLE, 0), 0);
+ EXPECT_EQ(ioctl(self->fd, PERF_EVENT_IOC_ENABLE, 0), 0);
+ }
+ ctx.iterate_on = 0;
+ for (i = 0; i < NUM_THREADS; i++)
+ ASSERT_EQ(pthread_join(self->threads[i], NULL), 0);
+ EXPECT_EQ(ioctl(self->fd, PERF_EVENT_IOC_DISABLE, 0), 0);
+
+ EXPECT_EQ(ctx.first_siginfo.si_addr, &ctx.iterate_on);
+ EXPECT_EQ(ctx.first_siginfo.si_perf_type, PERF_TYPE_BREAKPOINT);
+ EXPECT_EQ(ctx.first_siginfo.si_perf_data, TEST_SIG_DATA(&ctx.iterate_on, 0));
+}
+
+TEST_HARNESS_MAIN
diff --git a/tools/testing/selftests/pid_namespace/Makefile b/tools/testing/selftests/pid_namespace/Makefile
index dcaefa224ca0..9286a1d22cd3 100644
--- a/tools/testing/selftests/pid_namespace/Makefile
+++ b/tools/testing/selftests/pid_namespace/Makefile
@@ -1,8 +1,8 @@
# SPDX-License-Identifier: GPL-2.0
-CFLAGS += -g -I../../../../usr/include/
+CFLAGS += -g $(KHDR_INCLUDES)
-TEST_GEN_PROGS := regression_enomem
+TEST_GEN_PROGS = regression_enomem
-include ../lib.mk
+LOCAL_HDRS += $(selfdir)/pidfd/pidfd.h
-$(OUTPUT)/regression_enomem: regression_enomem.c ../pidfd/pidfd.h
+include ../lib.mk
diff --git a/tools/testing/selftests/pid_namespace/regression_enomem.c b/tools/testing/selftests/pid_namespace/regression_enomem.c
index 73d532556d17..7d84097ad45c 100644
--- a/tools/testing/selftests/pid_namespace/regression_enomem.c
+++ b/tools/testing/selftests/pid_namespace/regression_enomem.c
@@ -11,7 +11,6 @@
#include <syscall.h>
#include <sys/wait.h>
-#include "../kselftest.h"
#include "../kselftest_harness.h"
#include "../pidfd/pidfd.h"
diff --git a/tools/testing/selftests/pidfd/Makefile b/tools/testing/selftests/pidfd/Makefile
index f4a2f28f926b..d731e3e76d5b 100644
--- a/tools/testing/selftests/pidfd/Makefile
+++ b/tools/testing/selftests/pidfd/Makefile
@@ -1,5 +1,5 @@
# SPDX-License-Identifier: GPL-2.0-only
-CFLAGS += -g -I../../../../usr/include/ -pthread
+CFLAGS += -g $(KHDR_INCLUDES) -pthread -Wall
TEST_GEN_PROGS := pidfd_test pidfd_fdinfo_test pidfd_open_test \
pidfd_poll_test pidfd_wait pidfd_getfd_test pidfd_setns_test
diff --git a/tools/testing/selftests/pidfd/config b/tools/testing/selftests/pidfd/config
index bb11de90c0c9..f6f2965e17af 100644
--- a/tools/testing/selftests/pidfd/config
+++ b/tools/testing/selftests/pidfd/config
@@ -4,3 +4,4 @@ CONFIG_USER_NS=y
CONFIG_PID_NS=y
CONFIG_NET_NS=y
CONFIG_CGROUPS=y
+CONFIG_CHECKPOINT_RESTORE=y
diff --git a/tools/testing/selftests/pidfd/pidfd.h b/tools/testing/selftests/pidfd/pidfd.h
index 8d728eda783d..88d6830ee004 100644
--- a/tools/testing/selftests/pidfd/pidfd.h
+++ b/tools/testing/selftests/pidfd/pidfd.h
@@ -22,6 +22,10 @@
#define P_PIDFD 3
#endif
+#ifndef CLONE_NEWTIME
+#define CLONE_NEWTIME 0x00000080
+#endif
+
#ifndef CLONE_PIDFD
#define CLONE_PIDFD 0x00001000
#endif
@@ -42,6 +46,10 @@
#define __NR_pidfd_getfd -1
#endif
+#ifndef PIDFD_NONBLOCK
+#define PIDFD_NONBLOCK O_NONBLOCK
+#endif
+
/*
* The kernel reserves 300 pids via RESERVED_PIDS in kernel/pid.c
* That means, when it wraps around any pid < 300 will be skipped.
@@ -60,7 +68,7 @@
#define PIDFD_SKIP 3
#define PIDFD_XFAIL 4
-int wait_for_pid(pid_t pid)
+static inline int wait_for_pid(pid_t pid)
{
int status, ret;
@@ -70,13 +78,19 @@ again:
if (errno == EINTR)
goto again;
+ ksft_print_msg("waitpid returned -1, errno=%d\n", errno);
return -1;
}
- if (!WIFEXITED(status))
+ if (!WIFEXITED(status)) {
+ ksft_print_msg(
+ "waitpid !WIFEXITED, WIFSIGNALED=%d, WTERMSIG=%d\n",
+ WIFSIGNALED(status), WTERMSIG(status));
return -1;
+ }
- return WEXITSTATUS(status);
+ ret = WEXITSTATUS(status);
+ return ret;
}
static inline int sys_pidfd_open(pid_t pid, unsigned int flags)
diff --git a/tools/testing/selftests/pidfd/pidfd_fdinfo_test.c b/tools/testing/selftests/pidfd/pidfd_fdinfo_test.c
index 22558524f71c..01cc37bf611c 100644
--- a/tools/testing/selftests/pidfd/pidfd_fdinfo_test.c
+++ b/tools/testing/selftests/pidfd/pidfd_fdinfo_test.c
@@ -12,6 +12,7 @@
#include <string.h>
#include <syscall.h>
#include <sys/wait.h>
+#include <sys/mman.h>
#include "pidfd.h"
#include "../kselftest.h"
@@ -61,7 +62,7 @@ static void error_report(struct error *err, const char *test_name)
break;
case PIDFD_PASS:
- ksft_test_result_pass("%s test: Passed\n");
+ ksft_test_result_pass("%s test: Passed\n", test_name);
break;
default:
@@ -80,7 +81,10 @@ static inline int error_check(struct error *err, const char *test_name)
return err->code;
}
+#define CHILD_STACK_SIZE 8192
+
struct child {
+ char *stack;
pid_t pid;
int fd;
};
@@ -89,17 +93,22 @@ static struct child clone_newns(int (*fn)(void *), void *args,
struct error *err)
{
static int flags = CLONE_PIDFD | CLONE_NEWPID | CLONE_NEWNS | SIGCHLD;
- size_t stack_size = 1024;
- char *stack[1024] = { 0 };
struct child ret;
if (!(flags & CLONE_NEWUSER) && geteuid() != 0)
flags |= CLONE_NEWUSER;
+ ret.stack = mmap(NULL, CHILD_STACK_SIZE, PROT_READ | PROT_WRITE,
+ MAP_PRIVATE | MAP_ANONYMOUS | MAP_STACK, -1, 0);
+ if (ret.stack == MAP_FAILED) {
+ error_set(err, -1, "mmap of stack failed (errno %d)", errno);
+ return ret;
+ }
+
#ifdef __ia64__
- ret.pid = __clone2(fn, stack, stack_size, flags, args, &ret.fd);
+ ret.pid = __clone2(fn, ret.stack, CHILD_STACK_SIZE, flags, args, &ret.fd);
#else
- ret.pid = clone(fn, stack + stack_size, flags, args, &ret.fd);
+ ret.pid = clone(fn, ret.stack + CHILD_STACK_SIZE, flags, args, &ret.fd);
#endif
if (ret.pid < 0) {
@@ -129,6 +138,12 @@ static inline int child_join(struct child *child, struct error *err)
else if (r > 0)
error_set(err, r, "child %d reported: %d", child->pid, r);
+ if (munmap(child->stack, CHILD_STACK_SIZE)) {
+ error_set(err, -1, "munmap of child stack failed (errno %d)", errno);
+ r = -1;
+ }
+
+ ksft_print_msg("waitpid WEXITSTATUS=%d\n", r);
return r;
}
diff --git a/tools/testing/selftests/pidfd/pidfd_getfd_test.c b/tools/testing/selftests/pidfd/pidfd_getfd_test.c
index 84b65ecccb04..cd51d547b751 100644
--- a/tools/testing/selftests/pidfd/pidfd_getfd_test.c
+++ b/tools/testing/selftests/pidfd/pidfd_getfd_test.c
@@ -5,6 +5,7 @@
#include <fcntl.h>
#include <limits.h>
#include <linux/types.h>
+#include <poll.h>
#include <sched.h>
#include <signal.h>
#include <stdio.h>
@@ -18,7 +19,6 @@
#include <linux/kcmp.h>
#include "pidfd.h"
-#include "../kselftest.h"
#include "../kselftest_harness.h"
/*
@@ -130,6 +130,7 @@ FIXTURE(child)
* When it is closed, the child will exit.
*/
int sk;
+ bool ignore_child_result;
};
FIXTURE_SETUP(child)
@@ -166,10 +167,14 @@ FIXTURE_SETUP(child)
FIXTURE_TEARDOWN(child)
{
+ int ret;
+
EXPECT_EQ(0, close(self->pidfd));
EXPECT_EQ(0, close(self->sk));
- EXPECT_EQ(0, wait_for_pid(self->pid));
+ ret = wait_for_pid(self->pid);
+ if (!self->ignore_child_result)
+ EXPECT_EQ(0, ret);
}
TEST_F(child, disable_ptrace)
@@ -205,7 +210,10 @@ TEST_F(child, fetch_fd)
fd = sys_pidfd_getfd(self->pidfd, self->remote_fd, 0);
ASSERT_GE(fd, 0);
- EXPECT_EQ(0, sys_kcmp(getpid(), self->pid, KCMP_FILE, fd, self->remote_fd));
+ ret = sys_kcmp(getpid(), self->pid, KCMP_FILE, fd, self->remote_fd);
+ if (ret < 0 && errno == ENOSYS)
+ SKIP(return, "kcmp() syscall not supported");
+ EXPECT_EQ(ret, 0);
ret = fcntl(fd, F_GETFD);
ASSERT_GE(ret, 0);
@@ -233,6 +241,29 @@ TEST(flags_set)
EXPECT_EQ(errno, EINVAL);
}
+TEST_F(child, no_strange_EBADF)
+{
+ struct pollfd fds;
+
+ self->ignore_child_result = true;
+
+ fds.fd = self->pidfd;
+ fds.events = POLLIN;
+
+ ASSERT_EQ(kill(self->pid, SIGKILL), 0);
+ ASSERT_EQ(poll(&fds, 1, 5000), 1);
+
+ /*
+ * It used to be that pidfd_getfd() could race with the exiting thread
+ * between exit_files() and release_task(), and get a non-null task
+ * with a NULL files struct, and you'd get EBADF, which was slightly
+ * confusing.
+ */
+ errno = 0;
+ EXPECT_EQ(sys_pidfd_getfd(self->pidfd, self->remote_fd, 0), -1);
+ EXPECT_EQ(errno, ESRCH);
+}
+
#if __NR_pidfd_getfd == -1
int main(void)
{
diff --git a/tools/testing/selftests/pidfd/pidfd_open_test.c b/tools/testing/selftests/pidfd/pidfd_open_test.c
index b9fe75fc3e51..8a59438ccc78 100644
--- a/tools/testing/selftests/pidfd/pidfd_open_test.c
+++ b/tools/testing/selftests/pidfd/pidfd_open_test.c
@@ -6,7 +6,6 @@
#include <inttypes.h>
#include <limits.h>
#include <linux/types.h>
-#include <linux/wait.h>
#include <sched.h>
#include <signal.h>
#include <stdbool.h>
diff --git a/tools/testing/selftests/pidfd/pidfd_poll_test.c b/tools/testing/selftests/pidfd/pidfd_poll_test.c
index 4b115444dfe9..610811275357 100644
--- a/tools/testing/selftests/pidfd/pidfd_poll_test.c
+++ b/tools/testing/selftests/pidfd/pidfd_poll_test.c
@@ -3,7 +3,6 @@
#define _GNU_SOURCE
#include <errno.h>
#include <linux/types.h>
-#include <linux/wait.h>
#include <poll.h>
#include <signal.h>
#include <stdbool.h>
diff --git a/tools/testing/selftests/pidfd/pidfd_setns_test.c b/tools/testing/selftests/pidfd/pidfd_setns_test.c
index 9418108eae13..6e2f2cd400ca 100644
--- a/tools/testing/selftests/pidfd/pidfd_setns_test.c
+++ b/tools/testing/selftests/pidfd/pidfd_setns_test.c
@@ -16,11 +16,9 @@
#include <unistd.h>
#include <sys/socket.h>
#include <sys/stat.h>
-#include <linux/kcmp.h>
#include "pidfd.h"
#include "../clone3/clone3_selftests.h"
-#include "../kselftest.h"
#include "../kselftest_harness.h"
enum {
@@ -32,6 +30,7 @@ enum {
PIDFD_NS_NET,
PIDFD_NS_CGROUP,
PIDFD_NS_PIDCLD,
+ PIDFD_NS_TIME,
PIDFD_NS_MAX
};
@@ -47,6 +46,7 @@ const struct ns_info {
[PIDFD_NS_NET] = { "net", CLONE_NEWNET, },
[PIDFD_NS_CGROUP] = { "cgroup", CLONE_NEWCGROUP, },
[PIDFD_NS_PIDCLD] = { "pid_for_children", 0, },
+ [PIDFD_NS_TIME] = { "time", CLONE_NEWTIME, },
};
FIXTURE(current_nsset)
@@ -74,7 +74,7 @@ static int sys_waitid(int which, pid_t pid, int options)
pid_t create_child(int *pidfd, unsigned flags)
{
- struct clone_args args = {
+ struct __clone_args args = {
.flags = CLONE_PIDFD | flags,
.exit_signal = SIGCHLD,
.pidfd = ptr_to_u64(pidfd),
@@ -83,9 +83,49 @@ pid_t create_child(int *pidfd, unsigned flags)
return sys_clone3(&args, sizeof(struct clone_args));
}
+static bool switch_timens(void)
+{
+ int fd, ret;
+
+ if (unshare(CLONE_NEWTIME))
+ return false;
+
+ fd = open("/proc/self/ns/time_for_children", O_RDONLY | O_CLOEXEC);
+ if (fd < 0)
+ return false;
+
+ ret = setns(fd, CLONE_NEWTIME);
+ close(fd);
+ return ret == 0;
+}
+
+static ssize_t read_nointr(int fd, void *buf, size_t count)
+{
+ ssize_t ret;
+
+ do {
+ ret = read(fd, buf, count);
+ } while (ret < 0 && errno == EINTR);
+
+ return ret;
+}
+
+static ssize_t write_nointr(int fd, const void *buf, size_t count)
+{
+ ssize_t ret;
+
+ do {
+ ret = write(fd, buf, count);
+ } while (ret < 0 && errno == EINTR);
+
+ return ret;
+}
+
FIXTURE_SETUP(current_nsset)
{
int i, proc_fd, ret;
+ int ipc_sockets[2];
+ char c;
for (i = 0; i < PIDFD_NS_MAX; i++) {
self->nsfds[i] = -EBADF;
@@ -130,6 +170,9 @@ FIXTURE_SETUP(current_nsset)
TH_LOG("%m - Failed to open pidfd for process %d", self->pid);
}
+ ret = socketpair(AF_LOCAL, SOCK_STREAM | SOCK_CLOEXEC, 0, ipc_sockets);
+ EXPECT_EQ(ret, 0);
+
/* Create tasks that will be stopped. */
self->child_pid1 = create_child(&self->child_pidfd1,
CLONE_NEWUSER | CLONE_NEWNS |
@@ -139,10 +182,27 @@ FIXTURE_SETUP(current_nsset)
EXPECT_GE(self->child_pid1, 0);
if (self->child_pid1 == 0) {
+ close(ipc_sockets[0]);
+
+ if (!switch_timens())
+ _exit(EXIT_FAILURE);
+
+ if (write_nointr(ipc_sockets[1], "1", 1) < 0)
+ _exit(EXIT_FAILURE);
+
+ close(ipc_sockets[1]);
+
pause();
_exit(EXIT_SUCCESS);
}
+ close(ipc_sockets[1]);
+ ASSERT_EQ(read_nointr(ipc_sockets[0], &c, 1), 1);
+ close(ipc_sockets[0]);
+
+ ret = socketpair(AF_LOCAL, SOCK_STREAM | SOCK_CLOEXEC, 0, ipc_sockets);
+ EXPECT_EQ(ret, 0);
+
self->child_pid2 = create_child(&self->child_pidfd2,
CLONE_NEWUSER | CLONE_NEWNS |
CLONE_NEWCGROUP | CLONE_NEWIPC |
@@ -151,10 +211,24 @@ FIXTURE_SETUP(current_nsset)
EXPECT_GE(self->child_pid2, 0);
if (self->child_pid2 == 0) {
+ close(ipc_sockets[0]);
+
+ if (!switch_timens())
+ _exit(EXIT_FAILURE);
+
+ if (write_nointr(ipc_sockets[1], "1", 1) < 0)
+ _exit(EXIT_FAILURE);
+
+ close(ipc_sockets[1]);
+
pause();
_exit(EXIT_SUCCESS);
}
+ close(ipc_sockets[1]);
+ ASSERT_EQ(read_nointr(ipc_sockets[0], &c, 1), 1);
+ close(ipc_sockets[0]);
+
for (i = 0; i < PIDFD_NS_MAX; i++) {
char p[100];
diff --git a/tools/testing/selftests/pidfd/pidfd_test.c b/tools/testing/selftests/pidfd/pidfd_test.c
index 7aff2d3b42c0..c081ae91313a 100644
--- a/tools/testing/selftests/pidfd/pidfd_test.c
+++ b/tools/testing/selftests/pidfd/pidfd_test.c
@@ -8,6 +8,7 @@
#include <sched.h>
#include <signal.h>
#include <stdio.h>
+#include <stdbool.h>
#include <stdlib.h>
#include <string.h>
#include <syscall.h>
@@ -27,6 +28,8 @@
#define MAX_EVENTS 5
+static bool have_pidfd_send_signal;
+
static pid_t pidfd_clone(int flags, int *pidfd, int (*fn)(void *))
{
size_t stack_size = 1024;
@@ -56,6 +59,13 @@ static int test_pidfd_send_signal_simple_success(void)
int pidfd, ret;
const char *test_name = "pidfd_send_signal send SIGUSR1";
+ if (!have_pidfd_send_signal) {
+ ksft_test_result_skip(
+ "%s test: pidfd_send_signal() syscall not supported\n",
+ test_name);
+ return 0;
+ }
+
pidfd = open("/proc/self", O_DIRECTORY | O_CLOEXEC);
if (pidfd < 0)
ksft_exit_fail_msg(
@@ -86,6 +96,13 @@ static int test_pidfd_send_signal_exited_fail(void)
pid_t pid;
const char *test_name = "pidfd_send_signal signal exited process";
+ if (!have_pidfd_send_signal) {
+ ksft_test_result_skip(
+ "%s test: pidfd_send_signal() syscall not supported\n",
+ test_name);
+ return 0;
+ }
+
pid = fork();
if (pid < 0)
ksft_exit_fail_msg("%s test: Failed to create new process\n",
@@ -98,7 +115,8 @@ static int test_pidfd_send_signal_exited_fail(void)
pidfd = open(buf, O_DIRECTORY | O_CLOEXEC);
- (void)wait_for_pid(pid);
+ ret = wait_for_pid(pid);
+ ksft_print_msg("waitpid WEXITSTATUS=%d\n", ret);
if (pidfd < 0)
ksft_exit_fail_msg(
@@ -137,16 +155,34 @@ static int test_pidfd_send_signal_recycled_pid_fail(void)
pid_t pid1;
const char *test_name = "pidfd_send_signal signal recycled pid";
+ if (!have_pidfd_send_signal) {
+ ksft_test_result_skip(
+ "%s test: pidfd_send_signal() syscall not supported\n",
+ test_name);
+ return 0;
+ }
+
ret = unshare(CLONE_NEWPID);
- if (ret < 0)
+ if (ret < 0) {
+ if (errno == EPERM) {
+ ksft_test_result_skip("%s test: Unsharing pid namespace not permitted\n",
+ test_name);
+ return 0;
+ }
ksft_exit_fail_msg("%s test: Failed to unshare pid namespace\n",
test_name);
+ }
ret = unshare(CLONE_NEWNS);
- if (ret < 0)
- ksft_exit_fail_msg(
- "%s test: Failed to unshare mount namespace\n",
- test_name);
+ if (ret < 0) {
+ if (errno == EPERM) {
+ ksft_test_result_skip("%s test: Unsharing mount namespace not permitted\n",
+ test_name);
+ return 0;
+ }
+ ksft_exit_fail_msg("%s test: Failed to unshare mount namespace\n",
+ test_name);
+ }
ret = mount(NULL, "/", NULL, MS_REC | MS_PRIVATE, 0);
if (ret < 0)
@@ -295,7 +331,7 @@ static int test_pidfd_send_signal_recycled_pid_fail(void)
ksft_exit_fail_msg("%s test: Failed to recycle pid %d\n",
test_name, PID_RECYCLE);
case PIDFD_SKIP:
- ksft_print_msg("%s test: Skipping test\n", test_name);
+ ksft_test_result_skip("%s test: Skipping test\n", test_name);
ret = 0;
break;
case PIDFD_XFAIL:
@@ -325,15 +361,17 @@ static int test_pidfd_send_signal_syscall_support(void)
ret = sys_pidfd_send_signal(pidfd, 0, NULL, 0);
if (ret < 0) {
- if (errno == ENOSYS)
- ksft_exit_skip(
+ if (errno == ENOSYS) {
+ ksft_test_result_skip(
"%s test: pidfd_send_signal() syscall not supported\n",
test_name);
-
+ return 0;
+ }
ksft_exit_fail_msg("%s test: Failed to send signal\n",
test_name);
}
+ have_pidfd_send_signal = true;
close(pidfd);
ksft_test_result_pass(
"%s test: pidfd_send_signal() syscall is supported. Tests can be executed\n",
@@ -343,13 +381,13 @@ static int test_pidfd_send_signal_syscall_support(void)
static void *test_pidfd_poll_exec_thread(void *priv)
{
- ksft_print_msg("Child Thread: starting. pid %d tid %d ; and sleeping\n",
+ ksft_print_msg("Child Thread: starting. pid %d tid %ld ; and sleeping\n",
getpid(), syscall(SYS_gettid));
ksft_print_msg("Child Thread: doing exec of sleep\n");
execl("/bin/sleep", "sleep", str(CHILD_THREAD_MIN_WAIT), (char *)NULL);
- ksft_print_msg("Child Thread: DONE. pid %d tid %d\n",
+ ksft_print_msg("Child Thread: DONE. pid %d tid %ld\n",
getpid(), syscall(SYS_gettid));
return NULL;
}
@@ -376,7 +414,7 @@ static void poll_pidfd(const char *test_name, int pidfd)
c = epoll_wait(epoll_fd, events, MAX_EVENTS, 5000);
if (c != 1 || !(events[0].events & EPOLLIN))
- ksft_exit_fail_msg("%s test: Unexpected epoll_wait result (c=%d, events=%x) ",
+ ksft_exit_fail_msg("%s test: Unexpected epoll_wait result (c=%d, events=%x) "
"(errno %d)\n",
test_name, c, events[0].events, errno);
@@ -389,7 +427,7 @@ static int child_poll_exec_test(void *args)
{
pthread_t t1;
- ksft_print_msg("Child (pidfd): starting. pid %d tid %d\n", getpid(),
+ ksft_print_msg("Child (pidfd): starting. pid %d tid %ld\n", getpid(),
syscall(SYS_gettid));
pthread_create(&t1, NULL, test_pidfd_poll_exec_thread, NULL);
/*
@@ -398,13 +436,14 @@ static int child_poll_exec_test(void *args)
*/
while (1)
sleep(1);
+
+ return 0;
}
static void test_pidfd_poll_exec(int use_waitpid)
{
int pid, pidfd = 0;
int status, ret;
- pthread_t t1;
time_t prog_start = time(NULL);
const char *test_name = "pidfd_poll check for premature notification on child thread exec";
@@ -441,10 +480,10 @@ static void test_pidfd_poll_exec(int use_waitpid)
static void *test_pidfd_poll_leader_exit_thread(void *priv)
{
- ksft_print_msg("Child Thread: starting. pid %d tid %d ; and sleeping\n",
+ ksft_print_msg("Child Thread: starting. pid %d tid %ld ; and sleeping\n",
getpid(), syscall(SYS_gettid));
sleep(CHILD_THREAD_MIN_WAIT);
- ksft_print_msg("Child Thread: DONE. pid %d tid %d\n", getpid(), syscall(SYS_gettid));
+ ksft_print_msg("Child Thread: DONE. pid %d tid %ld\n", getpid(), syscall(SYS_gettid));
return NULL;
}
@@ -453,7 +492,7 @@ static int child_poll_leader_exit_test(void *args)
{
pthread_t t1, t2;
- ksft_print_msg("Child: starting. pid %d tid %d\n", getpid(), syscall(SYS_gettid));
+ ksft_print_msg("Child: starting. pid %d tid %ld\n", getpid(), syscall(SYS_gettid));
pthread_create(&t1, NULL, test_pidfd_poll_leader_exit_thread, NULL);
pthread_create(&t2, NULL, test_pidfd_poll_leader_exit_thread, NULL);
@@ -463,13 +502,14 @@ static int child_poll_leader_exit_test(void *args)
*/
*child_exit_secs = time(NULL);
syscall(SYS_exit, 0);
+ /* Never reached, but appeases compiler thinking we should return. */
+ exit(0);
}
static void test_pidfd_poll_leader_exit(int use_waitpid)
{
int pid, pidfd = 0;
- int status, ret;
- time_t prog_start = time(NULL);
+ int status, ret = 0;
const char *test_name = "pidfd_poll check for premature notification on non-empty"
"group leader exit";
@@ -521,7 +561,7 @@ static void test_pidfd_poll_leader_exit(int use_waitpid)
int main(int argc, char **argv)
{
ksft_print_header();
- ksft_set_plan(4);
+ ksft_set_plan(8);
test_pidfd_poll_exec(0);
test_pidfd_poll_exec(1);
diff --git a/tools/testing/selftests/pidfd/pidfd_wait.c b/tools/testing/selftests/pidfd/pidfd_wait.c
index 7079f8eef792..0dcb8365ddc3 100644
--- a/tools/testing/selftests/pidfd/pidfd_wait.c
+++ b/tools/testing/selftests/pidfd/pidfd_wait.c
@@ -17,10 +17,15 @@
#include <unistd.h>
#include "pidfd.h"
-#include "../kselftest.h"
+#include "../kselftest_harness.h"
#define ptr_to_u64(ptr) ((__u64)((uintptr_t)(ptr)))
+/* Attempt to de-conflict with the selftests tree. */
+#ifndef SKIP
+#define SKIP(s, ...) XFAIL(s, ##__VA_ARGS__)
+#endif
+
static pid_t sys_clone3(struct clone_args *args)
{
return syscall(__NR_clone3, args, sizeof(struct clone_args));
@@ -32,10 +37,9 @@ static int sys_waitid(int which, pid_t pid, siginfo_t *info, int options,
return syscall(__NR_waitid, which, pid, info, options, ru);
}
-static int test_pidfd_wait_simple(void)
+TEST(wait_simple)
{
- const char *test_name = "pidfd wait simple";
- int pidfd = -1, status = 0;
+ int pidfd = -1;
pid_t parent_tid = -1;
struct clone_args args = {
.parent_tid = ptr_to_u64(&parent_tid),
@@ -43,84 +47,47 @@ static int test_pidfd_wait_simple(void)
.flags = CLONE_PIDFD | CLONE_PARENT_SETTID,
.exit_signal = SIGCHLD,
};
- int ret;
pid_t pid;
siginfo_t info = {
.si_signo = 0,
};
pidfd = open("/proc/self", O_DIRECTORY | O_RDONLY | O_CLOEXEC);
- if (pidfd < 0)
- ksft_exit_fail_msg("%s test: failed to open /proc/self %s\n",
- test_name, strerror(errno));
+ ASSERT_GE(pidfd, 0);
pid = sys_waitid(P_PIDFD, pidfd, &info, WEXITED, NULL);
- if (pid == 0)
- ksft_exit_fail_msg(
- "%s test: succeeded to wait on invalid pidfd %s\n",
- test_name, strerror(errno));
- close(pidfd);
+ ASSERT_NE(pid, 0);
+ EXPECT_EQ(close(pidfd), 0);
pidfd = -1;
pidfd = open("/dev/null", O_RDONLY | O_CLOEXEC);
- if (pidfd == 0)
- ksft_exit_fail_msg("%s test: failed to open /dev/null %s\n",
- test_name, strerror(errno));
+ ASSERT_GE(pidfd, 0);
pid = sys_waitid(P_PIDFD, pidfd, &info, WEXITED, NULL);
- if (pid == 0)
- ksft_exit_fail_msg(
- "%s test: succeeded to wait on invalid pidfd %s\n",
- test_name, strerror(errno));
- close(pidfd);
+ ASSERT_NE(pid, 0);
+ EXPECT_EQ(close(pidfd), 0);
pidfd = -1;
pid = sys_clone3(&args);
- if (pid < 0)
- ksft_exit_fail_msg("%s test: failed to create new process %s\n",
- test_name, strerror(errno));
+ ASSERT_GE(pid, 0);
if (pid == 0)
exit(EXIT_SUCCESS);
pid = sys_waitid(P_PIDFD, pidfd, &info, WEXITED, NULL);
- if (pid < 0)
- ksft_exit_fail_msg(
- "%s test: failed to wait on process with pid %d and pidfd %d: %s\n",
- test_name, parent_tid, pidfd, strerror(errno));
-
- if (!WIFEXITED(info.si_status) || WEXITSTATUS(info.si_status))
- ksft_exit_fail_msg(
- "%s test: unexpected status received after waiting on process with pid %d and pidfd %d: %s\n",
- test_name, parent_tid, pidfd, strerror(errno));
- close(pidfd);
-
- if (info.si_signo != SIGCHLD)
- ksft_exit_fail_msg(
- "%s test: unexpected si_signo value %d received after waiting on process with pid %d and pidfd %d: %s\n",
- test_name, info.si_signo, parent_tid, pidfd,
- strerror(errno));
-
- if (info.si_code != CLD_EXITED)
- ksft_exit_fail_msg(
- "%s test: unexpected si_code value %d received after waiting on process with pid %d and pidfd %d: %s\n",
- test_name, info.si_code, parent_tid, pidfd,
- strerror(errno));
-
- if (info.si_pid != parent_tid)
- ksft_exit_fail_msg(
- "%s test: unexpected si_pid value %d received after waiting on process with pid %d and pidfd %d: %s\n",
- test_name, info.si_pid, parent_tid, pidfd,
- strerror(errno));
-
- ksft_test_result_pass("%s test: Passed\n", test_name);
- return 0;
+ ASSERT_GE(pid, 0);
+ ASSERT_EQ(WIFEXITED(info.si_status), true);
+ ASSERT_EQ(WEXITSTATUS(info.si_status), 0);
+ EXPECT_EQ(close(pidfd), 0);
+
+ ASSERT_EQ(info.si_signo, SIGCHLD);
+ ASSERT_EQ(info.si_code, CLD_EXITED);
+ ASSERT_EQ(info.si_pid, parent_tid);
}
-static int test_pidfd_wait_states(void)
+TEST(wait_states)
{
- const char *test_name = "pidfd wait states";
- int pidfd = -1, status = 0;
+ int pidfd = -1;
pid_t parent_tid = -1;
struct clone_args args = {
.parent_tid = ptr_to_u64(&parent_tid),
@@ -128,144 +95,139 @@ static int test_pidfd_wait_states(void)
.flags = CLONE_PIDFD | CLONE_PARENT_SETTID,
.exit_signal = SIGCHLD,
};
- int ret;
+ int pfd[2];
pid_t pid;
siginfo_t info = {
.si_signo = 0,
};
+ ASSERT_EQ(pipe(pfd), 0);
pid = sys_clone3(&args);
- if (pid < 0)
- ksft_exit_fail_msg("%s test: failed to create new process %s\n",
- test_name, strerror(errno));
+ ASSERT_GE(pid, 0);
if (pid == 0) {
+ char buf[2];
+
+ close(pfd[1]);
kill(getpid(), SIGSTOP);
+ ASSERT_EQ(read(pfd[0], buf, 1), 1);
+ close(pfd[0]);
kill(getpid(), SIGSTOP);
exit(EXIT_SUCCESS);
}
- ret = sys_waitid(P_PIDFD, pidfd, &info, WSTOPPED, NULL);
- if (ret < 0)
- ksft_exit_fail_msg(
- "%s test: failed to wait on WSTOPPED process with pid %d and pidfd %d: %s\n",
- test_name, parent_tid, pidfd, strerror(errno));
-
- if (info.si_signo != SIGCHLD)
- ksft_exit_fail_msg(
- "%s test: unexpected si_signo value %d received after waiting on process with pid %d and pidfd %d: %s\n",
- test_name, info.si_signo, parent_tid, pidfd,
- strerror(errno));
-
- if (info.si_code != CLD_STOPPED)
- ksft_exit_fail_msg(
- "%s test: unexpected si_code value %d received after waiting on process with pid %d and pidfd %d: %s\n",
- test_name, info.si_code, parent_tid, pidfd,
- strerror(errno));
-
- if (info.si_pid != parent_tid)
- ksft_exit_fail_msg(
- "%s test: unexpected si_pid value %d received after waiting on process with pid %d and pidfd %d: %s\n",
- test_name, info.si_pid, parent_tid, pidfd,
- strerror(errno));
-
- ret = sys_pidfd_send_signal(pidfd, SIGCONT, NULL, 0);
- if (ret < 0)
- ksft_exit_fail_msg(
- "%s test: failed to send signal to process with pid %d and pidfd %d: %s\n",
- test_name, parent_tid, pidfd, strerror(errno));
-
- ret = sys_waitid(P_PIDFD, pidfd, &info, WCONTINUED, NULL);
- if (ret < 0)
- ksft_exit_fail_msg(
- "%s test: failed to wait WCONTINUED on process with pid %d and pidfd %d: %s\n",
- test_name, parent_tid, pidfd, strerror(errno));
-
- if (info.si_signo != SIGCHLD)
- ksft_exit_fail_msg(
- "%s test: unexpected si_signo value %d received after waiting on process with pid %d and pidfd %d: %s\n",
- test_name, info.si_signo, parent_tid, pidfd,
- strerror(errno));
-
- if (info.si_code != CLD_CONTINUED)
- ksft_exit_fail_msg(
- "%s test: unexpected si_code value %d received after waiting on process with pid %d and pidfd %d: %s\n",
- test_name, info.si_code, parent_tid, pidfd,
- strerror(errno));
-
- if (info.si_pid != parent_tid)
- ksft_exit_fail_msg(
- "%s test: unexpected si_pid value %d received after waiting on process with pid %d and pidfd %d: %s\n",
- test_name, info.si_pid, parent_tid, pidfd,
- strerror(errno));
-
- ret = sys_waitid(P_PIDFD, pidfd, &info, WUNTRACED, NULL);
- if (ret < 0)
- ksft_exit_fail_msg(
- "%s test: failed to wait on WUNTRACED process with pid %d and pidfd %d: %s\n",
- test_name, parent_tid, pidfd, strerror(errno));
-
- if (info.si_signo != SIGCHLD)
- ksft_exit_fail_msg(
- "%s test: unexpected si_signo value %d received after waiting on process with pid %d and pidfd %d: %s\n",
- test_name, info.si_signo, parent_tid, pidfd,
- strerror(errno));
-
- if (info.si_code != CLD_STOPPED)
- ksft_exit_fail_msg(
- "%s test: unexpected si_code value %d received after waiting on process with pid %d and pidfd %d: %s\n",
- test_name, info.si_code, parent_tid, pidfd,
- strerror(errno));
-
- if (info.si_pid != parent_tid)
- ksft_exit_fail_msg(
- "%s test: unexpected si_pid value %d received after waiting on process with pid %d and pidfd %d: %s\n",
- test_name, info.si_pid, parent_tid, pidfd,
- strerror(errno));
-
- ret = sys_pidfd_send_signal(pidfd, SIGKILL, NULL, 0);
- if (ret < 0)
- ksft_exit_fail_msg(
- "%s test: failed to send SIGKILL to process with pid %d and pidfd %d: %s\n",
- test_name, parent_tid, pidfd, strerror(errno));
+ close(pfd[0]);
+ ASSERT_EQ(sys_waitid(P_PIDFD, pidfd, &info, WSTOPPED, NULL), 0);
+ ASSERT_EQ(info.si_signo, SIGCHLD);
+ ASSERT_EQ(info.si_code, CLD_STOPPED);
+ ASSERT_EQ(info.si_pid, parent_tid);
- ret = sys_waitid(P_PIDFD, pidfd, &info, WEXITED, NULL);
- if (ret < 0)
- ksft_exit_fail_msg(
- "%s test: failed to wait on WEXITED process with pid %d and pidfd %d: %s\n",
- test_name, parent_tid, pidfd, strerror(errno));
-
- if (info.si_signo != SIGCHLD)
- ksft_exit_fail_msg(
- "%s test: unexpected si_signo value %d received after waiting on process with pid %d and pidfd %d: %s\n",
- test_name, info.si_signo, parent_tid, pidfd,
- strerror(errno));
-
- if (info.si_code != CLD_KILLED)
- ksft_exit_fail_msg(
- "%s test: unexpected si_code value %d received after waiting on process with pid %d and pidfd %d: %s\n",
- test_name, info.si_code, parent_tid, pidfd,
- strerror(errno));
-
- if (info.si_pid != parent_tid)
- ksft_exit_fail_msg(
- "%s test: unexpected si_pid value %d received after waiting on process with pid %d and pidfd %d: %s\n",
- test_name, info.si_pid, parent_tid, pidfd,
- strerror(errno));
-
- close(pidfd);
-
- ksft_test_result_pass("%s test: Passed\n", test_name);
- return 0;
+ ASSERT_EQ(sys_pidfd_send_signal(pidfd, SIGCONT, NULL, 0), 0);
+
+ ASSERT_EQ(sys_waitid(P_PIDFD, pidfd, &info, WCONTINUED, NULL), 0);
+ ASSERT_EQ(write(pfd[1], "C", 1), 1);
+ close(pfd[1]);
+ ASSERT_EQ(info.si_signo, SIGCHLD);
+ ASSERT_EQ(info.si_code, CLD_CONTINUED);
+ ASSERT_EQ(info.si_pid, parent_tid);
+
+ ASSERT_EQ(sys_waitid(P_PIDFD, pidfd, &info, WUNTRACED, NULL), 0);
+ ASSERT_EQ(info.si_signo, SIGCHLD);
+ ASSERT_EQ(info.si_code, CLD_STOPPED);
+ ASSERT_EQ(info.si_pid, parent_tid);
+
+ ASSERT_EQ(sys_pidfd_send_signal(pidfd, SIGKILL, NULL, 0), 0);
+
+ ASSERT_EQ(sys_waitid(P_PIDFD, pidfd, &info, WEXITED, NULL), 0);
+ ASSERT_EQ(info.si_signo, SIGCHLD);
+ ASSERT_EQ(info.si_code, CLD_KILLED);
+ ASSERT_EQ(info.si_pid, parent_tid);
+
+ EXPECT_EQ(close(pidfd), 0);
}
-int main(int argc, char **argv)
+TEST(wait_nonblock)
{
- ksft_print_header();
- ksft_set_plan(2);
+ int pidfd;
+ unsigned int flags = 0;
+ pid_t parent_tid = -1;
+ struct clone_args args = {
+ .parent_tid = ptr_to_u64(&parent_tid),
+ .flags = CLONE_PARENT_SETTID,
+ .exit_signal = SIGCHLD,
+ };
+ int ret;
+ pid_t pid;
+ siginfo_t info = {
+ .si_signo = 0,
+ };
- test_pidfd_wait_simple();
- test_pidfd_wait_states();
+ /*
+ * Callers need to see ECHILD with non-blocking pidfds when no child
+ * processes exists.
+ */
+ pidfd = sys_pidfd_open(getpid(), PIDFD_NONBLOCK);
+ EXPECT_GE(pidfd, 0) {
+ /* pidfd_open() doesn't support PIDFD_NONBLOCK. */
+ ASSERT_EQ(errno, EINVAL);
+ SKIP(return, "Skipping PIDFD_NONBLOCK test");
+ }
+
+ ret = sys_waitid(P_PIDFD, pidfd, &info, WEXITED, NULL);
+ ASSERT_LT(ret, 0);
+ ASSERT_EQ(errno, ECHILD);
+ EXPECT_EQ(close(pidfd), 0);
- return ksft_exit_pass();
+ pid = sys_clone3(&args);
+ ASSERT_GE(pid, 0);
+
+ if (pid == 0) {
+ kill(getpid(), SIGSTOP);
+ exit(EXIT_SUCCESS);
+ }
+
+ pidfd = sys_pidfd_open(pid, PIDFD_NONBLOCK);
+ EXPECT_GE(pidfd, 0) {
+ /* pidfd_open() doesn't support PIDFD_NONBLOCK. */
+ ASSERT_EQ(errno, EINVAL);
+ SKIP(return, "Skipping PIDFD_NONBLOCK test");
+ }
+
+ flags = fcntl(pidfd, F_GETFL, 0);
+ ASSERT_GT(flags, 0);
+ ASSERT_GT((flags & O_NONBLOCK), 0);
+
+ /*
+ * Callers need to see EAGAIN/EWOULDBLOCK with non-blocking pidfd when
+ * child processes exist but none have exited.
+ */
+ ret = sys_waitid(P_PIDFD, pidfd, &info, WEXITED, NULL);
+ ASSERT_LT(ret, 0);
+ ASSERT_EQ(errno, EAGAIN);
+
+ /*
+ * Callers need to continue seeing 0 with non-blocking pidfd and
+ * WNOHANG raised explicitly when child processes exist but none have
+ * exited.
+ */
+ ret = sys_waitid(P_PIDFD, pidfd, &info, WEXITED | WNOHANG, NULL);
+ ASSERT_EQ(ret, 0);
+
+ ASSERT_EQ(fcntl(pidfd, F_SETFL, (flags & ~O_NONBLOCK)), 0);
+
+ ASSERT_EQ(sys_waitid(P_PIDFD, pidfd, &info, WSTOPPED, NULL), 0);
+ ASSERT_EQ(info.si_signo, SIGCHLD);
+ ASSERT_EQ(info.si_code, CLD_STOPPED);
+ ASSERT_EQ(info.si_pid, parent_tid);
+
+ ASSERT_EQ(sys_pidfd_send_signal(pidfd, SIGCONT, NULL, 0), 0);
+
+ ASSERT_EQ(sys_waitid(P_PIDFD, pidfd, &info, WEXITED, NULL), 0);
+ ASSERT_EQ(info.si_signo, SIGCHLD);
+ ASSERT_EQ(info.si_code, CLD_EXITED);
+ ASSERT_EQ(info.si_pid, parent_tid);
+
+ EXPECT_EQ(close(pidfd), 0);
}
+
+TEST_HARNESS_MAIN
diff --git a/tools/testing/selftests/power_supply/Makefile b/tools/testing/selftests/power_supply/Makefile
new file mode 100644
index 000000000000..44f0658d3d2e
--- /dev/null
+++ b/tools/testing/selftests/power_supply/Makefile
@@ -0,0 +1,4 @@
+TEST_PROGS := test_power_supply_properties.sh
+TEST_FILES := helpers.sh
+
+include ../lib.mk
diff --git a/tools/testing/selftests/power_supply/helpers.sh b/tools/testing/selftests/power_supply/helpers.sh
new file mode 100644
index 000000000000..1ec90d7c9108
--- /dev/null
+++ b/tools/testing/selftests/power_supply/helpers.sh
@@ -0,0 +1,178 @@
+#!/bin/sh
+# SPDX-License-Identifier: GPL-2.0
+#
+# Copyright (c) 2022, 2024 Collabora Ltd
+SYSFS_SUPPLIES=/sys/class/power_supply
+
+calc() {
+ awk "BEGIN { print $* }";
+}
+
+test_sysfs_prop() {
+ PROP="$1"
+ VALUE="$2" # optional
+
+ PROP_PATH="$SYSFS_SUPPLIES"/"$DEVNAME"/"$PROP"
+ TEST_NAME="$DEVNAME".sysfs."$PROP"
+
+ if [ -z "$VALUE" ]; then
+ ktap_test_result "$TEST_NAME" [ -f "$PROP_PATH" ]
+ else
+ ktap_test_result "$TEST_NAME" grep -q "$VALUE" "$PROP_PATH"
+ fi
+}
+
+to_human_readable_unit() {
+ VALUE="$1"
+ UNIT="$2"
+
+ case "$VALUE" in
+ *[!0-9]* ) return ;; # Not a number
+ esac
+
+ if [ "$UNIT" = "uA" ]; then
+ new_unit="mA"
+ div=1000
+ elif [ "$UNIT" = "uV" ]; then
+ new_unit="V"
+ div=1000000
+ elif [ "$UNIT" = "uAh" ]; then
+ new_unit="Ah"
+ div=1000000
+ elif [ "$UNIT" = "uW" ]; then
+ new_unit="mW"
+ div=1000
+ elif [ "$UNIT" = "uWh" ]; then
+ new_unit="Wh"
+ div=1000000
+ else
+ return
+ fi
+
+ value_converted=$(calc "$VALUE"/"$div")
+ echo "$value_converted" "$new_unit"
+}
+
+_check_sysfs_prop_available() {
+ PROP=$1
+
+ PROP_PATH="$SYSFS_SUPPLIES"/"$DEVNAME"/"$PROP"
+ TEST_NAME="$DEVNAME".sysfs."$PROP"
+
+ if [ ! -e "$PROP_PATH" ] ; then
+ ktap_test_skip "$TEST_NAME"
+ return 1
+ fi
+
+ if ! cat "$PROP_PATH" >/dev/null; then
+ ktap_print_msg "Failed to read"
+ ktap_test_fail "$TEST_NAME"
+ return 1
+ fi
+
+ return 0
+}
+
+test_sysfs_prop_optional() {
+ PROP=$1
+ UNIT=$2 # optional
+
+ TEST_NAME="$DEVNAME".sysfs."$PROP"
+
+ _check_sysfs_prop_available "$PROP" || return
+ DATA=$(cat "$SYSFS_SUPPLIES"/"$DEVNAME"/"$PROP")
+
+ ktap_print_msg "Reported: '$DATA' $UNIT ($(to_human_readable_unit "$DATA" "$UNIT"))"
+ ktap_test_pass "$TEST_NAME"
+}
+
+test_sysfs_prop_optional_range() {
+ PROP=$1
+ MIN=$2
+ MAX=$3
+ UNIT=$4 # optional
+
+ TEST_NAME="$DEVNAME".sysfs."$PROP"
+
+ _check_sysfs_prop_available "$PROP" || return
+ DATA=$(cat "$SYSFS_SUPPLIES"/"$DEVNAME"/"$PROP")
+
+ if [ "$DATA" -lt "$MIN" ] || [ "$DATA" -gt "$MAX" ]; then
+ ktap_print_msg "'$DATA' is out of range (min=$MIN, max=$MAX)"
+ ktap_test_fail "$TEST_NAME"
+ else
+ ktap_print_msg "Reported: '$DATA' $UNIT ($(to_human_readable_unit "$DATA" "$UNIT"))"
+ ktap_test_pass "$TEST_NAME"
+ fi
+}
+
+test_sysfs_prop_optional_list() {
+ PROP=$1
+ LIST=$2
+
+ TEST_NAME="$DEVNAME".sysfs."$PROP"
+
+ _check_sysfs_prop_available "$PROP" || return
+ DATA=$(cat "$SYSFS_SUPPLIES"/"$DEVNAME"/"$PROP")
+
+ valid=0
+
+ OLDIFS=$IFS
+ IFS=","
+ for item in $LIST; do
+ if [ "$DATA" = "$item" ]; then
+ valid=1
+ break
+ fi
+ done
+ if [ "$valid" -eq 1 ]; then
+ ktap_print_msg "Reported: '$DATA'"
+ ktap_test_pass "$TEST_NAME"
+ else
+ ktap_print_msg "'$DATA' is not a valid value for this property"
+ ktap_test_fail "$TEST_NAME"
+ fi
+ IFS=$OLDIFS
+}
+
+dump_file() {
+ FILE="$1"
+ while read -r line; do
+ ktap_print_msg "$line"
+ done < "$FILE"
+}
+
+__test_uevent_prop() {
+ PROP="$1"
+ OPTIONAL="$2"
+ VALUE="$3" # optional
+
+ UEVENT_PATH="$SYSFS_SUPPLIES"/"$DEVNAME"/uevent
+ TEST_NAME="$DEVNAME".uevent."$PROP"
+
+ if ! grep -q "POWER_SUPPLY_$PROP=" "$UEVENT_PATH"; then
+ if [ "$OPTIONAL" -eq 1 ]; then
+ ktap_test_skip "$TEST_NAME"
+ else
+ ktap_print_msg "Missing property"
+ ktap_test_fail "$TEST_NAME"
+ fi
+ return
+ fi
+
+ if ! grep -q "POWER_SUPPLY_$PROP=$VALUE" "$UEVENT_PATH"; then
+ ktap_print_msg "Invalid value for uevent property, dumping..."
+ dump_file "$UEVENT_PATH"
+ ktap_test_fail "$TEST_NAME"
+ else
+ ktap_test_pass "$TEST_NAME"
+ fi
+}
+
+test_uevent_prop() {
+ __test_uevent_prop "$1" 0 "$2"
+}
+
+test_uevent_prop_optional() {
+ __test_uevent_prop "$1" 1 "$2"
+}
diff --git a/tools/testing/selftests/power_supply/test_power_supply_properties.sh b/tools/testing/selftests/power_supply/test_power_supply_properties.sh
new file mode 100755
index 000000000000..df272dfe1d2a
--- /dev/null
+++ b/tools/testing/selftests/power_supply/test_power_supply_properties.sh
@@ -0,0 +1,114 @@
+#!/bin/sh
+# SPDX-License-Identifier: GPL-2.0
+#
+# Copyright (c) 2022, 2024 Collabora Ltd
+#
+# This test validates the power supply uAPI: namely, the files in sysfs and
+# lines in uevent that expose the power supply properties.
+#
+# By default all power supplies available are tested. Optionally the name of a
+# power supply can be passed as a parameter to test only that one instead.
+DIR="$(dirname "$(readlink -f "$0")")"
+
+. "${DIR}"/../kselftest/ktap_helpers.sh
+
+. "${DIR}"/helpers.sh
+
+count_tests() {
+ SUPPLIES=$1
+
+ # This needs to be updated every time a new test is added.
+ NUM_TESTS=33
+
+ total_tests=0
+
+ for i in $SUPPLIES; do
+ total_tests=$(("$total_tests" + "$NUM_TESTS"))
+ done
+
+ echo "$total_tests"
+}
+
+ktap_print_header
+
+SYSFS_SUPPLIES=/sys/class/power_supply/
+
+if [ $# -eq 0 ]; then
+ supplies=$(ls "$SYSFS_SUPPLIES")
+else
+ supplies=$1
+fi
+
+ktap_set_plan "$(count_tests "$supplies")"
+
+for DEVNAME in $supplies; do
+ ktap_print_msg Testing device "$DEVNAME"
+
+ if [ ! -d "$SYSFS_SUPPLIES"/"$DEVNAME" ]; then
+ ktap_test_fail "$DEVNAME".exists
+ ktap_exit_fail_msg Device does not exist
+ fi
+
+ ktap_test_pass "$DEVNAME".exists
+
+ test_uevent_prop NAME "$DEVNAME"
+
+ test_sysfs_prop type
+ SUPPLY_TYPE=$(cat "$SYSFS_SUPPLIES"/"$DEVNAME"/type)
+ # This fails on kernels < 5.8 (needs 2ad3d74e3c69f)
+ test_uevent_prop TYPE "$SUPPLY_TYPE"
+
+ test_sysfs_prop_optional usb_type
+
+ test_sysfs_prop_optional_range online 0 2
+ test_sysfs_prop_optional_range present 0 1
+
+ test_sysfs_prop_optional_list status "Unknown","Charging","Discharging","Not charging","Full"
+
+ # Capacity is reported as percentage, thus any value less than 0 and
+ # greater than 100 are not allowed.
+ test_sysfs_prop_optional_range capacity 0 100 "%"
+
+ test_sysfs_prop_optional_list capacity_level "Unknown","Critical","Low","Normal","High","Full"
+
+ test_sysfs_prop_optional model_name
+ test_sysfs_prop_optional manufacturer
+ test_sysfs_prop_optional serial_number
+ test_sysfs_prop_optional_list technology "Unknown","NiMH","Li-ion","Li-poly","LiFe","NiCd","LiMn"
+
+ test_sysfs_prop_optional cycle_count
+
+ test_sysfs_prop_optional_list scope "Unknown","System","Device"
+
+ test_sysfs_prop_optional input_current_limit "uA"
+ test_sysfs_prop_optional input_voltage_limit "uV"
+
+ # Technically the power-supply class does not limit reported values.
+ # E.g. one could expose an RTC backup-battery, which goes below 1.5V or
+ # an electric vehicle battery with over 300V. But most devices do not
+ # have a step-up capable regulator behind the battery and operate with
+ # voltages considered safe to touch, so we limit the allowed range to
+ # 1.8V-60V to catch drivers reporting incorrectly scaled values. E.g. a
+ # common mistake is reporting data in mV instead of µV.
+ test_sysfs_prop_optional_range voltage_now 1800000 60000000 "uV"
+ test_sysfs_prop_optional_range voltage_min 1800000 60000000 "uV"
+ test_sysfs_prop_optional_range voltage_max 1800000 60000000 "uV"
+ test_sysfs_prop_optional_range voltage_min_design 1800000 60000000 "uV"
+ test_sysfs_prop_optional_range voltage_max_design 1800000 60000000 "uV"
+
+ # current based systems
+ test_sysfs_prop_optional current_now "uA"
+ test_sysfs_prop_optional current_max "uA"
+ test_sysfs_prop_optional charge_now "uAh"
+ test_sysfs_prop_optional charge_full "uAh"
+ test_sysfs_prop_optional charge_full_design "uAh"
+
+ # power based systems
+ test_sysfs_prop_optional power_now "uW"
+ test_sysfs_prop_optional energy_now "uWh"
+ test_sysfs_prop_optional energy_full "uWh"
+ test_sysfs_prop_optional energy_full_design "uWh"
+ test_sysfs_prop_optional energy_full_design "uWh"
+done
+
+ktap_finished
diff --git a/tools/testing/selftests/powerpc/Makefile b/tools/testing/selftests/powerpc/Makefile
index 0830e63818c1..c376151982c4 100644
--- a/tools/testing/selftests/powerpc/Makefile
+++ b/tools/testing/selftests/powerpc/Makefile
@@ -17,6 +17,7 @@ SUB_DIRS = alignment \
benchmarks \
cache_shape \
copyloops \
+ dexcr \
dscr \
mm \
nx-gzip \
@@ -30,8 +31,12 @@ SUB_DIRS = alignment \
eeh \
vphn \
math \
+ papr_attributes \
+ papr_vpd \
+ papr_sysparm \
ptrace \
- security
+ security \
+ mce
endif
@@ -43,28 +48,27 @@ $(SUB_DIRS):
include ../lib.mk
override define RUN_TESTS
- @for TARGET in $(SUB_DIRS); do \
+ +@for TARGET in $(SUB_DIRS); do \
BUILD_TARGET=$(OUTPUT)/$$TARGET; \
$(MAKE) OUTPUT=$$BUILD_TARGET -C $$TARGET run_tests;\
done;
endef
override define INSTALL_RULE
- @for TARGET in $(SUB_DIRS); do \
+ +@for TARGET in $(SUB_DIRS); do \
BUILD_TARGET=$(OUTPUT)/$$TARGET; \
$(MAKE) OUTPUT=$$BUILD_TARGET -C $$TARGET install;\
done;
endef
-override define EMIT_TESTS
- @for TARGET in $(SUB_DIRS); do \
+emit_tests:
+ +@for TARGET in $(SUB_DIRS); do \
BUILD_TARGET=$(OUTPUT)/$$TARGET; \
- $(MAKE) OUTPUT=$$BUILD_TARGET -s -C $$TARGET emit_tests;\
+ $(MAKE) OUTPUT=$$BUILD_TARGET -s -C $$TARGET $@;\
done;
-endef
override define CLEAN
- @for TARGET in $(SUB_DIRS); do \
+ +@for TARGET in $(SUB_DIRS); do \
BUILD_TARGET=$(OUTPUT)/$$TARGET; \
$(MAKE) OUTPUT=$$BUILD_TARGET -C $$TARGET clean; \
done;
@@ -74,4 +78,4 @@ endef
tags:
find . -name '*.c' -o -name '*.h' | xargs ctags
-.PHONY: tags $(SUB_DIRS)
+.PHONY: tags $(SUB_DIRS) emit_tests
diff --git a/tools/testing/selftests/powerpc/alignment/alignment_handler.c b/tools/testing/selftests/powerpc/alignment/alignment_handler.c
index 0453c50c949c..33ee34fc0828 100644
--- a/tools/testing/selftests/powerpc/alignment/alignment_handler.c
+++ b/tools/testing/selftests/powerpc/alignment/alignment_handler.c
@@ -9,7 +9,8 @@
* This selftest exercises the powerpc alignment fault handler.
*
* We create two sets of source and destination buffers, one in regular memory,
- * the other cache-inhibited (we use /dev/fb0 for this).
+ * the other cache-inhibited (by default we use /dev/fb0 for this, but an
+ * alterative path for cache-inhibited memory may be provided, e.g. memtrace).
*
* We initialise the source buffers, then use whichever set of load/store
* instructions is under test to copy bytes from the source buffers to the
@@ -45,14 +46,16 @@
#include <setjmp.h>
#include <signal.h>
-#include <asm/cputable.h>
-
#include "utils.h"
+#include "instructions.h"
int bufsize;
int debug;
int testing;
volatile int gotsig;
+bool prefixes_enabled;
+char *cipath = "/dev/fb0";
+long cioffset;
void sighandler(int sig, siginfo_t *info, void *ctx)
{
@@ -64,7 +67,12 @@ void sighandler(int sig, siginfo_t *info, void *ctx)
}
gotsig = sig;
#ifdef __powerpc64__
- ucp->uc_mcontext.gp_regs[PT_NIP] += 4;
+ if (prefixes_enabled) {
+ u32 inst = *(u32 *)ucp->uc_mcontext.gp_regs[PT_NIP];
+ ucp->uc_mcontext.gp_regs[PT_NIP] += ((inst >> 26 == 1) ? 8 : 4);
+ } else {
+ ucp->uc_mcontext.gp_regs[PT_NIP] += 4;
+ }
#else
ucp->uc_mcontext.uc_regs->gregs[PT_NIP] += 4;
#endif
@@ -84,6 +92,17 @@ void sighandler(int sig, siginfo_t *info, void *ctx)
} \
rc |= do_test(#name, test_##name)
+#define TESTP(name, ld_op, st_op, ld_reg, st_reg) \
+ void test_##name(char *s, char *d) \
+ { \
+ asm volatile( \
+ ld_op(ld_reg, %0, 0, 0) \
+ st_op(st_reg, %1, 0, 0) \
+ :: "r"(s), "r"(d), "r"(0) \
+ : "memory", "vs0", "vs32", "r31"); \
+ } \
+ rc |= do_test(#name, test_##name)
+
#define LOAD_VSX_XFORM_TEST(op) TEST(op, op, stxvd2x, XFORM, 32, 32)
#define STORE_VSX_XFORM_TEST(op) TEST(op, lxvd2x, op, XFORM, 32, 32)
#define LOAD_VSX_DFORM_TEST(op) TEST(op, op, stxv, DFORM, 32, 32)
@@ -103,6 +122,17 @@ void sighandler(int sig, siginfo_t *info, void *ctx)
#define LOAD_FLOAT_XFORM_TEST(op) TEST(op, op, stfdx, XFORM, 0, 0)
#define STORE_FLOAT_XFORM_TEST(op) TEST(op, lfdx, op, XFORM, 0, 0)
+#define LOAD_MLS_PREFIX_TEST(op) TESTP(op, op, PSTD, 31, 31)
+#define STORE_MLS_PREFIX_TEST(op) TESTP(op, PLD, op, 31, 31)
+
+#define LOAD_8LS_PREFIX_TEST(op) TESTP(op, op, PSTD, 31, 31)
+#define STORE_8LS_PREFIX_TEST(op) TESTP(op, PLD, op, 31, 31)
+
+#define LOAD_FLOAT_MLS_PREFIX_TEST(op) TESTP(op, op, PSTFD, 0, 0)
+#define STORE_FLOAT_MLS_PREFIX_TEST(op) TESTP(op, PLFD, op, 0, 0)
+
+#define LOAD_VSX_8LS_PREFIX_TEST(op, tail) TESTP(op, op, PSTXV ## tail, 0, 32)
+#define STORE_VSX_8LS_PREFIX_TEST(op, tail) TESTP(op, PLXV ## tail, op, 32, 0)
/* FIXME: Unimplemented tests: */
// STORE_DFORM_TEST(stq) /* FIXME: need two registers for quad */
@@ -195,17 +225,18 @@ int do_test(char *test_name, void (*test_func)(char *, char *))
printf("\tDoing %s:\t", test_name);
- fd = open("/dev/fb0", O_RDWR);
+ fd = open(cipath, O_RDWR);
if (fd < 0) {
printf("\n");
- perror("Can't open /dev/fb0 now?");
+ perror("Can't open ci file now?");
return 1;
}
- ci0 = mmap(NULL, bufsize, PROT_WRITE, MAP_SHARED,
- fd, 0x0);
- ci1 = mmap(NULL, bufsize, PROT_WRITE, MAP_SHARED,
- fd, bufsize);
+ ci0 = mmap(NULL, bufsize, PROT_WRITE | PROT_READ, MAP_SHARED,
+ fd, cioffset);
+ ci1 = mmap(NULL, bufsize, PROT_WRITE | PROT_READ, MAP_SHARED,
+ fd, cioffset + bufsize);
+
if ((ci0 == MAP_FAILED) || (ci1 == MAP_FAILED)) {
printf("\n");
perror("mmap failed");
@@ -226,8 +257,12 @@ int do_test(char *test_name, void (*test_func)(char *, char *))
}
rc = 0;
- /* offset = 0 no alignment fault, so skip */
- for (offset = 1; offset < 16; offset++) {
+ /*
+ * offset = 0 is aligned but tests the workaround for the P9N
+ * DD2.1 vector CI load issue (see 5080332c2c89 "powerpc/64s:
+ * Add workaround for P9 vector CI load issue")
+ */
+ for (offset = 0; offset < 16; offset++) {
width = 16; /* vsx == 16 bytes */
r = 0;
@@ -270,11 +305,11 @@ int do_test(char *test_name, void (*test_func)(char *, char *))
return rc;
}
-static bool can_open_fb0(void)
+static bool can_open_cifile(void)
{
int fd;
- fd = open("/dev/fb0", O_RDWR);
+ fd = open(cipath, O_RDWR);
if (fd < 0)
return false;
@@ -286,7 +321,7 @@ int test_alignment_handler_vsx_206(void)
{
int rc = 0;
- SKIP_IF(!can_open_fb0());
+ SKIP_IF(!can_open_cifile());
SKIP_IF(!have_hwcap(PPC_FEATURE_ARCH_2_06));
printf("VSX: 2.06B\n");
@@ -304,7 +339,7 @@ int test_alignment_handler_vsx_207(void)
{
int rc = 0;
- SKIP_IF(!can_open_fb0());
+ SKIP_IF(!can_open_cifile());
SKIP_IF(!have_hwcap2(PPC_FEATURE2_ARCH_2_07));
printf("VSX: 2.07B\n");
@@ -320,7 +355,7 @@ int test_alignment_handler_vsx_300(void)
{
int rc = 0;
- SKIP_IF(!can_open_fb0());
+ SKIP_IF(!can_open_cifile());
SKIP_IF(!have_hwcap2(PPC_FEATURE2_ARCH_3_00));
printf("VSX: 3.00B\n");
@@ -348,11 +383,30 @@ int test_alignment_handler_vsx_300(void)
return rc;
}
+int test_alignment_handler_vsx_prefix(void)
+{
+ int rc = 0;
+
+ SKIP_IF(!can_open_cifile());
+ SKIP_IF(!have_hwcap2(PPC_FEATURE2_ARCH_3_1));
+
+ printf("VSX: PREFIX\n");
+ LOAD_VSX_8LS_PREFIX_TEST(PLXSD, 0);
+ LOAD_VSX_8LS_PREFIX_TEST(PLXSSP, 0);
+ LOAD_VSX_8LS_PREFIX_TEST(PLXV0, 0);
+ LOAD_VSX_8LS_PREFIX_TEST(PLXV1, 1);
+ STORE_VSX_8LS_PREFIX_TEST(PSTXSD, 0);
+ STORE_VSX_8LS_PREFIX_TEST(PSTXSSP, 0);
+ STORE_VSX_8LS_PREFIX_TEST(PSTXV0, 0);
+ STORE_VSX_8LS_PREFIX_TEST(PSTXV1, 1);
+ return rc;
+}
+
int test_alignment_handler_integer(void)
{
int rc = 0;
- SKIP_IF(!can_open_fb0());
+ SKIP_IF(!can_open_cifile());
printf("Integer\n");
LOAD_DFORM_TEST(lbz);
@@ -380,7 +434,6 @@ int test_alignment_handler_integer(void)
LOAD_DFORM_TEST(ldu);
LOAD_XFORM_TEST(ldx);
LOAD_XFORM_TEST(ldux);
- LOAD_DFORM_TEST(lmw);
STORE_DFORM_TEST(stb);
STORE_XFORM_TEST(stbx);
STORE_DFORM_TEST(stbu);
@@ -399,7 +452,11 @@ int test_alignment_handler_integer(void)
STORE_XFORM_TEST(stdx);
STORE_DFORM_TEST(stdu);
STORE_XFORM_TEST(stdux);
+
+#ifdef __BIG_ENDIAN__
+ LOAD_DFORM_TEST(lmw);
STORE_DFORM_TEST(stmw);
+#endif
return rc;
}
@@ -408,7 +465,7 @@ int test_alignment_handler_integer_206(void)
{
int rc = 0;
- SKIP_IF(!can_open_fb0());
+ SKIP_IF(!can_open_cifile());
SKIP_IF(!have_hwcap(PPC_FEATURE_ARCH_2_06));
printf("Integer: 2.06\n");
@@ -419,11 +476,32 @@ int test_alignment_handler_integer_206(void)
return rc;
}
+int test_alignment_handler_integer_prefix(void)
+{
+ int rc = 0;
+
+ SKIP_IF(!can_open_cifile());
+ SKIP_IF(!have_hwcap2(PPC_FEATURE2_ARCH_3_1));
+
+ printf("Integer: PREFIX\n");
+ LOAD_MLS_PREFIX_TEST(PLBZ);
+ LOAD_MLS_PREFIX_TEST(PLHZ);
+ LOAD_MLS_PREFIX_TEST(PLHA);
+ LOAD_MLS_PREFIX_TEST(PLWZ);
+ LOAD_8LS_PREFIX_TEST(PLWA);
+ LOAD_8LS_PREFIX_TEST(PLD);
+ STORE_MLS_PREFIX_TEST(PSTB);
+ STORE_MLS_PREFIX_TEST(PSTH);
+ STORE_MLS_PREFIX_TEST(PSTW);
+ STORE_8LS_PREFIX_TEST(PSTD);
+ return rc;
+}
+
int test_alignment_handler_vmx(void)
{
int rc = 0;
- SKIP_IF(!can_open_fb0());
+ SKIP_IF(!can_open_cifile());
SKIP_IF(!have_hwcap(PPC_FEATURE_HAS_ALTIVEC));
printf("VMX\n");
@@ -451,7 +529,7 @@ int test_alignment_handler_fp(void)
{
int rc = 0;
- SKIP_IF(!can_open_fb0());
+ SKIP_IF(!can_open_cifile());
printf("Floating point\n");
LOAD_FLOAT_DFORM_TEST(lfd);
@@ -479,7 +557,7 @@ int test_alignment_handler_fp_205(void)
{
int rc = 0;
- SKIP_IF(!can_open_fb0());
+ SKIP_IF(!can_open_cifile());
SKIP_IF(!have_hwcap(PPC_FEATURE_ARCH_2_05));
printf("Floating point: 2.05\n");
@@ -497,7 +575,7 @@ int test_alignment_handler_fp_206(void)
{
int rc = 0;
- SKIP_IF(!can_open_fb0());
+ SKIP_IF(!can_open_cifile());
SKIP_IF(!have_hwcap(PPC_FEATURE_ARCH_2_06));
printf("Floating point: 2.06\n");
@@ -507,13 +585,32 @@ int test_alignment_handler_fp_206(void)
return rc;
}
+
+int test_alignment_handler_fp_prefix(void)
+{
+ int rc = 0;
+
+ SKIP_IF(!can_open_cifile());
+ SKIP_IF(!have_hwcap2(PPC_FEATURE2_ARCH_3_1));
+
+ printf("Floating point: PREFIX\n");
+ LOAD_FLOAT_DFORM_TEST(lfs);
+ LOAD_FLOAT_MLS_PREFIX_TEST(PLFS);
+ LOAD_FLOAT_MLS_PREFIX_TEST(PLFD);
+ STORE_FLOAT_MLS_PREFIX_TEST(PSTFS);
+ STORE_FLOAT_MLS_PREFIX_TEST(PSTFD);
+ return rc;
+}
+
void usage(char *prog)
{
- printf("Usage: %s [options]\n", prog);
+ printf("Usage: %s [options] [path [offset]]\n", prog);
printf(" -d Enable debug error output\n");
printf("\n");
- printf("This test requires a POWER8 or POWER9 CPU and a usable ");
- printf("framebuffer at /dev/fb0.\n");
+ printf("This test requires a POWER8, POWER9 or POWER10 CPU ");
+ printf("and either a usable framebuffer at /dev/fb0 or ");
+ printf("the path to usable cache inhibited memory and optional ");
+ printf("offset to be provided\n");
}
int main(int argc, char *argv[])
@@ -533,6 +630,13 @@ int main(int argc, char *argv[])
exit(1);
}
}
+ argc -= optind;
+ argv += optind;
+
+ if (argc > 0)
+ cipath = argv[0];
+ if (argc > 1)
+ cioffset = strtol(argv[1], 0, 0x10);
bufsize = getpagesize();
@@ -546,16 +650,22 @@ int main(int argc, char *argv[])
exit(1);
}
+ prefixes_enabled = have_hwcap2(PPC_FEATURE2_ARCH_3_1);
+
rc |= test_harness(test_alignment_handler_vsx_206,
"test_alignment_handler_vsx_206");
rc |= test_harness(test_alignment_handler_vsx_207,
"test_alignment_handler_vsx_207");
rc |= test_harness(test_alignment_handler_vsx_300,
"test_alignment_handler_vsx_300");
+ rc |= test_harness(test_alignment_handler_vsx_prefix,
+ "test_alignment_handler_vsx_prefix");
rc |= test_harness(test_alignment_handler_integer,
"test_alignment_handler_integer");
rc |= test_harness(test_alignment_handler_integer_206,
"test_alignment_handler_integer_206");
+ rc |= test_harness(test_alignment_handler_integer_prefix,
+ "test_alignment_handler_integer_prefix");
rc |= test_harness(test_alignment_handler_vmx,
"test_alignment_handler_vmx");
rc |= test_harness(test_alignment_handler_fp,
@@ -564,5 +674,7 @@ int main(int argc, char *argv[])
"test_alignment_handler_fp_205");
rc |= test_harness(test_alignment_handler_fp_206,
"test_alignment_handler_fp_206");
+ rc |= test_harness(test_alignment_handler_fp_prefix,
+ "test_alignment_handler_fp_prefix");
return rc;
}
diff --git a/tools/testing/selftests/powerpc/benchmarks/context_switch.c b/tools/testing/selftests/powerpc/benchmarks/context_switch.c
index a2e8c9da7fa5..96554e2794d1 100644
--- a/tools/testing/selftests/powerpc/benchmarks/context_switch.c
+++ b/tools/testing/selftests/powerpc/benchmarks/context_switch.c
@@ -19,6 +19,7 @@
#include <limits.h>
#include <sys/time.h>
#include <sys/syscall.h>
+#include <sys/sysinfo.h>
#include <sys/types.h>
#include <sys/shm.h>
#include <linux/futex.h>
@@ -104,8 +105,9 @@ static void start_thread_on(void *(*fn)(void *), void *arg, unsigned long cpu)
static void start_process_on(void *(*fn)(void *), void *arg, unsigned long cpu)
{
- int pid;
- cpu_set_t cpuset;
+ int pid, ncpus;
+ cpu_set_t *cpuset;
+ size_t size;
pid = fork();
if (pid == -1) {
@@ -116,14 +118,23 @@ static void start_process_on(void *(*fn)(void *), void *arg, unsigned long cpu)
if (pid)
return;
- CPU_ZERO(&cpuset);
- CPU_SET(cpu, &cpuset);
+ ncpus = get_nprocs();
+ size = CPU_ALLOC_SIZE(ncpus);
+ cpuset = CPU_ALLOC(ncpus);
+ if (!cpuset) {
+ perror("malloc");
+ exit(1);
+ }
+ CPU_ZERO_S(size, cpuset);
+ CPU_SET_S(cpu, size, cpuset);
- if (sched_setaffinity(0, sizeof(cpuset), &cpuset)) {
+ if (sched_setaffinity(0, size, cpuset)) {
perror("sched_setaffinity");
+ CPU_FREE(cpuset);
exit(1);
}
+ CPU_FREE(cpuset);
fn(arg);
exit(0);
@@ -470,6 +481,12 @@ int main(int argc, char *argv[])
else
printf("futex");
+ if (!have_hwcap(PPC_FEATURE_HAS_ALTIVEC))
+ touch_altivec = 0;
+
+ if (!have_hwcap(PPC_FEATURE_HAS_VSX))
+ touch_vector = 0;
+
printf(" on cpus %d/%d touching FP:%s altivec:%s vector:%s vdso:%s\n",
cpu1, cpu2, touch_fp ? "yes" : "no", touch_altivec ? "yes" : "no",
touch_vector ? "yes" : "no", touch_vdso ? "yes" : "no");
diff --git a/tools/testing/selftests/powerpc/benchmarks/gettimeofday.c b/tools/testing/selftests/powerpc/benchmarks/gettimeofday.c
index 6b415683357b..580fcac0a09f 100644
--- a/tools/testing/selftests/powerpc/benchmarks/gettimeofday.c
+++ b/tools/testing/selftests/powerpc/benchmarks/gettimeofday.c
@@ -12,7 +12,7 @@ static int test_gettimeofday(void)
{
int i;
- struct timeval tv_start, tv_end;
+ struct timeval tv_start, tv_end, tv_diff;
gettimeofday(&tv_start, NULL);
@@ -20,7 +20,9 @@ static int test_gettimeofday(void)
gettimeofday(&tv_end, NULL);
}
- printf("time = %.6f\n", tv_end.tv_sec - tv_start.tv_sec + (tv_end.tv_usec - tv_start.tv_usec) * 1e-6);
+ timersub(&tv_start, &tv_end, &tv_diff);
+
+ printf("time = %.6f\n", tv_diff.tv_sec + (tv_diff.tv_usec) * 1e-6);
return 0;
}
diff --git a/tools/testing/selftests/powerpc/benchmarks/null_syscall.c b/tools/testing/selftests/powerpc/benchmarks/null_syscall.c
index 579f0215c6e7..9836838a529f 100644
--- a/tools/testing/selftests/powerpc/benchmarks/null_syscall.c
+++ b/tools/testing/selftests/powerpc/benchmarks/null_syscall.c
@@ -14,6 +14,7 @@
#include <time.h>
#include <sys/types.h>
#include <sys/time.h>
+#include <sys/syscall.h>
#include <signal.h>
static volatile int soak_done;
@@ -121,7 +122,7 @@ static void do_null_syscall(unsigned long nr)
unsigned long i;
for (i = 0; i < nr; i++)
- getppid();
+ syscall(__NR_gettid);
}
#define TIME(A, STR) \
diff --git a/tools/testing/selftests/powerpc/copyloops/.gitignore b/tools/testing/selftests/powerpc/copyloops/.gitignore
index ddaf140b8255..7283e8b07b75 100644
--- a/tools/testing/selftests/powerpc/copyloops/.gitignore
+++ b/tools/testing/selftests/powerpc/copyloops/.gitignore
@@ -12,4 +12,5 @@ memcpy_p7_t1
copyuser_64_exc_t0
copyuser_64_exc_t1
copyuser_64_exc_t2
-memcpy_mcsafe_64
+copy_mc_64
+memmove_64
diff --git a/tools/testing/selftests/powerpc/copyloops/Makefile b/tools/testing/selftests/powerpc/copyloops/Makefile
index 0917983a1c78..77594e697f2f 100644
--- a/tools/testing/selftests/powerpc/copyloops/Makefile
+++ b/tools/testing/selftests/powerpc/copyloops/Makefile
@@ -12,8 +12,9 @@ ASFLAGS = $(CFLAGS) -Wa,-mpower4
TEST_GEN_PROGS := copyuser_64_t0 copyuser_64_t1 copyuser_64_t2 \
copyuser_p7_t0 copyuser_p7_t1 \
memcpy_64_t0 memcpy_64_t1 memcpy_64_t2 \
- memcpy_p7_t0 memcpy_p7_t1 memcpy_mcsafe_64 \
- copyuser_64_exc_t0 copyuser_64_exc_t1 copyuser_64_exc_t2
+ memcpy_p7_t0 memcpy_p7_t1 copy_mc_64 \
+ copyuser_64_exc_t0 copyuser_64_exc_t1 copyuser_64_exc_t2 \
+ memmove_64
EXTRA_SOURCES := validate.c ../harness.c stubs.S
@@ -45,9 +46,9 @@ $(OUTPUT)/memcpy_p7_t%: memcpy_power7.S $(EXTRA_SOURCES)
-D SELFTEST_CASE=$(subst memcpy_p7_t,,$(notdir $@)) \
-o $@ $^
-$(OUTPUT)/memcpy_mcsafe_64: memcpy_mcsafe_64.S $(EXTRA_SOURCES)
+$(OUTPUT)/copy_mc_64: copy_mc_64.S $(EXTRA_SOURCES)
$(CC) $(CPPFLAGS) $(CFLAGS) \
- -D COPY_LOOP=test_memcpy_mcsafe \
+ -D COPY_LOOP=test_copy_mc_generic \
-o $@ $^
$(OUTPUT)/copyuser_64_exc_t%: copyuser_64.S exc_validate.c ../harness.c \
@@ -56,3 +57,9 @@ $(OUTPUT)/copyuser_64_exc_t%: copyuser_64.S exc_validate.c ../harness.c \
-D COPY_LOOP=test___copy_tofrom_user_base \
-D SELFTEST_CASE=$(subst copyuser_64_exc_t,,$(notdir $@)) \
-o $@ $^
+
+$(OUTPUT)/memmove_64: mem_64.S memcpy_64.S memmove_validate.c ../harness.c \
+ memcpy_stubs.S
+ $(CC) $(CPPFLAGS) $(CFLAGS) \
+ -D TEST_MEMMOVE=test_memmove \
+ -o $@ $^
diff --git a/tools/testing/selftests/powerpc/copyloops/asm/ppc_asm.h b/tools/testing/selftests/powerpc/copyloops/asm/ppc_asm.h
index 58c1cef3e399..1d293ab77185 100644
--- a/tools/testing/selftests/powerpc/copyloops/asm/ppc_asm.h
+++ b/tools/testing/selftests/powerpc/copyloops/asm/ppc_asm.h
@@ -26,6 +26,8 @@
#define _GLOBAL(A) FUNC_START(test_ ## A)
#define _GLOBAL_TOC(A) _GLOBAL(A)
#define _GLOBAL_TOC_KASAN(A) _GLOBAL(A)
+#define _GLOBAL_KASAN(A) _GLOBAL(A)
+#define CFUNC(name) name
#define PPC_MTOCRF(A, B) mtocrf A, B
@@ -45,4 +47,16 @@
/* Default to taking the first of any alternative feature sections */
test_feature = 1
+#define DCBT_SETUP_STREAMS(from, from_parms, to, to_parms, scratch) \
+ lis scratch,0x8000; /* GO=1 */ \
+ clrldi scratch,scratch,32; \
+ /* setup read stream 0 */ \
+ dcbt 0,from,0b01000; /* addr from */ \
+ dcbt 0,from_parms,0b01010; /* length and depth from */ \
+ /* setup write stream 1 */ \
+ dcbtst 0,to,0b01000; /* addr to */ \
+ dcbtst 0,to_parms,0b01010; /* length and depth to */ \
+ eieio; \
+ dcbt 0,scratch,0b01010; /* all streams GO */
+
#endif /* __SELFTESTS_POWERPC_PPC_ASM_H */
diff --git a/tools/testing/selftests/powerpc/copyloops/copy_mc_64.S b/tools/testing/selftests/powerpc/copyloops/copy_mc_64.S
new file mode 120000
index 000000000000..dcbe06d500fb
--- /dev/null
+++ b/tools/testing/selftests/powerpc/copyloops/copy_mc_64.S
@@ -0,0 +1 @@
+../../../../../arch/powerpc/lib/copy_mc_64.S \ No newline at end of file
diff --git a/tools/testing/selftests/powerpc/copyloops/asm/export.h b/tools/testing/selftests/powerpc/copyloops/linux/export.h
index e6b80d5fbd14..e6b80d5fbd14 100644
--- a/tools/testing/selftests/powerpc/copyloops/asm/export.h
+++ b/tools/testing/selftests/powerpc/copyloops/linux/export.h
diff --git a/tools/testing/selftests/powerpc/copyloops/mem_64.S b/tools/testing/selftests/powerpc/copyloops/mem_64.S
new file mode 120000
index 000000000000..db254c9a5f5c
--- /dev/null
+++ b/tools/testing/selftests/powerpc/copyloops/mem_64.S
@@ -0,0 +1 @@
+../../../../../arch/powerpc/lib/mem_64.S \ No newline at end of file
diff --git a/tools/testing/selftests/powerpc/copyloops/memcpy_mcsafe_64.S b/tools/testing/selftests/powerpc/copyloops/memcpy_mcsafe_64.S
deleted file mode 120000
index f0feef3062f6..000000000000
--- a/tools/testing/selftests/powerpc/copyloops/memcpy_mcsafe_64.S
+++ /dev/null
@@ -1 +0,0 @@
-../../../../../arch/powerpc/lib/memcpy_mcsafe_64.S \ No newline at end of file
diff --git a/tools/testing/selftests/powerpc/copyloops/memcpy_stubs.S b/tools/testing/selftests/powerpc/copyloops/memcpy_stubs.S
new file mode 100644
index 000000000000..d9baa832fa49
--- /dev/null
+++ b/tools/testing/selftests/powerpc/copyloops/memcpy_stubs.S
@@ -0,0 +1,8 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#include <asm/ppc_asm.h>
+
+FUNC_START(memcpy)
+ b test_memcpy
+
+FUNC_START(backwards_memcpy)
+ b test_backwards_memcpy
diff --git a/tools/testing/selftests/powerpc/copyloops/memmove_validate.c b/tools/testing/selftests/powerpc/copyloops/memmove_validate.c
new file mode 100644
index 000000000000..1a23218b5757
--- /dev/null
+++ b/tools/testing/selftests/powerpc/copyloops/memmove_validate.c
@@ -0,0 +1,58 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <malloc.h>
+#include <stdlib.h>
+#include <string.h>
+#include <assert.h>
+#include "utils.h"
+
+void *TEST_MEMMOVE(const void *s1, const void *s2, size_t n);
+
+#define BUF_LEN 65536
+#define MAX_OFFSET 512
+
+size_t max(size_t a, size_t b)
+{
+ if (a >= b)
+ return a;
+ return b;
+}
+
+static int testcase_run(void)
+{
+ size_t i, src_off, dst_off, len;
+
+ char *usermap = memalign(BUF_LEN, BUF_LEN);
+ char *kernelmap = memalign(BUF_LEN, BUF_LEN);
+
+ assert(usermap != NULL);
+ assert(kernelmap != NULL);
+
+ memset(usermap, 0, BUF_LEN);
+ memset(kernelmap, 0, BUF_LEN);
+
+ for (i = 0; i < BUF_LEN; i++) {
+ usermap[i] = i & 0xff;
+ kernelmap[i] = i & 0xff;
+ }
+
+ for (src_off = 0; src_off < MAX_OFFSET; src_off++) {
+ for (dst_off = 0; dst_off < MAX_OFFSET; dst_off++) {
+ for (len = 1; len < MAX_OFFSET - max(src_off, dst_off); len++) {
+
+ memmove(usermap + dst_off, usermap + src_off, len);
+ TEST_MEMMOVE(kernelmap + dst_off, kernelmap + src_off, len);
+ if (memcmp(usermap, kernelmap, MAX_OFFSET) != 0) {
+ printf("memmove failed at %ld %ld %ld\n",
+ src_off, dst_off, len);
+ abort();
+ }
+ }
+ }
+ }
+ return 0;
+}
+
+int main(void)
+{
+ return test_harness(testcase_run, "memmove");
+}
diff --git a/tools/testing/selftests/powerpc/dexcr/.gitignore b/tools/testing/selftests/powerpc/dexcr/.gitignore
new file mode 100644
index 000000000000..b82f45dd46b9
--- /dev/null
+++ b/tools/testing/selftests/powerpc/dexcr/.gitignore
@@ -0,0 +1,2 @@
+hashchk_test
+lsdexcr
diff --git a/tools/testing/selftests/powerpc/dexcr/Makefile b/tools/testing/selftests/powerpc/dexcr/Makefile
new file mode 100644
index 000000000000..76210f2bcec3
--- /dev/null
+++ b/tools/testing/selftests/powerpc/dexcr/Makefile
@@ -0,0 +1,9 @@
+TEST_GEN_PROGS := hashchk_test
+TEST_GEN_FILES := lsdexcr
+
+include ../../lib.mk
+
+$(OUTPUT)/hashchk_test: CFLAGS += -fno-pie $(call cc-option,-mno-rop-protect)
+
+$(TEST_GEN_PROGS): ../harness.c ../utils.c ./dexcr.c
+$(TEST_GEN_FILES): ../utils.c ./dexcr.c
diff --git a/tools/testing/selftests/powerpc/dexcr/dexcr.c b/tools/testing/selftests/powerpc/dexcr/dexcr.c
new file mode 100644
index 000000000000..65ec5347de98
--- /dev/null
+++ b/tools/testing/selftests/powerpc/dexcr/dexcr.c
@@ -0,0 +1,132 @@
+// SPDX-License-Identifier: GPL-2.0+
+
+#include <errno.h>
+#include <setjmp.h>
+#include <signal.h>
+#include <sys/types.h>
+#include <sys/wait.h>
+
+#include "dexcr.h"
+#include "reg.h"
+#include "utils.h"
+
+static jmp_buf generic_signal_jump_buf;
+
+static void generic_signal_handler(int signum, siginfo_t *info, void *context)
+{
+ longjmp(generic_signal_jump_buf, 0);
+}
+
+bool dexcr_exists(void)
+{
+ struct sigaction old;
+ volatile bool exists;
+
+ old = push_signal_handler(SIGILL, generic_signal_handler);
+ if (setjmp(generic_signal_jump_buf))
+ goto out;
+
+ /*
+ * If the SPR is not recognised by the hardware it triggers
+ * a hypervisor emulation interrupt. If the kernel does not
+ * recognise/try to emulate it, we receive a SIGILL signal.
+ *
+ * If we do not receive a signal, assume we have the SPR or the
+ * kernel is trying to emulate it correctly.
+ */
+ exists = false;
+ mfspr(SPRN_DEXCR_RO);
+ exists = true;
+
+out:
+ pop_signal_handler(SIGILL, old);
+ return exists;
+}
+
+/*
+ * Just test if a bad hashchk triggers a signal, without checking
+ * for support or if the NPHIE aspect is enabled.
+ */
+bool hashchk_triggers(void)
+{
+ struct sigaction old;
+ volatile bool triggers;
+
+ old = push_signal_handler(SIGILL, generic_signal_handler);
+ if (setjmp(generic_signal_jump_buf))
+ goto out;
+
+ triggers = true;
+ do_bad_hashchk();
+ triggers = false;
+
+out:
+ pop_signal_handler(SIGILL, old);
+ return triggers;
+}
+
+unsigned int get_dexcr(enum dexcr_source source)
+{
+ switch (source) {
+ case DEXCR:
+ return mfspr(SPRN_DEXCR_RO);
+ case HDEXCR:
+ return mfspr(SPRN_HDEXCR_RO);
+ case EFFECTIVE:
+ return mfspr(SPRN_DEXCR_RO) | mfspr(SPRN_HDEXCR_RO);
+ default:
+ FAIL_IF_EXIT_MSG(true, "bad enum dexcr_source");
+ }
+}
+
+void await_child_success(pid_t pid)
+{
+ int wstatus;
+
+ FAIL_IF_EXIT_MSG(pid == -1, "fork failed");
+ FAIL_IF_EXIT_MSG(waitpid(pid, &wstatus, 0) == -1, "wait failed");
+ FAIL_IF_EXIT_MSG(!WIFEXITED(wstatus), "child did not exit cleanly");
+ FAIL_IF_EXIT_MSG(WEXITSTATUS(wstatus) != 0, "child exit error");
+}
+
+/*
+ * Perform a hashst instruction. The following components determine the result
+ *
+ * 1. The LR value (any register technically)
+ * 2. The SP value (also any register, but it must be a valid address)
+ * 3. A secret key managed by the kernel
+ *
+ * The result is stored to the address held in SP.
+ */
+void hashst(unsigned long lr, void *sp)
+{
+ asm volatile ("addi 31, %0, 0;" /* set r31 (pretend LR) to lr */
+ "addi 30, %1, 8;" /* set r30 (pretend SP) to sp + 8 */
+ PPC_RAW_HASHST(31, -8, 30) /* compute hash into stack location */
+ : : "r" (lr), "r" (sp) : "r31", "r30", "memory");
+}
+
+/*
+ * Perform a hashchk instruction. A hash is computed as per hashst(),
+ * however the result is not stored to memory. Instead the existing
+ * value is read and compared against the computed hash.
+ *
+ * If they match, execution continues.
+ * If they differ, an interrupt triggers.
+ */
+void hashchk(unsigned long lr, void *sp)
+{
+ asm volatile ("addi 31, %0, 0;" /* set r31 (pretend LR) to lr */
+ "addi 30, %1, 8;" /* set r30 (pretend SP) to sp + 8 */
+ PPC_RAW_HASHCHK(31, -8, 30) /* check hash at stack location */
+ : : "r" (lr), "r" (sp) : "r31", "r30", "memory");
+}
+
+void do_bad_hashchk(void)
+{
+ unsigned long hash = 0;
+
+ hashst(0, &hash);
+ hash += 1;
+ hashchk(0, &hash);
+}
diff --git a/tools/testing/selftests/powerpc/dexcr/dexcr.h b/tools/testing/selftests/powerpc/dexcr/dexcr.h
new file mode 100644
index 000000000000..f55cbbc8643b
--- /dev/null
+++ b/tools/testing/selftests/powerpc/dexcr/dexcr.h
@@ -0,0 +1,49 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+/*
+ * POWER Dynamic Execution Control Facility (DEXCR)
+ *
+ * This header file contains helper functions and macros
+ * required for all the DEXCR related test cases.
+ */
+#ifndef _SELFTESTS_POWERPC_DEXCR_DEXCR_H
+#define _SELFTESTS_POWERPC_DEXCR_DEXCR_H
+
+#include <stdbool.h>
+#include <sys/types.h>
+
+#include "reg.h"
+
+#define DEXCR_PR_BIT(aspect) __MASK(63 - (32 + (aspect)))
+#define DEXCR_PR_SBHE DEXCR_PR_BIT(0)
+#define DEXCR_PR_IBRTPD DEXCR_PR_BIT(3)
+#define DEXCR_PR_SRAPD DEXCR_PR_BIT(4)
+#define DEXCR_PR_NPHIE DEXCR_PR_BIT(5)
+
+#define PPC_RAW_HASH_ARGS(b, i, a) \
+ ((((i) >> 3) & 0x1F) << 21 | (a) << 16 | (b) << 11 | (((i) >> 8) & 0x1))
+#define PPC_RAW_HASHST(b, i, a) \
+ str(.long (0x7C0005A4 | PPC_RAW_HASH_ARGS(b, i, a));)
+#define PPC_RAW_HASHCHK(b, i, a) \
+ str(.long (0x7C0005E4 | PPC_RAW_HASH_ARGS(b, i, a));)
+
+bool dexcr_exists(void);
+
+bool hashchk_triggers(void);
+
+enum dexcr_source {
+ DEXCR, /* Userspace DEXCR value */
+ HDEXCR, /* Hypervisor enforced DEXCR value */
+ EFFECTIVE, /* Bitwise OR of UDEXCR and ENFORCED DEXCR bits */
+};
+
+unsigned int get_dexcr(enum dexcr_source source);
+
+void await_child_success(pid_t pid);
+
+void hashst(unsigned long lr, void *sp);
+
+void hashchk(unsigned long lr, void *sp);
+
+void do_bad_hashchk(void);
+
+#endif /* _SELFTESTS_POWERPC_DEXCR_DEXCR_H */
diff --git a/tools/testing/selftests/powerpc/dexcr/hashchk_test.c b/tools/testing/selftests/powerpc/dexcr/hashchk_test.c
new file mode 100644
index 000000000000..7d5658c9ebe4
--- /dev/null
+++ b/tools/testing/selftests/powerpc/dexcr/hashchk_test.c
@@ -0,0 +1,227 @@
+// SPDX-License-Identifier: GPL-2.0+
+
+#define _GNU_SOURCE
+
+#include <errno.h>
+#include <fcntl.h>
+#include <limits.h>
+#include <sched.h>
+#include <setjmp.h>
+#include <signal.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/mman.h>
+#include <sys/prctl.h>
+#include <unistd.h>
+
+#include "dexcr.h"
+#include "utils.h"
+
+static int require_nphie(void)
+{
+ SKIP_IF_MSG(!dexcr_exists(), "DEXCR not supported");
+ SKIP_IF_MSG(!(get_dexcr(EFFECTIVE) & DEXCR_PR_NPHIE),
+ "DEXCR[NPHIE] not enabled");
+
+ return 0;
+}
+
+static jmp_buf hashchk_detected_buf;
+static const char *hashchk_failure_msg;
+
+static void hashchk_handler(int signum, siginfo_t *info, void *context)
+{
+ if (signum != SIGILL)
+ hashchk_failure_msg = "wrong signal received";
+ else if (info->si_code != ILL_ILLOPN)
+ hashchk_failure_msg = "wrong signal code received";
+
+ longjmp(hashchk_detected_buf, 0);
+}
+
+/*
+ * Check that hashchk triggers when DEXCR[NPHIE] is enabled
+ * and is detected as such by the kernel exception handler
+ */
+static int hashchk_detected_test(void)
+{
+ struct sigaction old;
+ int err;
+
+ err = require_nphie();
+ if (err)
+ return err;
+
+ old = push_signal_handler(SIGILL, hashchk_handler);
+ if (setjmp(hashchk_detected_buf))
+ goto out;
+
+ hashchk_failure_msg = NULL;
+ do_bad_hashchk();
+ hashchk_failure_msg = "hashchk failed to trigger";
+
+out:
+ pop_signal_handler(SIGILL, old);
+ FAIL_IF_MSG(hashchk_failure_msg, hashchk_failure_msg);
+ return 0;
+}
+
+#define HASH_COUNT 8
+
+static unsigned long hash_values[HASH_COUNT + 1];
+
+static void fill_hash_values(void)
+{
+ for (unsigned long i = 0; i < HASH_COUNT; i++)
+ hashst(i, &hash_values[i]);
+
+ /* Used to ensure the checks uses the same addresses as the hashes */
+ hash_values[HASH_COUNT] = (unsigned long)&hash_values;
+}
+
+static unsigned int count_hash_values_matches(void)
+{
+ unsigned long matches = 0;
+
+ for (unsigned long i = 0; i < HASH_COUNT; i++) {
+ unsigned long orig_hash = hash_values[i];
+ hash_values[i] = 0;
+
+ hashst(i, &hash_values[i]);
+
+ if (hash_values[i] == orig_hash)
+ matches++;
+ }
+
+ return matches;
+}
+
+static int hashchk_exec_child(void)
+{
+ ssize_t count;
+
+ fill_hash_values();
+
+ count = write(STDOUT_FILENO, hash_values, sizeof(hash_values));
+ return count == sizeof(hash_values) ? 0 : EOVERFLOW;
+}
+
+static char *hashchk_exec_child_args[] = { "hashchk_exec_child", NULL };
+
+/*
+ * Check that new programs get different keys so a malicious process
+ * can't recreate a victim's hash values.
+ */
+static int hashchk_exec_random_key_test(void)
+{
+ pid_t pid;
+ int err;
+ int pipefd[2];
+
+ err = require_nphie();
+ if (err)
+ return err;
+
+ FAIL_IF_MSG(pipe(pipefd), "failed to create pipe");
+
+ pid = fork();
+ if (pid == 0) {
+ if (dup2(pipefd[1], STDOUT_FILENO) == -1)
+ _exit(errno);
+
+ execve("/proc/self/exe", hashchk_exec_child_args, NULL);
+ _exit(errno);
+ }
+
+ await_child_success(pid);
+ FAIL_IF_MSG(read(pipefd[0], hash_values, sizeof(hash_values)) != sizeof(hash_values),
+ "missing expected child output");
+
+ /* Verify the child used the same hash_values address */
+ FAIL_IF_EXIT_MSG(hash_values[HASH_COUNT] != (unsigned long)&hash_values,
+ "bad address check");
+
+ /* If all hashes are the same it means (most likely) same key */
+ FAIL_IF_MSG(count_hash_values_matches() == HASH_COUNT, "shared key detected");
+
+ return 0;
+}
+
+/*
+ * Check that forks share the same key so that existing hash values
+ * remain valid.
+ */
+static int hashchk_fork_share_key_test(void)
+{
+ pid_t pid;
+ int err;
+
+ err = require_nphie();
+ if (err)
+ return err;
+
+ fill_hash_values();
+
+ pid = fork();
+ if (pid == 0) {
+ if (count_hash_values_matches() != HASH_COUNT)
+ _exit(1);
+ _exit(0);
+ }
+
+ await_child_success(pid);
+ return 0;
+}
+
+#define STACK_SIZE (1024 * 1024)
+
+static int hashchk_clone_child_fn(void *args)
+{
+ fill_hash_values();
+ return 0;
+}
+
+/*
+ * Check that threads share the same key so that existing hash values
+ * remain valid.
+ */
+static int hashchk_clone_share_key_test(void)
+{
+ void *child_stack;
+ pid_t pid;
+ int err;
+
+ err = require_nphie();
+ if (err)
+ return err;
+
+ child_stack = mmap(NULL, STACK_SIZE, PROT_READ | PROT_WRITE,
+ MAP_PRIVATE | MAP_ANONYMOUS | MAP_STACK, -1, 0);
+
+ FAIL_IF_MSG(child_stack == MAP_FAILED, "failed to map child stack");
+
+ pid = clone(hashchk_clone_child_fn, child_stack + STACK_SIZE,
+ CLONE_VM | SIGCHLD, NULL);
+
+ await_child_success(pid);
+ FAIL_IF_MSG(count_hash_values_matches() != HASH_COUNT,
+ "different key detected");
+
+ return 0;
+}
+
+int main(int argc, char *argv[])
+{
+ int err = 0;
+
+ if (argc >= 1 && !strcmp(argv[0], hashchk_exec_child_args[0]))
+ return hashchk_exec_child();
+
+ err |= test_harness(hashchk_detected_test, "hashchk_detected");
+ err |= test_harness(hashchk_exec_random_key_test, "hashchk_exec_random_key");
+ err |= test_harness(hashchk_fork_share_key_test, "hashchk_fork_share_key");
+ err |= test_harness(hashchk_clone_share_key_test, "hashchk_clone_share_key");
+
+ return err;
+}
diff --git a/tools/testing/selftests/powerpc/dexcr/lsdexcr.c b/tools/testing/selftests/powerpc/dexcr/lsdexcr.c
new file mode 100644
index 000000000000..94abbfcc389e
--- /dev/null
+++ b/tools/testing/selftests/powerpc/dexcr/lsdexcr.c
@@ -0,0 +1,141 @@
+// SPDX-License-Identifier: GPL-2.0+
+
+#include <errno.h>
+#include <stddef.h>
+#include <stdio.h>
+#include <string.h>
+
+#include "dexcr.h"
+#include "utils.h"
+
+static unsigned int dexcr;
+static unsigned int hdexcr;
+static unsigned int effective;
+
+struct dexcr_aspect {
+ const char *name;
+ const char *desc;
+ unsigned int index;
+};
+
+static const struct dexcr_aspect aspects[] = {
+ {
+ .name = "SBHE",
+ .desc = "Speculative branch hint enable",
+ .index = 0,
+ },
+ {
+ .name = "IBRTPD",
+ .desc = "Indirect branch recurrent target prediction disable",
+ .index = 3,
+ },
+ {
+ .name = "SRAPD",
+ .desc = "Subroutine return address prediction disable",
+ .index = 4,
+ },
+ {
+ .name = "NPHIE",
+ .desc = "Non-privileged hash instruction enable",
+ .index = 5,
+ },
+ {
+ .name = "PHIE",
+ .desc = "Privileged hash instruction enable",
+ .index = 6,
+ },
+};
+
+static void print_list(const char *list[], size_t len)
+{
+ for (size_t i = 0; i < len; i++) {
+ printf("%s", list[i]);
+ if (i + 1 < len)
+ printf(", ");
+ }
+}
+
+static void print_dexcr(char *name, unsigned int bits)
+{
+ const char *enabled_aspects[ARRAY_SIZE(aspects) + 1] = {NULL};
+ size_t j = 0;
+
+ printf("%s: %08x", name, bits);
+
+ if (bits == 0) {
+ printf("\n");
+ return;
+ }
+
+ for (size_t i = 0; i < ARRAY_SIZE(aspects); i++) {
+ unsigned int mask = DEXCR_PR_BIT(aspects[i].index);
+
+ if (bits & mask) {
+ enabled_aspects[j++] = aspects[i].name;
+ bits &= ~mask;
+ }
+ }
+
+ if (bits)
+ enabled_aspects[j++] = "unknown";
+
+ printf(" (");
+ print_list(enabled_aspects, j);
+ printf(")\n");
+}
+
+static void print_aspect(const struct dexcr_aspect *aspect)
+{
+ const char *attributes[8] = {NULL};
+ size_t j = 0;
+ unsigned long mask;
+
+ mask = DEXCR_PR_BIT(aspect->index);
+ if (dexcr & mask)
+ attributes[j++] = "set";
+ if (hdexcr & mask)
+ attributes[j++] = "set (hypervisor)";
+ if (!(effective & mask))
+ attributes[j++] = "clear";
+
+ printf("%12s %c (%d): ", aspect->name, effective & mask ? '*' : ' ', aspect->index);
+ print_list(attributes, j);
+ printf(" \t(%s)\n", aspect->desc);
+}
+
+int main(int argc, char *argv[])
+{
+ if (!dexcr_exists()) {
+ printf("DEXCR not detected on this hardware\n");
+ return 1;
+ }
+
+ dexcr = get_dexcr(DEXCR);
+ hdexcr = get_dexcr(HDEXCR);
+ effective = dexcr | hdexcr;
+
+ print_dexcr(" DEXCR", dexcr);
+ print_dexcr(" HDEXCR", hdexcr);
+ print_dexcr("Effective", effective);
+ printf("\n");
+
+ for (size_t i = 0; i < ARRAY_SIZE(aspects); i++)
+ print_aspect(&aspects[i]);
+ printf("\n");
+
+ if (effective & DEXCR_PR_NPHIE) {
+ printf("DEXCR[NPHIE] enabled: hashst/hashchk ");
+ if (hashchk_triggers())
+ printf("working\n");
+ else
+ printf("failed to trigger\n");
+ } else {
+ printf("DEXCR[NPHIE] disabled: hashst/hashchk ");
+ if (hashchk_triggers())
+ printf("unexpectedly triggered\n");
+ else
+ printf("ignored\n");
+ }
+
+ return 0;
+}
diff --git a/tools/testing/selftests/powerpc/dscr/Makefile b/tools/testing/selftests/powerpc/dscr/Makefile
index cfa6eedcb66c..9289d5febe1e 100644
--- a/tools/testing/selftests/powerpc/dscr/Makefile
+++ b/tools/testing/selftests/powerpc/dscr/Makefile
@@ -3,11 +3,10 @@ TEST_GEN_PROGS := dscr_default_test dscr_explicit_test dscr_user_test \
dscr_inherit_test dscr_inherit_exec_test dscr_sysfs_test \
dscr_sysfs_thread_test
-TEST_FILES := settings
-
top_srcdir = ../../../../..
include ../../lib.mk
$(OUTPUT)/dscr_default_test: LDLIBS += -lpthread
+$(OUTPUT)/dscr_explicit_test: LDLIBS += -lpthread
-$(TEST_GEN_PROGS): ../harness.c
+$(TEST_GEN_PROGS): ../harness.c ../utils.c
diff --git a/tools/testing/selftests/powerpc/dscr/dscr.h b/tools/testing/selftests/powerpc/dscr/dscr.h
index 13e9b9e28e2c..b281659071e8 100644
--- a/tools/testing/selftests/powerpc/dscr/dscr.h
+++ b/tools/testing/selftests/powerpc/dscr/dscr.h
@@ -23,6 +23,7 @@
#include <sys/stat.h>
#include <sys/wait.h>
+#include "reg.h"
#include "utils.h"
#define THREADS 100 /* Max threads */
@@ -41,82 +42,48 @@
/* Prilvilege state DSCR access */
inline unsigned long get_dscr(void)
{
- unsigned long ret;
-
- asm volatile("mfspr %0,%1" : "=r" (ret) : "i" (SPRN_DSCR_PRIV));
-
- return ret;
+ return mfspr(SPRN_DSCR_PRIV);
}
inline void set_dscr(unsigned long val)
{
- asm volatile("mtspr %1,%0" : : "r" (val), "i" (SPRN_DSCR_PRIV));
+ mtspr(SPRN_DSCR_PRIV, val);
}
/* Problem state DSCR access */
inline unsigned long get_dscr_usr(void)
{
- unsigned long ret;
-
- asm volatile("mfspr %0,%1" : "=r" (ret) : "i" (SPRN_DSCR));
-
- return ret;
+ return mfspr(SPRN_DSCR);
}
inline void set_dscr_usr(unsigned long val)
{
- asm volatile("mtspr %1,%0" : : "r" (val), "i" (SPRN_DSCR));
+ mtspr(SPRN_DSCR, val);
}
/* Default DSCR access */
unsigned long get_default_dscr(void)
{
- int fd = -1, ret;
- char buf[16];
+ int err;
unsigned long val;
- if (fd == -1) {
- fd = open(DSCR_DEFAULT, O_RDONLY);
- if (fd == -1) {
- perror("open() failed");
- exit(1);
- }
- }
- memset(buf, 0, sizeof(buf));
- lseek(fd, 0, SEEK_SET);
- ret = read(fd, buf, sizeof(buf));
- if (ret == -1) {
+ err = read_ulong(DSCR_DEFAULT, &val, 16);
+ if (err) {
perror("read() failed");
exit(1);
}
- sscanf(buf, "%lx", &val);
- close(fd);
return val;
}
void set_default_dscr(unsigned long val)
{
- int fd = -1, ret;
- char buf[16];
+ int err;
- if (fd == -1) {
- fd = open(DSCR_DEFAULT, O_RDWR);
- if (fd == -1) {
- perror("open() failed");
- exit(1);
- }
- }
- sprintf(buf, "%lx\n", val);
- ret = write(fd, buf, strlen(buf));
- if (ret == -1) {
+ err = write_ulong(DSCR_DEFAULT, val, 16);
+ if (err) {
perror("write() failed");
exit(1);
}
- close(fd);
}
-double uniform_deviate(int seed)
-{
- return seed * (1.0 / (RAND_MAX + 1.0));
-}
#endif /* _SELFTESTS_POWERPC_DSCR_DSCR_H */
diff --git a/tools/testing/selftests/powerpc/dscr/dscr_default_test.c b/tools/testing/selftests/powerpc/dscr/dscr_default_test.c
index 288a4e2ad156..60ab02525b79 100644
--- a/tools/testing/selftests/powerpc/dscr/dscr_default_test.c
+++ b/tools/testing/selftests/powerpc/dscr/dscr_default_test.c
@@ -9,116 +9,161 @@
* Copyright 2012, Anton Blanchard, IBM Corporation.
* Copyright 2015, Anshuman Khandual, IBM Corporation.
*/
+
+#define _GNU_SOURCE
+
#include "dscr.h"
-static unsigned long dscr; /* System DSCR default */
-static unsigned long sequence;
-static unsigned long result[THREADS];
+#include <pthread.h>
+#include <semaphore.h>
+#include <unistd.h>
-static void *do_test(void *in)
+static void *dscr_default_lockstep_writer(void *arg)
{
- unsigned long thread = (unsigned long)in;
- unsigned long i;
+ sem_t *reader_sem = (sem_t *)arg;
+ sem_t *writer_sem = (sem_t *)arg + 1;
+ unsigned long expected_dscr = 0;
- for (i = 0; i < COUNT; i++) {
- unsigned long d, cur_dscr, cur_dscr_usr;
- unsigned long s1, s2;
+ for (int i = 0; i < COUNT; i++) {
+ FAIL_IF_EXIT(sem_wait(writer_sem));
- s1 = READ_ONCE(sequence);
- if (s1 & 1)
- continue;
- rmb();
+ set_default_dscr(expected_dscr);
+ expected_dscr = (expected_dscr + 1) % DSCR_MAX;
- d = dscr;
- cur_dscr = get_dscr();
- cur_dscr_usr = get_dscr_usr();
+ FAIL_IF_EXIT(sem_post(reader_sem));
+ }
- rmb();
- s2 = sequence;
+ return NULL;
+}
- if (s1 != s2)
- continue;
+int dscr_default_lockstep_test(void)
+{
+ pthread_t writer;
+ sem_t rw_semaphores[2];
+ sem_t *reader_sem = &rw_semaphores[0];
+ sem_t *writer_sem = &rw_semaphores[1];
+ unsigned long expected_dscr = 0;
- if (cur_dscr != d) {
- fprintf(stderr, "thread %ld kernel DSCR should be %ld "
- "but is %ld\n", thread, d, cur_dscr);
- result[thread] = 1;
- pthread_exit(&result[thread]);
- }
+ SKIP_IF(!have_hwcap2(PPC_FEATURE2_DSCR));
- if (cur_dscr_usr != d) {
- fprintf(stderr, "thread %ld user DSCR should be %ld "
- "but is %ld\n", thread, d, cur_dscr_usr);
- result[thread] = 1;
- pthread_exit(&result[thread]);
- }
+ FAIL_IF(sem_init(reader_sem, 0, 0));
+ FAIL_IF(sem_init(writer_sem, 0, 1)); /* writer starts first */
+ FAIL_IF(bind_to_cpu(BIND_CPU_ANY) < 0);
+ FAIL_IF(pthread_create(&writer, NULL, dscr_default_lockstep_writer, (void *)rw_semaphores));
+
+ for (int i = 0; i < COUNT ; i++) {
+ FAIL_IF(sem_wait(reader_sem));
+
+ FAIL_IF(get_dscr() != expected_dscr);
+ FAIL_IF(get_dscr_usr() != expected_dscr);
+
+ expected_dscr = (expected_dscr + 1) % DSCR_MAX;
+
+ FAIL_IF(sem_post(writer_sem));
}
- result[thread] = 0;
- pthread_exit(&result[thread]);
-}
-int dscr_default(void)
-{
- pthread_t threads[THREADS];
- unsigned long i, *status[THREADS];
- unsigned long orig_dscr_default;
+ FAIL_IF(pthread_join(writer, NULL));
+ FAIL_IF(sem_destroy(reader_sem));
+ FAIL_IF(sem_destroy(writer_sem));
- orig_dscr_default = get_default_dscr();
+ return 0;
+}
- /* Initial DSCR default */
- dscr = 1;
- set_default_dscr(dscr);
+struct random_thread_args {
+ pthread_t thread_id;
+ unsigned long *expected_system_dscr;
+ pthread_rwlock_t *rw_lock;
+ pthread_barrier_t *barrier;
+};
- /* Spawn all testing threads */
- for (i = 0; i < THREADS; i++) {
- if (pthread_create(&threads[i], NULL, do_test, (void *)i)) {
- perror("pthread_create() failed");
- goto fail;
+static void *dscr_default_random_thread(void *in)
+{
+ struct random_thread_args *args = (struct random_thread_args *)in;
+ unsigned long *expected_dscr_p = args->expected_system_dscr;
+ pthread_rwlock_t *rw_lock = args->rw_lock;
+ int err;
+
+ srand(gettid());
+
+ err = pthread_barrier_wait(args->barrier);
+ FAIL_IF_EXIT(err != 0 && err != PTHREAD_BARRIER_SERIAL_THREAD);
+
+ for (int i = 0; i < COUNT; i++) {
+ unsigned long expected_dscr;
+ unsigned long current_dscr;
+ unsigned long current_dscr_usr;
+
+ FAIL_IF_EXIT(pthread_rwlock_rdlock(rw_lock));
+ expected_dscr = *expected_dscr_p;
+ current_dscr = get_dscr();
+ current_dscr_usr = get_dscr_usr();
+ FAIL_IF_EXIT(pthread_rwlock_unlock(rw_lock));
+
+ FAIL_IF_EXIT(current_dscr != expected_dscr);
+ FAIL_IF_EXIT(current_dscr_usr != expected_dscr);
+
+ if (rand() % 10 == 0) {
+ unsigned long next_dscr;
+
+ FAIL_IF_EXIT(pthread_rwlock_wrlock(rw_lock));
+ next_dscr = (*expected_dscr_p + 1) % DSCR_MAX;
+ set_default_dscr(next_dscr);
+ *expected_dscr_p = next_dscr;
+ FAIL_IF_EXIT(pthread_rwlock_unlock(rw_lock));
}
}
- srand(getpid());
+ pthread_exit((void *)0);
+}
- /* Keep changing the DSCR default */
- for (i = 0; i < COUNT; i++) {
- double ret = uniform_deviate(rand());
+int dscr_default_random_test(void)
+{
+ struct random_thread_args threads[THREADS];
+ unsigned long expected_system_dscr = 0;
+ pthread_rwlockattr_t rwlock_attr;
+ pthread_rwlock_t rw_lock;
+ pthread_barrier_t barrier;
- if (ret < 0.0001) {
- sequence++;
- wmb();
+ SKIP_IF(!have_hwcap2(PPC_FEATURE2_DSCR));
- dscr++;
- if (dscr > DSCR_MAX)
- dscr = 0;
+ FAIL_IF(pthread_rwlockattr_setkind_np(&rwlock_attr,
+ PTHREAD_RWLOCK_PREFER_WRITER_NONRECURSIVE_NP));
+ FAIL_IF(pthread_rwlock_init(&rw_lock, &rwlock_attr));
+ FAIL_IF(pthread_barrier_init(&barrier, NULL, THREADS));
- set_default_dscr(dscr);
+ set_default_dscr(expected_system_dscr);
- wmb();
- sequence++;
- }
+ for (int i = 0; i < THREADS; i++) {
+ threads[i].expected_system_dscr = &expected_system_dscr;
+ threads[i].rw_lock = &rw_lock;
+ threads[i].barrier = &barrier;
+
+ FAIL_IF(pthread_create(&threads[i].thread_id, NULL,
+ dscr_default_random_thread, (void *)&threads[i]));
}
- /* Individual testing thread exit status */
- for (i = 0; i < THREADS; i++) {
- if (pthread_join(threads[i], (void **)&(status[i]))) {
- perror("pthread_join() failed");
- goto fail;
- }
+ for (int i = 0; i < THREADS; i++)
+ FAIL_IF(pthread_join(threads[i].thread_id, NULL));
+
+ FAIL_IF(pthread_barrier_destroy(&barrier));
+ FAIL_IF(pthread_rwlock_destroy(&rw_lock));
- if (*status[i]) {
- printf("%ldth thread failed to join with %ld status\n",
- i, *status[i]);
- goto fail;
- }
- }
- set_default_dscr(orig_dscr_default);
return 0;
-fail:
- set_default_dscr(orig_dscr_default);
- return 1;
}
int main(int argc, char *argv[])
{
- return test_harness(dscr_default, "dscr_default_test");
+ unsigned long orig_dscr_default = 0;
+ int err = 0;
+
+ if (have_hwcap2(PPC_FEATURE2_DSCR))
+ orig_dscr_default = get_default_dscr();
+
+ err |= test_harness(dscr_default_lockstep_test, "dscr_default_lockstep_test");
+ err |= test_harness(dscr_default_random_test, "dscr_default_random_test");
+
+ if (have_hwcap2(PPC_FEATURE2_DSCR))
+ set_default_dscr(orig_dscr_default);
+
+ return err;
}
diff --git a/tools/testing/selftests/powerpc/dscr/dscr_explicit_test.c b/tools/testing/selftests/powerpc/dscr/dscr_explicit_test.c
index aefcd8d8759b..e2268e9183a8 100644
--- a/tools/testing/selftests/powerpc/dscr/dscr_explicit_test.c
+++ b/tools/testing/selftests/powerpc/dscr/dscr_explicit_test.c
@@ -7,62 +7,167 @@
* privilege state SPR and the problem state SPR for this purpose.
*
* When using the privilege state SPR, the instructions such as
- * mfspr or mtspr are priviledged and the kernel emulates them
- * for us. Instructions using problem state SPR can be exuecuted
+ * mfspr or mtspr are privileged and the kernel emulates them
+ * for us. Instructions using problem state SPR can be executed
* directly without any emulation if the HW supports them. Else
* they also get emulated by the kernel.
*
* Copyright 2012, Anton Blanchard, IBM Corporation.
* Copyright 2015, Anshuman Khandual, IBM Corporation.
*/
+
+#define _GNU_SOURCE
+
#include "dscr.h"
+#include "utils.h"
+
+#include <pthread.h>
+#include <sched.h>
+#include <semaphore.h>
-int dscr_explicit(void)
+void *dscr_explicit_lockstep_thread(void *args)
{
- unsigned long i, dscr = 0;
+ sem_t *prev = (sem_t *)args;
+ sem_t *next = (sem_t *)args + 1;
+ unsigned long expected_dscr = 0;
- srand(getpid());
- set_dscr(dscr);
+ set_dscr(expected_dscr);
+ srand(gettid());
- for (i = 0; i < COUNT; i++) {
- unsigned long cur_dscr, cur_dscr_usr;
- double ret = uniform_deviate(rand());
+ for (int i = 0; i < COUNT; i++) {
+ FAIL_IF_EXIT(sem_wait(prev));
- if (ret < 0.001) {
- dscr++;
- if (dscr > DSCR_MAX)
- dscr = 0;
+ FAIL_IF_EXIT(expected_dscr != get_dscr());
+ FAIL_IF_EXIT(expected_dscr != get_dscr_usr());
- set_dscr(dscr);
- }
+ expected_dscr = (expected_dscr + 1) % DSCR_MAX;
+ set_dscr(expected_dscr);
- cur_dscr = get_dscr();
- if (cur_dscr != dscr) {
- fprintf(stderr, "Kernel DSCR should be %ld but "
- "is %ld\n", dscr, cur_dscr);
- return 1;
- }
+ FAIL_IF_EXIT(sem_post(next));
+ }
+
+ return NULL;
+}
+
+int dscr_explicit_lockstep_test(void)
+{
+ pthread_t thread;
+ sem_t semaphores[2];
+ sem_t *prev = &semaphores[1]; /* reversed prev/next than for the other thread */
+ sem_t *next = &semaphores[0];
+ unsigned long expected_dscr = 0;
+
+ SKIP_IF(!have_hwcap2(PPC_FEATURE2_DSCR));
+
+ srand(gettid());
+ set_dscr(expected_dscr);
+
+ FAIL_IF(sem_init(prev, 0, 0));
+ FAIL_IF(sem_init(next, 0, 1)); /* other thread starts first */
+ FAIL_IF(bind_to_cpu(BIND_CPU_ANY) < 0);
+ FAIL_IF(pthread_create(&thread, NULL, dscr_explicit_lockstep_thread, (void *)semaphores));
+
+ for (int i = 0; i < COUNT; i++) {
+ FAIL_IF(sem_wait(prev));
+
+ FAIL_IF(expected_dscr != get_dscr());
+ FAIL_IF(expected_dscr != get_dscr_usr());
+
+ expected_dscr = (expected_dscr - 1) % DSCR_MAX;
+ set_dscr(expected_dscr);
+
+ FAIL_IF(sem_post(next));
+ }
+
+ FAIL_IF(pthread_join(thread, NULL));
+ FAIL_IF(sem_destroy(prev));
+ FAIL_IF(sem_destroy(next));
+
+ return 0;
+}
+
+struct random_thread_args {
+ pthread_t thread_id;
+ bool do_yields;
+ pthread_barrier_t *barrier;
+};
- ret = uniform_deviate(rand());
- if (ret < 0.001) {
- dscr++;
- if (dscr > DSCR_MAX)
- dscr = 0;
+void *dscr_explicit_random_thread(void *in)
+{
+ struct random_thread_args *args = (struct random_thread_args *)in;
+ unsigned long expected_dscr = 0;
+ int err;
+
+ srand(gettid());
+
+ err = pthread_barrier_wait(args->barrier);
+ FAIL_IF_EXIT(err != 0 && err != PTHREAD_BARRIER_SERIAL_THREAD);
- set_dscr_usr(dscr);
+ for (int i = 0; i < COUNT; i++) {
+ expected_dscr = rand() % DSCR_MAX;
+ set_dscr(expected_dscr);
+
+ for (int j = rand() % 5; j > 0; --j) {
+ FAIL_IF_EXIT(get_dscr() != expected_dscr);
+ FAIL_IF_EXIT(get_dscr_usr() != expected_dscr);
+
+ if (args->do_yields && rand() % 2)
+ sched_yield();
}
- cur_dscr_usr = get_dscr_usr();
- if (cur_dscr_usr != dscr) {
- fprintf(stderr, "User DSCR should be %ld but "
- "is %ld\n", dscr, cur_dscr_usr);
- return 1;
+ expected_dscr = rand() % DSCR_MAX;
+ set_dscr_usr(expected_dscr);
+
+ for (int j = rand() % 5; j > 0; --j) {
+ FAIL_IF_EXIT(get_dscr() != expected_dscr);
+ FAIL_IF_EXIT(get_dscr_usr() != expected_dscr);
+
+ if (args->do_yields && rand() % 2)
+ sched_yield();
}
}
+
+ return NULL;
+}
+
+int dscr_explicit_random_test(void)
+{
+ struct random_thread_args threads[THREADS];
+ pthread_barrier_t barrier;
+
+ SKIP_IF(!have_hwcap2(PPC_FEATURE2_DSCR));
+
+ FAIL_IF(pthread_barrier_init(&barrier, NULL, THREADS));
+
+ for (int i = 0; i < THREADS; i++) {
+ threads[i].do_yields = i % 2 == 0;
+ threads[i].barrier = &barrier;
+
+ FAIL_IF(pthread_create(&threads[i].thread_id, NULL,
+ dscr_explicit_random_thread, (void *)&threads[i]));
+ }
+
+ for (int i = 0; i < THREADS; i++)
+ FAIL_IF(pthread_join(threads[i].thread_id, NULL));
+
+ FAIL_IF(pthread_barrier_destroy(&barrier));
+
return 0;
}
int main(int argc, char *argv[])
{
- return test_harness(dscr_explicit, "dscr_explicit_test");
+ unsigned long orig_dscr_default = 0;
+ int err = 0;
+
+ if (have_hwcap2(PPC_FEATURE2_DSCR))
+ orig_dscr_default = get_default_dscr();
+
+ err |= test_harness(dscr_explicit_lockstep_test, "dscr_explicit_lockstep_test");
+ err |= test_harness(dscr_explicit_random_test, "dscr_explicit_random_test");
+
+ if (have_hwcap2(PPC_FEATURE2_DSCR))
+ set_default_dscr(orig_dscr_default);
+
+ return err;
}
diff --git a/tools/testing/selftests/powerpc/dscr/dscr_inherit_exec_test.c b/tools/testing/selftests/powerpc/dscr/dscr_inherit_exec_test.c
index 7c1cb46397c6..c6a81b2d6b91 100644
--- a/tools/testing/selftests/powerpc/dscr/dscr_inherit_exec_test.c
+++ b/tools/testing/selftests/powerpc/dscr/dscr_inherit_exec_test.c
@@ -44,6 +44,8 @@ int dscr_inherit_exec(void)
unsigned long i, dscr = 0;
pid_t pid;
+ SKIP_IF(!have_hwcap2(PPC_FEATURE2_DSCR));
+
for (i = 0; i < COUNT; i++) {
dscr++;
if (dscr > DSCR_MAX)
diff --git a/tools/testing/selftests/powerpc/dscr/dscr_inherit_test.c b/tools/testing/selftests/powerpc/dscr/dscr_inherit_test.c
index 04297a69ab59..68ce328e813e 100644
--- a/tools/testing/selftests/powerpc/dscr/dscr_inherit_test.c
+++ b/tools/testing/selftests/powerpc/dscr/dscr_inherit_test.c
@@ -7,8 +7,8 @@
* value using mfspr.
*
* When using the privilege state SPR, the instructions such as
- * mfspr or mtspr are priviledged and the kernel emulates them
- * for us. Instructions using problem state SPR can be exuecuted
+ * mfspr or mtspr are privileged and the kernel emulates them
+ * for us. Instructions using problem state SPR can be executed
* directly without any emulation if the HW supports them. Else
* they also get emulated by the kernel.
*
@@ -22,6 +22,8 @@ int dscr_inherit(void)
unsigned long i, dscr = 0;
pid_t pid;
+ SKIP_IF(!have_hwcap2(PPC_FEATURE2_DSCR));
+
srand(getpid());
set_dscr(dscr);
diff --git a/tools/testing/selftests/powerpc/dscr/dscr_sysfs_test.c b/tools/testing/selftests/powerpc/dscr/dscr_sysfs_test.c
index 02f6b4efde14..e7cd0d6b1fad 100644
--- a/tools/testing/selftests/powerpc/dscr/dscr_sysfs_test.c
+++ b/tools/testing/selftests/powerpc/dscr/dscr_sysfs_test.c
@@ -12,26 +12,16 @@
static int check_cpu_dscr_default(char *file, unsigned long val)
{
- char buf[10];
- int fd, rc;
+ unsigned long cpu_dscr;
+ int err;
- fd = open(file, O_RDWR);
- if (fd == -1) {
- perror("open() failed");
- return 1;
- }
-
- rc = read(fd, buf, sizeof(buf));
- if (rc == -1) {
- perror("read() failed");
- return 1;
- }
- close(fd);
+ err = read_ulong(file, &cpu_dscr, 16);
+ if (err)
+ return err;
- buf[rc] = '\0';
- if (strtol(buf, NULL, 16) != val) {
+ if (cpu_dscr != val) {
printf("DSCR match failed: %ld (system) %ld (cpu)\n",
- val, strtol(buf, NULL, 16));
+ val, cpu_dscr);
return 1;
}
return 0;
@@ -65,8 +55,10 @@ static int check_all_cpu_dscr_defaults(unsigned long val)
if (access(file, F_OK))
continue;
- if (check_cpu_dscr_default(file, val))
+ if (check_cpu_dscr_default(file, val)) {
+ closedir(sysfs);
return 1;
+ }
}
closedir(sysfs);
return 0;
@@ -75,15 +67,14 @@ static int check_all_cpu_dscr_defaults(unsigned long val)
int dscr_sysfs(void)
{
unsigned long orig_dscr_default;
- int i, j;
+
+ SKIP_IF(!have_hwcap2(PPC_FEATURE2_DSCR));
orig_dscr_default = get_default_dscr();
- for (i = 0; i < COUNT; i++) {
- for (j = 0; j < DSCR_MAX; j++) {
- set_default_dscr(j);
- if (check_all_cpu_dscr_defaults(j))
- goto fail;
- }
+ for (int i = 0; i < DSCR_MAX; i++) {
+ set_default_dscr(i);
+ if (check_all_cpu_dscr_defaults(i))
+ goto fail;
}
set_default_dscr(orig_dscr_default);
return 0;
diff --git a/tools/testing/selftests/powerpc/dscr/dscr_sysfs_thread_test.c b/tools/testing/selftests/powerpc/dscr/dscr_sysfs_thread_test.c
index 37be2c25f277..191ed126f118 100644
--- a/tools/testing/selftests/powerpc/dscr/dscr_sysfs_thread_test.c
+++ b/tools/testing/selftests/powerpc/dscr/dscr_sysfs_thread_test.c
@@ -56,6 +56,8 @@ int dscr_sysfs_thread(void)
unsigned long orig_dscr_default;
int i, j;
+ SKIP_IF(!have_hwcap2(PPC_FEATURE2_DSCR));
+
orig_dscr_default = get_default_dscr();
for (i = 0; i < COUNT; i++) {
for (j = 0; j < DSCR_MAX; j++) {
diff --git a/tools/testing/selftests/powerpc/dscr/dscr_user_test.c b/tools/testing/selftests/powerpc/dscr/dscr_user_test.c
index eaf785d11eed..67bb872a246a 100644
--- a/tools/testing/selftests/powerpc/dscr/dscr_user_test.c
+++ b/tools/testing/selftests/powerpc/dscr/dscr_user_test.c
@@ -8,8 +8,8 @@
* numbers.
*
* When using the privilege state SPR, the instructions such as
- * mfspr or mtspr are priviledged and the kernel emulates them
- * for us. Instructions using problem state SPR can be exuecuted
+ * mfspr or mtspr are privileged and the kernel emulates them
+ * for us. Instructions using problem state SPR can be executed
* directly without any emulation if the HW supports them. Else
* they also get emulated by the kernel.
*
@@ -36,6 +36,8 @@ int dscr_user(void)
{
int i;
+ SKIP_IF(!have_hwcap2(PPC_FEATURE2_DSCR));
+
check_dscr("");
for (i = 0; i < COUNT; i++) {
diff --git a/tools/testing/selftests/powerpc/eeh/Makefile b/tools/testing/selftests/powerpc/eeh/Makefile
index b397babd569b..ae963eb2dc5b 100644
--- a/tools/testing/selftests/powerpc/eeh/Makefile
+++ b/tools/testing/selftests/powerpc/eeh/Makefile
@@ -3,7 +3,7 @@ noarg:
$(MAKE) -C ../
TEST_PROGS := eeh-basic.sh
-TEST_FILES := eeh-functions.sh
+TEST_FILES := eeh-functions.sh settings
top_srcdir = ../../../../..
include ../../lib.mk
diff --git a/tools/testing/selftests/powerpc/eeh/eeh-basic.sh b/tools/testing/selftests/powerpc/eeh/eeh-basic.sh
index 8a8d0f456946..442b666ccdb5 100755
--- a/tools/testing/selftests/powerpc/eeh/eeh-basic.sh
+++ b/tools/testing/selftests/powerpc/eeh/eeh-basic.sh
@@ -3,24 +3,11 @@
. ./eeh-functions.sh
-if ! eeh_supported ; then
- echo "EEH not supported on this system, skipping"
- exit 0;
-fi
-
-if [ ! -e "/sys/kernel/debug/powerpc/eeh_dev_check" ] && \
- [ ! -e "/sys/kernel/debug/powerpc/eeh_dev_break" ] ; then
- echo "debugfs EEH testing files are missing. Is debugfs mounted?"
- exit 1;
-fi
+eeh_test_prep # NB: may exit
pre_lspci=`mktemp`
lspci > $pre_lspci
-# Bump the max freeze count to something absurd so we don't
-# trip over it while breaking things.
-echo 5000 > /sys/kernel/debug/powerpc/eeh_max_freezes
-
# record the devices that we break in here. Assuming everything
# goes to plan we should get them back once the recover process
# is finished.
@@ -28,34 +15,16 @@ devices=""
# Build up a list of candidate devices.
for dev in `ls -1 /sys/bus/pci/devices/ | grep '\.0$'` ; do
- # skip bridges since we can't recover them (yet...)
- if [ -e "/sys/bus/pci/devices/$dev/pci_bus" ] ; then
- echo "$dev, Skipped: bridge"
+ if ! eeh_can_break $dev ; then
continue;
fi
- # Skip VFs for now since we don't have a reliable way
- # to break them.
+ # Skip VFs for now since we don't have a reliable way to break them.
if [ -e "/sys/bus/pci/devices/$dev/physfn" ] ; then
echo "$dev, Skipped: virtfn"
continue;
fi
- if [ "ahci" = "$(basename $(realpath /sys/bus/pci/devices/$dev/driver))" ] ; then
- echo "$dev, Skipped: ahci doesn't support recovery"
- continue
- fi
-
- # Don't inject errosr into an already-frozen PE. This happens with
- # PEs that contain multiple PCI devices (e.g. multi-function cards)
- # and injecting new errors during the recovery process will probably
- # result in the recovery failing and the device being marked as
- # failed.
- if ! pe_ok $dev ; then
- echo "$dev, Skipped: Bad initial PE state"
- continue;
- fi
-
echo "$dev, Added"
# Add to this list of device to check
@@ -84,4 +53,5 @@ echo "$failed devices failed to recover ($dev_count tested)"
lspci | diff -u $pre_lspci -
rm -f $pre_lspci
-exit $failed
+test "$failed" -eq 0
+exit $?
diff --git a/tools/testing/selftests/powerpc/eeh/eeh-functions.sh b/tools/testing/selftests/powerpc/eeh/eeh-functions.sh
index f52ed92b53e7..70daa3925dcb 100755..100644
--- a/tools/testing/selftests/powerpc/eeh/eeh-functions.sh
+++ b/tools/testing/selftests/powerpc/eeh/eeh-functions.sh
@@ -1,16 +1,27 @@
#!/bin/sh
# SPDX-License-Identifier: GPL-2.0-only
+export KSELFTESTS_SKIP=4
+
+log() {
+ echo >/dev/stderr $*
+}
+
pe_ok() {
local dev="$1"
local path="/sys/bus/pci/devices/$dev/eeh_pe_state"
- if ! [ -e "$path" ] ; then
+ # if a driver doesn't support the error handling callbacks then the
+ # device is recovered by removing and re-probing it. This causes the
+ # sysfs directory to disappear so read the PE state once and squash
+ # any potential error messages
+ local eeh_state="$(cat $path 2>/dev/null)"
+ if [ -z "$eeh_state" ]; then
return 1;
fi
- local fw_state="$(cut -d' ' -f1 < $path)"
- local sw_state="$(cut -d' ' -f2 < $path)"
+ local fw_state="$(echo $eeh_state | cut -d' ' -f1)"
+ local sw_state="$(echo $eeh_state | cut -d' ' -f2)"
# If EEH_PE_ISOLATED or EEH_PE_RECOVERING are set then the PE is in an
# error state or being recovered. Either way, not ok.
@@ -34,6 +45,52 @@ eeh_supported() {
grep -q 'EEH Subsystem is enabled' /proc/powerpc/eeh
}
+eeh_test_prep() {
+ if ! eeh_supported ; then
+ echo "EEH not supported on this system, skipping"
+ exit $KSELFTESTS_SKIP;
+ fi
+
+ if [ ! -e "/sys/kernel/debug/powerpc/eeh_dev_check" ] && \
+ [ ! -e "/sys/kernel/debug/powerpc/eeh_dev_break" ] ; then
+ log "debugfs EEH testing files are missing. Is debugfs mounted?"
+ exit $KSELFTESTS_SKIP;
+ fi
+
+ # Bump the max freeze count to something absurd so we don't
+ # trip over it while breaking things.
+ echo 5000 > /sys/kernel/debug/powerpc/eeh_max_freezes
+}
+
+eeh_can_break() {
+ # skip bridges since we can't recover them (yet...)
+ if [ -e "/sys/bus/pci/devices/$dev/pci_bus" ] ; then
+ log "$dev, Skipped: bridge"
+ return 1;
+ fi
+
+ # The ahci driver doesn't support error recovery. If the ahci device
+ # happens to be hosting the root filesystem, and then we go and break
+ # it the system will generally go down. We should probably fix that
+ # at some point
+ if [ "ahci" = "$(basename $(realpath /sys/bus/pci/devices/$dev/driver))" ] ; then
+ log "$dev, Skipped: ahci doesn't support recovery"
+ return 1;
+ fi
+
+ # Don't inject errosr into an already-frozen PE. This happens with
+ # PEs that contain multiple PCI devices (e.g. multi-function cards)
+ # and injecting new errors during the recovery process will probably
+ # result in the recovery failing and the device being marked as
+ # failed.
+ if ! pe_ok $dev ; then
+ log "$dev, Skipped: Bad initial PE state"
+ return 1;
+ fi
+
+ return 0
+}
+
eeh_one_dev() {
local dev="$1"
@@ -41,7 +98,7 @@ eeh_one_dev() {
# testing so check that the argument is a well-formed sysfs device
# name.
if ! test -e /sys/bus/pci/devices/$dev/ ; then
- echo "Error: '$dev' must be a sysfs device name (DDDD:BB:DD.F)"
+ log "Error: '$dev' must be a sysfs device name (DDDD:BB:DD.F)"
return 1;
fi
@@ -65,16 +122,124 @@ eeh_one_dev() {
if pe_ok $dev ; then
break;
fi
- echo "$dev, waited $i/${max_wait}"
+ log "$dev, waited $i/${max_wait}"
sleep 1
done
if ! pe_ok $dev ; then
- echo "$dev, Failed to recover!"
+ log "$dev, Failed to recover!"
return 1;
fi
- echo "$dev, Recovered after $i seconds"
+ log "$dev, Recovered after $i seconds"
return 0;
}
+eeh_has_driver() {
+ test -e /sys/bus/pci/devices/$1/driver;
+ return $?
+}
+
+eeh_can_recover() {
+ # we'll get an IO error if the device's current driver doesn't support
+ # error recovery
+ echo $1 > '/sys/kernel/debug/powerpc/eeh_dev_can_recover' 2>/dev/null
+
+ return $?
+}
+
+eeh_find_all_pfs() {
+ devices=""
+
+ # SR-IOV on pseries requires hypervisor support, so check for that
+ is_pseries=""
+ if grep -q pSeries /proc/cpuinfo ; then
+ if [ ! -f /proc/device-tree/rtas/ibm,open-sriov-allow-unfreeze ] ||
+ [ ! -f /proc/device-tree/rtas/ibm,open-sriov-map-pe-number ] ; then
+ return 1;
+ fi
+
+ is_pseries="true"
+ fi
+
+ for dev in `ls -1 /sys/bus/pci/devices/` ; do
+ sysfs="/sys/bus/pci/devices/$dev"
+ if [ ! -e "$sysfs/sriov_numvfs" ] ; then
+ continue
+ fi
+
+ # skip unsupported PFs on pseries
+ if [ -z "$is_pseries" ] &&
+ [ ! -f "$sysfs/of_node/ibm,is-open-sriov-pf" ] &&
+ [ ! -f "$sysfs/of_node/ibm,open-sriov-vf-bar-info" ] ; then
+ continue;
+ fi
+
+ # no driver, no vfs
+ if ! eeh_has_driver $dev ; then
+ continue
+ fi
+
+ devices="$devices $dev"
+ done
+
+ if [ -z "$devices" ] ; then
+ return 1;
+ fi
+
+ echo $devices
+ return 0;
+}
+
+# attempts to enable one VF on each PF so we can do VF specific tests.
+# stdout: list of enabled VFs, one per line
+# return code: 0 if vfs are found, 1 otherwise
+eeh_enable_vfs() {
+ pf_list="$(eeh_find_all_pfs)"
+
+ vfs=0
+ for dev in $pf_list ; do
+ pf_sysfs="/sys/bus/pci/devices/$dev"
+
+ # make sure we have a single VF
+ echo 0 > "$pf_sysfs/sriov_numvfs"
+ echo 1 > "$pf_sysfs/sriov_numvfs"
+ if [ "$?" != 0 ] ; then
+ log "Unable to enable VFs on $pf, skipping"
+ continue;
+ fi
+
+ vf="$(basename $(realpath "$pf_sysfs/virtfn0"))"
+ if [ $? != 0 ] ; then
+ log "unable to find enabled vf on $pf"
+ echo 0 > "$pf_sysfs/sriov_numvfs"
+ continue;
+ fi
+
+ if ! eeh_can_break $vf ; then
+ log "skipping "
+
+ echo 0 > "$pf_sysfs/sriov_numvfs"
+ continue;
+ fi
+
+ vfs="$((vfs + 1))"
+ echo $vf
+ done
+
+ test "$vfs" != 0
+ return $?
+}
+
+eeh_disable_vfs() {
+ pf_list="$(eeh_find_all_pfs)"
+ if [ -z "$pf_list" ] ; then
+ return 1;
+ fi
+
+ for dev in $pf_list ; do
+ echo 0 > "/sys/bus/pci/devices/$dev/sriov_numvfs"
+ done
+
+ return 0;
+}
diff --git a/tools/testing/selftests/powerpc/eeh/eeh-vf-aware.sh b/tools/testing/selftests/powerpc/eeh/eeh-vf-aware.sh
new file mode 100755
index 000000000000..874c11953bb6
--- /dev/null
+++ b/tools/testing/selftests/powerpc/eeh/eeh-vf-aware.sh
@@ -0,0 +1,45 @@
+#!/bin/sh
+# SPDX-License-Identifier: GPL-2.0-only
+
+. ./eeh-functions.sh
+
+eeh_test_prep # NB: may exit
+
+vf_list="$(eeh_enable_vfs)";
+if $? != 0 ; then
+ log "No usable VFs found. Skipping EEH unaware VF test"
+ exit $KSELFTESTS_SKIP;
+fi
+
+log "Enabled VFs: $vf_list"
+
+tested=0
+passed=0
+for vf in $vf_list ; do
+ log "Testing $vf"
+
+ if ! eeh_can_recover $vf ; then
+ log "Driver for $vf doesn't support error recovery, skipping"
+ continue;
+ fi
+
+ tested="$((tested + 1))"
+
+ log "Breaking $vf..."
+ if ! eeh_one_dev $vf ; then
+ log "$vf failed to recover"
+ continue;
+ fi
+
+ passed="$((passed + 1))"
+done
+
+eeh_disable_vfs
+
+if [ "$tested" == 0 ] ; then
+ echo "No VFs with EEH aware drivers found, skipping"
+ exit $KSELFTESTS_SKIP
+fi
+
+test "$failed" != 0
+exit $?;
diff --git a/tools/testing/selftests/powerpc/eeh/eeh-vf-unaware.sh b/tools/testing/selftests/powerpc/eeh/eeh-vf-unaware.sh
new file mode 100755
index 000000000000..8a4c147b9d43
--- /dev/null
+++ b/tools/testing/selftests/powerpc/eeh/eeh-vf-unaware.sh
@@ -0,0 +1,35 @@
+#!/bin/sh
+# SPDX-License-Identifier: GPL-2.0-only
+
+. ./eeh-functions.sh
+
+eeh_test_prep # NB: may exit
+
+vf_list="$(eeh_enable_vfs)";
+if $? != 0 ; then
+ log "No usable VFs found. Skipping EEH unaware VF test"
+ exit $KSELFTESTS_SKIP;
+fi
+
+log "Enabled VFs: $vf_list"
+
+failed=0
+for vf in $vf_list ; do
+ log "Testing $vf"
+
+ if eeh_can_recover $vf ; then
+ log "Driver for $vf supports error recovery. Unbinding..."
+ echo "$vf" > /sys/bus/pci/devices/$vf/driver/unbind
+ fi
+
+ log "Breaking $vf..."
+ if ! eeh_one_dev $vf ; then
+ log "$vf failed to recover"
+ failed="$((failed + 1))"
+ fi
+done
+
+eeh_disable_vfs
+
+test "$failed" != 0
+exit $?;
diff --git a/tools/testing/selftests/powerpc/eeh/settings b/tools/testing/selftests/powerpc/eeh/settings
new file mode 100644
index 000000000000..694d70710ff0
--- /dev/null
+++ b/tools/testing/selftests/powerpc/eeh/settings
@@ -0,0 +1 @@
+timeout=300
diff --git a/tools/testing/selftests/powerpc/harness.c b/tools/testing/selftests/powerpc/harness.c
index 0ad4f12b3d43..5876220d8ff2 100644
--- a/tools/testing/selftests/powerpc/harness.c
+++ b/tools/testing/selftests/powerpc/harness.c
@@ -24,7 +24,7 @@
/* Setting timeout to -1 disables the alarm */
static uint64_t timeout = 120;
-int run_test(int (test_function)(void), char *name)
+int run_test(int (test_function)(void), const char *name)
{
bool terminated;
int rc, status;
@@ -101,7 +101,7 @@ void test_harness_set_timeout(uint64_t time)
timeout = time;
}
-int test_harness(int (test_function)(void), char *name)
+int test_harness(int (test_function)(void), const char *name)
{
int rc;
diff --git a/tools/testing/selftests/powerpc/include/basic_asm.h b/tools/testing/selftests/powerpc/include/basic_asm.h
index 886dc026fe7a..26cde8ea1f49 100644
--- a/tools/testing/selftests/powerpc/include/basic_asm.h
+++ b/tools/testing/selftests/powerpc/include/basic_asm.h
@@ -5,6 +5,16 @@
#include <ppc-asm.h>
#include <asm/unistd.h>
+#ifdef __powerpc64__
+#define PPC_LL ld
+#define PPC_STL std
+#define PPC_STLU stdu
+#else
+#define PPC_LL lwz
+#define PPC_STL stw
+#define PPC_STLU stwu
+#endif
+
#define LOAD_REG_IMMEDIATE(reg, expr) \
lis reg, (expr)@highest; \
ori reg, reg, (expr)@higher; \
@@ -14,16 +24,20 @@
/*
* Note: These macros assume that variables being stored on the stack are
- * doublewords, while this is usually the case it may not always be the
+ * sizeof(long), while this is usually the case it may not always be the
* case for each use case.
*/
+#ifdef __powerpc64__
+
+// ABIv2
#if defined(_CALL_ELF) && _CALL_ELF == 2
#define STACK_FRAME_MIN_SIZE 32
#define STACK_FRAME_TOC_POS 24
#define __STACK_FRAME_PARAM(_param) (32 + ((_param)*8))
#define __STACK_FRAME_LOCAL(_num_params, _var_num) \
((STACK_FRAME_PARAM(_num_params)) + ((_var_num)*8))
-#else
+
+#else // ABIv1 below
#define STACK_FRAME_MIN_SIZE 112
#define STACK_FRAME_TOC_POS 40
#define __STACK_FRAME_PARAM(i) (48 + ((i)*8))
@@ -34,7 +48,24 @@
*/
#define __STACK_FRAME_LOCAL(_num_params, _var_num) \
(112 + ((_var_num)*8))
-#endif
+
+
+#endif // ABIv2
+
+// Common 64-bit
+#define STACK_FRAME_LR_POS 16
+#define STACK_FRAME_CR_POS 8
+
+#else // 32-bit below
+
+#define STACK_FRAME_MIN_SIZE 16
+#define STACK_FRAME_LR_POS 4
+
+#define __STACK_FRAME_PARAM(_param) (STACK_FRAME_MIN_SIZE + ((_param)*4))
+#define __STACK_FRAME_LOCAL(_num_params, _var_num) \
+ ((STACK_FRAME_PARAM(_num_params)) + ((_var_num)*4))
+
+#endif // __powerpc64__
/* Parameter x saved to the stack */
#define STACK_FRAME_PARAM(var) __STACK_FRAME_PARAM(var)
@@ -42,8 +73,6 @@
/* Local variable x saved to the stack after x parameters */
#define STACK_FRAME_LOCAL(num_params, var) \
__STACK_FRAME_LOCAL(num_params, var)
-#define STACK_FRAME_LR_POS 16
-#define STACK_FRAME_CR_POS 8
/*
* It is very important to note here that _extra is the extra amount of
@@ -56,19 +85,21 @@
* preprocessed incorrectly, hence r0.
*/
#define PUSH_BASIC_STACK(_extra) \
- mflr r0; \
- std r0, STACK_FRAME_LR_POS(%r1); \
- stdu %r1, -(_extra + STACK_FRAME_MIN_SIZE)(%r1); \
- mfcr r0; \
- stw r0, STACK_FRAME_CR_POS(%r1); \
- std %r2, STACK_FRAME_TOC_POS(%r1);
+ mflr r0; \
+ PPC_STL r0, STACK_FRAME_LR_POS(%r1); \
+ PPC_STLU %r1, -(((_extra + 15) & ~15) + STACK_FRAME_MIN_SIZE)(%r1);
#define POP_BASIC_STACK(_extra) \
- ld %r2, STACK_FRAME_TOC_POS(%r1); \
- lwz r0, STACK_FRAME_CR_POS(%r1); \
- mtcr r0; \
- addi %r1, %r1, (_extra + STACK_FRAME_MIN_SIZE); \
- ld r0, STACK_FRAME_LR_POS(%r1); \
+ addi %r1, %r1, (((_extra + 15) & ~15) + STACK_FRAME_MIN_SIZE); \
+ PPC_LL r0, STACK_FRAME_LR_POS(%r1); \
mtlr r0;
+.macro OP_REGS op, reg_width, start_reg, end_reg, base_reg, base_reg_offset=0, skip=0
+ .set i, \start_reg
+ .rept (\end_reg - \start_reg + 1)
+ \op i, (\reg_width * (i - \skip) + \base_reg_offset)(\base_reg)
+ .set i, i + 1
+ .endr
+.endm
+
#endif /* _SELFTESTS_POWERPC_BASIC_ASM_H */
diff --git a/tools/testing/selftests/powerpc/include/instructions.h b/tools/testing/selftests/powerpc/include/instructions.h
index f36061eb6f0f..4efa6314bd96 100644
--- a/tools/testing/selftests/powerpc/include/instructions.h
+++ b/tools/testing/selftests/powerpc/include/instructions.h
@@ -66,4 +66,81 @@ static inline int paste_last(void *i)
#define PPC_INST_PASTE __PASTE(0, 0, 0, 0)
#define PPC_INST_PASTE_LAST __PASTE(0, 0, 1, 1)
+/* This defines the prefixed load/store instructions */
+#ifdef __ASSEMBLY__
+# define stringify_in_c(...) __VA_ARGS__
+#else
+# define __stringify_in_c(...) #__VA_ARGS__
+# define stringify_in_c(...) __stringify_in_c(__VA_ARGS__) " "
+#endif
+
+#define __PPC_RA(a) (((a) & 0x1f) << 16)
+#define __PPC_RS(s) (((s) & 0x1f) << 21)
+#define __PPC_RT(t) __PPC_RS(t)
+#define __PPC_PREFIX_R(r) (((r) & 0x1) << 20)
+
+#define PPC_PREFIX_MLS 0x06000000
+#define PPC_PREFIX_8LS 0x04000000
+
+#define PPC_INST_LBZ 0x88000000
+#define PPC_INST_LHZ 0xa0000000
+#define PPC_INST_LHA 0xa8000000
+#define PPC_INST_LWZ 0x80000000
+#define PPC_INST_STB 0x98000000
+#define PPC_INST_STH 0xb0000000
+#define PPC_INST_STW 0x90000000
+#define PPC_INST_STD 0xf8000000
+#define PPC_INST_LFS 0xc0000000
+#define PPC_INST_LFD 0xc8000000
+#define PPC_INST_STFS 0xd0000000
+#define PPC_INST_STFD 0xd8000000
+
+#define PREFIX_MLS(instr, t, a, r, d) stringify_in_c(.balign 64, , 4;) \
+ stringify_in_c(.long PPC_PREFIX_MLS | \
+ __PPC_PREFIX_R(r) | \
+ (((d) >> 16) & 0x3ffff);) \
+ stringify_in_c(.long (instr) | \
+ __PPC_RT(t) | \
+ __PPC_RA(a) | \
+ ((d) & 0xffff);\n)
+
+#define PREFIX_8LS(instr, t, a, r, d) stringify_in_c(.balign 64, , 4;) \
+ stringify_in_c(.long PPC_PREFIX_8LS | \
+ __PPC_PREFIX_R(r) | \
+ (((d) >> 16) & 0x3ffff);) \
+ stringify_in_c(.long (instr) | \
+ __PPC_RT(t) | \
+ __PPC_RA(a) | \
+ ((d) & 0xffff);\n)
+
+/* Prefixed Integer Load/Store instructions */
+#define PLBZ(t, a, r, d) PREFIX_MLS(PPC_INST_LBZ, t, a, r, d)
+#define PLHZ(t, a, r, d) PREFIX_MLS(PPC_INST_LHZ, t, a, r, d)
+#define PLHA(t, a, r, d) PREFIX_MLS(PPC_INST_LHA, t, a, r, d)
+#define PLWZ(t, a, r, d) PREFIX_MLS(PPC_INST_LWZ, t, a, r, d)
+#define PLWA(t, a, r, d) PREFIX_8LS(0xa4000000, t, a, r, d)
+#define PLD(t, a, r, d) PREFIX_8LS(0xe4000000, t, a, r, d)
+#define PLQ(t, a, r, d) PREFIX_8LS(0xe0000000, t, a, r, d)
+#define PSTB(s, a, r, d) PREFIX_MLS(PPC_INST_STB, s, a, r, d)
+#define PSTH(s, a, r, d) PREFIX_MLS(PPC_INST_STH, s, a, r, d)
+#define PSTW(s, a, r, d) PREFIX_MLS(PPC_INST_STW, s, a, r, d)
+#define PSTD(s, a, r, d) PREFIX_8LS(0xf4000000, s, a, r, d)
+#define PSTQ(s, a, r, d) PREFIX_8LS(0xf0000000, s, a, r, d)
+
+/* Prefixed Floating-Point Load/Store Instructions */
+#define PLFS(frt, a, r, d) PREFIX_MLS(PPC_INST_LFS, frt, a, r, d)
+#define PLFD(frt, a, r, d) PREFIX_MLS(PPC_INST_LFD, frt, a, r, d)
+#define PSTFS(frs, a, r, d) PREFIX_MLS(PPC_INST_STFS, frs, a, r, d)
+#define PSTFD(frs, a, r, d) PREFIX_MLS(PPC_INST_STFD, frs, a, r, d)
+
+/* Prefixed VSX Load/Store Instructions */
+#define PLXSD(vrt, a, r, d) PREFIX_8LS(0xa8000000, vrt, a, r, d)
+#define PLXSSP(vrt, a, r, d) PREFIX_8LS(0xac000000, vrt, a, r, d)
+#define PLXV0(s, a, r, d) PREFIX_8LS(0xc8000000, s, a, r, d)
+#define PLXV1(s, a, r, d) PREFIX_8LS(0xcc000000, s, a, r, d)
+#define PSTXSD(vrs, a, r, d) PREFIX_8LS(0xb8000000, vrs, a, r, d)
+#define PSTXSSP(vrs, a, r, d) PREFIX_8LS(0xbc000000, vrs, a, r, d)
+#define PSTXV0(s, a, r, d) PREFIX_8LS(0xd8000000, s, a, r, d)
+#define PSTXV1(s, a, r, d) PREFIX_8LS(0xdc000000, s, a, r, d)
+
#endif /* _SELFTESTS_POWERPC_INSTRUCTIONS_H */
diff --git a/tools/testing/selftests/powerpc/include/pkeys.h b/tools/testing/selftests/powerpc/include/pkeys.h
new file mode 100644
index 000000000000..51729d9a7111
--- /dev/null
+++ b/tools/testing/selftests/powerpc/include/pkeys.h
@@ -0,0 +1,136 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Copyright 2020, Sandipan Das, IBM Corp.
+ */
+
+#ifndef _SELFTESTS_POWERPC_PKEYS_H
+#define _SELFTESTS_POWERPC_PKEYS_H
+
+#include <sys/mman.h>
+
+#include "reg.h"
+#include "utils.h"
+
+/*
+ * Older versions of libc use the Intel-specific access rights.
+ * Hence, override the definitions as they might be incorrect.
+ */
+#undef PKEY_DISABLE_ACCESS
+#define PKEY_DISABLE_ACCESS 0x3
+
+#undef PKEY_DISABLE_WRITE
+#define PKEY_DISABLE_WRITE 0x2
+
+#undef PKEY_DISABLE_EXECUTE
+#define PKEY_DISABLE_EXECUTE 0x4
+
+/* Older versions of libc do not define this */
+#ifndef SEGV_PKUERR
+#define SEGV_PKUERR 4
+#endif
+
+#define SI_PKEY_OFFSET 0x20
+
+#define __NR_pkey_mprotect 386
+#define __NR_pkey_alloc 384
+#define __NR_pkey_free 385
+
+#define PKEY_BITS_PER_PKEY 2
+#define NR_PKEYS 32
+#define PKEY_BITS_MASK ((1UL << PKEY_BITS_PER_PKEY) - 1)
+
+inline unsigned long pkeyreg_get(void)
+{
+ return mfspr(SPRN_AMR);
+}
+
+inline void pkeyreg_set(unsigned long amr)
+{
+ set_amr(amr);
+}
+
+void pkey_set_rights(int pkey, unsigned long rights)
+{
+ unsigned long amr, shift;
+
+ shift = (NR_PKEYS - pkey - 1) * PKEY_BITS_PER_PKEY;
+ amr = pkeyreg_get();
+ amr &= ~(PKEY_BITS_MASK << shift);
+ amr |= (rights & PKEY_BITS_MASK) << shift;
+ pkeyreg_set(amr);
+}
+
+int sys_pkey_mprotect(void *addr, size_t len, int prot, int pkey)
+{
+ return syscall(__NR_pkey_mprotect, addr, len, prot, pkey);
+}
+
+int sys_pkey_alloc(unsigned long flags, unsigned long rights)
+{
+ return syscall(__NR_pkey_alloc, flags, rights);
+}
+
+int sys_pkey_free(int pkey)
+{
+ return syscall(__NR_pkey_free, pkey);
+}
+
+int pkeys_unsupported(void)
+{
+ bool hash_mmu = false;
+ int pkey;
+
+ /* Protection keys are currently supported on Hash MMU only */
+ FAIL_IF(using_hash_mmu(&hash_mmu));
+ SKIP_IF(!hash_mmu);
+
+ /* Check if the system call is supported */
+ pkey = sys_pkey_alloc(0, 0);
+ SKIP_IF(pkey < 0);
+ sys_pkey_free(pkey);
+
+ return 0;
+}
+
+int siginfo_pkey(siginfo_t *si)
+{
+ /*
+ * In older versions of libc, siginfo_t does not have si_pkey as
+ * a member.
+ */
+#ifdef si_pkey
+ return si->si_pkey;
+#else
+ return *((int *)(((char *) si) + SI_PKEY_OFFSET));
+#endif
+}
+
+#define pkey_rights(r) ({ \
+ static char buf[4] = "rwx"; \
+ unsigned int amr_bits; \
+ if ((r) & PKEY_DISABLE_EXECUTE) \
+ buf[2] = '-'; \
+ amr_bits = (r) & PKEY_BITS_MASK; \
+ if (amr_bits & PKEY_DISABLE_WRITE) \
+ buf[1] = '-'; \
+ if (amr_bits & PKEY_DISABLE_ACCESS & ~PKEY_DISABLE_WRITE) \
+ buf[0] = '-'; \
+ buf; \
+})
+
+unsigned long next_pkey_rights(unsigned long rights)
+{
+ if (rights == PKEY_DISABLE_ACCESS)
+ return PKEY_DISABLE_EXECUTE;
+ else if (rights == (PKEY_DISABLE_ACCESS | PKEY_DISABLE_EXECUTE))
+ return 0;
+
+ if ((rights & PKEY_BITS_MASK) == 0)
+ rights |= PKEY_DISABLE_WRITE;
+ else if ((rights & PKEY_BITS_MASK) == PKEY_DISABLE_WRITE)
+ rights |= PKEY_DISABLE_ACCESS;
+
+ return rights;
+}
+
+#endif /* _SELFTESTS_POWERPC_PKEYS_H */
diff --git a/tools/testing/selftests/powerpc/include/reg.h b/tools/testing/selftests/powerpc/include/reg.h
index 022c5076b2c5..fad09c9d3387 100644
--- a/tools/testing/selftests/powerpc/include/reg.h
+++ b/tools/testing/selftests/powerpc/include/reg.h
@@ -19,6 +19,8 @@
#define mb() asm volatile("sync" : : : "memory");
#define barrier() asm volatile("" : : : "memory");
+#define SPRN_HDEXCR_RO 455 /* Userspace readonly view of SPRN_HDEXCR (471) */
+
#define SPRN_MMCR2 769
#define SPRN_MMCRA 770
#define SPRN_MMCR0 779
@@ -47,16 +49,31 @@
#define SPRN_SDAR 781
#define SPRN_SIER 768
+#define SPRN_DEXCR_RO 812 /* Userspace readonly view of SPRN_DEXCR (828) */
+
#define SPRN_TEXASR 0x82 /* Transaction Exception and Status Register */
#define SPRN_TFIAR 0x81 /* Transaction Failure Inst Addr */
#define SPRN_TFHAR 0x80 /* Transaction Failure Handler Addr */
#define SPRN_TAR 0x32f /* Target Address Register */
+#define PVR_VER(pvr) (((pvr) >> 16) & 0xFFFF)
+#define SPRN_PVR 0x11F
+
+#define PVR_CFG(pvr) (((pvr) >> 8) & 0xF) /* Configuration field */
+#define PVR_MAJ(pvr) (((pvr) >> 4) & 0xF) /* Major revision field */
+#define PVR_MIN(pvr) (((pvr) >> 0) & 0xF) /* Minor revision field */
+
#define SPRN_DSCR_PRIV 0x11 /* Privilege State DSCR */
#define SPRN_DSCR 0x03 /* Data Stream Control Register */
#define SPRN_PPR 896 /* Program Priority Register */
#define SPRN_AMR 13 /* Authority Mask Register - problem state */
+#define set_amr(v) asm volatile("isync;" \
+ "mtspr " __stringify(SPRN_AMR) ",%0;" \
+ "isync" : \
+ : "r" ((unsigned long)(v)) \
+ : "memory")
+
/* TEXASR register bits */
#define TEXASR_FC 0xFE00000000000000
#define TEXASR_FP 0x0100000000000000
@@ -78,6 +95,7 @@
#define TEXASR_ROT 0x0000000002000000
/* MSR register bits */
+#define MSR_HV (1ul << 60) /* Hypervisor state */
#define MSR_TS_S_LG 33 /* Trans Mem state: Suspended */
#define MSR_TS_T_LG 34 /* Trans Mem state: Active */
@@ -113,45 +131,44 @@
"li 30, %[" #_asm_symbol_name_immed "];" \
"li 31, %[" #_asm_symbol_name_immed "];"
-#define ASM_LOAD_FPR_SINGLE_PRECISION(_asm_symbol_name_addr) \
- "lfs 0, 0(%[" #_asm_symbol_name_addr "]);" \
- "lfs 1, 0(%[" #_asm_symbol_name_addr "]);" \
- "lfs 2, 0(%[" #_asm_symbol_name_addr "]);" \
- "lfs 3, 0(%[" #_asm_symbol_name_addr "]);" \
- "lfs 4, 0(%[" #_asm_symbol_name_addr "]);" \
- "lfs 5, 0(%[" #_asm_symbol_name_addr "]);" \
- "lfs 6, 0(%[" #_asm_symbol_name_addr "]);" \
- "lfs 7, 0(%[" #_asm_symbol_name_addr "]);" \
- "lfs 8, 0(%[" #_asm_symbol_name_addr "]);" \
- "lfs 9, 0(%[" #_asm_symbol_name_addr "]);" \
- "lfs 10, 0(%[" #_asm_symbol_name_addr "]);" \
- "lfs 11, 0(%[" #_asm_symbol_name_addr "]);" \
- "lfs 12, 0(%[" #_asm_symbol_name_addr "]);" \
- "lfs 13, 0(%[" #_asm_symbol_name_addr "]);" \
- "lfs 14, 0(%[" #_asm_symbol_name_addr "]);" \
- "lfs 15, 0(%[" #_asm_symbol_name_addr "]);" \
- "lfs 16, 0(%[" #_asm_symbol_name_addr "]);" \
- "lfs 17, 0(%[" #_asm_symbol_name_addr "]);" \
- "lfs 18, 0(%[" #_asm_symbol_name_addr "]);" \
- "lfs 19, 0(%[" #_asm_symbol_name_addr "]);" \
- "lfs 20, 0(%[" #_asm_symbol_name_addr "]);" \
- "lfs 21, 0(%[" #_asm_symbol_name_addr "]);" \
- "lfs 22, 0(%[" #_asm_symbol_name_addr "]);" \
- "lfs 23, 0(%[" #_asm_symbol_name_addr "]);" \
- "lfs 24, 0(%[" #_asm_symbol_name_addr "]);" \
- "lfs 25, 0(%[" #_asm_symbol_name_addr "]);" \
- "lfs 26, 0(%[" #_asm_symbol_name_addr "]);" \
- "lfs 27, 0(%[" #_asm_symbol_name_addr "]);" \
- "lfs 28, 0(%[" #_asm_symbol_name_addr "]);" \
- "lfs 29, 0(%[" #_asm_symbol_name_addr "]);" \
- "lfs 30, 0(%[" #_asm_symbol_name_addr "]);" \
- "lfs 31, 0(%[" #_asm_symbol_name_addr "]);"
+#define ASM_LOAD_FPR(_asm_symbol_name_addr) \
+ "lfd 0, 0(%[" #_asm_symbol_name_addr "]);" \
+ "lfd 1, 0(%[" #_asm_symbol_name_addr "]);" \
+ "lfd 2, 0(%[" #_asm_symbol_name_addr "]);" \
+ "lfd 3, 0(%[" #_asm_symbol_name_addr "]);" \
+ "lfd 4, 0(%[" #_asm_symbol_name_addr "]);" \
+ "lfd 5, 0(%[" #_asm_symbol_name_addr "]);" \
+ "lfd 6, 0(%[" #_asm_symbol_name_addr "]);" \
+ "lfd 7, 0(%[" #_asm_symbol_name_addr "]);" \
+ "lfd 8, 0(%[" #_asm_symbol_name_addr "]);" \
+ "lfd 9, 0(%[" #_asm_symbol_name_addr "]);" \
+ "lfd 10, 0(%[" #_asm_symbol_name_addr "]);" \
+ "lfd 11, 0(%[" #_asm_symbol_name_addr "]);" \
+ "lfd 12, 0(%[" #_asm_symbol_name_addr "]);" \
+ "lfd 13, 0(%[" #_asm_symbol_name_addr "]);" \
+ "lfd 14, 0(%[" #_asm_symbol_name_addr "]);" \
+ "lfd 15, 0(%[" #_asm_symbol_name_addr "]);" \
+ "lfd 16, 0(%[" #_asm_symbol_name_addr "]);" \
+ "lfd 17, 0(%[" #_asm_symbol_name_addr "]);" \
+ "lfd 18, 0(%[" #_asm_symbol_name_addr "]);" \
+ "lfd 19, 0(%[" #_asm_symbol_name_addr "]);" \
+ "lfd 20, 0(%[" #_asm_symbol_name_addr "]);" \
+ "lfd 21, 0(%[" #_asm_symbol_name_addr "]);" \
+ "lfd 22, 0(%[" #_asm_symbol_name_addr "]);" \
+ "lfd 23, 0(%[" #_asm_symbol_name_addr "]);" \
+ "lfd 24, 0(%[" #_asm_symbol_name_addr "]);" \
+ "lfd 25, 0(%[" #_asm_symbol_name_addr "]);" \
+ "lfd 26, 0(%[" #_asm_symbol_name_addr "]);" \
+ "lfd 27, 0(%[" #_asm_symbol_name_addr "]);" \
+ "lfd 28, 0(%[" #_asm_symbol_name_addr "]);" \
+ "lfd 29, 0(%[" #_asm_symbol_name_addr "]);" \
+ "lfd 30, 0(%[" #_asm_symbol_name_addr "]);" \
+ "lfd 31, 0(%[" #_asm_symbol_name_addr "]);"
#ifndef __ASSEMBLER__
void store_gpr(unsigned long *addr);
void load_gpr(unsigned long *addr);
-void load_fpr_single_precision(float *addr);
-void store_fpr_single_precision(float *addr);
+void store_fpr(double *addr);
#endif /* end of __ASSEMBLER__ */
#endif /* _SELFTESTS_POWERPC_REG_H */
diff --git a/tools/testing/selftests/powerpc/include/subunit.h b/tools/testing/selftests/powerpc/include/subunit.h
index 068d55fdf80f..b0bb774617c9 100644
--- a/tools/testing/selftests/powerpc/include/subunit.h
+++ b/tools/testing/selftests/powerpc/include/subunit.h
@@ -6,37 +6,37 @@
#ifndef _SELFTESTS_POWERPC_SUBUNIT_H
#define _SELFTESTS_POWERPC_SUBUNIT_H
-static inline void test_start(char *name)
+static inline void test_start(const char *name)
{
printf("test: %s\n", name);
}
-static inline void test_failure_detail(char *name, char *detail)
+static inline void test_failure_detail(const char *name, const char *detail)
{
printf("failure: %s [%s]\n", name, detail);
}
-static inline void test_failure(char *name)
+static inline void test_failure(const char *name)
{
printf("failure: %s\n", name);
}
-static inline void test_error(char *name)
+static inline void test_error(const char *name)
{
printf("error: %s\n", name);
}
-static inline void test_skip(char *name)
+static inline void test_skip(const char *name)
{
printf("skip: %s\n", name);
}
-static inline void test_success(char *name)
+static inline void test_success(const char *name)
{
printf("success: %s\n", name);
}
-static inline void test_finish(char *name, int status)
+static inline void test_finish(const char *name, int status)
{
if (status)
test_failure(name);
@@ -44,7 +44,7 @@ static inline void test_finish(char *name, int status)
test_success(name);
}
-static inline void test_set_git_version(char *value)
+static inline void test_set_git_version(const char *value)
{
printf("tags: git_version:%s\n", value);
}
diff --git a/tools/testing/selftests/powerpc/include/utils.h b/tools/testing/selftests/powerpc/include/utils.h
index e089a0c30d9a..66d7b2368dd4 100644
--- a/tools/testing/selftests/powerpc/include/utils.h
+++ b/tools/testing/selftests/powerpc/include/utils.h
@@ -9,10 +9,18 @@
#define __cacheline_aligned __attribute__((aligned(128)))
#include <stdint.h>
+#include <stdio.h>
#include <stdbool.h>
+#include <sys/signal.h>
#include <linux/auxvec.h>
#include <linux/perf_event.h>
+#include <asm/cputable.h>
#include "reg.h"
+#include <unistd.h>
+
+#ifndef ARRAY_SIZE
+# define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0]))
+#endif
/* Avoid headaches with PRI?64 - just use %ll? always */
typedef unsigned long long u64;
@@ -24,24 +32,56 @@ typedef uint16_t u16;
typedef uint8_t u8;
void test_harness_set_timeout(uint64_t time);
-int test_harness(int (test_function)(void), char *name);
+int test_harness(int (test_function)(void), const char *name);
int read_auxv(char *buf, ssize_t buf_size);
void *find_auxv_entry(int type, char *auxv);
void *get_auxv_entry(int type);
-int pick_online_cpu(void);
+#define BIND_CPU_ANY (-1)
-int read_debugfs_file(char *debugfs_file, int *result);
-int write_debugfs_file(char *debugfs_file, int result);
+int pick_online_cpu(void);
+int bind_to_cpu(int cpu);
+
+int parse_intmax(const char *buffer, size_t count, intmax_t *result, int base);
+int parse_uintmax(const char *buffer, size_t count, uintmax_t *result, int base);
+int parse_int(const char *buffer, size_t count, int *result, int base);
+int parse_uint(const char *buffer, size_t count, unsigned int *result, int base);
+int parse_long(const char *buffer, size_t count, long *result, int base);
+int parse_ulong(const char *buffer, size_t count, unsigned long *result, int base);
+
+int read_file(const char *path, char *buf, size_t count, size_t *len);
+int write_file(const char *path, const char *buf, size_t count);
+int read_file_alloc(const char *path, char **buf, size_t *len);
+int read_long(const char *path, long *result, int base);
+int write_long(const char *path, long result, int base);
+int read_ulong(const char *path, unsigned long *result, int base);
+int write_ulong(const char *path, unsigned long result, int base);
+int read_debugfs_file(const char *debugfs_file, char *buf, size_t count);
+int write_debugfs_file(const char *debugfs_file, const char *buf, size_t count);
+int read_debugfs_int(const char *debugfs_file, int *result);
+int write_debugfs_int(const char *debugfs_file, int result);
int read_sysfs_file(char *debugfs_file, char *result, size_t result_size);
-void set_dscr(unsigned long val);
int perf_event_open_counter(unsigned int type,
unsigned long config, int group_fd);
int perf_event_enable(int fd);
int perf_event_disable(int fd);
int perf_event_reset(int fd);
+struct perf_event_read {
+ __u64 nr;
+ __u64 l1d_misses;
+};
+
+#if !defined(__GLIBC_PREREQ) || !__GLIBC_PREREQ(2, 30)
+#include <sys/syscall.h>
+
+static inline pid_t gettid(void)
+{
+ return syscall(SYS_gettid);
+}
+#endif
+
static inline bool have_hwcap(unsigned long ftr)
{
return ((unsigned long)get_auxv_entry(AT_HWCAP) & ftr) == ftr;
@@ -59,7 +99,21 @@ static inline bool have_hwcap2(unsigned long ftr2)
}
#endif
+static inline char *auxv_base_platform(void)
+{
+ return ((char *)get_auxv_entry(AT_BASE_PLATFORM));
+}
+
+static inline char *auxv_platform(void)
+{
+ return ((char *)get_auxv_entry(AT_PLATFORM));
+}
+
bool is_ppc64le(void);
+int using_hash_mmu(bool *using_hash);
+
+struct sigaction push_signal_handler(int sig, void (*fn)(int, siginfo_t *, void *));
+struct sigaction pop_signal_handler(int sig, struct sigaction old_handler);
/* Yes, this is evil */
#define FAIL_IF(x) \
@@ -71,6 +125,35 @@ do { \
} \
} while (0)
+#define FAIL_IF_MSG(x, msg) \
+do { \
+ if ((x)) { \
+ fprintf(stderr, \
+ "[FAIL] Test FAILED on line %d: %s\n", \
+ __LINE__, msg); \
+ return 1; \
+ } \
+} while (0)
+
+#define FAIL_IF_EXIT(x) \
+do { \
+ if ((x)) { \
+ fprintf(stderr, \
+ "[FAIL] Test FAILED on line %d\n", __LINE__); \
+ _exit(1); \
+ } \
+} while (0)
+
+#define FAIL_IF_EXIT_MSG(x, msg) \
+do { \
+ if ((x)) { \
+ fprintf(stderr, \
+ "[FAIL] Test FAILED on line %d: %s\n", \
+ __LINE__, msg); \
+ _exit(1); \
+ } \
+} while (0)
+
/* The test harness uses this, yes it's gross */
#define MAGIC_SKIP_RETURN_VALUE 99
@@ -96,11 +179,25 @@ do { \
#define _str(s) #s
#define str(s) _str(s)
+#define sigsafe_err(msg) ({ \
+ ssize_t nbytes __attribute__((unused)); \
+ nbytes = write(STDERR_FILENO, msg, strlen(msg)); })
+
/* POWER9 feature */
#ifndef PPC_FEATURE2_ARCH_3_00
#define PPC_FEATURE2_ARCH_3_00 0x00800000
#endif
+/* POWER10 feature */
+#ifndef PPC_FEATURE2_ARCH_3_1
+#define PPC_FEATURE2_ARCH_3_1 0x00040000
+#endif
+
+/* POWER10 features */
+#ifndef PPC_FEATURE2_MMA
+#define PPC_FEATURE2_MMA 0x00020000
+#endif
+
#if defined(__powerpc64__)
#define UCONTEXT_NIA(UC) (UC)->uc_mcontext.gp_regs[PT_NIP]
#define UCONTEXT_MSR(UC) (UC)->uc_mcontext.gp_regs[PT_MSR]
diff --git a/tools/testing/selftests/powerpc/lib/reg.S b/tools/testing/selftests/powerpc/lib/reg.S
index 9304ea7d59b9..6d1af4a9a6b4 100644
--- a/tools/testing/selftests/powerpc/lib/reg.S
+++ b/tools/testing/selftests/powerpc/lib/reg.S
@@ -53,79 +53,42 @@ FUNC_START(store_gpr)
blr
FUNC_END(store_gpr)
-/* Single Precision Float - float buf[32] */
-FUNC_START(load_fpr_single_precision)
- lfs 0, 0*4(3)
- lfs 1, 1*4(3)
- lfs 2, 2*4(3)
- lfs 3, 3*4(3)
- lfs 4, 4*4(3)
- lfs 5, 5*4(3)
- lfs 6, 6*4(3)
- lfs 7, 7*4(3)
- lfs 8, 8*4(3)
- lfs 9, 9*4(3)
- lfs 10, 10*4(3)
- lfs 11, 11*4(3)
- lfs 12, 12*4(3)
- lfs 13, 13*4(3)
- lfs 14, 14*4(3)
- lfs 15, 15*4(3)
- lfs 16, 16*4(3)
- lfs 17, 17*4(3)
- lfs 18, 18*4(3)
- lfs 19, 19*4(3)
- lfs 20, 20*4(3)
- lfs 21, 21*4(3)
- lfs 22, 22*4(3)
- lfs 23, 23*4(3)
- lfs 24, 24*4(3)
- lfs 25, 25*4(3)
- lfs 26, 26*4(3)
- lfs 27, 27*4(3)
- lfs 28, 28*4(3)
- lfs 29, 29*4(3)
- lfs 30, 30*4(3)
- lfs 31, 31*4(3)
+/* Double Precision Float - double buf[32] */
+FUNC_START(store_fpr)
+ stfd 0, 0*8(3)
+ stfd 1, 1*8(3)
+ stfd 2, 2*8(3)
+ stfd 3, 3*8(3)
+ stfd 4, 4*8(3)
+ stfd 5, 5*8(3)
+ stfd 6, 6*8(3)
+ stfd 7, 7*8(3)
+ stfd 8, 8*8(3)
+ stfd 9, 9*8(3)
+ stfd 10, 10*8(3)
+ stfd 11, 11*8(3)
+ stfd 12, 12*8(3)
+ stfd 13, 13*8(3)
+ stfd 14, 14*8(3)
+ stfd 15, 15*8(3)
+ stfd 16, 16*8(3)
+ stfd 17, 17*8(3)
+ stfd 18, 18*8(3)
+ stfd 19, 19*8(3)
+ stfd 20, 20*8(3)
+ stfd 21, 21*8(3)
+ stfd 22, 22*8(3)
+ stfd 23, 23*8(3)
+ stfd 24, 24*8(3)
+ stfd 25, 25*8(3)
+ stfd 26, 26*8(3)
+ stfd 27, 27*8(3)
+ stfd 28, 28*8(3)
+ stfd 29, 29*8(3)
+ stfd 30, 30*8(3)
+ stfd 31, 31*8(3)
blr
-FUNC_END(load_fpr_single_precision)
-
-/* Single Precision Float - float buf[32] */
-FUNC_START(store_fpr_single_precision)
- stfs 0, 0*4(3)
- stfs 1, 1*4(3)
- stfs 2, 2*4(3)
- stfs 3, 3*4(3)
- stfs 4, 4*4(3)
- stfs 5, 5*4(3)
- stfs 6, 6*4(3)
- stfs 7, 7*4(3)
- stfs 8, 8*4(3)
- stfs 9, 9*4(3)
- stfs 10, 10*4(3)
- stfs 11, 11*4(3)
- stfs 12, 12*4(3)
- stfs 13, 13*4(3)
- stfs 14, 14*4(3)
- stfs 15, 15*4(3)
- stfs 16, 16*4(3)
- stfs 17, 17*4(3)
- stfs 18, 18*4(3)
- stfs 19, 19*4(3)
- stfs 20, 20*4(3)
- stfs 21, 21*4(3)
- stfs 22, 22*4(3)
- stfs 23, 23*4(3)
- stfs 24, 24*4(3)
- stfs 25, 25*4(3)
- stfs 26, 26*4(3)
- stfs 27, 27*4(3)
- stfs 28, 28*4(3)
- stfs 29, 29*4(3)
- stfs 30, 30*4(3)
- stfs 31, 31*4(3)
- blr
-FUNC_END(store_fpr_single_precision)
+FUNC_END(store_fpr)
/* VMX/VSX registers - unsigned long buf[128] */
FUNC_START(loadvsx)
diff --git a/tools/testing/selftests/powerpc/math/.gitignore b/tools/testing/selftests/powerpc/math/.gitignore
index e31ca6f453ed..07b4893ef7af 100644
--- a/tools/testing/selftests/powerpc/math/.gitignore
+++ b/tools/testing/selftests/powerpc/math/.gitignore
@@ -6,3 +6,5 @@ vmx_preempt
fpu_signal
vmx_signal
vsx_preempt
+fpu_denormal
+mma
diff --git a/tools/testing/selftests/powerpc/math/Makefile b/tools/testing/selftests/powerpc/math/Makefile
index 11a10d7a2bbd..3948f7c510aa 100644
--- a/tools/testing/selftests/powerpc/math/Makefile
+++ b/tools/testing/selftests/powerpc/math/Makefile
@@ -1,5 +1,5 @@
# SPDX-License-Identifier: GPL-2.0
-TEST_GEN_PROGS := fpu_syscall fpu_preempt fpu_signal vmx_syscall vmx_preempt vmx_signal vsx_preempt
+TEST_GEN_PROGS := fpu_syscall fpu_preempt fpu_signal fpu_denormal vmx_syscall vmx_preempt vmx_signal vsx_preempt mma
top_srcdir = ../../../../..
include ../../lib.mk
@@ -11,9 +11,11 @@ $(OUTPUT)/fpu_syscall: fpu_asm.S
$(OUTPUT)/fpu_preempt: fpu_asm.S
$(OUTPUT)/fpu_signal: fpu_asm.S
-$(OUTPUT)/vmx_syscall: vmx_asm.S
-$(OUTPUT)/vmx_preempt: vmx_asm.S
-$(OUTPUT)/vmx_signal: vmx_asm.S
+$(OUTPUT)/vmx_syscall: vmx_asm.S ../utils.c
+$(OUTPUT)/vmx_preempt: vmx_asm.S ../utils.c
+$(OUTPUT)/vmx_signal: vmx_asm.S ../utils.c
$(OUTPUT)/vsx_preempt: CFLAGS += -mvsx
-$(OUTPUT)/vsx_preempt: vsx_asm.S
+$(OUTPUT)/vsx_preempt: vsx_asm.S ../utils.c
+
+$(OUTPUT)/mma: mma.c mma.S ../utils.c
diff --git a/tools/testing/selftests/powerpc/math/fpu.h b/tools/testing/selftests/powerpc/math/fpu.h
new file mode 100644
index 000000000000..a8ad0d42604e
--- /dev/null
+++ b/tools/testing/selftests/powerpc/math/fpu.h
@@ -0,0 +1,25 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+/*
+ * Copyright 2023, Michael Ellerman, IBM Corporation.
+ */
+
+#ifndef _SELFTESTS_POWERPC_FPU_H
+#define _SELFTESTS_POWERPC_FPU_H
+
+static inline void randomise_darray(double *darray, int num)
+{
+ long val;
+
+ for (int i = 0; i < num; i++) {
+ val = random();
+ if (val & 1)
+ val *= -1;
+
+ if (val & 2)
+ darray[i] = 1.0 / val;
+ else
+ darray[i] = val * val;
+ }
+}
+
+#endif /* _SELFTESTS_POWERPC_FPU_H */
diff --git a/tools/testing/selftests/powerpc/math/fpu_asm.S b/tools/testing/selftests/powerpc/math/fpu_asm.S
index 9dc0c158f871..efe1e1be4695 100644
--- a/tools/testing/selftests/powerpc/math/fpu_asm.S
+++ b/tools/testing/selftests/powerpc/math/fpu_asm.S
@@ -66,6 +66,40 @@ FUNC_START(check_fpu)
li r3,0 # Success!!!
1: blr
+
+// int check_all_fprs(double darray[32])
+FUNC_START(check_all_fprs)
+ PUSH_BASIC_STACK(8)
+ mr r4, r3 // r4 = darray
+ li r3, 1 // prepare for failure
+
+ stfd f31, STACK_FRAME_LOCAL(0, 0)(sp) // backup f31
+
+ // Check regs f0-f30, using f31 as scratch
+ .set i, 0
+ .rept 31
+ lfd f31, (8 * i)(r4) // load expected value
+ fcmpu cr0, i, f31 // compare
+ bne cr0, 1f // bail if mismatch
+ .set i, i + 1
+ .endr
+
+ lfd f31, STACK_FRAME_LOCAL(0, 0)(sp) // reload f31
+ stfd f30, STACK_FRAME_LOCAL(0, 0)(sp) // backup f30
+
+ lfd f30, (8 * 31)(r4) // load expected value of f31
+ fcmpu cr0, f30, f31 // compare
+ bne cr0, 1f // bail if mismatch
+
+ lfd f30, STACK_FRAME_LOCAL(0, 0)(sp) // reload f30
+
+ // Success
+ li r3, 0
+
+1: POP_BASIC_STACK(8)
+ blr
+FUNC_END(check_all_fprs)
+
FUNC_START(test_fpu)
# r3 holds pointer to where to put the result of fork
# r4 holds pointer to the pid
@@ -75,8 +109,9 @@ FUNC_START(test_fpu)
std r3,STACK_FRAME_PARAM(0)(sp) # Address of darray
std r4,STACK_FRAME_PARAM(1)(sp) # Address of pid
- bl load_fpu
- nop
+ // Load FPRs with expected values
+ OP_REGS lfd, 8, 0, 31, r3
+
li r0,__NR_fork
sc
@@ -85,7 +120,7 @@ FUNC_START(test_fpu)
std r3,0(r9)
ld r3,STACK_FRAME_PARAM(0)(sp)
- bl check_fpu
+ bl check_all_fprs
nop
POP_FPU(256)
@@ -104,8 +139,8 @@ FUNC_START(preempt_fpu)
std r4,STACK_FRAME_PARAM(1)(sp) # int *threads_starting
std r5,STACK_FRAME_PARAM(2)(sp) # int *running
- bl load_fpu
- nop
+ // Load FPRs with expected values
+ OP_REGS lfd, 8, 0, 31, r3
sync
# Atomic DEC
@@ -116,8 +151,7 @@ FUNC_START(preempt_fpu)
bne- 1b
2: ld r3,STACK_FRAME_PARAM(0)(sp)
- bl check_fpu
- nop
+ bl check_all_fprs
cmpdi r3,0
bne 3f
ld r4,STACK_FRAME_PARAM(2)(sp)
diff --git a/tools/testing/selftests/powerpc/math/fpu_denormal.c b/tools/testing/selftests/powerpc/math/fpu_denormal.c
new file mode 100644
index 000000000000..5f96682abaa8
--- /dev/null
+++ b/tools/testing/selftests/powerpc/math/fpu_denormal.c
@@ -0,0 +1,38 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * Copyright IBM Corp. 2020
+ *
+ * This test attempts to cause a FP denormal exception on POWER8 CPUs. Unfortunately
+ * if the denormal handler is not configured or working properly, this can cause a bad
+ * crash in kernel mode when the kernel tries to save FP registers when the process
+ * exits.
+ */
+
+#include <stdio.h>
+#include <string.h>
+
+#include "utils.h"
+
+static int test_denormal_fpu(void)
+{
+ unsigned int m32;
+ unsigned long m64;
+ volatile float f;
+ volatile double d;
+
+ /* try to induce lfs <denormal> ; stfd */
+
+ m32 = 0x00715fcf; /* random denormal */
+ memcpy((float *)&f, &m32, sizeof(f));
+ d = f;
+ memcpy(&m64, (double *)&d, sizeof(d));
+
+ FAIL_IF((long)(m64 != 0x380c57f3c0000000)); /* renormalised value */
+
+ return 0;
+}
+
+int main(int argc, char *argv[])
+{
+ return test_harness(test_denormal_fpu, "fpu_denormal");
+}
diff --git a/tools/testing/selftests/powerpc/math/fpu_preempt.c b/tools/testing/selftests/powerpc/math/fpu_preempt.c
index 5235bdc8c0b1..9ddede0770ed 100644
--- a/tools/testing/selftests/powerpc/math/fpu_preempt.c
+++ b/tools/testing/selftests/powerpc/math/fpu_preempt.c
@@ -1,13 +1,12 @@
// SPDX-License-Identifier: GPL-2.0-or-later
/*
* Copyright 2015, Cyril Bur, IBM Corp.
+ * Copyright 2023, Michael Ellerman, IBM Corp.
*
* This test attempts to see if the FPU registers change across preemption.
- * Two things should be noted here a) The check_fpu function in asm only checks
- * the non volatile registers as it is reused from the syscall test b) There is
- * no way to be sure preemption happened so this test just uses many threads
- * and a long wait. As such, a successful test doesn't mean much but a failure
- * is bad.
+ * There is no way to be sure preemption happened so this test just uses many
+ * threads and a long wait. As such, a successful test doesn't mean much but
+ * a failure is bad.
*/
#include <stdio.h>
@@ -20,9 +19,10 @@
#include <pthread.h>
#include "utils.h"
+#include "fpu.h"
/* Time to wait for workers to get preempted (seconds) */
-#define PREEMPT_TIME 20
+#define PREEMPT_TIME 60
/*
* Factor by which to multiply number of online CPUs for total number of
* worker threads
@@ -30,26 +30,22 @@
#define THREAD_FACTOR 8
-__thread double darray[] = {0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0,
- 1.1, 1.2, 1.3, 1.4, 1.5, 1.6, 1.7, 1.8, 1.9, 2.0,
- 2.1};
+__thread double darray[32];
int threads_starting;
int running;
-extern void preempt_fpu(double *darray, int *threads_starting, int *running);
+extern int preempt_fpu(double *darray, int *threads_starting, int *running);
void *preempt_fpu_c(void *p)
{
- int i;
- srand(pthread_self());
- for (i = 0; i < 21; i++)
- darray[i] = rand();
+ long rc;
- /* Test failed if it ever returns */
- preempt_fpu(darray, &threads_starting, &running);
+ srand(pthread_self());
+ randomise_darray(darray, ARRAY_SIZE(darray));
+ rc = preempt_fpu(darray, &threads_starting, &running);
- return p;
+ return (void *)rc;
}
int test_preempt_fpu(void)
diff --git a/tools/testing/selftests/powerpc/math/fpu_signal.c b/tools/testing/selftests/powerpc/math/fpu_signal.c
index 7b1addd50420..8a64f63e37ce 100644
--- a/tools/testing/selftests/powerpc/math/fpu_signal.c
+++ b/tools/testing/selftests/powerpc/math/fpu_signal.c
@@ -18,6 +18,7 @@
#include <pthread.h>
#include "utils.h"
+#include "fpu.h"
/* Number of times each thread should receive the signal */
#define ITERATIONS 10
@@ -27,9 +28,7 @@
*/
#define THREAD_FACTOR 8
-__thread double darray[] = {0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0,
- 1.1, 1.2, 1.3, 1.4, 1.5, 1.6, 1.7, 1.8, 1.9, 2.0,
- 2.1};
+__thread double darray[32];
bool bad_context;
int threads_starting;
@@ -43,9 +42,9 @@ void signal_fpu_sig(int sig, siginfo_t *info, void *context)
ucontext_t *uc = context;
mcontext_t *mc = &uc->uc_mcontext;
- /* Only the non volatiles were loaded up */
- for (i = 14; i < 32; i++) {
- if (mc->fp_regs[i] != darray[i - 14]) {
+ // Don't check f30/f31, they're used as scratches in check_all_fprs()
+ for (i = 0; i < 30; i++) {
+ if (mc->fp_regs[i] != darray[i]) {
bad_context = true;
break;
}
@@ -54,7 +53,6 @@ void signal_fpu_sig(int sig, siginfo_t *info, void *context)
void *signal_fpu_c(void *p)
{
- int i;
long rc;
struct sigaction act;
act.sa_sigaction = signal_fpu_sig;
@@ -64,9 +62,7 @@ void *signal_fpu_c(void *p)
return p;
srand(pthread_self());
- for (i = 0; i < 21; i++)
- darray[i] = rand();
-
+ randomise_darray(darray, ARRAY_SIZE(darray));
rc = preempt_fpu(darray, &threads_starting, &running);
return (void *) rc;
diff --git a/tools/testing/selftests/powerpc/math/fpu_syscall.c b/tools/testing/selftests/powerpc/math/fpu_syscall.c
index 694f225c7e45..751d46b133fc 100644
--- a/tools/testing/selftests/powerpc/math/fpu_syscall.c
+++ b/tools/testing/selftests/powerpc/math/fpu_syscall.c
@@ -14,12 +14,11 @@
#include <stdlib.h>
#include "utils.h"
+#include "fpu.h"
extern int test_fpu(double *darray, pid_t *pid);
-double darray[] = {0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0,
- 1.1, 1.2, 1.3, 1.4, 1.5, 1.6, 1.7, 1.8, 1.9, 2.0,
- 2.1};
+double darray[32];
int syscall_fpu(void)
{
@@ -27,6 +26,9 @@ int syscall_fpu(void)
int i;
int ret;
int child_ret;
+
+ randomise_darray(darray, ARRAY_SIZE(darray));
+
for (i = 0; i < 1000; i++) {
/* test_fpu will fork() */
ret = test_fpu(darray, &fork_pid);
diff --git a/tools/testing/selftests/powerpc/math/mma.S b/tools/testing/selftests/powerpc/math/mma.S
new file mode 100644
index 000000000000..61cc88b1b26b
--- /dev/null
+++ b/tools/testing/selftests/powerpc/math/mma.S
@@ -0,0 +1,36 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later
+ *
+ * Test basic matrix multiply assist (MMA) functionality if available.
+ *
+ * Copyright 2020, Alistair Popple, IBM Corp.
+ */
+ .global test_mma
+test_mma:
+ /* Load accumulator via VSX registers from image passed in r3 */
+ lxvh8x 4,0,3
+ lxvh8x 5,0,4
+
+ /* Clear and prime the accumulator (xxsetaccz) */
+ .long 0x7c030162
+
+ /* Prime the accumulator with MMA VSX move to accumulator
+ * X-form (xxmtacc) (not needed due to above zeroing) */
+ //.long 0x7c010162
+
+ /* xvi16ger2s */
+ .long 0xec042958
+
+ /* Deprime the accumulator - xxmfacc 0 */
+ .long 0x7c000162
+
+ /* Store result in image passed in r5 */
+ stxvw4x 0,0,5
+ addi 5,5,16
+ stxvw4x 1,0,5
+ addi 5,5,16
+ stxvw4x 2,0,5
+ addi 5,5,16
+ stxvw4x 3,0,5
+ addi 5,5,16
+
+ blr
diff --git a/tools/testing/selftests/powerpc/math/mma.c b/tools/testing/selftests/powerpc/math/mma.c
new file mode 100644
index 000000000000..3a71808c993f
--- /dev/null
+++ b/tools/testing/selftests/powerpc/math/mma.c
@@ -0,0 +1,48 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * Test basic matrix multiply assist (MMA) functionality if available.
+ *
+ * Copyright 2020, Alistair Popple, IBM Corp.
+ */
+#include <stdio.h>
+#include <stdint.h>
+
+#include "utils.h"
+
+extern void test_mma(uint16_t (*)[8], uint16_t (*)[8], uint32_t (*)[4*4]);
+
+static int mma(void)
+{
+ int i;
+ int rc = 0;
+ uint16_t x[] = {1, 0, 2, 0, 3, 0, 4, 0};
+ uint16_t y[] = {1, 0, 2, 0, 3, 0, 4, 0};
+ uint32_t z[4*4];
+ uint32_t exp[4*4] = {1, 2, 3, 4,
+ 2, 4, 6, 8,
+ 3, 6, 9, 12,
+ 4, 8, 12, 16};
+
+ SKIP_IF_MSG(!have_hwcap2(PPC_FEATURE2_ARCH_3_1), "Need ISAv3.1");
+ SKIP_IF_MSG(!have_hwcap2(PPC_FEATURE2_MMA), "Need MMA");
+
+ test_mma(&x, &y, &z);
+
+ for (i = 0; i < 16; i++) {
+ printf("MMA[%d] = %d ", i, z[i]);
+
+ if (z[i] == exp[i]) {
+ printf(" (Correct)\n");
+ } else {
+ printf(" (Incorrect)\n");
+ rc = 1;
+ }
+ }
+
+ return rc;
+}
+
+int main(int argc, char *argv[])
+{
+ return test_harness(mma, "mma");
+}
diff --git a/tools/testing/selftests/powerpc/math/vmx_preempt.c b/tools/testing/selftests/powerpc/math/vmx_preempt.c
index 2e059f154e77..6f7cf400c687 100644
--- a/tools/testing/selftests/powerpc/math/vmx_preempt.c
+++ b/tools/testing/selftests/powerpc/math/vmx_preempt.c
@@ -37,19 +37,21 @@ __thread vector int varray[] = {{1, 2, 3, 4}, {5, 6, 7, 8}, {9, 10,11,12},
int threads_starting;
int running;
-extern void preempt_vmx(vector int *varray, int *threads_starting, int *running);
+extern int preempt_vmx(vector int *varray, int *threads_starting, int *running);
void *preempt_vmx_c(void *p)
{
int i, j;
+ long rc;
+
srand(pthread_self());
for (i = 0; i < 12; i++)
for (j = 0; j < 4; j++)
varray[i][j] = rand();
- /* Test fails if it ever returns */
- preempt_vmx(varray, &threads_starting, &running);
- return p;
+ rc = preempt_vmx(varray, &threads_starting, &running);
+
+ return (void *)rc;
}
int test_preempt_vmx(void)
@@ -57,6 +59,9 @@ int test_preempt_vmx(void)
int i, rc, threads;
pthread_t *tids;
+ // vcmpequd used in vmx_asm.S is v2.07
+ SKIP_IF(!have_hwcap2(PPC_FEATURE2_ARCH_2_07));
+
threads = sysconf(_SC_NPROCESSORS_ONLN) * THREAD_FACTOR;
tids = malloc(threads * sizeof(pthread_t));
FAIL_IF(!tids);
diff --git a/tools/testing/selftests/powerpc/math/vmx_signal.c b/tools/testing/selftests/powerpc/math/vmx_signal.c
index 785a48e0976f..c307dff19c12 100644
--- a/tools/testing/selftests/powerpc/math/vmx_signal.c
+++ b/tools/testing/selftests/powerpc/math/vmx_signal.c
@@ -96,6 +96,9 @@ int test_signal_vmx(void)
void *rc_p;
pthread_t *tids;
+ // vcmpequd used in vmx_asm.S is v2.07
+ SKIP_IF(!have_hwcap2(PPC_FEATURE2_ARCH_2_07));
+
threads = sysconf(_SC_NPROCESSORS_ONLN) * THREAD_FACTOR;
tids = malloc(threads * sizeof(pthread_t));
FAIL_IF(!tids);
@@ -148,5 +151,6 @@ int test_signal_vmx(void)
int main(int argc, char *argv[])
{
+ test_harness_set_timeout(360);
return test_harness(test_signal_vmx, "vmx_signal");
}
diff --git a/tools/testing/selftests/powerpc/math/vmx_syscall.c b/tools/testing/selftests/powerpc/math/vmx_syscall.c
index 9ee293cc868e..03c78dfe3444 100644
--- a/tools/testing/selftests/powerpc/math/vmx_syscall.c
+++ b/tools/testing/selftests/powerpc/math/vmx_syscall.c
@@ -49,9 +49,14 @@ int test_vmx_syscall(void)
* Setup an environment with much context switching
*/
pid_t pid2;
- pid_t pid = fork();
+ pid_t pid;
int ret;
int child_ret;
+
+ // vcmpequd used in vmx_asm.S is v2.07
+ SKIP_IF(!have_hwcap2(PPC_FEATURE2_ARCH_2_07));
+
+ pid = fork();
FAIL_IF(pid == -1);
pid2 = fork();
diff --git a/tools/testing/selftests/powerpc/math/vsx_preempt.c b/tools/testing/selftests/powerpc/math/vsx_preempt.c
index 63de9c6e2cd3..d1601bb889d4 100644
--- a/tools/testing/selftests/powerpc/math/vsx_preempt.c
+++ b/tools/testing/selftests/powerpc/math/vsx_preempt.c
@@ -92,6 +92,8 @@ int test_preempt_vsx(void)
int i, rc, threads;
pthread_t *tids;
+ SKIP_IF(!have_hwcap(PPC_FEATURE_HAS_VSX));
+
threads = sysconf(_SC_NPROCESSORS_ONLN) * THREAD_FACTOR;
tids = malloc(threads * sizeof(pthread_t));
FAIL_IF(!tids);
diff --git a/tools/testing/selftests/powerpc/mce/.gitignore b/tools/testing/selftests/powerpc/mce/.gitignore
new file mode 100644
index 000000000000..f5921462a495
--- /dev/null
+++ b/tools/testing/selftests/powerpc/mce/.gitignore
@@ -0,0 +1 @@
+inject-ra-err
diff --git a/tools/testing/selftests/powerpc/mce/Makefile b/tools/testing/selftests/powerpc/mce/Makefile
new file mode 100644
index 000000000000..2424513982d9
--- /dev/null
+++ b/tools/testing/selftests/powerpc/mce/Makefile
@@ -0,0 +1,7 @@
+#SPDX-License-Identifier: GPL-2.0-or-later
+
+TEST_GEN_PROGS := inject-ra-err
+
+include ../../lib.mk
+
+$(TEST_GEN_PROGS): ../harness.c
diff --git a/tools/testing/selftests/powerpc/mce/inject-ra-err.c b/tools/testing/selftests/powerpc/mce/inject-ra-err.c
new file mode 100644
index 000000000000..94323c34d9a6
--- /dev/null
+++ b/tools/testing/selftests/powerpc/mce/inject-ra-err.c
@@ -0,0 +1,65 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+
+#include <errno.h>
+#include <fcntl.h>
+#include <signal.h>
+#include <stdio.h>
+#include <string.h>
+#include <sys/ioctl.h>
+#include <sys/mman.h>
+#include <sys/stat.h>
+#include <sys/types.h>
+#include <unistd.h>
+
+#include "vas-api.h"
+#include "utils.h"
+
+static bool faulted;
+
+static void sigbus_handler(int n, siginfo_t *info, void *ctxt_v)
+{
+ ucontext_t *ctxt = (ucontext_t *)ctxt_v;
+ struct pt_regs *regs = ctxt->uc_mcontext.regs;
+
+ faulted = true;
+ regs->nip += 4;
+}
+
+static int test_ra_error(void)
+{
+ struct vas_tx_win_open_attr attr;
+ int fd, *paste_addr;
+ char *devname = "/dev/crypto/nx-gzip";
+ struct sigaction act = {
+ .sa_sigaction = sigbus_handler,
+ .sa_flags = SA_SIGINFO,
+ };
+
+ memset(&attr, 0, sizeof(attr));
+ attr.version = 1;
+ attr.vas_id = 0;
+
+ SKIP_IF(access(devname, F_OK));
+
+ fd = open(devname, O_RDWR);
+ FAIL_IF(fd < 0);
+ FAIL_IF(ioctl(fd, VAS_TX_WIN_OPEN, &attr) < 0);
+ FAIL_IF(sigaction(SIGBUS, &act, NULL) != 0);
+
+ paste_addr = mmap(NULL, 4096, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0ULL);
+
+ /* The following assignment triggers exception */
+ mb();
+ *paste_addr = 1;
+ mb();
+
+ FAIL_IF(!faulted);
+
+ return 0;
+}
+
+int main(void)
+{
+ return test_harness(test_ra_error, "inject-ra-err");
+}
+
diff --git a/tools/testing/selftests/powerpc/mce/vas-api.h b/tools/testing/selftests/powerpc/mce/vas-api.h
new file mode 120000
index 000000000000..1455c1bcd351
--- /dev/null
+++ b/tools/testing/selftests/powerpc/mce/vas-api.h
@@ -0,0 +1 @@
+../../../../../arch/powerpc/include/uapi/asm/vas-api.h \ No newline at end of file
diff --git a/tools/testing/selftests/powerpc/mm/.gitignore b/tools/testing/selftests/powerpc/mm/.gitignore
index 2ca523255b1b..0df1a3afc5e2 100644
--- a/tools/testing/selftests/powerpc/mm/.gitignore
+++ b/tools/testing/selftests/powerpc/mm/.gitignore
@@ -1,10 +1,16 @@
# SPDX-License-Identifier: GPL-2.0-only
+bad_accesses
+exec_prot
hugetlb_vs_thp_test
-subpage_prot
-tempfile
+large_vm_fork_separation
+large_vm_gpr_corruption
+pkey_exec_prot
+pkey_siginfo
prot_sao
segv_errors
-wild_bctr
-large_vm_fork_separation
-bad_accesses
+stack_expansion_ldst
+stack_expansion_signal
+subpage_prot
+tempfile
tlbie_test
+wild_bctr
diff --git a/tools/testing/selftests/powerpc/mm/Makefile b/tools/testing/selftests/powerpc/mm/Makefile
index b9103c4bb414..4a6608beef0e 100644
--- a/tools/testing/selftests/powerpc/mm/Makefile
+++ b/tools/testing/selftests/powerpc/mm/Makefile
@@ -3,22 +3,36 @@ noarg:
$(MAKE) -C ../
TEST_GEN_PROGS := hugetlb_vs_thp_test subpage_prot prot_sao segv_errors wild_bctr \
- large_vm_fork_separation bad_accesses
+ large_vm_fork_separation bad_accesses exec_prot pkey_exec_prot \
+ pkey_siginfo stack_expansion_signal stack_expansion_ldst \
+ large_vm_gpr_corruption
+TEST_PROGS := stress_code_patching.sh
+
TEST_GEN_PROGS_EXTENDED := tlbie_test
TEST_GEN_FILES := tempfile
top_srcdir = ../../../../..
include ../../lib.mk
-$(TEST_GEN_PROGS): ../harness.c
+$(TEST_GEN_PROGS): ../harness.c ../utils.c
$(OUTPUT)/prot_sao: ../utils.c
$(OUTPUT)/wild_bctr: CFLAGS += -m64
$(OUTPUT)/large_vm_fork_separation: CFLAGS += -m64
+$(OUTPUT)/large_vm_gpr_corruption: CFLAGS += -m64
$(OUTPUT)/bad_accesses: CFLAGS += -m64
+$(OUTPUT)/exec_prot: CFLAGS += -m64
+$(OUTPUT)/pkey_exec_prot: CFLAGS += -m64
+$(OUTPUT)/pkey_siginfo: CFLAGS += -m64
+
+$(OUTPUT)/stack_expansion_signal: ../utils.c ../pmu/lib.c
+
+$(OUTPUT)/stack_expansion_ldst: CFLAGS += -fno-stack-protector
+$(OUTPUT)/stack_expansion_ldst: ../utils.c
$(OUTPUT)/tempfile:
- dd if=/dev/zero of=$@ bs=64k count=1
+ dd if=/dev/zero of=$@ bs=64k count=1 status=none
$(OUTPUT)/tlbie_test: LDLIBS += -lpthread
+$(OUTPUT)/pkey_siginfo: LDLIBS += -lpthread
diff --git a/tools/testing/selftests/powerpc/mm/bad_accesses.c b/tools/testing/selftests/powerpc/mm/bad_accesses.c
index adc465f499ef..65d2148b05dc 100644
--- a/tools/testing/selftests/powerpc/mm/bad_accesses.c
+++ b/tools/testing/selftests/powerpc/mm/bad_accesses.c
@@ -38,7 +38,7 @@ static void segv_handler(int n, siginfo_t *info, void *ctxt_v)
int bad_access(char *p, bool write)
{
- char x;
+ char x = 0;
fault_code = 0;
fault_addr = 0;
@@ -64,34 +64,6 @@ int bad_access(char *p, bool write)
return 0;
}
-static int using_hash_mmu(bool *using_hash)
-{
- char line[128];
- FILE *f;
- int rc;
-
- f = fopen("/proc/cpuinfo", "r");
- FAIL_IF(!f);
-
- rc = 0;
- while (fgets(line, sizeof(line), f) != NULL) {
- if (strcmp(line, "MMU : Hash\n") == 0) {
- *using_hash = true;
- goto out;
- }
-
- if (strcmp(line, "MMU : Radix\n") == 0) {
- *using_hash = false;
- goto out;
- }
- }
-
- rc = -1;
-out:
- fclose(f);
- return rc;
-}
-
static int test(void)
{
unsigned long i, j, addr, region_shift, page_shift, page_size;
@@ -167,5 +139,6 @@ static int test(void)
int main(void)
{
+ test_harness_set_timeout(300);
return test_harness(test, "bad_accesses");
}
diff --git a/tools/testing/selftests/powerpc/mm/exec_prot.c b/tools/testing/selftests/powerpc/mm/exec_prot.c
new file mode 100644
index 000000000000..db75b2225de1
--- /dev/null
+++ b/tools/testing/selftests/powerpc/mm/exec_prot.c
@@ -0,0 +1,231 @@
+// SPDX-License-Identifier: GPL-2.0
+
+/*
+ * Copyright 2022, Nicholas Miehlbradt, IBM Corporation
+ * based on pkey_exec_prot.c
+ *
+ * Test if applying execute protection on pages works as expected.
+ */
+
+#define _GNU_SOURCE
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <signal.h>
+
+#include <unistd.h>
+#include <sys/mman.h>
+
+#include "pkeys.h"
+
+
+#define PPC_INST_NOP 0x60000000
+#define PPC_INST_TRAP 0x7fe00008
+#define PPC_INST_BLR 0x4e800020
+
+static volatile sig_atomic_t fault_code;
+static volatile sig_atomic_t remaining_faults;
+static volatile unsigned int *fault_addr;
+static unsigned long pgsize, numinsns;
+static unsigned int *insns;
+static bool pkeys_supported;
+
+static bool is_fault_expected(int fault_code)
+{
+ if (fault_code == SEGV_ACCERR)
+ return true;
+
+ /* Assume any pkey error is fine since pkey_exec_prot test covers them */
+ if (fault_code == SEGV_PKUERR && pkeys_supported)
+ return true;
+
+ return false;
+}
+
+static void trap_handler(int signum, siginfo_t *sinfo, void *ctx)
+{
+ /* Check if this fault originated from the expected address */
+ if (sinfo->si_addr != (void *)fault_addr)
+ sigsafe_err("got a fault for an unexpected address\n");
+
+ _exit(1);
+}
+
+static void segv_handler(int signum, siginfo_t *sinfo, void *ctx)
+{
+ fault_code = sinfo->si_code;
+
+ /* Check if this fault originated from the expected address */
+ if (sinfo->si_addr != (void *)fault_addr) {
+ sigsafe_err("got a fault for an unexpected address\n");
+ _exit(1);
+ }
+
+ /* Check if too many faults have occurred for a single test case */
+ if (!remaining_faults) {
+ sigsafe_err("got too many faults for the same address\n");
+ _exit(1);
+ }
+
+
+ /* Restore permissions in order to continue */
+ if (is_fault_expected(fault_code)) {
+ if (mprotect(insns, pgsize, PROT_READ | PROT_WRITE | PROT_EXEC)) {
+ sigsafe_err("failed to set access permissions\n");
+ _exit(1);
+ }
+ } else {
+ sigsafe_err("got a fault with an unexpected code\n");
+ _exit(1);
+ }
+
+ remaining_faults--;
+}
+
+static int check_exec_fault(int rights)
+{
+ /*
+ * Jump to the executable region.
+ *
+ * The first iteration also checks if the overwrite of the
+ * first instruction word from a trap to a no-op succeeded.
+ */
+ fault_code = -1;
+ remaining_faults = 0;
+ if (!(rights & PROT_EXEC))
+ remaining_faults = 1;
+
+ FAIL_IF(mprotect(insns, pgsize, rights) != 0);
+ asm volatile("mtctr %0; bctrl" : : "r"(insns));
+
+ FAIL_IF(remaining_faults != 0);
+ if (!(rights & PROT_EXEC))
+ FAIL_IF(!is_fault_expected(fault_code));
+
+ return 0;
+}
+
+static int test(void)
+{
+ struct sigaction segv_act, trap_act;
+ int i;
+
+ /* Skip the test if the CPU doesn't support Radix */
+ SKIP_IF(!have_hwcap2(PPC_FEATURE2_ARCH_3_00));
+
+ /* Check if pkeys are supported */
+ pkeys_supported = pkeys_unsupported() == 0;
+
+ /* Setup SIGSEGV handler */
+ segv_act.sa_handler = 0;
+ segv_act.sa_sigaction = segv_handler;
+ FAIL_IF(sigprocmask(SIG_SETMASK, 0, &segv_act.sa_mask) != 0);
+ segv_act.sa_flags = SA_SIGINFO;
+ segv_act.sa_restorer = 0;
+ FAIL_IF(sigaction(SIGSEGV, &segv_act, NULL) != 0);
+
+ /* Setup SIGTRAP handler */
+ trap_act.sa_handler = 0;
+ trap_act.sa_sigaction = trap_handler;
+ FAIL_IF(sigprocmask(SIG_SETMASK, 0, &trap_act.sa_mask) != 0);
+ trap_act.sa_flags = SA_SIGINFO;
+ trap_act.sa_restorer = 0;
+ FAIL_IF(sigaction(SIGTRAP, &trap_act, NULL) != 0);
+
+ /* Setup executable region */
+ pgsize = getpagesize();
+ numinsns = pgsize / sizeof(unsigned int);
+ insns = (unsigned int *)mmap(NULL, pgsize, PROT_READ | PROT_WRITE,
+ MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
+ FAIL_IF(insns == MAP_FAILED);
+
+ /* Write the instruction words */
+ for (i = 1; i < numinsns - 1; i++)
+ insns[i] = PPC_INST_NOP;
+
+ /*
+ * Set the first instruction as an unconditional trap. If
+ * the last write to this address succeeds, this should
+ * get overwritten by a no-op.
+ */
+ insns[0] = PPC_INST_TRAP;
+
+ /*
+ * Later, to jump to the executable region, we use a branch
+ * and link instruction (bctrl) which sets the return address
+ * automatically in LR. Use that to return back.
+ */
+ insns[numinsns - 1] = PPC_INST_BLR;
+
+ /*
+ * Pick the first instruction's address from the executable
+ * region.
+ */
+ fault_addr = insns;
+
+ /*
+ * Read an instruction word from the address when the page
+ * is execute only. This should generate an access fault.
+ */
+ fault_code = -1;
+ remaining_faults = 1;
+ printf("Testing read on --x, should fault...");
+ FAIL_IF(mprotect(insns, pgsize, PROT_EXEC) != 0);
+ i = *fault_addr;
+ FAIL_IF(remaining_faults != 0 || !is_fault_expected(fault_code));
+ printf("ok!\n");
+
+ /*
+ * Write an instruction word to the address when the page
+ * execute only. This should also generate an access fault.
+ */
+ fault_code = -1;
+ remaining_faults = 1;
+ printf("Testing write on --x, should fault...");
+ FAIL_IF(mprotect(insns, pgsize, PROT_EXEC) != 0);
+ *fault_addr = PPC_INST_NOP;
+ FAIL_IF(remaining_faults != 0 || !is_fault_expected(fault_code));
+ printf("ok!\n");
+
+ printf("Testing exec on ---, should fault...");
+ FAIL_IF(check_exec_fault(PROT_NONE));
+ printf("ok!\n");
+
+ printf("Testing exec on r--, should fault...");
+ FAIL_IF(check_exec_fault(PROT_READ));
+ printf("ok!\n");
+
+ printf("Testing exec on -w-, should fault...");
+ FAIL_IF(check_exec_fault(PROT_WRITE));
+ printf("ok!\n");
+
+ printf("Testing exec on rw-, should fault...");
+ FAIL_IF(check_exec_fault(PROT_READ | PROT_WRITE));
+ printf("ok!\n");
+
+ printf("Testing exec on --x, should succeed...");
+ FAIL_IF(check_exec_fault(PROT_EXEC));
+ printf("ok!\n");
+
+ printf("Testing exec on r-x, should succeed...");
+ FAIL_IF(check_exec_fault(PROT_READ | PROT_EXEC));
+ printf("ok!\n");
+
+ printf("Testing exec on -wx, should succeed...");
+ FAIL_IF(check_exec_fault(PROT_WRITE | PROT_EXEC));
+ printf("ok!\n");
+
+ printf("Testing exec on rwx, should succeed...");
+ FAIL_IF(check_exec_fault(PROT_READ | PROT_WRITE | PROT_EXEC));
+ printf("ok!\n");
+
+ /* Cleanup */
+ FAIL_IF(munmap((void *)insns, pgsize));
+
+ return 0;
+}
+
+int main(void)
+{
+ return test_harness(test, "exec_prot");
+}
diff --git a/tools/testing/selftests/powerpc/mm/large_vm_gpr_corruption.c b/tools/testing/selftests/powerpc/mm/large_vm_gpr_corruption.c
new file mode 100644
index 000000000000..7da515f1da72
--- /dev/null
+++ b/tools/testing/selftests/powerpc/mm/large_vm_gpr_corruption.c
@@ -0,0 +1,158 @@
+// SPDX-License-Identifier: GPL-2.0+
+//
+// Copyright 2022, Michael Ellerman, IBM Corp.
+//
+// Test that the 4PB address space SLB handling doesn't corrupt userspace registers
+// (r9-r13) due to a SLB fault while saving the PPR.
+//
+// The bug was introduced in f384796c4 ("powerpc/mm: Add support for handling > 512TB
+// address in SLB miss") and fixed in 4c2de74cc869 ("powerpc/64: Interrupts save PPR on
+// stack rather than thread_struct").
+//
+// To hit the bug requires the task struct and kernel stack to be in different segments.
+// Usually that requires more than 1TB of RAM, or if that's not practical, boot the kernel
+// with "disable_1tb_segments".
+//
+// The test works by creating mappings above 512TB, to trigger the large address space
+// support. It creates 64 mappings, double the size of the SLB, to cause SLB faults on
+// each access (assuming naive replacement). It then loops over those mappings touching
+// each, and checks that r9-r13 aren't corrupted.
+//
+// It then forks another child and tries again, because a new child process will get a new
+// kernel stack and thread struct allocated, which may be more optimally placed to trigger
+// the bug. It would probably be better to leave the previous child processes hanging
+// around, so that kernel stack & thread struct allocations are not reused, but that would
+// amount to a 30 second fork bomb. The current design reliably triggers the bug on
+// unpatched kernels.
+
+#include <signal.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <sys/mman.h>
+#include <sys/types.h>
+#include <sys/wait.h>
+#include <unistd.h>
+
+#include "utils.h"
+
+#ifndef MAP_FIXED_NOREPLACE
+#define MAP_FIXED_NOREPLACE MAP_FIXED // "Should be safe" above 512TB
+#endif
+
+#define BASE_ADDRESS (1ul << 50) // 1PB
+#define STRIDE (2ul << 40) // 2TB
+#define SLB_SIZE 32
+#define NR_MAPPINGS (SLB_SIZE * 2)
+
+static volatile sig_atomic_t signaled;
+
+static void signal_handler(int sig)
+{
+ signaled = 1;
+}
+
+#define CHECK_REG(_reg) \
+ if (_reg != _reg##_orig) { \
+ printf(str(_reg) " corrupted! Expected 0x%lx != 0x%lx\n", _reg##_orig, \
+ _reg); \
+ _exit(1); \
+ }
+
+static int touch_mappings(void)
+{
+ unsigned long r9_orig, r10_orig, r11_orig, r12_orig, r13_orig;
+ unsigned long r9, r10, r11, r12, r13;
+ unsigned long addr, *p;
+ int i;
+
+ for (i = 0; i < NR_MAPPINGS; i++) {
+ addr = BASE_ADDRESS + (i * STRIDE);
+ p = (unsigned long *)addr;
+
+ asm volatile("mr %0, %%r9 ;" // Read original GPR values
+ "mr %1, %%r10 ;"
+ "mr %2, %%r11 ;"
+ "mr %3, %%r12 ;"
+ "mr %4, %%r13 ;"
+ "std %10, 0(%11) ;" // Trigger SLB fault
+ "mr %5, %%r9 ;" // Save possibly corrupted values
+ "mr %6, %%r10 ;"
+ "mr %7, %%r11 ;"
+ "mr %8, %%r12 ;"
+ "mr %9, %%r13 ;"
+ "mr %%r9, %0 ;" // Restore original values
+ "mr %%r10, %1 ;"
+ "mr %%r11, %2 ;"
+ "mr %%r12, %3 ;"
+ "mr %%r13, %4 ;"
+ : "=&b"(r9_orig), "=&b"(r10_orig), "=&b"(r11_orig),
+ "=&b"(r12_orig), "=&b"(r13_orig), "=&b"(r9), "=&b"(r10),
+ "=&b"(r11), "=&b"(r12), "=&b"(r13)
+ : "b"(i), "b"(p)
+ : "r9", "r10", "r11", "r12", "r13");
+
+ CHECK_REG(r9);
+ CHECK_REG(r10);
+ CHECK_REG(r11);
+ CHECK_REG(r12);
+ CHECK_REG(r13);
+ }
+
+ return 0;
+}
+
+static int test(void)
+{
+ unsigned long page_size, addr, *p;
+ struct sigaction action;
+ bool hash_mmu;
+ int i, status;
+ pid_t pid;
+
+ // This tests a hash MMU specific bug.
+ FAIL_IF(using_hash_mmu(&hash_mmu));
+ SKIP_IF(!hash_mmu);
+ // 4K kernels don't support 4PB address space
+ SKIP_IF(sysconf(_SC_PAGESIZE) < 65536);
+
+ page_size = sysconf(_SC_PAGESIZE);
+
+ for (i = 0; i < NR_MAPPINGS; i++) {
+ addr = BASE_ADDRESS + (i * STRIDE);
+
+ p = mmap((void *)addr, page_size, PROT_READ | PROT_WRITE,
+ MAP_PRIVATE | MAP_ANONYMOUS | MAP_FIXED_NOREPLACE, -1, 0);
+ if (p == MAP_FAILED) {
+ perror("mmap");
+ printf("Error: couldn't mmap(), confirm kernel has 4PB support?\n");
+ return 1;
+ }
+ }
+
+ action.sa_handler = signal_handler;
+ action.sa_flags = SA_RESTART;
+ FAIL_IF(sigaction(SIGALRM, &action, NULL) < 0);
+
+ // Seen to always crash in under ~10s on affected kernels.
+ alarm(30);
+
+ while (!signaled) {
+ // Fork new processes, to increase the chance that we hit the case where
+ // the kernel stack and task struct are in different segments.
+ pid = fork();
+ if (pid == 0)
+ exit(touch_mappings());
+
+ FAIL_IF(waitpid(-1, &status, 0) == -1);
+ FAIL_IF(WIFSIGNALED(status));
+ FAIL_IF(!WIFEXITED(status));
+ FAIL_IF(WEXITSTATUS(status));
+ }
+
+ return 0;
+}
+
+int main(void)
+{
+ return test_harness(test, "large_vm_gpr_corruption");
+}
diff --git a/tools/testing/selftests/powerpc/mm/pkey_exec_prot.c b/tools/testing/selftests/powerpc/mm/pkey_exec_prot.c
new file mode 100644
index 000000000000..0af4f02669a1
--- /dev/null
+++ b/tools/testing/selftests/powerpc/mm/pkey_exec_prot.c
@@ -0,0 +1,294 @@
+// SPDX-License-Identifier: GPL-2.0+
+
+/*
+ * Copyright 2020, Sandipan Das, IBM Corp.
+ *
+ * Test if applying execute protection on pages using memory
+ * protection keys works as expected.
+ */
+
+#define _GNU_SOURCE
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <signal.h>
+
+#include <unistd.h>
+
+#include "pkeys.h"
+
+#define PPC_INST_NOP 0x60000000
+#define PPC_INST_TRAP 0x7fe00008
+#define PPC_INST_BLR 0x4e800020
+
+static volatile sig_atomic_t fault_pkey, fault_code, fault_type;
+static volatile sig_atomic_t remaining_faults;
+static volatile unsigned int *fault_addr;
+static unsigned long pgsize, numinsns;
+static unsigned int *insns;
+
+static void trap_handler(int signum, siginfo_t *sinfo, void *ctx)
+{
+ /* Check if this fault originated from the expected address */
+ if (sinfo->si_addr != (void *) fault_addr)
+ sigsafe_err("got a fault for an unexpected address\n");
+
+ _exit(1);
+}
+
+static void segv_handler(int signum, siginfo_t *sinfo, void *ctx)
+{
+ int signal_pkey;
+
+ signal_pkey = siginfo_pkey(sinfo);
+ fault_code = sinfo->si_code;
+
+ /* Check if this fault originated from the expected address */
+ if (sinfo->si_addr != (void *) fault_addr) {
+ sigsafe_err("got a fault for an unexpected address\n");
+ _exit(1);
+ }
+
+ /* Check if too many faults have occurred for a single test case */
+ if (!remaining_faults) {
+ sigsafe_err("got too many faults for the same address\n");
+ _exit(1);
+ }
+
+
+ /* Restore permissions in order to continue */
+ switch (fault_code) {
+ case SEGV_ACCERR:
+ if (mprotect(insns, pgsize, PROT_READ | PROT_WRITE)) {
+ sigsafe_err("failed to set access permissions\n");
+ _exit(1);
+ }
+ break;
+ case SEGV_PKUERR:
+ if (signal_pkey != fault_pkey) {
+ sigsafe_err("got a fault for an unexpected pkey\n");
+ _exit(1);
+ }
+
+ switch (fault_type) {
+ case PKEY_DISABLE_ACCESS:
+ pkey_set_rights(fault_pkey, 0);
+ break;
+ case PKEY_DISABLE_EXECUTE:
+ /*
+ * Reassociate the exec-only pkey with the region
+ * to be able to continue. Unlike AMR, we cannot
+ * set IAMR directly from userspace to restore the
+ * permissions.
+ */
+ if (mprotect(insns, pgsize, PROT_EXEC)) {
+ sigsafe_err("failed to set execute permissions\n");
+ _exit(1);
+ }
+ break;
+ default:
+ sigsafe_err("got a fault with an unexpected type\n");
+ _exit(1);
+ }
+ break;
+ default:
+ sigsafe_err("got a fault with an unexpected code\n");
+ _exit(1);
+ }
+
+ remaining_faults--;
+}
+
+static int test(void)
+{
+ struct sigaction segv_act, trap_act;
+ unsigned long rights;
+ int pkey, ret, i;
+
+ ret = pkeys_unsupported();
+ if (ret)
+ return ret;
+
+ /* Setup SIGSEGV handler */
+ segv_act.sa_handler = 0;
+ segv_act.sa_sigaction = segv_handler;
+ FAIL_IF(sigprocmask(SIG_SETMASK, 0, &segv_act.sa_mask) != 0);
+ segv_act.sa_flags = SA_SIGINFO;
+ segv_act.sa_restorer = 0;
+ FAIL_IF(sigaction(SIGSEGV, &segv_act, NULL) != 0);
+
+ /* Setup SIGTRAP handler */
+ trap_act.sa_handler = 0;
+ trap_act.sa_sigaction = trap_handler;
+ FAIL_IF(sigprocmask(SIG_SETMASK, 0, &trap_act.sa_mask) != 0);
+ trap_act.sa_flags = SA_SIGINFO;
+ trap_act.sa_restorer = 0;
+ FAIL_IF(sigaction(SIGTRAP, &trap_act, NULL) != 0);
+
+ /* Setup executable region */
+ pgsize = getpagesize();
+ numinsns = pgsize / sizeof(unsigned int);
+ insns = (unsigned int *) mmap(NULL, pgsize, PROT_READ | PROT_WRITE,
+ MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
+ FAIL_IF(insns == MAP_FAILED);
+
+ /* Write the instruction words */
+ for (i = 1; i < numinsns - 1; i++)
+ insns[i] = PPC_INST_NOP;
+
+ /*
+ * Set the first instruction as an unconditional trap. If
+ * the last write to this address succeeds, this should
+ * get overwritten by a no-op.
+ */
+ insns[0] = PPC_INST_TRAP;
+
+ /*
+ * Later, to jump to the executable region, we use a branch
+ * and link instruction (bctrl) which sets the return address
+ * automatically in LR. Use that to return back.
+ */
+ insns[numinsns - 1] = PPC_INST_BLR;
+
+ /* Allocate a pkey that restricts execution */
+ rights = PKEY_DISABLE_EXECUTE;
+ pkey = sys_pkey_alloc(0, rights);
+ FAIL_IF(pkey < 0);
+
+ /*
+ * Pick the first instruction's address from the executable
+ * region.
+ */
+ fault_addr = insns;
+
+ /* The following two cases will avoid SEGV_PKUERR */
+ fault_type = -1;
+ fault_pkey = -1;
+
+ /*
+ * Read an instruction word from the address when AMR bits
+ * are not set i.e. the pkey permits both read and write
+ * access.
+ *
+ * This should not generate a fault as having PROT_EXEC
+ * implies PROT_READ on GNU systems. The pkey currently
+ * restricts execution only based on the IAMR bits. The
+ * AMR bits are cleared.
+ */
+ remaining_faults = 0;
+ FAIL_IF(sys_pkey_mprotect(insns, pgsize, PROT_EXEC, pkey) != 0);
+ printf("read from %p, pkey permissions are %s\n", fault_addr,
+ pkey_rights(rights));
+ i = *fault_addr;
+ FAIL_IF(remaining_faults != 0);
+
+ /*
+ * Write an instruction word to the address when AMR bits
+ * are not set i.e. the pkey permits both read and write
+ * access.
+ *
+ * This should generate an access fault as having just
+ * PROT_EXEC also restricts writes. The pkey currently
+ * restricts execution only based on the IAMR bits. The
+ * AMR bits are cleared.
+ */
+ remaining_faults = 1;
+ FAIL_IF(sys_pkey_mprotect(insns, pgsize, PROT_EXEC, pkey) != 0);
+ printf("write to %p, pkey permissions are %s\n", fault_addr,
+ pkey_rights(rights));
+ *fault_addr = PPC_INST_TRAP;
+ FAIL_IF(remaining_faults != 0 || fault_code != SEGV_ACCERR);
+
+ /* The following three cases will generate SEGV_PKUERR */
+ rights |= PKEY_DISABLE_ACCESS;
+ fault_type = PKEY_DISABLE_ACCESS;
+ fault_pkey = pkey;
+
+ /*
+ * Read an instruction word from the address when AMR bits
+ * are set i.e. the pkey permits neither read nor write
+ * access.
+ *
+ * This should generate a pkey fault based on AMR bits only
+ * as having PROT_EXEC implicitly allows reads.
+ */
+ remaining_faults = 1;
+ FAIL_IF(sys_pkey_mprotect(insns, pgsize, PROT_EXEC, pkey) != 0);
+ pkey_set_rights(pkey, rights);
+ printf("read from %p, pkey permissions are %s\n", fault_addr,
+ pkey_rights(rights));
+ i = *fault_addr;
+ FAIL_IF(remaining_faults != 0 || fault_code != SEGV_PKUERR);
+
+ /*
+ * Write an instruction word to the address when AMR bits
+ * are set i.e. the pkey permits neither read nor write
+ * access.
+ *
+ * This should generate two faults. First, a pkey fault
+ * based on AMR bits and then an access fault since
+ * PROT_EXEC does not allow writes.
+ */
+ remaining_faults = 2;
+ FAIL_IF(sys_pkey_mprotect(insns, pgsize, PROT_EXEC, pkey) != 0);
+ pkey_set_rights(pkey, rights);
+ printf("write to %p, pkey permissions are %s\n", fault_addr,
+ pkey_rights(rights));
+ *fault_addr = PPC_INST_NOP;
+ FAIL_IF(remaining_faults != 0 || fault_code != SEGV_ACCERR);
+
+ /* Free the current pkey */
+ sys_pkey_free(pkey);
+
+ rights = 0;
+ do {
+ /*
+ * Allocate pkeys with all valid combinations of read,
+ * write and execute restrictions.
+ */
+ pkey = sys_pkey_alloc(0, rights);
+ FAIL_IF(pkey < 0);
+
+ /*
+ * Jump to the executable region. AMR bits may or may not
+ * be set but they should not affect execution.
+ *
+ * This should generate pkey faults based on IAMR bits which
+ * may be set to restrict execution.
+ *
+ * The first iteration also checks if the overwrite of the
+ * first instruction word from a trap to a no-op succeeded.
+ */
+ fault_pkey = pkey;
+ fault_type = -1;
+ remaining_faults = 0;
+ if (rights & PKEY_DISABLE_EXECUTE) {
+ fault_type = PKEY_DISABLE_EXECUTE;
+ remaining_faults = 1;
+ }
+
+ FAIL_IF(sys_pkey_mprotect(insns, pgsize, PROT_EXEC, pkey) != 0);
+ printf("execute at %p, pkey permissions are %s\n", fault_addr,
+ pkey_rights(rights));
+ asm volatile("mtctr %0; bctrl" : : "r"(insns));
+ FAIL_IF(remaining_faults != 0);
+ if (rights & PKEY_DISABLE_EXECUTE)
+ FAIL_IF(fault_code != SEGV_PKUERR);
+
+ /* Free the current pkey */
+ sys_pkey_free(pkey);
+
+ /* Find next valid combination of pkey rights */
+ rights = next_pkey_rights(rights);
+ } while (rights);
+
+ /* Cleanup */
+ munmap((void *) insns, pgsize);
+
+ return 0;
+}
+
+int main(void)
+{
+ return test_harness(test, "pkey_exec_prot");
+}
diff --git a/tools/testing/selftests/powerpc/mm/pkey_siginfo.c b/tools/testing/selftests/powerpc/mm/pkey_siginfo.c
new file mode 100644
index 000000000000..2db76e56d4cb
--- /dev/null
+++ b/tools/testing/selftests/powerpc/mm/pkey_siginfo.c
@@ -0,0 +1,333 @@
+// SPDX-License-Identifier: GPL-2.0
+
+/*
+ * Copyright 2020, Sandipan Das, IBM Corp.
+ *
+ * Test if the signal information reports the correct memory protection
+ * key upon getting a key access violation fault for a page that was
+ * attempted to be protected by two different keys from two competing
+ * threads at the same time.
+ */
+
+#define _GNU_SOURCE
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <signal.h>
+
+#include <unistd.h>
+#include <pthread.h>
+#include <sys/mman.h>
+
+#include "pkeys.h"
+
+#define PPC_INST_NOP 0x60000000
+#define PPC_INST_BLR 0x4e800020
+#define PROT_RWX (PROT_READ | PROT_WRITE | PROT_EXEC)
+
+#define NUM_ITERATIONS 1000000
+
+static volatile sig_atomic_t perm_pkey, rest_pkey;
+static volatile sig_atomic_t rights, fault_count;
+static volatile unsigned int *volatile fault_addr;
+static pthread_barrier_t iteration_barrier;
+
+static void segv_handler(int signum, siginfo_t *sinfo, void *ctx)
+{
+ void *pgstart;
+ size_t pgsize;
+ int pkey;
+
+ pkey = siginfo_pkey(sinfo);
+
+ /* Check if this fault originated from a pkey access violation */
+ if (sinfo->si_code != SEGV_PKUERR) {
+ sigsafe_err("got a fault for an unexpected reason\n");
+ _exit(1);
+ }
+
+ /* Check if this fault originated from the expected address */
+ if (sinfo->si_addr != (void *) fault_addr) {
+ sigsafe_err("got a fault for an unexpected address\n");
+ _exit(1);
+ }
+
+ /* Check if this fault originated from the restrictive pkey */
+ if (pkey != rest_pkey) {
+ sigsafe_err("got a fault for an unexpected pkey\n");
+ _exit(1);
+ }
+
+ /* Check if too many faults have occurred for the same iteration */
+ if (fault_count > 0) {
+ sigsafe_err("got too many faults for the same address\n");
+ _exit(1);
+ }
+
+ pgsize = getpagesize();
+ pgstart = (void *) ((unsigned long) fault_addr & ~(pgsize - 1));
+
+ /*
+ * If the current fault occurred due to lack of execute rights,
+ * reassociate the page with the exec-only pkey since execute
+ * rights cannot be changed directly for the faulting pkey as
+ * IAMR is inaccessible from userspace.
+ *
+ * Otherwise, if the current fault occurred due to lack of
+ * read-write rights, change the AMR permission bits for the
+ * pkey.
+ *
+ * This will let the test continue.
+ */
+ if (rights == PKEY_DISABLE_EXECUTE &&
+ mprotect(pgstart, pgsize, PROT_EXEC))
+ _exit(1);
+ else
+ pkey_set_rights(pkey, 0);
+
+ fault_count++;
+}
+
+struct region {
+ unsigned long rights;
+ unsigned int *base;
+ size_t size;
+};
+
+static void *protect(void *p)
+{
+ unsigned long rights;
+ unsigned int *base;
+ size_t size;
+ int tid, i;
+
+ tid = gettid();
+ base = ((struct region *) p)->base;
+ size = ((struct region *) p)->size;
+ FAIL_IF_EXIT(!base);
+
+ /* No read, write and execute restrictions */
+ rights = 0;
+
+ printf("tid %d, pkey permissions are %s\n", tid, pkey_rights(rights));
+
+ /* Allocate the permissive pkey */
+ perm_pkey = sys_pkey_alloc(0, rights);
+ FAIL_IF_EXIT(perm_pkey < 0);
+
+ /*
+ * Repeatedly try to protect the common region with a permissive
+ * pkey
+ */
+ for (i = 0; i < NUM_ITERATIONS; i++) {
+ /*
+ * Wait until the other thread has finished allocating the
+ * restrictive pkey or until the next iteration has begun
+ */
+ pthread_barrier_wait(&iteration_barrier);
+
+ /* Try to associate the permissive pkey with the region */
+ FAIL_IF_EXIT(sys_pkey_mprotect(base, size, PROT_RWX,
+ perm_pkey));
+ }
+
+ /* Free the permissive pkey */
+ sys_pkey_free(perm_pkey);
+
+ return NULL;
+}
+
+static void *protect_access(void *p)
+{
+ size_t size, numinsns;
+ unsigned int *base;
+ int tid, i;
+
+ tid = gettid();
+ base = ((struct region *) p)->base;
+ size = ((struct region *) p)->size;
+ rights = ((struct region *) p)->rights;
+ numinsns = size / sizeof(base[0]);
+ FAIL_IF_EXIT(!base);
+
+ /* Allocate the restrictive pkey */
+ rest_pkey = sys_pkey_alloc(0, rights);
+ FAIL_IF_EXIT(rest_pkey < 0);
+
+ printf("tid %d, pkey permissions are %s\n", tid, pkey_rights(rights));
+ printf("tid %d, %s randomly in range [%p, %p]\n", tid,
+ (rights == PKEY_DISABLE_EXECUTE) ? "execute" :
+ (rights == PKEY_DISABLE_WRITE) ? "write" : "read",
+ base, base + numinsns);
+
+ /*
+ * Repeatedly try to protect the common region with a restrictive
+ * pkey and read, write or execute from it
+ */
+ for (i = 0; i < NUM_ITERATIONS; i++) {
+ /*
+ * Wait until the other thread has finished allocating the
+ * permissive pkey or until the next iteration has begun
+ */
+ pthread_barrier_wait(&iteration_barrier);
+
+ /* Try to associate the restrictive pkey with the region */
+ FAIL_IF_EXIT(sys_pkey_mprotect(base, size, PROT_RWX,
+ rest_pkey));
+
+ /* Choose a random instruction word address from the region */
+ fault_addr = base + (rand() % numinsns);
+ fault_count = 0;
+
+ switch (rights) {
+ /* Read protection test */
+ case PKEY_DISABLE_ACCESS:
+ /*
+ * Read an instruction word from the region and
+ * verify if it has not been overwritten to
+ * something unexpected
+ */
+ FAIL_IF_EXIT(*fault_addr != PPC_INST_NOP &&
+ *fault_addr != PPC_INST_BLR);
+ break;
+
+ /* Write protection test */
+ case PKEY_DISABLE_WRITE:
+ /*
+ * Write an instruction word to the region and
+ * verify if the overwrite has succeeded
+ */
+ *fault_addr = PPC_INST_BLR;
+ FAIL_IF_EXIT(*fault_addr != PPC_INST_BLR);
+ break;
+
+ /* Execute protection test */
+ case PKEY_DISABLE_EXECUTE:
+ /* Jump to the region and execute instructions */
+ asm volatile(
+ "mtctr %0; bctrl"
+ : : "r"(fault_addr) : "ctr", "lr");
+ break;
+ }
+
+ /*
+ * Restore the restrictions originally imposed by the
+ * restrictive pkey as the signal handler would have
+ * cleared out the corresponding AMR bits
+ */
+ pkey_set_rights(rest_pkey, rights);
+ }
+
+ /* Free restrictive pkey */
+ sys_pkey_free(rest_pkey);
+
+ return NULL;
+}
+
+static void reset_pkeys(unsigned long rights)
+{
+ int pkeys[NR_PKEYS], i;
+
+ /* Exhaustively allocate all available pkeys */
+ for (i = 0; i < NR_PKEYS; i++)
+ pkeys[i] = sys_pkey_alloc(0, rights);
+
+ /* Free all allocated pkeys */
+ for (i = 0; i < NR_PKEYS; i++)
+ sys_pkey_free(pkeys[i]);
+}
+
+static int test(void)
+{
+ pthread_t prot_thread, pacc_thread;
+ struct sigaction act;
+ pthread_attr_t attr;
+ size_t numinsns;
+ struct region r;
+ int ret, i;
+
+ srand(time(NULL));
+ ret = pkeys_unsupported();
+ if (ret)
+ return ret;
+
+ /* Allocate the region */
+ r.size = getpagesize();
+ r.base = mmap(NULL, r.size, PROT_RWX,
+ MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
+ FAIL_IF(r.base == MAP_FAILED);
+
+ /*
+ * Fill the region with no-ops with a branch at the end
+ * for returning to the caller
+ */
+ numinsns = r.size / sizeof(r.base[0]);
+ for (i = 0; i < numinsns - 1; i++)
+ r.base[i] = PPC_INST_NOP;
+ r.base[i] = PPC_INST_BLR;
+
+ /* Setup SIGSEGV handler */
+ act.sa_handler = 0;
+ act.sa_sigaction = segv_handler;
+ FAIL_IF(sigprocmask(SIG_SETMASK, 0, &act.sa_mask) != 0);
+ act.sa_flags = SA_SIGINFO;
+ act.sa_restorer = 0;
+ FAIL_IF(sigaction(SIGSEGV, &act, NULL) != 0);
+
+ /*
+ * For these tests, the parent process should clear all bits of
+ * AMR and IAMR, i.e. impose no restrictions, for all available
+ * pkeys. This will be the base for the initial AMR and IAMR
+ * values for all the test thread pairs.
+ *
+ * If the AMR and IAMR bits of all available pkeys are cleared
+ * before running the tests and a fault is generated when
+ * attempting to read, write or execute instructions from a
+ * pkey protected region, the pkey responsible for this must be
+ * the one from the protect-and-access thread since the other
+ * one is fully permissive. Despite that, if the pkey reported
+ * by siginfo is not the restrictive pkey, then there must be a
+ * kernel bug.
+ */
+ reset_pkeys(0);
+
+ /* Setup barrier for protect and protect-and-access threads */
+ FAIL_IF(pthread_attr_init(&attr) != 0);
+ FAIL_IF(pthread_barrier_init(&iteration_barrier, NULL, 2) != 0);
+
+ /* Setup and start protect and protect-and-read threads */
+ puts("starting thread pair (protect, protect-and-read)");
+ r.rights = PKEY_DISABLE_ACCESS;
+ FAIL_IF(pthread_create(&prot_thread, &attr, &protect, &r) != 0);
+ FAIL_IF(pthread_create(&pacc_thread, &attr, &protect_access, &r) != 0);
+ FAIL_IF(pthread_join(prot_thread, NULL) != 0);
+ FAIL_IF(pthread_join(pacc_thread, NULL) != 0);
+
+ /* Setup and start protect and protect-and-write threads */
+ puts("starting thread pair (protect, protect-and-write)");
+ r.rights = PKEY_DISABLE_WRITE;
+ FAIL_IF(pthread_create(&prot_thread, &attr, &protect, &r) != 0);
+ FAIL_IF(pthread_create(&pacc_thread, &attr, &protect_access, &r) != 0);
+ FAIL_IF(pthread_join(prot_thread, NULL) != 0);
+ FAIL_IF(pthread_join(pacc_thread, NULL) != 0);
+
+ /* Setup and start protect and protect-and-execute threads */
+ puts("starting thread pair (protect, protect-and-execute)");
+ r.rights = PKEY_DISABLE_EXECUTE;
+ FAIL_IF(pthread_create(&prot_thread, &attr, &protect, &r) != 0);
+ FAIL_IF(pthread_create(&pacc_thread, &attr, &protect_access, &r) != 0);
+ FAIL_IF(pthread_join(prot_thread, NULL) != 0);
+ FAIL_IF(pthread_join(pacc_thread, NULL) != 0);
+
+ /* Cleanup */
+ FAIL_IF(pthread_attr_destroy(&attr) != 0);
+ FAIL_IF(pthread_barrier_destroy(&iteration_barrier) != 0);
+ munmap(r.base, r.size);
+
+ return 0;
+}
+
+int main(void)
+{
+ return test_harness(test, "pkey_siginfo");
+}
diff --git a/tools/testing/selftests/powerpc/mm/prot_sao.c b/tools/testing/selftests/powerpc/mm/prot_sao.c
index e2eed65b7735..30b71b1d78d5 100644
--- a/tools/testing/selftests/powerpc/mm/prot_sao.c
+++ b/tools/testing/selftests/powerpc/mm/prot_sao.c
@@ -7,6 +7,7 @@
#include <stdlib.h>
#include <string.h>
#include <sys/mman.h>
+#include <unistd.h>
#include <asm/cputable.h>
@@ -18,8 +19,13 @@ int test_prot_sao(void)
{
char *p;
- /* 2.06 or later should support SAO */
- SKIP_IF(!have_hwcap(PPC_FEATURE_ARCH_2_06));
+ /*
+ * SAO was introduced in 2.06 and removed in 3.1. It's disabled in
+ * guests/LPARs by default, so also skip if we are running in a guest.
+ */
+ SKIP_IF(!have_hwcap(PPC_FEATURE_ARCH_2_06) ||
+ have_hwcap2(PPC_FEATURE2_ARCH_3_1) ||
+ access("/proc/device-tree/rtas/ibm,hypertas-functions", F_OK) == 0);
/*
* Ensure we can ask for PROT_SAO.
diff --git a/tools/testing/selftests/powerpc/mm/stack_expansion_ldst.c b/tools/testing/selftests/powerpc/mm/stack_expansion_ldst.c
new file mode 100644
index 000000000000..ed9143990888
--- /dev/null
+++ b/tools/testing/selftests/powerpc/mm/stack_expansion_ldst.c
@@ -0,0 +1,202 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Test that loads/stores expand the stack segment, or trigger a SEGV, in
+ * various conditions.
+ *
+ * Based on test code by Tom Lane.
+ */
+
+#undef NDEBUG
+#include <assert.h>
+
+#include <err.h>
+#include <errno.h>
+#include <stdio.h>
+#include <signal.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/resource.h>
+#include <sys/time.h>
+#include <sys/types.h>
+#include <sys/wait.h>
+#include <unistd.h>
+
+#define _KB (1024)
+#define _MB (1024 * 1024)
+
+volatile char *stack_top_ptr;
+volatile unsigned long stack_top_sp;
+volatile char c;
+
+enum access_type {
+ LOAD,
+ STORE,
+};
+
+/*
+ * Consume stack until the stack pointer is below @target_sp, then do an access
+ * (load or store) at offset @delta from either the base of the stack or the
+ * current stack pointer.
+ */
+__attribute__ ((noinline))
+int consume_stack(unsigned long target_sp, unsigned long stack_high, int delta, enum access_type type)
+{
+ unsigned long target;
+ char stack_cur;
+
+ if ((unsigned long)&stack_cur > target_sp)
+ return consume_stack(target_sp, stack_high, delta, type);
+ else {
+ // We don't really need this, but without it GCC might not
+ // generate a recursive call above.
+ stack_top_ptr = &stack_cur;
+
+#ifdef __powerpc__
+ asm volatile ("mr %[sp], %%r1" : [sp] "=r" (stack_top_sp));
+#else
+ asm volatile ("mov %%rsp, %[sp]" : [sp] "=r" (stack_top_sp));
+#endif
+ target = stack_high - delta + 1;
+ volatile char *p = (char *)target;
+
+ if (type == STORE)
+ *p = c;
+ else
+ c = *p;
+
+ // Do something to prevent the stack frame being popped prior to
+ // our access above.
+ getpid();
+ }
+
+ return 0;
+}
+
+static int search_proc_maps(char *needle, unsigned long *low, unsigned long *high)
+{
+ unsigned long start, end;
+ static char buf[4096];
+ char name[128];
+ FILE *f;
+ int rc;
+
+ f = fopen("/proc/self/maps", "r");
+ if (!f) {
+ perror("fopen");
+ return -1;
+ }
+
+ while (fgets(buf, sizeof(buf), f)) {
+ rc = sscanf(buf, "%lx-%lx %*c%*c%*c%*c %*x %*d:%*d %*d %127s\n",
+ &start, &end, name);
+ if (rc == 2)
+ continue;
+
+ if (rc != 3) {
+ printf("sscanf errored\n");
+ rc = -1;
+ break;
+ }
+
+ if (strstr(name, needle)) {
+ *low = start;
+ *high = end - 1;
+ rc = 0;
+ break;
+ }
+ }
+
+ fclose(f);
+
+ return rc;
+}
+
+int child(unsigned int stack_used, int delta, enum access_type type)
+{
+ unsigned long low, stack_high;
+
+ assert(search_proc_maps("[stack]", &low, &stack_high) == 0);
+
+ assert(consume_stack(stack_high - stack_used, stack_high, delta, type) == 0);
+
+ printf("Access OK: %s delta %-7d used size 0x%06x stack high 0x%lx top_ptr %p top sp 0x%lx actual used 0x%lx\n",
+ type == LOAD ? "load" : "store", delta, stack_used, stack_high,
+ stack_top_ptr, stack_top_sp, stack_high - stack_top_sp + 1);
+
+ return 0;
+}
+
+static int test_one(unsigned int stack_used, int delta, enum access_type type)
+{
+ pid_t pid;
+ int rc;
+
+ pid = fork();
+ if (pid == 0)
+ exit(child(stack_used, delta, type));
+
+ assert(waitpid(pid, &rc, 0) != -1);
+
+ if (WIFEXITED(rc) && WEXITSTATUS(rc) == 0)
+ return 0;
+
+ // We don't expect a non-zero exit that's not a signal
+ assert(!WIFEXITED(rc));
+
+ printf("Faulted: %s delta %-7d used size 0x%06x signal %d\n",
+ type == LOAD ? "load" : "store", delta, stack_used,
+ WTERMSIG(rc));
+
+ return 1;
+}
+
+// This is fairly arbitrary but is well below any of the targets below,
+// so that the delta between the stack pointer and the target is large.
+#define DEFAULT_SIZE (32 * _KB)
+
+static void test_one_type(enum access_type type, unsigned long page_size, unsigned long rlim_cur)
+{
+ unsigned long delta;
+
+ // We should be able to access anywhere within the rlimit
+ for (delta = page_size; delta <= rlim_cur; delta += page_size)
+ assert(test_one(DEFAULT_SIZE, delta, type) == 0);
+
+ assert(test_one(DEFAULT_SIZE, rlim_cur, type) == 0);
+
+ // But if we go past the rlimit it should fail
+ assert(test_one(DEFAULT_SIZE, rlim_cur + 1, type) != 0);
+}
+
+static int test(void)
+{
+ unsigned long page_size;
+ struct rlimit rlimit;
+
+ page_size = getpagesize();
+ getrlimit(RLIMIT_STACK, &rlimit);
+ printf("Stack rlimit is 0x%lx\n", rlimit.rlim_cur);
+
+ printf("Testing loads ...\n");
+ test_one_type(LOAD, page_size, rlimit.rlim_cur);
+ printf("Testing stores ...\n");
+ test_one_type(STORE, page_size, rlimit.rlim_cur);
+
+ printf("All OK\n");
+
+ return 0;
+}
+
+#ifdef __powerpc__
+#include "utils.h"
+
+int main(void)
+{
+ return test_harness(test, "stack_expansion_ldst");
+}
+#else
+int main(void)
+{
+ return test();
+}
+#endif
diff --git a/tools/testing/selftests/powerpc/mm/stack_expansion_signal.c b/tools/testing/selftests/powerpc/mm/stack_expansion_signal.c
new file mode 100644
index 000000000000..c8b32a29e274
--- /dev/null
+++ b/tools/testing/selftests/powerpc/mm/stack_expansion_signal.c
@@ -0,0 +1,118 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Test that signal delivery is able to expand the stack segment without
+ * triggering a SEGV.
+ *
+ * Based on test code by Tom Lane.
+ */
+
+#include <err.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <signal.h>
+#include <sys/types.h>
+#include <unistd.h>
+
+#include "../pmu/lib.h"
+#include "utils.h"
+
+#define _KB (1024)
+#define _MB (1024 * 1024)
+
+static char *stack_base_ptr;
+static char *stack_top_ptr;
+
+static volatile sig_atomic_t sig_occurred = 0;
+
+static void sigusr1_handler(int signal_arg)
+{
+ sig_occurred = 1;
+}
+
+static int consume_stack(unsigned int stack_size, union pipe write_pipe)
+{
+ char stack_cur;
+
+ if ((stack_base_ptr - &stack_cur) < stack_size)
+ return consume_stack(stack_size, write_pipe);
+ else {
+ stack_top_ptr = &stack_cur;
+
+ FAIL_IF(notify_parent(write_pipe));
+
+ while (!sig_occurred)
+ barrier();
+ }
+
+ return 0;
+}
+
+static int child(unsigned int stack_size, union pipe write_pipe)
+{
+ struct sigaction act;
+ char stack_base;
+
+ act.sa_handler = sigusr1_handler;
+ sigemptyset(&act.sa_mask);
+ act.sa_flags = 0;
+ if (sigaction(SIGUSR1, &act, NULL) < 0)
+ err(1, "sigaction");
+
+ stack_base_ptr = (char *) (((size_t) &stack_base + 65535) & ~65535UL);
+
+ FAIL_IF(consume_stack(stack_size, write_pipe));
+
+ printf("size 0x%06x: OK, stack base %p top %p (%zx used)\n",
+ stack_size, stack_base_ptr, stack_top_ptr,
+ stack_base_ptr - stack_top_ptr);
+
+ return 0;
+}
+
+static int test_one_size(unsigned int stack_size)
+{
+ union pipe read_pipe, write_pipe;
+ pid_t pid;
+
+ FAIL_IF(pipe(read_pipe.fds) == -1);
+ FAIL_IF(pipe(write_pipe.fds) == -1);
+
+ pid = fork();
+ if (pid == 0) {
+ close(read_pipe.read_fd);
+ close(write_pipe.write_fd);
+ exit(child(stack_size, read_pipe));
+ }
+
+ close(read_pipe.write_fd);
+ close(write_pipe.read_fd);
+ FAIL_IF(sync_with_child(read_pipe, write_pipe));
+
+ kill(pid, SIGUSR1);
+
+ FAIL_IF(wait_for_child(pid));
+
+ close(read_pipe.read_fd);
+ close(write_pipe.write_fd);
+
+ return 0;
+}
+
+int test(void)
+{
+ unsigned int i, size;
+
+ // Test with used stack from 1MB - 64K to 1MB + 64K
+ // Increment by 64 to get more coverage of odd sizes
+ for (i = 0; i < (128 * _KB); i += 64) {
+ size = i + (1 * _MB) - (64 * _KB);
+ FAIL_IF(test_one_size(size));
+ }
+
+ return 0;
+}
+
+int main(void)
+{
+ return test_harness(test, "stack_expansion_signal");
+}
diff --git a/tools/testing/selftests/powerpc/mm/stress_code_patching.sh b/tools/testing/selftests/powerpc/mm/stress_code_patching.sh
new file mode 100755
index 000000000000..e454509659f6
--- /dev/null
+++ b/tools/testing/selftests/powerpc/mm/stress_code_patching.sh
@@ -0,0 +1,49 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0-or-later
+
+TIMEOUT=30
+
+DEBUFS_DIR=`cat /proc/mounts | grep debugfs | awk '{print $2}'`
+if [ ! -e "$DEBUFS_DIR" ]
+then
+ echo "debugfs not found, skipping" 1>&2
+ exit 4
+fi
+
+if [ ! -e "$DEBUFS_DIR/tracing/current_tracer" ]
+then
+ echo "Tracing files not found, skipping" 1>&2
+ exit 4
+fi
+
+
+echo "Testing for spurious faults when mapping kernel memory..."
+
+if grep -q "FUNCTION TRACING IS CORRUPTED" "$DEBUFS_DIR/tracing/trace"
+then
+ echo "FAILED: Ftrace already dead. Probably due to a spurious fault" 1>&2
+ exit 1
+fi
+
+dmesg -C
+START_TIME=`date +%s`
+END_TIME=`expr $START_TIME + $TIMEOUT`
+while [ `date +%s` -lt $END_TIME ]
+do
+ echo function > $DEBUFS_DIR/tracing/current_tracer
+ echo nop > $DEBUFS_DIR/tracing/current_tracer
+ if dmesg | grep -q 'ftrace bug'
+ then
+ break
+ fi
+done
+
+echo nop > $DEBUFS_DIR/tracing/current_tracer
+if dmesg | grep -q 'ftrace bug'
+then
+ echo "FAILED: Mapping kernel memory causes spurious faults" 1>&2
+ exit 1
+else
+ echo "OK: Mapping kernel memory does not cause spurious faults"
+ exit 0
+fi
diff --git a/tools/testing/selftests/powerpc/mm/tlbie_test.c b/tools/testing/selftests/powerpc/mm/tlbie_test.c
index f85a0938ab25..48344a74b212 100644
--- a/tools/testing/selftests/powerpc/mm/tlbie_test.c
+++ b/tools/testing/selftests/powerpc/mm/tlbie_test.c
@@ -33,7 +33,6 @@
#include <sched.h>
#include <time.h>
#include <stdarg.h>
-#include <sched.h>
#include <pthread.h>
#include <signal.h>
#include <sys/prctl.h>
diff --git a/tools/testing/selftests/powerpc/nx-gzip/.gitignore b/tools/testing/selftests/powerpc/nx-gzip/.gitignore
new file mode 100644
index 000000000000..886d522d52df
--- /dev/null
+++ b/tools/testing/selftests/powerpc/nx-gzip/.gitignore
@@ -0,0 +1,3 @@
+# SPDX-License-Identifier: GPL-2.0-only
+gunz_test
+gzfht_test
diff --git a/tools/testing/selftests/powerpc/nx-gzip/Makefile b/tools/testing/selftests/powerpc/nx-gzip/Makefile
index 640fad6cc2c7..0785c2e99d40 100644
--- a/tools/testing/selftests/powerpc/nx-gzip/Makefile
+++ b/tools/testing/selftests/powerpc/nx-gzip/Makefile
@@ -1,8 +1,8 @@
-CFLAGS = -O3 -m64 -I./include
+CFLAGS = -O3 -m64 -I./include -I../include
TEST_GEN_FILES := gzfht_test gunz_test
TEST_PROGS := nx-gzip-test.sh
include ../../lib.mk
-$(TEST_GEN_FILES): gzip_vas.c
+$(TEST_GEN_FILES): gzip_vas.c ../utils.c
diff --git a/tools/testing/selftests/powerpc/nx-gzip/gunz_test.c b/tools/testing/selftests/powerpc/nx-gzip/gunz_test.c
index 6ee0fded0391..7c23d3dd7d6d 100644
--- a/tools/testing/selftests/powerpc/nx-gzip/gunz_test.c
+++ b/tools/testing/selftests/powerpc/nx-gzip/gunz_test.c
@@ -698,13 +698,13 @@ restart_nx:
switch (cc) {
- case ERR_NX_TRANSLATION:
+ case ERR_NX_AT_FAULT:
/* We touched the pages ahead of time. In the most common case
* we shouldn't be here. But may be some pages were paged out.
* Kernel should have placed the faulting address to fsaddr.
*/
- NXPRT(fprintf(stderr, "ERR_NX_TRANSLATION %p\n",
+ NXPRT(fprintf(stderr, "ERR_NX_AT_FAULT %p\n",
(void *)cmdp->crb.csb.fsaddr));
if (pgfault_retries == NX_MAX_FAULTS) {
diff --git a/tools/testing/selftests/powerpc/nx-gzip/gzfht_test.c b/tools/testing/selftests/powerpc/nx-gzip/gzfht_test.c
index 7496a83f9c9d..4de079923ccb 100644
--- a/tools/testing/selftests/powerpc/nx-gzip/gzfht_test.c
+++ b/tools/testing/selftests/powerpc/nx-gzip/gzfht_test.c
@@ -60,6 +60,7 @@
#include <assert.h>
#include <errno.h>
#include <signal.h>
+#include "utils.h"
#include "nxu.h"
#include "nx.h"
@@ -70,6 +71,8 @@ FILE *nx_gzip_log;
#define FNAME_MAX 1024
#define FEXT ".nx.gz"
+#define SYSFS_MAX_REQ_BUF_PATH "devices/vio/ibm,compression-v1/nx_gzip_caps/req_max_processed_len"
+
/*
* LZ counts returned in the user supplied nx_gzip_crb_cpb_t structure.
*/
@@ -140,54 +143,6 @@ int gzip_header_blank(char *buf)
return i;
}
-/* Caller must free the allocated buffer return nonzero on error. */
-int read_alloc_input_file(char *fname, char **buf, size_t *bufsize)
-{
- struct stat statbuf;
- FILE *fp;
- char *p;
- size_t num_bytes;
-
- if (stat(fname, &statbuf)) {
- perror(fname);
- return(-1);
- }
- fp = fopen(fname, "r");
- if (fp == NULL) {
- perror(fname);
- return(-1);
- }
- assert(NULL != (p = (char *) malloc(statbuf.st_size)));
- num_bytes = fread(p, 1, statbuf.st_size, fp);
- if (ferror(fp) || (num_bytes != statbuf.st_size)) {
- perror(fname);
- return(-1);
- }
- *buf = p;
- *bufsize = num_bytes;
- return 0;
-}
-
-/* Returns nonzero on error */
-int write_output_file(char *fname, char *buf, size_t bufsize)
-{
- FILE *fp;
- size_t num_bytes;
-
- fp = fopen(fname, "w");
- if (fp == NULL) {
- perror(fname);
- return(-1);
- }
- num_bytes = fwrite(buf, 1, bufsize, fp);
- if (ferror(fp) || (num_bytes != bufsize)) {
- perror(fname);
- return(-1);
- }
- fclose(fp);
- return 0;
-}
-
/*
* Z_SYNC_FLUSH as described in zlib.h.
* Returns number of appended bytes
@@ -244,6 +199,7 @@ int compress_file(int argc, char **argv, void *handle)
struct nx_gzip_crb_cpb_t *cmdp;
uint32_t pagelen = 65536;
int fault_tries = NX_MAX_FAULTS;
+ char buf[32];
cmdp = (void *)(uintptr_t)
aligned_alloc(sizeof(struct nx_gzip_crb_cpb_t),
@@ -253,7 +209,7 @@ int compress_file(int argc, char **argv, void *handle)
fprintf(stderr, "usage: %s <fname>\n", argv[0]);
exit(-1);
}
- if (read_alloc_input_file(argv[1], &inbuf, &inlen))
+ if (read_file_alloc(argv[1], &inbuf, &inlen))
exit(-1);
fprintf(stderr, "file %s read, %ld bytes\n", argv[1], inlen);
@@ -263,8 +219,17 @@ int compress_file(int argc, char **argv, void *handle)
assert(NULL != (outbuf = (char *)malloc(outlen)));
nxu_touch_pages(outbuf, outlen, pagelen, 1);
- /* Compress piecemeal in smallish chunks */
- chunk = 1<<22;
+ /*
+ * On PowerVM, the hypervisor defines the maximum request buffer
+ * size is defined and this value is available via sysfs.
+ */
+ if (!read_sysfs_file(SYSFS_MAX_REQ_BUF_PATH, buf, sizeof(buf))) {
+ chunk = atoi(buf);
+ } else {
+ /* sysfs entry is not available on PowerNV */
+ /* Compress piecemeal in smallish chunks */
+ chunk = 1<<22;
+ }
/* Write the gzip header to the stream */
num_hdr_bytes = gzip_header_blank(outbuf);
@@ -306,13 +271,13 @@ int compress_file(int argc, char **argv, void *handle)
lzcounts, cmdp, handle);
if (cc != ERR_NX_OK && cc != ERR_NX_TPBC_GT_SPBC &&
- cc != ERR_NX_TRANSLATION) {
+ cc != ERR_NX_AT_FAULT) {
fprintf(stderr, "nx error: cc= %d\n", cc);
exit(-1);
}
/* Page faults are handled by the user code */
- if (cc == ERR_NX_TRANSLATION) {
+ if (cc == ERR_NX_AT_FAULT) {
NXPRT(fprintf(stderr, "page fault: cc= %d, ", cc));
NXPRT(fprintf(stderr, "try= %d, fsa= %08llx\n",
fault_tries,
@@ -324,7 +289,7 @@ int compress_file(int argc, char **argv, void *handle)
fprintf(stderr, "error: cannot progress; ");
fprintf(stderr, "too many faults\n");
exit(-1);
- };
+ }
}
fault_tries = NX_MAX_FAULTS; /* Reset for the next chunk */
@@ -386,7 +351,7 @@ int compress_file(int argc, char **argv, void *handle)
assert(FNAME_MAX > (strlen(argv[1]) + strlen(FEXT)));
strcpy(outname, argv[1]);
strcat(outname, FEXT);
- if (write_output_file(outname, outbuf, dsttotlen)) {
+ if (write_file(outname, outbuf, dsttotlen)) {
fprintf(stderr, "write error: %s\n", outname);
exit(-1);
}
diff --git a/tools/testing/selftests/rcutorture/formal/srcu-cbmc/tests/store_buffering/.gitignore b/tools/testing/selftests/powerpc/papr_attributes/.gitignore
index d65462d64816..d5f42b6d9e99 100644
--- a/tools/testing/selftests/rcutorture/formal/srcu-cbmc/tests/store_buffering/.gitignore
+++ b/tools/testing/selftests/powerpc/papr_attributes/.gitignore
@@ -1,2 +1,2 @@
# SPDX-License-Identifier: GPL-2.0-only
-*.out
+attr_test
diff --git a/tools/testing/selftests/powerpc/papr_attributes/Makefile b/tools/testing/selftests/powerpc/papr_attributes/Makefile
new file mode 100644
index 000000000000..e899712d49db
--- /dev/null
+++ b/tools/testing/selftests/powerpc/papr_attributes/Makefile
@@ -0,0 +1,7 @@
+# SPDX-License-Identifier: GPL-2.0
+TEST_GEN_PROGS := attr_test
+
+top_srcdir = ../../../../..
+include ../../lib.mk
+
+$(TEST_GEN_PROGS): ../harness.c ../utils.c \ No newline at end of file
diff --git a/tools/testing/selftests/powerpc/papr_attributes/attr_test.c b/tools/testing/selftests/powerpc/papr_attributes/attr_test.c
new file mode 100644
index 000000000000..9b655be641c9
--- /dev/null
+++ b/tools/testing/selftests/powerpc/papr_attributes/attr_test.c
@@ -0,0 +1,113 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * PAPR Energy attributes sniff test
+ * This checks if the papr folders and contents are populated relating to
+ * the energy and frequency attributes
+ *
+ * Copyright 2022, Pratik Rajesh Sampat, IBM Corp.
+ */
+
+#include <errno.h>
+#include <stdio.h>
+#include <string.h>
+#include <dirent.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <unistd.h>
+#include <stdlib.h>
+
+#include "utils.h"
+
+enum energy_freq_attrs {
+ POWER_PERFORMANCE_MODE = 1,
+ IDLE_POWER_SAVER_STATUS = 2,
+ MIN_FREQ = 3,
+ STAT_FREQ = 4,
+ MAX_FREQ = 6,
+ PROC_FOLDING_STATUS = 8
+};
+
+enum type {
+ INVALID,
+ STR_VAL,
+ NUM_VAL
+};
+
+static int value_type(int id)
+{
+ int val_type;
+
+ switch (id) {
+ case POWER_PERFORMANCE_MODE:
+ case IDLE_POWER_SAVER_STATUS:
+ val_type = STR_VAL;
+ break;
+ case MIN_FREQ:
+ case STAT_FREQ:
+ case MAX_FREQ:
+ case PROC_FOLDING_STATUS:
+ val_type = NUM_VAL;
+ break;
+ default:
+ val_type = INVALID;
+ }
+
+ return val_type;
+}
+
+static int verify_energy_info(void)
+{
+ const char *path = "/sys/firmware/papr/energy_scale_info";
+ struct dirent *entry;
+ struct stat s;
+ DIR *dirp;
+
+ errno = 0;
+ if (stat(path, &s)) {
+ SKIP_IF(errno == ENOENT);
+ FAIL_IF(errno);
+ }
+
+ FAIL_IF(!S_ISDIR(s.st_mode));
+
+ dirp = opendir(path);
+
+ while ((entry = readdir(dirp)) != NULL) {
+ char file_name[64];
+ int id, attr_type;
+ FILE *f;
+
+ if (strcmp(entry->d_name, ".") == 0 ||
+ strcmp(entry->d_name, "..") == 0)
+ continue;
+
+ id = atoi(entry->d_name);
+ attr_type = value_type(id);
+ FAIL_IF(attr_type == INVALID);
+
+ /* Check if the files exist and have data in them */
+ sprintf(file_name, "%s/%d/desc", path, id);
+ f = fopen(file_name, "r");
+ FAIL_IF(!f);
+ FAIL_IF(fgetc(f) == EOF);
+
+ sprintf(file_name, "%s/%d/value", path, id);
+ f = fopen(file_name, "r");
+ FAIL_IF(!f);
+ FAIL_IF(fgetc(f) == EOF);
+
+ if (attr_type == STR_VAL) {
+ sprintf(file_name, "%s/%d/value_desc", path, id);
+ f = fopen(file_name, "r");
+ FAIL_IF(!f);
+ FAIL_IF(fgetc(f) == EOF);
+ }
+ }
+
+ return 0;
+}
+
+int main(void)
+{
+ return test_harness(verify_energy_info, "papr_attributes");
+}
diff --git a/tools/testing/selftests/powerpc/papr_sysparm/.gitignore b/tools/testing/selftests/powerpc/papr_sysparm/.gitignore
new file mode 100644
index 000000000000..f2a69bf59d40
--- /dev/null
+++ b/tools/testing/selftests/powerpc/papr_sysparm/.gitignore
@@ -0,0 +1 @@
+/papr_sysparm
diff --git a/tools/testing/selftests/powerpc/papr_sysparm/Makefile b/tools/testing/selftests/powerpc/papr_sysparm/Makefile
new file mode 100644
index 000000000000..7f79e437634a
--- /dev/null
+++ b/tools/testing/selftests/powerpc/papr_sysparm/Makefile
@@ -0,0 +1,12 @@
+# SPDX-License-Identifier: GPL-2.0
+noarg:
+ $(MAKE) -C ../
+
+TEST_GEN_PROGS := papr_sysparm
+
+top_srcdir = ../../../../..
+include ../../lib.mk
+
+$(TEST_GEN_PROGS): ../harness.c ../utils.c
+
+$(OUTPUT)/papr_sysparm: CFLAGS += $(KHDR_INCLUDES)
diff --git a/tools/testing/selftests/powerpc/papr_sysparm/papr_sysparm.c b/tools/testing/selftests/powerpc/papr_sysparm/papr_sysparm.c
new file mode 100644
index 000000000000..f56c15a11e2f
--- /dev/null
+++ b/tools/testing/selftests/powerpc/papr_sysparm/papr_sysparm.c
@@ -0,0 +1,196 @@
+// SPDX-License-Identifier: GPL-2.0-only
+#include <errno.h>
+#include <fcntl.h>
+#include <stdlib.h>
+#include <sys/ioctl.h>
+#include <unistd.h>
+#include <asm/papr-sysparm.h>
+
+#include "utils.h"
+
+#define DEVPATH "/dev/papr-sysparm"
+
+static int open_close(void)
+{
+ const int devfd = open(DEVPATH, O_RDONLY);
+
+ SKIP_IF_MSG(devfd < 0 && errno == ENOENT,
+ DEVPATH " not present");
+
+ FAIL_IF(devfd < 0);
+ FAIL_IF(close(devfd) != 0);
+
+ return 0;
+}
+
+static int get_splpar(void)
+{
+ struct papr_sysparm_io_block sp = {
+ .parameter = 20, // SPLPAR characteristics
+ };
+ const int devfd = open(DEVPATH, O_RDONLY);
+
+ SKIP_IF_MSG(devfd < 0 && errno == ENOENT,
+ DEVPATH " not present");
+
+ FAIL_IF(devfd < 0);
+ FAIL_IF(ioctl(devfd, PAPR_SYSPARM_IOC_GET, &sp) != 0);
+ FAIL_IF(sp.length == 0);
+ FAIL_IF(sp.length > sizeof(sp.data));
+ FAIL_IF(close(devfd) != 0);
+
+ return 0;
+}
+
+static int get_bad_parameter(void)
+{
+ struct papr_sysparm_io_block sp = {
+ .parameter = UINT32_MAX, // there are only ~60 specified parameters
+ };
+ const int devfd = open(DEVPATH, O_RDONLY);
+
+ SKIP_IF_MSG(devfd < 0 && errno == ENOENT,
+ DEVPATH " not present");
+
+ FAIL_IF(devfd < 0);
+
+ // Ensure expected error
+ FAIL_IF(ioctl(devfd, PAPR_SYSPARM_IOC_GET, &sp) != -1);
+ FAIL_IF(errno != EOPNOTSUPP);
+
+ // Ensure the buffer is unchanged
+ FAIL_IF(sp.length != 0);
+ for (size_t i = 0; i < ARRAY_SIZE(sp.data); ++i)
+ FAIL_IF(sp.data[i] != 0);
+
+ FAIL_IF(close(devfd) != 0);
+
+ return 0;
+}
+
+static int check_efault_common(unsigned long cmd)
+{
+ const int devfd = open(DEVPATH, O_RDWR);
+
+ SKIP_IF_MSG(devfd < 0 && errno == ENOENT,
+ DEVPATH " not present");
+
+ FAIL_IF(devfd < 0);
+
+ // Ensure expected error
+ FAIL_IF(ioctl(devfd, cmd, NULL) != -1);
+ FAIL_IF(errno != EFAULT);
+
+ FAIL_IF(close(devfd) != 0);
+
+ return 0;
+}
+
+static int check_efault_get(void)
+{
+ return check_efault_common(PAPR_SYSPARM_IOC_GET);
+}
+
+static int check_efault_set(void)
+{
+ return check_efault_common(PAPR_SYSPARM_IOC_SET);
+}
+
+static int set_hmc0(void)
+{
+ struct papr_sysparm_io_block sp = {
+ .parameter = 0, // HMC0, not a settable parameter
+ };
+ const int devfd = open(DEVPATH, O_RDWR);
+
+ SKIP_IF_MSG(devfd < 0 && errno == ENOENT,
+ DEVPATH " not present");
+
+ FAIL_IF(devfd < 0);
+
+ // Ensure expected error
+ FAIL_IF(ioctl(devfd, PAPR_SYSPARM_IOC_SET, &sp) != -1);
+ SKIP_IF_MSG(errno == EOPNOTSUPP, "operation not supported");
+ FAIL_IF(errno != EPERM);
+
+ FAIL_IF(close(devfd) != 0);
+
+ return 0;
+}
+
+static int set_with_ro_fd(void)
+{
+ struct papr_sysparm_io_block sp = {
+ .parameter = 0, // HMC0, not a settable parameter.
+ };
+ const int devfd = open(DEVPATH, O_RDONLY);
+
+ SKIP_IF_MSG(devfd < 0 && errno == ENOENT,
+ DEVPATH " not present");
+
+ FAIL_IF(devfd < 0);
+
+ // Ensure expected error
+ FAIL_IF(ioctl(devfd, PAPR_SYSPARM_IOC_SET, &sp) != -1);
+ SKIP_IF_MSG(errno == EOPNOTSUPP, "operation not supported");
+
+ // HMC0 isn't a settable parameter and we would normally
+ // expect to get EPERM on attempts to modify it. However, when
+ // the file is open read-only, we expect the driver to prevent
+ // the attempt with a distinct error.
+ FAIL_IF(errno != EBADF);
+
+ FAIL_IF(close(devfd) != 0);
+
+ return 0;
+}
+
+struct sysparm_test {
+ int (*function)(void);
+ const char *description;
+};
+
+static const struct sysparm_test sysparm_tests[] = {
+ {
+ .function = open_close,
+ .description = "open and close " DEVPATH " without issuing commands",
+ },
+ {
+ .function = get_splpar,
+ .description = "retrieve SPLPAR characteristics",
+ },
+ {
+ .function = get_bad_parameter,
+ .description = "verify EOPNOTSUPP for known-bad parameter",
+ },
+ {
+ .function = check_efault_get,
+ .description = "PAPR_SYSPARM_IOC_GET returns EFAULT on bad address",
+ },
+ {
+ .function = check_efault_set,
+ .description = "PAPR_SYSPARM_IOC_SET returns EFAULT on bad address",
+ },
+ {
+ .function = set_hmc0,
+ .description = "ensure EPERM on attempt to update HMC0",
+ },
+ {
+ .function = set_with_ro_fd,
+ .description = "PAPR_IOC_SYSPARM_SET returns EACCES on read-only fd",
+ },
+};
+
+int main(void)
+{
+ size_t fails = 0;
+
+ for (size_t i = 0; i < ARRAY_SIZE(sysparm_tests); ++i) {
+ const struct sysparm_test *t = &sysparm_tests[i];
+
+ if (test_harness(t->function, t->description))
+ ++fails;
+ }
+
+ return fails == 0 ? EXIT_SUCCESS : EXIT_FAILURE;
+}
diff --git a/tools/testing/selftests/powerpc/papr_vpd/.gitignore b/tools/testing/selftests/powerpc/papr_vpd/.gitignore
new file mode 100644
index 000000000000..49285031a656
--- /dev/null
+++ b/tools/testing/selftests/powerpc/papr_vpd/.gitignore
@@ -0,0 +1 @@
+/papr_vpd
diff --git a/tools/testing/selftests/powerpc/papr_vpd/Makefile b/tools/testing/selftests/powerpc/papr_vpd/Makefile
new file mode 100644
index 000000000000..06b719703bfd
--- /dev/null
+++ b/tools/testing/selftests/powerpc/papr_vpd/Makefile
@@ -0,0 +1,12 @@
+# SPDX-License-Identifier: GPL-2.0
+noarg:
+ $(MAKE) -C ../
+
+TEST_GEN_PROGS := papr_vpd
+
+top_srcdir = ../../../../..
+include ../../lib.mk
+
+$(TEST_GEN_PROGS): ../harness.c ../utils.c
+
+$(OUTPUT)/papr_vpd: CFLAGS += $(KHDR_INCLUDES)
diff --git a/tools/testing/selftests/powerpc/papr_vpd/papr_vpd.c b/tools/testing/selftests/powerpc/papr_vpd/papr_vpd.c
new file mode 100644
index 000000000000..505294da1b9f
--- /dev/null
+++ b/tools/testing/selftests/powerpc/papr_vpd/papr_vpd.c
@@ -0,0 +1,352 @@
+// SPDX-License-Identifier: GPL-2.0-only
+#define _GNU_SOURCE
+#include <errno.h>
+#include <fcntl.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/ioctl.h>
+#include <unistd.h>
+
+#include <asm/papr-vpd.h>
+
+#include "utils.h"
+
+#define DEVPATH "/dev/papr-vpd"
+
+static int dev_papr_vpd_open_close(void)
+{
+ const int devfd = open(DEVPATH, O_RDONLY);
+
+ SKIP_IF_MSG(devfd < 0 && errno == ENOENT,
+ DEVPATH " not present");
+
+ FAIL_IF(devfd < 0);
+ FAIL_IF(close(devfd) != 0);
+
+ return 0;
+}
+
+static int dev_papr_vpd_get_handle_all(void)
+{
+ const int devfd = open(DEVPATH, O_RDONLY);
+ struct papr_location_code lc = { .str = "", };
+ off_t size;
+ int fd;
+
+ SKIP_IF_MSG(devfd < 0 && errno == ENOENT,
+ DEVPATH " not present");
+
+ FAIL_IF(devfd < 0);
+
+ errno = 0;
+ fd = ioctl(devfd, PAPR_VPD_IOC_CREATE_HANDLE, &lc);
+ FAIL_IF(errno != 0);
+ FAIL_IF(fd < 0);
+
+ FAIL_IF(close(devfd) != 0);
+
+ size = lseek(fd, 0, SEEK_END);
+ FAIL_IF(size <= 0);
+
+ void *buf = malloc((size_t)size);
+ FAIL_IF(!buf);
+
+ ssize_t consumed = pread(fd, buf, size, 0);
+ FAIL_IF(consumed != size);
+
+ /* Ensure EOF */
+ FAIL_IF(read(fd, buf, size) != 0);
+ FAIL_IF(close(fd));
+
+ /* Verify that the buffer looks like VPD */
+ static const char needle[] = "System VPD";
+ FAIL_IF(!memmem(buf, size, needle, strlen(needle)));
+
+ return 0;
+}
+
+static int dev_papr_vpd_get_handle_byte_at_a_time(void)
+{
+ const int devfd = open(DEVPATH, O_RDONLY);
+ struct papr_location_code lc = { .str = "", };
+ int fd;
+
+ SKIP_IF_MSG(devfd < 0 && errno == ENOENT,
+ DEVPATH " not present");
+
+ FAIL_IF(devfd < 0);
+
+ errno = 0;
+ fd = ioctl(devfd, PAPR_VPD_IOC_CREATE_HANDLE, &lc);
+ FAIL_IF(errno != 0);
+ FAIL_IF(fd < 0);
+
+ FAIL_IF(close(devfd) != 0);
+
+ size_t consumed = 0;
+ while (1) {
+ ssize_t res;
+ char c;
+
+ errno = 0;
+ res = read(fd, &c, sizeof(c));
+ FAIL_IF(res > sizeof(c));
+ FAIL_IF(res < 0);
+ FAIL_IF(errno != 0);
+ consumed += res;
+ if (res == 0)
+ break;
+ }
+
+ FAIL_IF(consumed != lseek(fd, 0, SEEK_END));
+
+ FAIL_IF(close(fd));
+
+ return 0;
+}
+
+
+static int dev_papr_vpd_unterm_loc_code(void)
+{
+ const int devfd = open(DEVPATH, O_RDONLY);
+ struct papr_location_code lc = {};
+ int fd;
+
+ SKIP_IF_MSG(devfd < 0 && errno == ENOENT,
+ DEVPATH " not present");
+
+ FAIL_IF(devfd < 0);
+
+ /*
+ * Place a non-null byte in every element of loc_code; the
+ * driver should reject this input.
+ */
+ memset(lc.str, 'x', ARRAY_SIZE(lc.str));
+
+ errno = 0;
+ fd = ioctl(devfd, PAPR_VPD_IOC_CREATE_HANDLE, &lc);
+ FAIL_IF(fd != -1);
+ FAIL_IF(errno != EINVAL);
+
+ FAIL_IF(close(devfd) != 0);
+ return 0;
+}
+
+static int dev_papr_vpd_null_handle(void)
+{
+ const int devfd = open(DEVPATH, O_RDONLY);
+ int rc;
+
+ SKIP_IF_MSG(devfd < 0 && errno == ENOENT,
+ DEVPATH " not present");
+
+ FAIL_IF(devfd < 0);
+
+ errno = 0;
+ rc = ioctl(devfd, PAPR_VPD_IOC_CREATE_HANDLE, NULL);
+ FAIL_IF(rc != -1);
+ FAIL_IF(errno != EFAULT);
+
+ FAIL_IF(close(devfd) != 0);
+ return 0;
+}
+
+static int papr_vpd_close_handle_without_reading(void)
+{
+ const int devfd = open(DEVPATH, O_RDONLY);
+ struct papr_location_code lc;
+ int fd;
+
+ SKIP_IF_MSG(devfd < 0 && errno == ENOENT,
+ DEVPATH " not present");
+
+ FAIL_IF(devfd < 0);
+
+ errno = 0;
+ fd = ioctl(devfd, PAPR_VPD_IOC_CREATE_HANDLE, &lc);
+ FAIL_IF(errno != 0);
+ FAIL_IF(fd < 0);
+
+ /* close the handle without reading it */
+ FAIL_IF(close(fd) != 0);
+
+ FAIL_IF(close(devfd) != 0);
+ return 0;
+}
+
+static int papr_vpd_reread(void)
+{
+ const int devfd = open(DEVPATH, O_RDONLY);
+ struct papr_location_code lc = { .str = "", };
+ int fd;
+
+ SKIP_IF_MSG(devfd < 0 && errno == ENOENT,
+ DEVPATH " not present");
+
+ FAIL_IF(devfd < 0);
+
+ errno = 0;
+ fd = ioctl(devfd, PAPR_VPD_IOC_CREATE_HANDLE, &lc);
+ FAIL_IF(errno != 0);
+ FAIL_IF(fd < 0);
+
+ FAIL_IF(close(devfd) != 0);
+
+ const off_t size = lseek(fd, 0, SEEK_END);
+ FAIL_IF(size <= 0);
+
+ char *bufs[2];
+
+ for (size_t i = 0; i < ARRAY_SIZE(bufs); ++i) {
+ bufs[i] = malloc(size);
+ FAIL_IF(!bufs[i]);
+ ssize_t consumed = pread(fd, bufs[i], size, 0);
+ FAIL_IF(consumed != size);
+ }
+
+ FAIL_IF(memcmp(bufs[0], bufs[1], size));
+
+ FAIL_IF(close(fd) != 0);
+
+ return 0;
+}
+
+static int get_system_loc_code(struct papr_location_code *lc)
+{
+ static const char system_id_path[] = "/sys/firmware/devicetree/base/system-id";
+ static const char model_path[] = "/sys/firmware/devicetree/base/model";
+ char *system_id;
+ char *model;
+ int err = -1;
+
+ if (read_file_alloc(model_path, &model, NULL))
+ return err;
+
+ if (read_file_alloc(system_id_path, &system_id, NULL))
+ goto free_model;
+
+ char *mtm;
+ int sscanf_ret = sscanf(model, "IBM,%ms", &mtm);
+ if (sscanf_ret != 1)
+ goto free_system_id;
+
+ char *plant_and_seq;
+ if (sscanf(system_id, "IBM,%*c%*c%ms", &plant_and_seq) != 1)
+ goto free_mtm;
+ /*
+ * Replace - with . to build location code.
+ */
+ char *sep = strchr(mtm, '-');
+ if (!sep)
+ goto free_mtm;
+ else
+ *sep = '.';
+
+ snprintf(lc->str, sizeof(lc->str),
+ "U%s.%s", mtm, plant_and_seq);
+ err = 0;
+
+ free(plant_and_seq);
+free_mtm:
+ free(mtm);
+free_system_id:
+ free(system_id);
+free_model:
+ free(model);
+ return err;
+}
+
+static int papr_vpd_system_loc_code(void)
+{
+ struct papr_location_code lc;
+ const int devfd = open(DEVPATH, O_RDONLY);
+ off_t size;
+ int fd;
+
+ SKIP_IF_MSG(devfd < 0 && errno == ENOENT,
+ DEVPATH " not present");
+ SKIP_IF_MSG(get_system_loc_code(&lc),
+ "Cannot determine system location code");
+
+ FAIL_IF(devfd < 0);
+
+ errno = 0;
+ fd = ioctl(devfd, PAPR_VPD_IOC_CREATE_HANDLE, &lc);
+ FAIL_IF(errno != 0);
+ FAIL_IF(fd < 0);
+
+ FAIL_IF(close(devfd) != 0);
+
+ size = lseek(fd, 0, SEEK_END);
+ FAIL_IF(size <= 0);
+
+ void *buf = malloc((size_t)size);
+ FAIL_IF(!buf);
+
+ ssize_t consumed = pread(fd, buf, size, 0);
+ FAIL_IF(consumed != size);
+
+ /* Ensure EOF */
+ FAIL_IF(read(fd, buf, size) != 0);
+ FAIL_IF(close(fd));
+
+ /* Verify that the buffer looks like VPD */
+ static const char needle[] = "System VPD";
+ FAIL_IF(!memmem(buf, size, needle, strlen(needle)));
+
+ return 0;
+}
+
+struct vpd_test {
+ int (*function)(void);
+ const char *description;
+};
+
+static const struct vpd_test vpd_tests[] = {
+ {
+ .function = dev_papr_vpd_open_close,
+ .description = "open/close " DEVPATH,
+ },
+ {
+ .function = dev_papr_vpd_unterm_loc_code,
+ .description = "ensure EINVAL on unterminated location code",
+ },
+ {
+ .function = dev_papr_vpd_null_handle,
+ .description = "ensure EFAULT on bad handle addr",
+ },
+ {
+ .function = dev_papr_vpd_get_handle_all,
+ .description = "get handle for all VPD"
+ },
+ {
+ .function = papr_vpd_close_handle_without_reading,
+ .description = "close handle without consuming VPD"
+ },
+ {
+ .function = dev_papr_vpd_get_handle_byte_at_a_time,
+ .description = "read all VPD one byte at a time"
+ },
+ {
+ .function = papr_vpd_reread,
+ .description = "ensure re-read yields same results"
+ },
+ {
+ .function = papr_vpd_system_loc_code,
+ .description = "get handle for system VPD"
+ },
+};
+
+int main(void)
+{
+ size_t fails = 0;
+
+ for (size_t i = 0; i < ARRAY_SIZE(vpd_tests); ++i) {
+ const struct vpd_test *t = &vpd_tests[i];
+
+ if (test_harness(t->function, t->description))
+ ++fails;
+ }
+
+ return fails == 0 ? EXIT_SUCCESS : EXIT_FAILURE;
+}
diff --git a/tools/testing/selftests/powerpc/pmu/Makefile b/tools/testing/selftests/powerpc/pmu/Makefile
index 904672fb78dd..a284fa874a9f 100644
--- a/tools/testing/selftests/powerpc/pmu/Makefile
+++ b/tools/testing/selftests/powerpc/pmu/Makefile
@@ -8,7 +8,7 @@ EXTRA_SOURCES := ../harness.c event.c lib.c ../utils.c
top_srcdir = ../../../../..
include ../../lib.mk
-all: $(TEST_GEN_PROGS) ebb
+all: $(TEST_GEN_PROGS) ebb sampling_tests event_code_tests
$(TEST_GEN_PROGS): $(EXTRA_SOURCES)
@@ -25,26 +25,44 @@ $(OUTPUT)/per_event_excludes: ../utils.c
DEFAULT_RUN_TESTS := $(RUN_TESTS)
override define RUN_TESTS
$(DEFAULT_RUN_TESTS)
- TARGET=ebb; BUILD_TARGET=$$OUTPUT/$$TARGET; $(MAKE) OUTPUT=$$BUILD_TARGET -C $$TARGET run_tests
+ +TARGET=ebb; BUILD_TARGET=$$OUTPUT/$$TARGET; $(MAKE) OUTPUT=$$BUILD_TARGET -C $$TARGET run_tests
+ +TARGET=sampling_tests; BUILD_TARGET=$$OUTPUT/$$TARGET; $(MAKE) OUTPUT=$$BUILD_TARGET -C $$TARGET run_tests
+ +TARGET=event_code_tests; BUILD_TARGET=$$OUTPUT/$$TARGET; $(MAKE) OUTPUT=$$BUILD_TARGET -C $$TARGET run_tests
endef
-DEFAULT_EMIT_TESTS := $(EMIT_TESTS)
-override define EMIT_TESTS
- $(DEFAULT_EMIT_TESTS)
- TARGET=ebb; BUILD_TARGET=$$OUTPUT/$$TARGET; $(MAKE) OUTPUT=$$BUILD_TARGET -s -C $$TARGET emit_tests
-endef
+emit_tests:
+ for TEST in $(TEST_GEN_PROGS); do \
+ BASENAME_TEST=`basename $$TEST`; \
+ echo "$(COLLECTION):$$BASENAME_TEST"; \
+ done
+ +TARGET=ebb; BUILD_TARGET=$$OUTPUT/$$TARGET; $(MAKE) OUTPUT=$$BUILD_TARGET -s -C $$TARGET emit_tests
+ +TARGET=sampling_tests; BUILD_TARGET=$$OUTPUT/$$TARGET; $(MAKE) OUTPUT=$$BUILD_TARGET -s -C $$TARGET emit_tests
+ +TARGET=event_code_tests; BUILD_TARGET=$$OUTPUT/$$TARGET; $(MAKE) OUTPUT=$$BUILD_TARGET -s -C $$TARGET emit_tests
DEFAULT_INSTALL_RULE := $(INSTALL_RULE)
override define INSTALL_RULE
$(DEFAULT_INSTALL_RULE)
- TARGET=ebb; BUILD_TARGET=$$OUTPUT/$$TARGET; $(MAKE) OUTPUT=$$BUILD_TARGET -C $$TARGET install
+ +TARGET=ebb; BUILD_TARGET=$$OUTPUT/$$TARGET; $(MAKE) OUTPUT=$$BUILD_TARGET -C $$TARGET install
+ +TARGET=sampling_tests; BUILD_TARGET=$$OUTPUT/$$TARGET; $(MAKE) OUTPUT=$$BUILD_TARGET -C $$TARGET install
+ +TARGET=event_code_tests; BUILD_TARGET=$$OUTPUT/$$TARGET; $(MAKE) OUTPUT=$$BUILD_TARGET -C $$TARGET install
endef
-clean:
+DEFAULT_CLEAN := $(CLEAN)
+override define CLEAN
+ $(DEFAULT_CLEAN)
$(RM) $(TEST_GEN_PROGS) $(OUTPUT)/loop.o
- TARGET=ebb; BUILD_TARGET=$$OUTPUT/$$TARGET; $(MAKE) OUTPUT=$$BUILD_TARGET -C $$TARGET clean
+ +TARGET=ebb; BUILD_TARGET=$$OUTPUT/$$TARGET; $(MAKE) OUTPUT=$$BUILD_TARGET -C $$TARGET clean
+ +TARGET=sampling_tests; BUILD_TARGET=$$OUTPUT/$$TARGET; $(MAKE) OUTPUT=$$BUILD_TARGET -C $$TARGET clean
+ +TARGET=event_code_tests; BUILD_TARGET=$$OUTPUT/$$TARGET; $(MAKE) OUTPUT=$$BUILD_TARGET -C $$TARGET clean
+endef
ebb:
TARGET=$@; BUILD_TARGET=$$OUTPUT/$$TARGET; mkdir -p $$BUILD_TARGET; $(MAKE) OUTPUT=$$BUILD_TARGET -k -C $$TARGET all
-.PHONY: all run_tests clean ebb
+sampling_tests:
+ TARGET=$@; BUILD_TARGET=$$OUTPUT/$$TARGET; mkdir -p $$BUILD_TARGET; $(MAKE) OUTPUT=$$BUILD_TARGET -k -C $$TARGET all
+
+event_code_tests:
+ TARGET=$@; BUILD_TARGET=$$OUTPUT/$$TARGET; mkdir -p $$BUILD_TARGET; $(MAKE) OUTPUT=$$BUILD_TARGET -k -C $$TARGET all
+
+.PHONY: all run_tests ebb sampling_tests event_code_tests emit_tests
diff --git a/tools/testing/selftests/powerpc/pmu/branch_loops.S b/tools/testing/selftests/powerpc/pmu/branch_loops.S
new file mode 100644
index 000000000000..de758dd3cecf
--- /dev/null
+++ b/tools/testing/selftests/powerpc/pmu/branch_loops.S
@@ -0,0 +1,28 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * Copyright 2022, Kajol Jain, IBM Corp.
+ */
+
+#include <ppc-asm.h>
+
+ .text
+
+#define ITER_SHIFT 31
+
+FUNC_START(indirect_branch_loop)
+ li r3, 1
+ sldi r3, r3, ITER_SHIFT
+
+1: cmpdi r3, 0
+ beqlr
+
+ addi r3, r3, -1
+
+ ld r4, 2f@got(%r2)
+ mtctr r4
+ bctr
+
+ .balign 32
+2: b 1b
+
+FUNC_END(indirect_branch_loop)
diff --git a/tools/testing/selftests/powerpc/pmu/count_stcx_fail.c b/tools/testing/selftests/powerpc/pmu/count_stcx_fail.c
index 7b4ac4537702..2070a1e2b3a5 100644
--- a/tools/testing/selftests/powerpc/pmu/count_stcx_fail.c
+++ b/tools/testing/selftests/powerpc/pmu/count_stcx_fail.c
@@ -104,6 +104,9 @@ static int test_body(void)
struct event events[3];
u64 overhead;
+ // The STCX_FAIL event we use works on Power8 or later
+ SKIP_IF(!have_hwcap2(PPC_FEATURE2_ARCH_2_07));
+
setup_event(&events[0], PERF_COUNT_HW_INSTRUCTIONS, PERF_TYPE_HARDWARE, "instructions");
setup_event(&events[1], PERF_COUNT_HW_CPU_CYCLES, PERF_TYPE_HARDWARE, "cycles");
setup_event(&events[2], PM_STCX_FAIL, PERF_TYPE_RAW, "stcx_fail");
diff --git a/tools/testing/selftests/powerpc/pmu/ebb/.gitignore b/tools/testing/selftests/powerpc/pmu/ebb/.gitignore
index 2920fb39439b..64d8dfdac74a 100644
--- a/tools/testing/selftests/powerpc/pmu/ebb/.gitignore
+++ b/tools/testing/selftests/powerpc/pmu/ebb/.gitignore
@@ -21,3 +21,4 @@ back_to_back_ebbs_test
lost_exception_test
no_handler_test
cycles_with_mmcr2_test
+regs_access_pmccext_test
diff --git a/tools/testing/selftests/powerpc/pmu/ebb/Makefile b/tools/testing/selftests/powerpc/pmu/ebb/Makefile
index ca35dd8848b0..010160690227 100644
--- a/tools/testing/selftests/powerpc/pmu/ebb/Makefile
+++ b/tools/testing/selftests/powerpc/pmu/ebb/Makefile
@@ -1,5 +1,5 @@
# SPDX-License-Identifier: GPL-2.0
-include ../../../../../../scripts/Kbuild.include
+include ../../../../../build/Build.include
noarg:
$(MAKE) -C ../../
@@ -7,7 +7,7 @@ noarg:
# The EBB handler is 64-bit code and everything links against it
CFLAGS += -m64
-TMPOUT = $(OUTPUT)/
+TMPOUT = $(OUTPUT)/TMPDIR/
# Toolchains may build PIE by default which breaks the assembly
no-pie-option := $(call try-run, echo 'int main() { return 0; }' | \
$(CC) -Werror $(KBUILD_CPPFLAGS) $(CC_OPTION_CFLAGS) -no-pie -x c - -o "$$TMP", -no-pie)
@@ -24,7 +24,7 @@ TEST_GEN_PROGS := reg_access_test event_attributes_test cycles_test \
fork_cleanup_test ebb_on_child_test \
ebb_on_willing_child_test back_to_back_ebbs_test \
lost_exception_test no_handler_test \
- cycles_with_mmcr2_test
+ cycles_with_mmcr2_test regs_access_pmccext_test
top_srcdir = ../../../../../..
include ../../../lib.mk
diff --git a/tools/testing/selftests/powerpc/pmu/ebb/back_to_back_ebbs_test.c b/tools/testing/selftests/powerpc/pmu/ebb/back_to_back_ebbs_test.c
index a2d7b0e3dca9..a26ac122c759 100644
--- a/tools/testing/selftests/powerpc/pmu/ebb/back_to_back_ebbs_test.c
+++ b/tools/testing/selftests/powerpc/pmu/ebb/back_to_back_ebbs_test.c
@@ -91,8 +91,6 @@ int back_to_back_ebbs(void)
ebb_global_disable();
ebb_freeze_pmcs();
- count_pmc(1, sample_period);
-
dump_ebb_state();
event_close(&event);
diff --git a/tools/testing/selftests/powerpc/pmu/ebb/cpu_event_pinned_vs_ebb_test.c b/tools/testing/selftests/powerpc/pmu/ebb/cpu_event_pinned_vs_ebb_test.c
index 3cd33eb51e5e..fab7f34d7ce1 100644
--- a/tools/testing/selftests/powerpc/pmu/ebb/cpu_event_pinned_vs_ebb_test.c
+++ b/tools/testing/selftests/powerpc/pmu/ebb/cpu_event_pinned_vs_ebb_test.c
@@ -45,9 +45,8 @@ int cpu_event_pinned_vs_ebb(void)
SKIP_IF(!ebb_is_supported());
- cpu = pick_online_cpu();
+ cpu = bind_to_cpu(BIND_CPU_ANY);
FAIL_IF(cpu < 0);
- FAIL_IF(bind_to_cpu(cpu));
FAIL_IF(pipe(read_pipe.fds) == -1);
FAIL_IF(pipe(write_pipe.fds) == -1);
diff --git a/tools/testing/selftests/powerpc/pmu/ebb/cpu_event_vs_ebb_test.c b/tools/testing/selftests/powerpc/pmu/ebb/cpu_event_vs_ebb_test.c
index 8466ef9d7de8..7c54c262036e 100644
--- a/tools/testing/selftests/powerpc/pmu/ebb/cpu_event_vs_ebb_test.c
+++ b/tools/testing/selftests/powerpc/pmu/ebb/cpu_event_vs_ebb_test.c
@@ -43,9 +43,8 @@ int cpu_event_vs_ebb(void)
SKIP_IF(!ebb_is_supported());
- cpu = pick_online_cpu();
+ cpu = bind_to_cpu(BIND_CPU_ANY);
FAIL_IF(cpu < 0);
- FAIL_IF(bind_to_cpu(cpu));
FAIL_IF(pipe(read_pipe.fds) == -1);
FAIL_IF(pipe(write_pipe.fds) == -1);
diff --git a/tools/testing/selftests/powerpc/pmu/ebb/cycles_test.c b/tools/testing/selftests/powerpc/pmu/ebb/cycles_test.c
index bc893813483e..bb9f587fa76e 100644
--- a/tools/testing/selftests/powerpc/pmu/ebb/cycles_test.c
+++ b/tools/testing/selftests/powerpc/pmu/ebb/cycles_test.c
@@ -42,8 +42,6 @@ int cycles(void)
ebb_global_disable();
ebb_freeze_pmcs();
- count_pmc(1, sample_period);
-
dump_ebb_state();
event_close(&event);
diff --git a/tools/testing/selftests/powerpc/pmu/ebb/cycles_with_freeze_test.c b/tools/testing/selftests/powerpc/pmu/ebb/cycles_with_freeze_test.c
index dcd351d20328..9ae795ce314e 100644
--- a/tools/testing/selftests/powerpc/pmu/ebb/cycles_with_freeze_test.c
+++ b/tools/testing/selftests/powerpc/pmu/ebb/cycles_with_freeze_test.c
@@ -99,8 +99,6 @@ int cycles_with_freeze(void)
ebb_global_disable();
ebb_freeze_pmcs();
- count_pmc(1, sample_period);
-
dump_ebb_state();
printf("EBBs while frozen %d\n", ebbs_while_frozen);
diff --git a/tools/testing/selftests/powerpc/pmu/ebb/cycles_with_mmcr2_test.c b/tools/testing/selftests/powerpc/pmu/ebb/cycles_with_mmcr2_test.c
index 94c99c12c0f2..fc32187d483d 100644
--- a/tools/testing/selftests/powerpc/pmu/ebb/cycles_with_mmcr2_test.c
+++ b/tools/testing/selftests/powerpc/pmu/ebb/cycles_with_mmcr2_test.c
@@ -50,6 +50,7 @@ int cycles_with_mmcr2(void)
expected[1] = MMCR2_EXPECTED_2;
i = 0;
bad_mmcr2 = false;
+ actual = 0;
/* Make sure we loop until we take at least one EBB */
while ((ebb_state.stats.ebb_count < 20 && !bad_mmcr2) ||
@@ -71,8 +72,6 @@ int cycles_with_mmcr2(void)
ebb_global_disable();
ebb_freeze_pmcs();
- count_pmc(1, sample_period);
-
dump_ebb_state();
event_close(&event);
diff --git a/tools/testing/selftests/powerpc/pmu/ebb/ebb.c b/tools/testing/selftests/powerpc/pmu/ebb/ebb.c
index dfbc5c3ad52d..21537d6eb6b7 100644
--- a/tools/testing/selftests/powerpc/pmu/ebb/ebb.c
+++ b/tools/testing/selftests/powerpc/pmu/ebb/ebb.c
@@ -396,8 +396,6 @@ int ebb_child(union pipe read_pipe, union pipe write_pipe)
ebb_global_disable();
ebb_freeze_pmcs();
- count_pmc(1, sample_period);
-
dump_ebb_state();
event_close(&event);
diff --git a/tools/testing/selftests/powerpc/pmu/ebb/ebb.h b/tools/testing/selftests/powerpc/pmu/ebb/ebb.h
index b5bc2b616075..2c803b5b48d6 100644
--- a/tools/testing/selftests/powerpc/pmu/ebb/ebb.h
+++ b/tools/testing/selftests/powerpc/pmu/ebb/ebb.h
@@ -55,8 +55,6 @@ void ebb_global_disable(void);
bool ebb_is_supported(void);
void ebb_freeze_pmcs(void);
void ebb_unfreeze_pmcs(void);
-void event_ebb_init(struct event *e);
-void event_leader_ebb_init(struct event *e);
int count_pmc(int pmc, uint32_t sample_period);
void dump_ebb_state(void);
void dump_summary_ebb_state(void);
diff --git a/tools/testing/selftests/powerpc/pmu/ebb/ebb_on_willing_child_test.c b/tools/testing/selftests/powerpc/pmu/ebb/ebb_on_willing_child_test.c
index ca2f7d729155..b208bf6ad58d 100644
--- a/tools/testing/selftests/powerpc/pmu/ebb/ebb_on_willing_child_test.c
+++ b/tools/testing/selftests/powerpc/pmu/ebb/ebb_on_willing_child_test.c
@@ -38,8 +38,6 @@ static int victim_child(union pipe read_pipe, union pipe write_pipe)
ebb_global_disable();
ebb_freeze_pmcs();
- count_pmc(1, sample_period);
-
dump_ebb_state();
FAIL_IF(ebb_state.stats.ebb_count == 0);
diff --git a/tools/testing/selftests/powerpc/pmu/ebb/ebb_vs_cpu_event_test.c b/tools/testing/selftests/powerpc/pmu/ebb/ebb_vs_cpu_event_test.c
index 4d822cb3589c..d7064b54c64f 100644
--- a/tools/testing/selftests/powerpc/pmu/ebb/ebb_vs_cpu_event_test.c
+++ b/tools/testing/selftests/powerpc/pmu/ebb/ebb_vs_cpu_event_test.c
@@ -43,9 +43,8 @@ int ebb_vs_cpu_event(void)
SKIP_IF(!ebb_is_supported());
- cpu = pick_online_cpu();
+ cpu = bind_to_cpu(BIND_CPU_ANY);
FAIL_IF(cpu < 0);
- FAIL_IF(bind_to_cpu(cpu));
FAIL_IF(pipe(read_pipe.fds) == -1);
FAIL_IF(pipe(write_pipe.fds) == -1);
diff --git a/tools/testing/selftests/powerpc/pmu/ebb/fixed_instruction_loop.S b/tools/testing/selftests/powerpc/pmu/ebb/fixed_instruction_loop.S
deleted file mode 100644
index 08a7b5f133b9..000000000000
--- a/tools/testing/selftests/powerpc/pmu/ebb/fixed_instruction_loop.S
+++ /dev/null
@@ -1,43 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0-only */
-/*
- * Copyright 2014, Michael Ellerman, IBM Corp.
- */
-
-#include <ppc-asm.h>
-
- .text
-
-FUNC_START(thirty_two_instruction_loop)
- cmpwi r3,0
- beqlr
- addi r4,r3,1
- addi r4,r4,1
- addi r4,r4,1
- addi r4,r4,1
- addi r4,r4,1
- addi r4,r4,1
- addi r4,r4,1
- addi r4,r4,1
- addi r4,r4,1
- addi r4,r4,1
- addi r4,r4,1
- addi r4,r4,1
- addi r4,r4,1
- addi r4,r4,1
- addi r4,r4,1
- addi r4,r4,1
- addi r4,r4,1
- addi r4,r4,1
- addi r4,r4,1
- addi r4,r4,1
- addi r4,r4,1
- addi r4,r4,1
- addi r4,r4,1
- addi r4,r4,1
- addi r4,r4,1
- addi r4,r4,1
- addi r4,r4,1
- addi r4,r4,1 # 28 addi's
- subi r3,r3,1
- b FUNC_NAME(thirty_two_instruction_loop)
-FUNC_END(thirty_two_instruction_loop)
diff --git a/tools/testing/selftests/powerpc/pmu/ebb/lost_exception_test.c b/tools/testing/selftests/powerpc/pmu/ebb/lost_exception_test.c
index ac3e6e182614..ba2681a12cc7 100644
--- a/tools/testing/selftests/powerpc/pmu/ebb/lost_exception_test.c
+++ b/tools/testing/selftests/powerpc/pmu/ebb/lost_exception_test.c
@@ -75,7 +75,6 @@ static int test_body(void)
ebb_freeze_pmcs();
ebb_global_disable();
- count_pmc(4, sample_period);
mtspr(SPRN_PMC4, 0xdead);
dump_summary_ebb_state();
diff --git a/tools/testing/selftests/powerpc/pmu/ebb/multi_counter_test.c b/tools/testing/selftests/powerpc/pmu/ebb/multi_counter_test.c
index b8242e9d97d2..791d37ba327b 100644
--- a/tools/testing/selftests/powerpc/pmu/ebb/multi_counter_test.c
+++ b/tools/testing/selftests/powerpc/pmu/ebb/multi_counter_test.c
@@ -70,13 +70,6 @@ int multi_counter(void)
ebb_global_disable();
ebb_freeze_pmcs();
- count_pmc(1, sample_period);
- count_pmc(2, sample_period);
- count_pmc(3, sample_period);
- count_pmc(4, sample_period);
- count_pmc(5, sample_period);
- count_pmc(6, sample_period);
-
dump_ebb_state();
for (i = 0; i < 6; i++)
diff --git a/tools/testing/selftests/powerpc/pmu/ebb/multi_ebb_procs_test.c b/tools/testing/selftests/powerpc/pmu/ebb/multi_ebb_procs_test.c
index a05c0e18ded6..4ac22b2e774f 100644
--- a/tools/testing/selftests/powerpc/pmu/ebb/multi_ebb_procs_test.c
+++ b/tools/testing/selftests/powerpc/pmu/ebb/multi_ebb_procs_test.c
@@ -61,8 +61,6 @@ static int cycles_child(void)
ebb_global_disable();
ebb_freeze_pmcs();
- count_pmc(1, sample_period);
-
dump_summary_ebb_state();
event_close(&event);
@@ -77,13 +75,11 @@ static int cycles_child(void)
int multi_ebb_procs(void)
{
pid_t pids[NR_CHILDREN];
- int cpu, rc, i;
+ int rc, i;
SKIP_IF(!ebb_is_supported());
- cpu = pick_online_cpu();
- FAIL_IF(cpu < 0);
- FAIL_IF(bind_to_cpu(cpu));
+ FAIL_IF(bind_to_cpu(BIND_CPU_ANY) < 0);
for (i = 0; i < NR_CHILDREN; i++) {
pids[i] = fork();
diff --git a/tools/testing/selftests/powerpc/pmu/ebb/no_handler_test.c b/tools/testing/selftests/powerpc/pmu/ebb/no_handler_test.c
index fc5bf4870d8e..01e827c31169 100644
--- a/tools/testing/selftests/powerpc/pmu/ebb/no_handler_test.c
+++ b/tools/testing/selftests/powerpc/pmu/ebb/no_handler_test.c
@@ -50,8 +50,6 @@ static int no_handler_test(void)
event_close(&event);
- dump_ebb_state();
-
/* The real test is that we never took an EBB at 0x0 */
return 0;
diff --git a/tools/testing/selftests/powerpc/pmu/ebb/pmae_handling_test.c b/tools/testing/selftests/powerpc/pmu/ebb/pmae_handling_test.c
index 153ebc92234f..2904c741e04e 100644
--- a/tools/testing/selftests/powerpc/pmu/ebb/pmae_handling_test.c
+++ b/tools/testing/selftests/powerpc/pmu/ebb/pmae_handling_test.c
@@ -82,8 +82,6 @@ static int test_body(void)
ebb_global_disable();
ebb_freeze_pmcs();
- count_pmc(1, sample_period);
-
dump_ebb_state();
if (mmcr0_mismatch)
diff --git a/tools/testing/selftests/powerpc/pmu/ebb/pmc56_overflow_test.c b/tools/testing/selftests/powerpc/pmu/ebb/pmc56_overflow_test.c
index eadad75ed7e6..b29f8ba22d1e 100644
--- a/tools/testing/selftests/powerpc/pmu/ebb/pmc56_overflow_test.c
+++ b/tools/testing/selftests/powerpc/pmu/ebb/pmc56_overflow_test.c
@@ -76,8 +76,6 @@ int pmc56_overflow(void)
ebb_global_disable();
ebb_freeze_pmcs();
- count_pmc(2, sample_period);
-
dump_ebb_state();
printf("PMC5/6 overflow %d\n", pmc56_overflowed);
diff --git a/tools/testing/selftests/powerpc/pmu/ebb/regs_access_pmccext_test.c b/tools/testing/selftests/powerpc/pmu/ebb/regs_access_pmccext_test.c
new file mode 100644
index 000000000000..1eda8e9932e8
--- /dev/null
+++ b/tools/testing/selftests/powerpc/pmu/ebb/regs_access_pmccext_test.c
@@ -0,0 +1,63 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright 2021, Athira Rajeev, IBM Corp.
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <setjmp.h>
+#include <signal.h>
+
+#include "ebb.h"
+
+
+/*
+ * Test that closing the EBB event clears MMCR0_PMCC and
+ * sets MMCR0_PMCCEXT preventing further read access to the
+ * group B PMU registers.
+ */
+
+static int regs_access_pmccext(void)
+{
+ struct event event;
+
+ SKIP_IF(!ebb_is_supported());
+
+ event_init_named(&event, 0x1001e, "cycles");
+ event_leader_ebb_init(&event);
+
+ FAIL_IF(event_open(&event));
+
+ ebb_enable_pmc_counting(1);
+ setup_ebb_handler(standard_ebb_callee);
+ ebb_global_enable();
+ FAIL_IF(ebb_event_enable(&event));
+
+ mtspr(SPRN_PMC1, pmc_sample_period(sample_period));
+
+ while (ebb_state.stats.ebb_count < 1)
+ FAIL_IF(core_busy_loop());
+
+ ebb_global_disable();
+ event_close(&event);
+
+ FAIL_IF(ebb_state.stats.ebb_count == 0);
+
+ /*
+ * For ISA v3.1, verify the test takes a SIGILL when reading
+ * PMU regs after the event is closed. With the control bit
+ * in MMCR0 (PMCCEXT) restricting access to group B PMU regs,
+ * sigill is expected.
+ */
+ if (have_hwcap2(PPC_FEATURE2_ARCH_3_1))
+ FAIL_IF(catch_sigill(dump_ebb_state));
+ else
+ dump_ebb_state();
+
+ return 0;
+}
+
+int main(void)
+{
+ return test_harness(regs_access_pmccext, "regs_access_pmccext");
+}
diff --git a/tools/testing/selftests/powerpc/pmu/event.c b/tools/testing/selftests/powerpc/pmu/event.c
index 48e3a413b15d..0c1c1bdba081 100644
--- a/tools/testing/selftests/powerpc/pmu/event.c
+++ b/tools/testing/selftests/powerpc/pmu/event.c
@@ -8,6 +8,7 @@
#include <sys/syscall.h>
#include <string.h>
#include <stdio.h>
+#include <stdbool.h>
#include <sys/ioctl.h>
#include "event.h"
@@ -20,7 +21,8 @@ int perf_event_open(struct perf_event_attr *attr, pid_t pid, int cpu,
group_fd, flags);
}
-void event_init_opts(struct event *e, u64 config, int type, char *name)
+static void __event_init_opts(struct event *e, u64 config,
+ int type, char *name, bool sampling)
{
memset(e, 0, sizeof(*e));
@@ -32,6 +34,16 @@ void event_init_opts(struct event *e, u64 config, int type, char *name)
/* This has to match the structure layout in the header */
e->attr.read_format = PERF_FORMAT_TOTAL_TIME_ENABLED | \
PERF_FORMAT_TOTAL_TIME_RUNNING;
+ if (sampling) {
+ e->attr.sample_period = 1000;
+ e->attr.sample_type = PERF_SAMPLE_REGS_INTR;
+ e->attr.disabled = 1;
+ }
+}
+
+void event_init_opts(struct event *e, u64 config, int type, char *name)
+{
+ __event_init_opts(e, config, type, name, false);
}
void event_init_named(struct event *e, u64 config, char *name)
@@ -44,6 +56,11 @@ void event_init(struct event *e, u64 config)
event_init_opts(e, config, PERF_TYPE_RAW, "event");
}
+void event_init_sampling(struct event *e, u64 config)
+{
+ __event_init_opts(e, config, PERF_TYPE_RAW, "event", true);
+}
+
#define PERF_CURRENT_PID 0
#define PERF_NO_PID -1
#define PERF_NO_CPU -1
diff --git a/tools/testing/selftests/powerpc/pmu/event.h b/tools/testing/selftests/powerpc/pmu/event.h
index 302eaab51706..51aad0b6d9ad 100644
--- a/tools/testing/selftests/powerpc/pmu/event.h
+++ b/tools/testing/selftests/powerpc/pmu/event.h
@@ -22,11 +22,17 @@ struct event {
u64 running;
u64 enabled;
} result;
+ /*
+ * mmap buffer used while recording sample.
+ * Accessed as "struct perf_event_mmap_page"
+ */
+ void *mmap_buffer;
};
void event_init(struct event *e, u64 config);
void event_init_named(struct event *e, u64 config, char *name);
void event_init_opts(struct event *e, u64 config, int type, char *name);
+void event_init_sampling(struct event *e, u64 config);
int event_open_with_options(struct event *e, pid_t pid, int cpu, int group_fd);
int event_open_with_group(struct event *e, int group_fd);
int event_open_with_pid(struct event *e, pid_t pid);
diff --git a/tools/testing/selftests/powerpc/pmu/event_code_tests/.gitignore b/tools/testing/selftests/powerpc/pmu/event_code_tests/.gitignore
new file mode 100644
index 000000000000..5710683da525
--- /dev/null
+++ b/tools/testing/selftests/powerpc/pmu/event_code_tests/.gitignore
@@ -0,0 +1,20 @@
+blacklisted_events_test
+event_alternatives_tests_p10
+event_alternatives_tests_p9
+generic_events_valid_test
+group_constraint_cache_test
+group_constraint_l2l3_sel_test
+group_constraint_mmcra_sample_test
+group_constraint_pmc56_test
+group_constraint_pmc_count_test
+group_constraint_radix_scope_qual_test
+group_constraint_repeat_test
+group_constraint_thresh_cmp_test
+group_constraint_thresh_ctl_test
+group_constraint_thresh_sel_test
+group_constraint_unit_test
+group_pmc56_exclude_constraints_test
+hw_cache_event_type_test
+invalid_event_code_test
+reserved_bits_mmcra_sample_elig_mode_test
+reserved_bits_mmcra_thresh_ctl_test
diff --git a/tools/testing/selftests/powerpc/pmu/event_code_tests/Makefile b/tools/testing/selftests/powerpc/pmu/event_code_tests/Makefile
new file mode 100644
index 000000000000..4e07d7046457
--- /dev/null
+++ b/tools/testing/selftests/powerpc/pmu/event_code_tests/Makefile
@@ -0,0 +1,15 @@
+# SPDX-License-Identifier: GPL-2.0
+CFLAGS += -m64
+
+TEST_GEN_PROGS := group_constraint_pmc56_test group_pmc56_exclude_constraints_test group_constraint_pmc_count_test \
+ group_constraint_repeat_test group_constraint_radix_scope_qual_test reserved_bits_mmcra_sample_elig_mode_test \
+ group_constraint_mmcra_sample_test invalid_event_code_test reserved_bits_mmcra_thresh_ctl_test \
+ blacklisted_events_test event_alternatives_tests_p9 event_alternatives_tests_p10 generic_events_valid_test \
+ group_constraint_l2l3_sel_test group_constraint_cache_test group_constraint_thresh_cmp_test \
+ group_constraint_unit_test group_constraint_thresh_ctl_test group_constraint_thresh_sel_test \
+ hw_cache_event_type_test
+
+top_srcdir = ../../../../../..
+include ../../../lib.mk
+
+$(TEST_GEN_PROGS): ../../harness.c ../../utils.c ../event.c ../lib.c ../sampling_tests/misc.h ../sampling_tests/misc.c
diff --git a/tools/testing/selftests/powerpc/pmu/event_code_tests/blacklisted_events_test.c b/tools/testing/selftests/powerpc/pmu/event_code_tests/blacklisted_events_test.c
new file mode 100644
index 000000000000..fafeff19cb34
--- /dev/null
+++ b/tools/testing/selftests/powerpc/pmu/event_code_tests/blacklisted_events_test.c
@@ -0,0 +1,132 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright 2022, Athira Rajeev, IBM Corp.
+ */
+
+#include <stdio.h>
+#include <sys/prctl.h>
+#include <limits.h>
+#include "../event.h"
+#include "../sampling_tests/misc.h"
+
+#define PM_DTLB_MISS_16G 0x1c058
+#define PM_DERAT_MISS_2M 0x1c05a
+#define PM_DTLB_MISS_2M 0x1c05c
+#define PM_MRK_DTLB_MISS_1G 0x1d15c
+#define PM_DTLB_MISS_4K 0x2c056
+#define PM_DERAT_MISS_1G 0x2c05a
+#define PM_MRK_DERAT_MISS_2M 0x2d152
+#define PM_MRK_DTLB_MISS_4K 0x2d156
+#define PM_MRK_DTLB_MISS_16G 0x2d15e
+#define PM_DTLB_MISS_64K 0x3c056
+#define PM_MRK_DERAT_MISS_1G 0x3d152
+#define PM_MRK_DTLB_MISS_64K 0x3d156
+#define PM_DISP_HELD_SYNC_HOLD 0x4003c
+#define PM_DTLB_MISS_16M 0x4c056
+#define PM_DTLB_MISS_1G 0x4c05a
+#define PM_MRK_DTLB_MISS_16M 0x4c15e
+#define PM_MRK_ST_DONE_L2 0x10134
+#define PM_RADIX_PWC_L1_HIT 0x1f056
+#define PM_FLOP_CMPL 0x100f4
+#define PM_MRK_NTF_FIN 0x20112
+#define PM_RADIX_PWC_L2_HIT 0x2d024
+#define PM_IFETCH_THROTTLE 0x3405e
+#define PM_MRK_L2_TM_ST_ABORT_SISTER 0x3e15c
+#define PM_RADIX_PWC_L3_HIT 0x3f056
+#define PM_RUN_CYC_SMT2_MODE 0x3006c
+#define PM_TM_TX_PASS_RUN_INST 0x4e014
+
+#define PVR_POWER9_CUMULUS 0x00002000
+
+int blacklist_events_dd21[] = {
+ PM_MRK_ST_DONE_L2,
+ PM_RADIX_PWC_L1_HIT,
+ PM_FLOP_CMPL,
+ PM_MRK_NTF_FIN,
+ PM_RADIX_PWC_L2_HIT,
+ PM_IFETCH_THROTTLE,
+ PM_MRK_L2_TM_ST_ABORT_SISTER,
+ PM_RADIX_PWC_L3_HIT,
+ PM_RUN_CYC_SMT2_MODE,
+ PM_TM_TX_PASS_RUN_INST,
+ PM_DISP_HELD_SYNC_HOLD,
+};
+
+int blacklist_events_dd22[] = {
+ PM_DTLB_MISS_16G,
+ PM_DERAT_MISS_2M,
+ PM_DTLB_MISS_2M,
+ PM_MRK_DTLB_MISS_1G,
+ PM_DTLB_MISS_4K,
+ PM_DERAT_MISS_1G,
+ PM_MRK_DERAT_MISS_2M,
+ PM_MRK_DTLB_MISS_4K,
+ PM_MRK_DTLB_MISS_16G,
+ PM_DTLB_MISS_64K,
+ PM_MRK_DERAT_MISS_1G,
+ PM_MRK_DTLB_MISS_64K,
+ PM_DISP_HELD_SYNC_HOLD,
+ PM_DTLB_MISS_16M,
+ PM_DTLB_MISS_1G,
+ PM_MRK_DTLB_MISS_16M,
+};
+
+int pvr_min;
+
+/*
+ * check for power9 support for 2.1 and
+ * 2.2 model where blacklist is applicable.
+ */
+int check_for_power9_version(void)
+{
+ pvr_min = PVR_MIN(mfspr(SPRN_PVR));
+
+ SKIP_IF(PVR_VER(pvr) != POWER9);
+ SKIP_IF(!(pvr & PVR_POWER9_CUMULUS));
+
+ SKIP_IF(!(3 - pvr_min));
+
+ return 0;
+}
+
+/*
+ * Testcase to ensure that using blacklisted bits in
+ * event code should cause event_open to fail in power9
+ */
+
+static int blacklisted_events(void)
+{
+ struct event event;
+ int i = 0;
+
+ /* Check for platform support for the test */
+ SKIP_IF(platform_check_for_tests());
+
+ /*
+ * check for power9 support for 2.1 and
+ * 2.2 model where blacklist is applicable.
+ */
+ SKIP_IF(check_for_power9_version());
+
+ /* Skip for Generic compat mode */
+ SKIP_IF(check_for_generic_compat_pmu());
+
+ if (pvr_min == 1) {
+ for (i = 0; i < ARRAY_SIZE(blacklist_events_dd21); i++) {
+ event_init(&event, blacklist_events_dd21[i]);
+ FAIL_IF(!event_open(&event));
+ }
+ } else if (pvr_min == 2) {
+ for (i = 0; i < ARRAY_SIZE(blacklist_events_dd22); i++) {
+ event_init(&event, blacklist_events_dd22[i]);
+ FAIL_IF(!event_open(&event));
+ }
+ }
+
+ return 0;
+}
+
+int main(void)
+{
+ return test_harness(blacklisted_events, "blacklisted_events");
+}
diff --git a/tools/testing/selftests/powerpc/pmu/event_code_tests/event_alternatives_tests_p10.c b/tools/testing/selftests/powerpc/pmu/event_code_tests/event_alternatives_tests_p10.c
new file mode 100644
index 000000000000..8be7aada6523
--- /dev/null
+++ b/tools/testing/selftests/powerpc/pmu/event_code_tests/event_alternatives_tests_p10.c
@@ -0,0 +1,109 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright 2022, Athira Rajeev, IBM Corp.
+ */
+
+#include <stdio.h>
+#include "../event.h"
+#include "../sampling_tests/misc.h"
+
+#define PM_RUN_CYC_ALT 0x200f4
+#define PM_INST_DISP 0x200f2
+#define PM_BR_2PATH 0x20036
+#define PM_LD_MISS_L1 0x3e054
+#define PM_RUN_INST_CMPL_ALT 0x400fa
+
+#define EventCode_1 0x100fc
+#define EventCode_2 0x200fa
+#define EventCode_3 0x300fc
+#define EventCode_4 0x400fc
+
+/*
+ * Check for event alternatives.
+ */
+
+static int event_alternatives_tests_p10(void)
+{
+ struct event *e, events[5];
+ int i;
+
+ /* Check for platform support for the test */
+ SKIP_IF(platform_check_for_tests());
+
+ /*
+ * PVR check is used here since PMU specific data like
+ * alternative events is handled by respective PMU driver
+ * code and using PVR will work correctly for all cases
+ * including generic compat mode.
+ */
+ SKIP_IF(PVR_VER(mfspr(SPRN_PVR)) != POWER10);
+
+ SKIP_IF(check_for_generic_compat_pmu());
+
+ /*
+ * Test for event alternative for 0x0001e
+ * and 0x00002.
+ */
+ e = &events[0];
+ event_init(e, 0x0001e);
+
+ e = &events[1];
+ event_init(e, EventCode_1);
+
+ e = &events[2];
+ event_init(e, EventCode_2);
+
+ e = &events[3];
+ event_init(e, EventCode_3);
+
+ e = &events[4];
+ event_init(e, EventCode_4);
+
+ FAIL_IF(event_open(&events[0]));
+
+ /*
+ * Expected to pass since 0x0001e has alternative event
+ * 0x600f4 in PMC6. So it can go in with other events
+ * in PMC1 to PMC4.
+ */
+ for (i = 1; i < 5; i++)
+ FAIL_IF(event_open_with_group(&events[i], events[0].fd));
+
+ for (i = 0; i < 5; i++)
+ event_close(&events[i]);
+
+ e = &events[0];
+ event_init(e, 0x00002);
+
+ e = &events[1];
+ event_init(e, EventCode_1);
+
+ e = &events[2];
+ event_init(e, EventCode_2);
+
+ e = &events[3];
+ event_init(e, EventCode_3);
+
+ e = &events[4];
+ event_init(e, EventCode_4);
+
+ FAIL_IF(event_open(&events[0]));
+
+ /*
+ * Expected to pass since 0x00020 has alternative event
+ * 0x500fa in PMC5. So it can go in with other events
+ * in PMC1 to PMC4.
+ */
+ for (i = 1; i < 5; i++)
+ FAIL_IF(event_open_with_group(&events[i], events[0].fd));
+
+ for (i = 0; i < 5; i++)
+ event_close(&events[i]);
+
+ return 0;
+}
+
+int main(void)
+{
+ return test_harness(event_alternatives_tests_p10, "event_alternatives_tests_p10");
+}
diff --git a/tools/testing/selftests/powerpc/pmu/event_code_tests/event_alternatives_tests_p9.c b/tools/testing/selftests/powerpc/pmu/event_code_tests/event_alternatives_tests_p9.c
new file mode 100644
index 000000000000..f7dcf0e0447c
--- /dev/null
+++ b/tools/testing/selftests/powerpc/pmu/event_code_tests/event_alternatives_tests_p9.c
@@ -0,0 +1,116 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright 2022, Athira Rajeev, IBM Corp.
+ */
+
+#include <stdio.h>
+#include "../event.h"
+#include "../sampling_tests/misc.h"
+
+#define PM_RUN_CYC_ALT 0x200f4
+#define PM_INST_DISP 0x200f2
+#define PM_BR_2PATH 0x20036
+#define PM_LD_MISS_L1 0x3e054
+#define PM_RUN_INST_CMPL_ALT 0x400fa
+
+#define EventCode_1 0x200fa
+#define EventCode_2 0x200fc
+#define EventCode_3 0x300fc
+#define EventCode_4 0x400fc
+
+/*
+ * Check for event alternatives.
+ */
+
+static int event_alternatives_tests_p9(void)
+{
+ struct event event, leader;
+
+ /* Check for platform support for the test */
+ SKIP_IF(platform_check_for_tests());
+
+ /*
+ * PVR check is used here since PMU specific data like
+ * alternative events is handled by respective PMU driver
+ * code and using PVR will work correctly for all cases
+ * including generic compat mode.
+ */
+ SKIP_IF(PVR_VER(mfspr(SPRN_PVR)) != POWER9);
+
+ /* Skip for generic compat PMU */
+ SKIP_IF(check_for_generic_compat_pmu());
+
+ /* Init the event for PM_RUN_CYC_ALT */
+ event_init(&leader, PM_RUN_CYC_ALT);
+ FAIL_IF(event_open(&leader));
+
+ event_init(&event, EventCode_1);
+
+ /*
+ * Expected to pass since PM_RUN_CYC_ALT in PMC2 has alternative event
+ * 0x600f4. So it can go in with EventCode_1 which is using PMC2
+ */
+ FAIL_IF(event_open_with_group(&event, leader.fd));
+
+ event_close(&leader);
+ event_close(&event);
+
+ event_init(&leader, PM_INST_DISP);
+ FAIL_IF(event_open(&leader));
+
+ event_init(&event, EventCode_2);
+ /*
+ * Expected to pass since PM_INST_DISP in PMC2 has alternative event
+ * 0x300f2 in PMC3. So it can go in with EventCode_2 which is using PMC2
+ */
+ FAIL_IF(event_open_with_group(&event, leader.fd));
+
+ event_close(&leader);
+ event_close(&event);
+
+ event_init(&leader, PM_BR_2PATH);
+ FAIL_IF(event_open(&leader));
+
+ event_init(&event, EventCode_2);
+ /*
+ * Expected to pass since PM_BR_2PATH in PMC2 has alternative event
+ * 0x40036 in PMC4. So it can go in with EventCode_2 which is using PMC2
+ */
+ FAIL_IF(event_open_with_group(&event, leader.fd));
+
+ event_close(&leader);
+ event_close(&event);
+
+ event_init(&leader, PM_LD_MISS_L1);
+ FAIL_IF(event_open(&leader));
+
+ event_init(&event, EventCode_3);
+ /*
+ * Expected to pass since PM_LD_MISS_L1 in PMC3 has alternative event
+ * 0x400f0 in PMC4. So it can go in with EventCode_3 which is using PMC3
+ */
+ FAIL_IF(event_open_with_group(&event, leader.fd));
+
+ event_close(&leader);
+ event_close(&event);
+
+ event_init(&leader, PM_RUN_INST_CMPL_ALT);
+ FAIL_IF(event_open(&leader));
+
+ event_init(&event, EventCode_4);
+ /*
+ * Expected to pass since PM_RUN_INST_CMPL_ALT in PMC4 has alternative event
+ * 0x500fa in PMC5. So it can go in with EventCode_4 which is using PMC4
+ */
+ FAIL_IF(event_open_with_group(&event, leader.fd));
+
+ event_close(&leader);
+ event_close(&event);
+
+ return 0;
+}
+
+int main(void)
+{
+ return test_harness(event_alternatives_tests_p9, "event_alternatives_tests_p9");
+}
diff --git a/tools/testing/selftests/powerpc/pmu/event_code_tests/generic_events_valid_test.c b/tools/testing/selftests/powerpc/pmu/event_code_tests/generic_events_valid_test.c
new file mode 100644
index 000000000000..0d237c15d3f2
--- /dev/null
+++ b/tools/testing/selftests/powerpc/pmu/event_code_tests/generic_events_valid_test.c
@@ -0,0 +1,130 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright 2022, Athira Rajeev, IBM Corp.
+ */
+
+#include <stdio.h>
+#include <sys/prctl.h>
+#include <limits.h>
+#include "../event.h"
+#include "../sampling_tests/misc.h"
+
+/*
+ * Testcase to ensure that using invalid event in generic
+ * event for PERF_TYPE_HARDWARE should fail
+ */
+
+static int generic_events_valid_test(void)
+{
+ struct event event;
+
+ /* Check for platform support for the test */
+ SKIP_IF(platform_check_for_tests());
+
+ /* generic events is different in compat_mode */
+ SKIP_IF(check_for_generic_compat_pmu());
+
+ /*
+ * Invalid generic events in power10:
+ * - PERF_COUNT_HW_BUS_CYCLES
+ * - PERF_COUNT_HW_STALLED_CYCLES_FRONTEND
+ * - PERF_COUNT_HW_STALLED_CYCLES_BACKEND
+ * - PERF_COUNT_HW_REF_CPU_CYCLES
+ */
+ if (PVR_VER(mfspr(SPRN_PVR)) == POWER10) {
+ event_init_opts(&event, PERF_COUNT_HW_CPU_CYCLES, PERF_TYPE_HARDWARE, "event");
+ FAIL_IF(event_open(&event));
+ event_close(&event);
+
+ event_init_opts(&event, PERF_COUNT_HW_INSTRUCTIONS,
+ PERF_TYPE_HARDWARE, "event");
+ FAIL_IF(event_open(&event));
+ event_close(&event);
+
+ event_init_opts(&event, PERF_COUNT_HW_CACHE_REFERENCES,
+ PERF_TYPE_HARDWARE, "event");
+ FAIL_IF(event_open(&event));
+ event_close(&event);
+
+ event_init_opts(&event, PERF_COUNT_HW_CACHE_MISSES, PERF_TYPE_HARDWARE, "event");
+ FAIL_IF(event_open(&event));
+ event_close(&event);
+
+ event_init_opts(&event, PERF_COUNT_HW_BRANCH_INSTRUCTIONS,
+ PERF_TYPE_HARDWARE, "event");
+ FAIL_IF(event_open(&event));
+ event_close(&event);
+
+ event_init_opts(&event, PERF_COUNT_HW_BRANCH_MISSES, PERF_TYPE_HARDWARE, "event");
+ FAIL_IF(event_open(&event));
+ event_close(&event);
+
+ event_init_opts(&event, PERF_COUNT_HW_BUS_CYCLES, PERF_TYPE_HARDWARE, "event");
+ FAIL_IF(!event_open(&event));
+
+ event_init_opts(&event, PERF_COUNT_HW_STALLED_CYCLES_FRONTEND,
+ PERF_TYPE_HARDWARE, "event");
+ FAIL_IF(!event_open(&event));
+
+ event_init_opts(&event, PERF_COUNT_HW_STALLED_CYCLES_BACKEND,
+ PERF_TYPE_HARDWARE, "event");
+ FAIL_IF(!event_open(&event));
+
+ event_init_opts(&event, PERF_COUNT_HW_REF_CPU_CYCLES, PERF_TYPE_HARDWARE, "event");
+ FAIL_IF(!event_open(&event));
+ } else if (PVR_VER(mfspr(SPRN_PVR)) == POWER9) {
+ /*
+ * Invalid generic events in power9:
+ * - PERF_COUNT_HW_BUS_CYCLES
+ * - PERF_COUNT_HW_REF_CPU_CYCLES
+ */
+ event_init_opts(&event, PERF_COUNT_HW_CPU_CYCLES, PERF_TYPE_HARDWARE, "event");
+ FAIL_IF(event_open(&event));
+ event_close(&event);
+
+ event_init_opts(&event, PERF_COUNT_HW_INSTRUCTIONS, PERF_TYPE_HARDWARE, "event");
+ FAIL_IF(event_open(&event));
+ event_close(&event);
+
+ event_init_opts(&event, PERF_COUNT_HW_CACHE_REFERENCES,
+ PERF_TYPE_HARDWARE, "event");
+ FAIL_IF(event_open(&event));
+ event_close(&event);
+
+ event_init_opts(&event, PERF_COUNT_HW_CACHE_MISSES, PERF_TYPE_HARDWARE, "event");
+ FAIL_IF(event_open(&event));
+ event_close(&event);
+
+ event_init_opts(&event, PERF_COUNT_HW_BRANCH_INSTRUCTIONS,
+ PERF_TYPE_HARDWARE, "event");
+ FAIL_IF(event_open(&event));
+ event_close(&event);
+
+ event_init_opts(&event, PERF_COUNT_HW_BRANCH_MISSES, PERF_TYPE_HARDWARE, "event");
+ FAIL_IF(event_open(&event));
+ event_close(&event);
+
+ event_init_opts(&event, PERF_COUNT_HW_BUS_CYCLES, PERF_TYPE_HARDWARE, "event");
+ FAIL_IF(!event_open(&event));
+
+ event_init_opts(&event, PERF_COUNT_HW_STALLED_CYCLES_FRONTEND,
+ PERF_TYPE_HARDWARE, "event");
+ FAIL_IF(event_open(&event));
+ event_close(&event);
+
+ event_init_opts(&event, PERF_COUNT_HW_STALLED_CYCLES_BACKEND,
+ PERF_TYPE_HARDWARE, "event");
+ FAIL_IF(event_open(&event));
+ event_close(&event);
+
+ event_init_opts(&event, PERF_COUNT_HW_REF_CPU_CYCLES, PERF_TYPE_HARDWARE, "event");
+ FAIL_IF(!event_open(&event));
+ }
+
+ return 0;
+}
+
+int main(void)
+{
+ return test_harness(generic_events_valid_test, "generic_events_valid_test");
+}
diff --git a/tools/testing/selftests/powerpc/pmu/event_code_tests/group_constraint_cache_test.c b/tools/testing/selftests/powerpc/pmu/event_code_tests/group_constraint_cache_test.c
new file mode 100644
index 000000000000..f4be05aa3a3d
--- /dev/null
+++ b/tools/testing/selftests/powerpc/pmu/event_code_tests/group_constraint_cache_test.c
@@ -0,0 +1,60 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright 2022, Kajol Jain, IBM Corp.
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+
+#include "../event.h"
+#include "utils.h"
+#include "../sampling_tests/misc.h"
+
+/* All L1 D cache load references counted at finish, gated by reject */
+#define EventCode_1 0x1100fc
+/* Load Missed L1 */
+#define EventCode_2 0x23e054
+/* Load Missed L1 */
+#define EventCode_3 0x13e054
+
+/*
+ * Testcase for group constraint check of data and instructions
+ * cache qualifier bits which is used to program cache select field in
+ * Monitor Mode Control Register 1 (MMCR1: 16-17) for l1 cache.
+ * All events in the group should match cache select bits otherwise
+ * event_open for the group will fail.
+ */
+static int group_constraint_cache(void)
+{
+ struct event event, leader;
+
+ /* Check for platform support for the test */
+ SKIP_IF(platform_check_for_tests());
+
+ /* Init the events for the group contraint check for l1 cache select bits */
+ event_init(&leader, EventCode_1);
+ FAIL_IF(event_open(&leader));
+
+ event_init(&event, EventCode_2);
+
+ /* Expected to fail as sibling event doesn't request same l1 cache select bits as leader */
+ FAIL_IF(!event_open_with_group(&event, leader.fd));
+
+ event_close(&event);
+
+ /* Init the event for the group contraint l1 cache select test */
+ event_init(&event, EventCode_3);
+
+ /* Expected to succeed as sibling event request same l1 cache select bits as leader */
+ FAIL_IF(event_open_with_group(&event, leader.fd));
+
+ event_close(&leader);
+ event_close(&event);
+
+ return 0;
+}
+
+int main(void)
+{
+ return test_harness(group_constraint_cache, "group_constraint_cache");
+}
diff --git a/tools/testing/selftests/powerpc/pmu/event_code_tests/group_constraint_l2l3_sel_test.c b/tools/testing/selftests/powerpc/pmu/event_code_tests/group_constraint_l2l3_sel_test.c
new file mode 100644
index 000000000000..85a636886069
--- /dev/null
+++ b/tools/testing/selftests/powerpc/pmu/event_code_tests/group_constraint_l2l3_sel_test.c
@@ -0,0 +1,64 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright 2022, Kajol Jain, IBM Corp.
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+
+#include "../event.h"
+#include "utils.h"
+#include "../sampling_tests/misc.h"
+
+/* All successful D-side store dispatches for this thread */
+#define EventCode_1 0x010000046080
+/* All successful D-side store dispatches for this thread that were L2 Miss */
+#define EventCode_2 0x26880
+/* All successful D-side store dispatches for this thread that were L2 Miss */
+#define EventCode_3 0x010000026880
+
+/*
+ * Testcase for group constraint check of l2l3_sel bits which is
+ * used to program l2l3 select field in Monitor Mode Control Register 0
+ * (MMCR0: 56-60).
+ * All events in the group should match l2l3_sel bits otherwise
+ * event_open for the group should fail.
+ */
+static int group_constraint_l2l3_sel(void)
+{
+ struct event event, leader;
+
+ /*
+ * Check for platform support for the test.
+ * This test is only aplicable on power10
+ */
+ SKIP_IF(platform_check_for_tests());
+ SKIP_IF(!have_hwcap2(PPC_FEATURE2_ARCH_3_1));
+
+ /* Init the events for the group contraint check for l2l3_sel bits */
+ event_init(&leader, EventCode_1);
+ FAIL_IF(event_open(&leader));
+
+ event_init(&event, EventCode_2);
+
+ /* Expected to fail as sibling event doesn't request same l2l3_sel bits as leader */
+ FAIL_IF(!event_open_with_group(&event, leader.fd));
+
+ event_close(&event);
+
+ /* Init the event for the group contraint l2l3_sel test */
+ event_init(&event, EventCode_3);
+
+ /* Expected to succeed as sibling event request same l2l3_sel bits as leader */
+ FAIL_IF(event_open_with_group(&event, leader.fd));
+
+ event_close(&leader);
+ event_close(&event);
+
+ return 0;
+}
+
+int main(void)
+{
+ return test_harness(group_constraint_l2l3_sel, "group_constraint_l2l3_sel");
+}
diff --git a/tools/testing/selftests/powerpc/pmu/event_code_tests/group_constraint_mmcra_sample_test.c b/tools/testing/selftests/powerpc/pmu/event_code_tests/group_constraint_mmcra_sample_test.c
new file mode 100644
index 000000000000..ff625b5d80eb
--- /dev/null
+++ b/tools/testing/selftests/powerpc/pmu/event_code_tests/group_constraint_mmcra_sample_test.c
@@ -0,0 +1,54 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright 2022, Athira Rajeev, IBM Corp.
+ */
+
+#include <stdio.h>
+#include "../event.h"
+#include "../sampling_tests/misc.h"
+
+#define EventCode_1 0x35340401e0
+#define EventCode_2 0x353c0101ec
+#define EventCode_3 0x35340101ec
+/*
+ * Test that using different sample bits in
+ * event code cause failure in schedule for
+ * group of events.
+ */
+
+static int group_constraint_mmcra_sample(void)
+{
+ struct event event, leader;
+
+ SKIP_IF(platform_check_for_tests());
+
+ /*
+ * Events with different "sample" field values
+ * in a group will fail to schedule.
+ * Use event with load only sampling mode as
+ * group leader. Use event with store only sampling
+ * as sibling event.
+ */
+ event_init(&leader, EventCode_1);
+ FAIL_IF(event_open(&leader));
+
+ event_init(&event, EventCode_2);
+
+ /* Expected to fail as sibling event doesn't use same sampling bits as leader */
+ FAIL_IF(!event_open_with_group(&event, leader.fd));
+
+ event_init(&event, EventCode_3);
+
+ /* Expected to pass as sibling event use same sampling bits as leader */
+ FAIL_IF(event_open_with_group(&event, leader.fd));
+
+ event_close(&leader);
+ event_close(&event);
+
+ return 0;
+}
+
+int main(void)
+{
+ return test_harness(group_constraint_mmcra_sample, "group_constraint_mmcra_sample");
+}
diff --git a/tools/testing/selftests/powerpc/pmu/event_code_tests/group_constraint_pmc56_test.c b/tools/testing/selftests/powerpc/pmu/event_code_tests/group_constraint_pmc56_test.c
new file mode 100644
index 000000000000..f5ee4796d46c
--- /dev/null
+++ b/tools/testing/selftests/powerpc/pmu/event_code_tests/group_constraint_pmc56_test.c
@@ -0,0 +1,63 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright 2022, Athira Rajeev, IBM Corp.
+ */
+
+#include <stdio.h>
+#include "../event.h"
+#include "../sampling_tests/misc.h"
+
+/*
+ * Testcase for checking constraint checks for
+ * Performance Monitor Counter 5 (PMC5) and also
+ * Performance Monitor Counter 6 (PMC6). Events using
+ * PMC5/PMC6 shouldn't have other fields in event
+ * code like cache bits, thresholding or marked bit.
+ */
+
+static int group_constraint_pmc56(void)
+{
+ struct event event;
+
+ /* Check for platform support for the test */
+ SKIP_IF(platform_check_for_tests());
+
+ /*
+ * Events using PMC5 and PMC6 with cache bit
+ * set in event code is expected to fail.
+ */
+ event_init(&event, 0x2500fa);
+ FAIL_IF(!event_open(&event));
+
+ event_init(&event, 0x2600f4);
+ FAIL_IF(!event_open(&event));
+
+ /*
+ * PMC5 and PMC6 only supports base events:
+ * ie 500fa and 600f4. Other combinations
+ * should fail.
+ */
+ event_init(&event, 0x501e0);
+ FAIL_IF(!event_open(&event));
+
+ event_init(&event, 0x6001e);
+ FAIL_IF(!event_open(&event));
+
+ event_init(&event, 0x501fa);
+ FAIL_IF(!event_open(&event));
+
+ /*
+ * Events using PMC5 and PMC6 with random
+ * sampling bits set in event code should fail
+ * to schedule.
+ */
+ event_init(&event, 0x35340500fa);
+ FAIL_IF(!event_open(&event));
+
+ return 0;
+}
+
+int main(void)
+{
+ return test_harness(group_constraint_pmc56, "group_constraint_pmc56");
+}
diff --git a/tools/testing/selftests/powerpc/pmu/event_code_tests/group_constraint_pmc_count_test.c b/tools/testing/selftests/powerpc/pmu/event_code_tests/group_constraint_pmc_count_test.c
new file mode 100644
index 000000000000..af7c5c75101c
--- /dev/null
+++ b/tools/testing/selftests/powerpc/pmu/event_code_tests/group_constraint_pmc_count_test.c
@@ -0,0 +1,70 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright 2022, Athira Rajeev, IBM Corp.
+ */
+
+#include <stdio.h>
+#include "../event.h"
+#include "../sampling_tests/misc.h"
+
+/*
+ * Testcase for number of counters in use.
+ * The number of programmable counters is from
+ * performance monitor counter 1 to performance
+ * monitor counter 4 (PMC1-PMC4). If number of
+ * counters in use exceeds the limit, next event
+ * should fail to schedule.
+ */
+
+static int group_constraint_pmc_count(void)
+{
+ struct event *e, events[5];
+ int i;
+
+ /* Check for platform support for the test */
+ SKIP_IF(platform_check_for_tests());
+
+ /*
+ * Test for number of counters in use.
+ * Use PMC1 to PMC4 for leader and 3 sibling
+ * events. Trying to open fourth event should
+ * fail here.
+ */
+ e = &events[0];
+ event_init(e, 0x1001a);
+
+ e = &events[1];
+ event_init(e, 0x200fc);
+
+ e = &events[2];
+ event_init(e, 0x30080);
+
+ e = &events[3];
+ event_init(e, 0x40054);
+
+ e = &events[4];
+ event_init(e, 0x0002c);
+
+ FAIL_IF(event_open(&events[0]));
+
+ /*
+ * The event_open will fail on event 4 if constraint
+ * check fails
+ */
+ for (i = 1; i < 5; i++) {
+ if (i == 4)
+ FAIL_IF(!event_open_with_group(&events[i], events[0].fd));
+ else
+ FAIL_IF(event_open_with_group(&events[i], events[0].fd));
+ }
+
+ for (i = 1; i < 4; i++)
+ event_close(&events[i]);
+
+ return 0;
+}
+
+int main(void)
+{
+ return test_harness(group_constraint_pmc_count, "group_constraint_pmc_count");
+}
diff --git a/tools/testing/selftests/powerpc/pmu/event_code_tests/group_constraint_radix_scope_qual_test.c b/tools/testing/selftests/powerpc/pmu/event_code_tests/group_constraint_radix_scope_qual_test.c
new file mode 100644
index 000000000000..9225618b846a
--- /dev/null
+++ b/tools/testing/selftests/powerpc/pmu/event_code_tests/group_constraint_radix_scope_qual_test.c
@@ -0,0 +1,56 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright 2022, Athira Rajeev, IBM Corp.
+ */
+
+#include <stdio.h>
+#include "../event.h"
+#include "../sampling_tests/misc.h"
+
+/* PM_DATA_RADIX_PROCESS_L2_PTE_FROM_L2 */
+#define EventCode_1 0x14242
+/* PM_DATA_RADIX_PROCESS_L2_PTE_FROM_L3 */
+#define EventCode_2 0x24242
+
+/*
+ * Testcase for group constraint check for radix_scope_qual
+ * field which is used to program Monitor Mode Control
+ * egister (MMCR1) bit 18.
+ * All events in the group should match radix_scope_qual,
+ * bits otherwise event_open for the group should fail.
+ */
+
+static int group_constraint_radix_scope_qual(void)
+{
+ struct event event, leader;
+
+ /*
+ * Check for platform support for the test.
+ * This test is aplicable on power10 only.
+ */
+ SKIP_IF(platform_check_for_tests());
+ SKIP_IF(!have_hwcap2(PPC_FEATURE2_ARCH_3_1));
+
+ /* Init the events for the group contraint check for radix_scope_qual bits */
+ event_init(&leader, EventCode_1);
+ FAIL_IF(event_open(&leader));
+
+ event_init(&event, 0x200fc);
+
+ /* Expected to fail as sibling event doesn't request same radix_scope_qual bits as leader */
+ FAIL_IF(!event_open_with_group(&event, leader.fd));
+
+ event_init(&event, EventCode_2);
+ /* Expected to pass as sibling event request same radix_scope_qual bits as leader */
+ FAIL_IF(event_open_with_group(&event, leader.fd));
+
+ event_close(&leader);
+ event_close(&event);
+ return 0;
+}
+
+int main(void)
+{
+ return test_harness(group_constraint_radix_scope_qual,
+ "group_constraint_radix_scope_qual");
+}
diff --git a/tools/testing/selftests/powerpc/pmu/event_code_tests/group_constraint_repeat_test.c b/tools/testing/selftests/powerpc/pmu/event_code_tests/group_constraint_repeat_test.c
new file mode 100644
index 000000000000..371cd05bb3ed
--- /dev/null
+++ b/tools/testing/selftests/powerpc/pmu/event_code_tests/group_constraint_repeat_test.c
@@ -0,0 +1,56 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright 2022, Athira Rajeev, IBM Corp.
+ */
+
+#include <stdio.h>
+#include "../event.h"
+#include "../sampling_tests/misc.h"
+
+/* The processor's L1 data cache was reloaded */
+#define EventCode1 0x21C040
+#define EventCode2 0x22C040
+
+/*
+ * Testcase for group constraint check
+ * when using events with same PMC.
+ * Multiple events in a group shouldn't
+ * ask for same PMC. If so it should fail.
+ */
+
+static int group_constraint_repeat(void)
+{
+ struct event event, leader;
+
+ /* Check for platform support for the test */
+ SKIP_IF(platform_check_for_tests());
+
+ /*
+ * Two events in a group using same PMC
+ * should fail to get scheduled. Usei same PMC2
+ * for leader and sibling event which is expected
+ * to fail.
+ */
+ event_init(&leader, EventCode1);
+ FAIL_IF(event_open(&leader));
+
+ event_init(&event, EventCode1);
+
+ /* Expected to fail since sibling event is requesting same PMC as leader */
+ FAIL_IF(!event_open_with_group(&event, leader.fd));
+
+ event_init(&event, EventCode2);
+
+ /* Expected to pass since sibling event is requesting different PMC */
+ FAIL_IF(event_open_with_group(&event, leader.fd));
+
+ event_close(&leader);
+ event_close(&event);
+
+ return 0;
+}
+
+int main(void)
+{
+ return test_harness(group_constraint_repeat, "group_constraint_repeat");
+}
diff --git a/tools/testing/selftests/powerpc/pmu/event_code_tests/group_constraint_thresh_cmp_test.c b/tools/testing/selftests/powerpc/pmu/event_code_tests/group_constraint_thresh_cmp_test.c
new file mode 100644
index 000000000000..9f1197104e8c
--- /dev/null
+++ b/tools/testing/selftests/powerpc/pmu/event_code_tests/group_constraint_thresh_cmp_test.c
@@ -0,0 +1,96 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright 2022, Kajol Jain, IBM Corp.
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+
+#include "../event.h"
+#include "utils.h"
+#include "../sampling_tests/misc.h"
+
+/*
+ * Primary PMU events used here is PM_MRK_INST_CMPL (0x401e0) and
+ * PM_THRESH_MET (0x101ec)
+ * Threshold event selection used is issue to complete for cycles
+ * Sampling criteria is Load or Store only sampling
+ */
+#define p9_EventCode_1 0x13e35340401e0
+#define p9_EventCode_2 0x17d34340101ec
+#define p9_EventCode_3 0x13e35340101ec
+#define p10_EventCode_1 0x35340401e0
+#define p10_EventCode_2 0x35340101ec
+
+/*
+ * Testcase for group constraint check of thresh_cmp bits which is
+ * used to program thresh compare field in Monitor Mode Control Register A
+ * (MMCRA: 9-18 bits for power9 and MMCRA: 8-18 bits for power10).
+ * All events in the group should match thresh compare bits otherwise
+ * event_open for the group will fail.
+ */
+static int group_constraint_thresh_cmp(void)
+{
+ struct event event, leader;
+
+ /* Check for platform support for the test */
+ SKIP_IF(platform_check_for_tests());
+
+ if (have_hwcap2(PPC_FEATURE2_ARCH_3_1)) {
+ /* Init the events for the group contraint check for thresh_cmp bits */
+ event_init(&leader, p10_EventCode_1);
+
+ /* Add the thresh_cmp value for leader in config1 */
+ leader.attr.config1 = 1000;
+ FAIL_IF(event_open(&leader));
+
+ event_init(&event, p10_EventCode_2);
+
+ /* Add the different thresh_cmp value from the leader event in config1 */
+ event.attr.config1 = 2000;
+
+ /* Expected to fail as sibling and leader event request different thresh_cmp bits */
+ FAIL_IF(!event_open_with_group(&event, leader.fd));
+
+ event_close(&event);
+
+ /* Init the event for the group contraint thresh compare test */
+ event_init(&event, p10_EventCode_2);
+
+ /* Add the same thresh_cmp value for leader and sibling event in config1 */
+ event.attr.config1 = 1000;
+
+ /* Expected to succeed as sibling and leader event request same thresh_cmp bits */
+ FAIL_IF(event_open_with_group(&event, leader.fd));
+
+ event_close(&leader);
+ event_close(&event);
+ } else {
+ /* Init the events for the group contraint check for thresh_cmp bits */
+ event_init(&leader, p9_EventCode_1);
+ FAIL_IF(event_open(&leader));
+
+ event_init(&event, p9_EventCode_2);
+
+ /* Expected to fail as sibling and leader event request different thresh_cmp bits */
+ FAIL_IF(!event_open_with_group(&event, leader.fd));
+
+ event_close(&event);
+
+ /* Init the event for the group contraint thresh compare test */
+ event_init(&event, p9_EventCode_3);
+
+ /* Expected to succeed as sibling and leader event request same thresh_cmp bits */
+ FAIL_IF(event_open_with_group(&event, leader.fd));
+
+ event_close(&leader);
+ event_close(&event);
+ }
+
+ return 0;
+}
+
+int main(void)
+{
+ return test_harness(group_constraint_thresh_cmp, "group_constraint_thresh_cmp");
+}
diff --git a/tools/testing/selftests/powerpc/pmu/event_code_tests/group_constraint_thresh_ctl_test.c b/tools/testing/selftests/powerpc/pmu/event_code_tests/group_constraint_thresh_ctl_test.c
new file mode 100644
index 000000000000..e0852ebc1671
--- /dev/null
+++ b/tools/testing/selftests/powerpc/pmu/event_code_tests/group_constraint_thresh_ctl_test.c
@@ -0,0 +1,64 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright 2022, Kajol Jain, IBM Corp.
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+
+#include "../event.h"
+#include "utils.h"
+#include "../sampling_tests/misc.h"
+
+/*
+ * Primary PMU events used here are PM_MRK_INST_CMPL (0x401e0) and
+ * PM_THRESH_MET (0x101ec).
+ * Threshold event selection used is issue to complete and issue to
+ * finished for cycles
+ * Sampling criteria is Load or Store only sampling
+ */
+#define EventCode_1 0x35340401e0
+#define EventCode_2 0x34340101ec
+#define EventCode_3 0x35340101ec
+
+/*
+ * Testcase for group constraint check of thresh_ctl bits which is
+ * used to program thresh compare field in Monitor Mode Control Register A
+ * (MMCR0: 48-55).
+ * All events in the group should match thresh ctl bits otherwise
+ * event_open for the group will fail.
+ */
+static int group_constraint_thresh_ctl(void)
+{
+ struct event event, leader;
+
+ /* Check for platform support for the test */
+ SKIP_IF(platform_check_for_tests());
+
+ /* Init the events for the group contraint thresh control test */
+ event_init(&leader, EventCode_1);
+ FAIL_IF(event_open(&leader));
+
+ event_init(&event, EventCode_2);
+
+ /* Expected to fail as sibling and leader event request different thresh_ctl bits */
+ FAIL_IF(!event_open_with_group(&event, leader.fd));
+
+ event_close(&event);
+
+ /* Init the event for the group contraint thresh control test */
+ event_init(&event, EventCode_3);
+
+ /* Expected to succeed as sibling and leader event request same thresh_ctl bits */
+ FAIL_IF(event_open_with_group(&event, leader.fd));
+
+ event_close(&leader);
+ event_close(&event);
+
+ return 0;
+}
+
+int main(void)
+{
+ return test_harness(group_constraint_thresh_ctl, "group_constraint_thresh_ctl");
+}
diff --git a/tools/testing/selftests/powerpc/pmu/event_code_tests/group_constraint_thresh_sel_test.c b/tools/testing/selftests/powerpc/pmu/event_code_tests/group_constraint_thresh_sel_test.c
new file mode 100644
index 000000000000..50a8cd843ce7
--- /dev/null
+++ b/tools/testing/selftests/powerpc/pmu/event_code_tests/group_constraint_thresh_sel_test.c
@@ -0,0 +1,63 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright 2022, Kajol Jain, IBM Corp.
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+
+#include "../event.h"
+#include "utils.h"
+#include "../sampling_tests/misc.h"
+
+/*
+ * Primary PMU events used here are PM_MRK_INST_CMPL (0x401e0) and
+ * PM_THRESH_MET (0x101ec).
+ * Threshold event selection used is issue to complete
+ * Sampling criteria is Load or Store only sampling
+ */
+#define EventCode_1 0x35340401e0
+#define EventCode_2 0x35540101ec
+#define EventCode_3 0x35340101ec
+
+/*
+ * Testcase for group constraint check of thresh_sel bits which is
+ * used to program thresh select field in Monitor Mode Control Register A
+ * (MMCRA: 45-57).
+ * All events in the group should match thresh sel bits otherwise
+ * event_open for the group will fail.
+ */
+static int group_constraint_thresh_sel(void)
+{
+ struct event event, leader;
+
+ /* Check for platform support for the test */
+ SKIP_IF(platform_check_for_tests());
+
+ /* Init the events for the group contraint thresh select test */
+ event_init(&leader, EventCode_1);
+ FAIL_IF(event_open(&leader));
+
+ event_init(&event, EventCode_2);
+
+ /* Expected to fail as sibling and leader event request different thresh_sel bits */
+ FAIL_IF(!event_open_with_group(&event, leader.fd));
+
+ event_close(&event);
+
+ /* Init the event for the group contraint thresh select test */
+ event_init(&event, EventCode_3);
+
+ /* Expected to succeed as sibling and leader event request same thresh_sel bits */
+ FAIL_IF(event_open_with_group(&event, leader.fd));
+
+ event_close(&leader);
+ event_close(&event);
+
+ return 0;
+}
+
+int main(void)
+{
+ return test_harness(group_constraint_thresh_sel, "group_constraint_thresh_sel");
+}
diff --git a/tools/testing/selftests/powerpc/pmu/event_code_tests/group_constraint_unit_test.c b/tools/testing/selftests/powerpc/pmu/event_code_tests/group_constraint_unit_test.c
new file mode 100644
index 000000000000..a2c18923dcec
--- /dev/null
+++ b/tools/testing/selftests/powerpc/pmu/event_code_tests/group_constraint_unit_test.c
@@ -0,0 +1,74 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright 2022, Kajol Jain, IBM Corp.
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+
+#include "../event.h"
+#include "utils.h"
+#include "../sampling_tests/misc.h"
+
+/* All successful D-side store dispatches for this thread with PMC 2 */
+#define EventCode_1 0x26080
+/* All successful D-side store dispatches for this thread with PMC 4 */
+#define EventCode_2 0x46080
+/* All successful D-side store dispatches for this thread that were L2 Miss with PMC 3 */
+#define EventCode_3 0x36880
+
+/*
+ * Testcase for group constraint check of unit and pmc bits which is
+ * used to program corresponding unit and pmc field in Monitor Mode
+ * Control Register 1 (MMCR1)
+ * One of the event in the group should use PMC 4 incase units field
+ * value is within 6 to 9 otherwise event_open for the group will fail.
+ */
+static int group_constraint_unit(void)
+{
+ struct event *e, events[3];
+
+ /*
+ * Check for platform support for the test.
+ * Constraint to use PMC4 with one of the event in group,
+ * when the unit is within 6 to 9 is only applicable on
+ * power9.
+ */
+ SKIP_IF(platform_check_for_tests());
+ SKIP_IF(have_hwcap2(PPC_FEATURE2_ARCH_3_1));
+
+ /* Init the events for the group contraint check for unit bits */
+ e = &events[0];
+ event_init(e, EventCode_1);
+
+ /* Expected to fail as PMC 4 is not used with unit field value 6 to 9 */
+ FAIL_IF(!event_open(&events[0]));
+
+ /* Init the events for the group contraint check for unit bits */
+ e = &events[1];
+ event_init(e, EventCode_2);
+
+ /* Expected to pass as PMC 4 is used with unit field value 6 to 9 */
+ FAIL_IF(event_open(&events[1]));
+
+ /* Init the event for the group contraint unit test */
+ e = &events[2];
+ event_init(e, EventCode_3);
+
+ /* Expected to fail as PMC4 is not being used */
+ FAIL_IF(!event_open_with_group(&events[2], events[0].fd));
+
+ /* Expected to succeed as event using PMC4 */
+ FAIL_IF(event_open_with_group(&events[2], events[1].fd));
+
+ event_close(&events[0]);
+ event_close(&events[1]);
+ event_close(&events[2]);
+
+ return 0;
+}
+
+int main(void)
+{
+ return test_harness(group_constraint_unit, "group_constraint_unit");
+}
diff --git a/tools/testing/selftests/powerpc/pmu/event_code_tests/group_pmc56_exclude_constraints_test.c b/tools/testing/selftests/powerpc/pmu/event_code_tests/group_pmc56_exclude_constraints_test.c
new file mode 100644
index 000000000000..cff9ac170df6
--- /dev/null
+++ b/tools/testing/selftests/powerpc/pmu/event_code_tests/group_pmc56_exclude_constraints_test.c
@@ -0,0 +1,64 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright 2022, Athira Rajeev, IBM Corp.
+ */
+
+#include <stdio.h>
+#include "../event.h"
+#include <sys/prctl.h>
+#include <limits.h>
+#include "../sampling_tests/misc.h"
+
+/*
+ * Testcase for group constraint check for
+ * Performance Monitor Counter 5 (PMC5) and also
+ * Performance Monitor Counter 6 (PMC6).
+ * Test that pmc5/6 is excluded from constraint
+ * check when scheduled along with group of events.
+ */
+
+static int group_pmc56_exclude_constraints(void)
+{
+ struct event *e, events[3];
+ int i;
+
+ /* Check for platform support for the test */
+ SKIP_IF(platform_check_for_tests());
+
+ /*
+ * PMC5/6 is excluded from constraint bit
+ * check along with group of events. Use
+ * group of events with PMC5, PMC6 and also
+ * event with cache bit (dc_ic) set. Test expects
+ * this set of events to go in as a group.
+ */
+ e = &events[0];
+ event_init(e, 0x500fa);
+
+ e = &events[1];
+ event_init(e, 0x600f4);
+
+ e = &events[2];
+ event_init(e, 0x22C040);
+
+ FAIL_IF(event_open(&events[0]));
+
+ /*
+ * The event_open will fail if constraint check fails.
+ * Since we are asking for events in a group and since
+ * PMC5/PMC6 is excluded from group constraints, even_open
+ * should pass.
+ */
+ for (i = 1; i < 3; i++)
+ FAIL_IF(event_open_with_group(&events[i], events[0].fd));
+
+ for (i = 0; i < 3; i++)
+ event_close(&events[i]);
+
+ return 0;
+}
+
+int main(void)
+{
+ return test_harness(group_pmc56_exclude_constraints, "group_pmc56_exclude_constraints");
+}
diff --git a/tools/testing/selftests/powerpc/pmu/event_code_tests/hw_cache_event_type_test.c b/tools/testing/selftests/powerpc/pmu/event_code_tests/hw_cache_event_type_test.c
new file mode 100644
index 000000000000..a45b1da5b568
--- /dev/null
+++ b/tools/testing/selftests/powerpc/pmu/event_code_tests/hw_cache_event_type_test.c
@@ -0,0 +1,88 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright 2022, Kajol Jain, IBM Corp.
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+
+#include "../event.h"
+#include "utils.h"
+#include "../sampling_tests/misc.h"
+
+/*
+ * Load Missed L1, for power9 its pointing to PM_LD_MISS_L1_FIN (0x2c04e) and
+ * for power10 its pointing to PM_LD_MISS_L1 (0x3e054)
+ *
+ * Hardware cache level : PERF_COUNT_HW_CACHE_L1D
+ * Hardware cache event operation type : PERF_COUNT_HW_CACHE_OP_READ
+ * Hardware cache event result type : PERF_COUNT_HW_CACHE_RESULT_MISS
+ */
+#define EventCode_1 0x10000
+/*
+ * Hardware cache level : PERF_COUNT_HW_CACHE_L1D
+ * Hardware cache event operation type : PERF_COUNT_HW_CACHE_OP_WRITE
+ * Hardware cache event result type : PERF_COUNT_HW_CACHE_RESULT_ACCESS
+ */
+#define EventCode_2 0x0100
+/*
+ * Hardware cache level : PERF_COUNT_HW_CACHE_DTLB
+ * Hardware cache event operation type : PERF_COUNT_HW_CACHE_OP_WRITE
+ * Hardware cache event result type : PERF_COUNT_HW_CACHE_RESULT_ACCESS
+ */
+#define EventCode_3 0x0103
+/*
+ * Hardware cache level : PERF_COUNT_HW_CACHE_L1D
+ * Hardware cache event operation type : PERF_COUNT_HW_CACHE_OP_READ
+ * Hardware cache event result type : Invalid ( > PERF_COUNT_HW_CACHE_RESULT_MAX)
+ */
+#define EventCode_4 0x030000
+
+/*
+ * A perf test to check valid hardware cache events.
+ */
+static int hw_cache_event_type_test(void)
+{
+ struct event event;
+
+ /* Check for platform support for the test */
+ SKIP_IF(platform_check_for_tests());
+
+ /* Skip for Generic compat PMU */
+ SKIP_IF(check_for_generic_compat_pmu());
+
+ /* Init the event to test hardware cache event */
+ event_init_opts(&event, EventCode_1, PERF_TYPE_HW_CACHE, "event");
+
+ /* Expected to success as its pointing to L1 load miss */
+ FAIL_IF(event_open(&event));
+ event_close(&event);
+
+ /* Init the event to test hardware cache event */
+ event_init_opts(&event, EventCode_2, PERF_TYPE_HW_CACHE, "event");
+
+ /* Expected to fail as the corresponding cache event entry have 0 in that index */
+ FAIL_IF(!event_open(&event));
+ event_close(&event);
+
+ /* Init the event to test hardware cache event */
+ event_init_opts(&event, EventCode_3, PERF_TYPE_HW_CACHE, "event");
+
+ /* Expected to fail as the corresponding cache event entry have -1 in that index */
+ FAIL_IF(!event_open(&event));
+ event_close(&event);
+
+ /* Init the event to test hardware cache event */
+ event_init_opts(&event, EventCode_4, PERF_TYPE_HW_CACHE, "event");
+
+ /* Expected to fail as hardware cache event result type is Invalid */
+ FAIL_IF(!event_open(&event));
+ event_close(&event);
+
+ return 0;
+}
+
+int main(void)
+{
+ return test_harness(hw_cache_event_type_test, "hw_cache_event_type_test");
+}
diff --git a/tools/testing/selftests/powerpc/pmu/event_code_tests/invalid_event_code_test.c b/tools/testing/selftests/powerpc/pmu/event_code_tests/invalid_event_code_test.c
new file mode 100644
index 000000000000..f51fcab837fc
--- /dev/null
+++ b/tools/testing/selftests/powerpc/pmu/event_code_tests/invalid_event_code_test.c
@@ -0,0 +1,67 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright 2022, Athira Rajeev, IBM Corp.
+ */
+
+#include <stdio.h>
+#include <sys/prctl.h>
+#include <limits.h>
+#include "../event.h"
+#include "../sampling_tests/misc.h"
+
+/* The data cache was reloaded from local core's L3 due to a demand load */
+#define EventCode_1 0x1340000001c040
+/* PM_DATA_RADIX_PROCESS_L2_PTE_FROM_L2 */
+#define EventCode_2 0x14242
+/* Event code with IFM, EBB, BHRB bits set in event code */
+#define EventCode_3 0xf00000000000001e
+
+/*
+ * Some of the bits in the event code is
+ * reserved for specific platforms.
+ * Event code bits 52-59 are reserved in power9,
+ * whereas in power10, these are used for programming
+ * Monitor Mode Control Register 3 (MMCR3).
+ * Bit 9 in event code is reserved in power9,
+ * whereas it is used for programming "radix_scope_qual"
+ * bit 18 in Monitor Mode Control Register 1 (MMCR1).
+ *
+ * Testcase to ensure that using reserved bits in
+ * event code should cause event_open to fail.
+ */
+
+static int invalid_event_code(void)
+{
+ struct event event;
+
+ /* Check for platform support for the test */
+ SKIP_IF(platform_check_for_tests());
+
+ /*
+ * Events using MMCR3 bits and radix scope qual bits
+ * should fail in power9 and should succeed in power10.
+ * Init the events and check for pass/fail in event open.
+ */
+ if (have_hwcap2(PPC_FEATURE2_ARCH_3_1)) {
+ event_init(&event, EventCode_1);
+ FAIL_IF(event_open(&event));
+ event_close(&event);
+
+ event_init(&event, EventCode_2);
+ FAIL_IF(event_open(&event));
+ event_close(&event);
+ } else {
+ event_init(&event, EventCode_1);
+ FAIL_IF(!event_open(&event));
+
+ event_init(&event, EventCode_2);
+ FAIL_IF(!event_open(&event));
+ }
+
+ return 0;
+}
+
+int main(void)
+{
+ return test_harness(invalid_event_code, "invalid_event_code");
+}
diff --git a/tools/testing/selftests/powerpc/pmu/event_code_tests/reserved_bits_mmcra_sample_elig_mode_test.c b/tools/testing/selftests/powerpc/pmu/event_code_tests/reserved_bits_mmcra_sample_elig_mode_test.c
new file mode 100644
index 000000000000..4c119c821b99
--- /dev/null
+++ b/tools/testing/selftests/powerpc/pmu/event_code_tests/reserved_bits_mmcra_sample_elig_mode_test.c
@@ -0,0 +1,77 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright 2022, Athira Rajeev, IBM Corp.
+ */
+
+#include <stdio.h>
+#include "../event.h"
+#include "../sampling_tests/misc.h"
+
+/*
+ * Testcase for reserved bits in Monitor Mode Control
+ * Register A (MMCRA) Random Sampling Mode (SM) value.
+ * As per Instruction Set Architecture (ISA), the values
+ * 0x5, 0x9, 0xD, 0x19, 0x1D, 0x1A, 0x1E are reserved
+ * for sampling mode field. Test that having these reserved
+ * bit values should cause event_open to fail.
+ * Input event code uses these sampling bits along with
+ * 401e0 (PM_MRK_INST_CMPL).
+ */
+
+static int reserved_bits_mmcra_sample_elig_mode(void)
+{
+ struct event event;
+
+ /* Check for platform support for the test */
+ SKIP_IF(platform_check_for_tests());
+
+ /* Skip for Generic compat PMU */
+ SKIP_IF(check_for_generic_compat_pmu());
+
+ /*
+ * MMCRA Random Sampling Mode (SM) values: 0x5
+ * 0x9, 0xD, 0x19, 0x1D, 0x1A, 0x1E is reserved.
+ * Expected to fail when using these reserved values.
+ */
+ event_init(&event, 0x50401e0);
+ FAIL_IF(!event_open(&event));
+
+ event_init(&event, 0x90401e0);
+ FAIL_IF(!event_open(&event));
+
+ event_init(&event, 0xD0401e0);
+ FAIL_IF(!event_open(&event));
+
+ event_init(&event, 0x190401e0);
+ FAIL_IF(!event_open(&event));
+
+ event_init(&event, 0x1D0401e0);
+ FAIL_IF(!event_open(&event));
+
+ event_init(&event, 0x1A0401e0);
+ FAIL_IF(!event_open(&event));
+
+ event_init(&event, 0x1E0401e0);
+ FAIL_IF(!event_open(&event));
+
+ /*
+ * MMCRA Random Sampling Mode (SM) value 0x10
+ * is reserved in power10 and 0xC is reserved in
+ * power9.
+ */
+ if (PVR_VER(mfspr(SPRN_PVR)) == POWER10) {
+ event_init(&event, 0x100401e0);
+ FAIL_IF(!event_open(&event));
+ } else if (PVR_VER(mfspr(SPRN_PVR)) == POWER9) {
+ event_init(&event, 0xC0401e0);
+ FAIL_IF(!event_open(&event));
+ }
+
+ return 0;
+}
+
+int main(void)
+{
+ return test_harness(reserved_bits_mmcra_sample_elig_mode,
+ "reserved_bits_mmcra_sample_elig_mode");
+}
diff --git a/tools/testing/selftests/powerpc/pmu/event_code_tests/reserved_bits_mmcra_thresh_ctl_test.c b/tools/testing/selftests/powerpc/pmu/event_code_tests/reserved_bits_mmcra_thresh_ctl_test.c
new file mode 100644
index 000000000000..4ea1c2f8913f
--- /dev/null
+++ b/tools/testing/selftests/powerpc/pmu/event_code_tests/reserved_bits_mmcra_thresh_ctl_test.c
@@ -0,0 +1,44 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright 2022, Athira Rajeev, IBM Corp.
+ */
+
+#include <stdio.h>
+#include "../event.h"
+#include "../sampling_tests/misc.h"
+
+/*
+ * Testcase for reserved bits in Monitor Mode
+ * Control Register A (MMCRA) thresh_ctl bits.
+ * For MMCRA[48:51]/[52:55]) Threshold Start/Stop,
+ * 0b11110000/0b00001111 is reserved.
+ */
+
+static int reserved_bits_mmcra_thresh_ctl(void)
+{
+ struct event event;
+
+ /* Check for platform support for the test */
+ SKIP_IF(platform_check_for_tests());
+
+ /* Skip for Generic compat PMU */
+ SKIP_IF(check_for_generic_compat_pmu());
+
+ /*
+ * MMCRA[48:51]/[52:55]) Threshold Start/Stop
+ * events Selection. 0b11110000/0b00001111 is reserved.
+ * Expected to fail when using these reserved values.
+ */
+ event_init(&event, 0xf0340401e0);
+ FAIL_IF(!event_open(&event));
+
+ event_init(&event, 0x0f340401e0);
+ FAIL_IF(!event_open(&event));
+
+ return 0;
+}
+
+int main(void)
+{
+ return test_harness(reserved_bits_mmcra_thresh_ctl, "reserved_bits_mmcra_thresh_ctl");
+}
diff --git a/tools/testing/selftests/powerpc/pmu/l3_bank_test.c b/tools/testing/selftests/powerpc/pmu/l3_bank_test.c
index a96d512a18c4..a5dfa9bf3b9f 100644
--- a/tools/testing/selftests/powerpc/pmu/l3_bank_test.c
+++ b/tools/testing/selftests/powerpc/pmu/l3_bank_test.c
@@ -20,6 +20,9 @@ static int l3_bank_test(void)
char *p;
int i;
+ // The L3 bank logic is only used on Power8 or later
+ SKIP_IF(!have_hwcap2(PPC_FEATURE2_ARCH_2_07));
+
p = malloc(MALLOC_SIZE);
FAIL_IF(!p);
diff --git a/tools/testing/selftests/powerpc/pmu/lib.c b/tools/testing/selftests/powerpc/pmu/lib.c
index 88690b97b7b9..321357987408 100644
--- a/tools/testing/selftests/powerpc/pmu/lib.c
+++ b/tools/testing/selftests/powerpc/pmu/lib.c
@@ -14,19 +14,6 @@
#include "utils.h"
#include "lib.h"
-
-int bind_to_cpu(int cpu)
-{
- cpu_set_t mask;
-
- printf("Binding to cpu %d\n", cpu);
-
- CPU_ZERO(&mask);
- CPU_SET(cpu, &mask);
-
- return sched_setaffinity(0, sizeof(mask), &mask);
-}
-
#define PARENT_TOKEN 0xAA
#define CHILD_TOKEN 0x55
@@ -116,12 +103,10 @@ static int eat_cpu_child(union pipe read_pipe, union pipe write_pipe)
pid_t eat_cpu(int (test_function)(void))
{
union pipe read_pipe, write_pipe;
- int cpu, rc;
+ int rc;
pid_t pid;
- cpu = pick_online_cpu();
- FAIL_IF(cpu < 0);
- FAIL_IF(bind_to_cpu(cpu));
+ FAIL_IF(bind_to_cpu(BIND_CPU_ANY) < 0);
if (pipe(read_pipe.fds) == -1)
return -1;
@@ -190,38 +175,14 @@ int parse_proc_maps(void)
bool require_paranoia_below(int level)
{
+ int err;
long current;
- char *end, buf[16];
- FILE *f;
- bool rc;
- rc = false;
-
- f = fopen(PARANOID_PATH, "r");
- if (!f) {
- perror("fopen");
- goto out;
- }
-
- if (!fgets(buf, sizeof(buf), f)) {
- printf("Couldn't read " PARANOID_PATH "?\n");
- goto out_close;
- }
-
- current = strtol(buf, &end, 10);
-
- if (end == buf) {
+ err = read_long(PARANOID_PATH, &current, 10);
+ if (err) {
printf("Couldn't parse " PARANOID_PATH "?\n");
- goto out_close;
+ return false;
}
- if (current >= level)
- goto out_close;
-
- rc = true;
-out_close:
- fclose(f);
-out:
- return rc;
+ return current < level;
}
-
diff --git a/tools/testing/selftests/powerpc/pmu/lib.h b/tools/testing/selftests/powerpc/pmu/lib.h
index fa12e7d0b4d3..1d62403ae6ea 100644
--- a/tools/testing/selftests/powerpc/pmu/lib.h
+++ b/tools/testing/selftests/powerpc/pmu/lib.h
@@ -6,6 +6,7 @@
#ifndef __SELFTESTS_POWERPC_PMU_LIB_H
#define __SELFTESTS_POWERPC_PMU_LIB_H
+#include <stdbool.h>
#include <stdio.h>
#include <stdint.h>
#include <string.h>
@@ -19,7 +20,6 @@ union pipe {
int fds[2];
};
-extern int bind_to_cpu(int cpu);
extern int kill_child_and_wait(pid_t child_pid);
extern int wait_for_child(pid_t child_pid);
extern int sync_with_child(union pipe read_pipe, union pipe write_pipe);
diff --git a/tools/testing/selftests/powerpc/pmu/per_event_excludes.c b/tools/testing/selftests/powerpc/pmu/per_event_excludes.c
index 2756fe2efdc5..ad32a09a6540 100644
--- a/tools/testing/selftests/powerpc/pmu/per_event_excludes.c
+++ b/tools/testing/selftests/powerpc/pmu/per_event_excludes.c
@@ -23,12 +23,9 @@
static int per_event_excludes(void)
{
struct event *e, events[4];
- char *platform;
int i;
- platform = (char *)get_auxv_entry(AT_BASE_PLATFORM);
- FAIL_IF(!platform);
- SKIP_IF(strcmp(platform, "power8") != 0);
+ SKIP_IF(!have_hwcap2(PPC_FEATURE2_ARCH_2_07));
/*
* We need to create the events disabled, otherwise the running/enabled
diff --git a/tools/testing/selftests/powerpc/pmu/sampling_tests/.gitignore b/tools/testing/selftests/powerpc/pmu/sampling_tests/.gitignore
new file mode 100644
index 000000000000..f93b4c7c3a8a
--- /dev/null
+++ b/tools/testing/selftests/powerpc/pmu/sampling_tests/.gitignore
@@ -0,0 +1,21 @@
+bhrb_filter_map_test
+bhrb_no_crash_wo_pmu_test
+intr_regs_no_crash_wo_pmu_test
+mmcr0_cc56run_test
+mmcr0_exceptionbits_test
+mmcr0_fc56_pmc1ce_test
+mmcr0_fc56_pmc56_test
+mmcr0_pmccext_test
+mmcr0_pmcjce_test
+mmcr1_comb_test
+mmcr1_sel_unit_cache_test
+mmcr2_fcs_fch_test
+mmcr2_l2l3_test
+mmcr3_src_test
+mmcra_bhrb_any_test
+mmcra_bhrb_cond_test
+mmcra_bhrb_disable_no_branch_test
+mmcra_bhrb_disable_test
+mmcra_bhrb_ind_call_test
+mmcra_thresh_cmp_test
+mmcra_thresh_marked_sample_test
diff --git a/tools/testing/selftests/powerpc/pmu/sampling_tests/Makefile b/tools/testing/selftests/powerpc/pmu/sampling_tests/Makefile
new file mode 100644
index 000000000000..9e67351fb252
--- /dev/null
+++ b/tools/testing/selftests/powerpc/pmu/sampling_tests/Makefile
@@ -0,0 +1,15 @@
+# SPDX-License-Identifier: GPL-2.0
+CFLAGS += -m64
+
+TEST_GEN_PROGS := mmcr0_exceptionbits_test mmcr0_cc56run_test mmcr0_pmccext_test \
+ mmcr0_pmcjce_test mmcr0_fc56_pmc1ce_test mmcr0_fc56_pmc56_test \
+ mmcr1_comb_test mmcr2_l2l3_test mmcr2_fcs_fch_test \
+ mmcr3_src_test mmcra_thresh_marked_sample_test mmcra_thresh_cmp_test \
+ mmcra_bhrb_ind_call_test mmcra_bhrb_any_test mmcra_bhrb_cond_test \
+ mmcra_bhrb_disable_test bhrb_no_crash_wo_pmu_test intr_regs_no_crash_wo_pmu_test \
+ bhrb_filter_map_test mmcr1_sel_unit_cache_test mmcra_bhrb_disable_no_branch_test
+
+top_srcdir = ../../../../../..
+include ../../../lib.mk
+
+$(TEST_GEN_PROGS): ../../harness.c ../../utils.c ../event.c ../lib.c misc.c misc.h ../loop.S ../branch_loops.S
diff --git a/tools/testing/selftests/powerpc/pmu/sampling_tests/bhrb_filter_map_test.c b/tools/testing/selftests/powerpc/pmu/sampling_tests/bhrb_filter_map_test.c
new file mode 100644
index 000000000000..3f43c315c666
--- /dev/null
+++ b/tools/testing/selftests/powerpc/pmu/sampling_tests/bhrb_filter_map_test.c
@@ -0,0 +1,114 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright 2022, Athira Rajeev, IBM Corp.
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+
+#include "../event.h"
+#include "misc.h"
+#include "utils.h"
+
+/*
+ * A perf sampling test to check bhrb filter
+ * map. All the branch filters are not supported
+ * in powerpc. Supported filters in:
+ * power10: any, any_call, ind_call, cond
+ * power9: any, any_call
+ *
+ * Testcase checks event open for invalid bhrb filter
+ * types should fail and valid filter types should pass.
+ * Testcase does validity check for these branch
+ * sample types.
+ */
+
+/* Invalid types for powerpc */
+/* Valid bhrb filters in power9/power10 */
+int bhrb_filter_map_valid_common[] = {
+ PERF_SAMPLE_BRANCH_ANY,
+ PERF_SAMPLE_BRANCH_ANY_CALL,
+};
+
+/* Valid bhrb filters in power10 */
+int bhrb_filter_map_valid_p10[] = {
+ PERF_SAMPLE_BRANCH_IND_CALL,
+ PERF_SAMPLE_BRANCH_COND,
+};
+
+#define EventCode 0x1001e
+
+static int bhrb_filter_map_test(void)
+{
+ struct event event;
+ int i;
+
+ /* Check for platform support for the test */
+ SKIP_IF(platform_check_for_tests());
+
+ /*
+ * Skip for Generic compat PMU since
+ * bhrb filters is not supported
+ */
+ SKIP_IF(check_for_generic_compat_pmu());
+
+ /* Init the event for the sampling test */
+ event_init(&event, EventCode);
+
+ event.attr.sample_period = 1000;
+ event.attr.sample_type = PERF_SAMPLE_BRANCH_STACK;
+ event.attr.disabled = 1;
+
+ /* Invalid filter maps which are expected to fail in event_open */
+ for (i = PERF_SAMPLE_BRANCH_USER_SHIFT; i < PERF_SAMPLE_BRANCH_MAX_SHIFT; i++) {
+ /* Skip the valid branch sample type */
+ if (i == PERF_SAMPLE_BRANCH_ANY_SHIFT || i == PERF_SAMPLE_BRANCH_ANY_CALL_SHIFT \
+ || i == PERF_SAMPLE_BRANCH_IND_CALL_SHIFT || i == PERF_SAMPLE_BRANCH_COND_SHIFT)
+ continue;
+ event.attr.branch_sample_type = 1U << i;
+ FAIL_IF(!event_open(&event));
+ }
+
+ /* valid filter maps for power9/power10 which are expected to pass in event_open */
+ for (i = 0; i < ARRAY_SIZE(bhrb_filter_map_valid_common); i++) {
+ event.attr.branch_sample_type = bhrb_filter_map_valid_common[i];
+ FAIL_IF(event_open(&event));
+ event_close(&event);
+ }
+
+ /*
+ * filter maps which are valid in power10 and invalid in power9.
+ * PVR check is used here since PMU specific data like bhrb filter
+ * alternative tests is handled by respective PMU driver code and
+ * using PVR will work correctly for all cases including generic
+ * compat mode.
+ */
+ if (PVR_VER(mfspr(SPRN_PVR)) == POWER10) {
+ for (i = 0; i < ARRAY_SIZE(bhrb_filter_map_valid_p10); i++) {
+ event.attr.branch_sample_type = bhrb_filter_map_valid_p10[i];
+ FAIL_IF(event_open(&event));
+ event_close(&event);
+ }
+ } else {
+ for (i = 0; i < ARRAY_SIZE(bhrb_filter_map_valid_p10); i++) {
+ event.attr.branch_sample_type = bhrb_filter_map_valid_p10[i];
+ FAIL_IF(!event_open(&event));
+ }
+ }
+
+ /*
+ * Combine filter maps which includes a valid branch filter and an invalid branch
+ * filter. Example: any ( PERF_SAMPLE_BRANCH_ANY) and any_call
+ * (PERF_SAMPLE_BRANCH_ANY_CALL).
+ * The perf_event_open should fail in this case.
+ */
+ event.attr.branch_sample_type = PERF_SAMPLE_BRANCH_ANY | PERF_SAMPLE_BRANCH_ANY_CALL;
+ FAIL_IF(!event_open(&event));
+
+ return 0;
+}
+
+int main(void)
+{
+ return test_harness(bhrb_filter_map_test, "bhrb_filter_map_test");
+}
diff --git a/tools/testing/selftests/powerpc/pmu/sampling_tests/bhrb_no_crash_wo_pmu_test.c b/tools/testing/selftests/powerpc/pmu/sampling_tests/bhrb_no_crash_wo_pmu_test.c
new file mode 100644
index 000000000000..4644c6782974
--- /dev/null
+++ b/tools/testing/selftests/powerpc/pmu/sampling_tests/bhrb_no_crash_wo_pmu_test.c
@@ -0,0 +1,59 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright 2022, Athira Rajeev, IBM Corp.
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+
+#include "../event.h"
+#include "misc.h"
+#include "utils.h"
+
+/*
+ * A perf sampling test for making sure
+ * enabling branch stack doesn't crash in any
+ * environment, say:
+ * - With generic compat PMU
+ * - without any PMU registered
+ * - With platform specific PMU
+ * A fix for bhrb sampling crash was added in kernel
+ * via commit: b460b512417a ("powerpc/perf: Fix crashes
+ * with generic_compat_pmu & BHRB")
+ *
+ * This testcase exercises this code by doing branch
+ * stack enable for software event. s/w event is used
+ * since software event will work even in platform
+ * without PMU.
+ */
+static int bhrb_no_crash_wo_pmu_test(void)
+{
+ struct event event;
+
+ /*
+ * Init the event for the sampling test.
+ * This uses software event which works on
+ * any platform.
+ */
+ event_init_opts(&event, 0, PERF_TYPE_SOFTWARE, "cycles");
+
+ event.attr.sample_period = 1000;
+ event.attr.sample_type = PERF_SAMPLE_BRANCH_STACK;
+ event.attr.disabled = 1;
+
+ /*
+ * Return code of event_open is not
+ * considered since test just expects no crash from
+ * using PERF_SAMPLE_BRANCH_STACK. Also for environment
+ * like generic compat PMU, branch stack is unsupported.
+ */
+ event_open(&event);
+
+ event_close(&event);
+ return 0;
+}
+
+int main(void)
+{
+ return test_harness(bhrb_no_crash_wo_pmu_test, "bhrb_no_crash_wo_pmu_test");
+}
diff --git a/tools/testing/selftests/powerpc/pmu/sampling_tests/intr_regs_no_crash_wo_pmu_test.c b/tools/testing/selftests/powerpc/pmu/sampling_tests/intr_regs_no_crash_wo_pmu_test.c
new file mode 100644
index 000000000000..839d2d225da0
--- /dev/null
+++ b/tools/testing/selftests/powerpc/pmu/sampling_tests/intr_regs_no_crash_wo_pmu_test.c
@@ -0,0 +1,57 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright 2022, Athira Rajeev, IBM Corp.
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+
+#include "../event.h"
+#include "misc.h"
+#include "utils.h"
+
+/*
+ * A perf sampling test for making sure
+ * sampling with -intr-regs doesn't crash
+ * in any environment, say:
+ * - With generic compat PMU
+ * - without any PMU registered
+ * - With platform specific PMU.
+ * A fix for crash with intr_regs was
+ * addressed in commit: f75e7d73bdf7 in kernel.
+ *
+ * This testcase exercises this code path by doing
+ * intr_regs using software event. Software event is
+ * used since s/w event will work even in platform
+ * without PMU.
+ */
+static int intr_regs_no_crash_wo_pmu_test(void)
+{
+ struct event event;
+
+ /*
+ * Init the event for the sampling test.
+ * This uses software event which works on
+ * any platform.
+ */
+ event_init_opts(&event, 0, PERF_TYPE_SOFTWARE, "cycles");
+
+ event.attr.sample_period = 1000;
+ event.attr.sample_type = PERF_SAMPLE_REGS_INTR;
+ event.attr.disabled = 1;
+
+ /*
+ * Return code of event_open is not considered
+ * since test just expects no crash from using
+ * PERF_SAMPLE_REGS_INTR.
+ */
+ event_open(&event);
+
+ event_close(&event);
+ return 0;
+}
+
+int main(void)
+{
+ return test_harness(intr_regs_no_crash_wo_pmu_test, "intr_regs_no_crash_wo_pmu_test");
+}
diff --git a/tools/testing/selftests/powerpc/pmu/sampling_tests/misc.c b/tools/testing/selftests/powerpc/pmu/sampling_tests/misc.c
new file mode 100644
index 000000000000..eac6420abdf1
--- /dev/null
+++ b/tools/testing/selftests/powerpc/pmu/sampling_tests/misc.c
@@ -0,0 +1,537 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright 2022, Athira Rajeev, IBM Corp.
+ * Copyright 2022, Madhavan Srinivasan, IBM Corp.
+ * Copyright 2022, Kajol Jain, IBM Corp.
+ */
+
+#include <unistd.h>
+#include <sys/syscall.h>
+#include <string.h>
+#include <stdio.h>
+#include <sys/ioctl.h>
+#include <sys/mman.h>
+#include <stdlib.h>
+#include <ctype.h>
+
+#include "misc.h"
+
+#define PAGE_SIZE sysconf(_SC_PAGESIZE)
+
+/* Storage for platform version */
+int pvr;
+u64 platform_extended_mask;
+
+/* Mask and Shift for Event code fields */
+int ev_mask_pmcxsel, ev_shift_pmcxsel; //pmcxsel field
+int ev_mask_marked, ev_shift_marked; //marked filed
+int ev_mask_comb, ev_shift_comb; //combine field
+int ev_mask_unit, ev_shift_unit; //unit field
+int ev_mask_pmc, ev_shift_pmc; //pmc field
+int ev_mask_cache, ev_shift_cache; //Cache sel field
+int ev_mask_sample, ev_shift_sample; //Random sampling field
+int ev_mask_thd_sel, ev_shift_thd_sel; //thresh_sel field
+int ev_mask_thd_start, ev_shift_thd_start; //thresh_start field
+int ev_mask_thd_stop, ev_shift_thd_stop; //thresh_stop field
+int ev_mask_thd_cmp, ev_shift_thd_cmp; //thresh cmp field
+int ev_mask_sm, ev_shift_sm; //SDAR mode field
+int ev_mask_rsq, ev_shift_rsq; //radix scope qual field
+int ev_mask_l2l3, ev_shift_l2l3; //l2l3 sel field
+int ev_mask_mmcr3_src, ev_shift_mmcr3_src; //mmcr3 field
+
+static void init_ev_encodes(void)
+{
+ ev_mask_pmcxsel = 0xff;
+ ev_shift_pmcxsel = 0;
+ ev_mask_marked = 1;
+ ev_shift_marked = 8;
+ ev_mask_unit = 0xf;
+ ev_shift_unit = 12;
+ ev_mask_pmc = 0xf;
+ ev_shift_pmc = 16;
+ ev_mask_sample = 0x1f;
+ ev_shift_sample = 24;
+ ev_mask_thd_sel = 0x7;
+ ev_shift_thd_sel = 29;
+ ev_mask_thd_start = 0xf;
+ ev_shift_thd_start = 36;
+ ev_mask_thd_stop = 0xf;
+ ev_shift_thd_stop = 32;
+
+ switch (pvr) {
+ case POWER10:
+ ev_mask_thd_cmp = 0x3ffff;
+ ev_shift_thd_cmp = 0;
+ ev_mask_rsq = 1;
+ ev_shift_rsq = 9;
+ ev_mask_comb = 3;
+ ev_shift_comb = 10;
+ ev_mask_cache = 3;
+ ev_shift_cache = 20;
+ ev_mask_sm = 0x3;
+ ev_shift_sm = 22;
+ ev_mask_l2l3 = 0x1f;
+ ev_shift_l2l3 = 40;
+ ev_mask_mmcr3_src = 0x7fff;
+ ev_shift_mmcr3_src = 45;
+ break;
+ case POWER9:
+ ev_mask_comb = 3;
+ ev_shift_comb = 10;
+ ev_mask_cache = 0xf;
+ ev_shift_cache = 20;
+ ev_mask_thd_cmp = 0x3ff;
+ ev_shift_thd_cmp = 40;
+ ev_mask_sm = 0x3;
+ ev_shift_sm = 50;
+ break;
+ default:
+ FAIL_IF_EXIT(1);
+ }
+}
+
+/* Return the extended regs mask value */
+static u64 perf_get_platform_reg_mask(void)
+{
+ if (have_hwcap2(PPC_FEATURE2_ARCH_3_1))
+ return PERF_POWER10_MASK;
+ if (have_hwcap2(PPC_FEATURE2_ARCH_3_00))
+ return PERF_POWER9_MASK;
+
+ return -1;
+}
+
+int check_extended_regs_support(void)
+{
+ int fd;
+ struct event event;
+
+ event_init(&event, 0x1001e);
+
+ event.attr.type = 4;
+ event.attr.sample_period = 1;
+ event.attr.disabled = 1;
+ event.attr.sample_type = PERF_SAMPLE_REGS_INTR;
+ event.attr.sample_regs_intr = platform_extended_mask;
+
+ fd = event_open(&event);
+ if (fd != -1)
+ return 0;
+
+ return -1;
+}
+
+int platform_check_for_tests(void)
+{
+ pvr = PVR_VER(mfspr(SPRN_PVR));
+
+ /*
+ * Check for supported platforms
+ * for sampling test
+ */
+ if ((pvr != POWER10) && (pvr != POWER9))
+ goto out;
+
+ /*
+ * Check PMU driver registered by looking for
+ * PPC_FEATURE2_EBB bit in AT_HWCAP2
+ */
+ if (!have_hwcap2(PPC_FEATURE2_EBB) || !have_hwcap2(PPC_FEATURE2_ARCH_3_00))
+ goto out;
+
+ return 0;
+
+out:
+ printf("%s: Tests unsupported for this platform\n", __func__);
+ return -1;
+}
+
+int check_pvr_for_sampling_tests(void)
+{
+ SKIP_IF(platform_check_for_tests());
+
+ platform_extended_mask = perf_get_platform_reg_mask();
+ /* check if platform supports extended regs */
+ if (check_extended_regs_support())
+ goto out;
+
+ init_ev_encodes();
+ return 0;
+
+out:
+ printf("%s: Sampling tests un-supported\n", __func__);
+ return -1;
+}
+
+/*
+ * Allocate mmap buffer of "mmap_pages" number of
+ * pages.
+ */
+void *event_sample_buf_mmap(int fd, int mmap_pages)
+{
+ size_t page_size = sysconf(_SC_PAGESIZE);
+ size_t mmap_size;
+ void *buff;
+
+ if (mmap_pages <= 0)
+ return NULL;
+
+ if (fd <= 0)
+ return NULL;
+
+ mmap_size = page_size * (1 + mmap_pages);
+ buff = mmap(NULL, mmap_size,
+ PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0);
+
+ if (buff == MAP_FAILED) {
+ perror("mmap() failed.");
+ return NULL;
+ }
+ return buff;
+}
+
+/*
+ * Post process the mmap buffer.
+ * - If sample_count != NULL then return count of total
+ * number of samples present in the mmap buffer.
+ * - If sample_count == NULL then return the address
+ * of first sample from the mmap buffer
+ */
+void *__event_read_samples(void *sample_buff, size_t *size, u64 *sample_count)
+{
+ size_t page_size = sysconf(_SC_PAGESIZE);
+ struct perf_event_header *header = sample_buff + page_size;
+ struct perf_event_mmap_page *metadata_page = sample_buff;
+ unsigned long data_head, data_tail;
+
+ /*
+ * PERF_RECORD_SAMPLE:
+ * struct {
+ * struct perf_event_header hdr;
+ * u64 data[];
+ * };
+ */
+
+ data_head = metadata_page->data_head;
+ /* sync memory before reading sample */
+ mb();
+ data_tail = metadata_page->data_tail;
+
+ /* Check for sample_count */
+ if (sample_count)
+ *sample_count = 0;
+
+ while (1) {
+ /*
+ * Reads the mmap data buffer by moving
+ * the data_tail to know the last read data.
+ * data_head points to head in data buffer.
+ * refer "struct perf_event_mmap_page" in
+ * "include/uapi/linux/perf_event.h".
+ */
+ if (data_head - data_tail < sizeof(header))
+ return NULL;
+
+ data_tail += sizeof(header);
+ if (header->type == PERF_RECORD_SAMPLE) {
+ *size = (header->size - sizeof(header));
+ if (!sample_count)
+ return sample_buff + page_size + data_tail;
+ data_tail += *size;
+ *sample_count += 1;
+ } else {
+ *size = (header->size - sizeof(header));
+ if ((metadata_page->data_tail + *size) > metadata_page->data_head)
+ data_tail = metadata_page->data_head;
+ else
+ data_tail += *size;
+ }
+ header = (struct perf_event_header *)((void *)header + header->size);
+ }
+ return NULL;
+}
+
+int collect_samples(void *sample_buff)
+{
+ u64 sample_count;
+ size_t size = 0;
+
+ __event_read_samples(sample_buff, &size, &sample_count);
+ return sample_count;
+}
+
+static void *perf_read_first_sample(void *sample_buff, size_t *size)
+{
+ return __event_read_samples(sample_buff, size, NULL);
+}
+
+u64 *get_intr_regs(struct event *event, void *sample_buff)
+{
+ u64 type = event->attr.sample_type;
+ u64 *intr_regs;
+ size_t size = 0;
+
+ if ((type ^ (PERF_SAMPLE_REGS_INTR | PERF_SAMPLE_BRANCH_STACK)) &&
+ (type ^ PERF_SAMPLE_REGS_INTR))
+ return NULL;
+
+ intr_regs = (u64 *)perf_read_first_sample(sample_buff, &size);
+ if (!intr_regs)
+ return NULL;
+
+ if (type & PERF_SAMPLE_BRANCH_STACK) {
+ /*
+ * PERF_RECORD_SAMPLE and PERF_SAMPLE_BRANCH_STACK:
+ * struct {
+ * struct perf_event_header hdr;
+ * u64 number_of_branches;
+ * struct perf_branch_entry[number_of_branches];
+ * u64 data[];
+ * };
+ * struct perf_branch_entry {
+ * u64 from;
+ * u64 to;
+ * u64 misc;
+ * };
+ */
+ intr_regs += ((*intr_regs) * 3) + 1;
+ }
+
+ /*
+ * First entry in the sample buffer used to specify
+ * PERF_SAMPLE_REGS_ABI_64, skip perf regs abi to access
+ * interrupt registers.
+ */
+ ++intr_regs;
+
+ return intr_regs;
+}
+
+static const int __perf_reg_mask(const char *register_name)
+{
+ if (!strcmp(register_name, "R0"))
+ return 0;
+ else if (!strcmp(register_name, "R1"))
+ return 1;
+ else if (!strcmp(register_name, "R2"))
+ return 2;
+ else if (!strcmp(register_name, "R3"))
+ return 3;
+ else if (!strcmp(register_name, "R4"))
+ return 4;
+ else if (!strcmp(register_name, "R5"))
+ return 5;
+ else if (!strcmp(register_name, "R6"))
+ return 6;
+ else if (!strcmp(register_name, "R7"))
+ return 7;
+ else if (!strcmp(register_name, "R8"))
+ return 8;
+ else if (!strcmp(register_name, "R9"))
+ return 9;
+ else if (!strcmp(register_name, "R10"))
+ return 10;
+ else if (!strcmp(register_name, "R11"))
+ return 11;
+ else if (!strcmp(register_name, "R12"))
+ return 12;
+ else if (!strcmp(register_name, "R13"))
+ return 13;
+ else if (!strcmp(register_name, "R14"))
+ return 14;
+ else if (!strcmp(register_name, "R15"))
+ return 15;
+ else if (!strcmp(register_name, "R16"))
+ return 16;
+ else if (!strcmp(register_name, "R17"))
+ return 17;
+ else if (!strcmp(register_name, "R18"))
+ return 18;
+ else if (!strcmp(register_name, "R19"))
+ return 19;
+ else if (!strcmp(register_name, "R20"))
+ return 20;
+ else if (!strcmp(register_name, "R21"))
+ return 21;
+ else if (!strcmp(register_name, "R22"))
+ return 22;
+ else if (!strcmp(register_name, "R23"))
+ return 23;
+ else if (!strcmp(register_name, "R24"))
+ return 24;
+ else if (!strcmp(register_name, "R25"))
+ return 25;
+ else if (!strcmp(register_name, "R26"))
+ return 26;
+ else if (!strcmp(register_name, "R27"))
+ return 27;
+ else if (!strcmp(register_name, "R28"))
+ return 28;
+ else if (!strcmp(register_name, "R29"))
+ return 29;
+ else if (!strcmp(register_name, "R30"))
+ return 30;
+ else if (!strcmp(register_name, "R31"))
+ return 31;
+ else if (!strcmp(register_name, "NIP"))
+ return 32;
+ else if (!strcmp(register_name, "MSR"))
+ return 33;
+ else if (!strcmp(register_name, "ORIG_R3"))
+ return 34;
+ else if (!strcmp(register_name, "CTR"))
+ return 35;
+ else if (!strcmp(register_name, "LINK"))
+ return 36;
+ else if (!strcmp(register_name, "XER"))
+ return 37;
+ else if (!strcmp(register_name, "CCR"))
+ return 38;
+ else if (!strcmp(register_name, "SOFTE"))
+ return 39;
+ else if (!strcmp(register_name, "TRAP"))
+ return 40;
+ else if (!strcmp(register_name, "DAR"))
+ return 41;
+ else if (!strcmp(register_name, "DSISR"))
+ return 42;
+ else if (!strcmp(register_name, "SIER"))
+ return 43;
+ else if (!strcmp(register_name, "MMCRA"))
+ return 44;
+ else if (!strcmp(register_name, "MMCR0"))
+ return 45;
+ else if (!strcmp(register_name, "MMCR1"))
+ return 46;
+ else if (!strcmp(register_name, "MMCR2"))
+ return 47;
+ else if (!strcmp(register_name, "MMCR3"))
+ return 48;
+ else if (!strcmp(register_name, "SIER2"))
+ return 49;
+ else if (!strcmp(register_name, "SIER3"))
+ return 50;
+ else if (!strcmp(register_name, "PMC1"))
+ return 51;
+ else if (!strcmp(register_name, "PMC2"))
+ return 52;
+ else if (!strcmp(register_name, "PMC3"))
+ return 53;
+ else if (!strcmp(register_name, "PMC4"))
+ return 54;
+ else if (!strcmp(register_name, "PMC5"))
+ return 55;
+ else if (!strcmp(register_name, "PMC6"))
+ return 56;
+ else if (!strcmp(register_name, "SDAR"))
+ return 57;
+ else if (!strcmp(register_name, "SIAR"))
+ return 58;
+ else
+ return -1;
+}
+
+u64 get_reg_value(u64 *intr_regs, char *register_name)
+{
+ int register_bit_position;
+
+ register_bit_position = __perf_reg_mask(register_name);
+
+ if (register_bit_position < 0 || (!((platform_extended_mask >>
+ (register_bit_position - 1)) & 1)))
+ return -1;
+
+ return *(intr_regs + register_bit_position);
+}
+
+int get_thresh_cmp_val(struct event event)
+{
+ int exp = 0;
+ u64 result = 0;
+ u64 value;
+
+ if (!have_hwcap2(PPC_FEATURE2_ARCH_3_1))
+ return EV_CODE_EXTRACT(event.attr.config, thd_cmp);
+
+ value = EV_CODE_EXTRACT(event.attr.config1, thd_cmp);
+
+ if (!value)
+ return value;
+
+ /*
+ * Incase of P10, thresh_cmp value is not part of raw event code
+ * and provided via attr.config1 parameter. To program threshold in MMCRA,
+ * take a 18 bit number N and shift right 2 places and increment
+ * the exponent E by 1 until the upper 10 bits of N are zero.
+ * Write E to the threshold exponent and write the lower 8 bits of N
+ * to the threshold mantissa.
+ * The max threshold that can be written is 261120.
+ */
+ if (value > 261120)
+ value = 261120;
+ while ((64 - __builtin_clzl(value)) > 8) {
+ exp++;
+ value >>= 2;
+ }
+
+ /*
+ * Note that it is invalid to write a mantissa with the
+ * upper 2 bits of mantissa being zero, unless the
+ * exponent is also zero.
+ */
+ if (!(value & 0xC0) && exp)
+ result = -1;
+ else
+ result = (exp << 8) | value;
+ return result;
+}
+
+/*
+ * Utility function to check for generic compat PMU
+ * by comparing base_platform value from auxv and real
+ * PVR value.
+ */
+static bool auxv_generic_compat_pmu(void)
+{
+ int base_pvr = 0;
+
+ if (!strcmp(auxv_base_platform(), "power9"))
+ base_pvr = POWER9;
+ else if (!strcmp(auxv_base_platform(), "power10"))
+ base_pvr = POWER10;
+
+ return (!base_pvr);
+}
+
+/*
+ * Check for generic compat PMU.
+ * First check for presence of pmu_name from
+ * "/sys/bus/event_source/devices/cpu/caps".
+ * If doesn't exist, fallback to using value
+ * auxv.
+ */
+bool check_for_generic_compat_pmu(void)
+{
+ char pmu_name[256];
+
+ memset(pmu_name, 0, sizeof(pmu_name));
+ if (read_sysfs_file("bus/event_source/devices/cpu/caps/pmu_name",
+ pmu_name, sizeof(pmu_name)) < 0)
+ return auxv_generic_compat_pmu();
+
+ if (!strcmp(pmu_name, "ISAv3"))
+ return true;
+ else
+ return false;
+}
+
+/*
+ * Check if system is booted in compat mode.
+ */
+bool check_for_compat_mode(void)
+{
+ char *platform = auxv_platform();
+ char *base_platform = auxv_base_platform();
+
+ return strcmp(platform, base_platform);
+}
diff --git a/tools/testing/selftests/powerpc/pmu/sampling_tests/misc.h b/tools/testing/selftests/powerpc/pmu/sampling_tests/misc.h
new file mode 100644
index 000000000000..64e25cce1435
--- /dev/null
+++ b/tools/testing/selftests/powerpc/pmu/sampling_tests/misc.h
@@ -0,0 +1,232 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * Copyright 2022, Athira Rajeev, IBM Corp.
+ * Copyright 2022, Madhavan Srinivasan, IBM Corp.
+ * Copyright 2022, Kajol Jain, IBM Corp.
+ */
+
+#include <sys/stat.h>
+#include "../event.h"
+
+#define POWER10 0x80
+#define POWER9 0x4e
+#define PERF_POWER9_MASK 0x7f8ffffffffffff
+#define PERF_POWER10_MASK 0x7ffffffffffffff
+
+#define MMCR0_FC56 0x00000010UL /* freeze counters 5 and 6 */
+#define MMCR0_PMCCEXT 0x00000200UL /* PMCCEXT control */
+#define MMCR1_RSQ 0x200000000000ULL /* radix scope qual field */
+#define BHRB_DISABLE 0x2000000000ULL /* MMCRA BHRB DISABLE bit */
+
+extern int ev_mask_pmcxsel, ev_shift_pmcxsel;
+extern int ev_mask_marked, ev_shift_marked;
+extern int ev_mask_comb, ev_shift_comb;
+extern int ev_mask_unit, ev_shift_unit;
+extern int ev_mask_pmc, ev_shift_pmc;
+extern int ev_mask_cache, ev_shift_cache;
+extern int ev_mask_sample, ev_shift_sample;
+extern int ev_mask_thd_sel, ev_shift_thd_sel;
+extern int ev_mask_thd_start, ev_shift_thd_start;
+extern int ev_mask_thd_stop, ev_shift_thd_stop;
+extern int ev_mask_thd_cmp, ev_shift_thd_cmp;
+extern int ev_mask_sm, ev_shift_sm;
+extern int ev_mask_rsq, ev_shift_rsq;
+extern int ev_mask_l2l3, ev_shift_l2l3;
+extern int ev_mask_mmcr3_src, ev_shift_mmcr3_src;
+extern int pvr;
+extern u64 platform_extended_mask;
+extern int check_pvr_for_sampling_tests(void);
+extern int platform_check_for_tests(void);
+
+/*
+ * Event code field extraction macro.
+ * Raw event code is combination of multiple
+ * fields. Macro to extract individual fields
+ *
+ * x - Raw event code value
+ * y - Field to extract
+ */
+#define EV_CODE_EXTRACT(x, y) \
+ ((x >> ev_shift_##y) & ev_mask_##y)
+
+void *event_sample_buf_mmap(int fd, int mmap_pages);
+void *__event_read_samples(void *sample_buff, size_t *size, u64 *sample_count);
+int collect_samples(void *sample_buff);
+u64 *get_intr_regs(struct event *event, void *sample_buff);
+u64 get_reg_value(u64 *intr_regs, char *register_name);
+int get_thresh_cmp_val(struct event event);
+bool check_for_generic_compat_pmu(void);
+bool check_for_compat_mode(void);
+
+static inline int get_mmcr0_fc56(u64 mmcr0, int pmc)
+{
+ return (mmcr0 & MMCR0_FC56);
+}
+
+static inline int get_mmcr0_pmccext(u64 mmcr0, int pmc)
+{
+ return (mmcr0 & MMCR0_PMCCEXT);
+}
+
+static inline int get_mmcr0_pmao(u64 mmcr0, int pmc)
+{
+ return ((mmcr0 >> 7) & 0x1);
+}
+
+static inline int get_mmcr0_cc56run(u64 mmcr0, int pmc)
+{
+ return ((mmcr0 >> 8) & 0x1);
+}
+
+static inline int get_mmcr0_pmcjce(u64 mmcr0, int pmc)
+{
+ return ((mmcr0 >> 14) & 0x1);
+}
+
+static inline int get_mmcr0_pmc1ce(u64 mmcr0, int pmc)
+{
+ return ((mmcr0 >> 15) & 0x1);
+}
+
+static inline int get_mmcr0_pmae(u64 mmcr0, int pmc)
+{
+ return ((mmcr0 >> 27) & 0x1);
+}
+
+static inline int get_mmcr1_pmcxsel(u64 mmcr1, int pmc)
+{
+ return ((mmcr1 >> ((24 - (((pmc) - 1) * 8))) & 0xff));
+}
+
+static inline int get_mmcr1_unit(u64 mmcr1, int pmc)
+{
+ return ((mmcr1 >> ((60 - (4 * ((pmc) - 1))))) & 0xf);
+}
+
+static inline int get_mmcr1_comb(u64 mmcr1, int pmc)
+{
+ return ((mmcr1 >> (38 - ((pmc - 1) * 2))) & 0x3);
+}
+
+static inline int get_mmcr1_cache(u64 mmcr1, int pmc)
+{
+ return ((mmcr1 >> 46) & 0x3);
+}
+
+static inline int get_mmcr1_rsq(u64 mmcr1, int pmc)
+{
+ return mmcr1 & MMCR1_RSQ;
+}
+
+static inline int get_mmcr2_fcs(u64 mmcr2, int pmc)
+{
+ return ((mmcr2 & (1ull << (63 - (((pmc) - 1) * 9)))) >> (63 - (((pmc) - 1) * 9)));
+}
+
+static inline int get_mmcr2_fcp(u64 mmcr2, int pmc)
+{
+ return ((mmcr2 & (1ull << (62 - (((pmc) - 1) * 9)))) >> (62 - (((pmc) - 1) * 9)));
+}
+
+static inline int get_mmcr2_fcpc(u64 mmcr2, int pmc)
+{
+ return ((mmcr2 & (1ull << (61 - (((pmc) - 1) * 9)))) >> (61 - (((pmc) - 1) * 9)));
+}
+
+static inline int get_mmcr2_fcm1(u64 mmcr2, int pmc)
+{
+ return ((mmcr2 & (1ull << (60 - (((pmc) - 1) * 9)))) >> (60 - (((pmc) - 1) * 9)));
+}
+
+static inline int get_mmcr2_fcm0(u64 mmcr2, int pmc)
+{
+ return ((mmcr2 & (1ull << (59 - (((pmc) - 1) * 9)))) >> (59 - (((pmc) - 1) * 9)));
+}
+
+static inline int get_mmcr2_fcwait(u64 mmcr2, int pmc)
+{
+ return ((mmcr2 & (1ull << (58 - (((pmc) - 1) * 9)))) >> (58 - (((pmc) - 1) * 9)));
+}
+
+static inline int get_mmcr2_fch(u64 mmcr2, int pmc)
+{
+ return ((mmcr2 & (1ull << (57 - (((pmc) - 1) * 9)))) >> (57 - (((pmc) - 1) * 9)));
+}
+
+static inline int get_mmcr2_fcti(u64 mmcr2, int pmc)
+{
+ return ((mmcr2 & (1ull << (56 - (((pmc) - 1) * 9)))) >> (56 - (((pmc) - 1) * 9)));
+}
+
+static inline int get_mmcr2_fcta(u64 mmcr2, int pmc)
+{
+ return ((mmcr2 & (1ull << (55 - (((pmc) - 1) * 9)))) >> (55 - (((pmc) - 1) * 9)));
+}
+
+static inline int get_mmcr2_l2l3(u64 mmcr2, int pmc)
+{
+ if (pvr == POWER10)
+ return ((mmcr2 & 0xf8) >> 3);
+ return 0;
+}
+
+static inline int get_mmcr3_src(u64 mmcr3, int pmc)
+{
+ if (pvr != POWER10)
+ return 0;
+ return ((mmcr3 >> ((49 - (15 * ((pmc) - 1))))) & 0x7fff);
+}
+
+static inline int get_mmcra_thd_cmp(u64 mmcra, int pmc)
+{
+ if (pvr == POWER10)
+ return ((mmcra >> 45) & 0x7ff);
+ return ((mmcra >> 45) & 0x3ff);
+}
+
+static inline int get_mmcra_sm(u64 mmcra, int pmc)
+{
+ return ((mmcra >> 42) & 0x3);
+}
+
+static inline u64 get_mmcra_bhrb_disable(u64 mmcra, int pmc)
+{
+ if (pvr == POWER10)
+ return mmcra & BHRB_DISABLE;
+ return 0;
+}
+
+static inline int get_mmcra_ifm(u64 mmcra, int pmc)
+{
+ return ((mmcra >> 30) & 0x3);
+}
+
+static inline int get_mmcra_thd_sel(u64 mmcra, int pmc)
+{
+ return ((mmcra >> 16) & 0x7);
+}
+
+static inline int get_mmcra_thd_start(u64 mmcra, int pmc)
+{
+ return ((mmcra >> 12) & 0xf);
+}
+
+static inline int get_mmcra_thd_stop(u64 mmcra, int pmc)
+{
+ return ((mmcra >> 8) & 0xf);
+}
+
+static inline int get_mmcra_rand_samp_elig(u64 mmcra, int pmc)
+{
+ return ((mmcra >> 4) & 0x7);
+}
+
+static inline int get_mmcra_sample_mode(u64 mmcra, int pmc)
+{
+ return ((mmcra >> 1) & 0x3);
+}
+
+static inline int get_mmcra_marked(u64 mmcra, int pmc)
+{
+ return mmcra & 0x1;
+}
diff --git a/tools/testing/selftests/powerpc/pmu/sampling_tests/mmcr0_cc56run_test.c b/tools/testing/selftests/powerpc/pmu/sampling_tests/mmcr0_cc56run_test.c
new file mode 100644
index 000000000000..ae4172f83817
--- /dev/null
+++ b/tools/testing/selftests/powerpc/pmu/sampling_tests/mmcr0_cc56run_test.c
@@ -0,0 +1,59 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright 2022, Athira Rajeev, IBM Corp.
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+
+#include "../event.h"
+#include "misc.h"
+#include "utils.h"
+
+extern void thirty_two_instruction_loop(int loops);
+
+/*
+ * A perf sampling test for mmcr0
+ * field: cc56run.
+ */
+static int mmcr0_cc56run(void)
+{
+ struct event event;
+ u64 *intr_regs;
+
+ /* Check for platform support for the test */
+ SKIP_IF(check_pvr_for_sampling_tests());
+ SKIP_IF(!have_hwcap2(PPC_FEATURE2_ARCH_3_1));
+
+ /* Init the event for the sampling test */
+ event_init_sampling(&event, 0x500fa);
+ event.attr.sample_regs_intr = platform_extended_mask;
+ FAIL_IF(event_open(&event));
+ event.mmap_buffer = event_sample_buf_mmap(event.fd, 1);
+
+ FAIL_IF(event_enable(&event));
+
+ /* workload to make the event overflow */
+ thirty_two_instruction_loop(10000);
+
+ FAIL_IF(event_disable(&event));
+
+ /* Check for sample count */
+ FAIL_IF(!collect_samples(event.mmap_buffer));
+
+ intr_regs = get_intr_regs(&event, event.mmap_buffer);
+
+ /* Check for intr_regs */
+ FAIL_IF(!intr_regs);
+
+ /* Verify that cc56run bit is set in MMCR0 */
+ FAIL_IF(!get_mmcr0_cc56run(get_reg_value(intr_regs, "MMCR0"), 5));
+
+ event_close(&event);
+ return 0;
+}
+
+int main(void)
+{
+ return test_harness(mmcr0_cc56run, "mmcr0_cc56run");
+}
diff --git a/tools/testing/selftests/powerpc/pmu/sampling_tests/mmcr0_exceptionbits_test.c b/tools/testing/selftests/powerpc/pmu/sampling_tests/mmcr0_exceptionbits_test.c
new file mode 100644
index 000000000000..982aa56d2171
--- /dev/null
+++ b/tools/testing/selftests/powerpc/pmu/sampling_tests/mmcr0_exceptionbits_test.c
@@ -0,0 +1,59 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright 2022, Athira Rajeev, IBM Corp.
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+
+#include "../event.h"
+#include "misc.h"
+#include "utils.h"
+
+extern void thirty_two_instruction_loop(int loops);
+
+/*
+ * A perf sampling test for mmcr0
+ * fields : pmae, pmao.
+ */
+static int mmcr0_exceptionbits(void)
+{
+ struct event event;
+ u64 *intr_regs;
+
+ /* Check for platform support for the test */
+ SKIP_IF(check_pvr_for_sampling_tests());
+
+ /* Init the event for the sampling test */
+ event_init_sampling(&event, 0x500fa);
+ event.attr.sample_regs_intr = platform_extended_mask;
+ FAIL_IF(event_open(&event));
+ event.mmap_buffer = event_sample_buf_mmap(event.fd, 1);
+
+ FAIL_IF(event_enable(&event));
+
+ /* workload to make the event overflow */
+ thirty_two_instruction_loop(10000);
+
+ FAIL_IF(event_disable(&event));
+
+ /* Check for sample count */
+ FAIL_IF(!collect_samples(event.mmap_buffer));
+
+ intr_regs = get_intr_regs(&event, event.mmap_buffer);
+
+ /* Check for intr_regs */
+ FAIL_IF(!intr_regs);
+
+ /* Verify that pmae is cleared and pmao is set in MMCR0 */
+ FAIL_IF(get_mmcr0_pmae(get_reg_value(intr_regs, "MMCR0"), 5));
+ FAIL_IF(!get_mmcr0_pmao(get_reg_value(intr_regs, "MMCR0"), 5));
+
+ event_close(&event);
+ return 0;
+}
+
+int main(void)
+{
+ return test_harness(mmcr0_exceptionbits, "mmcr0_exceptionbits");
+}
diff --git a/tools/testing/selftests/powerpc/pmu/sampling_tests/mmcr0_fc56_pmc1ce_test.c b/tools/testing/selftests/powerpc/pmu/sampling_tests/mmcr0_fc56_pmc1ce_test.c
new file mode 100644
index 000000000000..1c1813c182c0
--- /dev/null
+++ b/tools/testing/selftests/powerpc/pmu/sampling_tests/mmcr0_fc56_pmc1ce_test.c
@@ -0,0 +1,59 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright 2022, Athira Rajeev, IBM Corp.
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+
+#include "../event.h"
+#include "misc.h"
+#include "utils.h"
+
+extern void thirty_two_instruction_loop(int loops);
+
+/*
+ * A perf sampling test for mmcr0
+ * fields: fc56, pmc1ce.
+ */
+static int mmcr0_fc56_pmc1ce(void)
+{
+ struct event event;
+ u64 *intr_regs;
+
+ /* Check for platform support for the test */
+ SKIP_IF(check_pvr_for_sampling_tests());
+
+ /* Init the event for the sampling test */
+ event_init_sampling(&event, 0x1001e);
+ event.attr.sample_regs_intr = platform_extended_mask;
+ FAIL_IF(event_open(&event));
+ event.mmap_buffer = event_sample_buf_mmap(event.fd, 1);
+
+ FAIL_IF(event_enable(&event));
+
+ /* workload to make the event overflow */
+ thirty_two_instruction_loop(10000);
+
+ FAIL_IF(event_disable(&event));
+
+ /* Check for sample count */
+ FAIL_IF(!collect_samples(event.mmap_buffer));
+
+ intr_regs = get_intr_regs(&event, event.mmap_buffer);
+
+ /* Check for intr_regs */
+ FAIL_IF(!intr_regs);
+
+ /* Verify that fc56, pmc1ce fields are set in MMCR0 */
+ FAIL_IF(!get_mmcr0_fc56(get_reg_value(intr_regs, "MMCR0"), 1));
+ FAIL_IF(!get_mmcr0_pmc1ce(get_reg_value(intr_regs, "MMCR0"), 1));
+
+ event_close(&event);
+ return 0;
+}
+
+int main(void)
+{
+ return test_harness(mmcr0_fc56_pmc1ce, "mmcr0_fc56_pmc1ce");
+}
diff --git a/tools/testing/selftests/powerpc/pmu/sampling_tests/mmcr0_fc56_pmc56_test.c b/tools/testing/selftests/powerpc/pmu/sampling_tests/mmcr0_fc56_pmc56_test.c
new file mode 100644
index 000000000000..332d24b5ab9c
--- /dev/null
+++ b/tools/testing/selftests/powerpc/pmu/sampling_tests/mmcr0_fc56_pmc56_test.c
@@ -0,0 +1,58 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright 2022, Athira Rajeev, IBM Corp.
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+
+#include "../event.h"
+#include "misc.h"
+#include "utils.h"
+
+extern void thirty_two_instruction_loop(int loops);
+
+/*
+ * A perf sampling test for mmcr0
+ * fields: fc56_pmc56
+ */
+static int mmcr0_fc56_pmc56(void)
+{
+ struct event event;
+ u64 *intr_regs;
+
+ /* Check for platform support for the test */
+ SKIP_IF(check_pvr_for_sampling_tests());
+
+ /* Init the event for the sampling test */
+ event_init_sampling(&event, 0x500fa);
+ event.attr.sample_regs_intr = platform_extended_mask;
+ FAIL_IF(event_open(&event));
+ event.mmap_buffer = event_sample_buf_mmap(event.fd, 1);
+
+ FAIL_IF(event_enable(&event));
+
+ /* workload to make the event overflow */
+ thirty_two_instruction_loop(10000);
+
+ FAIL_IF(event_disable(&event));
+
+ /* Check for sample count */
+ FAIL_IF(!collect_samples(event.mmap_buffer));
+
+ intr_regs = get_intr_regs(&event, event.mmap_buffer);
+
+ /* Check for intr_regs */
+ FAIL_IF(!intr_regs);
+
+ /* Verify that fc56 is not set in MMCR0 when using PMC5 */
+ FAIL_IF(get_mmcr0_fc56(get_reg_value(intr_regs, "MMCR0"), 5));
+
+ event_close(&event);
+ return 0;
+}
+
+int main(void)
+{
+ return test_harness(mmcr0_fc56_pmc56, "mmcr0_fc56_pmc56");
+}
diff --git a/tools/testing/selftests/powerpc/pmu/sampling_tests/mmcr0_pmccext_test.c b/tools/testing/selftests/powerpc/pmu/sampling_tests/mmcr0_pmccext_test.c
new file mode 100644
index 000000000000..dfd186cd8eec
--- /dev/null
+++ b/tools/testing/selftests/powerpc/pmu/sampling_tests/mmcr0_pmccext_test.c
@@ -0,0 +1,59 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright 2022, Athira Rajeev, IBM Corp.
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+
+#include "../event.h"
+#include "misc.h"
+#include "utils.h"
+
+extern void thirty_two_instruction_loop(int loops);
+
+/*
+ * A perf sampling test for mmcr0
+ * field: pmccext
+ */
+static int mmcr0_pmccext(void)
+{
+ struct event event;
+ u64 *intr_regs;
+
+ /* Check for platform support for the test */
+ SKIP_IF(check_pvr_for_sampling_tests());
+ SKIP_IF(!have_hwcap2(PPC_FEATURE2_ARCH_3_1));
+
+ /* Init the event for the sampling test */
+ event_init_sampling(&event, 0x4001e);
+ event.attr.sample_regs_intr = platform_extended_mask;
+ FAIL_IF(event_open(&event));
+ event.mmap_buffer = event_sample_buf_mmap(event.fd, 1);
+
+ FAIL_IF(event_enable(&event));
+
+ /* workload to make the event overflow */
+ thirty_two_instruction_loop(10000);
+
+ FAIL_IF(event_disable(&event));
+
+ /* Check for sample count */
+ FAIL_IF(!collect_samples(event.mmap_buffer));
+
+ intr_regs = get_intr_regs(&event, event.mmap_buffer);
+
+ /* Check for intr_regs */
+ FAIL_IF(!intr_regs);
+
+ /* Verify that pmccext field is set in MMCR0 */
+ FAIL_IF(!get_mmcr0_pmccext(get_reg_value(intr_regs, "MMCR0"), 4));
+
+ event_close(&event);
+ return 0;
+}
+
+int main(void)
+{
+ return test_harness(mmcr0_pmccext, "mmcr0_pmccext");
+}
diff --git a/tools/testing/selftests/powerpc/pmu/sampling_tests/mmcr0_pmcjce_test.c b/tools/testing/selftests/powerpc/pmu/sampling_tests/mmcr0_pmcjce_test.c
new file mode 100644
index 000000000000..fdd8ed9bf725
--- /dev/null
+++ b/tools/testing/selftests/powerpc/pmu/sampling_tests/mmcr0_pmcjce_test.c
@@ -0,0 +1,58 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright 2022, Athira Rajeev, IBM Corp.
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+
+#include "../event.h"
+#include "misc.h"
+#include "utils.h"
+
+extern void thirty_two_instruction_loop(int loops);
+
+/*
+ * A perf sampling test for mmcr0
+ * field: pmcjce
+ */
+static int mmcr0_pmcjce(void)
+{
+ struct event event;
+ u64 *intr_regs;
+
+ /* Check for platform support for the test */
+ SKIP_IF(check_pvr_for_sampling_tests());
+
+ /* Init the event for the sampling test */
+ event_init_sampling(&event, 0x500fa);
+ event.attr.sample_regs_intr = platform_extended_mask;
+ FAIL_IF(event_open(&event));
+ event.mmap_buffer = event_sample_buf_mmap(event.fd, 1);
+
+ FAIL_IF(event_enable(&event));
+
+ /* workload to make the event overflow */
+ thirty_two_instruction_loop(10000);
+
+ FAIL_IF(event_disable(&event));
+
+ /* Check for sample count */
+ FAIL_IF(!collect_samples(event.mmap_buffer));
+
+ intr_regs = get_intr_regs(&event, event.mmap_buffer);
+
+ /* Check for intr_regs */
+ FAIL_IF(!intr_regs);
+
+ /* Verify that pmcjce field is set in MMCR0 */
+ FAIL_IF(!get_mmcr0_pmcjce(get_reg_value(intr_regs, "MMCR0"), 5));
+
+ event_close(&event);
+ return 0;
+}
+
+int main(void)
+{
+ return test_harness(mmcr0_pmcjce, "mmcr0_pmcjce");
+}
diff --git a/tools/testing/selftests/powerpc/pmu/sampling_tests/mmcr1_comb_test.c b/tools/testing/selftests/powerpc/pmu/sampling_tests/mmcr1_comb_test.c
new file mode 100644
index 000000000000..5aea6499ee9a
--- /dev/null
+++ b/tools/testing/selftests/powerpc/pmu/sampling_tests/mmcr1_comb_test.c
@@ -0,0 +1,66 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright 2022, Athira Rajeev, IBM Corp.
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+
+#include "../event.h"
+#include "misc.h"
+#include "utils.h"
+
+/* All successful D-side store dispatches for this thread that were L2 Miss */
+#define EventCode 0x46880
+
+extern void thirty_two_instruction_loop_with_ll_sc(u64 loops, u64 *ll_sc_target);
+
+/*
+ * A perf sampling test for mmcr1
+ * fields : comb.
+ */
+static int mmcr1_comb(void)
+{
+ struct event event;
+ u64 *intr_regs;
+ u64 dummy;
+
+ /* Check for platform support for the test */
+ SKIP_IF(check_pvr_for_sampling_tests());
+
+ /* Init the event for the sampling test */
+ event_init_sampling(&event, EventCode);
+ event.attr.sample_regs_intr = platform_extended_mask;
+ FAIL_IF(event_open(&event));
+ event.mmap_buffer = event_sample_buf_mmap(event.fd, 1);
+
+ FAIL_IF(event_enable(&event));
+
+ /* workload to make the event overflow */
+ thirty_two_instruction_loop_with_ll_sc(10000000, &dummy);
+
+ FAIL_IF(event_disable(&event));
+
+ /* Check for sample count */
+ FAIL_IF(!collect_samples(event.mmap_buffer));
+
+ intr_regs = get_intr_regs(&event, event.mmap_buffer);
+
+ /* Check for intr_regs */
+ FAIL_IF(!intr_regs);
+
+ /*
+ * Verify that comb field match with
+ * corresponding event code fields
+ */
+ FAIL_IF(EV_CODE_EXTRACT(event.attr.config, comb) !=
+ get_mmcr1_comb(get_reg_value(intr_regs, "MMCR1"), 4));
+
+ event_close(&event);
+ return 0;
+}
+
+int main(void)
+{
+ return test_harness(mmcr1_comb, "mmcr1_comb");
+}
diff --git a/tools/testing/selftests/powerpc/pmu/sampling_tests/mmcr1_sel_unit_cache_test.c b/tools/testing/selftests/powerpc/pmu/sampling_tests/mmcr1_sel_unit_cache_test.c
new file mode 100644
index 000000000000..f0c003282630
--- /dev/null
+++ b/tools/testing/selftests/powerpc/pmu/sampling_tests/mmcr1_sel_unit_cache_test.c
@@ -0,0 +1,77 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright 2022, Athira Rajeev, IBM Corp.
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+
+#include "../event.h"
+#include "misc.h"
+#include "utils.h"
+
+#define MALLOC_SIZE (0x10000 * 10) /* Ought to be enough .. */
+
+/* The data cache was reloaded from local core's L3 due to a demand load */
+#define EventCode 0x21c040
+
+/*
+ * A perf sampling test for mmcr1
+ * fields : pmcxsel, unit, cache.
+ */
+static int mmcr1_sel_unit_cache(void)
+{
+ struct event event;
+ u64 *intr_regs;
+ char *p;
+ int i;
+
+ /* Check for platform support for the test */
+ SKIP_IF(check_pvr_for_sampling_tests());
+
+ p = malloc(MALLOC_SIZE);
+ FAIL_IF(!p);
+
+ /* Init the event for the sampling test */
+ event_init_sampling(&event, EventCode);
+ event.attr.sample_regs_intr = platform_extended_mask;
+ event.attr.sample_period = 1;
+ FAIL_IF(event_open(&event));
+ event.mmap_buffer = event_sample_buf_mmap(event.fd, 1);
+
+ event_enable(&event);
+
+ /* workload to make the event overflow */
+ for (i = 0; i < MALLOC_SIZE; i += 0x10000)
+ p[i] = i;
+
+ event_disable(&event);
+
+ /* Check for sample count */
+ FAIL_IF(!collect_samples(event.mmap_buffer));
+
+ intr_regs = get_intr_regs(&event, event.mmap_buffer);
+
+ /* Check for intr_regs */
+ FAIL_IF(!intr_regs);
+
+ /*
+ * Verify that pmcxsel, unit and cache field of MMCR1
+ * match with corresponding event code fields
+ */
+ FAIL_IF(EV_CODE_EXTRACT(event.attr.config, pmcxsel) !=
+ get_mmcr1_pmcxsel(get_reg_value(intr_regs, "MMCR1"), 1));
+ FAIL_IF(EV_CODE_EXTRACT(event.attr.config, unit) !=
+ get_mmcr1_unit(get_reg_value(intr_regs, "MMCR1"), 1));
+ FAIL_IF(EV_CODE_EXTRACT(event.attr.config, cache) !=
+ get_mmcr1_cache(get_reg_value(intr_regs, "MMCR1"), 1));
+
+ free(p);
+ event_close(&event);
+ return 0;
+}
+
+int main(void)
+{
+ FAIL_IF(test_harness(mmcr1_sel_unit_cache, "mmcr1_sel_unit_cache"));
+}
diff --git a/tools/testing/selftests/powerpc/pmu/sampling_tests/mmcr2_fcs_fch_test.c b/tools/testing/selftests/powerpc/pmu/sampling_tests/mmcr2_fcs_fch_test.c
new file mode 100644
index 000000000000..4e242fd61b25
--- /dev/null
+++ b/tools/testing/selftests/powerpc/pmu/sampling_tests/mmcr2_fcs_fch_test.c
@@ -0,0 +1,85 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright 2022, Madhavan Srinivasan, IBM Corp.
+ */
+
+#include <signal.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <sys/types.h>
+
+#include "../event.h"
+#include "misc.h"
+#include "utils.h"
+
+extern void thirty_two_instruction_loop(int loops);
+
+static bool is_hv;
+
+static void sig_usr2_handler(int signum, siginfo_t *info, void *data)
+{
+ ucontext_t *uctx = data;
+
+ is_hv = !!(uctx->uc_mcontext.gp_regs[PT_MSR] & MSR_HV);
+}
+
+/*
+ * A perf sampling test for mmcr2
+ * fields : fcs, fch.
+ */
+static int mmcr2_fcs_fch(void)
+{
+ struct sigaction sigact = {
+ .sa_sigaction = sig_usr2_handler,
+ .sa_flags = SA_SIGINFO
+ };
+ struct event event;
+ u64 *intr_regs;
+
+ FAIL_IF(sigaction(SIGUSR2, &sigact, NULL));
+ FAIL_IF(kill(getpid(), SIGUSR2));
+
+ /* Check for platform support for the test */
+ SKIP_IF(check_pvr_for_sampling_tests());
+
+ /* Init the event for the sampling test */
+ event_init_sampling(&event, 0x1001e);
+ event.attr.sample_regs_intr = platform_extended_mask;
+ event.attr.exclude_kernel = 1;
+ FAIL_IF(event_open(&event));
+ event.mmap_buffer = event_sample_buf_mmap(event.fd, 1);
+
+ FAIL_IF(event_enable(&event));
+
+ /* workload to make the event overflow */
+ thirty_two_instruction_loop(10000);
+
+ FAIL_IF(event_disable(&event));
+
+ /* Check for sample count */
+ FAIL_IF(!collect_samples(event.mmap_buffer));
+
+ intr_regs = get_intr_regs(&event, event.mmap_buffer);
+
+ /* Check for intr_regs */
+ FAIL_IF(!intr_regs);
+
+ /*
+ * Verify that fcs and fch field of MMCR2 match
+ * with corresponding modifier fields.
+ */
+ if (is_hv)
+ FAIL_IF(event.attr.exclude_kernel !=
+ get_mmcr2_fch(get_reg_value(intr_regs, "MMCR2"), 1));
+ else
+ FAIL_IF(event.attr.exclude_kernel !=
+ get_mmcr2_fcs(get_reg_value(intr_regs, "MMCR2"), 1));
+
+ event_close(&event);
+ return 0;
+}
+
+int main(void)
+{
+ return test_harness(mmcr2_fcs_fch, "mmcr2_fcs_fch");
+}
diff --git a/tools/testing/selftests/powerpc/pmu/sampling_tests/mmcr2_l2l3_test.c b/tools/testing/selftests/powerpc/pmu/sampling_tests/mmcr2_l2l3_test.c
new file mode 100644
index 000000000000..ceca597016b2
--- /dev/null
+++ b/tools/testing/selftests/powerpc/pmu/sampling_tests/mmcr2_l2l3_test.c
@@ -0,0 +1,74 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright 2022, Madhavan Srinivasan, IBM Corp.
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+
+#include "../event.h"
+#include "misc.h"
+#include "utils.h"
+
+/* All successful D-side store dispatches for this thread */
+#define EventCode 0x010000046080
+
+#define MALLOC_SIZE (0x10000 * 10) /* Ought to be enough .. */
+
+/*
+ * A perf sampling test for mmcr2
+ * fields : l2l3
+ */
+static int mmcr2_l2l3(void)
+{
+ struct event event;
+ u64 *intr_regs;
+ char *p;
+ int i;
+
+ /* Check for platform support for the test */
+ SKIP_IF(check_pvr_for_sampling_tests());
+ SKIP_IF(!have_hwcap2(PPC_FEATURE2_ARCH_3_1));
+
+ /* Init the event for the sampling test */
+ event_init_sampling(&event, EventCode);
+ event.attr.sample_regs_intr = platform_extended_mask;
+ FAIL_IF(event_open(&event));
+ event.mmap_buffer = event_sample_buf_mmap(event.fd, 1);
+
+ FAIL_IF(event_enable(&event));
+
+ /* workload to make the event overflow */
+ p = malloc(MALLOC_SIZE);
+ FAIL_IF(!p);
+
+ for (i = 0; i < MALLOC_SIZE; i += 0x10000)
+ p[i] = i;
+
+ FAIL_IF(event_disable(&event));
+
+ /* Check for sample count */
+ FAIL_IF(!collect_samples(event.mmap_buffer));
+
+ intr_regs = get_intr_regs(&event, event.mmap_buffer);
+
+ /* Check for intr_regs */
+ FAIL_IF(!intr_regs);
+
+ /*
+ * Verify that l2l3 field of MMCR2 match with
+ * corresponding event code field
+ */
+ FAIL_IF(EV_CODE_EXTRACT(event.attr.config, l2l3) !=
+ get_mmcr2_l2l3(get_reg_value(intr_regs, "MMCR2"), 4));
+
+ event_close(&event);
+ free(p);
+
+ return 0;
+}
+
+int main(void)
+{
+ return test_harness(mmcr2_l2l3, "mmcr2_l2l3");
+}
diff --git a/tools/testing/selftests/powerpc/pmu/sampling_tests/mmcr3_src_test.c b/tools/testing/selftests/powerpc/pmu/sampling_tests/mmcr3_src_test.c
new file mode 100644
index 000000000000..e154e2a4cc3a
--- /dev/null
+++ b/tools/testing/selftests/powerpc/pmu/sampling_tests/mmcr3_src_test.c
@@ -0,0 +1,67 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright 2022, Kajol Jain, IBM Corp.
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+
+#include "../event.h"
+#include "misc.h"
+#include "utils.h"
+
+extern void thirty_two_instruction_loop_with_ll_sc(u64 loops, u64 *ll_sc_target);
+
+/* The data cache was reloaded from local core's L3 due to a demand load */
+#define EventCode 0x1340000001c040
+
+/*
+ * A perf sampling test for mmcr3
+ * fields.
+ */
+static int mmcr3_src(void)
+{
+ struct event event;
+ u64 *intr_regs;
+ u64 dummy;
+
+ /* Check for platform support for the test */
+ SKIP_IF(check_pvr_for_sampling_tests());
+ SKIP_IF(!have_hwcap2(PPC_FEATURE2_ARCH_3_1));
+
+ /* Init the event for the sampling test */
+ event_init_sampling(&event, EventCode);
+ event.attr.sample_regs_intr = platform_extended_mask;
+ FAIL_IF(event_open(&event));
+ event.mmap_buffer = event_sample_buf_mmap(event.fd, 1);
+
+ FAIL_IF(event_enable(&event));
+
+ /* workload to make event overflow */
+ thirty_two_instruction_loop_with_ll_sc(1000000, &dummy);
+
+ FAIL_IF(event_disable(&event));
+
+ /* Check for sample count */
+ FAIL_IF(!collect_samples(event.mmap_buffer));
+
+ intr_regs = get_intr_regs(&event, event.mmap_buffer);
+
+ /* Check for intr_regs */
+ FAIL_IF(!intr_regs);
+
+ /*
+ * Verify that src field of MMCR3 match with
+ * corresponding event code field
+ */
+ FAIL_IF(EV_CODE_EXTRACT(event.attr.config, mmcr3_src) !=
+ get_mmcr3_src(get_reg_value(intr_regs, "MMCR3"), 1));
+
+ event_close(&event);
+ return 0;
+}
+
+int main(void)
+{
+ return test_harness(mmcr3_src, "mmcr3_src");
+}
diff --git a/tools/testing/selftests/powerpc/pmu/sampling_tests/mmcra_bhrb_any_test.c b/tools/testing/selftests/powerpc/pmu/sampling_tests/mmcra_bhrb_any_test.c
new file mode 100644
index 000000000000..14854694af62
--- /dev/null
+++ b/tools/testing/selftests/powerpc/pmu/sampling_tests/mmcra_bhrb_any_test.c
@@ -0,0 +1,65 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright 2022, Kajol Jain, IBM Corp.
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+
+#include "../event.h"
+#include "misc.h"
+#include "utils.h"
+
+extern void thirty_two_instruction_loop(int loops);
+
+/* Instructions */
+#define EventCode 0x500fa
+
+/* ifm field for any branch mode */
+#define IFM_ANY_BRANCH 0x0
+
+/*
+ * A perf sampling test for mmcra
+ * field: ifm for bhrb any call.
+ */
+static int mmcra_bhrb_any_test(void)
+{
+ struct event event;
+ u64 *intr_regs;
+
+ /* Check for platform support for the test */
+ SKIP_IF(check_pvr_for_sampling_tests());
+
+ /* Init the event for the sampling test */
+ event_init_sampling(&event, EventCode);
+ event.attr.sample_regs_intr = platform_extended_mask;
+ event.attr.sample_type |= PERF_SAMPLE_BRANCH_STACK;
+ event.attr.branch_sample_type = PERF_SAMPLE_BRANCH_ANY;
+ event.attr.exclude_kernel = 1;
+
+ FAIL_IF(event_open(&event));
+ event.mmap_buffer = event_sample_buf_mmap(event.fd, 1);
+
+ FAIL_IF(event_enable(&event));
+
+ /* workload to make the event overflow */
+ thirty_two_instruction_loop(10000);
+
+ FAIL_IF(event_disable(&event));
+
+ intr_regs = get_intr_regs(&event, event.mmap_buffer);
+
+ /* Check for intr_regs */
+ FAIL_IF(!intr_regs);
+
+ /* Verify that ifm bit is set properly in MMCRA */
+ FAIL_IF(get_mmcra_ifm(get_reg_value(intr_regs, "MMCRA"), 5) != IFM_ANY_BRANCH);
+
+ event_close(&event);
+ return 0;
+}
+
+int main(void)
+{
+ return test_harness(mmcra_bhrb_any_test, "mmcra_bhrb_any_test");
+}
diff --git a/tools/testing/selftests/powerpc/pmu/sampling_tests/mmcra_bhrb_cond_test.c b/tools/testing/selftests/powerpc/pmu/sampling_tests/mmcra_bhrb_cond_test.c
new file mode 100644
index 000000000000..3e08176eb7f8
--- /dev/null
+++ b/tools/testing/selftests/powerpc/pmu/sampling_tests/mmcra_bhrb_cond_test.c
@@ -0,0 +1,69 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright 2022, Kajol Jain, IBM Corp.
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+
+#include "../event.h"
+#include "misc.h"
+#include "utils.h"
+
+extern void thirty_two_instruction_loop(int loops);
+
+/* Instructions */
+#define EventCode 0x500fa
+
+/* ifm field for conditional branch mode */
+#define IFM_COND_BRANCH 0x3
+
+/*
+ * A perf sampling test for mmcra
+ * field: ifm for bhrb cond call.
+ */
+static int mmcra_bhrb_cond_test(void)
+{
+ struct event event;
+ u64 *intr_regs;
+
+ /*
+ * Check for platform support for the test.
+ * This test is only aplicable on power10
+ */
+ SKIP_IF(check_pvr_for_sampling_tests());
+ SKIP_IF(!have_hwcap2(PPC_FEATURE2_ARCH_3_1));
+
+ /* Init the event for the sampling test */
+ event_init_sampling(&event, EventCode);
+ event.attr.sample_regs_intr = platform_extended_mask;
+ event.attr.sample_type |= PERF_SAMPLE_BRANCH_STACK;
+ event.attr.branch_sample_type = PERF_SAMPLE_BRANCH_COND;
+ event.attr.exclude_kernel = 1;
+
+ FAIL_IF(event_open(&event));
+ event.mmap_buffer = event_sample_buf_mmap(event.fd, 1);
+
+ FAIL_IF(event_enable(&event));
+
+ /* workload to make the event overflow */
+ thirty_two_instruction_loop(10000);
+
+ FAIL_IF(event_disable(&event));
+
+ intr_regs = get_intr_regs(&event, event.mmap_buffer);
+
+ /* Check for intr_regs */
+ FAIL_IF(!intr_regs);
+
+ /* Verify that ifm bit is set properly in MMCRA */
+ FAIL_IF(get_mmcra_ifm(get_reg_value(intr_regs, "MMCRA"), 5) != IFM_COND_BRANCH);
+
+ event_close(&event);
+ return 0;
+}
+
+int main(void)
+{
+ return test_harness(mmcra_bhrb_cond_test, "mmcra_bhrb_cond_test");
+}
diff --git a/tools/testing/selftests/powerpc/pmu/sampling_tests/mmcra_bhrb_disable_no_branch_test.c b/tools/testing/selftests/powerpc/pmu/sampling_tests/mmcra_bhrb_disable_no_branch_test.c
new file mode 100644
index 000000000000..488c865387e4
--- /dev/null
+++ b/tools/testing/selftests/powerpc/pmu/sampling_tests/mmcra_bhrb_disable_no_branch_test.c
@@ -0,0 +1,64 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright 2022, Kajol Jain, IBM Corp.
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+
+#include "../event.h"
+#include "misc.h"
+#include "utils.h"
+
+extern void thirty_two_instruction_loop(int loops);
+
+/* Instructions */
+#define EventCode 0x500fa
+
+/*
+ * A perf sampling test for mmcra
+ * field: bhrb_disable.
+ */
+static int mmcra_bhrb_disable_no_branch_test(void)
+{
+ struct event event;
+ u64 *intr_regs;
+
+ /*
+ * Check for platform support for the test.
+ * This test is only aplicable on power10
+ */
+ SKIP_IF(check_pvr_for_sampling_tests());
+ SKIP_IF(!have_hwcap2(PPC_FEATURE2_ARCH_3_1));
+
+ /* Init the event for the sampling test */
+ event_init_sampling(&event, EventCode);
+ event.attr.sample_regs_intr = platform_extended_mask;
+ event.attr.exclude_kernel = 1;
+
+ FAIL_IF(event_open(&event));
+ event.mmap_buffer = event_sample_buf_mmap(event.fd, 1);
+
+ FAIL_IF(event_enable(&event));
+
+ /* workload to make the event overflow */
+ thirty_two_instruction_loop(10000);
+
+ FAIL_IF(event_disable(&event));
+
+ intr_regs = get_intr_regs(&event, event.mmap_buffer);
+
+ /* Check for intr_regs */
+ FAIL_IF(!intr_regs);
+
+ /* Verify that bhrb_disable bit is set in MMCRA for non-branch samples */
+ FAIL_IF(!get_mmcra_bhrb_disable(get_reg_value(intr_regs, "MMCRA"), 5));
+
+ event_close(&event);
+ return 0;
+}
+
+int main(void)
+{
+ return test_harness(mmcra_bhrb_disable_no_branch_test, "mmcra_bhrb_disable_no_branch_test");
+}
diff --git a/tools/testing/selftests/powerpc/pmu/sampling_tests/mmcra_bhrb_disable_test.c b/tools/testing/selftests/powerpc/pmu/sampling_tests/mmcra_bhrb_disable_test.c
new file mode 100644
index 000000000000..186a853c0f62
--- /dev/null
+++ b/tools/testing/selftests/powerpc/pmu/sampling_tests/mmcra_bhrb_disable_test.c
@@ -0,0 +1,66 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright 2022, Kajol Jain, IBM Corp.
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+
+#include "../event.h"
+#include "misc.h"
+#include "utils.h"
+
+extern void thirty_two_instruction_loop(int loops);
+
+/* Instructions */
+#define EventCode 0x500fa
+
+/*
+ * A perf sampling test for mmcra
+ * field: bhrb_disable.
+ */
+static int mmcra_bhrb_disable_test(void)
+{
+ struct event event;
+ u64 *intr_regs;
+
+ /*
+ * Check for platform support for the test.
+ * This test is only aplicable on power10
+ */
+ SKIP_IF(check_pvr_for_sampling_tests());
+ SKIP_IF(!have_hwcap2(PPC_FEATURE2_ARCH_3_1));
+
+ /* Init the event for the sampling test */
+ event_init_sampling(&event, EventCode);
+ event.attr.sample_regs_intr = platform_extended_mask;
+ event.attr.sample_type |= PERF_SAMPLE_BRANCH_STACK;
+ event.attr.branch_sample_type = PERF_SAMPLE_BRANCH_ANY;
+ event.attr.exclude_kernel = 1;
+
+ FAIL_IF(event_open(&event));
+ event.mmap_buffer = event_sample_buf_mmap(event.fd, 1);
+
+ FAIL_IF(event_enable(&event));
+
+ /* workload to make the event overflow */
+ thirty_two_instruction_loop(10000);
+
+ FAIL_IF(event_disable(&event));
+
+ intr_regs = get_intr_regs(&event, event.mmap_buffer);
+
+ /* Check for intr_regs */
+ FAIL_IF(!intr_regs);
+
+ /* Verify that bhrb_disable bit is set in MMCRA */
+ FAIL_IF(get_mmcra_bhrb_disable(get_reg_value(intr_regs, "MMCRA"), 5));
+
+ event_close(&event);
+ return 0;
+}
+
+int main(void)
+{
+ return test_harness(mmcra_bhrb_disable_test, "mmcra_bhrb_disable_test");
+}
diff --git a/tools/testing/selftests/powerpc/pmu/sampling_tests/mmcra_bhrb_ind_call_test.c b/tools/testing/selftests/powerpc/pmu/sampling_tests/mmcra_bhrb_ind_call_test.c
new file mode 100644
index 000000000000..f0706730c099
--- /dev/null
+++ b/tools/testing/selftests/powerpc/pmu/sampling_tests/mmcra_bhrb_ind_call_test.c
@@ -0,0 +1,69 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright 2022, Kajol Jain, IBM Corp.
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+
+#include "../event.h"
+#include "misc.h"
+#include "utils.h"
+
+extern void indirect_branch_loop(void);
+
+/* Instructions */
+#define EventCode 0x500fa
+
+/* ifm field for indirect branch mode */
+#define IFM_IND_BRANCH 0x2
+
+/*
+ * A perf sampling test for mmcra
+ * field: ifm for bhrb ind_call.
+ */
+static int mmcra_bhrb_ind_call_test(void)
+{
+ struct event event;
+ u64 *intr_regs;
+
+ /*
+ * Check for platform support for the test.
+ * This test is only aplicable on power10
+ */
+ SKIP_IF(check_pvr_for_sampling_tests());
+ SKIP_IF(!have_hwcap2(PPC_FEATURE2_ARCH_3_1));
+
+ /* Init the event for the sampling test */
+ event_init_sampling(&event, EventCode);
+ event.attr.sample_regs_intr = platform_extended_mask;
+ event.attr.sample_type |= PERF_SAMPLE_BRANCH_STACK;
+ event.attr.branch_sample_type = PERF_SAMPLE_BRANCH_IND_CALL;
+ event.attr.exclude_kernel = 1;
+
+ FAIL_IF(event_open(&event));
+ event.mmap_buffer = event_sample_buf_mmap(event.fd, 1);
+
+ FAIL_IF(event_enable(&event));
+
+ /* workload to make the event overflow */
+ indirect_branch_loop();
+
+ FAIL_IF(event_disable(&event));
+
+ intr_regs = get_intr_regs(&event, event.mmap_buffer);
+
+ /* Check for intr_regs */
+ FAIL_IF(!intr_regs);
+
+ /* Verify that ifm bit is set properly in MMCRA */
+ FAIL_IF(get_mmcra_ifm(get_reg_value(intr_regs, "MMCRA"), 5) != IFM_IND_BRANCH);
+
+ event_close(&event);
+ return 0;
+}
+
+int main(void)
+{
+ return test_harness(mmcra_bhrb_ind_call_test, "mmcra_bhrb_ind_call_test");
+}
diff --git a/tools/testing/selftests/powerpc/pmu/sampling_tests/mmcra_thresh_cmp_test.c b/tools/testing/selftests/powerpc/pmu/sampling_tests/mmcra_thresh_cmp_test.c
new file mode 100644
index 000000000000..904362f172c9
--- /dev/null
+++ b/tools/testing/selftests/powerpc/pmu/sampling_tests/mmcra_thresh_cmp_test.c
@@ -0,0 +1,74 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright 2022, Kajol Jain, IBM Corp.
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+
+#include "../event.h"
+#include "misc.h"
+#include "utils.h"
+
+/*
+ * Primary PMU event used here is PM_MRK_INST_CMPL (0x401e0)
+ * Threshold event selection used is issue to complete for cycles
+ * Sampling criteria is Load only sampling
+ */
+#define p9_EventCode 0x13E35340401e0
+#define p10_EventCode 0x35340401e0
+
+extern void thirty_two_instruction_loop_with_ll_sc(u64 loops, u64 *ll_sc_target);
+
+/* A perf sampling test to test mmcra fields */
+static int mmcra_thresh_cmp(void)
+{
+ struct event event;
+ u64 *intr_regs;
+ u64 dummy;
+
+ /* Check for platform support for the test */
+ SKIP_IF(check_pvr_for_sampling_tests());
+
+ /* Skip for comapt mode */
+ SKIP_IF(check_for_compat_mode());
+
+ /* Init the event for the sampling test */
+ if (!have_hwcap2(PPC_FEATURE2_ARCH_3_1)) {
+ event_init_sampling(&event, p9_EventCode);
+ } else {
+ event_init_sampling(&event, p10_EventCode);
+ event.attr.config1 = 1000;
+ }
+
+ event.attr.sample_regs_intr = platform_extended_mask;
+ FAIL_IF(event_open(&event));
+ event.mmap_buffer = event_sample_buf_mmap(event.fd, 1);
+
+ FAIL_IF(event_enable(&event));
+
+ /* workload to make the event overflow */
+ thirty_two_instruction_loop_with_ll_sc(1000000, &dummy);
+
+ FAIL_IF(event_disable(&event));
+
+ /* Check for sample count */
+ FAIL_IF(!collect_samples(event.mmap_buffer));
+
+ intr_regs = get_intr_regs(&event, event.mmap_buffer);
+
+ /* Check for intr_regs */
+ FAIL_IF(!intr_regs);
+
+ /* Verify that thresh cmp match with the corresponding event code fields */
+ FAIL_IF(get_thresh_cmp_val(event) !=
+ get_mmcra_thd_cmp(get_reg_value(intr_regs, "MMCRA"), 4));
+
+ event_close(&event);
+ return 0;
+}
+
+int main(void)
+{
+ FAIL_IF(test_harness(mmcra_thresh_cmp, "mmcra_thresh_cmp"));
+}
diff --git a/tools/testing/selftests/powerpc/pmu/sampling_tests/mmcra_thresh_marked_sample_test.c b/tools/testing/selftests/powerpc/pmu/sampling_tests/mmcra_thresh_marked_sample_test.c
new file mode 100644
index 000000000000..75527876ad3c
--- /dev/null
+++ b/tools/testing/selftests/powerpc/pmu/sampling_tests/mmcra_thresh_marked_sample_test.c
@@ -0,0 +1,80 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright 2022, Kajol Jain, IBM Corp.
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+
+#include "../event.h"
+#include "misc.h"
+#include "utils.h"
+
+/*
+ * Primary PMU event used here is PM_MRK_INST_CMPL (0x401e0)
+ * Threshold event selection used is issue to complete for cycles
+ * Sampling criteria is Load only sampling
+ */
+#define EventCode 0x35340401e0
+
+extern void thirty_two_instruction_loop_with_ll_sc(u64 loops, u64 *ll_sc_target);
+
+/* A perf sampling test to test mmcra fields */
+static int mmcra_thresh_marked_sample(void)
+{
+ struct event event;
+ u64 *intr_regs;
+ u64 dummy;
+
+ /* Check for platform support for the test */
+ SKIP_IF(check_pvr_for_sampling_tests());
+
+ /* Init the event for the sampling test */
+ event_init_sampling(&event, EventCode);
+ event.attr.sample_regs_intr = platform_extended_mask;
+ FAIL_IF(event_open(&event));
+ event.mmap_buffer = event_sample_buf_mmap(event.fd, 1);
+
+ FAIL_IF(event_enable(&event));
+
+ /* workload to make the event overflow */
+ thirty_two_instruction_loop_with_ll_sc(1000000, &dummy);
+
+ FAIL_IF(event_disable(&event));
+
+ /* Check for sample count */
+ FAIL_IF(!collect_samples(event.mmap_buffer));
+
+ intr_regs = get_intr_regs(&event, event.mmap_buffer);
+
+ /* Check for intr_regs */
+ FAIL_IF(!intr_regs);
+
+ /*
+ * Verify that thresh sel/start/stop, marked, random sample
+ * eligibility, sdar mode and sample mode fields match with
+ * the corresponding event code fields
+ */
+ FAIL_IF(EV_CODE_EXTRACT(event.attr.config, thd_sel) !=
+ get_mmcra_thd_sel(get_reg_value(intr_regs, "MMCRA"), 4));
+ FAIL_IF(EV_CODE_EXTRACT(event.attr.config, thd_start) !=
+ get_mmcra_thd_start(get_reg_value(intr_regs, "MMCRA"), 4));
+ FAIL_IF(EV_CODE_EXTRACT(event.attr.config, thd_stop) !=
+ get_mmcra_thd_stop(get_reg_value(intr_regs, "MMCRA"), 4));
+ FAIL_IF(EV_CODE_EXTRACT(event.attr.config, marked) !=
+ get_mmcra_marked(get_reg_value(intr_regs, "MMCRA"), 4));
+ FAIL_IF((EV_CODE_EXTRACT(event.attr.config, sample) >> 2) !=
+ get_mmcra_rand_samp_elig(get_reg_value(intr_regs, "MMCRA"), 4));
+ FAIL_IF((EV_CODE_EXTRACT(event.attr.config, sample) & 0x3) !=
+ get_mmcra_sample_mode(get_reg_value(intr_regs, "MMCRA"), 4));
+ FAIL_IF(EV_CODE_EXTRACT(event.attr.config, sm) !=
+ get_mmcra_sm(get_reg_value(intr_regs, "MMCRA"), 4));
+
+ event_close(&event);
+ return 0;
+}
+
+int main(void)
+{
+ return test_harness(mmcra_thresh_marked_sample, "mmcra_thresh_marked_sample");
+}
diff --git a/tools/testing/selftests/powerpc/primitives/asm/extable.h b/tools/testing/selftests/powerpc/primitives/asm/extable.h
new file mode 120000
index 000000000000..6385f059a951
--- /dev/null
+++ b/tools/testing/selftests/powerpc/primitives/asm/extable.h
@@ -0,0 +1 @@
+../../../../../../arch/powerpc/include/asm/extable.h \ No newline at end of file
diff --git a/tools/testing/selftests/rcutorture/formal/srcu-cbmc/empty_includes/linux/delay.h b/tools/testing/selftests/powerpc/primitives/linux/bitops.h
index e69de29bb2d1..e69de29bb2d1 100644
--- a/tools/testing/selftests/rcutorture/formal/srcu-cbmc/empty_includes/linux/delay.h
+++ b/tools/testing/selftests/powerpc/primitives/linux/bitops.h
diff --git a/tools/testing/selftests/powerpc/primitives/linux/wordpart.h b/tools/testing/selftests/powerpc/primitives/linux/wordpart.h
new file mode 120000
index 000000000000..4a74d2cbbc9b
--- /dev/null
+++ b/tools/testing/selftests/powerpc/primitives/linux/wordpart.h
@@ -0,0 +1 @@
+../../../../../../include/linux/wordpart.h \ No newline at end of file
diff --git a/tools/testing/selftests/powerpc/ptrace/.gitignore b/tools/testing/selftests/powerpc/ptrace/.gitignore
index 0e96150b7c7e..eb75e5360e31 100644
--- a/tools/testing/selftests/powerpc/ptrace/.gitignore
+++ b/tools/testing/selftests/powerpc/ptrace/.gitignore
@@ -14,3 +14,4 @@ perf-hwbreak
core-pkey
ptrace-pkey
ptrace-syscall
+ptrace-perf-hwbreak
diff --git a/tools/testing/selftests/powerpc/ptrace/Makefile b/tools/testing/selftests/powerpc/ptrace/Makefile
index 8d3f006c98cc..1b39b86849da 100644
--- a/tools/testing/selftests/powerpc/ptrace/Makefile
+++ b/tools/testing/selftests/powerpc/ptrace/Makefile
@@ -1,15 +1,42 @@
# SPDX-License-Identifier: GPL-2.0
-TEST_GEN_PROGS := ptrace-gpr ptrace-tm-gpr ptrace-tm-spd-gpr \
- ptrace-tar ptrace-tm-tar ptrace-tm-spd-tar ptrace-vsx ptrace-tm-vsx \
- ptrace-tm-spd-vsx ptrace-tm-spr ptrace-hwbreak ptrace-pkey core-pkey \
- perf-hwbreak ptrace-syscall
+
+TM_TESTS := ptrace-tm-gpr
+TM_TESTS += ptrace-tm-spd-gpr
+TM_TESTS += ptrace-tm-spd-tar
+TM_TESTS += ptrace-tm-spd-vsx
+TM_TESTS += ptrace-tm-spr
+TM_TESTS += ptrace-tm-tar
+TM_TESTS += ptrace-tm-vsx
+
+TESTS_64 := $(TM_TESTS)
+TESTS_64 += core-pkey
+TESTS_64 += perf-hwbreak
+TESTS_64 += ptrace-hwbreak
+TESTS_64 += ptrace-perf-hwbreak
+TESTS_64 += ptrace-pkey
+TESTS_64 += ptrace-syscall
+TESTS_64 += ptrace-tar
+TESTS_64 += ptrace-vsx
+
+TESTS += ptrace-gpr
+
+TEST_GEN_PROGS := $(TESTS) $(TESTS_64)
+
+LOCAL_HDRS += $(patsubst %,$(selfdir)/powerpc/ptrace/%,$(wildcard *.h))
top_srcdir = ../../../../..
include ../../lib.mk
-CFLAGS += -m64 -I../../../../../usr/include -I../tm -mhtm -fno-pie
+TM_TESTS := $(patsubst %,$(OUTPUT)/%,$(TM_TESTS))
+TESTS_64 := $(patsubst %,$(OUTPUT)/%,$(TESTS_64))
+
+$(TESTS_64): CFLAGS += -m64
+$(TM_TESTS): CFLAGS += -I../tm -mhtm
+
+CFLAGS += $(KHDR_INCLUDES) -fno-pie
-$(OUTPUT)/ptrace-pkey $(OUTPUT)/core-pkey: child.h
+$(OUTPUT)/ptrace-gpr: ptrace-gpr.S
+$(OUTPUT)/ptrace-perf-hwbreak: ptrace-perf-asm.S
$(OUTPUT)/ptrace-pkey $(OUTPUT)/core-pkey: LDLIBS += -pthread
-$(TEST_GEN_PROGS): ../harness.c ../utils.c ../lib/reg.S ptrace.h
+$(TEST_GEN_PROGS): ../harness.c ../utils.c ../lib/reg.S
diff --git a/tools/testing/selftests/powerpc/ptrace/child.h b/tools/testing/selftests/powerpc/ptrace/child.h
index d7275b7b33dc..df62ff0735f7 100644
--- a/tools/testing/selftests/powerpc/ptrace/child.h
+++ b/tools/testing/selftests/powerpc/ptrace/child.h
@@ -48,12 +48,12 @@ struct child_sync {
} \
} while (0)
-#define PARENT_SKIP_IF_UNSUPPORTED(x, sync) \
+#define PARENT_SKIP_IF_UNSUPPORTED(x, sync, msg) \
do { \
if ((x) == -1 && (errno == ENODEV || errno == EINVAL)) { \
(sync)->parent_gave_up = true; \
prod_child(sync); \
- SKIP_IF(1); \
+ SKIP_IF_MSG(1, msg); \
} \
} while (0)
diff --git a/tools/testing/selftests/powerpc/ptrace/core-pkey.c b/tools/testing/selftests/powerpc/ptrace/core-pkey.c
index d5c64fee032d..f6da4cb30cd6 100644
--- a/tools/testing/selftests/powerpc/ptrace/core-pkey.c
+++ b/tools/testing/selftests/powerpc/ptrace/core-pkey.c
@@ -150,7 +150,7 @@ static int child(struct shared_info *info)
printf("%-30s AMR: %016lx pkey1: %d pkey2: %d pkey3: %d\n",
user_write, info->amr, pkey1, pkey2, pkey3);
- mtspr(SPRN_AMR, info->amr);
+ set_amr(info->amr);
/*
* We won't use pkey3. This tests whether the kernel restores the UAMOR
@@ -266,7 +266,7 @@ static int parent(struct shared_info *info, pid_t pid)
* to the child.
*/
ret = ptrace_read_regs(pid, NT_PPC_PKEY, regs, 3);
- PARENT_SKIP_IF_UNSUPPORTED(ret, &info->child_sync);
+ PARENT_SKIP_IF_UNSUPPORTED(ret, &info->child_sync, "PKEYs not supported");
PARENT_FAIL_IF(ret, &info->child_sync);
info->amr = regs[0];
@@ -329,7 +329,7 @@ static int parent(struct shared_info *info, pid_t pid)
core = mmap(NULL, core_size, PROT_READ, MAP_PRIVATE, fd, 0);
if (core == (void *) -1) {
- perror("Error mmaping core file");
+ perror("Error mmapping core file");
ret = TEST_FAIL;
goto out;
}
@@ -348,15 +348,11 @@ static int parent(struct shared_info *info, pid_t pid)
static int write_core_pattern(const char *core_pattern)
{
- size_t len = strlen(core_pattern), ret;
- FILE *f;
+ int err;
- f = fopen(core_pattern_file, "w");
- SKIP_IF_MSG(!f, "Try with root privileges");
-
- ret = fwrite(core_pattern, 1, len, f);
- fclose(f);
- if (ret != len) {
+ err = write_file(core_pattern_file, core_pattern, strlen(core_pattern));
+ if (err) {
+ SKIP_IF_MSG(err == -EPERM, "Try with root privileges");
perror("Error writing to core_pattern file");
return TEST_FAIL;
}
@@ -366,8 +362,8 @@ static int write_core_pattern(const char *core_pattern)
static int setup_core_pattern(char **core_pattern_, bool *changed_)
{
- FILE *f;
char *core_pattern;
+ size_t len;
int ret;
core_pattern = malloc(PATH_MAX);
@@ -376,21 +372,15 @@ static int setup_core_pattern(char **core_pattern_, bool *changed_)
return TEST_FAIL;
}
- f = fopen(core_pattern_file, "r");
- if (!f) {
- perror("Error opening core_pattern file");
- ret = TEST_FAIL;
- goto out;
- }
-
- ret = fread(core_pattern, 1, PATH_MAX, f);
- fclose(f);
- if (!ret) {
+ ret = read_file(core_pattern_file, core_pattern, PATH_MAX - 1, &len);
+ if (ret) {
perror("Error reading core_pattern file");
ret = TEST_FAIL;
goto out;
}
+ core_pattern[len] = '\0';
+
/* Check whether we can predict the name of the core file. */
if (!strcmp(core_pattern, "core") || !strcmp(core_pattern, "core.%p"))
*changed_ = false;
diff --git a/tools/testing/selftests/powerpc/ptrace/perf-hwbreak.c b/tools/testing/selftests/powerpc/ptrace/perf-hwbreak.c
index c1f324afdbf3..e374c6b7ace6 100644
--- a/tools/testing/selftests/powerpc/ptrace/perf-hwbreak.c
+++ b/tools/testing/selftests/powerpc/ptrace/perf-hwbreak.c
@@ -17,12 +17,21 @@
* Copyright (C) 2018 Michael Neuling, IBM Corporation.
*/
+#define _GNU_SOURCE
+
#include <unistd.h>
#include <assert.h>
+#include <sched.h>
#include <stdio.h>
#include <stdlib.h>
+#include <signal.h>
#include <string.h>
#include <sys/ioctl.h>
+#include <sys/wait.h>
+#include <sys/ptrace.h>
+#include <sys/resource.h>
+#include <sys/sysinfo.h>
+#include <asm/ptrace.h>
#include <elf.h>
#include <pthread.h>
#include <sys/syscall.h>
@@ -30,32 +39,172 @@
#include <linux/hw_breakpoint.h>
#include "utils.h"
+#ifndef PPC_DEBUG_FEATURE_DATA_BP_ARCH_31
+#define PPC_DEBUG_FEATURE_DATA_BP_ARCH_31 0x20
+#endif
+
#define MAX_LOOPS 10000
#define DAWR_LENGTH_MAX ((0x3f + 1) * 8)
-static inline int sys_perf_event_open(struct perf_event_attr *attr, pid_t pid,
- int cpu, int group_fd,
- unsigned long flags)
+int nprocs;
+
+static volatile int a = 10;
+static volatile int b = 10;
+static volatile char c[512 + 8] __attribute__((aligned(512)));
+
+static void perf_event_attr_set(struct perf_event_attr *attr,
+ __u32 type, __u64 addr, __u64 len,
+ bool exclude_user)
{
- attr->size = sizeof(*attr);
- return syscall(__NR_perf_event_open, attr, pid, cpu, group_fd, flags);
+ memset(attr, 0, sizeof(struct perf_event_attr));
+ attr->type = PERF_TYPE_BREAKPOINT;
+ attr->size = sizeof(struct perf_event_attr);
+ attr->bp_type = type;
+ attr->bp_addr = addr;
+ attr->bp_len = len;
+ attr->exclude_kernel = 1;
+ attr->exclude_hv = 1;
+ attr->exclude_guest = 1;
+ attr->exclude_user = exclude_user;
+ attr->disabled = 1;
}
-static inline bool breakpoint_test(int len)
+static int
+perf_process_event_open_exclude_user(__u32 type, __u64 addr, __u64 len, bool exclude_user)
+{
+ struct perf_event_attr attr;
+
+ perf_event_attr_set(&attr, type, addr, len, exclude_user);
+ return syscall(__NR_perf_event_open, &attr, getpid(), -1, -1, 0);
+}
+
+static int perf_process_event_open(__u32 type, __u64 addr, __u64 len)
+{
+ struct perf_event_attr attr;
+
+ perf_event_attr_set(&attr, type, addr, len, 0);
+ return syscall(__NR_perf_event_open, &attr, getpid(), -1, -1, 0);
+}
+
+static int perf_cpu_event_open(long cpu, __u32 type, __u64 addr, __u64 len)
{
struct perf_event_attr attr;
+
+ perf_event_attr_set(&attr, type, addr, len, 0);
+ return syscall(__NR_perf_event_open, &attr, -1, cpu, -1, 0);
+}
+
+static void close_fds(int *fd, int n)
+{
+ int i;
+
+ for (i = 0; i < n; i++)
+ close(fd[i]);
+}
+
+static unsigned long read_fds(int *fd, int n)
+{
+ int i;
+ unsigned long c = 0;
+ unsigned long count = 0;
+ size_t res;
+
+ for (i = 0; i < n; i++) {
+ res = read(fd[i], &c, sizeof(c));
+ assert(res == sizeof(unsigned long long));
+ count += c;
+ }
+ return count;
+}
+
+static void reset_fds(int *fd, int n)
+{
+ int i;
+
+ for (i = 0; i < n; i++)
+ ioctl(fd[i], PERF_EVENT_IOC_RESET);
+}
+
+static void enable_fds(int *fd, int n)
+{
+ int i;
+
+ for (i = 0; i < n; i++)
+ ioctl(fd[i], PERF_EVENT_IOC_ENABLE);
+}
+
+static void disable_fds(int *fd, int n)
+{
+ int i;
+
+ for (i = 0; i < n; i++)
+ ioctl(fd[i], PERF_EVENT_IOC_DISABLE);
+}
+
+static int perf_systemwide_event_open(int *fd, __u32 type, __u64 addr, __u64 len)
+{
+ int i, ncpus, cpu, ret = 0;
+ struct rlimit rlim;
+ cpu_set_t *mask;
+ size_t size;
+
+ if (getrlimit(RLIMIT_NOFILE, &rlim)) {
+ perror("getrlimit");
+ return -1;
+ }
+ rlim.rlim_cur = 65536;
+ if (setrlimit(RLIMIT_NOFILE, &rlim)) {
+ perror("setrlimit");
+ return -1;
+ }
+
+ ncpus = get_nprocs_conf();
+ size = CPU_ALLOC_SIZE(ncpus);
+ mask = CPU_ALLOC(ncpus);
+ if (!mask) {
+ perror("malloc");
+ return -1;
+ }
+
+ CPU_ZERO_S(size, mask);
+
+ if (sched_getaffinity(0, size, mask)) {
+ perror("sched_getaffinity");
+ ret = -1;
+ goto done;
+ }
+
+ for (i = 0, cpu = 0; i < nprocs && cpu < ncpus; cpu++) {
+ if (!CPU_ISSET_S(cpu, size, mask))
+ continue;
+ fd[i] = perf_cpu_event_open(cpu, type, addr, len);
+ if (fd[i] < 0) {
+ perror("perf_systemwide_event_open");
+ close_fds(fd, i);
+ ret = fd[i];
+ goto done;
+ }
+ i++;
+ }
+
+ if (i < nprocs) {
+ printf("Error: Number of online cpus reduced since start of test: %d < %d\n", i, nprocs);
+ close_fds(fd, i);
+ ret = -1;
+ }
+
+done:
+ CPU_FREE(mask);
+ return ret;
+}
+
+static inline bool breakpoint_test(int len)
+{
int fd;
- /* setup counters */
- memset(&attr, 0, sizeof(attr));
- attr.disabled = 1;
- attr.type = PERF_TYPE_BREAKPOINT;
- attr.bp_type = HW_BREAKPOINT_R;
/* bp_addr can point anywhere but needs to be aligned */
- attr.bp_addr = (__u64)(&attr) & 0xfffffffffffff800;
- attr.bp_len = len;
- fd = sys_perf_event_open(&attr, 0, -1, -1, 0);
+ fd = perf_process_event_open(HW_BREAKPOINT_R, (__u64)(&fd) & 0xfffffffffffff800, len);
if (fd < 0)
return false;
close(fd);
@@ -75,7 +224,6 @@ static inline bool dawr_supported(void)
static int runtestsingle(int readwriteflag, int exclude_user, int arraytest)
{
int i,j;
- struct perf_event_attr attr;
size_t res;
unsigned long long breaks, needed;
int readint;
@@ -85,6 +233,7 @@ static int runtestsingle(int readwriteflag, int exclude_user, int arraytest)
int break_fd;
int loop_num = MAX_LOOPS - (rand() % 100); /* provide some variability */
volatile int *k;
+ __u64 len;
/* align to 0x400 boundary as required by DAWR */
readintalign = (int *)(((unsigned long)readintarraybig + 0x7ff) &
@@ -94,19 +243,11 @@ static int runtestsingle(int readwriteflag, int exclude_user, int arraytest)
if (arraytest)
ptr = &readintalign[0];
- /* setup counters */
- memset(&attr, 0, sizeof(attr));
- attr.disabled = 1;
- attr.type = PERF_TYPE_BREAKPOINT;
- attr.bp_type = readwriteflag;
- attr.bp_addr = (__u64)ptr;
- attr.bp_len = sizeof(int);
- if (arraytest)
- attr.bp_len = DAWR_LENGTH_MAX;
- attr.exclude_user = exclude_user;
- break_fd = sys_perf_event_open(&attr, 0, -1, -1, 0);
+ len = arraytest ? DAWR_LENGTH_MAX : sizeof(int);
+ break_fd = perf_process_event_open_exclude_user(readwriteflag, (__u64)ptr,
+ len, exclude_user);
if (break_fd < 0) {
- perror("sys_perf_event_open");
+ perror("perf_process_event_open_exclude_user");
exit(1);
}
@@ -153,7 +294,6 @@ static int runtest_dar_outside(void)
void *target;
volatile __u16 temp16;
volatile __u64 temp64;
- struct perf_event_attr attr;
int break_fd;
unsigned long long breaks;
int fail = 0;
@@ -165,21 +305,11 @@ static int runtest_dar_outside(void)
exit(EXIT_FAILURE);
}
- /* setup counters */
- memset(&attr, 0, sizeof(attr));
- attr.disabled = 1;
- attr.type = PERF_TYPE_BREAKPOINT;
- attr.exclude_kernel = 1;
- attr.exclude_hv = 1;
- attr.exclude_guest = 1;
- attr.bp_type = HW_BREAKPOINT_RW;
/* watch middle half of target array */
- attr.bp_addr = (__u64)(target + 2);
- attr.bp_len = 4;
- break_fd = sys_perf_event_open(&attr, 0, -1, -1, 0);
+ break_fd = perf_process_event_open(HW_BREAKPOINT_RW, (__u64)(target + 2), 4);
if (break_fd < 0) {
free(target);
- perror("sys_perf_event_open");
+ perror("perf_process_event_open");
exit(EXIT_FAILURE);
}
@@ -263,11 +393,455 @@ static int runtest_dar_outside(void)
return fail;
}
+static void multi_dawr_workload(void)
+{
+ a += 10;
+ b += 10;
+ c[512 + 1] += 'a';
+}
+
+static int test_process_multi_diff_addr(void)
+{
+ unsigned long long breaks1 = 0, breaks2 = 0;
+ int fd1, fd2;
+ char *desc = "Process specific, Two events, diff addr";
+ size_t res;
+
+ fd1 = perf_process_event_open(HW_BREAKPOINT_RW, (__u64)&a, (__u64)sizeof(a));
+ if (fd1 < 0) {
+ perror("perf_process_event_open");
+ exit(EXIT_FAILURE);
+ }
+
+ fd2 = perf_process_event_open(HW_BREAKPOINT_RW, (__u64)&b, (__u64)sizeof(b));
+ if (fd2 < 0) {
+ close(fd1);
+ perror("perf_process_event_open");
+ exit(EXIT_FAILURE);
+ }
+
+ ioctl(fd1, PERF_EVENT_IOC_RESET);
+ ioctl(fd2, PERF_EVENT_IOC_RESET);
+ ioctl(fd1, PERF_EVENT_IOC_ENABLE);
+ ioctl(fd2, PERF_EVENT_IOC_ENABLE);
+ multi_dawr_workload();
+ ioctl(fd1, PERF_EVENT_IOC_DISABLE);
+ ioctl(fd2, PERF_EVENT_IOC_DISABLE);
+
+ res = read(fd1, &breaks1, sizeof(breaks1));
+ assert(res == sizeof(unsigned long long));
+ res = read(fd2, &breaks2, sizeof(breaks2));
+ assert(res == sizeof(unsigned long long));
+
+ close(fd1);
+ close(fd2);
+
+ if (breaks1 != 2 || breaks2 != 2) {
+ printf("FAILED: %s: %lld != 2 || %lld != 2\n", desc, breaks1, breaks2);
+ return 1;
+ }
+
+ printf("TESTED: %s\n", desc);
+ return 0;
+}
+
+static int test_process_multi_same_addr(void)
+{
+ unsigned long long breaks1 = 0, breaks2 = 0;
+ int fd1, fd2;
+ char *desc = "Process specific, Two events, same addr";
+ size_t res;
+
+ fd1 = perf_process_event_open(HW_BREAKPOINT_RW, (__u64)&a, (__u64)sizeof(a));
+ if (fd1 < 0) {
+ perror("perf_process_event_open");
+ exit(EXIT_FAILURE);
+ }
+
+ fd2 = perf_process_event_open(HW_BREAKPOINT_RW, (__u64)&a, (__u64)sizeof(a));
+ if (fd2 < 0) {
+ close(fd1);
+ perror("perf_process_event_open");
+ exit(EXIT_FAILURE);
+ }
+
+ ioctl(fd1, PERF_EVENT_IOC_RESET);
+ ioctl(fd2, PERF_EVENT_IOC_RESET);
+ ioctl(fd1, PERF_EVENT_IOC_ENABLE);
+ ioctl(fd2, PERF_EVENT_IOC_ENABLE);
+ multi_dawr_workload();
+ ioctl(fd1, PERF_EVENT_IOC_DISABLE);
+ ioctl(fd2, PERF_EVENT_IOC_DISABLE);
+
+ res = read(fd1, &breaks1, sizeof(breaks1));
+ assert(res == sizeof(unsigned long long));
+ res = read(fd2, &breaks2, sizeof(breaks2));
+ assert(res == sizeof(unsigned long long));
+
+ close(fd1);
+ close(fd2);
+
+ if (breaks1 != 2 || breaks2 != 2) {
+ printf("FAILED: %s: %lld != 2 || %lld != 2\n", desc, breaks1, breaks2);
+ return 1;
+ }
+
+ printf("TESTED: %s\n", desc);
+ return 0;
+}
+
+static int test_process_multi_diff_addr_ro_wo(void)
+{
+ unsigned long long breaks1 = 0, breaks2 = 0;
+ int fd1, fd2;
+ char *desc = "Process specific, Two events, diff addr, one is RO, other is WO";
+ size_t res;
+
+ fd1 = perf_process_event_open(HW_BREAKPOINT_W, (__u64)&a, (__u64)sizeof(a));
+ if (fd1 < 0) {
+ perror("perf_process_event_open");
+ exit(EXIT_FAILURE);
+ }
+
+ fd2 = perf_process_event_open(HW_BREAKPOINT_R, (__u64)&b, (__u64)sizeof(b));
+ if (fd2 < 0) {
+ close(fd1);
+ perror("perf_process_event_open");
+ exit(EXIT_FAILURE);
+ }
+
+ ioctl(fd1, PERF_EVENT_IOC_RESET);
+ ioctl(fd2, PERF_EVENT_IOC_RESET);
+ ioctl(fd1, PERF_EVENT_IOC_ENABLE);
+ ioctl(fd2, PERF_EVENT_IOC_ENABLE);
+ multi_dawr_workload();
+ ioctl(fd1, PERF_EVENT_IOC_DISABLE);
+ ioctl(fd2, PERF_EVENT_IOC_DISABLE);
+
+ res = read(fd1, &breaks1, sizeof(breaks1));
+ assert(res == sizeof(unsigned long long));
+ res = read(fd2, &breaks2, sizeof(breaks2));
+ assert(res == sizeof(unsigned long long));
+
+ close(fd1);
+ close(fd2);
+
+ if (breaks1 != 1 || breaks2 != 1) {
+ printf("FAILED: %s: %lld != 1 || %lld != 1\n", desc, breaks1, breaks2);
+ return 1;
+ }
+
+ printf("TESTED: %s\n", desc);
+ return 0;
+}
+
+static int test_process_multi_same_addr_ro_wo(void)
+{
+ unsigned long long breaks1 = 0, breaks2 = 0;
+ int fd1, fd2;
+ char *desc = "Process specific, Two events, same addr, one is RO, other is WO";
+ size_t res;
+
+ fd1 = perf_process_event_open(HW_BREAKPOINT_R, (__u64)&a, (__u64)sizeof(a));
+ if (fd1 < 0) {
+ perror("perf_process_event_open");
+ exit(EXIT_FAILURE);
+ }
+
+ fd2 = perf_process_event_open(HW_BREAKPOINT_W, (__u64)&a, (__u64)sizeof(a));
+ if (fd2 < 0) {
+ close(fd1);
+ perror("perf_process_event_open");
+ exit(EXIT_FAILURE);
+ }
+
+ ioctl(fd1, PERF_EVENT_IOC_RESET);
+ ioctl(fd2, PERF_EVENT_IOC_RESET);
+ ioctl(fd1, PERF_EVENT_IOC_ENABLE);
+ ioctl(fd2, PERF_EVENT_IOC_ENABLE);
+ multi_dawr_workload();
+ ioctl(fd1, PERF_EVENT_IOC_DISABLE);
+ ioctl(fd2, PERF_EVENT_IOC_DISABLE);
+
+ res = read(fd1, &breaks1, sizeof(breaks1));
+ assert(res == sizeof(unsigned long long));
+ res = read(fd2, &breaks2, sizeof(breaks2));
+ assert(res == sizeof(unsigned long long));
+
+ close(fd1);
+ close(fd2);
+
+ if (breaks1 != 1 || breaks2 != 1) {
+ printf("FAILED: %s: %lld != 1 || %lld != 1\n", desc, breaks1, breaks2);
+ return 1;
+ }
+
+ printf("TESTED: %s\n", desc);
+ return 0;
+}
+
+static int test_syswide_multi_diff_addr(void)
+{
+ unsigned long long breaks1 = 0, breaks2 = 0;
+ int *fd1 = malloc(nprocs * sizeof(int));
+ int *fd2 = malloc(nprocs * sizeof(int));
+ char *desc = "Systemwide, Two events, diff addr";
+ int ret;
+
+ ret = perf_systemwide_event_open(fd1, HW_BREAKPOINT_RW, (__u64)&a, (__u64)sizeof(a));
+ if (ret)
+ exit(EXIT_FAILURE);
+
+ ret = perf_systemwide_event_open(fd2, HW_BREAKPOINT_RW, (__u64)&b, (__u64)sizeof(b));
+ if (ret) {
+ close_fds(fd1, nprocs);
+ exit(EXIT_FAILURE);
+ }
+
+ reset_fds(fd1, nprocs);
+ reset_fds(fd2, nprocs);
+ enable_fds(fd1, nprocs);
+ enable_fds(fd2, nprocs);
+ multi_dawr_workload();
+ disable_fds(fd1, nprocs);
+ disable_fds(fd2, nprocs);
+
+ breaks1 = read_fds(fd1, nprocs);
+ breaks2 = read_fds(fd2, nprocs);
+
+ close_fds(fd1, nprocs);
+ close_fds(fd2, nprocs);
+
+ free(fd1);
+ free(fd2);
+
+ if (breaks1 != 2 || breaks2 != 2) {
+ printf("FAILED: %s: %lld != 2 || %lld != 2\n", desc, breaks1, breaks2);
+ return 1;
+ }
+
+ printf("TESTED: %s\n", desc);
+ return 0;
+}
+
+static int test_syswide_multi_same_addr(void)
+{
+ unsigned long long breaks1 = 0, breaks2 = 0;
+ int *fd1 = malloc(nprocs * sizeof(int));
+ int *fd2 = malloc(nprocs * sizeof(int));
+ char *desc = "Systemwide, Two events, same addr";
+ int ret;
+
+ ret = perf_systemwide_event_open(fd1, HW_BREAKPOINT_RW, (__u64)&a, (__u64)sizeof(a));
+ if (ret)
+ exit(EXIT_FAILURE);
+
+ ret = perf_systemwide_event_open(fd2, HW_BREAKPOINT_RW, (__u64)&a, (__u64)sizeof(a));
+ if (ret) {
+ close_fds(fd1, nprocs);
+ exit(EXIT_FAILURE);
+ }
+
+ reset_fds(fd1, nprocs);
+ reset_fds(fd2, nprocs);
+ enable_fds(fd1, nprocs);
+ enable_fds(fd2, nprocs);
+ multi_dawr_workload();
+ disable_fds(fd1, nprocs);
+ disable_fds(fd2, nprocs);
+
+ breaks1 = read_fds(fd1, nprocs);
+ breaks2 = read_fds(fd2, nprocs);
+
+ close_fds(fd1, nprocs);
+ close_fds(fd2, nprocs);
+
+ free(fd1);
+ free(fd2);
+
+ if (breaks1 != 2 || breaks2 != 2) {
+ printf("FAILED: %s: %lld != 2 || %lld != 2\n", desc, breaks1, breaks2);
+ return 1;
+ }
+
+ printf("TESTED: %s\n", desc);
+ return 0;
+}
+
+static int test_syswide_multi_diff_addr_ro_wo(void)
+{
+ unsigned long long breaks1 = 0, breaks2 = 0;
+ int *fd1 = malloc(nprocs * sizeof(int));
+ int *fd2 = malloc(nprocs * sizeof(int));
+ char *desc = "Systemwide, Two events, diff addr, one is RO, other is WO";
+ int ret;
+
+ ret = perf_systemwide_event_open(fd1, HW_BREAKPOINT_W, (__u64)&a, (__u64)sizeof(a));
+ if (ret)
+ exit(EXIT_FAILURE);
+
+ ret = perf_systemwide_event_open(fd2, HW_BREAKPOINT_R, (__u64)&b, (__u64)sizeof(b));
+ if (ret) {
+ close_fds(fd1, nprocs);
+ exit(EXIT_FAILURE);
+ }
+
+ reset_fds(fd1, nprocs);
+ reset_fds(fd2, nprocs);
+ enable_fds(fd1, nprocs);
+ enable_fds(fd2, nprocs);
+ multi_dawr_workload();
+ disable_fds(fd1, nprocs);
+ disable_fds(fd2, nprocs);
+
+ breaks1 = read_fds(fd1, nprocs);
+ breaks2 = read_fds(fd2, nprocs);
+
+ close_fds(fd1, nprocs);
+ close_fds(fd2, nprocs);
+
+ free(fd1);
+ free(fd2);
+
+ if (breaks1 != 1 || breaks2 != 1) {
+ printf("FAILED: %s: %lld != 1 || %lld != 1\n", desc, breaks1, breaks2);
+ return 1;
+ }
+
+ printf("TESTED: %s\n", desc);
+ return 0;
+}
+
+static int test_syswide_multi_same_addr_ro_wo(void)
+{
+ unsigned long long breaks1 = 0, breaks2 = 0;
+ int *fd1 = malloc(nprocs * sizeof(int));
+ int *fd2 = malloc(nprocs * sizeof(int));
+ char *desc = "Systemwide, Two events, same addr, one is RO, other is WO";
+ int ret;
+
+ ret = perf_systemwide_event_open(fd1, HW_BREAKPOINT_W, (__u64)&a, (__u64)sizeof(a));
+ if (ret)
+ exit(EXIT_FAILURE);
+
+ ret = perf_systemwide_event_open(fd2, HW_BREAKPOINT_R, (__u64)&a, (__u64)sizeof(a));
+ if (ret) {
+ close_fds(fd1, nprocs);
+ exit(EXIT_FAILURE);
+ }
+
+ reset_fds(fd1, nprocs);
+ reset_fds(fd2, nprocs);
+ enable_fds(fd1, nprocs);
+ enable_fds(fd2, nprocs);
+ multi_dawr_workload();
+ disable_fds(fd1, nprocs);
+ disable_fds(fd2, nprocs);
+
+ breaks1 = read_fds(fd1, nprocs);
+ breaks2 = read_fds(fd2, nprocs);
+
+ close_fds(fd1, nprocs);
+ close_fds(fd2, nprocs);
+
+ free(fd1);
+ free(fd2);
+
+ if (breaks1 != 1 || breaks2 != 1) {
+ printf("FAILED: %s: %lld != 1 || %lld != 1\n", desc, breaks1, breaks2);
+ return 1;
+ }
+
+ printf("TESTED: %s\n", desc);
+ return 0;
+}
+
+static int runtest_multi_dawr(void)
+{
+ int ret = 0;
+
+ ret |= test_process_multi_diff_addr();
+ ret |= test_process_multi_same_addr();
+ ret |= test_process_multi_diff_addr_ro_wo();
+ ret |= test_process_multi_same_addr_ro_wo();
+ ret |= test_syswide_multi_diff_addr();
+ ret |= test_syswide_multi_same_addr();
+ ret |= test_syswide_multi_diff_addr_ro_wo();
+ ret |= test_syswide_multi_same_addr_ro_wo();
+
+ return ret;
+}
+
+static int runtest_unaligned_512bytes(void)
+{
+ unsigned long long breaks = 0;
+ int fd;
+ char *desc = "Process specific, 512 bytes, unaligned";
+ __u64 addr = (__u64)&c + 8;
+ size_t res;
+
+ fd = perf_process_event_open(HW_BREAKPOINT_RW, addr, 512);
+ if (fd < 0) {
+ perror("perf_process_event_open");
+ exit(EXIT_FAILURE);
+ }
+
+ ioctl(fd, PERF_EVENT_IOC_RESET);
+ ioctl(fd, PERF_EVENT_IOC_ENABLE);
+ multi_dawr_workload();
+ ioctl(fd, PERF_EVENT_IOC_DISABLE);
+
+ res = read(fd, &breaks, sizeof(breaks));
+ assert(res == sizeof(unsigned long long));
+
+ close(fd);
+
+ if (breaks != 2) {
+ printf("FAILED: %s: %lld != 2\n", desc, breaks);
+ return 1;
+ }
+
+ printf("TESTED: %s\n", desc);
+ return 0;
+}
+
+/* There is no perf api to find number of available watchpoints. Use ptrace. */
+static int get_nr_wps(bool *arch_31)
+{
+ struct ppc_debug_info dbginfo;
+ int child_pid;
+
+ child_pid = fork();
+ if (!child_pid) {
+ int ret = ptrace(PTRACE_TRACEME, 0, NULL, 0);
+ if (ret) {
+ perror("PTRACE_TRACEME failed\n");
+ exit(EXIT_FAILURE);
+ }
+ kill(getpid(), SIGUSR1);
+
+ sleep(1);
+ exit(EXIT_SUCCESS);
+ }
+
+ wait(NULL);
+ if (ptrace(PPC_PTRACE_GETHWDBGINFO, child_pid, NULL, &dbginfo)) {
+ perror("Can't get breakpoint info");
+ exit(EXIT_FAILURE);
+ }
+
+ *arch_31 = !!(dbginfo.features & PPC_DEBUG_FEATURE_DATA_BP_ARCH_31);
+ return dbginfo.num_data_bps;
+}
+
static int runtest(void)
{
int rwflag;
int exclude_user;
int ret;
+ bool dawr = dawr_supported();
+ bool arch_31 = false;
+ int nr_wps = get_nr_wps(&arch_31);
/*
* perf defines rwflag as two bits read and write and at least
@@ -280,7 +854,7 @@ static int runtest(void)
return ret;
/* if we have the dawr, we can do an array test */
- if (!dawr_supported())
+ if (!dawr)
continue;
ret = runtestsingle(rwflag, exclude_user, 1);
if (ret)
@@ -289,6 +863,19 @@ static int runtest(void)
}
ret = runtest_dar_outside();
+ if (ret)
+ return ret;
+
+ if (dawr && nr_wps > 1) {
+ nprocs = get_nprocs();
+ ret = runtest_multi_dawr();
+ if (ret)
+ return ret;
+ }
+
+ if (dawr && arch_31)
+ ret = runtest_unaligned_512bytes();
+
return ret;
}
@@ -297,7 +884,7 @@ static int perf_hwbreak(void)
{
srand ( time(NULL) );
- SKIP_IF(!perf_breakpoint_supported());
+ SKIP_IF_MSG(!perf_breakpoint_supported(), "Perf breakpoints not supported");
return runtest();
}
diff --git a/tools/testing/selftests/powerpc/ptrace/ptrace-gpr.S b/tools/testing/selftests/powerpc/ptrace/ptrace-gpr.S
new file mode 100644
index 000000000000..070e8443e3cc
--- /dev/null
+++ b/tools/testing/selftests/powerpc/ptrace/ptrace-gpr.S
@@ -0,0 +1,52 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+/*
+ * test helper assembly functions
+ *
+ * Copyright (C) 2016 Simon Guo, IBM Corporation.
+ * Copyright 2022 Michael Ellerman, IBM Corporation.
+ */
+#include "basic_asm.h"
+
+#define GPR_SIZE __SIZEOF_LONG__
+#define FIRST_GPR 14
+#define NUM_GPRS (32 - FIRST_GPR)
+#define STACK_SIZE (NUM_GPRS * GPR_SIZE)
+
+// gpr_child_loop(int *read_flag, int *write_flag,
+// unsigned long *gpr_buf, double *fpr_buf);
+FUNC_START(gpr_child_loop)
+ // r3 = read_flag
+ // r4 = write_flag
+ // r5 = gpr_buf
+ // r6 = fpr_buf
+ PUSH_BASIC_STACK(STACK_SIZE)
+
+ // Save non-volatile GPRs
+ OP_REGS PPC_STL, GPR_SIZE, FIRST_GPR, 31, %r1, STACK_FRAME_LOCAL(0, 0), FIRST_GPR
+
+ // Load GPRs with expected values
+ OP_REGS PPC_LL, GPR_SIZE, FIRST_GPR, 31, r5, 0, FIRST_GPR
+
+ // Load FPRs with expected values
+ OP_REGS lfd, 8, 0, 31, r6
+
+ // Signal to parent that we're ready
+ li r0, 1
+ stw r0, 0(r4)
+
+ // Wait for parent to finish
+1: lwz r0, 0(r3)
+ cmpwi r0, 0
+ beq 1b // Loop while flag is zero
+
+ // Save GPRs back to caller buffer
+ OP_REGS PPC_STL, GPR_SIZE, FIRST_GPR, 31, r5, 0, FIRST_GPR
+
+ // Save FPRs
+ OP_REGS stfd, 8, 0, 31, r6
+
+ // Reload non-volatile GPRs
+ OP_REGS PPC_LL, GPR_SIZE, FIRST_GPR, 31, %r1, STACK_FRAME_LOCAL(0, 0), FIRST_GPR
+
+ POP_BASIC_STACK(STACK_SIZE)
+ blr
diff --git a/tools/testing/selftests/powerpc/ptrace/ptrace-gpr.c b/tools/testing/selftests/powerpc/ptrace/ptrace-gpr.c
index 17cd480c8780..9ed87d297799 100644
--- a/tools/testing/selftests/powerpc/ptrace/ptrace-gpr.c
+++ b/tools/testing/selftests/powerpc/ptrace/ptrace-gpr.c
@@ -7,72 +7,127 @@
#include "ptrace.h"
#include "ptrace-gpr.h"
#include "reg.h"
+#include <time.h>
/* Tracer and Tracee Shared Data */
int shm_id;
int *cptr, *pptr;
-float a = FPR_1;
-float b = FPR_2;
-float c = FPR_3;
+extern void gpr_child_loop(int *read_flag, int *write_flag,
+ unsigned long *gpr_buf, double *fpr_buf);
-void gpr(void)
+unsigned long child_gpr_val, parent_gpr_val;
+double child_fpr_val, parent_fpr_val;
+
+static int child(void)
{
- unsigned long gpr_buf[18];
- float fpr_buf[32];
+ unsigned long gpr_buf[32];
+ double fpr_buf[32];
+ int i;
cptr = (int *)shmat(shm_id, NULL, 0);
+ memset(gpr_buf, 0, sizeof(gpr_buf));
+ memset(fpr_buf, 0, sizeof(fpr_buf));
- asm __volatile__(
- ASM_LOAD_GPR_IMMED(gpr_1)
- ASM_LOAD_FPR_SINGLE_PRECISION(flt_1)
- :
- : [gpr_1]"i"(GPR_1), [flt_1] "b" (&a)
- : "memory", "r6", "r7", "r8", "r9", "r10",
- "r11", "r12", "r13", "r14", "r15", "r16", "r17",
- "r18", "r19", "r20", "r21", "r22", "r23", "r24",
- "r25", "r26", "r27", "r28", "r29", "r30", "r31"
- );
-
- cptr[1] = 1;
+ for (i = 0; i < 32; i++) {
+ gpr_buf[i] = child_gpr_val;
+ fpr_buf[i] = child_fpr_val;
+ }
- while (!cptr[0])
- asm volatile("" : : : "memory");
+ gpr_child_loop(&cptr[0], &cptr[1], gpr_buf, fpr_buf);
shmdt((void *)cptr);
- store_gpr(gpr_buf);
- store_fpr_single_precision(fpr_buf);
-
- if (validate_gpr(gpr_buf, GPR_3))
- exit(1);
- if (validate_fpr_float(fpr_buf, c))
- exit(1);
+ FAIL_IF(validate_gpr(gpr_buf, parent_gpr_val));
+ FAIL_IF(validate_fpr_double(fpr_buf, parent_fpr_val));
- exit(0);
+ return 0;
}
int trace_gpr(pid_t child)
{
+ __u64 tmp, fpr[32], *peeked_fprs;
unsigned long gpr[18];
- unsigned long fpr[32];
FAIL_IF(start_trace(child));
+
+ // Check child GPRs match what we expect using GETREGS
FAIL_IF(show_gpr(child, gpr));
- FAIL_IF(validate_gpr(gpr, GPR_1));
+ FAIL_IF(validate_gpr(gpr, child_gpr_val));
+
+ // Check child FPRs match what we expect using GETFPREGS
FAIL_IF(show_fpr(child, fpr));
- FAIL_IF(validate_fpr(fpr, FPR_1_REP));
- FAIL_IF(write_gpr(child, GPR_3));
- FAIL_IF(write_fpr(child, FPR_3_REP));
+ memcpy(&tmp, &child_fpr_val, sizeof(tmp));
+ FAIL_IF(validate_fpr(fpr, tmp));
+
+ // Check child FPRs match what we expect using PEEKUSR
+ peeked_fprs = peek_fprs(child);
+ FAIL_IF(!peeked_fprs);
+ FAIL_IF(validate_fpr(peeked_fprs, tmp));
+ free(peeked_fprs);
+
+ // Write child GPRs using SETREGS
+ FAIL_IF(write_gpr(child, parent_gpr_val));
+
+ // Write child FPRs using SETFPREGS
+ memcpy(&tmp, &parent_fpr_val, sizeof(tmp));
+ FAIL_IF(write_fpr(child, tmp));
+
+ // Check child FPRs match what we just set, using PEEKUSR
+ peeked_fprs = peek_fprs(child);
+ FAIL_IF(!peeked_fprs);
+ FAIL_IF(validate_fpr(peeked_fprs, tmp));
+
+ // Write child FPRs using POKEUSR
+ FAIL_IF(poke_fprs(child, (unsigned long *)peeked_fprs));
+
+ // Child will check its FPRs match before exiting
FAIL_IF(stop_trace(child));
return TEST_PASS;
}
+#ifndef __LONG_WIDTH__
+#define __LONG_WIDTH__ (sizeof(long) * 8)
+#endif
+
+static uint64_t rand_reg(void)
+{
+ uint64_t result;
+ long r;
+
+ r = random();
+
+ // Small values are typical
+ result = r & 0xffff;
+ if (r & 0x10000)
+ return result;
+
+ // Pointers tend to have high bits set
+ result |= random() << (__LONG_WIDTH__ - 31);
+ if (r & 0x100000)
+ return result;
+
+ // And sometimes we want a full 64-bit value
+ result ^= random() << 16;
+
+ return result;
+}
+
int ptrace_gpr(void)
{
- pid_t pid;
+ unsigned long seed;
int ret, status;
+ pid_t pid;
+
+ seed = getpid() ^ time(NULL);
+ printf("srand(%lu)\n", seed);
+ srand(seed);
+
+ child_gpr_val = rand_reg();
+ child_fpr_val = rand_reg();
+ parent_gpr_val = rand_reg();
+ parent_fpr_val = rand_reg();
shm_id = shmget(IPC_PRIVATE, sizeof(int) * 2, 0777|IPC_CREAT);
pid = fork();
@@ -81,7 +136,7 @@ int ptrace_gpr(void)
return TEST_FAIL;
}
if (pid == 0)
- gpr();
+ exit(child());
if (pid) {
pptr = (int *)shmat(shm_id, NULL, 0);
diff --git a/tools/testing/selftests/powerpc/ptrace/ptrace-gpr.h b/tools/testing/selftests/powerpc/ptrace/ptrace-gpr.h
index c5cd53181e2e..a5470b88bd08 100644
--- a/tools/testing/selftests/powerpc/ptrace/ptrace-gpr.h
+++ b/tools/testing/selftests/powerpc/ptrace/ptrace-gpr.h
@@ -12,10 +12,10 @@
#define FPR_3 0.003
#define FPR_4 0.004
-#define FPR_1_REP 0x3f50624de0000000
-#define FPR_2_REP 0x3f60624de0000000
-#define FPR_3_REP 0x3f689374c0000000
-#define FPR_4_REP 0x3f70624de0000000
+#define FPR_1_REP 0x3f50624dd2f1a9fcull
+#define FPR_2_REP 0x3f60624dd2f1a9fcull
+#define FPR_3_REP 0x3f689374bc6a7efaull
+#define FPR_4_REP 0x3f70624dd2f1a9fcull
/* Buffer must have 18 elements */
int validate_gpr(unsigned long *gpr, unsigned long val)
@@ -36,13 +36,13 @@ int validate_gpr(unsigned long *gpr, unsigned long val)
}
/* Buffer must have 32 elements */
-int validate_fpr(unsigned long *fpr, unsigned long val)
+int validate_fpr(__u64 *fpr, __u64 val)
{
int i, found = 1;
for (i = 0; i < 32; i++) {
if (fpr[i] != val) {
- printf("FPR[%d]: %lx Expected: %lx\n", i, fpr[i], val);
+ printf("FPR[%d]: %llx Expected: %llx\n", i, fpr[i], val);
found = 0;
}
}
@@ -53,7 +53,7 @@ int validate_fpr(unsigned long *fpr, unsigned long val)
}
/* Buffer must have 32 elements */
-int validate_fpr_float(float *fpr, float val)
+int validate_fpr_double(double *fpr, double val)
{
int i, found = 1;
diff --git a/tools/testing/selftests/powerpc/ptrace/ptrace-hwbreak.c b/tools/testing/selftests/powerpc/ptrace/ptrace-hwbreak.c
index fc477dfe86a2..75d30d61ab0e 100644
--- a/tools/testing/selftests/powerpc/ptrace/ptrace-hwbreak.c
+++ b/tools/testing/selftests/powerpc/ptrace/ptrace-hwbreak.c
@@ -20,7 +20,10 @@
#include <signal.h>
#include <sys/types.h>
#include <sys/wait.h>
+#include <sys/syscall.h>
+#include <linux/limits.h>
#include "ptrace.h"
+#include "reg.h"
#define SPRN_PVR 0x11F
#define PVR_8xx 0x00500000
@@ -44,6 +47,7 @@ struct gstruct {
};
static volatile struct gstruct gstruct __attribute__((aligned(512)));
+static volatile char cwd[PATH_MAX] __attribute__((aligned(8)));
static void get_dbginfo(pid_t child_pid, struct ppc_debug_info *dbginfo)
{
@@ -60,26 +64,26 @@ static bool dawr_present(struct ppc_debug_info *dbginfo)
static void write_var(int len)
{
- __u8 *pcvar;
- __u16 *psvar;
- __u32 *pivar;
- __u64 *plvar;
+ volatile __u8 *pcvar;
+ volatile __u16 *psvar;
+ volatile __u32 *pivar;
+ volatile __u64 *plvar;
switch (len) {
case 1:
- pcvar = (__u8 *)&glvar;
+ pcvar = (volatile __u8 *)&glvar;
*pcvar = 0xff;
break;
case 2:
- psvar = (__u16 *)&glvar;
+ psvar = (volatile __u16 *)&glvar;
*psvar = 0xffff;
break;
case 4:
- pivar = (__u32 *)&glvar;
+ pivar = (volatile __u32 *)&glvar;
*pivar = 0xffffffff;
break;
case 8:
- plvar = (__u64 *)&glvar;
+ plvar = (volatile __u64 *)&glvar;
*plvar = 0xffffffffffffffffLL;
break;
}
@@ -94,16 +98,16 @@ static void read_var(int len)
switch (len) {
case 1:
- cvar = (__u8)glvar;
+ cvar = (volatile __u8)glvar;
break;
case 2:
- svar = (__u16)glvar;
+ svar = (volatile __u16)glvar;
break;
case 4:
- ivar = (__u32)glvar;
+ ivar = (volatile __u32)glvar;
break;
case 8:
- lvar = (__u64)glvar;
+ lvar = (volatile __u64)glvar;
break;
}
}
@@ -138,6 +142,9 @@ static void test_workload(void)
write_var(len);
}
+ /* PTRACE_SET_DEBUGREG, Kernel Access Userspace test */
+ syscall(__NR_getcwd, &cwd, PATH_MAX);
+
/* PPC_PTRACE_SETHWDEBUG, MODE_EXACT, WO test */
write_var(1);
@@ -150,6 +157,9 @@ static void test_workload(void)
else
read_var(1);
+ /* PPC_PTRACE_SETHWDEBUG, MODE_EXACT, Kernel Access Userspace test */
+ syscall(__NR_getcwd, &cwd, PATH_MAX);
+
/* PPC_PTRACE_SETHWDEBUG, MODE_RANGE, DW ALIGNED, WO test */
gstruct.a[rand() % A_LEN] = 'a';
@@ -185,6 +195,18 @@ static void test_workload(void)
big_var[rand() % DAWR_MAX_LEN] = 'a';
else
cvar = big_var[rand() % DAWR_MAX_LEN];
+
+ /* PPC_PTRACE_SETHWDEBUG 2, MODE_RANGE, DW ALIGNED, WO test */
+ gstruct.a[rand() % A_LEN] = 'a';
+
+ /* PPC_PTRACE_SETHWDEBUG 2, MODE_RANGE, DW UNALIGNED, RO test */
+ cvar = gstruct.b[rand() % B_LEN];
+
+ /* PPC_PTRACE_SETHWDEBUG 2, MODE_RANGE, DAWR Overlap, WO test */
+ gstruct.a[rand() % A_LEN] = 'a';
+
+ /* PPC_PTRACE_SETHWDEBUG 2, MODE_RANGE, DAWR Overlap, RO test */
+ cvar = gstruct.a[rand() % A_LEN];
}
static void check_success(pid_t child_pid, const char *name, const char *type,
@@ -293,6 +315,24 @@ static int test_set_debugreg(pid_t child_pid)
return 0;
}
+static int test_set_debugreg_kernel_userspace(pid_t child_pid)
+{
+ unsigned long wp_addr = (unsigned long)cwd;
+ char *name = "PTRACE_SET_DEBUGREG";
+
+ /* PTRACE_SET_DEBUGREG, Kernel Access Userspace test */
+ wp_addr &= ~0x7UL;
+ wp_addr |= (1Ul << DABR_READ_SHIFT);
+ wp_addr |= (1UL << DABR_WRITE_SHIFT);
+ wp_addr |= (1UL << DABR_TRANSLATION_SHIFT);
+ ptrace_set_debugreg(child_pid, wp_addr);
+ ptrace(PTRACE_CONT, child_pid, NULL, 0);
+ check_success(child_pid, name, "Kernel Access Userspace", wp_addr, 8);
+
+ ptrace_set_debugreg(child_pid, 0);
+ return 0;
+}
+
static void get_ppc_hw_breakpoint(struct ppc_hw_breakpoint *info, int type,
unsigned long addr, int len)
{
@@ -338,6 +378,22 @@ static void test_sethwdebug_exact(pid_t child_pid)
ptrace_delhwdebug(child_pid, wh);
}
+static void test_sethwdebug_exact_kernel_userspace(pid_t child_pid)
+{
+ struct ppc_hw_breakpoint info;
+ unsigned long wp_addr = (unsigned long)&cwd;
+ char *name = "PPC_PTRACE_SETHWDEBUG, MODE_EXACT";
+ int len = 1; /* hardcoded in kernel */
+ int wh;
+
+ /* PPC_PTRACE_SETHWDEBUG, MODE_EXACT, Kernel Access Userspace test */
+ get_ppc_hw_breakpoint(&info, PPC_BREAKPOINT_TRIGGER_WRITE, wp_addr, 0);
+ wh = ptrace_sethwdebug(child_pid, &info);
+ ptrace(PTRACE_CONT, child_pid, NULL, 0);
+ check_success(child_pid, name, "Kernel Access Userspace", wp_addr, len);
+ ptrace_delhwdebug(child_pid, wh);
+}
+
static void test_sethwdebug_range_aligned(pid_t child_pid)
{
struct ppc_hw_breakpoint info;
@@ -374,6 +430,69 @@ static void test_sethwdebug_range_aligned(pid_t child_pid)
ptrace_delhwdebug(child_pid, wh);
}
+static void test_multi_sethwdebug_range(pid_t child_pid)
+{
+ struct ppc_hw_breakpoint info1, info2;
+ unsigned long wp_addr1, wp_addr2;
+ char *name1 = "PPC_PTRACE_SETHWDEBUG 2, MODE_RANGE, DW ALIGNED";
+ char *name2 = "PPC_PTRACE_SETHWDEBUG 2, MODE_RANGE, DW UNALIGNED";
+ int len1, len2;
+ int wh1, wh2;
+
+ wp_addr1 = (unsigned long)&gstruct.a;
+ wp_addr2 = (unsigned long)&gstruct.b;
+ len1 = A_LEN;
+ len2 = B_LEN;
+ get_ppc_hw_breakpoint(&info1, PPC_BREAKPOINT_TRIGGER_WRITE, wp_addr1, len1);
+ get_ppc_hw_breakpoint(&info2, PPC_BREAKPOINT_TRIGGER_READ, wp_addr2, len2);
+
+ /* PPC_PTRACE_SETHWDEBUG 2, MODE_RANGE, DW ALIGNED, WO test */
+ wh1 = ptrace_sethwdebug(child_pid, &info1);
+
+ /* PPC_PTRACE_SETHWDEBUG 2, MODE_RANGE, DW UNALIGNED, RO test */
+ wh2 = ptrace_sethwdebug(child_pid, &info2);
+
+ ptrace(PTRACE_CONT, child_pid, NULL, 0);
+ check_success(child_pid, name1, "WO", wp_addr1, len1);
+
+ ptrace(PTRACE_CONT, child_pid, NULL, 0);
+ check_success(child_pid, name2, "RO", wp_addr2, len2);
+
+ ptrace_delhwdebug(child_pid, wh1);
+ ptrace_delhwdebug(child_pid, wh2);
+}
+
+static void test_multi_sethwdebug_range_dawr_overlap(pid_t child_pid)
+{
+ struct ppc_hw_breakpoint info1, info2;
+ unsigned long wp_addr1, wp_addr2;
+ char *name = "PPC_PTRACE_SETHWDEBUG 2, MODE_RANGE, DAWR Overlap";
+ int len1, len2;
+ int wh1, wh2;
+
+ wp_addr1 = (unsigned long)&gstruct.a;
+ wp_addr2 = (unsigned long)&gstruct.a;
+ len1 = A_LEN;
+ len2 = A_LEN;
+ get_ppc_hw_breakpoint(&info1, PPC_BREAKPOINT_TRIGGER_WRITE, wp_addr1, len1);
+ get_ppc_hw_breakpoint(&info2, PPC_BREAKPOINT_TRIGGER_READ, wp_addr2, len2);
+
+ /* PPC_PTRACE_SETHWDEBUG 2, MODE_RANGE, DAWR Overlap, WO test */
+ wh1 = ptrace_sethwdebug(child_pid, &info1);
+
+ /* PPC_PTRACE_SETHWDEBUG 2, MODE_RANGE, DAWR Overlap, RO test */
+ wh2 = ptrace_sethwdebug(child_pid, &info2);
+
+ ptrace(PTRACE_CONT, child_pid, NULL, 0);
+ check_success(child_pid, name, "WO", wp_addr1, len1);
+
+ ptrace(PTRACE_CONT, child_pid, NULL, 0);
+ check_success(child_pid, name, "RO", wp_addr2, len2);
+
+ ptrace_delhwdebug(child_pid, wh1);
+ ptrace_delhwdebug(child_pid, wh2);
+}
+
static void test_sethwdebug_range_unaligned(pid_t child_pid)
{
struct ppc_hw_breakpoint info;
@@ -452,14 +571,19 @@ static void
run_tests(pid_t child_pid, struct ppc_debug_info *dbginfo, bool dawr)
{
test_set_debugreg(child_pid);
+ test_set_debugreg_kernel_userspace(child_pid);
+ test_sethwdebug_exact(child_pid);
+ test_sethwdebug_exact_kernel_userspace(child_pid);
if (dbginfo->features & PPC_DEBUG_FEATURE_DATA_BP_RANGE) {
- test_sethwdebug_exact(child_pid);
-
test_sethwdebug_range_aligned(child_pid);
if (dawr || is_8xx) {
test_sethwdebug_range_unaligned(child_pid);
test_sethwdebug_range_unaligned_dar(child_pid);
test_sethwdebug_dawr_max_range(child_pid);
+ if (dbginfo->num_data_bps > 1) {
+ test_multi_sethwdebug_range(child_pid);
+ test_multi_sethwdebug_range_dawr_overlap(child_pid);
+ }
}
}
}
@@ -479,7 +603,7 @@ static int ptrace_hwbreak(void)
wait(NULL);
get_dbginfo(child_pid, &dbginfo);
- SKIP_IF(dbginfo.num_data_bps == 0);
+ SKIP_IF_MSG(dbginfo.num_data_bps == 0, "No data breakpoints present");
dawr = dawr_present(&dbginfo);
run_tests(child_pid, &dbginfo, dawr);
@@ -497,10 +621,7 @@ static int ptrace_hwbreak(void)
int main(int argc, char **argv, char **envp)
{
- int pvr = 0;
- asm __volatile__ ("mfspr %0,%1" : "=r"(pvr) : "i"(SPRN_PVR));
- if (pvr == PVR_8xx)
- is_8xx = true;
+ is_8xx = mfspr(SPRN_PVR) == PVR_8xx;
return test_harness(ptrace_hwbreak, "ptrace-hwbreak");
}
diff --git a/tools/testing/selftests/powerpc/ptrace/ptrace-perf-asm.S b/tools/testing/selftests/powerpc/ptrace/ptrace-perf-asm.S
new file mode 100644
index 000000000000..9aa2e58f3189
--- /dev/null
+++ b/tools/testing/selftests/powerpc/ptrace/ptrace-perf-asm.S
@@ -0,0 +1,33 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+
+#include <ppc-asm.h>
+
+.global same_watch_addr_load
+.global same_watch_addr_trap
+
+FUNC_START(same_watch_addr_child)
+ nop
+same_watch_addr_load:
+ ld 0,0(3)
+ nop
+same_watch_addr_trap:
+ trap
+ blr
+FUNC_END(same_watch_addr_child)
+
+
+.global perf_then_ptrace_load1
+.global perf_then_ptrace_load2
+.global perf_then_ptrace_trap
+
+FUNC_START(perf_then_ptrace_child)
+ nop
+perf_then_ptrace_load1:
+ ld 0,0(3)
+perf_then_ptrace_load2:
+ ld 0,0(4)
+ nop
+perf_then_ptrace_trap:
+ trap
+ blr
+FUNC_END(perf_then_ptrace_child)
diff --git a/tools/testing/selftests/powerpc/ptrace/ptrace-perf-hwbreak.c b/tools/testing/selftests/powerpc/ptrace/ptrace-perf-hwbreak.c
new file mode 100644
index 000000000000..a0a0b9bb5854
--- /dev/null
+++ b/tools/testing/selftests/powerpc/ptrace/ptrace-perf-hwbreak.c
@@ -0,0 +1,445 @@
+// SPDX-License-Identifier: GPL-2.0+
+
+#include <asm/unistd.h>
+#include <linux/hw_breakpoint.h>
+#include <linux/ptrace.h>
+#include <memory.h>
+#include <stdlib.h>
+#include <sys/wait.h>
+
+#include "utils.h"
+
+/*
+ * Child subroutine that performs a load on the address, then traps
+ */
+void same_watch_addr_child(unsigned long *addr);
+
+/* Address of the ld instruction in same_watch_addr_child() */
+extern char same_watch_addr_load[];
+
+/* Address of the end trap instruction in same_watch_addr_child() */
+extern char same_watch_addr_trap[];
+
+/*
+ * Child subroutine that performs a load on the first address, then a load on
+ * the second address (with no instructions separating this from the first
+ * load), then traps.
+ */
+void perf_then_ptrace_child(unsigned long *first_addr, unsigned long *second_addr);
+
+/* Address of the first ld instruction in perf_then_ptrace_child() */
+extern char perf_then_ptrace_load1[];
+
+/* Address of the second ld instruction in perf_then_ptrace_child() */
+extern char perf_then_ptrace_load2[];
+
+/* Address of the end trap instruction in perf_then_ptrace_child() */
+extern char perf_then_ptrace_trap[];
+
+static inline long sys_ptrace(long request, pid_t pid, unsigned long addr, unsigned long data)
+{
+ return syscall(__NR_ptrace, request, pid, addr, data);
+}
+
+static long ptrace_traceme(void)
+{
+ return sys_ptrace(PTRACE_TRACEME, 0, 0, 0);
+}
+
+static long ptrace_getregs(pid_t pid, struct pt_regs *result)
+{
+ return sys_ptrace(PTRACE_GETREGS, pid, 0, (unsigned long)result);
+}
+
+static long ptrace_setregs(pid_t pid, struct pt_regs *result)
+{
+ return sys_ptrace(PTRACE_SETREGS, pid, 0, (unsigned long)result);
+}
+
+static long ptrace_cont(pid_t pid, long signal)
+{
+ return sys_ptrace(PTRACE_CONT, pid, 0, signal);
+}
+
+static long ptrace_singlestep(pid_t pid, long signal)
+{
+ return sys_ptrace(PTRACE_SINGLESTEP, pid, 0, signal);
+}
+
+static long ppc_ptrace_gethwdbginfo(pid_t pid, struct ppc_debug_info *dbginfo)
+{
+ return sys_ptrace(PPC_PTRACE_GETHWDBGINFO, pid, 0, (unsigned long)dbginfo);
+}
+
+static long ppc_ptrace_sethwdbg(pid_t pid, struct ppc_hw_breakpoint *bp_info)
+{
+ return sys_ptrace(PPC_PTRACE_SETHWDEBUG, pid, 0, (unsigned long)bp_info);
+}
+
+static long ppc_ptrace_delhwdbg(pid_t pid, int bp_id)
+{
+ return sys_ptrace(PPC_PTRACE_DELHWDEBUG, pid, 0L, bp_id);
+}
+
+static long ptrace_getreg_pc(pid_t pid, void **pc)
+{
+ struct pt_regs regs;
+ long err;
+
+ err = ptrace_getregs(pid, &regs);
+ if (err)
+ return err;
+
+ *pc = (void *)regs.nip;
+
+ return 0;
+}
+
+static long ptrace_setreg_pc(pid_t pid, void *pc)
+{
+ struct pt_regs regs;
+ long err;
+
+ err = ptrace_getregs(pid, &regs);
+ if (err)
+ return err;
+
+ regs.nip = (unsigned long)pc;
+
+ err = ptrace_setregs(pid, &regs);
+ if (err)
+ return err;
+
+ return 0;
+}
+
+static int perf_event_open(struct perf_event_attr *attr, pid_t pid, int cpu,
+ int group_fd, unsigned long flags)
+{
+ return syscall(__NR_perf_event_open, attr, pid, cpu, group_fd, flags);
+}
+
+static void perf_user_event_attr_set(struct perf_event_attr *attr, void *addr, u64 len)
+{
+ memset(attr, 0, sizeof(struct perf_event_attr));
+
+ attr->type = PERF_TYPE_BREAKPOINT;
+ attr->size = sizeof(struct perf_event_attr);
+ attr->bp_type = HW_BREAKPOINT_R;
+ attr->bp_addr = (u64)addr;
+ attr->bp_len = len;
+ attr->exclude_kernel = 1;
+ attr->exclude_hv = 1;
+}
+
+static int perf_watchpoint_open(pid_t child_pid, void *addr, u64 len)
+{
+ struct perf_event_attr attr;
+
+ perf_user_event_attr_set(&attr, addr, len);
+ return perf_event_open(&attr, child_pid, -1, -1, 0);
+}
+
+static int perf_read_counter(int perf_fd, u64 *count)
+{
+ /*
+ * A perf counter is retrieved by the read() syscall. It contains
+ * the current count as 8 bytes that are interpreted as a u64
+ */
+ ssize_t len = read(perf_fd, count, sizeof(*count));
+
+ if (len != sizeof(*count))
+ return -1;
+
+ return 0;
+}
+
+static void ppc_ptrace_init_breakpoint(struct ppc_hw_breakpoint *info,
+ int type, void *addr, int len)
+{
+ info->version = 1;
+ info->trigger_type = type;
+ info->condition_mode = PPC_BREAKPOINT_CONDITION_NONE;
+ info->addr = (u64)addr;
+ info->addr2 = (u64)addr + len;
+ info->condition_value = 0;
+ if (!len)
+ info->addr_mode = PPC_BREAKPOINT_MODE_EXACT;
+ else
+ info->addr_mode = PPC_BREAKPOINT_MODE_RANGE_INCLUSIVE;
+}
+
+/*
+ * Checks if we can place at least 2 watchpoints on the child process
+ */
+static int check_watchpoints(pid_t pid)
+{
+ struct ppc_debug_info dbginfo;
+
+ FAIL_IF_MSG(ppc_ptrace_gethwdbginfo(pid, &dbginfo), "PPC_PTRACE_GETHWDBGINFO failed");
+ SKIP_IF_MSG(dbginfo.num_data_bps <= 1, "Not enough data watchpoints (need at least 2)");
+
+ return 0;
+}
+
+/*
+ * Wrapper around a plain fork() call that sets up the child for
+ * ptrace-ing. Both the parent and child return from this, though
+ * the child is stopped until ptrace_cont(pid) is run by the parent.
+ */
+static int ptrace_fork_child(pid_t *pid)
+{
+ int status;
+
+ *pid = fork();
+
+ if (*pid < 0)
+ FAIL_IF_MSG(1, "Failed to fork child");
+
+ if (!*pid) {
+ FAIL_IF_EXIT_MSG(ptrace_traceme(), "PTRACE_TRACEME failed");
+ FAIL_IF_EXIT_MSG(raise(SIGSTOP), "Child failed to raise SIGSTOP");
+ } else {
+ /* Synchronise on child SIGSTOP */
+ FAIL_IF_MSG(waitpid(*pid, &status, 0) == -1, "Failed to wait for child");
+ FAIL_IF_MSG(!WIFSTOPPED(status), "Child is not stopped");
+ }
+
+ return 0;
+}
+
+/*
+ * Tests the interaction between ptrace and perf watching the same data.
+ *
+ * We expect ptrace to take 'priority', as it is has before-execute
+ * semantics.
+ *
+ * The perf counter should not be incremented yet because perf has after-execute
+ * semantics. E.g., if ptrace changes the child PC, we don't even execute the
+ * instruction at all.
+ *
+ * When the child is stopped for ptrace, we test both continue and single step.
+ * Both should increment the perf counter. We also test changing the PC somewhere
+ * different and stepping, which should not increment the perf counter.
+ */
+int same_watch_addr_test(void)
+{
+ struct ppc_hw_breakpoint bp_info; /* ptrace breakpoint info */
+ int bp_id; /* Breakpoint handle of ptrace watchpoint */
+ int perf_fd; /* File descriptor of perf performance counter */
+ u64 perf_count; /* Most recently fetched perf performance counter value */
+ pid_t pid; /* PID of child process */
+ void *pc; /* Most recently fetched child PC value */
+ int status; /* Stop status of child after waitpid */
+ unsigned long value; /* Dummy value to be read/written to by child */
+ int err;
+
+ err = ptrace_fork_child(&pid);
+ if (err)
+ return err;
+
+ if (!pid) {
+ same_watch_addr_child(&value);
+ exit(1);
+ }
+
+ err = check_watchpoints(pid);
+ if (err)
+ return err;
+
+ /* Place a perf watchpoint counter on value */
+ perf_fd = perf_watchpoint_open(pid, &value, sizeof(value));
+ FAIL_IF_MSG(perf_fd < 0, "Failed to open perf performance counter");
+
+ /* Place a ptrace watchpoint on value */
+ ppc_ptrace_init_breakpoint(&bp_info, PPC_BREAKPOINT_TRIGGER_READ, &value, sizeof(value));
+ bp_id = ppc_ptrace_sethwdbg(pid, &bp_info);
+ FAIL_IF_MSG(bp_id < 0, "Failed to set ptrace watchpoint");
+
+ /* Let the child run. It should stop on the ptrace watchpoint */
+ FAIL_IF_MSG(ptrace_cont(pid, 0), "Failed to continue child");
+
+ FAIL_IF_MSG(waitpid(pid, &status, 0) == -1, "Failed to wait for child");
+ FAIL_IF_MSG(!WIFSTOPPED(status), "Child is not stopped");
+ FAIL_IF_MSG(ptrace_getreg_pc(pid, &pc), "Failed to get child PC");
+ FAIL_IF_MSG(pc != same_watch_addr_load, "Child did not stop on load instruction");
+
+ /*
+ * We stopped before executing the load, so perf should not have
+ * recorded any events yet
+ */
+ FAIL_IF_MSG(perf_read_counter(perf_fd, &perf_count), "Failed to read perf counter");
+ FAIL_IF_MSG(perf_count != 0, "perf recorded unexpected event");
+
+ /* Single stepping over the load should increment the perf counter */
+ FAIL_IF_MSG(ptrace_singlestep(pid, 0), "Failed to single step child");
+
+ FAIL_IF_MSG(waitpid(pid, &status, 0) == -1, "Failed to wait for child");
+ FAIL_IF_MSG(!WIFSTOPPED(status), "Child is not stopped");
+ FAIL_IF_MSG(ptrace_getreg_pc(pid, &pc), "Failed to get child PC");
+ FAIL_IF_MSG(pc != same_watch_addr_load + 4, "Failed to single step load instruction");
+ FAIL_IF_MSG(perf_read_counter(perf_fd, &perf_count), "Failed to read perf counter");
+ FAIL_IF_MSG(perf_count != 1, "perf counter did not increment");
+
+ /*
+ * Set up a ptrace watchpoint on the value again and trigger it.
+ * The perf counter should not have incremented because we do not
+ * execute the load yet.
+ */
+ FAIL_IF_MSG(ppc_ptrace_delhwdbg(pid, bp_id), "Failed to remove old ptrace watchpoint");
+ bp_id = ppc_ptrace_sethwdbg(pid, &bp_info);
+ FAIL_IF_MSG(bp_id < 0, "Failed to set ptrace watchpoint");
+ FAIL_IF_MSG(ptrace_setreg_pc(pid, same_watch_addr_load), "Failed to set child PC");
+ FAIL_IF_MSG(ptrace_cont(pid, 0), "Failed to continue child");
+
+ FAIL_IF_MSG(waitpid(pid, &status, 0) == -1, "Failed to wait for child");
+ FAIL_IF_MSG(!WIFSTOPPED(status), "Child is not stopped");
+ FAIL_IF_MSG(ptrace_getreg_pc(pid, &pc), "Failed to get child PC");
+ FAIL_IF_MSG(pc != same_watch_addr_load, "Child did not stop on load trap");
+ FAIL_IF_MSG(perf_read_counter(perf_fd, &perf_count), "Failed to read perf counter");
+ FAIL_IF_MSG(perf_count != 1, "perf counter should not have changed");
+
+ /* Continuing over the load should increment the perf counter */
+ FAIL_IF_MSG(ptrace_cont(pid, 0), "Failed to continue child");
+
+ FAIL_IF_MSG(waitpid(pid, &status, 0) == -1, "Failed to wait for child");
+ FAIL_IF_MSG(!WIFSTOPPED(status), "Child is not stopped");
+ FAIL_IF_MSG(ptrace_getreg_pc(pid, &pc), "Failed to get child PC");
+ FAIL_IF_MSG(pc != same_watch_addr_trap, "Child did not stop on end trap");
+ FAIL_IF_MSG(perf_read_counter(perf_fd, &perf_count), "Failed to read perf counter");
+ FAIL_IF_MSG(perf_count != 2, "perf counter did not increment");
+
+ /*
+ * If we set the child PC back to the load instruction, then continue,
+ * we should reach the end trap (because ptrace is one-shot) and have
+ * another perf event.
+ */
+ FAIL_IF_MSG(ptrace_setreg_pc(pid, same_watch_addr_load), "Failed to set child PC");
+ FAIL_IF_MSG(ptrace_cont(pid, 0), "Failed to continue child");
+
+ FAIL_IF_MSG(waitpid(pid, &status, 0) == -1, "Failed to wait for child");
+ FAIL_IF_MSG(!WIFSTOPPED(status), "Child is not stopped");
+ FAIL_IF_MSG(ptrace_getreg_pc(pid, &pc), "Failed to get child PC");
+ FAIL_IF_MSG(pc != same_watch_addr_trap, "Child did not stop on end trap");
+ FAIL_IF_MSG(perf_read_counter(perf_fd, &perf_count), "Failed to read perf counter");
+ FAIL_IF_MSG(perf_count != 3, "perf counter did not increment");
+
+ /*
+ * If we set the child PC back to the load instruction, set a ptrace
+ * watchpoint on the load, then continue, we should immediately get
+ * the ptrace trap without incrementing the perf counter
+ */
+ FAIL_IF_MSG(ppc_ptrace_delhwdbg(pid, bp_id), "Failed to remove old ptrace watchpoint");
+ bp_id = ppc_ptrace_sethwdbg(pid, &bp_info);
+ FAIL_IF_MSG(bp_id < 0, "Failed to set ptrace watchpoint");
+ FAIL_IF_MSG(ptrace_setreg_pc(pid, same_watch_addr_load), "Failed to set child PC");
+ FAIL_IF_MSG(ptrace_cont(pid, 0), "Failed to continue child");
+
+ FAIL_IF_MSG(waitpid(pid, &status, 0) == -1, "Failed to wait for child");
+ FAIL_IF_MSG(!WIFSTOPPED(status), "Child is not stopped");
+ FAIL_IF_MSG(ptrace_getreg_pc(pid, &pc), "Failed to get child PC");
+ FAIL_IF_MSG(pc != same_watch_addr_load, "Child did not stop on load instruction");
+ FAIL_IF_MSG(perf_read_counter(perf_fd, &perf_count), "Failed to read perf counter");
+ FAIL_IF_MSG(perf_count != 3, "perf counter should not have changed");
+
+ /*
+ * If we change the PC while stopped on the load instruction, we should
+ * not increment the perf counter (because ptrace is before-execute,
+ * perf is after-execute).
+ */
+ FAIL_IF_MSG(ptrace_setreg_pc(pid, same_watch_addr_load + 4), "Failed to set child PC");
+ FAIL_IF_MSG(ptrace_cont(pid, 0), "Failed to continue child");
+
+ FAIL_IF_MSG(waitpid(pid, &status, 0) == -1, "Failed to wait for child");
+ FAIL_IF_MSG(!WIFSTOPPED(status), "Child is not stopped");
+ FAIL_IF_MSG(ptrace_getreg_pc(pid, &pc), "Failed to get child PC");
+ FAIL_IF_MSG(pc != same_watch_addr_trap, "Child did not stop on end trap");
+ FAIL_IF_MSG(perf_read_counter(perf_fd, &perf_count), "Failed to read perf counter");
+ FAIL_IF_MSG(perf_count != 3, "perf counter should not have changed");
+
+ /* Clean up child */
+ FAIL_IF_MSG(kill(pid, SIGKILL) != 0, "Failed to kill child");
+
+ return 0;
+}
+
+/*
+ * Tests the interaction between ptrace and perf when:
+ * 1. perf watches a value
+ * 2. ptrace watches a different value
+ * 3. The perf value is read, then the ptrace value is read immediately after
+ *
+ * A breakpoint implementation may accidentally misattribute/skip one of
+ * the ptrace or perf handlers, as interrupt based work is done after perf
+ * and before ptrace.
+ *
+ * We expect the perf counter to increment before the ptrace watchpoint
+ * triggers.
+ */
+int perf_then_ptrace_test(void)
+{
+ struct ppc_hw_breakpoint bp_info; /* ptrace breakpoint info */
+ int bp_id; /* Breakpoint handle of ptrace watchpoint */
+ int perf_fd; /* File descriptor of perf performance counter */
+ u64 perf_count; /* Most recently fetched perf performance counter value */
+ pid_t pid; /* PID of child process */
+ void *pc; /* Most recently fetched child PC value */
+ int status; /* Stop status of child after waitpid */
+ unsigned long perf_value; /* Dummy value to be watched by perf */
+ unsigned long ptrace_value; /* Dummy value to be watched by ptrace */
+ int err;
+
+ err = ptrace_fork_child(&pid);
+ if (err)
+ return err;
+
+ /*
+ * If we are the child, run a subroutine that reads the perf value,
+ * then reads the ptrace value with consecutive load instructions
+ */
+ if (!pid) {
+ perf_then_ptrace_child(&perf_value, &ptrace_value);
+ exit(0);
+ }
+
+ err = check_watchpoints(pid);
+ if (err)
+ return err;
+
+ /* Place a perf watchpoint counter */
+ perf_fd = perf_watchpoint_open(pid, &perf_value, sizeof(perf_value));
+ FAIL_IF_MSG(perf_fd < 0, "Failed to open perf performance counter");
+
+ /* Place a ptrace watchpoint */
+ ppc_ptrace_init_breakpoint(&bp_info, PPC_BREAKPOINT_TRIGGER_READ,
+ &ptrace_value, sizeof(ptrace_value));
+ bp_id = ppc_ptrace_sethwdbg(pid, &bp_info);
+ FAIL_IF_MSG(bp_id < 0, "Failed to set ptrace watchpoint");
+
+ /* Let the child run. It should stop on the ptrace watchpoint */
+ FAIL_IF_MSG(ptrace_cont(pid, 0), "Failed to continue child");
+
+ FAIL_IF_MSG(waitpid(pid, &status, 0) == -1, "Failed to wait for child");
+ FAIL_IF_MSG(!WIFSTOPPED(status), "Child is not stopped");
+ FAIL_IF_MSG(ptrace_getreg_pc(pid, &pc), "Failed to get child PC");
+ FAIL_IF_MSG(pc != perf_then_ptrace_load2, "Child did not stop on ptrace load");
+
+ /* perf should have recorded the first load */
+ FAIL_IF_MSG(perf_read_counter(perf_fd, &perf_count), "Failed to read perf counter");
+ FAIL_IF_MSG(perf_count != 1, "perf counter did not increment");
+
+ /* Clean up child */
+ FAIL_IF_MSG(kill(pid, SIGKILL) != 0, "Failed to kill child");
+
+ return 0;
+}
+
+int main(int argc, char *argv[])
+{
+ int err = 0;
+
+ err |= test_harness(same_watch_addr_test, "same_watch_addr");
+ err |= test_harness(perf_then_ptrace_test, "perf_then_ptrace");
+
+ return err;
+}
diff --git a/tools/testing/selftests/powerpc/ptrace/ptrace-pkey.c b/tools/testing/selftests/powerpc/ptrace/ptrace-pkey.c
index bdbbbe8431e0..d89474377f11 100644
--- a/tools/testing/selftests/powerpc/ptrace/ptrace-pkey.c
+++ b/tools/testing/selftests/powerpc/ptrace/ptrace-pkey.c
@@ -44,7 +44,7 @@ struct shared_info {
unsigned long amr2;
/* AMR value that ptrace should refuse to write to the child. */
- unsigned long amr3;
+ unsigned long invalid_amr;
/* IAMR value the parent expects to read from the child. */
unsigned long expected_iamr;
@@ -57,8 +57,8 @@ struct shared_info {
* (even though they're valid ones) because userspace doesn't have
* access to those registers.
*/
- unsigned long new_iamr;
- unsigned long new_uamor;
+ unsigned long invalid_iamr;
+ unsigned long invalid_uamor;
};
static int sys_pkey_alloc(unsigned long flags, unsigned long init_access_rights)
@@ -66,11 +66,6 @@ static int sys_pkey_alloc(unsigned long flags, unsigned long init_access_rights)
return syscall(__NR_pkey_alloc, flags, init_access_rights);
}
-static int sys_pkey_free(int pkey)
-{
- return syscall(__NR_pkey_free, pkey);
-}
-
static int child(struct shared_info *info)
{
unsigned long reg;
@@ -100,33 +95,37 @@ static int child(struct shared_info *info)
info->amr1 |= 3ul << pkeyshift(pkey1);
info->amr2 |= 3ul << pkeyshift(pkey2);
- info->amr3 |= info->amr2 | 3ul << pkeyshift(pkey3);
+ /*
+ * invalid amr value where we try to force write
+ * things which are deined by a uamor setting.
+ */
+ info->invalid_amr = info->amr2 | (~0x0UL & ~info->expected_uamor);
+ /*
+ * if PKEY_DISABLE_EXECUTE succeeded we should update the expected_iamr
+ */
if (disable_execute)
info->expected_iamr |= 1ul << pkeyshift(pkey1);
else
info->expected_iamr &= ~(1ul << pkeyshift(pkey1));
- info->expected_iamr &= ~(1ul << pkeyshift(pkey2) | 1ul << pkeyshift(pkey3));
-
- info->expected_uamor |= 3ul << pkeyshift(pkey1) |
- 3ul << pkeyshift(pkey2);
- info->new_iamr |= 1ul << pkeyshift(pkey1) | 1ul << pkeyshift(pkey2);
- info->new_uamor |= 3ul << pkeyshift(pkey1);
+ /*
+ * We allocated pkey2 and pkey 3 above. Clear the IAMR bits.
+ */
+ info->expected_iamr &= ~(1ul << pkeyshift(pkey2));
+ info->expected_iamr &= ~(1ul << pkeyshift(pkey3));
/*
- * We won't use pkey3. We just want a plausible but invalid key to test
- * whether ptrace will let us write to AMR bits we are not supposed to.
- *
- * This also tests whether the kernel restores the UAMOR permissions
- * after a key is freed.
+ * Create an IAMR value different from expected value.
+ * Kernel will reject an IAMR and UAMOR change.
*/
- sys_pkey_free(pkey3);
+ info->invalid_iamr = info->expected_iamr | (1ul << pkeyshift(pkey1) | 1ul << pkeyshift(pkey2));
+ info->invalid_uamor = info->expected_uamor & ~(0x3ul << pkeyshift(pkey1));
printf("%-30s AMR: %016lx pkey1: %d pkey2: %d pkey3: %d\n",
user_write, info->amr1, pkey1, pkey2, pkey3);
- mtspr(SPRN_AMR, info->amr1);
+ set_amr(info->amr1);
/* Wait for parent to read our AMR value and write a new one. */
ret = prod_parent(&info->child_sync);
@@ -193,12 +192,12 @@ static int parent(struct shared_info *info, pid_t pid)
* to the child.
*/
ret = ptrace_read_regs(pid, NT_PPC_PKEY, regs, 3);
- PARENT_SKIP_IF_UNSUPPORTED(ret, &info->child_sync);
+ PARENT_SKIP_IF_UNSUPPORTED(ret, &info->child_sync, "PKEYs not supported");
PARENT_FAIL_IF(ret, &info->child_sync);
- info->amr1 = info->amr2 = info->amr3 = regs[0];
- info->expected_iamr = info->new_iamr = regs[1];
- info->expected_uamor = info->new_uamor = regs[2];
+ info->amr1 = info->amr2 = regs[0];
+ info->expected_iamr = regs[1];
+ info->expected_uamor = regs[2];
/* Wake up child so that it can set itself up. */
ret = prod_child(&info->child_sync);
@@ -234,10 +233,10 @@ static int parent(struct shared_info *info, pid_t pid)
return ret;
/* Write invalid AMR value in child. */
- ret = ptrace_write_regs(pid, NT_PPC_PKEY, &info->amr3, 1);
+ ret = ptrace_write_regs(pid, NT_PPC_PKEY, &info->invalid_amr, 1);
PARENT_FAIL_IF(ret, &info->child_sync);
- printf("%-30s AMR: %016lx\n", ptrace_write_running, info->amr3);
+ printf("%-30s AMR: %016lx\n", ptrace_write_running, info->invalid_amr);
/* Wake up child so that it can verify it didn't change. */
ret = prod_child(&info->child_sync);
@@ -249,7 +248,7 @@ static int parent(struct shared_info *info, pid_t pid)
/* Try to write to IAMR. */
regs[0] = info->amr1;
- regs[1] = info->new_iamr;
+ regs[1] = info->invalid_iamr;
ret = ptrace_write_regs(pid, NT_PPC_PKEY, regs, 2);
PARENT_FAIL_IF(!ret, &info->child_sync);
@@ -257,7 +256,7 @@ static int parent(struct shared_info *info, pid_t pid)
ptrace_write_running, regs[0], regs[1]);
/* Try to write to IAMR and UAMOR. */
- regs[2] = info->new_uamor;
+ regs[2] = info->invalid_uamor;
ret = ptrace_write_regs(pid, NT_PPC_PKEY, regs, 3);
PARENT_FAIL_IF(!ret, &info->child_sync);
diff --git a/tools/testing/selftests/powerpc/ptrace/ptrace-tar.c b/tools/testing/selftests/powerpc/ptrace/ptrace-tar.c
index 58cb1a860cc9..14726c77a6ce 100644
--- a/tools/testing/selftests/powerpc/ptrace/ptrace-tar.c
+++ b/tools/testing/selftests/powerpc/ptrace/ptrace-tar.c
@@ -78,6 +78,9 @@ int ptrace_tar(void)
pid_t pid;
int ret, status;
+ // TAR was added in v2.07
+ SKIP_IF_MSG(!have_hwcap2(PPC_FEATURE2_ARCH_2_07), "TAR requires ISA 2.07 compatible hardware");
+
shm_id = shmget(IPC_PRIVATE, sizeof(int) * 3, 0777|IPC_CREAT);
pid = fork();
if (pid < 0) {
diff --git a/tools/testing/selftests/powerpc/ptrace/ptrace-tm-gpr.c b/tools/testing/selftests/powerpc/ptrace/ptrace-tm-gpr.c
index 82f7bdc2e5e6..7c70d62587c2 100644
--- a/tools/testing/selftests/powerpc/ptrace/ptrace-tm-gpr.c
+++ b/tools/testing/selftests/powerpc/ptrace/ptrace-tm-gpr.c
@@ -12,15 +12,15 @@
int shm_id;
unsigned long *cptr, *pptr;
-float a = FPR_1;
-float b = FPR_2;
-float c = FPR_3;
+double a = FPR_1;
+double b = FPR_2;
+double c = FPR_3;
void tm_gpr(void)
{
unsigned long gpr_buf[18];
unsigned long result, texasr;
- float fpr_buf[32];
+ double fpr_buf[32];
printf("Starting the child\n");
cptr = (unsigned long *)shmat(shm_id, NULL, 0);
@@ -29,12 +29,12 @@ trans:
cptr[1] = 0;
asm __volatile__(
ASM_LOAD_GPR_IMMED(gpr_1)
- ASM_LOAD_FPR_SINGLE_PRECISION(flt_1)
+ ASM_LOAD_FPR(flt_1)
"1: ;"
"tbegin.;"
"beq 2f;"
ASM_LOAD_GPR_IMMED(gpr_2)
- ASM_LOAD_FPR_SINGLE_PRECISION(flt_2)
+ ASM_LOAD_FPR(flt_2)
"tsuspend.;"
"li 7, 1;"
"stw 7, 0(%[cptr1]);"
@@ -57,7 +57,7 @@ trans:
: [gpr_1]"i"(GPR_1), [gpr_2]"i"(GPR_2),
[sprn_texasr] "i" (SPRN_TEXASR), [flt_1] "b" (&a),
[flt_2] "b" (&b), [cptr1] "b" (&cptr[1])
- : "memory", "r7", "r8", "r9", "r10",
+ : "memory", "r0", "r7", "r8", "r9", "r10",
"r11", "r12", "r13", "r14", "r15", "r16",
"r17", "r18", "r19", "r20", "r21", "r22",
"r23", "r24", "r25", "r26", "r27", "r28",
@@ -70,12 +70,12 @@ trans:
shmdt((void *)cptr);
store_gpr(gpr_buf);
- store_fpr_single_precision(fpr_buf);
+ store_fpr(fpr_buf);
if (validate_gpr(gpr_buf, GPR_3))
exit(1);
- if (validate_fpr_float(fpr_buf, c))
+ if (validate_fpr_double(fpr_buf, c))
exit(1);
exit(0);
@@ -87,7 +87,7 @@ trans:
int trace_tm_gpr(pid_t child)
{
unsigned long gpr[18];
- unsigned long fpr[32];
+ __u64 fpr[32];
FAIL_IF(start_trace(child));
FAIL_IF(show_gpr(child, gpr));
@@ -112,7 +112,8 @@ int ptrace_tm_gpr(void)
pid_t pid;
int ret, status;
- SKIP_IF(!have_htm());
+ SKIP_IF_MSG(!have_htm(), "Don't have transactional memory");
+ SKIP_IF_MSG(htm_is_synthetic(), "Transactional memory is synthetic");
shm_id = shmget(IPC_PRIVATE, sizeof(int) * 2, 0777|IPC_CREAT);
pid = fork();
if (pid < 0) {
diff --git a/tools/testing/selftests/powerpc/ptrace/ptrace-tm-spd-gpr.c b/tools/testing/selftests/powerpc/ptrace/ptrace-tm-spd-gpr.c
index ad65be6e8e85..6c17ed099969 100644
--- a/tools/testing/selftests/powerpc/ptrace/ptrace-tm-spd-gpr.c
+++ b/tools/testing/selftests/powerpc/ptrace/ptrace-tm-spd-gpr.c
@@ -12,10 +12,10 @@
int shm_id;
int *cptr, *pptr;
-float a = FPR_1;
-float b = FPR_2;
-float c = FPR_3;
-float d = FPR_4;
+double a = FPR_1;
+double b = FPR_2;
+double c = FPR_3;
+double d = FPR_4;
__attribute__((used)) void wait_parent(void)
{
@@ -28,7 +28,7 @@ void tm_spd_gpr(void)
{
unsigned long gpr_buf[18];
unsigned long result, texasr;
- float fpr_buf[32];
+ double fpr_buf[32];
cptr = (int *)shmat(shm_id, NULL, 0);
@@ -36,7 +36,7 @@ trans:
cptr[2] = 0;
asm __volatile__(
ASM_LOAD_GPR_IMMED(gpr_1)
- ASM_LOAD_FPR_SINGLE_PRECISION(flt_1)
+ ASM_LOAD_FPR(flt_1)
"1: ;"
"tbegin.;"
@@ -45,7 +45,7 @@ trans:
ASM_LOAD_GPR_IMMED(gpr_2)
"tsuspend.;"
ASM_LOAD_GPR_IMMED(gpr_4)
- ASM_LOAD_FPR_SINGLE_PRECISION(flt_4)
+ ASM_LOAD_FPR(flt_4)
"bl wait_parent;"
"tresume.;"
@@ -65,7 +65,7 @@ trans:
: [gpr_1]"i"(GPR_1), [gpr_2]"i"(GPR_2), [gpr_4]"i"(GPR_4),
[sprn_texasr] "i" (SPRN_TEXASR), [flt_1] "b" (&a),
[flt_4] "b" (&d)
- : "memory", "r5", "r6", "r7",
+ : "memory", "r0", "r5", "r6", "r7",
"r8", "r9", "r10", "r11", "r12", "r13", "r14", "r15",
"r16", "r17", "r18", "r19", "r20", "r21", "r22", "r23",
"r24", "r25", "r26", "r27", "r28", "r29", "r30", "r31"
@@ -77,12 +77,12 @@ trans:
shmdt((void *)cptr);
store_gpr(gpr_buf);
- store_fpr_single_precision(fpr_buf);
+ store_fpr(fpr_buf);
if (validate_gpr(gpr_buf, GPR_3))
exit(1);
- if (validate_fpr_float(fpr_buf, c))
+ if (validate_fpr_double(fpr_buf, c))
exit(1);
exit(0);
}
@@ -93,7 +93,7 @@ trans:
int trace_tm_spd_gpr(pid_t child)
{
unsigned long gpr[18];
- unsigned long fpr[32];
+ __u64 fpr[32];
FAIL_IF(start_trace(child));
FAIL_IF(show_gpr(child, gpr));
@@ -118,7 +118,8 @@ int ptrace_tm_spd_gpr(void)
pid_t pid;
int ret, status;
- SKIP_IF(!have_htm());
+ SKIP_IF_MSG(!have_htm(), "Don't have transactional memory");
+ SKIP_IF_MSG(htm_is_synthetic(), "Transactional memory is synthetic");
shm_id = shmget(IPC_PRIVATE, sizeof(int) * 3, 0777|IPC_CREAT);
pid = fork();
if (pid < 0) {
diff --git a/tools/testing/selftests/powerpc/ptrace/ptrace-tm-spd-tar.c b/tools/testing/selftests/powerpc/ptrace/ptrace-tm-spd-tar.c
index 2ecfa1158e2b..afd8dc2e2097 100644
--- a/tools/testing/selftests/powerpc/ptrace/ptrace-tm-spd-tar.c
+++ b/tools/testing/selftests/powerpc/ptrace/ptrace-tm-spd-tar.c
@@ -128,7 +128,8 @@ int ptrace_tm_spd_tar(void)
pid_t pid;
int ret, status;
- SKIP_IF(!have_htm());
+ SKIP_IF_MSG(!have_htm(), "Don't have transactional memory");
+ SKIP_IF_MSG(htm_is_synthetic(), "Transactional memory is synthetic");
shm_id = shmget(IPC_PRIVATE, sizeof(int) * 3, 0777|IPC_CREAT);
pid = fork();
if (pid == 0)
diff --git a/tools/testing/selftests/powerpc/ptrace/ptrace-tm-spd-vsx.c b/tools/testing/selftests/powerpc/ptrace/ptrace-tm-spd-vsx.c
index 6f7fb51f0809..14d2fac8f237 100644
--- a/tools/testing/selftests/powerpc/ptrace/ptrace-tm-spd-vsx.c
+++ b/tools/testing/selftests/powerpc/ptrace/ptrace-tm-spd-vsx.c
@@ -128,7 +128,8 @@ int ptrace_tm_spd_vsx(void)
pid_t pid;
int ret, status, i;
- SKIP_IF(!have_htm());
+ SKIP_IF_MSG(!have_htm(), "Don't have transactional memory");
+ SKIP_IF_MSG(htm_is_synthetic(), "Transactional memory is synthetic");
shm_id = shmget(IPC_PRIVATE, sizeof(int) * 3, 0777|IPC_CREAT);
for (i = 0; i < 128; i++) {
diff --git a/tools/testing/selftests/powerpc/ptrace/ptrace-tm-spr.c b/tools/testing/selftests/powerpc/ptrace/ptrace-tm-spr.c
index 068bfed2e606..e64cdb04cecf 100644
--- a/tools/testing/selftests/powerpc/ptrace/ptrace-tm-spr.c
+++ b/tools/testing/selftests/powerpc/ptrace/ptrace-tm-spr.c
@@ -113,7 +113,8 @@ int ptrace_tm_spr(void)
pid_t pid;
int ret, status;
- SKIP_IF(!have_htm());
+ SKIP_IF_MSG(!have_htm(), "Don't have transactional memory");
+ SKIP_IF_MSG(htm_is_synthetic(), "Transactional memory is synthetic");
shm_id = shmget(IPC_PRIVATE, sizeof(struct shared), 0777|IPC_CREAT);
shm_id1 = shmget(IPC_PRIVATE, sizeof(int), 0777|IPC_CREAT);
pid = fork();
diff --git a/tools/testing/selftests/powerpc/ptrace/ptrace-tm-tar.c b/tools/testing/selftests/powerpc/ptrace/ptrace-tm-tar.c
index 46ef378a15ec..3963d4b0429f 100644
--- a/tools/testing/selftests/powerpc/ptrace/ptrace-tm-tar.c
+++ b/tools/testing/selftests/powerpc/ptrace/ptrace-tm-tar.c
@@ -116,7 +116,8 @@ int ptrace_tm_tar(void)
pid_t pid;
int ret, status;
- SKIP_IF(!have_htm());
+ SKIP_IF_MSG(!have_htm(), "Don't have transactional memory");
+ SKIP_IF_MSG(htm_is_synthetic(), "Transactional memory is synthetic");
shm_id = shmget(IPC_PRIVATE, sizeof(int) * 2, 0777|IPC_CREAT);
pid = fork();
if (pid == 0)
diff --git a/tools/testing/selftests/powerpc/ptrace/ptrace-tm-vsx.c b/tools/testing/selftests/powerpc/ptrace/ptrace-tm-vsx.c
index 70ca01234f79..8c925d734a72 100644
--- a/tools/testing/selftests/powerpc/ptrace/ptrace-tm-vsx.c
+++ b/tools/testing/selftests/powerpc/ptrace/ptrace-tm-vsx.c
@@ -112,7 +112,8 @@ int ptrace_tm_vsx(void)
pid_t pid;
int ret, status, i;
- SKIP_IF(!have_htm());
+ SKIP_IF_MSG(!have_htm(), "Don't have transactional memory");
+ SKIP_IF_MSG(htm_is_synthetic(), "Transactional memory is synthetic");
shm_id = shmget(IPC_PRIVATE, sizeof(int) * 2, 0777|IPC_CREAT);
for (i = 0; i < 128; i++) {
diff --git a/tools/testing/selftests/powerpc/ptrace/ptrace-vsx.c b/tools/testing/selftests/powerpc/ptrace/ptrace-vsx.c
index c4fe0e893306..11bc624574fe 100644
--- a/tools/testing/selftests/powerpc/ptrace/ptrace-vsx.c
+++ b/tools/testing/selftests/powerpc/ptrace/ptrace-vsx.c
@@ -61,6 +61,8 @@ int ptrace_vsx(void)
pid_t pid;
int ret, status, i;
+ SKIP_IF_MSG(!have_hwcap(PPC_FEATURE_HAS_VSX), "Don't have VSX");
+
shm_id = shmget(IPC_PRIVATE, sizeof(int) * 2, 0777|IPC_CREAT);
for (i = 0; i < VEC_MAX; i++)
diff --git a/tools/testing/selftests/powerpc/ptrace/ptrace.h b/tools/testing/selftests/powerpc/ptrace/ptrace.h
index 5181ad9b4b6c..04788e5fc504 100644
--- a/tools/testing/selftests/powerpc/ptrace/ptrace.h
+++ b/tools/testing/selftests/powerpc/ptrace/ptrace.h
@@ -4,6 +4,9 @@
*
* Copyright (C) 2015 Anshuman Khandual, IBM Corporation.
*/
+
+#define __SANE_USERSPACE_TYPES__
+
#include <inttypes.h>
#include <unistd.h>
#include <stdlib.h>
@@ -20,6 +23,7 @@
#include <sys/ipc.h>
#include <sys/shm.h>
#include <sys/user.h>
+#include <sys/syscall.h>
#include <linux/elf.h>
#include <linux/types.h>
#include <linux/auxvec.h>
@@ -30,8 +34,8 @@
#define TEST_FAIL 1
struct fpr_regs {
- unsigned long fpr[32];
- unsigned long fpscr;
+ __u64 fpr[32];
+ __u64 fpscr;
};
struct tm_spr_regs {
@@ -318,7 +322,7 @@ fail:
}
/* FPR */
-int show_fpr(pid_t child, unsigned long *fpr)
+int show_fpr(pid_t child, __u64 *fpr)
{
struct fpr_regs *regs;
int ret, i;
@@ -337,7 +341,7 @@ int show_fpr(pid_t child, unsigned long *fpr)
return TEST_PASS;
}
-int write_fpr(pid_t child, unsigned long val)
+int write_fpr(pid_t child, __u64 val)
{
struct fpr_regs *regs;
int ret, i;
@@ -360,7 +364,7 @@ int write_fpr(pid_t child, unsigned long val)
return TEST_PASS;
}
-int show_ckpt_fpr(pid_t child, unsigned long *fpr)
+int show_ckpt_fpr(pid_t child, __u64 *fpr)
{
struct fpr_regs *regs;
struct iovec iov;
@@ -437,6 +441,70 @@ int show_gpr(pid_t child, unsigned long *gpr)
return TEST_PASS;
}
+long sys_ptrace(enum __ptrace_request request, pid_t pid, unsigned long addr, unsigned long data)
+{
+ return syscall(__NR_ptrace, request, pid, (void *)addr, data);
+}
+
+// 33 because of FPSCR
+#define PT_NUM_FPRS (33 * (sizeof(__u64) / sizeof(unsigned long)))
+
+__u64 *peek_fprs(pid_t child)
+{
+ unsigned long *fprs, *p, addr;
+ long ret;
+ int i;
+
+ fprs = malloc(sizeof(unsigned long) * PT_NUM_FPRS);
+ if (!fprs) {
+ perror("malloc() failed");
+ return NULL;
+ }
+
+ for (i = 0, p = fprs; i < PT_NUM_FPRS; i++, p++) {
+ addr = sizeof(unsigned long) * (PT_FPR0 + i);
+ ret = sys_ptrace(PTRACE_PEEKUSER, child, addr, (unsigned long)p);
+ if (ret) {
+ perror("ptrace(PTRACE_PEEKUSR) failed");
+ return NULL;
+ }
+ }
+
+ addr = sizeof(unsigned long) * (PT_FPR0 + i);
+ ret = sys_ptrace(PTRACE_PEEKUSER, child, addr, (unsigned long)&addr);
+ if (!ret) {
+ printf("ptrace(PTRACE_PEEKUSR) succeeded unexpectedly!\n");
+ return NULL;
+ }
+
+ return (__u64 *)fprs;
+}
+
+int poke_fprs(pid_t child, unsigned long *fprs)
+{
+ unsigned long *p, addr;
+ long ret;
+ int i;
+
+ for (i = 0, p = fprs; i < PT_NUM_FPRS; i++, p++) {
+ addr = sizeof(unsigned long) * (PT_FPR0 + i);
+ ret = sys_ptrace(PTRACE_POKEUSER, child, addr, *p);
+ if (ret) {
+ perror("ptrace(PTRACE_POKEUSR) failed");
+ return -1;
+ }
+ }
+
+ addr = sizeof(unsigned long) * (PT_FPR0 + i);
+ ret = sys_ptrace(PTRACE_POKEUSER, child, addr, addr);
+ if (!ret) {
+ printf("ptrace(PTRACE_POKEUSR) succeeded unexpectedly!\n");
+ return -1;
+ }
+
+ return 0;
+}
+
int write_gpr(pid_t child, unsigned long val)
{
struct pt_regs *regs;
@@ -677,10 +745,7 @@ int show_tm_spr(pid_t child, struct tm_spr_regs *out)
/* Analyse TEXASR after TM failure */
inline unsigned long get_tfiar(void)
{
- unsigned long ret;
-
- asm volatile("mfspr %0,%1" : "=r" (ret) : "i" (SPRN_TFIAR));
- return ret;
+ return mfspr(SPRN_TFIAR);
}
void analyse_texasr(unsigned long texasr)
@@ -742,4 +807,3 @@ void analyse_texasr(unsigned long texasr)
}
void store_gpr(unsigned long *addr);
-void store_fpr(float *addr);
diff --git a/tools/testing/selftests/powerpc/scripts/hmi.sh b/tools/testing/selftests/powerpc/scripts/hmi.sh
index dcdb392e8427..bcc7b6b65009 100755
--- a/tools/testing/selftests/powerpc/scripts/hmi.sh
+++ b/tools/testing/selftests/powerpc/scripts/hmi.sh
@@ -36,7 +36,7 @@ trap "ppc64_cpu --smt-snooze-delay=100" 0 1
# for each chip+core combination
# todo - less fragile parsing
-egrep -o 'OCC: Chip [0-9a-f]+ Core [0-9a-f]' < /sys/firmware/opal/msglog |
+grep -E -o 'OCC: Chip [0-9a-f]+ Core [0-9a-f]' < /sys/firmware/opal/msglog |
while read chipcore; do
chip=$(echo "$chipcore"|awk '{print $3}')
core=$(echo "$chipcore"|awk '{print $5}')
diff --git a/tools/testing/selftests/powerpc/security/.gitignore b/tools/testing/selftests/powerpc/security/.gitignore
index f795e06f5ae3..9357b186b13c 100644
--- a/tools/testing/selftests/powerpc/security/.gitignore
+++ b/tools/testing/selftests/powerpc/security/.gitignore
@@ -1,2 +1,5 @@
# SPDX-License-Identifier: GPL-2.0-only
rfi_flush
+entry_flush
+spectre_v2
+uaccess_flush
diff --git a/tools/testing/selftests/powerpc/security/Makefile b/tools/testing/selftests/powerpc/security/Makefile
index eadbbff50be6..e0d979ab0204 100644
--- a/tools/testing/selftests/powerpc/security/Makefile
+++ b/tools/testing/selftests/powerpc/security/Makefile
@@ -1,9 +1,11 @@
# SPDX-License-Identifier: GPL-2.0+
-TEST_GEN_PROGS := rfi_flush spectre_v2
+TEST_GEN_PROGS := rfi_flush entry_flush uaccess_flush spectre_v2
+TEST_PROGS := mitigation-patching.sh
+
top_srcdir = ../../../../..
-CFLAGS += -I../../../../../usr/include
+CFLAGS += $(KHDR_INCLUDES)
include ../../lib.mk
@@ -11,3 +13,6 @@ $(TEST_GEN_PROGS): ../harness.c ../utils.c
$(OUTPUT)/spectre_v2: CFLAGS += -m64
$(OUTPUT)/spectre_v2: ../pmu/event.c branch_loops.S
+$(OUTPUT)/rfi_flush: flush_utils.c
+$(OUTPUT)/entry_flush: flush_utils.c
+$(OUTPUT)/uaccess_flush: flush_utils.c
diff --git a/tools/testing/selftests/powerpc/security/entry_flush.c b/tools/testing/selftests/powerpc/security/entry_flush.c
new file mode 100644
index 000000000000..e01c573deadd
--- /dev/null
+++ b/tools/testing/selftests/powerpc/security/entry_flush.c
@@ -0,0 +1,139 @@
+// SPDX-License-Identifier: GPL-2.0+
+
+/*
+ * Copyright 2018 IBM Corporation.
+ */
+
+#define __SANE_USERSPACE_TYPES__
+
+#include <sys/types.h>
+#include <stdint.h>
+#include <malloc.h>
+#include <unistd.h>
+#include <signal.h>
+#include <stdlib.h>
+#include <string.h>
+#include <stdio.h>
+#include "utils.h"
+#include "flush_utils.h"
+
+int entry_flush_test(void)
+{
+ char *p;
+ int repetitions = 10;
+ int fd, passes = 0, iter, rc = 0;
+ struct perf_event_read v;
+ __u64 l1d_misses_total = 0;
+ unsigned long iterations = 100000, zero_size = 24 * 1024;
+ unsigned long l1d_misses_expected;
+ int rfi_flush_orig;
+ int entry_flush, entry_flush_orig;
+
+ SKIP_IF(geteuid() != 0);
+
+ // The PMU event we use only works on Power7 or later
+ SKIP_IF(!have_hwcap(PPC_FEATURE_ARCH_2_06));
+
+ if (read_debugfs_int("powerpc/rfi_flush", &rfi_flush_orig) < 0) {
+ perror("Unable to read powerpc/rfi_flush debugfs file");
+ SKIP_IF(1);
+ }
+
+ if (read_debugfs_int("powerpc/entry_flush", &entry_flush_orig) < 0) {
+ perror("Unable to read powerpc/entry_flush debugfs file");
+ SKIP_IF(1);
+ }
+
+ if (rfi_flush_orig != 0) {
+ if (write_debugfs_int("powerpc/rfi_flush", 0) < 0) {
+ perror("error writing to powerpc/rfi_flush debugfs file");
+ FAIL_IF(1);
+ }
+ }
+
+ entry_flush = entry_flush_orig;
+
+ fd = perf_event_open_counter(PERF_TYPE_HW_CACHE, PERF_L1D_READ_MISS_CONFIG, -1);
+ FAIL_IF(fd < 0);
+
+ p = (char *)memalign(zero_size, CACHELINE_SIZE);
+
+ FAIL_IF(perf_event_enable(fd));
+
+ // disable L1 prefetching
+ set_dscr(1);
+
+ iter = repetitions;
+
+ /*
+ * We expect to see l1d miss for each cacheline access when entry_flush
+ * is set. Allow a small variation on this.
+ */
+ l1d_misses_expected = iterations * (zero_size / CACHELINE_SIZE - 2);
+
+again:
+ FAIL_IF(perf_event_reset(fd));
+
+ syscall_loop(p, iterations, zero_size);
+
+ FAIL_IF(read(fd, &v, sizeof(v)) != sizeof(v));
+
+ if (entry_flush && v.l1d_misses >= l1d_misses_expected)
+ passes++;
+ else if (!entry_flush && v.l1d_misses < (l1d_misses_expected / 2))
+ passes++;
+
+ l1d_misses_total += v.l1d_misses;
+
+ while (--iter)
+ goto again;
+
+ if (passes < repetitions) {
+ printf("FAIL (L1D misses with entry_flush=%d: %llu %c %lu) [%d/%d failures]\n",
+ entry_flush, l1d_misses_total, entry_flush ? '<' : '>',
+ entry_flush ? repetitions * l1d_misses_expected :
+ repetitions * l1d_misses_expected / 2,
+ repetitions - passes, repetitions);
+ rc = 1;
+ } else {
+ printf("PASS (L1D misses with entry_flush=%d: %llu %c %lu) [%d/%d pass]\n",
+ entry_flush, l1d_misses_total, entry_flush ? '>' : '<',
+ entry_flush ? repetitions * l1d_misses_expected :
+ repetitions * l1d_misses_expected / 2,
+ passes, repetitions);
+ }
+
+ if (entry_flush == entry_flush_orig) {
+ entry_flush = !entry_flush_orig;
+ if (write_debugfs_int("powerpc/entry_flush", entry_flush) < 0) {
+ perror("error writing to powerpc/entry_flush debugfs file");
+ return 1;
+ }
+ iter = repetitions;
+ l1d_misses_total = 0;
+ passes = 0;
+ goto again;
+ }
+
+ perf_event_disable(fd);
+ close(fd);
+
+ set_dscr(0);
+
+ if (write_debugfs_int("powerpc/rfi_flush", rfi_flush_orig) < 0) {
+ perror("unable to restore original value of powerpc/rfi_flush debugfs file");
+ return 1;
+ }
+
+ if (write_debugfs_int("powerpc/entry_flush", entry_flush_orig) < 0) {
+ perror("unable to restore original value of powerpc/entry_flush debugfs file");
+ return 1;
+ }
+
+ return rc;
+}
+
+int main(int argc, char *argv[])
+{
+ return test_harness(entry_flush_test, "entry_flush_test");
+}
diff --git a/tools/testing/selftests/powerpc/security/flush_utils.c b/tools/testing/selftests/powerpc/security/flush_utils.c
new file mode 100644
index 000000000000..9c5c00e04f63
--- /dev/null
+++ b/tools/testing/selftests/powerpc/security/flush_utils.c
@@ -0,0 +1,84 @@
+// SPDX-License-Identifier: GPL-2.0+
+
+/*
+ * Copyright 2018 IBM Corporation.
+ */
+
+#define __SANE_USERSPACE_TYPES__
+
+#include <sys/types.h>
+#include <stdint.h>
+#include <unistd.h>
+#include <signal.h>
+#include <stdlib.h>
+#include <string.h>
+#include <stdio.h>
+#include <sys/utsname.h>
+#include "reg.h"
+#include "utils.h"
+#include "flush_utils.h"
+
+static inline __u64 load(void *addr)
+{
+ __u64 tmp;
+
+ asm volatile("ld %0,0(%1)" : "=r"(tmp) : "b"(addr));
+
+ return tmp;
+}
+
+void syscall_loop(char *p, unsigned long iterations,
+ unsigned long zero_size)
+{
+ for (unsigned long i = 0; i < iterations; i++) {
+ for (unsigned long j = 0; j < zero_size; j += CACHELINE_SIZE)
+ load(p + j);
+ getppid();
+ }
+}
+
+void syscall_loop_uaccess(char *p, unsigned long iterations,
+ unsigned long zero_size)
+{
+ struct utsname utsname;
+
+ for (unsigned long i = 0; i < iterations; i++) {
+ for (unsigned long j = 0; j < zero_size; j += CACHELINE_SIZE)
+ load(p + j);
+ uname(&utsname);
+ }
+}
+
+static void sigill_handler(int signr, siginfo_t *info, void *unused)
+{
+ static int warned;
+ ucontext_t *ctx = (ucontext_t *)unused;
+ unsigned long *pc = &UCONTEXT_NIA(ctx);
+
+ /* mtspr 3,RS to check for move to DSCR below */
+ if ((*((unsigned int *)*pc) & 0xfc1fffff) == 0x7c0303a6) {
+ if (!warned++)
+ printf("WARNING: Skipping over dscr setup. Consider running 'ppc64_cpu --dscr=1' manually.\n");
+ *pc += 4;
+ } else {
+ printf("SIGILL at %p\n", pc);
+ abort();
+ }
+}
+
+void set_dscr(unsigned long val)
+{
+ static int init;
+ struct sigaction sa;
+
+ if (!init) {
+ memset(&sa, 0, sizeof(sa));
+ sa.sa_sigaction = sigill_handler;
+ sa.sa_flags = SA_SIGINFO;
+ if (sigaction(SIGILL, &sa, NULL))
+ perror("sigill_handler");
+ init = 1;
+ }
+
+ mtspr(SPRN_DSCR, val);
+}
diff --git a/tools/testing/selftests/powerpc/security/flush_utils.h b/tools/testing/selftests/powerpc/security/flush_utils.h
new file mode 100644
index 000000000000..e1e68281f7ac
--- /dev/null
+++ b/tools/testing/selftests/powerpc/security/flush_utils.h
@@ -0,0 +1,24 @@
+/* SPDX-License-Identifier: GPL-2.0+ */
+
+/*
+ * Copyright 2018 IBM Corporation.
+ */
+
+#ifndef _SELFTESTS_POWERPC_SECURITY_FLUSH_UTILS_H
+#define _SELFTESTS_POWERPC_SECURITY_FLUSH_UTILS_H
+
+#define CACHELINE_SIZE 128
+
+#define PERF_L1D_READ_MISS_CONFIG ((PERF_COUNT_HW_CACHE_L1D) | \
+ (PERF_COUNT_HW_CACHE_OP_READ << 8) | \
+ (PERF_COUNT_HW_CACHE_RESULT_MISS << 16))
+
+void syscall_loop(char *p, unsigned long iterations,
+ unsigned long zero_size);
+
+void syscall_loop_uaccess(char *p, unsigned long iterations,
+ unsigned long zero_size);
+
+void set_dscr(unsigned long val);
+
+#endif /* _SELFTESTS_POWERPC_SECURITY_FLUSH_UTILS_H */
diff --git a/tools/testing/selftests/powerpc/security/mitigation-patching.sh b/tools/testing/selftests/powerpc/security/mitigation-patching.sh
new file mode 100755
index 000000000000..f43aa4b77fba
--- /dev/null
+++ b/tools/testing/selftests/powerpc/security/mitigation-patching.sh
@@ -0,0 +1,78 @@
+#!/usr/bin/env bash
+
+set -euo pipefail
+
+TIMEOUT=10
+
+function do_one
+{
+ local mitigation="$1"
+ local orig
+ local start
+ local now
+
+ orig=$(cat "$mitigation")
+
+ start=$(date +%s)
+ now=$start
+
+ while [[ $((now-start)) -lt "$TIMEOUT" ]]
+ do
+ echo 0 > "$mitigation"
+ echo 1 > "$mitigation"
+
+ now=$(date +%s)
+ done
+
+ echo "$orig" > "$mitigation"
+}
+
+rc=0
+cd /sys/kernel/debug/powerpc || rc=1
+if [[ "$rc" -ne 0 ]]; then
+ echo "Error: couldn't cd to /sys/kernel/debug/powerpc" >&2
+ exit 1
+fi
+
+tainted=$(cat /proc/sys/kernel/tainted)
+if [[ "$tainted" -ne 0 ]]; then
+ echo "Error: kernel already tainted!" >&2
+ exit 1
+fi
+
+mitigations="barrier_nospec stf_barrier count_cache_flush rfi_flush entry_flush uaccess_flush"
+
+for m in $mitigations
+do
+ if [[ -f /sys/kernel/debug/powerpc/$m ]]
+ then
+ do_one "$m" &
+ fi
+done
+
+echo "Spawned threads enabling/disabling mitigations ..."
+
+if stress-ng > /dev/null 2>&1; then
+ stress="stress-ng"
+elif stress > /dev/null 2>&1; then
+ stress="stress"
+else
+ stress=""
+fi
+
+if [[ -n "$stress" ]]; then
+ "$stress" -m "$(nproc)" -t "$TIMEOUT" &
+ echo "Spawned VM stressors ..."
+fi
+
+echo "Waiting for timeout ..."
+wait
+
+tainted=$(cat /proc/sys/kernel/tainted)
+if [[ "$tainted" -ne 0 ]]; then
+ echo "Error: kernel became tainted!" >&2
+ exit 1
+fi
+
+echo "OK"
+exit 0
diff --git a/tools/testing/selftests/powerpc/security/rfi_flush.c b/tools/testing/selftests/powerpc/security/rfi_flush.c
index 0a7d0afb26b8..6bedc86443a6 100644
--- a/tools/testing/selftests/powerpc/security/rfi_flush.c
+++ b/tools/testing/selftests/powerpc/security/rfi_flush.c
@@ -14,32 +14,8 @@
#include <string.h>
#include <stdio.h>
#include "utils.h"
+#include "flush_utils.h"
-#define CACHELINE_SIZE 128
-
-struct perf_event_read {
- __u64 nr;
- __u64 l1d_misses;
-};
-
-static inline __u64 load(void *addr)
-{
- __u64 tmp;
-
- asm volatile("ld %0,0(%1)" : "=r"(tmp) : "b"(addr));
-
- return tmp;
-}
-
-static void syscall_loop(char *p, unsigned long iterations,
- unsigned long zero_size)
-{
- for (unsigned long i = 0; i < iterations; i++) {
- for (unsigned long j = 0; j < zero_size; j += CACHELINE_SIZE)
- load(p + j);
- getppid();
- }
-}
int rfi_flush_test(void)
{
@@ -50,24 +26,42 @@ int rfi_flush_test(void)
__u64 l1d_misses_total = 0;
unsigned long iterations = 100000, zero_size = 24 * 1024;
unsigned long l1d_misses_expected;
- int rfi_flush_org, rfi_flush;
+ int rfi_flush_orig, rfi_flush;
+ int have_entry_flush, entry_flush_orig;
SKIP_IF(geteuid() != 0);
- if (read_debugfs_file("powerpc/rfi_flush", &rfi_flush_org)) {
+ // The PMU event we use only works on Power7 or later
+ SKIP_IF(!have_hwcap(PPC_FEATURE_ARCH_2_06));
+
+ if (read_debugfs_int("powerpc/rfi_flush", &rfi_flush_orig) < 0) {
perror("Unable to read powerpc/rfi_flush debugfs file");
SKIP_IF(1);
}
- rfi_flush = rfi_flush_org;
+ if (read_debugfs_int("powerpc/entry_flush", &entry_flush_orig) < 0) {
+ have_entry_flush = 0;
+ } else {
+ have_entry_flush = 1;
+
+ if (entry_flush_orig != 0) {
+ if (write_debugfs_int("powerpc/entry_flush", 0) < 0) {
+ perror("error writing to powerpc/entry_flush debugfs file");
+ return 1;
+ }
+ }
+ }
+
+ rfi_flush = rfi_flush_orig;
- fd = perf_event_open_counter(PERF_TYPE_RAW, /* L1d miss */ 0x400f0, -1);
+ fd = perf_event_open_counter(PERF_TYPE_HW_CACHE, PERF_L1D_READ_MISS_CONFIG, -1);
FAIL_IF(fd < 0);
p = (char *)memalign(zero_size, CACHELINE_SIZE);
FAIL_IF(perf_event_enable(fd));
+ // disable L1 prefetching
set_dscr(1);
iter = repetitions;
@@ -109,9 +103,9 @@ again:
repetitions * l1d_misses_expected / 2,
passes, repetitions);
- if (rfi_flush == rfi_flush_org) {
- rfi_flush = !rfi_flush_org;
- if (write_debugfs_file("powerpc/rfi_flush", rfi_flush) < 0) {
+ if (rfi_flush == rfi_flush_orig) {
+ rfi_flush = !rfi_flush_orig;
+ if (write_debugfs_int("powerpc/rfi_flush", rfi_flush) < 0) {
perror("error writing to powerpc/rfi_flush debugfs file");
return 1;
}
@@ -126,11 +120,19 @@ again:
set_dscr(0);
- if (write_debugfs_file("powerpc/rfi_flush", rfi_flush_org) < 0) {
+ if (write_debugfs_int("powerpc/rfi_flush", rfi_flush_orig) < 0) {
perror("unable to restore original value of powerpc/rfi_flush debugfs file");
return 1;
}
+ if (have_entry_flush) {
+ if (write_debugfs_int("powerpc/entry_flush", entry_flush_orig) < 0) {
+ perror("unable to restore original value of powerpc/entry_flush "
+ "debugfs file");
+ return 1;
+ }
+ }
+
return rc;
}
diff --git a/tools/testing/selftests/powerpc/security/spectre_v2.c b/tools/testing/selftests/powerpc/security/spectre_v2.c
index 8c6b982af2a8..5b2abb719ef2 100644
--- a/tools/testing/selftests/powerpc/security/spectre_v2.c
+++ b/tools/testing/selftests/powerpc/security/spectre_v2.c
@@ -125,8 +125,6 @@ static enum spectre_v2_state get_sysfs_state(void)
#define PM_BR_PRED_PCACHE 0x048a0 // P9 only
#define PM_BR_MPRED_PCACHE 0x048b0 // P9 only
-#define SPRN_PVR 287
-
int spectre_v2_test(void)
{
enum spectre_v2_state state;
@@ -134,6 +132,9 @@ int spectre_v2_test(void)
s64 miss_percent;
bool is_p9;
+ // The PMU events we use only work on Power8 or later
+ SKIP_IF(!have_hwcap2(PPC_FEATURE2_ARCH_2_07));
+
state = get_sysfs_state();
if (state == UNKNOWN) {
printf("Error: couldn't determine spectre_v2 mitigation state?\n");
@@ -181,8 +182,24 @@ int spectre_v2_test(void)
case COUNT_CACHE_FLUSH_HW:
// These should all not affect userspace branch prediction
if (miss_percent > 15) {
+ if (miss_percent > 95) {
+ /*
+ * Such a mismatch may be caused by a system being unaware
+ * the count cache is disabled. This may be to enable
+ * guest migration between hosts with different settings.
+ * Return skip code to avoid detecting this as an error.
+ * We are not vulnerable and reporting otherwise, so
+ * missing such a mismatch is safe.
+ */
+ printf("Branch misses > 95%% unexpected in this configuration.\n");
+ printf("Count cache likely disabled without Linux knowing.\n");
+ if (state == COUNT_CACHE_FLUSH_SW)
+ printf("WARNING: Kernel performing unnecessary flushes.\n");
+ return 4;
+ }
printf("Branch misses > 15%% unexpected in this configuration!\n");
- printf("Possible mis-match between reported & actual mitigation\n");
+ printf("Possible mismatch between reported & actual mitigation\n");
+
return 1;
}
break;
@@ -190,14 +207,14 @@ int spectre_v2_test(void)
// This seems to affect userspace branch prediction a bit?
if (miss_percent > 25) {
printf("Branch misses > 25%% unexpected in this configuration!\n");
- printf("Possible mis-match between reported & actual mitigation\n");
+ printf("Possible mismatch between reported & actual mitigation\n");
return 1;
}
break;
case COUNT_CACHE_DISABLED:
if (miss_percent < 95) {
- printf("Branch misses < 20%% unexpected in this configuration!\n");
- printf("Possible mis-match between reported & actual mitigation\n");
+ printf("Branch misses < 95%% unexpected in this configuration!\n");
+ printf("Possible mismatch between reported & actual mitigation\n");
return 1;
}
break;
diff --git a/tools/testing/selftests/powerpc/security/uaccess_flush.c b/tools/testing/selftests/powerpc/security/uaccess_flush.c
new file mode 100644
index 000000000000..fcf23ea9b183
--- /dev/null
+++ b/tools/testing/selftests/powerpc/security/uaccess_flush.c
@@ -0,0 +1,158 @@
+// SPDX-License-Identifier: GPL-2.0+
+
+/*
+ * Copyright 2018 IBM Corporation.
+ * Copyright 2020 Canonical Ltd.
+ */
+
+#define __SANE_USERSPACE_TYPES__
+
+#include <sys/types.h>
+#include <stdint.h>
+#include <malloc.h>
+#include <unistd.h>
+#include <signal.h>
+#include <stdlib.h>
+#include <string.h>
+#include <stdio.h>
+#include "utils.h"
+#include "flush_utils.h"
+
+int uaccess_flush_test(void)
+{
+ char *p;
+ int repetitions = 10;
+ int fd, passes = 0, iter, rc = 0;
+ struct perf_event_read v;
+ __u64 l1d_misses_total = 0;
+ unsigned long iterations = 100000, zero_size = 24 * 1024;
+ unsigned long l1d_misses_expected;
+ int rfi_flush_orig;
+ int entry_flush_orig;
+ int uaccess_flush, uaccess_flush_orig;
+
+ SKIP_IF(geteuid() != 0);
+
+ // The PMU event we use only works on Power7 or later
+ SKIP_IF(!have_hwcap(PPC_FEATURE_ARCH_2_06));
+
+ if (read_debugfs_int("powerpc/rfi_flush", &rfi_flush_orig) < 0) {
+ perror("Unable to read powerpc/rfi_flush debugfs file");
+ SKIP_IF(1);
+ }
+
+ if (read_debugfs_int("powerpc/entry_flush", &entry_flush_orig) < 0) {
+ perror("Unable to read powerpc/entry_flush debugfs file");
+ SKIP_IF(1);
+ }
+
+ if (read_debugfs_int("powerpc/uaccess_flush", &uaccess_flush_orig) < 0) {
+ perror("Unable to read powerpc/entry_flush debugfs file");
+ SKIP_IF(1);
+ }
+
+ if (rfi_flush_orig != 0) {
+ if (write_debugfs_int("powerpc/rfi_flush", 0) < 0) {
+ perror("error writing to powerpc/rfi_flush debugfs file");
+ FAIL_IF(1);
+ }
+ }
+
+ if (entry_flush_orig != 0) {
+ if (write_debugfs_int("powerpc/entry_flush", 0) < 0) {
+ perror("error writing to powerpc/entry_flush debugfs file");
+ FAIL_IF(1);
+ }
+ }
+
+ uaccess_flush = uaccess_flush_orig;
+
+ fd = perf_event_open_counter(PERF_TYPE_HW_CACHE, PERF_L1D_READ_MISS_CONFIG, -1);
+ FAIL_IF(fd < 0);
+
+ p = (char *)memalign(zero_size, CACHELINE_SIZE);
+
+ FAIL_IF(perf_event_enable(fd));
+
+ // disable L1 prefetching
+ set_dscr(1);
+
+ iter = repetitions;
+
+ /*
+ * We expect to see l1d miss for each cacheline access when entry_flush
+ * is set. Allow a small variation on this.
+ */
+ l1d_misses_expected = iterations * (zero_size / CACHELINE_SIZE - 2);
+
+again:
+ FAIL_IF(perf_event_reset(fd));
+
+ syscall_loop_uaccess(p, iterations, zero_size);
+
+ FAIL_IF(read(fd, &v, sizeof(v)) != sizeof(v));
+
+ if (uaccess_flush && v.l1d_misses >= l1d_misses_expected)
+ passes++;
+ else if (!uaccess_flush && v.l1d_misses < (l1d_misses_expected / 2))
+ passes++;
+
+ l1d_misses_total += v.l1d_misses;
+
+ while (--iter)
+ goto again;
+
+ if (passes < repetitions) {
+ printf("FAIL (L1D misses with uaccess_flush=%d: %llu %c %lu) [%d/%d failures]\n",
+ uaccess_flush, l1d_misses_total, uaccess_flush ? '<' : '>',
+ uaccess_flush ? repetitions * l1d_misses_expected :
+ repetitions * l1d_misses_expected / 2,
+ repetitions - passes, repetitions);
+ rc = 1;
+ } else {
+ printf("PASS (L1D misses with uaccess_flush=%d: %llu %c %lu) [%d/%d pass]\n",
+ uaccess_flush, l1d_misses_total, uaccess_flush ? '>' : '<',
+ uaccess_flush ? repetitions * l1d_misses_expected :
+ repetitions * l1d_misses_expected / 2,
+ passes, repetitions);
+ }
+
+ if (uaccess_flush == uaccess_flush_orig) {
+ uaccess_flush = !uaccess_flush_orig;
+ if (write_debugfs_int("powerpc/uaccess_flush", uaccess_flush) < 0) {
+ perror("error writing to powerpc/uaccess_flush debugfs file");
+ return 1;
+ }
+ iter = repetitions;
+ l1d_misses_total = 0;
+ passes = 0;
+ goto again;
+ }
+
+ perf_event_disable(fd);
+ close(fd);
+
+ set_dscr(0);
+
+ if (write_debugfs_int("powerpc/rfi_flush", rfi_flush_orig) < 0) {
+ perror("unable to restore original value of powerpc/rfi_flush debugfs file");
+ return 1;
+ }
+
+ if (write_debugfs_int("powerpc/entry_flush", entry_flush_orig) < 0) {
+ perror("unable to restore original value of powerpc/entry_flush debugfs file");
+ return 1;
+ }
+
+ if (write_debugfs_int("powerpc/uaccess_flush", uaccess_flush_orig) < 0) {
+ perror("unable to restore original value of powerpc/uaccess_flush debugfs file");
+ return 1;
+ }
+
+ return rc;
+}
+
+int main(int argc, char *argv[])
+{
+ return test_harness(uaccess_flush_test, "uaccess_flush_test");
+}
diff --git a/tools/testing/selftests/powerpc/signal/.gitignore b/tools/testing/selftests/powerpc/signal/.gitignore
index 405b5364044c..9d0915777fed 100644
--- a/tools/testing/selftests/powerpc/signal/.gitignore
+++ b/tools/testing/selftests/powerpc/signal/.gitignore
@@ -3,3 +3,6 @@ signal
signal_tm
sigfuz
sigreturn_vdso
+sig_sc_double_restart
+sigreturn_kernel
+sigreturn_unaligned
diff --git a/tools/testing/selftests/powerpc/signal/Makefile b/tools/testing/selftests/powerpc/signal/Makefile
index d6ae54663aed..f679d260afc8 100644
--- a/tools/testing/selftests/powerpc/signal/Makefile
+++ b/tools/testing/selftests/powerpc/signal/Makefile
@@ -1,5 +1,7 @@
# SPDX-License-Identifier: GPL-2.0
TEST_GEN_PROGS := signal signal_tm sigfuz sigreturn_vdso sig_sc_double_restart
+TEST_GEN_PROGS += sigreturn_kernel
+TEST_GEN_PROGS += sigreturn_unaligned
CFLAGS += -maltivec
$(OUTPUT)/signal_tm: CFLAGS += -mhtm
diff --git a/tools/testing/selftests/powerpc/signal/signal_tm.c b/tools/testing/selftests/powerpc/signal/signal_tm.c
index 5bf2224ef7f2..c9cf66a3daa2 100644
--- a/tools/testing/selftests/powerpc/signal/signal_tm.c
+++ b/tools/testing/selftests/powerpc/signal/signal_tm.c
@@ -56,6 +56,7 @@ static int test_signal_tm()
}
SKIP_IF(!have_htm());
+ SKIP_IF(htm_is_synthetic());
for (i = 0; i < MAX_ATTEMPT; i++) {
/*
diff --git a/tools/testing/selftests/powerpc/signal/sigreturn_kernel.c b/tools/testing/selftests/powerpc/signal/sigreturn_kernel.c
new file mode 100644
index 000000000000..0a1b6e591eee
--- /dev/null
+++ b/tools/testing/selftests/powerpc/signal/sigreturn_kernel.c
@@ -0,0 +1,132 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Test that we can't sigreturn to kernel addresses, or to kernel mode.
+ */
+
+#define _GNU_SOURCE
+
+#include <stdio.h>
+#include <signal.h>
+#include <stdlib.h>
+#include <sys/types.h>
+#include <sys/wait.h>
+#include <unistd.h>
+
+#include "utils.h"
+
+#define MSR_PR (1ul << 14)
+
+static volatile unsigned long long sigreturn_addr;
+static volatile unsigned long long sigreturn_msr_mask;
+
+static void sigusr1_handler(int signo, siginfo_t *si, void *uc_ptr)
+{
+ ucontext_t *uc = (ucontext_t *)uc_ptr;
+
+ if (sigreturn_addr)
+ UCONTEXT_NIA(uc) = sigreturn_addr;
+
+ if (sigreturn_msr_mask)
+ UCONTEXT_MSR(uc) &= sigreturn_msr_mask;
+}
+
+static pid_t fork_child(void)
+{
+ pid_t pid;
+
+ pid = fork();
+ if (pid == 0) {
+ raise(SIGUSR1);
+ exit(0);
+ }
+
+ return pid;
+}
+
+static int expect_segv(pid_t pid)
+{
+ int child_ret;
+
+ waitpid(pid, &child_ret, 0);
+ FAIL_IF(WIFEXITED(child_ret));
+ FAIL_IF(!WIFSIGNALED(child_ret));
+ FAIL_IF(WTERMSIG(child_ret) != 11);
+
+ return 0;
+}
+
+int test_sigreturn_kernel(void)
+{
+ struct sigaction act;
+ int child_ret, i;
+ pid_t pid;
+
+ act.sa_sigaction = sigusr1_handler;
+ act.sa_flags = SA_SIGINFO;
+ sigemptyset(&act.sa_mask);
+
+ FAIL_IF(sigaction(SIGUSR1, &act, NULL));
+
+ for (i = 0; i < 2; i++) {
+ // Return to kernel
+ sigreturn_addr = 0xcull << 60;
+ pid = fork_child();
+ expect_segv(pid);
+
+ // Return to kernel virtual
+ sigreturn_addr = 0xc008ull << 48;
+ pid = fork_child();
+ expect_segv(pid);
+
+ // Return out of range
+ sigreturn_addr = 0xc010ull << 48;
+ pid = fork_child();
+ expect_segv(pid);
+
+ // Return to no-man's land, just below PAGE_OFFSET
+ sigreturn_addr = (0xcull << 60) - (64 * 1024);
+ pid = fork_child();
+ expect_segv(pid);
+
+ // Return to no-man's land, above TASK_SIZE_4PB
+ sigreturn_addr = 0x1ull << 52;
+ pid = fork_child();
+ expect_segv(pid);
+
+ // Return to 0xd space
+ sigreturn_addr = 0xdull << 60;
+ pid = fork_child();
+ expect_segv(pid);
+
+ // Return to 0xe space
+ sigreturn_addr = 0xeull << 60;
+ pid = fork_child();
+ expect_segv(pid);
+
+ // Return to 0xf space
+ sigreturn_addr = 0xfull << 60;
+ pid = fork_child();
+ expect_segv(pid);
+
+ // Attempt to set PR=0 for 2nd loop (should be blocked by kernel)
+ sigreturn_msr_mask = ~MSR_PR;
+ }
+
+ printf("All children killed as expected\n");
+
+ // Don't change address, just MSR, should return to user as normal
+ sigreturn_addr = 0;
+ sigreturn_msr_mask = ~MSR_PR;
+ pid = fork_child();
+ waitpid(pid, &child_ret, 0);
+ FAIL_IF(!WIFEXITED(child_ret));
+ FAIL_IF(WIFSIGNALED(child_ret));
+ FAIL_IF(WEXITSTATUS(child_ret) != 0);
+
+ return 0;
+}
+
+int main(void)
+{
+ return test_harness(test_sigreturn_kernel, "sigreturn_kernel");
+}
diff --git a/tools/testing/selftests/powerpc/signal/sigreturn_unaligned.c b/tools/testing/selftests/powerpc/signal/sigreturn_unaligned.c
new file mode 100644
index 000000000000..6e58ee4f0fdf
--- /dev/null
+++ b/tools/testing/selftests/powerpc/signal/sigreturn_unaligned.c
@@ -0,0 +1,43 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Test sigreturn to an unaligned address, ie. low 2 bits set.
+ * Nothing bad should happen.
+ * This was able to trigger warnings with CONFIG_PPC_RFI_SRR_DEBUG=y.
+ */
+
+#include <signal.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <ucontext.h>
+#include <unistd.h>
+
+#include "utils.h"
+
+
+static void sigusr1_handler(int signo, siginfo_t *info, void *ptr)
+{
+ ucontext_t *uc = ptr;
+
+ UCONTEXT_NIA(uc) |= 3;
+}
+
+static int test_sigreturn_unaligned(void)
+{
+ struct sigaction action;
+
+ memset(&action, 0, sizeof(action));
+ action.sa_sigaction = sigusr1_handler;
+ action.sa_flags = SA_SIGINFO;
+
+ FAIL_IF(sigaction(SIGUSR1, &action, NULL) == -1);
+
+ raise(SIGUSR1);
+
+ return 0;
+}
+
+int main(void)
+{
+ return test_harness(test_sigreturn_unaligned, "sigreturn_unaligned");
+}
diff --git a/tools/testing/selftests/powerpc/stringloops/Makefile b/tools/testing/selftests/powerpc/stringloops/Makefile
index 7fc0623d85c3..9c39f55a58ff 100644
--- a/tools/testing/selftests/powerpc/stringloops/Makefile
+++ b/tools/testing/selftests/powerpc/stringloops/Makefile
@@ -8,7 +8,7 @@ build_32bit = $(shell if ($(CC) $(CFLAGS) -m32 -o /dev/null memcmp.c >/dev/null
TEST_GEN_PROGS := memcmp_64 strlen
-$(OUTPUT)/memcmp_64: memcmp.c
+$(OUTPUT)/memcmp_64: memcmp.c ../utils.c
$(OUTPUT)/memcmp_64: CFLAGS += -m64 -maltivec
ifeq ($(build_32bit),1)
diff --git a/tools/testing/selftests/powerpc/stringloops/asm/ppc_asm.h b/tools/testing/selftests/powerpc/stringloops/asm/ppc_asm.h
index 2b488b78c4f2..e713b69d694a 100644
--- a/tools/testing/selftests/powerpc/stringloops/asm/ppc_asm.h
+++ b/tools/testing/selftests/powerpc/stringloops/asm/ppc_asm.h
@@ -9,6 +9,7 @@
#define _GLOBAL(A) FUNC_START(test_ ## A)
#define _GLOBAL_TOC(A) FUNC_START(test_ ## A)
+#define CFUNC(name) name
#define CONFIG_ALTIVEC
diff --git a/tools/testing/selftests/powerpc/stringloops/asm/export.h b/tools/testing/selftests/powerpc/stringloops/linux/export.h
index 2d14a9b4248c..2d14a9b4248c 100644
--- a/tools/testing/selftests/powerpc/stringloops/asm/export.h
+++ b/tools/testing/selftests/powerpc/stringloops/linux/export.h
diff --git a/tools/testing/selftests/powerpc/stringloops/memcmp.c b/tools/testing/selftests/powerpc/stringloops/memcmp.c
index b1fa7546957f..cb2f18855c8d 100644
--- a/tools/testing/selftests/powerpc/stringloops/memcmp.c
+++ b/tools/testing/selftests/powerpc/stringloops/memcmp.c
@@ -2,7 +2,9 @@
#include <malloc.h>
#include <stdlib.h>
#include <string.h>
+#include <sys/mman.h>
#include <time.h>
+
#include "utils.h"
#define SIZE 256
@@ -13,6 +15,9 @@
#define LARGE_MAX_OFFSET 32
#define LARGE_SIZE_START 4096
+/* This is big enough to fit LARGE_SIZE and works on 4K & 64K kernels */
+#define MAP_SIZE (64 * 1024)
+
#define MAX_OFFSET_DIFF_S1_S2 48
int vmx_count;
@@ -68,25 +73,25 @@ static void test_one(char *s1, char *s2, unsigned long max_offset,
static int testcase(bool islarge)
{
- char *s1;
- char *s2;
- unsigned long i;
-
- unsigned long comp_size = (islarge ? LARGE_SIZE : SIZE);
- unsigned long alloc_size = comp_size + MAX_OFFSET_DIFF_S1_S2;
- int iterations = islarge ? LARGE_ITERATIONS : ITERATIONS;
-
- s1 = memalign(128, alloc_size);
- if (!s1) {
- perror("memalign");
- exit(1);
- }
+ unsigned long i, comp_size, alloc_size;
+ char *p, *s1, *s2;
+ int iterations;
- s2 = memalign(128, alloc_size);
- if (!s2) {
- perror("memalign");
- exit(1);
- }
+ comp_size = (islarge ? LARGE_SIZE : SIZE);
+ alloc_size = comp_size + MAX_OFFSET_DIFF_S1_S2;
+ iterations = islarge ? LARGE_ITERATIONS : ITERATIONS;
+
+ p = mmap(NULL, 4 * MAP_SIZE, PROT_READ | PROT_WRITE,
+ MAP_ANONYMOUS | MAP_PRIVATE, -1, 0);
+ FAIL_IF(p == MAP_FAILED);
+
+ /* Put s1/s2 at the end of a page */
+ s1 = p + MAP_SIZE - alloc_size;
+ s2 = p + 3 * MAP_SIZE - alloc_size;
+
+ /* And unmap the subsequent page to force a fault if we overread */
+ munmap(p + MAP_SIZE, MAP_SIZE);
+ munmap(p + 3 * MAP_SIZE, MAP_SIZE);
srandom(time(0));
@@ -147,6 +152,11 @@ static int testcase(bool islarge)
static int testcases(void)
{
+#ifdef __powerpc64__
+ // vcmpequd used in memcmp_64.S is v2.07
+ SKIP_IF(!have_hwcap2(PPC_FEATURE2_ARCH_2_07));
+#endif
+
testcase(0);
testcase(1);
return 0;
diff --git a/tools/testing/selftests/powerpc/switch_endian/switch_endian_test.S b/tools/testing/selftests/powerpc/switch_endian/switch_endian_test.S
index cc4930467235..7887f78cf072 100644
--- a/tools/testing/selftests/powerpc/switch_endian/switch_endian_test.S
+++ b/tools/testing/selftests/powerpc/switch_endian/switch_endian_test.S
@@ -3,9 +3,13 @@
.data
.balign 8
-message:
+success_message:
.ascii "success: switch_endian_test\n\0"
+ .balign 8
+failure_message:
+ .ascii "failure: switch_endian_test\n\0"
+
.section ".toc"
.balign 8
pattern:
@@ -64,6 +68,9 @@ FUNC_START(_start)
li r0, __NR_switch_endian
sc
+ tdi 0, 0, 0x48 // b +8 if the endian was switched
+ b .Lfail // exit if endian didn't switch
+
#include "check-reversed.S"
/* Flip back, r0 already has the switch syscall number */
@@ -71,12 +78,20 @@ FUNC_START(_start)
#include "check.S"
+ ld r4, success_message@got(%r2)
+ li r5, 28 // strlen(success_message)
+ li r14, 0 // exit status
+.Lout:
li r0, __NR_write
li r3, 1 /* stdout */
- ld r4, message@got(%r2)
- li r5, 28 /* strlen(message3) */
sc
li r0, __NR_exit
- li r3, 0
+ mr r3, r14
sc
b .
+
+.Lfail:
+ ld r4, failure_message@got(%r2)
+ li r5, 28 // strlen(failure_message)
+ li r14, 1
+ b .Lout
diff --git a/tools/testing/selftests/powerpc/syscalls/.gitignore b/tools/testing/selftests/powerpc/syscalls/.gitignore
index b00cab225476..a1e19ccdef84 100644
--- a/tools/testing/selftests/powerpc/syscalls/.gitignore
+++ b/tools/testing/selftests/powerpc/syscalls/.gitignore
@@ -1,2 +1,3 @@
# SPDX-License-Identifier: GPL-2.0-only
ipc_unmuxed
+rtas_filter
diff --git a/tools/testing/selftests/powerpc/syscalls/Makefile b/tools/testing/selftests/powerpc/syscalls/Makefile
index 01b22775ca87..ee1740ddfb0c 100644
--- a/tools/testing/selftests/powerpc/syscalls/Makefile
+++ b/tools/testing/selftests/powerpc/syscalls/Makefile
@@ -1,9 +1,9 @@
# SPDX-License-Identifier: GPL-2.0-only
-TEST_GEN_PROGS := ipc_unmuxed
+TEST_GEN_PROGS := ipc_unmuxed rtas_filter
-CFLAGS += -I../../../../../usr/include
+CFLAGS += $(KHDR_INCLUDES)
top_srcdir = ../../../../..
include ../../lib.mk
-$(TEST_GEN_PROGS): ../harness.c
+$(TEST_GEN_PROGS): ../harness.c ../utils.c
diff --git a/tools/testing/selftests/powerpc/syscalls/rtas_filter.c b/tools/testing/selftests/powerpc/syscalls/rtas_filter.c
new file mode 100644
index 000000000000..9b17780f0b18
--- /dev/null
+++ b/tools/testing/selftests/powerpc/syscalls/rtas_filter.c
@@ -0,0 +1,224 @@
+// SPDX-License-Identifier: GPL-2.0+
+/*
+ * Copyright 2005-2020 IBM Corporation.
+ *
+ * Includes code from librtas (https://github.com/ibm-power-utilities/librtas/)
+ */
+
+#include <byteswap.h>
+#include <stdint.h>
+#include <inttypes.h>
+#include <linux/limits.h>
+#include <stdio.h>
+#include <string.h>
+#include <sys/syscall.h>
+#include <sys/types.h>
+#include <unistd.h>
+#include <stdarg.h>
+#include <stdlib.h>
+#include <fcntl.h>
+#include <errno.h>
+#include "utils.h"
+
+#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
+#define cpu_to_be32(x) bswap_32(x)
+#define be32_to_cpu(x) bswap_32(x)
+#else
+#define cpu_to_be32(x) (x)
+#define be32_to_cpu(x) (x)
+#endif
+
+#define RTAS_IO_ASSERT -1098 /* Unexpected I/O Error */
+#define RTAS_UNKNOWN_OP -1099 /* No Firmware Implementation of Function */
+#define BLOCK_SIZE 4096
+#define PAGE_SIZE 4096
+#define MAX_PAGES 64
+
+static const char *ofdt_rtas_path = "/proc/device-tree/rtas";
+
+typedef __be32 uint32_t;
+struct rtas_args {
+ __be32 token;
+ __be32 nargs;
+ __be32 nret;
+ __be32 args[16];
+ __be32 *rets; /* Pointer to return values in args[]. */
+};
+
+struct region {
+ uint64_t addr;
+ uint32_t size;
+ struct region *next;
+};
+
+static int get_property(const char *prop_path, const char *prop_name,
+ char **prop_val, size_t *prop_len)
+{
+ char path[PATH_MAX];
+
+ int len = snprintf(path, sizeof(path), "%s/%s", prop_path, prop_name);
+ if (len < 0 || len >= sizeof(path))
+ return -ENOMEM;
+
+ return read_file_alloc(path, prop_val, prop_len);
+}
+
+int rtas_token(const char *call_name)
+{
+ char *prop_buf = NULL;
+ size_t len;
+ int rc;
+
+ rc = get_property(ofdt_rtas_path, call_name, &prop_buf, &len);
+ if (rc < 0) {
+ rc = RTAS_UNKNOWN_OP;
+ goto err;
+ }
+
+ rc = be32_to_cpu(*(int *)prop_buf);
+
+err:
+ free(prop_buf);
+ return rc;
+}
+
+static int read_kregion_bounds(struct region *kregion)
+{
+ char *buf;
+ int err;
+
+ err = read_file_alloc("/proc/ppc64/rtas/rmo_buffer", &buf, NULL);
+ if (err) {
+ perror("Could not open rmo_buffer file");
+ return RTAS_IO_ASSERT;
+ }
+
+ sscanf(buf, "%" SCNx64 " %x", &kregion->addr, &kregion->size);
+ free(buf);
+
+ if (!(kregion->size && kregion->addr) ||
+ (kregion->size > (PAGE_SIZE * MAX_PAGES))) {
+ printf("Unexpected kregion bounds\n");
+ return RTAS_IO_ASSERT;
+ }
+
+ return 0;
+}
+
+static int rtas_call(const char *name, int nargs,
+ int nrets, ...)
+{
+ struct rtas_args args;
+ __be32 *rets[16];
+ int i, rc, token;
+ va_list ap;
+
+ va_start(ap, nrets);
+
+ token = rtas_token(name);
+ if (token == RTAS_UNKNOWN_OP) {
+ // We don't care if the call doesn't exist
+ printf("call '%s' not available, skipping...", name);
+ rc = RTAS_UNKNOWN_OP;
+ goto err;
+ }
+
+ args.token = cpu_to_be32(token);
+ args.nargs = cpu_to_be32(nargs);
+ args.nret = cpu_to_be32(nrets);
+
+ for (i = 0; i < nargs; i++)
+ args.args[i] = (__be32) va_arg(ap, unsigned long);
+
+ for (i = 0; i < nrets; i++)
+ rets[i] = (__be32 *) va_arg(ap, unsigned long);
+
+ rc = syscall(__NR_rtas, &args);
+ if (rc) {
+ rc = -errno;
+ goto err;
+ }
+
+ if (nrets) {
+ *(rets[0]) = be32_to_cpu(args.args[nargs]);
+
+ for (i = 1; i < nrets; i++) {
+ *(rets[i]) = args.args[nargs + i];
+ }
+ }
+
+err:
+ va_end(ap);
+ return rc;
+}
+
+static int test(void)
+{
+ struct region rmo_region;
+ uint32_t rmo_start;
+ uint32_t rmo_end;
+ __be32 rets[1];
+ int rc;
+
+ // Test a legitimate harmless call
+ // Expected: call succeeds
+ printf("Test a permitted call, no parameters... ");
+ rc = rtas_call("get-time-of-day", 0, 1, rets);
+ printf("rc: %d\n", rc);
+ FAIL_IF(rc != 0 && rc != RTAS_UNKNOWN_OP);
+
+ // Test a prohibited call
+ // Expected: call returns -EINVAL
+ printf("Test a prohibited call... ");
+ rc = rtas_call("nvram-fetch", 0, 1, rets);
+ printf("rc: %d\n", rc);
+ FAIL_IF(rc != -EINVAL && rc != RTAS_UNKNOWN_OP);
+
+ // Get RMO
+ rc = read_kregion_bounds(&rmo_region);
+ if (rc) {
+ printf("Couldn't read RMO region bounds, skipping remaining cases\n");
+ return 0;
+ }
+ rmo_start = rmo_region.addr;
+ rmo_end = rmo_start + rmo_region.size - 1;
+ printf("RMO range: %08x - %08x\n", rmo_start, rmo_end);
+
+ // Test a permitted call, user-supplied size, buffer inside RMO
+ // Expected: call succeeds
+ printf("Test a permitted call, user-supplied size, buffer inside RMO... ");
+ rc = rtas_call("ibm,get-system-parameter", 3, 1, 0, cpu_to_be32(rmo_start),
+ cpu_to_be32(rmo_end - rmo_start + 1), rets);
+ printf("rc: %d\n", rc);
+ FAIL_IF(rc != 0 && rc != RTAS_UNKNOWN_OP);
+
+ // Test a permitted call, user-supplied size, buffer start outside RMO
+ // Expected: call returns -EINVAL
+ printf("Test a permitted call, user-supplied size, buffer start outside RMO... ");
+ rc = rtas_call("ibm,get-system-parameter", 3, 1, 0, cpu_to_be32(rmo_end + 1),
+ cpu_to_be32(4000), rets);
+ printf("rc: %d\n", rc);
+ FAIL_IF(rc != -EINVAL && rc != RTAS_UNKNOWN_OP);
+
+ // Test a permitted call, user-supplied size, buffer end outside RMO
+ // Expected: call returns -EINVAL
+ printf("Test a permitted call, user-supplied size, buffer end outside RMO... ");
+ rc = rtas_call("ibm,get-system-parameter", 3, 1, 0, cpu_to_be32(rmo_start),
+ cpu_to_be32(rmo_end - rmo_start + 2), rets);
+ printf("rc: %d\n", rc);
+ FAIL_IF(rc != -EINVAL && rc != RTAS_UNKNOWN_OP);
+
+ // Test a permitted call, fixed size, buffer end outside RMO
+ // Expected: call returns -EINVAL
+ printf("Test a permitted call, fixed size, buffer end outside RMO... ");
+ rc = rtas_call("ibm,configure-connector", 2, 1, cpu_to_be32(rmo_end - 4000), 0, rets);
+ printf("rc: %d\n", rc);
+ FAIL_IF(rc != -EINVAL && rc != RTAS_UNKNOWN_OP);
+
+ return 0;
+}
+
+int main(int argc, char *argv[])
+{
+ return test_harness(test, "rtas_filter");
+}
diff --git a/tools/testing/selftests/powerpc/tm/Makefile b/tools/testing/selftests/powerpc/tm/Makefile
index 5881e97c73c1..3876805c2f31 100644
--- a/tools/testing/selftests/powerpc/tm/Makefile
+++ b/tools/testing/selftests/powerpc/tm/Makefile
@@ -17,7 +17,7 @@ $(TEST_GEN_PROGS): ../harness.c ../utils.c
CFLAGS += -mhtm
$(OUTPUT)/tm-syscall: tm-syscall-asm.S
-$(OUTPUT)/tm-syscall: CFLAGS += -I../../../../../usr/include
+$(OUTPUT)/tm-syscall: CFLAGS += $(KHDR_INCLUDES)
$(OUTPUT)/tm-tmspr: CFLAGS += -pthread
$(OUTPUT)/tm-vmx-unavail: CFLAGS += -pthread -m64
$(OUTPUT)/tm-resched-dscr: ../pmu/lib.c
diff --git a/tools/testing/selftests/powerpc/tm/tm-exec.c b/tools/testing/selftests/powerpc/tm/tm-exec.c
index 260cfdb97d23..c59919d6710d 100644
--- a/tools/testing/selftests/powerpc/tm/tm-exec.c
+++ b/tools/testing/selftests/powerpc/tm/tm-exec.c
@@ -27,6 +27,7 @@ static char *path;
static int test_exec(void)
{
SKIP_IF(!have_htm());
+ SKIP_IF(htm_is_synthetic());
asm __volatile__(
"tbegin.;"
diff --git a/tools/testing/selftests/powerpc/tm/tm-fork.c b/tools/testing/selftests/powerpc/tm/tm-fork.c
index 6efa5a685a77..c27b935f0e9f 100644
--- a/tools/testing/selftests/powerpc/tm/tm-fork.c
+++ b/tools/testing/selftests/powerpc/tm/tm-fork.c
@@ -21,6 +21,7 @@
int test_fork(void)
{
SKIP_IF(!have_htm());
+ SKIP_IF(htm_is_synthetic());
asm __volatile__(
"tbegin.;"
diff --git a/tools/testing/selftests/powerpc/tm/tm-poison.c b/tools/testing/selftests/powerpc/tm/tm-poison.c
index 977558497c16..a7bbf034b5bb 100644
--- a/tools/testing/selftests/powerpc/tm/tm-poison.c
+++ b/tools/testing/selftests/powerpc/tm/tm-poison.c
@@ -20,13 +20,12 @@
#include <sched.h>
#include <sys/types.h>
#include <signal.h>
-#include <inttypes.h>
#include "tm.h"
int tm_poison_test(void)
{
- int pid;
+ int cpu, pid;
cpu_set_t cpuset;
uint64_t poison = 0xdeadbeefc0dec0fe;
uint64_t unknown = 0;
@@ -34,11 +33,15 @@ int tm_poison_test(void)
bool fail_vr = false;
SKIP_IF(!have_htm());
+ SKIP_IF(htm_is_synthetic());
+
+ cpu = pick_online_cpu();
+ FAIL_IF(cpu < 0);
- /* Attach both Child and Parent to CPU 0 */
+ // Attach both Child and Parent to the same CPU
CPU_ZERO(&cpuset);
- CPU_SET(0, &cpuset);
- sched_setaffinity(0, sizeof(cpuset), &cpuset);
+ CPU_SET(cpu, &cpuset);
+ FAIL_IF(sched_setaffinity(0, sizeof(cpuset), &cpuset) != 0);
pid = fork();
if (!pid) {
diff --git a/tools/testing/selftests/powerpc/tm/tm-resched-dscr.c b/tools/testing/selftests/powerpc/tm/tm-resched-dscr.c
index 4cdb83964bb3..85c940ae6ff8 100644
--- a/tools/testing/selftests/powerpc/tm/tm-resched-dscr.c
+++ b/tools/testing/selftests/powerpc/tm/tm-resched-dscr.c
@@ -40,6 +40,7 @@ int test_body(void)
uint64_t rv, dscr1 = 1, dscr2, texasr;
SKIP_IF(!have_htm());
+ SKIP_IF(htm_is_synthetic());
printf("Check DSCR TM context switch: ");
fflush(stdout);
diff --git a/tools/testing/selftests/powerpc/tm/tm-signal-context-chk-fpu.c b/tools/testing/selftests/powerpc/tm/tm-signal-context-chk-fpu.c
index 254f912ad611..657d755b2905 100644
--- a/tools/testing/selftests/powerpc/tm/tm-signal-context-chk-fpu.c
+++ b/tools/testing/selftests/powerpc/tm/tm-signal-context-chk-fpu.c
@@ -79,6 +79,7 @@ static int tm_signal_context_chk_fpu()
pid_t pid = getpid();
SKIP_IF(!have_htm());
+ SKIP_IF(htm_is_synthetic());
act.sa_sigaction = signal_usr1;
sigemptyset(&act.sa_mask);
diff --git a/tools/testing/selftests/powerpc/tm/tm-signal-context-chk-gpr.c b/tools/testing/selftests/powerpc/tm/tm-signal-context-chk-gpr.c
index 0cc680f61828..400fa70ca71e 100644
--- a/tools/testing/selftests/powerpc/tm/tm-signal-context-chk-gpr.c
+++ b/tools/testing/selftests/powerpc/tm/tm-signal-context-chk-gpr.c
@@ -81,6 +81,7 @@ static int tm_signal_context_chk_gpr()
pid_t pid = getpid();
SKIP_IF(!have_htm());
+ SKIP_IF(htm_is_synthetic());
act.sa_sigaction = signal_usr1;
sigemptyset(&act.sa_mask);
diff --git a/tools/testing/selftests/powerpc/tm/tm-signal-context-chk-vmx.c b/tools/testing/selftests/powerpc/tm/tm-signal-context-chk-vmx.c
index b6d52730a0d8..d628fd302b28 100644
--- a/tools/testing/selftests/powerpc/tm/tm-signal-context-chk-vmx.c
+++ b/tools/testing/selftests/powerpc/tm/tm-signal-context-chk-vmx.c
@@ -104,6 +104,7 @@ static int tm_signal_context_chk()
pid_t pid = getpid();
SKIP_IF(!have_htm());
+ SKIP_IF(htm_is_synthetic());
act.sa_sigaction = signal_usr1;
sigemptyset(&act.sa_mask);
diff --git a/tools/testing/selftests/powerpc/tm/tm-signal-context-chk-vsx.c b/tools/testing/selftests/powerpc/tm/tm-signal-context-chk-vsx.c
index 8e25e2072ecd..9bd869245bad 100644
--- a/tools/testing/selftests/powerpc/tm/tm-signal-context-chk-vsx.c
+++ b/tools/testing/selftests/powerpc/tm/tm-signal-context-chk-vsx.c
@@ -153,6 +153,7 @@ static int tm_signal_context_chk()
pid_t pid = getpid();
SKIP_IF(!have_htm());
+ SKIP_IF(htm_is_synthetic());
act.sa_sigaction = signal_usr1;
sigemptyset(&act.sa_mask);
diff --git a/tools/testing/selftests/powerpc/tm/tm-signal-pagefault.c b/tools/testing/selftests/powerpc/tm/tm-signal-pagefault.c
index 5908bc6abe60..0b84c9208d62 100644
--- a/tools/testing/selftests/powerpc/tm/tm-signal-pagefault.c
+++ b/tools/testing/selftests/powerpc/tm/tm-signal-pagefault.c
@@ -226,6 +226,7 @@ int tm_signal_pagefault(void)
stack_t ss;
SKIP_IF(!have_htm());
+ SKIP_IF(htm_is_synthetic());
SKIP_IF(!have_userfaultfd());
setup_uf_mem();
diff --git a/tools/testing/selftests/powerpc/tm/tm-signal-sigreturn-nt.c b/tools/testing/selftests/powerpc/tm/tm-signal-sigreturn-nt.c
index 07c388147b75..06b801906f27 100644
--- a/tools/testing/selftests/powerpc/tm/tm-signal-sigreturn-nt.c
+++ b/tools/testing/selftests/powerpc/tm/tm-signal-sigreturn-nt.c
@@ -32,6 +32,7 @@ int tm_signal_sigreturn_nt(void)
struct sigaction trap_sa;
SKIP_IF(!have_htm());
+ SKIP_IF(htm_is_synthetic());
trap_sa.sa_flags = SA_SIGINFO;
trap_sa.sa_sigaction = trap_signal_handler;
diff --git a/tools/testing/selftests/powerpc/tm/tm-signal-stack.c b/tools/testing/selftests/powerpc/tm/tm-signal-stack.c
index cdcf8c5bbbc7..68807aac8dd3 100644
--- a/tools/testing/selftests/powerpc/tm/tm-signal-stack.c
+++ b/tools/testing/selftests/powerpc/tm/tm-signal-stack.c
@@ -35,6 +35,7 @@ int tm_signal_stack()
int pid;
SKIP_IF(!have_htm());
+ SKIP_IF(htm_is_synthetic());
pid = fork();
if (pid < 0)
diff --git a/tools/testing/selftests/powerpc/tm/tm-sigreturn.c b/tools/testing/selftests/powerpc/tm/tm-sigreturn.c
index 9a6017a1d769..ffe4e5515f33 100644
--- a/tools/testing/selftests/powerpc/tm/tm-sigreturn.c
+++ b/tools/testing/selftests/powerpc/tm/tm-sigreturn.c
@@ -55,6 +55,7 @@ int tm_sigreturn(void)
uint64_t ret = 0;
SKIP_IF(!have_htm());
+ SKIP_IF(htm_is_synthetic());
SKIP_IF(!is_ppc64le());
memset(&sa, 0, sizeof(sa));
diff --git a/tools/testing/selftests/powerpc/tm/tm-syscall-asm.S b/tools/testing/selftests/powerpc/tm/tm-syscall-asm.S
index bd1ca25febe4..aed632d29fff 100644
--- a/tools/testing/selftests/powerpc/tm/tm-syscall-asm.S
+++ b/tools/testing/selftests/powerpc/tm/tm-syscall-asm.S
@@ -1,5 +1,5 @@
/* SPDX-License-Identifier: GPL-2.0 */
-#include <ppc-asm.h>
+#include <basic_asm.h>
#include <asm/unistd.h>
.text
@@ -26,3 +26,38 @@ FUNC_START(getppid_tm_suspended)
1:
li r3, -1
blr
+
+
+.macro scv level
+ .long (0x44000001 | (\level) << 5)
+.endm
+
+FUNC_START(getppid_scv_tm_active)
+ PUSH_BASIC_STACK(0)
+ tbegin.
+ beq 1f
+ li r0, __NR_getppid
+ scv 0
+ tend.
+ POP_BASIC_STACK(0)
+ blr
+1:
+ li r3, -1
+ POP_BASIC_STACK(0)
+ blr
+
+FUNC_START(getppid_scv_tm_suspended)
+ PUSH_BASIC_STACK(0)
+ tbegin.
+ beq 1f
+ li r0, __NR_getppid
+ tsuspend.
+ scv 0
+ tresume.
+ tend.
+ POP_BASIC_STACK(0)
+ blr
+1:
+ li r3, -1
+ POP_BASIC_STACK(0)
+ blr
diff --git a/tools/testing/selftests/powerpc/tm/tm-syscall.c b/tools/testing/selftests/powerpc/tm/tm-syscall.c
index becb8207b432..b763354c2eb4 100644
--- a/tools/testing/selftests/powerpc/tm/tm-syscall.c
+++ b/tools/testing/selftests/powerpc/tm/tm-syscall.c
@@ -19,24 +19,36 @@
#include "utils.h"
#include "tm.h"
+#ifndef PPC_FEATURE2_SCV
+#define PPC_FEATURE2_SCV 0x00100000 /* scv syscall */
+#endif
+
extern int getppid_tm_active(void);
extern int getppid_tm_suspended(void);
+extern int getppid_scv_tm_active(void);
+extern int getppid_scv_tm_suspended(void);
unsigned retries = 0;
#define TEST_DURATION 10 /* seconds */
-#define TM_RETRIES 100
-pid_t getppid_tm(bool suspend)
+pid_t getppid_tm(bool scv, bool suspend)
{
int i;
pid_t pid;
for (i = 0; i < TM_RETRIES; i++) {
- if (suspend)
- pid = getppid_tm_suspended();
- else
- pid = getppid_tm_active();
+ if (suspend) {
+ if (scv)
+ pid = getppid_scv_tm_suspended();
+ else
+ pid = getppid_tm_suspended();
+ } else {
+ if (scv)
+ pid = getppid_scv_tm_active();
+ else
+ pid = getppid_tm_active();
+ }
if (pid >= 0)
return pid;
@@ -67,6 +79,7 @@ int tm_syscall(void)
struct timeval end, now;
SKIP_IF(!have_htm_nosc());
+ SKIP_IF(htm_is_synthetic());
setbuf(stdout, NULL);
@@ -82,15 +95,24 @@ int tm_syscall(void)
* Test a syscall within a suspended transaction and verify
* that it succeeds.
*/
- FAIL_IF(getppid_tm(true) == -1); /* Should succeed. */
+ FAIL_IF(getppid_tm(false, true) == -1); /* Should succeed. */
/*
* Test a syscall within an active transaction and verify that
* it fails with the correct failure code.
*/
- FAIL_IF(getppid_tm(false) != -1); /* Should fail... */
+ FAIL_IF(getppid_tm(false, false) != -1); /* Should fail... */
FAIL_IF(!failure_is_persistent()); /* ...persistently... */
FAIL_IF(!failure_is_syscall()); /* ...with code syscall. */
+
+ /* Now do it all again with scv if it is available. */
+ if (have_hwcap2(PPC_FEATURE2_SCV)) {
+ FAIL_IF(getppid_tm(true, true) == -1); /* Should succeed. */
+ FAIL_IF(getppid_tm(true, false) != -1); /* Should fail... */
+ FAIL_IF(!failure_is_persistent()); /* ...persistently... */
+ FAIL_IF(!failure_is_syscall()); /* ...with code syscall. */
+ }
+
gettimeofday(&now, 0);
}
diff --git a/tools/testing/selftests/powerpc/tm/tm-tar.c b/tools/testing/selftests/powerpc/tm/tm-tar.c
index 03be8c47292b..f2a9137f3c1e 100644
--- a/tools/testing/selftests/powerpc/tm/tm-tar.c
+++ b/tools/testing/selftests/powerpc/tm/tm-tar.c
@@ -26,6 +26,7 @@ int test_tar(void)
int i;
SKIP_IF(!have_htm());
+ SKIP_IF(htm_is_synthetic());
SKIP_IF(!is_ppc64le());
for (i = 0; i < num_loops; i++)
diff --git a/tools/testing/selftests/powerpc/tm/tm-tmspr.c b/tools/testing/selftests/powerpc/tm/tm-tmspr.c
index 17becf3dcee4..dd5ddffa28b7 100644
--- a/tools/testing/selftests/powerpc/tm/tm-tmspr.c
+++ b/tools/testing/selftests/powerpc/tm/tm-tmspr.c
@@ -33,19 +33,13 @@
#include "utils.h"
#include "tm.h"
-int num_loops = 10000;
+int num_loops = 1000000;
int passed = 1;
void tfiar_tfhar(void *in)
{
- int i, cpu;
unsigned long tfhar, tfhar_rd, tfiar, tfiar_rd;
- cpu_set_t cpuset;
-
- CPU_ZERO(&cpuset);
- cpu = (unsigned long)in >> 1;
- CPU_SET(cpu, &cpuset);
- sched_setaffinity(0, sizeof(cpuset), &cpuset);
+ int i;
/* TFIAR: Last bit has to be high so userspace can read register */
tfiar = ((unsigned long)in) + 1;
@@ -102,6 +96,7 @@ int test_tmspr()
unsigned long i;
SKIP_IF(!have_htm());
+ SKIP_IF(htm_is_synthetic());
/* To cause some context switching */
thread_num = 10 * sysconf(_SC_NPROCESSORS_ONLN);
diff --git a/tools/testing/selftests/powerpc/tm/tm-trap.c b/tools/testing/selftests/powerpc/tm/tm-trap.c
index 601f0c1d450d..97cb74768e30 100644
--- a/tools/testing/selftests/powerpc/tm/tm-trap.c
+++ b/tools/testing/selftests/powerpc/tm/tm-trap.c
@@ -66,7 +66,7 @@ void trap_signal_handler(int signo, siginfo_t *si, void *uc)
/* Get thread endianness: extract bit LE from MSR */
thread_endianness = MSR_LE & ucp->uc_mcontext.gp_regs[PT_MSR];
- /***
+ /*
* Little-Endian Machine
*/
@@ -126,7 +126,7 @@ void trap_signal_handler(int signo, siginfo_t *si, void *uc)
}
}
- /***
+ /*
* Big-Endian Machine
*/
@@ -247,8 +247,7 @@ void *pong(void *not_used)
int tm_trap_test(void)
{
uint16_t k = 1;
-
- int rc;
+ int cpu, rc;
pthread_attr_t attr;
cpu_set_t cpuset;
@@ -256,6 +255,7 @@ int tm_trap_test(void)
struct sigaction trap_sa;
SKIP_IF(!have_htm());
+ SKIP_IF(htm_is_synthetic());
trap_sa.sa_flags = SA_SIGINFO;
trap_sa.sa_sigaction = trap_signal_handler;
@@ -267,9 +267,12 @@ int tm_trap_test(void)
usr1_sa.sa_sigaction = usr1_signal_handler;
sigaction(SIGUSR1, &usr1_sa, NULL);
- /* Set only CPU 0 in the mask. Both threads will be bound to cpu 0. */
+ cpu = pick_online_cpu();
+ FAIL_IF(cpu < 0);
+
+ // Set only one CPU in the mask. Both threads will be bound to that CPU.
CPU_ZERO(&cpuset);
- CPU_SET(0, &cpuset);
+ CPU_SET(cpu, &cpuset);
/* Init pthread attribute */
rc = pthread_attr_init(&attr);
diff --git a/tools/testing/selftests/powerpc/tm/tm-unavailable.c b/tools/testing/selftests/powerpc/tm/tm-unavailable.c
index 2ca2fccb0a3e..6bf1b65b020d 100644
--- a/tools/testing/selftests/powerpc/tm/tm-unavailable.c
+++ b/tools/testing/selftests/powerpc/tm/tm-unavailable.c
@@ -338,16 +338,20 @@ void test_fp_vec(int fp, int vec, pthread_attr_t *attr)
int tm_unavailable_test(void)
{
- int rc, exception; /* FP = 0, VEC = 1, VSX = 2 */
+ int cpu, rc, exception; /* FP = 0, VEC = 1, VSX = 2 */
pthread_t t1;
pthread_attr_t attr;
cpu_set_t cpuset;
SKIP_IF(!have_htm());
+ SKIP_IF(htm_is_synthetic());
- /* Set only CPU 0 in the mask. Both threads will be bound to CPU 0. */
+ cpu = pick_online_cpu();
+ FAIL_IF(cpu < 0);
+
+ // Set only one CPU in the mask. Both threads will be bound to that CPU.
CPU_ZERO(&cpuset);
- CPU_SET(0, &cpuset);
+ CPU_SET(cpu, &cpuset);
/* Init pthread attribute. */
rc = pthread_attr_init(&attr);
diff --git a/tools/testing/selftests/powerpc/tm/tm-vmx-unavail.c b/tools/testing/selftests/powerpc/tm/tm-vmx-unavail.c
index e2a0c07e8362..34364ed2b6b7 100644
--- a/tools/testing/selftests/powerpc/tm/tm-vmx-unavail.c
+++ b/tools/testing/selftests/powerpc/tm/tm-vmx-unavail.c
@@ -17,7 +17,6 @@
#include <pthread.h>
#include <sys/mman.h>
#include <unistd.h>
-#include <pthread.h>
#include "tm.h"
#include "utils.h"
@@ -92,6 +91,7 @@ int tm_vmx_unavail_test()
pthread_t *thread;
SKIP_IF(!have_htm());
+ SKIP_IF(htm_is_synthetic());
passed = 1;
diff --git a/tools/testing/selftests/powerpc/tm/tm-vmxcopy.c b/tools/testing/selftests/powerpc/tm/tm-vmxcopy.c
index c1e788a6df47..1640e7ead69b 100644
--- a/tools/testing/selftests/powerpc/tm/tm-vmxcopy.c
+++ b/tools/testing/selftests/powerpc/tm/tm-vmxcopy.c
@@ -46,6 +46,7 @@ int test_vmxcopy()
uint64_t aborted = 0;
SKIP_IF(!have_htm());
+ SKIP_IF(htm_is_synthetic());
SKIP_IF(!is_ppc64le());
fd = mkstemp(tmpfile);
diff --git a/tools/testing/selftests/powerpc/tm/tm.h b/tools/testing/selftests/powerpc/tm/tm.h
index c402464b038f..c03c6e778876 100644
--- a/tools/testing/selftests/powerpc/tm/tm.h
+++ b/tools/testing/selftests/powerpc/tm/tm.h
@@ -6,11 +6,13 @@
#ifndef _SELFTESTS_POWERPC_TM_TM_H
#define _SELFTESTS_POWERPC_TM_TM_H
-#include <asm/tm.h>
-#include <asm/cputable.h>
#include <stdbool.h>
+#include <asm/tm.h>
#include "utils.h"
+#include "reg.h"
+
+#define TM_RETRIES 100
static inline bool have_htm(void)
{
@@ -32,6 +34,39 @@ static inline bool have_htm_nosc(void)
#endif
}
+/*
+ * Transactional Memory was removed in ISA 3.1. A synthetic TM implementation
+ * is provided on P10 for threads running in P8/P9 compatibility mode. The
+ * synthetic implementation immediately fails after tbegin. This failure sets
+ * Bit 7 (Failure Persistent) and Bit 15 (Implementation-specific).
+ */
+static inline bool htm_is_synthetic(void)
+{
+ int i;
+
+ /*
+ * Per the ISA, the Failure Persistent bit may be incorrect. Try a few
+ * times in case we got an Implementation-specific failure on a non ISA
+ * v3.1 system. On these systems the Implementation-specific failure
+ * should not be persistent.
+ */
+ for (i = 0; i < TM_RETRIES; i++) {
+ asm volatile(
+ "tbegin.;"
+ "beq 1f;"
+ "tend.;"
+ "1:"
+ :
+ :
+ : "memory");
+
+ if ((__builtin_get_texasr() & (TEXASR_FP | TEXASR_IC)) !=
+ (TEXASR_FP | TEXASR_IC))
+ break;
+ }
+ return i == TM_RETRIES;
+}
+
static inline long failure_code(void)
{
return __builtin_get_texasru() >> 24;
diff --git a/tools/testing/selftests/powerpc/utils.c b/tools/testing/selftests/powerpc/utils.c
index 5ee0e98c4896..e5f2d8735c64 100644
--- a/tools/testing/selftests/powerpc/utils.c
+++ b/tools/testing/selftests/powerpc/utils.c
@@ -8,14 +8,16 @@
#include <elf.h>
#include <errno.h>
#include <fcntl.h>
+#include <inttypes.h>
+#include <limits.h>
#include <link.h>
#include <sched.h>
-#include <signal.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/ioctl.h>
#include <sys/stat.h>
+#include <sys/sysinfo.h>
#include <sys/types.h>
#include <sys/utsname.h>
#include <unistd.h>
@@ -26,34 +28,360 @@
static char auxv[4096];
-int read_auxv(char *buf, ssize_t buf_size)
+int read_file(const char *path, char *buf, size_t count, size_t *len)
{
- ssize_t num;
- int rc, fd;
+ ssize_t rc;
+ int fd;
+ int err;
+ char eof;
- fd = open("/proc/self/auxv", O_RDONLY);
- if (fd == -1) {
- perror("open");
+ fd = open(path, O_RDONLY);
+ if (fd < 0)
return -errno;
+
+ rc = read(fd, buf, count);
+ if (rc < 0) {
+ err = -errno;
+ goto out;
}
- num = read(fd, buf, buf_size);
- if (num < 0) {
- perror("read");
- rc = -EIO;
+ if (len)
+ *len = rc;
+
+ /* Overflow if there are still more bytes after filling the buffer */
+ if (rc == count) {
+ rc = read(fd, &eof, 1);
+ if (rc != 0) {
+ err = -EOVERFLOW;
+ goto out;
+ }
+ }
+
+ err = 0;
+
+out:
+ close(fd);
+ errno = -err;
+ return err;
+}
+
+int read_file_alloc(const char *path, char **buf, size_t *len)
+{
+ size_t read_offset = 0;
+ size_t buffer_len = 0;
+ char *buffer = NULL;
+ int err;
+ int fd;
+
+ fd = open(path, O_RDONLY);
+ if (fd < 0)
+ return -errno;
+
+ /*
+ * We don't use stat & preallocate st_size because some non-files
+ * report 0 file size. Instead just dynamically grow the buffer
+ * as needed.
+ */
+ while (1) {
+ ssize_t rc;
+
+ if (read_offset >= buffer_len / 2) {
+ char *next_buffer;
+
+ buffer_len = buffer_len ? buffer_len * 2 : 4096;
+ next_buffer = realloc(buffer, buffer_len);
+ if (!next_buffer) {
+ err = -errno;
+ goto out;
+ }
+ buffer = next_buffer;
+ }
+
+ rc = read(fd, buffer + read_offset, buffer_len - read_offset);
+ if (rc < 0) {
+ err = -errno;
+ goto out;
+ }
+
+ if (rc == 0)
+ break;
+
+ read_offset += rc;
+ }
+
+ *buf = buffer;
+ if (len)
+ *len = read_offset;
+
+ err = 0;
+
+out:
+ close(fd);
+ if (err)
+ free(buffer);
+ errno = -err;
+ return err;
+}
+
+int write_file(const char *path, const char *buf, size_t count)
+{
+ int fd;
+ int err;
+ ssize_t rc;
+
+ fd = open(path, O_WRONLY | O_CREAT | O_TRUNC, 0644);
+ if (fd < 0)
+ return -errno;
+
+ rc = write(fd, buf, count);
+ if (rc < 0) {
+ err = -errno;
goto out;
}
- if (num > buf_size) {
- printf("overflowed auxv buffer\n");
- rc = -EOVERFLOW;
+ if (rc != count) {
+ err = -EOVERFLOW;
goto out;
}
- rc = 0;
+ err = 0;
+
out:
close(fd);
- return rc;
+ errno = -err;
+ return err;
+}
+
+int read_auxv(char *buf, ssize_t buf_size)
+{
+ int err;
+
+ err = read_file("/proc/self/auxv", buf, buf_size, NULL);
+ if (err) {
+ perror("Error reading /proc/self/auxv");
+ return err;
+ }
+
+ return 0;
+}
+
+int read_debugfs_file(const char *subpath, char *buf, size_t count)
+{
+ char path[PATH_MAX] = "/sys/kernel/debug/";
+
+ strncat(path, subpath, sizeof(path) - strlen(path) - 1);
+
+ return read_file(path, buf, count, NULL);
+}
+
+int write_debugfs_file(const char *subpath, const char *buf, size_t count)
+{
+ char path[PATH_MAX] = "/sys/kernel/debug/";
+
+ strncat(path, subpath, sizeof(path) - strlen(path) - 1);
+
+ return write_file(path, buf, count);
+}
+
+static int validate_int_parse(const char *buffer, size_t count, char *end)
+{
+ int err = 0;
+
+ /* Require at least one digit */
+ if (end == buffer) {
+ err = -EINVAL;
+ goto out;
+ }
+
+ /* Require all remaining characters be whitespace-ish */
+ for (; end < buffer + count; end++) {
+ if (*end == '\0')
+ break;
+
+ if (*end != ' ' && *end != '\n') {
+ err = -EINVAL;
+ goto out;
+ }
+ }
+
+out:
+ errno = -err;
+ return err;
+}
+
+static int parse_bounded_int(const char *buffer, size_t count, intmax_t *result,
+ int base, intmax_t min, intmax_t max)
+{
+ int err;
+ char *end;
+
+ errno = 0;
+ *result = strtoimax(buffer, &end, base);
+
+ if (errno)
+ return -errno;
+
+ err = validate_int_parse(buffer, count, end);
+ if (err)
+ goto out;
+
+ if (*result < min || *result > max)
+ err = -EOVERFLOW;
+
+out:
+ errno = -err;
+ return err;
+}
+
+static int parse_bounded_uint(const char *buffer, size_t count, uintmax_t *result,
+ int base, uintmax_t max)
+{
+ int err = 0;
+ char *end;
+
+ errno = 0;
+ *result = strtoumax(buffer, &end, base);
+
+ if (errno)
+ return -errno;
+
+ err = validate_int_parse(buffer, count, end);
+ if (err)
+ goto out;
+
+ if (*result > max)
+ err = -EOVERFLOW;
+
+out:
+ errno = -err;
+ return err;
+}
+
+int parse_intmax(const char *buffer, size_t count, intmax_t *result, int base)
+{
+ return parse_bounded_int(buffer, count, result, base, INTMAX_MIN, INTMAX_MAX);
+}
+
+int parse_uintmax(const char *buffer, size_t count, uintmax_t *result, int base)
+{
+ return parse_bounded_uint(buffer, count, result, base, UINTMAX_MAX);
+}
+
+int parse_int(const char *buffer, size_t count, int *result, int base)
+{
+ intmax_t parsed;
+ int err = parse_bounded_int(buffer, count, &parsed, base, INT_MIN, INT_MAX);
+
+ *result = parsed;
+ return err;
+}
+
+int parse_uint(const char *buffer, size_t count, unsigned int *result, int base)
+{
+ uintmax_t parsed;
+ int err = parse_bounded_uint(buffer, count, &parsed, base, UINT_MAX);
+
+ *result = parsed;
+ return err;
+}
+
+int parse_long(const char *buffer, size_t count, long *result, int base)
+{
+ intmax_t parsed;
+ int err = parse_bounded_int(buffer, count, &parsed, base, LONG_MIN, LONG_MAX);
+
+ *result = parsed;
+ return err;
+}
+
+int parse_ulong(const char *buffer, size_t count, unsigned long *result, int base)
+{
+ uintmax_t parsed;
+ int err = parse_bounded_uint(buffer, count, &parsed, base, ULONG_MAX);
+
+ *result = parsed;
+ return err;
+}
+
+int read_long(const char *path, long *result, int base)
+{
+ int err;
+ char buffer[32] = {0};
+
+ err = read_file(path, buffer, sizeof(buffer) - 1, NULL);
+ if (err)
+ return err;
+
+ return parse_long(buffer, sizeof(buffer), result, base);
+}
+
+int read_ulong(const char *path, unsigned long *result, int base)
+{
+ int err;
+ char buffer[32] = {0};
+
+ err = read_file(path, buffer, sizeof(buffer) - 1, NULL);
+ if (err)
+ return err;
+
+ return parse_ulong(buffer, sizeof(buffer), result, base);
+}
+
+int write_long(const char *path, long result, int base)
+{
+ int err;
+ int len;
+ char buffer[32];
+
+ /* Decimal only for now: no format specifier for signed hex values */
+ if (base != 10) {
+ err = -EINVAL;
+ goto out;
+ }
+
+ len = snprintf(buffer, sizeof(buffer), "%ld", result);
+ if (len < 0 || len >= sizeof(buffer)) {
+ err = -EOVERFLOW;
+ goto out;
+ }
+
+ err = write_file(path, buffer, len);
+
+out:
+ errno = -err;
+ return err;
+}
+
+int write_ulong(const char *path, unsigned long result, int base)
+{
+ int err;
+ int len;
+ char buffer[32];
+ char *fmt;
+
+ switch (base) {
+ case 10:
+ fmt = "%lu";
+ break;
+ case 16:
+ fmt = "%lx";
+ break;
+ default:
+ err = -EINVAL;
+ goto out;
+ }
+
+ len = snprintf(buffer, sizeof(buffer), fmt, result);
+ if (len < 0 || len >= sizeof(buffer)) {
+ err = -errno;
+ goto out;
+ }
+
+ err = write_file(path, buffer, len);
+
+out:
+ errno = -err;
+ return err;
}
void *find_auxv_entry(int type, char *auxv)
@@ -88,28 +416,63 @@ void *get_auxv_entry(int type)
int pick_online_cpu(void)
{
- cpu_set_t mask;
- int cpu;
+ int ncpus, cpu = -1;
+ cpu_set_t *mask;
+ size_t size;
+
+ ncpus = get_nprocs_conf();
+ size = CPU_ALLOC_SIZE(ncpus);
+ mask = CPU_ALLOC(ncpus);
+ if (!mask) {
+ perror("malloc");
+ return -1;
+ }
- CPU_ZERO(&mask);
+ CPU_ZERO_S(size, mask);
- if (sched_getaffinity(0, sizeof(mask), &mask)) {
+ if (sched_getaffinity(0, size, mask)) {
perror("sched_getaffinity");
- return -1;
+ goto done;
}
/* We prefer a primary thread, but skip 0 */
- for (cpu = 8; cpu < CPU_SETSIZE; cpu += 8)
- if (CPU_ISSET(cpu, &mask))
- return cpu;
+ for (cpu = 8; cpu < ncpus; cpu += 8)
+ if (CPU_ISSET_S(cpu, size, mask))
+ goto done;
/* Search for anything, but in reverse */
- for (cpu = CPU_SETSIZE - 1; cpu >= 0; cpu--)
- if (CPU_ISSET(cpu, &mask))
- return cpu;
+ for (cpu = ncpus - 1; cpu >= 0; cpu--)
+ if (CPU_ISSET_S(cpu, size, mask))
+ goto done;
printf("No cpus in affinity mask?!\n");
- return -1;
+
+done:
+ CPU_FREE(mask);
+ return cpu;
+}
+
+int bind_to_cpu(int cpu)
+{
+ cpu_set_t mask;
+ int err;
+
+ if (cpu == BIND_CPU_ANY) {
+ cpu = pick_online_cpu();
+ if (cpu < 0)
+ return cpu;
+ }
+
+ printf("Binding to cpu %d\n", cpu);
+
+ CPU_ZERO(&mask);
+ CPU_SET(cpu, &mask);
+
+ err = sched_setaffinity(0, sizeof(mask), &mask);
+ if (err)
+ return err;
+
+ return cpu;
}
bool is_ppc64le(void)
@@ -130,65 +493,31 @@ bool is_ppc64le(void)
int read_sysfs_file(char *fpath, char *result, size_t result_size)
{
char path[PATH_MAX] = "/sys/";
- int rc = -1, fd;
strncat(path, fpath, PATH_MAX - strlen(path) - 1);
- if ((fd = open(path, O_RDONLY)) < 0)
- return rc;
-
- rc = read(fd, result, result_size);
-
- close(fd);
-
- if (rc < 0)
- return rc;
-
- return 0;
+ return read_file(path, result, result_size, NULL);
}
-int read_debugfs_file(char *debugfs_file, int *result)
+int read_debugfs_int(const char *debugfs_file, int *result)
{
- int rc = -1, fd;
- char path[PATH_MAX];
- char value[16];
-
- strcpy(path, "/sys/kernel/debug/");
- strncat(path, debugfs_file, PATH_MAX - strlen(path) - 1);
-
- if ((fd = open(path, O_RDONLY)) < 0)
- return rc;
+ int err;
+ char value[16] = {0};
- if ((rc = read(fd, value, sizeof(value))) < 0)
- return rc;
+ err = read_debugfs_file(debugfs_file, value, sizeof(value) - 1);
+ if (err)
+ return err;
- value[15] = 0;
- *result = atoi(value);
- close(fd);
-
- return 0;
+ return parse_int(value, sizeof(value), result, 10);
}
-int write_debugfs_file(char *debugfs_file, int result)
+int write_debugfs_int(const char *debugfs_file, int result)
{
- int rc = -1, fd;
- char path[PATH_MAX];
char value[16];
- strcpy(path, "/sys/kernel/debug/");
- strncat(path, debugfs_file, PATH_MAX - strlen(path) - 1);
-
- if ((fd = open(path, O_WRONLY)) < 0)
- return rc;
-
snprintf(value, 16, "%d", result);
- if ((rc = write(fd, value, strlen(value))) < 0)
- return rc;
-
- close(fd);
-
- return 0;
+ return write_debugfs_file(debugfs_file, value, strlen(value));
}
static long perf_event_open(struct perf_event_attr *hw_event, pid_t pid,
@@ -260,36 +589,56 @@ int perf_event_reset(int fd)
return 0;
}
-static void sigill_handler(int signr, siginfo_t *info, void *unused)
+int using_hash_mmu(bool *using_hash)
{
- static int warned = 0;
- ucontext_t *ctx = (ucontext_t *)unused;
- unsigned long *pc = &UCONTEXT_NIA(ctx);
+ char line[128];
+ FILE *f;
+ int rc;
+
+ f = fopen("/proc/cpuinfo", "r");
+ FAIL_IF(!f);
- /* mtspr 3,RS to check for move to DSCR below */
- if ((*((unsigned int *)*pc) & 0xfc1fffff) == 0x7c0303a6) {
- if (!warned++)
- printf("WARNING: Skipping over dscr setup. Consider running 'ppc64_cpu --dscr=1' manually.\n");
- *pc += 4;
- } else {
- printf("SIGILL at %p\n", pc);
- abort();
+ rc = 0;
+ while (fgets(line, sizeof(line), f) != NULL) {
+ if (!strcmp(line, "MMU : Hash\n") ||
+ !strcmp(line, "platform : Cell\n") ||
+ !strcmp(line, "platform : PowerMac\n")) {
+ *using_hash = true;
+ goto out;
+ }
+
+ if (strcmp(line, "MMU : Radix\n") == 0) {
+ *using_hash = false;
+ goto out;
+ }
}
+
+ rc = -1;
+out:
+ fclose(f);
+ return rc;
}
-void set_dscr(unsigned long val)
+struct sigaction push_signal_handler(int sig, void (*fn)(int, siginfo_t *, void *))
{
- static int init = 0;
struct sigaction sa;
+ struct sigaction old_handler;
- if (!init) {
- memset(&sa, 0, sizeof(sa));
- sa.sa_sigaction = sigill_handler;
- sa.sa_flags = SA_SIGINFO;
- if (sigaction(SIGILL, &sa, NULL))
- perror("sigill_handler");
- init = 1;
- }
+ sa.sa_sigaction = fn;
+ sigemptyset(&sa.sa_mask);
+ sa.sa_flags = SA_SIGINFO;
+ FAIL_IF_EXIT_MSG(sigaction(sig, &sa, &old_handler),
+ "failed to push signal handler");
+
+ return old_handler;
+}
+
+struct sigaction pop_signal_handler(int sig, struct sigaction old_handler)
+{
+ struct sigaction popped;
+
+ FAIL_IF_EXIT_MSG(sigaction(sig, &old_handler, &popped),
+ "failed to pop signal handler");
- asm volatile("mtspr %1,%0" : : "r" (val), "i" (SPRN_DSCR));
+ return popped;
}
diff --git a/tools/testing/selftests/powerpc/vphn/asm/lppaca.h b/tools/testing/selftests/powerpc/vphn/asm/lppaca.h
deleted file mode 120000
index 942b1d00999c..000000000000
--- a/tools/testing/selftests/powerpc/vphn/asm/lppaca.h
+++ /dev/null
@@ -1 +0,0 @@
-../../../../../../arch/powerpc/include/asm/lppaca.h \ No newline at end of file
diff --git a/tools/testing/selftests/powerpc/vphn/asm/vphn.h b/tools/testing/selftests/powerpc/vphn/asm/vphn.h
new file mode 120000
index 000000000000..3a0b2a00171c
--- /dev/null
+++ b/tools/testing/selftests/powerpc/vphn/asm/vphn.h
@@ -0,0 +1 @@
+../../../../../../arch/powerpc/include/asm/vphn.h \ No newline at end of file
diff --git a/tools/testing/selftests/prctl/.gitignore b/tools/testing/selftests/prctl/.gitignore
index 91af2b631bc9..05d5e31661df 100644
--- a/tools/testing/selftests/prctl/.gitignore
+++ b/tools/testing/selftests/prctl/.gitignore
@@ -2,3 +2,5 @@
disable-tsc-ctxt-sw-stress-test
disable-tsc-on-off-stress-test
disable-tsc-test
+set-anon-vma-name-test
+set-process-name
diff --git a/tools/testing/selftests/prctl/Makefile b/tools/testing/selftests/prctl/Makefile
index c7923b205222..01dc90fbb509 100644
--- a/tools/testing/selftests/prctl/Makefile
+++ b/tools/testing/selftests/prctl/Makefile
@@ -5,12 +5,10 @@ ARCH ?= $(shell echo $(uname_M) | sed -e s/i.86/x86/ -e s/x86_64/x86/)
ifeq ($(ARCH),x86)
TEST_PROGS := disable-tsc-ctxt-sw-stress-test disable-tsc-on-off-stress-test \
- disable-tsc-test
+ disable-tsc-test set-anon-vma-name-test set-process-name
all: $(TEST_PROGS)
include ../lib.mk
-clean:
- rm -fr $(TEST_PROGS)
endif
endif
diff --git a/tools/testing/selftests/prctl/config b/tools/testing/selftests/prctl/config
new file mode 100644
index 000000000000..c6ed03c544e5
--- /dev/null
+++ b/tools/testing/selftests/prctl/config
@@ -0,0 +1 @@
+CONFIG_ANON_VMA_NAME=y
diff --git a/tools/testing/selftests/prctl/disable-tsc-ctxt-sw-stress-test.c b/tools/testing/selftests/prctl/disable-tsc-ctxt-sw-stress-test.c
index 62a93cc61b7c..6d1a5ee8eb28 100644
--- a/tools/testing/selftests/prctl/disable-tsc-ctxt-sw-stress-test.c
+++ b/tools/testing/selftests/prctl/disable-tsc-ctxt-sw-stress-test.c
@@ -79,7 +79,7 @@ int main(void)
{
int n_tasks = 100, i;
- fprintf(stderr, "[No further output means we're allright]\n");
+ fprintf(stderr, "[No further output means we're all right]\n");
for (i=0; i<n_tasks; i++)
if (fork() == 0)
diff --git a/tools/testing/selftests/prctl/disable-tsc-on-off-stress-test.c b/tools/testing/selftests/prctl/disable-tsc-on-off-stress-test.c
index 79950f9a26fd..d39511eb9b01 100644
--- a/tools/testing/selftests/prctl/disable-tsc-on-off-stress-test.c
+++ b/tools/testing/selftests/prctl/disable-tsc-on-off-stress-test.c
@@ -83,7 +83,7 @@ int main(void)
{
int n_tasks = 100, i;
- fprintf(stderr, "[No further output means we're allright]\n");
+ fprintf(stderr, "[No further output means we're all right]\n");
for (i=0; i<n_tasks; i++)
if (fork() == 0)
diff --git a/tools/testing/selftests/prctl/set-anon-vma-name-test.c b/tools/testing/selftests/prctl/set-anon-vma-name-test.c
new file mode 100644
index 000000000000..4275cb256dce
--- /dev/null
+++ b/tools/testing/selftests/prctl/set-anon-vma-name-test.c
@@ -0,0 +1,104 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * This test covers the anonymous VMA naming functionality through prctl calls
+ */
+
+#include <errno.h>
+#include <sys/prctl.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <sys/mman.h>
+#include <string.h>
+
+#include "../kselftest_harness.h"
+
+#define AREA_SIZE 1024
+
+#define GOOD_NAME "goodname"
+#define BAD_NAME "badname\1"
+
+#ifndef PR_SET_VMA
+#define PR_SET_VMA 0x53564d41
+#define PR_SET_VMA_ANON_NAME 0
+#endif
+
+
+int rename_vma(unsigned long addr, unsigned long size, char *name)
+{
+ int res;
+
+ res = prctl(PR_SET_VMA, PR_SET_VMA_ANON_NAME, addr, size, name);
+ if (res < 0)
+ return -errno;
+ return res;
+}
+
+int was_renaming_successful(char *target_name, unsigned long ptr)
+{
+ FILE *maps_file;
+
+ char line_buf[512], name[128], mode[8];
+ unsigned long start_addr, end_addr, offset;
+ unsigned int major_id, minor_id, node_id;
+
+ char target_buf[128];
+ int res = 0, sscanf_res;
+
+ // The entry name in maps will be in format [anon:<target_name>]
+ sprintf(target_buf, "[anon:%s]", target_name);
+ maps_file = fopen("/proc/self/maps", "r");
+ if (!maps_file) {
+ printf("## /proc/self/maps file opening error\n");
+ return 0;
+ }
+
+ // Parse the maps file to find the entry we renamed
+ while (fgets(line_buf, sizeof(line_buf), maps_file)) {
+ sscanf_res = sscanf(line_buf, "%lx-%lx %7s %lx %u:%u %u %s", &start_addr,
+ &end_addr, mode, &offset, &major_id,
+ &minor_id, &node_id, name);
+ if (sscanf_res == EOF) {
+ res = 0;
+ printf("## EOF while parsing the maps file\n");
+ break;
+ }
+ if (!strcmp(name, target_buf) && start_addr == ptr) {
+ res = 1;
+ break;
+ }
+ }
+ fclose(maps_file);
+ return res;
+}
+
+FIXTURE(vma) {
+ void *ptr_anon, *ptr_not_anon;
+};
+
+FIXTURE_SETUP(vma) {
+ self->ptr_anon = mmap(NULL, AREA_SIZE, PROT_READ | PROT_WRITE,
+ MAP_PRIVATE | MAP_ANONYMOUS, 0, 0);
+ ASSERT_NE(self->ptr_anon, NULL);
+ self->ptr_not_anon = mmap(NULL, AREA_SIZE, PROT_READ | PROT_WRITE,
+ MAP_PRIVATE, 0, 0);
+ ASSERT_NE(self->ptr_not_anon, NULL);
+}
+
+FIXTURE_TEARDOWN(vma) {
+ munmap(self->ptr_anon, AREA_SIZE);
+ munmap(self->ptr_not_anon, AREA_SIZE);
+}
+
+TEST_F(vma, renaming) {
+ TH_LOG("Try to rename the VMA with correct parameters");
+ EXPECT_GE(rename_vma((unsigned long)self->ptr_anon, AREA_SIZE, GOOD_NAME), 0);
+ EXPECT_TRUE(was_renaming_successful(GOOD_NAME, (unsigned long)self->ptr_anon));
+
+ TH_LOG("Try to pass invalid name (with non-printable character \\1) to rename the VMA");
+ EXPECT_EQ(rename_vma((unsigned long)self->ptr_anon, AREA_SIZE, BAD_NAME), -EINVAL);
+
+ TH_LOG("Try to rename non-anonymous VMA");
+ EXPECT_EQ(rename_vma((unsigned long) self->ptr_not_anon, AREA_SIZE, GOOD_NAME), -EINVAL);
+}
+
+TEST_HARNESS_MAIN
diff --git a/tools/testing/selftests/prctl/set-process-name.c b/tools/testing/selftests/prctl/set-process-name.c
new file mode 100644
index 000000000000..562f707ba771
--- /dev/null
+++ b/tools/testing/selftests/prctl/set-process-name.c
@@ -0,0 +1,94 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * This test covers the PR_SET_NAME functionality of prctl calls
+ */
+
+#include <errno.h>
+#include <sys/prctl.h>
+#include <string.h>
+
+#include "../kselftest_harness.h"
+
+#define CHANGE_NAME "changename"
+#define EMPTY_NAME ""
+#define TASK_COMM_LEN 16
+#define MAX_PATH_LEN 50
+
+int set_name(char *name)
+{
+ int res;
+
+ res = prctl(PR_SET_NAME, name, NULL, NULL, NULL);
+
+ if (res < 0)
+ return -errno;
+ return res;
+}
+
+int check_is_name_correct(char *check_name)
+{
+ char name[TASK_COMM_LEN];
+ int res;
+
+ res = prctl(PR_GET_NAME, name, NULL, NULL, NULL);
+
+ if (res < 0)
+ return -errno;
+
+ return !strcmp(name, check_name);
+}
+
+int check_null_pointer(char *check_name)
+{
+ char *name = NULL;
+ int res;
+
+ res = prctl(PR_GET_NAME, name, NULL, NULL, NULL);
+
+ return res;
+}
+
+int check_name(void)
+{
+
+ int pid;
+
+ pid = getpid();
+ FILE *fptr = NULL;
+ char path[MAX_PATH_LEN] = {};
+ char name[TASK_COMM_LEN] = {};
+ char output[TASK_COMM_LEN] = {};
+ int j;
+
+ j = snprintf(path, MAX_PATH_LEN, "/proc/self/task/%d/comm", pid);
+ fptr = fopen(path, "r");
+ if (!fptr)
+ return -EIO;
+
+ fscanf(fptr, "%s", output);
+ if (ferror(fptr))
+ return -EIO;
+
+ int res = prctl(PR_GET_NAME, name, NULL, NULL, NULL);
+
+ if (res < 0)
+ return -errno;
+
+ return !strcmp(output, name);
+}
+
+TEST(rename_process) {
+
+ EXPECT_GE(set_name(CHANGE_NAME), 0);
+ EXPECT_TRUE(check_is_name_correct(CHANGE_NAME));
+
+ EXPECT_GE(set_name(EMPTY_NAME), 0);
+ EXPECT_TRUE(check_is_name_correct(EMPTY_NAME));
+
+ EXPECT_GE(set_name(CHANGE_NAME), 0);
+ EXPECT_LT(check_null_pointer(CHANGE_NAME), 0);
+
+ EXPECT_TRUE(check_name());
+}
+
+TEST_HARNESS_MAIN
diff --git a/tools/testing/selftests/proc/.gitignore b/tools/testing/selftests/proc/.gitignore
index bed4b5318a86..a156ac5dd2c6 100644
--- a/tools/testing/selftests/proc/.gitignore
+++ b/tools/testing/selftests/proc/.gitignore
@@ -5,11 +5,14 @@
/proc-fsconfig-hidepid
/proc-loadavg-001
/proc-multiple-procfs
+/proc-empty-vm
/proc-pid-vm
/proc-self-map-files-001
/proc-self-map-files-002
/proc-self-syscall
/proc-self-wchan
+/proc-subset-pid
+/proc-tid0
/proc-uptime-001
/proc-uptime-002
/read
diff --git a/tools/testing/selftests/proc/Makefile b/tools/testing/selftests/proc/Makefile
index 8be8a03d2973..cd95369254c0 100644
--- a/tools/testing/selftests/proc/Makefile
+++ b/tools/testing/selftests/proc/Makefile
@@ -1,17 +1,21 @@
# SPDX-License-Identifier: GPL-2.0-only
CFLAGS += -Wall -O2 -Wno-unused-function
CFLAGS += -D_GNU_SOURCE
+LDFLAGS += -pthread
TEST_GEN_PROGS :=
TEST_GEN_PROGS += fd-001-lookup
TEST_GEN_PROGS += fd-002-posix-eq
TEST_GEN_PROGS += fd-003-kthread
TEST_GEN_PROGS += proc-loadavg-001
+TEST_GEN_PROGS += proc-empty-vm
TEST_GEN_PROGS += proc-pid-vm
TEST_GEN_PROGS += proc-self-map-files-001
TEST_GEN_PROGS += proc-self-map-files-002
TEST_GEN_PROGS += proc-self-syscall
TEST_GEN_PROGS += proc-self-wchan
+TEST_GEN_PROGS += proc-subset-pid
+TEST_GEN_PROGS += proc-tid0
TEST_GEN_PROGS += proc-uptime-001
TEST_GEN_PROGS += proc-uptime-002
TEST_GEN_PROGS += read
diff --git a/tools/testing/selftests/proc/proc-empty-vm.c b/tools/testing/selftests/proc/proc-empty-vm.c
new file mode 100644
index 000000000000..56198d4ca2bf
--- /dev/null
+++ b/tools/testing/selftests/proc/proc-empty-vm.c
@@ -0,0 +1,544 @@
+#if defined __amd64__ || defined __i386__
+/*
+ * Copyright (c) 2022 Alexey Dobriyan <adobriyan@gmail.com>
+ *
+ * Permission to use, copy, modify, and distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+ * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ */
+/*
+ * Create a process without mappings by unmapping everything at once and
+ * holding it with ptrace(2). See what happens to
+ *
+ * /proc/${pid}/maps
+ * /proc/${pid}/numa_maps
+ * /proc/${pid}/smaps
+ * /proc/${pid}/smaps_rollup
+ */
+#undef _GNU_SOURCE
+#define _GNU_SOURCE
+
+#undef NDEBUG
+#include <assert.h>
+#include <errno.h>
+#include <stdint.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <fcntl.h>
+#include <sys/mman.h>
+#include <sys/ptrace.h>
+#include <sys/resource.h>
+#include <sys/syscall.h>
+#include <sys/types.h>
+#include <sys/wait.h>
+#include <unistd.h>
+
+#ifdef __amd64__
+#define TEST_VSYSCALL
+#endif
+
+#if defined __amd64__
+ #ifndef SYS_pkey_alloc
+ #define SYS_pkey_alloc 330
+ #endif
+ #ifndef SYS_pkey_free
+ #define SYS_pkey_free 331
+ #endif
+#elif defined __i386__
+ #ifndef SYS_pkey_alloc
+ #define SYS_pkey_alloc 381
+ #endif
+ #ifndef SYS_pkey_free
+ #define SYS_pkey_free 382
+ #endif
+#else
+ #error "SYS_pkey_alloc"
+#endif
+
+static int g_protection_key_support;
+
+static int protection_key_support(void)
+{
+ long rv = syscall(SYS_pkey_alloc, 0, 0);
+ if (rv > 0) {
+ syscall(SYS_pkey_free, (int)rv);
+ return 1;
+ } else if (rv == -1 && errno == ENOSYS) {
+ return 0;
+ } else if (rv == -1 && errno == EINVAL) {
+ // ospke=n
+ return 0;
+ } else {
+ fprintf(stderr, "%s: error: rv %ld, errno %d\n", __func__, rv, errno);
+ exit(EXIT_FAILURE);
+ }
+}
+
+/*
+ * 0: vsyscall VMA doesn't exist vsyscall=none
+ * 1: vsyscall VMA is --xp vsyscall=xonly
+ * 2: vsyscall VMA is r-xp vsyscall=emulate
+ */
+static volatile int g_vsyscall;
+static const char *g_proc_pid_maps_vsyscall;
+static const char *g_proc_pid_smaps_vsyscall;
+
+static const char proc_pid_maps_vsyscall_0[] = "";
+static const char proc_pid_maps_vsyscall_1[] =
+"ffffffffff600000-ffffffffff601000 --xp 00000000 00:00 0 [vsyscall]\n";
+static const char proc_pid_maps_vsyscall_2[] =
+"ffffffffff600000-ffffffffff601000 r-xp 00000000 00:00 0 [vsyscall]\n";
+
+static const char proc_pid_smaps_vsyscall_0[] = "";
+
+static const char proc_pid_smaps_vsyscall_1[] =
+"ffffffffff600000-ffffffffff601000 --xp 00000000 00:00 0 [vsyscall]\n"
+"Size: 4 kB\n"
+"KernelPageSize: 4 kB\n"
+"MMUPageSize: 4 kB\n"
+"Rss: 0 kB\n"
+"Pss: 0 kB\n"
+"Pss_Dirty: 0 kB\n"
+"Shared_Clean: 0 kB\n"
+"Shared_Dirty: 0 kB\n"
+"Private_Clean: 0 kB\n"
+"Private_Dirty: 0 kB\n"
+"Referenced: 0 kB\n"
+"Anonymous: 0 kB\n"
+"KSM: 0 kB\n"
+"LazyFree: 0 kB\n"
+"AnonHugePages: 0 kB\n"
+"ShmemPmdMapped: 0 kB\n"
+"FilePmdMapped: 0 kB\n"
+"Shared_Hugetlb: 0 kB\n"
+"Private_Hugetlb: 0 kB\n"
+"Swap: 0 kB\n"
+"SwapPss: 0 kB\n"
+"Locked: 0 kB\n"
+"THPeligible: 0\n"
+;
+
+static const char proc_pid_smaps_vsyscall_2[] =
+"ffffffffff600000-ffffffffff601000 r-xp 00000000 00:00 0 [vsyscall]\n"
+"Size: 4 kB\n"
+"KernelPageSize: 4 kB\n"
+"MMUPageSize: 4 kB\n"
+"Rss: 0 kB\n"
+"Pss: 0 kB\n"
+"Pss_Dirty: 0 kB\n"
+"Shared_Clean: 0 kB\n"
+"Shared_Dirty: 0 kB\n"
+"Private_Clean: 0 kB\n"
+"Private_Dirty: 0 kB\n"
+"Referenced: 0 kB\n"
+"Anonymous: 0 kB\n"
+"KSM: 0 kB\n"
+"LazyFree: 0 kB\n"
+"AnonHugePages: 0 kB\n"
+"ShmemPmdMapped: 0 kB\n"
+"FilePmdMapped: 0 kB\n"
+"Shared_Hugetlb: 0 kB\n"
+"Private_Hugetlb: 0 kB\n"
+"Swap: 0 kB\n"
+"SwapPss: 0 kB\n"
+"Locked: 0 kB\n"
+"THPeligible: 0\n"
+;
+
+static void sigaction_SIGSEGV(int _, siginfo_t *__, void *___)
+{
+ _exit(EXIT_FAILURE);
+}
+
+#ifdef TEST_VSYSCALL
+static void sigaction_SIGSEGV_vsyscall(int _, siginfo_t *__, void *___)
+{
+ _exit(g_vsyscall);
+}
+
+/*
+ * vsyscall page can't be unmapped, probe it directly.
+ */
+static void vsyscall(void)
+{
+ pid_t pid;
+ int wstatus;
+
+ pid = fork();
+ if (pid < 0) {
+ fprintf(stderr, "fork, errno %d\n", errno);
+ exit(1);
+ }
+ if (pid == 0) {
+ setrlimit(RLIMIT_CORE, &(struct rlimit){});
+
+ /* Hide "segfault at ffffffffff600000" messages. */
+ struct sigaction act = {};
+ act.sa_flags = SA_SIGINFO;
+ act.sa_sigaction = sigaction_SIGSEGV_vsyscall;
+ sigaction(SIGSEGV, &act, NULL);
+
+ g_vsyscall = 0;
+ /* gettimeofday(NULL, NULL); */
+ uint64_t rax = 0xffffffffff600000;
+ asm volatile (
+ "call *%[rax]"
+ : [rax] "+a" (rax)
+ : "D" (NULL), "S" (NULL)
+ : "rcx", "r11"
+ );
+
+ g_vsyscall = 1;
+ *(volatile int *)0xffffffffff600000UL;
+
+ g_vsyscall = 2;
+ exit(g_vsyscall);
+ }
+ waitpid(pid, &wstatus, 0);
+ if (WIFEXITED(wstatus)) {
+ g_vsyscall = WEXITSTATUS(wstatus);
+ } else {
+ fprintf(stderr, "error: vsyscall wstatus %08x\n", wstatus);
+ exit(1);
+ }
+}
+#endif
+
+static int test_proc_pid_maps(pid_t pid)
+{
+ char buf[4096];
+ snprintf(buf, sizeof(buf), "/proc/%u/maps", pid);
+ int fd = open(buf, O_RDONLY);
+ if (fd == -1) {
+ perror("open /proc/${pid}/maps");
+ return EXIT_FAILURE;
+ } else {
+ ssize_t rv = read(fd, buf, sizeof(buf));
+ close(fd);
+ if (g_vsyscall == 0) {
+ assert(rv == 0);
+ } else {
+ size_t len = strlen(g_proc_pid_maps_vsyscall);
+ assert(rv == len);
+ assert(memcmp(buf, g_proc_pid_maps_vsyscall, len) == 0);
+ }
+ return EXIT_SUCCESS;
+ }
+}
+
+static int test_proc_pid_numa_maps(pid_t pid)
+{
+ char buf[4096];
+ snprintf(buf, sizeof(buf), "/proc/%u/numa_maps", pid);
+ int fd = open(buf, O_RDONLY);
+ if (fd == -1) {
+ if (errno == ENOENT) {
+ /*
+ * /proc/${pid}/numa_maps is under CONFIG_NUMA,
+ * it doesn't necessarily exist.
+ */
+ return EXIT_SUCCESS;
+ }
+ perror("open /proc/${pid}/numa_maps");
+ return EXIT_FAILURE;
+ } else {
+ ssize_t rv = read(fd, buf, sizeof(buf));
+ close(fd);
+ assert(rv == 0);
+ return EXIT_SUCCESS;
+ }
+}
+
+static int test_proc_pid_smaps(pid_t pid)
+{
+ char buf[4096];
+ snprintf(buf, sizeof(buf), "/proc/%u/smaps", pid);
+ int fd = open(buf, O_RDONLY);
+ if (fd == -1) {
+ if (errno == ENOENT) {
+ /*
+ * /proc/${pid}/smaps is under CONFIG_PROC_PAGE_MONITOR,
+ * it doesn't necessarily exist.
+ */
+ return EXIT_SUCCESS;
+ }
+ perror("open /proc/${pid}/smaps");
+ return EXIT_FAILURE;
+ }
+ ssize_t rv = read(fd, buf, sizeof(buf));
+ close(fd);
+
+ assert(0 <= rv);
+ assert(rv <= sizeof(buf));
+
+ if (g_vsyscall == 0) {
+ assert(rv == 0);
+ } else {
+ size_t len = strlen(g_proc_pid_smaps_vsyscall);
+ assert(rv > len);
+ assert(memcmp(buf, g_proc_pid_smaps_vsyscall, len) == 0);
+
+ if (g_protection_key_support) {
+#define PROTECTION_KEY "ProtectionKey: 0\n"
+ assert(memmem(buf, rv, PROTECTION_KEY, strlen(PROTECTION_KEY)));
+ }
+ }
+
+ return EXIT_SUCCESS;
+}
+
+static const char g_smaps_rollup[] =
+"00000000-00000000 ---p 00000000 00:00 0 [rollup]\n"
+"Rss: 0 kB\n"
+"Pss: 0 kB\n"
+"Pss_Dirty: 0 kB\n"
+"Pss_Anon: 0 kB\n"
+"Pss_File: 0 kB\n"
+"Pss_Shmem: 0 kB\n"
+"Shared_Clean: 0 kB\n"
+"Shared_Dirty: 0 kB\n"
+"Private_Clean: 0 kB\n"
+"Private_Dirty: 0 kB\n"
+"Referenced: 0 kB\n"
+"Anonymous: 0 kB\n"
+"KSM: 0 kB\n"
+"LazyFree: 0 kB\n"
+"AnonHugePages: 0 kB\n"
+"ShmemPmdMapped: 0 kB\n"
+"FilePmdMapped: 0 kB\n"
+"Shared_Hugetlb: 0 kB\n"
+"Private_Hugetlb: 0 kB\n"
+"Swap: 0 kB\n"
+"SwapPss: 0 kB\n"
+"Locked: 0 kB\n"
+;
+
+static int test_proc_pid_smaps_rollup(pid_t pid)
+{
+ char buf[4096];
+ snprintf(buf, sizeof(buf), "/proc/%u/smaps_rollup", pid);
+ int fd = open(buf, O_RDONLY);
+ if (fd == -1) {
+ if (errno == ENOENT) {
+ /*
+ * /proc/${pid}/smaps_rollup is under CONFIG_PROC_PAGE_MONITOR,
+ * it doesn't necessarily exist.
+ */
+ return EXIT_SUCCESS;
+ }
+ perror("open /proc/${pid}/smaps_rollup");
+ return EXIT_FAILURE;
+ } else {
+ ssize_t rv = read(fd, buf, sizeof(buf));
+ close(fd);
+ assert(rv == sizeof(g_smaps_rollup) - 1);
+ assert(memcmp(buf, g_smaps_rollup, sizeof(g_smaps_rollup) - 1) == 0);
+ return EXIT_SUCCESS;
+ }
+}
+
+static const char *parse_u64(const char *p, const char *const end, uint64_t *rv)
+{
+ *rv = 0;
+ for (; p != end; p += 1) {
+ if ('0' <= *p && *p <= '9') {
+ assert(!__builtin_mul_overflow(*rv, 10, rv));
+ assert(!__builtin_add_overflow(*rv, *p - '0', rv));
+ } else {
+ break;
+ }
+ }
+ assert(p != end);
+ return p;
+}
+
+/*
+ * There seems to be 2 types of valid output:
+ * "0 A A B 0 0 0\n" for dynamic exeuctables,
+ * "0 0 0 B 0 0 0\n" for static executables.
+ */
+static int test_proc_pid_statm(pid_t pid)
+{
+ char buf[4096];
+ snprintf(buf, sizeof(buf), "/proc/%u/statm", pid);
+ int fd = open(buf, O_RDONLY);
+ if (fd == -1) {
+ perror("open /proc/${pid}/statm");
+ return EXIT_FAILURE;
+ }
+
+ ssize_t rv = read(fd, buf, sizeof(buf));
+ close(fd);
+
+ assert(rv >= 0);
+ assert(rv <= sizeof(buf));
+ if (0) {
+ write(1, buf, rv);
+ }
+
+ const char *p = buf;
+ const char *const end = p + rv;
+
+ /* size */
+ assert(p != end && *p++ == '0');
+ assert(p != end && *p++ == ' ');
+
+ uint64_t resident;
+ p = parse_u64(p, end, &resident);
+ assert(p != end && *p++ == ' ');
+
+ uint64_t shared;
+ p = parse_u64(p, end, &shared);
+ assert(p != end && *p++ == ' ');
+
+ uint64_t text;
+ p = parse_u64(p, end, &text);
+ assert(p != end && *p++ == ' ');
+
+ assert(p != end && *p++ == '0');
+ assert(p != end && *p++ == ' ');
+
+ /* data */
+ assert(p != end && *p++ == '0');
+ assert(p != end && *p++ == ' ');
+
+ assert(p != end && *p++ == '0');
+ assert(p != end && *p++ == '\n');
+
+ assert(p == end);
+
+ /*
+ * "text" is "mm->end_code - mm->start_code" at execve(2) time.
+ * munmap() doesn't change it. It can be anything (just link
+ * statically). It can't be 0 because executing to this point
+ * implies at least 1 page of code.
+ */
+ assert(text > 0);
+
+ /*
+ * These two are always equal. Always 0 for statically linked
+ * executables and sometimes 0 for dynamically linked executables.
+ * There is no way to tell one from another without parsing ELF
+ * which is too much for this test.
+ */
+ assert(resident == shared);
+
+ return EXIT_SUCCESS;
+}
+
+int main(void)
+{
+ int rv = EXIT_SUCCESS;
+
+#ifdef TEST_VSYSCALL
+ vsyscall();
+#endif
+
+ switch (g_vsyscall) {
+ case 0:
+ g_proc_pid_maps_vsyscall = proc_pid_maps_vsyscall_0;
+ g_proc_pid_smaps_vsyscall = proc_pid_smaps_vsyscall_0;
+ break;
+ case 1:
+ g_proc_pid_maps_vsyscall = proc_pid_maps_vsyscall_1;
+ g_proc_pid_smaps_vsyscall = proc_pid_smaps_vsyscall_1;
+ break;
+ case 2:
+ g_proc_pid_maps_vsyscall = proc_pid_maps_vsyscall_2;
+ g_proc_pid_smaps_vsyscall = proc_pid_smaps_vsyscall_2;
+ break;
+ default:
+ abort();
+ }
+
+ g_protection_key_support = protection_key_support();
+
+ pid_t pid = fork();
+ if (pid == -1) {
+ perror("fork");
+ return EXIT_FAILURE;
+ } else if (pid == 0) {
+ rv = ptrace(PTRACE_TRACEME, 0, NULL, NULL);
+ if (rv != 0) {
+ if (errno == EPERM) {
+ fprintf(stderr,
+"Did you know? ptrace(PTRACE_TRACEME) doesn't work under strace.\n"
+ );
+ kill(getppid(), SIGTERM);
+ return EXIT_FAILURE;
+ }
+ perror("ptrace PTRACE_TRACEME");
+ return EXIT_FAILURE;
+ }
+
+ /*
+ * Hide "segfault at ..." messages. Signal handler won't run.
+ */
+ struct sigaction act = {};
+ act.sa_flags = SA_SIGINFO;
+ act.sa_sigaction = sigaction_SIGSEGV;
+ sigaction(SIGSEGV, &act, NULL);
+
+#ifdef __amd64__
+ munmap(NULL, ((size_t)1 << 47) - 4096);
+#elif defined __i386__
+ {
+ size_t len;
+
+ for (len = -4096;; len -= 4096) {
+ munmap(NULL, len);
+ }
+ }
+#else
+#error "implement 'unmap everything'"
+#endif
+ return EXIT_FAILURE;
+ } else {
+ /*
+ * TODO find reliable way to signal parent that munmap(2) completed.
+ * Child can't do it directly because it effectively doesn't exist
+ * anymore. Looking at child's VM files isn't 100% reliable either:
+ * due to a bug they may not become empty or empty-like.
+ */
+ sleep(1);
+
+ if (rv == EXIT_SUCCESS) {
+ rv = test_proc_pid_maps(pid);
+ }
+ if (rv == EXIT_SUCCESS) {
+ rv = test_proc_pid_numa_maps(pid);
+ }
+ if (rv == EXIT_SUCCESS) {
+ rv = test_proc_pid_smaps(pid);
+ }
+ if (rv == EXIT_SUCCESS) {
+ rv = test_proc_pid_smaps_rollup(pid);
+ }
+ if (rv == EXIT_SUCCESS) {
+ rv = test_proc_pid_statm(pid);
+ }
+
+ /* Cut the rope. */
+ int wstatus;
+ waitpid(pid, &wstatus, 0);
+ assert(WIFSTOPPED(wstatus));
+ assert(WSTOPSIG(wstatus) == SIGSEGV);
+ }
+
+ return rv;
+}
+#else
+int main(void)
+{
+ return 4;
+}
+#endif
diff --git a/tools/testing/selftests/proc/proc-loadavg-001.c b/tools/testing/selftests/proc/proc-loadavg-001.c
index 471e2aa28077..fb4fe9188806 100644
--- a/tools/testing/selftests/proc/proc-loadavg-001.c
+++ b/tools/testing/selftests/proc/proc-loadavg-001.c
@@ -14,7 +14,6 @@
* OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
*/
/* Test that /proc/loadavg correctly reports last pid in pid namespace. */
-#define _GNU_SOURCE
#include <errno.h>
#include <sched.h>
#include <sys/types.h>
diff --git a/tools/testing/selftests/proc/proc-pid-vm.c b/tools/testing/selftests/proc/proc-pid-vm.c
index 18a3bde8bc96..cacbd2a4aec9 100644
--- a/tools/testing/selftests/proc/proc-pid-vm.c
+++ b/tools/testing/selftests/proc/proc-pid-vm.c
@@ -46,6 +46,8 @@
#include <sys/time.h>
#include <sys/resource.h>
+#include "../kselftest.h"
+
static inline long sys_execveat(int dirfd, const char *pathname, char **argv, char **envp, int flags)
{
return syscall(SYS_execveat, dirfd, pathname, argv, envp, flags);
@@ -209,19 +211,28 @@ static int make_exe(const uint8_t *payload, size_t len)
}
#endif
-static bool g_vsyscall = false;
+/*
+ * 0: vsyscall VMA doesn't exist vsyscall=none
+ * 1: vsyscall VMA is --xp vsyscall=xonly
+ * 2: vsyscall VMA is r-xp vsyscall=emulate
+ */
+static volatile int g_vsyscall;
+static const char *str_vsyscall;
-static const char str_vsyscall[] =
+static const char str_vsyscall_0[] = "";
+static const char str_vsyscall_1[] =
+"ffffffffff600000-ffffffffff601000 --xp 00000000 00:00 0 [vsyscall]\n";
+static const char str_vsyscall_2[] =
"ffffffffff600000-ffffffffff601000 r-xp 00000000 00:00 0 [vsyscall]\n";
#ifdef __x86_64__
static void sigaction_SIGSEGV(int _, siginfo_t *__, void *___)
{
- _exit(1);
+ _exit(g_vsyscall);
}
/*
- * vsyscall page can't be unmapped, probe it with memory load.
+ * vsyscall page can't be unmapped, probe it directly.
*/
static void vsyscall(void)
{
@@ -244,12 +255,28 @@ static void vsyscall(void)
act.sa_sigaction = sigaction_SIGSEGV;
(void)sigaction(SIGSEGV, &act, NULL);
+ g_vsyscall = 0;
+ /* gettimeofday(NULL, NULL); */
+ uint64_t rax = 0xffffffffff600000;
+ asm volatile (
+ "call *%[rax]"
+ : [rax] "+a" (rax)
+ : "D" (NULL), "S" (NULL)
+ : "rcx", "r11"
+ );
+
+ g_vsyscall = 1;
*(volatile int *)0xffffffffff600000UL;
- exit(0);
+
+ g_vsyscall = 2;
+ exit(g_vsyscall);
}
waitpid(pid, &wstatus, 0);
- if (WIFEXITED(wstatus) && WEXITSTATUS(wstatus) == 0) {
- g_vsyscall = true;
+ if (WIFEXITED(wstatus)) {
+ g_vsyscall = WEXITSTATUS(wstatus);
+ } else {
+ fprintf(stderr, "error: wstatus %08x\n", wstatus);
+ exit(1);
}
}
@@ -259,6 +286,19 @@ int main(void)
int exec_fd;
vsyscall();
+ switch (g_vsyscall) {
+ case 0:
+ str_vsyscall = str_vsyscall_0;
+ break;
+ case 1:
+ str_vsyscall = str_vsyscall_1;
+ break;
+ case 2:
+ str_vsyscall = str_vsyscall_2;
+ break;
+ default:
+ abort();
+ }
atexit(ate);
@@ -312,7 +352,7 @@ int main(void)
/* Test /proc/$PID/maps */
{
- const size_t len = strlen(buf0) + (g_vsyscall ? strlen(str_vsyscall) : 0);
+ const size_t len = strlen(buf0) + strlen(str_vsyscall);
char buf[256];
ssize_t rv;
int fd;
@@ -325,7 +365,7 @@ int main(void)
rv = read(fd, buf, sizeof(buf));
assert(rv == len);
assert(memcmp(buf, buf0, strlen(buf0)) == 0);
- if (g_vsyscall) {
+ if (g_vsyscall > 0) {
assert(memcmp(buf + strlen(buf0), str_vsyscall, strlen(str_vsyscall)) == 0);
}
}
@@ -368,11 +408,11 @@ int main(void)
};
int i;
- for (i = 0; i < sizeof(S)/sizeof(S[0]); i++) {
+ for (i = 0; i < ARRAY_SIZE(S); i++) {
assert(memmem(buf, rv, S[i], strlen(S[i])));
}
- if (g_vsyscall) {
+ if (g_vsyscall > 0) {
assert(memmem(buf, rv, str_vsyscall, strlen(str_vsyscall)));
}
}
@@ -417,7 +457,7 @@ int main(void)
};
int i;
- for (i = 0; i < sizeof(S)/sizeof(S[0]); i++) {
+ for (i = 0; i < ARRAY_SIZE(S); i++) {
assert(memmem(buf, rv, S[i], strlen(S[i])));
}
}
diff --git a/tools/testing/selftests/proc/proc-self-syscall.c b/tools/testing/selftests/proc/proc-self-syscall.c
index 9f6d000c0245..8511dcfe67c7 100644
--- a/tools/testing/selftests/proc/proc-self-syscall.c
+++ b/tools/testing/selftests/proc/proc-self-syscall.c
@@ -13,7 +13,6 @@
* ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
* OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
*/
-#define _GNU_SOURCE
#include <unistd.h>
#include <sys/syscall.h>
#include <sys/types.h>
diff --git a/tools/testing/selftests/proc/proc-subset-pid.c b/tools/testing/selftests/proc/proc-subset-pid.c
new file mode 100644
index 000000000000..d1052bcab039
--- /dev/null
+++ b/tools/testing/selftests/proc/proc-subset-pid.c
@@ -0,0 +1,121 @@
+/*
+ * Copyright (c) 2021 Alexey Dobriyan <adobriyan@gmail.com>
+ *
+ * Permission to use, copy, modify, and distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+ * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ */
+/*
+ * Test that "mount -t proc -o subset=pid" hides everything but pids,
+ * /proc/self and /proc/thread-self.
+ */
+#undef NDEBUG
+#include <assert.h>
+#include <errno.h>
+#include <sched.h>
+#include <stdbool.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/mount.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <fcntl.h>
+#include <dirent.h>
+#include <unistd.h>
+#include <stdio.h>
+
+static inline bool streq(const char *a, const char *b)
+{
+ return strcmp(a, b) == 0;
+}
+
+static void make_private_proc(void)
+{
+ if (unshare(CLONE_NEWNS) == -1) {
+ if (errno == ENOSYS || errno == EPERM) {
+ exit(4);
+ }
+ exit(1);
+ }
+ if (mount(NULL, "/", NULL, MS_PRIVATE|MS_REC, NULL) == -1) {
+ exit(1);
+ }
+ if (mount(NULL, "/proc", "proc", 0, "subset=pid") == -1) {
+ exit(1);
+ }
+}
+
+static bool string_is_pid(const char *s)
+{
+ while (1) {
+ switch (*s++) {
+ case '0':case '1':case '2':case '3':case '4':
+ case '5':case '6':case '7':case '8':case '9':
+ continue;
+
+ case '\0':
+ return true;
+
+ default:
+ return false;
+ }
+ }
+}
+
+int main(void)
+{
+ make_private_proc();
+
+ DIR *d = opendir("/proc");
+ assert(d);
+
+ struct dirent *de;
+
+ bool dot = false;
+ bool dot_dot = false;
+ bool self = false;
+ bool thread_self = false;
+
+ while ((de = readdir(d))) {
+ if (streq(de->d_name, ".")) {
+ assert(!dot);
+ dot = true;
+ assert(de->d_type == DT_DIR);
+ } else if (streq(de->d_name, "..")) {
+ assert(!dot_dot);
+ dot_dot = true;
+ assert(de->d_type == DT_DIR);
+ } else if (streq(de->d_name, "self")) {
+ assert(!self);
+ self = true;
+ assert(de->d_type == DT_LNK);
+ } else if (streq(de->d_name, "thread-self")) {
+ assert(!thread_self);
+ thread_self = true;
+ assert(de->d_type == DT_LNK);
+ } else {
+ if (!string_is_pid(de->d_name)) {
+ fprintf(stderr, "d_name '%s'\n", de->d_name);
+ assert(0);
+ }
+ assert(de->d_type == DT_DIR);
+ }
+ }
+
+ char c;
+ int rv = readlink("/proc/cpuinfo", &c, 1);
+ assert(rv == -1 && errno == ENOENT);
+
+ int fd = open("/proc/cpuinfo", O_RDONLY);
+ assert(fd == -1 && errno == ENOENT);
+
+ return 0;
+}
diff --git a/tools/testing/selftests/proc/proc-tid0.c b/tools/testing/selftests/proc/proc-tid0.c
new file mode 100644
index 000000000000..58c1d7c90a8e
--- /dev/null
+++ b/tools/testing/selftests/proc/proc-tid0.c
@@ -0,0 +1,81 @@
+/*
+ * Copyright (c) 2021 Alexey Dobriyan <adobriyan@gmail.com>
+ *
+ * Permission to use, copy, modify, and distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+ * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ */
+// Test that /proc/*/task never contains "0".
+#include <sys/types.h>
+#include <dirent.h>
+#include <signal.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+#include <pthread.h>
+
+static pid_t pid = -1;
+
+static void atexit_hook(void)
+{
+ if (pid > 0) {
+ kill(pid, SIGKILL);
+ }
+}
+
+static void *f(void *_)
+{
+ return NULL;
+}
+
+static void sigalrm(int _)
+{
+ exit(0);
+}
+
+int main(void)
+{
+ pid = fork();
+ if (pid == 0) {
+ /* child */
+ while (1) {
+ pthread_t pth;
+ pthread_create(&pth, NULL, f, NULL);
+ pthread_join(pth, NULL);
+ }
+ } else if (pid > 0) {
+ /* parent */
+ atexit(atexit_hook);
+
+ char buf[64];
+ snprintf(buf, sizeof(buf), "/proc/%u/task", pid);
+
+ signal(SIGALRM, sigalrm);
+ alarm(1);
+
+ while (1) {
+ DIR *d = opendir(buf);
+ struct dirent *de;
+ while ((de = readdir(d))) {
+ if (strcmp(de->d_name, "0") == 0) {
+ exit(1);
+ }
+ }
+ closedir(d);
+ }
+
+ return 0;
+ } else {
+ perror("fork");
+ return 1;
+ }
+}
diff --git a/tools/testing/selftests/proc/proc-uptime-001.c b/tools/testing/selftests/proc/proc-uptime-001.c
index 781f7a50fc3f..f335eec5067e 100644
--- a/tools/testing/selftests/proc/proc-uptime-001.c
+++ b/tools/testing/selftests/proc/proc-uptime-001.c
@@ -13,7 +13,9 @@
* ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
* OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
*/
-// Test that values in /proc/uptime increment monotonically.
+// Test that boottime value in /proc/uptime and CLOCK_BOOTTIME increment
+// monotonically. We don't test idle time monotonicity due to broken iowait
+// task counting, cf: comment above get_cpu_idle_time_us()
#undef NDEBUG
#include <assert.h>
#include <stdint.h>
@@ -25,20 +27,31 @@
int main(void)
{
- uint64_t start, u0, u1, i0, i1;
+ uint64_t start, u0, u1, c0, c1;
int fd;
fd = open("/proc/uptime", O_RDONLY);
assert(fd >= 0);
- proc_uptime(fd, &u0, &i0);
+ u0 = proc_uptime(fd);
start = u0;
+ c0 = clock_boottime();
+
do {
- proc_uptime(fd, &u1, &i1);
+ u1 = proc_uptime(fd);
+ c1 = clock_boottime();
+
+ /* Is /proc/uptime monotonic ? */
assert(u1 >= u0);
- assert(i1 >= i0);
+
+ /* Is CLOCK_BOOTTIME monotonic ? */
+ assert(c1 >= c0);
+
+ /* Is CLOCK_BOOTTIME VS /proc/uptime monotonic ? */
+ assert(c0 >= u0);
+
u0 = u1;
- i0 = i1;
+ c0 = c1;
} while (u1 - start < 100);
return 0;
diff --git a/tools/testing/selftests/proc/proc-uptime-002.c b/tools/testing/selftests/proc/proc-uptime-002.c
index 30e2b7849089..ae453daa96c1 100644
--- a/tools/testing/selftests/proc/proc-uptime-002.c
+++ b/tools/testing/selftests/proc/proc-uptime-002.c
@@ -13,11 +13,13 @@
* ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
* OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
*/
-// Test that values in /proc/uptime increment monotonically
-// while shifting across CPUs.
-#define _GNU_SOURCE
+// Test that boottime value in /proc/uptime and CLOCK_BOOTTIME increment
+// monotonically while shifting across CPUs. We don't test idle time
+// monotonicity due to broken iowait task counting, cf: comment above
+// get_cpu_idle_time_us()
#undef NDEBUG
#include <assert.h>
+#include <errno.h>
#include <unistd.h>
#include <sys/syscall.h>
#include <stdlib.h>
@@ -42,10 +44,10 @@ static inline int sys_sched_setaffinity(pid_t pid, unsigned int len, unsigned lo
int main(void)
{
+ uint64_t u0, u1, c0, c1;
unsigned int len;
unsigned long *m;
unsigned int cpu;
- uint64_t u0, u1, i0, i1;
int fd;
/* find out "nr_cpu_ids" */
@@ -55,12 +57,14 @@ int main(void)
len += sizeof(unsigned long);
free(m);
m = malloc(len);
- } while (sys_sched_getaffinity(0, len, m) == -EINVAL);
+ } while (sys_sched_getaffinity(0, len, m) == -1 && errno == EINVAL);
fd = open("/proc/uptime", O_RDONLY);
assert(fd >= 0);
- proc_uptime(fd, &u0, &i0);
+ u0 = proc_uptime(fd);
+ c0 = clock_boottime();
+
for (cpu = 0; cpu < len * 8; cpu++) {
memset(m, 0, len);
m[cpu / (8 * sizeof(unsigned long))] |= 1UL << (cpu % (8 * sizeof(unsigned long)));
@@ -68,11 +72,20 @@ int main(void)
/* CPU might not exist, ignore error */
sys_sched_setaffinity(0, len, m);
- proc_uptime(fd, &u1, &i1);
+ u1 = proc_uptime(fd);
+ c1 = clock_boottime();
+
+ /* Is /proc/uptime monotonic ? */
assert(u1 >= u0);
- assert(i1 >= i0);
+
+ /* Is CLOCK_BOOTTIME monotonic ? */
+ assert(c1 >= c0);
+
+ /* Is CLOCK_BOOTTIME VS /proc/uptime monotonic ? */
+ assert(c0 >= u0);
+
u0 = u1;
- i0 = i1;
+ c0 = c1;
}
return 0;
diff --git a/tools/testing/selftests/proc/proc-uptime.h b/tools/testing/selftests/proc/proc-uptime.h
index dc6a42b1d6b0..730cce4a3d73 100644
--- a/tools/testing/selftests/proc/proc-uptime.h
+++ b/tools/testing/selftests/proc/proc-uptime.h
@@ -19,10 +19,22 @@
#include <string.h>
#include <stdlib.h>
#include <unistd.h>
+#include <time.h>
#include "proc.h"
-static void proc_uptime(int fd, uint64_t *uptime, uint64_t *idle)
+static uint64_t clock_boottime(void)
+{
+ struct timespec ts;
+ int err;
+
+ err = clock_gettime(CLOCK_BOOTTIME, &ts);
+ assert(err >= 0);
+
+ return (ts.tv_sec * 100) + (ts.tv_nsec / 10000000);
+}
+
+static uint64_t proc_uptime(int fd)
{
uint64_t val1, val2;
char buf[64], *p;
@@ -43,18 +55,6 @@ static void proc_uptime(int fd, uint64_t *uptime, uint64_t *idle)
assert(p[3] == ' ');
val2 = (p[1] - '0') * 10 + p[2] - '0';
- *uptime = val1 * 100 + val2;
-
- p += 4;
-
- val1 = xstrtoull(p, &p);
- assert(p[0] == '.');
- assert('0' <= p[1] && p[1] <= '9');
- assert('0' <= p[2] && p[2] <= '9');
- assert(p[3] == '\n');
-
- val2 = (p[1] - '0') * 10 + p[2] - '0';
- *idle = val1 * 100 + val2;
- assert(p + 4 == buf + rv);
+ return val1 * 100 + val2;
}
diff --git a/tools/testing/selftests/proc/read.c b/tools/testing/selftests/proc/read.c
index b3ef9e14d6cc..35ee78dff144 100644
--- a/tools/testing/selftests/proc/read.c
+++ b/tools/testing/selftests/proc/read.c
@@ -14,7 +14,7 @@
* OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
*/
// Test
-// 1) read of every file in /proc
+// 1) read and lseek on every file in /proc
// 2) readlink of every symlink in /proc
// 3) recursively (1) + (2) for every directory in /proc
// 4) write to /proc/*/clear_refs and /proc/*/task/*/clear_refs
@@ -45,6 +45,8 @@ static void f_reg(DIR *d, const char *filename)
fd = openat(dirfd(d), filename, O_RDONLY|O_NONBLOCK);
if (fd == -1)
return;
+ /* struct proc_ops::proc_lseek is mandatory if file is seekable. */
+ (void)lseek(fd, 0, SEEK_SET);
rv = read(fd, buf, sizeof(buf));
assert((0 <= rv && rv <= sizeof(buf)) || rv == -1);
close(fd);
diff --git a/tools/testing/selftests/ptp/Makefile b/tools/testing/selftests/ptp/Makefile
index ef06de0898b7..8f57f88ecadd 100644
--- a/tools/testing/selftests/ptp/Makefile
+++ b/tools/testing/selftests/ptp/Makefile
@@ -1,10 +1,7 @@
# SPDX-License-Identifier: GPL-2.0
-CFLAGS += -I../../../../usr/include/
-TEST_PROGS := testptp
+CFLAGS += $(KHDR_INCLUDES)
+TEST_GEN_PROGS := testptp
LDLIBS += -lrt
-all: $(TEST_PROGS)
+TEST_PROGS = phc.sh
include ../lib.mk
-
-clean:
- rm -fr $(TEST_PROGS)
diff --git a/tools/testing/selftests/ptp/ptpchmaskfmt.sh b/tools/testing/selftests/ptp/ptpchmaskfmt.sh
new file mode 100644
index 000000000000..0a06ba8af300
--- /dev/null
+++ b/tools/testing/selftests/ptp/ptpchmaskfmt.sh
@@ -0,0 +1,14 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+# Simple helper script to transform ptp debugfs timestamp event queue filtering
+# masks from decimal values to hexadecimal values
+
+# Only takes the debugfs mask file path as an argument
+DEBUGFS_MASKFILE="${1}"
+
+#shellcheck disable=SC2013,SC2086
+for int in $(cat "$DEBUGFS_MASKFILE") ; do
+ printf '0x%08X ' "$int"
+done
+echo
diff --git a/tools/testing/selftests/ptp/testptp.c b/tools/testing/selftests/ptp/testptp.c
index da7a9dda9490..011252fe238c 100644
--- a/tools/testing/selftests/ptp/testptp.c
+++ b/tools/testing/selftests/ptp/testptp.c
@@ -35,6 +35,8 @@
#define CLOCK_INVALID -1
#endif
+#define NSEC_PER_SEC 1000000000LL
+
/* clock_adjtime is not available in GLIBC < 2.14 */
#if !__GLIBC_PREREQ(2, 14)
#include <sys/syscall.h>
@@ -108,7 +110,7 @@ static long ppb_to_scaled_ppm(int ppb)
static int64_t pctns(struct ptp_clock_time *t)
{
- return t->sec * 1000000000LL + t->nsec;
+ return t->sec * NSEC_PER_SEC + t->nsec;
}
static void usage(char *progname)
@@ -119,6 +121,7 @@ static void usage(char *progname)
" -d name device to open\n"
" -e val read 'val' external time stamp events\n"
" -f val adjust the ptp clock frequency by 'val' ppb\n"
+ " -F chan Enable single channel mask and keep device open for debugfs verification.\n"
" -g get the ptp clock time\n"
" -h prints this message\n"
" -i val index for event/trigger\n"
@@ -131,14 +134,20 @@ static void usage(char *progname)
" 0 - none\n"
" 1 - external time stamp\n"
" 2 - periodic output\n"
+ " -n val shift the ptp clock time by 'val' nanoseconds\n"
+ " -o val phase offset (in nanoseconds) to be provided to the PHC servo\n"
" -p val enable output with a period of 'val' nanoseconds\n"
+ " -H val set output phase to 'val' nanoseconds (requires -p)\n"
+ " -w val set output pulse width to 'val' nanoseconds (requires -p)\n"
" -P val enable or disable (val=1|0) the system clock PPS\n"
" -s set the ptp clock time from the system time\n"
" -S set the system time from the ptp clock time\n"
" -t val shift the ptp clock time by 'val' seconds\n"
" -T val set the ptp clock time to 'val' seconds\n"
+ " -x val get an extended ptp clock time with the desired number of samples (up to %d)\n"
+ " -X get a ptp clock cross timestamp\n"
" -z test combinations of rising/falling external time stamp flags\n",
- progname);
+ progname, PTP_MAX_SAMPLES);
}
int main(int argc, char *argv[])
@@ -152,6 +161,8 @@ int main(int argc, char *argv[])
struct timex tx;
struct ptp_clock_time *pct;
struct ptp_sys_offset *sysoff;
+ struct ptp_sys_offset_extended *soe;
+ struct ptp_sys_offset_precise *xts;
char *progname;
unsigned int i;
@@ -161,6 +172,8 @@ int main(int argc, char *argv[])
clockid_t clkid;
int adjfreq = 0x7fffffff;
int adjtime = 0;
+ int adjns = 0;
+ int adjphase = 0;
int capabilities = 0;
int extts = 0;
int flagtest = 0;
@@ -168,19 +181,24 @@ int main(int argc, char *argv[])
int index = 0;
int list_pins = 0;
int pct_offset = 0;
+ int getextended = 0;
+ int getcross = 0;
int n_samples = 0;
- int perout = -1;
int pin_index = -1, pin_func;
int pps = -1;
int seconds = 0;
int settime = 0;
+ int channel = -1;
int64_t t1, t2, tp;
int64_t interval, offset;
+ int64_t perout_phase = -1;
+ int64_t pulsewidth = -1;
+ int64_t perout = -1;
progname = strrchr(argv[0], '/');
progname = progname ? 1+progname : argv[0];
- while (EOF != (c = getopt(argc, argv, "cd:e:f:ghi:k:lL:p:P:sSt:T:z"))) {
+ while (EOF != (c = getopt(argc, argv, "cd:e:f:F:ghH:i:k:lL:n:o:p:P:sSt:T:w:x:Xz"))) {
switch (c) {
case 'c':
capabilities = 1;
@@ -194,9 +212,15 @@ int main(int argc, char *argv[])
case 'f':
adjfreq = atoi(optarg);
break;
+ case 'F':
+ channel = atoi(optarg);
+ break;
case 'g':
gettime = 1;
break;
+ case 'H':
+ perout_phase = atoll(optarg);
+ break;
case 'i':
index = atoi(optarg);
break;
@@ -214,8 +238,14 @@ int main(int argc, char *argv[])
return -1;
}
break;
+ case 'n':
+ adjns = atoi(optarg);
+ break;
+ case 'o':
+ adjphase = atoi(optarg);
+ break;
case 'p':
- perout = atoi(optarg);
+ perout = atoll(optarg);
break;
case 'P':
pps = atoi(optarg);
@@ -233,6 +263,21 @@ int main(int argc, char *argv[])
settime = 3;
seconds = atoi(optarg);
break;
+ case 'w':
+ pulsewidth = atoi(optarg);
+ break;
+ case 'x':
+ getextended = atoi(optarg);
+ if (getextended < 1 || getextended > PTP_MAX_SAMPLES) {
+ fprintf(stderr,
+ "number of extended timestamp samples must be between 1 and %d; was asked for %d\n",
+ PTP_MAX_SAMPLES, getextended);
+ return -1;
+ }
+ break;
+ case 'X':
+ getcross = 1;
+ break;
case 'z':
flagtest = 1;
break;
@@ -270,7 +315,8 @@ int main(int argc, char *argv[])
" %d pulse per second\n"
" %d programmable pins\n"
" %d cross timestamping\n"
- " %d adjust_phase\n",
+ " %d adjust_phase\n"
+ " %d maximum phase adjustment (ns)\n",
caps.max_adj,
caps.n_alarm,
caps.n_ext_ts,
@@ -278,7 +324,8 @@ int main(int argc, char *argv[])
caps.pps,
caps.n_pins,
caps.cross_timestamping,
- caps.adjust_phase);
+ caps.adjust_phase,
+ caps.max_phase_adj);
}
}
@@ -293,11 +340,16 @@ int main(int argc, char *argv[])
}
}
- if (adjtime) {
+ if (adjtime || adjns) {
memset(&tx, 0, sizeof(tx));
- tx.modes = ADJ_SETOFFSET;
+ tx.modes = ADJ_SETOFFSET | ADJ_NANO;
tx.time.tv_sec = adjtime;
- tx.time.tv_usec = 0;
+ tx.time.tv_usec = adjns;
+ while (tx.time.tv_usec < 0) {
+ tx.time.tv_sec -= 1;
+ tx.time.tv_usec += NSEC_PER_SEC;
+ }
+
if (clock_adjtime(clkid, &tx) < 0) {
perror("clock_adjtime");
} else {
@@ -305,6 +357,18 @@ int main(int argc, char *argv[])
}
}
+ if (adjphase) {
+ memset(&tx, 0, sizeof(tx));
+ tx.modes = ADJ_OFFSET | ADJ_NANO;
+ tx.offset = adjphase;
+
+ if (clock_adjtime(clkid, &tx) < 0) {
+ perror("clock_adjtime");
+ } else {
+ puts("phase adjustment okay");
+ }
+ }
+
if (gettime) {
if (clock_gettime(clkid, &ts)) {
perror("clock_gettime");
@@ -342,6 +406,18 @@ int main(int argc, char *argv[])
}
}
+ if (pin_index >= 0) {
+ memset(&desc, 0, sizeof(desc));
+ desc.index = pin_index;
+ desc.func = pin_func;
+ desc.chan = index;
+ if (ioctl(fd, PTP_PIN_SETFUNC, &desc)) {
+ perror("PTP_PIN_SETFUNC");
+ } else {
+ puts("set pin function okay");
+ }
+ }
+
if (extts) {
memset(&extts_request, 0, sizeof(extts_request));
extts_request.index = index;
@@ -391,6 +467,16 @@ int main(int argc, char *argv[])
}
}
+ if (pulsewidth >= 0 && perout < 0) {
+ puts("-w can only be specified together with -p");
+ return -1;
+ }
+
+ if (perout_phase >= 0 && perout < 0) {
+ puts("-H can only be specified together with -p");
+ return -1;
+ }
+
if (perout >= 0) {
if (clock_gettime(clkid, &ts)) {
perror("clock_gettime");
@@ -398,26 +484,27 @@ int main(int argc, char *argv[])
}
memset(&perout_request, 0, sizeof(perout_request));
perout_request.index = index;
- perout_request.start.sec = ts.tv_sec + 2;
- perout_request.start.nsec = 0;
- perout_request.period.sec = 0;
- perout_request.period.nsec = perout;
- if (ioctl(fd, PTP_PEROUT_REQUEST, &perout_request)) {
- perror("PTP_PEROUT_REQUEST");
+ perout_request.period.sec = perout / NSEC_PER_SEC;
+ perout_request.period.nsec = perout % NSEC_PER_SEC;
+ perout_request.flags = 0;
+ if (pulsewidth >= 0) {
+ perout_request.flags |= PTP_PEROUT_DUTY_CYCLE;
+ perout_request.on.sec = pulsewidth / NSEC_PER_SEC;
+ perout_request.on.nsec = pulsewidth % NSEC_PER_SEC;
+ }
+ if (perout_phase >= 0) {
+ perout_request.flags |= PTP_PEROUT_PHASE;
+ perout_request.phase.sec = perout_phase / NSEC_PER_SEC;
+ perout_request.phase.nsec = perout_phase % NSEC_PER_SEC;
} else {
- puts("periodic output request okay");
+ perout_request.start.sec = ts.tv_sec + 2;
+ perout_request.start.nsec = 0;
}
- }
- if (pin_index >= 0) {
- memset(&desc, 0, sizeof(desc));
- desc.index = pin_index;
- desc.func = pin_func;
- desc.chan = index;
- if (ioctl(fd, PTP_PIN_SETFUNC, &desc)) {
- perror("PTP_PIN_SETFUNC");
+ if (ioctl(fd, PTP_PEROUT_REQUEST2, &perout_request)) {
+ perror("PTP_PEROUT_REQUEST");
} else {
- puts("set pin function okay");
+ puts("periodic output request okay");
}
}
@@ -457,11 +544,11 @@ int main(int argc, char *argv[])
interval = t2 - t1;
offset = (t2 + t1) / 2 - tp;
- printf("system time: %lld.%u\n",
+ printf("system time: %lld.%09u\n",
(pct+2*i)->sec, (pct+2*i)->nsec);
- printf("phc time: %lld.%u\n",
+ printf("phc time: %lld.%09u\n",
(pct+2*i+1)->sec, (pct+2*i+1)->nsec);
- printf("system time: %lld.%u\n",
+ printf("system time: %lld.%09u\n",
(pct+2*i+2)->sec, (pct+2*i+2)->nsec);
printf("system/phc clock time offset is %" PRId64 " ns\n"
"system clock time delay is %" PRId64 " ns\n",
@@ -471,6 +558,69 @@ int main(int argc, char *argv[])
free(sysoff);
}
+ if (getextended) {
+ soe = calloc(1, sizeof(*soe));
+ if (!soe) {
+ perror("calloc");
+ return -1;
+ }
+
+ soe->n_samples = getextended;
+
+ if (ioctl(fd, PTP_SYS_OFFSET_EXTENDED, soe)) {
+ perror("PTP_SYS_OFFSET_EXTENDED");
+ } else {
+ printf("extended timestamp request returned %d samples\n",
+ getextended);
+
+ for (i = 0; i < getextended; i++) {
+ printf("sample #%2d: system time before: %lld.%09u\n",
+ i, soe->ts[i][0].sec, soe->ts[i][0].nsec);
+ printf(" phc time: %lld.%09u\n",
+ soe->ts[i][1].sec, soe->ts[i][1].nsec);
+ printf(" system time after: %lld.%09u\n",
+ soe->ts[i][2].sec, soe->ts[i][2].nsec);
+ }
+ }
+
+ free(soe);
+ }
+
+ if (getcross) {
+ xts = calloc(1, sizeof(*xts));
+ if (!xts) {
+ perror("calloc");
+ return -1;
+ }
+
+ if (ioctl(fd, PTP_SYS_OFFSET_PRECISE, xts)) {
+ perror("PTP_SYS_OFFSET_PRECISE");
+ } else {
+ puts("system and phc crosstimestamping request okay");
+
+ printf("device time: %lld.%09u\n",
+ xts->device.sec, xts->device.nsec);
+ printf("system time: %lld.%09u\n",
+ xts->sys_realtime.sec, xts->sys_realtime.nsec);
+ printf("monoraw time: %lld.%09u\n",
+ xts->sys_monoraw.sec, xts->sys_monoraw.nsec);
+ }
+
+ free(xts);
+ }
+
+ if (channel >= 0) {
+ if (ioctl(fd, PTP_MASK_CLEAR_ALL)) {
+ perror("PTP_MASK_CLEAR_ALL");
+ } else if (ioctl(fd, PTP_MASK_EN_SINGLE, (unsigned int *)&channel)) {
+ perror("PTP_MASK_EN_SINGLE");
+ } else {
+ printf("Channel %d exclusively enabled. Check on debugfs.\n", channel);
+ printf("Press any key to continue\n.");
+ getchar();
+ }
+ }
+
close(fd);
return 0;
}
diff --git a/tools/testing/selftests/ptrace/.gitignore b/tools/testing/selftests/ptrace/.gitignore
index 7bebf9534a86..b7dde152e75a 100644
--- a/tools/testing/selftests/ptrace/.gitignore
+++ b/tools/testing/selftests/ptrace/.gitignore
@@ -1,3 +1,5 @@
# SPDX-License-Identifier: GPL-2.0-only
get_syscall_info
+get_set_sud
peeksiginfo
+vmaccess
diff --git a/tools/testing/selftests/ptrace/Makefile b/tools/testing/selftests/ptrace/Makefile
index 2f1f532c39db..1c631740a730 100644
--- a/tools/testing/selftests/ptrace/Makefile
+++ b/tools/testing/selftests/ptrace/Makefile
@@ -1,6 +1,6 @@
# SPDX-License-Identifier: GPL-2.0-only
-CFLAGS += -std=c99 -pthread -iquote../../../../include/uapi -Wall
+CFLAGS += -std=c99 -pthread -Wall $(KHDR_INCLUDES)
-TEST_GEN_PROGS := get_syscall_info peeksiginfo vmaccess
+TEST_GEN_PROGS := get_syscall_info peeksiginfo vmaccess get_set_sud
include ../lib.mk
diff --git a/tools/testing/selftests/ptrace/get_set_sud.c b/tools/testing/selftests/ptrace/get_set_sud.c
new file mode 100644
index 000000000000..5297b10d25c3
--- /dev/null
+++ b/tools/testing/selftests/ptrace/get_set_sud.c
@@ -0,0 +1,72 @@
+// SPDX-License-Identifier: GPL-2.0
+#define _GNU_SOURCE
+#include "../kselftest_harness.h"
+#include <stdio.h>
+#include <string.h>
+#include <errno.h>
+#include <sys/wait.h>
+#include <sys/syscall.h>
+#include <sys/prctl.h>
+
+#include "linux/ptrace.h"
+
+static int sys_ptrace(int request, pid_t pid, void *addr, void *data)
+{
+ return syscall(SYS_ptrace, request, pid, addr, data);
+}
+
+TEST(get_set_sud)
+{
+ struct ptrace_sud_config config;
+ pid_t child;
+ int ret = 0;
+ int status;
+
+ child = fork();
+ ASSERT_GE(child, 0);
+ if (child == 0) {
+ ASSERT_EQ(0, sys_ptrace(PTRACE_TRACEME, 0, 0, 0)) {
+ TH_LOG("PTRACE_TRACEME: %m");
+ }
+ kill(getpid(), SIGSTOP);
+ _exit(1);
+ }
+
+ waitpid(child, &status, 0);
+
+ memset(&config, 0xff, sizeof(config));
+ config.mode = PR_SYS_DISPATCH_ON;
+
+ ret = sys_ptrace(PTRACE_GET_SYSCALL_USER_DISPATCH_CONFIG, child,
+ (void *)sizeof(config), &config);
+
+ ASSERT_EQ(ret, 0);
+ ASSERT_EQ(config.mode, PR_SYS_DISPATCH_OFF);
+ ASSERT_EQ(config.selector, 0);
+ ASSERT_EQ(config.offset, 0);
+ ASSERT_EQ(config.len, 0);
+
+ config.mode = PR_SYS_DISPATCH_ON;
+ config.selector = 0;
+ config.offset = 0x400000;
+ config.len = 0x1000;
+
+ ret = sys_ptrace(PTRACE_SET_SYSCALL_USER_DISPATCH_CONFIG, child,
+ (void *)sizeof(config), &config);
+
+ ASSERT_EQ(ret, 0);
+
+ memset(&config, 1, sizeof(config));
+ ret = sys_ptrace(PTRACE_GET_SYSCALL_USER_DISPATCH_CONFIG, child,
+ (void *)sizeof(config), &config);
+
+ ASSERT_EQ(ret, 0);
+ ASSERT_EQ(config.mode, PR_SYS_DISPATCH_ON);
+ ASSERT_EQ(config.selector, 0);
+ ASSERT_EQ(config.offset, 0x400000);
+ ASSERT_EQ(config.len, 0x1000);
+
+ kill(child, SIGKILL);
+}
+
+TEST_HARNESS_MAIN
diff --git a/tools/testing/selftests/ptrace/peeksiginfo.c b/tools/testing/selftests/ptrace/peeksiginfo.c
index 54900657eb44..a6884f66dc01 100644
--- a/tools/testing/selftests/ptrace/peeksiginfo.c
+++ b/tools/testing/selftests/ptrace/peeksiginfo.c
@@ -151,7 +151,7 @@ out:
int main(int argc, char *argv[])
{
- siginfo_t siginfo[SIGNR];
+ siginfo_t siginfo;
int i, exit_code = 1;
sigset_t blockmask;
pid_t child;
@@ -176,13 +176,13 @@ int main(int argc, char *argv[])
/* Send signals in process-wide and per-thread queues */
for (i = 0; i < SIGNR; i++) {
- siginfo->si_code = TEST_SICODE_SHARE;
- siginfo->si_int = i;
- sys_rt_sigqueueinfo(child, SIGRTMIN, siginfo);
+ siginfo.si_code = TEST_SICODE_SHARE;
+ siginfo.si_int = i;
+ sys_rt_sigqueueinfo(child, SIGRTMIN, &siginfo);
- siginfo->si_code = TEST_SICODE_PRIV;
- siginfo->si_int = i;
- sys_rt_tgsigqueueinfo(child, child, SIGRTMIN, siginfo);
+ siginfo.si_code = TEST_SICODE_PRIV;
+ siginfo.si_int = i;
+ sys_rt_tgsigqueueinfo(child, child, SIGRTMIN, &siginfo);
}
if (sys_ptrace(PTRACE_ATTACH, child, NULL, NULL) == -1)
diff --git a/tools/testing/selftests/rcutorture/bin/config2csv.sh b/tools/testing/selftests/rcutorture/bin/config2csv.sh
new file mode 100755
index 000000000000..0cf55f1bf654
--- /dev/null
+++ b/tools/testing/selftests/rcutorture/bin/config2csv.sh
@@ -0,0 +1,66 @@
+#!/bin/sh
+# SPDX-License-Identifier: GPL-2.0+
+#
+# Create a spreadsheet from torture-test Kconfig options and kernel boot
+# parameters. Run this in the directory containing the scenario files.
+#
+# Usage: config2csv path.csv [ "scenario1 scenario2 ..." ]
+#
+# By default, this script will take the list of scenarios from the CFLIST
+# file in that directory, otherwise it will consider only the scenarios
+# specified on the command line. It will examine each scenario's file
+# and also its .boot file, if present, and create a column in the .csv
+# output file. Note that "CFLIST" is a synonym for all the scenarios in the
+# CFLIST file, which allows easy comparison of those scenarios with selected
+# scenarios such as BUSTED that are normally omitted from CFLIST files.
+
+csvout=${1}
+if test -z "$csvout"
+then
+ echo "Need .csv output file as first argument."
+ exit 1
+fi
+shift
+defaultconfigs="`tr '\012' ' ' < CFLIST`"
+if test "$#" -eq 0
+then
+ scenariosarg=$defaultconfigs
+else
+ scenariosarg=$*
+fi
+scenarios="`echo $scenariosarg | sed -e "s/\<CFLIST\>/$defaultconfigs/g"`"
+
+T=`mktemp -d /tmp/config2latex.sh.XXXXXX`
+trap 'rm -rf $T' 0
+
+cat << '---EOF---' >> $T/p.awk
+END {
+---EOF---
+for i in $scenarios
+do
+ echo ' s["'$i'"] = 1;' >> $T/p.awk
+ grep -v '^#' < $i | grep -v '^ *$' > $T/p
+ if test -r $i.boot
+ then
+ tr -s ' ' '\012' < $i.boot | grep -v '^#' >> $T/p
+ fi
+ sed -e 's/^[^=]*$/&=?/' < $T/p |
+ sed -e 's/^\([^=]*\)=\(.*\)$/\tp["\1:'"$i"'"] = "\2";\n\tc["\1"] = 1;/' >> $T/p.awk
+done
+cat << '---EOF---' >> $T/p.awk
+ ns = asorti(s, ss);
+ nc = asorti(c, cs);
+ for (j = 1; j <= ns; j++)
+ printf ",\"%s\"", ss[j];
+ printf "\n";
+ for (i = 1; i <= nc; i++) {
+ printf "\"%s\"", cs[i];
+ for (j = 1; j <= ns; j++) {
+ printf ",\"%s\"", p[cs[i] ":" ss[j]];
+ }
+ printf "\n";
+ }
+}
+---EOF---
+awk -f $T/p.awk < /dev/null > $T/p.csv
+cp $T/p.csv $csvout
diff --git a/tools/testing/selftests/rcutorture/bin/config_override.sh b/tools/testing/selftests/rcutorture/bin/config_override.sh
index 90016c359e83..b3d2e7efa40c 100755
--- a/tools/testing/selftests/rcutorture/bin/config_override.sh
+++ b/tools/testing/selftests/rcutorture/bin/config_override.sh
@@ -29,9 +29,8 @@ else
exit 1
fi
-T=${TMPDIR-/tmp}/config_override.sh.$$
+T="`mktemp -d ${TMPDIR-/tmp}/config_override.sh.XXXXXX`"
trap 'rm -rf $T' 0
-mkdir $T
sed < $override -e 's/^/grep -v "/' -e 's/=.*$/="/' |
awk '
diff --git a/tools/testing/selftests/rcutorture/bin/configcheck.sh b/tools/testing/selftests/rcutorture/bin/configcheck.sh
index 31584cee84d7..99162d18bad3 100755
--- a/tools/testing/selftests/rcutorture/bin/configcheck.sh
+++ b/tools/testing/selftests/rcutorture/bin/configcheck.sh
@@ -3,41 +3,44 @@
#
# Usage: configcheck.sh .config .config-template
#
+# Non-empty output if errors detected.
+#
# Copyright (C) IBM Corporation, 2011
#
# Authors: Paul E. McKenney <paulmck@linux.ibm.com>
-T=${TMPDIR-/tmp}/abat-chk-config.sh.$$
+T="`mktemp -d ${TMPDIR-/tmp}/configcheck.sh.XXXXXX`"
trap 'rm -rf $T' 0
-mkdir $T
-cat $1 > $T/.config
+# function test_kconfig_enabled ( Kconfig-var=val )
+function test_kconfig_enabled () {
+ if ! grep -q "^$1$" $T/.config
+ then
+ echo :$1: improperly set
+ return 1
+ fi
+ return 0
+}
+
+# function test_kconfig_disabled ( Kconfig-var )
+function test_kconfig_disabled () {
+ if grep -q "^$1=n$" $T/.config
+ then
+ return 0
+ fi
+ if grep -q "^$1=" $T/.config
+ then
+ echo :$1=n: improperly set
+ return 1
+ fi
+ return 0
+}
-cat $2 | sed -e 's/\(.*\)=n/# \1 is not set/' -e 's/^#CHECK#//' |
-grep -v '^CONFIG_INITRAMFS_SOURCE' |
-awk '
-{
- print "if grep -q \"" $0 "\" < '"$T/.config"'";
- print "then";
- print "\t:";
- print "else";
- if ($1 == "#") {
- print "\tif grep -q \"" $2 "\" < '"$T/.config"'";
- print "\tthen";
- print "\t\tif test \"$firsttime\" = \"\""
- print "\t\tthen"
- print "\t\t\tfirsttime=1"
- print "\t\tfi"
- print "\t\techo \":" $2 ": improperly set\"";
- print "\telse";
- print "\t\t:";
- print "\tfi";
- } else {
- print "\tif test \"$firsttime\" = \"\""
- print "\tthen"
- print "\t\tfirsttime=1"
- print "\tfi"
- print "\techo \":" $0 ": improperly set\"";
- }
- print "fi";
- }' | sh
+sed -e 's/"//g' < $1 > $T/.config
+sed -e 's/^#CHECK#//' < $2 > $T/ConfigFragment
+grep '^CONFIG_.*=n$' $T/ConfigFragment |
+ sed -e 's/^/test_kconfig_disabled /' -e 's/=n$//' > $T/kconfig-n.sh
+. $T/kconfig-n.sh
+grep -v '^CONFIG_.*=n$' $T/ConfigFragment | grep '^CONFIG_' |
+ sed -e 's/^/test_kconfig_enabled /' > $T/kconfig-not-n.sh
+. $T/kconfig-not-n.sh
diff --git a/tools/testing/selftests/rcutorture/bin/configinit.sh b/tools/testing/selftests/rcutorture/bin/configinit.sh
index 93e80a42249a..28bdb3ac7ba6 100755
--- a/tools/testing/selftests/rcutorture/bin/configinit.sh
+++ b/tools/testing/selftests/rcutorture/bin/configinit.sh
@@ -15,9 +15,8 @@
#
# Authors: Paul E. McKenney <paulmck@linux.ibm.com>
-T=${TMPDIR-/tmp}/configinit.sh.$$
+T="`mktemp -d ${TMPDIR-/tmp}/configinit.sh.XXXXXX`"
trap 'rm -rf $T' 0
-mkdir $T
# Capture config spec file.
@@ -32,11 +31,11 @@ if test -z "$TORTURE_TRUST_MAKE"
then
make clean > $resdir/Make.clean 2>&1
fi
-make $TORTURE_DEFCONFIG > $resdir/Make.defconfig.out 2>&1
+make $TORTURE_KMAKE_ARG $TORTURE_DEFCONFIG > $resdir/Make.defconfig.out 2>&1
mv .config .config.sav
sh $T/upd.sh < .config.sav > .config
cp .config .config.new
-yes '' | make oldconfig > $resdir/Make.oldconfig.out 2> $resdir/Make.oldconfig.err
+yes '' | make $TORTURE_KMAKE_ARG oldconfig > $resdir/Make.oldconfig.out 2> $resdir/Make.oldconfig.err
# verify new config matches specification.
configcheck.sh .config $c
diff --git a/tools/testing/selftests/rcutorture/bin/console-badness.sh b/tools/testing/selftests/rcutorture/bin/console-badness.sh
new file mode 100755
index 000000000000..aad51e7c0183
--- /dev/null
+++ b/tools/testing/selftests/rcutorture/bin/console-badness.sh
@@ -0,0 +1,18 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0+
+#
+# Scan standard input for error messages, dumping any found to standard
+# output.
+#
+# Usage: console-badness.sh
+#
+# Copyright (C) 2020 Facebook, Inc.
+#
+# Authors: Paul E. McKenney <paulmck@kernel.org>
+
+grep -E 'Badness|WARNING:|Warn|BUG|===========|BUG: KCSAN:|Call Trace:|Oops:|detected stalls on CPUs/tasks:|self-detected stall on CPU|Stall ended before state dump start|\?\?\? Writer stall state|rcu_.*kthread starved for|!!!' |
+grep -v 'ODEBUG: ' |
+grep -v 'This means that this is a DEBUG kernel and it is' |
+grep -v 'Warning: unable to open an initial console' |
+grep -v 'Warning: Failed to add ttynull console. No stdin, stdout, and stderr.*the init process!' |
+grep -v 'NOHZ tick-stop error: Non-RCU local softirq work is pending, handler'
diff --git a/tools/testing/selftests/rcutorture/bin/cpus2use.sh b/tools/testing/selftests/rcutorture/bin/cpus2use.sh
index 1dbfb62567d2..6bb993001680 100755
--- a/tools/testing/selftests/rcutorture/bin/cpus2use.sh
+++ b/tools/testing/selftests/rcutorture/bin/cpus2use.sh
@@ -21,7 +21,6 @@ then
awk -v ncpus=$ncpus '{ print ncpus * ($7 + $NF) / 100 }'`
else
# No mpstat command, so use all available CPUs.
- echo The mpstat command is not available, so greedily using all CPUs.
idlecpus=$ncpus
fi
awk -v ncpus=$ncpus -v idlecpus=$idlecpus < /dev/null '
diff --git a/tools/testing/selftests/rcutorture/bin/functions.sh b/tools/testing/selftests/rcutorture/bin/functions.sh
index 12810229fddc..6e415ddb206f 100644..100755
--- a/tools/testing/selftests/rcutorture/bin/functions.sh
+++ b/tools/testing/selftests/rcutorture/bin/functions.sh
@@ -45,7 +45,7 @@ checkarg () {
configfrag_boot_params () {
if test -r "$2.boot"
then
- echo $1 `grep -v '^#' "$2.boot" | tr '\012' ' '`
+ echo `grep -v '^#' "$2.boot" | tr '\012' ' '` $1
else
echo $1
fi
@@ -108,6 +108,39 @@ configfrag_hotplug_cpu () {
grep -q '^CONFIG_HOTPLUG_CPU=y$' "$1"
}
+# get_starttime
+#
+# Returns a cookie identifying the current time.
+get_starttime () {
+ awk 'BEGIN { print systime() }' < /dev/null
+}
+
+# get_starttime_duration starttime
+#
+# Given the return value from get_starttime, compute a human-readable
+# string denoting the time since get_starttime.
+get_starttime_duration () {
+ awk -v starttime=$1 '
+ BEGIN {
+ ts = systime() - starttime;
+ tm = int(ts / 60);
+ th = int(ts / 3600);
+ td = int(ts / 86400);
+ d = td;
+ h = th - td * 24;
+ m = tm - th * 60;
+ s = ts - tm * 60;
+ if (d >= 1)
+ printf "%dd %d:%02d:%02d\n", d, h, m, s
+ else if (h >= 1)
+ printf "%d:%02d:%02d\n", h, m, s
+ else if (m >= 1)
+ printf "%d:%02d.0\n", m, s
+ else
+ print s " seconds"
+ }' < /dev/null
+}
+
# identify_boot_image qemu-cmd
#
# Returns the relative path to the kernel build image. This will be
@@ -126,6 +159,9 @@ identify_boot_image () {
qemu-system-aarch64)
echo arch/arm64/boot/Image
;;
+ qemu-system-s390x)
+ echo arch/s390/boot/bzImage
+ ;;
*)
echo vmlinux
;;
@@ -151,6 +187,9 @@ identify_qemu () {
elif echo $u | grep -q aarch64
then
echo qemu-system-aarch64
+ elif echo $u | grep -q 'IBM S/390'
+ then
+ echo qemu-system-s390x
elif uname -a | grep -q ppc64
then
echo qemu-system-ppc64
@@ -169,6 +208,8 @@ identify_qemu () {
# Output arguments for the qemu "-append" string based on CPU type
# and the TORTURE_QEMU_INTERACTIVE environment variable.
identify_qemu_append () {
+ echo debug_boot_weak_hash
+ echo panic=-1
local console=ttyS0
case "$1" in
qemu-system-x86_64|qemu-system-i386)
@@ -209,15 +250,12 @@ identify_qemu_args () {
echo -machine virt,gic-version=host -cpu host
;;
qemu-system-ppc64)
- echo -enable-kvm -M pseries -nodefaults
+ echo -M pseries -nodefaults
echo -device spapr-vscsi
if test -n "$TORTURE_QEMU_INTERACTIVE" -a -n "$TORTURE_QEMU_MAC"
then
echo -device spapr-vlan,netdev=net0,mac=$TORTURE_QEMU_MAC
echo -netdev bridge,br=br0,id=net0
- elif test -n "$TORTURE_QEMU_INTERACTIVE"
- then
- echo -net nic -net user
fi
;;
esac
@@ -234,7 +272,7 @@ identify_qemu_args () {
# Returns the number of virtual CPUs available to the aggregate of the
# guest OSes.
identify_qemu_vcpus () {
- lscpu | grep '^CPU(s):' | sed -e 's/CPU(s)://'
+ getconf _NPROCESSORS_ONLN
}
# print_bug
@@ -269,9 +307,56 @@ specify_qemu_cpus () {
echo $2 -smp $3
;;
qemu-system-ppc64)
- nt="`lscpu | grep '^NUMA node0' | sed -e 's/^[^,]*,\([0-9]*\),.*$/\1/'`"
+ nt="`lscpu | sed -n 's/^Thread(s) per core:\s*//p'`"
echo $2 -smp cores=`expr \( $3 + $nt - 1 \) / $nt`,threads=$nt
;;
esac
fi
}
+
+# specify_qemu_net qemu-args
+#
+# Appends a string containing "-net none" to qemu-args, unless the incoming
+# qemu-args already contains "-smp" or unless the TORTURE_QEMU_INTERACTIVE
+# environment variable is set, in which case the string that is be added is
+# instead "-net nic -net user".
+specify_qemu_net () {
+ if echo $1 | grep -q -e -net
+ then
+ echo $1
+ elif test -n "$TORTURE_QEMU_INTERACTIVE"
+ then
+ echo $1 -net nic -net user
+ else
+ echo $1 -net none
+ fi
+}
+
+# Extract the ftrace output from the console log output
+# The ftrace output in the original logs look like:
+# Dumping ftrace buffer:
+# ---------------------------------
+# [...]
+# ---------------------------------
+extract_ftrace_from_console() {
+ awk < "$1" '
+
+ /Dumping ftrace buffer:/ {
+ buffer_count++
+ print "Ftrace dump " buffer_count ":"
+ capture = 1
+ next
+ }
+
+ /---------------------------------/ {
+ if(capture == 1) {
+ capture = 2
+ next
+ } else if(capture == 2) {
+ capture = 0
+ print ""
+ }
+ }
+
+ capture == 2'
+}
diff --git a/tools/testing/selftests/rcutorture/bin/jitter.sh b/tools/testing/selftests/rcutorture/bin/jitter.sh
index 30cb5b27d32e..fd1ffaa5a135 100755
--- a/tools/testing/selftests/rcutorture/bin/jitter.sh
+++ b/tools/testing/selftests/rcutorture/bin/jitter.sh
@@ -5,10 +5,11 @@
# of this script is to inflict random OS jitter on a concurrently running
# test.
#
-# Usage: jitter.sh me duration [ sleepmax [ spinmax ] ]
+# Usage: jitter.sh me jittering-path duration [ sleepmax [ spinmax ] ]
#
# me: Random-number-generator seed salt.
# duration: Time to run in seconds.
+# jittering-path: Path to file whose removal will stop this script.
# sleepmax: Maximum microseconds to sleep, defaults to one second.
# spinmax: Maximum microseconds to spin, defaults to one millisecond.
#
@@ -17,9 +18,10 @@
# Authors: Paul E. McKenney <paulmck@linux.ibm.com>
me=$(($1 * 1000))
-duration=$2
-sleepmax=${3-1000000}
-spinmax=${4-1000}
+jittering=$2
+duration=$3
+sleepmax=${4-1000000}
+spinmax=${5-1000}
n=1
@@ -46,6 +48,12 @@ do
exit 0;
fi
+ # Check for stop request.
+ if ! test -f "$jittering"
+ then
+ exit 1;
+ fi
+
# Set affinity to randomly selected online CPU
if cpus=`grep 1 /sys/devices/system/cpu/*/online 2>&1 |
sed -e 's,/[^/]*$,,' -e 's/^[^0-9]*//'`
@@ -60,16 +68,12 @@ do
cpumask=`awk -v cpus="$cpus" -v me=$me -v n=$n 'BEGIN {
srand(n + me + systime());
ncpus = split(cpus, ca);
- curcpu = ca[int(rand() * ncpus + 1)];
- mask = lshift(1, curcpu);
- if (mask + 0 <= 0)
- mask = 1;
- printf("%#x\n", mask);
+ print ca[int(rand() * ncpus + 1)];
}' < /dev/null`
n=$(($n+1))
- if ! taskset -p $cpumask $$ > /dev/null 2>&1
+ if ! taskset -c -p $cpumask $$ > /dev/null 2>&1
then
- echo taskset failure: '"taskset -p ' $cpumask $$ '"'
+ echo taskset failure: '"taskset -c -p ' $cpumask $$ '"'
exit 1
fi
diff --git a/tools/testing/selftests/rcutorture/bin/jitterstart.sh b/tools/testing/selftests/rcutorture/bin/jitterstart.sh
new file mode 100644
index 000000000000..3d710ad291c3
--- /dev/null
+++ b/tools/testing/selftests/rcutorture/bin/jitterstart.sh
@@ -0,0 +1,37 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0+
+#
+# Start up the specified number of jitter.sh scripts in the background.
+#
+# Usage: . jitterstart.sh n jittering-dir duration [ sleepmax [ spinmax ] ]
+#
+# n: Number of jitter.sh scripts to start up.
+# jittering-dir: Directory in which to put "jittering" file.
+# duration: Time to run in seconds.
+# sleepmax: Maximum microseconds to sleep, defaults to one second.
+# spinmax: Maximum microseconds to spin, defaults to one millisecond.
+#
+# Copyright (C) 2021 Facebook, Inc.
+#
+# Authors: Paul E. McKenney <paulmck@kernel.org>
+
+jitter_n=$1
+if test -z "$jitter_n"
+then
+ echo jitterstart.sh: Missing count of jitter.sh scripts to start.
+ exit 33
+fi
+jittering_dir=$2
+if test -z "$jittering_dir"
+then
+ echo jitterstart.sh: Missing directory in which to place jittering file.
+ exit 34
+fi
+shift
+shift
+
+touch ${jittering_dir}/jittering
+for ((jitter_i = 1; jitter_i <= $jitter_n; jitter_i++))
+do
+ jitter.sh $jitter_i "${jittering_dir}/jittering" "$@" &
+done
diff --git a/tools/testing/selftests/rcutorture/bin/jitterstop.sh b/tools/testing/selftests/rcutorture/bin/jitterstop.sh
new file mode 100644
index 000000000000..576a4cf4b79a
--- /dev/null
+++ b/tools/testing/selftests/rcutorture/bin/jitterstop.sh
@@ -0,0 +1,23 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0+
+#
+# Remove the "jittering" file, signaling the jitter.sh scripts to stop,
+# then wait for them to terminate.
+#
+# Usage: . jitterstop.sh jittering-dir
+#
+# jittering-dir: Directory containing "jittering" file.
+#
+# Copyright (C) 2021 Facebook, Inc.
+#
+# Authors: Paul E. McKenney <paulmck@kernel.org>
+
+jittering_dir=$1
+if test -z "$jittering_dir"
+then
+ echo jitterstop.sh: Missing directory in which to place jittering file.
+ exit 34
+fi
+
+rm -f ${jittering_dir}/jittering
+wait
diff --git a/tools/testing/selftests/rcutorture/bin/kcsan-collapse.sh b/tools/testing/selftests/rcutorture/bin/kcsan-collapse.sh
index e5cc6b2f195e..1af5d6b86b39 100755
--- a/tools/testing/selftests/rcutorture/bin/kcsan-collapse.sh
+++ b/tools/testing/selftests/rcutorture/bin/kcsan-collapse.sh
@@ -14,7 +14,7 @@ if test -z "$TORTURE_KCONFIG_KCSAN_ARG"
then
exit 0
fi
-cat $1/*/console.log |
+find $1 -name console.log -exec cat {} \; |
grep "BUG: KCSAN: " |
sed -e 's/^\[[^]]*] //' |
sort |
diff --git a/tools/testing/selftests/rcutorture/bin/kvm-again.sh b/tools/testing/selftests/rcutorture/bin/kvm-again.sh
new file mode 100755
index 000000000000..88ca4e368489
--- /dev/null
+++ b/tools/testing/selftests/rcutorture/bin/kvm-again.sh
@@ -0,0 +1,221 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0+
+#
+# Rerun a series of tests under KVM.
+#
+# Usage: kvm-again.sh /path/to/old/run [ options ]
+#
+# Copyright (C) 2021 Facebook, Inc.
+#
+# Authors: Paul E. McKenney <paulmck@kernel.org>
+
+scriptname=$0
+args="$*"
+
+T="`mktemp -d ${TMPDIR-/tmp}/kvm-again.sh.XXXXXX`"
+trap 'rm -rf $T' 0
+
+if ! test -d tools/testing/selftests/rcutorture/bin
+then
+ echo $scriptname must be run from top-level directory of kernel source tree.
+ exit 1
+fi
+
+oldrun=$1
+shift
+if ! test -d "$oldrun"
+then
+ echo "Usage: $scriptname /path/to/old/run [ options ]"
+ exit 1
+fi
+if ! cp "$oldrun/scenarios" $T/scenarios.oldrun
+then
+ # Later on, can reconstitute this from console.log files.
+ echo Prior run batches file does not exist: $oldrun/batches
+ exit 1
+fi
+
+if test -f "$oldrun/torture_suite"
+then
+ torture_suite="`cat $oldrun/torture_suite`"
+elif test -f "$oldrun/TORTURE_SUITE"
+then
+ torture_suite="`cat $oldrun/TORTURE_SUITE`"
+else
+ echo "Prior run torture_suite file does not exist: $oldrun/{torture_suite,TORTURE_SUITE}"
+ exit 1
+fi
+
+RCUTORTURE="`pwd`/tools/testing/selftests/rcutorture"; export RCUTORTURE
+PATH=${RCUTORTURE}/bin:$PATH; export PATH
+. functions.sh
+
+bootargs=
+dryrun=
+dur=
+default_link="cp -R"
+resdir="`pwd`/tools/testing/selftests/rcutorture/res"
+rundir="$resdir/`date +%Y.%m.%d-%H.%M.%S-again`"
+got_datestamp=
+got_rundir=
+
+startdate="`date`"
+starttime="`get_starttime`"
+
+usage () {
+ echo "Usage: $scriptname $oldrun [ arguments ]:"
+ echo " --bootargs kernel-boot-arguments"
+ echo " --datestamp string"
+ echo " --dryrun"
+ echo " --duration minutes | <seconds>s | <hours>h | <days>d"
+ echo " --link hard|soft|copy"
+ echo " --remote"
+ echo " --rundir /new/res/path"
+ echo "Command line: $scriptname $args"
+ exit 1
+}
+
+while test $# -gt 0
+do
+ case "$1" in
+ --bootargs|--bootarg)
+ checkarg --bootargs "(list of kernel boot arguments)" "$#" "$2" '.*' '^--'
+ bootargs="$bootargs $2"
+ shift
+ ;;
+ --datestamp)
+ checkarg --datestamp "(relative pathname)" "$#" "$2" '^[a-zA-Z0-9._/-]*$' '^--'
+ if test -n "$got_rundir" || test -n "$got_datestamp"
+ then
+ echo Only one of --datestamp or --rundir may be specified
+ usage
+ fi
+ got_datestamp=y
+ ds=$2
+ rundir="$resdir/$ds"
+ if test -e "$rundir"
+ then
+ echo "--datestamp $2: Already exists."
+ usage
+ fi
+ shift
+ ;;
+ --dryrun)
+ dryrun=1
+ ;;
+ --duration)
+ checkarg --duration "(minutes)" $# "$2" '^[0-9][0-9]*\(s\|m\|h\|d\|\)$' '^error'
+ mult=60
+ if echo "$2" | grep -q 's$'
+ then
+ mult=1
+ elif echo "$2" | grep -q 'h$'
+ then
+ mult=3600
+ elif echo "$2" | grep -q 'd$'
+ then
+ mult=86400
+ fi
+ ts=`echo $2 | sed -e 's/[smhd]$//'`
+ dur=$(($ts*mult))
+ shift
+ ;;
+ --link)
+ checkarg --link "hard|soft|copy" "$#" "$2" 'hard\|soft\|copy' '^--'
+ case "$2" in
+ copy)
+ arg_link="cp -R"
+ ;;
+ hard)
+ arg_link="cp -Rl"
+ ;;
+ soft)
+ arg_link="cp -Rs"
+ ;;
+ esac
+ shift
+ ;;
+ --remote)
+ arg_remote=1
+ default_link="cp -as"
+ ;;
+ --rundir)
+ checkarg --rundir "(absolute pathname)" "$#" "$2" '^/' '^error'
+ if test -n "$got_rundir" || test -n "$got_datestamp"
+ then
+ echo Only one of --datestamp or --rundir may be specified
+ usage
+ fi
+ got_rundir=y
+ rundir=$2
+ if test -e "$rundir"
+ then
+ echo "--rundir $2: Already exists."
+ usage
+ fi
+ shift
+ ;;
+ *)
+ if test -n "$1"
+ then
+ echo Unknown argument $1
+ usage
+ fi
+ ;;
+ esac
+ shift
+done
+if test -z "$arg_link"
+then
+ arg_link="$default_link"
+fi
+
+echo ---- Re-run results directory: $rundir
+
+# Copy old run directory tree over and adjust.
+mkdir -p "`dirname "$rundir"`"
+if ! $arg_link "$oldrun" "$rundir"
+then
+ echo "Cannot copy from $oldrun to $rundir."
+ usage
+fi
+rm -f "$rundir"/*/{console.log,console.log.diags,qemu_pid,qemu-pid,qemu-retval,Warnings,kvm-test-1-run.sh.out,kvm-test-1-run-qemu.sh.out,vmlinux} "$rundir"/log
+touch "$rundir/log"
+echo $scriptname $args | tee -a "$rundir/log"
+echo $oldrun > "$rundir/re-run"
+if ! test -d "$rundir/../../bin"
+then
+ $arg_link "$oldrun/../../bin" "$rundir/../.."
+fi
+for i in $rundir/*/qemu-cmd
+do
+ cp "$i" $T
+ qemu_cmd_dir="`dirname "$i"`"
+ kernel_dir="`echo $qemu_cmd_dir | sed -e 's/\.[0-9]\+$//'`"
+ jitter_dir="`dirname "$kernel_dir"`"
+ kvm-transform.sh "$kernel_dir/bzImage" "$qemu_cmd_dir/console.log" "$jitter_dir" "$dur" "$bootargs" < $T/qemu-cmd > $i
+ if test -n "$arg_remote"
+ then
+ echo "# TORTURE_KCONFIG_GDB_ARG=''" >> $i
+ fi
+done
+
+# Extract settings from the last qemu-cmd file transformed above.
+grep '^#' $i | sed -e 's/^# //' > $T/qemu-cmd-settings
+. $T/qemu-cmd-settings
+
+grep -v '^#' $T/scenarios.oldrun | awk '
+{
+ curbatch = "";
+ for (i = 2; i <= NF; i++)
+ curbatch = curbatch " " $i;
+ print "kvm-test-1-run-batch.sh" curbatch;
+}' > $T/runbatches.sh
+
+if test -n "$dryrun"
+then
+ echo ---- Dryrun complete, directory: $rundir | tee -a "$rundir/log"
+else
+ ( cd "$rundir"; sh $T/runbatches.sh ) | tee -a "$rundir/log"
+ kvm-end-run-stats.sh "$rundir" "$starttime"
+fi
diff --git a/tools/testing/selftests/rcutorture/bin/kvm-assign-cpus.sh b/tools/testing/selftests/rcutorture/bin/kvm-assign-cpus.sh
new file mode 100755
index 000000000000..46b08cd16ba5
--- /dev/null
+++ b/tools/testing/selftests/rcutorture/bin/kvm-assign-cpus.sh
@@ -0,0 +1,105 @@
+#!/bin/sh
+# SPDX-License-Identifier: GPL-2.0+
+#
+# Produce awk statements roughly depicting the system's CPU and cache
+# layout. If the required information is not available, produce
+# error messages as awk comments. Successful exit regardless.
+#
+# Usage: kvm-assign-cpus.sh /path/to/sysfs
+
+T="`mktemp -d ${TMPDIR-/tmp}/kvm-assign-cpus.sh.XXXXXX`"
+trap 'rm -rf $T' 0 2
+
+sysfsdir=${1-/sys/devices/system/node}
+if ! cd "$sysfsdir" > $T/msg 2>&1
+then
+ sed -e 's/^/# /' < $T/msg
+ exit 0
+fi
+nodelist="`ls -d node*`"
+for i in node*
+do
+ if ! test -d $i/
+ then
+ echo "# Not a directory: $sysfsdir/node*"
+ exit 0
+ fi
+ for j in $i/cpu*/cache/index*
+ do
+ if ! test -d $j/
+ then
+ echo "# Not a directory: $sysfsdir/$j"
+ exit 0
+ else
+ break
+ fi
+ done
+ indexlist="`ls -d $i/cpu* | grep 'cpu[0-9][0-9]*' | head -1 | sed -e 's,^.*$,ls -d &/cache/index*,' | sh | sed -e 's,^.*/,,'`"
+ break
+done
+for i in node*/cpu*/cache/index*/shared_cpu_list
+do
+ if ! test -f $i
+ then
+ echo "# Not a file: $sysfsdir/$i"
+ exit 0
+ else
+ break
+ fi
+done
+firstshared=
+for i in $indexlist
+do
+ rm -f $T/cpulist
+ for n in node*
+ do
+ f="$n/cpu*/cache/$i/shared_cpu_list"
+ if ! cat $f > $T/msg 2>&1
+ then
+ sed -e 's/^/# /' < $T/msg
+ exit 0
+ fi
+ cat $f >> $T/cpulist
+ done
+ if grep -q '[-,]' $T/cpulist
+ then
+ if test -z "$firstshared"
+ then
+ firstshared="$i"
+ fi
+ fi
+done
+if test -z "$firstshared"
+then
+ splitindex="`echo $indexlist | sed -e 's/ .*$//'`"
+else
+ splitindex="$firstshared"
+fi
+nodenum=0
+for n in node*
+do
+ cat $n/cpu*/cache/$splitindex/shared_cpu_list | sort -u -k1n |
+ awk -v nodenum="$nodenum" '
+ BEGIN {
+ idx = 0;
+ }
+
+ {
+ nlists = split($0, cpulists, ",");
+ for (i = 1; i <= nlists; i++) {
+ listsize = split(cpulists[i], cpus, "-");
+ if (listsize == 1)
+ cpus[2] = cpus[1];
+ for (j = cpus[1]; j <= cpus[2]; j++) {
+ print "cpu[" nodenum "][" idx "] = " j ";";
+ idx++;
+ }
+ }
+ }
+
+ END {
+ print "nodecpus[" nodenum "] = " idx ";";
+ }'
+ nodenum=`expr $nodenum + 1`
+done
+echo "numnodes = $nodenum;"
diff --git a/tools/testing/selftests/rcutorture/bin/kvm-build.sh b/tools/testing/selftests/rcutorture/bin/kvm-build.sh
index 18d6518504ee..11f8d232b0ee 100755
--- a/tools/testing/selftests/rcutorture/bin/kvm-build.sh
+++ b/tools/testing/selftests/rcutorture/bin/kvm-build.sh
@@ -9,6 +9,12 @@
#
# Authors: Paul E. McKenney <paulmck@linux.ibm.com>
+if test -f "$TORTURE_STOPFILE"
+then
+ echo "kvm-build.sh early exit due to run STOP request"
+ exit 1
+fi
+
config_template=${1}
if test -z "$config_template" -o ! -f "$config_template" -o ! -r "$config_template"
then
@@ -17,9 +23,8 @@ then
fi
resdir=${2}
-T=${TMPDIR-/tmp}/test-linux.sh.$$
+T="`mktemp -d ${TMPDIR-/tmp}/kvm-build.sh.XXXXXX`"
trap 'rm -rf $T' 0
-mkdir $T
cp ${config_template} $T/config
cat << ___EOF___ >> $T/config
@@ -34,13 +39,15 @@ if test $retval -gt 1
then
exit 2
fi
-ncpus=`cpus2use.sh`
-make -j$ncpus $TORTURE_KMAKE_ARG > $resdir/Make.out 2>&1
+
+# Tell "make" to use double the number of real CPUs on the build system.
+ncpus="`getconf _NPROCESSORS_ONLN`"
+make -j$((2 * ncpus)) $TORTURE_KMAKE_ARG > $resdir/Make.out 2>&1
retval=$?
-if test $retval -ne 0 || grep "rcu[^/]*": < $resdir/Make.out | egrep -q "Stop|Error|error:|warning:" || egrep -q "Stop|Error|error:" < $resdir/Make.out
+if test $retval -ne 0 || grep "rcu[^/]*": < $resdir/Make.out | grep -E -q "Stop|Error|error:|warning:" || grep -E -q "Stop|Error|error:" < $resdir/Make.out
then
echo Kernel build error
- egrep "Stop|Error|error:|warning:" < $resdir/Make.out
+ grep -E "Stop|Error|error:|warning:" < $resdir/Make.out
echo Run aborted.
exit 3
fi
diff --git a/tools/testing/selftests/rcutorture/bin/kvm-check-branches.sh b/tools/testing/selftests/rcutorture/bin/kvm-check-branches.sh
new file mode 100755
index 000000000000..ed0ec7f0927e
--- /dev/null
+++ b/tools/testing/selftests/rcutorture/bin/kvm-check-branches.sh
@@ -0,0 +1,102 @@
+#!/bin/sh
+# SPDX-License-Identifier: GPL-2.0+
+#
+# Run a group of kvm.sh tests on the specified commits. This currently
+# unconditionally does three-minute runs on each scenario in CFLIST,
+# taking advantage of all available CPUs and trusting the "make" utility.
+# In the short term, adjustments can be made by editing this script and
+# CFLIST. If some adjustments appear to have ongoing value, this script
+# might grow some command-line arguments.
+#
+# Usage: kvm-check-branches.sh commit1 commit2..commit3 commit4 ...
+#
+# This script considers its arguments one at a time. If more elaborate
+# specification of commits is needed, please use "git rev-list" to
+# produce something that this simple script can understand. The reason
+# for retaining the simplicity is that it allows the user to more easily
+# see which commit came from which branch.
+#
+# This script creates a yyyy.mm.dd-hh.mm.ss-group entry in the "res"
+# directory. The calls to kvm.sh create the usual entries, but this script
+# moves them under the yyyy.mm.dd-hh.mm.ss-group entry, each in its own
+# directory numbered in run order, that is, "0001", "0002", and so on.
+# For successful runs, the large build artifacts are removed. Doing this
+# reduces the disk space required by about two orders of magnitude for
+# successful runs.
+#
+# Copyright (C) Facebook, 2020
+#
+# Authors: Paul E. McKenney <paulmck@kernel.org>
+
+if ! git status > /dev/null 2>&1
+then
+ echo '!!!' This script needs to run in a git archive. 1>&2
+ echo '!!!' Giving up. 1>&2
+ exit 1
+fi
+
+# Remember where we started so that we can get back at the end.
+curcommit="`git status | head -1 | awk '{ print $NF }'`"
+
+nfail=0
+ntry=0
+resdir="tools/testing/selftests/rcutorture/res"
+ds="`date +%Y.%m.%d-%H.%M.%S`-group"
+if ! test -e $resdir
+then
+ mkdir $resdir || :
+fi
+mkdir $resdir/$ds
+echo Results directory: $resdir/$ds
+
+RCUTORTURE="`pwd`/tools/testing/selftests/rcutorture"; export RCUTORTURE
+PATH=${RCUTORTURE}/bin:$PATH; export PATH
+. functions.sh
+echo Using all `identify_qemu_vcpus` CPUs.
+
+# Each pass through this loop does one command-line argument.
+for gitbr in $@
+do
+ echo ' --- git branch ' $gitbr
+
+ # Each pass through this loop tests one commit.
+ for i in `git rev-list "$gitbr"`
+ do
+ ntry=`expr $ntry + 1`
+ idir=`awk -v ntry="$ntry" 'END { printf "%04d", ntry; }' < /dev/null`
+ echo ' --- commit ' $i from branch $gitbr
+ date
+ mkdir $resdir/$ds/$idir
+ echo $gitbr > $resdir/$ds/$idir/gitbr
+ echo $i >> $resdir/$ds/$idir/gitbr
+
+ # Test the specified commit.
+ git checkout $i > $resdir/$ds/$idir/git-checkout.out 2>&1
+ echo git checkout return code: $? "(Commit $ntry: $i)"
+ kvm.sh --allcpus --duration 3 --trust-make --datestamp "$ds/$idir" > $resdir/$ds/$idir/kvm.sh.out 2>&1
+ ret=$?
+ echo kvm.sh return code $ret for commit $i from branch $gitbr
+ echo Run results: $resdir/$ds/$idir
+ if test "$ret" -ne 0
+ then
+ # Failure, so leave all evidence intact.
+ nfail=`expr $nfail + 1`
+ else
+ # Success, so remove large files to save about 1GB.
+ ( cd $resdir/$ds/$idir/$rrd; rm -f */vmlinux */bzImage */System.map */Module.symvers )
+ fi
+ done
+done
+date
+
+# Go back to the original commit.
+git checkout "$curcommit"
+
+if test $nfail -ne 0
+then
+ echo '!!! ' $nfail failures in $ntry 'runs!!!'
+ exit 1
+else
+ echo No failures in $ntry runs.
+ exit 0
+fi
diff --git a/tools/testing/selftests/rcutorture/bin/kvm-end-run-stats.sh b/tools/testing/selftests/rcutorture/bin/kvm-end-run-stats.sh
new file mode 100755
index 000000000000..2b56baceb05d
--- /dev/null
+++ b/tools/testing/selftests/rcutorture/bin/kvm-end-run-stats.sh
@@ -0,0 +1,39 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0+
+#
+# Check the status of the specified run.
+#
+# Usage: kvm-end-run-stats.sh /path/to/run starttime
+#
+# Copyright (C) 2021 Facebook, Inc.
+#
+# Authors: Paul E. McKenney <paulmck@kernel.org>
+
+# scriptname=$0
+# args="$*"
+rundir="$1"
+if ! test -d "$rundir"
+then
+ echo kvm-end-run-stats.sh: Specified run directory does not exist: $rundir
+ exit 1
+fi
+
+T="`mktemp -d ${TMPDIR-/tmp}/kvm-end-run-stats.sh.XXXXXX`"
+trap 'rm -rf $T' 0
+
+RCUTORTURE="`pwd`/tools/testing/selftests/rcutorture"; export RCUTORTURE
+PATH=${RCUTORTURE}/bin:$PATH; export PATH
+. functions.sh
+default_starttime="`get_starttime`"
+starttime="${2-default_starttime}"
+
+echo | tee -a "$rundir/log"
+echo | tee -a "$rundir/log"
+echo " --- `date` Test summary:" | tee -a "$rundir/log"
+echo Results directory: $rundir | tee -a "$rundir/log"
+kcsan-collapse.sh "$rundir" | tee -a "$rundir/log"
+kvm-recheck.sh "$rundir" > $T/kvm-recheck.sh.out 2>&1
+ret=$?
+cat $T/kvm-recheck.sh.out | tee -a "$rundir/log"
+echo " --- Done at `date` (`get_starttime_duration $starttime`) exitcode $ret" | tee -a "$rundir/log"
+exit $ret
diff --git a/tools/testing/selftests/rcutorture/bin/kvm-find-errors.sh b/tools/testing/selftests/rcutorture/bin/kvm-find-errors.sh
index 6f50722f251f..28981007465b 100755
--- a/tools/testing/selftests/rcutorture/bin/kvm-find-errors.sh
+++ b/tools/testing/selftests/rcutorture/bin/kvm-find-errors.sh
@@ -30,21 +30,29 @@ editor=${EDITOR-vi}
files=
for i in ${rundir}/*/Make.out
do
- if egrep -q "error:|warning:" < $i
+ scenariodir="`dirname $i`"
+ scenariobasedir="`echo ${scenariodir} | sed -e 's/\.[0-9]*$//'`"
+ if grep -E -q "error:|warning:|^ld: .*undefined reference to" < $i
then
- egrep "error:|warning:" < $i > $i.diags
+ grep -E "error:|warning:|^ld: .*undefined reference to" < $i > $i.diags
+ files="$files $i.diags $i"
+ elif ! test -f ${scenariobasedir}/vmlinux && ! test -f ${scenariobasedir}/vmlinux.xz && ! test -f "${rundir}/re-run"
+ then
+ echo No ${scenariobasedir}/vmlinux file > $i.diags
files="$files $i.diags $i"
fi
done
if test -n "$files"
then
$editor $files
+ editorret=1
else
echo No build errors.
fi
-if grep -q -e "--buildonly" < ${rundir}/log
+if grep -q -e "--build-\?only" < ${rundir}/log && ! test -f "${rundir}/remote-log"
then
echo Build-only run, no console logs to check.
+ exit $editorret
fi
# Find console logs with errors
@@ -62,5 +70,10 @@ then
exit 1
else
echo No errors in console logs.
- exit 0
+ if test -n "$editorret"
+ then
+ exit $editorret
+ else
+ exit 0
+ fi
fi
diff --git a/tools/testing/selftests/rcutorture/bin/kvm-get-cpus-script.sh b/tools/testing/selftests/rcutorture/bin/kvm-get-cpus-script.sh
new file mode 100755
index 000000000000..20c7c53c5795
--- /dev/null
+++ b/tools/testing/selftests/rcutorture/bin/kvm-get-cpus-script.sh
@@ -0,0 +1,88 @@
+#!/bin/sh
+# SPDX-License-Identifier: GPL-2.0+
+#
+# Create an awk script that takes as input numbers of CPUs and outputs
+# lists of CPUs, one per line in both cases.
+#
+# Usage: kvm-get-cpus-script.sh /path/to/cpu/arrays /path/to/put/script [ /path/to/state ]
+#
+# The CPU arrays are output by kvm-assign-cpus.sh, and are valid awk
+# statements initializing the variables describing the system's topology.
+#
+# The optional state is input by this script (if the file exists and is
+# non-empty), and can also be output by this script.
+
+cpuarrays="${1-/sys/devices/system/node}"
+scriptfile="${2}"
+statefile="${3}"
+
+if ! test -f "$cpuarrays"
+then
+ echo "File not found: $cpuarrays" 1>&2
+ exit 1
+fi
+scriptdir="`dirname "$scriptfile"`"
+if ! test -d "$scriptdir" || ! test -x "$scriptdir" || ! test -w "$scriptdir"
+then
+ echo "Directory not usable for script output: $scriptdir"
+ exit 1
+fi
+
+cat << '___EOF___' > "$scriptfile"
+BEGIN {
+___EOF___
+cat "$cpuarrays" >> "$scriptfile"
+if test -r "$statefile"
+then
+ cat "$statefile" >> "$scriptfile"
+fi
+cat << '___EOF___' >> "$scriptfile"
+}
+
+# Do we have the system architecture to guide CPU affinity?
+function gotcpus()
+{
+ return numnodes != "";
+}
+
+# Return a comma-separated list of the next n CPUs.
+function nextcpus(n, i, s)
+{
+ for (i = 0; i < n; i++) {
+ if (nodecpus[curnode] == "")
+ curnode = 0;
+ if (cpu[curnode][curcpu[curnode]] == "")
+ curcpu[curnode] = 0;
+ if (s != "")
+ s = s ",";
+ s = s cpu[curnode][curcpu[curnode]];
+ curcpu[curnode]++;
+ curnode++
+ }
+ return s;
+}
+
+# Dump out the current node/CPU state so that a later invocation of this
+# script can continue where this one left off. Of course, this only works
+# when a state file was specified and where there was valid sysfs state.
+# Returns 1 if the state was dumped, 0 otherwise.
+#
+# Dumping the state for one system configuration and loading it into
+# another isn't likely to do what you want, whatever that might be.
+function dumpcpustate( i, fn)
+{
+___EOF___
+echo ' fn = "'"$statefile"'";' >> $scriptfile
+cat << '___EOF___' >> "$scriptfile"
+ if (fn != "" && gotcpus()) {
+ print "curnode = " curnode ";" > fn;
+ for (i = 0; i < numnodes; i++)
+ if (curcpu[i] != "")
+ print "curcpu[" i "] = " curcpu[i] ";" >> fn;
+ return 1;
+ }
+ if (fn != "")
+ print "# No CPU state to dump." > fn;
+ return 0;
+}
+___EOF___
diff --git a/tools/testing/selftests/rcutorture/bin/kvm-recheck-lock.sh b/tools/testing/selftests/rcutorture/bin/kvm-recheck-lock.sh
index f3a7a5e2b89d..db2c0e2c8e1d 100755
--- a/tools/testing/selftests/rcutorture/bin/kvm-recheck-lock.sh
+++ b/tools/testing/selftests/rcutorture/bin/kvm-recheck-lock.sh
@@ -25,7 +25,7 @@ then
echo "$configfile -------"
else
title="$configfile ------- $ncs acquisitions/releases"
- dur=`sed -e 's/^.* locktorture.shutdown_secs=//' -e 's/ .*$//' < $i/qemu-cmd 2> /dev/null`
+ dur=`grep -v '^#' $i/qemu-cmd | sed -e 's/^.* locktorture.shutdown_secs=//' -e 's/ .*$//' 2> /dev/null`
if test -z "$dur"
then
:
diff --git a/tools/testing/selftests/rcutorture/bin/kvm-recheck-rcu.sh b/tools/testing/selftests/rcutorture/bin/kvm-recheck-rcu.sh
index 1706cd4466b4..43e1387234d1 100755
--- a/tools/testing/selftests/rcutorture/bin/kvm-recheck-rcu.sh
+++ b/tools/testing/selftests/rcutorture/bin/kvm-recheck-rcu.sh
@@ -25,13 +25,13 @@ stopstate="`grep 'End-test grace-period state: g' $i/console.log 2> /dev/null |
tail -1 | sed -e 's/^\[[ 0-9.]*] //' |
awk '{ print \"[\" $1 \" \" $5 \" \" $6 \" \" $7 \"]\"; }' |
tr -d '\012\015'`"
-fwdprog="`grep 'rcu_torture_fwd_prog_cr Duration' $i/console.log 2> /dev/null | sed -e 's/^\[[^]]*] //' | sort -k15nr | head -1 | awk '{ print $14 " " $15 }'`"
+fwdprog="`grep 'rcu_torture_fwd_prog n_max_cbs: ' $i/console.log 2> /dev/null | sed -e 's/^\[[^]]*] //' | sort -k3nr | head -1 | awk '{ print $2 " " $3 }' | tr -d '\015'`"
if test -z "$ngps"
then
echo "$configfile ------- " $stopstate
else
title="$configfile ------- $ngps GPs"
- dur=`sed -e 's/^.* rcutorture.shutdown_secs=//' -e 's/ .*$//' < $i/qemu-cmd 2> /dev/null`
+ dur=`grep -v '^#' $i/qemu-cmd | sed -e 's/^.* rcutorture.shutdown_secs=//' -e 's/ .*$//'`
if test -z "$dur"
then
:
diff --git a/tools/testing/selftests/rcutorture/bin/kvm-recheck-rcuperf-ftrace.sh b/tools/testing/selftests/rcutorture/bin/kvm-recheck-rcuscale-ftrace.sh
index 7d3c2be66c64..d4bec538086d 100755
--- a/tools/testing/selftests/rcutorture/bin/kvm-recheck-rcuperf-ftrace.sh
+++ b/tools/testing/selftests/rcutorture/bin/kvm-recheck-rcuscale-ftrace.sh
@@ -1,12 +1,12 @@
#!/bin/bash
# SPDX-License-Identifier: GPL-2.0+
#
-# Analyze a given results directory for rcuperf performance measurements,
+# Analyze a given results directory for rcuscale performance measurements,
# looking for ftrace data. Exits with 0 if data was found, analyzed, and
-# printed. Intended to be invoked from kvm-recheck-rcuperf.sh after
+# printed. Intended to be invoked from kvm-recheck-rcuscale.sh after
# argument checking.
#
-# Usage: kvm-recheck-rcuperf-ftrace.sh resdir
+# Usage: kvm-recheck-rcuscale-ftrace.sh resdir
#
# Copyright (C) IBM Corporation, 2016
#
diff --git a/tools/testing/selftests/rcutorture/bin/kvm-recheck-rcuperf.sh b/tools/testing/selftests/rcutorture/bin/kvm-recheck-rcuscale.sh
index db0375a57f28..f683e424ddd5 100755
--- a/tools/testing/selftests/rcutorture/bin/kvm-recheck-rcuperf.sh
+++ b/tools/testing/selftests/rcutorture/bin/kvm-recheck-rcuscale.sh
@@ -1,9 +1,9 @@
#!/bin/bash
# SPDX-License-Identifier: GPL-2.0+
#
-# Analyze a given results directory for rcuperf performance measurements.
+# Analyze a given results directory for rcuscale scalability measurements.
#
-# Usage: kvm-recheck-rcuperf.sh resdir
+# Usage: kvm-recheck-rcuscale.sh resdir
#
# Copyright (C) IBM Corporation, 2016
#
@@ -20,7 +20,7 @@ fi
PATH=`pwd`/tools/testing/selftests/rcutorture/bin:$PATH; export PATH
. functions.sh
-if kvm-recheck-rcuperf-ftrace.sh $i
+if kvm-recheck-rcuscale-ftrace.sh $i
then
# ftrace data was successfully analyzed, call it good!
exit 0
@@ -30,20 +30,24 @@ configfile=`echo $i | sed -e 's/^.*\///'`
sed -e 's/^\[[^]]*]//' < $i/console.log |
awk '
-/-perf: .* gps: .* batches:/ {
+/-scale: .* gps: .* batches:/ {
ngps = $9;
- nbatches = $11;
+ nbatches = 1;
}
-/-perf: .*writer-duration/ {
+/-scale: .*writer-duration/ {
gptimes[++n] = $5 / 1000.;
sum += $5 / 1000.;
}
+/rcu_scale: Grace-period kthread CPU time/ {
+ cputime = $6;
+}
+
END {
newNR = asort(gptimes);
if (newNR <= 0) {
- print "No rcuperf records found???"
+ print "No rcuscale records found???"
exit;
}
pct50 = int(newNR * 50 / 100);
@@ -78,6 +82,8 @@ END {
print "90th percentile grace-period duration: " gptimes[pct90];
print "99th percentile grace-period duration: " gptimes[pct99];
print "Maximum grace-period duration: " gptimes[newNR];
- print "Grace periods: " ngps + 0 " Batches: " nbatches + 0 " Ratio: " ngps / nbatches;
- print "Computed from rcuperf printk output.";
+ if (cputime != "")
+ cpustr = " CPU: " cputime;
+ print "Grace periods: " ngps + 0 " Batches: " nbatches + 0 " Ratio: " ngps / nbatches cpustr;
+ print "Computed from rcuscale printk output.";
}'
diff --git a/tools/testing/selftests/rcutorture/bin/kvm-recheck-refscale.sh b/tools/testing/selftests/rcutorture/bin/kvm-recheck-refscale.sh
new file mode 100755
index 000000000000..35a463dddffe
--- /dev/null
+++ b/tools/testing/selftests/rcutorture/bin/kvm-recheck-refscale.sh
@@ -0,0 +1,71 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0+
+#
+# Analyze a given results directory for refscale performance measurements.
+#
+# Usage: kvm-recheck-refscale.sh resdir
+#
+# Copyright (C) IBM Corporation, 2016
+#
+# Authors: Paul E. McKenney <paulmck@linux.ibm.com>
+
+i="$1"
+if test -d "$i" -a -r "$i"
+then
+ :
+else
+ echo Unreadable results directory: $i
+ exit 1
+fi
+PATH=`pwd`/tools/testing/selftests/rcutorture/bin:$PATH; export PATH
+. functions.sh
+
+configfile=`echo $i | sed -e 's/^.*\///'`
+
+sed -e 's/^\[[^]]*]//' < $i/console.log | tr -d '\015' |
+awk -v configfile="$configfile" '
+/^[ ]*Runs Time\(ns\) *$/ {
+ if (dataphase + 0 == 0) {
+ dataphase = 1;
+ # print configfile, $0;
+ }
+ next;
+}
+
+/[^ ]*[0-9][0-9]* [0-9][0-9]*\.[0-9][0-9]*$/ {
+ if (dataphase == 1) {
+ # print $0;
+ readertimes[++n] = $2;
+ sum += $2;
+ }
+ next;
+}
+
+{
+ if (dataphase == 1)
+ dataphase == 2;
+ next;
+}
+
+END {
+ print configfile " results:";
+ newNR = asort(readertimes);
+ if (newNR <= 0) {
+ print "No refscale records found???"
+ exit;
+ }
+ medianidx = int(newNR / 2);
+ if (newNR == medianidx * 2)
+ medianvalue = (readertimes[medianidx - 1] + readertimes[medianidx]) / 2;
+ else
+ medianvalue = readertimes[medianidx];
+ points = "Points:";
+ for (i = 1; i <= newNR; i++)
+ points = points " " readertimes[i];
+ print points;
+ print "Average reader duration: " sum / newNR " nanoseconds";
+ print "Minimum reader duration: " readertimes[1];
+ print "Median reader duration: " medianvalue;
+ print "Maximum reader duration: " readertimes[newNR];
+ print "Computed from refscale printk output.";
+}'
diff --git a/tools/testing/selftests/rcutorture/bin/kvm-recheck-scf.sh b/tools/testing/selftests/rcutorture/bin/kvm-recheck-scf.sh
new file mode 100755
index 000000000000..3afa5c6eda4f
--- /dev/null
+++ b/tools/testing/selftests/rcutorture/bin/kvm-recheck-scf.sh
@@ -0,0 +1,38 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0+
+#
+# Analyze a given results directory for rcutorture progress.
+#
+# Usage: kvm-recheck-rcu.sh resdir
+#
+# Copyright (C) Facebook, 2020
+#
+# Authors: Paul E. McKenney <paulmck@kernel.org>
+
+i="$1"
+if test -d "$i" -a -r "$i"
+then
+ :
+else
+ echo Unreadable results directory: $i
+ exit 1
+fi
+. functions.sh
+
+configfile=`echo $i | sed -e 's/^.*\///'`
+nscfs="`grep 'scf_invoked_count ver:' $i/console.log 2> /dev/null | tail -1 | sed -e 's/^.* scf_invoked_count ver: //' -e 's/ .*$//' | tr -d '\015'`"
+if test -z "$nscfs"
+then
+ echo "$configfile ------- "
+else
+ dur="`grep -v '^#' $i/qemu-cmd | sed -e 's/^.* scftorture.shutdown_secs=//' -e 's/ .*$//' 2> /dev/null`"
+ if test -z "$dur"
+ then
+ rate=""
+ else
+ nscfss=`awk -v nscfs=$nscfs -v dur=$dur '
+ BEGIN { print nscfs / dur }' < /dev/null`
+ rate=" ($nscfss/s)"
+ fi
+ echo "${configfile} ------- ${nscfs} SCF handler invocations$rate"
+fi
diff --git a/tools/testing/selftests/rcutorture/bin/kvm-recheck.sh b/tools/testing/selftests/rcutorture/bin/kvm-recheck.sh
index 736f04749b90..de65d77b47ff 100755
--- a/tools/testing/selftests/rcutorture/bin/kvm-recheck.sh
+++ b/tools/testing/selftests/rcutorture/bin/kvm-recheck.sh
@@ -13,9 +13,11 @@
#
# Authors: Paul E. McKenney <paulmck@linux.ibm.com>
-T=/tmp/kvm-recheck.sh.$$
+T="`mktemp ${TMPDIR-/tmp}/kvm-recheck.sh.XXXXXX`"
trap 'rm -f $T' 0 2
+configerrors=0
+
PATH=`pwd`/tools/testing/selftests/rcutorture/bin:$PATH; export PATH
. functions.sh
for rd in "$@"
@@ -30,9 +32,15 @@ do
resdir=`echo $i | sed -e 's,/$,,' -e 's,/[^/]*$,,'`
head -1 $resdir/log
fi
- TORTURE_SUITE="`cat $i/../TORTURE_SUITE`"
- rm -f $i/console.log.*.diags
- kvm-recheck-${TORTURE_SUITE}.sh $i
+ TORTURE_SUITE="`cat $i/../torture_suite`" ; export TORTURE_SUITE
+ configfile=`echo $i | sed -e 's,^.*/,,'`
+ rm -f $i/console.log.*.diags $i/ConfigFragment.diags
+ case "${TORTURE_SUITE}" in
+ X*)
+ ;;
+ *)
+ kvm-recheck-${TORTURE_SUITE}.sh $i
+ esac
if test -f "$i/qemu-retval" && test "`cat $i/qemu-retval`" -ne 0 && test "`cat $i/qemu-retval`" -ne 137
then
echo QEMU error, output:
@@ -43,7 +51,21 @@ do
then
echo QEMU killed
fi
- configcheck.sh $i/.config $i/ConfigFragment
+ configcheck.sh $i/.config $i/ConfigFragment > $i/ConfigFragment.diags 2>&1
+ if grep -q '^CONFIG_KCSAN=y$' $i/ConfigFragment.input
+ then
+ # KCSAN forces a number of Kconfig options, so remove
+ # complaints about those Kconfig options in KCSAN runs.
+ mv $i/ConfigFragment.diags $i/ConfigFragment.diags.kcsan
+ grep -v -E 'CONFIG_PROVE_RCU|CONFIG_PREEMPT_COUNT' $i/ConfigFragment.diags.kcsan > $i/ConfigFragment.diags
+ fi
+ if test -s $i/ConfigFragment.diags
+ then
+ cat $i/ConfigFragment.diags
+ configerrors=$((configerrors+1))
+ else
+ rm $i/ConfigFragment.diags
+ fi
if test -r $i/Make.oldconfig.err
then
cat $i/Make.oldconfig.err
@@ -55,15 +77,22 @@ do
cat $i/Warnings
fi
else
- if test -f "$i/qemu-cmd"
- then
- print_bug qemu failed
- echo " $i"
- elif test -f "$i/buildonly"
+ if test -f "$i/buildonly"
then
echo Build-only run, no boot/test
- configcheck.sh $i/.config $i/ConfigFragment
+ configcheck.sh $i/.config $i/ConfigFragment > $i/ConfigFragment.diags 2>&1
+ if test -s $i/ConfigFragment.diags
+ then
+ cat $i/ConfigFragment.diags
+ configerrors=$((configerrors+1))
+ else
+ rm $i/ConfigFragment.diags
+ fi
parse-build.sh $i/Make.out $configfile
+ elif test -f "$i/qemu-cmd"
+ then
+ print_bug qemu failed
+ echo " $i"
else
print_bug Build failed
echo " $i"
@@ -72,7 +101,14 @@ do
done
if test -f "$rd/kcsan.sum"
then
- if test -s "$rd/kcsan.sum"
+ if ! test -f $i/ConfigFragment.diags
+ then
+ :
+ elif grep -q CONFIG_KCSAN=y $i/ConfigFragment.diags
+ then
+ echo "Compiler or architecture does not support KCSAN!"
+ echo Did you forget to switch your compiler with '--kmake-arg CC=<cc-that-supports-kcsan>'?
+ elif test -s "$rd/kcsan.sum"
then
echo KCSAN summary in $rd/kcsan.sum
else
@@ -80,16 +116,23 @@ do
fi
fi
done
+
+if test "$configerrors" -gt 0
+then
+ echo $configerrors runs with .config errors.
+ ret=1
+fi
EDITOR=echo kvm-find-errors.sh "${@: -1}" > $T 2>&1
-ret=$?
builderrors="`tr ' ' '\012' < $T | grep -c '/Make.out.diags'`"
if test "$builderrors" -gt 0
then
echo $builderrors runs with build errors.
+ ret=2
fi
runerrors="`tr ' ' '\012' < $T | grep -c '/console.log.diags'`"
if test "$runerrors" -gt 0
then
echo $runerrors runs with runtime errors.
+ ret=3
fi
exit $ret
diff --git a/tools/testing/selftests/rcutorture/bin/kvm-remote-noreap.sh b/tools/testing/selftests/rcutorture/bin/kvm-remote-noreap.sh
new file mode 100755
index 000000000000..014ce68260d7
--- /dev/null
+++ b/tools/testing/selftests/rcutorture/bin/kvm-remote-noreap.sh
@@ -0,0 +1,30 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0+
+#
+# Periodically scan a directory tree to prevent files from being reaped
+# by systemd and friends on long runs.
+#
+# Usage: kvm-remote-noreap.sh pathname
+#
+# Copyright (C) 2021 Facebook, Inc.
+#
+# Authors: Paul E. McKenney <paulmck@kernel.org>
+
+pathname="$1"
+if test "$pathname" = ""
+then
+ echo Usage: kvm-remote-noreap.sh pathname
+ exit 1
+fi
+if ! test -d "$pathname"
+then
+ echo Usage: kvm-remote-noreap.sh pathname
+ echo " pathname must be a directory."
+ exit 2
+fi
+
+while test -d "$pathname"
+do
+ find "$pathname" -type f -exec touch -c {} \; > /dev/null 2>&1
+ sleep 30
+done
diff --git a/tools/testing/selftests/rcutorture/bin/kvm-remote.sh b/tools/testing/selftests/rcutorture/bin/kvm-remote.sh
new file mode 100755
index 000000000000..134cdef5a6e0
--- /dev/null
+++ b/tools/testing/selftests/rcutorture/bin/kvm-remote.sh
@@ -0,0 +1,280 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0+
+#
+# Run a series of tests on remote systems under KVM.
+#
+# Usage: kvm-remote.sh "systems" [ <kvm.sh args> ]
+# kvm-remote.sh "systems" /path/to/old/run [ <kvm-again.sh args> ]
+#
+# Copyright (C) 2021 Facebook, Inc.
+#
+# Authors: Paul E. McKenney <paulmck@kernel.org>
+
+scriptname=$0
+args="$*"
+
+if ! test -d tools/testing/selftests/rcutorture/bin
+then
+ echo $scriptname must be run from top-level directory of kernel source tree.
+ exit 1
+fi
+
+RCUTORTURE="`pwd`/tools/testing/selftests/rcutorture"; export RCUTORTURE
+PATH=${RCUTORTURE}/bin:$PATH; export PATH
+. functions.sh
+
+starttime="`get_starttime`"
+
+systems="$1"
+if test -z "$systems"
+then
+ echo $scriptname: Empty list of systems will go nowhere good, giving up.
+ exit 1
+fi
+shift
+
+# Pathnames:
+# T: /tmp/kvm-remote.sh.NNNNNN where "NNNNNN" is set by mktemp
+# resdir: /tmp/kvm-remote.sh.NNNNNN/res
+# rundir: /tmp/kvm-remote.sh.NNNNNN/res/$ds ("-remote" suffix)
+# oldrun: `pwd`/tools/testing/.../res/$otherds
+#
+# Pathname segments:
+# TD: kvm-remote.sh.NNNNNN
+# ds: yyyy.mm.dd-hh.mm.ss-remote
+
+T="`mktemp -d ${TMPDIR-/tmp}/kvm-remote.sh.XXXXXX`"
+trap 'rm -rf $T' 0
+TD="`basename "$T"`"
+
+resdir="$T/res"
+ds=`date +%Y.%m.%d-%H.%M.%S`-remote
+rundir=$resdir/$ds
+echo Results directory: $rundir
+echo $scriptname $args
+if echo $1 | grep -q '^--'
+then
+ # Fresh build. Create a datestamp unless the caller supplied one.
+ datestamp="`echo "$@" | awk -v ds="$ds" '{
+ for (i = 1; i < NF; i++) {
+ if ($i == "--datestamp") {
+ ds = "";
+ break;
+ }
+ }
+ if (ds != "")
+ print "--datestamp " ds;
+ }'`"
+ kvm.sh --remote "$@" $datestamp --buildonly > $T/kvm.sh.out 2>&1
+ ret=$?
+ if test "$ret" -ne 0
+ then
+ echo $scriptname: kvm.sh failed exit code $?
+ cat $T/kvm.sh.out
+ exit 2
+ fi
+ oldrun="`grep -m 1 "^Results directory: " $T/kvm.sh.out | awk '{ print $3 }'`"
+ touch "$oldrun/remote-log"
+ echo $scriptname $args >> "$oldrun/remote-log"
+ echo | tee -a "$oldrun/remote-log"
+ echo " ----" kvm.sh output: "(`date`)" | tee -a "$oldrun/remote-log"
+ cat $T/kvm.sh.out | tee -a "$oldrun/remote-log"
+ # We are going to run this, so remove the buildonly files.
+ rm -f "$oldrun"/*/buildonly
+ kvm-again.sh $oldrun --dryrun --remote --rundir "$rundir" > $T/kvm-again.sh.out 2>&1
+ ret=$?
+ if test "$ret" -ne 0
+ then
+ echo $scriptname: kvm-again.sh failed exit code $? | tee -a "$oldrun/remote-log"
+ cat $T/kvm-again.sh.out | tee -a "$oldrun/remote-log"
+ exit 2
+ fi
+else
+ # Re-use old run.
+ oldrun="$1"
+ if ! echo $oldrun | grep -q '^/'
+ then
+ oldrun="`pwd`/$oldrun"
+ fi
+ shift
+ touch "$oldrun/remote-log"
+ echo $scriptname $args >> "$oldrun/remote-log"
+ kvm-again.sh "$oldrun" "$@" --dryrun --remote --rundir "$rundir" > $T/kvm-again.sh.out 2>&1
+ ret=$?
+ if test "$ret" -ne 0
+ then
+ echo $scriptname: kvm-again.sh failed exit code $? | tee -a "$oldrun/remote-log"
+ cat $T/kvm-again.sh.out | tee -a "$oldrun/remote-log"
+ exit 2
+ fi
+ cp -a "$rundir" "$RCUTORTURE/res/"
+ oldrun="$RCUTORTURE/res/$ds"
+fi
+echo | tee -a "$oldrun/remote-log"
+echo " ----" kvm-again.sh output: "(`date`)" | tee -a "$oldrun/remote-log"
+cat $T/kvm-again.sh.out
+echo | tee -a "$oldrun/remote-log"
+echo Remote run directory: $rundir | tee -a "$oldrun/remote-log"
+echo Local build-side run directory: $oldrun | tee -a "$oldrun/remote-log"
+
+# Create the kvm-remote-N.sh scripts in the bin directory.
+awk < "$rundir"/scenarios -v dest="$T/bin" -v rundir="$rundir" '
+{
+ n = $1;
+ sub(/\./, "", n);
+ fn = dest "/kvm-remote-" n ".sh"
+ print "kvm-remote-noreap.sh " rundir " &" > fn;
+ scenarios = "";
+ for (i = 2; i <= NF; i++)
+ scenarios = scenarios " " $i;
+ print "kvm-test-1-run-batch.sh" scenarios >> fn;
+ print "sync" >> fn;
+ print "rm " rundir "/remote.run" >> fn;
+}'
+chmod +x $T/bin/kvm-remote-*.sh
+( cd "`dirname $T`"; tar -chzf $T/binres.tgz "$TD/bin" "$TD/res" )
+
+# Check first to avoid the need for cleanup for system-name typos
+for i in $systems
+do
+ ssh -o BatchMode=yes $i getconf _NPROCESSORS_ONLN > $T/ssh.stdout 2> $T/ssh.stderr
+ ret=$?
+ if test "$ret" -ne 0
+ then
+ echo "System $i unreachable ($ret), giving up." | tee -a "$oldrun/remote-log"
+ echo ' --- ssh stdout: vvv' | tee -a "$oldrun/remote-log"
+ cat $T/ssh.stdout | tee -a "$oldrun/remote-log"
+ echo ' --- ssh stdout: ^^^' | tee -a "$oldrun/remote-log"
+ echo ' --- ssh stderr: vvv' | tee -a "$oldrun/remote-log"
+ cat $T/ssh.stderr | tee -a "$oldrun/remote-log"
+ echo ' --- ssh stderr: ^^^' | tee -a "$oldrun/remote-log"
+ exit 4
+ fi
+ echo $i: `cat $T/ssh.stdout` CPUs " " `date` | tee -a "$oldrun/remote-log"
+done
+
+# Download and expand the tarball on all systems.
+echo Build-products tarball: `du -h $T/binres.tgz` | tee -a "$oldrun/remote-log"
+for i in $systems
+do
+ echo Downloading tarball to $i `date` | tee -a "$oldrun/remote-log"
+ cat $T/binres.tgz | ssh -o BatchMode=yes $i "cd /tmp; tar -xzf -"
+ ret=$?
+ tries=0
+ while test "$ret" -ne 0
+ do
+ echo Unable to download $T/binres.tgz to system $i, waiting and then retrying. $tries prior retries. | tee -a "$oldrun/remote-log"
+ sleep 60
+ cat $T/binres.tgz | ssh -o BatchMode=yes $i "cd /tmp; tar -xzf -"
+ ret=$?
+ if test "$ret" -ne 0
+ then
+ if test "$tries" > 5
+ then
+ echo Unable to download $T/binres.tgz to system $i, giving up. | tee -a "$oldrun/remote-log"
+ exit 10
+ fi
+ fi
+ tries=$((tries+1))
+ done
+done
+
+# Function to check for presence of a file on the specified system.
+# Complain if the system cannot be reached, and retry after a wait.
+# Currently just waits forever if a machine disappears.
+#
+# Usage: checkremotefile system pathname
+checkremotefile () {
+ local ret
+ local sleeptime=60
+
+ while :
+ do
+ ssh -o BatchMode=yes $1 "test -f \"$2\""
+ ret=$?
+ if test "$ret" -eq 255
+ then
+ echo " ---" ssh failure to $1 checking for file $2, retry after $sleeptime seconds. `date` | tee -a "$oldrun/remote-log"
+ elif test "$ret" -eq 0
+ then
+ return 0
+ elif test "$ret" -eq 1
+ then
+ echo " ---" File \"$2\" not found: ssh $1 test -f \"$2\" | tee -a "$oldrun/remote-log"
+ return 1
+ else
+ echo " ---" Exit code $ret: ssh $1 test -f \"$2\", retry after $sleeptime seconds. `date` | tee -a "$oldrun/remote-log"
+ return $ret
+ fi
+ sleep $sleeptime
+ done
+}
+
+# Function to start batches on idle remote $systems
+#
+# Usage: startbatches curbatch nbatches
+#
+# Batches are numbered starting at 1. Returns the next batch to start.
+# Be careful to redirect all debug output to FD 2 (stderr).
+startbatches () {
+ local curbatch="$1"
+ local nbatches="$2"
+ local ret
+
+ # Each pass through the following loop examines one system.
+ for i in $systems
+ do
+ if test "$curbatch" -gt "$nbatches"
+ then
+ echo $((nbatches + 1))
+ return 0
+ fi
+ if checkremotefile "$i" "$resdir/$ds/remote.run" 1>&2
+ then
+ continue # System still running last test, skip.
+ fi
+ ssh -o BatchMode=yes "$i" "cd \"$resdir/$ds\"; touch remote.run; PATH=\"$T/bin:$PATH\" nohup kvm-remote-$curbatch.sh > kvm-remote-$curbatch.sh.out 2>&1 &" 1>&2
+ ret=$?
+ if test "$ret" -ne 0
+ then
+ echo ssh $i failed: exitcode $ret 1>&2
+ exit 11
+ fi
+ echo " ----" System $i Batch `head -n $curbatch < "$rundir"/scenarios | tail -1` `date` 1>&2
+ curbatch=$((curbatch + 1))
+ done
+ echo $curbatch
+}
+
+# Launch all the scenarios.
+nbatches="`wc -l "$rundir"/scenarios | awk '{ print $1 }'`"
+curbatch=1
+while test "$curbatch" -le "$nbatches"
+do
+ startbatches $curbatch $nbatches > $T/curbatch 2> $T/startbatches.stderr
+ curbatch="`cat $T/curbatch`"
+ if test -s "$T/startbatches.stderr"
+ then
+ cat "$T/startbatches.stderr" | tee -a "$oldrun/remote-log"
+ fi
+ if test "$curbatch" -le "$nbatches"
+ then
+ sleep 30
+ fi
+done
+echo All batches started. `date` | tee -a "$oldrun/remote-log"
+
+# Wait for all remaining scenarios to complete and collect results.
+for i in $systems
+do
+ echo " ---" Waiting for $i `date` | tee -a "$oldrun/remote-log"
+ while checkremotefile "$i" "$resdir/$ds/remote.run"
+ do
+ sleep 30
+ done
+ echo " ---" Collecting results from $i `date` | tee -a "$oldrun/remote-log"
+ ( cd "$oldrun"; ssh -o BatchMode=yes $i "cd $rundir; tar -czf - kvm-remote-*.sh.out */console.log */kvm-test-1-run*.sh.out */qemu[_-]pid */qemu-retval */qemu-affinity; rm -rf $T > /dev/null 2>&1" | tar -xzf - )
+done
+
+( kvm-end-run-stats.sh "$oldrun" "$starttime"; echo $? > $T/exitcode ) | tee -a "$oldrun/remote-log"
+exit "`cat $T/exitcode`"
diff --git a/tools/testing/selftests/rcutorture/bin/kvm-test-1-run-batch.sh b/tools/testing/selftests/rcutorture/bin/kvm-test-1-run-batch.sh
new file mode 100755
index 000000000000..c3808c490d92
--- /dev/null
+++ b/tools/testing/selftests/rcutorture/bin/kvm-test-1-run-batch.sh
@@ -0,0 +1,90 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0+
+#
+# Carry out a kvm-based run for the specified batch of scenarios, which
+# might have been built by --build-only kvm.sh run.
+#
+# Usage: kvm-test-1-run-batch.sh SCENARIO [ SCENARIO ... ]
+#
+# Each SCENARIO is the name of a directory in the current directory
+# containing a ready-to-run qemu-cmd file.
+#
+# Copyright (C) 2021 Facebook, Inc.
+#
+# Authors: Paul E. McKenney <paulmck@kernel.org>
+
+T="`mktemp -d ${TMPDIR-/tmp}/kvm-test-1-run-batch.sh.XXXXXX`"
+trap 'rm -rf $T' 0
+
+echo ---- Running batch $*
+# Check arguments
+runfiles=
+for i in "$@"
+do
+ if ! echo $i | grep -q '^[^/.a-z]\+\(\.[0-9]\+\)\?$'
+ then
+ echo Bad scenario name: \"$i\" 1>&2
+ exit 1
+ fi
+ if ! test -d "$i"
+ then
+ echo Scenario name not a directory: \"$i\" 1>&2
+ exit 2
+ fi
+ if ! test -f "$i/qemu-cmd"
+ then
+ echo Scenario lacks a command file: \"$i/qemu-cmd\" 1>&2
+ exit 3
+ fi
+ rm -f $i/build.*
+ touch $i/build.run
+ runfiles="$runfiles $i/build.run"
+done
+
+# Extract settings from the qemu-cmd file.
+grep '^#' $1/qemu-cmd | sed -e 's/^# //' > $T/qemu-cmd-settings
+. $T/qemu-cmd-settings
+
+# Start up jitter, start each scenario, wait, end jitter.
+echo ---- System running test: `uname -a`
+echo ---- Starting kernels. `date` | tee -a log
+$TORTURE_JITTER_START
+kvm-assign-cpus.sh /sys/devices/system/node > $T/cpuarray.awk
+for i in "$@"
+do
+ echo ---- System running test: `uname -a` > $i/kvm-test-1-run-qemu.sh.out
+ echo > $i/kvm-test-1-run-qemu.sh.out
+ export TORTURE_AFFINITY=
+ kvm-get-cpus-script.sh $T/cpuarray.awk $T/cpubatches.awk $T/cpustate
+ cat << ' ___EOF___' >> $T/cpubatches.awk
+ END {
+ affinitylist = "";
+ if (!gotcpus()) {
+ print "echo No CPU-affinity information, so no taskset command.";
+ } else if (cpu_count !~ /^[0-9][0-9]*$/) {
+ print "echo " scenario ": Bogus number of CPUs (old qemu-cmd?), so no taskset command.";
+ } else {
+ affinitylist = nextcpus(cpu_count);
+ if (!(affinitylist ~ /^[0-9,-][0-9,-]*$/))
+ print "echo " scenario ": Bogus CPU-affinity information, so no taskset command.";
+ else if (!dumpcpustate())
+ print "echo " scenario ": Could not dump state, so no taskset command.";
+ else
+ print "export TORTURE_AFFINITY=" affinitylist;
+ }
+ }
+ ___EOF___
+ cpu_count="`grep '# TORTURE_CPU_COUNT=' $i/qemu-cmd | sed -e 's/^.*=//'`"
+ affinity_export="`awk -f $T/cpubatches.awk -v cpu_count="$cpu_count" -v scenario=$i < /dev/null`"
+ $affinity_export
+ kvm-test-1-run-qemu.sh $i >> $i/kvm-test-1-run-qemu.sh.out 2>&1 &
+done
+for i in $runfiles
+do
+ while ls $i > /dev/null 2>&1
+ do
+ :
+ done
+done
+echo ---- All kernel runs complete. `date` | tee -a log
+$TORTURE_JITTER_STOP
diff --git a/tools/testing/selftests/rcutorture/bin/kvm-test-1-run-qemu.sh b/tools/testing/selftests/rcutorture/bin/kvm-test-1-run-qemu.sh
new file mode 100755
index 000000000000..76f24cd5825b
--- /dev/null
+++ b/tools/testing/selftests/rcutorture/bin/kvm-test-1-run-qemu.sh
@@ -0,0 +1,184 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0+
+#
+# Carry out a kvm-based run for the specified qemu-cmd file, which might
+# have been generated by --build-only kvm.sh run.
+#
+# Usage: kvm-test-1-run-qemu.sh qemu-cmd-dir
+#
+# qemu-cmd-dir provides the directory containing qemu-cmd file.
+# This is assumed to be of the form prefix/ds/scenario, where
+# "ds" is the top-level date-stamped directory and "scenario"
+# is the scenario name. Any required adjustments to this file
+# must have been made by the caller. The shell-command comments
+# at the end of the qemu-cmd file are not optional.
+#
+# Copyright (C) 2021 Facebook, Inc.
+#
+# Authors: Paul E. McKenney <paulmck@kernel.org>
+
+T="`mktemp -d ${TMPDIR-/tmp}/kvm-test-1-run-qemu.sh.XXXXXX`"
+trap 'rm -rf $T' 0
+
+resdir="$1"
+if ! test -d "$resdir"
+then
+ echo $0: Nonexistent directory: $resdir
+ exit 1
+fi
+if ! test -f "$resdir/qemu-cmd"
+then
+ echo $0: Nonexistent qemu-cmd file: $resdir/qemu-cmd
+ exit 1
+fi
+
+echo ' ---' `date`: Starting kernel, PID $$
+
+# Obtain settings from the qemu-cmd file.
+grep '^#' $resdir/qemu-cmd | sed -e 's/^# //' > $T/qemu-cmd-settings
+. $T/qemu-cmd-settings
+
+# Decorate qemu-cmd with affinity, redirection, backgrounding, and PID capture
+taskset_command=
+if test -n "$TORTURE_AFFINITY"
+then
+ taskset_command="taskset -c $TORTURE_AFFINITY "
+fi
+sed -e 's/^[^#].*$/'"$taskset_command"'& 2>\&1 \&/' < $resdir/qemu-cmd > $T/qemu-cmd
+echo 'qemu_pid=$!' >> $T/qemu-cmd
+echo 'echo $qemu_pid > $resdir/qemu-pid' >> $T/qemu-cmd
+echo 'taskset -c -p $qemu_pid > $resdir/qemu-affinity' >> $T/qemu-cmd
+
+# In case qemu refuses to run...
+echo "NOTE: $QEMU either did not run or was interactive" > $resdir/console.log
+
+# Attempt to run qemu
+kstarttime=`gawk 'BEGIN { print systime() }' < /dev/null`
+( . $T/qemu-cmd; wait `cat $resdir/qemu-pid`; echo $? > $resdir/qemu-retval ) &
+commandcompleted=0
+if test -z "$TORTURE_KCONFIG_GDB_ARG"
+then
+ sleep 10 # Give qemu's pid a chance to reach the file
+ if test -s "$resdir/qemu-pid"
+ then
+ qemu_pid=`cat "$resdir/qemu-pid"`
+ echo Monitoring qemu job at pid $qemu_pid `date`
+ else
+ qemu_pid=""
+ echo Monitoring qemu job at yet-as-unknown pid `date`
+ fi
+fi
+if test -n "$TORTURE_KCONFIG_GDB_ARG"
+then
+ base_resdir=`echo $resdir | sed -e 's/\.[0-9]\+$//'`
+ if ! test -f $base_resdir/vmlinux
+ then
+ base_resdir="`cat re-run`/$resdir"
+ if ! test -f $base_resdir/vmlinux
+ then
+ base_resdir=/path/to
+ fi
+ fi
+ echo Waiting for you to attach a debug session, for example: > /dev/tty
+ echo " gdb $base_resdir/vmlinux" > /dev/tty
+ echo 'After symbols load and the "(gdb)" prompt appears:' > /dev/tty
+ echo " target remote :1234" > /dev/tty
+ echo " continue" > /dev/tty
+ kstarttime=`gawk 'BEGIN { print systime() }' < /dev/null`
+fi
+while :
+do
+ if test -z "$qemu_pid" && test -s "$resdir/qemu-pid"
+ then
+ qemu_pid=`cat "$resdir/qemu-pid"`
+ fi
+ kruntime=`gawk 'BEGIN { print systime() - '"$kstarttime"' }' < /dev/null`
+ if test -z "$qemu_pid" || kill -0 "$qemu_pid" > /dev/null 2>&1
+ then
+ if test -n "$TORTURE_KCONFIG_GDB_ARG"
+ then
+ :
+ elif test $kruntime -ge $seconds || test -f "$resdir/../STOP.1"
+ then
+ break;
+ fi
+ sleep 1
+ else
+ commandcompleted=1
+ if test $kruntime -lt $seconds
+ then
+ echo Completed in $kruntime vs. $seconds >> $resdir/Warnings 2>&1
+ grep "^(qemu) qemu:" $resdir/kvm-test-1-run*.sh.out >> $resdir/Warnings 2>&1
+ killpid="`sed -n "s/^(qemu) qemu: terminating on signal [0-9]* from pid \([0-9]*\).*$/\1/p" $resdir/Warnings`"
+ if test -n "$killpid"
+ then
+ echo "ps -fp $killpid" >> $resdir/Warnings 2>&1
+ ps -fp $killpid >> $resdir/Warnings 2>&1
+ fi
+ else
+ echo ' ---' `date`: "Kernel done"
+ fi
+ break
+ fi
+done
+if test -z "$qemu_pid" && test -s "$resdir/qemu-pid"
+then
+ qemu_pid=`cat "$resdir/qemu-pid"`
+fi
+if test $commandcompleted -eq 0 && test -n "$qemu_pid"
+then
+ if ! test -f "$resdir/../STOP.1"
+ then
+ echo Grace period for qemu job at pid $qemu_pid `date`
+ fi
+ oldline="`tail $resdir/console.log`"
+ while :
+ do
+ if test -f "$resdir/../STOP.1"
+ then
+ echo "PID $qemu_pid killed due to run STOP.1 request `date`" >> $resdir/Warnings 2>&1
+ kill -KILL $qemu_pid
+ break
+ fi
+ kruntime=`gawk 'BEGIN { print systime() - '"$kstarttime"' }' < /dev/null`
+ if kill -0 $qemu_pid > /dev/null 2>&1
+ then
+ :
+ else
+ break
+ fi
+ must_continue=no
+ newline="`tail $resdir/console.log`"
+ if test "$newline" != "$oldline" && echo $newline | grep -q ' [0-9]\+us : '
+ then
+ must_continue=yes
+ fi
+ last_ts="`tail $resdir/console.log | grep '^\[ *[0-9]\+\.[0-9]\+]' | tail -1 | sed -e 's/^\[ *//' -e 's/\..*$//'`"
+ if test -z "$last_ts"
+ then
+ last_ts=0
+ fi
+ if test "$newline" != "$oldline" && test "$last_ts" -lt $((seconds + $TORTURE_SHUTDOWN_GRACE)) && test "$last_ts" -gt "$TORTURE_SHUTDOWN_GRACE"
+ then
+ must_continue=yes
+ if test $kruntime -ge $((seconds + $TORTURE_SHUTDOWN_GRACE))
+ then
+ echo Continuing at console.log time $last_ts \"`tail -n 1 $resdir/console.log`\" `date`
+ fi
+ fi
+ if test $must_continue = no && test $kruntime -ge $((seconds + $TORTURE_SHUTDOWN_GRACE))
+ then
+ echo "!!! PID $qemu_pid hung at $kruntime vs. $seconds seconds `date`" >> $resdir/Warnings 2>&1
+ kill -KILL $qemu_pid
+ break
+ fi
+ oldline=$newline
+ sleep 10
+ done
+elif test -z "$qemu_pid"
+then
+ echo Unknown PID, cannot kill qemu command
+fi
+
+# Tell the script that this run is done.
+rm -f $resdir/build.run
diff --git a/tools/testing/selftests/rcutorture/bin/kvm-test-1-run.sh b/tools/testing/selftests/rcutorture/bin/kvm-test-1-run.sh
index 6ff611c630d1..b33cd8753689 100755
--- a/tools/testing/selftests/rcutorture/bin/kvm-test-1-run.sh
+++ b/tools/testing/selftests/rcutorture/bin/kvm-test-1-run.sh
@@ -7,15 +7,16 @@
# Execute this in the source tree. Do not run it as a background task
# because qemu does not seem to like that much.
#
-# Usage: kvm-test-1-run.sh config builddir resdir seconds qemu-args boot_args
+# Usage: kvm-test-1-run.sh config resdir seconds qemu-args boot_args_in
#
-# qemu-args defaults to "-enable-kvm -nographic", along with arguments
-# specifying the number of CPUs and other options
-# generated from the underlying CPU architecture.
-# boot_args defaults to value returned by the per_version_boot_params
+# qemu-args defaults to "-enable-kvm -display none -no-reboot", along
+# with arguments specifying the number of CPUs
+# and other options generated from the underlying
+# CPU architecture.
+# boot_args_in defaults to value returned by the per_version_boot_params
# shell function.
#
-# Anything you specify for either qemu-args or boot_args is appended to
+# Anything you specify for either qemu-args or boot_args_in is appended to
# the default values. The "-smp" value is deduced from the contents of
# the config fragment.
#
@@ -25,9 +26,8 @@
#
# Authors: Paul E. McKenney <paulmck@linux.ibm.com>
-T=${TMPDIR-/tmp}/kvm-test-1-run.sh.$$
+T="`mktemp -d ${TMPDIR-/tmp}/kvm-test-1-run.sh.XXXXXX`"
trap 'rm -rf $T' 0
-mkdir $T
. functions.sh
. $CONFIGFRAG/ver_functions.sh
@@ -35,14 +35,13 @@ mkdir $T
config_template=${1}
config_dir=`echo $config_template | sed -e 's,/[^/]*$,,'`
title=`echo $config_template | sed -e 's/^.*\///'`
-builddir=${2}
-resdir=${3}
+resdir=${2}
if test -z "$resdir" -o ! -d "$resdir" -o ! -w "$resdir"
then
echo "kvm-test-1-run.sh :$resdir: Not a writable directory, cannot store results into it"
exit 1
fi
-echo ' ---' `date`: Starting build
+echo ' ---' `date`: Starting build, PID $$
echo ' ---' Kconfig fragment at: $config_template >> $resdir/log
touch $resdir/ConfigFragment.input
@@ -59,20 +58,20 @@ config_override_param () {
cat $T/Kconfig_args >> $resdir/ConfigFragment.input
config_override.sh $T/$2 $T/Kconfig_args > $T/$2.tmp
mv $T/$2.tmp $T/$2
- # Note that "#CHECK#" is not permitted on commandline.
fi
}
echo > $T/KcList
config_override_param "$config_dir/CFcommon" KcList "`cat $config_dir/CFcommon 2> /dev/null`"
config_override_param "$config_template" KcList "`cat $config_template 2> /dev/null`"
+config_override_param "--gdb options" KcList "$TORTURE_KCONFIG_GDB_ARG"
config_override_param "--kasan options" KcList "$TORTURE_KCONFIG_KASAN_ARG"
config_override_param "--kcsan options" KcList "$TORTURE_KCONFIG_KCSAN_ARG"
config_override_param "--kconfig argument" KcList "$TORTURE_KCONFIG_ARG"
cp $T/KcList $resdir/ConfigFragment
base_resdir=`echo $resdir | sed -e 's/\.[0-9]\+$//'`
-if test "$base_resdir" != "$resdir" -a -f $base_resdir/bzImage -a -f $base_resdir/vmlinux
+if test "$base_resdir" != "$resdir" && test -f $base_resdir/bzImage && test -f $base_resdir/vmlinux
then
# Rerunning previous test, so use that test's kernel.
QEMU="`identify_qemu $base_resdir/vmlinux`"
@@ -82,6 +81,17 @@ then
ln -s $base_resdir/.config $resdir # for kvm-recheck.sh
# Arch-independent indicator
touch $resdir/builtkernel
+elif test "$base_resdir" != "$resdir"
+then
+ # Rerunning previous test for which build failed
+ ln -s $base_resdir/Make*.out $resdir # for kvm-recheck.sh
+ ln -s $base_resdir/.config $resdir # for kvm-recheck.sh
+ echo Initial build failed, not running KVM, see $resdir.
+ if test -f $resdir/build.wait
+ then
+ mv $resdir/build.wait $resdir/build.ready
+ fi
+ exit 1
elif kvm-build.sh $T/KcList $resdir
then
# Had to build a kernel for this test.
@@ -106,41 +116,40 @@ else
# Build failed.
cp .config $resdir || :
echo Build failed, not running KVM, see $resdir.
- if test -f $builddir.wait
+ if test -f $resdir/build.wait
then
- mv $builddir.wait $builddir.ready
+ mv $resdir/build.wait $resdir/build.ready
fi
exit 1
fi
-if test -f $builddir.wait
+if test -f $resdir/build.wait
then
- mv $builddir.wait $builddir.ready
+ mv $resdir/build.wait $resdir/build.ready
fi
-while test -f $builddir.ready
+while test -f $resdir/build.ready
do
sleep 1
done
-seconds=$4
-qemu_args=$5
-boot_args=$6
+seconds=$3
+qemu_args=$4
+boot_args_in=$5
-cd $KVM
-kstarttime=`gawk 'BEGIN { print systime() }' < /dev/null`
if test -z "$TORTURE_BUILDONLY"
then
echo ' ---' `date`: Starting kernel
fi
# Generate -smp qemu argument.
-qemu_args="-enable-kvm -nographic $qemu_args"
+qemu_args="-enable-kvm -display none -no-reboot $qemu_args"
cpu_count=`configNR_CPUS.sh $resdir/ConfigFragment`
-cpu_count=`configfrag_boot_cpus "$boot_args" "$config_template" "$cpu_count"`
+cpu_count=`configfrag_boot_cpus "$boot_args_in" "$config_template" "$cpu_count"`
if test "$cpu_count" -gt "$TORTURE_ALLOTED_CPUS"
then
echo CPU count limited from $cpu_count to $TORTURE_ALLOTED_CPUS | tee -a $resdir/Warnings
cpu_count=$TORTURE_ALLOTED_CPUS
fi
qemu_args="`specify_qemu_cpus "$QEMU" "$qemu_args" "$cpu_count"`"
+qemu_args="`specify_qemu_net "$qemu_args"`"
# Generate architecture-specific and interaction-specific qemu arguments
qemu_args="$qemu_args `identify_qemu_args "$QEMU" "$resdir/console.log"`"
@@ -149,9 +158,53 @@ qemu_args="$qemu_args `identify_qemu_args "$QEMU" "$resdir/console.log"`"
qemu_append="`identify_qemu_append "$QEMU"`"
# Pull in Kconfig-fragment boot parameters
-boot_args="`configfrag_boot_params "$boot_args" "$config_template"`"
+boot_args="`configfrag_boot_params "$boot_args_in" "$config_template"`"
# Generate kernel-version-specific boot parameters
boot_args="`per_version_boot_params "$boot_args" $resdir/.config $seconds`"
+if test -n "$TORTURE_BOOT_GDB_ARG"
+then
+ boot_args="$TORTURE_BOOT_GDB_ARG $boot_args"
+fi
+
+# Give bare-metal advice
+modprobe_args="`echo $boot_args | tr -s ' ' '\012' | grep "^$TORTURE_MOD\." | sed -e "s/$TORTURE_MOD\.//g"`"
+kboot_args="`echo $boot_args | tr -s ' ' '\012' | grep -v "^$TORTURE_MOD\."`"
+testid_txt="`dirname $resdir`/testid.txt"
+touch $resdir/bare-metal
+echo To run this scenario on bare metal: >> $resdir/bare-metal
+echo >> $resdir/bare-metal
+echo " 1." Set your bare-metal build tree to the state shown in this file: >> $resdir/bare-metal
+echo " " $testid_txt >> $resdir/bare-metal
+echo " 2." Update your bare-metal build tree"'"s .config based on this file: >> $resdir/bare-metal
+echo " " $resdir/ConfigFragment >> $resdir/bare-metal
+echo " 3." Make the bare-metal kernel"'"s build system aware of your .config updates: >> $resdir/bare-metal
+echo " " $ 'yes "" | make oldconfig' >> $resdir/bare-metal
+echo " 4." Build your bare-metal kernel. >> $resdir/bare-metal
+echo " 5." Boot your bare-metal kernel with the following parameters: >> $resdir/bare-metal
+echo " " $kboot_args >> $resdir/bare-metal
+echo " 6." Start the test with the following command: >> $resdir/bare-metal
+echo " " $ modprobe $TORTURE_MOD $modprobe_args >> $resdir/bare-metal
+echo " 7." After some time, end the test with the following command: >> $resdir/bare-metal
+echo " " $ rmmod $TORTURE_MOD >> $resdir/bare-metal
+echo " 8." Copy your bare-metal kernel"'"s .config file, overwriting this file: >> $resdir/bare-metal
+echo " " $resdir/.config >> $resdir/bare-metal
+echo " 9." Copy the console output from just before the modprobe to just after >> $resdir/bare-metal
+echo " " the rmmod into this file: >> $resdir/bare-metal
+echo " " $resdir/console.log >> $resdir/bare-metal
+echo "10." Check for runtime errors using the following command: >> $resdir/bare-metal
+echo " " $ tools/testing/selftests/rcutorture/bin/kvm-recheck.sh `dirname $resdir` >> $resdir/bare-metal
+echo >> $resdir/bare-metal
+echo Some of the above steps may be skipped if you build your bare-metal >> $resdir/bare-metal
+echo kernel here: `head -n 1 $testid_txt | sed -e 's/^Build directory: //'` >> $resdir/bare-metal
+
+echo $QEMU $qemu_args -m $TORTURE_QEMU_MEM -kernel $KERNEL -append \"$qemu_append $boot_args\" $TORTURE_QEMU_GDB_ARG > $resdir/qemu-cmd
+echo "# TORTURE_SHUTDOWN_GRACE=$TORTURE_SHUTDOWN_GRACE" >> $resdir/qemu-cmd
+echo "# seconds=$seconds" >> $resdir/qemu-cmd
+echo "# TORTURE_KCONFIG_GDB_ARG=\"$TORTURE_KCONFIG_GDB_ARG\"" >> $resdir/qemu-cmd
+echo "# TORTURE_JITTER_START=\"$TORTURE_JITTER_START\"" >> $resdir/qemu-cmd
+echo "# TORTURE_JITTER_STOP=\"$TORTURE_JITTER_STOP\"" >> $resdir/qemu-cmd
+echo "# TORTURE_TRUST_MAKE=\"$TORTURE_TRUST_MAKE\"; export TORTURE_TRUST_MAKE" >> $resdir/qemu-cmd
+echo "# TORTURE_CPU_COUNT=$cpu_count" >> $resdir/qemu-cmd
if test -n "$TORTURE_BUILDONLY"
then
@@ -159,95 +212,6 @@ then
touch $resdir/buildonly
exit 0
fi
-echo "NOTE: $QEMU either did not run or was interactive" > $resdir/console.log
-echo $QEMU $qemu_args -m $TORTURE_QEMU_MEM -kernel $KERNEL -append \"$qemu_append $boot_args\" > $resdir/qemu-cmd
-( $QEMU $qemu_args -m $TORTURE_QEMU_MEM -kernel $KERNEL -append "$qemu_append $boot_args" > $resdir/qemu-output 2>&1 & echo $! > $resdir/qemu_pid; wait `cat $resdir/qemu_pid`; echo $? > $resdir/qemu-retval ) &
-commandcompleted=0
-sleep 10 # Give qemu's pid a chance to reach the file
-if test -s "$resdir/qemu_pid"
-then
- qemu_pid=`cat "$resdir/qemu_pid"`
- echo Monitoring qemu job at pid $qemu_pid
-else
- qemu_pid=""
- echo Monitoring qemu job at yet-as-unknown pid
-fi
-while :
-do
- if test -z "$qemu_pid" -a -s "$resdir/qemu_pid"
- then
- qemu_pid=`cat "$resdir/qemu_pid"`
- fi
- kruntime=`gawk 'BEGIN { print systime() - '"$kstarttime"' }' < /dev/null`
- if test -z "$qemu_pid" || kill -0 "$qemu_pid" > /dev/null 2>&1
- then
- if test $kruntime -ge $seconds
- then
- break;
- fi
- sleep 1
- else
- commandcompleted=1
- if test $kruntime -lt $seconds
- then
- echo Completed in $kruntime vs. $seconds >> $resdir/Warnings 2>&1
- grep "^(qemu) qemu:" $resdir/kvm-test-1-run.sh.out >> $resdir/Warnings 2>&1
- killpid="`sed -n "s/^(qemu) qemu: terminating on signal [0-9]* from pid \([0-9]*\).*$/\1/p" $resdir/Warnings`"
- if test -n "$killpid"
- then
- echo "ps -fp $killpid" >> $resdir/Warnings 2>&1
- ps -fp $killpid >> $resdir/Warnings 2>&1
- fi
- else
- echo ' ---' `date`: "Kernel done"
- fi
- break
- fi
-done
-if test -z "$qemu_pid" -a -s "$resdir/qemu_pid"
-then
- qemu_pid=`cat "$resdir/qemu_pid"`
-fi
-if test $commandcompleted -eq 0 -a -n "$qemu_pid"
-then
- echo Grace period for qemu job at pid $qemu_pid
- oldline="`tail $resdir/console.log`"
- while :
- do
- kruntime=`gawk 'BEGIN { print systime() - '"$kstarttime"' }' < /dev/null`
- if kill -0 $qemu_pid > /dev/null 2>&1
- then
- :
- else
- break
- fi
- must_continue=no
- newline="`tail $resdir/console.log`"
- if test "$newline" != "$oldline" && echo $newline | grep -q ' [0-9]\+us : '
- then
- must_continue=yes
- fi
- last_ts="`tail $resdir/console.log | grep '^\[ *[0-9]\+\.[0-9]\+]' | tail -1 | sed -e 's/^\[ *//' -e 's/\..*$//'`"
- if test -z "$last_ts"
- then
- last_ts=0
- fi
- if test "$newline" != "$oldline" -a "$last_ts" -lt $((seconds + $TORTURE_SHUTDOWN_GRACE))
- then
- must_continue=yes
- fi
- if test $must_continue = no -a $kruntime -ge $((seconds + $TORTURE_SHUTDOWN_GRACE))
- then
- echo "!!! PID $qemu_pid hung at $kruntime vs. $seconds seconds" >> $resdir/Warnings 2>&1
- kill -KILL $qemu_pid
- break
- fi
- oldline=$newline
- sleep 10
- done
-elif test -z "$qemu_pid"
-then
- echo Unknown PID, cannot kill qemu command
-fi
+kvm-test-1-run-qemu.sh $resdir
parse-console.sh $resdir/console.log $title
diff --git a/tools/testing/selftests/rcutorture/bin/kvm-transform.sh b/tools/testing/selftests/rcutorture/bin/kvm-transform.sh
new file mode 100755
index 000000000000..75a2610a27f3
--- /dev/null
+++ b/tools/testing/selftests/rcutorture/bin/kvm-transform.sh
@@ -0,0 +1,139 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0+
+#
+# Transform a qemu-cmd file to allow reuse.
+#
+# Usage: kvm-transform.sh bzImage console.log jitter_dir seconds [ bootargs ] < qemu-cmd-in > qemu-cmd-out
+#
+# bzImage: Kernel and initrd from the same prior kvm.sh run.
+# console.log: File into which to place console output.
+# jitter_dir: Jitter directory for TORTURE_JITTER_START and
+# TORTURE_JITTER_STOP environment variables.
+# seconds: Run duaration for *.shutdown_secs module parameter.
+# bootargs: New kernel boot parameters. Beware of Robert Tables.
+#
+# The original qemu-cmd file is provided on standard input.
+# The transformed qemu-cmd file is on standard output.
+# The transformation assumes that the qemu command is confined to a
+# single line. It also assumes no whitespace in filenames.
+#
+# Copyright (C) 2020 Facebook, Inc.
+#
+# Authors: Paul E. McKenney <paulmck@kernel.org>
+
+T=`mktemp -d /tmp/kvm-transform.sh.XXXXXXXXXX`
+trap 'rm -rf $T' 0 2
+
+image="$1"
+if test -z "$image"
+then
+ echo Need kernel image file.
+ exit 1
+fi
+consolelog="$2"
+if test -z "$consolelog"
+then
+ echo "Need console log file name."
+ exit 1
+fi
+jitter_dir="$3"
+if test -z "$jitter_dir" || ! test -d "$jitter_dir"
+then
+ echo "Need valid jitter directory: '$jitter_dir'"
+ exit 1
+fi
+seconds="$4"
+if test -n "$seconds" && echo $seconds | grep -q '[^0-9]'
+then
+ echo "Invalid duration, should be numeric in seconds: '$seconds'"
+ exit 1
+fi
+bootargs="$5"
+
+# Build awk program.
+echo "BEGIN {" > $T/bootarg.awk
+echo $bootargs | tr -s ' ' '\012' |
+ awk -v dq='"' '/./ { print "\tbootarg[" NR "] = " dq $1 dq ";" }' >> $T/bootarg.awk
+echo $bootargs | tr -s ' ' '\012' | sed -e 's/=.*$//' |
+ awk -v dq='"' '/./ { print "\tbootpar[" NR "] = " dq $1 dq ";" }' >> $T/bootarg.awk
+cat >> $T/bootarg.awk << '___EOF___'
+}
+
+/^# seconds=/ {
+ if (seconds == "")
+ print $0;
+ else
+ print "# seconds=" seconds;
+ next;
+}
+
+/^# TORTURE_JITTER_START=/ {
+ print "# TORTURE_JITTER_START=\". jitterstart.sh " $4 " " jitter_dir " " $6 " " $7;
+ next;
+}
+
+/^# TORTURE_JITTER_STOP=/ {
+ print "# TORTURE_JITTER_STOP=\". jitterstop.sh " " " jitter_dir " " $5;
+ next;
+}
+
+/^#/ {
+ print $0;
+ next;
+}
+
+{
+ line = "";
+ for (i = 1; i <= NF; i++) {
+ if (line == "") {
+ line = $i;
+ } else {
+ line = line " " $i;
+ }
+ if ($i == "-serial") {
+ i++;
+ line = line " file:" consolelog;
+ } else if ($i == "-kernel") {
+ i++;
+ line = line " " image;
+ } else if ($i == "-append") {
+ for (i++; i <= NF; i++) {
+ arg = $i;
+ lq = "";
+ rq = "";
+ if ("" seconds != "" && $i ~ /\.shutdown_secs=[0-9]*$/)
+ sub(/[0-9]*$/, seconds, arg);
+ if (arg ~ /^"/) {
+ lq = substr(arg, 1, 1);
+ arg = substr(arg, 2);
+ }
+ if (arg ~ /"$/) {
+ rq = substr(arg, length($i), 1);
+ arg = substr(arg, 1, length($i) - 1);
+ }
+ par = arg;
+ gsub(/=.*$/, "", par);
+ j = 1;
+ while (bootpar[j] != "") {
+ if (bootpar[j] == par) {
+ arg = "";
+ break;
+ }
+ j++;
+ }
+ if (line == "")
+ line = lq arg;
+ else
+ line = line " " lq arg;
+ }
+ for (j in bootarg)
+ line = line " " bootarg[j];
+ line = line rq;
+ }
+ }
+ print line;
+}
+___EOF___
+
+awk -v image="$image" -v consolelog="$consolelog" -v jitter_dir="$jitter_dir" \
+ -v seconds="$seconds" -f $T/bootarg.awk
diff --git a/tools/testing/selftests/rcutorture/bin/kvm.sh b/tools/testing/selftests/rcutorture/bin/kvm.sh
index c279cf9cb010..7af73ddc148d 100755
--- a/tools/testing/selftests/rcutorture/bin/kvm.sh
+++ b/tools/testing/selftests/rcutorture/bin/kvm.sh
@@ -14,38 +14,54 @@
scriptname=$0
args="$*"
-T=${TMPDIR-/tmp}/kvm.sh.$$
+T="`mktemp -d ${TMPDIR-/tmp}/kvm.sh.XXXXXX`"
trap 'rm -rf $T' 0
-mkdir $T
cd `dirname $scriptname`/../../../../../
+# This script knows only English.
+LANG=en_US.UTF-8; export LANG
+
dur=$((30*60))
dryrun=""
-KVM="`pwd`/tools/testing/selftests/rcutorture"; export KVM
-PATH=${KVM}/bin:$PATH; export PATH
+RCUTORTURE="`pwd`/tools/testing/selftests/rcutorture"; export RCUTORTURE
+PATH=${RCUTORTURE}/bin:$PATH; export PATH
. functions.sh
TORTURE_ALLOTED_CPUS="`identify_qemu_vcpus`"
TORTURE_DEFCONFIG=defconfig
TORTURE_BOOT_IMAGE=""
-TORTURE_INITRD="$KVM/initrd"; export TORTURE_INITRD
+TORTURE_BUILDONLY=
+TORTURE_INITRD="$RCUTORTURE/initrd"; export TORTURE_INITRD
TORTURE_KCONFIG_ARG=""
+TORTURE_KCONFIG_GDB_ARG=""
+TORTURE_BOOT_GDB_ARG=""
+TORTURE_QEMU_GDB_ARG=""
+TORTURE_JITTER_START=""
+TORTURE_JITTER_STOP=""
TORTURE_KCONFIG_KASAN_ARG=""
TORTURE_KCONFIG_KCSAN_ARG=""
TORTURE_KMAKE_ARG=""
TORTURE_QEMU_MEM=512
+torture_qemu_mem_default=1
+TORTURE_REMOTE=
TORTURE_SHUTDOWN_GRACE=180
TORTURE_SUITE=rcu
+TORTURE_MOD=rcutorture
TORTURE_TRUST_MAKE=""
+debuginfo="CONFIG_DEBUG_INFO_NONE=n CONFIG_DEBUG_INFO_DWARF_TOOLCHAIN_DEFAULT=y"
resdir=""
configs=""
cpus=0
ds=`date +%Y.%m.%d-%H.%M.%S`
jitter="-1"
+startdate="`date`"
+starttime="`get_starttime`"
+
usage () {
echo "Usage: $scriptname optional arguments:"
+ echo " --allcpus"
echo " --bootargs kernel-boot-arguments"
echo " --bootimage relative-path-to-kernel-boot-image"
echo " --buildonly"
@@ -53,19 +69,26 @@ usage () {
echo " --cpus N"
echo " --datestamp string"
echo " --defconfig string"
- echo " --dryrun sched|script"
- echo " --duration minutes"
+ echo " --debug-info"
+ echo " --dryrun batches|scenarios|sched|script"
+ echo " --duration minutes | <seconds>s | <hours>h | <days>d"
+ echo " --gdb"
+ echo " --help"
echo " --interactive"
echo " --jitter N [ maxsleep (us) [ maxspin (us) ] ]"
+ echo " --kasan"
echo " --kconfig Kconfig-options"
+ echo " --kcsan"
echo " --kmake-arg kernel-make-arguments"
echo " --mac nn:nn:nn:nn:nn:nn"
- echo " --memory megabytes | nnnG"
+ echo " --memory megabytes|nnnG"
echo " --no-initrd"
echo " --qemu-args qemu-arguments"
echo " --qemu-cmd qemu-system-..."
+ echo " --remote"
echo " --results absolute-pathname"
- echo " --torture rcu"
+ echo " --shutdown-grace seconds"
+ echo " --torture lock|rcu|rcuscale|refscale|scf|X*"
echo " --trust-make"
exit 1
}
@@ -73,9 +96,13 @@ usage () {
while test $# -gt 0
do
case "$1" in
+ --allcpus)
+ cpus=$TORTURE_ALLOTED_CPUS
+ max_cpus=$TORTURE_ALLOTED_CPUS
+ ;;
--bootargs|--bootarg)
checkarg --bootargs "(list of kernel boot arguments)" "$#" "$2" '.*' '^--'
- TORTURE_BOOTARGS="$2"
+ TORTURE_BOOTARGS="$TORTURE_BOOTARGS $2"
shift
;;
--bootimage)
@@ -83,45 +110,77 @@ do
TORTURE_BOOT_IMAGE="$2"
shift
;;
- --buildonly)
+ --buildonly|--build-only)
TORTURE_BUILDONLY=1
;;
--configs|--config)
- checkarg --configs "(list of config files)" "$#" "$2" '^[^/]*$' '^--'
- configs="$2"
+ checkarg --configs "(list of config files)" "$#" "$2" '^[^/.a-z]\+$' '^--'
+ configs="$configs $2"
shift
;;
--cpus)
checkarg --cpus "(number)" "$#" "$2" '^[0-9]*$' '^--'
cpus=$2
TORTURE_ALLOTED_CPUS="$2"
- max_cpus="`identify_qemu_vcpus`"
- if test "$TORTURE_ALLOTED_CPUS" -gt "$max_cpus"
+ if test -z "$TORTURE_REMOTE"
then
- TORTURE_ALLOTED_CPUS=$max_cpus
+ max_cpus="`identify_qemu_vcpus`"
+ if test "$TORTURE_ALLOTED_CPUS" -gt "$max_cpus"
+ then
+ TORTURE_ALLOTED_CPUS=$max_cpus
+ fi
fi
shift
;;
--datestamp)
- checkarg --datestamp "(relative pathname)" "$#" "$2" '^[^/]*$' '^--'
+ checkarg --datestamp "(relative pathname)" "$#" "$2" '^[a-zA-Z0-9._/-]*$' '^--'
ds=$2
shift
;;
+ --debug-info|--debuginfo)
+ if test -z "$TORTURE_KCONFIG_KCSAN_ARG" && test -z "$TORTURE_BOOT_GDB_ARG"
+ then
+ TORTURE_KCONFIG_KCSAN_ARG="$debuginfo"; export TORTURE_KCONFIG_KCSAN_ARG
+ TORTURE_BOOT_GDB_ARG="nokaslr"; export TORTURE_BOOT_GDB_ARG
+ else
+ echo "Ignored redundant --debug-info (implied by --kcsan &c)"
+ fi
+ ;;
--defconfig)
checkarg --defconfig "defconfigtype" "$#" "$2" '^[^/][^/]*$' '^--'
TORTURE_DEFCONFIG=$2
shift
;;
--dryrun)
- checkarg --dryrun "sched|script" $# "$2" 'sched\|script' '^--'
+ checkarg --dryrun "batches|sched|script" $# "$2" 'batches\|scenarios\|sched\|script' '^--'
dryrun=$2
shift
;;
--duration)
- checkarg --duration "(minutes)" $# "$2" '^[0-9]*$' '^error'
- dur=$(($2*60))
+ checkarg --duration "(minutes)" $# "$2" '^[0-9][0-9]*\(s\|m\|h\|d\|\)$' '^error'
+ mult=60
+ if echo "$2" | grep -q 's$'
+ then
+ mult=1
+ elif echo "$2" | grep -q 'h$'
+ then
+ mult=3600
+ elif echo "$2" | grep -q 'd$'
+ then
+ mult=86400
+ fi
+ ts=`echo $2 | sed -e 's/[smhd]$//'`
+ dur=$(($ts*mult))
shift
;;
+ --gdb)
+ TORTURE_KCONFIG_GDB_ARG="$debuginfo"; export TORTURE_KCONFIG_GDB_ARG
+ TORTURE_BOOT_GDB_ARG="nokaslr"; export TORTURE_BOOT_GDB_ARG
+ TORTURE_QEMU_GDB_ARG="-s -S"; export TORTURE_QEMU_GDB_ARG
+ ;;
+ --help|-h)
+ usage
+ ;;
--interactive)
TORTURE_QEMU_INTERACTIVE=1; export TORTURE_QEMU_INTERACTIVE
;;
@@ -130,20 +189,24 @@ do
jitter="$2"
shift
;;
- --kconfig)
- checkarg --kconfig "(Kconfig options)" $# "$2" '^CONFIG_[A-Z0-9_]\+=\([ynm]\|[0-9]\+\)\( CONFIG_[A-Z0-9_]\+=\([ynm]\|[0-9]\+\)\)*$' '^error$'
- TORTURE_KCONFIG_ARG="$2"
- shift
- ;;
--kasan)
- TORTURE_KCONFIG_KASAN_ARG="CONFIG_DEBUG_INFO=y CONFIG_KASAN=y"; export TORTURE_KCONFIG_KASAN_ARG
+ TORTURE_KCONFIG_KASAN_ARG="$debuginfo CONFIG_KASAN=y"; export TORTURE_KCONFIG_KASAN_ARG
+ if test -n "$torture_qemu_mem_default"
+ then
+ TORTURE_QEMU_MEM=2G
+ fi
+ ;;
+ --kconfig|--kconfigs)
+ checkarg --kconfig "(Kconfig options)" $# "$2" '^\(#CHECK#\)\?CONFIG_[A-Z0-9_]\+=\([ynm]\|[0-9]\+\|"[^"]*"\)\( \(#CHECK#\)\?CONFIG_[A-Z0-9_]\+=\([ynm]\|[0-9]\+\|"[^"]*"\)\)*$' '^error$'
+ TORTURE_KCONFIG_ARG="`echo "$TORTURE_KCONFIG_ARG $2" | sed -e 's/^ *//' -e 's/ *$//'`"
+ shift
;;
--kcsan)
- TORTURE_KCONFIG_KCSAN_ARG="CONFIG_DEBUG_INFO=y CONFIG_KCSAN=y CONFIG_KCSAN_ASSUME_PLAIN_WRITES_ATOMIC=n CONFIG_KCSAN_REPORT_VALUE_CHANGE_ONLY=n CONFIG_KCSAN_REPORT_ONCE_IN_MS=100000 CONFIG_KCSAN_VERBOSE=y CONFIG_KCSAN_INTERRUPT_WATCHER=y"; export TORTURE_KCONFIG_KCSAN_ARG
+ TORTURE_KCONFIG_KCSAN_ARG="$debuginfo CONFIG_KCSAN=y CONFIG_KCSAN_STRICT=y CONFIG_KCSAN_REPORT_ONCE_IN_MS=100000 CONFIG_KCSAN_VERBOSE=y CONFIG_DEBUG_LOCK_ALLOC=y CONFIG_PROVE_LOCKING=y"; export TORTURE_KCONFIG_KCSAN_ARG
;;
- --kmake-arg)
+ --kmake-arg|--kmake-args)
checkarg --kmake-arg "(kernel make arguments)" $# "$2" '.*' '^error$'
- TORTURE_KMAKE_ARG="$2"
+ TORTURE_KMAKE_ARG="`echo "$TORTURE_KMAKE_ARG $2" | sed -e 's/^ *//' -e 's/ *$//'`"
shift
;;
--mac)
@@ -154,6 +217,7 @@ do
--memory)
checkarg --memory "(memory size)" $# "$2" '^[0-9]\+[MG]\?$' error
TORTURE_QEMU_MEM=$2
+ torture_qemu_mem_default=
shift
;;
--no-initrd)
@@ -161,7 +225,7 @@ do
;;
--qemu-args|--qemu-arg)
checkarg --qemu-args "(qemu arguments)" $# "$2" '^-' '^error'
- TORTURE_QEMU_ARG="$2"
+ TORTURE_QEMU_ARG="`echo "$TORTURE_QEMU_ARG $2" | sed -e 's/^ *//' -e 's/ *$//'`"
shift
;;
--qemu-cmd)
@@ -169,6 +233,9 @@ do
TORTURE_QEMU_CMD="$2"
shift
;;
+ --remote)
+ TORTURE_REMOTE=1
+ ;;
--results)
checkarg --results "(absolute pathname)" "$#" "$2" '^/' '^error'
resdir=$2
@@ -180,13 +247,15 @@ do
shift
;;
--torture)
- checkarg --torture "(suite name)" "$#" "$2" '^\(lock\|rcu\|rcuperf\)$' '^--'
+ checkarg --torture "(suite name)" "$#" "$2" '^\(lock\|rcu\|rcuscale\|refscale\|scf\|X.*\)$' '^--'
TORTURE_SUITE=$2
+ TORTURE_MOD="`echo $TORTURE_SUITE | sed -e 's/^\(lock\|rcu\|scf\)$/\1torture/'`"
shift
- if test "$TORTURE_SUITE" = rcuperf
+ if test "$TORTURE_SUITE" = rcuscale || test "$TORTURE_SUITE" = refscale
then
- # If you really want jitter for rcuperf, specify
- # it after specifying rcuperf. (But why?)
+ # If you really want jitter for refscale or
+ # rcuscale, specify it after specifying the rcuscale
+ # or the refscale. (But why jitter in these cases?)
jitter=0
fi
;;
@@ -201,7 +270,7 @@ do
shift
done
-if test -z "$TORTURE_INITRD" || tools/testing/selftests/rcutorture/bin/mkinitrd.sh
+if test -n "$dryrun" || test -z "$TORTURE_INITRD" || tools/testing/selftests/rcutorture/bin/mkinitrd.sh
then
:
else
@@ -209,7 +278,7 @@ else
exit 1
fi
-CONFIGFRAG=${KVM}/configs/${TORTURE_SUITE}; export CONFIGFRAG
+CONFIGFRAG=${RCUTORTURE}/configs/${TORTURE_SUITE}; export CONFIGFRAG
defaultconfigs="`tr '\012' ' ' < $CONFIGFRAG/CFLIST`"
if test -z "$configs"
@@ -219,7 +288,7 @@ fi
if test -z "$resdir"
then
- resdir=$KVM/res
+ resdir=$RCUTORTURE/res
fi
# Create a file of test-name/#cpus pairs, sorted by decreasing #cpus.
@@ -227,7 +296,7 @@ configs_derep=
for CF in $configs
do
case $CF in
- [0-9]\**|[0-9][0-9]\**|[0-9][0-9][0-9]\**)
+ [0-9]\**|[0-9][0-9]\**|[0-9][0-9][0-9]\**|[0-9][0-9][0-9][0-9]\**)
config_reps=`echo $CF | sed -e 's/\*.*$//'`
CF1=`echo $CF | sed -e 's/^[^*]*\*//'`
;;
@@ -243,19 +312,43 @@ do
done
touch $T/cfgcpu
configs_derep="`echo $configs_derep | sed -e "s/\<CFLIST\>/$defaultconfigs/g"`"
-for CF1 in $configs_derep
+if test -n "$TORTURE_KCONFIG_GDB_ARG"
+then
+ if test "`echo $configs_derep | wc -w`" -gt 1
+ then
+ echo "The --config list is: $configs_derep."
+ echo "Only one --config permitted with --gdb, terminating."
+ exit 1
+ fi
+fi
+echo 'BEGIN {' > $T/cfgcpu.awk
+for CF1 in `echo $configs_derep | tr -s ' ' '\012' | sort -u`
do
if test -f "$CONFIGFRAG/$CF1"
then
- cpu_count=`configNR_CPUS.sh $CONFIGFRAG/$CF1`
+ if echo "$TORTURE_KCONFIG_ARG" | grep -q '\<CONFIG_NR_CPUS='
+ then
+ echo "$TORTURE_KCONFIG_ARG" | tr -s ' ' | tr ' ' '\012' > $T/KCONFIG_ARG
+ cpu_count=`configNR_CPUS.sh $T/KCONFIG_ARG`
+ else
+ cpu_count=`configNR_CPUS.sh $CONFIGFRAG/$CF1`
+ fi
cpu_count=`configfrag_boot_cpus "$TORTURE_BOOTARGS" "$CONFIGFRAG/$CF1" "$cpu_count"`
cpu_count=`configfrag_boot_maxcpus "$TORTURE_BOOTARGS" "$CONFIGFRAG/$CF1" "$cpu_count"`
- echo $CF1 $cpu_count >> $T/cfgcpu
+ echo 'scenariocpu["'"$CF1"'"] = '"$cpu_count"';' >> $T/cfgcpu.awk
else
echo "The --configs file $CF1 does not exist, terminating."
exit 1
fi
done
+cat << '___EOF___' >> $T/cfgcpu.awk
+}
+{
+ for (i = 1; i <= NF; i++)
+ print $i, scenariocpu[$i];
+}
+___EOF___
+echo $configs_derep | awk -f $T/cfgcpu.awk > $T/cfgcpu
sort -k2nr $T/cfgcpu -T="$T" > $T/cfgcpu.sort
# Use a greedy bin-packing algorithm, sorting the list accordingly.
@@ -275,11 +368,10 @@ END {
batch = 0;
nc = -1;
- # Each pass through the following loop creates on test batch
- # that can be executed concurrently given ncpus. Note that a
- # given test that requires more than the available CPUs will run in
- # their own batch. Such tests just have to make do with what
- # is available.
+ # Each pass through the following loop creates on test batch that
+ # can be executed concurrently given ncpus. Note that a given test
+ # that requires more than the available CPUs will run in its own
+ # batch. Such tests just have to make do with what is available.
while (nc != ncpus) {
batch++;
nc = ncpus;
@@ -310,7 +402,7 @@ END {
# Generate a script to execute the tests in appropriate batches.
cat << ___EOF___ > $T/script
CONFIGFRAG="$CONFIGFRAG"; export CONFIGFRAG
-KVM="$KVM"; export KVM
+RCUTORTURE="$RCUTORTURE"; export RCUTORTURE
PATH="$PATH"; export PATH
TORTURE_ALLOTED_CPUS="$TORTURE_ALLOTED_CPUS"; export TORTURE_ALLOTED_CPUS
TORTURE_BOOT_IMAGE="$TORTURE_BOOT_IMAGE"; export TORTURE_BOOT_IMAGE
@@ -318,9 +410,13 @@ TORTURE_BUILDONLY="$TORTURE_BUILDONLY"; export TORTURE_BUILDONLY
TORTURE_DEFCONFIG="$TORTURE_DEFCONFIG"; export TORTURE_DEFCONFIG
TORTURE_INITRD="$TORTURE_INITRD"; export TORTURE_INITRD
TORTURE_KCONFIG_ARG="$TORTURE_KCONFIG_ARG"; export TORTURE_KCONFIG_ARG
+TORTURE_KCONFIG_GDB_ARG="$TORTURE_KCONFIG_GDB_ARG"; export TORTURE_KCONFIG_GDB_ARG
+TORTURE_BOOT_GDB_ARG="$TORTURE_BOOT_GDB_ARG"; export TORTURE_BOOT_GDB_ARG
+TORTURE_QEMU_GDB_ARG="$TORTURE_QEMU_GDB_ARG"; export TORTURE_QEMU_GDB_ARG
TORTURE_KCONFIG_KASAN_ARG="$TORTURE_KCONFIG_KASAN_ARG"; export TORTURE_KCONFIG_KASAN_ARG
TORTURE_KCONFIG_KCSAN_ARG="$TORTURE_KCONFIG_KCSAN_ARG"; export TORTURE_KCONFIG_KCSAN_ARG
TORTURE_KMAKE_ARG="$TORTURE_KMAKE_ARG"; export TORTURE_KMAKE_ARG
+TORTURE_MOD="$TORTURE_MOD"; export TORTURE_MOD
TORTURE_QEMU_CMD="$TORTURE_QEMU_CMD"; export TORTURE_QEMU_CMD
TORTURE_QEMU_INTERACTIVE="$TORTURE_QEMU_INTERACTIVE"; export TORTURE_QEMU_INTERACTIVE
TORTURE_QEMU_MAC="$TORTURE_QEMU_MAC"; export TORTURE_QEMU_MAC
@@ -332,31 +428,31 @@ if ! test -e $resdir
then
mkdir -p "$resdir" || :
fi
-mkdir $resdir/$ds
+mkdir -p $resdir/$ds
+TORTURE_RESDIR="$resdir/$ds"; export TORTURE_RESDIR
+TORTURE_STOPFILE="$resdir/$ds/STOP.1"; export TORTURE_STOPFILE
echo Results directory: $resdir/$ds
echo $scriptname $args
touch $resdir/$ds/log
echo $scriptname $args >> $resdir/$ds/log
-echo ${TORTURE_SUITE} > $resdir/$ds/TORTURE_SUITE
-pwd > $resdir/$ds/testid.txt
+echo ${TORTURE_SUITE} > $resdir/$ds/torture_suite
+echo Build directory: `pwd` > $resdir/$ds/testid.txt
if test -d .git
then
+ echo Current commit: `git rev-parse HEAD` >> $resdir/$ds/testid.txt
+ echo >> $resdir/$ds/testid.txt
+ echo ' ---' Output of "'"git status"'": >> $resdir/$ds/testid.txt
git status >> $resdir/$ds/testid.txt
- git rev-parse HEAD >> $resdir/$ds/testid.txt
+ echo >> $resdir/$ds/testid.txt
+ echo >> $resdir/$ds/testid.txt
+ echo ' ---' Output of "'"git diff HEAD"'": >> $resdir/$ds/testid.txt
git diff HEAD >> $resdir/$ds/testid.txt
fi
___EOF___
-awk < $T/cfgcpu.pack \
- -v TORTURE_BUILDONLY="$TORTURE_BUILDONLY" \
- -v CONFIGDIR="$CONFIGFRAG/" \
- -v KVM="$KVM" \
- -v ncpus=$cpus \
- -v jitter="$jitter" \
- -v rd=$resdir/$ds/ \
- -v dur=$dur \
- -v TORTURE_QEMU_ARG="$TORTURE_QEMU_ARG" \
- -v TORTURE_BOOTARGS="$TORTURE_BOOTARGS" \
-'BEGIN {
+kvm-assign-cpus.sh /sys/devices/system/node > $T/cpuarray.awk
+kvm-get-cpus-script.sh $T/cpuarray.awk $T/dumpbatches.awk
+cat << '___EOF___' >> $T/dumpbatches.awk
+BEGIN {
i = 0;
}
@@ -367,13 +463,22 @@ awk < $T/cfgcpu.pack \
}
# Dump out the scripting required to run one test batch.
-function dump(first, pastlast, batchnum)
+function dump(first, pastlast, batchnum, affinitylist)
{
print "echo ----Start batch " batchnum ": `date` | tee -a " rd "log";
print "needqemurun="
jn=1
+ njitter = 0;
+ split(jitter, ja);
+ if (ja[1] == -1 && ncpus == 0)
+ njitter = 1;
+ else if (ja[1] == -1)
+ njitter = ncpus;
+ else
+ njitter = ja[1];
+ print "TORTURE_JITTER_START=\". jitterstart.sh " njitter " " rd " " dur " " ja[2] " " ja[3] "\"; export TORTURE_JITTER_START";
+ print "TORTURE_JITTER_STOP=\". jitterstop.sh " rd " \"; export TORTURE_JITTER_STOP"
for (j = first; j < pastlast; j++) {
- builddir=KVM "/b" j - first + 1
cpusr[jn] = cpus[j];
if (cfrep[cf[j]] == "") {
cfr[jn] = cf[j];
@@ -382,15 +487,23 @@ function dump(first, pastlast, batchnum)
cfrep[cf[j]]++;
cfr[jn] = cf[j] "." cfrep[cf[j]];
}
+ builddir=rd cfr[jn] "/build";
if (cpusr[jn] > ncpus && ncpus != 0)
ovf = "-ovf";
else
ovf = "";
print "echo ", cfr[jn], cpusr[jn] ovf ": Starting build. `date` | tee -a " rd "log";
- print "rm -f " builddir ".*";
- print "touch " builddir ".wait";
print "mkdir " rd cfr[jn] " || :";
- print "kvm-test-1-run.sh " CONFIGDIR cf[j], builddir, rd cfr[jn], dur " \"" TORTURE_QEMU_ARG "\" \"" TORTURE_BOOTARGS "\" > " rd cfr[jn] "/kvm-test-1-run.sh.out 2>&1 &"
+ print "touch " builddir ".wait";
+ affinitylist = "";
+ if (gotcpus()) {
+ affinitylist = nextcpus(cpusr[jn]);
+ }
+ if (affinitylist ~ /^[0-9,-][0-9,-]*$/)
+ print "export TORTURE_AFFINITY=" affinitylist;
+ else
+ print "export TORTURE_AFFINITY=";
+ print "kvm-test-1-run.sh " CONFIGDIR cf[j], rd cfr[jn], dur " \"" TORTURE_QEMU_ARG "\" \"" TORTURE_BOOTARGS "\" > " rd cfr[jn] "/kvm-test-1-run.sh.out 2>&1 &"
print "echo ", cfr[jn], cpusr[jn] ovf ": Waiting for build to complete. `date` | tee -a " rd "log";
print "while test -f " builddir ".wait"
print "do"
@@ -399,23 +512,21 @@ function dump(first, pastlast, batchnum)
print "echo ", cfr[jn], cpusr[jn] ovf ": Build complete. `date` | tee -a " rd "log";
jn++;
}
+ print "runfiles="
for (j = 1; j < jn; j++) {
- builddir=KVM "/b" j
- print "rm -f " builddir ".ready"
+ builddir=rd cfr[j] "/build";
+ if (TORTURE_BUILDONLY)
+ print "rm -f " builddir ".ready"
+ else
+ print "mv " builddir ".ready " builddir ".run"
+ print "runfiles=\"$runfiles " builddir ".run\""
+ fi
print "if test -f \"" rd cfr[j] "/builtkernel\""
print "then"
print "\techo ----", cfr[j], cpusr[j] ovf ": Kernel present. `date` | tee -a " rd "log";
print "\tneedqemurun=1"
print "fi"
}
- njitter = 0;
- split(jitter, ja);
- if (ja[1] == -1 && ncpus == 0)
- njitter = 1;
- else if (ja[1] == -1)
- njitter = ncpus;
- else
- njitter = ja[1];
if (TORTURE_BUILDONLY && njitter != 0) {
njitter = 0;
print "echo Build-only run, so suppressing jitter | tee -a " rd "log"
@@ -426,16 +537,18 @@ function dump(first, pastlast, batchnum)
print "if test -n \"$needqemurun\""
print "then"
print "\techo ---- Starting kernels. `date` | tee -a " rd "log";
- for (j = 0; j < njitter; j++)
- print "\tjitter.sh " j " " dur " " ja[2] " " ja[3] "&"
- print "\twait"
+ print "\t$TORTURE_JITTER_START";
+ print "\twhile ls $runfiles > /dev/null 2>&1"
+ print "\tdo"
+ print "\t\t:"
+ print "\tdone"
+ print "\t$TORTURE_JITTER_STOP";
print "\techo ---- All kernel runs complete. `date` | tee -a " rd "log";
print "else"
print "\twait"
print "\techo ---- No kernel runs. `date` | tee -a " rd "log";
print "fi"
for (j = 1; j < jn; j++) {
- builddir=KVM "/b" j
print "echo ----", cfr[j], cpusr[j] ovf ": Build/run results: | tee -a " rd "log";
print "cat " rd cfr[j] "/kvm-test-1-run.sh.out | tee -a " rd "log";
}
@@ -467,16 +580,51 @@ END {
# Dump the last batch.
if (ncpus != 0)
dump(first, i, batchnum);
-}' >> $T/script
-
-cat << ___EOF___ >> $T/script
-echo
-echo
-echo " --- `date` Test summary:"
-echo Results directory: $resdir/$ds
-kcsan-collapse.sh $resdir/$ds
-kvm-recheck.sh $resdir/$ds
+}
___EOF___
+awk < $T/cfgcpu.pack \
+ -v TORTURE_BUILDONLY="$TORTURE_BUILDONLY" \
+ -v CONFIGDIR="$CONFIGFRAG/" \
+ -v RCUTORTURE="$RCUTORTURE" \
+ -v ncpus=$cpus \
+ -v jitter="$jitter" \
+ -v rd=$resdir/$ds/ \
+ -v dur=$dur \
+ -v TORTURE_QEMU_ARG="$TORTURE_QEMU_ARG" \
+ -v TORTURE_BOOTARGS="$TORTURE_BOOTARGS" \
+ -f $T/dumpbatches.awk >> $T/script
+echo kvm-end-run-stats.sh "$resdir/$ds" "$starttime" >> $T/script
+
+# Extract the tests and their batches from the script.
+grep -E 'Start batch|Starting build\.' $T/script | grep -v ">>" |
+ sed -e 's/:.*$//' -e 's/^echo //' -e 's/-ovf//' |
+ awk '
+ /^----Start/ {
+ batchno = $3;
+ next;
+ }
+ {
+ print batchno, $1, $2
+ }' > $T/batches
+
+# As above, but one line per batch.
+grep -v '^#' $T/batches | awk '
+BEGIN {
+ oldbatch = 1;
+}
+
+{
+ if (oldbatch != $1) {
+ print ++n ". " curbatch;
+ curbatch = "";
+ oldbatch = $1;
+ }
+ curbatch = curbatch " " $2;
+}
+
+END {
+ print ++n ". " curbatch;
+}' > $T/scenarios
if test "$dryrun" = script
then
@@ -485,15 +633,37 @@ then
elif test "$dryrun" = sched
then
# Extract the test run schedule from the script.
- egrep 'Start batch|Starting build\.' $T/script |
- grep -v ">>" |
+ grep -E 'Start batch|Starting build\.' $T/script | grep -v ">>" |
sed -e 's/:.*$//' -e 's/^echo //'
+ nbuilds="`grep 'Starting build\.' $T/script |
+ grep -v ">>" | sed -e 's/:.*$//' -e 's/^echo //' |
+ awk '{ print $1 }' | grep -v '\.' | wc -l`"
+ echo Total number of builds: $nbuilds
+ nbatches="`grep 'Start batch' $T/script | grep -v ">>" | wc -l`"
+ echo Total number of batches: $nbatches
+ exit 0
+elif test "$dryrun" = batches
+then
+ cat $T/batches
+ exit 0
+elif test "$dryrun" = scenarios
+then
+ cat $T/scenarios
exit 0
else
- # Not a dryrun, so run the script.
- sh $T/script
+ # Not a dryrun. Record the batches and the number of CPUs, then run the script.
+ bash $T/script
+ ret=$?
+ cp $T/batches $resdir/$ds/batches
+ cp $T/scenarios $resdir/$ds/scenarios
+ echo '#' cpus=$cpus >> $resdir/$ds/batches
+ exit $ret
fi
# Tracing: trace_event=rcu:rcu_grace_period,rcu:rcu_future_grace_period,rcu:rcu_grace_period_init,rcu:rcu_nocb_wake,rcu:rcu_preempt_task,rcu:rcu_unlock_preempted_task,rcu:rcu_quiescent_state_report,rcu:rcu_fqs,rcu:rcu_callback,rcu:rcu_kfree_callback,rcu:rcu_batch_start,rcu:rcu_invoke_callback,rcu:rcu_invoke_kfree_callback,rcu:rcu_batch_end,rcu:rcu_torture_read,rcu:rcu_barrier
# Function-graph tracing: ftrace=function_graph ftrace_graph_filter=sched_setaffinity,migration_cpu_stop
# Also --kconfig "CONFIG_FUNCTION_TRACER=y CONFIG_FUNCTION_GRAPH_TRACER=y"
+# Control buffer size: --bootargs trace_buf_size=3k
+# Get trace-buffer dumps on all oopses: --bootargs ftrace_dump_on_oops
+# Ditto, but dump only the oopsing CPU: --bootargs ftrace_dump_on_oops=orig_cpu
+# Heavy-handed way to also dump on warnings: --bootargs panic_on_warn=1
diff --git a/tools/testing/selftests/rcutorture/bin/mkinitrd.sh b/tools/testing/selftests/rcutorture/bin/mkinitrd.sh
index 38e424d2392c..f3f867129560 100755
--- a/tools/testing/selftests/rcutorture/bin/mkinitrd.sh
+++ b/tools/testing/selftests/rcutorture/bin/mkinitrd.sh
@@ -10,7 +10,6 @@
D=tools/testing/selftests/rcutorture
# Prerequisite checks
-[ -z "$D" ] && echo >&2 "No argument supplied" && exit 1
if [ ! -d "$D" ]; then
echo >&2 "$D does not exist: Malformed kernel source tree?"
exit 1
@@ -34,12 +33,16 @@ cat > init.c << '___EOF___'
volatile unsigned long delaycount;
-int main(int argc, int argv[])
+int main(int argc, char *argv[])
{
int i;
struct timeval tv;
struct timeval tvb;
+ printf("Torture-test rudimentary init program started, command line:\n");
+ for (i = 0; i < argc; i++)
+ printf(" %s", argv[i]);
+ printf("\n");
for (;;) {
sleep(1);
/* Need some userspace time. */
@@ -64,15 +67,26 @@ ___EOF___
# build using nolibc on supported archs (smaller executable) and fall
# back to regular glibc on other ones.
if echo -e "#if __x86_64__||__i386__||__i486__||__i586__||__i686__" \
- "||__ARM_EABI__||__aarch64__\nyes\n#endif" \
+ "||__ARM_EABI__||__aarch64__||(__mips__ && _ABIO32)" \
+ "||__powerpc__||(__riscv && __riscv_xlen == 64)" \
+ "||__s390x__||__loongarch__" \
+ "\nyes\n#endif" \
| ${CROSS_COMPILE}gcc -E -nostdlib -xc - \
| grep -q '^yes'; then
# architecture supported by nolibc
${CROSS_COMPILE}gcc -fno-asynchronous-unwind-tables -fno-ident \
-nostdlib -include ../../../../include/nolibc/nolibc.h \
- -lgcc -s -static -Os -o init init.c
+ -s -static -Os -o init init.c -lgcc
+ ret=$?
else
${CROSS_COMPILE}gcc -s -static -Os -o init init.c
+ ret=$?
+fi
+
+if [ "$ret" -ne 0 ]
+then
+ echo "Failed to create a statically linked C-language initrd"
+ exit "$ret"
fi
rm init.c
diff --git a/tools/testing/selftests/rcutorture/bin/parse-build.sh b/tools/testing/selftests/rcutorture/bin/parse-build.sh
index 09155c15ea65..5a0b7ffcf047 100755
--- a/tools/testing/selftests/rcutorture/bin/parse-build.sh
+++ b/tools/testing/selftests/rcutorture/bin/parse-build.sh
@@ -15,13 +15,12 @@
F=$1
title=$2
-T=${TMPDIR-/tmp}/parse-build.sh.$$
+T="`mktemp -d ${TMPDIR-/tmp}/parse-build.sh.XXXXXX`"
trap 'rm -rf $T' 0
-mkdir $T
. functions.sh
-if grep -q CC < $F || test -n "$TORTURE_TRUST_MAKE"
+if grep -q CC < $F || test -n "$TORTURE_TRUST_MAKE" || grep -qe --trust-make < `dirname $F`/../log
then
:
else
@@ -39,7 +38,8 @@ fi
grep warning: < $F > $T/warnings
grep "include/linux/*rcu*\.h:" $T/warnings > $T/hwarnings
grep "kernel/rcu/[^/]*:" $T/warnings > $T/cwarnings
-cat $T/hwarnings $T/cwarnings > $T/rcuwarnings
+grep "^ld: .*undefined reference to" $T/warnings | head -1 > $T/ldwarnings
+cat $T/hwarnings $T/cwarnings $T/ldwarnings > $T/rcuwarnings
if test -s $T/rcuwarnings
then
print_warning $title build errors:
diff --git a/tools/testing/selftests/rcutorture/bin/parse-console.sh b/tools/testing/selftests/rcutorture/bin/parse-console.sh
index 4bf62d7b1cbc..b07c11cf6929 100755
--- a/tools/testing/selftests/rcutorture/bin/parse-console.sh
+++ b/tools/testing/selftests/rcutorture/bin/parse-console.sh
@@ -11,7 +11,7 @@
#
# Authors: Paul E. McKenney <paulmck@linux.ibm.com>
-T=${TMPDIR-/tmp}/parse-console.sh.$$
+T="`mktemp -d ${TMPDIR-/tmp}/parse-console.sh.XXXXXX`"
file="$1"
title="$2"
@@ -33,8 +33,8 @@ then
fi
cat /dev/null > $file.diags
-# Check for proper termination, except that rcuperf runs don't indicate this.
-if test "$TORTURE_SUITE" != rcuperf
+# Check for proper termination, except for rcuscale and refscale.
+if test "$TORTURE_SUITE" != rcuscale && test "$TORTURE_SUITE" != refscale
then
# check for abject failure
@@ -44,17 +44,30 @@ then
tail -1 |
awk '
{
- for (i=NF-8;i<=NF;i++)
+ normalexit = 1;
+ for (i=NF-8;i<=NF;i++) {
+ if (i <= 0 || i !~ /^[0-9]*$/) {
+ bangstring = $0;
+ gsub(/^\[[^]]*] /, "", bangstring);
+ print bangstring;
+ normalexit = 0;
+ exit 0;
+ }
sum+=$i;
+ }
}
- END { print sum }'`
- print_bug $title FAILURE, $nerrs instances
+ END {
+ if (normalexit)
+ print sum " instances"
+ }'`
+ print_bug $title FAILURE, $nerrs
exit
fi
grep --binary-files=text 'torture:.*ver:' $file |
- egrep --binary-files=text -v '\(null\)|rtc: 000000000* ' |
+ grep -E --binary-files=text -v '\(null\)|rtc: 000000000* ' |
sed -e 's/^(initramfs)[^]]*] //' -e 's/^\[[^]]*] //' |
+ sed -e 's/^.*ver: //' |
awk '
BEGIN {
ver = 0;
@@ -62,13 +75,13 @@ then
}
{
- if (!badseq && ($5 + 0 != $5 || $5 <= ver)) {
+ if (!badseq && ($1 + 0 != $1 || $1 <= ver)) {
badseqno1 = ver;
- badseqno2 = $5;
+ badseqno2 = $1;
badseqnr = NR;
badseq = 1;
}
- ver = $5
+ ver = $1
}
END {
@@ -104,10 +117,7 @@ then
fi
fi | tee -a $file.diags
-egrep 'Badness|WARNING:|Warn|BUG|===========|Call Trace:|Oops:|detected stalls on CPUs/tasks:|self-detected stall on CPU|Stall ended before state dump start|\?\?\? Writer stall state|rcu_.*kthread starved for' < $file |
-grep -v 'ODEBUG: ' |
-grep -v 'This means that this is a DEBUG kernel and it is' |
-grep -v 'Warning: unable to open an initial console' > $T.diags
+console-badness.sh < $file > $T.diags
if test -s $T.diags
then
print_warning "Assertion failure in $file $title"
@@ -118,16 +128,26 @@ then
then
summary="$summary Badness: $n_badness"
fi
- n_warn=`grep -v 'Warning: unable to open an initial console' $file | egrep -c 'WARNING:|Warn'`
+ n_warn=`grep -v 'Warning: unable to open an initial console' $file | grep -v 'Warning: Failed to add ttynull console. No stdin, stdout, and stderr for the init process' | grep -E -c 'WARNING:|Warn'`
if test "$n_warn" -ne 0
then
summary="$summary Warnings: $n_warn"
fi
- n_bugs=`egrep -c 'BUG|Oops:' $file`
+ n_bugs=`grep -E -c '\bBUG|Oops:' $file`
if test "$n_bugs" -ne 0
then
summary="$summary Bugs: $n_bugs"
fi
+ n_kcsan=`grep -E -c 'BUG: KCSAN: ' $file`
+ if test "$n_kcsan" -ne 0
+ then
+ if test "$n_bugs" = "$n_kcsan"
+ then
+ summary="$summary (all bugs kcsan)"
+ else
+ summary="$summary KCSAN: $n_kcsan"
+ fi
+ fi
n_calltrace=`grep -c 'Call Trace:' $file`
if test "$n_calltrace" -ne 0
then
@@ -138,7 +158,7 @@ then
then
summary="$summary lockdep: $n_badness"
fi
- n_stalls=`egrep -c 'detected stalls on CPUs/tasks:|self-detected stall on CPU|Stall ended before state dump start|\?\?\? Writer stall state' $file`
+ n_stalls=`grep -E -c 'detected stalls on CPUs/tasks:|self-detected stall on CPU|Stall ended before state dump start|\?\?\? Writer stall state' $file`
if test "$n_stalls" -ne 0
then
summary="$summary Stalls: $n_stalls"
@@ -162,3 +182,10 @@ if ! test -s $file.diags
then
rm -f $file.diags
fi
+
+# Call extract_ftrace_from_console function, if the output is empty,
+# don't create $file.ftrace. Otherwise output the results to $file.ftrace
+extract_ftrace_from_console $file > $file.ftrace
+if [ ! -s $file.ftrace ]; then
+ rm -f $file.ftrace
+fi
diff --git a/tools/testing/selftests/rcutorture/bin/srcu_lockdep.sh b/tools/testing/selftests/rcutorture/bin/srcu_lockdep.sh
new file mode 100755
index 000000000000..2e63ef009d59
--- /dev/null
+++ b/tools/testing/selftests/rcutorture/bin/srcu_lockdep.sh
@@ -0,0 +1,78 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0+
+#
+# Run SRCU-lockdep tests and report any that fail to meet expectations.
+#
+# Copyright (C) 2021 Meta Platforms, Inc.
+#
+# Authors: Paul E. McKenney <paulmck@kernel.org>
+
+usage () {
+ echo "Usage: $scriptname optional arguments:"
+ echo " --datestamp string"
+ exit 1
+}
+
+ds=`date +%Y.%m.%d-%H.%M.%S`-srcu_lockdep
+scriptname="$0"
+
+T="`mktemp -d ${TMPDIR-/tmp}/srcu_lockdep.sh.XXXXXX`"
+trap 'rm -rf $T' 0
+
+RCUTORTURE="`pwd`/tools/testing/selftests/rcutorture"; export RCUTORTURE
+PATH=${RCUTORTURE}/bin:$PATH; export PATH
+. functions.sh
+
+while test $# -gt 0
+do
+ case "$1" in
+ --datestamp)
+ checkarg --datestamp "(relative pathname)" "$#" "$2" '^[a-zA-Z0-9._/-]*$' '^--'
+ ds=$2
+ shift
+ ;;
+ *)
+ echo Unknown argument $1
+ usage
+ ;;
+ esac
+ shift
+done
+
+err=
+nerrs=0
+for d in 0 1
+do
+ for t in 0 1 2
+ do
+ for c in 1 2 3
+ do
+ err=
+ val=$((d*1000+t*10+c))
+ tools/testing/selftests/rcutorture/bin/kvm.sh --allcpus --duration 5s --configs "SRCU-P" --bootargs "rcutorture.test_srcu_lockdep=$val" --trust-make --datestamp "$ds/$val" > "$T/kvm.sh.out" 2>&1
+ ret=$?
+ mv "$T/kvm.sh.out" "$RCUTORTURE/res/$ds/$val"
+ if test "$d" -ne 0 && test "$ret" -eq 0
+ then
+ err=1
+ echo -n Unexpected success for > "$RCUTORTURE/res/$ds/$val/kvm.sh.err"
+ fi
+ if test "$d" -eq 0 && test "$ret" -ne 0
+ then
+ err=1
+ echo -n Unexpected failure for > "$RCUTORTURE/res/$ds/$val/kvm.sh.err"
+ fi
+ if test -n "$err"
+ then
+ grep "rcu_torture_init_srcu_lockdep: test_srcu_lockdep = " "$RCUTORTURE/res/$ds/$val/SRCU-P/console.log" | sed -e 's/^.*rcu_torture_init_srcu_lockdep://' >> "$RCUTORTURE/res/$ds/$val/kvm.sh.err"
+ cat "$RCUTORTURE/res/$ds/$val/kvm.sh.err"
+ nerrs=$((nerrs+1))
+ fi
+ done
+ done
+done
+if test "$nerrs" -ne 0
+then
+ exit 1
+fi
+exit 0
diff --git a/tools/testing/selftests/rcutorture/bin/torture.sh b/tools/testing/selftests/rcutorture/bin/torture.sh
new file mode 100755
index 000000000000..bbac5f4b03d0
--- /dev/null
+++ b/tools/testing/selftests/rcutorture/bin/torture.sh
@@ -0,0 +1,695 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0+
+#
+# Run a series of torture tests, intended for overnight or
+# longer timeframes, and also for large systems.
+#
+# Usage: torture.sh [ options ]
+#
+# Copyright (C) 2020 Facebook, Inc.
+#
+# Authors: Paul E. McKenney <paulmck@kernel.org>
+
+scriptname=$0
+args="$*"
+
+RCUTORTURE="`pwd`/tools/testing/selftests/rcutorture"; export RCUTORTURE
+PATH=${RCUTORTURE}/bin:$PATH; export PATH
+. functions.sh
+
+TORTURE_ALLOTED_CPUS="`identify_qemu_vcpus`"
+MAKE_ALLOTED_CPUS=$((TORTURE_ALLOTED_CPUS*2))
+HALF_ALLOTED_CPUS=$((TORTURE_ALLOTED_CPUS/2))
+if test "$HALF_ALLOTED_CPUS" -lt 1
+then
+ HALF_ALLOTED_CPUS=1
+fi
+VERBOSE_BATCH_CPUS=$((TORTURE_ALLOTED_CPUS/16))
+if test "$VERBOSE_BATCH_CPUS" -lt 2
+then
+ VERBOSE_BATCH_CPUS=0
+fi
+
+# Configurations/scenarios.
+configs_rcutorture=
+configs_locktorture=
+configs_scftorture=
+kcsan_kmake_args=
+
+# Default compression, duration, and apportionment.
+compress_concurrency="`identify_qemu_vcpus`"
+duration_base=10
+duration_rcutorture_frac=7
+duration_locktorture_frac=1
+duration_scftorture_frac=2
+
+# "yes" or "no" parameters
+do_allmodconfig=yes
+do_rcutorture=yes
+do_locktorture=yes
+do_scftorture=yes
+do_rcuscale=yes
+do_refscale=yes
+do_kvfree=yes
+do_kasan=yes
+do_kcsan=no
+do_clocksourcewd=yes
+do_rt=yes
+do_rcutasksflavors=yes
+do_srcu_lockdep=yes
+
+# doyesno - Helper function for yes/no arguments
+function doyesno () {
+ if test "$1" = "$2"
+ then
+ echo yes
+ else
+ echo no
+ fi
+}
+
+usage () {
+ echo "Usage: $scriptname optional arguments:"
+ echo " --compress-concurrency concurrency"
+ echo " --configs-rcutorture \"config-file list w/ repeat factor (3*TINY01)\""
+ echo " --configs-locktorture \"config-file list w/ repeat factor (10*LOCK01)\""
+ echo " --configs-scftorture \"config-file list w/ repeat factor (2*CFLIST)\""
+ echo " --do-all"
+ echo " --do-allmodconfig / --do-no-allmodconfig / --no-allmodconfig"
+ echo " --do-clocksourcewd / --do-no-clocksourcewd / --no-clocksourcewd"
+ echo " --do-kasan / --do-no-kasan / --no-kasan"
+ echo " --do-kcsan / --do-no-kcsan / --no-kcsan"
+ echo " --do-kvfree / --do-no-kvfree / --no-kvfree"
+ echo " --do-locktorture / --do-no-locktorture / --no-locktorture"
+ echo " --do-none"
+ echo " --do-rcuscale / --do-no-rcuscale / --no-rcuscale"
+ echo " --do-rcutasksflavors / --do-no-rcutasksflavors / --no-rcutasksflavors"
+ echo " --do-rcutorture / --do-no-rcutorture / --no-rcutorture"
+ echo " --do-refscale / --do-no-refscale / --no-refscale"
+ echo " --do-rt / --do-no-rt / --no-rt"
+ echo " --do-scftorture / --do-no-scftorture / --no-scftorture"
+ echo " --do-srcu-lockdep / --do-no-srcu-lockdep / --no-srcu-lockdep"
+ echo " --duration [ <minutes> | <hours>h | <days>d ]"
+ echo " --kcsan-kmake-arg kernel-make-arguments"
+ exit 1
+}
+
+while test $# -gt 0
+do
+ case "$1" in
+ --compress-concurrency)
+ checkarg --compress-concurrency "(concurrency level)" $# "$2" '^[0-9][0-9]*$' '^error'
+ compress_concurrency=$2
+ shift
+ ;;
+ --config-rcutorture|--configs-rcutorture)
+ checkarg --configs-rcutorture "(list of config files)" "$#" "$2" '^[^/]\+$' '^--'
+ configs_rcutorture="$configs_rcutorture $2"
+ shift
+ ;;
+ --config-locktorture|--configs-locktorture)
+ checkarg --configs-locktorture "(list of config files)" "$#" "$2" '^[^/]\+$' '^--'
+ configs_locktorture="$configs_locktorture $2"
+ shift
+ ;;
+ --config-scftorture|--configs-scftorture)
+ checkarg --configs-scftorture "(list of config files)" "$#" "$2" '^[^/]\+$' '^--'
+ configs_scftorture="$configs_scftorture $2"
+ shift
+ ;;
+ --do-all|--doall)
+ do_allmodconfig=yes
+ do_rcutasksflavor=yes
+ do_rcutorture=yes
+ do_locktorture=yes
+ do_scftorture=yes
+ do_rcuscale=yes
+ do_refscale=yes
+ do_rt=yes
+ do_kvfree=yes
+ do_kasan=yes
+ do_kcsan=yes
+ do_clocksourcewd=yes
+ do_srcu_lockdep=yes
+ ;;
+ --do-allmodconfig|--do-no-allmodconfig|--no-allmodconfig)
+ do_allmodconfig=`doyesno "$1" --do-allmodconfig`
+ ;;
+ --do-clocksourcewd|--do-no-clocksourcewd|--no-clocksourcewd)
+ do_clocksourcewd=`doyesno "$1" --do-clocksourcewd`
+ ;;
+ --do-kasan|--do-no-kasan|--no-kasan)
+ do_kasan=`doyesno "$1" --do-kasan`
+ ;;
+ --do-kcsan|--do-no-kcsan|--no-kcsan)
+ do_kcsan=`doyesno "$1" --do-kcsan`
+ ;;
+ --do-kvfree|--do-no-kvfree|--no-kvfree)
+ do_kvfree=`doyesno "$1" --do-kvfree`
+ ;;
+ --do-locktorture|--do-no-locktorture|--no-locktorture)
+ do_locktorture=`doyesno "$1" --do-locktorture`
+ ;;
+ --do-none|--donone)
+ do_allmodconfig=no
+ do_rcutasksflavors=no
+ do_rcutorture=no
+ do_locktorture=no
+ do_scftorture=no
+ do_rcuscale=no
+ do_refscale=no
+ do_rt=no
+ do_kvfree=no
+ do_kasan=no
+ do_kcsan=no
+ do_clocksourcewd=no
+ do_srcu_lockdep=no
+ ;;
+ --do-rcuscale|--do-no-rcuscale|--no-rcuscale)
+ do_rcuscale=`doyesno "$1" --do-rcuscale`
+ ;;
+ --do-rcutasksflavors|--do-no-rcutasksflavors|--no-rcutasksflavors)
+ do_rcutasksflavors=`doyesno "$1" --do-rcutasksflavors`
+ ;;
+ --do-rcutorture|--do-no-rcutorture|--no-rcutorture)
+ do_rcutorture=`doyesno "$1" --do-rcutorture`
+ ;;
+ --do-refscale|--do-no-refscale|--no-refscale)
+ do_refscale=`doyesno "$1" --do-refscale`
+ ;;
+ --do-rt|--do-no-rt|--no-rt)
+ do_rt=`doyesno "$1" --do-rt`
+ ;;
+ --do-scftorture|--do-no-scftorture|--no-scftorture)
+ do_scftorture=`doyesno "$1" --do-scftorture`
+ ;;
+ --do-srcu-lockdep|--do-no-srcu-lockdep|--no-srcu-lockdep)
+ do_srcu_lockdep=`doyesno "$1" --do-srcu-lockdep`
+ ;;
+ --duration)
+ checkarg --duration "(minutes)" $# "$2" '^[0-9][0-9]*\(m\|h\|d\|\)$' '^error'
+ mult=1
+ if echo "$2" | grep -q 'm$'
+ then
+ mult=1
+ elif echo "$2" | grep -q 'h$'
+ then
+ mult=60
+ elif echo "$2" | grep -q 'd$'
+ then
+ mult=1440
+ fi
+ ts=`echo $2 | sed -e 's/[smhd]$//'`
+ duration_base=$(($ts*mult))
+ shift
+ ;;
+ --kcsan-kmake-arg|--kcsan-kmake-args)
+ checkarg --kcsan-kmake-arg "(kernel make arguments)" $# "$2" '.*' '^error$'
+ kcsan_kmake_args="`echo "$kcsan_kmake_args $2" | sed -e 's/^ *//' -e 's/ *$//'`"
+ shift
+ ;;
+ *)
+ echo Unknown argument $1
+ usage
+ ;;
+ esac
+ shift
+done
+
+ds="`date +%Y.%m.%d-%H.%M.%S`-torture"
+startdate="`date`"
+starttime="`get_starttime`"
+
+T="`mktemp -d ${TMPDIR-/tmp}/torture.sh.XXXXXX`"
+trap 'rm -rf $T' 0 2
+
+echo " --- " $scriptname $args | tee -a $T/log
+echo " --- Results directory: " $ds | tee -a $T/log
+
+# Calculate rcutorture defaults and apportion time
+if test -z "$configs_rcutorture"
+then
+ configs_rcutorture=CFLIST
+fi
+duration_rcutorture=$((duration_base*duration_rcutorture_frac/10))
+if test "$duration_rcutorture" -eq 0
+then
+ echo " --- Zero time for rcutorture, disabling" | tee -a $T/log
+ do_rcutorture=no
+fi
+
+# Calculate locktorture defaults and apportion time
+if test -z "$configs_locktorture"
+then
+ configs_locktorture=CFLIST
+fi
+duration_locktorture=$((duration_base*duration_locktorture_frac/10))
+if test "$duration_locktorture" -eq 0
+then
+ echo " --- Zero time for locktorture, disabling" | tee -a $T/log
+ do_locktorture=no
+fi
+
+# Calculate scftorture defaults and apportion time
+if test -z "$configs_scftorture"
+then
+ configs_scftorture=CFLIST
+fi
+duration_scftorture=$((duration_base*duration_scftorture_frac/10))
+if test "$duration_scftorture" -eq 0
+then
+ echo " --- Zero time for scftorture, disabling" | tee -a $T/log
+ do_scftorture=no
+fi
+
+touch $T/failures
+touch $T/successes
+
+# torture_one - Does a single kvm.sh run.
+#
+# Usage:
+# torture_bootargs="[ kernel boot arguments ]"
+# torture_one flavor [ kvm.sh arguments ]
+#
+# Note that "flavor" is an arbitrary string. Supply --torture if needed.
+# Note that quoting is problematic. So on the command line, pass multiple
+# values with multiple kvm.sh argument instances.
+function torture_one {
+ local cur_bootargs=
+ local boottag=
+
+ echo " --- $curflavor:" Start `date` | tee -a $T/log
+ if test -n "$torture_bootargs"
+ then
+ boottag="--bootargs"
+ cur_bootargs="$torture_bootargs"
+ fi
+ "$@" $boottag "$cur_bootargs" --datestamp "$ds/results-$curflavor" > $T/$curflavor.out 2>&1
+ retcode=$?
+ resdir="`grep '^Results directory: ' $T/$curflavor.out | tail -1 | sed -e 's/^Results directory: //'`"
+ if test -z "$resdir"
+ then
+ cat $T/$curflavor.out | tee -a $T/log
+ echo retcode=$retcode | tee -a $T/log
+ else
+ echo $resdir > $T/last-resdir
+ fi
+ if test "$retcode" == 0
+ then
+ echo "$curflavor($retcode)" $resdir >> $T/successes
+ else
+ echo "$curflavor($retcode)" $resdir >> $T/failures
+ fi
+}
+
+# torture_set - Does a set of tortures with and without KASAN and KCSAN.
+#
+# Usage:
+# torture_bootargs="[ kernel boot arguments ]"
+# torture_set flavor [ kvm.sh arguments ]
+#
+# Note that "flavor" is an arbitrary string that does not affect kvm.sh
+# in any way. So also supply --torture if you need something other than
+# the default.
+function torture_set {
+ local cur_kcsan_kmake_args=
+ local kcsan_kmake_tag=
+ local flavor=$1
+ shift
+ curflavor=$flavor
+ torture_one "$@"
+ mv $T/last-resdir $T/last-resdir-nodebug || :
+ if test "$do_kasan" = "yes"
+ then
+ curflavor=${flavor}-kasan
+ torture_one "$@" --kasan
+ mv $T/last-resdir $T/last-resdir-kasan || :
+ fi
+ if test "$do_kcsan" = "yes"
+ then
+ curflavor=${flavor}-kcsan
+ if test -n "$kcsan_kmake_args"
+ then
+ kcsan_kmake_tag="--kmake-args"
+ cur_kcsan_kmake_args="$kcsan_kmake_args"
+ fi
+ torture_one "$@" --kconfig "CONFIG_DEBUG_LOCK_ALLOC=y CONFIG_PROVE_LOCKING=y" $kcsan_kmake_tag $cur_kcsan_kmake_args --kcsan
+ mv $T/last-resdir $T/last-resdir-kcsan || :
+ fi
+}
+
+# make allmodconfig
+if test "$do_allmodconfig" = "yes"
+then
+ echo " --- allmodconfig:" Start `date` | tee -a $T/log
+ amcdir="tools/testing/selftests/rcutorture/res/$ds/allmodconfig"
+ mkdir -p "$amcdir"
+ echo " --- make clean" | tee $amcdir/log > "$amcdir/Make.out" 2>&1
+ make -j$MAKE_ALLOTED_CPUS clean >> "$amcdir/Make.out" 2>&1
+ retcode=$?
+ buildphase='"make clean"'
+ if test "$retcode" -eq 0
+ then
+ echo " --- make allmodconfig" | tee -a $amcdir/log >> "$amcdir/Make.out" 2>&1
+ cp .config $amcdir
+ make -j$MAKE_ALLOTED_CPUS allmodconfig >> "$amcdir/Make.out" 2>&1
+ retcode=$?
+ buildphase='"make allmodconfig"'
+ fi
+ if test "$retcode" -eq 0
+ then
+ echo " --- make " | tee -a $amcdir/log >> "$amcdir/Make.out" 2>&1
+ make -j$MAKE_ALLOTED_CPUS >> "$amcdir/Make.out" 2>&1
+ retcode="$?"
+ echo $retcode > "$amcdir/Make.exitcode"
+ buildphase='"make"'
+ fi
+ if test "$retcode" -eq 0
+ then
+ echo "allmodconfig($retcode)" $amcdir >> $T/successes
+ echo Success >> $amcdir/log
+ else
+ echo "allmodconfig($retcode)" $amcdir >> $T/failures
+ echo " --- allmodconfig Test summary:" >> $amcdir/log
+ echo " --- Summary: Exit code $retcode from $buildphase, see Make.out" >> $amcdir/log
+ fi
+fi
+
+# Test building RCU Tasks flavors in isolation, both SMP and !SMP
+if test "$do_rcutasksflavors" = "yes"
+then
+ echo " --- rcutasksflavors:" Start `date` | tee -a $T/log
+ rtfdir="tools/testing/selftests/rcutorture/res/$ds/results-rcutasksflavors"
+ mkdir -p "$rtfdir"
+ cat > $T/rcutasksflavors << __EOF__
+#CHECK#CONFIG_TASKS_RCU=n
+#CHECK#CONFIG_TASKS_RUDE_RCU=n
+#CHECK#CONFIG_TASKS_TRACE_RCU=n
+__EOF__
+ for flavor in CONFIG_TASKS_RCU CONFIG_TASKS_RUDE_RCU CONFIG_TASKS_TRACE_RCU
+ do
+ forceflavor="`echo $flavor | sed -e 's/^CONFIG/CONFIG_FORCE/'`"
+ deselectedflavors="`grep -v $flavor $T/rcutasksflavors | tr '\012' ' ' | tr -s ' ' | sed -e 's/ *$//'`"
+ echo " --- Running RCU Tasks Trace flavor $flavor `date`" >> $rtfdir/log
+ tools/testing/selftests/rcutorture/bin/kvm.sh --datestamp "$ds/results-rcutasksflavors/$flavor" --buildonly --configs "TINY01 TREE04" --kconfig "CONFIG_RCU_EXPERT=y CONFIG_RCU_SCALE_TEST=y $forceflavor=y $deselectedflavors" --trust-make > $T/$flavor.out 2>&1
+ retcode=$?
+ if test "$retcode" -ne 0
+ then
+ break
+ fi
+ done
+ if test "$retcode" -eq 0
+ then
+ echo "rcutasksflavors($retcode)" $rtfdir >> $T/successes
+ echo Success >> $rtfdir/log
+ else
+ echo "rcutasksflavors($retcode)" $rtfdir >> $T/failures
+ echo " --- rcutasksflavors Test summary:" >> $rtfdir/log
+ echo " --- Summary: Exit code $retcode from $flavor, see Make.out" >> $rtfdir/log
+ fi
+fi
+
+# --torture rcu
+if test "$do_rcutorture" = "yes"
+then
+ torture_bootargs="rcupdate.rcu_cpu_stall_suppress_at_boot=1 torture.disable_onoff_at_boot rcupdate.rcu_task_stall_timeout=30000"
+ torture_set "rcutorture" tools/testing/selftests/rcutorture/bin/kvm.sh --allcpus --duration "$duration_rcutorture" --configs "$configs_rcutorture" --trust-make
+fi
+
+if test "$do_locktorture" = "yes"
+then
+ torture_bootargs="torture.disable_onoff_at_boot"
+ torture_set "locktorture" tools/testing/selftests/rcutorture/bin/kvm.sh --torture lock --allcpus --duration "$duration_locktorture" --configs "$configs_locktorture" --trust-make
+fi
+
+if test "$do_scftorture" = "yes"
+then
+ # Scale memory based on the number of CPUs.
+ scfmem=$((2+HALF_ALLOTED_CPUS/16))
+ torture_bootargs="scftorture.nthreads=$HALF_ALLOTED_CPUS torture.disable_onoff_at_boot csdlock_debug=1"
+ torture_set "scftorture" tools/testing/selftests/rcutorture/bin/kvm.sh --torture scf --allcpus --duration "$duration_scftorture" --configs "$configs_scftorture" --kconfig "CONFIG_NR_CPUS=$HALF_ALLOTED_CPUS" --memory ${scfmem}G --trust-make
+fi
+
+if test "$do_rt" = "yes"
+then
+ # With all post-boot grace periods forced to normal.
+ torture_bootargs="rcupdate.rcu_cpu_stall_suppress_at_boot=1 torture.disable_onoff_at_boot rcupdate.rcu_task_stall_timeout=30000 rcupdate.rcu_normal=1"
+ torture_set "rcurttorture" tools/testing/selftests/rcutorture/bin/kvm.sh --allcpus --duration "$duration_rcutorture" --configs "TREE03" --trust-make
+
+ # With all post-boot grace periods forced to expedited.
+ torture_bootargs="rcupdate.rcu_cpu_stall_suppress_at_boot=1 torture.disable_onoff_at_boot rcupdate.rcu_task_stall_timeout=30000 rcupdate.rcu_expedited=1"
+ torture_set "rcurttorture-exp" tools/testing/selftests/rcutorture/bin/kvm.sh --allcpus --duration "$duration_rcutorture" --configs "TREE03" --trust-make
+fi
+
+if test "$do_srcu_lockdep" = "yes"
+then
+ echo " --- do-srcu-lockdep:" Start `date` | tee -a $T/log
+ tools/testing/selftests/rcutorture/bin/srcu_lockdep.sh --datestamp "$ds/results-srcu-lockdep" > $T/srcu_lockdep.sh.out 2>&1
+ retcode=$?
+ cp $T/srcu_lockdep.sh.out "tools/testing/selftests/rcutorture/res/$ds/results-srcu-lockdep/log"
+ if test "$retcode" -eq 0
+ then
+ echo "srcu_lockdep($retcode)" "tools/testing/selftests/rcutorture/res/$ds/results-srcu-lockdep" >> $T/successes
+ echo Success >> "tools/testing/selftests/rcutorture/res/$ds/results-srcu-lockdep/log"
+ else
+ echo "srcu_lockdep($retcode)" "tools/testing/selftests/rcutorture/res/$ds/results-srcu-lockdep" >> $T/failures
+ echo " --- srcu_lockdep Test Summary:" >> "tools/testing/selftests/rcutorture/res/$ds/results-srcu-lockdep/log"
+ echo " --- Summary: Exit code $retcode from srcu_lockdep.sh, see ds/results-srcu-lockdep" >> "tools/testing/selftests/rcutorture/res/$ds/results-srcu-lockdep/log"
+ fi
+fi
+
+if test "$do_refscale" = yes
+then
+ primlist="`grep '\.name[ ]*=' kernel/rcu/refscale.c | sed -e 's/^[^"]*"//' -e 's/".*$//'`"
+else
+ primlist=
+fi
+firsttime=1
+do_kasan_save="$do_kasan"
+do_kcsan_save="$do_kcsan"
+for prim in $primlist
+do
+ if test -n "$firsttime"
+ then
+ torture_bootargs="refscale.scale_type="$prim" refscale.nreaders=$HALF_ALLOTED_CPUS refscale.loops=10000 refscale.holdoff=20 torture.disable_onoff_at_boot"
+ torture_set "refscale-$prim" tools/testing/selftests/rcutorture/bin/kvm.sh --torture refscale --allcpus --duration 5 --kconfig "CONFIG_TASKS_TRACE_RCU=y CONFIG_NR_CPUS=$HALF_ALLOTED_CPUS" --bootargs "refscale.verbose_batched=$VERBOSE_BATCH_CPUS torture.verbose_sleep_frequency=8 torture.verbose_sleep_duration=$VERBOSE_BATCH_CPUS" --trust-make
+ mv $T/last-resdir-nodebug $T/first-resdir-nodebug || :
+ if test -f "$T/last-resdir-kasan"
+ then
+ mv $T/last-resdir-kasan $T/first-resdir-kasan || :
+ fi
+ if test -f "$T/last-resdir-kcsan"
+ then
+ mv $T/last-resdir-kcsan $T/first-resdir-kcsan || :
+ fi
+ firsttime=
+ do_kasan=
+ do_kcsan=
+ else
+ torture_bootargs=
+ for i in $T/first-resdir-*
+ do
+ case "$i" in
+ *-nodebug)
+ torture_suffix=
+ ;;
+ *-kasan)
+ torture_suffix="-kasan"
+ ;;
+ *-kcsan)
+ torture_suffix="-kcsan"
+ ;;
+ esac
+ torture_set "refscale-$prim$torture_suffix" tools/testing/selftests/rcutorture/bin/kvm-again.sh "`cat "$i"`" --duration 5 --bootargs "refscale.scale_type=$prim"
+ done
+ fi
+done
+do_kasan="$do_kasan_save"
+do_kcsan="$do_kcsan_save"
+
+if test "$do_rcuscale" = yes
+then
+ primlist="`grep '\.name[ ]*=' kernel/rcu/rcuscale.c | sed -e 's/^[^"]*"//' -e 's/".*$//'`"
+else
+ primlist=
+fi
+firsttime=1
+do_kasan_save="$do_kasan"
+do_kcsan_save="$do_kcsan"
+for prim in $primlist
+do
+ if test -n "$firsttime"
+ then
+ torture_bootargs="rcuscale.scale_type="$prim" rcuscale.nwriters=$HALF_ALLOTED_CPUS rcuscale.holdoff=20 torture.disable_onoff_at_boot"
+ torture_set "rcuscale-$prim" tools/testing/selftests/rcutorture/bin/kvm.sh --torture rcuscale --allcpus --duration 5 --kconfig "CONFIG_TASKS_TRACE_RCU=y CONFIG_NR_CPUS=$HALF_ALLOTED_CPUS" --trust-make
+ mv $T/last-resdir-nodebug $T/first-resdir-nodebug || :
+ if test -f "$T/last-resdir-kasan"
+ then
+ mv $T/last-resdir-kasan $T/first-resdir-kasan || :
+ fi
+ if test -f "$T/last-resdir-kcsan"
+ then
+ mv $T/last-resdir-kcsan $T/first-resdir-kcsan || :
+ fi
+ firsttime=
+ do_kasan=
+ do_kcsan=
+ else
+ torture_bootargs=
+ for i in $T/first-resdir-*
+ do
+ case "$i" in
+ *-nodebug)
+ torture_suffix=
+ ;;
+ *-kasan)
+ torture_suffix="-kasan"
+ ;;
+ *-kcsan)
+ torture_suffix="-kcsan"
+ ;;
+ esac
+ torture_set "rcuscale-$prim$torture_suffix" tools/testing/selftests/rcutorture/bin/kvm-again.sh "`cat "$i"`" --duration 5 --bootargs "rcuscale.scale_type=$prim"
+ done
+ fi
+done
+do_kasan="$do_kasan_save"
+do_kcsan="$do_kcsan_save"
+
+if test "$do_kvfree" = "yes"
+then
+ torture_bootargs="rcuscale.kfree_rcu_test=1 rcuscale.kfree_nthreads=16 rcuscale.holdoff=20 rcuscale.kfree_loops=10000 torture.disable_onoff_at_boot"
+ torture_set "rcuscale-kvfree" tools/testing/selftests/rcutorture/bin/kvm.sh --torture rcuscale --allcpus --duration 10 --kconfig "CONFIG_NR_CPUS=$HALF_ALLOTED_CPUS" --memory 2G --trust-make
+fi
+
+if test "$do_clocksourcewd" = "yes"
+then
+ torture_bootargs="rcupdate.rcu_cpu_stall_suppress_at_boot=1 torture.disable_onoff_at_boot rcupdate.rcu_task_stall_timeout=30000 tsc=watchdog"
+ torture_set "clocksourcewd-1" tools/testing/selftests/rcutorture/bin/kvm.sh --allcpus --duration 45s --configs TREE03 --kconfig "CONFIG_TEST_CLOCKSOURCE_WATCHDOG=y" --trust-make
+
+ torture_bootargs="rcupdate.rcu_cpu_stall_suppress_at_boot=1 torture.disable_onoff_at_boot rcupdate.rcu_task_stall_timeout=30000 tsc=watchdog"
+ torture_set "clocksourcewd-2" tools/testing/selftests/rcutorture/bin/kvm.sh --allcpus --duration 45s --configs TREE03 --kconfig "CONFIG_TEST_CLOCKSOURCE_WATCHDOG=y" --trust-make
+
+ # In case our work is already done...
+ if test "$do_rcutorture" != "yes"
+ then
+ torture_bootargs="rcupdate.rcu_cpu_stall_suppress_at_boot=1 torture.disable_onoff_at_boot rcupdate.rcu_task_stall_timeout=30000 tsc=watchdog"
+ torture_set "clocksourcewd-3" tools/testing/selftests/rcutorture/bin/kvm.sh --allcpus --duration 45s --configs TREE03 --trust-make
+ fi
+fi
+
+echo " --- " $scriptname $args
+echo " --- " Done `date` | tee -a $T/log
+ret=0
+nsuccesses=0
+echo SUCCESSES: | tee -a $T/log
+if test -s "$T/successes"
+then
+ cat "$T/successes" | tee -a $T/log
+ nsuccesses="`wc -l "$T/successes" | awk '{ print $1 }'`"
+fi
+nfailures=0
+echo FAILURES: | tee -a $T/log
+if test -s "$T/failures"
+then
+ awk < "$T/failures" -v sq="'" '{ print "echo " sq $0 sq; print "sed -e " sq "1,/^ --- .* Test summary:$/d" sq " " $2 "/log | grep Summary: | sed -e " sq "s/^[^S]*/ /" sq; }' | sh | tee -a $T/log | tee "$T/failuresum"
+ nfailures="`wc -l "$T/failures" | awk '{ print $1 }'`"
+ grep "^ Summary: " "$T/failuresum" |
+ grep -v '^ Summary: Bugs: [0-9]* (all bugs kcsan)$' > "$T/nonkcsan"
+ if test -s "$T/nonkcsan"
+ then
+ nonkcsanbug="yes"
+ fi
+ ret=2
+fi
+if test "$do_kcsan" = "yes"
+then
+ TORTURE_KCONFIG_KCSAN_ARG=1 tools/testing/selftests/rcutorture/bin/kcsan-collapse.sh tools/testing/selftests/rcutorture/res/$ds > tools/testing/selftests/rcutorture/res/$ds/kcsan.sum
+fi
+echo Started at $startdate, ended at `date`, duration `get_starttime_duration $starttime`. | tee -a $T/log
+echo Summary: Successes: $nsuccesses Failures: $nfailures. | tee -a $T/log
+tdir="`cat $T/successes $T/failures | head -1 | awk '{ print $NF }' | sed -e 's,/[^/]\+/*$,,'`"
+find "$tdir" -name 'ConfigFragment.diags' -print > $T/configerrors
+find "$tdir" -name 'Make.out.diags' -print > $T/builderrors
+if test -s "$T/configerrors"
+then
+ echo " Scenarios with .config errors: `wc -l "$T/configerrors" | awk '{ print $1 }'`"
+ nonkcsanbug="yes"
+fi
+if test -s "$T/builderrors"
+then
+ echo " Scenarios with build errors: `wc -l "$T/builderrors" | awk '{ print $1 }'`"
+ nonkcsanbug="yes"
+fi
+if test -z "$nonkcsanbug" && test -s "$T/failuresum"
+then
+ echo " All bugs were KCSAN failures."
+fi
+if test -n "$tdir" && test $compress_concurrency -gt 0
+then
+ # KASAN vmlinux files can approach 1GB in size, so compress them.
+ echo Looking for K[AC]SAN files to compress: `date` > "$tdir/log-xz" 2>&1
+ find "$tdir" -type d -name '*-k[ac]san' -print > $T/xz-todo-all
+ find "$tdir" -type f -name 're-run' -print | sed -e 's,/re-run,,' |
+ grep -e '-k[ac]san$' > $T/xz-todo-copy
+ sort $T/xz-todo-all $T/xz-todo-copy | uniq -u > $T/xz-todo
+ ncompresses=0
+ batchno=1
+ if test -s $T/xz-todo
+ then
+ for i in `cat $T/xz-todo`
+ do
+ find $i -name 'vmlinux*' -print
+ done | wc -l | awk '{ print $1 }' > $T/xz-todo-count
+ n2compress="`cat $T/xz-todo-count`"
+ echo Size before compressing $n2compress files: `du -sh $tdir | awk '{ print $1 }'` `date` 2>&1 | tee -a "$tdir/log-xz" | tee -a $T/log
+ for i in `cat $T/xz-todo`
+ do
+ echo Compressing vmlinux files in ${i}: `date` >> "$tdir/log-xz" 2>&1
+ for j in $i/*/vmlinux
+ do
+ xz "$j" >> "$tdir/log-xz" 2>&1 &
+ ncompresses=$((ncompresses+1))
+ if test $ncompresses -ge $compress_concurrency
+ then
+ echo Waiting for batch $batchno of $ncompresses compressions `date` | tee -a "$tdir/log-xz" | tee -a $T/log
+ wait
+ ncompresses=0
+ batchno=$((batchno+1))
+ fi
+ done
+ done
+ if test $ncompresses -gt 0
+ then
+ echo Waiting for final batch $batchno of $ncompresses compressions `date` | tee -a "$tdir/log-xz" | tee -a $T/log
+ fi
+ wait
+ if test -s $T/xz-todo-copy
+ then
+ # The trick here is that we need corresponding
+ # vmlinux files from corresponding scenarios.
+ echo Linking vmlinux.xz files to re-use scenarios `date` | tee -a "$tdir/log-xz" | tee -a $T/log
+ dirstash="`pwd`"
+ for i in `cat $T/xz-todo-copy`
+ do
+ cd $i
+ find . -name vmlinux -print > $T/xz-todo-copy-vmlinux
+ for v in `cat $T/xz-todo-copy-vmlinux`
+ do
+ rm -f "$v"
+ cp -l `cat $i/re-run`/"$i/$v".xz "`dirname "$v"`"
+ done
+ cd "$dirstash"
+ done
+ fi
+ echo Size after compressing $n2compress files: `du -sh $tdir | awk '{ print $1 }'` `date` 2>&1 | tee -a "$tdir/log-xz" | tee -a $T/log
+ echo Total duration `get_starttime_duration $starttime`. | tee -a $T/log
+ else
+ echo No compression needed: `date` >> "$tdir/log-xz" 2>&1
+ fi
+fi
+if test -n "$tdir"
+then
+ cp $T/log "$tdir"
+fi
+exit $ret
diff --git a/tools/testing/selftests/rcutorture/configs/lock/CFLIST b/tools/testing/selftests/rcutorture/configs/lock/CFLIST
index 41bae5824339..28e23d05d5a5 100644
--- a/tools/testing/selftests/rcutorture/configs/lock/CFLIST
+++ b/tools/testing/selftests/rcutorture/configs/lock/CFLIST
@@ -5,3 +5,5 @@ LOCK04
LOCK05
LOCK06
LOCK07
+LOCK08
+LOCK09
diff --git a/tools/testing/selftests/rcutorture/configs/lock/LOCK08 b/tools/testing/selftests/rcutorture/configs/lock/LOCK08
new file mode 100644
index 000000000000..1d1da1477fc3
--- /dev/null
+++ b/tools/testing/selftests/rcutorture/configs/lock/LOCK08
@@ -0,0 +1,6 @@
+CONFIG_SMP=y
+CONFIG_NR_CPUS=4
+CONFIG_HOTPLUG_CPU=y
+CONFIG_PREEMPT_NONE=n
+CONFIG_PREEMPT_VOLUNTARY=n
+CONFIG_PREEMPT=y
diff --git a/tools/testing/selftests/rcutorture/configs/lock/LOCK08.boot b/tools/testing/selftests/rcutorture/configs/lock/LOCK08.boot
new file mode 100644
index 000000000000..b8b6caebb89e
--- /dev/null
+++ b/tools/testing/selftests/rcutorture/configs/lock/LOCK08.boot
@@ -0,0 +1 @@
+locktorture.torture_type=mutex_lock locktorture.nested_locks=8
diff --git a/tools/testing/selftests/rcutorture/configs/lock/LOCK09 b/tools/testing/selftests/rcutorture/configs/lock/LOCK09
new file mode 100644
index 000000000000..1d1da1477fc3
--- /dev/null
+++ b/tools/testing/selftests/rcutorture/configs/lock/LOCK09
@@ -0,0 +1,6 @@
+CONFIG_SMP=y
+CONFIG_NR_CPUS=4
+CONFIG_HOTPLUG_CPU=y
+CONFIG_PREEMPT_NONE=n
+CONFIG_PREEMPT_VOLUNTARY=n
+CONFIG_PREEMPT=y
diff --git a/tools/testing/selftests/rcutorture/configs/lock/LOCK09.boot b/tools/testing/selftests/rcutorture/configs/lock/LOCK09.boot
new file mode 100644
index 000000000000..fd5eff148a93
--- /dev/null
+++ b/tools/testing/selftests/rcutorture/configs/lock/LOCK09.boot
@@ -0,0 +1 @@
+locktorture.torture_type=rtmutex_lock locktorture.nested_locks=8
diff --git a/tools/testing/selftests/rcutorture/configs/lock/ver_functions.sh b/tools/testing/selftests/rcutorture/configs/lock/ver_functions.sh
index d3e4b2971f92..e7bb32709d78 100644
--- a/tools/testing/selftests/rcutorture/configs/lock/ver_functions.sh
+++ b/tools/testing/selftests/rcutorture/configs/lock/ver_functions.sh
@@ -22,8 +22,9 @@ locktorture_param_onoff () {
#
# Adds per-version torture-module parameters to kernels supporting them.
per_version_boot_params () {
- echo $1 `locktorture_param_onoff "$1" "$2"` \
+ echo `locktorture_param_onoff "$1" "$2"` \
locktorture.stat_interval=15 \
locktorture.shutdown_secs=$3 \
- locktorture.verbose=1
+ locktorture.verbose=1 \
+ $1
}
diff --git a/tools/testing/selftests/rcutorture/configs/rcu/BUSTED-BOOST b/tools/testing/selftests/rcutorture/configs/rcu/BUSTED-BOOST
new file mode 100644
index 000000000000..22d598f9cabe
--- /dev/null
+++ b/tools/testing/selftests/rcutorture/configs/rcu/BUSTED-BOOST
@@ -0,0 +1,17 @@
+CONFIG_SMP=y
+CONFIG_NR_CPUS=16
+CONFIG_PREEMPT_NONE=n
+CONFIG_PREEMPT_VOLUNTARY=n
+CONFIG_PREEMPT=y
+#CHECK#CONFIG_PREEMPT_RCU=y
+CONFIG_HZ_PERIODIC=y
+CONFIG_NO_HZ_IDLE=n
+CONFIG_NO_HZ_FULL=n
+CONFIG_RCU_TRACE=y
+CONFIG_HOTPLUG_CPU=y
+CONFIG_RCU_FANOUT=2
+CONFIG_RCU_FANOUT_LEAF=2
+CONFIG_RCU_NOCB_CPU=n
+CONFIG_DEBUG_LOCK_ALLOC=n
+CONFIG_DEBUG_OBJECTS_RCU_HEAD=n
+CONFIG_RCU_EXPERT=y
diff --git a/tools/testing/selftests/rcutorture/configs/rcu/BUSTED-BOOST.boot b/tools/testing/selftests/rcutorture/configs/rcu/BUSTED-BOOST.boot
new file mode 100644
index 000000000000..84f6bb98ce99
--- /dev/null
+++ b/tools/testing/selftests/rcutorture/configs/rcu/BUSTED-BOOST.boot
@@ -0,0 +1,8 @@
+rcutorture.test_boost=2
+rcutorture.stutter=0
+rcutree.gp_preinit_delay=12
+rcutree.gp_init_delay=3
+rcutree.gp_cleanup_delay=3
+rcutree.kthread_prio=2
+threadirqs
+rcutree.use_softirq=0
diff --git a/tools/testing/selftests/rcutorture/configs/rcu/CFLIST b/tools/testing/selftests/rcutorture/configs/rcu/CFLIST
index f2b20db9e296..98b6175e5aa0 100644
--- a/tools/testing/selftests/rcutorture/configs/rcu/CFLIST
+++ b/tools/testing/selftests/rcutorture/configs/rcu/CFLIST
@@ -7,8 +7,8 @@ TREE07
TREE09
SRCU-N
SRCU-P
-SRCU-t
-SRCU-u
+SRCU-T
+SRCU-U
TINY01
TINY02
TASKS01
diff --git a/tools/testing/selftests/rcutorture/configs/rcu/RUDE01 b/tools/testing/selftests/rcutorture/configs/rcu/RUDE01
index bafe94cbd739..6fd6acb94518 100644
--- a/tools/testing/selftests/rcutorture/configs/rcu/RUDE01
+++ b/tools/testing/selftests/rcutorture/configs/rcu/RUDE01
@@ -1,5 +1,5 @@
CONFIG_SMP=y
-CONFIG_NR_CPUS=2
+CONFIG_NR_CPUS=3
CONFIG_HOTPLUG_CPU=y
CONFIG_PREEMPT_NONE=n
CONFIG_PREEMPT_VOLUNTARY=n
@@ -8,3 +8,5 @@ CONFIG_DEBUG_LOCK_ALLOC=y
CONFIG_PROVE_LOCKING=y
#CHECK#CONFIG_PROVE_RCU=y
CONFIG_RCU_EXPERT=y
+CONFIG_FORCE_TASKS_RUDE_RCU=y
+#CHECK#CONFIG_TASKS_RUDE_RCU=y
diff --git a/tools/testing/selftests/rcutorture/configs/rcu/RUDE01.boot b/tools/testing/selftests/rcutorture/configs/rcu/RUDE01.boot
index 9363708c9075..932a0799eb08 100644
--- a/tools/testing/selftests/rcutorture/configs/rcu/RUDE01.boot
+++ b/tools/testing/selftests/rcutorture/configs/rcu/RUDE01.boot
@@ -1 +1,2 @@
rcutorture.torture_type=tasks-rude
+rcutree.use_softirq=0
diff --git a/tools/testing/selftests/rcutorture/configs/rcu/SRCU-N b/tools/testing/selftests/rcutorture/configs/rcu/SRCU-N
index 2da8b49589a0..07f5e0a70ae7 100644
--- a/tools/testing/selftests/rcutorture/configs/rcu/SRCU-N
+++ b/tools/testing/selftests/rcutorture/configs/rcu/SRCU-N
@@ -6,3 +6,5 @@ CONFIG_PREEMPT_NONE=y
CONFIG_PREEMPT_VOLUNTARY=n
CONFIG_PREEMPT=n
#CHECK#CONFIG_RCU_EXPERT=n
+CONFIG_KPROBES=n
+CONFIG_FTRACE=n
diff --git a/tools/testing/selftests/rcutorture/configs/rcu/SRCU-N.boot b/tools/testing/selftests/rcutorture/configs/rcu/SRCU-N.boot
index 238bfe3bd0cc..ce0694fd9b92 100644
--- a/tools/testing/selftests/rcutorture/configs/rcu/SRCU-N.boot
+++ b/tools/testing/selftests/rcutorture/configs/rcu/SRCU-N.boot
@@ -1 +1,2 @@
rcutorture.torture_type=srcu
+rcutorture.fwd_progress=3
diff --git a/tools/testing/selftests/rcutorture/configs/rcu/SRCU-P.boot b/tools/testing/selftests/rcutorture/configs/rcu/SRCU-P.boot
index ce48c7b82673..2db39f298d18 100644
--- a/tools/testing/selftests/rcutorture/configs/rcu/SRCU-P.boot
+++ b/tools/testing/selftests/rcutorture/configs/rcu/SRCU-P.boot
@@ -1,2 +1,4 @@
rcutorture.torture_type=srcud
rcupdate.rcu_self_test=1
+rcutorture.fwd_progress=3
+srcutree.big_cpu_lim=5
diff --git a/tools/testing/selftests/rcutorture/configs/rcu/SRCU-t b/tools/testing/selftests/rcutorture/configs/rcu/SRCU-T
index 6c78022c8cd8..c70cf0405f24 100644
--- a/tools/testing/selftests/rcutorture/configs/rcu/SRCU-t
+++ b/tools/testing/selftests/rcutorture/configs/rcu/SRCU-T
@@ -2,9 +2,11 @@ CONFIG_SMP=n
CONFIG_PREEMPT_NONE=y
CONFIG_PREEMPT_VOLUNTARY=n
CONFIG_PREEMPT=n
+CONFIG_PREEMPT_DYNAMIC=n
#CHECK#CONFIG_TINY_SRCU=y
CONFIG_RCU_TRACE=n
-CONFIG_DEBUG_LOCK_ALLOC=n
+CONFIG_DEBUG_LOCK_ALLOC=y
+CONFIG_PROVE_LOCKING=y
CONFIG_DEBUG_OBJECTS_RCU_HEAD=n
CONFIG_DEBUG_ATOMIC_SLEEP=y
#CHECK#CONFIG_PREEMPT_COUNT=y
diff --git a/tools/testing/selftests/rcutorture/configs/rcu/SRCU-t.boot b/tools/testing/selftests/rcutorture/configs/rcu/SRCU-T.boot
index 238bfe3bd0cc..238bfe3bd0cc 100644
--- a/tools/testing/selftests/rcutorture/configs/rcu/SRCU-t.boot
+++ b/tools/testing/selftests/rcutorture/configs/rcu/SRCU-T.boot
diff --git a/tools/testing/selftests/rcutorture/configs/rcu/SRCU-u b/tools/testing/selftests/rcutorture/configs/rcu/SRCU-U
index c15ada821e45..bc9eeabaa1b1 100644
--- a/tools/testing/selftests/rcutorture/configs/rcu/SRCU-u
+++ b/tools/testing/selftests/rcutorture/configs/rcu/SRCU-U
@@ -2,9 +2,9 @@ CONFIG_SMP=n
CONFIG_PREEMPT_NONE=y
CONFIG_PREEMPT_VOLUNTARY=n
CONFIG_PREEMPT=n
+CONFIG_PREEMPT_DYNAMIC=n
#CHECK#CONFIG_TINY_SRCU=y
CONFIG_RCU_TRACE=n
-CONFIG_DEBUG_LOCK_ALLOC=y
-CONFIG_PROVE_LOCKING=y
+CONFIG_DEBUG_LOCK_ALLOC=n
CONFIG_DEBUG_OBJECTS_RCU_HEAD=n
CONFIG_PREEMPT_COUNT=n
diff --git a/tools/testing/selftests/rcutorture/configs/rcu/SRCU-u.boot b/tools/testing/selftests/rcutorture/configs/rcu/SRCU-U.boot
index ce48c7b82673..ce48c7b82673 100644
--- a/tools/testing/selftests/rcutorture/configs/rcu/SRCU-u.boot
+++ b/tools/testing/selftests/rcutorture/configs/rcu/SRCU-U.boot
diff --git a/tools/testing/selftests/rcutorture/configs/rcu/TASKS01 b/tools/testing/selftests/rcutorture/configs/rcu/TASKS01
index bafe94cbd739..d84801b9a7ae 100644
--- a/tools/testing/selftests/rcutorture/configs/rcu/TASKS01
+++ b/tools/testing/selftests/rcutorture/configs/rcu/TASKS01
@@ -1,5 +1,5 @@
CONFIG_SMP=y
-CONFIG_NR_CPUS=2
+CONFIG_NR_CPUS=4
CONFIG_HOTPLUG_CPU=y
CONFIG_PREEMPT_NONE=n
CONFIG_PREEMPT_VOLUNTARY=n
@@ -7,4 +7,5 @@ CONFIG_PREEMPT=y
CONFIG_DEBUG_LOCK_ALLOC=y
CONFIG_PROVE_LOCKING=y
#CHECK#CONFIG_PROVE_RCU=y
+CONFIG_TASKS_RCU=y
CONFIG_RCU_EXPERT=y
diff --git a/tools/testing/selftests/rcutorture/configs/rcu/TASKS01.boot b/tools/testing/selftests/rcutorture/configs/rcu/TASKS01.boot
index cd2a188eeb6d..30ca5b493c4b 100644
--- a/tools/testing/selftests/rcutorture/configs/rcu/TASKS01.boot
+++ b/tools/testing/selftests/rcutorture/configs/rcu/TASKS01.boot
@@ -1 +1,3 @@
rcutorture.torture_type=tasks
+rcutree.use_softirq=0
+rcupdate.rcu_task_enqueue_lim=4
diff --git a/tools/testing/selftests/rcutorture/configs/rcu/TASKS02 b/tools/testing/selftests/rcutorture/configs/rcu/TASKS02
index ad2be91e5ee7..2f9fcffff5ae 100644
--- a/tools/testing/selftests/rcutorture/configs/rcu/TASKS02
+++ b/tools/testing/selftests/rcutorture/configs/rcu/TASKS02
@@ -2,3 +2,7 @@ CONFIG_SMP=n
CONFIG_PREEMPT_NONE=y
CONFIG_PREEMPT_VOLUNTARY=n
CONFIG_PREEMPT=n
+CONFIG_PREEMPT_DYNAMIC=n
+#CHECK#CONFIG_TASKS_RCU=y
+CONFIG_FORCE_TASKS_RCU=y
+CONFIG_RCU_EXPERT=y
diff --git a/tools/testing/selftests/rcutorture/configs/rcu/TASKS02.boot b/tools/testing/selftests/rcutorture/configs/rcu/TASKS02.boot
index cd2a188eeb6d..b9b6d67cbc5f 100644
--- a/tools/testing/selftests/rcutorture/configs/rcu/TASKS02.boot
+++ b/tools/testing/selftests/rcutorture/configs/rcu/TASKS02.boot
@@ -1 +1,2 @@
rcutorture.torture_type=tasks
+rcutorture.stat_interval=60
diff --git a/tools/testing/selftests/rcutorture/configs/rcu/TASKS03 b/tools/testing/selftests/rcutorture/configs/rcu/TASKS03
index ea4399020c6c..2ef2fb69c360 100644
--- a/tools/testing/selftests/rcutorture/configs/rcu/TASKS03
+++ b/tools/testing/selftests/rcutorture/configs/rcu/TASKS03
@@ -1,9 +1,10 @@
CONFIG_SMP=y
-CONFIG_NR_CPUS=2
+CONFIG_NR_CPUS=4
CONFIG_PREEMPT_NONE=n
CONFIG_PREEMPT_VOLUNTARY=n
CONFIG_PREEMPT=y
CONFIG_HZ_PERIODIC=n
CONFIG_NO_HZ_IDLE=n
CONFIG_NO_HZ_FULL=y
-#CHECK#CONFIG_RCU_EXPERT=n
+CONFIG_TASKS_RCU=y
+CONFIG_RCU_EXPERT=y
diff --git a/tools/testing/selftests/rcutorture/configs/rcu/TINY01 b/tools/testing/selftests/rcutorture/configs/rcu/TINY01
index 6db705e55487..0953c52fcfd7 100644
--- a/tools/testing/selftests/rcutorture/configs/rcu/TINY01
+++ b/tools/testing/selftests/rcutorture/configs/rcu/TINY01
@@ -2,6 +2,7 @@ CONFIG_SMP=n
CONFIG_PREEMPT_NONE=y
CONFIG_PREEMPT_VOLUNTARY=n
CONFIG_PREEMPT=n
+CONFIG_PREEMPT_DYNAMIC=n
#CHECK#CONFIG_TINY_RCU=y
CONFIG_HZ_PERIODIC=n
CONFIG_NO_HZ_IDLE=y
diff --git a/tools/testing/selftests/rcutorture/configs/rcu/TINY02 b/tools/testing/selftests/rcutorture/configs/rcu/TINY02
index d8674264318d..30439f6fc20e 100644
--- a/tools/testing/selftests/rcutorture/configs/rcu/TINY02
+++ b/tools/testing/selftests/rcutorture/configs/rcu/TINY02
@@ -2,6 +2,7 @@ CONFIG_SMP=n
CONFIG_PREEMPT_NONE=y
CONFIG_PREEMPT_VOLUNTARY=n
CONFIG_PREEMPT=n
+CONFIG_PREEMPT_DYNAMIC=n
#CHECK#CONFIG_TINY_RCU=y
CONFIG_HZ_PERIODIC=y
CONFIG_NO_HZ_IDLE=n
diff --git a/tools/testing/selftests/rcutorture/configs/rcu/TRACE01 b/tools/testing/selftests/rcutorture/configs/rcu/TRACE01
index 12e7661b86f5..85b407467454 100644
--- a/tools/testing/selftests/rcutorture/configs/rcu/TRACE01
+++ b/tools/testing/selftests/rcutorture/configs/rcu/TRACE01
@@ -1,11 +1,14 @@
CONFIG_SMP=y
-CONFIG_NR_CPUS=4
+CONFIG_NR_CPUS=5
CONFIG_HOTPLUG_CPU=y
CONFIG_PREEMPT_NONE=y
CONFIG_PREEMPT_VOLUNTARY=n
CONFIG_PREEMPT=n
-CONFIG_DEBUG_LOCK_ALLOC=y
-CONFIG_PROVE_LOCKING=y
-#CHECK#CONFIG_PROVE_RCU=y
+CONFIG_PREEMPT_DYNAMIC=n
+CONFIG_DEBUG_LOCK_ALLOC=n
+CONFIG_PROVE_LOCKING=n
+#CHECK#CONFIG_PROVE_RCU=n
+CONFIG_FORCE_TASKS_TRACE_RCU=y
+#CHECK#CONFIG_TASKS_TRACE_RCU=y
CONFIG_TASKS_TRACE_RCU_READ_MB=y
CONFIG_RCU_EXPERT=y
diff --git a/tools/testing/selftests/rcutorture/configs/rcu/TRACE01.boot b/tools/testing/selftests/rcutorture/configs/rcu/TRACE01.boot
index 9675ad632dcc..ba6d636a4856 100644
--- a/tools/testing/selftests/rcutorture/configs/rcu/TRACE01.boot
+++ b/tools/testing/selftests/rcutorture/configs/rcu/TRACE01.boot
@@ -1 +1,2 @@
rcutorture.torture_type=tasks-tracing
+rcupdate.rcu_task_enqueue_lim=2
diff --git a/tools/testing/selftests/rcutorture/configs/rcu/TRACE02 b/tools/testing/selftests/rcutorture/configs/rcu/TRACE02
index b69ed6673c41..9003c56cd764 100644
--- a/tools/testing/selftests/rcutorture/configs/rcu/TRACE02
+++ b/tools/testing/selftests/rcutorture/configs/rcu/TRACE02
@@ -4,8 +4,11 @@ CONFIG_HOTPLUG_CPU=y
CONFIG_PREEMPT_NONE=n
CONFIG_PREEMPT_VOLUNTARY=n
CONFIG_PREEMPT=y
-CONFIG_DEBUG_LOCK_ALLOC=n
-CONFIG_PROVE_LOCKING=n
-#CHECK#CONFIG_PROVE_RCU=n
+CONFIG_DEBUG_LOCK_ALLOC=y
+CONFIG_PROVE_LOCKING=y
+#CHECK#CONFIG_PROVE_RCU=y
+CONFIG_FORCE_TASKS_TRACE_RCU=y
+#CHECK#CONFIG_TASKS_TRACE_RCU=y
CONFIG_TASKS_TRACE_RCU_READ_MB=n
CONFIG_RCU_EXPERT=y
+CONFIG_DEBUG_OBJECTS=y
diff --git a/tools/testing/selftests/rcutorture/configs/rcu/TRACE02.boot b/tools/testing/selftests/rcutorture/configs/rcu/TRACE02.boot
index 9675ad632dcc..c70b5db6c2ae 100644
--- a/tools/testing/selftests/rcutorture/configs/rcu/TRACE02.boot
+++ b/tools/testing/selftests/rcutorture/configs/rcu/TRACE02.boot
@@ -1 +1,2 @@
rcutorture.torture_type=tasks-tracing
+rcutorture.fwd_progress=2
diff --git a/tools/testing/selftests/rcutorture/configs/rcu/TREE01 b/tools/testing/selftests/rcutorture/configs/rcu/TREE01
index b5b53973c01e..8ae41d5f81a3 100644
--- a/tools/testing/selftests/rcutorture/configs/rcu/TREE01
+++ b/tools/testing/selftests/rcutorture/configs/rcu/TREE01
@@ -6,7 +6,6 @@ CONFIG_PREEMPT=y
CONFIG_HZ_PERIODIC=n
CONFIG_NO_HZ_IDLE=y
CONFIG_NO_HZ_FULL=n
-CONFIG_RCU_FAST_NO_HZ=y
CONFIG_RCU_TRACE=y
CONFIG_HOTPLUG_CPU=y
CONFIG_MAXSMP=y
diff --git a/tools/testing/selftests/rcutorture/configs/rcu/TREE01.boot b/tools/testing/selftests/rcutorture/configs/rcu/TREE01.boot
index d6da9a61d44a..40af3df0f397 100644
--- a/tools/testing/selftests/rcutorture/configs/rcu/TREE01.boot
+++ b/tools/testing/selftests/rcutorture/configs/rcu/TREE01.boot
@@ -2,5 +2,7 @@ maxcpus=8 nr_cpus=43
rcutree.gp_preinit_delay=3
rcutree.gp_init_delay=3
rcutree.gp_cleanup_delay=3
-rcu_nocbs=0
+rcu_nocbs=0-1,3-7
+rcutorture.nocbs_nthreads=8
+rcutorture.nocbs_toggle=1000
rcutorture.fwd_progress=0
diff --git a/tools/testing/selftests/rcutorture/configs/rcu/TREE02 b/tools/testing/selftests/rcutorture/configs/rcu/TREE02
index 65daee4fbf5a..2871ee599891 100644
--- a/tools/testing/selftests/rcutorture/configs/rcu/TREE02
+++ b/tools/testing/selftests/rcutorture/configs/rcu/TREE02
@@ -7,7 +7,6 @@ CONFIG_PREEMPT=y
CONFIG_HZ_PERIODIC=n
CONFIG_NO_HZ_IDLE=y
CONFIG_NO_HZ_FULL=n
-CONFIG_RCU_FAST_NO_HZ=n
CONFIG_RCU_TRACE=n
CONFIG_RCU_FANOUT=3
CONFIG_RCU_FANOUT_LEAF=3
diff --git a/tools/testing/selftests/rcutorture/configs/rcu/TREE02.boot b/tools/testing/selftests/rcutorture/configs/rcu/TREE02.boot
new file mode 100644
index 000000000000..dd914fa8f690
--- /dev/null
+++ b/tools/testing/selftests/rcutorture/configs/rcu/TREE02.boot
@@ -0,0 +1 @@
+rcutorture.fwd_progress=2
diff --git a/tools/testing/selftests/rcutorture/configs/rcu/TREE03.boot b/tools/testing/selftests/rcutorture/configs/rcu/TREE03.boot
index 1c218944b1e9..8e50bfd4b710 100644
--- a/tools/testing/selftests/rcutorture/configs/rcu/TREE03.boot
+++ b/tools/testing/selftests/rcutorture/configs/rcu/TREE03.boot
@@ -4,3 +4,4 @@ rcutree.gp_init_delay=3
rcutree.gp_cleanup_delay=3
rcutree.kthread_prio=2
threadirqs
+rcutree.use_softirq=0
diff --git a/tools/testing/selftests/rcutorture/configs/rcu/TREE04 b/tools/testing/selftests/rcutorture/configs/rcu/TREE04
index f6d6a40c0576..dc4985064b3a 100644
--- a/tools/testing/selftests/rcutorture/configs/rcu/TREE04
+++ b/tools/testing/selftests/rcutorture/configs/rcu/TREE04
@@ -1,13 +1,13 @@
CONFIG_SMP=y
CONFIG_NR_CPUS=8
-CONFIG_PREEMPT_NONE=y
-CONFIG_PREEMPT_VOLUNTARY=n
+CONFIG_PREEMPT_NONE=n
+CONFIG_PREEMPT_VOLUNTARY=y
CONFIG_PREEMPT=n
+CONFIG_PREEMPT_DYNAMIC=n
#CHECK#CONFIG_TREE_RCU=y
CONFIG_HZ_PERIODIC=n
CONFIG_NO_HZ_IDLE=n
CONFIG_NO_HZ_FULL=y
-CONFIG_RCU_FAST_NO_HZ=y
CONFIG_RCU_TRACE=y
CONFIG_RCU_FANOUT=4
CONFIG_RCU_FANOUT_LEAF=3
@@ -15,3 +15,4 @@ CONFIG_DEBUG_LOCK_ALLOC=n
CONFIG_DEBUG_OBJECTS_RCU_HEAD=n
CONFIG_RCU_EXPERT=y
CONFIG_RCU_EQS_DEBUG=y
+CONFIG_RCU_LAZY=y
diff --git a/tools/testing/selftests/rcutorture/configs/rcu/TREE04.boot b/tools/testing/selftests/rcutorture/configs/rcu/TREE04.boot
index 5adc6756792a..a8d94caf7d2f 100644
--- a/tools/testing/selftests/rcutorture/configs/rcu/TREE04.boot
+++ b/tools/testing/selftests/rcutorture/configs/rcu/TREE04.boot
@@ -1 +1 @@
-rcutree.rcu_fanout_leaf=4 nohz_full=1-7
+rcutree.rcu_fanout_leaf=4 nohz_full=1-N
diff --git a/tools/testing/selftests/rcutorture/configs/rcu/TREE05 b/tools/testing/selftests/rcutorture/configs/rcu/TREE05
index 2dde0d9964e3..9f48c73709ec 100644
--- a/tools/testing/selftests/rcutorture/configs/rcu/TREE05
+++ b/tools/testing/selftests/rcutorture/configs/rcu/TREE05
@@ -7,7 +7,6 @@ CONFIG_PREEMPT=n
CONFIG_HZ_PERIODIC=n
CONFIG_NO_HZ_IDLE=y
CONFIG_NO_HZ_FULL=n
-CONFIG_RCU_FAST_NO_HZ=n
CONFIG_RCU_TRACE=n
CONFIG_HOTPLUG_CPU=y
CONFIG_RCU_FANOUT=6
@@ -16,5 +15,6 @@ CONFIG_RCU_NOCB_CPU=y
CONFIG_DEBUG_LOCK_ALLOC=y
CONFIG_PROVE_LOCKING=y
#CHECK#CONFIG_PROVE_RCU=y
+CONFIG_PROVE_RCU_LIST=y
CONFIG_DEBUG_OBJECTS_RCU_HEAD=n
CONFIG_RCU_EXPERT=y
diff --git a/tools/testing/selftests/rcutorture/configs/rcu/TREE06 b/tools/testing/selftests/rcutorture/configs/rcu/TREE06
index bf4980d606b5..db27651de04b 100644
--- a/tools/testing/selftests/rcutorture/configs/rcu/TREE06
+++ b/tools/testing/selftests/rcutorture/configs/rcu/TREE06
@@ -7,7 +7,6 @@ CONFIG_PREEMPT=n
CONFIG_HZ_PERIODIC=n
CONFIG_NO_HZ_IDLE=y
CONFIG_NO_HZ_FULL=n
-CONFIG_RCU_FAST_NO_HZ=n
CONFIG_RCU_TRACE=n
CONFIG_RCU_FANOUT=6
CONFIG_RCU_FANOUT_LEAF=6
diff --git a/tools/testing/selftests/rcutorture/configs/rcu/TREE07 b/tools/testing/selftests/rcutorture/configs/rcu/TREE07
index d7afb271a586..d30922d8c883 100644
--- a/tools/testing/selftests/rcutorture/configs/rcu/TREE07
+++ b/tools/testing/selftests/rcutorture/configs/rcu/TREE07
@@ -3,11 +3,11 @@ CONFIG_NR_CPUS=16
CONFIG_PREEMPT_NONE=y
CONFIG_PREEMPT_VOLUNTARY=n
CONFIG_PREEMPT=n
+CONFIG_PREEMPT_DYNAMIC=n
#CHECK#CONFIG_TREE_RCU=y
CONFIG_HZ_PERIODIC=n
CONFIG_NO_HZ_IDLE=n
CONFIG_NO_HZ_FULL=y
-CONFIG_RCU_FAST_NO_HZ=n
CONFIG_RCU_TRACE=y
CONFIG_HOTPLUG_CPU=y
CONFIG_RCU_FANOUT=2
diff --git a/tools/testing/selftests/rcutorture/configs/rcu/TREE07.boot b/tools/testing/selftests/rcutorture/configs/rcu/TREE07.boot
index d44609937503..979edbf4c820 100644
--- a/tools/testing/selftests/rcutorture/configs/rcu/TREE07.boot
+++ b/tools/testing/selftests/rcutorture/configs/rcu/TREE07.boot
@@ -1 +1,4 @@
nohz_full=2-9
+rcutorture.stall_cpu=14
+rcutorture.stall_cpu_holdoff=90
+rcutorture.fwd_progress=0
diff --git a/tools/testing/selftests/rcutorture/configs/rcu/TREE08 b/tools/testing/selftests/rcutorture/configs/rcu/TREE08
index c810c5276a89..8b561355b9ef 100644
--- a/tools/testing/selftests/rcutorture/configs/rcu/TREE08
+++ b/tools/testing/selftests/rcutorture/configs/rcu/TREE08
@@ -7,7 +7,6 @@ CONFIG_PREEMPT=y
CONFIG_HZ_PERIODIC=n
CONFIG_NO_HZ_IDLE=y
CONFIG_NO_HZ_FULL=n
-CONFIG_RCU_FAST_NO_HZ=n
CONFIG_RCU_TRACE=n
CONFIG_RCU_FANOUT=3
CONFIG_RCU_FANOUT_LEAF=2
diff --git a/tools/testing/selftests/rcutorture/configs/rcu/TREE08.boot b/tools/testing/selftests/rcutorture/configs/rcu/TREE08.boot
index 22478fd3a865..94d38445d393 100644
--- a/tools/testing/selftests/rcutorture/configs/rcu/TREE08.boot
+++ b/tools/testing/selftests/rcutorture/configs/rcu/TREE08.boot
@@ -1,3 +1,3 @@
rcupdate.rcu_self_test=1
rcutree.rcu_fanout_exact=1
-rcu_nocbs=0-7
+rcu_nocbs=all
diff --git a/tools/testing/selftests/rcutorture/configs/rcu/TREE09 b/tools/testing/selftests/rcutorture/configs/rcu/TREE09
index 8523a7515cbf..fc45645bb5f4 100644
--- a/tools/testing/selftests/rcutorture/configs/rcu/TREE09
+++ b/tools/testing/selftests/rcutorture/configs/rcu/TREE09
@@ -13,3 +13,5 @@ CONFIG_DEBUG_LOCK_ALLOC=n
CONFIG_RCU_BOOST=n
CONFIG_DEBUG_OBJECTS_RCU_HEAD=n
#CHECK#CONFIG_RCU_EXPERT=n
+CONFIG_KPROBES=n
+CONFIG_FTRACE=n
diff --git a/tools/testing/selftests/rcutorture/configs/rcu/TREE10 b/tools/testing/selftests/rcutorture/configs/rcu/TREE10
index 7311f84a5876..a323d8948b7c 100644
--- a/tools/testing/selftests/rcutorture/configs/rcu/TREE10
+++ b/tools/testing/selftests/rcutorture/configs/rcu/TREE10
@@ -3,11 +3,11 @@ CONFIG_NR_CPUS=56
CONFIG_PREEMPT_NONE=y
CONFIG_PREEMPT_VOLUNTARY=n
CONFIG_PREEMPT=n
+CONFIG_PREEMPT_DYNAMIC=n
#CHECK#CONFIG_TREE_RCU=y
CONFIG_HZ_PERIODIC=n
CONFIG_NO_HZ_IDLE=y
CONFIG_NO_HZ_FULL=n
-CONFIG_RCU_FAST_NO_HZ=n
CONFIG_RCU_TRACE=n
CONFIG_RCU_NOCB_CPU=n
CONFIG_DEBUG_LOCK_ALLOC=n
diff --git a/tools/testing/selftests/rcutorture/configs/rcu/TREE10.boot b/tools/testing/selftests/rcutorture/configs/rcu/TREE10.boot
new file mode 100644
index 000000000000..dd914fa8f690
--- /dev/null
+++ b/tools/testing/selftests/rcutorture/configs/rcu/TREE10.boot
@@ -0,0 +1 @@
+rcutorture.fwd_progress=2
diff --git a/tools/testing/selftests/rcutorture/configs/rcu/ver_functions.sh b/tools/testing/selftests/rcutorture/configs/rcu/ver_functions.sh
index effa415f9b92..c044df386876 100644
--- a/tools/testing/selftests/rcutorture/configs/rcu/ver_functions.sh
+++ b/tools/testing/selftests/rcutorture/configs/rcu/ver_functions.sh
@@ -9,7 +9,7 @@
# rcutorture_param_n_barrier_cbs bootparam-string
#
-# Adds n_barrier_cbs rcutorture module parameter to kernels having it.
+# Adds n_barrier_cbs rcutorture module parameter if not already specified.
rcutorture_param_n_barrier_cbs () {
if echo $1 | grep -q "rcutorture\.n_barrier_cbs"
then
@@ -30,14 +30,27 @@ rcutorture_param_onoff () {
fi
}
+# rcutorture_param_stat_interval bootparam-string
+#
+# Adds stat_interval rcutorture module parameter if not already specified.
+rcutorture_param_stat_interval () {
+ if echo $1 | grep -q "rcutorture\.stat_interval"
+ then
+ :
+ else
+ echo rcutorture.stat_interval=15
+ fi
+}
+
# per_version_boot_params bootparam-string config-file seconds
#
# Adds per-version torture-module parameters to kernels supporting them.
per_version_boot_params () {
- echo $1 `rcutorture_param_onoff "$1" "$2"` \
+ echo `rcutorture_param_onoff "$1" "$2"` \
`rcutorture_param_n_barrier_cbs "$1"` \
- rcutorture.stat_interval=15 \
+ `rcutorture_param_stat_interval "$1"` \
rcutorture.shutdown_secs=$3 \
rcutorture.test_no_idle_hz=1 \
- rcutorture.verbose=1
+ rcutorture.verbose=1 \
+ $1
}
diff --git a/tools/testing/selftests/rcutorture/configs/rcuperf/CFcommon b/tools/testing/selftests/rcutorture/configs/rcuperf/CFcommon
deleted file mode 100644
index a09816b8c0f3..000000000000
--- a/tools/testing/selftests/rcutorture/configs/rcuperf/CFcommon
+++ /dev/null
@@ -1,2 +0,0 @@
-CONFIG_RCU_PERF_TEST=y
-CONFIG_PRINTK_TIME=y
diff --git a/tools/testing/selftests/rcutorture/configs/rcuperf/CFLIST b/tools/testing/selftests/rcutorture/configs/rcuscale/CFLIST
index c9f56cf20775..c9f56cf20775 100644
--- a/tools/testing/selftests/rcutorture/configs/rcuperf/CFLIST
+++ b/tools/testing/selftests/rcutorture/configs/rcuscale/CFLIST
diff --git a/tools/testing/selftests/rcutorture/configs/rcuscale/CFcommon b/tools/testing/selftests/rcutorture/configs/rcuscale/CFcommon
new file mode 100644
index 000000000000..b1ffd7c67604
--- /dev/null
+++ b/tools/testing/selftests/rcutorture/configs/rcuscale/CFcommon
@@ -0,0 +1,8 @@
+CONFIG_RCU_SCALE_TEST=y
+CONFIG_PRINTK_TIME=y
+CONFIG_FORCE_TASKS_RCU=y
+#CHECK#CONFIG_TASKS_RCU=y
+CONFIG_FORCE_TASKS_RUDE_RCU=y
+#CHECK#CONFIG_TASKS_RUDE_RCU=y
+CONFIG_FORCE_TASKS_TRACE_RCU=y
+#CHECK#CONFIG_TASKS_TRACE_RCU=y
diff --git a/tools/testing/selftests/rcutorture/configs/rcuperf/TINY b/tools/testing/selftests/rcutorture/configs/rcuscale/TINY
index fb05ef5279b4..0fa2dc086e10 100644
--- a/tools/testing/selftests/rcutorture/configs/rcuperf/TINY
+++ b/tools/testing/selftests/rcutorture/configs/rcuscale/TINY
@@ -2,11 +2,11 @@ CONFIG_SMP=n
CONFIG_PREEMPT_NONE=y
CONFIG_PREEMPT_VOLUNTARY=n
CONFIG_PREEMPT=n
+CONFIG_PREEMPT_DYNAMIC=n
#CHECK#CONFIG_TINY_RCU=y
CONFIG_HZ_PERIODIC=n
CONFIG_NO_HZ_IDLE=y
CONFIG_NO_HZ_FULL=n
-CONFIG_RCU_FAST_NO_HZ=n
CONFIG_RCU_NOCB_CPU=n
CONFIG_DEBUG_LOCK_ALLOC=n
CONFIG_PROVE_LOCKING=n
diff --git a/tools/testing/selftests/rcutorture/configs/rcuscale/TRACE01 b/tools/testing/selftests/rcutorture/configs/rcuscale/TRACE01
new file mode 100644
index 000000000000..0059592c7408
--- /dev/null
+++ b/tools/testing/selftests/rcutorture/configs/rcuscale/TRACE01
@@ -0,0 +1,16 @@
+CONFIG_SMP=y
+CONFIG_PREEMPT_NONE=y
+CONFIG_PREEMPT_VOLUNTARY=n
+CONFIG_PREEMPT=n
+CONFIG_PREEMPT_DYNAMIC=n
+#CHECK#CONFIG_TREE_RCU=y
+CONFIG_HZ_PERIODIC=n
+CONFIG_NO_HZ_IDLE=y
+CONFIG_NO_HZ_FULL=n
+CONFIG_RCU_NOCB_CPU=n
+CONFIG_DEBUG_LOCK_ALLOC=n
+CONFIG_PROVE_LOCKING=n
+CONFIG_RCU_BOOST=n
+CONFIG_DEBUG_OBJECTS_RCU_HEAD=n
+CONFIG_RCU_EXPERT=y
+CONFIG_RCU_TRACE=y
diff --git a/tools/testing/selftests/rcutorture/configs/rcuscale/TRACE01.boot b/tools/testing/selftests/rcutorture/configs/rcuscale/TRACE01.boot
new file mode 100644
index 000000000000..af0aff1457a4
--- /dev/null
+++ b/tools/testing/selftests/rcutorture/configs/rcuscale/TRACE01.boot
@@ -0,0 +1 @@
+rcuscale.scale_type=tasks-tracing
diff --git a/tools/testing/selftests/rcutorture/configs/rcuperf/TREE b/tools/testing/selftests/rcutorture/configs/rcuscale/TREE
index 721cfda76ab2..b10706fd03a4 100644
--- a/tools/testing/selftests/rcutorture/configs/rcuperf/TREE
+++ b/tools/testing/selftests/rcutorture/configs/rcuscale/TREE
@@ -6,8 +6,7 @@ CONFIG_PREEMPT=y
CONFIG_HZ_PERIODIC=n
CONFIG_NO_HZ_IDLE=y
CONFIG_NO_HZ_FULL=n
-CONFIG_RCU_FAST_NO_HZ=n
-CONFIG_HOTPLUG_CPU=n
+CONFIG_HOTPLUG_CPU=y
CONFIG_SUSPEND=n
CONFIG_HIBERNATION=n
CONFIG_RCU_NOCB_CPU=n
@@ -17,3 +16,5 @@ CONFIG_RCU_BOOST=n
CONFIG_DEBUG_OBJECTS_RCU_HEAD=n
CONFIG_RCU_EXPERT=y
CONFIG_RCU_TRACE=y
+CONFIG_KPROBES=n
+CONFIG_FTRACE=n
diff --git a/tools/testing/selftests/rcutorture/configs/rcuperf/TREE54 b/tools/testing/selftests/rcutorture/configs/rcuscale/TREE54
index 7629f5dd73b2..9f83e5372796 100644
--- a/tools/testing/selftests/rcutorture/configs/rcuperf/TREE54
+++ b/tools/testing/selftests/rcutorture/configs/rcuscale/TREE54
@@ -7,8 +7,7 @@ CONFIG_PREEMPT=y
CONFIG_HZ_PERIODIC=n
CONFIG_NO_HZ_IDLE=y
CONFIG_NO_HZ_FULL=n
-CONFIG_RCU_FAST_NO_HZ=n
-CONFIG_HOTPLUG_CPU=n
+CONFIG_HOTPLUG_CPU=y
CONFIG_SUSPEND=n
CONFIG_HIBERNATION=n
CONFIG_RCU_FANOUT=3
diff --git a/tools/testing/selftests/rcutorture/configs/rcuperf/ver_functions.sh b/tools/testing/selftests/rcutorture/configs/rcuscale/ver_functions.sh
index 777d5b0c190f..28070b43f017 100644
--- a/tools/testing/selftests/rcutorture/configs/rcuperf/ver_functions.sh
+++ b/tools/testing/selftests/rcutorture/configs/rcuscale/ver_functions.sh
@@ -11,6 +11,7 @@
#
# Adds per-version torture-module parameters to kernels supporting them.
per_version_boot_params () {
- echo $1 rcuperf.shutdown=1 \
- rcuperf.verbose=1
+ echo rcuscale.shutdown=1 \
+ rcuscale.verbose=0 \
+ $1
}
diff --git a/tools/testing/selftests/rcutorture/configs/refscale/CFLIST b/tools/testing/selftests/rcutorture/configs/refscale/CFLIST
new file mode 100644
index 000000000000..4d62eb4a39f9
--- /dev/null
+++ b/tools/testing/selftests/rcutorture/configs/refscale/CFLIST
@@ -0,0 +1,2 @@
+NOPREEMPT
+PREEMPT
diff --git a/tools/testing/selftests/rcutorture/configs/refscale/CFcommon b/tools/testing/selftests/rcutorture/configs/refscale/CFcommon
new file mode 100644
index 000000000000..fbea3b13baba
--- /dev/null
+++ b/tools/testing/selftests/rcutorture/configs/refscale/CFcommon
@@ -0,0 +1,6 @@
+CONFIG_RCU_REF_SCALE_TEST=y
+CONFIG_PRINTK_TIME=y
+CONFIG_FORCE_TASKS_RCU=y
+#CHECK#CONFIG_TASKS_RCU=y
+CONFIG_FORCE_TASKS_TRACE_RCU=y
+#CHECK#CONFIG_TASKS_TRACE_RCU=y
diff --git a/tools/testing/selftests/rcutorture/configs/refscale/NOPREEMPT b/tools/testing/selftests/rcutorture/configs/refscale/NOPREEMPT
new file mode 100644
index 000000000000..67f9d2998afd
--- /dev/null
+++ b/tools/testing/selftests/rcutorture/configs/refscale/NOPREEMPT
@@ -0,0 +1,20 @@
+CONFIG_SMP=y
+CONFIG_PREEMPT_NONE=y
+CONFIG_PREEMPT_VOLUNTARY=n
+CONFIG_PREEMPT=n
+CONFIG_PREEMPT_DYNAMIC=n
+#CHECK#CONFIG_PREEMPT_RCU=n
+CONFIG_HZ_PERIODIC=n
+CONFIG_NO_HZ_IDLE=y
+CONFIG_NO_HZ_FULL=n
+CONFIG_HOTPLUG_CPU=y
+CONFIG_SUSPEND=n
+CONFIG_HIBERNATION=n
+CONFIG_RCU_NOCB_CPU=n
+CONFIG_DEBUG_LOCK_ALLOC=n
+CONFIG_PROVE_LOCKING=n
+CONFIG_RCU_BOOST=n
+CONFIG_DEBUG_OBJECTS_RCU_HEAD=n
+CONFIG_RCU_EXPERT=y
+CONFIG_KPROBES=n
+CONFIG_FTRACE=n
diff --git a/tools/testing/selftests/rcutorture/configs/refscale/PREEMPT b/tools/testing/selftests/rcutorture/configs/refscale/PREEMPT
new file mode 100644
index 000000000000..52e3ef674056
--- /dev/null
+++ b/tools/testing/selftests/rcutorture/configs/refscale/PREEMPT
@@ -0,0 +1,17 @@
+CONFIG_SMP=y
+CONFIG_PREEMPT_NONE=n
+CONFIG_PREEMPT_VOLUNTARY=n
+CONFIG_PREEMPT=y
+#CHECK#CONFIG_PREEMPT_RCU=y
+CONFIG_HZ_PERIODIC=n
+CONFIG_NO_HZ_IDLE=y
+CONFIG_NO_HZ_FULL=n
+CONFIG_HOTPLUG_CPU=y
+CONFIG_SUSPEND=n
+CONFIG_HIBERNATION=n
+CONFIG_RCU_NOCB_CPU=n
+CONFIG_DEBUG_LOCK_ALLOC=n
+CONFIG_PROVE_LOCKING=n
+CONFIG_RCU_BOOST=n
+CONFIG_DEBUG_OBJECTS_RCU_HEAD=n
+CONFIG_RCU_EXPERT=y
diff --git a/tools/testing/selftests/rcutorture/configs/refscale/ver_functions.sh b/tools/testing/selftests/rcutorture/configs/refscale/ver_functions.sh
new file mode 100644
index 000000000000..748465627601
--- /dev/null
+++ b/tools/testing/selftests/rcutorture/configs/refscale/ver_functions.sh
@@ -0,0 +1,17 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0+
+#
+# Torture-suite-dependent shell functions for the rest of the scripts.
+#
+# Copyright (C) IBM Corporation, 2015
+#
+# Authors: Paul E. McKenney <paulmck@linux.ibm.com>
+
+# per_version_boot_params bootparam-string config-file seconds
+#
+# Adds per-version torture-module parameters to kernels supporting them.
+per_version_boot_params () {
+ echo refscale.shutdown=1 \
+ refscale.verbose=0 \
+ $1
+}
diff --git a/tools/testing/selftests/rcutorture/configs/scf/CFLIST b/tools/testing/selftests/rcutorture/configs/scf/CFLIST
new file mode 100644
index 000000000000..4d62eb4a39f9
--- /dev/null
+++ b/tools/testing/selftests/rcutorture/configs/scf/CFLIST
@@ -0,0 +1,2 @@
+NOPREEMPT
+PREEMPT
diff --git a/tools/testing/selftests/rcutorture/configs/scf/CFcommon b/tools/testing/selftests/rcutorture/configs/scf/CFcommon
new file mode 100644
index 000000000000..c11ab91f49f5
--- /dev/null
+++ b/tools/testing/selftests/rcutorture/configs/scf/CFcommon
@@ -0,0 +1,2 @@
+CONFIG_SCF_TORTURE_TEST=y
+CONFIG_PRINTK_TIME=y
diff --git a/tools/testing/selftests/rcutorture/configs/scf/NOPREEMPT b/tools/testing/selftests/rcutorture/configs/scf/NOPREEMPT
new file mode 100644
index 000000000000..6133f54ce2a7
--- /dev/null
+++ b/tools/testing/selftests/rcutorture/configs/scf/NOPREEMPT
@@ -0,0 +1,13 @@
+CONFIG_SMP=y
+CONFIG_PREEMPT_NONE=y
+CONFIG_PREEMPT_VOLUNTARY=n
+CONFIG_PREEMPT=n
+CONFIG_PREEMPT_DYNAMIC=n
+#CHECK#CONFIG_PREEMPT_RCU=n
+CONFIG_HZ_PERIODIC=n
+CONFIG_NO_HZ_IDLE=n
+CONFIG_NO_HZ_FULL=y
+CONFIG_DEBUG_LOCK_ALLOC=n
+CONFIG_PROVE_LOCKING=n
+CONFIG_KPROBES=n
+CONFIG_FTRACE=n
diff --git a/tools/testing/selftests/rcutorture/configs/scf/NOPREEMPT.boot b/tools/testing/selftests/rcutorture/configs/scf/NOPREEMPT.boot
new file mode 100644
index 000000000000..d6a7fa097c2e
--- /dev/null
+++ b/tools/testing/selftests/rcutorture/configs/scf/NOPREEMPT.boot
@@ -0,0 +1 @@
+nohz_full=1
diff --git a/tools/testing/selftests/rcutorture/configs/scf/PREEMPT b/tools/testing/selftests/rcutorture/configs/scf/PREEMPT
new file mode 100644
index 000000000000..cb37e08037d6
--- /dev/null
+++ b/tools/testing/selftests/rcutorture/configs/scf/PREEMPT
@@ -0,0 +1,10 @@
+CONFIG_SMP=y
+CONFIG_PREEMPT_NONE=n
+CONFIG_PREEMPT_VOLUNTARY=n
+CONFIG_PREEMPT=y
+CONFIG_HZ_PERIODIC=n
+CONFIG_NO_HZ_IDLE=y
+CONFIG_NO_HZ_FULL=n
+CONFIG_DEBUG_LOCK_ALLOC=y
+CONFIG_PROVE_LOCKING=y
+CONFIG_RCU_EXPERT=y
diff --git a/tools/testing/selftests/rcutorture/configs/scf/ver_functions.sh b/tools/testing/selftests/rcutorture/configs/scf/ver_functions.sh
new file mode 100644
index 000000000000..7637f68ef0ce
--- /dev/null
+++ b/tools/testing/selftests/rcutorture/configs/scf/ver_functions.sh
@@ -0,0 +1,30 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0+
+#
+# Torture-suite-dependent shell functions for the rest of the scripts.
+#
+# Copyright (C) Facebook, 2020
+#
+# Authors: Paul E. McKenney <paulmck@kernel.org>
+
+# scftorture_param_onoff bootparam-string config-file
+#
+# Adds onoff scftorture module parameters to kernels having it.
+scftorture_param_onoff () {
+ if ! bootparam_hotplug_cpu "$1" && configfrag_hotplug_cpu "$2"
+ then
+ echo CPU-hotplug kernel, adding scftorture onoff. 1>&2
+ echo scftorture.onoff_interval=1000 scftorture.onoff_holdoff=30
+ fi
+}
+
+# per_version_boot_params bootparam-string config-file seconds
+#
+# Adds per-version torture-module parameters to kernels supporting them.
+per_version_boot_params () {
+ echo `scftorture_param_onoff "$1" "$2"` \
+ scftorture.stat_interval=15 \
+ scftorture.shutdown_secs=$3 \
+ scftorture.verbose=1 \
+ $1
+}
diff --git a/tools/testing/selftests/rcutorture/doc/TREE_RCU-kconfig.txt b/tools/testing/selftests/rcutorture/doc/TREE_RCU-kconfig.txt
index 1b96d68473b8..3f5fb66f16df 100644
--- a/tools/testing/selftests/rcutorture/doc/TREE_RCU-kconfig.txt
+++ b/tools/testing/selftests/rcutorture/doc/TREE_RCU-kconfig.txt
@@ -15,7 +15,6 @@ CONFIG_PROVE_RCU -- Hardwired to CONFIG_PROVE_LOCKING.
CONFIG_RCU_BOOST -- one of PREEMPT_RCU.
CONFIG_RCU_FANOUT -- Cover hierarchy, but overlap with others.
CONFIG_RCU_FANOUT_LEAF -- Do one non-default.
-CONFIG_RCU_FAST_NO_HZ -- Do one, but not with all nohz_full CPUs.
CONFIG_RCU_NOCB_CPU -- Do three, one with no rcu_nocbs CPUs, one with
rcu_nocbs=0, and one with all rcu_nocbs CPUs.
CONFIG_RCU_TRACE -- Do half.
@@ -72,9 +71,5 @@ CONFIG_TASKS_RCU
These are controlled by CONFIG_PREEMPT and/or CONFIG_SMP.
-CONFIG_SRCU
-
- Selected by CONFIG_RCU_TORTURE_TEST, so cannot disable.
-
boot parameters ignored: TBD
diff --git a/tools/testing/selftests/rcutorture/doc/initrd.txt b/tools/testing/selftests/rcutorture/doc/initrd.txt
index 933b4fd12327..41a4255865d4 100644
--- a/tools/testing/selftests/rcutorture/doc/initrd.txt
+++ b/tools/testing/selftests/rcutorture/doc/initrd.txt
@@ -1,12 +1,11 @@
-The rcutorture scripting tools automatically create the needed initrd
-directory using dracut. Failing that, this tool will create an initrd
-containing a single statically linked binary named "init" that loops
-over a very long sleep() call. In both cases, this creation is done
-by tools/testing/selftests/rcutorture/bin/mkinitrd.sh.
+The rcutorture scripting tools automatically create an initrd containing
+a single statically linked binary named "init" that loops over a
+very long sleep() call. In both cases, this creation is done by
+tools/testing/selftests/rcutorture/bin/mkinitrd.sh.
-However, if you are attempting to run rcutorture on a system that does
-not have dracut installed, and if you don't like the notion of static
-linking, you might wish to press an existing initrd into service:
+However, if you don't like the notion of statically linked bare-bones
+userspace environments, you might wish to press an existing initrd
+into service:
------------------------------------------------------------------------
cd tools/testing/selftests/rcutorture
@@ -15,24 +14,3 @@ mkdir initrd
cd initrd
cpio -id < /tmp/initrd.img.zcat
# Manually verify that initrd contains needed binaries and libraries.
-------------------------------------------------------------------------
-
-Interestingly enough, if you are running rcutorture, you don't really
-need userspace in many cases. Running without userspace has the
-advantage of allowing you to test your kernel independently of the
-distro in place, the root-filesystem layout, and so on. To make this
-happen, put the following script in the initrd's tree's "/init" file,
-with 0755 mode.
-
-------------------------------------------------------------------------
-#!/bin/sh
-
-while :
-do
- sleep 10
-done
-------------------------------------------------------------------------
-
-This approach also allows most of the binaries and libraries in the
-initrd filesystem to be dispensed with, which can save significant
-space in rcutorture's "res" directory.
diff --git a/tools/testing/selftests/rcutorture/doc/rcu-test-image.txt b/tools/testing/selftests/rcutorture/doc/rcu-test-image.txt
index 449cf579d6f9..b2fc247976b1 100644
--- a/tools/testing/selftests/rcutorture/doc/rcu-test-image.txt
+++ b/tools/testing/selftests/rcutorture/doc/rcu-test-image.txt
@@ -1,8 +1,33 @@
-This document describes one way to create the rcu-test-image file
-that contains the filesystem used by the guest-OS kernel. There are
-probably much better ways of doing this, and this filesystem could no
-doubt be smaller. It is probably also possible to simply download
-an appropriate image from any number of places.
+Normally, a minimal initrd is created automatically by the rcutorture
+scripting. But minimal really does mean "minimal", namely just a single
+root directory with a single statically linked executable named "init":
+
+$ size tools/testing/selftests/rcutorture/initrd/init
+ text data bss dec hex filename
+ 328 0 8 336 150 tools/testing/selftests/rcutorture/initrd/init
+
+Suppose you need to run some scripts, perhaps to monitor or control
+some aspect of the rcutorture testing. This will require a more fully
+filled-out userspace, perhaps containing libraries, executables for
+the shell and other utilities, and soforth. In that case, place your
+desired filesystem here:
+
+ tools/testing/selftests/rcutorture/initrd
+
+For example, your tools/testing/selftests/rcutorture/initrd/init might
+be a script that does any needed mount operations and starts whatever
+scripts need starting to properly monitor or control your testing.
+The next rcutorture build will then incorporate this filesystem into
+the kernel image that is passed to qemu.
+
+Or maybe you need a real root filesystem for some reason, in which case
+please read on!
+
+The remainder of this document describes one way to create the
+rcu-test-image file that contains the filesystem used by the guest-OS
+kernel. There are probably much better ways of doing this, and this
+filesystem could no doubt be smaller. It is probably also possible to
+simply download an appropriate image from any number of places.
That said, here are the commands:
@@ -36,7 +61,7 @@ References:
https://help.ubuntu.com/community/JeOSVMBuilder
http://wiki.libvirt.org/page/UbuntuKVMWalkthrough
http://www.moe.co.uk/2011/01/07/pci_add_option_rom-failed-to-find-romfile-pxe-rtl8139-bin/ -- "apt-get install kvm-pxe"
- http://www.landley.net/writing/rootfs-howto.html
- http://en.wikipedia.org/wiki/Initrd
- http://en.wikipedia.org/wiki/Cpio
+ https://www.landley.net/writing/rootfs-howto.html
+ https://en.wikipedia.org/wiki/Initrd
+ https://en.wikipedia.org/wiki/Cpio
http://wiki.libvirt.org/page/UbuntuKVMWalkthrough
diff --git a/tools/testing/selftests/rcutorture/formal/srcu-cbmc/Makefile b/tools/testing/selftests/rcutorture/formal/srcu-cbmc/Makefile
deleted file mode 100644
index 4bed0b678f8b..000000000000
--- a/tools/testing/selftests/rcutorture/formal/srcu-cbmc/Makefile
+++ /dev/null
@@ -1,17 +0,0 @@
-# SPDX-License-Identifier: GPL-2.0
-all: srcu.c store_buffering
-
-LINUX_SOURCE = ../../../../../..
-
-modified_srcu_input = $(LINUX_SOURCE)/include/linux/srcu.h \
- $(LINUX_SOURCE)/kernel/rcu/srcu.c
-
-modified_srcu_output = include/linux/srcu.h srcu.c
-
-include/linux/srcu.h: srcu.c
-
-srcu.c: modify_srcu.awk Makefile $(modified_srcu_input)
- awk -f modify_srcu.awk $(modified_srcu_input) $(modified_srcu_output)
-
-store_buffering:
- @cd tests/store_buffering; make
diff --git a/tools/testing/selftests/rcutorture/formal/srcu-cbmc/empty_includes/linux/export.h b/tools/testing/selftests/rcutorture/formal/srcu-cbmc/empty_includes/linux/export.h
deleted file mode 100644
index e69de29bb2d1..000000000000
--- a/tools/testing/selftests/rcutorture/formal/srcu-cbmc/empty_includes/linux/export.h
+++ /dev/null
diff --git a/tools/testing/selftests/rcutorture/formal/srcu-cbmc/empty_includes/linux/mutex.h b/tools/testing/selftests/rcutorture/formal/srcu-cbmc/empty_includes/linux/mutex.h
deleted file mode 100644
index e69de29bb2d1..000000000000
--- a/tools/testing/selftests/rcutorture/formal/srcu-cbmc/empty_includes/linux/mutex.h
+++ /dev/null
diff --git a/tools/testing/selftests/rcutorture/formal/srcu-cbmc/empty_includes/linux/percpu.h b/tools/testing/selftests/rcutorture/formal/srcu-cbmc/empty_includes/linux/percpu.h
deleted file mode 100644
index e69de29bb2d1..000000000000
--- a/tools/testing/selftests/rcutorture/formal/srcu-cbmc/empty_includes/linux/percpu.h
+++ /dev/null
diff --git a/tools/testing/selftests/rcutorture/formal/srcu-cbmc/empty_includes/linux/preempt.h b/tools/testing/selftests/rcutorture/formal/srcu-cbmc/empty_includes/linux/preempt.h
deleted file mode 100644
index e69de29bb2d1..000000000000
--- a/tools/testing/selftests/rcutorture/formal/srcu-cbmc/empty_includes/linux/preempt.h
+++ /dev/null
diff --git a/tools/testing/selftests/rcutorture/formal/srcu-cbmc/empty_includes/linux/rcupdate.h b/tools/testing/selftests/rcutorture/formal/srcu-cbmc/empty_includes/linux/rcupdate.h
deleted file mode 100644
index e69de29bb2d1..000000000000
--- a/tools/testing/selftests/rcutorture/formal/srcu-cbmc/empty_includes/linux/rcupdate.h
+++ /dev/null
diff --git a/tools/testing/selftests/rcutorture/formal/srcu-cbmc/empty_includes/linux/sched.h b/tools/testing/selftests/rcutorture/formal/srcu-cbmc/empty_includes/linux/sched.h
deleted file mode 100644
index e69de29bb2d1..000000000000
--- a/tools/testing/selftests/rcutorture/formal/srcu-cbmc/empty_includes/linux/sched.h
+++ /dev/null
diff --git a/tools/testing/selftests/rcutorture/formal/srcu-cbmc/empty_includes/linux/smp.h b/tools/testing/selftests/rcutorture/formal/srcu-cbmc/empty_includes/linux/smp.h
deleted file mode 100644
index e69de29bb2d1..000000000000
--- a/tools/testing/selftests/rcutorture/formal/srcu-cbmc/empty_includes/linux/smp.h
+++ /dev/null
diff --git a/tools/testing/selftests/rcutorture/formal/srcu-cbmc/empty_includes/linux/workqueue.h b/tools/testing/selftests/rcutorture/formal/srcu-cbmc/empty_includes/linux/workqueue.h
deleted file mode 100644
index e69de29bb2d1..000000000000
--- a/tools/testing/selftests/rcutorture/formal/srcu-cbmc/empty_includes/linux/workqueue.h
+++ /dev/null
diff --git a/tools/testing/selftests/rcutorture/formal/srcu-cbmc/empty_includes/uapi/linux/types.h b/tools/testing/selftests/rcutorture/formal/srcu-cbmc/empty_includes/uapi/linux/types.h
deleted file mode 100644
index e69de29bb2d1..000000000000
--- a/tools/testing/selftests/rcutorture/formal/srcu-cbmc/empty_includes/uapi/linux/types.h
+++ /dev/null
diff --git a/tools/testing/selftests/rcutorture/formal/srcu-cbmc/include/linux/kconfig.h b/tools/testing/selftests/rcutorture/formal/srcu-cbmc/include/linux/kconfig.h
deleted file mode 100644
index f2860dd1b407..000000000000
--- a/tools/testing/selftests/rcutorture/formal/srcu-cbmc/include/linux/kconfig.h
+++ /dev/null
@@ -1 +0,0 @@
-#include <LINUX_SOURCE/linux/kconfig.h>
diff --git a/tools/testing/selftests/rcutorture/formal/srcu-cbmc/include/linux/types.h b/tools/testing/selftests/rcutorture/formal/srcu-cbmc/include/linux/types.h
deleted file mode 100644
index 8bc960e5e713..000000000000
--- a/tools/testing/selftests/rcutorture/formal/srcu-cbmc/include/linux/types.h
+++ /dev/null
@@ -1,152 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-/*
- * This header has been modifies to remove definitions of types that
- * are defined in standard userspace headers or are problematic for some
- * other reason.
- */
-
-#ifndef _LINUX_TYPES_H
-#define _LINUX_TYPES_H
-
-#define __EXPORTED_HEADERS__
-#include <uapi/linux/types.h>
-
-#ifndef __ASSEMBLY__
-
-#define DECLARE_BITMAP(name, bits) \
- unsigned long name[BITS_TO_LONGS(bits)]
-
-typedef __u32 __kernel_dev_t;
-
-/* bsd */
-typedef unsigned char u_char;
-typedef unsigned short u_short;
-typedef unsigned int u_int;
-typedef unsigned long u_long;
-
-/* sysv */
-typedef unsigned char unchar;
-typedef unsigned short ushort;
-typedef unsigned int uint;
-typedef unsigned long ulong;
-
-#ifndef __BIT_TYPES_DEFINED__
-#define __BIT_TYPES_DEFINED__
-
-typedef __u8 u_int8_t;
-typedef __s8 int8_t;
-typedef __u16 u_int16_t;
-typedef __s16 int16_t;
-typedef __u32 u_int32_t;
-typedef __s32 int32_t;
-
-#endif /* !(__BIT_TYPES_DEFINED__) */
-
-typedef __u8 uint8_t;
-typedef __u16 uint16_t;
-typedef __u32 uint32_t;
-
-/* this is a special 64bit data type that is 8-byte aligned */
-#define aligned_u64 __u64 __attribute__((aligned(8)))
-#define aligned_be64 __be64 __attribute__((aligned(8)))
-#define aligned_le64 __le64 __attribute__((aligned(8)))
-
-/**
- * The type used for indexing onto a disc or disc partition.
- *
- * Linux always considers sectors to be 512 bytes long independently
- * of the devices real block size.
- *
- * blkcnt_t is the type of the inode's block count.
- */
-typedef u64 sector_t;
-
-/*
- * The type of an index into the pagecache.
- */
-#define pgoff_t unsigned long
-
-/*
- * A dma_addr_t can hold any valid DMA address, i.e., any address returned
- * by the DMA API.
- *
- * If the DMA API only uses 32-bit addresses, dma_addr_t need only be 32
- * bits wide. Bus addresses, e.g., PCI BARs, may be wider than 32 bits,
- * but drivers do memory-mapped I/O to ioremapped kernel virtual addresses,
- * so they don't care about the size of the actual bus addresses.
- */
-#ifdef CONFIG_ARCH_DMA_ADDR_T_64BIT
-typedef u64 dma_addr_t;
-#else
-typedef u32 dma_addr_t;
-#endif
-
-#ifdef CONFIG_PHYS_ADDR_T_64BIT
-typedef u64 phys_addr_t;
-#else
-typedef u32 phys_addr_t;
-#endif
-
-typedef phys_addr_t resource_size_t;
-
-/*
- * This type is the placeholder for a hardware interrupt number. It has to be
- * big enough to enclose whatever representation is used by a given platform.
- */
-typedef unsigned long irq_hw_number_t;
-
-typedef struct {
- int counter;
-} atomic_t;
-
-#ifdef CONFIG_64BIT
-typedef struct {
- long counter;
-} atomic64_t;
-#endif
-
-struct list_head {
- struct list_head *next, *prev;
-};
-
-struct hlist_head {
- struct hlist_node *first;
-};
-
-struct hlist_node {
- struct hlist_node *next, **pprev;
-};
-
-/**
- * struct callback_head - callback structure for use with RCU and task_work
- * @next: next update requests in a list
- * @func: actual update function to call after the grace period.
- *
- * The struct is aligned to size of pointer. On most architectures it happens
- * naturally due ABI requirements, but some architectures (like CRIS) have
- * weird ABI and we need to ask it explicitly.
- *
- * The alignment is required to guarantee that bits 0 and 1 of @next will be
- * clear under normal conditions -- as long as we use call_rcu() or
- * call_srcu() to queue callback.
- *
- * This guarantee is important for few reasons:
- * - future call_rcu_lazy() will make use of lower bits in the pointer;
- * - the structure shares storage spacer in struct page with @compound_head,
- * which encode PageTail() in bit 0. The guarantee is needed to avoid
- * false-positive PageTail().
- */
-struct callback_head {
- struct callback_head *next;
- void (*func)(struct callback_head *head);
-} __attribute__((aligned(sizeof(void *))));
-#define rcu_head callback_head
-
-typedef void (*rcu_callback_t)(struct rcu_head *head);
-typedef void (*call_rcu_func_t)(struct rcu_head *head, rcu_callback_t func);
-
-/* clocksource cycle base type */
-typedef u64 cycle_t;
-
-#endif /* __ASSEMBLY__ */
-#endif /* _LINUX_TYPES_H */
diff --git a/tools/testing/selftests/rcutorture/formal/srcu-cbmc/modify_srcu.awk b/tools/testing/selftests/rcutorture/formal/srcu-cbmc/modify_srcu.awk
deleted file mode 100755
index e05182d3e47d..000000000000
--- a/tools/testing/selftests/rcutorture/formal/srcu-cbmc/modify_srcu.awk
+++ /dev/null
@@ -1,376 +0,0 @@
-#!/usr/bin/awk -f
-# SPDX-License-Identifier: GPL-2.0
-
-# Modify SRCU for formal verification. The first argument should be srcu.h and
-# the second should be srcu.c. Outputs modified srcu.h and srcu.c into the
-# current directory.
-
-BEGIN {
- if (ARGC != 5) {
- print "Usange: input.h input.c output.h output.c" > "/dev/stderr";
- exit 1;
- }
- h_output = ARGV[3];
- c_output = ARGV[4];
- ARGC = 3;
-
- # Tokenize using FS and not RS as FS supports regular expressions. Each
- # record is one line of source, except that backslashed lines are
- # combined. Comments are treated as field separators, as are quotes.
- quote_regexp="\"([^\\\\\"]|\\\\.)*\"";
- comment_regexp="\\/\\*([^*]|\\*+[^*/])*\\*\\/|\\/\\/.*(\n|$)";
- FS="([ \\\\\t\n\v\f;,.=(){}+*/<>&|^-]|\\[|\\]|" comment_regexp "|" quote_regexp ")+";
-
- inside_srcu_struct = 0;
- inside_srcu_init_def = 0;
- srcu_init_param_name = "";
- in_macro = 0;
- brace_nesting = 0;
- paren_nesting = 0;
-
- # Allow the manipulation of the last field separator after has been
- # seen.
- last_fs = "";
- # Whether the last field separator was intended to be output.
- last_fs_print = 0;
-
- # rcu_batches stores the initialization for each instance of struct
- # rcu_batch
-
- in_comment = 0;
-
- outputfile = "";
-}
-
-{
- prev_outputfile = outputfile;
- if (FILENAME ~ /\.h$/) {
- outputfile = h_output;
- if (FNR != NR) {
- print "Incorrect file order" > "/dev/stderr";
- exit 1;
- }
- }
- else
- outputfile = c_output;
-
- if (prev_outputfile && outputfile != prev_outputfile) {
- new_outputfile = outputfile;
- outputfile = prev_outputfile;
- update_fieldsep("", 0);
- outputfile = new_outputfile;
- }
-}
-
-# Combine the next line into $0.
-function combine_line() {
- ret = getline next_line;
- if (ret == 0) {
- # Don't allow two consecutive getlines at the end of the file
- if (eof_found) {
- print "Error: expected more input." > "/dev/stderr";
- exit 1;
- } else {
- eof_found = 1;
- }
- } else if (ret == -1) {
- print "Error reading next line of file" FILENAME > "/dev/stderr";
- exit 1;
- }
- $0 = $0 "\n" next_line;
-}
-
-# Combine backslashed lines and multiline comments.
-function combine_backslashes() {
- while (/\\$|\/\*([^*]|\*+[^*\/])*\**$/) {
- combine_line();
- }
-}
-
-function read_line() {
- combine_line();
- combine_backslashes();
-}
-
-# Print out field separators and update variables that depend on them. Only
-# print if p is true. Call with sep="" and p=0 to print out the last field
-# separator.
-function update_fieldsep(sep, p) {
- # Count braces
- sep_tmp = sep;
- gsub(quote_regexp "|" comment_regexp, "", sep_tmp);
- while (1)
- {
- if (sub("[^{}()]*\\{", "", sep_tmp)) {
- brace_nesting++;
- continue;
- }
- if (sub("[^{}()]*\\}", "", sep_tmp)) {
- brace_nesting--;
- if (brace_nesting < 0) {
- print "Unbalanced braces!" > "/dev/stderr";
- exit 1;
- }
- continue;
- }
- if (sub("[^{}()]*\\(", "", sep_tmp)) {
- paren_nesting++;
- continue;
- }
- if (sub("[^{}()]*\\)", "", sep_tmp)) {
- paren_nesting--;
- if (paren_nesting < 0) {
- print "Unbalanced parenthesis!" > "/dev/stderr";
- exit 1;
- }
- continue;
- }
-
- break;
- }
-
- if (last_fs_print)
- printf("%s", last_fs) > outputfile;
- last_fs = sep;
- last_fs_print = p;
-}
-
-# Shifts the fields down by n positions. Calls next if there are no more. If p
-# is true then print out field separators.
-function shift_fields(n, p) {
- do {
- if (match($0, FS) > 0) {
- update_fieldsep(substr($0, RSTART, RLENGTH), p);
- if (RSTART + RLENGTH <= length())
- $0 = substr($0, RSTART + RLENGTH);
- else
- $0 = "";
- } else {
- update_fieldsep("", 0);
- print "" > outputfile;
- next;
- }
- } while (--n > 0);
-}
-
-# Shifts and prints the first n fields.
-function print_fields(n) {
- do {
- update_fieldsep("", 0);
- printf("%s", $1) > outputfile;
- shift_fields(1, 1);
- } while (--n > 0);
-}
-
-{
- combine_backslashes();
-}
-
-# Print leading FS
-{
- if (match($0, "^(" FS ")+") > 0) {
- update_fieldsep(substr($0, RSTART, RLENGTH), 1);
- if (RSTART + RLENGTH <= length())
- $0 = substr($0, RSTART + RLENGTH);
- else
- $0 = "";
- }
-}
-
-# Parse the line.
-{
- while (NF > 0) {
- if ($1 == "struct" && NF < 3) {
- read_line();
- continue;
- }
-
- if (FILENAME ~ /\.h$/ && !inside_srcu_struct &&
- brace_nesting == 0 && paren_nesting == 0 &&
- $1 == "struct" && $2 == "srcu_struct" &&
- $0 ~ "^struct(" FS ")+srcu_struct(" FS ")+\\{") {
- inside_srcu_struct = 1;
- print_fields(2);
- continue;
- }
- if (inside_srcu_struct && brace_nesting == 0 &&
- paren_nesting == 0) {
- inside_srcu_struct = 0;
- update_fieldsep("", 0);
- for (name in rcu_batches)
- print "extern struct rcu_batch " name ";" > outputfile;
- }
-
- if (inside_srcu_struct && $1 == "struct" && $2 == "rcu_batch") {
- # Move rcu_batches outside of the struct.
- rcu_batches[$3] = "";
- shift_fields(3, 1);
- sub(/;[[:space:]]*$/, "", last_fs);
- continue;
- }
-
- if (FILENAME ~ /\.h$/ && !inside_srcu_init_def &&
- $1 == "#define" && $2 == "__SRCU_STRUCT_INIT") {
- inside_srcu_init_def = 1;
- srcu_init_param_name = $3;
- in_macro = 1;
- print_fields(3);
- continue;
- }
- if (inside_srcu_init_def && brace_nesting == 0 &&
- paren_nesting == 0) {
- inside_srcu_init_def = 0;
- in_macro = 0;
- continue;
- }
-
- if (inside_srcu_init_def && brace_nesting == 1 &&
- paren_nesting == 0 && last_fs ~ /\.[[:space:]]*$/ &&
- $1 ~ /^[[:alnum:]_]+$/) {
- name = $1;
- if (name in rcu_batches) {
- # Remove the dot.
- sub(/\.[[:space:]]*$/, "", last_fs);
-
- old_record = $0;
- do
- shift_fields(1, 0);
- while (last_fs !~ /,/ || paren_nesting > 0);
- end_loc = length(old_record) - length($0);
- end_loc += index(last_fs, ",") - length(last_fs);
-
- last_fs = substr(last_fs, index(last_fs, ",") + 1);
- last_fs_print = 1;
-
- match(old_record, "^"name"("FS")+=");
- start_loc = RSTART + RLENGTH;
-
- len = end_loc - start_loc;
- initializer = substr(old_record, start_loc, len);
- gsub(srcu_init_param_name "\\.", "", initializer);
- rcu_batches[name] = initializer;
- continue;
- }
- }
-
- # Don't include a nonexistent file
- if (!in_macro && $1 == "#include" && /^#include[[:space:]]+"rcu\.h"/) {
- update_fieldsep("", 0);
- next;
- }
-
- # Ignore most preprocessor stuff.
- if (!in_macro && $1 ~ /#/) {
- break;
- }
-
- if (brace_nesting > 0 && $1 ~ "^[[:alnum:]_]+$" && NF < 2) {
- read_line();
- continue;
- }
- if (brace_nesting > 0 &&
- $0 ~ "^[[:alnum:]_]+[[:space:]]*(\\.|->)[[:space:]]*[[:alnum:]_]+" &&
- $2 in rcu_batches) {
- # Make uses of rcu_batches global. Somewhat unreliable.
- shift_fields(1, 0);
- print_fields(1);
- continue;
- }
-
- if ($1 == "static" && NF < 3) {
- read_line();
- continue;
- }
- if ($1 == "static" && ($2 == "bool" && $3 == "try_check_zero" ||
- $2 == "void" && $3 == "srcu_flip")) {
- shift_fields(1, 1);
- print_fields(2);
- continue;
- }
-
- # Distinguish between read-side and write-side memory barriers.
- if ($1 == "smp_mb" && NF < 2) {
- read_line();
- continue;
- }
- if (match($0, /^smp_mb[[:space:]();\/*]*[[:alnum:]]/)) {
- barrier_letter = substr($0, RLENGTH, 1);
- if (barrier_letter ~ /A|D/)
- new_barrier_name = "sync_smp_mb";
- else if (barrier_letter ~ /B|C/)
- new_barrier_name = "rs_smp_mb";
- else {
- print "Unrecognized memory barrier." > "/dev/null";
- exit 1;
- }
-
- shift_fields(1, 1);
- printf("%s", new_barrier_name) > outputfile;
- continue;
- }
-
- # Skip definition of rcu_synchronize, since it is already
- # defined in misc.h. Only present in old versions of srcu.
- if (brace_nesting == 0 && paren_nesting == 0 &&
- $1 == "struct" && $2 == "rcu_synchronize" &&
- $0 ~ "^struct(" FS ")+rcu_synchronize(" FS ")+\\{") {
- shift_fields(2, 0);
- while (brace_nesting) {
- if (NF < 2)
- read_line();
- shift_fields(1, 0);
- }
- }
-
- # Skip definition of wakeme_after_rcu for the same reason
- if (brace_nesting == 0 && $1 == "static" && $2 == "void" &&
- $3 == "wakeme_after_rcu") {
- while (NF < 5)
- read_line();
- shift_fields(3, 0);
- do {
- while (NF < 3)
- read_line();
- shift_fields(1, 0);
- } while (paren_nesting || brace_nesting);
- }
-
- if ($1 ~ /^(unsigned|long)$/ && NF < 3) {
- read_line();
- continue;
- }
-
- # Give srcu_batches_completed the correct type for old SRCU.
- if (brace_nesting == 0 && $1 == "long" &&
- $2 == "srcu_batches_completed") {
- update_fieldsep("", 0);
- printf("unsigned ") > outputfile;
- print_fields(2);
- continue;
- }
- if (brace_nesting == 0 && $1 == "unsigned" && $2 == "long" &&
- $3 == "srcu_batches_completed") {
- print_fields(3);
- continue;
- }
-
- # Just print out the input code by default.
- print_fields(1);
- }
- update_fieldsep("", 0);
- print > outputfile;
- next;
-}
-
-END {
- update_fieldsep("", 0);
-
- if (brace_nesting != 0) {
- print "Unbalanced braces!" > "/dev/stderr";
- exit 1;
- }
-
- # Define the rcu_batches
- for (name in rcu_batches)
- print "struct rcu_batch " name " = " rcu_batches[name] ";" > c_output;
-}
diff --git a/tools/testing/selftests/rcutorture/formal/srcu-cbmc/src/assume.h b/tools/testing/selftests/rcutorture/formal/srcu-cbmc/src/assume.h
deleted file mode 100644
index 570a49d9da7e..000000000000
--- a/tools/testing/selftests/rcutorture/formal/srcu-cbmc/src/assume.h
+++ /dev/null
@@ -1,17 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-#ifndef ASSUME_H
-#define ASSUME_H
-
-/* Provide an assumption macro that can be disabled for gcc. */
-#ifdef RUN
-#define assume(x) \
- do { \
- /* Evaluate x to suppress warnings. */ \
- (void) (x); \
- } while (0)
-
-#else
-#define assume(x) __CPROVER_assume(x)
-#endif
-
-#endif
diff --git a/tools/testing/selftests/rcutorture/formal/srcu-cbmc/src/barriers.h b/tools/testing/selftests/rcutorture/formal/srcu-cbmc/src/barriers.h
deleted file mode 100644
index 3f95a768a03b..000000000000
--- a/tools/testing/selftests/rcutorture/formal/srcu-cbmc/src/barriers.h
+++ /dev/null
@@ -1,41 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-#ifndef BARRIERS_H
-#define BARRIERS_H
-
-#define barrier() __asm__ __volatile__("" : : : "memory")
-
-#ifdef RUN
-#define smp_mb() __sync_synchronize()
-#define smp_mb__after_unlock_lock() __sync_synchronize()
-#else
-/*
- * Copied from CBMC's implementation of __sync_synchronize(), which
- * seems to be disabled by default.
- */
-#define smp_mb() __CPROVER_fence("WWfence", "RRfence", "RWfence", "WRfence", \
- "WWcumul", "RRcumul", "RWcumul", "WRcumul")
-#define smp_mb__after_unlock_lock() __CPROVER_fence("WWfence", "RRfence", "RWfence", "WRfence", \
- "WWcumul", "RRcumul", "RWcumul", "WRcumul")
-#endif
-
-/*
- * Allow memory barriers to be disabled in either the read or write side
- * of SRCU individually.
- */
-
-#ifndef NO_SYNC_SMP_MB
-#define sync_smp_mb() smp_mb()
-#else
-#define sync_smp_mb() do {} while (0)
-#endif
-
-#ifndef NO_READ_SIDE_SMP_MB
-#define rs_smp_mb() smp_mb()
-#else
-#define rs_smp_mb() do {} while (0)
-#endif
-
-#define READ_ONCE(x) (*(volatile typeof(x) *) &(x))
-#define WRITE_ONCE(x) ((*(volatile typeof(x) *) &(x)) = (val))
-
-#endif
diff --git a/tools/testing/selftests/rcutorture/formal/srcu-cbmc/src/bug_on.h b/tools/testing/selftests/rcutorture/formal/srcu-cbmc/src/bug_on.h
deleted file mode 100644
index 5e7912c6a521..000000000000
--- a/tools/testing/selftests/rcutorture/formal/srcu-cbmc/src/bug_on.h
+++ /dev/null
@@ -1,14 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-#ifndef BUG_ON_H
-#define BUG_ON_H
-
-#include <assert.h>
-
-#define BUG() assert(0)
-#define BUG_ON(x) assert(!(x))
-
-/* Does it make sense to treat warnings as errors? */
-#define WARN() BUG()
-#define WARN_ON(x) (BUG_ON(x), false)
-
-#endif
diff --git a/tools/testing/selftests/rcutorture/formal/srcu-cbmc/src/combined_source.c b/tools/testing/selftests/rcutorture/formal/srcu-cbmc/src/combined_source.c
deleted file mode 100644
index e67ee5b3dd7c..000000000000
--- a/tools/testing/selftests/rcutorture/formal/srcu-cbmc/src/combined_source.c
+++ /dev/null
@@ -1,14 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-#include <config.h>
-
-/* Include all source files. */
-
-#include "include_srcu.c"
-
-#include "preempt.c"
-#include "misc.c"
-
-/* Used by test.c files */
-#include <pthread.h>
-#include <stdlib.h>
-#include <linux/srcu.h>
diff --git a/tools/testing/selftests/rcutorture/formal/srcu-cbmc/src/config.h b/tools/testing/selftests/rcutorture/formal/srcu-cbmc/src/config.h
deleted file mode 100644
index 283d7103334f..000000000000
--- a/tools/testing/selftests/rcutorture/formal/srcu-cbmc/src/config.h
+++ /dev/null
@@ -1,28 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-/* "Cheater" definitions based on restricted Kconfig choices. */
-
-#undef CONFIG_TINY_RCU
-#undef __CHECKER__
-#undef CONFIG_DEBUG_LOCK_ALLOC
-#undef CONFIG_DEBUG_OBJECTS_RCU_HEAD
-#undef CONFIG_HOTPLUG_CPU
-#undef CONFIG_MODULES
-#undef CONFIG_NO_HZ_FULL_SYSIDLE
-#undef CONFIG_PREEMPT_COUNT
-#undef CONFIG_PREEMPT_RCU
-#undef CONFIG_PROVE_RCU
-#undef CONFIG_RCU_NOCB_CPU
-#undef CONFIG_RCU_NOCB_CPU_ALL
-#undef CONFIG_RCU_STALL_COMMON
-#undef CONFIG_RCU_TRACE
-#undef CONFIG_RCU_USER_QS
-#undef CONFIG_TASKS_RCU
-#define CONFIG_TREE_RCU
-
-#define CONFIG_GENERIC_ATOMIC64
-
-#if NR_CPUS > 1
-#define CONFIG_SMP
-#else
-#undef CONFIG_SMP
-#endif
diff --git a/tools/testing/selftests/rcutorture/formal/srcu-cbmc/src/include_srcu.c b/tools/testing/selftests/rcutorture/formal/srcu-cbmc/src/include_srcu.c
deleted file mode 100644
index e5202d4cff30..000000000000
--- a/tools/testing/selftests/rcutorture/formal/srcu-cbmc/src/include_srcu.c
+++ /dev/null
@@ -1,32 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-#include <config.h>
-
-#include <assert.h>
-#include <errno.h>
-#include <inttypes.h>
-#include <pthread.h>
-#include <stddef.h>
-#include <string.h>
-#include <sys/types.h>
-
-#include "int_typedefs.h"
-
-#include "barriers.h"
-#include "bug_on.h"
-#include "locks.h"
-#include "misc.h"
-#include "preempt.h"
-#include "percpu.h"
-#include "workqueues.h"
-
-#ifdef USE_SIMPLE_SYNC_SRCU
-#define synchronize_srcu(sp) synchronize_srcu_original(sp)
-#endif
-
-#include <srcu.c>
-
-#ifdef USE_SIMPLE_SYNC_SRCU
-#undef synchronize_srcu
-
-#include "simple_sync_srcu.c"
-#endif
diff --git a/tools/testing/selftests/rcutorture/formal/srcu-cbmc/src/int_typedefs.h b/tools/testing/selftests/rcutorture/formal/srcu-cbmc/src/int_typedefs.h
deleted file mode 100644
index 0dd27aa517a7..000000000000
--- a/tools/testing/selftests/rcutorture/formal/srcu-cbmc/src/int_typedefs.h
+++ /dev/null
@@ -1,34 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-#ifndef INT_TYPEDEFS_H
-#define INT_TYPEDEFS_H
-
-#include <inttypes.h>
-
-typedef int8_t s8;
-typedef uint8_t u8;
-typedef int16_t s16;
-typedef uint16_t u16;
-typedef int32_t s32;
-typedef uint32_t u32;
-typedef int64_t s64;
-typedef uint64_t u64;
-
-typedef int8_t __s8;
-typedef uint8_t __u8;
-typedef int16_t __s16;
-typedef uint16_t __u16;
-typedef int32_t __s32;
-typedef uint32_t __u32;
-typedef int64_t __s64;
-typedef uint64_t __u64;
-
-#define S8_C(x) INT8_C(x)
-#define U8_C(x) UINT8_C(x)
-#define S16_C(x) INT16_C(x)
-#define U16_C(x) UINT16_C(x)
-#define S32_C(x) INT32_C(x)
-#define U32_C(x) UINT32_C(x)
-#define S64_C(x) INT64_C(x)
-#define U64_C(x) UINT64_C(x)
-
-#endif
diff --git a/tools/testing/selftests/rcutorture/formal/srcu-cbmc/src/locks.h b/tools/testing/selftests/rcutorture/formal/srcu-cbmc/src/locks.h
deleted file mode 100644
index cf6938d679d7..000000000000
--- a/tools/testing/selftests/rcutorture/formal/srcu-cbmc/src/locks.h
+++ /dev/null
@@ -1,221 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-#ifndef LOCKS_H
-#define LOCKS_H
-
-#include <limits.h>
-#include <pthread.h>
-#include <stdbool.h>
-
-#include "assume.h"
-#include "bug_on.h"
-#include "preempt.h"
-
-int nondet_int(void);
-
-#define __acquire(x)
-#define __acquires(x)
-#define __release(x)
-#define __releases(x)
-
-/* Only use one lock mechanism. Select which one. */
-#ifdef PTHREAD_LOCK
-struct lock_impl {
- pthread_mutex_t mutex;
-};
-
-static inline void lock_impl_lock(struct lock_impl *lock)
-{
- BUG_ON(pthread_mutex_lock(&lock->mutex));
-}
-
-static inline void lock_impl_unlock(struct lock_impl *lock)
-{
- BUG_ON(pthread_mutex_unlock(&lock->mutex));
-}
-
-static inline bool lock_impl_trylock(struct lock_impl *lock)
-{
- int err = pthread_mutex_trylock(&lock->mutex);
-
- if (!err)
- return true;
- else if (err == EBUSY)
- return false;
- BUG();
-}
-
-static inline void lock_impl_init(struct lock_impl *lock)
-{
- pthread_mutex_init(&lock->mutex, NULL);
-}
-
-#define LOCK_IMPL_INITIALIZER {.mutex = PTHREAD_MUTEX_INITIALIZER}
-
-#else /* !defined(PTHREAD_LOCK) */
-/* Spinlock that assumes that it always gets the lock immediately. */
-
-struct lock_impl {
- bool locked;
-};
-
-static inline bool lock_impl_trylock(struct lock_impl *lock)
-{
-#ifdef RUN
- /* TODO: Should this be a test and set? */
- return __sync_bool_compare_and_swap(&lock->locked, false, true);
-#else
- __CPROVER_atomic_begin();
- bool old_locked = lock->locked;
- lock->locked = true;
- __CPROVER_atomic_end();
-
- /* Minimal barrier to prevent accesses leaking out of lock. */
- __CPROVER_fence("RRfence", "RWfence");
-
- return !old_locked;
-#endif
-}
-
-static inline void lock_impl_lock(struct lock_impl *lock)
-{
- /*
- * CBMC doesn't support busy waiting, so just assume that the
- * lock is available.
- */
- assume(lock_impl_trylock(lock));
-
- /*
- * If the lock was already held by this thread then the assumption
- * is unsatisfiable (deadlock).
- */
-}
-
-static inline void lock_impl_unlock(struct lock_impl *lock)
-{
-#ifdef RUN
- BUG_ON(!__sync_bool_compare_and_swap(&lock->locked, true, false));
-#else
- /* Minimal barrier to prevent accesses leaking out of lock. */
- __CPROVER_fence("RWfence", "WWfence");
-
- __CPROVER_atomic_begin();
- bool old_locked = lock->locked;
- lock->locked = false;
- __CPROVER_atomic_end();
-
- BUG_ON(!old_locked);
-#endif
-}
-
-static inline void lock_impl_init(struct lock_impl *lock)
-{
- lock->locked = false;
-}
-
-#define LOCK_IMPL_INITIALIZER {.locked = false}
-
-#endif /* !defined(PTHREAD_LOCK) */
-
-/*
- * Implement spinlocks using the lock mechanism. Wrap the lock to prevent mixing
- * locks of different types.
- */
-typedef struct {
- struct lock_impl internal_lock;
-} spinlock_t;
-
-#define SPIN_LOCK_UNLOCKED {.internal_lock = LOCK_IMPL_INITIALIZER}
-#define __SPIN_LOCK_UNLOCKED(x) SPIN_LOCK_UNLOCKED
-#define DEFINE_SPINLOCK(x) spinlock_t x = SPIN_LOCK_UNLOCKED
-
-static inline void spin_lock_init(spinlock_t *lock)
-{
- lock_impl_init(&lock->internal_lock);
-}
-
-static inline void spin_lock(spinlock_t *lock)
-{
- /*
- * Spin locks also need to be removed in order to eliminate all
- * memory barriers. They are only used by the write side anyway.
- */
-#ifndef NO_SYNC_SMP_MB
- preempt_disable();
- lock_impl_lock(&lock->internal_lock);
-#endif
-}
-
-static inline void spin_unlock(spinlock_t *lock)
-{
-#ifndef NO_SYNC_SMP_MB
- lock_impl_unlock(&lock->internal_lock);
- preempt_enable();
-#endif
-}
-
-/* Don't bother with interrupts */
-#define spin_lock_irq(lock) spin_lock(lock)
-#define spin_unlock_irq(lock) spin_unlock(lock)
-#define spin_lock_irqsave(lock, flags) spin_lock(lock)
-#define spin_unlock_irqrestore(lock, flags) spin_unlock(lock)
-
-/*
- * This is supposed to return an int, but I think that a bool should work as
- * well.
- */
-static inline bool spin_trylock(spinlock_t *lock)
-{
-#ifndef NO_SYNC_SMP_MB
- preempt_disable();
- return lock_impl_trylock(&lock->internal_lock);
-#else
- return true;
-#endif
-}
-
-struct completion {
- /* Hopefuly this won't overflow. */
- unsigned int count;
-};
-
-#define COMPLETION_INITIALIZER(x) {.count = 0}
-#define DECLARE_COMPLETION(x) struct completion x = COMPLETION_INITIALIZER(x)
-#define DECLARE_COMPLETION_ONSTACK(x) DECLARE_COMPLETION(x)
-
-static inline void init_completion(struct completion *c)
-{
- c->count = 0;
-}
-
-static inline void wait_for_completion(struct completion *c)
-{
- unsigned int prev_count = __sync_fetch_and_sub(&c->count, 1);
-
- assume(prev_count);
-}
-
-static inline void complete(struct completion *c)
-{
- unsigned int prev_count = __sync_fetch_and_add(&c->count, 1);
-
- BUG_ON(prev_count == UINT_MAX);
-}
-
-/* This function probably isn't very useful for CBMC. */
-static inline bool try_wait_for_completion(struct completion *c)
-{
- BUG();
-}
-
-static inline bool completion_done(struct completion *c)
-{
- return c->count;
-}
-
-/* TODO: Implement complete_all */
-static inline void complete_all(struct completion *c)
-{
- BUG();
-}
-
-#endif
diff --git a/tools/testing/selftests/rcutorture/formal/srcu-cbmc/src/misc.c b/tools/testing/selftests/rcutorture/formal/srcu-cbmc/src/misc.c
deleted file mode 100644
index 9440cc39e3c6..000000000000
--- a/tools/testing/selftests/rcutorture/formal/srcu-cbmc/src/misc.c
+++ /dev/null
@@ -1,12 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-#include <config.h>
-
-#include "misc.h"
-#include "bug_on.h"
-
-struct rcu_head;
-
-void wakeme_after_rcu(struct rcu_head *head)
-{
- BUG();
-}
diff --git a/tools/testing/selftests/rcutorture/formal/srcu-cbmc/src/misc.h b/tools/testing/selftests/rcutorture/formal/srcu-cbmc/src/misc.h
deleted file mode 100644
index aca50030f954..000000000000
--- a/tools/testing/selftests/rcutorture/formal/srcu-cbmc/src/misc.h
+++ /dev/null
@@ -1,58 +0,0 @@
-#ifndef MISC_H
-#define MISC_H
-
-#include "assume.h"
-#include "int_typedefs.h"
-#include "locks.h"
-
-#include <linux/types.h>
-
-/* Probably won't need to deal with bottom halves. */
-static inline void local_bh_disable(void) {}
-static inline void local_bh_enable(void) {}
-
-#define MODULE_ALIAS(X)
-#define module_param(...)
-#define EXPORT_SYMBOL_GPL(x)
-
-#define container_of(ptr, type, member) ({ \
- const typeof(((type *)0)->member) *__mptr = (ptr); \
- (type *)((char *)__mptr - offsetof(type, member)); \
-})
-
-#ifndef USE_SIMPLE_SYNC_SRCU
-/* Abuse udelay to make sure that busy loops terminate. */
-#define udelay(x) assume(0)
-
-#else
-
-/* The simple custom synchronize_srcu is ok with try_check_zero failing. */
-#define udelay(x) do { } while (0)
-#endif
-
-#define trace_rcu_torture_read(rcutorturename, rhp, secs, c_old, c) \
- do { } while (0)
-
-#define notrace
-
-/* Avoid including rcupdate.h */
-struct rcu_synchronize {
- struct rcu_head head;
- struct completion completion;
-};
-
-void wakeme_after_rcu(struct rcu_head *head);
-
-#define rcu_lock_acquire(a) do { } while (0)
-#define rcu_lock_release(a) do { } while (0)
-#define rcu_lockdep_assert(c, s) do { } while (0)
-#define RCU_LOCKDEP_WARN(c, s) do { } while (0)
-
-/* Let CBMC non-deterministically choose switch between normal and expedited. */
-bool rcu_gp_is_normal(void);
-bool rcu_gp_is_expedited(void);
-
-/* Do the same for old versions of rcu. */
-#define rcu_expedited (rcu_gp_is_expedited())
-
-#endif
diff --git a/tools/testing/selftests/rcutorture/formal/srcu-cbmc/src/percpu.h b/tools/testing/selftests/rcutorture/formal/srcu-cbmc/src/percpu.h
deleted file mode 100644
index 27e67a3f291f..000000000000
--- a/tools/testing/selftests/rcutorture/formal/srcu-cbmc/src/percpu.h
+++ /dev/null
@@ -1,93 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-#ifndef PERCPU_H
-#define PERCPU_H
-
-#include <stddef.h>
-#include "bug_on.h"
-#include "preempt.h"
-
-#define __percpu
-
-/* Maximum size of any percpu data. */
-#define PERCPU_OFFSET (4 * sizeof(long))
-
-/* Ignore alignment, as CBMC doesn't care about false sharing. */
-#define alloc_percpu(type) __alloc_percpu(sizeof(type), 1)
-
-static inline void *__alloc_percpu(size_t size, size_t align)
-{
- BUG();
- return NULL;
-}
-
-static inline void free_percpu(void *ptr)
-{
- BUG();
-}
-
-#define per_cpu_ptr(ptr, cpu) \
- ((typeof(ptr)) ((char *) (ptr) + PERCPU_OFFSET * cpu))
-
-#define __this_cpu_inc(pcp) __this_cpu_add(pcp, 1)
-#define __this_cpu_dec(pcp) __this_cpu_sub(pcp, 1)
-#define __this_cpu_sub(pcp, n) __this_cpu_add(pcp, -(typeof(pcp)) (n))
-
-#define this_cpu_inc(pcp) this_cpu_add(pcp, 1)
-#define this_cpu_dec(pcp) this_cpu_sub(pcp, 1)
-#define this_cpu_sub(pcp, n) this_cpu_add(pcp, -(typeof(pcp)) (n))
-
-/* Make CBMC use atomics to work around bug. */
-#ifdef RUN
-#define THIS_CPU_ADD_HELPER(ptr, x) (*(ptr) += (x))
-#else
-/*
- * Split the atomic into a read and a write so that it has the least
- * possible ordering.
- */
-#define THIS_CPU_ADD_HELPER(ptr, x) \
- do { \
- typeof(ptr) this_cpu_add_helper_ptr = (ptr); \
- typeof(ptr) this_cpu_add_helper_x = (x); \
- typeof(*ptr) this_cpu_add_helper_temp; \
- __CPROVER_atomic_begin(); \
- this_cpu_add_helper_temp = *(this_cpu_add_helper_ptr); \
- __CPROVER_atomic_end(); \
- this_cpu_add_helper_temp += this_cpu_add_helper_x; \
- __CPROVER_atomic_begin(); \
- *(this_cpu_add_helper_ptr) = this_cpu_add_helper_temp; \
- __CPROVER_atomic_end(); \
- } while (0)
-#endif
-
-/*
- * For some reason CBMC needs an atomic operation even though this is percpu
- * data.
- */
-#define __this_cpu_add(pcp, n) \
- do { \
- BUG_ON(preemptible()); \
- THIS_CPU_ADD_HELPER(per_cpu_ptr(&(pcp), thread_cpu_id), \
- (typeof(pcp)) (n)); \
- } while (0)
-
-#define this_cpu_add(pcp, n) \
- do { \
- int this_cpu_add_impl_cpu = get_cpu(); \
- THIS_CPU_ADD_HELPER(per_cpu_ptr(&(pcp), this_cpu_add_impl_cpu), \
- (typeof(pcp)) (n)); \
- put_cpu(); \
- } while (0)
-
-/*
- * This will cause a compiler warning because of the cast from char[][] to
- * type*. This will cause a compile time error if type is too big.
- */
-#define DEFINE_PER_CPU(type, name) \
- char name[NR_CPUS][PERCPU_OFFSET]; \
- typedef char percpu_too_big_##name \
- [sizeof(type) > PERCPU_OFFSET ? -1 : 1]
-
-#define for_each_possible_cpu(cpu) \
- for ((cpu) = 0; (cpu) < NR_CPUS; ++(cpu))
-
-#endif
diff --git a/tools/testing/selftests/rcutorture/formal/srcu-cbmc/src/preempt.c b/tools/testing/selftests/rcutorture/formal/srcu-cbmc/src/preempt.c
deleted file mode 100644
index b4083ae348fb..000000000000
--- a/tools/testing/selftests/rcutorture/formal/srcu-cbmc/src/preempt.c
+++ /dev/null
@@ -1,79 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-#include <config.h>
-
-#include "preempt.h"
-
-#include "assume.h"
-#include "locks.h"
-
-/* Support NR_CPUS of at most 64 */
-#define CPU_PREEMPTION_LOCKS_INIT0 LOCK_IMPL_INITIALIZER
-#define CPU_PREEMPTION_LOCKS_INIT1 \
- CPU_PREEMPTION_LOCKS_INIT0, CPU_PREEMPTION_LOCKS_INIT0
-#define CPU_PREEMPTION_LOCKS_INIT2 \
- CPU_PREEMPTION_LOCKS_INIT1, CPU_PREEMPTION_LOCKS_INIT1
-#define CPU_PREEMPTION_LOCKS_INIT3 \
- CPU_PREEMPTION_LOCKS_INIT2, CPU_PREEMPTION_LOCKS_INIT2
-#define CPU_PREEMPTION_LOCKS_INIT4 \
- CPU_PREEMPTION_LOCKS_INIT3, CPU_PREEMPTION_LOCKS_INIT3
-#define CPU_PREEMPTION_LOCKS_INIT5 \
- CPU_PREEMPTION_LOCKS_INIT4, CPU_PREEMPTION_LOCKS_INIT4
-
-/*
- * Simulate disabling preemption by locking a particular cpu. NR_CPUS
- * should be the actual number of cpus, not just the maximum.
- */
-struct lock_impl cpu_preemption_locks[NR_CPUS] = {
- CPU_PREEMPTION_LOCKS_INIT0
-#if (NR_CPUS - 1) & 1
- , CPU_PREEMPTION_LOCKS_INIT0
-#endif
-#if (NR_CPUS - 1) & 2
- , CPU_PREEMPTION_LOCKS_INIT1
-#endif
-#if (NR_CPUS - 1) & 4
- , CPU_PREEMPTION_LOCKS_INIT2
-#endif
-#if (NR_CPUS - 1) & 8
- , CPU_PREEMPTION_LOCKS_INIT3
-#endif
-#if (NR_CPUS - 1) & 16
- , CPU_PREEMPTION_LOCKS_INIT4
-#endif
-#if (NR_CPUS - 1) & 32
- , CPU_PREEMPTION_LOCKS_INIT5
-#endif
-};
-
-#undef CPU_PREEMPTION_LOCKS_INIT0
-#undef CPU_PREEMPTION_LOCKS_INIT1
-#undef CPU_PREEMPTION_LOCKS_INIT2
-#undef CPU_PREEMPTION_LOCKS_INIT3
-#undef CPU_PREEMPTION_LOCKS_INIT4
-#undef CPU_PREEMPTION_LOCKS_INIT5
-
-__thread int thread_cpu_id;
-__thread int preempt_disable_count;
-
-void preempt_disable(void)
-{
- BUG_ON(preempt_disable_count < 0 || preempt_disable_count == INT_MAX);
-
- if (preempt_disable_count++)
- return;
-
- thread_cpu_id = nondet_int();
- assume(thread_cpu_id >= 0);
- assume(thread_cpu_id < NR_CPUS);
- lock_impl_lock(&cpu_preemption_locks[thread_cpu_id]);
-}
-
-void preempt_enable(void)
-{
- BUG_ON(preempt_disable_count < 1);
-
- if (--preempt_disable_count)
- return;
-
- lock_impl_unlock(&cpu_preemption_locks[thread_cpu_id]);
-}
diff --git a/tools/testing/selftests/rcutorture/formal/srcu-cbmc/src/preempt.h b/tools/testing/selftests/rcutorture/formal/srcu-cbmc/src/preempt.h
deleted file mode 100644
index f8b762cd214c..000000000000
--- a/tools/testing/selftests/rcutorture/formal/srcu-cbmc/src/preempt.h
+++ /dev/null
@@ -1,59 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-#ifndef PREEMPT_H
-#define PREEMPT_H
-
-#include <stdbool.h>
-
-#include "bug_on.h"
-
-/* This flag contains garbage if preempt_disable_count is 0. */
-extern __thread int thread_cpu_id;
-
-/* Support recursive preemption disabling. */
-extern __thread int preempt_disable_count;
-
-void preempt_disable(void);
-void preempt_enable(void);
-
-static inline void preempt_disable_notrace(void)
-{
- preempt_disable();
-}
-
-static inline void preempt_enable_no_resched(void)
-{
- preempt_enable();
-}
-
-static inline void preempt_enable_notrace(void)
-{
- preempt_enable();
-}
-
-static inline int preempt_count(void)
-{
- return preempt_disable_count;
-}
-
-static inline bool preemptible(void)
-{
- return !preempt_count();
-}
-
-static inline int get_cpu(void)
-{
- preempt_disable();
- return thread_cpu_id;
-}
-
-static inline void put_cpu(void)
-{
- preempt_enable();
-}
-
-static inline void might_sleep(void)
-{
- BUG_ON(preempt_disable_count);
-}
-
-#endif
diff --git a/tools/testing/selftests/rcutorture/formal/srcu-cbmc/src/simple_sync_srcu.c b/tools/testing/selftests/rcutorture/formal/srcu-cbmc/src/simple_sync_srcu.c
deleted file mode 100644
index 97f592048e0b..000000000000
--- a/tools/testing/selftests/rcutorture/formal/srcu-cbmc/src/simple_sync_srcu.c
+++ /dev/null
@@ -1,51 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-#include <config.h>
-
-#include <assert.h>
-#include <errno.h>
-#include <inttypes.h>
-#include <pthread.h>
-#include <stddef.h>
-#include <string.h>
-#include <sys/types.h>
-
-#include "int_typedefs.h"
-
-#include "barriers.h"
-#include "bug_on.h"
-#include "locks.h"
-#include "misc.h"
-#include "preempt.h"
-#include "percpu.h"
-#include "workqueues.h"
-
-#include <linux/srcu.h>
-
-/* Functions needed from modify_srcu.c */
-bool try_check_zero(struct srcu_struct *sp, int idx, int trycount);
-void srcu_flip(struct srcu_struct *sp);
-
-/* Simpler implementation of synchronize_srcu that ignores batching. */
-void synchronize_srcu(struct srcu_struct *sp)
-{
- int idx;
- /*
- * This code assumes that try_check_zero will succeed anyway,
- * so there is no point in multiple tries.
- */
- const int trycount = 1;
-
- might_sleep();
-
- /* Ignore the lock, as multiple writers aren't working yet anyway. */
-
- idx = 1 ^ (sp->completed & 1);
-
- /* For comments see srcu_advance_batches. */
-
- assume(try_check_zero(sp, idx, trycount));
-
- srcu_flip(sp);
-
- assume(try_check_zero(sp, idx^1, trycount));
-}
diff --git a/tools/testing/selftests/rcutorture/formal/srcu-cbmc/src/workqueues.h b/tools/testing/selftests/rcutorture/formal/srcu-cbmc/src/workqueues.h
deleted file mode 100644
index 28b960300971..000000000000
--- a/tools/testing/selftests/rcutorture/formal/srcu-cbmc/src/workqueues.h
+++ /dev/null
@@ -1,103 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-#ifndef WORKQUEUES_H
-#define WORKQUEUES_H
-
-#include <stdbool.h>
-
-#include "barriers.h"
-#include "bug_on.h"
-#include "int_typedefs.h"
-
-#include <linux/types.h>
-
-/* Stub workqueue implementation. */
-
-struct work_struct;
-typedef void (*work_func_t)(struct work_struct *work);
-void delayed_work_timer_fn(unsigned long __data);
-
-struct work_struct {
-/* atomic_long_t data; */
- unsigned long data;
-
- struct list_head entry;
- work_func_t func;
-#ifdef CONFIG_LOCKDEP
- struct lockdep_map lockdep_map;
-#endif
-};
-
-struct timer_list {
- struct hlist_node entry;
- unsigned long expires;
- void (*function)(unsigned long);
- unsigned long data;
- u32 flags;
- int slack;
-};
-
-struct delayed_work {
- struct work_struct work;
- struct timer_list timer;
-
- /* target workqueue and CPU ->timer uses to queue ->work */
- struct workqueue_struct *wq;
- int cpu;
-};
-
-
-static inline bool schedule_work(struct work_struct *work)
-{
- BUG();
- return true;
-}
-
-static inline bool schedule_work_on(int cpu, struct work_struct *work)
-{
- BUG();
- return true;
-}
-
-static inline bool queue_work(struct workqueue_struct *wq,
- struct work_struct *work)
-{
- BUG();
- return true;
-}
-
-static inline bool queue_delayed_work(struct workqueue_struct *wq,
- struct delayed_work *dwork,
- unsigned long delay)
-{
- BUG();
- return true;
-}
-
-#define INIT_WORK(w, f) \
- do { \
- (w)->data = 0; \
- (w)->func = (f); \
- } while (0)
-
-#define INIT_DELAYED_WORK(w, f) INIT_WORK(&(w)->work, (f))
-
-#define __WORK_INITIALIZER(n, f) { \
- .data = 0, \
- .entry = { &(n).entry, &(n).entry }, \
- .func = f \
- }
-
-/* Don't bother initializing timer. */
-#define __DELAYED_WORK_INITIALIZER(n, f, tflags) { \
- .work = __WORK_INITIALIZER((n).work, (f)), \
- }
-
-#define DECLARE_WORK(n, f) \
- struct workqueue_struct n = __WORK_INITIALIZER
-
-#define DECLARE_DELAYED_WORK(n, f) \
- struct delayed_work n = __DELAYED_WORK_INITIALIZER(n, f, 0)
-
-#define system_power_efficient_wq ((struct workqueue_struct *) NULL)
-
-#endif
diff --git a/tools/testing/selftests/rcutorture/formal/srcu-cbmc/tests/store_buffering/Makefile b/tools/testing/selftests/rcutorture/formal/srcu-cbmc/tests/store_buffering/Makefile
deleted file mode 100644
index ad21b925fbb4..000000000000
--- a/tools/testing/selftests/rcutorture/formal/srcu-cbmc/tests/store_buffering/Makefile
+++ /dev/null
@@ -1,12 +0,0 @@
-# SPDX-License-Identifier: GPL-2.0
-CBMC_FLAGS = -I../.. -I../../src -I../../include -I../../empty_includes -32 -pointer-check -mm pso
-
-all:
- for i in ./*.pass; do \
- echo $$i ; \
- CBMC_FLAGS="$(CBMC_FLAGS)" sh ../test_script.sh --should-pass $$i > $$i.out 2>&1 ; \
- done
- for i in ./*.fail; do \
- echo $$i ; \
- CBMC_FLAGS="$(CBMC_FLAGS)" sh ../test_script.sh --should-fail $$i > $$i.out 2>&1 ; \
- done
diff --git a/tools/testing/selftests/rcutorture/formal/srcu-cbmc/tests/store_buffering/assert_end.fail b/tools/testing/selftests/rcutorture/formal/srcu-cbmc/tests/store_buffering/assert_end.fail
deleted file mode 100644
index 40c8075919d1..000000000000
--- a/tools/testing/selftests/rcutorture/formal/srcu-cbmc/tests/store_buffering/assert_end.fail
+++ /dev/null
@@ -1 +0,0 @@
-test_cbmc_options="-DASSERT_END"
diff --git a/tools/testing/selftests/rcutorture/formal/srcu-cbmc/tests/store_buffering/force.fail b/tools/testing/selftests/rcutorture/formal/srcu-cbmc/tests/store_buffering/force.fail
deleted file mode 100644
index ada5baf0b60d..000000000000
--- a/tools/testing/selftests/rcutorture/formal/srcu-cbmc/tests/store_buffering/force.fail
+++ /dev/null
@@ -1 +0,0 @@
-test_cbmc_options="-DFORCE_FAILURE"
diff --git a/tools/testing/selftests/rcutorture/formal/srcu-cbmc/tests/store_buffering/force2.fail b/tools/testing/selftests/rcutorture/formal/srcu-cbmc/tests/store_buffering/force2.fail
deleted file mode 100644
index 8fe00c8db466..000000000000
--- a/tools/testing/selftests/rcutorture/formal/srcu-cbmc/tests/store_buffering/force2.fail
+++ /dev/null
@@ -1 +0,0 @@
-test_cbmc_options="-DFORCE_FAILURE_2"
diff --git a/tools/testing/selftests/rcutorture/formal/srcu-cbmc/tests/store_buffering/force3.fail b/tools/testing/selftests/rcutorture/formal/srcu-cbmc/tests/store_buffering/force3.fail
deleted file mode 100644
index 612ed6772844..000000000000
--- a/tools/testing/selftests/rcutorture/formal/srcu-cbmc/tests/store_buffering/force3.fail
+++ /dev/null
@@ -1 +0,0 @@
-test_cbmc_options="-DFORCE_FAILURE_3"
diff --git a/tools/testing/selftests/rcutorture/formal/srcu-cbmc/tests/store_buffering/main.pass b/tools/testing/selftests/rcutorture/formal/srcu-cbmc/tests/store_buffering/main.pass
deleted file mode 100644
index e69de29bb2d1..000000000000
--- a/tools/testing/selftests/rcutorture/formal/srcu-cbmc/tests/store_buffering/main.pass
+++ /dev/null
diff --git a/tools/testing/selftests/rcutorture/formal/srcu-cbmc/tests/store_buffering/test.c b/tools/testing/selftests/rcutorture/formal/srcu-cbmc/tests/store_buffering/test.c
deleted file mode 100644
index 2ce2016f7871..000000000000
--- a/tools/testing/selftests/rcutorture/formal/srcu-cbmc/tests/store_buffering/test.c
+++ /dev/null
@@ -1,73 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-#include <src/combined_source.c>
-
-int x;
-int y;
-
-int __unbuffered_tpr_x;
-int __unbuffered_tpr_y;
-
-DEFINE_SRCU(ss);
-
-void rcu_reader(void)
-{
- int idx;
-
-#ifndef FORCE_FAILURE_3
- idx = srcu_read_lock(&ss);
-#endif
- might_sleep();
-
- __unbuffered_tpr_y = READ_ONCE(y);
-#ifdef FORCE_FAILURE
- srcu_read_unlock(&ss, idx);
- idx = srcu_read_lock(&ss);
-#endif
- WRITE_ONCE(x, 1);
-
-#ifndef FORCE_FAILURE_3
- srcu_read_unlock(&ss, idx);
-#endif
- might_sleep();
-}
-
-void *thread_update(void *arg)
-{
- WRITE_ONCE(y, 1);
-#ifndef FORCE_FAILURE_2
- synchronize_srcu(&ss);
-#endif
- might_sleep();
- __unbuffered_tpr_x = READ_ONCE(x);
-
- return NULL;
-}
-
-void *thread_process_reader(void *arg)
-{
- rcu_reader();
-
- return NULL;
-}
-
-int main(int argc, char *argv[])
-{
- pthread_t tu;
- pthread_t tpr;
-
- if (pthread_create(&tu, NULL, thread_update, NULL))
- abort();
- if (pthread_create(&tpr, NULL, thread_process_reader, NULL))
- abort();
- if (pthread_join(tu, NULL))
- abort();
- if (pthread_join(tpr, NULL))
- abort();
- assert(__unbuffered_tpr_y != 0 || __unbuffered_tpr_x != 0);
-
-#ifdef ASSERT_END
- assert(0);
-#endif
-
- return 0;
-}
diff --git a/tools/testing/selftests/rcutorture/formal/srcu-cbmc/tests/test_script.sh b/tools/testing/selftests/rcutorture/formal/srcu-cbmc/tests/test_script.sh
deleted file mode 100755
index 2fe1f0339b4f..000000000000
--- a/tools/testing/selftests/rcutorture/formal/srcu-cbmc/tests/test_script.sh
+++ /dev/null
@@ -1,103 +0,0 @@
-#!/bin/sh
-# SPDX-License-Identifier: GPL-2.0
-
-# This script expects a mode (either --should-pass or --should-fail) followed by
-# an input file. The script uses the following environment variables. The test C
-# source file is expected to be named test.c in the directory containing the
-# input file.
-#
-# CBMC: The command to run CBMC. Default: cbmc
-# CBMC_FLAGS: Additional flags to pass to CBMC
-# NR_CPUS: Number of cpus to run tests with. Default specified by the test
-# SYNC_SRCU_MODE: Choose implementation of synchronize_srcu. Defaults to simple.
-# kernel: Version included in the linux kernel source.
-# simple: Use try_check_zero directly.
-#
-# The input file is a script that is sourced by this file. It can define any of
-# the following variables to configure the test.
-#
-# test_cbmc_options: Extra options to pass to CBMC.
-# min_cpus_fail: Minimum number of CPUs (NR_CPUS) for verification to fail.
-# The test is expected to pass if it is run with fewer. (Only
-# useful for .fail files)
-# default_cpus: Quantity of CPUs to use for the test, if not specified on the
-# command line. Default: Larger of 2 and MIN_CPUS_FAIL.
-
-set -e
-
-if test "$#" -ne 2; then
- echo "Expected one option followed by an input file" 1>&2
- exit 99
-fi
-
-if test "x$1" = "x--should-pass"; then
- should_pass="yes"
-elif test "x$1" = "x--should-fail"; then
- should_pass="no"
-else
- echo "Unrecognized argument '$1'" 1>&2
-
- # Exit code 99 indicates a hard error.
- exit 99
-fi
-
-CBMC=${CBMC:-cbmc}
-
-SYNC_SRCU_MODE=${SYNC_SRCU_MODE:-simple}
-
-case ${SYNC_SRCU_MODE} in
-kernel) sync_srcu_mode_flags="" ;;
-simple) sync_srcu_mode_flags="-DUSE_SIMPLE_SYNC_SRCU" ;;
-
-*)
- echo "Unrecognized argument '${SYNC_SRCU_MODE}'" 1>&2
- exit 99
- ;;
-esac
-
-min_cpus_fail=1
-
-c_file=`dirname "$2"`/test.c
-
-# Source the input file.
-. $2
-
-if test ${min_cpus_fail} -gt 2; then
- default_default_cpus=${min_cpus_fail}
-else
- default_default_cpus=2
-fi
-default_cpus=${default_cpus:-${default_default_cpus}}
-cpus=${NR_CPUS:-${default_cpus}}
-
-# Check if there are two few cpus to make the test fail.
-if test $cpus -lt ${min_cpus_fail:-0}; then
- should_pass="yes"
-fi
-
-cbmc_opts="-DNR_CPUS=${cpus} ${sync_srcu_mode_flags} ${test_cbmc_options} ${CBMC_FLAGS}"
-
-echo "Running CBMC: ${CBMC} ${cbmc_opts} ${c_file}"
-if ${CBMC} ${cbmc_opts} "${c_file}"; then
- # Verification successful. Make sure that it was supposed to verify.
- test "x${should_pass}" = xyes
-else
- cbmc_exit_status=$?
-
- # An exit status of 10 indicates a failed verification.
- # (see cbmc_parse_optionst::do_bmc in the CBMC source code)
- if test ${cbmc_exit_status} -eq 10 && test "x${should_pass}" = xno; then
- :
- else
- echo "CBMC returned ${cbmc_exit_status} exit status" 1>&2
-
- # Parse errors have exit status 6. Any other type of error
- # should be considered a hard error.
- if test ${cbmc_exit_status} -ne 6 && \
- test ${cbmc_exit_status} -ne 10; then
- exit 99
- else
- exit 1
- fi
- fi
-fi
diff --git a/tools/testing/selftests/resctrl/.gitignore b/tools/testing/selftests/resctrl/.gitignore
new file mode 100644
index 000000000000..ab68442b6bc8
--- /dev/null
+++ b/tools/testing/selftests/resctrl/.gitignore
@@ -0,0 +1,2 @@
+# SPDX-License-Identifier: GPL-2.0-only
+resctrl_tests
diff --git a/tools/testing/selftests/resctrl/Makefile b/tools/testing/selftests/resctrl/Makefile
index d585cc1948cc..2deac2031de9 100644
--- a/tools/testing/selftests/resctrl/Makefile
+++ b/tools/testing/selftests/resctrl/Makefile
@@ -1,17 +1,10 @@
-CC = $(CROSS_COMPILE)gcc
-CFLAGS = -g -Wall
-SRCS=$(wildcard *.c)
-OBJS=$(SRCS:.c=.o)
+# SPDX-License-Identifier: GPL-2.0
-all: resctrl_tests
+CFLAGS = -g -Wall -O2 -D_FORTIFY_SOURCE=2 -D_GNU_SOURCE
+CFLAGS += $(KHDR_INCLUDES)
-$(OBJS): $(SRCS)
- $(CC) $(CFLAGS) -c $(SRCS)
+TEST_GEN_PROGS := resctrl_tests
-resctrl_tests: $(OBJS)
- $(CC) $(CFLAGS) -o $@ $^
+include ../lib.mk
-.PHONY: clean
-
-clean:
- $(RM) $(OBJS) resctrl_tests
+$(OUTPUT)/resctrl_tests: $(wildcard *.[ch])
diff --git a/tools/testing/selftests/resctrl/README b/tools/testing/selftests/resctrl/README
index 6e5a0ffa18e8..8d11ce7c2ee5 100644
--- a/tools/testing/selftests/resctrl/README
+++ b/tools/testing/selftests/resctrl/README
@@ -12,24 +12,49 @@ Allocation test on Intel RDT hardware. More tests will be added in the future.
And the test suit can be extended to cover AMD QoS and ARM MPAM hardware
as well.
+resctrl_tests can be run with or without kselftest framework.
+
+WITH KSELFTEST FRAMEWORK
+=======================
+
BUILD
-----
-Run "make" to build executable file "resctrl_tests".
+Build executable file "resctrl_tests" from top level directory of the kernel source:
+ $ make -C tools/testing/selftests TARGETS=resctrl
RUN
---
-To use resctrl_tests, root or sudoer privileges are required. This is because
-the test needs to mount resctrl file system and change contents in the file
-system.
+Run resctrl_tests as sudo or root since the test needs to mount resctrl file
+system and change contents in the file system.
+Using kselftest framework will run all supported tests within resctrl_tests:
+
+ $ sudo make -C tools/testing/selftests TARGETS=resctrl run_tests
+
+More details about kselftest framework can be found in
+Documentation/dev-tools/kselftest.rst.
+
+WITHOUT KSELFTEST FRAMEWORK
+===========================
+
+BUILD
+-----
+
+Build executable file "resctrl_tests" from this directory(tools/testing/selftests/resctrl/):
+ $ make
+
+RUN
+---
+Run resctrl_tests as sudo or root since the test needs to mount resctrl file
+system and change contents in the file system.
Executing the test without any parameter will run all supported tests:
- sudo ./resctrl_tests
+ $ sudo ./resctrl_tests
OVERVIEW OF EXECUTION
----------------------
+=====================
A test case has four stages:
@@ -41,13 +66,13 @@ A test case has four stages:
- teardown: umount resctrl and clear temporary files.
ARGUMENTS
----------
+=========
Parameter '-h' shows usage information.
usage: resctrl_tests [-h] [-b "benchmark_cmd [options]"] [-t test list] [-n no_of_bits]
- -b benchmark_cmd [options]: run specified benchmark for MBM, MBA and CQM default benchmark is builtin fill_buf
- -t test list: run tests specified in the test list, e.g. -t mbm, mba, cqm, cat
+ -b benchmark_cmd [options]: run specified benchmark for MBM, MBA and CMT default benchmark is builtin fill_buf
+ -t test list: run tests specified in the test list, e.g. -t mbm,mba,cmt,cat
-n no_of_bits: run cache tests using specified no of bits in cache bit mask
-p cpu_no: specify CPU number to run the test. 1 is default
-h: help
diff --git a/tools/testing/selftests/resctrl/cache.c b/tools/testing/selftests/resctrl/cache.c
index 38dbf4962e33..1b339d6bbff1 100644
--- a/tools/testing/selftests/resctrl/cache.c
+++ b/tools/testing/selftests/resctrl/cache.c
@@ -3,115 +3,64 @@
#include <stdint.h>
#include "resctrl.h"
-struct read_format {
- __u64 nr; /* The number of events */
- struct {
- __u64 value; /* The value of the event */
- } values[2];
-};
-
-static struct perf_event_attr pea_llc_miss;
-static struct read_format rf_cqm;
-static int fd_lm;
char llc_occup_path[1024];
-static void initialize_perf_event_attr(void)
-{
- pea_llc_miss.type = PERF_TYPE_HARDWARE;
- pea_llc_miss.size = sizeof(struct perf_event_attr);
- pea_llc_miss.read_format = PERF_FORMAT_GROUP;
- pea_llc_miss.exclude_kernel = 1;
- pea_llc_miss.exclude_hv = 1;
- pea_llc_miss.exclude_idle = 1;
- pea_llc_miss.exclude_callchain_kernel = 1;
- pea_llc_miss.inherit = 1;
- pea_llc_miss.exclude_guest = 1;
- pea_llc_miss.disabled = 1;
-}
-
-static void ioctl_perf_event_ioc_reset_enable(void)
-{
- ioctl(fd_lm, PERF_EVENT_IOC_RESET, 0);
- ioctl(fd_lm, PERF_EVENT_IOC_ENABLE, 0);
-}
-
-static int perf_event_open_llc_miss(pid_t pid, int cpu_no)
-{
- fd_lm = perf_event_open(&pea_llc_miss, pid, cpu_no, -1,
- PERF_FLAG_FD_CLOEXEC);
- if (fd_lm == -1) {
- perror("Error opening leader");
- ctrlc_handler(0, NULL, NULL);
- return -1;
- }
-
- return 0;
-}
-
-static int initialize_llc_perf(void)
+void perf_event_attr_initialize(struct perf_event_attr *pea, __u64 config)
{
- memset(&pea_llc_miss, 0, sizeof(struct perf_event_attr));
- memset(&rf_cqm, 0, sizeof(struct read_format));
-
- /* Initialize perf_event_attr structures for HW_CACHE_MISSES */
- initialize_perf_event_attr();
-
- pea_llc_miss.config = PERF_COUNT_HW_CACHE_MISSES;
-
- rf_cqm.nr = 1;
-
- return 0;
+ memset(pea, 0, sizeof(*pea));
+ pea->type = PERF_TYPE_HARDWARE;
+ pea->size = sizeof(*pea);
+ pea->read_format = PERF_FORMAT_GROUP;
+ pea->exclude_kernel = 1;
+ pea->exclude_hv = 1;
+ pea->exclude_idle = 1;
+ pea->exclude_callchain_kernel = 1;
+ pea->inherit = 1;
+ pea->exclude_guest = 1;
+ pea->disabled = 1;
+ pea->config = config;
}
-static int reset_enable_llc_perf(pid_t pid, int cpu_no)
+/* Start counters to log values */
+int perf_event_reset_enable(int pe_fd)
{
- int ret = 0;
+ int ret;
- ret = perf_event_open_llc_miss(pid, cpu_no);
+ ret = ioctl(pe_fd, PERF_EVENT_IOC_RESET, 0);
if (ret < 0)
return ret;
- /* Start counters to log values */
- ioctl_perf_event_ioc_reset_enable();
+ ret = ioctl(pe_fd, PERF_EVENT_IOC_ENABLE, 0);
+ if (ret < 0)
+ return ret;
return 0;
}
-/*
- * get_llc_perf: llc cache miss through perf events
- * @cpu_no: CPU number that the benchmark PID is binded to
- *
- * Perf events like HW_CACHE_MISSES could be used to validate number of
- * cache lines allocated.
- *
- * Return: =0 on success. <0 on failure.
- */
-static int get_llc_perf(unsigned long *llc_perf_miss)
+void perf_event_initialize_read_format(struct perf_event_read *pe_read)
{
- __u64 total_misses;
-
- /* Stop counters after one span to get miss rate */
-
- ioctl(fd_lm, PERF_EVENT_IOC_DISABLE, 0);
+ memset(pe_read, 0, sizeof(*pe_read));
+ pe_read->nr = 1;
+}
- if (read(fd_lm, &rf_cqm, sizeof(struct read_format)) == -1) {
- perror("Could not get llc misses through perf");
+int perf_open(struct perf_event_attr *pea, pid_t pid, int cpu_no)
+{
+ int pe_fd;
+ pe_fd = perf_event_open(pea, pid, cpu_no, -1, PERF_FLAG_FD_CLOEXEC);
+ if (pe_fd == -1) {
+ ksft_perror("Error opening leader");
return -1;
}
- total_misses = rf_cqm.values[0].value;
+ perf_event_reset_enable(pe_fd);
- close(fd_lm);
-
- *llc_perf_miss = total_misses;
-
- return 0;
+ return pe_fd;
}
/*
* Get LLC Occupancy as reported by RESCTRL FS
- * For CQM,
+ * For CMT,
* 1. If con_mon grp and mon grp given, then read from mon grp in
* con_mon grp
* 2. If only con_mon grp given, then read from con_mon grp
@@ -128,12 +77,12 @@ static int get_llc_occu_resctrl(unsigned long *llc_occupancy)
fp = fopen(llc_occup_path, "r");
if (!fp) {
- perror("Failed to open results file");
+ ksft_perror("Failed to open results file");
- return errno;
+ return -1;
}
if (fscanf(fp, "%lu", llc_occupancy) <= 0) {
- perror("Could not get llc occupancy");
+ ksft_perror("Could not get llc occupancy");
fclose(fp);
return -1;
@@ -150,123 +99,91 @@ static int get_llc_occu_resctrl(unsigned long *llc_occupancy)
* @llc_value: perf miss value /
* llc occupancy value reported by resctrl FS
*
- * Return: 0 on success. non-zero on failure.
+ * Return: 0 on success, < 0 on error.
*/
-static int print_results_cache(char *filename, int bm_pid,
- unsigned long llc_value)
+static int print_results_cache(const char *filename, int bm_pid, __u64 llc_value)
{
FILE *fp;
if (strcmp(filename, "stdio") == 0 || strcmp(filename, "stderr") == 0) {
- printf("Pid: %d \t LLC_value: %lu\n", bm_pid,
- llc_value);
+ printf("Pid: %d \t LLC_value: %llu\n", bm_pid, llc_value);
} else {
fp = fopen(filename, "a");
if (!fp) {
- perror("Cannot open results file");
+ ksft_perror("Cannot open results file");
- return errno;
+ return -1;
}
- fprintf(fp, "Pid: %d \t llc_value: %lu\n", bm_pid, llc_value);
+ fprintf(fp, "Pid: %d \t llc_value: %llu\n", bm_pid, llc_value);
fclose(fp);
}
return 0;
}
-int measure_cache_vals(struct resctrl_val_param *param, int bm_pid)
+/*
+ * perf_event_measure - Measure perf events
+ * @filename: Filename for writing the results
+ * @bm_pid: PID that runs the benchmark
+ *
+ * Measures perf events (e.g., cache misses) and writes the results into
+ * @filename. @bm_pid is written to the results file along with the measured
+ * value.
+ *
+ * Return: =0 on success. <0 on failure.
+ */
+int perf_event_measure(int pe_fd, struct perf_event_read *pe_read,
+ const char *filename, int bm_pid)
{
- unsigned long llc_perf_miss = 0, llc_occu_resc = 0, llc_value = 0;
int ret;
- /*
- * Measure cache miss from perf.
- */
- if (!strcmp(param->resctrl_val, "cat")) {
- ret = get_llc_perf(&llc_perf_miss);
- if (ret < 0)
- return ret;
- llc_value = llc_perf_miss;
- }
+ /* Stop counters after one span to get miss rate */
+ ret = ioctl(pe_fd, PERF_EVENT_IOC_DISABLE, 0);
+ if (ret < 0)
+ return ret;
- /*
- * Measure llc occupancy from resctrl.
- */
- if (!strcmp(param->resctrl_val, "cqm")) {
- ret = get_llc_occu_resctrl(&llc_occu_resc);
- if (ret < 0)
- return ret;
- llc_value = llc_occu_resc;
+ ret = read(pe_fd, pe_read, sizeof(*pe_read));
+ if (ret == -1) {
+ ksft_perror("Could not get perf value");
+ return -1;
}
- ret = print_results_cache(param->filename, bm_pid, llc_value);
- if (ret)
- return ret;
- return 0;
+ return print_results_cache(filename, bm_pid, pe_read->values[0].value);
}
/*
- * cache_val: execute benchmark and measure LLC occupancy resctrl
- * and perf cache miss for the benchmark
- * @param: parameters passed to cache_val()
+ * measure_llc_resctrl - Measure resctrl LLC value from resctrl
+ * @filename: Filename for writing the results
+ * @bm_pid: PID that runs the benchmark
+ *
+ * Measures LLC occupancy from resctrl and writes the results into @filename.
+ * @bm_pid is written to the results file along with the measured value.
*
- * Return: 0 on success. non-zero on failure.
+ * Return: =0 on success. <0 on failure.
*/
-int cat_val(struct resctrl_val_param *param)
+int measure_llc_resctrl(const char *filename, int bm_pid)
{
- int malloc_and_init_memory = 1, memflush = 1, operation = 0, ret = 0;
- char *resctrl_val = param->resctrl_val;
- pid_t bm_pid;
-
- if (strcmp(param->filename, "") == 0)
- sprintf(param->filename, "stdio");
-
- bm_pid = getpid();
-
- /* Taskset benchmark to specified cpu */
- ret = taskset_benchmark(bm_pid, param->cpu_no);
- if (ret)
- return ret;
+ unsigned long llc_occu_resc = 0;
+ int ret;
- /* Write benchmark to specified con_mon grp, mon_grp in resctrl FS*/
- ret = write_bm_pid_to_resctrl(bm_pid, param->ctrlgrp, param->mongrp,
- resctrl_val);
- if (ret)
+ ret = get_llc_occu_resctrl(&llc_occu_resc);
+ if (ret < 0)
return ret;
- if ((strcmp(resctrl_val, "cat") == 0)) {
- ret = initialize_llc_perf();
- if (ret)
- return ret;
- }
-
- /* Test runs until the callback setup() tells the test to stop. */
- while (1) {
- if (strcmp(resctrl_val, "cat") == 0) {
- ret = param->setup(1, param);
- if (ret) {
- ret = 0;
- break;
- }
- ret = reset_enable_llc_perf(bm_pid, param->cpu_no);
- if (ret)
- break;
-
- if (run_fill_buf(param->span, malloc_and_init_memory,
- memflush, operation, resctrl_val)) {
- fprintf(stderr, "Error-running fill buffer\n");
- ret = -1;
- break;
- }
-
- sleep(1);
- ret = measure_cache_vals(param, bm_pid);
- if (ret)
- break;
- } else {
- break;
- }
- }
+ return print_results_cache(filename, bm_pid, llc_occu_resc);
+}
- return ret;
+/*
+ * show_cache_info - Show generic cache test information
+ * @no_of_bits: Number of bits
+ * @avg_llc_val: Average of LLC cache result data
+ * @cache_span: Cache span
+ * @lines: @cache_span in lines or bytes
+ */
+void show_cache_info(int no_of_bits, __u64 avg_llc_val, size_t cache_span, bool lines)
+{
+ ksft_print_msg("Number of bits: %d\n", no_of_bits);
+ ksft_print_msg("Average LLC val: %llu\n", avg_llc_val);
+ ksft_print_msg("Cache span (%s): %zu\n", lines ? "lines" : "bytes",
+ cache_span);
}
diff --git a/tools/testing/selftests/resctrl/cat_test.c b/tools/testing/selftests/resctrl/cat_test.c
index 5da43767b973..4cb991be8e31 100644
--- a/tools/testing/selftests/resctrl/cat_test.c
+++ b/tools/testing/selftests/resctrl/cat_test.c
@@ -11,240 +11,382 @@
#include "resctrl.h"
#include <unistd.h>
-#define RESULT_FILE_NAME1 "result_cat1"
-#define RESULT_FILE_NAME2 "result_cat2"
+#define RESULT_FILE_NAME "result_cat"
#define NUM_OF_RUNS 5
-#define MAX_DIFF_PERCENT 4
-#define MAX_DIFF 1000000
-
-int count_of_bits;
-char cbm_mask[256];
-unsigned long long_mask;
-unsigned long cache_size;
/*
- * Change schemata. Write schemata to specified
- * con_mon grp, mon_grp in resctrl FS.
- * Run 5 times in order to get average values.
+ * Minimum difference in LLC misses between a test with n+1 bits CBM to the
+ * test with n bits is MIN_DIFF_PERCENT_PER_BIT * (n - 1). With e.g. 5 vs 4
+ * bits in the CBM mask, the minimum difference must be at least
+ * MIN_DIFF_PERCENT_PER_BIT * (4 - 1) = 3 percent.
+ *
+ * The relationship between number of used CBM bits and difference in LLC
+ * misses is not expected to be linear. With a small number of bits, the
+ * margin is smaller than with larger number of bits. For selftest purposes,
+ * however, linear approach is enough because ultimately only pass/fail
+ * decision has to be made and distinction between strong and stronger
+ * signal is irrelevant.
*/
-static int cat_setup(int num, ...)
+#define MIN_DIFF_PERCENT_PER_BIT 1UL
+
+static int show_results_info(__u64 sum_llc_val, int no_of_bits,
+ unsigned long cache_span,
+ unsigned long min_diff_percent,
+ unsigned long num_of_runs, bool platform,
+ __s64 *prev_avg_llc_val)
{
- struct resctrl_val_param *p;
- char schemata[64];
- va_list param;
+ __u64 avg_llc_val = 0;
+ float avg_diff;
int ret = 0;
- va_start(param, num);
- p = va_arg(param, struct resctrl_val_param *);
- va_end(param);
+ avg_llc_val = sum_llc_val / num_of_runs;
+ if (*prev_avg_llc_val) {
+ float delta = (__s64)(avg_llc_val - *prev_avg_llc_val);
- /* Run NUM_OF_RUNS times */
- if (p->num_of_runs >= NUM_OF_RUNS)
- return -1;
+ avg_diff = delta / *prev_avg_llc_val;
+ ret = platform && (avg_diff * 100) < (float)min_diff_percent;
- if (p->num_of_runs == 0) {
- sprintf(schemata, "%lx", p->mask);
- ret = write_schemata(p->ctrlgrp, schemata, p->cpu_no,
- p->resctrl_val);
+ ksft_print_msg("%s Check cache miss rate changed more than %.1f%%\n",
+ ret ? "Fail:" : "Pass:", (float)min_diff_percent);
+
+ ksft_print_msg("Percent diff=%.1f\n", avg_diff * 100);
}
- p->num_of_runs++;
+ *prev_avg_llc_val = avg_llc_val;
+
+ show_cache_info(no_of_bits, avg_llc_val, cache_span, true);
return ret;
}
-static void show_cache_info(unsigned long sum_llc_perf_miss, int no_of_bits,
- unsigned long span)
+/* Remove the highest bit from CBM */
+static unsigned long next_mask(unsigned long current_mask)
{
- unsigned long allocated_cache_lines = span / 64;
- unsigned long avg_llc_perf_miss = 0;
- float diff_percent;
-
- avg_llc_perf_miss = sum_llc_perf_miss / (NUM_OF_RUNS - 1);
- diff_percent = ((float)allocated_cache_lines - avg_llc_perf_miss) /
- allocated_cache_lines * 100;
-
- printf("%sok CAT: cache miss rate within %d%%\n",
- !is_amd && abs((int)diff_percent) > MAX_DIFF_PERCENT ?
- "not " : "", MAX_DIFF_PERCENT);
- tests_run++;
- printf("# Percent diff=%d\n", abs((int)diff_percent));
- printf("# Number of bits: %d\n", no_of_bits);
- printf("# Avg_llc_perf_miss: %lu\n", avg_llc_perf_miss);
- printf("# Allocated cache lines: %lu\n", allocated_cache_lines);
+ return current_mask & (current_mask >> 1);
}
-static int check_results(struct resctrl_val_param *param)
+static int check_results(struct resctrl_val_param *param, const char *cache_type,
+ unsigned long cache_total_size, unsigned long full_cache_mask,
+ unsigned long current_mask)
{
char *token_array[8], temp[512];
- unsigned long sum_llc_perf_miss = 0;
- int runs = 0, no_of_bits = 0;
+ __u64 sum_llc_perf_miss = 0;
+ __s64 prev_avg_llc_val = 0;
+ unsigned long alloc_size;
+ int runs = 0;
+ int fail = 0;
+ int ret;
FILE *fp;
- printf("# Checking for pass/fail\n");
+ ksft_print_msg("Checking for pass/fail\n");
fp = fopen(param->filename, "r");
if (!fp) {
- perror("# Cannot open file");
+ ksft_perror("Cannot open file");
- return errno;
+ return -1;
}
while (fgets(temp, sizeof(temp), fp)) {
char *token = strtok(temp, ":\t");
int fields = 0;
+ int bits;
while (token) {
token_array[fields++] = token;
token = strtok(NULL, ":\t");
}
- /*
- * Discard the first value which is inaccurate due to monitoring
- * setup transition phase.
- */
- if (runs > 0)
- sum_llc_perf_miss += strtoul(token_array[3], NULL, 0);
+
+ sum_llc_perf_miss += strtoull(token_array[3], NULL, 0);
runs++;
+
+ if (runs < NUM_OF_RUNS)
+ continue;
+
+ if (!current_mask) {
+ ksft_print_msg("Unexpected empty cache mask\n");
+ break;
+ }
+
+ alloc_size = cache_portion_size(cache_total_size, current_mask, full_cache_mask);
+
+ bits = count_bits(current_mask);
+
+ ret = show_results_info(sum_llc_perf_miss, bits,
+ alloc_size / 64,
+ MIN_DIFF_PERCENT_PER_BIT * (bits - 1),
+ runs, get_vendor() == ARCH_INTEL,
+ &prev_avg_llc_val);
+ if (ret)
+ fail = 1;
+
+ runs = 0;
+ sum_llc_perf_miss = 0;
+ current_mask = next_mask(current_mask);
}
fclose(fp);
- no_of_bits = count_bits(param->mask);
- show_cache_info(sum_llc_perf_miss, no_of_bits, param->span);
-
- return 0;
+ return fail;
}
void cat_test_cleanup(void)
{
- remove(RESULT_FILE_NAME1);
- remove(RESULT_FILE_NAME2);
+ remove(RESULT_FILE_NAME);
}
-int cat_perf_miss_val(int cpu_no, int n, char *cache_type)
+/*
+ * cat_test - Execute CAT benchmark and measure cache misses
+ * @test: Test information structure
+ * @uparams: User supplied parameters
+ * @param: Parameters passed to cat_test()
+ * @span: Buffer size for the benchmark
+ * @current_mask Start mask for the first iteration
+ *
+ * Run CAT selftest by varying the allocated cache portion and comparing the
+ * impact on cache misses (the result analysis is done in check_results()
+ * and show_results_info(), not in this function).
+ *
+ * One bit is removed from the CAT allocation bit mask (in current_mask) for
+ * each subsequent test which keeps reducing the size of the allocated cache
+ * portion. A single test flushes the buffer, reads it to warm up the cache,
+ * and reads the buffer again. The cache misses are measured during the last
+ * read pass.
+ *
+ * Return: 0 when the test was run, < 0 on error.
+ */
+static int cat_test(const struct resctrl_test *test,
+ const struct user_params *uparams,
+ struct resctrl_val_param *param,
+ size_t span, unsigned long current_mask)
{
- unsigned long l_mask, l_mask_1;
- int ret, pipefd[2], sibling_cpu_no;
- char pipe_message;
+ char *resctrl_val = param->resctrl_val;
+ struct perf_event_read pe_read;
+ struct perf_event_attr pea;
+ cpu_set_t old_affinity;
+ unsigned char *buf;
+ char schemata[64];
+ int ret, i, pe_fd;
pid_t bm_pid;
- cache_size = 0;
+ if (strcmp(param->filename, "") == 0)
+ sprintf(param->filename, "stdio");
- ret = remount_resctrlfs(true);
+ bm_pid = getpid();
+
+ /* Taskset benchmark to specified cpu */
+ ret = taskset_benchmark(bm_pid, uparams->cpu, &old_affinity);
if (ret)
return ret;
- if (!validate_resctrl_feature_request("cat"))
- return -1;
+ /* Write benchmark to specified con_mon grp, mon_grp in resctrl FS*/
+ ret = write_bm_pid_to_resctrl(bm_pid, param->ctrlgrp, param->mongrp,
+ resctrl_val);
+ if (ret)
+ goto reset_affinity;
+
+ perf_event_attr_initialize(&pea, PERF_COUNT_HW_CACHE_MISSES);
+ perf_event_initialize_read_format(&pe_read);
+ pe_fd = perf_open(&pea, bm_pid, uparams->cpu);
+ if (pe_fd < 0) {
+ ret = -1;
+ goto reset_affinity;
+ }
- /* Get default cbm mask for L3/L2 cache */
- ret = get_cbm_mask(cache_type);
+ buf = alloc_buffer(span, 1);
+ if (!buf) {
+ ret = -1;
+ goto pe_close;
+ }
+
+ while (current_mask) {
+ snprintf(schemata, sizeof(schemata), "%lx", param->mask & ~current_mask);
+ ret = write_schemata("", schemata, uparams->cpu, test->resource);
+ if (ret)
+ goto free_buf;
+ snprintf(schemata, sizeof(schemata), "%lx", current_mask);
+ ret = write_schemata(param->ctrlgrp, schemata, uparams->cpu, test->resource);
+ if (ret)
+ goto free_buf;
+
+ for (i = 0; i < NUM_OF_RUNS; i++) {
+ mem_flush(buf, span);
+ fill_cache_read(buf, span, true);
+
+ ret = perf_event_reset_enable(pe_fd);
+ if (ret)
+ goto free_buf;
+
+ fill_cache_read(buf, span, true);
+
+ ret = perf_event_measure(pe_fd, &pe_read, param->filename, bm_pid);
+ if (ret)
+ goto free_buf;
+ }
+ current_mask = next_mask(current_mask);
+ }
+
+free_buf:
+ free(buf);
+pe_close:
+ close(pe_fd);
+reset_affinity:
+ taskset_restore(bm_pid, &old_affinity);
+
+ return ret;
+}
+
+static int cat_run_test(const struct resctrl_test *test, const struct user_params *uparams)
+{
+ unsigned long long_mask, start_mask, full_cache_mask;
+ unsigned long cache_total_size = 0;
+ int n = uparams->bits;
+ unsigned int start;
+ int count_of_bits;
+ size_t span;
+ int ret;
+
+ ret = get_full_cbm(test->resource, &full_cache_mask);
+ if (ret)
+ return ret;
+ /* Get the largest contiguous exclusive portion of the cache */
+ ret = get_mask_no_shareable(test->resource, &long_mask);
if (ret)
return ret;
-
- long_mask = strtoul(cbm_mask, NULL, 16);
/* Get L3/L2 cache size */
- ret = get_cache_size(cpu_no, cache_type, &cache_size);
+ ret = get_cache_size(uparams->cpu, test->resource, &cache_total_size);
if (ret)
return ret;
- printf("cache size :%lu\n", cache_size);
+ ksft_print_msg("Cache size :%lu\n", cache_total_size);
- /* Get max number of bits from default-cabm mask */
- count_of_bits = count_bits(long_mask);
+ count_of_bits = count_contiguous_bits(long_mask, &start);
- if (n < 1 || n > count_of_bits - 1) {
- printf("Invalid input value for no_of_bits n!\n");
- printf("Please Enter value in range 1 to %d\n",
- count_of_bits - 1);
- return -1;
- }
+ if (!n)
+ n = count_of_bits / 2;
- /* Get core id from same socket for running another thread */
- sibling_cpu_no = get_core_sibling(cpu_no);
- if (sibling_cpu_no < 0)
+ if (n > count_of_bits - 1) {
+ ksft_print_msg("Invalid input value for no_of_bits n!\n");
+ ksft_print_msg("Please enter value in range 1 to %d\n",
+ count_of_bits - 1);
return -1;
+ }
+ start_mask = create_bit_mask(start, n);
struct resctrl_val_param param = {
- .resctrl_val = "cat",
- .cpu_no = cpu_no,
- .mum_resctrlfs = 0,
- .setup = cat_setup,
+ .resctrl_val = CAT_STR,
+ .ctrlgrp = "c1",
+ .filename = RESULT_FILE_NAME,
+ .num_of_runs = 0,
};
+ param.mask = long_mask;
+ span = cache_portion_size(cache_total_size, start_mask, full_cache_mask);
- l_mask = long_mask >> n;
- l_mask_1 = ~l_mask & long_mask;
+ remove(param.filename);
- /* Set param values for parent thread which will be allocated bitmask
- * with (max_bits - n) bits
- */
- param.span = cache_size * (count_of_bits - n) / count_of_bits;
- strcpy(param.ctrlgrp, "c2");
- strcpy(param.mongrp, "m2");
- strcpy(param.filename, RESULT_FILE_NAME2);
- param.mask = l_mask;
- param.num_of_runs = 0;
-
- if (pipe(pipefd)) {
- perror("# Unable to create pipe");
- return errno;
- }
+ ret = cat_test(test, uparams, &param, span, start_mask);
+ if (ret)
+ goto out;
- bm_pid = fork();
+ ret = check_results(&param, test->resource,
+ cache_total_size, full_cache_mask, start_mask);
+out:
+ cat_test_cleanup();
- /* Set param values for child thread which will be allocated bitmask
- * with n bits
- */
- if (bm_pid == 0) {
- param.mask = l_mask_1;
- strcpy(param.ctrlgrp, "c1");
- strcpy(param.mongrp, "m1");
- param.span = cache_size * n / count_of_bits;
- strcpy(param.filename, RESULT_FILE_NAME1);
- param.num_of_runs = 0;
- param.cpu_no = sibling_cpu_no;
- }
+ return ret;
+}
- remove(param.filename);
+static int noncont_cat_run_test(const struct resctrl_test *test,
+ const struct user_params *uparams)
+{
+ unsigned long full_cache_mask, cont_mask, noncont_mask;
+ unsigned int eax, ebx, ecx, edx, sparse_masks;
+ int bit_center, ret;
+ char schemata[64];
- ret = cat_val(&param);
+ /* Check to compare sparse_masks content to CPUID output. */
+ ret = resource_info_unsigned_get(test->resource, "sparse_masks", &sparse_masks);
if (ret)
return ret;
- ret = check_results(&param);
- if (ret)
- return ret;
+ if (!strcmp(test->resource, "L3"))
+ __cpuid_count(0x10, 1, eax, ebx, ecx, edx);
+ else if (!strcmp(test->resource, "L2"))
+ __cpuid_count(0x10, 2, eax, ebx, ecx, edx);
+ else
+ return -EINVAL;
- if (bm_pid == 0) {
- /* Tell parent that child is ready */
- close(pipefd[0]);
- pipe_message = 1;
- if (write(pipefd[1], &pipe_message, sizeof(pipe_message)) <
- sizeof(pipe_message)) {
- close(pipefd[1]);
- perror("# failed signaling parent process");
- return errno;
- }
+ if (sparse_masks != ((ecx >> 3) & 1)) {
+ ksft_print_msg("CPUID output doesn't match 'sparse_masks' file content!\n");
+ return 1;
+ }
- close(pipefd[1]);
- while (1)
- ;
- } else {
- /* Parent waits for child to be ready. */
- close(pipefd[1]);
- pipe_message = 0;
- while (pipe_message != 1) {
- if (read(pipefd[0], &pipe_message,
- sizeof(pipe_message)) < sizeof(pipe_message)) {
- perror("# failed reading from child process");
- break;
- }
- }
- close(pipefd[0]);
- kill(bm_pid, SIGKILL);
+ /* Write checks initialization. */
+ ret = get_full_cbm(test->resource, &full_cache_mask);
+ if (ret < 0)
+ return ret;
+ bit_center = count_bits(full_cache_mask) / 2;
+
+ /*
+ * The bit_center needs to be at least 3 to properly calculate the CBM
+ * hole in the noncont_mask. If it's smaller return an error since the
+ * cache mask is too short and that shouldn't happen.
+ */
+ if (bit_center < 3)
+ return -EINVAL;
+ cont_mask = full_cache_mask >> bit_center;
+
+ /* Contiguous mask write check. */
+ snprintf(schemata, sizeof(schemata), "%lx", cont_mask);
+ ret = write_schemata("", schemata, uparams->cpu, test->resource);
+ if (ret) {
+ ksft_print_msg("Write of contiguous CBM failed\n");
+ return 1;
}
- cat_test_cleanup();
- if (bm_pid)
- umount_resctrlfs();
+ /*
+ * Non-contiguous mask write check. CBM has a 0xf hole approximately in the middle.
+ * Output is compared with support information to catch any edge case errors.
+ */
+ noncont_mask = ~(0xfUL << (bit_center - 2)) & full_cache_mask;
+ snprintf(schemata, sizeof(schemata), "%lx", noncont_mask);
+ ret = write_schemata("", schemata, uparams->cpu, test->resource);
+ if (ret && sparse_masks)
+ ksft_print_msg("Non-contiguous CBMs supported but write of non-contiguous CBM failed\n");
+ else if (ret && !sparse_masks)
+ ksft_print_msg("Non-contiguous CBMs not supported and write of non-contiguous CBM failed as expected\n");
+ else if (!ret && !sparse_masks)
+ ksft_print_msg("Non-contiguous CBMs not supported but write of non-contiguous CBM succeeded\n");
+
+ return !ret == !sparse_masks;
+}
+
+static bool noncont_cat_feature_check(const struct resctrl_test *test)
+{
+ if (!resctrl_resource_exists(test->resource))
+ return false;
- return 0;
+ return resource_info_file_exists(test->resource, "sparse_masks");
}
+
+struct resctrl_test l3_cat_test = {
+ .name = "L3_CAT",
+ .group = "CAT",
+ .resource = "L3",
+ .feature_check = test_resource_feature_check,
+ .run_test = cat_run_test,
+};
+
+struct resctrl_test l3_noncont_cat_test = {
+ .name = "L3_NONCONT_CAT",
+ .group = "CAT",
+ .resource = "L3",
+ .feature_check = noncont_cat_feature_check,
+ .run_test = noncont_cat_run_test,
+};
+
+struct resctrl_test l2_noncont_cat_test = {
+ .name = "L2_NONCONT_CAT",
+ .group = "CAT",
+ .resource = "L2",
+ .feature_check = noncont_cat_feature_check,
+ .run_test = noncont_cat_run_test,
+};
diff --git a/tools/testing/selftests/resctrl/cmt_test.c b/tools/testing/selftests/resctrl/cmt_test.c
new file mode 100644
index 000000000000..a81f91222a89
--- /dev/null
+++ b/tools/testing/selftests/resctrl/cmt_test.c
@@ -0,0 +1,181 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Cache Monitoring Technology (CMT) test
+ *
+ * Copyright (C) 2018 Intel Corporation
+ *
+ * Authors:
+ * Sai Praneeth Prakhya <sai.praneeth.prakhya@intel.com>,
+ * Fenghua Yu <fenghua.yu@intel.com>
+ */
+#include "resctrl.h"
+#include <unistd.h>
+
+#define RESULT_FILE_NAME "result_cmt"
+#define NUM_OF_RUNS 5
+#define MAX_DIFF 2000000
+#define MAX_DIFF_PERCENT 15
+
+static int cmt_setup(const struct resctrl_test *test,
+ const struct user_params *uparams,
+ struct resctrl_val_param *p)
+{
+ /* Run NUM_OF_RUNS times */
+ if (p->num_of_runs >= NUM_OF_RUNS)
+ return END_OF_TESTS;
+
+ p->num_of_runs++;
+
+ return 0;
+}
+
+static int show_results_info(unsigned long sum_llc_val, int no_of_bits,
+ unsigned long cache_span, unsigned long max_diff,
+ unsigned long max_diff_percent, unsigned long num_of_runs,
+ bool platform)
+{
+ unsigned long avg_llc_val = 0;
+ float diff_percent;
+ long avg_diff = 0;
+ int ret;
+
+ avg_llc_val = sum_llc_val / num_of_runs;
+ avg_diff = (long)abs(cache_span - avg_llc_val);
+ diff_percent = ((float)cache_span - avg_llc_val) / cache_span * 100;
+
+ ret = platform && abs((int)diff_percent) > max_diff_percent &&
+ abs(avg_diff) > max_diff;
+
+ ksft_print_msg("%s Check cache miss rate within %lu%%\n",
+ ret ? "Fail:" : "Pass:", max_diff_percent);
+
+ ksft_print_msg("Percent diff=%d\n", abs((int)diff_percent));
+
+ show_cache_info(no_of_bits, avg_llc_val, cache_span, false);
+
+ return ret;
+}
+
+static int check_results(struct resctrl_val_param *param, size_t span, int no_of_bits)
+{
+ char *token_array[8], temp[512];
+ unsigned long sum_llc_occu_resc = 0;
+ int runs = 0;
+ FILE *fp;
+
+ ksft_print_msg("Checking for pass/fail\n");
+ fp = fopen(param->filename, "r");
+ if (!fp) {
+ ksft_perror("Error in opening file");
+
+ return -1;
+ }
+
+ while (fgets(temp, sizeof(temp), fp)) {
+ char *token = strtok(temp, ":\t");
+ int fields = 0;
+
+ while (token) {
+ token_array[fields++] = token;
+ token = strtok(NULL, ":\t");
+ }
+
+ /* Field 3 is llc occ resc value */
+ if (runs > 0)
+ sum_llc_occu_resc += strtoul(token_array[3], NULL, 0);
+ runs++;
+ }
+ fclose(fp);
+
+ return show_results_info(sum_llc_occu_resc, no_of_bits, span,
+ MAX_DIFF, MAX_DIFF_PERCENT, runs - 1, true);
+}
+
+void cmt_test_cleanup(void)
+{
+ remove(RESULT_FILE_NAME);
+}
+
+static int cmt_run_test(const struct resctrl_test *test, const struct user_params *uparams)
+{
+ const char * const *cmd = uparams->benchmark_cmd;
+ const char *new_cmd[BENCHMARK_ARGS];
+ unsigned long cache_total_size = 0;
+ int n = uparams->bits ? : 5;
+ unsigned long long_mask;
+ char *span_str = NULL;
+ int count_of_bits;
+ size_t span;
+ int ret, i;
+
+ ret = get_full_cbm("L3", &long_mask);
+ if (ret)
+ return ret;
+
+ ret = get_cache_size(uparams->cpu, "L3", &cache_total_size);
+ if (ret)
+ return ret;
+ ksft_print_msg("Cache size :%lu\n", cache_total_size);
+
+ count_of_bits = count_bits(long_mask);
+
+ if (n < 1 || n > count_of_bits) {
+ ksft_print_msg("Invalid input value for numbr_of_bits n!\n");
+ ksft_print_msg("Please enter value in range 1 to %d\n", count_of_bits);
+ return -1;
+ }
+
+ struct resctrl_val_param param = {
+ .resctrl_val = CMT_STR,
+ .ctrlgrp = "c1",
+ .mongrp = "m1",
+ .filename = RESULT_FILE_NAME,
+ .mask = ~(long_mask << n) & long_mask,
+ .num_of_runs = 0,
+ .setup = cmt_setup,
+ };
+
+ span = cache_portion_size(cache_total_size, param.mask, long_mask);
+
+ if (strcmp(cmd[0], "fill_buf") == 0) {
+ /* Duplicate the command to be able to replace span in it */
+ for (i = 0; uparams->benchmark_cmd[i]; i++)
+ new_cmd[i] = uparams->benchmark_cmd[i];
+ new_cmd[i] = NULL;
+
+ ret = asprintf(&span_str, "%zu", span);
+ if (ret < 0)
+ return -1;
+ new_cmd[1] = span_str;
+ cmd = new_cmd;
+ }
+
+ remove(RESULT_FILE_NAME);
+
+ ret = resctrl_val(test, uparams, cmd, &param);
+ if (ret)
+ goto out;
+
+ ret = check_results(&param, span, n);
+ if (ret && (get_vendor() == ARCH_INTEL))
+ ksft_print_msg("Intel CMT may be inaccurate when Sub-NUMA Clustering is enabled. Check BIOS configuration.\n");
+
+out:
+ cmt_test_cleanup();
+ free(span_str);
+
+ return ret;
+}
+
+static bool cmt_feature_check(const struct resctrl_test *test)
+{
+ return test_resource_feature_check(test) &&
+ resctrl_mon_feature_exists("L3_MON", "llc_occupancy");
+}
+
+struct resctrl_test cmt_test = {
+ .name = "CMT",
+ .resource = "L3",
+ .feature_check = cmt_feature_check,
+ .run_test = cmt_run_test,
+};
diff --git a/tools/testing/selftests/resctrl/config b/tools/testing/selftests/resctrl/config
new file mode 100644
index 000000000000..8d9f2deb56ed
--- /dev/null
+++ b/tools/testing/selftests/resctrl/config
@@ -0,0 +1,2 @@
+CONFIG_X86_CPU_RESCTRL=y
+CONFIG_PROC_CPU_RESCTRL=y
diff --git a/tools/testing/selftests/resctrl/cqm_test.c b/tools/testing/selftests/resctrl/cqm_test.c
deleted file mode 100644
index c8756152bd61..000000000000
--- a/tools/testing/selftests/resctrl/cqm_test.c
+++ /dev/null
@@ -1,176 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * Cache Monitoring Technology (CQM) test
- *
- * Copyright (C) 2018 Intel Corporation
- *
- * Authors:
- * Sai Praneeth Prakhya <sai.praneeth.prakhya@intel.com>,
- * Fenghua Yu <fenghua.yu@intel.com>
- */
-#include "resctrl.h"
-#include <unistd.h>
-
-#define RESULT_FILE_NAME "result_cqm"
-#define NUM_OF_RUNS 5
-#define MAX_DIFF 2000000
-#define MAX_DIFF_PERCENT 15
-
-int count_of_bits;
-char cbm_mask[256];
-unsigned long long_mask;
-unsigned long cache_size;
-
-static int cqm_setup(int num, ...)
-{
- struct resctrl_val_param *p;
- va_list param;
-
- va_start(param, num);
- p = va_arg(param, struct resctrl_val_param *);
- va_end(param);
-
- /* Run NUM_OF_RUNS times */
- if (p->num_of_runs >= NUM_OF_RUNS)
- return -1;
-
- p->num_of_runs++;
-
- return 0;
-}
-
-static void show_cache_info(unsigned long sum_llc_occu_resc, int no_of_bits,
- unsigned long span)
-{
- unsigned long avg_llc_occu_resc = 0;
- float diff_percent;
- long avg_diff = 0;
- bool res;
-
- avg_llc_occu_resc = sum_llc_occu_resc / (NUM_OF_RUNS - 1);
- avg_diff = (long)abs(span - avg_llc_occu_resc);
-
- diff_percent = (((float)span - avg_llc_occu_resc) / span) * 100;
-
- if ((abs((int)diff_percent) <= MAX_DIFF_PERCENT) ||
- (abs(avg_diff) <= MAX_DIFF))
- res = true;
- else
- res = false;
-
- printf("%sok CQM: diff within %d, %d\%%\n", res ? "" : "not",
- MAX_DIFF, (int)MAX_DIFF_PERCENT);
-
- printf("# diff: %ld\n", avg_diff);
- printf("# percent diff=%d\n", abs((int)diff_percent));
- printf("# Results are displayed in (Bytes)\n");
- printf("# Number of bits: %d\n", no_of_bits);
- printf("# Avg_llc_occu_resc: %lu\n", avg_llc_occu_resc);
- printf("# llc_occu_exp (span): %lu\n", span);
-
- tests_run++;
-}
-
-static int check_results(struct resctrl_val_param *param, int no_of_bits)
-{
- char *token_array[8], temp[512];
- unsigned long sum_llc_occu_resc = 0;
- int runs = 0;
- FILE *fp;
-
- printf("# checking for pass/fail\n");
- fp = fopen(param->filename, "r");
- if (!fp) {
- perror("# Error in opening file\n");
-
- return errno;
- }
-
- while (fgets(temp, 1024, fp)) {
- char *token = strtok(temp, ":\t");
- int fields = 0;
-
- while (token) {
- token_array[fields++] = token;
- token = strtok(NULL, ":\t");
- }
-
- /* Field 3 is llc occ resc value */
- if (runs > 0)
- sum_llc_occu_resc += strtoul(token_array[3], NULL, 0);
- runs++;
- }
- fclose(fp);
- show_cache_info(sum_llc_occu_resc, no_of_bits, param->span);
-
- return 0;
-}
-
-void cqm_test_cleanup(void)
-{
- remove(RESULT_FILE_NAME);
-}
-
-int cqm_resctrl_val(int cpu_no, int n, char **benchmark_cmd)
-{
- int ret, mum_resctrlfs;
-
- cache_size = 0;
- mum_resctrlfs = 1;
-
- ret = remount_resctrlfs(mum_resctrlfs);
- if (ret)
- return ret;
-
- if (!validate_resctrl_feature_request("cqm"))
- return -1;
-
- ret = get_cbm_mask("L3");
- if (ret)
- return ret;
-
- long_mask = strtoul(cbm_mask, NULL, 16);
-
- ret = get_cache_size(cpu_no, "L3", &cache_size);
- if (ret)
- return ret;
- printf("cache size :%lu\n", cache_size);
-
- count_of_bits = count_bits(long_mask);
-
- if (n < 1 || n > count_of_bits) {
- printf("Invalid input value for numbr_of_bits n!\n");
- printf("Please Enter value in range 1 to %d\n", count_of_bits);
- return -1;
- }
-
- struct resctrl_val_param param = {
- .resctrl_val = "cqm",
- .ctrlgrp = "c1",
- .mongrp = "m1",
- .cpu_no = cpu_no,
- .mum_resctrlfs = 0,
- .filename = RESULT_FILE_NAME,
- .mask = ~(long_mask << n) & long_mask,
- .span = cache_size * n / count_of_bits,
- .num_of_runs = 0,
- .setup = cqm_setup,
- };
-
- if (strcmp(benchmark_cmd[0], "fill_buf") == 0)
- sprintf(benchmark_cmd[1], "%lu", param.span);
-
- remove(RESULT_FILE_NAME);
-
- ret = resctrl_val(benchmark_cmd, &param);
- if (ret)
- return ret;
-
- ret = check_results(&param, n);
- if (ret)
- return ret;
-
- cqm_test_cleanup();
-
- return 0;
-}
diff --git a/tools/testing/selftests/resctrl/fill_buf.c b/tools/testing/selftests/resctrl/fill_buf.c
index 79c611c99a3d..ae120f1735c0 100644
--- a/tools/testing/selftests/resctrl/fill_buf.c
+++ b/tools/testing/selftests/resctrl/fill_buf.c
@@ -14,7 +14,6 @@
#include <sys/types.h>
#include <sys/wait.h>
#include <inttypes.h>
-#include <malloc.h>
#include <string.h>
#include "resctrl.h"
@@ -23,8 +22,6 @@
#define PAGE_SIZE (4 * 1024)
#define MB (1024 * 1024)
-static unsigned char *startptr;
-
static void sb(void)
{
#if defined(__i386) || defined(__x86_64)
@@ -33,14 +30,6 @@ static void sb(void)
#endif
}
-static void ctrl_handler(int signo)
-{
- free(startptr);
- printf("\nEnding\n");
- sb();
- exit(EXIT_SUCCESS);
-}
-
static void cl_flush(void *p)
{
#if defined(__i386) || defined(__x86_64)
@@ -49,165 +38,132 @@ static void cl_flush(void *p)
#endif
}
-static void mem_flush(void *p, size_t s)
+void mem_flush(unsigned char *buf, size_t buf_size)
{
- char *cp = (char *)p;
+ unsigned char *cp = buf;
size_t i = 0;
- s = s / CL_SIZE; /* mem size in cache llines */
+ buf_size = buf_size / CL_SIZE; /* mem size in cache lines */
- for (i = 0; i < s; i++)
+ for (i = 0; i < buf_size; i++)
cl_flush(&cp[i * CL_SIZE]);
sb();
}
-static void *malloc_and_init_memory(size_t s)
-{
- uint64_t *p64;
- size_t s64;
-
- void *p = memalign(PAGE_SIZE, s);
-
- p64 = (uint64_t *)p;
- s64 = s / sizeof(uint64_t);
-
- while (s64 > 0) {
- *p64 = (uint64_t)rand();
- p64 += (CL_SIZE / sizeof(uint64_t));
- s64 -= (CL_SIZE / sizeof(uint64_t));
- }
-
- return p;
-}
+/*
+ * Buffer index step advance to workaround HW prefetching interfering with
+ * the measurements.
+ *
+ * Must be a prime to step through all indexes of the buffer.
+ *
+ * Some primes work better than others on some architectures (from MBA/MBM
+ * result stability point of view).
+ */
+#define FILL_IDX_MULT 23
-static int fill_one_span_read(unsigned char *start_ptr, unsigned char *end_ptr)
+static int fill_one_span_read(unsigned char *buf, size_t buf_size)
{
- unsigned char sum, *p;
+ unsigned int size = buf_size / (CL_SIZE / 2);
+ unsigned int i, idx = 0;
+ unsigned char sum = 0;
- sum = 0;
- p = start_ptr;
- while (p < end_ptr) {
- sum += *p;
- p += (CL_SIZE / 2);
+ /*
+ * Read the buffer in an order that is unexpected by HW prefetching
+ * optimizations to prevent them interfering with the caching pattern.
+ *
+ * The read order is (in terms of halves of cachelines):
+ * i * FILL_IDX_MULT % size
+ * The formula is open-coded below to avoiding modulo inside the loop
+ * as it improves MBA/MBM result stability on some architectures.
+ */
+ for (i = 0; i < size; i++) {
+ sum += buf[idx * (CL_SIZE / 2)];
+
+ idx += FILL_IDX_MULT;
+ while (idx >= size)
+ idx -= size;
}
return sum;
}
-static
-void fill_one_span_write(unsigned char *start_ptr, unsigned char *end_ptr)
+static void fill_one_span_write(unsigned char *buf, size_t buf_size)
{
+ unsigned char *end_ptr = buf + buf_size;
unsigned char *p;
- p = start_ptr;
+ p = buf;
while (p < end_ptr) {
*p = '1';
p += (CL_SIZE / 2);
}
}
-static int fill_cache_read(unsigned char *start_ptr, unsigned char *end_ptr,
- char *resctrl_val)
+void fill_cache_read(unsigned char *buf, size_t buf_size, bool once)
{
int ret = 0;
- FILE *fp;
while (1) {
- ret = fill_one_span_read(start_ptr, end_ptr);
- if (!strcmp(resctrl_val, "cat"))
+ ret = fill_one_span_read(buf, buf_size);
+ if (once)
break;
}
/* Consume read result so that reading memory is not optimized out. */
- fp = fopen("/dev/null", "w");
- if (!fp)
- perror("Unable to write to /dev/null");
- fprintf(fp, "Sum: %d ", ret);
- fclose(fp);
-
- return 0;
+ *value_sink = ret;
}
-static int fill_cache_write(unsigned char *start_ptr, unsigned char *end_ptr,
- char *resctrl_val)
+static void fill_cache_write(unsigned char *buf, size_t buf_size, bool once)
{
while (1) {
- fill_one_span_write(start_ptr, end_ptr);
- if (!strcmp(resctrl_val, "cat"))
+ fill_one_span_write(buf, buf_size);
+ if (once)
break;
}
-
- return 0;
}
-static int
-fill_cache(unsigned long long buf_size, int malloc_and_init, int memflush,
- int op, char *resctrl_val)
+unsigned char *alloc_buffer(size_t buf_size, int memflush)
{
- unsigned char *start_ptr, *end_ptr;
- unsigned long long i;
+ void *buf = NULL;
+ uint64_t *p64;
+ size_t s64;
int ret;
- if (malloc_and_init)
- start_ptr = malloc_and_init_memory(buf_size);
- else
- start_ptr = malloc(buf_size);
-
- if (!start_ptr)
- return -1;
+ ret = posix_memalign(&buf, PAGE_SIZE, buf_size);
+ if (ret < 0)
+ return NULL;
- startptr = start_ptr;
- end_ptr = start_ptr + buf_size;
+ /* Initialize the buffer */
+ p64 = buf;
+ s64 = buf_size / sizeof(uint64_t);
- /*
- * It's better to touch the memory once to avoid any compiler
- * optimizations
- */
- if (!malloc_and_init) {
- for (i = 0; i < buf_size; i++)
- *start_ptr++ = (unsigned char)rand();
+ while (s64 > 0) {
+ *p64 = (uint64_t)rand();
+ p64 += (CL_SIZE / sizeof(uint64_t));
+ s64 -= (CL_SIZE / sizeof(uint64_t));
}
- start_ptr = startptr;
-
/* Flush the memory before using to avoid "cache hot pages" effect */
if (memflush)
- mem_flush(start_ptr, buf_size);
+ mem_flush(buf, buf_size);
- if (op == 0)
- ret = fill_cache_read(start_ptr, end_ptr, resctrl_val);
- else
- ret = fill_cache_write(start_ptr, end_ptr, resctrl_val);
-
- if (ret) {
- printf("\n Error in fill cache read/write...\n");
- return -1;
- }
-
- free(startptr);
-
- return 0;
+ return buf;
}
-int run_fill_buf(unsigned long span, int malloc_and_init_memory,
- int memflush, int op, char *resctrl_val)
+int run_fill_buf(size_t buf_size, int memflush, int op, bool once)
{
- unsigned long long cache_size = span;
- int ret;
+ unsigned char *buf;
- /* set up ctrl-c handler */
- if (signal(SIGINT, ctrl_handler) == SIG_ERR)
- printf("Failed to catch SIGINT!\n");
- if (signal(SIGHUP, ctrl_handler) == SIG_ERR)
- printf("Failed to catch SIGHUP!\n");
-
- ret = fill_cache(cache_size, malloc_and_init_memory, memflush, op,
- resctrl_val);
- if (ret) {
- printf("\n Error in fill cache\n");
+ buf = alloc_buffer(buf_size, memflush);
+ if (!buf)
return -1;
- }
+
+ if (op == 0)
+ fill_cache_read(buf, buf_size, once);
+ else
+ fill_cache_write(buf, buf_size, once);
+ free(buf);
return 0;
}
diff --git a/tools/testing/selftests/resctrl/mba_test.c b/tools/testing/selftests/resctrl/mba_test.c
index 7bf8eaa6204b..7946e32e85c8 100644
--- a/tools/testing/selftests/resctrl/mba_test.c
+++ b/tools/testing/selftests/resctrl/mba_test.c
@@ -12,7 +12,7 @@
#define RESULT_FILE_NAME "result_mba"
#define NUM_OF_RUNS 5
-#define MAX_DIFF 300
+#define MAX_DIFF_PERCENT 8
#define ALLOCATION_MAX 100
#define ALLOCATION_MIN 10
#define ALLOCATION_STEP 10
@@ -22,16 +22,13 @@
* con_mon grp, mon_grp in resctrl FS.
* For each allocation, run 5 times in order to get average values.
*/
-static int mba_setup(int num, ...)
+static int mba_setup(const struct resctrl_test *test,
+ const struct user_params *uparams,
+ struct resctrl_val_param *p)
{
static int runs_per_allocation, allocation = 100;
- struct resctrl_val_param *p;
char allocation_str[64];
- va_list param;
-
- va_start(param, num);
- p = va_arg(param, struct resctrl_val_param *);
- va_end(param);
+ int ret;
if (runs_per_allocation >= NUM_OF_RUNS)
runs_per_allocation = 0;
@@ -41,28 +38,32 @@ static int mba_setup(int num, ...)
return 0;
if (allocation < ALLOCATION_MIN || allocation > ALLOCATION_MAX)
- return -1;
+ return END_OF_TESTS;
sprintf(allocation_str, "%d", allocation);
- write_schemata(p->ctrlgrp, allocation_str, p->cpu_no, p->resctrl_val);
+ ret = write_schemata(p->ctrlgrp, allocation_str, uparams->cpu, test->resource);
+ if (ret < 0)
+ return ret;
+
allocation -= ALLOCATION_STEP;
return 0;
}
-static void show_mba_info(unsigned long *bw_imc, unsigned long *bw_resc)
+static bool show_mba_info(unsigned long *bw_imc, unsigned long *bw_resc)
{
int allocation, runs;
- bool failed = false;
+ bool ret = false;
- printf("# Results are displayed in (MB)\n");
+ ksft_print_msg("Results are displayed in (MB)\n");
/* Memory bandwidth from 100% down to 10% */
for (allocation = 0; allocation < ALLOCATION_MAX / ALLOCATION_STEP;
allocation++) {
unsigned long avg_bw_imc, avg_bw_resc;
unsigned long sum_bw_imc = 0, sum_bw_resc = 0;
- unsigned long avg_diff;
+ int avg_diff_per;
+ float avg_diff;
/*
* The first run is discarded due to inaccurate value from
@@ -76,23 +77,28 @@ static void show_mba_info(unsigned long *bw_imc, unsigned long *bw_resc)
avg_bw_imc = sum_bw_imc / (NUM_OF_RUNS - 1);
avg_bw_resc = sum_bw_resc / (NUM_OF_RUNS - 1);
- avg_diff = labs((long)(avg_bw_resc - avg_bw_imc));
-
- printf("%sok MBA schemata percentage %u smaller than %d %%\n",
- avg_diff > MAX_DIFF ? "not " : "",
- ALLOCATION_MAX - ALLOCATION_STEP * allocation,
- MAX_DIFF);
- tests_run++;
- printf("# avg_diff: %lu\n", avg_diff);
- printf("# avg_bw_imc: %lu\n", avg_bw_imc);
- printf("# avg_bw_resc: %lu\n", avg_bw_resc);
- if (avg_diff > MAX_DIFF)
- failed = true;
+ avg_diff = (float)labs(avg_bw_resc - avg_bw_imc) / avg_bw_imc;
+ avg_diff_per = (int)(avg_diff * 100);
+
+ ksft_print_msg("%s Check MBA diff within %d%% for schemata %u\n",
+ avg_diff_per > MAX_DIFF_PERCENT ?
+ "Fail:" : "Pass:",
+ MAX_DIFF_PERCENT,
+ ALLOCATION_MAX - ALLOCATION_STEP * allocation);
+
+ ksft_print_msg("avg_diff_per: %d%%\n", avg_diff_per);
+ ksft_print_msg("avg_bw_imc: %lu\n", avg_bw_imc);
+ ksft_print_msg("avg_bw_resc: %lu\n", avg_bw_resc);
+ if (avg_diff_per > MAX_DIFF_PERCENT)
+ ret = true;
}
- printf("%sok schemata change using MBA%s\n", failed ? "not " : "",
- failed ? " # at least one test failed" : "");
- tests_run++;
+ ksft_print_msg("%s Check schemata change using MBA\n",
+ ret ? "Fail:" : "Pass:");
+ if (ret)
+ ksft_print_msg("At least one test failed\n");
+
+ return ret;
}
static int check_results(void)
@@ -104,9 +110,9 @@ static int check_results(void)
fp = fopen(output, "r");
if (!fp) {
- perror(output);
+ ksft_perror(output);
- return errno;
+ return -1;
}
runs = 0;
@@ -128,9 +134,7 @@ static int check_results(void)
fclose(fp);
- show_mba_info(bw_imc, bw_resc);
-
- return 0;
+ return show_mba_info(bw_imc, bw_resc);
}
void mba_test_cleanup(void)
@@ -138,34 +142,42 @@ void mba_test_cleanup(void)
remove(RESULT_FILE_NAME);
}
-int mba_schemata_change(int cpu_no, char *bw_report, char **benchmark_cmd)
+static int mba_run_test(const struct resctrl_test *test, const struct user_params *uparams)
{
struct resctrl_val_param param = {
- .resctrl_val = "mba",
+ .resctrl_val = MBA_STR,
.ctrlgrp = "c1",
.mongrp = "m1",
- .cpu_no = cpu_no,
- .mum_resctrlfs = 1,
.filename = RESULT_FILE_NAME,
- .bw_report = bw_report,
+ .bw_report = "reads",
.setup = mba_setup
};
int ret;
remove(RESULT_FILE_NAME);
- if (!validate_resctrl_feature_request("mba"))
- return -1;
-
- ret = resctrl_val(benchmark_cmd, &param);
+ ret = resctrl_val(test, uparams, uparams->benchmark_cmd, &param);
if (ret)
- return ret;
+ goto out;
ret = check_results();
- if (ret)
- return ret;
+out:
mba_test_cleanup();
- return 0;
+ return ret;
}
+
+static bool mba_feature_check(const struct resctrl_test *test)
+{
+ return test_resource_feature_check(test) &&
+ resctrl_mon_feature_exists("L3_MON", "mbm_local_bytes");
+}
+
+struct resctrl_test mba_test = {
+ .name = "MBA",
+ .resource = "MB",
+ .vendor_specific = ARCH_INTEL,
+ .feature_check = mba_feature_check,
+ .run_test = mba_run_test,
+};
diff --git a/tools/testing/selftests/resctrl/mbm_test.c b/tools/testing/selftests/resctrl/mbm_test.c
index 4700f7453f81..d67ffa3ec63a 100644
--- a/tools/testing/selftests/resctrl/mbm_test.c
+++ b/tools/testing/selftests/resctrl/mbm_test.c
@@ -11,16 +11,16 @@
#include "resctrl.h"
#define RESULT_FILE_NAME "result_mbm"
-#define MAX_DIFF 300
+#define MAX_DIFF_PERCENT 8
#define NUM_OF_RUNS 5
-static void
-show_bw_info(unsigned long *bw_imc, unsigned long *bw_resc, int span)
+static int
+show_bw_info(unsigned long *bw_imc, unsigned long *bw_resc, size_t span)
{
unsigned long avg_bw_imc = 0, avg_bw_resc = 0;
unsigned long sum_bw_imc = 0, sum_bw_resc = 0;
- long avg_diff = 0;
- int runs;
+ int runs, ret, avg_diff_per;
+ float avg_diff = 0;
/*
* Discard the first value which is inaccurate due to monitoring setup
@@ -33,32 +33,35 @@ show_bw_info(unsigned long *bw_imc, unsigned long *bw_resc, int span)
avg_bw_imc = sum_bw_imc / 4;
avg_bw_resc = sum_bw_resc / 4;
- avg_diff = avg_bw_resc - avg_bw_imc;
-
- printf("%sok MBM: diff within %d%%\n",
- labs(avg_diff) > MAX_DIFF ? "not " : "", MAX_DIFF);
- tests_run++;
- printf("# avg_diff: %lu\n", labs(avg_diff));
- printf("# Span (MB): %d\n", span);
- printf("# avg_bw_imc: %lu\n", avg_bw_imc);
- printf("# avg_bw_resc: %lu\n", avg_bw_resc);
+ avg_diff = (float)labs(avg_bw_resc - avg_bw_imc) / avg_bw_imc;
+ avg_diff_per = (int)(avg_diff * 100);
+
+ ret = avg_diff_per > MAX_DIFF_PERCENT;
+ ksft_print_msg("%s Check MBM diff within %d%%\n",
+ ret ? "Fail:" : "Pass:", MAX_DIFF_PERCENT);
+ ksft_print_msg("avg_diff_per: %d%%\n", avg_diff_per);
+ ksft_print_msg("Span (MB): %zu\n", span / MB);
+ ksft_print_msg("avg_bw_imc: %lu\n", avg_bw_imc);
+ ksft_print_msg("avg_bw_resc: %lu\n", avg_bw_resc);
+
+ return ret;
}
-static int check_results(int span)
+static int check_results(size_t span)
{
unsigned long bw_imc[NUM_OF_RUNS], bw_resc[NUM_OF_RUNS];
char temp[1024], *token_array[8];
char output[] = RESULT_FILE_NAME;
- int runs;
+ int runs, ret;
FILE *fp;
- printf("# Checking for pass/fail\n");
+ ksft_print_msg("Checking for pass/fail\n");
fp = fopen(output, "r");
if (!fp) {
- perror(output);
+ ksft_perror(output);
- return errno;
+ return -1;
}
runs = 0;
@@ -76,32 +79,28 @@ static int check_results(int span)
runs++;
}
- show_bw_info(bw_imc, bw_resc, span);
+ ret = show_bw_info(bw_imc, bw_resc, span);
fclose(fp);
- return 0;
+ return ret;
}
-static int mbm_setup(int num, ...)
+static int mbm_setup(const struct resctrl_test *test,
+ const struct user_params *uparams,
+ struct resctrl_val_param *p)
{
- struct resctrl_val_param *p;
- static int num_of_runs;
- va_list param;
int ret = 0;
/* Run NUM_OF_RUNS times */
- if (num_of_runs++ >= NUM_OF_RUNS)
- return -1;
-
- va_start(param, num);
- p = va_arg(param, struct resctrl_val_param *);
- va_end(param);
+ if (p->num_of_runs >= NUM_OF_RUNS)
+ return END_OF_TESTS;
/* Set up shemata with 100% allocation on the first run. */
- if (num_of_runs == 0)
- ret = write_schemata(p->ctrlgrp, "100", p->cpu_no,
- p->resctrl_val);
+ if (p->num_of_runs == 0 && resctrl_resource_exists("MB"))
+ ret = write_schemata(p->ctrlgrp, "100", uparams->cpu, test->resource);
+
+ p->num_of_runs++;
return ret;
}
@@ -111,35 +110,44 @@ void mbm_test_cleanup(void)
remove(RESULT_FILE_NAME);
}
-int mbm_bw_change(int span, int cpu_no, char *bw_report, char **benchmark_cmd)
+static int mbm_run_test(const struct resctrl_test *test, const struct user_params *uparams)
{
struct resctrl_val_param param = {
- .resctrl_val = "mbm",
+ .resctrl_val = MBM_STR,
.ctrlgrp = "c1",
.mongrp = "m1",
- .span = span,
- .cpu_no = cpu_no,
- .mum_resctrlfs = 1,
.filename = RESULT_FILE_NAME,
- .bw_report = bw_report,
+ .bw_report = "reads",
.setup = mbm_setup
};
int ret;
remove(RESULT_FILE_NAME);
- if (!validate_resctrl_feature_request("mbm"))
- return -1;
-
- ret = resctrl_val(benchmark_cmd, &param);
+ ret = resctrl_val(test, uparams, uparams->benchmark_cmd, &param);
if (ret)
- return ret;
+ goto out;
- ret = check_results(span);
- if (ret)
- return ret;
+ ret = check_results(DEFAULT_SPAN);
+ if (ret && (get_vendor() == ARCH_INTEL))
+ ksft_print_msg("Intel MBM may be inaccurate when Sub-NUMA Clustering is enabled. Check BIOS configuration.\n");
+out:
mbm_test_cleanup();
- return 0;
+ return ret;
+}
+
+static bool mbm_feature_check(const struct resctrl_test *test)
+{
+ return resctrl_mon_feature_exists("L3_MON", "mbm_total_bytes") &&
+ resctrl_mon_feature_exists("L3_MON", "mbm_local_bytes");
}
+
+struct resctrl_test mbm_test = {
+ .name = "MBM",
+ .resource = "MB",
+ .vendor_specific = ARCH_INTEL,
+ .feature_check = mbm_feature_check,
+ .run_test = mbm_run_test,
+};
diff --git a/tools/testing/selftests/resctrl/resctrl.h b/tools/testing/selftests/resctrl/resctrl.h
index 39bf59c6b9c5..2051bd135e0d 100644
--- a/tools/testing/selftests/resctrl/resctrl.h
+++ b/tools/testing/selftests/resctrl/resctrl.h
@@ -1,9 +1,7 @@
/* SPDX-License-Identifier: GPL-2.0 */
-#define _GNU_SOURCE
#ifndef RESCTRL_H
#define RESCTRL_H
#include <stdio.h>
-#include <stdarg.h>
#include <math.h>
#include <errno.h>
#include <sched.h>
@@ -23,27 +21,74 @@
#include <sys/eventfd.h>
#include <asm/unistd.h>
#include <linux/perf_event.h>
+#include "../kselftest.h"
#define MB (1024 * 1024)
#define RESCTRL_PATH "/sys/fs/resctrl"
#define PHYS_ID_PATH "/sys/devices/system/cpu/cpu"
-#define CBM_MASK_PATH "/sys/fs/resctrl/info"
+#define INFO_PATH "/sys/fs/resctrl/info"
-#define PARENT_EXIT(err_msg) \
+/*
+ * CPU vendor IDs
+ *
+ * Define as bits because they're used for vendor_specific bitmask in
+ * the struct resctrl_test.
+ */
+#define ARCH_INTEL 1
+#define ARCH_AMD 2
+
+#define END_OF_TESTS 1
+
+#define BENCHMARK_ARGS 64
+
+#define DEFAULT_SPAN (250 * MB)
+
+#define PARENT_EXIT() \
do { \
- perror(err_msg); \
kill(ppid, SIGKILL); \
+ umount_resctrlfs(); \
exit(EXIT_FAILURE); \
} while (0)
/*
+ * user_params: User supplied parameters
+ * @cpu: CPU number to which the benchmark will be bound to
+ * @bits: Number of bits used for cache allocation size
+ * @benchmark_cmd: Benchmark command to run during (some of the) tests
+ */
+struct user_params {
+ int cpu;
+ int bits;
+ const char *benchmark_cmd[BENCHMARK_ARGS];
+};
+
+/*
+ * resctrl_test: resctrl test definition
+ * @name: Test name
+ * @group: Test group - a common name for tests that share some characteristic
+ * (e.g., L3 CAT test belongs to the CAT group). Can be NULL
+ * @resource: Resource to test (e.g., MB, L3, L2, etc.)
+ * @vendor_specific: Bitmask for vendor-specific tests (can be 0 for universal tests)
+ * @disabled: Test is disabled
+ * @feature_check: Callback to check required resctrl features
+ * @run_test: Callback to run the test
+ */
+struct resctrl_test {
+ const char *name;
+ const char *group;
+ const char *resource;
+ unsigned int vendor_specific;
+ bool disabled;
+ bool (*feature_check)(const struct resctrl_test *test);
+ int (*run_test)(const struct resctrl_test *test,
+ const struct user_params *uparams);
+};
+
+/*
* resctrl_val_param: resctrl test parameters
* @resctrl_val: Resctrl feature (Eg: mbm, mba.. etc)
* @ctrlgrp: Name of the control monitor group (con_mon grp)
* @mongrp: Name of the monitor group (mon grp)
- * @cpu_no: CPU number to which the benchmark would be binded
- * @span: Memory bytes accessed in each benchmark iteration
- * @mum_resctrlfs: Should the resctrl FS be remounted?
* @filename: Name of file to which the o/p should be written
* @bw_report: Bandwidth report type (reads vs writes)
* @setup: Call back function to setup test environment
@@ -52,56 +97,119 @@ struct resctrl_val_param {
char *resctrl_val;
char ctrlgrp[64];
char mongrp[64];
- int cpu_no;
- unsigned long span;
- int mum_resctrlfs;
char filename[64];
char *bw_report;
unsigned long mask;
int num_of_runs;
- int (*setup)(int num, ...);
+ int (*setup)(const struct resctrl_test *test,
+ const struct user_params *uparams,
+ struct resctrl_val_param *param);
};
-pid_t bm_pid, ppid;
-int tests_run;
+struct perf_event_read {
+ __u64 nr; /* The number of events */
+ struct {
+ __u64 value; /* The value of the event */
+ } values[2];
+};
+
+#define MBM_STR "mbm"
+#define MBA_STR "mba"
+#define CMT_STR "cmt"
+#define CAT_STR "cat"
-char llc_occup_path[1024];
-bool is_amd;
+/*
+ * Memory location that consumes values compiler must not optimize away.
+ * Volatile ensures writes to this location cannot be optimized away by
+ * compiler.
+ */
+extern volatile int *value_sink;
+extern pid_t bm_pid, ppid;
+
+extern char llc_occup_path[1024];
+
+int get_vendor(void);
bool check_resctrlfs_support(void);
int filter_dmesg(void);
-int remount_resctrlfs(bool mum_resctrlfs);
-int get_resource_id(int cpu_no, int *resource_id);
+int get_domain_id(const char *resource, int cpu_no, int *domain_id);
+int mount_resctrlfs(void);
int umount_resctrlfs(void);
int validate_bw_report_request(char *bw_report);
-bool validate_resctrl_feature_request(char *resctrl_val);
+bool resctrl_resource_exists(const char *resource);
+bool resctrl_mon_feature_exists(const char *resource, const char *feature);
+bool resource_info_file_exists(const char *resource, const char *file);
+bool test_resource_feature_check(const struct resctrl_test *test);
char *fgrep(FILE *inf, const char *str);
-int taskset_benchmark(pid_t bm_pid, int cpu_no);
-void run_benchmark(int signum, siginfo_t *info, void *ucontext);
-int write_schemata(char *ctrlgrp, char *schemata, int cpu_no,
- char *resctrl_val);
+int taskset_benchmark(pid_t bm_pid, int cpu_no, cpu_set_t *old_affinity);
+int taskset_restore(pid_t bm_pid, cpu_set_t *old_affinity);
+int write_schemata(char *ctrlgrp, char *schemata, int cpu_no, const char *resource);
int write_bm_pid_to_resctrl(pid_t bm_pid, char *ctrlgrp, char *mongrp,
char *resctrl_val);
int perf_event_open(struct perf_event_attr *hw_event, pid_t pid, int cpu,
int group_fd, unsigned long flags);
-int run_fill_buf(unsigned long span, int malloc_and_init_memory, int memflush,
- int op, char *resctrl_va);
-int resctrl_val(char **benchmark_cmd, struct resctrl_val_param *param);
-int mbm_bw_change(int span, int cpu_no, char *bw_report, char **benchmark_cmd);
+unsigned char *alloc_buffer(size_t buf_size, int memflush);
+void mem_flush(unsigned char *buf, size_t buf_size);
+void fill_cache_read(unsigned char *buf, size_t buf_size, bool once);
+int run_fill_buf(size_t buf_size, int memflush, int op, bool once);
+int resctrl_val(const struct resctrl_test *test,
+ const struct user_params *uparams,
+ const char * const *benchmark_cmd,
+ struct resctrl_val_param *param);
void tests_cleanup(void);
void mbm_test_cleanup(void);
-int mba_schemata_change(int cpu_no, char *bw_report, char **benchmark_cmd);
void mba_test_cleanup(void);
-int get_cbm_mask(char *cache_type);
-int get_cache_size(int cpu_no, char *cache_type, unsigned long *cache_size);
+unsigned long create_bit_mask(unsigned int start, unsigned int len);
+unsigned int count_contiguous_bits(unsigned long val, unsigned int *start);
+int get_full_cbm(const char *cache_type, unsigned long *mask);
+int get_mask_no_shareable(const char *cache_type, unsigned long *mask);
+int get_cache_size(int cpu_no, const char *cache_type, unsigned long *cache_size);
+int resource_info_unsigned_get(const char *resource, const char *filename, unsigned int *val);
void ctrlc_handler(int signum, siginfo_t *info, void *ptr);
-int cat_val(struct resctrl_val_param *param);
+int signal_handler_register(void);
+void signal_handler_unregister(void);
void cat_test_cleanup(void);
-int cat_perf_miss_val(int cpu_no, int no_of_bits, char *cache_type);
-int cqm_resctrl_val(int cpu_no, int n, char **benchmark_cmd);
unsigned int count_bits(unsigned long n);
-void cqm_test_cleanup(void);
-int get_core_sibling(int cpu_no);
-int measure_cache_vals(struct resctrl_val_param *param, int bm_pid);
+void cmt_test_cleanup(void);
+
+void perf_event_attr_initialize(struct perf_event_attr *pea, __u64 config);
+void perf_event_initialize_read_format(struct perf_event_read *pe_read);
+int perf_open(struct perf_event_attr *pea, pid_t pid, int cpu_no);
+int perf_event_reset_enable(int pe_fd);
+int perf_event_measure(int pe_fd, struct perf_event_read *pe_read,
+ const char *filename, int bm_pid);
+int measure_llc_resctrl(const char *filename, int bm_pid);
+void show_cache_info(int no_of_bits, __u64 avg_llc_val, size_t cache_span, bool lines);
+
+/*
+ * cache_portion_size - Calculate the size of a cache portion
+ * @cache_size: Total cache size in bytes
+ * @portion_mask: Cache portion mask
+ * @full_cache_mask: Full Cache Bit Mask (CBM) for the cache
+ *
+ * Return: The size of the cache portion in bytes.
+ */
+static inline unsigned long cache_portion_size(unsigned long cache_size,
+ unsigned long portion_mask,
+ unsigned long full_cache_mask)
+{
+ unsigned int bits = count_bits(full_cache_mask);
+
+ /*
+ * With no bits the full CBM, assume cache cannot be split into
+ * smaller portions. To avoid divide by zero, return cache_size.
+ */
+ if (!bits)
+ return cache_size;
+
+ return cache_size * count_bits(portion_mask) / bits;
+}
+
+extern struct resctrl_test mbm_test;
+extern struct resctrl_test mba_test;
+extern struct resctrl_test cmt_test;
+extern struct resctrl_test l3_cat_test;
+extern struct resctrl_test l3_noncont_cat_test;
+extern struct resctrl_test l2_noncont_cat_test;
#endif /* RESCTRL_H */
diff --git a/tools/testing/selftests/resctrl/resctrl_tests.c b/tools/testing/selftests/resctrl/resctrl_tests.c
index 425cc85ac883..f3dc1b9696e7 100644
--- a/tools/testing/selftests/resctrl/resctrl_tests.c
+++ b/tools/testing/selftests/resctrl/resctrl_tests.c
@@ -10,37 +10,72 @@
*/
#include "resctrl.h"
-#define BENCHMARK_ARGS 64
-#define BENCHMARK_ARG_SIZE 64
+/* Volatile memory sink to prevent compiler optimizations */
+static volatile int sink_target;
+volatile int *value_sink = &sink_target;
-bool is_amd;
+static struct resctrl_test *resctrl_tests[] = {
+ &mbm_test,
+ &mba_test,
+ &cmt_test,
+ &l3_cat_test,
+ &l3_noncont_cat_test,
+ &l2_noncont_cat_test,
+};
-void detect_amd(void)
+static int detect_vendor(void)
{
FILE *inf = fopen("/proc/cpuinfo", "r");
+ int vendor_id = 0;
+ char *s = NULL;
char *res;
if (!inf)
- return;
+ return vendor_id;
res = fgrep(inf, "vendor_id");
- if (res) {
- char *s = strchr(res, ':');
+ if (res)
+ s = strchr(res, ':');
+
+ if (s && !strcmp(s, ": GenuineIntel\n"))
+ vendor_id = ARCH_INTEL;
+ else if (s && !strcmp(s, ": AuthenticAMD\n"))
+ vendor_id = ARCH_AMD;
- is_amd = s && !strcmp(s, ": AuthenticAMD\n");
- free(res);
- }
fclose(inf);
+ free(res);
+ return vendor_id;
+}
+
+int get_vendor(void)
+{
+ static int vendor = -1;
+
+ if (vendor == -1)
+ vendor = detect_vendor();
+ if (vendor == 0)
+ ksft_print_msg("Can not get vendor info...\n");
+
+ return vendor;
}
static void cmd_help(void)
{
- printf("usage: resctrl_tests [-h] [-b \"benchmark_cmd [options]\"] [-t test list] [-n no_of_bits]\n");
- printf("\t-b benchmark_cmd [options]: run specified benchmark for MBM, MBA and CQM");
- printf("\t default benchmark is builtin fill_buf\n");
- printf("\t-t test list: run tests specified in the test list, ");
- printf("e.g. -t mbm, mba, cqm, cat\n");
+ int i;
+
+ printf("usage: resctrl_tests [-h] [-t test list] [-n no_of_bits] [-b benchmark_cmd [option]...]\n");
+ printf("\t-b benchmark_cmd [option]...: run specified benchmark for MBM, MBA and CMT\n");
+ printf("\t default benchmark is builtin fill_buf\n");
+ printf("\t-t test list: run tests/groups specified by the list, ");
+ printf("e.g. -t mbm,mba,cmt,cat\n");
+ printf("\t\tSupported tests (group):\n");
+ for (i = 0; i < ARRAY_SIZE(resctrl_tests); i++) {
+ if (resctrl_tests[i]->group)
+ printf("\t\t\t%s (%s)\n", resctrl_tests[i]->name, resctrl_tests[i]->group);
+ else
+ printf("\t\t\t%s\n", resctrl_tests[i]->name);
+ }
printf("\t-n no_of_bits: run cache tests using specified no of bits in cache bit mask\n");
printf("\t-p cpu_no: specify CPU number to run the test. 1 is default\n");
printf("\t-h: help\n");
@@ -50,62 +85,153 @@ void tests_cleanup(void)
{
mbm_test_cleanup();
mba_test_cleanup();
- cqm_test_cleanup();
+ cmt_test_cleanup();
cat_test_cleanup();
}
-int main(int argc, char **argv)
+static int test_prepare(void)
{
- bool has_ben = false, mbm_test = true, mba_test = true, cqm_test = true;
- int res, c, cpu_no = 1, span = 250, argc_new = argc, i, no_of_bits = 5;
- char *benchmark_cmd[BENCHMARK_ARGS], bw_report[64], bm_type[64];
- char benchmark_cmd_area[BENCHMARK_ARGS][BENCHMARK_ARG_SIZE];
- int ben_ind, ben_count;
- bool cat_test = true;
-
- for (i = 0; i < argc; i++) {
- if (strcmp(argv[i], "-b") == 0) {
- ben_ind = i + 1;
- ben_count = argc - ben_ind;
- argc_new = ben_ind - 1;
- has_ben = true;
- break;
- }
+ int res;
+
+ res = signal_handler_register();
+ if (res) {
+ ksft_print_msg("Failed to register signal handler\n");
+ return res;
+ }
+
+ res = mount_resctrlfs();
+ if (res) {
+ signal_handler_unregister();
+ ksft_print_msg("Failed to mount resctrl FS\n");
+ return res;
+ }
+ return 0;
+}
+
+static void test_cleanup(void)
+{
+ umount_resctrlfs();
+ signal_handler_unregister();
+}
+
+static bool test_vendor_specific_check(const struct resctrl_test *test)
+{
+ if (!test->vendor_specific)
+ return true;
+
+ return get_vendor() & test->vendor_specific;
+}
+
+static void run_single_test(const struct resctrl_test *test, const struct user_params *uparams)
+{
+ int ret;
+
+ if (test->disabled)
+ return;
+
+ if (!test_vendor_specific_check(test)) {
+ ksft_test_result_skip("Hardware does not support %s\n", test->name);
+ return;
}
- while ((c = getopt(argc_new, argv, "ht:b:")) != -1) {
+ ksft_print_msg("Starting %s test ...\n", test->name);
+
+ if (test_prepare()) {
+ ksft_exit_fail_msg("Abnormal failure when preparing for the test\n");
+ return;
+ }
+
+ if (!test->feature_check(test)) {
+ ksft_test_result_skip("Hardware does not support %s or %s is disabled\n",
+ test->name, test->name);
+ goto cleanup;
+ }
+
+ ret = test->run_test(test, uparams);
+ ksft_test_result(!ret, "%s: test\n", test->name);
+
+cleanup:
+ test_cleanup();
+}
+
+static void init_user_params(struct user_params *uparams)
+{
+ memset(uparams, 0, sizeof(*uparams));
+
+ uparams->cpu = 1;
+ uparams->bits = 0;
+}
+
+int main(int argc, char **argv)
+{
+ int tests = ARRAY_SIZE(resctrl_tests);
+ bool test_param_seen = false;
+ struct user_params uparams;
+ char *span_str = NULL;
+ int ret, c, i;
+
+ init_user_params(&uparams);
+
+ while ((c = getopt(argc, argv, "ht:b:n:p:")) != -1) {
char *token;
switch (c) {
+ case 'b':
+ /*
+ * First move optind back to the (first) optarg and
+ * then build the benchmark command using the
+ * remaining arguments.
+ */
+ optind--;
+ if (argc - optind >= BENCHMARK_ARGS)
+ ksft_exit_fail_msg("Too long benchmark command");
+
+ /* Extract benchmark command from command line. */
+ for (i = 0; i < argc - optind; i++)
+ uparams.benchmark_cmd[i] = argv[i + optind];
+ uparams.benchmark_cmd[i] = NULL;
+
+ goto last_arg;
case 't':
token = strtok(optarg, ",");
- mbm_test = false;
- mba_test = false;
- cqm_test = false;
- cat_test = false;
+ if (!test_param_seen) {
+ for (i = 0; i < ARRAY_SIZE(resctrl_tests); i++)
+ resctrl_tests[i]->disabled = true;
+ tests = 0;
+ test_param_seen = true;
+ }
while (token) {
- if (!strcmp(token, "mbm")) {
- mbm_test = true;
- } else if (!strcmp(token, "mba")) {
- mba_test = true;
- } else if (!strcmp(token, "cqm")) {
- cqm_test = true;
- } else if (!strcmp(token, "cat")) {
- cat_test = true;
- } else {
- printf("invalid argument\n");
+ bool found = false;
+
+ for (i = 0; i < ARRAY_SIZE(resctrl_tests); i++) {
+ if (!strcasecmp(token, resctrl_tests[i]->name) ||
+ (resctrl_tests[i]->group &&
+ !strcasecmp(token, resctrl_tests[i]->group))) {
+ if (resctrl_tests[i]->disabled)
+ tests++;
+ resctrl_tests[i]->disabled = false;
+ found = true;
+ }
+ }
+
+ if (!found) {
+ printf("invalid test: %s\n", token);
return -1;
}
- token = strtok(NULL, ":\t");
+ token = strtok(NULL, ",");
}
break;
case 'p':
- cpu_no = atoi(optarg);
+ uparams.cpu = atoi(optarg);
break;
case 'n':
- no_of_bits = atoi(optarg);
+ uparams.bits = atoi(optarg);
+ if (uparams.bits <= 0) {
+ printf("Bail out! invalid argument for no_of_bits\n");
+ return -1;
+ }
break;
case 'h':
cmd_help();
@@ -117,8 +243,9 @@ int main(int argc, char **argv)
return -1;
}
}
+last_arg:
- printf("TAP version 13\n");
+ ksft_print_header();
/*
* Typically we need root privileges, because:
@@ -126,77 +253,34 @@ int main(int argc, char **argv)
* 2. We execute perf commands
*/
if (geteuid() != 0)
- printf("# WARNING: not running as root, tests may fail.\n");
+ return ksft_exit_skip("Not running as root. Skipping...\n");
- /* Detect AMD vendor */
- detect_amd();
+ if (!check_resctrlfs_support())
+ return ksft_exit_skip("resctrl FS does not exist. Enable X86_CPU_RESCTRL config option.\n");
- if (has_ben) {
- /* Extract benchmark command from command line. */
- for (i = ben_ind; i < argc; i++) {
- benchmark_cmd[i - ben_ind] = benchmark_cmd_area[i];
- sprintf(benchmark_cmd[i - ben_ind], "%s", argv[i]);
- }
- benchmark_cmd[ben_count] = NULL;
- } else {
- /* If no benchmark is given by "-b" argument, use fill_buf. */
- for (i = 0; i < 6; i++)
- benchmark_cmd[i] = benchmark_cmd_area[i];
-
- strcpy(benchmark_cmd[0], "fill_buf");
- sprintf(benchmark_cmd[1], "%d", span);
- strcpy(benchmark_cmd[2], "1");
- strcpy(benchmark_cmd[3], "1");
- strcpy(benchmark_cmd[4], "0");
- strcpy(benchmark_cmd[5], "");
- benchmark_cmd[6] = NULL;
- }
+ if (umount_resctrlfs())
+ return ksft_exit_skip("resctrl FS unmount failed.\n");
- sprintf(bw_report, "reads");
- sprintf(bm_type, "fill_buf");
-
- check_resctrlfs_support();
filter_dmesg();
- if (!is_amd && mbm_test) {
- printf("# Starting MBM BW change ...\n");
- if (!has_ben)
- sprintf(benchmark_cmd[5], "%s", "mba");
- res = mbm_bw_change(span, cpu_no, bw_report, benchmark_cmd);
- printf("%sok MBM: bw change\n", res ? "not " : "");
- mbm_test_cleanup();
- tests_run++;
- }
-
- if (!is_amd && mba_test) {
- printf("# Starting MBA Schemata change ...\n");
- if (!has_ben)
- sprintf(benchmark_cmd[1], "%d", span);
- res = mba_schemata_change(cpu_no, bw_report, benchmark_cmd);
- printf("%sok MBA: schemata change\n", res ? "not " : "");
- mba_test_cleanup();
- tests_run++;
- }
-
- if (cqm_test) {
- printf("# Starting CQM test ...\n");
- if (!has_ben)
- sprintf(benchmark_cmd[5], "%s", "cqm");
- res = cqm_resctrl_val(cpu_no, no_of_bits, benchmark_cmd);
- printf("%sok CQM: test\n", res ? "not " : "");
- cqm_test_cleanup();
- tests_run++;
+ if (!uparams.benchmark_cmd[0]) {
+ /* If no benchmark is given by "-b" argument, use fill_buf. */
+ uparams.benchmark_cmd[0] = "fill_buf";
+ ret = asprintf(&span_str, "%u", DEFAULT_SPAN);
+ if (ret < 0)
+ ksft_exit_fail_msg("Out of memory!\n");
+ uparams.benchmark_cmd[1] = span_str;
+ uparams.benchmark_cmd[2] = "1";
+ uparams.benchmark_cmd[3] = "0";
+ uparams.benchmark_cmd[4] = "false";
+ uparams.benchmark_cmd[5] = NULL;
}
- if (cat_test) {
- printf("# Starting CAT test ...\n");
- res = cat_perf_miss_val(cpu_no, no_of_bits, "L3");
- printf("%sok CAT: test\n", res ? "not " : "");
- tests_run++;
- cat_test_cleanup();
- }
+ ksft_set_plan(tests);
- printf("1..%d\n", tests_run);
+ for (i = 0; i < ARRAY_SIZE(resctrl_tests); i++)
+ run_single_test(resctrl_tests[i], &uparams);
- return 0;
+ free(span_str);
+ ksft_finished();
}
diff --git a/tools/testing/selftests/resctrl/resctrl_val.c b/tools/testing/selftests/resctrl/resctrl_val.c
index 520fea3606d1..5a49f07a6c85 100644
--- a/tools/testing/selftests/resctrl/resctrl_val.c
+++ b/tools/testing/selftests/resctrl/resctrl_val.c
@@ -156,12 +156,12 @@ static int read_from_imc_dir(char *imc_dir, int count)
sprintf(imc_counter_type, "%s%s", imc_dir, "type");
fp = fopen(imc_counter_type, "r");
if (!fp) {
- perror("Failed to open imc counter type file");
+ ksft_perror("Failed to open iMC counter type file");
return -1;
}
if (fscanf(fp, "%u", &imc_counters_config[count][READ].type) <= 0) {
- perror("Could not get imc type");
+ ksft_perror("Could not get iMC type");
fclose(fp);
return -1;
@@ -175,12 +175,12 @@ static int read_from_imc_dir(char *imc_dir, int count)
sprintf(imc_counter_cfg, "%s%s", imc_dir, READ_FILE_NAME);
fp = fopen(imc_counter_cfg, "r");
if (!fp) {
- perror("Failed to open imc config file");
+ ksft_perror("Failed to open iMC config file");
return -1;
}
if (fscanf(fp, "%s", cas_count_cfg) <= 0) {
- perror("Could not get imc cas count read");
+ ksft_perror("Could not get iMC cas count read");
fclose(fp);
return -1;
@@ -193,12 +193,12 @@ static int read_from_imc_dir(char *imc_dir, int count)
sprintf(imc_counter_cfg, "%s%s", imc_dir, WRITE_FILE_NAME);
fp = fopen(imc_counter_cfg, "r");
if (!fp) {
- perror("Failed to open imc config file");
+ ksft_perror("Failed to open iMC config file");
return -1;
}
if (fscanf(fp, "%s", cas_count_cfg) <= 0) {
- perror("Could not get imc cas count write");
+ ksft_perror("Could not get iMC cas count write");
fclose(fp);
return -1;
@@ -221,8 +221,8 @@ static int read_from_imc_dir(char *imc_dir, int count)
*/
static int num_of_imcs(void)
{
+ char imc_dir[512], *temp;
unsigned int count = 0;
- char imc_dir[512];
struct dirent *ep;
int ret;
DIR *dp;
@@ -230,7 +230,25 @@ static int num_of_imcs(void)
dp = opendir(DYN_PMU_PATH);
if (dp) {
while ((ep = readdir(dp))) {
- if (strstr(ep->d_name, UNCORE_IMC)) {
+ temp = strstr(ep->d_name, UNCORE_IMC);
+ if (!temp)
+ continue;
+
+ /*
+ * imc counters are named as "uncore_imc_<n>", hence
+ * increment the pointer to point to <n>. Note that
+ * sizeof(UNCORE_IMC) would count for null character as
+ * well and hence the last underscore character in
+ * uncore_imc'_' need not be counted.
+ */
+ temp = temp + sizeof(UNCORE_IMC);
+
+ /*
+ * Some directories under "DYN_PMU_PATH" could have
+ * names like "uncore_imc_free_running", hence, check if
+ * first character is a numerical digit or not.
+ */
+ if (temp[0] >= '0' && temp[0] <= '9') {
sprintf(imc_dir, "%s/%s/", DYN_PMU_PATH,
ep->d_name);
ret = read_from_imc_dir(imc_dir, count);
@@ -244,12 +262,12 @@ static int num_of_imcs(void)
}
closedir(dp);
if (count == 0) {
- perror("Unable find iMC counters!\n");
+ ksft_print_msg("Unable to find iMC counters\n");
return -1;
}
} else {
- perror("Unable to open PMU directory!\n");
+ ksft_perror("Unable to open PMU directory");
return -1;
}
@@ -282,9 +300,9 @@ static int initialize_mem_bw_imc(void)
* Memory B/W utilized by a process on a socket can be calculated using
* iMC counters. Perf events are used to read these counters.
*
- * Return: >= 0 on success. < 0 on failure.
+ * Return: = 0 on success. < 0 on failure.
*/
-static float get_mem_bw_imc(int cpu_no, char *bw_report)
+static int get_mem_bw_imc(int cpu_no, char *bw_report, float *bw_imc)
{
float reads, writes, of_mul_read, of_mul_write;
int imc, j, ret;
@@ -321,14 +339,14 @@ static float get_mem_bw_imc(int cpu_no, char *bw_report)
if (read(r->fd, &r->return_value,
sizeof(struct membw_read_format)) == -1) {
- perror("Couldn't get read b/w through iMC");
+ ksft_perror("Couldn't get read b/w through iMC");
return -1;
}
if (read(w->fd, &w->return_value,
sizeof(struct membw_read_format)) == -1) {
- perror("Couldn't get write bw through iMC");
+ ksft_perror("Couldn't get write bw through iMC");
return -1;
}
@@ -355,29 +373,34 @@ static float get_mem_bw_imc(int cpu_no, char *bw_report)
close(imc_counters_config[imc][WRITE].fd);
}
- if (strcmp(bw_report, "reads") == 0)
- return reads;
+ if (strcmp(bw_report, "reads") == 0) {
+ *bw_imc = reads;
+ return 0;
+ }
- if (strcmp(bw_report, "writes") == 0)
- return writes;
+ if (strcmp(bw_report, "writes") == 0) {
+ *bw_imc = writes;
+ return 0;
+ }
- return (reads + writes);
+ *bw_imc = reads + writes;
+ return 0;
}
-void set_mbm_path(const char *ctrlgrp, const char *mongrp, int resource_id)
+void set_mbm_path(const char *ctrlgrp, const char *mongrp, int domain_id)
{
if (ctrlgrp && mongrp)
sprintf(mbm_total_path, CON_MON_MBM_LOCAL_BYTES_PATH,
- RESCTRL_PATH, ctrlgrp, mongrp, resource_id);
+ RESCTRL_PATH, ctrlgrp, mongrp, domain_id);
else if (!ctrlgrp && mongrp)
sprintf(mbm_total_path, MON_MBM_LOCAL_BYTES_PATH, RESCTRL_PATH,
- mongrp, resource_id);
+ mongrp, domain_id);
else if (ctrlgrp && !mongrp)
sprintf(mbm_total_path, CON_MBM_LOCAL_BYTES_PATH, RESCTRL_PATH,
- ctrlgrp, resource_id);
+ ctrlgrp, domain_id);
else if (!ctrlgrp && !mongrp)
sprintf(mbm_total_path, MBM_LOCAL_BYTES_PATH, RESCTRL_PATH,
- resource_id);
+ domain_id);
}
/*
@@ -390,23 +413,23 @@ void set_mbm_path(const char *ctrlgrp, const char *mongrp, int resource_id)
static void initialize_mem_bw_resctrl(const char *ctrlgrp, const char *mongrp,
int cpu_no, char *resctrl_val)
{
- int resource_id;
+ int domain_id;
- if (get_resource_id(cpu_no, &resource_id) < 0) {
- perror("Could not get resource_id");
+ if (get_domain_id("MB", cpu_no, &domain_id) < 0) {
+ ksft_print_msg("Could not get domain ID\n");
return;
}
- if (strcmp(resctrl_val, "mbm") == 0)
- set_mbm_path(ctrlgrp, mongrp, resource_id);
+ if (!strncmp(resctrl_val, MBM_STR, sizeof(MBM_STR)))
+ set_mbm_path(ctrlgrp, mongrp, domain_id);
- if ((strcmp(resctrl_val, "mba") == 0)) {
+ if (!strncmp(resctrl_val, MBA_STR, sizeof(MBA_STR))) {
if (ctrlgrp)
sprintf(mbm_total_path, CON_MBM_LOCAL_BYTES_PATH,
- RESCTRL_PATH, ctrlgrp, resource_id);
+ RESCTRL_PATH, ctrlgrp, domain_id);
else
sprintf(mbm_total_path, MBM_LOCAL_BYTES_PATH,
- RESCTRL_PATH, resource_id);
+ RESCTRL_PATH, domain_id);
}
}
@@ -420,48 +443,90 @@ static void initialize_mem_bw_resctrl(const char *ctrlgrp, const char *mongrp,
* 1. If con_mon grp is given, then read from it
* 2. If con_mon grp is not given, then read from root con_mon grp
*/
-static unsigned long get_mem_bw_resctrl(void)
+static int get_mem_bw_resctrl(unsigned long *mbm_total)
{
- unsigned long mbm_total = 0;
FILE *fp;
fp = fopen(mbm_total_path, "r");
if (!fp) {
- perror("Failed to open total bw file");
+ ksft_perror("Failed to open total bw file");
return -1;
}
- if (fscanf(fp, "%lu", &mbm_total) <= 0) {
- perror("Could not get mbm local bytes");
+ if (fscanf(fp, "%lu", mbm_total) <= 0) {
+ ksft_perror("Could not get mbm local bytes");
fclose(fp);
return -1;
}
fclose(fp);
- return mbm_total;
+ return 0;
}
pid_t bm_pid, ppid;
void ctrlc_handler(int signum, siginfo_t *info, void *ptr)
{
- kill(bm_pid, SIGKILL);
+ /* Only kill child after bm_pid is set after fork() */
+ if (bm_pid)
+ kill(bm_pid, SIGKILL);
umount_resctrlfs();
tests_cleanup();
- printf("Ending\n\n");
+ ksft_print_msg("Ending\n\n");
exit(EXIT_SUCCESS);
}
/*
+ * Register CTRL-C handler for parent, as it has to kill
+ * child process before exiting.
+ */
+int signal_handler_register(void)
+{
+ struct sigaction sigact = {};
+ int ret = 0;
+
+ bm_pid = 0;
+
+ sigact.sa_sigaction = ctrlc_handler;
+ sigemptyset(&sigact.sa_mask);
+ sigact.sa_flags = SA_SIGINFO;
+ if (sigaction(SIGINT, &sigact, NULL) ||
+ sigaction(SIGTERM, &sigact, NULL) ||
+ sigaction(SIGHUP, &sigact, NULL)) {
+ ksft_perror("sigaction");
+ ret = -1;
+ }
+ return ret;
+}
+
+/*
+ * Reset signal handler to SIG_DFL.
+ * Non-Value return because the caller should keep
+ * the error code of other path even if sigaction fails.
+ */
+void signal_handler_unregister(void)
+{
+ struct sigaction sigact = {};
+
+ sigact.sa_handler = SIG_DFL;
+ sigemptyset(&sigact.sa_mask);
+ if (sigaction(SIGINT, &sigact, NULL) ||
+ sigaction(SIGTERM, &sigact, NULL) ||
+ sigaction(SIGHUP, &sigact, NULL)) {
+ ksft_perror("sigaction");
+ }
+}
+
+/*
* print_results_bw: the memory bandwidth results are stored in a file
* @filename: file that stores the results
* @bm_pid: child pid that runs benchmark
* @bw_imc: perf imc counter value
* @bw_resc: memory bandwidth value
*
- * Return: 0 on success. non-zero on failure.
+ * Return: 0 on success, < 0 on error.
*/
static int print_results_bw(char *filename, int bm_pid, float bw_imc,
unsigned long bw_resc)
@@ -475,16 +540,16 @@ static int print_results_bw(char *filename, int bm_pid, float bw_imc,
} else {
fp = fopen(filename, "a");
if (!fp) {
- perror("Cannot open results file");
+ ksft_perror("Cannot open results file");
- return errno;
+ return -1;
}
if (fprintf(fp, "Pid: %d \t Mem_BW_iMC: %f \t Mem_BW_resc: %lu \t Difference: %lu\n",
bm_pid, bw_imc, bw_resc, diff) <= 0) {
+ ksft_print_msg("Could not log results\n");
fclose(fp);
- perror("Could not log results.");
- return errno;
+ return -1;
}
fclose(fp);
}
@@ -492,7 +557,7 @@ static int print_results_bw(char *filename, int bm_pid, float bw_imc,
return 0;
}
-static void set_cqm_path(const char *ctrlgrp, const char *mongrp, char sock_num)
+static void set_cmt_path(const char *ctrlgrp, const char *mongrp, char sock_num)
{
if (strlen(ctrlgrp) && strlen(mongrp))
sprintf(llc_occup_path, CON_MON_LCC_OCCUP_PATH, RESCTRL_PATH,
@@ -512,26 +577,28 @@ static void set_cqm_path(const char *ctrlgrp, const char *mongrp, char sock_num)
* @ctrlgrp: Name of the control monitor group (con_mon grp)
* @mongrp: Name of the monitor group (mon grp)
* @cpu_no: CPU number that the benchmark PID is binded to
- * @resctrl_val: Resctrl feature (Eg: cat, cqm.. etc)
+ * @resctrl_val: Resctrl feature (Eg: cat, cmt.. etc)
*/
static void initialize_llc_occu_resctrl(const char *ctrlgrp, const char *mongrp,
int cpu_no, char *resctrl_val)
{
- int resource_id;
+ int domain_id;
- if (get_resource_id(cpu_no, &resource_id) < 0) {
- perror("# Unable to resource_id");
+ if (get_domain_id("L3", cpu_no, &domain_id) < 0) {
+ ksft_print_msg("Could not get domain ID\n");
return;
}
- if (strcmp(resctrl_val, "cqm") == 0)
- set_cqm_path(ctrlgrp, mongrp, resource_id);
+ if (!strncmp(resctrl_val, CMT_STR, sizeof(CMT_STR)))
+ set_cmt_path(ctrlgrp, mongrp, domain_id);
}
-static int
-measure_vals(struct resctrl_val_param *param, unsigned long *bw_resc_start)
+static int measure_vals(const struct user_params *uparams,
+ struct resctrl_val_param *param,
+ unsigned long *bw_resc_start)
{
- unsigned long bw_imc, bw_resc, bw_resc_end;
+ unsigned long bw_resc, bw_resc_end;
+ float bw_imc;
int ret;
/*
@@ -541,13 +608,13 @@ measure_vals(struct resctrl_val_param *param, unsigned long *bw_resc_start)
* Compare the two values to validate resctrl value.
* It takes 1sec to measure the data.
*/
- bw_imc = get_mem_bw_imc(param->cpu_no, param->bw_report);
- if (bw_imc <= 0)
- return bw_imc;
+ ret = get_mem_bw_imc(uparams->cpu, param->bw_report, &bw_imc);
+ if (ret < 0)
+ return ret;
- bw_resc_end = get_mem_bw_resctrl();
- if (bw_resc_end <= 0)
- return bw_resc_end;
+ ret = get_mem_bw_resctrl(&bw_resc_end);
+ if (ret < 0)
+ return ret;
bw_resc = (bw_resc_end - *bw_resc_start) / MB;
ret = print_results_bw(param->filename, bm_pid, bw_imc, bw_resc);
@@ -560,14 +627,74 @@ measure_vals(struct resctrl_val_param *param, unsigned long *bw_resc_start)
}
/*
+ * run_benchmark - Run a specified benchmark or fill_buf (default benchmark)
+ * in specified signal. Direct benchmark stdio to /dev/null.
+ * @signum: signal number
+ * @info: signal info
+ * @ucontext: user context in signal handling
+ */
+static void run_benchmark(int signum, siginfo_t *info, void *ucontext)
+{
+ int operation, ret, memflush;
+ char **benchmark_cmd;
+ size_t span;
+ bool once;
+ FILE *fp;
+
+ benchmark_cmd = info->si_ptr;
+
+ /*
+ * Direct stdio of child to /dev/null, so that only parent writes to
+ * stdio (console)
+ */
+ fp = freopen("/dev/null", "w", stdout);
+ if (!fp) {
+ ksft_perror("Unable to direct benchmark status to /dev/null");
+ PARENT_EXIT();
+ }
+
+ if (strcmp(benchmark_cmd[0], "fill_buf") == 0) {
+ /* Execute default fill_buf benchmark */
+ span = strtoul(benchmark_cmd[1], NULL, 10);
+ memflush = atoi(benchmark_cmd[2]);
+ operation = atoi(benchmark_cmd[3]);
+ if (!strcmp(benchmark_cmd[4], "true")) {
+ once = true;
+ } else if (!strcmp(benchmark_cmd[4], "false")) {
+ once = false;
+ } else {
+ ksft_print_msg("Invalid once parameter\n");
+ PARENT_EXIT();
+ }
+
+ if (run_fill_buf(span, memflush, operation, once))
+ fprintf(stderr, "Error in running fill buffer\n");
+ } else {
+ /* Execute specified benchmark */
+ ret = execvp(benchmark_cmd[0], benchmark_cmd);
+ if (ret)
+ ksft_perror("execvp");
+ }
+
+ fclose(stdout);
+ ksft_print_msg("Unable to run specified benchmark\n");
+ PARENT_EXIT();
+}
+
+/*
* resctrl_val: execute benchmark and measure memory bandwidth on
* the benchmark
+ * @test: test information structure
+ * @uparams: user supplied parameters
* @benchmark_cmd: benchmark command and its arguments
* @param: parameters passed to resctrl_val()
*
- * Return: 0 on success. non-zero on failure.
+ * Return: 0 when the test was run, < 0 on error.
*/
-int resctrl_val(char **benchmark_cmd, struct resctrl_val_param *param)
+int resctrl_val(const struct resctrl_test *test,
+ const struct user_params *uparams,
+ const char * const *benchmark_cmd,
+ struct resctrl_val_param *param)
{
char *resctrl_val = param->resctrl_val;
unsigned long bw_resc_start = 0;
@@ -579,17 +706,13 @@ int resctrl_val(char **benchmark_cmd, struct resctrl_val_param *param)
if (strcmp(param->filename, "") == 0)
sprintf(param->filename, "stdio");
- if ((strcmp(resctrl_val, "mba")) == 0 ||
- (strcmp(resctrl_val, "mbm")) == 0) {
+ if (!strncmp(resctrl_val, MBA_STR, sizeof(MBA_STR)) ||
+ !strncmp(resctrl_val, MBM_STR, sizeof(MBM_STR))) {
ret = validate_bw_report_request(param->bw_report);
if (ret)
return ret;
}
- ret = remount_resctrlfs(param->mum_resctrlfs);
- if (ret)
- return ret;
-
/*
* If benchmark wasn't successfully started by child, then child should
* kill parent, so save parent's pid
@@ -597,7 +720,7 @@ int resctrl_val(char **benchmark_cmd, struct resctrl_val_param *param)
ppid = getpid();
if (pipe(pipefd)) {
- perror("# Unable to create pipe");
+ ksft_perror("Unable to create pipe");
return -1;
}
@@ -606,9 +729,10 @@ int resctrl_val(char **benchmark_cmd, struct resctrl_val_param *param)
* Fork to start benchmark, save child's pid so that it can be killed
* when needed
*/
+ fflush(stdout);
bm_pid = fork();
if (bm_pid == -1) {
- perror("# Unable to fork");
+ ksft_perror("Unable to fork");
return -1;
}
@@ -625,15 +749,17 @@ int resctrl_val(char **benchmark_cmd, struct resctrl_val_param *param)
sigact.sa_flags = SA_SIGINFO;
/* Register for "SIGUSR1" signal from parent */
- if (sigaction(SIGUSR1, &sigact, NULL))
- PARENT_EXIT("Can't register child for signal");
+ if (sigaction(SIGUSR1, &sigact, NULL)) {
+ ksft_perror("Can't register child for signal");
+ PARENT_EXIT();
+ }
/* Tell parent that child is ready */
close(pipefd[0]);
pipe_message = 1;
if (write(pipefd[1], &pipe_message, sizeof(pipe_message)) <
sizeof(pipe_message)) {
- perror("# failed signaling parent process");
+ ksft_perror("Failed signaling parent process");
close(pipefd[1]);
return -1;
}
@@ -642,29 +768,22 @@ int resctrl_val(char **benchmark_cmd, struct resctrl_val_param *param)
/* Suspend child until delivery of "SIGUSR1" from parent */
sigsuspend(&sigact.sa_mask);
- PARENT_EXIT("Child is done");
+ ksft_perror("Child is done");
+ PARENT_EXIT();
}
- printf("# benchmark PID: %d\n", bm_pid);
+ ksft_print_msg("Benchmark PID: %d\n", bm_pid);
/*
- * Register CTRL-C handler for parent, as it has to kill benchmark
- * before exiting
+ * The cast removes constness but nothing mutates benchmark_cmd within
+ * the context of this process. At the receiving process, it becomes
+ * argv, which is mutable, on exec() but that's after fork() so it
+ * doesn't matter for the process running the tests.
*/
- sigact.sa_sigaction = ctrlc_handler;
- sigemptyset(&sigact.sa_mask);
- sigact.sa_flags = SA_SIGINFO;
- if (sigaction(SIGINT, &sigact, NULL) ||
- sigaction(SIGHUP, &sigact, NULL)) {
- perror("# sigaction");
- ret = errno;
- goto out;
- }
-
- value.sival_ptr = benchmark_cmd;
+ value.sival_ptr = (void *)benchmark_cmd;
/* Taskset benchmark to specified cpu */
- ret = taskset_benchmark(bm_pid, param->cpu_no);
+ ret = taskset_benchmark(bm_pid, uparams->cpu, NULL);
if (ret)
goto out;
@@ -674,24 +793,24 @@ int resctrl_val(char **benchmark_cmd, struct resctrl_val_param *param)
if (ret)
goto out;
- if ((strcmp(resctrl_val, "mbm") == 0) ||
- (strcmp(resctrl_val, "mba") == 0)) {
+ if (!strncmp(resctrl_val, MBM_STR, sizeof(MBM_STR)) ||
+ !strncmp(resctrl_val, MBA_STR, sizeof(MBA_STR))) {
ret = initialize_mem_bw_imc();
if (ret)
goto out;
initialize_mem_bw_resctrl(param->ctrlgrp, param->mongrp,
- param->cpu_no, resctrl_val);
- } else if (strcmp(resctrl_val, "cqm") == 0)
+ uparams->cpu, resctrl_val);
+ } else if (!strncmp(resctrl_val, CMT_STR, sizeof(CMT_STR)))
initialize_llc_occu_resctrl(param->ctrlgrp, param->mongrp,
- param->cpu_no, resctrl_val);
+ uparams->cpu, resctrl_val);
/* Parent waits for child to be ready. */
close(pipefd[1]);
while (pipe_message != 1) {
if (read(pipefd[0], &pipe_message, sizeof(pipe_message)) <
sizeof(pipe_message)) {
- perror("# failed reading message from child process");
+ ksft_perror("Failed reading message from child process");
close(pipefd[0]);
goto out;
}
@@ -700,8 +819,8 @@ int resctrl_val(char **benchmark_cmd, struct resctrl_val_param *param)
/* Signal child to start benchmark */
if (sigqueue(bm_pid, SIGUSR1, value) == -1) {
- perror("# sigqueue SIGUSR1 to child");
- ret = errno;
+ ksft_perror("sigqueue SIGUSR1 to child");
+ ret = -1;
goto out;
}
@@ -710,35 +829,29 @@ int resctrl_val(char **benchmark_cmd, struct resctrl_val_param *param)
/* Test runs until the callback setup() tells the test to stop. */
while (1) {
- if ((strcmp(resctrl_val, "mbm") == 0) ||
- (strcmp(resctrl_val, "mba") == 0)) {
- ret = param->setup(1, param);
- if (ret) {
- ret = 0;
- break;
- }
+ ret = param->setup(test, uparams, param);
+ if (ret == END_OF_TESTS) {
+ ret = 0;
+ break;
+ }
+ if (ret < 0)
+ break;
- ret = measure_vals(param, &bw_resc_start);
+ if (!strncmp(resctrl_val, MBM_STR, sizeof(MBM_STR)) ||
+ !strncmp(resctrl_val, MBA_STR, sizeof(MBA_STR))) {
+ ret = measure_vals(uparams, param, &bw_resc_start);
if (ret)
break;
- } else if (strcmp(resctrl_val, "cqm") == 0) {
- ret = param->setup(1, param);
- if (ret) {
- ret = 0;
- break;
- }
+ } else if (!strncmp(resctrl_val, CMT_STR, sizeof(CMT_STR))) {
sleep(1);
- ret = measure_cache_vals(param, bm_pid);
+ ret = measure_llc_resctrl(param->filename, bm_pid);
if (ret)
break;
- } else {
- break;
}
}
out:
kill(bm_pid, SIGKILL);
- umount_resctrlfs();
return ret;
}
diff --git a/tools/testing/selftests/resctrl/resctrlfs.c b/tools/testing/selftests/resctrl/resctrlfs.c
index 19c0ec4045a4..1cade75176eb 100644
--- a/tools/testing/selftests/resctrl/resctrlfs.c
+++ b/tools/testing/selftests/resctrl/resctrlfs.c
@@ -8,9 +8,10 @@
* Sai Praneeth Prakhya <sai.praneeth.prakhya@intel.com>,
* Fenghua Yu <fenghua.yu@intel.com>
*/
-#include "resctrl.h"
+#include <fcntl.h>
+#include <limits.h>
-int tests_run;
+#include "resctrl.h"
static int find_resctrl_mount(char *buffer)
{
@@ -19,7 +20,7 @@ static int find_resctrl_mount(char *buffer)
mounts = fopen("/proc/mounts", "r");
if (!mounts) {
- perror("/proc/mounts");
+ ksft_perror("/proc/mounts");
return -ENXIO;
}
while (!feof(mounts)) {
@@ -49,85 +50,103 @@ static int find_resctrl_mount(char *buffer)
return -ENOENT;
}
-char cbm_mask[256];
-
/*
- * remount_resctrlfs - Remount resctrl FS at /sys/fs/resctrl
- * @mum_resctrlfs: Should the resctrl FS be remounted?
+ * mount_resctrlfs - Mount resctrl FS at /sys/fs/resctrl
*
- * If not mounted, mount it.
- * If mounted and mum_resctrlfs then remount resctrl FS.
- * If mounted and !mum_resctrlfs then noop
+ * Mounts resctrl FS. Fails if resctrl FS is already mounted to avoid
+ * pre-existing settings interfering with the test results.
*
- * Return: 0 on success, non-zero on failure
+ * Return: 0 on success, < 0 on error.
*/
-int remount_resctrlfs(bool mum_resctrlfs)
+int mount_resctrlfs(void)
{
- char mountpoint[256];
int ret;
- ret = find_resctrl_mount(mountpoint);
- if (ret)
- strcpy(mountpoint, RESCTRL_PATH);
-
- if (!ret && mum_resctrlfs && umount(mountpoint)) {
- printf("not ok unmounting \"%s\"\n", mountpoint);
- perror("# umount");
- tests_run++;
- }
-
- if (!ret && !mum_resctrlfs)
- return 0;
+ ret = find_resctrl_mount(NULL);
+ if (ret != -ENOENT)
+ return -1;
+ ksft_print_msg("Mounting resctrl to \"%s\"\n", RESCTRL_PATH);
ret = mount("resctrl", RESCTRL_PATH, "resctrl", 0, NULL);
- printf("%sok mounting resctrl to \"%s\"\n", ret ? "not " : "",
- RESCTRL_PATH);
if (ret)
- perror("# mount");
-
- tests_run++;
+ ksft_perror("mount");
return ret;
}
int umount_resctrlfs(void)
{
- if (umount(RESCTRL_PATH)) {
- perror("# Unable to umount resctrl");
+ char mountpoint[256];
+ int ret;
- return errno;
+ ret = find_resctrl_mount(mountpoint);
+ if (ret == -ENOENT)
+ return 0;
+ if (ret)
+ return ret;
+
+ if (umount(mountpoint)) {
+ ksft_perror("Unable to umount resctrl");
+
+ return -1;
}
return 0;
}
/*
- * get_resource_id - Get socket number/l3 id for a specified CPU
+ * get_cache_level - Convert cache level from string to integer
+ * @cache_type: Cache level as string
+ *
+ * Return: cache level as integer or -1 if @cache_type is invalid.
+ */
+static int get_cache_level(const char *cache_type)
+{
+ if (!strcmp(cache_type, "L3"))
+ return 3;
+ if (!strcmp(cache_type, "L2"))
+ return 2;
+
+ ksft_print_msg("Invalid cache level\n");
+ return -1;
+}
+
+static int get_resource_cache_level(const char *resource)
+{
+ /* "MB" use L3 (LLC) as resource */
+ if (!strcmp(resource, "MB"))
+ return 3;
+ return get_cache_level(resource);
+}
+
+/*
+ * get_domain_id - Get resctrl domain ID for a specified CPU
+ * @resource: resource name
* @cpu_no: CPU number
- * @resource_id: Socket number or l3_id
+ * @domain_id: domain ID (cache ID; for MB, L3 cache ID)
*
* Return: >= 0 on success, < 0 on failure.
*/
-int get_resource_id(int cpu_no, int *resource_id)
+int get_domain_id(const char *resource, int cpu_no, int *domain_id)
{
char phys_pkg_path[1024];
+ int cache_num;
FILE *fp;
- if (is_amd)
- sprintf(phys_pkg_path, "%s%d/cache/index3/id",
- PHYS_ID_PATH, cpu_no);
- else
- sprintf(phys_pkg_path, "%s%d/topology/physical_package_id",
- PHYS_ID_PATH, cpu_no);
+ cache_num = get_resource_cache_level(resource);
+ if (cache_num < 0)
+ return cache_num;
+
+ sprintf(phys_pkg_path, "%s%d/cache/index%d/id", PHYS_ID_PATH, cpu_no, cache_num);
fp = fopen(phys_pkg_path, "r");
if (!fp) {
- perror("Failed to open physical_package_id");
+ ksft_perror("Failed to open cache id file");
return -1;
}
- if (fscanf(fp, "%d", resource_id) <= 0) {
- perror("Could not get socket number or l3 id");
+ if (fscanf(fp, "%d", domain_id) <= 0) {
+ ksft_perror("Could not get domain ID");
fclose(fp);
return -1;
@@ -145,31 +164,26 @@ int get_resource_id(int cpu_no, int *resource_id)
*
* Return: = 0 on success, < 0 on failure.
*/
-int get_cache_size(int cpu_no, char *cache_type, unsigned long *cache_size)
+int get_cache_size(int cpu_no, const char *cache_type, unsigned long *cache_size)
{
char cache_path[1024], cache_str[64];
int length, i, cache_num;
FILE *fp;
- if (!strcmp(cache_type, "L3")) {
- cache_num = 3;
- } else if (!strcmp(cache_type, "L2")) {
- cache_num = 2;
- } else {
- perror("Invalid cache level");
- return -1;
- }
+ cache_num = get_cache_level(cache_type);
+ if (cache_num < 0)
+ return cache_num;
sprintf(cache_path, "/sys/bus/cpu/devices/cpu%d/cache/index%d/size",
cpu_no, cache_num);
fp = fopen(cache_path, "r");
if (!fp) {
- perror("Failed to open cache size");
+ ksft_perror("Failed to open cache size");
return -1;
}
if (fscanf(fp, "%s", cache_str) <= 0) {
- perror("Could not get cache_size");
+ ksft_perror("Could not get cache_size");
fclose(fp);
return -1;
@@ -203,28 +217,29 @@ int get_cache_size(int cpu_no, char *cache_type, unsigned long *cache_size)
#define CORE_SIBLINGS_PATH "/sys/bus/cpu/devices/cpu"
/*
- * get_cbm_mask - Get cbm mask for given cache
- * @cache_type: Cache level L2/L3
- *
- * Mask is stored in cbm_mask which is global variable.
+ * get_bit_mask - Get bit mask from given file
+ * @filename: File containing the mask
+ * @mask: The bit mask returned as unsigned long
*
* Return: = 0 on success, < 0 on failure.
*/
-int get_cbm_mask(char *cache_type)
+static int get_bit_mask(const char *filename, unsigned long *mask)
{
- char cbm_mask_path[1024];
FILE *fp;
- sprintf(cbm_mask_path, "%s/%s/cbm_mask", CBM_MASK_PATH, cache_type);
+ if (!filename || !mask)
+ return -1;
- fp = fopen(cbm_mask_path, "r");
+ fp = fopen(filename, "r");
if (!fp) {
- perror("Failed to open cache level");
-
+ ksft_print_msg("Failed to open bit mask file '%s': %s\n",
+ filename, strerror(errno));
return -1;
}
- if (fscanf(fp, "%s", cbm_mask) <= 0) {
- perror("Could not get max cbm_mask");
+
+ if (fscanf(fp, "%lx", mask) <= 0) {
+ ksft_print_msg("Could not read bit mask file '%s': %s\n",
+ filename, strerror(errno));
fclose(fp);
return -1;
@@ -235,63 +250,182 @@ int get_cbm_mask(char *cache_type)
}
/*
- * get_core_sibling - Get sibling core id from the same socket for given CPU
- * @cpu_no: CPU number
+ * resource_info_unsigned_get - Read an unsigned value from
+ * /sys/fs/resctrl/info/@resource/@filename
+ * @resource: Resource name that matches directory name in
+ * /sys/fs/resctrl/info
+ * @filename: File in /sys/fs/resctrl/info/@resource
+ * @val: Contains read value on success.
*
- * Return: > 0 on success, < 0 on failure.
+ * Return: = 0 on success, < 0 on failure. On success the read
+ * value is saved into @val.
*/
-int get_core_sibling(int cpu_no)
+int resource_info_unsigned_get(const char *resource, const char *filename,
+ unsigned int *val)
{
- char core_siblings_path[1024], cpu_list_str[64];
- int sibling_cpu_no = -1;
+ char file_path[PATH_MAX];
FILE *fp;
- sprintf(core_siblings_path, "%s%d/topology/core_siblings_list",
- CORE_SIBLINGS_PATH, cpu_no);
+ snprintf(file_path, sizeof(file_path), "%s/%s/%s", INFO_PATH, resource,
+ filename);
- fp = fopen(core_siblings_path, "r");
+ fp = fopen(file_path, "r");
if (!fp) {
- perror("Failed to open core siblings path");
-
+ ksft_print_msg("Error opening %s: %m\n", file_path);
return -1;
}
- if (fscanf(fp, "%s", cpu_list_str) <= 0) {
- perror("Could not get core_siblings list");
- fclose(fp);
+ if (fscanf(fp, "%u", val) <= 0) {
+ ksft_print_msg("Could not get contents of %s: %m\n", file_path);
+ fclose(fp);
return -1;
}
+
fclose(fp);
+ return 0;
+}
- char *token = strtok(cpu_list_str, "-,");
+/*
+ * create_bit_mask- Create bit mask from start, len pair
+ * @start: LSB of the mask
+ * @len Number of bits in the mask
+ */
+unsigned long create_bit_mask(unsigned int start, unsigned int len)
+{
+ return ((1UL << len) - 1UL) << start;
+}
- while (token) {
- sibling_cpu_no = atoi(token);
- /* Skipping core 0 as we don't want to run test on core 0 */
- if (sibling_cpu_no != 0)
- break;
- token = strtok(NULL, "-,");
+/*
+ * count_contiguous_bits - Returns the longest train of bits in a bit mask
+ * @val A bit mask
+ * @start The location of the least-significant bit of the longest train
+ *
+ * Return: The length of the contiguous bits in the longest train of bits
+ */
+unsigned int count_contiguous_bits(unsigned long val, unsigned int *start)
+{
+ unsigned long last_val;
+ unsigned int count = 0;
+
+ while (val) {
+ last_val = val;
+ val &= (val >> 1);
+ count++;
}
- return sibling_cpu_no;
+ if (start) {
+ if (count)
+ *start = ffsl(last_val) - 1;
+ else
+ *start = 0;
+ }
+
+ return count;
+}
+
+/*
+ * get_full_cbm - Get full Cache Bit Mask (CBM)
+ * @cache_type: Cache type as "L2" or "L3"
+ * @mask: Full cache bit mask representing the maximal portion of cache
+ * available for allocation, returned as unsigned long.
+ *
+ * Return: = 0 on success, < 0 on failure.
+ */
+int get_full_cbm(const char *cache_type, unsigned long *mask)
+{
+ char cbm_path[PATH_MAX];
+ int ret;
+
+ if (!cache_type)
+ return -1;
+
+ snprintf(cbm_path, sizeof(cbm_path), "%s/%s/cbm_mask",
+ INFO_PATH, cache_type);
+
+ ret = get_bit_mask(cbm_path, mask);
+ if (ret || !*mask)
+ return -1;
+
+ return 0;
+}
+
+/*
+ * get_shareable_mask - Get shareable mask from shareable_bits
+ * @cache_type: Cache type as "L2" or "L3"
+ * @shareable_mask: Shareable mask returned as unsigned long
+ *
+ * Return: = 0 on success, < 0 on failure.
+ */
+static int get_shareable_mask(const char *cache_type, unsigned long *shareable_mask)
+{
+ char mask_path[PATH_MAX];
+
+ if (!cache_type)
+ return -1;
+
+ snprintf(mask_path, sizeof(mask_path), "%s/%s/shareable_bits",
+ INFO_PATH, cache_type);
+
+ return get_bit_mask(mask_path, shareable_mask);
+}
+
+/*
+ * get_mask_no_shareable - Get Cache Bit Mask (CBM) without shareable bits
+ * @cache_type: Cache type as "L2" or "L3"
+ * @mask: The largest exclusive portion of the cache out of the
+ * full CBM, returned as unsigned long
+ *
+ * Parts of a cache may be shared with other devices such as GPU. This function
+ * calculates the largest exclusive portion of the cache where no other devices
+ * besides CPU have access to the cache portion.
+ *
+ * Return: = 0 on success, < 0 on failure.
+ */
+int get_mask_no_shareable(const char *cache_type, unsigned long *mask)
+{
+ unsigned long full_mask, shareable_mask;
+ unsigned int start, len;
+
+ if (get_full_cbm(cache_type, &full_mask) < 0)
+ return -1;
+ if (get_shareable_mask(cache_type, &shareable_mask) < 0)
+ return -1;
+
+ len = count_contiguous_bits(full_mask & ~shareable_mask, &start);
+ if (!len)
+ return -1;
+
+ *mask = create_bit_mask(start, len);
+
+ return 0;
}
/*
* taskset_benchmark - Taskset PID (i.e. benchmark) to a specified cpu
- * @bm_pid: PID that should be binded
- * @cpu_no: CPU number at which the PID would be binded
+ * @bm_pid: PID that should be binded
+ * @cpu_no: CPU number at which the PID would be binded
+ * @old_affinity: When not NULL, set to old CPU affinity
*
- * Return: 0 on success, non-zero on failure
+ * Return: 0 on success, < 0 on error.
*/
-int taskset_benchmark(pid_t bm_pid, int cpu_no)
+int taskset_benchmark(pid_t bm_pid, int cpu_no, cpu_set_t *old_affinity)
{
cpu_set_t my_set;
+ if (old_affinity) {
+ CPU_ZERO(old_affinity);
+ if (sched_getaffinity(bm_pid, sizeof(*old_affinity),
+ old_affinity)) {
+ ksft_perror("Unable to read CPU affinity");
+ return -1;
+ }
+ }
+
CPU_ZERO(&my_set);
CPU_SET(cpu_no, &my_set);
if (sched_setaffinity(bm_pid, sizeof(cpu_set_t), &my_set)) {
- perror("Unable to taskset benchmark");
+ ksft_perror("Unable to taskset benchmark");
return -1;
}
@@ -300,57 +434,20 @@ int taskset_benchmark(pid_t bm_pid, int cpu_no)
}
/*
- * run_benchmark - Run a specified benchmark or fill_buf (default benchmark)
- * in specified signal. Direct benchmark stdio to /dev/null.
- * @signum: signal number
- * @info: signal info
- * @ucontext: user context in signal handling
+ * taskset_restore - Taskset PID to the earlier CPU affinity
+ * @bm_pid: PID that should be reset
+ * @old_affinity: The old CPU affinity to restore
*
- * Return: void
+ * Return: 0 on success, < 0 on error.
*/
-void run_benchmark(int signum, siginfo_t *info, void *ucontext)
+int taskset_restore(pid_t bm_pid, cpu_set_t *old_affinity)
{
- int operation, ret, malloc_and_init_memory, memflush;
- unsigned long span, buffer_span;
- char **benchmark_cmd;
- char resctrl_val[64];
- FILE *fp;
-
- benchmark_cmd = info->si_ptr;
-
- /*
- * Direct stdio of child to /dev/null, so that only parent writes to
- * stdio (console)
- */
- fp = freopen("/dev/null", "w", stdout);
- if (!fp)
- PARENT_EXIT("Unable to direct benchmark status to /dev/null");
-
- if (strcmp(benchmark_cmd[0], "fill_buf") == 0) {
- /* Execute default fill_buf benchmark */
- span = strtoul(benchmark_cmd[1], NULL, 10);
- malloc_and_init_memory = atoi(benchmark_cmd[2]);
- memflush = atoi(benchmark_cmd[3]);
- operation = atoi(benchmark_cmd[4]);
- sprintf(resctrl_val, "%s", benchmark_cmd[5]);
-
- if (strcmp(resctrl_val, "cqm") != 0)
- buffer_span = span * MB;
- else
- buffer_span = span;
-
- if (run_fill_buf(buffer_span, malloc_and_init_memory, memflush,
- operation, resctrl_val))
- fprintf(stderr, "Error in running fill buffer\n");
- } else {
- /* Execute specified benchmark */
- ret = execvp(benchmark_cmd[0], benchmark_cmd);
- if (ret)
- perror("wrong\n");
+ if (sched_setaffinity(bm_pid, sizeof(*old_affinity), old_affinity)) {
+ ksft_perror("Unable to restore CPU affinity");
+ return -1;
}
- fclose(stdout);
- PARENT_EXIT("Unable to run specified benchmark");
+ return 0;
}
/*
@@ -359,7 +456,7 @@ void run_benchmark(int signum, siginfo_t *info, void *ucontext)
* @grp: Full path and name of the group
* @parent_grp: Full path and name of the parent group
*
- * Return: 0 on success, non-zero on failure
+ * Return: 0 on success, < 0 on error.
*/
static int create_grp(const char *grp_name, char *grp, const char *parent_grp)
{
@@ -384,7 +481,7 @@ static int create_grp(const char *grp_name, char *grp, const char *parent_grp)
}
closedir(dp);
} else {
- perror("Unable to open resctrl for group");
+ ksft_perror("Unable to open resctrl for group");
return -1;
}
@@ -392,7 +489,7 @@ static int create_grp(const char *grp_name, char *grp, const char *parent_grp)
/* Requested grp doesn't exist, hence create it */
if (found_grp == 0) {
if (mkdir(grp, 0) == -1) {
- perror("Unable to create group");
+ ksft_perror("Unable to create group");
return -1;
}
@@ -407,12 +504,12 @@ static int write_pid_to_tasks(char *tasks, pid_t pid)
fp = fopen(tasks, "w");
if (!fp) {
- perror("Failed to open tasks file");
+ ksft_perror("Failed to open tasks file");
return -1;
}
if (fprintf(fp, "%d\n", pid) < 0) {
- perror("Failed to wr pid to tasks file");
+ ksft_print_msg("Failed to write pid to tasks file\n");
fclose(fp);
return -1;
@@ -435,7 +532,7 @@ static int write_pid_to_tasks(char *tasks, pid_t pid)
* pid is not written, this means that pid is in con_mon grp and hence
* should consult con_mon grp's mon_data directory for results.
*
- * Return: 0 on success, non-zero on failure
+ * Return: 0 on success, < 0 on error.
*/
int write_bm_pid_to_resctrl(pid_t bm_pid, char *ctrlgrp, char *mongrp,
char *resctrl_val)
@@ -458,9 +555,9 @@ int write_bm_pid_to_resctrl(pid_t bm_pid, char *ctrlgrp, char *mongrp,
if (ret)
goto out;
- /* Create mon grp and write pid into it for "mbm" and "cqm" test */
- if ((strcmp(resctrl_val, "cqm") == 0) ||
- (strcmp(resctrl_val, "mbm") == 0)) {
+ /* Create mon grp and write pid into it for "mbm" and "cmt" test */
+ if (!strncmp(resctrl_val, CMT_STR, sizeof(CMT_STR)) ||
+ !strncmp(resctrl_val, MBM_STR, sizeof(MBM_STR))) {
if (strlen(mongrp)) {
sprintf(monitorgroup_p, "%s/mon_groups", controlgroup);
sprintf(monitorgroup, "%s/%s", monitorgroup_p, mongrp);
@@ -477,12 +574,9 @@ int write_bm_pid_to_resctrl(pid_t bm_pid, char *ctrlgrp, char *mongrp,
}
out:
- printf("%sok writing benchmark parameters to resctrl FS\n",
- ret ? "not " : "");
+ ksft_print_msg("Writing benchmark parameters to resctrl FS\n");
if (ret)
- perror("# writing to resctrlfs");
-
- tests_run++;
+ ksft_print_msg("Failed writing to resctrlfs\n");
return ret;
}
@@ -492,32 +586,26 @@ out:
* @ctrlgrp: Name of the con_mon grp
* @schemata: Schemata that should be updated to
* @cpu_no: CPU number that the benchmark PID is binded to
- * @resctrl_val: Resctrl feature (Eg: mbm, mba.. etc)
+ * @resource: Resctrl resource (Eg: MB, L3, L2, etc.)
*
- * Update schemata of a con_mon grp *only* if requested resctrl feature is
+ * Update schemata of a con_mon grp *only* if requested resctrl resource is
* allocation type
*
- * Return: 0 on success, non-zero on failure
+ * Return: 0 on success, < 0 on error.
*/
-int write_schemata(char *ctrlgrp, char *schemata, int cpu_no, char *resctrl_val)
+int write_schemata(char *ctrlgrp, char *schemata, int cpu_no, const char *resource)
{
- char controlgroup[1024], schema[1024], reason[64];
- int resource_id, ret = 0;
- FILE *fp;
-
- if ((strcmp(resctrl_val, "mba") != 0) &&
- (strcmp(resctrl_val, "cat") != 0) &&
- (strcmp(resctrl_val, "cqm") != 0))
- return -ENOENT;
+ char controlgroup[1024], reason[128], schema[1024] = {};
+ int domain_id, fd, schema_len, ret = 0;
if (!schemata) {
- printf("# Skipping empty schemata update\n");
+ ksft_print_msg("Skipping empty schemata update\n");
return -1;
}
- if (get_resource_id(cpu_no, &resource_id) < 0) {
- sprintf(reason, "Failed to get resource id");
+ if (get_domain_id(resource, cpu_no, &domain_id) < 0) {
+ sprintf(reason, "Failed to get domain ID");
ret = -1;
goto out;
@@ -528,33 +616,39 @@ int write_schemata(char *ctrlgrp, char *schemata, int cpu_no, char *resctrl_val)
else
sprintf(controlgroup, "%s/schemata", RESCTRL_PATH);
- if (!strcmp(resctrl_val, "cat") || !strcmp(resctrl_val, "cqm"))
- sprintf(schema, "%s%d%c%s", "L3:", resource_id, '=', schemata);
- if (strcmp(resctrl_val, "mba") == 0)
- sprintf(schema, "%s%d%c%s", "MB:", resource_id, '=', schemata);
-
- fp = fopen(controlgroup, "w");
- if (!fp) {
- sprintf(reason, "Failed to open control group");
+ schema_len = snprintf(schema, sizeof(schema), "%s:%d=%s\n",
+ resource, domain_id, schemata);
+ if (schema_len < 0 || schema_len >= sizeof(schema)) {
+ snprintf(reason, sizeof(reason),
+ "snprintf() failed with return value : %d", schema_len);
ret = -1;
-
goto out;
}
- if (fprintf(fp, "%s\n", schema) < 0) {
- sprintf(reason, "Failed to write schemata in control group");
- fclose(fp);
+ fd = open(controlgroup, O_WRONLY);
+ if (fd < 0) {
+ snprintf(reason, sizeof(reason),
+ "open() failed : %s", strerror(errno));
ret = -1;
- goto out;
+ goto err_schema_not_empty;
}
- fclose(fp);
+ if (write(fd, schema, schema_len) < 0) {
+ snprintf(reason, sizeof(reason),
+ "write() failed : %s", strerror(errno));
+ close(fd);
+ ret = -1;
+
+ goto err_schema_not_empty;
+ }
+ close(fd);
+err_schema_not_empty:
+ schema[schema_len - 1] = 0;
out:
- printf("%sok Write schema \"%s\" to resctrl FS%s%s\n",
- ret ? "not " : "", schema, ret ? " # " : "",
- ret ? reason : "");
- tests_run++;
+ ksft_print_msg("Write schema \"%s\" to resctrl FS%s%s\n",
+ schema, ret ? " # " : "",
+ ret ? reason : "");
return ret;
}
@@ -578,18 +672,20 @@ bool check_resctrlfs_support(void)
fclose(inf);
- printf("%sok kernel supports resctrl filesystem\n", ret ? "" : "not ");
- tests_run++;
+ ksft_print_msg("%s Check kernel supports resctrl filesystem\n",
+ ret ? "Pass:" : "Fail:");
+
+ if (!ret)
+ return ret;
dp = opendir(RESCTRL_PATH);
- printf("%sok resctrl mountpoint \"%s\" exists\n",
- dp ? "" : "not ", RESCTRL_PATH);
+ ksft_print_msg("%s Check resctrl mountpoint \"%s\" exists\n",
+ dp ? "Pass:" : "Fail:", RESCTRL_PATH);
if (dp)
closedir(dp);
- tests_run++;
- printf("# resctrl filesystem %s mounted\n",
- find_resctrl_mount(NULL) ? "not" : "is");
+ ksft_print_msg("resctrl filesystem %s mounted\n",
+ find_resctrl_mount(NULL) ? "not" : "is");
return ret;
}
@@ -612,31 +708,90 @@ char *fgrep(FILE *inf, const char *str)
}
/*
- * validate_resctrl_feature_request - Check if requested feature is valid.
- * @resctrl_val: Requested feature
+ * resctrl_resource_exists - Check if a resource is supported.
+ * @resource: Resctrl resource (e.g., MB, L3, L2, L3_MON, etc.)
+ *
+ * Return: True if the resource is supported, else false. False is
+ * also returned if resctrl FS is not mounted.
+ */
+bool resctrl_resource_exists(const char *resource)
+{
+ char res_path[PATH_MAX];
+ struct stat statbuf;
+ int ret;
+
+ if (!resource)
+ return false;
+
+ ret = find_resctrl_mount(NULL);
+ if (ret)
+ return false;
+
+ snprintf(res_path, sizeof(res_path), "%s/%s", INFO_PATH, resource);
+
+ if (stat(res_path, &statbuf))
+ return false;
+
+ return true;
+}
+
+/*
+ * resctrl_mon_feature_exists - Check if requested monitoring feature is valid.
+ * @resource: Resource that uses the mon_features file. Currently only L3_MON
+ * is valid.
+ * @feature: Required monitor feature (in mon_features file).
*
- * Return: 0 on success, non-zero on failure
+ * Return: True if the feature is supported, else false.
*/
-bool validate_resctrl_feature_request(char *resctrl_val)
+bool resctrl_mon_feature_exists(const char *resource, const char *feature)
{
- FILE *inf = fopen("/proc/cpuinfo", "r");
- bool found = false;
+ char res_path[PATH_MAX];
char *res;
+ FILE *inf;
+
+ if (!feature || !resource)
+ return false;
+ snprintf(res_path, sizeof(res_path), "%s/%s/mon_features", INFO_PATH, resource);
+ inf = fopen(res_path, "r");
if (!inf)
return false;
- res = fgrep(inf, "flags");
+ res = fgrep(inf, feature);
+ free(res);
+ fclose(inf);
- if (res) {
- char *s = strchr(res, ':');
+ return !!res;
+}
- found = s && !strstr(s, resctrl_val);
- free(res);
- }
- fclose(inf);
+/*
+ * resource_info_file_exists - Check if a file is present inside
+ * /sys/fs/resctrl/info/@resource.
+ * @resource: Required resource (Eg: MB, L3, L2, etc.)
+ * @file: Required file.
+ *
+ * Return: True if the /sys/fs/resctrl/info/@resource/@file exists, else false.
+ */
+bool resource_info_file_exists(const char *resource, const char *file)
+{
+ char res_path[PATH_MAX];
+ struct stat statbuf;
+
+ if (!file || !resource)
+ return false;
+
+ snprintf(res_path, sizeof(res_path), "%s/%s/%s", INFO_PATH, resource,
+ file);
- return found;
+ if (stat(res_path, &statbuf))
+ return false;
+
+ return true;
+}
+
+bool test_resource_feature_check(const struct resctrl_test *test)
+{
+ return resctrl_resource_exists(test->resource);
}
int filter_dmesg(void)
@@ -649,21 +804,22 @@ int filter_dmesg(void)
ret = pipe(pipefds);
if (ret) {
- perror("pipe");
+ ksft_perror("pipe");
return ret;
}
+ fflush(stdout);
pid = fork();
if (pid == 0) {
close(pipefds[0]);
dup2(pipefds[1], STDOUT_FILENO);
execlp("dmesg", "dmesg", NULL);
- perror("executing dmesg");
+ ksft_perror("Executing dmesg");
exit(1);
}
close(pipefds[1]);
fp = fdopen(pipefds[0], "r");
if (!fp) {
- perror("fdopen(pipe)");
+ ksft_perror("fdopen(pipe)");
kill(pid, SIGTERM);
return -1;
@@ -671,9 +827,9 @@ int filter_dmesg(void)
while (fgets(line, 1024, fp)) {
if (strstr(line, "intel_rdt:"))
- printf("# dmesg: %s", line);
+ ksft_print_msg("dmesg: %s", line);
if (strstr(line, "resctrl:"))
- printf("# dmesg: %s", line);
+ ksft_print_msg("dmesg: %s", line);
}
fclose(fp);
waitpid(pid, NULL, 0);
diff --git a/tools/testing/selftests/resctrl/settings b/tools/testing/selftests/resctrl/settings
new file mode 100644
index 000000000000..a383f3d4565b
--- /dev/null
+++ b/tools/testing/selftests/resctrl/settings
@@ -0,0 +1,3 @@
+# If running time is longer than 120 seconds when new tests are added in
+# the future, increase timeout here.
+timeout=120
diff --git a/tools/testing/selftests/riscv/Makefile b/tools/testing/selftests/riscv/Makefile
new file mode 100644
index 000000000000..4a9ff515a3a0
--- /dev/null
+++ b/tools/testing/selftests/riscv/Makefile
@@ -0,0 +1,58 @@
+# SPDX-License-Identifier: GPL-2.0
+# Originally tools/testing/arm64/Makefile
+
+# When ARCH not overridden for crosscompiling, lookup machine
+ARCH ?= $(shell uname -m 2>/dev/null || echo not)
+
+ifneq (,$(filter $(ARCH),riscv))
+RISCV_SUBTARGETS ?= hwprobe vector mm
+else
+RISCV_SUBTARGETS :=
+endif
+
+CFLAGS := -Wall -O2 -g
+
+# A proper top_srcdir is needed by KSFT(lib.mk)
+top_srcdir = $(realpath ../../../../)
+
+# Additional include paths needed by kselftest.h and local headers
+CFLAGS += -I$(top_srcdir)/tools/testing/selftests/
+
+CFLAGS += $(KHDR_INCLUDES)
+
+export CFLAGS
+export top_srcdir
+
+all:
+ @for DIR in $(RISCV_SUBTARGETS); do \
+ BUILD_TARGET=$(OUTPUT)/$$DIR; \
+ mkdir -p $$BUILD_TARGET; \
+ $(MAKE) OUTPUT=$$BUILD_TARGET -C $$DIR $@; \
+ done
+
+install: all
+ @for DIR in $(RISCV_SUBTARGETS); do \
+ BUILD_TARGET=$(OUTPUT)/$$DIR; \
+ $(MAKE) OUTPUT=$$BUILD_TARGET -C $$DIR $@; \
+ done
+
+run_tests: all
+ @for DIR in $(RISCV_SUBTARGETS); do \
+ BUILD_TARGET=$(OUTPUT)/$$DIR; \
+ $(MAKE) OUTPUT=$$BUILD_TARGET -C $$DIR $@; \
+ done
+
+# Avoid any output on non riscv on emit_tests
+emit_tests:
+ @for DIR in $(RISCV_SUBTARGETS); do \
+ BUILD_TARGET=$(OUTPUT)/$$DIR; \
+ $(MAKE) OUTPUT=$$BUILD_TARGET -C $$DIR $@; \
+ done
+
+clean:
+ @for DIR in $(RISCV_SUBTARGETS); do \
+ BUILD_TARGET=$(OUTPUT)/$$DIR; \
+ $(MAKE) OUTPUT=$$BUILD_TARGET -C $$DIR $@; \
+ done
+
+.PHONY: all clean install run_tests emit_tests
diff --git a/tools/testing/selftests/riscv/hwprobe/.gitignore b/tools/testing/selftests/riscv/hwprobe/.gitignore
new file mode 100644
index 000000000000..8113dc3bdd03
--- /dev/null
+++ b/tools/testing/selftests/riscv/hwprobe/.gitignore
@@ -0,0 +1 @@
+hwprobe
diff --git a/tools/testing/selftests/riscv/hwprobe/Makefile b/tools/testing/selftests/riscv/hwprobe/Makefile
new file mode 100644
index 000000000000..cec81610a5f2
--- /dev/null
+++ b/tools/testing/selftests/riscv/hwprobe/Makefile
@@ -0,0 +1,18 @@
+# SPDX-License-Identifier: GPL-2.0
+# Copyright (C) 2021 ARM Limited
+# Originally tools/testing/arm64/abi/Makefile
+
+CFLAGS += -I$(top_srcdir)/tools/include
+
+TEST_GEN_PROGS := hwprobe cbo which-cpus
+
+include ../../lib.mk
+
+$(OUTPUT)/hwprobe: hwprobe.c sys_hwprobe.S
+ $(CC) -static -o$@ $(CFLAGS) $(LDFLAGS) $^
+
+$(OUTPUT)/cbo: cbo.c sys_hwprobe.S
+ $(CC) -static -o$@ $(CFLAGS) $(LDFLAGS) $^
+
+$(OUTPUT)/which-cpus: which-cpus.c sys_hwprobe.S
+ $(CC) -static -o$@ $(CFLAGS) $(LDFLAGS) $^
diff --git a/tools/testing/selftests/riscv/hwprobe/cbo.c b/tools/testing/selftests/riscv/hwprobe/cbo.c
new file mode 100644
index 000000000000..c537d52fafc5
--- /dev/null
+++ b/tools/testing/selftests/riscv/hwprobe/cbo.c
@@ -0,0 +1,226 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright (c) 2023 Ventana Micro Systems Inc.
+ *
+ * Run with 'taskset -c <cpu-list> cbo' to only execute hwprobe on a
+ * subset of cpus, as well as only executing the tests on those cpus.
+ */
+#define _GNU_SOURCE
+#include <stdbool.h>
+#include <stdint.h>
+#include <string.h>
+#include <sched.h>
+#include <signal.h>
+#include <assert.h>
+#include <linux/compiler.h>
+#include <linux/kernel.h>
+#include <asm/ucontext.h>
+
+#include "hwprobe.h"
+#include "../../kselftest.h"
+
+#define MK_CBO(fn) cpu_to_le32((fn) << 20 | 10 << 15 | 2 << 12 | 0 << 7 | 15)
+
+static char mem[4096] __aligned(4096) = { [0 ... 4095] = 0xa5 };
+
+static bool illegal_insn;
+
+static void sigill_handler(int sig, siginfo_t *info, void *context)
+{
+ unsigned long *regs = (unsigned long *)&((ucontext_t *)context)->uc_mcontext;
+ uint32_t insn = *(uint32_t *)regs[0];
+
+ assert(insn == MK_CBO(regs[11]));
+
+ illegal_insn = true;
+ regs[0] += 4;
+}
+
+#define cbo_insn(base, fn) \
+({ \
+ asm volatile( \
+ "mv a0, %0\n" \
+ "li a1, %1\n" \
+ ".4byte %2\n" \
+ : : "r" (base), "i" (fn), "i" (MK_CBO(fn)) : "a0", "a1", "memory"); \
+})
+
+static void cbo_inval(char *base) { cbo_insn(base, 0); }
+static void cbo_clean(char *base) { cbo_insn(base, 1); }
+static void cbo_flush(char *base) { cbo_insn(base, 2); }
+static void cbo_zero(char *base) { cbo_insn(base, 4); }
+
+static void test_no_zicbom(void *arg)
+{
+ ksft_print_msg("Testing Zicbom instructions remain privileged\n");
+
+ illegal_insn = false;
+ cbo_clean(&mem[0]);
+ ksft_test_result(illegal_insn, "No cbo.clean\n");
+
+ illegal_insn = false;
+ cbo_flush(&mem[0]);
+ ksft_test_result(illegal_insn, "No cbo.flush\n");
+
+ illegal_insn = false;
+ cbo_inval(&mem[0]);
+ ksft_test_result(illegal_insn, "No cbo.inval\n");
+}
+
+static void test_no_zicboz(void *arg)
+{
+ ksft_print_msg("No Zicboz, testing cbo.zero remains privileged\n");
+
+ illegal_insn = false;
+ cbo_zero(&mem[0]);
+ ksft_test_result(illegal_insn, "No cbo.zero\n");
+}
+
+static bool is_power_of_2(__u64 n)
+{
+ return n != 0 && (n & (n - 1)) == 0;
+}
+
+static void test_zicboz(void *arg)
+{
+ struct riscv_hwprobe pair = {
+ .key = RISCV_HWPROBE_KEY_ZICBOZ_BLOCK_SIZE,
+ };
+ cpu_set_t *cpus = (cpu_set_t *)arg;
+ __u64 block_size;
+ int i, j;
+ long rc;
+
+ rc = riscv_hwprobe(&pair, 1, sizeof(cpu_set_t), (unsigned long *)cpus, 0);
+ block_size = pair.value;
+ ksft_test_result(rc == 0 && pair.key == RISCV_HWPROBE_KEY_ZICBOZ_BLOCK_SIZE &&
+ is_power_of_2(block_size), "Zicboz block size\n");
+ ksft_print_msg("Zicboz block size: %llu\n", block_size);
+
+ illegal_insn = false;
+ cbo_zero(&mem[block_size]);
+ ksft_test_result(!illegal_insn, "cbo.zero\n");
+
+ if (illegal_insn || !is_power_of_2(block_size)) {
+ ksft_test_result_skip("cbo.zero check\n");
+ return;
+ }
+
+ assert(block_size <= 1024);
+
+ for (i = 0; i < 4096 / block_size; ++i) {
+ if (i % 2)
+ cbo_zero(&mem[i * block_size]);
+ }
+
+ for (i = 0; i < 4096 / block_size; ++i) {
+ char expected = i % 2 ? 0x0 : 0xa5;
+
+ for (j = 0; j < block_size; ++j) {
+ if (mem[i * block_size + j] != expected) {
+ ksft_test_result_fail("cbo.zero check\n");
+ ksft_print_msg("cbo.zero check: mem[%llu] != 0x%x\n",
+ i * block_size + j, expected);
+ return;
+ }
+ }
+ }
+
+ ksft_test_result_pass("cbo.zero check\n");
+}
+
+static void check_no_zicboz_cpus(cpu_set_t *cpus)
+{
+ struct riscv_hwprobe pair = {
+ .key = RISCV_HWPROBE_KEY_IMA_EXT_0,
+ };
+ cpu_set_t one_cpu;
+ int i = 0, c = 0;
+ long rc;
+
+ while (i++ < CPU_COUNT(cpus)) {
+ while (!CPU_ISSET(c, cpus))
+ ++c;
+
+ CPU_ZERO(&one_cpu);
+ CPU_SET(c, &one_cpu);
+
+ rc = riscv_hwprobe(&pair, 1, sizeof(cpu_set_t), (unsigned long *)&one_cpu, 0);
+ assert(rc == 0 && pair.key == RISCV_HWPROBE_KEY_IMA_EXT_0);
+
+ if (pair.value & RISCV_HWPROBE_EXT_ZICBOZ)
+ ksft_exit_fail_msg("Zicboz is only present on a subset of harts.\n"
+ "Use taskset to select a set of harts where Zicboz\n"
+ "presence (present or not) is consistent for each hart\n");
+ ++c;
+ }
+}
+
+enum {
+ TEST_ZICBOZ,
+ TEST_NO_ZICBOZ,
+ TEST_NO_ZICBOM,
+};
+
+static struct test_info {
+ bool enabled;
+ unsigned int nr_tests;
+ void (*test_fn)(void *arg);
+} tests[] = {
+ [TEST_ZICBOZ] = { .nr_tests = 3, test_zicboz },
+ [TEST_NO_ZICBOZ] = { .nr_tests = 1, test_no_zicboz },
+ [TEST_NO_ZICBOM] = { .nr_tests = 3, test_no_zicbom },
+};
+
+int main(int argc, char **argv)
+{
+ struct sigaction act = {
+ .sa_sigaction = &sigill_handler,
+ .sa_flags = SA_SIGINFO,
+ };
+ struct riscv_hwprobe pair;
+ unsigned int plan = 0;
+ cpu_set_t cpus;
+ long rc;
+ int i;
+
+ if (argc > 1 && !strcmp(argv[1], "--sigill")) {
+ rc = sigaction(SIGILL, &act, NULL);
+ assert(rc == 0);
+ tests[TEST_NO_ZICBOZ].enabled = true;
+ tests[TEST_NO_ZICBOM].enabled = true;
+ }
+
+ rc = sched_getaffinity(0, sizeof(cpu_set_t), &cpus);
+ assert(rc == 0);
+
+ ksft_print_header();
+
+ pair.key = RISCV_HWPROBE_KEY_IMA_EXT_0;
+ rc = riscv_hwprobe(&pair, 1, sizeof(cpu_set_t), (unsigned long *)&cpus, 0);
+ if (rc < 0)
+ ksft_exit_fail_msg("hwprobe() failed with %ld\n", rc);
+ assert(rc == 0 && pair.key == RISCV_HWPROBE_KEY_IMA_EXT_0);
+
+ if (pair.value & RISCV_HWPROBE_EXT_ZICBOZ) {
+ tests[TEST_ZICBOZ].enabled = true;
+ tests[TEST_NO_ZICBOZ].enabled = false;
+ } else {
+ check_no_zicboz_cpus(&cpus);
+ }
+
+ for (i = 0; i < ARRAY_SIZE(tests); ++i)
+ plan += tests[i].enabled ? tests[i].nr_tests : 0;
+
+ if (plan == 0)
+ ksft_print_msg("No tests enabled.\n");
+ else
+ ksft_set_plan(plan);
+
+ for (i = 0; i < ARRAY_SIZE(tests); ++i) {
+ if (tests[i].enabled)
+ tests[i].test_fn(&cpus);
+ }
+
+ ksft_finished();
+}
diff --git a/tools/testing/selftests/riscv/hwprobe/hwprobe.c b/tools/testing/selftests/riscv/hwprobe/hwprobe.c
new file mode 100644
index 000000000000..fd73c87804f3
--- /dev/null
+++ b/tools/testing/selftests/riscv/hwprobe/hwprobe.c
@@ -0,0 +1,66 @@
+// SPDX-License-Identifier: GPL-2.0-only
+#include "hwprobe.h"
+#include "../../kselftest.h"
+
+int main(int argc, char **argv)
+{
+ struct riscv_hwprobe pairs[8];
+ unsigned long cpus;
+ long out;
+
+ ksft_print_header();
+ ksft_set_plan(5);
+
+ /* Fake the CPU_SET ops. */
+ cpus = -1;
+
+ /*
+ * Just run a basic test: pass enough pairs to get up to the base
+ * behavior, and then check to make sure it's sane.
+ */
+ for (long i = 0; i < 8; i++)
+ pairs[i].key = i;
+
+ out = riscv_hwprobe(pairs, 8, 1, &cpus, 0);
+ if (out != 0)
+ ksft_exit_fail_msg("hwprobe() failed with %ld\n", out);
+
+ for (long i = 0; i < 4; ++i) {
+ /* Fail if the kernel claims not to recognize a base key. */
+ if ((i < 4) && (pairs[i].key != i))
+ ksft_exit_fail_msg("Failed to recognize base key: key != i, "
+ "key=%lld, i=%ld\n", pairs[i].key, i);
+
+ if (pairs[i].key != RISCV_HWPROBE_KEY_BASE_BEHAVIOR)
+ continue;
+
+ if (pairs[i].value & RISCV_HWPROBE_BASE_BEHAVIOR_IMA)
+ continue;
+
+ ksft_exit_fail_msg("Unexpected pair: (%lld, %llu)\n", pairs[i].key, pairs[i].value);
+ }
+
+ out = riscv_hwprobe(pairs, 8, 0, 0, 0);
+ ksft_test_result(out == 0, "NULL CPU set\n");
+
+ out = riscv_hwprobe(pairs, 8, 0, &cpus, 0);
+ ksft_test_result(out != 0, "Bad CPU set\n");
+
+ out = riscv_hwprobe(pairs, 8, 1, 0, 0);
+ ksft_test_result(out != 0, "NULL CPU set with non-zero size\n");
+
+ pairs[0].key = RISCV_HWPROBE_KEY_BASE_BEHAVIOR;
+ out = riscv_hwprobe(pairs, 1, 1, &cpus, 0);
+ ksft_test_result(out == 0 && pairs[0].key == RISCV_HWPROBE_KEY_BASE_BEHAVIOR,
+ "Existing key is maintained\n");
+
+ pairs[0].key = 0x5555;
+ pairs[1].key = 1;
+ pairs[1].value = 0xAAAA;
+ out = riscv_hwprobe(pairs, 2, 0, 0, 0);
+ ksft_test_result(out == 0 && pairs[0].key == -1 &&
+ pairs[1].key == 1 && pairs[1].value != 0xAAAA,
+ "Unknown key overwritten with -1 and doesn't block other elements\n");
+
+ ksft_finished();
+}
diff --git a/tools/testing/selftests/riscv/hwprobe/hwprobe.h b/tools/testing/selftests/riscv/hwprobe/hwprobe.h
new file mode 100644
index 000000000000..e3fccb390c4d
--- /dev/null
+++ b/tools/testing/selftests/riscv/hwprobe/hwprobe.h
@@ -0,0 +1,15 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+#ifndef SELFTEST_RISCV_HWPROBE_H
+#define SELFTEST_RISCV_HWPROBE_H
+#include <stddef.h>
+#include <asm/hwprobe.h>
+
+/*
+ * Rather than relying on having a new enough libc to define this, just do it
+ * ourselves. This way we don't need to be coupled to a new-enough libc to
+ * contain the call.
+ */
+long riscv_hwprobe(struct riscv_hwprobe *pairs, size_t pair_count,
+ size_t cpusetsize, unsigned long *cpus, unsigned int flags);
+
+#endif
diff --git a/tools/testing/selftests/riscv/hwprobe/sys_hwprobe.S b/tools/testing/selftests/riscv/hwprobe/sys_hwprobe.S
new file mode 100644
index 000000000000..a4773c88d267
--- /dev/null
+++ b/tools/testing/selftests/riscv/hwprobe/sys_hwprobe.S
@@ -0,0 +1,12 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* Copyright (C) 2023 Rivos, Inc */
+
+.text
+.global riscv_hwprobe
+riscv_hwprobe:
+ # Put __NR_riscv_hwprobe in the syscall number register, then just shim
+ # back the kernel's return. This doesn't do any sort of errno
+ # handling, the caller can deal with it.
+ li a7, 258
+ ecall
+ ret
diff --git a/tools/testing/selftests/riscv/hwprobe/which-cpus.c b/tools/testing/selftests/riscv/hwprobe/which-cpus.c
new file mode 100644
index 000000000000..82c121412dfc
--- /dev/null
+++ b/tools/testing/selftests/riscv/hwprobe/which-cpus.c
@@ -0,0 +1,154 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright (c) 2023 Ventana Micro Systems Inc.
+ *
+ * Test the RISCV_HWPROBE_WHICH_CPUS flag of hwprobe. Also provides a command
+ * line interface to get the cpu list for arbitrary hwprobe pairs.
+ */
+#define _GNU_SOURCE
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sched.h>
+#include <unistd.h>
+#include <assert.h>
+
+#include "hwprobe.h"
+#include "../../kselftest.h"
+
+static void help(void)
+{
+ printf("\n"
+ "which-cpus: [-h] [<key=value> [<key=value> ...]]\n\n"
+ " Without parameters, tests the RISCV_HWPROBE_WHICH_CPUS flag of hwprobe.\n"
+ " With parameters, where each parameter is a hwprobe pair written as\n"
+ " <key=value>, outputs the cpulist for cpus which all match the given set\n"
+ " of pairs. 'key' and 'value' should be in numeric form, e.g. 4=0x3b\n");
+}
+
+static void print_cpulist(cpu_set_t *cpus)
+{
+ int start = 0, end = 0;
+
+ if (!CPU_COUNT(cpus)) {
+ printf("cpus: None\n");
+ return;
+ }
+
+ printf("cpus:");
+ for (int i = 0, c = 0; i < CPU_COUNT(cpus); i++, c++) {
+ if (start != end && !CPU_ISSET(c, cpus))
+ printf("-%d", end);
+
+ while (!CPU_ISSET(c, cpus))
+ ++c;
+
+ if (i != 0 && c == end + 1) {
+ end = c;
+ continue;
+ }
+
+ printf("%c%d", i == 0 ? ' ' : ',', c);
+ start = end = c;
+ }
+ if (start != end)
+ printf("-%d", end);
+ printf("\n");
+}
+
+static void do_which_cpus(int argc, char **argv, cpu_set_t *cpus)
+{
+ struct riscv_hwprobe *pairs;
+ int nr_pairs = argc - 1;
+ char *start, *end;
+ int rc;
+
+ pairs = malloc(nr_pairs * sizeof(struct riscv_hwprobe));
+ assert(pairs);
+
+ for (int i = 0; i < nr_pairs; i++) {
+ start = argv[i + 1];
+ pairs[i].key = strtol(start, &end, 0);
+ assert(end != start && *end == '=');
+ start = end + 1;
+ pairs[i].value = strtoul(start, &end, 0);
+ assert(end != start && *end == '\0');
+ }
+
+ rc = riscv_hwprobe(pairs, nr_pairs, sizeof(cpu_set_t), (unsigned long *)cpus, RISCV_HWPROBE_WHICH_CPUS);
+ assert(rc == 0);
+ print_cpulist(cpus);
+ free(pairs);
+}
+
+int main(int argc, char **argv)
+{
+ struct riscv_hwprobe pairs[2];
+ cpu_set_t cpus_aff, cpus;
+ __u64 ext0_all;
+ long rc;
+
+ rc = sched_getaffinity(0, sizeof(cpu_set_t), &cpus_aff);
+ assert(rc == 0);
+
+ if (argc > 1) {
+ if (!strcmp(argv[1], "-h"))
+ help();
+ else
+ do_which_cpus(argc, argv, &cpus_aff);
+ return 0;
+ }
+
+ ksft_print_header();
+ ksft_set_plan(7);
+
+ pairs[0] = (struct riscv_hwprobe){ .key = RISCV_HWPROBE_KEY_BASE_BEHAVIOR, };
+ rc = riscv_hwprobe(pairs, 1, 0, NULL, 0);
+ assert(rc == 0 && pairs[0].key == RISCV_HWPROBE_KEY_BASE_BEHAVIOR &&
+ pairs[0].value == RISCV_HWPROBE_BASE_BEHAVIOR_IMA);
+
+ pairs[0] = (struct riscv_hwprobe){ .key = RISCV_HWPROBE_KEY_IMA_EXT_0, };
+ rc = riscv_hwprobe(pairs, 1, 0, NULL, 0);
+ assert(rc == 0 && pairs[0].key == RISCV_HWPROBE_KEY_IMA_EXT_0);
+ ext0_all = pairs[0].value;
+
+ pairs[0] = (struct riscv_hwprobe){ .key = RISCV_HWPROBE_KEY_BASE_BEHAVIOR, .value = RISCV_HWPROBE_BASE_BEHAVIOR_IMA, };
+ CPU_ZERO(&cpus);
+ rc = riscv_hwprobe(pairs, 1, 0, (unsigned long *)&cpus, RISCV_HWPROBE_WHICH_CPUS);
+ ksft_test_result(rc == -EINVAL, "no cpusetsize\n");
+
+ pairs[0] = (struct riscv_hwprobe){ .key = RISCV_HWPROBE_KEY_BASE_BEHAVIOR, .value = RISCV_HWPROBE_BASE_BEHAVIOR_IMA, };
+ rc = riscv_hwprobe(pairs, 1, sizeof(cpu_set_t), NULL, RISCV_HWPROBE_WHICH_CPUS);
+ ksft_test_result(rc == -EINVAL, "NULL cpus\n");
+
+ pairs[0] = (struct riscv_hwprobe){ .key = 0xbadc0de, };
+ CPU_ZERO(&cpus);
+ rc = riscv_hwprobe(pairs, 1, sizeof(cpu_set_t), (unsigned long *)&cpus, RISCV_HWPROBE_WHICH_CPUS);
+ ksft_test_result(rc == 0 && CPU_COUNT(&cpus) == 0, "unknown key\n");
+
+ pairs[0] = (struct riscv_hwprobe){ .key = RISCV_HWPROBE_KEY_BASE_BEHAVIOR, .value = RISCV_HWPROBE_BASE_BEHAVIOR_IMA, };
+ pairs[1] = (struct riscv_hwprobe){ .key = RISCV_HWPROBE_KEY_BASE_BEHAVIOR, .value = RISCV_HWPROBE_BASE_BEHAVIOR_IMA, };
+ CPU_ZERO(&cpus);
+ rc = riscv_hwprobe(pairs, 2, sizeof(cpu_set_t), (unsigned long *)&cpus, RISCV_HWPROBE_WHICH_CPUS);
+ ksft_test_result(rc == 0, "duplicate keys\n");
+
+ pairs[0] = (struct riscv_hwprobe){ .key = RISCV_HWPROBE_KEY_BASE_BEHAVIOR, .value = RISCV_HWPROBE_BASE_BEHAVIOR_IMA, };
+ pairs[1] = (struct riscv_hwprobe){ .key = RISCV_HWPROBE_KEY_IMA_EXT_0, .value = ext0_all, };
+ CPU_ZERO(&cpus);
+ rc = riscv_hwprobe(pairs, 2, sizeof(cpu_set_t), (unsigned long *)&cpus, RISCV_HWPROBE_WHICH_CPUS);
+ ksft_test_result(rc == 0 && CPU_COUNT(&cpus) == sysconf(_SC_NPROCESSORS_ONLN), "set all cpus\n");
+
+ pairs[0] = (struct riscv_hwprobe){ .key = RISCV_HWPROBE_KEY_BASE_BEHAVIOR, .value = RISCV_HWPROBE_BASE_BEHAVIOR_IMA, };
+ pairs[1] = (struct riscv_hwprobe){ .key = RISCV_HWPROBE_KEY_IMA_EXT_0, .value = ext0_all, };
+ memcpy(&cpus, &cpus_aff, sizeof(cpu_set_t));
+ rc = riscv_hwprobe(pairs, 2, sizeof(cpu_set_t), (unsigned long *)&cpus, RISCV_HWPROBE_WHICH_CPUS);
+ ksft_test_result(rc == 0 && CPU_EQUAL(&cpus, &cpus_aff), "set all affinity cpus\n");
+
+ pairs[0] = (struct riscv_hwprobe){ .key = RISCV_HWPROBE_KEY_BASE_BEHAVIOR, .value = RISCV_HWPROBE_BASE_BEHAVIOR_IMA, };
+ pairs[1] = (struct riscv_hwprobe){ .key = RISCV_HWPROBE_KEY_IMA_EXT_0, .value = ~ext0_all, };
+ memcpy(&cpus, &cpus_aff, sizeof(cpu_set_t));
+ rc = riscv_hwprobe(pairs, 2, sizeof(cpu_set_t), (unsigned long *)&cpus, RISCV_HWPROBE_WHICH_CPUS);
+ ksft_test_result(rc == 0 && CPU_COUNT(&cpus) == 0, "clear all cpus\n");
+
+ ksft_finished();
+}
diff --git a/tools/testing/selftests/riscv/mm/.gitignore b/tools/testing/selftests/riscv/mm/.gitignore
new file mode 100644
index 000000000000..5c2c57cb950c
--- /dev/null
+++ b/tools/testing/selftests/riscv/mm/.gitignore
@@ -0,0 +1,2 @@
+mmap_bottomup
+mmap_default
diff --git a/tools/testing/selftests/riscv/mm/Makefile b/tools/testing/selftests/riscv/mm/Makefile
new file mode 100644
index 000000000000..c333263f2b27
--- /dev/null
+++ b/tools/testing/selftests/riscv/mm/Makefile
@@ -0,0 +1,15 @@
+# SPDX-License-Identifier: GPL-2.0
+# Copyright (C) 2021 ARM Limited
+# Originally tools/testing/arm64/abi/Makefile
+
+# Additional include paths needed by kselftest.h and local headers
+CFLAGS += -D_GNU_SOURCE -std=gnu99 -I.
+
+TEST_GEN_FILES := mmap_default mmap_bottomup
+
+TEST_PROGS := run_mmap.sh
+
+include ../../lib.mk
+
+$(OUTPUT)/mm: mmap_default.c mmap_bottomup.c mmap_tests.h
+ $(CC) -o$@ $(CFLAGS) $(LDFLAGS) $^
diff --git a/tools/testing/selftests/riscv/mm/mmap_bottomup.c b/tools/testing/selftests/riscv/mm/mmap_bottomup.c
new file mode 100644
index 000000000000..1757d19ca89b
--- /dev/null
+++ b/tools/testing/selftests/riscv/mm/mmap_bottomup.c
@@ -0,0 +1,35 @@
+// SPDX-License-Identifier: GPL-2.0-only
+#include <sys/mman.h>
+#include <mmap_test.h>
+
+#include "../../kselftest_harness.h"
+
+TEST(infinite_rlimit)
+{
+// Only works on 64 bit
+#if __riscv_xlen == 64
+ struct addresses mmap_addresses;
+
+ EXPECT_EQ(BOTTOM_UP, memory_layout());
+
+ do_mmaps(&mmap_addresses);
+
+ EXPECT_NE(MAP_FAILED, mmap_addresses.no_hint);
+ EXPECT_NE(MAP_FAILED, mmap_addresses.on_37_addr);
+ EXPECT_NE(MAP_FAILED, mmap_addresses.on_38_addr);
+ EXPECT_NE(MAP_FAILED, mmap_addresses.on_46_addr);
+ EXPECT_NE(MAP_FAILED, mmap_addresses.on_47_addr);
+ EXPECT_NE(MAP_FAILED, mmap_addresses.on_55_addr);
+ EXPECT_NE(MAP_FAILED, mmap_addresses.on_56_addr);
+
+ EXPECT_GT(1UL << 47, (unsigned long)mmap_addresses.no_hint);
+ EXPECT_GT(1UL << 38, (unsigned long)mmap_addresses.on_37_addr);
+ EXPECT_GT(1UL << 38, (unsigned long)mmap_addresses.on_38_addr);
+ EXPECT_GT(1UL << 38, (unsigned long)mmap_addresses.on_46_addr);
+ EXPECT_GT(1UL << 47, (unsigned long)mmap_addresses.on_47_addr);
+ EXPECT_GT(1UL << 47, (unsigned long)mmap_addresses.on_55_addr);
+ EXPECT_GT(1UL << 56, (unsigned long)mmap_addresses.on_56_addr);
+#endif
+}
+
+TEST_HARNESS_MAIN
diff --git a/tools/testing/selftests/riscv/mm/mmap_default.c b/tools/testing/selftests/riscv/mm/mmap_default.c
new file mode 100644
index 000000000000..c63c60b9397e
--- /dev/null
+++ b/tools/testing/selftests/riscv/mm/mmap_default.c
@@ -0,0 +1,35 @@
+// SPDX-License-Identifier: GPL-2.0-only
+#include <sys/mman.h>
+#include <mmap_test.h>
+
+#include "../../kselftest_harness.h"
+
+TEST(default_rlimit)
+{
+// Only works on 64 bit
+#if __riscv_xlen == 64
+ struct addresses mmap_addresses;
+
+ EXPECT_EQ(TOP_DOWN, memory_layout());
+
+ do_mmaps(&mmap_addresses);
+
+ EXPECT_NE(MAP_FAILED, mmap_addresses.no_hint);
+ EXPECT_NE(MAP_FAILED, mmap_addresses.on_37_addr);
+ EXPECT_NE(MAP_FAILED, mmap_addresses.on_38_addr);
+ EXPECT_NE(MAP_FAILED, mmap_addresses.on_46_addr);
+ EXPECT_NE(MAP_FAILED, mmap_addresses.on_47_addr);
+ EXPECT_NE(MAP_FAILED, mmap_addresses.on_55_addr);
+ EXPECT_NE(MAP_FAILED, mmap_addresses.on_56_addr);
+
+ EXPECT_GT(1UL << 47, (unsigned long)mmap_addresses.no_hint);
+ EXPECT_GT(1UL << 38, (unsigned long)mmap_addresses.on_37_addr);
+ EXPECT_GT(1UL << 38, (unsigned long)mmap_addresses.on_38_addr);
+ EXPECT_GT(1UL << 38, (unsigned long)mmap_addresses.on_46_addr);
+ EXPECT_GT(1UL << 47, (unsigned long)mmap_addresses.on_47_addr);
+ EXPECT_GT(1UL << 47, (unsigned long)mmap_addresses.on_55_addr);
+ EXPECT_GT(1UL << 56, (unsigned long)mmap_addresses.on_56_addr);
+#endif
+}
+
+TEST_HARNESS_MAIN
diff --git a/tools/testing/selftests/riscv/mm/mmap_test.h b/tools/testing/selftests/riscv/mm/mmap_test.h
new file mode 100644
index 000000000000..2e0db9c5be6c
--- /dev/null
+++ b/tools/testing/selftests/riscv/mm/mmap_test.h
@@ -0,0 +1,67 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+#ifndef _TESTCASES_MMAP_TEST_H
+#define _TESTCASES_MMAP_TEST_H
+#include <sys/mman.h>
+#include <sys/resource.h>
+#include <stddef.h>
+
+#define TOP_DOWN 0
+#define BOTTOM_UP 1
+
+struct addresses {
+ int *no_hint;
+ int *on_37_addr;
+ int *on_38_addr;
+ int *on_46_addr;
+ int *on_47_addr;
+ int *on_55_addr;
+ int *on_56_addr;
+};
+
+// Only works on 64 bit
+#if __riscv_xlen == 64
+static inline void do_mmaps(struct addresses *mmap_addresses)
+{
+ /*
+ * Place all of the hint addresses on the boundaries of mmap
+ * sv39, sv48, sv57
+ * User addresses end at 1<<38, 1<<47, 1<<56 respectively
+ */
+ void *on_37_bits = (void *)(1UL << 37);
+ void *on_38_bits = (void *)(1UL << 38);
+ void *on_46_bits = (void *)(1UL << 46);
+ void *on_47_bits = (void *)(1UL << 47);
+ void *on_55_bits = (void *)(1UL << 55);
+ void *on_56_bits = (void *)(1UL << 56);
+
+ int prot = PROT_READ | PROT_WRITE;
+ int flags = MAP_PRIVATE | MAP_ANONYMOUS;
+
+ mmap_addresses->no_hint =
+ mmap(NULL, 5 * sizeof(int), prot, flags, 0, 0);
+ mmap_addresses->on_37_addr =
+ mmap(on_37_bits, 5 * sizeof(int), prot, flags, 0, 0);
+ mmap_addresses->on_38_addr =
+ mmap(on_38_bits, 5 * sizeof(int), prot, flags, 0, 0);
+ mmap_addresses->on_46_addr =
+ mmap(on_46_bits, 5 * sizeof(int), prot, flags, 0, 0);
+ mmap_addresses->on_47_addr =
+ mmap(on_47_bits, 5 * sizeof(int), prot, flags, 0, 0);
+ mmap_addresses->on_55_addr =
+ mmap(on_55_bits, 5 * sizeof(int), prot, flags, 0, 0);
+ mmap_addresses->on_56_addr =
+ mmap(on_56_bits, 5 * sizeof(int), prot, flags, 0, 0);
+}
+#endif /* __riscv_xlen == 64 */
+
+static inline int memory_layout(void)
+{
+ int prot = PROT_READ | PROT_WRITE;
+ int flags = MAP_PRIVATE | MAP_ANONYMOUS;
+
+ void *value1 = mmap(NULL, sizeof(int), prot, flags, 0, 0);
+ void *value2 = mmap(NULL, sizeof(int), prot, flags, 0, 0);
+
+ return value2 > value1;
+}
+#endif /* _TESTCASES_MMAP_TEST_H */
diff --git a/tools/testing/selftests/riscv/mm/run_mmap.sh b/tools/testing/selftests/riscv/mm/run_mmap.sh
new file mode 100755
index 000000000000..ca5ad7c48bad
--- /dev/null
+++ b/tools/testing/selftests/riscv/mm/run_mmap.sh
@@ -0,0 +1,12 @@
+#!/bin/sh
+# SPDX-License-Identifier: GPL-2.0
+
+original_stack_limit=$(ulimit -s)
+
+./mmap_default
+
+# Force mmap_bottomup to be ran with bottomup memory due to
+# the unlimited stack
+ulimit -s unlimited
+./mmap_bottomup
+ulimit -s $original_stack_limit
diff --git a/tools/testing/selftests/riscv/vector/.gitignore b/tools/testing/selftests/riscv/vector/.gitignore
new file mode 100644
index 000000000000..9ae7964491d5
--- /dev/null
+++ b/tools/testing/selftests/riscv/vector/.gitignore
@@ -0,0 +1,3 @@
+vstate_exec_nolibc
+vstate_prctl
+v_initval_nolibc
diff --git a/tools/testing/selftests/riscv/vector/Makefile b/tools/testing/selftests/riscv/vector/Makefile
new file mode 100644
index 000000000000..bfff0ff4f3be
--- /dev/null
+++ b/tools/testing/selftests/riscv/vector/Makefile
@@ -0,0 +1,19 @@
+# SPDX-License-Identifier: GPL-2.0
+# Copyright (C) 2021 ARM Limited
+# Originally tools/testing/arm64/abi/Makefile
+
+TEST_GEN_PROGS := vstate_prctl v_initval_nolibc
+TEST_GEN_PROGS_EXTENDED := vstate_exec_nolibc
+
+include ../../lib.mk
+
+$(OUTPUT)/vstate_prctl: vstate_prctl.c ../hwprobe/sys_hwprobe.S
+ $(CC) -static -o$@ $(CFLAGS) $(LDFLAGS) $^
+
+$(OUTPUT)/vstate_exec_nolibc: vstate_exec_nolibc.c
+ $(CC) -nostdlib -static -include ../../../../include/nolibc/nolibc.h \
+ -Wall $(CFLAGS) $(LDFLAGS) $^ -o $@ -lgcc
+
+$(OUTPUT)/v_initval_nolibc: v_initval_nolibc.c
+ $(CC) -nostdlib -static -include ../../../../include/nolibc/nolibc.h \
+ -Wall $(CFLAGS) $(LDFLAGS) $^ -o $@ -lgcc
diff --git a/tools/testing/selftests/riscv/vector/v_initval_nolibc.c b/tools/testing/selftests/riscv/vector/v_initval_nolibc.c
new file mode 100644
index 000000000000..1dd94197da30
--- /dev/null
+++ b/tools/testing/selftests/riscv/vector/v_initval_nolibc.c
@@ -0,0 +1,68 @@
+// SPDX-License-Identifier: GPL-2.0-only
+
+#include "../../kselftest.h"
+#define MAX_VSIZE (8192 * 32)
+
+void dump(char *ptr, int size)
+{
+ int i = 0;
+
+ for (i = 0; i < size; i++) {
+ if (i != 0) {
+ if (i % 16 == 0)
+ printf("\n");
+ else if (i % 8 == 0)
+ printf(" ");
+ }
+ printf("%02x ", ptr[i]);
+ }
+ printf("\n");
+}
+
+int main(void)
+{
+ int i;
+ unsigned long vl;
+ char *datap, *tmp;
+
+ datap = malloc(MAX_VSIZE);
+ if (!datap) {
+ ksft_test_result_fail("fail to allocate memory for size = %d\n", MAX_VSIZE);
+ exit(-1);
+ }
+
+ tmp = datap;
+ asm volatile (
+ ".option push\n\t"
+ ".option arch, +v\n\t"
+ "vsetvli %0, x0, e8, m8, ta, ma\n\t"
+ "vse8.v v0, (%2)\n\t"
+ "add %1, %2, %0\n\t"
+ "vse8.v v8, (%1)\n\t"
+ "add %1, %1, %0\n\t"
+ "vse8.v v16, (%1)\n\t"
+ "add %1, %1, %0\n\t"
+ "vse8.v v24, (%1)\n\t"
+ ".option pop\n\t"
+ : "=&r" (vl), "=r" (tmp) : "r" (datap) : "memory");
+
+ ksft_print_msg("vl = %lu\n", vl);
+
+ if (datap[0] != 0x00 && datap[0] != 0xff) {
+ ksft_test_result_fail("v-regesters are not properly initialized\n");
+ dump(datap, vl * 4);
+ exit(-1);
+ }
+
+ for (i = 1; i < vl * 4; i++) {
+ if (datap[i] != datap[0]) {
+ ksft_test_result_fail("detect stale values on v-regesters\n");
+ dump(datap, vl * 4);
+ exit(-2);
+ }
+ }
+
+ free(datap);
+ ksft_exit_pass();
+ return 0;
+}
diff --git a/tools/testing/selftests/riscv/vector/vstate_exec_nolibc.c b/tools/testing/selftests/riscv/vector/vstate_exec_nolibc.c
new file mode 100644
index 000000000000..1f9969bed235
--- /dev/null
+++ b/tools/testing/selftests/riscv/vector/vstate_exec_nolibc.c
@@ -0,0 +1,112 @@
+// SPDX-License-Identifier: GPL-2.0-only
+
+#include <linux/wait.h>
+
+#define THIS_PROGRAM "./vstate_exec_nolibc"
+
+int main(int argc, char **argv)
+{
+ int rc, pid, status, test_inherit = 0;
+ long ctrl, ctrl_c;
+ char *exec_argv[2], *exec_envp[2];
+
+ if (argc > 1)
+ test_inherit = 1;
+
+ ctrl = my_syscall1(__NR_prctl, PR_RISCV_V_GET_CONTROL);
+ if (ctrl < 0) {
+ puts("PR_RISCV_V_GET_CONTROL is not supported\n");
+ return ctrl;
+ }
+
+ if (test_inherit) {
+ pid = fork();
+ if (pid == -1) {
+ puts("fork failed\n");
+ exit(-1);
+ }
+
+ /* child */
+ if (!pid) {
+ exec_argv[0] = THIS_PROGRAM;
+ exec_argv[1] = NULL;
+ exec_envp[0] = NULL;
+ exec_envp[1] = NULL;
+ /* launch the program again to check inherit */
+ rc = execve(THIS_PROGRAM, exec_argv, exec_envp);
+ if (rc) {
+ puts("child execve failed\n");
+ exit(-1);
+ }
+ }
+
+ } else {
+ pid = fork();
+ if (pid == -1) {
+ puts("fork failed\n");
+ exit(-1);
+ }
+
+ if (!pid) {
+ rc = my_syscall1(__NR_prctl, PR_RISCV_V_GET_CONTROL);
+ if (rc != ctrl) {
+ puts("child's vstate_ctrl not equal to parent's\n");
+ exit(-1);
+ }
+ asm volatile (".option push\n\t"
+ ".option arch, +v\n\t"
+ "vsetvli x0, x0, e32, m8, ta, ma\n\t"
+ ".option pop\n\t"
+ );
+ exit(ctrl);
+ }
+ }
+
+ rc = waitpid(-1, &status, 0);
+
+ if (WIFEXITED(status) && WEXITSTATUS(status) == -1) {
+ puts("child exited abnormally\n");
+ exit(-1);
+ }
+
+ if (WIFSIGNALED(status)) {
+ if (WTERMSIG(status) != SIGILL) {
+ puts("child was terminated by unexpected signal\n");
+ exit(-1);
+ }
+
+ if ((ctrl & PR_RISCV_V_VSTATE_CTRL_CUR_MASK) != PR_RISCV_V_VSTATE_CTRL_OFF) {
+ puts("child signaled by illegal V access but vstate_ctrl is not off\n");
+ exit(-1);
+ }
+
+ /* child terminated, and its vstate_ctrl is off */
+ exit(ctrl);
+ }
+
+ ctrl_c = WEXITSTATUS(status);
+ if (test_inherit) {
+ if (ctrl & PR_RISCV_V_VSTATE_CTRL_INHERIT) {
+ if (!(ctrl_c & PR_RISCV_V_VSTATE_CTRL_INHERIT)) {
+ puts("parent has inherit bit, but child has not\n");
+ exit(-1);
+ }
+ }
+ rc = (ctrl & PR_RISCV_V_VSTATE_CTRL_NEXT_MASK) >> 2;
+ if (rc != PR_RISCV_V_VSTATE_CTRL_DEFAULT) {
+ if (rc != (ctrl_c & PR_RISCV_V_VSTATE_CTRL_CUR_MASK)) {
+ puts("parent's next setting does not equal to child's\n");
+ exit(-1);
+ }
+
+ if (!(ctrl & PR_RISCV_V_VSTATE_CTRL_INHERIT)) {
+ if ((ctrl_c & PR_RISCV_V_VSTATE_CTRL_NEXT_MASK) !=
+ PR_RISCV_V_VSTATE_CTRL_DEFAULT) {
+ puts("must clear child's next vstate_ctrl if !inherit\n");
+ exit(-1);
+ }
+ }
+ }
+ }
+ return ctrl;
+}
diff --git a/tools/testing/selftests/riscv/vector/vstate_prctl.c b/tools/testing/selftests/riscv/vector/vstate_prctl.c
new file mode 100644
index 000000000000..27668fb3b6d0
--- /dev/null
+++ b/tools/testing/selftests/riscv/vector/vstate_prctl.c
@@ -0,0 +1,181 @@
+// SPDX-License-Identifier: GPL-2.0-only
+#include <sys/prctl.h>
+#include <unistd.h>
+#include <errno.h>
+#include <sys/wait.h>
+
+#include "../hwprobe/hwprobe.h"
+#include "../../kselftest.h"
+
+#define NEXT_PROGRAM "./vstate_exec_nolibc"
+static int launch_test(int test_inherit)
+{
+ char *exec_argv[3], *exec_envp[1];
+ int rc, pid, status;
+
+ pid = fork();
+ if (pid < 0) {
+ ksft_test_result_fail("fork failed %d", pid);
+ return -1;
+ }
+
+ if (!pid) {
+ exec_argv[0] = NEXT_PROGRAM;
+ exec_argv[1] = test_inherit != 0 ? "x" : NULL;
+ exec_argv[2] = NULL;
+ exec_envp[0] = NULL;
+ /* launch the program again to check inherit */
+ rc = execve(NEXT_PROGRAM, exec_argv, exec_envp);
+ if (rc) {
+ perror("execve");
+ ksft_test_result_fail("child execve failed %d\n", rc);
+ exit(-1);
+ }
+ }
+
+ rc = waitpid(-1, &status, 0);
+ if (rc < 0) {
+ ksft_test_result_fail("waitpid failed\n");
+ return -3;
+ }
+
+ if ((WIFEXITED(status) && WEXITSTATUS(status) == -1) ||
+ WIFSIGNALED(status)) {
+ ksft_test_result_fail("child exited abnormally\n");
+ return -4;
+ }
+
+ return WEXITSTATUS(status);
+}
+
+int test_and_compare_child(long provided, long expected, int inherit)
+{
+ int rc;
+
+ rc = prctl(PR_RISCV_V_SET_CONTROL, provided);
+ if (rc != 0) {
+ ksft_test_result_fail("prctl with provided arg %lx failed with code %d\n",
+ provided, rc);
+ return -1;
+ }
+ rc = launch_test(inherit);
+ if (rc != expected) {
+ ksft_test_result_fail("Test failed, check %d != %ld\n", rc,
+ expected);
+ return -2;
+ }
+ return 0;
+}
+
+#define PR_RISCV_V_VSTATE_CTRL_CUR_SHIFT 0
+#define PR_RISCV_V_VSTATE_CTRL_NEXT_SHIFT 2
+
+int main(void)
+{
+ struct riscv_hwprobe pair;
+ long flag, expected;
+ long rc;
+
+ pair.key = RISCV_HWPROBE_KEY_IMA_EXT_0;
+ rc = riscv_hwprobe(&pair, 1, 0, NULL, 0);
+ if (rc < 0) {
+ ksft_test_result_fail("hwprobe() failed with %ld\n", rc);
+ return -1;
+ }
+
+ if (pair.key != RISCV_HWPROBE_KEY_IMA_EXT_0) {
+ ksft_test_result_fail("hwprobe cannot probe RISCV_HWPROBE_KEY_IMA_EXT_0\n");
+ return -2;
+ }
+
+ if (!(pair.value & RISCV_HWPROBE_IMA_V)) {
+ rc = prctl(PR_RISCV_V_GET_CONTROL);
+ if (rc != -1 || errno != EINVAL) {
+ ksft_test_result_fail("GET_CONTROL should fail on kernel/hw without V\n");
+ return -3;
+ }
+
+ rc = prctl(PR_RISCV_V_SET_CONTROL, PR_RISCV_V_VSTATE_CTRL_ON);
+ if (rc != -1 || errno != EINVAL) {
+ ksft_test_result_fail("GET_CONTROL should fail on kernel/hw without V\n");
+ return -4;
+ }
+
+ ksft_test_result_skip("Vector not supported\n");
+ return 0;
+ }
+
+ flag = PR_RISCV_V_VSTATE_CTRL_ON;
+ rc = prctl(PR_RISCV_V_SET_CONTROL, flag);
+ if (rc != 0) {
+ ksft_test_result_fail("Enabling V for current should always success\n");
+ return -5;
+ }
+
+ flag = PR_RISCV_V_VSTATE_CTRL_OFF;
+ rc = prctl(PR_RISCV_V_SET_CONTROL, flag);
+ if (rc != -1 || errno != EPERM) {
+ ksft_test_result_fail("Disabling current's V alive must fail with EPERM(%d)\n",
+ errno);
+ return -5;
+ }
+
+ /* Turn on next's vector explicitly and test */
+ flag = PR_RISCV_V_VSTATE_CTRL_ON << PR_RISCV_V_VSTATE_CTRL_NEXT_SHIFT;
+ if (test_and_compare_child(flag, PR_RISCV_V_VSTATE_CTRL_ON, 0))
+ return -6;
+
+ /* Turn off next's vector explicitly and test */
+ flag = PR_RISCV_V_VSTATE_CTRL_OFF << PR_RISCV_V_VSTATE_CTRL_NEXT_SHIFT;
+ if (test_and_compare_child(flag, PR_RISCV_V_VSTATE_CTRL_OFF, 0))
+ return -7;
+
+ /* Turn on next's vector explicitly and test inherit */
+ flag = PR_RISCV_V_VSTATE_CTRL_ON << PR_RISCV_V_VSTATE_CTRL_NEXT_SHIFT;
+ flag |= PR_RISCV_V_VSTATE_CTRL_INHERIT;
+ expected = flag | PR_RISCV_V_VSTATE_CTRL_ON;
+ if (test_and_compare_child(flag, expected, 0))
+ return -8;
+
+ if (test_and_compare_child(flag, expected, 1))
+ return -9;
+
+ /* Turn off next's vector explicitly and test inherit */
+ flag = PR_RISCV_V_VSTATE_CTRL_OFF << PR_RISCV_V_VSTATE_CTRL_NEXT_SHIFT;
+ flag |= PR_RISCV_V_VSTATE_CTRL_INHERIT;
+ expected = flag | PR_RISCV_V_VSTATE_CTRL_OFF;
+ if (test_and_compare_child(flag, expected, 0))
+ return -10;
+
+ if (test_and_compare_child(flag, expected, 1))
+ return -11;
+
+ /* arguments should fail with EINVAL */
+ rc = prctl(PR_RISCV_V_SET_CONTROL, 0xff0);
+ if (rc != -1 || errno != EINVAL) {
+ ksft_test_result_fail("Undefined control argument should return EINVAL\n");
+ return -12;
+ }
+
+ rc = prctl(PR_RISCV_V_SET_CONTROL, 0x3);
+ if (rc != -1 || errno != EINVAL) {
+ ksft_test_result_fail("Undefined control argument should return EINVAL\n");
+ return -12;
+ }
+
+ rc = prctl(PR_RISCV_V_SET_CONTROL, 0xc);
+ if (rc != -1 || errno != EINVAL) {
+ ksft_test_result_fail("Undefined control argument should return EINVAL\n");
+ return -12;
+ }
+
+ rc = prctl(PR_RISCV_V_SET_CONTROL, 0xc);
+ if (rc != -1 || errno != EINVAL) {
+ ksft_test_result_fail("Undefined control argument should return EINVAL\n");
+ return -12;
+ }
+
+ ksft_test_result_pass("tests for riscv_v_vstate_ctrl pass\n");
+ ksft_exit_pass();
+ return 0;
+}
diff --git a/tools/testing/selftests/rlimits/.gitignore b/tools/testing/selftests/rlimits/.gitignore
new file mode 100644
index 000000000000..091021f255b3
--- /dev/null
+++ b/tools/testing/selftests/rlimits/.gitignore
@@ -0,0 +1,2 @@
+# SPDX-License-Identifier: GPL-2.0-only
+rlimits-per-userns
diff --git a/tools/testing/selftests/rlimits/Makefile b/tools/testing/selftests/rlimits/Makefile
new file mode 100644
index 000000000000..03aadb406212
--- /dev/null
+++ b/tools/testing/selftests/rlimits/Makefile
@@ -0,0 +1,6 @@
+# SPDX-License-Identifier: GPL-2.0-or-later
+
+CFLAGS += -Wall -O2 -g
+TEST_GEN_PROGS := rlimits-per-userns
+
+include ../lib.mk
diff --git a/tools/testing/selftests/rlimits/config b/tools/testing/selftests/rlimits/config
new file mode 100644
index 000000000000..416bd53ce982
--- /dev/null
+++ b/tools/testing/selftests/rlimits/config
@@ -0,0 +1 @@
+CONFIG_USER_NS=y
diff --git a/tools/testing/selftests/rlimits/rlimits-per-userns.c b/tools/testing/selftests/rlimits/rlimits-per-userns.c
new file mode 100644
index 000000000000..26dc949e93ea
--- /dev/null
+++ b/tools/testing/selftests/rlimits/rlimits-per-userns.c
@@ -0,0 +1,161 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * Author: Alexey Gladkov <gladkov.alexey@gmail.com>
+ */
+#define _GNU_SOURCE
+#include <sys/types.h>
+#include <sys/wait.h>
+#include <sys/time.h>
+#include <sys/resource.h>
+#include <sys/prctl.h>
+#include <sys/stat.h>
+
+#include <unistd.h>
+#include <stdlib.h>
+#include <stdio.h>
+#include <string.h>
+#include <sched.h>
+#include <signal.h>
+#include <limits.h>
+#include <fcntl.h>
+#include <errno.h>
+#include <err.h>
+
+#define NR_CHILDS 2
+
+static char *service_prog;
+static uid_t user = 60000;
+static uid_t group = 60000;
+
+static void setrlimit_nproc(rlim_t n)
+{
+ pid_t pid = getpid();
+ struct rlimit limit = {
+ .rlim_cur = n,
+ .rlim_max = n
+ };
+
+ warnx("(pid=%d): Setting RLIMIT_NPROC=%ld", pid, n);
+
+ if (setrlimit(RLIMIT_NPROC, &limit) < 0)
+ err(EXIT_FAILURE, "(pid=%d): setrlimit(RLIMIT_NPROC)", pid);
+}
+
+static pid_t fork_child(void)
+{
+ pid_t pid = fork();
+
+ if (pid < 0)
+ err(EXIT_FAILURE, "fork");
+
+ if (pid > 0)
+ return pid;
+
+ pid = getpid();
+
+ warnx("(pid=%d): New process starting ...", pid);
+
+ if (prctl(PR_SET_PDEATHSIG, SIGKILL) < 0)
+ err(EXIT_FAILURE, "(pid=%d): prctl(PR_SET_PDEATHSIG)", pid);
+
+ signal(SIGUSR1, SIG_DFL);
+
+ warnx("(pid=%d): Changing to uid=%d, gid=%d", pid, user, group);
+
+ if (setgid(group) < 0)
+ err(EXIT_FAILURE, "(pid=%d): setgid(%d)", pid, group);
+ if (setuid(user) < 0)
+ err(EXIT_FAILURE, "(pid=%d): setuid(%d)", pid, user);
+
+ warnx("(pid=%d): Service running ...", pid);
+
+ warnx("(pid=%d): Unshare user namespace", pid);
+ if (unshare(CLONE_NEWUSER) < 0)
+ err(EXIT_FAILURE, "unshare(CLONE_NEWUSER)");
+
+ char *const argv[] = { "service", NULL };
+ char *const envp[] = { "I_AM_SERVICE=1", NULL };
+
+ warnx("(pid=%d): Executing real service ...", pid);
+
+ execve(service_prog, argv, envp);
+ err(EXIT_FAILURE, "(pid=%d): execve", pid);
+}
+
+int main(int argc, char **argv)
+{
+ size_t i;
+ pid_t child[NR_CHILDS];
+ int wstatus[NR_CHILDS];
+ int childs = NR_CHILDS;
+ pid_t pid;
+
+ if (getenv("I_AM_SERVICE")) {
+ pause();
+ exit(EXIT_SUCCESS);
+ }
+
+ service_prog = argv[0];
+ pid = getpid();
+
+ warnx("(pid=%d) Starting testcase", pid);
+
+ /*
+ * This rlimit is not a problem for root because it can be exceeded.
+ */
+ setrlimit_nproc(1);
+
+ for (i = 0; i < NR_CHILDS; i++) {
+ child[i] = fork_child();
+ wstatus[i] = 0;
+ usleep(250000);
+ }
+
+ while (1) {
+ for (i = 0; i < NR_CHILDS; i++) {
+ if (child[i] <= 0)
+ continue;
+
+ errno = 0;
+ pid_t ret = waitpid(child[i], &wstatus[i], WNOHANG);
+
+ if (!ret || (!WIFEXITED(wstatus[i]) && !WIFSIGNALED(wstatus[i])))
+ continue;
+
+ if (ret < 0 && errno != ECHILD)
+ warn("(pid=%d): waitpid(%d)", pid, child[i]);
+
+ child[i] *= -1;
+ childs -= 1;
+ }
+
+ if (!childs)
+ break;
+
+ usleep(250000);
+
+ for (i = 0; i < NR_CHILDS; i++) {
+ if (child[i] <= 0)
+ continue;
+ kill(child[i], SIGUSR1);
+ }
+ }
+
+ for (i = 0; i < NR_CHILDS; i++) {
+ if (WIFEXITED(wstatus[i]))
+ warnx("(pid=%d): pid %d exited, status=%d",
+ pid, -child[i], WEXITSTATUS(wstatus[i]));
+ else if (WIFSIGNALED(wstatus[i]))
+ warnx("(pid=%d): pid %d killed by signal %d",
+ pid, -child[i], WTERMSIG(wstatus[i]));
+
+ if (WIFSIGNALED(wstatus[i]) && WTERMSIG(wstatus[i]) == SIGUSR1)
+ continue;
+
+ warnx("(pid=%d): Test failed", pid);
+ exit(EXIT_FAILURE);
+ }
+
+ warnx("(pid=%d): Test passed", pid);
+ exit(EXIT_SUCCESS);
+}
diff --git a/tools/testing/selftests/rseq/.gitignore b/tools/testing/selftests/rseq/.gitignore
index 5910888ebfe1..16496de5f6ce 100644
--- a/tools/testing/selftests/rseq/.gitignore
+++ b/tools/testing/selftests/rseq/.gitignore
@@ -1,7 +1,11 @@
# SPDX-License-Identifier: GPL-2.0-only
basic_percpu_ops_test
+basic_percpu_ops_mm_cid_test
basic_test
basic_rseq_op_test
param_test
param_test_benchmark
param_test_compare_twice
+param_test_mm_cid
+param_test_mm_cid_benchmark
+param_test_mm_cid_compare_twice
diff --git a/tools/testing/selftests/rseq/Makefile b/tools/testing/selftests/rseq/Makefile
index 2af9d39a9716..5a3432fceb58 100644
--- a/tools/testing/selftests/rseq/Makefile
+++ b/tools/testing/selftests/rseq/Makefile
@@ -4,16 +4,19 @@ ifneq ($(shell $(CC) --version 2>&1 | head -n 1 | grep clang),)
CLANG_FLAGS += -no-integrated-as
endif
-CFLAGS += -O2 -Wall -g -I./ -I../../../../usr/include/ -L$(OUTPUT) -Wl,-rpath=./ \
- $(CLANG_FLAGS)
-LDLIBS += -lpthread
+top_srcdir = ../../../..
+
+CFLAGS += -O2 -Wall -g -I./ $(KHDR_INCLUDES) -L$(OUTPUT) -Wl,-rpath=./ \
+ $(CLANG_FLAGS) -I$(top_srcdir)/tools/include
+LDLIBS += -lpthread -ldl
# Own dependencies because we only want to build against 1st prerequisite, but
# still track changes to header files and depend on shared object.
OVERRIDE_TARGETS = 1
-TEST_GEN_PROGS = basic_test basic_percpu_ops_test param_test \
- param_test_benchmark param_test_compare_twice
+TEST_GEN_PROGS = basic_test basic_percpu_ops_test basic_percpu_ops_mm_cid_test param_test \
+ param_test_benchmark param_test_compare_twice param_test_mm_cid \
+ param_test_mm_cid_benchmark param_test_mm_cid_compare_twice
TEST_GEN_PROGS_EXTENDED = librseq.so
@@ -29,6 +32,9 @@ $(OUTPUT)/librseq.so: rseq.c rseq.h rseq-*.h
$(OUTPUT)/%: %.c $(TEST_GEN_PROGS_EXTENDED) rseq.h rseq-*.h
$(CC) $(CFLAGS) $< $(LDLIBS) -lrseq -o $@
+$(OUTPUT)/basic_percpu_ops_mm_cid_test: basic_percpu_ops_test.c $(TEST_GEN_PROGS_EXTENDED) rseq.h rseq-*.h
+ $(CC) $(CFLAGS) -DBUILDOPT_RSEQ_PERCPU_MM_CID $< $(LDLIBS) -lrseq -o $@
+
$(OUTPUT)/param_test_benchmark: param_test.c $(TEST_GEN_PROGS_EXTENDED) \
rseq.h rseq-*.h
$(CC) $(CFLAGS) -DBENCHMARK $< $(LDLIBS) -lrseq -o $@
@@ -36,3 +42,15 @@ $(OUTPUT)/param_test_benchmark: param_test.c $(TEST_GEN_PROGS_EXTENDED) \
$(OUTPUT)/param_test_compare_twice: param_test.c $(TEST_GEN_PROGS_EXTENDED) \
rseq.h rseq-*.h
$(CC) $(CFLAGS) -DRSEQ_COMPARE_TWICE $< $(LDLIBS) -lrseq -o $@
+
+$(OUTPUT)/param_test_mm_cid: param_test.c $(TEST_GEN_PROGS_EXTENDED) \
+ rseq.h rseq-*.h
+ $(CC) $(CFLAGS) -DBUILDOPT_RSEQ_PERCPU_MM_CID $< $(LDLIBS) -lrseq -o $@
+
+$(OUTPUT)/param_test_mm_cid_benchmark: param_test.c $(TEST_GEN_PROGS_EXTENDED) \
+ rseq.h rseq-*.h
+ $(CC) $(CFLAGS) -DBUILDOPT_RSEQ_PERCPU_MM_CID -DBENCHMARK $< $(LDLIBS) -lrseq -o $@
+
+$(OUTPUT)/param_test_mm_cid_compare_twice: param_test.c $(TEST_GEN_PROGS_EXTENDED) \
+ rseq.h rseq-*.h
+ $(CC) $(CFLAGS) -DBUILDOPT_RSEQ_PERCPU_MM_CID -DRSEQ_COMPARE_TWICE $< $(LDLIBS) -lrseq -o $@
diff --git a/tools/testing/selftests/rseq/basic_percpu_ops_test.c b/tools/testing/selftests/rseq/basic_percpu_ops_test.c
index eb3f6db36d36..2348d2c20d0a 100644
--- a/tools/testing/selftests/rseq/basic_percpu_ops_test.c
+++ b/tools/testing/selftests/rseq/basic_percpu_ops_test.c
@@ -9,9 +9,44 @@
#include <string.h>
#include <stddef.h>
+#include "../kselftest.h"
#include "rseq.h"
-#define ARRAY_SIZE(arr) (sizeof(arr) / sizeof((arr)[0]))
+#ifdef BUILDOPT_RSEQ_PERCPU_MM_CID
+# define RSEQ_PERCPU RSEQ_PERCPU_MM_CID
+static
+int get_current_cpu_id(void)
+{
+ return rseq_current_mm_cid();
+}
+static
+bool rseq_validate_cpu_id(void)
+{
+ return rseq_mm_cid_available();
+}
+static
+bool rseq_use_cpu_index(void)
+{
+ return false; /* Use mm_cid */
+}
+#else
+# define RSEQ_PERCPU RSEQ_PERCPU_CPU_ID
+static
+int get_current_cpu_id(void)
+{
+ return rseq_cpu_start();
+}
+static
+bool rseq_validate_cpu_id(void)
+{
+ return rseq_current_cpu_raw() >= 0;
+}
+static
+bool rseq_use_cpu_index(void)
+{
+ return true; /* Use cpu_id as index. */
+}
+#endif
struct percpu_lock_entry {
intptr_t v;
@@ -52,9 +87,9 @@ int rseq_this_cpu_lock(struct percpu_lock *lock)
for (;;) {
int ret;
- cpu = rseq_cpu_start();
- ret = rseq_cmpeqv_storev(&lock->c[cpu].v,
- 0, 1, cpu);
+ cpu = get_current_cpu_id();
+ ret = rseq_cmpeqv_storev(RSEQ_MO_RELAXED, RSEQ_PERCPU,
+ &lock->c[cpu].v, 0, 1, cpu);
if (rseq_likely(!ret))
break;
/* Retry if comparison fails or rseq aborts. */
@@ -142,13 +177,14 @@ void this_cpu_list_push(struct percpu_list *list,
intptr_t *targetptr, newval, expect;
int ret;
- cpu = rseq_cpu_start();
+ cpu = get_current_cpu_id();
/* Load list->c[cpu].head with single-copy atomicity. */
expect = (intptr_t)RSEQ_READ_ONCE(list->c[cpu].head);
newval = (intptr_t)node;
targetptr = (intptr_t *)&list->c[cpu].head;
node->next = (struct percpu_list_node *)expect;
- ret = rseq_cmpeqv_storev(targetptr, expect, newval, cpu);
+ ret = rseq_cmpeqv_storev(RSEQ_MO_RELAXED, RSEQ_PERCPU,
+ targetptr, expect, newval, cpu);
if (rseq_likely(!ret))
break;
/* Retry if comparison fails or rseq aborts. */
@@ -168,15 +204,16 @@ struct percpu_list_node *this_cpu_list_pop(struct percpu_list *list,
for (;;) {
struct percpu_list_node *head;
intptr_t *targetptr, expectnot, *load;
- off_t offset;
+ long offset;
int ret, cpu;
- cpu = rseq_cpu_start();
+ cpu = get_current_cpu_id();
targetptr = (intptr_t *)&list->c[cpu].head;
expectnot = (intptr_t)NULL;
offset = offsetof(struct percpu_list_node, next);
load = (intptr_t *)&head;
- ret = rseq_cmpnev_storeoffp_load(targetptr, expectnot,
+ ret = rseq_cmpnev_storeoffp_load(RSEQ_MO_RELAXED, RSEQ_PERCPU,
+ targetptr, expectnot,
offset, load, cpu);
if (rseq_likely(!ret)) {
if (_cpu)
@@ -247,7 +284,7 @@ void test_percpu_list(void)
/* Generate list entries for every usable cpu. */
sched_getaffinity(0, sizeof(allowed_cpus), &allowed_cpus);
for (i = 0; i < CPU_SETSIZE; i++) {
- if (!CPU_ISSET(i, &allowed_cpus))
+ if (rseq_use_cpu_index() && !CPU_ISSET(i, &allowed_cpus))
continue;
for (j = 1; j <= 100; j++) {
struct percpu_list_node *node;
@@ -272,7 +309,7 @@ void test_percpu_list(void)
for (i = 0; i < CPU_SETSIZE; i++) {
struct percpu_list_node *node;
- if (!CPU_ISSET(i, &allowed_cpus))
+ if (rseq_use_cpu_index() && !CPU_ISSET(i, &allowed_cpus))
continue;
while ((node = __percpu_list_pop(&list, i))) {
@@ -296,6 +333,10 @@ int main(int argc, char **argv)
errno, strerror(errno));
goto error;
}
+ if (!rseq_validate_cpu_id()) {
+ fprintf(stderr, "Error: cpu id getter unavailable\n");
+ goto error;
+ }
printf("spinlock\n");
test_percpu_spinlock();
printf("percpu_list\n");
diff --git a/tools/testing/selftests/rseq/basic_test.c b/tools/testing/selftests/rseq/basic_test.c
index d8efbfb89193..295eea16466f 100644
--- a/tools/testing/selftests/rseq/basic_test.c
+++ b/tools/testing/selftests/rseq/basic_test.c
@@ -22,6 +22,8 @@ void test_cpu_pointer(void)
CPU_ZERO(&test_affinity);
for (i = 0; i < CPU_SETSIZE; i++) {
if (CPU_ISSET(i, &affinity)) {
+ int node;
+
CPU_SET(i, &test_affinity);
sched_setaffinity(0, sizeof(test_affinity),
&test_affinity);
@@ -29,6 +31,8 @@ void test_cpu_pointer(void)
assert(rseq_current_cpu() == i);
assert(rseq_current_cpu_raw() == i);
assert(rseq_cpu_start() == i);
+ node = rseq_fallback_current_node();
+ assert(rseq_current_node_id() == node);
CPU_CLR(i, &test_affinity);
}
}
diff --git a/tools/testing/selftests/rseq/compiler.h b/tools/testing/selftests/rseq/compiler.h
new file mode 100644
index 000000000000..49d62fbd6dda
--- /dev/null
+++ b/tools/testing/selftests/rseq/compiler.h
@@ -0,0 +1,62 @@
+/* SPDX-License-Identifier: LGPL-2.1-only OR MIT */
+/*
+ * rseq/compiler.h
+ *
+ * Work-around asm goto compiler bugs.
+ *
+ * (C) Copyright 2021 - Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
+ */
+
+#ifndef RSEQ_COMPILER_H
+#define RSEQ_COMPILER_H
+
+/*
+ * gcc prior to 4.8.2 miscompiles asm goto.
+ * https://gcc.gnu.org/bugzilla/show_bug.cgi?id=58670
+ *
+ * gcc prior to 8.1.0 miscompiles asm goto at O1.
+ * https://gcc.gnu.org/bugzilla/show_bug.cgi?id=103908
+ *
+ * clang prior to version 13.0.1 miscompiles asm goto at O2.
+ * https://github.com/llvm/llvm-project/issues/52735
+ *
+ * Work around these issues by adding a volatile inline asm with
+ * memory clobber in the fallthrough after the asm goto and at each
+ * label target. Emit this for all compilers in case other similar
+ * issues are found in the future.
+ */
+#define rseq_after_asm_goto() asm volatile ("" : : : "memory")
+
+/* Combine two tokens. */
+#define RSEQ__COMBINE_TOKENS(_tokena, _tokenb) \
+ _tokena##_tokenb
+#define RSEQ_COMBINE_TOKENS(_tokena, _tokenb) \
+ RSEQ__COMBINE_TOKENS(_tokena, _tokenb)
+
+#ifdef __cplusplus
+#define rseq_unqual_scalar_typeof(x) \
+ std::remove_cv<std::remove_reference<decltype(x)>::type>::type
+#else
+#define rseq_scalar_type_to_expr(type) \
+ unsigned type: (unsigned type)0, \
+ signed type: (signed type)0
+
+/*
+ * Use C11 _Generic to express unqualified type from expression. This removes
+ * volatile qualifier from expression type.
+ */
+#define rseq_unqual_scalar_typeof(x) \
+ __typeof__( \
+ _Generic((x), \
+ char: (char)0, \
+ rseq_scalar_type_to_expr(char), \
+ rseq_scalar_type_to_expr(short), \
+ rseq_scalar_type_to_expr(int), \
+ rseq_scalar_type_to_expr(long), \
+ rseq_scalar_type_to_expr(long long), \
+ default: (x) \
+ ) \
+ )
+#endif
+
+#endif /* RSEQ_COMPILER_H_ */
diff --git a/tools/testing/selftests/rseq/param_test.c b/tools/testing/selftests/rseq/param_test.c
index e8a657a5f48a..2f37961240ca 100644
--- a/tools/testing/selftests/rseq/param_test.c
+++ b/tools/testing/selftests/rseq/param_test.c
@@ -1,8 +1,10 @@
// SPDX-License-Identifier: LGPL-2.1
#define _GNU_SOURCE
#include <assert.h>
+#include <linux/membarrier.h>
#include <pthread.h>
#include <sched.h>
+#include <stdatomic.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
@@ -14,6 +16,7 @@
#include <signal.h>
#include <errno.h>
#include <stddef.h>
+#include <stdbool.h>
static inline pid_t rseq_gettid(void)
{
@@ -34,13 +37,9 @@ static int opt_modulo, verbose;
static int opt_yield, opt_signal, opt_sleep,
opt_disable_rseq, opt_threads = 200,
- opt_disable_mod = 0, opt_test = 's', opt_mb = 0;
+ opt_disable_mod = 0, opt_test = 's';
-#ifndef RSEQ_SKIP_FASTPATH
static long long opt_reps = 5000;
-#else
-static long long opt_reps = 100;
-#endif
static __thread __attribute__((tls_model("initial-exec")))
unsigned int signals_delivered;
@@ -159,7 +158,7 @@ unsigned int yield_mod_cnt, nr_abort;
" cbnz " INJECT_ASM_REG ", 222b\n" \
"333:\n"
-#elif __PPC__
+#elif defined(__PPC__)
#define RSEQ_INJECT_INPUT \
, [loop_cnt_1]"m"(loop_cnt[1]) \
@@ -205,6 +204,29 @@ unsigned int yield_mod_cnt, nr_abort;
"addiu " INJECT_ASM_REG ", -1\n\t" \
"bnez " INJECT_ASM_REG ", 222b\n\t" \
"333:\n\t"
+#elif defined(__riscv)
+
+#define RSEQ_INJECT_INPUT \
+ , [loop_cnt_1]"m"(loop_cnt[1]) \
+ , [loop_cnt_2]"m"(loop_cnt[2]) \
+ , [loop_cnt_3]"m"(loop_cnt[3]) \
+ , [loop_cnt_4]"m"(loop_cnt[4]) \
+ , [loop_cnt_5]"m"(loop_cnt[5]) \
+ , [loop_cnt_6]"m"(loop_cnt[6])
+
+#define INJECT_ASM_REG "t1"
+
+#define RSEQ_INJECT_CLOBBER \
+ , INJECT_ASM_REG
+
+#define RSEQ_INJECT_ASM(n) \
+ "lw " INJECT_ASM_REG ", %[loop_cnt_" #n "]\n\t" \
+ "beqz " INJECT_ASM_REG ", 333f\n\t" \
+ "222:\n\t" \
+ "addi " INJECT_ASM_REG "," INJECT_ASM_REG ", -1\n\t" \
+ "bnez " INJECT_ASM_REG ", 222b\n\t" \
+ "333:\n\t"
+
#else
#error unsupported target
@@ -243,6 +265,73 @@ unsigned int yield_mod_cnt, nr_abort;
#include "rseq.h"
+static enum rseq_mo opt_mo = RSEQ_MO_RELAXED;
+
+#ifdef RSEQ_ARCH_HAS_OFFSET_DEREF_ADDV
+#define TEST_MEMBARRIER
+
+static int sys_membarrier(int cmd, int flags, int cpu_id)
+{
+ return syscall(__NR_membarrier, cmd, flags, cpu_id);
+}
+#endif
+
+#ifdef BUILDOPT_RSEQ_PERCPU_MM_CID
+# define RSEQ_PERCPU RSEQ_PERCPU_MM_CID
+static
+int get_current_cpu_id(void)
+{
+ return rseq_current_mm_cid();
+}
+static
+bool rseq_validate_cpu_id(void)
+{
+ return rseq_mm_cid_available();
+}
+static
+bool rseq_use_cpu_index(void)
+{
+ return false; /* Use mm_cid */
+}
+# ifdef TEST_MEMBARRIER
+/*
+ * Membarrier does not currently support targeting a mm_cid, so
+ * issue the barrier on all cpus.
+ */
+static
+int rseq_membarrier_expedited(int cpu)
+{
+ return sys_membarrier(MEMBARRIER_CMD_PRIVATE_EXPEDITED_RSEQ,
+ 0, 0);
+}
+# endif /* TEST_MEMBARRIER */
+#else
+# define RSEQ_PERCPU RSEQ_PERCPU_CPU_ID
+static
+int get_current_cpu_id(void)
+{
+ return rseq_cpu_start();
+}
+static
+bool rseq_validate_cpu_id(void)
+{
+ return rseq_current_cpu_raw() >= 0;
+}
+static
+bool rseq_use_cpu_index(void)
+{
+ return true; /* Use cpu_id as index. */
+}
+# ifdef TEST_MEMBARRIER
+static
+int rseq_membarrier_expedited(int cpu)
+{
+ return sys_membarrier(MEMBARRIER_CMD_PRIVATE_EXPEDITED_RSEQ,
+ MEMBARRIER_CMD_FLAG_CPU, cpu);
+}
+# endif /* TEST_MEMBARRIER */
+#endif
+
struct percpu_lock_entry {
intptr_t v;
} __attribute__((aligned(128)));
@@ -330,8 +419,14 @@ static int rseq_this_cpu_lock(struct percpu_lock *lock)
for (;;) {
int ret;
- cpu = rseq_cpu_start();
- ret = rseq_cmpeqv_storev(&lock->c[cpu].v,
+ cpu = get_current_cpu_id();
+ if (cpu < 0) {
+ fprintf(stderr, "pid: %d: tid: %d, cpu: %d: cid: %d\n",
+ getpid(), (int) rseq_gettid(), rseq_current_cpu_raw(), cpu);
+ abort();
+ }
+ ret = rseq_cmpeqv_storev(RSEQ_MO_RELAXED, RSEQ_PERCPU,
+ &lock->c[cpu].v,
0, 1, cpu);
if (rseq_likely(!ret))
break;
@@ -366,9 +461,7 @@ void *test_percpu_spinlock_thread(void *arg)
abort();
reps = thread_data->reps;
for (i = 0; i < reps; i++) {
- int cpu = rseq_cpu_start();
-
- cpu = rseq_this_cpu_lock(&data->lock);
+ int cpu = rseq_this_cpu_lock(&data->lock);
data->c[cpu].count++;
rseq_percpu_unlock(&data->lock, cpu);
#ifndef BENCHMARK
@@ -450,8 +543,9 @@ void *test_percpu_inc_thread(void *arg)
do {
int cpu;
- cpu = rseq_cpu_start();
- ret = rseq_addv(&data->c[cpu].count, 1, cpu);
+ cpu = get_current_cpu_id();
+ ret = rseq_addv(RSEQ_MO_RELAXED, RSEQ_PERCPU,
+ &data->c[cpu].count, 1, cpu);
} while (rseq_unlikely(ret));
#ifndef BENCHMARK
if (i != 0 && !(i % (reps / 10)))
@@ -520,13 +614,14 @@ void this_cpu_list_push(struct percpu_list *list,
intptr_t *targetptr, newval, expect;
int ret;
- cpu = rseq_cpu_start();
+ cpu = get_current_cpu_id();
/* Load list->c[cpu].head with single-copy atomicity. */
expect = (intptr_t)RSEQ_READ_ONCE(list->c[cpu].head);
newval = (intptr_t)node;
targetptr = (intptr_t *)&list->c[cpu].head;
node->next = (struct percpu_list_node *)expect;
- ret = rseq_cmpeqv_storev(targetptr, expect, newval, cpu);
+ ret = rseq_cmpeqv_storev(RSEQ_MO_RELAXED, RSEQ_PERCPU,
+ targetptr, expect, newval, cpu);
if (rseq_likely(!ret))
break;
/* Retry if comparison fails or rseq aborts. */
@@ -549,16 +644,17 @@ struct percpu_list_node *this_cpu_list_pop(struct percpu_list *list,
for (;;) {
struct percpu_list_node *head;
intptr_t *targetptr, expectnot, *load;
- off_t offset;
+ long offset;
int ret;
- cpu = rseq_cpu_start();
+ cpu = get_current_cpu_id();
targetptr = (intptr_t *)&list->c[cpu].head;
expectnot = (intptr_t)NULL;
offset = offsetof(struct percpu_list_node, next);
load = (intptr_t *)&head;
- ret = rseq_cmpnev_storeoffp_load(targetptr, expectnot,
- offset, load, cpu);
+ ret = rseq_cmpnev_storeoffp_load(RSEQ_MO_RELAXED, RSEQ_PERCPU,
+ targetptr, expectnot,
+ offset, load, cpu);
if (rseq_likely(!ret)) {
node = head;
break;
@@ -629,7 +725,7 @@ void test_percpu_list(void)
/* Generate list entries for every usable cpu. */
sched_getaffinity(0, sizeof(allowed_cpus), &allowed_cpus);
for (i = 0; i < CPU_SETSIZE; i++) {
- if (!CPU_ISSET(i, &allowed_cpus))
+ if (rseq_use_cpu_index() && !CPU_ISSET(i, &allowed_cpus))
continue;
for (j = 1; j <= 100; j++) {
struct percpu_list_node *node;
@@ -666,7 +762,7 @@ void test_percpu_list(void)
for (i = 0; i < CPU_SETSIZE; i++) {
struct percpu_list_node *node;
- if (!CPU_ISSET(i, &allowed_cpus))
+ if (rseq_use_cpu_index() && !CPU_ISSET(i, &allowed_cpus))
continue;
while ((node = __percpu_list_pop(&list, i))) {
@@ -696,7 +792,7 @@ bool this_cpu_buffer_push(struct percpu_buffer *buffer,
intptr_t offset;
int ret;
- cpu = rseq_cpu_start();
+ cpu = get_current_cpu_id();
offset = RSEQ_READ_ONCE(buffer->c[cpu].offset);
if (offset == buffer->c[cpu].buflen)
break;
@@ -704,14 +800,9 @@ bool this_cpu_buffer_push(struct percpu_buffer *buffer,
targetptr_spec = (intptr_t *)&buffer->c[cpu].array[offset];
newval_final = offset + 1;
targetptr_final = &buffer->c[cpu].offset;
- if (opt_mb)
- ret = rseq_cmpeqv_trystorev_storev_release(
- targetptr_final, offset, targetptr_spec,
- newval_spec, newval_final, cpu);
- else
- ret = rseq_cmpeqv_trystorev_storev(targetptr_final,
- offset, targetptr_spec, newval_spec,
- newval_final, cpu);
+ ret = rseq_cmpeqv_trystorev_storev(opt_mo, RSEQ_PERCPU,
+ targetptr_final, offset, targetptr_spec,
+ newval_spec, newval_final, cpu);
if (rseq_likely(!ret)) {
result = true;
break;
@@ -734,7 +825,7 @@ struct percpu_buffer_node *this_cpu_buffer_pop(struct percpu_buffer *buffer,
intptr_t offset;
int ret;
- cpu = rseq_cpu_start();
+ cpu = get_current_cpu_id();
/* Load offset with single-copy atomicity. */
offset = RSEQ_READ_ONCE(buffer->c[cpu].offset);
if (offset == 0) {
@@ -744,7 +835,8 @@ struct percpu_buffer_node *this_cpu_buffer_pop(struct percpu_buffer *buffer,
head = RSEQ_READ_ONCE(buffer->c[cpu].array[offset - 1]);
newval = offset - 1;
targetptr = (intptr_t *)&buffer->c[cpu].offset;
- ret = rseq_cmpeqv_cmpeqv_storev(targetptr, offset,
+ ret = rseq_cmpeqv_cmpeqv_storev(RSEQ_MO_RELAXED, RSEQ_PERCPU,
+ targetptr, offset,
(intptr_t *)&buffer->c[cpu].array[offset - 1],
(intptr_t)head, newval, cpu);
if (rseq_likely(!ret))
@@ -820,7 +912,7 @@ void test_percpu_buffer(void)
/* Generate list entries for every usable cpu. */
sched_getaffinity(0, sizeof(allowed_cpus), &allowed_cpus);
for (i = 0; i < CPU_SETSIZE; i++) {
- if (!CPU_ISSET(i, &allowed_cpus))
+ if (rseq_use_cpu_index() && !CPU_ISSET(i, &allowed_cpus))
continue;
/* Worse-case is every item in same CPU. */
buffer.c[i].array =
@@ -870,7 +962,7 @@ void test_percpu_buffer(void)
for (i = 0; i < CPU_SETSIZE; i++) {
struct percpu_buffer_node *node;
- if (!CPU_ISSET(i, &allowed_cpus))
+ if (rseq_use_cpu_index() && !CPU_ISSET(i, &allowed_cpus))
continue;
while ((node = __percpu_buffer_pop(&buffer, i))) {
@@ -901,7 +993,7 @@ bool this_cpu_memcpy_buffer_push(struct percpu_memcpy_buffer *buffer,
size_t copylen;
int ret;
- cpu = rseq_cpu_start();
+ cpu = get_current_cpu_id();
/* Load offset with single-copy atomicity. */
offset = RSEQ_READ_ONCE(buffer->c[cpu].offset);
if (offset == buffer->c[cpu].buflen)
@@ -912,15 +1004,11 @@ bool this_cpu_memcpy_buffer_push(struct percpu_memcpy_buffer *buffer,
copylen = sizeof(item);
newval_final = offset + 1;
targetptr_final = &buffer->c[cpu].offset;
- if (opt_mb)
- ret = rseq_cmpeqv_trymemcpy_storev_release(
- targetptr_final, offset,
- destptr, srcptr, copylen,
- newval_final, cpu);
- else
- ret = rseq_cmpeqv_trymemcpy_storev(targetptr_final,
- offset, destptr, srcptr, copylen,
- newval_final, cpu);
+ ret = rseq_cmpeqv_trymemcpy_storev(
+ opt_mo, RSEQ_PERCPU,
+ targetptr_final, offset,
+ destptr, srcptr, copylen,
+ newval_final, cpu);
if (rseq_likely(!ret)) {
result = true;
break;
@@ -945,7 +1033,7 @@ bool this_cpu_memcpy_buffer_pop(struct percpu_memcpy_buffer *buffer,
size_t copylen;
int ret;
- cpu = rseq_cpu_start();
+ cpu = get_current_cpu_id();
/* Load offset with single-copy atomicity. */
offset = RSEQ_READ_ONCE(buffer->c[cpu].offset);
if (offset == 0)
@@ -956,8 +1044,8 @@ bool this_cpu_memcpy_buffer_pop(struct percpu_memcpy_buffer *buffer,
copylen = sizeof(*item);
newval_final = offset - 1;
targetptr_final = &buffer->c[cpu].offset;
- ret = rseq_cmpeqv_trymemcpy_storev(targetptr_final,
- offset, destptr, srcptr, copylen,
+ ret = rseq_cmpeqv_trymemcpy_storev(RSEQ_MO_RELAXED, RSEQ_PERCPU,
+ targetptr_final, offset, destptr, srcptr, copylen,
newval_final, cpu);
if (rseq_likely(!ret)) {
result = true;
@@ -1035,7 +1123,7 @@ void test_percpu_memcpy_buffer(void)
/* Generate list entries for every usable cpu. */
sched_getaffinity(0, sizeof(allowed_cpus), &allowed_cpus);
for (i = 0; i < CPU_SETSIZE; i++) {
- if (!CPU_ISSET(i, &allowed_cpus))
+ if (rseq_use_cpu_index() && !CPU_ISSET(i, &allowed_cpus))
continue;
/* Worse-case is every item in same CPU. */
buffer.c[i].array =
@@ -1082,7 +1170,7 @@ void test_percpu_memcpy_buffer(void)
for (i = 0; i < CPU_SETSIZE; i++) {
struct percpu_memcpy_buffer_node item;
- if (!CPU_ISSET(i, &allowed_cpus))
+ if (rseq_use_cpu_index() && !CPU_ISSET(i, &allowed_cpus))
continue;
while (__percpu_memcpy_buffer_pop(&buffer, &item, i)) {
@@ -1131,6 +1219,214 @@ static int set_signal_handler(void)
return ret;
}
+/* Test MEMBARRIER_CMD_PRIVATE_RESTART_RSEQ_ON_CPU membarrier command. */
+#ifdef TEST_MEMBARRIER
+struct test_membarrier_thread_args {
+ int stop;
+ intptr_t percpu_list_ptr;
+};
+
+/* Worker threads modify data in their "active" percpu lists. */
+void *test_membarrier_worker_thread(void *arg)
+{
+ struct test_membarrier_thread_args *args =
+ (struct test_membarrier_thread_args *)arg;
+ const int iters = opt_reps;
+ int i;
+
+ if (rseq_register_current_thread()) {
+ fprintf(stderr, "Error: rseq_register_current_thread(...) failed(%d): %s\n",
+ errno, strerror(errno));
+ abort();
+ }
+
+ /* Wait for initialization. */
+ while (!__atomic_load_n(&args->percpu_list_ptr, __ATOMIC_ACQUIRE)) {}
+
+ for (i = 0; i < iters; ++i) {
+ int ret;
+
+ do {
+ int cpu = get_current_cpu_id();
+
+ ret = rseq_offset_deref_addv(RSEQ_MO_RELAXED, RSEQ_PERCPU,
+ &args->percpu_list_ptr,
+ sizeof(struct percpu_list_entry) * cpu, 1, cpu);
+ } while (rseq_unlikely(ret));
+ }
+
+ if (rseq_unregister_current_thread()) {
+ fprintf(stderr, "Error: rseq_unregister_current_thread(...) failed(%d): %s\n",
+ errno, strerror(errno));
+ abort();
+ }
+ return NULL;
+}
+
+void test_membarrier_init_percpu_list(struct percpu_list *list)
+{
+ int i;
+
+ memset(list, 0, sizeof(*list));
+ for (i = 0; i < CPU_SETSIZE; i++) {
+ struct percpu_list_node *node;
+
+ node = malloc(sizeof(*node));
+ assert(node);
+ node->data = 0;
+ node->next = NULL;
+ list->c[i].head = node;
+ }
+}
+
+void test_membarrier_free_percpu_list(struct percpu_list *list)
+{
+ int i;
+
+ for (i = 0; i < CPU_SETSIZE; i++)
+ free(list->c[i].head);
+}
+
+/*
+ * The manager thread swaps per-cpu lists that worker threads see,
+ * and validates that there are no unexpected modifications.
+ */
+void *test_membarrier_manager_thread(void *arg)
+{
+ struct test_membarrier_thread_args *args =
+ (struct test_membarrier_thread_args *)arg;
+ struct percpu_list list_a, list_b;
+ intptr_t expect_a = 0, expect_b = 0;
+ int cpu_a = 0, cpu_b = 0;
+
+ if (rseq_register_current_thread()) {
+ fprintf(stderr, "Error: rseq_register_current_thread(...) failed(%d): %s\n",
+ errno, strerror(errno));
+ abort();
+ }
+
+ /* Init lists. */
+ test_membarrier_init_percpu_list(&list_a);
+ test_membarrier_init_percpu_list(&list_b);
+
+ __atomic_store_n(&args->percpu_list_ptr, (intptr_t)&list_a, __ATOMIC_RELEASE);
+
+ while (!__atomic_load_n(&args->stop, __ATOMIC_ACQUIRE)) {
+ /* list_a is "active". */
+ cpu_a = rand() % CPU_SETSIZE;
+ /*
+ * As list_b is "inactive", we should never see changes
+ * to list_b.
+ */
+ if (expect_b != __atomic_load_n(&list_b.c[cpu_b].head->data, __ATOMIC_ACQUIRE)) {
+ fprintf(stderr, "Membarrier test failed\n");
+ abort();
+ }
+
+ /* Make list_b "active". */
+ __atomic_store_n(&args->percpu_list_ptr, (intptr_t)&list_b, __ATOMIC_RELEASE);
+ if (rseq_membarrier_expedited(cpu_a) &&
+ errno != ENXIO /* missing CPU */) {
+ perror("sys_membarrier");
+ abort();
+ }
+ /*
+ * Cpu A should now only modify list_b, so the values
+ * in list_a should be stable.
+ */
+ expect_a = __atomic_load_n(&list_a.c[cpu_a].head->data, __ATOMIC_ACQUIRE);
+
+ cpu_b = rand() % CPU_SETSIZE;
+ /*
+ * As list_a is "inactive", we should never see changes
+ * to list_a.
+ */
+ if (expect_a != __atomic_load_n(&list_a.c[cpu_a].head->data, __ATOMIC_ACQUIRE)) {
+ fprintf(stderr, "Membarrier test failed\n");
+ abort();
+ }
+
+ /* Make list_a "active". */
+ __atomic_store_n(&args->percpu_list_ptr, (intptr_t)&list_a, __ATOMIC_RELEASE);
+ if (rseq_membarrier_expedited(cpu_b) &&
+ errno != ENXIO /* missing CPU*/) {
+ perror("sys_membarrier");
+ abort();
+ }
+ /* Remember a value from list_b. */
+ expect_b = __atomic_load_n(&list_b.c[cpu_b].head->data, __ATOMIC_ACQUIRE);
+ }
+
+ test_membarrier_free_percpu_list(&list_a);
+ test_membarrier_free_percpu_list(&list_b);
+
+ if (rseq_unregister_current_thread()) {
+ fprintf(stderr, "Error: rseq_unregister_current_thread(...) failed(%d): %s\n",
+ errno, strerror(errno));
+ abort();
+ }
+ return NULL;
+}
+
+void test_membarrier(void)
+{
+ const int num_threads = opt_threads;
+ struct test_membarrier_thread_args thread_args;
+ pthread_t worker_threads[num_threads];
+ pthread_t manager_thread;
+ int i, ret;
+
+ if (sys_membarrier(MEMBARRIER_CMD_REGISTER_PRIVATE_EXPEDITED_RSEQ, 0, 0)) {
+ perror("sys_membarrier");
+ abort();
+ }
+
+ thread_args.stop = 0;
+ thread_args.percpu_list_ptr = 0;
+ ret = pthread_create(&manager_thread, NULL,
+ test_membarrier_manager_thread, &thread_args);
+ if (ret) {
+ errno = ret;
+ perror("pthread_create");
+ abort();
+ }
+
+ for (i = 0; i < num_threads; i++) {
+ ret = pthread_create(&worker_threads[i], NULL,
+ test_membarrier_worker_thread, &thread_args);
+ if (ret) {
+ errno = ret;
+ perror("pthread_create");
+ abort();
+ }
+ }
+
+
+ for (i = 0; i < num_threads; i++) {
+ ret = pthread_join(worker_threads[i], NULL);
+ if (ret) {
+ errno = ret;
+ perror("pthread_join");
+ abort();
+ }
+ }
+
+ __atomic_store_n(&thread_args.stop, 1, __ATOMIC_RELEASE);
+ ret = pthread_join(manager_thread, NULL);
+ if (ret) {
+ errno = ret;
+ perror("pthread_join");
+ abort();
+ }
+}
+#else /* TEST_MEMBARRIER */
+void test_membarrier(void)
+{
+ fprintf(stderr, "rseq_offset_deref_addv is not implemented on this architecture. "
+ "Skipping membarrier test.\n");
+}
+#endif
+
static void show_usage(int argc, char **argv)
{
printf("Usage : %s <OPTIONS>\n",
@@ -1153,7 +1449,7 @@ static void show_usage(int argc, char **argv)
printf(" [-r N] Number of repetitions per thread (default 5000)\n");
printf(" [-d] Disable rseq system call (no initialization)\n");
printf(" [-D M] Disable rseq for each M threads\n");
- printf(" [-T test] Choose test: (s)pinlock, (l)ist, (b)uffer, (m)emcpy, (i)ncrement\n");
+ printf(" [-T test] Choose test: (s)pinlock, (l)ist, (b)uffer, (m)emcpy, (i)ncrement, membarrie(r)\n");
printf(" [-M] Push into buffer and memcpy buffer with memory barriers.\n");
printf(" [-v] Verbose output.\n");
printf(" [-h] Show this help.\n");
@@ -1268,6 +1564,7 @@ int main(int argc, char **argv)
case 'i':
case 'b':
case 'm':
+ case 'r':
break;
default:
show_usage(argc, argv);
@@ -1279,7 +1576,7 @@ int main(int argc, char **argv)
verbose = 1;
break;
case 'M':
- opt_mb = 1;
+ opt_mo = RSEQ_MO_RELEASE;
break;
default:
show_usage(argc, argv);
@@ -1299,6 +1596,10 @@ int main(int argc, char **argv)
if (!opt_disable_rseq && rseq_register_current_thread())
goto error;
+ if (!opt_disable_rseq && !rseq_validate_cpu_id()) {
+ fprintf(stderr, "Error: cpu id getter unavailable\n");
+ goto error;
+ }
switch (opt_test) {
case 's':
printf_verbose("spinlock\n");
@@ -1320,6 +1621,10 @@ int main(int argc, char **argv)
printf_verbose("counter increment\n");
test_percpu_inc();
break;
+ case 'r':
+ printf_verbose("membarrier\n");
+ test_membarrier();
+ break;
}
if (!opt_disable_rseq && rseq_unregister_current_thread())
abort();
diff --git a/tools/testing/selftests/rseq/rseq-abi.h b/tools/testing/selftests/rseq/rseq-abi.h
new file mode 100644
index 000000000000..fb4ec8a75dd4
--- /dev/null
+++ b/tools/testing/selftests/rseq/rseq-abi.h
@@ -0,0 +1,173 @@
+/* SPDX-License-Identifier: GPL-2.0+ WITH Linux-syscall-note */
+#ifndef _RSEQ_ABI_H
+#define _RSEQ_ABI_H
+
+/*
+ * rseq-abi.h
+ *
+ * Restartable sequences system call API
+ *
+ * Copyright (c) 2015-2022 Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
+ */
+
+#include <linux/types.h>
+#include <asm/byteorder.h>
+
+enum rseq_abi_cpu_id_state {
+ RSEQ_ABI_CPU_ID_UNINITIALIZED = -1,
+ RSEQ_ABI_CPU_ID_REGISTRATION_FAILED = -2,
+};
+
+enum rseq_abi_flags {
+ RSEQ_ABI_FLAG_UNREGISTER = (1 << 0),
+};
+
+enum rseq_abi_cs_flags_bit {
+ RSEQ_ABI_CS_FLAG_NO_RESTART_ON_PREEMPT_BIT = 0,
+ RSEQ_ABI_CS_FLAG_NO_RESTART_ON_SIGNAL_BIT = 1,
+ RSEQ_ABI_CS_FLAG_NO_RESTART_ON_MIGRATE_BIT = 2,
+};
+
+enum rseq_abi_cs_flags {
+ RSEQ_ABI_CS_FLAG_NO_RESTART_ON_PREEMPT =
+ (1U << RSEQ_ABI_CS_FLAG_NO_RESTART_ON_PREEMPT_BIT),
+ RSEQ_ABI_CS_FLAG_NO_RESTART_ON_SIGNAL =
+ (1U << RSEQ_ABI_CS_FLAG_NO_RESTART_ON_SIGNAL_BIT),
+ RSEQ_ABI_CS_FLAG_NO_RESTART_ON_MIGRATE =
+ (1U << RSEQ_ABI_CS_FLAG_NO_RESTART_ON_MIGRATE_BIT),
+};
+
+/*
+ * struct rseq_abi_cs is aligned on 4 * 8 bytes to ensure it is always
+ * contained within a single cache-line. It is usually declared as
+ * link-time constant data.
+ */
+struct rseq_abi_cs {
+ /* Version of this structure. */
+ __u32 version;
+ /* enum rseq_abi_cs_flags */
+ __u32 flags;
+ __u64 start_ip;
+ /* Offset from start_ip. */
+ __u64 post_commit_offset;
+ __u64 abort_ip;
+} __attribute__((aligned(4 * sizeof(__u64))));
+
+/*
+ * struct rseq_abi is aligned on 4 * 8 bytes to ensure it is always
+ * contained within a single cache-line.
+ *
+ * A single struct rseq_abi per thread is allowed.
+ */
+struct rseq_abi {
+ /*
+ * Restartable sequences cpu_id_start field. Updated by the
+ * kernel. Read by user-space with single-copy atomicity
+ * semantics. This field should only be read by the thread which
+ * registered this data structure. Aligned on 32-bit. Always
+ * contains a value in the range of possible CPUs, although the
+ * value may not be the actual current CPU (e.g. if rseq is not
+ * initialized). This CPU number value should always be compared
+ * against the value of the cpu_id field before performing a rseq
+ * commit or returning a value read from a data structure indexed
+ * using the cpu_id_start value.
+ */
+ __u32 cpu_id_start;
+ /*
+ * Restartable sequences cpu_id field. Updated by the kernel.
+ * Read by user-space with single-copy atomicity semantics. This
+ * field should only be read by the thread which registered this
+ * data structure. Aligned on 32-bit. Values
+ * RSEQ_CPU_ID_UNINITIALIZED and RSEQ_CPU_ID_REGISTRATION_FAILED
+ * have a special semantic: the former means "rseq uninitialized",
+ * and latter means "rseq initialization failed". This value is
+ * meant to be read within rseq critical sections and compared
+ * with the cpu_id_start value previously read, before performing
+ * the commit instruction, or read and compared with the
+ * cpu_id_start value before returning a value loaded from a data
+ * structure indexed using the cpu_id_start value.
+ */
+ __u32 cpu_id;
+ /*
+ * Restartable sequences rseq_cs field.
+ *
+ * Contains NULL when no critical section is active for the current
+ * thread, or holds a pointer to the currently active struct rseq_cs.
+ *
+ * Updated by user-space, which sets the address of the currently
+ * active rseq_cs at the beginning of assembly instruction sequence
+ * block, and set to NULL by the kernel when it restarts an assembly
+ * instruction sequence block, as well as when the kernel detects that
+ * it is preempting or delivering a signal outside of the range
+ * targeted by the rseq_cs. Also needs to be set to NULL by user-space
+ * before reclaiming memory that contains the targeted struct rseq_cs.
+ *
+ * Read and set by the kernel. Set by user-space with single-copy
+ * atomicity semantics. This field should only be updated by the
+ * thread which registered this data structure. Aligned on 64-bit.
+ */
+ union {
+ __u64 ptr64;
+
+ /*
+ * The "arch" field provides architecture accessor for
+ * the ptr field based on architecture pointer size and
+ * endianness.
+ */
+ struct {
+#ifdef __LP64__
+ __u64 ptr;
+#elif defined(__BYTE_ORDER) ? (__BYTE_ORDER == __BIG_ENDIAN) : defined(__BIG_ENDIAN)
+ __u32 padding; /* Initialized to zero. */
+ __u32 ptr;
+#else
+ __u32 ptr;
+ __u32 padding; /* Initialized to zero. */
+#endif
+ } arch;
+ } rseq_cs;
+
+ /*
+ * Restartable sequences flags field.
+ *
+ * This field should only be updated by the thread which
+ * registered this data structure. Read by the kernel.
+ * Mainly used for single-stepping through rseq critical sections
+ * with debuggers.
+ *
+ * - RSEQ_ABI_CS_FLAG_NO_RESTART_ON_PREEMPT
+ * Inhibit instruction sequence block restart on preemption
+ * for this thread.
+ * - RSEQ_ABI_CS_FLAG_NO_RESTART_ON_SIGNAL
+ * Inhibit instruction sequence block restart on signal
+ * delivery for this thread.
+ * - RSEQ_ABI_CS_FLAG_NO_RESTART_ON_MIGRATE
+ * Inhibit instruction sequence block restart on migration for
+ * this thread.
+ */
+ __u32 flags;
+
+ /*
+ * Restartable sequences node_id field. Updated by the kernel. Read by
+ * user-space with single-copy atomicity semantics. This field should
+ * only be read by the thread which registered this data structure.
+ * Aligned on 32-bit. Contains the current NUMA node ID.
+ */
+ __u32 node_id;
+
+ /*
+ * Restartable sequences mm_cid field. Updated by the kernel. Read by
+ * user-space with single-copy atomicity semantics. This field should
+ * only be read by the thread which registered this data structure.
+ * Aligned on 32-bit. Contains the current thread's concurrency ID
+ * (allocated uniquely within a memory map).
+ */
+ __u32 mm_cid;
+
+ /*
+ * Flexible array member at end of structure, after last feature field.
+ */
+ char end[];
+} __attribute__((aligned(4 * sizeof(__u64))));
+
+#endif /* _RSEQ_ABI_H */
diff --git a/tools/testing/selftests/rseq/rseq-arm-bits.h b/tools/testing/selftests/rseq/rseq-arm-bits.h
new file mode 100644
index 000000000000..4f03cb395462
--- /dev/null
+++ b/tools/testing/selftests/rseq/rseq-arm-bits.h
@@ -0,0 +1,505 @@
+/* SPDX-License-Identifier: LGPL-2.1 OR MIT */
+/*
+ * rseq-arm-bits.h
+ *
+ * (C) Copyright 2016-2022 - Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
+ */
+
+#include "rseq-bits-template.h"
+
+#if defined(RSEQ_TEMPLATE_MO_RELAXED) && \
+ (defined(RSEQ_TEMPLATE_CPU_ID) || defined(RSEQ_TEMPLATE_MM_CID))
+
+static inline __attribute__((always_inline))
+int RSEQ_TEMPLATE_IDENTIFIER(rseq_cmpeqv_storev)(intptr_t *v, intptr_t expect, intptr_t newv, int cpu)
+{
+ RSEQ_INJECT_C(9)
+
+ __asm__ __volatile__ goto (
+ RSEQ_ASM_DEFINE_TABLE(9, 1f, 2f, 4f) /* start, commit, abort */
+ RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[cmpfail])
+#ifdef RSEQ_COMPARE_TWICE
+ RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[error1])
+ RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[error2])
+#endif
+ /* Start rseq by storing table entry pointer into rseq_cs. */
+ RSEQ_ASM_STORE_RSEQ_CS(1, 3f, rseq_cs)
+ RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, 4f)
+ RSEQ_INJECT_ASM(3)
+ "ldr r0, %[v]\n\t"
+ "cmp %[expect], r0\n\t"
+ "bne %l[cmpfail]\n\t"
+ RSEQ_INJECT_ASM(4)
+#ifdef RSEQ_COMPARE_TWICE
+ RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, %l[error1])
+ "ldr r0, %[v]\n\t"
+ "cmp %[expect], r0\n\t"
+ "bne %l[error2]\n\t"
+#endif
+ /* final store */
+ "str %[newv], %[v]\n\t"
+ "2:\n\t"
+ RSEQ_INJECT_ASM(5)
+ "b 5f\n\t"
+ RSEQ_ASM_DEFINE_ABORT(3, 4, "", abort, 1b, 2b, 4f)
+ "5:\n\t"
+ : /* gcc asm goto does not allow outputs */
+ : [cpu_id] "r" (cpu),
+ [current_cpu_id] "m" (rseq_get_abi()->RSEQ_TEMPLATE_CPU_ID_FIELD),
+ [rseq_cs] "m" (rseq_get_abi()->rseq_cs.arch.ptr),
+ [v] "m" (*v),
+ [expect] "r" (expect),
+ [newv] "r" (newv)
+ RSEQ_INJECT_INPUT
+ : "r0", "memory", "cc"
+ RSEQ_INJECT_CLOBBER
+ : abort, cmpfail
+#ifdef RSEQ_COMPARE_TWICE
+ , error1, error2
+#endif
+ );
+ rseq_after_asm_goto();
+ return 0;
+abort:
+ rseq_after_asm_goto();
+ RSEQ_INJECT_FAILED
+ return -1;
+cmpfail:
+ rseq_after_asm_goto();
+ return 1;
+#ifdef RSEQ_COMPARE_TWICE
+error1:
+ rseq_after_asm_goto();
+ rseq_bug("cpu_id comparison failed");
+error2:
+ rseq_after_asm_goto();
+ rseq_bug("expected value comparison failed");
+#endif
+}
+
+static inline __attribute__((always_inline))
+int RSEQ_TEMPLATE_IDENTIFIER(rseq_cmpnev_storeoffp_load)(intptr_t *v, intptr_t expectnot,
+ long voffp, intptr_t *load, int cpu)
+{
+ RSEQ_INJECT_C(9)
+
+ __asm__ __volatile__ goto (
+ RSEQ_ASM_DEFINE_TABLE(9, 1f, 2f, 4f) /* start, commit, abort */
+ RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[cmpfail])
+#ifdef RSEQ_COMPARE_TWICE
+ RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[error1])
+ RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[error2])
+#endif
+ /* Start rseq by storing table entry pointer into rseq_cs. */
+ RSEQ_ASM_STORE_RSEQ_CS(1, 3f, rseq_cs)
+ RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, 4f)
+ RSEQ_INJECT_ASM(3)
+ "ldr r0, %[v]\n\t"
+ "cmp %[expectnot], r0\n\t"
+ "beq %l[cmpfail]\n\t"
+ RSEQ_INJECT_ASM(4)
+#ifdef RSEQ_COMPARE_TWICE
+ RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, %l[error1])
+ "ldr r0, %[v]\n\t"
+ "cmp %[expectnot], r0\n\t"
+ "beq %l[error2]\n\t"
+#endif
+ "str r0, %[load]\n\t"
+ "add r0, %[voffp]\n\t"
+ "ldr r0, [r0]\n\t"
+ /* final store */
+ "str r0, %[v]\n\t"
+ "2:\n\t"
+ RSEQ_INJECT_ASM(5)
+ "b 5f\n\t"
+ RSEQ_ASM_DEFINE_ABORT(3, 4, "", abort, 1b, 2b, 4f)
+ "5:\n\t"
+ : /* gcc asm goto does not allow outputs */
+ : [cpu_id] "r" (cpu),
+ [current_cpu_id] "m" (rseq_get_abi()->RSEQ_TEMPLATE_CPU_ID_FIELD),
+ [rseq_cs] "m" (rseq_get_abi()->rseq_cs.arch.ptr),
+ /* final store input */
+ [v] "m" (*v),
+ [expectnot] "r" (expectnot),
+ [voffp] "Ir" (voffp),
+ [load] "m" (*load)
+ RSEQ_INJECT_INPUT
+ : "r0", "memory", "cc"
+ RSEQ_INJECT_CLOBBER
+ : abort, cmpfail
+#ifdef RSEQ_COMPARE_TWICE
+ , error1, error2
+#endif
+ );
+ rseq_after_asm_goto();
+ return 0;
+abort:
+ rseq_after_asm_goto();
+ RSEQ_INJECT_FAILED
+ return -1;
+cmpfail:
+ rseq_after_asm_goto();
+ return 1;
+#ifdef RSEQ_COMPARE_TWICE
+error1:
+ rseq_after_asm_goto();
+ rseq_bug("cpu_id comparison failed");
+error2:
+ rseq_after_asm_goto();
+ rseq_bug("expected value comparison failed");
+#endif
+}
+
+static inline __attribute__((always_inline))
+int RSEQ_TEMPLATE_IDENTIFIER(rseq_addv)(intptr_t *v, intptr_t count, int cpu)
+{
+ RSEQ_INJECT_C(9)
+
+ __asm__ __volatile__ goto (
+ RSEQ_ASM_DEFINE_TABLE(9, 1f, 2f, 4f) /* start, commit, abort */
+#ifdef RSEQ_COMPARE_TWICE
+ RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[error1])
+#endif
+ /* Start rseq by storing table entry pointer into rseq_cs. */
+ RSEQ_ASM_STORE_RSEQ_CS(1, 3f, rseq_cs)
+ RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, 4f)
+ RSEQ_INJECT_ASM(3)
+#ifdef RSEQ_COMPARE_TWICE
+ RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, %l[error1])
+#endif
+ "ldr r0, %[v]\n\t"
+ "add r0, %[count]\n\t"
+ /* final store */
+ "str r0, %[v]\n\t"
+ "2:\n\t"
+ RSEQ_INJECT_ASM(4)
+ "b 5f\n\t"
+ RSEQ_ASM_DEFINE_ABORT(3, 4, "", abort, 1b, 2b, 4f)
+ "5:\n\t"
+ : /* gcc asm goto does not allow outputs */
+ : [cpu_id] "r" (cpu),
+ [current_cpu_id] "m" (rseq_get_abi()->RSEQ_TEMPLATE_CPU_ID_FIELD),
+ [rseq_cs] "m" (rseq_get_abi()->rseq_cs.arch.ptr),
+ [v] "m" (*v),
+ [count] "Ir" (count)
+ RSEQ_INJECT_INPUT
+ : "r0", "memory", "cc"
+ RSEQ_INJECT_CLOBBER
+ : abort
+#ifdef RSEQ_COMPARE_TWICE
+ , error1
+#endif
+ );
+ rseq_after_asm_goto();
+ return 0;
+abort:
+ rseq_after_asm_goto();
+ RSEQ_INJECT_FAILED
+ return -1;
+#ifdef RSEQ_COMPARE_TWICE
+error1:
+ rseq_after_asm_goto();
+ rseq_bug("cpu_id comparison failed");
+#endif
+}
+
+static inline __attribute__((always_inline))
+int RSEQ_TEMPLATE_IDENTIFIER(rseq_cmpeqv_cmpeqv_storev)(intptr_t *v, intptr_t expect,
+ intptr_t *v2, intptr_t expect2,
+ intptr_t newv, int cpu)
+{
+ RSEQ_INJECT_C(9)
+
+ __asm__ __volatile__ goto (
+ RSEQ_ASM_DEFINE_TABLE(9, 1f, 2f, 4f) /* start, commit, abort */
+ RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[cmpfail])
+#ifdef RSEQ_COMPARE_TWICE
+ RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[error1])
+ RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[error2])
+ RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[error3])
+#endif
+ /* Start rseq by storing table entry pointer into rseq_cs. */
+ RSEQ_ASM_STORE_RSEQ_CS(1, 3f, rseq_cs)
+ RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, 4f)
+ RSEQ_INJECT_ASM(3)
+ "ldr r0, %[v]\n\t"
+ "cmp %[expect], r0\n\t"
+ "bne %l[cmpfail]\n\t"
+ RSEQ_INJECT_ASM(4)
+ "ldr r0, %[v2]\n\t"
+ "cmp %[expect2], r0\n\t"
+ "bne %l[cmpfail]\n\t"
+ RSEQ_INJECT_ASM(5)
+#ifdef RSEQ_COMPARE_TWICE
+ RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, %l[error1])
+ "ldr r0, %[v]\n\t"
+ "cmp %[expect], r0\n\t"
+ "bne %l[error2]\n\t"
+ "ldr r0, %[v2]\n\t"
+ "cmp %[expect2], r0\n\t"
+ "bne %l[error3]\n\t"
+#endif
+ /* final store */
+ "str %[newv], %[v]\n\t"
+ "2:\n\t"
+ RSEQ_INJECT_ASM(6)
+ "b 5f\n\t"
+ RSEQ_ASM_DEFINE_ABORT(3, 4, "", abort, 1b, 2b, 4f)
+ "5:\n\t"
+ : /* gcc asm goto does not allow outputs */
+ : [cpu_id] "r" (cpu),
+ [current_cpu_id] "m" (rseq_get_abi()->RSEQ_TEMPLATE_CPU_ID_FIELD),
+ [rseq_cs] "m" (rseq_get_abi()->rseq_cs.arch.ptr),
+ /* cmp2 input */
+ [v2] "m" (*v2),
+ [expect2] "r" (expect2),
+ /* final store input */
+ [v] "m" (*v),
+ [expect] "r" (expect),
+ [newv] "r" (newv)
+ RSEQ_INJECT_INPUT
+ : "r0", "memory", "cc"
+ RSEQ_INJECT_CLOBBER
+ : abort, cmpfail
+#ifdef RSEQ_COMPARE_TWICE
+ , error1, error2, error3
+#endif
+ );
+ rseq_after_asm_goto();
+ return 0;
+abort:
+ rseq_after_asm_goto();
+ RSEQ_INJECT_FAILED
+ return -1;
+cmpfail:
+ rseq_after_asm_goto();
+ return 1;
+#ifdef RSEQ_COMPARE_TWICE
+error1:
+ rseq_after_asm_goto();
+ rseq_bug("cpu_id comparison failed");
+error2:
+ rseq_after_asm_goto();
+ rseq_bug("1st expected value comparison failed");
+error3:
+ rseq_after_asm_goto();
+ rseq_bug("2nd expected value comparison failed");
+#endif
+}
+
+#endif /* #if defined(RSEQ_TEMPLATE_MO_RELAXED) &&
+ (defined(RSEQ_TEMPLATE_CPU_ID) || defined(RSEQ_TEMPLATE_MM_CID)) */
+
+#if (defined(RSEQ_TEMPLATE_MO_RELAXED) || defined(RSEQ_TEMPLATE_MO_RELEASE)) && \
+ (defined(RSEQ_TEMPLATE_CPU_ID) || defined(RSEQ_TEMPLATE_MM_CID))
+
+static inline __attribute__((always_inline))
+int RSEQ_TEMPLATE_IDENTIFIER(rseq_cmpeqv_trystorev_storev)(intptr_t *v, intptr_t expect,
+ intptr_t *v2, intptr_t newv2,
+ intptr_t newv, int cpu)
+{
+ RSEQ_INJECT_C(9)
+
+ __asm__ __volatile__ goto (
+ RSEQ_ASM_DEFINE_TABLE(9, 1f, 2f, 4f) /* start, commit, abort */
+ RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[cmpfail])
+#ifdef RSEQ_COMPARE_TWICE
+ RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[error1])
+ RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[error2])
+#endif
+ /* Start rseq by storing table entry pointer into rseq_cs. */
+ RSEQ_ASM_STORE_RSEQ_CS(1, 3f, rseq_cs)
+ RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, 4f)
+ RSEQ_INJECT_ASM(3)
+ "ldr r0, %[v]\n\t"
+ "cmp %[expect], r0\n\t"
+ "bne %l[cmpfail]\n\t"
+ RSEQ_INJECT_ASM(4)
+#ifdef RSEQ_COMPARE_TWICE
+ RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, %l[error1])
+ "ldr r0, %[v]\n\t"
+ "cmp %[expect], r0\n\t"
+ "bne %l[error2]\n\t"
+#endif
+ /* try store */
+ "str %[newv2], %[v2]\n\t"
+ RSEQ_INJECT_ASM(5)
+#ifdef RSEQ_TEMPLATE_MO_RELEASE
+ "dmb\n\t" /* full mb provides store-release */
+#endif
+ /* final store */
+ "str %[newv], %[v]\n\t"
+ "2:\n\t"
+ RSEQ_INJECT_ASM(6)
+ "b 5f\n\t"
+ RSEQ_ASM_DEFINE_ABORT(3, 4, "", abort, 1b, 2b, 4f)
+ "5:\n\t"
+ : /* gcc asm goto does not allow outputs */
+ : [cpu_id] "r" (cpu),
+ [current_cpu_id] "m" (rseq_get_abi()->RSEQ_TEMPLATE_CPU_ID_FIELD),
+ [rseq_cs] "m" (rseq_get_abi()->rseq_cs.arch.ptr),
+ /* try store input */
+ [v2] "m" (*v2),
+ [newv2] "r" (newv2),
+ /* final store input */
+ [v] "m" (*v),
+ [expect] "r" (expect),
+ [newv] "r" (newv)
+ RSEQ_INJECT_INPUT
+ : "r0", "memory", "cc"
+ RSEQ_INJECT_CLOBBER
+ : abort, cmpfail
+#ifdef RSEQ_COMPARE_TWICE
+ , error1, error2
+#endif
+ );
+ rseq_after_asm_goto();
+ return 0;
+abort:
+ rseq_after_asm_goto();
+ RSEQ_INJECT_FAILED
+ return -1;
+cmpfail:
+ rseq_after_asm_goto();
+ return 1;
+#ifdef RSEQ_COMPARE_TWICE
+error1:
+ rseq_after_asm_goto();
+ rseq_bug("cpu_id comparison failed");
+error2:
+ rseq_after_asm_goto();
+ rseq_bug("expected value comparison failed");
+#endif
+}
+
+
+static inline __attribute__((always_inline))
+int RSEQ_TEMPLATE_IDENTIFIER(rseq_cmpeqv_trymemcpy_storev)(intptr_t *v, intptr_t expect,
+ void *dst, void *src, size_t len,
+ intptr_t newv, int cpu)
+{
+ uint32_t rseq_scratch[3];
+
+ RSEQ_INJECT_C(9)
+
+ __asm__ __volatile__ goto (
+ RSEQ_ASM_DEFINE_TABLE(9, 1f, 2f, 4f) /* start, commit, abort */
+ RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[cmpfail])
+#ifdef RSEQ_COMPARE_TWICE
+ RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[error1])
+ RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[error2])
+#endif
+ "str %[src], %[rseq_scratch0]\n\t"
+ "str %[dst], %[rseq_scratch1]\n\t"
+ "str %[len], %[rseq_scratch2]\n\t"
+ /* Start rseq by storing table entry pointer into rseq_cs. */
+ RSEQ_ASM_STORE_RSEQ_CS(1, 3f, rseq_cs)
+ RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, 4f)
+ RSEQ_INJECT_ASM(3)
+ "ldr r0, %[v]\n\t"
+ "cmp %[expect], r0\n\t"
+ "bne 5f\n\t"
+ RSEQ_INJECT_ASM(4)
+#ifdef RSEQ_COMPARE_TWICE
+ RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, 6f)
+ "ldr r0, %[v]\n\t"
+ "cmp %[expect], r0\n\t"
+ "bne 7f\n\t"
+#endif
+ /* try memcpy */
+ "cmp %[len], #0\n\t" \
+ "beq 333f\n\t" \
+ "222:\n\t" \
+ "ldrb %%r0, [%[src]]\n\t" \
+ "strb %%r0, [%[dst]]\n\t" \
+ "adds %[src], #1\n\t" \
+ "adds %[dst], #1\n\t" \
+ "subs %[len], #1\n\t" \
+ "bne 222b\n\t" \
+ "333:\n\t" \
+ RSEQ_INJECT_ASM(5)
+#ifdef RSEQ_TEMPLATE_MO_RELEASE
+ "dmb\n\t" /* full mb provides store-release */
+#endif
+ /* final store */
+ "str %[newv], %[v]\n\t"
+ "2:\n\t"
+ RSEQ_INJECT_ASM(6)
+ /* teardown */
+ "ldr %[len], %[rseq_scratch2]\n\t"
+ "ldr %[dst], %[rseq_scratch1]\n\t"
+ "ldr %[src], %[rseq_scratch0]\n\t"
+ "b 8f\n\t"
+ RSEQ_ASM_DEFINE_ABORT(3, 4,
+ /* teardown */
+ "ldr %[len], %[rseq_scratch2]\n\t"
+ "ldr %[dst], %[rseq_scratch1]\n\t"
+ "ldr %[src], %[rseq_scratch0]\n\t",
+ abort, 1b, 2b, 4f)
+ RSEQ_ASM_DEFINE_CMPFAIL(5,
+ /* teardown */
+ "ldr %[len], %[rseq_scratch2]\n\t"
+ "ldr %[dst], %[rseq_scratch1]\n\t"
+ "ldr %[src], %[rseq_scratch0]\n\t",
+ cmpfail)
+#ifdef RSEQ_COMPARE_TWICE
+ RSEQ_ASM_DEFINE_CMPFAIL(6,
+ /* teardown */
+ "ldr %[len], %[rseq_scratch2]\n\t"
+ "ldr %[dst], %[rseq_scratch1]\n\t"
+ "ldr %[src], %[rseq_scratch0]\n\t",
+ error1)
+ RSEQ_ASM_DEFINE_CMPFAIL(7,
+ /* teardown */
+ "ldr %[len], %[rseq_scratch2]\n\t"
+ "ldr %[dst], %[rseq_scratch1]\n\t"
+ "ldr %[src], %[rseq_scratch0]\n\t",
+ error2)
+#endif
+ "8:\n\t"
+ : /* gcc asm goto does not allow outputs */
+ : [cpu_id] "r" (cpu),
+ [current_cpu_id] "m" (rseq_get_abi()->RSEQ_TEMPLATE_CPU_ID_FIELD),
+ [rseq_cs] "m" (rseq_get_abi()->rseq_cs.arch.ptr),
+ /* final store input */
+ [v] "m" (*v),
+ [expect] "r" (expect),
+ [newv] "r" (newv),
+ /* try memcpy input */
+ [dst] "r" (dst),
+ [src] "r" (src),
+ [len] "r" (len),
+ [rseq_scratch0] "m" (rseq_scratch[0]),
+ [rseq_scratch1] "m" (rseq_scratch[1]),
+ [rseq_scratch2] "m" (rseq_scratch[2])
+ RSEQ_INJECT_INPUT
+ : "r0", "memory", "cc"
+ RSEQ_INJECT_CLOBBER
+ : abort, cmpfail
+#ifdef RSEQ_COMPARE_TWICE
+ , error1, error2
+#endif
+ );
+ rseq_after_asm_goto();
+ return 0;
+abort:
+ rseq_after_asm_goto();
+ RSEQ_INJECT_FAILED
+ return -1;
+cmpfail:
+ rseq_after_asm_goto();
+ return 1;
+#ifdef RSEQ_COMPARE_TWICE
+error1:
+ rseq_after_asm_goto();
+ rseq_bug("cpu_id comparison failed");
+error2:
+ rseq_after_asm_goto();
+ rseq_bug("expected value comparison failed");
+#endif
+}
+
+#endif /* #if (defined(RSEQ_TEMPLATE_MO_RELAXED) || defined(RSEQ_TEMPLATE_MO_RELEASE)) &&
+ (defined(RSEQ_TEMPLATE_CPU_ID) || defined(RSEQ_TEMPLATE_MM_CID)) */
+
+#include "rseq-bits-reset.h"
diff --git a/tools/testing/selftests/rseq/rseq-arm.h b/tools/testing/selftests/rseq/rseq-arm.h
index 5943c816c07c..d887b3bbe257 100644
--- a/tools/testing/selftests/rseq/rseq-arm.h
+++ b/tools/testing/selftests/rseq/rseq-arm.h
@@ -2,7 +2,7 @@
/*
* rseq-arm.h
*
- * (C) Copyright 2016-2018 - Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
+ * (C) Copyright 2016-2022 - Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
*/
/*
@@ -66,7 +66,7 @@
#define rseq_smp_load_acquire(p) \
__extension__ ({ \
- __typeof(*p) ____p1 = RSEQ_READ_ONCE(*p); \
+ rseq_unqual_scalar_typeof(*(p)) ____p1 = RSEQ_READ_ONCE(*(p)); \
rseq_smp_mb(); \
____p1; \
})
@@ -76,13 +76,9 @@ __extension__ ({ \
#define rseq_smp_store_release(p, v) \
do { \
rseq_smp_mb(); \
- RSEQ_WRITE_ONCE(*p, v); \
+ RSEQ_WRITE_ONCE(*(p), v); \
} while (0)
-#ifdef RSEQ_SKIP_FASTPATH
-#include "rseq-skip.h"
-#else /* !RSEQ_SKIP_FASTPATH */
-
#define __RSEQ_ASM_DEFINE_TABLE(label, version, flags, start_ip, \
post_commit_offset, abort_ip) \
".pushsection __rseq_cs, \"aw\"\n\t" \
@@ -147,679 +143,34 @@ do { \
teardown \
"b %l[" __rseq_str(cmpfail_label) "]\n\t"
-#define rseq_workaround_gcc_asm_size_guess() __asm__ __volatile__("")
-
-static inline __attribute__((always_inline))
-int rseq_cmpeqv_storev(intptr_t *v, intptr_t expect, intptr_t newv, int cpu)
-{
- RSEQ_INJECT_C(9)
-
- rseq_workaround_gcc_asm_size_guess();
- __asm__ __volatile__ goto (
- RSEQ_ASM_DEFINE_TABLE(9, 1f, 2f, 4f) /* start, commit, abort */
- RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[cmpfail])
-#ifdef RSEQ_COMPARE_TWICE
- RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[error1])
- RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[error2])
-#endif
- /* Start rseq by storing table entry pointer into rseq_cs. */
- RSEQ_ASM_STORE_RSEQ_CS(1, 3f, rseq_cs)
- RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, 4f)
- RSEQ_INJECT_ASM(3)
- "ldr r0, %[v]\n\t"
- "cmp %[expect], r0\n\t"
- "bne %l[cmpfail]\n\t"
- RSEQ_INJECT_ASM(4)
-#ifdef RSEQ_COMPARE_TWICE
- RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, %l[error1])
- "ldr r0, %[v]\n\t"
- "cmp %[expect], r0\n\t"
- "bne %l[error2]\n\t"
-#endif
- /* final store */
- "str %[newv], %[v]\n\t"
- "2:\n\t"
- RSEQ_INJECT_ASM(5)
- "b 5f\n\t"
- RSEQ_ASM_DEFINE_ABORT(3, 4, "", abort, 1b, 2b, 4f)
- "5:\n\t"
- : /* gcc asm goto does not allow outputs */
- : [cpu_id] "r" (cpu),
- [current_cpu_id] "m" (__rseq_abi.cpu_id),
- [rseq_cs] "m" (__rseq_abi.rseq_cs),
- [v] "m" (*v),
- [expect] "r" (expect),
- [newv] "r" (newv)
- RSEQ_INJECT_INPUT
- : "r0", "memory", "cc"
- RSEQ_INJECT_CLOBBER
- : abort, cmpfail
-#ifdef RSEQ_COMPARE_TWICE
- , error1, error2
-#endif
- );
- rseq_workaround_gcc_asm_size_guess();
- return 0;
-abort:
- rseq_workaround_gcc_asm_size_guess();
- RSEQ_INJECT_FAILED
- return -1;
-cmpfail:
- rseq_workaround_gcc_asm_size_guess();
- return 1;
-#ifdef RSEQ_COMPARE_TWICE
-error1:
- rseq_bug("cpu_id comparison failed");
-error2:
- rseq_bug("expected value comparison failed");
-#endif
-}
-
-static inline __attribute__((always_inline))
-int rseq_cmpnev_storeoffp_load(intptr_t *v, intptr_t expectnot,
- off_t voffp, intptr_t *load, int cpu)
-{
- RSEQ_INJECT_C(9)
-
- rseq_workaround_gcc_asm_size_guess();
- __asm__ __volatile__ goto (
- RSEQ_ASM_DEFINE_TABLE(9, 1f, 2f, 4f) /* start, commit, abort */
- RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[cmpfail])
-#ifdef RSEQ_COMPARE_TWICE
- RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[error1])
- RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[error2])
-#endif
- /* Start rseq by storing table entry pointer into rseq_cs. */
- RSEQ_ASM_STORE_RSEQ_CS(1, 3f, rseq_cs)
- RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, 4f)
- RSEQ_INJECT_ASM(3)
- "ldr r0, %[v]\n\t"
- "cmp %[expectnot], r0\n\t"
- "beq %l[cmpfail]\n\t"
- RSEQ_INJECT_ASM(4)
-#ifdef RSEQ_COMPARE_TWICE
- RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, %l[error1])
- "ldr r0, %[v]\n\t"
- "cmp %[expectnot], r0\n\t"
- "beq %l[error2]\n\t"
-#endif
- "str r0, %[load]\n\t"
- "add r0, %[voffp]\n\t"
- "ldr r0, [r0]\n\t"
- /* final store */
- "str r0, %[v]\n\t"
- "2:\n\t"
- RSEQ_INJECT_ASM(5)
- "b 5f\n\t"
- RSEQ_ASM_DEFINE_ABORT(3, 4, "", abort, 1b, 2b, 4f)
- "5:\n\t"
- : /* gcc asm goto does not allow outputs */
- : [cpu_id] "r" (cpu),
- [current_cpu_id] "m" (__rseq_abi.cpu_id),
- [rseq_cs] "m" (__rseq_abi.rseq_cs),
- /* final store input */
- [v] "m" (*v),
- [expectnot] "r" (expectnot),
- [voffp] "Ir" (voffp),
- [load] "m" (*load)
- RSEQ_INJECT_INPUT
- : "r0", "memory", "cc"
- RSEQ_INJECT_CLOBBER
- : abort, cmpfail
-#ifdef RSEQ_COMPARE_TWICE
- , error1, error2
-#endif
- );
- rseq_workaround_gcc_asm_size_guess();
- return 0;
-abort:
- rseq_workaround_gcc_asm_size_guess();
- RSEQ_INJECT_FAILED
- return -1;
-cmpfail:
- rseq_workaround_gcc_asm_size_guess();
- return 1;
-#ifdef RSEQ_COMPARE_TWICE
-error1:
- rseq_bug("cpu_id comparison failed");
-error2:
- rseq_bug("expected value comparison failed");
-#endif
-}
+/* Per-cpu-id indexing. */
-static inline __attribute__((always_inline))
-int rseq_addv(intptr_t *v, intptr_t count, int cpu)
-{
- RSEQ_INJECT_C(9)
+#define RSEQ_TEMPLATE_CPU_ID
+#define RSEQ_TEMPLATE_MO_RELAXED
+#include "rseq-arm-bits.h"
+#undef RSEQ_TEMPLATE_MO_RELAXED
- rseq_workaround_gcc_asm_size_guess();
- __asm__ __volatile__ goto (
- RSEQ_ASM_DEFINE_TABLE(9, 1f, 2f, 4f) /* start, commit, abort */
-#ifdef RSEQ_COMPARE_TWICE
- RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[error1])
-#endif
- /* Start rseq by storing table entry pointer into rseq_cs. */
- RSEQ_ASM_STORE_RSEQ_CS(1, 3f, rseq_cs)
- RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, 4f)
- RSEQ_INJECT_ASM(3)
-#ifdef RSEQ_COMPARE_TWICE
- RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, %l[error1])
-#endif
- "ldr r0, %[v]\n\t"
- "add r0, %[count]\n\t"
- /* final store */
- "str r0, %[v]\n\t"
- "2:\n\t"
- RSEQ_INJECT_ASM(4)
- "b 5f\n\t"
- RSEQ_ASM_DEFINE_ABORT(3, 4, "", abort, 1b, 2b, 4f)
- "5:\n\t"
- : /* gcc asm goto does not allow outputs */
- : [cpu_id] "r" (cpu),
- [current_cpu_id] "m" (__rseq_abi.cpu_id),
- [rseq_cs] "m" (__rseq_abi.rseq_cs),
- [v] "m" (*v),
- [count] "Ir" (count)
- RSEQ_INJECT_INPUT
- : "r0", "memory", "cc"
- RSEQ_INJECT_CLOBBER
- : abort
-#ifdef RSEQ_COMPARE_TWICE
- , error1
-#endif
- );
- rseq_workaround_gcc_asm_size_guess();
- return 0;
-abort:
- rseq_workaround_gcc_asm_size_guess();
- RSEQ_INJECT_FAILED
- return -1;
-#ifdef RSEQ_COMPARE_TWICE
-error1:
- rseq_bug("cpu_id comparison failed");
-#endif
-}
+#define RSEQ_TEMPLATE_MO_RELEASE
+#include "rseq-arm-bits.h"
+#undef RSEQ_TEMPLATE_MO_RELEASE
+#undef RSEQ_TEMPLATE_CPU_ID
-static inline __attribute__((always_inline))
-int rseq_cmpeqv_trystorev_storev(intptr_t *v, intptr_t expect,
- intptr_t *v2, intptr_t newv2,
- intptr_t newv, int cpu)
-{
- RSEQ_INJECT_C(9)
+/* Per-mm-cid indexing. */
- rseq_workaround_gcc_asm_size_guess();
- __asm__ __volatile__ goto (
- RSEQ_ASM_DEFINE_TABLE(9, 1f, 2f, 4f) /* start, commit, abort */
- RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[cmpfail])
-#ifdef RSEQ_COMPARE_TWICE
- RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[error1])
- RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[error2])
-#endif
- /* Start rseq by storing table entry pointer into rseq_cs. */
- RSEQ_ASM_STORE_RSEQ_CS(1, 3f, rseq_cs)
- RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, 4f)
- RSEQ_INJECT_ASM(3)
- "ldr r0, %[v]\n\t"
- "cmp %[expect], r0\n\t"
- "bne %l[cmpfail]\n\t"
- RSEQ_INJECT_ASM(4)
-#ifdef RSEQ_COMPARE_TWICE
- RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, %l[error1])
- "ldr r0, %[v]\n\t"
- "cmp %[expect], r0\n\t"
- "bne %l[error2]\n\t"
-#endif
- /* try store */
- "str %[newv2], %[v2]\n\t"
- RSEQ_INJECT_ASM(5)
- /* final store */
- "str %[newv], %[v]\n\t"
- "2:\n\t"
- RSEQ_INJECT_ASM(6)
- "b 5f\n\t"
- RSEQ_ASM_DEFINE_ABORT(3, 4, "", abort, 1b, 2b, 4f)
- "5:\n\t"
- : /* gcc asm goto does not allow outputs */
- : [cpu_id] "r" (cpu),
- [current_cpu_id] "m" (__rseq_abi.cpu_id),
- [rseq_cs] "m" (__rseq_abi.rseq_cs),
- /* try store input */
- [v2] "m" (*v2),
- [newv2] "r" (newv2),
- /* final store input */
- [v] "m" (*v),
- [expect] "r" (expect),
- [newv] "r" (newv)
- RSEQ_INJECT_INPUT
- : "r0", "memory", "cc"
- RSEQ_INJECT_CLOBBER
- : abort, cmpfail
-#ifdef RSEQ_COMPARE_TWICE
- , error1, error2
-#endif
- );
- rseq_workaround_gcc_asm_size_guess();
- return 0;
-abort:
- rseq_workaround_gcc_asm_size_guess();
- RSEQ_INJECT_FAILED
- return -1;
-cmpfail:
- rseq_workaround_gcc_asm_size_guess();
- return 1;
-#ifdef RSEQ_COMPARE_TWICE
-error1:
- rseq_bug("cpu_id comparison failed");
-error2:
- rseq_bug("expected value comparison failed");
-#endif
-}
+#define RSEQ_TEMPLATE_MM_CID
+#define RSEQ_TEMPLATE_MO_RELAXED
+#include "rseq-arm-bits.h"
+#undef RSEQ_TEMPLATE_MO_RELAXED
-static inline __attribute__((always_inline))
-int rseq_cmpeqv_trystorev_storev_release(intptr_t *v, intptr_t expect,
- intptr_t *v2, intptr_t newv2,
- intptr_t newv, int cpu)
-{
- RSEQ_INJECT_C(9)
+#define RSEQ_TEMPLATE_MO_RELEASE
+#include "rseq-arm-bits.h"
+#undef RSEQ_TEMPLATE_MO_RELEASE
+#undef RSEQ_TEMPLATE_MM_CID
- rseq_workaround_gcc_asm_size_guess();
- __asm__ __volatile__ goto (
- RSEQ_ASM_DEFINE_TABLE(9, 1f, 2f, 4f) /* start, commit, abort */
- RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[cmpfail])
-#ifdef RSEQ_COMPARE_TWICE
- RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[error1])
- RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[error2])
-#endif
- /* Start rseq by storing table entry pointer into rseq_cs. */
- RSEQ_ASM_STORE_RSEQ_CS(1, 3f, rseq_cs)
- RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, 4f)
- RSEQ_INJECT_ASM(3)
- "ldr r0, %[v]\n\t"
- "cmp %[expect], r0\n\t"
- "bne %l[cmpfail]\n\t"
- RSEQ_INJECT_ASM(4)
-#ifdef RSEQ_COMPARE_TWICE
- RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, %l[error1])
- "ldr r0, %[v]\n\t"
- "cmp %[expect], r0\n\t"
- "bne %l[error2]\n\t"
-#endif
- /* try store */
- "str %[newv2], %[v2]\n\t"
- RSEQ_INJECT_ASM(5)
- "dmb\n\t" /* full mb provides store-release */
- /* final store */
- "str %[newv], %[v]\n\t"
- "2:\n\t"
- RSEQ_INJECT_ASM(6)
- "b 5f\n\t"
- RSEQ_ASM_DEFINE_ABORT(3, 4, "", abort, 1b, 2b, 4f)
- "5:\n\t"
- : /* gcc asm goto does not allow outputs */
- : [cpu_id] "r" (cpu),
- [current_cpu_id] "m" (__rseq_abi.cpu_id),
- [rseq_cs] "m" (__rseq_abi.rseq_cs),
- /* try store input */
- [v2] "m" (*v2),
- [newv2] "r" (newv2),
- /* final store input */
- [v] "m" (*v),
- [expect] "r" (expect),
- [newv] "r" (newv)
- RSEQ_INJECT_INPUT
- : "r0", "memory", "cc"
- RSEQ_INJECT_CLOBBER
- : abort, cmpfail
-#ifdef RSEQ_COMPARE_TWICE
- , error1, error2
-#endif
- );
- rseq_workaround_gcc_asm_size_guess();
- return 0;
-abort:
- rseq_workaround_gcc_asm_size_guess();
- RSEQ_INJECT_FAILED
- return -1;
-cmpfail:
- rseq_workaround_gcc_asm_size_guess();
- return 1;
-#ifdef RSEQ_COMPARE_TWICE
-error1:
- rseq_bug("cpu_id comparison failed");
-error2:
- rseq_bug("expected value comparison failed");
-#endif
-}
-
-static inline __attribute__((always_inline))
-int rseq_cmpeqv_cmpeqv_storev(intptr_t *v, intptr_t expect,
- intptr_t *v2, intptr_t expect2,
- intptr_t newv, int cpu)
-{
- RSEQ_INJECT_C(9)
-
- rseq_workaround_gcc_asm_size_guess();
- __asm__ __volatile__ goto (
- RSEQ_ASM_DEFINE_TABLE(9, 1f, 2f, 4f) /* start, commit, abort */
- RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[cmpfail])
-#ifdef RSEQ_COMPARE_TWICE
- RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[error1])
- RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[error2])
- RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[error3])
-#endif
- /* Start rseq by storing table entry pointer into rseq_cs. */
- RSEQ_ASM_STORE_RSEQ_CS(1, 3f, rseq_cs)
- RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, 4f)
- RSEQ_INJECT_ASM(3)
- "ldr r0, %[v]\n\t"
- "cmp %[expect], r0\n\t"
- "bne %l[cmpfail]\n\t"
- RSEQ_INJECT_ASM(4)
- "ldr r0, %[v2]\n\t"
- "cmp %[expect2], r0\n\t"
- "bne %l[cmpfail]\n\t"
- RSEQ_INJECT_ASM(5)
-#ifdef RSEQ_COMPARE_TWICE
- RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, %l[error1])
- "ldr r0, %[v]\n\t"
- "cmp %[expect], r0\n\t"
- "bne %l[error2]\n\t"
- "ldr r0, %[v2]\n\t"
- "cmp %[expect2], r0\n\t"
- "bne %l[error3]\n\t"
-#endif
- /* final store */
- "str %[newv], %[v]\n\t"
- "2:\n\t"
- RSEQ_INJECT_ASM(6)
- "b 5f\n\t"
- RSEQ_ASM_DEFINE_ABORT(3, 4, "", abort, 1b, 2b, 4f)
- "5:\n\t"
- : /* gcc asm goto does not allow outputs */
- : [cpu_id] "r" (cpu),
- [current_cpu_id] "m" (__rseq_abi.cpu_id),
- [rseq_cs] "m" (__rseq_abi.rseq_cs),
- /* cmp2 input */
- [v2] "m" (*v2),
- [expect2] "r" (expect2),
- /* final store input */
- [v] "m" (*v),
- [expect] "r" (expect),
- [newv] "r" (newv)
- RSEQ_INJECT_INPUT
- : "r0", "memory", "cc"
- RSEQ_INJECT_CLOBBER
- : abort, cmpfail
-#ifdef RSEQ_COMPARE_TWICE
- , error1, error2, error3
-#endif
- );
- rseq_workaround_gcc_asm_size_guess();
- return 0;
-abort:
- rseq_workaround_gcc_asm_size_guess();
- RSEQ_INJECT_FAILED
- return -1;
-cmpfail:
- rseq_workaround_gcc_asm_size_guess();
- return 1;
-#ifdef RSEQ_COMPARE_TWICE
-error1:
- rseq_bug("cpu_id comparison failed");
-error2:
- rseq_bug("1st expected value comparison failed");
-error3:
- rseq_bug("2nd expected value comparison failed");
-#endif
-}
-
-static inline __attribute__((always_inline))
-int rseq_cmpeqv_trymemcpy_storev(intptr_t *v, intptr_t expect,
- void *dst, void *src, size_t len,
- intptr_t newv, int cpu)
-{
- uint32_t rseq_scratch[3];
-
- RSEQ_INJECT_C(9)
-
- rseq_workaround_gcc_asm_size_guess();
- __asm__ __volatile__ goto (
- RSEQ_ASM_DEFINE_TABLE(9, 1f, 2f, 4f) /* start, commit, abort */
- RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[cmpfail])
-#ifdef RSEQ_COMPARE_TWICE
- RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[error1])
- RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[error2])
-#endif
- "str %[src], %[rseq_scratch0]\n\t"
- "str %[dst], %[rseq_scratch1]\n\t"
- "str %[len], %[rseq_scratch2]\n\t"
- /* Start rseq by storing table entry pointer into rseq_cs. */
- RSEQ_ASM_STORE_RSEQ_CS(1, 3f, rseq_cs)
- RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, 4f)
- RSEQ_INJECT_ASM(3)
- "ldr r0, %[v]\n\t"
- "cmp %[expect], r0\n\t"
- "bne 5f\n\t"
- RSEQ_INJECT_ASM(4)
-#ifdef RSEQ_COMPARE_TWICE
- RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, 6f)
- "ldr r0, %[v]\n\t"
- "cmp %[expect], r0\n\t"
- "bne 7f\n\t"
-#endif
- /* try memcpy */
- "cmp %[len], #0\n\t" \
- "beq 333f\n\t" \
- "222:\n\t" \
- "ldrb %%r0, [%[src]]\n\t" \
- "strb %%r0, [%[dst]]\n\t" \
- "adds %[src], #1\n\t" \
- "adds %[dst], #1\n\t" \
- "subs %[len], #1\n\t" \
- "bne 222b\n\t" \
- "333:\n\t" \
- RSEQ_INJECT_ASM(5)
- /* final store */
- "str %[newv], %[v]\n\t"
- "2:\n\t"
- RSEQ_INJECT_ASM(6)
- /* teardown */
- "ldr %[len], %[rseq_scratch2]\n\t"
- "ldr %[dst], %[rseq_scratch1]\n\t"
- "ldr %[src], %[rseq_scratch0]\n\t"
- "b 8f\n\t"
- RSEQ_ASM_DEFINE_ABORT(3, 4,
- /* teardown */
- "ldr %[len], %[rseq_scratch2]\n\t"
- "ldr %[dst], %[rseq_scratch1]\n\t"
- "ldr %[src], %[rseq_scratch0]\n\t",
- abort, 1b, 2b, 4f)
- RSEQ_ASM_DEFINE_CMPFAIL(5,
- /* teardown */
- "ldr %[len], %[rseq_scratch2]\n\t"
- "ldr %[dst], %[rseq_scratch1]\n\t"
- "ldr %[src], %[rseq_scratch0]\n\t",
- cmpfail)
-#ifdef RSEQ_COMPARE_TWICE
- RSEQ_ASM_DEFINE_CMPFAIL(6,
- /* teardown */
- "ldr %[len], %[rseq_scratch2]\n\t"
- "ldr %[dst], %[rseq_scratch1]\n\t"
- "ldr %[src], %[rseq_scratch0]\n\t",
- error1)
- RSEQ_ASM_DEFINE_CMPFAIL(7,
- /* teardown */
- "ldr %[len], %[rseq_scratch2]\n\t"
- "ldr %[dst], %[rseq_scratch1]\n\t"
- "ldr %[src], %[rseq_scratch0]\n\t",
- error2)
-#endif
- "8:\n\t"
- : /* gcc asm goto does not allow outputs */
- : [cpu_id] "r" (cpu),
- [current_cpu_id] "m" (__rseq_abi.cpu_id),
- [rseq_cs] "m" (__rseq_abi.rseq_cs),
- /* final store input */
- [v] "m" (*v),
- [expect] "r" (expect),
- [newv] "r" (newv),
- /* try memcpy input */
- [dst] "r" (dst),
- [src] "r" (src),
- [len] "r" (len),
- [rseq_scratch0] "m" (rseq_scratch[0]),
- [rseq_scratch1] "m" (rseq_scratch[1]),
- [rseq_scratch2] "m" (rseq_scratch[2])
- RSEQ_INJECT_INPUT
- : "r0", "memory", "cc"
- RSEQ_INJECT_CLOBBER
- : abort, cmpfail
-#ifdef RSEQ_COMPARE_TWICE
- , error1, error2
-#endif
- );
- rseq_workaround_gcc_asm_size_guess();
- return 0;
-abort:
- rseq_workaround_gcc_asm_size_guess();
- RSEQ_INJECT_FAILED
- return -1;
-cmpfail:
- rseq_workaround_gcc_asm_size_guess();
- return 1;
-#ifdef RSEQ_COMPARE_TWICE
-error1:
- rseq_workaround_gcc_asm_size_guess();
- rseq_bug("cpu_id comparison failed");
-error2:
- rseq_workaround_gcc_asm_size_guess();
- rseq_bug("expected value comparison failed");
-#endif
-}
-
-static inline __attribute__((always_inline))
-int rseq_cmpeqv_trymemcpy_storev_release(intptr_t *v, intptr_t expect,
- void *dst, void *src, size_t len,
- intptr_t newv, int cpu)
-{
- uint32_t rseq_scratch[3];
-
- RSEQ_INJECT_C(9)
-
- rseq_workaround_gcc_asm_size_guess();
- __asm__ __volatile__ goto (
- RSEQ_ASM_DEFINE_TABLE(9, 1f, 2f, 4f) /* start, commit, abort */
- RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[cmpfail])
-#ifdef RSEQ_COMPARE_TWICE
- RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[error1])
- RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[error2])
-#endif
- "str %[src], %[rseq_scratch0]\n\t"
- "str %[dst], %[rseq_scratch1]\n\t"
- "str %[len], %[rseq_scratch2]\n\t"
- /* Start rseq by storing table entry pointer into rseq_cs. */
- RSEQ_ASM_STORE_RSEQ_CS(1, 3f, rseq_cs)
- RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, 4f)
- RSEQ_INJECT_ASM(3)
- "ldr r0, %[v]\n\t"
- "cmp %[expect], r0\n\t"
- "bne 5f\n\t"
- RSEQ_INJECT_ASM(4)
-#ifdef RSEQ_COMPARE_TWICE
- RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, 6f)
- "ldr r0, %[v]\n\t"
- "cmp %[expect], r0\n\t"
- "bne 7f\n\t"
-#endif
- /* try memcpy */
- "cmp %[len], #0\n\t" \
- "beq 333f\n\t" \
- "222:\n\t" \
- "ldrb %%r0, [%[src]]\n\t" \
- "strb %%r0, [%[dst]]\n\t" \
- "adds %[src], #1\n\t" \
- "adds %[dst], #1\n\t" \
- "subs %[len], #1\n\t" \
- "bne 222b\n\t" \
- "333:\n\t" \
- RSEQ_INJECT_ASM(5)
- "dmb\n\t" /* full mb provides store-release */
- /* final store */
- "str %[newv], %[v]\n\t"
- "2:\n\t"
- RSEQ_INJECT_ASM(6)
- /* teardown */
- "ldr %[len], %[rseq_scratch2]\n\t"
- "ldr %[dst], %[rseq_scratch1]\n\t"
- "ldr %[src], %[rseq_scratch0]\n\t"
- "b 8f\n\t"
- RSEQ_ASM_DEFINE_ABORT(3, 4,
- /* teardown */
- "ldr %[len], %[rseq_scratch2]\n\t"
- "ldr %[dst], %[rseq_scratch1]\n\t"
- "ldr %[src], %[rseq_scratch0]\n\t",
- abort, 1b, 2b, 4f)
- RSEQ_ASM_DEFINE_CMPFAIL(5,
- /* teardown */
- "ldr %[len], %[rseq_scratch2]\n\t"
- "ldr %[dst], %[rseq_scratch1]\n\t"
- "ldr %[src], %[rseq_scratch0]\n\t",
- cmpfail)
-#ifdef RSEQ_COMPARE_TWICE
- RSEQ_ASM_DEFINE_CMPFAIL(6,
- /* teardown */
- "ldr %[len], %[rseq_scratch2]\n\t"
- "ldr %[dst], %[rseq_scratch1]\n\t"
- "ldr %[src], %[rseq_scratch0]\n\t",
- error1)
- RSEQ_ASM_DEFINE_CMPFAIL(7,
- /* teardown */
- "ldr %[len], %[rseq_scratch2]\n\t"
- "ldr %[dst], %[rseq_scratch1]\n\t"
- "ldr %[src], %[rseq_scratch0]\n\t",
- error2)
-#endif
- "8:\n\t"
- : /* gcc asm goto does not allow outputs */
- : [cpu_id] "r" (cpu),
- [current_cpu_id] "m" (__rseq_abi.cpu_id),
- [rseq_cs] "m" (__rseq_abi.rseq_cs),
- /* final store input */
- [v] "m" (*v),
- [expect] "r" (expect),
- [newv] "r" (newv),
- /* try memcpy input */
- [dst] "r" (dst),
- [src] "r" (src),
- [len] "r" (len),
- [rseq_scratch0] "m" (rseq_scratch[0]),
- [rseq_scratch1] "m" (rseq_scratch[1]),
- [rseq_scratch2] "m" (rseq_scratch[2])
- RSEQ_INJECT_INPUT
- : "r0", "memory", "cc"
- RSEQ_INJECT_CLOBBER
- : abort, cmpfail
-#ifdef RSEQ_COMPARE_TWICE
- , error1, error2
-#endif
- );
- rseq_workaround_gcc_asm_size_guess();
- return 0;
-abort:
- rseq_workaround_gcc_asm_size_guess();
- RSEQ_INJECT_FAILED
- return -1;
-cmpfail:
- rseq_workaround_gcc_asm_size_guess();
- return 1;
-#ifdef RSEQ_COMPARE_TWICE
-error1:
- rseq_workaround_gcc_asm_size_guess();
- rseq_bug("cpu_id comparison failed");
-error2:
- rseq_workaround_gcc_asm_size_guess();
- rseq_bug("expected value comparison failed");
-#endif
-}
+/* APIs which are not based on cpu ids. */
-#endif /* !RSEQ_SKIP_FASTPATH */
+#define RSEQ_TEMPLATE_CPU_ID_NONE
+#define RSEQ_TEMPLATE_MO_RELAXED
+#include "rseq-arm-bits.h"
+#undef RSEQ_TEMPLATE_MO_RELAXED
+#undef RSEQ_TEMPLATE_CPU_ID_NONE
diff --git a/tools/testing/selftests/rseq/rseq-arm64-bits.h b/tools/testing/selftests/rseq/rseq-arm64-bits.h
new file mode 100644
index 000000000000..cc7226b1efe1
--- /dev/null
+++ b/tools/testing/selftests/rseq/rseq-arm64-bits.h
@@ -0,0 +1,392 @@
+/* SPDX-License-Identifier: LGPL-2.1 OR MIT */
+/*
+ * rseq-arm64-bits.h
+ *
+ * (C) Copyright 2016-2022 - Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
+ * (C) Copyright 2018 - Will Deacon <will.deacon@arm.com>
+ */
+
+#include "rseq-bits-template.h"
+
+#if defined(RSEQ_TEMPLATE_MO_RELAXED) && \
+ (defined(RSEQ_TEMPLATE_CPU_ID) || defined(RSEQ_TEMPLATE_MM_CID))
+
+static inline __attribute__((always_inline))
+int RSEQ_TEMPLATE_IDENTIFIER(rseq_cmpeqv_storev)(intptr_t *v, intptr_t expect, intptr_t newv, int cpu)
+{
+ RSEQ_INJECT_C(9)
+
+ __asm__ __volatile__ goto (
+ RSEQ_ASM_DEFINE_TABLE(1, 2f, 3f, 4f)
+ RSEQ_ASM_DEFINE_EXIT_POINT(2f, %l[cmpfail])
+#ifdef RSEQ_COMPARE_TWICE
+ RSEQ_ASM_DEFINE_EXIT_POINT(2f, %l[error1])
+ RSEQ_ASM_DEFINE_EXIT_POINT(2f, %l[error2])
+#endif
+ RSEQ_ASM_STORE_RSEQ_CS(2, 1b, rseq_cs)
+ RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, 4f)
+ RSEQ_INJECT_ASM(3)
+ RSEQ_ASM_OP_CMPEQ(v, expect, %l[cmpfail])
+ RSEQ_INJECT_ASM(4)
+#ifdef RSEQ_COMPARE_TWICE
+ RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, %l[error1])
+ RSEQ_ASM_OP_CMPEQ(v, expect, %l[error2])
+#endif
+ RSEQ_ASM_OP_FINAL_STORE(newv, v, 3)
+ RSEQ_INJECT_ASM(5)
+ RSEQ_ASM_DEFINE_ABORT(4, abort)
+ : /* gcc asm goto does not allow outputs */
+ : [cpu_id] "r" (cpu),
+ [current_cpu_id] "Qo" (rseq_get_abi()->RSEQ_TEMPLATE_CPU_ID_FIELD),
+ [rseq_cs] "m" (rseq_get_abi()->rseq_cs.arch.ptr),
+ [v] "Qo" (*v),
+ [expect] "r" (expect),
+ [newv] "r" (newv)
+ RSEQ_INJECT_INPUT
+ : "memory", RSEQ_ASM_TMP_REG
+ : abort, cmpfail
+#ifdef RSEQ_COMPARE_TWICE
+ , error1, error2
+#endif
+ );
+ rseq_after_asm_goto();
+ return 0;
+abort:
+ rseq_after_asm_goto();
+ RSEQ_INJECT_FAILED
+ return -1;
+cmpfail:
+ rseq_after_asm_goto();
+ return 1;
+#ifdef RSEQ_COMPARE_TWICE
+error1:
+ rseq_after_asm_goto();
+ rseq_bug("cpu_id comparison failed");
+error2:
+ rseq_after_asm_goto();
+ rseq_bug("expected value comparison failed");
+#endif
+}
+
+static inline __attribute__((always_inline))
+int RSEQ_TEMPLATE_IDENTIFIER(rseq_cmpnev_storeoffp_load)(intptr_t *v, intptr_t expectnot,
+ long voffp, intptr_t *load, int cpu)
+{
+ RSEQ_INJECT_C(9)
+
+ __asm__ __volatile__ goto (
+ RSEQ_ASM_DEFINE_TABLE(1, 2f, 3f, 4f)
+ RSEQ_ASM_DEFINE_EXIT_POINT(2f, %l[cmpfail])
+#ifdef RSEQ_COMPARE_TWICE
+ RSEQ_ASM_DEFINE_EXIT_POINT(2f, %l[error1])
+ RSEQ_ASM_DEFINE_EXIT_POINT(2f, %l[error2])
+#endif
+ RSEQ_ASM_STORE_RSEQ_CS(2, 1b, rseq_cs)
+ RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, 4f)
+ RSEQ_INJECT_ASM(3)
+ RSEQ_ASM_OP_CMPNE(v, expectnot, %l[cmpfail])
+ RSEQ_INJECT_ASM(4)
+#ifdef RSEQ_COMPARE_TWICE
+ RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, %l[error1])
+ RSEQ_ASM_OP_CMPNE(v, expectnot, %l[error2])
+#endif
+ RSEQ_ASM_OP_R_LOAD(v)
+ RSEQ_ASM_OP_R_STORE(load)
+ RSEQ_ASM_OP_R_LOAD_OFF(voffp)
+ RSEQ_ASM_OP_R_FINAL_STORE(v, 3)
+ RSEQ_INJECT_ASM(5)
+ RSEQ_ASM_DEFINE_ABORT(4, abort)
+ : /* gcc asm goto does not allow outputs */
+ : [cpu_id] "r" (cpu),
+ [current_cpu_id] "Qo" (rseq_get_abi()->RSEQ_TEMPLATE_CPU_ID_FIELD),
+ [rseq_cs] "m" (rseq_get_abi()->rseq_cs.arch.ptr),
+ [v] "Qo" (*v),
+ [expectnot] "r" (expectnot),
+ [load] "Qo" (*load),
+ [voffp] "r" (voffp)
+ RSEQ_INJECT_INPUT
+ : "memory", RSEQ_ASM_TMP_REG
+ : abort, cmpfail
+#ifdef RSEQ_COMPARE_TWICE
+ , error1, error2
+#endif
+ );
+ rseq_after_asm_goto();
+ return 0;
+abort:
+ rseq_after_asm_goto();
+ RSEQ_INJECT_FAILED
+ return -1;
+cmpfail:
+ rseq_after_asm_goto();
+ return 1;
+#ifdef RSEQ_COMPARE_TWICE
+error1:
+ rseq_after_asm_goto();
+ rseq_bug("cpu_id comparison failed");
+error2:
+ rseq_after_asm_goto();
+ rseq_bug("expected value comparison failed");
+#endif
+}
+
+static inline __attribute__((always_inline))
+int RSEQ_TEMPLATE_IDENTIFIER(rseq_addv)(intptr_t *v, intptr_t count, int cpu)
+{
+ RSEQ_INJECT_C(9)
+
+ __asm__ __volatile__ goto (
+ RSEQ_ASM_DEFINE_TABLE(1, 2f, 3f, 4f)
+#ifdef RSEQ_COMPARE_TWICE
+ RSEQ_ASM_DEFINE_EXIT_POINT(2f, %l[error1])
+#endif
+ RSEQ_ASM_STORE_RSEQ_CS(2, 1b, rseq_cs)
+ RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, 4f)
+ RSEQ_INJECT_ASM(3)
+#ifdef RSEQ_COMPARE_TWICE
+ RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, %l[error1])
+#endif
+ RSEQ_ASM_OP_R_LOAD(v)
+ RSEQ_ASM_OP_R_ADD(count)
+ RSEQ_ASM_OP_R_FINAL_STORE(v, 3)
+ RSEQ_INJECT_ASM(4)
+ RSEQ_ASM_DEFINE_ABORT(4, abort)
+ : /* gcc asm goto does not allow outputs */
+ : [cpu_id] "r" (cpu),
+ [current_cpu_id] "Qo" (rseq_get_abi()->RSEQ_TEMPLATE_CPU_ID_FIELD),
+ [rseq_cs] "m" (rseq_get_abi()->rseq_cs.arch.ptr),
+ [v] "Qo" (*v),
+ [count] "r" (count)
+ RSEQ_INJECT_INPUT
+ : "memory", RSEQ_ASM_TMP_REG
+ : abort
+#ifdef RSEQ_COMPARE_TWICE
+ , error1
+#endif
+ );
+ rseq_after_asm_goto();
+ return 0;
+abort:
+ rseq_after_asm_goto();
+ RSEQ_INJECT_FAILED
+ return -1;
+#ifdef RSEQ_COMPARE_TWICE
+error1:
+ rseq_after_asm_goto();
+ rseq_bug("cpu_id comparison failed");
+#endif
+}
+
+static inline __attribute__((always_inline))
+int RSEQ_TEMPLATE_IDENTIFIER(rseq_cmpeqv_cmpeqv_storev)(intptr_t *v, intptr_t expect,
+ intptr_t *v2, intptr_t expect2,
+ intptr_t newv, int cpu)
+{
+ RSEQ_INJECT_C(9)
+
+ __asm__ __volatile__ goto (
+ RSEQ_ASM_DEFINE_TABLE(1, 2f, 3f, 4f)
+ RSEQ_ASM_DEFINE_EXIT_POINT(2f, %l[cmpfail])
+#ifdef RSEQ_COMPARE_TWICE
+ RSEQ_ASM_DEFINE_EXIT_POINT(2f, %l[error1])
+ RSEQ_ASM_DEFINE_EXIT_POINT(2f, %l[error2])
+ RSEQ_ASM_DEFINE_EXIT_POINT(2f, %l[error3])
+#endif
+ RSEQ_ASM_STORE_RSEQ_CS(2, 1b, rseq_cs)
+ RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, 4f)
+ RSEQ_INJECT_ASM(3)
+ RSEQ_ASM_OP_CMPEQ(v, expect, %l[cmpfail])
+ RSEQ_INJECT_ASM(4)
+ RSEQ_ASM_OP_CMPEQ(v2, expect2, %l[cmpfail])
+ RSEQ_INJECT_ASM(5)
+#ifdef RSEQ_COMPARE_TWICE
+ RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, %l[error1])
+ RSEQ_ASM_OP_CMPEQ(v, expect, %l[error2])
+ RSEQ_ASM_OP_CMPEQ(v2, expect2, %l[error3])
+#endif
+ RSEQ_ASM_OP_FINAL_STORE(newv, v, 3)
+ RSEQ_INJECT_ASM(6)
+ RSEQ_ASM_DEFINE_ABORT(4, abort)
+ : /* gcc asm goto does not allow outputs */
+ : [cpu_id] "r" (cpu),
+ [current_cpu_id] "Qo" (rseq_get_abi()->RSEQ_TEMPLATE_CPU_ID_FIELD),
+ [rseq_cs] "m" (rseq_get_abi()->rseq_cs.arch.ptr),
+ [v] "Qo" (*v),
+ [expect] "r" (expect),
+ [v2] "Qo" (*v2),
+ [expect2] "r" (expect2),
+ [newv] "r" (newv)
+ RSEQ_INJECT_INPUT
+ : "memory", RSEQ_ASM_TMP_REG
+ : abort, cmpfail
+#ifdef RSEQ_COMPARE_TWICE
+ , error1, error2, error3
+#endif
+ );
+ rseq_after_asm_goto();
+ return 0;
+abort:
+ rseq_after_asm_goto();
+ RSEQ_INJECT_FAILED
+ return -1;
+cmpfail:
+ rseq_after_asm_goto();
+ return 1;
+#ifdef RSEQ_COMPARE_TWICE
+error1:
+ rseq_after_asm_goto();
+ rseq_bug("cpu_id comparison failed");
+error2:
+ rseq_after_asm_goto();
+ rseq_bug("expected value comparison failed");
+error3:
+ rseq_after_asm_goto();
+ rseq_bug("2nd expected value comparison failed");
+#endif
+}
+
+#endif /* #if defined(RSEQ_TEMPLATE_MO_RELAXED) &&
+ (defined(RSEQ_TEMPLATE_CPU_ID) || defined(RSEQ_TEMPLATE_MM_CID)) */
+
+#if (defined(RSEQ_TEMPLATE_MO_RELAXED) || defined(RSEQ_TEMPLATE_MO_RELEASE)) && \
+ (defined(RSEQ_TEMPLATE_CPU_ID) || defined(RSEQ_TEMPLATE_MM_CID))
+
+static inline __attribute__((always_inline))
+int RSEQ_TEMPLATE_IDENTIFIER(rseq_cmpeqv_trystorev_storev)(intptr_t *v, intptr_t expect,
+ intptr_t *v2, intptr_t newv2,
+ intptr_t newv, int cpu)
+{
+ RSEQ_INJECT_C(9)
+
+ __asm__ __volatile__ goto (
+ RSEQ_ASM_DEFINE_TABLE(1, 2f, 3f, 4f)
+ RSEQ_ASM_DEFINE_EXIT_POINT(2f, %l[cmpfail])
+#ifdef RSEQ_COMPARE_TWICE
+ RSEQ_ASM_DEFINE_EXIT_POINT(2f, %l[error1])
+ RSEQ_ASM_DEFINE_EXIT_POINT(2f, %l[error2])
+#endif
+ RSEQ_ASM_STORE_RSEQ_CS(2, 1b, rseq_cs)
+ RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, 4f)
+ RSEQ_INJECT_ASM(3)
+ RSEQ_ASM_OP_CMPEQ(v, expect, %l[cmpfail])
+ RSEQ_INJECT_ASM(4)
+#ifdef RSEQ_COMPARE_TWICE
+ RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, %l[error1])
+ RSEQ_ASM_OP_CMPEQ(v, expect, %l[error2])
+#endif
+ RSEQ_ASM_OP_STORE(newv2, v2)
+ RSEQ_INJECT_ASM(5)
+#ifdef RSEQ_TEMPLATE_MO_RELEASE
+ RSEQ_ASM_OP_FINAL_STORE_RELEASE(newv, v, 3)
+#else
+ RSEQ_ASM_OP_FINAL_STORE(newv, v, 3)
+#endif
+ RSEQ_INJECT_ASM(6)
+ RSEQ_ASM_DEFINE_ABORT(4, abort)
+ : /* gcc asm goto does not allow outputs */
+ : [cpu_id] "r" (cpu),
+ [current_cpu_id] "Qo" (rseq_get_abi()->RSEQ_TEMPLATE_CPU_ID_FIELD),
+ [rseq_cs] "m" (rseq_get_abi()->rseq_cs.arch.ptr),
+ [expect] "r" (expect),
+ [v] "Qo" (*v),
+ [newv] "r" (newv),
+ [v2] "Qo" (*v2),
+ [newv2] "r" (newv2)
+ RSEQ_INJECT_INPUT
+ : "memory", RSEQ_ASM_TMP_REG
+ : abort, cmpfail
+#ifdef RSEQ_COMPARE_TWICE
+ , error1, error2
+#endif
+ );
+ rseq_after_asm_goto();
+ return 0;
+abort:
+ rseq_after_asm_goto();
+ RSEQ_INJECT_FAILED
+ return -1;
+cmpfail:
+ rseq_after_asm_goto();
+ return 1;
+#ifdef RSEQ_COMPARE_TWICE
+error1:
+ rseq_after_asm_goto();
+ rseq_bug("cpu_id comparison failed");
+error2:
+ rseq_after_asm_goto();
+ rseq_bug("expected value comparison failed");
+#endif
+}
+
+static inline __attribute__((always_inline))
+int RSEQ_TEMPLATE_IDENTIFIER(rseq_cmpeqv_trymemcpy_storev)(intptr_t *v, intptr_t expect,
+ void *dst, void *src, size_t len,
+ intptr_t newv, int cpu)
+{
+ RSEQ_INJECT_C(9)
+
+ __asm__ __volatile__ goto (
+ RSEQ_ASM_DEFINE_TABLE(1, 2f, 3f, 4f)
+ RSEQ_ASM_DEFINE_EXIT_POINT(2f, %l[cmpfail])
+#ifdef RSEQ_COMPARE_TWICE
+ RSEQ_ASM_DEFINE_EXIT_POINT(2f, %l[error1])
+ RSEQ_ASM_DEFINE_EXIT_POINT(2f, %l[error2])
+#endif
+ RSEQ_ASM_STORE_RSEQ_CS(2, 1b, rseq_cs)
+ RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, 4f)
+ RSEQ_INJECT_ASM(3)
+ RSEQ_ASM_OP_CMPEQ(v, expect, %l[cmpfail])
+ RSEQ_INJECT_ASM(4)
+#ifdef RSEQ_COMPARE_TWICE
+ RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, %l[error1])
+ RSEQ_ASM_OP_CMPEQ(v, expect, %l[error2])
+#endif
+ RSEQ_ASM_OP_R_BAD_MEMCPY(dst, src, len)
+ RSEQ_INJECT_ASM(5)
+#ifdef RSEQ_TEMPLATE_MO_RELEASE
+ RSEQ_ASM_OP_FINAL_STORE_RELEASE(newv, v, 3)
+#else
+ RSEQ_ASM_OP_FINAL_STORE(newv, v, 3)
+#endif
+ RSEQ_INJECT_ASM(6)
+ RSEQ_ASM_DEFINE_ABORT(4, abort)
+ : /* gcc asm goto does not allow outputs */
+ : [cpu_id] "r" (cpu),
+ [current_cpu_id] "Qo" (rseq_get_abi()->RSEQ_TEMPLATE_CPU_ID_FIELD),
+ [rseq_cs] "m" (rseq_get_abi()->rseq_cs.arch.ptr),
+ [expect] "r" (expect),
+ [v] "Qo" (*v),
+ [newv] "r" (newv),
+ [dst] "r" (dst),
+ [src] "r" (src),
+ [len] "r" (len)
+ RSEQ_INJECT_INPUT
+ : "memory", RSEQ_ASM_TMP_REG, RSEQ_ASM_TMP_REG_2
+ : abort, cmpfail
+#ifdef RSEQ_COMPARE_TWICE
+ , error1, error2
+#endif
+ );
+ rseq_after_asm_goto();
+ return 0;
+abort:
+ rseq_after_asm_goto();
+ RSEQ_INJECT_FAILED
+ return -1;
+cmpfail:
+ rseq_after_asm_goto();
+ return 1;
+#ifdef RSEQ_COMPARE_TWICE
+error1:
+ rseq_after_asm_goto();
+ rseq_bug("cpu_id comparison failed");
+error2:
+ rseq_after_asm_goto();
+ rseq_bug("expected value comparison failed");
+#endif
+}
+
+#endif /* #if (defined(RSEQ_TEMPLATE_MO_RELAXED) || defined(RSEQ_TEMPLATE_MO_RELEASE)) &&
+ (defined(RSEQ_TEMPLATE_CPU_ID) || defined(RSEQ_TEMPLATE_MM_CID)) */
+
+#include "rseq-bits-reset.h"
diff --git a/tools/testing/selftests/rseq/rseq-arm64.h b/tools/testing/selftests/rseq/rseq-arm64.h
index 200dae9e4208..21e1626a7235 100644
--- a/tools/testing/selftests/rseq/rseq-arm64.h
+++ b/tools/testing/selftests/rseq/rseq-arm64.h
@@ -2,7 +2,7 @@
/*
* rseq-arm64.h
*
- * (C) Copyright 2016-2018 - Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
+ * (C) Copyright 2016-2022 - Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
* (C) Copyright 2018 - Will Deacon <will.deacon@arm.com>
*/
@@ -27,68 +27,66 @@
#define rseq_smp_load_acquire(p) \
__extension__ ({ \
- __typeof(*p) ____p1; \
- switch (sizeof(*p)) { \
+ union { rseq_unqual_scalar_typeof(*(p)) __val; char __c[sizeof(*(p))]; } __u; \
+ switch (sizeof(*(p))) { \
case 1: \
- asm volatile ("ldarb %w0, %1" \
- : "=r" (*(__u8 *)p) \
- : "Q" (*p) : "memory"); \
+ __asm__ __volatile__ ("ldarb %w0, %1" \
+ : "=r" (*(__u8 *)__u.__c) \
+ : "Q" (*(p)) : "memory"); \
break; \
case 2: \
- asm volatile ("ldarh %w0, %1" \
- : "=r" (*(__u16 *)p) \
- : "Q" (*p) : "memory"); \
+ __asm__ __volatile__ ("ldarh %w0, %1" \
+ : "=r" (*(__u16 *)__u.__c) \
+ : "Q" (*(p)) : "memory"); \
break; \
case 4: \
- asm volatile ("ldar %w0, %1" \
- : "=r" (*(__u32 *)p) \
- : "Q" (*p) : "memory"); \
+ __asm__ __volatile__ ("ldar %w0, %1" \
+ : "=r" (*(__u32 *)__u.__c) \
+ : "Q" (*(p)) : "memory"); \
break; \
case 8: \
- asm volatile ("ldar %0, %1" \
- : "=r" (*(__u64 *)p) \
- : "Q" (*p) : "memory"); \
+ __asm__ __volatile__ ("ldar %0, %1" \
+ : "=r" (*(__u64 *)__u.__c) \
+ : "Q" (*(p)) : "memory"); \
break; \
} \
- ____p1; \
+ (rseq_unqual_scalar_typeof(*(p)))__u.__val; \
})
#define rseq_smp_acquire__after_ctrl_dep() rseq_smp_rmb()
#define rseq_smp_store_release(p, v) \
do { \
- switch (sizeof(*p)) { \
+ union { rseq_unqual_scalar_typeof(*(p)) __val; char __c[sizeof(*(p))]; } __u = \
+ { .__val = (rseq_unqual_scalar_typeof(*(p))) (v) }; \
+ switch (sizeof(*(p))) { \
case 1: \
- asm volatile ("stlrb %w1, %0" \
- : "=Q" (*p) \
- : "r" ((__u8)v) \
+ __asm__ __volatile__ ("stlrb %w1, %0" \
+ : "=Q" (*(p)) \
+ : "r" (*(__u8 *)__u.__c) \
: "memory"); \
break; \
case 2: \
- asm volatile ("stlrh %w1, %0" \
- : "=Q" (*p) \
- : "r" ((__u16)v) \
+ __asm__ __volatile__ ("stlrh %w1, %0" \
+ : "=Q" (*(p)) \
+ : "r" (*(__u16 *)__u.__c) \
: "memory"); \
break; \
case 4: \
- asm volatile ("stlr %w1, %0" \
- : "=Q" (*p) \
- : "r" ((__u32)v) \
+ __asm__ __volatile__ ("stlr %w1, %0" \
+ : "=Q" (*(p)) \
+ : "r" (*(__u32 *)__u.__c) \
: "memory"); \
break; \
case 8: \
- asm volatile ("stlr %1, %0" \
- : "=Q" (*p) \
- : "r" ((__u64)v) \
+ __asm__ __volatile__ ("stlr %1, %0" \
+ : "=Q" (*(p)) \
+ : "r" (*(__u64 *)__u.__c) \
: "memory"); \
break; \
} \
} while (0)
-#ifdef RSEQ_SKIP_FASTPATH
-#include "rseq-skip.h"
-#else /* !RSEQ_SKIP_FASTPATH */
-
#define RSEQ_ASM_TMP_REG32 "w15"
#define RSEQ_ASM_TMP_REG "x15"
#define RSEQ_ASM_TMP_REG_2 "x14"
@@ -204,459 +202,34 @@ do { \
" cbnz " RSEQ_ASM_TMP_REG_2 ", 222b\n" \
"333:\n"
-static inline __attribute__((always_inline))
-int rseq_cmpeqv_storev(intptr_t *v, intptr_t expect, intptr_t newv, int cpu)
-{
- RSEQ_INJECT_C(9)
-
- __asm__ __volatile__ goto (
- RSEQ_ASM_DEFINE_TABLE(1, 2f, 3f, 4f)
- RSEQ_ASM_DEFINE_EXIT_POINT(2f, %l[cmpfail])
-#ifdef RSEQ_COMPARE_TWICE
- RSEQ_ASM_DEFINE_EXIT_POINT(2f, %l[error1])
- RSEQ_ASM_DEFINE_EXIT_POINT(2f, %l[error2])
-#endif
- RSEQ_ASM_STORE_RSEQ_CS(2, 1b, rseq_cs)
- RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, 4f)
- RSEQ_INJECT_ASM(3)
- RSEQ_ASM_OP_CMPEQ(v, expect, %l[cmpfail])
- RSEQ_INJECT_ASM(4)
-#ifdef RSEQ_COMPARE_TWICE
- RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, %l[error1])
- RSEQ_ASM_OP_CMPEQ(v, expect, %l[error2])
-#endif
- RSEQ_ASM_OP_FINAL_STORE(newv, v, 3)
- RSEQ_INJECT_ASM(5)
- RSEQ_ASM_DEFINE_ABORT(4, abort)
- : /* gcc asm goto does not allow outputs */
- : [cpu_id] "r" (cpu),
- [current_cpu_id] "Qo" (__rseq_abi.cpu_id),
- [rseq_cs] "m" (__rseq_abi.rseq_cs),
- [v] "Qo" (*v),
- [expect] "r" (expect),
- [newv] "r" (newv)
- RSEQ_INJECT_INPUT
- : "memory", RSEQ_ASM_TMP_REG
- : abort, cmpfail
-#ifdef RSEQ_COMPARE_TWICE
- , error1, error2
-#endif
- );
-
- return 0;
-abort:
- RSEQ_INJECT_FAILED
- return -1;
-cmpfail:
- return 1;
-#ifdef RSEQ_COMPARE_TWICE
-error1:
- rseq_bug("cpu_id comparison failed");
-error2:
- rseq_bug("expected value comparison failed");
-#endif
-}
-
-static inline __attribute__((always_inline))
-int rseq_cmpnev_storeoffp_load(intptr_t *v, intptr_t expectnot,
- off_t voffp, intptr_t *load, int cpu)
-{
- RSEQ_INJECT_C(9)
-
- __asm__ __volatile__ goto (
- RSEQ_ASM_DEFINE_TABLE(1, 2f, 3f, 4f)
- RSEQ_ASM_DEFINE_EXIT_POINT(2f, %l[cmpfail])
-#ifdef RSEQ_COMPARE_TWICE
- RSEQ_ASM_DEFINE_EXIT_POINT(2f, %l[error1])
- RSEQ_ASM_DEFINE_EXIT_POINT(2f, %l[error2])
-#endif
- RSEQ_ASM_STORE_RSEQ_CS(2, 1b, rseq_cs)
- RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, 4f)
- RSEQ_INJECT_ASM(3)
- RSEQ_ASM_OP_CMPNE(v, expectnot, %l[cmpfail])
- RSEQ_INJECT_ASM(4)
-#ifdef RSEQ_COMPARE_TWICE
- RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, %l[error1])
- RSEQ_ASM_OP_CMPNE(v, expectnot, %l[error2])
-#endif
- RSEQ_ASM_OP_R_LOAD(v)
- RSEQ_ASM_OP_R_STORE(load)
- RSEQ_ASM_OP_R_LOAD_OFF(voffp)
- RSEQ_ASM_OP_R_FINAL_STORE(v, 3)
- RSEQ_INJECT_ASM(5)
- RSEQ_ASM_DEFINE_ABORT(4, abort)
- : /* gcc asm goto does not allow outputs */
- : [cpu_id] "r" (cpu),
- [current_cpu_id] "Qo" (__rseq_abi.cpu_id),
- [rseq_cs] "m" (__rseq_abi.rseq_cs),
- [v] "Qo" (*v),
- [expectnot] "r" (expectnot),
- [load] "Qo" (*load),
- [voffp] "r" (voffp)
- RSEQ_INJECT_INPUT
- : "memory", RSEQ_ASM_TMP_REG
- : abort, cmpfail
-#ifdef RSEQ_COMPARE_TWICE
- , error1, error2
-#endif
- );
- return 0;
-abort:
- RSEQ_INJECT_FAILED
- return -1;
-cmpfail:
- return 1;
-#ifdef RSEQ_COMPARE_TWICE
-error1:
- rseq_bug("cpu_id comparison failed");
-error2:
- rseq_bug("expected value comparison failed");
-#endif
-}
+/* Per-cpu-id indexing. */
-static inline __attribute__((always_inline))
-int rseq_addv(intptr_t *v, intptr_t count, int cpu)
-{
- RSEQ_INJECT_C(9)
+#define RSEQ_TEMPLATE_CPU_ID
+#define RSEQ_TEMPLATE_MO_RELAXED
+#include "rseq-arm64-bits.h"
+#undef RSEQ_TEMPLATE_MO_RELAXED
- __asm__ __volatile__ goto (
- RSEQ_ASM_DEFINE_TABLE(1, 2f, 3f, 4f)
-#ifdef RSEQ_COMPARE_TWICE
- RSEQ_ASM_DEFINE_EXIT_POINT(2f, %l[error1])
-#endif
- RSEQ_ASM_STORE_RSEQ_CS(2, 1b, rseq_cs)
- RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, 4f)
- RSEQ_INJECT_ASM(3)
-#ifdef RSEQ_COMPARE_TWICE
- RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, %l[error1])
-#endif
- RSEQ_ASM_OP_R_LOAD(v)
- RSEQ_ASM_OP_R_ADD(count)
- RSEQ_ASM_OP_R_FINAL_STORE(v, 3)
- RSEQ_INJECT_ASM(4)
- RSEQ_ASM_DEFINE_ABORT(4, abort)
- : /* gcc asm goto does not allow outputs */
- : [cpu_id] "r" (cpu),
- [current_cpu_id] "Qo" (__rseq_abi.cpu_id),
- [rseq_cs] "m" (__rseq_abi.rseq_cs),
- [v] "Qo" (*v),
- [count] "r" (count)
- RSEQ_INJECT_INPUT
- : "memory", RSEQ_ASM_TMP_REG
- : abort
-#ifdef RSEQ_COMPARE_TWICE
- , error1
-#endif
- );
- return 0;
-abort:
- RSEQ_INJECT_FAILED
- return -1;
-#ifdef RSEQ_COMPARE_TWICE
-error1:
- rseq_bug("cpu_id comparison failed");
-#endif
-}
-
-static inline __attribute__((always_inline))
-int rseq_cmpeqv_trystorev_storev(intptr_t *v, intptr_t expect,
- intptr_t *v2, intptr_t newv2,
- intptr_t newv, int cpu)
-{
- RSEQ_INJECT_C(9)
-
- __asm__ __volatile__ goto (
- RSEQ_ASM_DEFINE_TABLE(1, 2f, 3f, 4f)
- RSEQ_ASM_DEFINE_EXIT_POINT(2f, %l[cmpfail])
-#ifdef RSEQ_COMPARE_TWICE
- RSEQ_ASM_DEFINE_EXIT_POINT(2f, %l[error1])
- RSEQ_ASM_DEFINE_EXIT_POINT(2f, %l[error2])
-#endif
- RSEQ_ASM_STORE_RSEQ_CS(2, 1b, rseq_cs)
- RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, 4f)
- RSEQ_INJECT_ASM(3)
- RSEQ_ASM_OP_CMPEQ(v, expect, %l[cmpfail])
- RSEQ_INJECT_ASM(4)
-#ifdef RSEQ_COMPARE_TWICE
- RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, %l[error1])
- RSEQ_ASM_OP_CMPEQ(v, expect, %l[error2])
-#endif
- RSEQ_ASM_OP_STORE(newv2, v2)
- RSEQ_INJECT_ASM(5)
- RSEQ_ASM_OP_FINAL_STORE(newv, v, 3)
- RSEQ_INJECT_ASM(6)
- RSEQ_ASM_DEFINE_ABORT(4, abort)
- : /* gcc asm goto does not allow outputs */
- : [cpu_id] "r" (cpu),
- [current_cpu_id] "Qo" (__rseq_abi.cpu_id),
- [rseq_cs] "m" (__rseq_abi.rseq_cs),
- [expect] "r" (expect),
- [v] "Qo" (*v),
- [newv] "r" (newv),
- [v2] "Qo" (*v2),
- [newv2] "r" (newv2)
- RSEQ_INJECT_INPUT
- : "memory", RSEQ_ASM_TMP_REG
- : abort, cmpfail
-#ifdef RSEQ_COMPARE_TWICE
- , error1, error2
-#endif
- );
-
- return 0;
-abort:
- RSEQ_INJECT_FAILED
- return -1;
-cmpfail:
- return 1;
-#ifdef RSEQ_COMPARE_TWICE
-error1:
- rseq_bug("cpu_id comparison failed");
-error2:
- rseq_bug("expected value comparison failed");
-#endif
-}
-
-static inline __attribute__((always_inline))
-int rseq_cmpeqv_trystorev_storev_release(intptr_t *v, intptr_t expect,
- intptr_t *v2, intptr_t newv2,
- intptr_t newv, int cpu)
-{
- RSEQ_INJECT_C(9)
-
- __asm__ __volatile__ goto (
- RSEQ_ASM_DEFINE_TABLE(1, 2f, 3f, 4f)
- RSEQ_ASM_DEFINE_EXIT_POINT(2f, %l[cmpfail])
-#ifdef RSEQ_COMPARE_TWICE
- RSEQ_ASM_DEFINE_EXIT_POINT(2f, %l[error1])
- RSEQ_ASM_DEFINE_EXIT_POINT(2f, %l[error2])
-#endif
- RSEQ_ASM_STORE_RSEQ_CS(2, 1b, rseq_cs)
- RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, 4f)
- RSEQ_INJECT_ASM(3)
- RSEQ_ASM_OP_CMPEQ(v, expect, %l[cmpfail])
- RSEQ_INJECT_ASM(4)
-#ifdef RSEQ_COMPARE_TWICE
- RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, %l[error1])
- RSEQ_ASM_OP_CMPEQ(v, expect, %l[error2])
-#endif
- RSEQ_ASM_OP_STORE(newv2, v2)
- RSEQ_INJECT_ASM(5)
- RSEQ_ASM_OP_FINAL_STORE_RELEASE(newv, v, 3)
- RSEQ_INJECT_ASM(6)
- RSEQ_ASM_DEFINE_ABORT(4, abort)
- : /* gcc asm goto does not allow outputs */
- : [cpu_id] "r" (cpu),
- [current_cpu_id] "Qo" (__rseq_abi.cpu_id),
- [rseq_cs] "m" (__rseq_abi.rseq_cs),
- [expect] "r" (expect),
- [v] "Qo" (*v),
- [newv] "r" (newv),
- [v2] "Qo" (*v2),
- [newv2] "r" (newv2)
- RSEQ_INJECT_INPUT
- : "memory", RSEQ_ASM_TMP_REG
- : abort, cmpfail
-#ifdef RSEQ_COMPARE_TWICE
- , error1, error2
-#endif
- );
-
- return 0;
-abort:
- RSEQ_INJECT_FAILED
- return -1;
-cmpfail:
- return 1;
-#ifdef RSEQ_COMPARE_TWICE
-error1:
- rseq_bug("cpu_id comparison failed");
-error2:
- rseq_bug("expected value comparison failed");
-#endif
-}
-
-static inline __attribute__((always_inline))
-int rseq_cmpeqv_cmpeqv_storev(intptr_t *v, intptr_t expect,
- intptr_t *v2, intptr_t expect2,
- intptr_t newv, int cpu)
-{
- RSEQ_INJECT_C(9)
-
- __asm__ __volatile__ goto (
- RSEQ_ASM_DEFINE_TABLE(1, 2f, 3f, 4f)
- RSEQ_ASM_DEFINE_EXIT_POINT(2f, %l[cmpfail])
-#ifdef RSEQ_COMPARE_TWICE
- RSEQ_ASM_DEFINE_EXIT_POINT(2f, %l[error1])
- RSEQ_ASM_DEFINE_EXIT_POINT(2f, %l[error2])
- RSEQ_ASM_DEFINE_EXIT_POINT(2f, %l[error3])
-#endif
- RSEQ_ASM_STORE_RSEQ_CS(2, 1b, rseq_cs)
- RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, 4f)
- RSEQ_INJECT_ASM(3)
- RSEQ_ASM_OP_CMPEQ(v, expect, %l[cmpfail])
- RSEQ_INJECT_ASM(4)
- RSEQ_ASM_OP_CMPEQ(v2, expect2, %l[cmpfail])
- RSEQ_INJECT_ASM(5)
-#ifdef RSEQ_COMPARE_TWICE
- RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, %l[error1])
- RSEQ_ASM_OP_CMPEQ(v, expect, %l[error2])
- RSEQ_ASM_OP_CMPEQ(v2, expect2, %l[error3])
-#endif
- RSEQ_ASM_OP_FINAL_STORE(newv, v, 3)
- RSEQ_INJECT_ASM(6)
- RSEQ_ASM_DEFINE_ABORT(4, abort)
- : /* gcc asm goto does not allow outputs */
- : [cpu_id] "r" (cpu),
- [current_cpu_id] "Qo" (__rseq_abi.cpu_id),
- [rseq_cs] "m" (__rseq_abi.rseq_cs),
- [v] "Qo" (*v),
- [expect] "r" (expect),
- [v2] "Qo" (*v2),
- [expect2] "r" (expect2),
- [newv] "r" (newv)
- RSEQ_INJECT_INPUT
- : "memory", RSEQ_ASM_TMP_REG
- : abort, cmpfail
-#ifdef RSEQ_COMPARE_TWICE
- , error1, error2, error3
-#endif
- );
-
- return 0;
-abort:
- RSEQ_INJECT_FAILED
- return -1;
-cmpfail:
- return 1;
-#ifdef RSEQ_COMPARE_TWICE
-error1:
- rseq_bug("cpu_id comparison failed");
-error2:
- rseq_bug("expected value comparison failed");
-error3:
- rseq_bug("2nd expected value comparison failed");
-#endif
-}
-
-static inline __attribute__((always_inline))
-int rseq_cmpeqv_trymemcpy_storev(intptr_t *v, intptr_t expect,
- void *dst, void *src, size_t len,
- intptr_t newv, int cpu)
-{
- RSEQ_INJECT_C(9)
-
- __asm__ __volatile__ goto (
- RSEQ_ASM_DEFINE_TABLE(1, 2f, 3f, 4f)
- RSEQ_ASM_DEFINE_EXIT_POINT(2f, %l[cmpfail])
-#ifdef RSEQ_COMPARE_TWICE
- RSEQ_ASM_DEFINE_EXIT_POINT(2f, %l[error1])
- RSEQ_ASM_DEFINE_EXIT_POINT(2f, %l[error2])
-#endif
- RSEQ_ASM_STORE_RSEQ_CS(2, 1b, rseq_cs)
- RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, 4f)
- RSEQ_INJECT_ASM(3)
- RSEQ_ASM_OP_CMPEQ(v, expect, %l[cmpfail])
- RSEQ_INJECT_ASM(4)
-#ifdef RSEQ_COMPARE_TWICE
- RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, %l[error1])
- RSEQ_ASM_OP_CMPEQ(v, expect, %l[error2])
-#endif
- RSEQ_ASM_OP_R_BAD_MEMCPY(dst, src, len)
- RSEQ_INJECT_ASM(5)
- RSEQ_ASM_OP_FINAL_STORE(newv, v, 3)
- RSEQ_INJECT_ASM(6)
- RSEQ_ASM_DEFINE_ABORT(4, abort)
- : /* gcc asm goto does not allow outputs */
- : [cpu_id] "r" (cpu),
- [current_cpu_id] "Qo" (__rseq_abi.cpu_id),
- [rseq_cs] "m" (__rseq_abi.rseq_cs),
- [expect] "r" (expect),
- [v] "Qo" (*v),
- [newv] "r" (newv),
- [dst] "r" (dst),
- [src] "r" (src),
- [len] "r" (len)
- RSEQ_INJECT_INPUT
- : "memory", RSEQ_ASM_TMP_REG, RSEQ_ASM_TMP_REG_2
- : abort, cmpfail
-#ifdef RSEQ_COMPARE_TWICE
- , error1, error2
-#endif
- );
-
- return 0;
-abort:
- RSEQ_INJECT_FAILED
- return -1;
-cmpfail:
- return 1;
-#ifdef RSEQ_COMPARE_TWICE
-error1:
- rseq_bug("cpu_id comparison failed");
-error2:
- rseq_bug("expected value comparison failed");
-#endif
-}
-
-static inline __attribute__((always_inline))
-int rseq_cmpeqv_trymemcpy_storev_release(intptr_t *v, intptr_t expect,
- void *dst, void *src, size_t len,
- intptr_t newv, int cpu)
-{
- RSEQ_INJECT_C(9)
-
- __asm__ __volatile__ goto (
- RSEQ_ASM_DEFINE_TABLE(1, 2f, 3f, 4f)
- RSEQ_ASM_DEFINE_EXIT_POINT(2f, %l[cmpfail])
-#ifdef RSEQ_COMPARE_TWICE
- RSEQ_ASM_DEFINE_EXIT_POINT(2f, %l[error1])
- RSEQ_ASM_DEFINE_EXIT_POINT(2f, %l[error2])
-#endif
- RSEQ_ASM_STORE_RSEQ_CS(2, 1b, rseq_cs)
- RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, 4f)
- RSEQ_INJECT_ASM(3)
- RSEQ_ASM_OP_CMPEQ(v, expect, %l[cmpfail])
- RSEQ_INJECT_ASM(4)
-#ifdef RSEQ_COMPARE_TWICE
- RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, %l[error1])
- RSEQ_ASM_OP_CMPEQ(v, expect, %l[error2])
-#endif
- RSEQ_ASM_OP_R_BAD_MEMCPY(dst, src, len)
- RSEQ_INJECT_ASM(5)
- RSEQ_ASM_OP_FINAL_STORE_RELEASE(newv, v, 3)
- RSEQ_INJECT_ASM(6)
- RSEQ_ASM_DEFINE_ABORT(4, abort)
- : /* gcc asm goto does not allow outputs */
- : [cpu_id] "r" (cpu),
- [current_cpu_id] "Qo" (__rseq_abi.cpu_id),
- [rseq_cs] "m" (__rseq_abi.rseq_cs),
- [expect] "r" (expect),
- [v] "Qo" (*v),
- [newv] "r" (newv),
- [dst] "r" (dst),
- [src] "r" (src),
- [len] "r" (len)
- RSEQ_INJECT_INPUT
- : "memory", RSEQ_ASM_TMP_REG, RSEQ_ASM_TMP_REG_2
- : abort, cmpfail
-#ifdef RSEQ_COMPARE_TWICE
- , error1, error2
-#endif
- );
-
- return 0;
-abort:
- RSEQ_INJECT_FAILED
- return -1;
-cmpfail:
- return 1;
-#ifdef RSEQ_COMPARE_TWICE
-error1:
- rseq_bug("cpu_id comparison failed");
-error2:
- rseq_bug("expected value comparison failed");
-#endif
-}
+#define RSEQ_TEMPLATE_MO_RELEASE
+#include "rseq-arm64-bits.h"
+#undef RSEQ_TEMPLATE_MO_RELEASE
+#undef RSEQ_TEMPLATE_CPU_ID
+
+/* Per-mm-cid indexing. */
+
+#define RSEQ_TEMPLATE_MM_CID
+#define RSEQ_TEMPLATE_MO_RELAXED
+#include "rseq-arm64-bits.h"
+#undef RSEQ_TEMPLATE_MO_RELAXED
+
+#define RSEQ_TEMPLATE_MO_RELEASE
+#include "rseq-arm64-bits.h"
+#undef RSEQ_TEMPLATE_MO_RELEASE
+#undef RSEQ_TEMPLATE_MM_CID
+
+/* APIs which are not based on cpu ids. */
-#endif /* !RSEQ_SKIP_FASTPATH */
+#define RSEQ_TEMPLATE_CPU_ID_NONE
+#define RSEQ_TEMPLATE_MO_RELAXED
+#include "rseq-arm64-bits.h"
+#undef RSEQ_TEMPLATE_MO_RELAXED
+#undef RSEQ_TEMPLATE_CPU_ID_NONE
diff --git a/tools/testing/selftests/rseq/rseq-bits-reset.h b/tools/testing/selftests/rseq/rseq-bits-reset.h
new file mode 100644
index 000000000000..e8655089f9cb
--- /dev/null
+++ b/tools/testing/selftests/rseq/rseq-bits-reset.h
@@ -0,0 +1,11 @@
+/* SPDX-License-Identifier: LGPL-2.1 OR MIT */
+/*
+ * rseq-bits-reset.h
+ *
+ * (C) Copyright 2016-2022 - Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
+ */
+
+#undef RSEQ_TEMPLATE_IDENTIFIER
+#undef RSEQ_TEMPLATE_CPU_ID_FIELD
+#undef RSEQ_TEMPLATE_CPU_ID_OFFSET
+#undef RSEQ_TEMPLATE_SUFFIX
diff --git a/tools/testing/selftests/rseq/rseq-bits-template.h b/tools/testing/selftests/rseq/rseq-bits-template.h
new file mode 100644
index 000000000000..65698d6a6cf9
--- /dev/null
+++ b/tools/testing/selftests/rseq/rseq-bits-template.h
@@ -0,0 +1,41 @@
+/* SPDX-License-Identifier: LGPL-2.1 OR MIT */
+/*
+ * rseq-bits-template.h
+ *
+ * (C) Copyright 2016-2022 - Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
+ */
+
+#ifdef RSEQ_TEMPLATE_CPU_ID
+# define RSEQ_TEMPLATE_CPU_ID_OFFSET RSEQ_CPU_ID_OFFSET
+# define RSEQ_TEMPLATE_CPU_ID_FIELD cpu_id
+# ifdef RSEQ_TEMPLATE_MO_RELEASE
+# define RSEQ_TEMPLATE_SUFFIX _release_cpu_id
+# elif defined (RSEQ_TEMPLATE_MO_RELAXED)
+# define RSEQ_TEMPLATE_SUFFIX _relaxed_cpu_id
+# else
+# error "Never use <rseq-bits-template.h> directly; include <rseq.h> instead."
+# endif
+#elif defined(RSEQ_TEMPLATE_MM_CID)
+# define RSEQ_TEMPLATE_CPU_ID_OFFSET RSEQ_MM_CID_OFFSET
+# define RSEQ_TEMPLATE_CPU_ID_FIELD mm_cid
+# ifdef RSEQ_TEMPLATE_MO_RELEASE
+# define RSEQ_TEMPLATE_SUFFIX _release_mm_cid
+# elif defined (RSEQ_TEMPLATE_MO_RELAXED)
+# define RSEQ_TEMPLATE_SUFFIX _relaxed_mm_cid
+# else
+# error "Never use <rseq-bits-template.h> directly; include <rseq.h> instead."
+# endif
+#elif defined (RSEQ_TEMPLATE_CPU_ID_NONE)
+# ifdef RSEQ_TEMPLATE_MO_RELEASE
+# define RSEQ_TEMPLATE_SUFFIX _release
+# elif defined (RSEQ_TEMPLATE_MO_RELAXED)
+# define RSEQ_TEMPLATE_SUFFIX _relaxed
+# else
+# error "Never use <rseq-bits-template.h> directly; include <rseq.h> instead."
+# endif
+#else
+# error "Never use <rseq-bits-template.h> directly; include <rseq.h> instead."
+#endif
+
+#define RSEQ_TEMPLATE_IDENTIFIER(x) RSEQ_COMBINE_TOKENS(x, RSEQ_TEMPLATE_SUFFIX)
+
diff --git a/tools/testing/selftests/rseq/rseq-generic-thread-pointer.h b/tools/testing/selftests/rseq/rseq-generic-thread-pointer.h
new file mode 100644
index 000000000000..38c584661571
--- /dev/null
+++ b/tools/testing/selftests/rseq/rseq-generic-thread-pointer.h
@@ -0,0 +1,25 @@
+/* SPDX-License-Identifier: LGPL-2.1-only OR MIT */
+/*
+ * rseq-generic-thread-pointer.h
+ *
+ * (C) Copyright 2021 - Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
+ */
+
+#ifndef _RSEQ_GENERIC_THREAD_POINTER
+#define _RSEQ_GENERIC_THREAD_POINTER
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/* Use gcc builtin thread pointer. */
+static inline void *rseq_thread_pointer(void)
+{
+ return __builtin_thread_pointer();
+}
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif
diff --git a/tools/testing/selftests/rseq/rseq-mips-bits.h b/tools/testing/selftests/rseq/rseq-mips-bits.h
new file mode 100644
index 000000000000..6c48af4d0944
--- /dev/null
+++ b/tools/testing/selftests/rseq/rseq-mips-bits.h
@@ -0,0 +1,462 @@
+/* SPDX-License-Identifier: LGPL-2.1 OR MIT */
+/*
+ * Author: Paul Burton <paul.burton@mips.com>
+ * (C) Copyright 2018 MIPS Tech LLC
+ * (C) Copyright 2016-2022 - Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
+ */
+
+#include "rseq-bits-template.h"
+
+#if defined(RSEQ_TEMPLATE_MO_RELAXED) && \
+ (defined(RSEQ_TEMPLATE_CPU_ID) || defined(RSEQ_TEMPLATE_MM_CID))
+
+static inline __attribute__((always_inline))
+int RSEQ_TEMPLATE_IDENTIFIER(rseq_cmpeqv_storev)(intptr_t *v, intptr_t expect, intptr_t newv, int cpu)
+{
+ RSEQ_INJECT_C(9)
+
+ __asm__ __volatile__ goto (
+ RSEQ_ASM_DEFINE_TABLE(9, 1f, 2f, 4f) /* start, commit, abort */
+ RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[cmpfail])
+#ifdef RSEQ_COMPARE_TWICE
+ RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[error1])
+ RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[error2])
+#endif
+ /* Start rseq by storing table entry pointer into rseq_cs. */
+ RSEQ_ASM_STORE_RSEQ_CS(1, 3f, rseq_cs)
+ RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, 4f)
+ RSEQ_INJECT_ASM(3)
+ LONG_L " $4, %[v]\n\t"
+ "bne $4, %[expect], %l[cmpfail]\n\t"
+ RSEQ_INJECT_ASM(4)
+#ifdef RSEQ_COMPARE_TWICE
+ RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, %l[error1])
+ LONG_L " $4, %[v]\n\t"
+ "bne $4, %[expect], %l[error2]\n\t"
+#endif
+ /* final store */
+ LONG_S " %[newv], %[v]\n\t"
+ "2:\n\t"
+ RSEQ_INJECT_ASM(5)
+ "b 5f\n\t"
+ RSEQ_ASM_DEFINE_ABORT(3, 4, "", abort, 1b, 2b, 4f)
+ "5:\n\t"
+ : /* gcc asm goto does not allow outputs */
+ : [cpu_id] "r" (cpu),
+ [current_cpu_id] "m" (rseq_get_abi()->RSEQ_TEMPLATE_CPU_ID_FIELD),
+ [rseq_cs] "m" (rseq_get_abi()->rseq_cs.arch.ptr),
+ [v] "m" (*v),
+ [expect] "r" (expect),
+ [newv] "r" (newv)
+ RSEQ_INJECT_INPUT
+ : "$4", "memory"
+ RSEQ_INJECT_CLOBBER
+ : abort, cmpfail
+#ifdef RSEQ_COMPARE_TWICE
+ , error1, error2
+#endif
+ );
+ return 0;
+abort:
+ RSEQ_INJECT_FAILED
+ return -1;
+cmpfail:
+ return 1;
+#ifdef RSEQ_COMPARE_TWICE
+error1:
+ rseq_bug("cpu_id comparison failed");
+error2:
+ rseq_bug("expected value comparison failed");
+#endif
+}
+
+static inline __attribute__((always_inline))
+int RSEQ_TEMPLATE_IDENTIFIER(rseq_cmpnev_storeoffp_load)(intptr_t *v, intptr_t expectnot,
+ long voffp, intptr_t *load, int cpu)
+{
+ RSEQ_INJECT_C(9)
+
+ __asm__ __volatile__ goto (
+ RSEQ_ASM_DEFINE_TABLE(9, 1f, 2f, 4f) /* start, commit, abort */
+ RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[cmpfail])
+#ifdef RSEQ_COMPARE_TWICE
+ RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[error1])
+ RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[error2])
+#endif
+ /* Start rseq by storing table entry pointer into rseq_cs. */
+ RSEQ_ASM_STORE_RSEQ_CS(1, 3f, rseq_cs)
+ RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, 4f)
+ RSEQ_INJECT_ASM(3)
+ LONG_L " $4, %[v]\n\t"
+ "beq $4, %[expectnot], %l[cmpfail]\n\t"
+ RSEQ_INJECT_ASM(4)
+#ifdef RSEQ_COMPARE_TWICE
+ RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, %l[error1])
+ LONG_L " $4, %[v]\n\t"
+ "beq $4, %[expectnot], %l[error2]\n\t"
+#endif
+ LONG_S " $4, %[load]\n\t"
+ LONG_ADDI " $4, %[voffp]\n\t"
+ LONG_L " $4, 0($4)\n\t"
+ /* final store */
+ LONG_S " $4, %[v]\n\t"
+ "2:\n\t"
+ RSEQ_INJECT_ASM(5)
+ "b 5f\n\t"
+ RSEQ_ASM_DEFINE_ABORT(3, 4, "", abort, 1b, 2b, 4f)
+ "5:\n\t"
+ : /* gcc asm goto does not allow outputs */
+ : [cpu_id] "r" (cpu),
+ [current_cpu_id] "m" (rseq_get_abi()->RSEQ_TEMPLATE_CPU_ID_FIELD),
+ [rseq_cs] "m" (rseq_get_abi()->rseq_cs.arch.ptr),
+ /* final store input */
+ [v] "m" (*v),
+ [expectnot] "r" (expectnot),
+ [voffp] "Ir" (voffp),
+ [load] "m" (*load)
+ RSEQ_INJECT_INPUT
+ : "$4", "memory"
+ RSEQ_INJECT_CLOBBER
+ : abort, cmpfail
+#ifdef RSEQ_COMPARE_TWICE
+ , error1, error2
+#endif
+ );
+ return 0;
+abort:
+ RSEQ_INJECT_FAILED
+ return -1;
+cmpfail:
+ return 1;
+#ifdef RSEQ_COMPARE_TWICE
+error1:
+ rseq_bug("cpu_id comparison failed");
+error2:
+ rseq_bug("expected value comparison failed");
+#endif
+}
+
+static inline __attribute__((always_inline))
+int RSEQ_TEMPLATE_IDENTIFIER(rseq_addv)(intptr_t *v, intptr_t count, int cpu)
+{
+ RSEQ_INJECT_C(9)
+
+ __asm__ __volatile__ goto (
+ RSEQ_ASM_DEFINE_TABLE(9, 1f, 2f, 4f) /* start, commit, abort */
+#ifdef RSEQ_COMPARE_TWICE
+ RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[error1])
+#endif
+ /* Start rseq by storing table entry pointer into rseq_cs. */
+ RSEQ_ASM_STORE_RSEQ_CS(1, 3f, rseq_cs)
+ RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, 4f)
+ RSEQ_INJECT_ASM(3)
+#ifdef RSEQ_COMPARE_TWICE
+ RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, %l[error1])
+#endif
+ LONG_L " $4, %[v]\n\t"
+ LONG_ADDI " $4, %[count]\n\t"
+ /* final store */
+ LONG_S " $4, %[v]\n\t"
+ "2:\n\t"
+ RSEQ_INJECT_ASM(4)
+ "b 5f\n\t"
+ RSEQ_ASM_DEFINE_ABORT(3, 4, "", abort, 1b, 2b, 4f)
+ "5:\n\t"
+ : /* gcc asm goto does not allow outputs */
+ : [cpu_id] "r" (cpu),
+ [current_cpu_id] "m" (rseq_get_abi()->RSEQ_TEMPLATE_CPU_ID_FIELD),
+ [rseq_cs] "m" (rseq_get_abi()->rseq_cs.arch.ptr),
+ [v] "m" (*v),
+ [count] "Ir" (count)
+ RSEQ_INJECT_INPUT
+ : "$4", "memory"
+ RSEQ_INJECT_CLOBBER
+ : abort
+#ifdef RSEQ_COMPARE_TWICE
+ , error1
+#endif
+ );
+ return 0;
+abort:
+ RSEQ_INJECT_FAILED
+ return -1;
+#ifdef RSEQ_COMPARE_TWICE
+error1:
+ rseq_bug("cpu_id comparison failed");
+#endif
+}
+
+static inline __attribute__((always_inline))
+int RSEQ_TEMPLATE_IDENTIFIER(rseq_cmpeqv_cmpeqv_storev)(intptr_t *v, intptr_t expect,
+ intptr_t *v2, intptr_t expect2,
+ intptr_t newv, int cpu)
+{
+ RSEQ_INJECT_C(9)
+
+ __asm__ __volatile__ goto (
+ RSEQ_ASM_DEFINE_TABLE(9, 1f, 2f, 4f) /* start, commit, abort */
+ RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[cmpfail])
+#ifdef RSEQ_COMPARE_TWICE
+ RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[error1])
+ RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[error2])
+ RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[error3])
+#endif
+ /* Start rseq by storing table entry pointer into rseq_cs. */
+ RSEQ_ASM_STORE_RSEQ_CS(1, 3f, rseq_cs)
+ RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, 4f)
+ RSEQ_INJECT_ASM(3)
+ LONG_L " $4, %[v]\n\t"
+ "bne $4, %[expect], %l[cmpfail]\n\t"
+ RSEQ_INJECT_ASM(4)
+ LONG_L " $4, %[v2]\n\t"
+ "bne $4, %[expect2], %l[cmpfail]\n\t"
+ RSEQ_INJECT_ASM(5)
+#ifdef RSEQ_COMPARE_TWICE
+ RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, %l[error1])
+ LONG_L " $4, %[v]\n\t"
+ "bne $4, %[expect], %l[error2]\n\t"
+ LONG_L " $4, %[v2]\n\t"
+ "bne $4, %[expect2], %l[error3]\n\t"
+#endif
+ /* final store */
+ LONG_S " %[newv], %[v]\n\t"
+ "2:\n\t"
+ RSEQ_INJECT_ASM(6)
+ "b 5f\n\t"
+ RSEQ_ASM_DEFINE_ABORT(3, 4, "", abort, 1b, 2b, 4f)
+ "5:\n\t"
+ : /* gcc asm goto does not allow outputs */
+ : [cpu_id] "r" (cpu),
+ [current_cpu_id] "m" (rseq_get_abi()->RSEQ_TEMPLATE_CPU_ID_FIELD),
+ [rseq_cs] "m" (rseq_get_abi()->rseq_cs.arch.ptr),
+ /* cmp2 input */
+ [v2] "m" (*v2),
+ [expect2] "r" (expect2),
+ /* final store input */
+ [v] "m" (*v),
+ [expect] "r" (expect),
+ [newv] "r" (newv)
+ RSEQ_INJECT_INPUT
+ : "$4", "memory"
+ RSEQ_INJECT_CLOBBER
+ : abort, cmpfail
+#ifdef RSEQ_COMPARE_TWICE
+ , error1, error2, error3
+#endif
+ );
+ return 0;
+abort:
+ RSEQ_INJECT_FAILED
+ return -1;
+cmpfail:
+ return 1;
+#ifdef RSEQ_COMPARE_TWICE
+error1:
+ rseq_bug("cpu_id comparison failed");
+error2:
+ rseq_bug("1st expected value comparison failed");
+error3:
+ rseq_bug("2nd expected value comparison failed");
+#endif
+}
+
+#endif /* #if defined(RSEQ_TEMPLATE_MO_RELAXED) &&
+ (defined(RSEQ_TEMPLATE_CPU_ID) || defined(RSEQ_TEMPLATE_MM_CID)) */
+
+#if (defined(RSEQ_TEMPLATE_MO_RELAXED) || defined(RSEQ_TEMPLATE_MO_RELEASE)) && \
+ (defined(RSEQ_TEMPLATE_CPU_ID) || defined(RSEQ_TEMPLATE_MM_CID))
+
+static inline __attribute__((always_inline))
+int RSEQ_TEMPLATE_IDENTIFIER(rseq_cmpeqv_trystorev_storev)(intptr_t *v, intptr_t expect,
+ intptr_t *v2, intptr_t newv2,
+ intptr_t newv, int cpu)
+{
+ RSEQ_INJECT_C(9)
+
+ __asm__ __volatile__ goto (
+ RSEQ_ASM_DEFINE_TABLE(9, 1f, 2f, 4f) /* start, commit, abort */
+ RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[cmpfail])
+#ifdef RSEQ_COMPARE_TWICE
+ RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[error1])
+ RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[error2])
+#endif
+ /* Start rseq by storing table entry pointer into rseq_cs. */
+ RSEQ_ASM_STORE_RSEQ_CS(1, 3f, rseq_cs)
+ RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, 4f)
+ RSEQ_INJECT_ASM(3)
+ LONG_L " $4, %[v]\n\t"
+ "bne $4, %[expect], %l[cmpfail]\n\t"
+ RSEQ_INJECT_ASM(4)
+#ifdef RSEQ_COMPARE_TWICE
+ RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, %l[error1])
+ LONG_L " $4, %[v]\n\t"
+ "bne $4, %[expect], %l[error2]\n\t"
+#endif
+ /* try store */
+ LONG_S " %[newv2], %[v2]\n\t"
+ RSEQ_INJECT_ASM(5)
+#ifdef RSEQ_TEMPLATE_MO_RELEASE
+ "sync\n\t" /* full sync provides store-release */
+#endif
+ /* final store */
+ LONG_S " %[newv], %[v]\n\t"
+ "2:\n\t"
+ RSEQ_INJECT_ASM(6)
+ "b 5f\n\t"
+ RSEQ_ASM_DEFINE_ABORT(3, 4, "", abort, 1b, 2b, 4f)
+ "5:\n\t"
+ : /* gcc asm goto does not allow outputs */
+ : [cpu_id] "r" (cpu),
+ [current_cpu_id] "m" (rseq_get_abi()->RSEQ_TEMPLATE_CPU_ID_FIELD),
+ [rseq_cs] "m" (rseq_get_abi()->rseq_cs.arch.ptr),
+ /* try store input */
+ [v2] "m" (*v2),
+ [newv2] "r" (newv2),
+ /* final store input */
+ [v] "m" (*v),
+ [expect] "r" (expect),
+ [newv] "r" (newv)
+ RSEQ_INJECT_INPUT
+ : "$4", "memory"
+ RSEQ_INJECT_CLOBBER
+ : abort, cmpfail
+#ifdef RSEQ_COMPARE_TWICE
+ , error1, error2
+#endif
+ );
+ return 0;
+abort:
+ RSEQ_INJECT_FAILED
+ return -1;
+cmpfail:
+ return 1;
+#ifdef RSEQ_COMPARE_TWICE
+error1:
+ rseq_bug("cpu_id comparison failed");
+error2:
+ rseq_bug("expected value comparison failed");
+#endif
+}
+
+static inline __attribute__((always_inline))
+int RSEQ_TEMPLATE_IDENTIFIER(rseq_cmpeqv_trymemcpy_storev)(intptr_t *v, intptr_t expect,
+ void *dst, void *src, size_t len,
+ intptr_t newv, int cpu)
+{
+ uintptr_t rseq_scratch[3];
+
+ RSEQ_INJECT_C(9)
+
+ __asm__ __volatile__ goto (
+ RSEQ_ASM_DEFINE_TABLE(9, 1f, 2f, 4f) /* start, commit, abort */
+ RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[cmpfail])
+#ifdef RSEQ_COMPARE_TWICE
+ RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[error1])
+ RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[error2])
+#endif
+ LONG_S " %[src], %[rseq_scratch0]\n\t"
+ LONG_S " %[dst], %[rseq_scratch1]\n\t"
+ LONG_S " %[len], %[rseq_scratch2]\n\t"
+ /* Start rseq by storing table entry pointer into rseq_cs. */
+ RSEQ_ASM_STORE_RSEQ_CS(1, 3f, rseq_cs)
+ RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, 4f)
+ RSEQ_INJECT_ASM(3)
+ LONG_L " $4, %[v]\n\t"
+ "bne $4, %[expect], 5f\n\t"
+ RSEQ_INJECT_ASM(4)
+#ifdef RSEQ_COMPARE_TWICE
+ RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, 6f)
+ LONG_L " $4, %[v]\n\t"
+ "bne $4, %[expect], 7f\n\t"
+#endif
+ /* try memcpy */
+ "beqz %[len], 333f\n\t" \
+ "222:\n\t" \
+ "lb $4, 0(%[src])\n\t" \
+ "sb $4, 0(%[dst])\n\t" \
+ LONG_ADDI " %[src], 1\n\t" \
+ LONG_ADDI " %[dst], 1\n\t" \
+ LONG_ADDI " %[len], -1\n\t" \
+ "bnez %[len], 222b\n\t" \
+ "333:\n\t" \
+ RSEQ_INJECT_ASM(5)
+#ifdef RSEQ_TEMPLATE_MO_RELEASE
+ "sync\n\t" /* full sync provides store-release */
+#endif
+ /* final store */
+ LONG_S " %[newv], %[v]\n\t"
+ "2:\n\t"
+ RSEQ_INJECT_ASM(6)
+ /* teardown */
+ LONG_L " %[len], %[rseq_scratch2]\n\t"
+ LONG_L " %[dst], %[rseq_scratch1]\n\t"
+ LONG_L " %[src], %[rseq_scratch0]\n\t"
+ "b 8f\n\t"
+ RSEQ_ASM_DEFINE_ABORT(3, 4,
+ /* teardown */
+ LONG_L " %[len], %[rseq_scratch2]\n\t"
+ LONG_L " %[dst], %[rseq_scratch1]\n\t"
+ LONG_L " %[src], %[rseq_scratch0]\n\t",
+ abort, 1b, 2b, 4f)
+ RSEQ_ASM_DEFINE_CMPFAIL(5,
+ /* teardown */
+ LONG_L " %[len], %[rseq_scratch2]\n\t"
+ LONG_L " %[dst], %[rseq_scratch1]\n\t"
+ LONG_L " %[src], %[rseq_scratch0]\n\t",
+ cmpfail)
+#ifdef RSEQ_COMPARE_TWICE
+ RSEQ_ASM_DEFINE_CMPFAIL(6,
+ /* teardown */
+ LONG_L " %[len], %[rseq_scratch2]\n\t"
+ LONG_L " %[dst], %[rseq_scratch1]\n\t"
+ LONG_L " %[src], %[rseq_scratch0]\n\t",
+ error1)
+ RSEQ_ASM_DEFINE_CMPFAIL(7,
+ /* teardown */
+ LONG_L " %[len], %[rseq_scratch2]\n\t"
+ LONG_L " %[dst], %[rseq_scratch1]\n\t"
+ LONG_L " %[src], %[rseq_scratch0]\n\t",
+ error2)
+#endif
+ "8:\n\t"
+ : /* gcc asm goto does not allow outputs */
+ : [cpu_id] "r" (cpu),
+ [current_cpu_id] "m" (rseq_get_abi()->RSEQ_TEMPLATE_CPU_ID_FIELD),
+ [rseq_cs] "m" (rseq_get_abi()->rseq_cs.arch.ptr),
+ /* final store input */
+ [v] "m" (*v),
+ [expect] "r" (expect),
+ [newv] "r" (newv),
+ /* try memcpy input */
+ [dst] "r" (dst),
+ [src] "r" (src),
+ [len] "r" (len),
+ [rseq_scratch0] "m" (rseq_scratch[0]),
+ [rseq_scratch1] "m" (rseq_scratch[1]),
+ [rseq_scratch2] "m" (rseq_scratch[2])
+ RSEQ_INJECT_INPUT
+ : "$4", "memory"
+ RSEQ_INJECT_CLOBBER
+ : abort, cmpfail
+#ifdef RSEQ_COMPARE_TWICE
+ , error1, error2
+#endif
+ );
+ return 0;
+abort:
+ RSEQ_INJECT_FAILED
+ return -1;
+cmpfail:
+ return 1;
+#ifdef RSEQ_COMPARE_TWICE
+error1:
+ rseq_bug("cpu_id comparison failed");
+error2:
+ rseq_bug("expected value comparison failed");
+#endif
+}
+
+#endif /* #if (defined(RSEQ_TEMPLATE_MO_RELAXED) || defined(RSEQ_TEMPLATE_MO_RELEASE)) &&
+ (defined(RSEQ_TEMPLATE_CPU_ID) || defined(RSEQ_TEMPLATE_MM_CID)) */
+
+#include "rseq-bits-reset.h"
diff --git a/tools/testing/selftests/rseq/rseq-mips.h b/tools/testing/selftests/rseq/rseq-mips.h
index e989e7c14b09..42ef8e946693 100644
--- a/tools/testing/selftests/rseq/rseq-mips.h
+++ b/tools/testing/selftests/rseq/rseq-mips.h
@@ -2,9 +2,7 @@
/*
* Author: Paul Burton <paul.burton@mips.com>
* (C) Copyright 2018 MIPS Tech LLC
- *
- * Based on rseq-arm.h:
- * (C) Copyright 2016-2018 - Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
+ * (C) Copyright 2016-2022 - Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
*/
/*
@@ -47,7 +45,7 @@
#define rseq_smp_load_acquire(p) \
__extension__ ({ \
- __typeof(*p) ____p1 = RSEQ_READ_ONCE(*p); \
+ rseq_unqual_scalar_typeof(*(p)) ____p1 = RSEQ_READ_ONCE(*(p)); \
rseq_smp_mb(); \
____p1; \
})
@@ -57,13 +55,9 @@ __extension__ ({ \
#define rseq_smp_store_release(p, v) \
do { \
rseq_smp_mb(); \
- RSEQ_WRITE_ONCE(*p, v); \
+ RSEQ_WRITE_ONCE(*(p), v); \
} while (0)
-#ifdef RSEQ_SKIP_FASTPATH
-#include "rseq-skip.h"
-#else /* !RSEQ_SKIP_FASTPATH */
-
#if _MIPS_SZLONG == 64
# define LONG ".dword"
# define LONG_LA "dla"
@@ -154,661 +148,34 @@ do { \
teardown \
"b %l[" __rseq_str(cmpfail_label) "]\n\t"
-#define rseq_workaround_gcc_asm_size_guess() __asm__ __volatile__("")
-
-static inline __attribute__((always_inline))
-int rseq_cmpeqv_storev(intptr_t *v, intptr_t expect, intptr_t newv, int cpu)
-{
- RSEQ_INJECT_C(9)
-
- rseq_workaround_gcc_asm_size_guess();
- __asm__ __volatile__ goto (
- RSEQ_ASM_DEFINE_TABLE(9, 1f, 2f, 4f) /* start, commit, abort */
- RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[cmpfail])
-#ifdef RSEQ_COMPARE_TWICE
- RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[error1])
- RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[error2])
-#endif
- /* Start rseq by storing table entry pointer into rseq_cs. */
- RSEQ_ASM_STORE_RSEQ_CS(1, 3f, rseq_cs)
- RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, 4f)
- RSEQ_INJECT_ASM(3)
- LONG_L " $4, %[v]\n\t"
- "bne $4, %[expect], %l[cmpfail]\n\t"
- RSEQ_INJECT_ASM(4)
-#ifdef RSEQ_COMPARE_TWICE
- RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, %l[error1])
- LONG_L " $4, %[v]\n\t"
- "bne $4, %[expect], %l[error2]\n\t"
-#endif
- /* final store */
- LONG_S " %[newv], %[v]\n\t"
- "2:\n\t"
- RSEQ_INJECT_ASM(5)
- "b 5f\n\t"
- RSEQ_ASM_DEFINE_ABORT(3, 4, "", abort, 1b, 2b, 4f)
- "5:\n\t"
- : /* gcc asm goto does not allow outputs */
- : [cpu_id] "r" (cpu),
- [current_cpu_id] "m" (__rseq_abi.cpu_id),
- [rseq_cs] "m" (__rseq_abi.rseq_cs),
- [v] "m" (*v),
- [expect] "r" (expect),
- [newv] "r" (newv)
- RSEQ_INJECT_INPUT
- : "$4", "memory"
- RSEQ_INJECT_CLOBBER
- : abort, cmpfail
-#ifdef RSEQ_COMPARE_TWICE
- , error1, error2
-#endif
- );
- rseq_workaround_gcc_asm_size_guess();
- return 0;
-abort:
- rseq_workaround_gcc_asm_size_guess();
- RSEQ_INJECT_FAILED
- return -1;
-cmpfail:
- rseq_workaround_gcc_asm_size_guess();
- return 1;
-#ifdef RSEQ_COMPARE_TWICE
-error1:
- rseq_bug("cpu_id comparison failed");
-error2:
- rseq_bug("expected value comparison failed");
-#endif
-}
-
-static inline __attribute__((always_inline))
-int rseq_cmpnev_storeoffp_load(intptr_t *v, intptr_t expectnot,
- off_t voffp, intptr_t *load, int cpu)
-{
- RSEQ_INJECT_C(9)
-
- rseq_workaround_gcc_asm_size_guess();
- __asm__ __volatile__ goto (
- RSEQ_ASM_DEFINE_TABLE(9, 1f, 2f, 4f) /* start, commit, abort */
- RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[cmpfail])
-#ifdef RSEQ_COMPARE_TWICE
- RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[error1])
- RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[error2])
-#endif
- /* Start rseq by storing table entry pointer into rseq_cs. */
- RSEQ_ASM_STORE_RSEQ_CS(1, 3f, rseq_cs)
- RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, 4f)
- RSEQ_INJECT_ASM(3)
- LONG_L " $4, %[v]\n\t"
- "beq $4, %[expectnot], %l[cmpfail]\n\t"
- RSEQ_INJECT_ASM(4)
-#ifdef RSEQ_COMPARE_TWICE
- RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, %l[error1])
- LONG_L " $4, %[v]\n\t"
- "beq $4, %[expectnot], %l[error2]\n\t"
-#endif
- LONG_S " $4, %[load]\n\t"
- LONG_ADDI " $4, %[voffp]\n\t"
- LONG_L " $4, 0($4)\n\t"
- /* final store */
- LONG_S " $4, %[v]\n\t"
- "2:\n\t"
- RSEQ_INJECT_ASM(5)
- "b 5f\n\t"
- RSEQ_ASM_DEFINE_ABORT(3, 4, "", abort, 1b, 2b, 4f)
- "5:\n\t"
- : /* gcc asm goto does not allow outputs */
- : [cpu_id] "r" (cpu),
- [current_cpu_id] "m" (__rseq_abi.cpu_id),
- [rseq_cs] "m" (__rseq_abi.rseq_cs),
- /* final store input */
- [v] "m" (*v),
- [expectnot] "r" (expectnot),
- [voffp] "Ir" (voffp),
- [load] "m" (*load)
- RSEQ_INJECT_INPUT
- : "$4", "memory"
- RSEQ_INJECT_CLOBBER
- : abort, cmpfail
-#ifdef RSEQ_COMPARE_TWICE
- , error1, error2
-#endif
- );
- rseq_workaround_gcc_asm_size_guess();
- return 0;
-abort:
- rseq_workaround_gcc_asm_size_guess();
- RSEQ_INJECT_FAILED
- return -1;
-cmpfail:
- rseq_workaround_gcc_asm_size_guess();
- return 1;
-#ifdef RSEQ_COMPARE_TWICE
-error1:
- rseq_bug("cpu_id comparison failed");
-error2:
- rseq_bug("expected value comparison failed");
-#endif
-}
-
-static inline __attribute__((always_inline))
-int rseq_addv(intptr_t *v, intptr_t count, int cpu)
-{
- RSEQ_INJECT_C(9)
-
- rseq_workaround_gcc_asm_size_guess();
- __asm__ __volatile__ goto (
- RSEQ_ASM_DEFINE_TABLE(9, 1f, 2f, 4f) /* start, commit, abort */
-#ifdef RSEQ_COMPARE_TWICE
- RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[error1])
-#endif
- /* Start rseq by storing table entry pointer into rseq_cs. */
- RSEQ_ASM_STORE_RSEQ_CS(1, 3f, rseq_cs)
- RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, 4f)
- RSEQ_INJECT_ASM(3)
-#ifdef RSEQ_COMPARE_TWICE
- RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, %l[error1])
-#endif
- LONG_L " $4, %[v]\n\t"
- LONG_ADDI " $4, %[count]\n\t"
- /* final store */
- LONG_S " $4, %[v]\n\t"
- "2:\n\t"
- RSEQ_INJECT_ASM(4)
- "b 5f\n\t"
- RSEQ_ASM_DEFINE_ABORT(3, 4, "", abort, 1b, 2b, 4f)
- "5:\n\t"
- : /* gcc asm goto does not allow outputs */
- : [cpu_id] "r" (cpu),
- [current_cpu_id] "m" (__rseq_abi.cpu_id),
- [rseq_cs] "m" (__rseq_abi.rseq_cs),
- [v] "m" (*v),
- [count] "Ir" (count)
- RSEQ_INJECT_INPUT
- : "$4", "memory"
- RSEQ_INJECT_CLOBBER
- : abort
-#ifdef RSEQ_COMPARE_TWICE
- , error1
-#endif
- );
- rseq_workaround_gcc_asm_size_guess();
- return 0;
-abort:
- rseq_workaround_gcc_asm_size_guess();
- RSEQ_INJECT_FAILED
- return -1;
-#ifdef RSEQ_COMPARE_TWICE
-error1:
- rseq_bug("cpu_id comparison failed");
-#endif
-}
-
-static inline __attribute__((always_inline))
-int rseq_cmpeqv_trystorev_storev(intptr_t *v, intptr_t expect,
- intptr_t *v2, intptr_t newv2,
- intptr_t newv, int cpu)
-{
- RSEQ_INJECT_C(9)
-
- rseq_workaround_gcc_asm_size_guess();
- __asm__ __volatile__ goto (
- RSEQ_ASM_DEFINE_TABLE(9, 1f, 2f, 4f) /* start, commit, abort */
- RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[cmpfail])
-#ifdef RSEQ_COMPARE_TWICE
- RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[error1])
- RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[error2])
-#endif
- /* Start rseq by storing table entry pointer into rseq_cs. */
- RSEQ_ASM_STORE_RSEQ_CS(1, 3f, rseq_cs)
- RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, 4f)
- RSEQ_INJECT_ASM(3)
- LONG_L " $4, %[v]\n\t"
- "bne $4, %[expect], %l[cmpfail]\n\t"
- RSEQ_INJECT_ASM(4)
-#ifdef RSEQ_COMPARE_TWICE
- RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, %l[error1])
- LONG_L " $4, %[v]\n\t"
- "bne $4, %[expect], %l[error2]\n\t"
-#endif
- /* try store */
- LONG_S " %[newv2], %[v2]\n\t"
- RSEQ_INJECT_ASM(5)
- /* final store */
- LONG_S " %[newv], %[v]\n\t"
- "2:\n\t"
- RSEQ_INJECT_ASM(6)
- "b 5f\n\t"
- RSEQ_ASM_DEFINE_ABORT(3, 4, "", abort, 1b, 2b, 4f)
- "5:\n\t"
- : /* gcc asm goto does not allow outputs */
- : [cpu_id] "r" (cpu),
- [current_cpu_id] "m" (__rseq_abi.cpu_id),
- [rseq_cs] "m" (__rseq_abi.rseq_cs),
- /* try store input */
- [v2] "m" (*v2),
- [newv2] "r" (newv2),
- /* final store input */
- [v] "m" (*v),
- [expect] "r" (expect),
- [newv] "r" (newv)
- RSEQ_INJECT_INPUT
- : "$4", "memory"
- RSEQ_INJECT_CLOBBER
- : abort, cmpfail
-#ifdef RSEQ_COMPARE_TWICE
- , error1, error2
-#endif
- );
- rseq_workaround_gcc_asm_size_guess();
- return 0;
-abort:
- rseq_workaround_gcc_asm_size_guess();
- RSEQ_INJECT_FAILED
- return -1;
-cmpfail:
- rseq_workaround_gcc_asm_size_guess();
- return 1;
-#ifdef RSEQ_COMPARE_TWICE
-error1:
- rseq_bug("cpu_id comparison failed");
-error2:
- rseq_bug("expected value comparison failed");
-#endif
-}
-
-static inline __attribute__((always_inline))
-int rseq_cmpeqv_trystorev_storev_release(intptr_t *v, intptr_t expect,
- intptr_t *v2, intptr_t newv2,
- intptr_t newv, int cpu)
-{
- RSEQ_INJECT_C(9)
-
- rseq_workaround_gcc_asm_size_guess();
- __asm__ __volatile__ goto (
- RSEQ_ASM_DEFINE_TABLE(9, 1f, 2f, 4f) /* start, commit, abort */
- RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[cmpfail])
-#ifdef RSEQ_COMPARE_TWICE
- RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[error1])
- RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[error2])
-#endif
- /* Start rseq by storing table entry pointer into rseq_cs. */
- RSEQ_ASM_STORE_RSEQ_CS(1, 3f, rseq_cs)
- RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, 4f)
- RSEQ_INJECT_ASM(3)
- LONG_L " $4, %[v]\n\t"
- "bne $4, %[expect], %l[cmpfail]\n\t"
- RSEQ_INJECT_ASM(4)
-#ifdef RSEQ_COMPARE_TWICE
- RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, %l[error1])
- LONG_L " $4, %[v]\n\t"
- "bne $4, %[expect], %l[error2]\n\t"
-#endif
- /* try store */
- LONG_S " %[newv2], %[v2]\n\t"
- RSEQ_INJECT_ASM(5)
- "sync\n\t" /* full sync provides store-release */
- /* final store */
- LONG_S " %[newv], %[v]\n\t"
- "2:\n\t"
- RSEQ_INJECT_ASM(6)
- "b 5f\n\t"
- RSEQ_ASM_DEFINE_ABORT(3, 4, "", abort, 1b, 2b, 4f)
- "5:\n\t"
- : /* gcc asm goto does not allow outputs */
- : [cpu_id] "r" (cpu),
- [current_cpu_id] "m" (__rseq_abi.cpu_id),
- [rseq_cs] "m" (__rseq_abi.rseq_cs),
- /* try store input */
- [v2] "m" (*v2),
- [newv2] "r" (newv2),
- /* final store input */
- [v] "m" (*v),
- [expect] "r" (expect),
- [newv] "r" (newv)
- RSEQ_INJECT_INPUT
- : "$4", "memory"
- RSEQ_INJECT_CLOBBER
- : abort, cmpfail
-#ifdef RSEQ_COMPARE_TWICE
- , error1, error2
-#endif
- );
- rseq_workaround_gcc_asm_size_guess();
- return 0;
-abort:
- rseq_workaround_gcc_asm_size_guess();
- RSEQ_INJECT_FAILED
- return -1;
-cmpfail:
- rseq_workaround_gcc_asm_size_guess();
- return 1;
-#ifdef RSEQ_COMPARE_TWICE
-error1:
- rseq_bug("cpu_id comparison failed");
-error2:
- rseq_bug("expected value comparison failed");
-#endif
-}
-
-static inline __attribute__((always_inline))
-int rseq_cmpeqv_cmpeqv_storev(intptr_t *v, intptr_t expect,
- intptr_t *v2, intptr_t expect2,
- intptr_t newv, int cpu)
-{
- RSEQ_INJECT_C(9)
-
- rseq_workaround_gcc_asm_size_guess();
- __asm__ __volatile__ goto (
- RSEQ_ASM_DEFINE_TABLE(9, 1f, 2f, 4f) /* start, commit, abort */
- RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[cmpfail])
-#ifdef RSEQ_COMPARE_TWICE
- RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[error1])
- RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[error2])
- RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[error3])
-#endif
- /* Start rseq by storing table entry pointer into rseq_cs. */
- RSEQ_ASM_STORE_RSEQ_CS(1, 3f, rseq_cs)
- RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, 4f)
- RSEQ_INJECT_ASM(3)
- LONG_L " $4, %[v]\n\t"
- "bne $4, %[expect], %l[cmpfail]\n\t"
- RSEQ_INJECT_ASM(4)
- LONG_L " $4, %[v2]\n\t"
- "bne $4, %[expect2], %l[cmpfail]\n\t"
- RSEQ_INJECT_ASM(5)
-#ifdef RSEQ_COMPARE_TWICE
- RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, %l[error1])
- LONG_L " $4, %[v]\n\t"
- "bne $4, %[expect], %l[error2]\n\t"
- LONG_L " $4, %[v2]\n\t"
- "bne $4, %[expect2], %l[error3]\n\t"
-#endif
- /* final store */
- LONG_S " %[newv], %[v]\n\t"
- "2:\n\t"
- RSEQ_INJECT_ASM(6)
- "b 5f\n\t"
- RSEQ_ASM_DEFINE_ABORT(3, 4, "", abort, 1b, 2b, 4f)
- "5:\n\t"
- : /* gcc asm goto does not allow outputs */
- : [cpu_id] "r" (cpu),
- [current_cpu_id] "m" (__rseq_abi.cpu_id),
- [rseq_cs] "m" (__rseq_abi.rseq_cs),
- /* cmp2 input */
- [v2] "m" (*v2),
- [expect2] "r" (expect2),
- /* final store input */
- [v] "m" (*v),
- [expect] "r" (expect),
- [newv] "r" (newv)
- RSEQ_INJECT_INPUT
- : "$4", "memory"
- RSEQ_INJECT_CLOBBER
- : abort, cmpfail
-#ifdef RSEQ_COMPARE_TWICE
- , error1, error2, error3
-#endif
- );
- rseq_workaround_gcc_asm_size_guess();
- return 0;
-abort:
- rseq_workaround_gcc_asm_size_guess();
- RSEQ_INJECT_FAILED
- return -1;
-cmpfail:
- rseq_workaround_gcc_asm_size_guess();
- return 1;
-#ifdef RSEQ_COMPARE_TWICE
-error1:
- rseq_bug("cpu_id comparison failed");
-error2:
- rseq_bug("1st expected value comparison failed");
-error3:
- rseq_bug("2nd expected value comparison failed");
-#endif
-}
+/* Per-cpu-id indexing. */
-static inline __attribute__((always_inline))
-int rseq_cmpeqv_trymemcpy_storev(intptr_t *v, intptr_t expect,
- void *dst, void *src, size_t len,
- intptr_t newv, int cpu)
-{
- uintptr_t rseq_scratch[3];
+#define RSEQ_TEMPLATE_CPU_ID
+#define RSEQ_TEMPLATE_MO_RELAXED
+#include "rseq-mips-bits.h"
+#undef RSEQ_TEMPLATE_MO_RELAXED
- RSEQ_INJECT_C(9)
+#define RSEQ_TEMPLATE_MO_RELEASE
+#include "rseq-mips-bits.h"
+#undef RSEQ_TEMPLATE_MO_RELEASE
+#undef RSEQ_TEMPLATE_CPU_ID
- rseq_workaround_gcc_asm_size_guess();
- __asm__ __volatile__ goto (
- RSEQ_ASM_DEFINE_TABLE(9, 1f, 2f, 4f) /* start, commit, abort */
- RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[cmpfail])
-#ifdef RSEQ_COMPARE_TWICE
- RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[error1])
- RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[error2])
-#endif
- LONG_S " %[src], %[rseq_scratch0]\n\t"
- LONG_S " %[dst], %[rseq_scratch1]\n\t"
- LONG_S " %[len], %[rseq_scratch2]\n\t"
- /* Start rseq by storing table entry pointer into rseq_cs. */
- RSEQ_ASM_STORE_RSEQ_CS(1, 3f, rseq_cs)
- RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, 4f)
- RSEQ_INJECT_ASM(3)
- LONG_L " $4, %[v]\n\t"
- "bne $4, %[expect], 5f\n\t"
- RSEQ_INJECT_ASM(4)
-#ifdef RSEQ_COMPARE_TWICE
- RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, 6f)
- LONG_L " $4, %[v]\n\t"
- "bne $4, %[expect], 7f\n\t"
-#endif
- /* try memcpy */
- "beqz %[len], 333f\n\t" \
- "222:\n\t" \
- "lb $4, 0(%[src])\n\t" \
- "sb $4, 0(%[dst])\n\t" \
- LONG_ADDI " %[src], 1\n\t" \
- LONG_ADDI " %[dst], 1\n\t" \
- LONG_ADDI " %[len], -1\n\t" \
- "bnez %[len], 222b\n\t" \
- "333:\n\t" \
- RSEQ_INJECT_ASM(5)
- /* final store */
- LONG_S " %[newv], %[v]\n\t"
- "2:\n\t"
- RSEQ_INJECT_ASM(6)
- /* teardown */
- LONG_L " %[len], %[rseq_scratch2]\n\t"
- LONG_L " %[dst], %[rseq_scratch1]\n\t"
- LONG_L " %[src], %[rseq_scratch0]\n\t"
- "b 8f\n\t"
- RSEQ_ASM_DEFINE_ABORT(3, 4,
- /* teardown */
- LONG_L " %[len], %[rseq_scratch2]\n\t"
- LONG_L " %[dst], %[rseq_scratch1]\n\t"
- LONG_L " %[src], %[rseq_scratch0]\n\t",
- abort, 1b, 2b, 4f)
- RSEQ_ASM_DEFINE_CMPFAIL(5,
- /* teardown */
- LONG_L " %[len], %[rseq_scratch2]\n\t"
- LONG_L " %[dst], %[rseq_scratch1]\n\t"
- LONG_L " %[src], %[rseq_scratch0]\n\t",
- cmpfail)
-#ifdef RSEQ_COMPARE_TWICE
- RSEQ_ASM_DEFINE_CMPFAIL(6,
- /* teardown */
- LONG_L " %[len], %[rseq_scratch2]\n\t"
- LONG_L " %[dst], %[rseq_scratch1]\n\t"
- LONG_L " %[src], %[rseq_scratch0]\n\t",
- error1)
- RSEQ_ASM_DEFINE_CMPFAIL(7,
- /* teardown */
- LONG_L " %[len], %[rseq_scratch2]\n\t"
- LONG_L " %[dst], %[rseq_scratch1]\n\t"
- LONG_L " %[src], %[rseq_scratch0]\n\t",
- error2)
-#endif
- "8:\n\t"
- : /* gcc asm goto does not allow outputs */
- : [cpu_id] "r" (cpu),
- [current_cpu_id] "m" (__rseq_abi.cpu_id),
- [rseq_cs] "m" (__rseq_abi.rseq_cs),
- /* final store input */
- [v] "m" (*v),
- [expect] "r" (expect),
- [newv] "r" (newv),
- /* try memcpy input */
- [dst] "r" (dst),
- [src] "r" (src),
- [len] "r" (len),
- [rseq_scratch0] "m" (rseq_scratch[0]),
- [rseq_scratch1] "m" (rseq_scratch[1]),
- [rseq_scratch2] "m" (rseq_scratch[2])
- RSEQ_INJECT_INPUT
- : "$4", "memory"
- RSEQ_INJECT_CLOBBER
- : abort, cmpfail
-#ifdef RSEQ_COMPARE_TWICE
- , error1, error2
-#endif
- );
- rseq_workaround_gcc_asm_size_guess();
- return 0;
-abort:
- rseq_workaround_gcc_asm_size_guess();
- RSEQ_INJECT_FAILED
- return -1;
-cmpfail:
- rseq_workaround_gcc_asm_size_guess();
- return 1;
-#ifdef RSEQ_COMPARE_TWICE
-error1:
- rseq_workaround_gcc_asm_size_guess();
- rseq_bug("cpu_id comparison failed");
-error2:
- rseq_workaround_gcc_asm_size_guess();
- rseq_bug("expected value comparison failed");
-#endif
-}
+/* Per-mm-cid indexing. */
-static inline __attribute__((always_inline))
-int rseq_cmpeqv_trymemcpy_storev_release(intptr_t *v, intptr_t expect,
- void *dst, void *src, size_t len,
- intptr_t newv, int cpu)
-{
- uintptr_t rseq_scratch[3];
+#define RSEQ_TEMPLATE_MM_CID
+#define RSEQ_TEMPLATE_MO_RELAXED
+#include "rseq-mips-bits.h"
+#undef RSEQ_TEMPLATE_MO_RELAXED
- RSEQ_INJECT_C(9)
+#define RSEQ_TEMPLATE_MO_RELEASE
+#include "rseq-mips-bits.h"
+#undef RSEQ_TEMPLATE_MO_RELEASE
+#undef RSEQ_TEMPLATE_MM_CID
- rseq_workaround_gcc_asm_size_guess();
- __asm__ __volatile__ goto (
- RSEQ_ASM_DEFINE_TABLE(9, 1f, 2f, 4f) /* start, commit, abort */
- RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[cmpfail])
-#ifdef RSEQ_COMPARE_TWICE
- RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[error1])
- RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[error2])
-#endif
- LONG_S " %[src], %[rseq_scratch0]\n\t"
- LONG_S " %[dst], %[rseq_scratch1]\n\t"
- LONG_S " %[len], %[rseq_scratch2]\n\t"
- /* Start rseq by storing table entry pointer into rseq_cs. */
- RSEQ_ASM_STORE_RSEQ_CS(1, 3f, rseq_cs)
- RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, 4f)
- RSEQ_INJECT_ASM(3)
- LONG_L " $4, %[v]\n\t"
- "bne $4, %[expect], 5f\n\t"
- RSEQ_INJECT_ASM(4)
-#ifdef RSEQ_COMPARE_TWICE
- RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, 6f)
- LONG_L " $4, %[v]\n\t"
- "bne $4, %[expect], 7f\n\t"
-#endif
- /* try memcpy */
- "beqz %[len], 333f\n\t" \
- "222:\n\t" \
- "lb $4, 0(%[src])\n\t" \
- "sb $4, 0(%[dst])\n\t" \
- LONG_ADDI " %[src], 1\n\t" \
- LONG_ADDI " %[dst], 1\n\t" \
- LONG_ADDI " %[len], -1\n\t" \
- "bnez %[len], 222b\n\t" \
- "333:\n\t" \
- RSEQ_INJECT_ASM(5)
- "sync\n\t" /* full sync provides store-release */
- /* final store */
- LONG_S " %[newv], %[v]\n\t"
- "2:\n\t"
- RSEQ_INJECT_ASM(6)
- /* teardown */
- LONG_L " %[len], %[rseq_scratch2]\n\t"
- LONG_L " %[dst], %[rseq_scratch1]\n\t"
- LONG_L " %[src], %[rseq_scratch0]\n\t"
- "b 8f\n\t"
- RSEQ_ASM_DEFINE_ABORT(3, 4,
- /* teardown */
- LONG_L " %[len], %[rseq_scratch2]\n\t"
- LONG_L " %[dst], %[rseq_scratch1]\n\t"
- LONG_L " %[src], %[rseq_scratch0]\n\t",
- abort, 1b, 2b, 4f)
- RSEQ_ASM_DEFINE_CMPFAIL(5,
- /* teardown */
- LONG_L " %[len], %[rseq_scratch2]\n\t"
- LONG_L " %[dst], %[rseq_scratch1]\n\t"
- LONG_L " %[src], %[rseq_scratch0]\n\t",
- cmpfail)
-#ifdef RSEQ_COMPARE_TWICE
- RSEQ_ASM_DEFINE_CMPFAIL(6,
- /* teardown */
- LONG_L " %[len], %[rseq_scratch2]\n\t"
- LONG_L " %[dst], %[rseq_scratch1]\n\t"
- LONG_L " %[src], %[rseq_scratch0]\n\t",
- error1)
- RSEQ_ASM_DEFINE_CMPFAIL(7,
- /* teardown */
- LONG_L " %[len], %[rseq_scratch2]\n\t"
- LONG_L " %[dst], %[rseq_scratch1]\n\t"
- LONG_L " %[src], %[rseq_scratch0]\n\t",
- error2)
-#endif
- "8:\n\t"
- : /* gcc asm goto does not allow outputs */
- : [cpu_id] "r" (cpu),
- [current_cpu_id] "m" (__rseq_abi.cpu_id),
- [rseq_cs] "m" (__rseq_abi.rseq_cs),
- /* final store input */
- [v] "m" (*v),
- [expect] "r" (expect),
- [newv] "r" (newv),
- /* try memcpy input */
- [dst] "r" (dst),
- [src] "r" (src),
- [len] "r" (len),
- [rseq_scratch0] "m" (rseq_scratch[0]),
- [rseq_scratch1] "m" (rseq_scratch[1]),
- [rseq_scratch2] "m" (rseq_scratch[2])
- RSEQ_INJECT_INPUT
- : "$4", "memory"
- RSEQ_INJECT_CLOBBER
- : abort, cmpfail
-#ifdef RSEQ_COMPARE_TWICE
- , error1, error2
-#endif
- );
- rseq_workaround_gcc_asm_size_guess();
- return 0;
-abort:
- rseq_workaround_gcc_asm_size_guess();
- RSEQ_INJECT_FAILED
- return -1;
-cmpfail:
- rseq_workaround_gcc_asm_size_guess();
- return 1;
-#ifdef RSEQ_COMPARE_TWICE
-error1:
- rseq_workaround_gcc_asm_size_guess();
- rseq_bug("cpu_id comparison failed");
-error2:
- rseq_workaround_gcc_asm_size_guess();
- rseq_bug("expected value comparison failed");
-#endif
-}
+/* APIs which are not based on cpu ids. */
-#endif /* !RSEQ_SKIP_FASTPATH */
+#define RSEQ_TEMPLATE_CPU_ID_NONE
+#define RSEQ_TEMPLATE_MO_RELAXED
+#include "rseq-mips-bits.h"
+#undef RSEQ_TEMPLATE_MO_RELAXED
+#undef RSEQ_TEMPLATE_CPU_ID_NONE
diff --git a/tools/testing/selftests/rseq/rseq-ppc-bits.h b/tools/testing/selftests/rseq/rseq-ppc-bits.h
new file mode 100644
index 000000000000..98e69eae1e62
--- /dev/null
+++ b/tools/testing/selftests/rseq/rseq-ppc-bits.h
@@ -0,0 +1,454 @@
+/* SPDX-License-Identifier: LGPL-2.1 OR MIT */
+/*
+ * rseq-ppc-bits.h
+ *
+ * (C) Copyright 2016-2018 - Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
+ * (C) Copyright 2016-2018 - Boqun Feng <boqun.feng@gmail.com>
+ */
+
+#include "rseq-bits-template.h"
+
+#if defined(RSEQ_TEMPLATE_MO_RELAXED) && \
+ (defined(RSEQ_TEMPLATE_CPU_ID) || defined(RSEQ_TEMPLATE_MM_CID))
+
+static inline __attribute__((always_inline))
+int RSEQ_TEMPLATE_IDENTIFIER(rseq_cmpeqv_storev)(intptr_t *v, intptr_t expect, intptr_t newv, int cpu)
+{
+ RSEQ_INJECT_C(9)
+
+ __asm__ __volatile__ goto (
+ RSEQ_ASM_DEFINE_TABLE(3, 1f, 2f, 4f) /* start, commit, abort */
+ RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[cmpfail])
+#ifdef RSEQ_COMPARE_TWICE
+ RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[error1])
+ RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[error2])
+#endif
+ /* Start rseq by storing table entry pointer into rseq_cs. */
+ RSEQ_ASM_STORE_RSEQ_CS(1, 3b, rseq_cs)
+ /* cmp cpuid */
+ RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, 4f)
+ RSEQ_INJECT_ASM(3)
+ /* cmp @v equal to @expect */
+ RSEQ_ASM_OP_CMPEQ(v, expect, %l[cmpfail])
+ RSEQ_INJECT_ASM(4)
+#ifdef RSEQ_COMPARE_TWICE
+ /* cmp cpuid */
+ RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, %l[error1])
+ /* cmp @v equal to @expect */
+ RSEQ_ASM_OP_CMPEQ(v, expect, %l[error2])
+#endif
+ /* final store */
+ RSEQ_ASM_OP_FINAL_STORE(newv, v, 2)
+ RSEQ_INJECT_ASM(5)
+ RSEQ_ASM_DEFINE_ABORT(4, abort)
+ : /* gcc asm goto does not allow outputs */
+ : [cpu_id] "r" (cpu),
+ [current_cpu_id] "m" (rseq_get_abi()->RSEQ_TEMPLATE_CPU_ID_FIELD),
+ [rseq_cs] "m" (rseq_get_abi()->rseq_cs.arch.ptr),
+ [v] "m" (*v),
+ [expect] "r" (expect),
+ [newv] "r" (newv)
+ RSEQ_INJECT_INPUT
+ : "memory", "cc", "r17"
+ RSEQ_INJECT_CLOBBER
+ : abort, cmpfail
+#ifdef RSEQ_COMPARE_TWICE
+ , error1, error2
+#endif
+ );
+ rseq_after_asm_goto();
+ return 0;
+abort:
+ rseq_after_asm_goto();
+ RSEQ_INJECT_FAILED
+ return -1;
+cmpfail:
+ rseq_after_asm_goto();
+ return 1;
+#ifdef RSEQ_COMPARE_TWICE
+error1:
+ rseq_after_asm_goto();
+ rseq_bug("cpu_id comparison failed");
+error2:
+ rseq_after_asm_goto();
+ rseq_bug("expected value comparison failed");
+#endif
+}
+
+static inline __attribute__((always_inline))
+int RSEQ_TEMPLATE_IDENTIFIER(rseq_cmpnev_storeoffp_load)(intptr_t *v, intptr_t expectnot,
+ long voffp, intptr_t *load, int cpu)
+{
+ RSEQ_INJECT_C(9)
+
+ __asm__ __volatile__ goto (
+ RSEQ_ASM_DEFINE_TABLE(3, 1f, 2f, 4f) /* start, commit, abort */
+ RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[cmpfail])
+#ifdef RSEQ_COMPARE_TWICE
+ RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[error1])
+ RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[error2])
+#endif
+ /* Start rseq by storing table entry pointer into rseq_cs. */
+ RSEQ_ASM_STORE_RSEQ_CS(1, 3b, rseq_cs)
+ /* cmp cpuid */
+ RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, 4f)
+ RSEQ_INJECT_ASM(3)
+ /* cmp @v not equal to @expectnot */
+ RSEQ_ASM_OP_CMPNE(v, expectnot, %l[cmpfail])
+ RSEQ_INJECT_ASM(4)
+#ifdef RSEQ_COMPARE_TWICE
+ /* cmp cpuid */
+ RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, %l[error1])
+ /* cmp @v not equal to @expectnot */
+ RSEQ_ASM_OP_CMPNE(v, expectnot, %l[error2])
+#endif
+ /* load the value of @v */
+ RSEQ_ASM_OP_R_LOAD(v)
+ /* store it in @load */
+ RSEQ_ASM_OP_R_STORE(load)
+ /* dereference voffp(v) */
+ RSEQ_ASM_OP_R_LOADX(voffp)
+ /* final store the value at voffp(v) */
+ RSEQ_ASM_OP_R_FINAL_STORE(v, 2)
+ RSEQ_INJECT_ASM(5)
+ RSEQ_ASM_DEFINE_ABORT(4, abort)
+ : /* gcc asm goto does not allow outputs */
+ : [cpu_id] "r" (cpu),
+ [current_cpu_id] "m" (rseq_get_abi()->RSEQ_TEMPLATE_CPU_ID_FIELD),
+ [rseq_cs] "m" (rseq_get_abi()->rseq_cs.arch.ptr),
+ /* final store input */
+ [v] "m" (*v),
+ [expectnot] "r" (expectnot),
+ [voffp] "b" (voffp),
+ [load] "m" (*load)
+ RSEQ_INJECT_INPUT
+ : "memory", "cc", "r17"
+ RSEQ_INJECT_CLOBBER
+ : abort, cmpfail
+#ifdef RSEQ_COMPARE_TWICE
+ , error1, error2
+#endif
+ );
+ rseq_after_asm_goto();
+ return 0;
+abort:
+ rseq_after_asm_goto();
+ RSEQ_INJECT_FAILED
+ return -1;
+cmpfail:
+ rseq_after_asm_goto();
+ return 1;
+#ifdef RSEQ_COMPARE_TWICE
+error1:
+ rseq_after_asm_goto();
+ rseq_bug("cpu_id comparison failed");
+error2:
+ rseq_after_asm_goto();
+ rseq_bug("expected value comparison failed");
+#endif
+}
+
+static inline __attribute__((always_inline))
+int RSEQ_TEMPLATE_IDENTIFIER(rseq_addv)(intptr_t *v, intptr_t count, int cpu)
+{
+ RSEQ_INJECT_C(9)
+
+ __asm__ __volatile__ goto (
+ RSEQ_ASM_DEFINE_TABLE(3, 1f, 2f, 4f) /* start, commit, abort */
+#ifdef RSEQ_COMPARE_TWICE
+ RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[error1])
+#endif
+ /* Start rseq by storing table entry pointer into rseq_cs. */
+ RSEQ_ASM_STORE_RSEQ_CS(1, 3b, rseq_cs)
+ /* cmp cpuid */
+ RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, 4f)
+ RSEQ_INJECT_ASM(3)
+#ifdef RSEQ_COMPARE_TWICE
+ /* cmp cpuid */
+ RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, %l[error1])
+#endif
+ /* load the value of @v */
+ RSEQ_ASM_OP_R_LOAD(v)
+ /* add @count to it */
+ RSEQ_ASM_OP_R_ADD(count)
+ /* final store */
+ RSEQ_ASM_OP_R_FINAL_STORE(v, 2)
+ RSEQ_INJECT_ASM(4)
+ RSEQ_ASM_DEFINE_ABORT(4, abort)
+ : /* gcc asm goto does not allow outputs */
+ : [cpu_id] "r" (cpu),
+ [current_cpu_id] "m" (rseq_get_abi()->RSEQ_TEMPLATE_CPU_ID_FIELD),
+ [rseq_cs] "m" (rseq_get_abi()->rseq_cs.arch.ptr),
+ /* final store input */
+ [v] "m" (*v),
+ [count] "r" (count)
+ RSEQ_INJECT_INPUT
+ : "memory", "cc", "r17"
+ RSEQ_INJECT_CLOBBER
+ : abort
+#ifdef RSEQ_COMPARE_TWICE
+ , error1
+#endif
+ );
+ rseq_after_asm_goto();
+ return 0;
+abort:
+ rseq_after_asm_goto();
+ RSEQ_INJECT_FAILED
+ return -1;
+#ifdef RSEQ_COMPARE_TWICE
+error1:
+ rseq_after_asm_goto();
+ rseq_bug("cpu_id comparison failed");
+#endif
+}
+
+static inline __attribute__((always_inline))
+int RSEQ_TEMPLATE_IDENTIFIER(rseq_cmpeqv_cmpeqv_storev)(intptr_t *v, intptr_t expect,
+ intptr_t *v2, intptr_t expect2,
+ intptr_t newv, int cpu)
+{
+ RSEQ_INJECT_C(9)
+
+ __asm__ __volatile__ goto (
+ RSEQ_ASM_DEFINE_TABLE(3, 1f, 2f, 4f) /* start, commit, abort */
+ RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[cmpfail])
+#ifdef RSEQ_COMPARE_TWICE
+ RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[error1])
+ RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[error2])
+ RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[error3])
+#endif
+ /* Start rseq by storing table entry pointer into rseq_cs. */
+ RSEQ_ASM_STORE_RSEQ_CS(1, 3b, rseq_cs)
+ /* cmp cpuid */
+ RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, 4f)
+ RSEQ_INJECT_ASM(3)
+ /* cmp @v equal to @expect */
+ RSEQ_ASM_OP_CMPEQ(v, expect, %l[cmpfail])
+ RSEQ_INJECT_ASM(4)
+ /* cmp @v2 equal to @expct2 */
+ RSEQ_ASM_OP_CMPEQ(v2, expect2, %l[cmpfail])
+ RSEQ_INJECT_ASM(5)
+#ifdef RSEQ_COMPARE_TWICE
+ /* cmp cpuid */
+ RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, %l[error1])
+ /* cmp @v equal to @expect */
+ RSEQ_ASM_OP_CMPEQ(v, expect, %l[error2])
+ /* cmp @v2 equal to @expct2 */
+ RSEQ_ASM_OP_CMPEQ(v2, expect2, %l[error3])
+#endif
+ /* final store */
+ RSEQ_ASM_OP_FINAL_STORE(newv, v, 2)
+ RSEQ_INJECT_ASM(6)
+ RSEQ_ASM_DEFINE_ABORT(4, abort)
+ : /* gcc asm goto does not allow outputs */
+ : [cpu_id] "r" (cpu),
+ [current_cpu_id] "m" (rseq_get_abi()->RSEQ_TEMPLATE_CPU_ID_FIELD),
+ [rseq_cs] "m" (rseq_get_abi()->rseq_cs.arch.ptr),
+ /* cmp2 input */
+ [v2] "m" (*v2),
+ [expect2] "r" (expect2),
+ /* final store input */
+ [v] "m" (*v),
+ [expect] "r" (expect),
+ [newv] "r" (newv)
+ RSEQ_INJECT_INPUT
+ : "memory", "cc", "r17"
+ RSEQ_INJECT_CLOBBER
+ : abort, cmpfail
+#ifdef RSEQ_COMPARE_TWICE
+ , error1, error2, error3
+#endif
+ );
+ rseq_after_asm_goto();
+ return 0;
+abort:
+ rseq_after_asm_goto();
+ RSEQ_INJECT_FAILED
+ return -1;
+cmpfail:
+ rseq_after_asm_goto();
+ return 1;
+#ifdef RSEQ_COMPARE_TWICE
+error1:
+ rseq_after_asm_goto();
+ rseq_bug("cpu_id comparison failed");
+error2:
+ rseq_after_asm_goto();
+ rseq_bug("1st expected value comparison failed");
+error3:
+ rseq_after_asm_goto();
+ rseq_bug("2nd expected value comparison failed");
+#endif
+}
+
+#endif /* #if defined(RSEQ_TEMPLATE_MO_RELAXED) &&
+ (defined(RSEQ_TEMPLATE_CPU_ID) || defined(RSEQ_TEMPLATE_MM_CID)) */
+
+#if (defined(RSEQ_TEMPLATE_MO_RELAXED) || defined(RSEQ_TEMPLATE_MO_RELEASE)) && \
+ (defined(RSEQ_TEMPLATE_CPU_ID) || defined(RSEQ_TEMPLATE_MM_CID))
+
+static inline __attribute__((always_inline))
+int RSEQ_TEMPLATE_IDENTIFIER(rseq_cmpeqv_trystorev_storev)(intptr_t *v, intptr_t expect,
+ intptr_t *v2, intptr_t newv2,
+ intptr_t newv, int cpu)
+{
+ RSEQ_INJECT_C(9)
+
+ __asm__ __volatile__ goto (
+ RSEQ_ASM_DEFINE_TABLE(3, 1f, 2f, 4f) /* start, commit, abort */
+ RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[cmpfail])
+#ifdef RSEQ_COMPARE_TWICE
+ RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[error1])
+ RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[error2])
+#endif
+ /* Start rseq by storing table entry pointer into rseq_cs. */
+ RSEQ_ASM_STORE_RSEQ_CS(1, 3b, rseq_cs)
+ /* cmp cpuid */
+ RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, 4f)
+ RSEQ_INJECT_ASM(3)
+ /* cmp @v equal to @expect */
+ RSEQ_ASM_OP_CMPEQ(v, expect, %l[cmpfail])
+ RSEQ_INJECT_ASM(4)
+#ifdef RSEQ_COMPARE_TWICE
+ /* cmp cpuid */
+ RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, %l[error1])
+ /* cmp @v equal to @expect */
+ RSEQ_ASM_OP_CMPEQ(v, expect, %l[error2])
+#endif
+ /* try store */
+ RSEQ_ASM_OP_STORE(newv2, v2)
+ RSEQ_INJECT_ASM(5)
+#ifdef RSEQ_TEMPLATE_MO_RELEASE
+ /* for 'release' */
+ "lwsync\n\t"
+#endif
+ /* final store */
+ RSEQ_ASM_OP_FINAL_STORE(newv, v, 2)
+ RSEQ_INJECT_ASM(6)
+ RSEQ_ASM_DEFINE_ABORT(4, abort)
+ : /* gcc asm goto does not allow outputs */
+ : [cpu_id] "r" (cpu),
+ [current_cpu_id] "m" (rseq_get_abi()->RSEQ_TEMPLATE_CPU_ID_FIELD),
+ [rseq_cs] "m" (rseq_get_abi()->rseq_cs.arch.ptr),
+ /* try store input */
+ [v2] "m" (*v2),
+ [newv2] "r" (newv2),
+ /* final store input */
+ [v] "m" (*v),
+ [expect] "r" (expect),
+ [newv] "r" (newv)
+ RSEQ_INJECT_INPUT
+ : "memory", "cc", "r17"
+ RSEQ_INJECT_CLOBBER
+ : abort, cmpfail
+#ifdef RSEQ_COMPARE_TWICE
+ , error1, error2
+#endif
+ );
+ rseq_after_asm_goto();
+ return 0;
+abort:
+ rseq_after_asm_goto();
+ RSEQ_INJECT_FAILED
+ return -1;
+cmpfail:
+ rseq_after_asm_goto();
+ return 1;
+#ifdef RSEQ_COMPARE_TWICE
+error1:
+ rseq_after_asm_goto();
+ rseq_bug("cpu_id comparison failed");
+error2:
+ rseq_after_asm_goto();
+ rseq_bug("expected value comparison failed");
+#endif
+}
+
+static inline __attribute__((always_inline))
+int RSEQ_TEMPLATE_IDENTIFIER(rseq_cmpeqv_trymemcpy_storev)(intptr_t *v, intptr_t expect,
+ void *dst, void *src, size_t len,
+ intptr_t newv, int cpu)
+{
+ RSEQ_INJECT_C(9)
+
+ __asm__ __volatile__ goto (
+ RSEQ_ASM_DEFINE_TABLE(3, 1f, 2f, 4f) /* start, commit, abort */
+ RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[cmpfail])
+#ifdef RSEQ_COMPARE_TWICE
+ RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[error1])
+ RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[error2])
+#endif
+ /* setup for mempcy */
+ "mr %%r19, %[len]\n\t"
+ "mr %%r20, %[src]\n\t"
+ "mr %%r21, %[dst]\n\t"
+ /* Start rseq by storing table entry pointer into rseq_cs. */
+ RSEQ_ASM_STORE_RSEQ_CS(1, 3b, rseq_cs)
+ /* cmp cpuid */
+ RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, 4f)
+ RSEQ_INJECT_ASM(3)
+ /* cmp @v equal to @expect */
+ RSEQ_ASM_OP_CMPEQ(v, expect, %l[cmpfail])
+ RSEQ_INJECT_ASM(4)
+#ifdef RSEQ_COMPARE_TWICE
+ /* cmp cpuid */
+ RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, %l[error1])
+ /* cmp @v equal to @expect */
+ RSEQ_ASM_OP_CMPEQ(v, expect, %l[error2])
+#endif
+ /* try memcpy */
+ RSEQ_ASM_OP_R_MEMCPY()
+ RSEQ_INJECT_ASM(5)
+#ifdef RSEQ_TEMPLATE_MO_RELEASE
+ /* for 'release' */
+ "lwsync\n\t"
+#endif
+ /* final store */
+ RSEQ_ASM_OP_FINAL_STORE(newv, v, 2)
+ RSEQ_INJECT_ASM(6)
+ /* teardown */
+ RSEQ_ASM_DEFINE_ABORT(4, abort)
+ : /* gcc asm goto does not allow outputs */
+ : [cpu_id] "r" (cpu),
+ [current_cpu_id] "m" (rseq_get_abi()->RSEQ_TEMPLATE_CPU_ID_FIELD),
+ [rseq_cs] "m" (rseq_get_abi()->rseq_cs.arch.ptr),
+ /* final store input */
+ [v] "m" (*v),
+ [expect] "r" (expect),
+ [newv] "r" (newv),
+ /* try memcpy input */
+ [dst] "r" (dst),
+ [src] "r" (src),
+ [len] "r" (len)
+ RSEQ_INJECT_INPUT
+ : "memory", "cc", "r17", "r18", "r19", "r20", "r21"
+ RSEQ_INJECT_CLOBBER
+ : abort, cmpfail
+#ifdef RSEQ_COMPARE_TWICE
+ , error1, error2
+#endif
+ );
+ rseq_after_asm_goto();
+ return 0;
+abort:
+ rseq_after_asm_goto();
+ RSEQ_INJECT_FAILED
+ return -1;
+cmpfail:
+ rseq_after_asm_goto();
+ return 1;
+#ifdef RSEQ_COMPARE_TWICE
+error1:
+ rseq_after_asm_goto();
+ rseq_bug("cpu_id comparison failed");
+error2:
+ rseq_after_asm_goto();
+ rseq_bug("expected value comparison failed");
+#endif
+}
+
+#endif /* #if (defined(RSEQ_TEMPLATE_MO_RELAXED) || defined(RSEQ_TEMPLATE_MO_RELEASE)) &&
+ (defined(RSEQ_TEMPLATE_CPU_ID) || defined(RSEQ_TEMPLATE_MM_CID)) */
+
+#include "rseq-bits-reset.h"
diff --git a/tools/testing/selftests/rseq/rseq-ppc-thread-pointer.h b/tools/testing/selftests/rseq/rseq-ppc-thread-pointer.h
new file mode 100644
index 000000000000..263eee84fb76
--- /dev/null
+++ b/tools/testing/selftests/rseq/rseq-ppc-thread-pointer.h
@@ -0,0 +1,30 @@
+/* SPDX-License-Identifier: LGPL-2.1-only OR MIT */
+/*
+ * rseq-ppc-thread-pointer.h
+ *
+ * (C) Copyright 2021 - Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
+ */
+
+#ifndef _RSEQ_PPC_THREAD_POINTER
+#define _RSEQ_PPC_THREAD_POINTER
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+static inline void *rseq_thread_pointer(void)
+{
+#ifdef __powerpc64__
+ register void *__result asm ("r13");
+#else
+ register void *__result asm ("r2");
+#endif
+ asm ("" : "=r" (__result));
+ return __result;
+}
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif
diff --git a/tools/testing/selftests/rseq/rseq-ppc.h b/tools/testing/selftests/rseq/rseq-ppc.h
index 76be90196fe4..57b160597189 100644
--- a/tools/testing/selftests/rseq/rseq-ppc.h
+++ b/tools/testing/selftests/rseq/rseq-ppc.h
@@ -2,7 +2,7 @@
/*
* rseq-ppc.h
*
- * (C) Copyright 2016-2018 - Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
+ * (C) Copyright 2016-2022 - Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
* (C) Copyright 2016-2018 - Boqun Feng <boqun.feng@gmail.com>
*/
@@ -23,7 +23,7 @@
#define rseq_smp_load_acquire(p) \
__extension__ ({ \
- __typeof(*p) ____p1 = RSEQ_READ_ONCE(*p); \
+ rseq_unqual_scalar_typeof(*(p)) ____p1 = RSEQ_READ_ONCE(*(p)); \
rseq_smp_lwsync(); \
____p1; \
})
@@ -33,13 +33,9 @@ __extension__ ({ \
#define rseq_smp_store_release(p, v) \
do { \
rseq_smp_lwsync(); \
- RSEQ_WRITE_ONCE(*p, v); \
+ RSEQ_WRITE_ONCE(*(p), v); \
} while (0)
-#ifdef RSEQ_SKIP_FASTPATH
-#include "rseq-skip.h"
-#else /* !RSEQ_SKIP_FASTPATH */
-
/*
* The __rseq_cs_ptr_array and __rseq_cs sections can be used by debuggers to
* better handle single-stepping through the restartable critical sections.
@@ -47,10 +43,13 @@ do { \
#ifdef __PPC64__
-#define STORE_WORD "std "
-#define LOAD_WORD "ld "
-#define LOADX_WORD "ldx "
-#define CMP_WORD "cmpd "
+#define RSEQ_STORE_LONG(arg) "std%U[" __rseq_str(arg) "]%X[" __rseq_str(arg) "] " /* To memory ("m" constraint) */
+#define RSEQ_STORE_INT(arg) "stw%U[" __rseq_str(arg) "]%X[" __rseq_str(arg) "] " /* To memory ("m" constraint) */
+#define RSEQ_LOAD_LONG(arg) "ld%U[" __rseq_str(arg) "]%X[" __rseq_str(arg) "] " /* From memory ("m" constraint) */
+#define RSEQ_LOAD_INT(arg) "lwz%U[" __rseq_str(arg) "]%X[" __rseq_str(arg) "] " /* From memory ("m" constraint) */
+#define RSEQ_LOADX_LONG "ldx " /* From base register ("b" constraint) */
+#define RSEQ_CMP_LONG "cmpd "
+#define RSEQ_CMP_LONG_INT "cmpdi "
#define __RSEQ_ASM_DEFINE_TABLE(label, version, flags, \
start_ip, post_commit_offset, abort_ip) \
@@ -89,10 +88,13 @@ do { \
#else /* #ifdef __PPC64__ */
-#define STORE_WORD "stw "
-#define LOAD_WORD "lwz "
-#define LOADX_WORD "lwzx "
-#define CMP_WORD "cmpw "
+#define RSEQ_STORE_LONG(arg) "stw%U[" __rseq_str(arg) "]%X[" __rseq_str(arg) "] " /* To memory ("m" constraint) */
+#define RSEQ_STORE_INT(arg) RSEQ_STORE_LONG(arg) /* To memory ("m" constraint) */
+#define RSEQ_LOAD_LONG(arg) "lwz%U[" __rseq_str(arg) "]%X[" __rseq_str(arg) "] " /* From memory ("m" constraint) */
+#define RSEQ_LOAD_INT(arg) RSEQ_LOAD_LONG(arg) /* From memory ("m" constraint) */
+#define RSEQ_LOADX_LONG "lwzx " /* From base register ("b" constraint) */
+#define RSEQ_CMP_LONG "cmpw "
+#define RSEQ_CMP_LONG_INT "cmpwi "
#define __RSEQ_ASM_DEFINE_TABLE(label, version, flags, \
start_ip, post_commit_offset, abort_ip) \
@@ -125,7 +127,7 @@ do { \
RSEQ_INJECT_ASM(1) \
"lis %%r17, (" __rseq_str(cs_label) ")@ha\n\t" \
"addi %%r17, %%r17, (" __rseq_str(cs_label) ")@l\n\t" \
- "stw %%r17, %[" __rseq_str(rseq_cs) "]\n\t" \
+ RSEQ_STORE_INT(rseq_cs) "%%r17, %[" __rseq_str(rseq_cs) "]\n\t" \
__rseq_str(label) ":\n\t"
#endif /* #ifdef __PPC64__ */
@@ -136,7 +138,7 @@ do { \
#define RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, label) \
RSEQ_INJECT_ASM(2) \
- "lwz %%r17, %[" __rseq_str(current_cpu_id) "]\n\t" \
+ RSEQ_LOAD_INT(current_cpu_id) "%%r17, %[" __rseq_str(current_cpu_id) "]\n\t" \
"cmpw cr7, %[" __rseq_str(cpu_id) "], %%r17\n\t" \
"bne- cr7, " __rseq_str(label) "\n\t"
@@ -153,25 +155,25 @@ do { \
* RSEQ_ASM_OP_* (else): doesn't have hard-code registers(unless cr7)
*/
#define RSEQ_ASM_OP_CMPEQ(var, expect, label) \
- LOAD_WORD "%%r17, %[" __rseq_str(var) "]\n\t" \
- CMP_WORD "cr7, %%r17, %[" __rseq_str(expect) "]\n\t" \
+ RSEQ_LOAD_LONG(var) "%%r17, %[" __rseq_str(var) "]\n\t" \
+ RSEQ_CMP_LONG "cr7, %%r17, %[" __rseq_str(expect) "]\n\t" \
"bne- cr7, " __rseq_str(label) "\n\t"
#define RSEQ_ASM_OP_CMPNE(var, expectnot, label) \
- LOAD_WORD "%%r17, %[" __rseq_str(var) "]\n\t" \
- CMP_WORD "cr7, %%r17, %[" __rseq_str(expectnot) "]\n\t" \
+ RSEQ_LOAD_LONG(var) "%%r17, %[" __rseq_str(var) "]\n\t" \
+ RSEQ_CMP_LONG "cr7, %%r17, %[" __rseq_str(expectnot) "]\n\t" \
"beq- cr7, " __rseq_str(label) "\n\t"
#define RSEQ_ASM_OP_STORE(value, var) \
- STORE_WORD "%[" __rseq_str(value) "], %[" __rseq_str(var) "]\n\t"
+ RSEQ_STORE_LONG(var) "%[" __rseq_str(value) "], %[" __rseq_str(var) "]\n\t"
/* Load @var to r17 */
#define RSEQ_ASM_OP_R_LOAD(var) \
- LOAD_WORD "%%r17, %[" __rseq_str(var) "]\n\t"
+ RSEQ_LOAD_LONG(var) "%%r17, %[" __rseq_str(var) "]\n\t"
/* Store r17 to @var */
#define RSEQ_ASM_OP_R_STORE(var) \
- STORE_WORD "%%r17, %[" __rseq_str(var) "]\n\t"
+ RSEQ_STORE_LONG(var) "%%r17, %[" __rseq_str(var) "]\n\t"
/* Add @count to r17 */
#define RSEQ_ASM_OP_R_ADD(count) \
@@ -179,11 +181,11 @@ do { \
/* Load (r17 + voffp) to r17 */
#define RSEQ_ASM_OP_R_LOADX(voffp) \
- LOADX_WORD "%%r17, %[" __rseq_str(voffp) "], %%r17\n\t"
+ RSEQ_LOADX_LONG "%%r17, %[" __rseq_str(voffp) "], %%r17\n\t"
/* TODO: implement a faster memcpy. */
#define RSEQ_ASM_OP_R_MEMCPY() \
- "cmpdi %%r19, 0\n\t" \
+ RSEQ_CMP_LONG_INT "%%r19, 0\n\t" \
"beq 333f\n\t" \
"addi %%r20, %%r20, -1\n\t" \
"addi %%r21, %%r21, -1\n\t" \
@@ -191,561 +193,46 @@ do { \
"lbzu %%r18, 1(%%r20)\n\t" \
"stbu %%r18, 1(%%r21)\n\t" \
"addi %%r19, %%r19, -1\n\t" \
- "cmpdi %%r19, 0\n\t" \
+ RSEQ_CMP_LONG_INT "%%r19, 0\n\t" \
"bne 222b\n\t" \
"333:\n\t" \
#define RSEQ_ASM_OP_R_FINAL_STORE(var, post_commit_label) \
- STORE_WORD "%%r17, %[" __rseq_str(var) "]\n\t" \
+ RSEQ_STORE_LONG(var) "%%r17, %[" __rseq_str(var) "]\n\t" \
__rseq_str(post_commit_label) ":\n\t"
#define RSEQ_ASM_OP_FINAL_STORE(value, var, post_commit_label) \
- STORE_WORD "%[" __rseq_str(value) "], %[" __rseq_str(var) "]\n\t" \
+ RSEQ_STORE_LONG(var) "%[" __rseq_str(value) "], %[" __rseq_str(var) "]\n\t" \
__rseq_str(post_commit_label) ":\n\t"
-static inline __attribute__((always_inline))
-int rseq_cmpeqv_storev(intptr_t *v, intptr_t expect, intptr_t newv, int cpu)
-{
- RSEQ_INJECT_C(9)
-
- __asm__ __volatile__ goto (
- RSEQ_ASM_DEFINE_TABLE(3, 1f, 2f, 4f) /* start, commit, abort */
- RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[cmpfail])
-#ifdef RSEQ_COMPARE_TWICE
- RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[error1])
- RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[error2])
-#endif
- /* Start rseq by storing table entry pointer into rseq_cs. */
- RSEQ_ASM_STORE_RSEQ_CS(1, 3b, rseq_cs)
- /* cmp cpuid */
- RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, 4f)
- RSEQ_INJECT_ASM(3)
- /* cmp @v equal to @expect */
- RSEQ_ASM_OP_CMPEQ(v, expect, %l[cmpfail])
- RSEQ_INJECT_ASM(4)
-#ifdef RSEQ_COMPARE_TWICE
- /* cmp cpuid */
- RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, %l[error1])
- /* cmp @v equal to @expect */
- RSEQ_ASM_OP_CMPEQ(v, expect, %l[error2])
-#endif
- /* final store */
- RSEQ_ASM_OP_FINAL_STORE(newv, v, 2)
- RSEQ_INJECT_ASM(5)
- RSEQ_ASM_DEFINE_ABORT(4, abort)
- : /* gcc asm goto does not allow outputs */
- : [cpu_id] "r" (cpu),
- [current_cpu_id] "m" (__rseq_abi.cpu_id),
- [rseq_cs] "m" (__rseq_abi.rseq_cs),
- [v] "m" (*v),
- [expect] "r" (expect),
- [newv] "r" (newv)
- RSEQ_INJECT_INPUT
- : "memory", "cc", "r17"
- RSEQ_INJECT_CLOBBER
- : abort, cmpfail
-#ifdef RSEQ_COMPARE_TWICE
- , error1, error2
-#endif
- );
- return 0;
-abort:
- RSEQ_INJECT_FAILED
- return -1;
-cmpfail:
- return 1;
-#ifdef RSEQ_COMPARE_TWICE
-error1:
- rseq_bug("cpu_id comparison failed");
-error2:
- rseq_bug("expected value comparison failed");
-#endif
-}
-
-static inline __attribute__((always_inline))
-int rseq_cmpnev_storeoffp_load(intptr_t *v, intptr_t expectnot,
- off_t voffp, intptr_t *load, int cpu)
-{
- RSEQ_INJECT_C(9)
-
- __asm__ __volatile__ goto (
- RSEQ_ASM_DEFINE_TABLE(3, 1f, 2f, 4f) /* start, commit, abort */
- RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[cmpfail])
-#ifdef RSEQ_COMPARE_TWICE
- RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[error1])
- RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[error2])
-#endif
- /* Start rseq by storing table entry pointer into rseq_cs. */
- RSEQ_ASM_STORE_RSEQ_CS(1, 3b, rseq_cs)
- /* cmp cpuid */
- RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, 4f)
- RSEQ_INJECT_ASM(3)
- /* cmp @v not equal to @expectnot */
- RSEQ_ASM_OP_CMPNE(v, expectnot, %l[cmpfail])
- RSEQ_INJECT_ASM(4)
-#ifdef RSEQ_COMPARE_TWICE
- /* cmp cpuid */
- RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, %l[error1])
- /* cmp @v not equal to @expectnot */
- RSEQ_ASM_OP_CMPNE(v, expectnot, %l[error2])
-#endif
- /* load the value of @v */
- RSEQ_ASM_OP_R_LOAD(v)
- /* store it in @load */
- RSEQ_ASM_OP_R_STORE(load)
- /* dereference voffp(v) */
- RSEQ_ASM_OP_R_LOADX(voffp)
- /* final store the value at voffp(v) */
- RSEQ_ASM_OP_R_FINAL_STORE(v, 2)
- RSEQ_INJECT_ASM(5)
- RSEQ_ASM_DEFINE_ABORT(4, abort)
- : /* gcc asm goto does not allow outputs */
- : [cpu_id] "r" (cpu),
- [current_cpu_id] "m" (__rseq_abi.cpu_id),
- [rseq_cs] "m" (__rseq_abi.rseq_cs),
- /* final store input */
- [v] "m" (*v),
- [expectnot] "r" (expectnot),
- [voffp] "b" (voffp),
- [load] "m" (*load)
- RSEQ_INJECT_INPUT
- : "memory", "cc", "r17"
- RSEQ_INJECT_CLOBBER
- : abort, cmpfail
-#ifdef RSEQ_COMPARE_TWICE
- , error1, error2
-#endif
- );
- return 0;
-abort:
- RSEQ_INJECT_FAILED
- return -1;
-cmpfail:
- return 1;
-#ifdef RSEQ_COMPARE_TWICE
-error1:
- rseq_bug("cpu_id comparison failed");
-error2:
- rseq_bug("expected value comparison failed");
-#endif
-}
-
-static inline __attribute__((always_inline))
-int rseq_addv(intptr_t *v, intptr_t count, int cpu)
-{
- RSEQ_INJECT_C(9)
-
- __asm__ __volatile__ goto (
- RSEQ_ASM_DEFINE_TABLE(3, 1f, 2f, 4f) /* start, commit, abort */
-#ifdef RSEQ_COMPARE_TWICE
- RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[error1])
-#endif
- /* Start rseq by storing table entry pointer into rseq_cs. */
- RSEQ_ASM_STORE_RSEQ_CS(1, 3b, rseq_cs)
- /* cmp cpuid */
- RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, 4f)
- RSEQ_INJECT_ASM(3)
-#ifdef RSEQ_COMPARE_TWICE
- /* cmp cpuid */
- RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, %l[error1])
-#endif
- /* load the value of @v */
- RSEQ_ASM_OP_R_LOAD(v)
- /* add @count to it */
- RSEQ_ASM_OP_R_ADD(count)
- /* final store */
- RSEQ_ASM_OP_R_FINAL_STORE(v, 2)
- RSEQ_INJECT_ASM(4)
- RSEQ_ASM_DEFINE_ABORT(4, abort)
- : /* gcc asm goto does not allow outputs */
- : [cpu_id] "r" (cpu),
- [current_cpu_id] "m" (__rseq_abi.cpu_id),
- [rseq_cs] "m" (__rseq_abi.rseq_cs),
- /* final store input */
- [v] "m" (*v),
- [count] "r" (count)
- RSEQ_INJECT_INPUT
- : "memory", "cc", "r17"
- RSEQ_INJECT_CLOBBER
- : abort
-#ifdef RSEQ_COMPARE_TWICE
- , error1
-#endif
- );
- return 0;
-abort:
- RSEQ_INJECT_FAILED
- return -1;
-#ifdef RSEQ_COMPARE_TWICE
-error1:
- rseq_bug("cpu_id comparison failed");
-#endif
-}
-
-static inline __attribute__((always_inline))
-int rseq_cmpeqv_trystorev_storev(intptr_t *v, intptr_t expect,
- intptr_t *v2, intptr_t newv2,
- intptr_t newv, int cpu)
-{
- RSEQ_INJECT_C(9)
-
- __asm__ __volatile__ goto (
- RSEQ_ASM_DEFINE_TABLE(3, 1f, 2f, 4f) /* start, commit, abort */
- RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[cmpfail])
-#ifdef RSEQ_COMPARE_TWICE
- RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[error1])
- RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[error2])
-#endif
- /* Start rseq by storing table entry pointer into rseq_cs. */
- RSEQ_ASM_STORE_RSEQ_CS(1, 3b, rseq_cs)
- /* cmp cpuid */
- RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, 4f)
- RSEQ_INJECT_ASM(3)
- /* cmp @v equal to @expect */
- RSEQ_ASM_OP_CMPEQ(v, expect, %l[cmpfail])
- RSEQ_INJECT_ASM(4)
-#ifdef RSEQ_COMPARE_TWICE
- /* cmp cpuid */
- RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, %l[error1])
- /* cmp @v equal to @expect */
- RSEQ_ASM_OP_CMPEQ(v, expect, %l[error2])
-#endif
- /* try store */
- RSEQ_ASM_OP_STORE(newv2, v2)
- RSEQ_INJECT_ASM(5)
- /* final store */
- RSEQ_ASM_OP_FINAL_STORE(newv, v, 2)
- RSEQ_INJECT_ASM(6)
- RSEQ_ASM_DEFINE_ABORT(4, abort)
- : /* gcc asm goto does not allow outputs */
- : [cpu_id] "r" (cpu),
- [current_cpu_id] "m" (__rseq_abi.cpu_id),
- [rseq_cs] "m" (__rseq_abi.rseq_cs),
- /* try store input */
- [v2] "m" (*v2),
- [newv2] "r" (newv2),
- /* final store input */
- [v] "m" (*v),
- [expect] "r" (expect),
- [newv] "r" (newv)
- RSEQ_INJECT_INPUT
- : "memory", "cc", "r17"
- RSEQ_INJECT_CLOBBER
- : abort, cmpfail
-#ifdef RSEQ_COMPARE_TWICE
- , error1, error2
-#endif
- );
- return 0;
-abort:
- RSEQ_INJECT_FAILED
- return -1;
-cmpfail:
- return 1;
-#ifdef RSEQ_COMPARE_TWICE
-error1:
- rseq_bug("cpu_id comparison failed");
-error2:
- rseq_bug("expected value comparison failed");
-#endif
-}
-
-static inline __attribute__((always_inline))
-int rseq_cmpeqv_trystorev_storev_release(intptr_t *v, intptr_t expect,
- intptr_t *v2, intptr_t newv2,
- intptr_t newv, int cpu)
-{
- RSEQ_INJECT_C(9)
-
- __asm__ __volatile__ goto (
- RSEQ_ASM_DEFINE_TABLE(3, 1f, 2f, 4f) /* start, commit, abort */
- RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[cmpfail])
-#ifdef RSEQ_COMPARE_TWICE
- RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[error1])
- RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[error2])
-#endif
- /* Start rseq by storing table entry pointer into rseq_cs. */
- RSEQ_ASM_STORE_RSEQ_CS(1, 3b, rseq_cs)
- /* cmp cpuid */
- RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, 4f)
- RSEQ_INJECT_ASM(3)
- /* cmp @v equal to @expect */
- RSEQ_ASM_OP_CMPEQ(v, expect, %l[cmpfail])
- RSEQ_INJECT_ASM(4)
-#ifdef RSEQ_COMPARE_TWICE
- /* cmp cpuid */
- RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, %l[error1])
- /* cmp @v equal to @expect */
- RSEQ_ASM_OP_CMPEQ(v, expect, %l[error2])
-#endif
- /* try store */
- RSEQ_ASM_OP_STORE(newv2, v2)
- RSEQ_INJECT_ASM(5)
- /* for 'release' */
- "lwsync\n\t"
- /* final store */
- RSEQ_ASM_OP_FINAL_STORE(newv, v, 2)
- RSEQ_INJECT_ASM(6)
- RSEQ_ASM_DEFINE_ABORT(4, abort)
- : /* gcc asm goto does not allow outputs */
- : [cpu_id] "r" (cpu),
- [current_cpu_id] "m" (__rseq_abi.cpu_id),
- [rseq_cs] "m" (__rseq_abi.rseq_cs),
- /* try store input */
- [v2] "m" (*v2),
- [newv2] "r" (newv2),
- /* final store input */
- [v] "m" (*v),
- [expect] "r" (expect),
- [newv] "r" (newv)
- RSEQ_INJECT_INPUT
- : "memory", "cc", "r17"
- RSEQ_INJECT_CLOBBER
- : abort, cmpfail
-#ifdef RSEQ_COMPARE_TWICE
- , error1, error2
-#endif
- );
- return 0;
-abort:
- RSEQ_INJECT_FAILED
- return -1;
-cmpfail:
- return 1;
-#ifdef RSEQ_COMPARE_TWICE
-error1:
- rseq_bug("cpu_id comparison failed");
-error2:
- rseq_bug("expected value comparison failed");
-#endif
-}
-
-static inline __attribute__((always_inline))
-int rseq_cmpeqv_cmpeqv_storev(intptr_t *v, intptr_t expect,
- intptr_t *v2, intptr_t expect2,
- intptr_t newv, int cpu)
-{
- RSEQ_INJECT_C(9)
-
- __asm__ __volatile__ goto (
- RSEQ_ASM_DEFINE_TABLE(3, 1f, 2f, 4f) /* start, commit, abort */
- RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[cmpfail])
-#ifdef RSEQ_COMPARE_TWICE
- RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[error1])
- RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[error2])
- RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[error3])
-#endif
- /* Start rseq by storing table entry pointer into rseq_cs. */
- RSEQ_ASM_STORE_RSEQ_CS(1, 3b, rseq_cs)
- /* cmp cpuid */
- RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, 4f)
- RSEQ_INJECT_ASM(3)
- /* cmp @v equal to @expect */
- RSEQ_ASM_OP_CMPEQ(v, expect, %l[cmpfail])
- RSEQ_INJECT_ASM(4)
- /* cmp @v2 equal to @expct2 */
- RSEQ_ASM_OP_CMPEQ(v2, expect2, %l[cmpfail])
- RSEQ_INJECT_ASM(5)
-#ifdef RSEQ_COMPARE_TWICE
- /* cmp cpuid */
- RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, %l[error1])
- /* cmp @v equal to @expect */
- RSEQ_ASM_OP_CMPEQ(v, expect, %l[error2])
- /* cmp @v2 equal to @expct2 */
- RSEQ_ASM_OP_CMPEQ(v2, expect2, %l[error3])
-#endif
- /* final store */
- RSEQ_ASM_OP_FINAL_STORE(newv, v, 2)
- RSEQ_INJECT_ASM(6)
- RSEQ_ASM_DEFINE_ABORT(4, abort)
- : /* gcc asm goto does not allow outputs */
- : [cpu_id] "r" (cpu),
- [current_cpu_id] "m" (__rseq_abi.cpu_id),
- [rseq_cs] "m" (__rseq_abi.rseq_cs),
- /* cmp2 input */
- [v2] "m" (*v2),
- [expect2] "r" (expect2),
- /* final store input */
- [v] "m" (*v),
- [expect] "r" (expect),
- [newv] "r" (newv)
- RSEQ_INJECT_INPUT
- : "memory", "cc", "r17"
- RSEQ_INJECT_CLOBBER
- : abort, cmpfail
-#ifdef RSEQ_COMPARE_TWICE
- , error1, error2, error3
-#endif
- );
- return 0;
-abort:
- RSEQ_INJECT_FAILED
- return -1;
-cmpfail:
- return 1;
-#ifdef RSEQ_COMPARE_TWICE
-error1:
- rseq_bug("cpu_id comparison failed");
-error2:
- rseq_bug("1st expected value comparison failed");
-error3:
- rseq_bug("2nd expected value comparison failed");
-#endif
-}
-
-static inline __attribute__((always_inline))
-int rseq_cmpeqv_trymemcpy_storev(intptr_t *v, intptr_t expect,
- void *dst, void *src, size_t len,
- intptr_t newv, int cpu)
-{
- RSEQ_INJECT_C(9)
-
- __asm__ __volatile__ goto (
- RSEQ_ASM_DEFINE_TABLE(3, 1f, 2f, 4f) /* start, commit, abort */
- RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[cmpfail])
-#ifdef RSEQ_COMPARE_TWICE
- RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[error1])
- RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[error2])
-#endif
- /* setup for mempcy */
- "mr %%r19, %[len]\n\t"
- "mr %%r20, %[src]\n\t"
- "mr %%r21, %[dst]\n\t"
- /* Start rseq by storing table entry pointer into rseq_cs. */
- RSEQ_ASM_STORE_RSEQ_CS(1, 3b, rseq_cs)
- /* cmp cpuid */
- RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, 4f)
- RSEQ_INJECT_ASM(3)
- /* cmp @v equal to @expect */
- RSEQ_ASM_OP_CMPEQ(v, expect, %l[cmpfail])
- RSEQ_INJECT_ASM(4)
-#ifdef RSEQ_COMPARE_TWICE
- /* cmp cpuid */
- RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, %l[error1])
- /* cmp @v equal to @expect */
- RSEQ_ASM_OP_CMPEQ(v, expect, %l[error2])
-#endif
- /* try memcpy */
- RSEQ_ASM_OP_R_MEMCPY()
- RSEQ_INJECT_ASM(5)
- /* final store */
- RSEQ_ASM_OP_FINAL_STORE(newv, v, 2)
- RSEQ_INJECT_ASM(6)
- /* teardown */
- RSEQ_ASM_DEFINE_ABORT(4, abort)
- : /* gcc asm goto does not allow outputs */
- : [cpu_id] "r" (cpu),
- [current_cpu_id] "m" (__rseq_abi.cpu_id),
- [rseq_cs] "m" (__rseq_abi.rseq_cs),
- /* final store input */
- [v] "m" (*v),
- [expect] "r" (expect),
- [newv] "r" (newv),
- /* try memcpy input */
- [dst] "r" (dst),
- [src] "r" (src),
- [len] "r" (len)
- RSEQ_INJECT_INPUT
- : "memory", "cc", "r17", "r18", "r19", "r20", "r21"
- RSEQ_INJECT_CLOBBER
- : abort, cmpfail
-#ifdef RSEQ_COMPARE_TWICE
- , error1, error2
-#endif
- );
- return 0;
-abort:
- RSEQ_INJECT_FAILED
- return -1;
-cmpfail:
- return 1;
-#ifdef RSEQ_COMPARE_TWICE
-error1:
- rseq_bug("cpu_id comparison failed");
-error2:
- rseq_bug("expected value comparison failed");
-#endif
-}
-
-static inline __attribute__((always_inline))
-int rseq_cmpeqv_trymemcpy_storev_release(intptr_t *v, intptr_t expect,
- void *dst, void *src, size_t len,
- intptr_t newv, int cpu)
-{
- RSEQ_INJECT_C(9)
-
- __asm__ __volatile__ goto (
- RSEQ_ASM_DEFINE_TABLE(3, 1f, 2f, 4f) /* start, commit, abort */
- RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[cmpfail])
-#ifdef RSEQ_COMPARE_TWICE
- RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[error1])
- RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[error2])
-#endif
- /* setup for mempcy */
- "mr %%r19, %[len]\n\t"
- "mr %%r20, %[src]\n\t"
- "mr %%r21, %[dst]\n\t"
- /* Start rseq by storing table entry pointer into rseq_cs. */
- RSEQ_ASM_STORE_RSEQ_CS(1, 3b, rseq_cs)
- /* cmp cpuid */
- RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, 4f)
- RSEQ_INJECT_ASM(3)
- /* cmp @v equal to @expect */
- RSEQ_ASM_OP_CMPEQ(v, expect, %l[cmpfail])
- RSEQ_INJECT_ASM(4)
-#ifdef RSEQ_COMPARE_TWICE
- /* cmp cpuid */
- RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, %l[error1])
- /* cmp @v equal to @expect */
- RSEQ_ASM_OP_CMPEQ(v, expect, %l[error2])
-#endif
- /* try memcpy */
- RSEQ_ASM_OP_R_MEMCPY()
- RSEQ_INJECT_ASM(5)
- /* for 'release' */
- "lwsync\n\t"
- /* final store */
- RSEQ_ASM_OP_FINAL_STORE(newv, v, 2)
- RSEQ_INJECT_ASM(6)
- /* teardown */
- RSEQ_ASM_DEFINE_ABORT(4, abort)
- : /* gcc asm goto does not allow outputs */
- : [cpu_id] "r" (cpu),
- [current_cpu_id] "m" (__rseq_abi.cpu_id),
- [rseq_cs] "m" (__rseq_abi.rseq_cs),
- /* final store input */
- [v] "m" (*v),
- [expect] "r" (expect),
- [newv] "r" (newv),
- /* try memcpy input */
- [dst] "r" (dst),
- [src] "r" (src),
- [len] "r" (len)
- RSEQ_INJECT_INPUT
- : "memory", "cc", "r17", "r18", "r19", "r20", "r21"
- RSEQ_INJECT_CLOBBER
- : abort, cmpfail
-#ifdef RSEQ_COMPARE_TWICE
- , error1, error2
-#endif
- );
- return 0;
-abort:
- RSEQ_INJECT_FAILED
- return -1;
-cmpfail:
- return 1;
-#ifdef RSEQ_COMPARE_TWICE
-error1:
- rseq_bug("cpu_id comparison failed");
-error2:
- rseq_bug("expected value comparison failed");
-#endif
-}
-
-#undef STORE_WORD
-#undef LOAD_WORD
-#undef LOADX_WORD
-#undef CMP_WORD
-
-#endif /* !RSEQ_SKIP_FASTPATH */
+/* Per-cpu-id indexing. */
+
+#define RSEQ_TEMPLATE_CPU_ID
+#define RSEQ_TEMPLATE_MO_RELAXED
+#include "rseq-ppc-bits.h"
+#undef RSEQ_TEMPLATE_MO_RELAXED
+
+#define RSEQ_TEMPLATE_MO_RELEASE
+#include "rseq-ppc-bits.h"
+#undef RSEQ_TEMPLATE_MO_RELEASE
+#undef RSEQ_TEMPLATE_CPU_ID
+
+/* Per-mm-cid indexing. */
+
+#define RSEQ_TEMPLATE_MM_CID
+#define RSEQ_TEMPLATE_MO_RELAXED
+#include "rseq-ppc-bits.h"
+#undef RSEQ_TEMPLATE_MO_RELAXED
+
+#define RSEQ_TEMPLATE_MO_RELEASE
+#include "rseq-ppc-bits.h"
+#undef RSEQ_TEMPLATE_MO_RELEASE
+#undef RSEQ_TEMPLATE_MM_CID
+
+/* APIs which are not based on cpu ids. */
+
+#define RSEQ_TEMPLATE_CPU_ID_NONE
+#define RSEQ_TEMPLATE_MO_RELAXED
+#include "rseq-ppc-bits.h"
+#undef RSEQ_TEMPLATE_MO_RELAXED
+#undef RSEQ_TEMPLATE_CPU_ID_NONE
diff --git a/tools/testing/selftests/rseq/rseq-riscv-bits.h b/tools/testing/selftests/rseq/rseq-riscv-bits.h
new file mode 100644
index 000000000000..de31a0143139
--- /dev/null
+++ b/tools/testing/selftests/rseq/rseq-riscv-bits.h
@@ -0,0 +1,410 @@
+/* SPDX-License-Identifier: LGPL-2.1 OR MIT */
+
+#include "rseq-bits-template.h"
+
+#if defined(RSEQ_TEMPLATE_MO_RELAXED) && \
+ (defined(RSEQ_TEMPLATE_CPU_ID) || defined(RSEQ_TEMPLATE_MM_CID))
+
+static inline __always_inline
+int RSEQ_TEMPLATE_IDENTIFIER(rseq_cmpeqv_storev)(intptr_t *v, intptr_t expect, intptr_t newv, int cpu)
+{
+ RSEQ_INJECT_C(9)
+
+ __asm__ __volatile__ goto(RSEQ_ASM_DEFINE_TABLE(1, 2f, 3f, 4f)
+ RSEQ_ASM_DEFINE_EXIT_POINT(2f, "%l[cmpfail]")
+#ifdef RSEQ_COMPARE_TWICE
+ RSEQ_ASM_DEFINE_EXIT_POINT(2f, "%l[error1]")
+ RSEQ_ASM_DEFINE_EXIT_POINT(2f, "%l[error2]")
+#endif
+ RSEQ_ASM_STORE_RSEQ_CS(2, 1b, rseq_cs)
+ RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, 4f)
+ RSEQ_INJECT_ASM(3)
+ RSEQ_ASM_OP_CMPEQ(v, expect, "%l[cmpfail]")
+ RSEQ_INJECT_ASM(4)
+#ifdef RSEQ_COMPARE_TWICE
+ RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, "%l[error1]")
+ RSEQ_ASM_OP_CMPEQ(v, expect, "%l[error2]")
+#endif
+ RSEQ_ASM_OP_FINAL_STORE(newv, v, 3)
+ RSEQ_INJECT_ASM(5)
+ RSEQ_ASM_DEFINE_ABORT(4, abort)
+ : /* gcc asm goto does not allow outputs */
+ : [cpu_id] "r" (cpu),
+ [current_cpu_id] "m" (rseq_get_abi()->RSEQ_TEMPLATE_CPU_ID_FIELD),
+ [rseq_cs] "m" (rseq_get_abi()->rseq_cs.arch.ptr),
+ [v] "m" (*v),
+ [expect] "r" (expect),
+ [newv] "r" (newv)
+ RSEQ_INJECT_INPUT
+ : "memory", RSEQ_ASM_TMP_REG_1
+ RSEQ_INJECT_CLOBBER
+ : abort, cmpfail
+#ifdef RSEQ_COMPARE_TWICE
+ , error1, error2
+#endif
+ );
+
+ return 0;
+abort:
+ RSEQ_INJECT_FAILED
+ return -1;
+cmpfail:
+ return 1;
+#ifdef RSEQ_COMPARE_TWICE
+error1:
+ rseq_bug("cpu_id comparison failed");
+error2:
+ rseq_bug("expected value comparison failed");
+#endif
+}
+
+static inline __always_inline
+int RSEQ_TEMPLATE_IDENTIFIER(rseq_cmpnev_storeoffp_load)(intptr_t *v, intptr_t expectnot,
+ off_t voffp, intptr_t *load, int cpu)
+{
+ RSEQ_INJECT_C(9)
+
+ __asm__ __volatile__ goto(RSEQ_ASM_DEFINE_TABLE(1, 2f, 3f, 4f)
+ RSEQ_ASM_DEFINE_EXIT_POINT(2f, "%l[cmpfail]")
+#ifdef RSEQ_COMPARE_TWICE
+ RSEQ_ASM_DEFINE_EXIT_POINT(2f, "%l[error1]")
+ RSEQ_ASM_DEFINE_EXIT_POINT(2f, "%l[error2]")
+#endif
+ RSEQ_ASM_STORE_RSEQ_CS(2, 1b, rseq_cs)
+ RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, 4f)
+ RSEQ_INJECT_ASM(3)
+ RSEQ_ASM_OP_CMPNE(v, expectnot, "%l[cmpfail]")
+ RSEQ_INJECT_ASM(4)
+#ifdef RSEQ_COMPARE_TWICE
+ RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, "%l[error1]")
+ RSEQ_ASM_OP_CMPNE(v, expectnot, "%l[error2]")
+#endif
+ RSEQ_ASM_OP_R_LOAD(v)
+ RSEQ_ASM_OP_R_STORE(load)
+ RSEQ_ASM_OP_R_LOAD_OFF(voffp)
+ RSEQ_ASM_OP_R_FINAL_STORE(v, 3)
+ RSEQ_INJECT_ASM(5)
+ RSEQ_ASM_DEFINE_ABORT(4, abort)
+ : /* gcc asm goto does not allow outputs */
+ : [cpu_id] "r" (cpu),
+ [current_cpu_id] "m" (rseq_get_abi()->RSEQ_TEMPLATE_CPU_ID_FIELD),
+ [rseq_cs] "m" (rseq_get_abi()->rseq_cs.arch.ptr),
+ [v] "m" (*v),
+ [expectnot] "r" (expectnot),
+ [load] "m" (*load),
+ [voffp] "r" (voffp)
+ RSEQ_INJECT_INPUT
+ : "memory", RSEQ_ASM_TMP_REG_1
+ RSEQ_INJECT_CLOBBER
+ : abort, cmpfail
+#ifdef RSEQ_COMPARE_TWICE
+ , error1, error2
+#endif
+ );
+ return 0;
+abort:
+ RSEQ_INJECT_FAILED
+ return -1;
+cmpfail:
+ return 1;
+#ifdef RSEQ_COMPARE_TWICE
+error1:
+ rseq_bug("cpu_id comparison failed");
+error2:
+ rseq_bug("expected value comparison failed");
+#endif
+}
+
+static inline __always_inline
+int RSEQ_TEMPLATE_IDENTIFIER(rseq_addv)(intptr_t *v, intptr_t count, int cpu)
+{
+ RSEQ_INJECT_C(9)
+
+ __asm__ __volatile__ goto(RSEQ_ASM_DEFINE_TABLE(1, 2f, 3f, 4f)
+#ifdef RSEQ_COMPARE_TWICE
+ RSEQ_ASM_DEFINE_EXIT_POINT(2f, "%l[error1]")
+#endif
+ RSEQ_ASM_STORE_RSEQ_CS(2, 1b, rseq_cs)
+ RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, 4f)
+ RSEQ_INJECT_ASM(3)
+#ifdef RSEQ_COMPARE_TWICE
+ RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, "%l[error1]")
+#endif
+ RSEQ_ASM_OP_R_LOAD(v)
+ RSEQ_ASM_OP_R_ADD(count)
+ RSEQ_ASM_OP_R_FINAL_STORE(v, 3)
+ RSEQ_INJECT_ASM(4)
+ RSEQ_ASM_DEFINE_ABORT(4, abort)
+ : /* gcc asm goto does not allow outputs */
+ : [cpu_id] "r" (cpu),
+ [current_cpu_id] "m" (rseq_get_abi()->RSEQ_TEMPLATE_CPU_ID_FIELD),
+ [rseq_cs] "m" (rseq_get_abi()->rseq_cs.arch.ptr),
+ [v] "m" (*v),
+ [count] "r" (count)
+ RSEQ_INJECT_INPUT
+ : "memory", RSEQ_ASM_TMP_REG_1
+ RSEQ_INJECT_CLOBBER
+ : abort
+#ifdef RSEQ_COMPARE_TWICE
+ , error1
+#endif
+ );
+ return 0;
+abort:
+ RSEQ_INJECT_FAILED
+ return -1;
+#ifdef RSEQ_COMPARE_TWICE
+error1:
+ rseq_bug("cpu_id comparison failed");
+#endif
+}
+
+static inline __always_inline
+int RSEQ_TEMPLATE_IDENTIFIER(rseq_cmpeqv_cmpeqv_storev)(intptr_t *v, intptr_t expect,
+ intptr_t *v2, intptr_t expect2,
+ intptr_t newv, int cpu)
+{
+ RSEQ_INJECT_C(9)
+
+ __asm__ __volatile__ goto(RSEQ_ASM_DEFINE_TABLE(1, 2f, 3f, 4f)
+ RSEQ_ASM_DEFINE_EXIT_POINT(2f, "%l[cmpfail]")
+#ifdef RSEQ_COMPARE_TWICE
+ RSEQ_ASM_DEFINE_EXIT_POINT(2f, "%l[error1]")
+ RSEQ_ASM_DEFINE_EXIT_POINT(2f, "%l[error2]")
+ RSEQ_ASM_DEFINE_EXIT_POINT(2f, "%l[error3]")
+#endif
+ RSEQ_ASM_STORE_RSEQ_CS(2, 1b, rseq_cs)
+ RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, 4f)
+ RSEQ_INJECT_ASM(3)
+ RSEQ_ASM_OP_CMPEQ(v, expect, "%l[cmpfail]")
+ RSEQ_INJECT_ASM(4)
+ RSEQ_ASM_OP_CMPEQ(v2, expect2, "%l[cmpfail]")
+ RSEQ_INJECT_ASM(5)
+#ifdef RSEQ_COMPARE_TWICE
+ RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, "%l[error1]")
+ RSEQ_ASM_OP_CMPEQ(v, expect, "%l[error2]")
+ RSEQ_ASM_OP_CMPEQ(v2, expect2, "%l[error3]")
+#endif
+ RSEQ_ASM_OP_FINAL_STORE(newv, v, 3)
+ RSEQ_INJECT_ASM(6)
+ RSEQ_ASM_DEFINE_ABORT(4, abort)
+ : /* gcc asm goto does not allow outputs */
+ : [cpu_id] "r" (cpu),
+ [current_cpu_id] "m" (rseq_get_abi()->RSEQ_TEMPLATE_CPU_ID_FIELD),
+ [rseq_cs] "m" (rseq_get_abi()->rseq_cs.arch.ptr),
+ [v] "m" (*v),
+ [expect] "r" (expect),
+ [v2] "m" (*v2),
+ [expect2] "r" (expect2),
+ [newv] "r" (newv)
+ RSEQ_INJECT_INPUT
+ : "memory", RSEQ_ASM_TMP_REG_1
+ RSEQ_INJECT_CLOBBER
+ : abort, cmpfail
+#ifdef RSEQ_COMPARE_TWICE
+ , error1, error2, error3
+#endif
+ );
+
+ return 0;
+abort:
+ RSEQ_INJECT_FAILED
+ return -1;
+cmpfail:
+ return 1;
+#ifdef RSEQ_COMPARE_TWICE
+error1:
+ rseq_bug("cpu_id comparison failed");
+error2:
+ rseq_bug("expected value comparison failed");
+error3:
+ rseq_bug("2nd expected value comparison failed");
+#endif
+}
+
+#define RSEQ_ARCH_HAS_OFFSET_DEREF_ADDV
+
+/*
+ * pval = *(ptr+off)
+ * *pval += inc;
+ */
+static inline __always_inline
+int RSEQ_TEMPLATE_IDENTIFIER(rseq_offset_deref_addv)(intptr_t *ptr, off_t off, intptr_t inc, int cpu)
+{
+ RSEQ_INJECT_C(9)
+
+ __asm__ __volatile__ goto(RSEQ_ASM_DEFINE_TABLE(1, 2f, 3f, 4f)
+#ifdef RSEQ_COMPARE_TWICE
+ RSEQ_ASM_DEFINE_EXIT_POINT(2f, "%l[error1]")
+#endif
+ RSEQ_ASM_STORE_RSEQ_CS(2, 1b, rseq_cs)
+ RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, 4f)
+ RSEQ_INJECT_ASM(3)
+#ifdef RSEQ_COMPARE_TWICE
+ RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, "%l[error1]")
+#endif
+ RSEQ_ASM_OP_R_DEREF_ADDV(ptr, off, 3)
+ RSEQ_INJECT_ASM(4)
+ RSEQ_ASM_DEFINE_ABORT(4, abort)
+ : /* gcc asm goto does not allow outputs */
+ : [cpu_id] "r" (cpu),
+ [current_cpu_id] "m" (rseq_get_abi()->RSEQ_TEMPLATE_CPU_ID_FIELD),
+ [rseq_cs] "m" (rseq_get_abi()->rseq_cs.arch.ptr),
+ [ptr] "r" (ptr),
+ [off] "er" (off),
+ [inc] "er" (inc)
+ RSEQ_INJECT_INPUT
+ : "memory", RSEQ_ASM_TMP_REG_1
+ RSEQ_INJECT_CLOBBER
+ : abort
+#ifdef RSEQ_COMPARE_TWICE
+ , error1
+#endif
+ );
+ return 0;
+abort:
+ RSEQ_INJECT_FAILED
+ return -1;
+#ifdef RSEQ_COMPARE_TWICE
+error1:
+ rseq_bug("cpu_id comparison failed");
+#endif
+}
+
+#endif /* #if defined(RSEQ_TEMPLATE_MO_RELAXED) &&
+ (defined(RSEQ_TEMPLATE_CPU_ID) || defined(RSEQ_TEMPLATE_MM_CID)) */
+
+#if (defined(RSEQ_TEMPLATE_MO_RELAXED) || defined(RSEQ_TEMPLATE_MO_RELEASE)) && \
+ (defined(RSEQ_TEMPLATE_CPU_ID) || defined(RSEQ_TEMPLATE_MM_CID))
+
+static inline __always_inline
+int RSEQ_TEMPLATE_IDENTIFIER(rseq_cmpeqv_trystorev_storev)(intptr_t *v, intptr_t expect,
+ intptr_t *v2, intptr_t newv2,
+ intptr_t newv, int cpu)
+{
+ RSEQ_INJECT_C(9)
+
+ __asm__ __volatile__ goto(RSEQ_ASM_DEFINE_TABLE(1, 2f, 3f, 4f)
+ RSEQ_ASM_DEFINE_EXIT_POINT(2f, "%l[cmpfail]")
+#ifdef RSEQ_COMPARE_TWICE
+ RSEQ_ASM_DEFINE_EXIT_POINT(2f, "%l[error1]")
+ RSEQ_ASM_DEFINE_EXIT_POINT(2f, "%l[error2]")
+#endif
+ RSEQ_ASM_STORE_RSEQ_CS(2, 1b, rseq_cs)
+ RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, 4f)
+ RSEQ_INJECT_ASM(3)
+ RSEQ_ASM_OP_CMPEQ(v, expect, "%l[cmpfail]")
+ RSEQ_INJECT_ASM(4)
+#ifdef RSEQ_COMPARE_TWICE
+ RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, "%l[error1]")
+ RSEQ_ASM_OP_CMPEQ(v, expect, "%l[error2]")
+#endif
+ RSEQ_ASM_OP_STORE(newv2, v2)
+ RSEQ_INJECT_ASM(5)
+#ifdef RSEQ_TEMPLATE_MO_RELEASE
+ RSEQ_ASM_OP_FINAL_STORE_RELEASE(newv, v, 3)
+#else
+ RSEQ_ASM_OP_FINAL_STORE(newv, v, 3)
+#endif
+ RSEQ_INJECT_ASM(6)
+ RSEQ_ASM_DEFINE_ABORT(4, abort)
+ : /* gcc asm goto does not allow outputs */
+ : [cpu_id] "r" (cpu),
+ [current_cpu_id] "m" (rseq_get_abi()->RSEQ_TEMPLATE_CPU_ID_FIELD),
+ [rseq_cs] "m" (rseq_get_abi()->rseq_cs.arch.ptr),
+ [expect] "r" (expect),
+ [v] "m" (*v),
+ [newv] "r" (newv),
+ [v2] "m" (*v2),
+ [newv2] "r" (newv2)
+ RSEQ_INJECT_INPUT
+ : "memory", RSEQ_ASM_TMP_REG_1
+ RSEQ_INJECT_CLOBBER
+ : abort, cmpfail
+#ifdef RSEQ_COMPARE_TWICE
+ , error1, error2
+#endif
+ );
+
+ return 0;
+abort:
+ RSEQ_INJECT_FAILED
+ return -1;
+cmpfail:
+ return 1;
+#ifdef RSEQ_COMPARE_TWICE
+error1:
+ rseq_bug("cpu_id comparison failed");
+error2:
+ rseq_bug("expected value comparison failed");
+#endif
+}
+
+static inline __always_inline
+int RSEQ_TEMPLATE_IDENTIFIER(rseq_cmpeqv_trymemcpy_storev)(intptr_t *v, intptr_t expect,
+ void *dst, void *src, size_t len,
+ intptr_t newv, int cpu)
+{
+ RSEQ_INJECT_C(9)
+ __asm__ __volatile__ goto(RSEQ_ASM_DEFINE_TABLE(1, 2f, 3f, 4f)
+ RSEQ_ASM_DEFINE_EXIT_POINT(2f, "%l[cmpfail]")
+#ifdef RSEQ_COMPARE_TWICE
+ RSEQ_ASM_DEFINE_EXIT_POINT(2f, "%l[error1]")
+ RSEQ_ASM_DEFINE_EXIT_POINT(2f, "%l[error2]")
+#endif
+ RSEQ_ASM_STORE_RSEQ_CS(2, 1b, rseq_cs)
+ RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, 4f)
+ RSEQ_INJECT_ASM(3)
+ RSEQ_ASM_OP_CMPEQ(v, expect, "%l[cmpfail]")
+ RSEQ_INJECT_ASM(4)
+#ifdef RSEQ_COMPARE_TWICE
+ RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, "%l[error1]")
+ RSEQ_ASM_OP_CMPEQ(v, expect, "%l[error2]")
+#endif
+ RSEQ_ASM_OP_R_BAD_MEMCPY(dst, src, len)
+ RSEQ_INJECT_ASM(5)
+#ifdef RSEQ_TEMPLATE_MO_RELEASE
+ RSEQ_ASM_OP_FINAL_STORE_RELEASE(newv, v, 3)
+#else
+ RSEQ_ASM_OP_FINAL_STORE(newv, v, 3)
+#endif
+ RSEQ_INJECT_ASM(6)
+ RSEQ_ASM_DEFINE_ABORT(4, abort)
+ : /* gcc asm goto does not allow outputs */
+ : [cpu_id] "r" (cpu),
+ [current_cpu_id] "m" (rseq_get_abi()->RSEQ_TEMPLATE_CPU_ID_FIELD),
+ [rseq_cs] "m" (rseq_get_abi()->rseq_cs.arch.ptr),
+ [expect] "r" (expect),
+ [v] "m" (*v),
+ [newv] "r" (newv),
+ [dst] "r" (dst),
+ [src] "r" (src),
+ [len] "r" (len)
+ RSEQ_INJECT_INPUT
+ : "memory", RSEQ_ASM_TMP_REG_1, RSEQ_ASM_TMP_REG_2,
+ RSEQ_ASM_TMP_REG_3, RSEQ_ASM_TMP_REG_4
+ RSEQ_INJECT_CLOBBER
+ : abort, cmpfail
+#ifdef RSEQ_COMPARE_TWICE
+ , error1, error2
+#endif
+ );
+
+ return 0;
+abort:
+ RSEQ_INJECT_FAILED
+ return -1;
+cmpfail:
+ return 1;
+#ifdef RSEQ_COMPARE_TWICE
+error1:
+ rseq_bug("cpu_id comparison failed");
+error2:
+ rseq_bug("expected value comparison failed");
+#endif
+}
+
+#endif /* #if (defined(RSEQ_TEMPLATE_MO_RELAXED) || defined(RSEQ_TEMPLATE_MO_RELEASE)) &&
+ (defined(RSEQ_TEMPLATE_CPU_ID) || defined(RSEQ_TEMPLATE_MM_CID)) */
+
+#include "rseq-bits-reset.h"
diff --git a/tools/testing/selftests/rseq/rseq-riscv.h b/tools/testing/selftests/rseq/rseq-riscv.h
new file mode 100644
index 000000000000..37e598d0a365
--- /dev/null
+++ b/tools/testing/selftests/rseq/rseq-riscv.h
@@ -0,0 +1,198 @@
+/* SPDX-License-Identifier: LGPL-2.1 OR MIT */
+/*
+ * Select the instruction "csrw mhartid, x0" as the RSEQ_SIG. Unlike
+ * other architectures, the ebreak instruction has no immediate field for
+ * distinguishing purposes. Hence, ebreak is not suitable as RSEQ_SIG.
+ * "csrw mhartid, x0" can also satisfy the RSEQ requirement because it
+ * is an uncommon instruction and will raise an illegal instruction
+ * exception when executed in all modes.
+ */
+#include <endian.h>
+
+#if defined(__BYTE_ORDER) ? (__BYTE_ORDER == __LITTLE_ENDIAN) : defined(__LITTLE_ENDIAN)
+#define RSEQ_SIG 0xf1401073 /* csrr mhartid, x0 */
+#else
+#error "Currently, RSEQ only supports Little-Endian version"
+#endif
+
+#if __riscv_xlen == 64
+#define __REG_SEL(a, b) a
+#elif __riscv_xlen == 32
+#define __REG_SEL(a, b) b
+#endif
+
+#define REG_L __REG_SEL("ld ", "lw ")
+#define REG_S __REG_SEL("sd ", "sw ")
+
+#define RISCV_FENCE(p, s) \
+ __asm__ __volatile__ ("fence " #p "," #s : : : "memory")
+#define rseq_smp_mb() RISCV_FENCE(rw, rw)
+#define rseq_smp_rmb() RISCV_FENCE(r, r)
+#define rseq_smp_wmb() RISCV_FENCE(w, w)
+#define RSEQ_ASM_TMP_REG_1 "t6"
+#define RSEQ_ASM_TMP_REG_2 "t5"
+#define RSEQ_ASM_TMP_REG_3 "t4"
+#define RSEQ_ASM_TMP_REG_4 "t3"
+
+#define rseq_smp_load_acquire(p) \
+__extension__ ({ \
+ rseq_unqual_scalar_typeof(*(p)) ____p1 = RSEQ_READ_ONCE(*(p)); \
+ RISCV_FENCE(r, rw); \
+ ____p1; \
+})
+
+#define rseq_smp_acquire__after_ctrl_dep() rseq_smp_rmb()
+
+#define rseq_smp_store_release(p, v) \
+do { \
+ RISCV_FENCE(rw, w); \
+ RSEQ_WRITE_ONCE(*(p), v); \
+} while (0)
+
+#define __RSEQ_ASM_DEFINE_TABLE(label, version, flags, start_ip, \
+ post_commit_offset, abort_ip) \
+ ".pushsection __rseq_cs, \"aw\"\n" \
+ ".balign 32\n" \
+ __rseq_str(label) ":\n" \
+ ".long " __rseq_str(version) ", " __rseq_str(flags) "\n" \
+ ".quad " __rseq_str(start_ip) ", " \
+ __rseq_str(post_commit_offset) ", " \
+ __rseq_str(abort_ip) "\n" \
+ ".popsection\n\t" \
+ ".pushsection __rseq_cs_ptr_array, \"aw\"\n" \
+ ".quad " __rseq_str(label) "b\n" \
+ ".popsection\n"
+
+#define RSEQ_ASM_DEFINE_TABLE(label, start_ip, post_commit_ip, abort_ip) \
+ __RSEQ_ASM_DEFINE_TABLE(label, 0x0, 0x0, start_ip, \
+ ((post_commit_ip) - (start_ip)), abort_ip)
+
+/*
+ * Exit points of a rseq critical section consist of all instructions outside
+ * of the critical section where a critical section can either branch to or
+ * reach through the normal course of its execution. The abort IP and the
+ * post-commit IP are already part of the __rseq_cs section and should not be
+ * explicitly defined as additional exit points. Knowing all exit points is
+ * useful to assist debuggers stepping over the critical section.
+ */
+#define RSEQ_ASM_DEFINE_EXIT_POINT(start_ip, exit_ip) \
+ ".pushsection __rseq_exit_point_array, \"aw\"\n" \
+ ".quad " __rseq_str(start_ip) ", " __rseq_str(exit_ip) "\n" \
+ ".popsection\n"
+
+#define RSEQ_ASM_STORE_RSEQ_CS(label, cs_label, rseq_cs) \
+ RSEQ_INJECT_ASM(1) \
+ "la " RSEQ_ASM_TMP_REG_1 ", " __rseq_str(cs_label) "\n" \
+ REG_S RSEQ_ASM_TMP_REG_1 ", %[" __rseq_str(rseq_cs) "]\n" \
+ __rseq_str(label) ":\n"
+
+#define RSEQ_ASM_DEFINE_ABORT(label, abort_label) \
+ "j 222f\n" \
+ ".balign 4\n" \
+ ".long " __rseq_str(RSEQ_SIG) "\n" \
+ __rseq_str(label) ":\n" \
+ "j %l[" __rseq_str(abort_label) "]\n" \
+ "222:\n"
+
+#define RSEQ_ASM_OP_STORE(value, var) \
+ REG_S "%[" __rseq_str(value) "], %[" __rseq_str(var) "]\n"
+
+#define RSEQ_ASM_OP_CMPEQ(var, expect, label) \
+ REG_L RSEQ_ASM_TMP_REG_1 ", %[" __rseq_str(var) "]\n" \
+ "bne " RSEQ_ASM_TMP_REG_1 ", %[" __rseq_str(expect) "] ," \
+ __rseq_str(label) "\n"
+
+#define RSEQ_ASM_OP_CMPEQ32(var, expect, label) \
+ "lw " RSEQ_ASM_TMP_REG_1 ", %[" __rseq_str(var) "]\n" \
+ "bne " RSEQ_ASM_TMP_REG_1 ", %[" __rseq_str(expect) "] ," \
+ __rseq_str(label) "\n"
+
+#define RSEQ_ASM_OP_CMPNE(var, expect, label) \
+ REG_L RSEQ_ASM_TMP_REG_1 ", %[" __rseq_str(var) "]\n" \
+ "beq " RSEQ_ASM_TMP_REG_1 ", %[" __rseq_str(expect) "] ," \
+ __rseq_str(label) "\n"
+
+#define RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, label) \
+ RSEQ_INJECT_ASM(2) \
+ RSEQ_ASM_OP_CMPEQ32(current_cpu_id, cpu_id, label)
+
+#define RSEQ_ASM_OP_R_LOAD(var) \
+ REG_L RSEQ_ASM_TMP_REG_1 ", %[" __rseq_str(var) "]\n"
+
+#define RSEQ_ASM_OP_R_STORE(var) \
+ REG_S RSEQ_ASM_TMP_REG_1 ", %[" __rseq_str(var) "]\n"
+
+#define RSEQ_ASM_OP_R_LOAD_OFF(offset) \
+ "add " RSEQ_ASM_TMP_REG_1 ", %[" __rseq_str(offset) "], " \
+ RSEQ_ASM_TMP_REG_1 "\n" \
+ REG_L RSEQ_ASM_TMP_REG_1 ", (" RSEQ_ASM_TMP_REG_1 ")\n"
+
+#define RSEQ_ASM_OP_R_ADD(count) \
+ "add " RSEQ_ASM_TMP_REG_1 ", " RSEQ_ASM_TMP_REG_1 \
+ ", %[" __rseq_str(count) "]\n"
+
+#define RSEQ_ASM_OP_FINAL_STORE(value, var, post_commit_label) \
+ RSEQ_ASM_OP_STORE(value, var) \
+ __rseq_str(post_commit_label) ":\n"
+
+#define RSEQ_ASM_OP_FINAL_STORE_RELEASE(value, var, post_commit_label) \
+ "fence rw, w\n" \
+ RSEQ_ASM_OP_STORE(value, var) \
+ __rseq_str(post_commit_label) ":\n"
+
+#define RSEQ_ASM_OP_R_FINAL_STORE(var, post_commit_label) \
+ REG_S RSEQ_ASM_TMP_REG_1 ", %[" __rseq_str(var) "]\n" \
+ __rseq_str(post_commit_label) ":\n"
+
+#define RSEQ_ASM_OP_R_BAD_MEMCPY(dst, src, len) \
+ "beqz %[" __rseq_str(len) "], 333f\n" \
+ "mv " RSEQ_ASM_TMP_REG_1 ", %[" __rseq_str(len) "]\n" \
+ "mv " RSEQ_ASM_TMP_REG_2 ", %[" __rseq_str(src) "]\n" \
+ "mv " RSEQ_ASM_TMP_REG_3 ", %[" __rseq_str(dst) "]\n" \
+ "222:\n" \
+ "lb " RSEQ_ASM_TMP_REG_4 ", 0(" RSEQ_ASM_TMP_REG_2 ")\n" \
+ "sb " RSEQ_ASM_TMP_REG_4 ", 0(" RSEQ_ASM_TMP_REG_3 ")\n" \
+ "addi " RSEQ_ASM_TMP_REG_1 ", " RSEQ_ASM_TMP_REG_1 ", -1\n" \
+ "addi " RSEQ_ASM_TMP_REG_2 ", " RSEQ_ASM_TMP_REG_2 ", 1\n" \
+ "addi " RSEQ_ASM_TMP_REG_3 ", " RSEQ_ASM_TMP_REG_3 ", 1\n" \
+ "bnez " RSEQ_ASM_TMP_REG_1 ", 222b\n" \
+ "333:\n"
+
+#define RSEQ_ASM_OP_R_DEREF_ADDV(ptr, off, post_commit_label) \
+ "mv " RSEQ_ASM_TMP_REG_1 ", %[" __rseq_str(ptr) "]\n" \
+ RSEQ_ASM_OP_R_ADD(off) \
+ REG_L RSEQ_ASM_TMP_REG_1 ", 0(" RSEQ_ASM_TMP_REG_1 ")\n" \
+ RSEQ_ASM_OP_R_ADD(inc) \
+ __rseq_str(post_commit_label) ":\n"
+
+/* Per-cpu-id indexing. */
+
+#define RSEQ_TEMPLATE_CPU_ID
+#define RSEQ_TEMPLATE_MO_RELAXED
+#include "rseq-riscv-bits.h"
+#undef RSEQ_TEMPLATE_MO_RELAXED
+
+#define RSEQ_TEMPLATE_MO_RELEASE
+#include "rseq-riscv-bits.h"
+#undef RSEQ_TEMPLATE_MO_RELEASE
+#undef RSEQ_TEMPLATE_CPU_ID
+
+/* Per-mm-cid indexing. */
+
+#define RSEQ_TEMPLATE_MM_CID
+#define RSEQ_TEMPLATE_MO_RELAXED
+#include "rseq-riscv-bits.h"
+#undef RSEQ_TEMPLATE_MO_RELAXED
+
+#define RSEQ_TEMPLATE_MO_RELEASE
+#include "rseq-riscv-bits.h"
+#undef RSEQ_TEMPLATE_MO_RELEASE
+#undef RSEQ_TEMPLATE_MM_CID
+
+/* APIs which are not based on cpu ids. */
+
+#define RSEQ_TEMPLATE_CPU_ID_NONE
+#define RSEQ_TEMPLATE_MO_RELAXED
+#include "rseq-riscv-bits.h"
+#undef RSEQ_TEMPLATE_MO_RELAXED
+#undef RSEQ_TEMPLATE_CPU_ID_NONE
diff --git a/tools/testing/selftests/rseq/rseq-s390-bits.h b/tools/testing/selftests/rseq/rseq-s390-bits.h
new file mode 100644
index 000000000000..0cf17d9f170a
--- /dev/null
+++ b/tools/testing/selftests/rseq/rseq-s390-bits.h
@@ -0,0 +1,474 @@
+/* SPDX-License-Identifier: LGPL-2.1 OR MIT */
+
+#include "rseq-bits-template.h"
+
+#if defined(RSEQ_TEMPLATE_MO_RELAXED) && \
+ (defined(RSEQ_TEMPLATE_CPU_ID) || defined(RSEQ_TEMPLATE_MM_CID))
+
+static inline __attribute__((always_inline))
+int RSEQ_TEMPLATE_IDENTIFIER(rseq_cmpeqv_storev)(intptr_t *v, intptr_t expect, intptr_t newv, int cpu)
+{
+ RSEQ_INJECT_C(9)
+
+ __asm__ __volatile__ goto (
+ RSEQ_ASM_DEFINE_TABLE(3, 1f, 2f, 4f) /* start, commit, abort */
+ RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[cmpfail])
+#ifdef RSEQ_COMPARE_TWICE
+ RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[error1])
+ RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[error2])
+#endif
+ /* Start rseq by storing table entry pointer into rseq_cs. */
+ RSEQ_ASM_STORE_RSEQ_CS(1, 3b, rseq_cs)
+ RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, 4f)
+ RSEQ_INJECT_ASM(3)
+ LONG_CMP " %[expect], %[v]\n\t"
+ "jnz %l[cmpfail]\n\t"
+ RSEQ_INJECT_ASM(4)
+#ifdef RSEQ_COMPARE_TWICE
+ RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, %l[error1])
+ LONG_CMP " %[expect], %[v]\n\t"
+ "jnz %l[error2]\n\t"
+#endif
+ /* final store */
+ LONG_S " %[newv], %[v]\n\t"
+ "2:\n\t"
+ RSEQ_INJECT_ASM(5)
+ RSEQ_ASM_DEFINE_ABORT(4, "", abort)
+ : /* gcc asm goto does not allow outputs */
+ : [cpu_id] "r" (cpu),
+ [current_cpu_id] "m" (rseq_get_abi()->RSEQ_TEMPLATE_CPU_ID_FIELD),
+ [rseq_cs] "m" (rseq_get_abi()->rseq_cs.arch.ptr),
+ [v] "m" (*v),
+ [expect] "r" (expect),
+ [newv] "r" (newv)
+ RSEQ_INJECT_INPUT
+ : "memory", "cc", "r0"
+ RSEQ_INJECT_CLOBBER
+ : abort, cmpfail
+#ifdef RSEQ_COMPARE_TWICE
+ , error1, error2
+#endif
+ );
+ rseq_after_asm_goto();
+ return 0;
+abort:
+ rseq_after_asm_goto();
+ RSEQ_INJECT_FAILED
+ return -1;
+cmpfail:
+ rseq_after_asm_goto();
+ return 1;
+#ifdef RSEQ_COMPARE_TWICE
+error1:
+ rseq_after_asm_goto();
+ rseq_bug("cpu_id comparison failed");
+error2:
+ rseq_after_asm_goto();
+ rseq_bug("expected value comparison failed");
+#endif
+}
+
+/*
+ * Compare @v against @expectnot. When it does _not_ match, load @v
+ * into @load, and store the content of *@v + voffp into @v.
+ */
+static inline __attribute__((always_inline))
+int RSEQ_TEMPLATE_IDENTIFIER(rseq_cmpnev_storeoffp_load)(intptr_t *v, intptr_t expectnot,
+ long voffp, intptr_t *load, int cpu)
+{
+ RSEQ_INJECT_C(9)
+
+ __asm__ __volatile__ goto (
+ RSEQ_ASM_DEFINE_TABLE(3, 1f, 2f, 4f) /* start, commit, abort */
+ RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[cmpfail])
+#ifdef RSEQ_COMPARE_TWICE
+ RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[error1])
+ RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[error2])
+#endif
+ /* Start rseq by storing table entry pointer into rseq_cs. */
+ RSEQ_ASM_STORE_RSEQ_CS(1, 3b, rseq_cs)
+ RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, 4f)
+ RSEQ_INJECT_ASM(3)
+ LONG_L " %%r1, %[v]\n\t"
+ LONG_CMP_R " %%r1, %[expectnot]\n\t"
+ "je %l[cmpfail]\n\t"
+ RSEQ_INJECT_ASM(4)
+#ifdef RSEQ_COMPARE_TWICE
+ RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, %l[error1])
+ LONG_L " %%r1, %[v]\n\t"
+ LONG_CMP_R " %%r1, %[expectnot]\n\t"
+ "je %l[error2]\n\t"
+#endif
+ LONG_S " %%r1, %[load]\n\t"
+ LONG_ADD_R " %%r1, %[voffp]\n\t"
+ LONG_L " %%r1, 0(%%r1)\n\t"
+ /* final store */
+ LONG_S " %%r1, %[v]\n\t"
+ "2:\n\t"
+ RSEQ_INJECT_ASM(5)
+ RSEQ_ASM_DEFINE_ABORT(4, "", abort)
+ : /* gcc asm goto does not allow outputs */
+ : [cpu_id] "r" (cpu),
+ [current_cpu_id] "m" (rseq_get_abi()->RSEQ_TEMPLATE_CPU_ID_FIELD),
+ [rseq_cs] "m" (rseq_get_abi()->rseq_cs.arch.ptr),
+ /* final store input */
+ [v] "m" (*v),
+ [expectnot] "r" (expectnot),
+ [voffp] "r" (voffp),
+ [load] "m" (*load)
+ RSEQ_INJECT_INPUT
+ : "memory", "cc", "r0", "r1"
+ RSEQ_INJECT_CLOBBER
+ : abort, cmpfail
+#ifdef RSEQ_COMPARE_TWICE
+ , error1, error2
+#endif
+ );
+ rseq_after_asm_goto();
+ return 0;
+abort:
+ rseq_after_asm_goto();
+ RSEQ_INJECT_FAILED
+ return -1;
+cmpfail:
+ rseq_after_asm_goto();
+ return 1;
+#ifdef RSEQ_COMPARE_TWICE
+error1:
+ rseq_after_asm_goto();
+ rseq_bug("cpu_id comparison failed");
+error2:
+ rseq_after_asm_goto();
+ rseq_bug("expected value comparison failed");
+#endif
+}
+
+static inline __attribute__((always_inline))
+int RSEQ_TEMPLATE_IDENTIFIER(rseq_addv)(intptr_t *v, intptr_t count, int cpu)
+{
+ RSEQ_INJECT_C(9)
+
+ __asm__ __volatile__ goto (
+ RSEQ_ASM_DEFINE_TABLE(3, 1f, 2f, 4f) /* start, commit, abort */
+#ifdef RSEQ_COMPARE_TWICE
+ RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[error1])
+#endif
+ /* Start rseq by storing table entry pointer into rseq_cs. */
+ RSEQ_ASM_STORE_RSEQ_CS(1, 3b, rseq_cs)
+ RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, 4f)
+ RSEQ_INJECT_ASM(3)
+#ifdef RSEQ_COMPARE_TWICE
+ RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, %l[error1])
+#endif
+ LONG_L " %%r0, %[v]\n\t"
+ LONG_ADD_R " %%r0, %[count]\n\t"
+ /* final store */
+ LONG_S " %%r0, %[v]\n\t"
+ "2:\n\t"
+ RSEQ_INJECT_ASM(4)
+ RSEQ_ASM_DEFINE_ABORT(4, "", abort)
+ : /* gcc asm goto does not allow outputs */
+ : [cpu_id] "r" (cpu),
+ [current_cpu_id] "m" (rseq_get_abi()->RSEQ_TEMPLATE_CPU_ID_FIELD),
+ [rseq_cs] "m" (rseq_get_abi()->rseq_cs.arch.ptr),
+ /* final store input */
+ [v] "m" (*v),
+ [count] "r" (count)
+ RSEQ_INJECT_INPUT
+ : "memory", "cc", "r0"
+ RSEQ_INJECT_CLOBBER
+ : abort
+#ifdef RSEQ_COMPARE_TWICE
+ , error1
+#endif
+ );
+ rseq_after_asm_goto();
+ return 0;
+abort:
+ rseq_after_asm_goto();
+ RSEQ_INJECT_FAILED
+ return -1;
+#ifdef RSEQ_COMPARE_TWICE
+error1:
+ rseq_after_asm_goto();
+ rseq_bug("cpu_id comparison failed");
+#endif
+}
+
+static inline __attribute__((always_inline))
+int RSEQ_TEMPLATE_IDENTIFIER(rseq_cmpeqv_cmpeqv_storev)(intptr_t *v, intptr_t expect,
+ intptr_t *v2, intptr_t expect2,
+ intptr_t newv, int cpu)
+{
+ RSEQ_INJECT_C(9)
+
+ __asm__ __volatile__ goto (
+ RSEQ_ASM_DEFINE_TABLE(3, 1f, 2f, 4f) /* start, commit, abort */
+ RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[cmpfail])
+#ifdef RSEQ_COMPARE_TWICE
+ RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[error1])
+ RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[error2])
+ RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[error3])
+#endif
+ /* Start rseq by storing table entry pointer into rseq_cs. */
+ RSEQ_ASM_STORE_RSEQ_CS(1, 3b, rseq_cs)
+ RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, 4f)
+ RSEQ_INJECT_ASM(3)
+ LONG_CMP " %[expect], %[v]\n\t"
+ "jnz %l[cmpfail]\n\t"
+ RSEQ_INJECT_ASM(4)
+ LONG_CMP " %[expect2], %[v2]\n\t"
+ "jnz %l[cmpfail]\n\t"
+ RSEQ_INJECT_ASM(5)
+#ifdef RSEQ_COMPARE_TWICE
+ RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, %l[error1])
+ LONG_CMP " %[expect], %[v]\n\t"
+ "jnz %l[error2]\n\t"
+ LONG_CMP " %[expect2], %[v2]\n\t"
+ "jnz %l[error3]\n\t"
+#endif
+ /* final store */
+ LONG_S " %[newv], %[v]\n\t"
+ "2:\n\t"
+ RSEQ_INJECT_ASM(6)
+ RSEQ_ASM_DEFINE_ABORT(4, "", abort)
+ : /* gcc asm goto does not allow outputs */
+ : [cpu_id] "r" (cpu),
+ [current_cpu_id] "m" (rseq_get_abi()->RSEQ_TEMPLATE_CPU_ID_FIELD),
+ [rseq_cs] "m" (rseq_get_abi()->rseq_cs.arch.ptr),
+ /* cmp2 input */
+ [v2] "m" (*v2),
+ [expect2] "r" (expect2),
+ /* final store input */
+ [v] "m" (*v),
+ [expect] "r" (expect),
+ [newv] "r" (newv)
+ RSEQ_INJECT_INPUT
+ : "memory", "cc", "r0"
+ RSEQ_INJECT_CLOBBER
+ : abort, cmpfail
+#ifdef RSEQ_COMPARE_TWICE
+ , error1, error2, error3
+#endif
+ );
+ rseq_after_asm_goto();
+ return 0;
+abort:
+ rseq_after_asm_goto();
+ RSEQ_INJECT_FAILED
+ return -1;
+cmpfail:
+ rseq_after_asm_goto();
+ return 1;
+#ifdef RSEQ_COMPARE_TWICE
+error1:
+ rseq_after_asm_goto();
+ rseq_bug("cpu_id comparison failed");
+error2:
+ rseq_after_asm_goto();
+ rseq_bug("1st expected value comparison failed");
+error3:
+ rseq_after_asm_goto();
+ rseq_bug("2nd expected value comparison failed");
+#endif
+}
+
+#endif /* #if defined(RSEQ_TEMPLATE_MO_RELAXED) &&
+ (defined(RSEQ_TEMPLATE_CPU_ID) || defined(RSEQ_TEMPLATE_MM_CID)) */
+
+#if (defined(RSEQ_TEMPLATE_MO_RELAXED) || defined(RSEQ_TEMPLATE_MO_RELEASE)) && \
+ (defined(RSEQ_TEMPLATE_CPU_ID) || defined(RSEQ_TEMPLATE_MM_CID))
+
+/* s390 is TSO. */
+static inline __attribute__((always_inline))
+int RSEQ_TEMPLATE_IDENTIFIER(rseq_cmpeqv_trystorev_storev)(intptr_t *v, intptr_t expect,
+ intptr_t *v2, intptr_t newv2,
+ intptr_t newv, int cpu)
+{
+ RSEQ_INJECT_C(9)
+
+ __asm__ __volatile__ goto (
+ RSEQ_ASM_DEFINE_TABLE(3, 1f, 2f, 4f) /* start, commit, abort */
+ RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[cmpfail])
+#ifdef RSEQ_COMPARE_TWICE
+ RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[error1])
+ RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[error2])
+#endif
+ /* Start rseq by storing table entry pointer into rseq_cs. */
+ RSEQ_ASM_STORE_RSEQ_CS(1, 3b, rseq_cs)
+ RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, 4f)
+ RSEQ_INJECT_ASM(3)
+ LONG_CMP " %[expect], %[v]\n\t"
+ "jnz %l[cmpfail]\n\t"
+ RSEQ_INJECT_ASM(4)
+#ifdef RSEQ_COMPARE_TWICE
+ RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, %l[error1])
+ LONG_CMP " %[expect], %[v]\n\t"
+ "jnz %l[error2]\n\t"
+#endif
+ /* try store */
+ LONG_S " %[newv2], %[v2]\n\t"
+ RSEQ_INJECT_ASM(5)
+ /* final store */
+ LONG_S " %[newv], %[v]\n\t"
+ "2:\n\t"
+ RSEQ_INJECT_ASM(6)
+ RSEQ_ASM_DEFINE_ABORT(4, "", abort)
+ : /* gcc asm goto does not allow outputs */
+ : [cpu_id] "r" (cpu),
+ [current_cpu_id] "m" (rseq_get_abi()->RSEQ_TEMPLATE_CPU_ID_FIELD),
+ [rseq_cs] "m" (rseq_get_abi()->rseq_cs.arch.ptr),
+ /* try store input */
+ [v2] "m" (*v2),
+ [newv2] "r" (newv2),
+ /* final store input */
+ [v] "m" (*v),
+ [expect] "r" (expect),
+ [newv] "r" (newv)
+ RSEQ_INJECT_INPUT
+ : "memory", "cc", "r0"
+ RSEQ_INJECT_CLOBBER
+ : abort, cmpfail
+#ifdef RSEQ_COMPARE_TWICE
+ , error1, error2
+#endif
+ );
+ rseq_after_asm_goto();
+ return 0;
+abort:
+ rseq_after_asm_goto();
+ RSEQ_INJECT_FAILED
+ return -1;
+cmpfail:
+ rseq_after_asm_goto();
+ return 1;
+#ifdef RSEQ_COMPARE_TWICE
+error1:
+ rseq_after_asm_goto();
+ rseq_bug("cpu_id comparison failed");
+error2:
+ rseq_after_asm_goto();
+ rseq_bug("expected value comparison failed");
+#endif
+}
+
+/* s390 is TSO. */
+static inline __attribute__((always_inline))
+int RSEQ_TEMPLATE_IDENTIFIER(rseq_cmpeqv_trymemcpy_storev)(intptr_t *v, intptr_t expect,
+ void *dst, void *src, size_t len,
+ intptr_t newv, int cpu)
+{
+ uint64_t rseq_scratch[3];
+
+ RSEQ_INJECT_C(9)
+
+ __asm__ __volatile__ goto (
+ RSEQ_ASM_DEFINE_TABLE(3, 1f, 2f, 4f) /* start, commit, abort */
+ RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[cmpfail])
+#ifdef RSEQ_COMPARE_TWICE
+ RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[error1])
+ RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[error2])
+#endif
+ LONG_S " %[src], %[rseq_scratch0]\n\t"
+ LONG_S " %[dst], %[rseq_scratch1]\n\t"
+ LONG_S " %[len], %[rseq_scratch2]\n\t"
+ /* Start rseq by storing table entry pointer into rseq_cs. */
+ RSEQ_ASM_STORE_RSEQ_CS(1, 3b, rseq_cs)
+ RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, 4f)
+ RSEQ_INJECT_ASM(3)
+ LONG_CMP " %[expect], %[v]\n\t"
+ "jnz 5f\n\t"
+ RSEQ_INJECT_ASM(4)
+#ifdef RSEQ_COMPARE_TWICE
+ RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, 6f)
+ LONG_CMP " %[expect], %[v]\n\t"
+ "jnz 7f\n\t"
+#endif
+ /* try memcpy */
+ LONG_LT_R " %[len], %[len]\n\t"
+ "jz 333f\n\t"
+ "222:\n\t"
+ "ic %%r0,0(%[src])\n\t"
+ "stc %%r0,0(%[dst])\n\t"
+ LONG_ADDI " %[src], 1\n\t"
+ LONG_ADDI " %[dst], 1\n\t"
+ LONG_ADDI " %[len], -1\n\t"
+ "jnz 222b\n\t"
+ "333:\n\t"
+ RSEQ_INJECT_ASM(5)
+ /* final store */
+ LONG_S " %[newv], %[v]\n\t"
+ "2:\n\t"
+ RSEQ_INJECT_ASM(6)
+ /* teardown */
+ LONG_L " %[len], %[rseq_scratch2]\n\t"
+ LONG_L " %[dst], %[rseq_scratch1]\n\t"
+ LONG_L " %[src], %[rseq_scratch0]\n\t"
+ RSEQ_ASM_DEFINE_ABORT(4,
+ LONG_L " %[len], %[rseq_scratch2]\n\t"
+ LONG_L " %[dst], %[rseq_scratch1]\n\t"
+ LONG_L " %[src], %[rseq_scratch0]\n\t",
+ abort)
+ RSEQ_ASM_DEFINE_CMPFAIL(5,
+ LONG_L " %[len], %[rseq_scratch2]\n\t"
+ LONG_L " %[dst], %[rseq_scratch1]\n\t"
+ LONG_L " %[src], %[rseq_scratch0]\n\t",
+ cmpfail)
+#ifdef RSEQ_COMPARE_TWICE
+ RSEQ_ASM_DEFINE_CMPFAIL(6,
+ LONG_L " %[len], %[rseq_scratch2]\n\t"
+ LONG_L " %[dst], %[rseq_scratch1]\n\t"
+ LONG_L " %[src], %[rseq_scratch0]\n\t",
+ error1)
+ RSEQ_ASM_DEFINE_CMPFAIL(7,
+ LONG_L " %[len], %[rseq_scratch2]\n\t"
+ LONG_L " %[dst], %[rseq_scratch1]\n\t"
+ LONG_L " %[src], %[rseq_scratch0]\n\t",
+ error2)
+#endif
+ : /* gcc asm goto does not allow outputs */
+ : [cpu_id] "r" (cpu),
+ [current_cpu_id] "m" (rseq_get_abi()->RSEQ_TEMPLATE_CPU_ID_FIELD),
+ [rseq_cs] "m" (rseq_get_abi()->rseq_cs.arch.ptr),
+ /* final store input */
+ [v] "m" (*v),
+ [expect] "r" (expect),
+ [newv] "r" (newv),
+ /* try memcpy input */
+ [dst] "r" (dst),
+ [src] "r" (src),
+ [len] "r" (len),
+ [rseq_scratch0] "m" (rseq_scratch[0]),
+ [rseq_scratch1] "m" (rseq_scratch[1]),
+ [rseq_scratch2] "m" (rseq_scratch[2])
+ RSEQ_INJECT_INPUT
+ : "memory", "cc", "r0"
+ RSEQ_INJECT_CLOBBER
+ : abort, cmpfail
+#ifdef RSEQ_COMPARE_TWICE
+ , error1, error2
+#endif
+ );
+ rseq_after_asm_goto();
+ return 0;
+abort:
+ rseq_after_asm_goto();
+ RSEQ_INJECT_FAILED
+ return -1;
+cmpfail:
+ rseq_after_asm_goto();
+ return 1;
+#ifdef RSEQ_COMPARE_TWICE
+error1:
+ rseq_after_asm_goto();
+ rseq_bug("cpu_id comparison failed");
+error2:
+ rseq_after_asm_goto();
+ rseq_bug("expected value comparison failed");
+#endif
+}
+
+#endif /* #if (defined(RSEQ_TEMPLATE_MO_RELAXED) || defined(RSEQ_TEMPLATE_MO_RELEASE)) &&
+ (defined(RSEQ_TEMPLATE_CPU_ID) || defined(RSEQ_TEMPLATE_MM_CID)) */
+
+#include "rseq-bits-reset.h"
diff --git a/tools/testing/selftests/rseq/rseq-s390.h b/tools/testing/selftests/rseq/rseq-s390.h
index 8ef94ad1cbb4..33baaa9f9997 100644
--- a/tools/testing/selftests/rseq/rseq-s390.h
+++ b/tools/testing/selftests/rseq/rseq-s390.h
@@ -15,7 +15,7 @@
#define rseq_smp_load_acquire(p) \
__extension__ ({ \
- __typeof(*p) ____p1 = RSEQ_READ_ONCE(*p); \
+ rseq_unqual_scalar_typeof(*(p)) ____p1 = RSEQ_READ_ONCE(*(p)); \
rseq_barrier(); \
____p1; \
})
@@ -25,13 +25,9 @@ __extension__ ({ \
#define rseq_smp_store_release(p, v) \
do { \
rseq_barrier(); \
- RSEQ_WRITE_ONCE(*p, v); \
+ RSEQ_WRITE_ONCE(*(p), v); \
} while (0)
-#ifdef RSEQ_SKIP_FASTPATH
-#include "rseq-skip.h"
-#else /* !RSEQ_SKIP_FASTPATH */
-
#ifdef __s390x__
#define LONG_L "lg"
@@ -134,448 +130,34 @@ do { \
"jg %l[" __rseq_str(cmpfail_label) "]\n\t" \
".popsection\n\t"
-static inline __attribute__((always_inline))
-int rseq_cmpeqv_storev(intptr_t *v, intptr_t expect, intptr_t newv, int cpu)
-{
- RSEQ_INJECT_C(9)
+/* Per-cpu-id indexing. */
- __asm__ __volatile__ goto (
- RSEQ_ASM_DEFINE_TABLE(3, 1f, 2f, 4f) /* start, commit, abort */
- RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[cmpfail])
-#ifdef RSEQ_COMPARE_TWICE
- RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[error1])
- RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[error2])
-#endif
- /* Start rseq by storing table entry pointer into rseq_cs. */
- RSEQ_ASM_STORE_RSEQ_CS(1, 3b, rseq_cs)
- RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, 4f)
- RSEQ_INJECT_ASM(3)
- LONG_CMP " %[expect], %[v]\n\t"
- "jnz %l[cmpfail]\n\t"
- RSEQ_INJECT_ASM(4)
-#ifdef RSEQ_COMPARE_TWICE
- RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, %l[error1])
- LONG_CMP " %[expect], %[v]\n\t"
- "jnz %l[error2]\n\t"
-#endif
- /* final store */
- LONG_S " %[newv], %[v]\n\t"
- "2:\n\t"
- RSEQ_INJECT_ASM(5)
- RSEQ_ASM_DEFINE_ABORT(4, "", abort)
- : /* gcc asm goto does not allow outputs */
- : [cpu_id] "r" (cpu),
- [current_cpu_id] "m" (__rseq_abi.cpu_id),
- [rseq_cs] "m" (__rseq_abi.rseq_cs),
- [v] "m" (*v),
- [expect] "r" (expect),
- [newv] "r" (newv)
- RSEQ_INJECT_INPUT
- : "memory", "cc", "r0"
- RSEQ_INJECT_CLOBBER
- : abort, cmpfail
-#ifdef RSEQ_COMPARE_TWICE
- , error1, error2
-#endif
- );
- return 0;
-abort:
- RSEQ_INJECT_FAILED
- return -1;
-cmpfail:
- return 1;
-#ifdef RSEQ_COMPARE_TWICE
-error1:
- rseq_bug("cpu_id comparison failed");
-error2:
- rseq_bug("expected value comparison failed");
-#endif
-}
+#define RSEQ_TEMPLATE_CPU_ID
+#define RSEQ_TEMPLATE_MO_RELAXED
+#include "rseq-s390-bits.h"
+#undef RSEQ_TEMPLATE_MO_RELAXED
-/*
- * Compare @v against @expectnot. When it does _not_ match, load @v
- * into @load, and store the content of *@v + voffp into @v.
- */
-static inline __attribute__((always_inline))
-int rseq_cmpnev_storeoffp_load(intptr_t *v, intptr_t expectnot,
- off_t voffp, intptr_t *load, int cpu)
-{
- RSEQ_INJECT_C(9)
-
- __asm__ __volatile__ goto (
- RSEQ_ASM_DEFINE_TABLE(3, 1f, 2f, 4f) /* start, commit, abort */
- RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[cmpfail])
-#ifdef RSEQ_COMPARE_TWICE
- RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[error1])
- RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[error2])
-#endif
- /* Start rseq by storing table entry pointer into rseq_cs. */
- RSEQ_ASM_STORE_RSEQ_CS(1, 3b, rseq_cs)
- RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, 4f)
- RSEQ_INJECT_ASM(3)
- LONG_L " %%r1, %[v]\n\t"
- LONG_CMP_R " %%r1, %[expectnot]\n\t"
- "je %l[cmpfail]\n\t"
- RSEQ_INJECT_ASM(4)
-#ifdef RSEQ_COMPARE_TWICE
- RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, %l[error1])
- LONG_L " %%r1, %[v]\n\t"
- LONG_CMP_R " %%r1, %[expectnot]\n\t"
- "je %l[error2]\n\t"
-#endif
- LONG_S " %%r1, %[load]\n\t"
- LONG_ADD_R " %%r1, %[voffp]\n\t"
- LONG_L " %%r1, 0(%%r1)\n\t"
- /* final store */
- LONG_S " %%r1, %[v]\n\t"
- "2:\n\t"
- RSEQ_INJECT_ASM(5)
- RSEQ_ASM_DEFINE_ABORT(4, "", abort)
- : /* gcc asm goto does not allow outputs */
- : [cpu_id] "r" (cpu),
- [current_cpu_id] "m" (__rseq_abi.cpu_id),
- [rseq_cs] "m" (__rseq_abi.rseq_cs),
- /* final store input */
- [v] "m" (*v),
- [expectnot] "r" (expectnot),
- [voffp] "r" (voffp),
- [load] "m" (*load)
- RSEQ_INJECT_INPUT
- : "memory", "cc", "r0", "r1"
- RSEQ_INJECT_CLOBBER
- : abort, cmpfail
-#ifdef RSEQ_COMPARE_TWICE
- , error1, error2
-#endif
- );
- return 0;
-abort:
- RSEQ_INJECT_FAILED
- return -1;
-cmpfail:
- return 1;
-#ifdef RSEQ_COMPARE_TWICE
-error1:
- rseq_bug("cpu_id comparison failed");
-error2:
- rseq_bug("expected value comparison failed");
-#endif
-}
+#define RSEQ_TEMPLATE_MO_RELEASE
+#include "rseq-s390-bits.h"
+#undef RSEQ_TEMPLATE_MO_RELEASE
+#undef RSEQ_TEMPLATE_CPU_ID
-static inline __attribute__((always_inline))
-int rseq_addv(intptr_t *v, intptr_t count, int cpu)
-{
- RSEQ_INJECT_C(9)
+/* Per-mm-cid indexing. */
- __asm__ __volatile__ goto (
- RSEQ_ASM_DEFINE_TABLE(3, 1f, 2f, 4f) /* start, commit, abort */
-#ifdef RSEQ_COMPARE_TWICE
- RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[error1])
-#endif
- /* Start rseq by storing table entry pointer into rseq_cs. */
- RSEQ_ASM_STORE_RSEQ_CS(1, 3b, rseq_cs)
- RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, 4f)
- RSEQ_INJECT_ASM(3)
-#ifdef RSEQ_COMPARE_TWICE
- RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, %l[error1])
-#endif
- LONG_L " %%r0, %[v]\n\t"
- LONG_ADD_R " %%r0, %[count]\n\t"
- /* final store */
- LONG_S " %%r0, %[v]\n\t"
- "2:\n\t"
- RSEQ_INJECT_ASM(4)
- RSEQ_ASM_DEFINE_ABORT(4, "", abort)
- : /* gcc asm goto does not allow outputs */
- : [cpu_id] "r" (cpu),
- [current_cpu_id] "m" (__rseq_abi.cpu_id),
- [rseq_cs] "m" (__rseq_abi.rseq_cs),
- /* final store input */
- [v] "m" (*v),
- [count] "r" (count)
- RSEQ_INJECT_INPUT
- : "memory", "cc", "r0"
- RSEQ_INJECT_CLOBBER
- : abort
-#ifdef RSEQ_COMPARE_TWICE
- , error1
-#endif
- );
- return 0;
-abort:
- RSEQ_INJECT_FAILED
- return -1;
-#ifdef RSEQ_COMPARE_TWICE
-error1:
- rseq_bug("cpu_id comparison failed");
-#endif
-}
+#define RSEQ_TEMPLATE_MM_CID
+#define RSEQ_TEMPLATE_MO_RELAXED
+#include "rseq-s390-bits.h"
+#undef RSEQ_TEMPLATE_MO_RELAXED
-static inline __attribute__((always_inline))
-int rseq_cmpeqv_trystorev_storev(intptr_t *v, intptr_t expect,
- intptr_t *v2, intptr_t newv2,
- intptr_t newv, int cpu)
-{
- RSEQ_INJECT_C(9)
+#define RSEQ_TEMPLATE_MO_RELEASE
+#include "rseq-s390-bits.h"
+#undef RSEQ_TEMPLATE_MO_RELEASE
+#undef RSEQ_TEMPLATE_MM_CID
- __asm__ __volatile__ goto (
- RSEQ_ASM_DEFINE_TABLE(3, 1f, 2f, 4f) /* start, commit, abort */
- RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[cmpfail])
-#ifdef RSEQ_COMPARE_TWICE
- RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[error1])
- RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[error2])
-#endif
- /* Start rseq by storing table entry pointer into rseq_cs. */
- RSEQ_ASM_STORE_RSEQ_CS(1, 3b, rseq_cs)
- RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, 4f)
- RSEQ_INJECT_ASM(3)
- LONG_CMP " %[expect], %[v]\n\t"
- "jnz %l[cmpfail]\n\t"
- RSEQ_INJECT_ASM(4)
-#ifdef RSEQ_COMPARE_TWICE
- RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, %l[error1])
- LONG_CMP " %[expect], %[v]\n\t"
- "jnz %l[error2]\n\t"
-#endif
- /* try store */
- LONG_S " %[newv2], %[v2]\n\t"
- RSEQ_INJECT_ASM(5)
- /* final store */
- LONG_S " %[newv], %[v]\n\t"
- "2:\n\t"
- RSEQ_INJECT_ASM(6)
- RSEQ_ASM_DEFINE_ABORT(4, "", abort)
- : /* gcc asm goto does not allow outputs */
- : [cpu_id] "r" (cpu),
- [current_cpu_id] "m" (__rseq_abi.cpu_id),
- [rseq_cs] "m" (__rseq_abi.rseq_cs),
- /* try store input */
- [v2] "m" (*v2),
- [newv2] "r" (newv2),
- /* final store input */
- [v] "m" (*v),
- [expect] "r" (expect),
- [newv] "r" (newv)
- RSEQ_INJECT_INPUT
- : "memory", "cc", "r0"
- RSEQ_INJECT_CLOBBER
- : abort, cmpfail
-#ifdef RSEQ_COMPARE_TWICE
- , error1, error2
-#endif
- );
- return 0;
-abort:
- RSEQ_INJECT_FAILED
- return -1;
-cmpfail:
- return 1;
-#ifdef RSEQ_COMPARE_TWICE
-error1:
- rseq_bug("cpu_id comparison failed");
-error2:
- rseq_bug("expected value comparison failed");
-#endif
-}
-
-/* s390 is TSO. */
-static inline __attribute__((always_inline))
-int rseq_cmpeqv_trystorev_storev_release(intptr_t *v, intptr_t expect,
- intptr_t *v2, intptr_t newv2,
- intptr_t newv, int cpu)
-{
- return rseq_cmpeqv_trystorev_storev(v, expect, v2, newv2, newv, cpu);
-}
-
-static inline __attribute__((always_inline))
-int rseq_cmpeqv_cmpeqv_storev(intptr_t *v, intptr_t expect,
- intptr_t *v2, intptr_t expect2,
- intptr_t newv, int cpu)
-{
- RSEQ_INJECT_C(9)
-
- __asm__ __volatile__ goto (
- RSEQ_ASM_DEFINE_TABLE(3, 1f, 2f, 4f) /* start, commit, abort */
- RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[cmpfail])
-#ifdef RSEQ_COMPARE_TWICE
- RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[error1])
- RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[error2])
- RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[error3])
-#endif
- /* Start rseq by storing table entry pointer into rseq_cs. */
- RSEQ_ASM_STORE_RSEQ_CS(1, 3b, rseq_cs)
- RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, 4f)
- RSEQ_INJECT_ASM(3)
- LONG_CMP " %[expect], %[v]\n\t"
- "jnz %l[cmpfail]\n\t"
- RSEQ_INJECT_ASM(4)
- LONG_CMP " %[expect2], %[v2]\n\t"
- "jnz %l[cmpfail]\n\t"
- RSEQ_INJECT_ASM(5)
-#ifdef RSEQ_COMPARE_TWICE
- RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, %l[error1])
- LONG_CMP " %[expect], %[v]\n\t"
- "jnz %l[error2]\n\t"
- LONG_CMP " %[expect2], %[v2]\n\t"
- "jnz %l[error3]\n\t"
-#endif
- /* final store */
- LONG_S " %[newv], %[v]\n\t"
- "2:\n\t"
- RSEQ_INJECT_ASM(6)
- RSEQ_ASM_DEFINE_ABORT(4, "", abort)
- : /* gcc asm goto does not allow outputs */
- : [cpu_id] "r" (cpu),
- [current_cpu_id] "m" (__rseq_abi.cpu_id),
- [rseq_cs] "m" (__rseq_abi.rseq_cs),
- /* cmp2 input */
- [v2] "m" (*v2),
- [expect2] "r" (expect2),
- /* final store input */
- [v] "m" (*v),
- [expect] "r" (expect),
- [newv] "r" (newv)
- RSEQ_INJECT_INPUT
- : "memory", "cc", "r0"
- RSEQ_INJECT_CLOBBER
- : abort, cmpfail
-#ifdef RSEQ_COMPARE_TWICE
- , error1, error2, error3
-#endif
- );
- return 0;
-abort:
- RSEQ_INJECT_FAILED
- return -1;
-cmpfail:
- return 1;
-#ifdef RSEQ_COMPARE_TWICE
-error1:
- rseq_bug("cpu_id comparison failed");
-error2:
- rseq_bug("1st expected value comparison failed");
-error3:
- rseq_bug("2nd expected value comparison failed");
-#endif
-}
-
-static inline __attribute__((always_inline))
-int rseq_cmpeqv_trymemcpy_storev(intptr_t *v, intptr_t expect,
- void *dst, void *src, size_t len,
- intptr_t newv, int cpu)
-{
- uint64_t rseq_scratch[3];
-
- RSEQ_INJECT_C(9)
-
- __asm__ __volatile__ goto (
- RSEQ_ASM_DEFINE_TABLE(3, 1f, 2f, 4f) /* start, commit, abort */
- RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[cmpfail])
-#ifdef RSEQ_COMPARE_TWICE
- RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[error1])
- RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[error2])
-#endif
- LONG_S " %[src], %[rseq_scratch0]\n\t"
- LONG_S " %[dst], %[rseq_scratch1]\n\t"
- LONG_S " %[len], %[rseq_scratch2]\n\t"
- /* Start rseq by storing table entry pointer into rseq_cs. */
- RSEQ_ASM_STORE_RSEQ_CS(1, 3b, rseq_cs)
- RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, 4f)
- RSEQ_INJECT_ASM(3)
- LONG_CMP " %[expect], %[v]\n\t"
- "jnz 5f\n\t"
- RSEQ_INJECT_ASM(4)
-#ifdef RSEQ_COMPARE_TWICE
- RSEQ_ASM_CMP_CPU_ID(cpu_id, current_cpu_id, 6f)
- LONG_CMP " %[expect], %[v]\n\t"
- "jnz 7f\n\t"
-#endif
- /* try memcpy */
- LONG_LT_R " %[len], %[len]\n\t"
- "jz 333f\n\t"
- "222:\n\t"
- "ic %%r0,0(%[src])\n\t"
- "stc %%r0,0(%[dst])\n\t"
- LONG_ADDI " %[src], 1\n\t"
- LONG_ADDI " %[dst], 1\n\t"
- LONG_ADDI " %[len], -1\n\t"
- "jnz 222b\n\t"
- "333:\n\t"
- RSEQ_INJECT_ASM(5)
- /* final store */
- LONG_S " %[newv], %[v]\n\t"
- "2:\n\t"
- RSEQ_INJECT_ASM(6)
- /* teardown */
- LONG_L " %[len], %[rseq_scratch2]\n\t"
- LONG_L " %[dst], %[rseq_scratch1]\n\t"
- LONG_L " %[src], %[rseq_scratch0]\n\t"
- RSEQ_ASM_DEFINE_ABORT(4,
- LONG_L " %[len], %[rseq_scratch2]\n\t"
- LONG_L " %[dst], %[rseq_scratch1]\n\t"
- LONG_L " %[src], %[rseq_scratch0]\n\t",
- abort)
- RSEQ_ASM_DEFINE_CMPFAIL(5,
- LONG_L " %[len], %[rseq_scratch2]\n\t"
- LONG_L " %[dst], %[rseq_scratch1]\n\t"
- LONG_L " %[src], %[rseq_scratch0]\n\t",
- cmpfail)
-#ifdef RSEQ_COMPARE_TWICE
- RSEQ_ASM_DEFINE_CMPFAIL(6,
- LONG_L " %[len], %[rseq_scratch2]\n\t"
- LONG_L " %[dst], %[rseq_scratch1]\n\t"
- LONG_L " %[src], %[rseq_scratch0]\n\t",
- error1)
- RSEQ_ASM_DEFINE_CMPFAIL(7,
- LONG_L " %[len], %[rseq_scratch2]\n\t"
- LONG_L " %[dst], %[rseq_scratch1]\n\t"
- LONG_L " %[src], %[rseq_scratch0]\n\t",
- error2)
-#endif
- : /* gcc asm goto does not allow outputs */
- : [cpu_id] "r" (cpu),
- [current_cpu_id] "m" (__rseq_abi.cpu_id),
- [rseq_cs] "m" (__rseq_abi.rseq_cs),
- /* final store input */
- [v] "m" (*v),
- [expect] "r" (expect),
- [newv] "r" (newv),
- /* try memcpy input */
- [dst] "r" (dst),
- [src] "r" (src),
- [len] "r" (len),
- [rseq_scratch0] "m" (rseq_scratch[0]),
- [rseq_scratch1] "m" (rseq_scratch[1]),
- [rseq_scratch2] "m" (rseq_scratch[2])
- RSEQ_INJECT_INPUT
- : "memory", "cc", "r0"
- RSEQ_INJECT_CLOBBER
- : abort, cmpfail
-#ifdef RSEQ_COMPARE_TWICE
- , error1, error2
-#endif
- );
- return 0;
-abort:
- RSEQ_INJECT_FAILED
- return -1;
-cmpfail:
- return 1;
-#ifdef RSEQ_COMPARE_TWICE
-error1:
- rseq_bug("cpu_id comparison failed");
-error2:
- rseq_bug("expected value comparison failed");
-#endif
-}
+/* APIs which are not based on cpu ids. */
-/* s390 is TSO. */
-static inline __attribute__((always_inline))
-int rseq_cmpeqv_trymemcpy_storev_release(intptr_t *v, intptr_t expect,
- void *dst, void *src, size_t len,
- intptr_t newv, int cpu)
-{
- return rseq_cmpeqv_trymemcpy_storev(v, expect, dst, src, len,
- newv, cpu);
-}
-#endif /* !RSEQ_SKIP_FASTPATH */
+#define RSEQ_TEMPLATE_CPU_ID_NONE
+#define RSEQ_TEMPLATE_MO_RELAXED
+#include "rseq-s390-bits.h"
+#undef RSEQ_TEMPLATE_MO_RELAXED
+#undef RSEQ_TEMPLATE_CPU_ID_NONE
diff --git a/tools/testing/selftests/rseq/rseq-skip.h b/tools/testing/selftests/rseq/rseq-skip.h
deleted file mode 100644
index 72750b5905a9..000000000000
--- a/tools/testing/selftests/rseq/rseq-skip.h
+++ /dev/null
@@ -1,65 +0,0 @@
-/* SPDX-License-Identifier: LGPL-2.1 OR MIT */
-/*
- * rseq-skip.h
- *
- * (C) Copyright 2017-2018 - Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
- */
-
-static inline __attribute__((always_inline))
-int rseq_cmpeqv_storev(intptr_t *v, intptr_t expect, intptr_t newv, int cpu)
-{
- return -1;
-}
-
-static inline __attribute__((always_inline))
-int rseq_cmpnev_storeoffp_load(intptr_t *v, intptr_t expectnot,
- off_t voffp, intptr_t *load, int cpu)
-{
- return -1;
-}
-
-static inline __attribute__((always_inline))
-int rseq_addv(intptr_t *v, intptr_t count, int cpu)
-{
- return -1;
-}
-
-static inline __attribute__((always_inline))
-int rseq_cmpeqv_trystorev_storev(intptr_t *v, intptr_t expect,
- intptr_t *v2, intptr_t newv2,
- intptr_t newv, int cpu)
-{
- return -1;
-}
-
-static inline __attribute__((always_inline))
-int rseq_cmpeqv_trystorev_storev_release(intptr_t *v, intptr_t expect,
- intptr_t *v2, intptr_t newv2,
- intptr_t newv, int cpu)
-{
- return -1;
-}
-
-static inline __attribute__((always_inline))
-int rseq_cmpeqv_cmpeqv_storev(intptr_t *v, intptr_t expect,
- intptr_t *v2, intptr_t expect2,
- intptr_t newv, int cpu)
-{
- return -1;
-}
-
-static inline __attribute__((always_inline))
-int rseq_cmpeqv_trymemcpy_storev(intptr_t *v, intptr_t expect,
- void *dst, void *src, size_t len,
- intptr_t newv, int cpu)
-{
- return -1;
-}
-
-static inline __attribute__((always_inline))
-int rseq_cmpeqv_trymemcpy_storev_release(intptr_t *v, intptr_t expect,
- void *dst, void *src, size_t len,
- intptr_t newv, int cpu)
-{
- return -1;
-}
diff --git a/tools/testing/selftests/rseq/rseq-thread-pointer.h b/tools/testing/selftests/rseq/rseq-thread-pointer.h
new file mode 100644
index 000000000000..977c25d758b2
--- /dev/null
+++ b/tools/testing/selftests/rseq/rseq-thread-pointer.h
@@ -0,0 +1,19 @@
+/* SPDX-License-Identifier: LGPL-2.1-only OR MIT */
+/*
+ * rseq-thread-pointer.h
+ *
+ * (C) Copyright 2021 - Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
+ */
+
+#ifndef _RSEQ_THREAD_POINTER
+#define _RSEQ_THREAD_POINTER
+
+#if defined(__x86_64__) || defined(__i386__)
+#include "rseq-x86-thread-pointer.h"
+#elif defined(__PPC__)
+#include "rseq-ppc-thread-pointer.h"
+#else
+#include "rseq-generic-thread-pointer.h"
+#endif
+
+#endif
diff --git a/tools/testing/selftests/rseq/rseq-x86-bits.h b/tools/testing/selftests/rseq/rseq-x86-bits.h
new file mode 100644
index 000000000000..8a9431eec467
--- /dev/null
+++ b/tools/testing/selftests/rseq/rseq-x86-bits.h
@@ -0,0 +1,993 @@
+/* SPDX-License-Identifier: LGPL-2.1 OR MIT */
+/*
+ * rseq-x86-bits.h
+ *
+ * (C) Copyright 2016-2022 - Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
+ */
+
+#include "rseq-bits-template.h"
+
+#ifdef __x86_64__
+
+#if defined(RSEQ_TEMPLATE_MO_RELAXED) && \
+ (defined(RSEQ_TEMPLATE_CPU_ID) || defined(RSEQ_TEMPLATE_MM_CID))
+
+static inline __attribute__((always_inline))
+int RSEQ_TEMPLATE_IDENTIFIER(rseq_cmpeqv_storev)(intptr_t *v, intptr_t expect, intptr_t newv, int cpu)
+{
+ RSEQ_INJECT_C(9)
+
+ __asm__ __volatile__ goto (
+ RSEQ_ASM_DEFINE_TABLE(3, 1f, 2f, 4f) /* start, commit, abort */
+ RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[cmpfail])
+#ifdef RSEQ_COMPARE_TWICE
+ RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[error1])
+ RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[error2])
+#endif
+ /* Start rseq by storing table entry pointer into rseq_cs. */
+ RSEQ_ASM_STORE_RSEQ_CS(1, 3b, RSEQ_ASM_TP_SEGMENT:RSEQ_CS_OFFSET(%[rseq_offset]))
+ RSEQ_ASM_CMP_CPU_ID(cpu_id, RSEQ_ASM_TP_SEGMENT:RSEQ_TEMPLATE_CPU_ID_OFFSET(%[rseq_offset]), 4f)
+ RSEQ_INJECT_ASM(3)
+ "cmpq %[v], %[expect]\n\t"
+ "jnz %l[cmpfail]\n\t"
+ RSEQ_INJECT_ASM(4)
+#ifdef RSEQ_COMPARE_TWICE
+ RSEQ_ASM_CMP_CPU_ID(cpu_id, RSEQ_ASM_TP_SEGMENT:RSEQ_TEMPLATE_CPU_ID_OFFSET(%[rseq_offset]), %l[error1])
+ "cmpq %[v], %[expect]\n\t"
+ "jnz %l[error2]\n\t"
+#endif
+ /* final store */
+ "movq %[newv], %[v]\n\t"
+ "2:\n\t"
+ RSEQ_INJECT_ASM(5)
+ RSEQ_ASM_DEFINE_ABORT(4, "", abort)
+ : /* gcc asm goto does not allow outputs */
+ : [cpu_id] "r" (cpu),
+ [rseq_offset] "r" (rseq_offset),
+ [v] "m" (*v),
+ [expect] "r" (expect),
+ [newv] "r" (newv)
+ : "memory", "cc", "rax"
+ RSEQ_INJECT_CLOBBER
+ : abort, cmpfail
+#ifdef RSEQ_COMPARE_TWICE
+ , error1, error2
+#endif
+ );
+ rseq_after_asm_goto();
+ return 0;
+abort:
+ rseq_after_asm_goto();
+ RSEQ_INJECT_FAILED
+ return -1;
+cmpfail:
+ rseq_after_asm_goto();
+ return 1;
+#ifdef RSEQ_COMPARE_TWICE
+error1:
+ rseq_after_asm_goto();
+ rseq_bug("cpu_id comparison failed");
+error2:
+ rseq_after_asm_goto();
+ rseq_bug("expected value comparison failed");
+#endif
+}
+
+/*
+ * Compare @v against @expectnot. When it does _not_ match, load @v
+ * into @load, and store the content of *@v + voffp into @v.
+ */
+static inline __attribute__((always_inline))
+int RSEQ_TEMPLATE_IDENTIFIER(rseq_cmpnev_storeoffp_load)(intptr_t *v, intptr_t expectnot,
+ long voffp, intptr_t *load, int cpu)
+{
+ RSEQ_INJECT_C(9)
+
+ __asm__ __volatile__ goto (
+ RSEQ_ASM_DEFINE_TABLE(3, 1f, 2f, 4f) /* start, commit, abort */
+ RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[cmpfail])
+#ifdef RSEQ_COMPARE_TWICE
+ RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[error1])
+ RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[error2])
+#endif
+ /* Start rseq by storing table entry pointer into rseq_cs. */
+ RSEQ_ASM_STORE_RSEQ_CS(1, 3b, RSEQ_ASM_TP_SEGMENT:RSEQ_CS_OFFSET(%[rseq_offset]))
+ RSEQ_ASM_CMP_CPU_ID(cpu_id, RSEQ_ASM_TP_SEGMENT:RSEQ_TEMPLATE_CPU_ID_OFFSET(%[rseq_offset]), 4f)
+ RSEQ_INJECT_ASM(3)
+ "movq %[v], %%rbx\n\t"
+ "cmpq %%rbx, %[expectnot]\n\t"
+ "je %l[cmpfail]\n\t"
+ RSEQ_INJECT_ASM(4)
+#ifdef RSEQ_COMPARE_TWICE
+ RSEQ_ASM_CMP_CPU_ID(cpu_id, RSEQ_ASM_TP_SEGMENT:RSEQ_TEMPLATE_CPU_ID_OFFSET(%[rseq_offset]), %l[error1])
+ "movq %[v], %%rbx\n\t"
+ "cmpq %%rbx, %[expectnot]\n\t"
+ "je %l[error2]\n\t"
+#endif
+ "movq %%rbx, %[load]\n\t"
+ "addq %[voffp], %%rbx\n\t"
+ "movq (%%rbx), %%rbx\n\t"
+ /* final store */
+ "movq %%rbx, %[v]\n\t"
+ "2:\n\t"
+ RSEQ_INJECT_ASM(5)
+ RSEQ_ASM_DEFINE_ABORT(4, "", abort)
+ : /* gcc asm goto does not allow outputs */
+ : [cpu_id] "r" (cpu),
+ [rseq_offset] "r" (rseq_offset),
+ /* final store input */
+ [v] "m" (*v),
+ [expectnot] "r" (expectnot),
+ [voffp] "er" (voffp),
+ [load] "m" (*load)
+ : "memory", "cc", "rax", "rbx"
+ RSEQ_INJECT_CLOBBER
+ : abort, cmpfail
+#ifdef RSEQ_COMPARE_TWICE
+ , error1, error2
+#endif
+ );
+ rseq_after_asm_goto();
+ return 0;
+abort:
+ rseq_after_asm_goto();
+ RSEQ_INJECT_FAILED
+ return -1;
+cmpfail:
+ rseq_after_asm_goto();
+ return 1;
+#ifdef RSEQ_COMPARE_TWICE
+error1:
+ rseq_after_asm_goto();
+ rseq_bug("cpu_id comparison failed");
+error2:
+ rseq_after_asm_goto();
+ rseq_bug("expected value comparison failed");
+#endif
+}
+
+static inline __attribute__((always_inline))
+int RSEQ_TEMPLATE_IDENTIFIER(rseq_addv)(intptr_t *v, intptr_t count, int cpu)
+{
+ RSEQ_INJECT_C(9)
+
+ __asm__ __volatile__ goto (
+ RSEQ_ASM_DEFINE_TABLE(3, 1f, 2f, 4f) /* start, commit, abort */
+#ifdef RSEQ_COMPARE_TWICE
+ RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[error1])
+#endif
+ /* Start rseq by storing table entry pointer into rseq_cs. */
+ RSEQ_ASM_STORE_RSEQ_CS(1, 3b, RSEQ_ASM_TP_SEGMENT:RSEQ_CS_OFFSET(%[rseq_offset]))
+ RSEQ_ASM_CMP_CPU_ID(cpu_id, RSEQ_ASM_TP_SEGMENT:RSEQ_TEMPLATE_CPU_ID_OFFSET(%[rseq_offset]), 4f)
+ RSEQ_INJECT_ASM(3)
+#ifdef RSEQ_COMPARE_TWICE
+ RSEQ_ASM_CMP_CPU_ID(cpu_id, RSEQ_ASM_TP_SEGMENT:RSEQ_TEMPLATE_CPU_ID_OFFSET(%[rseq_offset]), %l[error1])
+#endif
+ /* final store */
+ "addq %[count], %[v]\n\t"
+ "2:\n\t"
+ RSEQ_INJECT_ASM(4)
+ RSEQ_ASM_DEFINE_ABORT(4, "", abort)
+ : /* gcc asm goto does not allow outputs */
+ : [cpu_id] "r" (cpu),
+ [rseq_offset] "r" (rseq_offset),
+ /* final store input */
+ [v] "m" (*v),
+ [count] "er" (count)
+ : "memory", "cc", "rax"
+ RSEQ_INJECT_CLOBBER
+ : abort
+#ifdef RSEQ_COMPARE_TWICE
+ , error1
+#endif
+ );
+ rseq_after_asm_goto();
+ return 0;
+abort:
+ rseq_after_asm_goto();
+ RSEQ_INJECT_FAILED
+ return -1;
+#ifdef RSEQ_COMPARE_TWICE
+error1:
+ rseq_after_asm_goto();
+ rseq_bug("cpu_id comparison failed");
+#endif
+}
+
+#define RSEQ_ARCH_HAS_OFFSET_DEREF_ADDV
+
+/*
+ * pval = *(ptr+off)
+ * *pval += inc;
+ */
+static inline __attribute__((always_inline))
+int RSEQ_TEMPLATE_IDENTIFIER(rseq_offset_deref_addv)(intptr_t *ptr, long off, intptr_t inc, int cpu)
+{
+ RSEQ_INJECT_C(9)
+
+ __asm__ __volatile__ goto (
+ RSEQ_ASM_DEFINE_TABLE(3, 1f, 2f, 4f) /* start, commit, abort */
+#ifdef RSEQ_COMPARE_TWICE
+ RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[error1])
+#endif
+ /* Start rseq by storing table entry pointer into rseq_cs. */
+ RSEQ_ASM_STORE_RSEQ_CS(1, 3b, RSEQ_ASM_TP_SEGMENT:RSEQ_CS_OFFSET(%[rseq_offset]))
+ RSEQ_ASM_CMP_CPU_ID(cpu_id, RSEQ_ASM_TP_SEGMENT:RSEQ_TEMPLATE_CPU_ID_OFFSET(%[rseq_offset]), 4f)
+ RSEQ_INJECT_ASM(3)
+#ifdef RSEQ_COMPARE_TWICE
+ RSEQ_ASM_CMP_CPU_ID(cpu_id, RSEQ_ASM_TP_SEGMENT:RSEQ_TEMPLATE_CPU_ID_OFFSET(%[rseq_offset]), %l[error1])
+#endif
+ /* get p+v */
+ "movq %[ptr], %%rbx\n\t"
+ "addq %[off], %%rbx\n\t"
+ /* get pv */
+ "movq (%%rbx), %%rcx\n\t"
+ /* *pv += inc */
+ "addq %[inc], (%%rcx)\n\t"
+ "2:\n\t"
+ RSEQ_INJECT_ASM(4)
+ RSEQ_ASM_DEFINE_ABORT(4, "", abort)
+ : /* gcc asm goto does not allow outputs */
+ : [cpu_id] "r" (cpu),
+ [rseq_offset] "r" (rseq_offset),
+ /* final store input */
+ [ptr] "m" (*ptr),
+ [off] "er" (off),
+ [inc] "er" (inc)
+ : "memory", "cc", "rax", "rbx", "rcx"
+ RSEQ_INJECT_CLOBBER
+ : abort
+#ifdef RSEQ_COMPARE_TWICE
+ , error1
+#endif
+ );
+ return 0;
+abort:
+ RSEQ_INJECT_FAILED
+ return -1;
+#ifdef RSEQ_COMPARE_TWICE
+error1:
+ rseq_bug("cpu_id comparison failed");
+#endif
+}
+
+static inline __attribute__((always_inline))
+int RSEQ_TEMPLATE_IDENTIFIER(rseq_cmpeqv_cmpeqv_storev)(intptr_t *v, intptr_t expect,
+ intptr_t *v2, intptr_t expect2,
+ intptr_t newv, int cpu)
+{
+ RSEQ_INJECT_C(9)
+
+ __asm__ __volatile__ goto (
+ RSEQ_ASM_DEFINE_TABLE(3, 1f, 2f, 4f) /* start, commit, abort */
+ RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[cmpfail])
+#ifdef RSEQ_COMPARE_TWICE
+ RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[error1])
+ RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[error2])
+ RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[error3])
+#endif
+ /* Start rseq by storing table entry pointer into rseq_cs. */
+ RSEQ_ASM_STORE_RSEQ_CS(1, 3b, RSEQ_ASM_TP_SEGMENT:RSEQ_CS_OFFSET(%[rseq_offset]))
+ RSEQ_ASM_CMP_CPU_ID(cpu_id, RSEQ_ASM_TP_SEGMENT:RSEQ_TEMPLATE_CPU_ID_OFFSET(%[rseq_offset]), 4f)
+ RSEQ_INJECT_ASM(3)
+ "cmpq %[v], %[expect]\n\t"
+ "jnz %l[cmpfail]\n\t"
+ RSEQ_INJECT_ASM(4)
+ "cmpq %[v2], %[expect2]\n\t"
+ "jnz %l[cmpfail]\n\t"
+ RSEQ_INJECT_ASM(5)
+#ifdef RSEQ_COMPARE_TWICE
+ RSEQ_ASM_CMP_CPU_ID(cpu_id, RSEQ_ASM_TP_SEGMENT:RSEQ_TEMPLATE_CPU_ID_OFFSET(%[rseq_offset]), %l[error1])
+ "cmpq %[v], %[expect]\n\t"
+ "jnz %l[error2]\n\t"
+ "cmpq %[v2], %[expect2]\n\t"
+ "jnz %l[error3]\n\t"
+#endif
+ /* final store */
+ "movq %[newv], %[v]\n\t"
+ "2:\n\t"
+ RSEQ_INJECT_ASM(6)
+ RSEQ_ASM_DEFINE_ABORT(4, "", abort)
+ : /* gcc asm goto does not allow outputs */
+ : [cpu_id] "r" (cpu),
+ [rseq_offset] "r" (rseq_offset),
+ /* cmp2 input */
+ [v2] "m" (*v2),
+ [expect2] "r" (expect2),
+ /* final store input */
+ [v] "m" (*v),
+ [expect] "r" (expect),
+ [newv] "r" (newv)
+ : "memory", "cc", "rax"
+ RSEQ_INJECT_CLOBBER
+ : abort, cmpfail
+#ifdef RSEQ_COMPARE_TWICE
+ , error1, error2, error3
+#endif
+ );
+ rseq_after_asm_goto();
+ return 0;
+abort:
+ rseq_after_asm_goto();
+ RSEQ_INJECT_FAILED
+ return -1;
+cmpfail:
+ rseq_after_asm_goto();
+ return 1;
+#ifdef RSEQ_COMPARE_TWICE
+error1:
+ rseq_after_asm_goto();
+ rseq_bug("cpu_id comparison failed");
+error2:
+ rseq_after_asm_goto();
+ rseq_bug("1st expected value comparison failed");
+error3:
+ rseq_after_asm_goto();
+ rseq_bug("2nd expected value comparison failed");
+#endif
+}
+
+#endif /* #if defined(RSEQ_TEMPLATE_MO_RELAXED) &&
+ (defined(RSEQ_TEMPLATE_CPU_ID) || defined(RSEQ_TEMPLATE_MM_CID)) */
+
+#if (defined(RSEQ_TEMPLATE_MO_RELAXED) || defined(RSEQ_TEMPLATE_MO_RELEASE)) && \
+ (defined(RSEQ_TEMPLATE_CPU_ID) || defined(RSEQ_TEMPLATE_MM_CID))
+
+static inline __attribute__((always_inline))
+int RSEQ_TEMPLATE_IDENTIFIER(rseq_cmpeqv_trystorev_storev)(intptr_t *v, intptr_t expect,
+ intptr_t *v2, intptr_t newv2,
+ intptr_t newv, int cpu)
+{
+ RSEQ_INJECT_C(9)
+
+ __asm__ __volatile__ goto (
+ RSEQ_ASM_DEFINE_TABLE(3, 1f, 2f, 4f) /* start, commit, abort */
+ RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[cmpfail])
+#ifdef RSEQ_COMPARE_TWICE
+ RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[error1])
+ RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[error2])
+#endif
+ /* Start rseq by storing table entry pointer into rseq_cs. */
+ RSEQ_ASM_STORE_RSEQ_CS(1, 3b, RSEQ_ASM_TP_SEGMENT:RSEQ_CS_OFFSET(%[rseq_offset]))
+ RSEQ_ASM_CMP_CPU_ID(cpu_id, RSEQ_ASM_TP_SEGMENT:RSEQ_TEMPLATE_CPU_ID_OFFSET(%[rseq_offset]), 4f)
+ RSEQ_INJECT_ASM(3)
+ "cmpq %[v], %[expect]\n\t"
+ "jnz %l[cmpfail]\n\t"
+ RSEQ_INJECT_ASM(4)
+#ifdef RSEQ_COMPARE_TWICE
+ RSEQ_ASM_CMP_CPU_ID(cpu_id, RSEQ_ASM_TP_SEGMENT:RSEQ_TEMPLATE_CPU_ID_OFFSET(%[rseq_offset]), %l[error1])
+ "cmpq %[v], %[expect]\n\t"
+ "jnz %l[error2]\n\t"
+#endif
+ /* try store */
+ "movq %[newv2], %[v2]\n\t"
+ RSEQ_INJECT_ASM(5)
+ /* final store */
+ "movq %[newv], %[v]\n\t"
+ "2:\n\t"
+ RSEQ_INJECT_ASM(6)
+ RSEQ_ASM_DEFINE_ABORT(4, "", abort)
+ : /* gcc asm goto does not allow outputs */
+ : [cpu_id] "r" (cpu),
+ [rseq_offset] "r" (rseq_offset),
+ /* try store input */
+ [v2] "m" (*v2),
+ [newv2] "r" (newv2),
+ /* final store input */
+ [v] "m" (*v),
+ [expect] "r" (expect),
+ [newv] "r" (newv)
+ : "memory", "cc", "rax"
+ RSEQ_INJECT_CLOBBER
+ : abort, cmpfail
+#ifdef RSEQ_COMPARE_TWICE
+ , error1, error2
+#endif
+ );
+ rseq_after_asm_goto();
+ return 0;
+abort:
+ rseq_after_asm_goto();
+ RSEQ_INJECT_FAILED
+ return -1;
+cmpfail:
+ rseq_after_asm_goto();
+ return 1;
+#ifdef RSEQ_COMPARE_TWICE
+error1:
+ rseq_after_asm_goto();
+ rseq_bug("cpu_id comparison failed");
+error2:
+ rseq_after_asm_goto();
+ rseq_bug("expected value comparison failed");
+#endif
+}
+
+static inline __attribute__((always_inline))
+int RSEQ_TEMPLATE_IDENTIFIER(rseq_cmpeqv_trymemcpy_storev)(intptr_t *v, intptr_t expect,
+ void *dst, void *src, size_t len,
+ intptr_t newv, int cpu)
+{
+ uint64_t rseq_scratch[3];
+
+ RSEQ_INJECT_C(9)
+
+ __asm__ __volatile__ goto (
+ RSEQ_ASM_DEFINE_TABLE(3, 1f, 2f, 4f) /* start, commit, abort */
+ RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[cmpfail])
+#ifdef RSEQ_COMPARE_TWICE
+ RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[error1])
+ RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[error2])
+#endif
+ "movq %[src], %[rseq_scratch0]\n\t"
+ "movq %[dst], %[rseq_scratch1]\n\t"
+ "movq %[len], %[rseq_scratch2]\n\t"
+ /* Start rseq by storing table entry pointer into rseq_cs. */
+ RSEQ_ASM_STORE_RSEQ_CS(1, 3b, RSEQ_ASM_TP_SEGMENT:RSEQ_CS_OFFSET(%[rseq_offset]))
+ RSEQ_ASM_CMP_CPU_ID(cpu_id, RSEQ_ASM_TP_SEGMENT:RSEQ_TEMPLATE_CPU_ID_OFFSET(%[rseq_offset]), 4f)
+ RSEQ_INJECT_ASM(3)
+ "cmpq %[v], %[expect]\n\t"
+ "jnz 5f\n\t"
+ RSEQ_INJECT_ASM(4)
+#ifdef RSEQ_COMPARE_TWICE
+ RSEQ_ASM_CMP_CPU_ID(cpu_id, RSEQ_ASM_TP_SEGMENT:RSEQ_TEMPLATE_CPU_ID_OFFSET(%[rseq_offset]), 6f)
+ "cmpq %[v], %[expect]\n\t"
+ "jnz 7f\n\t"
+#endif
+ /* try memcpy */
+ "test %[len], %[len]\n\t" \
+ "jz 333f\n\t" \
+ "222:\n\t" \
+ "movb (%[src]), %%al\n\t" \
+ "movb %%al, (%[dst])\n\t" \
+ "inc %[src]\n\t" \
+ "inc %[dst]\n\t" \
+ "dec %[len]\n\t" \
+ "jnz 222b\n\t" \
+ "333:\n\t" \
+ RSEQ_INJECT_ASM(5)
+ /* final store */
+ "movq %[newv], %[v]\n\t"
+ "2:\n\t"
+ RSEQ_INJECT_ASM(6)
+ /* teardown */
+ "movq %[rseq_scratch2], %[len]\n\t"
+ "movq %[rseq_scratch1], %[dst]\n\t"
+ "movq %[rseq_scratch0], %[src]\n\t"
+ RSEQ_ASM_DEFINE_ABORT(4,
+ "movq %[rseq_scratch2], %[len]\n\t"
+ "movq %[rseq_scratch1], %[dst]\n\t"
+ "movq %[rseq_scratch0], %[src]\n\t",
+ abort)
+ RSEQ_ASM_DEFINE_CMPFAIL(5,
+ "movq %[rseq_scratch2], %[len]\n\t"
+ "movq %[rseq_scratch1], %[dst]\n\t"
+ "movq %[rseq_scratch0], %[src]\n\t",
+ cmpfail)
+#ifdef RSEQ_COMPARE_TWICE
+ RSEQ_ASM_DEFINE_CMPFAIL(6,
+ "movq %[rseq_scratch2], %[len]\n\t"
+ "movq %[rseq_scratch1], %[dst]\n\t"
+ "movq %[rseq_scratch0], %[src]\n\t",
+ error1)
+ RSEQ_ASM_DEFINE_CMPFAIL(7,
+ "movq %[rseq_scratch2], %[len]\n\t"
+ "movq %[rseq_scratch1], %[dst]\n\t"
+ "movq %[rseq_scratch0], %[src]\n\t",
+ error2)
+#endif
+ : /* gcc asm goto does not allow outputs */
+ : [cpu_id] "r" (cpu),
+ [rseq_offset] "r" (rseq_offset),
+ /* final store input */
+ [v] "m" (*v),
+ [expect] "r" (expect),
+ [newv] "r" (newv),
+ /* try memcpy input */
+ [dst] "r" (dst),
+ [src] "r" (src),
+ [len] "r" (len),
+ [rseq_scratch0] "m" (rseq_scratch[0]),
+ [rseq_scratch1] "m" (rseq_scratch[1]),
+ [rseq_scratch2] "m" (rseq_scratch[2])
+ : "memory", "cc", "rax"
+ RSEQ_INJECT_CLOBBER
+ : abort, cmpfail
+#ifdef RSEQ_COMPARE_TWICE
+ , error1, error2
+#endif
+ );
+ rseq_after_asm_goto();
+ return 0;
+abort:
+ rseq_after_asm_goto();
+ RSEQ_INJECT_FAILED
+ return -1;
+cmpfail:
+ rseq_after_asm_goto();
+ return 1;
+#ifdef RSEQ_COMPARE_TWICE
+error1:
+ rseq_after_asm_goto();
+ rseq_bug("cpu_id comparison failed");
+error2:
+ rseq_after_asm_goto();
+ rseq_bug("expected value comparison failed");
+#endif
+}
+
+#endif /* #if (defined(RSEQ_TEMPLATE_MO_RELAXED) || defined(RSEQ_TEMPLATE_MO_RELEASE)) &&
+ (defined(RSEQ_TEMPLATE_CPU_ID) || defined(RSEQ_TEMPLATE_MM_CID)) */
+
+#elif defined(__i386__)
+
+#if defined(RSEQ_TEMPLATE_MO_RELAXED) && \
+ (defined(RSEQ_TEMPLATE_CPU_ID) || defined(RSEQ_TEMPLATE_MM_CID))
+
+static inline __attribute__((always_inline))
+int RSEQ_TEMPLATE_IDENTIFIER(rseq_cmpeqv_storev)(intptr_t *v, intptr_t expect, intptr_t newv, int cpu)
+{
+ RSEQ_INJECT_C(9)
+
+ __asm__ __volatile__ goto (
+ RSEQ_ASM_DEFINE_TABLE(3, 1f, 2f, 4f) /* start, commit, abort */
+ RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[cmpfail])
+#ifdef RSEQ_COMPARE_TWICE
+ RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[error1])
+ RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[error2])
+#endif
+ /* Start rseq by storing table entry pointer into rseq_cs. */
+ RSEQ_ASM_STORE_RSEQ_CS(1, 3b, RSEQ_ASM_TP_SEGMENT:RSEQ_CS_OFFSET(%[rseq_offset]))
+ RSEQ_ASM_CMP_CPU_ID(cpu_id, RSEQ_ASM_TP_SEGMENT:RSEQ_TEMPLATE_CPU_ID_OFFSET(%[rseq_offset]), 4f)
+ RSEQ_INJECT_ASM(3)
+ "cmpl %[v], %[expect]\n\t"
+ "jnz %l[cmpfail]\n\t"
+ RSEQ_INJECT_ASM(4)
+#ifdef RSEQ_COMPARE_TWICE
+ RSEQ_ASM_CMP_CPU_ID(cpu_id, RSEQ_ASM_TP_SEGMENT:RSEQ_TEMPLATE_CPU_ID_OFFSET(%[rseq_offset]), %l[error1])
+ "cmpl %[v], %[expect]\n\t"
+ "jnz %l[error2]\n\t"
+#endif
+ /* final store */
+ "movl %[newv], %[v]\n\t"
+ "2:\n\t"
+ RSEQ_INJECT_ASM(5)
+ RSEQ_ASM_DEFINE_ABORT(4, "", abort)
+ : /* gcc asm goto does not allow outputs */
+ : [cpu_id] "r" (cpu),
+ [rseq_offset] "r" (rseq_offset),
+ [v] "m" (*v),
+ [expect] "r" (expect),
+ [newv] "r" (newv)
+ : "memory", "cc", "eax"
+ RSEQ_INJECT_CLOBBER
+ : abort, cmpfail
+#ifdef RSEQ_COMPARE_TWICE
+ , error1, error2
+#endif
+ );
+ rseq_after_asm_goto();
+ return 0;
+abort:
+ rseq_after_asm_goto();
+ RSEQ_INJECT_FAILED
+ return -1;
+cmpfail:
+ rseq_after_asm_goto();
+ return 1;
+#ifdef RSEQ_COMPARE_TWICE
+error1:
+ rseq_after_asm_goto();
+ rseq_bug("cpu_id comparison failed");
+error2:
+ rseq_after_asm_goto();
+ rseq_bug("expected value comparison failed");
+#endif
+}
+
+/*
+ * Compare @v against @expectnot. When it does _not_ match, load @v
+ * into @load, and store the content of *@v + voffp into @v.
+ */
+static inline __attribute__((always_inline))
+int RSEQ_TEMPLATE_IDENTIFIER(rseq_cmpnev_storeoffp_load)(intptr_t *v, intptr_t expectnot,
+ long voffp, intptr_t *load, int cpu)
+{
+ RSEQ_INJECT_C(9)
+
+ __asm__ __volatile__ goto (
+ RSEQ_ASM_DEFINE_TABLE(3, 1f, 2f, 4f) /* start, commit, abort */
+ RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[cmpfail])
+#ifdef RSEQ_COMPARE_TWICE
+ RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[error1])
+ RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[error2])
+#endif
+ /* Start rseq by storing table entry pointer into rseq_cs. */
+ RSEQ_ASM_STORE_RSEQ_CS(1, 3b, RSEQ_ASM_TP_SEGMENT:RSEQ_CS_OFFSET(%[rseq_offset]))
+ RSEQ_ASM_CMP_CPU_ID(cpu_id, RSEQ_ASM_TP_SEGMENT:RSEQ_TEMPLATE_CPU_ID_OFFSET(%[rseq_offset]), 4f)
+ RSEQ_INJECT_ASM(3)
+ "movl %[v], %%ebx\n\t"
+ "cmpl %%ebx, %[expectnot]\n\t"
+ "je %l[cmpfail]\n\t"
+ RSEQ_INJECT_ASM(4)
+#ifdef RSEQ_COMPARE_TWICE
+ RSEQ_ASM_CMP_CPU_ID(cpu_id, RSEQ_ASM_TP_SEGMENT:RSEQ_TEMPLATE_CPU_ID_OFFSET(%[rseq_offset]), %l[error1])
+ "movl %[v], %%ebx\n\t"
+ "cmpl %%ebx, %[expectnot]\n\t"
+ "je %l[error2]\n\t"
+#endif
+ "movl %%ebx, %[load]\n\t"
+ "addl %[voffp], %%ebx\n\t"
+ "movl (%%ebx), %%ebx\n\t"
+ /* final store */
+ "movl %%ebx, %[v]\n\t"
+ "2:\n\t"
+ RSEQ_INJECT_ASM(5)
+ RSEQ_ASM_DEFINE_ABORT(4, "", abort)
+ : /* gcc asm goto does not allow outputs */
+ : [cpu_id] "r" (cpu),
+ [rseq_offset] "r" (rseq_offset),
+ /* final store input */
+ [v] "m" (*v),
+ [expectnot] "r" (expectnot),
+ [voffp] "ir" (voffp),
+ [load] "m" (*load)
+ : "memory", "cc", "eax", "ebx"
+ RSEQ_INJECT_CLOBBER
+ : abort, cmpfail
+#ifdef RSEQ_COMPARE_TWICE
+ , error1, error2
+#endif
+ );
+ rseq_after_asm_goto();
+ return 0;
+abort:
+ rseq_after_asm_goto();
+ RSEQ_INJECT_FAILED
+ return -1;
+cmpfail:
+ rseq_after_asm_goto();
+ return 1;
+#ifdef RSEQ_COMPARE_TWICE
+error1:
+ rseq_after_asm_goto();
+ rseq_bug("cpu_id comparison failed");
+error2:
+ rseq_after_asm_goto();
+ rseq_bug("expected value comparison failed");
+#endif
+}
+
+static inline __attribute__((always_inline))
+int RSEQ_TEMPLATE_IDENTIFIER(rseq_addv)(intptr_t *v, intptr_t count, int cpu)
+{
+ RSEQ_INJECT_C(9)
+
+ __asm__ __volatile__ goto (
+ RSEQ_ASM_DEFINE_TABLE(3, 1f, 2f, 4f) /* start, commit, abort */
+#ifdef RSEQ_COMPARE_TWICE
+ RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[error1])
+#endif
+ /* Start rseq by storing table entry pointer into rseq_cs. */
+ RSEQ_ASM_STORE_RSEQ_CS(1, 3b, RSEQ_ASM_TP_SEGMENT:RSEQ_CS_OFFSET(%[rseq_offset]))
+ RSEQ_ASM_CMP_CPU_ID(cpu_id, RSEQ_ASM_TP_SEGMENT:RSEQ_TEMPLATE_CPU_ID_OFFSET(%[rseq_offset]), 4f)
+ RSEQ_INJECT_ASM(3)
+#ifdef RSEQ_COMPARE_TWICE
+ RSEQ_ASM_CMP_CPU_ID(cpu_id, RSEQ_ASM_TP_SEGMENT:RSEQ_TEMPLATE_CPU_ID_OFFSET(%[rseq_offset]), %l[error1])
+#endif
+ /* final store */
+ "addl %[count], %[v]\n\t"
+ "2:\n\t"
+ RSEQ_INJECT_ASM(4)
+ RSEQ_ASM_DEFINE_ABORT(4, "", abort)
+ : /* gcc asm goto does not allow outputs */
+ : [cpu_id] "r" (cpu),
+ [rseq_offset] "r" (rseq_offset),
+ /* final store input */
+ [v] "m" (*v),
+ [count] "ir" (count)
+ : "memory", "cc", "eax"
+ RSEQ_INJECT_CLOBBER
+ : abort
+#ifdef RSEQ_COMPARE_TWICE
+ , error1
+#endif
+ );
+ rseq_after_asm_goto();
+ return 0;
+abort:
+ rseq_after_asm_goto();
+ RSEQ_INJECT_FAILED
+ return -1;
+#ifdef RSEQ_COMPARE_TWICE
+error1:
+ rseq_after_asm_goto();
+ rseq_bug("cpu_id comparison failed");
+#endif
+}
+
+static inline __attribute__((always_inline))
+int RSEQ_TEMPLATE_IDENTIFIER(rseq_cmpeqv_cmpeqv_storev)(intptr_t *v, intptr_t expect,
+ intptr_t *v2, intptr_t expect2,
+ intptr_t newv, int cpu)
+{
+ RSEQ_INJECT_C(9)
+
+ __asm__ __volatile__ goto (
+ RSEQ_ASM_DEFINE_TABLE(3, 1f, 2f, 4f) /* start, commit, abort */
+ RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[cmpfail])
+#ifdef RSEQ_COMPARE_TWICE
+ RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[error1])
+ RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[error2])
+ RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[error3])
+#endif
+ /* Start rseq by storing table entry pointer into rseq_cs. */
+ RSEQ_ASM_STORE_RSEQ_CS(1, 3b, RSEQ_ASM_TP_SEGMENT:RSEQ_CS_OFFSET(%[rseq_offset]))
+ RSEQ_ASM_CMP_CPU_ID(cpu_id, RSEQ_ASM_TP_SEGMENT:RSEQ_TEMPLATE_CPU_ID_OFFSET(%[rseq_offset]), 4f)
+ RSEQ_INJECT_ASM(3)
+ "cmpl %[v], %[expect]\n\t"
+ "jnz %l[cmpfail]\n\t"
+ RSEQ_INJECT_ASM(4)
+ "cmpl %[expect2], %[v2]\n\t"
+ "jnz %l[cmpfail]\n\t"
+ RSEQ_INJECT_ASM(5)
+#ifdef RSEQ_COMPARE_TWICE
+ RSEQ_ASM_CMP_CPU_ID(cpu_id, RSEQ_ASM_TP_SEGMENT:RSEQ_TEMPLATE_CPU_ID_OFFSET(%[rseq_offset]), %l[error1])
+ "cmpl %[v], %[expect]\n\t"
+ "jnz %l[error2]\n\t"
+ "cmpl %[expect2], %[v2]\n\t"
+ "jnz %l[error3]\n\t"
+#endif
+ "movl %[newv], %%eax\n\t"
+ /* final store */
+ "movl %%eax, %[v]\n\t"
+ "2:\n\t"
+ RSEQ_INJECT_ASM(6)
+ RSEQ_ASM_DEFINE_ABORT(4, "", abort)
+ : /* gcc asm goto does not allow outputs */
+ : [cpu_id] "r" (cpu),
+ [rseq_offset] "r" (rseq_offset),
+ /* cmp2 input */
+ [v2] "m" (*v2),
+ [expect2] "r" (expect2),
+ /* final store input */
+ [v] "m" (*v),
+ [expect] "r" (expect),
+ [newv] "m" (newv)
+ : "memory", "cc", "eax"
+ RSEQ_INJECT_CLOBBER
+ : abort, cmpfail
+#ifdef RSEQ_COMPARE_TWICE
+ , error1, error2, error3
+#endif
+ );
+ rseq_after_asm_goto();
+ return 0;
+abort:
+ rseq_after_asm_goto();
+ RSEQ_INJECT_FAILED
+ return -1;
+cmpfail:
+ rseq_after_asm_goto();
+ return 1;
+#ifdef RSEQ_COMPARE_TWICE
+error1:
+ rseq_after_asm_goto();
+ rseq_bug("cpu_id comparison failed");
+error2:
+ rseq_after_asm_goto();
+ rseq_bug("1st expected value comparison failed");
+error3:
+ rseq_after_asm_goto();
+ rseq_bug("2nd expected value comparison failed");
+#endif
+}
+
+#endif /* #if defined(RSEQ_TEMPLATE_MO_RELAXED) &&
+ (defined(RSEQ_TEMPLATE_CPU_ID) || defined(RSEQ_TEMPLATE_MM_CID)) */
+
+#if (defined(RSEQ_TEMPLATE_MO_RELAXED) || defined(RSEQ_TEMPLATE_MO_RELEASE)) && \
+ (defined(RSEQ_TEMPLATE_CPU_ID) || defined(RSEQ_TEMPLATE_MM_CID))
+
+static inline __attribute__((always_inline))
+int RSEQ_TEMPLATE_IDENTIFIER(rseq_cmpeqv_trystorev_storev)(intptr_t *v, intptr_t expect,
+ intptr_t *v2, intptr_t newv2,
+ intptr_t newv, int cpu)
+{
+ RSEQ_INJECT_C(9)
+
+ __asm__ __volatile__ goto (
+ RSEQ_ASM_DEFINE_TABLE(3, 1f, 2f, 4f) /* start, commit, abort */
+ RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[cmpfail])
+#ifdef RSEQ_COMPARE_TWICE
+ RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[error1])
+ RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[error2])
+#endif
+ /* Start rseq by storing table entry pointer into rseq_cs. */
+ RSEQ_ASM_STORE_RSEQ_CS(1, 3b, RSEQ_ASM_TP_SEGMENT:RSEQ_CS_OFFSET(%[rseq_offset]))
+ RSEQ_ASM_CMP_CPU_ID(cpu_id, RSEQ_ASM_TP_SEGMENT:RSEQ_TEMPLATE_CPU_ID_OFFSET(%[rseq_offset]), 4f)
+ RSEQ_INJECT_ASM(3)
+ "movl %[expect], %%eax\n\t"
+ "cmpl %[v], %%eax\n\t"
+ "jnz %l[cmpfail]\n\t"
+ RSEQ_INJECT_ASM(4)
+#ifdef RSEQ_COMPARE_TWICE
+ RSEQ_ASM_CMP_CPU_ID(cpu_id, RSEQ_ASM_TP_SEGMENT:RSEQ_TEMPLATE_CPU_ID_OFFSET(%[rseq_offset]), %l[error1])
+ "movl %[expect], %%eax\n\t"
+ "cmpl %[v], %%eax\n\t"
+ "jnz %l[error2]\n\t"
+#endif
+ /* try store */
+ "movl %[newv2], %[v2]\n\t"
+ RSEQ_INJECT_ASM(5)
+#ifdef RSEQ_TEMPLATE_MO_RELEASE
+ "lock; addl $0,-128(%%esp)\n\t"
+#endif
+ /* final store */
+ "movl %[newv], %[v]\n\t"
+ "2:\n\t"
+ RSEQ_INJECT_ASM(6)
+ RSEQ_ASM_DEFINE_ABORT(4, "", abort)
+ : /* gcc asm goto does not allow outputs */
+ : [cpu_id] "r" (cpu),
+ [rseq_offset] "r" (rseq_offset),
+ /* try store input */
+ [v2] "m" (*v2),
+ [newv2] "r" (newv2),
+ /* final store input */
+ [v] "m" (*v),
+ [expect] "m" (expect),
+ [newv] "r" (newv)
+ : "memory", "cc", "eax"
+ RSEQ_INJECT_CLOBBER
+ : abort, cmpfail
+#ifdef RSEQ_COMPARE_TWICE
+ , error1, error2
+#endif
+ );
+ rseq_after_asm_goto();
+ return 0;
+abort:
+ rseq_after_asm_goto();
+ RSEQ_INJECT_FAILED
+ return -1;
+cmpfail:
+ rseq_after_asm_goto();
+ return 1;
+#ifdef RSEQ_COMPARE_TWICE
+error1:
+ rseq_after_asm_goto();
+ rseq_bug("cpu_id comparison failed");
+error2:
+ rseq_after_asm_goto();
+ rseq_bug("expected value comparison failed");
+#endif
+
+}
+
+/* TODO: implement a faster memcpy. */
+static inline __attribute__((always_inline))
+int RSEQ_TEMPLATE_IDENTIFIER(rseq_cmpeqv_trymemcpy_storev)(intptr_t *v, intptr_t expect,
+ void *dst, void *src, size_t len,
+ intptr_t newv, int cpu)
+{
+ uint32_t rseq_scratch[3];
+
+ RSEQ_INJECT_C(9)
+
+ __asm__ __volatile__ goto (
+ RSEQ_ASM_DEFINE_TABLE(3, 1f, 2f, 4f) /* start, commit, abort */
+ RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[cmpfail])
+#ifdef RSEQ_COMPARE_TWICE
+ RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[error1])
+ RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[error2])
+#endif
+ "movl %[src], %[rseq_scratch0]\n\t"
+ "movl %[dst], %[rseq_scratch1]\n\t"
+ "movl %[len], %[rseq_scratch2]\n\t"
+ /* Start rseq by storing table entry pointer into rseq_cs. */
+ RSEQ_ASM_STORE_RSEQ_CS(1, 3b, RSEQ_ASM_TP_SEGMENT:RSEQ_CS_OFFSET(%[rseq_offset]))
+ RSEQ_ASM_CMP_CPU_ID(cpu_id, RSEQ_ASM_TP_SEGMENT:RSEQ_TEMPLATE_CPU_ID_OFFSET(%[rseq_offset]), 4f)
+ RSEQ_INJECT_ASM(3)
+ "movl %[expect], %%eax\n\t"
+ "cmpl %%eax, %[v]\n\t"
+ "jnz 5f\n\t"
+ RSEQ_INJECT_ASM(4)
+#ifdef RSEQ_COMPARE_TWICE
+ RSEQ_ASM_CMP_CPU_ID(cpu_id, RSEQ_ASM_TP_SEGMENT:RSEQ_TEMPLATE_CPU_ID_OFFSET(%[rseq_offset]), 6f)
+ "movl %[expect], %%eax\n\t"
+ "cmpl %%eax, %[v]\n\t"
+ "jnz 7f\n\t"
+#endif
+ /* try memcpy */
+ "test %[len], %[len]\n\t" \
+ "jz 333f\n\t" \
+ "222:\n\t" \
+ "movb (%[src]), %%al\n\t" \
+ "movb %%al, (%[dst])\n\t" \
+ "inc %[src]\n\t" \
+ "inc %[dst]\n\t" \
+ "dec %[len]\n\t" \
+ "jnz 222b\n\t" \
+ "333:\n\t" \
+ RSEQ_INJECT_ASM(5)
+#ifdef RSEQ_TEMPLATE_MO_RELEASE
+ "lock; addl $0,-128(%%esp)\n\t"
+#endif
+ "movl %[newv], %%eax\n\t"
+ /* final store */
+ "movl %%eax, %[v]\n\t"
+ "2:\n\t"
+ RSEQ_INJECT_ASM(6)
+ /* teardown */
+ "movl %[rseq_scratch2], %[len]\n\t"
+ "movl %[rseq_scratch1], %[dst]\n\t"
+ "movl %[rseq_scratch0], %[src]\n\t"
+ RSEQ_ASM_DEFINE_ABORT(4,
+ "movl %[rseq_scratch2], %[len]\n\t"
+ "movl %[rseq_scratch1], %[dst]\n\t"
+ "movl %[rseq_scratch0], %[src]\n\t",
+ abort)
+ RSEQ_ASM_DEFINE_CMPFAIL(5,
+ "movl %[rseq_scratch2], %[len]\n\t"
+ "movl %[rseq_scratch1], %[dst]\n\t"
+ "movl %[rseq_scratch0], %[src]\n\t",
+ cmpfail)
+#ifdef RSEQ_COMPARE_TWICE
+ RSEQ_ASM_DEFINE_CMPFAIL(6,
+ "movl %[rseq_scratch2], %[len]\n\t"
+ "movl %[rseq_scratch1], %[dst]\n\t"
+ "movl %[rseq_scratch0], %[src]\n\t",
+ error1)
+ RSEQ_ASM_DEFINE_CMPFAIL(7,
+ "movl %[rseq_scratch2], %[len]\n\t"
+ "movl %[rseq_scratch1], %[dst]\n\t"
+ "movl %[rseq_scratch0], %[src]\n\t",
+ error2)
+#endif
+ : /* gcc asm goto does not allow outputs */
+ : [cpu_id] "r" (cpu),
+ [rseq_offset] "r" (rseq_offset),
+ /* final store input */
+ [v] "m" (*v),
+ [expect] "m" (expect),
+ [newv] "m" (newv),
+ /* try memcpy input */
+ [dst] "r" (dst),
+ [src] "r" (src),
+ [len] "r" (len),
+ [rseq_scratch0] "m" (rseq_scratch[0]),
+ [rseq_scratch1] "m" (rseq_scratch[1]),
+ [rseq_scratch2] "m" (rseq_scratch[2])
+ : "memory", "cc", "eax"
+ RSEQ_INJECT_CLOBBER
+ : abort, cmpfail
+#ifdef RSEQ_COMPARE_TWICE
+ , error1, error2
+#endif
+ );
+ rseq_after_asm_goto();
+ return 0;
+abort:
+ rseq_after_asm_goto();
+ RSEQ_INJECT_FAILED
+ return -1;
+cmpfail:
+ rseq_after_asm_goto();
+ return 1;
+#ifdef RSEQ_COMPARE_TWICE
+error1:
+ rseq_after_asm_goto();
+ rseq_bug("cpu_id comparison failed");
+error2:
+ rseq_after_asm_goto();
+ rseq_bug("expected value comparison failed");
+#endif
+}
+
+#endif /* #if (defined(RSEQ_TEMPLATE_MO_RELAXED) || defined(RSEQ_TEMPLATE_MO_RELEASE)) &&
+ (defined(RSEQ_TEMPLATE_CPU_ID) || defined(RSEQ_TEMPLATE_MM_CID)) */
+
+#endif
+
+#include "rseq-bits-reset.h"
diff --git a/tools/testing/selftests/rseq/rseq-x86-thread-pointer.h b/tools/testing/selftests/rseq/rseq-x86-thread-pointer.h
new file mode 100644
index 000000000000..d3133587d996
--- /dev/null
+++ b/tools/testing/selftests/rseq/rseq-x86-thread-pointer.h
@@ -0,0 +1,40 @@
+/* SPDX-License-Identifier: LGPL-2.1-only OR MIT */
+/*
+ * rseq-x86-thread-pointer.h
+ *
+ * (C) Copyright 2021 - Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
+ */
+
+#ifndef _RSEQ_X86_THREAD_POINTER
+#define _RSEQ_X86_THREAD_POINTER
+
+#include <features.h>
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#if __GNUC_PREREQ (11, 1)
+static inline void *rseq_thread_pointer(void)
+{
+ return __builtin_thread_pointer();
+}
+#else
+static inline void *rseq_thread_pointer(void)
+{
+ void *__result;
+
+# ifdef __x86_64__
+ __asm__ ("mov %%fs:0, %0" : "=r" (__result));
+# else
+ __asm__ ("mov %%gs:0, %0" : "=r" (__result));
+# endif
+ return __result;
+}
+#endif /* !GCC 11 */
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif
diff --git a/tools/testing/selftests/rseq/rseq-x86.h b/tools/testing/selftests/rseq/rseq-x86.h
index b2da6004fe30..a2aa428ba151 100644
--- a/tools/testing/selftests/rseq/rseq-x86.h
+++ b/tools/testing/selftests/rseq/rseq-x86.h
@@ -2,9 +2,13 @@
/*
* rseq-x86.h
*
- * (C) Copyright 2016-2018 - Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
+ * (C) Copyright 2016-2022 - Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
*/
+#ifndef RSEQ_H
+#error "Never use <rseq-x86.h> directly; include <rseq.h> instead."
+#endif
+
#include <stdint.h>
/*
@@ -22,12 +26,15 @@
* address through a "r" input operand.
*/
-/* Offset of cpu_id and rseq_cs fields in struct rseq. */
+/* Offset of cpu_id, rseq_cs, and mm_cid fields in struct rseq. */
#define RSEQ_CPU_ID_OFFSET 4
#define RSEQ_CS_OFFSET 8
+#define RSEQ_MM_CID_OFFSET 24
#ifdef __x86_64__
+#define RSEQ_ASM_TP_SEGMENT %%fs
+
#define rseq_smp_mb() \
__asm__ __volatile__ ("lock; addl $0,-128(%%rsp)" ::: "memory", "cc")
#define rseq_smp_rmb() rseq_barrier()
@@ -35,7 +42,7 @@
#define rseq_smp_load_acquire(p) \
__extension__ ({ \
- __typeof(*p) ____p1 = RSEQ_READ_ONCE(*p); \
+ rseq_unqual_scalar_typeof(*(p)) ____p1 = RSEQ_READ_ONCE(*(p)); \
rseq_barrier(); \
____p1; \
})
@@ -45,13 +52,9 @@ __extension__ ({ \
#define rseq_smp_store_release(p, v) \
do { \
rseq_barrier(); \
- RSEQ_WRITE_ONCE(*p, v); \
+ RSEQ_WRITE_ONCE(*(p), v); \
} while (0)
-#ifdef RSEQ_SKIP_FASTPATH
-#include "rseq-skip.h"
-#else /* !RSEQ_SKIP_FASTPATH */
-
#define __RSEQ_ASM_DEFINE_TABLE(label, version, flags, \
start_ip, post_commit_offset, abort_ip) \
".pushsection __rseq_cs, \"aw\"\n\t" \
@@ -110,440 +113,9 @@ do { \
"jmp %l[" __rseq_str(cmpfail_label) "]\n\t" \
".popsection\n\t"
-static inline __attribute__((always_inline))
-int rseq_cmpeqv_storev(intptr_t *v, intptr_t expect, intptr_t newv, int cpu)
-{
- RSEQ_INJECT_C(9)
-
- __asm__ __volatile__ goto (
- RSEQ_ASM_DEFINE_TABLE(3, 1f, 2f, 4f) /* start, commit, abort */
- RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[cmpfail])
-#ifdef RSEQ_COMPARE_TWICE
- RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[error1])
- RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[error2])
-#endif
- /* Start rseq by storing table entry pointer into rseq_cs. */
- RSEQ_ASM_STORE_RSEQ_CS(1, 3b, RSEQ_CS_OFFSET(%[rseq_abi]))
- RSEQ_ASM_CMP_CPU_ID(cpu_id, RSEQ_CPU_ID_OFFSET(%[rseq_abi]), 4f)
- RSEQ_INJECT_ASM(3)
- "cmpq %[v], %[expect]\n\t"
- "jnz %l[cmpfail]\n\t"
- RSEQ_INJECT_ASM(4)
-#ifdef RSEQ_COMPARE_TWICE
- RSEQ_ASM_CMP_CPU_ID(cpu_id, RSEQ_CPU_ID_OFFSET(%[rseq_abi]), %l[error1])
- "cmpq %[v], %[expect]\n\t"
- "jnz %l[error2]\n\t"
-#endif
- /* final store */
- "movq %[newv], %[v]\n\t"
- "2:\n\t"
- RSEQ_INJECT_ASM(5)
- RSEQ_ASM_DEFINE_ABORT(4, "", abort)
- : /* gcc asm goto does not allow outputs */
- : [cpu_id] "r" (cpu),
- [rseq_abi] "r" (&__rseq_abi),
- [v] "m" (*v),
- [expect] "r" (expect),
- [newv] "r" (newv)
- : "memory", "cc", "rax"
- RSEQ_INJECT_CLOBBER
- : abort, cmpfail
-#ifdef RSEQ_COMPARE_TWICE
- , error1, error2
-#endif
- );
- return 0;
-abort:
- RSEQ_INJECT_FAILED
- return -1;
-cmpfail:
- return 1;
-#ifdef RSEQ_COMPARE_TWICE
-error1:
- rseq_bug("cpu_id comparison failed");
-error2:
- rseq_bug("expected value comparison failed");
-#endif
-}
-
-/*
- * Compare @v against @expectnot. When it does _not_ match, load @v
- * into @load, and store the content of *@v + voffp into @v.
- */
-static inline __attribute__((always_inline))
-int rseq_cmpnev_storeoffp_load(intptr_t *v, intptr_t expectnot,
- off_t voffp, intptr_t *load, int cpu)
-{
- RSEQ_INJECT_C(9)
-
- __asm__ __volatile__ goto (
- RSEQ_ASM_DEFINE_TABLE(3, 1f, 2f, 4f) /* start, commit, abort */
- RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[cmpfail])
-#ifdef RSEQ_COMPARE_TWICE
- RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[error1])
- RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[error2])
-#endif
- /* Start rseq by storing table entry pointer into rseq_cs. */
- RSEQ_ASM_STORE_RSEQ_CS(1, 3b, RSEQ_CS_OFFSET(%[rseq_abi]))
- RSEQ_ASM_CMP_CPU_ID(cpu_id, RSEQ_CPU_ID_OFFSET(%[rseq_abi]), 4f)
- RSEQ_INJECT_ASM(3)
- "movq %[v], %%rbx\n\t"
- "cmpq %%rbx, %[expectnot]\n\t"
- "je %l[cmpfail]\n\t"
- RSEQ_INJECT_ASM(4)
-#ifdef RSEQ_COMPARE_TWICE
- RSEQ_ASM_CMP_CPU_ID(cpu_id, RSEQ_CPU_ID_OFFSET(%[rseq_abi]), %l[error1])
- "movq %[v], %%rbx\n\t"
- "cmpq %%rbx, %[expectnot]\n\t"
- "je %l[error2]\n\t"
-#endif
- "movq %%rbx, %[load]\n\t"
- "addq %[voffp], %%rbx\n\t"
- "movq (%%rbx), %%rbx\n\t"
- /* final store */
- "movq %%rbx, %[v]\n\t"
- "2:\n\t"
- RSEQ_INJECT_ASM(5)
- RSEQ_ASM_DEFINE_ABORT(4, "", abort)
- : /* gcc asm goto does not allow outputs */
- : [cpu_id] "r" (cpu),
- [rseq_abi] "r" (&__rseq_abi),
- /* final store input */
- [v] "m" (*v),
- [expectnot] "r" (expectnot),
- [voffp] "er" (voffp),
- [load] "m" (*load)
- : "memory", "cc", "rax", "rbx"
- RSEQ_INJECT_CLOBBER
- : abort, cmpfail
-#ifdef RSEQ_COMPARE_TWICE
- , error1, error2
-#endif
- );
- return 0;
-abort:
- RSEQ_INJECT_FAILED
- return -1;
-cmpfail:
- return 1;
-#ifdef RSEQ_COMPARE_TWICE
-error1:
- rseq_bug("cpu_id comparison failed");
-error2:
- rseq_bug("expected value comparison failed");
-#endif
-}
-
-static inline __attribute__((always_inline))
-int rseq_addv(intptr_t *v, intptr_t count, int cpu)
-{
- RSEQ_INJECT_C(9)
-
- __asm__ __volatile__ goto (
- RSEQ_ASM_DEFINE_TABLE(3, 1f, 2f, 4f) /* start, commit, abort */
-#ifdef RSEQ_COMPARE_TWICE
- RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[error1])
-#endif
- /* Start rseq by storing table entry pointer into rseq_cs. */
- RSEQ_ASM_STORE_RSEQ_CS(1, 3b, RSEQ_CS_OFFSET(%[rseq_abi]))
- RSEQ_ASM_CMP_CPU_ID(cpu_id, RSEQ_CPU_ID_OFFSET(%[rseq_abi]), 4f)
- RSEQ_INJECT_ASM(3)
-#ifdef RSEQ_COMPARE_TWICE
- RSEQ_ASM_CMP_CPU_ID(cpu_id, RSEQ_CPU_ID_OFFSET(%[rseq_abi]), %l[error1])
-#endif
- /* final store */
- "addq %[count], %[v]\n\t"
- "2:\n\t"
- RSEQ_INJECT_ASM(4)
- RSEQ_ASM_DEFINE_ABORT(4, "", abort)
- : /* gcc asm goto does not allow outputs */
- : [cpu_id] "r" (cpu),
- [rseq_abi] "r" (&__rseq_abi),
- /* final store input */
- [v] "m" (*v),
- [count] "er" (count)
- : "memory", "cc", "rax"
- RSEQ_INJECT_CLOBBER
- : abort
-#ifdef RSEQ_COMPARE_TWICE
- , error1
-#endif
- );
- return 0;
-abort:
- RSEQ_INJECT_FAILED
- return -1;
-#ifdef RSEQ_COMPARE_TWICE
-error1:
- rseq_bug("cpu_id comparison failed");
-#endif
-}
+#elif defined(__i386__)
-static inline __attribute__((always_inline))
-int rseq_cmpeqv_trystorev_storev(intptr_t *v, intptr_t expect,
- intptr_t *v2, intptr_t newv2,
- intptr_t newv, int cpu)
-{
- RSEQ_INJECT_C(9)
-
- __asm__ __volatile__ goto (
- RSEQ_ASM_DEFINE_TABLE(3, 1f, 2f, 4f) /* start, commit, abort */
- RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[cmpfail])
-#ifdef RSEQ_COMPARE_TWICE
- RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[error1])
- RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[error2])
-#endif
- /* Start rseq by storing table entry pointer into rseq_cs. */
- RSEQ_ASM_STORE_RSEQ_CS(1, 3b, RSEQ_CS_OFFSET(%[rseq_abi]))
- RSEQ_ASM_CMP_CPU_ID(cpu_id, RSEQ_CPU_ID_OFFSET(%[rseq_abi]), 4f)
- RSEQ_INJECT_ASM(3)
- "cmpq %[v], %[expect]\n\t"
- "jnz %l[cmpfail]\n\t"
- RSEQ_INJECT_ASM(4)
-#ifdef RSEQ_COMPARE_TWICE
- RSEQ_ASM_CMP_CPU_ID(cpu_id, RSEQ_CPU_ID_OFFSET(%[rseq_abi]), %l[error1])
- "cmpq %[v], %[expect]\n\t"
- "jnz %l[error2]\n\t"
-#endif
- /* try store */
- "movq %[newv2], %[v2]\n\t"
- RSEQ_INJECT_ASM(5)
- /* final store */
- "movq %[newv], %[v]\n\t"
- "2:\n\t"
- RSEQ_INJECT_ASM(6)
- RSEQ_ASM_DEFINE_ABORT(4, "", abort)
- : /* gcc asm goto does not allow outputs */
- : [cpu_id] "r" (cpu),
- [rseq_abi] "r" (&__rseq_abi),
- /* try store input */
- [v2] "m" (*v2),
- [newv2] "r" (newv2),
- /* final store input */
- [v] "m" (*v),
- [expect] "r" (expect),
- [newv] "r" (newv)
- : "memory", "cc", "rax"
- RSEQ_INJECT_CLOBBER
- : abort, cmpfail
-#ifdef RSEQ_COMPARE_TWICE
- , error1, error2
-#endif
- );
- return 0;
-abort:
- RSEQ_INJECT_FAILED
- return -1;
-cmpfail:
- return 1;
-#ifdef RSEQ_COMPARE_TWICE
-error1:
- rseq_bug("cpu_id comparison failed");
-error2:
- rseq_bug("expected value comparison failed");
-#endif
-}
-
-/* x86-64 is TSO. */
-static inline __attribute__((always_inline))
-int rseq_cmpeqv_trystorev_storev_release(intptr_t *v, intptr_t expect,
- intptr_t *v2, intptr_t newv2,
- intptr_t newv, int cpu)
-{
- return rseq_cmpeqv_trystorev_storev(v, expect, v2, newv2, newv, cpu);
-}
-
-static inline __attribute__((always_inline))
-int rseq_cmpeqv_cmpeqv_storev(intptr_t *v, intptr_t expect,
- intptr_t *v2, intptr_t expect2,
- intptr_t newv, int cpu)
-{
- RSEQ_INJECT_C(9)
-
- __asm__ __volatile__ goto (
- RSEQ_ASM_DEFINE_TABLE(3, 1f, 2f, 4f) /* start, commit, abort */
- RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[cmpfail])
-#ifdef RSEQ_COMPARE_TWICE
- RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[error1])
- RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[error2])
- RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[error3])
-#endif
- /* Start rseq by storing table entry pointer into rseq_cs. */
- RSEQ_ASM_STORE_RSEQ_CS(1, 3b, RSEQ_CS_OFFSET(%[rseq_abi]))
- RSEQ_ASM_CMP_CPU_ID(cpu_id, RSEQ_CPU_ID_OFFSET(%[rseq_abi]), 4f)
- RSEQ_INJECT_ASM(3)
- "cmpq %[v], %[expect]\n\t"
- "jnz %l[cmpfail]\n\t"
- RSEQ_INJECT_ASM(4)
- "cmpq %[v2], %[expect2]\n\t"
- "jnz %l[cmpfail]\n\t"
- RSEQ_INJECT_ASM(5)
-#ifdef RSEQ_COMPARE_TWICE
- RSEQ_ASM_CMP_CPU_ID(cpu_id, RSEQ_CPU_ID_OFFSET(%[rseq_abi]), %l[error1])
- "cmpq %[v], %[expect]\n\t"
- "jnz %l[error2]\n\t"
- "cmpq %[v2], %[expect2]\n\t"
- "jnz %l[error3]\n\t"
-#endif
- /* final store */
- "movq %[newv], %[v]\n\t"
- "2:\n\t"
- RSEQ_INJECT_ASM(6)
- RSEQ_ASM_DEFINE_ABORT(4, "", abort)
- : /* gcc asm goto does not allow outputs */
- : [cpu_id] "r" (cpu),
- [rseq_abi] "r" (&__rseq_abi),
- /* cmp2 input */
- [v2] "m" (*v2),
- [expect2] "r" (expect2),
- /* final store input */
- [v] "m" (*v),
- [expect] "r" (expect),
- [newv] "r" (newv)
- : "memory", "cc", "rax"
- RSEQ_INJECT_CLOBBER
- : abort, cmpfail
-#ifdef RSEQ_COMPARE_TWICE
- , error1, error2, error3
-#endif
- );
- return 0;
-abort:
- RSEQ_INJECT_FAILED
- return -1;
-cmpfail:
- return 1;
-#ifdef RSEQ_COMPARE_TWICE
-error1:
- rseq_bug("cpu_id comparison failed");
-error2:
- rseq_bug("1st expected value comparison failed");
-error3:
- rseq_bug("2nd expected value comparison failed");
-#endif
-}
-
-static inline __attribute__((always_inline))
-int rseq_cmpeqv_trymemcpy_storev(intptr_t *v, intptr_t expect,
- void *dst, void *src, size_t len,
- intptr_t newv, int cpu)
-{
- uint64_t rseq_scratch[3];
-
- RSEQ_INJECT_C(9)
-
- __asm__ __volatile__ goto (
- RSEQ_ASM_DEFINE_TABLE(3, 1f, 2f, 4f) /* start, commit, abort */
- RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[cmpfail])
-#ifdef RSEQ_COMPARE_TWICE
- RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[error1])
- RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[error2])
-#endif
- "movq %[src], %[rseq_scratch0]\n\t"
- "movq %[dst], %[rseq_scratch1]\n\t"
- "movq %[len], %[rseq_scratch2]\n\t"
- /* Start rseq by storing table entry pointer into rseq_cs. */
- RSEQ_ASM_STORE_RSEQ_CS(1, 3b, RSEQ_CS_OFFSET(%[rseq_abi]))
- RSEQ_ASM_CMP_CPU_ID(cpu_id, RSEQ_CPU_ID_OFFSET(%[rseq_abi]), 4f)
- RSEQ_INJECT_ASM(3)
- "cmpq %[v], %[expect]\n\t"
- "jnz 5f\n\t"
- RSEQ_INJECT_ASM(4)
-#ifdef RSEQ_COMPARE_TWICE
- RSEQ_ASM_CMP_CPU_ID(cpu_id, RSEQ_CPU_ID_OFFSET(%[rseq_abi]), 6f)
- "cmpq %[v], %[expect]\n\t"
- "jnz 7f\n\t"
-#endif
- /* try memcpy */
- "test %[len], %[len]\n\t" \
- "jz 333f\n\t" \
- "222:\n\t" \
- "movb (%[src]), %%al\n\t" \
- "movb %%al, (%[dst])\n\t" \
- "inc %[src]\n\t" \
- "inc %[dst]\n\t" \
- "dec %[len]\n\t" \
- "jnz 222b\n\t" \
- "333:\n\t" \
- RSEQ_INJECT_ASM(5)
- /* final store */
- "movq %[newv], %[v]\n\t"
- "2:\n\t"
- RSEQ_INJECT_ASM(6)
- /* teardown */
- "movq %[rseq_scratch2], %[len]\n\t"
- "movq %[rseq_scratch1], %[dst]\n\t"
- "movq %[rseq_scratch0], %[src]\n\t"
- RSEQ_ASM_DEFINE_ABORT(4,
- "movq %[rseq_scratch2], %[len]\n\t"
- "movq %[rseq_scratch1], %[dst]\n\t"
- "movq %[rseq_scratch0], %[src]\n\t",
- abort)
- RSEQ_ASM_DEFINE_CMPFAIL(5,
- "movq %[rseq_scratch2], %[len]\n\t"
- "movq %[rseq_scratch1], %[dst]\n\t"
- "movq %[rseq_scratch0], %[src]\n\t",
- cmpfail)
-#ifdef RSEQ_COMPARE_TWICE
- RSEQ_ASM_DEFINE_CMPFAIL(6,
- "movq %[rseq_scratch2], %[len]\n\t"
- "movq %[rseq_scratch1], %[dst]\n\t"
- "movq %[rseq_scratch0], %[src]\n\t",
- error1)
- RSEQ_ASM_DEFINE_CMPFAIL(7,
- "movq %[rseq_scratch2], %[len]\n\t"
- "movq %[rseq_scratch1], %[dst]\n\t"
- "movq %[rseq_scratch0], %[src]\n\t",
- error2)
-#endif
- : /* gcc asm goto does not allow outputs */
- : [cpu_id] "r" (cpu),
- [rseq_abi] "r" (&__rseq_abi),
- /* final store input */
- [v] "m" (*v),
- [expect] "r" (expect),
- [newv] "r" (newv),
- /* try memcpy input */
- [dst] "r" (dst),
- [src] "r" (src),
- [len] "r" (len),
- [rseq_scratch0] "m" (rseq_scratch[0]),
- [rseq_scratch1] "m" (rseq_scratch[1]),
- [rseq_scratch2] "m" (rseq_scratch[2])
- : "memory", "cc", "rax"
- RSEQ_INJECT_CLOBBER
- : abort, cmpfail
-#ifdef RSEQ_COMPARE_TWICE
- , error1, error2
-#endif
- );
- return 0;
-abort:
- RSEQ_INJECT_FAILED
- return -1;
-cmpfail:
- return 1;
-#ifdef RSEQ_COMPARE_TWICE
-error1:
- rseq_bug("cpu_id comparison failed");
-error2:
- rseq_bug("expected value comparison failed");
-#endif
-}
-
-/* x86-64 is TSO. */
-static inline __attribute__((always_inline))
-int rseq_cmpeqv_trymemcpy_storev_release(intptr_t *v, intptr_t expect,
- void *dst, void *src, size_t len,
- intptr_t newv, int cpu)
-{
- return rseq_cmpeqv_trymemcpy_storev(v, expect, dst, src, len,
- newv, cpu);
-}
-
-#endif /* !RSEQ_SKIP_FASTPATH */
-
-#elif __i386__
+#define RSEQ_ASM_TP_SEGMENT %%gs
#define rseq_smp_mb() \
__asm__ __volatile__ ("lock; addl $0,-128(%%esp)" ::: "memory", "cc")
@@ -567,10 +139,6 @@ do { \
RSEQ_WRITE_ONCE(*p, v); \
} while (0)
-#ifdef RSEQ_SKIP_FASTPATH
-#include "rseq-skip.h"
-#else /* !RSEQ_SKIP_FASTPATH */
-
/*
* Use eax as scratch register and take memory operands as input to
* lessen register pressure. Especially needed when compiling in O0.
@@ -631,606 +199,36 @@ do { \
"jmp %l[" __rseq_str(cmpfail_label) "]\n\t" \
".popsection\n\t"
-static inline __attribute__((always_inline))
-int rseq_cmpeqv_storev(intptr_t *v, intptr_t expect, intptr_t newv, int cpu)
-{
- RSEQ_INJECT_C(9)
-
- __asm__ __volatile__ goto (
- RSEQ_ASM_DEFINE_TABLE(3, 1f, 2f, 4f) /* start, commit, abort */
- RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[cmpfail])
-#ifdef RSEQ_COMPARE_TWICE
- RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[error1])
- RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[error2])
-#endif
- /* Start rseq by storing table entry pointer into rseq_cs. */
- RSEQ_ASM_STORE_RSEQ_CS(1, 3b, RSEQ_CS_OFFSET(%[rseq_abi]))
- RSEQ_ASM_CMP_CPU_ID(cpu_id, RSEQ_CPU_ID_OFFSET(%[rseq_abi]), 4f)
- RSEQ_INJECT_ASM(3)
- "cmpl %[v], %[expect]\n\t"
- "jnz %l[cmpfail]\n\t"
- RSEQ_INJECT_ASM(4)
-#ifdef RSEQ_COMPARE_TWICE
- RSEQ_ASM_CMP_CPU_ID(cpu_id, RSEQ_CPU_ID_OFFSET(%[rseq_abi]), %l[error1])
- "cmpl %[v], %[expect]\n\t"
- "jnz %l[error2]\n\t"
-#endif
- /* final store */
- "movl %[newv], %[v]\n\t"
- "2:\n\t"
- RSEQ_INJECT_ASM(5)
- RSEQ_ASM_DEFINE_ABORT(4, "", abort)
- : /* gcc asm goto does not allow outputs */
- : [cpu_id] "r" (cpu),
- [rseq_abi] "r" (&__rseq_abi),
- [v] "m" (*v),
- [expect] "r" (expect),
- [newv] "r" (newv)
- : "memory", "cc", "eax"
- RSEQ_INJECT_CLOBBER
- : abort, cmpfail
-#ifdef RSEQ_COMPARE_TWICE
- , error1, error2
-#endif
- );
- return 0;
-abort:
- RSEQ_INJECT_FAILED
- return -1;
-cmpfail:
- return 1;
-#ifdef RSEQ_COMPARE_TWICE
-error1:
- rseq_bug("cpu_id comparison failed");
-error2:
- rseq_bug("expected value comparison failed");
-#endif
-}
-
-/*
- * Compare @v against @expectnot. When it does _not_ match, load @v
- * into @load, and store the content of *@v + voffp into @v.
- */
-static inline __attribute__((always_inline))
-int rseq_cmpnev_storeoffp_load(intptr_t *v, intptr_t expectnot,
- off_t voffp, intptr_t *load, int cpu)
-{
- RSEQ_INJECT_C(9)
-
- __asm__ __volatile__ goto (
- RSEQ_ASM_DEFINE_TABLE(3, 1f, 2f, 4f) /* start, commit, abort */
- RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[cmpfail])
-#ifdef RSEQ_COMPARE_TWICE
- RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[error1])
- RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[error2])
-#endif
- /* Start rseq by storing table entry pointer into rseq_cs. */
- RSEQ_ASM_STORE_RSEQ_CS(1, 3b, RSEQ_CS_OFFSET(%[rseq_abi]))
- RSEQ_ASM_CMP_CPU_ID(cpu_id, RSEQ_CPU_ID_OFFSET(%[rseq_abi]), 4f)
- RSEQ_INJECT_ASM(3)
- "movl %[v], %%ebx\n\t"
- "cmpl %%ebx, %[expectnot]\n\t"
- "je %l[cmpfail]\n\t"
- RSEQ_INJECT_ASM(4)
-#ifdef RSEQ_COMPARE_TWICE
- RSEQ_ASM_CMP_CPU_ID(cpu_id, RSEQ_CPU_ID_OFFSET(%[rseq_abi]), %l[error1])
- "movl %[v], %%ebx\n\t"
- "cmpl %%ebx, %[expectnot]\n\t"
- "je %l[error2]\n\t"
-#endif
- "movl %%ebx, %[load]\n\t"
- "addl %[voffp], %%ebx\n\t"
- "movl (%%ebx), %%ebx\n\t"
- /* final store */
- "movl %%ebx, %[v]\n\t"
- "2:\n\t"
- RSEQ_INJECT_ASM(5)
- RSEQ_ASM_DEFINE_ABORT(4, "", abort)
- : /* gcc asm goto does not allow outputs */
- : [cpu_id] "r" (cpu),
- [rseq_abi] "r" (&__rseq_abi),
- /* final store input */
- [v] "m" (*v),
- [expectnot] "r" (expectnot),
- [voffp] "ir" (voffp),
- [load] "m" (*load)
- : "memory", "cc", "eax", "ebx"
- RSEQ_INJECT_CLOBBER
- : abort, cmpfail
-#ifdef RSEQ_COMPARE_TWICE
- , error1, error2
-#endif
- );
- return 0;
-abort:
- RSEQ_INJECT_FAILED
- return -1;
-cmpfail:
- return 1;
-#ifdef RSEQ_COMPARE_TWICE
-error1:
- rseq_bug("cpu_id comparison failed");
-error2:
- rseq_bug("expected value comparison failed");
-#endif
-}
-
-static inline __attribute__((always_inline))
-int rseq_addv(intptr_t *v, intptr_t count, int cpu)
-{
- RSEQ_INJECT_C(9)
-
- __asm__ __volatile__ goto (
- RSEQ_ASM_DEFINE_TABLE(3, 1f, 2f, 4f) /* start, commit, abort */
-#ifdef RSEQ_COMPARE_TWICE
- RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[error1])
-#endif
- /* Start rseq by storing table entry pointer into rseq_cs. */
- RSEQ_ASM_STORE_RSEQ_CS(1, 3b, RSEQ_CS_OFFSET(%[rseq_abi]))
- RSEQ_ASM_CMP_CPU_ID(cpu_id, RSEQ_CPU_ID_OFFSET(%[rseq_abi]), 4f)
- RSEQ_INJECT_ASM(3)
-#ifdef RSEQ_COMPARE_TWICE
- RSEQ_ASM_CMP_CPU_ID(cpu_id, RSEQ_CPU_ID_OFFSET(%[rseq_abi]), %l[error1])
-#endif
- /* final store */
- "addl %[count], %[v]\n\t"
- "2:\n\t"
- RSEQ_INJECT_ASM(4)
- RSEQ_ASM_DEFINE_ABORT(4, "", abort)
- : /* gcc asm goto does not allow outputs */
- : [cpu_id] "r" (cpu),
- [rseq_abi] "r" (&__rseq_abi),
- /* final store input */
- [v] "m" (*v),
- [count] "ir" (count)
- : "memory", "cc", "eax"
- RSEQ_INJECT_CLOBBER
- : abort
-#ifdef RSEQ_COMPARE_TWICE
- , error1
-#endif
- );
- return 0;
-abort:
- RSEQ_INJECT_FAILED
- return -1;
-#ifdef RSEQ_COMPARE_TWICE
-error1:
- rseq_bug("cpu_id comparison failed");
-#endif
-}
-
-static inline __attribute__((always_inline))
-int rseq_cmpeqv_trystorev_storev(intptr_t *v, intptr_t expect,
- intptr_t *v2, intptr_t newv2,
- intptr_t newv, int cpu)
-{
- RSEQ_INJECT_C(9)
-
- __asm__ __volatile__ goto (
- RSEQ_ASM_DEFINE_TABLE(3, 1f, 2f, 4f) /* start, commit, abort */
- RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[cmpfail])
-#ifdef RSEQ_COMPARE_TWICE
- RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[error1])
- RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[error2])
-#endif
- /* Start rseq by storing table entry pointer into rseq_cs. */
- RSEQ_ASM_STORE_RSEQ_CS(1, 3b, RSEQ_CS_OFFSET(%[rseq_abi]))
- RSEQ_ASM_CMP_CPU_ID(cpu_id, RSEQ_CPU_ID_OFFSET(%[rseq_abi]), 4f)
- RSEQ_INJECT_ASM(3)
- "cmpl %[v], %[expect]\n\t"
- "jnz %l[cmpfail]\n\t"
- RSEQ_INJECT_ASM(4)
-#ifdef RSEQ_COMPARE_TWICE
- RSEQ_ASM_CMP_CPU_ID(cpu_id, RSEQ_CPU_ID_OFFSET(%[rseq_abi]), %l[error1])
- "cmpl %[v], %[expect]\n\t"
- "jnz %l[error2]\n\t"
-#endif
- /* try store */
- "movl %[newv2], %%eax\n\t"
- "movl %%eax, %[v2]\n\t"
- RSEQ_INJECT_ASM(5)
- /* final store */
- "movl %[newv], %[v]\n\t"
- "2:\n\t"
- RSEQ_INJECT_ASM(6)
- RSEQ_ASM_DEFINE_ABORT(4, "", abort)
- : /* gcc asm goto does not allow outputs */
- : [cpu_id] "r" (cpu),
- [rseq_abi] "r" (&__rseq_abi),
- /* try store input */
- [v2] "m" (*v2),
- [newv2] "m" (newv2),
- /* final store input */
- [v] "m" (*v),
- [expect] "r" (expect),
- [newv] "r" (newv)
- : "memory", "cc", "eax"
- RSEQ_INJECT_CLOBBER
- : abort, cmpfail
-#ifdef RSEQ_COMPARE_TWICE
- , error1, error2
-#endif
- );
- return 0;
-abort:
- RSEQ_INJECT_FAILED
- return -1;
-cmpfail:
- return 1;
-#ifdef RSEQ_COMPARE_TWICE
-error1:
- rseq_bug("cpu_id comparison failed");
-error2:
- rseq_bug("expected value comparison failed");
#endif
-}
-static inline __attribute__((always_inline))
-int rseq_cmpeqv_trystorev_storev_release(intptr_t *v, intptr_t expect,
- intptr_t *v2, intptr_t newv2,
- intptr_t newv, int cpu)
-{
- RSEQ_INJECT_C(9)
+/* Per-cpu-id indexing. */
- __asm__ __volatile__ goto (
- RSEQ_ASM_DEFINE_TABLE(3, 1f, 2f, 4f) /* start, commit, abort */
- RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[cmpfail])
-#ifdef RSEQ_COMPARE_TWICE
- RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[error1])
- RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[error2])
-#endif
- /* Start rseq by storing table entry pointer into rseq_cs. */
- RSEQ_ASM_STORE_RSEQ_CS(1, 3b, RSEQ_CS_OFFSET(%[rseq_abi]))
- RSEQ_ASM_CMP_CPU_ID(cpu_id, RSEQ_CPU_ID_OFFSET(%[rseq_abi]), 4f)
- RSEQ_INJECT_ASM(3)
- "movl %[expect], %%eax\n\t"
- "cmpl %[v], %%eax\n\t"
- "jnz %l[cmpfail]\n\t"
- RSEQ_INJECT_ASM(4)
-#ifdef RSEQ_COMPARE_TWICE
- RSEQ_ASM_CMP_CPU_ID(cpu_id, RSEQ_CPU_ID_OFFSET(%[rseq_abi]), %l[error1])
- "movl %[expect], %%eax\n\t"
- "cmpl %[v], %%eax\n\t"
- "jnz %l[error2]\n\t"
-#endif
- /* try store */
- "movl %[newv2], %[v2]\n\t"
- RSEQ_INJECT_ASM(5)
- "lock; addl $0,-128(%%esp)\n\t"
- /* final store */
- "movl %[newv], %[v]\n\t"
- "2:\n\t"
- RSEQ_INJECT_ASM(6)
- RSEQ_ASM_DEFINE_ABORT(4, "", abort)
- : /* gcc asm goto does not allow outputs */
- : [cpu_id] "r" (cpu),
- [rseq_abi] "r" (&__rseq_abi),
- /* try store input */
- [v2] "m" (*v2),
- [newv2] "r" (newv2),
- /* final store input */
- [v] "m" (*v),
- [expect] "m" (expect),
- [newv] "r" (newv)
- : "memory", "cc", "eax"
- RSEQ_INJECT_CLOBBER
- : abort, cmpfail
-#ifdef RSEQ_COMPARE_TWICE
- , error1, error2
-#endif
- );
- return 0;
-abort:
- RSEQ_INJECT_FAILED
- return -1;
-cmpfail:
- return 1;
-#ifdef RSEQ_COMPARE_TWICE
-error1:
- rseq_bug("cpu_id comparison failed");
-error2:
- rseq_bug("expected value comparison failed");
-#endif
-
-}
-
-static inline __attribute__((always_inline))
-int rseq_cmpeqv_cmpeqv_storev(intptr_t *v, intptr_t expect,
- intptr_t *v2, intptr_t expect2,
- intptr_t newv, int cpu)
-{
- RSEQ_INJECT_C(9)
-
- __asm__ __volatile__ goto (
- RSEQ_ASM_DEFINE_TABLE(3, 1f, 2f, 4f) /* start, commit, abort */
- RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[cmpfail])
-#ifdef RSEQ_COMPARE_TWICE
- RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[error1])
- RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[error2])
- RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[error3])
-#endif
- /* Start rseq by storing table entry pointer into rseq_cs. */
- RSEQ_ASM_STORE_RSEQ_CS(1, 3b, RSEQ_CS_OFFSET(%[rseq_abi]))
- RSEQ_ASM_CMP_CPU_ID(cpu_id, RSEQ_CPU_ID_OFFSET(%[rseq_abi]), 4f)
- RSEQ_INJECT_ASM(3)
- "cmpl %[v], %[expect]\n\t"
- "jnz %l[cmpfail]\n\t"
- RSEQ_INJECT_ASM(4)
- "cmpl %[expect2], %[v2]\n\t"
- "jnz %l[cmpfail]\n\t"
- RSEQ_INJECT_ASM(5)
-#ifdef RSEQ_COMPARE_TWICE
- RSEQ_ASM_CMP_CPU_ID(cpu_id, RSEQ_CPU_ID_OFFSET(%[rseq_abi]), %l[error1])
- "cmpl %[v], %[expect]\n\t"
- "jnz %l[error2]\n\t"
- "cmpl %[expect2], %[v2]\n\t"
- "jnz %l[error3]\n\t"
-#endif
- "movl %[newv], %%eax\n\t"
- /* final store */
- "movl %%eax, %[v]\n\t"
- "2:\n\t"
- RSEQ_INJECT_ASM(6)
- RSEQ_ASM_DEFINE_ABORT(4, "", abort)
- : /* gcc asm goto does not allow outputs */
- : [cpu_id] "r" (cpu),
- [rseq_abi] "r" (&__rseq_abi),
- /* cmp2 input */
- [v2] "m" (*v2),
- [expect2] "r" (expect2),
- /* final store input */
- [v] "m" (*v),
- [expect] "r" (expect),
- [newv] "m" (newv)
- : "memory", "cc", "eax"
- RSEQ_INJECT_CLOBBER
- : abort, cmpfail
-#ifdef RSEQ_COMPARE_TWICE
- , error1, error2, error3
-#endif
- );
- return 0;
-abort:
- RSEQ_INJECT_FAILED
- return -1;
-cmpfail:
- return 1;
-#ifdef RSEQ_COMPARE_TWICE
-error1:
- rseq_bug("cpu_id comparison failed");
-error2:
- rseq_bug("1st expected value comparison failed");
-error3:
- rseq_bug("2nd expected value comparison failed");
-#endif
-}
-
-/* TODO: implement a faster memcpy. */
-static inline __attribute__((always_inline))
-int rseq_cmpeqv_trymemcpy_storev(intptr_t *v, intptr_t expect,
- void *dst, void *src, size_t len,
- intptr_t newv, int cpu)
-{
- uint32_t rseq_scratch[3];
+#define RSEQ_TEMPLATE_CPU_ID
+#define RSEQ_TEMPLATE_MO_RELAXED
+#include "rseq-x86-bits.h"
+#undef RSEQ_TEMPLATE_MO_RELAXED
- RSEQ_INJECT_C(9)
+#define RSEQ_TEMPLATE_MO_RELEASE
+#include "rseq-x86-bits.h"
+#undef RSEQ_TEMPLATE_MO_RELEASE
+#undef RSEQ_TEMPLATE_CPU_ID
- __asm__ __volatile__ goto (
- RSEQ_ASM_DEFINE_TABLE(3, 1f, 2f, 4f) /* start, commit, abort */
- RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[cmpfail])
-#ifdef RSEQ_COMPARE_TWICE
- RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[error1])
- RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[error2])
-#endif
- "movl %[src], %[rseq_scratch0]\n\t"
- "movl %[dst], %[rseq_scratch1]\n\t"
- "movl %[len], %[rseq_scratch2]\n\t"
- /* Start rseq by storing table entry pointer into rseq_cs. */
- RSEQ_ASM_STORE_RSEQ_CS(1, 3b, RSEQ_CS_OFFSET(%[rseq_abi]))
- RSEQ_ASM_CMP_CPU_ID(cpu_id, RSEQ_CPU_ID_OFFSET(%[rseq_abi]), 4f)
- RSEQ_INJECT_ASM(3)
- "movl %[expect], %%eax\n\t"
- "cmpl %%eax, %[v]\n\t"
- "jnz 5f\n\t"
- RSEQ_INJECT_ASM(4)
-#ifdef RSEQ_COMPARE_TWICE
- RSEQ_ASM_CMP_CPU_ID(cpu_id, RSEQ_CPU_ID_OFFSET(%[rseq_abi]), 6f)
- "movl %[expect], %%eax\n\t"
- "cmpl %%eax, %[v]\n\t"
- "jnz 7f\n\t"
-#endif
- /* try memcpy */
- "test %[len], %[len]\n\t" \
- "jz 333f\n\t" \
- "222:\n\t" \
- "movb (%[src]), %%al\n\t" \
- "movb %%al, (%[dst])\n\t" \
- "inc %[src]\n\t" \
- "inc %[dst]\n\t" \
- "dec %[len]\n\t" \
- "jnz 222b\n\t" \
- "333:\n\t" \
- RSEQ_INJECT_ASM(5)
- "movl %[newv], %%eax\n\t"
- /* final store */
- "movl %%eax, %[v]\n\t"
- "2:\n\t"
- RSEQ_INJECT_ASM(6)
- /* teardown */
- "movl %[rseq_scratch2], %[len]\n\t"
- "movl %[rseq_scratch1], %[dst]\n\t"
- "movl %[rseq_scratch0], %[src]\n\t"
- RSEQ_ASM_DEFINE_ABORT(4,
- "movl %[rseq_scratch2], %[len]\n\t"
- "movl %[rseq_scratch1], %[dst]\n\t"
- "movl %[rseq_scratch0], %[src]\n\t",
- abort)
- RSEQ_ASM_DEFINE_CMPFAIL(5,
- "movl %[rseq_scratch2], %[len]\n\t"
- "movl %[rseq_scratch1], %[dst]\n\t"
- "movl %[rseq_scratch0], %[src]\n\t",
- cmpfail)
-#ifdef RSEQ_COMPARE_TWICE
- RSEQ_ASM_DEFINE_CMPFAIL(6,
- "movl %[rseq_scratch2], %[len]\n\t"
- "movl %[rseq_scratch1], %[dst]\n\t"
- "movl %[rseq_scratch0], %[src]\n\t",
- error1)
- RSEQ_ASM_DEFINE_CMPFAIL(7,
- "movl %[rseq_scratch2], %[len]\n\t"
- "movl %[rseq_scratch1], %[dst]\n\t"
- "movl %[rseq_scratch0], %[src]\n\t",
- error2)
-#endif
- : /* gcc asm goto does not allow outputs */
- : [cpu_id] "r" (cpu),
- [rseq_abi] "r" (&__rseq_abi),
- /* final store input */
- [v] "m" (*v),
- [expect] "m" (expect),
- [newv] "m" (newv),
- /* try memcpy input */
- [dst] "r" (dst),
- [src] "r" (src),
- [len] "r" (len),
- [rseq_scratch0] "m" (rseq_scratch[0]),
- [rseq_scratch1] "m" (rseq_scratch[1]),
- [rseq_scratch2] "m" (rseq_scratch[2])
- : "memory", "cc", "eax"
- RSEQ_INJECT_CLOBBER
- : abort, cmpfail
-#ifdef RSEQ_COMPARE_TWICE
- , error1, error2
-#endif
- );
- return 0;
-abort:
- RSEQ_INJECT_FAILED
- return -1;
-cmpfail:
- return 1;
-#ifdef RSEQ_COMPARE_TWICE
-error1:
- rseq_bug("cpu_id comparison failed");
-error2:
- rseq_bug("expected value comparison failed");
-#endif
-}
-
-/* TODO: implement a faster memcpy. */
-static inline __attribute__((always_inline))
-int rseq_cmpeqv_trymemcpy_storev_release(intptr_t *v, intptr_t expect,
- void *dst, void *src, size_t len,
- intptr_t newv, int cpu)
-{
- uint32_t rseq_scratch[3];
+/* Per-mm-cid indexing. */
- RSEQ_INJECT_C(9)
+#define RSEQ_TEMPLATE_MM_CID
+#define RSEQ_TEMPLATE_MO_RELAXED
+#include "rseq-x86-bits.h"
+#undef RSEQ_TEMPLATE_MO_RELAXED
- __asm__ __volatile__ goto (
- RSEQ_ASM_DEFINE_TABLE(3, 1f, 2f, 4f) /* start, commit, abort */
- RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[cmpfail])
-#ifdef RSEQ_COMPARE_TWICE
- RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[error1])
- RSEQ_ASM_DEFINE_EXIT_POINT(1f, %l[error2])
-#endif
- "movl %[src], %[rseq_scratch0]\n\t"
- "movl %[dst], %[rseq_scratch1]\n\t"
- "movl %[len], %[rseq_scratch2]\n\t"
- /* Start rseq by storing table entry pointer into rseq_cs. */
- RSEQ_ASM_STORE_RSEQ_CS(1, 3b, RSEQ_CS_OFFSET(%[rseq_abi]))
- RSEQ_ASM_CMP_CPU_ID(cpu_id, RSEQ_CPU_ID_OFFSET(%[rseq_abi]), 4f)
- RSEQ_INJECT_ASM(3)
- "movl %[expect], %%eax\n\t"
- "cmpl %%eax, %[v]\n\t"
- "jnz 5f\n\t"
- RSEQ_INJECT_ASM(4)
-#ifdef RSEQ_COMPARE_TWICE
- RSEQ_ASM_CMP_CPU_ID(cpu_id, RSEQ_CPU_ID_OFFSET(%[rseq_abi]), 6f)
- "movl %[expect], %%eax\n\t"
- "cmpl %%eax, %[v]\n\t"
- "jnz 7f\n\t"
-#endif
- /* try memcpy */
- "test %[len], %[len]\n\t" \
- "jz 333f\n\t" \
- "222:\n\t" \
- "movb (%[src]), %%al\n\t" \
- "movb %%al, (%[dst])\n\t" \
- "inc %[src]\n\t" \
- "inc %[dst]\n\t" \
- "dec %[len]\n\t" \
- "jnz 222b\n\t" \
- "333:\n\t" \
- RSEQ_INJECT_ASM(5)
- "lock; addl $0,-128(%%esp)\n\t"
- "movl %[newv], %%eax\n\t"
- /* final store */
- "movl %%eax, %[v]\n\t"
- "2:\n\t"
- RSEQ_INJECT_ASM(6)
- /* teardown */
- "movl %[rseq_scratch2], %[len]\n\t"
- "movl %[rseq_scratch1], %[dst]\n\t"
- "movl %[rseq_scratch0], %[src]\n\t"
- RSEQ_ASM_DEFINE_ABORT(4,
- "movl %[rseq_scratch2], %[len]\n\t"
- "movl %[rseq_scratch1], %[dst]\n\t"
- "movl %[rseq_scratch0], %[src]\n\t",
- abort)
- RSEQ_ASM_DEFINE_CMPFAIL(5,
- "movl %[rseq_scratch2], %[len]\n\t"
- "movl %[rseq_scratch1], %[dst]\n\t"
- "movl %[rseq_scratch0], %[src]\n\t",
- cmpfail)
-#ifdef RSEQ_COMPARE_TWICE
- RSEQ_ASM_DEFINE_CMPFAIL(6,
- "movl %[rseq_scratch2], %[len]\n\t"
- "movl %[rseq_scratch1], %[dst]\n\t"
- "movl %[rseq_scratch0], %[src]\n\t",
- error1)
- RSEQ_ASM_DEFINE_CMPFAIL(7,
- "movl %[rseq_scratch2], %[len]\n\t"
- "movl %[rseq_scratch1], %[dst]\n\t"
- "movl %[rseq_scratch0], %[src]\n\t",
- error2)
-#endif
- : /* gcc asm goto does not allow outputs */
- : [cpu_id] "r" (cpu),
- [rseq_abi] "r" (&__rseq_abi),
- /* final store input */
- [v] "m" (*v),
- [expect] "m" (expect),
- [newv] "m" (newv),
- /* try memcpy input */
- [dst] "r" (dst),
- [src] "r" (src),
- [len] "r" (len),
- [rseq_scratch0] "m" (rseq_scratch[0]),
- [rseq_scratch1] "m" (rseq_scratch[1]),
- [rseq_scratch2] "m" (rseq_scratch[2])
- : "memory", "cc", "eax"
- RSEQ_INJECT_CLOBBER
- : abort, cmpfail
-#ifdef RSEQ_COMPARE_TWICE
- , error1, error2
-#endif
- );
- return 0;
-abort:
- RSEQ_INJECT_FAILED
- return -1;
-cmpfail:
- return 1;
-#ifdef RSEQ_COMPARE_TWICE
-error1:
- rseq_bug("cpu_id comparison failed");
-error2:
- rseq_bug("expected value comparison failed");
-#endif
-}
+#define RSEQ_TEMPLATE_MO_RELEASE
+#include "rseq-x86-bits.h"
+#undef RSEQ_TEMPLATE_MO_RELEASE
+#undef RSEQ_TEMPLATE_MM_CID
-#endif /* !RSEQ_SKIP_FASTPATH */
+/* APIs which are not based on cpu ids. */
-#endif
+#define RSEQ_TEMPLATE_CPU_ID_NONE
+#define RSEQ_TEMPLATE_MO_RELAXED
+#include "rseq-x86-bits.h"
+#undef RSEQ_TEMPLATE_MO_RELAXED
+#undef RSEQ_TEMPLATE_CPU_ID_NONE
diff --git a/tools/testing/selftests/rseq/rseq.c b/tools/testing/selftests/rseq/rseq.c
index 7159eb777fd3..96e812bdf8a4 100644
--- a/tools/testing/selftests/rseq/rseq.c
+++ b/tools/testing/selftests/rseq/rseq.c
@@ -26,104 +26,191 @@
#include <assert.h>
#include <signal.h>
#include <limits.h>
+#include <dlfcn.h>
+#include <stddef.h>
+#include <sys/auxv.h>
+#include <linux/auxvec.h>
+#include <linux/compiler.h>
+
+#include "../kselftest.h"
#include "rseq.h"
-#define ARRAY_SIZE(arr) (sizeof(arr) / sizeof((arr)[0]))
+/*
+ * Define weak versions to play nice with binaries that are statically linked
+ * against a libc that doesn't support registering its own rseq.
+ */
+__weak ptrdiff_t __rseq_offset;
+__weak unsigned int __rseq_size;
+__weak unsigned int __rseq_flags;
-__thread volatile struct rseq __rseq_abi = {
- .cpu_id = RSEQ_CPU_ID_UNINITIALIZED,
-};
+static const ptrdiff_t *libc_rseq_offset_p = &__rseq_offset;
+static const unsigned int *libc_rseq_size_p = &__rseq_size;
+static const unsigned int *libc_rseq_flags_p = &__rseq_flags;
+
+/* Offset from the thread pointer to the rseq area. */
+ptrdiff_t rseq_offset;
/*
- * Shared with other libraries. This library may take rseq ownership if it is
- * still 0 when executing the library constructor. Set to 1 by library
- * constructor when handling rseq. Set to 0 in destructor if handling rseq.
+ * Size of the registered rseq area. 0 if the registration was
+ * unsuccessful.
*/
-int __rseq_handled;
+unsigned int rseq_size = -1U;
+
+/* Flags used during rseq registration. */
+unsigned int rseq_flags;
+
+/*
+ * rseq feature size supported by the kernel. 0 if the registration was
+ * unsuccessful.
+ */
+unsigned int rseq_feature_size = -1U;
-/* Whether this library have ownership of rseq registration. */
static int rseq_ownership;
+static int rseq_reg_success; /* At least one rseq registration has succeded. */
-static __thread volatile uint32_t __rseq_refcount;
+/* Allocate a large area for the TLS. */
+#define RSEQ_THREAD_AREA_ALLOC_SIZE 1024
-static void signal_off_save(sigset_t *oldset)
-{
- sigset_t set;
- int ret;
+/* Original struct rseq feature size is 20 bytes. */
+#define ORIG_RSEQ_FEATURE_SIZE 20
- sigfillset(&set);
- ret = pthread_sigmask(SIG_BLOCK, &set, oldset);
- if (ret)
- abort();
-}
+/* Original struct rseq allocation size is 32 bytes. */
+#define ORIG_RSEQ_ALLOC_SIZE 32
+
+static
+__thread struct rseq_abi __rseq_abi __attribute__((tls_model("initial-exec"), aligned(RSEQ_THREAD_AREA_ALLOC_SIZE))) = {
+ .cpu_id = RSEQ_ABI_CPU_ID_UNINITIALIZED,
+};
-static void signal_restore(sigset_t oldset)
+static int sys_rseq(struct rseq_abi *rseq_abi, uint32_t rseq_len,
+ int flags, uint32_t sig)
{
- int ret;
+ return syscall(__NR_rseq, rseq_abi, rseq_len, flags, sig);
+}
- ret = pthread_sigmask(SIG_SETMASK, &oldset, NULL);
- if (ret)
- abort();
+static int sys_getcpu(unsigned *cpu, unsigned *node)
+{
+ return syscall(__NR_getcpu, cpu, node, NULL);
}
-static int sys_rseq(volatile struct rseq *rseq_abi, uint32_t rseq_len,
- int flags, uint32_t sig)
+int rseq_available(void)
{
- return syscall(__NR_rseq, rseq_abi, rseq_len, flags, sig);
+ int rc;
+
+ rc = sys_rseq(NULL, 0, 0, 0);
+ if (rc != -1)
+ abort();
+ switch (errno) {
+ case ENOSYS:
+ return 0;
+ case EINVAL:
+ return 1;
+ default:
+ abort();
+ }
}
int rseq_register_current_thread(void)
{
- int rc, ret = 0;
- sigset_t oldset;
+ int rc;
- if (!rseq_ownership)
+ if (!rseq_ownership) {
+ /* Treat libc's ownership as a successful registration. */
return 0;
- signal_off_save(&oldset);
- if (__rseq_refcount == UINT_MAX) {
- ret = -1;
- goto end;
}
- if (__rseq_refcount++)
- goto end;
- rc = sys_rseq(&__rseq_abi, sizeof(struct rseq), 0, RSEQ_SIG);
- if (!rc) {
- assert(rseq_current_cpu_raw() >= 0);
- goto end;
+ rc = sys_rseq(&__rseq_abi, rseq_size, 0, RSEQ_SIG);
+ if (rc) {
+ if (RSEQ_READ_ONCE(rseq_reg_success)) {
+ /* Incoherent success/failure within process. */
+ abort();
+ }
+ return -1;
}
- if (errno != EBUSY)
- __rseq_abi.cpu_id = RSEQ_CPU_ID_REGISTRATION_FAILED;
- ret = -1;
- __rseq_refcount--;
-end:
- signal_restore(oldset);
- return ret;
+ assert(rseq_current_cpu_raw() >= 0);
+ RSEQ_WRITE_ONCE(rseq_reg_success, 1);
+ return 0;
}
int rseq_unregister_current_thread(void)
{
- int rc, ret = 0;
- sigset_t oldset;
+ int rc;
- if (!rseq_ownership)
+ if (!rseq_ownership) {
+ /* Treat libc's ownership as a successful unregistration. */
return 0;
- signal_off_save(&oldset);
- if (!__rseq_refcount) {
- ret = -1;
- goto end;
}
- if (--__rseq_refcount)
- goto end;
- rc = sys_rseq(&__rseq_abi, sizeof(struct rseq),
- RSEQ_FLAG_UNREGISTER, RSEQ_SIG);
- if (!rc)
- goto end;
- __rseq_refcount = 1;
- ret = -1;
-end:
- signal_restore(oldset);
- return ret;
+ rc = sys_rseq(&__rseq_abi, rseq_size, RSEQ_ABI_FLAG_UNREGISTER, RSEQ_SIG);
+ if (rc)
+ return -1;
+ return 0;
+}
+
+static
+unsigned int get_rseq_feature_size(void)
+{
+ unsigned long auxv_rseq_feature_size, auxv_rseq_align;
+
+ auxv_rseq_align = getauxval(AT_RSEQ_ALIGN);
+ assert(!auxv_rseq_align || auxv_rseq_align <= RSEQ_THREAD_AREA_ALLOC_SIZE);
+
+ auxv_rseq_feature_size = getauxval(AT_RSEQ_FEATURE_SIZE);
+ assert(!auxv_rseq_feature_size || auxv_rseq_feature_size <= RSEQ_THREAD_AREA_ALLOC_SIZE);
+ if (auxv_rseq_feature_size)
+ return auxv_rseq_feature_size;
+ else
+ return ORIG_RSEQ_FEATURE_SIZE;
+}
+
+static __attribute__((constructor))
+void rseq_init(void)
+{
+ /*
+ * If the libc's registered rseq size isn't already valid, it may be
+ * because the binary is dynamically linked and not necessarily due to
+ * libc not having registered a restartable sequence. Try to find the
+ * symbols if that's the case.
+ */
+ if (!*libc_rseq_size_p) {
+ libc_rseq_offset_p = dlsym(RTLD_NEXT, "__rseq_offset");
+ libc_rseq_size_p = dlsym(RTLD_NEXT, "__rseq_size");
+ libc_rseq_flags_p = dlsym(RTLD_NEXT, "__rseq_flags");
+ }
+ if (libc_rseq_size_p && libc_rseq_offset_p && libc_rseq_flags_p &&
+ *libc_rseq_size_p != 0) {
+ /* rseq registration owned by glibc */
+ rseq_offset = *libc_rseq_offset_p;
+ rseq_size = *libc_rseq_size_p;
+ rseq_flags = *libc_rseq_flags_p;
+ rseq_feature_size = get_rseq_feature_size();
+ if (rseq_feature_size > rseq_size)
+ rseq_feature_size = rseq_size;
+ return;
+ }
+ rseq_ownership = 1;
+ if (!rseq_available()) {
+ rseq_size = 0;
+ rseq_feature_size = 0;
+ return;
+ }
+ rseq_offset = (void *)&__rseq_abi - rseq_thread_pointer();
+ rseq_flags = 0;
+ rseq_feature_size = get_rseq_feature_size();
+ if (rseq_feature_size == ORIG_RSEQ_FEATURE_SIZE)
+ rseq_size = ORIG_RSEQ_ALLOC_SIZE;
+ else
+ rseq_size = RSEQ_THREAD_AREA_ALLOC_SIZE;
+}
+
+static __attribute__((destructor))
+void rseq_exit(void)
+{
+ if (!rseq_ownership)
+ return;
+ rseq_offset = 0;
+ rseq_size = -1U;
+ rseq_feature_size = -1U;
+ rseq_ownership = 0;
}
int32_t rseq_fallback_current_cpu(void)
@@ -138,19 +225,15 @@ int32_t rseq_fallback_current_cpu(void)
return cpu;
}
-void __attribute__((constructor)) rseq_init(void)
+int32_t rseq_fallback_current_node(void)
{
- /* Check whether rseq is handled by another library. */
- if (__rseq_handled)
- return;
- __rseq_handled = 1;
- rseq_ownership = 1;
-}
+ uint32_t cpu_id, node_id;
+ int ret;
-void __attribute__((destructor)) rseq_fini(void)
-{
- if (!rseq_ownership)
- return;
- __rseq_handled = 0;
- rseq_ownership = 0;
+ ret = sys_getcpu(&cpu_id, &node_id);
+ if (ret) {
+ perror("sys_getcpu()");
+ return ret;
+ }
+ return (int32_t) node_id;
}
diff --git a/tools/testing/selftests/rseq/rseq.h b/tools/testing/selftests/rseq/rseq.h
index 3f63eb362b92..d7364ea4d201 100644
--- a/tools/testing/selftests/rseq/rseq.h
+++ b/tools/testing/selftests/rseq/rseq.h
@@ -16,7 +16,18 @@
#include <errno.h>
#include <stdio.h>
#include <stdlib.h>
-#include <linux/rseq.h>
+#include <stddef.h>
+#include "rseq-abi.h"
+#include "compiler.h"
+
+#ifndef rseq_sizeof_field
+#define rseq_sizeof_field(TYPE, MEMBER) sizeof((((TYPE *)0)->MEMBER))
+#endif
+
+#ifndef rseq_offsetofend
+#define rseq_offsetofend(TYPE, MEMBER) \
+ (offsetof(TYPE, MEMBER) + rseq_sizeof_field(TYPE, MEMBER))
+#endif
/*
* Empty code injection macros, override when testing.
@@ -43,8 +54,44 @@
#define RSEQ_INJECT_FAILED
#endif
-extern __thread volatile struct rseq __rseq_abi;
-extern int __rseq_handled;
+#include "rseq-thread-pointer.h"
+
+/* Offset from the thread pointer to the rseq area. */
+extern ptrdiff_t rseq_offset;
+
+/*
+ * Size of the registered rseq area. 0 if the registration was
+ * unsuccessful.
+ */
+extern unsigned int rseq_size;
+
+/* Flags used during rseq registration. */
+extern unsigned int rseq_flags;
+
+/*
+ * rseq feature size supported by the kernel. 0 if the registration was
+ * unsuccessful.
+ */
+extern unsigned int rseq_feature_size;
+
+enum rseq_mo {
+ RSEQ_MO_RELAXED = 0,
+ RSEQ_MO_CONSUME = 1, /* Unused */
+ RSEQ_MO_ACQUIRE = 2, /* Unused */
+ RSEQ_MO_RELEASE = 3,
+ RSEQ_MO_ACQ_REL = 4, /* Unused */
+ RSEQ_MO_SEQ_CST = 5, /* Unused */
+};
+
+enum rseq_percpu_mode {
+ RSEQ_PERCPU_CPU_ID = 0,
+ RSEQ_PERCPU_MM_CID = 1,
+};
+
+static inline struct rseq_abi *rseq_get_abi(void)
+{
+ return (struct rseq_abi *) ((uintptr_t) rseq_thread_pointer() + rseq_offset);
+}
#define rseq_likely(x) __builtin_expect(!!(x), 1)
#define rseq_unlikely(x) __builtin_expect(!!(x), 0)
@@ -79,6 +126,8 @@ extern int __rseq_handled;
#include <rseq-mips.h>
#elif defined(__s390__)
#include <rseq-s390.h>
+#elif defined(__riscv)
+#include <rseq-riscv.h>
#else
#error unsupported target
#endif
@@ -103,12 +152,17 @@ int rseq_unregister_current_thread(void);
int32_t rseq_fallback_current_cpu(void);
/*
+ * Restartable sequence fallback for reading the current node number.
+ */
+int32_t rseq_fallback_current_node(void);
+
+/*
* Values returned can be either the current CPU number, -1 (rseq is
* uninitialized), or -2 (rseq initialization has failed).
*/
static inline int32_t rseq_current_cpu_raw(void)
{
- return RSEQ_ACCESS_ONCE(__rseq_abi.cpu_id);
+ return RSEQ_ACCESS_ONCE(rseq_get_abi()->cpu_id);
}
/*
@@ -124,7 +178,7 @@ static inline int32_t rseq_current_cpu_raw(void)
*/
static inline uint32_t rseq_cpu_start(void)
{
- return RSEQ_ACCESS_ONCE(__rseq_abi.cpu_id_start);
+ return RSEQ_ACCESS_ONCE(rseq_get_abi()->cpu_id_start);
}
static inline uint32_t rseq_current_cpu(void)
@@ -137,13 +191,33 @@ static inline uint32_t rseq_current_cpu(void)
return cpu;
}
+static inline bool rseq_node_id_available(void)
+{
+ return (int) rseq_feature_size >= rseq_offsetofend(struct rseq_abi, node_id);
+}
+
+/*
+ * Current NUMA node number.
+ */
+static inline uint32_t rseq_current_node_id(void)
+{
+ assert(rseq_node_id_available());
+ return RSEQ_ACCESS_ONCE(rseq_get_abi()->node_id);
+}
+
+static inline bool rseq_mm_cid_available(void)
+{
+ return (int) rseq_feature_size >= rseq_offsetofend(struct rseq_abi, mm_cid);
+}
+
+static inline uint32_t rseq_current_mm_cid(void)
+{
+ return RSEQ_ACCESS_ONCE(rseq_get_abi()->mm_cid);
+}
+
static inline void rseq_clear_rseq_cs(void)
{
-#ifdef __LP64__
- __rseq_abi.rseq_cs.ptr = 0;
-#else
- __rseq_abi.rseq_cs.ptr.ptr32 = 0;
-#endif
+ RSEQ_WRITE_ONCE(rseq_get_abi()->rseq_cs.arch.ptr, 0);
}
/*
@@ -162,4 +236,149 @@ static inline void rseq_prepare_unload(void)
rseq_clear_rseq_cs();
}
+static inline __attribute__((always_inline))
+int rseq_cmpeqv_storev(enum rseq_mo rseq_mo, enum rseq_percpu_mode percpu_mode,
+ intptr_t *v, intptr_t expect,
+ intptr_t newv, int cpu)
+{
+ if (rseq_mo != RSEQ_MO_RELAXED)
+ return -1;
+ switch (percpu_mode) {
+ case RSEQ_PERCPU_CPU_ID:
+ return rseq_cmpeqv_storev_relaxed_cpu_id(v, expect, newv, cpu);
+ case RSEQ_PERCPU_MM_CID:
+ return rseq_cmpeqv_storev_relaxed_mm_cid(v, expect, newv, cpu);
+ }
+ return -1;
+}
+
+/*
+ * Compare @v against @expectnot. When it does _not_ match, load @v
+ * into @load, and store the content of *@v + voffp into @v.
+ */
+static inline __attribute__((always_inline))
+int rseq_cmpnev_storeoffp_load(enum rseq_mo rseq_mo, enum rseq_percpu_mode percpu_mode,
+ intptr_t *v, intptr_t expectnot, long voffp, intptr_t *load,
+ int cpu)
+{
+ if (rseq_mo != RSEQ_MO_RELAXED)
+ return -1;
+ switch (percpu_mode) {
+ case RSEQ_PERCPU_CPU_ID:
+ return rseq_cmpnev_storeoffp_load_relaxed_cpu_id(v, expectnot, voffp, load, cpu);
+ case RSEQ_PERCPU_MM_CID:
+ return rseq_cmpnev_storeoffp_load_relaxed_mm_cid(v, expectnot, voffp, load, cpu);
+ }
+ return -1;
+}
+
+static inline __attribute__((always_inline))
+int rseq_addv(enum rseq_mo rseq_mo, enum rseq_percpu_mode percpu_mode,
+ intptr_t *v, intptr_t count, int cpu)
+{
+ if (rseq_mo != RSEQ_MO_RELAXED)
+ return -1;
+ switch (percpu_mode) {
+ case RSEQ_PERCPU_CPU_ID:
+ return rseq_addv_relaxed_cpu_id(v, count, cpu);
+ case RSEQ_PERCPU_MM_CID:
+ return rseq_addv_relaxed_mm_cid(v, count, cpu);
+ }
+ return -1;
+}
+
+#ifdef RSEQ_ARCH_HAS_OFFSET_DEREF_ADDV
+/*
+ * pval = *(ptr+off)
+ * *pval += inc;
+ */
+static inline __attribute__((always_inline))
+int rseq_offset_deref_addv(enum rseq_mo rseq_mo, enum rseq_percpu_mode percpu_mode,
+ intptr_t *ptr, long off, intptr_t inc, int cpu)
+{
+ if (rseq_mo != RSEQ_MO_RELAXED)
+ return -1;
+ switch (percpu_mode) {
+ case RSEQ_PERCPU_CPU_ID:
+ return rseq_offset_deref_addv_relaxed_cpu_id(ptr, off, inc, cpu);
+ case RSEQ_PERCPU_MM_CID:
+ return rseq_offset_deref_addv_relaxed_mm_cid(ptr, off, inc, cpu);
+ }
+ return -1;
+}
+#endif
+
+static inline __attribute__((always_inline))
+int rseq_cmpeqv_trystorev_storev(enum rseq_mo rseq_mo, enum rseq_percpu_mode percpu_mode,
+ intptr_t *v, intptr_t expect,
+ intptr_t *v2, intptr_t newv2,
+ intptr_t newv, int cpu)
+{
+ switch (rseq_mo) {
+ case RSEQ_MO_RELAXED:
+ switch (percpu_mode) {
+ case RSEQ_PERCPU_CPU_ID:
+ return rseq_cmpeqv_trystorev_storev_relaxed_cpu_id(v, expect, v2, newv2, newv, cpu);
+ case RSEQ_PERCPU_MM_CID:
+ return rseq_cmpeqv_trystorev_storev_relaxed_mm_cid(v, expect, v2, newv2, newv, cpu);
+ }
+ return -1;
+ case RSEQ_MO_RELEASE:
+ switch (percpu_mode) {
+ case RSEQ_PERCPU_CPU_ID:
+ return rseq_cmpeqv_trystorev_storev_release_cpu_id(v, expect, v2, newv2, newv, cpu);
+ case RSEQ_PERCPU_MM_CID:
+ return rseq_cmpeqv_trystorev_storev_release_mm_cid(v, expect, v2, newv2, newv, cpu);
+ }
+ return -1;
+ default:
+ return -1;
+ }
+}
+
+static inline __attribute__((always_inline))
+int rseq_cmpeqv_cmpeqv_storev(enum rseq_mo rseq_mo, enum rseq_percpu_mode percpu_mode,
+ intptr_t *v, intptr_t expect,
+ intptr_t *v2, intptr_t expect2,
+ intptr_t newv, int cpu)
+{
+ if (rseq_mo != RSEQ_MO_RELAXED)
+ return -1;
+ switch (percpu_mode) {
+ case RSEQ_PERCPU_CPU_ID:
+ return rseq_cmpeqv_cmpeqv_storev_relaxed_cpu_id(v, expect, v2, expect2, newv, cpu);
+ case RSEQ_PERCPU_MM_CID:
+ return rseq_cmpeqv_cmpeqv_storev_relaxed_mm_cid(v, expect, v2, expect2, newv, cpu);
+ }
+ return -1;
+}
+
+static inline __attribute__((always_inline))
+int rseq_cmpeqv_trymemcpy_storev(enum rseq_mo rseq_mo, enum rseq_percpu_mode percpu_mode,
+ intptr_t *v, intptr_t expect,
+ void *dst, void *src, size_t len,
+ intptr_t newv, int cpu)
+{
+ switch (rseq_mo) {
+ case RSEQ_MO_RELAXED:
+ switch (percpu_mode) {
+ case RSEQ_PERCPU_CPU_ID:
+ return rseq_cmpeqv_trymemcpy_storev_relaxed_cpu_id(v, expect, dst, src, len, newv, cpu);
+ case RSEQ_PERCPU_MM_CID:
+ return rseq_cmpeqv_trymemcpy_storev_relaxed_mm_cid(v, expect, dst, src, len, newv, cpu);
+ }
+ return -1;
+ case RSEQ_MO_RELEASE:
+ switch (percpu_mode) {
+ case RSEQ_PERCPU_CPU_ID:
+ return rseq_cmpeqv_trymemcpy_storev_release_cpu_id(v, expect, dst, src, len, newv, cpu);
+ case RSEQ_PERCPU_MM_CID:
+ return rseq_cmpeqv_trymemcpy_storev_release_mm_cid(v, expect, dst, src, len, newv, cpu);
+ }
+ return -1;
+ default:
+ return -1;
+ }
+}
+
#endif /* RSEQ_H_ */
diff --git a/tools/testing/selftests/rseq/run_param_test.sh b/tools/testing/selftests/rseq/run_param_test.sh
index e426304fd4a0..8d31426ab41f 100755
--- a/tools/testing/selftests/rseq/run_param_test.sh
+++ b/tools/testing/selftests/rseq/run_param_test.sh
@@ -15,6 +15,7 @@ TEST_LIST=(
"-T m"
"-T m -M"
"-T i"
+ "-T r"
)
TEST_NAME=(
@@ -25,6 +26,7 @@ TEST_NAME=(
"memcpy"
"memcpy with barrier"
"increment"
+ "membarrier"
)
IFS="$OLDIFS"
@@ -40,6 +42,11 @@ function do_tests()
./param_test ${TEST_LIST[$i]} -r ${REPS} -t ${NR_THREADS} ${@} ${EXTRA_ARGS} || exit 1
echo "Running compare-twice test ${TEST_NAME[$i]}"
./param_test_compare_twice ${TEST_LIST[$i]} -r ${REPS} -t ${NR_THREADS} ${@} ${EXTRA_ARGS} || exit 1
+
+ echo "Running mm_cid test ${TEST_NAME[$i]}"
+ ./param_test_mm_cid ${TEST_LIST[$i]} -r ${REPS} -t ${NR_THREADS} ${@} ${EXTRA_ARGS} || exit 1
+ echo "Running mm_cid compare-twice test ${TEST_NAME[$i]}"
+ ./param_test_mm_cid_compare_twice ${TEST_LIST[$i]} -r ${REPS} -t ${NR_THREADS} ${@} ${EXTRA_ARGS} || exit 1
let "i++"
done
}
diff --git a/tools/testing/selftests/rtc/rtctest.c b/tools/testing/selftests/rtc/rtctest.c
index 66af608fb4c6..63ce02d1d5cc 100644
--- a/tools/testing/selftests/rtc/rtctest.c
+++ b/tools/testing/selftests/rtc/rtctest.c
@@ -20,6 +20,8 @@
#define NUM_UIE 3
#define ALARM_DELTA 3
+#define READ_LOOP_DURATION_SEC 30
+#define READ_LOOP_SLEEP_MS 11
static char *rtc_file = "/dev/rtc0";
@@ -29,7 +31,6 @@ FIXTURE(rtc) {
FIXTURE_SETUP(rtc) {
self->fd = open(rtc_file, O_RDONLY);
- ASSERT_NE(-1, self->fd);
}
FIXTURE_TEARDOWN(rtc) {
@@ -40,6 +41,10 @@ TEST_F(rtc, date_read) {
int rc;
struct rtc_time rtc_tm;
+ if (self->fd == -1 && errno == ENOENT)
+ SKIP(return, "Skipping test since %s does not exist", rtc_file);
+ ASSERT_NE(-1, self->fd);
+
/* Read the RTC time/date */
rc = ioctl(self->fd, RTC_RD_TIME, &rtc_tm);
ASSERT_NE(-1, rc);
@@ -49,10 +54,82 @@ TEST_F(rtc, date_read) {
rtc_tm.tm_hour, rtc_tm.tm_min, rtc_tm.tm_sec);
}
+static time_t rtc_time_to_timestamp(struct rtc_time *rtc_time)
+{
+ struct tm tm_time = {
+ .tm_sec = rtc_time->tm_sec,
+ .tm_min = rtc_time->tm_min,
+ .tm_hour = rtc_time->tm_hour,
+ .tm_mday = rtc_time->tm_mday,
+ .tm_mon = rtc_time->tm_mon,
+ .tm_year = rtc_time->tm_year,
+ };
+
+ return mktime(&tm_time);
+}
+
+static void nanosleep_with_retries(long ns)
+{
+ struct timespec req = {
+ .tv_sec = 0,
+ .tv_nsec = ns,
+ };
+ struct timespec rem;
+
+ while (nanosleep(&req, &rem) != 0) {
+ req.tv_sec = rem.tv_sec;
+ req.tv_nsec = rem.tv_nsec;
+ }
+}
+
+TEST_F_TIMEOUT(rtc, date_read_loop, READ_LOOP_DURATION_SEC + 2) {
+ int rc;
+ long iter_count = 0;
+ struct rtc_time rtc_tm;
+ time_t start_rtc_read, prev_rtc_read;
+
+ if (self->fd == -1 && errno == ENOENT)
+ SKIP(return, "Skipping test since %s does not exist", rtc_file);
+ ASSERT_NE(-1, self->fd);
+
+ TH_LOG("Continuously reading RTC time for %ds (with %dms breaks after every read).",
+ READ_LOOP_DURATION_SEC, READ_LOOP_SLEEP_MS);
+
+ rc = ioctl(self->fd, RTC_RD_TIME, &rtc_tm);
+ ASSERT_NE(-1, rc);
+ start_rtc_read = rtc_time_to_timestamp(&rtc_tm);
+ prev_rtc_read = start_rtc_read;
+
+ do {
+ time_t rtc_read;
+
+ rc = ioctl(self->fd, RTC_RD_TIME, &rtc_tm);
+ ASSERT_NE(-1, rc);
+
+ rtc_read = rtc_time_to_timestamp(&rtc_tm);
+ /* Time should not go backwards */
+ ASSERT_LE(prev_rtc_read, rtc_read);
+ /* Time should not increase more then 1s at a time */
+ ASSERT_GE(prev_rtc_read + 1, rtc_read);
+
+ /* Sleep 11ms to avoid killing / overheating the RTC */
+ nanosleep_with_retries(READ_LOOP_SLEEP_MS * 1000000);
+
+ prev_rtc_read = rtc_read;
+ iter_count++;
+ } while (prev_rtc_read <= start_rtc_read + READ_LOOP_DURATION_SEC);
+
+ TH_LOG("Performed %ld RTC time reads.", iter_count);
+}
+
TEST_F_TIMEOUT(rtc, uie_read, NUM_UIE + 2) {
int i, rc, irq = 0;
unsigned long data;
+ if (self->fd == -1 && errno == ENOENT)
+ SKIP(return, "Skipping test since %s does not exist", rtc_file);
+ ASSERT_NE(-1, self->fd);
+
/* Turn on update interrupts */
rc = ioctl(self->fd, RTC_UIE_ON, 0);
if (rc == -1) {
@@ -78,6 +155,10 @@ TEST_F(rtc, uie_select) {
int i, rc, irq = 0;
unsigned long data;
+ if (self->fd == -1 && errno == ENOENT)
+ SKIP(return, "Skipping test since %s does not exist", rtc_file);
+ ASSERT_NE(-1, self->fd);
+
/* Turn on update interrupts */
rc = ioctl(self->fd, RTC_UIE_ON, 0);
if (rc == -1) {
@@ -117,6 +198,10 @@ TEST_F(rtc, alarm_alm_set) {
time_t secs, new;
int rc;
+ if (self->fd == -1 && errno == ENOENT)
+ SKIP(return, "Skipping test since %s does not exist", rtc_file);
+ ASSERT_NE(-1, self->fd);
+
rc = ioctl(self->fd, RTC_RD_TIME, &tm);
ASSERT_NE(-1, rc);
@@ -171,6 +256,10 @@ TEST_F(rtc, alarm_wkalm_set) {
time_t secs, new;
int rc;
+ if (self->fd == -1 && errno == ENOENT)
+ SKIP(return, "Skipping test since %s does not exist", rtc_file);
+ ASSERT_NE(-1, self->fd);
+
rc = ioctl(self->fd, RTC_RD_TIME, &alarm.time);
ASSERT_NE(-1, rc);
@@ -219,6 +308,10 @@ TEST_F_TIMEOUT(rtc, alarm_alm_set_minute, 65) {
time_t secs, new;
int rc;
+ if (self->fd == -1 && errno == ENOENT)
+ SKIP(return, "Skipping test since %s does not exist", rtc_file);
+ ASSERT_NE(-1, self->fd);
+
rc = ioctl(self->fd, RTC_RD_TIME, &tm);
ASSERT_NE(-1, rc);
@@ -273,6 +366,10 @@ TEST_F_TIMEOUT(rtc, alarm_wkalm_set_minute, 65) {
time_t secs, new;
int rc;
+ if (self->fd == -1 && errno == ENOENT)
+ SKIP(return, "Skipping test since %s does not exist", rtc_file);
+ ASSERT_NE(-1, self->fd);
+
rc = ioctl(self->fd, RTC_RD_TIME, &alarm.time);
ASSERT_NE(-1, rc);
diff --git a/tools/testing/selftests/rtc/settings b/tools/testing/selftests/rtc/settings
index ba4d85f74cd6..0c1a2075d5f3 100644
--- a/tools/testing/selftests/rtc/settings
+++ b/tools/testing/selftests/rtc/settings
@@ -1 +1 @@
-timeout=90
+timeout=210
diff --git a/tools/testing/selftests/run_kselftest.sh b/tools/testing/selftests/run_kselftest.sh
new file mode 100755
index 000000000000..a28c1416cb89
--- /dev/null
+++ b/tools/testing/selftests/run_kselftest.sh
@@ -0,0 +1,106 @@
+#!/bin/sh
+# SPDX-License-Identifier: GPL-2.0
+#
+# Run installed kselftest tests.
+#
+BASE_DIR=$(realpath $(dirname $0))
+cd $BASE_DIR
+TESTS="$BASE_DIR"/kselftest-list.txt
+if [ ! -r "$TESTS" ] ; then
+ echo "$0: Could not find list of tests to run ($TESTS)" >&2
+ available=""
+else
+ available="$(cat "$TESTS")"
+fi
+
+. ./kselftest/runner.sh
+ROOT=$PWD
+
+usage()
+{
+ cat <<EOF
+Usage: $0 [OPTIONS]
+ -s | --summary Print summary with detailed log in output.log (conflict with -p)
+ -p | --per_test_log Print test log in /tmp with each test name (conflict with -s)
+ -t | --test COLLECTION:TEST Run TEST from COLLECTION
+ -c | --collection COLLECTION Run all tests from COLLECTION
+ -l | --list List the available collection:test entries
+ -d | --dry-run Don't actually run any tests
+ -n | --netns Run each test in namespace
+ -h | --help Show this usage info
+ -o | --override-timeout Number of seconds after which we timeout
+EOF
+ exit $1
+}
+
+COLLECTIONS=""
+TESTS=""
+dryrun=""
+kselftest_override_timeout=""
+while true; do
+ case "$1" in
+ -s | --summary)
+ logfile="$BASE_DIR"/output.log
+ cat /dev/null > $logfile
+ shift ;;
+ -p | --per-test-log)
+ per_test_logging=1
+ shift ;;
+ -t | --test)
+ TESTS="$TESTS $2"
+ shift 2 ;;
+ -c | --collection)
+ COLLECTIONS="$COLLECTIONS $2"
+ shift 2 ;;
+ -l | --list)
+ echo "$available"
+ exit 0 ;;
+ -d | --dry-run)
+ dryrun="echo"
+ shift ;;
+ -n | --netns)
+ RUN_IN_NETNS=1
+ shift ;;
+ -o | --override-timeout)
+ kselftest_override_timeout="$2"
+ shift 2 ;;
+ -h | --help)
+ usage 0 ;;
+ "")
+ break ;;
+ *)
+ usage 1 ;;
+ esac
+done
+
+# Add all selected collections to the explicit test list.
+if [ -n "$COLLECTIONS" ]; then
+ for collection in $COLLECTIONS ; do
+ found="$(echo "$available" | grep "^$collection:")"
+ if [ -z "$found" ] ; then
+ echo "No such collection '$collection'" >&2
+ exit 1
+ fi
+ TESTS="$TESTS $found"
+ done
+fi
+# Replace available test list with explicitly selected tests.
+if [ -n "$TESTS" ]; then
+ valid=""
+ for test in $TESTS ; do
+ found="$(echo "$available" | grep "^${test}$")"
+ if [ -z "$found" ] ; then
+ echo "No such test '$test'" >&2
+ exit 1
+ fi
+ valid="$valid $found"
+ done
+ available="$(echo "$valid" | sed -e 's/ /\n/g')"
+fi
+
+collections=$(echo "$available" | cut -d: -f1 | sort | uniq)
+for collection in $collections ; do
+ [ -w /dev/kmsg ] && echo "kselftest: Running tests in $collection" >> /dev/kmsg
+ tests=$(echo "$available" | grep "^$collection:" | cut -d: -f2)
+ ($dryrun cd "$collection" && $dryrun run_many $tests)
+done
diff --git a/tools/testing/selftests/rust/Makefile b/tools/testing/selftests/rust/Makefile
new file mode 100644
index 000000000000..fce1584d3bc0
--- /dev/null
+++ b/tools/testing/selftests/rust/Makefile
@@ -0,0 +1,4 @@
+# SPDX-License-Identifier: GPL-2.0
+TEST_PROGS += test_probe_samples.sh
+
+include ../lib.mk
diff --git a/tools/testing/selftests/rust/config b/tools/testing/selftests/rust/config
new file mode 100644
index 000000000000..b4002acd40bc
--- /dev/null
+++ b/tools/testing/selftests/rust/config
@@ -0,0 +1,5 @@
+CONFIG_RUST=y
+CONFIG_SAMPLES=y
+CONFIG_SAMPLES_RUST=y
+CONFIG_SAMPLE_RUST_MINIMAL=m
+CONFIG_SAMPLE_RUST_PRINT=m \ No newline at end of file
diff --git a/tools/testing/selftests/rust/test_probe_samples.sh b/tools/testing/selftests/rust/test_probe_samples.sh
new file mode 100755
index 000000000000..ad0397e4986f
--- /dev/null
+++ b/tools/testing/selftests/rust/test_probe_samples.sh
@@ -0,0 +1,41 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+#
+# Copyright (c) 2023 Collabora Ltd
+#
+# This script tests whether the rust sample modules can
+# be added and removed correctly.
+#
+DIR="$(dirname "$(readlink -f "$0")")"
+
+KTAP_HELPERS="${DIR}/../kselftest/ktap_helpers.sh"
+if [ -e "$KTAP_HELPERS" ]; then
+ source "$KTAP_HELPERS"
+else
+ echo "$KTAP_HELPERS file not found [SKIP]"
+ exit 4
+fi
+
+rust_sample_modules=("rust_minimal" "rust_print")
+
+ktap_print_header
+
+for sample in "${rust_sample_modules[@]}"; do
+ if ! /sbin/modprobe -n -q "$sample"; then
+ ktap_skip_all "module $sample is not found in /lib/modules/$(uname -r)"
+ exit "$KSFT_SKIP"
+ fi
+done
+
+ktap_set_plan "${#rust_sample_modules[@]}"
+
+for sample in "${rust_sample_modules[@]}"; do
+ if /sbin/modprobe -q "$sample"; then
+ /sbin/modprobe -q -r "$sample"
+ ktap_test_pass "$sample"
+ else
+ ktap_test_fail "$sample"
+ fi
+done
+
+ktap_finished
diff --git a/tools/testing/selftests/safesetid/Makefile b/tools/testing/selftests/safesetid/Makefile
index fa02c4d5ec13..e815bbf2d0f4 100644
--- a/tools/testing/selftests/safesetid/Makefile
+++ b/tools/testing/selftests/safesetid/Makefile
@@ -1,5 +1,5 @@
# SPDX-License-Identifier: GPL-2.0
-# Makefile for mount selftests.
+# Makefile for SafeSetID selftest.
CFLAGS = -Wall -O2
LDLIBS = -lcap
diff --git a/tools/testing/selftests/safesetid/safesetid-test.c b/tools/testing/selftests/safesetid/safesetid-test.c
index 0c4d50644c13..eb9bf0aee951 100644
--- a/tools/testing/selftests/safesetid/safesetid-test.c
+++ b/tools/testing/selftests/safesetid/safesetid-test.c
@@ -3,6 +3,7 @@
#include <stdio.h>
#include <errno.h>
#include <pwd.h>
+#include <grp.h>
#include <string.h>
#include <syscall.h>
#include <sys/capability.h>
@@ -16,17 +17,28 @@
#include <stdbool.h>
#include <stdarg.h>
+/*
+ * NOTES about this test:
+ * - requries libcap-dev to be installed on test system
+ * - requires securityfs to me mounted at /sys/kernel/security, e.g.:
+ * mount -n -t securityfs -o nodev,noexec,nosuid securityfs /sys/kernel/security
+ * - needs CONFIG_SECURITYFS and CONFIG_SAFESETID to be enabled
+ */
+
#ifndef CLONE_NEWUSER
# define CLONE_NEWUSER 0x10000000
#endif
-#define ROOT_USER 0
-#define RESTRICTED_PARENT 1
-#define ALLOWED_CHILD1 2
-#define ALLOWED_CHILD2 3
-#define NO_POLICY_USER 4
+#define ROOT_UGID 0
+#define RESTRICTED_PARENT_UGID 1
+#define ALLOWED_CHILD1_UGID 2
+#define ALLOWED_CHILD2_UGID 3
+#define NO_POLICY_UGID 4
+
+#define UGID_POLICY_STRING "1:2\n1:3\n2:2\n3:3\n"
-char* add_whitelist_policy_file = "/sys/kernel/security/safesetid/add_whitelist_policy";
+char* add_uid_whitelist_policy_file = "/sys/kernel/security/safesetid/uid_allowlist_policy";
+char* add_gid_whitelist_policy_file = "/sys/kernel/security/safesetid/gid_allowlist_policy";
static void die(char *fmt, ...)
{
@@ -106,9 +118,10 @@ static void ensure_user_exists(uid_t uid)
die("couldn't open file\n");
if (fseek(fd, 0, SEEK_END))
die("couldn't fseek\n");
- snprintf(name_str, 10, "%d", uid);
+ snprintf(name_str, 10, "user %d", uid);
p.pw_name=name_str;
p.pw_uid=uid;
+ p.pw_gid=uid;
p.pw_gecos="Test account";
p.pw_dir="/dev/null";
p.pw_shell="/bin/false";
@@ -120,9 +133,36 @@ static void ensure_user_exists(uid_t uid)
}
}
+static void ensure_group_exists(gid_t gid)
+{
+ struct group g;
+
+ FILE *fd;
+ char name_str[10];
+
+ if (getgrgid(gid) == NULL) {
+ memset(&g,0x00,sizeof(g));
+ fd=fopen("/etc/group","a");
+ if (fd == NULL)
+ die("couldn't open group file\n");
+ if (fseek(fd, 0, SEEK_END))
+ die("couldn't fseek group file\n");
+ snprintf(name_str, 10, "group %d", gid);
+ g.gr_name=name_str;
+ g.gr_gid=gid;
+ g.gr_passwd=NULL;
+ g.gr_mem=NULL;
+ int value = putgrent(&g,fd);
+ if (value != 0)
+ die("putgrent failed\n");
+ if (fclose(fd))
+ die("fclose failed\n");
+ }
+}
+
static void ensure_securityfs_mounted(void)
{
- int fd = open(add_whitelist_policy_file, O_WRONLY);
+ int fd = open(add_uid_whitelist_policy_file, O_WRONLY);
if (fd < 0) {
if (errno == ENOENT) {
// Need to mount securityfs
@@ -135,39 +175,60 @@ static void ensure_securityfs_mounted(void)
} else {
if (close(fd) != 0) {
die("close of %s failed: %s\n",
- add_whitelist_policy_file, strerror(errno));
+ add_uid_whitelist_policy_file, strerror(errno));
+ }
+ }
+}
+
+static void write_uid_policies()
+{
+ static char *policy_str = UGID_POLICY_STRING;
+ ssize_t written;
+ int fd;
+
+ fd = open(add_uid_whitelist_policy_file, O_WRONLY);
+ if (fd < 0)
+ die("can't open add_uid_whitelist_policy file\n");
+ written = write(fd, policy_str, strlen(policy_str));
+ if (written != strlen(policy_str)) {
+ if (written >= 0) {
+ die("short write to %s\n", add_uid_whitelist_policy_file);
+ } else {
+ die("write to %s failed: %s\n",
+ add_uid_whitelist_policy_file, strerror(errno));
}
}
+ if (close(fd) != 0) {
+ die("close of %s failed: %s\n",
+ add_uid_whitelist_policy_file, strerror(errno));
+ }
}
-static void write_policies(void)
+static void write_gid_policies()
{
- static char *policy_str =
- "1:2\n"
- "1:3\n"
- "2:2\n"
- "3:3\n";
+ static char *policy_str = UGID_POLICY_STRING;
ssize_t written;
int fd;
- fd = open(add_whitelist_policy_file, O_WRONLY);
+ fd = open(add_gid_whitelist_policy_file, O_WRONLY);
if (fd < 0)
- die("cant open add_whitelist_policy file\n");
+ die("can't open add_gid_whitelist_policy file\n");
written = write(fd, policy_str, strlen(policy_str));
if (written != strlen(policy_str)) {
if (written >= 0) {
- die("short write to %s\n", add_whitelist_policy_file);
+ die("short write to %s\n", add_gid_whitelist_policy_file);
} else {
die("write to %s failed: %s\n",
- add_whitelist_policy_file, strerror(errno));
+ add_gid_whitelist_policy_file, strerror(errno));
}
}
if (close(fd) != 0) {
die("close of %s failed: %s\n",
- add_whitelist_policy_file, strerror(errno));
+ add_gid_whitelist_policy_file, strerror(errno));
}
}
+
static bool test_userns(bool expect_success)
{
uid_t uid;
@@ -194,7 +255,7 @@ static bool test_userns(bool expect_success)
printf("preparing file name string failed");
return false;
}
- success = write_file(map_file_name, "0 0 1", uid);
+ success = write_file(map_file_name, "0 %d 1", uid);
return success == expect_success;
}
@@ -258,13 +319,144 @@ static void test_setuid(uid_t child_uid, bool expect_success)
die("should not reach here\n");
}
+static void test_setgid(gid_t child_gid, bool expect_success)
+{
+ pid_t cpid, w;
+ int wstatus;
+
+ cpid = fork();
+ if (cpid == -1) {
+ die("fork\n");
+ }
+
+ if (cpid == 0) { /* Code executed by child */
+ if (setgid(child_gid) < 0)
+ exit(EXIT_FAILURE);
+ if (getgid() == child_gid)
+ exit(EXIT_SUCCESS);
+ else
+ exit(EXIT_FAILURE);
+ } else { /* Code executed by parent */
+ do {
+ w = waitpid(cpid, &wstatus, WUNTRACED | WCONTINUED);
+ if (w == -1) {
+ die("waitpid\n");
+ }
+
+ if (WIFEXITED(wstatus)) {
+ if (WEXITSTATUS(wstatus) == EXIT_SUCCESS) {
+ if (expect_success) {
+ return;
+ } else {
+ die("unexpected success\n");
+ }
+ } else {
+ if (expect_success) {
+ die("unexpected failure\n");
+ } else {
+ return;
+ }
+ }
+ } else if (WIFSIGNALED(wstatus)) {
+ if (WTERMSIG(wstatus) == 9) {
+ if (expect_success)
+ die("killed unexpectedly\n");
+ else
+ return;
+ } else {
+ die("unexpected signal: %d\n", wstatus);
+ }
+ } else {
+ die("unexpected status: %d\n", wstatus);
+ }
+ } while (!WIFEXITED(wstatus) && !WIFSIGNALED(wstatus));
+ }
+
+ die("should not reach here\n");
+}
+
+static void test_setgroups(gid_t* child_groups, size_t len, bool expect_success)
+{
+ pid_t cpid, w;
+ int wstatus;
+ gid_t groupset[len];
+ int i, j;
+
+ cpid = fork();
+ if (cpid == -1) {
+ die("fork\n");
+ }
+
+ if (cpid == 0) { /* Code executed by child */
+ if (setgroups(len, child_groups) != 0)
+ exit(EXIT_FAILURE);
+ if (getgroups(len, groupset) != len)
+ exit(EXIT_FAILURE);
+ for (i = 0; i < len; i++) {
+ for (j = 0; j < len; j++) {
+ if (child_groups[i] == groupset[j])
+ break;
+ if (j == len - 1)
+ exit(EXIT_FAILURE);
+ }
+ }
+ exit(EXIT_SUCCESS);
+ } else { /* Code executed by parent */
+ do {
+ w = waitpid(cpid, &wstatus, WUNTRACED | WCONTINUED);
+ if (w == -1) {
+ die("waitpid\n");
+ }
+
+ if (WIFEXITED(wstatus)) {
+ if (WEXITSTATUS(wstatus) == EXIT_SUCCESS) {
+ if (expect_success) {
+ return;
+ } else {
+ die("unexpected success\n");
+ }
+ } else {
+ if (expect_success) {
+ die("unexpected failure\n");
+ } else {
+ return;
+ }
+ }
+ } else if (WIFSIGNALED(wstatus)) {
+ if (WTERMSIG(wstatus) == 9) {
+ if (expect_success)
+ die("killed unexpectedly\n");
+ else
+ return;
+ } else {
+ die("unexpected signal: %d\n", wstatus);
+ }
+ } else {
+ die("unexpected status: %d\n", wstatus);
+ }
+ } while (!WIFEXITED(wstatus) && !WIFSIGNALED(wstatus));
+ }
+
+ die("should not reach here\n");
+}
+
+
static void ensure_users_exist(void)
{
- ensure_user_exists(ROOT_USER);
- ensure_user_exists(RESTRICTED_PARENT);
- ensure_user_exists(ALLOWED_CHILD1);
- ensure_user_exists(ALLOWED_CHILD2);
- ensure_user_exists(NO_POLICY_USER);
+ ensure_user_exists(ROOT_UGID);
+ ensure_user_exists(RESTRICTED_PARENT_UGID);
+ ensure_user_exists(ALLOWED_CHILD1_UGID);
+ ensure_user_exists(ALLOWED_CHILD2_UGID);
+ ensure_user_exists(NO_POLICY_UGID);
+}
+
+static void ensure_groups_exist(void)
+{
+ ensure_group_exists(ROOT_UGID);
+ ensure_group_exists(RESTRICTED_PARENT_UGID);
+ ensure_group_exists(ALLOWED_CHILD1_UGID);
+ ensure_group_exists(ALLOWED_CHILD2_UGID);
+ ensure_group_exists(NO_POLICY_UGID);
}
static void drop_caps(bool setid_retained)
@@ -283,41 +475,52 @@ static void drop_caps(bool setid_retained)
int main(int argc, char **argv)
{
+ ensure_groups_exist();
ensure_users_exist();
ensure_securityfs_mounted();
- write_policies();
+ write_uid_policies();
+ write_gid_policies();
if (prctl(PR_SET_KEEPCAPS, 1L))
die("Error with set keepcaps\n");
- // First test to make sure we can write userns mappings from a user
- // that doesn't have any restrictions (as long as it has CAP_SETUID);
- if (setuid(NO_POLICY_USER) < 0)
- die("Error with set uid(%d)\n", NO_POLICY_USER);
- if (setgid(NO_POLICY_USER) < 0)
- die("Error with set gid(%d)\n", NO_POLICY_USER);
-
+ // First test to make sure we can write userns mappings from a non-root
+ // user that doesn't have any restrictions (as long as it has
+ // CAP_SETUID);
+ if (setgid(NO_POLICY_UGID) < 0)
+ die("Error with set gid(%d)\n", NO_POLICY_UGID);
+ if (setuid(NO_POLICY_UGID) < 0)
+ die("Error with set uid(%d)\n", NO_POLICY_UGID);
// Take away all but setid caps
drop_caps(true);
-
// Need PR_SET_DUMPABLE flag set so we can write /proc/[pid]/uid_map
// from non-root parent process.
if (prctl(PR_SET_DUMPABLE, 1, 0, 0, 0))
die("Error with set dumpable\n");
-
if (!test_userns(true)) {
die("test_userns failed when it should work\n");
}
- if (setuid(RESTRICTED_PARENT) < 0)
- die("Error with set uid(%d)\n", RESTRICTED_PARENT);
- if (setgid(RESTRICTED_PARENT) < 0)
- die("Error with set gid(%d)\n", RESTRICTED_PARENT);
+ // Now switch to a user/group with restrictions
+ if (setgid(RESTRICTED_PARENT_UGID) < 0)
+ die("Error with set gid(%d)\n", RESTRICTED_PARENT_UGID);
+ if (setuid(RESTRICTED_PARENT_UGID) < 0)
+ die("Error with set uid(%d)\n", RESTRICTED_PARENT_UGID);
+
+ test_setuid(ROOT_UGID, false);
+ test_setuid(ALLOWED_CHILD1_UGID, true);
+ test_setuid(ALLOWED_CHILD2_UGID, true);
+ test_setuid(NO_POLICY_UGID, false);
+
+ test_setgid(ROOT_UGID, false);
+ test_setgid(ALLOWED_CHILD1_UGID, true);
+ test_setgid(ALLOWED_CHILD2_UGID, true);
+ test_setgid(NO_POLICY_UGID, false);
- test_setuid(ROOT_USER, false);
- test_setuid(ALLOWED_CHILD1, true);
- test_setuid(ALLOWED_CHILD2, true);
- test_setuid(NO_POLICY_USER, false);
+ gid_t allowed_supp_groups[2] = {ALLOWED_CHILD1_UGID, ALLOWED_CHILD2_UGID};
+ gid_t disallowed_supp_groups[2] = {ROOT_UGID, NO_POLICY_UGID};
+ test_setgroups(allowed_supp_groups, 2, true);
+ test_setgroups(disallowed_supp_groups, 2, false);
if (!test_userns(false)) {
die("test_userns worked when it should fail\n");
@@ -328,8 +531,12 @@ int main(int argc, char **argv)
test_setuid(2, false);
test_setuid(3, false);
test_setuid(4, false);
+ test_setgid(2, false);
+ test_setgid(3, false);
+ test_setgid(4, false);
// NOTE: this test doesn't clean up users that were created in
// /etc/passwd or flush policies that were added to the LSM.
+ printf("test successful!\n");
return EXIT_SUCCESS;
}
diff --git a/tools/testing/selftests/sched/.gitignore b/tools/testing/selftests/sched/.gitignore
new file mode 100644
index 000000000000..6996d4654d92
--- /dev/null
+++ b/tools/testing/selftests/sched/.gitignore
@@ -0,0 +1 @@
+cs_prctl_test
diff --git a/tools/testing/selftests/sched/Makefile b/tools/testing/selftests/sched/Makefile
new file mode 100644
index 000000000000..099ee9213557
--- /dev/null
+++ b/tools/testing/selftests/sched/Makefile
@@ -0,0 +1,14 @@
+# SPDX-License-Identifier: GPL-2.0+
+
+ifneq ($(shell $(CC) --version 2>&1 | head -n 1 | grep clang),)
+CLANG_FLAGS += -no-integrated-as
+endif
+
+CFLAGS += -O2 -Wall -g -I./ $(KHDR_INCLUDES) -Wl,-rpath=./ \
+ $(CLANG_FLAGS)
+LDLIBS += -lpthread
+
+TEST_GEN_FILES := cs_prctl_test
+TEST_PROGS := cs_prctl_test
+
+include ../lib.mk
diff --git a/tools/testing/selftests/sched/config b/tools/testing/selftests/sched/config
new file mode 100644
index 000000000000..e8b09aa7c0c4
--- /dev/null
+++ b/tools/testing/selftests/sched/config
@@ -0,0 +1 @@
+CONFIG_SCHED_DEBUG=y
diff --git a/tools/testing/selftests/sched/cs_prctl_test.c b/tools/testing/selftests/sched/cs_prctl_test.c
new file mode 100644
index 000000000000..62fba7356af2
--- /dev/null
+++ b/tools/testing/selftests/sched/cs_prctl_test.c
@@ -0,0 +1,359 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Use the core scheduling prctl() to test core scheduling cookies control.
+ *
+ * Copyright (c) 2021 Oracle and/or its affiliates.
+ * Author: Chris Hyser <chris.hyser@oracle.com>
+ *
+ *
+ * This library is free software; you can redistribute it and/or modify it
+ * under the terms of version 2.1 of the GNU Lesser General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This library is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License
+ * for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public License
+ * along with this library; if not, see <http://www.gnu.org/licenses>.
+ */
+
+#define _GNU_SOURCE
+#include <sys/eventfd.h>
+#include <sys/wait.h>
+#include <sys/types.h>
+#include <sched.h>
+#include <sys/prctl.h>
+#include <unistd.h>
+#include <time.h>
+#include <errno.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#if __GLIBC_PREREQ(2, 30) == 0
+#include <sys/syscall.h>
+static pid_t gettid(void)
+{
+ return syscall(SYS_gettid);
+}
+#endif
+
+#ifndef PR_SCHED_CORE
+#define PR_SCHED_CORE 62
+# define PR_SCHED_CORE_GET 0
+# define PR_SCHED_CORE_CREATE 1 /* create unique core_sched cookie */
+# define PR_SCHED_CORE_SHARE_TO 2 /* push core_sched cookie to pid */
+# define PR_SCHED_CORE_SHARE_FROM 3 /* pull core_sched cookie to pid */
+# define PR_SCHED_CORE_MAX 4
+#endif
+
+#define MAX_PROCESSES 128
+#define MAX_THREADS 128
+
+static const char USAGE[] = "cs_prctl_test [options]\n"
+" options:\n"
+" -P : number of processes to create.\n"
+" -T : number of threads per process to create.\n"
+" -d : delay time to keep tasks alive.\n"
+" -k : keep tasks alive until keypress.\n";
+
+enum pid_type {PIDTYPE_PID = 0, PIDTYPE_TGID, PIDTYPE_PGID};
+
+const int THREAD_CLONE_FLAGS = CLONE_THREAD | CLONE_SIGHAND | CLONE_FS | CLONE_VM | CLONE_FILES;
+
+struct child_args {
+ int num_threads;
+ int pfd[2];
+ int cpid;
+ int thr_tids[MAX_THREADS];
+};
+
+static struct child_args procs[MAX_PROCESSES];
+static int num_processes = 2;
+static int need_cleanup;
+
+static int _prctl(int option, unsigned long arg2, unsigned long arg3, unsigned long arg4,
+ unsigned long arg5)
+{
+ int res;
+
+ res = prctl(option, arg2, arg3, arg4, arg5);
+ printf("%d = prctl(%d, %ld, %ld, %ld, %lx)\n", res, option, (long)arg2, (long)arg3,
+ (long)arg4, arg5);
+ return res;
+}
+
+#define STACK_SIZE (1024 * 1024)
+
+#define handle_error(msg) __handle_error(__FILE__, __LINE__, msg)
+static void __handle_error(char *fn, int ln, char *msg)
+{
+ int pidx;
+ printf("(%s:%d) - ", fn, ln);
+ perror(msg);
+ if (need_cleanup) {
+ for (pidx = 0; pidx < num_processes; ++pidx)
+ kill(procs[pidx].cpid, 15);
+ need_cleanup = 0;
+ }
+ exit(EXIT_FAILURE);
+}
+
+static void handle_usage(int rc, char *msg)
+{
+ puts(USAGE);
+ puts(msg);
+ putchar('\n');
+ exit(rc);
+}
+
+static unsigned long get_cs_cookie(int pid)
+{
+ unsigned long long cookie;
+ int ret;
+
+ ret = prctl(PR_SCHED_CORE, PR_SCHED_CORE_GET, pid, PIDTYPE_PID,
+ (unsigned long)&cookie);
+ if (ret) {
+ printf("Not a core sched system\n");
+ return -1UL;
+ }
+
+ return cookie;
+}
+
+static int child_func_thread(void __attribute__((unused))*arg)
+{
+ while (1)
+ usleep(20000);
+ return 0;
+}
+
+static void create_threads(int num_threads, int thr_tids[])
+{
+ void *child_stack;
+ pid_t tid;
+ int i;
+
+ for (i = 0; i < num_threads; ++i) {
+ child_stack = malloc(STACK_SIZE);
+ if (!child_stack)
+ handle_error("child stack allocate");
+
+ tid = clone(child_func_thread, child_stack + STACK_SIZE, THREAD_CLONE_FLAGS, NULL);
+ if (tid == -1)
+ handle_error("clone thread");
+ thr_tids[i] = tid;
+ }
+}
+
+static int child_func_process(void *arg)
+{
+ struct child_args *ca = (struct child_args *)arg;
+ int ret;
+
+ close(ca->pfd[0]);
+
+ create_threads(ca->num_threads, ca->thr_tids);
+
+ ret = write(ca->pfd[1], &ca->thr_tids, sizeof(int) * ca->num_threads);
+ if (ret == -1)
+ printf("write failed on pfd[%d] - error (%s)\n",
+ ca->pfd[1], strerror(errno));
+
+ close(ca->pfd[1]);
+
+ while (1)
+ usleep(20000);
+ return 0;
+}
+
+static unsigned char child_func_process_stack[STACK_SIZE];
+
+void create_processes(int num_processes, int num_threads, struct child_args proc[])
+{
+ pid_t cpid;
+ int i, ret;
+
+ for (i = 0; i < num_processes; ++i) {
+ proc[i].num_threads = num_threads;
+
+ if (pipe(proc[i].pfd) == -1)
+ handle_error("pipe() failed");
+
+ cpid = clone(child_func_process, child_func_process_stack + STACK_SIZE,
+ SIGCHLD, &proc[i]);
+ proc[i].cpid = cpid;
+ close(proc[i].pfd[1]);
+ }
+
+ for (i = 0; i < num_processes; ++i) {
+ ret = read(proc[i].pfd[0], &proc[i].thr_tids, sizeof(int) * proc[i].num_threads);
+ if (ret == -1)
+ printf("read failed on proc[%d].pfd[0] error (%s)\n",
+ i, strerror(errno));
+ close(proc[i].pfd[0]);
+ }
+}
+
+void disp_processes(int num_processes, struct child_args proc[])
+{
+ int i, j;
+
+ printf("tid=%d, / tgid=%d / pgid=%d: %lx\n", gettid(), getpid(), getpgid(0),
+ get_cs_cookie(getpid()));
+
+ for (i = 0; i < num_processes; ++i) {
+ printf(" tid=%d, / tgid=%d / pgid=%d: %lx\n", proc[i].cpid, proc[i].cpid,
+ getpgid(proc[i].cpid), get_cs_cookie(proc[i].cpid));
+ for (j = 0; j < proc[i].num_threads; ++j) {
+ printf(" tid=%d, / tgid=%d / pgid=%d: %lx\n", proc[i].thr_tids[j],
+ proc[i].cpid, getpgid(0), get_cs_cookie(proc[i].thr_tids[j]));
+ }
+ }
+ puts("\n");
+}
+
+static int errors;
+
+#define validate(v) _validate(__LINE__, v, #v)
+void _validate(int line, int val, char *msg)
+{
+ if (!val) {
+ ++errors;
+ printf("(%d) FAILED: %s\n", line, msg);
+ } else {
+ printf("(%d) PASSED: %s\n", line, msg);
+ }
+}
+
+int main(int argc, char *argv[])
+{
+ int keypress = 0;
+ int num_threads = 3;
+ int delay = 0;
+ int res = 0;
+ int pidx;
+ int pid;
+ int opt;
+
+ while ((opt = getopt(argc, argv, ":hkT:P:d:")) != -1) {
+ switch (opt) {
+ case 'P':
+ num_processes = (int)strtol(optarg, NULL, 10);
+ break;
+ case 'T':
+ num_threads = (int)strtoul(optarg, NULL, 10);
+ break;
+ case 'd':
+ delay = (int)strtol(optarg, NULL, 10);
+ break;
+ case 'k':
+ keypress = 1;
+ break;
+ case 'h':
+ printf(USAGE);
+ exit(EXIT_SUCCESS);
+ default:
+ handle_usage(20, "unknown option");
+ }
+ }
+
+ if (num_processes < 1 || num_processes > MAX_PROCESSES)
+ handle_usage(1, "Bad processes value");
+
+ if (num_threads < 1 || num_threads > MAX_THREADS)
+ handle_usage(2, "Bad thread value");
+
+ if (keypress)
+ delay = -1;
+
+ srand(time(NULL));
+
+ /* put into separate process group */
+ if (setpgid(0, 0) != 0)
+ handle_error("process group");
+
+ printf("\n## Create a thread/process/process group hierarchy\n");
+ create_processes(num_processes, num_threads, procs);
+ need_cleanup = 1;
+ disp_processes(num_processes, procs);
+ validate(get_cs_cookie(0) == 0);
+
+ printf("\n## Set a cookie on entire process group\n");
+ if (_prctl(PR_SCHED_CORE, PR_SCHED_CORE_CREATE, 0, PIDTYPE_PGID, 0) < 0)
+ handle_error("core_sched create failed -- PGID");
+ disp_processes(num_processes, procs);
+
+ validate(get_cs_cookie(0) != 0);
+
+ /* get a random process pid */
+ pidx = rand() % num_processes;
+ pid = procs[pidx].cpid;
+
+ validate(get_cs_cookie(0) == get_cs_cookie(pid));
+ validate(get_cs_cookie(0) == get_cs_cookie(procs[pidx].thr_tids[0]));
+
+ printf("\n## Set a new cookie on entire process/TGID [%d]\n", pid);
+ if (_prctl(PR_SCHED_CORE, PR_SCHED_CORE_CREATE, pid, PIDTYPE_TGID, 0) < 0)
+ handle_error("core_sched create failed -- TGID");
+ disp_processes(num_processes, procs);
+
+ validate(get_cs_cookie(0) != get_cs_cookie(pid));
+ validate(get_cs_cookie(pid) != 0);
+ validate(get_cs_cookie(pid) == get_cs_cookie(procs[pidx].thr_tids[0]));
+
+ printf("\n## Copy the cookie of current/PGID[%d], to pid [%d] as PIDTYPE_PID\n",
+ getpid(), pid);
+ if (_prctl(PR_SCHED_CORE, PR_SCHED_CORE_SHARE_TO, pid, PIDTYPE_PID, 0) < 0)
+ handle_error("core_sched share to itself failed -- PID");
+ disp_processes(num_processes, procs);
+
+ validate(get_cs_cookie(0) == get_cs_cookie(pid));
+ validate(get_cs_cookie(pid) != 0);
+ validate(get_cs_cookie(pid) != get_cs_cookie(procs[pidx].thr_tids[0]));
+
+ printf("\n## Copy cookie from a thread [%d] to current/PGID [%d] as PIDTYPE_PID\n",
+ procs[pidx].thr_tids[0], getpid());
+ if (_prctl(PR_SCHED_CORE, PR_SCHED_CORE_SHARE_FROM, procs[pidx].thr_tids[0],
+ PIDTYPE_PID, 0) < 0)
+ handle_error("core_sched share from thread failed -- PID");
+ disp_processes(num_processes, procs);
+
+ validate(get_cs_cookie(0) == get_cs_cookie(procs[pidx].thr_tids[0]));
+ validate(get_cs_cookie(pid) != get_cs_cookie(procs[pidx].thr_tids[0]));
+
+ printf("\n## Copy cookie from current [%d] to current as pidtype PGID\n", getpid());
+ if (_prctl(PR_SCHED_CORE, PR_SCHED_CORE_SHARE_TO, 0, PIDTYPE_PGID, 0) < 0)
+ handle_error("core_sched share to self failed -- PGID");
+ disp_processes(num_processes, procs);
+
+ validate(get_cs_cookie(0) == get_cs_cookie(pid));
+ validate(get_cs_cookie(pid) != 0);
+ validate(get_cs_cookie(pid) == get_cs_cookie(procs[pidx].thr_tids[0]));
+
+ validate(_prctl(PR_SCHED_CORE, PR_SCHED_CORE_MAX, 0, PIDTYPE_PGID, 0) < 0
+ && errno == EINVAL);
+
+ validate(_prctl(PR_SCHED_CORE, PR_SCHED_CORE_SHARE_TO, 0, PIDTYPE_PGID, 1) < 0
+ && errno == EINVAL);
+
+ if (errors) {
+ printf("TESTS FAILED. errors: %d\n", errors);
+ res = 10;
+ } else {
+ printf("SUCCESS !!!\n");
+ }
+
+ if (keypress)
+ getchar();
+ else
+ sleep(delay);
+
+ for (pidx = 0; pidx < num_processes; ++pidx)
+ kill(procs[pidx].cpid, 15);
+
+ return res;
+}
diff --git a/tools/testing/selftests/seccomp/Makefile b/tools/testing/selftests/seccomp/Makefile
index 0ebfe8b0e147..584fba487037 100644
--- a/tools/testing/selftests/seccomp/Makefile
+++ b/tools/testing/selftests/seccomp/Makefile
@@ -1,6 +1,7 @@
# SPDX-License-Identifier: GPL-2.0
-CFLAGS += -Wl,-no-as-needed -Wall
+CFLAGS += -Wl,-no-as-needed -Wall $(KHDR_INCLUDES)
LDFLAGS += -lpthread
+LDLIBS += -lcap
TEST_GEN_PROGS := seccomp_bpf seccomp_benchmark
include ../lib.mk
diff --git a/tools/testing/selftests/seccomp/config b/tools/testing/selftests/seccomp/config
index db1e11b08c8a..ad431a5178fb 100644
--- a/tools/testing/selftests/seccomp/config
+++ b/tools/testing/selftests/seccomp/config
@@ -1,2 +1,4 @@
+CONFIG_PID_NS=y
CONFIG_SECCOMP=y
CONFIG_SECCOMP_FILTER=y
+CONFIG_USER_NS=y
diff --git a/tools/testing/selftests/seccomp/seccomp_benchmark.c b/tools/testing/selftests/seccomp/seccomp_benchmark.c
index 5838c8697ec3..b83099160fbc 100644
--- a/tools/testing/selftests/seccomp/seccomp_benchmark.c
+++ b/tools/testing/selftests/seccomp/seccomp_benchmark.c
@@ -4,23 +4,29 @@
*/
#define _GNU_SOURCE
#include <assert.h>
+#include <err.h>
+#include <limits.h>
+#include <sched.h>
+#include <stdbool.h>
+#include <stddef.h>
#include <stdio.h>
#include <stdlib.h>
#include <time.h>
#include <unistd.h>
#include <linux/filter.h>
#include <linux/seccomp.h>
+#include <sys/param.h>
#include <sys/prctl.h>
#include <sys/syscall.h>
#include <sys/types.h>
-#define ARRAY_SIZE(a) (sizeof(a) / sizeof(a[0]))
+#include "../kselftest.h"
unsigned long long timing(clockid_t clk_id, unsigned long long samples)
{
- pid_t pid, ret;
- unsigned long long i;
struct timespec start, finish;
+ unsigned long long i;
+ pid_t pid, ret;
pid = getpid();
assert(clock_gettime(clk_id, &start) == 0);
@@ -31,69 +37,270 @@ unsigned long long timing(clockid_t clk_id, unsigned long long samples)
assert(clock_gettime(clk_id, &finish) == 0);
i = finish.tv_sec - start.tv_sec;
- i *= 1000000000;
+ i *= 1000000000ULL;
i += finish.tv_nsec - start.tv_nsec;
- printf("%lu.%09lu - %lu.%09lu = %llu\n",
- finish.tv_sec, finish.tv_nsec,
- start.tv_sec, start.tv_nsec,
- i);
+ ksft_print_msg("%lu.%09lu - %lu.%09lu = %llu (%.1fs)\n",
+ finish.tv_sec, finish.tv_nsec,
+ start.tv_sec, start.tv_nsec,
+ i, (double)i / 1000000000.0);
return i;
}
unsigned long long calibrate(void)
{
- unsigned long long i;
+ struct timespec start, finish;
+ unsigned long long i, samples, step = 9973;
+ pid_t pid, ret;
+ int seconds = 15;
- printf("Calibrating reasonable sample size...\n");
+ ksft_print_msg("Calibrating sample size for %d seconds worth of syscalls ...\n", seconds);
- for (i = 5; ; i++) {
- unsigned long long samples = 1 << i;
+ samples = 0;
+ pid = getpid();
+ assert(clock_gettime(CLOCK_MONOTONIC, &start) == 0);
+ do {
+ for (i = 0; i < step; i++) {
+ ret = syscall(__NR_getpid);
+ assert(pid == ret);
+ }
+ assert(clock_gettime(CLOCK_MONOTONIC, &finish) == 0);
- /* Find something that takes more than 5 seconds to run. */
- if (timing(CLOCK_REALTIME, samples) / 1000000000ULL > 5)
- return samples;
+ samples += step;
+ i = finish.tv_sec - start.tv_sec;
+ i *= 1000000000ULL;
+ i += finish.tv_nsec - start.tv_nsec;
+ } while (i < 1000000000ULL);
+
+ return samples * seconds;
+}
+
+bool approx(int i_one, int i_two)
+{
+ /*
+ * This continues to be a noisy test. Instead of a 1% comparison
+ * go with 10%.
+ */
+ double one = i_one, one_bump = one * 0.1;
+ double two = i_two, two_bump = two * 0.1;
+
+ one_bump = one + MAX(one_bump, 2.0);
+ two_bump = two + MAX(two_bump, 2.0);
+
+ /* Equal to, or within 1% or 2 digits */
+ if (one == two ||
+ (one > two && one <= two_bump) ||
+ (two > one && two <= one_bump))
+ return true;
+ return false;
+}
+
+bool le(int i_one, int i_two)
+{
+ if (i_one <= i_two)
+ return true;
+ return false;
+}
+
+long compare(const char *name_one, const char *name_eval, const char *name_two,
+ unsigned long long one, bool (*eval)(int, int), unsigned long long two,
+ bool skip)
+{
+ bool good;
+
+ if (skip) {
+ ksft_test_result_skip("%s %s %s\n", name_one, name_eval,
+ name_two);
+ return 0;
+ }
+
+ ksft_print_msg("\t%s %s %s (%lld %s %lld): ", name_one, name_eval, name_two,
+ (long long)one, name_eval, (long long)two);
+ if (one > INT_MAX) {
+ ksft_print_msg("Miscalculation! Measurement went negative: %lld\n", (long long)one);
+ good = false;
+ goto out;
+ }
+ if (two > INT_MAX) {
+ ksft_print_msg("Miscalculation! Measurement went negative: %lld\n", (long long)two);
+ good = false;
+ goto out;
}
+
+ good = eval(one, two);
+ printf("%s\n", good ? "✔️" : "❌");
+
+out:
+ ksft_test_result(good, "%s %s %s\n", name_one, name_eval, name_two);
+
+ return good ? 0 : 1;
+}
+
+/* Pin to a single CPU so the benchmark won't bounce around the system. */
+void affinity(void)
+{
+ long cpu;
+ ulong ncores = sysconf(_SC_NPROCESSORS_CONF);
+ cpu_set_t *setp = CPU_ALLOC(ncores);
+ ulong setsz = CPU_ALLOC_SIZE(ncores);
+
+ /*
+ * Totally unscientific way to avoid CPUs that might be busier:
+ * choose the highest CPU instead of the lowest.
+ */
+ for (cpu = ncores - 1; cpu >= 0; cpu--) {
+ CPU_ZERO_S(setsz, setp);
+ CPU_SET_S(cpu, setsz, setp);
+ if (sched_setaffinity(getpid(), setsz, setp) == -1)
+ continue;
+ printf("Pinned to CPU %lu of %lu\n", cpu + 1, ncores);
+ goto out;
+ }
+ fprintf(stderr, "Could not set CPU affinity -- calibration may not work well");
+
+out:
+ CPU_FREE(setp);
}
int main(int argc, char *argv[])
{
+ struct sock_filter bitmap_filter[] = {
+ BPF_STMT(BPF_LD|BPF_W|BPF_ABS, offsetof(struct seccomp_data, nr)),
+ BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
+ };
+ struct sock_fprog bitmap_prog = {
+ .len = (unsigned short)ARRAY_SIZE(bitmap_filter),
+ .filter = bitmap_filter,
+ };
struct sock_filter filter[] = {
+ BPF_STMT(BPF_LD|BPF_W|BPF_ABS, offsetof(struct seccomp_data, args[0])),
BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
};
struct sock_fprog prog = {
.len = (unsigned short)ARRAY_SIZE(filter),
.filter = filter,
};
- long ret;
- unsigned long long samples;
- unsigned long long native, filtered;
+
+ long ret, bits;
+ unsigned long long samples, calc;
+ unsigned long long native, filter1, filter2, bitmap1, bitmap2;
+ unsigned long long entry, per_filter1, per_filter2;
+ bool skip = false;
+
+ setbuf(stdout, NULL);
+
+ ksft_print_header();
+ ksft_set_plan(7);
+
+ ksft_print_msg("Running on:\n");
+ ksft_print_msg("");
+ system("uname -a");
+
+ ksft_print_msg("Current BPF sysctl settings:\n");
+ /* Avoid using "sysctl" which may not be installed. */
+ ksft_print_msg("");
+ system("grep -H . /proc/sys/net/core/bpf_jit_enable");
+ ksft_print_msg("");
+ system("grep -H . /proc/sys/net/core/bpf_jit_harden");
+
+ affinity();
if (argc > 1)
samples = strtoull(argv[1], NULL, 0);
else
samples = calibrate();
- printf("Benchmarking %llu samples...\n", samples);
+ ksft_print_msg("Benchmarking %llu syscalls...\n", samples);
+ /* Native call */
native = timing(CLOCK_PROCESS_CPUTIME_ID, samples) / samples;
- printf("getpid native: %llu ns\n", native);
+ ksft_print_msg("getpid native: %llu ns\n", native);
ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
assert(ret == 0);
+ /* One filter resulting in a bitmap */
+ ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &bitmap_prog);
+ assert(ret == 0);
+
+ bitmap1 = timing(CLOCK_PROCESS_CPUTIME_ID, samples) / samples;
+ ksft_print_msg("getpid RET_ALLOW 1 filter (bitmap): %llu ns\n", bitmap1);
+
+ /* Second filter resulting in a bitmap */
+ ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &bitmap_prog);
+ assert(ret == 0);
+
+ bitmap2 = timing(CLOCK_PROCESS_CPUTIME_ID, samples) / samples;
+ ksft_print_msg("getpid RET_ALLOW 2 filters (bitmap): %llu ns\n", bitmap2);
+
+ /* Third filter, can no longer be converted to bitmap */
ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog);
assert(ret == 0);
- filtered = timing(CLOCK_PROCESS_CPUTIME_ID, samples) / samples;
- printf("getpid RET_ALLOW: %llu ns\n", filtered);
+ filter1 = timing(CLOCK_PROCESS_CPUTIME_ID, samples) / samples;
+ ksft_print_msg("getpid RET_ALLOW 3 filters (full): %llu ns\n", filter1);
+
+ /* Fourth filter, can not be converted to bitmap because of filter 3 */
+ ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &bitmap_prog);
+ assert(ret == 0);
+
+ filter2 = timing(CLOCK_PROCESS_CPUTIME_ID, samples) / samples;
+ ksft_print_msg("getpid RET_ALLOW 4 filters (full): %llu ns\n", filter2);
+
+ /* Estimations */
+#define ESTIMATE(fmt, var, what) do { \
+ var = (what); \
+ ksft_print_msg("Estimated " fmt ": %llu ns\n", var); \
+ if (var > INT_MAX) { \
+ skip = true; \
+ ret |= 1; \
+ } \
+ } while (0)
+
+ ESTIMATE("total seccomp overhead for 1 bitmapped filter", calc,
+ bitmap1 - native);
+ ESTIMATE("total seccomp overhead for 2 bitmapped filters", calc,
+ bitmap2 - native);
+ ESTIMATE("total seccomp overhead for 3 full filters", calc,
+ filter1 - native);
+ ESTIMATE("total seccomp overhead for 4 full filters", calc,
+ filter2 - native);
+ ESTIMATE("seccomp entry overhead", entry,
+ bitmap1 - native - (bitmap2 - bitmap1));
+ ESTIMATE("seccomp per-filter overhead (last 2 diff)", per_filter1,
+ filter2 - filter1);
+ ESTIMATE("seccomp per-filter overhead (filters / 4)", per_filter2,
+ (filter2 - native - entry) / 4);
+
+ ksft_print_msg("Expectations:\n");
+ ret |= compare("native", "≤", "1 bitmap", native, le, bitmap1,
+ skip);
+ bits = compare("native", "≤", "1 filter", native, le, filter1,
+ skip);
+ if (bits)
+ skip = true;
+
+ ret |= compare("per-filter (last 2 diff)", "≈", "per-filter (filters / 4)",
+ per_filter1, approx, per_filter2, skip);
+
+ bits = compare("1 bitmapped", "≈", "2 bitmapped",
+ bitmap1 - native, approx, bitmap2 - native, skip);
+ if (bits) {
+ ksft_print_msg("Skipping constant action bitmap expectations: they appear unsupported.\n");
+ skip = true;
+ }
- printf("Estimated seccomp overhead per syscall: %llu ns\n",
- filtered - native);
+ ret |= compare("entry", "≈", "1 bitmapped", entry, approx,
+ bitmap1 - native, skip);
+ ret |= compare("entry", "≈", "2 bitmapped", entry, approx,
+ bitmap2 - native, skip);
+ ret |= compare("native + entry + (per filter * 4)", "≈", "4 filters total",
+ entry + (per_filter1 * 4) + native, approx, filter2,
+ skip);
- if (filtered == native)
- printf("Trying running again with more samples.\n");
+ if (ret)
+ ksft_print_msg("Saw unexpected benchmark result. Try running again with more samples?\n");
- return 0;
+ ksft_finished();
}
diff --git a/tools/testing/selftests/seccomp/seccomp_bpf.c b/tools/testing/selftests/seccomp/seccomp_bpf.c
index 252140a52553..783ebce8c4de 100644
--- a/tools/testing/selftests/seccomp/seccomp_bpf.c
+++ b/tools/testing/selftests/seccomp/seccomp_bpf.c
@@ -45,12 +45,22 @@
#include <sys/socket.h>
#include <sys/ioctl.h>
#include <linux/kcmp.h>
+#include <sys/resource.h>
+#include <sys/capability.h>
#include <unistd.h>
#include <sys/syscall.h>
#include <poll.h>
#include "../kselftest_harness.h"
+#include "../clone3/clone3_selftests.h"
+
+/* Attempt to de-conflict with the selftests tree. */
+#ifndef SKIP
+#define SKIP(s, ...) XFAIL(s, ##__VA_ARGS__)
+#endif
+
+#define MIN(X, Y) ((X) < (Y) ? (X) : (Y))
#ifndef PR_SET_PTRACER
# define PR_SET_PTRACER 0x59616d61
@@ -116,12 +126,22 @@ struct seccomp_data {
# define __NR_seccomp 277
# elif defined(__riscv)
# define __NR_seccomp 277
+# elif defined(__csky__)
+# define __NR_seccomp 277
+# elif defined(__loongarch__)
+# define __NR_seccomp 277
# elif defined(__hppa__)
# define __NR_seccomp 338
# elif defined(__powerpc__)
# define __NR_seccomp 358
# elif defined(__s390__)
# define __NR_seccomp 348
+# elif defined(__xtensa__)
+# define __NR_seccomp 337
+# elif defined(__sh__)
+# define __NR_seccomp 372
+# elif defined(__mc68000__)
+# define __NR_seccomp 380
# else
# warning "seccomp syscall number unknown for this architecture"
# define __NR_seccomp 0xffff
@@ -167,7 +187,9 @@ struct seccomp_metadata {
#ifndef SECCOMP_FILTER_FLAG_NEW_LISTENER
#define SECCOMP_FILTER_FLAG_NEW_LISTENER (1UL << 3)
+#endif
+#ifndef SECCOMP_RET_USER_NOTIF
#define SECCOMP_RET_USER_NOTIF 0x7fc00000U
#define SECCOMP_IOC_MAGIC '!'
@@ -180,7 +202,7 @@ struct seccomp_metadata {
#define SECCOMP_IOCTL_NOTIF_RECV SECCOMP_IOWR(0, struct seccomp_notif)
#define SECCOMP_IOCTL_NOTIF_SEND SECCOMP_IOWR(1, \
struct seccomp_notif_resp)
-#define SECCOMP_IOCTL_NOTIF_ID_VALID SECCOMP_IOR(2, __u64)
+#define SECCOMP_IOCTL_NOTIF_ID_VALID SECCOMP_IOW(2, __u64)
struct seccomp_notif {
__u64 id;
@@ -203,6 +225,43 @@ struct seccomp_notif_sizes {
};
#endif
+#ifndef SECCOMP_IOCTL_NOTIF_ADDFD
+/* On success, the return value is the remote process's added fd number */
+#define SECCOMP_IOCTL_NOTIF_ADDFD SECCOMP_IOW(3, \
+ struct seccomp_notif_addfd)
+
+/* valid flags for seccomp_notif_addfd */
+#define SECCOMP_ADDFD_FLAG_SETFD (1UL << 0) /* Specify remote fd */
+
+struct seccomp_notif_addfd {
+ __u64 id;
+ __u32 flags;
+ __u32 srcfd;
+ __u32 newfd;
+ __u32 newfd_flags;
+};
+#endif
+
+#ifndef SECCOMP_ADDFD_FLAG_SEND
+#define SECCOMP_ADDFD_FLAG_SEND (1UL << 1) /* Addfd and return it, atomically */
+#endif
+
+struct seccomp_notif_addfd_small {
+ __u64 id;
+ char weird[4];
+};
+#define SECCOMP_IOCTL_NOTIF_ADDFD_SMALL \
+ SECCOMP_IOW(3, struct seccomp_notif_addfd_small)
+
+struct seccomp_notif_addfd_big {
+ union {
+ struct seccomp_notif_addfd addfd;
+ char buf[sizeof(struct seccomp_notif_addfd) + 8];
+ };
+};
+#define SECCOMP_IOCTL_NOTIF_ADDFD_BIG \
+ SECCOMP_IOWR(3, struct seccomp_notif_addfd_big)
+
#ifndef PTRACE_EVENTMSG_SYSCALL_ENTRY
#define PTRACE_EVENTMSG_SYSCALL_ENTRY 1
#define PTRACE_EVENTMSG_SYSCALL_EXIT 2
@@ -216,6 +275,10 @@ struct seccomp_notif_sizes {
#define SECCOMP_FILTER_FLAG_TSYNC_ESRCH (1UL << 4)
#endif
+#ifndef SECCOMP_FILTER_FLAG_WAIT_KILLABLE_RECV
+#define SECCOMP_FILTER_FLAG_WAIT_KILLABLE_RECV (1UL << 5)
+#endif
+
#ifndef seccomp
int seccomp(unsigned int op, unsigned int flags, void *args)
{
@@ -224,18 +287,52 @@ int seccomp(unsigned int op, unsigned int flags, void *args)
}
#endif
-#if __BYTE_ORDER == __LITTLE_ENDIAN
+#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
#define syscall_arg(_n) (offsetof(struct seccomp_data, args[_n]))
-#elif __BYTE_ORDER == __BIG_ENDIAN
+#elif __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
#define syscall_arg(_n) (offsetof(struct seccomp_data, args[_n]) + sizeof(__u32))
#else
-#error "wut? Unknown __BYTE_ORDER?!"
+#error "wut? Unknown __BYTE_ORDER__?!"
#endif
#define SIBLING_EXIT_UNKILLED 0xbadbeef
#define SIBLING_EXIT_FAILURE 0xbadface
#define SIBLING_EXIT_NEWPRIVS 0xbadfeed
+static int __filecmp(pid_t pid1, pid_t pid2, int fd1, int fd2)
+{
+#ifdef __NR_kcmp
+ errno = 0;
+ return syscall(__NR_kcmp, pid1, pid2, KCMP_FILE, fd1, fd2);
+#else
+ errno = ENOSYS;
+ return -1;
+#endif
+}
+
+/* Have TH_LOG report actual location filecmp() is used. */
+#define filecmp(pid1, pid2, fd1, fd2) ({ \
+ int _ret; \
+ \
+ _ret = __filecmp(pid1, pid2, fd1, fd2); \
+ if (_ret != 0) { \
+ if (_ret < 0 && errno == ENOSYS) { \
+ TH_LOG("kcmp() syscall missing (test is less accurate)");\
+ _ret = 0; \
+ } \
+ } \
+ _ret; })
+
+TEST(kcmp)
+{
+ int ret;
+
+ ret = __filecmp(getpid(), getpid(), 1, 1);
+ EXPECT_EQ(ret, 0);
+ if (ret != 0 && errno == ENOSYS)
+ SKIP(return, "Kernel does not support kcmp() (missing CONFIG_KCMP?)");
+}
+
TEST(mode_strict_support)
{
long ret;
@@ -299,6 +396,8 @@ TEST(mode_filter_without_nnp)
.filter = filter,
};
long ret;
+ cap_t cap = cap_get_proc();
+ cap_flag_value_t is_cap_sys_admin = 0;
ret = prctl(PR_GET_NO_NEW_PRIVS, 0, NULL, 0, 0);
ASSERT_LE(0, ret) {
@@ -307,8 +406,8 @@ TEST(mode_filter_without_nnp)
errno = 0;
ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog, 0, 0);
/* Succeeds with CAP_SYS_ADMIN, fails without */
- /* TODO(wad) check caps not euid */
- if (geteuid()) {
+ cap_get_flag(cap, CAP_SYS_ADMIN, CAP_EFFECTIVE, &is_cap_sys_admin);
+ if (!is_cap_sys_admin) {
EXPECT_EQ(-1, ret);
EXPECT_EQ(EACCES, errno);
} else {
@@ -685,23 +784,30 @@ void *kill_thread(void *data)
bool die = (bool)data;
if (die) {
- prctl(PR_GET_SECCOMP, 0, 0, 0, 0);
+ syscall(__NR_getpid);
return (void *)SIBLING_EXIT_FAILURE;
}
return (void *)SIBLING_EXIT_UNKILLED;
}
+enum kill_t {
+ KILL_THREAD,
+ KILL_PROCESS,
+ RET_UNKNOWN
+};
+
/* Prepare a thread that will kill itself or both of us. */
-void kill_thread_or_group(struct __test_metadata *_metadata, bool kill_process)
+void kill_thread_or_group(struct __test_metadata *_metadata,
+ enum kill_t kill_how)
{
pthread_t thread;
void *status;
- /* Kill only when calling __NR_prctl. */
+ /* Kill only when calling __NR_getpid. */
struct sock_filter filter_thread[] = {
BPF_STMT(BPF_LD|BPF_W|BPF_ABS,
offsetof(struct seccomp_data, nr)),
- BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_prctl, 0, 1),
+ BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_getpid, 0, 1),
BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_KILL_THREAD),
BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
};
@@ -709,11 +815,12 @@ void kill_thread_or_group(struct __test_metadata *_metadata, bool kill_process)
.len = (unsigned short)ARRAY_SIZE(filter_thread),
.filter = filter_thread,
};
+ int kill = kill_how == KILL_PROCESS ? SECCOMP_RET_KILL_PROCESS : 0xAAAAAAAA;
struct sock_filter filter_process[] = {
BPF_STMT(BPF_LD|BPF_W|BPF_ABS,
offsetof(struct seccomp_data, nr)),
- BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_prctl, 0, 1),
- BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_KILL_PROCESS),
+ BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_getpid, 0, 1),
+ BPF_STMT(BPF_RET|BPF_K, kill),
BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
};
struct sock_fprog prog_process = {
@@ -726,13 +833,15 @@ void kill_thread_or_group(struct __test_metadata *_metadata, bool kill_process)
}
ASSERT_EQ(0, seccomp(SECCOMP_SET_MODE_FILTER, 0,
- kill_process ? &prog_process : &prog_thread));
+ kill_how == KILL_THREAD ? &prog_thread
+ : &prog_process));
/*
* Add the KILL_THREAD rule again to make sure that the KILL_PROCESS
* flag cannot be downgraded by a new filter.
*/
- ASSERT_EQ(0, seccomp(SECCOMP_SET_MODE_FILTER, 0, &prog_thread));
+ if (kill_how == KILL_PROCESS)
+ ASSERT_EQ(0, seccomp(SECCOMP_SET_MODE_FILTER, 0, &prog_thread));
/* Start a thread that will exit immediately. */
ASSERT_EQ(0, pthread_create(&thread, NULL, kill_thread, (void *)false));
@@ -760,7 +869,7 @@ TEST(KILL_thread)
child_pid = fork();
ASSERT_LE(0, child_pid);
if (child_pid == 0) {
- kill_thread_or_group(_metadata, false);
+ kill_thread_or_group(_metadata, KILL_THREAD);
_exit(38);
}
@@ -779,7 +888,7 @@ TEST(KILL_process)
child_pid = fork();
ASSERT_LE(0, child_pid);
if (child_pid == 0) {
- kill_thread_or_group(_metadata, true);
+ kill_thread_or_group(_metadata, KILL_PROCESS);
_exit(38);
}
@@ -790,6 +899,27 @@ TEST(KILL_process)
ASSERT_EQ(SIGSYS, WTERMSIG(status));
}
+TEST(KILL_unknown)
+{
+ int status;
+ pid_t child_pid;
+
+ child_pid = fork();
+ ASSERT_LE(0, child_pid);
+ if (child_pid == 0) {
+ kill_thread_or_group(_metadata, RET_UNKNOWN);
+ _exit(38);
+ }
+
+ ASSERT_EQ(child_pid, waitpid(child_pid, &status, 0));
+
+ /* If the entire process was killed, we'll see SIGSYS. */
+ EXPECT_TRUE(WIFSIGNALED(status)) {
+ TH_LOG("Unknown SECCOMP_RET is only killing the thread?");
+ }
+ ASSERT_EQ(SIGSYS, WTERMSIG(status));
+}
+
/* TODO(wad) add 64-bit versus 32-bit arg tests. */
TEST(arg_out_of_range)
{
@@ -838,7 +968,7 @@ TEST(ERRNO_valid)
ASSERT_EQ(0, ret);
EXPECT_EQ(parent, syscall(__NR_getppid));
- EXPECT_EQ(-1, read(0, NULL, 0));
+ EXPECT_EQ(-1, read(-1, NULL, 0));
EXPECT_EQ(E2BIG, errno);
}
@@ -857,7 +987,7 @@ TEST(ERRNO_zero)
EXPECT_EQ(parent, syscall(__NR_getppid));
/* "errno" of 0 is ok. */
- EXPECT_EQ(0, read(0, NULL, 0));
+ EXPECT_EQ(0, read(-1, NULL, 0));
}
/*
@@ -878,7 +1008,7 @@ TEST(ERRNO_capped)
ASSERT_EQ(0, ret);
EXPECT_EQ(parent, syscall(__NR_getppid));
- EXPECT_EQ(-1, read(0, NULL, 0));
+ EXPECT_EQ(-1, read(-1, NULL, 0));
EXPECT_EQ(4095, errno);
}
@@ -909,7 +1039,7 @@ TEST(ERRNO_order)
ASSERT_EQ(0, ret);
EXPECT_EQ(parent, syscall(__NR_getppid));
- EXPECT_EQ(-1, read(0, NULL, 0));
+ EXPECT_EQ(-1, read(-1, NULL, 0));
EXPECT_EQ(12, errno);
}
@@ -1370,7 +1500,7 @@ TEST_F(precedence, log_is_fifth_in_any_order)
#define PTRACE_EVENT_SECCOMP 7
#endif
-#define IS_SECCOMP_EVENT(status) ((status >> 16) == PTRACE_EVENT_SECCOMP)
+#define PTRACE_EVENT_MASK(status) ((status) >> 16)
bool tracer_running;
void tracer_stop(int sig)
{
@@ -1422,12 +1552,22 @@ void start_tracer(struct __test_metadata *_metadata, int fd, pid_t tracee,
if (wait(&status) != tracee)
continue;
- if (WIFSIGNALED(status) || WIFEXITED(status))
- /* Child is dead. Time to go. */
+
+ if (WIFSIGNALED(status)) {
+ /* Child caught a fatal signal. */
+ return;
+ }
+ if (WIFEXITED(status)) {
+ /* Child exited with code. */
return;
+ }
- /* Check if this is a seccomp event. */
- ASSERT_EQ(!ptrace_syscall, IS_SECCOMP_EVENT(status));
+ /* Check if we got an expected event. */
+ ASSERT_EQ(WIFCONTINUED(status), false);
+ ASSERT_EQ(WIFSTOPPED(status), true);
+ ASSERT_EQ(WSTOPSIG(status) & SIGTRAP, SIGTRAP) {
+ TH_LOG("Unexpected WSTOPSIG: %d", WSTOPSIG(status));
+ }
tracer_func(_metadata, tracee, status, args);
@@ -1436,7 +1576,7 @@ void start_tracer(struct __test_metadata *_metadata, int fd, pid_t tracee,
ASSERT_EQ(0, ret);
}
/* Directly report the status of our test harness results. */
- syscall(__NR_exit, _metadata->passed ? EXIT_SUCCESS : EXIT_FAILURE);
+ syscall(__NR_exit, _metadata->exit_code);
}
/* Common tracer setup/teardown functions. */
@@ -1470,6 +1610,7 @@ pid_t setup_trace_fixture(struct __test_metadata *_metadata,
return tracer_pid;
}
+
void teardown_trace_fixture(struct __test_metadata *_metadata,
pid_t tracer)
{
@@ -1482,7 +1623,7 @@ void teardown_trace_fixture(struct __test_metadata *_metadata,
ASSERT_EQ(0, kill(tracer, SIGUSR1));
ASSERT_EQ(tracer, waitpid(tracer, &status, 0));
if (WEXITSTATUS(status))
- _metadata->passed = 0;
+ _metadata->exit_code = KSFT_FAIL;
}
}
@@ -1584,50 +1725,165 @@ TEST_F(TRACE_poke, getpid_runs_normally)
}
#if defined(__x86_64__)
-# define ARCH_REGS struct user_regs_struct
-# define SYSCALL_NUM orig_rax
-# define SYSCALL_RET rax
+# define ARCH_REGS struct user_regs_struct
+# define SYSCALL_NUM(_regs) (_regs).orig_rax
+# define SYSCALL_RET(_regs) (_regs).rax
#elif defined(__i386__)
-# define ARCH_REGS struct user_regs_struct
-# define SYSCALL_NUM orig_eax
-# define SYSCALL_RET eax
+# define ARCH_REGS struct user_regs_struct
+# define SYSCALL_NUM(_regs) (_regs).orig_eax
+# define SYSCALL_RET(_regs) (_regs).eax
#elif defined(__arm__)
-# define ARCH_REGS struct pt_regs
-# define SYSCALL_NUM ARM_r7
-# define SYSCALL_RET ARM_r0
+# define ARCH_REGS struct pt_regs
+# define SYSCALL_NUM(_regs) (_regs).ARM_r7
+# ifndef PTRACE_SET_SYSCALL
+# define PTRACE_SET_SYSCALL 23
+# endif
+# define SYSCALL_NUM_SET(_regs, _nr) \
+ EXPECT_EQ(0, ptrace(PTRACE_SET_SYSCALL, tracee, NULL, _nr))
+# define SYSCALL_RET(_regs) (_regs).ARM_r0
#elif defined(__aarch64__)
-# define ARCH_REGS struct user_pt_regs
-# define SYSCALL_NUM regs[8]
-# define SYSCALL_RET regs[0]
+# define ARCH_REGS struct user_pt_regs
+# define SYSCALL_NUM(_regs) (_regs).regs[8]
+# ifndef NT_ARM_SYSTEM_CALL
+# define NT_ARM_SYSTEM_CALL 0x404
+# endif
+# define SYSCALL_NUM_SET(_regs, _nr) \
+ do { \
+ struct iovec __v; \
+ typeof(_nr) __nr = (_nr); \
+ __v.iov_base = &__nr; \
+ __v.iov_len = sizeof(__nr); \
+ EXPECT_EQ(0, ptrace(PTRACE_SETREGSET, tracee, \
+ NT_ARM_SYSTEM_CALL, &__v)); \
+ } while (0)
+# define SYSCALL_RET(_regs) (_regs).regs[0]
+#elif defined(__loongarch__)
+# define ARCH_REGS struct user_pt_regs
+# define SYSCALL_NUM(_regs) (_regs).regs[11]
+# define SYSCALL_RET(_regs) (_regs).regs[4]
#elif defined(__riscv) && __riscv_xlen == 64
-# define ARCH_REGS struct user_regs_struct
-# define SYSCALL_NUM a7
-# define SYSCALL_RET a0
+# define ARCH_REGS struct user_regs_struct
+# define SYSCALL_NUM(_regs) (_regs).a7
+# define SYSCALL_RET(_regs) (_regs).a0
+#elif defined(__csky__)
+# define ARCH_REGS struct pt_regs
+# if defined(__CSKYABIV2__)
+# define SYSCALL_NUM(_regs) (_regs).regs[3]
+# else
+# define SYSCALL_NUM(_regs) (_regs).regs[9]
+# endif
+# define SYSCALL_RET(_regs) (_regs).a0
#elif defined(__hppa__)
-# define ARCH_REGS struct user_regs_struct
-# define SYSCALL_NUM gr[20]
-# define SYSCALL_RET gr[28]
+# define ARCH_REGS struct user_regs_struct
+# define SYSCALL_NUM(_regs) (_regs).gr[20]
+# define SYSCALL_RET(_regs) (_regs).gr[28]
#elif defined(__powerpc__)
-# define ARCH_REGS struct pt_regs
-# define SYSCALL_NUM gpr[0]
-# define SYSCALL_RET gpr[3]
+# define ARCH_REGS struct pt_regs
+# define SYSCALL_NUM(_regs) (_regs).gpr[0]
+# define SYSCALL_RET(_regs) (_regs).gpr[3]
+# define SYSCALL_RET_SET(_regs, _val) \
+ do { \
+ typeof(_val) _result = (_val); \
+ if ((_regs.trap & 0xfff0) == 0x3000) { \
+ /* \
+ * scv 0 system call uses -ve result \
+ * for error, so no need to adjust. \
+ */ \
+ SYSCALL_RET(_regs) = _result; \
+ } else { \
+ /* \
+ * A syscall error is signaled by the \
+ * CR0 SO bit and the code is stored as \
+ * a positive value. \
+ */ \
+ if (_result < 0) { \
+ SYSCALL_RET(_regs) = -_result; \
+ (_regs).ccr |= 0x10000000; \
+ } else { \
+ SYSCALL_RET(_regs) = _result; \
+ (_regs).ccr &= ~0x10000000; \
+ } \
+ } \
+ } while (0)
+# define SYSCALL_RET_SET_ON_PTRACE_EXIT
#elif defined(__s390__)
-# define ARCH_REGS s390_regs
-# define SYSCALL_NUM gprs[2]
-# define SYSCALL_RET gprs[2]
-# define SYSCALL_NUM_RET_SHARE_REG
+# define ARCH_REGS s390_regs
+# define SYSCALL_NUM(_regs) (_regs).gprs[2]
+# define SYSCALL_RET_SET(_regs, _val) \
+ TH_LOG("Can't modify syscall return on this architecture")
#elif defined(__mips__)
-# define ARCH_REGS struct pt_regs
-# define SYSCALL_NUM regs[2]
-# define SYSCALL_SYSCALL_NUM regs[4]
-# define SYSCALL_RET regs[2]
-# define SYSCALL_NUM_RET_SHARE_REG
+# include <asm/unistd_nr_n32.h>
+# include <asm/unistd_nr_n64.h>
+# include <asm/unistd_nr_o32.h>
+# define ARCH_REGS struct pt_regs
+# define SYSCALL_NUM(_regs) \
+ ({ \
+ typeof((_regs).regs[2]) _nr; \
+ if ((_regs).regs[2] == __NR_O32_Linux) \
+ _nr = (_regs).regs[4]; \
+ else \
+ _nr = (_regs).regs[2]; \
+ _nr; \
+ })
+# define SYSCALL_NUM_SET(_regs, _nr) \
+ do { \
+ if ((_regs).regs[2] == __NR_O32_Linux) \
+ (_regs).regs[4] = _nr; \
+ else \
+ (_regs).regs[2] = _nr; \
+ } while (0)
+# define SYSCALL_RET_SET(_regs, _val) \
+ TH_LOG("Can't modify syscall return on this architecture")
+#elif defined(__xtensa__)
+# define ARCH_REGS struct user_pt_regs
+# define SYSCALL_NUM(_regs) (_regs).syscall
+/*
+ * On xtensa syscall return value is in the register
+ * a2 of the current window which is not fixed.
+ */
+#define SYSCALL_RET(_regs) (_regs).a[(_regs).windowbase * 4 + 2]
+#elif defined(__sh__)
+# define ARCH_REGS struct pt_regs
+# define SYSCALL_NUM(_regs) (_regs).regs[3]
+# define SYSCALL_RET(_regs) (_regs).regs[0]
+#elif defined(__mc68000__)
+# define ARCH_REGS struct user_regs_struct
+# define SYSCALL_NUM(_regs) (_regs).orig_d0
+# define SYSCALL_RET(_regs) (_regs).d0
#else
# error "Do not know how to find your architecture's registers and syscalls"
#endif
+/*
+ * Most architectures can change the syscall by just updating the
+ * associated register. This is the default if not defined above.
+ */
+#ifndef SYSCALL_NUM_SET
+# define SYSCALL_NUM_SET(_regs, _nr) \
+ do { \
+ SYSCALL_NUM(_regs) = (_nr); \
+ } while (0)
+#endif
+/*
+ * Most architectures can change the syscall return value by just
+ * writing to the SYSCALL_RET register. This is the default if not
+ * defined above. If an architecture cannot set the return value
+ * (for example when the syscall and return value register is
+ * shared), report it with TH_LOG() in an arch-specific definition
+ * of SYSCALL_RET_SET() above, and leave SYSCALL_RET undefined.
+ */
+#if !defined(SYSCALL_RET) && !defined(SYSCALL_RET_SET)
+# error "One of SYSCALL_RET or SYSCALL_RET_SET is needed for this arch"
+#endif
+#ifndef SYSCALL_RET_SET
+# define SYSCALL_RET_SET(_regs, _val) \
+ do { \
+ SYSCALL_RET(_regs) = (_val); \
+ } while (0)
+#endif
+
/* When the syscall return can't be changed, stub out the tests for it. */
-#ifdef SYSCALL_NUM_RET_SHARE_REG
+#ifndef SYSCALL_RET
# define EXPECT_SYSCALL_RETURN(val, action) EXPECT_EQ(-1, action)
#else
# define EXPECT_SYSCALL_RETURN(val, action) \
@@ -1642,120 +1898,105 @@ TEST_F(TRACE_poke, getpid_runs_normally)
} while (0)
#endif
-/* Use PTRACE_GETREGS and PTRACE_SETREGS when available. This is useful for
+/*
+ * Some architectures (e.g. powerpc) can only set syscall
+ * return values on syscall exit during ptrace.
+ */
+const bool ptrace_entry_set_syscall_nr = true;
+const bool ptrace_entry_set_syscall_ret =
+#ifndef SYSCALL_RET_SET_ON_PTRACE_EXIT
+ true;
+#else
+ false;
+#endif
+
+/*
+ * Use PTRACE_GETREGS and PTRACE_SETREGS when available. This is useful for
* architectures without HAVE_ARCH_TRACEHOOK (e.g. User-mode Linux).
*/
-#if defined(__x86_64__) || defined(__i386__) || defined(__mips__)
-#define HAVE_GETREGS
+#if defined(__x86_64__) || defined(__i386__) || defined(__mips__) || defined(__mc68000__)
+# define ARCH_GETREGS(_regs) ptrace(PTRACE_GETREGS, tracee, 0, &(_regs))
+# define ARCH_SETREGS(_regs) ptrace(PTRACE_SETREGS, tracee, 0, &(_regs))
+#else
+# define ARCH_GETREGS(_regs) ({ \
+ struct iovec __v; \
+ __v.iov_base = &(_regs); \
+ __v.iov_len = sizeof(_regs); \
+ ptrace(PTRACE_GETREGSET, tracee, NT_PRSTATUS, &__v); \
+ })
+# define ARCH_SETREGS(_regs) ({ \
+ struct iovec __v; \
+ __v.iov_base = &(_regs); \
+ __v.iov_len = sizeof(_regs); \
+ ptrace(PTRACE_SETREGSET, tracee, NT_PRSTATUS, &__v); \
+ })
#endif
/* Architecture-specific syscall fetching routine. */
int get_syscall(struct __test_metadata *_metadata, pid_t tracee)
{
ARCH_REGS regs;
-#ifdef HAVE_GETREGS
- EXPECT_EQ(0, ptrace(PTRACE_GETREGS, tracee, 0, &regs)) {
- TH_LOG("PTRACE_GETREGS failed");
- return -1;
- }
-#else
- struct iovec iov;
- iov.iov_base = &regs;
- iov.iov_len = sizeof(regs);
- EXPECT_EQ(0, ptrace(PTRACE_GETREGSET, tracee, NT_PRSTATUS, &iov)) {
- TH_LOG("PTRACE_GETREGSET failed");
+ EXPECT_EQ(0, ARCH_GETREGS(regs)) {
return -1;
}
-#endif
-#if defined(__mips__)
- if (regs.SYSCALL_NUM == __NR_O32_Linux)
- return regs.SYSCALL_SYSCALL_NUM;
-#endif
- return regs.SYSCALL_NUM;
+ return SYSCALL_NUM(regs);
}
/* Architecture-specific syscall changing routine. */
-void change_syscall(struct __test_metadata *_metadata,
- pid_t tracee, int syscall, int result)
+void __change_syscall(struct __test_metadata *_metadata,
+ pid_t tracee, long *syscall, long *ret)
{
- int ret;
- ARCH_REGS regs;
-#ifdef HAVE_GETREGS
- ret = ptrace(PTRACE_GETREGS, tracee, 0, &regs);
-#else
- struct iovec iov;
- iov.iov_base = &regs;
- iov.iov_len = sizeof(regs);
- ret = ptrace(PTRACE_GETREGSET, tracee, NT_PRSTATUS, &iov);
-#endif
- EXPECT_EQ(0, ret) {}
+ ARCH_REGS orig, regs;
-#if defined(__x86_64__) || defined(__i386__) || defined(__powerpc__) || \
- defined(__s390__) || defined(__hppa__) || defined(__riscv)
- {
- regs.SYSCALL_NUM = syscall;
- }
-#elif defined(__mips__)
- {
- if (regs.SYSCALL_NUM == __NR_O32_Linux)
- regs.SYSCALL_SYSCALL_NUM = syscall;
- else
- regs.SYSCALL_NUM = syscall;
- }
+ /* Do not get/set registers if we have nothing to do. */
+ if (!syscall && !ret)
+ return;
-#elif defined(__arm__)
-# ifndef PTRACE_SET_SYSCALL
-# define PTRACE_SET_SYSCALL 23
-# endif
- {
- ret = ptrace(PTRACE_SET_SYSCALL, tracee, NULL, syscall);
- EXPECT_EQ(0, ret);
+ EXPECT_EQ(0, ARCH_GETREGS(regs)) {
+ return;
}
+ orig = regs;
-#elif defined(__aarch64__)
-# ifndef NT_ARM_SYSTEM_CALL
-# define NT_ARM_SYSTEM_CALL 0x404
-# endif
- {
- iov.iov_base = &syscall;
- iov.iov_len = sizeof(syscall);
- ret = ptrace(PTRACE_SETREGSET, tracee, NT_ARM_SYSTEM_CALL,
- &iov);
- EXPECT_EQ(0, ret);
- }
+ if (syscall)
+ SYSCALL_NUM_SET(regs, *syscall);
-#else
- ASSERT_EQ(1, 0) {
- TH_LOG("How is the syscall changed on this architecture?");
- }
-#endif
+ if (ret)
+ SYSCALL_RET_SET(regs, *ret);
- /* If syscall is skipped, change return value. */
- if (syscall == -1)
-#ifdef SYSCALL_NUM_RET_SHARE_REG
- TH_LOG("Can't modify syscall return on this architecture");
-#else
- regs.SYSCALL_RET = result;
-#endif
+ /* Flush any register changes made. */
+ if (memcmp(&orig, &regs, sizeof(orig)) != 0)
+ EXPECT_EQ(0, ARCH_SETREGS(regs));
+}
-#ifdef HAVE_GETREGS
- ret = ptrace(PTRACE_SETREGS, tracee, 0, &regs);
-#else
- iov.iov_base = &regs;
- iov.iov_len = sizeof(regs);
- ret = ptrace(PTRACE_SETREGSET, tracee, NT_PRSTATUS, &iov);
-#endif
- EXPECT_EQ(0, ret);
+/* Change only syscall number. */
+void change_syscall_nr(struct __test_metadata *_metadata,
+ pid_t tracee, long syscall)
+{
+ __change_syscall(_metadata, tracee, &syscall, NULL);
+}
+
+/* Change syscall return value (and set syscall number to -1). */
+void change_syscall_ret(struct __test_metadata *_metadata,
+ pid_t tracee, long ret)
+{
+ long syscall = -1;
+
+ __change_syscall(_metadata, tracee, &syscall, &ret);
}
-void tracer_syscall(struct __test_metadata *_metadata, pid_t tracee,
+void tracer_seccomp(struct __test_metadata *_metadata, pid_t tracee,
int status, void *args)
{
int ret;
unsigned long msg;
+ EXPECT_EQ(PTRACE_EVENT_MASK(status), PTRACE_EVENT_SECCOMP) {
+ TH_LOG("Unexpected ptrace event: %d", PTRACE_EVENT_MASK(status));
+ return;
+ }
+
/* Make sure we got the right message. */
ret = ptrace(PTRACE_GETEVENTMSG, tracee, NULL, &msg);
EXPECT_EQ(0, ret);
@@ -1765,17 +2006,17 @@ void tracer_syscall(struct __test_metadata *_metadata, pid_t tracee,
case 0x1002:
/* change getpid to getppid. */
EXPECT_EQ(__NR_getpid, get_syscall(_metadata, tracee));
- change_syscall(_metadata, tracee, __NR_getppid, 0);
+ change_syscall_nr(_metadata, tracee, __NR_getppid);
break;
case 0x1003:
/* skip gettid with valid return code. */
EXPECT_EQ(__NR_gettid, get_syscall(_metadata, tracee));
- change_syscall(_metadata, tracee, -1, 45000);
+ change_syscall_ret(_metadata, tracee, 45000);
break;
case 0x1004:
/* skip openat with error. */
EXPECT_EQ(__NR_openat, get_syscall(_metadata, tracee));
- change_syscall(_metadata, tracee, -1, -ESRCH);
+ change_syscall_ret(_metadata, tracee, -ESRCH);
break;
case 0x1005:
/* do nothing (allow getppid) */
@@ -1790,12 +2031,26 @@ void tracer_syscall(struct __test_metadata *_metadata, pid_t tracee,
}
+FIXTURE(TRACE_syscall) {
+ struct sock_fprog prog;
+ pid_t tracer, mytid, mypid, parent;
+ long syscall_nr;
+};
+
void tracer_ptrace(struct __test_metadata *_metadata, pid_t tracee,
int status, void *args)
{
- int ret, nr;
+ int ret;
unsigned long msg;
static bool entry;
+ long syscall_nr_val, syscall_ret_val;
+ long *syscall_nr = NULL, *syscall_ret = NULL;
+ FIXTURE_DATA(TRACE_syscall) *self = args;
+
+ EXPECT_EQ(WSTOPSIG(status) & 0x80, 0x80) {
+ TH_LOG("Unexpected WSTOPSIG: %d", WSTOPSIG(status));
+ return;
+ }
/*
* The traditional way to tell PTRACE_SYSCALL entry/exit
@@ -1809,22 +2064,64 @@ void tracer_ptrace(struct __test_metadata *_metadata, pid_t tracee,
EXPECT_EQ(entry ? PTRACE_EVENTMSG_SYSCALL_ENTRY
: PTRACE_EVENTMSG_SYSCALL_EXIT, msg);
- if (!entry)
- return;
+ /*
+ * Some architectures only support setting return values during
+ * syscall exit under ptrace, and on exit the syscall number may
+ * no longer be available. Therefore, save the initial sycall
+ * number here, so it can be examined during both entry and exit
+ * phases.
+ */
+ if (entry)
+ self->syscall_nr = get_syscall(_metadata, tracee);
- nr = get_syscall(_metadata, tracee);
+ /*
+ * Depending on the architecture's syscall setting abilities, we
+ * pick which things to set during this phase (entry or exit).
+ */
+ if (entry == ptrace_entry_set_syscall_nr)
+ syscall_nr = &syscall_nr_val;
+ if (entry == ptrace_entry_set_syscall_ret)
+ syscall_ret = &syscall_ret_val;
+
+ /* Now handle the actual rewriting cases. */
+ switch (self->syscall_nr) {
+ case __NR_getpid:
+ syscall_nr_val = __NR_getppid;
+ /* Never change syscall return for this case. */
+ syscall_ret = NULL;
+ break;
+ case __NR_gettid:
+ syscall_nr_val = -1;
+ syscall_ret_val = 45000;
+ break;
+ case __NR_openat:
+ syscall_nr_val = -1;
+ syscall_ret_val = -ESRCH;
+ break;
+ default:
+ /* Unhandled, do nothing. */
+ return;
+ }
- if (nr == __NR_getpid)
- change_syscall(_metadata, tracee, __NR_getppid, 0);
- if (nr == __NR_gettid)
- change_syscall(_metadata, tracee, -1, 45000);
- if (nr == __NR_openat)
- change_syscall(_metadata, tracee, -1, -ESRCH);
+ __change_syscall(_metadata, tracee, syscall_nr, syscall_ret);
}
-FIXTURE(TRACE_syscall) {
- struct sock_fprog prog;
- pid_t tracer, mytid, mypid, parent;
+FIXTURE_VARIANT(TRACE_syscall) {
+ /*
+ * All of the SECCOMP_RET_TRACE behaviors can be tested with either
+ * SECCOMP_RET_TRACE+PTRACE_CONT or plain ptrace()+PTRACE_SYSCALL.
+ * This indicates if we should use SECCOMP_RET_TRACE (false), or
+ * ptrace (true).
+ */
+ bool use_ptrace;
+};
+
+FIXTURE_VARIANT_ADD(TRACE_syscall, ptrace) {
+ .use_ptrace = true,
+};
+
+FIXTURE_VARIANT_ADD(TRACE_syscall, seccomp) {
+ .use_ptrace = false,
};
FIXTURE_SETUP(TRACE_syscall)
@@ -1842,12 +2139,11 @@ FIXTURE_SETUP(TRACE_syscall)
BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_TRACE | 0x1005),
BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
};
-
- memset(&self->prog, 0, sizeof(self->prog));
- self->prog.filter = malloc(sizeof(filter));
- ASSERT_NE(NULL, self->prog.filter);
- memcpy(self->prog.filter, filter, sizeof(filter));
- self->prog.len = (unsigned short)ARRAY_SIZE(filter);
+ struct sock_fprog prog = {
+ .len = (unsigned short)ARRAY_SIZE(filter),
+ .filter = filter,
+ };
+ long ret;
/* Prepare some testable syscall results. */
self->mytid = syscall(__NR_gettid);
@@ -1865,60 +2161,52 @@ FIXTURE_SETUP(TRACE_syscall)
ASSERT_NE(self->parent, self->mypid);
/* Launch tracer. */
- self->tracer = setup_trace_fixture(_metadata, tracer_syscall, NULL,
- false);
-}
+ self->tracer = setup_trace_fixture(_metadata,
+ variant->use_ptrace ? tracer_ptrace
+ : tracer_seccomp,
+ self, variant->use_ptrace);
-FIXTURE_TEARDOWN(TRACE_syscall)
-{
- teardown_trace_fixture(_metadata, self->tracer);
- if (self->prog.filter)
- free(self->prog.filter);
-}
+ ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
+ ASSERT_EQ(0, ret);
-TEST_F(TRACE_syscall, ptrace_syscall_redirected)
-{
- /* Swap SECCOMP_RET_TRACE tracer for PTRACE_SYSCALL tracer. */
- teardown_trace_fixture(_metadata, self->tracer);
- self->tracer = setup_trace_fixture(_metadata, tracer_ptrace, NULL,
- true);
+ /* Do not install seccomp rewrite filters, as we'll use ptrace instead. */
+ if (variant->use_ptrace)
+ return;
- /* Tracer will redirect getpid to getppid. */
- EXPECT_NE(self->mypid, syscall(__NR_getpid));
+ ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog, 0, 0);
+ ASSERT_EQ(0, ret);
}
-TEST_F(TRACE_syscall, ptrace_syscall_errno)
+FIXTURE_TEARDOWN(TRACE_syscall)
{
- /* Swap SECCOMP_RET_TRACE tracer for PTRACE_SYSCALL tracer. */
teardown_trace_fixture(_metadata, self->tracer);
- self->tracer = setup_trace_fixture(_metadata, tracer_ptrace, NULL,
- true);
-
- /* Tracer should skip the open syscall, resulting in ESRCH. */
- EXPECT_SYSCALL_RETURN(-ESRCH, syscall(__NR_openat));
}
-TEST_F(TRACE_syscall, ptrace_syscall_faked)
+TEST(negative_ENOSYS)
{
- /* Swap SECCOMP_RET_TRACE tracer for PTRACE_SYSCALL tracer. */
- teardown_trace_fixture(_metadata, self->tracer);
- self->tracer = setup_trace_fixture(_metadata, tracer_ptrace, NULL,
- true);
+#if defined(__arm__)
+ SKIP(return, "arm32 does not support calling syscall -1");
+#endif
+ /*
+ * There should be no difference between an "internal" skip
+ * and userspace asking for syscall "-1".
+ */
+ errno = 0;
+ EXPECT_EQ(-1, syscall(-1));
+ EXPECT_EQ(errno, ENOSYS);
+ /* And no difference for "still not valid but not -1". */
+ errno = 0;
+ EXPECT_EQ(-1, syscall(-101));
+ EXPECT_EQ(errno, ENOSYS);
+}
- /* Tracer should skip the gettid syscall, resulting fake pid. */
- EXPECT_SYSCALL_RETURN(45000, syscall(__NR_gettid));
+TEST_F(TRACE_syscall, negative_ENOSYS)
+{
+ negative_ENOSYS(_metadata);
}
TEST_F(TRACE_syscall, syscall_allowed)
{
- long ret;
-
- ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
- ASSERT_EQ(0, ret);
-
- ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->prog, 0, 0);
- ASSERT_EQ(0, ret);
-
/* getppid works as expected (no changes). */
EXPECT_EQ(self->parent, syscall(__NR_getppid));
EXPECT_NE(self->mypid, syscall(__NR_getppid));
@@ -1926,14 +2214,6 @@ TEST_F(TRACE_syscall, syscall_allowed)
TEST_F(TRACE_syscall, syscall_redirected)
{
- long ret;
-
- ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
- ASSERT_EQ(0, ret);
-
- ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->prog, 0, 0);
- ASSERT_EQ(0, ret);
-
/* getpid has been redirected to getppid as expected. */
EXPECT_EQ(self->parent, syscall(__NR_getpid));
EXPECT_NE(self->mypid, syscall(__NR_getpid));
@@ -1941,71 +2221,23 @@ TEST_F(TRACE_syscall, syscall_redirected)
TEST_F(TRACE_syscall, syscall_errno)
{
- long ret;
-
- ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
- ASSERT_EQ(0, ret);
-
- ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->prog, 0, 0);
- ASSERT_EQ(0, ret);
-
- /* openat has been skipped and an errno return. */
+ /* Tracer should skip the open syscall, resulting in ESRCH. */
EXPECT_SYSCALL_RETURN(-ESRCH, syscall(__NR_openat));
}
TEST_F(TRACE_syscall, syscall_faked)
{
- long ret;
-
- ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
- ASSERT_EQ(0, ret);
-
- ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->prog, 0, 0);
- ASSERT_EQ(0, ret);
-
- /* gettid has been skipped and an altered return value stored. */
+ /* Tracer skips the gettid syscall and store altered return value. */
EXPECT_SYSCALL_RETURN(45000, syscall(__NR_gettid));
}
-TEST_F(TRACE_syscall, skip_after_RET_TRACE)
+TEST_F_SIGNAL(TRACE_syscall, kill_immediate, SIGSYS)
{
struct sock_filter filter[] = {
BPF_STMT(BPF_LD|BPF_W|BPF_ABS,
offsetof(struct seccomp_data, nr)),
- BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_getppid, 0, 1),
- BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ERRNO | EPERM),
- BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
- };
- struct sock_fprog prog = {
- .len = (unsigned short)ARRAY_SIZE(filter),
- .filter = filter,
- };
- long ret;
-
- ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
- ASSERT_EQ(0, ret);
-
- /* Install fixture filter. */
- ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->prog, 0, 0);
- ASSERT_EQ(0, ret);
-
- /* Install "errno on getppid" filter. */
- ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog, 0, 0);
- ASSERT_EQ(0, ret);
-
- /* Tracer will redirect getpid to getppid, and we should see EPERM. */
- errno = 0;
- EXPECT_EQ(-1, syscall(__NR_getpid));
- EXPECT_EQ(EPERM, errno);
-}
-
-TEST_F_SIGNAL(TRACE_syscall, kill_after_RET_TRACE, SIGSYS)
-{
- struct sock_filter filter[] = {
- BPF_STMT(BPF_LD|BPF_W|BPF_ABS,
- offsetof(struct seccomp_data, nr)),
- BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_getppid, 0, 1),
- BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_KILL),
+ BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_mknodat, 0, 1),
+ BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_KILL_THREAD),
BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
};
struct sock_fprog prog = {
@@ -2014,22 +2246,15 @@ TEST_F_SIGNAL(TRACE_syscall, kill_after_RET_TRACE, SIGSYS)
};
long ret;
- ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
- ASSERT_EQ(0, ret);
-
- /* Install fixture filter. */
- ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->prog, 0, 0);
- ASSERT_EQ(0, ret);
-
- /* Install "death on getppid" filter. */
+ /* Install "kill on mknodat" filter. */
ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog, 0, 0);
ASSERT_EQ(0, ret);
- /* Tracer will redirect getpid to getppid, and we should die. */
- EXPECT_NE(self->mypid, syscall(__NR_getpid));
+ /* This should immediately die with SIGSYS, regardless of tracer. */
+ EXPECT_EQ(-1, syscall(__NR_mknodat, -1, NULL, 0, 0));
}
-TEST_F(TRACE_syscall, skip_after_ptrace)
+TEST_F(TRACE_syscall, skip_after)
{
struct sock_filter filter[] = {
BPF_STMT(BPF_LD|BPF_W|BPF_ABS,
@@ -2044,24 +2269,17 @@ TEST_F(TRACE_syscall, skip_after_ptrace)
};
long ret;
- /* Swap SECCOMP_RET_TRACE tracer for PTRACE_SYSCALL tracer. */
- teardown_trace_fixture(_metadata, self->tracer);
- self->tracer = setup_trace_fixture(_metadata, tracer_ptrace, NULL,
- true);
-
- ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
- ASSERT_EQ(0, ret);
-
- /* Install "errno on getppid" filter. */
+ /* Install additional "errno on getppid" filter. */
ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog, 0, 0);
ASSERT_EQ(0, ret);
/* Tracer will redirect getpid to getppid, and we should see EPERM. */
+ errno = 0;
EXPECT_EQ(-1, syscall(__NR_getpid));
EXPECT_EQ(EPERM, errno);
}
-TEST_F_SIGNAL(TRACE_syscall, kill_after_ptrace, SIGSYS)
+TEST_F_SIGNAL(TRACE_syscall, kill_after, SIGSYS)
{
struct sock_filter filter[] = {
BPF_STMT(BPF_LD|BPF_W|BPF_ABS,
@@ -2076,15 +2294,7 @@ TEST_F_SIGNAL(TRACE_syscall, kill_after_ptrace, SIGSYS)
};
long ret;
- /* Swap SECCOMP_RET_TRACE tracer for PTRACE_SYSCALL tracer. */
- teardown_trace_fixture(_metadata, self->tracer);
- self->tracer = setup_trace_fixture(_metadata, tracer_ptrace, NULL,
- true);
-
- ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
- ASSERT_EQ(0, ret);
-
- /* Install "death on getppid" filter. */
+ /* Install additional "death on getppid" filter. */
ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog, 0, 0);
ASSERT_EQ(0, ret);
@@ -2437,7 +2647,7 @@ void *tsync_sibling(void *data)
ret = prctl(PR_GET_NO_NEW_PRIVS, 0, 0, 0, 0);
if (!ret)
return (void *)SIBLING_EXIT_NEWPRIVS;
- read(0, NULL, 0);
+ read(-1, NULL, 0);
return (void *)SIBLING_EXIT_UNKILLED;
}
@@ -2865,7 +3075,8 @@ TEST(syscall_restart)
timeout.tv_sec = 1;
errno = 0;
EXPECT_EQ(0, nanosleep(&timeout, NULL)) {
- TH_LOG("Call to nanosleep() failed (errno %d)", errno);
+ TH_LOG("Call to nanosleep() failed (errno %d: %s)",
+ errno, strerror(errno));
}
/* Read final sync from parent. */
@@ -2877,8 +3088,7 @@ TEST(syscall_restart)
}
/* Directly report the status of our test harness results. */
- syscall(__NR_exit, _metadata->passed ? EXIT_SUCCESS
- : EXIT_FAILURE);
+ syscall(__NR_exit, _metadata->exit_code);
}
EXPECT_EQ(0, close(pipefd[0]));
@@ -2963,7 +3173,7 @@ TEST(syscall_restart)
ASSERT_EQ(child_pid, waitpid(child_pid, &status, 0));
if (WIFSIGNALED(status) || WEXITSTATUS(status))
- _metadata->passed = 0;
+ _metadata->exit_code = KSFT_FAIL;
}
TEST_SIGNAL(filter_flag_log, SIGSYS)
@@ -3069,7 +3279,7 @@ TEST(get_metadata)
/* Only real root can get metadata. */
if (geteuid()) {
- XFAIL(return, "get_metadata requires real root");
+ SKIP(return, "get_metadata requires real root");
return;
}
@@ -3112,7 +3322,7 @@ TEST(get_metadata)
ret = ptrace(PTRACE_SECCOMP_GET_METADATA, pid, sizeof(md), &md);
EXPECT_EQ(sizeof(md), ret) {
if (errno == EINVAL)
- XFAIL(goto skip, "Kernel does not support PTRACE_SECCOMP_GET_METADATA (missing CONFIG_CHECKPOINT_RESTORE?)");
+ SKIP(goto skip, "Kernel does not support PTRACE_SECCOMP_GET_METADATA (missing CONFIG_CHECKPOINT_RESTORE?)");
}
EXPECT_EQ(md.flags, SECCOMP_FILTER_FLAG_LOG);
@@ -3128,14 +3338,14 @@ skip:
ASSERT_EQ(0, kill(pid, SIGKILL));
}
-static int user_trap_syscall(int nr, unsigned int flags)
+static int user_notif_syscall(int nr, unsigned int flags)
{
struct sock_filter filter[] = {
- BPF_STMT(BPF_LD+BPF_W+BPF_ABS,
+ BPF_STMT(BPF_LD|BPF_W|BPF_ABS,
offsetof(struct seccomp_data, nr)),
- BPF_JUMP(BPF_JMP+BPF_JEQ+BPF_K, nr, 0, 1),
- BPF_STMT(BPF_RET+BPF_K, SECCOMP_RET_USER_NOTIF),
- BPF_STMT(BPF_RET+BPF_K, SECCOMP_RET_ALLOW),
+ BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, nr, 0, 1),
+ BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_USER_NOTIF),
+ BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
};
struct sock_fprog prog = {
@@ -3174,7 +3384,7 @@ TEST(user_notification_basic)
/* Check that we get -ENOSYS with no listener attached */
if (pid == 0) {
- if (user_trap_syscall(__NR_getppid, 0) < 0)
+ if (user_notif_syscall(__NR_getppid, 0) < 0)
exit(1);
ret = syscall(__NR_getppid);
exit(ret >= 0 || errno != ENOSYS);
@@ -3191,13 +3401,13 @@ TEST(user_notification_basic)
EXPECT_EQ(seccomp(SECCOMP_SET_MODE_FILTER, 0, &prog), 0);
/* Check that the basic notification machinery works */
- listener = user_trap_syscall(__NR_getppid,
- SECCOMP_FILTER_FLAG_NEW_LISTENER);
+ listener = user_notif_syscall(__NR_getppid,
+ SECCOMP_FILTER_FLAG_NEW_LISTENER);
ASSERT_GE(listener, 0);
/* Installing a second listener in the chain should EBUSY */
- EXPECT_EQ(user_trap_syscall(__NR_getppid,
- SECCOMP_FILTER_FLAG_NEW_LISTENER),
+ EXPECT_EQ(user_notif_syscall(__NR_getppid,
+ SECCOMP_FILTER_FLAG_NEW_LISTENER),
-1);
EXPECT_EQ(errno, EBUSY);
@@ -3258,15 +3468,20 @@ TEST(user_notification_with_tsync)
int ret;
unsigned int flags;
+ ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
+ ASSERT_EQ(0, ret) {
+ TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!");
+ }
+
/* these were exclusive */
flags = SECCOMP_FILTER_FLAG_NEW_LISTENER |
SECCOMP_FILTER_FLAG_TSYNC;
- ASSERT_EQ(-1, user_trap_syscall(__NR_getppid, flags));
+ ASSERT_EQ(-1, user_notif_syscall(__NR_getppid, flags));
ASSERT_EQ(EINVAL, errno);
/* but now they're not */
flags |= SECCOMP_FILTER_FLAG_TSYNC_ESRCH;
- ret = user_trap_syscall(__NR_getppid, flags);
+ ret = user_notif_syscall(__NR_getppid, flags);
close(ret);
ASSERT_LE(0, ret);
}
@@ -3284,8 +3499,8 @@ TEST(user_notification_kill_in_middle)
TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!");
}
- listener = user_trap_syscall(__NR_getppid,
- SECCOMP_FILTER_FLAG_NEW_LISTENER);
+ listener = user_notif_syscall(__NR_getppid,
+ SECCOMP_FILTER_FLAG_NEW_LISTENER);
ASSERT_GE(listener, 0);
/*
@@ -3338,8 +3553,8 @@ TEST(user_notification_signal)
ASSERT_EQ(socketpair(PF_LOCAL, SOCK_SEQPACKET, 0, sk_pair), 0);
- listener = user_trap_syscall(__NR_gettid,
- SECCOMP_FILTER_FLAG_NEW_LISTENER);
+ listener = user_notif_syscall(__NR_gettid,
+ SECCOMP_FILTER_FLAG_NEW_LISTENER);
ASSERT_GE(listener, 0);
pid = fork();
@@ -3408,8 +3623,8 @@ TEST(user_notification_closed_listener)
TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!");
}
- listener = user_trap_syscall(__NR_getppid,
- SECCOMP_FILTER_FLAG_NEW_LISTENER);
+ listener = user_notif_syscall(__NR_getppid,
+ SECCOMP_FILTER_FLAG_NEW_LISTENER);
ASSERT_GE(listener, 0);
/*
@@ -3440,10 +3655,13 @@ TEST(user_notification_child_pid_ns)
struct seccomp_notif req = {};
struct seccomp_notif_resp resp = {};
- ASSERT_EQ(unshare(CLONE_NEWUSER | CLONE_NEWPID), 0);
+ ASSERT_EQ(unshare(CLONE_NEWUSER | CLONE_NEWPID), 0) {
+ if (errno == EINVAL)
+ SKIP(return, "kernel missing CLONE_NEWUSER support");
+ };
- listener = user_trap_syscall(__NR_getppid,
- SECCOMP_FILTER_FLAG_NEW_LISTENER);
+ listener = user_notif_syscall(__NR_getppid,
+ SECCOMP_FILTER_FLAG_NEW_LISTENER);
ASSERT_GE(listener, 0);
pid = fork();
@@ -3482,15 +3700,20 @@ TEST(user_notification_sibling_pid_ns)
TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!");
}
- listener = user_trap_syscall(__NR_getppid,
- SECCOMP_FILTER_FLAG_NEW_LISTENER);
+ listener = user_notif_syscall(__NR_getppid,
+ SECCOMP_FILTER_FLAG_NEW_LISTENER);
ASSERT_GE(listener, 0);
pid = fork();
ASSERT_GE(pid, 0);
if (pid == 0) {
- ASSERT_EQ(unshare(CLONE_NEWPID), 0);
+ ASSERT_EQ(unshare(CLONE_NEWPID), 0) {
+ if (errno == EPERM)
+ SKIP(return, "CLONE_NEWPID requires CAP_SYS_ADMIN");
+ else if (errno == EINVAL)
+ SKIP(return, "CLONE_NEWPID is invalid (missing CONFIG_PID_NS?)");
+ }
pid2 = fork();
ASSERT_GE(pid2, 0);
@@ -3505,7 +3728,12 @@ TEST(user_notification_sibling_pid_ns)
}
/* Create the sibling ns, and sibling in it. */
- ASSERT_EQ(unshare(CLONE_NEWPID), 0);
+ ASSERT_EQ(unshare(CLONE_NEWPID), 0) {
+ if (errno == EPERM)
+ SKIP(return, "CLONE_NEWPID requires CAP_SYS_ADMIN");
+ else if (errno == EINVAL)
+ SKIP(return, "CLONE_NEWPID is invalid (missing CONFIG_PID_NS?)");
+ }
ASSERT_EQ(errno, 0);
pid2 = fork();
@@ -3545,10 +3773,13 @@ TEST(user_notification_fault_recv)
struct seccomp_notif req = {};
struct seccomp_notif_resp resp = {};
- ASSERT_EQ(unshare(CLONE_NEWUSER), 0);
+ ASSERT_EQ(unshare(CLONE_NEWUSER), 0) {
+ if (errno == EINVAL)
+ SKIP(return, "kernel missing CLONE_NEWUSER support");
+ }
- listener = user_trap_syscall(__NR_getppid,
- SECCOMP_FILTER_FLAG_NEW_LISTENER);
+ listener = user_notif_syscall(__NR_getppid,
+ SECCOMP_FILTER_FLAG_NEW_LISTENER);
ASSERT_GE(listener, 0);
pid = fork();
@@ -3585,16 +3816,6 @@ TEST(seccomp_get_notif_sizes)
EXPECT_EQ(sizes.seccomp_notif_resp, sizeof(struct seccomp_notif_resp));
}
-static int filecmp(pid_t pid1, pid_t pid2, int fd1, int fd2)
-{
-#ifdef __NR_kcmp
- return syscall(__NR_kcmp, pid1, pid2, KCMP_FILE, fd1, fd2);
-#else
- errno = ENOSYS;
- return -1;
-#endif
-}
-
TEST(user_notification_continue)
{
pid_t pid;
@@ -3609,7 +3830,7 @@ TEST(user_notification_continue)
TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!");
}
- listener = user_trap_syscall(__NR_dup, SECCOMP_FILTER_FLAG_NEW_LISTENER);
+ listener = user_notif_syscall(__NR_dup, SECCOMP_FILTER_FLAG_NEW_LISTENER);
ASSERT_GE(listener, 0);
pid = fork();
@@ -3619,20 +3840,14 @@ TEST(user_notification_continue)
int dup_fd, pipe_fds[2];
pid_t self;
- ret = pipe(pipe_fds);
- if (ret < 0)
- exit(1);
+ ASSERT_GE(pipe(pipe_fds), 0);
dup_fd = dup(pipe_fds[0]);
- if (dup_fd < 0)
- exit(1);
+ ASSERT_GE(dup_fd, 0);
+ EXPECT_NE(pipe_fds[0], dup_fd);
self = getpid();
-
- ret = filecmp(self, self, pipe_fds[0], dup_fd);
- if (ret)
- exit(2);
-
+ ASSERT_EQ(filecmp(self, self, pipe_fds[0], dup_fd), 0);
exit(0);
}
@@ -3673,7 +3888,7 @@ TEST(user_notification_continue)
resp.val = 0;
EXPECT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_SEND, &resp), 0) {
if (errno == EINVAL)
- XFAIL(goto skip, "Kernel does not support SECCOMP_USER_NOTIF_FLAG_CONTINUE");
+ SKIP(goto skip, "Kernel does not support SECCOMP_USER_NOTIF_FLAG_CONTINUE");
}
skip:
@@ -3681,15 +3896,867 @@ skip:
EXPECT_EQ(true, WIFEXITED(status));
EXPECT_EQ(0, WEXITSTATUS(status)) {
if (WEXITSTATUS(status) == 2) {
- XFAIL(return, "Kernel does not support kcmp() syscall");
+ SKIP(return, "Kernel does not support kcmp() syscall");
return;
}
}
}
+TEST(user_notification_filter_empty)
+{
+ pid_t pid;
+ long ret;
+ int status;
+ struct pollfd pollfd;
+ struct __clone_args args = {
+ .flags = CLONE_FILES,
+ .exit_signal = SIGCHLD,
+ };
+
+ ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
+ ASSERT_EQ(0, ret) {
+ TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!");
+ }
+
+ if (__NR_clone3 < 0)
+ SKIP(return, "Test not built with clone3 support");
+
+ pid = sys_clone3(&args, sizeof(args));
+ ASSERT_GE(pid, 0);
+
+ if (pid == 0) {
+ int listener;
+
+ listener = user_notif_syscall(__NR_mknodat, SECCOMP_FILTER_FLAG_NEW_LISTENER);
+ if (listener < 0)
+ _exit(EXIT_FAILURE);
+
+ if (dup2(listener, 200) != 200)
+ _exit(EXIT_FAILURE);
+
+ close(listener);
+
+ _exit(EXIT_SUCCESS);
+ }
+
+ EXPECT_EQ(waitpid(pid, &status, 0), pid);
+ EXPECT_EQ(true, WIFEXITED(status));
+ EXPECT_EQ(0, WEXITSTATUS(status));
+
+ /*
+ * The seccomp filter has become unused so we should be notified once
+ * the kernel gets around to cleaning up task struct.
+ */
+ pollfd.fd = 200;
+ pollfd.events = POLLHUP;
+
+ EXPECT_GT(poll(&pollfd, 1, 2000), 0);
+ EXPECT_GT((pollfd.revents & POLLHUP) ?: 0, 0);
+}
+
+static void *do_thread(void *data)
+{
+ return NULL;
+}
+
+TEST(user_notification_filter_empty_threaded)
+{
+ pid_t pid;
+ long ret;
+ int status;
+ struct pollfd pollfd;
+ struct __clone_args args = {
+ .flags = CLONE_FILES,
+ .exit_signal = SIGCHLD,
+ };
+
+ ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
+ ASSERT_EQ(0, ret) {
+ TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!");
+ }
+
+ if (__NR_clone3 < 0)
+ SKIP(return, "Test not built with clone3 support");
+
+ pid = sys_clone3(&args, sizeof(args));
+ ASSERT_GE(pid, 0);
+
+ if (pid == 0) {
+ pid_t pid1, pid2;
+ int listener, status;
+ pthread_t thread;
+
+ listener = user_notif_syscall(__NR_dup, SECCOMP_FILTER_FLAG_NEW_LISTENER);
+ if (listener < 0)
+ _exit(EXIT_FAILURE);
+
+ if (dup2(listener, 200) != 200)
+ _exit(EXIT_FAILURE);
+
+ close(listener);
+
+ pid1 = fork();
+ if (pid1 < 0)
+ _exit(EXIT_FAILURE);
+
+ if (pid1 == 0)
+ _exit(EXIT_SUCCESS);
+
+ pid2 = fork();
+ if (pid2 < 0)
+ _exit(EXIT_FAILURE);
+
+ if (pid2 == 0)
+ _exit(EXIT_SUCCESS);
+
+ if (pthread_create(&thread, NULL, do_thread, NULL) ||
+ pthread_join(thread, NULL))
+ _exit(EXIT_FAILURE);
+
+ if (pthread_create(&thread, NULL, do_thread, NULL) ||
+ pthread_join(thread, NULL))
+ _exit(EXIT_FAILURE);
+
+ if (waitpid(pid1, &status, 0) != pid1 || !WIFEXITED(status) ||
+ WEXITSTATUS(status))
+ _exit(EXIT_FAILURE);
+
+ if (waitpid(pid2, &status, 0) != pid2 || !WIFEXITED(status) ||
+ WEXITSTATUS(status))
+ _exit(EXIT_FAILURE);
+
+ exit(EXIT_SUCCESS);
+ }
+
+ EXPECT_EQ(waitpid(pid, &status, 0), pid);
+ EXPECT_EQ(true, WIFEXITED(status));
+ EXPECT_EQ(0, WEXITSTATUS(status));
+
+ /*
+ * The seccomp filter has become unused so we should be notified once
+ * the kernel gets around to cleaning up task struct.
+ */
+ pollfd.fd = 200;
+ pollfd.events = POLLHUP;
+
+ EXPECT_GT(poll(&pollfd, 1, 2000), 0);
+ EXPECT_GT((pollfd.revents & POLLHUP) ?: 0, 0);
+}
+
+
+int get_next_fd(int prev_fd)
+{
+ for (int i = prev_fd + 1; i < FD_SETSIZE; ++i) {
+ if (fcntl(i, F_GETFD) == -1)
+ return i;
+ }
+ _exit(EXIT_FAILURE);
+}
+
+TEST(user_notification_addfd)
+{
+ pid_t pid;
+ long ret;
+ int status, listener, memfd, fd, nextfd;
+ struct seccomp_notif_addfd addfd = {};
+ struct seccomp_notif_addfd_small small = {};
+ struct seccomp_notif_addfd_big big = {};
+ struct seccomp_notif req = {};
+ struct seccomp_notif_resp resp = {};
+ /* 100 ms */
+ struct timespec delay = { .tv_nsec = 100000000 };
+
+ /* There may be arbitrary already-open fds at test start. */
+ memfd = memfd_create("test", 0);
+ ASSERT_GE(memfd, 0);
+ nextfd = get_next_fd(memfd);
+
+ ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
+ ASSERT_EQ(0, ret) {
+ TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!");
+ }
+
+ /* fd: 4 */
+ /* Check that the basic notification machinery works */
+ listener = user_notif_syscall(__NR_getppid,
+ SECCOMP_FILTER_FLAG_NEW_LISTENER);
+ ASSERT_EQ(listener, nextfd);
+ nextfd = get_next_fd(nextfd);
+
+ pid = fork();
+ ASSERT_GE(pid, 0);
+
+ if (pid == 0) {
+ /* fds will be added and this value is expected */
+ if (syscall(__NR_getppid) != USER_NOTIF_MAGIC)
+ exit(1);
+
+ /* Atomic addfd+send is received here. Check it is a valid fd */
+ if (fcntl(syscall(__NR_getppid), F_GETFD) == -1)
+ exit(1);
+
+ exit(syscall(__NR_getppid) != USER_NOTIF_MAGIC);
+ }
+
+ ASSERT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_RECV, &req), 0);
+
+ addfd.srcfd = memfd;
+ addfd.newfd = 0;
+ addfd.id = req.id;
+ addfd.flags = 0x0;
+
+ /* Verify bad newfd_flags cannot be set */
+ addfd.newfd_flags = ~O_CLOEXEC;
+ EXPECT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_ADDFD, &addfd), -1);
+ EXPECT_EQ(errno, EINVAL);
+ addfd.newfd_flags = O_CLOEXEC;
+
+ /* Verify bad flags cannot be set */
+ addfd.flags = 0xff;
+ EXPECT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_ADDFD, &addfd), -1);
+ EXPECT_EQ(errno, EINVAL);
+ addfd.flags = 0;
+
+ /* Verify that remote_fd cannot be set without setting flags */
+ addfd.newfd = 1;
+ EXPECT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_ADDFD, &addfd), -1);
+ EXPECT_EQ(errno, EINVAL);
+ addfd.newfd = 0;
+
+ /* Verify small size cannot be set */
+ EXPECT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_ADDFD_SMALL, &small), -1);
+ EXPECT_EQ(errno, EINVAL);
+
+ /* Verify we can't send bits filled in unknown buffer area */
+ memset(&big, 0xAA, sizeof(big));
+ big.addfd = addfd;
+ EXPECT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_ADDFD_BIG, &big), -1);
+ EXPECT_EQ(errno, E2BIG);
+
+
+ /* Verify we can set an arbitrary remote fd */
+ fd = ioctl(listener, SECCOMP_IOCTL_NOTIF_ADDFD, &addfd);
+ EXPECT_EQ(fd, nextfd);
+ nextfd = get_next_fd(nextfd);
+ EXPECT_EQ(filecmp(getpid(), pid, memfd, fd), 0);
+
+ /* Verify we can set an arbitrary remote fd with large size */
+ memset(&big, 0x0, sizeof(big));
+ big.addfd = addfd;
+ fd = ioctl(listener, SECCOMP_IOCTL_NOTIF_ADDFD_BIG, &big);
+ EXPECT_EQ(fd, nextfd);
+ nextfd = get_next_fd(nextfd);
+
+ /* Verify we can set a specific remote fd */
+ addfd.newfd = 42;
+ addfd.flags = SECCOMP_ADDFD_FLAG_SETFD;
+ fd = ioctl(listener, SECCOMP_IOCTL_NOTIF_ADDFD, &addfd);
+ EXPECT_EQ(fd, 42);
+ EXPECT_EQ(filecmp(getpid(), pid, memfd, fd), 0);
+
+ /* Resume syscall */
+ resp.id = req.id;
+ resp.error = 0;
+ resp.val = USER_NOTIF_MAGIC;
+ EXPECT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_SEND, &resp), 0);
+
+ /*
+ * This sets the ID of the ADD FD to the last request plus 1. The
+ * notification ID increments 1 per notification.
+ */
+ addfd.id = req.id + 1;
+
+ /* This spins until the underlying notification is generated */
+ while (ioctl(listener, SECCOMP_IOCTL_NOTIF_ADDFD, &addfd) != -1 &&
+ errno != -EINPROGRESS)
+ nanosleep(&delay, NULL);
+
+ memset(&req, 0, sizeof(req));
+ ASSERT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_RECV, &req), 0);
+ ASSERT_EQ(addfd.id, req.id);
+
+ /* Verify we can do an atomic addfd and send */
+ addfd.newfd = 0;
+ addfd.flags = SECCOMP_ADDFD_FLAG_SEND;
+ fd = ioctl(listener, SECCOMP_IOCTL_NOTIF_ADDFD, &addfd);
+ /*
+ * Child has earlier "low" fds and now 42, so we expect the next
+ * lowest available fd to be assigned here.
+ */
+ EXPECT_EQ(fd, nextfd);
+ nextfd = get_next_fd(nextfd);
+ ASSERT_EQ(filecmp(getpid(), pid, memfd, fd), 0);
+
+ /*
+ * This sets the ID of the ADD FD to the last request plus 1. The
+ * notification ID increments 1 per notification.
+ */
+ addfd.id = req.id + 1;
+
+ /* This spins until the underlying notification is generated */
+ while (ioctl(listener, SECCOMP_IOCTL_NOTIF_ADDFD, &addfd) != -1 &&
+ errno != -EINPROGRESS)
+ nanosleep(&delay, NULL);
+
+ memset(&req, 0, sizeof(req));
+ ASSERT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_RECV, &req), 0);
+ ASSERT_EQ(addfd.id, req.id);
+
+ resp.id = req.id;
+ resp.error = 0;
+ resp.val = USER_NOTIF_MAGIC;
+ EXPECT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_SEND, &resp), 0);
+
+ /* Wait for child to finish. */
+ EXPECT_EQ(waitpid(pid, &status, 0), pid);
+ EXPECT_EQ(true, WIFEXITED(status));
+ EXPECT_EQ(0, WEXITSTATUS(status));
+
+ close(memfd);
+}
+
+TEST(user_notification_addfd_rlimit)
+{
+ pid_t pid;
+ long ret;
+ int status, listener, memfd;
+ struct seccomp_notif_addfd addfd = {};
+ struct seccomp_notif req = {};
+ struct seccomp_notif_resp resp = {};
+ const struct rlimit lim = {
+ .rlim_cur = 0,
+ .rlim_max = 0,
+ };
+
+ memfd = memfd_create("test", 0);
+ ASSERT_GE(memfd, 0);
+
+ ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
+ ASSERT_EQ(0, ret) {
+ TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!");
+ }
+
+ /* Check that the basic notification machinery works */
+ listener = user_notif_syscall(__NR_getppid,
+ SECCOMP_FILTER_FLAG_NEW_LISTENER);
+ ASSERT_GE(listener, 0);
+
+ pid = fork();
+ ASSERT_GE(pid, 0);
+
+ if (pid == 0)
+ exit(syscall(__NR_getppid) != USER_NOTIF_MAGIC);
+
+
+ ASSERT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_RECV, &req), 0);
+
+ ASSERT_EQ(prlimit(pid, RLIMIT_NOFILE, &lim, NULL), 0);
+
+ addfd.srcfd = memfd;
+ addfd.newfd_flags = O_CLOEXEC;
+ addfd.newfd = 0;
+ addfd.id = req.id;
+ addfd.flags = 0;
+
+ /* Should probably spot check /proc/sys/fs/file-nr */
+ EXPECT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_ADDFD, &addfd), -1);
+ EXPECT_EQ(errno, EMFILE);
+
+ addfd.flags = SECCOMP_ADDFD_FLAG_SEND;
+ EXPECT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_ADDFD, &addfd), -1);
+ EXPECT_EQ(errno, EMFILE);
+
+ addfd.newfd = 100;
+ addfd.flags = SECCOMP_ADDFD_FLAG_SETFD;
+ EXPECT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_ADDFD, &addfd), -1);
+ EXPECT_EQ(errno, EBADF);
+
+ resp.id = req.id;
+ resp.error = 0;
+ resp.val = USER_NOTIF_MAGIC;
+
+ EXPECT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_SEND, &resp), 0);
+
+ /* Wait for child to finish. */
+ EXPECT_EQ(waitpid(pid, &status, 0), pid);
+ EXPECT_EQ(true, WIFEXITED(status));
+ EXPECT_EQ(0, WEXITSTATUS(status));
+
+ close(memfd);
+}
+
+#ifndef SECCOMP_USER_NOTIF_FD_SYNC_WAKE_UP
+#define SECCOMP_USER_NOTIF_FD_SYNC_WAKE_UP (1UL << 0)
+#define SECCOMP_IOCTL_NOTIF_SET_FLAGS SECCOMP_IOW(4, __u64)
+#endif
+
+TEST(user_notification_sync)
+{
+ struct seccomp_notif req = {};
+ struct seccomp_notif_resp resp = {};
+ int status, listener;
+ pid_t pid;
+ long ret;
+
+ ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
+ ASSERT_EQ(0, ret) {
+ TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!");
+ }
+
+ listener = user_notif_syscall(__NR_getppid,
+ SECCOMP_FILTER_FLAG_NEW_LISTENER);
+ ASSERT_GE(listener, 0);
+
+ /* Try to set invalid flags. */
+ EXPECT_SYSCALL_RETURN(-EINVAL,
+ ioctl(listener, SECCOMP_IOCTL_NOTIF_SET_FLAGS, 0xffffffff, 0));
+
+ ASSERT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_SET_FLAGS,
+ SECCOMP_USER_NOTIF_FD_SYNC_WAKE_UP, 0), 0);
+
+ pid = fork();
+ ASSERT_GE(pid, 0);
+ if (pid == 0) {
+ ret = syscall(__NR_getppid);
+ ASSERT_EQ(ret, USER_NOTIF_MAGIC) {
+ _exit(1);
+ }
+ _exit(0);
+ }
+
+ req.pid = 0;
+ ASSERT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_RECV, &req), 0);
+
+ ASSERT_EQ(req.data.nr, __NR_getppid);
+
+ resp.id = req.id;
+ resp.error = 0;
+ resp.val = USER_NOTIF_MAGIC;
+ resp.flags = 0;
+ ASSERT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_SEND, &resp), 0);
+
+ ASSERT_EQ(waitpid(pid, &status, 0), pid);
+ ASSERT_EQ(status, 0);
+}
+
+
+/* Make sure PTRACE_O_SUSPEND_SECCOMP requires CAP_SYS_ADMIN. */
+FIXTURE(O_SUSPEND_SECCOMP) {
+ pid_t pid;
+};
+
+FIXTURE_SETUP(O_SUSPEND_SECCOMP)
+{
+ ERRNO_FILTER(block_read, E2BIG);
+ cap_value_t cap_list[] = { CAP_SYS_ADMIN };
+ cap_t caps;
+
+ self->pid = 0;
+
+ /* make sure we don't have CAP_SYS_ADMIN */
+ caps = cap_get_proc();
+ ASSERT_NE(NULL, caps);
+ ASSERT_EQ(0, cap_set_flag(caps, CAP_EFFECTIVE, 1, cap_list, CAP_CLEAR));
+ ASSERT_EQ(0, cap_set_proc(caps));
+ cap_free(caps);
+
+ ASSERT_EQ(0, prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0));
+ ASSERT_EQ(0, prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog_block_read));
+
+ self->pid = fork();
+ ASSERT_GE(self->pid, 0);
+
+ if (self->pid == 0) {
+ while (1)
+ pause();
+ _exit(127);
+ }
+}
+
+FIXTURE_TEARDOWN(O_SUSPEND_SECCOMP)
+{
+ if (self->pid)
+ kill(self->pid, SIGKILL);
+}
+
+TEST_F(O_SUSPEND_SECCOMP, setoptions)
+{
+ int wstatus;
+
+ ASSERT_EQ(0, ptrace(PTRACE_ATTACH, self->pid, NULL, 0));
+ ASSERT_EQ(self->pid, wait(&wstatus));
+ ASSERT_EQ(-1, ptrace(PTRACE_SETOPTIONS, self->pid, NULL, PTRACE_O_SUSPEND_SECCOMP));
+ if (errno == EINVAL)
+ SKIP(return, "Kernel does not support PTRACE_O_SUSPEND_SECCOMP (missing CONFIG_CHECKPOINT_RESTORE?)");
+ ASSERT_EQ(EPERM, errno);
+}
+
+TEST_F(O_SUSPEND_SECCOMP, seize)
+{
+ int ret;
+
+ ret = ptrace(PTRACE_SEIZE, self->pid, NULL, PTRACE_O_SUSPEND_SECCOMP);
+ ASSERT_EQ(-1, ret);
+ if (errno == EINVAL)
+ SKIP(return, "Kernel does not support PTRACE_O_SUSPEND_SECCOMP (missing CONFIG_CHECKPOINT_RESTORE?)");
+ ASSERT_EQ(EPERM, errno);
+}
+
+/*
+ * get_nth - Get the nth, space separated entry in a file.
+ *
+ * Returns the length of the read field.
+ * Throws error if field is zero-lengthed.
+ */
+static ssize_t get_nth(struct __test_metadata *_metadata, const char *path,
+ const unsigned int position, char **entry)
+{
+ char *line = NULL;
+ unsigned int i;
+ ssize_t nread;
+ size_t len = 0;
+ FILE *f;
+
+ f = fopen(path, "r");
+ ASSERT_NE(f, NULL) {
+ TH_LOG("Could not open %s: %s", path, strerror(errno));
+ }
+
+ for (i = 0; i < position; i++) {
+ nread = getdelim(&line, &len, ' ', f);
+ ASSERT_GE(nread, 0) {
+ TH_LOG("Failed to read %d entry in file %s", i, path);
+ }
+ }
+ fclose(f);
+
+ ASSERT_GT(nread, 0) {
+ TH_LOG("Entry in file %s had zero length", path);
+ }
+
+ *entry = line;
+ return nread - 1;
+}
+
+/* For a given PID, get the task state (D, R, etc...) */
+static char get_proc_stat(struct __test_metadata *_metadata, pid_t pid)
+{
+ char proc_path[100] = {0};
+ char status;
+ char *line;
+
+ snprintf(proc_path, sizeof(proc_path), "/proc/%d/stat", pid);
+ ASSERT_EQ(get_nth(_metadata, proc_path, 3, &line), 1);
+
+ status = *line;
+ free(line);
+
+ return status;
+}
+
+TEST(user_notification_fifo)
+{
+ struct seccomp_notif_resp resp = {};
+ struct seccomp_notif req = {};
+ int i, status, listener;
+ pid_t pid, pids[3];
+ __u64 baseid;
+ long ret;
+ /* 100 ms */
+ struct timespec delay = { .tv_nsec = 100000000 };
+
+ ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
+ ASSERT_EQ(0, ret) {
+ TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!");
+ }
+
+ /* Setup a listener */
+ listener = user_notif_syscall(__NR_getppid,
+ SECCOMP_FILTER_FLAG_NEW_LISTENER);
+ ASSERT_GE(listener, 0);
+
+ pid = fork();
+ ASSERT_GE(pid, 0);
+
+ if (pid == 0) {
+ ret = syscall(__NR_getppid);
+ exit(ret != USER_NOTIF_MAGIC);
+ }
+
+ EXPECT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_RECV, &req), 0);
+ baseid = req.id + 1;
+
+ resp.id = req.id;
+ resp.error = 0;
+ resp.val = USER_NOTIF_MAGIC;
+
+ /* check that we make sure flags == 0 */
+ EXPECT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_SEND, &resp), 0);
+
+ EXPECT_EQ(waitpid(pid, &status, 0), pid);
+ EXPECT_EQ(true, WIFEXITED(status));
+ EXPECT_EQ(0, WEXITSTATUS(status));
+
+ /* Start children, and generate notifications */
+ for (i = 0; i < ARRAY_SIZE(pids); i++) {
+ pid = fork();
+ if (pid == 0) {
+ ret = syscall(__NR_getppid);
+ exit(ret != USER_NOTIF_MAGIC);
+ }
+ pids[i] = pid;
+ }
+
+ /* This spins until all of the children are sleeping */
+restart_wait:
+ for (i = 0; i < ARRAY_SIZE(pids); i++) {
+ if (get_proc_stat(_metadata, pids[i]) != 'S') {
+ nanosleep(&delay, NULL);
+ goto restart_wait;
+ }
+ }
+
+ /* Read the notifications in order (and respond) */
+ for (i = 0; i < ARRAY_SIZE(pids); i++) {
+ memset(&req, 0, sizeof(req));
+ EXPECT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_RECV, &req), 0);
+ EXPECT_EQ(req.id, baseid + i);
+ resp.id = req.id;
+ EXPECT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_SEND, &resp), 0);
+ }
+
+ /* Make sure notifications were received */
+ for (i = 0; i < ARRAY_SIZE(pids); i++) {
+ EXPECT_EQ(waitpid(pids[i], &status, 0), pids[i]);
+ EXPECT_EQ(true, WIFEXITED(status));
+ EXPECT_EQ(0, WEXITSTATUS(status));
+ }
+}
+
+/* get_proc_syscall - Get the syscall in progress for a given pid
+ *
+ * Returns the current syscall number for a given process
+ * Returns -1 if not in syscall (running or blocked)
+ */
+static long get_proc_syscall(struct __test_metadata *_metadata, int pid)
+{
+ char proc_path[100] = {0};
+ long ret = -1;
+ ssize_t nread;
+ char *line;
+
+ snprintf(proc_path, sizeof(proc_path), "/proc/%d/syscall", pid);
+ nread = get_nth(_metadata, proc_path, 1, &line);
+ ASSERT_GT(nread, 0);
+
+ if (!strncmp("running", line, MIN(7, nread)))
+ ret = strtol(line, NULL, 16);
+
+ free(line);
+ return ret;
+}
+
+/* Ensure non-fatal signals prior to receive are unmodified */
+TEST(user_notification_wait_killable_pre_notification)
+{
+ struct sigaction new_action = {
+ .sa_handler = signal_handler,
+ };
+ int listener, status, sk_pair[2];
+ pid_t pid;
+ long ret;
+ char c;
+ /* 100 ms */
+ struct timespec delay = { .tv_nsec = 100000000 };
+
+ ASSERT_EQ(sigemptyset(&new_action.sa_mask), 0);
+
+ ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
+ ASSERT_EQ(0, ret)
+ {
+ TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!");
+ }
+
+ ASSERT_EQ(socketpair(PF_LOCAL, SOCK_SEQPACKET, 0, sk_pair), 0);
+
+ listener = user_notif_syscall(
+ __NR_getppid, SECCOMP_FILTER_FLAG_NEW_LISTENER |
+ SECCOMP_FILTER_FLAG_WAIT_KILLABLE_RECV);
+ ASSERT_GE(listener, 0);
+
+ /*
+ * Check that we can kill the process with SIGUSR1 prior to receiving
+ * the notification. SIGUSR1 is wired up to a custom signal handler,
+ * and make sure it gets called.
+ */
+ pid = fork();
+ ASSERT_GE(pid, 0);
+
+ if (pid == 0) {
+ close(sk_pair[0]);
+ handled = sk_pair[1];
+
+ /* Setup the non-fatal sigaction without SA_RESTART */
+ if (sigaction(SIGUSR1, &new_action, NULL)) {
+ perror("sigaction");
+ exit(1);
+ }
+
+ ret = syscall(__NR_getppid);
+ /* Make sure we got a return from a signal interruption */
+ exit(ret != -1 || errno != EINTR);
+ }
+
+ /*
+ * Make sure we've gotten to the seccomp user notification wait
+ * from getppid prior to sending any signals
+ */
+ while (get_proc_syscall(_metadata, pid) != __NR_getppid &&
+ get_proc_stat(_metadata, pid) != 'S')
+ nanosleep(&delay, NULL);
+
+ /* Send non-fatal kill signal */
+ EXPECT_EQ(kill(pid, SIGUSR1), 0);
+
+ /* wait for process to exit (exit checks for EINTR) */
+ EXPECT_EQ(waitpid(pid, &status, 0), pid);
+ EXPECT_EQ(true, WIFEXITED(status));
+ EXPECT_EQ(0, WEXITSTATUS(status));
+
+ EXPECT_EQ(read(sk_pair[0], &c, 1), 1);
+}
+
+/* Ensure non-fatal signals after receive are blocked */
+TEST(user_notification_wait_killable)
+{
+ struct sigaction new_action = {
+ .sa_handler = signal_handler,
+ };
+ struct seccomp_notif_resp resp = {};
+ struct seccomp_notif req = {};
+ int listener, status, sk_pair[2];
+ pid_t pid;
+ long ret;
+ char c;
+ /* 100 ms */
+ struct timespec delay = { .tv_nsec = 100000000 };
+
+ ASSERT_EQ(sigemptyset(&new_action.sa_mask), 0);
+
+ ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
+ ASSERT_EQ(0, ret)
+ {
+ TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!");
+ }
+
+ ASSERT_EQ(socketpair(PF_LOCAL, SOCK_SEQPACKET, 0, sk_pair), 0);
+
+ listener = user_notif_syscall(
+ __NR_getppid, SECCOMP_FILTER_FLAG_NEW_LISTENER |
+ SECCOMP_FILTER_FLAG_WAIT_KILLABLE_RECV);
+ ASSERT_GE(listener, 0);
+
+ pid = fork();
+ ASSERT_GE(pid, 0);
+
+ if (pid == 0) {
+ close(sk_pair[0]);
+ handled = sk_pair[1];
+
+ /* Setup the sigaction without SA_RESTART */
+ if (sigaction(SIGUSR1, &new_action, NULL)) {
+ perror("sigaction");
+ exit(1);
+ }
+
+ /* Make sure that the syscall is completed (no EINTR) */
+ ret = syscall(__NR_getppid);
+ exit(ret != USER_NOTIF_MAGIC);
+ }
+
+ /*
+ * Get the notification, to make move the notifying process into a
+ * non-preemptible (TASK_KILLABLE) state.
+ */
+ EXPECT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_RECV, &req), 0);
+ /* Send non-fatal kill signal */
+ EXPECT_EQ(kill(pid, SIGUSR1), 0);
+
+ /*
+ * Make sure the task enters moves to TASK_KILLABLE by waiting for
+ * D (Disk Sleep) state after receiving non-fatal signal.
+ */
+ while (get_proc_stat(_metadata, pid) != 'D')
+ nanosleep(&delay, NULL);
+
+ resp.id = req.id;
+ resp.val = USER_NOTIF_MAGIC;
+ /* Make sure the notification is found and able to be replied to */
+ EXPECT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_SEND, &resp), 0);
+
+ /*
+ * Make sure that the signal handler does get called once we're back in
+ * userspace.
+ */
+ EXPECT_EQ(read(sk_pair[0], &c, 1), 1);
+ /* wait for process to exit (exit checks for USER_NOTIF_MAGIC) */
+ EXPECT_EQ(waitpid(pid, &status, 0), pid);
+ EXPECT_EQ(true, WIFEXITED(status));
+ EXPECT_EQ(0, WEXITSTATUS(status));
+}
+
+/* Ensure fatal signals after receive are not blocked */
+TEST(user_notification_wait_killable_fatal)
+{
+ struct seccomp_notif req = {};
+ int listener, status;
+ pid_t pid;
+ long ret;
+ /* 100 ms */
+ struct timespec delay = { .tv_nsec = 100000000 };
+
+ ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);
+ ASSERT_EQ(0, ret)
+ {
+ TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!");
+ }
+
+ listener = user_notif_syscall(
+ __NR_getppid, SECCOMP_FILTER_FLAG_NEW_LISTENER |
+ SECCOMP_FILTER_FLAG_WAIT_KILLABLE_RECV);
+ ASSERT_GE(listener, 0);
+
+ pid = fork();
+ ASSERT_GE(pid, 0);
+
+ if (pid == 0) {
+ /* This should never complete as it should get a SIGTERM */
+ syscall(__NR_getppid);
+ exit(1);
+ }
+
+ while (get_proc_stat(_metadata, pid) != 'S')
+ nanosleep(&delay, NULL);
+
+ /*
+ * Get the notification, to make move the notifying process into a
+ * non-preemptible (TASK_KILLABLE) state.
+ */
+ EXPECT_EQ(ioctl(listener, SECCOMP_IOCTL_NOTIF_RECV, &req), 0);
+ /* Kill the process with a fatal signal */
+ EXPECT_EQ(kill(pid, SIGTERM), 0);
+
+ /*
+ * Wait for the process to exit, and make sure the process terminated
+ * due to the SIGTERM signal.
+ */
+ EXPECT_EQ(waitpid(pid, &status, 0), pid);
+ EXPECT_EQ(true, WIFSIGNALED(status));
+ EXPECT_EQ(SIGTERM, WTERMSIG(status));
+}
+
/*
* TODO:
- * - add microbenchmarks
* - expand NNP testing
* - better arch-specific TRACE and TRAP handlers.
* - endianness checking when appropriate
@@ -3697,7 +4764,6 @@ skip:
* - arch value testing (x86 modes especially)
* - verify that FILTER_FLAG_LOG filters generate log messages
* - verify that RET_LOG generates log messages
- * - ...
*/
TEST_HARNESS_MAIN
diff --git a/tools/testing/selftests/seccomp/settings b/tools/testing/selftests/seccomp/settings
new file mode 100644
index 000000000000..6091b45d226b
--- /dev/null
+++ b/tools/testing/selftests/seccomp/settings
@@ -0,0 +1 @@
+timeout=120
diff --git a/tools/testing/selftests/sgx/.gitignore b/tools/testing/selftests/sgx/.gitignore
new file mode 100644
index 000000000000..fbaf0bda9a92
--- /dev/null
+++ b/tools/testing/selftests/sgx/.gitignore
@@ -0,0 +1,2 @@
+test_sgx
+test_encl.elf
diff --git a/tools/testing/selftests/sgx/Makefile b/tools/testing/selftests/sgx/Makefile
new file mode 100644
index 000000000000..867f88ce2570
--- /dev/null
+++ b/tools/testing/selftests/sgx/Makefile
@@ -0,0 +1,60 @@
+top_srcdir = ../../../..
+
+include ../lib.mk
+
+.PHONY: all clean
+
+CAN_BUILD_X86_64 := $(shell ../x86/check_cc.sh "$(CC)" \
+ ../x86/trivial_64bit_program.c)
+
+ifndef OBJCOPY
+OBJCOPY := $(CROSS_COMPILE)objcopy
+endif
+
+INCLUDES := -I$(top_srcdir)/tools/include
+HOST_CFLAGS := -Wall -Werror -g $(INCLUDES) -fPIC
+HOST_LDFLAGS := -z noexecstack -lcrypto
+ENCL_CFLAGS += -Wall -Werror -static-pie -nostdlib -ffreestanding -fPIE \
+ -fno-stack-protector -mrdrnd $(INCLUDES)
+ENCL_LDFLAGS := -Wl,-T,test_encl.lds,--build-id=none
+
+ifeq ($(CAN_BUILD_X86_64), 1)
+TEST_CUSTOM_PROGS := $(OUTPUT)/test_sgx
+TEST_FILES := $(OUTPUT)/test_encl.elf
+
+all: $(TEST_CUSTOM_PROGS) $(OUTPUT)/test_encl.elf
+endif
+
+$(OUTPUT)/test_sgx: $(OUTPUT)/main.o \
+ $(OUTPUT)/load.o \
+ $(OUTPUT)/sigstruct.o \
+ $(OUTPUT)/call.o \
+ $(OUTPUT)/sign_key.o
+ $(CC) $(HOST_CFLAGS) -o $@ $^ $(HOST_LDFLAGS)
+
+$(OUTPUT)/main.o: main.c
+ $(CC) $(HOST_CFLAGS) -c $< -o $@
+
+$(OUTPUT)/load.o: load.c
+ $(CC) $(HOST_CFLAGS) -c $< -o $@
+
+$(OUTPUT)/sigstruct.o: sigstruct.c
+ $(CC) $(HOST_CFLAGS) -c $< -o $@
+
+$(OUTPUT)/call.o: call.S
+ $(CC) $(HOST_CFLAGS) -c $< -o $@
+
+$(OUTPUT)/sign_key.o: sign_key.S
+ $(CC) $(HOST_CFLAGS) -c $< -o $@
+
+$(OUTPUT)/test_encl.elf: test_encl.c test_encl_bootstrap.S
+ $(CC) $(ENCL_CFLAGS) $^ -o $@ $(ENCL_LDFLAGS)
+
+EXTRA_CLEAN := \
+ $(OUTPUT)/test_encl.elf \
+ $(OUTPUT)/load.o \
+ $(OUTPUT)/call.o \
+ $(OUTPUT)/main.o \
+ $(OUTPUT)/sigstruct.o \
+ $(OUTPUT)/test_sgx \
+ $(OUTPUT)/test_sgx.o \
diff --git a/tools/testing/selftests/sgx/call.S b/tools/testing/selftests/sgx/call.S
new file mode 100644
index 000000000000..b09a25890f3b
--- /dev/null
+++ b/tools/testing/selftests/sgx/call.S
@@ -0,0 +1,44 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/**
+* Copyright(c) 2016-20 Intel Corporation.
+*/
+
+ .text
+
+ .global sgx_enter_enclave
+sgx_enter_enclave:
+ .cfi_startproc
+ push %r15
+ .cfi_adjust_cfa_offset 8
+ .cfi_rel_offset %r15, 0
+ push %r14
+ .cfi_adjust_cfa_offset 8
+ .cfi_rel_offset %r14, 0
+ push %r13
+ .cfi_adjust_cfa_offset 8
+ .cfi_rel_offset %r13, 0
+ push %r12
+ .cfi_adjust_cfa_offset 8
+ .cfi_rel_offset %r12, 0
+ push %rbx
+ .cfi_adjust_cfa_offset 8
+ .cfi_rel_offset %rbx, 0
+ push $0
+ .cfi_adjust_cfa_offset 8
+ push 0x38(%rsp)
+ .cfi_adjust_cfa_offset 8
+ call *vdso_sgx_enter_enclave(%rip)
+ add $0x10, %rsp
+ .cfi_adjust_cfa_offset -0x10
+ pop %rbx
+ .cfi_adjust_cfa_offset -8
+ pop %r12
+ .cfi_adjust_cfa_offset -8
+ pop %r13
+ .cfi_adjust_cfa_offset -8
+ pop %r14
+ .cfi_adjust_cfa_offset -8
+ pop %r15
+ .cfi_adjust_cfa_offset -8
+ ret
+ .cfi_endproc
diff --git a/tools/testing/selftests/sgx/defines.h b/tools/testing/selftests/sgx/defines.h
new file mode 100644
index 000000000000..402f8787a71c
--- /dev/null
+++ b/tools/testing/selftests/sgx/defines.h
@@ -0,0 +1,81 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Copyright(c) 2016-20 Intel Corporation.
+ */
+
+#ifndef DEFINES_H
+#define DEFINES_H
+
+#include <stdint.h>
+
+#define PAGE_SIZE 4096
+#define PAGE_MASK (~(PAGE_SIZE - 1))
+
+#define __aligned(x) __attribute__((__aligned__(x)))
+#define __packed __attribute__((packed))
+#define __used __attribute__((used))
+#define __section(x)__attribute__((__section__(x)))
+
+#include "../../../../arch/x86/include/asm/sgx.h"
+#include "../../../../arch/x86/include/asm/enclu.h"
+#include "../../../../arch/x86/include/uapi/asm/sgx.h"
+
+enum encl_op_type {
+ ENCL_OP_PUT_TO_BUFFER,
+ ENCL_OP_GET_FROM_BUFFER,
+ ENCL_OP_PUT_TO_ADDRESS,
+ ENCL_OP_GET_FROM_ADDRESS,
+ ENCL_OP_NOP,
+ ENCL_OP_EACCEPT,
+ ENCL_OP_EMODPE,
+ ENCL_OP_INIT_TCS_PAGE,
+ ENCL_OP_MAX,
+};
+
+struct encl_op_header {
+ uint64_t type;
+};
+
+struct encl_op_put_to_buf {
+ struct encl_op_header header;
+ uint64_t value;
+};
+
+struct encl_op_get_from_buf {
+ struct encl_op_header header;
+ uint64_t value;
+};
+
+struct encl_op_put_to_addr {
+ struct encl_op_header header;
+ uint64_t value;
+ uint64_t addr;
+};
+
+struct encl_op_get_from_addr {
+ struct encl_op_header header;
+ uint64_t value;
+ uint64_t addr;
+};
+
+struct encl_op_eaccept {
+ struct encl_op_header header;
+ uint64_t epc_addr;
+ uint64_t flags;
+ uint64_t ret;
+};
+
+struct encl_op_emodpe {
+ struct encl_op_header header;
+ uint64_t epc_addr;
+ uint64_t flags;
+};
+
+struct encl_op_init_tcs_page {
+ struct encl_op_header header;
+ uint64_t tcs_page;
+ uint64_t ssa;
+ uint64_t entry;
+};
+
+#endif /* DEFINES_H */
diff --git a/tools/testing/selftests/sgx/load.c b/tools/testing/selftests/sgx/load.c
new file mode 100644
index 000000000000..c9f658e44de6
--- /dev/null
+++ b/tools/testing/selftests/sgx/load.c
@@ -0,0 +1,370 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright(c) 2016-20 Intel Corporation. */
+
+#include <assert.h>
+#include <elf.h>
+#include <errno.h>
+#include <fcntl.h>
+#include <stdbool.h>
+#include <stdio.h>
+#include <stdint.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+#include <sys/ioctl.h>
+#include <sys/mman.h>
+#include <sys/stat.h>
+#include <sys/time.h>
+#include <sys/types.h>
+#include "defines.h"
+#include "main.h"
+
+void encl_delete(struct encl *encl)
+{
+ struct encl_segment *heap_seg;
+
+ if (encl->encl_base)
+ munmap((void *)encl->encl_base, encl->encl_size);
+
+ if (encl->bin)
+ munmap(encl->bin, encl->bin_size);
+
+ if (encl->fd)
+ close(encl->fd);
+
+ if (encl->segment_tbl) {
+ heap_seg = &encl->segment_tbl[encl->nr_segments - 1];
+ munmap(heap_seg->src, heap_seg->size);
+ free(encl->segment_tbl);
+ }
+
+ memset(encl, 0, sizeof(*encl));
+}
+
+static bool encl_map_bin(const char *path, struct encl *encl)
+{
+ struct stat sb;
+ void *bin;
+ int ret;
+ int fd;
+
+ fd = open(path, O_RDONLY);
+ if (fd == -1) {
+ perror("enclave executable open()");
+ return false;
+ }
+
+ ret = stat(path, &sb);
+ if (ret) {
+ perror("enclave executable stat()");
+ goto err;
+ }
+
+ bin = mmap(NULL, sb.st_size, PROT_READ, MAP_PRIVATE, fd, 0);
+ if (bin == MAP_FAILED) {
+ perror("enclave executable mmap()");
+ goto err;
+ }
+
+ encl->bin = bin;
+ encl->bin_size = sb.st_size;
+
+ close(fd);
+ return true;
+
+err:
+ close(fd);
+ return false;
+}
+
+static bool encl_ioc_create(struct encl *encl)
+{
+ struct sgx_secs *secs = &encl->secs;
+ struct sgx_enclave_create ioc;
+ int rc;
+
+ assert(encl->encl_base != 0);
+
+ memset(secs, 0, sizeof(*secs));
+ secs->ssa_frame_size = 1;
+ secs->attributes = SGX_ATTR_MODE64BIT;
+ secs->xfrm = 3;
+ secs->base = encl->encl_base;
+ secs->size = encl->encl_size;
+
+ ioc.src = (unsigned long)secs;
+ rc = ioctl(encl->fd, SGX_IOC_ENCLAVE_CREATE, &ioc);
+ if (rc) {
+ perror("SGX_IOC_ENCLAVE_CREATE failed");
+ munmap((void *)secs->base, encl->encl_size);
+ return false;
+ }
+
+ return true;
+}
+
+static bool encl_ioc_add_pages(struct encl *encl, struct encl_segment *seg)
+{
+ struct sgx_enclave_add_pages ioc;
+ struct sgx_secinfo secinfo;
+ int rc;
+
+ memset(&secinfo, 0, sizeof(secinfo));
+ secinfo.flags = seg->flags;
+
+ ioc.src = (uint64_t)seg->src;
+ ioc.offset = seg->offset;
+ ioc.length = seg->size;
+ ioc.secinfo = (unsigned long)&secinfo;
+ if (seg->measure)
+ ioc.flags = SGX_PAGE_MEASURE;
+ else
+ ioc.flags = 0;
+
+ rc = ioctl(encl->fd, SGX_IOC_ENCLAVE_ADD_PAGES, &ioc);
+ if (rc < 0) {
+ perror("SGX_IOC_ENCLAVE_ADD_PAGES failed");
+ return false;
+ }
+
+ return true;
+}
+
+/*
+ * Parse the enclave code's symbol table to locate and return address of
+ * the provided symbol
+ */
+uint64_t encl_get_entry(struct encl *encl, const char *symbol)
+{
+ Elf64_Sym *symtab = NULL;
+ char *sym_names = NULL;
+ Elf64_Shdr *sections;
+ Elf64_Ehdr *ehdr;
+ int num_sym = 0;
+ int i;
+
+ ehdr = encl->bin;
+ sections = encl->bin + ehdr->e_shoff;
+
+ for (i = 0; i < ehdr->e_shnum; i++) {
+ if (sections[i].sh_type == SHT_SYMTAB) {
+ symtab = (Elf64_Sym *)((char *)encl->bin + sections[i].sh_offset);
+ num_sym = sections[i].sh_size / sections[i].sh_entsize;
+ break;
+ }
+ }
+
+ for (i = 0; i < ehdr->e_shnum; i++) {
+ if (sections[i].sh_type == SHT_STRTAB) {
+ sym_names = (char *)encl->bin + sections[i].sh_offset;
+ break;
+ }
+ }
+
+ if (!symtab || !sym_names)
+ return 0;
+
+ for (i = 0; i < num_sym; i++) {
+ Elf64_Sym *sym = &symtab[i];
+
+ if (!strcmp(symbol, sym_names + sym->st_name))
+ return (uint64_t)sym->st_value;
+ }
+
+ return 0;
+}
+
+bool encl_load(const char *path, struct encl *encl, unsigned long heap_size)
+{
+ const char device_path[] = "/dev/sgx_enclave";
+ struct encl_segment *seg;
+ Elf64_Phdr *phdr_tbl;
+ off_t src_offset;
+ Elf64_Ehdr *ehdr;
+ struct stat sb;
+ void *ptr;
+ int i, j;
+ int ret;
+ int fd = -1;
+
+ memset(encl, 0, sizeof(*encl));
+
+ fd = open(device_path, O_RDWR);
+ if (fd < 0) {
+ perror("Unable to open /dev/sgx_enclave");
+ goto err;
+ }
+
+ ret = stat(device_path, &sb);
+ if (ret) {
+ perror("device file stat()");
+ goto err;
+ }
+
+ ptr = mmap(NULL, PAGE_SIZE, PROT_READ, MAP_SHARED, fd, 0);
+ if (ptr == (void *)-1) {
+ perror("mmap for read");
+ goto err;
+ }
+ munmap(ptr, PAGE_SIZE);
+
+#define ERR_MSG \
+"mmap() succeeded for PROT_READ, but failed for PROT_EXEC.\n" \
+" Check that /dev does not have noexec set:\n" \
+" \tmount | grep \"/dev .*noexec\"\n" \
+" If so, remount it executable: mount -o remount,exec /dev\n\n"
+
+ ptr = mmap(NULL, PAGE_SIZE, PROT_EXEC, MAP_SHARED, fd, 0);
+ if (ptr == (void *)-1) {
+ fprintf(stderr, ERR_MSG);
+ goto err;
+ }
+ munmap(ptr, PAGE_SIZE);
+
+ encl->fd = fd;
+
+ if (!encl_map_bin(path, encl))
+ goto err;
+
+ ehdr = encl->bin;
+ phdr_tbl = encl->bin + ehdr->e_phoff;
+
+ encl->nr_segments = 1; /* one for the heap */
+
+ for (i = 0; i < ehdr->e_phnum; i++) {
+ Elf64_Phdr *phdr = &phdr_tbl[i];
+
+ if (phdr->p_type == PT_LOAD)
+ encl->nr_segments++;
+ }
+
+ encl->segment_tbl = calloc(encl->nr_segments,
+ sizeof(struct encl_segment));
+ if (!encl->segment_tbl)
+ goto err;
+
+ for (i = 0, j = 0; i < ehdr->e_phnum; i++) {
+ Elf64_Phdr *phdr = &phdr_tbl[i];
+ unsigned int flags = phdr->p_flags;
+
+ if (phdr->p_type != PT_LOAD)
+ continue;
+
+ seg = &encl->segment_tbl[j];
+
+ if (!!(flags & ~(PF_R | PF_W | PF_X))) {
+ fprintf(stderr,
+ "%d has invalid segment flags 0x%02x.\n", i,
+ phdr->p_flags);
+ goto err;
+ }
+
+ if (j == 0 && flags != (PF_R | PF_W)) {
+ fprintf(stderr,
+ "TCS has invalid segment flags 0x%02x.\n",
+ phdr->p_flags);
+ goto err;
+ }
+
+ if (j == 0) {
+ src_offset = phdr->p_offset & PAGE_MASK;
+ encl->src = encl->bin + src_offset;
+
+ seg->prot = PROT_READ | PROT_WRITE;
+ seg->flags = SGX_PAGE_TYPE_TCS << 8;
+ } else {
+ seg->prot = (phdr->p_flags & PF_R) ? PROT_READ : 0;
+ seg->prot |= (phdr->p_flags & PF_W) ? PROT_WRITE : 0;
+ seg->prot |= (phdr->p_flags & PF_X) ? PROT_EXEC : 0;
+ seg->flags = (SGX_PAGE_TYPE_REG << 8) | seg->prot;
+ }
+
+ seg->offset = (phdr->p_offset & PAGE_MASK) - src_offset;
+ seg->size = (phdr->p_filesz + PAGE_SIZE - 1) & PAGE_MASK;
+ seg->src = encl->src + seg->offset;
+ seg->measure = true;
+
+ j++;
+ }
+
+ assert(j == encl->nr_segments - 1);
+
+ seg = &encl->segment_tbl[j];
+ seg->offset = encl->segment_tbl[j - 1].offset + encl->segment_tbl[j - 1].size;
+ seg->size = heap_size;
+ seg->src = mmap(NULL, heap_size, PROT_READ | PROT_WRITE,
+ MAP_ANONYMOUS | MAP_PRIVATE, -1, 0);
+ seg->prot = PROT_READ | PROT_WRITE;
+ seg->flags = (SGX_PAGE_TYPE_REG << 8) | seg->prot;
+ seg->measure = false;
+
+ if (seg->src == MAP_FAILED)
+ goto err;
+
+ encl->src_size = encl->segment_tbl[j].offset + encl->segment_tbl[j].size;
+
+ for (encl->encl_size = 4096; encl->encl_size < encl->src_size; )
+ encl->encl_size <<= 1;
+
+ return true;
+
+err:
+ if (fd != -1)
+ close(fd);
+ encl_delete(encl);
+ return false;
+}
+
+static bool encl_map_area(struct encl *encl)
+{
+ size_t encl_size = encl->encl_size;
+ void *area;
+
+ area = mmap(NULL, encl_size * 2, PROT_NONE,
+ MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
+ if (area == MAP_FAILED) {
+ perror("reservation mmap()");
+ return false;
+ }
+
+ encl->encl_base = ((uint64_t)area + encl_size - 1) & ~(encl_size - 1);
+
+ munmap(area, encl->encl_base - (uint64_t)area);
+ munmap((void *)(encl->encl_base + encl_size),
+ (uint64_t)area + encl_size - encl->encl_base);
+
+ return true;
+}
+
+bool encl_build(struct encl *encl)
+{
+ struct sgx_enclave_init ioc;
+ int ret;
+ int i;
+
+ if (!encl_map_area(encl))
+ return false;
+
+ if (!encl_ioc_create(encl))
+ return false;
+
+ /*
+ * Pages must be added before mapping VMAs because their permissions
+ * cap the VMA permissions.
+ */
+ for (i = 0; i < encl->nr_segments; i++) {
+ struct encl_segment *seg = &encl->segment_tbl[i];
+
+ if (!encl_ioc_add_pages(encl, seg))
+ return false;
+ }
+
+ ioc.sigstruct = (uint64_t)&encl->sigstruct;
+ ret = ioctl(encl->fd, SGX_IOC_ENCLAVE_INIT, &ioc);
+ if (ret) {
+ perror("SGX_IOC_ENCLAVE_INIT failed");
+ return false;
+ }
+
+ return true;
+}
diff --git a/tools/testing/selftests/sgx/main.c b/tools/testing/selftests/sgx/main.c
new file mode 100644
index 000000000000..9820b3809c69
--- /dev/null
+++ b/tools/testing/selftests/sgx/main.c
@@ -0,0 +1,1993 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright(c) 2016-20 Intel Corporation. */
+
+#include <cpuid.h>
+#include <elf.h>
+#include <errno.h>
+#include <fcntl.h>
+#include <stdbool.h>
+#include <stdio.h>
+#include <stdint.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+#include <sys/ioctl.h>
+#include <sys/mman.h>
+#include <sys/stat.h>
+#include <sys/time.h>
+#include <sys/types.h>
+#include <sys/auxv.h>
+#include "defines.h"
+#include "../kselftest_harness.h"
+#include "main.h"
+
+static const uint64_t MAGIC = 0x1122334455667788ULL;
+static const uint64_t MAGIC2 = 0x8877665544332211ULL;
+vdso_sgx_enter_enclave_t vdso_sgx_enter_enclave;
+
+/*
+ * Security Information (SECINFO) data structure needed by a few SGX
+ * instructions (eg. ENCLU[EACCEPT] and ENCLU[EMODPE]) holds meta-data
+ * about an enclave page. &enum sgx_secinfo_page_state specifies the
+ * secinfo flags used for page state.
+ */
+enum sgx_secinfo_page_state {
+ SGX_SECINFO_PENDING = (1 << 3),
+ SGX_SECINFO_MODIFIED = (1 << 4),
+ SGX_SECINFO_PR = (1 << 5),
+};
+
+struct vdso_symtab {
+ Elf64_Sym *elf_symtab;
+ const char *elf_symstrtab;
+ Elf64_Word *elf_hashtab;
+};
+
+static Elf64_Dyn *vdso_get_dyntab(void *addr)
+{
+ Elf64_Ehdr *ehdr = addr;
+ Elf64_Phdr *phdrtab = addr + ehdr->e_phoff;
+ int i;
+
+ for (i = 0; i < ehdr->e_phnum; i++)
+ if (phdrtab[i].p_type == PT_DYNAMIC)
+ return addr + phdrtab[i].p_offset;
+
+ return NULL;
+}
+
+static void *vdso_get_dyn(void *addr, Elf64_Dyn *dyntab, Elf64_Sxword tag)
+{
+ int i;
+
+ for (i = 0; dyntab[i].d_tag != DT_NULL; i++)
+ if (dyntab[i].d_tag == tag)
+ return addr + dyntab[i].d_un.d_ptr;
+
+ return NULL;
+}
+
+static bool vdso_get_symtab(void *addr, struct vdso_symtab *symtab)
+{
+ Elf64_Dyn *dyntab = vdso_get_dyntab(addr);
+
+ symtab->elf_symtab = vdso_get_dyn(addr, dyntab, DT_SYMTAB);
+ if (!symtab->elf_symtab)
+ return false;
+
+ symtab->elf_symstrtab = vdso_get_dyn(addr, dyntab, DT_STRTAB);
+ if (!symtab->elf_symstrtab)
+ return false;
+
+ symtab->elf_hashtab = vdso_get_dyn(addr, dyntab, DT_HASH);
+ if (!symtab->elf_hashtab)
+ return false;
+
+ return true;
+}
+
+static inline int sgx2_supported(void)
+{
+ unsigned int eax, ebx, ecx, edx;
+
+ __cpuid_count(SGX_CPUID, 0x0, eax, ebx, ecx, edx);
+
+ return eax & 0x2;
+}
+
+static unsigned long elf_sym_hash(const char *name)
+{
+ unsigned long h = 0, high;
+
+ while (*name) {
+ h = (h << 4) + *name++;
+ high = h & 0xf0000000;
+
+ if (high)
+ h ^= high >> 24;
+
+ h &= ~high;
+ }
+
+ return h;
+}
+
+static Elf64_Sym *vdso_symtab_get(struct vdso_symtab *symtab, const char *name)
+{
+ Elf64_Word bucketnum = symtab->elf_hashtab[0];
+ Elf64_Word *buckettab = &symtab->elf_hashtab[2];
+ Elf64_Word *chaintab = &symtab->elf_hashtab[2 + bucketnum];
+ Elf64_Sym *sym;
+ Elf64_Word i;
+
+ for (i = buckettab[elf_sym_hash(name) % bucketnum]; i != STN_UNDEF;
+ i = chaintab[i]) {
+ sym = &symtab->elf_symtab[i];
+ if (!strcmp(name, &symtab->elf_symstrtab[sym->st_name]))
+ return sym;
+ }
+
+ return NULL;
+}
+
+/*
+ * Return the offset in the enclave where the TCS segment can be found.
+ * The first RW segment loaded is the TCS.
+ */
+static off_t encl_get_tcs_offset(struct encl *encl)
+{
+ int i;
+
+ for (i = 0; i < encl->nr_segments; i++) {
+ struct encl_segment *seg = &encl->segment_tbl[i];
+
+ if (i == 0 && seg->prot == (PROT_READ | PROT_WRITE))
+ return seg->offset;
+ }
+
+ return -1;
+}
+
+/*
+ * Return the offset in the enclave where the data segment can be found.
+ * The first RW segment loaded is the TCS, skip that to get info on the
+ * data segment.
+ */
+static off_t encl_get_data_offset(struct encl *encl)
+{
+ int i;
+
+ for (i = 1; i < encl->nr_segments; i++) {
+ struct encl_segment *seg = &encl->segment_tbl[i];
+
+ if (seg->prot == (PROT_READ | PROT_WRITE))
+ return seg->offset;
+ }
+
+ return -1;
+}
+
+FIXTURE(enclave) {
+ struct encl encl;
+ struct sgx_enclave_run run;
+};
+
+static bool setup_test_encl(unsigned long heap_size, struct encl *encl,
+ struct __test_metadata *_metadata)
+{
+ Elf64_Sym *sgx_enter_enclave_sym = NULL;
+ struct vdso_symtab symtab;
+ struct encl_segment *seg;
+ char maps_line[256];
+ FILE *maps_file;
+ unsigned int i;
+ void *addr;
+
+ if (!encl_load("test_encl.elf", encl, heap_size)) {
+ encl_delete(encl);
+ TH_LOG("Failed to load the test enclave.");
+ return false;
+ }
+
+ if (!encl_measure(encl))
+ goto err;
+
+ if (!encl_build(encl))
+ goto err;
+
+ /*
+ * An enclave consumer only must do this.
+ */
+ for (i = 0; i < encl->nr_segments; i++) {
+ struct encl_segment *seg = &encl->segment_tbl[i];
+
+ addr = mmap((void *)encl->encl_base + seg->offset, seg->size,
+ seg->prot, MAP_SHARED | MAP_FIXED, encl->fd, 0);
+ EXPECT_NE(addr, MAP_FAILED);
+ if (addr == MAP_FAILED)
+ goto err;
+ }
+
+ /* Get vDSO base address */
+ addr = (void *)getauxval(AT_SYSINFO_EHDR);
+ if (!addr)
+ goto err;
+
+ if (!vdso_get_symtab(addr, &symtab))
+ goto err;
+
+ sgx_enter_enclave_sym = vdso_symtab_get(&symtab, "__vdso_sgx_enter_enclave");
+ if (!sgx_enter_enclave_sym)
+ goto err;
+
+ vdso_sgx_enter_enclave = addr + sgx_enter_enclave_sym->st_value;
+
+ return true;
+
+err:
+ for (i = 0; i < encl->nr_segments; i++) {
+ seg = &encl->segment_tbl[i];
+
+ TH_LOG("0x%016lx 0x%016lx 0x%02x", seg->offset, seg->size, seg->prot);
+ }
+
+ maps_file = fopen("/proc/self/maps", "r");
+ if (maps_file != NULL) {
+ while (fgets(maps_line, sizeof(maps_line), maps_file) != NULL) {
+ maps_line[strlen(maps_line) - 1] = '\0';
+
+ if (strstr(maps_line, "/dev/sgx_enclave"))
+ TH_LOG("%s", maps_line);
+ }
+
+ fclose(maps_file);
+ }
+
+ TH_LOG("Failed to initialize the test enclave.");
+
+ encl_delete(encl);
+
+ return false;
+}
+
+FIXTURE_SETUP(enclave)
+{
+}
+
+FIXTURE_TEARDOWN(enclave)
+{
+ encl_delete(&self->encl);
+}
+
+#define ENCL_CALL(op, run, clobbered) \
+ ({ \
+ int ret; \
+ if ((clobbered)) \
+ ret = vdso_sgx_enter_enclave((unsigned long)(op), 0, 0, \
+ EENTER, 0, 0, (run)); \
+ else \
+ ret = sgx_enter_enclave((void *)(op), NULL, 0, EENTER, NULL, NULL, \
+ (run)); \
+ ret; \
+ })
+
+#define EXPECT_EEXIT(run) \
+ do { \
+ EXPECT_EQ((run)->function, EEXIT); \
+ if ((run)->function != EEXIT) \
+ TH_LOG("0x%02x 0x%02x 0x%016llx", (run)->exception_vector, \
+ (run)->exception_error_code, (run)->exception_addr); \
+ } while (0)
+
+TEST_F(enclave, unclobbered_vdso)
+{
+ struct encl_op_get_from_buf get_op;
+ struct encl_op_put_to_buf put_op;
+
+ ASSERT_TRUE(setup_test_encl(ENCL_HEAP_SIZE_DEFAULT, &self->encl, _metadata));
+
+ memset(&self->run, 0, sizeof(self->run));
+ self->run.tcs = self->encl.encl_base;
+
+ put_op.header.type = ENCL_OP_PUT_TO_BUFFER;
+ put_op.value = MAGIC;
+
+ EXPECT_EQ(ENCL_CALL(&put_op, &self->run, false), 0);
+
+ EXPECT_EEXIT(&self->run);
+ EXPECT_EQ(self->run.user_data, 0);
+
+ get_op.header.type = ENCL_OP_GET_FROM_BUFFER;
+ get_op.value = 0;
+
+ EXPECT_EQ(ENCL_CALL(&get_op, &self->run, false), 0);
+
+ EXPECT_EQ(get_op.value, MAGIC);
+ EXPECT_EEXIT(&self->run);
+ EXPECT_EQ(self->run.user_data, 0);
+}
+
+/*
+ * A section metric is concatenated in a way that @low bits 12-31 define the
+ * bits 12-31 of the metric and @high bits 0-19 define the bits 32-51 of the
+ * metric.
+ */
+static unsigned long sgx_calc_section_metric(unsigned int low,
+ unsigned int high)
+{
+ return (low & GENMASK_ULL(31, 12)) +
+ ((high & GENMASK_ULL(19, 0)) << 32);
+}
+
+/*
+ * Sum total available physical SGX memory across all EPC sections
+ *
+ * Return: total available physical SGX memory available on system
+ */
+static unsigned long get_total_epc_mem(void)
+{
+ unsigned int eax, ebx, ecx, edx;
+ unsigned long total_size = 0;
+ unsigned int type;
+ int section = 0;
+
+ while (true) {
+ __cpuid_count(SGX_CPUID, section + SGX_CPUID_EPC, eax, ebx, ecx, edx);
+
+ type = eax & SGX_CPUID_EPC_MASK;
+ if (type == SGX_CPUID_EPC_INVALID)
+ break;
+
+ if (type != SGX_CPUID_EPC_SECTION)
+ break;
+
+ total_size += sgx_calc_section_metric(ecx, edx);
+
+ section++;
+ }
+
+ return total_size;
+}
+
+TEST_F(enclave, unclobbered_vdso_oversubscribed)
+{
+ struct encl_op_get_from_buf get_op;
+ struct encl_op_put_to_buf put_op;
+ unsigned long total_mem;
+
+ total_mem = get_total_epc_mem();
+ ASSERT_NE(total_mem, 0);
+ ASSERT_TRUE(setup_test_encl(total_mem, &self->encl, _metadata));
+
+ memset(&self->run, 0, sizeof(self->run));
+ self->run.tcs = self->encl.encl_base;
+
+ put_op.header.type = ENCL_OP_PUT_TO_BUFFER;
+ put_op.value = MAGIC;
+
+ EXPECT_EQ(ENCL_CALL(&put_op, &self->run, false), 0);
+
+ EXPECT_EEXIT(&self->run);
+ EXPECT_EQ(self->run.user_data, 0);
+
+ get_op.header.type = ENCL_OP_GET_FROM_BUFFER;
+ get_op.value = 0;
+
+ EXPECT_EQ(ENCL_CALL(&get_op, &self->run, false), 0);
+
+ EXPECT_EQ(get_op.value, MAGIC);
+ EXPECT_EEXIT(&self->run);
+ EXPECT_EQ(self->run.user_data, 0);
+}
+
+TEST_F_TIMEOUT(enclave, unclobbered_vdso_oversubscribed_remove, 900)
+{
+ struct sgx_enclave_remove_pages remove_ioc;
+ struct sgx_enclave_modify_types modt_ioc;
+ struct encl_op_get_from_buf get_op;
+ struct encl_op_eaccept eaccept_op;
+ struct encl_op_put_to_buf put_op;
+ struct encl_segment *heap;
+ unsigned long total_mem;
+ int ret, errno_save;
+ unsigned long addr;
+ unsigned long i;
+
+ /*
+ * Create enclave with additional heap that is as big as all
+ * available physical SGX memory.
+ */
+ total_mem = get_total_epc_mem();
+ ASSERT_NE(total_mem, 0);
+ TH_LOG("Creating an enclave with %lu bytes heap may take a while ...",
+ total_mem);
+ ASSERT_TRUE(setup_test_encl(total_mem, &self->encl, _metadata));
+
+ /*
+ * Hardware (SGX2) and kernel support is needed for this test. Start
+ * with check that test has a chance of succeeding.
+ */
+ memset(&modt_ioc, 0, sizeof(modt_ioc));
+ ret = ioctl(self->encl.fd, SGX_IOC_ENCLAVE_MODIFY_TYPES, &modt_ioc);
+
+ if (ret == -1) {
+ if (errno == ENOTTY)
+ SKIP(return,
+ "Kernel does not support SGX_IOC_ENCLAVE_MODIFY_TYPES ioctl()");
+ else if (errno == ENODEV)
+ SKIP(return, "System does not support SGX2");
+ }
+
+ /*
+ * Invalid parameters were provided during sanity check,
+ * expect command to fail.
+ */
+ EXPECT_EQ(ret, -1);
+
+ /* SGX2 is supported by kernel and hardware, test can proceed. */
+ memset(&self->run, 0, sizeof(self->run));
+ self->run.tcs = self->encl.encl_base;
+
+ heap = &self->encl.segment_tbl[self->encl.nr_segments - 1];
+
+ put_op.header.type = ENCL_OP_PUT_TO_BUFFER;
+ put_op.value = MAGIC;
+
+ EXPECT_EQ(ENCL_CALL(&put_op, &self->run, false), 0);
+
+ EXPECT_EEXIT(&self->run);
+ EXPECT_EQ(self->run.user_data, 0);
+
+ get_op.header.type = ENCL_OP_GET_FROM_BUFFER;
+ get_op.value = 0;
+
+ EXPECT_EQ(ENCL_CALL(&get_op, &self->run, false), 0);
+
+ EXPECT_EQ(get_op.value, MAGIC);
+ EXPECT_EEXIT(&self->run);
+ EXPECT_EQ(self->run.user_data, 0);
+
+ /* Trim entire heap. */
+ memset(&modt_ioc, 0, sizeof(modt_ioc));
+
+ modt_ioc.offset = heap->offset;
+ modt_ioc.length = heap->size;
+ modt_ioc.page_type = SGX_PAGE_TYPE_TRIM;
+
+ TH_LOG("Changing type of %zd bytes to trimmed may take a while ...",
+ heap->size);
+ ret = ioctl(self->encl.fd, SGX_IOC_ENCLAVE_MODIFY_TYPES, &modt_ioc);
+ errno_save = ret == -1 ? errno : 0;
+
+ EXPECT_EQ(ret, 0);
+ EXPECT_EQ(errno_save, 0);
+ EXPECT_EQ(modt_ioc.result, 0);
+ EXPECT_EQ(modt_ioc.count, heap->size);
+
+ /* EACCEPT all removed pages. */
+ addr = self->encl.encl_base + heap->offset;
+
+ eaccept_op.flags = SGX_SECINFO_TRIM | SGX_SECINFO_MODIFIED;
+ eaccept_op.header.type = ENCL_OP_EACCEPT;
+
+ TH_LOG("Entering enclave to run EACCEPT for each page of %zd bytes may take a while ...",
+ heap->size);
+ for (i = 0; i < heap->size; i += 4096) {
+ eaccept_op.epc_addr = addr + i;
+ eaccept_op.ret = 0;
+
+ EXPECT_EQ(ENCL_CALL(&eaccept_op, &self->run, true), 0);
+
+ EXPECT_EQ(self->run.exception_vector, 0);
+ EXPECT_EQ(self->run.exception_error_code, 0);
+ EXPECT_EQ(self->run.exception_addr, 0);
+ ASSERT_EQ(eaccept_op.ret, 0);
+ ASSERT_EQ(self->run.function, EEXIT);
+ }
+
+ /* Complete page removal. */
+ memset(&remove_ioc, 0, sizeof(remove_ioc));
+
+ remove_ioc.offset = heap->offset;
+ remove_ioc.length = heap->size;
+
+ TH_LOG("Removing %zd bytes from enclave may take a while ...",
+ heap->size);
+ ret = ioctl(self->encl.fd, SGX_IOC_ENCLAVE_REMOVE_PAGES, &remove_ioc);
+ errno_save = ret == -1 ? errno : 0;
+
+ EXPECT_EQ(ret, 0);
+ EXPECT_EQ(errno_save, 0);
+ EXPECT_EQ(remove_ioc.count, heap->size);
+}
+
+TEST_F(enclave, clobbered_vdso)
+{
+ struct encl_op_get_from_buf get_op;
+ struct encl_op_put_to_buf put_op;
+
+ ASSERT_TRUE(setup_test_encl(ENCL_HEAP_SIZE_DEFAULT, &self->encl, _metadata));
+
+ memset(&self->run, 0, sizeof(self->run));
+ self->run.tcs = self->encl.encl_base;
+
+ put_op.header.type = ENCL_OP_PUT_TO_BUFFER;
+ put_op.value = MAGIC;
+
+ EXPECT_EQ(ENCL_CALL(&put_op, &self->run, true), 0);
+
+ EXPECT_EEXIT(&self->run);
+ EXPECT_EQ(self->run.user_data, 0);
+
+ get_op.header.type = ENCL_OP_GET_FROM_BUFFER;
+ get_op.value = 0;
+
+ EXPECT_EQ(ENCL_CALL(&get_op, &self->run, true), 0);
+
+ EXPECT_EQ(get_op.value, MAGIC);
+ EXPECT_EEXIT(&self->run);
+ EXPECT_EQ(self->run.user_data, 0);
+}
+
+static int test_handler(long rdi, long rsi, long rdx, long ursp, long r8, long r9,
+ struct sgx_enclave_run *run)
+{
+ run->user_data = 0;
+
+ return 0;
+}
+
+TEST_F(enclave, clobbered_vdso_and_user_function)
+{
+ struct encl_op_get_from_buf get_op;
+ struct encl_op_put_to_buf put_op;
+
+ ASSERT_TRUE(setup_test_encl(ENCL_HEAP_SIZE_DEFAULT, &self->encl, _metadata));
+
+ memset(&self->run, 0, sizeof(self->run));
+ self->run.tcs = self->encl.encl_base;
+
+ self->run.user_handler = (__u64)test_handler;
+ self->run.user_data = 0xdeadbeef;
+
+ put_op.header.type = ENCL_OP_PUT_TO_BUFFER;
+ put_op.value = MAGIC;
+
+ EXPECT_EQ(ENCL_CALL(&put_op, &self->run, true), 0);
+
+ EXPECT_EEXIT(&self->run);
+ EXPECT_EQ(self->run.user_data, 0);
+
+ get_op.header.type = ENCL_OP_GET_FROM_BUFFER;
+ get_op.value = 0;
+
+ EXPECT_EQ(ENCL_CALL(&get_op, &self->run, true), 0);
+
+ EXPECT_EQ(get_op.value, MAGIC);
+ EXPECT_EEXIT(&self->run);
+ EXPECT_EQ(self->run.user_data, 0);
+}
+
+/*
+ * Sanity check that it is possible to enter either of the two hardcoded TCS
+ */
+TEST_F(enclave, tcs_entry)
+{
+ struct encl_op_header op;
+
+ ASSERT_TRUE(setup_test_encl(ENCL_HEAP_SIZE_DEFAULT, &self->encl, _metadata));
+
+ memset(&self->run, 0, sizeof(self->run));
+ self->run.tcs = self->encl.encl_base;
+
+ op.type = ENCL_OP_NOP;
+
+ EXPECT_EQ(ENCL_CALL(&op, &self->run, true), 0);
+
+ EXPECT_EEXIT(&self->run);
+ EXPECT_EQ(self->run.exception_vector, 0);
+ EXPECT_EQ(self->run.exception_error_code, 0);
+ EXPECT_EQ(self->run.exception_addr, 0);
+
+ /* Move to the next TCS. */
+ self->run.tcs = self->encl.encl_base + PAGE_SIZE;
+
+ EXPECT_EQ(ENCL_CALL(&op, &self->run, true), 0);
+
+ EXPECT_EEXIT(&self->run);
+ EXPECT_EQ(self->run.exception_vector, 0);
+ EXPECT_EQ(self->run.exception_error_code, 0);
+ EXPECT_EQ(self->run.exception_addr, 0);
+}
+
+/*
+ * Second page of .data segment is used to test changing PTE permissions.
+ * This spans the local encl_buffer within the test enclave.
+ *
+ * 1) Start with a sanity check: a value is written to the target page within
+ * the enclave and read back to ensure target page can be written to.
+ * 2) Change PTE permissions (RW -> RO) of target page within enclave.
+ * 3) Repeat (1) - this time expecting a regular #PF communicated via the
+ * vDSO.
+ * 4) Change PTE permissions of target page within enclave back to be RW.
+ * 5) Repeat (1) by resuming enclave, now expected to be possible to write to
+ * and read from target page within enclave.
+ */
+TEST_F(enclave, pte_permissions)
+{
+ struct encl_op_get_from_addr get_addr_op;
+ struct encl_op_put_to_addr put_addr_op;
+ unsigned long data_start;
+ int ret;
+
+ ASSERT_TRUE(setup_test_encl(ENCL_HEAP_SIZE_DEFAULT, &self->encl, _metadata));
+
+ memset(&self->run, 0, sizeof(self->run));
+ self->run.tcs = self->encl.encl_base;
+
+ data_start = self->encl.encl_base +
+ encl_get_data_offset(&self->encl) +
+ PAGE_SIZE;
+
+ /*
+ * Sanity check to ensure it is possible to write to page that will
+ * have its permissions manipulated.
+ */
+
+ /* Write MAGIC to page */
+ put_addr_op.value = MAGIC;
+ put_addr_op.addr = data_start;
+ put_addr_op.header.type = ENCL_OP_PUT_TO_ADDRESS;
+
+ EXPECT_EQ(ENCL_CALL(&put_addr_op, &self->run, true), 0);
+
+ EXPECT_EEXIT(&self->run);
+ EXPECT_EQ(self->run.exception_vector, 0);
+ EXPECT_EQ(self->run.exception_error_code, 0);
+ EXPECT_EQ(self->run.exception_addr, 0);
+
+ /*
+ * Read memory that was just written to, confirming that it is the
+ * value previously written (MAGIC).
+ */
+ get_addr_op.value = 0;
+ get_addr_op.addr = data_start;
+ get_addr_op.header.type = ENCL_OP_GET_FROM_ADDRESS;
+
+ EXPECT_EQ(ENCL_CALL(&get_addr_op, &self->run, true), 0);
+
+ EXPECT_EQ(get_addr_op.value, MAGIC);
+ EXPECT_EEXIT(&self->run);
+ EXPECT_EQ(self->run.exception_vector, 0);
+ EXPECT_EQ(self->run.exception_error_code, 0);
+ EXPECT_EQ(self->run.exception_addr, 0);
+
+ /* Change PTE permissions of target page within the enclave */
+ ret = mprotect((void *)data_start, PAGE_SIZE, PROT_READ);
+ if (ret)
+ perror("mprotect");
+
+ /*
+ * PTE permissions of target page changed to read-only, EPCM
+ * permissions unchanged (EPCM permissions are RW), attempt to
+ * write to the page, expecting a regular #PF.
+ */
+
+ put_addr_op.value = MAGIC2;
+
+ EXPECT_EQ(ENCL_CALL(&put_addr_op, &self->run, true), 0);
+
+ EXPECT_EQ(self->run.exception_vector, 14);
+ EXPECT_EQ(self->run.exception_error_code, 0x7);
+ EXPECT_EQ(self->run.exception_addr, data_start);
+
+ self->run.exception_vector = 0;
+ self->run.exception_error_code = 0;
+ self->run.exception_addr = 0;
+
+ /*
+ * Change PTE permissions back to enable enclave to write to the
+ * target page and resume enclave - do not expect any exceptions this
+ * time.
+ */
+ ret = mprotect((void *)data_start, PAGE_SIZE, PROT_READ | PROT_WRITE);
+ if (ret)
+ perror("mprotect");
+
+ EXPECT_EQ(vdso_sgx_enter_enclave((unsigned long)&put_addr_op, 0,
+ 0, ERESUME, 0, 0, &self->run),
+ 0);
+
+ EXPECT_EEXIT(&self->run);
+ EXPECT_EQ(self->run.exception_vector, 0);
+ EXPECT_EQ(self->run.exception_error_code, 0);
+ EXPECT_EQ(self->run.exception_addr, 0);
+
+ get_addr_op.value = 0;
+
+ EXPECT_EQ(ENCL_CALL(&get_addr_op, &self->run, true), 0);
+
+ EXPECT_EQ(get_addr_op.value, MAGIC2);
+ EXPECT_EEXIT(&self->run);
+ EXPECT_EQ(self->run.exception_vector, 0);
+ EXPECT_EQ(self->run.exception_error_code, 0);
+ EXPECT_EQ(self->run.exception_addr, 0);
+}
+
+/*
+ * Modifying permissions of TCS page should not be possible.
+ */
+TEST_F(enclave, tcs_permissions)
+{
+ struct sgx_enclave_restrict_permissions ioc;
+ int ret, errno_save;
+
+ ASSERT_TRUE(setup_test_encl(ENCL_HEAP_SIZE_DEFAULT, &self->encl, _metadata));
+
+ memset(&self->run, 0, sizeof(self->run));
+ self->run.tcs = self->encl.encl_base;
+
+ memset(&ioc, 0, sizeof(ioc));
+
+ /*
+ * Ensure kernel supports needed ioctl() and system supports needed
+ * commands.
+ */
+
+ ret = ioctl(self->encl.fd, SGX_IOC_ENCLAVE_RESTRICT_PERMISSIONS, &ioc);
+ errno_save = ret == -1 ? errno : 0;
+
+ /*
+ * Invalid parameters were provided during sanity check,
+ * expect command to fail.
+ */
+ ASSERT_EQ(ret, -1);
+
+ /* ret == -1 */
+ if (errno_save == ENOTTY)
+ SKIP(return,
+ "Kernel does not support SGX_IOC_ENCLAVE_RESTRICT_PERMISSIONS ioctl()");
+ else if (errno_save == ENODEV)
+ SKIP(return, "System does not support SGX2");
+
+ /*
+ * Attempt to make TCS page read-only. This is not allowed and
+ * should be prevented by the kernel.
+ */
+ ioc.offset = encl_get_tcs_offset(&self->encl);
+ ioc.length = PAGE_SIZE;
+ ioc.permissions = SGX_SECINFO_R;
+
+ ret = ioctl(self->encl.fd, SGX_IOC_ENCLAVE_RESTRICT_PERMISSIONS, &ioc);
+ errno_save = ret == -1 ? errno : 0;
+
+ EXPECT_EQ(ret, -1);
+ EXPECT_EQ(errno_save, EINVAL);
+ EXPECT_EQ(ioc.result, 0);
+ EXPECT_EQ(ioc.count, 0);
+}
+
+/*
+ * Enclave page permission test.
+ *
+ * Modify and restore enclave page's EPCM (enclave) permissions from
+ * outside enclave (ENCLS[EMODPR] via kernel) as well as from within
+ * enclave (via ENCLU[EMODPE]). Check for page fault if
+ * VMA allows access but EPCM permissions do not.
+ */
+TEST_F(enclave, epcm_permissions)
+{
+ struct sgx_enclave_restrict_permissions restrict_ioc;
+ struct encl_op_get_from_addr get_addr_op;
+ struct encl_op_put_to_addr put_addr_op;
+ struct encl_op_eaccept eaccept_op;
+ struct encl_op_emodpe emodpe_op;
+ unsigned long data_start;
+ int ret, errno_save;
+
+ ASSERT_TRUE(setup_test_encl(ENCL_HEAP_SIZE_DEFAULT, &self->encl, _metadata));
+
+ memset(&self->run, 0, sizeof(self->run));
+ self->run.tcs = self->encl.encl_base;
+
+ /*
+ * Ensure kernel supports needed ioctl() and system supports needed
+ * commands.
+ */
+ memset(&restrict_ioc, 0, sizeof(restrict_ioc));
+
+ ret = ioctl(self->encl.fd, SGX_IOC_ENCLAVE_RESTRICT_PERMISSIONS,
+ &restrict_ioc);
+ errno_save = ret == -1 ? errno : 0;
+
+ /*
+ * Invalid parameters were provided during sanity check,
+ * expect command to fail.
+ */
+ ASSERT_EQ(ret, -1);
+
+ /* ret == -1 */
+ if (errno_save == ENOTTY)
+ SKIP(return,
+ "Kernel does not support SGX_IOC_ENCLAVE_RESTRICT_PERMISSIONS ioctl()");
+ else if (errno_save == ENODEV)
+ SKIP(return, "System does not support SGX2");
+
+ /*
+ * Page that will have its permissions changed is the second data
+ * page in the .data segment. This forms part of the local encl_buffer
+ * within the enclave.
+ *
+ * At start of test @data_start should have EPCM as well as PTE and
+ * VMA permissions of RW.
+ */
+
+ data_start = self->encl.encl_base +
+ encl_get_data_offset(&self->encl) + PAGE_SIZE;
+
+ /*
+ * Sanity check that page at @data_start is writable before making
+ * any changes to page permissions.
+ *
+ * Start by writing MAGIC to test page.
+ */
+ put_addr_op.value = MAGIC;
+ put_addr_op.addr = data_start;
+ put_addr_op.header.type = ENCL_OP_PUT_TO_ADDRESS;
+
+ EXPECT_EQ(ENCL_CALL(&put_addr_op, &self->run, true), 0);
+
+ EXPECT_EEXIT(&self->run);
+ EXPECT_EQ(self->run.exception_vector, 0);
+ EXPECT_EQ(self->run.exception_error_code, 0);
+ EXPECT_EQ(self->run.exception_addr, 0);
+
+ /*
+ * Read memory that was just written to, confirming that
+ * page is writable.
+ */
+ get_addr_op.value = 0;
+ get_addr_op.addr = data_start;
+ get_addr_op.header.type = ENCL_OP_GET_FROM_ADDRESS;
+
+ EXPECT_EQ(ENCL_CALL(&get_addr_op, &self->run, true), 0);
+
+ EXPECT_EQ(get_addr_op.value, MAGIC);
+ EXPECT_EEXIT(&self->run);
+ EXPECT_EQ(self->run.exception_vector, 0);
+ EXPECT_EQ(self->run.exception_error_code, 0);
+ EXPECT_EQ(self->run.exception_addr, 0);
+
+ /*
+ * Change EPCM permissions to read-only. Kernel still considers
+ * the page writable.
+ */
+ memset(&restrict_ioc, 0, sizeof(restrict_ioc));
+
+ restrict_ioc.offset = encl_get_data_offset(&self->encl) + PAGE_SIZE;
+ restrict_ioc.length = PAGE_SIZE;
+ restrict_ioc.permissions = SGX_SECINFO_R;
+
+ ret = ioctl(self->encl.fd, SGX_IOC_ENCLAVE_RESTRICT_PERMISSIONS,
+ &restrict_ioc);
+ errno_save = ret == -1 ? errno : 0;
+
+ EXPECT_EQ(ret, 0);
+ EXPECT_EQ(errno_save, 0);
+ EXPECT_EQ(restrict_ioc.result, 0);
+ EXPECT_EQ(restrict_ioc.count, 4096);
+
+ /*
+ * EPCM permissions changed from kernel, need to EACCEPT from enclave.
+ */
+ eaccept_op.epc_addr = data_start;
+ eaccept_op.flags = SGX_SECINFO_R | SGX_SECINFO_REG | SGX_SECINFO_PR;
+ eaccept_op.ret = 0;
+ eaccept_op.header.type = ENCL_OP_EACCEPT;
+
+ EXPECT_EQ(ENCL_CALL(&eaccept_op, &self->run, true), 0);
+
+ EXPECT_EEXIT(&self->run);
+ EXPECT_EQ(self->run.exception_vector, 0);
+ EXPECT_EQ(self->run.exception_error_code, 0);
+ EXPECT_EQ(self->run.exception_addr, 0);
+ EXPECT_EQ(eaccept_op.ret, 0);
+
+ /*
+ * EPCM permissions of page is now read-only, expect #PF
+ * on EPCM when attempting to write to page from within enclave.
+ */
+ put_addr_op.value = MAGIC2;
+
+ EXPECT_EQ(ENCL_CALL(&put_addr_op, &self->run, true), 0);
+
+ EXPECT_EQ(self->run.function, ERESUME);
+ EXPECT_EQ(self->run.exception_vector, 14);
+ EXPECT_EQ(self->run.exception_error_code, 0x8007);
+ EXPECT_EQ(self->run.exception_addr, data_start);
+
+ self->run.exception_vector = 0;
+ self->run.exception_error_code = 0;
+ self->run.exception_addr = 0;
+
+ /*
+ * Received AEX but cannot return to enclave at same entrypoint,
+ * need different TCS from where EPCM permission can be made writable
+ * again.
+ */
+ self->run.tcs = self->encl.encl_base + PAGE_SIZE;
+
+ /*
+ * Enter enclave at new TCS to change EPCM permissions to be
+ * writable again and thus fix the page fault that triggered the
+ * AEX.
+ */
+
+ emodpe_op.epc_addr = data_start;
+ emodpe_op.flags = SGX_SECINFO_R | SGX_SECINFO_W;
+ emodpe_op.header.type = ENCL_OP_EMODPE;
+
+ EXPECT_EQ(ENCL_CALL(&emodpe_op, &self->run, true), 0);
+
+ EXPECT_EEXIT(&self->run);
+ EXPECT_EQ(self->run.exception_vector, 0);
+ EXPECT_EQ(self->run.exception_error_code, 0);
+ EXPECT_EQ(self->run.exception_addr, 0);
+
+ /*
+ * Attempt to return to main TCS to resume execution at faulting
+ * instruction, PTE should continue to allow writing to the page.
+ */
+ self->run.tcs = self->encl.encl_base;
+
+ /*
+ * Wrong page permissions that caused original fault has
+ * now been fixed via EPCM permissions.
+ * Resume execution in main TCS to re-attempt the memory access.
+ */
+ self->run.tcs = self->encl.encl_base;
+
+ EXPECT_EQ(vdso_sgx_enter_enclave((unsigned long)&put_addr_op, 0, 0,
+ ERESUME, 0, 0,
+ &self->run),
+ 0);
+
+ EXPECT_EEXIT(&self->run);
+ EXPECT_EQ(self->run.exception_vector, 0);
+ EXPECT_EQ(self->run.exception_error_code, 0);
+ EXPECT_EQ(self->run.exception_addr, 0);
+
+ get_addr_op.value = 0;
+
+ EXPECT_EQ(ENCL_CALL(&get_addr_op, &self->run, true), 0);
+
+ EXPECT_EQ(get_addr_op.value, MAGIC2);
+ EXPECT_EEXIT(&self->run);
+ EXPECT_EQ(self->run.user_data, 0);
+ EXPECT_EQ(self->run.exception_vector, 0);
+ EXPECT_EQ(self->run.exception_error_code, 0);
+ EXPECT_EQ(self->run.exception_addr, 0);
+}
+
+/*
+ * Test the addition of pages to an initialized enclave via writing to
+ * a page belonging to the enclave's address space but was not added
+ * during enclave creation.
+ */
+TEST_F(enclave, augment)
+{
+ struct encl_op_get_from_addr get_addr_op;
+ struct encl_op_put_to_addr put_addr_op;
+ struct encl_op_eaccept eaccept_op;
+ size_t total_size = 0;
+ void *addr;
+ int i;
+
+ if (!sgx2_supported())
+ SKIP(return, "SGX2 not supported");
+
+ ASSERT_TRUE(setup_test_encl(ENCL_HEAP_SIZE_DEFAULT, &self->encl, _metadata));
+
+ memset(&self->run, 0, sizeof(self->run));
+ self->run.tcs = self->encl.encl_base;
+
+ for (i = 0; i < self->encl.nr_segments; i++) {
+ struct encl_segment *seg = &self->encl.segment_tbl[i];
+
+ total_size += seg->size;
+ }
+
+ /*
+ * Actual enclave size is expected to be larger than the loaded
+ * test enclave since enclave size must be a power of 2 in bytes
+ * and test_encl does not consume it all.
+ */
+ EXPECT_LT(total_size + PAGE_SIZE, self->encl.encl_size);
+
+ /*
+ * Create memory mapping for the page that will be added. New
+ * memory mapping is for one page right after all existing
+ * mappings.
+ * Kernel will allow new mapping using any permissions if it
+ * falls into the enclave's address range but not backed
+ * by existing enclave pages.
+ */
+ addr = mmap((void *)self->encl.encl_base + total_size, PAGE_SIZE,
+ PROT_READ | PROT_WRITE | PROT_EXEC,
+ MAP_SHARED | MAP_FIXED, self->encl.fd, 0);
+ EXPECT_NE(addr, MAP_FAILED);
+
+ self->run.exception_vector = 0;
+ self->run.exception_error_code = 0;
+ self->run.exception_addr = 0;
+
+ /*
+ * Attempt to write to the new page from within enclave.
+ * Expected to fail since page is not (yet) part of the enclave.
+ * The first #PF will trigger the addition of the page to the
+ * enclave, but since the new page needs an EACCEPT from within the
+ * enclave before it can be used it would not be possible
+ * to successfully return to the failing instruction. This is the
+ * cause of the second #PF captured here having the SGX bit set,
+ * it is from hardware preventing the page from being used.
+ */
+ put_addr_op.value = MAGIC;
+ put_addr_op.addr = (unsigned long)addr;
+ put_addr_op.header.type = ENCL_OP_PUT_TO_ADDRESS;
+
+ EXPECT_EQ(ENCL_CALL(&put_addr_op, &self->run, true), 0);
+
+ EXPECT_EQ(self->run.function, ERESUME);
+ EXPECT_EQ(self->run.exception_vector, 14);
+ EXPECT_EQ(self->run.exception_addr, (unsigned long)addr);
+
+ if (self->run.exception_error_code == 0x6) {
+ munmap(addr, PAGE_SIZE);
+ SKIP(return, "Kernel does not support adding pages to initialized enclave");
+ }
+
+ EXPECT_EQ(self->run.exception_error_code, 0x8007);
+
+ self->run.exception_vector = 0;
+ self->run.exception_error_code = 0;
+ self->run.exception_addr = 0;
+
+ /* Handle AEX by running EACCEPT from new entry point. */
+ self->run.tcs = self->encl.encl_base + PAGE_SIZE;
+
+ eaccept_op.epc_addr = self->encl.encl_base + total_size;
+ eaccept_op.flags = SGX_SECINFO_R | SGX_SECINFO_W | SGX_SECINFO_REG | SGX_SECINFO_PENDING;
+ eaccept_op.ret = 0;
+ eaccept_op.header.type = ENCL_OP_EACCEPT;
+
+ EXPECT_EQ(ENCL_CALL(&eaccept_op, &self->run, true), 0);
+
+ EXPECT_EEXIT(&self->run);
+ EXPECT_EQ(self->run.exception_vector, 0);
+ EXPECT_EQ(self->run.exception_error_code, 0);
+ EXPECT_EQ(self->run.exception_addr, 0);
+ EXPECT_EQ(eaccept_op.ret, 0);
+
+ /* Can now return to main TCS to resume execution. */
+ self->run.tcs = self->encl.encl_base;
+
+ EXPECT_EQ(vdso_sgx_enter_enclave((unsigned long)&put_addr_op, 0, 0,
+ ERESUME, 0, 0,
+ &self->run),
+ 0);
+
+ EXPECT_EEXIT(&self->run);
+ EXPECT_EQ(self->run.exception_vector, 0);
+ EXPECT_EQ(self->run.exception_error_code, 0);
+ EXPECT_EQ(self->run.exception_addr, 0);
+
+ /*
+ * Read memory from newly added page that was just written to,
+ * confirming that data previously written (MAGIC) is present.
+ */
+ get_addr_op.value = 0;
+ get_addr_op.addr = (unsigned long)addr;
+ get_addr_op.header.type = ENCL_OP_GET_FROM_ADDRESS;
+
+ EXPECT_EQ(ENCL_CALL(&get_addr_op, &self->run, true), 0);
+
+ EXPECT_EQ(get_addr_op.value, MAGIC);
+ EXPECT_EEXIT(&self->run);
+ EXPECT_EQ(self->run.exception_vector, 0);
+ EXPECT_EQ(self->run.exception_error_code, 0);
+ EXPECT_EQ(self->run.exception_addr, 0);
+
+ munmap(addr, PAGE_SIZE);
+}
+
+/*
+ * Test for the addition of pages to an initialized enclave via a
+ * pre-emptive run of EACCEPT on page to be added.
+ */
+TEST_F(enclave, augment_via_eaccept)
+{
+ struct encl_op_get_from_addr get_addr_op;
+ struct encl_op_put_to_addr put_addr_op;
+ struct encl_op_eaccept eaccept_op;
+ size_t total_size = 0;
+ void *addr;
+ int i;
+
+ if (!sgx2_supported())
+ SKIP(return, "SGX2 not supported");
+
+ ASSERT_TRUE(setup_test_encl(ENCL_HEAP_SIZE_DEFAULT, &self->encl, _metadata));
+
+ memset(&self->run, 0, sizeof(self->run));
+ self->run.tcs = self->encl.encl_base;
+
+ for (i = 0; i < self->encl.nr_segments; i++) {
+ struct encl_segment *seg = &self->encl.segment_tbl[i];
+
+ total_size += seg->size;
+ }
+
+ /*
+ * Actual enclave size is expected to be larger than the loaded
+ * test enclave since enclave size must be a power of 2 in bytes while
+ * test_encl does not consume it all.
+ */
+ EXPECT_LT(total_size + PAGE_SIZE, self->encl.encl_size);
+
+ /*
+ * mmap() a page at end of existing enclave to be used for dynamic
+ * EPC page.
+ *
+ * Kernel will allow new mapping using any permissions if it
+ * falls into the enclave's address range but not backed
+ * by existing enclave pages.
+ */
+
+ addr = mmap((void *)self->encl.encl_base + total_size, PAGE_SIZE,
+ PROT_READ | PROT_WRITE | PROT_EXEC, MAP_SHARED | MAP_FIXED,
+ self->encl.fd, 0);
+ EXPECT_NE(addr, MAP_FAILED);
+
+ self->run.exception_vector = 0;
+ self->run.exception_error_code = 0;
+ self->run.exception_addr = 0;
+
+ /*
+ * Run EACCEPT on new page to trigger the #PF->EAUG->EACCEPT(again
+ * without a #PF). All should be transparent to userspace.
+ */
+ eaccept_op.epc_addr = self->encl.encl_base + total_size;
+ eaccept_op.flags = SGX_SECINFO_R | SGX_SECINFO_W | SGX_SECINFO_REG | SGX_SECINFO_PENDING;
+ eaccept_op.ret = 0;
+ eaccept_op.header.type = ENCL_OP_EACCEPT;
+
+ EXPECT_EQ(ENCL_CALL(&eaccept_op, &self->run, true), 0);
+
+ if (self->run.exception_vector == 14 &&
+ self->run.exception_error_code == 4 &&
+ self->run.exception_addr == self->encl.encl_base + total_size) {
+ munmap(addr, PAGE_SIZE);
+ SKIP(return, "Kernel does not support adding pages to initialized enclave");
+ }
+
+ EXPECT_EEXIT(&self->run);
+ EXPECT_EQ(self->run.exception_vector, 0);
+ EXPECT_EQ(self->run.exception_error_code, 0);
+ EXPECT_EQ(self->run.exception_addr, 0);
+ EXPECT_EQ(eaccept_op.ret, 0);
+
+ /*
+ * New page should be accessible from within enclave - attempt to
+ * write to it.
+ */
+ put_addr_op.value = MAGIC;
+ put_addr_op.addr = (unsigned long)addr;
+ put_addr_op.header.type = ENCL_OP_PUT_TO_ADDRESS;
+
+ EXPECT_EQ(ENCL_CALL(&put_addr_op, &self->run, true), 0);
+
+ EXPECT_EEXIT(&self->run);
+ EXPECT_EQ(self->run.exception_vector, 0);
+ EXPECT_EQ(self->run.exception_error_code, 0);
+ EXPECT_EQ(self->run.exception_addr, 0);
+
+ /*
+ * Read memory from newly added page that was just written to,
+ * confirming that data previously written (MAGIC) is present.
+ */
+ get_addr_op.value = 0;
+ get_addr_op.addr = (unsigned long)addr;
+ get_addr_op.header.type = ENCL_OP_GET_FROM_ADDRESS;
+
+ EXPECT_EQ(ENCL_CALL(&get_addr_op, &self->run, true), 0);
+
+ EXPECT_EQ(get_addr_op.value, MAGIC);
+ EXPECT_EEXIT(&self->run);
+ EXPECT_EQ(self->run.exception_vector, 0);
+ EXPECT_EQ(self->run.exception_error_code, 0);
+ EXPECT_EQ(self->run.exception_addr, 0);
+
+ munmap(addr, PAGE_SIZE);
+}
+
+/*
+ * SGX2 page type modification test in two phases:
+ * Phase 1:
+ * Create a new TCS, consisting out of three new pages (stack page with regular
+ * page type, SSA page with regular page type, and TCS page with TCS page
+ * type) in an initialized enclave and run a simple workload within it.
+ * Phase 2:
+ * Remove the three pages added in phase 1, add a new regular page at the
+ * same address that previously hosted the TCS page and verify that it can
+ * be modified.
+ */
+TEST_F(enclave, tcs_create)
+{
+ struct encl_op_init_tcs_page init_tcs_page_op;
+ struct sgx_enclave_remove_pages remove_ioc;
+ struct encl_op_get_from_addr get_addr_op;
+ struct sgx_enclave_modify_types modt_ioc;
+ struct encl_op_put_to_addr put_addr_op;
+ struct encl_op_get_from_buf get_buf_op;
+ struct encl_op_put_to_buf put_buf_op;
+ void *addr, *tcs, *stack_end, *ssa;
+ struct encl_op_eaccept eaccept_op;
+ size_t total_size = 0;
+ uint64_t val_64;
+ int errno_save;
+ int ret, i;
+
+ ASSERT_TRUE(setup_test_encl(ENCL_HEAP_SIZE_DEFAULT, &self->encl,
+ _metadata));
+
+ memset(&self->run, 0, sizeof(self->run));
+ self->run.tcs = self->encl.encl_base;
+
+ /*
+ * Hardware (SGX2) and kernel support is needed for this test. Start
+ * with check that test has a chance of succeeding.
+ */
+ memset(&modt_ioc, 0, sizeof(modt_ioc));
+ ret = ioctl(self->encl.fd, SGX_IOC_ENCLAVE_MODIFY_TYPES, &modt_ioc);
+
+ if (ret == -1) {
+ if (errno == ENOTTY)
+ SKIP(return,
+ "Kernel does not support SGX_IOC_ENCLAVE_MODIFY_TYPES ioctl()");
+ else if (errno == ENODEV)
+ SKIP(return, "System does not support SGX2");
+ }
+
+ /*
+ * Invalid parameters were provided during sanity check,
+ * expect command to fail.
+ */
+ EXPECT_EQ(ret, -1);
+
+ /*
+ * Add three regular pages via EAUG: one will be the TCS stack, one
+ * will be the TCS SSA, and one will be the new TCS. The stack and
+ * SSA will remain as regular pages, the TCS page will need its
+ * type changed after populated with needed data.
+ */
+ for (i = 0; i < self->encl.nr_segments; i++) {
+ struct encl_segment *seg = &self->encl.segment_tbl[i];
+
+ total_size += seg->size;
+ }
+
+ /*
+ * Actual enclave size is expected to be larger than the loaded
+ * test enclave since enclave size must be a power of 2 in bytes while
+ * test_encl does not consume it all.
+ */
+ EXPECT_LT(total_size + 3 * PAGE_SIZE, self->encl.encl_size);
+
+ /*
+ * mmap() three pages at end of existing enclave to be used for the
+ * three new pages.
+ */
+ addr = mmap((void *)self->encl.encl_base + total_size, 3 * PAGE_SIZE,
+ PROT_READ | PROT_WRITE, MAP_SHARED | MAP_FIXED,
+ self->encl.fd, 0);
+ EXPECT_NE(addr, MAP_FAILED);
+
+ self->run.exception_vector = 0;
+ self->run.exception_error_code = 0;
+ self->run.exception_addr = 0;
+
+ stack_end = (void *)self->encl.encl_base + total_size;
+ tcs = (void *)self->encl.encl_base + total_size + PAGE_SIZE;
+ ssa = (void *)self->encl.encl_base + total_size + 2 * PAGE_SIZE;
+
+ /*
+ * Run EACCEPT on each new page to trigger the
+ * EACCEPT->(#PF)->EAUG->EACCEPT(again without a #PF) flow.
+ */
+
+ eaccept_op.epc_addr = (unsigned long)stack_end;
+ eaccept_op.flags = SGX_SECINFO_R | SGX_SECINFO_W | SGX_SECINFO_REG | SGX_SECINFO_PENDING;
+ eaccept_op.ret = 0;
+ eaccept_op.header.type = ENCL_OP_EACCEPT;
+
+ EXPECT_EQ(ENCL_CALL(&eaccept_op, &self->run, true), 0);
+
+ if (self->run.exception_vector == 14 &&
+ self->run.exception_error_code == 4 &&
+ self->run.exception_addr == (unsigned long)stack_end) {
+ munmap(addr, 3 * PAGE_SIZE);
+ SKIP(return, "Kernel does not support adding pages to initialized enclave");
+ }
+
+ EXPECT_EEXIT(&self->run);
+ EXPECT_EQ(self->run.exception_vector, 0);
+ EXPECT_EQ(self->run.exception_error_code, 0);
+ EXPECT_EQ(self->run.exception_addr, 0);
+ EXPECT_EQ(eaccept_op.ret, 0);
+
+ eaccept_op.epc_addr = (unsigned long)ssa;
+
+ EXPECT_EQ(ENCL_CALL(&eaccept_op, &self->run, true), 0);
+
+ EXPECT_EEXIT(&self->run);
+ EXPECT_EQ(self->run.exception_vector, 0);
+ EXPECT_EQ(self->run.exception_error_code, 0);
+ EXPECT_EQ(self->run.exception_addr, 0);
+ EXPECT_EQ(eaccept_op.ret, 0);
+
+ eaccept_op.epc_addr = (unsigned long)tcs;
+
+ EXPECT_EQ(ENCL_CALL(&eaccept_op, &self->run, true), 0);
+
+ EXPECT_EEXIT(&self->run);
+ EXPECT_EQ(self->run.exception_vector, 0);
+ EXPECT_EQ(self->run.exception_error_code, 0);
+ EXPECT_EQ(self->run.exception_addr, 0);
+ EXPECT_EQ(eaccept_op.ret, 0);
+
+ /*
+ * Three new pages added to enclave. Now populate the TCS page with
+ * needed data. This should be done from within enclave. Provide
+ * the function that will do the actual data population with needed
+ * data.
+ */
+
+ /*
+ * New TCS will use the "encl_dyn_entry" entrypoint that expects
+ * stack to begin in page before TCS page.
+ */
+ val_64 = encl_get_entry(&self->encl, "encl_dyn_entry");
+ EXPECT_NE(val_64, 0);
+
+ init_tcs_page_op.tcs_page = (unsigned long)tcs;
+ init_tcs_page_op.ssa = (unsigned long)total_size + 2 * PAGE_SIZE;
+ init_tcs_page_op.entry = val_64;
+ init_tcs_page_op.header.type = ENCL_OP_INIT_TCS_PAGE;
+
+ EXPECT_EQ(ENCL_CALL(&init_tcs_page_op, &self->run, true), 0);
+
+ EXPECT_EEXIT(&self->run);
+ EXPECT_EQ(self->run.exception_vector, 0);
+ EXPECT_EQ(self->run.exception_error_code, 0);
+ EXPECT_EQ(self->run.exception_addr, 0);
+
+ /* Change TCS page type to TCS. */
+ memset(&modt_ioc, 0, sizeof(modt_ioc));
+
+ modt_ioc.offset = total_size + PAGE_SIZE;
+ modt_ioc.length = PAGE_SIZE;
+ modt_ioc.page_type = SGX_PAGE_TYPE_TCS;
+
+ ret = ioctl(self->encl.fd, SGX_IOC_ENCLAVE_MODIFY_TYPES, &modt_ioc);
+ errno_save = ret == -1 ? errno : 0;
+
+ EXPECT_EQ(ret, 0);
+ EXPECT_EQ(errno_save, 0);
+ EXPECT_EQ(modt_ioc.result, 0);
+ EXPECT_EQ(modt_ioc.count, 4096);
+
+ /* EACCEPT new TCS page from enclave. */
+ eaccept_op.epc_addr = (unsigned long)tcs;
+ eaccept_op.flags = SGX_SECINFO_TCS | SGX_SECINFO_MODIFIED;
+ eaccept_op.ret = 0;
+ eaccept_op.header.type = ENCL_OP_EACCEPT;
+
+ EXPECT_EQ(ENCL_CALL(&eaccept_op, &self->run, true), 0);
+
+ EXPECT_EEXIT(&self->run);
+ EXPECT_EQ(self->run.exception_vector, 0);
+ EXPECT_EQ(self->run.exception_error_code, 0);
+ EXPECT_EQ(self->run.exception_addr, 0);
+ EXPECT_EQ(eaccept_op.ret, 0);
+
+ /* Run workload from new TCS. */
+ self->run.tcs = (unsigned long)tcs;
+
+ /*
+ * Simple workload to write to data buffer and read value back.
+ */
+ put_buf_op.header.type = ENCL_OP_PUT_TO_BUFFER;
+ put_buf_op.value = MAGIC;
+
+ EXPECT_EQ(ENCL_CALL(&put_buf_op, &self->run, true), 0);
+
+ EXPECT_EEXIT(&self->run);
+ EXPECT_EQ(self->run.exception_vector, 0);
+ EXPECT_EQ(self->run.exception_error_code, 0);
+ EXPECT_EQ(self->run.exception_addr, 0);
+
+ get_buf_op.header.type = ENCL_OP_GET_FROM_BUFFER;
+ get_buf_op.value = 0;
+
+ EXPECT_EQ(ENCL_CALL(&get_buf_op, &self->run, true), 0);
+
+ EXPECT_EQ(get_buf_op.value, MAGIC);
+ EXPECT_EEXIT(&self->run);
+ EXPECT_EQ(self->run.exception_vector, 0);
+ EXPECT_EQ(self->run.exception_error_code, 0);
+ EXPECT_EQ(self->run.exception_addr, 0);
+
+ /*
+ * Phase 2 of test:
+ * Remove pages associated with new TCS, create a regular page
+ * where TCS page used to be and verify it can be used as a regular
+ * page.
+ */
+
+ /* Start page removal by requesting change of page type to PT_TRIM. */
+ memset(&modt_ioc, 0, sizeof(modt_ioc));
+
+ modt_ioc.offset = total_size;
+ modt_ioc.length = 3 * PAGE_SIZE;
+ modt_ioc.page_type = SGX_PAGE_TYPE_TRIM;
+
+ ret = ioctl(self->encl.fd, SGX_IOC_ENCLAVE_MODIFY_TYPES, &modt_ioc);
+ errno_save = ret == -1 ? errno : 0;
+
+ EXPECT_EQ(ret, 0);
+ EXPECT_EQ(errno_save, 0);
+ EXPECT_EQ(modt_ioc.result, 0);
+ EXPECT_EQ(modt_ioc.count, 3 * PAGE_SIZE);
+
+ /*
+ * Enter enclave via TCS #1 and approve page removal by sending
+ * EACCEPT for each of three removed pages.
+ */
+ self->run.tcs = self->encl.encl_base;
+
+ eaccept_op.epc_addr = (unsigned long)stack_end;
+ eaccept_op.flags = SGX_SECINFO_TRIM | SGX_SECINFO_MODIFIED;
+ eaccept_op.ret = 0;
+ eaccept_op.header.type = ENCL_OP_EACCEPT;
+
+ EXPECT_EQ(ENCL_CALL(&eaccept_op, &self->run, true), 0);
+
+ EXPECT_EEXIT(&self->run);
+ EXPECT_EQ(self->run.exception_vector, 0);
+ EXPECT_EQ(self->run.exception_error_code, 0);
+ EXPECT_EQ(self->run.exception_addr, 0);
+ EXPECT_EQ(eaccept_op.ret, 0);
+
+ eaccept_op.epc_addr = (unsigned long)tcs;
+ eaccept_op.ret = 0;
+
+ EXPECT_EQ(ENCL_CALL(&eaccept_op, &self->run, true), 0);
+
+ EXPECT_EEXIT(&self->run);
+ EXPECT_EQ(self->run.exception_vector, 0);
+ EXPECT_EQ(self->run.exception_error_code, 0);
+ EXPECT_EQ(self->run.exception_addr, 0);
+ EXPECT_EQ(eaccept_op.ret, 0);
+
+ eaccept_op.epc_addr = (unsigned long)ssa;
+ eaccept_op.ret = 0;
+
+ EXPECT_EQ(ENCL_CALL(&eaccept_op, &self->run, true), 0);
+
+ EXPECT_EEXIT(&self->run);
+ EXPECT_EQ(self->run.exception_vector, 0);
+ EXPECT_EQ(self->run.exception_error_code, 0);
+ EXPECT_EQ(self->run.exception_addr, 0);
+ EXPECT_EQ(eaccept_op.ret, 0);
+
+ /* Send final ioctl() to complete page removal. */
+ memset(&remove_ioc, 0, sizeof(remove_ioc));
+
+ remove_ioc.offset = total_size;
+ remove_ioc.length = 3 * PAGE_SIZE;
+
+ ret = ioctl(self->encl.fd, SGX_IOC_ENCLAVE_REMOVE_PAGES, &remove_ioc);
+ errno_save = ret == -1 ? errno : 0;
+
+ EXPECT_EQ(ret, 0);
+ EXPECT_EQ(errno_save, 0);
+ EXPECT_EQ(remove_ioc.count, 3 * PAGE_SIZE);
+
+ /*
+ * Enter enclave via TCS #1 and access location where TCS #3 was to
+ * trigger dynamic add of regular page at that location.
+ */
+ eaccept_op.epc_addr = (unsigned long)tcs;
+ eaccept_op.flags = SGX_SECINFO_R | SGX_SECINFO_W | SGX_SECINFO_REG | SGX_SECINFO_PENDING;
+ eaccept_op.ret = 0;
+ eaccept_op.header.type = ENCL_OP_EACCEPT;
+
+ EXPECT_EQ(ENCL_CALL(&eaccept_op, &self->run, true), 0);
+
+ EXPECT_EEXIT(&self->run);
+ EXPECT_EQ(self->run.exception_vector, 0);
+ EXPECT_EQ(self->run.exception_error_code, 0);
+ EXPECT_EQ(self->run.exception_addr, 0);
+ EXPECT_EQ(eaccept_op.ret, 0);
+
+ /*
+ * New page should be accessible from within enclave - write to it.
+ */
+ put_addr_op.value = MAGIC;
+ put_addr_op.addr = (unsigned long)tcs;
+ put_addr_op.header.type = ENCL_OP_PUT_TO_ADDRESS;
+
+ EXPECT_EQ(ENCL_CALL(&put_addr_op, &self->run, true), 0);
+
+ EXPECT_EEXIT(&self->run);
+ EXPECT_EQ(self->run.exception_vector, 0);
+ EXPECT_EQ(self->run.exception_error_code, 0);
+ EXPECT_EQ(self->run.exception_addr, 0);
+
+ /*
+ * Read memory from newly added page that was just written to,
+ * confirming that data previously written (MAGIC) is present.
+ */
+ get_addr_op.value = 0;
+ get_addr_op.addr = (unsigned long)tcs;
+ get_addr_op.header.type = ENCL_OP_GET_FROM_ADDRESS;
+
+ EXPECT_EQ(ENCL_CALL(&get_addr_op, &self->run, true), 0);
+
+ EXPECT_EQ(get_addr_op.value, MAGIC);
+ EXPECT_EEXIT(&self->run);
+ EXPECT_EQ(self->run.exception_vector, 0);
+ EXPECT_EQ(self->run.exception_error_code, 0);
+ EXPECT_EQ(self->run.exception_addr, 0);
+
+ munmap(addr, 3 * PAGE_SIZE);
+}
+
+/*
+ * Ensure sane behavior if user requests page removal, does not run
+ * EACCEPT from within enclave but still attempts to finalize page removal
+ * with the SGX_IOC_ENCLAVE_REMOVE_PAGES ioctl(). The latter should fail
+ * because the removal was not EACCEPTed from within the enclave.
+ */
+TEST_F(enclave, remove_added_page_no_eaccept)
+{
+ struct sgx_enclave_remove_pages remove_ioc;
+ struct encl_op_get_from_addr get_addr_op;
+ struct sgx_enclave_modify_types modt_ioc;
+ struct encl_op_put_to_addr put_addr_op;
+ unsigned long data_start;
+ int ret, errno_save;
+
+ ASSERT_TRUE(setup_test_encl(ENCL_HEAP_SIZE_DEFAULT, &self->encl, _metadata));
+
+ memset(&self->run, 0, sizeof(self->run));
+ self->run.tcs = self->encl.encl_base;
+
+ /*
+ * Hardware (SGX2) and kernel support is needed for this test. Start
+ * with check that test has a chance of succeeding.
+ */
+ memset(&modt_ioc, 0, sizeof(modt_ioc));
+ ret = ioctl(self->encl.fd, SGX_IOC_ENCLAVE_MODIFY_TYPES, &modt_ioc);
+
+ if (ret == -1) {
+ if (errno == ENOTTY)
+ SKIP(return,
+ "Kernel does not support SGX_IOC_ENCLAVE_MODIFY_TYPES ioctl()");
+ else if (errno == ENODEV)
+ SKIP(return, "System does not support SGX2");
+ }
+
+ /*
+ * Invalid parameters were provided during sanity check,
+ * expect command to fail.
+ */
+ EXPECT_EQ(ret, -1);
+
+ /*
+ * Page that will be removed is the second data page in the .data
+ * segment. This forms part of the local encl_buffer within the
+ * enclave.
+ */
+ data_start = self->encl.encl_base +
+ encl_get_data_offset(&self->encl) + PAGE_SIZE;
+
+ /*
+ * Sanity check that page at @data_start is writable before
+ * removing it.
+ *
+ * Start by writing MAGIC to test page.
+ */
+ put_addr_op.value = MAGIC;
+ put_addr_op.addr = data_start;
+ put_addr_op.header.type = ENCL_OP_PUT_TO_ADDRESS;
+
+ EXPECT_EQ(ENCL_CALL(&put_addr_op, &self->run, true), 0);
+
+ EXPECT_EEXIT(&self->run);
+ EXPECT_EQ(self->run.exception_vector, 0);
+ EXPECT_EQ(self->run.exception_error_code, 0);
+ EXPECT_EQ(self->run.exception_addr, 0);
+
+ /*
+ * Read memory that was just written to, confirming that data
+ * previously written (MAGIC) is present.
+ */
+ get_addr_op.value = 0;
+ get_addr_op.addr = data_start;
+ get_addr_op.header.type = ENCL_OP_GET_FROM_ADDRESS;
+
+ EXPECT_EQ(ENCL_CALL(&get_addr_op, &self->run, true), 0);
+
+ EXPECT_EQ(get_addr_op.value, MAGIC);
+ EXPECT_EEXIT(&self->run);
+ EXPECT_EQ(self->run.exception_vector, 0);
+ EXPECT_EQ(self->run.exception_error_code, 0);
+ EXPECT_EQ(self->run.exception_addr, 0);
+
+ /* Start page removal by requesting change of page type to PT_TRIM */
+ memset(&modt_ioc, 0, sizeof(modt_ioc));
+
+ modt_ioc.offset = encl_get_data_offset(&self->encl) + PAGE_SIZE;
+ modt_ioc.length = PAGE_SIZE;
+ modt_ioc.page_type = SGX_PAGE_TYPE_TRIM;
+
+ ret = ioctl(self->encl.fd, SGX_IOC_ENCLAVE_MODIFY_TYPES, &modt_ioc);
+ errno_save = ret == -1 ? errno : 0;
+
+ EXPECT_EQ(ret, 0);
+ EXPECT_EQ(errno_save, 0);
+ EXPECT_EQ(modt_ioc.result, 0);
+ EXPECT_EQ(modt_ioc.count, 4096);
+
+ /* Skip EACCEPT */
+
+ /* Send final ioctl() to complete page removal */
+ memset(&remove_ioc, 0, sizeof(remove_ioc));
+
+ remove_ioc.offset = encl_get_data_offset(&self->encl) + PAGE_SIZE;
+ remove_ioc.length = PAGE_SIZE;
+
+ ret = ioctl(self->encl.fd, SGX_IOC_ENCLAVE_REMOVE_PAGES, &remove_ioc);
+ errno_save = ret == -1 ? errno : 0;
+
+ /* Operation not permitted since EACCEPT was omitted. */
+ EXPECT_EQ(ret, -1);
+ EXPECT_EQ(errno_save, EPERM);
+ EXPECT_EQ(remove_ioc.count, 0);
+}
+
+/*
+ * Request enclave page removal but instead of correctly following with
+ * EACCEPT a read attempt to page is made from within the enclave.
+ */
+TEST_F(enclave, remove_added_page_invalid_access)
+{
+ struct encl_op_get_from_addr get_addr_op;
+ struct encl_op_put_to_addr put_addr_op;
+ struct sgx_enclave_modify_types ioc;
+ unsigned long data_start;
+ int ret, errno_save;
+
+ ASSERT_TRUE(setup_test_encl(ENCL_HEAP_SIZE_DEFAULT, &self->encl, _metadata));
+
+ memset(&self->run, 0, sizeof(self->run));
+ self->run.tcs = self->encl.encl_base;
+
+ /*
+ * Hardware (SGX2) and kernel support is needed for this test. Start
+ * with check that test has a chance of succeeding.
+ */
+ memset(&ioc, 0, sizeof(ioc));
+ ret = ioctl(self->encl.fd, SGX_IOC_ENCLAVE_MODIFY_TYPES, &ioc);
+
+ if (ret == -1) {
+ if (errno == ENOTTY)
+ SKIP(return,
+ "Kernel does not support SGX_IOC_ENCLAVE_MODIFY_TYPES ioctl()");
+ else if (errno == ENODEV)
+ SKIP(return, "System does not support SGX2");
+ }
+
+ /*
+ * Invalid parameters were provided during sanity check,
+ * expect command to fail.
+ */
+ EXPECT_EQ(ret, -1);
+
+ /*
+ * Page that will be removed is the second data page in the .data
+ * segment. This forms part of the local encl_buffer within the
+ * enclave.
+ */
+ data_start = self->encl.encl_base +
+ encl_get_data_offset(&self->encl) + PAGE_SIZE;
+
+ /*
+ * Sanity check that page at @data_start is writable before
+ * removing it.
+ *
+ * Start by writing MAGIC to test page.
+ */
+ put_addr_op.value = MAGIC;
+ put_addr_op.addr = data_start;
+ put_addr_op.header.type = ENCL_OP_PUT_TO_ADDRESS;
+
+ EXPECT_EQ(ENCL_CALL(&put_addr_op, &self->run, true), 0);
+
+ EXPECT_EEXIT(&self->run);
+ EXPECT_EQ(self->run.exception_vector, 0);
+ EXPECT_EQ(self->run.exception_error_code, 0);
+ EXPECT_EQ(self->run.exception_addr, 0);
+
+ /*
+ * Read memory that was just written to, confirming that data
+ * previously written (MAGIC) is present.
+ */
+ get_addr_op.value = 0;
+ get_addr_op.addr = data_start;
+ get_addr_op.header.type = ENCL_OP_GET_FROM_ADDRESS;
+
+ EXPECT_EQ(ENCL_CALL(&get_addr_op, &self->run, true), 0);
+
+ EXPECT_EQ(get_addr_op.value, MAGIC);
+ EXPECT_EEXIT(&self->run);
+ EXPECT_EQ(self->run.exception_vector, 0);
+ EXPECT_EQ(self->run.exception_error_code, 0);
+ EXPECT_EQ(self->run.exception_addr, 0);
+
+ /* Start page removal by requesting change of page type to PT_TRIM. */
+ memset(&ioc, 0, sizeof(ioc));
+
+ ioc.offset = encl_get_data_offset(&self->encl) + PAGE_SIZE;
+ ioc.length = PAGE_SIZE;
+ ioc.page_type = SGX_PAGE_TYPE_TRIM;
+
+ ret = ioctl(self->encl.fd, SGX_IOC_ENCLAVE_MODIFY_TYPES, &ioc);
+ errno_save = ret == -1 ? errno : 0;
+
+ EXPECT_EQ(ret, 0);
+ EXPECT_EQ(errno_save, 0);
+ EXPECT_EQ(ioc.result, 0);
+ EXPECT_EQ(ioc.count, 4096);
+
+ /*
+ * Read from page that was just removed.
+ */
+ get_addr_op.value = 0;
+
+ EXPECT_EQ(ENCL_CALL(&get_addr_op, &self->run, true), 0);
+
+ /*
+ * From kernel perspective the page is present but according to SGX the
+ * page should not be accessible so a #PF with SGX bit set is
+ * expected.
+ */
+
+ EXPECT_EQ(self->run.function, ERESUME);
+ EXPECT_EQ(self->run.exception_vector, 14);
+ EXPECT_EQ(self->run.exception_error_code, 0x8005);
+ EXPECT_EQ(self->run.exception_addr, data_start);
+}
+
+/*
+ * Request enclave page removal and correctly follow with
+ * EACCEPT but do not follow with removal ioctl() but instead a read attempt
+ * to removed page is made from within the enclave.
+ */
+TEST_F(enclave, remove_added_page_invalid_access_after_eaccept)
+{
+ struct encl_op_get_from_addr get_addr_op;
+ struct encl_op_put_to_addr put_addr_op;
+ struct sgx_enclave_modify_types ioc;
+ struct encl_op_eaccept eaccept_op;
+ unsigned long data_start;
+ int ret, errno_save;
+
+ ASSERT_TRUE(setup_test_encl(ENCL_HEAP_SIZE_DEFAULT, &self->encl, _metadata));
+
+ memset(&self->run, 0, sizeof(self->run));
+ self->run.tcs = self->encl.encl_base;
+
+ /*
+ * Hardware (SGX2) and kernel support is needed for this test. Start
+ * with check that test has a chance of succeeding.
+ */
+ memset(&ioc, 0, sizeof(ioc));
+ ret = ioctl(self->encl.fd, SGX_IOC_ENCLAVE_MODIFY_TYPES, &ioc);
+
+ if (ret == -1) {
+ if (errno == ENOTTY)
+ SKIP(return,
+ "Kernel does not support SGX_IOC_ENCLAVE_MODIFY_TYPES ioctl()");
+ else if (errno == ENODEV)
+ SKIP(return, "System does not support SGX2");
+ }
+
+ /*
+ * Invalid parameters were provided during sanity check,
+ * expect command to fail.
+ */
+ EXPECT_EQ(ret, -1);
+
+ /*
+ * Page that will be removed is the second data page in the .data
+ * segment. This forms part of the local encl_buffer within the
+ * enclave.
+ */
+ data_start = self->encl.encl_base +
+ encl_get_data_offset(&self->encl) + PAGE_SIZE;
+
+ /*
+ * Sanity check that page at @data_start is writable before
+ * removing it.
+ *
+ * Start by writing MAGIC to test page.
+ */
+ put_addr_op.value = MAGIC;
+ put_addr_op.addr = data_start;
+ put_addr_op.header.type = ENCL_OP_PUT_TO_ADDRESS;
+
+ EXPECT_EQ(ENCL_CALL(&put_addr_op, &self->run, true), 0);
+
+ EXPECT_EEXIT(&self->run);
+ EXPECT_EQ(self->run.exception_vector, 0);
+ EXPECT_EQ(self->run.exception_error_code, 0);
+ EXPECT_EQ(self->run.exception_addr, 0);
+
+ /*
+ * Read memory that was just written to, confirming that data
+ * previously written (MAGIC) is present.
+ */
+ get_addr_op.value = 0;
+ get_addr_op.addr = data_start;
+ get_addr_op.header.type = ENCL_OP_GET_FROM_ADDRESS;
+
+ EXPECT_EQ(ENCL_CALL(&get_addr_op, &self->run, true), 0);
+
+ EXPECT_EQ(get_addr_op.value, MAGIC);
+ EXPECT_EEXIT(&self->run);
+ EXPECT_EQ(self->run.exception_vector, 0);
+ EXPECT_EQ(self->run.exception_error_code, 0);
+ EXPECT_EQ(self->run.exception_addr, 0);
+
+ /* Start page removal by requesting change of page type to PT_TRIM. */
+ memset(&ioc, 0, sizeof(ioc));
+
+ ioc.offset = encl_get_data_offset(&self->encl) + PAGE_SIZE;
+ ioc.length = PAGE_SIZE;
+ ioc.page_type = SGX_PAGE_TYPE_TRIM;
+
+ ret = ioctl(self->encl.fd, SGX_IOC_ENCLAVE_MODIFY_TYPES, &ioc);
+ errno_save = ret == -1 ? errno : 0;
+
+ EXPECT_EQ(ret, 0);
+ EXPECT_EQ(errno_save, 0);
+ EXPECT_EQ(ioc.result, 0);
+ EXPECT_EQ(ioc.count, 4096);
+
+ eaccept_op.epc_addr = (unsigned long)data_start;
+ eaccept_op.ret = 0;
+ eaccept_op.flags = SGX_SECINFO_TRIM | SGX_SECINFO_MODIFIED;
+ eaccept_op.header.type = ENCL_OP_EACCEPT;
+
+ EXPECT_EQ(ENCL_CALL(&eaccept_op, &self->run, true), 0);
+
+ EXPECT_EEXIT(&self->run);
+ EXPECT_EQ(self->run.exception_vector, 0);
+ EXPECT_EQ(self->run.exception_error_code, 0);
+ EXPECT_EQ(self->run.exception_addr, 0);
+ EXPECT_EQ(eaccept_op.ret, 0);
+
+ /* Skip ioctl() to remove page. */
+
+ /*
+ * Read from page that was just removed.
+ */
+ get_addr_op.value = 0;
+
+ EXPECT_EQ(ENCL_CALL(&get_addr_op, &self->run, true), 0);
+
+ /*
+ * From kernel perspective the page is present but according to SGX the
+ * page should not be accessible so a #PF with SGX bit set is
+ * expected.
+ */
+
+ EXPECT_EQ(self->run.function, ERESUME);
+ EXPECT_EQ(self->run.exception_vector, 14);
+ EXPECT_EQ(self->run.exception_error_code, 0x8005);
+ EXPECT_EQ(self->run.exception_addr, data_start);
+}
+
+TEST_F(enclave, remove_untouched_page)
+{
+ struct sgx_enclave_remove_pages remove_ioc;
+ struct sgx_enclave_modify_types modt_ioc;
+ struct encl_op_eaccept eaccept_op;
+ unsigned long data_start;
+ int ret, errno_save;
+
+ ASSERT_TRUE(setup_test_encl(ENCL_HEAP_SIZE_DEFAULT, &self->encl, _metadata));
+
+ /*
+ * Hardware (SGX2) and kernel support is needed for this test. Start
+ * with check that test has a chance of succeeding.
+ */
+ memset(&modt_ioc, 0, sizeof(modt_ioc));
+ ret = ioctl(self->encl.fd, SGX_IOC_ENCLAVE_MODIFY_TYPES, &modt_ioc);
+
+ if (ret == -1) {
+ if (errno == ENOTTY)
+ SKIP(return,
+ "Kernel does not support SGX_IOC_ENCLAVE_MODIFY_TYPES ioctl()");
+ else if (errno == ENODEV)
+ SKIP(return, "System does not support SGX2");
+ }
+
+ /*
+ * Invalid parameters were provided during sanity check,
+ * expect command to fail.
+ */
+ EXPECT_EQ(ret, -1);
+
+ /* SGX2 is supported by kernel and hardware, test can proceed. */
+ memset(&self->run, 0, sizeof(self->run));
+ self->run.tcs = self->encl.encl_base;
+
+ data_start = self->encl.encl_base +
+ encl_get_data_offset(&self->encl) + PAGE_SIZE;
+
+ memset(&modt_ioc, 0, sizeof(modt_ioc));
+
+ modt_ioc.offset = encl_get_data_offset(&self->encl) + PAGE_SIZE;
+ modt_ioc.length = PAGE_SIZE;
+ modt_ioc.page_type = SGX_PAGE_TYPE_TRIM;
+ ret = ioctl(self->encl.fd, SGX_IOC_ENCLAVE_MODIFY_TYPES, &modt_ioc);
+ errno_save = ret == -1 ? errno : 0;
+
+ EXPECT_EQ(ret, 0);
+ EXPECT_EQ(errno_save, 0);
+ EXPECT_EQ(modt_ioc.result, 0);
+ EXPECT_EQ(modt_ioc.count, 4096);
+
+ /*
+ * Enter enclave via TCS #1 and approve page removal by sending
+ * EACCEPT for removed page.
+ */
+
+ eaccept_op.epc_addr = data_start;
+ eaccept_op.flags = SGX_SECINFO_TRIM | SGX_SECINFO_MODIFIED;
+ eaccept_op.ret = 0;
+ eaccept_op.header.type = ENCL_OP_EACCEPT;
+
+ EXPECT_EQ(ENCL_CALL(&eaccept_op, &self->run, true), 0);
+ EXPECT_EEXIT(&self->run);
+ EXPECT_EQ(self->run.exception_vector, 0);
+ EXPECT_EQ(self->run.exception_error_code, 0);
+ EXPECT_EQ(self->run.exception_addr, 0);
+ EXPECT_EQ(eaccept_op.ret, 0);
+
+ memset(&remove_ioc, 0, sizeof(remove_ioc));
+
+ remove_ioc.offset = encl_get_data_offset(&self->encl) + PAGE_SIZE;
+ remove_ioc.length = PAGE_SIZE;
+ ret = ioctl(self->encl.fd, SGX_IOC_ENCLAVE_REMOVE_PAGES, &remove_ioc);
+ errno_save = ret == -1 ? errno : 0;
+
+ EXPECT_EQ(ret, 0);
+ EXPECT_EQ(errno_save, 0);
+ EXPECT_EQ(remove_ioc.count, 4096);
+}
+
+TEST_HARNESS_MAIN
diff --git a/tools/testing/selftests/sgx/main.h b/tools/testing/selftests/sgx/main.h
new file mode 100644
index 000000000000..fc585be97e2f
--- /dev/null
+++ b/tools/testing/selftests/sgx/main.h
@@ -0,0 +1,46 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Copyright(c) 2016-20 Intel Corporation.
+ */
+
+#ifndef MAIN_H
+#define MAIN_H
+
+#define ENCL_HEAP_SIZE_DEFAULT 4096
+
+struct encl_segment {
+ void *src;
+ off_t offset;
+ size_t size;
+ unsigned int prot;
+ unsigned int flags;
+ bool measure;
+};
+
+struct encl {
+ int fd;
+ void *bin;
+ off_t bin_size;
+ void *src;
+ size_t src_size;
+ size_t encl_size;
+ off_t encl_base;
+ unsigned int nr_segments;
+ struct encl_segment *segment_tbl;
+ struct sgx_secs secs;
+ struct sgx_sigstruct sigstruct;
+};
+
+extern unsigned char sign_key[];
+extern unsigned char sign_key_end[];
+
+void encl_delete(struct encl *ctx);
+bool encl_load(const char *path, struct encl *encl, unsigned long heap_size);
+bool encl_measure(struct encl *encl);
+bool encl_build(struct encl *encl);
+uint64_t encl_get_entry(struct encl *encl, const char *symbol);
+
+int sgx_enter_enclave(void *rdi, void *rsi, long rdx, u32 function, void *r8, void *r9,
+ struct sgx_enclave_run *run);
+
+#endif /* MAIN_H */
diff --git a/tools/testing/selftests/sgx/sign_key.S b/tools/testing/selftests/sgx/sign_key.S
new file mode 100644
index 000000000000..e4fbe948444a
--- /dev/null
+++ b/tools/testing/selftests/sgx/sign_key.S
@@ -0,0 +1,12 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/**
+* Copyright(c) 2016-20 Intel Corporation.
+*/
+
+ .section ".rodata", "a"
+
+sign_key:
+ .globl sign_key
+ .incbin "sign_key.pem"
+sign_key_end:
+ .globl sign_key_end
diff --git a/tools/testing/selftests/sgx/sign_key.pem b/tools/testing/selftests/sgx/sign_key.pem
new file mode 100644
index 000000000000..d76f21f19187
--- /dev/null
+++ b/tools/testing/selftests/sgx/sign_key.pem
@@ -0,0 +1,39 @@
+-----BEGIN RSA PRIVATE KEY-----
+MIIG4wIBAAKCAYEApalGbq7Q+usM91CPtksu3D+b0Prc8gAFL6grM3mg85A5Bx8V
+cfMXPgtrw8EYFwQxDAvzZWwl+9VfOX0ECrFRBkOHcOiG0SnADN8+FLj1UiNUQwbp
+S6OzhNWuRcSbGraSOyUlVlV0yMQSvewyzGklOaXBe30AJqzIBc8QfdSxKuP8rs0Z
+ga6k/Bl73osrYKByILJTUUeZqjLERsE6GebsdzbWgKn8qVqng4ZS4yMNg6LeRlH3
++9CIPgg4jwpSLHcp7dq2qTIB9a0tGe9ayp+5FbucpB6U7ePold0EeRN6RlJGDF9k
+L93v8P5ykz5G5gYZ2g0K1X2sHIWV4huxPgv5PXgdyQYbK+6olqj0d5rjYuwX57Ul
+k6SroPS1U6UbdCjG5txM+BNGU0VpD0ZhrIRw0leQdnNcCO9sTJuInZrgYacSVJ7u
+mtB+uCt+uzUesc+l+xPRYA+9e14lLkZp7AAmo9FvL816XDI09deehJ3i/LmHKCRN
+tuqC5TprRjFwUr6dAgEDAoIBgG5w2Z8fNfycs0+LCnmHdJLVEotR6KFVWMpwHMz7
+wKJgJgS/Y6FMuilc8oKAuroCy11dTO5IGVKOP3uorVx2NgQtBPXwWeDGgAiU1A3Q
+o4wXjYIEm4fCd63jyYPYZ2ckYXzDbjmOTdstYdPyzIhGGNEZK6eoqsRzMAPfYFPj
+IMdCqHSIu6vJw1K7p+myHOsVoWshjODaZnF3LYSA0WaZ8vokjwBxUxuRxQJZjJds
+s60XPtmL+qfgWtQFewoG4XL6GuD8FcXccynRRtzrLtFNPIl9BQfWfjBBhTC1/Te1
+0Z6XbZvpdUTD9OfLB7SbR2OUFNpKQgriO0iYVdbW3cr7uu38Zwp4W1TX73DPjoi6
+KNooP6SGWd4mRJW2+dUmSYS4QNG8eVVZswKcploEIXlAKRsOe4kzJJ1iETugIe85
+uX8nd1WYEp65xwoRUg8hqng0MeyveVbXqNKuJG6tzNDt9kgFYo+hmC/oouAW2Dtc
+T9jdRAwKJXqA2Eg6OkgXCEv+kwKBwQDYaQiFMlFhsmLlqI+EzCUh7c941/cL7m6U
+7j98+8ngl0HgCEcrc10iJVCKakQW3YbPzAx3XkKTaGjWazvvrFarXIGlOud64B8a
+iWyQ7VdlnmZnNEdk+C83tI91OQeaTKqRLDGzKh29Ry/jL8Pcbazt+kDgxa0H7qJp
+roADUanLQuNkYubpbhFBh3xpa2EExaVq6rF7nIVsD8W9TrbmPKA4LgH7z0iy544D
+kVCNYsTjYDdUWP+WiSor8kCnnpjnN9sCgcEAw/eNezUD1UDf6OYFC9+5JZJFn4Tg
+mZMyN93JKIb199ffwnjtHUSjcyiWeesXucpzwtGbTcwQnDisSW4oneYKLSEBlBaq
+scqiUugyGZZOthFSCbdXYXMViK2vHrKlkse7GxVlROKcEhM/pRBrmjaGO8eWR+D4
+FO2wCXzVs3KgV6j779frw0vC54oHOxc9+Lu1rSHp4i+600koyvL/zF6U/5tZXIvN
+YW2yoiQJnjCmVA1pwbwV6KAUTPDTMnBK+YjnAoHBAJBGBa4hi5Z27JkbCliIGMFJ
+NPs6pLKe9GNJf6in2+sPgUAFhMeiPhbDiwbxgrnpBIqICE+ULGJFmzmc0p/IOceT
+ARjR76dAFLxbnbXzj5kURETNhO36yiUjCk4mBRGIcbYddndxaSjaH+zKgpLzyJ6m
+1esuc1qfFvEfAAI2cTIsl5hB70ZJYNZaUvDyQK3ZGPHxy6e9rkgKg9OJz0QoatAe
+q/002yHvtAJg4F5B2JeVejg7VQ8GHB1MKxppu0TP5wKBwQCCpQj8zgKOKz/wmViy
+lSYZDC5qWJW7t3bP6TDFr06lOpUsUJ4TgxeiGw778g/RMaKB4RIz3WBoJcgw9BsT
+7rFza1ZiucchMcGMmswRDt8kC4wGejpA92Owc8oUdxkMhSdnY5jYlxK2t3/DYEe8
+JFl9L7mFQKVjSSAGUzkiTGrlG1Kf5UfXh9dFBq98uilQfSPIwUaWynyM23CHTKqI
+Pw3/vOY9sojrnncWwrEUIG7is5vWfWPwargzSzd29YdRBe8CgcEAuRVewK/YeNOX
+B7ZG6gKKsfsvrGtY7FPETzLZAHjoVXYNea4LVZ2kn4hBXXlvw/4HD+YqcTt4wmif
+5JQlDvjNobUiKJZpzy7hklVhF7wZFl4pCF7Yh43q9iQ7gKTaeUG7MiaK+G8Zz8aY
+HW9rsiihbdZkccMvnPfO9334XMxl3HtBRzLstjUlbLB7Sdh+7tZ3JQidCOFNs5pE
+XyWwnASPu4tKfDahH1UUTp1uJcq/6716CSWg080avYxFcn75qqsb
+-----END RSA PRIVATE KEY-----
diff --git a/tools/testing/selftests/sgx/sigstruct.c b/tools/testing/selftests/sgx/sigstruct.c
new file mode 100644
index 000000000000..d73b29becf5b
--- /dev/null
+++ b/tools/testing/selftests/sgx/sigstruct.c
@@ -0,0 +1,391 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright(c) 2016-20 Intel Corporation. */
+
+#define _GNU_SOURCE
+#include <assert.h>
+#include <getopt.h>
+#include <stdbool.h>
+#include <stdint.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/stat.h>
+#include <sys/types.h>
+#include <unistd.h>
+#include <openssl/err.h>
+#include <openssl/pem.h>
+#include "defines.h"
+#include "main.h"
+
+/*
+ * FIXME: OpenSSL 3.0 has deprecated some functions. For now just ignore
+ * the warnings.
+ */
+#pragma GCC diagnostic ignored "-Wdeprecated-declarations"
+
+struct q1q2_ctx {
+ BN_CTX *bn_ctx;
+ BIGNUM *m;
+ BIGNUM *s;
+ BIGNUM *q1;
+ BIGNUM *qr;
+ BIGNUM *q2;
+};
+
+static void free_q1q2_ctx(struct q1q2_ctx *ctx)
+{
+ BN_CTX_free(ctx->bn_ctx);
+ BN_free(ctx->m);
+ BN_free(ctx->s);
+ BN_free(ctx->q1);
+ BN_free(ctx->qr);
+ BN_free(ctx->q2);
+}
+
+static bool alloc_q1q2_ctx(const uint8_t *s, const uint8_t *m,
+ struct q1q2_ctx *ctx)
+{
+ ctx->bn_ctx = BN_CTX_new();
+ ctx->s = BN_bin2bn(s, SGX_MODULUS_SIZE, NULL);
+ ctx->m = BN_bin2bn(m, SGX_MODULUS_SIZE, NULL);
+ ctx->q1 = BN_new();
+ ctx->qr = BN_new();
+ ctx->q2 = BN_new();
+
+ if (!ctx->bn_ctx || !ctx->s || !ctx->m || !ctx->q1 || !ctx->qr ||
+ !ctx->q2) {
+ free_q1q2_ctx(ctx);
+ return false;
+ }
+
+ return true;
+}
+
+static void reverse_bytes(void *data, int length)
+{
+ int i = 0;
+ int j = length - 1;
+ uint8_t temp;
+ uint8_t *ptr = data;
+
+ while (i < j) {
+ temp = ptr[i];
+ ptr[i] = ptr[j];
+ ptr[j] = temp;
+ i++;
+ j--;
+ }
+}
+
+static bool calc_q1q2(const uint8_t *s, const uint8_t *m, uint8_t *q1,
+ uint8_t *q2)
+{
+ struct q1q2_ctx ctx;
+ int len;
+
+ if (!alloc_q1q2_ctx(s, m, &ctx)) {
+ fprintf(stderr, "Not enough memory for Q1Q2 calculation\n");
+ return false;
+ }
+
+ if (!BN_mul(ctx.q1, ctx.s, ctx.s, ctx.bn_ctx))
+ goto out;
+
+ if (!BN_div(ctx.q1, ctx.qr, ctx.q1, ctx.m, ctx.bn_ctx))
+ goto out;
+
+ if (BN_num_bytes(ctx.q1) > SGX_MODULUS_SIZE) {
+ fprintf(stderr, "Too large Q1 %d bytes\n",
+ BN_num_bytes(ctx.q1));
+ goto out;
+ }
+
+ if (!BN_mul(ctx.q2, ctx.s, ctx.qr, ctx.bn_ctx))
+ goto out;
+
+ if (!BN_div(ctx.q2, NULL, ctx.q2, ctx.m, ctx.bn_ctx))
+ goto out;
+
+ if (BN_num_bytes(ctx.q2) > SGX_MODULUS_SIZE) {
+ fprintf(stderr, "Too large Q2 %d bytes\n",
+ BN_num_bytes(ctx.q2));
+ goto out;
+ }
+
+ len = BN_bn2bin(ctx.q1, q1);
+ reverse_bytes(q1, len);
+ len = BN_bn2bin(ctx.q2, q2);
+ reverse_bytes(q2, len);
+
+ free_q1q2_ctx(&ctx);
+ return true;
+out:
+ free_q1q2_ctx(&ctx);
+ return false;
+}
+
+struct sgx_sigstruct_payload {
+ struct sgx_sigstruct_header header;
+ struct sgx_sigstruct_body body;
+};
+
+static bool check_crypto_errors(void)
+{
+ int err;
+ bool had_errors = false;
+ const char *filename;
+ int line;
+ char str[256];
+
+ for ( ; ; ) {
+ if (ERR_peek_error() == 0)
+ break;
+
+ had_errors = true;
+ err = ERR_get_error_line(&filename, &line);
+ ERR_error_string_n(err, str, sizeof(str));
+ fprintf(stderr, "crypto: %s: %s:%d\n", str, filename, line);
+ }
+
+ return had_errors;
+}
+
+static inline const BIGNUM *get_modulus(RSA *key)
+{
+ const BIGNUM *n;
+
+ RSA_get0_key(key, &n, NULL, NULL);
+ return n;
+}
+
+static RSA *gen_sign_key(void)
+{
+ unsigned long sign_key_length;
+ BIO *bio;
+ RSA *key;
+
+ sign_key_length = (unsigned long)&sign_key_end -
+ (unsigned long)&sign_key;
+
+ bio = BIO_new_mem_buf(&sign_key, sign_key_length);
+ if (!bio)
+ return NULL;
+
+ key = PEM_read_bio_RSAPrivateKey(bio, NULL, NULL, NULL);
+ BIO_free(bio);
+
+ return key;
+}
+
+enum mrtags {
+ MRECREATE = 0x0045544145524345,
+ MREADD = 0x0000000044444145,
+ MREEXTEND = 0x00444E4554584545,
+};
+
+static bool mrenclave_update(EVP_MD_CTX *ctx, const void *data)
+{
+ if (!EVP_DigestUpdate(ctx, data, 64)) {
+ fprintf(stderr, "digest update failed\n");
+ return false;
+ }
+
+ return true;
+}
+
+static bool mrenclave_commit(EVP_MD_CTX *ctx, uint8_t *mrenclave)
+{
+ unsigned int size;
+
+ if (!EVP_DigestFinal_ex(ctx, (unsigned char *)mrenclave, &size)) {
+ fprintf(stderr, "digest commit failed\n");
+ return false;
+ }
+
+ if (size != 32) {
+ fprintf(stderr, "invalid digest size = %u\n", size);
+ return false;
+ }
+
+ return true;
+}
+
+struct mrecreate {
+ uint64_t tag;
+ uint32_t ssaframesize;
+ uint64_t size;
+ uint8_t reserved[44];
+} __attribute__((__packed__));
+
+
+static bool mrenclave_ecreate(EVP_MD_CTX *ctx, uint64_t blob_size)
+{
+ struct mrecreate mrecreate;
+ uint64_t encl_size;
+
+ for (encl_size = 0x1000; encl_size < blob_size; )
+ encl_size <<= 1;
+
+ memset(&mrecreate, 0, sizeof(mrecreate));
+ mrecreate.tag = MRECREATE;
+ mrecreate.ssaframesize = 1;
+ mrecreate.size = encl_size;
+
+ if (!EVP_DigestInit_ex(ctx, EVP_sha256(), NULL))
+ return false;
+
+ return mrenclave_update(ctx, &mrecreate);
+}
+
+struct mreadd {
+ uint64_t tag;
+ uint64_t offset;
+ uint64_t flags; /* SECINFO flags */
+ uint8_t reserved[40];
+} __attribute__((__packed__));
+
+static bool mrenclave_eadd(EVP_MD_CTX *ctx, uint64_t offset, uint64_t flags)
+{
+ struct mreadd mreadd;
+
+ memset(&mreadd, 0, sizeof(mreadd));
+ mreadd.tag = MREADD;
+ mreadd.offset = offset;
+ mreadd.flags = flags;
+
+ return mrenclave_update(ctx, &mreadd);
+}
+
+struct mreextend {
+ uint64_t tag;
+ uint64_t offset;
+ uint8_t reserved[48];
+} __attribute__((__packed__));
+
+static bool mrenclave_eextend(EVP_MD_CTX *ctx, uint64_t offset,
+ const uint8_t *data)
+{
+ struct mreextend mreextend;
+ int i;
+
+ for (i = 0; i < 0x1000; i += 0x100) {
+ memset(&mreextend, 0, sizeof(mreextend));
+ mreextend.tag = MREEXTEND;
+ mreextend.offset = offset + i;
+
+ if (!mrenclave_update(ctx, &mreextend))
+ return false;
+
+ if (!mrenclave_update(ctx, &data[i + 0x00]))
+ return false;
+
+ if (!mrenclave_update(ctx, &data[i + 0x40]))
+ return false;
+
+ if (!mrenclave_update(ctx, &data[i + 0x80]))
+ return false;
+
+ if (!mrenclave_update(ctx, &data[i + 0xC0]))
+ return false;
+ }
+
+ return true;
+}
+
+static bool mrenclave_segment(EVP_MD_CTX *ctx, struct encl *encl,
+ struct encl_segment *seg)
+{
+ uint64_t end = seg->size;
+ uint64_t offset;
+
+ for (offset = 0; offset < end; offset += PAGE_SIZE) {
+ if (!mrenclave_eadd(ctx, seg->offset + offset, seg->flags))
+ return false;
+
+ if (seg->measure) {
+ if (!mrenclave_eextend(ctx, seg->offset + offset, seg->src + offset))
+ return false;
+ }
+ }
+
+ return true;
+}
+
+bool encl_measure(struct encl *encl)
+{
+ uint64_t header1[2] = {0x000000E100000006, 0x0000000000010000};
+ uint64_t header2[2] = {0x0000006000000101, 0x0000000100000060};
+ struct sgx_sigstruct *sigstruct = &encl->sigstruct;
+ struct sgx_sigstruct_payload payload;
+ uint8_t digest[SHA256_DIGEST_LENGTH];
+ EVP_MD_CTX *ctx = NULL;
+ unsigned int siglen;
+ RSA *key = NULL;
+ int i;
+
+ memset(sigstruct, 0, sizeof(*sigstruct));
+
+ sigstruct->header.header1[0] = header1[0];
+ sigstruct->header.header1[1] = header1[1];
+ sigstruct->header.header2[0] = header2[0];
+ sigstruct->header.header2[1] = header2[1];
+ sigstruct->exponent = 3;
+ sigstruct->body.attributes = SGX_ATTR_MODE64BIT;
+ sigstruct->body.xfrm = 3;
+
+ /* sanity check */
+ if (check_crypto_errors())
+ goto err;
+
+ key = gen_sign_key();
+ if (!key) {
+ ERR_print_errors_fp(stdout);
+ goto err;
+ }
+
+ BN_bn2bin(get_modulus(key), sigstruct->modulus);
+
+ ctx = EVP_MD_CTX_create();
+ if (!ctx)
+ goto err;
+
+ if (!mrenclave_ecreate(ctx, encl->src_size))
+ goto err;
+
+ for (i = 0; i < encl->nr_segments; i++) {
+ struct encl_segment *seg = &encl->segment_tbl[i];
+
+ if (!mrenclave_segment(ctx, encl, seg))
+ goto err;
+ }
+
+ if (!mrenclave_commit(ctx, sigstruct->body.mrenclave))
+ goto err;
+
+ memcpy(&payload.header, &sigstruct->header, sizeof(sigstruct->header));
+ memcpy(&payload.body, &sigstruct->body, sizeof(sigstruct->body));
+
+ SHA256((unsigned char *)&payload, sizeof(payload), digest);
+
+ if (!RSA_sign(NID_sha256, digest, SHA256_DIGEST_LENGTH,
+ sigstruct->signature, &siglen, key))
+ goto err;
+
+ if (!calc_q1q2(sigstruct->signature, sigstruct->modulus, sigstruct->q1,
+ sigstruct->q2))
+ goto err;
+
+ /* BE -> LE */
+ reverse_bytes(sigstruct->signature, SGX_MODULUS_SIZE);
+ reverse_bytes(sigstruct->modulus, SGX_MODULUS_SIZE);
+
+ EVP_MD_CTX_destroy(ctx);
+ RSA_free(key);
+ return true;
+
+err:
+ if (ctx)
+ EVP_MD_CTX_destroy(ctx);
+ RSA_free(key);
+ return false;
+}
diff --git a/tools/testing/selftests/sgx/test_encl.c b/tools/testing/selftests/sgx/test_encl.c
new file mode 100644
index 000000000000..2c4d709cce2d
--- /dev/null
+++ b/tools/testing/selftests/sgx/test_encl.c
@@ -0,0 +1,162 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright(c) 2016-20 Intel Corporation. */
+
+#include <stddef.h>
+#include "defines.h"
+
+/*
+ * Data buffer spanning two pages that will be placed first in the .data
+ * segment via the linker script. Even if not used internally the second page
+ * is needed by external test manipulating page permissions, so mark
+ * encl_buffer as "used" to make sure it is entirely preserved by the compiler.
+ */
+static uint8_t __used __section(".data.encl_buffer") encl_buffer[8192] = { 1 };
+
+enum sgx_enclu_function {
+ EACCEPT = 0x5,
+ EMODPE = 0x6,
+};
+
+static void do_encl_emodpe(void *_op)
+{
+ struct sgx_secinfo secinfo __aligned(sizeof(struct sgx_secinfo)) = {0};
+ struct encl_op_emodpe *op = _op;
+
+ secinfo.flags = op->flags;
+
+ asm volatile(".byte 0x0f, 0x01, 0xd7"
+ : /* no outputs */
+ : "a" (EMODPE),
+ "b" (&secinfo),
+ "c" (op->epc_addr)
+ : "memory" /* read from secinfo pointer */);
+}
+
+static void do_encl_eaccept(void *_op)
+{
+ struct sgx_secinfo secinfo __aligned(sizeof(struct sgx_secinfo)) = {0};
+ struct encl_op_eaccept *op = _op;
+ int rax;
+
+ secinfo.flags = op->flags;
+
+ asm volatile(".byte 0x0f, 0x01, 0xd7"
+ : "=a" (rax)
+ : "a" (EACCEPT),
+ "b" (&secinfo),
+ "c" (op->epc_addr)
+ : "memory" /* read from secinfo pointer */);
+
+ op->ret = rax;
+}
+
+static void *memcpy(void *dest, const void *src, size_t n)
+{
+ size_t i;
+
+ for (i = 0; i < n; i++)
+ ((char *)dest)[i] = ((char *)src)[i];
+
+ return dest;
+}
+
+static void *memset(void *dest, int c, size_t n)
+{
+ size_t i;
+
+ for (i = 0; i < n; i++)
+ ((char *)dest)[i] = c;
+
+ return dest;
+}
+
+static void do_encl_init_tcs_page(void *_op)
+{
+ struct encl_op_init_tcs_page *op = _op;
+ void *tcs = (void *)op->tcs_page;
+ uint32_t val_32;
+
+ memset(tcs, 0, 16); /* STATE and FLAGS */
+ memcpy(tcs + 16, &op->ssa, 8); /* OSSA */
+ memset(tcs + 24, 0, 4); /* CSSA */
+ val_32 = 1;
+ memcpy(tcs + 28, &val_32, 4); /* NSSA */
+ memcpy(tcs + 32, &op->entry, 8); /* OENTRY */
+ memset(tcs + 40, 0, 24); /* AEP, OFSBASE, OGSBASE */
+ val_32 = 0xFFFFFFFF;
+ memcpy(tcs + 64, &val_32, 4); /* FSLIMIT */
+ memcpy(tcs + 68, &val_32, 4); /* GSLIMIT */
+ memset(tcs + 72, 0, 4024); /* Reserved */
+}
+
+static void do_encl_op_put_to_buf(void *op)
+{
+ struct encl_op_put_to_buf *op2 = op;
+
+ memcpy(&encl_buffer[0], &op2->value, 8);
+}
+
+static void do_encl_op_get_from_buf(void *op)
+{
+ struct encl_op_get_from_buf *op2 = op;
+
+ memcpy(&op2->value, &encl_buffer[0], 8);
+}
+
+static void do_encl_op_put_to_addr(void *_op)
+{
+ struct encl_op_put_to_addr *op = _op;
+
+ memcpy((void *)op->addr, &op->value, 8);
+}
+
+static void do_encl_op_get_from_addr(void *_op)
+{
+ struct encl_op_get_from_addr *op = _op;
+
+ memcpy(&op->value, (void *)op->addr, 8);
+}
+
+static void do_encl_op_nop(void *_op)
+{
+
+}
+
+/*
+ * Symbol placed at the start of the enclave image by the linker script.
+ * Declare this extern symbol with visibility "hidden" to ensure the compiler
+ * does not access it through the GOT and generates position-independent
+ * addressing as __encl_base(%rip), so we can get the actual enclave base
+ * during runtime.
+ */
+extern const uint8_t __attribute__((visibility("hidden"))) __encl_base;
+
+typedef void (*encl_op_t)(void *);
+static const encl_op_t encl_op_array[ENCL_OP_MAX] = {
+ do_encl_op_put_to_buf,
+ do_encl_op_get_from_buf,
+ do_encl_op_put_to_addr,
+ do_encl_op_get_from_addr,
+ do_encl_op_nop,
+ do_encl_eaccept,
+ do_encl_emodpe,
+ do_encl_init_tcs_page,
+};
+
+void encl_body(void *rdi, void *rsi)
+{
+ struct encl_op_header *header = (struct encl_op_header *)rdi;
+ encl_op_t op;
+
+ if (header->type >= ENCL_OP_MAX)
+ return;
+
+ /*
+ * The enclave base address needs to be added, as this call site
+ * *cannot be* made rip-relative by the compiler, or fixed up by
+ * any other possible means.
+ */
+ op = ((uint64_t)&__encl_base) + encl_op_array[header->type];
+
+ (*op)(header);
+}
diff --git a/tools/testing/selftests/sgx/test_encl.lds b/tools/testing/selftests/sgx/test_encl.lds
new file mode 100644
index 000000000000..ffe851a1cac4
--- /dev/null
+++ b/tools/testing/selftests/sgx/test_encl.lds
@@ -0,0 +1,41 @@
+OUTPUT_FORMAT(elf64-x86-64)
+
+PHDRS
+{
+ tcs PT_LOAD;
+ text PT_LOAD;
+ data PT_LOAD;
+}
+
+SECTIONS
+{
+ . = 0;
+ __encl_base = .;
+ .tcs : {
+ *(.tcs*)
+ } : tcs
+
+ . = ALIGN(4096);
+ .text : {
+ *(.text*)
+ *(.rodata*)
+ FILL(0xDEADBEEF);
+ . = ALIGN(4096);
+ } : text
+
+ .data : {
+ *(.data.encl_buffer)
+ *(.data*)
+ } : data
+
+ /DISCARD/ : {
+ *(.comment*)
+ *(.note*)
+ *(.debug*)
+ *(.eh_frame*)
+ *(.dyn*)
+ *(.gnu.hash)
+ }
+}
+
+ASSERT(!DEFINED(_GLOBAL_OFFSET_TABLE_), "Libcalls through GOT are not supported in enclaves")
diff --git a/tools/testing/selftests/sgx/test_encl_bootstrap.S b/tools/testing/selftests/sgx/test_encl_bootstrap.S
new file mode 100644
index 000000000000..d8c4ac94e032
--- /dev/null
+++ b/tools/testing/selftests/sgx/test_encl_bootstrap.S
@@ -0,0 +1,92 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Copyright(c) 2016-20 Intel Corporation.
+ */
+
+ .macro ENCLU
+ .byte 0x0f, 0x01, 0xd7
+ .endm
+
+ .section ".tcs", "aw"
+ .balign 4096
+
+ .fill 1, 8, 0 # STATE (set by CPU)
+ .fill 1, 8, 0 # FLAGS
+ .quad encl_ssa_tcs1 # OSSA
+ .fill 1, 4, 0 # CSSA (set by CPU)
+ .fill 1, 4, 1 # NSSA
+ .quad encl_entry # OENTRY
+ .fill 1, 8, 0 # AEP (set by EENTER and ERESUME)
+ .fill 1, 8, 0 # OFSBASE
+ .fill 1, 8, 0 # OGSBASE
+ .fill 1, 4, 0xFFFFFFFF # FSLIMIT
+ .fill 1, 4, 0xFFFFFFFF # GSLIMIT
+ .fill 4024, 1, 0 # Reserved
+
+ # TCS2
+ .fill 1, 8, 0 # STATE (set by CPU)
+ .fill 1, 8, 0 # FLAGS
+ .quad encl_ssa_tcs2 # OSSA
+ .fill 1, 4, 0 # CSSA (set by CPU)
+ .fill 1, 4, 1 # NSSA
+ .quad encl_entry # OENTRY
+ .fill 1, 8, 0 # AEP (set by EENTER and ERESUME)
+ .fill 1, 8, 0 # OFSBASE
+ .fill 1, 8, 0 # OGSBASE
+ .fill 1, 4, 0xFFFFFFFF # FSLIMIT
+ .fill 1, 4, 0xFFFFFFFF # GSLIMIT
+ .fill 4024, 1, 0 # Reserved
+
+ .text
+
+encl_entry:
+ # RBX contains the base address for TCS, which is the first address
+ # inside the enclave for TCS #1 and one page into the enclave for
+ # TCS #2. First make it relative by substracting __encl_base and
+ # then add the address of encl_stack to get the address for the stack.
+ lea __encl_base(%rip), %rax
+ sub %rax, %rbx
+ lea encl_stack(%rip), %rax
+ add %rbx, %rax
+ jmp encl_entry_core
+encl_dyn_entry:
+ # Entry point for dynamically created TCS page expected to follow
+ # its stack directly.
+ lea -1(%rbx), %rax
+encl_entry_core:
+ xchg %rsp, %rax
+ push %rax
+
+ push %rcx # push the address after EENTER
+
+ # NOTE: as the selftest enclave is *not* intended for production,
+ # simplify the code by not initializing ABI registers on entry or
+ # cleansing caller-save registers on exit.
+ call encl_body
+
+ # Prepare EEXIT target by popping the address of the instruction after
+ # EENTER to RBX.
+ pop %rbx
+
+ # Restore the caller stack.
+ pop %rax
+ mov %rax, %rsp
+
+ # EEXIT
+ mov $4, %rax
+ enclu
+
+ .section ".data", "aw"
+
+encl_ssa_tcs1:
+ .space 4096
+encl_ssa_tcs2:
+ .space 4096
+
+ .balign 4096
+ # Stack of TCS #1
+ .space 4096
+encl_stack:
+ .balign 4096
+ # Stack of TCS #2
+ .space 4096
diff --git a/tools/testing/selftests/sigaltstack/current_stack_pointer.h b/tools/testing/selftests/sigaltstack/current_stack_pointer.h
new file mode 100644
index 000000000000..ea9bdf3a90b1
--- /dev/null
+++ b/tools/testing/selftests/sigaltstack/current_stack_pointer.h
@@ -0,0 +1,23 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+
+#if __alpha__
+register unsigned long sp asm("$30");
+#elif __arm__ || __aarch64__ || __csky__ || __m68k__ || __mips__ || __riscv
+register unsigned long sp asm("sp");
+#elif __i386__
+register unsigned long sp asm("esp");
+#elif __loongarch64
+register unsigned long sp asm("$sp");
+#elif __ppc__
+register unsigned long sp asm("r1");
+#elif __s390x__
+register unsigned long sp asm("%15");
+#elif __sh__
+register unsigned long sp asm("r15");
+#elif __x86_64__
+register unsigned long sp asm("rsp");
+#elif __XTENSA__
+register unsigned long sp asm("a1");
+#else
+#error "implement current_stack_pointer equivalent"
+#endif
diff --git a/tools/testing/selftests/sigaltstack/sas.c b/tools/testing/selftests/sigaltstack/sas.c
index ad0f8df2ca0a..07227fab1cc9 100644
--- a/tools/testing/selftests/sigaltstack/sas.c
+++ b/tools/testing/selftests/sigaltstack/sas.c
@@ -17,13 +17,20 @@
#include <string.h>
#include <assert.h>
#include <errno.h>
+#include <sys/auxv.h>
#include "../kselftest.h"
+#include "current_stack_pointer.h"
#ifndef SS_AUTODISARM
#define SS_AUTODISARM (1U << 31)
#endif
+#ifndef AT_MINSIGSTKSZ
+#define AT_MINSIGSTKSZ 51
+#endif
+
+static unsigned int stack_size;
static void *sstack, *ustack;
static ucontext_t uc, sc;
static const char *msg = "[OK]\tStack preserved";
@@ -40,14 +47,8 @@ void my_usr1(int sig, siginfo_t *si, void *u)
stack_t stk;
struct stk_data *p;
-#if __s390x__
- register unsigned long sp asm("%15");
-#else
- register unsigned long sp asm("sp");
-#endif
-
if (sp < (unsigned long)sstack ||
- sp >= (unsigned long)sstack + SIGSTKSZ) {
+ sp >= (unsigned long)sstack + stack_size) {
ksft_exit_fail_msg("SP is not on sigaltstack\n");
}
/* put some data on stack. other sighandler will try to overwrite it */
@@ -71,7 +72,7 @@ void my_usr1(int sig, siginfo_t *si, void *u)
swapcontext(&sc, &uc);
ksft_print_msg("%s\n", p->msg);
if (!p->flag) {
- ksft_exit_skip("[RUN]\tAborting\n");
+ ksft_exit_fail_msg("[RUN]\tAborting\n");
exit(EXIT_FAILURE);
}
}
@@ -108,6 +109,10 @@ int main(void)
stack_t stk;
int err;
+ /* Make sure more than the required minimum. */
+ stack_size = getauxval(AT_MINSIGSTKSZ) + SIGSTKSZ;
+ ksft_print_msg("[NOTE]\tthe stack size is %u\n", stack_size);
+
ksft_print_header();
ksft_set_plan(3);
@@ -117,7 +122,7 @@ int main(void)
sigaction(SIGUSR1, &act, NULL);
act.sa_sigaction = my_usr2;
sigaction(SIGUSR2, &act, NULL);
- sstack = mmap(NULL, SIGSTKSZ, PROT_READ | PROT_WRITE,
+ sstack = mmap(NULL, stack_size, PROT_READ | PROT_WRITE,
MAP_PRIVATE | MAP_ANONYMOUS | MAP_STACK, -1, 0);
if (sstack == MAP_FAILED) {
ksft_exit_fail_msg("mmap() - %s\n", strerror(errno));
@@ -139,12 +144,12 @@ int main(void)
}
stk.ss_sp = sstack;
- stk.ss_size = SIGSTKSZ;
+ stk.ss_size = stack_size;
stk.ss_flags = SS_ONSTACK | SS_AUTODISARM;
err = sigaltstack(&stk, NULL);
if (err) {
if (errno == EINVAL) {
- ksft_exit_skip(
+ ksft_test_result_skip(
"[NOTE]\tThe running kernel doesn't support SS_AUTODISARM\n");
/*
* If test cases for the !SS_AUTODISARM variant were
@@ -161,7 +166,7 @@ int main(void)
}
}
- ustack = mmap(NULL, SIGSTKSZ, PROT_READ | PROT_WRITE,
+ ustack = mmap(NULL, stack_size, PROT_READ | PROT_WRITE,
MAP_PRIVATE | MAP_ANONYMOUS | MAP_STACK, -1, 0);
if (ustack == MAP_FAILED) {
ksft_exit_fail_msg("mmap() - %s\n", strerror(errno));
@@ -170,7 +175,7 @@ int main(void)
getcontext(&uc);
uc.uc_link = NULL;
uc.uc_stack.ss_sp = ustack;
- uc.uc_stack.ss_size = SIGSTKSZ;
+ uc.uc_stack.ss_size = stack_size;
makecontext(&uc, switch_fn, 0);
raise(SIGUSR1);
diff --git a/tools/testing/selftests/sparc64/drivers/adi-test.c b/tools/testing/selftests/sparc64/drivers/adi-test.c
index 95d93c6a88a5..84e5d9fd20b0 100644
--- a/tools/testing/selftests/sparc64/drivers/adi-test.c
+++ b/tools/testing/selftests/sparc64/drivers/adi-test.c
@@ -24,10 +24,6 @@
#define DEBUG_LEVEL_4_BIT (0x0008)
#define DEBUG_TIMING_BIT (0x1000)
-#ifndef ARRAY_SIZE
-# define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0]))
-#endif
-
/* bit mask of enabled bits to print */
#define DEBUG 0x0001
diff --git a/tools/testing/selftests/splice/.gitignore b/tools/testing/selftests/splice/.gitignore
index d5a2da428752..be8266f5d04c 100644
--- a/tools/testing/selftests/splice/.gitignore
+++ b/tools/testing/selftests/splice/.gitignore
@@ -1,2 +1,3 @@
# SPDX-License-Identifier: GPL-2.0-only
default_file_splice_read
+splice_read
diff --git a/tools/testing/selftests/splice/Makefile b/tools/testing/selftests/splice/Makefile
index e519b159b60d..541cd826d5a5 100644
--- a/tools/testing/selftests/splice/Makefile
+++ b/tools/testing/selftests/splice/Makefile
@@ -1,5 +1,5 @@
# SPDX-License-Identifier: GPL-2.0
-TEST_PROGS := default_file_splice_read.sh
-TEST_GEN_PROGS_EXTENDED := default_file_splice_read
+TEST_PROGS := default_file_splice_read.sh short_splice_read.sh
+TEST_GEN_PROGS_EXTENDED := default_file_splice_read splice_read
include ../lib.mk
diff --git a/tools/testing/selftests/splice/config b/tools/testing/selftests/splice/config
new file mode 100644
index 000000000000..058c928368b8
--- /dev/null
+++ b/tools/testing/selftests/splice/config
@@ -0,0 +1 @@
+CONFIG_TEST_LKM=m
diff --git a/tools/testing/selftests/splice/settings b/tools/testing/selftests/splice/settings
new file mode 100644
index 000000000000..89cedfc0d12b
--- /dev/null
+++ b/tools/testing/selftests/splice/settings
@@ -0,0 +1 @@
+timeout=5
diff --git a/tools/testing/selftests/splice/short_splice_read.sh b/tools/testing/selftests/splice/short_splice_read.sh
new file mode 100755
index 000000000000..4710e09f49fa
--- /dev/null
+++ b/tools/testing/selftests/splice/short_splice_read.sh
@@ -0,0 +1,133 @@
+#!/bin/sh
+# SPDX-License-Identifier: GPL-2.0
+#
+# Test for mishandling of splice() on pseudofilesystems, which should catch
+# bugs like 11990a5bd7e5 ("module: Correctly truncate sysfs sections output")
+#
+# Since splice fallback was removed as part of the set_fs() rework, many of these
+# tests expect to fail now. See https://lore.kernel.org/lkml/202009181443.C2179FB@keescook/
+set -e
+
+DIR=$(dirname "$0")
+
+ret=0
+
+expect_success()
+{
+ title="$1"
+ shift
+
+ echo "" >&2
+ echo "$title ..." >&2
+
+ set +e
+ "$@"
+ rc=$?
+ set -e
+
+ case "$rc" in
+ 0)
+ echo "ok: $title succeeded" >&2
+ ;;
+ 1)
+ echo "FAIL: $title should work" >&2
+ ret=$(( ret + 1 ))
+ ;;
+ *)
+ echo "FAIL: something else went wrong" >&2
+ ret=$(( ret + 1 ))
+ ;;
+ esac
+}
+
+expect_failure()
+{
+ title="$1"
+ shift
+
+ echo "" >&2
+ echo "$title ..." >&2
+
+ set +e
+ "$@"
+ rc=$?
+ set -e
+
+ case "$rc" in
+ 0)
+ echo "FAIL: $title unexpectedly worked" >&2
+ ret=$(( ret + 1 ))
+ ;;
+ 1)
+ echo "ok: $title correctly failed" >&2
+ ;;
+ *)
+ echo "FAIL: something else went wrong" >&2
+ ret=$(( ret + 1 ))
+ ;;
+ esac
+}
+
+do_splice()
+{
+ filename="$1"
+ bytes="$2"
+ expected="$3"
+ report="$4"
+
+ out=$("$DIR"/splice_read "$filename" "$bytes" | cat)
+ if [ "$out" = "$expected" ] ; then
+ echo " matched $report" >&2
+ return 0
+ else
+ echo " no match: '$out' vs $report" >&2
+ return 1
+ fi
+}
+
+test_splice()
+{
+ filename="$1"
+
+ echo " checking $filename ..." >&2
+
+ full=$(cat "$filename")
+ rc=$?
+ if [ $rc -ne 0 ] ; then
+ return 2
+ fi
+
+ two=$(echo "$full" | grep -m1 . | cut -c-2)
+
+ # Make sure full splice has the same contents as a standard read.
+ echo " splicing 4096 bytes ..." >&2
+ if ! do_splice "$filename" 4096 "$full" "full read" ; then
+ return 1
+ fi
+
+ # Make sure a partial splice see the first two characters.
+ echo " splicing 2 bytes ..." >&2
+ if ! do_splice "$filename" 2 "$two" "'$two'" ; then
+ return 1
+ fi
+
+ return 0
+}
+
+### /proc/$pid/ has no splice interface; these should all fail.
+expect_failure "proc_single_open(), seq_read() splice" test_splice /proc/$$/limits
+expect_failure "special open(), seq_read() splice" test_splice /proc/$$/comm
+
+### /proc/sys/ has a splice interface; these should all succeed.
+expect_success "proc_handler: proc_dointvec_minmax() splice" test_splice /proc/sys/fs/nr_open
+expect_success "proc_handler: proc_dostring() splice" test_splice /proc/sys/kernel/modprobe
+expect_success "proc_handler: special read splice" test_splice /proc/sys/kernel/version
+
+### /sys/ has no splice interface; these should all fail.
+if ! [ -d /sys/module/test_module/sections ] ; then
+ expect_success "test_module kernel module load" modprobe test_module
+fi
+expect_success "kernfs attr splice" test_splice /sys/module/test_module/coresize
+expect_success "kernfs binattr splice" test_splice /sys/module/test_module/sections/.init.text
+
+exit $ret
diff --git a/tools/testing/selftests/splice/splice_read.c b/tools/testing/selftests/splice/splice_read.c
new file mode 100644
index 000000000000..46dae6a25cfb
--- /dev/null
+++ b/tools/testing/selftests/splice/splice_read.c
@@ -0,0 +1,57 @@
+// SPDX-License-Identifier: GPL-2.0
+#define _GNU_SOURCE
+#include <errno.h>
+#include <fcntl.h>
+#include <limits.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <unistd.h>
+#include <sys/types.h>
+#include <sys/stat.h>
+
+int main(int argc, char *argv[])
+{
+ int fd;
+ size_t size;
+ ssize_t spliced;
+
+ if (argc < 2) {
+ fprintf(stderr, "Usage: %s INPUT [BYTES]\n", argv[0]);
+ return EXIT_FAILURE;
+ }
+
+ fd = open(argv[1], O_RDONLY);
+ if (fd < 0) {
+ perror(argv[1]);
+ return EXIT_FAILURE;
+ }
+
+ if (argc == 3)
+ size = atol(argv[2]);
+ else {
+ struct stat statbuf;
+
+ if (fstat(fd, &statbuf) < 0) {
+ perror(argv[1]);
+ return EXIT_FAILURE;
+ }
+
+ if (statbuf.st_size > INT_MAX) {
+ fprintf(stderr, "%s: Too big\n", argv[1]);
+ return EXIT_FAILURE;
+ }
+
+ size = statbuf.st_size;
+ }
+
+ /* splice(2) file to stdout. */
+ spliced = splice(fd, NULL, STDOUT_FILENO, NULL,
+ size, SPLICE_F_MOVE);
+ if (spliced < 0) {
+ perror("splice");
+ return EXIT_FAILURE;
+ }
+
+ close(fd);
+ return EXIT_SUCCESS;
+}
diff --git a/tools/testing/selftests/static_keys/test_static_keys.sh b/tools/testing/selftests/static_keys/test_static_keys.sh
index fc9f8cde7d42..3b0f17b81ac2 100755
--- a/tools/testing/selftests/static_keys/test_static_keys.sh
+++ b/tools/testing/selftests/static_keys/test_static_keys.sh
@@ -6,18 +6,18 @@
ksft_skip=4
if ! /sbin/modprobe -q -n test_static_key_base; then
- echo "static_key: module test_static_key_base is not found [SKIP]"
+ echo "static_keys: module test_static_key_base is not found [SKIP]"
exit $ksft_skip
fi
if ! /sbin/modprobe -q -n test_static_keys; then
- echo "static_key: module test_static_keys is not found [SKIP]"
+ echo "static_keys: module test_static_keys is not found [SKIP]"
exit $ksft_skip
fi
if /sbin/modprobe -q test_static_key_base; then
if /sbin/modprobe -q test_static_keys; then
- echo "static_key: ok"
+ echo "static_keys: ok"
/sbin/modprobe -q -r test_static_keys
/sbin/modprobe -q -r test_static_key_base
else
@@ -25,6 +25,6 @@ if /sbin/modprobe -q test_static_key_base; then
/sbin/modprobe -q -r test_static_key_base
fi
else
- echo "static_key: [FAIL]"
+ echo "static_keys: [FAIL]"
exit 1
fi
diff --git a/tools/testing/selftests/sync/Makefile b/tools/testing/selftests/sync/Makefile
index d0121a8a3523..df0f91bf6890 100644
--- a/tools/testing/selftests/sync/Makefile
+++ b/tools/testing/selftests/sync/Makefile
@@ -1,6 +1,6 @@
# SPDX-License-Identifier: GPL-2.0
CFLAGS += -O2 -g -std=gnu89 -pthread -Wall -Wextra
-CFLAGS += -I../../../../usr/include/
+CFLAGS += $(KHDR_INCLUDES)
LDFLAGS += -pthread
.PHONY: all clean
diff --git a/tools/testing/selftests/sync/config b/tools/testing/selftests/sync/config
index 1ab7e8130db2..64c60f38b446 100644
--- a/tools/testing/selftests/sync/config
+++ b/tools/testing/selftests/sync/config
@@ -1,4 +1,2 @@
CONFIG_STAGING=y
-CONFIG_ANDROID=y
-CONFIG_SYNC=y
CONFIG_SW_SYNC=y
diff --git a/tools/testing/selftests/sync/sync_test.c b/tools/testing/selftests/sync/sync_test.c
index 3824b66f41a0..414a617db993 100644
--- a/tools/testing/selftests/sync/sync_test.c
+++ b/tools/testing/selftests/sync/sync_test.c
@@ -86,9 +86,9 @@ int main(void)
int err;
ksft_print_header();
- ksft_set_plan(3 + 7);
sync_api_supported();
+ ksft_set_plan(3 + 7);
ksft_print_msg("[RUN]\tTesting sync framework\n");
diff --git a/tools/testing/selftests/syscall_user_dispatch/.gitignore b/tools/testing/selftests/syscall_user_dispatch/.gitignore
new file mode 100644
index 000000000000..f539615ad5da
--- /dev/null
+++ b/tools/testing/selftests/syscall_user_dispatch/.gitignore
@@ -0,0 +1,3 @@
+# SPDX-License-Identifier: GPL-2.0-only
+sud_test
+sud_benchmark
diff --git a/tools/testing/selftests/syscall_user_dispatch/Makefile b/tools/testing/selftests/syscall_user_dispatch/Makefile
new file mode 100644
index 000000000000..03c120270953
--- /dev/null
+++ b/tools/testing/selftests/syscall_user_dispatch/Makefile
@@ -0,0 +1,9 @@
+# SPDX-License-Identifier: GPL-2.0
+top_srcdir = ../../../..
+INSTALL_HDR_PATH = $(top_srcdir)/usr
+LINUX_HDR_PATH = $(INSTALL_HDR_PATH)/include/
+
+CFLAGS += -Wall -I$(LINUX_HDR_PATH)
+
+TEST_GEN_PROGS := sud_test sud_benchmark
+include ../lib.mk
diff --git a/tools/testing/selftests/syscall_user_dispatch/config b/tools/testing/selftests/syscall_user_dispatch/config
new file mode 100644
index 000000000000..039e303e59d7
--- /dev/null
+++ b/tools/testing/selftests/syscall_user_dispatch/config
@@ -0,0 +1 @@
+CONFIG_GENERIC_ENTRY=y
diff --git a/tools/testing/selftests/syscall_user_dispatch/sud_benchmark.c b/tools/testing/selftests/syscall_user_dispatch/sud_benchmark.c
new file mode 100644
index 000000000000..073a03702ff5
--- /dev/null
+++ b/tools/testing/selftests/syscall_user_dispatch/sud_benchmark.c
@@ -0,0 +1,202 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright (c) 2020 Collabora Ltd.
+ *
+ * Benchmark and test syscall user dispatch
+ */
+
+#define _GNU_SOURCE
+#include <stdio.h>
+#include <string.h>
+#include <stdlib.h>
+#include <signal.h>
+#include <errno.h>
+#include <time.h>
+#include <sys/time.h>
+#include <unistd.h>
+#include <sys/sysinfo.h>
+#include <sys/prctl.h>
+#include <sys/syscall.h>
+
+#ifndef PR_SET_SYSCALL_USER_DISPATCH
+# define PR_SET_SYSCALL_USER_DISPATCH 59
+# define PR_SYS_DISPATCH_OFF 0
+# define PR_SYS_DISPATCH_ON 1
+# define SYSCALL_DISPATCH_FILTER_ALLOW 0
+# define SYSCALL_DISPATCH_FILTER_BLOCK 1
+#endif
+
+#ifdef __NR_syscalls
+# define MAGIC_SYSCALL_1 (__NR_syscalls + 1) /* Bad Linux syscall number */
+#else
+# define MAGIC_SYSCALL_1 (0xff00) /* Bad Linux syscall number */
+#endif
+
+/*
+ * To test returning from a sigsys with selector blocked, the test
+ * requires some per-architecture support (i.e. knowledge about the
+ * signal trampoline address). On i386, we know it is on the vdso, and
+ * a small trampoline is open-coded for x86_64. Other architectures
+ * that have a trampoline in the vdso will support TEST_BLOCKED_RETURN
+ * out of the box, but don't enable them until they support syscall user
+ * dispatch.
+ */
+#if defined(__x86_64__) || defined(__i386__)
+#define TEST_BLOCKED_RETURN
+#endif
+
+#ifdef __x86_64__
+void* (syscall_dispatcher_start)(void);
+void* (syscall_dispatcher_end)(void);
+#else
+unsigned long syscall_dispatcher_start = 0;
+unsigned long syscall_dispatcher_end = 0;
+#endif
+
+unsigned long trapped_call_count = 0;
+unsigned long native_call_count = 0;
+
+char selector;
+#define SYSCALL_BLOCK (selector = SYSCALL_DISPATCH_FILTER_BLOCK)
+#define SYSCALL_UNBLOCK (selector = SYSCALL_DISPATCH_FILTER_ALLOW)
+
+#define CALIBRATION_STEP 100000
+#define CALIBRATE_TO_SECS 5
+int factor;
+
+static double one_sysinfo_step(void)
+{
+ struct timespec t1, t2;
+ int i;
+ struct sysinfo info;
+
+ clock_gettime(CLOCK_MONOTONIC, &t1);
+ for (i = 0; i < CALIBRATION_STEP; i++)
+ sysinfo(&info);
+ clock_gettime(CLOCK_MONOTONIC, &t2);
+ return (t2.tv_sec - t1.tv_sec) + 1.0e-9 * (t2.tv_nsec - t1.tv_nsec);
+}
+
+static void calibrate_set(void)
+{
+ double elapsed = 0;
+
+ printf("Calibrating test set to last ~%d seconds...\n", CALIBRATE_TO_SECS);
+
+ while (elapsed < 1) {
+ elapsed += one_sysinfo_step();
+ factor += CALIBRATE_TO_SECS;
+ }
+
+ printf("test iterations = %d\n", CALIBRATION_STEP * factor);
+}
+
+static double perf_syscall(void)
+{
+ unsigned int i;
+ double partial = 0;
+
+ for (i = 0; i < factor; ++i)
+ partial += one_sysinfo_step()/(CALIBRATION_STEP*factor);
+ return partial;
+}
+
+static void handle_sigsys(int sig, siginfo_t *info, void *ucontext)
+{
+ char buf[1024];
+ int len;
+
+ SYSCALL_UNBLOCK;
+
+ /* printf and friends are not signal-safe. */
+ len = snprintf(buf, 1024, "Caught sys_%x\n", info->si_syscall);
+ write(1, buf, len);
+
+ if (info->si_syscall == MAGIC_SYSCALL_1)
+ trapped_call_count++;
+ else
+ native_call_count++;
+
+#ifdef TEST_BLOCKED_RETURN
+ SYSCALL_BLOCK;
+#endif
+
+#ifdef __x86_64__
+ __asm__ volatile("movq $0xf, %rax");
+ __asm__ volatile("leaveq");
+ __asm__ volatile("add $0x8, %rsp");
+ __asm__ volatile("syscall_dispatcher_start:");
+ __asm__ volatile("syscall");
+ __asm__ volatile("nop"); /* Landing pad within dispatcher area */
+ __asm__ volatile("syscall_dispatcher_end:");
+#endif
+
+}
+
+int main(void)
+{
+ struct sigaction act;
+ double time1, time2;
+ int ret;
+ sigset_t mask;
+
+ memset(&act, 0, sizeof(act));
+ sigemptyset(&mask);
+
+ act.sa_sigaction = handle_sigsys;
+ act.sa_flags = SA_SIGINFO;
+ act.sa_mask = mask;
+
+ calibrate_set();
+
+ time1 = perf_syscall();
+ printf("Avg syscall time %.0lfns.\n", time1 * 1.0e9);
+
+ ret = sigaction(SIGSYS, &act, NULL);
+ if (ret) {
+ perror("Error sigaction:");
+ exit(-1);
+ }
+
+ fprintf(stderr, "Enabling syscall trapping.\n");
+
+ if (prctl(PR_SET_SYSCALL_USER_DISPATCH, PR_SYS_DISPATCH_ON,
+ syscall_dispatcher_start,
+ (syscall_dispatcher_end - syscall_dispatcher_start + 1),
+ &selector)) {
+ perror("prctl failed\n");
+ exit(-1);
+ }
+
+ SYSCALL_BLOCK;
+ syscall(MAGIC_SYSCALL_1);
+
+#ifdef TEST_BLOCKED_RETURN
+ if (selector == SYSCALL_DISPATCH_FILTER_ALLOW) {
+ fprintf(stderr, "Failed to return with selector blocked.\n");
+ exit(-1);
+ }
+#endif
+
+ SYSCALL_UNBLOCK;
+
+ if (!trapped_call_count) {
+ fprintf(stderr, "syscall trapping does not work.\n");
+ exit(-1);
+ }
+
+ time2 = perf_syscall();
+
+ if (native_call_count) {
+ perror("syscall trapping intercepted more syscalls than expected\n");
+ exit(-1);
+ }
+
+ printf("trapped_call_count %lu, native_call_count %lu.\n",
+ trapped_call_count, native_call_count);
+ printf("Avg syscall time %.0lfns.\n", time2 * 1.0e9);
+ printf("Interception overhead: %.1lf%% (+%.0lfns).\n",
+ 100.0 * (time2 / time1 - 1.0), 1.0e9 * (time2 - time1));
+ return 0;
+
+}
diff --git a/tools/testing/selftests/syscall_user_dispatch/sud_test.c b/tools/testing/selftests/syscall_user_dispatch/sud_test.c
new file mode 100644
index 000000000000..b5d592d4099e
--- /dev/null
+++ b/tools/testing/selftests/syscall_user_dispatch/sud_test.c
@@ -0,0 +1,312 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright (c) 2020 Collabora Ltd.
+ *
+ * Test code for syscall user dispatch
+ */
+
+#define _GNU_SOURCE
+#include <sys/prctl.h>
+#include <sys/sysinfo.h>
+#include <sys/syscall.h>
+#include <signal.h>
+
+#include <asm/unistd.h>
+#include "../kselftest_harness.h"
+
+#ifndef PR_SET_SYSCALL_USER_DISPATCH
+# define PR_SET_SYSCALL_USER_DISPATCH 59
+# define PR_SYS_DISPATCH_OFF 0
+# define PR_SYS_DISPATCH_ON 1
+# define SYSCALL_DISPATCH_FILTER_ALLOW 0
+# define SYSCALL_DISPATCH_FILTER_BLOCK 1
+#endif
+
+#ifndef SYS_USER_DISPATCH
+# define SYS_USER_DISPATCH 2
+#endif
+
+#ifdef __NR_syscalls
+# define MAGIC_SYSCALL_1 (__NR_syscalls + 1) /* Bad Linux syscall number */
+#else
+# define MAGIC_SYSCALL_1 (0xff00) /* Bad Linux syscall number */
+#endif
+
+#define SYSCALL_DISPATCH_ON(x) ((x) = SYSCALL_DISPATCH_FILTER_BLOCK)
+#define SYSCALL_DISPATCH_OFF(x) ((x) = SYSCALL_DISPATCH_FILTER_ALLOW)
+
+/* Test Summary:
+ *
+ * - dispatch_trigger_sigsys: Verify if PR_SET_SYSCALL_USER_DISPATCH is
+ * able to trigger SIGSYS on a syscall.
+ *
+ * - bad_selector: Test that a bad selector value triggers SIGSYS with
+ * si_errno EINVAL.
+ *
+ * - bad_prctl_param: Test that the API correctly rejects invalid
+ * parameters on prctl
+ *
+ * - dispatch_and_return: Test that a syscall is selectively dispatched
+ * to userspace depending on the value of selector.
+ *
+ * - disable_dispatch: Test that the PR_SYS_DISPATCH_OFF correctly
+ * disables the dispatcher
+ *
+ * - direct_dispatch_range: Test that a syscall within the allowed range
+ * can bypass the dispatcher.
+ */
+
+TEST_SIGNAL(dispatch_trigger_sigsys, SIGSYS)
+{
+ char sel = SYSCALL_DISPATCH_FILTER_ALLOW;
+ struct sysinfo info;
+ int ret;
+
+ ret = sysinfo(&info);
+ ASSERT_EQ(0, ret);
+
+ ret = prctl(PR_SET_SYSCALL_USER_DISPATCH, PR_SYS_DISPATCH_ON, 0, 0, &sel);
+ ASSERT_EQ(0, ret) {
+ TH_LOG("Kernel does not support CONFIG_SYSCALL_USER_DISPATCH");
+ }
+
+ SYSCALL_DISPATCH_ON(sel);
+
+ sysinfo(&info);
+
+ EXPECT_FALSE(true) {
+ TH_LOG("Unreachable!");
+ }
+}
+
+TEST(bad_prctl_param)
+{
+ char sel = SYSCALL_DISPATCH_FILTER_ALLOW;
+ int op;
+
+ /* Invalid op */
+ op = -1;
+ prctl(PR_SET_SYSCALL_USER_DISPATCH, op, 0, 0, &sel);
+ ASSERT_EQ(EINVAL, errno);
+
+ /* PR_SYS_DISPATCH_OFF */
+ op = PR_SYS_DISPATCH_OFF;
+
+ /* offset != 0 */
+ prctl(PR_SET_SYSCALL_USER_DISPATCH, op, 0x1, 0x0, 0);
+ EXPECT_EQ(EINVAL, errno);
+
+ /* len != 0 */
+ prctl(PR_SET_SYSCALL_USER_DISPATCH, op, 0x0, 0xff, 0);
+ EXPECT_EQ(EINVAL, errno);
+
+ /* sel != NULL */
+ prctl(PR_SET_SYSCALL_USER_DISPATCH, op, 0x0, 0x0, &sel);
+ EXPECT_EQ(EINVAL, errno);
+
+ /* Valid parameter */
+ errno = 0;
+ prctl(PR_SET_SYSCALL_USER_DISPATCH, op, 0x0, 0x0, 0x0);
+ EXPECT_EQ(0, errno);
+
+ /* PR_SYS_DISPATCH_ON */
+ op = PR_SYS_DISPATCH_ON;
+
+ /* Dispatcher region is bad (offset > 0 && len == 0) */
+ prctl(PR_SET_SYSCALL_USER_DISPATCH, op, 0x1, 0x0, &sel);
+ EXPECT_EQ(EINVAL, errno);
+ prctl(PR_SET_SYSCALL_USER_DISPATCH, op, -1L, 0x0, &sel);
+ EXPECT_EQ(EINVAL, errno);
+
+ /* Invalid selector */
+ prctl(PR_SET_SYSCALL_USER_DISPATCH, op, 0x0, 0x1, (void *) -1);
+ ASSERT_EQ(EFAULT, errno);
+
+ /*
+ * Dispatcher range overflows unsigned long
+ */
+ prctl(PR_SET_SYSCALL_USER_DISPATCH, PR_SYS_DISPATCH_ON, 1, -1L, &sel);
+ ASSERT_EQ(EINVAL, errno) {
+ TH_LOG("Should reject bad syscall range");
+ }
+
+ /*
+ * Allowed range overflows usigned long
+ */
+ prctl(PR_SET_SYSCALL_USER_DISPATCH, PR_SYS_DISPATCH_ON, -1L, 0x1, &sel);
+ ASSERT_EQ(EINVAL, errno) {
+ TH_LOG("Should reject bad syscall range");
+ }
+}
+
+/*
+ * Use global selector for handle_sigsys tests, to avoid passing
+ * selector to signal handler
+ */
+char glob_sel;
+int nr_syscalls_emulated;
+int si_code;
+int si_errno;
+
+static void handle_sigsys(int sig, siginfo_t *info, void *ucontext)
+{
+ si_code = info->si_code;
+ si_errno = info->si_errno;
+
+ if (info->si_syscall == MAGIC_SYSCALL_1)
+ nr_syscalls_emulated++;
+
+ /* In preparation for sigreturn. */
+ SYSCALL_DISPATCH_OFF(glob_sel);
+}
+
+TEST(dispatch_and_return)
+{
+ long ret;
+ struct sigaction act;
+ sigset_t mask;
+
+ glob_sel = 0;
+ nr_syscalls_emulated = 0;
+ si_code = 0;
+ si_errno = 0;
+
+ memset(&act, 0, sizeof(act));
+ sigemptyset(&mask);
+
+ act.sa_sigaction = handle_sigsys;
+ act.sa_flags = SA_SIGINFO;
+ act.sa_mask = mask;
+
+ ret = sigaction(SIGSYS, &act, NULL);
+ ASSERT_EQ(0, ret);
+
+ /* Make sure selector is good prior to prctl. */
+ SYSCALL_DISPATCH_OFF(glob_sel);
+
+ ret = prctl(PR_SET_SYSCALL_USER_DISPATCH, PR_SYS_DISPATCH_ON, 0, 0, &glob_sel);
+ ASSERT_EQ(0, ret) {
+ TH_LOG("Kernel does not support CONFIG_SYSCALL_USER_DISPATCH");
+ }
+
+ /* MAGIC_SYSCALL_1 doesn't exist. */
+ SYSCALL_DISPATCH_OFF(glob_sel);
+ ret = syscall(MAGIC_SYSCALL_1);
+ EXPECT_EQ(-1, ret) {
+ TH_LOG("Dispatch triggered unexpectedly");
+ }
+
+ /* MAGIC_SYSCALL_1 should be emulated. */
+ nr_syscalls_emulated = 0;
+ SYSCALL_DISPATCH_ON(glob_sel);
+
+ ret = syscall(MAGIC_SYSCALL_1);
+ EXPECT_EQ(MAGIC_SYSCALL_1, ret) {
+ TH_LOG("Failed to intercept syscall");
+ }
+ EXPECT_EQ(1, nr_syscalls_emulated) {
+ TH_LOG("Failed to emulate syscall");
+ }
+ ASSERT_EQ(SYS_USER_DISPATCH, si_code) {
+ TH_LOG("Bad si_code in SIGSYS");
+ }
+ ASSERT_EQ(0, si_errno) {
+ TH_LOG("Bad si_errno in SIGSYS");
+ }
+}
+
+TEST_SIGNAL(bad_selector, SIGSYS)
+{
+ long ret;
+ struct sigaction act;
+ sigset_t mask;
+ struct sysinfo info;
+
+ glob_sel = SYSCALL_DISPATCH_FILTER_ALLOW;
+ nr_syscalls_emulated = 0;
+ si_code = 0;
+ si_errno = 0;
+
+ memset(&act, 0, sizeof(act));
+ sigemptyset(&mask);
+
+ act.sa_sigaction = handle_sigsys;
+ act.sa_flags = SA_SIGINFO;
+ act.sa_mask = mask;
+
+ ret = sigaction(SIGSYS, &act, NULL);
+ ASSERT_EQ(0, ret);
+
+ /* Make sure selector is good prior to prctl. */
+ SYSCALL_DISPATCH_OFF(glob_sel);
+
+ ret = prctl(PR_SET_SYSCALL_USER_DISPATCH, PR_SYS_DISPATCH_ON, 0, 0, &glob_sel);
+ ASSERT_EQ(0, ret) {
+ TH_LOG("Kernel does not support CONFIG_SYSCALL_USER_DISPATCH");
+ }
+
+ glob_sel = -1;
+
+ sysinfo(&info);
+
+ /* Even though it is ready to catch SIGSYS, the signal is
+ * supposed to be uncatchable.
+ */
+
+ EXPECT_FALSE(true) {
+ TH_LOG("Unreachable!");
+ }
+}
+
+TEST(disable_dispatch)
+{
+ int ret;
+ struct sysinfo info;
+ char sel = 0;
+
+ ret = prctl(PR_SET_SYSCALL_USER_DISPATCH, PR_SYS_DISPATCH_ON, 0, 0, &sel);
+ ASSERT_EQ(0, ret) {
+ TH_LOG("Kernel does not support CONFIG_SYSCALL_USER_DISPATCH");
+ }
+
+ /* MAGIC_SYSCALL_1 doesn't exist. */
+ SYSCALL_DISPATCH_OFF(glob_sel);
+
+ ret = prctl(PR_SET_SYSCALL_USER_DISPATCH, PR_SYS_DISPATCH_OFF, 0, 0, 0);
+ EXPECT_EQ(0, ret) {
+ TH_LOG("Failed to unset syscall user dispatch");
+ }
+
+ /* Shouldn't have any effect... */
+ SYSCALL_DISPATCH_ON(glob_sel);
+
+ ret = syscall(__NR_sysinfo, &info);
+ EXPECT_EQ(0, ret) {
+ TH_LOG("Dispatch triggered unexpectedly");
+ }
+}
+
+TEST(direct_dispatch_range)
+{
+ int ret = 0;
+ struct sysinfo info;
+ char sel = SYSCALL_DISPATCH_FILTER_ALLOW;
+
+ /*
+ * Instead of calculating libc addresses; allow the entire
+ * memory map and lock the selector.
+ */
+ ret = prctl(PR_SET_SYSCALL_USER_DISPATCH, PR_SYS_DISPATCH_ON, 0, -1L, &sel);
+ ASSERT_EQ(0, ret) {
+ TH_LOG("Kernel does not support CONFIG_SYSCALL_USER_DISPATCH");
+ }
+
+ SYSCALL_DISPATCH_ON(sel);
+
+ ret = sysinfo(&info);
+ ASSERT_EQ(0, ret) {
+ TH_LOG("Dispatch triggered unexpectedly");
+ }
+}
+
+TEST_HARNESS_MAIN
diff --git a/tools/testing/selftests/sysctl/sysctl.sh b/tools/testing/selftests/sysctl/sysctl.sh
index 19515dcb7d04..84472b436c07 100755
--- a/tools/testing/selftests/sysctl/sysctl.sh
+++ b/tools/testing/selftests/sysctl/sysctl.sh
@@ -1,16 +1,6 @@
#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0-or-later OR copyleft-next-0.3.1
# Copyright (C) 2017 Luis R. Rodriguez <mcgrof@kernel.org>
-#
-# This program is free software; you can redistribute it and/or modify it
-# under the terms of the GNU General Public License as published by the Free
-# Software Foundation; either version 2 of the License, or at your option any
-# later version; or, when distributed separately from the Linux kernel or
-# when incorporated into other software packages, subject to the following
-# license:
-#
-# This program is free software; you can redistribute it and/or modify it
-# under the terms of copyleft-next (version 0.3.1 or later) as published
-# at http://copyleft-next.org/.
# This performs a series tests against the proc sysctl interface.
@@ -24,22 +14,28 @@ TEST_FILE=$(mktemp)
# This represents
#
-# TEST_ID:TEST_COUNT:ENABLED:TARGET
+# TEST_ID:TEST_COUNT:ENABLED:TARGET:SKIP_NO_TARGET
#
# TEST_ID: is the test id number
# TEST_COUNT: number of times we should run the test
# ENABLED: 1 if enabled, 0 otherwise
# TARGET: test target file required on the test_sysctl module
+# SKIP_NO_TARGET: 1 skip if TARGET not there
+# 0 run eventhough TARGET not there
#
# Once these are enabled please leave them as-is. Write your own test,
# we have tons of space.
-ALL_TESTS="0001:1:1:int_0001"
-ALL_TESTS="$ALL_TESTS 0002:1:1:string_0001"
-ALL_TESTS="$ALL_TESTS 0003:1:1:int_0002"
-ALL_TESTS="$ALL_TESTS 0004:1:1:uint_0001"
-ALL_TESTS="$ALL_TESTS 0005:3:1:int_0003"
-ALL_TESTS="$ALL_TESTS 0006:50:1:bitmap_0001"
-ALL_TESTS="$ALL_TESTS 0007:1:1:boot_int"
+ALL_TESTS="0001:1:1:int_0001:1"
+ALL_TESTS="$ALL_TESTS 0002:1:1:string_0001:1"
+ALL_TESTS="$ALL_TESTS 0003:1:1:int_0002:1"
+ALL_TESTS="$ALL_TESTS 0004:1:1:uint_0001:1"
+ALL_TESTS="$ALL_TESTS 0005:3:1:int_0003:1"
+ALL_TESTS="$ALL_TESTS 0006:50:1:bitmap_0001:1"
+ALL_TESTS="$ALL_TESTS 0007:1:1:boot_int:1"
+ALL_TESTS="$ALL_TESTS 0008:1:1:match_int:1"
+ALL_TESTS="$ALL_TESTS 0009:1:1:unregister_error:0"
+ALL_TESTS="$ALL_TESTS 0010:1:1:mnt/mnt_error:0"
+ALL_TESTS="$ALL_TESTS 0011:1:1:empty_add:0"
function allow_user_defaults()
{
@@ -68,7 +64,7 @@ function check_production_sysctl_writes_strict()
else
old_strict=$(cat ${WRITES_STRICT})
if [ "$old_strict" = "1" ]; then
- echo "ok"
+ echo "OK"
else
echo "FAIL, strict value is 0 but force to 1 to continue" >&2
echo "1" > ${WRITES_STRICT}
@@ -230,7 +226,7 @@ run_numerictests()
echo "FAIL" >&2
exit 1
else
- echo "ok"
+ echo "OK"
fi
echo -n "Checking sysctl is not set to test value ... "
@@ -238,7 +234,7 @@ run_numerictests()
echo "FAIL" >&2
exit 1
else
- echo "ok"
+ echo "OK"
fi
echo -n "Writing sysctl from shell ... "
@@ -247,7 +243,7 @@ run_numerictests()
echo "FAIL" >&2
exit 1
else
- echo "ok"
+ echo "OK"
fi
echo -n "Resetting sysctl to original value ... "
@@ -256,7 +252,7 @@ run_numerictests()
echo "FAIL" >&2
exit 1
else
- echo "ok"
+ echo "OK"
fi
# Now that we've validated the sanity of "set_test" and "set_orig",
@@ -270,7 +266,7 @@ run_numerictests()
echo "FAIL" >&2
rc=1
else
- echo "ok"
+ echo "OK"
fi
echo -n "Writing middle of sysctl after synchronized seek ... "
@@ -280,7 +276,7 @@ run_numerictests()
echo "FAIL" >&2
rc=1
else
- echo "ok"
+ echo "OK"
fi
echo -n "Writing beyond end of sysctl ... "
@@ -290,7 +286,7 @@ run_numerictests()
echo "FAIL" >&2
rc=1
else
- echo "ok"
+ echo "OK"
fi
echo -n "Writing sysctl with multiple long writes ... "
@@ -301,14 +297,14 @@ run_numerictests()
echo "FAIL" >&2
rc=1
else
- echo "ok"
+ echo "OK"
fi
test_rc
}
check_failure()
{
- echo -n "Testing that $1 fails as expected..."
+ echo -n "Testing that $1 fails as expected ... "
reset_vals
TEST_STR="$1"
orig="$(cat $TARGET)"
@@ -319,7 +315,7 @@ check_failure()
echo "FAIL" >&2
rc=1
else
- echo "ok"
+ echo "OK"
fi
test_rc
}
@@ -361,7 +357,7 @@ run_wideint_tests()
# Your test must accept digits 3 and 4 to use this
run_limit_digit()
{
- echo -n "Checking ignoring spaces up to PAGE_SIZE works on write ..."
+ echo -n "Checking ignoring spaces up to PAGE_SIZE works on write ... "
reset_vals
LIMIT=$((MAX_DIGITS -1))
@@ -373,11 +369,11 @@ run_limit_digit()
echo "FAIL" >&2
rc=1
else
- echo "ok"
+ echo "OK"
fi
test_rc
- echo -n "Checking passing PAGE_SIZE of spaces fails on write ..."
+ echo -n "Checking passing PAGE_SIZE of spaces fails on write ... "
reset_vals
LIMIT=$((MAX_DIGITS))
@@ -389,7 +385,7 @@ run_limit_digit()
echo "FAIL" >&2
rc=1
else
- echo "ok"
+ echo "OK"
fi
test_rc
}
@@ -397,7 +393,7 @@ run_limit_digit()
# You are using an int
run_limit_digit_int()
{
- echo -n "Testing INT_MAX works ..."
+ echo -n "Testing INT_MAX works ... "
reset_vals
TEST_STR="$INT_MAX"
echo -n $TEST_STR > $TARGET
@@ -406,11 +402,11 @@ run_limit_digit_int()
echo "FAIL" >&2
rc=1
else
- echo "ok"
+ echo "OK"
fi
test_rc
- echo -n "Testing INT_MAX + 1 will fail as expected..."
+ echo -n "Testing INT_MAX + 1 will fail as expected ... "
reset_vals
let TEST_STR=$INT_MAX+1
echo -n $TEST_STR > $TARGET 2> /dev/null
@@ -419,11 +415,11 @@ run_limit_digit_int()
echo "FAIL" >&2
rc=1
else
- echo "ok"
+ echo "OK"
fi
test_rc
- echo -n "Testing negative values will work as expected..."
+ echo -n "Testing negative values will work as expected ... "
reset_vals
TEST_STR="-3"
echo -n $TEST_STR > $TARGET 2> /dev/null
@@ -431,7 +427,7 @@ run_limit_digit_int()
echo "FAIL" >&2
rc=1
else
- echo "ok"
+ echo "OK"
fi
test_rc
}
@@ -447,7 +443,7 @@ run_limit_digit_int_array()
echo "FAIL" >&2
rc=1
else
- echo "ok"
+ echo "OK"
fi
test_rc
@@ -464,7 +460,7 @@ run_limit_digit_int_array()
echo "FAIL" >&2
rc=1
else
- echo "ok"
+ echo "OK"
fi
test_rc
@@ -482,7 +478,7 @@ run_limit_digit_int_array()
echo "FAIL" >&2
rc=1
else
- echo "ok"
+ echo "OK"
fi
test_rc
@@ -499,7 +495,7 @@ run_limit_digit_int_array()
echo "FAIL" >&2
rc=1
else
- echo "ok"
+ echo "OK"
fi
test_rc
}
@@ -507,7 +503,7 @@ run_limit_digit_int_array()
# You are using an unsigned int
run_limit_digit_uint()
{
- echo -n "Testing UINT_MAX works ..."
+ echo -n "Testing UINT_MAX works ... "
reset_vals
TEST_STR="$UINT_MAX"
echo -n $TEST_STR > $TARGET
@@ -516,11 +512,11 @@ run_limit_digit_uint()
echo "FAIL" >&2
rc=1
else
- echo "ok"
+ echo "OK"
fi
test_rc
- echo -n "Testing UINT_MAX + 1 will fail as expected..."
+ echo -n "Testing UINT_MAX + 1 will fail as expected ... "
reset_vals
TEST_STR=$(($UINT_MAX+1))
echo -n $TEST_STR > $TARGET 2> /dev/null
@@ -529,11 +525,11 @@ run_limit_digit_uint()
echo "FAIL" >&2
rc=1
else
- echo "ok"
+ echo "OK"
fi
test_rc
- echo -n "Testing negative values will not work as expected ..."
+ echo -n "Testing negative values will not work as expected ... "
reset_vals
TEST_STR="-3"
echo -n $TEST_STR > $TARGET 2> /dev/null
@@ -542,7 +538,7 @@ run_limit_digit_uint()
echo "FAIL" >&2
rc=1
else
- echo "ok"
+ echo "OK"
fi
test_rc
}
@@ -556,7 +552,7 @@ run_stringtests()
echo "FAIL" >&2
rc=1
else
- echo "ok"
+ echo "OK"
fi
echo -n "Writing middle of sysctl after unsynchronized seek ... "
@@ -566,7 +562,7 @@ run_stringtests()
echo "FAIL" >&2
rc=1
else
- echo "ok"
+ echo "OK"
fi
echo -n "Checking sysctl maxlen is at least $MAXLEN ... "
@@ -577,7 +573,7 @@ run_stringtests()
echo "FAIL" >&2
rc=1
else
- echo "ok"
+ echo "OK"
fi
echo -n "Checking sysctl keeps original string on overflow append ... "
@@ -588,7 +584,7 @@ run_stringtests()
echo "FAIL" >&2
rc=1
else
- echo "ok"
+ echo "OK"
fi
echo -n "Checking sysctl stays NULL terminated on write ... "
@@ -599,7 +595,7 @@ run_stringtests()
echo "FAIL" >&2
rc=1
else
- echo "ok"
+ echo "OK"
fi
echo -n "Checking sysctl stays NULL terminated on overwrite ... "
@@ -610,7 +606,7 @@ run_stringtests()
echo "FAIL" >&2
rc=1
else
- echo "ok"
+ echo "OK"
fi
test_rc
@@ -622,7 +618,6 @@ target_exists()
TEST_ID="$2"
if [ ! -f ${TARGET} ] ; then
- echo "Target for test $TEST_ID: $TARGET not exist, skipping test ..."
return 0
fi
return 1
@@ -656,7 +651,7 @@ run_bitmaptest() {
fi
done
- echo -n "Checking bitmap handler... "
+ echo -n "Checking bitmap handler ... "
TEST_FILE=$(mktemp)
echo -n "$TEST_STR" > $TEST_FILE
@@ -671,7 +666,7 @@ run_bitmaptest() {
echo "FAIL" >&2
rc=1
else
- echo "ok"
+ echo "OK"
rc=0
fi
test_rc
@@ -739,7 +734,7 @@ sysctl_test_0005()
sysctl_test_0006()
{
- TARGET="${SYSCTL}/bitmap_0001"
+ TARGET="${SYSCTL}/$(get_test_target 0006)"
reset_vals
ORIG=""
run_bitmaptest
@@ -747,44 +742,115 @@ sysctl_test_0006()
sysctl_test_0007()
{
- TARGET="${SYSCTL}/boot_int"
+ TARGET="${SYSCTL}/$(get_test_target 0007)"
+ echo -n "Testing if $TARGET is set to 1 ... "
+
if [ ! -f $TARGET ]; then
- echo "Skipping test for $TARGET as it is not present ..."
+ echo -e "SKIPPING\n$TARGET is not present"
return $ksft_skip
fi
if [ -d $DIR ]; then
- echo "Boot param test only possible sysctl_test is built-in, not module:"
+ echo -e "SKIPPING\nTest only possible if sysctl_test is built-in, not module:"
cat $TEST_DIR/config >&2
return $ksft_skip
fi
- echo -n "Testing if $TARGET is set to 1 ..."
ORIG=$(cat "${TARGET}")
if [ x$ORIG = "x1" ]; then
- echo "ok"
+ echo "OK"
return 0
fi
- echo "FAIL"
- echo "Checking if /proc/cmdline contains setting of the expected parameter ..."
+
if [ ! -f /proc/cmdline ]; then
- echo "/proc/cmdline does not exist, test inconclusive"
- return 0
+ echo -e "SKIPPING\nThere is no /proc/cmdline to check for paramter"
+ return $ksft_skip
fi
FOUND=$(grep -c "sysctl[./]debug[./]test_sysctl[./]boot_int=1" /proc/cmdline)
if [ $FOUND = "1" ]; then
- echo "Kernel param found but $TARGET is not 1, TEST FAILED"
+ echo -e "FAIL\nKernel param found but $TARGET is not 1." >&2
rc=1
test_rc
fi
- echo "Skipping test, expected kernel parameter missing."
- echo "To perform this test, make sure kernel is booted with parameter: sysctl.debug.test_sysctl.boot_int=1"
+ echo -e "SKIPPING\nExpected kernel parameter missing."
+ echo "Kernel must be booted with parameter: sysctl.debug.test_sysctl.boot_int=1"
return $ksft_skip
}
+sysctl_test_0008()
+{
+ TARGET="${SYSCTL}/$(get_test_target 0008)"
+ echo -n "Testing if $TARGET is matched in kernel ... "
+
+ if [ ! -f $TARGET ]; then
+ echo -e "SKIPPING\n$TARGET is not present"
+ return $ksft_skip
+ fi
+
+ ORIG_VALUE=$(cat "${TARGET}")
+
+ if [ $ORIG_VALUE -ne 1 ]; then
+ echo "FAIL" >&2
+ rc=1
+ test_rc
+ fi
+
+ echo "OK"
+ return 0
+}
+
+sysctl_test_0009()
+{
+ TARGET="${SYSCTL}/$(get_test_target 0009)"
+ echo -n "Testing if $TARGET unregistered correctly ... "
+ if [ -d $TARGET ]; then
+ echo "FAIL" >&2
+ rc=1
+ test_rc
+ fi
+
+ echo "OK"
+ return 0
+}
+
+sysctl_test_0010()
+{
+ TARGET="${SYSCTL}/$(get_test_target 0010)"
+ echo -n "Testing that $TARGET was not created ... "
+ if [ -d $TARGET ]; then
+ echo "FAIL" >&2
+ rc=1
+ test_rc
+ fi
+
+ echo "OK"
+ return 0
+}
+
+sysctl_test_0011()
+{
+ TARGET="${SYSCTL}/$(get_test_target 0011)"
+ echo -n "Testing empty dir handling in ${TARGET} ... "
+ if [ ! -d ${TARGET} ]; then
+ echo -e "FAIL\nCould not create ${TARGET}" >&2
+ rc=1
+ test_rc
+ fi
+
+ TARGET2="${TARGET}/empty"
+ if [ ! -d ${TARGET2} ]; then
+ echo -e "FAIL\nCould not create ${TARGET2}" >&2
+ rc=1
+ test_rc
+ fi
+
+ echo "OK"
+ return 0
+}
+
list_tests()
{
echo "Test ID list:"
@@ -800,6 +866,10 @@ list_tests()
echo "0005 x $(get_test_count 0005) - tests proc_douintvec() array"
echo "0006 x $(get_test_count 0006) - tests proc_do_large_bitmap()"
echo "0007 x $(get_test_count 0007) - tests setting sysctl from kernel boot param"
+ echo "0008 x $(get_test_count 0008) - tests sysctl macro values match"
+ echo "0009 x $(get_test_count 0009) - tests sysct unregister"
+ echo "0010 x $(get_test_count 0010) - tests sysct mount point"
+ echo "0011 x $(get_test_count 0011) - tests empty directories"
}
usage()
@@ -844,38 +914,65 @@ function test_num()
usage
fi
}
+function remove_leading_zeros()
+{
+ echo $1 | sed 's/^0*//'
+}
function get_test_count()
{
test_num $1
- TEST_DATA=$(echo $ALL_TESTS | awk '{print $'$1'}')
+ awk_field=$(remove_leading_zeros $1)
+ TEST_DATA=$(echo $ALL_TESTS | awk '{print $'$awk_field'}')
echo ${TEST_DATA} | awk -F":" '{print $2}'
}
function get_test_enabled()
{
test_num $1
- TEST_DATA=$(echo $ALL_TESTS | awk '{print $'$1'}')
+ awk_field=$(remove_leading_zeros $1)
+ TEST_DATA=$(echo $ALL_TESTS | awk '{print $'$awk_field'}')
echo ${TEST_DATA} | awk -F":" '{print $3}'
}
function get_test_target()
{
test_num $1
- TEST_DATA=$(echo $ALL_TESTS | awk '{print $'$1'}')
+ awk_field=$(remove_leading_zeros $1)
+ TEST_DATA=$(echo $ALL_TESTS | awk '{print $'$awk_field'}')
echo ${TEST_DATA} | awk -F":" '{print $4}'
}
+function get_test_skip_no_target()
+{
+ test_num $1
+ awk_field=$(remove_leading_zeros $1)
+ TEST_DATA=$(echo $ALL_TESTS | awk '{print $'$awk_field'}')
+ echo ${TEST_DATA} | awk -F":" '{print $5}'
+}
+
+function skip_test()
+{
+ TEST_ID=$1
+ TEST_TARGET=$2
+ if target_exists $TEST_TARGET $TEST_ID; then
+ TEST_SKIP=$(get_test_skip_no_target $TEST_ID)
+ if [[ $TEST_SKIP -eq "1" ]]; then
+ echo "Target $TEST_TARGET for test $TEST_ID does not exist ... SKIPPING"
+ return 0
+ fi
+ fi
+ return 1
+}
+
function run_all_tests()
{
for i in $ALL_TESTS ; do
- TEST_ID=${i%:*:*:*}
+ TEST_ID=${i%:*:*:*:*}
ENABLED=$(get_test_enabled $TEST_ID)
TEST_COUNT=$(get_test_count $TEST_ID)
TEST_TARGET=$(get_test_target $TEST_ID)
- if target_exists $TEST_TARGET $TEST_ID; then
- continue
- fi
+
if [[ $ENABLED -eq "1" ]]; then
test_case $TEST_ID $TEST_COUNT $TEST_TARGET
fi
@@ -910,18 +1007,19 @@ function watch_case()
function test_case()
{
+ TEST_ID=$1
NUM_TESTS=$2
+ TARGET=$3
- i=0
-
- if target_exists $3 $1; then
- continue
+ if skip_test $TEST_ID $TARGET; then
+ return
fi
+ i=0
while [ $i -lt $NUM_TESTS ]; do
- test_num $1
- watch_log $i ${TEST_NAME}_test_$1 noclear
- RUN_TEST=${TEST_NAME}_test_$1
+ test_num $TEST_ID
+ watch_log $i ${TEST_NAME}_test_${TEST_ID} noclear
+ RUN_TEST=${TEST_NAME}_test_${TEST_ID}
$RUN_TEST
let i=$i+1
done
diff --git a/tools/testing/selftests/tc-testing/.gitignore b/tools/testing/selftests/tc-testing/.gitignore
index d52f65de23b4..9fe1cef72728 100644
--- a/tools/testing/selftests/tc-testing/.gitignore
+++ b/tools/testing/selftests/tc-testing/.gitignore
@@ -1,7 +1,6 @@
# SPDX-License-Identifier: GPL-2.0-only
__pycache__/
*.pyc
-plugins/
*.xml
*.tap
tdc_config_local.py
diff --git a/tools/testing/selftests/tc-testing/Makefile b/tools/testing/selftests/tc-testing/Makefile
new file mode 100644
index 000000000000..9153e3428a77
--- /dev/null
+++ b/tools/testing/selftests/tc-testing/Makefile
@@ -0,0 +1,6 @@
+# SPDX-License-Identifier: GPL-2.0
+
+TEST_PROGS += tdc.sh
+TEST_FILES := action-ebpf tdc*.py Tdc*.py plugins plugin-lib tc-tests scripts
+
+include ../lib.mk
diff --git a/tools/testing/selftests/tc-testing/README b/tools/testing/selftests/tc-testing/README
index b0954c873e2f..fc8e858ff119 100644
--- a/tools/testing/selftests/tc-testing/README
+++ b/tools/testing/selftests/tc-testing/README
@@ -9,8 +9,7 @@ execute them inside a network namespace dedicated to the task.
REQUIREMENTS
------------
-* Minimum Python version of 3.4. Earlier 3.X versions may work but are not
- guaranteed.
+* Minimum Python version of 3.8.
* The kernel must have network namespace support if using nsPlugin
@@ -96,6 +95,15 @@ the stdout with a regular expression.
Each of the commands in any stage will run in a shell instance.
+Each test is an atomic unit. A test that for whatever reason spans multiple test
+definitions is a bug.
+
+A test that runs inside a namespace (requires "nsPlugin") will run in parallel
+with other tests.
+
+Tests that use netdevsim or don't run inside a namespace run serially with regards
+to each other.
+
USER-DEFINED CONSTANTS
----------------------
@@ -116,59 +124,6 @@ COMMAND LINE ARGUMENTS
Run tdc.py -h to see the full list of available arguments.
-usage: tdc.py [-h] [-p PATH] [-D DIR [DIR ...]] [-f FILE [FILE ...]]
- [-c [CATG [CATG ...]]] [-e ID [ID ...]] [-l] [-s] [-i] [-v] [-N]
- [-d DEVICE] [-P] [-n] [-V]
-
-Linux TC unit tests
-
-optional arguments:
- -h, --help show this help message and exit
- -p PATH, --path PATH The full path to the tc executable to use
- -v, --verbose Show the commands that are being run
- -N, --notap Suppress tap results for command under test
- -d DEVICE, --device DEVICE
- Execute test cases that use a physical device, where
- DEVICE is its name. (If not defined, tests that require
- a physical device will be skipped)
- -P, --pause Pause execution just before post-suite stage
-
-selection:
- select which test cases: files plus directories; filtered by categories
- plus testids
-
- -D DIR [DIR ...], --directory DIR [DIR ...]
- Collect tests from the specified directory(ies)
- (default [tc-tests])
- -f FILE [FILE ...], --file FILE [FILE ...]
- Run tests from the specified file(s)
- -c [CATG [CATG ...]], --category [CATG [CATG ...]]
- Run tests only from the specified category/ies, or if
- no category/ies is/are specified, list known
- categories.
- -e ID [ID ...], --execute ID [ID ...]
- Execute the specified test cases with specified IDs
-
-action:
- select action to perform on selected test cases
-
- -l, --list List all test cases, or those only within the
- specified category
- -s, --show Display the selected test cases
- -i, --id Generate ID numbers for new test cases
-
-netns:
- options for nsPlugin (run commands in net namespace)
-
- -N, --no-namespace
- Do not run commands in a network namespace.
-
-valgrind:
- options for valgrindPlugin (run command under test under Valgrind)
-
- -V, --valgrind Run commands under valgrind
-
-
PLUGIN ARCHITECTURE
-------------------
@@ -240,8 +195,6 @@ directory:
and the other is a test whether the command leaked memory or not.
(This one is a preliminary version, it may not work quite right yet,
but the overall template is there and it should only need tweaks.)
- - buildebpfPlugin.py:
- builds all programs in $EBPFDIR.
ACKNOWLEDGEMENTS
diff --git a/tools/testing/selftests/tc-testing/TdcPlugin.py b/tools/testing/selftests/tc-testing/TdcPlugin.py
index 79f3ca8617c9..aae85ce4f776 100644
--- a/tools/testing/selftests/tc-testing/TdcPlugin.py
+++ b/tools/testing/selftests/tc-testing/TdcPlugin.py
@@ -5,10 +5,10 @@ class TdcPlugin:
super().__init__()
print(' -- {}.__init__'.format(self.sub_class))
- def pre_suite(self, testcount, testidlist):
+ def pre_suite(self, testcount, testlist):
'''run commands before test_runner goes into a test loop'''
self.testcount = testcount
- self.testidlist = testidlist
+ self.testlist = testlist
if self.args.verbose > 1:
print(' -- {}.pre_suite'.format(self.sub_class))
diff --git a/tools/testing/selftests/tc-testing/TdcResults.py b/tools/testing/selftests/tc-testing/TdcResults.py
index 1e4d95fdf8d0..e56817b97f08 100644
--- a/tools/testing/selftests/tc-testing/TdcResults.py
+++ b/tools/testing/selftests/tc-testing/TdcResults.py
@@ -59,7 +59,8 @@ class TestResult:
return self.steps
class TestSuiteReport():
- _testsuite = []
+ def __init__(self):
+ self._testsuite = []
def add_resultdata(self, result_data):
if isinstance(result_data, TestResult):
diff --git a/tools/testing/selftests/tc-testing/action-ebpf b/tools/testing/selftests/tc-testing/action-ebpf
new file mode 100644
index 000000000000..4879479b2ee5
--- /dev/null
+++ b/tools/testing/selftests/tc-testing/action-ebpf
Binary files differ
diff --git a/tools/testing/selftests/tc-testing/bpf/action.c b/tools/testing/selftests/tc-testing/action.c
index c32b99b80e19..c32b99b80e19 100644
--- a/tools/testing/selftests/tc-testing/bpf/action.c
+++ b/tools/testing/selftests/tc-testing/action.c
diff --git a/tools/testing/selftests/tc-testing/bpf/Makefile b/tools/testing/selftests/tc-testing/bpf/Makefile
deleted file mode 100644
index be5a5e542804..000000000000
--- a/tools/testing/selftests/tc-testing/bpf/Makefile
+++ /dev/null
@@ -1,30 +0,0 @@
-# SPDX-License-Identifier: GPL-2.0
-
-APIDIR := ../../../../include/uapi
-TEST_GEN_FILES = action.o
-
-top_srcdir = ../../../../..
-KSFT_KHDR_INSTALL := 1
-include ../../lib.mk
-
-CLANG ?= clang
-LLC ?= llc
-PROBE := $(shell $(LLC) -march=bpf -mcpu=probe -filetype=null /dev/null 2>&1)
-
-ifeq ($(PROBE),)
- CPU ?= probe
-else
- CPU ?= generic
-endif
-
-CLANG_SYS_INCLUDES := $(shell $(CLANG) -v -E - </dev/null 2>&1 \
- | sed -n '/<...> search starts here:/,/End of search list./{ s| \(/.*\)|-idirafter \1|p }')
-
-CLANG_FLAGS = -I. -I$(APIDIR) \
- $(CLANG_SYS_INCLUDES) \
- -Wno-compare-distinct-pointer-types
-
-$(OUTPUT)/%.o: %.c
- $(CLANG) $(CLANG_FLAGS) \
- -O2 -target bpf -emit-llvm -c $< -o - | \
- $(LLC) -march=bpf -mcpu=$(CPU) $(LLC_FLAGS) -filetype=obj -o $@
diff --git a/tools/testing/selftests/tc-testing/config b/tools/testing/selftests/tc-testing/config
index c33a7aac27ff..db176fe7d0c3 100644
--- a/tools/testing/selftests/tc-testing/config
+++ b/tools/testing/selftests/tc-testing/config
@@ -1,19 +1,56 @@
#
+# Network
+#
+
+CONFIG_DUMMY=y
+CONFIG_VETH=y
+
+#
# Core Netfilter Configuration
#
+CONFIG_NETFILTER=y
+CONFIG_NETFILTER_ADVANCED=y
CONFIG_NF_CONNTRACK=m
CONFIG_NF_CONNTRACK_MARK=y
CONFIG_NF_CONNTRACK_ZONES=y
CONFIG_NF_CONNTRACK_LABELS=y
+CONFIG_NF_CONNTRACK_PROCFS=y
+CONFIG_NF_FLOW_TABLE=m
+CONFIG_NF_TABLES=m
CONFIG_NF_NAT=m
+CONFIG_NETFILTER_XT_TARGET_LOG=m
CONFIG_NET_SCHED=y
#
# Queueing/Scheduling
#
-CONFIG_NET_SCH_PRIO=m
+CONFIG_NET_SCH_CAKE=m
+CONFIG_NET_SCH_CBS=m
+CONFIG_NET_SCH_CHOKE=m
+CONFIG_NET_SCH_CODEL=m
+CONFIG_NET_SCH_DRR=m
+CONFIG_NET_SCH_ETF=m
+CONFIG_NET_SCH_FQ=m
+CONFIG_NET_SCH_FQ_CODEL=m
+CONFIG_NET_SCH_GRED=m
+CONFIG_NET_SCH_HFSC=m
+CONFIG_NET_SCH_HHF=m
+CONFIG_NET_SCH_HTB=m
CONFIG_NET_SCH_INGRESS=m
+CONFIG_NET_SCH_MQPRIO=m
+CONFIG_NET_SCH_MULTIQ=m
+CONFIG_NET_SCH_NETEM=m
+CONFIG_NET_SCH_PIE=m
+CONFIG_NET_SCH_PLUG=m
+CONFIG_NET_SCH_PRIO=m
+CONFIG_NET_SCH_QFQ=m
+CONFIG_NET_SCH_SFB=m
+CONFIG_NET_SCH_SFQ=m
+CONFIG_NET_SCH_SKBPRIO=m
+CONFIG_NET_SCH_TAPRIO=m
+CONFIG_NET_SCH_TBF=m
+CONFIG_NET_SCH_TEQL=m
#
# Classification
@@ -23,6 +60,13 @@ CONFIG_NET_CLS_FW=m
CONFIG_NET_CLS_U32=m
CONFIG_CLS_U32_PERF=y
CONFIG_CLS_U32_MARK=y
+CONFIG_NET_CLS_BASIC=m
+CONFIG_NET_CLS_BPF=m
+CONFIG_NET_CLS_CGROUP=m
+CONFIG_NET_CLS_FLOW=m
+CONFIG_NET_CLS_FLOWER=m
+CONFIG_NET_CLS_MATCHALL=m
+CONFIG_NET_CLS_ROUTE4=m
CONFIG_NET_EMATCH=y
CONFIG_NET_EMATCH_STACK=32
CONFIG_NET_EMATCH_CMP=m
@@ -39,7 +83,6 @@ CONFIG_NET_ACT_GACT=m
CONFIG_GACT_PROB=y
CONFIG_NET_ACT_MIRRED=m
CONFIG_NET_ACT_SAMPLE=m
-CONFIG_NET_ACT_IPT=m
CONFIG_NET_ACT_NAT=m
CONFIG_NET_ACT_PEDIT=m
CONFIG_NET_ACT_SIMP=m
@@ -54,13 +97,19 @@ CONFIG_NET_ACT_IFE=m
CONFIG_NET_ACT_TUNNEL_KEY=m
CONFIG_NET_ACT_CT=m
CONFIG_NET_ACT_MPLS=m
+CONFIG_NET_ACT_GATE=m
CONFIG_NET_IFE_SKBMARK=m
CONFIG_NET_IFE_SKBPRIO=m
CONFIG_NET_IFE_SKBTCINDEX=m
CONFIG_NET_SCH_FIFO=y
CONFIG_NET_SCH_ETS=m
+CONFIG_NET_SCH_RED=m
+CONFIG_NET_SCH_FQ_PIE=m
#
## Network testing
#
CONFIG_CAN=m
+CONFIG_ATM=y
+CONFIG_NETDEVSIM=m
+CONFIG_PTP_1588_CLOCK_MOCK=m
diff --git a/tools/testing/selftests/tc-testing/creating-testcases/AddingTestCases.txt b/tools/testing/selftests/tc-testing/creating-testcases/AddingTestCases.txt
index a28571aff0e1..ff956d8c99c5 100644
--- a/tools/testing/selftests/tc-testing/creating-testcases/AddingTestCases.txt
+++ b/tools/testing/selftests/tc-testing/creating-testcases/AddingTestCases.txt
@@ -38,6 +38,8 @@ skip: A completely optional key, if the corresponding value is "yes"
this test case will still appear in the results output but
marked as skipped. This key can be placed anywhere inside the
test case at the top level.
+dependsOn: Same as 'skip', but the value is executed as a command. The test
+ is skipped when the command returns non-zero.
category: A list of single-word descriptions covering what the command
under test is testing. Example: filter, actions, u32, gact, etc.
setup: The list of commands required to ensure the command under test
diff --git a/tools/testing/selftests/tc-testing/plugin-lib/buildebpfPlugin.py b/tools/testing/selftests/tc-testing/plugin-lib/buildebpfPlugin.py
deleted file mode 100644
index d34fe06268d2..000000000000
--- a/tools/testing/selftests/tc-testing/plugin-lib/buildebpfPlugin.py
+++ /dev/null
@@ -1,67 +0,0 @@
-'''
-build ebpf program
-'''
-
-import os
-import signal
-from string import Template
-import subprocess
-import time
-from TdcPlugin import TdcPlugin
-from tdc_config import *
-
-class SubPlugin(TdcPlugin):
- def __init__(self):
- self.sub_class = 'buildebpf/SubPlugin'
- self.tap = ''
- super().__init__()
-
- def pre_suite(self, testcount, testidlist):
- super().pre_suite(testcount, testidlist)
-
- if self.args.buildebpf:
- self._ebpf_makeall()
-
- def post_suite(self, index):
- super().post_suite(index)
-
- self._ebpf_makeclean()
-
- def add_args(self, parser):
- super().add_args(parser)
-
- self.argparser_group = self.argparser.add_argument_group(
- 'buildebpf',
- 'options for buildebpfPlugin')
- self.argparser_group.add_argument(
- '--nobuildebpf', action='store_false', default=True,
- dest='buildebpf',
- help='Don\'t build eBPF programs')
-
- return self.argparser
-
- def _ebpf_makeall(self):
- if self.args.buildebpf:
- self._make('all')
-
- def _ebpf_makeclean(self):
- if self.args.buildebpf:
- self._make('clean')
-
- def _make(self, target):
- command = 'make -C {} {}'.format(self.args.NAMES['EBPFDIR'], target)
- proc = subprocess.Popen(command,
- shell=True,
- stdout=subprocess.PIPE,
- stderr=subprocess.PIPE,
- env=os.environ.copy())
- (rawout, serr) = proc.communicate()
-
- if proc.returncode != 0 and len(serr) > 0:
- foutput = serr.decode("utf-8")
- else:
- foutput = rawout.decode("utf-8")
-
- proc.stdout.close()
- proc.stderr.close()
- return proc, foutput
diff --git a/tools/testing/selftests/tc-testing/plugin-lib/nsPlugin.py b/tools/testing/selftests/tc-testing/plugin-lib/nsPlugin.py
index 9539cffa9e5e..bb19b8b76d3b 100644
--- a/tools/testing/selftests/tc-testing/plugin-lib/nsPlugin.py
+++ b/tools/testing/selftests/tc-testing/plugin-lib/nsPlugin.py
@@ -3,53 +3,91 @@ import signal
from string import Template
import subprocess
import time
+from multiprocessing import Pool
+from functools import cached_property
from TdcPlugin import TdcPlugin
from tdc_config import *
+try:
+ from pyroute2 import netns
+ from pyroute2 import IPRoute
+ netlink = True
+except ImportError:
+ netlink = False
+ print("!!! Consider installing pyroute2 !!!")
+
class SubPlugin(TdcPlugin):
def __init__(self):
self.sub_class = 'ns/SubPlugin'
super().__init__()
- def pre_suite(self, testcount, testidlist):
- '''run commands before test_runner goes into a test loop'''
- super().pre_suite(testcount, testidlist)
+ def pre_suite(self, testcount, testlist):
+ super().pre_suite(testcount, testlist)
+
+ def prepare_test(self, test):
+ if 'skip' in test and test['skip'] == 'yes':
+ return
+
+ if 'nsPlugin' not in test['plugins']:
+ return
- if self.args.namespace:
- self._ns_create()
+ if netlink == True:
+ self._nl_ns_create()
else:
- self._ports_create()
+ self._ipr2_ns_create()
+
+ # Make sure the netns is visible in the fs
+ ticks = 20
+ while True:
+ if ticks == 0:
+ raise TimeoutError
+ self._proc_check()
+ try:
+ ns = self.args.NAMES['NS']
+ f = open('/run/netns/{}'.format(ns))
+ f.close()
+ break
+ except:
+ time.sleep(0.1)
+ ticks -= 1
+ continue
+
+ def pre_case(self, test, test_skip):
+ if self.args.verbose:
+ print('{}.pre_case'.format(self.sub_class))
+
+ if test_skip:
+ return
+
+ self.prepare_test(test)
+
+ def post_case(self):
+ if self.args.verbose:
+ print('{}.post_case'.format(self.sub_class))
+
+ if netlink == True:
+ self._nl_ns_destroy()
+ else:
+ self._ipr2_ns_destroy()
def post_suite(self, index):
- '''run commands after test_runner goes into a test loop'''
- super().post_suite(index)
if self.args.verbose:
print('{}.post_suite'.format(self.sub_class))
- if self.args.namespace:
- self._ns_destroy()
- else:
- self._ports_destroy()
-
- def add_args(self, parser):
- super().add_args(parser)
- self.argparser_group = self.argparser.add_argument_group(
- 'netns',
- 'options for nsPlugin(run commands in net namespace)')
- self.argparser_group.add_argument(
- '-N', '--no-namespace', action='store_false', default=True,
- dest='namespace', help='Don\'t run commands in namespace')
- return self.argparser
+ # Make sure we don't leak resources
+ cmd = self._replace_keywords("$IP -a netns del")
+
+ if self.args.verbose > 3:
+ print('_exec_cmd: command "{}"'.format(cmd))
+
+ subprocess.run(cmd, shell=True, stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL)
def adjust_command(self, stage, command):
super().adjust_command(stage, command)
cmdform = 'list'
cmdlist = list()
- if not self.args.namespace:
- return command
-
if self.args.verbose:
print('{}.adjust_command'.format(self.sub_class))
@@ -77,73 +115,131 @@ class SubPlugin(TdcPlugin):
print('adjust_command: return command [{}]'.format(command))
return command
- def _ports_create(self):
- cmd = '$IP link add $DEV0 type veth peer name $DEV1'
- self._exec_cmd('pre', cmd)
- cmd = '$IP link set $DEV0 up'
- self._exec_cmd('pre', cmd)
- if not self.args.namespace:
- cmd = '$IP link set $DEV1 up'
- self._exec_cmd('pre', cmd)
-
- def _ports_destroy(self):
- cmd = '$IP link del $DEV0'
- self._exec_cmd('post', cmd)
+ def _nl_ns_create(self):
+ ns = self.args.NAMES["NS"];
+ dev0 = self.args.NAMES["DEV0"];
+ dev1 = self.args.NAMES["DEV1"];
+ dummy = self.args.NAMES["DUMMY"];
- def _ns_create(self):
+ if self.args.verbose:
+ print('{}._nl_ns_create'.format(self.sub_class))
+
+ netns.create(ns)
+ netns.pushns(newns=ns)
+ with IPRoute() as ip:
+ ip.link('add', ifname=dev1, kind='veth', peer={'ifname': dev0, 'net_ns_fd':'/proc/1/ns/net'})
+ ip.link('add', ifname=dummy, kind='dummy')
+ ticks = 20
+ while True:
+ if ticks == 0:
+ raise TimeoutError
+ try:
+ dev1_idx = ip.link_lookup(ifname=dev1)[0]
+ dummy_idx = ip.link_lookup(ifname=dummy)[0]
+ ip.link('set', index=dev1_idx, state='up')
+ ip.link('set', index=dummy_idx, state='up')
+ break
+ except:
+ time.sleep(0.1)
+ ticks -= 1
+ continue
+ netns.popns()
+
+ with IPRoute() as ip:
+ ticks = 20
+ while True:
+ if ticks == 0:
+ raise TimeoutError
+ try:
+ dev0_idx = ip.link_lookup(ifname=dev0)[0]
+ ip.link('set', index=dev0_idx, state='up')
+ break
+ except:
+ time.sleep(0.1)
+ ticks -= 1
+ continue
+
+ def _ipr2_ns_create_cmds(self):
+ cmds = []
+
+ ns = self.args.NAMES['NS']
+
+ cmds.append(self._replace_keywords('netns add {}'.format(ns)))
+ cmds.append(self._replace_keywords('link add $DEV1 type veth peer name $DEV0'))
+ cmds.append(self._replace_keywords('link set $DEV1 netns {}'.format(ns)))
+ cmds.append(self._replace_keywords('link add $DUMMY type dummy'.format(ns)))
+ cmds.append(self._replace_keywords('link set $DUMMY netns {}'.format(ns)))
+ cmds.append(self._replace_keywords('netns exec {} $IP link set $DEV1 up'.format(ns)))
+ cmds.append(self._replace_keywords('netns exec {} $IP link set $DUMMY up'.format(ns)))
+ cmds.append(self._replace_keywords('link set $DEV0 up'.format(ns)))
+
+ if self.args.device:
+ cmds.append(self._replace_keywords('link set $DEV2 netns {}'.format(ns)))
+ cmds.append(self._replace_keywords('netns exec {} $IP link set $DEV2 up'.format(ns)))
+
+ return cmds
+
+ def _ipr2_ns_create(self):
'''
Create the network namespace in which the tests will be run and set up
the required network devices for it.
'''
- self._ports_create()
- if self.args.namespace:
- cmd = '$IP netns add {}'.format(self.args.NAMES['NS'])
- self._exec_cmd('pre', cmd)
- cmd = '$IP link set $DEV1 netns {}'.format(self.args.NAMES['NS'])
- self._exec_cmd('pre', cmd)
- cmd = '$IP -n {} link set $DEV1 up'.format(self.args.NAMES['NS'])
- self._exec_cmd('pre', cmd)
- if self.args.device:
- cmd = '$IP link set $DEV2 netns {}'.format(self.args.NAMES['NS'])
- self._exec_cmd('pre', cmd)
- cmd = '$IP -n {} link set $DEV2 up'.format(self.args.NAMES['NS'])
- self._exec_cmd('pre', cmd)
-
- def _ns_destroy(self):
+ self._exec_cmd_batched('pre', self._ipr2_ns_create_cmds())
+
+ def _nl_ns_destroy(self):
+ ns = self.args.NAMES['NS']
+ netns.remove(ns)
+
+ def _ipr2_ns_destroy_cmd(self):
+ return self._replace_keywords('netns delete {}'.format(self.args.NAMES['NS']))
+
+ def _ipr2_ns_destroy(self):
'''
Destroy the network namespace for testing (and any associated network
devices as well)
'''
- if self.args.namespace:
- cmd = '$IP netns delete {}'.format(self.args.NAMES['NS'])
- self._exec_cmd('post', cmd)
+ self._exec_cmd('post', self._ipr2_ns_destroy_cmd())
+
+ @cached_property
+ def _proc(self):
+ ip = self._replace_keywords("$IP -b -")
+ proc = subprocess.Popen(ip,
+ shell=True,
+ stdin=subprocess.PIPE,
+ env=ENVIR)
+
+ return proc
+
+ def _proc_check(self):
+ proc = self._proc
+
+ proc.poll()
+
+ if proc.returncode is not None and proc.returncode != 0:
+ raise RuntimeError("iproute2 exited with an error code")
def _exec_cmd(self, stage, command):
'''
Perform any required modifications on an executable command, then run
it in a subprocess and return the results.
'''
- if '$' in command:
- command = self._replace_keywords(command)
- self.adjust_command(stage, command)
- if self.args.verbose:
+ if self.args.verbose > 3:
print('_exec_cmd: command "{}"'.format(command))
- proc = subprocess.Popen(command,
- shell=True,
- stdout=subprocess.PIPE,
- stderr=subprocess.PIPE,
- env=ENVIR)
- (rawout, serr) = proc.communicate()
- if proc.returncode != 0 and len(serr) > 0:
- foutput = serr.decode("utf-8")
- else:
- foutput = rawout.decode("utf-8")
+ proc = self._proc
+
+ proc.stdin.write((command + '\n').encode())
+ proc.stdin.flush()
+
+ if self.args.verbose > 3:
+ print('_exec_cmd proc: {}'.format(proc))
+
+ self._proc_check()
- proc.stdout.close()
- proc.stderr.close()
- return proc, foutput
+ def _exec_cmd_batched(self, stage, commands):
+ for cmd in commands:
+ self._exec_cmd(stage, cmd)
def _replace_keywords(self, cmd):
"""
diff --git a/tools/testing/selftests/tc-testing/plugin-lib/rootPlugin.py b/tools/testing/selftests/tc-testing/plugin-lib/rootPlugin.py
index e36775bd4d12..8762c0f4a095 100644
--- a/tools/testing/selftests/tc-testing/plugin-lib/rootPlugin.py
+++ b/tools/testing/selftests/tc-testing/plugin-lib/rootPlugin.py
@@ -10,9 +10,9 @@ class SubPlugin(TdcPlugin):
self.sub_class = 'root/SubPlugin'
super().__init__()
- def pre_suite(self, testcount, testidlist):
+ def pre_suite(self, testcount, testlist):
# run commands before test_runner goes into a test loop
- super().pre_suite(testcount, testidlist)
+ super().pre_suite(testcount, testlist)
if os.geteuid():
print('This script must be run with root privileges', file=sys.stderr)
diff --git a/tools/testing/selftests/tc-testing/plugin-lib/scapyPlugin.py b/tools/testing/selftests/tc-testing/plugin-lib/scapyPlugin.py
index 229ee185b27e..254136e3da5a 100644
--- a/tools/testing/selftests/tc-testing/plugin-lib/scapyPlugin.py
+++ b/tools/testing/selftests/tc-testing/plugin-lib/scapyPlugin.py
@@ -29,22 +29,26 @@ class SubPlugin(TdcPlugin):
return
# Check for required fields
- scapyinfo = self.args.caseinfo['scapy']
- scapy_keys = ['iface', 'count', 'packet']
- missing_keys = []
- keyfail = False
- for k in scapy_keys:
- if k not in scapyinfo:
- keyfail = True
- missing_keys.add(k)
- if keyfail:
- print('{}: Scapy block present in the test, but is missing info:'
- .format(self.sub_class))
- print('{}'.format(missing_keys))
-
- pkt = eval(scapyinfo['packet'])
- if '$' in scapyinfo['iface']:
- tpl = Template(scapyinfo['iface'])
- scapyinfo['iface'] = tpl.safe_substitute(NAMES)
- for count in range(scapyinfo['count']):
- sendp(pkt, iface=scapyinfo['iface'])
+ lscapyinfo = self.args.caseinfo['scapy']
+ if type(lscapyinfo) != list:
+ lscapyinfo = [ lscapyinfo, ]
+
+ for scapyinfo in lscapyinfo:
+ scapy_keys = ['iface', 'count', 'packet']
+ missing_keys = []
+ keyfail = False
+ for k in scapy_keys:
+ if k not in scapyinfo:
+ keyfail = True
+ missing_keys.append(k)
+ if keyfail:
+ print('{}: Scapy block present in the test, but is missing info:'
+ .format(self.sub_class))
+ print('{}'.format(missing_keys))
+
+ pkt = eval(scapyinfo['packet'])
+ if '$' in scapyinfo['iface']:
+ tpl = Template(scapyinfo['iface'])
+ scapyinfo['iface'] = tpl.safe_substitute(NAMES)
+ for count in range(scapyinfo['count']):
+ sendp(pkt, iface=scapyinfo['iface'])
diff --git a/tools/testing/selftests/tc-testing/plugin-lib/valgrindPlugin.py b/tools/testing/selftests/tc-testing/plugin-lib/valgrindPlugin.py
index 4bb866575ea1..c6f61649c430 100644
--- a/tools/testing/selftests/tc-testing/plugin-lib/valgrindPlugin.py
+++ b/tools/testing/selftests/tc-testing/plugin-lib/valgrindPlugin.py
@@ -25,9 +25,10 @@ class SubPlugin(TdcPlugin):
self._tsr = TestSuiteReport()
super().__init__()
- def pre_suite(self, testcount, testidlist):
+ def pre_suite(self, testcount, testist):
'''run commands before test_runner goes into a test loop'''
- super().pre_suite(testcount, testidlist)
+ self.testidlist = [tidx['id'] for tidx in testlist]
+ super().pre_suite(testcount, testlist)
if self.args.verbose > 1:
print('{}.pre_suite'.format(self.sub_class))
if self.args.valgrind:
diff --git a/tools/testing/selftests/tc-testing/scripts/taprio_wait_for_admin.sh b/tools/testing/selftests/tc-testing/scripts/taprio_wait_for_admin.sh
new file mode 100755
index 000000000000..f5335e8ad6b4
--- /dev/null
+++ b/tools/testing/selftests/tc-testing/scripts/taprio_wait_for_admin.sh
@@ -0,0 +1,16 @@
+#!/bin/bash
+
+TC="$1"; shift
+ETH="$1"; shift
+
+# The taprio architecture changes the admin schedule from a hrtimer and not
+# from process context, so we need to wait in order to make sure that any
+# schedule change actually took place.
+while :; do
+ has_admin="$($TC -j qdisc show dev $ETH root | jq '.[].options | has("admin")')"
+ if [ "$has_admin" = "false" ]; then
+ break;
+ fi
+
+ sleep 1
+done
diff --git a/tools/testing/selftests/tc-testing/settings b/tools/testing/selftests/tc-testing/settings
new file mode 100644
index 000000000000..e2206265f67c
--- /dev/null
+++ b/tools/testing/selftests/tc-testing/settings
@@ -0,0 +1 @@
+timeout=900
diff --git a/tools/testing/selftests/tc-testing/tc-tests/actions/bpf.json b/tools/testing/selftests/tc-testing/tc-tests/actions/bpf.json
index 503982b8f295..6e00bf32ef9a 100644
--- a/tools/testing/selftests/tc-testing/tc-tests/actions/bpf.json
+++ b/tools/testing/selftests/tc-testing/tc-tests/actions/bpf.json
@@ -54,9 +54,6 @@
"actions",
"bpf"
],
- "plugins": {
- "requires": "buildebpfPlugin"
- },
"setup": [
[
"$TC action flush action bpf",
@@ -65,10 +62,10 @@
255
]
],
- "cmdUnderTest": "$TC action add action bpf object-file $EBPFDIR/action.o section action-ok index 667",
+ "cmdUnderTest": "$TC action add action bpf object-file $EBPFDIR/action-ebpf section action-ok index 667",
"expExitCode": "0",
"verifyCmd": "$TC action get action bpf index 667",
- "matchPattern": "action order [0-9]*: bpf action.o:\\[action-ok\\] id [0-9]* tag [0-9a-f]{16}( jited)? default-action pipe.*index 667 ref",
+ "matchPattern": "action order [0-9]*: bpf action-ebpf:\\[action-ok\\] id [0-9].* tag [0-9a-f]{16}( jited)? default-action pipe.*index 667 ref",
"matchCount": "1",
"teardown": [
"$TC action flush action bpf"
@@ -81,9 +78,6 @@
"actions",
"bpf"
],
- "plugins": {
- "requires": "buildebpfPlugin"
- },
"setup": [
[
"$TC action flush action bpf",
@@ -92,10 +86,10 @@
255
]
],
- "cmdUnderTest": "$TC action add action bpf object-file $EBPFDIR/action.o section action-ko index 667",
+ "cmdUnderTest": "$TC action add action bpf object-file $EBPFDIR/action-ebpf section action-ko index 667",
"expExitCode": "255",
"verifyCmd": "$TC action get action bpf index 667",
- "matchPattern": "action order [0-9]*: bpf action.o:\\[action-ko\\] id [0-9].*index 667 ref",
+ "matchPattern": "action order [0-9]*: bpf action-ebpf:\\[action-ko\\] id [0-9].*index 667 ref",
"matchCount": "0",
"teardown": [
[
diff --git a/tools/testing/selftests/tc-testing/tc-tests/actions/connmark.json b/tools/testing/selftests/tc-testing/tc-tests/actions/connmark.json
index cadde8f41fcd..3d0f9310bde4 100644
--- a/tools/testing/selftests/tc-testing/tc-tests/actions/connmark.json
+++ b/tools/testing/selftests/tc-testing/tc-tests/actions/connmark.json
@@ -6,6 +6,9 @@
"actions",
"connmark"
],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
"setup": [
[
"$TC actions flush action connmark",
@@ -30,6 +33,9 @@
"actions",
"connmark"
],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
"setup": [
[
"$TC actions flush action connmark",
@@ -54,6 +60,9 @@
"actions",
"connmark"
],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
"setup": [
[
"$TC actions flush action connmark",
@@ -78,6 +87,9 @@
"actions",
"connmark"
],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
"setup": [
[
"$TC actions flush action connmark",
@@ -102,6 +114,9 @@
"actions",
"connmark"
],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
"setup": [
[
"$TC actions flush action connmark",
@@ -126,6 +141,9 @@
"actions",
"connmark"
],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
"setup": [
[
"$TC actions flush action connmark",
@@ -150,6 +168,9 @@
"actions",
"connmark"
],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
"setup": [
[
"$TC actions flush action connmark",
@@ -174,6 +195,9 @@
"actions",
"connmark"
],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
"setup": [
[
"$TC actions flush action connmark",
@@ -198,6 +222,9 @@
"actions",
"connmark"
],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
"setup": [
[
"$TC actions flush action connmark",
@@ -222,6 +249,9 @@
"actions",
"connmark"
],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
"setup": [
[
"$TC actions flush action connmark",
@@ -246,6 +276,9 @@
"actions",
"connmark"
],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
"setup": [
[
"$TC actions flush action connmark",
@@ -271,6 +304,9 @@
"actions",
"connmark"
],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
"setup": [
[
"$TC actions flush action connmark",
@@ -295,6 +331,9 @@
"actions",
"connmark"
],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
"setup": [
[
"$TC actions flush action connmark",
@@ -312,5 +351,61 @@
"teardown": [
"$TC actions flush action connmark"
]
+ },
+ {
+ "id": "6571",
+ "name": "Delete connmark action with valid index",
+ "category": [
+ "actions",
+ "connmark"
+ ],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
+ "setup": [
+ [
+ "$TC actions flush action connmark",
+ 0,
+ 1,
+ 255
+ ],
+ "$TC actions add action connmark pass index 20"
+ ],
+ "cmdUnderTest": "$TC actions del action connmark index 20",
+ "expExitCode": "0",
+ "verifyCmd": "$TC actions get action connmark index 20",
+ "matchPattern": "action order [0-9]+: connmark zone 0 pass.*index 20 ref",
+ "matchCount": "0",
+ "teardown": [
+ "$TC actions flush action connmark"
+ ]
+ },
+ {
+ "id": "3426",
+ "name": "Delete connmark action with invalid index",
+ "category": [
+ "actions",
+ "connmark"
+ ],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
+ "setup": [
+ [
+ "$TC actions flush action connmark",
+ 0,
+ 1,
+ 255
+ ],
+ "$TC actions add action connmark pass index 20"
+ ],
+ "cmdUnderTest": "$TC actions del action connmark index 1",
+ "expExitCode": "255",
+ "verifyCmd": "$TC actions get action connmark index 20",
+ "matchPattern": "action order [0-9]+: connmark zone 0 pass.*index 20 ref",
+ "matchCount": "1",
+ "teardown": [
+ "$TC actions flush action connmark"
+ ]
}
]
diff --git a/tools/testing/selftests/tc-testing/tc-tests/actions/csum.json b/tools/testing/selftests/tc-testing/tc-tests/actions/csum.json
index 072febf25f55..56e11136d0f6 100644
--- a/tools/testing/selftests/tc-testing/tc-tests/actions/csum.json
+++ b/tools/testing/selftests/tc-testing/tc-tests/actions/csum.json
@@ -6,6 +6,9 @@
"actions",
"csum"
],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
"setup": [
[
"$TC actions flush action csum",
@@ -30,6 +33,9 @@
"actions",
"csum"
],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
"setup": [
[
"$TC actions flush action csum",
@@ -54,6 +60,9 @@
"actions",
"csum"
],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
"setup": [
[
"$TC actions flush action csum",
@@ -78,6 +87,9 @@
"actions",
"csum"
],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
"setup": [
[
"$TC actions flush action csum",
@@ -102,6 +114,9 @@
"actions",
"csum"
],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
"setup": [
[
"$TC actions flush action csum",
@@ -126,6 +141,9 @@
"actions",
"csum"
],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
"setup": [
[
"$TC actions flush action csum",
@@ -150,6 +168,9 @@
"actions",
"csum"
],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
"setup": [
[
"$TC actions flush action csum",
@@ -174,6 +195,9 @@
"actions",
"csum"
],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
"setup": [
[
"$TC actions flush action csum",
@@ -198,6 +222,9 @@
"actions",
"csum"
],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
"setup": [
[
"$TC actions flush action csum",
@@ -222,6 +249,9 @@
"actions",
"csum"
],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
"setup": [
[
"$TC actions flush action csum",
@@ -246,6 +276,9 @@
"actions",
"csum"
],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
"setup": [
[
"$TC actions flush action csum",
@@ -270,6 +303,9 @@
"actions",
"csum"
],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
"setup": [
[
"$TC actions flush action csum",
@@ -294,6 +330,9 @@
"actions",
"csum"
],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
"setup": [
[
"$TC actions flush action csum",
@@ -318,6 +357,9 @@
"actions",
"csum"
],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
"setup": [
[
"$TC actions flush action csum",
@@ -342,6 +384,9 @@
"actions",
"csum"
],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
"setup": [
[
"$TC actions flush action csum",
@@ -366,6 +411,9 @@
"actions",
"csum"
],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
"setup": [
[
"$TC actions flush action csum",
@@ -390,6 +438,9 @@
"actions",
"csum"
],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
"setup": [
[
"$TC actions flush action csum",
@@ -414,6 +465,9 @@
"actions",
"csum"
],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
"setup": [
[
"$TC actions flush action csum",
@@ -438,6 +492,9 @@
"actions",
"csum"
],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
"setup": [
[
"$TC actions flush action csum",
@@ -461,6 +518,9 @@
"actions",
"csum"
],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
"setup": [
[
"$TC actions flush action csum",
@@ -485,6 +545,9 @@
"actions",
"csum"
],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
"setup": [
[
"$TC actions flush action csum",
@@ -508,6 +571,9 @@
"actions",
"csum"
],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
"setup": [
[
"$TC actions flush action csum",
@@ -533,6 +599,9 @@
"actions",
"csum"
],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
"setup": [
[
"$TC actions flush action csum",
diff --git a/tools/testing/selftests/tc-testing/tc-tests/actions/ct.json b/tools/testing/selftests/tc-testing/tc-tests/actions/ct.json
index 4202e95e27b9..7d07c55bb668 100644
--- a/tools/testing/selftests/tc-testing/tc-tests/actions/ct.json
+++ b/tools/testing/selftests/tc-testing/tc-tests/actions/ct.json
@@ -6,6 +6,9 @@
"actions",
"ct"
],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
"setup": [
[
"$TC actions flush action ct",
@@ -30,6 +33,9 @@
"actions",
"ct"
],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
"setup": [
[
"$TC actions flush action ct",
@@ -54,6 +60,9 @@
"actions",
"ct"
],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
"setup": [
[
"$TC actions flush action ct",
@@ -78,6 +87,9 @@
"actions",
"ct"
],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
"setup": [
[
"$TC actions flush action ct",
@@ -102,6 +114,9 @@
"actions",
"ct"
],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
"setup": [
[
"$TC actions flush action ct",
@@ -126,6 +141,9 @@
"actions",
"ct"
],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
"setup": [
[
"$TC actions flush action ct",
@@ -150,6 +168,9 @@
"actions",
"ct"
],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
"setup": [
[
"$TC actions flush action ct",
@@ -174,6 +195,9 @@
"actions",
"ct"
],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
"setup": [
[
"$TC actions flush action ct",
@@ -198,6 +222,9 @@
"actions",
"ct"
],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
"setup": [
[
"$TC actions flush action ct",
@@ -222,6 +249,9 @@
"actions",
"ct"
],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
"setup": [
[
"$TC actions flush action ct",
@@ -246,6 +276,9 @@
"actions",
"ct"
],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
"setup": [
[
"$TC actions flush action ct",
@@ -270,6 +303,9 @@
"actions",
"ct"
],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
"setup": [
[
"$TC actions flush action ct",
@@ -294,6 +330,9 @@
"actions",
"ct"
],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
"setup": [
[
"$TC actions flush action ct",
@@ -318,6 +357,9 @@
"actions",
"ct"
],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
"setup": [
[
"$TC actions flush action ct",
@@ -342,6 +384,9 @@
"actions",
"ct"
],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
"setup": [
[
"$TC actions flush action ct",
@@ -366,6 +411,9 @@
"actions",
"ct"
],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
"setup": [
[
"$TC actions flush action ct",
@@ -390,6 +438,9 @@
"actions",
"ct"
],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
"setup": [
[
"$TC actions flush action ct",
@@ -406,5 +457,53 @@
"teardown": [
"$TC actions flush action ct"
]
+ },
+ {
+ "id": "3992",
+ "name": "Add ct action triggering DNAT tuple conflict",
+ "category": [
+ "actions",
+ "ct",
+ "scapy"
+ ],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
+ "plugins": {
+ "requires": [
+ "nsPlugin",
+ "scapyPlugin"
+ ]
+ },
+ "setup": [
+ [
+ "$TC qdisc del dev $DEV1 ingress",
+ 0,
+ 1,
+ 2,
+ 255
+ ],
+ "$TC qdisc add dev $DEV1 ingress"
+ ],
+ "cmdUnderTest": "$TC filter add dev $DEV1 ingress protocol ip prio 1 flower ct_state -trk action ct commit nat dst addr 20.0.0.1 port 10 pipe action drop",
+ "scapy": [
+ {
+ "iface": "$DEV0",
+ "count": 1,
+ "packet": "Ether(type=0x800)/IP(src='10.0.0.10',dst='10.0.0.10')/TCP(sport=5000,dport=10)"
+ },
+ {
+ "iface": "$DEV0",
+ "count": 1,
+ "packet": "Ether(type=0x800)/IP(src='10.0.0.10',dst='10.0.0.20')/TCP(sport=5000,dport=10)"
+ }
+ ],
+ "expExitCode": "0",
+ "verifyCmd": "cat /proc/net/nf_conntrack",
+ "matchPattern": "dst=10.0.0.20",
+ "matchCount": "1",
+ "teardown": [
+ "$TC qdisc del dev $DEV1 ingress"
+ ]
}
]
diff --git a/tools/testing/selftests/tc-testing/tc-tests/actions/ctinfo.json b/tools/testing/selftests/tc-testing/tc-tests/actions/ctinfo.json
new file mode 100644
index 000000000000..bb54d71241a0
--- /dev/null
+++ b/tools/testing/selftests/tc-testing/tc-tests/actions/ctinfo.json
@@ -0,0 +1,352 @@
+[
+ {
+ "id": "c826",
+ "name": "Add ctinfo action with default setting",
+ "category": [
+ "actions",
+ "ctinfo"
+ ],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
+ "setup": [
+ [
+ "$TC action flush action ctinfo",
+ 0,
+ 1,
+ 255
+ ]
+ ],
+ "cmdUnderTest": "$TC action add action ctinfo index 10",
+ "expExitCode": "0",
+ "verifyCmd": "$TC action get action ctinfo index 10",
+ "matchPattern": "action order [0-9]*: ctinfo zone 0 pipe.*index 10 ref",
+ "matchCount": "1",
+ "teardown": [
+ "$TC action flush action ctinfo"
+ ]
+ },
+ {
+ "id": "0286",
+ "name": "Add ctinfo action with dscp",
+ "category": [
+ "actions",
+ "ctinfo"
+ ],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
+ "setup": [
+ [
+ "$TC actions flush action ctinfo",
+ 0,
+ 1,
+ 255
+ ]
+ ],
+ "cmdUnderTest": "$TC action add action ctinfo dscp 0xfc000000 0x01000000 index 100",
+ "expExitCode": "0",
+ "verifyCmd": "$TC action ls action ctinfo",
+ "matchPattern": "action order [0-9]*: ctinfo zone 0 pipe.*index 100 ref.*dscp 0xfc000000 0x01000000",
+ "matchCount": "1",
+ "teardown": [
+ "$TC actions flush action ctinfo"
+ ]
+ },
+ {
+ "id": "4938",
+ "name": "Add ctinfo action with valid cpmark and zone",
+ "category": [
+ "actions",
+ "ctinfo"
+ ],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
+ "setup": [
+ [
+ "$TC action flush action ctinfo",
+ 0,
+ 1,
+ 255
+ ]
+ ],
+ "cmdUnderTest": "$TC action add action ctinfo cpmark 0x01000000 zone 1 index 1",
+ "expExitCode": "0",
+ "verifyCmd": "$TC action get action ctinfo index 1",
+ "matchPattern": "action order [0-9]*: ctinfo zone 1 pipe.*index 1 ref.*cpmark 0x01000000",
+ "matchCount": "1",
+ "teardown": [
+ "$TC action flush action ctinfo"
+ ]
+ },
+ {
+ "id": "7593",
+ "name": "Add ctinfo action with drop control",
+ "category": [
+ "actions",
+ "ctinfo"
+ ],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
+ "setup": [
+ [
+ "$TC action flush action ctinfo",
+ 0,
+ 1,
+ 255
+ ]
+ ],
+ "cmdUnderTest": "$TC action add action ctinfo drop index 1000",
+ "expExitCode": "0",
+ "verifyCmd": "$TC action ls action ctinfo",
+ "matchPattern": "action order [0-9]*: ctinfo zone 0 drop.*index 1000 ref",
+ "matchCount": "1",
+ "teardown": [
+ "$TC action flush action ctinfo"
+ ]
+ },
+ {
+ "id": "2961",
+ "name": "Replace ctinfo action zone and action control",
+ "category": [
+ "actions",
+ "ctinfo"
+ ],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
+ "setup": [
+ [
+ "$TC actions flush action ctinfo",
+ 0,
+ 1,
+ 255
+ ],
+ [
+ "$TC action add action ctinfo zone 1 drop index 1",
+ 0,
+ 1,
+ 255
+ ]
+ ],
+ "cmdUnderTest": "$TC action replace action ctinfo zone 200 pass index 1",
+ "expExitCode": "0",
+ "verifyCmd": "$TC action get action ctinfo index 1",
+ "matchPattern": "action order [0-9]*: ctinfo zone 200 pass.*index 1 ref",
+ "matchCount": "1",
+ "teardown": [
+ "$TC action flush action ctinfo"
+ ]
+ },
+ {
+ "id": "e567",
+ "name": "Delete ctinfo action with valid index",
+ "category": [
+ "actions",
+ "ctinfo"
+ ],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
+ "setup": [
+ [
+ "$TC actions flush action ctinfo",
+ 0,
+ 1,
+ 255
+ ],
+ [
+ "$TC action add action ctinfo zone 200 pass index 1",
+ 0,
+ 1,
+ 255
+ ]
+ ],
+ "cmdUnderTest": "$TC action delete action ctinfo index 1",
+ "expExitCode": "0",
+ "verifyCmd": "$TC action get action ctinfo index 1",
+ "matchPattern": "action order [0-9]*: ctinfo zone 200 pass.*index 1 ref",
+ "matchCount": "0",
+ "teardown": [
+ "$TC action flush action ctinfo"
+ ]
+ },
+ {
+ "id": "6a91",
+ "name": "Delete ctinfo action with invalid index",
+ "category": [
+ "actions",
+ "ctinfo"
+ ],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
+ "setup": [
+ [
+ "$TC actions flush action ctinfo",
+ 0,
+ 1,
+ 255
+ ],
+ [
+ "$TC action add action ctinfo zone 200 pass index 1",
+ 0,
+ 1,
+ 255
+ ]
+ ],
+ "cmdUnderTest": "$TC action delete action ctinfo index 333",
+ "expExitCode": "255",
+ "verifyCmd": "$TC action get action ctinfo index 1",
+ "matchPattern": "action order [0-9]*: ctinfo zone 200 pass.*index 1 ref",
+ "matchCount": "1",
+ "teardown": [
+ "$TC action flush action ctinfo"
+ ]
+ },
+ {
+ "id": "5232",
+ "name": "List ctinfo actions",
+ "category": [
+ "actions",
+ "ctinfo"
+ ],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
+ "setup": [
+ [
+ "$TC action flush action ctinfo",
+ 0,
+ 1,
+ 255
+ ],
+ "$TC action add action ctinfo zone 20 pass index 101",
+ "$TC action add action ctinfo cpmark 0x02000000 drop index 102",
+ "$TC action add action ctinfo continue index 103"
+ ],
+ "cmdUnderTest": "$TC action list action ctinfo",
+ "expExitCode": "0",
+ "verifyCmd": "$TC action list action ctinfo",
+ "matchPattern": "action order [0-9]*: ctinfo",
+ "matchCount": "3",
+ "teardown": [
+ "$TC actions flush action ctinfo"
+ ]
+ },
+ {
+ "id": "7702",
+ "name": "Flush ctinfo actions",
+ "category": [
+ "actions",
+ "ctinfo"
+ ],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
+ "setup": [
+ [
+ "$TC actions flush action ctinfo",
+ 0,
+ 1,
+ 255
+ ],
+ "$TC action add action ctinfo zone 20 pass index 101",
+ "$TC action add action ctinfo cpmark 0x02000000 drop index 102",
+ "$TC action add action ctinfo continue index 103"
+ ],
+ "cmdUnderTest": "$TC action flush action ctinfo",
+ "expExitCode": "0",
+ "verifyCmd": "$TC action list action ctinfo",
+ "matchPattern": "action order [0-9]*: ctinfo",
+ "matchCount": "0",
+ "teardown": [
+ "$TC actions flush action ctinfo"
+ ]
+ },
+ {
+ "id": "3201",
+ "name": "Add ctinfo action with duplicate index",
+ "category": [
+ "actions",
+ "ctinfo"
+ ],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
+ "setup": [
+ [
+ "$TC actions flush action ctinfo",
+ 0,
+ 1,
+ 255
+ ],
+ "$TC action add action ctinfo zone 20 pass index 101"
+ ],
+ "cmdUnderTest": "$TC action add action ctinfo cpmark 0x02000000 drop index 101",
+ "expExitCode": "255",
+ "verifyCmd": "$TC action get action ctinfo index 101",
+ "matchPattern": "action order [0-9]*: ctinfo zone 20 pass.*index 101",
+ "matchCount": "1",
+ "teardown": [
+ "$TC action flush action ctinfo"
+ ]
+ },
+ {
+ "id": "8295",
+ "name": "Add ctinfo action with invalid index",
+ "category": [
+ "actions",
+ "ctinfo"
+ ],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
+ "setup": [
+ [
+ "$TC actions flush action ctinfo",
+ 0,
+ 1,
+ 255
+ ]
+ ],
+ "cmdUnderTest": "$TC action add action ctinfo zone 20 index 4294967296",
+ "expExitCode": "255",
+ "verifyCmd": "$TC action ls action ctinfo",
+ "matchPattern": "action order [0-9]*: ctinfo",
+ "matchCount": "0",
+ "teardown": [
+ "$TC action flush action ctinfo"
+ ]
+ },
+ {
+ "id": "3964",
+ "name": "Replace ctinfo action with invalid goto_chain control",
+ "category": [
+ "actions",
+ "ctinfo"
+ ],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
+ "setup": [
+ [
+ "$TC actions flush action ctinfo",
+ 0,
+ 1,
+ 255
+ ],
+ "$TC action add action ctinfo pass index 90"
+ ],
+ "cmdUnderTest": "$TC action replace action ctinfo goto chain 42 index 90",
+ "expExitCode": "255",
+ "verifyCmd": "$TC action list action ctinfo",
+ "matchPattern": "action order [0-9]*: ctinfo.*pass.*index 90",
+ "matchCount": "1",
+ "teardown": [
+ "$TC action flush action ctinfo"
+ ]
+ }
+]
diff --git a/tools/testing/selftests/tc-testing/tc-tests/actions/gact.json b/tools/testing/selftests/tc-testing/tc-tests/actions/gact.json
index b24494c6f546..0fcd52742939 100644
--- a/tools/testing/selftests/tc-testing/tc-tests/actions/gact.json
+++ b/tools/testing/selftests/tc-testing/tc-tests/actions/gact.json
@@ -6,6 +6,9 @@
"actions",
"gact"
],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
"setup": [
[
"$TC actions flush action gact",
@@ -30,6 +33,9 @@
"actions",
"gact"
],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
"setup": [
[
"$TC actions flush action gact",
@@ -54,6 +60,9 @@
"actions",
"gact"
],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
"setup": [
[
"$TC actions flush action gact",
@@ -78,6 +87,9 @@
"actions",
"gact"
],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
"setup": [
[
"$TC actions flush action gact",
@@ -102,6 +114,9 @@
"actions",
"gact"
],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
"setup": [
[
"$TC actions flush action gact",
@@ -126,6 +141,9 @@
"actions",
"gact"
],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
"setup": [
[
"$TC actions flush action gact",
@@ -150,6 +168,9 @@
"actions",
"gact"
],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
"setup": [
[
"$TC actions flush action gact",
@@ -175,6 +196,9 @@
"actions",
"gact"
],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
"setup": [
[
"$TC actions flush action gact",
@@ -199,6 +223,9 @@
"actions",
"gact"
],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
"setup": [
[
"$TC actions flush action gact",
@@ -223,6 +250,9 @@
"actions",
"gact"
],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
"setup": [
[
"$TC actions flush action gact",
@@ -252,6 +282,9 @@
"actions",
"gact"
],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
"setup": [
"$TC actions add action reclassify index 101",
"$TC actions add action reclassify index 102",
@@ -273,6 +306,9 @@
"actions",
"gact"
],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
"setup": [
[
"$TC actions flush action gact",
@@ -298,6 +334,9 @@
"actions",
"gact"
],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
"setup": [
[
"$TC actions flush action gact",
@@ -323,6 +362,9 @@
"actions",
"gact"
],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
"setup": [
[
"$TC actions flush action gact",
@@ -348,6 +390,9 @@
"actions",
"gact"
],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
"setup": [
[
"$TC actions flush action gact",
@@ -373,6 +418,9 @@
"actions",
"gact"
],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
"setup": [
[
"$TC actions flush action gact",
@@ -398,6 +446,9 @@
"actions",
"gact"
],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
"setup": [
[
"$TC actions flush action gact",
@@ -422,6 +473,9 @@
"actions",
"gact"
],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
"setup": [
[
"$TC actions flush action gact",
@@ -448,6 +502,9 @@
"actions",
"gact"
],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
"setup": [
[
"$TC actions flush action gact",
@@ -473,6 +530,9 @@
"actions",
"gact"
],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
"setup": [
[
"$TC actions flush action gact",
@@ -497,6 +557,9 @@
"actions",
"gact"
],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
"setup": [
[
"$TC actions flush action gact",
@@ -521,6 +584,9 @@
"actions",
"gact"
],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
"setup": [
[
"$TC actions flush action gact",
@@ -544,6 +610,9 @@
"actions",
"gact"
],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
"setup": [
[
"$TC actions flush action gact",
@@ -568,6 +637,9 @@
"actions",
"gact"
],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
"setup": [
[
"$TC actions flush action gact",
@@ -593,6 +665,9 @@
"actions",
"gact"
],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
"setup": [
[
"$TC actions flush action gact",
@@ -609,5 +684,82 @@
"teardown": [
"$TC actions flush action gact"
]
+ },
+ {
+ "id": "7f52",
+ "name": "Try to flush action which is referenced by filter",
+ "category": [
+ "actions",
+ "gact"
+ ],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
+ "setup": [
+ [
+ "$TC actions flush action gact",
+ 0,
+ 1,
+ 255
+ ],
+ "$TC qdisc add dev $DEV1 ingress",
+ "$TC actions add action pass index 1",
+ "$TC filter add dev $DEV1 protocol all ingress prio 1 handle 0x1234 matchall action gact index 1"
+ ],
+ "cmdUnderTest": "$TC actions flush action gact",
+ "expExitCode": "1",
+ "verifyCmd": "$TC actions ls action gact",
+ "matchPattern": "total acts 1.*action order [0-9]*: gact action pass.*index 1 ref 2 bind 1",
+ "matchCount": "1",
+ "teardown": [
+ "$TC qdisc del dev $DEV1 ingress",
+ [
+ "sleep 1; $TC actions flush action gact",
+ 0,
+ 1
+ ]
+ ]
+ },
+ {
+ "id": "ae1e",
+ "name": "Try to flush actions when last one is referenced by filter",
+ "category": [
+ "actions",
+ "gact"
+ ],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
+ "setup": [
+ [
+ "$TC actions flush action gact",
+ 0,
+ 1,
+ 255
+ ],
+ "$TC qdisc add dev $DEV1 ingress",
+ [
+ "$TC actions add action pass index 1",
+ 0,
+ 1,
+ 255
+ ],
+ "$TC actions add action reclassify index 2",
+ "$TC actions add action drop index 3",
+ "$TC filter add dev $DEV1 protocol all ingress prio 1 handle 0x1234 matchall action gact index 3"
+ ],
+ "cmdUnderTest": "$TC actions flush action gact",
+ "expExitCode": "0",
+ "verifyCmd": "$TC actions ls action gact",
+ "matchPattern": "total acts 1.*action order [0-9]*: gact action drop.*index 3 ref 2 bind 1",
+ "matchCount": "1",
+ "teardown": [
+ "$TC qdisc del dev $DEV1 ingress",
+ [
+ "sleep 1; $TC actions flush action gact",
+ 0,
+ 1
+ ]
+ ]
}
]
diff --git a/tools/testing/selftests/tc-testing/tc-tests/actions/gate.json b/tools/testing/selftests/tc-testing/tc-tests/actions/gate.json
new file mode 100644
index 000000000000..db645c22ad7b
--- /dev/null
+++ b/tools/testing/selftests/tc-testing/tc-tests/actions/gate.json
@@ -0,0 +1,351 @@
+[
+ {
+ "id": "5153",
+ "name": "Add gate action with priority and sched-entry",
+ "category": [
+ "actions",
+ "gate"
+ ],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
+ "setup": [
+ [
+ "$TC action flush action gate",
+ 0,
+ 1,
+ 255
+ ]
+ ],
+ "cmdUnderTest": "$TC action add action gate priority 1 sched-entry close 100000000ns index 100",
+ "expExitCode": "0",
+ "verifyCmd": "$TC action get action gate index 100",
+ "matchPattern": "action order [0-9]*: .*priority 1.*index 100 ref",
+ "matchCount": "1",
+ "teardown": [
+ "$TC action flush action gate"
+ ]
+ },
+ {
+ "id": "7189",
+ "name": "Add gate action with base-time",
+ "category": [
+ "actions",
+ "gate"
+ ],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
+ "setup": [
+ [
+ "$TC actions flush action gate",
+ 0,
+ 1,
+ 255
+ ]
+ ],
+ "cmdUnderTest": "$TC action add action gate base-time 200000000000ns sched-entry close 100000000ns index 10",
+ "expExitCode": "0",
+ "verifyCmd": "$TC action ls action gate",
+ "matchPattern": "action order [0-9]*: .*base-time 200s.*index 10 ref",
+ "matchCount": "1",
+ "teardown": [
+ "$TC actions flush action gate"
+ ]
+ },
+ {
+ "id": "a721",
+ "name": "Add gate action with cycle-time",
+ "category": [
+ "actions",
+ "gate"
+ ],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
+ "setup": [
+ [
+ "$TC action flush action gate",
+ 0,
+ 1,
+ 255
+ ]
+ ],
+ "cmdUnderTest": "$TC action add action gate cycle-time 200000000000ns sched-entry close 100000000ns index 1000",
+ "expExitCode": "0",
+ "verifyCmd": "$TC action ls action gate",
+ "matchPattern": "action order [0-9]*: .*cycle-time 200s.*index 1000 ref",
+ "matchCount": "1",
+ "teardown": [
+ "$TC action flush action gate"
+ ]
+ },
+ {
+ "id": "c029",
+ "name": "Add gate action with cycle-time-ext",
+ "category": [
+ "actions",
+ "gate"
+ ],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
+ "setup": [
+ [
+ "$TC action flush action gate",
+ 0,
+ 1,
+ 255
+ ]
+ ],
+ "cmdUnderTest": "$TC action add action gate cycle-time-ext 20000000000ns sched-entry close 100000000ns index 1000",
+ "expExitCode": "0",
+ "verifyCmd": "$TC action get action gate index 1000",
+ "matchPattern": "action order [0-9]*: .*cycle-time-ext 20s.*index 1000 ref",
+ "matchCount": "1",
+ "teardown": [
+ "$TC action flush action gate"
+ ]
+ },
+ {
+ "id": "3719",
+ "name": "Replace gate base-time action",
+ "category": [
+ "actions",
+ "gate"
+ ],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
+ "setup": [
+ [
+ "$TC actions flush action gate",
+ 0,
+ 1,
+ 255
+ ],
+ [
+ "$TC action add action gate base-time 200000000000ns sched-entry open 200000000ns -1 8000000b index 20",
+ 0,
+ 1,
+ 255
+ ]
+ ],
+ "cmdUnderTest": "$TC action replace action gate base-time 400000000000ns index 20",
+ "expExitCode": "0",
+ "verifyCmd": "$TC action get action gate index 20",
+ "matchPattern": "action order [0-9]*: .*base-time 400s.*index 20 ref",
+ "matchCount": "1",
+ "teardown": [
+ "$TC action flush action gate"
+ ]
+ },
+ {
+ "id": "d821",
+ "name": "Delete gate action with valid index",
+ "category": [
+ "actions",
+ "gate"
+ ],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
+ "setup": [
+ [
+ "$TC actions flush action gate",
+ 0,
+ 1,
+ 255
+ ],
+ [
+ "$TC action add action gate base-time 200000000000ns sched-entry open 200000000ns -1 8000000b index 302",
+ 0,
+ 1,
+ 255
+ ]
+ ],
+ "cmdUnderTest": "$TC action delete action gate index 302",
+ "expExitCode": "0",
+ "verifyCmd": "$TC action get action bpf index 302",
+ "matchPattern": "action order [0-9]*: .*base-time 200s.*index 302 ref",
+ "matchCount": "0",
+ "teardown": [
+ "$TC action flush action gate"
+ ]
+ },
+ {
+ "id": "3128",
+ "name": "Delete gate action with invalid index",
+ "category": [
+ "actions",
+ "gate"
+ ],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
+ "setup": [
+ [
+ "$TC actions flush action gate",
+ 0,
+ 1,
+ 255
+ ],
+ [
+ "$TC action add action gate base-time 600000000000ns sched-entry open 200000000ns -1 8000000b index 999",
+ 0,
+ 1,
+ 255
+ ]
+ ],
+ "cmdUnderTest": "$TC action delete action gate index 333",
+ "expExitCode": "255",
+ "verifyCmd": "$TC action get action gate index 999",
+ "matchPattern": "action order [0-9]*: .*base-time 600s.*index 999 ref",
+ "matchCount": "1",
+ "teardown": [
+ "$TC action flush action gate"
+ ]
+ },
+ {
+ "id": "7837",
+ "name": "List gate actions",
+ "category": [
+ "actions",
+ "gate"
+ ],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
+ "setup": [
+ [
+ "$TC action flush action gate",
+ 0,
+ 1,
+ 255
+ ],
+ "$TC action add action gate base-time 600000000000ns sched-entry open 200000000ns -1 8000000b index 101",
+ "$TC action add action gate cycle-time 600000000000ns sched-entry open 600000000ns -1 8000000b index 102",
+ "$TC action add action gate cycle-time-ext 400000000000ns sched-entry close 100000000ns index 103"
+ ],
+ "cmdUnderTest": "$TC action list action gate",
+ "expExitCode": "0",
+ "verifyCmd": "$TC action list action gate",
+ "matchPattern": "action order [0-9]*:",
+ "matchCount": "3",
+ "teardown": [
+ "$TC actions flush action gate"
+ ]
+ },
+ {
+ "id": "9273",
+ "name": "Flush gate actions",
+ "category": [
+ "actions",
+ "gate"
+ ],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
+ "setup": [
+ [
+ "$TC actions flush action gate",
+ 0,
+ 1,
+ 255
+ ],
+ "$TC action add action gate base-time 600000000000ns sched-entry open 200000000ns -1 8000000b index 101",
+ "$TC action add action gate cycle-time 600000000000ns sched-entry open 600000000ns -1 8000000b index 102",
+ "$TC action add action gate cycle-time-ext 400000000000ns sched-entry close 100000000ns index 103"
+ ],
+ "cmdUnderTest": "$TC action flush action gate",
+ "expExitCode": "0",
+ "verifyCmd": "$TC action list action gate",
+ "matchPattern": "action order [0-9]*: .*priority",
+ "matchCount": "0",
+ "teardown": [
+ "$TC actions flush action gate"
+ ]
+ },
+ {
+ "id": "c829",
+ "name": "Add gate action with duplicate index",
+ "category": [
+ "actions",
+ "gate"
+ ],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
+ "setup": [
+ [
+ "$TC actions flush action gate",
+ 0,
+ 1,
+ 255
+ ],
+ "$TC action add action gate cycle-time 600000000000ns sched-entry open 600000000ns -1 8000000b index 4294967295"
+ ],
+ "cmdUnderTest": "$TC action add action gate cycle-time 600000000000ns sched-entry open 600000000ns -1 8000000b index 4294967295",
+ "expExitCode": "255",
+ "verifyCmd": "$TC action get action gate index 4294967295",
+ "matchPattern": "action order [0-9]*: .*index 4294967295",
+ "matchCount": "1",
+ "teardown": [
+ "$TC action flush action gate"
+ ]
+ },
+ {
+ "id": "3043",
+ "name": "Add gate action with invalid index",
+ "category": [
+ "actions",
+ "gate"
+ ],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
+ "setup": [
+ [
+ "$TC actions flush action gate",
+ 0,
+ 1,
+ 255
+ ]
+ ],
+ "cmdUnderTest": "$TC action add action gate cycle-time-ext 400000000000ns sched-entry close 100000000ns index 4294967296",
+ "expExitCode": "255",
+ "verifyCmd": "$TC action ls action gate",
+ "matchPattern": "action order [0-9]*:",
+ "matchCount": "0",
+ "teardown": [
+ "$TC action flush action gate"
+ ]
+ },
+ {
+ "id": "2930",
+ "name": "Add gate action with cookie",
+ "category": [
+ "actions",
+ "gate"
+ ],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
+ "setup": [
+ [
+ "$TC actions flush action gate",
+ 0,
+ 1,
+ 255
+ ]
+ ],
+ "cmdUnderTest": "$TC action add action gate cycle-time-ext 400000000000ns sched-entry close 100000000ns index 4294 cookie d0d0d0d0d0d0d0d0",
+ "expExitCode": "0",
+ "verifyCmd": "$TC action list action gate",
+ "matchPattern": "action order [0-9]*: .*cookie d0d0d0d0d0d0d0",
+ "matchCount": "1",
+ "teardown": [
+ "$TC action flush action gate"
+ ]
+ }
+]
diff --git a/tools/testing/selftests/tc-testing/tc-tests/actions/ife.json b/tools/testing/selftests/tc-testing/tc-tests/actions/ife.json
index c13a68b98fc7..f587a32e44c4 100644
--- a/tools/testing/selftests/tc-testing/tc-tests/actions/ife.json
+++ b/tools/testing/selftests/tc-testing/tc-tests/actions/ife.json
@@ -6,6 +6,9 @@
"actions",
"ife"
],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
"setup": [
[
"$TC actions flush action ife",
@@ -30,6 +33,9 @@
"actions",
"ife"
],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
"setup": [
[
"$TC actions flush action ife",
@@ -54,6 +60,9 @@
"actions",
"ife"
],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
"setup": [
[
"$TC actions flush action ife",
@@ -78,6 +87,9 @@
"actions",
"ife"
],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
"setup": [
[
"$TC actions flush action ife",
@@ -102,6 +114,9 @@
"actions",
"ife"
],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
"setup": [
[
"$TC actions flush action ife",
@@ -126,6 +141,9 @@
"actions",
"ife"
],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
"setup": [
[
"$TC actions flush action ife",
@@ -150,6 +168,9 @@
"actions",
"ife"
],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
"setup": [
[
"$TC actions flush action ife",
@@ -174,6 +195,9 @@
"actions",
"ife"
],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
"setup": [
[
"$TC actions flush action ife",
@@ -196,6 +220,9 @@
"actions",
"ife"
],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
"setup": [
[
"$TC actions flush action ife",
@@ -220,6 +247,9 @@
"actions",
"ife"
],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
"setup": [
[
"$TC actions flush action ife",
@@ -244,6 +274,9 @@
"actions",
"ife"
],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
"setup": [
[
"$TC actions flush action ife",
@@ -268,6 +301,9 @@
"actions",
"ife"
],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
"setup": [
[
"$TC actions flush action ife",
@@ -292,6 +328,9 @@
"actions",
"ife"
],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
"setup": [
[
"$TC actions flush action ife",
@@ -316,6 +355,9 @@
"actions",
"ife"
],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
"setup": [
[
"$TC actions flush action ife",
@@ -340,6 +382,9 @@
"actions",
"ife"
],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
"setup": [
[
"$TC actions flush action ife",
@@ -364,6 +409,9 @@
"actions",
"ife"
],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
"setup": [
[
"$TC actions flush action ife",
@@ -386,6 +434,9 @@
"actions",
"ife"
],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
"setup": [
[
"$TC actions flush action ife",
@@ -410,6 +461,9 @@
"actions",
"ife"
],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
"setup": [
[
"$TC actions flush action ife",
@@ -434,6 +488,9 @@
"actions",
"ife"
],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
"setup": [
[
"$TC actions flush action ife",
@@ -458,6 +515,9 @@
"actions",
"ife"
],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
"setup": [
[
"$TC actions flush action ife",
@@ -482,6 +542,9 @@
"actions",
"ife"
],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
"setup": [
[
"$TC actions flush action ife",
@@ -506,6 +569,9 @@
"actions",
"ife"
],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
"setup": [
[
"$TC actions flush action ife",
@@ -530,6 +596,9 @@
"actions",
"ife"
],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
"setup": [
[
"$TC actions flush action ife",
@@ -554,6 +623,9 @@
"actions",
"ife"
],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
"setup": [
[
"$TC actions flush action ife",
@@ -578,6 +650,9 @@
"actions",
"ife"
],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
"setup": [
[
"$TC actions flush action ife",
@@ -600,6 +675,9 @@
"actions",
"ife"
],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
"setup": [
[
"$TC actions flush action ife",
@@ -624,6 +702,9 @@
"actions",
"ife"
],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
"setup": [
[
"$TC actions flush action ife",
@@ -648,6 +729,9 @@
"actions",
"ife"
],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
"setup": [
[
"$TC actions flush action ife",
@@ -672,6 +756,9 @@
"actions",
"ife"
],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
"setup": [
[
"$TC actions flush action ife",
@@ -696,6 +783,9 @@
"actions",
"ife"
],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
"setup": [
[
"$TC actions flush action ife",
@@ -720,6 +810,9 @@
"actions",
"ife"
],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
"setup": [
[
"$TC actions flush action ife",
@@ -744,6 +837,9 @@
"actions",
"ife"
],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
"setup": [
[
"$TC actions flush action ife",
@@ -768,6 +864,9 @@
"actions",
"ife"
],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
"setup": [
[
"$TC actions flush action ife",
@@ -792,6 +891,9 @@
"actions",
"ife"
],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
"setup": [
[
"$TC actions flush action ife",
@@ -816,6 +918,9 @@
"actions",
"ife"
],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
"setup": [
[
"$TC actions flush action ife",
@@ -840,6 +945,9 @@
"actions",
"ife"
],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
"setup": [
[
"$TC actions flush action ife",
@@ -864,6 +972,9 @@
"actions",
"ife"
],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
"setup": [
[
"$TC actions flush action ife",
@@ -888,6 +999,9 @@
"actions",
"ife"
],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
"setup": [
[
"$TC actions flush action ife",
@@ -912,6 +1026,9 @@
"actions",
"ife"
],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
"setup": [
[
"$TC actions flush action ife",
@@ -934,6 +1051,9 @@
"actions",
"ife"
],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
"setup": [
[
"$TC actions flush action ife",
@@ -956,6 +1076,9 @@
"actions",
"ife"
],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
"setup": [
[
"$TC actions flush action ife",
@@ -980,6 +1103,9 @@
"actions",
"ife"
],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
"setup": [
[
"$TC actions flush action ife",
@@ -1002,6 +1128,9 @@
"actions",
"ife"
],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
"setup": [
[
"$TC actions flush action ife",
@@ -1024,6 +1153,9 @@
"actions",
"ife"
],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
"setup": [
[
"$TC actions flush action ife",
@@ -1046,6 +1178,9 @@
"actions",
"ife"
],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
"setup": [
[
"$TC actions flush action ife",
@@ -1068,6 +1203,9 @@
"actions",
"ife"
],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
"setup": [
[
"$TC actions flush action ife",
@@ -1085,5 +1223,61 @@
"teardown": [
"$TC actions flush action ife"
]
+ },
+ {
+ "id": "a972",
+ "name": "Delete ife encode action with valid index",
+ "category": [
+ "actions",
+ "ife"
+ ],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
+ "setup": [
+ [
+ "$TC actions flush action ife",
+ 0,
+ 1,
+ 255
+ ],
+ "$TC actions add action ife encode allow mark pass index 20"
+ ],
+ "cmdUnderTest": "$TC actions del action ife index 20",
+ "expExitCode": "0",
+ "verifyCmd": "$TC actions ls action ife index 20",
+ "matchPattern": "action order [0-9]*: ife encode action pass.*type 0[xX]ED3E.*allow mark.*index 20",
+ "matchCount": "0",
+ "teardown": [
+ "$TC actions flush action ife"
+ ]
+ },
+ {
+ "id": "1272",
+ "name": "Delete ife encode action with invalid index",
+ "category": [
+ "actions",
+ "ife"
+ ],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
+ "setup": [
+ [
+ "$TC actions flush action ife",
+ 0,
+ 1,
+ 255
+ ],
+ "$TC actions add action ife encode allow mark pass index 20"
+ ],
+ "cmdUnderTest": "$TC actions del action ife index 10",
+ "expExitCode": "255",
+ "verifyCmd": "$TC actions ls action ife index 20",
+ "matchPattern": "action order [0-9]*: ife encode action pass.*type 0[xX]ED3E.*allow mark.*index 20",
+ "matchCount": "1",
+ "teardown": [
+ "$TC actions flush action ife"
+ ]
}
]
diff --git a/tools/testing/selftests/tc-testing/tc-tests/actions/mirred.json b/tools/testing/selftests/tc-testing/tc-tests/actions/mirred.json
index 12a2fe0e1472..b73bd255ea36 100644
--- a/tools/testing/selftests/tc-testing/tc-tests/actions/mirred.json
+++ b/tools/testing/selftests/tc-testing/tc-tests/actions/mirred.json
@@ -6,6 +6,9 @@
"actions",
"mirred"
],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
"setup": [
[
"$TC actions flush action mirred",
@@ -30,6 +33,9 @@
"actions",
"mirred"
],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
"setup": [
[
"$TC actions flush action mirred",
@@ -55,6 +61,9 @@
"actions",
"mirred"
],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
"setup": [
[
"$TC actions flush action mirred",
@@ -81,6 +90,9 @@
"actions",
"mirred"
],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
"setup": [
[
"$TC actions flush action mirred",
@@ -105,6 +117,9 @@
"actions",
"mirred"
],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
"setup": [
[
"$TC actions flush action mirred",
@@ -129,6 +144,9 @@
"actions",
"mirred"
],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
"setup": [
[
"$TC actions flush action mirred",
@@ -153,6 +171,9 @@
"actions",
"mirred"
],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
"setup": [
[
"$TC actions flush action mirred",
@@ -178,6 +199,9 @@
"actions",
"mirred"
],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
"setup": [
[
"$TC actions flush action mirred",
@@ -202,6 +226,9 @@
"actions",
"mirred"
],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
"setup": [
[
"$TC actions flush action mirred",
@@ -226,6 +253,9 @@
"actions",
"mirred"
],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
"setup": [
[
"$TC actions flush action mirred",
@@ -250,6 +280,9 @@
"actions",
"mirred"
],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
"setup": [
[
"$TC actions flush action mirred",
@@ -274,6 +307,9 @@
"actions",
"mirred"
],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
"setup": [
[
"$TC actions flush action mirred",
@@ -298,6 +334,9 @@
"actions",
"mirred"
],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
"setup": [
[
"$TC actions flush action mirred",
@@ -322,6 +361,9 @@
"actions",
"mirred"
],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
"setup": [
[
"$TC actions flush action mirred",
@@ -346,6 +388,9 @@
"actions",
"mirred"
],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
"setup": [
[
"$TC actions flush action mirred",
@@ -370,6 +415,9 @@
"actions",
"mirred"
],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
"setup": [
[
"$TC actions flush action mirred",
@@ -392,6 +440,9 @@
"actions",
"mirred"
],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
"setup": [
[
"$TC actions flush action mirred",
@@ -417,6 +468,9 @@
"actions",
"mirred"
],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
"setup": [
[
"$TC actions flush action mirred",
@@ -442,6 +496,9 @@
"actions",
"mirred"
],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
"setup": [
[
"$TC actions flush action mirred",
@@ -467,6 +524,9 @@
"actions",
"mirred"
],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
"setup": [
[
"$TC actions flush action mirred",
@@ -491,6 +551,9 @@
"actions",
"mirred"
],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
"setup": [
[
"$TC actions flush action mirred",
@@ -514,6 +577,9 @@
"actions",
"mirred"
],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
"setup": [
[
"$TC actions flush action mirred",
@@ -538,6 +604,9 @@
"actions",
"mirred"
],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
"setup": [
[
"$TC actions flush action mirred",
@@ -561,6 +630,9 @@
"actions",
"mirred"
],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
"setup": [
[
"$TC actions flush action mirred",
@@ -577,5 +649,408 @@
"teardown": [
"$TC actions flush action mirred"
]
+ },
+ {
+ "id": "456d",
+ "name": "Add mirred mirror to egress block action",
+ "category": [
+ "actions",
+ "mirred"
+ ],
+ "dependsOn": "$TC actions add action mirred help 2>&1 | grep -q blockid",
+ "plugins": {
+ "requires": "nsPlugin"
+ },
+ "setup": [
+ [
+ "$TC actions flush action mirred",
+ 0,
+ 1,
+ 255
+ ],
+ [
+ "$TC qdisc add dev $DEV1 egress_block 21 clsact",
+ 0
+ ]
+ ],
+ "cmdUnderTest": "$TC actions add action mirred egress mirror index 1 blockid 21",
+ "expExitCode": "0",
+ "verifyCmd": "$TC -j actions get action mirred index 1",
+ "matchJSON": [
+ {
+ "total acts": 0
+ },
+ {
+ "actions": [
+ {
+ "order": 1,
+ "kind": "mirred",
+ "mirred_action": "mirror",
+ "direction": "egress",
+ "to_blockid": 21,
+ "control_action": {
+ "type": "pipe"
+ },
+ "index": 1,
+ "ref": 1,
+ "bind": 0,
+ "not_in_hw": true
+ }
+ ]
+ }
+ ],
+ "matchCount": "1",
+ "teardown": [
+ "$TC qdisc del dev $DEV1 egress_block 21 clsact",
+ "$TC actions flush action mirred"
+ ]
+ },
+ {
+ "id": "2358",
+ "name": "Add mirred mirror to ingress block action",
+ "category": [
+ "actions",
+ "mirred"
+ ],
+ "dependsOn": "$TC actions add action mirred help 2>&1 | grep -q blockid",
+ "plugins": {
+ "requires": "nsPlugin"
+ },
+ "setup": [
+ [
+ "$TC actions flush action mirred",
+ 0,
+ 1,
+ 255
+ ],
+ [
+ "$TC qdisc add dev $DEV1 ingress_block 21 clsact",
+ 0
+ ]
+ ],
+ "cmdUnderTest": "$TC actions add action mirred ingress mirror index 1 blockid 21",
+ "expExitCode": "0",
+ "verifyCmd": "$TC -j actions get action mirred index 1",
+ "matchJSON": [
+ {
+ "total acts": 0
+ },
+ {
+ "actions": [
+ {
+ "order": 1,
+ "kind": "mirred",
+ "mirred_action": "mirror",
+ "direction": "ingress",
+ "to_blockid": 21,
+ "control_action": {
+ "type": "pipe"
+ },
+ "index": 1,
+ "ref": 1,
+ "bind": 0,
+ "not_in_hw": true
+ }
+ ]
+ }
+ ],
+ "matchCount": "1",
+ "teardown": [
+ "$TC qdisc del dev $DEV1 ingress_block 21 clsact",
+ "$TC actions flush action mirred"
+ ]
+ },
+ {
+ "id": "fdb1",
+ "name": "Add mirred redirect to egress block action",
+ "category": [
+ "actions",
+ "mirred"
+ ],
+ "dependsOn": "$TC actions add action mirred help 2>&1 | grep -q blockid",
+ "plugins": {
+ "requires": "nsPlugin"
+ },
+ "setup": [
+ [
+ "$TC actions flush action mirred",
+ 0,
+ 1,
+ 255
+ ],
+ [
+ "$TC qdisc add dev $DEV1 ingress_block 21 clsact",
+ 0
+ ]
+ ],
+ "cmdUnderTest": "$TC actions add action mirred egress redirect index 1 blockid 21",
+ "expExitCode": "0",
+ "verifyCmd": "$TC -j actions get action mirred index 1",
+ "matchJSON": [
+ {
+ "total acts": 0
+ },
+ {
+ "actions": [
+ {
+ "order": 1,
+ "kind": "mirred",
+ "mirred_action": "redirect",
+ "direction": "egress",
+ "to_blockid": 21,
+ "control_action": {
+ "type": "stolen"
+ },
+ "index": 1,
+ "ref": 1,
+ "bind": 0,
+ "not_in_hw": true
+ }
+ ]
+ }
+ ],
+ "matchCount": "1",
+ "teardown": [
+ "$TC qdisc del dev $DEV1 ingress_block 21 clsact",
+ "$TC actions flush action mirred"
+ ]
+ },
+ {
+ "id": "20cc",
+ "name": "Add mirred redirect to ingress block action",
+ "category": [
+ "actions",
+ "mirred"
+ ],
+ "dependsOn": "$TC actions add action mirred help 2>&1 | grep -q blockid",
+ "plugins": {
+ "requires": "nsPlugin"
+ },
+ "setup": [
+ [
+ "$TC actions flush action mirred",
+ 0,
+ 1,
+ 255
+ ],
+ [
+ "$TC qdisc add dev $DEV1 ingress_block 21 clsact",
+ 0
+ ]
+ ],
+ "cmdUnderTest": "$TC actions add action mirred ingress redirect index 1 blockid 21",
+ "expExitCode": "0",
+ "verifyCmd": "$TC -j actions get action mirred index 1",
+ "matchJSON": [
+ {
+ "total acts": 0
+ },
+ {
+ "actions": [
+ {
+ "order": 1,
+ "kind": "mirred",
+ "mirred_action": "redirect",
+ "direction": "ingress",
+ "to_blockid": 21,
+ "control_action": {
+ "type": "stolen"
+ },
+ "index": 1,
+ "ref": 1,
+ "bind": 0,
+ "not_in_hw": true
+ }
+ ]
+ }
+ ],
+ "matchCount": "1",
+ "teardown": [
+ "$TC qdisc del dev $DEV1 ingress_block 21 clsact",
+ "$TC actions flush action mirred"
+ ]
+ },
+ {
+ "id": "e739",
+ "name": "Try to add mirred action with both dev and block",
+ "category": [
+ "actions",
+ "mirred"
+ ],
+ "dependsOn": "$TC actions add action mirred help 2>&1 | grep -q blockid",
+ "plugins": {
+ "requires": "nsPlugin"
+ },
+ "setup": [
+ [
+ "$TC actions flush action mirred",
+ 0,
+ 1,
+ 255
+ ],
+ [
+ "$TC qdisc add dev $DEV1 ingress_block 21 clsact",
+ 0
+ ]
+ ],
+ "cmdUnderTest": "$TC actions add action mirred ingress redirect index 1 blockid 21 dev $DEV1",
+ "expExitCode": "255",
+ "verifyCmd": "$TC -j actions list action mirred",
+ "matchJSON": [],
+ "matchCount": "1",
+ "teardown": [
+ "$TC qdisc del dev $DEV1 ingress_block 21 clsact",
+ "$TC actions flush action mirred"
+ ]
+ },
+ {
+ "id": "2f47",
+ "name": "Try to add mirred action without specifying neither dev nor block",
+ "category": [
+ "actions",
+ "mirred"
+ ],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
+ "setup": [
+ [
+ "$TC actions flush action mirred",
+ 0,
+ 1,
+ 255
+ ],
+ [
+ "$TC qdisc add dev $DEV1 ingress_block 21 clsact",
+ 0
+ ]
+ ],
+ "cmdUnderTest": "$TC actions add action mirred ingress redirect index 1",
+ "expExitCode": "255",
+ "verifyCmd": "$TC -j actions list action mirred",
+ "matchJSON": [],
+ "matchCount": "1",
+ "teardown": [
+ "$TC qdisc del dev $DEV1 ingress_block 21 clsact",
+ "$TC actions flush action mirred"
+ ]
+ },
+ {
+ "id": "3188",
+ "name": "Replace mirred redirect to dev action with redirect to block",
+ "category": [
+ "actions",
+ "mirred"
+ ],
+ "dependsOn": "$TC actions add action mirred help 2>&1 | grep -q blockid",
+ "plugins": {
+ "requires": "nsPlugin"
+ },
+ "setup": [
+ [
+ "$TC actions flush action mirred",
+ 0,
+ 1,
+ 255
+ ],
+ [
+ "$TC qdisc add dev $DEV1 ingress_block 21 clsact",
+ 0
+ ],
+ [
+ "$TC actions add action mirred ingress redirect index 1 dev $DEV1",
+ 0
+ ]
+ ],
+ "cmdUnderTest": "$TC actions replace action mirred egress redirect index 1 blockid 21",
+ "expExitCode": "0",
+ "verifyCmd": "$TC -j actions get action mirred index 1",
+ "matchJSON": [
+ {
+ "total acts": 0
+ },
+ {
+ "actions": [
+ {
+ "order": 1,
+ "kind": "mirred",
+ "mirred_action": "redirect",
+ "direction": "egress",
+ "to_blockid": 21,
+ "control_action": {
+ "type": "stolen"
+ },
+ "index": 1,
+ "ref": 1,
+ "bind": 0,
+ "not_in_hw": true
+ }
+ ]
+ }
+ ],
+ "matchCount": "1",
+ "teardown": [
+ "$TC qdisc del dev $DEV1 ingress_block 21 clsact",
+ "$TC actions flush action mirred"
+ ]
+ },
+ {
+ "id": "83cc",
+ "name": "Replace mirred redirect to block action with mirror to dev",
+ "category": [
+ "actions",
+ "mirred"
+ ],
+ "dependsOn": "$TC actions add action mirred help 2>&1 | grep -q blockid",
+ "plugins": {
+ "requires": "nsPlugin"
+ },
+ "setup": [
+ [
+ "$TC actions flush action mirred",
+ 0,
+ 1,
+ 255
+ ],
+ [
+ "$TC qdisc add dev $DEV1 ingress_block 21 clsact",
+ 0
+ ],
+ [
+ "$TC actions add action mirred egress redirect index 1 blockid 21",
+ 0
+ ]
+ ],
+ "cmdUnderTest": "$TC actions replace action mirred ingress mirror index 1 dev lo",
+ "expExitCode": "0",
+ "verifyCmd": "$TC -j actions get action mirred index 1",
+ "matchJSON": [
+ {
+ "total acts": 0
+ },
+ {
+ "actions": [
+ {
+ "order": 1,
+ "kind": "mirred",
+ "mirred_action": "mirror",
+ "direction": "ingress",
+ "to_dev": "lo",
+ "control_action": {
+ "type": "pipe"
+ },
+ "index": 1,
+ "ref": 1,
+ "bind": 0,
+ "not_in_hw": true
+ }
+ ]
+ }
+ ],
+ "matchCount": "1",
+ "teardown": [
+ "$TC qdisc del dev $DEV1 ingress_block 21 clsact",
+ "$TC actions flush action mirred"
+ ]
}
]
diff --git a/tools/testing/selftests/tc-testing/tc-tests/actions/mpls.json b/tools/testing/selftests/tc-testing/tc-tests/actions/mpls.json
index 866f0efd0859..b1c5dd27a70d 100644
--- a/tools/testing/selftests/tc-testing/tc-tests/actions/mpls.json
+++ b/tools/testing/selftests/tc-testing/tc-tests/actions/mpls.json
@@ -6,6 +6,9 @@
"actions",
"mpls"
],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
"setup": [
[
"$TC actions flush action mpls",
@@ -30,6 +33,9 @@
"actions",
"mpls"
],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
"setup": [
[
"$TC actions flush action mpls",
@@ -54,6 +60,9 @@
"actions",
"mpls"
],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
"setup": [
[
"$TC actions flush action mpls",
@@ -78,6 +87,9 @@
"actions",
"mpls"
],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
"setup": [
[
"$TC actions flush action mpls",
@@ -102,6 +114,9 @@
"actions",
"mpls"
],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
"setup": [
[
"$TC actions flush action mpls",
@@ -126,6 +141,9 @@
"actions",
"mpls"
],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
"setup": [
[
"$TC actions flush action mpls",
@@ -150,6 +168,9 @@
"actions",
"mpls"
],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
"setup": [
[
"$TC actions flush action mpls",
@@ -174,6 +195,9 @@
"actions",
"mpls"
],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
"setup": [
[
"$TC actions flush action mpls",
@@ -198,6 +222,9 @@
"actions",
"mpls"
],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
"setup": [
[
"$TC actions flush action mpls",
@@ -222,6 +249,9 @@
"actions",
"mpls"
],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
"setup": [
[
"$TC actions flush action mpls",
@@ -244,6 +274,9 @@
"actions",
"mpls"
],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
"setup": [
[
"$TC actions flush action mpls",
@@ -266,6 +299,9 @@
"actions",
"mpls"
],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
"setup": [
[
"$TC actions flush action mpls",
@@ -288,6 +324,9 @@
"actions",
"mpls"
],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
"setup": [
[
"$TC actions flush action mpls",
@@ -310,6 +349,9 @@
"actions",
"mpls"
],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
"setup": [
[
"$TC actions flush action mpls",
@@ -332,6 +374,9 @@
"actions",
"mpls"
],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
"setup": [
[
"$TC actions flush action mpls",
@@ -356,6 +401,9 @@
"actions",
"mpls"
],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
"setup": [
[
"$TC actions flush action mpls",
@@ -380,6 +428,9 @@
"actions",
"mpls"
],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
"setup": [
[
"$TC actions flush action mpls",
@@ -404,6 +455,9 @@
"actions",
"mpls"
],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
"setup": [
[
"$TC actions flush action mpls",
@@ -426,6 +480,9 @@
"actions",
"mpls"
],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
"setup": [
[
"$TC actions flush action mpls",
@@ -448,6 +505,9 @@
"actions",
"mpls"
],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
"setup": [
[
"$TC actions flush action mpls",
@@ -470,6 +530,9 @@
"actions",
"mpls"
],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
"setup": [
[
"$TC actions flush action mpls",
@@ -492,6 +555,9 @@
"actions",
"mpls"
],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
"setup": [
[
"$TC actions flush action mpls",
@@ -514,6 +580,9 @@
"actions",
"mpls"
],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
"setup": [
[
"$TC actions flush action mpls",
@@ -538,6 +607,9 @@
"actions",
"mpls"
],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
"setup": [
[
"$TC actions flush action mpls",
@@ -562,6 +634,9 @@
"actions",
"mpls"
],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
"setup": [
[
"$TC actions flush action mpls",
@@ -586,6 +661,9 @@
"actions",
"mpls"
],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
"setup": [
[
"$TC actions flush action mpls",
@@ -610,6 +688,9 @@
"actions",
"mpls"
],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
"setup": [
[
"$TC actions flush action mpls",
@@ -634,6 +715,9 @@
"actions",
"mpls"
],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
"setup": [
[
"$TC actions flush action mpls",
@@ -656,6 +740,9 @@
"actions",
"mpls"
],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
"setup": [
[
"$TC actions flush action mpls",
@@ -678,6 +765,9 @@
"actions",
"mpls"
],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
"setup": [
[
"$TC actions flush action mpls",
@@ -700,6 +790,9 @@
"actions",
"mpls"
],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
"setup": [
[
"$TC actions flush action mpls",
@@ -722,6 +815,9 @@
"actions",
"mpls"
],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
"setup": [
[
"$TC actions flush action mpls",
@@ -744,6 +840,9 @@
"actions",
"mpls"
],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
"setup": [
[
"$TC actions flush action mpls",
@@ -768,6 +867,9 @@
"actions",
"mpls"
],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
"setup": [
[
"$TC actions flush action mpls",
@@ -792,6 +894,9 @@
"actions",
"mpls"
],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
"setup": [
[
"$TC actions flush action mpls",
@@ -814,6 +919,9 @@
"actions",
"mpls"
],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
"setup": [
[
"$TC actions flush action mpls",
@@ -836,6 +944,9 @@
"actions",
"mpls"
],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
"setup": [
[
"$TC actions flush action mpls",
@@ -860,6 +971,9 @@
"actions",
"mpls"
],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
"setup": [
[
"$TC actions flush action mpls",
@@ -884,6 +998,9 @@
"actions",
"mpls"
],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
"setup": [
[
"$TC actions flush action mpls",
@@ -906,6 +1023,9 @@
"actions",
"mpls"
],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
"setup": [
[
"$TC actions flush action mpls",
@@ -930,6 +1050,9 @@
"actions",
"mpls"
],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
"setup": [
[
"$TC actions flush action mpls",
@@ -954,6 +1077,9 @@
"actions",
"mpls"
],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
"setup": [
[
"$TC actions flush action mpls",
@@ -978,6 +1104,9 @@
"actions",
"mpls"
],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
"setup": [
[
"$TC actions flush action mpls",
@@ -1002,6 +1131,9 @@
"actions",
"mpls"
],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
"setup": [
[
"$TC actions flush action mpls",
@@ -1024,6 +1156,9 @@
"actions",
"mpls"
],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
"setup": [
[
"$TC actions flush action mpls",
@@ -1046,6 +1181,9 @@
"actions",
"mpls"
],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
"setup": [
[
"$TC actions flush action mpls",
@@ -1070,6 +1208,9 @@
"actions",
"mpls"
],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
"setup": [
[
"$TC actions flush action mpls",
@@ -1094,6 +1235,9 @@
"actions",
"mpls"
],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
"setup": [
[
"$TC actions flush action mpls",
@@ -1116,6 +1260,9 @@
"actions",
"mpls"
],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
"setup": [
[
"$TC actions flush action mpls",
@@ -1138,6 +1285,9 @@
"actions",
"mpls"
],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
"setup": [
[
"$TC actions flush action mpls",
@@ -1163,6 +1313,9 @@
"actions",
"mpls"
],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
"setup": [
[
"$TC actions flush action mpls",
@@ -1188,6 +1341,9 @@
"actions",
"mpls"
],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
"setup": [
[
"$TC actions flush action mpls",
@@ -1211,6 +1367,9 @@
"actions",
"mpls"
],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
"setup": [
[
"$TC actions flush action mpls",
diff --git a/tools/testing/selftests/tc-testing/tc-tests/actions/nat.json b/tools/testing/selftests/tc-testing/tc-tests/actions/nat.json
index bc12c1ccad30..ee2792998c89 100644
--- a/tools/testing/selftests/tc-testing/tc-tests/actions/nat.json
+++ b/tools/testing/selftests/tc-testing/tc-tests/actions/nat.json
@@ -6,6 +6,9 @@
"actions",
"nat"
],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
"setup": [
[
"$TC actions flush action nat",
@@ -30,6 +33,9 @@
"actions",
"nat"
],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
"setup": [
[
"$TC actions flush action nat",
@@ -54,6 +60,9 @@
"actions",
"nat"
],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
"setup": [
[
"$TC actions flush action nat",
@@ -78,6 +87,9 @@
"actions",
"nat"
],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
"setup": [
[
"$TC actions flush action nat",
@@ -102,6 +114,9 @@
"actions",
"nat"
],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
"setup": [
[
"$TC actions flush action nat",
@@ -126,6 +141,9 @@
"actions",
"nat"
],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
"setup": [
[
"$TC actions flush action nat",
@@ -150,6 +168,9 @@
"actions",
"nat"
],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
"setup": [
[
"$TC actions flush action nat",
@@ -174,6 +195,9 @@
"actions",
"nat"
],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
"setup": [
[
"$TC actions flush action nat",
@@ -203,6 +227,9 @@
"actions",
"nat"
],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
"setup": [
[
"$TC actions flush action nat",
@@ -232,6 +259,9 @@
"actions",
"nat"
],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
"setup": [
[
"$TC actions flush action nat",
@@ -261,6 +291,9 @@
"actions",
"nat"
],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
"setup": [
[
"$TC actions flush action nat",
@@ -285,6 +318,9 @@
"actions",
"nat"
],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
"setup": [
[
"$TC actions flush action nat",
@@ -309,6 +345,9 @@
"actions",
"nat"
],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
"setup": [
[
"$TC actions flush action nat",
@@ -333,6 +372,9 @@
"actions",
"nat"
],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
"setup": [
[
"$TC actions flush action nat",
@@ -357,6 +399,9 @@
"actions",
"nat"
],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
"setup": [
[
"$TC actions flush action nat",
@@ -381,6 +426,9 @@
"actions",
"nat"
],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
"setup": [
[
"$TC actions flush action nat",
@@ -405,6 +453,9 @@
"actions",
"nat"
],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
"setup": [
[
"$TC actions flush action nat",
@@ -429,6 +480,9 @@
"actions",
"nat"
],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
"setup": [
[
"$TC actions flush action nat",
@@ -453,6 +507,9 @@
"actions",
"nat"
],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
"setup": [
[
"$TC actions flush action nat",
@@ -477,6 +534,9 @@
"actions",
"nat"
],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
"setup": [
[
"$TC actions flush action nat",
@@ -501,6 +561,9 @@
"actions",
"nat"
],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
"setup": [
[
"$TC actions flush action nat",
@@ -525,6 +588,9 @@
"actions",
"nat"
],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
"setup": [
[
"$TC actions flush action nat",
@@ -549,6 +615,9 @@
"actions",
"nat"
],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
"setup": [
[
"$TC actions flush action nat",
@@ -573,6 +642,9 @@
"actions",
"nat"
],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
"setup": [
[
"$TC actions flush action nat",
@@ -597,6 +669,9 @@
"actions",
"nat"
],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
"setup": [
[
"$TC actions flush action nat",
@@ -614,5 +689,61 @@
"teardown": [
"$TC actions flush action nat"
]
+ },
+ {
+ "id": "b811",
+ "name": "Delete nat action with valid index",
+ "category": [
+ "actions",
+ "nat"
+ ],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
+ "setup": [
+ [
+ "$TC actions flush action nat",
+ 0,
+ 1,
+ 255
+ ],
+ "$TC actions add action nat ingress 1.1.1.1 2.2.2.2 drop index 20"
+ ],
+ "cmdUnderTest": "$TC actions del action nat index 20",
+ "expExitCode": "0",
+ "verifyCmd": "$TC actions ls action nat index 20",
+ "matchPattern": "action order [0-9]+: nat ingress 1.1.1.1/32 2.2.2.2 drop.*index 20 ref",
+ "matchCount": "0",
+ "teardown": [
+ "$TC actions flush action nat"
+ ]
+ },
+ {
+ "id": "a521",
+ "name": "Delete nat action with invalid index",
+ "category": [
+ "actions",
+ "nat"
+ ],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
+ "setup": [
+ [
+ "$TC actions flush action nat",
+ 0,
+ 1,
+ 255
+ ],
+ "$TC actions add action nat ingress 1.1.1.1 2.2.2.2 drop index 20"
+ ],
+ "cmdUnderTest": "$TC actions del action nat index 10",
+ "expExitCode": "255",
+ "verifyCmd": "$TC actions ls action nat index 20",
+ "matchPattern": "action order [0-9]+: nat ingress 1.1.1.1/32 2.2.2.2 drop.*index 20 ref",
+ "matchCount": "1",
+ "teardown": [
+ "$TC actions flush action nat"
+ ]
}
]
diff --git a/tools/testing/selftests/tc-testing/tc-tests/actions/pedit.json b/tools/testing/selftests/tc-testing/tc-tests/actions/pedit.json
index 72cdc3c800a5..37c410332174 100644
--- a/tools/testing/selftests/tc-testing/tc-tests/actions/pedit.json
+++ b/tools/testing/selftests/tc-testing/tc-tests/actions/pedit.json
@@ -6,6 +6,9 @@
"actions",
"pedit"
],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
"setup": [
[
"$TC actions flush action pedit",
@@ -30,6 +33,9 @@
"actions",
"pedit"
],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
"setup": [
[
"$TC actions flush action pedit",
@@ -56,6 +62,9 @@
"pedit",
"raw_op"
],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
"setup": [
[
"$TC actions flush action pedit",
@@ -81,6 +90,9 @@
"pedit",
"raw_op"
],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
"setup": [
[
"$TC actions flush action pedit",
@@ -106,6 +118,9 @@
"pedit",
"raw_op"
],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
"setup": [
[
"$TC actions flush action pedit",
@@ -131,6 +146,9 @@
"pedit",
"raw_op"
],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
"setup": [
[
"$TC actions flush action pedit",
@@ -156,6 +174,9 @@
"pedit",
"raw_op"
],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
"setup": [
[
"$TC actions flush action pedit",
@@ -206,6 +227,9 @@
"pedit",
"raw_op"
],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
"setup": [
[
"$TC actions flush action pedit",
@@ -231,6 +255,9 @@
"pedit",
"raw_op"
],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
"setup": [
[
"$TC actions flush action pedit",
@@ -256,6 +283,9 @@
"pedit",
"raw_op"
],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
"setup": [
[
"$TC actions flush action pedit",
@@ -281,6 +311,9 @@
"pedit",
"raw_op"
],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
"setup": [
[
"$TC actions flush action pedit",
@@ -306,6 +339,9 @@
"pedit",
"raw_op"
],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
"setup": [
[
"$TC actions flush action pedit",
@@ -331,6 +367,9 @@
"pedit",
"raw_op"
],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
"setup": [
[
"$TC actions flush action pedit",
@@ -356,6 +395,9 @@
"pedit",
"raw_op"
],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
"setup": [
[
"$TC actions flush action pedit",
@@ -381,6 +423,9 @@
"pedit",
"raw_op"
],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
"setup": [
[
"$TC actions flush action pedit",
@@ -406,6 +451,9 @@
"pedit",
"raw_op"
],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
"setup": [
[
"$TC actions flush action pedit",
@@ -431,6 +479,9 @@
"pedit",
"raw_op"
],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
"setup": [
[
"$TC actions flush action pedit",
@@ -456,6 +507,9 @@
"pedit",
"raw_op"
],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
"setup": [
[
"$TC actions flush action pedit",
@@ -481,6 +535,9 @@
"pedit",
"raw_op"
],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
"setup": [
[
"$TC actions flush action pedit",
@@ -506,6 +563,9 @@
"pedit",
"raw_op"
],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
"setup": [
[
"$TC actions flush action pedit",
@@ -531,6 +591,9 @@
"pedit",
"raw_op"
],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
"setup": [
[
"$TC actions flush action pedit",
@@ -556,6 +619,9 @@
"pedit",
"raw_op"
],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
"setup": [
[
"$TC actions flush action pedit",
@@ -581,6 +647,9 @@
"pedit",
"raw_op"
],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
"setup": [
[
"$TC actions flush action pedit",
@@ -606,6 +675,9 @@
"pedit",
"layered_op"
],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
"setup": [
[
"$TC actions flush action pedit",
@@ -631,6 +703,9 @@
"pedit",
"layered_op"
],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
"setup": [
[
"$TC actions flush action pedit",
@@ -656,6 +731,9 @@
"pedit",
"layered_op"
],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
"setup": [
[
"$TC actions flush action pedit",
@@ -681,6 +759,9 @@
"pedit",
"layered_op"
],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
"setup": [
[
"$TC actions flush action pedit",
@@ -706,6 +787,9 @@
"pedit",
"layered_op"
],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
"setup": [
[
"$TC actions flush action pedit",
@@ -731,6 +815,9 @@
"pedit",
"layered_op"
],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
"setup": [
[
"$TC actions flush action pedit",
@@ -756,6 +843,9 @@
"pedit",
"layered_op"
],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
"setup": [
[
"$TC actions flush action pedit",
@@ -779,6 +869,9 @@
"pedit",
"layered_op"
],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
"setup": [
[
"$TC actions flush action pedit",
@@ -804,6 +897,9 @@
"pedit",
"layered_op"
],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
"setup": [
[
"$TC actions flush action pedit",
@@ -829,6 +925,9 @@
"pedit",
"layered_op"
],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
"setup": [
[
"$TC actions flush action pedit",
@@ -854,6 +953,9 @@
"pedit",
"layered_op"
],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
"setup": [
[
"$TC actions flush action pedit",
@@ -879,6 +981,9 @@
"pedit",
"layered_op"
],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
"setup": [
[
"$TC actions flush action pedit",
@@ -904,6 +1009,9 @@
"pedit",
"layered_op"
],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
"setup": [
[
"$TC actions flush action pedit",
@@ -929,6 +1037,9 @@
"pedit",
"layered_op"
],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
"setup": [
[
"$TC actions flush action pedit",
@@ -954,6 +1065,9 @@
"pedit",
"layered_op"
],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
"setup": [
[
"$TC actions flush action pedit",
@@ -979,6 +1093,9 @@
"pedit",
"layered_op"
],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
"setup": [
[
"$TC actions flush action pedit",
@@ -1004,6 +1121,9 @@
"pedit",
"layered_op"
],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
"setup": [
[
"$TC actions flush action pedit",
@@ -1029,6 +1149,9 @@
"pedit",
"layered_op"
],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
"setup": [
[
"$TC actions flush action pedit",
@@ -1054,6 +1177,9 @@
"pedit",
"layered_op"
],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
"setup": [
[
"$TC actions flush action pedit",
@@ -1079,6 +1205,9 @@
"pedit",
"layered_op"
],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
"setup": [
[
"$TC actions flush action pedit",
@@ -1104,6 +1233,9 @@
"pedit",
"layered_op"
],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
"setup": [
[
"$TC actions flush action pedit",
@@ -1129,6 +1261,9 @@
"pedit",
"layered_op"
],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
"setup": [
[
"$TC actions flush action pedit",
@@ -1154,6 +1289,9 @@
"pedit",
"layered_op"
],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
"setup": [
[
"$TC actions flush action pedit",
@@ -1179,6 +1317,9 @@
"pedit",
"layered_op"
],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
"setup": [
[
"$TC actions flush action pedit",
@@ -1204,6 +1345,9 @@
"pedit",
"layered_op"
],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
"setup": [
[
"$TC actions flush action pedit",
@@ -1229,6 +1373,9 @@
"pedit",
"layered_op"
],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
"setup": [
[
"$TC actions flush action pedit",
@@ -1254,6 +1401,9 @@
"pedit",
"layered_op"
],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
"setup": [
[
"$TC actions flush action pedit",
@@ -1279,6 +1429,9 @@
"pedit",
"layered_op"
],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
"setup": [
[
"$TC actions flush action pedit",
@@ -1304,6 +1457,9 @@
"pedit",
"layered_op"
],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
"setup": [
[
"$TC actions flush action pedit",
@@ -1329,6 +1485,9 @@
"pedit",
"layered_op"
],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
"setup": [
[
"$TC actions flush action pedit",
@@ -1354,6 +1513,9 @@
"pedit",
"layered_op"
],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
"setup": [
[
"$TC actions flush action pedit",
@@ -1379,6 +1541,9 @@
"pedit",
"layered_op"
],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
"setup": [
[
"$TC actions flush action pedit",
@@ -1404,6 +1569,9 @@
"pedit",
"layered_op"
],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
"setup": [
[
"$TC actions flush action pedit",
@@ -1429,6 +1597,9 @@
"pedit",
"layered_op"
],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
"setup": [
[
"$TC actions flush action pedit",
@@ -1454,6 +1625,9 @@
"pedit",
"layered_op"
],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
"setup": [
[
"$TC actions flush action pedit",
@@ -1479,6 +1653,9 @@
"pedit",
"layered_op"
],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
"setup": [
[
"$TC actions flush action pedit",
@@ -1504,6 +1681,9 @@
"pedit",
"layered_op"
],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
"setup": [
[
"$TC actions flush action pedit",
@@ -1554,6 +1734,9 @@
"pedit",
"layered_op"
],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
"setup": [
[
"$TC actions flush action pedit",
@@ -1579,6 +1762,9 @@
"pedit",
"layered_op"
],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
"setup": [
[
"$TC actions flush action pedit",
@@ -1629,6 +1815,9 @@
"pedit",
"layered_op"
],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
"setup": [
[
"$TC actions flush action pedit",
@@ -1654,6 +1843,9 @@
"pedit",
"layered_op"
],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
"setup": [
[
"$TC actions flush action pedit",
@@ -1680,6 +1872,9 @@
"layered_op",
"raw_op"
],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
"setup": [
[
"$TC actions flush action pedit",
@@ -1706,6 +1901,9 @@
"layered_op",
"raw_op"
],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
"setup": [
[
"$TC actions flush action pedit",
diff --git a/tools/testing/selftests/tc-testing/tc-tests/actions/police.json b/tools/testing/selftests/tc-testing/tc-tests/actions/police.json
index b8268da5adaa..dd8109768f8f 100644
--- a/tools/testing/selftests/tc-testing/tc-tests/actions/police.json
+++ b/tools/testing/selftests/tc-testing/tc-tests/actions/police.json
@@ -6,6 +6,9 @@
"actions",
"police"
],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
"setup": [
[
"$TC actions flush action police",
@@ -30,6 +33,9 @@
"actions",
"police"
],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
"setup": [
[
"$TC actions flush action police",
@@ -55,6 +61,9 @@
"actions",
"police"
],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
"setup": [
[
"$TC actions flush action police",
@@ -79,6 +88,9 @@
"actions",
"police"
],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
"setup": [
[
"$TC actions flush action police",
@@ -103,6 +115,9 @@
"actions",
"police"
],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
"setup": [
[
"$TC actions flush action police",
@@ -127,6 +142,9 @@
"actions",
"police"
],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
"setup": [
[
"$TC actions flush action police",
@@ -151,6 +169,9 @@
"actions",
"police"
],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
"setup": [
[
"$TC actions flush action police",
@@ -175,6 +196,9 @@
"actions",
"police"
],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
"setup": [
[
"$TC actions flush action police",
@@ -199,6 +223,9 @@
"actions",
"police"
],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
"setup": [
[
"$TC actions flush action police",
@@ -223,6 +250,9 @@
"actions",
"police"
],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
"setup": [
[
"$TC actions flush action police",
@@ -247,6 +277,9 @@
"actions",
"police"
],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
"setup": [
[
"$TC actions flush action police",
@@ -271,6 +304,9 @@
"actions",
"police"
],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
"setup": [
[
"$TC actions flush action police",
@@ -295,6 +331,9 @@
"actions",
"police"
],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
"setup": [
[
"$TC actions flush action police",
@@ -319,6 +358,9 @@
"actions",
"police"
],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
"setup": [
[
"$TC actions flush action police",
@@ -343,6 +385,9 @@
"actions",
"police"
],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
"setup": [
[
"$TC actions flush action police",
@@ -367,6 +412,9 @@
"actions",
"police"
],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
"setup": [
[
"$TC actions flush action police",
@@ -391,6 +439,9 @@
"actions",
"police"
],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
"setup": [
[
"$TC actions flush action police",
@@ -415,6 +466,9 @@
"actions",
"police"
],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
"setup": [
[
"$TC actions flush action police",
@@ -439,6 +493,9 @@
"actions",
"police"
],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
"setup": [
[
"$TC actions flush action police",
@@ -463,6 +520,9 @@
"actions",
"police"
],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
"setup": [
[
"$TC actions flush action police",
@@ -488,6 +548,9 @@
"actions",
"police"
],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
"setup": [
[
"$TC actions flush action police",
@@ -520,6 +583,9 @@
"actions",
"police"
],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
"setup": [
[
"$TC actions flush action police",
@@ -545,6 +611,9 @@
"actions",
"police"
],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
"setup": [
[
"$TC actions flush action police",
@@ -577,6 +646,9 @@
"actions",
"police"
],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
"setup": [
"$TC actions add action police rate 1mbit burst 100k index 1",
"$TC actions add action police rate 2mbit burst 200k index 2",
@@ -603,6 +675,9 @@
"actions",
"police"
],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
"setup": [
[
"$TC actions flush action police",
@@ -627,6 +702,9 @@
"actions",
"police"
],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
"setup": [
[
"$TC actions flush action police",
@@ -651,6 +729,9 @@
"actions",
"police"
],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
"setup": [
[
"$TC actions flush action police",
@@ -675,6 +756,9 @@
"actions",
"police"
],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
"setup": [
[
"$TC actions flush action police",
@@ -699,6 +783,9 @@
"actions",
"police"
],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
"setup": [
[
"$TC actions flush action police",
@@ -723,6 +810,9 @@
"actions",
"police"
],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
"setup": [
[
"$TC actions flush action police",
@@ -747,6 +837,9 @@
"actions",
"police"
],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
"setup": [
[
"$TC actions flush action police",
@@ -764,5 +857,86 @@
"teardown": [
"$TC actions flush action police"
]
+ },
+ {
+ "id": "cdd7",
+ "name": "Add valid police action with packets per second rate limit",
+ "category": [
+ "actions",
+ "police"
+ ],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
+ "setup": [
+ [
+ "$TC actions flush action police",
+ 0,
+ 1,
+ 255
+ ]
+ ],
+ "cmdUnderTest": "$TC actions add action police pkts_rate 1000 pkts_burst 200 index 1",
+ "expExitCode": "0",
+ "verifyCmd": "$TC actions ls action police",
+ "matchPattern": "action order [0-9]*: police 0x1 rate 0bit burst 0b mtu 4096Mb pkts_rate 1000 pkts_burst 200",
+ "matchCount": "1",
+ "teardown": [
+ "$TC actions flush action police"
+ ]
+ },
+ {
+ "id": "f5bc",
+ "name": "Add invalid police action with both bps and pps",
+ "category": [
+ "actions",
+ "police"
+ ],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
+ "setup": [
+ [
+ "$TC actions flush action police",
+ 0,
+ 1,
+ 255
+ ]
+ ],
+ "cmdUnderTest": "$TC actions add action police rate 1kbit burst 10k pkts_rate 1000 pkts_burst 200 index 1",
+ "expExitCode": "255",
+ "verifyCmd": "$TC actions ls action police",
+ "matchPattern": "action order [0-9]*: police 0x1 ",
+ "matchCount": "0",
+ "teardown": [
+ "$TC actions flush action police"
+ ]
+ },
+ {
+ "id": "7d64",
+ "name": "Add police action with skip_hw option",
+ "category": [
+ "actions",
+ "police"
+ ],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
+ "setup": [
+ [
+ "$TC actions flush action police",
+ 0,
+ 1,
+ 255
+ ]
+ ],
+ "cmdUnderTest": "$TC actions add action police rate 1kbit burst 10k index 100 skip_hw",
+ "expExitCode": "0",
+ "verifyCmd": "$TC actions ls action police | grep skip_hw",
+ "matchPattern": "skip_hw",
+ "matchCount": "1",
+ "teardown": [
+ "$TC actions flush action police"
+ ]
}
]
diff --git a/tools/testing/selftests/tc-testing/tc-tests/actions/sample.json b/tools/testing/selftests/tc-testing/tc-tests/actions/sample.json
index ddabb160a11b..af35e2f30a95 100644
--- a/tools/testing/selftests/tc-testing/tc-tests/actions/sample.json
+++ b/tools/testing/selftests/tc-testing/tc-tests/actions/sample.json
@@ -6,6 +6,9 @@
"actions",
"sample"
],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
"setup": [
[
"$TC actions flush action sample",
@@ -30,6 +33,9 @@
"actions",
"sample"
],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
"setup": [
[
"$TC actions flush action sample",
@@ -54,6 +60,9 @@
"actions",
"sample"
],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
"setup": [
[
"$TC actions flush action sample",
@@ -78,6 +87,9 @@
"actions",
"sample"
],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
"setup": [
[
"$TC actions flush action sample",
@@ -102,6 +114,9 @@
"actions",
"sample"
],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
"setup": [
[
"$TC actions flush action sample",
@@ -126,6 +141,9 @@
"actions",
"sample"
],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
"setup": [
[
"$TC actions flush action sample",
@@ -150,6 +168,9 @@
"actions",
"sample"
],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
"setup": [
[
"$TC actions flush action sample",
@@ -174,6 +195,9 @@
"actions",
"sample"
],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
"setup": [
[
"$TC actions flush action sample",
@@ -196,6 +220,9 @@
"actions",
"sample"
],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
"setup": [
[
"$TC actions flush action sample",
@@ -218,6 +245,9 @@
"actions",
"sample"
],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
"setup": [
[
"$TC actions flush action sample",
@@ -240,6 +270,9 @@
"actions",
"sample"
],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
"setup": [
[
"$TC actions flush action sample",
@@ -262,6 +295,9 @@
"actions",
"sample"
],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
"setup": [
[
"$TC actions flush action sample",
@@ -284,6 +320,9 @@
"actions",
"sample"
],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
"setup": [
[
"$TC actions flush action sample",
@@ -308,6 +347,9 @@
"actions",
"sample"
],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
"setup": [
[
"$TC actions flush action sample",
@@ -332,6 +374,9 @@
"actions",
"sample"
],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
"setup": [
[
"$TC actions flush action sample",
@@ -356,6 +401,9 @@
"actions",
"sample"
],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
"setup": [
[
"$TC actions flush action sample",
@@ -380,6 +428,9 @@
"actions",
"sample"
],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
"setup": [
[
"$TC actions flush action sample",
@@ -402,6 +453,9 @@
"actions",
"sample"
],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
"setup": [
[
"$TC actions flush action sample",
@@ -424,6 +478,9 @@
"actions",
"sample"
],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
"setup": [
[
"$TC actions flush action sample",
@@ -446,6 +503,9 @@
"actions",
"sample"
],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
"setup": [
[
"$TC actions flush action sample",
@@ -468,6 +528,9 @@
"actions",
"sample"
],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
"setup": [
[
"$TC actions flush action sample",
@@ -492,6 +555,9 @@
"actions",
"sample"
],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
"setup": [
[
"$TC actions flush action sample",
@@ -516,6 +582,9 @@
"actions",
"sample"
],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
"setup": [
[
"$TC actions flush action sample",
@@ -541,6 +610,9 @@
"actions",
"sample"
],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
"setup": [
[
"$TC actions flush action sample",
@@ -566,6 +638,9 @@
"actions",
"sample"
],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
"setup": [
[
"$TC actions flush action sample",
@@ -591,6 +666,9 @@
"actions",
"sample"
],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
"setup": [
[
"$TC actions flush action sample",
@@ -616,6 +694,9 @@
"actions",
"sample"
],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
"setup": [
[
"$TC actions flush action sample",
@@ -633,5 +714,61 @@
"teardown": [
"$TC actions flush action sample"
]
+ },
+ {
+ "id": "3872",
+ "name": "Delete sample action with valid index",
+ "category": [
+ "actions",
+ "sample"
+ ],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
+ "setup": [
+ [
+ "$TC actions flush action sample",
+ 0,
+ 1,
+ 255
+ ],
+ "$TC actions add action sample rate 10 group 1 index 20"
+ ],
+ "cmdUnderTest": "$TC actions del action sample index 20",
+ "expExitCode": "0",
+ "verifyCmd": "$TC actions get action sample index 20",
+ "matchPattern": "action order [0-9]+: sample rate 1/10 group 1.*index 20 ref",
+ "matchCount": "0",
+ "teardown": [
+ "$TC actions flush action sample"
+ ]
+ },
+ {
+ "id": "a394",
+ "name": "Delete sample action with invalid index",
+ "category": [
+ "actions",
+ "sample"
+ ],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
+ "setup": [
+ [
+ "$TC actions flush action sample",
+ 0,
+ 1,
+ 255
+ ],
+ "$TC actions add action sample rate 10 group 1 index 20"
+ ],
+ "cmdUnderTest": "$TC actions del action sample index 10",
+ "expExitCode": "255",
+ "verifyCmd": "$TC actions get action sample index 20",
+ "matchPattern": "action order [0-9]+: sample rate 1/10 group 1.*index 20 ref",
+ "matchCount": "1",
+ "teardown": [
+ "$TC actions flush action sample"
+ ]
}
]
diff --git a/tools/testing/selftests/tc-testing/tc-tests/actions/simple.json b/tools/testing/selftests/tc-testing/tc-tests/actions/simple.json
index 8e8c1ae12260..ac960e70dc9b 100644
--- a/tools/testing/selftests/tc-testing/tc-tests/actions/simple.json
+++ b/tools/testing/selftests/tc-testing/tc-tests/actions/simple.json
@@ -6,6 +6,9 @@
"actions",
"simple"
],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
"setup": [
[
"$TC actions flush action simple",
@@ -24,12 +27,42 @@
]
},
{
+ "id": "4297",
+ "name": "Add simple action with change command",
+ "category": [
+ "actions",
+ "simple"
+ ],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
+ "setup": [
+ [
+ "$TC actions flush action simple",
+ 0,
+ 1,
+ 255
+ ]
+ ],
+ "cmdUnderTest": "$TC actions change action simple sdata \"Not changed\" index 60",
+ "expExitCode": "0",
+ "verifyCmd": "$TC actions list action simple",
+ "matchPattern": "action order [0-9]*: Simple <Not changed>.*index 60 ref",
+ "matchCount": "1",
+ "teardown": [
+ "$TC actions flush action simple"
+ ]
+ },
+ {
"id": "6d4c",
"name": "Add simple action with duplicate index",
"category": [
"actions",
"simple"
],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
"setup": [
[
"$TC actions flush action simple",
@@ -55,6 +88,9 @@
"actions",
"simple"
],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
"setup": [
[
"$TC actions flush action simple",
@@ -82,6 +118,9 @@
"actions",
"simple"
],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
"setup": [
[
"$TC actions flush action simple",
@@ -107,6 +146,9 @@
"actions",
"simple"
],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
"setup": [
[
"$TC actions flush action simple",
@@ -134,6 +176,9 @@
"actions",
"simple"
],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
"setup": [
[
"$TC actions flush action simple",
@@ -151,5 +196,70 @@
"teardown": [
"$TC actions flush action simple"
]
+ },
+ {
+ "id": "8d07",
+ "name": "Verify cleanup of failed actions batch add",
+ "category": [
+ "actions",
+ "simple"
+ ],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
+ "setup": [
+ [
+ "$TC actions flush action simple",
+ 0,
+ 1,
+ 255
+ ],
+ "$TC actions add action simple sdata \"2\" index 2",
+ [
+ "$TC actions add action simple sdata \"1\" index 1 action simple sdata \"2\" index 2",
+ 255
+ ],
+ "$TC actions flush action simple"
+ ],
+ "cmdUnderTest": "$TC actions add action simple sdata \"2\" index 2",
+ "expExitCode": "0",
+ "verifyCmd": "$TC actions list action simple",
+ "matchPattern": "action order [0-9]*: Simple <2>.*index 2 ref",
+ "matchCount": "1",
+ "teardown": [
+ "$TC actions flush action simple"
+ ]
+ },
+ {
+ "id": "a68a",
+ "name": "Verify cleanup of failed actions batch change",
+ "category": [
+ "actions",
+ "simple"
+ ],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
+ "setup": [
+ [
+ "$TC actions flush action simple",
+ 0,
+ 1,
+ 255
+ ],
+ [
+ "$TC actions change action simple sdata \"1\" index 1 action simple sdata \"2\" goto chain 42 index 2",
+ 255
+ ],
+ "$TC actions flush action simple"
+ ],
+ "cmdUnderTest": "$TC actions add action simple sdata \"1\" index 1",
+ "expExitCode": "0",
+ "verifyCmd": "$TC actions list action simple",
+ "matchPattern": "action order [0-9]*: Simple <1>.*index 1 ref",
+ "matchCount": "1",
+ "teardown": [
+ "$TC actions flush action simple"
+ ]
}
]
diff --git a/tools/testing/selftests/tc-testing/tc-tests/actions/skbedit.json b/tools/testing/selftests/tc-testing/tc-tests/actions/skbedit.json
index 9cdd2e31ac2c..27ba0f72e904 100644
--- a/tools/testing/selftests/tc-testing/tc-tests/actions/skbedit.json
+++ b/tools/testing/selftests/tc-testing/tc-tests/actions/skbedit.json
@@ -6,6 +6,9 @@
"actions",
"skbedit"
],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
"setup": [
[
"$TC actions flush action skbedit",
@@ -30,6 +33,9 @@
"actions",
"skbedit"
],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
"setup": [
[
"$TC actions flush action skbedit",
@@ -54,6 +60,9 @@
"actions",
"skbedit"
],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
"setup": [
[
"$TC actions flush action skbedit",
@@ -76,6 +85,9 @@
"actions",
"skbedit"
],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
"setup": [
[
"$TC actions flush action skbedit",
@@ -100,6 +112,9 @@
"actions",
"skbedit"
],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
"setup": [
[
"$TC actions flush action skbedit",
@@ -124,6 +139,9 @@
"actions",
"skbedit"
],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
"setup": [
[
"$TC actions flush action skbedit",
@@ -146,6 +164,9 @@
"actions",
"skbedit"
],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
"setup": [
[
"$TC actions flush action skbedit",
@@ -168,6 +189,9 @@
"actions",
"skbedit"
],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
"setup": [
[
"$TC actions flush action skbedit",
@@ -193,6 +217,9 @@
"actions",
"skbedit"
],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
"setup": [
[
"$TC actions flush action skbedit",
@@ -217,6 +244,9 @@
"actions",
"skbedit"
],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
"setup": [
[
"$TC actions flush action skbedit",
@@ -241,6 +271,9 @@
"actions",
"skbedit"
],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
"setup": [
[
"$TC actions flush action skbedit",
@@ -265,6 +298,9 @@
"actions",
"skbedit"
],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
"setup": [
[
"$TC actions flush action skbedit",
@@ -289,6 +325,9 @@
"actions",
"skbedit"
],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
"setup": [
[
"$TC actions flush action skbedit",
@@ -313,6 +352,9 @@
"actions",
"skbedit"
],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
"setup": [
[
"$TC actions flush action skbedit",
@@ -337,6 +379,9 @@
"actions",
"skbedit"
],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
"setup": [
[
"$TC actions flush action skbedit",
@@ -361,6 +406,9 @@
"actions",
"skbedit"
],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
"setup": [
[
"$TC actions flush action skbedit",
@@ -385,6 +433,9 @@
"actions",
"skbedit"
],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
"setup": [
[
"$TC actions flush action skbedit",
@@ -409,6 +460,9 @@
"actions",
"skbedit"
],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
"setup": [
[
"$TC actions flush action skbedit",
@@ -433,6 +487,9 @@
"actions",
"skbedit"
],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
"setup": [
[
"$TC actions flush action skbedit",
@@ -457,6 +514,9 @@
"actions",
"skbedit"
],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
"setup": [
[
"$TC actions flush action skbedit",
@@ -481,6 +541,9 @@
"actions",
"skbedit"
],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
"setup": [
[
"$TC actions flush action skbedit",
@@ -505,6 +568,9 @@
"actions",
"skbedit"
],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
"setup": [
[
"$TC actions flush action skbedit",
@@ -529,6 +595,9 @@
"actions",
"skbedit"
],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
"setup": [
[
"$TC actions flush action skbedit",
@@ -557,6 +626,9 @@
"actions",
"skbedit"
],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
"setup": [
[
"$TC actions flush action skbedit",
@@ -581,6 +653,9 @@
"actions",
"skbedit"
],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
"setup": [
[
"$TC actions flush action skbedit",
@@ -603,6 +678,9 @@
"actions",
"skbedit"
],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
"setup": [
[
"$TC actions flush action skbedit",
@@ -628,6 +706,9 @@
"actions",
"skbedit"
],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
"setup": [
"$TC actions add action skbedit mark 500",
"$TC actions add action skbedit mark 501",
@@ -653,6 +734,9 @@
"actions",
"skbedit"
],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
"setup": [
[
"$TC actions flush action skbedit",
@@ -678,6 +762,9 @@
"actions",
"skbedit"
],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
"setup": [
[
"$TC actions flush action skbedit",
@@ -702,6 +789,9 @@
"actions",
"skbedit"
],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
"setup": [
[
"$TC actions flush action skbedit",
diff --git a/tools/testing/selftests/tc-testing/tc-tests/actions/skbmod.json b/tools/testing/selftests/tc-testing/tc-tests/actions/skbmod.json
index 6eb4c4f97060..33ed7a8099e2 100644
--- a/tools/testing/selftests/tc-testing/tc-tests/actions/skbmod.json
+++ b/tools/testing/selftests/tc-testing/tc-tests/actions/skbmod.json
@@ -6,6 +6,9 @@
"actions",
"skbmod"
],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
"setup": [
[
"$TC actions flush action skbmod",
@@ -30,6 +33,9 @@
"actions",
"skbmod"
],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
"setup": [
[
"$TC actions flush action skbmod",
@@ -54,6 +60,9 @@
"actions",
"skbmod"
],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
"setup": [
[
"$TC actions flush action skbmod",
@@ -78,6 +87,9 @@
"actions",
"skbmod"
],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
"setup": [
[
"$TC actions flush action skbmod",
@@ -102,6 +114,9 @@
"actions",
"skbmod"
],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
"setup": [
[
"$TC actions flush action skbmod",
@@ -126,6 +141,9 @@
"actions",
"skbmod"
],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
"setup": [
[
"$TC actions flush action skbmod",
@@ -150,6 +168,9 @@
"actions",
"skbmod"
],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
"setup": [
[
"$TC actions flush action skbmod",
@@ -174,6 +195,9 @@
"actions",
"skbmod"
],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
"setup": [
[
"$TC actions flush action skbmod",
@@ -198,6 +222,9 @@
"actions",
"skbmod"
],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
"setup": [
[
"$TC actions flush action skbmod",
@@ -222,6 +249,9 @@
"actions",
"skbmod"
],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
"setup": [
[
"$TC actions flush action skbmod",
@@ -246,6 +276,9 @@
"actions",
"skbmod"
],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
"setup": [
[
"$TC actions flush action skbmod",
@@ -270,6 +303,9 @@
"actions",
"skbmod"
],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
"setup": [
[
"$TC actions flush action skbmod",
@@ -294,6 +330,9 @@
"actions",
"skbmod"
],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
"setup": [
[
"$TC actions flush action skbmod",
@@ -323,6 +362,9 @@
"actions",
"skbmod"
],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
"setup": [
[
"$TC actions flush action skbmod",
@@ -352,6 +394,9 @@
"actions",
"skbmod"
],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
"setup": [
[
"$TC actions flush action skbmod",
@@ -377,6 +422,9 @@
"actions",
"skbmod"
],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
"setup": [
"$TC actions add action skbmod set etype 0x0001",
"$TC actions add action skbmod set etype 0x0011",
@@ -400,6 +448,9 @@
"actions",
"skbmod"
],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
"setup": [
[
"$TC actions flush action skbmod",
@@ -417,5 +468,32 @@
"teardown": [
"$TC actions flush action skbmod"
]
+ },
+ {
+ "id": "fe09",
+ "name": "Add skbmod action to mark ECN bits",
+ "category": [
+ "actions",
+ "skbmod"
+ ],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
+ "setup": [
+ [
+ "$TC actions flush action skbmod",
+ 0,
+ 1,
+ 255
+ ]
+ ],
+ "cmdUnderTest": "$TC actions add action skbmod ecn",
+ "expExitCode": "0",
+ "verifyCmd": "$TC actions get action skbmod index 1",
+ "matchPattern": "action order [0-9]*: skbmod pipe ecn",
+ "matchCount": "1",
+ "teardown": [
+ "$TC actions flush action skbmod"
+ ]
}
]
diff --git a/tools/testing/selftests/tc-testing/tc-tests/actions/tunnel_key.json b/tools/testing/selftests/tc-testing/tc-tests/actions/tunnel_key.json
index d06346968bcb..0b6f0b5aeaad 100644
--- a/tools/testing/selftests/tc-testing/tc-tests/actions/tunnel_key.json
+++ b/tools/testing/selftests/tc-testing/tc-tests/actions/tunnel_key.json
@@ -6,6 +6,9 @@
"actions",
"tunnel_key"
],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
"setup": [
[
"$TC actions flush action tunnel_key",
@@ -30,6 +33,9 @@
"actions",
"tunnel_key"
],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
"setup": [
[
"$TC actions flush action tunnel_key",
@@ -59,6 +65,9 @@
"actions",
"tunnel_key"
],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
"setup": [
[
"$TC actions flush action tunnel_key",
@@ -88,6 +97,9 @@
"actions",
"tunnel_key"
],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
"setup": [
[
"$TC actions flush action tunnel_key",
@@ -117,6 +129,9 @@
"actions",
"tunnel_key"
],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
"setup": [
[
"$TC actions flush action tunnel_key",
@@ -146,6 +161,9 @@
"actions",
"tunnel_key"
],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
"setup": [
[
"$TC actions flush action tunnel_key",
@@ -175,6 +193,9 @@
"actions",
"tunnel_key"
],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
"setup": [
[
"$TC actions flush action tunnel_key",
@@ -204,6 +225,9 @@
"actions",
"tunnel_key"
],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
"setup": [
[
"$TC actions flush action tunnel_key",
@@ -228,6 +252,9 @@
"actions",
"tunnel_key"
],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
"setup": [
[
"$TC actions flush action tunnel_key",
@@ -252,6 +279,9 @@
"actions",
"tunnel_key"
],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
"setup": [
[
"$TC actions flush action tunnel_key",
@@ -281,6 +311,9 @@
"actions",
"tunnel_key"
],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
"setup": [
[
"$TC actions flush action tunnel_key",
@@ -305,6 +338,9 @@
"actions",
"tunnel_key"
],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
"setup": [
[
"$TC actions flush action tunnel_key",
@@ -334,6 +370,9 @@
"actions",
"tunnel_key"
],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
"setup": [
[
"$TC actions flush action tunnel_key",
@@ -358,6 +397,9 @@
"actions",
"tunnel_key"
],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
"setup": [
[
"$TC actions flush action tunnel_key",
@@ -387,6 +429,9 @@
"actions",
"tunnel_key"
],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
"setup": [
[
"$TC actions flush action tunnel_key",
@@ -411,6 +456,9 @@
"actions",
"tunnel_key"
],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
"setup": [
[
"$TC actions flush action tunnel_key",
@@ -435,6 +483,9 @@
"actions",
"tunnel_key"
],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
"setup": [
[
"$TC actions flush action tunnel_key",
@@ -459,6 +510,9 @@
"actions",
"tunnel_key"
],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
"setup": [
[
"$TC actions flush action tunnel_key",
@@ -488,6 +542,9 @@
"actions",
"tunnel_key"
],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
"setup": [
[
"$TC actions flush action tunnel_key",
@@ -517,6 +574,9 @@
"actions",
"tunnel_key"
],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
"setup": [
[
"$TC actions flush action tunnel_key",
@@ -541,6 +601,9 @@
"actions",
"tunnel_key"
],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
"setup": [
[
"$TC actions flush action tunnel_key",
@@ -565,6 +628,9 @@
"actions",
"tunnel_key"
],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
"setup": [
[
"$TC actions flush action tunnel_key",
@@ -594,6 +660,9 @@
"actions",
"tunnel_key"
],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
"setup": [
[
"$TC actions flush action tunnel_key",
@@ -618,6 +687,9 @@
"actions",
"tunnel_key"
],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
"setup": [
[
"$TC actions flush action tunnel_key",
@@ -642,6 +714,9 @@
"actions",
"tunnel_key"
],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
"setup": [
[
"$TC actions flush action tunnel_key",
@@ -666,6 +741,9 @@
"actions",
"tunnel_key"
],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
"setup": [
[
"$TC actions flush action tunnel_key",
@@ -690,6 +768,9 @@
"actions",
"tunnel_key"
],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
"setup": [
[
"$TC actions flush action tunnel_key",
@@ -714,6 +795,9 @@
"actions",
"tunnel_key"
],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
"setup": [
[
"$TC actions flush action tunnel_key",
@@ -738,6 +822,9 @@
"actions",
"tunnel_key"
],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
"setup": [
[
"$TC actions flush action tunnel_key",
@@ -762,6 +849,9 @@
"actions",
"tunnel_key"
],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
"setup": [
[
"$TC actions flush action tunnel_key",
@@ -786,6 +876,9 @@
"actions",
"tunnel_key"
],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
"setup": [
[
"$TC actions flush action tunnel_key",
@@ -811,6 +904,9 @@
"actions",
"tunnel_key"
],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
"setup": [
[
"$TC actions flush action tunnel_key",
@@ -836,6 +932,9 @@
"actions",
"tunnel_key"
],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
"setup": [
[
"$TC actions flush action tunnel_key",
@@ -864,6 +963,9 @@
"actions",
"tunnel_key"
],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
"setup": [
[
"$TC actions flush action tunnel_key",
@@ -892,6 +994,9 @@
"actions",
"tunnel_key"
],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
"setup": [
[
"$TC actions flush action tunnel_key",
@@ -917,6 +1022,9 @@
"actions",
"tunnel_key"
],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
"setup": [
[
"$TC actions flush action tunnel_key",
@@ -933,5 +1041,89 @@
"teardown": [
"$TC actions flush action tunnel_key"
]
+ },
+ {
+ "id": "3671",
+ "name": "Delete tunnel_key set action with valid index",
+ "category": [
+ "actions",
+ "tunnel_key"
+ ],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
+ "setup": [
+ [
+ "$TC actions flush action tunnel_key",
+ 0,
+ 1,
+ 255
+ ],
+ "$TC actions add action tunnel_key set src_ip 1.1.1.1 dst_ip 2.2.2.2 index 1"
+ ],
+ "cmdUnderTest": "$TC actions del action tunnel_key index 1",
+ "expExitCode": "0",
+ "verifyCmd": "$TC actions list action tunnel_key",
+ "matchPattern": "action order [0-9]+: tunnel_key.*set.*src_ip 1.1.1.1.*dst_ip 2.2.2.2.*index 1",
+ "matchCount": "0",
+ "teardown": [
+ "$TC actions flush action tunnel_key"
+ ]
+ },
+ {
+ "id": "8597",
+ "name": "Delete tunnel_key set action with invalid index",
+ "category": [
+ "actions",
+ "tunnel_key"
+ ],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
+ "setup": [
+ [
+ "$TC actions flush action tunnel_key",
+ 0,
+ 1,
+ 255
+ ],
+ "$TC actions add action tunnel_key set src_ip 1.1.1.1 dst_ip 2.2.2.2 index 1"
+ ],
+ "cmdUnderTest": "$TC actions del action tunnel_key index 10",
+ "expExitCode": "255",
+ "verifyCmd": "$TC actions list action tunnel_key",
+ "matchPattern": "action order [0-9]+: tunnel_key.*set.*src_ip 1.1.1.1.*dst_ip 2.2.2.2.*index 1",
+ "matchCount": "1",
+ "teardown": [
+ "$TC actions flush action tunnel_key"
+ ]
+ },
+ {
+ "id": "6bda",
+ "name": "Add tunnel_key action with nofrag option",
+ "category": [
+ "actions",
+ "tunnel_key"
+ ],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
+ "dependsOn": "$TC actions add action tunnel_key help 2>&1 | grep -q nofrag",
+ "setup": [
+ [
+ "$TC action flush action tunnel_key",
+ 0,
+ 1,
+ 255
+ ]
+ ],
+ "cmdUnderTest": "$TC actions add action tunnel_key set src_ip 10.10.10.1 dst_ip 10.10.10.2 id 1111 nofrag index 222",
+ "expExitCode": "0",
+ "verifyCmd": "$TC actions get action tunnel_key index 222",
+ "matchPattern": "action order [0-9]+: tunnel_key.*src_ip 10.10.10.1.*dst_ip 10.10.10.2.*key_id 1111.*csum.*nofrag pipe.*index 222",
+ "matchCount": "1",
+ "teardown": [
+ "$TC actions flush action tunnel_key"
+ ]
}
]
diff --git a/tools/testing/selftests/tc-testing/tc-tests/actions/vlan.json b/tools/testing/selftests/tc-testing/tc-tests/actions/vlan.json
index 41d783254b08..e5fe8762978a 100644
--- a/tools/testing/selftests/tc-testing/tc-tests/actions/vlan.json
+++ b/tools/testing/selftests/tc-testing/tc-tests/actions/vlan.json
@@ -6,6 +6,9 @@
"actions",
"vlan"
],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
"setup": [
[
"$TC actions flush action vlan",
@@ -30,6 +33,9 @@
"actions",
"vlan"
],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
"setup": [
[
"$TC actions flush action vlan",
@@ -54,6 +60,9 @@
"actions",
"vlan"
],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
"setup": [
[
"$TC actions flush action vlan",
@@ -78,6 +87,9 @@
"actions",
"vlan"
],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
"setup": [
[
"$TC actions flush action vlan",
@@ -102,6 +114,9 @@
"actions",
"vlan"
],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
"setup": [
[
"$TC actions flush action vlan",
@@ -126,6 +141,9 @@
"actions",
"vlan"
],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
"setup": [
[
"$TC actions flush action vlan",
@@ -150,6 +168,9 @@
"actions",
"vlan"
],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
"setup": [
[
"$TC actions flush action vlan",
@@ -174,6 +195,9 @@
"actions",
"vlan"
],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
"setup": [
[
"$TC actions flush action vlan",
@@ -196,6 +220,9 @@
"actions",
"vlan"
],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
"setup": [
[
"$TC actions flush action vlan",
@@ -220,6 +247,9 @@
"actions",
"vlan"
],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
"setup": [
[
"$TC actions flush action vlan",
@@ -242,6 +272,9 @@
"actions",
"vlan"
],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
"setup": [
[
"$TC actions flush action vlan",
@@ -264,6 +297,9 @@
"actions",
"vlan"
],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
"setup": [
[
"$TC actions flush action vlan",
@@ -286,6 +322,9 @@
"actions",
"vlan"
],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
"setup": [
[
"$TC actions flush action vlan",
@@ -310,6 +349,9 @@
"actions",
"vlan"
],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
"setup": [
[
"$TC actions flush action vlan",
@@ -334,6 +376,9 @@
"actions",
"vlan"
],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
"setup": [
[
"$TC actions flush action vlan",
@@ -358,6 +403,9 @@
"actions",
"vlan"
],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
"setup": [
[
"$TC actions flush action vlan",
@@ -382,6 +430,9 @@
"actions",
"vlan"
],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
"setup": [
[
"$TC actions flush action vlan",
@@ -406,6 +457,9 @@
"actions",
"vlan"
],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
"setup": [
[
"$TC actions flush action vlan",
@@ -430,6 +484,9 @@
"actions",
"vlan"
],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
"setup": [
[
"$TC actions flush action vlan",
@@ -446,12 +503,42 @@
"teardown": []
},
{
+ "id": "ba5b",
+ "name": "Add vlan modify action for protocol 802.1Q setting priority 0",
+ "category": [
+ "actions",
+ "vlan"
+ ],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
+ "setup": [
+ [
+ "$TC actions flush action vlan",
+ 0,
+ 1,
+ 255
+ ]
+ ],
+ "cmdUnderTest": "$TC actions add action vlan modify protocol 802.1Q id 5 priority 0 index 100",
+ "expExitCode": "0",
+ "verifyCmd": "$TC actions get action vlan index 100",
+ "matchPattern": "action order [0-9]+: vlan.*modify id 100 priority 0 protocol 802.1Q pipe.*index 100 ref",
+ "matchCount": "0",
+ "teardown": [
+ "$TC actions flush action vlan"
+ ]
+ },
+ {
"id": "6812",
"name": "Add vlan modify action for protocol 802.1Q",
"category": [
"actions",
"vlan"
],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
"setup": [
[
"$TC actions flush action vlan",
@@ -463,7 +550,7 @@
"cmdUnderTest": "$TC actions add action vlan modify protocol 802.1Q id 5 index 100",
"expExitCode": "0",
"verifyCmd": "$TC actions get action vlan index 100",
- "matchPattern": "action order [0-9]+: vlan.*modify id 100 protocol 802.1Q priority 0 pipe.*index 100 ref",
+ "matchPattern": "action order [0-9]+: vlan.*modify id 100 protocol 802.1Q pipe.*index 100 ref",
"matchCount": "0",
"teardown": [
"$TC actions flush action vlan"
@@ -476,6 +563,9 @@
"actions",
"vlan"
],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
"setup": [
[
"$TC actions flush action vlan",
@@ -487,7 +577,7 @@
"cmdUnderTest": "$TC actions add action vlan modify protocol 802.1ad id 500 reclassify index 12",
"expExitCode": "0",
"verifyCmd": "$TC actions get action vlan index 12",
- "matchPattern": "action order [0-9]+: vlan.*modify id 500 protocol 802.1ad priority 0 reclassify.*index 12 ref",
+ "matchPattern": "action order [0-9]+: vlan.*modify id 500 protocol 802.1ad reclassify.*index 12 ref",
"matchCount": "1",
"teardown": [
"$TC actions flush action vlan"
@@ -500,6 +590,9 @@
"actions",
"vlan"
],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
"setup": [
[
"$TC actions flush action vlan",
@@ -525,6 +618,9 @@
"actions",
"vlan"
],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
"setup": [
[
"$TC actions flush action vlan",
@@ -550,6 +646,9 @@
"actions",
"vlan"
],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
"setup": [
[
"$TC actions flush action vlan",
@@ -575,6 +674,9 @@
"actions",
"vlan"
],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
"setup": [
[
"$TC actions flush action vlan",
@@ -600,6 +702,9 @@
"actions",
"vlan"
],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
"setup": [
[
"$TC actions flush action vlan",
@@ -623,6 +728,9 @@
"actions",
"vlan"
],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
"setup": [
[
"$TC actions flush action vlan",
@@ -646,6 +754,9 @@
"actions",
"vlan"
],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
"setup": [
[
"$TC actions flush action vlan",
@@ -672,6 +783,9 @@
"actions",
"vlan"
],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
"setup": [
[
"$TC actions flush action vlan",
@@ -696,6 +810,9 @@
"actions",
"vlan"
],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
"setup": [
[
"$TC actions flush action vlan",
@@ -721,6 +838,9 @@
"actions",
"vlan"
],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
"setup": [
[
"$TC actions flush action vlan",
@@ -745,6 +865,9 @@
"actions",
"vlan"
],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
"setup": [
[
"$TC actions flush action vlan",
@@ -768,6 +891,9 @@
"actions",
"vlan"
],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
"setup": [
[
"$TC actions flush action vlan",
@@ -792,6 +918,9 @@
"actions",
"vlan"
],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
"setup": [
[
"$TC actions flush action vlan",
@@ -815,6 +944,9 @@
"actions",
"vlan"
],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
"setup": [
[
"$TC actions flush action vlan",
diff --git a/tools/testing/selftests/tc-testing/tc-tests/filters/basic.json b/tools/testing/selftests/tc-testing/tc-tests/filters/basic.json
index e788c114a484..d1278de8ebc3 100644
--- a/tools/testing/selftests/tc-testing/tc-tests/filters/basic.json
+++ b/tools/testing/selftests/tc-testing/tc-tests/filters/basic.json
@@ -1274,5 +1274,52 @@
"teardown": [
"$TC qdisc del dev $DEV1 ingress"
]
+ },
+ {
+ "id": "0811",
+ "name": "Add multiple basic filter with cmp ematch u8/link layer and default action and dump them",
+ "category": [
+ "filter",
+ "basic"
+ ],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
+ "setup": [
+ "$TC qdisc add dev $DEV1 ingress",
+ "$TC filter add dev $DEV1 parent ffff: handle 1 protocol ip prio 1 basic match 'cmp(u8 at 0 layer link mask 0xff gt 10)' classid 1:1"
+ ],
+ "cmdUnderTest": "$TC filter add dev $DEV1 parent ffff: handle 2 protocol ip prio 1 basic match 'cmp(u8 at 0 layer link mask 0xff gt 10)' classid 1:1",
+ "expExitCode": "0",
+ "verifyCmd": "$TC filter show dev $DEV1 parent ffff:",
+ "matchPattern": "^filter protocol ip pref 1 basic",
+ "matchCount": "3",
+ "teardown": [
+ "$TC qdisc del dev $DEV1 ingress"
+ ]
+ },
+ {
+ "id": "5129",
+ "name": "List basic filters",
+ "category": [
+ "filter",
+ "basic"
+ ],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
+ "setup": [
+ "$TC qdisc add dev $DEV1 ingress",
+ "$TC filter add dev $DEV1 parent ffff: handle 1 protocol ip prio 1 basic match 'cmp(u8 at 0 layer link mask 0xff gt 10)' classid 1:1",
+ "$TC filter add dev $DEV1 parent ffff: handle 2 protocol ip prio 1 basic match 'cmp(u8 at 0 layer link mask 0xff gt 10)' classid 1:1"
+ ],
+ "cmdUnderTest": "$TC filter show dev $DEV1 parent ffff:",
+ "expExitCode": "0",
+ "verifyCmd": "$TC filter show dev $DEV1 parent ffff:",
+ "matchPattern": "cmp\\(u8 at 0 layer 0 mask 0xff gt 10\\)",
+ "matchCount": "2",
+ "teardown": [
+ "$TC qdisc del dev $DEV1 ingress"
+ ]
}
]
diff --git a/tools/testing/selftests/tc-testing/tc-tests/filters/bpf.json b/tools/testing/selftests/tc-testing/tc-tests/filters/bpf.json
new file mode 100644
index 000000000000..725d406a30ac
--- /dev/null
+++ b/tools/testing/selftests/tc-testing/tc-tests/filters/bpf.json
@@ -0,0 +1,175 @@
+[
+ {
+ "id": "23c3",
+ "name": "Add cBPF filter with valid bytecode",
+ "category": [
+ "filter",
+ "bpf-filter"
+ ],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
+ "setup": [
+ "$TC qdisc add dev $DEV1 ingress"
+ ],
+ "cmdUnderTest": "$TC filter add dev $DEV1 parent ffff: handle 1 protocol ip prio 100 bpf bytecode '4,40 0 0 12,21 0 1 2048,6 0 0 262144,6 0 0 0'",
+ "expExitCode": "0",
+ "verifyCmd": "$TC filter get dev $DEV1 parent ffff: handle 1 protocol ip prio 100 bpf",
+ "matchPattern": "filter parent ffff: protocol ip pref 100 bpf chain [0-9]+ handle 0x1.*bytecode '4,40 0 0 12,21 0 1 2048,6 0 0 262144,6 0 0 0'",
+ "matchCount": "1",
+ "teardown": [
+ "$TC qdisc del dev $DEV1 ingress"
+ ]
+ },
+ {
+ "id": "1563",
+ "name": "Add cBPF filter with invalid bytecode",
+ "category": [
+ "filter",
+ "bpf-filter"
+ ],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
+ "setup": [
+ "$TC qdisc add dev $DEV1 ingress"
+ ],
+ "cmdUnderTest": "$TC filter add dev $DEV1 parent ffff: handle 1 protocol ip prio 100 bpf bytecode '4,40 0 0 12,31 0 1 2048,6 0 0 262144,6 0 0 0'",
+ "expExitCode": "2",
+ "verifyCmd": "$TC filter get dev $DEV1 parent ffff: handle 1 protocol ip prio 100 bpf",
+ "matchPattern": "filter parent ffff: protocol ip pref 100 bpf chain [0-9]+ handle 0x1.*bytecode '4,40 0 0 12,21 0 1 2048,6 0 0 262144,6 0 0 0'",
+ "matchCount": "0",
+ "teardown": [
+ "$TC qdisc del dev $DEV1 ingress"
+ ]
+ },
+ {
+ "id": "2334",
+ "name": "Add eBPF filter with valid object-file",
+ "category": [
+ "filter",
+ "bpf-filter"
+ ],
+ "plugins": {
+ "requires": [
+ "nsPlugin"
+ ]
+ },
+ "setup": [
+ "$TC qdisc add dev $DEV1 ingress"
+ ],
+ "cmdUnderTest": "$TC filter add dev $DEV1 parent ffff: handle 1 protocol ip prio 100 bpf object-file $EBPFDIR/action-ebpf section action-ok",
+ "expExitCode": "0",
+ "verifyCmd": "$TC filter get dev $DEV1 parent ffff: handle 1 protocol ip prio 100 bpf",
+ "matchPattern": "filter parent ffff: protocol ip pref 100 bpf chain [0-9]+ handle 0x1 action-ebpf:\\[action-ok\\].*tag [0-9a-f]{16}( jited)?",
+ "matchCount": "1",
+ "teardown": [
+ "$TC qdisc del dev $DEV1 ingress"
+ ]
+ },
+ {
+ "id": "2373",
+ "name": "Add eBPF filter with invalid object-file",
+ "category": [
+ "filter",
+ "bpf-filter"
+ ],
+ "plugins": {
+ "requires": [
+ "nsPlugin"
+ ]
+ },
+ "setup": [
+ "$TC qdisc add dev $DEV1 ingress"
+ ],
+ "cmdUnderTest": "$TC filter add dev $DEV1 parent ffff: handle 1 protocol ip prio 100 bpf object-file $EBPFDIR/action-ebpf section action-ko",
+ "expExitCode": "1",
+ "verifyCmd": "$TC filter get dev $DEV1 parent ffff: handle 1 protocol ip prio 100 bpf",
+ "matchPattern": "filter parent ffff: protocol ip pref 100 bpf chain [0-9]+ handle 0x1 action-ebpf:\\[action-ko\\].*tag [0-9a-f]{16}( jited)?",
+ "matchCount": "0",
+ "teardown": [
+ "$TC qdisc del dev $DEV1 ingress"
+ ]
+ },
+ {
+ "id": "4423",
+ "name": "Replace cBPF bytecode",
+ "category": [
+ "filter",
+ "bpf-filter"
+ ],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
+ "setup": [
+ "$TC qdisc add dev $DEV1 ingress",
+ [
+ "$TC filter add dev $DEV1 parent ffff: handle 1 protocol ip prio 100 bpf bytecode '4,40 0 0 12,21 0 1 2048,6 0 0 262144,6 0 0 0'",
+ 0,
+ 1,
+ 255
+ ]
+ ],
+ "cmdUnderTest": "$TC filter replace dev $DEV1 parent ffff: handle 1 protocol ip prio 100 bpf bytecode '4,40 0 0 12,21 0 1 2054,6 0 0 262144,6 0 0 0'",
+ "expExitCode": "0",
+ "verifyCmd": "$TC filter get dev $DEV1 parent ffff: handle 1 protocol ip prio 100 bpf",
+ "matchPattern": "filter parent ffff: protocol ip pref 100 bpf chain [0-9]+ handle 0x1.*bytecode '4,40 0 0 12,21 0 1 2054,6 0 0 262144,6 0 0 0'",
+ "matchCount": "1",
+ "teardown": [
+ "$TC qdisc del dev $DEV1 ingress"
+ ]
+ },
+ {
+ "id": "5122",
+ "name": "Delete cBPF filter",
+ "category": [
+ "filter",
+ "bpf-filter"
+ ],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
+ "setup": [
+ "$TC qdisc add dev $DEV1 ingress",
+ [
+ "$TC filter add dev $DEV1 parent ffff: handle 1 protocol ip prio 100 bpf bytecode '4,40 0 0 12,21 0 1 2048,6 0 0 262144,6 0 0 0'",
+ 0,
+ 1,
+ 255
+ ]
+ ],
+ "cmdUnderTest": "$TC filter del dev $DEV1 parent ffff: handle 1 protocol ip prio 100 bpf",
+ "expExitCode": "0",
+ "verifyCmd": "$TC filter get dev $DEV1 parent ffff: handle 1 protocol ip prio 100 bpf",
+ "matchPattern": "filter parent ffff: protocol ip pref 100 bpf chain [0-9]+ handle 0x1.*bytecode '4,40 0 0 12,21 0 1 2048,6 0 0 262144,6 0 0 0'",
+ "matchCount": "0",
+ "teardown": [
+ "$TC qdisc del dev $DEV1 ingress"
+ ]
+ },
+ {
+ "id": "e0a9",
+ "name": "List cBPF filters",
+ "category": [
+ "filter",
+ "bpf-filter"
+ ],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
+ "setup": [
+ "$TC qdisc add dev $DEV1 ingress",
+ "$TC filter add dev $DEV1 parent ffff: handle 1 protocol ip prio 100 bpf bytecode '4,40 0 0 12,21 0 1 2048,6 0 0 262144,6 0 0 0'",
+ "$TC filter add dev $DEV1 parent ffff: handle 2 protocol ip prio 100 bpf bytecode '4,40 0 0 12,21 0 1 2054,6 0 0 262144,6 0 0 0'",
+ "$TC filter add dev $DEV1 parent ffff: handle 100 protocol ip prio 100 bpf bytecode '4,40 0 0 12,21 0 1 33024,6 0 0 262144,6 0 0 0'"
+ ],
+ "cmdUnderTest": "$TC filter show dev $DEV1 parent ffff:",
+ "expExitCode": "0",
+ "verifyCmd": "$TC filter show dev $DEV1 parent ffff:",
+ "matchPattern": "filter protocol ip pref 100 bpf chain [0-9]+ handle",
+ "matchCount": "3",
+ "teardown": [
+ "$TC qdisc del dev $DEV1 ingress"
+ ]
+ }
+]
diff --git a/tools/testing/selftests/tc-testing/tc-tests/filters/cgroup.json b/tools/testing/selftests/tc-testing/tc-tests/filters/cgroup.json
new file mode 100644
index 000000000000..03723cf84379
--- /dev/null
+++ b/tools/testing/selftests/tc-testing/tc-tests/filters/cgroup.json
@@ -0,0 +1,1236 @@
+[
+ {
+ "id": "6273",
+ "name": "Add cgroup filter with cmp ematch u8/link layer and drop action",
+ "category": [
+ "filter",
+ "cgroup"
+ ],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
+ "setup": [
+ "$TC qdisc add dev $DEV1 ingress"
+ ],
+ "cmdUnderTest": "$TC filter add dev $DEV1 parent ffff: handle 1 protocol ip prio 1 cgroup match 'cmp(u8 at 0 layer link mask 0xff gt 10)' action drop",
+ "expExitCode": "0",
+ "verifyCmd": "$TC filter show dev $DEV1 parent ffff:",
+ "matchPattern": "^filter protocol ip pref [0-9]+ cgroup chain [0-9]+.*handle 0x1.*cmp\\(u8 at 0 layer 0 mask 0xff gt 10\\)",
+ "matchCount": "1",
+ "teardown": [
+ "$TC qdisc del dev $DEV1 ingress"
+ ]
+ },
+ {
+ "id": "4721",
+ "name": "Add cgroup filter with cmp ematch u8/link layer with trans flag and pass action",
+ "category": [
+ "filter",
+ "cgroup"
+ ],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
+ "setup": [
+ "$TC qdisc add dev $DEV1 ingress"
+ ],
+ "cmdUnderTest": "$TC filter add dev $DEV1 parent ffff: handle 1 protocol ip prio 1 cgroup match 'cmp(u8 at 0 layer link mask 0xff trans gt 10)' action pass",
+ "expExitCode": "0",
+ "verifyCmd": "$TC filter show dev $DEV1 parent ffff:",
+ "matchPattern": "^filter protocol ip pref [0-9]+ cgroup chain [0-9]+.*handle 0x1.*cmp\\(u8 at 0 layer 0 mask 0xff trans gt 10\\)",
+ "matchCount": "1",
+ "teardown": [
+ "$TC qdisc del dev $DEV1 ingress"
+ ]
+ },
+ {
+ "id": "d392",
+ "name": "Add cgroup filter with cmp ematch u16/link layer and pipe action",
+ "category": [
+ "filter",
+ "cgroup"
+ ],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
+ "setup": [
+ "$TC qdisc add dev $DEV1 ingress"
+ ],
+ "cmdUnderTest": "$TC filter add dev $DEV1 parent ffff: handle 1 protocol ip prio 1 cgroup match 'cmp(u16 at 0 layer 0 mask 0xff00 lt 3)' action pipe",
+ "expExitCode": "0",
+ "verifyCmd": "$TC filter show dev $DEV1 parent ffff:",
+ "matchPattern": "^filter protocol ip pref [0-9]+ cgroup chain [0-9]+.*handle 0x1.*cmp\\(u16 at 0 layer 0 mask 0xff00 lt 3\\).*action pipe",
+ "matchCount": "1",
+ "teardown": [
+ "$TC qdisc del dev $DEV1 ingress"
+ ]
+ },
+ {
+ "id": "0234",
+ "name": "Add cgroup filter with cmp ematch u32/link layer and miltiple actions",
+ "category": [
+ "filter",
+ "cgroup"
+ ],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
+ "setup": [
+ "$TC qdisc add dev $DEV1 ingress"
+ ],
+ "cmdUnderTest": "$TC filter add dev $DEV1 parent ffff: handle 1 protocol ip prio 1 cgroup match 'cmp(u32 at 4 layer link mask 0xff00ff00 eq 3)' action skbedit mark 7 pipe action gact drop",
+ "expExitCode": "0",
+ "verifyCmd": "$TC filter show dev $DEV1 parent ffff:",
+ "matchPattern": "^filter protocol ip pref [0-9]+ cgroup chain [0-9]+.*handle 0x1.*cmp\\(u32 at 4 layer 0 mask 0xff00ff00 eq 3\\).*action.*skbedit.*mark 7 pipe.*action.*gact action drop",
+ "matchCount": "1",
+ "teardown": [
+ "$TC qdisc del dev $DEV1 ingress"
+ ]
+ },
+ {
+ "id": "8499",
+ "name": "Add cgroup filter with cmp ematch u8/network layer and pass action",
+ "category": [
+ "filter",
+ "cgroup"
+ ],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
+ "setup": [
+ "$TC qdisc add dev $DEV1 ingress"
+ ],
+ "cmdUnderTest": "$TC filter add dev $DEV1 parent ffff: handle 0xab protocol ip prio 11 cgroup match 'cmp(u8 at 0 layer 1 mask 0xff gt 10)' action pass",
+ "expExitCode": "0",
+ "verifyCmd": "$TC filter show dev $DEV1 parent ffff:",
+ "matchPattern": "^filter protocol ip pref 11 cgroup.*handle 0xab.*cmp\\(u8 at 0 layer 1 mask 0xff gt 10\\)",
+ "matchCount": "1",
+ "teardown": [
+ "$TC qdisc del dev $DEV1 ingress"
+ ]
+ },
+ {
+ "id": "b273",
+ "name": "Add cgroup filter with cmp ematch u8/network layer with trans flag and drop action",
+ "category": [
+ "filter",
+ "cgroup"
+ ],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
+ "setup": [
+ "$TC qdisc add dev $DEV1 ingress"
+ ],
+ "cmdUnderTest": "$TC filter add dev $DEV1 parent ffff: handle 0xab protocol ip prio 11 cgroup match 'cmp(u8 at 0 layer 1 mask 0xff trans gt 10)' action drop",
+ "expExitCode": "0",
+ "verifyCmd": "$TC filter show dev $DEV1 parent ffff:",
+ "matchPattern": "^filter protocol ip pref 11 cgroup.*handle 0xab.*cmp\\(u8 at 0 layer 1 mask 0xff trans gt 10\\)",
+ "matchCount": "1",
+ "teardown": [
+ "$TC qdisc del dev $DEV1 ingress"
+ ]
+ },
+ {
+ "id": "1934",
+ "name": "Add cgroup filter with cmp ematch u16/network layer and pipe action",
+ "category": [
+ "filter",
+ "cgroup"
+ ],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
+ "setup": [
+ "$TC qdisc add dev $DEV1 ingress"
+ ],
+ "cmdUnderTest": "$TC filter add dev $DEV1 parent ffff: handle 0x100 protocol ip prio 100 cgroup match 'cmp(u16 at 0 layer network mask 0xff00 lt 3)' action pipe",
+ "expExitCode": "0",
+ "verifyCmd": "$TC filter show dev $DEV1 parent ffff:",
+ "matchPattern": "filter protocol ip pref 100 cgroup.*handle 0x100..*cmp\\(u16 at 0 layer 1 mask 0xff00 lt 3\\)",
+ "matchCount": "1",
+ "teardown": [
+ "$TC qdisc del dev $DEV1 ingress"
+ ]
+ },
+ {
+ "id": "2733",
+ "name": "Add cgroup filter with cmp ematch u32/network layer and miltiple actions",
+ "category": [
+ "filter",
+ "cgroup"
+ ],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
+ "setup": [
+ "$TC qdisc add dev $DEV1 ingress"
+ ],
+ "cmdUnderTest": "$TC filter add dev $DEV1 parent ffff: handle 0x112233 protocol ip prio 7 cgroup match 'cmp(u32 at 4 layer network mask 0xff00ff00 eq 3)' action skbedit mark 7 pipe action gact drop",
+ "expExitCode": "0",
+ "verifyCmd": "$TC filter show dev $DEV1 parent ffff:",
+ "matchPattern": "^filter protocol ip pref 7 cgroup.*handle 0x112233.*cmp\\(u32 at 4 layer 1 mask 0xff00ff00 eq 3\\).*action.*skbedit.*mark 7 pipe.*action.*gact action drop",
+ "matchCount": "1",
+ "teardown": [
+ "$TC qdisc del dev $DEV1 ingress"
+ ]
+ },
+ {
+ "id": "3271",
+ "name": "Add cgroup filter with NOT cmp ematch rule and pass action",
+ "category": [
+ "filter",
+ "cgroup"
+ ],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
+ "setup": [
+ "$TC qdisc add dev $DEV1 ingress"
+ ],
+ "cmdUnderTest": "$TC filter add dev $DEV1 parent ffff: handle 1 protocol ip prio 1 cgroup match 'not cmp(u8 at 0 layer link mask 0xff eq 3)' action pass",
+ "expExitCode": "0",
+ "verifyCmd": "$TC filter show dev $DEV1 parent ffff:",
+ "matchPattern": "^filter protocol ip pref 1 cgroup.*handle 0x1.*NOT cmp\\(u8 at 0 layer 0 mask 0xff eq 3\\)",
+ "matchCount": "1",
+ "teardown": [
+ "$TC qdisc del dev $DEV1 ingress"
+ ]
+ },
+ {
+ "id": "2362",
+ "name": "Add cgroup filter with two ANDed cmp ematch rules and single action",
+ "category": [
+ "filter",
+ "cgroup"
+ ],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
+ "setup": [
+ "$TC qdisc add dev $DEV1 ingress"
+ ],
+ "cmdUnderTest": "$TC filter add dev $DEV1 parent ffff: handle 1 protocol ip prio 1 cgroup match 'cmp(u8 at 0 layer link mask 0xff eq 3) and cmp(u16 at 8 layer link mask 0x00ff gt 7)' action gact drop",
+ "expExitCode": "0",
+ "verifyCmd": "$TC filter show dev $DEV1 parent ffff:",
+ "matchPattern": "^filter protocol ip pref 1 cgroup.*handle 0x1.*cmp\\(u8 at 0 layer 0 mask 0xff eq 3\\).*AND cmp\\(u16 at 8 layer 0 mask 0xff gt 7\\).*action.*gact action drop",
+ "matchCount": "1",
+ "teardown": [
+ "$TC qdisc del dev $DEV1 ingress"
+ ]
+ },
+ {
+ "id": "9993",
+ "name": "Add cgroup filter with two ORed cmp ematch rules and single action",
+ "category": [
+ "filter",
+ "cgroup"
+ ],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
+ "setup": [
+ "$TC qdisc add dev $DEV1 ingress"
+ ],
+ "cmdUnderTest": "$TC filter add dev $DEV1 parent ffff: handle 1 protocol ip prio 1 cgroup match 'cmp(u8 at 0 layer link mask 0xff eq 3) or cmp(u16 at 8 layer link mask 0x00ff gt 7)' action gact drop",
+ "expExitCode": "0",
+ "verifyCmd": "$TC filter show dev $DEV1 parent ffff:",
+ "matchPattern": "^filter protocol ip pref 1 cgroup.*handle 0x1.*cmp\\(u8 at 0 layer 0 mask 0xff eq 3\\).*OR cmp\\(u16 at 8 layer 0 mask 0xff gt 7\\).*action.*gact action drop",
+ "matchCount": "1",
+ "teardown": [
+ "$TC qdisc del dev $DEV1 ingress"
+ ]
+ },
+ {
+ "id": "2331",
+ "name": "Add cgroup filter with two ANDed cmp ematch rules and one ORed ematch rule and single action",
+ "category": [
+ "filter",
+ "cgroup"
+ ],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
+ "setup": [
+ "$TC qdisc add dev $DEV1 ingress"
+ ],
+ "cmdUnderTest": "$TC filter add dev $DEV1 parent ffff: handle 1 protocol ip prio 1 cgroup match 'cmp(u8 at 0 layer link mask 0xff eq 3) and cmp(u16 at 8 layer link mask 0x00ff gt 7) or cmp(u32 at 4 layer network mask 0xa0a0 lt 3)' action gact drop",
+ "expExitCode": "0",
+ "verifyCmd": "$TC filter show dev $DEV1 parent ffff:",
+ "matchPattern": "^filter protocol ip pref 1 cgroup.*handle 0x1.*cmp\\(u8 at 0 layer 0 mask 0xff eq 3\\).*AND cmp\\(u16 at 8 layer 0 mask 0xff gt 7\\).*OR cmp\\(u32 at 4 layer 1 mask 0xa0a0 lt 3\\).*action.*gact action drop",
+ "matchCount": "1",
+ "teardown": [
+ "$TC qdisc del dev $DEV1 ingress"
+ ]
+ },
+ {
+ "id": "3645",
+ "name": "Add cgroup filter with two ANDed cmp ematch rules and one NOT ORed ematch rule and single action",
+ "category": [
+ "filter",
+ "cgroup"
+ ],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
+ "setup": [
+ "$TC qdisc add dev $DEV1 ingress"
+ ],
+ "cmdUnderTest": "$TC filter add dev $DEV1 parent ffff: handle 1 protocol ip prio 1 cgroup match 'cmp(u8 at 0 layer link mask 0xff eq 3) and cmp(u16 at 8 layer link mask 0x00ff gt 7) or not cmp(u32 at 4 layer network mask 0xa0a0 lt 3)' action gact drop",
+ "expExitCode": "0",
+ "verifyCmd": "$TC filter show dev $DEV1 parent ffff:",
+ "matchPattern": "^filter protocol ip pref 1 cgroup.*handle 0x1.*cmp\\(u8 at 0 layer 0 mask 0xff eq 3\\).*AND cmp\\(u16 at 8 layer 0 mask 0xff gt 7\\).*OR NOT cmp\\(u32 at 4 layer 1 mask 0xa0a0 lt 3\\).*action.*gact action drop",
+ "matchCount": "1",
+ "teardown": [
+ "$TC qdisc del dev $DEV1 ingress"
+ ]
+ },
+ {
+ "id": "b124",
+ "name": "Add cgroup filter with u32 ematch u8/zero offset and drop action",
+ "category": [
+ "filter",
+ "cgroup"
+ ],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
+ "setup": [
+ "$TC qdisc add dev $DEV1 ingress"
+ ],
+ "cmdUnderTest": "$TC filter add dev $DEV1 parent ffff: handle 1 protocol ip prio 1 cgroup match 'u32(u8 0x11 0x0f at 0)' action drop",
+ "expExitCode": "0",
+ "verifyCmd": "$TC filter show dev $DEV1 parent ffff:",
+ "matchPattern": "^filter protocol ip pref 1 cgroup.*handle 0x1.*u32\\(01000000/0f000000 at 0\\)",
+ "matchCount": "1",
+ "teardown": [
+ "$TC qdisc del dev $DEV1 ingress"
+ ]
+ },
+ {
+ "id": "7381",
+ "name": "Add cgroup filter with u32 ematch u8/zero offset and invalid value >0xFF",
+ "category": [
+ "filter",
+ "cgroup"
+ ],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
+ "setup": [
+ "$TC qdisc add dev $DEV1 ingress"
+ ],
+ "cmdUnderTest": "$TC filter add dev $DEV1 parent ffff: handle 1 protocol ip prio 1 basic match 'u32(u8 0x1122 0x0f at 0)' action pass",
+ "expExitCode": "1",
+ "verifyCmd": "$TC filter show dev $DEV1 parent ffff:",
+ "matchPattern": "^filter protocol ip pref 1 cgroup.*handle 0x1.*u32\\(11220000/0f000000 at 0\\)",
+ "matchCount": "0",
+ "teardown": [
+ "$TC qdisc del dev $DEV1 ingress"
+ ]
+ },
+ {
+ "id": "2231",
+ "name": "Add cgroup filter with u32 ematch u8/positive offset and drop action",
+ "category": [
+ "filter",
+ "cgroup"
+ ],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
+ "setup": [
+ "$TC qdisc add dev $DEV1 ingress"
+ ],
+ "cmdUnderTest": "$TC filter add dev $DEV1 parent ffff: handle 1 protocol ip prio 1 cgroup match 'u32(u8 0x77 0x1f at 12)' action drop",
+ "expExitCode": "0",
+ "verifyCmd": "$TC filter show dev $DEV1 parent ffff:",
+ "matchPattern": "^filter protocol ip pref 1 cgroup.*handle 0x1.*u32\\(17000000/1f000000 at 12\\)",
+ "matchCount": "1",
+ "teardown": [
+ "$TC qdisc del dev $DEV1 ingress"
+ ]
+ },
+ {
+ "id": "1882",
+ "name": "Add cgroup filter with u32 ematch u8/invalid mask >0xFF",
+ "category": [
+ "filter",
+ "cgroup"
+ ],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
+ "setup": [
+ "$TC qdisc add dev $DEV1 ingress"
+ ],
+ "cmdUnderTest": "$TC filter add dev $DEV1 parent ffff: handle 1 protocol ip prio 1 cgroup match 'u32(u8 0x77 0xff00 at 12)' action drop",
+ "expExitCode": "1",
+ "verifyCmd": "$TC filter show dev $DEV1 parent ffff:",
+ "matchPattern": "^filter protocol ip pref 1 cgroup.*handle 0x1.*u32\\(77000000/ff000000 at 12\\)",
+ "matchCount": "0",
+ "teardown": [
+ "$TC qdisc del dev $DEV1 ingress"
+ ]
+ },
+ {
+ "id": "1237",
+ "name": "Add cgroup filter with u32 ematch u8/missing offset",
+ "category": [
+ "filter",
+ "cgroup"
+ ],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
+ "setup": [
+ "$TC qdisc add dev $DEV1 ingress"
+ ],
+ "cmdUnderTest": "$TC filter add dev $DEV1 parent ffff: handle 1 protocol ip prio 1 cgroup match 'u32(u8 0x77 0xff at)' action pipe",
+ "expExitCode": "1",
+ "verifyCmd": "$TC filter show dev $DEV1 parent ffff:",
+ "matchPattern": "^filter protocol ip pref 1 cgroup.*handle 0x1.*u32\\(77000000 at 12\\)",
+ "matchCount": "0",
+ "teardown": [
+ "$TC qdisc del dev $DEV1 ingress"
+ ]
+ },
+ {
+ "id": "3812",
+ "name": "Add cgroup filter with u32 ematch u8/missing AT keyword",
+ "category": [
+ "filter",
+ "cgroup"
+ ],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
+ "setup": [
+ "$TC qdisc add dev $DEV1 ingress"
+ ],
+ "cmdUnderTest": "$TC filter add dev $DEV1 parent ffff: handle 1 protocol ip prio 1 cgroup match 'u32(u8 0x77 0xff 0)' action pass",
+ "expExitCode": "1",
+ "verifyCmd": "$TC filter show dev $DEV1 parent ffff:",
+ "matchPattern": "^filter protocol ip pref 1 cgroup.*handle 0x1.*u32\\(77000000 at 12\\)",
+ "matchCount": "0",
+ "teardown": [
+ "$TC qdisc del dev $DEV1 ingress"
+ ]
+ },
+ {
+ "id": "1112",
+ "name": "Add cgroup filter with u32 ematch u8/missing value",
+ "category": [
+ "filter",
+ "cgroup"
+ ],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
+ "setup": [
+ "$TC qdisc add dev $DEV1 ingress"
+ ],
+ "cmdUnderTest": "$TC filter add dev $DEV1 parent ffff: handle 1 protocol ip prio 1 cgroup match 'u32(u8 at 12)' action drop",
+ "expExitCode": "1",
+ "verifyCmd": "$TC filter show dev $DEV1 parent ffff:",
+ "matchPattern": "^filter protocol ip pref 1 cgroup.*handle 0x1.*u32\\(at 12\\)",
+ "matchCount": "0",
+ "teardown": [
+ "$TC qdisc del dev $DEV1 ingress"
+ ]
+ },
+ {
+ "id": "3241",
+ "name": "Add cgroup filter with u32 ematch u8/non-numeric value",
+ "category": [
+ "filter",
+ "cgroup"
+ ],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
+ "setup": [
+ "$TC qdisc add dev $DEV1 ingress"
+ ],
+ "cmdUnderTest": "$TC filter add dev $DEV1 parent ffff: handle 1 protocol ip prio 1 cgroup match 'u32(u8 zero 0xff at 0)' action pipe",
+ "expExitCode": "1",
+ "verifyCmd": "$TC filter show dev $DEV1 parent ffff:",
+ "matchPattern": "^filter protocol ip pref 1 cgroup.*handle 0x1 flowid 1:1.*u32\\(00000000/ff000000 at 0\\)",
+ "matchCount": "0",
+ "teardown": [
+ "$TC qdisc del dev $DEV1 ingress"
+ ]
+ },
+ {
+ "id": "e231",
+ "name": "Add cgroup filter with u32 ematch u8/non-numeric mask",
+ "category": [
+ "filter",
+ "cgroup"
+ ],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
+ "setup": [
+ "$TC qdisc add dev $DEV1 ingress"
+ ],
+ "cmdUnderTest": "$TC filter add dev $DEV1 parent ffff: handle 1 protocol ip prio 1 cgroup match 'u32(u8 0x11 mask at 0)' action pass",
+ "expExitCode": "1",
+ "verifyCmd": "$TC filter show dev $DEV1 parent ffff:",
+ "matchPattern": "^filter protocol ip pref 1 cgroup.*handle 0x1.*u32\\(11000000/00000000 at 0\\)",
+ "matchCount": "0",
+ "teardown": [
+ "$TC qdisc del dev $DEV1 ingress"
+ ]
+ },
+ {
+ "id": "4652",
+ "name": "Add cgroup filter with u32 ematch u8/negative offset and pass action",
+ "category": [
+ "filter",
+ "cgroup"
+ ],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
+ "setup": [
+ "$TC qdisc add dev $DEV1 ingress"
+ ],
+ "cmdUnderTest": "$TC filter add dev $DEV1 parent ffff: handle 1 protocol ip prio 1 cgroup match 'u32(u8 0xaa 0xf0 at -14)' action pass",
+ "expExitCode": "0",
+ "verifyCmd": "$TC filter show dev $DEV1 parent ffff:",
+ "matchPattern": "^filter protocol ip pref 1 cgroup.*handle 0x1.*u32\\(0000a000/0000f000 at -16\\)",
+ "matchCount": "1",
+ "teardown": [
+ "$TC qdisc del dev $DEV1 ingress"
+ ]
+ },
+ {
+ "id": "7566",
+ "name": "Add cgroup filter with u32 ematch u8/nexthdr+ offset and drop action",
+ "category": [
+ "filter",
+ "drop"
+ ],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
+ "setup": [
+ "$TC qdisc add dev $DEV1 ingress"
+ ],
+ "cmdUnderTest": "$TC filter add dev $DEV1 parent ffff: handle 1 protocol ip prio 1 cgroup match 'u32(u8 0xaa 0xf0 at nexthdr+0)' action drop",
+ "expExitCode": "0",
+ "verifyCmd": "$TC filter show dev $DEV1 parent ffff:",
+ "matchPattern": "^filter protocol ip pref 1 cgroup.*handle 0x1.*u32\\(a0000000/f0000000 at nexthdr\\+0\\)",
+ "matchCount": "1",
+ "teardown": [
+ "$TC qdisc del dev $DEV1 ingress"
+ ]
+ },
+ {
+ "id": "1331",
+ "name": "Add cgroup filter with u32 ematch u16/zero offset and pipe action",
+ "category": [
+ "filter",
+ "cgroup"
+ ],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
+ "setup": [
+ "$TC qdisc add dev $DEV1 ingress"
+ ],
+ "cmdUnderTest": "$TC filter add dev $DEV1 parent ffff: handle 1 protocol ip prio 1 cgroup match 'u32(u16 0x1122 0xffff at 0)' action pipe",
+ "expExitCode": "0",
+ "verifyCmd": "$TC filter show dev $DEV1 parent ffff:",
+ "matchPattern": "^filter protocol ip pref 1 cgroup.*handle 0x1.*u32\\(11220000/ffff0000 at 0\\)",
+ "matchCount": "1",
+ "teardown": [
+ "$TC qdisc del dev $DEV1 ingress"
+ ]
+ },
+ {
+ "id": "e354",
+ "name": "Add cgroup filter with u32 ematch u16/zero offset and invalid value >0xFFFF",
+ "category": [
+ "filter",
+ "cgroup"
+ ],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
+ "setup": [
+ "$TC qdisc add dev $DEV1 ingress"
+ ],
+ "cmdUnderTest": "$TC filter add dev $DEV1 parent ffff: handle 1 protocol ip prio 1 cgroup match 'u32(u16 0x112233 0xffff at 0)'",
+ "expExitCode": "1",
+ "verifyCmd": "$TC filter show dev $DEV1 parent ffff:",
+ "matchPattern": "^filter protocol ip pref 1 cgroup.*handle 0x1.*u32\\(11223300/ffff0000 at 0\\)",
+ "matchCount": "0",
+ "teardown": [
+ "$TC qdisc del dev $DEV1 ingress"
+ ]
+ },
+ {
+ "id": "3538",
+ "name": "Add cgroup filter with u32 ematch u16/positive offset and drop action",
+ "category": [
+ "filter",
+ "cgroup"
+ ],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
+ "setup": [
+ "$TC qdisc add dev $DEV1 ingress"
+ ],
+ "cmdUnderTest": "$TC filter add dev $DEV1 parent ffff: handle 1 protocol ip prio 1 cgroup match 'u32(u16 0x7788 0x1fff at 12)' action drop",
+ "expExitCode": "0",
+ "verifyCmd": "$TC filter show dev $DEV1 parent ffff:",
+ "matchPattern": "^filter protocol ip pref 1 cgroup.*handle 0x1.*u32\\(17880000/1fff0000 at 12\\)",
+ "matchCount": "1",
+ "teardown": [
+ "$TC qdisc del dev $DEV1 ingress"
+ ]
+ },
+ {
+ "id": "4576",
+ "name": "Add cgroup filter with u32 ematch u16/invalid mask >0xFFFF",
+ "category": [
+ "filter",
+ "cgroup"
+ ],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
+ "setup": [
+ "$TC qdisc add dev $DEV1 ingress"
+ ],
+ "cmdUnderTest": "$TC filter add dev $DEV1 parent ffff: handle 1 protocol ip prio 1 cgroup match 'u32(u16 0x7788 0xffffffff at 12)' action pass",
+ "expExitCode": "1",
+ "verifyCmd": "$TC filter show dev $DEV1 parent ffff:",
+ "matchPattern": "^filter protocol ip pref 1 cgroup.*handle 0x1.*u32\\(77880000/ffffffff at 12\\)",
+ "matchCount": "0",
+ "teardown": [
+ "$TC qdisc del dev $DEV1 ingress"
+ ]
+ },
+ {
+ "id": "b842",
+ "name": "Add cgroup filter with u32 ematch u16/missing offset",
+ "category": [
+ "filter",
+ "cgroup"
+ ],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
+ "setup": [
+ "$TC qdisc add dev $DEV1 ingress"
+ ],
+ "cmdUnderTest": "$TC filter add dev $DEV1 parent ffff: handle 1 protocol ip prio 1 cgroup match 'u32(u16 0x7788 0xffff at)' action pass",
+ "expExitCode": "1",
+ "verifyCmd": "$TC filter show dev $DEV1 parent ffff:",
+ "matchPattern": "^filter protocol ip pref 1 cgroup.*handle 0x1.*u32\\(77880000 at 12\\)",
+ "matchCount": "0",
+ "teardown": [
+ "$TC qdisc del dev $DEV1 ingress"
+ ]
+ },
+ {
+ "id": "c924",
+ "name": "Add cgroup filter with u32 ematch u16/missing AT keyword",
+ "category": [
+ "filter",
+ "cgroup"
+ ],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
+ "setup": [
+ "$TC qdisc add dev $DEV1 ingress"
+ ],
+ "cmdUnderTest": "$TC filter add dev $DEV1 parent ffff: handle 1 protocol ip prio 1 cgroup match 'u32(u16 0x7788 0xffff 0)' action pass",
+ "expExitCode": "1",
+ "verifyCmd": "$TC filter show dev $DEV1 parent ffff:",
+ "matchPattern": "^filter protocol ip pref 1 cgroup.*handle 0x1.*u32\\(77880000/ffff0000 at 0\\)",
+ "matchCount": "0",
+ "teardown": [
+ "$TC qdisc del dev $DEV1 ingress"
+ ]
+ },
+ {
+ "id": "cc93",
+ "name": "Add cgroup filter with u32 ematch u16/missing value",
+ "category": [
+ "filter",
+ "cgroup"
+ ],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
+ "setup": [
+ "$TC qdisc add dev $DEV1 ingress"
+ ],
+ "cmdUnderTest": "$TC filter add dev $DEV1 parent ffff: handle 1 protocol ip prio 1 cgroup match 'u32(u16 at 12)' action pass",
+ "expExitCode": "1",
+ "verifyCmd": "$TC filter show dev $DEV1 parent ffff:",
+ "matchPattern": "^filter protocol ip pref 1 cgroup.*handle 0x1.*u32\\(at 12\\)",
+ "matchCount": "0",
+ "teardown": [
+ "$TC qdisc del dev $DEV1 ingress"
+ ]
+ },
+ {
+ "id": "123c",
+ "name": "Add cgroup filter with u32 ematch u16/non-numeric value",
+ "category": [
+ "filter",
+ "cgroup"
+ ],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
+ "setup": [
+ "$TC qdisc add dev $DEV1 ingress"
+ ],
+ "cmdUnderTest": "$TC filter add dev $DEV1 parent ffff: handle 1 protocol ip prio 1 cgroup match 'u32(u16 zero 0xffff at 0)' action pass",
+ "expExitCode": "1",
+ "verifyCmd": "$TC filter show dev $DEV1 parent ffff:",
+ "matchPattern": "^filter protocol ip pref 1 cgroup.*handle 0x1.*u32\\(00000000/ffff0000 at 0\\)",
+ "matchCount": "0",
+ "teardown": [
+ "$TC qdisc del dev $DEV1 ingress"
+ ]
+ },
+ {
+ "id": "3675",
+ "name": "Add cgroup filter with u32 ematch u16/non-numeric mask",
+ "category": [
+ "filter",
+ "cgroup"
+ ],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
+ "setup": [
+ "$TC qdisc add dev $DEV1 ingress"
+ ],
+ "cmdUnderTest": "$TC filter add dev $DEV1 parent ffff: handle 1 protocol ip prio 1 cgroup match 'u32(u8 0x1122 mask at 0)' action pass",
+ "expExitCode": "1",
+ "verifyCmd": "$TC filter show dev $DEV1 parent ffff:",
+ "matchPattern": "^filter protocol ip pref 1 cgroup.*handle 0x1.*u32\\(11220000/00000000 at 0\\)",
+ "matchCount": "0",
+ "teardown": [
+ "$TC qdisc del dev $DEV1 ingress"
+ ]
+ },
+ {
+ "id": "1123",
+ "name": "Add cgroup filter with u32 ematch u16/negative offset and drop action",
+ "category": [
+ "filter",
+ "cgroup"
+ ],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
+ "setup": [
+ "$TC qdisc add dev $DEV1 ingress"
+ ],
+ "cmdUnderTest": "$TC filter add dev $DEV1 parent ffff: handle 1 protocol ip prio 1 cgroup match 'u32(u16 0xaabb 0xffff at -12)' action drop",
+ "expExitCode": "0",
+ "verifyCmd": "$TC filter show dev $DEV1 parent ffff:",
+ "matchPattern": "^filter protocol ip pref 1 cgroup.*handle 0x1.*u32\\(aabb0000/ffff0000 at -12\\)",
+ "matchCount": "1",
+ "teardown": [
+ "$TC qdisc del dev $DEV1 ingress"
+ ]
+ },
+ {
+ "id": "4234",
+ "name": "Add cgroup filter with u32 ematch u16/nexthdr+ offset and pass action",
+ "category": [
+ "filter",
+ "cgroup"
+ ],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
+ "setup": [
+ "$TC qdisc add dev $DEV1 ingress"
+ ],
+ "cmdUnderTest": "$TC filter add dev $DEV1 parent ffff: handle 1 protocol ip prio 1 cgroup match 'u32(u16 0xaabb 0xf0f0 at nexthdr+0)' action pass",
+ "expExitCode": "0",
+ "verifyCmd": "$TC filter show dev $DEV1 parent ffff:",
+ "matchPattern": "^filter protocol ip pref 1 cgroup.*handle 0x1.*u32\\(a0b00000/f0f00000 at nexthdr\\+0\\)",
+ "matchCount": "1",
+ "teardown": [
+ "$TC qdisc del dev $DEV1 ingress"
+ ]
+ },
+ {
+ "id": "e912",
+ "name": "Add cgroup filter with u32 ematch u32/zero offset and pipe action",
+ "category": [
+ "filter",
+ "cgroup"
+ ],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
+ "setup": [
+ "$TC qdisc add dev $DEV1 ingress"
+ ],
+ "cmdUnderTest": "$TC filter add dev $DEV1 parent ffff: handle 1 protocol ip prio 1 cgroup match 'u32(u32 0xaabbccdd 0xffffffff at 0)' action pipe",
+ "expExitCode": "0",
+ "verifyCmd": "$TC filter show dev $DEV1 parent ffff:",
+ "matchPattern": "^filter protocol ip pref 1 cgroup.*handle 0x1.*u32\\(aabbccdd/ffffffff at 0\\)",
+ "matchCount": "1",
+ "teardown": [
+ "$TC qdisc del dev $DEV1 ingress"
+ ]
+ },
+ {
+ "id": "1435",
+ "name": "Add cgroup filter with u32 ematch u32/positive offset and drop action",
+ "category": [
+ "filter",
+ "cgroup"
+ ],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
+ "setup": [
+ "$TC qdisc add dev $DEV1 ingress"
+ ],
+ "cmdUnderTest": "$TC filter add dev $DEV1 parent ffff: handle 1 protocol ip prio 1 cgroup match 'u32(u32 0x11227788 0x1ffff0f0 at 12)' action drop",
+ "expExitCode": "0",
+ "verifyCmd": "$TC filter show dev $DEV1 parent ffff:",
+ "matchPattern": "^filter protocol ip pref 1 cgroup.*handle 0x1.*u32\\(11227080/1ffff0f0 at 12\\)",
+ "matchCount": "1",
+ "teardown": [
+ "$TC qdisc del dev $DEV1 ingress"
+ ]
+ },
+ {
+ "id": "1282",
+ "name": "Add cgroup filter with u32 ematch u32/missing offset",
+ "category": [
+ "filter",
+ "cgroup"
+ ],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
+ "setup": [
+ "$TC qdisc add dev $DEV1 ingress"
+ ],
+ "cmdUnderTest": "$TC filter add dev $DEV1 parent ffff: handle 1 protocol ip prio 1 cgroup match 'u32(u32 0x11227788 0xffffffff at)' action pass",
+ "expExitCode": "1",
+ "verifyCmd": "$TC filter show dev $DEV1 parent ffff:",
+ "matchPattern": "^filter protocol ip pref 1 cgroup.*handle 0x1.*u32\\(11227788/ffffffff at 12\\)",
+ "matchCount": "0",
+ "teardown": [
+ "$TC qdisc del dev $DEV1 ingress"
+ ]
+ },
+ {
+ "id": "6456",
+ "name": "Add cgroup filter with u32 ematch u32/missing AT keyword",
+ "category": [
+ "filter",
+ "cgroup"
+ ],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
+ "setup": [
+ "$TC qdisc add dev $DEV1 ingress"
+ ],
+ "cmdUnderTest": "$TC filter add dev $DEV1 parent ffff: handle 1 protocol ip prio 1 cgroup match 'u32(u32 0x77889900 0xfffff0f0 0)' action pipe",
+ "expExitCode": "1",
+ "verifyCmd": "$TC filter show dev $DEV1 parent ffff:",
+ "matchPattern": "^filter protocol ip pref 1 cgroup.*handle 0x1.*u32\\(77889900/fffff0f0 at 0\\)",
+ "matchCount": "0",
+ "teardown": [
+ "$TC qdisc del dev $DEV1 ingress"
+ ]
+ },
+ {
+ "id": "4231",
+ "name": "Add cgroup filter with u32 ematch u32/missing value",
+ "category": [
+ "filter",
+ "cgroup"
+ ],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
+ "setup": [
+ "$TC qdisc add dev $DEV1 ingress"
+ ],
+ "cmdUnderTest": "$TC filter add dev $DEV1 parent ffff: handle 1 protocol ip prio 1 cgroup match 'u32(u32 at 12)' action pipe",
+ "expExitCode": "1",
+ "verifyCmd": "$TC filter show dev $DEV1 parent ffff:",
+ "matchPattern": "^filter protocol ip pref 1 cgroup.*handle 0x1.*u32\\(at 12\\)",
+ "matchCount": "0",
+ "teardown": [
+ "$TC qdisc del dev $DEV1 ingress"
+ ]
+ },
+ {
+ "id": "2131",
+ "name": "Add cgroup filter with u32 ematch u32/non-numeric value",
+ "category": [
+ "filter",
+ "cgroup"
+ ],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
+ "setup": [
+ "$TC qdisc add dev $DEV1 ingress"
+ ],
+ "cmdUnderTest": "$TC filter add dev $DEV1 parent ffff: handle 1 protocol ip prio 1 cgroup match 'u32(u32 zero 0xffff at 0)' action pipe",
+ "expExitCode": "1",
+ "verifyCmd": "$TC filter show dev $DEV1 parent ffff:",
+ "matchPattern": "^filter protocol ip pref 1 cgroup.*handle 0x1.*u32\\(00000000/ffff0000 at 0\\)",
+ "matchCount": "0",
+ "teardown": [
+ "$TC qdisc del dev $DEV1 ingress"
+ ]
+ },
+ {
+ "id": "f125",
+ "name": "Add cgroup filter with u32 ematch u32/non-numeric mask",
+ "category": [
+ "filter",
+ "cgroup"
+ ],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
+ "setup": [
+ "$TC qdisc add dev $DEV1 ingress"
+ ],
+ "cmdUnderTest": "$TC filter add dev $DEV1 parent ffff: handle 1 protocol ip prio 1 cgroup match 'u32(u32 0x11223344 mask at 0)' action pass",
+ "expExitCode": "1",
+ "verifyCmd": "$TC filter show dev $DEV1 parent ffff:",
+ "matchPattern": "^filter protocol ip pref 1 cgroup.*handle 0x1.*u32\\(11223344/00000000 at 0\\)",
+ "matchCount": "0",
+ "teardown": [
+ "$TC qdisc del dev $DEV1 ingress"
+ ]
+ },
+ {
+ "id": "4316",
+ "name": "Add cgroup filter with u32 ematch u32/negative offset and drop action",
+ "category": [
+ "filter",
+ "cgroup"
+ ],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
+ "setup": [
+ "$TC qdisc add dev $DEV1 ingress"
+ ],
+ "cmdUnderTest": "$TC filter add dev $DEV1 parent ffff: handle 1 protocol ip prio 1 cgroup match 'u32(u32 0xaabbccdd 0xff00ff00 at -12)' action drop",
+ "expExitCode": "0",
+ "verifyCmd": "$TC filter show dev $DEV1 parent ffff:",
+ "matchPattern": "^filter protocol ip pref 1 cgroup.*handle 0x1.*u32\\(aa00cc00/ff00ff00 at -12\\)",
+ "matchCount": "1",
+ "teardown": [
+ "$TC qdisc del dev $DEV1 ingress"
+ ]
+ },
+ {
+ "id": "23ae",
+ "name": "Add cgroup filter with u32 ematch u32/nexthdr+ offset and pipe action",
+ "category": [
+ "filter",
+ "cgroup"
+ ],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
+ "setup": [
+ "$TC qdisc add dev $DEV1 ingress"
+ ],
+ "cmdUnderTest": "$TC filter add dev $DEV1 parent ffff: handle 1 protocol ip prio 1 cgroup match 'u32(u32 0xaabbccdd 0xffffffff at nexthdr+0)' action pipe",
+ "expExitCode": "0",
+ "verifyCmd": "$TC filter show dev $DEV1 parent ffff:",
+ "matchPattern": "^filter protocol ip pref 1 cgroup.*handle 0x1.*u32\\(aabbccdd/ffffffff at nexthdr\\+0\\)",
+ "matchCount": "1",
+ "teardown": [
+ "$TC qdisc del dev $DEV1 ingress"
+ ]
+ },
+ {
+ "id": "23a1",
+ "name": "Add cgroup filter with canid ematch and single SFF",
+ "category": [
+ "filter",
+ "cgroup"
+ ],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
+ "setup": [
+ "$TC qdisc add dev $DEV1 ingress"
+ ],
+ "cmdUnderTest": "$TC filter add dev $DEV1 parent ffff: handle 1 protocol ip prio 1 cgroup match 'canid(sff 1)' action pass",
+ "expExitCode": "0",
+ "verifyCmd": "$TC filter show dev $DEV1 parent ffff:",
+ "matchPattern": "^filter protocol ip pref 1 cgroup.*handle 0x1.*canid\\(sff 0x1\\)",
+ "matchCount": "1",
+ "teardown": [
+ "$TC qdisc del dev $DEV1 ingress"
+ ]
+ },
+ {
+ "id": "324f",
+ "name": "Add cgroup filter with canid ematch and single SFF with mask",
+ "category": [
+ "filter",
+ "cgroup"
+ ],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
+ "setup": [
+ "$TC qdisc add dev $DEV1 ingress"
+ ],
+ "cmdUnderTest": "$TC filter add dev $DEV1 parent ffff: handle 1 protocol ip prio 1 cgroup match 'canid(sff 0xaabb:0x00ff)' action pass",
+ "expExitCode": "0",
+ "verifyCmd": "$TC filter show dev $DEV1 parent ffff:",
+ "matchPattern": "^filter protocol ip pref 1 cgroup.*handle 0x1.*canid\\(sff 0x2BB:0xFF\\)",
+ "matchCount": "1",
+ "teardown": [
+ "$TC qdisc del dev $DEV1 ingress"
+ ]
+ },
+ {
+ "id": "2576",
+ "name": "Add cgroup filter with canid ematch and multiple SFF",
+ "category": [
+ "filter",
+ "cgroup"
+ ],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
+ "setup": [
+ "$TC qdisc add dev $DEV1 ingress"
+ ],
+ "cmdUnderTest": "$TC filter add dev $DEV1 parent ffff: handle 1 protocol ip prio 1 cgroup match 'canid(sff 1 sff 2 sff 3)' action pass",
+ "expExitCode": "0",
+ "verifyCmd": "$TC filter show dev $DEV1 parent ffff:",
+ "matchPattern": "^filter protocol ip pref 1 cgroup.*handle 0x1.*canid\\(sff 0x1 sff 0x2 sff 0x3\\)",
+ "matchCount": "1",
+ "teardown": [
+ "$TC qdisc del dev $DEV1 ingress"
+ ]
+ },
+ {
+ "id": "4839",
+ "name": "Add cgroup filter with canid ematch and multiple SFF with masks",
+ "category": [
+ "filter",
+ "cgroup"
+ ],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
+ "setup": [
+ "$TC qdisc add dev $DEV1 ingress"
+ ],
+ "cmdUnderTest": "$TC filter add dev $DEV1 parent ffff: handle 1 protocol ip prio 1 cgroup match 'canid(sff 0xaa:0x01 sff 0xbb:0x02 sff 0xcc:0x03)' action drop",
+ "expExitCode": "0",
+ "verifyCmd": "$TC filter show dev $DEV1 parent ffff:",
+ "matchPattern": "^filter protocol ip pref 1 cgroup.*handle 0x1.*canid\\(sff 0xAA:0x1 sff 0xBB:0x2 sff 0xCC:0x3\\)",
+ "matchCount": "1",
+ "teardown": [
+ "$TC qdisc del dev $DEV1 ingress"
+ ]
+ },
+ {
+ "id": "6713",
+ "name": "Add cgroup filter with canid ematch and single EFF",
+ "category": [
+ "filter",
+ "cgroup"
+ ],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
+ "setup": [
+ "$TC qdisc add dev $DEV1 ingress"
+ ],
+ "cmdUnderTest": "$TC filter add dev $DEV1 parent ffff: handle 1 protocol ip prio 1 cgroup match 'canid(eff 1)' action pipe",
+ "expExitCode": "0",
+ "verifyCmd": "$TC filter show dev $DEV1 parent ffff:",
+ "matchPattern": "^filter protocol ip pref 1 cgroup.*handle 0x1.*canid\\(eff 0x1\\)",
+ "matchCount": "1",
+ "teardown": [
+ "$TC qdisc del dev $DEV1 ingress"
+ ]
+ },
+ {
+ "id": "4572",
+ "name": "Add cgroup filter with canid ematch and single EFF with mask",
+ "category": [
+ "filter",
+ "cgroup"
+ ],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
+ "setup": [
+ "$TC qdisc add dev $DEV1 ingress"
+ ],
+ "cmdUnderTest": "$TC filter add dev $DEV1 parent ffff: handle 1 protocol ip prio 1 cgroup match 'canid(eff 0xaabb:0xf1f1)' action pass",
+ "expExitCode": "0",
+ "verifyCmd": "$TC filter show dev $DEV1 parent ffff:",
+ "matchPattern": "^filter protocol ip pref 1 cgroup.*handle 0x1.*canid\\(eff 0xAABB:0xF1F1\\)",
+ "matchCount": "1",
+ "teardown": [
+ "$TC qdisc del dev $DEV1 ingress"
+ ]
+ },
+ {
+ "id": "8031",
+ "name": "Add cgroup filter with canid ematch and multiple EFF",
+ "category": [
+ "filter",
+ "cgroup"
+ ],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
+ "setup": [
+ "$TC qdisc add dev $DEV1 ingress"
+ ],
+ "cmdUnderTest": "$TC filter add dev $DEV1 parent ffff: handle 1 protocol ip prio 1 cgroup match 'canid(eff 1 eff 2 eff 3)' action pass",
+ "expExitCode": "0",
+ "verifyCmd": "$TC filter show dev $DEV1 parent ffff:",
+ "matchPattern": "^filter protocol ip pref 1 cgroup.*handle 0x1.*canid\\(eff 0x1 eff 0x2 eff 0x3\\)",
+ "matchCount": "1",
+ "teardown": [
+ "$TC qdisc del dev $DEV1 ingress"
+ ]
+ },
+ {
+ "id": "ab9d",
+ "name": "Add cgroup filter with canid ematch and multiple EFF with masks",
+ "category": [
+ "filter",
+ "cgroup"
+ ],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
+ "setup": [
+ "$TC qdisc add dev $DEV1 ingress"
+ ],
+ "cmdUnderTest": "$TC filter add dev $DEV1 parent ffff: handle 1 protocol ip prio 1 cgroup match 'canid(eff 0xaa:0x01 eff 0xbb:0x02 eff 0xcc:0x03)' action pass",
+ "expExitCode": "0",
+ "verifyCmd": "$TC filter show dev $DEV1 parent ffff:",
+ "matchPattern": "^filter protocol ip pref 1 cgroup.*handle 0x1.*canid\\(eff 0xAA:0x1 eff 0xBB:0x2 eff 0xCC:0x3\\)",
+ "matchCount": "1",
+ "teardown": [
+ "$TC qdisc del dev $DEV1 ingress"
+ ]
+ },
+ {
+ "id": "5349",
+ "name": "Add cgroup filter with canid ematch and a combination of SFF/EFF",
+ "category": [
+ "filter",
+ "cgroup"
+ ],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
+ "setup": [
+ "$TC qdisc add dev $DEV1 ingress"
+ ],
+ "cmdUnderTest": "$TC filter add dev $DEV1 parent ffff: handle 1 protocol ip prio 1 cgroup match 'canid(sff 0x01 eff 0x02)' action pass",
+ "expExitCode": "0",
+ "verifyCmd": "$TC filter show dev $DEV1 parent ffff:",
+ "matchPattern": "^filter protocol ip pref 1 cgroup.*handle 0x1.*canid\\(eff 0x2 sff 0x1\\)",
+ "matchCount": "1",
+ "teardown": [
+ "$TC qdisc del dev $DEV1 ingress"
+ ]
+ },
+ {
+ "id": "c934",
+ "name": "Add cgroup filter with canid ematch and a combination of SFF/EFF with masks",
+ "category": [
+ "filter",
+ "cgroup"
+ ],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
+ "setup": [
+ "$TC qdisc add dev $DEV1 ingress"
+ ],
+ "cmdUnderTest": "$TC filter add dev $DEV1 parent ffff: handle 1 protocol ip prio 1 cgroup match 'canid(sff 0x01:0xf eff 0x02:0xf)' action drop",
+ "expExitCode": "0",
+ "verifyCmd": "$TC filter show dev $DEV1 parent ffff:",
+ "matchPattern": "^filter protocol ip pref 1 cgroup.*handle 0x1.*canid\\(eff 0x2:0xF sff 0x1:0xF\\)",
+ "matchCount": "1",
+ "teardown": [
+ "$TC qdisc del dev $DEV1 ingress"
+ ]
+ },
+ {
+ "id": "4319",
+ "name": "Replace cgroup filter with diffferent match",
+ "category": [
+ "filter",
+ "cgroup"
+ ],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
+ "setup": [
+ "$TC qdisc add dev $DEV1 ingress",
+ "$TC filter add dev $DEV1 parent ffff: handle 1 protocol ip prio 1 cgroup match 'cmp(u8 at 0 layer link mask 0xff gt 10)' action pass"
+ ],
+ "cmdUnderTest": "$TC filter replace dev $DEV1 parent ffff: handle 1 protocol ip prio 1 cgroup match 'cmp(u8 at 0 layer link mask 0xff gt 8)' action pass",
+ "expExitCode": "0",
+ "verifyCmd": "$TC filter show dev $DEV1 parent ffff:",
+ "matchPattern": "cmp\\(u8 at 0 layer 0 mask 0xff gt 8\\)",
+ "matchCount": "1",
+ "teardown": [
+ "$TC qdisc del dev $DEV1 ingress"
+ ]
+ },
+ {
+ "id": "4636",
+ "name": "Delete cgroup filter",
+ "category": [
+ "filter",
+ "cgroup"
+ ],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
+ "setup": [
+ "$TC qdisc add dev $DEV1 ingress",
+ "$TC filter add dev $DEV1 parent ffff: handle 1 protocol ip prio 1 cgroup match 'cmp(u8 at 0 layer link mask 0xff gt 10)' action pass"
+ ],
+ "cmdUnderTest": "$TC filter delete dev $DEV1 parent ffff: protocol ip prio 1 cgroup match 'cmp(u8 at 0 layer link mask 0xff gt 10)' action pass",
+ "expExitCode": "0",
+ "verifyCmd": "$TC filter show dev $DEV1 parent ffff:",
+ "matchPattern": "cmp\\(u8 at 0 layer 0 mask 0xff gt 8\\)",
+ "matchCount": "0",
+ "teardown": [
+ "$TC qdisc del dev $DEV1 ingress"
+ ]
+ }
+]
diff --git a/tools/testing/selftests/tc-testing/tc-tests/filters/flow.json b/tools/testing/selftests/tc-testing/tc-tests/filters/flow.json
new file mode 100644
index 000000000000..58189327f644
--- /dev/null
+++ b/tools/testing/selftests/tc-testing/tc-tests/filters/flow.json
@@ -0,0 +1,623 @@
+[
+ {
+ "id": "5294",
+ "name": "Add flow filter with map key and ops",
+ "category": [
+ "filter",
+ "flow"
+ ],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
+ "setup": [
+ "$TC qdisc add dev $DEV1 ingress"
+ ],
+ "cmdUnderTest": "$TC filter add dev $DEV1 parent ffff: handle 1 prio 1 protocol ip flow map key dst and 0xff",
+ "expExitCode": "0",
+ "verifyCmd": "$TC filter get dev $DEV1 parent ffff: handle 1 protocol ip prio 1 flow",
+ "matchPattern": "filter parent ffff: protocol ip pref 1 flow chain [0-9]+ handle 0x1 map keys dst and 0x000000ff baseclass",
+ "matchCount": "1",
+ "teardown": [
+ "$TC qdisc del dev $DEV1 ingress"
+ ]
+ },
+ {
+ "id": "3514",
+ "name": "Add flow filter with map key or ops",
+ "category": [
+ "filter",
+ "flow"
+ ],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
+ "setup": [
+ "$TC qdisc add dev $DEV1 ingress"
+ ],
+ "cmdUnderTest": "$TC filter add dev $DEV1 parent ffff: handle 1 prio 1 protocol ip flow map key dst or 0xff",
+ "expExitCode": "0",
+ "verifyCmd": "$TC filter get dev $DEV1 parent ffff: handle 1 protocol ip prio 1 flow",
+ "matchPattern": "filter parent ffff: protocol ip pref 1 flow chain [0-9]+ handle 0x1 map keys dst.*or 0x000000ff baseclass",
+ "matchCount": "1",
+ "teardown": [
+ "$TC qdisc del dev $DEV1 ingress"
+ ]
+ },
+ {
+ "id": "7534",
+ "name": "Add flow filter with map key xor ops",
+ "category": [
+ "filter",
+ "flow"
+ ],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
+ "setup": [
+ "$TC qdisc add dev $DEV1 ingress"
+ ],
+ "cmdUnderTest": "$TC filter add dev $DEV1 parent ffff: handle 1 prio 1 protocol ip flow map key dst xor 0xff",
+ "expExitCode": "0",
+ "verifyCmd": "$TC filter get dev $DEV1 parent ffff: handle 1 protocol ip prio 1 flow",
+ "matchPattern": "filter parent ffff: protocol ip pref 1 flow chain [0-9]+ handle 0x1 map keys dst xor 0x000000ff baseclass",
+ "matchCount": "1",
+ "teardown": [
+ "$TC qdisc del dev $DEV1 ingress"
+ ]
+ },
+ {
+ "id": "4524",
+ "name": "Add flow filter with map key rshift ops",
+ "category": [
+ "filter",
+ "flow"
+ ],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
+ "setup": [
+ "$TC qdisc add dev $DEV1 ingress"
+ ],
+ "cmdUnderTest": "$TC filter add dev $DEV1 parent ffff: handle 1 prio 1 protocol ip flow map key dst rshift 0xff",
+ "expExitCode": "0",
+ "verifyCmd": "$TC filter get dev $DEV1 parent ffff: handle 1 protocol ip prio 1 flow",
+ "matchPattern": "filter parent ffff: protocol ip pref 1 flow chain [0-9]+ handle 0x1 map keys dst rshift 255 baseclass",
+ "matchCount": "1",
+ "teardown": [
+ "$TC qdisc del dev $DEV1 ingress"
+ ]
+ },
+ {
+ "id": "0230",
+ "name": "Add flow filter with map key addend ops",
+ "category": [
+ "filter",
+ "flow"
+ ],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
+ "setup": [
+ "$TC qdisc add dev $DEV1 ingress"
+ ],
+ "cmdUnderTest": "$TC filter add dev $DEV1 parent ffff: handle 1 prio 1 protocol ip flow map key dst addend 0xff",
+ "expExitCode": "0",
+ "verifyCmd": "$TC filter get dev $DEV1 parent ffff: handle 1 protocol ip prio 1 flow",
+ "matchPattern": "filter parent ffff: protocol ip pref 1 flow chain [0-9]+ handle 0x1 map keys dst addend 0xff baseclass",
+ "matchCount": "1",
+ "teardown": [
+ "$TC qdisc del dev $DEV1 ingress"
+ ]
+ },
+ {
+ "id": "2344",
+ "name": "Add flow filter with src map key",
+ "category": [
+ "filter",
+ "flow"
+ ],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
+ "setup": [
+ "$TC qdisc add dev $DEV1 ingress"
+ ],
+ "cmdUnderTest": "$TC filter add dev $DEV1 parent ffff: handle 1 prio 1 protocol ip flow map key src addend 0xff",
+ "expExitCode": "0",
+ "verifyCmd": "$TC filter get dev $DEV1 parent ffff: handle 1 protocol ip prio 1 flow",
+ "matchPattern": "filter parent ffff: protocol ip pref 1 flow chain [0-9]+ handle 0x1 map keys src addend 0xff baseclass",
+ "matchCount": "1",
+ "teardown": [
+ "$TC qdisc del dev $DEV1 ingress"
+ ]
+ },
+ {
+ "id": "9304",
+ "name": "Add flow filter with proto map key",
+ "category": [
+ "filter",
+ "flow"
+ ],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
+ "setup": [
+ "$TC qdisc add dev $DEV1 ingress"
+ ],
+ "cmdUnderTest": "$TC filter add dev $DEV1 parent ffff: handle 1 prio 1 protocol ip flow map key proto addend 0xff",
+ "expExitCode": "0",
+ "verifyCmd": "$TC filter get dev $DEV1 parent ffff: handle 1 protocol ip prio 1 flow",
+ "matchPattern": "filter parent ffff: protocol ip pref 1 flow chain [0-9]+ handle 0x1 map keys proto addend 0xff baseclass",
+ "matchCount": "1",
+ "teardown": [
+ "$TC qdisc del dev $DEV1 ingress"
+ ]
+ },
+ {
+ "id": "9038",
+ "name": "Add flow filter with proto-src map key",
+ "category": [
+ "filter",
+ "flow"
+ ],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
+ "setup": [
+ "$TC qdisc add dev $DEV1 ingress"
+ ],
+ "cmdUnderTest": "$TC filter add dev $DEV1 parent ffff: handle 1 prio 1 protocol ip flow map key proto-src addend 0xff",
+ "expExitCode": "0",
+ "verifyCmd": "$TC filter get dev $DEV1 parent ffff: handle 1 protocol ip prio 1 flow",
+ "matchPattern": "filter parent ffff: protocol ip pref 1 flow chain [0-9]+ handle 0x1 map keys proto-src addend 0xff baseclass",
+ "matchCount": "1",
+ "teardown": [
+ "$TC qdisc del dev $DEV1 ingress"
+ ]
+ },
+ {
+ "id": "2a03",
+ "name": "Add flow filter with proto-dst map key",
+ "category": [
+ "filter",
+ "flow"
+ ],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
+ "setup": [
+ "$TC qdisc add dev $DEV1 ingress"
+ ],
+ "cmdUnderTest": "$TC filter add dev $DEV1 parent ffff: handle 1 prio 1 protocol ip flow map key proto-dst addend 0xff",
+ "expExitCode": "0",
+ "verifyCmd": "$TC filter get dev $DEV1 parent ffff: handle 1 protocol ip prio 1 flow",
+ "matchPattern": "filter parent ffff: protocol ip pref 1 flow chain [0-9]+ handle 0x1 map keys proto-dst addend 0xff baseclass",
+ "matchCount": "1",
+ "teardown": [
+ "$TC qdisc del dev $DEV1 ingress"
+ ]
+ },
+ {
+ "id": "a073",
+ "name": "Add flow filter with iif map key",
+ "category": [
+ "filter",
+ "flow"
+ ],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
+ "setup": [
+ "$TC qdisc add dev $DEV1 ingress"
+ ],
+ "cmdUnderTest": "$TC filter add dev $DEV1 parent ffff: handle 1 prio 1 protocol ip flow map key iif addend 0xff",
+ "expExitCode": "0",
+ "verifyCmd": "$TC filter get dev $DEV1 parent ffff: handle 1 protocol ip prio 1 flow",
+ "matchPattern": "filter parent ffff: protocol ip pref 1 flow chain [0-9]+ handle 0x1 map keys iif addend 0xff baseclass",
+ "matchCount": "1",
+ "teardown": [
+ "$TC qdisc del dev $DEV1 ingress"
+ ]
+ },
+ {
+ "id": "3b20",
+ "name": "Add flow filter with priority map key",
+ "category": [
+ "filter",
+ "flow"
+ ],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
+ "setup": [
+ "$TC qdisc add dev $DEV1 ingress"
+ ],
+ "cmdUnderTest": "$TC filter add dev $DEV1 parent ffff: handle 1 prio 1 protocol ip flow map key priority addend 0xff",
+ "expExitCode": "0",
+ "verifyCmd": "$TC filter get dev $DEV1 parent ffff: handle 1 protocol ip prio 1 flow",
+ "matchPattern": "filter parent ffff: protocol ip pref 1 flow chain [0-9]+ handle 0x1 map keys priority addend 0xff baseclass",
+ "matchCount": "1",
+ "teardown": [
+ "$TC qdisc del dev $DEV1 ingress"
+ ]
+ },
+ {
+ "id": "8945",
+ "name": "Add flow filter with mark map key",
+ "category": [
+ "filter",
+ "flow"
+ ],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
+ "setup": [
+ "$TC qdisc add dev $DEV1 ingress"
+ ],
+ "cmdUnderTest": "$TC filter add dev $DEV1 parent ffff: handle 1 prio 1 protocol ip flow map key mark addend 0xff",
+ "expExitCode": "0",
+ "verifyCmd": "$TC filter get dev $DEV1 parent ffff: handle 1 protocol ip prio 1 flow",
+ "matchPattern": "filter parent ffff: protocol ip pref 1 flow chain [0-9]+ handle 0x1 map keys mark addend 0xff baseclass",
+ "matchCount": "1",
+ "teardown": [
+ "$TC qdisc del dev $DEV1 ingress"
+ ]
+ },
+ {
+ "id": "c034",
+ "name": "Add flow filter with nfct map key",
+ "category": [
+ "filter",
+ "flow"
+ ],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
+ "setup": [
+ "$TC qdisc add dev $DEV1 ingress"
+ ],
+ "cmdUnderTest": "$TC filter add dev $DEV1 parent ffff: handle 1 prio 1 protocol ip flow map key nfct addend 0xff",
+ "expExitCode": "0",
+ "verifyCmd": "$TC filter get dev $DEV1 parent ffff: handle 1 protocol ip prio 1 flow",
+ "matchPattern": "filter parent ffff: protocol ip pref 1 flow chain [0-9]+ handle 0x1 map keys nfct addend 0xff baseclass",
+ "matchCount": "1",
+ "teardown": [
+ "$TC qdisc del dev $DEV1 ingress"
+ ]
+ },
+ {
+ "id": "0205",
+ "name": "Add flow filter with nfct-src map key",
+ "category": [
+ "filter",
+ "flow"
+ ],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
+ "setup": [
+ "$TC qdisc add dev $DEV1 ingress"
+ ],
+ "cmdUnderTest": "$TC filter add dev $DEV1 parent ffff: handle 1 prio 1 protocol ip flow map key nfct-dst addend 0xff",
+ "expExitCode": "0",
+ "verifyCmd": "$TC filter get dev $DEV1 parent ffff: handle 1 protocol ip prio 1 flow",
+ "matchPattern": "filter parent ffff: protocol ip pref 1 flow chain [0-9]+ handle 0x1 map keys nfct-dst addend 0xff baseclass",
+ "matchCount": "1",
+ "teardown": [
+ "$TC qdisc del dev $DEV1 ingress"
+ ]
+ },
+ {
+ "id": "5315",
+ "name": "Add flow filter with nfct-src map key",
+ "category": [
+ "filter",
+ "flow"
+ ],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
+ "setup": [
+ "$TC qdisc add dev $DEV1 ingress"
+ ],
+ "cmdUnderTest": "$TC filter add dev $DEV1 parent ffff: handle 1 prio 1 protocol ip flow map key nfct-src addend 0xff",
+ "expExitCode": "0",
+ "verifyCmd": "$TC filter get dev $DEV1 parent ffff: handle 1 protocol ip prio 1 flow",
+ "matchPattern": "filter parent ffff: protocol ip pref 1 flow chain [0-9]+ handle 0x1 map keys nfct-src addend 0xff baseclass",
+ "matchCount": "1",
+ "teardown": [
+ "$TC qdisc del dev $DEV1 ingress"
+ ]
+ },
+ {
+ "id": "7849",
+ "name": "Add flow filter with nfct-proto-src map key",
+ "category": [
+ "filter",
+ "flow"
+ ],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
+ "setup": [
+ "$TC qdisc add dev $DEV1 ingress"
+ ],
+ "cmdUnderTest": "$TC filter add dev $DEV1 parent ffff: handle 1 prio 1 protocol ip flow map key nfct-proto-src addend 0xff",
+ "expExitCode": "0",
+ "verifyCmd": "$TC filter get dev $DEV1 parent ffff: handle 1 protocol ip prio 1 flow",
+ "matchPattern": "filter parent ffff: protocol ip pref 1 flow chain [0-9]+ handle 0x1 map keys nfct-proto-src addend 0xff baseclass",
+ "matchCount": "1",
+ "teardown": [
+ "$TC qdisc del dev $DEV1 ingress"
+ ]
+ },
+ {
+ "id": "9902",
+ "name": "Add flow filter with nfct-proto-dst map key",
+ "category": [
+ "filter",
+ "flow"
+ ],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
+ "setup": [
+ "$TC qdisc add dev $DEV1 ingress"
+ ],
+ "cmdUnderTest": "$TC filter add dev $DEV1 parent ffff: handle 1 prio 1 protocol ip flow map key nfct-proto-dst addend 0xff",
+ "expExitCode": "0",
+ "verifyCmd": "$TC filter get dev $DEV1 parent ffff: handle 1 protocol ip prio 1 flow",
+ "matchPattern": "filter parent ffff: protocol ip pref 1 flow chain [0-9]+ handle 0x1 map keys nfct-proto-dst addend 0xff baseclass",
+ "matchCount": "1",
+ "teardown": [
+ "$TC qdisc del dev $DEV1 ingress"
+ ]
+ },
+ {
+ "id": "6742",
+ "name": "Add flow filter with rt-classid map key",
+ "category": [
+ "filter",
+ "flow"
+ ],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
+ "setup": [
+ "$TC qdisc add dev $DEV1 ingress"
+ ],
+ "cmdUnderTest": "$TC filter add dev $DEV1 parent ffff: handle 1 prio 1 protocol ip flow map key rt-classid addend 0xff",
+ "expExitCode": "0",
+ "verifyCmd": "$TC filter get dev $DEV1 parent ffff: handle 1 protocol ip prio 1 flow",
+ "matchPattern": "filter parent ffff: protocol ip pref 1 flow chain [0-9]+ handle 0x1 map keys rt-classid addend 0xff baseclass",
+ "matchCount": "1",
+ "teardown": [
+ "$TC qdisc del dev $DEV1 ingress"
+ ]
+ },
+ {
+ "id": "5432",
+ "name": "Add flow filter with sk-uid map key",
+ "category": [
+ "filter",
+ "flow"
+ ],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
+ "setup": [
+ "$TC qdisc add dev $DEV1 ingress"
+ ],
+ "cmdUnderTest": "$TC filter add dev $DEV1 parent ffff: handle 1 prio 1 protocol ip flow map key sk-uid addend 0xff",
+ "expExitCode": "0",
+ "verifyCmd": "$TC filter get dev $DEV1 parent ffff: handle 1 protocol ip prio 1 flow",
+ "matchPattern": "filter parent ffff: protocol ip pref 1 flow chain [0-9]+ handle 0x1 map keys sk-uid addend 0xff baseclass",
+ "matchCount": "1",
+ "teardown": [
+ "$TC qdisc del dev $DEV1 ingress"
+ ]
+ },
+ {
+ "id": "4134",
+ "name": "Add flow filter with sk-gid map key",
+ "category": [
+ "filter",
+ "flow"
+ ],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
+ "setup": [
+ "$TC qdisc add dev $DEV1 ingress"
+ ],
+ "cmdUnderTest": "$TC filter add dev $DEV1 parent ffff: handle 1 prio 1 protocol ip flow map key sk-gid addend 0xff",
+ "expExitCode": "0",
+ "verifyCmd": "$TC filter get dev $DEV1 parent ffff: handle 1 protocol ip prio 1 flow",
+ "matchPattern": "filter parent ffff: protocol ip pref 1 flow chain [0-9]+ handle 0x1 map keys sk-gid addend 0xff baseclass",
+ "matchCount": "1",
+ "teardown": [
+ "$TC qdisc del dev $DEV1 ingress"
+ ]
+ },
+ {
+ "id": "4522",
+ "name": "Add flow filter with vlan-tag map key",
+ "category": [
+ "filter",
+ "flow"
+ ],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
+ "setup": [
+ "$TC qdisc add dev $DEV1 ingress"
+ ],
+ "cmdUnderTest": "$TC filter add dev $DEV1 parent ffff: handle 1 prio 1 protocol ip flow map key vlan-tag addend 0xff",
+ "expExitCode": "0",
+ "verifyCmd": "$TC filter get dev $DEV1 parent ffff: handle 1 protocol ip prio 1 flow",
+ "matchPattern": "filter parent ffff: protocol ip pref 1 flow chain [0-9]+ handle 0x1 map keys vlan-tag addend 0xff baseclass",
+ "matchCount": "1",
+ "teardown": [
+ "$TC qdisc del dev $DEV1 ingress"
+ ]
+ },
+ {
+ "id": "4253",
+ "name": "Add flow filter with rxhash map key",
+ "category": [
+ "filter",
+ "flow"
+ ],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
+ "setup": [
+ "$TC qdisc add dev $DEV1 ingress"
+ ],
+ "cmdUnderTest": "$TC filter add dev $DEV1 parent ffff: handle 1 prio 1 protocol ip flow map key rxhash addend 0xff",
+ "expExitCode": "0",
+ "verifyCmd": "$TC filter get dev $DEV1 parent ffff: handle 1 protocol ip prio 1 flow",
+ "matchPattern": "filter parent ffff: protocol ip pref 1 flow chain [0-9]+ handle 0x1 map keys rxhash addend 0xff baseclass",
+ "matchCount": "1",
+ "teardown": [
+ "$TC qdisc del dev $DEV1 ingress"
+ ]
+ },
+ {
+ "id": "4452",
+ "name": "Add flow filter with hash key list",
+ "category": [
+ "filter",
+ "flow"
+ ],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
+ "setup": [
+ "$TC qdisc add dev $DEV1 ingress"
+ ],
+ "cmdUnderTest": "$TC filter add dev $DEV1 parent ffff: handle 1 prio 1 protocol ip flow hash keys src",
+ "expExitCode": "0",
+ "verifyCmd": "$TC filter get dev $DEV1 parent ffff: handle 1 protocol ip prio 1 flow",
+ "matchPattern": "filter parent ffff: protocol ip pref 1 flow chain [0-9]+ handle 0x1 hash keys src baseclass",
+ "matchCount": "1",
+ "teardown": [
+ "$TC qdisc del dev $DEV1 ingress"
+ ]
+ },
+ {
+ "id": "4341",
+ "name": "Add flow filter with muliple ops",
+ "category": [
+ "filter",
+ "flow"
+ ],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
+ "setup": [
+ "$TC qdisc add dev $DEV1 ingress"
+ ],
+ "cmdUnderTest": "$TC filter add dev $DEV1 parent ffff: handle 1 prio 1 protocol ip flow hash keys src divisor 1024 baseclass 1:1 match 'cmp(u8 at 0 layer link mask 0xff gt 10)' action drop",
+ "expExitCode": "0",
+ "verifyCmd": "$TC filter get dev $DEV1 parent ffff: handle 1 protocol ip prio 1 flow",
+ "matchPattern": "filter parent ffff: protocol ip pref 1 flow chain [0-9]+ handle 0x1 hash keys src divisor 1024 baseclass 1:1.*cmp\\(u8 at 0 layer 0 mask 0xff gt 10\\)",
+ "matchCount": "1",
+ "teardown": [
+ "$TC qdisc del dev $DEV1 ingress"
+ ]
+ },
+ {
+ "id": "4392",
+ "name": "List flow filters",
+ "category": [
+ "filter",
+ "flow"
+ ],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
+ "setup": [
+ "$TC qdisc add dev $DEV1 ingress",
+ "$TC filter add dev $DEV1 parent ffff: handle 1 prio 1 protocol ip flow map key rxhash addend 0xff",
+ "$TC filter add dev $DEV1 parent ffff: handle 2 prio 1 protocol ip flow map key rxhash or 0xff"
+ ],
+ "cmdUnderTest": "$TC filter show dev $DEV1 parent ffff:",
+ "expExitCode": "0",
+ "verifyCmd": "$TC filter show dev $DEV1 parent ffff:",
+ "matchPattern": "filter protocol ip pref 1 flow chain 0 handle 0x[0-9]+ map keys rxhash",
+ "matchCount": "2",
+ "teardown": [
+ "$TC qdisc del dev $DEV1 ingress"
+ ]
+ },
+ {
+ "id": "4322",
+ "name": "Change flow filter with map key num",
+ "category": [
+ "filter",
+ "flow"
+ ],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
+ "setup": [
+ "$TC qdisc add dev $DEV1 ingress",
+ "$TC filter add dev $DEV1 parent ffff: handle 1 prio 1 protocol ip flow map key rxhash addend 0xff"
+ ],
+ "cmdUnderTest": "$TC filter change dev $DEV1 parent ffff: handle 1 prio 1 protocol ip flow map key rxhash addend 0x22",
+ "expExitCode": "0",
+ "verifyCmd": "$TC filter get dev $DEV1 parent ffff: handle 1 prio 1 protocol ip flow",
+ "matchPattern": "filter parent ffff: protocol ip pref 1 flow chain [0-9]+ handle 0x1 map keys rxhash addend 0x22 baseclass",
+ "matchCount": "1",
+ "teardown": [
+ "$TC qdisc del dev $DEV1 ingress"
+ ]
+ },
+ {
+ "id": "2320",
+ "name": "Replace flow filter with map key num",
+ "category": [
+ "filter",
+ "flow"
+ ],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
+ "setup": [
+ "$TC qdisc add dev $DEV1 ingress",
+ "$TC filter add dev $DEV1 parent ffff: handle 1 prio 1 protocol ip flow map key rxhash addend 0xff"
+ ],
+ "cmdUnderTest": "$TC filter replace dev $DEV1 parent ffff: handle 1 prio 1 protocol ip flow map key rxhash addend 0x88",
+ "expExitCode": "0",
+ "verifyCmd": "$TC filter get dev $DEV1 parent ffff: handle 1 prio 1 protocol ip flow",
+ "matchPattern": "filter parent ffff: protocol ip pref 1 flow chain [0-9]+ handle 0x1 map keys rxhash addend 0x88 baseclass",
+ "matchCount": "1",
+ "teardown": [
+ "$TC qdisc del dev $DEV1 ingress"
+ ]
+ },
+ {
+ "id": "3213",
+ "name": "Delete flow filter with map key num",
+ "category": [
+ "filter",
+ "flow"
+ ],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
+ "setup": [
+ "$TC qdisc add dev $DEV1 ingress",
+ "$TC filter add dev $DEV1 parent ffff: handle 1 prio 1 protocol ip flow map key rxhash addend 0xff"
+ ],
+ "cmdUnderTest": "$TC filter delete dev $DEV1 parent ffff: handle 1 prio 1 protocol ip flow map key rxhash addend 0xff",
+ "expExitCode": "0",
+ "verifyCmd": "$TC filter get dev $DEV1 parent ffff: handle 1 prio 1 protocol ip flow",
+ "matchPattern": "filter parent ffff: protocol ip pref 1 flow chain [0-9]+ handle 0x1 map keys rxhash addend 0x88 baseclass",
+ "matchCount": "0",
+ "teardown": [
+ "$TC qdisc del dev $DEV1 ingress"
+ ]
+ }
+]
diff --git a/tools/testing/selftests/tc-testing/tc-tests/filters/concurrency.json b/tools/testing/selftests/tc-testing/tc-tests/filters/flower.json
index c2a433a4737e..6b08c0642069 100644
--- a/tools/testing/selftests/tc-testing/tc-tests/filters/concurrency.json
+++ b/tools/testing/selftests/tc-testing/tc-tests/filters/flower.json
@@ -173,5 +173,103 @@
"$TC qdisc del dev $DEV2 ingress",
"/bin/rm -rf $BATCH_DIR"
]
+ },
+ {
+ "id": "2ff3",
+ "name": "Add flower with max handle and then dump it",
+ "category": [
+ "filter",
+ "flower"
+ ],
+ "setup": [
+ "$TC qdisc add dev $DEV2 ingress"
+ ],
+ "cmdUnderTest": "$TC filter add dev $DEV2 protocol ip pref 1 ingress handle 0xffffffff flower action ok",
+ "expExitCode": "0",
+ "verifyCmd": "$TC filter show dev $DEV2 ingress",
+ "matchPattern": "filter protocol ip pref 1 flower.*handle 0xffffffff",
+ "matchCount": "1",
+ "teardown": [
+ "$TC qdisc del dev $DEV2 ingress"
+ ]
+ },
+ {
+ "id": "d052",
+ "name": "Add 1M filters with the same action",
+ "category": [
+ "filter",
+ "flower"
+ ],
+ "setup": [
+ "$TC qdisc add dev $DEV2 ingress",
+ "./tdc_batch.py $DEV2 $BATCH_FILE --share_action -n 1000000"
+ ],
+ "cmdUnderTest": "$TC -b $BATCH_FILE",
+ "expExitCode": "0",
+ "verifyCmd": "$TC actions list action gact",
+ "matchPattern": "action order 0: gact action drop.*index 1 ref 1000000 bind 1000000",
+ "matchCount": "1",
+ "teardown": [
+ "$TC qdisc del dev $DEV2 ingress",
+ "/bin/rm $BATCH_FILE"
+ ]
+ },
+ {
+ "id": "4cbd",
+ "name": "Try to add filter with duplicate key",
+ "category": [
+ "filter",
+ "flower"
+ ],
+ "setup": [
+ "$TC qdisc add dev $DEV2 ingress",
+ "$TC filter add dev $DEV2 protocol ip prio 1 ingress flower dst_mac e4:11:22:11:4a:51 src_mac e4:11:22:11:4a:50 ip_proto tcp src_ip 1.1.1.1 dst_ip 2.2.2.2 action drop"
+ ],
+ "cmdUnderTest": "$TC filter add dev $DEV2 protocol ip prio 1 ingress flower dst_mac e4:11:22:11:4a:51 src_mac e4:11:22:11:4a:50 ip_proto tcp src_ip 1.1.1.1 dst_ip 2.2.2.2 action drop",
+ "expExitCode": "2",
+ "verifyCmd": "$TC -s filter show dev $DEV2 ingress",
+ "matchPattern": "filter protocol ip pref 1 flower chain 0 handle",
+ "matchCount": "1",
+ "teardown": [
+ "$TC qdisc del dev $DEV2 ingress"
+ ]
+ },
+ {
+ "id": "7c65",
+ "name": "Add flower filter and then terse dump it",
+ "category": [
+ "filter",
+ "flower"
+ ],
+ "setup": [
+ "$TC qdisc add dev $DEV2 ingress"
+ ],
+ "cmdUnderTest": "$TC filter add dev $DEV2 protocol ip pref 1 ingress flower dst_mac e4:11:22:11:4a:51 action drop",
+ "expExitCode": "0",
+ "verifyCmd": "$TC -br filter show dev $DEV2 ingress",
+ "matchPattern": "filter protocol ip pref 1 flower.*handle",
+ "matchCount": "1",
+ "teardown": [
+ "$TC qdisc del dev $DEV2 ingress"
+ ]
+ },
+ {
+ "id": "d45e",
+ "name": "Add flower filter and verify that terse dump doesn't output filter key",
+ "category": [
+ "filter",
+ "flower"
+ ],
+ "setup": [
+ "$TC qdisc add dev $DEV2 ingress"
+ ],
+ "cmdUnderTest": "$TC filter add dev $DEV2 protocol ip pref 1 ingress flower dst_mac e4:11:22:11:4a:51 action drop",
+ "expExitCode": "0",
+ "verifyCmd": "$TC -br filter show dev $DEV2 ingress",
+ "matchPattern": " dst_mac e4:11:22:11:4a:51",
+ "matchCount": "0",
+ "teardown": [
+ "$TC qdisc del dev $DEV2 ingress"
+ ]
}
]
diff --git a/tools/testing/selftests/tc-testing/tc-tests/filters/fw.json b/tools/testing/selftests/tc-testing/tc-tests/filters/fw.json
index 5272049566d6..a9b071e1354b 100644
--- a/tools/testing/selftests/tc-testing/tc-tests/filters/fw.json
+++ b/tools/testing/selftests/tc-testing/tc-tests/filters/fw.json
@@ -53,111 +53,6 @@
"plugins": {
"requires": "nsPlugin"
},
- "plugins": {
- "requires": "nsPlugin"
- },
- "plugins": {
- "requires": "nsPlugin"
- },
- "plugins": {
- "requires": "nsPlugin"
- },
- "plugins": {
- "requires": "nsPlugin"
- },
- "plugins": {
- "requires": "nsPlugin"
- },
- "plugins": {
- "requires": "nsPlugin"
- },
- "plugins": {
- "requires": "nsPlugin"
- },
- "plugins": {
- "requires": "nsPlugin"
- },
- "plugins": {
- "requires": "nsPlugin"
- },
- "plugins": {
- "requires": "nsPlugin"
- },
- "plugins": {
- "requires": "nsPlugin"
- },
- "plugins": {
- "requires": "nsPlugin"
- },
- "plugins": {
- "requires": "nsPlugin"
- },
- "plugins": {
- "requires": "nsPlugin"
- },
- "plugins": {
- "requires": "nsPlugin"
- },
- "plugins": {
- "requires": "nsPlugin"
- },
- "plugins": {
- "requires": "nsPlugin"
- },
- "plugins": {
- "requires": "nsPlugin"
- },
- "plugins": {
- "requires": "nsPlugin"
- },
- "plugins": {
- "requires": "nsPlugin"
- },
- "plugins": {
- "requires": "nsPlugin"
- },
- "plugins": {
- "requires": "nsPlugin"
- },
- "plugins": {
- "requires": "nsPlugin"
- },
- "plugins": {
- "requires": "nsPlugin"
- },
- "plugins": {
- "requires": "nsPlugin"
- },
- "plugins": {
- "requires": "nsPlugin"
- },
- "plugins": {
- "requires": "nsPlugin"
- },
- "plugins": {
- "requires": "nsPlugin"
- },
- "plugins": {
- "requires": "nsPlugin"
- },
- "plugins": {
- "requires": "nsPlugin"
- },
- "plugins": {
- "requires": "nsPlugin"
- },
- "plugins": {
- "requires": "nsPlugin"
- },
- "plugins": {
- "requires": "nsPlugin"
- },
- "plugins": {
- "requires": "nsPlugin"
- },
- "plugins": {
- "requires": "nsPlugin"
- },
"setup": [
"$TC qdisc add dev $DEV1 ingress"
],
@@ -173,14 +68,15 @@
{
"id": "c591",
"name": "Add fw filter with action ok by reference",
- "__comment": "We add sleep here because action might have not been deleted by workqueue just yet. Remove this when the behaviour is fixed.",
"category": [
"filter",
"fw"
],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
"setup": [
"$TC qdisc add dev $DEV1 ingress",
- "/bin/sleep 1",
"$TC actions add action gact ok index 1"
],
"cmdUnderTest": "$TC filter add dev $DEV1 parent ffff: handle 1 prio 1 fw action gact index 1",
@@ -189,9 +85,7 @@
"matchPattern": "handle 0x1.*gact action pass.*index 1 ref 2 bind 1",
"matchCount": "1",
"teardown": [
- "$TC qdisc del dev $DEV1 ingress",
- "/bin/sleep 1",
- "$TC actions del action gact index 1"
+ "$TC qdisc del dev $DEV1 ingress"
]
},
{
@@ -201,6 +95,9 @@
"filter",
"fw"
],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
"setup": [
"$TC qdisc add dev $DEV1 ingress"
],
@@ -216,14 +113,15 @@
{
"id": "38b3",
"name": "Add fw filter with action continue by reference",
- "__comment": "We add sleep here because action might have not been deleted by workqueue just yet. Remove this when the behaviour is fixed.",
"category": [
"filter",
"fw"
],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
"setup": [
"$TC qdisc add dev $DEV1 ingress",
- "/bin/sleep 1",
"$TC actions add action gact continue index 1"
],
"cmdUnderTest": "$TC filter add dev $DEV1 parent ffff: handle 1 prio 1 fw action gact index 1",
@@ -232,9 +130,7 @@
"matchPattern": "handle 0x1.*gact action continue.*index 1 ref 2 bind 1",
"matchCount": "1",
"teardown": [
- "$TC qdisc del dev $DEV1 ingress",
- "/bin/sleep 1",
- "$TC actions del action gact index 1"
+ "$TC qdisc del dev $DEV1 ingress"
]
},
{
@@ -244,6 +140,9 @@
"filter",
"fw"
],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
"setup": [
"$TC qdisc add dev $DEV1 ingress"
],
@@ -259,14 +158,15 @@
{
"id": "6753",
"name": "Add fw filter with action pipe by reference",
- "__comment": "We add sleep here because action might have not been deleted by workqueue just yet.",
"category": [
"filter",
"fw"
],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
"setup": [
"$TC qdisc add dev $DEV1 ingress",
- "/bin/sleep 1",
"$TC actions add action gact pipe index 1"
],
"cmdUnderTest": "$TC filter add dev $DEV1 parent ffff: handle 1 prio 1 fw action gact index 1",
@@ -275,9 +175,7 @@
"matchPattern": "handle 0x1.*gact action pipe.*index 1 ref 2 bind 1",
"matchCount": "1",
"teardown": [
- "$TC qdisc del dev $DEV1 ingress",
- "/bin/sleep 1",
- "$TC actions del action gact index 1"
+ "$TC qdisc del dev $DEV1 ingress"
]
},
{
@@ -287,6 +185,9 @@
"filter",
"fw"
],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
"setup": [
"$TC qdisc add dev $DEV1 ingress"
],
@@ -302,14 +203,15 @@
{
"id": "6dc6",
"name": "Add fw filter with action drop by reference",
- "__comment": "We add sleep here because action might have not been deleted by workqueue just yet.",
"category": [
"filter",
"fw"
],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
"setup": [
"$TC qdisc add dev $DEV1 ingress",
- "/bin/sleep 1",
"$TC actions add action gact drop index 1"
],
"cmdUnderTest": "$TC filter add dev $DEV1 parent ffff: handle 1 prio 1 fw action gact index 1",
@@ -318,9 +220,7 @@
"matchPattern": "handle 0x1.*gact action drop.*index 1 ref 2 bind 1",
"matchCount": "1",
"teardown": [
- "$TC qdisc del dev $DEV1 ingress",
- "/bin/sleep 1",
- "$TC actions del action gact index 1"
+ "$TC qdisc del dev $DEV1 ingress"
]
},
{
@@ -330,6 +230,9 @@
"filter",
"fw"
],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
"setup": [
"$TC qdisc add dev $DEV1 ingress"
],
@@ -345,14 +248,15 @@
{
"id": "3bc2",
"name": "Add fw filter with action reclassify by reference",
- "__comment": "We add sleep here because action might have not been deleted by workqueue just yet.",
"category": [
"filter",
"fw"
],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
"setup": [
"$TC qdisc add dev $DEV1 ingress",
- "/bin/sleep 1",
"$TC actions add action gact reclassify index 1"
],
"cmdUnderTest": "$TC filter add dev $DEV1 parent ffff: handle 1 prio 1 fw action gact index 1",
@@ -361,9 +265,7 @@
"matchPattern": "handle 0x1.*gact action reclassify.*index 1 ref 2 bind 1",
"matchCount": "1",
"teardown": [
- "$TC qdisc del dev $DEV1 ingress",
- "/bin/sleep 1",
- "$TC actions del action gact index 1"
+ "$TC qdisc del dev $DEV1 ingress"
]
},
{
@@ -373,6 +275,9 @@
"filter",
"fw"
],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
"setup": [
"$TC qdisc add dev $DEV1 ingress"
],
@@ -388,14 +293,15 @@
{
"id": "36f7",
"name": "Add fw filter with action jump 10 by reference",
- "__comment": "We add sleep here because action might have not been deleted by workqueue just yet.",
"category": [
"filter",
"fw"
],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
"setup": [
"$TC qdisc add dev $DEV1 ingress",
- "/bin/sleep 1",
"$TC actions add action gact jump 10 index 1"
],
"cmdUnderTest": "$TC filter add dev $DEV1 parent ffff: handle 1 prio 1 fw action gact index 1",
@@ -404,9 +310,7 @@
"matchPattern": "handle 0x1.*gact action jump 10.*index 1 ref 2 bind 1",
"matchCount": "1",
"teardown": [
- "$TC qdisc del dev $DEV1 ingress",
- "/bin/sleep 1",
- "$TC actions del action gact index 1"
+ "$TC qdisc del dev $DEV1 ingress"
]
},
{
@@ -416,6 +320,9 @@
"filter",
"fw"
],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
"setup": [
"$TC qdisc add dev $DEV1 ingress"
],
@@ -435,6 +342,9 @@
"filter",
"fw"
],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
"setup": [
"$TC qdisc add dev $DEV1 ingress"
],
@@ -454,6 +364,9 @@
"filter",
"fw"
],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
"setup": [
"$TC qdisc add dev $DEV1 ingress"
],
@@ -473,6 +386,9 @@
"filter",
"fw"
],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
"setup": [
"$TC qdisc add dev $DEV1 ingress"
],
@@ -492,6 +408,9 @@
"filter",
"fw"
],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
"setup": [
"$TC qdisc add dev $DEV1 ingress"
],
@@ -511,6 +430,9 @@
"filter",
"fw"
],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
"setup": [
"$TC qdisc add dev $DEV1 ingress"
],
@@ -530,6 +452,9 @@
"filter",
"fw"
],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
"setup": [
"$TC qdisc add dev $DEV1 ingress"
],
@@ -549,6 +474,9 @@
"filter",
"fw"
],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
"setup": [
"$TC qdisc add dev $DEV1 ingress"
],
@@ -568,6 +496,9 @@
"filter",
"fw"
],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
"setup": [
"$TC qdisc add dev $DEV1 ingress"
],
@@ -587,6 +518,9 @@
"filter",
"fw"
],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
"setup": [
"$TC qdisc add dev $DEV1 ingress"
],
@@ -606,6 +540,9 @@
"filter",
"fw"
],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
"setup": [
"$TC qdisc add dev $DEV1 ingress"
],
@@ -625,6 +562,9 @@
"filter",
"fw"
],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
"setup": [
"$TC qdisc add dev $DEV1 ingress"
],
@@ -644,6 +584,9 @@
"filter",
"fw"
],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
"setup": [
"$TC qdisc add dev $DEV1 ingress"
],
@@ -663,6 +606,9 @@
"filter",
"fw"
],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
"setup": [
"$TC qdisc add dev $DEV1 ingress"
],
@@ -682,6 +628,9 @@
"filter",
"fw"
],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
"setup": [
"$TC qdisc add dev $DEV1 ingress"
],
@@ -701,6 +650,9 @@
"filter",
"fw"
],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
"setup": [
"$TC qdisc add dev $DEV1 ingress"
],
@@ -720,6 +672,9 @@
"filter",
"fw"
],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
"setup": [
"$TC qdisc add dev $DEV1 ingress"
],
@@ -739,6 +694,9 @@
"filter",
"fw"
],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
"setup": [
"$TC qdisc add dev $DEV1 ingress"
],
@@ -758,6 +716,9 @@
"filter",
"fw"
],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
"setup": [
"$TC qdisc add dev $DEV1 ingress"
],
@@ -777,6 +738,9 @@
"filter",
"fw"
],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
"setup": [
"$TC qdisc add dev $DEV1 ingress"
],
@@ -796,6 +760,9 @@
"filter",
"fw"
],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
"setup": [
"$TC qdisc add dev $DEV1 ingress"
],
@@ -815,6 +782,9 @@
"filter",
"fw"
],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
"setup": [
"$TC qdisc add dev $DEV1 ingress"
],
@@ -834,6 +804,9 @@
"filter",
"fw"
],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
"setup": [
"$TC qdisc add dev $DEV1 ingress"
],
@@ -853,6 +826,9 @@
"filter",
"fw"
],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
"setup": [
"$TC qdisc add dev $DEV1 ingress"
],
@@ -872,6 +848,9 @@
"filter",
"fw"
],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
"setup": [
"$TC qdisc add dev $DEV1 ingress"
],
@@ -891,6 +870,9 @@
"filter",
"fw"
],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
"setup": [
"$TC qdisc add dev $DEV1 ingress"
],
@@ -910,6 +892,9 @@
"filter",
"fw"
],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
"setup": [
"$TC qdisc add dev $DEV1 ingress"
],
@@ -929,6 +914,9 @@
"filter",
"fw"
],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
"setup": [
"$TC qdisc add dev $DEV1 ingress"
],
@@ -948,6 +936,9 @@
"filter",
"fw"
],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
"setup": [
"$TC qdisc add dev $DEV1 ingress"
],
@@ -967,6 +958,9 @@
"filter",
"fw"
],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
"setup": [
"$TC qdisc add dev $DEV1 ingress"
],
@@ -1096,7 +1090,6 @@
{
"id": "0e99",
"name": "Del single fw filter x1",
- "__comment__": "First of two tests to check that one filter is there and the other isn't",
"category": [
"filter",
"fw"
@@ -1121,7 +1114,6 @@
{
"id": "f54c",
"name": "Del single fw filter x2",
- "__comment__": "Second of two tests to check that one filter is there and the other isn't",
"category": [
"filter",
"fw"
@@ -1351,5 +1343,54 @@
"teardown": [
"$TC qdisc del dev $DEV1 ingress"
]
+ },
+ {
+ "id": "e470",
+ "name": "Try to delete class referenced by fw after a replace",
+ "category": [
+ "filter",
+ "fw"
+ ],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
+ "setup": [
+ "$TC qdisc add dev $DEV1 parent root handle 10: drr",
+ "$TC class add dev $DEV1 parent root classid 1 drr",
+ "$TC filter add dev $DEV1 parent 10: handle 1 prio 1 fw classid 10:1 action ok",
+ "$TC filter replace dev $DEV1 parent 10: handle 1 prio 1 fw classid 10:1 action drop"
+ ],
+ "cmdUnderTest": "$TC class delete dev $DEV1 parent 10: classid 10:1",
+ "expExitCode": "2",
+ "verifyCmd": "$TC class show dev $DEV1",
+ "matchPattern": "class drr 10:1",
+ "matchCount": "1",
+ "teardown": [
+ "$TC qdisc del dev $DEV1 parent root drr"
+ ]
+ },
+ {
+ "id": "ec1a",
+ "name": "Replace fw classid with nil",
+ "category": [
+ "filter",
+ "fw"
+ ],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
+ "setup": [
+ "$TC qdisc add dev $DEV1 parent root handle 10: drr",
+ "$TC class add dev $DEV1 parent root classid 1 drr",
+ "$TC filter add dev $DEV1 parent 10: handle 1 prio 1 fw classid 10:1 action ok"
+ ],
+ "cmdUnderTest": "$TC filter replace dev $DEV1 parent 10: handle 1 prio 1 fw action drop",
+ "expExitCode": "0",
+ "verifyCmd": "$TC filter show dev $DEV1 parent 10:",
+ "matchPattern": "fw chain 0 handle 0x1",
+ "matchCount": "1",
+ "teardown": [
+ "$TC qdisc del dev $DEV1 parent root drr"
+ ]
}
]
diff --git a/tools/testing/selftests/tc-testing/tc-tests/filters/matchall.json b/tools/testing/selftests/tc-testing/tc-tests/filters/matchall.json
index 51799874a972..f8d28c415bc3 100644
--- a/tools/testing/selftests/tc-testing/tc-tests/filters/matchall.json
+++ b/tools/testing/selftests/tc-testing/tc-tests/filters/matchall.json
@@ -6,8 +6,10 @@
"filter",
"matchall"
],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
"setup": [
- "$IP link add dev $DUMMY type dummy || /bin/true",
"$TC qdisc add dev $DUMMY ingress"
],
"cmdUnderTest": "$TC filter add dev $DUMMY parent ffff: handle 0x1 prio 1 protocol ip matchall action ok",
@@ -16,8 +18,7 @@
"matchPattern": "^filter parent ffff: protocol ip pref 1 matchall.*handle 0x1.*gact action pass.*ref 1 bind 1",
"matchCount": "1",
"teardown": [
- "$TC qdisc del dev $DUMMY ingress",
- "$IP link del dev $DUMMY type dummy"
+ "$TC qdisc del dev $DUMMY ingress"
]
},
{
@@ -27,8 +28,10 @@
"filter",
"matchall"
],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
"setup": [
- "$IP link add dev $DUMMY type dummy || /bin/true",
"$TC qdisc add dev $DUMMY root handle 1: prio"
],
"cmdUnderTest": "$TC filter add dev $DUMMY parent 1: handle 0x1 prio 1 protocol ip matchall action ok",
@@ -37,8 +40,7 @@
"matchPattern": "^filter parent 1: protocol ip pref 1 matchall.*handle 0x1.*gact action pass.*ref 1 bind 1",
"matchCount": "1",
"teardown": [
- "$TC qdisc del dev $DUMMY root handle 1: prio",
- "$IP link del dev $DUMMY type dummy"
+ "$TC qdisc del dev $DUMMY root handle 1: prio"
]
},
{
@@ -48,8 +50,10 @@
"filter",
"matchall"
],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
"setup": [
- "$IP link add dev $DUMMY type dummy || /bin/true",
"$TC qdisc add dev $DUMMY ingress"
],
"cmdUnderTest": "$TC filter add dev $DUMMY parent ffff: handle 0x1 prio 1 protocol ipv6 matchall action drop",
@@ -58,8 +62,7 @@
"matchPattern": "^filter parent ffff: protocol ipv6 pref 1 matchall.*handle 0x1.*gact action drop.*ref 1 bind 1",
"matchCount": "1",
"teardown": [
- "$TC qdisc del dev $DUMMY ingress",
- "$IP link del dev $DUMMY type dummy"
+ "$TC qdisc del dev $DUMMY ingress"
]
},
{
@@ -69,8 +72,10 @@
"filter",
"matchall"
],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
"setup": [
- "$IP link add dev $DUMMY type dummy || /bin/true",
"$TC qdisc add dev $DUMMY root handle 1: prio"
],
"cmdUnderTest": "$TC filter add dev $DUMMY parent 1: handle 0x1 prio 1 protocol ipv6 matchall action drop",
@@ -79,8 +84,7 @@
"matchPattern": "^filter parent 1: protocol ipv6 pref 1 matchall.*handle 0x1.*gact action drop.*ref 1 bind 1",
"matchCount": "1",
"teardown": [
- "$TC qdisc del dev $DUMMY root handle 1: prio",
- "$IP link del dev $DUMMY type dummy"
+ "$TC qdisc del dev $DUMMY root handle 1: prio"
]
},
{
@@ -90,8 +94,10 @@
"filter",
"matchall"
],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
"setup": [
- "$IP link add dev $DUMMY type dummy || /bin/true",
"$TC qdisc add dev $DUMMY ingress"
],
"cmdUnderTest": "$TC filter add dev $DUMMY parent ffff: handle 0x1 prio 65535 protocol ipv4 matchall action pass",
@@ -100,8 +106,7 @@
"matchPattern": "^filter parent ffff: protocol ip pref 65535 matchall.*handle 0x1.*gact action pass.*ref 1 bind 1",
"matchCount": "1",
"teardown": [
- "$TC qdisc del dev $DUMMY ingress",
- "$IP link del dev $DUMMY type dummy"
+ "$TC qdisc del dev $DUMMY ingress"
]
},
{
@@ -111,8 +116,10 @@
"filter",
"matchall"
],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
"setup": [
- "$IP link add dev $DUMMY type dummy || /bin/true",
"$TC qdisc add dev $DUMMY root handle 1: prio"
],
"cmdUnderTest": "$TC filter add dev $DUMMY parent 1: handle 0x1 prio 65535 protocol ipv4 matchall action pass",
@@ -121,8 +128,7 @@
"matchPattern": "^filter parent 1: protocol ip pref 65535 matchall.*handle 0x1.*gact action pass.*ref 1 bind 1",
"matchCount": "1",
"teardown": [
- "$TC qdisc del dev $DUMMY root handle 1: prio",
- "$IP link del dev $DUMMY type dummy"
+ "$TC qdisc del dev $DUMMY root handle 1: prio"
]
},
{
@@ -132,8 +138,10 @@
"filter",
"matchall"
],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
"setup": [
- "$IP link add dev $DUMMY type dummy || /bin/true",
"$TC qdisc add dev $DUMMY ingress"
],
"cmdUnderTest": "$TC filter add dev $DUMMY parent ffff: handle 0x1 prio 655355 protocol ipv4 matchall action pass",
@@ -142,8 +150,7 @@
"matchPattern": "^filter parent ffff: protocol ip pref 655355 matchall.*handle 0x1.*gact action pass.*ref 1 bind 1",
"matchCount": "0",
"teardown": [
- "$TC qdisc del dev $DUMMY ingress",
- "$IP link del dev $DUMMY type dummy"
+ "$TC qdisc del dev $DUMMY ingress"
]
},
{
@@ -153,8 +160,10 @@
"filter",
"matchall"
],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
"setup": [
- "$IP link add dev $DUMMY type dummy || /bin/true",
"$TC qdisc add dev $DUMMY root handle 1: prio"
],
"cmdUnderTest": "$TC filter add dev $DUMMY parent 1: handle 0x1 prio 655355 protocol ipv4 matchall action pass",
@@ -163,8 +172,7 @@
"matchPattern": "^filter parent 1: protocol ip pref 655355 matchall.*handle 0x1.*gact action pass.*ref 1 bind 1",
"matchCount": "0",
"teardown": [
- "$TC qdisc del dev $DUMMY root handle 1: prio",
- "$IP link del dev $DUMMY type dummy"
+ "$TC qdisc del dev $DUMMY root handle 1: prio"
]
},
{
@@ -174,8 +182,10 @@
"filter",
"matchall"
],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
"setup": [
- "$IP link add dev $DUMMY type dummy || /bin/true",
"$TC qdisc add dev $DUMMY ingress"
],
"cmdUnderTest": "$TC filter add dev $DUMMY parent ffff: handle 0xffffffff prio 1 protocol all matchall action continue",
@@ -184,8 +194,7 @@
"matchPattern": "^filter parent ffff: protocol all pref 1 matchall.*handle 0xffffffff.*gact action continue.*ref 1 bind 1",
"matchCount": "1",
"teardown": [
- "$TC qdisc del dev $DUMMY ingress",
- "$IP link del dev $DUMMY type dummy"
+ "$TC qdisc del dev $DUMMY ingress"
]
},
{
@@ -195,8 +204,10 @@
"filter",
"matchall"
],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
"setup": [
- "$IP link add dev $DUMMY type dummy || /bin/true",
"$TC qdisc add dev $DUMMY root handle 1: prio"
],
"cmdUnderTest": "$TC filter add dev $DUMMY parent 1: handle 0xffffffff prio 1 protocol all matchall action continue",
@@ -205,8 +216,7 @@
"matchPattern": "^filter parent 1: protocol all pref 1 matchall.*handle 0xffffffff.*gact action continue.*ref 1 bind 1",
"matchCount": "1",
"teardown": [
- "$TC qdisc del dev $DUMMY root handle 1: prio",
- "$IP link del dev $DUMMY type dummy"
+ "$TC qdisc del dev $DUMMY root handle 1: prio"
]
},
{
@@ -216,8 +226,10 @@
"filter",
"matchall"
],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
"setup": [
- "$IP link add dev $DUMMY type dummy || /bin/true",
"$TC qdisc add dev $DUMMY ingress"
],
"cmdUnderTest": "$TC filter add dev $DUMMY parent ffff: handle 0x1 prio 1 protocol all matchall skip_hw action reclassify",
@@ -226,8 +238,7 @@
"matchPattern": "^filter parent ffff: protocol all pref 1 matchall.*handle 0x1.*skip_hw.*not_in_hw.*gact action reclassify.*ref 1 bind 1",
"matchCount": "1",
"teardown": [
- "$TC qdisc del dev $DUMMY ingress",
- "$IP link del dev $DUMMY type dummy"
+ "$TC qdisc del dev $DUMMY ingress"
]
},
{
@@ -237,8 +248,10 @@
"filter",
"matchall"
],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
"setup": [
- "$IP link add dev $DUMMY type dummy || /bin/true",
"$TC qdisc add dev $DUMMY root handle 1: prio"
],
"cmdUnderTest": "$TC filter add dev $DUMMY parent 1: handle 0x1 prio 1 protocol all matchall skip_hw action reclassify",
@@ -247,8 +260,7 @@
"matchPattern": "^filter parent 1: protocol all pref 1 matchall.*handle 0x1.*skip_hw.*not_in_hw.*gact action reclassify.*ref 1 bind 1",
"matchCount": "1",
"teardown": [
- "$TC qdisc del dev $DUMMY root handle 1: prio",
- "$IP link del dev $DUMMY type dummy"
+ "$TC qdisc del dev $DUMMY root handle 1: prio"
]
},
{
@@ -258,8 +270,10 @@
"filter",
"matchall"
],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
"setup": [
- "$IP link add dev $DUMMY type dummy || /bin/true",
"$TC qdisc add dev $DUMMY ingress"
],
"cmdUnderTest": "$TC filter add dev $DUMMY parent ffff: handle 0x1 prio 1 protocol ipv6 matchall classid 1:1 action pass",
@@ -268,8 +282,7 @@
"matchPattern": "^filter parent ffff: protocol ipv6 pref 1 matchall.*handle 0x1.*flowid 1:1.*gact action pass.*ref 1 bind 1",
"matchCount": "1",
"teardown": [
- "$TC qdisc del dev $DUMMY ingress",
- "$IP link del dev $DUMMY type dummy"
+ "$TC qdisc del dev $DUMMY ingress"
]
},
{
@@ -279,8 +292,10 @@
"filter",
"matchall"
],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
"setup": [
- "$IP link add dev $DUMMY type dummy || /bin/true",
"$TC qdisc add dev $DUMMY ingress"
],
"cmdUnderTest": "$TC filter add dev $DUMMY parent ffff: handle 0x1 prio 1 protocol ipv6 matchall classid 6789defg action pass",
@@ -289,8 +304,7 @@
"matchPattern": "^filter protocol ipv6 pref 1 matchall.*handle 0x1.*flowid 6789defg.*gact action pass.*ref 1 bind 1",
"matchCount": "0",
"teardown": [
- "$TC qdisc del dev $DUMMY ingress",
- "$IP link del dev $DUMMY type dummy"
+ "$TC qdisc del dev $DUMMY ingress"
]
},
{
@@ -300,8 +314,10 @@
"filter",
"matchall"
],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
"setup": [
- "$IP link add dev $DUMMY type dummy || /bin/true",
"$TC qdisc add dev $DUMMY ingress",
"$TC filter add dev $DUMMY parent ffff: handle 0x1 prio 1 protocol ipv6 matchall classid 1:2 action pass"
],
@@ -311,8 +327,7 @@
"matchPattern": "^filter protocol ipv6 pref 1 matchall.*handle 0x1.*flowid 1:2.*gact action pass.*ref 1 bind 1",
"matchCount": "0",
"teardown": [
- "$TC qdisc del dev $DUMMY ingress",
- "$IP link del dev $DUMMY type dummy"
+ "$TC qdisc del dev $DUMMY ingress"
]
},
{
@@ -322,8 +337,10 @@
"filter",
"matchall"
],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
"setup": [
- "$IP link add dev $DUMMY type dummy || /bin/true",
"$TC qdisc add dev $DUMMY ingress",
"$TC filter add dev $DUMMY parent ffff: handle 0x1 prio 1 protocol all matchall classid 1:2 action pass",
"$TC filter add dev $DUMMY parent ffff: handle 0x2 prio 2 protocol all matchall classid 1:3 action pass",
@@ -336,8 +353,7 @@
"matchPattern": "^filter protocol all pref.*matchall.*handle.*flowid.*gact action pass",
"matchCount": "0",
"teardown": [
- "$TC qdisc del dev $DUMMY ingress",
- "$IP link del dev $DUMMY type dummy"
+ "$TC qdisc del dev $DUMMY ingress"
]
},
{
@@ -347,8 +363,10 @@
"filter",
"matchall"
],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
"setup": [
- "$IP link add dev $DUMMY type dummy || /bin/true",
"$TC qdisc add dev $DUMMY ingress",
"$TC filter add dev $DUMMY parent ffff: handle 0x1 prio 1 protocol all matchall classid 1:2 action pass",
"$TC filter add dev $DUMMY parent ffff: handle 0x2 prio 2 protocol all matchall classid 1:3 action pass",
@@ -361,8 +379,7 @@
"matchPattern": "^filter protocol all pref 2 matchall.*handle 0x2 flowid 1:2.*gact action pass",
"matchCount": "0",
"teardown": [
- "$TC qdisc del dev $DUMMY ingress",
- "$IP link del dev $DUMMY type dummy"
+ "$TC qdisc del dev $DUMMY ingress"
]
},
{
@@ -372,8 +389,10 @@
"filter",
"matchall"
],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
"setup": [
- "$IP link add dev $DUMMY type dummy || /bin/true",
"$TC qdisc add dev $DUMMY ingress",
"$TC filter add dev $DUMMY parent ffff: handle 0x1 prio 1 protocol all chain 1 matchall classid 1:1 action pass",
"$TC filter add dev $DUMMY parent ffff: handle 0x1 prio 1 protocol ipv4 chain 2 matchall classid 1:3 action continue"
@@ -384,8 +403,105 @@
"matchPattern": "^filter protocol all pref 1 matchall chain 1 handle 0x1 flowid 1:1.*gact action pass",
"matchCount": "1",
"teardown": [
+ "$TC qdisc del dev $DUMMY ingress"
+ ]
+ },
+ {
+ "id": "3329",
+ "name": "Validate flags of the matchall filter with skip_sw and police action with skip_hw",
+ "category": [
+ "filter",
+ "matchall"
+ ],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
+ "setup": [
+ "$TC qdisc add dev $DUMMY ingress",
+ "$TC actions flush action police",
+ "$TC actions add action police rate 1mbit burst 100k index 199 skip_hw"
+ ],
+ "cmdUnderTest": "$TC filter add dev $DUMMY parent ffff: handle 0x1 prio 1 protocol ipv4 matchall skip_sw action police index 199",
+ "expExitCode": "2",
+ "verifyCmd": "$TC filter get dev $DUMMY parent ffff: handle 1 prio 1 protocol ipv4 matchall",
+ "matchPattern": "^filter parent ffff: protocol ip pref 1 matchall.*handle 0x1.*",
+ "matchCount": "0",
+ "teardown": [
"$TC qdisc del dev $DUMMY ingress",
- "$IP link del dev $DUMMY type dummy"
+ "$TC actions del action police index 199"
+ ]
+ },
+ {
+ "id": "0eeb",
+ "name": "Validate flags of the matchall filter with skip_hw and police action",
+ "category": [
+ "filter",
+ "matchall"
+ ],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
+ "setup": [
+ "$TC qdisc add dev $DUMMY ingress",
+ "$TC actions flush action police",
+ "$TC actions add action police rate 1mbit burst 100k index 199"
+ ],
+ "cmdUnderTest": "$TC filter add dev $DUMMY parent ffff: handle 0x1 prio 1 protocol ipv4 matchall skip_hw action police index 199",
+ "expExitCode": "2",
+ "verifyCmd": "$TC filter get dev $DUMMY parent ffff: handle 1 prio 1 protocol ipv4 matchall",
+ "matchPattern": "^filter parent ffff: protocol ip pref 1 matchall.*handle 0x1.*",
+ "matchCount": "0",
+ "teardown": [
+ "$TC qdisc del dev $DUMMY ingress",
+ "$TC actions del action police index 199"
+ ]
+ },
+ {
+ "id": "eee4",
+ "name": "Validate flags of the matchall filter with skip_sw and police action",
+ "category": [
+ "filter",
+ "matchall"
+ ],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
+ "setup": [
+ "$TC qdisc add dev $DUMMY ingress",
+ "$TC actions flush action police",
+ "$TC actions add action police rate 1mbit burst 100k index 199"
+ ],
+ "cmdUnderTest": "$TC filter add dev $DUMMY parent ffff: handle 0x1 prio 1 protocol ipv4 matchall skip_sw action police index 199",
+ "expExitCode": "2",
+ "verifyCmd": "$TC filter get dev $DUMMY parent ffff: handle 1 prio 1 protocol ipv4 matchall",
+ "matchPattern": "^filter parent ffff: protocol ip pref 1 matchall.*handle 0x1.*",
+ "matchCount": "0",
+ "teardown": [
+ "$TC qdisc del dev $DUMMY ingress",
+ "$TC actions del action police index 199"
+ ]
+ },
+ {
+ "id": "2638",
+ "name": "Add matchall and try to get it",
+ "category": [
+ "filter",
+ "matchall"
+ ],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
+ "setup": [
+ "$TC qdisc add dev $DEV1 clsact",
+ "$TC filter add dev $DEV1 protocol all pref 1 ingress handle 0x1234 matchall action ok"
+ ],
+ "cmdUnderTest": "$TC filter get dev $DEV1 protocol all pref 1 ingress handle 0x1234 matchall",
+ "expExitCode": "0",
+ "verifyCmd": "$TC filter show dev $DEV1 ingress",
+ "matchPattern": "filter protocol all pref 1 matchall chain 0 handle 0x1234",
+ "matchCount": "1",
+ "teardown": [
+ "$TC qdisc del dev $DEV1 clsact"
]
}
]
diff --git a/tools/testing/selftests/tc-testing/tc-tests/filters/route.json b/tools/testing/selftests/tc-testing/tc-tests/filters/route.json
new file mode 100644
index 000000000000..8d8de8f65aef
--- /dev/null
+++ b/tools/testing/selftests/tc-testing/tc-tests/filters/route.json
@@ -0,0 +1,206 @@
+[
+ {
+ "id": "e122",
+ "name": "Add route filter with from and to tag",
+ "category": [
+ "filter",
+ "route"
+ ],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
+ "setup": [
+ "$TC qdisc add dev $DEV1 ingress"
+ ],
+ "cmdUnderTest": "$TC filter add dev $DEV1 parent ffff: protocol ip prio 100 route from 1 to 10 classid 1:10",
+ "expExitCode": "0",
+ "verifyCmd": "$TC filter ls dev $DEV1 parent ffff:",
+ "matchPattern": "flowid 1:10 to 10 from 1",
+ "matchCount": "1",
+ "teardown": [
+ "$TC qdisc del dev $DEV1 ingress"
+ ]
+ },
+ {
+ "id": "6573",
+ "name": "Add route filter with fromif and to tag",
+ "category": [
+ "filter",
+ "route"
+ ],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
+ "setup": [
+ "$TC qdisc add dev $DEV1 ingress"
+ ],
+ "cmdUnderTest": "$TC filter add dev $DEV1 parent ffff: protocol ip prio 100 route fromif $DEV1 to 10 classid 1:10",
+ "expExitCode": "0",
+ "verifyCmd": "$TC filter ls dev $DEV1 parent ffff:",
+ "matchPattern": "flowid 1:10 to 10 fromif",
+ "matchCount": "1",
+ "teardown": [
+ "$TC qdisc del dev $DEV1 ingress"
+ ]
+ },
+ {
+ "id": "1362",
+ "name": "Add route filter with to flag and reclassify action",
+ "category": [
+ "filter",
+ "route"
+ ],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
+ "setup": [
+ "$TC qdisc add dev $DEV1 ingress"
+ ],
+ "cmdUnderTest": "$TC filter add dev $DEV1 parent ffff: protocol ip prio 2 route to 10 classid 1:20 action reclassify",
+ "expExitCode": "0",
+ "verifyCmd": "$TC filter ls dev $DEV1 parent ffff:",
+ "matchPattern": "filter protocol ip pref.*route chain [0-9]+.*flowid 1:20 to 10.*action order [0-9]+: gact action reclassify",
+ "matchCount": "1",
+ "teardown": [
+ "$TC qdisc del dev $DEV1 ingress"
+ ]
+ },
+ {
+ "id": "4720",
+ "name": "Add route filter with from flag and continue actions",
+ "category": [
+ "filter",
+ "route"
+ ],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
+ "setup": [
+ "$TC qdisc add dev $DEV1 ingress"
+ ],
+ "cmdUnderTest": "$TC filter add dev $DEV1 parent ffff: protocol ip prio 2 route from 10 classid 1:100 action continue",
+ "expExitCode": "0",
+ "verifyCmd": "$TC filter ls dev $DEV1 parent ffff:",
+ "matchPattern": "filter protocol ip pref.*route chain [0-9]+.*flowid 1:100 from 10.*action continue",
+ "matchCount": "1",
+ "teardown": [
+ "$TC qdisc del dev $DEV1 ingress"
+ ]
+ },
+ {
+ "id": "2812",
+ "name": "Add route filter with form tag and pipe action",
+ "category": [
+ "filter",
+ "route"
+ ],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
+ "setup": [
+ "$TC qdisc add dev $DEV1 ingress"
+ ],
+ "cmdUnderTest": "$TC filter add dev $DEV1 parent ffff: protocol ip prio 2 route from 10 to 2 classid 1:1 action pipe",
+ "expExitCode": "0",
+ "verifyCmd": "$TC filter ls dev $DEV1 parent ffff:",
+ "matchPattern": "filter protocol ip pref.*route chain [0-9]+.*flowid 1:1 to 2 from 10.*action pipe",
+ "matchCount": "1",
+ "teardown": [
+ "$TC qdisc del dev $DEV1 ingress"
+ ]
+ },
+ {
+ "id": "7994",
+ "name": "Add route filter with miltiple actions",
+ "category": [
+ "filter",
+ "route"
+ ],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
+ "setup": [
+ "$TC qdisc add dev $DEV1 ingress"
+ ],
+ "cmdUnderTest": "$TC filter add dev $DEV1 parent ffff: protocol ip prio 2 route from 10 to 2 classid 1:1 action skbedit mark 7 pipe action gact drop",
+ "expExitCode": "0",
+ "verifyCmd": "$TC filter ls dev $DEV1 parent ffff:",
+ "matchPattern": "filter protocol ip pref.*route chain [0-9]+.*flowid 1:1 to 2 from 10.*action order [0-9]+: skbedit mark 7 pipe.*action order [0-9]+: gact action drop",
+ "matchCount": "1",
+ "teardown": [
+ "$TC qdisc del dev $DEV1 ingress"
+ ]
+ },
+ {
+ "id": "4312",
+ "name": "List route filters",
+ "category": [
+ "filter",
+ "route"
+ ],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
+ "setup": [
+ "$TC qdisc add dev $DEV1 ingress",
+ "$TC filter add dev $DEV1 parent ffff: protocol ip prio 2 route from 10 to 2 classid 1:1 action pipe",
+ "$TC filter add dev $DEV1 parent ffff: protocol ip prio 2 route from 20 to 1 classid 1:20 action pipe"
+ ],
+ "cmdUnderTest": "$TC filter show dev $DEV1 parent ffff:",
+ "expExitCode": "0",
+ "verifyCmd": "$TC filter show dev $DEV1 parent ffff:",
+ "matchPattern": "action order [0-9]+: gact action pipe",
+ "matchCount": "2",
+ "teardown": [
+ "$TC qdisc del dev $DEV1 ingress"
+ ]
+ },
+ {
+ "id": "2634",
+ "name": "Delete route filter with pipe action",
+ "category": [
+ "filter",
+ "route"
+ ],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
+ "setup": [
+ "$TC qdisc add dev $DEV1 ingress",
+ "$TC filter add dev $DEV1 parent ffff: protocol ip prio 2 route from 10 to 2 classid 1:1 action pipe"
+ ],
+ "cmdUnderTest": "$TC filter del dev $DEV1 parent ffff: protocol ip prio 2 route from 10 to 2 classid 1:1 action pipe",
+ "expExitCode": "0",
+ "verifyCmd": "$TC filter show dev $DEV1 parent ffff:",
+ "matchPattern": "filter protocol ip pref.*route chain [0-9]+.*flowid 1:1 to 2 from 10.*action pipe",
+ "matchCount": "0",
+ "teardown": [
+ "$TC qdisc del dev $DEV1 ingress"
+ ]
+ },
+ {
+ "id": "b042",
+ "name": "Try to delete class referenced by route after a replace",
+ "category": [
+ "filter",
+ "route"
+ ],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
+ "setup": [
+ "$TC qdisc add dev $DEV1 parent root handle 10: drr",
+ "$TC class add dev $DEV1 parent root classid 1 drr",
+ "$TC filter add dev $DEV1 parent 10: prio 1 route from 10 classid 10:1 action ok",
+ "$TC filter replace dev $DEV1 parent 10: prio 1 route from 5 classid 10:1 action drop"
+ ],
+ "cmdUnderTest": "$TC class delete dev $DEV1 parent 10: classid 10:1",
+ "expExitCode": "2",
+ "verifyCmd": "$TC class show dev $DEV1",
+ "matchPattern": "class drr 10:1",
+ "matchCount": "1",
+ "teardown": [
+ "$TC qdisc del dev $DEV1 parent root drr"
+ ]
+ }
+]
diff --git a/tools/testing/selftests/tc-testing/tc-tests/filters/tests.json b/tools/testing/selftests/tc-testing/tc-tests/filters/tests.json
deleted file mode 100644
index bb543bf69d69..000000000000
--- a/tools/testing/selftests/tc-testing/tc-tests/filters/tests.json
+++ /dev/null
@@ -1,129 +0,0 @@
-[
- {
- "id": "2638",
- "name": "Add matchall and try to get it",
- "category": [
- "filter",
- "matchall"
- ],
- "plugins": {
- "requires": "nsPlugin"
- },
- "setup": [
- "$TC qdisc add dev $DEV1 clsact",
- "$TC filter add dev $DEV1 protocol all pref 1 ingress handle 0x1234 matchall action ok"
- ],
- "cmdUnderTest": "$TC filter get dev $DEV1 protocol all pref 1 ingress handle 0x1234 matchall",
- "expExitCode": "0",
- "verifyCmd": "$TC filter show dev $DEV1 ingress",
- "matchPattern": "filter protocol all pref 1 matchall chain 0 handle 0x1234",
- "matchCount": "1",
- "teardown": [
- "$TC qdisc del dev $DEV1 clsact"
- ]
- },
- {
- "id": "2ff3",
- "name": "Add flower with max handle and then dump it",
- "category": [
- "filter",
- "flower"
- ],
- "setup": [
- "$TC qdisc add dev $DEV2 ingress"
- ],
- "cmdUnderTest": "$TC filter add dev $DEV2 protocol ip pref 1 ingress handle 0xffffffff flower action ok",
- "expExitCode": "0",
- "verifyCmd": "$TC filter show dev $DEV2 ingress",
- "matchPattern": "filter protocol ip pref 1 flower.*handle 0xffffffff",
- "matchCount": "1",
- "teardown": [
- "$TC qdisc del dev $DEV2 ingress"
- ]
- },
- {
- "id": "d052",
- "name": "Add 1M filters with the same action",
- "category": [
- "filter",
- "flower"
- ],
- "plugins": {
- "requires": "nsPlugin"
- },
- "setup": [
- "$TC qdisc add dev $DEV2 ingress",
- "./tdc_batch.py $DEV2 $BATCH_FILE --share_action -n 1000000"
- ],
- "cmdUnderTest": "$TC -b $BATCH_FILE",
- "expExitCode": "0",
- "verifyCmd": "$TC actions list action gact",
- "matchPattern": "action order 0: gact action drop.*index 1 ref 1000000 bind 1000000",
- "matchCount": "1",
- "teardown": [
- "$TC qdisc del dev $DEV2 ingress",
- "/bin/rm $BATCH_FILE"
- ]
- },
- {
- "id": "4cbd",
- "name": "Try to add filter with duplicate key",
- "category": [
- "filter",
- "flower"
- ],
- "plugins": {
- "requires": "nsPlugin"
- },
- "setup": [
- "$TC qdisc add dev $DEV2 ingress",
- "$TC filter add dev $DEV2 protocol ip prio 1 ingress flower dst_mac e4:11:22:11:4a:51 src_mac e4:11:22:11:4a:50 ip_proto tcp src_ip 1.1.1.1 dst_ip 2.2.2.2 action drop"
- ],
- "cmdUnderTest": "$TC filter add dev $DEV2 protocol ip prio 1 ingress flower dst_mac e4:11:22:11:4a:51 src_mac e4:11:22:11:4a:50 ip_proto tcp src_ip 1.1.1.1 dst_ip 2.2.2.2 action drop",
- "expExitCode": "2",
- "verifyCmd": "$TC -s filter show dev $DEV2 ingress",
- "matchPattern": "filter protocol ip pref 1 flower chain 0 handle",
- "matchCount": "1",
- "teardown": [
- "$TC qdisc del dev $DEV2 ingress"
- ]
- },
- {
- "id": "7c65",
- "name": "Add flower filter and then terse dump it",
- "category": [
- "filter",
- "flower"
- ],
- "setup": [
- "$TC qdisc add dev $DEV2 ingress"
- ],
- "cmdUnderTest": "$TC filter add dev $DEV2 protocol ip pref 1 ingress flower dst_mac e4:11:22:11:4a:51 action drop",
- "expExitCode": "0",
- "verifyCmd": "$TC filter show terse dev $DEV2 ingress",
- "matchPattern": "filter protocol ip pref 1 flower.*handle",
- "matchCount": "1",
- "teardown": [
- "$TC qdisc del dev $DEV2 ingress"
- ]
- },
- {
- "id": "d45e",
- "name": "Add flower filter and verify that terse dump doesn't output filter key",
- "category": [
- "filter",
- "flower"
- ],
- "setup": [
- "$TC qdisc add dev $DEV2 ingress"
- ],
- "cmdUnderTest": "$TC filter add dev $DEV2 protocol ip pref 1 ingress flower dst_mac e4:11:22:11:4a:51 action drop",
- "expExitCode": "0",
- "verifyCmd": "$TC filter show terse dev $DEV2 ingress",
- "matchPattern": " dst_mac e4:11:22:11:4a:51",
- "matchCount": "0",
- "teardown": [
- "$TC qdisc del dev $DEV2 ingress"
- ]
- }
-]
diff --git a/tools/testing/selftests/tc-testing/tc-tests/filters/u32.json b/tools/testing/selftests/tc-testing/tc-tests/filters/u32.json
index e09d3c0e307f..24bd0c2a3014 100644
--- a/tools/testing/selftests/tc-testing/tc-tests/filters/u32.json
+++ b/tools/testing/selftests/tc-testing/tc-tests/filters/u32.json
@@ -201,5 +201,133 @@
"teardown": [
"$TC qdisc del dev $DEV1 ingress"
]
+ },
+ {
+ "id": "0692",
+ "name": "Test u32 sample option, divisor 256",
+ "category": [
+ "filter",
+ "u32"
+ ],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
+ "setup": [
+ "$TC qdisc add dev $DEV1 ingress",
+ "$TC filter add dev $DEV1 ingress prio 99 handle 1: u32 divisor 256"
+ ],
+ "cmdUnderTest": "bash -c \"for mask in ff ffff ffffff ffffffff ff00ff ff0000ff ffff00ff; do $TC filter add dev $DEV1 ingress prio 99 u32 ht 1: sample u32 0x10203040 \\$mask match u8 0 0 classid 1:1; done\"",
+ "expExitCode": "0",
+ "verifyCmd": "$TC filter show dev $DEV1 ingress",
+ "matchPattern": "filter protocol all pref 99 u32( (chain|fh|order) [0-9:]+){3} key ht 1 bkt 40 flowid 1:1",
+ "matchCount": "7",
+ "teardown": [
+ "$TC qdisc del dev $DEV1 ingress"
+ ]
+ },
+ {
+ "id": "2478",
+ "name": "Test u32 sample option, divisor 16",
+ "category": [
+ "filter",
+ "u32"
+ ],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
+ "setup": [
+ "$TC qdisc add dev $DEV1 ingress",
+ "$TC filter add dev $DEV1 ingress prio 99 handle 1: u32 divisor 256"
+ ],
+ "cmdUnderTest": "bash -c \"for mask in 70 f0 ff0 fff0 ff00f0; do $TC filter add dev $DEV1 ingress prio 99 u32 ht 1: sample u32 0x10203040 \\$mask match u8 0 0 classid 1:1; done\"",
+ "expExitCode": "0",
+ "verifyCmd": "$TC filter show dev $DEV1 ingress",
+ "matchPattern": "filter protocol all pref 99 u32( (chain|fh|order) [0-9:]+){3} key ht 1 bkt 4 flowid 1:1",
+ "matchCount": "5",
+ "teardown": [
+ "$TC qdisc del dev $DEV1 ingress"
+ ]
+ },
+ {
+ "id": "0c37",
+ "name": "Try to delete class referenced by u32 after a replace",
+ "category": [
+ "filter",
+ "u32"
+ ],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
+ "setup": [
+ "$TC qdisc add dev $DEV1 parent root handle 10: drr",
+ "$TC class add dev $DEV1 parent root classid 1 drr",
+ "$TC filter add dev $DEV1 parent 10: prio 1 u32 match icmp type 1 0xff classid 10:1 action ok",
+ "$TC filter replace dev $DEV1 parent 10: prio 1 u32 match icmp type 1 0xff classid 10:1 action drop"
+ ],
+ "cmdUnderTest": "$TC class delete dev $DEV1 parent 10: classid 10:1",
+ "expExitCode": "2",
+ "verifyCmd": "$TC class show dev $DEV1",
+ "matchPattern": "class drr 10:1",
+ "matchCount": "1",
+ "teardown": [
+ "$TC qdisc del dev $DEV1 parent root drr"
+ ]
+ },
+ {
+ "id": "bd32",
+ "name": "Try to delete hashtable referenced by another u32 filter",
+ "category": [
+ "filter",
+ "u32"
+ ],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
+ "setup": [
+ "$TC qdisc add dev $DEV1 parent root handle 10: drr",
+ "$TC filter add dev $DEV1 parent 10:0 protocol ip prio 2 handle 1: u32 divisor 1",
+ "$TC filter add dev $DEV1 parent 10:0 protocol ip prio 2 u32 ht 800: match ip src any link 1:"
+ ],
+ "cmdUnderTest": "$TC filter delete dev $DEV1 parent 10: prio 2 handle 1: u32",
+ "expExitCode": "2",
+ "verifyCmd": "$TC filter show dev $DEV1",
+ "matchPattern": "protocol ip pref 2 u32 chain 0 fh 1:",
+ "matchCount": "1",
+ "teardown": [
+ "$TC qdisc del dev $DEV1 parent root drr"
+ ]
+ },
+ {
+ "id": "4585",
+ "name": "Delete small tree of u32 hashtables and filters",
+ "category": [
+ "filter",
+ "u32"
+ ],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
+ "setup": [
+ "$TC qdisc add dev $DEV1 parent root handle 10: drr",
+ "$TC filter add dev $DEV1 parent 10:0 protocol ip prio 2 handle 1: u32 divisor 1",
+ "$TC filter add dev $DEV1 parent 10:0 protocol ip prio 2 handle 2: u32 divisor 1",
+ "$TC filter add dev $DEV1 parent 10:0 protocol ip prio 2 handle 3: u32 divisor 2",
+ "$TC filter add dev $DEV1 parent 10:0 protocol ip prio 2 handle 4: u32 divisor 1",
+ "$TC filter add dev $DEV1 parent 10:0 protocol ip prio 2 u32 ht 1: match ip src any action drop",
+ "$TC filter add dev $DEV1 parent 10:0 protocol ip prio 2 u32 ht 2: match ip src any action drop",
+ "$TC filter add dev $DEV1 parent 10:0 protocol ip prio 2 u32 ht 3: match ip src any link 2:",
+ "$TC filter add dev $DEV1 parent 10:0 protocol ip prio 2 u32 ht 3: match ip src any link 1:",
+ "$TC filter add dev $DEV1 parent 10:0 protocol ip prio 2 u32 ht 4: match ip src any action drop",
+ "$TC filter add dev $DEV1 parent 10:0 protocol ip prio 2 u32 ht 800: match ip src any link 3:",
+ "$TC filter add dev $DEV1 parent 10:0 protocol ip prio 2 u32 ht 800: match ip src any link 4:"
+ ],
+ "cmdUnderTest": "$TC filter delete dev $DEV1 parent 10:",
+ "expExitCode": "0",
+ "verifyCmd": "$TC filter show dev $DEV1",
+ "matchPattern": "protocol ip pref 2 u32",
+ "matchCount": "0",
+ "teardown": [
+ "$TC qdisc del dev $DEV1 parent root drr"
+ ]
}
]
diff --git a/tools/testing/selftests/tc-testing/tc-tests/infra/actions.json b/tools/testing/selftests/tc-testing/tc-tests/infra/actions.json
new file mode 100644
index 000000000000..1ba96c467754
--- /dev/null
+++ b/tools/testing/selftests/tc-testing/tc-tests/infra/actions.json
@@ -0,0 +1,416 @@
+[
+ {
+ "id": "abdc",
+ "name": "Reference pedit action object in filter",
+ "category": [
+ "infra",
+ "pedit"
+ ],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
+ "setup": [
+ "$TC qdisc add dev $DUMMY ingress",
+ "$TC action add action pedit munge offset 0 u8 clear index 1"
+ ],
+ "cmdUnderTest": "$TC filter add dev $DUMMY parent ffff: handle 0x1 prio 1 protocol ip matchall action pedit index 1",
+ "expExitCode": "0",
+ "verifyCmd": "$TC filter get dev $DUMMY parent ffff: handle 1 prio 1 protocol ip matchall",
+ "matchPattern": "^filter parent ffff: protocol ip pref 1 matchall.*handle 0x1.*",
+ "matchCount": "1",
+ "teardown": [
+ "$TC qdisc del dev $DUMMY ingress"
+ ]
+ },
+ {
+ "id": "7a70",
+ "name": "Reference mpls action object in filter",
+ "category": [
+ "infra",
+ "mpls"
+ ],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
+ "setup": [
+ "$TC qdisc add dev $DUMMY ingress",
+ "$TC action add action mpls pop protocol ipv4 index 1"
+ ],
+ "cmdUnderTest": "$TC filter add dev $DUMMY parent ffff: handle 0x1 prio 1 protocol ip matchall action mpls index 1",
+ "expExitCode": "0",
+ "verifyCmd": "$TC filter get dev $DUMMY parent ffff: handle 1 prio 1 protocol ip matchall",
+ "matchPattern": "^filter parent ffff: protocol ip pref 1 matchall.*handle 0x1.*",
+ "matchCount": "1",
+ "teardown": [
+ "$TC qdisc del dev $DUMMY ingress"
+ ]
+ },
+ {
+ "id": "d241",
+ "name": "Reference bpf action object in filter",
+ "category": [
+ "infra",
+ "bpf"
+ ],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
+ "setup": [
+ "$TC qdisc add dev $DUMMY ingress",
+ "$TC action add action bpf bytecode '4,40 0 0 12,21 0 1 2048,6 0 0 262144,6 0 0 0' index 1"
+ ],
+ "cmdUnderTest": "$TC filter add dev $DUMMY parent ffff: handle 0x1 prio 1 protocol ip matchall action bpf index 1",
+ "expExitCode": "0",
+ "verifyCmd": "$TC filter get dev $DUMMY parent ffff: handle 1 prio 1 protocol ip matchall",
+ "matchPattern": "^filter parent ffff: protocol ip pref 1 matchall.*handle 0x1.*",
+ "matchCount": "1",
+ "teardown": [
+ "$TC qdisc del dev $DUMMY ingress"
+ ]
+ },
+ {
+ "id": "383a",
+ "name": "Reference connmark action object in filter",
+ "category": [
+ "infra",
+ "connmark"
+ ],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
+ "setup": [
+ "$TC qdisc add dev $DUMMY ingress",
+ "$TC actions add action connmark"
+ ],
+ "cmdUnderTest": "$TC filter add dev $DUMMY parent ffff: handle 0x1 prio 1 protocol ip matchall action connmark index 1",
+ "expExitCode": "0",
+ "verifyCmd": "$TC filter get dev $DUMMY parent ffff: handle 1 prio 1 protocol ip matchall",
+ "matchPattern": "^filter parent ffff: protocol ip pref 1 matchall.*handle 0x1.*",
+ "matchCount": "1",
+ "teardown": [
+ "$TC qdisc del dev $DUMMY ingress"
+ ]
+ },
+ {
+ "id": "c619",
+ "name": "Reference csum action object in filter",
+ "category": [
+ "infra",
+ "csum"
+ ],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
+ "setup": [
+ "$TC qdisc add dev $DUMMY ingress",
+ "$TC actions add action csum ip4h index 1"
+ ],
+ "cmdUnderTest": "$TC filter add dev $DUMMY parent ffff: handle 0x1 prio 1 protocol ip matchall action csum index 1",
+ "expExitCode": "0",
+ "verifyCmd": "$TC filter get dev $DUMMY parent ffff: handle 1 prio 1 protocol ip matchall",
+ "matchPattern": "^filter parent ffff: protocol ip pref 1 matchall.*handle 0x1.*",
+ "matchCount": "1",
+ "teardown": [
+ "$TC qdisc del dev $DUMMY ingress"
+ ]
+ },
+ {
+ "id": "a93d",
+ "name": "Reference ct action object in filter",
+ "category": [
+ "infra",
+ "ct"
+ ],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
+ "setup": [
+ "$TC qdisc add dev $DUMMY ingress",
+ "$TC actions add action ct index 1"
+ ],
+ "cmdUnderTest": "$TC filter add dev $DUMMY parent ffff: handle 0x1 prio 1 protocol ip matchall action ct index 1",
+ "expExitCode": "0",
+ "verifyCmd": "$TC filter get dev $DUMMY parent ffff: handle 1 prio 1 protocol ip matchall",
+ "matchPattern": "^filter parent ffff: protocol ip pref 1 matchall.*handle 0x1.*",
+ "matchCount": "1",
+ "teardown": [
+ "$TC qdisc del dev $DUMMY ingress"
+ ]
+ },
+ {
+ "id": "8bb5",
+ "name": "Reference ctinfo action object in filter",
+ "category": [
+ "infra",
+ "ctinfo"
+ ],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
+ "setup": [
+ "$TC qdisc add dev $DUMMY ingress",
+ "$TC action add action ctinfo index 1"
+ ],
+ "cmdUnderTest": "$TC filter add dev $DUMMY parent ffff: handle 0x1 prio 1 protocol ip matchall action ctinfo index 10",
+ "expExitCode": "0",
+ "verifyCmd": "$TC filter get dev $DUMMY parent ffff: handle 1 prio 1 protocol ip matchall",
+ "matchPattern": "^filter parent ffff: protocol ip pref 1 matchall.*handle 0x1.*",
+ "matchCount": "1",
+ "teardown": [
+ "$TC qdisc del dev $DUMMY ingress"
+ ]
+ },
+ {
+ "id": "2241",
+ "name": "Reference gact action object in filter",
+ "category": [
+ "infra",
+ "gact"
+ ],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
+ "setup": [
+ "$TC qdisc add dev $DUMMY ingress",
+ "$TC actions add action pass index 1"
+ ],
+ "cmdUnderTest": "$TC filter add dev $DUMMY parent ffff: handle 0x1 prio 1 protocol ip matchall action gact index 1",
+ "expExitCode": "0",
+ "verifyCmd": "$TC filter get dev $DUMMY parent ffff: handle 1 prio 1 protocol ip matchall",
+ "matchPattern": "^filter parent ffff: protocol ip pref 1 matchall.*handle 0x1.*",
+ "matchCount": "1",
+ "teardown": [
+ "$TC qdisc del dev $DUMMY ingress"
+ ]
+ },
+ {
+ "id": "35e9",
+ "name": "Reference gate action object in filter",
+ "category": [
+ "infra",
+ "gate"
+ ],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
+ "setup": [
+ "$TC qdisc add dev $DUMMY ingress",
+ "$TC action add action gate priority 1 sched-entry close 100000000ns index 1"
+ ],
+ "cmdUnderTest": "$TC filter add dev $DUMMY parent ffff: handle 0x1 prio 1 protocol ip matchall action gate index 1",
+ "expExitCode": "0",
+ "verifyCmd": "$TC filter get dev $DUMMY parent ffff: handle 1 prio 1 protocol ip matchall",
+ "matchPattern": "^filter parent ffff: protocol ip pref 1 matchall.*handle 0x1.*",
+ "matchCount": "1",
+ "teardown": [
+ "$TC qdisc del dev $DUMMY ingress"
+ ]
+ },
+ {
+ "id": "b22e",
+ "name": "Reference ife action object in filter",
+ "category": [
+ "infra",
+ "ife"
+ ],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
+ "setup": [
+ "$TC qdisc add dev $DUMMY ingress",
+ "$TC actions add action ife encode allow mark pass index 1"
+ ],
+ "cmdUnderTest": "$TC filter add dev $DUMMY parent ffff: handle 0x1 prio 1 protocol ip matchall action ife index 1",
+ "expExitCode": "0",
+ "verifyCmd": "$TC filter get dev $DUMMY parent ffff: handle 1 prio 1 protocol ip matchall",
+ "matchPattern": "^filter parent ffff: protocol ip pref 1 matchall.*handle 0x1.*",
+ "matchCount": "1",
+ "teardown": [
+ "$TC qdisc del dev $DUMMY ingress"
+ ]
+ },
+ {
+ "id": "ef74",
+ "name": "Reference mirred action object in filter",
+ "category": [
+ "infra",
+ "mirred"
+ ],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
+ "setup": [
+ "$TC qdisc add dev $DUMMY ingress",
+ "$TC actions add action mirred egress mirror index 1 dev lo"
+ ],
+ "cmdUnderTest": "$TC filter add dev $DUMMY parent ffff: handle 0x1 prio 1 protocol ip matchall action mirred index 1",
+ "expExitCode": "0",
+ "verifyCmd": "$TC filter get dev $DUMMY parent ffff: handle 1 prio 1 protocol ip matchall",
+ "matchPattern": "^filter parent ffff: protocol ip pref 1 matchall.*handle 0x1.*",
+ "matchCount": "1",
+ "teardown": [
+ "$TC qdisc del dev $DUMMY ingress"
+ ]
+ },
+ {
+ "id": "2c81",
+ "name": "Reference nat action object in filter",
+ "category": [
+ "infra",
+ "nat"
+ ],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
+ "setup": [
+ "$TC qdisc add dev $DUMMY ingress",
+ "$TC actions add action nat ingress 192.168.1.1 200.200.200.1"
+ ],
+ "cmdUnderTest": "$TC filter add dev $DUMMY parent ffff: handle 0x1 prio 1 protocol ip matchall action nat index 1",
+ "expExitCode": "0",
+ "verifyCmd": "$TC filter get dev $DUMMY parent ffff: handle 1 prio 1 protocol ip matchall",
+ "matchPattern": "^filter parent ffff: protocol ip pref 1 matchall.*handle 0x1.*",
+ "matchCount": "1",
+ "teardown": [
+ "$TC qdisc del dev $DUMMY ingress"
+ ]
+ },
+ {
+ "id": "ac9d",
+ "name": "Reference police action object in filter",
+ "category": [
+ "infra",
+ "police"
+ ],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
+ "setup": [
+ "$TC qdisc add dev $DUMMY ingress",
+ "$TC actions add action police rate 1kbit burst 10k index 1"
+ ],
+ "cmdUnderTest": "$TC filter add dev $DUMMY parent ffff: handle 0x1 prio 1 protocol ip matchall action police index 1",
+ "expExitCode": "0",
+ "verifyCmd": "$TC filter get dev $DUMMY parent ffff: handle 1 prio 1 protocol ip matchall",
+ "matchPattern": "^filter parent ffff: protocol ip pref 1 matchall.*handle 0x1.*",
+ "matchCount": "1",
+ "teardown": [
+ "$TC qdisc del dev $DUMMY ingress"
+ ]
+ },
+ {
+ "id": "68be",
+ "name": "Reference sample action object in filter",
+ "category": [
+ "infra",
+ "sample"
+ ],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
+ "setup": [
+ "$TC qdisc add dev $DUMMY ingress",
+ "$TC actions add action sample rate 10 group 1 index 1"
+ ],
+ "cmdUnderTest": "$TC filter add dev $DUMMY parent ffff: handle 0x1 prio 1 protocol ip matchall action sample index 1",
+ "expExitCode": "0",
+ "verifyCmd": "$TC filter get dev $DUMMY parent ffff: handle 1 prio 1 protocol ip matchall",
+ "matchPattern": "^filter parent ffff: protocol ip pref 1 matchall.*handle 0x1.*",
+ "matchCount": "1",
+ "teardown": [
+ "$TC qdisc del dev $DUMMY ingress"
+ ]
+ },
+ {
+ "id": "cf01",
+ "name": "Reference skbedit action object in filter",
+ "category": [
+ "infra",
+ "skbedit"
+ ],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
+ "setup": [
+ "$TC qdisc add dev $DUMMY ingress",
+ "$TC actions add action skbedit mark 1"
+ ],
+ "cmdUnderTest": "$TC filter add dev $DUMMY parent ffff: handle 0x1 prio 1 protocol ip matchall action skbedit index 1",
+ "expExitCode": "0",
+ "verifyCmd": "$TC filter get dev $DUMMY parent ffff: handle 1 prio 1 protocol ip matchall",
+ "matchPattern": "^filter parent ffff: protocol ip pref 1 matchall.*handle 0x1.*",
+ "matchCount": "1",
+ "teardown": [
+ "$TC qdisc del dev $DUMMY ingress"
+ ]
+ },
+ {
+ "id": "c109",
+ "name": "Reference skbmod action object in filter",
+ "category": [
+ "infra",
+ "skbmod"
+ ],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
+ "setup": [
+ "$TC qdisc add dev $DUMMY ingress",
+ "$TC actions add action skbmod set dmac 11:22:33:44:55:66 index 1"
+ ],
+ "cmdUnderTest": "$TC filter add dev $DUMMY parent ffff: handle 0x1 prio 1 protocol ip matchall action skbmod index 1",
+ "expExitCode": "0",
+ "verifyCmd": "$TC filter get dev $DUMMY parent ffff: handle 1 prio 1 protocol ip matchall",
+ "matchPattern": "^filter parent ffff: protocol ip pref 1 matchall.*handle 0x1.*",
+ "matchCount": "1",
+ "teardown": [
+ "$TC qdisc del dev $DUMMY ingress"
+ ]
+ },
+ {
+ "id": "4abc",
+ "name": "Reference tunnel_key action object in filter",
+ "category": [
+ "infra",
+ "tunnel_key"
+ ],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
+ "setup": [
+ "$TC qdisc add dev $DUMMY ingress",
+ "$TC actions add action tunnel_key set src_ip 10.10.10.1 dst_ip 20.20.20.2 id 1 index 1"
+ ],
+ "cmdUnderTest": "$TC filter add dev $DUMMY parent ffff: handle 0x1 prio 1 protocol ip matchall action tunnel_key index 1",
+ "expExitCode": "0",
+ "verifyCmd": "$TC filter get dev $DUMMY parent ffff: handle 1 prio 1 protocol ip matchall",
+ "matchPattern": "^filter parent ffff: protocol ip pref 1 matchall.*handle 0x1.*",
+ "matchCount": "1",
+ "teardown": [
+ "$TC qdisc del dev $DUMMY ingress"
+ ]
+ },
+ {
+ "id": "dadd",
+ "name": "Reference vlan action object in filter",
+ "category": [
+ "infra",
+ "tunnel_key"
+ ],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
+ "setup": [
+ "$TC qdisc add dev $DUMMY ingress",
+ "$TC actions add action vlan pop pipe index 1"
+ ],
+ "cmdUnderTest": "$TC filter add dev $DUMMY parent ffff: handle 0x1 prio 1 protocol ip matchall action vlan index 1",
+ "expExitCode": "0",
+ "verifyCmd": "$TC filter get dev $DUMMY parent ffff: handle 1 prio 1 protocol ip matchall",
+ "matchPattern": "^filter parent ffff: protocol ip pref 1 matchall.*handle 0x1.*",
+ "matchCount": "1",
+ "teardown": [
+ "$TC qdisc del dev $DUMMY ingress"
+ ]
+ }
+]
diff --git a/tools/testing/selftests/tc-testing/tc-tests/infra/filter.json b/tools/testing/selftests/tc-testing/tc-tests/infra/filter.json
new file mode 100644
index 000000000000..8d10042b489b
--- /dev/null
+++ b/tools/testing/selftests/tc-testing/tc-tests/infra/filter.json
@@ -0,0 +1,26 @@
+[
+ {
+ "id": "c2b4",
+ "name": "Soft lockup alarm will be not generated after delete the prio 0 filter of the chain",
+ "category": [
+ "filter",
+ "chain"
+ ],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
+ "setup": [
+ "$TC qdisc add dev $DUMMY root handle 1: htb default 1",
+ "$TC chain add dev $DUMMY",
+ "$TC filter del dev $DUMMY chain 0 parent 1: prio 0"
+ ],
+ "cmdUnderTest": "$TC filter add dev $DUMMY chain 0 parent 1:",
+ "expExitCode": "2",
+ "verifyCmd": "$TC chain ls dev $DUMMY",
+ "matchPattern": "chain parent 1: chain 0",
+ "matchCount": "1",
+ "teardown": [
+ "$TC qdisc del dev $DUMMY root handle 1: htb default 1"
+ ]
+ }
+]
diff --git a/tools/testing/selftests/tc-testing/tc-tests/qdiscs/cake.json b/tools/testing/selftests/tc-testing/tc-tests/qdiscs/cake.json
new file mode 100644
index 000000000000..c4c5f7ba0e0f
--- /dev/null
+++ b/tools/testing/selftests/tc-testing/tc-tests/qdiscs/cake.json
@@ -0,0 +1,445 @@
+[
+ {
+ "id": "1212",
+ "name": "Create CAKE with default setting",
+ "category": [
+ "qdisc",
+ "cake"
+ ],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
+ "setup": [
+ ],
+ "cmdUnderTest": "$TC qdisc add dev $DUMMY handle 1: root cake",
+ "expExitCode": "0",
+ "verifyCmd": "$TC qdisc show dev $DUMMY",
+ "matchPattern": "qdisc cake 1: root refcnt [0-9]+ bandwidth unlimited diffserv3 triple-isolate nonat nowash no-ack-filter split-gso rtt 100ms raw overhead",
+ "matchCount": "1",
+ "teardown": [
+ "$TC qdisc del dev $DUMMY handle 1: root"
+ ]
+ },
+ {
+ "id": "3281",
+ "name": "Create CAKE with bandwidth limit",
+ "category": [
+ "qdisc",
+ "cake"
+ ],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
+ "setup": [
+ ],
+ "cmdUnderTest": "$TC qdisc add dev $DUMMY handle 1: root cake bandwidth 1000",
+ "expExitCode": "0",
+ "verifyCmd": "$TC qdisc show dev $DUMMY",
+ "matchPattern": "qdisc cake 1: root refcnt [0-9]+ bandwidth 1Kbit diffserv3 triple-isolate nonat nowash no-ack-filter split-gso rtt 100ms raw overhead",
+ "matchCount": "1",
+ "teardown": [
+ "$TC qdisc del dev $DUMMY handle 1: root"
+ ]
+ },
+ {
+ "id": "c940",
+ "name": "Create CAKE with autorate-ingress flag",
+ "category": [
+ "qdisc",
+ "cake"
+ ],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
+ "setup": [
+ ],
+ "cmdUnderTest": "$TC qdisc add dev $DUMMY handle 1: root cake autorate-ingress",
+ "expExitCode": "0",
+ "verifyCmd": "$TC qdisc show dev $DUMMY",
+ "matchPattern": "qdisc cake 1: root refcnt [0-9]+ bandwidth unlimited autorate-ingress diffserv3 triple-isolate nonat nowash no-ack-filter split-gso rtt 100ms raw overhead",
+ "matchCount": "1",
+ "teardown": [
+ "$TC qdisc del dev $DUMMY handle 1: root"
+ ]
+ },
+ {
+ "id": "2310",
+ "name": "Create CAKE with rtt time",
+ "category": [
+ "qdisc",
+ "cake"
+ ],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
+ "setup": [
+ ],
+ "cmdUnderTest": "$TC qdisc add dev $DUMMY handle 1: root cake rtt 200",
+ "expExitCode": "0",
+ "verifyCmd": "$TC qdisc show dev $DUMMY",
+ "matchPattern": "qdisc cake 1: root refcnt [0-9]+ bandwidth unlimited diffserv3 triple-isolate nonat nowash no-ack-filter split-gso rtt 200us raw overhead",
+ "matchCount": "1",
+ "teardown": [
+ "$TC qdisc del dev $DUMMY handle 1: root"
+ ]
+ },
+ {
+ "id": "2385",
+ "name": "Create CAKE with besteffort flag",
+ "category": [
+ "qdisc",
+ "cake"
+ ],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
+ "setup": [
+ ],
+ "cmdUnderTest": "$TC qdisc add dev $DUMMY handle 1: root cake besteffort",
+ "expExitCode": "0",
+ "verifyCmd": "$TC qdisc show dev $DUMMY",
+ "matchPattern": "qdisc cake 1: root refcnt [0-9]+ bandwidth unlimited besteffort triple-isolate nonat nowash no-ack-filter split-gso rtt 100ms raw overhead",
+ "matchCount": "1",
+ "teardown": [
+ "$TC qdisc del dev $DUMMY handle 1: root"
+ ]
+ },
+ {
+ "id": "a032",
+ "name": "Create CAKE with diffserv8 flag",
+ "category": [
+ "qdisc",
+ "cake"
+ ],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
+ "setup": [
+ ],
+ "cmdUnderTest": "$TC qdisc add dev $DUMMY handle 1: root cake diffserv8",
+ "expExitCode": "0",
+ "verifyCmd": "$TC qdisc show dev $DUMMY",
+ "matchPattern": "qdisc cake 1: root refcnt [0-9]+ bandwidth unlimited diffserv8 triple-isolate nonat nowash no-ack-filter split-gso rtt 100ms raw overhead",
+ "matchCount": "1",
+ "teardown": [
+ "$TC qdisc del dev $DUMMY handle 1: root"
+ ]
+ },
+ {
+ "id": "2349",
+ "name": "Create CAKE with diffserv4 flag",
+ "category": [
+ "qdisc",
+ "cake"
+ ],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
+ "setup": [
+ ],
+ "cmdUnderTest": "$TC qdisc add dev $DUMMY handle 1: root cake diffserv4",
+ "expExitCode": "0",
+ "verifyCmd": "$TC qdisc show dev $DUMMY",
+ "matchPattern": "qdisc cake 1: root refcnt [0-9]+ bandwidth unlimited diffserv4 triple-isolate nonat nowash no-ack-filter split-gso rtt 100ms raw overhead",
+ "matchCount": "1",
+ "teardown": [
+ "$TC qdisc del dev $DUMMY handle 1: root"
+ ]
+ },
+ {
+ "id": "8472",
+ "name": "Create CAKE with flowblind flag",
+ "category": [
+ "qdisc",
+ "cake"
+ ],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
+ "setup": [
+ ],
+ "cmdUnderTest": "$TC qdisc add dev $DUMMY handle 1: root cake flowblind",
+ "expExitCode": "0",
+ "verifyCmd": "$TC qdisc show dev $DUMMY",
+ "matchPattern": "qdisc cake 1: root refcnt [0-9]+ bandwidth unlimited diffserv3 flowblind nonat nowash no-ack-filter split-gso rtt 100ms raw overhead",
+ "matchCount": "1",
+ "teardown": [
+ "$TC qdisc del dev $DUMMY handle 1: root"
+ ]
+ },
+ {
+ "id": "2341",
+ "name": "Create CAKE with dsthost and nat flag",
+ "category": [
+ "qdisc",
+ "cake"
+ ],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
+ "setup": [
+ ],
+ "cmdUnderTest": "$TC qdisc add dev $DUMMY handle 1: root cake dsthost nat",
+ "expExitCode": "0",
+ "verifyCmd": "$TC qdisc show dev $DUMMY",
+ "matchPattern": "qdisc cake 1: root refcnt [0-9]+ bandwidth unlimited diffserv3 dsthost nat nowash no-ack-filter split-gso rtt 100ms raw overhead",
+ "matchCount": "1",
+ "teardown": [
+ "$TC qdisc del dev $DUMMY handle 1: root"
+ ]
+ },
+ {
+ "id": "5134",
+ "name": "Create CAKE with wash flag",
+ "category": [
+ "qdisc",
+ "cake"
+ ],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
+ "setup": [
+ ],
+ "cmdUnderTest": "$TC qdisc add dev $DUMMY handle 1: root cake hosts wash",
+ "expExitCode": "0",
+ "verifyCmd": "$TC qdisc show dev $DUMMY",
+ "matchPattern": "qdisc cake 1: root refcnt [0-9]+ bandwidth unlimited diffserv3 hosts nonat wash no-ack-filter split-gso rtt 100ms raw overhead",
+ "matchCount": "1",
+ "teardown": [
+ "$TC qdisc del dev $DUMMY handle 1: root"
+ ]
+ },
+ {
+ "id": "2302",
+ "name": "Create CAKE with flowblind and no-split-gso flag",
+ "category": [
+ "qdisc",
+ "cake"
+ ],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
+ "setup": [
+ ],
+ "cmdUnderTest": "$TC qdisc add dev $DUMMY handle 1: root cake flowblind no-split-gso",
+ "expExitCode": "0",
+ "verifyCmd": "$TC qdisc show dev $DUMMY",
+ "matchPattern": "qdisc cake 1: root refcnt [0-9]+ bandwidth unlimited diffserv3 flowblind nonat nowash no-ack-filter no-split-gso rtt 100ms raw overhead",
+ "matchCount": "1",
+ "teardown": [
+ "$TC qdisc del dev $DUMMY handle 1: root"
+ ]
+ },
+ {
+ "id": "0768",
+ "name": "Create CAKE with dual-srchost and ack-filter flag",
+ "category": [
+ "qdisc",
+ "cake"
+ ],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
+ "setup": [
+ ],
+ "cmdUnderTest": "$TC qdisc add dev $DUMMY handle 1: root cake dual-srchost ack-filter",
+ "expExitCode": "0",
+ "verifyCmd": "$TC qdisc show dev $DUMMY",
+ "matchPattern": "qdisc cake 1: root refcnt [0-9]+ bandwidth unlimited diffserv3 dual-srchost nonat nowash ack-filter split-gso rtt 100ms raw overhead",
+ "matchCount": "1",
+ "teardown": [
+ "$TC qdisc del dev $DUMMY handle 1: root"
+ ]
+ },
+ {
+ "id": "0238",
+ "name": "Create CAKE with dual-dsthost and ack-filter-aggressive flag",
+ "category": [
+ "qdisc",
+ "cake"
+ ],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
+ "setup": [
+ ],
+ "cmdUnderTest": "$TC qdisc add dev $DUMMY handle 1: root cake dual-dsthost ack-filter-aggressive",
+ "expExitCode": "0",
+ "verifyCmd": "$TC qdisc show dev $DUMMY",
+ "matchPattern": "qdisc cake 1: root refcnt [0-9]+ bandwidth unlimited diffserv3 dual-dsthost nonat nowash ack-filter-aggressive split-gso rtt 100ms raw overhead",
+ "matchCount": "1",
+ "teardown": [
+ "$TC qdisc del dev $DUMMY handle 1: root"
+ ]
+ },
+ {
+ "id": "6572",
+ "name": "Create CAKE with memlimit and ptm flag",
+ "category": [
+ "qdisc",
+ "cake"
+ ],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
+ "setup": [
+ ],
+ "cmdUnderTest": "$TC qdisc add dev $DUMMY handle 1: root cake memlimit 10000 ptm",
+ "expExitCode": "0",
+ "verifyCmd": "$TC qdisc show dev $DUMMY",
+ "matchPattern": "qdisc cake 1: root refcnt [0-9]+ bandwidth unlimited diffserv3 triple-isolate nonat nowash no-ack-filter split-gso rtt 100ms raw ptm overhead 0 memlimit 10000b",
+ "matchCount": "1",
+ "teardown": [
+ "$TC qdisc del dev $DUMMY handle 1: root"
+ ]
+ },
+ {
+ "id": "2436",
+ "name": "Create CAKE with fwmark and atm flag",
+ "category": [
+ "qdisc",
+ "cake"
+ ],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
+ "setup": [
+ ],
+ "cmdUnderTest": "$TC qdisc add dev $DUMMY handle 1: root cake fwmark 8 atm",
+ "expExitCode": "0",
+ "verifyCmd": "$TC qdisc show dev $DUMMY",
+ "matchPattern": "qdisc cake 1: root refcnt [0-9]+ bandwidth unlimited diffserv3 triple-isolate nonat nowash no-ack-filter split-gso rtt 100ms raw atm overhead 0 fwmark 0x8",
+ "matchCount": "1",
+ "teardown": [
+ "$TC qdisc del dev $DUMMY handle 1: root"
+ ]
+ },
+ {
+ "id": "3984",
+ "name": "Create CAKE with overhead and mpu",
+ "category": [
+ "qdisc",
+ "cake"
+ ],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
+ "setup": [
+ ],
+ "cmdUnderTest": "$TC qdisc add dev $DUMMY handle 1: root cake overhead 128 mpu 256",
+ "expExitCode": "0",
+ "verifyCmd": "$TC qdisc show dev $DUMMY",
+ "matchPattern": "qdisc cake 1: root refcnt [0-9]+ bandwidth unlimited diffserv3 triple-isolate nonat nowash no-ack-filter split-gso rtt 100ms noatm overhead 128 mpu 256",
+ "matchCount": "1",
+ "teardown": [
+ "$TC qdisc del dev $DUMMY handle 1: root"
+ ]
+ },
+ {
+ "id": "5421",
+ "name": "Create CAKE with conservative and ingress flag",
+ "category": [
+ "qdisc",
+ "cake"
+ ],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
+ "setup": [
+ ],
+ "cmdUnderTest": "$TC qdisc add dev $DUMMY handle 1: root cake conservative ingress",
+ "expExitCode": "0",
+ "verifyCmd": "$TC qdisc show dev $DUMMY",
+ "matchPattern": "qdisc cake 1: root refcnt [0-9]+ bandwidth unlimited diffserv3 triple-isolate nonat nowash ingress no-ack-filter split-gso rtt 100ms atm overhead 48",
+ "matchCount": "1",
+ "teardown": [
+ "$TC qdisc del dev $DUMMY handle 1: root"
+ ]
+ },
+ {
+ "id": "6854",
+ "name": "Delete CAKE with conservative and ingress flag",
+ "category": [
+ "qdisc",
+ "cake"
+ ],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
+ "setup": [
+ "$TC qdisc add dev $DUMMY handle 1: root cake conservative ingress"
+ ],
+ "cmdUnderTest": "$TC qdisc del dev $DUMMY handle 1: root",
+ "expExitCode": "0",
+ "verifyCmd": "$TC qdisc show dev $DUMMY",
+ "matchPattern": "qdisc cake 1: root refcnt [0-9]+ bandwidth unlimited diffserv3 triple-isolate nonat nowash ingress no-ack-filter split-gso rtt 100ms atm overhead 48",
+ "matchCount": "0",
+ "teardown": [
+ ]
+ },
+ {
+ "id": "2342",
+ "name": "Replace CAKE with mpu",
+ "category": [
+ "qdisc",
+ "cake"
+ ],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
+ "setup": [
+ "$TC qdisc add dev $DUMMY handle 1: root cake overhead 128 mpu 256"
+ ],
+ "cmdUnderTest": "$TC qdisc replace dev $DUMMY handle 1: root cake mpu 128",
+ "expExitCode": "0",
+ "verifyCmd": "$TC qdisc show dev $DUMMY",
+ "matchPattern": "qdisc cake 1: root refcnt [0-9]+ bandwidth unlimited diffserv3 triple-isolate nonat nowash no-ack-filter split-gso rtt 100ms noatm overhead 128 mpu 128",
+ "matchCount": "1",
+ "teardown": [
+ "$TC qdisc del dev $DUMMY handle 1: root"
+ ]
+ },
+ {
+ "id": "2313",
+ "name": "Change CAKE with mpu",
+ "category": [
+ "qdisc",
+ "cake"
+ ],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
+ "setup": [
+ "$TC qdisc add dev $DUMMY handle 1: root cake overhead 128 mpu 256"
+ ],
+ "cmdUnderTest": "$TC qdisc change dev $DUMMY handle 1: root cake mpu 128",
+ "expExitCode": "0",
+ "verifyCmd": "$TC qdisc show dev $DUMMY",
+ "matchPattern": "qdisc cake 1: root refcnt [0-9]+ bandwidth unlimited diffserv3 triple-isolate nonat nowash no-ack-filter split-gso rtt 100ms noatm overhead 128 mpu 128",
+ "matchCount": "1",
+ "teardown": [
+ "$TC qdisc del dev $DUMMY handle 1: root"
+ ]
+ },
+ {
+ "id": "4365",
+ "name": "Show CAKE class",
+ "category": [
+ "qdisc",
+ "cake"
+ ],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
+ "setup": [
+ ],
+ "cmdUnderTest": "$TC qdisc add dev $DUMMY handle 1: root cake",
+ "expExitCode": "0",
+ "verifyCmd": "$TC class show dev $DUMMY",
+ "matchPattern": "class cake",
+ "matchCount": "0",
+ "teardown": [
+ "$TC qdisc del dev $DUMMY handle 1: root"
+ ]
+ }
+]
diff --git a/tools/testing/selftests/tc-testing/tc-tests/qdiscs/cbs.json b/tools/testing/selftests/tc-testing/tc-tests/qdiscs/cbs.json
new file mode 100644
index 000000000000..33ea986176d9
--- /dev/null
+++ b/tools/testing/selftests/tc-testing/tc-tests/qdiscs/cbs.json
@@ -0,0 +1,214 @@
+[
+ {
+ "id": "1820",
+ "name": "Create CBS with default setting",
+ "category": [
+ "qdisc",
+ "cbs"
+ ],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
+ "setup": [
+ ],
+ "cmdUnderTest": "$TC qdisc add dev $DUMMY handle 1: root cbs",
+ "expExitCode": "0",
+ "verifyCmd": "$TC qdisc show dev $DUMMY",
+ "matchPattern": "qdisc cbs 1: root refcnt [0-9]+ hicredit 0 locredit 0 sendslope 0 idleslope 0 offload 0.*qdisc pfifo 0: parent 1: limit 1000p",
+ "matchCount": "1",
+ "teardown": [
+ "$TC qdisc del dev $DUMMY handle 1: root"
+ ]
+ },
+ {
+ "id": "1532",
+ "name": "Create CBS with hicredit setting",
+ "category": [
+ "qdisc",
+ "cbs"
+ ],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
+ "setup": [
+ ],
+ "cmdUnderTest": "$TC qdisc add dev $DUMMY handle 1: root cbs hicredit 64",
+ "expExitCode": "0",
+ "verifyCmd": "$TC qdisc show dev $DUMMY",
+ "matchPattern": "qdisc cbs 1: root refcnt [0-9]+ hicredit 64 locredit 0 sendslope 0 idleslope 0 offload 0.*qdisc pfifo 0: parent 1: limit 1000p",
+ "matchCount": "1",
+ "teardown": [
+ "$TC qdisc del dev $DUMMY handle 1: root"
+ ]
+ },
+ {
+ "id": "2078",
+ "name": "Create CBS with locredit setting",
+ "category": [
+ "qdisc",
+ "cbs"
+ ],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
+ "setup": [
+ ],
+ "cmdUnderTest": "$TC qdisc add dev $DUMMY handle 1: root cbs locredit 10",
+ "expExitCode": "0",
+ "verifyCmd": "$TC qdisc show dev $DUMMY",
+ "matchPattern": "qdisc cbs 1: root refcnt [0-9]+ hicredit 0 locredit 10 sendslope 0 idleslope 0 offload 0.*qdisc pfifo 0: parent 1: limit 1000p",
+ "matchCount": "1",
+ "teardown": [
+ "$TC qdisc del dev $DUMMY handle 1: root"
+ ]
+ },
+ {
+ "id": "9271",
+ "name": "Create CBS with sendslope setting",
+ "category": [
+ "qdisc",
+ "cbs"
+ ],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
+ "setup": [
+ ],
+ "cmdUnderTest": "$TC qdisc add dev $DUMMY handle 1: root cbs sendslope 888",
+ "expExitCode": "0",
+ "verifyCmd": "$TC qdisc show dev $DUMMY",
+ "matchPattern": "qdisc cbs 1: root refcnt [0-9]+ hicredit 0 locredit 0 sendslope 888 idleslope 0 offload 0.*qdisc pfifo 0: parent 1: limit 1000p",
+ "matchCount": "1",
+ "teardown": [
+ "$TC qdisc del dev $DUMMY handle 1: root"
+ ]
+ },
+ {
+ "id": "0482",
+ "name": "Create CBS with idleslope setting",
+ "category": [
+ "qdisc",
+ "cbs"
+ ],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
+ "setup": [
+ ],
+ "cmdUnderTest": "$TC qdisc add dev $DUMMY handle 1: root cbs idleslope 666",
+ "expExitCode": "0",
+ "verifyCmd": "$TC qdisc show dev $DUMMY",
+ "matchPattern": "qdisc cbs 1: root refcnt [0-9]+ hicredit 0 locredit 0 sendslope 0 idleslope 666 offload 0.*qdisc pfifo 0: parent 1: limit 1000p",
+ "matchCount": "1",
+ "teardown": [
+ "$TC qdisc del dev $DUMMY handle 1: root"
+ ]
+ },
+ {
+ "id": "e8f3",
+ "name": "Create CBS with multiple setting",
+ "category": [
+ "qdisc",
+ "cbs"
+ ],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
+ "setup": [
+ ],
+ "cmdUnderTest": "$TC qdisc add dev $DUMMY handle 1: root cbs hicredit 10 locredit 75 sendslope 2 idleslope 666",
+ "expExitCode": "0",
+ "verifyCmd": "$TC qdisc show dev $DUMMY",
+ "matchPattern": "qdisc cbs 1: root refcnt [0-9]+ hicredit 10 locredit 75 sendslope 2 idleslope 666 offload 0.*qdisc pfifo 0: parent 1: limit 1000p",
+ "matchCount": "1",
+ "teardown": [
+ "$TC qdisc del dev $DUMMY handle 1: root"
+ ]
+ },
+ {
+ "id": "23c9",
+ "name": "Replace CBS with sendslope setting",
+ "category": [
+ "qdisc",
+ "cbs"
+ ],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
+ "setup": [
+ "$TC qdisc add dev $DUMMY handle 1: root cbs idleslope 666"
+ ],
+ "cmdUnderTest": "$TC qdisc replace dev $DUMMY handle 1: root cbs sendslope 10",
+ "expExitCode": "0",
+ "verifyCmd": "$TC qdisc show dev $DUMMY",
+ "matchPattern": "qdisc cbs 1: root refcnt [0-9]+ hicredit 0 locredit 0 sendslope 10 idleslope 0 offload 0.*qdisc pfifo 0: parent 1: limit 1000p",
+ "matchCount": "1",
+ "teardown": [
+ "$TC qdisc del dev $DUMMY handle 1: root"
+ ]
+ },
+ {
+ "id": "a07a",
+ "name": "Change CBS with idleslope setting",
+ "category": [
+ "qdisc",
+ "cbs"
+ ],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
+ "setup": [
+ "$TC qdisc add dev $DUMMY handle 1: root cbs idleslope 666"
+ ],
+ "cmdUnderTest": "$TC qdisc change dev $DUMMY handle 1: root cbs idleslope 1",
+ "expExitCode": "0",
+ "verifyCmd": "$TC qdisc show dev $DUMMY",
+ "matchPattern": "qdisc cbs 1: root refcnt [0-9]+ hicredit 0 locredit 0 sendslope 0 idleslope 1 offload 0.*qdisc pfifo 0: parent 1: limit 1000p",
+ "matchCount": "1",
+ "teardown": [
+ "$TC qdisc del dev $DUMMY handle 1: root"
+ ]
+ },
+ {
+ "id": "43b3",
+ "name": "Delete CBS with handle",
+ "category": [
+ "qdisc",
+ "cbs"
+ ],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
+ "setup": [
+ "$TC qdisc add dev $DUMMY handle 1: root cbs idleslope 666"
+ ],
+ "cmdUnderTest": "$TC qdisc del dev $DUMMY handle 1: root",
+ "expExitCode": "0",
+ "verifyCmd": "$TC qdisc show dev $DUMMY",
+ "matchPattern": "qdisc cbs 1: root refcnt [0-9]+ hicredit 0 locredit 0 sendslope 0 idleslope 1 offload 0.*qdisc pfifo 0: parent 1: limit 1000p",
+ "matchCount": "0",
+ "teardown": [
+ ]
+ },
+ {
+ "id": "9472",
+ "name": "Show CBS class",
+ "category": [
+ "qdisc",
+ "cbs"
+ ],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
+ "setup": [
+ ],
+ "cmdUnderTest": "$TC qdisc add dev $DUMMY handle 1: root cbs",
+ "expExitCode": "0",
+ "verifyCmd": "$TC class show dev $DUMMY",
+ "matchPattern": "class cbs 1:[0-9]+ parent 1:",
+ "matchCount": "1",
+ "teardown": [
+ "$TC qdisc del dev $DUMMY handle 1: root"
+ ]
+ }
+]
diff --git a/tools/testing/selftests/tc-testing/tc-tests/qdiscs/choke.json b/tools/testing/selftests/tc-testing/tc-tests/qdiscs/choke.json
new file mode 100644
index 000000000000..d46e5e2c9430
--- /dev/null
+++ b/tools/testing/selftests/tc-testing/tc-tests/qdiscs/choke.json
@@ -0,0 +1,172 @@
+[
+ {
+ "id": "8937",
+ "name": "Create CHOKE with default setting",
+ "category": [
+ "qdisc",
+ "choke"
+ ],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
+ "setup": [
+ ],
+ "cmdUnderTest": "$TC qdisc add dev $DUMMY handle 1: root choke limit 1000 bandwidth 10000",
+ "expExitCode": "0",
+ "verifyCmd": "$TC qdisc show dev $DUMMY",
+ "matchPattern": "qdisc choke 1: root refcnt [0-9]+ limit 1000p min 83p max 250p",
+ "matchCount": "1",
+ "teardown": [
+ "$TC qdisc del dev $DUMMY handle 1: root"
+ ]
+ },
+ {
+ "id": "48c0",
+ "name": "Create CHOKE with min packet setting",
+ "category": [
+ "qdisc",
+ "choke"
+ ],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
+ "setup": [
+ ],
+ "cmdUnderTest": "$TC qdisc add dev $DUMMY handle 1: root choke limit 1000 bandwidth 10000 min 100",
+ "expExitCode": "0",
+ "verifyCmd": "$TC qdisc show dev $DUMMY",
+ "matchPattern": "qdisc choke 1: root refcnt [0-9]+ limit 1000p min 100p max 250p",
+ "matchCount": "1",
+ "teardown": [
+ "$TC qdisc del dev $DUMMY handle 1: root"
+ ]
+ },
+ {
+ "id": "38c1",
+ "name": "Create CHOKE with max packet setting",
+ "category": [
+ "qdisc",
+ "choke"
+ ],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
+ "setup": [
+ ],
+ "cmdUnderTest": "$TC qdisc add dev $DUMMY handle 1: root choke limit 1000 bandwidth 10000 max 900",
+ "expExitCode": "0",
+ "verifyCmd": "$TC qdisc show dev $DUMMY",
+ "matchPattern": "qdisc choke 1: root refcnt [0-9]+ limit 1000p min.*max 900p",
+ "matchCount": "1",
+ "teardown": [
+ "$TC qdisc del dev $DUMMY handle 1: root"
+ ]
+ },
+ {
+ "id": "234a",
+ "name": "Create CHOKE with ecn setting",
+ "category": [
+ "qdisc",
+ "choke"
+ ],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
+ "setup": [
+ ],
+ "cmdUnderTest": "$TC qdisc add dev $DUMMY handle 1: root choke limit 1000 bandwidth 10000 ecn",
+ "expExitCode": "0",
+ "verifyCmd": "$TC qdisc show dev $DUMMY",
+ "matchPattern": "qdisc choke 1: root refcnt [0-9]+ limit 1000p min 83p max 250p ecn",
+ "matchCount": "1",
+ "teardown": [
+ "$TC qdisc del dev $DUMMY handle 1: root"
+ ]
+ },
+ {
+ "id": "4380",
+ "name": "Create CHOKE with burst setting",
+ "category": [
+ "qdisc",
+ "choke"
+ ],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
+ "setup": [
+ ],
+ "cmdUnderTest": "$TC qdisc add dev $DUMMY handle 1: root choke limit 1000 bandwidth 10000 burst 100",
+ "expExitCode": "0",
+ "verifyCmd": "$TC qdisc show dev $DUMMY",
+ "matchPattern": "qdisc choke 1: root refcnt [0-9]+ limit 1000p min 83p max 250p",
+ "matchCount": "1",
+ "teardown": [
+ "$TC qdisc del dev $DUMMY handle 1: root"
+ ]
+ },
+ {
+ "id": "48c7",
+ "name": "Delete CHOKE with valid handle",
+ "category": [
+ "qdisc",
+ "choke"
+ ],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
+ "setup": [
+ "$TC qdisc add dev $DUMMY handle 1: root choke limit 1000 bandwidth 10000"
+ ],
+ "cmdUnderTest": "$TC qdisc del dev $DUMMY handle 1: root",
+ "expExitCode": "0",
+ "verifyCmd": "$TC qdisc show dev $DUMMY",
+ "matchPattern": "qdisc choke 1: root refcnt [0-9]+ limit 1000p min 83p max 250p",
+ "matchCount": "0",
+ "teardown": [
+ ]
+ },
+ {
+ "id": "4398",
+ "name": "Replace CHOKE with min setting",
+ "category": [
+ "qdisc",
+ "choke"
+ ],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
+ "setup": [
+ "$TC qdisc add dev $DUMMY handle 1: root choke limit 1000 bandwidth 10000"
+ ],
+ "cmdUnderTest": "$TC qdisc replace dev $DUMMY handle 1: root choke limit 1000 bandwidth 10000 min 100",
+ "expExitCode": "0",
+ "verifyCmd": "$TC qdisc show dev $DUMMY",
+ "matchPattern": "qdisc choke 1: root refcnt [0-9]+ limit 1000p min 100p max 250p",
+ "matchCount": "1",
+ "teardown": [
+ "$TC qdisc del dev $DUMMY handle 1: root"
+ ]
+ },
+ {
+ "id": "0301",
+ "name": "Change CHOKE with limit setting",
+ "category": [
+ "qdisc",
+ "choke"
+ ],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
+ "setup": [
+ "$TC qdisc add dev $DUMMY handle 1: root choke limit 1000 bandwidth 10000"
+ ],
+ "cmdUnderTest": "$TC qdisc change dev $DUMMY handle 1: root choke limit 1000 bandwidth 10000 min 100",
+ "expExitCode": "0",
+ "verifyCmd": "$TC qdisc show dev $DUMMY",
+ "matchPattern": "qdisc choke 1: root refcnt [0-9]+ limit 1000p min 100p max 250p",
+ "matchCount": "1",
+ "teardown": [
+ "$TC qdisc del dev $DUMMY handle 1: root"
+ ]
+ }
+]
diff --git a/tools/testing/selftests/tc-testing/tc-tests/qdiscs/codel.json b/tools/testing/selftests/tc-testing/tc-tests/qdiscs/codel.json
new file mode 100644
index 000000000000..e9469ee71e6f
--- /dev/null
+++ b/tools/testing/selftests/tc-testing/tc-tests/qdiscs/codel.json
@@ -0,0 +1,193 @@
+[
+ {
+ "id": "983a",
+ "name": "Create CODEL with default setting",
+ "category": [
+ "qdisc",
+ "codel"
+ ],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
+ "setup": [
+ ],
+ "cmdUnderTest": "$TC qdisc add dev $DUMMY handle 1: root codel",
+ "expExitCode": "0",
+ "verifyCmd": "$TC qdisc show dev $DUMMY",
+ "matchPattern": "qdisc codel 1: root refcnt [0-9]+ limit 1000p target 5ms interval 100ms",
+ "matchCount": "1",
+ "teardown": [
+ "$TC qdisc del dev $DUMMY handle 1: root"
+ ]
+ },
+ {
+ "id": "38aa",
+ "name": "Create CODEL with limit packet setting",
+ "category": [
+ "qdisc",
+ "codel"
+ ],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
+ "setup": [
+ ],
+ "cmdUnderTest": "$TC qdisc add dev $DUMMY handle 1: root codel limit 1500",
+ "expExitCode": "0",
+ "verifyCmd": "$TC qdisc show dev $DUMMY",
+ "matchPattern": "qdisc codel 1: root refcnt [0-9]+ limit 1500p target 5ms interval 100ms",
+ "matchCount": "1",
+ "teardown": [
+ "$TC qdisc del dev $DUMMY handle 1: root"
+ ]
+ },
+ {
+ "id": "9178",
+ "name": "Create CODEL with target setting",
+ "category": [
+ "qdisc",
+ "codel"
+ ],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
+ "setup": [
+ ],
+ "cmdUnderTest": "$TC qdisc add dev $DUMMY handle 1: root codel target 100ms",
+ "expExitCode": "0",
+ "verifyCmd": "$TC qdisc show dev $DUMMY",
+ "matchPattern": "qdisc codel 1: root refcnt [0-9]+ limit 1000p target 100ms interval 100ms",
+ "matchCount": "1",
+ "teardown": [
+ "$TC qdisc del dev $DUMMY handle 1: root"
+ ]
+ },
+ {
+ "id": "78d1",
+ "name": "Create CODEL with interval setting",
+ "category": [
+ "qdisc",
+ "codel"
+ ],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
+ "setup": [
+ ],
+ "cmdUnderTest": "$TC qdisc add dev $DUMMY handle 1: root codel interval 20ms",
+ "expExitCode": "0",
+ "verifyCmd": "$TC qdisc show dev $DUMMY",
+ "matchPattern": "qdisc codel 1: root refcnt [0-9]+ limit 1000p target 5ms interval 20ms",
+ "matchCount": "1",
+ "teardown": [
+ "$TC qdisc del dev $DUMMY handle 1: root"
+ ]
+ },
+ {
+ "id": "238a",
+ "name": "Create CODEL with ecn setting",
+ "category": [
+ "qdisc",
+ "codel"
+ ],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
+ "setup": [
+ ],
+ "cmdUnderTest": "$TC qdisc add dev $DUMMY handle 1: root codel ecn",
+ "expExitCode": "0",
+ "verifyCmd": "$TC qdisc show dev $DUMMY",
+ "matchPattern": "qdisc codel 1: root refcnt [0-9]+ limit 1000p target 5ms interval 100ms ecn",
+ "matchCount": "1",
+ "teardown": [
+ "$TC qdisc del dev $DUMMY handle 1: root"
+ ]
+ },
+ {
+ "id": "939c",
+ "name": "Create CODEL with ce_threshold setting",
+ "category": [
+ "qdisc",
+ "codel"
+ ],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
+ "setup": [
+ ],
+ "cmdUnderTest": "$TC qdisc add dev $DUMMY handle 1: root codel ce_threshold 20ms",
+ "expExitCode": "0",
+ "verifyCmd": "$TC qdisc show dev $DUMMY",
+ "matchPattern": "qdisc codel 1: root refcnt [0-9]+ limit 1000p target 5ms ce_threshold 20ms interval 100ms",
+ "matchCount": "1",
+ "teardown": [
+ "$TC qdisc del dev $DUMMY handle 1: root"
+ ]
+ },
+ {
+ "id": "8380",
+ "name": "Delete CODEL with valid handle",
+ "category": [
+ "qdisc",
+ "codel"
+ ],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
+ "setup": [
+ "$TC qdisc add dev $DUMMY handle 1: root codel"
+ ],
+ "cmdUnderTest": "$TC qdisc del dev $DUMMY handle 1: root",
+ "expExitCode": "0",
+ "verifyCmd": "$TC qdisc show dev $DUMMY",
+ "matchPattern": "qdisc codel 1: root refcnt [0-9]+ limit 1000p target 5ms interval 100ms",
+ "matchCount": "0",
+ "teardown": [
+ ]
+ },
+ {
+ "id": "289c",
+ "name": "Replace CODEL with limit setting",
+ "category": [
+ "qdisc",
+ "codel"
+ ],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
+ "setup": [
+ "$TC qdisc add dev $DUMMY handle 1: root codel"
+ ],
+ "cmdUnderTest": "$TC qdisc replace dev $DUMMY handle 1: root codel limit 5000",
+ "expExitCode": "0",
+ "verifyCmd": "$TC qdisc show dev $DUMMY",
+ "matchPattern": "qdisc codel 1: root refcnt [0-9]+ limit 5000p target 5ms interval 100ms",
+ "matchCount": "1",
+ "teardown": [
+ "$TC qdisc del dev $DUMMY handle 1: root"
+ ]
+ },
+ {
+ "id": "0648",
+ "name": "Change CODEL with limit setting",
+ "category": [
+ "qdisc",
+ "codel"
+ ],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
+ "setup": [
+ "$TC qdisc add dev $DUMMY handle 1: root codel"
+ ],
+ "cmdUnderTest": "$TC qdisc change dev $DUMMY handle 1: root codel limit 100",
+ "expExitCode": "0",
+ "verifyCmd": "$TC qdisc show dev $DUMMY",
+ "matchPattern": "qdisc codel 1: root refcnt [0-9]+ limit 100p target 5ms interval 100ms",
+ "matchCount": "1",
+ "teardown": [
+ "$TC qdisc del dev $DUMMY handle 1: root"
+ ]
+ }
+]
diff --git a/tools/testing/selftests/tc-testing/tc-tests/qdiscs/drr.json b/tools/testing/selftests/tc-testing/tc-tests/qdiscs/drr.json
new file mode 100644
index 000000000000..7126ec3485cb
--- /dev/null
+++ b/tools/testing/selftests/tc-testing/tc-tests/qdiscs/drr.json
@@ -0,0 +1,65 @@
+[
+ {
+ "id": "0385",
+ "name": "Create DRR with default setting",
+ "category": [
+ "qdisc",
+ "drr"
+ ],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
+ "setup": [
+ ],
+ "cmdUnderTest": "$TC qdisc add dev $DUMMY handle 1: root drr",
+ "expExitCode": "0",
+ "verifyCmd": "$TC qdisc show dev $DUMMY",
+ "matchPattern": "qdisc drr 1: root refcnt [0-9]+",
+ "matchCount": "1",
+ "teardown": [
+ "$TC qdisc del dev $DUMMY handle 1: root"
+ ]
+ },
+ {
+ "id": "2375",
+ "name": "Delete DRR with handle",
+ "category": [
+ "qdisc",
+ "drr"
+ ],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
+ "setup": [
+ "$TC qdisc add dev $DUMMY handle 1: root drr"
+ ],
+ "cmdUnderTest": "$TC qdisc del dev $DUMMY handle 1: root",
+ "expExitCode": "0",
+ "verifyCmd": "$TC qdisc show dev $DUMMY",
+ "matchPattern": "qdisc drr 1: root refcnt [0-9]+",
+ "matchCount": "0",
+ "teardown": [
+ ]
+ },
+ {
+ "id": "3092",
+ "name": "Show DRR class",
+ "category": [
+ "qdisc",
+ "drr"
+ ],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
+ "setup": [
+ ],
+ "cmdUnderTest": "$TC qdisc add dev $DUMMY handle 1: root drr",
+ "expExitCode": "0",
+ "verifyCmd": "$TC class show dev $DUMMY",
+ "matchPattern": "class drr 1:",
+ "matchCount": "0",
+ "teardown": [
+ "$TC qdisc del dev $DUMMY handle 1: root"
+ ]
+ }
+]
diff --git a/tools/testing/selftests/tc-testing/tc-tests/qdiscs/etf.json b/tools/testing/selftests/tc-testing/tc-tests/qdiscs/etf.json
new file mode 100644
index 000000000000..2c73ee47bf58
--- /dev/null
+++ b/tools/testing/selftests/tc-testing/tc-tests/qdiscs/etf.json
@@ -0,0 +1,107 @@
+[
+ {
+ "id": "34ba",
+ "name": "Create ETF with default setting",
+ "category": [
+ "qdisc",
+ "etf"
+ ],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
+ "setup": [
+ ],
+ "cmdUnderTest": "$TC qdisc add dev $DUMMY handle 1: root etf clockid CLOCK_TAI",
+ "expExitCode": "0",
+ "verifyCmd": "$TC qdisc show dev $DUMMY",
+ "matchPattern": "qdisc etf 1: root refcnt [0-9]+ clockid TAI delta 0 offload off deadline_mode off skip_sock_check off",
+ "matchCount": "1",
+ "teardown": [
+ "$TC qdisc del dev $DUMMY handle 1: root"
+ ]
+ },
+ {
+ "id": "438f",
+ "name": "Create ETF with delta nanos setting",
+ "category": [
+ "qdisc",
+ "etf"
+ ],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
+ "setup": [
+ ],
+ "cmdUnderTest": "$TC qdisc add dev $DUMMY handle 1: root etf delta 100 clockid CLOCK_TAI",
+ "expExitCode": "0",
+ "verifyCmd": "$TC qdisc show dev $DUMMY",
+ "matchPattern": "qdisc etf 1: root refcnt [0-9]+ clockid TAI delta 100 offload off deadline_mode off skip_sock_check off",
+ "matchCount": "1",
+ "teardown": [
+ "$TC qdisc del dev $DUMMY handle 1: root"
+ ]
+ },
+ {
+ "id": "9041",
+ "name": "Create ETF with deadline_mode setting",
+ "category": [
+ "qdisc",
+ "etf"
+ ],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
+ "setup": [
+ ],
+ "cmdUnderTest": "$TC qdisc add dev $DUMMY handle 1: root etf clockid CLOCK_TAI deadline_mode",
+ "expExitCode": "0",
+ "verifyCmd": "$TC qdisc show dev $DUMMY",
+ "matchPattern": "qdisc etf 1: root refcnt [0-9]+ clockid TAI delta 0 offload off deadline_mode on skip_sock_check off",
+ "matchCount": "1",
+ "teardown": [
+ "$TC qdisc del dev $DUMMY handle 1: root"
+ ]
+ },
+ {
+ "id": "9a0c",
+ "name": "Create ETF with skip_sock_check setting",
+ "category": [
+ "qdisc",
+ "etf"
+ ],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
+ "setup": [
+ ],
+ "cmdUnderTest": "$TC qdisc add dev $DUMMY handle 1: root etf clockid CLOCK_TAI skip_sock_check",
+ "expExitCode": "0",
+ "verifyCmd": "$TC qdisc show dev $DUMMY",
+ "matchPattern": "qdisc etf 1: root refcnt [0-9]+ clockid TAI delta 0 offload off deadline_mode off skip_sock_check on",
+ "matchCount": "1",
+ "teardown": [
+ "$TC qdisc del dev $DUMMY handle 1: root"
+ ]
+ },
+ {
+ "id": "2093",
+ "name": "Delete ETF with valid handle",
+ "category": [
+ "qdisc",
+ "etf"
+ ],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
+ "setup": [
+ "$TC qdisc add dev $DUMMY handle 1: root etf clockid CLOCK_TAI"
+ ],
+ "cmdUnderTest": "$TC qdisc del dev $DUMMY handle 1: root",
+ "expExitCode": "0",
+ "verifyCmd": "$TC qdisc show dev $DUMMY",
+ "matchPattern": "qdisc etf 1: root refcnt [0-9]+ clockid TAI delta 0 offload off deadline_mode off skip_sock_check off",
+ "matchCount": "0",
+ "teardown": [
+ ]
+ }
+]
diff --git a/tools/testing/selftests/tc-testing/tc-tests/qdiscs/ets.json b/tools/testing/selftests/tc-testing/tc-tests/qdiscs/ets.json
index 180593010675..a5d94cdec605 100644
--- a/tools/testing/selftests/tc-testing/tc-tests/qdiscs/ets.json
+++ b/tools/testing/selftests/tc-testing/tc-tests/qdiscs/ets.json
@@ -6,8 +6,10 @@
"qdisc",
"ets"
],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
"setup": [
- "$IP link add dev $DUMMY type dummy || /bin/true"
],
"cmdUnderTest": "$TC qdisc add dev $DUMMY handle 1: root ets bands 2",
"expExitCode": "0",
@@ -15,8 +17,7 @@
"matchPattern": "qdisc ets 1: root .* bands 2",
"matchCount": "1",
"teardown": [
- "$TC qdisc del dev $DUMMY handle 1: root",
- "$IP link del dev $DUMMY type dummy"
+ "$TC qdisc del dev $DUMMY handle 1: root"
]
},
{
@@ -26,8 +27,10 @@
"qdisc",
"ets"
],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
"setup": [
- "$IP link add dev $DUMMY type dummy || /bin/true"
],
"cmdUnderTest": "$TC qdisc add dev $DUMMY handle 1: root ets quanta 1000 900 800 700",
"expExitCode": "0",
@@ -35,8 +38,7 @@
"matchPattern": "qdisc ets 1: root .*bands 4 quanta 1000 900 800 700",
"matchCount": "1",
"teardown": [
- "$TC qdisc del dev $DUMMY handle 1: root",
- "$IP link del dev $DUMMY type dummy"
+ "$TC qdisc del dev $DUMMY handle 1: root"
]
},
{
@@ -46,8 +48,10 @@
"qdisc",
"ets"
],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
"setup": [
- "$IP link add dev $DUMMY type dummy || /bin/true"
],
"cmdUnderTest": "$TC qdisc add dev $DUMMY handle 1: root ets strict 3",
"expExitCode": "0",
@@ -55,8 +59,7 @@
"matchPattern": "qdisc ets 1: root .*bands 3 strict 3",
"matchCount": "1",
"teardown": [
- "$TC qdisc del dev $DUMMY handle 1: root",
- "$IP link del dev $DUMMY type dummy"
+ "$TC qdisc del dev $DUMMY handle 1: root"
]
},
{
@@ -66,8 +69,10 @@
"qdisc",
"ets"
],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
"setup": [
- "$IP link add dev $DUMMY type dummy || /bin/true"
],
"cmdUnderTest": "$TC qdisc add dev $DUMMY handle 1: root ets bands 4 quanta 1000 900 800 700",
"expExitCode": "0",
@@ -75,8 +80,7 @@
"matchPattern": "qdisc ets 1: root .*bands 4 quanta 1000 900 800 700 priomap",
"matchCount": "1",
"teardown": [
- "$TC qdisc del dev $DUMMY handle 1: root",
- "$IP link del dev $DUMMY type dummy"
+ "$TC qdisc del dev $DUMMY handle 1: root"
]
},
{
@@ -86,8 +90,10 @@
"qdisc",
"ets"
],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
"setup": [
- "$IP link add dev $DUMMY type dummy || /bin/true"
],
"cmdUnderTest": "$TC qdisc add dev $DUMMY handle 1: root ets bands 3 strict 3",
"expExitCode": "0",
@@ -95,8 +101,7 @@
"matchPattern": "qdisc ets 1: root .*bands 3 strict 3 priomap",
"matchCount": "1",
"teardown": [
- "$TC qdisc del dev $DUMMY handle 1: root",
- "$IP link del dev $DUMMY type dummy"
+ "$TC qdisc del dev $DUMMY handle 1: root"
]
},
{
@@ -106,8 +111,10 @@
"qdisc",
"ets"
],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
"setup": [
- "$IP link add dev $DUMMY type dummy || /bin/true"
],
"cmdUnderTest": "$TC qdisc add dev $DUMMY handle 1: root ets strict 3 quanta 1500 750",
"expExitCode": "0",
@@ -115,8 +122,7 @@
"matchPattern": "qdisc ets 1: root .*bands 5 strict 3 quanta 1500 750 priomap",
"matchCount": "1",
"teardown": [
- "$TC qdisc del dev $DUMMY handle 1: root",
- "$IP link del dev $DUMMY type dummy"
+ "$TC qdisc del dev $DUMMY handle 1: root"
]
},
{
@@ -126,8 +132,10 @@
"qdisc",
"ets"
],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
"setup": [
- "$IP link add dev $DUMMY type dummy || /bin/true"
],
"cmdUnderTest": "$TC qdisc add dev $DUMMY handle 1: root ets strict 0 quanta 1500 750",
"expExitCode": "0",
@@ -135,8 +143,7 @@
"matchPattern": "qdisc ets 1: root .*bands 2 quanta 1500 750 priomap",
"matchCount": "1",
"teardown": [
- "$TC qdisc del dev $DUMMY handle 1: root",
- "$IP link del dev $DUMMY type dummy"
+ "$TC qdisc del dev $DUMMY handle 1: root"
]
},
{
@@ -146,8 +153,10 @@
"qdisc",
"ets"
],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
"setup": [
- "$IP link add dev $DUMMY type dummy || /bin/true"
],
"cmdUnderTest": "$TC qdisc add dev $DUMMY handle 1: root ets bands 5 strict 3 quanta 1500 750",
"expExitCode": "0",
@@ -155,8 +164,7 @@
"matchPattern": "qdisc ets 1: root .*bands 5 .*strict 3 quanta 1500 750 priomap",
"matchCount": "1",
"teardown": [
- "$TC qdisc del dev $DUMMY handle 1: root",
- "$IP link del dev $DUMMY type dummy"
+ "$TC qdisc del dev $DUMMY handle 1: root"
]
},
{
@@ -166,8 +174,10 @@
"qdisc",
"ets"
],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
"setup": [
- "$IP link add dev $DUMMY type dummy || /bin/true"
],
"cmdUnderTest": "$TC qdisc add dev $DUMMY handle 1: root ets bands 2 quanta 1000",
"expExitCode": "0",
@@ -175,8 +185,7 @@
"matchPattern": "qdisc ets 1: root .*bands 2 .*quanta 1000 [1-9][0-9]* priomap",
"matchCount": "1",
"teardown": [
- "$TC qdisc del dev $DUMMY handle 1: root",
- "$IP link del dev $DUMMY type dummy"
+ "$TC qdisc del dev $DUMMY handle 1: root"
]
},
{
@@ -186,8 +195,10 @@
"qdisc",
"ets"
],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
"setup": [
- "$IP link add dev $DUMMY type dummy || /bin/true"
],
"cmdUnderTest": "$TC qdisc add dev $DUMMY handle 1: root ets bands 3 strict 1",
"expExitCode": "0",
@@ -195,8 +206,7 @@
"matchPattern": "qdisc ets 1: root .*bands 3 strict 1 quanta ([1-9][0-9]* ){2}priomap",
"matchCount": "1",
"teardown": [
- "$TC qdisc del dev $DUMMY handle 1: root",
- "$IP link del dev $DUMMY type dummy"
+ "$TC qdisc del dev $DUMMY handle 1: root"
]
},
{
@@ -206,8 +216,10 @@
"qdisc",
"ets"
],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
"setup": [
- "$IP link add dev $DUMMY type dummy || /bin/true"
],
"cmdUnderTest": "$TC qdisc add dev $DUMMY handle 1: root ets bands 3 strict 1 quanta 1000",
"expExitCode": "0",
@@ -215,8 +227,7 @@
"matchPattern": "qdisc ets 1: root .*bands 3 strict 1 quanta 1000 [1-9][0-9]* priomap",
"matchCount": "1",
"teardown": [
- "$TC qdisc del dev $DUMMY handle 1: root",
- "$IP link del dev $DUMMY type dummy"
+ "$TC qdisc del dev $DUMMY handle 1: root"
]
},
{
@@ -226,8 +237,10 @@
"qdisc",
"ets"
],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
"setup": [
- "$IP link add dev $DUMMY type dummy || /bin/true"
],
"cmdUnderTest": "$TC qdisc add dev $DUMMY handle 1: root ets bands 16",
"expExitCode": "0",
@@ -235,8 +248,7 @@
"matchPattern": "qdisc ets 1: root .* bands 16",
"matchCount": "1",
"teardown": [
- "$TC qdisc del dev $DUMMY handle 1: root",
- "$IP link del dev $DUMMY type dummy"
+ "$TC qdisc del dev $DUMMY handle 1: root"
]
},
{
@@ -246,8 +258,10 @@
"qdisc",
"ets"
],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
"setup": [
- "$IP link add dev $DUMMY type dummy || /bin/true"
],
"cmdUnderTest": "$TC qdisc add dev $DUMMY handle 1: root ets bands 17",
"expExitCode": "1",
@@ -255,7 +269,6 @@
"matchPattern": "qdisc ets",
"matchCount": "0",
"teardown": [
- "$IP link del dev $DUMMY type dummy"
]
},
{
@@ -265,8 +278,10 @@
"qdisc",
"ets"
],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
"setup": [
- "$IP link add dev $DUMMY type dummy || /bin/true"
],
"cmdUnderTest": "$TC qdisc add dev $DUMMY handle 1: root ets strict 17",
"expExitCode": "1",
@@ -274,7 +289,6 @@
"matchPattern": "qdisc ets",
"matchCount": "0",
"teardown": [
- "$IP link del dev $DUMMY type dummy"
]
},
{
@@ -284,8 +298,10 @@
"qdisc",
"ets"
],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
"setup": [
- "$IP link add dev $DUMMY type dummy || /bin/true"
],
"cmdUnderTest": "$TC qdisc add dev $DUMMY handle 1: root ets quanta 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16",
"expExitCode": "0",
@@ -293,8 +309,7 @@
"matchPattern": "qdisc ets 1: root .* bands 16",
"matchCount": "1",
"teardown": [
- "$TC qdisc del dev $DUMMY handle 1: root",
- "$IP link del dev $DUMMY type dummy"
+ "$TC qdisc del dev $DUMMY handle 1: root"
]
},
{
@@ -304,8 +319,10 @@
"qdisc",
"ets"
],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
"setup": [
- "$IP link add dev $DUMMY type dummy || /bin/true"
],
"cmdUnderTest": "$TC qdisc add dev $DUMMY handle 1: root ets quanta 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17",
"expExitCode": "2",
@@ -313,7 +330,6 @@
"matchPattern": "qdisc ets",
"matchCount": "0",
"teardown": [
- "$IP link del dev $DUMMY type dummy"
]
},
{
@@ -323,8 +339,10 @@
"qdisc",
"ets"
],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
"setup": [
- "$IP link add dev $DUMMY type dummy || /bin/true"
],
"cmdUnderTest": "$TC qdisc add dev $DUMMY handle 1: root ets strict 8 quanta 1 2 3 4 5 6 7 8",
"expExitCode": "0",
@@ -332,8 +350,7 @@
"matchPattern": "qdisc ets 1: root .* bands 16",
"matchCount": "1",
"teardown": [
- "$TC qdisc del dev $DUMMY handle 1: root",
- "$IP link del dev $DUMMY type dummy"
+ "$TC qdisc del dev $DUMMY handle 1: root"
]
},
{
@@ -343,8 +360,10 @@
"qdisc",
"ets"
],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
"setup": [
- "$IP link add dev $DUMMY type dummy || /bin/true"
],
"cmdUnderTest": "$TC qdisc add dev $DUMMY handle 1: root ets strict 9 quanta 1 2 3 4 5 6 7 8",
"expExitCode": "2",
@@ -352,7 +371,6 @@
"matchPattern": "qdisc ets",
"matchCount": "0",
"teardown": [
- "$IP link del dev $DUMMY type dummy"
]
},
{
@@ -362,8 +380,10 @@
"qdisc",
"ets"
],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
"setup": [
- "$IP link add dev $DUMMY type dummy || /bin/true"
],
"cmdUnderTest": "$TC qdisc add dev $DUMMY handle 1: root ets bands 5 priomap 0 0 1 0 1 2 0 1 2 3 0 1 2 3 4 0",
"expExitCode": "0",
@@ -371,8 +391,7 @@
"matchPattern": "qdisc ets 1: root .*priomap 0 0 1 0 1 2 0 1 2 3 0 1 2 3 4 0",
"matchCount": "1",
"teardown": [
- "$TC qdisc del dev $DUMMY handle 1: root",
- "$IP link del dev $DUMMY type dummy"
+ "$TC qdisc del dev $DUMMY handle 1: root"
]
},
{
@@ -382,8 +401,10 @@
"qdisc",
"ets"
],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
"setup": [
- "$IP link add dev $DUMMY type dummy || /bin/true"
],
"cmdUnderTest": "$TC qdisc add dev $DUMMY handle 1: root ets quanta 1000 2000 3000 4000 5000 priomap 0 0 1 0 1 2 0 1 2 3 0 1 2 3 4 0",
"expExitCode": "0",
@@ -391,8 +412,7 @@
"matchPattern": "qdisc ets 1: root .*quanta 1000 2000 3000 4000 5000 priomap 0 0 1 0 1 2 0 1 2 3 0 1 2 3 4 0",
"matchCount": "1",
"teardown": [
- "$TC qdisc del dev $DUMMY handle 1: root",
- "$IP link del dev $DUMMY type dummy"
+ "$TC qdisc del dev $DUMMY handle 1: root"
]
},
{
@@ -402,8 +422,10 @@
"qdisc",
"ets"
],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
"setup": [
- "$IP link add dev $DUMMY type dummy || /bin/true"
],
"cmdUnderTest": "$TC qdisc add dev $DUMMY handle 1: root ets strict 5 priomap 0 0 1 0 1 2 0 1 2 3 0 1 2 3 4 0",
"expExitCode": "0",
@@ -411,8 +433,7 @@
"matchPattern": "qdisc ets 1: root .*bands 5 strict 5 priomap 0 0 1 0 1 2 0 1 2 3 0 1 2 3 4 0",
"matchCount": "1",
"teardown": [
- "$TC qdisc del dev $DUMMY handle 1: root",
- "$IP link del dev $DUMMY type dummy"
+ "$TC qdisc del dev $DUMMY handle 1: root"
]
},
{
@@ -422,8 +443,10 @@
"qdisc",
"ets"
],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
"setup": [
- "$IP link add dev $DUMMY type dummy || /bin/true"
],
"cmdUnderTest": "$TC qdisc add dev $DUMMY handle 1: root ets strict 2 quanta 1000 2000 3000 priomap 0 0 1 0 1 2 0 1 2 3 0 1 2 3 4 0",
"expExitCode": "0",
@@ -431,8 +454,7 @@
"matchPattern": "qdisc ets 1: root .*strict 2 quanta 1000 2000 3000 priomap 0 0 1 0 1 2 0 1 2 3 0 1 2 3 4 0",
"matchCount": "1",
"teardown": [
- "$TC qdisc del dev $DUMMY handle 1: root",
- "$IP link del dev $DUMMY type dummy"
+ "$TC qdisc del dev $DUMMY handle 1: root"
]
},
{
@@ -442,8 +464,10 @@
"qdisc",
"ets"
],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
"setup": [
- "$IP link add dev $DUMMY type dummy || /bin/true"
],
"cmdUnderTest": "$TC qdisc add dev $DUMMY handle 1: root ets quanta 4000 3000 2000",
"expExitCode": "0",
@@ -451,8 +475,7 @@
"matchPattern": "class ets 1:1 root quantum 4000",
"matchCount": "1",
"teardown": [
- "$TC qdisc del dev $DUMMY handle 1: root",
- "$IP link del dev $DUMMY type dummy"
+ "$TC qdisc del dev $DUMMY handle 1: root"
]
},
{
@@ -462,8 +485,10 @@
"qdisc",
"ets"
],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
"setup": [
- "$IP link add dev $DUMMY type dummy || /bin/true"
],
"cmdUnderTest": "$TC qdisc add dev $DUMMY handle 1: root ets quanta 4000 3000 2000",
"expExitCode": "0",
@@ -471,8 +496,7 @@
"matchPattern": "class ets 1:2 root quantum 3000",
"matchCount": "1",
"teardown": [
- "$TC qdisc del dev $DUMMY handle 1: root",
- "$IP link del dev $DUMMY type dummy"
+ "$TC qdisc del dev $DUMMY handle 1: root"
]
},
{
@@ -482,8 +506,10 @@
"qdisc",
"ets"
],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
"setup": [
- "$IP link add dev $DUMMY type dummy || /bin/true"
],
"cmdUnderTest": "$TC qdisc add dev $DUMMY handle 1: root ets quanta 4000 3000 2000",
"expExitCode": "0",
@@ -491,8 +517,7 @@
"matchPattern": "class ets 1:3 root quantum 2000",
"matchCount": "1",
"teardown": [
- "$TC qdisc del dev $DUMMY handle 1: root",
- "$IP link del dev $DUMMY type dummy"
+ "$TC qdisc del dev $DUMMY handle 1: root"
]
},
{
@@ -502,8 +527,10 @@
"qdisc",
"ets"
],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
"setup": [
- "$IP link add dev $DUMMY type dummy || /bin/true"
],
"cmdUnderTest": "$TC qdisc add dev $DUMMY handle 1: root ets strict 3",
"expExitCode": "0",
@@ -511,8 +538,7 @@
"matchPattern": "class ets 1:1 root $",
"matchCount": "1",
"teardown": [
- "$TC qdisc del dev $DUMMY handle 1: root",
- "$IP link del dev $DUMMY type dummy"
+ "$TC qdisc del dev $DUMMY handle 1: root"
]
},
{
@@ -522,8 +548,10 @@
"qdisc",
"ets"
],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
"setup": [
- "$IP link add dev $DUMMY type dummy || /bin/true"
],
"cmdUnderTest": "$TC qdisc add dev $DUMMY handle 1: root ets bands 2 quanta 1000 2000 3000",
"expExitCode": "1",
@@ -531,7 +559,6 @@
"matchPattern": "qdisc ets",
"matchCount": "0",
"teardown": [
- "$IP link del dev $DUMMY type dummy"
]
},
{
@@ -541,8 +568,10 @@
"qdisc",
"ets"
],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
"setup": [
- "$IP link add dev $DUMMY type dummy || /bin/true"
],
"cmdUnderTest": "$TC qdisc add dev $DUMMY handle 1: root ets bands 2 strict 3",
"expExitCode": "1",
@@ -550,7 +579,6 @@
"matchPattern": "qdisc ets",
"matchCount": "0",
"teardown": [
- "$IP link del dev $DUMMY type dummy"
]
},
{
@@ -560,8 +588,10 @@
"qdisc",
"ets"
],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
"setup": [
- "$IP link add dev $DUMMY type dummy || /bin/true"
],
"cmdUnderTest": "$TC qdisc add dev $DUMMY handle 1: root ets bands 4 strict 2 quanta 1000 2000 3000",
"expExitCode": "1",
@@ -569,7 +599,6 @@
"matchPattern": "qdisc ets",
"matchCount": "0",
"teardown": [
- "$IP link del dev $DUMMY type dummy"
]
},
{
@@ -579,8 +608,10 @@
"qdisc",
"ets"
],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
"setup": [
- "$IP link add dev $DUMMY type dummy || /bin/true"
],
"cmdUnderTest": "$TC qdisc add dev $DUMMY handle 1: root ets bands 5 priomap 0 0 1 0 1 2 0 1 2 3 0 1 2 3 4 0 1 2",
"expExitCode": "1",
@@ -588,7 +619,6 @@
"matchPattern": "qdisc ets",
"matchCount": "0",
"teardown": [
- "$IP link del dev $DUMMY type dummy"
]
},
{
@@ -598,8 +628,10 @@
"qdisc",
"ets"
],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
"setup": [
- "$IP link add dev $DUMMY type dummy || /bin/true"
],
"cmdUnderTest": "$TC qdisc add dev $DUMMY handle 1: root ets bands 2 priomap 0 1 2",
"expExitCode": "1",
@@ -607,7 +639,6 @@
"matchPattern": "qdisc ets",
"matchCount": "0",
"teardown": [
- "$IP link del dev $DUMMY type dummy"
]
},
{
@@ -617,8 +648,10 @@
"qdisc",
"ets"
],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
"setup": [
- "$IP link add dev $DUMMY type dummy || /bin/true"
],
"cmdUnderTest": "$TC qdisc add dev $DUMMY handle 1: root ets quanta 1000 500 priomap 0 1 2",
"expExitCode": "1",
@@ -626,7 +659,6 @@
"matchPattern": "qdisc ets",
"matchCount": "0",
"teardown": [
- "$IP link del dev $DUMMY type dummy"
]
},
{
@@ -636,8 +668,10 @@
"qdisc",
"ets"
],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
"setup": [
- "$IP link add dev $DUMMY type dummy || /bin/true"
],
"cmdUnderTest": "$TC qdisc add dev $DUMMY handle 1: root ets strict 2 priomap 0 1 2",
"expExitCode": "1",
@@ -645,7 +679,6 @@
"matchPattern": "qdisc ets",
"matchCount": "0",
"teardown": [
- "$IP link del dev $DUMMY type dummy"
]
},
{
@@ -655,8 +688,10 @@
"qdisc",
"ets"
],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
"setup": [
- "$IP link add dev $DUMMY type dummy || /bin/true"
],
"cmdUnderTest": "$TC qdisc add dev $DUMMY handle 1: root ets strict 1 quanta 1000 500 priomap 0 1 2 3",
"expExitCode": "1",
@@ -664,7 +699,6 @@
"matchPattern": "qdisc ets",
"matchCount": "0",
"teardown": [
- "$IP link del dev $DUMMY type dummy"
]
},
{
@@ -674,8 +708,10 @@
"qdisc",
"ets"
],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
"setup": [
- "$IP link add dev $DUMMY type dummy || /bin/true"
],
"cmdUnderTest": "$TC qdisc add dev $DUMMY handle 1: root ets bands 4 strict 1 quanta 1000 500 priomap 0 1 2 3",
"expExitCode": "0",
@@ -683,7 +719,6 @@
"matchPattern": "qdisc ets",
"matchCount": "1",
"teardown": [
- "$IP link del dev $DUMMY type dummy"
]
},
{
@@ -693,8 +728,10 @@
"qdisc",
"ets"
],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
"setup": [
- "$IP link add dev $DUMMY type dummy || /bin/true"
],
"cmdUnderTest": "$TC qdisc add dev $DUMMY handle 1: root ets bands 4 strict 1 quanta 1000 500 priomap 0 1 2 3 4",
"expExitCode": "1",
@@ -702,7 +739,6 @@
"matchPattern": "qdisc ets",
"matchCount": "0",
"teardown": [
- "$IP link del dev $DUMMY type dummy"
]
},
{
@@ -712,8 +748,10 @@
"qdisc",
"ets"
],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
"setup": [
- "$IP link add dev $DUMMY type dummy || /bin/true"
],
"cmdUnderTest": "$TC qdisc add dev $DUMMY handle 1: root ets bands 4 priomap 0 0 0 0",
"expExitCode": "0",
@@ -721,7 +759,6 @@
"matchPattern": "qdisc ets .*priomap 0 0 0 0 3 3 3 3 3 3 3 3 3 3 3 3",
"matchCount": "1",
"teardown": [
- "$IP link del dev $DUMMY type dummy"
]
},
{
@@ -731,8 +768,10 @@
"qdisc",
"ets"
],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
"setup": [
- "$IP link add dev $DUMMY type dummy || /bin/true"
],
"cmdUnderTest": "$TC qdisc add dev $DUMMY handle 1: root ets bands 4",
"expExitCode": "0",
@@ -740,7 +779,6 @@
"matchPattern": "qdisc ets .*priomap 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3",
"matchCount": "1",
"teardown": [
- "$IP link del dev $DUMMY type dummy"
]
},
{
@@ -750,8 +788,10 @@
"qdisc",
"ets"
],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
"setup": [
- "$IP link add dev $DUMMY type dummy || /bin/true"
],
"cmdUnderTest": "$TC qdisc add dev $DUMMY handle 1: root ets bands 0",
"expExitCode": "1",
@@ -759,7 +799,6 @@
"matchPattern": "qdisc ets",
"matchCount": "0",
"teardown": [
- "$IP link del dev $DUMMY type dummy"
]
},
{
@@ -769,8 +808,10 @@
"qdisc",
"ets"
],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
"setup": [
- "$IP link add dev $DUMMY type dummy || /bin/true"
],
"cmdUnderTest": "$TC qdisc add dev $DUMMY handle 1: root ets bands 17",
"expExitCode": "1",
@@ -778,7 +819,6 @@
"matchPattern": "qdisc ets",
"matchCount": "0",
"teardown": [
- "$IP link del dev $DUMMY type dummy"
]
},
{
@@ -788,8 +828,10 @@
"qdisc",
"ets"
],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
"setup": [
- "$IP link add dev $DUMMY type dummy || /bin/true"
],
"cmdUnderTest": "$TC qdisc add dev $DUMMY handle 1: root ets",
"expExitCode": "1",
@@ -797,7 +839,6 @@
"matchPattern": "qdisc ets",
"matchCount": "0",
"teardown": [
- "$IP link del dev $DUMMY type dummy"
]
},
{
@@ -807,8 +848,10 @@
"qdisc",
"ets"
],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
"setup": [
- "$IP link add dev $DUMMY type dummy || /bin/true"
],
"cmdUnderTest": "$TC qdisc add dev $DUMMY handle 1: root ets quanta 1000 0 800 700",
"expExitCode": "1",
@@ -816,7 +859,6 @@
"matchPattern": "qdisc ets",
"matchCount": "0",
"teardown": [
- "$IP link del dev $DUMMY type dummy"
]
},
{
@@ -826,8 +868,10 @@
"qdisc",
"ets"
],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
"setup": [
- "$IP link add dev $DUMMY type dummy || /bin/true"
],
"cmdUnderTest": "$TC qdisc add dev $DUMMY handle 1: root ets quanta 0",
"expExitCode": "1",
@@ -835,7 +879,6 @@
"matchPattern": "qdisc ets",
"matchCount": "0",
"teardown": [
- "$IP link del dev $DUMMY type dummy"
]
},
{
@@ -845,8 +888,10 @@
"qdisc",
"ets"
],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
"setup": [
- "$IP link add dev $DUMMY type dummy || /bin/true"
],
"cmdUnderTest": "$TC qdisc add dev $DUMMY handle 1: root ets quanta",
"expExitCode": "255",
@@ -854,7 +899,6 @@
"matchPattern": "qdisc ets",
"matchCount": "0",
"teardown": [
- "$IP link del dev $DUMMY type dummy"
]
},
{
@@ -864,8 +908,10 @@
"qdisc",
"ets"
],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
"setup": [
- "$IP link add dev $DUMMY type dummy || /bin/true",
"$TC qdisc add dev $DUMMY handle 1: root ets quanta 1000 2000 3000"
],
"cmdUnderTest": "$TC class change dev $DUMMY classid 1:1 ets quantum 1500",
@@ -874,7 +920,6 @@
"matchPattern": "qdisc ets 1: root .*quanta 1500 2000 3000 priomap ",
"matchCount": "1",
"teardown": [
- "$IP link del dev $DUMMY type dummy"
]
},
{
@@ -884,8 +929,10 @@
"qdisc",
"ets"
],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
"setup": [
- "$IP link add dev $DUMMY type dummy || /bin/true",
"$TC qdisc add dev $DUMMY handle 1: root ets quanta 1000 2000 3000"
],
"cmdUnderTest": "$TC class change dev $DUMMY classid 1:1 ets",
@@ -894,7 +941,6 @@
"matchPattern": "qdisc ets 1: root .*quanta 1000 2000 3000 priomap ",
"matchCount": "1",
"teardown": [
- "$IP link del dev $DUMMY type dummy"
]
},
{
@@ -904,8 +950,10 @@
"qdisc",
"ets"
],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
"setup": [
- "$IP link add dev $DUMMY type dummy || /bin/true",
"$TC qdisc add dev $DUMMY handle 1: root ets strict 5"
],
"cmdUnderTest": "$TC class change dev $DUMMY classid 1:2 ets quantum 1500",
@@ -914,7 +962,6 @@
"matchPattern": "qdisc ets .*bands 5 .*strict 5",
"matchCount": "1",
"teardown": [
- "$IP link del dev $DUMMY type dummy"
]
},
{
@@ -924,8 +971,10 @@
"qdisc",
"ets"
],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
"setup": [
- "$IP link add dev $DUMMY type dummy || /bin/true",
"$TC qdisc add dev $DUMMY handle 1: root ets strict 5"
],
"cmdUnderTest": "$TC class change dev $DUMMY classid 1:2 ets",
@@ -934,7 +983,6 @@
"matchPattern": "qdisc ets .*bands 5 .*strict 5",
"matchCount": "1",
"teardown": [
- "$IP link del dev $DUMMY type dummy"
]
}
]
diff --git a/tools/testing/selftests/tc-testing/tc-tests/qdiscs/fifo.json b/tools/testing/selftests/tc-testing/tc-tests/qdiscs/fifo.json
index 5ecd93b4c473..ae3d286a32b2 100644
--- a/tools/testing/selftests/tc-testing/tc-tests/qdiscs/fifo.json
+++ b/tools/testing/selftests/tc-testing/tc-tests/qdiscs/fifo.json
@@ -2,13 +2,14 @@
{
"id": "a519",
"name": "Add bfifo qdisc with system default parameters on egress",
- "__comment": "When omitted, queue size in bfifo is calculated as: txqueuelen * (MTU + LinkLayerHdrSize), where LinkLayerHdrSize=14 for Ethernet",
"category": [
"qdisc",
"fifo"
],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
"setup": [
- "$IP link add dev $DUMMY type dummy || /bin/true"
],
"cmdUnderTest": "$TC qdisc add dev $DUMMY handle 1: root bfifo",
"expExitCode": "0",
@@ -16,20 +17,20 @@
"matchPattern": "qdisc bfifo 1: root.*limit [0-9]+b",
"matchCount": "1",
"teardown": [
- "$TC qdisc del dev $DUMMY handle 1: root bfifo",
- "$IP link del dev $DUMMY type dummy"
+ "$TC qdisc del dev $DUMMY handle 1: root bfifo"
]
},
{
"id": "585c",
"name": "Add pfifo qdisc with system default parameters on egress",
- "__comment": "When omitted, queue size in pfifo is defaulted to the interface's txqueuelen value.",
"category": [
"qdisc",
"fifo"
],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
"setup": [
- "$IP link add dev $DUMMY type dummy || /bin/true"
],
"cmdUnderTest": "$TC qdisc add dev $DUMMY handle 1: root pfifo",
"expExitCode": "0",
@@ -37,8 +38,7 @@
"matchPattern": "qdisc pfifo 1: root.*limit [0-9]+p",
"matchCount": "1",
"teardown": [
- "$TC qdisc del dev $DUMMY handle 1: root pfifo",
- "$IP link del dev $DUMMY type dummy"
+ "$TC qdisc del dev $DUMMY handle 1: root pfifo"
]
},
{
@@ -48,8 +48,10 @@
"qdisc",
"fifo"
],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
"setup": [
- "$IP link add dev $DUMMY type dummy || /bin/true"
],
"cmdUnderTest": "$TC qdisc add dev $DUMMY root handle ffff: bfifo",
"expExitCode": "0",
@@ -57,8 +59,7 @@
"matchPattern": "qdisc bfifo ffff: root.*limit [0-9]+b",
"matchCount": "1",
"teardown": [
- "$TC qdisc del dev $DUMMY handle ffff: root bfifo",
- "$IP link del dev $DUMMY type dummy"
+ "$TC qdisc del dev $DUMMY handle ffff: root bfifo"
]
},
{
@@ -68,8 +69,10 @@
"qdisc",
"fifo"
],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
"setup": [
- "$IP link add dev $DUMMY type dummy || /bin/true"
],
"cmdUnderTest": "$TC qdisc add dev $DUMMY handle 1: root bfifo limit 3000b",
"expExitCode": "0",
@@ -77,8 +80,7 @@
"matchPattern": "qdisc bfifo 1: root.*limit 3000b",
"matchCount": "1",
"teardown": [
- "$TC qdisc del dev $DUMMY handle 1: root bfifo",
- "$IP link del dev $DUMMY type dummy"
+ "$TC qdisc del dev $DUMMY handle 1: root bfifo"
]
},
{
@@ -88,8 +90,11 @@
"qdisc",
"fifo"
],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
"setup": [
- "$IP link add dev $DUMMY txqueuelen 3000 type dummy || /bin/true"
+ "$IP link set dev $DUMMY txqueuelen 3000"
],
"cmdUnderTest": "$TC qdisc add dev $DUMMY handle 1: root pfifo limit 3000",
"expExitCode": "0",
@@ -97,8 +102,7 @@
"matchPattern": "qdisc pfifo 1: root.*limit 3000p",
"matchCount": "1",
"teardown": [
- "$TC qdisc del dev $DUMMY handle 1: root pfifo",
- "$IP link del dev $DUMMY type dummy"
+ "$TC qdisc del dev $DUMMY handle 1: root pfifo"
]
},
{
@@ -108,8 +112,10 @@
"qdisc",
"fifo"
],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
"setup": [
- "$IP link add dev $DUMMY type dummy || /bin/true"
],
"cmdUnderTest": "$TC qdisc add dev $DUMMY root handle 10000: bfifo",
"expExitCode": "255",
@@ -117,7 +123,6 @@
"matchPattern": "qdisc bfifo 10000: root.*limit [0-9]+b",
"matchCount": "0",
"teardown": [
- "$IP link del dev $DUMMY type dummy"
]
},
{
@@ -127,8 +132,10 @@
"qdisc",
"fifo"
],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
"setup": [
- "$IP link add dev $DUMMY type dummy || /bin/true"
],
"cmdUnderTest": "$TC qdisc add dev $DUMMY handle 1: root bfifo foorbar",
"expExitCode": "1",
@@ -136,7 +143,6 @@
"matchPattern": "qdisc bfifo 1: root",
"matchCount": "0",
"teardown": [
- "$IP link del dev $DUMMY type dummy"
]
},
{
@@ -146,8 +152,10 @@
"qdisc",
"fifo"
],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
"setup": [
- "$IP link add dev $DUMMY type dummy || /bin/true"
],
"cmdUnderTest": "$TC qdisc add dev $DUMMY handle 1: root pfifo foorbar",
"expExitCode": "1",
@@ -155,7 +163,6 @@
"matchPattern": "qdisc pfifo 1: root",
"matchCount": "0",
"teardown": [
- "$IP link del dev $DUMMY type dummy"
]
},
{
@@ -165,9 +172,11 @@
"qdisc",
"fifo"
],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
"setup": [
- "$IP link del dev $DUMMY type dummy || /bin/true",
- "$IP link add dev $DUMMY txqueuelen 1000 type dummy",
+ "$IP link set dev $DUMMY txqueuelen 1000",
"$TC qdisc add dev $DUMMY handle 1: root bfifo"
],
"cmdUnderTest": "$TC qdisc replace dev $DUMMY handle 1: root bfifo limit 3000b",
@@ -176,8 +185,7 @@
"matchPattern": "qdisc bfifo 1: root.*limit 3000b",
"matchCount": "1",
"teardown": [
- "$TC qdisc del dev $DUMMY handle 1: root bfifo",
- "$IP link del dev $DUMMY type dummy"
+ "$TC qdisc del dev $DUMMY handle 1: root bfifo"
]
},
{
@@ -187,9 +195,11 @@
"qdisc",
"fifo"
],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
"setup": [
- "$IP link del dev $DUMMY type dummy || /bin/true",
- "$IP link add dev $DUMMY txqueuelen 1000 type dummy",
+ "$IP link set dev $DUMMY txqueuelen 1000",
"$TC qdisc add dev $DUMMY handle 1: root pfifo"
],
"cmdUnderTest": "$TC qdisc replace dev $DUMMY handle 1: root pfifo limit 30",
@@ -198,8 +208,7 @@
"matchPattern": "qdisc pfifo 1: root.*limit 30p",
"matchCount": "1",
"teardown": [
- "$TC qdisc del dev $DUMMY handle 1: root pfifo",
- "$IP link del dev $DUMMY type dummy"
+ "$TC qdisc del dev $DUMMY handle 1: root pfifo"
]
},
{
@@ -209,8 +218,10 @@
"qdisc",
"fifo"
],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
"setup": [
- "$IP link add dev $DUMMY type dummy || /bin/true"
],
"cmdUnderTest": "$TC qdisc add dev $DUMMY handle 1: root bfifo limit foo-bar",
"expExitCode": "1",
@@ -218,7 +229,6 @@
"matchPattern": "qdisc bfifo 1: root.*limit foo-bar",
"matchCount": "0",
"teardown": [
- "$IP link del dev $DUMMY type dummy"
]
},
{
@@ -228,8 +238,10 @@
"qdisc",
"fifo"
],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
"setup": [
- "$IP link add dev $DUMMY type dummy || /bin/true",
"$TC qdisc add dev $DUMMY handle 1: root bfifo"
],
"cmdUnderTest": "$TC qdisc add dev $DUMMY handle 1: root bfifo",
@@ -238,8 +250,7 @@
"matchPattern": "qdisc bfifo 1: root",
"matchCount": "1",
"teardown": [
- "$TC qdisc del dev $DUMMY handle 1: root bfifo",
- "$IP link del dev $DUMMY type dummy"
+ "$TC qdisc del dev $DUMMY handle 1: root bfifo"
]
},
{
@@ -249,8 +260,10 @@
"qdisc",
"fifo"
],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
"setup": [
- "$IP link add dev $DUMMY type dummy || /bin/true"
],
"cmdUnderTest": "$TC qdisc del dev $DUMMY root handle 1: bfifo",
"expExitCode": "2",
@@ -258,7 +271,6 @@
"matchPattern": "qdisc bfifo 1: root",
"matchCount": "0",
"teardown": [
- "$IP link del dev $DUMMY type dummy"
]
},
{
@@ -268,8 +280,10 @@
"qdisc",
"fifo"
],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
"setup": [
- "$IP link add dev $DUMMY type dummy || /bin/true"
],
"cmdUnderTest": "$TC qdisc add dev $DUMMY root handle 123^ bfifo limit 100b",
"expExitCode": "255",
@@ -277,7 +291,6 @@
"matchPattern": "qdisc bfifo 123 root",
"matchCount": "0",
"teardown": [
- "$IP link del dev $DUMMY type dummy"
]
},
{
@@ -287,8 +300,10 @@
"qdisc",
"fifo"
],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
"setup": [
- "$IP link add dev $DUMMY type dummy || /bin/true",
"$TC qdisc add dev $DUMMY root handle 1: bfifo",
"$TC qdisc del dev $DUMMY root handle 1: bfifo"
],
@@ -298,7 +313,6 @@
"matchPattern": "qdisc bfifo 1: root",
"matchCount": "0",
"teardown": [
- "$IP link del dev $DUMMY type dummy"
]
}
]
diff --git a/tools/testing/selftests/tc-testing/tc-tests/qdiscs/fq.json b/tools/testing/selftests/tc-testing/tc-tests/qdiscs/fq.json
new file mode 100644
index 000000000000..3a537b2ec4c9
--- /dev/null
+++ b/tools/testing/selftests/tc-testing/tc-tests/qdiscs/fq.json
@@ -0,0 +1,381 @@
+[
+ {
+ "id": "983b",
+ "name": "Create FQ with default setting",
+ "category": [
+ "qdisc",
+ "fq"
+ ],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
+ "setup": [
+ ],
+ "cmdUnderTest": "$TC qdisc add dev $DUMMY handle 1: root fq",
+ "expExitCode": "0",
+ "verifyCmd": "$TC qdisc show dev $DUMMY",
+ "matchPattern": "qdisc fq 1: root refcnt [0-9]+ limit",
+ "matchCount": "1",
+ "teardown": [
+ "$TC qdisc del dev $DUMMY handle 1: root"
+ ]
+ },
+ {
+ "id": "38a1",
+ "name": "Create FQ with limit packet setting",
+ "category": [
+ "qdisc",
+ "fq"
+ ],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
+ "setup": [
+ ],
+ "cmdUnderTest": "$TC qdisc add dev $DUMMY handle 1: root fq limit 3000",
+ "expExitCode": "0",
+ "verifyCmd": "$TC qdisc show dev $DUMMY",
+ "matchPattern": "qdisc fq 1: root refcnt [0-9]+ limit 3000p",
+ "matchCount": "1",
+ "teardown": [
+ "$TC qdisc del dev $DUMMY handle 1: root"
+ ]
+ },
+ {
+ "id": "0a18",
+ "name": "Create FQ with flow_limit setting",
+ "category": [
+ "qdisc",
+ "fq"
+ ],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
+ "setup": [
+ ],
+ "cmdUnderTest": "$TC qdisc add dev $DUMMY handle 1: root fq flow_limit 300",
+ "expExitCode": "0",
+ "verifyCmd": "$TC qdisc show dev $DUMMY",
+ "matchPattern": "qdisc fq 1: root refcnt [0-9]+ limit 10000p flow_limit 300p",
+ "matchCount": "1",
+ "teardown": [
+ "$TC qdisc del dev $DUMMY handle 1: root"
+ ]
+ },
+ {
+ "id": "2390",
+ "name": "Create FQ with quantum setting",
+ "category": [
+ "qdisc",
+ "fq"
+ ],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
+ "setup": [
+ ],
+ "cmdUnderTest": "$TC qdisc add dev $DUMMY handle 1: root fq quantum 9000",
+ "expExitCode": "0",
+ "verifyCmd": "$TC qdisc show dev $DUMMY",
+ "matchPattern": "qdisc fq 1: root refcnt [0-9]+ limit 10000p flow_limit 100p.*quantum 9000b",
+ "matchCount": "1",
+ "teardown": [
+ "$TC qdisc del dev $DUMMY handle 1: root"
+ ]
+ },
+ {
+ "id": "845b",
+ "name": "Create FQ with initial_quantum setting",
+ "category": [
+ "qdisc",
+ "fq"
+ ],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
+ "setup": [
+ ],
+ "cmdUnderTest": "$TC qdisc add dev $DUMMY handle 1: root fq initial_quantum 900000",
+ "expExitCode": "0",
+ "verifyCmd": "$TC qdisc show dev $DUMMY",
+ "matchPattern": "qdisc fq 1: root refcnt [0-9]+ limit 10000p flow_limit 100p buckets.*initial_quantum 900000b",
+ "matchCount": "1",
+ "teardown": [
+ "$TC qdisc del dev $DUMMY handle 1: root"
+ ]
+ },
+ {
+ "id": "10f7",
+ "name": "Create FQ with invalid initial_quantum setting",
+ "category": [
+ "qdisc",
+ "fq"
+ ],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
+ "setup": [
+ ],
+ "cmdUnderTest": "$TC qdisc add dev $DUMMY handle 1: root fq initial_quantum 0x80000000",
+ "expExitCode": "2",
+ "verifyCmd": "$TC qdisc show dev $DUMMY",
+ "matchPattern": "qdisc fq 1: root.*initial_quantum 2048Mb",
+ "matchCount": "0",
+ "teardown": [
+ ]
+ },
+ {
+ "id": "9398",
+ "name": "Create FQ with maxrate setting",
+ "category": [
+ "qdisc",
+ "fq"
+ ],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
+ "setup": [
+ ],
+ "cmdUnderTest": "$TC qdisc add dev $DUMMY handle 1: root fq maxrate 100000",
+ "expExitCode": "0",
+ "verifyCmd": "$TC qdisc show dev $DUMMY",
+ "matchPattern": "qdisc fq 1: root refcnt [0-9]+ limit 10000p flow_limit 100p buckets.*maxrate 100Kbit",
+ "matchCount": "1",
+ "teardown": [
+ "$TC qdisc del dev $DUMMY handle 1: root"
+ ]
+ },
+ {
+ "id": "342c",
+ "name": "Create FQ with nopacing setting",
+ "category": [
+ "qdisc",
+ "fq"
+ ],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
+ "setup": [
+ ],
+ "cmdUnderTest": "$TC qdisc add dev $DUMMY handle 1: root fq nopacing",
+ "expExitCode": "0",
+ "verifyCmd": "$TC qdisc show dev $DUMMY",
+ "matchPattern": "qdisc fq 1: root refcnt [0-9]+ limit 10000p flow_limit 100p.*nopacing",
+ "matchCount": "1",
+ "teardown": [
+ "$TC qdisc del dev $DUMMY handle 1: root"
+ ]
+ },
+ {
+ "id": "6391",
+ "name": "Create FQ with refill_delay setting",
+ "category": [
+ "qdisc",
+ "fq"
+ ],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
+ "setup": [
+ ],
+ "cmdUnderTest": "$TC qdisc add dev $DUMMY handle 1: root fq refill_delay 100ms",
+ "expExitCode": "0",
+ "verifyCmd": "$TC qdisc show dev $DUMMY",
+ "matchPattern": "qdisc fq 1: root refcnt [0-9]+ limit 10000p flow_limit 100p.*refill_delay 100ms",
+ "matchCount": "1",
+ "teardown": [
+ "$TC qdisc del dev $DUMMY handle 1: root"
+ ]
+ },
+ {
+ "id": "238b",
+ "name": "Create FQ with low_rate_threshold setting",
+ "category": [
+ "qdisc",
+ "fq"
+ ],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
+ "setup": [
+ ],
+ "cmdUnderTest": "$TC qdisc add dev $DUMMY handle 1: root fq low_rate_threshold 10000",
+ "expExitCode": "0",
+ "verifyCmd": "$TC qdisc show dev $DUMMY",
+ "matchPattern": "qdisc fq 1: root refcnt [0-9]+ limit 10000p flow_limit 100p.*low_rate_threshold 10Kbit",
+ "matchCount": "1",
+ "teardown": [
+ "$TC qdisc del dev $DUMMY handle 1: root"
+ ]
+ },
+ {
+ "id": "7582",
+ "name": "Create FQ with orphan_mask setting",
+ "category": [
+ "qdisc",
+ "fq"
+ ],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
+ "setup": [
+ ],
+ "cmdUnderTest": "$TC qdisc add dev $DUMMY handle 1: root fq orphan_mask 255",
+ "expExitCode": "0",
+ "verifyCmd": "$TC qdisc show dev $DUMMY",
+ "matchPattern": "qdisc fq 1: root refcnt [0-9]+ limit 10000p flow_limit 100p.*orphan_mask 255",
+ "matchCount": "1",
+ "teardown": [
+ "$TC qdisc del dev $DUMMY handle 1: root"
+ ]
+ },
+ {
+ "id": "4894",
+ "name": "Create FQ with timer_slack setting",
+ "category": [
+ "qdisc",
+ "fq"
+ ],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
+ "setup": [
+ ],
+ "cmdUnderTest": "$TC qdisc add dev $DUMMY handle 1: root fq timer_slack 100",
+ "expExitCode": "0",
+ "verifyCmd": "$TC qdisc show dev $DUMMY",
+ "matchPattern": "qdisc fq 1: root refcnt [0-9]+ limit 10000p flow_limit 100p.*timer_slack 100ns",
+ "matchCount": "1",
+ "teardown": [
+ "$TC qdisc del dev $DUMMY handle 1: root"
+ ]
+ },
+ {
+ "id": "324c",
+ "name": "Create FQ with ce_threshold setting",
+ "category": [
+ "qdisc",
+ "fq"
+ ],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
+ "setup": [
+ ],
+ "cmdUnderTest": "$TC qdisc add dev $DUMMY handle 1: root fq ce_threshold 100",
+ "expExitCode": "0",
+ "verifyCmd": "$TC qdisc show dev $DUMMY",
+ "matchPattern": "qdisc fq 1: root refcnt [0-9]+ limit 10000p flow_limit 100p",
+ "matchCount": "1",
+ "teardown": [
+ "$TC qdisc del dev $DUMMY handle 1: root"
+ ]
+ },
+ {
+ "id": "424a",
+ "name": "Create FQ with horizon time setting",
+ "category": [
+ "qdisc",
+ "fq"
+ ],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
+ "setup": [
+ ],
+ "cmdUnderTest": "$TC qdisc add dev $DUMMY handle 1: root fq horizon 100",
+ "expExitCode": "0",
+ "verifyCmd": "$TC qdisc show dev $DUMMY",
+ "matchPattern": "qdisc fq 1: root refcnt [0-9]+ limit 10000p flow_limit 100p.*horizon 100us",
+ "matchCount": "1",
+ "teardown": [
+ "$TC qdisc del dev $DUMMY handle 1: root"
+ ]
+ },
+ {
+ "id": "89e1",
+ "name": "Create FQ with horizon_cap setting",
+ "category": [
+ "qdisc",
+ "fq"
+ ],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
+ "setup": [
+ ],
+ "cmdUnderTest": "$TC qdisc add dev $DUMMY handle 1: root fq horizon_cap",
+ "expExitCode": "0",
+ "verifyCmd": "$TC qdisc show dev $DUMMY",
+ "matchPattern": "qdisc fq 1: root refcnt [0-9]+ limit 10000p flow_limit 100p.*horizon_cap",
+ "matchCount": "1",
+ "teardown": [
+ "$TC qdisc del dev $DUMMY handle 1: root"
+ ]
+ },
+ {
+ "id": "32e1",
+ "name": "Delete FQ with valid handle",
+ "category": [
+ "qdisc",
+ "fq"
+ ],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
+ "setup": [
+ "$TC qdisc add dev $DUMMY handle 1: root fq"
+ ],
+ "cmdUnderTest": "$TC qdisc del dev $DUMMY handle 1: root",
+ "expExitCode": "0",
+ "verifyCmd": "$TC qdisc show dev $DUMMY",
+ "matchPattern": "qdisc fq 1: root refcnt [0-9]+ limit 10000p",
+ "matchCount": "0",
+ "teardown": [
+ ]
+ },
+ {
+ "id": "49b0",
+ "name": "Replace FQ with limit setting",
+ "category": [
+ "qdisc",
+ "fq"
+ ],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
+ "setup": [
+ "$TC qdisc add dev $DUMMY handle 1: root fq"
+ ],
+ "cmdUnderTest": "$TC qdisc replace dev $DUMMY handle 1: root fq limit 5000",
+ "expExitCode": "0",
+ "verifyCmd": "$TC qdisc show dev $DUMMY",
+ "matchPattern": "qdisc fq 1: root refcnt [0-9]+ limit 5000p",
+ "matchCount": "1",
+ "teardown": [
+ "$TC qdisc del dev $DUMMY handle 1: root"
+ ]
+ },
+ {
+ "id": "9478",
+ "name": "Change FQ with limit setting",
+ "category": [
+ "qdisc",
+ "fq"
+ ],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
+ "setup": [
+ "$TC qdisc add dev $DUMMY handle 1: root fq"
+ ],
+ "cmdUnderTest": "$TC qdisc change dev $DUMMY handle 1: root fq limit 100",
+ "expExitCode": "0",
+ "verifyCmd": "$TC qdisc show dev $DUMMY",
+ "matchPattern": "qdisc fq 1: root refcnt [0-9]+ limit 100p",
+ "matchCount": "1",
+ "teardown": [
+ "$TC qdisc del dev $DUMMY handle 1: root"
+ ]
+ }
+]
diff --git a/tools/testing/selftests/tc-testing/tc-tests/qdiscs/fq_codel.json b/tools/testing/selftests/tc-testing/tc-tests/qdiscs/fq_codel.json
new file mode 100644
index 000000000000..9774b1e8801b
--- /dev/null
+++ b/tools/testing/selftests/tc-testing/tc-tests/qdiscs/fq_codel.json
@@ -0,0 +1,298 @@
+[
+ {
+ "id": "4957",
+ "name": "Create FQ_CODEL with default setting",
+ "category": [
+ "qdisc",
+ "fq_codel"
+ ],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
+ "setup": [
+ ],
+ "cmdUnderTest": "$TC qdisc add dev $DUMMY handle 1: root fq_codel",
+ "expExitCode": "0",
+ "verifyCmd": "$TC qdisc show dev $DUMMY",
+ "matchPattern": "qdisc fq_codel 1: root refcnt [0-9]+ limit 10240p flows 1024 quantum.*target 5ms interval 100ms memory_limit 32Mb ecn drop_batch 64",
+ "matchCount": "1",
+ "teardown": [
+ "$TC qdisc del dev $DUMMY handle 1: root"
+ ]
+ },
+ {
+ "id": "7621",
+ "name": "Create FQ_CODEL with limit setting",
+ "category": [
+ "qdisc",
+ "fq_codel"
+ ],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
+ "setup": [
+ ],
+ "cmdUnderTest": "$TC qdisc add dev $DUMMY handle 1: root fq_codel limit 1000",
+ "expExitCode": "0",
+ "verifyCmd": "$TC qdisc show dev $DUMMY",
+ "matchPattern": "qdisc fq_codel 1: root refcnt [0-9]+ limit 1000p flows 1024 quantum.*target 5ms interval 100ms memory_limit 32Mb ecn drop_batch 64",
+ "matchCount": "1",
+ "teardown": [
+ "$TC qdisc del dev $DUMMY handle 1: root"
+ ]
+ },
+ {
+ "id": "6871",
+ "name": "Create FQ_CODEL with memory_limit setting",
+ "category": [
+ "qdisc",
+ "fq_codel"
+ ],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
+ "setup": [
+ ],
+ "cmdUnderTest": "$TC qdisc add dev $DUMMY handle 1: root fq_codel memory_limit 100000",
+ "expExitCode": "0",
+ "verifyCmd": "$TC qdisc show dev $DUMMY",
+ "matchPattern": "qdisc fq_codel 1: root refcnt [0-9]+ limit 10240p flows 1024 quantum.*target 5ms interval 100ms memory_limit 100000b ecn drop_batch 64",
+ "matchCount": "1",
+ "teardown": [
+ "$TC qdisc del dev $DUMMY handle 1: root"
+ ]
+ },
+ {
+ "id": "5636",
+ "name": "Create FQ_CODEL with target setting",
+ "category": [
+ "qdisc",
+ "fq_codel"
+ ],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
+ "setup": [
+ ],
+ "cmdUnderTest": "$TC qdisc add dev $DUMMY handle 1: root fq_codel target 2000",
+ "expExitCode": "0",
+ "verifyCmd": "$TC qdisc show dev $DUMMY",
+ "matchPattern": "qdisc fq_codel 1: root refcnt [0-9]+ limit 10240p flows 1024 quantum.*target 2ms interval 100ms memory_limit 32Mb ecn drop_batch 64",
+ "matchCount": "1",
+ "teardown": [
+ "$TC qdisc del dev $DUMMY handle 1: root"
+ ]
+ },
+ {
+ "id": "630a",
+ "name": "Create FQ_CODEL with interval setting",
+ "category": [
+ "qdisc",
+ "fq_codel"
+ ],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
+ "setup": [
+ ],
+ "cmdUnderTest": "$TC qdisc add dev $DUMMY handle 1: root fq_codel interval 5000",
+ "expExitCode": "0",
+ "verifyCmd": "$TC qdisc show dev $DUMMY",
+ "matchPattern": "qdisc fq_codel 1: root refcnt [0-9]+ limit 10240p flows 1024 quantum.*target 5ms interval 5ms memory_limit 32Mb ecn drop_batch 64",
+ "matchCount": "1",
+ "teardown": [
+ "$TC qdisc del dev $DUMMY handle 1: root"
+ ]
+ },
+ {
+ "id": "4324",
+ "name": "Create FQ_CODEL with quantum setting",
+ "category": [
+ "qdisc",
+ "fq_codel"
+ ],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
+ "setup": [
+ ],
+ "cmdUnderTest": "$TC qdisc add dev $DUMMY handle 1: root fq_codel quantum 9000",
+ "expExitCode": "0",
+ "verifyCmd": "$TC qdisc show dev $DUMMY",
+ "matchPattern": "qdisc fq_codel 1: root refcnt [0-9]+ limit 10240p flows 1024 quantum 9000 target 5ms interval 100ms memory_limit 32Mb ecn drop_batch 64",
+ "matchCount": "1",
+ "teardown": [
+ "$TC qdisc del dev $DUMMY handle 1: root"
+ ]
+ },
+ {
+ "id": "b190",
+ "name": "Create FQ_CODEL with noecn flag",
+ "category": [
+ "qdisc",
+ "fq_codel"
+ ],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
+ "setup": [
+ ],
+ "cmdUnderTest": "$TC qdisc add dev $DUMMY handle 1: root fq_codel noecn",
+ "expExitCode": "0",
+ "verifyCmd": "$TC qdisc show dev $DUMMY",
+ "matchPattern": "qdisc fq_codel 1: root refcnt [0-9]+ limit 10240p flows 1024 quantum.*target 5ms interval 100ms memory_limit 32Mb drop_batch 64",
+ "matchCount": "1",
+ "teardown": [
+ "$TC qdisc del dev $DUMMY handle 1: root"
+ ]
+ },
+ {
+ "id": "5381",
+ "name": "Create FQ_CODEL with ce_threshold setting",
+ "category": [
+ "qdisc",
+ "fq_codel"
+ ],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
+ "setup": [
+ ],
+ "cmdUnderTest": "$TC qdisc add dev $DUMMY handle 1: root fq_codel ce_threshold 1024000",
+ "expExitCode": "0",
+ "verifyCmd": "$TC qdisc show dev $DUMMY",
+ "matchPattern": "qdisc fq_codel 1: root refcnt [0-9]+ limit 10240p flows 1024 quantum.*target 5ms ce_threshold 1.02s interval 100ms memory_limit 32Mb ecn drop_batch 64",
+ "matchCount": "1",
+ "teardown": [
+ "$TC qdisc del dev $DUMMY handle 1: root"
+ ]
+ },
+ {
+ "id": "c9d2",
+ "name": "Create FQ_CODEL with drop_batch setting",
+ "category": [
+ "qdisc",
+ "fq_codel"
+ ],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
+ "setup": [
+ ],
+ "cmdUnderTest": "$TC qdisc add dev $DUMMY handle 1: root fq_codel drop_batch 100",
+ "expExitCode": "0",
+ "verifyCmd": "$TC qdisc show dev $DUMMY",
+ "matchPattern": "qdisc fq_codel 1: root refcnt [0-9]+ limit 10240p flows 1024 quantum.*target 5ms interval 100ms memory_limit 32Mb ecn drop_batch 100",
+ "matchCount": "1",
+ "teardown": [
+ "$TC qdisc del dev $DUMMY handle 1: root"
+ ]
+ },
+ {
+ "id": "523b",
+ "name": "Create FQ_CODEL with multiple setting",
+ "category": [
+ "qdisc",
+ "fq_codel"
+ ],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
+ "setup": [
+ ],
+ "cmdUnderTest": "$TC qdisc add dev $DUMMY handle 1: root fq_codel limit 1000 flows 256 drop_batch 100",
+ "expExitCode": "0",
+ "verifyCmd": "$TC qdisc show dev $DUMMY",
+ "matchPattern": "qdisc fq_codel 1: root refcnt [0-9]+ limit 1000p flows 256 quantum.*target 5ms interval 100ms memory_limit 32Mb ecn drop_batch 100",
+ "matchCount": "1",
+ "teardown": [
+ "$TC qdisc del dev $DUMMY handle 1: root"
+ ]
+ },
+ {
+ "id": "9283",
+ "name": "Replace FQ_CODEL with noecn setting",
+ "category": [
+ "qdisc",
+ "fq_codel"
+ ],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
+ "setup": [
+ "$TC qdisc add dev $DUMMY handle 1: root fq_codel limit 1000 flows 256 drop_batch 100"
+ ],
+ "cmdUnderTest": "$TC qdisc replace dev $DUMMY handle 1: root fq_codel noecn",
+ "expExitCode": "0",
+ "verifyCmd": "$TC qdisc show dev $DUMMY",
+ "matchPattern": "qdisc fq_codel 1: root refcnt [0-9]+ limit 1000p flows 256 quantum.*target 5ms interval 100ms memory_limit 32Mb drop_batch 100",
+ "matchCount": "1",
+ "teardown": [
+ "$TC qdisc del dev $DUMMY handle 1: root"
+ ]
+ },
+ {
+ "id": "3459",
+ "name": "Change FQ_CODEL with limit setting",
+ "category": [
+ "qdisc",
+ "fq_codel"
+ ],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
+ "setup": [
+ "$TC qdisc add dev $DUMMY handle 1: root fq_codel limit 1000 flows 256 drop_batch 100"
+ ],
+ "cmdUnderTest": "$TC qdisc change dev $DUMMY handle 1: root fq_codel limit 2000",
+ "expExitCode": "0",
+ "verifyCmd": "$TC qdisc show dev $DUMMY",
+ "matchPattern": "qdisc fq_codel 1: root refcnt [0-9]+ limit 2000p flows 256 quantum.*target 5ms interval 100ms memory_limit 32Mb ecn drop_batch 100",
+ "matchCount": "1",
+ "teardown": [
+ "$TC qdisc del dev $DUMMY handle 1: root"
+ ]
+ },
+ {
+ "id": "0128",
+ "name": "Delete FQ_CODEL with handle",
+ "category": [
+ "qdisc",
+ "fq_codel"
+ ],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
+ "setup": [
+ "$TC qdisc add dev $DUMMY handle 1: root fq_codel limit 1000 flows 256 drop_batch 100"
+ ],
+ "cmdUnderTest": "$TC qdisc del dev $DUMMY handle 1: root",
+ "expExitCode": "0",
+ "verifyCmd": "$TC qdisc show dev $DUMMY",
+ "matchPattern": "qdisc fq_codel 1: root refcnt [0-9]+ limit 1000p flows 256 quantum.*target 5ms interval 100ms memory_limit 32Mb noecn drop_batch 100",
+ "matchCount": "0",
+ "teardown": [
+ ]
+ },
+ {
+ "id": "0435",
+ "name": "Show FQ_CODEL class",
+ "category": [
+ "qdisc",
+ "fq_codel"
+ ],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
+ "setup": [
+ ],
+ "cmdUnderTest": "$TC qdisc add dev $DUMMY handle 1: root fq_codel",
+ "expExitCode": "0",
+ "verifyCmd": "$TC class show dev $DUMMY",
+ "matchPattern": "class fq_codel 1:",
+ "matchCount": "0",
+ "teardown": [
+ "$TC qdisc del dev $DUMMY handle 1: root"
+ ]
+ }
+]
diff --git a/tools/testing/selftests/tc-testing/tc-tests/qdiscs/fq_pie.json b/tools/testing/selftests/tc-testing/tc-tests/qdiscs/fq_pie.json
index 1cda2e11b3ad..d012d88d67fe 100644
--- a/tools/testing/selftests/tc-testing/tc-tests/qdiscs/fq_pie.json
+++ b/tools/testing/selftests/tc-testing/tc-tests/qdiscs/fq_pie.json
@@ -6,16 +6,17 @@
"qdisc",
"fq_pie"
],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
"setup": [
- "$IP link add dev $DUMMY type dummy || /bin/true"
],
- "cmdUnderTest": "$TC qdisc add dev $DUMMY root fq_pie flows 65536",
- "expExitCode": "2",
+ "cmdUnderTest": "$TC qdisc add dev $DUMMY handle 1: root fq_pie flows 65536",
+ "expExitCode": "0",
"verifyCmd": "$TC qdisc show dev $DUMMY",
- "matchPattern": "qdisc",
- "matchCount": "0",
+ "matchPattern": "qdisc fq_pie 1: root refcnt 2 limit 10240p flows 65536",
+ "matchCount": "1",
"teardown": [
- "$IP link del dev $DUMMY"
]
}
]
diff --git a/tools/testing/selftests/tc-testing/tc-tests/qdiscs/gred.json b/tools/testing/selftests/tc-testing/tc-tests/qdiscs/gred.json
new file mode 100644
index 000000000000..df07fe318de9
--- /dev/null
+++ b/tools/testing/selftests/tc-testing/tc-tests/qdiscs/gred.json
@@ -0,0 +1,150 @@
+[
+ {
+ "id": "8942",
+ "name": "Create GRED with default setting",
+ "category": [
+ "qdisc",
+ "gred"
+ ],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
+ "setup": [
+ ],
+ "cmdUnderTest": "$TC qdisc add dev $DUMMY handle 1: root gred setup vqs 10 default 1",
+ "expExitCode": "0",
+ "verifyCmd": "$TC qdisc show dev $DUMMY",
+ "matchPattern": "qdisc gred 1: root refcnt [0-9]+ vqs 10 default 1",
+ "matchCount": "1",
+ "teardown": [
+ "$TC qdisc del dev $DUMMY handle 1: root"
+ ]
+ },
+ {
+ "id": "5783",
+ "name": "Create GRED with grio setting",
+ "category": [
+ "qdisc",
+ "gred"
+ ],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
+ "setup": [
+ ],
+ "cmdUnderTest": "$TC qdisc add dev $DUMMY handle 1: root gred setup vqs 10 default 1 grio",
+ "expExitCode": "0",
+ "verifyCmd": "$TC qdisc show dev $DUMMY",
+ "matchPattern": "qdisc gred 1: root refcnt [0-9]+ vqs 10 default 1.*grio",
+ "matchCount": "1",
+ "teardown": [
+ "$TC qdisc del dev $DUMMY handle 1: root"
+ ]
+ },
+ {
+ "id": "8a09",
+ "name": "Create GRED with limit setting",
+ "category": [
+ "qdisc",
+ "gred"
+ ],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
+ "setup": [
+ ],
+ "cmdUnderTest": "$TC qdisc add dev $DUMMY handle 1: root gred setup vqs 10 default 1 limit 1000",
+ "expExitCode": "0",
+ "verifyCmd": "$TC qdisc show dev $DUMMY",
+ "matchPattern": "qdisc gred 1: root refcnt [0-9]+ vqs 10 default 1 limit 1000b",
+ "matchCount": "1",
+ "teardown": [
+ "$TC qdisc del dev $DUMMY handle 1: root"
+ ]
+ },
+ {
+ "id": "48ca",
+ "name": "Create GRED with ecn setting",
+ "category": [
+ "qdisc",
+ "gred"
+ ],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
+ "setup": [
+ ],
+ "cmdUnderTest": "$TC qdisc add dev $DUMMY handle 1: root gred setup vqs 10 default 2 ecn",
+ "expExitCode": "0",
+ "verifyCmd": "$TC qdisc show dev $DUMMY",
+ "matchPattern": "qdisc gred 1: root refcnt [0-9]+ vqs 10 default 2.*ecn",
+ "matchCount": "1",
+ "teardown": [
+ "$TC qdisc del dev $DUMMY handle 1: root"
+ ]
+ },
+ {
+ "id": "48cb",
+ "name": "Create GRED with harddrop setting",
+ "category": [
+ "qdisc",
+ "gred"
+ ],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
+ "setup": [
+ ],
+ "cmdUnderTest": "$TC qdisc add dev $DUMMY handle 1: root gred setup vqs 10 default 2 harddrop",
+ "expExitCode": "0",
+ "verifyCmd": "$TC qdisc show dev $DUMMY",
+ "matchPattern": "qdisc gred 1: root refcnt [0-9]+ vqs 10 default 2.*harddrop",
+ "matchCount": "1",
+ "teardown": [
+ "$TC qdisc del dev $DUMMY handle 1: root"
+ ]
+ },
+ {
+ "id": "763a",
+ "name": "Change GRED setting",
+ "category": [
+ "qdisc",
+ "gred"
+ ],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
+ "setup": [
+ "$TC qdisc add dev $DUMMY handle 1: root gred setup vqs 10 default 1"
+ ],
+ "cmdUnderTest": "$TC qdisc change dev $DUMMY handle 1: root gred limit 60KB min 15K max 25K burst 64 avpkt 1500 bandwidth 10Mbit DP 1 probability 0.1",
+ "expExitCode": "0",
+ "verifyCmd": "$TC qdisc show dev $DUMMY",
+ "matchPattern": "qdisc gred 1: root refcnt [0-9]+ vqs 10 default 1 limit.*vq 1 prio [0-9]+ limit 60Kb min 15Kb max 25Kb",
+ "matchCount": "1",
+ "teardown": [
+ "$TC qdisc del dev $DUMMY handle 1: root"
+ ]
+ },
+ {
+ "id": "8309",
+ "name": "Show GRED class",
+ "category": [
+ "qdisc",
+ "gred"
+ ],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
+ "setup": [
+ ],
+ "cmdUnderTest": "$TC qdisc add dev $DUMMY handle 1: root gred setup vqs 10 default 1",
+ "expExitCode": "0",
+ "verifyCmd": "$TC class show dev $DUMMY",
+ "matchPattern": "class gred 1:",
+ "matchCount": "0",
+ "teardown": [
+ "$TC qdisc del dev $DUMMY handle 1: root"
+ ]
+ }
+]
diff --git a/tools/testing/selftests/tc-testing/tc-tests/qdiscs/hfsc.json b/tools/testing/selftests/tc-testing/tc-tests/qdiscs/hfsc.json
new file mode 100644
index 000000000000..c98c339424d4
--- /dev/null
+++ b/tools/testing/selftests/tc-testing/tc-tests/qdiscs/hfsc.json
@@ -0,0 +1,173 @@
+[
+ {
+ "id": "3254",
+ "name": "Create HFSC with default setting",
+ "category": [
+ "qdisc",
+ "hfsc"
+ ],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
+ "setup": [],
+ "cmdUnderTest": "$TC qdisc add dev $DUMMY handle 1: root hfsc",
+ "expExitCode": "0",
+ "verifyCmd": "$TC qdisc show dev $DUMMY",
+ "matchPattern": "qdisc hfsc 1: root refcnt [0-9]+",
+ "matchCount": "1",
+ "teardown": [
+ "$TC qdisc del dev $DUMMY handle 1: root"
+ ]
+ },
+ {
+ "id": "0289",
+ "name": "Create HFSC with class sc and ul rate setting",
+ "category": [
+ "qdisc",
+ "hfsc"
+ ],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
+ "setup": [
+ "$TC qdisc add dev $DUMMY handle 1: root hfsc default 11"
+ ],
+ "cmdUnderTest": "$TC class add dev $DUMMY parent 1: classid 1:1 hfsc sc rate 20000 ul rate 10000",
+ "expExitCode": "0",
+ "verifyCmd": "$TC class show dev $DUMMY",
+ "matchPattern": "class hfsc 1:1 parent 1: sc m1 0bit d 0us m2 20Kbit ul m1 0bit d 0us m2 10Kbit",
+ "matchCount": "1",
+ "teardown": [
+ "$TC qdisc del dev $DUMMY handle 1: root"
+ ]
+ },
+ {
+ "id": "846a",
+ "name": "Create HFSC with class sc umax and dmax setting",
+ "category": [
+ "qdisc",
+ "hfsc"
+ ],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
+ "setup": [
+ "$TC qdisc add dev $DUMMY handle 1: root hfsc default 11"
+ ],
+ "cmdUnderTest": "$TC class add dev $DUMMY parent 1: classid 1:1 hfsc sc umax 1540 dmax 5ms rate 10000 ul rate 10000",
+ "expExitCode": "0",
+ "verifyCmd": "$TC class show dev $DUMMY",
+ "matchPattern": "class hfsc 1:1 parent 1: sc m1 2464Kbit d 5ms m2 10Kbit ul m1 0bit d 0us m2 10Kbit",
+ "matchCount": "1",
+ "teardown": [
+ "$TC qdisc del dev $DUMMY handle 1: root"
+ ]
+ },
+ {
+ "id": "5413",
+ "name": "Create HFSC with class rt and ls rate setting",
+ "category": [
+ "qdisc",
+ "hfsc"
+ ],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
+ "setup": [
+ "$TC qdisc add dev $DUMMY handle 1: root hfsc default 11"
+ ],
+ "cmdUnderTest": "$TC class add dev $DUMMY parent 1: classid 1:1 hfsc rt rate 20000 ls rate 10000",
+ "expExitCode": "0",
+ "verifyCmd": "$TC class show dev $DUMMY",
+ "matchPattern": "class hfsc 1:1 parent 1: rt m1 0bit d 0us m2 20Kbit ls m1 0bit d 0us m2 10Kbit",
+ "matchCount": "1",
+ "teardown": [
+ "$TC qdisc del dev $DUMMY handle 1: root"
+ ]
+ },
+ {
+ "id": "9312",
+ "name": "Create HFSC with class rt umax and dmax setting",
+ "category": [
+ "qdisc",
+ "hfsc"
+ ],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
+ "setup": [
+ "$TC qdisc add dev $DUMMY handle 1: root hfsc default 11"
+ ],
+ "cmdUnderTest": "$TC class add dev $DUMMY parent 1: classid 1:1 hfsc rt umax 1540 dmax 5ms rate 10000 ls rate 10000",
+ "expExitCode": "0",
+ "verifyCmd": "$TC class show dev $DUMMY",
+ "matchPattern": "class hfsc 1:1 parent 1: rt m1 2464Kbit d 5ms m2 10Kbit ls m1 0bit d 0us m2 10Kbit",
+ "matchCount": "1",
+ "teardown": [
+ "$TC qdisc del dev $DUMMY handle 1: root"
+ ]
+ },
+ {
+ "id": "6931",
+ "name": "Delete HFSC with handle",
+ "category": [
+ "qdisc",
+ "hfsc"
+ ],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
+ "setup": [
+ "$TC qdisc add dev $DUMMY handle 1: root hfsc default 11"
+ ],
+ "cmdUnderTest": "$TC qdisc del dev $DUMMY handle 1: root",
+ "expExitCode": "0",
+ "verifyCmd": "$TC qdisc show dev $DUMMY",
+ "matchPattern": "qdisc hfsc 1: root refcnt [0-9]+",
+ "matchCount": "0",
+ "teardown": []
+ },
+ {
+ "id": "8436",
+ "name": "Show HFSC class",
+ "category": [
+ "qdisc",
+ "hfsc"
+ ],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
+ "setup": [],
+ "cmdUnderTest": "$TC qdisc add dev $DUMMY handle 1: root hfsc",
+ "expExitCode": "0",
+ "verifyCmd": "$TC class show dev $DUMMY",
+ "matchPattern": "class hfsc 1: root",
+ "matchCount": "1",
+ "teardown": [
+ "$TC qdisc del dev $DUMMY handle 1: root"
+ ]
+ },
+ {
+ "id": "bef4",
+ "name": "HFSC rt inner class upgrade to sc",
+ "category": [
+ "qdisc",
+ "hfsc"
+ ],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
+ "setup": [
+ "$TC qdisc add dev $DUMMY handle 1: root hfsc default 1",
+ "$TC class add dev $DUMMY parent 1: classid 1:1 hfsc rt rate 8"
+ ],
+ "cmdUnderTest": "$TC class add dev $DUMMY parent 1:1 classid 1:2 hfsc rt rate 8",
+ "expExitCode": "0",
+ "verifyCmd": "$TC class show dev $DUMMY",
+ "matchPattern": "class hfsc 1:1 parent 1: sc m1 0bit d 0us m2 8bit.*rt m1 0bit d 0us m2 8bit",
+ "matchCount": "1",
+ "teardown": [
+ "$TC qdisc del dev $DUMMY handle 1: root"
+ ]
+ }
+]
diff --git a/tools/testing/selftests/tc-testing/tc-tests/qdiscs/hhf.json b/tools/testing/selftests/tc-testing/tc-tests/qdiscs/hhf.json
new file mode 100644
index 000000000000..dbef5474b26b
--- /dev/null
+++ b/tools/testing/selftests/tc-testing/tc-tests/qdiscs/hhf.json
@@ -0,0 +1,192 @@
+[
+ {
+ "id": "4812",
+ "name": "Create HHF with default setting",
+ "category": [
+ "qdisc",
+ "hhf"
+ ],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
+ "setup": [
+ ],
+ "cmdUnderTest": "$TC qdisc add dev $DUMMY handle 1: root hhf",
+ "expExitCode": "0",
+ "verifyCmd": "$TC qdisc show dev $DUMMY",
+ "matchPattern": "qdisc hhf 1: root refcnt [0-9]+.*hh_limit 2048 reset_timeout 40ms admit_bytes 128Kb evict_timeout 1s non_hh_weight 2",
+ "matchCount": "1",
+ "teardown": [
+ "$TC qdisc del dev $DUMMY handle 1: root"
+ ]
+ },
+ {
+ "id": "8a92",
+ "name": "Create HHF with limit setting",
+ "category": [
+ "qdisc",
+ "hhf"
+ ],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
+ "setup": [
+ ],
+ "cmdUnderTest": "$TC qdisc add dev $DUMMY handle 1: root hhf limit 1500",
+ "expExitCode": "0",
+ "verifyCmd": "$TC qdisc show dev $DUMMY",
+ "matchPattern": "qdisc hhf 1: root refcnt [0-9]+ limit 1500p.*hh_limit 2048 reset_timeout 40ms admit_bytes 128Kb evict_timeout 1s non_hh_weight 2",
+ "matchCount": "1",
+ "teardown": [
+ "$TC qdisc del dev $DUMMY handle 1: root"
+ ]
+ },
+ {
+ "id": "3491",
+ "name": "Create HHF with quantum setting",
+ "category": [
+ "qdisc",
+ "hhf"
+ ],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
+ "setup": [
+ ],
+ "cmdUnderTest": "$TC qdisc add dev $DUMMY handle 1: root hhf quantum 9000",
+ "expExitCode": "0",
+ "verifyCmd": "$TC qdisc show dev $DUMMY",
+ "matchPattern": "qdisc hhf 1: root refcnt [0-9]+.*quantum 9000b hh_limit 2048 reset_timeout 40ms admit_bytes 128Kb evict_timeout 1s non_hh_weight 2",
+ "matchCount": "1",
+ "teardown": [
+ "$TC qdisc del dev $DUMMY handle 1: root"
+ ]
+ },
+ {
+ "id": "ba04",
+ "name": "Create HHF with reset_timeout setting",
+ "category": [
+ "qdisc",
+ "hhf"
+ ],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
+ "setup": [
+ ],
+ "cmdUnderTest": "$TC qdisc add dev $DUMMY handle 1: root hhf reset_timeout 100ms",
+ "expExitCode": "0",
+ "verifyCmd": "$TC qdisc show dev $DUMMY",
+ "matchPattern": "qdisc hhf 1: root refcnt [0-9]+.*hh_limit 2048 reset_timeout 100ms admit_bytes 128Kb evict_timeout 1s non_hh_weight 2",
+ "matchCount": "1",
+ "teardown": [
+ "$TC qdisc del dev $DUMMY handle 1: root"
+ ]
+ },
+ {
+ "id": "4238",
+ "name": "Create HHF with admit_bytes setting",
+ "category": [
+ "qdisc",
+ "hhf"
+ ],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
+ "setup": [
+ ],
+ "cmdUnderTest": "$TC qdisc add dev $DUMMY handle 1: root hhf admit_bytes 100000",
+ "expExitCode": "0",
+ "verifyCmd": "$TC qdisc show dev $DUMMY",
+ "matchPattern": "qdisc hhf 1: root refcnt [0-9]+.*hh_limit 2048 reset_timeout 40ms admit_bytes 100000b evict_timeout 1s non_hh_weight 2",
+ "matchCount": "1",
+ "teardown": [
+ "$TC qdisc del dev $DUMMY handle 1: root"
+ ]
+ },
+ {
+ "id": "839f",
+ "name": "Create HHF with evict_timeout setting",
+ "category": [
+ "qdisc",
+ "hhf"
+ ],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
+ "setup": [
+ ],
+ "cmdUnderTest": "$TC qdisc add dev $DUMMY handle 1: root hhf evict_timeout 0.5s",
+ "expExitCode": "0",
+ "verifyCmd": "$TC qdisc show dev $DUMMY",
+ "matchPattern": "qdisc hhf 1: root refcnt [0-9]+.*hh_limit 2048 reset_timeout 40ms admit_bytes 128Kb evict_timeout 500ms non_hh_weight 2",
+ "matchCount": "1",
+ "teardown": [
+ "$TC qdisc del dev $DUMMY handle 1: root"
+ ]
+ },
+ {
+ "id": "a044",
+ "name": "Create HHF with non_hh_weight setting",
+ "category": [
+ "qdisc",
+ "hhf"
+ ],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
+ "setup": [
+ ],
+ "cmdUnderTest": "$TC qdisc add dev $DUMMY handle 1: root hhf non_hh_weight 10",
+ "expExitCode": "0",
+ "verifyCmd": "$TC qdisc show dev $DUMMY",
+ "matchPattern": "qdisc hhf 1: root refcnt [0-9]+.*hh_limit 2048 reset_timeout 40ms admit_bytes 128Kb evict_timeout 1s non_hh_weight 10",
+ "matchCount": "1",
+ "teardown": [
+ "$TC qdisc del dev $DUMMY handle 1: root"
+ ]
+ },
+ {
+ "id": "32f9",
+ "name": "Change HHF with limit setting",
+ "category": [
+ "qdisc",
+ "hhf"
+ ],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
+ "setup": [
+ "$TC qdisc add dev $DUMMY handle 1: root hhf"
+ ],
+ "cmdUnderTest": "$TC qdisc change dev $DUMMY handle 1: root hhf limit 1500",
+ "expExitCode": "0",
+ "verifyCmd": "$TC qdisc show dev $DUMMY",
+ "matchPattern": "qdisc hhf 1: root refcnt [0-9]+ limit 1500p.*hh_limit 2048 reset_timeout 40ms admit_bytes 128Kb evict_timeout 1s non_hh_weight 2",
+ "matchCount": "1",
+ "teardown": [
+ "$TC qdisc del dev $DUMMY handle 1: root"
+ ]
+ },
+ {
+ "id": "385e",
+ "name": "Show HHF class",
+ "category": [
+ "qdisc",
+ "hhf"
+ ],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
+ "setup": [
+ ],
+ "cmdUnderTest": "$TC qdisc add dev $DUMMY handle 1: root hhf",
+ "expExitCode": "0",
+ "verifyCmd": "$TC class show dev $DUMMY",
+ "matchPattern": "class hhf 1:",
+ "matchCount": "0",
+ "teardown": [
+ "$TC qdisc del dev $DUMMY handle 1: root"
+ ]
+ }
+]
diff --git a/tools/testing/selftests/tc-testing/tc-tests/qdiscs/htb.json b/tools/testing/selftests/tc-testing/tc-tests/qdiscs/htb.json
new file mode 100644
index 000000000000..cab745f9a83c
--- /dev/null
+++ b/tools/testing/selftests/tc-testing/tc-tests/qdiscs/htb.json
@@ -0,0 +1,261 @@
+[
+ {
+ "id": "0904",
+ "name": "Create HTB with default setting",
+ "category": [
+ "qdisc",
+ "htb"
+ ],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
+ "setup": [
+ ],
+ "cmdUnderTest": "$TC qdisc add dev $DUMMY handle 1: root htb",
+ "expExitCode": "0",
+ "verifyCmd": "$TC qdisc show dev $DUMMY",
+ "matchPattern": "qdisc htb 1: root refcnt [0-9]+ r2q 10 default 0 direct_packets_stat.*direct_qlen",
+ "matchCount": "1",
+ "teardown": [
+ "$TC qdisc del dev $DUMMY handle 1: root"
+ ]
+ },
+ {
+ "id": "3906",
+ "name": "Create HTB with default-N setting",
+ "category": [
+ "qdisc",
+ "htb"
+ ],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
+ "setup": [
+ ],
+ "cmdUnderTest": "$TC qdisc add dev $DUMMY handle 1: root htb default 10",
+ "expExitCode": "0",
+ "verifyCmd": "$TC qdisc show dev $DUMMY",
+ "matchPattern": "qdisc htb 1: root refcnt [0-9]+ r2q 10 default 0x10 direct_packets_stat.* direct_qlen",
+ "matchCount": "1",
+ "teardown": [
+ "$TC qdisc del dev $DUMMY handle 1: root"
+ ]
+ },
+ {
+ "id": "8492",
+ "name": "Create HTB with r2q setting",
+ "category": [
+ "qdisc",
+ "htb"
+ ],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
+ "setup": [
+ ],
+ "cmdUnderTest": "$TC qdisc add dev $DUMMY handle 1: root htb r2q 5",
+ "expExitCode": "0",
+ "verifyCmd": "$TC qdisc show dev $DUMMY",
+ "matchPattern": "qdisc htb 1: root refcnt [0-9]+ r2q 5 default 0 direct_packets_stat.*direct_qlen",
+ "matchCount": "1",
+ "teardown": [
+ "$TC qdisc del dev $DUMMY handle 1: root"
+ ]
+ },
+ {
+ "id": "9502",
+ "name": "Create HTB with direct_qlen setting",
+ "category": [
+ "qdisc",
+ "htb"
+ ],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
+ "setup": [
+ ],
+ "cmdUnderTest": "$TC qdisc add dev $DUMMY handle 1: root htb direct_qlen 1024",
+ "expExitCode": "0",
+ "verifyCmd": "$TC qdisc show dev $DUMMY",
+ "matchPattern": "qdisc htb 1: root refcnt [0-9]+ r2q 10 default 0 direct_packets_stat.*direct_qlen 1024",
+ "matchCount": "1",
+ "teardown": [
+ "$TC qdisc del dev $DUMMY handle 1: root"
+ ]
+ },
+ {
+ "id": "b924",
+ "name": "Create HTB with class rate and burst setting",
+ "category": [
+ "qdisc",
+ "htb"
+ ],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
+ "setup": [
+ "$TC qdisc add dev $DUMMY handle 1: root htb"
+ ],
+ "cmdUnderTest": "$TC class add dev $DUMMY parent 1: classid 1:1 htb rate 20kbit burst 1000",
+ "expExitCode": "0",
+ "verifyCmd": "$TC class show dev $DUMMY",
+ "matchPattern": "class htb 1:1 root prio 0 rate 20Kbit ceil 20Kbit burst 1000b cburst 1600b",
+ "matchCount": "1",
+ "teardown": [
+ "$TC qdisc del dev $DUMMY handle 1: root"
+ ]
+ },
+ {
+ "id": "4359",
+ "name": "Create HTB with class mpu setting",
+ "category": [
+ "qdisc",
+ "htb"
+ ],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
+ "setup": [
+ "$TC qdisc add dev $DUMMY handle 1: root htb"
+ ],
+ "cmdUnderTest": "$TC class add dev $DUMMY parent 1: classid 1:1 htb rate 20Kbit mpu 64",
+ "expExitCode": "0",
+ "verifyCmd": "$TC class show dev $DUMMY",
+ "matchPattern": "class htb 1:1 root prio 0 rate 20Kbit ceil 20Kbit burst 1600b cburst 1600b",
+ "matchCount": "1",
+ "teardown": [
+ "$TC qdisc del dev $DUMMY handle 1: root"
+ ]
+ },
+ {
+ "id": "9048",
+ "name": "Create HTB with class prio setting",
+ "category": [
+ "qdisc",
+ "htb"
+ ],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
+ "setup": [
+ "$TC qdisc add dev $DUMMY handle 1: root htb"
+ ],
+ "cmdUnderTest": "$TC class add dev $DUMMY parent 1: classid 1:1 htb rate 20Kbit prio 1",
+ "expExitCode": "0",
+ "verifyCmd": "$TC class show dev $DUMMY",
+ "matchPattern": "class htb 1:1 root prio 1 rate 20Kbit ceil 20Kbit burst 1600b cburst 1600b",
+ "matchCount": "1",
+ "teardown": [
+ "$TC qdisc del dev $DUMMY handle 1: root"
+ ]
+ },
+ {
+ "id": "4994",
+ "name": "Create HTB with class ceil setting",
+ "category": [
+ "qdisc",
+ "htb"
+ ],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
+ "setup": [
+ "$TC qdisc add dev $DUMMY handle 1: root htb"
+ ],
+ "cmdUnderTest": "$TC class add dev $DUMMY parent 1: classid 1:1 htb rate 20Kbit ceil 10Kbit",
+ "expExitCode": "0",
+ "verifyCmd": "$TC class show dev $DUMMY",
+ "matchPattern": "class htb 1:1 root prio 0 rate 20Kbit ceil 10Kbit burst 1600b cburst 1600b",
+ "matchCount": "1",
+ "teardown": [
+ "$TC qdisc del dev $DUMMY handle 1: root"
+ ]
+ },
+ {
+ "id": "9523",
+ "name": "Create HTB with class cburst setting",
+ "category": [
+ "qdisc",
+ "htb"
+ ],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
+ "setup": [
+ "$TC qdisc add dev $DUMMY handle 1: root htb"
+ ],
+ "cmdUnderTest": "$TC class add dev $DUMMY parent 1: classid 1:1 htb rate 20Kbit cburst 2000",
+ "expExitCode": "0",
+ "verifyCmd": "$TC class show dev $DUMMY",
+ "matchPattern": "class htb 1:1 root prio 0 rate 20Kbit ceil 20Kbit burst 1600b cburst 2000b",
+ "matchCount": "1",
+ "teardown": [
+ "$TC qdisc del dev $DUMMY handle 1: root"
+ ]
+ },
+ {
+ "id": "5353",
+ "name": "Create HTB with class mtu setting",
+ "category": [
+ "qdisc",
+ "htb"
+ ],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
+ "setup": [
+ "$TC qdisc add dev $DUMMY handle 1: root htb"
+ ],
+ "cmdUnderTest": "$TC class add dev $DUMMY parent 1: classid 1:1 htb rate 20Kbit mtu 2048",
+ "expExitCode": "0",
+ "verifyCmd": "$TC class show dev $DUMMY",
+ "matchPattern": "class htb 1:1 root prio 0 rate 20Kbit ceil 20Kbit burst 2Kb cburst 2Kb",
+ "matchCount": "1",
+ "teardown": [
+ "$TC qdisc del dev $DUMMY handle 1: root"
+ ]
+ },
+ {
+ "id": "346a",
+ "name": "Create HTB with class quantum setting",
+ "category": [
+ "qdisc",
+ "htb"
+ ],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
+ "setup": [
+ "$TC qdisc add dev $DUMMY handle 1: root htb"
+ ],
+ "cmdUnderTest": "$TC class add dev $DUMMY parent 1: classid 1:1 htb rate 20Kbit quantum 2048",
+ "expExitCode": "0",
+ "verifyCmd": "$TC class show dev $DUMMY",
+ "matchPattern": "class htb 1:1 root prio 0 rate 20Kbit ceil 20Kbit burst 1600b cburst 1600b",
+ "matchCount": "1",
+ "teardown": [
+ "$TC qdisc del dev $DUMMY handle 1: root"
+ ]
+ },
+ {
+ "id": "303a",
+ "name": "Delete HTB with handle",
+ "category": [
+ "qdisc",
+ "htb"
+ ],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
+ "setup": [
+ "$TC qdisc add dev $DUMMY handle 1: root htb r2q 5"
+ ],
+ "cmdUnderTest": "$TC qdisc del dev $DUMMY handle 1: root",
+ "expExitCode": "0",
+ "verifyCmd": "$TC qdisc show dev $DUMMY",
+ "matchPattern": "qdisc htb 1: root refcnt [0-9]+",
+ "matchCount": "0",
+ "teardown": [
+ ]
+ }
+]
diff --git a/tools/testing/selftests/tc-testing/tc-tests/qdiscs/ingress.json b/tools/testing/selftests/tc-testing/tc-tests/qdiscs/ingress.json
index d99dba6e2b1a..57bddc1212d8 100644
--- a/tools/testing/selftests/tc-testing/tc-tests/qdiscs/ingress.json
+++ b/tools/testing/selftests/tc-testing/tc-tests/qdiscs/ingress.json
@@ -7,16 +7,17 @@
"ingress"
],
"setup": [
- "$IP link add dev $DUMMY type dummy || /bin/true"
],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
"cmdUnderTest": "$TC qdisc add dev $DUMMY ingress",
"expExitCode": "0",
"verifyCmd": "$TC qdisc show dev $DUMMY",
"matchPattern": "qdisc ingress ffff:",
"matchCount": "1",
"teardown": [
- "$TC qdisc del dev $DUMMY ingress",
- "$IP link del dev $DUMMY type dummy"
+ "$TC qdisc del dev $DUMMY ingress"
]
},
{
@@ -26,8 +27,10 @@
"qdisc",
"ingress"
],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
"setup": [
- "$IP link add dev $DUMMY type dummy || /bin/true"
],
"cmdUnderTest": "$TC qdisc add dev $DUMMY ingress foorbar",
"expExitCode": "1",
@@ -35,7 +38,6 @@
"matchPattern": "qdisc ingress ffff:",
"matchCount": "0",
"teardown": [
- "$IP link del dev $DUMMY type dummy"
]
},
{
@@ -45,8 +47,10 @@
"qdisc",
"ingress"
],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
"setup": [
- "$IP link add dev $DUMMY type dummy || /bin/true",
"$TC qdisc add dev $DUMMY ingress"
],
"cmdUnderTest": "$TC qdisc add dev $DUMMY ingress",
@@ -55,8 +59,7 @@
"matchPattern": "qdisc ingress ffff:",
"matchCount": "1",
"teardown": [
- "$TC qdisc del dev $DUMMY ingress",
- "$IP link del dev $DUMMY type dummy"
+ "$TC qdisc del dev $DUMMY ingress"
]
},
{
@@ -66,8 +69,10 @@
"qdisc",
"ingress"
],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
"setup": [
- "$IP link add dev $DUMMY type dummy || /bin/true"
],
"cmdUnderTest": "$TC qdisc del dev $DUMMY ingress",
"expExitCode": "2",
@@ -75,7 +80,6 @@
"matchPattern": "qdisc ingress ffff:",
"matchCount": "0",
"teardown": [
- "$IP link del dev $DUMMY type dummy"
]
},
{
@@ -85,8 +89,10 @@
"qdisc",
"ingress"
],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
"setup": [
- "$IP link add dev $DUMMY type dummy || /bin/true",
"$TC qdisc add dev $DUMMY ingress",
"$TC qdisc del dev $DUMMY ingress"
],
@@ -96,7 +102,27 @@
"matchPattern": "qdisc ingress ffff:",
"matchCount": "0",
"teardown": [
- "$IP link del dev $DUMMY type dummy"
+ ]
+ },
+ {
+ "id": "0521",
+ "name": "Show ingress class",
+ "category": [
+ "qdisc",
+ "ingress"
+ ],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
+ "setup": [
+ ],
+ "cmdUnderTest": "$TC qdisc add dev $DUMMY ingress",
+ "expExitCode": "0",
+ "verifyCmd": "$TC class show dev $DUMMY",
+ "matchPattern": "class ingress",
+ "matchCount": "0",
+ "teardown": [
+ "$TC qdisc del dev $DUMMY ingress"
]
}
]
diff --git a/tools/testing/selftests/tc-testing/tc-tests/qdiscs/mq.json b/tools/testing/selftests/tc-testing/tc-tests/qdiscs/mq.json
new file mode 100644
index 000000000000..e3d2de5c184f
--- /dev/null
+++ b/tools/testing/selftests/tc-testing/tc-tests/qdiscs/mq.json
@@ -0,0 +1,182 @@
+[
+ {
+ "id": "ce7d",
+ "name": "Add mq Qdisc to multi-queue device (4 queues)",
+ "category": [
+ "qdisc",
+ "mq"
+ ],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
+ "setup": [
+ "echo \"1 1 4\" > /sys/bus/netdevsim/new_device"
+ ],
+ "cmdUnderTest": "$TC qdisc add dev $ETH root handle 1: mq",
+ "expExitCode": "0",
+ "verifyCmd": "$TC qdisc show dev $ETH",
+ "matchPattern": "qdisc [a-zA-Z0-9_]+ 0: parent 1:[1-4]",
+ "matchCount": "4",
+ "teardown": [
+ "echo \"1\" > /sys/bus/netdevsim/del_device"
+ ]
+ },
+ {
+ "id": "2f82",
+ "name": "Add mq Qdisc to multi-queue device (256 queues)",
+ "category": [
+ "qdisc",
+ "mq"
+ ],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
+ "setup": [
+ "echo \"1 1 256\" > /sys/bus/netdevsim/new_device"
+ ],
+ "cmdUnderTest": "$TC qdisc add dev $ETH root handle 1: mq",
+ "expExitCode": "0",
+ "verifyCmd": "$TC qdisc show dev $ETH",
+ "matchPattern": "qdisc [a-zA-Z0-9_]+ 0: parent 1:[1-9,a-f][0-9,a-f]{0,2}",
+ "matchCount": "256",
+ "teardown": [
+ "echo \"1\" > /sys/bus/netdevsim/del_device"
+ ]
+ },
+ {
+ "id": "c525",
+ "name": "Add duplicate mq Qdisc",
+ "category": [
+ "qdisc",
+ "mq"
+ ],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
+ "setup": [
+ "echo \"1 1 4\" > /sys/bus/netdevsim/new_device",
+ "$TC qdisc add dev $ETH root handle 1: mq"
+ ],
+ "cmdUnderTest": "$TC qdisc add dev $ETH root handle 1: mq",
+ "expExitCode": "2",
+ "verifyCmd": "$TC qdisc show dev $ETH",
+ "matchPattern": "qdisc [a-zA-Z0-9_]+ 0: parent 1:[1-4]",
+ "matchCount": "4",
+ "teardown": [
+ "echo \"1\" > /sys/bus/netdevsim/del_device"
+ ]
+ },
+ {
+ "id": "128a",
+ "name": "Delete nonexistent mq Qdisc",
+ "category": [
+ "qdisc",
+ "mq"
+ ],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
+ "setup": [
+ "echo \"1 1 4\" > /sys/bus/netdevsim/new_device"
+ ],
+ "cmdUnderTest": "$TC qdisc del dev $ETH root handle 1: mq",
+ "expExitCode": "2",
+ "verifyCmd": "$TC qdisc show dev $ETH",
+ "matchPattern": "qdisc [a-zA-Z0-9_]+ 0: parent 1:[1-4]",
+ "matchCount": "0",
+ "teardown": [
+ "echo \"1\" > /sys/bus/netdevsim/del_device"
+ ]
+ },
+ {
+ "id": "03a9",
+ "name": "Delete mq Qdisc twice",
+ "category": [
+ "qdisc",
+ "mq"
+ ],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
+ "setup": [
+ "echo \"1 1 4\" > /sys/bus/netdevsim/new_device",
+ "$TC qdisc add dev $ETH root handle 1: mq",
+ "$TC qdisc del dev $ETH root handle 1: mq"
+ ],
+ "cmdUnderTest": "$TC qdisc del dev $ETH root handle 1: mq",
+ "expExitCode": "2",
+ "verifyCmd": "$TC qdisc show dev $ETH",
+ "matchPattern": "qdisc [a-zA-Z0-9_]+ 0: parent 1:[1-4]",
+ "matchCount": "0",
+ "teardown": [
+ "echo \"1\" > /sys/bus/netdevsim/del_device"
+ ]
+ },
+ {
+ "id": "be0f",
+ "name": "Add mq Qdisc to single-queue device",
+ "category": [
+ "qdisc",
+ "mq"
+ ],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
+ "setup": [
+ "echo \"1 1\" > /sys/bus/netdevsim/new_device"
+ ],
+ "cmdUnderTest": "$TC qdisc add dev $ETH root handle 1: mq",
+ "expExitCode": "2",
+ "verifyCmd": "$TC qdisc show dev $ETH",
+ "matchPattern": "qdisc [a-zA-Z0-9_]+ 0: parent 1:[1-4]",
+ "matchCount": "0",
+ "teardown": [
+ "echo \"1\" > /sys/bus/netdevsim/del_device"
+ ]
+ },
+ {
+ "id": "1023",
+ "name": "Show mq class",
+ "category": [
+ "qdisc",
+ "mq"
+ ],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
+ "setup": [
+ "echo \"1 1 4\" > /sys/bus/netdevsim/new_device"
+ ],
+ "cmdUnderTest": "$TC qdisc add dev $ETH root handle 1: mq",
+ "expExitCode": "0",
+ "verifyCmd": "$TC class show dev $ETH",
+ "matchPattern": "class mq 1:[0-9]+ root",
+ "matchCount": "4",
+ "teardown": [
+ "echo \"1\" > /sys/bus/netdevsim/del_device"
+ ]
+ },
+ {
+ "id": "0531",
+ "name": "Replace mq with invalid parent ID",
+ "category": [
+ "qdisc",
+ "mq"
+ ],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
+ "setup": [
+ "echo \"1 1 16\" > /sys/bus/netdevsim/new_device",
+ "$TC qdisc add dev $ETH root handle ffff: mq"
+ ],
+ "cmdUnderTest": "$TC qdisc replace dev $ETH parent ffff:fff1 handle ffff: mq",
+ "expExitCode": "2",
+ "verifyCmd": "$TC qdisc show dev $ETH",
+ "matchPattern": "qdisc [a-zA-Z0-9_]+ 0: parent ffff",
+ "matchCount": "16",
+ "teardown": [
+ "echo \"1\" > /sys/bus/netdevsim/del_device"
+ ]
+ }
+]
diff --git a/tools/testing/selftests/tc-testing/tc-tests/qdiscs/mqprio.json b/tools/testing/selftests/tc-testing/tc-tests/qdiscs/mqprio.json
new file mode 100644
index 000000000000..6e1973f731e9
--- /dev/null
+++ b/tools/testing/selftests/tc-testing/tc-tests/qdiscs/mqprio.json
@@ -0,0 +1,114 @@
+[
+ {
+ "id": "9903",
+ "name": "Add mqprio Qdisc to multi-queue device (8 queues)",
+ "category": [
+ "qdisc",
+ "mqprio"
+ ],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
+ "setup": [
+ "echo \"1 1 8\" > /sys/bus/netdevsim/new_device"
+ ],
+ "cmdUnderTest": "$TC qdisc add dev $ETH root handle 1: mqprio num_tc 8 map 0 1 2 3 4 5 6 7 queues 1@0 1@1 1@2 1@3 1@4 1@5 1@6 1@7 hw 0",
+ "expExitCode": "0",
+ "verifyCmd": "$TC qdisc show dev $ETH",
+ "matchPattern": "qdisc mqprio 1: root tc 8 map 0 1 2 3 4 5 6 7 0 0 0 0 0 0 0 0.*queues:\\(0:0\\) \\(1:1\\) \\(2:2\\) \\(3:3\\) \\(4:4\\) \\(5:5\\) \\(6:6\\) \\(7:7\\)",
+ "matchCount": "1",
+ "teardown": [
+ "echo \"1\" > /sys/bus/netdevsim/del_device"
+ ]
+ },
+ {
+ "id": "453a",
+ "name": "Delete nonexistent mqprio Qdisc",
+ "category": [
+ "qdisc",
+ "mqprio"
+ ],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
+ "setup": [
+ "echo \"1 1 4\" > /sys/bus/netdevsim/new_device"
+ ],
+ "cmdUnderTest": "$TC qdisc del dev $ETH root handle 1: mqprio",
+ "expExitCode": "2",
+ "verifyCmd": "$TC qdisc show dev $ETH",
+ "matchPattern": "qdisc mqprio 1: root",
+ "matchCount": "0",
+ "teardown": [
+ "echo \"1\" > /sys/bus/netdevsim/del_device"
+ ]
+ },
+ {
+ "id": "5292",
+ "name": "Delete mqprio Qdisc twice",
+ "category": [
+ "qdisc",
+ "mqprio"
+ ],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
+ "setup": [
+ "echo \"1 1 8\" > /sys/bus/netdevsim/new_device",
+ "$TC qdisc add dev $ETH root handle 1: mqprio num_tc 8 map 0 1 2 3 4 5 6 7 queues 1@0 1@1 1@2 1@3 1@4 1@5 1@6 1@7 hw 0",
+ "$TC qdisc del dev $ETH root handle 1:"
+ ],
+ "cmdUnderTest": "$TC qdisc del dev $ETH root handle 1:",
+ "expExitCode": "2",
+ "verifyCmd": "$TC qdisc show dev $ETH",
+ "matchPattern": "qdisc mqprio 1: root",
+ "matchCount": "0",
+ "teardown": [
+ "echo \"1\" > /sys/bus/netdevsim/del_device"
+ ]
+ },
+ {
+ "id": "45a9",
+ "name": "Add mqprio Qdisc to single-queue device",
+ "category": [
+ "qdisc",
+ "mqprio"
+ ],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
+ "setup": [
+ "echo \"1 1\" > /sys/bus/netdevsim/new_device"
+ ],
+ "cmdUnderTest": "$TC qdisc add dev $ETH root handle 1: mqprio num_tc 8 map 0 1 2 3 4 5 6 7 queues 1@0 1@1 1@2 1@3 1@4 1@5 1@6 1@7 hw 0",
+ "expExitCode": "2",
+ "verifyCmd": "$TC qdisc show dev $ETH",
+ "matchPattern": "qdisc mqprio 1: root",
+ "matchCount": "0",
+ "teardown": [
+ "echo \"1\" > /sys/bus/netdevsim/del_device"
+ ]
+ },
+ {
+ "id": "2ba9",
+ "name": "Show mqprio class",
+ "category": [
+ "qdisc",
+ "mqprio"
+ ],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
+ "setup": [
+ "echo \"1 1 8\" > /sys/bus/netdevsim/new_device"
+ ],
+ "cmdUnderTest": "$TC qdisc add dev $ETH root handle 1: mqprio num_tc 8 map 0 1 2 3 4 5 6 7 queues 1@0 1@1 1@2 1@3 1@4 1@5 1@6 1@7 hw 0",
+ "expExitCode": "0",
+ "verifyCmd": "$TC class show dev $ETH",
+ "matchPattern": "class mqprio 1:",
+ "matchCount": "16",
+ "teardown": [
+ "echo \"1\" > /sys/bus/netdevsim/del_device"
+ ]
+ }
+]
diff --git a/tools/testing/selftests/tc-testing/tc-tests/qdiscs/multiq.json b/tools/testing/selftests/tc-testing/tc-tests/qdiscs/multiq.json
new file mode 100644
index 000000000000..12c0af7a145d
--- /dev/null
+++ b/tools/testing/selftests/tc-testing/tc-tests/qdiscs/multiq.json
@@ -0,0 +1,114 @@
+[
+ {
+ "id": "20ba",
+ "name": "Add multiq Qdisc to multi-queue device (8 queues)",
+ "category": [
+ "qdisc",
+ "multiq"
+ ],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
+ "setup": [
+ "echo \"1 1 8\" > /sys/bus/netdevsim/new_device"
+ ],
+ "cmdUnderTest": "$TC qdisc add dev $ETH root handle 1: multiq",
+ "expExitCode": "0",
+ "verifyCmd": "$TC qdisc show dev $ETH",
+ "matchPattern": "qdisc multiq 1: root refcnt [0-9]+ bands 8",
+ "matchCount": "1",
+ "teardown": [
+ "echo \"1\" > /sys/bus/netdevsim/del_device"
+ ]
+ },
+ {
+ "id": "4301",
+ "name": "List multiq Class",
+ "category": [
+ "qdisc",
+ "multiq"
+ ],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
+ "setup": [
+ "echo \"1 1 8\" > /sys/bus/netdevsim/new_device"
+ ],
+ "cmdUnderTest": "$TC qdisc add dev $ETH root handle 1: multiq",
+ "expExitCode": "0",
+ "verifyCmd": "$TC class show dev $ETH",
+ "matchPattern": "class multiq 1:[0-9]+ parent 1:",
+ "matchCount": "8",
+ "teardown": [
+ "echo \"1\" > /sys/bus/netdevsim/del_device"
+ ]
+ },
+ {
+ "id": "7832",
+ "name": "Delete nonexistent multiq Qdisc",
+ "category": [
+ "qdisc",
+ "multiq"
+ ],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
+ "setup": [
+ "echo \"1 1 4\" > /sys/bus/netdevsim/new_device"
+ ],
+ "cmdUnderTest": "$TC qdisc del dev $ETH root handle 1: multiq",
+ "expExitCode": "2",
+ "verifyCmd": "$TC qdisc show dev $ETH",
+ "matchPattern": "qdisc multiq 1: root",
+ "matchCount": "0",
+ "teardown": [
+ "echo \"1\" > /sys/bus/netdevsim/del_device"
+ ]
+ },
+ {
+ "id": "2891",
+ "name": "Delete multiq Qdisc twice",
+ "category": [
+ "qdisc",
+ "multiq"
+ ],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
+ "setup": [
+ "echo \"1 1 8\" > /sys/bus/netdevsim/new_device",
+ "$TC qdisc add dev $ETH root handle 1: multiq",
+ "$TC qdisc del dev $ETH root handle 1:"
+ ],
+ "cmdUnderTest": "$TC qdisc del dev $ETH root handle 1:",
+ "expExitCode": "2",
+ "verifyCmd": "$TC qdisc show dev $ETH",
+ "matchPattern": "qdisc mqprio 1: root",
+ "matchCount": "0",
+ "teardown": [
+ "echo \"1\" > /sys/bus/netdevsim/del_device"
+ ]
+ },
+ {
+ "id": "1329",
+ "name": "Add multiq Qdisc to single-queue device",
+ "category": [
+ "qdisc",
+ "multiq"
+ ],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
+ "setup": [
+ "echo \"1 1\" > /sys/bus/netdevsim/new_device"
+ ],
+ "cmdUnderTest": "$TC qdisc add dev $ETH root handle 1: multiq",
+ "expExitCode": "2",
+ "verifyCmd": "$TC qdisc show dev $ETH",
+ "matchPattern": "qdisc multiq 1: root",
+ "matchCount": "0",
+ "teardown": [
+ "echo \"1\" > /sys/bus/netdevsim/del_device"
+ ]
+ }
+]
diff --git a/tools/testing/selftests/tc-testing/tc-tests/qdiscs/netem.json b/tools/testing/selftests/tc-testing/tc-tests/qdiscs/netem.json
new file mode 100644
index 000000000000..3c4444961488
--- /dev/null
+++ b/tools/testing/selftests/tc-testing/tc-tests/qdiscs/netem.json
@@ -0,0 +1,340 @@
+[
+ {
+ "id": "cb28",
+ "name": "Create NETEM with default setting",
+ "category": [
+ "qdisc",
+ "netem"
+ ],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
+ "setup": [
+ ],
+ "cmdUnderTest": "$TC qdisc add dev $DUMMY handle 1: root netem",
+ "expExitCode": "0",
+ "verifyCmd": "$TC qdisc show dev $DUMMY",
+ "matchPattern": "qdisc netem 1: root refcnt [0-9]+ limit",
+ "matchCount": "1",
+ "teardown": [
+ "$TC qdisc del dev $DUMMY handle 1: root"
+ ]
+ },
+ {
+ "id": "a089",
+ "name": "Create NETEM with limit flag",
+ "category": [
+ "qdisc",
+ "netem"
+ ],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
+ "setup": [
+ ],
+ "cmdUnderTest": "$TC qdisc add dev $DUMMY handle 1: root netem limit 200",
+ "expExitCode": "0",
+ "verifyCmd": "$TC qdisc show dev $DUMMY",
+ "matchPattern": "qdisc netem 1: root refcnt [0-9]+ limit 200",
+ "matchCount": "1",
+ "teardown": [
+ "$TC qdisc del dev $DUMMY handle 1: root"
+ ]
+ },
+ {
+ "id": "3449",
+ "name": "Create NETEM with delay time",
+ "category": [
+ "qdisc",
+ "netem"
+ ],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
+ "setup": [
+ ],
+ "cmdUnderTest": "$TC qdisc add dev $DUMMY handle 1: root netem delay 100ms",
+ "expExitCode": "0",
+ "verifyCmd": "$TC qdisc show dev $DUMMY",
+ "matchPattern": "qdisc netem 1: root refcnt [0-9]+ .*delay 100ms",
+ "matchCount": "1",
+ "teardown": [
+ "$TC qdisc del dev $DUMMY handle 1: root"
+ ]
+ },
+ {
+ "id": "3782",
+ "name": "Create NETEM with distribution and corrupt flag",
+ "category": [
+ "qdisc",
+ "netem"
+ ],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
+ "setup": [
+ ],
+ "cmdUnderTest": "$TC qdisc add dev $DUMMY handle 1: root netem delay 100ms 10ms distribution normal corrupt 1%",
+ "expExitCode": "0",
+ "verifyCmd": "$TC qdisc show dev $DUMMY",
+ "matchPattern": "qdisc netem 1: root refcnt [0-9]+ .*delay 100ms 10ms corrupt 1%",
+ "matchCount": "1",
+ "teardown": [
+ "$TC qdisc del dev $DUMMY handle 1: root"
+ ]
+ },
+ {
+ "id": "2b82",
+ "name": "Create NETEM with distribution and duplicate flag",
+ "category": [
+ "qdisc",
+ "netem"
+ ],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
+ "setup": [
+ ],
+ "cmdUnderTest": "$TC qdisc add dev $DUMMY handle 1: root netem delay 100ms 10ms distribution normal duplicate 1%",
+ "expExitCode": "0",
+ "verifyCmd": "$TC qdisc show dev $DUMMY",
+ "matchPattern": "qdisc netem 1: root refcnt [0-9]+ .*delay 100ms 10ms duplicate 1%",
+ "matchCount": "1",
+ "teardown": [
+ "$TC qdisc del dev $DUMMY handle 1: root"
+ ]
+ },
+ {
+ "id": "a932",
+ "name": "Create NETEM with distribution and loss flag",
+ "category": [
+ "qdisc",
+ "netem"
+ ],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
+ "setup": [
+ ],
+ "cmdUnderTest": "$TC qdisc add dev $DUMMY handle 1: root netem delay 100ms 10ms distribution pareto loss 1%",
+ "expExitCode": "0",
+ "verifyCmd": "$TC qdisc show dev $DUMMY",
+ "matchPattern": "qdisc netem 1: root refcnt [0-9]+ .*delay 100ms 10ms loss 1%",
+ "matchCount": "1",
+ "teardown": [
+ "$TC qdisc del dev $DUMMY handle 1: root"
+ ]
+ },
+ {
+ "id": "e01a",
+ "name": "Create NETEM with distribution and loss state flag",
+ "category": [
+ "qdisc",
+ "netem"
+ ],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
+ "setup": [
+ ],
+ "cmdUnderTest": "$TC qdisc add dev $DUMMY handle 1: root netem delay 100ms 10ms distribution paretonormal loss state 1",
+ "expExitCode": "0",
+ "verifyCmd": "$TC qdisc show dev $DUMMY",
+ "matchPattern": "qdisc netem 1: root refcnt [0-9]+ .*delay 100ms 10ms loss state p13 1% p31 99% p32 0% p23 100% p14 0%",
+ "matchCount": "1",
+ "teardown": [
+ "$TC qdisc del dev $DUMMY handle 1: root"
+ ]
+ },
+ {
+ "id": "ba29",
+ "name": "Create NETEM with loss gemodel flag",
+ "category": [
+ "qdisc",
+ "netem"
+ ],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
+ "setup": [
+ ],
+ "cmdUnderTest": "$TC qdisc add dev $DUMMY handle 1: root netem loss gemodel 1%",
+ "expExitCode": "0",
+ "verifyCmd": "$TC qdisc show dev $DUMMY",
+ "matchPattern": "qdisc netem 1: root refcnt [0-9]+ .*loss gemodel p 1%",
+ "matchCount": "1",
+ "teardown": [
+ "$TC qdisc del dev $DUMMY handle 1: root"
+ ]
+ },
+ {
+ "id": "0492",
+ "name": "Create NETEM with reorder flag",
+ "category": [
+ "qdisc",
+ "netem"
+ ],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
+ "setup": [
+ ],
+ "cmdUnderTest": "$TC qdisc add dev $DUMMY handle 1: root netem delay 100ms 10ms reorder 2% gap 100",
+ "expExitCode": "0",
+ "verifyCmd": "$TC qdisc show dev $DUMMY",
+ "matchPattern": "qdisc netem 1: root refcnt [0-9]+ .*reorder 2%",
+ "matchCount": "1",
+ "teardown": [
+ "$TC qdisc del dev $DUMMY handle 1: root"
+ ]
+ },
+ {
+ "id": "7862",
+ "name": "Create NETEM with rate limit",
+ "category": [
+ "qdisc",
+ "netem"
+ ],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
+ "setup": [
+ ],
+ "cmdUnderTest": "$TC qdisc add dev $DUMMY handle 1: root netem rate 20000",
+ "expExitCode": "0",
+ "verifyCmd": "$TC qdisc show dev $DUMMY",
+ "matchPattern": "qdisc netem 1: root refcnt [0-9]+ .*rate 20Kbit",
+ "matchCount": "1",
+ "teardown": [
+ "$TC qdisc del dev $DUMMY handle 1: root"
+ ]
+ },
+ {
+ "id": "7235",
+ "name": "Create NETEM with multiple slot rate",
+ "category": [
+ "qdisc",
+ "netem"
+ ],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
+ "setup": [
+ ],
+ "cmdUnderTest": "$TC qdisc add dev $DUMMY handle 1: root netem slot 10 200 packets 2000 bytes 9000",
+ "expExitCode": "0",
+ "verifyCmd": "$TC qdisc show dev $DUMMY",
+ "matchPattern": "qdisc netem 1: root refcnt [0-9]+ .*slot 10ns 200ns packets 2000 bytes 9000",
+ "matchCount": "1",
+ "teardown": [
+ "$TC qdisc del dev $DUMMY handle 1: root"
+ ]
+ },
+ {
+ "id": "5439",
+ "name": "Create NETEM with multiple slot setting",
+ "category": [
+ "qdisc",
+ "netem"
+ ],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
+ "setup": [
+ ],
+ "cmdUnderTest": "$TC qdisc add dev $DUMMY handle 1: root netem slot distribution pareto 1ms 0.1ms",
+ "expExitCode": "0",
+ "verifyCmd": "$TC qdisc show dev $DUMMY",
+ "matchPattern": "qdisc netem 1: root refcnt [0-9]+ .*slot distribution 1ms 100us",
+ "matchCount": "1",
+ "teardown": [
+ "$TC qdisc del dev $DUMMY handle 1: root"
+ ]
+ },
+ {
+ "id": "5029",
+ "name": "Change NETEM with loss state",
+ "category": [
+ "qdisc",
+ "netem"
+ ],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
+ "setup": [
+ "$TC qdisc add dev $DUMMY handle 1: root netem delay 100ms 10ms distribution normal loss 1%"
+ ],
+ "cmdUnderTest": "$TC qdisc change dev $DUMMY handle 1: root netem delay 100ms 10ms distribution normal loss 2%",
+ "expExitCode": "0",
+ "verifyCmd": "$TC qdisc show dev $DUMMY",
+ "matchPattern": "qdisc netem 1: root refcnt [0-9]+ .*loss 2%",
+ "matchCount": "1",
+ "teardown": [
+ "$TC qdisc del dev $DUMMY handle 1: root"
+ ]
+ },
+ {
+ "id": "3785",
+ "name": "Replace NETEM with delay time",
+ "category": [
+ "qdisc",
+ "netem"
+ ],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
+ "setup": [
+ "$TC qdisc add dev $DUMMY handle 1: root netem delay 100ms 10ms distribution normal loss 1%"
+ ],
+ "cmdUnderTest": "$TC qdisc replace dev $DUMMY handle 1: root netem delay 200ms 10ms",
+ "expExitCode": "0",
+ "verifyCmd": "$TC qdisc show dev $DUMMY",
+ "matchPattern": "qdisc netem 1: root refcnt [0-9]+ .*delay 200ms 10ms",
+ "matchCount": "1",
+ "teardown": [
+ "$TC qdisc del dev $DUMMY handle 1: root"
+ ]
+ },
+ {
+ "id": "4502",
+ "name": "Delete NETEM with handle",
+ "category": [
+ "qdisc",
+ "netem"
+ ],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
+ "setup": [
+ "$TC qdisc add dev $DUMMY handle 1: root netem delay 100ms 10ms distribution normal"
+ ],
+ "cmdUnderTest": "$TC qdisc del dev $DUMMY handle 1: root",
+ "expExitCode": "0",
+ "verifyCmd": "$TC qdisc show dev $DUMMY",
+ "matchPattern": "qdisc netem 1: root refcnt [0-9]+ .*delay 100ms 10ms",
+ "matchCount": "0",
+ "teardown": [
+ ]
+ },
+ {
+ "id": "0785",
+ "name": "Show NETEM class",
+ "category": [
+ "qdisc",
+ "netem"
+ ],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
+ "setup": [
+ ],
+ "cmdUnderTest": "$TC qdisc add dev $DUMMY handle 1: root netem",
+ "expExitCode": "0",
+ "verifyCmd": "$TC class show dev $DUMMY",
+ "matchPattern": "class netem 1:",
+ "matchCount": "0",
+ "teardown": [
+ "$TC qdisc del dev $DUMMY handle 1: root"
+ ]
+ }
+]
diff --git a/tools/testing/selftests/tc-testing/tc-tests/qdiscs/pfifo_fast.json b/tools/testing/selftests/tc-testing/tc-tests/qdiscs/pfifo_fast.json
new file mode 100644
index 000000000000..30da27fe8806
--- /dev/null
+++ b/tools/testing/selftests/tc-testing/tc-tests/qdiscs/pfifo_fast.json
@@ -0,0 +1,109 @@
+[
+ {
+ "id": "900c",
+ "name": "Create pfifo_fast with default setting",
+ "category": [
+ "qdisc",
+ "pfifo_fast"
+ ],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
+ "setup": [
+ ],
+ "cmdUnderTest": "$TC qdisc add dev $DUMMY handle 1: root pfifo_fast",
+ "expExitCode": "0",
+ "verifyCmd": "$TC qdisc show dev $DUMMY",
+ "matchPattern": "qdisc pfifo_fast 1: root refcnt [0-9]+ bands 3 priomap",
+ "matchCount": "1",
+ "teardown": [
+ "$TC qdisc del dev $DUMMY handle 1: root"
+ ]
+ },
+ {
+ "id": "7470",
+ "name": "Dump pfifo_fast stats",
+ "category": [
+ "qdisc",
+ "pfifo_fast"
+ ],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
+ "setup": [
+ ],
+ "cmdUnderTest": "$TC qdisc add dev $DUMMY handle 1: root pfifo_fast",
+ "expExitCode": "0",
+ "verifyCmd": "$TC -s qdisc show dev $DUMMY",
+ "matchPattern": "Sent.*bytes.*pkt \\(dropped.*overlimits.*requeues .*\\)",
+ "matchCount": "1",
+ "teardown": [
+ "$TC qdisc del dev $DUMMY handle 1: root"
+ ]
+ },
+ {
+ "id": "b974",
+ "name": "Replace pfifo_fast with different handle",
+ "category": [
+ "qdisc",
+ "pfifo_fast"
+ ],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
+ "setup": [
+ "$TC qdisc add dev $DUMMY handle 1: root pfifo_fast"
+ ],
+ "cmdUnderTest": "$TC qdisc replace dev $DUMMY handle 2: root pfifo_fast",
+ "expExitCode": "0",
+ "verifyCmd": "$TC qdisc show dev $DUMMY",
+ "matchPattern": "qdisc pfifo_fast 2: root refcnt [0-9]+ bands 3 priomap",
+ "matchCount": "1",
+ "teardown": [
+ "$TC qdisc del dev $DUMMY handle 2: root"
+ ]
+ },
+ {
+ "id": "3240",
+ "name": "Delete pfifo_fast with valid handle",
+ "category": [
+ "qdisc",
+ "pfifo_fast"
+ ],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
+ "setup": [
+ "$TC qdisc add dev $DUMMY handle 1: root pfifo_fast"
+ ],
+ "cmdUnderTest": "$TC qdisc del dev $DUMMY handle 1: root",
+ "expExitCode": "0",
+ "verifyCmd": "$TC qdisc show dev $DUMMY",
+ "matchPattern": "qdisc pfifo_fast 1: root refcnt [0-9]+ bands 3 priomap",
+ "matchCount": "0",
+ "teardown": [
+ ]
+ },
+ {
+ "id": "4385",
+ "name": "Delete pfifo_fast with invalid handle",
+ "category": [
+ "qdisc",
+ "pfifo_fast"
+ ],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
+ "setup": [
+ "$TC qdisc add dev $DUMMY handle 1: root pfifo_fast"
+ ],
+ "cmdUnderTest": "$TC qdisc del dev $DUMMY handle 2: root",
+ "expExitCode": "2",
+ "verifyCmd": "$TC qdisc show dev $DUMMY",
+ "matchPattern": "qdisc pfifo_fast 1: root refcnt [0-9]+ bands 3 priomap",
+ "matchCount": "1",
+ "teardown": [
+ "$TC qdisc del dev $DUMMY handle 1: root"
+ ]
+ }
+]
diff --git a/tools/testing/selftests/tc-testing/tc-tests/qdiscs/plug.json b/tools/testing/selftests/tc-testing/tc-tests/qdiscs/plug.json
new file mode 100644
index 000000000000..6ec7e0a01265
--- /dev/null
+++ b/tools/testing/selftests/tc-testing/tc-tests/qdiscs/plug.json
@@ -0,0 +1,172 @@
+[
+ {
+ "id": "3289",
+ "name": "Create PLUG with default setting",
+ "category": [
+ "qdisc",
+ "plug"
+ ],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
+ "setup": [
+ ],
+ "cmdUnderTest": "$TC qdisc add dev $DUMMY handle 1: root plug",
+ "expExitCode": "0",
+ "verifyCmd": "$TC qdisc show dev $DUMMY",
+ "matchPattern": "qdisc plug 1: root refcnt",
+ "matchCount": "1",
+ "teardown": [
+ "$TC qdisc del dev $DUMMY handle 1: root"
+ ]
+ },
+ {
+ "id": "0917",
+ "name": "Create PLUG with block setting",
+ "category": [
+ "qdisc",
+ "plug"
+ ],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
+ "setup": [
+ ],
+ "cmdUnderTest": "$TC qdisc add dev $DUMMY handle 1: root plug block",
+ "expExitCode": "0",
+ "verifyCmd": "$TC qdisc show dev $DUMMY",
+ "matchPattern": "qdisc plug 1: root refcnt",
+ "matchCount": "1",
+ "teardown": [
+ "$TC qdisc del dev $DUMMY handle 1: root"
+ ]
+ },
+ {
+ "id": "483b",
+ "name": "Create PLUG with release setting",
+ "category": [
+ "qdisc",
+ "plug"
+ ],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
+ "setup": [
+ ],
+ "cmdUnderTest": "$TC qdisc add dev $DUMMY handle 1: root plug release",
+ "expExitCode": "0",
+ "verifyCmd": "$TC qdisc show dev $DUMMY",
+ "matchPattern": "qdisc plug 1: root refcnt",
+ "matchCount": "1",
+ "teardown": [
+ "$TC qdisc del dev $DUMMY handle 1: root"
+ ]
+ },
+ {
+ "id": "4995",
+ "name": "Create PLUG with release_indefinite setting",
+ "category": [
+ "qdisc",
+ "plug"
+ ],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
+ "setup": [
+ ],
+ "cmdUnderTest": "$TC qdisc add dev $DUMMY handle 1: root plug release_indefinite",
+ "expExitCode": "0",
+ "verifyCmd": "$TC qdisc show dev $DUMMY",
+ "matchPattern": "qdisc plug 1: root refcnt",
+ "matchCount": "1",
+ "teardown": [
+ "$TC qdisc del dev $DUMMY handle 1: root"
+ ]
+ },
+ {
+ "id": "389c",
+ "name": "Create PLUG with limit setting",
+ "category": [
+ "qdisc",
+ "plug"
+ ],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
+ "setup": [
+ ],
+ "cmdUnderTest": "$TC qdisc add dev $DUMMY handle 1: root plug limit 100",
+ "expExitCode": "0",
+ "verifyCmd": "$TC qdisc show dev $DUMMY",
+ "matchPattern": "qdisc plug 1: root refcnt",
+ "matchCount": "1",
+ "teardown": [
+ "$TC qdisc del dev $DUMMY handle 1: root"
+ ]
+ },
+ {
+ "id": "384a",
+ "name": "Delete PLUG with valid handle",
+ "category": [
+ "qdisc",
+ "plug"
+ ],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
+ "setup": [
+ "$TC qdisc add dev $DUMMY handle 1: root plug"
+ ],
+ "cmdUnderTest": "$TC qdisc del dev $DUMMY handle 1: root",
+ "expExitCode": "0",
+ "verifyCmd": "$TC qdisc show dev $DUMMY",
+ "matchPattern": "qdisc plug 1: root refcnt",
+ "matchCount": "0",
+ "teardown": [
+ ]
+ },
+ {
+ "id": "439a",
+ "name": "Replace PLUG with limit setting",
+ "category": [
+ "qdisc",
+ "plug"
+ ],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
+ "setup": [
+ "$TC qdisc add dev $DUMMY handle 1: root plug"
+ ],
+ "cmdUnderTest": "$TC qdisc replace dev $DUMMY handle 1: root plug limit 1000",
+ "expExitCode": "0",
+ "verifyCmd": "$TC qdisc show dev $DUMMY",
+ "matchPattern": "qdisc plug 1: root refcnt",
+ "matchCount": "1",
+ "teardown": [
+ "$TC qdisc del dev $DUMMY handle 1: root"
+ ]
+ },
+ {
+ "id": "9831",
+ "name": "Change PLUG with limit setting",
+ "category": [
+ "qdisc",
+ "plug"
+ ],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
+ "setup": [
+ "$TC qdisc add dev $DUMMY handle 1: root plug"
+ ],
+ "cmdUnderTest": "$TC qdisc change dev $DUMMY handle 1: root plug limit 1000",
+ "expExitCode": "0",
+ "verifyCmd": "$TC qdisc show dev $DUMMY",
+ "matchPattern": "qdisc plug 1: root refcnt",
+ "matchCount": "1",
+ "teardown": [
+ "$TC qdisc del dev $DUMMY handle 1: root"
+ ]
+ }
+]
diff --git a/tools/testing/selftests/tc-testing/tc-tests/qdiscs/prio.json b/tools/testing/selftests/tc-testing/tc-tests/qdiscs/prio.json
index 3076c02d08d6..69abf041c799 100644
--- a/tools/testing/selftests/tc-testing/tc-tests/qdiscs/prio.json
+++ b/tools/testing/selftests/tc-testing/tc-tests/qdiscs/prio.json
@@ -6,8 +6,10 @@
"qdisc",
"prio"
],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
"setup": [
- "$IP link add dev $DUMMY type dummy || /bin/true"
],
"cmdUnderTest": "$TC qdisc add dev $DUMMY handle 1: root prio",
"expExitCode": "0",
@@ -15,8 +17,7 @@
"matchPattern": "qdisc prio 1: root",
"matchCount": "1",
"teardown": [
- "$TC qdisc del dev $DUMMY handle 1: root prio",
- "$IP link del dev $DUMMY type dummy"
+ "$TC qdisc del dev $DUMMY handle 1: root prio"
]
},
{
@@ -26,8 +27,10 @@
"qdisc",
"prio"
],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
"setup": [
- "$IP link add dev $DUMMY type dummy || /bin/true"
],
"cmdUnderTest": "$TC qdisc add dev $DUMMY root handle ffff: prio",
"expExitCode": "0",
@@ -35,7 +38,6 @@
"matchPattern": "qdisc prio ffff: root",
"matchCount": "1",
"teardown": [
- "$IP link del dev $DUMMY type dummy"
]
},
{
@@ -45,8 +47,10 @@
"qdisc",
"prio"
],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
"setup": [
- "$IP link add dev $DUMMY type dummy || /bin/true"
],
"cmdUnderTest": "$TC qdisc add dev $DUMMY root handle 10000: prio",
"expExitCode": "255",
@@ -54,7 +58,6 @@
"matchPattern": "qdisc prio 10000: root",
"matchCount": "0",
"teardown": [
- "$IP link del dev $DUMMY type dummy"
]
},
{
@@ -64,8 +67,10 @@
"qdisc",
"prio"
],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
"setup": [
- "$IP link add dev $DUMMY type dummy || /bin/true"
],
"cmdUnderTest": "$TC qdisc add dev $DUMMY handle 1: root prio foorbar",
"expExitCode": "1",
@@ -73,7 +78,6 @@
"matchPattern": "qdisc prio 1: root",
"matchCount": "0",
"teardown": [
- "$IP link del dev $DUMMY type dummy"
]
},
{
@@ -83,8 +87,10 @@
"qdisc",
"prio"
],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
"setup": [
- "$IP link add dev $DUMMY type dummy || /bin/true"
],
"cmdUnderTest": "$TC qdisc add dev $DUMMY handle 1: root prio bands 4 priomap 1 1 2 2 3 3 0 0 1 2 3 0 0 0 0 0",
"expExitCode": "0",
@@ -92,8 +98,7 @@
"matchPattern": "qdisc prio 1: root.*bands 4 priomap.*1 1 2 2 3 3 0 0 1 2 3 0 0 0 0 0",
"matchCount": "1",
"teardown": [
- "$TC qdisc del dev $DUMMY handle 1: root prio",
- "$IP link del dev $DUMMY type dummy"
+ "$TC qdisc del dev $DUMMY handle 1: root prio"
]
},
{
@@ -103,8 +108,10 @@
"qdisc",
"prio"
],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
"setup": [
- "$IP link add dev $DUMMY type dummy || /bin/true"
],
"cmdUnderTest": "$TC qdisc add dev $DUMMY handle 1: root prio bands 4 priomap 1 1 2 2 3 3 0 0 1 2 3 0 0 0 0 0 1 1",
"expExitCode": "1",
@@ -112,7 +119,6 @@
"matchPattern": "qdisc prio 1: root.*bands 4 priomap.*1 1 2 2 3 3 0 0 1 2 3 0 0 0 0 0 1 1",
"matchCount": "0",
"teardown": [
- "$IP link del dev $DUMMY type dummy"
]
},
{
@@ -122,8 +128,10 @@
"qdisc",
"prio"
],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
"setup": [
- "$IP link add dev $DUMMY type dummy || /bin/true"
],
"cmdUnderTest": "$TC qdisc add dev $DUMMY handle 1: root prio bands 4 priomap 1 1 2 2 7 5 0 0 1 2 3 0 0 0 0 0",
"expExitCode": "1",
@@ -131,7 +139,6 @@
"matchPattern": "qdisc prio 1: root.*bands 4 priomap.*1 1 2 2 7 5 0 0 1 2 3 0 0 0 0 0",
"matchCount": "0",
"teardown": [
- "$IP link del dev $DUMMY type dummy"
]
},
{
@@ -141,8 +148,10 @@
"qdisc",
"prio"
],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
"setup": [
- "$IP link add dev $DUMMY type dummy || /bin/true"
],
"cmdUnderTest": "$TC qdisc add dev $DUMMY handle 1: root prio bands 1 priomap 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0",
"expExitCode": "2",
@@ -150,7 +159,6 @@
"matchPattern": "qdisc prio 1: root.*bands 1 priomap.*0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0",
"matchCount": "0",
"teardown": [
- "$IP link del dev $DUMMY type dummy"
]
},
{
@@ -160,8 +168,10 @@
"qdisc",
"prio"
],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
"setup": [
- "$IP link add dev $DUMMY type dummy || /bin/true"
],
"cmdUnderTest": "$TC qdisc add dev $DUMMY handle 1: root prio bands 1024 priomap 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16",
"expExitCode": "2",
@@ -169,7 +179,6 @@
"matchPattern": "qdisc prio 1: root.*bands 1024 priomap.*1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16",
"matchCount": "0",
"teardown": [
- "$IP link del dev $DUMMY type dummy"
]
},
{
@@ -179,8 +188,10 @@
"qdisc",
"prio"
],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
"setup": [
- "$IP link add dev $DUMMY type dummy || /bin/true",
"$TC qdisc add dev $DUMMY handle 1: root prio"
],
"cmdUnderTest": "$TC qdisc replace dev $DUMMY handle 1: root prio bands 8 priomap 1 1 2 2 3 3 4 4 5 5 6 6 7 7 0 0",
@@ -189,8 +200,7 @@
"matchPattern": "qdisc prio 1: root.*bands 8 priomap.*1 1 2 2 3 3 4 4 5 5 6 6 7 7 0 0",
"matchCount": "1",
"teardown": [
- "$TC qdisc del dev $DUMMY handle 1: root prio",
- "$IP link del dev $DUMMY type dummy"
+ "$TC qdisc del dev $DUMMY handle 1: root prio"
]
},
{
@@ -200,8 +210,10 @@
"qdisc",
"prio"
],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
"setup": [
- "$IP link add dev $DUMMY type dummy || /bin/true",
"$TC qdisc add dev $DUMMY handle 1: root prio"
],
"cmdUnderTest": "$TC qdisc add dev $DUMMY handle 1: root prio",
@@ -210,8 +222,7 @@
"matchPattern": "qdisc prio 1: root",
"matchCount": "1",
"teardown": [
- "$TC qdisc del dev $DUMMY handle 1: root prio",
- "$IP link del dev $DUMMY type dummy"
+ "$TC qdisc del dev $DUMMY handle 1: root prio"
]
},
{
@@ -221,8 +232,10 @@
"qdisc",
"prio"
],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
"setup": [
- "$IP link add dev $DUMMY type dummy || /bin/true"
],
"cmdUnderTest": "$TC qdisc del dev $DUMMY root handle 1: prio",
"expExitCode": "2",
@@ -230,7 +243,6 @@
"matchPattern": "qdisc prio 1: root",
"matchCount": "0",
"teardown": [
- "$IP link del dev $DUMMY type dummy"
]
},
{
@@ -240,8 +252,10 @@
"qdisc",
"prio"
],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
"setup": [
- "$IP link add dev $DUMMY type dummy || /bin/true"
],
"cmdUnderTest": "$TC qdisc add dev $DUMMY root handle 123^ prio",
"expExitCode": "255",
@@ -249,7 +263,6 @@
"matchPattern": "qdisc prio 123 root",
"matchCount": "0",
"teardown": [
- "$IP link del dev $DUMMY type dummy"
]
},
{
@@ -259,8 +272,10 @@
"qdisc",
"prio"
],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
"setup": [
- "$IP link add dev $DUMMY type dummy || /bin/true",
"$TC qdisc add dev $DUMMY root handle 1: prio",
"$TC qdisc del dev $DUMMY root handle 1: prio"
],
@@ -270,7 +285,27 @@
"matchPattern": "qdisc ingress ffff:",
"matchCount": "0",
"teardown": [
- "$IP link del dev $DUMMY type dummy"
+ ]
+ },
+ {
+ "id": "2410",
+ "name": "Show prio class",
+ "category": [
+ "qdisc",
+ "prio"
+ ],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
+ "setup": [
+ ],
+ "cmdUnderTest": "$TC qdisc add dev $DUMMY handle 1: root prio",
+ "expExitCode": "0",
+ "verifyCmd": "$TC class show dev $DUMMY",
+ "matchPattern": "class prio 1:[0-9]+ parent 1:",
+ "matchCount": "3",
+ "teardown": [
+ "$TC qdisc del dev $DUMMY handle 1: root prio"
]
}
]
diff --git a/tools/testing/selftests/tc-testing/tc-tests/qdiscs/qfq.json b/tools/testing/selftests/tc-testing/tc-tests/qdiscs/qfq.json
new file mode 100644
index 000000000000..c95643929841
--- /dev/null
+++ b/tools/testing/selftests/tc-testing/tc-tests/qdiscs/qfq.json
@@ -0,0 +1,280 @@
+[
+ {
+ "id": "0582",
+ "name": "Create QFQ with default setting",
+ "category": [
+ "qdisc",
+ "qfq"
+ ],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
+ "setup": [
+ ],
+ "cmdUnderTest": "$TC qdisc add dev $DUMMY handle 1: root qfq",
+ "expExitCode": "0",
+ "verifyCmd": "$TC qdisc show dev $DUMMY",
+ "matchPattern": "qdisc qfq 1: root refcnt [0-9]+",
+ "matchCount": "1",
+ "teardown": [
+ "$TC qdisc del dev $DUMMY handle 1: root"
+ ]
+ },
+ {
+ "id": "c9a3",
+ "name": "Create QFQ with class weight setting",
+ "category": [
+ "qdisc",
+ "qfq"
+ ],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
+ "setup": [
+ "$TC qdisc add dev $DUMMY handle 1: root qfq"
+ ],
+ "cmdUnderTest": "$TC class add dev $DUMMY parent 1: classid 1:1 qfq weight 100",
+ "expExitCode": "0",
+ "verifyCmd": "$TC class show dev $DUMMY",
+ "matchPattern": "class qfq 1:1 root weight 100 maxpkt",
+ "matchCount": "1",
+ "teardown": [
+ "$TC qdisc del dev $DUMMY handle 1: root"
+ ]
+ },
+ {
+ "id": "d364",
+ "name": "Test QFQ with max class weight setting",
+ "category": [
+ "qdisc",
+ "qfq"
+ ],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
+ "setup": [
+ "$TC qdisc add dev $DUMMY handle 1: root qfq"
+ ],
+ "cmdUnderTest": "$TC class add dev $DUMMY parent 1: classid 1:1 qfq weight 9999",
+ "expExitCode": "2",
+ "verifyCmd": "$TC class show dev $DUMMY",
+ "matchPattern": "class qfq 1:1 root weight 9999 maxpkt",
+ "matchCount": "0",
+ "teardown": [
+ "$TC qdisc del dev $DUMMY handle 1: root"
+ ]
+ },
+ {
+ "id": "8452",
+ "name": "Create QFQ with class maxpkt setting",
+ "category": [
+ "qdisc",
+ "qfq"
+ ],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
+ "setup": [
+ "$TC qdisc add dev $DUMMY handle 1: root qfq"
+ ],
+ "cmdUnderTest": "$TC class add dev $DUMMY parent 1: classid 1:1 qfq maxpkt 2000",
+ "expExitCode": "0",
+ "verifyCmd": "$TC class show dev $DUMMY",
+ "matchPattern": "class qfq 1:1 root weight 1 maxpkt 2000",
+ "matchCount": "1",
+ "teardown": [
+ "$TC qdisc del dev $DUMMY handle 1: root"
+ ]
+ },
+ {
+ "id": "22df",
+ "name": "Test QFQ class maxpkt setting lower bound",
+ "category": [
+ "qdisc",
+ "qfq"
+ ],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
+ "setup": [
+ "$TC qdisc add dev $DUMMY handle 1: root qfq"
+ ],
+ "cmdUnderTest": "$TC class add dev $DUMMY parent 1: classid 1:1 qfq maxpkt 128",
+ "expExitCode": "2",
+ "verifyCmd": "$TC class show dev $DUMMY",
+ "matchPattern": "class qfq 1:1 root weight 1 maxpkt 128",
+ "matchCount": "0",
+ "teardown": [
+ "$TC qdisc del dev $DUMMY handle 1: root"
+ ]
+ },
+ {
+ "id": "92ee",
+ "name": "Test QFQ class maxpkt setting upper bound",
+ "category": [
+ "qdisc",
+ "qfq"
+ ],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
+ "setup": [
+ "$TC qdisc add dev $DUMMY handle 1: root qfq"
+ ],
+ "cmdUnderTest": "$TC class add dev $DUMMY parent 1: classid 1:1 qfq maxpkt 99999",
+ "expExitCode": "2",
+ "verifyCmd": "$TC class show dev $DUMMY",
+ "matchPattern": "class qfq 1:1 root weight 1 maxpkt 99999",
+ "matchCount": "0",
+ "teardown": [
+ "$TC qdisc del dev $DUMMY handle 1: root"
+ ]
+ },
+ {
+ "id": "d920",
+ "name": "Create QFQ with multiple class setting",
+ "category": [
+ "qdisc",
+ "qfq"
+ ],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
+ "setup": [
+ "$TC qdisc add dev $DUMMY handle 1: root qfq",
+ "$TC class add dev $DUMMY parent 1: classid 1:1 qfq weight 100"
+ ],
+ "cmdUnderTest": "$TC class add dev $DUMMY parent 1: classid 1:2 qfq weight 200",
+ "expExitCode": "0",
+ "verifyCmd": "$TC class show dev $DUMMY",
+ "matchPattern": "class qfq 1:[0-9]+ root weight [0-9]+00 maxpkt",
+ "matchCount": "2",
+ "teardown": [
+ "$TC qdisc del dev $DUMMY handle 1: root"
+ ]
+ },
+ {
+ "id": "0548",
+ "name": "Delete QFQ with handle",
+ "category": [
+ "qdisc",
+ "qfq"
+ ],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
+ "setup": [
+ "$TC qdisc add dev $DUMMY handle 1: root qfq",
+ "$TC class add dev $DUMMY parent 1: classid 1:1 qfq weight 100"
+ ],
+ "cmdUnderTest": "$TC qdisc del dev $DUMMY handle 1: root",
+ "expExitCode": "0",
+ "verifyCmd": "$TC class show dev $DUMMY",
+ "matchPattern": "qdisc qfq 1: root refcnt [0-9]+",
+ "matchCount": "0",
+ "teardown": [
+ ]
+ },
+ {
+ "id": "5901",
+ "name": "Show QFQ class",
+ "category": [
+ "qdisc",
+ "qfq"
+ ],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
+ "setup": [
+ ],
+ "cmdUnderTest": "$TC qdisc add dev $DUMMY handle 1: root qfq",
+ "expExitCode": "0",
+ "verifyCmd": "$TC class show dev $DUMMY",
+ "matchPattern": "class qfq 1:",
+ "matchCount": "0",
+ "teardown": [
+ "$TC qdisc del dev $DUMMY handle 1: root"
+ ]
+ },
+ {
+ "id": "85ee",
+ "name": "QFQ with big MTU",
+ "category": [
+ "qdisc",
+ "qfq"
+ ],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
+ "setup": [
+ "$IP link set dev $DUMMY mtu 2147483647 || /bin/true",
+ "$TC qdisc add dev $DUMMY handle 1: root qfq"
+ ],
+ "cmdUnderTest": "$TC class add dev $DUMMY parent 1: classid 1:1 qfq weight 100",
+ "expExitCode": "2",
+ "verifyCmd": "$TC class show dev $DUMMY",
+ "matchPattern": "class qfq 1:",
+ "matchCount": "0",
+ "teardown": [
+ ]
+ },
+ {
+ "id": "ddfa",
+ "name": "QFQ with small MTU",
+ "category": [
+ "qdisc",
+ "qfq"
+ ],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
+ "setup": [
+ "$IP link set dev $DUMMY mtu 256 || /bin/true",
+ "$TC qdisc add dev $DUMMY handle 1: root qfq"
+ ],
+ "cmdUnderTest": "$TC class add dev $DUMMY parent 1: classid 1:1 qfq weight 100",
+ "expExitCode": "2",
+ "verifyCmd": "$TC class show dev $DUMMY",
+ "matchPattern": "class qfq 1:",
+ "matchCount": "0",
+ "teardown": [
+ ]
+ },
+ {
+ "id": "5993",
+ "name": "QFQ with stab overhead greater than max packet len",
+ "category": [
+ "qdisc",
+ "qfq",
+ "scapy"
+ ],
+ "plugins": {
+ "requires": [
+ "nsPlugin",
+ "scapyPlugin"
+ ]
+ },
+ "setup": [
+ "$IP link set dev $DUMMY up || /bin/true",
+ "$TC qdisc add dev $DUMMY handle 1: stab mtu 2048 tsize 512 mpu 0 overhead 999999999 linklayer ethernet root qfq",
+ "$TC class add dev $DUMMY parent 1: classid 1:1 qfq weight 100",
+ "$TC qdisc add dev $DEV1 clsact",
+ "$TC filter add dev $DEV1 ingress protocol ip flower dst_ip 1.3.3.7/32 action mirred egress mirror dev $DUMMY"
+ ],
+ "cmdUnderTest": "$TC filter add dev $DUMMY parent 1: matchall classid 1:1",
+ "scapy": [
+ {
+ "iface": "$DEV0",
+ "count": 22,
+ "packet": "Ether(type=0x800)/IP(src='10.0.0.10',dst='1.3.3.7')/TCP(sport=5000,dport=10)"
+ }
+ ],
+ "expExitCode": "0",
+ "verifyCmd": "$TC -s qdisc ls dev $DUMMY",
+ "matchPattern": "dropped 22",
+ "matchCount": "1",
+ "teardown": [
+ "$TC qdisc del dev $DUMMY handle 1: root qfq"
+ ]
+ }
+]
diff --git a/tools/testing/selftests/tc-testing/tc-tests/qdiscs/red.json b/tools/testing/selftests/tc-testing/tc-tests/qdiscs/red.json
index 0703a2a255eb..eec73fda6c80 100644
--- a/tools/testing/selftests/tc-testing/tc-tests/qdiscs/red.json
+++ b/tools/testing/selftests/tc-testing/tc-tests/qdiscs/red.json
@@ -10,7 +10,6 @@
"requires": "nsPlugin"
},
"setup": [
- "$IP link add dev $DUMMY type dummy || /bin/true"
],
"cmdUnderTest": "$TC qdisc add dev $DUMMY handle 1: root red limit 1M avpkt 1500 min 100K max 300K",
"expExitCode": "0",
@@ -18,8 +17,7 @@
"matchPattern": "qdisc red 1: root .* limit 1Mb min 100Kb max 300Kb $",
"matchCount": "1",
"teardown": [
- "$TC qdisc del dev $DUMMY handle 1: root",
- "$IP link del dev $DUMMY type dummy"
+ "$TC qdisc del dev $DUMMY handle 1: root"
]
},
{
@@ -33,7 +31,6 @@
"requires": "nsPlugin"
},
"setup": [
- "$IP link add dev $DUMMY type dummy || /bin/true"
],
"cmdUnderTest": "$TC qdisc add dev $DUMMY handle 1: root red adaptive limit 1M avpkt 1500 min 100K max 300K",
"expExitCode": "0",
@@ -41,8 +38,7 @@
"matchPattern": "qdisc red 1: root .* limit 1Mb min 100Kb max 300Kb adaptive $",
"matchCount": "1",
"teardown": [
- "$TC qdisc del dev $DUMMY handle 1: root",
- "$IP link del dev $DUMMY type dummy"
+ "$TC qdisc del dev $DUMMY handle 1: root"
]
},
{
@@ -56,7 +52,6 @@
"requires": "nsPlugin"
},
"setup": [
- "$IP link add dev $DUMMY type dummy || /bin/true"
],
"cmdUnderTest": "$TC qdisc add dev $DUMMY handle 1: root red ecn limit 1M avpkt 1500 min 100K max 300K",
"expExitCode": "0",
@@ -64,8 +59,7 @@
"matchPattern": "qdisc red 1: root .* limit 1Mb min 100Kb max 300Kb ecn $",
"matchCount": "1",
"teardown": [
- "$TC qdisc del dev $DUMMY handle 1: root",
- "$IP link del dev $DUMMY type dummy"
+ "$TC qdisc del dev $DUMMY handle 1: root"
]
},
{
@@ -79,7 +73,6 @@
"requires": "nsPlugin"
},
"setup": [
- "$IP link add dev $DUMMY type dummy || /bin/true"
],
"cmdUnderTest": "$TC qdisc add dev $DUMMY handle 1: root red ecn adaptive limit 1M avpkt 1500 min 100K max 300K",
"expExitCode": "0",
@@ -87,8 +80,7 @@
"matchPattern": "qdisc red 1: root .* limit 1Mb min 100Kb max 300Kb ecn adaptive $",
"matchCount": "1",
"teardown": [
- "$TC qdisc del dev $DUMMY handle 1: root",
- "$IP link del dev $DUMMY type dummy"
+ "$TC qdisc del dev $DUMMY handle 1: root"
]
},
{
@@ -102,7 +94,6 @@
"requires": "nsPlugin"
},
"setup": [
- "$IP link add dev $DUMMY type dummy || /bin/true"
],
"cmdUnderTest": "$TC qdisc add dev $DUMMY handle 1: root red ecn harddrop limit 1M avpkt 1500 min 100K max 300K",
"expExitCode": "0",
@@ -110,8 +101,7 @@
"matchPattern": "qdisc red 1: root .* limit 1Mb min 100Kb max 300Kb ecn harddrop $",
"matchCount": "1",
"teardown": [
- "$TC qdisc del dev $DUMMY handle 1: root",
- "$IP link del dev $DUMMY type dummy"
+ "$TC qdisc del dev $DUMMY handle 1: root"
]
},
{
@@ -125,7 +115,6 @@
"requires": "nsPlugin"
},
"setup": [
- "$IP link add dev $DUMMY type dummy || /bin/true"
],
"cmdUnderTest": "$TC qdisc add dev $DUMMY handle 1: root red ecn nodrop limit 1M avpkt 1500 min 100K max 300K",
"expExitCode": "0",
@@ -133,8 +122,7 @@
"matchPattern": "qdisc red 1: root .* limit 1Mb min 100Kb max 300Kb ecn nodrop $",
"matchCount": "1",
"teardown": [
- "$TC qdisc del dev $DUMMY handle 1: root",
- "$IP link del dev $DUMMY type dummy"
+ "$TC qdisc del dev $DUMMY handle 1: root"
]
},
{
@@ -148,7 +136,6 @@
"requires": "nsPlugin"
},
"setup": [
- "$IP link add dev $DUMMY type dummy || /bin/true"
],
"cmdUnderTest": "$TC qdisc add dev $DUMMY handle 1: root red nodrop limit 1M avpkt 1500 min 100K max 300K",
"expExitCode": "2",
@@ -156,7 +143,6 @@
"matchPattern": "qdisc red",
"matchCount": "0",
"teardown": [
- "$IP link del dev $DUMMY type dummy"
]
},
{
@@ -170,7 +156,6 @@
"requires": "nsPlugin"
},
"setup": [
- "$IP link add dev $DUMMY type dummy || /bin/true"
],
"cmdUnderTest": "$TC qdisc add dev $DUMMY handle 1: root red ecn harddrop nodrop limit 1M avpkt 1500 min 100K max 300K",
"expExitCode": "0",
@@ -178,8 +163,28 @@
"matchPattern": "qdisc red 1: root .* limit 1Mb min 100Kb max 300Kb ecn harddrop nodrop $",
"matchCount": "1",
"teardown": [
- "$TC qdisc del dev $DUMMY handle 1: root",
- "$IP link del dev $DUMMY type dummy"
+ "$TC qdisc del dev $DUMMY handle 1: root"
+ ]
+ },
+ {
+ "id": "290a",
+ "name": "Show RED class",
+ "category": [
+ "qdisc",
+ "red"
+ ],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
+ "setup": [
+ ],
+ "cmdUnderTest": "$TC qdisc add dev $DUMMY handle 1: root red limit 1M avpkt 1500 min 100K max 300K",
+ "expExitCode": "0",
+ "verifyCmd": "$TC class show dev $DUMMY",
+ "matchPattern": "class red 1:[0-9]+ parent 1:",
+ "matchCount": "1",
+ "teardown": [
+ "$TC qdisc del dev $DUMMY handle 1: root"
]
}
]
diff --git a/tools/testing/selftests/tc-testing/tc-tests/qdiscs/sfb.json b/tools/testing/selftests/tc-testing/tc-tests/qdiscs/sfb.json
new file mode 100644
index 000000000000..aa7914c441ea
--- /dev/null
+++ b/tools/testing/selftests/tc-testing/tc-tests/qdiscs/sfb.json
@@ -0,0 +1,255 @@
+[
+ {
+ "id": "3294",
+ "name": "Create SFB with default setting",
+ "category": [
+ "qdisc",
+ "sfb"
+ ],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
+ "setup": [
+ ],
+ "cmdUnderTest": "$TC qdisc add dev $DUMMY handle 1: root sfb",
+ "expExitCode": "0",
+ "verifyCmd": "$TC qdisc show dev $DUMMY",
+ "matchPattern": "qdisc sfb 1: root refcnt [0-9]+ rehash 600s db 60s",
+ "matchCount": "1",
+ "teardown": [
+ "$TC qdisc del dev $DUMMY handle 1: root"
+ ]
+ },
+ {
+ "id": "430a",
+ "name": "Create SFB with rehash setting",
+ "category": [
+ "qdisc",
+ "sfb"
+ ],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
+ "setup": [
+ ],
+ "cmdUnderTest": "$TC qdisc add dev $DUMMY handle 1: root sfb rehash 60",
+ "expExitCode": "0",
+ "verifyCmd": "$TC qdisc show dev $DUMMY",
+ "matchPattern": "qdisc sfb 1: root refcnt [0-9]+ rehash 60ms db 60s",
+ "matchCount": "1",
+ "teardown": [
+ "$TC qdisc del dev $DUMMY handle 1: root"
+ ]
+ },
+ {
+ "id": "3410",
+ "name": "Create SFB with db setting",
+ "category": [
+ "qdisc",
+ "sfb"
+ ],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
+ "setup": [
+ ],
+ "cmdUnderTest": "$TC qdisc add dev $DUMMY handle 1: root sfb db 100",
+ "expExitCode": "0",
+ "verifyCmd": "$TC qdisc show dev $DUMMY",
+ "matchPattern": "qdisc sfb 1: root refcnt [0-9]+ rehash 600s db 100ms",
+ "matchCount": "1",
+ "teardown": [
+ "$TC qdisc del dev $DUMMY handle 1: root"
+ ]
+ },
+ {
+ "id": "49a0",
+ "name": "Create SFB with limit setting",
+ "category": [
+ "qdisc",
+ "sfb"
+ ],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
+ "setup": [
+ ],
+ "cmdUnderTest": "$TC qdisc add dev $DUMMY handle 1: root sfb limit 100",
+ "expExitCode": "0",
+ "verifyCmd": "$TC qdisc show dev $DUMMY",
+ "matchPattern": "qdisc sfb 1: root refcnt [0-9]+ rehash 600s db 60s limit 100p",
+ "matchCount": "1",
+ "teardown": [
+ "$TC qdisc del dev $DUMMY handle 1: root"
+ ]
+ },
+ {
+ "id": "1241",
+ "name": "Create SFB with max setting",
+ "category": [
+ "qdisc",
+ "sfb"
+ ],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
+ "setup": [
+ ],
+ "cmdUnderTest": "$TC qdisc add dev $DUMMY handle 1: root sfb max 100",
+ "expExitCode": "0",
+ "verifyCmd": "$TC qdisc show dev $DUMMY",
+ "matchPattern": "qdisc sfb 1: root refcnt 2 rehash 600s db 60s.*max 100p",
+ "matchCount": "1",
+ "teardown": [
+ "$TC qdisc del dev $DUMMY handle 1: root"
+ ]
+ },
+ {
+ "id": "3249",
+ "name": "Create SFB with target setting",
+ "category": [
+ "qdisc",
+ "sfb"
+ ],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
+ "setup": [
+ ],
+ "cmdUnderTest": "$TC qdisc add dev $DUMMY handle 1: root sfb target 100",
+ "expExitCode": "0",
+ "verifyCmd": "$TC qdisc show dev $DUMMY",
+ "matchPattern": "qdisc sfb 1: root refcnt 2 rehash 600s db 60s.*target 100p",
+ "matchCount": "1",
+ "teardown": [
+ "$TC qdisc del dev $DUMMY handle 1: root"
+ ]
+ },
+ {
+ "id": "30a9",
+ "name": "Create SFB with increment setting",
+ "category": [
+ "qdisc",
+ "sfb"
+ ],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
+ "setup": [
+ ],
+ "cmdUnderTest": "$TC qdisc add dev $DUMMY handle 1: root sfb increment 0.1",
+ "expExitCode": "0",
+ "verifyCmd": "$TC qdisc show dev $DUMMY",
+ "matchPattern": "qdisc sfb 1: root refcnt 2 rehash 600s db 60s.*increment 0.1",
+ "matchCount": "1",
+ "teardown": [
+ "$TC qdisc del dev $DUMMY handle 1: root"
+ ]
+ },
+ {
+ "id": "239a",
+ "name": "Create SFB with decrement setting",
+ "category": [
+ "qdisc",
+ "sfb"
+ ],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
+ "setup": [
+ ],
+ "cmdUnderTest": "$TC qdisc add dev $DUMMY handle 1: root sfb decrement 0.1",
+ "expExitCode": "0",
+ "verifyCmd": "$TC qdisc show dev $DUMMY",
+ "matchPattern": "qdisc sfb 1: root refcnt 2 rehash 600s db 60s.*decrement 0.1",
+ "matchCount": "1",
+ "teardown": [
+ "$TC qdisc del dev $DUMMY handle 1: root"
+ ]
+ },
+ {
+ "id": "9301",
+ "name": "Create SFB with penalty_rate setting",
+ "category": [
+ "qdisc",
+ "sfb"
+ ],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
+ "setup": [
+ ],
+ "cmdUnderTest": "$TC qdisc add dev $DUMMY handle 1: root sfb penalty_rate 4000",
+ "expExitCode": "0",
+ "verifyCmd": "$TC qdisc show dev $DUMMY",
+ "matchPattern": "qdisc sfb 1: root refcnt 2 rehash 600s db 60s.*penalty_rate 4000pps",
+ "matchCount": "1",
+ "teardown": [
+ "$TC qdisc del dev $DUMMY handle 1: root"
+ ]
+ },
+ {
+ "id": "2a01",
+ "name": "Create SFB with penalty_burst setting",
+ "category": [
+ "qdisc",
+ "sfb"
+ ],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
+ "setup": [
+ ],
+ "cmdUnderTest": "$TC qdisc add dev $DUMMY handle 1: root sfb penalty_burst 64",
+ "expExitCode": "0",
+ "verifyCmd": "$TC qdisc show dev $DUMMY",
+ "matchPattern": "qdisc sfb 1: root refcnt 2 rehash 600s db 60s.*penalty_burst 64p",
+ "matchCount": "1",
+ "teardown": [
+ "$TC qdisc del dev $DUMMY handle 1: root"
+ ]
+ },
+ {
+ "id": "3209",
+ "name": "Change SFB with rehash setting",
+ "category": [
+ "qdisc",
+ "sfb"
+ ],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
+ "setup": [
+ "$TC qdisc add dev $DUMMY handle 1: root sfb penalty_burst 64"
+ ],
+ "cmdUnderTest": "$TC qdisc change dev $DUMMY handle 1: root sfb rehash 100",
+ "expExitCode": "0",
+ "verifyCmd": "$TC qdisc show dev $DUMMY",
+ "matchPattern": "qdisc sfb 1: root refcnt 2 rehash 100ms db 60s",
+ "matchCount": "1",
+ "teardown": [
+ "$TC qdisc del dev $DUMMY handle 1: root"
+ ]
+ },
+ {
+ "id": "5447",
+ "name": "Show SFB class",
+ "category": [
+ "qdisc",
+ "sfb"
+ ],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
+ "setup": [
+ ],
+ "cmdUnderTest": "$TC qdisc add dev $DUMMY handle 1: root sfb",
+ "expExitCode": "0",
+ "verifyCmd": "$TC class show dev $DUMMY",
+ "matchPattern": "class sfb 1:",
+ "matchCount": "0",
+ "teardown": [
+ "$TC qdisc del dev $DUMMY handle 1: root"
+ ]
+ }
+]
diff --git a/tools/testing/selftests/tc-testing/tc-tests/qdiscs/sfq.json b/tools/testing/selftests/tc-testing/tc-tests/qdiscs/sfq.json
new file mode 100644
index 000000000000..16d51936b385
--- /dev/null
+++ b/tools/testing/selftests/tc-testing/tc-tests/qdiscs/sfq.json
@@ -0,0 +1,212 @@
+[
+ {
+ "id": "7482",
+ "name": "Create SFQ with default setting",
+ "category": [
+ "qdisc",
+ "sfq"
+ ],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
+ "setup": [
+ ],
+ "cmdUnderTest": "$TC qdisc add dev $DUMMY handle 1: root sfq",
+ "expExitCode": "0",
+ "verifyCmd": "$TC qdisc show dev $DUMMY",
+ "matchPattern": "qdisc sfq 1: root refcnt [0-9]+ limit 127p quantum.*depth 127 divisor 1024",
+ "matchCount": "1",
+ "teardown": [
+ "$TC qdisc del dev $DUMMY handle 1: root"
+ ]
+ },
+ {
+ "id": "c186",
+ "name": "Create SFQ with limit setting",
+ "category": [
+ "qdisc",
+ "sfq"
+ ],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
+ "setup": [
+ ],
+ "cmdUnderTest": "$TC qdisc add dev $DUMMY handle 1: root sfq limit 8",
+ "expExitCode": "0",
+ "verifyCmd": "$TC qdisc show dev $DUMMY",
+ "matchPattern": "qdisc sfq 1: root refcnt [0-9]+ limit 8p",
+ "matchCount": "1",
+ "teardown": [
+ "$TC qdisc del dev $DUMMY handle 1: root"
+ ]
+ },
+ {
+ "id": "ae23",
+ "name": "Create SFQ with perturb setting",
+ "category": [
+ "qdisc",
+ "sfq"
+ ],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
+ "setup": [
+ ],
+ "cmdUnderTest": "$TC qdisc add dev $DUMMY handle 1: root sfq perturb 10",
+ "expExitCode": "0",
+ "verifyCmd": "$TC qdisc show dev $DUMMY",
+ "matchPattern": "depth 127 divisor 1024 perturb 10sec",
+ "matchCount": "1",
+ "teardown": [
+ "$TC qdisc del dev $DUMMY handle 1: root"
+ ]
+ },
+ {
+ "id": "a430",
+ "name": "Create SFQ with quantum setting",
+ "category": [
+ "qdisc",
+ "sfq"
+ ],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
+ "setup": [
+ ],
+ "cmdUnderTest": "$TC qdisc add dev $DUMMY handle 1: root sfq quantum 9000",
+ "expExitCode": "0",
+ "verifyCmd": "$TC qdisc show dev $DUMMY",
+ "matchPattern": "qdisc sfq 1: root refcnt [0-9]+ limit 127p quantum 9000b depth 127 divisor 1024",
+ "matchCount": "1",
+ "teardown": [
+ "$TC qdisc del dev $DUMMY handle 1: root"
+ ]
+ },
+ {
+ "id": "4539",
+ "name": "Create SFQ with divisor setting",
+ "category": [
+ "qdisc",
+ "sfq"
+ ],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
+ "setup": [
+ ],
+ "cmdUnderTest": "$TC qdisc add dev $DUMMY handle 1: root sfq divisor 512",
+ "expExitCode": "0",
+ "verifyCmd": "$TC qdisc show dev $DUMMY",
+ "matchPattern": "qdisc sfq 1: root refcnt [0-9]+ limit 127p quantum 1514b depth 127 divisor 512",
+ "matchCount": "1",
+ "teardown": [
+ "$TC qdisc del dev $DUMMY handle 1: root"
+ ]
+ },
+ {
+ "id": "b089",
+ "name": "Create SFQ with flows setting",
+ "category": [
+ "qdisc",
+ "sfq"
+ ],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
+ "setup": [
+ ],
+ "cmdUnderTest": "$TC qdisc add dev $DUMMY handle 1: root sfq flows 20",
+ "expExitCode": "0",
+ "verifyCmd": "$TC qdisc show dev $DUMMY",
+ "matchPattern": "qdisc sfq 1: root refcnt",
+ "matchCount": "1",
+ "teardown": [
+ "$TC qdisc del dev $DUMMY handle 1: root"
+ ]
+ },
+ {
+ "id": "99a0",
+ "name": "Create SFQ with depth setting",
+ "category": [
+ "qdisc",
+ "sfq"
+ ],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
+ "setup": [
+ ],
+ "cmdUnderTest": "$TC qdisc add dev $DUMMY handle 1: root sfq depth 64",
+ "expExitCode": "0",
+ "verifyCmd": "$TC qdisc show dev $DUMMY",
+ "matchPattern": "qdisc sfq 1: root refcnt [0-9]+ limit 127p quantum 1514b depth 64 divisor 1024",
+ "matchCount": "1",
+ "teardown": [
+ "$TC qdisc del dev $DUMMY handle 1: root"
+ ]
+ },
+ {
+ "id": "7389",
+ "name": "Create SFQ with headdrop setting",
+ "category": [
+ "qdisc",
+ "sfq"
+ ],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
+ "setup": [
+ ],
+ "cmdUnderTest": "$TC qdisc add dev $DUMMY handle 1: root sfq headdrop",
+ "expExitCode": "0",
+ "verifyCmd": "$TC qdisc show dev $DUMMY",
+ "matchPattern": "qdisc sfq 1: root refcnt [0-9]+ limit 127p quantum 1514b depth 127 headdrop divisor 1024",
+ "matchCount": "1",
+ "teardown": [
+ "$TC qdisc del dev $DUMMY handle 1: root"
+ ]
+ },
+ {
+ "id": "6472",
+ "name": "Create SFQ with redflowlimit setting",
+ "category": [
+ "qdisc",
+ "sfq"
+ ],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
+ "setup": [
+ ],
+ "cmdUnderTest": "$TC qdisc add dev $DUMMY handle 1: root sfq redflowlimit 100000 min 8000 max 60000 probability 0.20 ecn headdrop",
+ "expExitCode": "0",
+ "verifyCmd": "$TC qdisc show dev $DUMMY",
+ "matchPattern": "qdisc sfq 1: root refcnt [0-9]+ limit 127p quantum 1514b depth 127 headdrop divisor 1024 ewma 6 min 8000b max 60000b probability 0.2 ecn",
+ "matchCount": "1",
+ "teardown": [
+ "$TC qdisc del dev $DUMMY handle 1: root"
+ ]
+ },
+ {
+ "id": "8929",
+ "name": "Show SFQ class",
+ "category": [
+ "qdisc",
+ "sfq"
+ ],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
+ "setup": [
+ ],
+ "cmdUnderTest": "$TC qdisc add dev $DUMMY handle 1: root sfq",
+ "expExitCode": "0",
+ "verifyCmd": "$TC class show dev $DUMMY",
+ "matchPattern": "class sfq 1:",
+ "matchCount": "0",
+ "teardown": [
+ "$TC qdisc del dev $DUMMY handle 1: root"
+ ]
+ }
+]
diff --git a/tools/testing/selftests/tc-testing/tc-tests/qdiscs/skbprio.json b/tools/testing/selftests/tc-testing/tc-tests/qdiscs/skbprio.json
new file mode 100644
index 000000000000..076d1d69a3a4
--- /dev/null
+++ b/tools/testing/selftests/tc-testing/tc-tests/qdiscs/skbprio.json
@@ -0,0 +1,87 @@
+[
+ {
+ "id": "283e",
+ "name": "Create skbprio with default setting",
+ "category": [
+ "qdisc",
+ "skbprio"
+ ],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
+ "setup": [
+ ],
+ "cmdUnderTest": "$TC qdisc add dev $DUMMY handle 1: root skbprio",
+ "expExitCode": "0",
+ "verifyCmd": "$TC qdisc show dev $DUMMY",
+ "matchPattern": "qdisc skbprio 1: root refcnt [0-9]+ limit 64",
+ "matchCount": "1",
+ "teardown": [
+ "$TC qdisc del dev $DUMMY handle 1: root"
+ ]
+ },
+ {
+ "id": "c086",
+ "name": "Create skbprio with limit setting",
+ "category": [
+ "qdisc",
+ "skbprio"
+ ],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
+ "setup": [
+ ],
+ "cmdUnderTest": "$TC qdisc add dev $DUMMY handle 1: root skbprio limit 1",
+ "expExitCode": "0",
+ "verifyCmd": "$TC qdisc show dev $DUMMY",
+ "matchPattern": "qdisc skbprio 1: root refcnt [0-9]+ limit 1",
+ "matchCount": "1",
+ "teardown": [
+ "$TC qdisc del dev $DUMMY handle 1: root"
+ ]
+ },
+ {
+ "id": "6733",
+ "name": "Change skbprio with limit setting",
+ "category": [
+ "qdisc",
+ "skbprio"
+ ],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
+ "setup": [
+ "$TC qdisc add dev $DUMMY handle 1: root skbprio"
+ ],
+ "cmdUnderTest": "$TC qdisc change dev $DUMMY handle 1: root skbprio limit 32",
+ "expExitCode": "0",
+ "verifyCmd": "$TC qdisc show dev $DUMMY",
+ "matchPattern": "qdisc skbprio 1: root refcnt [0-9]+ limit 32",
+ "matchCount": "1",
+ "teardown": [
+ "$TC qdisc del dev $DUMMY handle 1: root"
+ ]
+ },
+ {
+ "id": "2958",
+ "name": "Show skbprio class",
+ "category": [
+ "qdisc",
+ "skbprio"
+ ],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
+ "setup": [
+ ],
+ "cmdUnderTest": "$TC qdisc add dev $DUMMY handle 1: root skbprio",
+ "expExitCode": "0",
+ "verifyCmd": "$TC class show dev $DUMMY",
+ "matchPattern": "class skbprio 1:",
+ "matchCount": "64",
+ "teardown": [
+ "$TC qdisc del dev $DUMMY handle 1: root"
+ ]
+ }
+]
diff --git a/tools/testing/selftests/tc-testing/tc-tests/qdiscs/taprio.json b/tools/testing/selftests/tc-testing/tc-tests/qdiscs/taprio.json
new file mode 100644
index 000000000000..12da0a939e3e
--- /dev/null
+++ b/tools/testing/selftests/tc-testing/tc-tests/qdiscs/taprio.json
@@ -0,0 +1,262 @@
+[
+ {
+ "id": "ba39",
+ "name": "Add taprio Qdisc to multi-queue device (8 queues)",
+ "category": [
+ "qdisc",
+ "taprio"
+ ],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
+ "setup": [
+ "echo \"1 1 8\" > /sys/bus/netdevsim/new_device"
+ ],
+ "cmdUnderTest": "$TC qdisc add dev $ETH root handle 1: taprio num_tc 3 map 2 2 1 0 2 2 2 2 2 2 2 2 2 2 2 2 queues 1@0 1@0 1@0 base-time 1000000000 sched-entry S 01 300000 flags 0x1 clockid CLOCK_TAI",
+ "expExitCode": "0",
+ "verifyCmd": "$TC qdisc show dev $ETH",
+ "matchPattern": "qdisc taprio 1: root refcnt [0-9]+ tc 3 map 2 2 1 0 2 2 2 2 2 2 2 2 2 2 2 2",
+ "matchCount": "1",
+ "teardown": [
+ "echo \"1\" > /sys/bus/netdevsim/del_device"
+ ]
+ },
+ {
+ "id": "9462",
+ "name": "Add taprio Qdisc with multiple sched-entry",
+ "category": [
+ "qdisc",
+ "taprio"
+ ],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
+ "setup": [
+ "echo \"1 1 8\" > /sys/bus/netdevsim/new_device"
+ ],
+ "cmdUnderTest": "$TC qdisc add dev $ETH root handle 1: taprio num_tc 3 map 2 2 1 0 2 2 2 2 2 2 2 2 2 2 2 2 queues 1@0 1@0 1@0 base-time 1000000000 sched-entry S 01 300000 sched-entry S 03 300000 sched-entry S 04 400000 flags 0x1 clockid CLOCK_TAI",
+ "expExitCode": "0",
+ "verifyCmd": "$TC qdisc show dev $ETH",
+ "matchPattern": "index [0-9]+ cmd S gatemask 0x[0-9]+ interval [0-9]+00000",
+ "matchCount": "3",
+ "teardown": [
+ "echo \"1\" > /sys/bus/netdevsim/del_device"
+ ]
+ },
+ {
+ "id": "8d92",
+ "name": "Add taprio Qdisc with txtime-delay",
+ "category": [
+ "qdisc",
+ "taprio"
+ ],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
+ "setup": [
+ "echo \"1 1 8\" > /sys/bus/netdevsim/new_device"
+ ],
+ "cmdUnderTest": "$TC qdisc add dev $ETH root handle 1: taprio num_tc 3 map 2 2 1 0 2 2 2 2 2 2 2 2 2 2 2 2 queues 1@0 1@0 1@0 base-time 1000000000 sched-entry S 01 300000 flags 0x1 txtime-delay 500000 clockid CLOCK_TAI",
+ "expExitCode": "0",
+ "verifyCmd": "$TC qdisc show dev $ETH",
+ "matchPattern": "clockid TAI flags 0x1 txtime delay 500000",
+ "matchCount": "1",
+ "teardown": [
+ "echo \"1\" > /sys/bus/netdevsim/del_device"
+ ]
+ },
+ {
+ "id": "d092",
+ "name": "Delete taprio Qdisc with valid handle",
+ "category": [
+ "qdisc",
+ "taprio"
+ ],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
+ "setup": [
+ "echo \"1 1 8\" > /sys/bus/netdevsim/new_device",
+ "$TC qdisc add dev $ETH root handle 1: taprio num_tc 3 map 2 2 1 0 2 2 2 2 2 2 2 2 2 2 2 2 queues 1@0 1@0 1@0 base-time 1000000000 sched-entry S 01 300000 flags 0x1 clockid CLOCK_TAI"
+ ],
+ "cmdUnderTest": "$TC qdisc del dev $ETH root handle 1:",
+ "expExitCode": "0",
+ "verifyCmd": "$TC qdisc show dev $ETH",
+ "matchPattern": "qdisc taprio 1: root refcnt",
+ "matchCount": "0",
+ "teardown": [
+ "echo \"1\" > /sys/bus/netdevsim/del_device"
+ ]
+ },
+ {
+ "id": "8471",
+ "name": "Show taprio class",
+ "category": [
+ "qdisc",
+ "taprio"
+ ],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
+ "setup": [
+ "echo \"1 1 8\" > /sys/bus/netdevsim/new_device"
+ ],
+ "cmdUnderTest": "$TC qdisc add dev $ETH root handle 1: taprio num_tc 3 map 2 2 1 0 2 2 2 2 2 2 2 2 2 2 2 2 queues 1@0 1@0 1@0 base-time 1000000000 sched-entry S 01 300000 flags 0x1 clockid CLOCK_TAI",
+ "expExitCode": "0",
+ "verifyCmd": "$TC class show dev $ETH",
+ "matchPattern": "class taprio 1:[0-9]+ root",
+ "matchCount": "8",
+ "teardown": [
+ "echo \"1\" > /sys/bus/netdevsim/del_device"
+ ]
+ },
+ {
+ "id": "0a85",
+ "name": "Add taprio Qdisc to single-queue device",
+ "category": [
+ "qdisc",
+ "taprio"
+ ],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
+ "setup": [
+ "echo \"1 1\" > /sys/bus/netdevsim/new_device"
+ ],
+ "cmdUnderTest": "$TC qdisc add dev $ETH root handle 1: taprio num_tc 3 map 2 2 1 0 2 2 2 2 2 2 2 2 2 2 2 2 queues 1@0 1@0 1@0 base-time 1000000000 sched-entry S 01 300000 flags 0x1 clockid CLOCK_TAI",
+ "expExitCode": "2",
+ "verifyCmd": "$TC qdisc show dev $ETH",
+ "matchPattern": "qdisc taprio 1: root refcnt",
+ "matchCount": "0",
+ "teardown": [
+ "echo \"1\" > /sys/bus/netdevsim/del_device"
+ ]
+ },
+ {
+ "id": "3e1e",
+ "name": "Add taprio Qdisc with an invalid cycle-time",
+ "category": [
+ "qdisc",
+ "taprio"
+ ],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
+ "setup": [
+ "echo \"1 1 8\" > /sys/bus/netdevsim/new_device",
+ "$TC qdisc add dev $ETH root handle 1: taprio num_tc 3 map 2 2 1 0 2 2 2 2 2 2 2 2 2 2 2 2 queues 1@0 1@0 1@0 base-time 1000000000 sched-entry S 01 300000 flags 0x1 clockid CLOCK_TAI cycle-time 4294967296 || /bin/true",
+ "$IP link set dev $ETH up",
+ "$IP addr add 10.10.10.10/24 dev $ETH"
+ ],
+ "cmdUnderTest": "/bin/true",
+ "expExitCode": "0",
+ "verifyCmd": "$TC qdisc show dev $ETH",
+ "matchPattern": "qdisc taprio 1: root refcnt",
+ "matchCount": "0",
+ "teardown": [
+ "echo \"1\" > /sys/bus/netdevsim/del_device"
+ ]
+ },
+ {
+ "id": "39b4",
+ "name": "Reject grafting taprio as child qdisc of software taprio",
+ "category": [
+ "qdisc",
+ "taprio"
+ ],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
+ "dependsOn": "echo '' | jq",
+ "setup": [
+ "echo \"1 1 8\" > /sys/bus/netdevsim/new_device",
+ "$TC qdisc replace dev $ETH handle 8001: parent root stab overhead 24 taprio num_tc 8 map 0 1 2 3 4 5 6 7 queues 1@0 1@1 1@2 1@3 1@4 1@5 1@6 1@7 base-time 0 sched-entry S ff 20000000 clockid CLOCK_TAI",
+ "./scripts/taprio_wait_for_admin.sh $TC $ETH"
+ ],
+ "cmdUnderTest": "$TC qdisc replace dev $ETH parent 8001:7 taprio num_tc 8 map 0 1 2 3 4 5 6 7 queues 1@0 1@1 1@2 1@3 1@4 1@5 1@6 1@7 base-time 200 sched-entry S ff 20000000 clockid CLOCK_TAI",
+ "expExitCode": "2",
+ "verifyCmd": "bash -c \"./scripts/taprio_wait_for_admin.sh $TC $ETH && $TC -j qdisc show dev $ETH root | jq '.[].options.base_time'\"",
+ "matchPattern": "0",
+ "matchCount": "1",
+ "teardown": [
+ "$TC qdisc del dev $ETH root",
+ "echo \"1\" > /sys/bus/netdevsim/del_device"
+ ]
+ },
+ {
+ "id": "e8a1",
+ "name": "Reject grafting taprio as child qdisc of offloaded taprio",
+ "category": [
+ "qdisc",
+ "taprio"
+ ],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
+ "dependsOn": "echo '' | jq",
+ "setup": [
+ "echo \"1 1 8\" > /sys/bus/netdevsim/new_device",
+ "$TC qdisc replace dev $ETH handle 8001: parent root stab overhead 24 taprio num_tc 8 map 0 1 2 3 4 5 6 7 queues 1@0 1@1 1@2 1@3 1@4 1@5 1@6 1@7 base-time 0 sched-entry S ff 20000000 flags 0x2",
+ "./scripts/taprio_wait_for_admin.sh $TC $ETH"
+ ],
+ "cmdUnderTest": "$TC qdisc replace dev $ETH parent 8001:7 taprio num_tc 8 map 0 1 2 3 4 5 6 7 queues 1@0 1@1 1@2 1@3 1@4 1@5 1@6 1@7 base-time 200 sched-entry S ff 20000000 flags 0x2",
+ "expExitCode": "2",
+ "verifyCmd": "bash -c \"./scripts/taprio_wait_for_admin.sh $TC $ETH && $TC -j qdisc show dev $ETH root | jq '.[].options.base_time'\"",
+ "matchPattern": "0",
+ "matchCount": "1",
+ "teardown": [
+ "$TC qdisc del dev $ETH root",
+ "echo \"1\" > /sys/bus/netdevsim/del_device"
+ ]
+ },
+ {
+ "id": "a7bf",
+ "name": "Graft cbs as child of software taprio",
+ "category": [
+ "qdisc",
+ "taprio",
+ "cbs"
+ ],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
+ "setup": [
+ "echo \"1 1 8\" > /sys/bus/netdevsim/new_device",
+ "$TC qdisc replace dev $ETH handle 8001: parent root stab overhead 24 taprio num_tc 8 map 0 1 2 3 4 5 6 7 queues 1@0 1@1 1@2 1@3 1@4 1@5 1@6 1@7 base-time 0 sched-entry S ff 20000000 clockid CLOCK_TAI"
+ ],
+ "cmdUnderTest": "$TC qdisc replace dev $ETH handle 8002: parent 8001:8 cbs idleslope 20000 sendslope -980000 hicredit 30 locredit -1470",
+ "expExitCode": "0",
+ "verifyCmd": "$TC -d qdisc show dev $ETH",
+ "matchPattern": "qdisc cbs 8002: parent 8001:8 hicredit 30 locredit -1470 sendslope -980000 idleslope 20000 offload 0",
+ "matchCount": "1",
+ "teardown": [
+ "$TC qdisc del dev $ETH root",
+ "echo \"1\" > /sys/bus/netdevsim/del_device"
+ ]
+ },
+ {
+ "id": "6a83",
+ "name": "Graft cbs as child of offloaded taprio",
+ "category": [
+ "qdisc",
+ "taprio",
+ "cbs"
+ ],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
+ "setup": [
+ "echo \"1 1 8\" > /sys/bus/netdevsim/new_device",
+ "$TC qdisc replace dev $ETH handle 8001: parent root stab overhead 24 taprio num_tc 8 map 0 1 2 3 4 5 6 7 queues 1@0 1@1 1@2 1@3 1@4 1@5 1@6 1@7 base-time 0 sched-entry S ff 20000000 flags 0x2"
+ ],
+ "cmdUnderTest": "$TC qdisc replace dev $ETH handle 8002: parent 8001:8 cbs idleslope 20000 sendslope -980000 hicredit 30 locredit -1470",
+ "expExitCode": "0",
+ "verifyCmd": "$TC -d qdisc show dev $ETH",
+ "matchPattern": "qdisc cbs 8002: parent 8001:8 refcnt 2 hicredit 30 locredit -1470 sendslope -980000 idleslope 20000 offload 0",
+ "matchCount": "1",
+ "teardown": [
+ "$TC qdisc del dev $ETH root",
+ "echo \"1\" > /sys/bus/netdevsim/del_device"
+ ]
+ }
+]
diff --git a/tools/testing/selftests/tc-testing/tc-tests/qdiscs/tbf.json b/tools/testing/selftests/tc-testing/tc-tests/qdiscs/tbf.json
new file mode 100644
index 000000000000..547a44910041
--- /dev/null
+++ b/tools/testing/selftests/tc-testing/tc-tests/qdiscs/tbf.json
@@ -0,0 +1,193 @@
+[
+ {
+ "id": "6430",
+ "name": "Create TBF with default setting",
+ "category": [
+ "qdisc",
+ "tbf"
+ ],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
+ "setup": [
+ ],
+ "cmdUnderTest": "$TC qdisc add dev $DUMMY handle 1: root tbf limit 1000 burst 1500 rate 10000",
+ "expExitCode": "0",
+ "verifyCmd": "$TC qdisc show dev $DUMMY",
+ "matchPattern": "qdisc tbf 1: root refcnt [0-9]+ rate 10Kbit burst 1500b limit 1000b",
+ "matchCount": "1",
+ "teardown": [
+ "$TC qdisc del dev $DUMMY handle 1: root"
+ ]
+ },
+ {
+ "id": "0518",
+ "name": "Create TBF with mtu setting",
+ "category": [
+ "qdisc",
+ "tbf"
+ ],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
+ "setup": [
+ ],
+ "cmdUnderTest": "$TC qdisc add dev $DUMMY handle 1: root tbf limit 1000 burst 1500 rate 20000 mtu 2048",
+ "expExitCode": "0",
+ "verifyCmd": "$TC qdisc show dev $DUMMY",
+ "matchPattern": "qdisc tbf 1: root refcnt [0-9]+ rate 20Kbit burst 1500b limit 1000b",
+ "matchCount": "1",
+ "teardown": [
+ "$TC qdisc del dev $DUMMY handle 1: root"
+ ]
+ },
+ {
+ "id": "320a",
+ "name": "Create TBF with peakrate setting",
+ "category": [
+ "qdisc",
+ "tbf"
+ ],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
+ "setup": [
+ ],
+ "cmdUnderTest": "$TC qdisc add dev $DUMMY handle 1: root tbf limit 1000 burst 1500 rate 20000 mtu 1510 peakrate 30000",
+ "expExitCode": "0",
+ "verifyCmd": "$TC qdisc show dev $DUMMY",
+ "matchPattern": "qdisc tbf 1: root refcnt [0-9]+ rate 20Kbit burst 1500b peakrate 30Kbit minburst.*limit 1000b",
+ "matchCount": "1",
+ "teardown": [
+ "$TC qdisc del dev $DUMMY handle 1: root"
+ ]
+ },
+ {
+ "id": "239b",
+ "name": "Create TBF with latency setting",
+ "category": [
+ "qdisc",
+ "tbf"
+ ],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
+ "setup": [
+ ],
+ "cmdUnderTest": "$TC qdisc add dev $DUMMY handle 1: root tbf burst 1500 rate 20000 latency 100ms",
+ "expExitCode": "0",
+ "verifyCmd": "$TC qdisc show dev $DUMMY",
+ "matchPattern": "qdisc tbf 1: root refcnt [0-9]+ rate 20Kbit burst 1500b lat 100ms",
+ "matchCount": "1",
+ "teardown": [
+ "$TC qdisc del dev $DUMMY handle 1: root"
+ ]
+ },
+ {
+ "id": "c975",
+ "name": "Create TBF with overhead setting",
+ "category": [
+ "qdisc",
+ "tbf"
+ ],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
+ "setup": [
+ ],
+ "cmdUnderTest": "$TC qdisc add dev $DUMMY handle 1: root tbf limit 1000 burst 1500 rate 20000 overhead 300",
+ "expExitCode": "0",
+ "verifyCmd": "$TC qdisc show dev $DUMMY",
+ "matchPattern": "qdisc tbf 1: root refcnt [0-9]+ rate 20Kbit burst 1800b limit 1000b overhead 300",
+ "matchCount": "1",
+ "teardown": [
+ "$TC qdisc del dev $DUMMY handle 1: root"
+ ]
+ },
+ {
+ "id": "948c",
+ "name": "Create TBF with linklayer setting",
+ "category": [
+ "qdisc",
+ "tbf"
+ ],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
+ "setup": [
+ ],
+ "cmdUnderTest": "$TC qdisc add dev $DUMMY handle 1: root tbf limit 1000 burst 1500 rate 20000 linklayer atm",
+ "expExitCode": "0",
+ "verifyCmd": "$TC qdisc show dev $DUMMY",
+ "matchPattern": "qdisc tbf 1: root refcnt [0-9]+ rate 20Kbit burst 1696b limit 1000b linklayer atm",
+ "matchCount": "1",
+ "teardown": [
+ "$TC qdisc del dev $DUMMY handle 1: root"
+ ]
+ },
+ {
+ "id": "3549",
+ "name": "Replace TBF with mtu",
+ "category": [
+ "qdisc",
+ "tbf"
+ ],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
+ "setup": [
+ "$TC qdisc add dev $DUMMY handle 1: root tbf limit 1000 burst 1500 rate 20000 linklayer atm"
+ ],
+ "cmdUnderTest": "$TC qdisc replace dev $DUMMY handle 1: root tbf limit 1000 burst 1500 rate 20000 linklayer ethernet",
+ "expExitCode": "0",
+ "verifyCmd": "$TC qdisc show dev $DUMMY",
+ "matchPattern": "qdisc tbf 1: root refcnt [0-9]+ rate 20Kbit burst 1500b limit 1000b",
+ "matchCount": "1",
+ "teardown": [
+ "$TC qdisc del dev $DUMMY handle 1: root"
+ ]
+ },
+ {
+ "id": "f948",
+ "name": "Change TBF with latency time",
+ "category": [
+ "qdisc",
+ "tbf"
+ ],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
+ "setup": [
+ "$TC qdisc add dev $DUMMY handle 1: root tbf burst 1500 rate 20000 latency 10ms"
+ ],
+ "cmdUnderTest": "$TC qdisc change dev $DUMMY handle 1: root tbf burst 1500 rate 20000 latency 200ms",
+ "expExitCode": "0",
+ "verifyCmd": "$TC qdisc show dev $DUMMY",
+ "matchPattern": "qdisc tbf 1: root refcnt [0-9]+ rate 20Kbit burst 1500b lat 200ms",
+ "matchCount": "1",
+ "teardown": [
+ "$TC qdisc del dev $DUMMY handle 1: root"
+ ]
+ },
+ {
+ "id": "2348",
+ "name": "Show TBF class",
+ "category": [
+ "qdisc",
+ "tbf"
+ ],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
+ "setup": [
+ ],
+ "cmdUnderTest": "$TC qdisc add dev $DUMMY handle 1: root tbf limit 1000 burst 1500 rate 10000",
+ "expExitCode": "0",
+ "verifyCmd": "$TC class show dev $DUMMY",
+ "matchPattern": "class tbf.*parent 1:",
+ "matchCount": "1",
+ "teardown": [
+ "$TC qdisc del dev $DUMMY handle 1: root"
+ ]
+ }
+]
diff --git a/tools/testing/selftests/tc-testing/tc-tests/qdiscs/teql.json b/tools/testing/selftests/tc-testing/tc-tests/qdiscs/teql.json
new file mode 100644
index 000000000000..e5cc31f265f8
--- /dev/null
+++ b/tools/testing/selftests/tc-testing/tc-tests/qdiscs/teql.json
@@ -0,0 +1,85 @@
+[
+ {
+ "id": "84a0",
+ "name": "Create TEQL with default setting",
+ "category": [
+ "qdisc",
+ "teql"
+ ],
+ "setup": [
+ "$IP link add dev $DUMMY type dummy"
+ ],
+ "cmdUnderTest": "$TC qdisc add dev $DUMMY handle 1: root teql0",
+ "expExitCode": "0",
+ "verifyCmd": "$TC qdisc show dev $DUMMY",
+ "matchPattern": "qdisc teql0 1: root refcnt",
+ "matchCount": "1",
+ "teardown": [
+ "$TC qdisc del dev $DUMMY handle 1: root",
+ "$IP link del dev $DUMMY"
+ ]
+ },
+ {
+ "id": "7734",
+ "name": "Create TEQL with multiple device",
+ "category": [
+ "qdisc",
+ "teql"
+ ],
+ "setup": [
+ "$IP link add dev $DUMMY type dummy",
+ "$IP link add dev $ETH type dummy",
+ "$TC qdisc add dev $ETH handle 1: root teql0"
+ ],
+ "cmdUnderTest": "$TC qdisc add dev $DUMMY handle 1: root teql0",
+ "expExitCode": "0",
+ "verifyCmd": "$TC qdisc show dev $DUMMY",
+ "matchPattern": "qdisc teql0 1: root refcnt",
+ "matchCount": "1",
+ "teardown": [
+ "$TC qdisc del dev $DUMMY handle 1: root",
+ "$IP link del dev $DUMMY",
+ "$IP link del dev $ETH"
+ ]
+ },
+ {
+ "id": "34a9",
+ "name": "Delete TEQL with valid handle",
+ "category": [
+ "qdisc",
+ "teql"
+ ],
+ "setup": [
+ "$IP link add dev $DUMMY type dummy",
+ "$TC qdisc add dev $DUMMY handle 1: root teql0"
+ ],
+ "cmdUnderTest": "$TC qdisc del dev $DUMMY handle 1: root",
+ "expExitCode": "0",
+ "verifyCmd": "$TC qdisc show dev $DUMMY",
+ "matchPattern": "qdisc teql0 1: root refcnt",
+ "matchCount": "0",
+ "teardown": [
+ "$IP link del dev $DUMMY"
+ ]
+ },
+ {
+ "id": "6289",
+ "name": "Show TEQL stats",
+ "category": [
+ "qdisc",
+ "teql"
+ ],
+ "setup": [
+ "$IP link add dev $DUMMY type dummy"
+ ],
+ "cmdUnderTest": "$TC qdisc add dev $DUMMY handle 1: root teql0",
+ "expExitCode": "0",
+ "verifyCmd": "$TC -s qdisc show dev $DUMMY",
+ "matchPattern": "qdisc teql0 1: root refcnt",
+ "matchCount": "1",
+ "teardown": [
+ "$TC qdisc del dev $DUMMY handle 1: root",
+ "$IP link del dev $DUMMY"
+ ]
+ }
+]
diff --git a/tools/testing/selftests/tc-testing/tdc.py b/tools/testing/selftests/tc-testing/tdc.py
index a3e43189d940..ee349187636f 100755
--- a/tools/testing/selftests/tc-testing/tdc.py
+++ b/tools/testing/selftests/tc-testing/tdc.py
@@ -16,6 +16,8 @@ import json
import subprocess
import time
import traceback
+import random
+from multiprocessing import Pool
from collections import OrderedDict
from string import Template
@@ -38,12 +40,11 @@ class PluginMgrTestFail(Exception):
class PluginMgr:
def __init__(self, argparser):
super().__init__()
- self.plugins = {}
+ self.plugins = set()
self.plugin_instances = []
self.failed_plugins = {}
self.argparser = argparser
- # TODO, put plugins in order
plugindir = os.getenv('TDC_PLUGIN_DIR', './plugins')
for dirpath, dirnames, filenames in os.walk(plugindir):
for fn in filenames:
@@ -53,32 +54,43 @@ class PluginMgr:
not fn.startswith('.#')):
mn = fn[0:-3]
foo = importlib.import_module('plugins.' + mn)
- self.plugins[mn] = foo
- self.plugin_instances.append(foo.SubPlugin())
+ self.plugins.add(mn)
+ self.plugin_instances[mn] = foo.SubPlugin()
def load_plugin(self, pgdir, pgname):
pgname = pgname[0:-3]
+ self.plugins.add(pgname)
+
foo = importlib.import_module('{}.{}'.format(pgdir, pgname))
- self.plugins[pgname] = foo
- self.plugin_instances.append(foo.SubPlugin())
- self.plugin_instances[-1].check_args(self.args, None)
+
+ # nsPlugin must always be the first one
+ if pgname == "nsPlugin":
+ self.plugin_instances.insert(0, (pgname, foo.SubPlugin()))
+ self.plugin_instances[0][1].check_args(self.args, None)
+ else:
+ self.plugin_instances.append((pgname, foo.SubPlugin()))
+ self.plugin_instances[-1][1].check_args(self.args, None)
def get_required_plugins(self, testlist):
'''
Get all required plugins from the list of test cases and return
all unique items.
'''
- reqs = []
+ reqs = set()
for t in testlist:
try:
if 'requires' in t['plugins']:
if isinstance(t['plugins']['requires'], list):
- reqs.extend(t['plugins']['requires'])
+ reqs.update(set(t['plugins']['requires']))
else:
- reqs.append(t['plugins']['requires'])
+ reqs.add(t['plugins']['requires'])
+ t['plugins'] = t['plugins']['requires']
+ else:
+ t['plugins'] = []
except KeyError:
+ t['plugins'] = []
continue
- reqs = get_unique_item(reqs)
+
return reqs
def load_required_plugins(self, reqs, parser, args, remaining):
@@ -115,15 +127,17 @@ class PluginMgr:
return args
def call_pre_suite(self, testcount, testidlist):
- for pgn_inst in self.plugin_instances:
+ for (_, pgn_inst) in self.plugin_instances:
pgn_inst.pre_suite(testcount, testidlist)
def call_post_suite(self, index):
- for pgn_inst in reversed(self.plugin_instances):
+ for (_, pgn_inst) in reversed(self.plugin_instances):
pgn_inst.post_suite(index)
def call_pre_case(self, caseinfo, *, test_skip=False):
- for pgn_inst in self.plugin_instances:
+ for (pgn, pgn_inst) in self.plugin_instances:
+ if pgn not in caseinfo['plugins']:
+ continue
try:
pgn_inst.pre_case(caseinfo, test_skip)
except Exception as ee:
@@ -133,29 +147,37 @@ class PluginMgr:
print('testid is {}'.format(caseinfo['id']))
raise
- def call_post_case(self):
- for pgn_inst in reversed(self.plugin_instances):
+ def call_post_case(self, caseinfo):
+ for (pgn, pgn_inst) in reversed(self.plugin_instances):
+ if pgn not in caseinfo['plugins']:
+ continue
pgn_inst.post_case()
- def call_pre_execute(self):
- for pgn_inst in self.plugin_instances:
+ def call_pre_execute(self, caseinfo):
+ for (pgn, pgn_inst) in self.plugin_instances:
+ if pgn not in caseinfo['plugins']:
+ continue
pgn_inst.pre_execute()
- def call_post_execute(self):
- for pgn_inst in reversed(self.plugin_instances):
+ def call_post_execute(self, caseinfo):
+ for (pgn, pgn_inst) in reversed(self.plugin_instances):
+ if pgn not in caseinfo['plugins']:
+ continue
pgn_inst.post_execute()
def call_add_args(self, parser):
- for pgn_inst in self.plugin_instances:
+ for (pgn, pgn_inst) in self.plugin_instances:
parser = pgn_inst.add_args(parser)
return parser
def call_check_args(self, args, remaining):
- for pgn_inst in self.plugin_instances:
+ for (pgn, pgn_inst) in self.plugin_instances:
pgn_inst.check_args(args, remaining)
- def call_adjust_command(self, stage, command):
- for pgn_inst in self.plugin_instances:
+ def call_adjust_command(self, caseinfo, stage, command):
+ for (pgn, pgn_inst) in self.plugin_instances:
+ if pgn not in caseinfo['plugins']:
+ continue
command = pgn_inst.adjust_command(stage, command)
return command
@@ -177,7 +199,7 @@ def replace_keywords(cmd):
return subcmd
-def exec_cmd(args, pm, stage, command):
+def exec_cmd(caseinfo, args, pm, stage, command):
"""
Perform any required modifications on an executable command, then run
it in a subprocess and return the results.
@@ -187,9 +209,10 @@ def exec_cmd(args, pm, stage, command):
if '$' in command:
command = replace_keywords(command)
- command = pm.call_adjust_command(stage, command)
+ command = pm.call_adjust_command(caseinfo, stage, command)
if args.verbose > 0:
print('command "{}"'.format(command))
+
proc = subprocess.Popen(command,
shell=True,
stdout=subprocess.PIPE,
@@ -211,7 +234,7 @@ def exec_cmd(args, pm, stage, command):
return proc, foutput
-def prepare_env(args, pm, stage, prefix, cmdlist, output = None):
+def prepare_env(caseinfo, args, pm, stage, prefix, cmdlist, output = None):
"""
Execute the setup/teardown commands for a test case.
Optionally terminate test execution if the command fails.
@@ -229,7 +252,7 @@ def prepare_env(args, pm, stage, prefix, cmdlist, output = None):
if not cmd:
continue
- (proc, foutput) = exec_cmd(args, pm, stage, cmd)
+ (proc, foutput) = exec_cmd(caseinfo, args, pm, stage, cmd)
if proc and (proc.returncode not in exit_codes):
print('', file=sys.stderr)
@@ -246,8 +269,116 @@ def prepare_env(args, pm, stage, prefix, cmdlist, output = None):
stage, output,
'"{}" did not complete successfully'.format(prefix))
+def verify_by_json(procout, res, tidx, args, pm):
+ try:
+ outputJSON = json.loads(procout)
+ except json.JSONDecodeError:
+ res.set_result(ResultState.fail)
+ res.set_failmsg('Cannot decode verify command\'s output. Is it JSON?')
+ return res
+
+ matchJSON = json.loads(json.dumps(tidx['matchJSON']))
+
+ if type(outputJSON) != type(matchJSON):
+ failmsg = 'Original output and matchJSON value are not the same type: output: {} != matchJSON: {} '
+ failmsg = failmsg.format(type(outputJSON).__name__, type(matchJSON).__name__)
+ res.set_result(ResultState.fail)
+ res.set_failmsg(failmsg)
+ return res
+
+ if len(matchJSON) > len(outputJSON):
+ failmsg = "Your matchJSON value is an array, and it contains more elements than the command under test\'s output:\ncommand output (length: {}):\n{}\nmatchJSON value (length: {}):\n{}"
+ failmsg = failmsg.format(len(outputJSON), outputJSON, len(matchJSON), matchJSON)
+ res.set_result(ResultState.fail)
+ res.set_failmsg(failmsg)
+ return res
+ res = find_in_json(res, outputJSON, matchJSON, 0)
+
+ return res
+
+def find_in_json(res, outputJSONVal, matchJSONVal, matchJSONKey=None):
+ if res.get_result() == ResultState.fail:
+ return res
+
+ if type(matchJSONVal) == list:
+ res = find_in_json_list(res, outputJSONVal, matchJSONVal, matchJSONKey)
+
+ elif type(matchJSONVal) == dict:
+ res = find_in_json_dict(res, outputJSONVal, matchJSONVal)
+ else:
+ res = find_in_json_other(res, outputJSONVal, matchJSONVal, matchJSONKey)
+
+ if res.get_result() != ResultState.fail:
+ res.set_result(ResultState.success)
+ return res
+
+ return res
+
+def find_in_json_list(res, outputJSONVal, matchJSONVal, matchJSONKey=None):
+ if (type(matchJSONVal) != type(outputJSONVal)):
+ failmsg = 'Original output and matchJSON value are not the same type: output: {} != matchJSON: {}'
+ failmsg = failmsg.format(outputJSONVal, matchJSONVal)
+ res.set_result(ResultState.fail)
+ res.set_failmsg(failmsg)
+ return res
+
+ if len(matchJSONVal) > len(outputJSONVal):
+ failmsg = "Your matchJSON value is an array, and it contains more elements than the command under test\'s output:\ncommand output (length: {}):\n{}\nmatchJSON value (length: {}):\n{}"
+ failmsg = failmsg.format(len(outputJSONVal), outputJSONVal, len(matchJSONVal), matchJSONVal)
+ res.set_result(ResultState.fail)
+ res.set_failmsg(failmsg)
+ return res
+
+ for matchJSONIdx, matchJSONVal in enumerate(matchJSONVal):
+ res = find_in_json(res, outputJSONVal[matchJSONIdx], matchJSONVal,
+ matchJSONKey)
+ return res
+
+def find_in_json_dict(res, outputJSONVal, matchJSONVal):
+ for matchJSONKey, matchJSONVal in matchJSONVal.items():
+ if type(outputJSONVal) == dict:
+ if matchJSONKey not in outputJSONVal:
+ failmsg = 'Key not found in json output: {}: {}\nMatching against output: {}'
+ failmsg = failmsg.format(matchJSONKey, matchJSONVal, outputJSONVal)
+ res.set_result(ResultState.fail)
+ res.set_failmsg(failmsg)
+ return res
+
+ else:
+ failmsg = 'Original output and matchJSON value are not the same type: output: {} != matchJSON: {}'
+ failmsg = failmsg.format(type(outputJSON).__name__, type(matchJSON).__name__)
+ res.set_result(ResultState.fail)
+ res.set_failmsg(failmsg)
+ return rest
+
+ if type(outputJSONVal) == dict and (type(outputJSONVal[matchJSONKey]) == dict or
+ type(outputJSONVal[matchJSONKey]) == list):
+ if len(matchJSONVal) > 0:
+ res = find_in_json(res, outputJSONVal[matchJSONKey], matchJSONVal, matchJSONKey)
+ # handling corner case where matchJSONVal == [] or matchJSONVal == {}
+ else:
+ res = find_in_json_other(res, outputJSONVal, matchJSONVal, matchJSONKey)
+ else:
+ res = find_in_json(res, outputJSONVal, matchJSONVal, matchJSONKey)
+ return res
+
+def find_in_json_other(res, outputJSONVal, matchJSONVal, matchJSONKey=None):
+ if matchJSONKey in outputJSONVal:
+ if matchJSONVal != outputJSONVal[matchJSONKey]:
+ failmsg = 'Value doesn\'t match: {}: {} != {}\nMatching against output: {}'
+ failmsg = failmsg.format(matchJSONKey, matchJSONVal, outputJSONVal[matchJSONKey], outputJSONVal)
+ res.set_result(ResultState.fail)
+ res.set_failmsg(failmsg)
+ return res
+
+ return res
+
def run_one_test(pm, args, index, tidx):
global NAMES
+ ns = NAMES['NS']
+ dev0 = NAMES['DEV0']
+ dev1 = NAMES['DEV1']
+ dummy = NAMES['DUMMY']
result = True
tresult = ""
tap = ""
@@ -262,25 +393,42 @@ def run_one_test(pm, args, index, tidx):
res.set_result(ResultState.skip)
res.set_errormsg('Test case designated as skipped.')
pm.call_pre_case(tidx, test_skip=True)
- pm.call_post_execute()
+ pm.call_post_execute(tidx)
return res
+ if 'dependsOn' in tidx:
+ if (args.verbose > 0):
+ print('probe command for test skip')
+ (p, procout) = exec_cmd(tidx, args, pm, 'execute', tidx['dependsOn'])
+ if p:
+ if (p.returncode != 0):
+ res = TestResult(tidx['id'], tidx['name'])
+ res.set_result(ResultState.skip)
+ res.set_errormsg('probe command: test skipped.')
+ pm.call_pre_case(tidx, test_skip=True)
+ pm.call_post_execute(tidx)
+ return res
+
# populate NAMES with TESTID for this test
NAMES['TESTID'] = tidx['id']
+ NAMES['NS'] = '{}-{}'.format(NAMES['NS'], tidx['random'])
+ NAMES['DEV0'] = '{}id{}'.format(NAMES['DEV0'], tidx['id'])
+ NAMES['DEV1'] = '{}id{}'.format(NAMES['DEV1'], tidx['id'])
+ NAMES['DUMMY'] = '{}id{}'.format(NAMES['DUMMY'], tidx['id'])
pm.call_pre_case(tidx)
- prepare_env(args, pm, 'setup', "-----> prepare stage", tidx["setup"])
+ prepare_env(tidx, args, pm, 'setup', "-----> prepare stage", tidx["setup"])
if (args.verbose > 0):
print('-----> execute stage')
- pm.call_pre_execute()
- (p, procout) = exec_cmd(args, pm, 'execute', tidx["cmdUnderTest"])
+ pm.call_pre_execute(tidx)
+ (p, procout) = exec_cmd(tidx, args, pm, 'execute', tidx["cmdUnderTest"])
if p:
exit_code = p.returncode
else:
exit_code = None
- pm.call_post_execute()
+ pm.call_post_execute(tidx)
if (exit_code is None or exit_code != int(tidx["expExitCode"])):
print("exit: {!r}".format(exit_code))
@@ -292,31 +440,66 @@ def run_one_test(pm, args, index, tidx):
else:
if args.verbose > 0:
print('-----> verify stage')
- match_pattern = re.compile(
- str(tidx["matchPattern"]), re.DOTALL | re.MULTILINE)
- (p, procout) = exec_cmd(args, pm, 'verify', tidx["verifyCmd"])
+ (p, procout) = exec_cmd(tidx, args, pm, 'verify', tidx["verifyCmd"])
if procout:
- match_index = re.findall(match_pattern, procout)
- if len(match_index) != int(tidx["matchCount"]):
- res.set_result(ResultState.fail)
- res.set_failmsg('Could not match regex pattern. Verify command output:\n{}'.format(procout))
+ if 'matchJSON' in tidx:
+ verify_by_json(procout, res, tidx, args, pm)
+ elif 'matchPattern' in tidx:
+ match_pattern = re.compile(
+ str(tidx["matchPattern"]), re.DOTALL | re.MULTILINE)
+ match_index = re.findall(match_pattern, procout)
+ if len(match_index) != int(tidx["matchCount"]):
+ res.set_result(ResultState.fail)
+ res.set_failmsg('Could not match regex pattern. Verify command output:\n{}'.format(procout))
+ else:
+ res.set_result(ResultState.success)
else:
- res.set_result(ResultState.success)
+ res.set_result(ResultState.fail)
+ res.set_failmsg('Must specify a match option: matchJSON or matchPattern\n{}'.format(procout))
elif int(tidx["matchCount"]) != 0:
res.set_result(ResultState.fail)
res.set_failmsg('No output generated by verify command.')
else:
res.set_result(ResultState.success)
- prepare_env(args, pm, 'teardown', '-----> teardown stage', tidx['teardown'], procout)
- pm.call_post_case()
+ prepare_env(tidx, args, pm, 'teardown', '-----> teardown stage', tidx['teardown'], procout)
+ pm.call_post_case(tidx)
index += 1
# remove TESTID from NAMES
del(NAMES['TESTID'])
+
+ # Restore names
+ NAMES['NS'] = ns
+ NAMES['DEV0'] = dev0
+ NAMES['DEV1'] = dev1
+ NAMES['DUMMY'] = dummy
+
return res
+def prepare_run(pm, args, testlist):
+ tcount = len(testlist)
+ emergency_exit = False
+ emergency_exit_message = ''
+
+ try:
+ pm.call_pre_suite(tcount, testlist)
+ except Exception as ee:
+ ex_type, ex, ex_tb = sys.exc_info()
+ print('Exception {} {} (caught in pre_suite).'.
+ format(ex_type, ex))
+ traceback.print_tb(ex_tb)
+ emergency_exit_message = 'EMERGENCY EXIT, call_pre_suite failed with exception {} {}\n'.format(ex_type, ex)
+ emergency_exit = True
+
+ if emergency_exit:
+ pm.call_post_suite(1)
+ return emergency_exit_message
+
+def purge_run(pm, index):
+ pm.call_post_suite(index)
+
def test_runner(pm, args, filtered_tests):
"""
Driver function for the unit tests.
@@ -332,28 +515,9 @@ def test_runner(pm, args, filtered_tests):
tap = ''
badtest = None
stage = None
- emergency_exit = False
- emergency_exit_message = ''
tsr = TestSuiteReport()
- try:
- pm.call_pre_suite(tcount, [tidx['id'] for tidx in testlist])
- except Exception as ee:
- ex_type, ex, ex_tb = sys.exc_info()
- print('Exception {} {} (caught in pre_suite).'.
- format(ex_type, ex))
- traceback.print_tb(ex_tb)
- emergency_exit_message = 'EMERGENCY EXIT, call_pre_suite failed with exception {} {}\n'.format(ex_type, ex)
- emergency_exit = True
- stage = 'pre-SUITE'
-
- if emergency_exit:
- pm.call_post_suite(index)
- return emergency_exit_message
- if args.verbose > 1:
- print('give test rig 2 seconds to stabilize')
- time.sleep(2)
for tidx in testlist:
if "flower" in tidx["category"] and args.device == None:
errmsg = "Tests using the DEV2 variable must define the name of a "
@@ -365,6 +529,7 @@ def test_runner(pm, args, filtered_tests):
res.set_result(ResultState.skip)
res.set_errormsg(errmsg)
tsr.add_resultdata(res)
+ index += 1
continue
try:
badtest = tidx # in case it goes bad
@@ -376,7 +541,7 @@ def test_runner(pm, args, filtered_tests):
message = pmtf.message
output = pmtf.output
res = TestResult(tidx['id'], tidx['name'])
- res.set_result(ResultState.skip)
+ res.set_result(ResultState.fail)
res.set_errormsg(pmtf.message)
res.set_failmsg(pmtf.output)
tsr.add_resultdata(res)
@@ -415,7 +580,68 @@ def test_runner(pm, args, filtered_tests):
if input(sys.stdin):
print('got something on stdin')
- pm.call_post_suite(index)
+ return (index, tsr)
+
+def mp_bins(alltests):
+ serial = []
+ parallel = []
+
+ for test in alltests:
+ if 'nsPlugin' not in test['plugins']:
+ serial.append(test)
+ else:
+ # We can only create one netdevsim device at a time
+ if 'netdevsim/new_device' in str(test['setup']):
+ serial.append(test)
+ else:
+ parallel.append(test)
+
+ return (serial, parallel)
+
+def __mp_runner(tests):
+ (_, tsr) = test_runner(mp_pm, mp_args, tests)
+ return tsr._testsuite
+
+def test_runner_mp(pm, args, alltests):
+ prepare_run(pm, args, alltests)
+
+ (serial, parallel) = mp_bins(alltests)
+
+ batches = [parallel[n : n + 32] for n in range(0, len(parallel), 32)]
+ batches.insert(0, serial)
+
+ print("Executing {} tests in parallel and {} in serial".format(len(parallel), len(serial)))
+ print("Using {} batches and {} workers".format(len(batches), args.mp))
+
+ # We can't pickle these objects so workaround them
+ global mp_pm
+ mp_pm = pm
+
+ global mp_args
+ mp_args = args
+
+ with Pool(args.mp) as p:
+ pres = p.map(__mp_runner, batches)
+
+ tsr = TestSuiteReport()
+ for trs in pres:
+ for res in trs:
+ tsr.add_resultdata(res)
+
+ # Passing an index is not useful in MP
+ purge_run(pm, None)
+
+ return tsr
+
+def test_runner_serial(pm, args, alltests):
+ prepare_run(pm, args, alltests)
+
+ if args.verbose:
+ print("Executing {} tests in serial".format(len(alltests)))
+
+ (index, tsr) = test_runner(pm, args, alltests)
+
+ purge_run(pm, index)
return tsr
@@ -444,12 +670,15 @@ def load_from_file(filename):
k['filename'] = filename
return testlist
+def identity(string):
+ return string
def args_parse():
"""
Create the argument parser.
"""
parser = argparse.ArgumentParser(description='Linux TC unit tests')
+ parser.register('type', None, identity)
return parser
@@ -507,6 +736,9 @@ def set_args(parser):
parser.add_argument(
'-P', '--pause', action='store_true',
help='Pause execution just before post-suite stage')
+ parser.add_argument(
+ '-J', '--multiprocess', type=int, default=1, dest='mp',
+ help='Run tests in parallel whenever possible')
return parser
@@ -537,7 +769,6 @@ def get_id_list(alltests):
"""
return [x["id"] for x in alltests]
-
def check_case_id(alltests):
"""
Check for duplicate test case IDs.
@@ -559,7 +790,6 @@ def generate_case_ids(alltests):
If a test case has a blank ID field, generate a random hex ID for it
and then write the test cases back to disk.
"""
- import random
for c in alltests:
if (c["id"] == ""):
while True:
@@ -618,6 +848,9 @@ def filter_tests_by_category(args, testlist):
return answer
+def set_random(alltests):
+ for tidx in alltests:
+ tidx['random'] = random.getrandbits(32)
def get_test_cases(args):
"""
@@ -716,6 +949,9 @@ def set_operation_mode(pm, parser, args, remaining):
list_test_cases(alltests)
exit(0)
+ set_random(alltests)
+
+ exit_code = 0 # KSFT_PASS
if len(alltests):
req_plugins = pm.get_required_plugins(alltests)
try:
@@ -723,7 +959,14 @@ def set_operation_mode(pm, parser, args, remaining):
except PluginDependencyException as pde:
print('The following plugins were not found:')
print('{}'.format(pde.missing_pg))
- catresults = test_runner(pm, args, alltests)
+
+ if args.mp > 1:
+ catresults = test_runner_mp(pm, args, alltests)
+ else:
+ catresults = test_runner_serial(pm, args, alltests)
+
+ if catresults.count_failures() != 0:
+ exit_code = 1 # KSFT_FAIL
if args.format == 'none':
print('Test results output suppression requested\n')
else:
@@ -748,27 +991,38 @@ def set_operation_mode(pm, parser, args, remaining):
gid=int(os.getenv('SUDO_GID')))
else:
print('No tests found\n')
+ exit_code = 4 # KSFT_SKIP
+ exit(exit_code)
def main():
"""
Start of execution; set up argument parser and get the arguments,
and start operations.
"""
+ import resource
+
+ if sys.version_info.major < 3 or sys.version_info.minor < 8:
+ sys.exit("tdc requires at least python 3.8")
+
+ resource.setrlimit(resource.RLIMIT_NOFILE, (1048576, 1048576))
+
parser = args_parse()
parser = set_args(parser)
pm = PluginMgr(parser)
parser = pm.call_add_args(parser)
(args, remaining) = parser.parse_known_args()
args.NAMES = NAMES
+ args.mp = min(args.mp, 4)
pm.set_args(args)
check_default_settings(args, remaining, pm)
if args.verbose > 2:
print('args is {}'.format(args))
- set_operation_mode(pm, parser, args, remaining)
-
- exit(0)
-
+ try:
+ set_operation_mode(pm, parser, args, remaining)
+ except KeyboardInterrupt:
+ # Cleanup on Ctrl-C
+ pm.call_post_suite(None)
if __name__ == "__main__":
main()
diff --git a/tools/testing/selftests/tc-testing/tdc.sh b/tools/testing/selftests/tc-testing/tdc.sh
new file mode 100755
index 000000000000..cddff1772e10
--- /dev/null
+++ b/tools/testing/selftests/tc-testing/tdc.sh
@@ -0,0 +1,66 @@
+#!/bin/sh
+# SPDX-License-Identifier: GPL-2.0
+
+# If a module is required and was not compiled
+# the test that requires it will fail anyways
+try_modprobe() {
+ modprobe -q -R "$1"
+ if [ $? -ne 0 ]; then
+ echo "Module $1 not found... skipping."
+ else
+ modprobe "$1"
+ fi
+}
+
+try_modprobe netdevsim
+try_modprobe act_bpf
+try_modprobe act_connmark
+try_modprobe act_csum
+try_modprobe act_ct
+try_modprobe act_ctinfo
+try_modprobe act_gact
+try_modprobe act_gate
+try_modprobe act_mirred
+try_modprobe act_mpls
+try_modprobe act_nat
+try_modprobe act_pedit
+try_modprobe act_police
+try_modprobe act_sample
+try_modprobe act_simple
+try_modprobe act_skbedit
+try_modprobe act_skbmod
+try_modprobe act_tunnel_key
+try_modprobe act_vlan
+try_modprobe cls_basic
+try_modprobe cls_bpf
+try_modprobe cls_cgroup
+try_modprobe cls_flow
+try_modprobe cls_flower
+try_modprobe cls_fw
+try_modprobe cls_matchall
+try_modprobe cls_route
+try_modprobe cls_u32
+try_modprobe em_canid
+try_modprobe em_cmp
+try_modprobe em_ipset
+try_modprobe em_ipt
+try_modprobe em_meta
+try_modprobe em_nbyte
+try_modprobe em_text
+try_modprobe em_u32
+try_modprobe sch_cake
+try_modprobe sch_cbs
+try_modprobe sch_choke
+try_modprobe sch_codel
+try_modprobe sch_drr
+try_modprobe sch_etf
+try_modprobe sch_ets
+try_modprobe sch_fq
+try_modprobe sch_fq_codel
+try_modprobe sch_fq_pie
+try_modprobe sch_gred
+try_modprobe sch_hfsc
+try_modprobe sch_hhf
+try_modprobe sch_htb
+try_modprobe sch_teql
+./tdc.py -J`nproc`
diff --git a/tools/testing/selftests/tc-testing/tdc_batch.py b/tools/testing/selftests/tc-testing/tdc_batch.py
index 995f66ce43eb..35d5d9493784 100755
--- a/tools/testing/selftests/tc-testing/tdc_batch.py
+++ b/tools/testing/selftests/tc-testing/tdc_batch.py
@@ -1,4 +1,4 @@
-#!/usr/bin/python3
+#!/usr/bin/env python3
"""
tdc_batch.py - a script to generate TC batch file
diff --git a/tools/testing/selftests/tc-testing/tdc_config.py b/tools/testing/selftests/tc-testing/tdc_config.py
index 080709cc4297..ccb0f06ef9e3 100644
--- a/tools/testing/selftests/tc-testing/tdc_config.py
+++ b/tools/testing/selftests/tc-testing/tdc_config.py
@@ -17,14 +17,15 @@ NAMES = {
'DEV1': 'v0p1',
'DEV2': '',
'DUMMY': 'dummy1',
+ 'ETH': 'eth0',
'BATCH_FILE': './batch.txt',
'BATCH_DIR': 'tmp',
# Length of time in seconds to wait before terminating a command
- 'TIMEOUT': 12,
+ 'TIMEOUT': 24,
# Name of the namespace to use
'NS': 'tcut',
# Directory containing eBPF test programs
- 'EBPFDIR': './bpf'
+ 'EBPFDIR': './'
}
diff --git a/tools/testing/selftests/tc-testing/tdc_multibatch.py b/tools/testing/selftests/tc-testing/tdc_multibatch.py
index 5e7237952e49..48e1f17ff2e8 100755
--- a/tools/testing/selftests/tc-testing/tdc_multibatch.py
+++ b/tools/testing/selftests/tc-testing/tdc_multibatch.py
@@ -1,4 +1,4 @@
-#!/usr/bin/python3
+#!/usr/bin/env python3
# SPDX-License-Identifier: GPL-2.0
"""
tdc_multibatch.py - a thin wrapper over tdc_batch.py to generate multiple batch
diff --git a/tools/testing/selftests/tdx/.gitignore b/tools/testing/selftests/tdx/.gitignore
new file mode 100644
index 000000000000..5db4d15cc673
--- /dev/null
+++ b/tools/testing/selftests/tdx/.gitignore
@@ -0,0 +1 @@
+tdx_guest_test
diff --git a/tools/testing/selftests/tdx/Makefile b/tools/testing/selftests/tdx/Makefile
new file mode 100644
index 000000000000..306e9c4d5ef7
--- /dev/null
+++ b/tools/testing/selftests/tdx/Makefile
@@ -0,0 +1,7 @@
+# SPDX-License-Identifier: GPL-2.0
+
+CFLAGS += -O3 -Wl,-no-as-needed -Wall $(KHDR_INCLUDES) -static
+
+TEST_GEN_PROGS := tdx_guest_test
+
+include ../lib.mk
diff --git a/tools/testing/selftests/tdx/config b/tools/testing/selftests/tdx/config
new file mode 100644
index 000000000000..aa1edc829ab6
--- /dev/null
+++ b/tools/testing/selftests/tdx/config
@@ -0,0 +1 @@
+CONFIG_TDX_GUEST_DRIVER=y
diff --git a/tools/testing/selftests/tdx/tdx_guest_test.c b/tools/testing/selftests/tdx/tdx_guest_test.c
new file mode 100644
index 000000000000..81d8cb88ea1a
--- /dev/null
+++ b/tools/testing/selftests/tdx/tdx_guest_test.c
@@ -0,0 +1,163 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Test TDX guest features
+ *
+ * Copyright (C) 2022 Intel Corporation.
+ *
+ * Author: Kuppuswamy Sathyanarayanan <sathyanarayanan.kuppuswamy@linux.intel.com>
+ */
+
+#include <sys/ioctl.h>
+
+#include <errno.h>
+#include <fcntl.h>
+
+#include <linux/tdx-guest.h>
+#include "../kselftest_harness.h"
+
+#define TDX_GUEST_DEVNAME "/dev/tdx_guest"
+#define HEX_DUMP_SIZE 8
+#define DEBUG 0
+
+/**
+ * struct tdreport_type - Type header of TDREPORT_STRUCT.
+ * @type: Type of the TDREPORT (0 - SGX, 81 - TDX, rest are reserved)
+ * @sub_type: Subtype of the TDREPORT (Default value is 0).
+ * @version: TDREPORT version (Default value is 0).
+ * @reserved: Added for future extension.
+ *
+ * More details can be found in TDX v1.0 module specification, sec
+ * titled "REPORTTYPE".
+ */
+struct tdreport_type {
+ __u8 type;
+ __u8 sub_type;
+ __u8 version;
+ __u8 reserved;
+};
+
+/**
+ * struct reportmac - TDX guest report data, MAC and TEE hashes.
+ * @type: TDREPORT type header.
+ * @reserved1: Reserved for future extension.
+ * @cpu_svn: CPU security version.
+ * @tee_tcb_info_hash: SHA384 hash of TEE TCB INFO.
+ * @tee_td_info_hash: SHA384 hash of TDINFO_STRUCT.
+ * @reportdata: User defined unique data passed in TDG.MR.REPORT request.
+ * @reserved2: Reserved for future extension.
+ * @mac: CPU MAC ID.
+ *
+ * It is MAC-protected and contains hashes of the remainder of the
+ * report structure along with user provided report data. More details can
+ * be found in TDX v1.0 Module specification, sec titled "REPORTMACSTRUCT"
+ */
+struct reportmac {
+ struct tdreport_type type;
+ __u8 reserved1[12];
+ __u8 cpu_svn[16];
+ __u8 tee_tcb_info_hash[48];
+ __u8 tee_td_info_hash[48];
+ __u8 reportdata[64];
+ __u8 reserved2[32];
+ __u8 mac[32];
+};
+
+/**
+ * struct td_info - TDX guest measurements and configuration.
+ * @attr: TDX Guest attributes (like debug, spet_disable, etc).
+ * @xfam: Extended features allowed mask.
+ * @mrtd: Build time measurement register.
+ * @mrconfigid: Software-defined ID for non-owner-defined configuration
+ * of the guest - e.g., run-time or OS configuration.
+ * @mrowner: Software-defined ID for the guest owner.
+ * @mrownerconfig: Software-defined ID for owner-defined configuration of
+ * the guest - e.g., specific to the workload.
+ * @rtmr: Run time measurement registers.
+ * @reserved: Added for future extension.
+ *
+ * It contains the measurements and initial configuration of the TDX guest
+ * that was locked at initialization and a set of measurement registers
+ * that are run-time extendable. More details can be found in TDX v1.0
+ * Module specification, sec titled "TDINFO_STRUCT".
+ */
+struct td_info {
+ __u8 attr[8];
+ __u64 xfam;
+ __u64 mrtd[6];
+ __u64 mrconfigid[6];
+ __u64 mrowner[6];
+ __u64 mrownerconfig[6];
+ __u64 rtmr[24];
+ __u64 reserved[14];
+};
+
+/*
+ * struct tdreport - Output of TDCALL[TDG.MR.REPORT].
+ * @reportmac: Mac protected header of size 256 bytes.
+ * @tee_tcb_info: Additional attestable elements in the TCB are not
+ * reflected in the reportmac.
+ * @reserved: Added for future extension.
+ * @tdinfo: Measurements and configuration data of size 512 bytes.
+ *
+ * More details can be found in TDX v1.0 Module specification, sec
+ * titled "TDREPORT_STRUCT".
+ */
+struct tdreport {
+ struct reportmac reportmac;
+ __u8 tee_tcb_info[239];
+ __u8 reserved[17];
+ struct td_info tdinfo;
+};
+
+static void print_array_hex(const char *title, const char *prefix_str,
+ const void *buf, int len)
+{
+ int i, j, line_len, rowsize = HEX_DUMP_SIZE;
+ const __u8 *ptr = buf;
+
+ printf("\t\t%s", title);
+
+ for (j = 0; j < len; j += rowsize) {
+ line_len = rowsize < (len - j) ? rowsize : (len - j);
+ printf("%s%.8x:", prefix_str, j);
+ for (i = 0; i < line_len; i++)
+ printf(" %.2x", ptr[j + i]);
+ printf("\n");
+ }
+
+ printf("\n");
+}
+
+TEST(verify_report)
+{
+ struct tdx_report_req req;
+ struct tdreport *tdreport;
+ int devfd, i;
+
+ devfd = open(TDX_GUEST_DEVNAME, O_RDWR | O_SYNC);
+ ASSERT_LT(0, devfd);
+
+ /* Generate sample report data */
+ for (i = 0; i < TDX_REPORTDATA_LEN; i++)
+ req.reportdata[i] = i;
+
+ /* Get TDREPORT */
+ ASSERT_EQ(0, ioctl(devfd, TDX_CMD_GET_REPORT0, &req));
+
+ if (DEBUG) {
+ print_array_hex("\n\t\tTDX report data\n", "",
+ req.reportdata, sizeof(req.reportdata));
+
+ print_array_hex("\n\t\tTDX tdreport data\n", "",
+ req.tdreport, sizeof(req.tdreport));
+ }
+
+ /* Make sure TDREPORT data includes the REPORTDATA passed */
+ tdreport = (struct tdreport *)req.tdreport;
+ ASSERT_EQ(0, memcmp(&tdreport->reportmac.reportdata[0],
+ req.reportdata, sizeof(req.reportdata)));
+
+ ASSERT_EQ(0, close(devfd));
+}
+
+TEST_HARNESS_MAIN
diff --git a/tools/testing/selftests/thermal/intel/power_floor/.gitignore b/tools/testing/selftests/thermal/intel/power_floor/.gitignore
new file mode 100644
index 000000000000..1b9a76406f18
--- /dev/null
+++ b/tools/testing/selftests/thermal/intel/power_floor/.gitignore
@@ -0,0 +1 @@
+power_floor_test
diff --git a/tools/testing/selftests/thermal/intel/power_floor/Makefile b/tools/testing/selftests/thermal/intel/power_floor/Makefile
new file mode 100644
index 000000000000..9b88e57dbba5
--- /dev/null
+++ b/tools/testing/selftests/thermal/intel/power_floor/Makefile
@@ -0,0 +1,12 @@
+# SPDX-License-Identifier: GPL-2.0
+ifndef CROSS_COMPILE
+uname_M := $(shell uname -m 2>/dev/null || echo not)
+ARCH ?= $(shell echo $(uname_M) | sed -e s/i.86/x86/ -e s/x86_64/x86/)
+
+ifeq ($(ARCH),x86)
+TEST_GEN_PROGS := power_floor_test
+
+include ../../../lib.mk
+
+endif
+endif
diff --git a/tools/testing/selftests/thermal/intel/power_floor/power_floor_test.c b/tools/testing/selftests/thermal/intel/power_floor/power_floor_test.c
new file mode 100644
index 000000000000..0326b39a11b9
--- /dev/null
+++ b/tools/testing/selftests/thermal/intel/power_floor/power_floor_test.c
@@ -0,0 +1,108 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#define _GNU_SOURCE
+
+#include <stdio.h>
+#include <string.h>
+#include <stdlib.h>
+#include <unistd.h>
+#include <fcntl.h>
+#include <poll.h>
+#include <signal.h>
+
+#define POWER_FLOOR_ENABLE_ATTRIBUTE "/sys/bus/pci/devices/0000:00:04.0/power_limits/power_floor_enable"
+#define POWER_FLOOR_STATUS_ATTRIBUTE "/sys/bus/pci/devices/0000:00:04.0/power_limits/power_floor_status"
+
+void power_floor_exit(int signum)
+{
+ int fd;
+
+ /* Disable feature via sysfs knob */
+
+ fd = open(POWER_FLOOR_ENABLE_ATTRIBUTE, O_RDWR);
+ if (fd < 0) {
+ perror("Unable to open power floor enable file\n");
+ exit(1);
+ }
+
+ if (write(fd, "0\n", 2) < 0) {
+ perror("Can' disable power floor notifications\n");
+ exit(1);
+ }
+
+ printf("Disabled power floor notifications\n");
+
+ close(fd);
+}
+
+int main(int argc, char **argv)
+{
+ struct pollfd ufd;
+ char status_str[3];
+ int fd, ret;
+
+ if (signal(SIGINT, power_floor_exit) == SIG_IGN)
+ signal(SIGINT, SIG_IGN);
+ if (signal(SIGHUP, power_floor_exit) == SIG_IGN)
+ signal(SIGHUP, SIG_IGN);
+ if (signal(SIGTERM, power_floor_exit) == SIG_IGN)
+ signal(SIGTERM, SIG_IGN);
+
+ /* Enable feature via sysfs knob */
+ fd = open(POWER_FLOOR_ENABLE_ATTRIBUTE, O_RDWR);
+ if (fd < 0) {
+ perror("Unable to open power floor enable file\n");
+ exit(1);
+ }
+
+ if (write(fd, "1\n", 2) < 0) {
+ perror("Can' enable power floor notifications\n");
+ exit(1);
+ }
+
+ close(fd);
+
+ printf("Enabled power floor notifications\n");
+
+ while (1) {
+ fd = open(POWER_FLOOR_STATUS_ATTRIBUTE, O_RDONLY);
+ if (fd < 0) {
+ perror("Unable to power floor status file\n");
+ exit(1);
+ }
+
+ if ((lseek(fd, 0L, SEEK_SET)) < 0) {
+ fprintf(stderr, "Failed to set pointer to beginning\n");
+ exit(1);
+ }
+
+ if (read(fd, status_str, sizeof(status_str)) < 0) {
+ fprintf(stderr, "Failed to read from:%s\n",
+ POWER_FLOOR_STATUS_ATTRIBUTE);
+ exit(1);
+ }
+
+ ufd.fd = fd;
+ ufd.events = POLLPRI;
+
+ ret = poll(&ufd, 1, -1);
+ if (ret < 0) {
+ perror("poll error");
+ exit(1);
+ } else if (ret == 0) {
+ printf("Poll Timeout\n");
+ } else {
+ if ((lseek(fd, 0L, SEEK_SET)) < 0) {
+ fprintf(stderr, "Failed to set pointer to beginning\n");
+ exit(1);
+ }
+
+ if (read(fd, status_str, sizeof(status_str)) < 0)
+ exit(0);
+
+ printf("power floor status: %s\n", status_str);
+ }
+
+ close(fd);
+ }
+}
diff --git a/tools/testing/selftests/thermal/intel/workload_hint/.gitignore b/tools/testing/selftests/thermal/intel/workload_hint/.gitignore
new file mode 100644
index 000000000000..d697b034a3a8
--- /dev/null
+++ b/tools/testing/selftests/thermal/intel/workload_hint/.gitignore
@@ -0,0 +1 @@
+workload_hint_test
diff --git a/tools/testing/selftests/thermal/intel/workload_hint/Makefile b/tools/testing/selftests/thermal/intel/workload_hint/Makefile
new file mode 100644
index 000000000000..37ff3286283b
--- /dev/null
+++ b/tools/testing/selftests/thermal/intel/workload_hint/Makefile
@@ -0,0 +1,12 @@
+# SPDX-License-Identifier: GPL-2.0
+ifndef CROSS_COMPILE
+uname_M := $(shell uname -m 2>/dev/null || echo not)
+ARCH ?= $(shell echo $(uname_M) | sed -e s/i.86/x86/ -e s/x86_64/x86/)
+
+ifeq ($(ARCH),x86)
+TEST_GEN_PROGS := workload_hint_test
+
+include ../../../lib.mk
+
+endif
+endif
diff --git a/tools/testing/selftests/thermal/intel/workload_hint/workload_hint_test.c b/tools/testing/selftests/thermal/intel/workload_hint/workload_hint_test.c
new file mode 100644
index 000000000000..217c3a641c53
--- /dev/null
+++ b/tools/testing/selftests/thermal/intel/workload_hint/workload_hint_test.c
@@ -0,0 +1,157 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#define _GNU_SOURCE
+
+#include <stdio.h>
+#include <string.h>
+#include <stdlib.h>
+#include <unistd.h>
+#include <fcntl.h>
+#include <poll.h>
+#include <signal.h>
+
+#define WORKLOAD_NOTIFICATION_DELAY_ATTRIBUTE "/sys/bus/pci/devices/0000:00:04.0/workload_hint/notification_delay_ms"
+#define WORKLOAD_ENABLE_ATTRIBUTE "/sys/bus/pci/devices/0000:00:04.0/workload_hint/workload_hint_enable"
+#define WORKLOAD_TYPE_INDEX_ATTRIBUTE "/sys/bus/pci/devices/0000:00:04.0/workload_hint/workload_type_index"
+
+static const char * const workload_types[] = {
+ "idle",
+ "battery_life",
+ "sustained",
+ "bursty",
+ NULL
+};
+
+#define WORKLOAD_TYPE_MAX_INDEX 3
+
+void workload_hint_exit(int signum)
+{
+ int fd;
+
+ /* Disable feature via sysfs knob */
+
+ fd = open(WORKLOAD_ENABLE_ATTRIBUTE, O_RDWR);
+ if (fd < 0) {
+ perror("Unable to open workload type feature enable file\n");
+ exit(1);
+ }
+
+ if (write(fd, "0\n", 2) < 0) {
+ perror("Can' disable workload hints\n");
+ exit(1);
+ }
+
+ printf("Disabled workload type prediction\n");
+
+ close(fd);
+}
+
+int main(int argc, char **argv)
+{
+ struct pollfd ufd;
+ char index_str[4];
+ int fd, ret, index;
+ char delay_str[64];
+ int delay = 0;
+
+ printf("Usage: workload_hint_test [notification delay in milli seconds]\n");
+
+ if (argc > 1) {
+ ret = sscanf(argv[1], "%d", &delay);
+ if (ret < 0) {
+ printf("Invalid delay\n");
+ exit(1);
+ }
+
+ printf("Setting notification delay to %d ms\n", delay);
+ if (delay < 0)
+ exit(1);
+
+ sprintf(delay_str, "%s\n", argv[1]);
+
+ sprintf(delay_str, "%s\n", argv[1]);
+ fd = open(WORKLOAD_NOTIFICATION_DELAY_ATTRIBUTE, O_RDWR);
+ if (fd < 0) {
+ perror("Unable to open workload notification delay\n");
+ exit(1);
+ }
+
+ if (write(fd, delay_str, strlen(delay_str)) < 0) {
+ perror("Can't set delay\n");
+ exit(1);
+ }
+
+ close(fd);
+ }
+
+ if (signal(SIGINT, workload_hint_exit) == SIG_IGN)
+ signal(SIGINT, SIG_IGN);
+ if (signal(SIGHUP, workload_hint_exit) == SIG_IGN)
+ signal(SIGHUP, SIG_IGN);
+ if (signal(SIGTERM, workload_hint_exit) == SIG_IGN)
+ signal(SIGTERM, SIG_IGN);
+
+ /* Enable feature via sysfs knob */
+ fd = open(WORKLOAD_ENABLE_ATTRIBUTE, O_RDWR);
+ if (fd < 0) {
+ perror("Unable to open workload type feature enable file\n");
+ exit(1);
+ }
+
+ if (write(fd, "1\n", 2) < 0) {
+ perror("Can' enable workload hints\n");
+ exit(1);
+ }
+
+ close(fd);
+
+ printf("Enabled workload type prediction\n");
+
+ while (1) {
+ fd = open(WORKLOAD_TYPE_INDEX_ATTRIBUTE, O_RDONLY);
+ if (fd < 0) {
+ perror("Unable to open workload type file\n");
+ exit(1);
+ }
+
+ if ((lseek(fd, 0L, SEEK_SET)) < 0) {
+ fprintf(stderr, "Failed to set pointer to beginning\n");
+ exit(1);
+ }
+
+ if (read(fd, index_str, sizeof(index_str)) < 0) {
+ fprintf(stderr, "Failed to read from:%s\n",
+ WORKLOAD_TYPE_INDEX_ATTRIBUTE);
+ exit(1);
+ }
+
+ ufd.fd = fd;
+ ufd.events = POLLPRI;
+
+ ret = poll(&ufd, 1, -1);
+ if (ret < 0) {
+ perror("poll error");
+ exit(1);
+ } else if (ret == 0) {
+ printf("Poll Timeout\n");
+ } else {
+ if ((lseek(fd, 0L, SEEK_SET)) < 0) {
+ fprintf(stderr, "Failed to set pointer to beginning\n");
+ exit(1);
+ }
+
+ if (read(fd, index_str, sizeof(index_str)) < 0)
+ exit(0);
+
+ ret = sscanf(index_str, "%d", &index);
+ if (ret < 0)
+ break;
+ if (index > WORKLOAD_TYPE_MAX_INDEX)
+ printf("Invalid workload type index\n");
+ else
+ printf("workload type:%s\n", workload_types[index]);
+ }
+
+ close(fd);
+ }
+}
diff --git a/tools/testing/selftests/timens/.gitignore b/tools/testing/selftests/timens/.gitignore
index 2e43851b47c1..cae8dca0fbff 100644
--- a/tools/testing/selftests/timens/.gitignore
+++ b/tools/testing/selftests/timens/.gitignore
@@ -1,9 +1,11 @@
# SPDX-License-Identifier: GPL-2.0-only
clock_nanosleep
exec
+futex
gettime_perf
gettime_perf_cold
procfs
timens
timer
timerfd
+vfork_exec
diff --git a/tools/testing/selftests/timens/Makefile b/tools/testing/selftests/timens/Makefile
index b4fd9a934654..f0d51d4d2c87 100644
--- a/tools/testing/selftests/timens/Makefile
+++ b/tools/testing/selftests/timens/Makefile
@@ -1,4 +1,4 @@
-TEST_GEN_PROGS := timens timerfd timer clock_nanosleep procfs exec
+TEST_GEN_PROGS := timens timerfd timer clock_nanosleep procfs exec futex vfork_exec
TEST_GEN_PROGS_EXTENDED := gettime_perf
CFLAGS := -Wall -Werror -pthread
diff --git a/tools/testing/selftests/timens/futex.c b/tools/testing/selftests/timens/futex.c
new file mode 100644
index 000000000000..6b2b9264e851
--- /dev/null
+++ b/tools/testing/selftests/timens/futex.c
@@ -0,0 +1,110 @@
+// SPDX-License-Identifier: GPL-2.0
+#define _GNU_SOURCE
+#include <sched.h>
+
+#include <linux/unistd.h>
+#include <linux/futex.h>
+#include <stdio.h>
+#include <string.h>
+#include <sys/syscall.h>
+#include <sys/types.h>
+#include <sys/wait.h>
+#include <time.h>
+#include <unistd.h>
+
+#include "log.h"
+#include "timens.h"
+
+#define NSEC_PER_SEC 1000000000ULL
+
+static int run_test(int clockid)
+{
+ int futex_op = FUTEX_WAIT_BITSET;
+ struct timespec timeout, end;
+ int val = 0;
+
+ if (clockid == CLOCK_REALTIME)
+ futex_op |= FUTEX_CLOCK_REALTIME;
+
+ clock_gettime(clockid, &timeout);
+ timeout.tv_nsec += NSEC_PER_SEC / 10; // 100ms
+ if (timeout.tv_nsec > NSEC_PER_SEC) {
+ timeout.tv_sec++;
+ timeout.tv_nsec -= NSEC_PER_SEC;
+ }
+
+ if (syscall(__NR_futex, &val, futex_op, 0,
+ &timeout, 0, FUTEX_BITSET_MATCH_ANY) >= 0) {
+ ksft_test_result_fail("futex didn't return ETIMEDOUT\n");
+ return 1;
+ }
+
+ if (errno != ETIMEDOUT) {
+ ksft_test_result_fail("futex didn't return ETIMEDOUT: %s\n",
+ strerror(errno));
+ return 1;
+ }
+
+ clock_gettime(clockid, &end);
+
+ if (end.tv_sec < timeout.tv_sec ||
+ (end.tv_sec == timeout.tv_sec && end.tv_nsec < timeout.tv_nsec)) {
+ ksft_test_result_fail("futex slept less than 100ms\n");
+ return 1;
+ }
+
+
+ ksft_test_result_pass("futex with the %d clockid\n", clockid);
+
+ return 0;
+}
+
+int main(int argc, char *argv[])
+{
+ int status, len, fd;
+ char buf[4096];
+ pid_t pid;
+ struct timespec mtime_now;
+
+ nscheck();
+
+ ksft_set_plan(2);
+
+ clock_gettime(CLOCK_MONOTONIC, &mtime_now);
+
+ if (unshare_timens())
+ return 1;
+
+ len = snprintf(buf, sizeof(buf), "%d %d 0",
+ CLOCK_MONOTONIC, 70 * 24 * 3600);
+ fd = open("/proc/self/timens_offsets", O_WRONLY);
+ if (fd < 0)
+ return pr_perror("/proc/self/timens_offsets");
+
+ if (write(fd, buf, len) != len)
+ return pr_perror("/proc/self/timens_offsets");
+
+ close(fd);
+
+ pid = fork();
+ if (pid < 0)
+ return pr_perror("Unable to fork");
+ if (pid == 0) {
+ int ret = 0;
+
+ ret |= run_test(CLOCK_REALTIME);
+ ret |= run_test(CLOCK_MONOTONIC);
+ if (ret)
+ ksft_exit_fail();
+ ksft_exit_pass();
+ return 0;
+ }
+
+ if (waitpid(pid, &status, 0) != pid)
+ return pr_perror("Unable to wait the child process");
+
+ if (WIFEXITED(status))
+ return WEXITSTATUS(status);
+
+ return 1;
+}
diff --git a/tools/testing/selftests/timens/gettime_perf.c b/tools/testing/selftests/timens/gettime_perf.c
index 7bf841a3967b..6b13dc277724 100644
--- a/tools/testing/selftests/timens/gettime_perf.c
+++ b/tools/testing/selftests/timens/gettime_perf.c
@@ -25,6 +25,12 @@ static void fill_function_pointers(void)
if (!vdso)
vdso = dlopen("linux-gate.so.1",
RTLD_LAZY | RTLD_LOCAL | RTLD_NOLOAD);
+ if (!vdso)
+ vdso = dlopen("linux-vdso32.so.1",
+ RTLD_LAZY | RTLD_LOCAL | RTLD_NOLOAD);
+ if (!vdso)
+ vdso = dlopen("linux-vdso64.so.1",
+ RTLD_LAZY | RTLD_LOCAL | RTLD_NOLOAD);
if (!vdso) {
pr_err("[WARN]\tfailed to find vDSO\n");
return;
@@ -32,6 +38,8 @@ static void fill_function_pointers(void)
vdso_clock_gettime = (vgettime_t)dlsym(vdso, "__vdso_clock_gettime");
if (!vdso_clock_gettime)
+ vdso_clock_gettime = (vgettime_t)dlsym(vdso, "__kernel_clock_gettime");
+ if (!vdso_clock_gettime)
pr_err("Warning: failed to find clock_gettime in vDSO\n");
}
diff --git a/tools/testing/selftests/timens/procfs.c b/tools/testing/selftests/timens/procfs.c
index 7f14f0fdac84..1833ca97eb24 100644
--- a/tools/testing/selftests/timens/procfs.c
+++ b/tools/testing/selftests/timens/procfs.c
@@ -24,8 +24,6 @@
#define DAY_IN_SEC (60*60*24)
#define TEN_DAYS_IN_SEC (10*DAY_IN_SEC)
-#define ARRAY_SIZE(arr) (sizeof(arr) / sizeof((arr)[0]))
-
static int child_ns, parent_ns;
static int switch_ns(int fd)
@@ -93,6 +91,33 @@ static int read_proc_uptime(struct timespec *uptime)
return 0;
}
+static int read_proc_stat_btime(unsigned long long *boottime_sec)
+{
+ FILE *proc;
+ char line_buf[2048];
+
+ proc = fopen("/proc/stat", "r");
+ if (proc == NULL) {
+ pr_perror("Unable to open /proc/stat");
+ return -1;
+ }
+
+ while (fgets(line_buf, 2048, proc)) {
+ if (sscanf(line_buf, "btime %llu", boottime_sec) != 1)
+ continue;
+ fclose(proc);
+ return 0;
+ }
+ if (errno) {
+ pr_perror("fscanf");
+ fclose(proc);
+ return -errno;
+ }
+ pr_err("failed to parse /proc/stat");
+ fclose(proc);
+ return -1;
+}
+
static int check_uptime(void)
{
struct timespec uptime_new, uptime_old;
@@ -123,18 +148,47 @@ static int check_uptime(void)
return 0;
}
+static int check_stat_btime(void)
+{
+ unsigned long long btime_new, btime_old;
+ unsigned long long btime_expected;
+
+ if (switch_ns(parent_ns))
+ return pr_err("switch_ns(%d)", parent_ns);
+
+ if (read_proc_stat_btime(&btime_old))
+ return 1;
+
+ if (switch_ns(child_ns))
+ return pr_err("switch_ns(%d)", child_ns);
+
+ if (read_proc_stat_btime(&btime_new))
+ return 1;
+
+ btime_expected = btime_old - TEN_DAYS_IN_SEC;
+ if (btime_new != btime_expected) {
+ pr_fail("btime in /proc/stat: old %llu, new %llu [%llu]",
+ btime_old, btime_new, btime_expected);
+ return 1;
+ }
+
+ ksft_test_result_pass("Passed for /proc/stat btime\n");
+ return 0;
+}
+
int main(int argc, char *argv[])
{
int ret = 0;
nscheck();
- ksft_set_plan(1);
+ ksft_set_plan(2);
if (init_namespaces())
return 1;
ret |= check_uptime();
+ ret |= check_stat_btime();
if (ret)
ksft_exit_fail();
diff --git a/tools/testing/selftests/timens/timens.c b/tools/testing/selftests/timens/timens.c
index 52b6a1185f52..387220791a05 100644
--- a/tools/testing/selftests/timens/timens.c
+++ b/tools/testing/selftests/timens/timens.c
@@ -22,8 +22,6 @@
#define DAY_IN_SEC (60*60*24)
#define TEN_DAYS_IN_SEC (10*DAY_IN_SEC)
-#define ARRAY_SIZE(arr) (sizeof(arr) / sizeof((arr)[0]))
-
struct test_clock {
clockid_t id;
char *name;
diff --git a/tools/testing/selftests/timens/vfork_exec.c b/tools/testing/selftests/timens/vfork_exec.c
new file mode 100644
index 000000000000..beb7614941fb
--- /dev/null
+++ b/tools/testing/selftests/timens/vfork_exec.c
@@ -0,0 +1,139 @@
+// SPDX-License-Identifier: GPL-2.0
+#define _GNU_SOURCE
+#include <errno.h>
+#include <fcntl.h>
+#include <sched.h>
+#include <stdio.h>
+#include <stdbool.h>
+#include <sys/stat.h>
+#include <sys/syscall.h>
+#include <sys/types.h>
+#include <sys/wait.h>
+#include <time.h>
+#include <unistd.h>
+#include <string.h>
+#include <pthread.h>
+
+#include "log.h"
+#include "timens.h"
+
+#define OFFSET (36000)
+
+struct thread_args {
+ char *tst_name;
+ struct timespec *now;
+};
+
+static void *tcheck(void *_args)
+{
+ struct thread_args *args = _args;
+ struct timespec *now = args->now, tst;
+ int i;
+
+ for (i = 0; i < 2; i++) {
+ _gettime(CLOCK_MONOTONIC, &tst, i);
+ if (abs(tst.tv_sec - now->tv_sec) > 5) {
+ pr_fail("%s: in-thread: unexpected value: %ld (%ld)\n",
+ args->tst_name, tst.tv_sec, now->tv_sec);
+ return (void *)1UL;
+ }
+ }
+ return NULL;
+}
+
+static int check_in_thread(char *tst_name, struct timespec *now)
+{
+ struct thread_args args = {
+ .tst_name = tst_name,
+ .now = now,
+ };
+ pthread_t th;
+ void *retval;
+
+ if (pthread_create(&th, NULL, tcheck, &args))
+ return pr_perror("thread");
+ if (pthread_join(th, &retval))
+ return pr_perror("pthread_join");
+ return !(retval == NULL);
+}
+
+static int check(char *tst_name, struct timespec *now)
+{
+ struct timespec tst;
+ int i;
+
+ for (i = 0; i < 2; i++) {
+ _gettime(CLOCK_MONOTONIC, &tst, i);
+ if (abs(tst.tv_sec - now->tv_sec) > 5)
+ return pr_fail("%s: unexpected value: %ld (%ld)\n",
+ tst_name, tst.tv_sec, now->tv_sec);
+ }
+ if (check_in_thread(tst_name, now))
+ return 1;
+ ksft_test_result_pass("%s\n", tst_name);
+ return 0;
+}
+
+int main(int argc, char *argv[])
+{
+ struct timespec now;
+ int status;
+ pid_t pid;
+
+ if (argc > 1) {
+ char *endptr;
+
+ ksft_cnt.ksft_pass = 1;
+ now.tv_sec = strtoul(argv[1], &endptr, 0);
+ if (*endptr != 0)
+ return pr_perror("strtoul");
+
+ return check("child after exec", &now);
+ }
+
+ nscheck();
+
+ ksft_set_plan(4);
+
+ clock_gettime(CLOCK_MONOTONIC, &now);
+
+ if (unshare_timens())
+ return 1;
+
+ if (_settime(CLOCK_MONOTONIC, OFFSET))
+ return 1;
+
+ if (check("parent before vfork", &now))
+ return 1;
+
+ pid = vfork();
+ if (pid < 0)
+ return pr_perror("fork");
+
+ if (pid == 0) {
+ char now_str[64];
+ char *cargv[] = {"exec", now_str, NULL};
+ char *cenv[] = {NULL};
+
+ /* Check for proper vvar offsets after execve. */
+ snprintf(now_str, sizeof(now_str), "%ld", now.tv_sec + OFFSET);
+ execve("/proc/self/exe", cargv, cenv);
+ pr_perror("execve");
+ _exit(1);
+ }
+
+ if (waitpid(pid, &status, 0) != pid)
+ return pr_perror("waitpid");
+
+ if (status)
+ ksft_exit_fail();
+ ksft_inc_pass_cnt();
+ ksft_test_result_pass("wait for child\n");
+
+ /* Check that we are still in the source timens. */
+ if (check("parent after vfork", &now))
+ return 1;
+
+ ksft_exit_pass();
+ return 0;
+}
diff --git a/tools/testing/selftests/timers/Makefile b/tools/testing/selftests/timers/Makefile
index 7656c7ce79d9..0e73a16874c4 100644
--- a/tools/testing/selftests/timers/Makefile
+++ b/tools/testing/selftests/timers/Makefile
@@ -13,6 +13,7 @@ DESTRUCTIVE_TESTS = alarmtimer-suspend valid-adjtimex adjtick change_skew \
TEST_GEN_PROGS_EXTENDED = $(DESTRUCTIVE_TESTS)
+TEST_FILES := settings
include ../lib.mk
diff --git a/tools/testing/selftests/timers/adjtick.c b/tools/testing/selftests/timers/adjtick.c
index 54d8d87f36b3..47e05fdc32c5 100644
--- a/tools/testing/selftests/timers/adjtick.c
+++ b/tools/testing/selftests/timers/adjtick.c
@@ -165,7 +165,7 @@ int check_tick_adj(long tickval)
return 0;
}
-int main(int argv, char **argc)
+int main(int argc, char **argv)
{
struct timespec raw;
long tick, max, interval, err;
diff --git a/tools/testing/selftests/timers/alarmtimer-suspend.c b/tools/testing/selftests/timers/alarmtimer-suspend.c
index 4da09dbf83ba..4332b494103d 100644
--- a/tools/testing/selftests/timers/alarmtimer-suspend.c
+++ b/tools/testing/selftests/timers/alarmtimer-suspend.c
@@ -79,7 +79,7 @@ char *clockstring(int clockid)
return "CLOCK_BOOTTIME_ALARM";
case CLOCK_TAI:
return "CLOCK_TAI";
- };
+ }
return "UNKNOWN_CLOCKID";
}
@@ -92,7 +92,7 @@ long long timespec_sub(struct timespec a, struct timespec b)
return ret;
}
-int final_ret = 0;
+int final_ret;
void sigalarm(int signo)
{
diff --git a/tools/testing/selftests/timers/change_skew.c b/tools/testing/selftests/timers/change_skew.c
index c4eab7124990..992a77f2a74c 100644
--- a/tools/testing/selftests/timers/change_skew.c
+++ b/tools/testing/selftests/timers/change_skew.c
@@ -55,7 +55,7 @@ int change_skew_test(int ppm)
}
-int main(int argv, char **argc)
+int main(int argc, char **argv)
{
struct timex tx;
int i, ret;
diff --git a/tools/testing/selftests/timers/clocksource-switch.c b/tools/testing/selftests/timers/clocksource-switch.c
index bfc974b4572d..c5264594064c 100644
--- a/tools/testing/selftests/timers/clocksource-switch.c
+++ b/tools/testing/selftests/timers/clocksource-switch.c
@@ -3,7 +3,7 @@
* (C) Copyright IBM 2012
* Licensed under the GPLv2
*
- * NOTE: This is a meta-test which quickly changes the clocksourc and
+ * NOTE: This is a meta-test which quickly changes the clocksource and
* then uses other tests to detect problems. Thus this test requires
* that the inconsistency-check and nanosleep tests be present in the
* same directory it is run from.
@@ -23,17 +23,17 @@
*/
+#include <fcntl.h>
#include <stdio.h>
-#include <unistd.h>
#include <stdlib.h>
+#include <string.h>
+#include <sys/stat.h>
#include <sys/time.h>
#include <sys/timex.h>
-#include <time.h>
#include <sys/types.h>
-#include <sys/stat.h>
-#include <fcntl.h>
-#include <string.h>
#include <sys/wait.h>
+#include <time.h>
+#include <unistd.h>
#include "../kselftest.h"
@@ -110,21 +110,40 @@ int run_tests(int secs)
sprintf(buf, "./inconsistency-check -t %i", secs);
ret = system(buf);
- if (ret)
- return ret;
+ if (WIFEXITED(ret) && WEXITSTATUS(ret))
+ return WEXITSTATUS(ret);
ret = system("./nanosleep");
- return ret;
+ return WIFEXITED(ret) ? WEXITSTATUS(ret) : 0;
}
char clocksource_list[10][30];
-int main(int argv, char **argc)
+int main(int argc, char **argv)
{
char orig_clk[512];
- int count, i, status;
+ int count, i, status, opt;
+ int do_sanity_check = 1;
+ int runtime = 60;
pid_t pid;
+ /* Process arguments */
+ while ((opt = getopt(argc, argv, "st:")) != -1) {
+ switch (opt) {
+ case 's':
+ do_sanity_check = 0;
+ break;
+ case 't':
+ runtime = atoi(optarg);
+ break;
+ default:
+ printf("Usage: %s [-s] [-t <secs>]\n", argv[0]);
+ printf(" -s: skip sanity checks\n");
+ printf(" -t: Number of seconds to run\n");
+ exit(-1);
+ }
+ }
+
get_cur_clocksource(orig_clk, 512);
count = get_clocksources(clocksource_list);
@@ -134,24 +153,26 @@ int main(int argv, char **argc)
return -1;
}
- /* Check everything is sane before we start switching asyncrhonously */
- for (i = 0; i < count; i++) {
- printf("Validating clocksource %s\n", clocksource_list[i]);
- if (change_clocksource(clocksource_list[i])) {
- status = -1;
- goto out;
- }
- if (run_tests(5)) {
- status = -1;
- goto out;
+ /* Check everything is sane before we start switching asynchronously */
+ if (do_sanity_check) {
+ for (i = 0; i < count; i++) {
+ printf("Validating clocksource %s\n",
+ clocksource_list[i]);
+ if (change_clocksource(clocksource_list[i])) {
+ status = -1;
+ goto out;
+ }
+ if (run_tests(5)) {
+ status = -1;
+ goto out;
+ }
}
}
-
printf("Running Asynchronous Switching Tests...\n");
pid = fork();
if (!pid)
- return run_tests(60);
+ return run_tests(runtime);
while (pid != waitpid(pid, &status, WNOHANG))
for (i = 0; i < count; i++)
@@ -162,7 +183,9 @@ int main(int argv, char **argc)
out:
change_clocksource(orig_clk);
- if (status)
- return ksft_exit_fail();
- return ksft_exit_pass();
+ /* Print at the end to not mix output with child process */
+ ksft_print_header();
+ ksft_set_plan(1);
+ ksft_test_result(!status, "clocksource-switch\n");
+ ksft_exit(!status);
}
diff --git a/tools/testing/selftests/timers/inconsistency-check.c b/tools/testing/selftests/timers/inconsistency-check.c
index 022d3ffe3fbf..36a49fba6c9b 100644
--- a/tools/testing/selftests/timers/inconsistency-check.c
+++ b/tools/testing/selftests/timers/inconsistency-check.c
@@ -72,7 +72,7 @@ char *clockstring(int clockid)
return "CLOCK_BOOTTIME_ALARM";
case CLOCK_TAI:
return "CLOCK_TAI";
- };
+ }
return "UNKNOWN_CLOCKID";
}
@@ -122,30 +122,28 @@ int consistency_test(int clock_type, unsigned long seconds)
if (inconsistent >= 0) {
unsigned long long delta;
- printf("\%s\n", start_str);
+ ksft_print_msg("\%s\n", start_str);
for (i = 0; i < CALLS_PER_LOOP; i++) {
if (i == inconsistent)
- printf("--------------------\n");
- printf("%lu:%lu\n", list[i].tv_sec,
+ ksft_print_msg("--------------------\n");
+ ksft_print_msg("%lu:%lu\n", list[i].tv_sec,
list[i].tv_nsec);
if (i == inconsistent + 1)
- printf("--------------------\n");
+ ksft_print_msg("--------------------\n");
}
delta = list[inconsistent].tv_sec * NSEC_PER_SEC;
delta += list[inconsistent].tv_nsec;
delta -= list[inconsistent+1].tv_sec * NSEC_PER_SEC;
delta -= list[inconsistent+1].tv_nsec;
- printf("Delta: %llu ns\n", delta);
+ ksft_print_msg("Delta: %llu ns\n", delta);
fflush(0);
/* timestamp inconsistency*/
t = time(0);
- printf("%s\n", ctime(&t));
- printf("[FAILED]\n");
+ ksft_print_msg("%s\n", ctime(&t));
return -1;
}
now = list[0].tv_sec;
}
- printf("[OK]\n");
return 0;
}
@@ -178,16 +176,22 @@ int main(int argc, char *argv[])
setbuf(stdout, NULL);
+ ksft_print_header();
+ ksft_set_plan(maxclocks - userclock);
+
for (clockid = userclock; clockid < maxclocks; clockid++) {
- if (clockid == CLOCK_HWSPECIFIC)
+ if (clockid == CLOCK_HWSPECIFIC || clock_gettime(clockid, &ts)) {
+ ksft_test_result_skip("%-31s\n", clockstring(clockid));
continue;
+ }
- if (!clock_gettime(clockid, &ts)) {
- printf("Consistent %-30s ", clockstring(clockid));
- if (consistency_test(clockid, runtime))
- return ksft_exit_fail();
+ if (consistency_test(clockid, runtime)) {
+ ksft_test_result_fail("%-31s\n", clockstring(clockid));
+ ksft_exit_fail();
+ } else {
+ ksft_test_result_pass("%-31s\n", clockstring(clockid));
}
}
- return ksft_exit_pass();
+ ksft_exit_pass();
}
diff --git a/tools/testing/selftests/timers/leap-a-day.c b/tools/testing/selftests/timers/leap-a-day.c
index 19e46ed5dfb5..23eb398c8140 100644
--- a/tools/testing/selftests/timers/leap-a-day.c
+++ b/tools/testing/selftests/timers/leap-a-day.c
@@ -5,7 +5,7 @@
* Licensed under the GPLv2
*
* This test signals the kernel to insert a leap second
- * every day at midnight GMT. This allows for stessing the
+ * every day at midnight GMT. This allows for stressing the
* kernel's leap-second behavior, as well as how well applications
* handle the leap-second discontinuity.
*
diff --git a/tools/testing/selftests/timers/leapcrash.c b/tools/testing/selftests/timers/leapcrash.c
index dc80728ed191..f70802c5dd0d 100644
--- a/tools/testing/selftests/timers/leapcrash.c
+++ b/tools/testing/selftests/timers/leapcrash.c
@@ -4,10 +4,10 @@
* (C) Copyright 2013, 2015 Linaro Limited
* Licensed under the GPL
*
- * This test demonstrates leapsecond deadlock that is possibe
+ * This test demonstrates leapsecond deadlock that is possible
* on kernels from 2.6.26 to 3.3.
*
- * WARNING: THIS WILL LIKELY HARDHANG SYSTEMS AND MAY LOSE DATA
+ * WARNING: THIS WILL LIKELY HARD HANG SYSTEMS AND MAY LOSE DATA
* RUN AT YOUR OWN RISK!
* To build:
* $ gcc leapcrash.c -o leapcrash -lrt
diff --git a/tools/testing/selftests/timers/nanosleep.c b/tools/testing/selftests/timers/nanosleep.c
index 71b5441c2fd9..df1d03516e7b 100644
--- a/tools/testing/selftests/timers/nanosleep.c
+++ b/tools/testing/selftests/timers/nanosleep.c
@@ -133,33 +133,37 @@ int main(int argc, char **argv)
long long length;
int clockid, ret;
+ ksft_print_header();
+ ksft_set_plan(NR_CLOCKIDS);
+
for (clockid = CLOCK_REALTIME; clockid < NR_CLOCKIDS; clockid++) {
/* Skip cputime clockids since nanosleep won't increment cputime */
if (clockid == CLOCK_PROCESS_CPUTIME_ID ||
clockid == CLOCK_THREAD_CPUTIME_ID ||
- clockid == CLOCK_HWSPECIFIC)
+ clockid == CLOCK_HWSPECIFIC) {
+ ksft_test_result_skip("%-31s\n", clockstring(clockid));
continue;
+ }
- printf("Nanosleep %-31s ", clockstring(clockid));
fflush(stdout);
length = 10;
while (length <= (NSEC_PER_SEC * 10)) {
ret = nanosleep_test(clockid, length);
if (ret == UNSUPPORTED) {
- printf("[UNSUPPORTED]\n");
+ ksft_test_result_skip("%-31s\n", clockstring(clockid));
goto next;
}
if (ret < 0) {
- printf("[FAILED]\n");
- return ksft_exit_fail();
+ ksft_test_result_fail("%-31s\n", clockstring(clockid));
+ ksft_exit_fail();
}
length *= 100;
}
- printf("[OK]\n");
+ ksft_test_result_pass("%-31s\n", clockstring(clockid));
next:
ret = 0;
}
- return ksft_exit_pass();
+ ksft_exit_pass();
}
diff --git a/tools/testing/selftests/timers/nsleep-lat.c b/tools/testing/selftests/timers/nsleep-lat.c
index eb3e79ed7b4a..edb5acacf214 100644
--- a/tools/testing/selftests/timers/nsleep-lat.c
+++ b/tools/testing/selftests/timers/nsleep-lat.c
@@ -118,7 +118,7 @@ int nanosleep_lat_test(int clockid, long long ns)
clock_gettime(clockid, &end);
if (((timespec_sub(start, end)/count)-ns) > UNRESONABLE_LATENCY) {
- printf("Large rel latency: %lld ns :", (timespec_sub(start, end)/count)-ns);
+ ksft_print_msg("Large rel latency: %lld ns :", (timespec_sub(start, end)/count)-ns);
return -1;
}
@@ -132,20 +132,23 @@ int nanosleep_lat_test(int clockid, long long ns)
}
if (latency/count > UNRESONABLE_LATENCY) {
- printf("Large abs latency: %lld ns :", latency/count);
+ ksft_print_msg("Large abs latency: %lld ns :", latency/count);
return -1;
}
return 0;
}
-
+#define SKIPPED_CLOCK_COUNT 3
int main(int argc, char **argv)
{
long long length;
int clockid, ret;
+ ksft_print_header();
+ ksft_set_plan(NR_CLOCKIDS - CLOCK_REALTIME - SKIPPED_CLOCK_COUNT);
+
for (clockid = CLOCK_REALTIME; clockid < NR_CLOCKIDS; clockid++) {
/* Skip cputime clockids since nanosleep won't increment cputime */
@@ -154,9 +157,6 @@ int main(int argc, char **argv)
clockid == CLOCK_HWSPECIFIC)
continue;
- printf("nsleep latency %-26s ", clockstring(clockid));
- fflush(stdout);
-
length = 10;
while (length <= (NSEC_PER_SEC * 10)) {
ret = nanosleep_lat_test(clockid, length);
@@ -167,14 +167,12 @@ int main(int argc, char **argv)
}
if (ret == UNSUPPORTED) {
- printf("[UNSUPPORTED]\n");
- continue;
- }
- if (ret < 0) {
- printf("[FAILED]\n");
- return ksft_exit_fail();
+ ksft_test_result_skip("%s\n", clockstring(clockid));
+ } else {
+ ksft_test_result(ret >= 0, "%s\n",
+ clockstring(clockid));
}
- printf("[OK]\n");
}
- return ksft_exit_pass();
+
+ ksft_finished();
}
diff --git a/tools/testing/selftests/timers/posix_timers.c b/tools/testing/selftests/timers/posix_timers.c
index 0ba500056e63..d49dd3ffd0d9 100644
--- a/tools/testing/selftests/timers/posix_timers.c
+++ b/tools/testing/selftests/timers/posix_timers.c
@@ -76,22 +76,21 @@ static int check_diff(struct timeval start, struct timeval end)
static int check_itimer(int which)
{
+ const char *name;
int err;
struct timeval start, end;
struct itimerval val = {
.it_value.tv_sec = DELAY,
};
- printf("Check itimer ");
-
if (which == ITIMER_VIRTUAL)
- printf("virtual... ");
+ name = "ITIMER_VIRTUAL";
else if (which == ITIMER_PROF)
- printf("prof... ");
+ name = "ITIMER_PROF";
else if (which == ITIMER_REAL)
- printf("real... ");
-
- fflush(stdout);
+ name = "ITIMER_REAL";
+ else
+ return -1;
done = 0;
@@ -104,13 +103,13 @@ static int check_itimer(int which)
err = gettimeofday(&start, NULL);
if (err < 0) {
- perror("Can't call gettimeofday()\n");
+ ksft_perror("Can't call gettimeofday()");
return -1;
}
err = setitimer(which, &val, NULL);
if (err < 0) {
- perror("Can't set timer\n");
+ ksft_perror("Can't set timer");
return -1;
}
@@ -123,20 +122,18 @@ static int check_itimer(int which)
err = gettimeofday(&end, NULL);
if (err < 0) {
- perror("Can't call gettimeofday()\n");
+ ksft_perror("Can't call gettimeofday()");
return -1;
}
- if (!check_diff(start, end))
- printf("[OK]\n");
- else
- printf("[FAIL]\n");
+ ksft_test_result(check_diff(start, end) == 0, "%s\n", name);
return 0;
}
static int check_timer_create(int which)
{
+ const char *type;
int err;
timer_t id;
struct timeval start, end;
@@ -144,31 +141,32 @@ static int check_timer_create(int which)
.it_value.tv_sec = DELAY,
};
- printf("Check timer_create() ");
if (which == CLOCK_THREAD_CPUTIME_ID) {
- printf("per thread... ");
+ type = "thread";
} else if (which == CLOCK_PROCESS_CPUTIME_ID) {
- printf("per process... ");
+ type = "process";
+ } else {
+ ksft_print_msg("Unknown timer_create() type %d\n", which);
+ return -1;
}
- fflush(stdout);
done = 0;
err = timer_create(which, NULL, &id);
if (err < 0) {
- perror("Can't create timer\n");
+ ksft_perror("Can't create timer");
return -1;
}
signal(SIGALRM, sig_handler);
err = gettimeofday(&start, NULL);
if (err < 0) {
- perror("Can't call gettimeofday()\n");
+ ksft_perror("Can't call gettimeofday()");
return -1;
}
err = timer_settime(id, 0, &val, NULL);
if (err < 0) {
- perror("Can't set timer\n");
+ ksft_perror("Can't set timer");
return -1;
}
@@ -176,22 +174,99 @@ static int check_timer_create(int which)
err = gettimeofday(&end, NULL);
if (err < 0) {
- perror("Can't call gettimeofday()\n");
+ ksft_perror("Can't call gettimeofday()");
return -1;
}
- if (!check_diff(start, end))
- printf("[OK]\n");
- else
- printf("[FAIL]\n");
+ ksft_test_result(check_diff(start, end) == 0,
+ "timer_create() per %s\n", type);
return 0;
}
+int remain;
+__thread int got_signal;
+
+static void *distribution_thread(void *arg)
+{
+ while (__atomic_load_n(&remain, __ATOMIC_RELAXED));
+ return NULL;
+}
+
+static void distribution_handler(int nr)
+{
+ if (!__atomic_exchange_n(&got_signal, 1, __ATOMIC_RELAXED))
+ __atomic_fetch_sub(&remain, 1, __ATOMIC_RELAXED);
+}
+
+/*
+ * Test that all running threads _eventually_ receive CLOCK_PROCESS_CPUTIME_ID
+ * timer signals. This primarily tests that the kernel does not favour any one.
+ */
+static int check_timer_distribution(void)
+{
+ int err, i;
+ timer_t id;
+ const int nthreads = 10;
+ pthread_t threads[nthreads];
+ struct itimerspec val = {
+ .it_value.tv_sec = 0,
+ .it_value.tv_nsec = 1000 * 1000,
+ .it_interval.tv_sec = 0,
+ .it_interval.tv_nsec = 1000 * 1000,
+ };
+
+ remain = nthreads + 1; /* worker threads + this thread */
+ signal(SIGALRM, distribution_handler);
+ err = timer_create(CLOCK_PROCESS_CPUTIME_ID, NULL, &id);
+ if (err < 0) {
+ ksft_perror("Can't create timer");
+ return -1;
+ }
+ err = timer_settime(id, 0, &val, NULL);
+ if (err < 0) {
+ ksft_perror("Can't set timer");
+ return -1;
+ }
+
+ for (i = 0; i < nthreads; i++) {
+ err = pthread_create(&threads[i], NULL, distribution_thread,
+ NULL);
+ if (err) {
+ ksft_print_msg("Can't create thread: %s (%d)\n",
+ strerror(errno), errno);
+ return -1;
+ }
+ }
+
+ /* Wait for all threads to receive the signal. */
+ while (__atomic_load_n(&remain, __ATOMIC_RELAXED));
+
+ for (i = 0; i < nthreads; i++) {
+ err = pthread_join(threads[i], NULL);
+ if (err) {
+ ksft_print_msg("Can't join thread: %s (%d)\n",
+ strerror(errno), errno);
+ return -1;
+ }
+ }
+
+ if (timer_delete(id)) {
+ ksft_perror("Can't delete timer");
+ return -1;
+ }
+
+ ksft_test_result_pass("check_timer_distribution\n");
+ return 0;
+}
+
int main(int argc, char **argv)
{
- printf("Testing posix timers. False negative may happen on CPU execution \n");
- printf("based timers if other threads run on the CPU...\n");
+ ksft_print_header();
+ ksft_set_plan(6);
+
+ ksft_print_msg("Testing posix timers. False negative may happen on CPU execution \n");
+ ksft_print_msg("based timers if other threads run on the CPU...\n");
if (check_itimer(ITIMER_VIRTUAL) < 0)
return ksft_exit_fail();
@@ -217,5 +292,8 @@ int main(int argc, char **argv)
if (check_timer_create(CLOCK_PROCESS_CPUTIME_ID) < 0)
return ksft_exit_fail();
- return ksft_exit_pass();
+ if (check_timer_distribution() < 0)
+ return ksft_exit_fail();
+
+ ksft_finished();
}
diff --git a/tools/testing/selftests/timers/raw_skew.c b/tools/testing/selftests/timers/raw_skew.c
index b41d8dd0c40c..6eba203f9da7 100644
--- a/tools/testing/selftests/timers/raw_skew.c
+++ b/tools/testing/selftests/timers/raw_skew.c
@@ -89,7 +89,7 @@ void get_monotonic_and_raw(struct timespec *mon, struct timespec *raw)
}
}
-int main(int argv, char **argc)
+int main(int argc, char **argv)
{
struct timespec mon, raw, start, end;
long long delta1, delta2, interval, eppm, ppm;
@@ -129,8 +129,7 @@ int main(int argv, char **argc)
printf("%lld.%i(est)", eppm/1000, abs((int)(eppm%1000)));
/* Avg the two actual freq samples adjtimex gave us */
- ppm = (tx1.freq + tx2.freq) * 1000 / 2;
- ppm = (long long)tx1.freq * 1000;
+ ppm = (long long)(tx1.freq + tx2.freq) * 1000 / 2;
ppm = shift_right(ppm, 16);
printf(" %lld.%i(act)", ppm/1000, abs((int)(ppm%1000)));
diff --git a/tools/testing/selftests/timers/rtcpie.c b/tools/testing/selftests/timers/rtcpie.c
index 47b5bad1b393..4ef2184f1558 100644
--- a/tools/testing/selftests/timers/rtcpie.c
+++ b/tools/testing/selftests/timers/rtcpie.c
@@ -18,6 +18,8 @@
#include <stdlib.h>
#include <errno.h>
+#include "../kselftest.h"
+
/*
* This expects the new RTC class driver framework, working with
* clocks that will often not be clones of what the PC-AT had.
@@ -35,8 +37,14 @@ int main(int argc, char **argv)
switch (argc) {
case 2:
rtc = argv[1];
- /* FALLTHROUGH */
+ break;
case 1:
+ fd = open(default_rtc, O_RDONLY);
+ if (fd == -1) {
+ printf("Default RTC %s does not exist. Test Skipped!\n", default_rtc);
+ exit(KSFT_SKIP);
+ }
+ close(fd);
break;
default:
fprintf(stderr, "usage: rtctest [rtcdev] [d]\n");
diff --git a/tools/testing/selftests/timers/settings b/tools/testing/selftests/timers/settings
new file mode 100644
index 000000000000..e7b9417537fb
--- /dev/null
+++ b/tools/testing/selftests/timers/settings
@@ -0,0 +1 @@
+timeout=0
diff --git a/tools/testing/selftests/timers/skew_consistency.c b/tools/testing/selftests/timers/skew_consistency.c
index 8066be9aff11..63913f75b384 100644
--- a/tools/testing/selftests/timers/skew_consistency.c
+++ b/tools/testing/selftests/timers/skew_consistency.c
@@ -38,7 +38,7 @@
#define NSEC_PER_SEC 1000000000LL
-int main(int argv, char **argc)
+int main(int argc, char **argv)
{
struct timex tx;
int ret, ppm;
diff --git a/tools/testing/selftests/timers/threadtest.c b/tools/testing/selftests/timers/threadtest.c
index cf3e48919874..80aed4bf06fb 100644
--- a/tools/testing/selftests/timers/threadtest.c
+++ b/tools/testing/selftests/timers/threadtest.c
@@ -76,7 +76,7 @@ void checklist(struct timespec *list, int size)
/* The shared thread shares a global list
* that each thread fills while holding the lock.
- * This stresses clock syncronization across cpus.
+ * This stresses clock synchronization across cpus.
*/
void *shared_thread(void *arg)
{
diff --git a/tools/testing/selftests/timers/valid-adjtimex.c b/tools/testing/selftests/timers/valid-adjtimex.c
index 5397de708d3c..48b9a803235a 100644
--- a/tools/testing/selftests/timers/valid-adjtimex.c
+++ b/tools/testing/selftests/timers/valid-adjtimex.c
@@ -40,7 +40,7 @@
#define ADJ_SETOFFSET 0x0100
#include <sys/syscall.h>
-static int clock_adjtime(clockid_t id, struct timex *tx)
+int clock_adjtime(clockid_t id, struct timex *tx)
{
return syscall(__NR_clock_adjtime, id, tx);
}
diff --git a/tools/testing/selftests/tpm2/Makefile b/tools/testing/selftests/tpm2/Makefile
index 1a5db1eb8ed5..a9bf9459fb25 100644
--- a/tools/testing/selftests/tpm2/Makefile
+++ b/tools/testing/selftests/tpm2/Makefile
@@ -1,5 +1,5 @@
# SPDX-License-Identifier: (GPL-2.0 OR BSD-3-Clause)
include ../lib.mk
-TEST_PROGS := test_smoke.sh test_space.sh
+TEST_PROGS := test_smoke.sh test_space.sh test_async.sh
TEST_PROGS_EXTENDED := tpm2.py tpm2_tests.py
diff --git a/tools/testing/selftests/tpm2/settings b/tools/testing/selftests/tpm2/settings
new file mode 100644
index 000000000000..a62d2fa1275c
--- /dev/null
+++ b/tools/testing/selftests/tpm2/settings
@@ -0,0 +1 @@
+timeout=600
diff --git a/tools/testing/selftests/tpm2/test_async.sh b/tools/testing/selftests/tpm2/test_async.sh
new file mode 100755
index 000000000000..43bf5bd772fd
--- /dev/null
+++ b/tools/testing/selftests/tpm2/test_async.sh
@@ -0,0 +1,10 @@
+#!/bin/sh
+# SPDX-License-Identifier: (GPL-2.0 OR BSD-3-Clause)
+
+# Kselftest framework requirement - SKIP code is 4.
+ksft_skip=4
+
+[ -e /dev/tpm0 ] || exit $ksft_skip
+[ -e /dev/tpmrm0 ] || exit $ksft_skip
+
+python3 -m unittest -v tpm2_tests.AsyncTest
diff --git a/tools/testing/selftests/tpm2/test_smoke.sh b/tools/testing/selftests/tpm2/test_smoke.sh
index 663062701d5a..58af963e5b55 100755
--- a/tools/testing/selftests/tpm2/test_smoke.sh
+++ b/tools/testing/selftests/tpm2/test_smoke.sh
@@ -1,15 +1,9 @@
-#!/bin/bash
+#!/bin/sh
# SPDX-License-Identifier: (GPL-2.0 OR BSD-3-Clause)
# Kselftest framework requirement - SKIP code is 4.
ksft_skip=4
-[ -f /dev/tpm0 ] || exit $ksft_skip
+[ -e /dev/tpm0 ] || exit $ksft_skip
-python -m unittest -v tpm2_tests.SmokeTest
-python -m unittest -v tpm2_tests.AsyncTest
-
-CLEAR_CMD=$(which tpm2_clear)
-if [ -n $CLEAR_CMD ]; then
- tpm2_clear -T device
-fi
+python3 -m unittest -v tpm2_tests.SmokeTest
diff --git a/tools/testing/selftests/tpm2/test_space.sh b/tools/testing/selftests/tpm2/test_space.sh
index 36c9d030a1c6..04c47b13fe8a 100755
--- a/tools/testing/selftests/tpm2/test_space.sh
+++ b/tools/testing/selftests/tpm2/test_space.sh
@@ -1,9 +1,9 @@
-#!/bin/bash
+#!/bin/sh
# SPDX-License-Identifier: (GPL-2.0 OR BSD-3-Clause)
# Kselftest framework requirement - SKIP code is 4.
ksft_skip=4
-[ -f /dev/tpmrm0 ] || exit $ksft_skip
+[ -e /dev/tpmrm0 ] || exit $ksft_skip
-python -m unittest -v tpm2_tests.SpaceTest
+python3 -m unittest -v tpm2_tests.SpaceTest
diff --git a/tools/testing/selftests/tpm2/tpm2.py b/tools/testing/selftests/tpm2/tpm2.py
index d0fcb66a88a6..bba8cb54548e 100644
--- a/tools/testing/selftests/tpm2/tpm2.py
+++ b/tools/testing/selftests/tpm2/tpm2.py
@@ -56,6 +56,7 @@ TSS2_RESMGR_TPM_RC_LAYER = (11 << TSS2_RC_LAYER_SHIFT)
TPM2_CAP_HANDLES = 0x00000001
TPM2_CAP_COMMANDS = 0x00000002
+TPM2_CAP_PCRS = 0x00000005
TPM2_CAP_TPM_PROPERTIES = 0x00000006
TPM2_PT_FIXED = 0x100
@@ -247,14 +248,14 @@ class ProtocolError(Exception):
class AuthCommand(object):
"""TPMS_AUTH_COMMAND"""
- def __init__(self, session_handle=TPM2_RS_PW, nonce='', session_attributes=0,
- hmac=''):
+ def __init__(self, session_handle=TPM2_RS_PW, nonce=bytes(),
+ session_attributes=0, hmac=bytes()):
self.session_handle = session_handle
self.nonce = nonce
self.session_attributes = session_attributes
self.hmac = hmac
- def __str__(self):
+ def __bytes__(self):
fmt = '>I H%us B H%us' % (len(self.nonce), len(self.hmac))
return struct.pack(fmt, self.session_handle, len(self.nonce),
self.nonce, self.session_attributes, len(self.hmac),
@@ -268,11 +269,11 @@ class AuthCommand(object):
class SensitiveCreate(object):
"""TPMS_SENSITIVE_CREATE"""
- def __init__(self, user_auth='', data=''):
+ def __init__(self, user_auth=bytes(), data=bytes()):
self.user_auth = user_auth
self.data = data
- def __str__(self):
+ def __bytes__(self):
fmt = '>H%us H%us' % (len(self.user_auth), len(self.data))
return struct.pack(fmt, len(self.user_auth), self.user_auth,
len(self.data), self.data)
@@ -296,8 +297,9 @@ class Public(object):
return '>HHIH%us%usH%us' % \
(len(self.auth_policy), len(self.parameters), len(self.unique))
- def __init__(self, object_type, name_alg, object_attributes, auth_policy='',
- parameters='', unique=''):
+ def __init__(self, object_type, name_alg, object_attributes,
+ auth_policy=bytes(), parameters=bytes(),
+ unique=bytes()):
self.object_type = object_type
self.name_alg = name_alg
self.object_attributes = object_attributes
@@ -305,7 +307,7 @@ class Public(object):
self.parameters = parameters
self.unique = unique
- def __str__(self):
+ def __bytes__(self):
return struct.pack(self.__fmt(),
self.object_type,
self.name_alg,
@@ -342,8 +344,8 @@ def get_algorithm(name):
def hex_dump(d):
- d = [format(ord(x), '02x') for x in d]
- d = [d[i: i + 16] for i in xrange(0, len(d), 16)]
+ d = [format(x, '02x') for x in d]
+ d = [d[i: i + 16] for i in range(0, len(d), 16)]
d = [' '.join(x) for x in d]
d = os.linesep.join(d)
@@ -369,6 +371,10 @@ class Client:
fcntl.fcntl(self.tpm, fcntl.F_SETFL, flags)
self.tpm_poll = select.poll()
+ def __del__(self):
+ if self.tpm:
+ self.tpm.close()
+
def close(self):
self.tpm.close()
@@ -401,7 +407,7 @@ class Client:
pcrsel_len = max((i >> 3) + 1, 3)
pcrsel = [0] * pcrsel_len
pcrsel[i >> 3] = 1 << (i & 7)
- pcrsel = ''.join(map(chr, pcrsel))
+ pcrsel = ''.join(map(chr, pcrsel)).encode()
fmt = '>HII IHB%us' % (pcrsel_len)
cmd = struct.pack(fmt,
@@ -443,7 +449,7 @@ class Client:
TPM2_CC_PCR_EXTEND,
i,
len(auth_cmd),
- str(auth_cmd),
+ bytes(auth_cmd),
1, bank_alg, dig)
self.send_cmd(cmd)
@@ -457,7 +463,7 @@ class Client:
TPM2_RH_NULL,
TPM2_RH_NULL,
16,
- '\0' * 16,
+ ('\0' * 16).encode(),
0,
session_type,
TPM2_ALG_NULL,
@@ -472,7 +478,7 @@ class Client:
for i in pcrs:
pcr = self.read_pcr(i, bank_alg)
- if pcr == None:
+ if pcr is None:
return None
x += pcr
@@ -489,7 +495,7 @@ class Client:
pcrsel = [0] * pcrsel_len
for i in pcrs:
pcrsel[i >> 3] |= 1 << (i & 7)
- pcrsel = ''.join(map(chr, pcrsel))
+ pcrsel = ''.join(map(chr, pcrsel)).encode()
fmt = '>HII IH%usIHB3s' % ds
cmd = struct.pack(fmt,
@@ -497,7 +503,8 @@ class Client:
struct.calcsize(fmt),
TPM2_CC_POLICY_PCR,
handle,
- len(dig), str(dig),
+ len(dig),
+ bytes(dig),
1,
bank_alg,
pcrsel_len, pcrsel)
@@ -534,7 +541,7 @@ class Client:
self.send_cmd(cmd)
- def create_root_key(self, auth_value = ''):
+ def create_root_key(self, auth_value = bytes()):
attributes = \
Public.FIXED_TPM | \
Public.FIXED_PARENT | \
@@ -570,11 +577,11 @@ class Client:
TPM2_CC_CREATE_PRIMARY,
TPM2_RH_OWNER,
len(auth_cmd),
- str(auth_cmd),
+ bytes(auth_cmd),
len(sensitive),
- str(sensitive),
+ bytes(sensitive),
len(public),
- str(public),
+ bytes(public),
0, 0)
return struct.unpack('>I', self.send_cmd(cmd)[10:14])[0]
@@ -587,7 +594,7 @@ class Client:
attributes = 0
if not policy_dig:
attributes |= Public.USER_WITH_AUTH
- policy_dig = ''
+ policy_dig = bytes()
auth_cmd = AuthCommand()
sensitive = SensitiveCreate(user_auth=auth_value, data=data)
@@ -608,11 +615,11 @@ class Client:
TPM2_CC_CREATE,
parent_key,
len(auth_cmd),
- str(auth_cmd),
+ bytes(auth_cmd),
len(sensitive),
- str(sensitive),
+ bytes(sensitive),
len(public),
- str(public),
+ bytes(public),
0, 0)
rsp = self.send_cmd(cmd)
@@ -635,7 +642,7 @@ class Client:
TPM2_CC_LOAD,
parent_key,
len(auth_cmd),
- str(auth_cmd),
+ bytes(auth_cmd),
blob)
data_handle = struct.unpack('>I', self.send_cmd(cmd)[10:14])[0]
@@ -653,7 +660,7 @@ class Client:
TPM2_CC_UNSEAL,
data_handle,
len(auth_cmd),
- str(auth_cmd))
+ bytes(auth_cmd))
try:
rsp = self.send_cmd(cmd)
@@ -675,7 +682,7 @@ class Client:
TPM2_CC_DICTIONARY_ATTACK_LOCK_RESET,
TPM2_RH_LOCKOUT,
len(auth_cmd),
- str(auth_cmd))
+ bytes(auth_cmd))
self.send_cmd(cmd)
@@ -693,7 +700,7 @@ class Client:
more_data, cap, cnt = struct.unpack('>BII', rsp[:9])
rsp = rsp[9:]
- for i in xrange(0, cnt):
+ for i in range(0, cnt):
handle = struct.unpack('>I', rsp[:4])[0]
handles.append(handle)
rsp = rsp[4:]
@@ -710,3 +717,33 @@ class Client:
pt += 1
return handles
+
+ def get_cap_pcrs(self):
+ pcr_banks = {}
+
+ fmt = '>HII III'
+
+ cmd = struct.pack(fmt,
+ TPM2_ST_NO_SESSIONS,
+ struct.calcsize(fmt),
+ TPM2_CC_GET_CAPABILITY,
+ TPM2_CAP_PCRS, 0, 1)
+ rsp = self.send_cmd(cmd)[10:]
+ _, _, cnt = struct.unpack('>BII', rsp[:9])
+ rsp = rsp[9:]
+
+ # items are TPMS_PCR_SELECTION's
+ for i in range(0, cnt):
+ hash, sizeOfSelect = struct.unpack('>HB', rsp[:3])
+ rsp = rsp[3:]
+
+ pcrSelect = 0
+ if sizeOfSelect > 0:
+ pcrSelect, = struct.unpack('%ds' % sizeOfSelect,
+ rsp[:sizeOfSelect])
+ rsp = rsp[sizeOfSelect:]
+ pcrSelect = int.from_bytes(pcrSelect, byteorder='big')
+
+ pcr_banks[hash] = pcrSelect
+
+ return pcr_banks
diff --git a/tools/testing/selftests/tpm2/tpm2_tests.py b/tools/testing/selftests/tpm2/tpm2_tests.py
index 728be7c69b76..ffe98b5c8d22 100644
--- a/tools/testing/selftests/tpm2/tpm2_tests.py
+++ b/tools/testing/selftests/tpm2/tpm2_tests.py
@@ -20,22 +20,32 @@ class SmokeTest(unittest.TestCase):
self.client.close()
def test_seal_with_auth(self):
- data = 'X' * 64
- auth = 'A' * 15
+ data = ('X' * 64).encode()
+ auth = ('A' * 15).encode()
blob = self.client.seal(self.root_key, data, auth, None)
result = self.client.unseal(self.root_key, blob, auth, None)
self.assertEqual(data, result)
+ def determine_bank_alg(self, mask):
+ pcr_banks = self.client.get_cap_pcrs()
+ for bank_alg, pcrSelection in pcr_banks.items():
+ if pcrSelection & mask == mask:
+ return bank_alg
+ return None
+
def test_seal_with_policy(self):
+ bank_alg = self.determine_bank_alg(1 << 16)
+ self.assertIsNotNone(bank_alg)
+
handle = self.client.start_auth_session(tpm2.TPM2_SE_TRIAL)
- data = 'X' * 64
- auth = 'A' * 15
+ data = ('X' * 64).encode()
+ auth = ('A' * 15).encode()
pcrs = [16]
try:
- self.client.policy_pcr(handle, pcrs)
+ self.client.policy_pcr(handle, pcrs, bank_alg=bank_alg)
self.client.policy_password(handle)
policy_dig = self.client.get_policy_digest(handle)
@@ -47,7 +57,7 @@ class SmokeTest(unittest.TestCase):
handle = self.client.start_auth_session(tpm2.TPM2_SE_POLICY)
try:
- self.client.policy_pcr(handle, pcrs)
+ self.client.policy_pcr(handle, pcrs, bank_alg=bank_alg)
self.client.policy_password(handle)
result = self.client.unseal(self.root_key, blob, auth, handle)
@@ -58,27 +68,31 @@ class SmokeTest(unittest.TestCase):
self.assertEqual(data, result)
def test_unseal_with_wrong_auth(self):
- data = 'X' * 64
- auth = 'A' * 20
+ data = ('X' * 64).encode()
+ auth = ('A' * 20).encode()
rc = 0
blob = self.client.seal(self.root_key, data, auth, None)
try:
- result = self.client.unseal(self.root_key, blob, auth[:-1] + 'B', None)
- except ProtocolError, e:
+ result = self.client.unseal(self.root_key, blob,
+ auth[:-1] + 'B'.encode(), None)
+ except ProtocolError as e:
rc = e.rc
self.assertEqual(rc, tpm2.TPM2_RC_AUTH_FAIL)
def test_unseal_with_wrong_policy(self):
+ bank_alg = self.determine_bank_alg(1 << 16 | 1 << 1)
+ self.assertIsNotNone(bank_alg)
+
handle = self.client.start_auth_session(tpm2.TPM2_SE_TRIAL)
- data = 'X' * 64
- auth = 'A' * 17
+ data = ('X' * 64).encode()
+ auth = ('A' * 17).encode()
pcrs = [16]
try:
- self.client.policy_pcr(handle, pcrs)
+ self.client.policy_pcr(handle, pcrs, bank_alg=bank_alg)
self.client.policy_password(handle)
policy_dig = self.client.get_policy_digest(handle)
@@ -90,13 +104,13 @@ class SmokeTest(unittest.TestCase):
# Extend first a PCR that is not part of the policy and try to unseal.
# This should succeed.
- ds = tpm2.get_digest_size(tpm2.TPM2_ALG_SHA1)
- self.client.extend_pcr(1, 'X' * ds)
+ ds = tpm2.get_digest_size(bank_alg)
+ self.client.extend_pcr(1, ('X' * ds).encode(), bank_alg=bank_alg)
handle = self.client.start_auth_session(tpm2.TPM2_SE_POLICY)
try:
- self.client.policy_pcr(handle, pcrs)
+ self.client.policy_pcr(handle, pcrs, bank_alg=bank_alg)
self.client.policy_password(handle)
result = self.client.unseal(self.root_key, blob, auth, handle)
@@ -108,18 +122,18 @@ class SmokeTest(unittest.TestCase):
# Then, extend a PCR that is part of the policy and try to unseal.
# This should fail.
- self.client.extend_pcr(16, 'X' * ds)
+ self.client.extend_pcr(16, ('X' * ds).encode(), bank_alg=bank_alg)
handle = self.client.start_auth_session(tpm2.TPM2_SE_POLICY)
rc = 0
try:
- self.client.policy_pcr(handle, pcrs)
+ self.client.policy_pcr(handle, pcrs, bank_alg=bank_alg)
self.client.policy_password(handle)
result = self.client.unseal(self.root_key, blob, auth, handle)
- except ProtocolError, e:
+ except ProtocolError as e:
rc = e.rc
self.client.flush_context(handle)
except:
@@ -130,13 +144,13 @@ class SmokeTest(unittest.TestCase):
def test_seal_with_too_long_auth(self):
ds = tpm2.get_digest_size(tpm2.TPM2_ALG_SHA1)
- data = 'X' * 64
- auth = 'A' * (ds + 1)
+ data = ('X' * 64).encode()
+ auth = ('A' * (ds + 1)).encode()
rc = 0
try:
blob = self.client.seal(self.root_key, data, auth, None)
- except ProtocolError, e:
+ except ProtocolError as e:
rc = e.rc
self.assertEqual(rc, tpm2.TPM2_RC_SIZE)
@@ -152,7 +166,7 @@ class SmokeTest(unittest.TestCase):
0xDEADBEEF)
self.client.send_cmd(cmd)
- except IOError, e:
+ except IOError as e:
rejected = True
except:
pass
@@ -212,7 +226,7 @@ class SmokeTest(unittest.TestCase):
self.client.tpm.write(cmd)
rsp = self.client.tpm.read()
- except IOError, e:
+ except IOError as e:
# read the response
rsp = self.client.tpm.read()
rejected = True
@@ -283,7 +297,7 @@ class SpaceTest(unittest.TestCase):
rc = 0
try:
space1.send_cmd(cmd)
- except ProtocolError, e:
+ except ProtocolError as e:
rc = e.rc
self.assertEqual(rc, tpm2.TPM2_RC_COMMAND_CODE |
@@ -301,3 +315,19 @@ class AsyncTest(unittest.TestCase):
log.debug("Calling get_cap in a NON_BLOCKING mode")
async_client.get_cap(tpm2.TPM2_CAP_HANDLES, tpm2.HR_LOADED_SESSION)
async_client.close()
+
+ def test_flush_invalid_context(self):
+ log = logging.getLogger(__name__)
+ log.debug(sys._getframe().f_code.co_name)
+
+ async_client = tpm2.Client(tpm2.Client.FLAG_SPACE | tpm2.Client.FLAG_NONBLOCK)
+ log.debug("Calling flush_context passing in an invalid handle ")
+ handle = 0x80123456
+ rc = 0
+ try:
+ async_client.flush_context(handle)
+ except OSError as e:
+ rc = e.errno
+
+ self.assertEqual(rc, 22)
+ async_client.close()
diff --git a/tools/testing/selftests/tty/.gitignore b/tools/testing/selftests/tty/.gitignore
new file mode 100644
index 000000000000..fe70462a4aad
--- /dev/null
+++ b/tools/testing/selftests/tty/.gitignore
@@ -0,0 +1,2 @@
+# SPDX-License-Identifier: GPL-2.0-only
+tty_tstamp_update
diff --git a/tools/testing/selftests/tty/Makefile b/tools/testing/selftests/tty/Makefile
new file mode 100644
index 000000000000..50d7027b2ae3
--- /dev/null
+++ b/tools/testing/selftests/tty/Makefile
@@ -0,0 +1,5 @@
+# SPDX-License-Identifier: GPL-2.0
+CFLAGS = -O2 -Wall
+TEST_GEN_PROGS := tty_tstamp_update
+
+include ../lib.mk
diff --git a/tools/testing/selftests/tty/tty_tstamp_update.c b/tools/testing/selftests/tty/tty_tstamp_update.c
new file mode 100644
index 000000000000..0ee97943dccc
--- /dev/null
+++ b/tools/testing/selftests/tty/tty_tstamp_update.c
@@ -0,0 +1,88 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include <errno.h>
+#include <stdbool.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/stat.h>
+#include <unistd.h>
+#include <linux/limits.h>
+
+#include "../kselftest.h"
+
+#define MIN_TTY_PATH_LEN 8
+
+static bool tty_valid(char *tty)
+{
+ if (strlen(tty) < MIN_TTY_PATH_LEN)
+ return false;
+
+ if (strncmp(tty, "/dev/tty", MIN_TTY_PATH_LEN) == 0 ||
+ strncmp(tty, "/dev/pts", MIN_TTY_PATH_LEN) == 0)
+ return true;
+
+ return false;
+}
+
+static int write_dev_tty(void)
+{
+ FILE *f;
+ int r = 0;
+
+ f = fopen("/dev/tty", "r+");
+ if (!f)
+ return -errno;
+
+ r = fprintf(f, "hello, world!\n");
+ if (r != strlen("hello, world!\n"))
+ r = -EIO;
+
+ fclose(f);
+ return r;
+}
+
+int main(int argc, char **argv)
+{
+ int r;
+ char tty[PATH_MAX] = {};
+ struct stat st1, st2;
+
+ ksft_print_header();
+ ksft_set_plan(1);
+
+ r = readlink("/proc/self/fd/0", tty, PATH_MAX);
+ if (r < 0)
+ ksft_exit_fail_msg("readlink on /proc/self/fd/0 failed: %m\n");
+
+ if (!tty_valid(tty))
+ ksft_exit_skip("invalid tty path '%s'\n", tty);
+
+ r = stat(tty, &st1);
+ if (r < 0)
+ ksft_exit_fail_msg("stat failed on tty path '%s': %m\n", tty);
+
+ /* We need to wait at least 8 seconds in order to observe timestamp change */
+ /* https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/commit/?id=fbf47635315ab308c9b58a1ea0906e711a9228de */
+ sleep(10);
+
+ r = write_dev_tty();
+ if (r < 0)
+ ksft_exit_fail_msg("failed to write to /dev/tty: %s\n",
+ strerror(-r));
+
+ r = stat(tty, &st2);
+ if (r < 0)
+ ksft_exit_fail_msg("stat failed on tty path '%s': %m\n", tty);
+
+ /* We wrote to the terminal so timestamps should have been updated */
+ if (st1.st_atim.tv_sec == st2.st_atim.tv_sec &&
+ st1.st_mtim.tv_sec == st2.st_mtim.tv_sec) {
+ ksft_test_result_fail("tty timestamps not updated\n");
+ ksft_exit_fail();
+ }
+
+ ksft_test_result_pass(
+ "timestamps of terminal '%s' updated after write to /dev/tty\n", tty);
+ return EXIT_SUCCESS;
+}
diff --git a/tools/testing/selftests/uevent/.gitignore b/tools/testing/selftests/uevent/.gitignore
new file mode 100644
index 000000000000..382afb74cd40
--- /dev/null
+++ b/tools/testing/selftests/uevent/.gitignore
@@ -0,0 +1 @@
+uevent_filtering
diff --git a/tools/testing/selftests/uevent/uevent_filtering.c b/tools/testing/selftests/uevent/uevent_filtering.c
index f83391aa42cf..dbe55f3a66f4 100644
--- a/tools/testing/selftests/uevent/uevent_filtering.c
+++ b/tools/testing/selftests/uevent/uevent_filtering.c
@@ -19,7 +19,6 @@
#include <sys/wait.h>
#include <unistd.h>
-#include "../kselftest.h"
#include "../kselftest_harness.h"
#define __DEV_FULL "/sys/devices/virtual/mem/full/uevent"
@@ -79,7 +78,7 @@ static int uevent_listener(unsigned long post_flags, bool expect_uevent,
{
int sk_fd, ret;
socklen_t sk_addr_len;
- int fret = -1, rcv_buf_sz = __UEVENT_BUFFER_SIZE;
+ int rcv_buf_sz = __UEVENT_BUFFER_SIZE;
uint64_t sync_add = 1;
struct sockaddr_nl sk_addr = { 0 }, rcv_addr = { 0 };
char buf[__UEVENT_BUFFER_SIZE] = { 0 };
@@ -122,6 +121,7 @@ static int uevent_listener(unsigned long post_flags, bool expect_uevent,
if ((size_t)sk_addr_len != sizeof(sk_addr)) {
fprintf(stderr, "Invalid socket address size\n");
+ ret = -1;
goto on_error;
}
@@ -148,11 +148,12 @@ static int uevent_listener(unsigned long post_flags, bool expect_uevent,
ret = write_nointr(sync_fd, &sync_add, sizeof(sync_add));
close(sync_fd);
if (ret != sizeof(sync_add)) {
+ ret = -1;
fprintf(stderr, "Failed to synchronize with parent process\n");
goto on_error;
}
- fret = 0;
+ ret = 0;
for (;;) {
ssize_t r;
@@ -188,7 +189,7 @@ static int uevent_listener(unsigned long post_flags, bool expect_uevent,
on_error:
close(sk_fd);
- return fret;
+ return ret;
}
int trigger_uevent(unsigned int times)
diff --git a/tools/testing/selftests/user_events/.gitignore b/tools/testing/selftests/user_events/.gitignore
new file mode 100644
index 000000000000..f570febd211b
--- /dev/null
+++ b/tools/testing/selftests/user_events/.gitignore
@@ -0,0 +1,4 @@
+abi_test
+dyn_test
+ftrace_test
+perf_test
diff --git a/tools/testing/selftests/user_events/Makefile b/tools/testing/selftests/user_events/Makefile
new file mode 100644
index 000000000000..10fcd0066203
--- /dev/null
+++ b/tools/testing/selftests/user_events/Makefile
@@ -0,0 +1,9 @@
+# SPDX-License-Identifier: GPL-2.0
+CFLAGS += -Wl,-no-as-needed -Wall $(KHDR_INCLUDES)
+LDLIBS += -lrt -lpthread -lm
+
+TEST_GEN_PROGS = ftrace_test dyn_test perf_test abi_test
+
+TEST_FILES := settings
+
+include ../lib.mk
diff --git a/tools/testing/selftests/user_events/abi_test.c b/tools/testing/selftests/user_events/abi_test.c
new file mode 100644
index 000000000000..7288a05136ba
--- /dev/null
+++ b/tools/testing/selftests/user_events/abi_test.c
@@ -0,0 +1,423 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * User Events ABI Test Program
+ *
+ * Copyright (c) 2022 Beau Belgrave <beaub@linux.microsoft.com>
+ */
+
+#define _GNU_SOURCE
+#include <sched.h>
+
+#include <errno.h>
+#include <linux/user_events.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <fcntl.h>
+#include <sys/ioctl.h>
+#include <sys/stat.h>
+#include <unistd.h>
+#include <glob.h>
+#include <string.h>
+#include <asm/unistd.h>
+
+#include "../kselftest_harness.h"
+#include "user_events_selftests.h"
+
+const char *data_file = "/sys/kernel/tracing/user_events_data";
+const char *enable_file = "/sys/kernel/tracing/events/user_events/__abi_event/enable";
+const char *multi_dir_glob = "/sys/kernel/tracing/events/user_events_multi/__abi_event.*";
+
+static int wait_for_delete(char *dir)
+{
+ struct stat buf;
+ int i;
+
+ for (i = 0; i < 10000; ++i) {
+ if (stat(dir, &buf) == -1 && errno == ENOENT)
+ return 0;
+
+ usleep(1000);
+ }
+
+ return -1;
+}
+
+static int find_multi_event_dir(char *unique_field, char *out_dir, int dir_len)
+{
+ char path[256];
+ glob_t buf;
+ int i, ret;
+
+ ret = glob(multi_dir_glob, GLOB_ONLYDIR, NULL, &buf);
+
+ if (ret)
+ return -1;
+
+ ret = -1;
+
+ for (i = 0; i < buf.gl_pathc; ++i) {
+ FILE *fp;
+
+ snprintf(path, sizeof(path), "%s/format", buf.gl_pathv[i]);
+ fp = fopen(path, "r");
+
+ if (!fp)
+ continue;
+
+ while (fgets(path, sizeof(path), fp) != NULL) {
+ if (strstr(path, unique_field)) {
+ fclose(fp);
+ /* strscpy is not available, use snprintf */
+ snprintf(out_dir, dir_len, "%s", buf.gl_pathv[i]);
+ ret = 0;
+ goto out;
+ }
+ }
+
+ fclose(fp);
+ }
+out:
+ globfree(&buf);
+
+ return ret;
+}
+
+static bool event_exists(void)
+{
+ int fd = open(enable_file, O_RDWR);
+
+ if (fd < 0)
+ return false;
+
+ close(fd);
+
+ return true;
+}
+
+static int change_event(bool enable)
+{
+ int fd = open(enable_file, O_RDWR);
+ int ret;
+
+ if (fd < 0)
+ return -1;
+
+ if (enable)
+ ret = write(fd, "1", 1);
+ else
+ ret = write(fd, "0", 1);
+
+ close(fd);
+
+ if (ret == 1)
+ ret = 0;
+ else
+ ret = -1;
+
+ return ret;
+}
+
+static int event_delete(void)
+{
+ int fd = open(data_file, O_RDWR);
+ int ret;
+
+ if (fd < 0)
+ return -1;
+
+ ret = ioctl(fd, DIAG_IOCSDEL, "__abi_event");
+
+ close(fd);
+
+ return ret;
+}
+
+static int reg_enable_multi(void *enable, int size, int bit, int flags,
+ char *args)
+{
+ struct user_reg reg = {0};
+ char full_args[512] = {0};
+ int fd = open(data_file, O_RDWR);
+ int len;
+ int ret;
+
+ if (fd < 0)
+ return -1;
+
+ len = snprintf(full_args, sizeof(full_args), "__abi_event %s", args);
+
+ if (len > sizeof(full_args)) {
+ ret = -E2BIG;
+ goto out;
+ }
+
+ reg.size = sizeof(reg);
+ reg.name_args = (__u64)full_args;
+ reg.flags = USER_EVENT_REG_MULTI_FORMAT | flags;
+ reg.enable_bit = bit;
+ reg.enable_addr = (__u64)enable;
+ reg.enable_size = size;
+
+ ret = ioctl(fd, DIAG_IOCSREG, &reg);
+out:
+ close(fd);
+
+ return ret;
+}
+
+static int reg_enable_flags(void *enable, int size, int bit, int flags)
+{
+ struct user_reg reg = {0};
+ int fd = open(data_file, O_RDWR);
+ int ret;
+
+ if (fd < 0)
+ return -1;
+
+ reg.size = sizeof(reg);
+ reg.name_args = (__u64)"__abi_event";
+ reg.flags = flags;
+ reg.enable_bit = bit;
+ reg.enable_addr = (__u64)enable;
+ reg.enable_size = size;
+
+ ret = ioctl(fd, DIAG_IOCSREG, &reg);
+
+ close(fd);
+
+ return ret;
+}
+
+static int reg_enable(void *enable, int size, int bit)
+{
+ return reg_enable_flags(enable, size, bit, 0);
+}
+
+static int reg_disable(void *enable, int bit)
+{
+ struct user_unreg reg = {0};
+ int fd = open(data_file, O_RDWR);
+ int ret;
+
+ if (fd < 0)
+ return -1;
+
+ reg.size = sizeof(reg);
+ reg.disable_bit = bit;
+ reg.disable_addr = (__u64)enable;
+
+ ret = ioctl(fd, DIAG_IOCSUNREG, &reg);
+
+ close(fd);
+
+ return ret;
+}
+
+FIXTURE(user) {
+ int check;
+ long check_long;
+ bool umount;
+};
+
+FIXTURE_SETUP(user) {
+ USER_EVENT_FIXTURE_SETUP(return, self->umount);
+
+ change_event(false);
+ self->check = 0;
+ self->check_long = 0;
+}
+
+FIXTURE_TEARDOWN(user) {
+ USER_EVENT_FIXTURE_TEARDOWN(self->umount);
+}
+
+TEST_F(user, enablement) {
+ /* Changes should be reflected immediately */
+ ASSERT_EQ(0, self->check);
+ ASSERT_EQ(0, reg_enable(&self->check, sizeof(int), 0));
+ ASSERT_EQ(0, change_event(true));
+ ASSERT_EQ(1, self->check);
+ ASSERT_EQ(0, change_event(false));
+ ASSERT_EQ(0, self->check);
+
+ /* Ensure kernel clears bit after disable */
+ ASSERT_EQ(0, change_event(true));
+ ASSERT_EQ(1, self->check);
+ ASSERT_EQ(0, reg_disable(&self->check, 0));
+ ASSERT_EQ(0, self->check);
+
+ /* Ensure doesn't change after unreg */
+ ASSERT_EQ(0, change_event(true));
+ ASSERT_EQ(0, self->check);
+ ASSERT_EQ(0, change_event(false));
+}
+
+TEST_F(user, flags) {
+ /* USER_EVENT_REG_PERSIST is allowed */
+ ASSERT_EQ(0, reg_enable_flags(&self->check, sizeof(int), 0,
+ USER_EVENT_REG_PERSIST));
+ ASSERT_EQ(0, reg_disable(&self->check, 0));
+
+ /* Ensure it exists after close and disable */
+ ASSERT_TRUE(event_exists());
+
+ /* Ensure we can delete it */
+ ASSERT_EQ(0, event_delete());
+
+ /* USER_EVENT_REG_MAX or above is not allowed */
+ ASSERT_EQ(-1, reg_enable_flags(&self->check, sizeof(int), 0,
+ USER_EVENT_REG_MAX));
+
+ /* Ensure it does not exist after invalid flags */
+ ASSERT_FALSE(event_exists());
+}
+
+TEST_F(user, bit_sizes) {
+ /* Allow 0-31 bits for 32-bit */
+ ASSERT_EQ(0, reg_enable(&self->check, sizeof(int), 0));
+ ASSERT_EQ(0, reg_enable(&self->check, sizeof(int), 31));
+ ASSERT_NE(0, reg_enable(&self->check, sizeof(int), 32));
+ ASSERT_EQ(0, reg_disable(&self->check, 0));
+ ASSERT_EQ(0, reg_disable(&self->check, 31));
+
+#if BITS_PER_LONG == 8
+ /* Allow 0-64 bits for 64-bit */
+ ASSERT_EQ(0, reg_enable(&self->check_long, sizeof(long), 63));
+ ASSERT_NE(0, reg_enable(&self->check_long, sizeof(long), 64));
+ ASSERT_EQ(0, reg_disable(&self->check_long, 63));
+#endif
+
+ /* Disallowed sizes (everything beside 4 and 8) */
+ ASSERT_NE(0, reg_enable(&self->check, 1, 0));
+ ASSERT_NE(0, reg_enable(&self->check, 2, 0));
+ ASSERT_NE(0, reg_enable(&self->check, 3, 0));
+ ASSERT_NE(0, reg_enable(&self->check, 5, 0));
+ ASSERT_NE(0, reg_enable(&self->check, 6, 0));
+ ASSERT_NE(0, reg_enable(&self->check, 7, 0));
+ ASSERT_NE(0, reg_enable(&self->check, 9, 0));
+ ASSERT_NE(0, reg_enable(&self->check, 128, 0));
+}
+
+TEST_F(user, multi_format) {
+ char first_dir[256];
+ char second_dir[256];
+ struct stat buf;
+
+ /* Multiple formats for the same name should work */
+ ASSERT_EQ(0, reg_enable_multi(&self->check, sizeof(int), 0,
+ 0, "u32 multi_first"));
+
+ ASSERT_EQ(0, reg_enable_multi(&self->check, sizeof(int), 1,
+ 0, "u64 multi_second"));
+
+ /* Same name with same format should also work */
+ ASSERT_EQ(0, reg_enable_multi(&self->check, sizeof(int), 2,
+ 0, "u64 multi_second"));
+
+ ASSERT_EQ(0, find_multi_event_dir("multi_first",
+ first_dir, sizeof(first_dir)));
+
+ ASSERT_EQ(0, find_multi_event_dir("multi_second",
+ second_dir, sizeof(second_dir)));
+
+ /* Should not be found in the same dir */
+ ASSERT_NE(0, strcmp(first_dir, second_dir));
+
+ /* First dir should still exist */
+ ASSERT_EQ(0, stat(first_dir, &buf));
+
+ /* Disabling first register should remove first dir */
+ ASSERT_EQ(0, reg_disable(&self->check, 0));
+ ASSERT_EQ(0, wait_for_delete(first_dir));
+
+ /* Second dir should still exist */
+ ASSERT_EQ(0, stat(second_dir, &buf));
+
+ /* Disabling second register should remove second dir */
+ ASSERT_EQ(0, reg_disable(&self->check, 1));
+ /* Ensure bit 1 and 2 are tied together, should not delete yet */
+ ASSERT_EQ(0, stat(second_dir, &buf));
+ ASSERT_EQ(0, reg_disable(&self->check, 2));
+ ASSERT_EQ(0, wait_for_delete(second_dir));
+}
+
+TEST_F(user, forks) {
+ int i;
+
+ /* Ensure COW pages get updated after fork */
+ ASSERT_EQ(0, reg_enable(&self->check, sizeof(int), 0));
+ ASSERT_EQ(0, self->check);
+
+ if (fork() == 0) {
+ /* Force COW */
+ self->check = 0;
+
+ /* Up to 1 sec for enablement */
+ for (i = 0; i < 10; ++i) {
+ usleep(100000);
+
+ if (self->check)
+ exit(0);
+ }
+
+ exit(1);
+ }
+
+ /* Allow generous time for COW, then enable */
+ usleep(100000);
+ ASSERT_EQ(0, change_event(true));
+
+ ASSERT_NE(-1, wait(&i));
+ ASSERT_EQ(0, WEXITSTATUS(i));
+
+ /* Ensure child doesn't disable parent */
+ if (fork() == 0)
+ exit(reg_disable(&self->check, 0));
+
+ ASSERT_NE(-1, wait(&i));
+ ASSERT_EQ(0, WEXITSTATUS(i));
+ ASSERT_EQ(1, self->check);
+ ASSERT_EQ(0, change_event(false));
+ ASSERT_EQ(0, self->check);
+}
+
+/* Waits up to 1 sec for enablement */
+static int clone_check(void *check)
+{
+ int i;
+
+ for (i = 0; i < 10; ++i) {
+ usleep(100000);
+
+ if (*(int *)check)
+ return 0;
+ }
+
+ return 1;
+}
+
+TEST_F(user, clones) {
+ int i, stack_size = 4096;
+ void *stack = mmap(NULL, stack_size, PROT_READ | PROT_WRITE,
+ MAP_PRIVATE | MAP_ANONYMOUS | MAP_STACK,
+ -1, 0);
+
+ ASSERT_NE(MAP_FAILED, stack);
+ ASSERT_EQ(0, reg_enable(&self->check, sizeof(int), 0));
+ ASSERT_EQ(0, self->check);
+
+ /* Shared VM should see enablements */
+ ASSERT_NE(-1, clone(&clone_check, stack + stack_size,
+ CLONE_VM | SIGCHLD, &self->check));
+
+ ASSERT_EQ(0, change_event(true));
+ ASSERT_NE(-1, wait(&i));
+ ASSERT_EQ(0, WEXITSTATUS(i));
+ munmap(stack, stack_size);
+ ASSERT_EQ(0, change_event(false));
+}
+
+int main(int argc, char **argv)
+{
+ return test_harness_run(argc, argv);
+}
diff --git a/tools/testing/selftests/user_events/config b/tools/testing/selftests/user_events/config
new file mode 100644
index 000000000000..64f7a9a90cec
--- /dev/null
+++ b/tools/testing/selftests/user_events/config
@@ -0,0 +1 @@
+CONFIG_USER_EVENTS=y
diff --git a/tools/testing/selftests/user_events/dyn_test.c b/tools/testing/selftests/user_events/dyn_test.c
new file mode 100644
index 000000000000..bdf9ab127488
--- /dev/null
+++ b/tools/testing/selftests/user_events/dyn_test.c
@@ -0,0 +1,294 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * User Events Dyn Events Test Program
+ *
+ * Copyright (c) 2021 Beau Belgrave <beaub@linux.microsoft.com>
+ */
+
+#include <errno.h>
+#include <linux/user_events.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <fcntl.h>
+#include <sys/ioctl.h>
+#include <sys/stat.h>
+#include <unistd.h>
+
+#include "../kselftest_harness.h"
+#include "user_events_selftests.h"
+
+const char *dyn_file = "/sys/kernel/tracing/dynamic_events";
+const char *abi_file = "/sys/kernel/tracing/user_events_data";
+const char *enable_file = "/sys/kernel/tracing/events/user_events/__test_event/enable";
+
+static int event_delete(void)
+{
+ int fd = open(abi_file, O_RDWR);
+ int ret;
+
+ if (fd < 0)
+ return -1;
+
+ ret = ioctl(fd, DIAG_IOCSDEL, "__test_event");
+
+ close(fd);
+
+ return ret;
+}
+
+static bool wait_for_delete(void)
+{
+ int i;
+
+ for (i = 0; i < 1000; ++i) {
+ int fd = open(enable_file, O_RDONLY);
+
+ if (fd == -1)
+ return true;
+
+ close(fd);
+ usleep(1000);
+ }
+
+ return false;
+}
+
+static int reg_event(int fd, int *check, int bit, const char *value)
+{
+ struct user_reg reg = {0};
+
+ reg.size = sizeof(reg);
+ reg.name_args = (__u64)value;
+ reg.enable_bit = bit;
+ reg.enable_addr = (__u64)check;
+ reg.enable_size = sizeof(*check);
+
+ if (ioctl(fd, DIAG_IOCSREG, &reg) == -1)
+ return -1;
+
+ return 0;
+}
+
+static int unreg_event(int fd, int *check, int bit)
+{
+ struct user_unreg unreg = {0};
+
+ unreg.size = sizeof(unreg);
+ unreg.disable_bit = bit;
+ unreg.disable_addr = (__u64)check;
+
+ return ioctl(fd, DIAG_IOCSUNREG, &unreg);
+}
+
+static int parse_dyn(const char *value)
+{
+ int fd = open(dyn_file, O_RDWR | O_APPEND);
+ int len = strlen(value);
+ int ret;
+
+ if (fd == -1)
+ return -1;
+
+ ret = write(fd, value, len);
+
+ if (ret == len)
+ ret = 0;
+ else
+ ret = -1;
+
+ close(fd);
+
+ if (ret == 0)
+ event_delete();
+
+ return ret;
+}
+
+static int parse_abi(int *check, const char *value)
+{
+ int fd = open(abi_file, O_RDWR);
+ int ret;
+
+ if (fd == -1)
+ return -1;
+
+ /* Until we have persist flags via dynamic events, use the base name */
+ if (value[0] != 'u' || value[1] != ':') {
+ close(fd);
+ return -1;
+ }
+
+ ret = reg_event(fd, check, 31, value + 2);
+
+ if (ret != -1) {
+ if (unreg_event(fd, check, 31) == -1)
+ printf("WARN: Couldn't unreg event\n");
+ }
+
+ close(fd);
+
+ return ret;
+}
+
+static int parse(int *check, const char *value)
+{
+ int abi_ret = parse_abi(check, value);
+ int dyn_ret = parse_dyn(value);
+
+ /* Ensure both ABI and DYN parse the same way */
+ if (dyn_ret != abi_ret)
+ return -1;
+
+ return dyn_ret;
+}
+
+static int check_match(int *check, const char *first, const char *second, bool *match)
+{
+ int fd = open(abi_file, O_RDWR);
+ int ret = -1;
+
+ if (fd == -1)
+ return -1;
+
+ if (reg_event(fd, check, 31, first) == -1)
+ goto cleanup;
+
+ if (reg_event(fd, check, 30, second) == -1) {
+ if (errno == EADDRINUSE) {
+ /* Name is in use, with different fields */
+ *match = false;
+ ret = 0;
+ }
+
+ goto cleanup;
+ }
+
+ *match = true;
+ ret = 0;
+cleanup:
+ unreg_event(fd, check, 31);
+ unreg_event(fd, check, 30);
+
+ close(fd);
+
+ wait_for_delete();
+
+ return ret;
+}
+
+#define TEST_MATCH(x, y) \
+do { \
+ bool match; \
+ ASSERT_NE(-1, check_match(&self->check, x, y, &match)); \
+ ASSERT_EQ(true, match); \
+} while (0)
+
+#define TEST_NMATCH(x, y) \
+do { \
+ bool match; \
+ ASSERT_NE(-1, check_match(&self->check, x, y, &match)); \
+ ASSERT_EQ(false, match); \
+} while (0)
+
+#define TEST_PARSE(x) ASSERT_NE(-1, parse(&self->check, x))
+
+#define TEST_NPARSE(x) ASSERT_EQ(-1, parse(&self->check, x))
+
+FIXTURE(user) {
+ int check;
+ bool umount;
+};
+
+FIXTURE_SETUP(user) {
+ USER_EVENT_FIXTURE_SETUP(return, self->umount);
+}
+
+FIXTURE_TEARDOWN(user) {
+ USER_EVENT_FIXTURE_TEARDOWN(self->umount);
+
+ wait_for_delete();
+}
+
+TEST_F(user, basic_types) {
+ /* All should work */
+ TEST_PARSE("u:__test_event u64 a");
+ TEST_PARSE("u:__test_event u32 a");
+ TEST_PARSE("u:__test_event u16 a");
+ TEST_PARSE("u:__test_event u8 a");
+ TEST_PARSE("u:__test_event char a");
+ TEST_PARSE("u:__test_event unsigned char a");
+ TEST_PARSE("u:__test_event int a");
+ TEST_PARSE("u:__test_event unsigned int a");
+ TEST_PARSE("u:__test_event short a");
+ TEST_PARSE("u:__test_event unsigned short a");
+ TEST_PARSE("u:__test_event char[20] a");
+ TEST_PARSE("u:__test_event unsigned char[20] a");
+ TEST_PARSE("u:__test_event char[0x14] a");
+ TEST_PARSE("u:__test_event unsigned char[0x14] a");
+ /* Bad size format should fail */
+ TEST_NPARSE("u:__test_event char[aa] a");
+ /* Large size should fail */
+ TEST_NPARSE("u:__test_event char[9999] a");
+ /* Long size string should fail */
+ TEST_NPARSE("u:__test_event char[0x0000000000001] a");
+}
+
+TEST_F(user, loc_types) {
+ /* All should work */
+ TEST_PARSE("u:__test_event __data_loc char[] a");
+ TEST_PARSE("u:__test_event __data_loc unsigned char[] a");
+ TEST_PARSE("u:__test_event __rel_loc char[] a");
+ TEST_PARSE("u:__test_event __rel_loc unsigned char[] a");
+}
+
+TEST_F(user, size_types) {
+ /* Should work */
+ TEST_PARSE("u:__test_event struct custom a 20");
+ /* Size not specified on struct should fail */
+ TEST_NPARSE("u:__test_event struct custom a");
+ /* Size specified on non-struct should fail */
+ TEST_NPARSE("u:__test_event char a 20");
+}
+
+TEST_F(user, matching) {
+ /* Single name matches */
+ TEST_MATCH("__test_event u32 a",
+ "__test_event u32 a");
+
+ /* Multiple names match */
+ TEST_MATCH("__test_event u32 a; u32 b",
+ "__test_event u32 a; u32 b");
+
+ /* Multiple names match with dangling ; */
+ TEST_MATCH("__test_event u32 a; u32 b",
+ "__test_event u32 a; u32 b;");
+
+ /* Single name doesn't match */
+ TEST_NMATCH("__test_event u32 a",
+ "__test_event u32 b");
+
+ /* Multiple names don't match */
+ TEST_NMATCH("__test_event u32 a; u32 b",
+ "__test_event u32 b; u32 a");
+
+ /* Types don't match */
+ TEST_NMATCH("__test_event u64 a; u64 b",
+ "__test_event u32 a; u32 b");
+
+ /* Struct name and size matches */
+ TEST_MATCH("__test_event struct my_struct a 20",
+ "__test_event struct my_struct a 20");
+
+ /* Struct name don't match */
+ TEST_NMATCH("__test_event struct my_struct a 20",
+ "__test_event struct my_struct b 20");
+
+ /* Struct size don't match */
+ TEST_NMATCH("__test_event struct my_struct a 20",
+ "__test_event struct my_struct a 21");
+}
+
+int main(int argc, char **argv)
+{
+ return test_harness_run(argc, argv);
+}
diff --git a/tools/testing/selftests/user_events/ftrace_test.c b/tools/testing/selftests/user_events/ftrace_test.c
new file mode 100644
index 000000000000..dcd7509fe2e0
--- /dev/null
+++ b/tools/testing/selftests/user_events/ftrace_test.c
@@ -0,0 +1,589 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * User Events FTrace Test Program
+ *
+ * Copyright (c) 2021 Beau Belgrave <beaub@linux.microsoft.com>
+ */
+
+#include <errno.h>
+#include <linux/user_events.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <fcntl.h>
+#include <sys/ioctl.h>
+#include <sys/stat.h>
+#include <sys/uio.h>
+#include <unistd.h>
+
+#include "../kselftest_harness.h"
+#include "user_events_selftests.h"
+
+const char *data_file = "/sys/kernel/tracing/user_events_data";
+const char *status_file = "/sys/kernel/tracing/user_events_status";
+const char *enable_file = "/sys/kernel/tracing/events/user_events/__test_event/enable";
+const char *trace_file = "/sys/kernel/tracing/trace";
+const char *fmt_file = "/sys/kernel/tracing/events/user_events/__test_event/format";
+
+static int trace_bytes(void)
+{
+ int fd = open(trace_file, O_RDONLY);
+ char buf[256];
+ int bytes = 0, got;
+
+ if (fd == -1)
+ return -1;
+
+ while (true) {
+ got = read(fd, buf, sizeof(buf));
+
+ if (got == -1)
+ return -1;
+
+ if (got == 0)
+ break;
+
+ bytes += got;
+ }
+
+ close(fd);
+
+ return bytes;
+}
+
+static int skip_until_empty_line(FILE *fp)
+{
+ int c, last = 0;
+
+ while (true) {
+ c = getc(fp);
+
+ if (c == EOF)
+ break;
+
+ if (last == '\n' && c == '\n')
+ return 0;
+
+ last = c;
+ }
+
+ return -1;
+}
+
+static int get_print_fmt(char *buffer, int len)
+{
+ FILE *fp = fopen(fmt_file, "r");
+ char *newline;
+
+ if (!fp)
+ return -1;
+
+ /* Read until empty line (Skip Common) */
+ if (skip_until_empty_line(fp) < 0)
+ goto err;
+
+ /* Read until empty line (Skip Properties) */
+ if (skip_until_empty_line(fp) < 0)
+ goto err;
+
+ /* Read in print_fmt: */
+ if (fgets(buffer, len, fp) == NULL)
+ goto err;
+
+ newline = strchr(buffer, '\n');
+
+ if (newline)
+ *newline = '\0';
+
+ fclose(fp);
+
+ return 0;
+err:
+ fclose(fp);
+
+ return -1;
+}
+
+static bool wait_for_delete(void)
+{
+ int i;
+
+ for (i = 0; i < 1000; ++i) {
+ int fd = open(enable_file, O_RDONLY);
+
+ if (fd == -1)
+ return true;
+
+ close(fd);
+ usleep(1000);
+ }
+
+ return false;
+}
+
+static int clear(int *check)
+{
+ struct user_unreg unreg = {0};
+ int fd;
+
+ unreg.size = sizeof(unreg);
+ unreg.disable_bit = 31;
+ unreg.disable_addr = (__u64)check;
+
+ fd = open(data_file, O_RDWR);
+
+ if (fd == -1)
+ return -1;
+
+ if (ioctl(fd, DIAG_IOCSUNREG, &unreg) == -1)
+ if (errno != ENOENT)
+ goto fail;
+
+ if (ioctl(fd, DIAG_IOCSDEL, "__test_event") == -1) {
+ if (errno == EBUSY) {
+ if (!wait_for_delete())
+ goto fail;
+ } else if (errno != ENOENT)
+ goto fail;
+ }
+
+ close(fd);
+
+ return 0;
+fail:
+ close(fd);
+
+ return -1;
+}
+
+static int check_print_fmt(const char *event, const char *expected, int *check)
+{
+ struct user_reg reg = {0};
+ char print_fmt[256];
+ int ret;
+ int fd;
+
+ /* Ensure cleared */
+ ret = clear(check);
+
+ if (ret != 0)
+ return ret;
+
+ fd = open(data_file, O_RDWR);
+
+ if (fd == -1)
+ return fd;
+
+ reg.size = sizeof(reg);
+ reg.name_args = (__u64)event;
+ reg.enable_bit = 31;
+ reg.enable_addr = (__u64)check;
+ reg.enable_size = sizeof(*check);
+
+ /* Register should work */
+ ret = ioctl(fd, DIAG_IOCSREG, &reg);
+
+ if (ret != 0) {
+ close(fd);
+ printf("Reg failed in fmt\n");
+ return ret;
+ }
+
+ /* Ensure correct print_fmt */
+ ret = get_print_fmt(print_fmt, sizeof(print_fmt));
+
+ close(fd);
+
+ if (ret != 0)
+ return ret;
+
+ return strcmp(print_fmt, expected);
+}
+
+FIXTURE(user) {
+ int status_fd;
+ int data_fd;
+ int enable_fd;
+ int check;
+ bool umount;
+};
+
+FIXTURE_SETUP(user) {
+ USER_EVENT_FIXTURE_SETUP(return, self->umount);
+
+ self->status_fd = open(status_file, O_RDONLY);
+ ASSERT_NE(-1, self->status_fd);
+
+ self->data_fd = open(data_file, O_RDWR);
+ ASSERT_NE(-1, self->data_fd);
+
+ self->enable_fd = -1;
+}
+
+FIXTURE_TEARDOWN(user) {
+ USER_EVENT_FIXTURE_TEARDOWN(self->umount);
+
+ close(self->status_fd);
+ close(self->data_fd);
+
+ if (self->enable_fd != -1) {
+ write(self->enable_fd, "0", sizeof("0"));
+ close(self->enable_fd);
+ }
+
+ if (clear(&self->check) != 0)
+ printf("WARNING: Clear didn't work!\n");
+}
+
+TEST_F(user, register_events) {
+ struct user_reg reg = {0};
+ struct user_unreg unreg = {0};
+
+ reg.size = sizeof(reg);
+ reg.name_args = (__u64)"__test_event u32 field1; u32 field2";
+ reg.enable_bit = 31;
+ reg.enable_addr = (__u64)&self->check;
+ reg.enable_size = sizeof(self->check);
+
+ unreg.size = sizeof(unreg);
+ unreg.disable_bit = 31;
+ unreg.disable_addr = (__u64)&self->check;
+
+ /* Register should work */
+ ASSERT_EQ(0, ioctl(self->data_fd, DIAG_IOCSREG, &reg));
+ ASSERT_EQ(0, reg.write_index);
+
+ /* Multiple registers to the same addr + bit should fail */
+ ASSERT_EQ(-1, ioctl(self->data_fd, DIAG_IOCSREG, &reg));
+ ASSERT_EQ(EADDRINUSE, errno);
+
+ /* Multiple registers to same name should result in same index */
+ reg.enable_bit = 30;
+ ASSERT_EQ(0, ioctl(self->data_fd, DIAG_IOCSREG, &reg));
+ ASSERT_EQ(0, reg.write_index);
+
+ /* Multiple registers to same name but different args should fail */
+ reg.enable_bit = 29;
+ reg.name_args = (__u64)"__test_event u32 field1;";
+ ASSERT_EQ(-1, ioctl(self->data_fd, DIAG_IOCSREG, &reg));
+ ASSERT_EQ(EADDRINUSE, errno);
+
+ /* Ensure disabled */
+ self->enable_fd = open(enable_file, O_RDWR);
+ ASSERT_NE(-1, self->enable_fd);
+ ASSERT_NE(-1, write(self->enable_fd, "0", sizeof("0")))
+
+ /* Enable event and ensure bits updated in status */
+ ASSERT_NE(-1, write(self->enable_fd, "1", sizeof("1")))
+ ASSERT_EQ(1 << reg.enable_bit, self->check);
+
+ /* Disable event and ensure bits updated in status */
+ ASSERT_NE(-1, write(self->enable_fd, "0", sizeof("0")))
+ ASSERT_EQ(0, self->check);
+
+ /* File still open should return -EBUSY for delete */
+ ASSERT_EQ(-1, ioctl(self->data_fd, DIAG_IOCSDEL, "__test_event"));
+ ASSERT_EQ(EBUSY, errno);
+
+ /* Unregister */
+ ASSERT_EQ(0, ioctl(self->data_fd, DIAG_IOCSUNREG, &unreg));
+ unreg.disable_bit = 30;
+ ASSERT_EQ(0, ioctl(self->data_fd, DIAG_IOCSUNREG, &unreg));
+
+ /* Delete should have been auto-done after close and unregister */
+ close(self->data_fd);
+
+ ASSERT_EQ(true, wait_for_delete());
+}
+
+TEST_F(user, write_events) {
+ struct user_reg reg = {0};
+ struct iovec io[3];
+ __u32 field1, field2;
+ int before = 0, after = 0;
+
+ reg.size = sizeof(reg);
+ reg.name_args = (__u64)"__test_event u32 field1; u32 field2";
+ reg.enable_bit = 31;
+ reg.enable_addr = (__u64)&self->check;
+ reg.enable_size = sizeof(self->check);
+
+ field1 = 1;
+ field2 = 2;
+
+ io[0].iov_base = &reg.write_index;
+ io[0].iov_len = sizeof(reg.write_index);
+ io[1].iov_base = &field1;
+ io[1].iov_len = sizeof(field1);
+ io[2].iov_base = &field2;
+ io[2].iov_len = sizeof(field2);
+
+ /* Register should work */
+ ASSERT_EQ(0, ioctl(self->data_fd, DIAG_IOCSREG, &reg));
+ ASSERT_EQ(0, reg.write_index);
+ ASSERT_EQ(0, self->check);
+
+ /* Write should fail on invalid slot with ENOENT */
+ io[0].iov_base = &field2;
+ io[0].iov_len = sizeof(field2);
+ ASSERT_EQ(-1, writev(self->data_fd, (const struct iovec *)io, 3));
+ ASSERT_EQ(ENOENT, errno);
+ io[0].iov_base = &reg.write_index;
+ io[0].iov_len = sizeof(reg.write_index);
+
+ /* Write should return -EBADF when event is not enabled */
+ ASSERT_EQ(-1, writev(self->data_fd, (const struct iovec *)io, 3));
+ ASSERT_EQ(EBADF, errno);
+
+ /* Enable event */
+ self->enable_fd = open(enable_file, O_RDWR);
+ ASSERT_NE(-1, write(self->enable_fd, "1", sizeof("1")))
+
+ /* Event should now be enabled */
+ ASSERT_NE(1 << reg.enable_bit, self->check);
+
+ /* Write should make it out to ftrace buffers */
+ before = trace_bytes();
+ ASSERT_NE(-1, writev(self->data_fd, (const struct iovec *)io, 3));
+ after = trace_bytes();
+ ASSERT_GT(after, before);
+
+ /* Negative index should fail with EINVAL */
+ reg.write_index = -1;
+ ASSERT_EQ(-1, writev(self->data_fd, (const struct iovec *)io, 3));
+ ASSERT_EQ(EINVAL, errno);
+}
+
+TEST_F(user, write_empty_events) {
+ struct user_reg reg = {0};
+ struct iovec io[1];
+ int before = 0, after = 0;
+
+ reg.size = sizeof(reg);
+ reg.name_args = (__u64)"__test_event";
+ reg.enable_bit = 31;
+ reg.enable_addr = (__u64)&self->check;
+ reg.enable_size = sizeof(self->check);
+
+ io[0].iov_base = &reg.write_index;
+ io[0].iov_len = sizeof(reg.write_index);
+
+ /* Register should work */
+ ASSERT_EQ(0, ioctl(self->data_fd, DIAG_IOCSREG, &reg));
+ ASSERT_EQ(0, reg.write_index);
+ ASSERT_EQ(0, self->check);
+
+ /* Enable event */
+ self->enable_fd = open(enable_file, O_RDWR);
+ ASSERT_NE(-1, write(self->enable_fd, "1", sizeof("1")))
+
+ /* Event should now be enabled */
+ ASSERT_EQ(1 << reg.enable_bit, self->check);
+
+ /* Write should make it out to ftrace buffers */
+ before = trace_bytes();
+ ASSERT_NE(-1, writev(self->data_fd, (const struct iovec *)io, 1));
+ after = trace_bytes();
+ ASSERT_GT(after, before);
+}
+
+TEST_F(user, write_fault) {
+ struct user_reg reg = {0};
+ struct iovec io[2];
+ int l = sizeof(__u64);
+ void *anon;
+
+ reg.size = sizeof(reg);
+ reg.name_args = (__u64)"__test_event u64 anon";
+ reg.enable_bit = 31;
+ reg.enable_addr = (__u64)&self->check;
+ reg.enable_size = sizeof(self->check);
+
+ anon = mmap(NULL, l, PROT_READ, MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
+ ASSERT_NE(MAP_FAILED, anon);
+
+ io[0].iov_base = &reg.write_index;
+ io[0].iov_len = sizeof(reg.write_index);
+ io[1].iov_base = anon;
+ io[1].iov_len = l;
+
+ /* Register should work */
+ ASSERT_EQ(0, ioctl(self->data_fd, DIAG_IOCSREG, &reg));
+ ASSERT_EQ(0, reg.write_index);
+
+ /* Enable event */
+ self->enable_fd = open(enable_file, O_RDWR);
+ ASSERT_NE(-1, write(self->enable_fd, "1", sizeof("1")))
+
+ /* Write should work normally */
+ ASSERT_NE(-1, writev(self->data_fd, (const struct iovec *)io, 2));
+
+ /* Faulted data should zero fill and work */
+ ASSERT_EQ(0, madvise(anon, l, MADV_DONTNEED));
+ ASSERT_NE(-1, writev(self->data_fd, (const struct iovec *)io, 2));
+ ASSERT_EQ(0, munmap(anon, l));
+}
+
+TEST_F(user, write_validator) {
+ struct user_reg reg = {0};
+ struct iovec io[3];
+ int loc, bytes;
+ char data[8];
+ int before = 0, after = 0;
+
+ reg.size = sizeof(reg);
+ reg.name_args = (__u64)"__test_event __rel_loc char[] data";
+ reg.enable_bit = 31;
+ reg.enable_addr = (__u64)&self->check;
+ reg.enable_size = sizeof(self->check);
+
+ /* Register should work */
+ ASSERT_EQ(0, ioctl(self->data_fd, DIAG_IOCSREG, &reg));
+ ASSERT_EQ(0, reg.write_index);
+ ASSERT_EQ(0, self->check);
+
+ io[0].iov_base = &reg.write_index;
+ io[0].iov_len = sizeof(reg.write_index);
+ io[1].iov_base = &loc;
+ io[1].iov_len = sizeof(loc);
+ io[2].iov_base = data;
+ bytes = snprintf(data, sizeof(data), "Test") + 1;
+ io[2].iov_len = bytes;
+
+ /* Undersized write should fail */
+ ASSERT_EQ(-1, writev(self->data_fd, (const struct iovec *)io, 1));
+ ASSERT_EQ(EINVAL, errno);
+
+ /* Enable event */
+ self->enable_fd = open(enable_file, O_RDWR);
+ ASSERT_NE(-1, write(self->enable_fd, "1", sizeof("1")))
+
+ /* Event should now be enabled */
+ ASSERT_EQ(1 << reg.enable_bit, self->check);
+
+ /* Full in-bounds write should work */
+ before = trace_bytes();
+ loc = DYN_LOC(0, bytes);
+ ASSERT_NE(-1, writev(self->data_fd, (const struct iovec *)io, 3));
+ after = trace_bytes();
+ ASSERT_GT(after, before);
+
+ /* Out of bounds write should fault (offset way out) */
+ loc = DYN_LOC(1024, bytes);
+ ASSERT_EQ(-1, writev(self->data_fd, (const struct iovec *)io, 3));
+ ASSERT_EQ(EFAULT, errno);
+
+ /* Out of bounds write should fault (offset 1 byte out) */
+ loc = DYN_LOC(1, bytes);
+ ASSERT_EQ(-1, writev(self->data_fd, (const struct iovec *)io, 3));
+ ASSERT_EQ(EFAULT, errno);
+
+ /* Out of bounds write should fault (size way out) */
+ loc = DYN_LOC(0, bytes + 1024);
+ ASSERT_EQ(-1, writev(self->data_fd, (const struct iovec *)io, 3));
+ ASSERT_EQ(EFAULT, errno);
+
+ /* Out of bounds write should fault (size 1 byte out) */
+ loc = DYN_LOC(0, bytes + 1);
+ ASSERT_EQ(-1, writev(self->data_fd, (const struct iovec *)io, 3));
+ ASSERT_EQ(EFAULT, errno);
+
+ /* Non-Null should fault */
+ memset(data, 'A', sizeof(data));
+ loc = DYN_LOC(0, bytes);
+ ASSERT_EQ(-1, writev(self->data_fd, (const struct iovec *)io, 3));
+ ASSERT_EQ(EFAULT, errno);
+}
+
+TEST_F(user, print_fmt) {
+ int ret;
+
+ ret = check_print_fmt("__test_event __rel_loc char[] data",
+ "print fmt: \"data=%s\", __get_rel_str(data)",
+ &self->check);
+ ASSERT_EQ(0, ret);
+
+ ret = check_print_fmt("__test_event __data_loc char[] data",
+ "print fmt: \"data=%s\", __get_str(data)",
+ &self->check);
+ ASSERT_EQ(0, ret);
+
+ ret = check_print_fmt("__test_event s64 data",
+ "print fmt: \"data=%lld\", REC->data",
+ &self->check);
+ ASSERT_EQ(0, ret);
+
+ ret = check_print_fmt("__test_event u64 data",
+ "print fmt: \"data=%llu\", REC->data",
+ &self->check);
+ ASSERT_EQ(0, ret);
+
+ ret = check_print_fmt("__test_event s32 data",
+ "print fmt: \"data=%d\", REC->data",
+ &self->check);
+ ASSERT_EQ(0, ret);
+
+ ret = check_print_fmt("__test_event u32 data",
+ "print fmt: \"data=%u\", REC->data",
+ &self->check);
+ ASSERT_EQ(0, ret);
+
+ ret = check_print_fmt("__test_event int data",
+ "print fmt: \"data=%d\", REC->data",
+ &self->check);
+ ASSERT_EQ(0, ret);
+
+ ret = check_print_fmt("__test_event unsigned int data",
+ "print fmt: \"data=%u\", REC->data",
+ &self->check);
+ ASSERT_EQ(0, ret);
+
+ ret = check_print_fmt("__test_event s16 data",
+ "print fmt: \"data=%d\", REC->data",
+ &self->check);
+ ASSERT_EQ(0, ret);
+
+ ret = check_print_fmt("__test_event u16 data",
+ "print fmt: \"data=%u\", REC->data",
+ &self->check);
+ ASSERT_EQ(0, ret);
+
+ ret = check_print_fmt("__test_event short data",
+ "print fmt: \"data=%d\", REC->data",
+ &self->check);
+ ASSERT_EQ(0, ret);
+
+ ret = check_print_fmt("__test_event unsigned short data",
+ "print fmt: \"data=%u\", REC->data",
+ &self->check);
+ ASSERT_EQ(0, ret);
+
+ ret = check_print_fmt("__test_event s8 data",
+ "print fmt: \"data=%d\", REC->data",
+ &self->check);
+ ASSERT_EQ(0, ret);
+
+ ret = check_print_fmt("__test_event u8 data",
+ "print fmt: \"data=%u\", REC->data",
+ &self->check);
+ ASSERT_EQ(0, ret);
+
+ ret = check_print_fmt("__test_event char data",
+ "print fmt: \"data=%d\", REC->data",
+ &self->check);
+ ASSERT_EQ(0, ret);
+
+ ret = check_print_fmt("__test_event unsigned char data",
+ "print fmt: \"data=%u\", REC->data",
+ &self->check);
+ ASSERT_EQ(0, ret);
+
+ ret = check_print_fmt("__test_event char[4] data",
+ "print fmt: \"data=%s\", REC->data",
+ &self->check);
+ ASSERT_EQ(0, ret);
+}
+
+int main(int argc, char **argv)
+{
+ return test_harness_run(argc, argv);
+}
diff --git a/tools/testing/selftests/user_events/perf_test.c b/tools/testing/selftests/user_events/perf_test.c
new file mode 100644
index 000000000000..5288e768b207
--- /dev/null
+++ b/tools/testing/selftests/user_events/perf_test.c
@@ -0,0 +1,254 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * User Events Perf Events Test Program
+ *
+ * Copyright (c) 2021 Beau Belgrave <beaub@linux.microsoft.com>
+ */
+
+#include <errno.h>
+#include <linux/user_events.h>
+#include <linux/perf_event.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <fcntl.h>
+#include <sys/ioctl.h>
+#include <sys/stat.h>
+#include <unistd.h>
+#include <asm/unistd.h>
+
+#include "../kselftest_harness.h"
+#include "user_events_selftests.h"
+
+const char *data_file = "/sys/kernel/tracing/user_events_data";
+const char *id_file = "/sys/kernel/tracing/events/user_events/__test_event/id";
+const char *fmt_file = "/sys/kernel/tracing/events/user_events/__test_event/format";
+
+struct event {
+ __u32 index;
+ __u32 field1;
+ __u32 field2;
+};
+
+static long perf_event_open(struct perf_event_attr *pe, pid_t pid,
+ int cpu, int group_fd, unsigned long flags)
+{
+ return syscall(__NR_perf_event_open, pe, pid, cpu, group_fd, flags);
+}
+
+static int get_id(void)
+{
+ FILE *fp = fopen(id_file, "r");
+ int ret, id = 0;
+
+ if (!fp)
+ return -1;
+
+ ret = fscanf(fp, "%d", &id);
+ fclose(fp);
+
+ if (ret != 1)
+ return -1;
+
+ return id;
+}
+
+static int get_offset(void)
+{
+ FILE *fp = fopen(fmt_file, "r");
+ int ret, c, last = 0, offset = 0;
+
+ if (!fp)
+ return -1;
+
+ /* Read until empty line */
+ while (true) {
+ c = getc(fp);
+
+ if (c == EOF)
+ break;
+
+ if (last == '\n' && c == '\n')
+ break;
+
+ last = c;
+ }
+
+ ret = fscanf(fp, "\tfield:u32 field1;\toffset:%d;", &offset);
+ fclose(fp);
+
+ if (ret != 1)
+ return -1;
+
+ return offset;
+}
+
+static int clear(int *check)
+{
+ struct user_unreg unreg = {0};
+
+ unreg.size = sizeof(unreg);
+ unreg.disable_bit = 31;
+ unreg.disable_addr = (__u64)check;
+
+ int fd = open(data_file, O_RDWR);
+
+ if (fd == -1)
+ return -1;
+
+ if (ioctl(fd, DIAG_IOCSUNREG, &unreg) == -1)
+ if (errno != ENOENT)
+ return -1;
+
+ if (ioctl(fd, DIAG_IOCSDEL, "__test_event") == -1)
+ if (errno != ENOENT)
+ return -1;
+
+ close(fd);
+
+ return 0;
+}
+
+FIXTURE(user) {
+ int data_fd;
+ int check;
+ bool umount;
+};
+
+FIXTURE_SETUP(user) {
+ USER_EVENT_FIXTURE_SETUP(return, self->umount);
+
+ self->data_fd = open(data_file, O_RDWR);
+ ASSERT_NE(-1, self->data_fd);
+}
+
+FIXTURE_TEARDOWN(user) {
+ USER_EVENT_FIXTURE_TEARDOWN(self->umount);
+
+ close(self->data_fd);
+
+ if (clear(&self->check) != 0)
+ printf("WARNING: Clear didn't work!\n");
+}
+
+TEST_F(user, perf_write) {
+ struct perf_event_attr pe = {0};
+ struct user_reg reg = {0};
+ struct event event;
+ struct perf_event_mmap_page *perf_page;
+ int page_size = sysconf(_SC_PAGESIZE);
+ int id, fd, offset;
+ __u32 *val;
+
+ reg.size = sizeof(reg);
+ reg.name_args = (__u64)"__test_event u32 field1; u32 field2";
+ reg.enable_bit = 31;
+ reg.enable_addr = (__u64)&self->check;
+ reg.enable_size = sizeof(self->check);
+
+ /* Register should work */
+ ASSERT_EQ(0, ioctl(self->data_fd, DIAG_IOCSREG, &reg));
+ ASSERT_EQ(0, reg.write_index);
+ ASSERT_EQ(0, self->check);
+
+ /* Id should be there */
+ id = get_id();
+ ASSERT_NE(-1, id);
+ offset = get_offset();
+ ASSERT_NE(-1, offset);
+
+ pe.type = PERF_TYPE_TRACEPOINT;
+ pe.size = sizeof(pe);
+ pe.config = id;
+ pe.sample_type = PERF_SAMPLE_RAW;
+ pe.sample_period = 1;
+ pe.wakeup_events = 1;
+
+ /* Tracepoint attach should work */
+ fd = perf_event_open(&pe, 0, -1, -1, 0);
+ ASSERT_NE(-1, fd);
+
+ perf_page = mmap(NULL, page_size * 2, PROT_READ, MAP_SHARED, fd, 0);
+ ASSERT_NE(MAP_FAILED, perf_page);
+
+ /* Status should be updated */
+ ASSERT_EQ(1 << reg.enable_bit, self->check);
+
+ event.index = reg.write_index;
+ event.field1 = 0xc001;
+ event.field2 = 0xc01a;
+
+ /* Ensure write shows up at correct offset */
+ ASSERT_NE(-1, write(self->data_fd, &event, sizeof(event)));
+ val = (void *)(((char *)perf_page) + perf_page->data_offset);
+ ASSERT_EQ(PERF_RECORD_SAMPLE, *val);
+ /* Skip over header and size, move to offset */
+ val += 3;
+ val = (void *)((char *)val) + offset;
+ /* Ensure correct */
+ ASSERT_EQ(event.field1, *val++);
+ ASSERT_EQ(event.field2, *val++);
+
+ munmap(perf_page, page_size * 2);
+ close(fd);
+
+ /* Status should be updated */
+ ASSERT_EQ(0, self->check);
+}
+
+TEST_F(user, perf_empty_events) {
+ struct perf_event_attr pe = {0};
+ struct user_reg reg = {0};
+ struct perf_event_mmap_page *perf_page;
+ int page_size = sysconf(_SC_PAGESIZE);
+ int id, fd;
+ __u32 *val;
+
+ reg.size = sizeof(reg);
+ reg.name_args = (__u64)"__test_event";
+ reg.enable_bit = 31;
+ reg.enable_addr = (__u64)&self->check;
+ reg.enable_size = sizeof(self->check);
+
+ /* Register should work */
+ ASSERT_EQ(0, ioctl(self->data_fd, DIAG_IOCSREG, &reg));
+ ASSERT_EQ(0, reg.write_index);
+ ASSERT_EQ(0, self->check);
+
+ /* Id should be there */
+ id = get_id();
+ ASSERT_NE(-1, id);
+
+ pe.type = PERF_TYPE_TRACEPOINT;
+ pe.size = sizeof(pe);
+ pe.config = id;
+ pe.sample_type = PERF_SAMPLE_RAW;
+ pe.sample_period = 1;
+ pe.wakeup_events = 1;
+
+ /* Tracepoint attach should work */
+ fd = perf_event_open(&pe, 0, -1, -1, 0);
+ ASSERT_NE(-1, fd);
+
+ perf_page = mmap(NULL, page_size * 2, PROT_READ, MAP_SHARED, fd, 0);
+ ASSERT_NE(MAP_FAILED, perf_page);
+
+ /* Status should be updated */
+ ASSERT_EQ(1 << reg.enable_bit, self->check);
+
+ /* Ensure write shows up at correct offset */
+ ASSERT_NE(-1, write(self->data_fd, &reg.write_index,
+ sizeof(reg.write_index)));
+ val = (void *)(((char *)perf_page) + perf_page->data_offset);
+ ASSERT_EQ(PERF_RECORD_SAMPLE, *val);
+
+ munmap(perf_page, page_size * 2);
+ close(fd);
+
+ /* Status should be updated */
+ ASSERT_EQ(0, self->check);
+}
+
+int main(int argc, char **argv)
+{
+ return test_harness_run(argc, argv);
+}
diff --git a/tools/testing/selftests/user_events/settings b/tools/testing/selftests/user_events/settings
new file mode 100644
index 000000000000..ba4d85f74cd6
--- /dev/null
+++ b/tools/testing/selftests/user_events/settings
@@ -0,0 +1 @@
+timeout=90
diff --git a/tools/testing/selftests/user_events/user_events_selftests.h b/tools/testing/selftests/user_events/user_events_selftests.h
new file mode 100644
index 000000000000..e1c3c063c031
--- /dev/null
+++ b/tools/testing/selftests/user_events/user_events_selftests.h
@@ -0,0 +1,114 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+
+#ifndef _USER_EVENTS_SELFTESTS_H
+#define _USER_EVENTS_SELFTESTS_H
+
+#include <sys/stat.h>
+#include <sys/types.h>
+#include <sys/mount.h>
+#include <unistd.h>
+#include <errno.h>
+
+#include "../kselftest.h"
+
+static inline void tracefs_unmount(void)
+{
+ umount("/sys/kernel/tracing");
+}
+
+static inline bool tracefs_enabled(char **message, bool *fail, bool *umount)
+{
+ struct stat buf;
+ int ret;
+
+ *message = "";
+ *fail = false;
+ *umount = false;
+
+ /* Ensure tracefs is installed */
+ ret = stat("/sys/kernel/tracing", &buf);
+
+ if (ret == -1) {
+ *message = "Tracefs is not installed";
+ return false;
+ }
+
+ /* Ensure mounted tracefs */
+ ret = stat("/sys/kernel/tracing/README", &buf);
+
+ if (ret == -1 && errno == ENOENT) {
+ if (mount(NULL, "/sys/kernel/tracing", "tracefs", 0, NULL) != 0) {
+ *message = "Cannot mount tracefs";
+ *fail = true;
+ return false;
+ }
+
+ *umount = true;
+
+ ret = stat("/sys/kernel/tracing/README", &buf);
+ }
+
+ if (ret == -1) {
+ *message = "Cannot access tracefs";
+ *fail = true;
+ return false;
+ }
+
+ return true;
+}
+
+static inline bool user_events_enabled(char **message, bool *fail, bool *umount)
+{
+ struct stat buf;
+ int ret;
+
+ *message = "";
+ *fail = false;
+ *umount = false;
+
+ if (getuid() != 0) {
+ *message = "Must be run as root";
+ *fail = true;
+ return false;
+ }
+
+ if (!tracefs_enabled(message, fail, umount))
+ return false;
+
+ /* Ensure user_events is installed */
+ ret = stat("/sys/kernel/tracing/user_events_data", &buf);
+
+ if (ret == -1) {
+ switch (errno) {
+ case ENOENT:
+ *message = "user_events is not installed";
+ return false;
+
+ default:
+ *message = "Cannot access user_events_data";
+ *fail = true;
+ return false;
+ }
+ }
+
+ return true;
+}
+
+#define USER_EVENT_FIXTURE_SETUP(statement, umount) do { \
+ char *message; \
+ bool fail; \
+ if (!user_events_enabled(&message, &fail, &(umount))) { \
+ if (fail) { \
+ TH_LOG("Setup failed due to: %s", message); \
+ ASSERT_FALSE(fail); \
+ } \
+ SKIP(statement, "Skipping due to: %s", message); \
+ } \
+} while (0)
+
+#define USER_EVENT_FIXTURE_TEARDOWN(umount) do { \
+ if ((umount)) \
+ tracefs_unmount(); \
+} while (0)
+
+#endif /* _USER_EVENTS_SELFTESTS_H */
diff --git a/tools/testing/selftests/vDSO/.gitignore b/tools/testing/selftests/vDSO/.gitignore
index 5eb64d41e541..a8dc51af5a9c 100644
--- a/tools/testing/selftests/vDSO/.gitignore
+++ b/tools/testing/selftests/vDSO/.gitignore
@@ -1,5 +1,8 @@
# SPDX-License-Identifier: GPL-2.0-only
vdso_test
+vdso_test_abi
+vdso_test_clock_getres
+vdso_test_correctness
vdso_test_gettimeofday
vdso_test_getcpu
vdso_standalone_test_x86
diff --git a/tools/testing/selftests/vDSO/Makefile b/tools/testing/selftests/vDSO/Makefile
index 0069f2f83f86..d53a4d8008f9 100644
--- a/tools/testing/selftests/vDSO/Makefile
+++ b/tools/testing/selftests/vDSO/Makefile
@@ -5,13 +5,16 @@ uname_M := $(shell uname -m 2>/dev/null || echo not)
ARCH ?= $(shell echo $(uname_M) | sed -e s/i.86/x86/ -e s/x86_64/x86/)
TEST_GEN_PROGS := $(OUTPUT)/vdso_test_gettimeofday $(OUTPUT)/vdso_test_getcpu
-ifeq ($(ARCH),x86)
+TEST_GEN_PROGS += $(OUTPUT)/vdso_test_abi
+TEST_GEN_PROGS += $(OUTPUT)/vdso_test_clock_getres
+ifeq ($(ARCH),$(filter $(ARCH),x86 x86_64))
TEST_GEN_PROGS += $(OUTPUT)/vdso_standalone_test_x86
endif
+TEST_GEN_PROGS += $(OUTPUT)/vdso_test_correctness
-ifndef CROSS_COMPILE
CFLAGS := -std=gnu99
CFLAGS_vdso_standalone_test_x86 := -nostdlib -fno-asynchronous-unwind-tables -fno-stack-protector
+LDFLAGS_vdso_test_correctness := -ldl
ifeq ($(CONFIG_X86_32),y)
LDLIBS += -lgcc_s
endif
@@ -19,9 +22,14 @@ endif
all: $(TEST_GEN_PROGS)
$(OUTPUT)/vdso_test_gettimeofday: parse_vdso.c vdso_test_gettimeofday.c
$(OUTPUT)/vdso_test_getcpu: parse_vdso.c vdso_test_getcpu.c
+$(OUTPUT)/vdso_test_abi: parse_vdso.c vdso_test_abi.c
+$(OUTPUT)/vdso_test_clock_getres: vdso_test_clock_getres.c
$(OUTPUT)/vdso_standalone_test_x86: vdso_standalone_test_x86.c parse_vdso.c
$(CC) $(CFLAGS) $(CFLAGS_vdso_standalone_test_x86) \
vdso_standalone_test_x86.c parse_vdso.c \
-o $@
-
-endif
+$(OUTPUT)/vdso_test_correctness: vdso_test_correctness.c
+ $(CC) $(CFLAGS) \
+ vdso_test_correctness.c \
+ -o $@ \
+ $(LDFLAGS_vdso_test_correctness)
diff --git a/tools/testing/selftests/vDSO/vdso_config.h b/tools/testing/selftests/vDSO/vdso_config.h
new file mode 100644
index 000000000000..cdfed403ba13
--- /dev/null
+++ b/tools/testing/selftests/vDSO/vdso_config.h
@@ -0,0 +1,90 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * vdso_config.h: Configuration options for vDSO tests.
+ * Copyright (c) 2019 Arm Ltd.
+ */
+#ifndef __VDSO_CONFIG_H__
+#define __VDSO_CONFIG_H__
+
+/*
+ * Each architecture exports its vDSO implementation with different names
+ * and a different version from the others, so we need to handle it as a
+ * special case.
+ */
+#if defined(__arm__)
+#define VDSO_VERSION 0
+#define VDSO_NAMES 1
+#define VDSO_32BIT 1
+#elif defined(__aarch64__)
+#define VDSO_VERSION 3
+#define VDSO_NAMES 0
+#elif defined(__powerpc__)
+#define VDSO_VERSION 1
+#define VDSO_NAMES 0
+#define VDSO_32BIT 1
+#elif defined(__powerpc64__)
+#define VDSO_VERSION 1
+#define VDSO_NAMES 0
+#elif defined (__s390__)
+#define VDSO_VERSION 2
+#define VDSO_NAMES 0
+#define VDSO_32BIT 1
+#elif defined (__s390X__)
+#define VDSO_VERSION 2
+#define VDSO_NAMES 0
+#elif defined(__mips__)
+#define VDSO_VERSION 0
+#define VDSO_NAMES 1
+#define VDSO_32BIT 1
+#elif defined(__sparc__)
+#define VDSO_VERSION 0
+#define VDSO_NAMES 1
+#define VDSO_32BIT 1
+#elif defined(__i386__)
+#define VDSO_VERSION 0
+#define VDSO_NAMES 1
+#define VDSO_32BIT 1
+#elif defined(__x86_64__)
+#define VDSO_VERSION 0
+#define VDSO_NAMES 1
+#elif defined(__riscv__) || defined(__riscv)
+#define VDSO_VERSION 5
+#define VDSO_NAMES 1
+#if __riscv_xlen == 32
+#define VDSO_32BIT 1
+#endif
+#endif
+
+static const char *versions[6] = {
+ "LINUX_2.6",
+ "LINUX_2.6.15",
+ "LINUX_2.6.29",
+ "LINUX_2.6.39",
+ "LINUX_4",
+ "LINUX_4.15",
+};
+
+static const char *names[2][6] = {
+ {
+ "__kernel_gettimeofday",
+ "__kernel_clock_gettime",
+ "__kernel_time",
+ "__kernel_clock_getres",
+ "__kernel_getcpu",
+#if defined(VDSO_32BIT)
+ "__kernel_clock_gettime64",
+#endif
+ },
+ {
+ "__vdso_gettimeofday",
+ "__vdso_clock_gettime",
+ "__vdso_time",
+ "__vdso_clock_getres",
+ "__vdso_getcpu",
+#if defined(VDSO_32BIT)
+ "__vdso_clock_gettime64",
+#endif
+ },
+};
+
+#endif /* __VDSO_CONFIG_H__ */
diff --git a/tools/testing/selftests/vDSO/vdso_test_abi.c b/tools/testing/selftests/vDSO/vdso_test_abi.c
new file mode 100644
index 000000000000..96d32fd65b42
--- /dev/null
+++ b/tools/testing/selftests/vDSO/vdso_test_abi.c
@@ -0,0 +1,239 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * vdso_full_test.c: Sample code to test all the timers.
+ * Copyright (c) 2019 Arm Ltd.
+ *
+ * Compile with:
+ * gcc -std=gnu99 vdso_full_test.c parse_vdso.c
+ *
+ */
+
+#include <stdint.h>
+#include <elf.h>
+#include <stdio.h>
+#include <time.h>
+#include <sys/auxv.h>
+#include <sys/time.h>
+#define _GNU_SOURCE
+#include <unistd.h>
+#include <sys/syscall.h>
+
+#include "../kselftest.h"
+#include "vdso_config.h"
+
+extern void *vdso_sym(const char *version, const char *name);
+extern void vdso_init_from_sysinfo_ehdr(uintptr_t base);
+extern void vdso_init_from_auxv(void *auxv);
+
+static const char *version;
+static const char **name;
+
+typedef long (*vdso_gettimeofday_t)(struct timeval *tv, struct timezone *tz);
+typedef long (*vdso_clock_gettime_t)(clockid_t clk_id, struct timespec *ts);
+typedef long (*vdso_clock_getres_t)(clockid_t clk_id, struct timespec *ts);
+typedef time_t (*vdso_time_t)(time_t *t);
+
+const char *vdso_clock_name[12] = {
+ "CLOCK_REALTIME",
+ "CLOCK_MONOTONIC",
+ "CLOCK_PROCESS_CPUTIME_ID",
+ "CLOCK_THREAD_CPUTIME_ID",
+ "CLOCK_MONOTONIC_RAW",
+ "CLOCK_REALTIME_COARSE",
+ "CLOCK_MONOTONIC_COARSE",
+ "CLOCK_BOOTTIME",
+ "CLOCK_REALTIME_ALARM",
+ "CLOCK_BOOTTIME_ALARM",
+ "CLOCK_SGI_CYCLE",
+ "CLOCK_TAI",
+};
+
+static void vdso_test_gettimeofday(void)
+{
+ /* Find gettimeofday. */
+ vdso_gettimeofday_t vdso_gettimeofday =
+ (vdso_gettimeofday_t)vdso_sym(version, name[0]);
+
+ if (!vdso_gettimeofday) {
+ ksft_print_msg("Couldn't find %s\n", name[0]);
+ ksft_test_result_skip("%s\n", name[0]);
+ return;
+ }
+
+ struct timeval tv;
+ long ret = vdso_gettimeofday(&tv, 0);
+
+ if (ret == 0) {
+ ksft_print_msg("The time is %lld.%06lld\n",
+ (long long)tv.tv_sec, (long long)tv.tv_usec);
+ ksft_test_result_pass("%s\n", name[0]);
+ } else {
+ ksft_test_result_fail("%s\n", name[0]);
+ }
+}
+
+static void vdso_test_clock_gettime(clockid_t clk_id)
+{
+ /* Find clock_gettime. */
+ vdso_clock_gettime_t vdso_clock_gettime =
+ (vdso_clock_gettime_t)vdso_sym(version, name[1]);
+
+ if (!vdso_clock_gettime) {
+ ksft_print_msg("Couldn't find %s\n", name[1]);
+ ksft_test_result_skip("%s %s\n", name[1],
+ vdso_clock_name[clk_id]);
+ return;
+ }
+
+ struct timespec ts;
+ long ret = vdso_clock_gettime(clk_id, &ts);
+
+ if (ret == 0) {
+ ksft_print_msg("The time is %lld.%06lld\n",
+ (long long)ts.tv_sec, (long long)ts.tv_nsec);
+ ksft_test_result_pass("%s %s\n", name[1],
+ vdso_clock_name[clk_id]);
+ } else {
+ ksft_test_result_fail("%s %s\n", name[1],
+ vdso_clock_name[clk_id]);
+ }
+}
+
+static void vdso_test_time(void)
+{
+ /* Find time. */
+ vdso_time_t vdso_time =
+ (vdso_time_t)vdso_sym(version, name[2]);
+
+ if (!vdso_time) {
+ ksft_print_msg("Couldn't find %s\n", name[2]);
+ ksft_test_result_skip("%s\n", name[2]);
+ return;
+ }
+
+ long ret = vdso_time(NULL);
+
+ if (ret > 0) {
+ ksft_print_msg("The time in hours since January 1, 1970 is %lld\n",
+ (long long)(ret / 3600));
+ ksft_test_result_pass("%s\n", name[2]);
+ } else {
+ ksft_test_result_fail("%s\n", name[2]);
+ }
+}
+
+static void vdso_test_clock_getres(clockid_t clk_id)
+{
+ int clock_getres_fail = 0;
+
+ /* Find clock_getres. */
+ vdso_clock_getres_t vdso_clock_getres =
+ (vdso_clock_getres_t)vdso_sym(version, name[3]);
+
+ if (!vdso_clock_getres) {
+ ksft_print_msg("Couldn't find %s\n", name[3]);
+ ksft_test_result_skip("%s %s\n", name[3],
+ vdso_clock_name[clk_id]);
+ return;
+ }
+
+ struct timespec ts, sys_ts;
+ long ret = vdso_clock_getres(clk_id, &ts);
+
+ if (ret == 0) {
+ ksft_print_msg("The vdso resolution is %lld %lld\n",
+ (long long)ts.tv_sec, (long long)ts.tv_nsec);
+ } else {
+ clock_getres_fail++;
+ }
+
+ ret = syscall(SYS_clock_getres, clk_id, &sys_ts);
+
+ ksft_print_msg("The syscall resolution is %lld %lld\n",
+ (long long)sys_ts.tv_sec, (long long)sys_ts.tv_nsec);
+
+ if ((sys_ts.tv_sec != ts.tv_sec) || (sys_ts.tv_nsec != ts.tv_nsec))
+ clock_getres_fail++;
+
+ if (clock_getres_fail > 0) {
+ ksft_test_result_fail("%s %s\n", name[3],
+ vdso_clock_name[clk_id]);
+ } else {
+ ksft_test_result_pass("%s %s\n", name[3],
+ vdso_clock_name[clk_id]);
+ }
+}
+
+/*
+ * This function calls vdso_test_clock_gettime and vdso_test_clock_getres
+ * with different values for clock_id.
+ */
+static inline void vdso_test_clock(clockid_t clock_id)
+{
+ ksft_print_msg("clock_id: %s\n", vdso_clock_name[clock_id]);
+
+ vdso_test_clock_gettime(clock_id);
+
+ vdso_test_clock_getres(clock_id);
+}
+
+#define VDSO_TEST_PLAN 16
+
+int main(int argc, char **argv)
+{
+ unsigned long sysinfo_ehdr = getauxval(AT_SYSINFO_EHDR);
+
+ ksft_print_header();
+ ksft_set_plan(VDSO_TEST_PLAN);
+
+ if (!sysinfo_ehdr) {
+ ksft_print_msg("AT_SYSINFO_EHDR is not present!\n");
+ return KSFT_SKIP;
+ }
+
+ version = versions[VDSO_VERSION];
+ name = (const char **)&names[VDSO_NAMES];
+
+ ksft_print_msg("[vDSO kselftest] VDSO_VERSION: %s\n", version);
+
+ vdso_init_from_sysinfo_ehdr(getauxval(AT_SYSINFO_EHDR));
+
+ vdso_test_gettimeofday();
+
+#if _POSIX_TIMERS > 0
+
+#ifdef CLOCK_REALTIME
+ vdso_test_clock(CLOCK_REALTIME);
+#endif
+
+#ifdef CLOCK_BOOTTIME
+ vdso_test_clock(CLOCK_BOOTTIME);
+#endif
+
+#ifdef CLOCK_TAI
+ vdso_test_clock(CLOCK_TAI);
+#endif
+
+#ifdef CLOCK_REALTIME_COARSE
+ vdso_test_clock(CLOCK_REALTIME_COARSE);
+#endif
+
+#ifdef CLOCK_MONOTONIC
+ vdso_test_clock(CLOCK_MONOTONIC);
+#endif
+
+#ifdef CLOCK_MONOTONIC_RAW
+ vdso_test_clock(CLOCK_MONOTONIC_RAW);
+#endif
+
+#ifdef CLOCK_MONOTONIC_COARSE
+ vdso_test_clock(CLOCK_MONOTONIC_COARSE);
+#endif
+
+#endif
+
+ vdso_test_time();
+
+ ksft_print_cnts();
+ return ksft_get_fail_cnt() == 0 ? KSFT_PASS : KSFT_FAIL;
+}
diff --git a/tools/testing/selftests/vDSO/vdso_test_clock_getres.c b/tools/testing/selftests/vDSO/vdso_test_clock_getres.c
new file mode 100644
index 000000000000..38d46a8bf7cb
--- /dev/null
+++ b/tools/testing/selftests/vDSO/vdso_test_clock_getres.c
@@ -0,0 +1,124 @@
+// SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note
+/*
+ * vdso_clock_getres.c: Sample code to test clock_getres.
+ * Copyright (c) 2019 Arm Ltd.
+ *
+ * Compile with:
+ * gcc -std=gnu99 vdso_clock_getres.c
+ *
+ * Tested on ARM, ARM64, MIPS32, x86 (32-bit and 64-bit),
+ * Power (32-bit and 64-bit), S390x (32-bit and 64-bit).
+ * Might work on other architectures.
+ */
+
+#define _GNU_SOURCE
+#include <elf.h>
+#include <err.h>
+#include <fcntl.h>
+#include <stdint.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <time.h>
+#include <sys/auxv.h>
+#include <sys/mman.h>
+#include <sys/time.h>
+#include <unistd.h>
+#include <sys/syscall.h>
+
+#include "../kselftest.h"
+
+static long syscall_clock_getres(clockid_t _clkid, struct timespec *_ts)
+{
+ long ret;
+
+ ret = syscall(SYS_clock_getres, _clkid, _ts);
+
+ return ret;
+}
+
+const char *vdso_clock_name[12] = {
+ "CLOCK_REALTIME",
+ "CLOCK_MONOTONIC",
+ "CLOCK_PROCESS_CPUTIME_ID",
+ "CLOCK_THREAD_CPUTIME_ID",
+ "CLOCK_MONOTONIC_RAW",
+ "CLOCK_REALTIME_COARSE",
+ "CLOCK_MONOTONIC_COARSE",
+ "CLOCK_BOOTTIME",
+ "CLOCK_REALTIME_ALARM",
+ "CLOCK_BOOTTIME_ALARM",
+ "CLOCK_SGI_CYCLE",
+ "CLOCK_TAI",
+};
+
+/*
+ * This function calls clock_getres in vdso and by system call
+ * with different values for clock_id.
+ *
+ * Example of output:
+ *
+ * clock_id: CLOCK_REALTIME [PASS]
+ * clock_id: CLOCK_BOOTTIME [PASS]
+ * clock_id: CLOCK_TAI [PASS]
+ * clock_id: CLOCK_REALTIME_COARSE [PASS]
+ * clock_id: CLOCK_MONOTONIC [PASS]
+ * clock_id: CLOCK_MONOTONIC_RAW [PASS]
+ * clock_id: CLOCK_MONOTONIC_COARSE [PASS]
+ */
+static inline int vdso_test_clock(unsigned int clock_id)
+{
+ struct timespec x, y;
+
+ printf("clock_id: %s", vdso_clock_name[clock_id]);
+ clock_getres(clock_id, &x);
+ syscall_clock_getres(clock_id, &y);
+
+ if ((x.tv_sec != y.tv_sec) || (x.tv_nsec != y.tv_nsec)) {
+ printf(" [FAIL]\n");
+ return KSFT_FAIL;
+ }
+
+ printf(" [PASS]\n");
+ return KSFT_PASS;
+}
+
+int main(int argc, char **argv)
+{
+ int ret = 0;
+
+#if _POSIX_TIMERS > 0
+
+#ifdef CLOCK_REALTIME
+ ret += vdso_test_clock(CLOCK_REALTIME);
+#endif
+
+#ifdef CLOCK_BOOTTIME
+ ret += vdso_test_clock(CLOCK_BOOTTIME);
+#endif
+
+#ifdef CLOCK_TAI
+ ret += vdso_test_clock(CLOCK_TAI);
+#endif
+
+#ifdef CLOCK_REALTIME_COARSE
+ ret += vdso_test_clock(CLOCK_REALTIME_COARSE);
+#endif
+
+#ifdef CLOCK_MONOTONIC
+ ret += vdso_test_clock(CLOCK_MONOTONIC);
+#endif
+
+#ifdef CLOCK_MONOTONIC_RAW
+ ret += vdso_test_clock(CLOCK_MONOTONIC_RAW);
+#endif
+
+#ifdef CLOCK_MONOTONIC_COARSE
+ ret += vdso_test_clock(CLOCK_MONOTONIC_COARSE);
+#endif
+
+#endif
+ if (ret > 0)
+ return KSFT_FAIL;
+
+ return KSFT_PASS;
+}
diff --git a/tools/testing/selftests/x86/test_vdso.c b/tools/testing/selftests/vDSO/vdso_test_correctness.c
index 42052db0f870..e691a3cf1491 100644
--- a/tools/testing/selftests/x86/test_vdso.c
+++ b/tools/testing/selftests/vDSO/vdso_test_correctness.c
@@ -19,6 +19,11 @@
#include <stdbool.h>
#include <limits.h>
+#include "vdso_config.h"
+#include "../kselftest.h"
+
+static const char **name;
+
#ifndef SYS_getcpu
# ifdef __x86_64__
# define SYS_getcpu 309
@@ -27,6 +32,17 @@
# endif
#endif
+#ifndef __NR_clock_gettime64
+#define __NR_clock_gettime64 403
+#endif
+
+#ifndef __kernel_timespec
+struct __kernel_timespec {
+ long long tv_sec;
+ long long tv_nsec;
+};
+#endif
+
/* max length of lines in /proc/self/maps - anything longer is skipped here */
#define MAPS_LINE_LEN 128
@@ -36,6 +52,10 @@ typedef int (*vgettime_t)(clockid_t, struct timespec *);
vgettime_t vdso_clock_gettime;
+typedef int (*vgettime64_t)(clockid_t, struct __kernel_timespec *);
+
+vgettime64_t vdso_clock_gettime64;
+
typedef long (*vgtod_t)(struct timeval *tv, struct timezone *tz);
vgtod_t vdso_gettimeofday;
@@ -99,17 +119,23 @@ static void fill_function_pointers()
return;
}
- vdso_getcpu = (getcpu_t)dlsym(vdso, "__vdso_getcpu");
+ vdso_getcpu = (getcpu_t)dlsym(vdso, name[4]);
if (!vdso_getcpu)
printf("Warning: failed to find getcpu in vDSO\n");
vgetcpu = (getcpu_t) vsyscall_getcpu();
- vdso_clock_gettime = (vgettime_t)dlsym(vdso, "__vdso_clock_gettime");
+ vdso_clock_gettime = (vgettime_t)dlsym(vdso, name[1]);
if (!vdso_clock_gettime)
printf("Warning: failed to find clock_gettime in vDSO\n");
- vdso_gettimeofday = (vgtod_t)dlsym(vdso, "__vdso_gettimeofday");
+#if defined(VDSO_32BIT)
+ vdso_clock_gettime64 = (vgettime64_t)dlsym(vdso, name[5]);
+ if (!vdso_clock_gettime64)
+ printf("Warning: failed to find clock_gettime64 in vDSO\n");
+#endif
+
+ vdso_gettimeofday = (vgtod_t)dlsym(vdso, name[0]);
if (!vdso_gettimeofday)
printf("Warning: failed to find gettimeofday in vDSO\n");
@@ -126,6 +152,11 @@ static inline int sys_clock_gettime(clockid_t id, struct timespec *ts)
return syscall(__NR_clock_gettime, id, ts);
}
+static inline int sys_clock_gettime64(clockid_t id, struct __kernel_timespec *ts)
+{
+ return syscall(__NR_clock_gettime64, id, ts);
+}
+
static inline int sys_gettimeofday(struct timeval *tv, struct timezone *tz)
{
return syscall(__NR_gettimeofday, tv, tz);
@@ -191,6 +222,15 @@ static bool ts_leq(const struct timespec *a, const struct timespec *b)
return a->tv_nsec <= b->tv_nsec;
}
+static bool ts64_leq(const struct __kernel_timespec *a,
+ const struct __kernel_timespec *b)
+{
+ if (a->tv_sec != b->tv_sec)
+ return a->tv_sec < b->tv_sec;
+ else
+ return a->tv_nsec <= b->tv_nsec;
+}
+
static bool tv_leq(const struct timeval *a, const struct timeval *b)
{
if (a->tv_sec != b->tv_sec)
@@ -254,7 +294,10 @@ static void test_one_clock_gettime(int clock, const char *name)
if (!ts_leq(&start, &vdso) || !ts_leq(&vdso, &end)) {
printf("[FAIL]\tTimes are out of sequence\n");
nerrs++;
+ return;
}
+
+ printf("[OK]\tTest Passed.\n");
}
static void test_clock_gettime(void)
@@ -264,10 +307,8 @@ static void test_clock_gettime(void)
return;
}
- for (int clock = 0; clock < sizeof(clocknames) / sizeof(clocknames[0]);
- clock++) {
+ for (int clock = 0; clock < ARRAY_SIZE(clocknames); clock++)
test_one_clock_gettime(clock, clocknames[clock]);
- }
/* Also test some invalid clock ids */
test_one_clock_gettime(-1, "invalid");
@@ -275,6 +316,68 @@ static void test_clock_gettime(void)
test_one_clock_gettime(INT_MAX, "invalid");
}
+static void test_one_clock_gettime64(int clock, const char *name)
+{
+ struct __kernel_timespec start, vdso, end;
+ int vdso_ret, end_ret;
+
+ printf("[RUN]\tTesting clock_gettime64 for clock %s (%d)...\n", name, clock);
+
+ if (sys_clock_gettime64(clock, &start) < 0) {
+ if (errno == EINVAL) {
+ vdso_ret = vdso_clock_gettime64(clock, &vdso);
+ if (vdso_ret == -EINVAL) {
+ printf("[OK]\tNo such clock.\n");
+ } else {
+ printf("[FAIL]\tNo such clock, but __vdso_clock_gettime64 returned %d\n", vdso_ret);
+ nerrs++;
+ }
+ } else {
+ printf("[WARN]\t clock_gettime64(%d) syscall returned error %d\n", clock, errno);
+ }
+ return;
+ }
+
+ vdso_ret = vdso_clock_gettime64(clock, &vdso);
+ end_ret = sys_clock_gettime64(clock, &end);
+
+ if (vdso_ret != 0 || end_ret != 0) {
+ printf("[FAIL]\tvDSO returned %d, syscall errno=%d\n",
+ vdso_ret, errno);
+ nerrs++;
+ return;
+ }
+
+ printf("\t%llu.%09lld %llu.%09lld %llu.%09lld\n",
+ (unsigned long long)start.tv_sec, start.tv_nsec,
+ (unsigned long long)vdso.tv_sec, vdso.tv_nsec,
+ (unsigned long long)end.tv_sec, end.tv_nsec);
+
+ if (!ts64_leq(&start, &vdso) || !ts64_leq(&vdso, &end)) {
+ printf("[FAIL]\tTimes are out of sequence\n");
+ nerrs++;
+ return;
+ }
+
+ printf("[OK]\tTest Passed.\n");
+}
+
+static void test_clock_gettime64(void)
+{
+ if (!vdso_clock_gettime64) {
+ printf("[SKIP]\tNo vDSO, so skipping clock_gettime64() tests\n");
+ return;
+ }
+
+ for (int clock = 0; clock < ARRAY_SIZE(clocknames); clock++)
+ test_one_clock_gettime64(clock, clocknames[clock]);
+
+ /* Also test some invalid clock ids */
+ test_one_clock_gettime64(-1, "invalid");
+ test_one_clock_gettime64(INT_MIN, "invalid");
+ test_one_clock_gettime64(INT_MAX, "invalid");
+}
+
static void test_gettimeofday(void)
{
struct timeval start, vdso, end;
@@ -327,9 +430,12 @@ static void test_gettimeofday(void)
int main(int argc, char **argv)
{
+ name = (const char **)&names[VDSO_NAMES];
+
fill_function_pointers();
test_clock_gettime();
+ test_clock_gettime64();
test_gettimeofday();
/*
diff --git a/tools/testing/selftests/vDSO/vdso_test_getcpu.c b/tools/testing/selftests/vDSO/vdso_test_getcpu.c
index fc25ede131b8..1df5d057d79f 100644
--- a/tools/testing/selftests/vDSO/vdso_test_getcpu.c
+++ b/tools/testing/selftests/vDSO/vdso_test_getcpu.c
@@ -14,7 +14,11 @@
#include "../kselftest.h"
#include "parse_vdso.h"
+#if defined(__riscv)
+const char *version = "LINUX_4.15";
+#else
const char *version = "LINUX_2.6";
+#endif
const char *name = "__vdso_getcpu";
struct getcpu_cache;
diff --git a/tools/testing/selftests/vDSO/vdso_test_gettimeofday.c b/tools/testing/selftests/vDSO/vdso_test_gettimeofday.c
index 8ccc73ed8240..e411f287a426 100644
--- a/tools/testing/selftests/vDSO/vdso_test_gettimeofday.c
+++ b/tools/testing/selftests/vDSO/vdso_test_gettimeofday.c
@@ -27,6 +27,9 @@
#if defined(__aarch64__)
const char *version = "LINUX_2.6.39";
const char *name = "__kernel_gettimeofday";
+#elif defined(__riscv)
+const char *version = "LINUX_4.15";
+const char *name = "__vdso_gettimeofday";
#else
const char *version = "LINUX_2.6";
const char *name = "__vdso_gettimeofday";
diff --git a/tools/testing/selftests/vm/.gitignore b/tools/testing/selftests/vm/.gitignore
deleted file mode 100644
index 849e8226395a..000000000000
--- a/tools/testing/selftests/vm/.gitignore
+++ /dev/null
@@ -1,22 +0,0 @@
-# SPDX-License-Identifier: GPL-2.0-only
-hugepage-mmap
-hugepage-shm
-khugepaged
-map_hugetlb
-map_populate
-thuge-gen
-compaction_test
-mlock2-tests
-mremap_dontunmap
-on-fault-limit
-transhuge-stress
-protection_keys
-userfaultfd
-mlock-intersect-test
-mlock-random-test
-virtual_address_range
-gup_benchmark
-va_128TBswitch
-map_fixed_noreplace
-write_to_hugetlbfs
-hmm-tests
diff --git a/tools/testing/selftests/vm/Makefile b/tools/testing/selftests/vm/Makefile
deleted file mode 100644
index a9026706d597..000000000000
--- a/tools/testing/selftests/vm/Makefile
+++ /dev/null
@@ -1,115 +0,0 @@
-# SPDX-License-Identifier: GPL-2.0
-# Makefile for vm selftests
-uname_M := $(shell uname -m 2>/dev/null || echo not)
-MACHINE ?= $(shell echo $(uname_M) | sed -e 's/aarch64.*/arm64/')
-
-CFLAGS = -Wall -I ../../../../usr/include $(EXTRA_CFLAGS)
-LDLIBS = -lrt
-TEST_GEN_FILES = compaction_test
-TEST_GEN_FILES += gup_benchmark
-TEST_GEN_FILES += hmm-tests
-TEST_GEN_FILES += hugepage-mmap
-TEST_GEN_FILES += hugepage-shm
-TEST_GEN_FILES += map_hugetlb
-TEST_GEN_FILES += map_fixed_noreplace
-TEST_GEN_FILES += map_populate
-TEST_GEN_FILES += mlock-random-test
-TEST_GEN_FILES += mlock2-tests
-TEST_GEN_FILES += mremap_dontunmap
-TEST_GEN_FILES += on-fault-limit
-TEST_GEN_FILES += thuge-gen
-TEST_GEN_FILES += transhuge-stress
-TEST_GEN_FILES += userfaultfd
-TEST_GEN_FILES += khugepaged
-
-ifeq ($(ARCH),x86_64)
-CAN_BUILD_I386 := $(shell ./../x86/check_cc.sh $(CC) ../x86/trivial_32bit_program.c -m32)
-CAN_BUILD_X86_64 := $(shell ./../x86/check_cc.sh $(CC) ../x86/trivial_64bit_program.c)
-CAN_BUILD_WITH_NOPIE := $(shell ./../x86/check_cc.sh $(CC) ../x86/trivial_program.c -no-pie)
-
-TARGETS := protection_keys
-BINARIES_32 := $(TARGETS:%=%_32)
-BINARIES_64 := $(TARGETS:%=%_64)
-
-ifeq ($(CAN_BUILD_WITH_NOPIE),1)
-CFLAGS += -no-pie
-endif
-
-ifeq ($(CAN_BUILD_I386),1)
-TEST_GEN_FILES += $(BINARIES_32)
-endif
-
-ifeq ($(CAN_BUILD_X86_64),1)
-TEST_GEN_FILES += $(BINARIES_64)
-endif
-else
-TEST_GEN_FILES += protection_keys
-endif
-
-ifneq (,$(filter $(MACHINE),arm64 ia64 mips64 parisc64 ppc64 ppc64le riscv64 s390x sh64 sparc64 x86_64))
-TEST_GEN_FILES += va_128TBswitch
-TEST_GEN_FILES += virtual_address_range
-TEST_GEN_FILES += write_to_hugetlbfs
-endif
-
-TEST_PROGS := run_vmtests
-
-TEST_FILES := test_vmalloc.sh
-
-KSFT_KHDR_INSTALL := 1
-include ../lib.mk
-
-$(OUTPUT)/hmm-tests: LDLIBS += -lhugetlbfs -lpthread
-
-ifeq ($(ARCH),x86_64)
-BINARIES_32 := $(patsubst %,$(OUTPUT)/%,$(BINARIES_32))
-BINARIES_64 := $(patsubst %,$(OUTPUT)/%,$(BINARIES_64))
-
-define gen-target-rule-32
-$(1) $(1)_32: $(OUTPUT)/$(1)_32
-.PHONY: $(1) $(1)_32
-endef
-
-define gen-target-rule-64
-$(1) $(1)_64: $(OUTPUT)/$(1)_64
-.PHONY: $(1) $(1)_64
-endef
-
-ifeq ($(CAN_BUILD_I386),1)
-$(BINARIES_32): CFLAGS += -m32
-$(BINARIES_32): LDLIBS += -lrt -ldl -lm
-$(BINARIES_32): %_32: %.c
- $(CC) $(CFLAGS) $(EXTRA_CFLAGS) $(notdir $^) $(LDLIBS) -o $@
-$(foreach t,$(TARGETS),$(eval $(call gen-target-rule-32,$(t))))
-endif
-
-ifeq ($(CAN_BUILD_X86_64),1)
-$(BINARIES_64): CFLAGS += -m64
-$(BINARIES_64): LDLIBS += -lrt -ldl
-$(BINARIES_64): %_64: %.c
- $(CC) $(CFLAGS) $(EXTRA_CFLAGS) $(notdir $^) $(LDLIBS) -o $@
-$(foreach t,$(TARGETS),$(eval $(call gen-target-rule-64,$(t))))
-endif
-
-# x86_64 users should be encouraged to install 32-bit libraries
-ifeq ($(CAN_BUILD_I386)$(CAN_BUILD_X86_64),01)
-all: warn_32bit_failure
-
-warn_32bit_failure:
- @echo "Warning: you seem to have a broken 32-bit build" 2>&1; \
- echo "environment. This will reduce test coverage of 64-bit" 2>&1; \
- echo "kernels. If you are using a Debian-like distribution," 2>&1; \
- echo "try:"; 2>&1; \
- echo ""; \
- echo " apt-get install gcc-multilib libc6-i386 libc6-dev-i386"; \
- echo ""; \
- echo "If you are using a Fedora-like distribution, try:"; \
- echo ""; \
- echo " yum install glibc-devel.*i686"; \
- exit 0;
-endif
-endif
-
-$(OUTPUT)/userfaultfd: LDLIBS += -lpthread
-
-$(OUTPUT)/mlock-random-test: LDLIBS += -lcap
diff --git a/tools/testing/selftests/vm/gup_benchmark.c b/tools/testing/selftests/vm/gup_benchmark.c
deleted file mode 100644
index 43b4dfe161a2..000000000000
--- a/tools/testing/selftests/vm/gup_benchmark.c
+++ /dev/null
@@ -1,137 +0,0 @@
-#include <fcntl.h>
-#include <stdio.h>
-#include <stdlib.h>
-#include <unistd.h>
-
-#include <sys/ioctl.h>
-#include <sys/mman.h>
-#include <sys/prctl.h>
-#include <sys/stat.h>
-#include <sys/types.h>
-
-#include <linux/types.h>
-
-#define MB (1UL << 20)
-#define PAGE_SIZE sysconf(_SC_PAGESIZE)
-
-#define GUP_FAST_BENCHMARK _IOWR('g', 1, struct gup_benchmark)
-#define GUP_LONGTERM_BENCHMARK _IOWR('g', 2, struct gup_benchmark)
-#define GUP_BENCHMARK _IOWR('g', 3, struct gup_benchmark)
-
-/* Similar to above, but use FOLL_PIN instead of FOLL_GET. */
-#define PIN_FAST_BENCHMARK _IOWR('g', 4, struct gup_benchmark)
-#define PIN_BENCHMARK _IOWR('g', 5, struct gup_benchmark)
-
-/* Just the flags we need, copied from mm.h: */
-#define FOLL_WRITE 0x01 /* check pte is writable */
-
-struct gup_benchmark {
- __u64 get_delta_usec;
- __u64 put_delta_usec;
- __u64 addr;
- __u64 size;
- __u32 nr_pages_per_call;
- __u32 flags;
- __u64 expansion[10]; /* For future use */
-};
-
-int main(int argc, char **argv)
-{
- struct gup_benchmark gup;
- unsigned long size = 128 * MB;
- int i, fd, filed, opt, nr_pages = 1, thp = -1, repeats = 1, write = 0;
- int cmd = GUP_FAST_BENCHMARK, flags = MAP_PRIVATE;
- char *file = "/dev/zero";
- char *p;
-
- while ((opt = getopt(argc, argv, "m:r:n:f:abtTLUuwSH")) != -1) {
- switch (opt) {
- case 'a':
- cmd = PIN_FAST_BENCHMARK;
- break;
- case 'b':
- cmd = PIN_BENCHMARK;
- break;
- case 'm':
- size = atoi(optarg) * MB;
- break;
- case 'r':
- repeats = atoi(optarg);
- break;
- case 'n':
- nr_pages = atoi(optarg);
- break;
- case 't':
- thp = 1;
- break;
- case 'T':
- thp = 0;
- break;
- case 'L':
- cmd = GUP_LONGTERM_BENCHMARK;
- break;
- case 'U':
- cmd = GUP_BENCHMARK;
- break;
- case 'u':
- cmd = GUP_FAST_BENCHMARK;
- break;
- case 'w':
- write = 1;
- break;
- case 'f':
- file = optarg;
- break;
- case 'S':
- flags &= ~MAP_PRIVATE;
- flags |= MAP_SHARED;
- break;
- case 'H':
- flags |= (MAP_HUGETLB | MAP_ANONYMOUS);
- break;
- default:
- return -1;
- }
- }
-
- filed = open(file, O_RDWR|O_CREAT);
- if (filed < 0) {
- perror("open");
- exit(filed);
- }
-
- gup.nr_pages_per_call = nr_pages;
- if (write)
- gup.flags |= FOLL_WRITE;
-
- fd = open("/sys/kernel/debug/gup_benchmark", O_RDWR);
- if (fd == -1)
- perror("open"), exit(1);
-
- p = mmap(NULL, size, PROT_READ | PROT_WRITE, flags, filed, 0);
- if (p == MAP_FAILED)
- perror("mmap"), exit(1);
- gup.addr = (unsigned long)p;
-
- if (thp == 1)
- madvise(p, size, MADV_HUGEPAGE);
- else if (thp == 0)
- madvise(p, size, MADV_NOHUGEPAGE);
-
- for (; (unsigned long)p < gup.addr + size; p += PAGE_SIZE)
- p[0] = 0;
-
- for (i = 0; i < repeats; i++) {
- gup.size = size;
- if (ioctl(fd, cmd, &gup))
- perror("ioctl"), exit(1);
-
- printf("Time: get:%lld put:%lld us", gup.get_delta_usec,
- gup.put_delta_usec);
- if (gup.size != size)
- printf(", truncated (size: %lld)", gup.size);
- printf("\n");
- }
-
- return 0;
-}
diff --git a/tools/testing/selftests/vm/khugepaged.c b/tools/testing/selftests/vm/khugepaged.c
deleted file mode 100644
index 8b75821302a7..000000000000
--- a/tools/testing/selftests/vm/khugepaged.c
+++ /dev/null
@@ -1,1035 +0,0 @@
-#define _GNU_SOURCE
-#include <fcntl.h>
-#include <limits.h>
-#include <signal.h>
-#include <stdio.h>
-#include <stdlib.h>
-#include <stdbool.h>
-#include <string.h>
-#include <unistd.h>
-
-#include <sys/mman.h>
-#include <sys/wait.h>
-
-#ifndef MADV_PAGEOUT
-#define MADV_PAGEOUT 21
-#endif
-
-#define BASE_ADDR ((void *)(1UL << 30))
-static unsigned long hpage_pmd_size;
-static unsigned long page_size;
-static int hpage_pmd_nr;
-
-#define THP_SYSFS "/sys/kernel/mm/transparent_hugepage/"
-#define PID_SMAPS "/proc/self/smaps"
-
-enum thp_enabled {
- THP_ALWAYS,
- THP_MADVISE,
- THP_NEVER,
-};
-
-static const char *thp_enabled_strings[] = {
- "always",
- "madvise",
- "never",
- NULL
-};
-
-enum thp_defrag {
- THP_DEFRAG_ALWAYS,
- THP_DEFRAG_DEFER,
- THP_DEFRAG_DEFER_MADVISE,
- THP_DEFRAG_MADVISE,
- THP_DEFRAG_NEVER,
-};
-
-static const char *thp_defrag_strings[] = {
- "always",
- "defer",
- "defer+madvise",
- "madvise",
- "never",
- NULL
-};
-
-enum shmem_enabled {
- SHMEM_ALWAYS,
- SHMEM_WITHIN_SIZE,
- SHMEM_ADVISE,
- SHMEM_NEVER,
- SHMEM_DENY,
- SHMEM_FORCE,
-};
-
-static const char *shmem_enabled_strings[] = {
- "always",
- "within_size",
- "advise",
- "never",
- "deny",
- "force",
- NULL
-};
-
-struct khugepaged_settings {
- bool defrag;
- unsigned int alloc_sleep_millisecs;
- unsigned int scan_sleep_millisecs;
- unsigned int max_ptes_none;
- unsigned int max_ptes_swap;
- unsigned int max_ptes_shared;
- unsigned long pages_to_scan;
-};
-
-struct settings {
- enum thp_enabled thp_enabled;
- enum thp_defrag thp_defrag;
- enum shmem_enabled shmem_enabled;
- bool debug_cow;
- bool use_zero_page;
- struct khugepaged_settings khugepaged;
-};
-
-static struct settings default_settings = {
- .thp_enabled = THP_MADVISE,
- .thp_defrag = THP_DEFRAG_ALWAYS,
- .shmem_enabled = SHMEM_NEVER,
- .debug_cow = 0,
- .use_zero_page = 0,
- .khugepaged = {
- .defrag = 1,
- .alloc_sleep_millisecs = 10,
- .scan_sleep_millisecs = 10,
- },
-};
-
-static struct settings saved_settings;
-static bool skip_settings_restore;
-
-static int exit_status;
-
-static void success(const char *msg)
-{
- printf(" \e[32m%s\e[0m\n", msg);
-}
-
-static void fail(const char *msg)
-{
- printf(" \e[31m%s\e[0m\n", msg);
- exit_status++;
-}
-
-static int read_file(const char *path, char *buf, size_t buflen)
-{
- int fd;
- ssize_t numread;
-
- fd = open(path, O_RDONLY);
- if (fd == -1)
- return 0;
-
- numread = read(fd, buf, buflen - 1);
- if (numread < 1) {
- close(fd);
- return 0;
- }
-
- buf[numread] = '\0';
- close(fd);
-
- return (unsigned int) numread;
-}
-
-static int write_file(const char *path, const char *buf, size_t buflen)
-{
- int fd;
- ssize_t numwritten;
-
- fd = open(path, O_WRONLY);
- if (fd == -1)
- return 0;
-
- numwritten = write(fd, buf, buflen - 1);
- close(fd);
- if (numwritten < 1)
- return 0;
-
- return (unsigned int) numwritten;
-}
-
-static int read_string(const char *name, const char *strings[])
-{
- char path[PATH_MAX];
- char buf[256];
- char *c;
- int ret;
-
- ret = snprintf(path, PATH_MAX, THP_SYSFS "%s", name);
- if (ret >= PATH_MAX) {
- printf("%s: Pathname is too long\n", __func__);
- exit(EXIT_FAILURE);
- }
-
- if (!read_file(path, buf, sizeof(buf))) {
- perror(path);
- exit(EXIT_FAILURE);
- }
-
- c = strchr(buf, '[');
- if (!c) {
- printf("%s: Parse failure\n", __func__);
- exit(EXIT_FAILURE);
- }
-
- c++;
- memmove(buf, c, sizeof(buf) - (c - buf));
-
- c = strchr(buf, ']');
- if (!c) {
- printf("%s: Parse failure\n", __func__);
- exit(EXIT_FAILURE);
- }
- *c = '\0';
-
- ret = 0;
- while (strings[ret]) {
- if (!strcmp(strings[ret], buf))
- return ret;
- ret++;
- }
-
- printf("Failed to parse %s\n", name);
- exit(EXIT_FAILURE);
-}
-
-static void write_string(const char *name, const char *val)
-{
- char path[PATH_MAX];
- int ret;
-
- ret = snprintf(path, PATH_MAX, THP_SYSFS "%s", name);
- if (ret >= PATH_MAX) {
- printf("%s: Pathname is too long\n", __func__);
- exit(EXIT_FAILURE);
- }
-
- if (!write_file(path, val, strlen(val) + 1)) {
- perror(path);
- exit(EXIT_FAILURE);
- }
-}
-
-static const unsigned long read_num(const char *name)
-{
- char path[PATH_MAX];
- char buf[21];
- int ret;
-
- ret = snprintf(path, PATH_MAX, THP_SYSFS "%s", name);
- if (ret >= PATH_MAX) {
- printf("%s: Pathname is too long\n", __func__);
- exit(EXIT_FAILURE);
- }
-
- ret = read_file(path, buf, sizeof(buf));
- if (ret < 0) {
- perror("read_file(read_num)");
- exit(EXIT_FAILURE);
- }
-
- return strtoul(buf, NULL, 10);
-}
-
-static void write_num(const char *name, unsigned long num)
-{
- char path[PATH_MAX];
- char buf[21];
- int ret;
-
- ret = snprintf(path, PATH_MAX, THP_SYSFS "%s", name);
- if (ret >= PATH_MAX) {
- printf("%s: Pathname is too long\n", __func__);
- exit(EXIT_FAILURE);
- }
-
- sprintf(buf, "%ld", num);
- if (!write_file(path, buf, strlen(buf) + 1)) {
- perror(path);
- exit(EXIT_FAILURE);
- }
-}
-
-static void write_settings(struct settings *settings)
-{
- struct khugepaged_settings *khugepaged = &settings->khugepaged;
-
- write_string("enabled", thp_enabled_strings[settings->thp_enabled]);
- write_string("defrag", thp_defrag_strings[settings->thp_defrag]);
- write_string("shmem_enabled",
- shmem_enabled_strings[settings->shmem_enabled]);
- write_num("debug_cow", settings->debug_cow);
- write_num("use_zero_page", settings->use_zero_page);
-
- write_num("khugepaged/defrag", khugepaged->defrag);
- write_num("khugepaged/alloc_sleep_millisecs",
- khugepaged->alloc_sleep_millisecs);
- write_num("khugepaged/scan_sleep_millisecs",
- khugepaged->scan_sleep_millisecs);
- write_num("khugepaged/max_ptes_none", khugepaged->max_ptes_none);
- write_num("khugepaged/max_ptes_swap", khugepaged->max_ptes_swap);
- write_num("khugepaged/max_ptes_shared", khugepaged->max_ptes_shared);
- write_num("khugepaged/pages_to_scan", khugepaged->pages_to_scan);
-}
-
-static void restore_settings(int sig)
-{
- if (skip_settings_restore)
- goto out;
-
- printf("Restore THP and khugepaged settings...");
- write_settings(&saved_settings);
- success("OK");
- if (sig)
- exit(EXIT_FAILURE);
-out:
- exit(exit_status);
-}
-
-static void save_settings(void)
-{
- printf("Save THP and khugepaged settings...");
- saved_settings = (struct settings) {
- .thp_enabled = read_string("enabled", thp_enabled_strings),
- .thp_defrag = read_string("defrag", thp_defrag_strings),
- .shmem_enabled =
- read_string("shmem_enabled", shmem_enabled_strings),
- .debug_cow = read_num("debug_cow"),
- .use_zero_page = read_num("use_zero_page"),
- };
- saved_settings.khugepaged = (struct khugepaged_settings) {
- .defrag = read_num("khugepaged/defrag"),
- .alloc_sleep_millisecs =
- read_num("khugepaged/alloc_sleep_millisecs"),
- .scan_sleep_millisecs =
- read_num("khugepaged/scan_sleep_millisecs"),
- .max_ptes_none = read_num("khugepaged/max_ptes_none"),
- .max_ptes_swap = read_num("khugepaged/max_ptes_swap"),
- .max_ptes_shared = read_num("khugepaged/max_ptes_shared"),
- .pages_to_scan = read_num("khugepaged/pages_to_scan"),
- };
- success("OK");
-
- signal(SIGTERM, restore_settings);
- signal(SIGINT, restore_settings);
- signal(SIGHUP, restore_settings);
- signal(SIGQUIT, restore_settings);
-}
-
-static void adjust_settings(void)
-{
-
- printf("Adjust settings...");
- write_settings(&default_settings);
- success("OK");
-}
-
-#define MAX_LINE_LENGTH 500
-
-static bool check_for_pattern(FILE *fp, char *pattern, char *buf)
-{
- while (fgets(buf, MAX_LINE_LENGTH, fp) != NULL) {
- if (!strncmp(buf, pattern, strlen(pattern)))
- return true;
- }
- return false;
-}
-
-static bool check_huge(void *addr)
-{
- bool thp = false;
- int ret;
- FILE *fp;
- char buffer[MAX_LINE_LENGTH];
- char addr_pattern[MAX_LINE_LENGTH];
-
- ret = snprintf(addr_pattern, MAX_LINE_LENGTH, "%08lx-",
- (unsigned long) addr);
- if (ret >= MAX_LINE_LENGTH) {
- printf("%s: Pattern is too long\n", __func__);
- exit(EXIT_FAILURE);
- }
-
-
- fp = fopen(PID_SMAPS, "r");
- if (!fp) {
- printf("%s: Failed to open file %s\n", __func__, PID_SMAPS);
- exit(EXIT_FAILURE);
- }
- if (!check_for_pattern(fp, addr_pattern, buffer))
- goto err_out;
-
- ret = snprintf(addr_pattern, MAX_LINE_LENGTH, "AnonHugePages:%10ld kB",
- hpage_pmd_size >> 10);
- if (ret >= MAX_LINE_LENGTH) {
- printf("%s: Pattern is too long\n", __func__);
- exit(EXIT_FAILURE);
- }
- /*
- * Fetch the AnonHugePages: in the same block and check whether it got
- * the expected number of hugeepages next.
- */
- if (!check_for_pattern(fp, "AnonHugePages:", buffer))
- goto err_out;
-
- if (strncmp(buffer, addr_pattern, strlen(addr_pattern)))
- goto err_out;
-
- thp = true;
-err_out:
- fclose(fp);
- return thp;
-}
-
-
-static bool check_swap(void *addr, unsigned long size)
-{
- bool swap = false;
- int ret;
- FILE *fp;
- char buffer[MAX_LINE_LENGTH];
- char addr_pattern[MAX_LINE_LENGTH];
-
- ret = snprintf(addr_pattern, MAX_LINE_LENGTH, "%08lx-",
- (unsigned long) addr);
- if (ret >= MAX_LINE_LENGTH) {
- printf("%s: Pattern is too long\n", __func__);
- exit(EXIT_FAILURE);
- }
-
-
- fp = fopen(PID_SMAPS, "r");
- if (!fp) {
- printf("%s: Failed to open file %s\n", __func__, PID_SMAPS);
- exit(EXIT_FAILURE);
- }
- if (!check_for_pattern(fp, addr_pattern, buffer))
- goto err_out;
-
- ret = snprintf(addr_pattern, MAX_LINE_LENGTH, "Swap:%19ld kB",
- size >> 10);
- if (ret >= MAX_LINE_LENGTH) {
- printf("%s: Pattern is too long\n", __func__);
- exit(EXIT_FAILURE);
- }
- /*
- * Fetch the Swap: in the same block and check whether it got
- * the expected number of hugeepages next.
- */
- if (!check_for_pattern(fp, "Swap:", buffer))
- goto err_out;
-
- if (strncmp(buffer, addr_pattern, strlen(addr_pattern)))
- goto err_out;
-
- swap = true;
-err_out:
- fclose(fp);
- return swap;
-}
-
-static void *alloc_mapping(void)
-{
- void *p;
-
- p = mmap(BASE_ADDR, hpage_pmd_size, PROT_READ | PROT_WRITE,
- MAP_ANONYMOUS | MAP_PRIVATE, -1, 0);
- if (p != BASE_ADDR) {
- printf("Failed to allocate VMA at %p\n", BASE_ADDR);
- exit(EXIT_FAILURE);
- }
-
- return p;
-}
-
-static void fill_memory(int *p, unsigned long start, unsigned long end)
-{
- int i;
-
- for (i = start / page_size; i < end / page_size; i++)
- p[i * page_size / sizeof(*p)] = i + 0xdead0000;
-}
-
-static void validate_memory(int *p, unsigned long start, unsigned long end)
-{
- int i;
-
- for (i = start / page_size; i < end / page_size; i++) {
- if (p[i * page_size / sizeof(*p)] != i + 0xdead0000) {
- printf("Page %d is corrupted: %#x\n",
- i, p[i * page_size / sizeof(*p)]);
- exit(EXIT_FAILURE);
- }
- }
-}
-
-#define TICK 500000
-static bool wait_for_scan(const char *msg, char *p)
-{
- int full_scans;
- int timeout = 6; /* 3 seconds */
-
- /* Sanity check */
- if (check_huge(p)) {
- printf("Unexpected huge page\n");
- exit(EXIT_FAILURE);
- }
-
- madvise(p, hpage_pmd_size, MADV_HUGEPAGE);
-
- /* Wait until the second full_scan completed */
- full_scans = read_num("khugepaged/full_scans") + 2;
-
- printf("%s...", msg);
- while (timeout--) {
- if (check_huge(p))
- break;
- if (read_num("khugepaged/full_scans") >= full_scans)
- break;
- printf(".");
- usleep(TICK);
- }
-
- madvise(p, hpage_pmd_size, MADV_NOHUGEPAGE);
-
- return timeout == -1;
-}
-
-static void alloc_at_fault(void)
-{
- struct settings settings = default_settings;
- char *p;
-
- settings.thp_enabled = THP_ALWAYS;
- write_settings(&settings);
-
- p = alloc_mapping();
- *p = 1;
- printf("Allocate huge page on fault...");
- if (check_huge(p))
- success("OK");
- else
- fail("Fail");
-
- write_settings(&default_settings);
-
- madvise(p, page_size, MADV_DONTNEED);
- printf("Split huge PMD on MADV_DONTNEED...");
- if (!check_huge(p))
- success("OK");
- else
- fail("Fail");
- munmap(p, hpage_pmd_size);
-}
-
-static void collapse_full(void)
-{
- void *p;
-
- p = alloc_mapping();
- fill_memory(p, 0, hpage_pmd_size);
- if (wait_for_scan("Collapse fully populated PTE table", p))
- fail("Timeout");
- else if (check_huge(p))
- success("OK");
- else
- fail("Fail");
- validate_memory(p, 0, hpage_pmd_size);
- munmap(p, hpage_pmd_size);
-}
-
-static void collapse_empty(void)
-{
- void *p;
-
- p = alloc_mapping();
- if (wait_for_scan("Do not collapse empty PTE table", p))
- fail("Timeout");
- else if (check_huge(p))
- fail("Fail");
- else
- success("OK");
- munmap(p, hpage_pmd_size);
-}
-
-static void collapse_single_pte_entry(void)
-{
- void *p;
-
- p = alloc_mapping();
- fill_memory(p, 0, page_size);
- if (wait_for_scan("Collapse PTE table with single PTE entry present", p))
- fail("Timeout");
- else if (check_huge(p))
- success("OK");
- else
- fail("Fail");
- validate_memory(p, 0, page_size);
- munmap(p, hpage_pmd_size);
-}
-
-static void collapse_max_ptes_none(void)
-{
- int max_ptes_none = hpage_pmd_nr / 2;
- struct settings settings = default_settings;
- void *p;
-
- settings.khugepaged.max_ptes_none = max_ptes_none;
- write_settings(&settings);
-
- p = alloc_mapping();
-
- fill_memory(p, 0, (hpage_pmd_nr - max_ptes_none - 1) * page_size);
- if (wait_for_scan("Do not collapse with max_ptes_none exceeded", p))
- fail("Timeout");
- else if (check_huge(p))
- fail("Fail");
- else
- success("OK");
- validate_memory(p, 0, (hpage_pmd_nr - max_ptes_none - 1) * page_size);
-
- fill_memory(p, 0, (hpage_pmd_nr - max_ptes_none) * page_size);
- if (wait_for_scan("Collapse with max_ptes_none PTEs empty", p))
- fail("Timeout");
- else if (check_huge(p))
- success("OK");
- else
- fail("Fail");
- validate_memory(p, 0, (hpage_pmd_nr - max_ptes_none) * page_size);
-
- munmap(p, hpage_pmd_size);
- write_settings(&default_settings);
-}
-
-static void collapse_swapin_single_pte(void)
-{
- void *p;
- p = alloc_mapping();
- fill_memory(p, 0, hpage_pmd_size);
-
- printf("Swapout one page...");
- if (madvise(p, page_size, MADV_PAGEOUT)) {
- perror("madvise(MADV_PAGEOUT)");
- exit(EXIT_FAILURE);
- }
- if (check_swap(p, page_size)) {
- success("OK");
- } else {
- fail("Fail");
- goto out;
- }
-
- if (wait_for_scan("Collapse with swapping in single PTE entry", p))
- fail("Timeout");
- else if (check_huge(p))
- success("OK");
- else
- fail("Fail");
- validate_memory(p, 0, hpage_pmd_size);
-out:
- munmap(p, hpage_pmd_size);
-}
-
-static void collapse_max_ptes_swap(void)
-{
- int max_ptes_swap = read_num("khugepaged/max_ptes_swap");
- void *p;
-
- p = alloc_mapping();
-
- fill_memory(p, 0, hpage_pmd_size);
- printf("Swapout %d of %d pages...", max_ptes_swap + 1, hpage_pmd_nr);
- if (madvise(p, (max_ptes_swap + 1) * page_size, MADV_PAGEOUT)) {
- perror("madvise(MADV_PAGEOUT)");
- exit(EXIT_FAILURE);
- }
- if (check_swap(p, (max_ptes_swap + 1) * page_size)) {
- success("OK");
- } else {
- fail("Fail");
- goto out;
- }
-
- if (wait_for_scan("Do not collapse with max_ptes_swap exceeded", p))
- fail("Timeout");
- else if (check_huge(p))
- fail("Fail");
- else
- success("OK");
- validate_memory(p, 0, hpage_pmd_size);
-
- fill_memory(p, 0, hpage_pmd_size);
- printf("Swapout %d of %d pages...", max_ptes_swap, hpage_pmd_nr);
- if (madvise(p, max_ptes_swap * page_size, MADV_PAGEOUT)) {
- perror("madvise(MADV_PAGEOUT)");
- exit(EXIT_FAILURE);
- }
- if (check_swap(p, max_ptes_swap * page_size)) {
- success("OK");
- } else {
- fail("Fail");
- goto out;
- }
-
- if (wait_for_scan("Collapse with max_ptes_swap pages swapped out", p))
- fail("Timeout");
- else if (check_huge(p))
- success("OK");
- else
- fail("Fail");
- validate_memory(p, 0, hpage_pmd_size);
-out:
- munmap(p, hpage_pmd_size);
-}
-
-static void collapse_single_pte_entry_compound(void)
-{
- void *p;
-
- p = alloc_mapping();
-
- printf("Allocate huge page...");
- madvise(p, hpage_pmd_size, MADV_HUGEPAGE);
- fill_memory(p, 0, hpage_pmd_size);
- if (check_huge(p))
- success("OK");
- else
- fail("Fail");
- madvise(p, hpage_pmd_size, MADV_NOHUGEPAGE);
-
- printf("Split huge page leaving single PTE mapping compound page...");
- madvise(p + page_size, hpage_pmd_size - page_size, MADV_DONTNEED);
- if (!check_huge(p))
- success("OK");
- else
- fail("Fail");
-
- if (wait_for_scan("Collapse PTE table with single PTE mapping compound page", p))
- fail("Timeout");
- else if (check_huge(p))
- success("OK");
- else
- fail("Fail");
- validate_memory(p, 0, page_size);
- munmap(p, hpage_pmd_size);
-}
-
-static void collapse_full_of_compound(void)
-{
- void *p;
-
- p = alloc_mapping();
-
- printf("Allocate huge page...");
- madvise(p, hpage_pmd_size, MADV_HUGEPAGE);
- fill_memory(p, 0, hpage_pmd_size);
- if (check_huge(p))
- success("OK");
- else
- fail("Fail");
-
- printf("Split huge page leaving single PTE page table full of compound pages...");
- madvise(p, page_size, MADV_NOHUGEPAGE);
- madvise(p, hpage_pmd_size, MADV_NOHUGEPAGE);
- if (!check_huge(p))
- success("OK");
- else
- fail("Fail");
-
- if (wait_for_scan("Collapse PTE table full of compound pages", p))
- fail("Timeout");
- else if (check_huge(p))
- success("OK");
- else
- fail("Fail");
- validate_memory(p, 0, hpage_pmd_size);
- munmap(p, hpage_pmd_size);
-}
-
-static void collapse_compound_extreme(void)
-{
- void *p;
- int i;
-
- p = alloc_mapping();
- for (i = 0; i < hpage_pmd_nr; i++) {
- printf("\rConstruct PTE page table full of different PTE-mapped compound pages %3d/%d...",
- i + 1, hpage_pmd_nr);
-
- madvise(BASE_ADDR, hpage_pmd_size, MADV_HUGEPAGE);
- fill_memory(BASE_ADDR, 0, hpage_pmd_size);
- if (!check_huge(BASE_ADDR)) {
- printf("Failed to allocate huge page\n");
- exit(EXIT_FAILURE);
- }
- madvise(BASE_ADDR, hpage_pmd_size, MADV_NOHUGEPAGE);
-
- p = mremap(BASE_ADDR - i * page_size,
- i * page_size + hpage_pmd_size,
- (i + 1) * page_size,
- MREMAP_MAYMOVE | MREMAP_FIXED,
- BASE_ADDR + 2 * hpage_pmd_size);
- if (p == MAP_FAILED) {
- perror("mremap+unmap");
- exit(EXIT_FAILURE);
- }
-
- p = mremap(BASE_ADDR + 2 * hpage_pmd_size,
- (i + 1) * page_size,
- (i + 1) * page_size + hpage_pmd_size,
- MREMAP_MAYMOVE | MREMAP_FIXED,
- BASE_ADDR - (i + 1) * page_size);
- if (p == MAP_FAILED) {
- perror("mremap+alloc");
- exit(EXIT_FAILURE);
- }
- }
-
- munmap(BASE_ADDR, hpage_pmd_size);
- fill_memory(p, 0, hpage_pmd_size);
- if (!check_huge(p))
- success("OK");
- else
- fail("Fail");
-
- if (wait_for_scan("Collapse PTE table full of different compound pages", p))
- fail("Timeout");
- else if (check_huge(p))
- success("OK");
- else
- fail("Fail");
-
- validate_memory(p, 0, hpage_pmd_size);
- munmap(p, hpage_pmd_size);
-}
-
-static void collapse_fork(void)
-{
- int wstatus;
- void *p;
-
- p = alloc_mapping();
-
- printf("Allocate small page...");
- fill_memory(p, 0, page_size);
- if (!check_huge(p))
- success("OK");
- else
- fail("Fail");
-
- printf("Share small page over fork()...");
- if (!fork()) {
- /* Do not touch settings on child exit */
- skip_settings_restore = true;
- exit_status = 0;
-
- if (!check_huge(p))
- success("OK");
- else
- fail("Fail");
-
- fill_memory(p, page_size, 2 * page_size);
-
- if (wait_for_scan("Collapse PTE table with single page shared with parent process", p))
- fail("Timeout");
- else if (check_huge(p))
- success("OK");
- else
- fail("Fail");
-
- validate_memory(p, 0, page_size);
- munmap(p, hpage_pmd_size);
- exit(exit_status);
- }
-
- wait(&wstatus);
- exit_status += WEXITSTATUS(wstatus);
-
- printf("Check if parent still has small page...");
- if (!check_huge(p))
- success("OK");
- else
- fail("Fail");
- validate_memory(p, 0, page_size);
- munmap(p, hpage_pmd_size);
-}
-
-static void collapse_fork_compound(void)
-{
- int wstatus;
- void *p;
-
- p = alloc_mapping();
-
- printf("Allocate huge page...");
- madvise(p, hpage_pmd_size, MADV_HUGEPAGE);
- fill_memory(p, 0, hpage_pmd_size);
- if (check_huge(p))
- success("OK");
- else
- fail("Fail");
-
- printf("Share huge page over fork()...");
- if (!fork()) {
- /* Do not touch settings on child exit */
- skip_settings_restore = true;
- exit_status = 0;
-
- if (check_huge(p))
- success("OK");
- else
- fail("Fail");
-
- printf("Split huge page PMD in child process...");
- madvise(p, page_size, MADV_NOHUGEPAGE);
- madvise(p, hpage_pmd_size, MADV_NOHUGEPAGE);
- if (!check_huge(p))
- success("OK");
- else
- fail("Fail");
- fill_memory(p, 0, page_size);
-
- write_num("khugepaged/max_ptes_shared", hpage_pmd_nr - 1);
- if (wait_for_scan("Collapse PTE table full of compound pages in child", p))
- fail("Timeout");
- else if (check_huge(p))
- success("OK");
- else
- fail("Fail");
- write_num("khugepaged/max_ptes_shared",
- default_settings.khugepaged.max_ptes_shared);
-
- validate_memory(p, 0, hpage_pmd_size);
- munmap(p, hpage_pmd_size);
- exit(exit_status);
- }
-
- wait(&wstatus);
- exit_status += WEXITSTATUS(wstatus);
-
- printf("Check if parent still has huge page...");
- if (check_huge(p))
- success("OK");
- else
- fail("Fail");
- validate_memory(p, 0, hpage_pmd_size);
- munmap(p, hpage_pmd_size);
-}
-
-static void collapse_max_ptes_shared()
-{
- int max_ptes_shared = read_num("khugepaged/max_ptes_shared");
- int wstatus;
- void *p;
-
- p = alloc_mapping();
-
- printf("Allocate huge page...");
- madvise(p, hpage_pmd_size, MADV_HUGEPAGE);
- fill_memory(p, 0, hpage_pmd_size);
- if (check_huge(p))
- success("OK");
- else
- fail("Fail");
-
- printf("Share huge page over fork()...");
- if (!fork()) {
- /* Do not touch settings on child exit */
- skip_settings_restore = true;
- exit_status = 0;
-
- if (check_huge(p))
- success("OK");
- else
- fail("Fail");
-
- printf("Trigger CoW on page %d of %d...",
- hpage_pmd_nr - max_ptes_shared - 1, hpage_pmd_nr);
- fill_memory(p, 0, (hpage_pmd_nr - max_ptes_shared - 1) * page_size);
- if (!check_huge(p))
- success("OK");
- else
- fail("Fail");
-
- if (wait_for_scan("Do not collapse with max_ptes_shared exceeded", p))
- fail("Timeout");
- else if (!check_huge(p))
- success("OK");
- else
- fail("Fail");
-
- printf("Trigger CoW on page %d of %d...",
- hpage_pmd_nr - max_ptes_shared, hpage_pmd_nr);
- fill_memory(p, 0, (hpage_pmd_nr - max_ptes_shared) * page_size);
- if (!check_huge(p))
- success("OK");
- else
- fail("Fail");
-
-
- if (wait_for_scan("Collapse with max_ptes_shared PTEs shared", p))
- fail("Timeout");
- else if (check_huge(p))
- success("OK");
- else
- fail("Fail");
-
- validate_memory(p, 0, hpage_pmd_size);
- munmap(p, hpage_pmd_size);
- exit(exit_status);
- }
-
- wait(&wstatus);
- exit_status += WEXITSTATUS(wstatus);
-
- printf("Check if parent still has huge page...");
- if (check_huge(p))
- success("OK");
- else
- fail("Fail");
- validate_memory(p, 0, hpage_pmd_size);
- munmap(p, hpage_pmd_size);
-}
-
-int main(void)
-{
- setbuf(stdout, NULL);
-
- page_size = getpagesize();
- hpage_pmd_size = read_num("hpage_pmd_size");
- hpage_pmd_nr = hpage_pmd_size / page_size;
-
- default_settings.khugepaged.max_ptes_none = hpage_pmd_nr - 1;
- default_settings.khugepaged.max_ptes_swap = hpage_pmd_nr / 8;
- default_settings.khugepaged.max_ptes_shared = hpage_pmd_nr / 2;
- default_settings.khugepaged.pages_to_scan = hpage_pmd_nr * 8;
-
- save_settings();
- adjust_settings();
-
- alloc_at_fault();
- collapse_full();
- collapse_empty();
- collapse_single_pte_entry();
- collapse_max_ptes_none();
- collapse_swapin_single_pte();
- collapse_max_ptes_swap();
- collapse_single_pte_entry_compound();
- collapse_full_of_compound();
- collapse_compound_extreme();
- collapse_fork();
- collapse_fork_compound();
- collapse_max_ptes_shared();
-
- restore_settings(0);
-}
diff --git a/tools/testing/selftests/vm/on-fault-limit.c b/tools/testing/selftests/vm/on-fault-limit.c
deleted file mode 100644
index 634d87dfb2a4..000000000000
--- a/tools/testing/selftests/vm/on-fault-limit.c
+++ /dev/null
@@ -1,48 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-#include <sys/mman.h>
-#include <stdio.h>
-#include <unistd.h>
-#include <string.h>
-#include <sys/time.h>
-#include <sys/resource.h>
-
-#ifndef MCL_ONFAULT
-#define MCL_ONFAULT (MCL_FUTURE << 1)
-#endif
-
-static int test_limit(void)
-{
- int ret = 1;
- struct rlimit lims;
- void *map;
-
- if (getrlimit(RLIMIT_MEMLOCK, &lims)) {
- perror("getrlimit");
- return ret;
- }
-
- if (mlockall(MCL_ONFAULT | MCL_FUTURE)) {
- perror("mlockall");
- return ret;
- }
-
- map = mmap(NULL, 2 * lims.rlim_max, PROT_READ | PROT_WRITE,
- MAP_PRIVATE | MAP_ANONYMOUS | MAP_POPULATE, -1, 0);
- if (map != MAP_FAILED)
- printf("mmap should have failed, but didn't\n");
- else {
- ret = 0;
- munmap(map, 2 * lims.rlim_max);
- }
-
- munlockall();
- return ret;
-}
-
-int main(int argc, char **argv)
-{
- int ret = 0;
-
- ret += test_limit();
- return ret;
-}
diff --git a/tools/testing/selftests/vm/run_vmtests b/tools/testing/selftests/vm/run_vmtests
deleted file mode 100755
index a3f4f30f0a2e..000000000000
--- a/tools/testing/selftests/vm/run_vmtests
+++ /dev/null
@@ -1,326 +0,0 @@
-#!/bin/bash
-# SPDX-License-Identifier: GPL-2.0
-#please run as root
-
-# Kselftest framework requirement - SKIP code is 4.
-ksft_skip=4
-
-mnt=./huge
-exitcode=0
-
-#get huge pagesize and freepages from /proc/meminfo
-while read name size unit; do
- if [ "$name" = "HugePages_Free:" ]; then
- freepgs=$size
- fi
- if [ "$name" = "Hugepagesize:" ]; then
- hpgsize_KB=$size
- fi
-done < /proc/meminfo
-
-# Simple hugetlbfs tests have a hardcoded minimum requirement of
-# huge pages totaling 256MB (262144KB) in size. The userfaultfd
-# hugetlb test requires a minimum of 2 * nr_cpus huge pages. Take
-# both of these requirements into account and attempt to increase
-# number of huge pages available.
-nr_cpus=$(nproc)
-hpgsize_MB=$((hpgsize_KB / 1024))
-half_ufd_size_MB=$((((nr_cpus * hpgsize_MB + 127) / 128) * 128))
-needmem_KB=$((half_ufd_size_MB * 2 * 1024))
-
-#set proper nr_hugepages
-if [ -n "$freepgs" ] && [ -n "$hpgsize_KB" ]; then
- nr_hugepgs=`cat /proc/sys/vm/nr_hugepages`
- needpgs=$((needmem_KB / hpgsize_KB))
- tries=2
- while [ $tries -gt 0 ] && [ $freepgs -lt $needpgs ]; do
- lackpgs=$(( $needpgs - $freepgs ))
- echo 3 > /proc/sys/vm/drop_caches
- echo $(( $lackpgs + $nr_hugepgs )) > /proc/sys/vm/nr_hugepages
- if [ $? -ne 0 ]; then
- echo "Please run this test as root"
- exit $ksft_skip
- fi
- while read name size unit; do
- if [ "$name" = "HugePages_Free:" ]; then
- freepgs=$size
- fi
- done < /proc/meminfo
- tries=$((tries - 1))
- done
- if [ $freepgs -lt $needpgs ]; then
- printf "Not enough huge pages available (%d < %d)\n" \
- $freepgs $needpgs
- exit 1
- fi
-else
- echo "no hugetlbfs support in kernel?"
- exit 1
-fi
-
-#filter 64bit architectures
-ARCH64STR="arm64 ia64 mips64 parisc64 ppc64 ppc64le riscv64 s390x sh64 sparc64 x86_64"
-if [ -z $ARCH ]; then
- ARCH=`uname -m 2>/dev/null | sed -e 's/aarch64.*/arm64/'`
-fi
-VADDR64=0
-echo "$ARCH64STR" | grep $ARCH && VADDR64=1
-
-mkdir $mnt
-mount -t hugetlbfs none $mnt
-
-echo "---------------------"
-echo "running hugepage-mmap"
-echo "---------------------"
-./hugepage-mmap
-if [ $? -ne 0 ]; then
- echo "[FAIL]"
- exitcode=1
-else
- echo "[PASS]"
-fi
-
-shmmax=`cat /proc/sys/kernel/shmmax`
-shmall=`cat /proc/sys/kernel/shmall`
-echo 268435456 > /proc/sys/kernel/shmmax
-echo 4194304 > /proc/sys/kernel/shmall
-echo "--------------------"
-echo "running hugepage-shm"
-echo "--------------------"
-./hugepage-shm
-if [ $? -ne 0 ]; then
- echo "[FAIL]"
- exitcode=1
-else
- echo "[PASS]"
-fi
-echo $shmmax > /proc/sys/kernel/shmmax
-echo $shmall > /proc/sys/kernel/shmall
-
-echo "-------------------"
-echo "running map_hugetlb"
-echo "-------------------"
-./map_hugetlb
-if [ $? -ne 0 ]; then
- echo "[FAIL]"
- exitcode=1
-else
- echo "[PASS]"
-fi
-
-echo "NOTE: The above hugetlb tests provide minimal coverage. Use"
-echo " https://github.com/libhugetlbfs/libhugetlbfs.git for"
-echo " hugetlb regression testing."
-
-echo "---------------------------"
-echo "running map_fixed_noreplace"
-echo "---------------------------"
-./map_fixed_noreplace
-if [ $? -ne 0 ]; then
- echo "[FAIL]"
- exitcode=1
-else
- echo "[PASS]"
-fi
-
-echo "--------------------------------------------"
-echo "running 'gup_benchmark -U' (normal/slow gup)"
-echo "--------------------------------------------"
-./gup_benchmark -U
-if [ $? -ne 0 ]; then
- echo "[FAIL]"
- exitcode=1
-else
- echo "[PASS]"
-fi
-
-echo "------------------------------------------"
-echo "running gup_benchmark -b (pin_user_pages)"
-echo "------------------------------------------"
-./gup_benchmark -b
-if [ $? -ne 0 ]; then
- echo "[FAIL]"
- exitcode=1
-else
- echo "[PASS]"
-fi
-
-echo "-------------------"
-echo "running userfaultfd"
-echo "-------------------"
-./userfaultfd anon 128 32
-if [ $? -ne 0 ]; then
- echo "[FAIL]"
- exitcode=1
-else
- echo "[PASS]"
-fi
-
-echo "---------------------------"
-echo "running userfaultfd_hugetlb"
-echo "---------------------------"
-# Test requires source and destination huge pages. Size of source
-# (half_ufd_size_MB) is passed as argument to test.
-./userfaultfd hugetlb $half_ufd_size_MB 32 $mnt/ufd_test_file
-if [ $? -ne 0 ]; then
- echo "[FAIL]"
- exitcode=1
-else
- echo "[PASS]"
-fi
-rm -f $mnt/ufd_test_file
-
-echo "-------------------------"
-echo "running userfaultfd_shmem"
-echo "-------------------------"
-./userfaultfd shmem 128 32
-if [ $? -ne 0 ]; then
- echo "[FAIL]"
- exitcode=1
-else
- echo "[PASS]"
-fi
-
-#cleanup
-umount $mnt
-rm -rf $mnt
-echo $nr_hugepgs > /proc/sys/vm/nr_hugepages
-
-echo "-----------------------"
-echo "running compaction_test"
-echo "-----------------------"
-./compaction_test
-if [ $? -ne 0 ]; then
- echo "[FAIL]"
- exitcode=1
-else
- echo "[PASS]"
-fi
-
-echo "----------------------"
-echo "running on-fault-limit"
-echo "----------------------"
-sudo -u nobody ./on-fault-limit
-if [ $? -ne 0 ]; then
- echo "[FAIL]"
- exitcode=1
-else
- echo "[PASS]"
-fi
-
-echo "--------------------"
-echo "running map_populate"
-echo "--------------------"
-./map_populate
-if [ $? -ne 0 ]; then
- echo "[FAIL]"
- exitcode=1
-else
- echo "[PASS]"
-fi
-
-echo "-------------------------"
-echo "running mlock-random-test"
-echo "-------------------------"
-./mlock-random-test
-if [ $? -ne 0 ]; then
- echo "[FAIL]"
- exitcode=1
-else
- echo "[PASS]"
-fi
-
-echo "--------------------"
-echo "running mlock2-tests"
-echo "--------------------"
-./mlock2-tests
-if [ $? -ne 0 ]; then
- echo "[FAIL]"
- exitcode=1
-else
- echo "[PASS]"
-fi
-
-echo "-----------------"
-echo "running thuge-gen"
-echo "-----------------"
-./thuge-gen
-if [ $? -ne 0 ]; then
- echo "[FAIL]"
- exitcode=1
-else
- echo "[PASS]"
-fi
-
-if [ $VADDR64 -ne 0 ]; then
-echo "-----------------------------"
-echo "running virtual_address_range"
-echo "-----------------------------"
-./virtual_address_range
-if [ $? -ne 0 ]; then
- echo "[FAIL]"
- exitcode=1
-else
- echo "[PASS]"
-fi
-
-echo "-----------------------------"
-echo "running virtual address 128TB switch test"
-echo "-----------------------------"
-./va_128TBswitch
-if [ $? -ne 0 ]; then
- echo "[FAIL]"
- exitcode=1
-else
- echo "[PASS]"
-fi
-fi # VADDR64
-
-echo "------------------------------------"
-echo "running vmalloc stability smoke test"
-echo "------------------------------------"
-./test_vmalloc.sh smoke
-ret_val=$?
-
-if [ $ret_val -eq 0 ]; then
- echo "[PASS]"
-elif [ $ret_val -eq $ksft_skip ]; then
- echo "[SKIP]"
- exitcode=$ksft_skip
-else
- echo "[FAIL]"
- exitcode=1
-fi
-
-echo "------------------------------------"
-echo "running MREMAP_DONTUNMAP smoke test"
-echo "------------------------------------"
-./mremap_dontunmap
-ret_val=$?
-
-if [ $ret_val -eq 0 ]; then
- echo "[PASS]"
-elif [ $ret_val -eq $ksft_skip ]; then
- echo "[SKIP]"
- exitcode=$ksft_skip
-else
- echo "[FAIL]"
- exitcode=1
-fi
-
-echo "running HMM smoke test"
-echo "------------------------------------"
-./test_hmm.sh smoke
-ret_val=$?
-
-if [ $ret_val -eq 0 ]; then
- echo "[PASS]"
-elif [ $ret_val -eq $ksft_skip ]; then
- echo "[SKIP]"
- exitcode=$ksft_skip
-else
- echo "[FAIL]"
- exitcode=1
-fi
-
-exit $exitcode
diff --git a/tools/testing/selftests/vm/transhuge-stress.c b/tools/testing/selftests/vm/transhuge-stress.c
deleted file mode 100644
index fd7f1b4a96f9..000000000000
--- a/tools/testing/selftests/vm/transhuge-stress.c
+++ /dev/null
@@ -1,144 +0,0 @@
-/*
- * Stress test for transparent huge pages, memory compaction and migration.
- *
- * Authors: Konstantin Khlebnikov <koct9i@gmail.com>
- *
- * This is free and unencumbered software released into the public domain.
- */
-
-#include <stdlib.h>
-#include <stdio.h>
-#include <stdint.h>
-#include <err.h>
-#include <time.h>
-#include <unistd.h>
-#include <fcntl.h>
-#include <string.h>
-#include <sys/mman.h>
-
-#define PAGE_SHIFT 12
-#define HPAGE_SHIFT 21
-
-#define PAGE_SIZE (1 << PAGE_SHIFT)
-#define HPAGE_SIZE (1 << HPAGE_SHIFT)
-
-#define PAGEMAP_PRESENT(ent) (((ent) & (1ull << 63)) != 0)
-#define PAGEMAP_PFN(ent) ((ent) & ((1ull << 55) - 1))
-
-int pagemap_fd;
-
-int64_t allocate_transhuge(void *ptr)
-{
- uint64_t ent[2];
-
- /* drop pmd */
- if (mmap(ptr, HPAGE_SIZE, PROT_READ | PROT_WRITE,
- MAP_FIXED | MAP_ANONYMOUS |
- MAP_NORESERVE | MAP_PRIVATE, -1, 0) != ptr)
- errx(2, "mmap transhuge");
-
- if (madvise(ptr, HPAGE_SIZE, MADV_HUGEPAGE))
- err(2, "MADV_HUGEPAGE");
-
- /* allocate transparent huge page */
- *(volatile void **)ptr = ptr;
-
- if (pread(pagemap_fd, ent, sizeof(ent),
- (uintptr_t)ptr >> (PAGE_SHIFT - 3)) != sizeof(ent))
- err(2, "read pagemap");
-
- if (PAGEMAP_PRESENT(ent[0]) && PAGEMAP_PRESENT(ent[1]) &&
- PAGEMAP_PFN(ent[0]) + 1 == PAGEMAP_PFN(ent[1]) &&
- !(PAGEMAP_PFN(ent[0]) & ((1 << (HPAGE_SHIFT - PAGE_SHIFT)) - 1)))
- return PAGEMAP_PFN(ent[0]);
-
- return -1;
-}
-
-int main(int argc, char **argv)
-{
- size_t ram, len;
- void *ptr, *p;
- struct timespec a, b;
- double s;
- uint8_t *map;
- size_t map_len;
-
- ram = sysconf(_SC_PHYS_PAGES);
- if (ram > SIZE_MAX / sysconf(_SC_PAGESIZE) / 4)
- ram = SIZE_MAX / 4;
- else
- ram *= sysconf(_SC_PAGESIZE);
-
- if (argc == 1)
- len = ram;
- else if (!strcmp(argv[1], "-h"))
- errx(1, "usage: %s [size in MiB]", argv[0]);
- else
- len = atoll(argv[1]) << 20;
-
- warnx("allocate %zd transhuge pages, using %zd MiB virtual memory"
- " and %zd MiB of ram", len >> HPAGE_SHIFT, len >> 20,
- len >> (20 + HPAGE_SHIFT - PAGE_SHIFT - 1));
-
- pagemap_fd = open("/proc/self/pagemap", O_RDONLY);
- if (pagemap_fd < 0)
- err(2, "open pagemap");
-
- len -= len % HPAGE_SIZE;
- ptr = mmap(NULL, len + HPAGE_SIZE, PROT_READ | PROT_WRITE,
- MAP_ANONYMOUS | MAP_NORESERVE | MAP_PRIVATE, -1, 0);
- if (ptr == MAP_FAILED)
- err(2, "initial mmap");
- ptr += HPAGE_SIZE - (uintptr_t)ptr % HPAGE_SIZE;
-
- if (madvise(ptr, len, MADV_HUGEPAGE))
- err(2, "MADV_HUGEPAGE");
-
- map_len = ram >> (HPAGE_SHIFT - 1);
- map = malloc(map_len);
- if (!map)
- errx(2, "map malloc");
-
- while (1) {
- int nr_succeed = 0, nr_failed = 0, nr_pages = 0;
-
- memset(map, 0, map_len);
-
- clock_gettime(CLOCK_MONOTONIC, &a);
- for (p = ptr; p < ptr + len; p += HPAGE_SIZE) {
- int64_t pfn;
-
- pfn = allocate_transhuge(p);
-
- if (pfn < 0) {
- nr_failed++;
- } else {
- size_t idx = pfn >> (HPAGE_SHIFT - PAGE_SHIFT);
-
- nr_succeed++;
- if (idx >= map_len) {
- map = realloc(map, idx + 1);
- if (!map)
- errx(2, "map realloc");
- memset(map + map_len, 0, idx + 1 - map_len);
- map_len = idx + 1;
- }
- if (!map[idx])
- nr_pages++;
- map[idx] = 1;
- }
-
- /* split transhuge page, keep last page */
- if (madvise(p, HPAGE_SIZE - PAGE_SIZE, MADV_DONTNEED))
- err(2, "MADV_DONTNEED");
- }
- clock_gettime(CLOCK_MONOTONIC, &b);
- s = b.tv_sec - a.tv_sec + (b.tv_nsec - a.tv_nsec) / 1000000000.;
-
- warnx("%.3f s/loop, %.3f ms/page, %10.3f MiB/s\t"
- "%4d succeed, %4d failed, %4d different pages",
- s, s * 1000 / (len >> HPAGE_SHIFT), len / s / (1 << 20),
- nr_succeed, nr_failed, nr_pages);
- }
-}
diff --git a/tools/testing/selftests/vm/userfaultfd.c b/tools/testing/selftests/vm/userfaultfd.c
deleted file mode 100644
index 61e5cfeb1350..000000000000
--- a/tools/testing/selftests/vm/userfaultfd.c
+++ /dev/null
@@ -1,1479 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-only
-/*
- * Stress userfaultfd syscall.
- *
- * Copyright (C) 2015 Red Hat, Inc.
- *
- * This test allocates two virtual areas and bounces the physical
- * memory across the two virtual areas (from area_src to area_dst)
- * using userfaultfd.
- *
- * There are three threads running per CPU:
- *
- * 1) one per-CPU thread takes a per-page pthread_mutex in a random
- * page of the area_dst (while the physical page may still be in
- * area_src), and increments a per-page counter in the same page,
- * and checks its value against a verification region.
- *
- * 2) another per-CPU thread handles the userfaults generated by
- * thread 1 above. userfaultfd blocking reads or poll() modes are
- * exercised interleaved.
- *
- * 3) one last per-CPU thread transfers the memory in the background
- * at maximum bandwidth (if not already transferred by thread
- * 2). Each cpu thread takes cares of transferring a portion of the
- * area.
- *
- * When all threads of type 3 completed the transfer, one bounce is
- * complete. area_src and area_dst are then swapped. All threads are
- * respawned and so the bounce is immediately restarted in the
- * opposite direction.
- *
- * per-CPU threads 1 by triggering userfaults inside
- * pthread_mutex_lock will also verify the atomicity of the memory
- * transfer (UFFDIO_COPY).
- */
-
-#define _GNU_SOURCE
-#include <stdio.h>
-#include <errno.h>
-#include <unistd.h>
-#include <stdlib.h>
-#include <sys/types.h>
-#include <sys/stat.h>
-#include <fcntl.h>
-#include <time.h>
-#include <signal.h>
-#include <poll.h>
-#include <string.h>
-#include <sys/mman.h>
-#include <sys/syscall.h>
-#include <sys/ioctl.h>
-#include <sys/wait.h>
-#include <pthread.h>
-#include <linux/userfaultfd.h>
-#include <setjmp.h>
-#include <stdbool.h>
-#include <assert.h>
-
-#include "../kselftest.h"
-
-#ifdef __NR_userfaultfd
-
-static unsigned long nr_cpus, nr_pages, nr_pages_per_cpu, page_size;
-
-#define BOUNCE_RANDOM (1<<0)
-#define BOUNCE_RACINGFAULTS (1<<1)
-#define BOUNCE_VERIFY (1<<2)
-#define BOUNCE_POLL (1<<3)
-static int bounces;
-
-#define TEST_ANON 1
-#define TEST_HUGETLB 2
-#define TEST_SHMEM 3
-static int test_type;
-
-/* exercise the test_uffdio_*_eexist every ALARM_INTERVAL_SECS */
-#define ALARM_INTERVAL_SECS 10
-static volatile bool test_uffdio_copy_eexist = true;
-static volatile bool test_uffdio_zeropage_eexist = true;
-/* Whether to test uffd write-protection */
-static bool test_uffdio_wp = false;
-
-static bool map_shared;
-static int huge_fd;
-static char *huge_fd_off0;
-static unsigned long long *count_verify;
-static int uffd, uffd_flags, finished, *pipefd;
-static char *area_src, *area_src_alias, *area_dst, *area_dst_alias;
-static char *zeropage;
-pthread_attr_t attr;
-
-/* Userfaultfd test statistics */
-struct uffd_stats {
- int cpu;
- unsigned long missing_faults;
- unsigned long wp_faults;
-};
-
-/* pthread_mutex_t starts at page offset 0 */
-#define area_mutex(___area, ___nr) \
- ((pthread_mutex_t *) ((___area) + (___nr)*page_size))
-/*
- * count is placed in the page after pthread_mutex_t naturally aligned
- * to avoid non alignment faults on non-x86 archs.
- */
-#define area_count(___area, ___nr) \
- ((volatile unsigned long long *) ((unsigned long) \
- ((___area) + (___nr)*page_size + \
- sizeof(pthread_mutex_t) + \
- sizeof(unsigned long long) - 1) & \
- ~(unsigned long)(sizeof(unsigned long long) \
- - 1)))
-
-const char *examples =
- "# Run anonymous memory test on 100MiB region with 99999 bounces:\n"
- "./userfaultfd anon 100 99999\n\n"
- "# Run share memory test on 1GiB region with 99 bounces:\n"
- "./userfaultfd shmem 1000 99\n\n"
- "# Run hugetlb memory test on 256MiB region with 50 bounces (using /dev/hugepages/hugefile):\n"
- "./userfaultfd hugetlb 256 50 /dev/hugepages/hugefile\n\n"
- "# Run the same hugetlb test but using shmem:\n"
- "./userfaultfd hugetlb_shared 256 50 /dev/hugepages/hugefile\n\n"
- "# 10MiB-~6GiB 999 bounces anonymous test, "
- "continue forever unless an error triggers\n"
- "while ./userfaultfd anon $[RANDOM % 6000 + 10] 999; do true; done\n\n";
-
-static void usage(void)
-{
- fprintf(stderr, "\nUsage: ./userfaultfd <test type> <MiB> <bounces> "
- "[hugetlbfs_file]\n\n");
- fprintf(stderr, "Supported <test type>: anon, hugetlb, "
- "hugetlb_shared, shmem\n\n");
- fprintf(stderr, "Examples:\n\n");
- fprintf(stderr, "%s", examples);
- exit(1);
-}
-
-static void uffd_stats_reset(struct uffd_stats *uffd_stats,
- unsigned long n_cpus)
-{
- int i;
-
- for (i = 0; i < n_cpus; i++) {
- uffd_stats[i].cpu = i;
- uffd_stats[i].missing_faults = 0;
- uffd_stats[i].wp_faults = 0;
- }
-}
-
-static void uffd_stats_report(struct uffd_stats *stats, int n_cpus)
-{
- int i;
- unsigned long long miss_total = 0, wp_total = 0;
-
- for (i = 0; i < n_cpus; i++) {
- miss_total += stats[i].missing_faults;
- wp_total += stats[i].wp_faults;
- }
-
- printf("userfaults: %llu missing (", miss_total);
- for (i = 0; i < n_cpus; i++)
- printf("%lu+", stats[i].missing_faults);
- printf("\b), %llu wp (", wp_total);
- for (i = 0; i < n_cpus; i++)
- printf("%lu+", stats[i].wp_faults);
- printf("\b)\n");
-}
-
-static int anon_release_pages(char *rel_area)
-{
- int ret = 0;
-
- if (madvise(rel_area, nr_pages * page_size, MADV_DONTNEED)) {
- perror("madvise");
- ret = 1;
- }
-
- return ret;
-}
-
-static void anon_allocate_area(void **alloc_area)
-{
- if (posix_memalign(alloc_area, page_size, nr_pages * page_size)) {
- fprintf(stderr, "out of memory\n");
- *alloc_area = NULL;
- }
-}
-
-static void noop_alias_mapping(__u64 *start, size_t len, unsigned long offset)
-{
-}
-
-/* HugeTLB memory */
-static int hugetlb_release_pages(char *rel_area)
-{
- int ret = 0;
-
- if (fallocate(huge_fd, FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE,
- rel_area == huge_fd_off0 ? 0 :
- nr_pages * page_size,
- nr_pages * page_size)) {
- perror("fallocate");
- ret = 1;
- }
-
- return ret;
-}
-
-
-static void hugetlb_allocate_area(void **alloc_area)
-{
- void *area_alias = NULL;
- char **alloc_area_alias;
- *alloc_area = mmap(NULL, nr_pages * page_size, PROT_READ | PROT_WRITE,
- (map_shared ? MAP_SHARED : MAP_PRIVATE) |
- MAP_HUGETLB,
- huge_fd, *alloc_area == area_src ? 0 :
- nr_pages * page_size);
- if (*alloc_area == MAP_FAILED) {
- fprintf(stderr, "mmap of hugetlbfs file failed\n");
- *alloc_area = NULL;
- }
-
- if (map_shared) {
- area_alias = mmap(NULL, nr_pages * page_size, PROT_READ | PROT_WRITE,
- MAP_SHARED | MAP_HUGETLB,
- huge_fd, *alloc_area == area_src ? 0 :
- nr_pages * page_size);
- if (area_alias == MAP_FAILED) {
- if (munmap(*alloc_area, nr_pages * page_size) < 0)
- perror("hugetlb munmap"), exit(1);
- *alloc_area = NULL;
- return;
- }
- }
- if (*alloc_area == area_src) {
- huge_fd_off0 = *alloc_area;
- alloc_area_alias = &area_src_alias;
- } else {
- alloc_area_alias = &area_dst_alias;
- }
- if (area_alias)
- *alloc_area_alias = area_alias;
-}
-
-static void hugetlb_alias_mapping(__u64 *start, size_t len, unsigned long offset)
-{
- if (!map_shared)
- return;
- /*
- * We can't zap just the pagetable with hugetlbfs because
- * MADV_DONTEED won't work. So exercise -EEXIST on a alias
- * mapping where the pagetables are not established initially,
- * this way we'll exercise the -EEXEC at the fs level.
- */
- *start = (unsigned long) area_dst_alias + offset;
-}
-
-/* Shared memory */
-static int shmem_release_pages(char *rel_area)
-{
- int ret = 0;
-
- if (madvise(rel_area, nr_pages * page_size, MADV_REMOVE)) {
- perror("madvise");
- ret = 1;
- }
-
- return ret;
-}
-
-static void shmem_allocate_area(void **alloc_area)
-{
- *alloc_area = mmap(NULL, nr_pages * page_size, PROT_READ | PROT_WRITE,
- MAP_ANONYMOUS | MAP_SHARED, -1, 0);
- if (*alloc_area == MAP_FAILED) {
- fprintf(stderr, "shared memory mmap failed\n");
- *alloc_area = NULL;
- }
-}
-
-struct uffd_test_ops {
- unsigned long expected_ioctls;
- void (*allocate_area)(void **alloc_area);
- int (*release_pages)(char *rel_area);
- void (*alias_mapping)(__u64 *start, size_t len, unsigned long offset);
-};
-
-#define SHMEM_EXPECTED_IOCTLS ((1 << _UFFDIO_WAKE) | \
- (1 << _UFFDIO_COPY) | \
- (1 << _UFFDIO_ZEROPAGE))
-
-#define ANON_EXPECTED_IOCTLS ((1 << _UFFDIO_WAKE) | \
- (1 << _UFFDIO_COPY) | \
- (1 << _UFFDIO_ZEROPAGE) | \
- (1 << _UFFDIO_WRITEPROTECT))
-
-static struct uffd_test_ops anon_uffd_test_ops = {
- .expected_ioctls = ANON_EXPECTED_IOCTLS,
- .allocate_area = anon_allocate_area,
- .release_pages = anon_release_pages,
- .alias_mapping = noop_alias_mapping,
-};
-
-static struct uffd_test_ops shmem_uffd_test_ops = {
- .expected_ioctls = SHMEM_EXPECTED_IOCTLS,
- .allocate_area = shmem_allocate_area,
- .release_pages = shmem_release_pages,
- .alias_mapping = noop_alias_mapping,
-};
-
-static struct uffd_test_ops hugetlb_uffd_test_ops = {
- .expected_ioctls = UFFD_API_RANGE_IOCTLS_BASIC,
- .allocate_area = hugetlb_allocate_area,
- .release_pages = hugetlb_release_pages,
- .alias_mapping = hugetlb_alias_mapping,
-};
-
-static struct uffd_test_ops *uffd_test_ops;
-
-static int my_bcmp(char *str1, char *str2, size_t n)
-{
- unsigned long i;
- for (i = 0; i < n; i++)
- if (str1[i] != str2[i])
- return 1;
- return 0;
-}
-
-static void wp_range(int ufd, __u64 start, __u64 len, bool wp)
-{
- struct uffdio_writeprotect prms = { 0 };
-
- /* Write protection page faults */
- prms.range.start = start;
- prms.range.len = len;
- /* Undo write-protect, do wakeup after that */
- prms.mode = wp ? UFFDIO_WRITEPROTECT_MODE_WP : 0;
-
- if (ioctl(ufd, UFFDIO_WRITEPROTECT, &prms))
- fprintf(stderr, "clear WP failed for address 0x%Lx\n",
- start), exit(1);
-}
-
-static void *locking_thread(void *arg)
-{
- unsigned long cpu = (unsigned long) arg;
- struct random_data rand;
- unsigned long page_nr = *(&(page_nr)); /* uninitialized warning */
- int32_t rand_nr;
- unsigned long long count;
- char randstate[64];
- unsigned int seed;
- time_t start;
-
- if (bounces & BOUNCE_RANDOM) {
- seed = (unsigned int) time(NULL) - bounces;
- if (!(bounces & BOUNCE_RACINGFAULTS))
- seed += cpu;
- bzero(&rand, sizeof(rand));
- bzero(&randstate, sizeof(randstate));
- if (initstate_r(seed, randstate, sizeof(randstate), &rand))
- fprintf(stderr, "srandom_r error\n"), exit(1);
- } else {
- page_nr = -bounces;
- if (!(bounces & BOUNCE_RACINGFAULTS))
- page_nr += cpu * nr_pages_per_cpu;
- }
-
- while (!finished) {
- if (bounces & BOUNCE_RANDOM) {
- if (random_r(&rand, &rand_nr))
- fprintf(stderr, "random_r 1 error\n"), exit(1);
- page_nr = rand_nr;
- if (sizeof(page_nr) > sizeof(rand_nr)) {
- if (random_r(&rand, &rand_nr))
- fprintf(stderr, "random_r 2 error\n"), exit(1);
- page_nr |= (((unsigned long) rand_nr) << 16) <<
- 16;
- }
- } else
- page_nr += 1;
- page_nr %= nr_pages;
-
- start = time(NULL);
- if (bounces & BOUNCE_VERIFY) {
- count = *area_count(area_dst, page_nr);
- if (!count)
- fprintf(stderr,
- "page_nr %lu wrong count %Lu %Lu\n",
- page_nr, count,
- count_verify[page_nr]), exit(1);
-
-
- /*
- * We can't use bcmp (or memcmp) because that
- * returns 0 erroneously if the memory is
- * changing under it (even if the end of the
- * page is never changing and always
- * different).
- */
-#if 1
- if (!my_bcmp(area_dst + page_nr * page_size, zeropage,
- page_size))
- fprintf(stderr,
- "my_bcmp page_nr %lu wrong count %Lu %Lu\n",
- page_nr, count,
- count_verify[page_nr]), exit(1);
-#else
- unsigned long loops;
-
- loops = 0;
- /* uncomment the below line to test with mutex */
- /* pthread_mutex_lock(area_mutex(area_dst, page_nr)); */
- while (!bcmp(area_dst + page_nr * page_size, zeropage,
- page_size)) {
- loops += 1;
- if (loops > 10)
- break;
- }
- /* uncomment below line to test with mutex */
- /* pthread_mutex_unlock(area_mutex(area_dst, page_nr)); */
- if (loops) {
- fprintf(stderr,
- "page_nr %lu all zero thread %lu %p %lu\n",
- page_nr, cpu, area_dst + page_nr * page_size,
- loops);
- if (loops > 10)
- exit(1);
- }
-#endif
- }
-
- pthread_mutex_lock(area_mutex(area_dst, page_nr));
- count = *area_count(area_dst, page_nr);
- if (count != count_verify[page_nr]) {
- fprintf(stderr,
- "page_nr %lu memory corruption %Lu %Lu\n",
- page_nr, count,
- count_verify[page_nr]), exit(1);
- }
- count++;
- *area_count(area_dst, page_nr) = count_verify[page_nr] = count;
- pthread_mutex_unlock(area_mutex(area_dst, page_nr));
-
- if (time(NULL) - start > 1)
- fprintf(stderr,
- "userfault too slow %ld "
- "possible false positive with overcommit\n",
- time(NULL) - start);
- }
-
- return NULL;
-}
-
-static void retry_copy_page(int ufd, struct uffdio_copy *uffdio_copy,
- unsigned long offset)
-{
- uffd_test_ops->alias_mapping(&uffdio_copy->dst,
- uffdio_copy->len,
- offset);
- if (ioctl(ufd, UFFDIO_COPY, uffdio_copy)) {
- /* real retval in ufdio_copy.copy */
- if (uffdio_copy->copy != -EEXIST)
- fprintf(stderr, "UFFDIO_COPY retry error %Ld\n",
- uffdio_copy->copy), exit(1);
- } else {
- fprintf(stderr, "UFFDIO_COPY retry unexpected %Ld\n",
- uffdio_copy->copy), exit(1);
- }
-}
-
-static int __copy_page(int ufd, unsigned long offset, bool retry)
-{
- struct uffdio_copy uffdio_copy;
-
- if (offset >= nr_pages * page_size)
- fprintf(stderr, "unexpected offset %lu\n",
- offset), exit(1);
- uffdio_copy.dst = (unsigned long) area_dst + offset;
- uffdio_copy.src = (unsigned long) area_src + offset;
- uffdio_copy.len = page_size;
- if (test_uffdio_wp)
- uffdio_copy.mode = UFFDIO_COPY_MODE_WP;
- else
- uffdio_copy.mode = 0;
- uffdio_copy.copy = 0;
- if (ioctl(ufd, UFFDIO_COPY, &uffdio_copy)) {
- /* real retval in ufdio_copy.copy */
- if (uffdio_copy.copy != -EEXIST)
- fprintf(stderr, "UFFDIO_COPY error %Ld\n",
- uffdio_copy.copy), exit(1);
- } else if (uffdio_copy.copy != page_size) {
- fprintf(stderr, "UFFDIO_COPY unexpected copy %Ld\n",
- uffdio_copy.copy), exit(1);
- } else {
- if (test_uffdio_copy_eexist && retry) {
- test_uffdio_copy_eexist = false;
- retry_copy_page(ufd, &uffdio_copy, offset);
- }
- return 1;
- }
- return 0;
-}
-
-static int copy_page_retry(int ufd, unsigned long offset)
-{
- return __copy_page(ufd, offset, true);
-}
-
-static int copy_page(int ufd, unsigned long offset)
-{
- return __copy_page(ufd, offset, false);
-}
-
-static int uffd_read_msg(int ufd, struct uffd_msg *msg)
-{
- int ret = read(uffd, msg, sizeof(*msg));
-
- if (ret != sizeof(*msg)) {
- if (ret < 0) {
- if (errno == EAGAIN)
- return 1;
- else
- perror("blocking read error"), exit(1);
- } else {
- fprintf(stderr, "short read\n"), exit(1);
- }
- }
-
- return 0;
-}
-
-static void uffd_handle_page_fault(struct uffd_msg *msg,
- struct uffd_stats *stats)
-{
- unsigned long offset;
-
- if (msg->event != UFFD_EVENT_PAGEFAULT)
- fprintf(stderr, "unexpected msg event %u\n",
- msg->event), exit(1);
-
- if (msg->arg.pagefault.flags & UFFD_PAGEFAULT_FLAG_WP) {
- wp_range(uffd, msg->arg.pagefault.address, page_size, false);
- stats->wp_faults++;
- } else {
- /* Missing page faults */
- if (bounces & BOUNCE_VERIFY &&
- msg->arg.pagefault.flags & UFFD_PAGEFAULT_FLAG_WRITE)
- fprintf(stderr, "unexpected write fault\n"), exit(1);
-
- offset = (char *)(unsigned long)msg->arg.pagefault.address - area_dst;
- offset &= ~(page_size-1);
-
- if (copy_page(uffd, offset))
- stats->missing_faults++;
- }
-}
-
-static void *uffd_poll_thread(void *arg)
-{
- struct uffd_stats *stats = (struct uffd_stats *)arg;
- unsigned long cpu = stats->cpu;
- struct pollfd pollfd[2];
- struct uffd_msg msg;
- struct uffdio_register uffd_reg;
- int ret;
- char tmp_chr;
-
- pollfd[0].fd = uffd;
- pollfd[0].events = POLLIN;
- pollfd[1].fd = pipefd[cpu*2];
- pollfd[1].events = POLLIN;
-
- for (;;) {
- ret = poll(pollfd, 2, -1);
- if (!ret)
- fprintf(stderr, "poll error %d\n", ret), exit(1);
- if (ret < 0)
- perror("poll"), exit(1);
- if (pollfd[1].revents & POLLIN) {
- if (read(pollfd[1].fd, &tmp_chr, 1) != 1)
- fprintf(stderr, "read pipefd error\n"),
- exit(1);
- break;
- }
- if (!(pollfd[0].revents & POLLIN))
- fprintf(stderr, "pollfd[0].revents %d\n",
- pollfd[0].revents), exit(1);
- if (uffd_read_msg(uffd, &msg))
- continue;
- switch (msg.event) {
- default:
- fprintf(stderr, "unexpected msg event %u\n",
- msg.event), exit(1);
- break;
- case UFFD_EVENT_PAGEFAULT:
- uffd_handle_page_fault(&msg, stats);
- break;
- case UFFD_EVENT_FORK:
- close(uffd);
- uffd = msg.arg.fork.ufd;
- pollfd[0].fd = uffd;
- break;
- case UFFD_EVENT_REMOVE:
- uffd_reg.range.start = msg.arg.remove.start;
- uffd_reg.range.len = msg.arg.remove.end -
- msg.arg.remove.start;
- if (ioctl(uffd, UFFDIO_UNREGISTER, &uffd_reg.range))
- fprintf(stderr, "remove failure\n"), exit(1);
- break;
- case UFFD_EVENT_REMAP:
- area_dst = (char *)(unsigned long)msg.arg.remap.to;
- break;
- }
- }
-
- return NULL;
-}
-
-pthread_mutex_t uffd_read_mutex = PTHREAD_MUTEX_INITIALIZER;
-
-static void *uffd_read_thread(void *arg)
-{
- struct uffd_stats *stats = (struct uffd_stats *)arg;
- struct uffd_msg msg;
-
- pthread_mutex_unlock(&uffd_read_mutex);
- /* from here cancellation is ok */
-
- for (;;) {
- if (uffd_read_msg(uffd, &msg))
- continue;
- uffd_handle_page_fault(&msg, stats);
- }
-
- return NULL;
-}
-
-static void *background_thread(void *arg)
-{
- unsigned long cpu = (unsigned long) arg;
- unsigned long page_nr, start_nr, mid_nr, end_nr;
-
- start_nr = cpu * nr_pages_per_cpu;
- end_nr = (cpu+1) * nr_pages_per_cpu;
- mid_nr = (start_nr + end_nr) / 2;
-
- /* Copy the first half of the pages */
- for (page_nr = start_nr; page_nr < mid_nr; page_nr++)
- copy_page_retry(uffd, page_nr * page_size);
-
- /*
- * If we need to test uffd-wp, set it up now. Then we'll have
- * at least the first half of the pages mapped already which
- * can be write-protected for testing
- */
- if (test_uffdio_wp)
- wp_range(uffd, (unsigned long)area_dst + start_nr * page_size,
- nr_pages_per_cpu * page_size, true);
-
- /*
- * Continue the 2nd half of the page copying, handling write
- * protection faults if any
- */
- for (page_nr = mid_nr; page_nr < end_nr; page_nr++)
- copy_page_retry(uffd, page_nr * page_size);
-
- return NULL;
-}
-
-static int stress(struct uffd_stats *uffd_stats)
-{
- unsigned long cpu;
- pthread_t locking_threads[nr_cpus];
- pthread_t uffd_threads[nr_cpus];
- pthread_t background_threads[nr_cpus];
-
- finished = 0;
- for (cpu = 0; cpu < nr_cpus; cpu++) {
- if (pthread_create(&locking_threads[cpu], &attr,
- locking_thread, (void *)cpu))
- return 1;
- if (bounces & BOUNCE_POLL) {
- if (pthread_create(&uffd_threads[cpu], &attr,
- uffd_poll_thread,
- (void *)&uffd_stats[cpu]))
- return 1;
- } else {
- if (pthread_create(&uffd_threads[cpu], &attr,
- uffd_read_thread,
- (void *)&uffd_stats[cpu]))
- return 1;
- pthread_mutex_lock(&uffd_read_mutex);
- }
- if (pthread_create(&background_threads[cpu], &attr,
- background_thread, (void *)cpu))
- return 1;
- }
- for (cpu = 0; cpu < nr_cpus; cpu++)
- if (pthread_join(background_threads[cpu], NULL))
- return 1;
-
- /*
- * Be strict and immediately zap area_src, the whole area has
- * been transferred already by the background treads. The
- * area_src could then be faulted in in a racy way by still
- * running uffdio_threads reading zeropages after we zapped
- * area_src (but they're guaranteed to get -EEXIST from
- * UFFDIO_COPY without writing zero pages into area_dst
- * because the background threads already completed).
- */
- if (uffd_test_ops->release_pages(area_src))
- return 1;
-
-
- finished = 1;
- for (cpu = 0; cpu < nr_cpus; cpu++)
- if (pthread_join(locking_threads[cpu], NULL))
- return 1;
-
- for (cpu = 0; cpu < nr_cpus; cpu++) {
- char c;
- if (bounces & BOUNCE_POLL) {
- if (write(pipefd[cpu*2+1], &c, 1) != 1) {
- fprintf(stderr, "pipefd write error\n");
- return 1;
- }
- if (pthread_join(uffd_threads[cpu],
- (void *)&uffd_stats[cpu]))
- return 1;
- } else {
- if (pthread_cancel(uffd_threads[cpu]))
- return 1;
- if (pthread_join(uffd_threads[cpu], NULL))
- return 1;
- }
- }
-
- return 0;
-}
-
-static int userfaultfd_open(int features)
-{
- struct uffdio_api uffdio_api;
-
- uffd = syscall(__NR_userfaultfd, O_CLOEXEC | O_NONBLOCK);
- if (uffd < 0) {
- fprintf(stderr,
- "userfaultfd syscall not available in this kernel\n");
- return 1;
- }
- uffd_flags = fcntl(uffd, F_GETFD, NULL);
-
- uffdio_api.api = UFFD_API;
- uffdio_api.features = features;
- if (ioctl(uffd, UFFDIO_API, &uffdio_api)) {
- fprintf(stderr, "UFFDIO_API\n");
- return 1;
- }
- if (uffdio_api.api != UFFD_API) {
- fprintf(stderr, "UFFDIO_API error %Lu\n", uffdio_api.api);
- return 1;
- }
-
- return 0;
-}
-
-sigjmp_buf jbuf, *sigbuf;
-
-static void sighndl(int sig, siginfo_t *siginfo, void *ptr)
-{
- if (sig == SIGBUS) {
- if (sigbuf)
- siglongjmp(*sigbuf, 1);
- abort();
- }
-}
-
-/*
- * For non-cooperative userfaultfd test we fork() a process that will
- * generate pagefaults, will mremap the area monitored by the
- * userfaultfd and at last this process will release the monitored
- * area.
- * For the anonymous and shared memory the area is divided into two
- * parts, the first part is accessed before mremap, and the second
- * part is accessed after mremap. Since hugetlbfs does not support
- * mremap, the entire monitored area is accessed in a single pass for
- * HUGETLB_TEST.
- * The release of the pages currently generates event for shmem and
- * anonymous memory (UFFD_EVENT_REMOVE), hence it is not checked
- * for hugetlb.
- * For signal test(UFFD_FEATURE_SIGBUS), signal_test = 1, we register
- * monitored area, generate pagefaults and test that signal is delivered.
- * Use UFFDIO_COPY to allocate missing page and retry. For signal_test = 2
- * test robustness use case - we release monitored area, fork a process
- * that will generate pagefaults and verify signal is generated.
- * This also tests UFFD_FEATURE_EVENT_FORK event along with the signal
- * feature. Using monitor thread, verify no userfault events are generated.
- */
-static int faulting_process(int signal_test)
-{
- unsigned long nr;
- unsigned long long count;
- unsigned long split_nr_pages;
- unsigned long lastnr;
- struct sigaction act;
- unsigned long signalled = 0;
-
- if (test_type != TEST_HUGETLB)
- split_nr_pages = (nr_pages + 1) / 2;
- else
- split_nr_pages = nr_pages;
-
- if (signal_test) {
- sigbuf = &jbuf;
- memset(&act, 0, sizeof(act));
- act.sa_sigaction = sighndl;
- act.sa_flags = SA_SIGINFO;
- if (sigaction(SIGBUS, &act, 0)) {
- perror("sigaction");
- return 1;
- }
- lastnr = (unsigned long)-1;
- }
-
- for (nr = 0; nr < split_nr_pages; nr++) {
- int steps = 1;
- unsigned long offset = nr * page_size;
-
- if (signal_test) {
- if (sigsetjmp(*sigbuf, 1) != 0) {
- if (steps == 1 && nr == lastnr) {
- fprintf(stderr, "Signal repeated\n");
- return 1;
- }
-
- lastnr = nr;
- if (signal_test == 1) {
- if (steps == 1) {
- /* This is a MISSING request */
- steps++;
- if (copy_page(uffd, offset))
- signalled++;
- } else {
- /* This is a WP request */
- assert(steps == 2);
- wp_range(uffd,
- (__u64)area_dst +
- offset,
- page_size, false);
- }
- } else {
- signalled++;
- continue;
- }
- }
- }
-
- count = *area_count(area_dst, nr);
- if (count != count_verify[nr]) {
- fprintf(stderr,
- "nr %lu memory corruption %Lu %Lu\n",
- nr, count,
- count_verify[nr]);
- }
- /*
- * Trigger write protection if there is by writting
- * the same value back.
- */
- *area_count(area_dst, nr) = count;
- }
-
- if (signal_test)
- return signalled != split_nr_pages;
-
- if (test_type == TEST_HUGETLB)
- return 0;
-
- area_dst = mremap(area_dst, nr_pages * page_size, nr_pages * page_size,
- MREMAP_MAYMOVE | MREMAP_FIXED, area_src);
- if (area_dst == MAP_FAILED)
- perror("mremap"), exit(1);
-
- for (; nr < nr_pages; nr++) {
- count = *area_count(area_dst, nr);
- if (count != count_verify[nr]) {
- fprintf(stderr,
- "nr %lu memory corruption %Lu %Lu\n",
- nr, count,
- count_verify[nr]), exit(1);
- }
- /*
- * Trigger write protection if there is by writting
- * the same value back.
- */
- *area_count(area_dst, nr) = count;
- }
-
- if (uffd_test_ops->release_pages(area_dst))
- return 1;
-
- for (nr = 0; nr < nr_pages; nr++) {
- if (my_bcmp(area_dst + nr * page_size, zeropage, page_size))
- fprintf(stderr, "nr %lu is not zero\n", nr), exit(1);
- }
-
- return 0;
-}
-
-static void retry_uffdio_zeropage(int ufd,
- struct uffdio_zeropage *uffdio_zeropage,
- unsigned long offset)
-{
- uffd_test_ops->alias_mapping(&uffdio_zeropage->range.start,
- uffdio_zeropage->range.len,
- offset);
- if (ioctl(ufd, UFFDIO_ZEROPAGE, uffdio_zeropage)) {
- if (uffdio_zeropage->zeropage != -EEXIST)
- fprintf(stderr, "UFFDIO_ZEROPAGE retry error %Ld\n",
- uffdio_zeropage->zeropage), exit(1);
- } else {
- fprintf(stderr, "UFFDIO_ZEROPAGE retry unexpected %Ld\n",
- uffdio_zeropage->zeropage), exit(1);
- }
-}
-
-static int __uffdio_zeropage(int ufd, unsigned long offset, bool retry)
-{
- struct uffdio_zeropage uffdio_zeropage;
- int ret;
- unsigned long has_zeropage;
-
- has_zeropage = uffd_test_ops->expected_ioctls & (1 << _UFFDIO_ZEROPAGE);
-
- if (offset >= nr_pages * page_size)
- fprintf(stderr, "unexpected offset %lu\n",
- offset), exit(1);
- uffdio_zeropage.range.start = (unsigned long) area_dst + offset;
- uffdio_zeropage.range.len = page_size;
- uffdio_zeropage.mode = 0;
- ret = ioctl(ufd, UFFDIO_ZEROPAGE, &uffdio_zeropage);
- if (ret) {
- /* real retval in ufdio_zeropage.zeropage */
- if (has_zeropage) {
- if (uffdio_zeropage.zeropage == -EEXIST)
- fprintf(stderr, "UFFDIO_ZEROPAGE -EEXIST\n"),
- exit(1);
- else
- fprintf(stderr, "UFFDIO_ZEROPAGE error %Ld\n",
- uffdio_zeropage.zeropage), exit(1);
- } else {
- if (uffdio_zeropage.zeropage != -EINVAL)
- fprintf(stderr,
- "UFFDIO_ZEROPAGE not -EINVAL %Ld\n",
- uffdio_zeropage.zeropage), exit(1);
- }
- } else if (has_zeropage) {
- if (uffdio_zeropage.zeropage != page_size) {
- fprintf(stderr, "UFFDIO_ZEROPAGE unexpected %Ld\n",
- uffdio_zeropage.zeropage), exit(1);
- } else {
- if (test_uffdio_zeropage_eexist && retry) {
- test_uffdio_zeropage_eexist = false;
- retry_uffdio_zeropage(ufd, &uffdio_zeropage,
- offset);
- }
- return 1;
- }
- } else {
- fprintf(stderr,
- "UFFDIO_ZEROPAGE succeeded %Ld\n",
- uffdio_zeropage.zeropage), exit(1);
- }
-
- return 0;
-}
-
-static int uffdio_zeropage(int ufd, unsigned long offset)
-{
- return __uffdio_zeropage(ufd, offset, false);
-}
-
-/* exercise UFFDIO_ZEROPAGE */
-static int userfaultfd_zeropage_test(void)
-{
- struct uffdio_register uffdio_register;
- unsigned long expected_ioctls;
-
- printf("testing UFFDIO_ZEROPAGE: ");
- fflush(stdout);
-
- if (uffd_test_ops->release_pages(area_dst))
- return 1;
-
- if (userfaultfd_open(0) < 0)
- return 1;
- uffdio_register.range.start = (unsigned long) area_dst;
- uffdio_register.range.len = nr_pages * page_size;
- uffdio_register.mode = UFFDIO_REGISTER_MODE_MISSING;
- if (test_uffdio_wp)
- uffdio_register.mode |= UFFDIO_REGISTER_MODE_WP;
- if (ioctl(uffd, UFFDIO_REGISTER, &uffdio_register))
- fprintf(stderr, "register failure\n"), exit(1);
-
- expected_ioctls = uffd_test_ops->expected_ioctls;
- if ((uffdio_register.ioctls & expected_ioctls) !=
- expected_ioctls)
- fprintf(stderr,
- "unexpected missing ioctl for anon memory\n"),
- exit(1);
-
- if (uffdio_zeropage(uffd, 0)) {
- if (my_bcmp(area_dst, zeropage, page_size))
- fprintf(stderr, "zeropage is not zero\n"), exit(1);
- }
-
- close(uffd);
- printf("done.\n");
- return 0;
-}
-
-static int userfaultfd_events_test(void)
-{
- struct uffdio_register uffdio_register;
- unsigned long expected_ioctls;
- pthread_t uffd_mon;
- int err, features;
- pid_t pid;
- char c;
- struct uffd_stats stats = { 0 };
-
- printf("testing events (fork, remap, remove): ");
- fflush(stdout);
-
- if (uffd_test_ops->release_pages(area_dst))
- return 1;
-
- features = UFFD_FEATURE_EVENT_FORK | UFFD_FEATURE_EVENT_REMAP |
- UFFD_FEATURE_EVENT_REMOVE;
- if (userfaultfd_open(features) < 0)
- return 1;
- fcntl(uffd, F_SETFL, uffd_flags | O_NONBLOCK);
-
- uffdio_register.range.start = (unsigned long) area_dst;
- uffdio_register.range.len = nr_pages * page_size;
- uffdio_register.mode = UFFDIO_REGISTER_MODE_MISSING;
- if (test_uffdio_wp)
- uffdio_register.mode |= UFFDIO_REGISTER_MODE_WP;
- if (ioctl(uffd, UFFDIO_REGISTER, &uffdio_register))
- fprintf(stderr, "register failure\n"), exit(1);
-
- expected_ioctls = uffd_test_ops->expected_ioctls;
- if ((uffdio_register.ioctls & expected_ioctls) !=
- expected_ioctls)
- fprintf(stderr,
- "unexpected missing ioctl for anon memory\n"),
- exit(1);
-
- if (pthread_create(&uffd_mon, &attr, uffd_poll_thread, &stats))
- perror("uffd_poll_thread create"), exit(1);
-
- pid = fork();
- if (pid < 0)
- perror("fork"), exit(1);
-
- if (!pid)
- return faulting_process(0);
-
- waitpid(pid, &err, 0);
- if (err)
- fprintf(stderr, "faulting process failed\n"), exit(1);
-
- if (write(pipefd[1], &c, sizeof(c)) != sizeof(c))
- perror("pipe write"), exit(1);
- if (pthread_join(uffd_mon, NULL))
- return 1;
-
- close(uffd);
-
- uffd_stats_report(&stats, 1);
-
- return stats.missing_faults != nr_pages;
-}
-
-static int userfaultfd_sig_test(void)
-{
- struct uffdio_register uffdio_register;
- unsigned long expected_ioctls;
- unsigned long userfaults;
- pthread_t uffd_mon;
- int err, features;
- pid_t pid;
- char c;
- struct uffd_stats stats = { 0 };
-
- printf("testing signal delivery: ");
- fflush(stdout);
-
- if (uffd_test_ops->release_pages(area_dst))
- return 1;
-
- features = UFFD_FEATURE_EVENT_FORK|UFFD_FEATURE_SIGBUS;
- if (userfaultfd_open(features) < 0)
- return 1;
- fcntl(uffd, F_SETFL, uffd_flags | O_NONBLOCK);
-
- uffdio_register.range.start = (unsigned long) area_dst;
- uffdio_register.range.len = nr_pages * page_size;
- uffdio_register.mode = UFFDIO_REGISTER_MODE_MISSING;
- if (test_uffdio_wp)
- uffdio_register.mode |= UFFDIO_REGISTER_MODE_WP;
- if (ioctl(uffd, UFFDIO_REGISTER, &uffdio_register))
- fprintf(stderr, "register failure\n"), exit(1);
-
- expected_ioctls = uffd_test_ops->expected_ioctls;
- if ((uffdio_register.ioctls & expected_ioctls) !=
- expected_ioctls)
- fprintf(stderr,
- "unexpected missing ioctl for anon memory\n"),
- exit(1);
-
- if (faulting_process(1))
- fprintf(stderr, "faulting process failed\n"), exit(1);
-
- if (uffd_test_ops->release_pages(area_dst))
- return 1;
-
- if (pthread_create(&uffd_mon, &attr, uffd_poll_thread, &stats))
- perror("uffd_poll_thread create"), exit(1);
-
- pid = fork();
- if (pid < 0)
- perror("fork"), exit(1);
-
- if (!pid)
- exit(faulting_process(2));
-
- waitpid(pid, &err, 0);
- if (err)
- fprintf(stderr, "faulting process failed\n"), exit(1);
-
- if (write(pipefd[1], &c, sizeof(c)) != sizeof(c))
- perror("pipe write"), exit(1);
- if (pthread_join(uffd_mon, (void **)&userfaults))
- return 1;
-
- printf("done.\n");
- if (userfaults)
- fprintf(stderr, "Signal test failed, userfaults: %ld\n",
- userfaults);
- close(uffd);
- return userfaults != 0;
-}
-
-static int userfaultfd_stress(void)
-{
- void *area;
- char *tmp_area;
- unsigned long nr;
- struct uffdio_register uffdio_register;
- unsigned long cpu;
- int err;
- struct uffd_stats uffd_stats[nr_cpus];
-
- uffd_test_ops->allocate_area((void **)&area_src);
- if (!area_src)
- return 1;
- uffd_test_ops->allocate_area((void **)&area_dst);
- if (!area_dst)
- return 1;
-
- if (userfaultfd_open(0) < 0)
- return 1;
-
- count_verify = malloc(nr_pages * sizeof(unsigned long long));
- if (!count_verify) {
- perror("count_verify");
- return 1;
- }
-
- for (nr = 0; nr < nr_pages; nr++) {
- *area_mutex(area_src, nr) = (pthread_mutex_t)
- PTHREAD_MUTEX_INITIALIZER;
- count_verify[nr] = *area_count(area_src, nr) = 1;
- /*
- * In the transition between 255 to 256, powerpc will
- * read out of order in my_bcmp and see both bytes as
- * zero, so leave a placeholder below always non-zero
- * after the count, to avoid my_bcmp to trigger false
- * positives.
- */
- *(area_count(area_src, nr) + 1) = 1;
- }
-
- pipefd = malloc(sizeof(int) * nr_cpus * 2);
- if (!pipefd) {
- perror("pipefd");
- return 1;
- }
- for (cpu = 0; cpu < nr_cpus; cpu++) {
- if (pipe2(&pipefd[cpu*2], O_CLOEXEC | O_NONBLOCK)) {
- perror("pipe");
- return 1;
- }
- }
-
- if (posix_memalign(&area, page_size, page_size)) {
- fprintf(stderr, "out of memory\n");
- return 1;
- }
- zeropage = area;
- bzero(zeropage, page_size);
-
- pthread_mutex_lock(&uffd_read_mutex);
-
- pthread_attr_init(&attr);
- pthread_attr_setstacksize(&attr, 16*1024*1024);
-
- err = 0;
- while (bounces--) {
- unsigned long expected_ioctls;
-
- printf("bounces: %d, mode:", bounces);
- if (bounces & BOUNCE_RANDOM)
- printf(" rnd");
- if (bounces & BOUNCE_RACINGFAULTS)
- printf(" racing");
- if (bounces & BOUNCE_VERIFY)
- printf(" ver");
- if (bounces & BOUNCE_POLL)
- printf(" poll");
- printf(", ");
- fflush(stdout);
-
- if (bounces & BOUNCE_POLL)
- fcntl(uffd, F_SETFL, uffd_flags | O_NONBLOCK);
- else
- fcntl(uffd, F_SETFL, uffd_flags & ~O_NONBLOCK);
-
- /* register */
- uffdio_register.range.start = (unsigned long) area_dst;
- uffdio_register.range.len = nr_pages * page_size;
- uffdio_register.mode = UFFDIO_REGISTER_MODE_MISSING;
- if (test_uffdio_wp)
- uffdio_register.mode |= UFFDIO_REGISTER_MODE_WP;
- if (ioctl(uffd, UFFDIO_REGISTER, &uffdio_register)) {
- fprintf(stderr, "register failure\n");
- return 1;
- }
- expected_ioctls = uffd_test_ops->expected_ioctls;
- if ((uffdio_register.ioctls & expected_ioctls) !=
- expected_ioctls) {
- fprintf(stderr,
- "unexpected missing ioctl for anon memory\n");
- return 1;
- }
-
- if (area_dst_alias) {
- uffdio_register.range.start = (unsigned long)
- area_dst_alias;
- if (ioctl(uffd, UFFDIO_REGISTER, &uffdio_register)) {
- fprintf(stderr, "register failure alias\n");
- return 1;
- }
- }
-
- /*
- * The madvise done previously isn't enough: some
- * uffd_thread could have read userfaults (one of
- * those already resolved by the background thread)
- * and it may be in the process of calling
- * UFFDIO_COPY. UFFDIO_COPY will read the zapped
- * area_src and it would map a zero page in it (of
- * course such a UFFDIO_COPY is perfectly safe as it'd
- * return -EEXIST). The problem comes at the next
- * bounce though: that racing UFFDIO_COPY would
- * generate zeropages in the area_src, so invalidating
- * the previous MADV_DONTNEED. Without this additional
- * MADV_DONTNEED those zeropages leftovers in the
- * area_src would lead to -EEXIST failure during the
- * next bounce, effectively leaving a zeropage in the
- * area_dst.
- *
- * Try to comment this out madvise to see the memory
- * corruption being caught pretty quick.
- *
- * khugepaged is also inhibited to collapse THP after
- * MADV_DONTNEED only after the UFFDIO_REGISTER, so it's
- * required to MADV_DONTNEED here.
- */
- if (uffd_test_ops->release_pages(area_dst))
- return 1;
-
- uffd_stats_reset(uffd_stats, nr_cpus);
-
- /* bounce pass */
- if (stress(uffd_stats))
- return 1;
-
- /* Clear all the write protections if there is any */
- if (test_uffdio_wp)
- wp_range(uffd, (unsigned long)area_dst,
- nr_pages * page_size, false);
-
- /* unregister */
- if (ioctl(uffd, UFFDIO_UNREGISTER, &uffdio_register.range)) {
- fprintf(stderr, "unregister failure\n");
- return 1;
- }
- if (area_dst_alias) {
- uffdio_register.range.start = (unsigned long) area_dst;
- if (ioctl(uffd, UFFDIO_UNREGISTER,
- &uffdio_register.range)) {
- fprintf(stderr, "unregister failure alias\n");
- return 1;
- }
- }
-
- /* verification */
- if (bounces & BOUNCE_VERIFY) {
- for (nr = 0; nr < nr_pages; nr++) {
- if (*area_count(area_dst, nr) != count_verify[nr]) {
- fprintf(stderr,
- "error area_count %Lu %Lu %lu\n",
- *area_count(area_src, nr),
- count_verify[nr],
- nr);
- err = 1;
- bounces = 0;
- }
- }
- }
-
- /* prepare next bounce */
- tmp_area = area_src;
- area_src = area_dst;
- area_dst = tmp_area;
-
- tmp_area = area_src_alias;
- area_src_alias = area_dst_alias;
- area_dst_alias = tmp_area;
-
- uffd_stats_report(uffd_stats, nr_cpus);
- }
-
- if (err)
- return err;
-
- close(uffd);
- return userfaultfd_zeropage_test() || userfaultfd_sig_test()
- || userfaultfd_events_test();
-}
-
-/*
- * Copied from mlock2-tests.c
- */
-unsigned long default_huge_page_size(void)
-{
- unsigned long hps = 0;
- char *line = NULL;
- size_t linelen = 0;
- FILE *f = fopen("/proc/meminfo", "r");
-
- if (!f)
- return 0;
- while (getline(&line, &linelen, f) > 0) {
- if (sscanf(line, "Hugepagesize: %lu kB", &hps) == 1) {
- hps <<= 10;
- break;
- }
- }
-
- free(line);
- fclose(f);
- return hps;
-}
-
-static void set_test_type(const char *type)
-{
- if (!strcmp(type, "anon")) {
- test_type = TEST_ANON;
- uffd_test_ops = &anon_uffd_test_ops;
- /* Only enable write-protect test for anonymous test */
- test_uffdio_wp = true;
- } else if (!strcmp(type, "hugetlb")) {
- test_type = TEST_HUGETLB;
- uffd_test_ops = &hugetlb_uffd_test_ops;
- } else if (!strcmp(type, "hugetlb_shared")) {
- map_shared = true;
- test_type = TEST_HUGETLB;
- uffd_test_ops = &hugetlb_uffd_test_ops;
- } else if (!strcmp(type, "shmem")) {
- map_shared = true;
- test_type = TEST_SHMEM;
- uffd_test_ops = &shmem_uffd_test_ops;
- } else {
- fprintf(stderr, "Unknown test type: %s\n", type), exit(1);
- }
-
- if (test_type == TEST_HUGETLB)
- page_size = default_huge_page_size();
- else
- page_size = sysconf(_SC_PAGE_SIZE);
-
- if (!page_size)
- fprintf(stderr, "Unable to determine page size\n"),
- exit(2);
- if ((unsigned long) area_count(NULL, 0) + sizeof(unsigned long long) * 2
- > page_size)
- fprintf(stderr, "Impossible to run this test\n"), exit(2);
-}
-
-static void sigalrm(int sig)
-{
- if (sig != SIGALRM)
- abort();
- test_uffdio_copy_eexist = true;
- test_uffdio_zeropage_eexist = true;
- alarm(ALARM_INTERVAL_SECS);
-}
-
-int main(int argc, char **argv)
-{
- if (argc < 4)
- usage();
-
- if (signal(SIGALRM, sigalrm) == SIG_ERR)
- fprintf(stderr, "failed to arm SIGALRM"), exit(1);
- alarm(ALARM_INTERVAL_SECS);
-
- set_test_type(argv[1]);
-
- nr_cpus = sysconf(_SC_NPROCESSORS_ONLN);
- nr_pages_per_cpu = atol(argv[2]) * 1024*1024 / page_size /
- nr_cpus;
- if (!nr_pages_per_cpu) {
- fprintf(stderr, "invalid MiB\n");
- usage();
- }
-
- bounces = atoi(argv[3]);
- if (bounces <= 0) {
- fprintf(stderr, "invalid bounces\n");
- usage();
- }
- nr_pages = nr_pages_per_cpu * nr_cpus;
-
- if (test_type == TEST_HUGETLB) {
- if (argc < 5)
- usage();
- huge_fd = open(argv[4], O_CREAT | O_RDWR, 0755);
- if (huge_fd < 0) {
- fprintf(stderr, "Open of %s failed", argv[3]);
- perror("open");
- exit(1);
- }
- if (ftruncate(huge_fd, 0)) {
- fprintf(stderr, "ftruncate %s to size 0 failed", argv[3]);
- perror("ftruncate");
- exit(1);
- }
- }
- printf("nr_pages: %lu, nr_pages_per_cpu: %lu\n",
- nr_pages, nr_pages_per_cpu);
- return userfaultfd_stress();
-}
-
-#else /* __NR_userfaultfd */
-
-#warning "missing __NR_userfaultfd definition"
-
-int main(void)
-{
- printf("skip: Skipping userfaultfd test (missing __NR_userfaultfd)\n");
- return KSFT_SKIP;
-}
-
-#endif /* __NR_userfaultfd */
diff --git a/tools/testing/selftests/watchdog/watchdog-test.c b/tools/testing/selftests/watchdog/watchdog-test.c
index f45e510500c0..bc71cbca0dde 100644
--- a/tools/testing/selftests/watchdog/watchdog-test.c
+++ b/tools/testing/selftests/watchdog/watchdog-test.c
@@ -1,6 +1,14 @@
// SPDX-License-Identifier: GPL-2.0
/*
- * Watchdog Driver Test Program
+* Watchdog Driver Test Program
+* - Tests all ioctls
+* - Tests Magic Close - CONFIG_WATCHDOG_NOWAYOUT
+* - Could be tested against softdog driver on systems that
+* don't have watchdog hardware.
+* - TODO:
+* - Enhance test to add coverage for WDIOC_GETTEMP.
+*
+* Reference: Documentation/watchdog/watchdog-api.rst
*/
#include <errno.h>
@@ -19,13 +27,14 @@
int fd;
const char v = 'V';
-static const char sopts[] = "bdehp:t:Tn:NLf:i";
+static const char sopts[] = "bdehp:st:Tn:NLf:i";
static const struct option lopts[] = {
{"bootstatus", no_argument, NULL, 'b'},
{"disable", no_argument, NULL, 'd'},
{"enable", no_argument, NULL, 'e'},
{"help", no_argument, NULL, 'h'},
{"pingrate", required_argument, NULL, 'p'},
+ {"status", no_argument, NULL, 's'},
{"timeout", required_argument, NULL, 't'},
{"gettimeout", no_argument, NULL, 'T'},
{"pretimeout", required_argument, NULL, 'n'},
@@ -74,6 +83,7 @@ static void usage(char *progname)
printf(" -f, --file\t\tOpen watchdog device file\n");
printf("\t\t\tDefault is /dev/watchdog\n");
printf(" -i, --info\t\tShow watchdog_info\n");
+ printf(" -s, --status\t\tGet status & supported features\n");
printf(" -b, --bootstatus\tGet last boot status (Watchdog/POR)\n");
printf(" -d, --disable\t\tTurn off the watchdog timer\n");
printf(" -e, --enable\t\tTurn on the watchdog timer\n");
@@ -91,6 +101,73 @@ static void usage(char *progname)
printf("Example: %s -t 12 -T -n 7 -N\n", progname);
}
+struct wdiof_status {
+ int flag;
+ const char *status_str;
+};
+
+#define WDIOF_NUM_STATUS 8
+
+static const struct wdiof_status wdiof_status[WDIOF_NUM_STATUS] = {
+ {WDIOF_SETTIMEOUT, "Set timeout (in seconds)"},
+ {WDIOF_MAGICCLOSE, "Supports magic close char"},
+ {WDIOF_PRETIMEOUT, "Pretimeout (in seconds), get/set"},
+ {WDIOF_ALARMONLY, "Watchdog triggers a management or other external alarm not a reboot"},
+ {WDIOF_KEEPALIVEPING, "Keep alive ping reply"},
+ {WDIOS_DISABLECARD, "Turn off the watchdog timer"},
+ {WDIOS_ENABLECARD, "Turn on the watchdog timer"},
+ {WDIOS_TEMPPANIC, "Kernel panic on temperature trip"},
+};
+
+static void print_status(int flags)
+{
+ int wdiof = 0;
+
+ if (flags == WDIOS_UNKNOWN) {
+ printf("Unknown status error from WDIOC_GETSTATUS\n");
+ return;
+ }
+
+ for (wdiof = 0; wdiof < WDIOF_NUM_STATUS; wdiof++) {
+ if (flags & wdiof_status[wdiof].flag)
+ printf("Support/Status: %s\n",
+ wdiof_status[wdiof].status_str);
+ }
+}
+
+#define WDIOF_NUM_BOOTSTATUS 7
+
+static const struct wdiof_status wdiof_bootstatus[WDIOF_NUM_BOOTSTATUS] = {
+ {WDIOF_OVERHEAT, "Reset due to CPU overheat"},
+ {WDIOF_FANFAULT, "Fan failed"},
+ {WDIOF_EXTERN1, "External relay 1"},
+ {WDIOF_EXTERN2, "External relay 2"},
+ {WDIOF_POWERUNDER, "Power bad/power fault"},
+ {WDIOF_CARDRESET, "Card previously reset the CPU"},
+ {WDIOF_POWEROVER, "Power over voltage"},
+};
+
+static void print_boot_status(int flags)
+{
+ int wdiof = 0;
+
+ if (flags == WDIOF_UNKNOWN) {
+ printf("Unknown flag error from WDIOC_GETBOOTSTATUS\n");
+ return;
+ }
+
+ if (flags == 0) {
+ printf("Last boot is caused by: Power-On-Reset\n");
+ return;
+ }
+
+ for (wdiof = 0; wdiof < WDIOF_NUM_BOOTSTATUS; wdiof++) {
+ if (flags & wdiof_bootstatus[wdiof].flag)
+ printf("Last boot is caused by: %s\n",
+ wdiof_bootstatus[wdiof].status_str);
+ }
+}
+
int main(int argc, char *argv[])
{
int flags;
@@ -100,6 +177,7 @@ int main(int argc, char *argv[])
int oneshot = 0;
char *file = "/dev/watchdog";
struct watchdog_info info;
+ int temperature;
setbuf(stdout, NULL);
@@ -140,8 +218,7 @@ int main(int argc, char *argv[])
oneshot = 1;
ret = ioctl(fd, WDIOC_GETBOOTSTATUS, &flags);
if (!ret)
- printf("Last boot is caused by: %s.\n", (flags != 0) ?
- "Watchdog" : "Power-On-Reset");
+ print_boot_status(flags);
else
printf("WDIOC_GETBOOTSTATUS error '%s'\n", strerror(errno));
break;
@@ -171,6 +248,21 @@ int main(int argc, char *argv[])
ping_rate = DEFAULT_PING_RATE;
printf("Watchdog ping rate set to %u seconds.\n", ping_rate);
break;
+ case 's':
+ flags = 0;
+ oneshot = 1;
+ ret = ioctl(fd, WDIOC_GETSTATUS, &flags);
+ if (!ret)
+ print_status(flags);
+ else
+ printf("WDIOC_GETSTATUS error '%s'\n", strerror(errno));
+ ret = ioctl(fd, WDIOC_GETTEMP, &temperature);
+ if (ret)
+ printf("WDIOC_GETTEMP: '%s'\n", strerror(errno));
+ else
+ printf("Temperature %d\n", temperature);
+
+ break;
case 't':
flags = strtoul(optarg, NULL, 0);
ret = ioctl(fd, WDIOC_SETTIMEOUT, &flags);
@@ -228,7 +320,7 @@ int main(int argc, char *argv[])
printf(" identity:\t\t%s\n", info.identity);
printf(" firmware_version:\t%u\n",
info.firmware_version);
- printf(" options:\t\t%08x\n", info.options);
+ print_status(info.options);
break;
default:
@@ -249,6 +341,10 @@ int main(int argc, char *argv[])
sleep(ping_rate);
}
end:
+ /*
+ * Send specific magic character 'V' just in case Magic Close is
+ * enabled to ensure watchdog gets disabled on close.
+ */
ret = write(fd, &v, 1);
if (ret < 0)
printf("Stopping watchdog ticks failed (%d)...\n", errno);
diff --git a/tools/testing/selftests/wireguard/netns.sh b/tools/testing/selftests/wireguard/netns.sh
index d77f4829f1e0..405ff262ca93 100755
--- a/tools/testing/selftests/wireguard/netns.sh
+++ b/tools/testing/selftests/wireguard/netns.sh
@@ -22,10 +22,12 @@
# interfaces in $ns1 and $ns2. See https://www.wireguard.com/netns/ for further
# details on how this is accomplished.
set -e
+shopt -s extglob
exec 3>&1
export LANG=C
export WG_HIDE_KEYS=never
+NPROC=( /sys/devices/system/cpu/cpu+([0-9]) ); NPROC=${#NPROC[@]}
netns0="wg-test-$$-0"
netns1="wg-test-$$-1"
netns2="wg-test-$$-2"
@@ -39,7 +41,7 @@ ip0() { pretty 0 "ip $*"; ip -n $netns0 "$@"; }
ip1() { pretty 1 "ip $*"; ip -n $netns1 "$@"; }
ip2() { pretty 2 "ip $*"; ip -n $netns2 "$@"; }
sleep() { read -t "$1" -N 1 || true; }
-waitiperf() { pretty "${1//*-}" "wait for iperf:5201 pid $2"; while [[ $(ss -N "$1" -tlpH 'sport = 5201') != *\"iperf3\",pid=$2,fd=* ]]; do sleep 0.1; done; }
+waitiperf() { pretty "${1//*-}" "wait for iperf:${3:-5201} pid $2"; while [[ $(ss -N "$1" -tlpH "sport = ${3:-5201}") != *\"iperf3\",pid=$2,fd=* ]]; do sleep 0.1; done; }
waitncatudp() { pretty "${1//*-}" "wait for udp:1111 pid $2"; while [[ $(ss -N "$1" -ulpH 'sport = 1111') != *\"ncat\",pid=$2,fd=* ]]; do sleep 0.1; done; }
waitiface() { pretty "${1//*-}" "wait for $2 to come up"; ip netns exec "$1" bash -c "while [[ \$(< \"/sys/class/net/$2/operstate\") != up ]]; do read -t .1 -N 0 || true; done;"; }
@@ -141,6 +143,17 @@ tests() {
n2 iperf3 -s -1 -B fd00::2 &
waitiperf $netns2 $!
n1 iperf3 -Z -t 3 -b 0 -u -c fd00::2
+
+ # TCP over IPv4, in parallel
+ local pids=( ) i
+ for ((i=0; i < NPROC; ++i)) do
+ n2 iperf3 -p $(( 5200 + i )) -s -1 -B 192.168.241.2 &
+ pids+=( $! ); waitiperf $netns2 $! $(( 5200 + i ))
+ done
+ for ((i=0; i < NPROC; ++i)) do
+ n1 iperf3 -Z -t 3 -p $(( 5200 + i )) -c 192.168.241.2 &
+ done
+ wait "${pids[@]}"
}
[[ $(ip1 link show dev wg0) =~ mtu\ ([0-9]+) ]] && orig_mtu="${BASH_REMATCH[1]}"
@@ -263,7 +276,23 @@ n0 ping -W 1 -c 1 192.168.241.2
n1 wg set wg0 peer "$pub2" endpoint 192.168.241.2:7
ip2 link del wg0
ip2 link del wg1
-! n0 ping -W 1 -c 10 -f 192.168.241.2 || false # Should not crash kernel
+read _ _ tx_bytes_before < <(n0 wg show wg1 transfer)
+! n0 ping -W 1 -c 10 -f 192.168.241.2 || false
+sleep 1
+read _ _ tx_bytes_after < <(n0 wg show wg1 transfer)
+if ! (( tx_bytes_after - tx_bytes_before < 70000 )); then
+ errstart=$'\x1b[37m\x1b[41m\x1b[1m'
+ errend=$'\x1b[0m'
+ echo "${errstart} ${errend}"
+ echo "${errstart} E R R O R ${errend}"
+ echo "${errstart} ${errend}"
+ echo "${errstart} This architecture does not do the right thing ${errend}"
+ echo "${errstart} with cross-namespace routing loops. This test ${errend}"
+ echo "${errstart} has thus technically failed but, as this issue ${errend}"
+ echo "${errstart} is as yet unsolved, these tests will continue ${errend}"
+ echo "${errstart} onward. :( ${errend}"
+ echo "${errstart} ${errend}"
+fi
ip0 link del wg1
ip1 link del wg0
@@ -316,6 +345,14 @@ pp sleep 3
n2 ping -W 1 -c 1 192.168.241.1
n1 wg set wg0 peer "$pub2" persistent-keepalive 0
+# Test that sk_bound_dev_if works
+n1 ping -I wg0 -c 1 -W 1 192.168.241.2
+# What about when the mark changes and the packet must be rerouted?
+n1 iptables -t mangle -I OUTPUT -j MARK --set-xmark 1
+n1 ping -c 1 -W 1 192.168.241.2 # First the boring case
+n1 ping -I wg0 -c 1 -W 1 192.168.241.2 # Then the sk_bound_dev_if case
+n1 iptables -t mangle -D OUTPUT -j MARK --set-xmark 1
+
# Test that onion routing works, even when it loops
n1 wg set wg0 peer "$pub3" allowed-ips 192.168.242.2/32 endpoint 192.168.241.2:5
ip1 addr add 192.168.242.1/24 dev wg0
@@ -342,6 +379,7 @@ ip1 -6 rule add table main suppress_prefixlength 0
ip1 -4 route add default dev wg0 table 51820
ip1 -4 rule add not fwmark 51820 table 51820
ip1 -4 rule add table main suppress_prefixlength 0
+n1 bash -c 'printf 0 > /proc/sys/net/ipv4/conf/vethc/rp_filter'
# Flood the pings instead of sending just one, to trigger routing table reference counting bugs.
n1 ping -W 1 -c 100 -f 192.168.99.7
n1 ping -W 1 -c 100 -f abab::1111
@@ -476,10 +514,32 @@ n2 bash -c 'printf 0 > /proc/sys/net/ipv4/conf/all/rp_filter'
n1 ping -W 1 -c 1 192.168.241.2
[[ $(n2 wg show wg0 endpoints) == "$pub1 10.0.0.3:1" ]]
-ip1 link del veth1
-ip1 link del veth3
-ip1 link del wg0
-ip2 link del wg0
+ip1 link del dev veth3
+ip1 link del dev wg0
+ip2 link del dev wg0
+
+# Make sure persistent keep alives are sent when an adapter comes up
+ip1 link add dev wg0 type wireguard
+n1 wg set wg0 private-key <(echo "$key1") peer "$pub2" endpoint 10.0.0.1:1 persistent-keepalive 1
+read _ _ tx_bytes < <(n1 wg show wg0 transfer)
+[[ $tx_bytes -eq 0 ]]
+ip1 link set dev wg0 up
+read _ _ tx_bytes < <(n1 wg show wg0 transfer)
+[[ $tx_bytes -gt 0 ]]
+ip1 link del dev wg0
+# This should also happen even if the private key is set later
+ip1 link add dev wg0 type wireguard
+n1 wg set wg0 peer "$pub2" endpoint 10.0.0.1:1 persistent-keepalive 1
+read _ _ tx_bytes < <(n1 wg show wg0 transfer)
+[[ $tx_bytes -eq 0 ]]
+ip1 link set dev wg0 up
+read _ _ tx_bytes < <(n1 wg show wg0 transfer)
+[[ $tx_bytes -eq 0 ]]
+n1 wg set wg0 private-key <(echo "$key1")
+read _ _ tx_bytes < <(n1 wg show wg0 transfer)
+[[ $tx_bytes -gt 0 ]]
+ip1 link del dev veth1
+ip1 link del dev wg0
# We test that Netlink/IPC is working properly by doing things that usually cause split responses
ip0 link add dev wg0 type wireguard
@@ -587,6 +647,28 @@ ip0 link set wg0 up
kill $ncat_pid
ip0 link del wg0
+# Ensure that dst_cache references don't outlive netns lifetime
+ip1 link add dev wg0 type wireguard
+ip2 link add dev wg0 type wireguard
+configure_peers
+ip1 link add veth1 type veth peer name veth2
+ip1 link set veth2 netns $netns2
+ip1 addr add fd00:aa::1/64 dev veth1
+ip2 addr add fd00:aa::2/64 dev veth2
+ip1 link set veth1 up
+ip2 link set veth2 up
+waitiface $netns1 veth1
+waitiface $netns2 veth2
+ip1 -6 route add default dev veth1 via fd00:aa::2
+ip2 -6 route add default dev veth2 via fd00:aa::1
+n1 wg set wg0 peer "$pub2" endpoint [fd00:aa::2]:2
+n2 wg set wg0 peer "$pub1" endpoint [fd00:aa::1]:1
+n1 ping6 -c 1 fd00::2
+pp ip netns delete $netns1
+pp ip netns delete $netns2
+pp ip netns add $netns1
+pp ip netns add $netns2
+
# Ensure there aren't circular reference loops
ip1 link add wg1 type wireguard
ip2 link add wg2 type wireguard
@@ -605,7 +687,7 @@ while read -t 0.1 -r line 2>/dev/null || [[ $? -ne 142 ]]; do
done < /dev/kmsg
alldeleted=1
for object in "${!objects[@]}"; do
- if [[ ${objects["$object"]} != *createddestroyed ]]; then
+ if [[ ${objects["$object"]} != *createddestroyed && ${objects["$object"]} != *createdcreateddestroyeddestroyed ]]; then
echo "Error: $object: merely ${objects["$object"]}" >&3
alldeleted=0
fi
diff --git a/tools/testing/selftests/wireguard/qemu/.gitignore b/tools/testing/selftests/wireguard/qemu/.gitignore
index bfa15e6feb2f..42ab9d72b37b 100644
--- a/tools/testing/selftests/wireguard/qemu/.gitignore
+++ b/tools/testing/selftests/wireguard/qemu/.gitignore
@@ -1,3 +1,4 @@
# SPDX-License-Identifier: GPL-2.0-only
build/
distfiles/
+ccache/
diff --git a/tools/testing/selftests/wireguard/qemu/Makefile b/tools/testing/selftests/wireguard/qemu/Makefile
index 4bdd6c1a19d3..e95bd56b332f 100644
--- a/tools/testing/selftests/wireguard/qemu/Makefile
+++ b/tools/testing/selftests/wireguard/qemu/Makefile
@@ -4,26 +4,22 @@
PWD := $(shell pwd)
-CHOST := $(shell gcc -dumpmachine)
-HOST_ARCH := $(firstword $(subst -, ,$(CHOST)))
-ifneq (,$(ARCH))
-CBUILD := $(subst -gcc,,$(lastword $(subst /, ,$(firstword $(wildcard $(foreach bindir,$(subst :, ,$(PATH)),$(bindir)/$(ARCH)-*-gcc))))))
-ifeq (,$(CBUILD))
-$(error The toolchain for $(ARCH) is not installed)
-endif
-else
-CBUILD := $(CHOST)
-ARCH := $(firstword $(subst -, ,$(CBUILD)))
-endif
-
# Set these from the environment to override
KERNEL_PATH ?= $(PWD)/../../../../..
BUILD_PATH ?= $(PWD)/build/$(ARCH)
DISTFILES_PATH ?= $(PWD)/distfiles
NR_CPUS ?= 4
+ARCH ?=
+CBUILD := $(shell gcc -dumpmachine)
+HOST_ARCH := $(firstword $(subst -, ,$(CBUILD)))
+ifeq ($(ARCH),)
+ARCH := $(HOST_ARCH)
+endif
MIRROR := https://download.wireguard.com/qemu-test/distfiles/
+KERNEL_BUILD_PATH := $(BUILD_PATH)/kernel$(if $(findstring yes,$(DEBUG_KERNEL)),-debug)
+
default: qemu
# variable name, tarball project name, version, tarball extension, default URI base
@@ -36,100 +32,100 @@ $(call file_download,$$($(1)_NAME)$(4),$(5),$(6))
endef
define file_download =
-$(DISTFILES_PATH)/$(1):
+$(DISTFILES_PATH)/$(1): | $(4)
mkdir -p $(DISTFILES_PATH)
- flock -x $$@.lock -c '[ -f $$@ ] && exit 0; wget -O $$@.tmp $(MIRROR)$(1) || wget -O $$@.tmp $(2)$(1) || rm -f $$@.tmp; [ -f $$@.tmp ] || exit 1; if echo "$(3) $$@.tmp" | sha256sum -c -; then mv $$@.tmp $$@; else rm -f $$@.tmp; exit 71; fi'
+ flock -x $$@.lock -c '[ -f $$@ ] && exit 0; wget -O $$@.tmp $(MIRROR)$(1) || wget -O $$@.tmp $(2)$(1) || rm -f $$@.tmp; [ -f $$@.tmp ] || exit 1; if ([ -n "$(4)" ] && sed -n "s#^\([a-f0-9]\{64\}\) \($(1)\)\$$$$#\1 $(DISTFILES_PATH)/\2.tmp#p" "$(4)" || echo "$(3) $$@.tmp") | sha256sum -c -; then mv $$@.tmp $$@; else rm -f $$@.tmp; exit 71; fi'
endef
-$(eval $(call tar_download,MUSL,musl,1.2.0,.tar.gz,https://musl.libc.org/releases/,c6de7b191139142d3f9a7b5b702c9cae1b5ee6e7f57e582da9328629408fd4e8))
-$(eval $(call tar_download,IPERF,iperf,3.7,.tar.gz,https://downloads.es.net/pub/iperf/,d846040224317caf2f75c843d309a950a7db23f9b44b94688ccbe557d6d1710c))
-$(eval $(call tar_download,BASH,bash,5.0,.tar.gz,https://ftp.gnu.org/gnu/bash/,b4a80f2ac66170b2913efbfb9f2594f1f76c7b1afd11f799e22035d63077fb4d))
-$(eval $(call tar_download,IPROUTE2,iproute2,5.6.0,.tar.xz,https://www.kernel.org/pub/linux/utils/net/iproute2/,1b5b0e25ce6e23da7526ea1da044e814ad85ba761b10dd29c2b027c056b04692))
-$(eval $(call tar_download,IPTABLES,iptables,1.8.4,.tar.bz2,https://www.netfilter.org/projects/iptables/files/,993a3a5490a544c2cbf2ef15cf7e7ed21af1845baf228318d5c36ef8827e157c))
-$(eval $(call tar_download,NMAP,nmap,7.80,.tar.bz2,https://nmap.org/dist/,fcfa5a0e42099e12e4bf7a68ebe6fde05553383a682e816a7ec9256ab4773faa))
+$(eval $(call tar_download,IPERF,iperf,3.11,.tar.gz,https://downloads.es.net/pub/iperf/,de8cb409fad61a0574f4cb07eb19ce1159707403ac2dc01b5d175e91240b7e5f))
+$(eval $(call tar_download,BASH,bash,5.1.16,.tar.gz,https://ftp.gnu.org/gnu/bash/,5bac17218d3911834520dad13cd1f85ab944e1c09ae1aba55906be1f8192f558))
+$(eval $(call tar_download,IPROUTE2,iproute2,5.17.0,.tar.gz,https://www.kernel.org/pub/linux/utils/net/iproute2/,bda331d5c4606138892f23a565d78fca18919b4d508a0b7ca8391c2da2db68b9))
+$(eval $(call tar_download,IPTABLES,iptables,1.8.7,.tar.bz2,https://www.netfilter.org/projects/iptables/files/,c109c96bb04998cd44156622d36f8e04b140701ec60531a10668cfdff5e8d8f0))
+$(eval $(call tar_download,NMAP,nmap,7.92,.tgz,https://nmap.org/dist/,064183ea642dc4c12b1ab3b5358ce1cef7d2e7e11ffa2849f16d339f5b717117))
$(eval $(call tar_download,IPUTILS,iputils,s20190709,.tar.gz,https://github.com/iputils/iputils/archive/s20190709.tar.gz/#,a15720dd741d7538dd2645f9f516d193636ae4300ff7dbc8bfca757bf166490a))
-$(eval $(call tar_download,WIREGUARD_TOOLS,wireguard-tools,1.0.20200206,.tar.xz,https://git.zx2c4.com/wireguard-tools/snapshot/,f5207248c6a3c3e3bfc9ab30b91c1897b00802ed861e1f9faaed873366078c64))
-
-KERNEL_BUILD_PATH := $(BUILD_PATH)/kernel$(if $(findstring yes,$(DEBUG_KERNEL)),-debug)
-rwildcard=$(foreach d,$(wildcard $1*),$(call rwildcard,$d/,$2) $(filter $(subst *,%,$2),$d))
-WIREGUARD_SOURCES := $(call rwildcard,$(KERNEL_PATH)/drivers/net/wireguard/,*)
-
-export CFLAGS ?= -O3 -pipe
-export LDFLAGS ?=
-export CPPFLAGS := -I$(BUILD_PATH)/include
+$(eval $(call tar_download,WIREGUARD_TOOLS,wireguard-tools,1.0.20210914,.tar.xz,https://git.zx2c4.com/wireguard-tools/snapshot/,97ff31489217bb265b7ae850d3d0f335ab07d2652ba1feec88b734bc96bd05ac))
+export CFLAGS := -O3 -pipe
ifeq ($(HOST_ARCH),$(ARCH))
-CROSS_COMPILE_FLAG := --host=$(CHOST)
CFLAGS += -march=native
-STRIP := strip
-else
-$(info Cross compilation: building for $(CBUILD) using $(CHOST))
-CROSS_COMPILE_FLAG := --build=$(CBUILD) --host=$(CHOST)
-export CROSS_COMPILE=$(CBUILD)-
-STRIP := $(CBUILD)-strip
endif
+export LDFLAGS :=
+export CPPFLAGS :=
+
+QEMU_VPORT_RESULT :=
ifeq ($(ARCH),aarch64)
+CHOST := aarch64-linux-musl
QEMU_ARCH := aarch64
KERNEL_ARCH := arm64
KERNEL_BZIMAGE := $(KERNEL_BUILD_PATH)/arch/arm64/boot/Image
+QEMU_VPORT_RESULT := virtio-serial-device
ifeq ($(HOST_ARCH),$(ARCH))
QEMU_MACHINE := -cpu host -machine virt,gic_version=host,accel=kvm
else
-QEMU_MACHINE := -cpu cortex-a53 -machine virt
-CFLAGS += -march=armv8-a -mtune=cortex-a53
+QEMU_MACHINE := -cpu max -machine virt
+CFLAGS += -march=armv8-a
endif
else ifeq ($(ARCH),aarch64_be)
+CHOST := aarch64_be-linux-musl
QEMU_ARCH := aarch64
KERNEL_ARCH := arm64
KERNEL_BZIMAGE := $(KERNEL_BUILD_PATH)/arch/arm64/boot/Image
+QEMU_VPORT_RESULT := virtio-serial-device
ifeq ($(HOST_ARCH),$(ARCH))
QEMU_MACHINE := -cpu host -machine virt,gic_version=host,accel=kvm
else
-QEMU_MACHINE := -cpu cortex-a53 -machine virt
-CFLAGS += -march=armv8-a -mtune=cortex-a53
+QEMU_MACHINE := -cpu max -machine virt
+CFLAGS += -march=armv8-a
endif
else ifeq ($(ARCH),arm)
+CHOST := arm-linux-musleabi
QEMU_ARCH := arm
KERNEL_ARCH := arm
KERNEL_BZIMAGE := $(KERNEL_BUILD_PATH)/arch/arm/boot/zImage
+QEMU_VPORT_RESULT := virtio-serial-device
ifeq ($(HOST_ARCH),$(ARCH))
QEMU_MACHINE := -cpu host -machine virt,gic_version=host,accel=kvm
else
-QEMU_MACHINE := -cpu cortex-a15 -machine virt
-CFLAGS += -march=armv7-a -mtune=cortex-a15 -mabi=aapcs-linux
+QEMU_MACHINE := -cpu max -machine virt
+CFLAGS += -march=armv7-a -mabi=aapcs-linux
endif
else ifeq ($(ARCH),armeb)
+CHOST := armeb-linux-musleabi
QEMU_ARCH := arm
KERNEL_ARCH := arm
KERNEL_BZIMAGE := $(KERNEL_BUILD_PATH)/arch/arm/boot/zImage
+QEMU_VPORT_RESULT := virtio-serial-device
ifeq ($(HOST_ARCH),$(ARCH))
QEMU_MACHINE := -cpu host -machine virt,gic_version=host,accel=kvm
else
-QEMU_MACHINE := -cpu cortex-a15 -machine virt
-CFLAGS += -march=armv7-a -mabi=aapcs-linux # We don't pass -mtune=cortex-a15 due to a compiler bug on big endian.
+QEMU_MACHINE := -cpu max -machine virt
+CFLAGS += -march=armv7-a -mabi=aapcs-linux
LDFLAGS += -Wl,--be8
endif
else ifeq ($(ARCH),x86_64)
+CHOST := x86_64-linux-musl
QEMU_ARCH := x86_64
KERNEL_ARCH := x86_64
KERNEL_BZIMAGE := $(KERNEL_BUILD_PATH)/arch/x86/boot/bzImage
+QEMU_VPORT_RESULT := virtio-serial-device
ifeq ($(HOST_ARCH),$(ARCH))
-QEMU_MACHINE := -cpu host -machine q35,accel=kvm
+QEMU_MACHINE := -cpu host -machine microvm,accel=kvm,pit=off,pic=off,rtc=off -no-acpi
else
-QEMU_MACHINE := -cpu Skylake-Server -machine q35
-CFLAGS += -march=skylake-avx512
+QEMU_MACHINE := -cpu max -machine microvm -no-acpi
endif
else ifeq ($(ARCH),i686)
+CHOST := i686-linux-musl
QEMU_ARCH := i386
KERNEL_ARCH := x86
KERNEL_BZIMAGE := $(KERNEL_BUILD_PATH)/arch/x86/boot/bzImage
+QEMU_VPORT_RESULT := virtio-serial-device
ifeq ($(subst x86_64,i686,$(HOST_ARCH)),$(ARCH))
-QEMU_MACHINE := -cpu host -machine q35,accel=kvm
+QEMU_MACHINE := -cpu host -machine microvm,accel=kvm,pit=off,pic=off,rtc=off -no-acpi
else
-QEMU_MACHINE := -cpu coreduo -machine q35
-CFLAGS += -march=prescott
+QEMU_MACHINE := -cpu coreduo -machine microvm -no-acpi
endif
else ifeq ($(ARCH),mips64)
+CHOST := mips64-linux-musl
QEMU_ARCH := mips64
KERNEL_ARCH := mips
KERNEL_BZIMAGE := $(KERNEL_BUILD_PATH)/vmlinux
@@ -141,6 +137,7 @@ QEMU_MACHINE := -cpu MIPS64R2-generic -machine malta -smp 1
CFLAGS += -march=mips64r2 -EB
endif
else ifeq ($(ARCH),mips64el)
+CHOST := mips64el-linux-musl
QEMU_ARCH := mips64el
KERNEL_ARCH := mips
KERNEL_BZIMAGE := $(KERNEL_BUILD_PATH)/vmlinux
@@ -152,6 +149,7 @@ QEMU_MACHINE := -cpu MIPS64R2-generic -machine malta -smp 1
CFLAGS += -march=mips64r2 -EL
endif
else ifeq ($(ARCH),mips)
+CHOST := mips-linux-musl
QEMU_ARCH := mips
KERNEL_ARCH := mips
KERNEL_BZIMAGE := $(KERNEL_BUILD_PATH)/vmlinux
@@ -163,6 +161,7 @@ QEMU_MACHINE := -cpu 24Kf -machine malta -smp 1
CFLAGS += -march=mips32r2 -EB
endif
else ifeq ($(ARCH),mipsel)
+CHOST := mipsel-linux-musl
QEMU_ARCH := mipsel
KERNEL_ARCH := mips
KERNEL_BZIMAGE := $(KERNEL_BUILD_PATH)/vmlinux
@@ -173,17 +172,28 @@ else
QEMU_MACHINE := -cpu 24Kf -machine malta -smp 1
CFLAGS += -march=mips32r2 -EL
endif
+else ifeq ($(ARCH),powerpc64)
+CHOST := powerpc64-linux-musl
+QEMU_ARCH := ppc64
+KERNEL_ARCH := powerpc
+KERNEL_BZIMAGE := $(KERNEL_BUILD_PATH)/vmlinux
+ifeq ($(HOST_ARCH),$(ARCH))
+QEMU_MACHINE := -cpu host,accel=kvm -machine pseries
+else
+QEMU_MACHINE := -machine pseries -device spapr-rng,rng=rng -object rng-random,id=rng
+endif
else ifeq ($(ARCH),powerpc64le)
+CHOST := powerpc64le-linux-musl
QEMU_ARCH := ppc64
KERNEL_ARCH := powerpc
KERNEL_BZIMAGE := $(KERNEL_BUILD_PATH)/vmlinux
ifeq ($(HOST_ARCH),$(ARCH))
QEMU_MACHINE := -cpu host,accel=kvm -machine pseries
else
-QEMU_MACHINE := -machine pseries
+QEMU_MACHINE := -machine pseries -device spapr-rng,rng=rng -object rng-random,id=rng
endif
-CFLAGS += -mcpu=powerpc64le -mlong-double-64
else ifeq ($(ARCH),powerpc)
+CHOST := powerpc-linux-musl
QEMU_ARCH := ppc
KERNEL_ARCH := powerpc
KERNEL_BZIMAGE := $(KERNEL_BUILD_PATH)/arch/powerpc/boot/uImage
@@ -192,29 +202,91 @@ QEMU_MACHINE := -cpu host,accel=kvm -machine ppce500
else
QEMU_MACHINE := -machine ppce500
endif
-CFLAGS += -mcpu=powerpc -mlong-double-64 -msecure-plt
else ifeq ($(ARCH),m68k)
+CHOST := m68k-linux-musl
QEMU_ARCH := m68k
KERNEL_ARCH := m68k
KERNEL_BZIMAGE := $(KERNEL_BUILD_PATH)/vmlinux
KERNEL_CMDLINE := $(shell sed -n 's/CONFIG_CMDLINE=\(.*\)/\1/p' arch/m68k.config)
+QEMU_VPORT_RESULT := virtio-serial-device
+ifeq ($(HOST_ARCH),$(ARCH))
+QEMU_MACHINE := -cpu host,accel=kvm -machine virt -append $(KERNEL_CMDLINE)
+else
+QEMU_MACHINE := -machine virt -smp 1 -append $(KERNEL_CMDLINE)
+endif
+else ifeq ($(ARCH),riscv64)
+CHOST := riscv64-linux-musl
+QEMU_ARCH := riscv64
+KERNEL_ARCH := riscv
+KERNEL_BZIMAGE := $(KERNEL_BUILD_PATH)/arch/riscv/boot/Image
+QEMU_VPORT_RESULT := virtio-serial-device
+ifeq ($(HOST_ARCH),$(ARCH))
+QEMU_MACHINE := -cpu host,accel=kvm -machine virt
+else
+QEMU_MACHINE := -cpu rv64 -machine virt
+endif
+else ifeq ($(ARCH),riscv32)
+CHOST := riscv32-linux-musl
+QEMU_ARCH := riscv32
+KERNEL_ARCH := riscv
+KERNEL_BZIMAGE := $(KERNEL_BUILD_PATH)/arch/riscv/boot/Image
+QEMU_VPORT_RESULT := virtio-serial-device
ifeq ($(HOST_ARCH),$(ARCH))
-QEMU_MACHINE := -cpu host,accel=kvm -machine q800 -smp 1 -append $(KERNEL_CMDLINE)
+QEMU_MACHINE := -cpu host,accel=kvm -machine virt
else
-QEMU_MACHINE := -machine q800 -smp 1 -append $(KERNEL_CMDLINE)
+QEMU_MACHINE := -cpu rv32 -machine virt
endif
+else ifeq ($(ARCH),s390x)
+CHOST := s390x-linux-musl
+QEMU_ARCH := s390x
+KERNEL_ARCH := s390
+KERNEL_BZIMAGE := $(KERNEL_BUILD_PATH)/arch/s390/boot/bzImage
+KERNEL_CMDLINE := $(shell sed -n 's/CONFIG_CMDLINE=\(.*\)/\1/p' arch/s390x.config)
+QEMU_VPORT_RESULT := virtio-serial-ccw
+ifeq ($(HOST_ARCH),$(ARCH))
+QEMU_MACHINE := -cpu host,accel=kvm -machine s390-ccw-virtio -append $(KERNEL_CMDLINE)
else
-$(error I only build: x86_64, i686, arm, armeb, aarch64, aarch64_be, mips, mipsel, mips64, mips64el, powerpc64le, powerpc, m68k)
+QEMU_MACHINE := -cpu max -machine s390-ccw-virtio -append $(KERNEL_CMDLINE)
+endif
+else ifeq ($(ARCH),um)
+CHOST := $(HOST_ARCH)-linux-musl
+KERNEL_BZIMAGE := $(KERNEL_BUILD_PATH)/vmlinux
+KERNEL_ARCH := um
+KERNEL_CMDLINE := $(shell sed -n 's/CONFIG_CMDLINE=\(.*\)/\1/p' arch/um.config)
+else
+$(error I only build: x86_64, i686, arm, armeb, aarch64, aarch64_be, mips, mipsel, mips64, mips64el, powerpc64, powerpc64le, powerpc, m68k, riscv64, riscv32, s390x, um)
+endif
+
+TOOLCHAIN_FILENAME := $(CHOST)-cross.tgz
+TOOLCHAIN_TAR := $(DISTFILES_PATH)/$(TOOLCHAIN_FILENAME)
+TOOLCHAIN_PATH := $(BUILD_PATH)/$(CHOST)-cross
+TOOLCHAIN_DIR := https://download.wireguard.com/qemu-test/toolchains/20211123/
+$(eval $(call file_download,toolchain-sha256sums-20211123,$(TOOLCHAIN_DIR)SHA256SUMS#,83da033fd8c798df476c21d9612da2dfb896ec62fbed4ceec5eefc0e56b3f0c8))
+$(eval $(call file_download,$(TOOLCHAIN_FILENAME),$(TOOLCHAIN_DIR),,$(DISTFILES_PATH)/toolchain-sha256sums-20211123))
+
+STRIP := $(CHOST)-strip
+CROSS_COMPILE_FLAG := --build=$(CBUILD) --host=$(CHOST)
+$(info Building for $(CHOST) using $(CBUILD))
+ifneq ($(ARCH),um)
+export CROSS_COMPILE := $(CHOST)-
+endif
+export PATH := $(TOOLCHAIN_PATH)/bin:$(PATH)
+export CC := $(CHOST)-gcc
+CCACHE_PATH := $(shell which ccache 2>/dev/null)
+ifneq ($(CCACHE_PATH),)
+export KBUILD_BUILD_TIMESTAMP := Fri Jun 5 15:58:00 CEST 2015
+export PATH := $(TOOLCHAIN_PATH)/bin/ccache:$(PATH)
+export CCACHE_SLOPPINESS := file_macro,time_macros
+export CCACHE_DIR ?= $(PWD)/ccache
endif
-REAL_CC := $(CBUILD)-gcc
-MUSL_CC := $(BUILD_PATH)/musl-gcc
-export CC := $(MUSL_CC)
-USERSPACE_DEPS := $(MUSL_CC) $(BUILD_PATH)/include/.installed $(BUILD_PATH)/include/linux/.installed
+USERSPACE_DEPS := $(TOOLCHAIN_PATH)/.installed $(TOOLCHAIN_PATH)/$(CHOST)/include/linux/.installed
+comma := ,
build: $(KERNEL_BZIMAGE)
qemu: $(KERNEL_BZIMAGE)
rm -f $(BUILD_PATH)/result
+ifneq ($(ARCH),um)
timeout --foreground 20m qemu-system-$(QEMU_ARCH) \
-nodefaults \
-nographic \
@@ -222,13 +294,21 @@ qemu: $(KERNEL_BZIMAGE)
$(QEMU_MACHINE) \
-m $$(grep -q CONFIG_DEBUG_KMEMLEAK=y $(KERNEL_BUILD_PATH)/.config && echo 1G || echo 256M) \
-serial stdio \
- -serial file:$(BUILD_PATH)/result \
+ -chardev file,path=$(BUILD_PATH)/result,id=result \
+ $(if $(QEMU_VPORT_RESULT),-device $(QEMU_VPORT_RESULT) -device virtserialport$(comma)chardev=result,-serial chardev:result) \
-no-reboot \
-monitor none \
-kernel $<
+else
+ timeout --foreground 20m $< \
+ $(KERNEL_CMDLINE) \
+ mem=$$(grep -q CONFIG_DEBUG_KMEMLEAK=y $(KERNEL_BUILD_PATH)/.config && echo 1G || echo 256M) \
+ noreboot \
+ con1=fd:51 51>$(BUILD_PATH)/result </dev/null 2>&1 | cat
+endif
grep -Fq success $(BUILD_PATH)/result
-$(BUILD_PATH)/init-cpio-spec.txt:
+$(BUILD_PATH)/init-cpio-spec.txt: $(TOOLCHAIN_PATH)/.installed $(BUILD_PATH)/init
mkdir -p $(BUILD_PATH)
echo "file /init $(BUILD_PATH)/init 755 0 0" > $@
echo "file /init.sh $(PWD)/../netns.sh 755 0 0" >> $@
@@ -246,10 +326,10 @@ $(BUILD_PATH)/init-cpio-spec.txt:
echo "slink /bin/iptables xtables-legacy-multi 777 0 0" >> $@
echo "slink /bin/ping6 ping 777 0 0" >> $@
echo "dir /lib 755 0 0" >> $@
- echo "file /lib/libc.so $(MUSL_PATH)/lib/libc.so 755 0 0" >> $@
- echo "slink /lib/ld-linux.so.1 libc.so 777 0 0" >> $@
+ echo "file /lib/libc.so $(TOOLCHAIN_PATH)/$(CHOST)/lib/libc.so 755 0 0" >> $@
+ echo "slink $$($(CHOST)-readelf -p .interp '$(BUILD_PATH)/init'| grep -o '/lib/.*') libc.so 777 0 0" >> $@
-$(KERNEL_BUILD_PATH)/.config: kernel.config arch/$(ARCH).config
+$(KERNEL_BUILD_PATH)/.config: $(TOOLCHAIN_PATH)/.installed kernel.config arch/$(ARCH).config
mkdir -p $(KERNEL_BUILD_PATH)
cp kernel.config $(KERNEL_BUILD_PATH)/minimal.config
printf 'CONFIG_NR_CPUS=$(NR_CPUS)\nCONFIG_INITRAMFS_SOURCE="$(BUILD_PATH)/init-cpio-spec.txt"\n' >> $(KERNEL_BUILD_PATH)/minimal.config
@@ -258,29 +338,27 @@ $(KERNEL_BUILD_PATH)/.config: kernel.config arch/$(ARCH).config
cd $(KERNEL_BUILD_PATH) && ARCH=$(KERNEL_ARCH) $(KERNEL_PATH)/scripts/kconfig/merge_config.sh -n $(KERNEL_BUILD_PATH)/.config $(KERNEL_BUILD_PATH)/minimal.config
$(if $(findstring yes,$(DEBUG_KERNEL)),cp debug.config $(KERNEL_BUILD_PATH) && cd $(KERNEL_BUILD_PATH) && ARCH=$(KERNEL_ARCH) $(KERNEL_PATH)/scripts/kconfig/merge_config.sh -n $(KERNEL_BUILD_PATH)/.config debug.config,)
-$(KERNEL_BZIMAGE): $(KERNEL_BUILD_PATH)/.config $(BUILD_PATH)/init-cpio-spec.txt $(MUSL_PATH)/lib/libc.so $(IPERF_PATH)/src/iperf3 $(IPUTILS_PATH)/ping $(BASH_PATH)/bash $(IPROUTE2_PATH)/misc/ss $(IPROUTE2_PATH)/ip/ip $(IPTABLES_PATH)/iptables/xtables-legacy-multi $(NMAP_PATH)/ncat/ncat $(WIREGUARD_TOOLS_PATH)/src/wg $(BUILD_PATH)/init ../netns.sh $(WIREGUARD_SOURCES)
+$(KERNEL_BZIMAGE): $(TOOLCHAIN_PATH)/.installed $(KERNEL_BUILD_PATH)/.config $(BUILD_PATH)/init-cpio-spec.txt $(IPERF_PATH)/src/iperf3 $(IPUTILS_PATH)/ping $(BASH_PATH)/bash $(IPROUTE2_PATH)/misc/ss $(IPROUTE2_PATH)/ip/ip $(IPTABLES_PATH)/iptables/xtables-legacy-multi $(NMAP_PATH)/ncat/ncat $(WIREGUARD_TOOLS_PATH)/src/wg $(BUILD_PATH)/init
$(MAKE) -C $(KERNEL_PATH) O=$(KERNEL_BUILD_PATH) ARCH=$(KERNEL_ARCH) CROSS_COMPILE=$(CROSS_COMPILE)
+.PHONY: $(KERNEL_BZIMAGE)
-$(BUILD_PATH)/include/linux/.installed: | $(KERNEL_BUILD_PATH)/.config
- $(MAKE) -C $(KERNEL_PATH) O=$(KERNEL_BUILD_PATH) INSTALL_HDR_PATH=$(BUILD_PATH) ARCH=$(KERNEL_ARCH) CROSS_COMPILE=$(CROSS_COMPILE) headers_install
+$(TOOLCHAIN_PATH)/$(CHOST)/include/linux/.installed: | $(KERNEL_BUILD_PATH)/.config $(TOOLCHAIN_PATH)/.installed
+ifneq ($(ARCH),um)
+ rm -rf $(TOOLCHAIN_PATH)/$(CHOST)/include/linux
+ $(MAKE) -C $(KERNEL_PATH) O=$(KERNEL_BUILD_PATH) INSTALL_HDR_PATH=$(TOOLCHAIN_PATH)/$(CHOST) ARCH=$(KERNEL_ARCH) CROSS_COMPILE=$(CROSS_COMPILE) headers_install
+endif
touch $@
-$(MUSL_PATH)/lib/libc.so: $(MUSL_TAR)
+$(TOOLCHAIN_PATH)/.installed: $(TOOLCHAIN_TAR)
mkdir -p $(BUILD_PATH)
flock -s $<.lock tar -C $(BUILD_PATH) -xf $<
- cd $(MUSL_PATH) && CC=$(REAL_CC) ./configure --prefix=/ --disable-static --build=$(CBUILD)
- $(MAKE) -C $(MUSL_PATH)
- $(STRIP) -s $@
-
-$(BUILD_PATH)/include/.installed: $(MUSL_PATH)/lib/libc.so
- $(MAKE) -C $(MUSL_PATH) DESTDIR=$(BUILD_PATH) install-headers
+ $(STRIP) -s $(TOOLCHAIN_PATH)/$(CHOST)/lib/libc.so
+ifneq ($(CCACHE_PATH),)
+ mkdir -p $(TOOLCHAIN_PATH)/bin/ccache
+ ln -s $(CCACHE_PATH) $(TOOLCHAIN_PATH)/bin/ccache/$(CC)
+endif
touch $@
-$(MUSL_CC): $(MUSL_PATH)/lib/libc.so
- sh $(MUSL_PATH)/tools/musl-gcc.specs.sh $(BUILD_PATH)/include $(MUSL_PATH)/lib /lib/ld-linux.so.1 > $(BUILD_PATH)/musl-gcc.specs
- printf '#!/bin/sh\nexec "$(REAL_CC)" --specs="$(BUILD_PATH)/musl-gcc.specs" "$$@"\n' > $(BUILD_PATH)/musl-gcc
- chmod +x $(BUILD_PATH)/musl-gcc
-
$(IPERF_PATH)/.installed: $(IPERF_TAR)
mkdir -p $(BUILD_PATH)
flock -s $<.lock tar -C $(BUILD_PATH) -xf $<
@@ -289,6 +367,7 @@ $(IPERF_PATH)/.installed: $(IPERF_TAR)
touch $@
$(IPERF_PATH)/src/iperf3: | $(IPERF_PATH)/.installed $(USERSPACE_DEPS)
+ cd $(IPERF_PATH) && autoreconf -fi
cd $(IPERF_PATH) && CFLAGS="$(CFLAGS) -D_GNU_SOURCE" ./configure --prefix=/ $(CROSS_COMPILE_FLAG) --enable-static --disable-shared --with-openssl=no
$(MAKE) -C $(IPERF_PATH)
$(STRIP) -s $@
@@ -304,7 +383,7 @@ $(WIREGUARD_TOOLS_PATH)/src/wg: | $(WIREGUARD_TOOLS_PATH)/.installed $(USERSPACE
$(BUILD_PATH)/init: init.c | $(USERSPACE_DEPS)
mkdir -p $(BUILD_PATH)
- $(MUSL_CC) -o $@ $(CFLAGS) $(LDFLAGS) -std=gnu11 $<
+ $(CC) -o $@ $(CFLAGS) $(LDFLAGS) -std=gnu11 $<
$(STRIP) -s $@
$(IPUTILS_PATH)/.installed: $(IPUTILS_TAR)
@@ -323,15 +402,15 @@ $(BASH_PATH)/.installed: $(BASH_TAR)
touch $@
$(BASH_PATH)/bash: | $(BASH_PATH)/.installed $(USERSPACE_DEPS)
- cd $(BASH_PATH) && ./configure --prefix=/ $(CROSS_COMPILE_FLAG) --without-bash-malloc --disable-debugger --disable-help-builtin --disable-history --disable-multibyte --disable-progcomp --disable-readline --disable-mem-scramble
+ cd $(BASH_PATH) && ./configure --prefix=/ $(CROSS_COMPILE_FLAG) --without-bash-malloc --disable-debugger --disable-help-builtin --disable-history --disable-progcomp --disable-readline --disable-mem-scramble
$(MAKE) -C $(BASH_PATH)
$(STRIP) -s $@
$(IPROUTE2_PATH)/.installed: $(IPROUTE2_TAR)
mkdir -p $(BUILD_PATH)
flock -s $<.lock tar -C $(BUILD_PATH) -xf $<
- printf 'CC:=$(CC)\nPKG_CONFIG:=pkg-config\nTC_CONFIG_XT:=n\nTC_CONFIG_ATM:=n\nTC_CONFIG_IPSET:=n\nIP_CONFIG_SETNS:=y\nHAVE_ELF:=n\nHAVE_MNL:=n\nHAVE_BERKELEY_DB:=n\nHAVE_LATEX:=n\nHAVE_PDFLATEX:=n\nCFLAGS+=-DHAVE_SETNS\n' > $(IPROUTE2_PATH)/config.mk
- printf 'lib: snapshot\n\t$$(MAKE) -C lib\nip/ip: lib\n\t$$(MAKE) -C ip ip\nmisc/ss: lib\n\t$$(MAKE) -C misc ss\n' >> $(IPROUTE2_PATH)/Makefile
+ printf 'CC:=$(CC)\nPKG_CONFIG:=pkg-config\nTC_CONFIG_XT:=n\nTC_CONFIG_ATM:=n\nTC_CONFIG_IPSET:=n\nIP_CONFIG_SETNS:=y\nHAVE_ELF:=n\nHAVE_MNL:=n\nHAVE_BERKELEY_DB:=n\nHAVE_LATEX:=n\nHAVE_PDFLATEX:=n\nCFLAGS+=-DHAVE_SETNS -DHAVE_HANDLE_AT\n' > $(IPROUTE2_PATH)/config.mk
+ printf 'libutil.a.done:\n\tflock -x $$@.lock $$(MAKE) -C lib\n\ttouch $$@\nip/ip: libutil.a.done\n\t$$(MAKE) -C ip ip\nmisc/ss: libutil.a.done\n\t$$(MAKE) -C misc ss\n' >> $(IPROUTE2_PATH)/Makefile
touch $@
$(IPROUTE2_PATH)/ip/ip: | $(IPROUTE2_PATH)/.installed $(USERSPACE_DEPS)
@@ -370,8 +449,13 @@ clean:
distclean: clean
rm -rf $(DISTFILES_PATH)
+cacheclean: clean
+ifneq ($(CCACHE_DIR),)
+ rm -rf $(CCACHE_DIR)
+endif
+
menuconfig: $(KERNEL_BUILD_PATH)/.config
$(MAKE) -C $(KERNEL_PATH) O=$(KERNEL_BUILD_PATH) ARCH=$(KERNEL_ARCH) CROSS_COMPILE=$(CROSS_COMPILE) menuconfig
-.PHONY: qemu build clean distclean menuconfig
+.PHONY: qemu build clean distclean cacheclean menuconfig
.DELETE_ON_ERROR:
diff --git a/tools/testing/selftests/wireguard/qemu/arch/aarch64.config b/tools/testing/selftests/wireguard/qemu/arch/aarch64.config
index 3d063bb247bb..09016880ce03 100644
--- a/tools/testing/selftests/wireguard/qemu/arch/aarch64.config
+++ b/tools/testing/selftests/wireguard/qemu/arch/aarch64.config
@@ -1,5 +1,8 @@
CONFIG_SERIAL_AMBA_PL011=y
CONFIG_SERIAL_AMBA_PL011_CONSOLE=y
+CONFIG_VIRTIO_MENU=y
+CONFIG_VIRTIO_MMIO=y
+CONFIG_VIRTIO_CONSOLE=y
CONFIG_CMDLINE_BOOL=y
-CONFIG_CMDLINE="console=ttyAMA0 wg.success=ttyAMA1"
+CONFIG_CMDLINE="console=ttyAMA0 wg.success=vport0p1 panic_on_warn=1"
CONFIG_FRAME_WARN=1280
diff --git a/tools/testing/selftests/wireguard/qemu/arch/aarch64_be.config b/tools/testing/selftests/wireguard/qemu/arch/aarch64_be.config
index dbdc7e406a7b..19ff66e4c602 100644
--- a/tools/testing/selftests/wireguard/qemu/arch/aarch64_be.config
+++ b/tools/testing/selftests/wireguard/qemu/arch/aarch64_be.config
@@ -1,6 +1,9 @@
CONFIG_CPU_BIG_ENDIAN=y
CONFIG_SERIAL_AMBA_PL011=y
CONFIG_SERIAL_AMBA_PL011_CONSOLE=y
+CONFIG_VIRTIO_MENU=y
+CONFIG_VIRTIO_MMIO=y
+CONFIG_VIRTIO_CONSOLE=y
CONFIG_CMDLINE_BOOL=y
-CONFIG_CMDLINE="console=ttyAMA0 wg.success=ttyAMA1"
+CONFIG_CMDLINE="console=ttyAMA0 wg.success=vport0p1 panic_on_warn=1"
CONFIG_FRAME_WARN=1280
diff --git a/tools/testing/selftests/wireguard/qemu/arch/arm.config b/tools/testing/selftests/wireguard/qemu/arch/arm.config
index 148f49905418..0579c66be83e 100644
--- a/tools/testing/selftests/wireguard/qemu/arch/arm.config
+++ b/tools/testing/selftests/wireguard/qemu/arch/arm.config
@@ -4,6 +4,10 @@ CONFIG_ARCH_VIRT=y
CONFIG_THUMB2_KERNEL=n
CONFIG_SERIAL_AMBA_PL011=y
CONFIG_SERIAL_AMBA_PL011_CONSOLE=y
+CONFIG_VIRTIO_MENU=y
+CONFIG_VIRTIO_MMIO=y
+CONFIG_VIRTIO_CONSOLE=y
+CONFIG_COMPAT_32BIT_TIME=y
CONFIG_CMDLINE_BOOL=y
-CONFIG_CMDLINE="console=ttyAMA0 wg.success=ttyAMA1"
+CONFIG_CMDLINE="console=ttyAMA0 wg.success=vport0p1 panic_on_warn=1"
CONFIG_FRAME_WARN=1024
diff --git a/tools/testing/selftests/wireguard/qemu/arch/armeb.config b/tools/testing/selftests/wireguard/qemu/arch/armeb.config
index bd76b07d00a2..2a3307bbe534 100644
--- a/tools/testing/selftests/wireguard/qemu/arch/armeb.config
+++ b/tools/testing/selftests/wireguard/qemu/arch/armeb.config
@@ -4,7 +4,11 @@ CONFIG_ARCH_VIRT=y
CONFIG_THUMB2_KERNEL=n
CONFIG_SERIAL_AMBA_PL011=y
CONFIG_SERIAL_AMBA_PL011_CONSOLE=y
+CONFIG_VIRTIO_MENU=y
+CONFIG_VIRTIO_MMIO=y
+CONFIG_VIRTIO_CONSOLE=y
+CONFIG_COMPAT_32BIT_TIME=y
CONFIG_CMDLINE_BOOL=y
-CONFIG_CMDLINE="console=ttyAMA0 wg.success=ttyAMA1"
+CONFIG_CMDLINE="console=ttyAMA0 wg.success=vport0p1 panic_on_warn=1"
CONFIG_CPU_BIG_ENDIAN=y
CONFIG_FRAME_WARN=1024
diff --git a/tools/testing/selftests/wireguard/qemu/arch/i686.config b/tools/testing/selftests/wireguard/qemu/arch/i686.config
index a85025d7206e..35b06502606f 100644
--- a/tools/testing/selftests/wireguard/qemu/arch/i686.config
+++ b/tools/testing/selftests/wireguard/qemu/arch/i686.config
@@ -1,5 +1,10 @@
CONFIG_SERIAL_8250=y
CONFIG_SERIAL_8250_CONSOLE=y
+CONFIG_VIRTIO_MENU=y
+CONFIG_VIRTIO_MMIO=y
+CONFIG_VIRTIO_CONSOLE=y
+CONFIG_VIRTIO_MMIO_CMDLINE_DEVICES=y
+CONFIG_COMPAT_32BIT_TIME=y
CONFIG_CMDLINE_BOOL=y
-CONFIG_CMDLINE="console=ttyS0 wg.success=ttyS1"
+CONFIG_CMDLINE="console=ttyS0 wg.success=vport0p1 panic_on_warn=1 reboot=t"
CONFIG_FRAME_WARN=1024
diff --git a/tools/testing/selftests/wireguard/qemu/arch/m68k.config b/tools/testing/selftests/wireguard/qemu/arch/m68k.config
index 62a15bdb877e..39c48cba56b7 100644
--- a/tools/testing/selftests/wireguard/qemu/arch/m68k.config
+++ b/tools/testing/selftests/wireguard/qemu/arch/m68k.config
@@ -1,9 +1,7 @@
CONFIG_MMU=y
+CONFIG_VIRT=y
CONFIG_M68KCLASSIC=y
-CONFIG_M68040=y
-CONFIG_MAC=y
-CONFIG_SERIAL_PMACZILOG=y
-CONFIG_SERIAL_PMACZILOG_TTYS=y
-CONFIG_SERIAL_PMACZILOG_CONSOLE=y
-CONFIG_CMDLINE="console=ttyS0 wg.success=ttyS1"
+CONFIG_VIRTIO_CONSOLE=y
+CONFIG_COMPAT_32BIT_TIME=y
+CONFIG_CMDLINE="console=ttyGF0 wg.success=vport0p1 panic_on_warn=1"
CONFIG_FRAME_WARN=1024
diff --git a/tools/testing/selftests/wireguard/qemu/arch/mips.config b/tools/testing/selftests/wireguard/qemu/arch/mips.config
index df71d6b95546..2a84402353ab 100644
--- a/tools/testing/selftests/wireguard/qemu/arch/mips.config
+++ b/tools/testing/selftests/wireguard/qemu/arch/mips.config
@@ -6,6 +6,7 @@ CONFIG_POWER_RESET=y
CONFIG_POWER_RESET_SYSCON=y
CONFIG_SERIAL_8250=y
CONFIG_SERIAL_8250_CONSOLE=y
+CONFIG_COMPAT_32BIT_TIME=y
CONFIG_CMDLINE_BOOL=y
-CONFIG_CMDLINE="console=ttyS0 wg.success=ttyS1"
+CONFIG_CMDLINE="console=ttyS0 wg.success=ttyS1 panic_on_warn=1"
CONFIG_FRAME_WARN=1024
diff --git a/tools/testing/selftests/wireguard/qemu/arch/mips64.config b/tools/testing/selftests/wireguard/qemu/arch/mips64.config
index 90c783f725c4..0994947e3392 100644
--- a/tools/testing/selftests/wireguard/qemu/arch/mips64.config
+++ b/tools/testing/selftests/wireguard/qemu/arch/mips64.config
@@ -10,5 +10,5 @@ CONFIG_POWER_RESET_SYSCON=y
CONFIG_SERIAL_8250=y
CONFIG_SERIAL_8250_CONSOLE=y
CONFIG_CMDLINE_BOOL=y
-CONFIG_CMDLINE="console=ttyS0 wg.success=ttyS1"
+CONFIG_CMDLINE="console=ttyS0 wg.success=ttyS1 panic_on_warn=1"
CONFIG_FRAME_WARN=1280
diff --git a/tools/testing/selftests/wireguard/qemu/arch/mips64el.config b/tools/testing/selftests/wireguard/qemu/arch/mips64el.config
index 435b0b43e00c..591184342f47 100644
--- a/tools/testing/selftests/wireguard/qemu/arch/mips64el.config
+++ b/tools/testing/selftests/wireguard/qemu/arch/mips64el.config
@@ -11,5 +11,5 @@ CONFIG_POWER_RESET_SYSCON=y
CONFIG_SERIAL_8250=y
CONFIG_SERIAL_8250_CONSOLE=y
CONFIG_CMDLINE_BOOL=y
-CONFIG_CMDLINE="console=ttyS0 wg.success=ttyS1"
+CONFIG_CMDLINE="console=ttyS0 wg.success=ttyS1 panic_on_warn=1"
CONFIG_FRAME_WARN=1280
diff --git a/tools/testing/selftests/wireguard/qemu/arch/mipsel.config b/tools/testing/selftests/wireguard/qemu/arch/mipsel.config
index 62bb50c4a85f..56146a101e7e 100644
--- a/tools/testing/selftests/wireguard/qemu/arch/mipsel.config
+++ b/tools/testing/selftests/wireguard/qemu/arch/mipsel.config
@@ -7,6 +7,7 @@ CONFIG_POWER_RESET=y
CONFIG_POWER_RESET_SYSCON=y
CONFIG_SERIAL_8250=y
CONFIG_SERIAL_8250_CONSOLE=y
+CONFIG_COMPAT_32BIT_TIME=y
CONFIG_CMDLINE_BOOL=y
-CONFIG_CMDLINE="console=ttyS0 wg.success=ttyS1"
+CONFIG_CMDLINE="console=ttyS0 wg.success=ttyS1 panic_on_warn=1"
CONFIG_FRAME_WARN=1024
diff --git a/tools/testing/selftests/wireguard/qemu/arch/powerpc.config b/tools/testing/selftests/wireguard/qemu/arch/powerpc.config
index 57957093b71b..174a9ffe2a36 100644
--- a/tools/testing/selftests/wireguard/qemu/arch/powerpc.config
+++ b/tools/testing/selftests/wireguard/qemu/arch/powerpc.config
@@ -4,7 +4,8 @@ CONFIG_PPC_85xx=y
CONFIG_PHYS_64BIT=y
CONFIG_SERIAL_8250=y
CONFIG_SERIAL_8250_CONSOLE=y
+CONFIG_COMPAT_32BIT_TIME=y
CONFIG_MATH_EMULATION=y
CONFIG_CMDLINE_BOOL=y
-CONFIG_CMDLINE="console=ttyS0 wg.success=ttyS1"
+CONFIG_CMDLINE="console=ttyS0 wg.success=ttyS1 panic_on_warn=1"
CONFIG_FRAME_WARN=1024
diff --git a/tools/testing/selftests/wireguard/qemu/arch/powerpc64.config b/tools/testing/selftests/wireguard/qemu/arch/powerpc64.config
new file mode 100644
index 000000000000..737194b7619e
--- /dev/null
+++ b/tools/testing/selftests/wireguard/qemu/arch/powerpc64.config
@@ -0,0 +1,13 @@
+CONFIG_PPC64=y
+CONFIG_PPC_PSERIES=y
+CONFIG_ALTIVEC=y
+CONFIG_VSX=y
+CONFIG_PPC_OF_BOOT_TRAMPOLINE=y
+CONFIG_PPC_RADIX_MMU=y
+CONFIG_HVC_CONSOLE=y
+CONFIG_CPU_BIG_ENDIAN=y
+CONFIG_CMDLINE_BOOL=y
+CONFIG_CMDLINE="console=hvc0 wg.success=hvc1 panic_on_warn=1"
+CONFIG_SECTION_MISMATCH_WARN_ONLY=y
+CONFIG_FRAME_WARN=1280
+CONFIG_THREAD_SHIFT=14
diff --git a/tools/testing/selftests/wireguard/qemu/arch/powerpc64le.config b/tools/testing/selftests/wireguard/qemu/arch/powerpc64le.config
index f52f1e2bc7f6..8148b9d1220a 100644
--- a/tools/testing/selftests/wireguard/qemu/arch/powerpc64le.config
+++ b/tools/testing/selftests/wireguard/qemu/arch/powerpc64le.config
@@ -7,7 +7,7 @@ CONFIG_PPC_RADIX_MMU=y
CONFIG_HVC_CONSOLE=y
CONFIG_CPU_LITTLE_ENDIAN=y
CONFIG_CMDLINE_BOOL=y
-CONFIG_CMDLINE="console=hvc0 wg.success=hvc1"
+CONFIG_CMDLINE="console=hvc0 wg.success=hvc1 panic_on_warn=1"
CONFIG_SECTION_MISMATCH_WARN_ONLY=y
CONFIG_FRAME_WARN=1280
CONFIG_THREAD_SHIFT=14
diff --git a/tools/testing/selftests/wireguard/qemu/arch/riscv32.config b/tools/testing/selftests/wireguard/qemu/arch/riscv32.config
new file mode 100644
index 000000000000..a7f8e8a95625
--- /dev/null
+++ b/tools/testing/selftests/wireguard/qemu/arch/riscv32.config
@@ -0,0 +1,14 @@
+CONFIG_NONPORTABLE=y
+CONFIG_ARCH_RV32I=y
+CONFIG_MMU=y
+CONFIG_FPU=y
+CONFIG_SOC_VIRT=y
+CONFIG_RISCV_ISA_FALLBACK=y
+CONFIG_SERIAL_8250=y
+CONFIG_SERIAL_8250_CONSOLE=y
+CONFIG_SERIAL_OF_PLATFORM=y
+CONFIG_VIRTIO_MENU=y
+CONFIG_VIRTIO_MMIO=y
+CONFIG_VIRTIO_CONSOLE=y
+CONFIG_CMDLINE="console=ttyS0 wg.success=vport0p1 panic_on_warn=1"
+CONFIG_CMDLINE_FORCE=y
diff --git a/tools/testing/selftests/wireguard/qemu/arch/riscv64.config b/tools/testing/selftests/wireguard/qemu/arch/riscv64.config
new file mode 100644
index 000000000000..daeb3e5e0965
--- /dev/null
+++ b/tools/testing/selftests/wireguard/qemu/arch/riscv64.config
@@ -0,0 +1,13 @@
+CONFIG_ARCH_RV64I=y
+CONFIG_MMU=y
+CONFIG_FPU=y
+CONFIG_SOC_VIRT=y
+CONFIG_RISCV_ISA_FALLBACK=y
+CONFIG_SERIAL_8250=y
+CONFIG_SERIAL_8250_CONSOLE=y
+CONFIG_SERIAL_OF_PLATFORM=y
+CONFIG_VIRTIO_MENU=y
+CONFIG_VIRTIO_MMIO=y
+CONFIG_VIRTIO_CONSOLE=y
+CONFIG_CMDLINE="console=ttyS0 wg.success=vport0p1 panic_on_warn=1"
+CONFIG_CMDLINE_FORCE=y
diff --git a/tools/testing/selftests/wireguard/qemu/arch/s390x.config b/tools/testing/selftests/wireguard/qemu/arch/s390x.config
new file mode 100644
index 000000000000..a7b44dca0b0a
--- /dev/null
+++ b/tools/testing/selftests/wireguard/qemu/arch/s390x.config
@@ -0,0 +1,6 @@
+CONFIG_SCLP_VT220_TTY=y
+CONFIG_SCLP_VT220_CONSOLE=y
+CONFIG_VIRTIO_MENU=y
+CONFIG_VIRTIO_CONSOLE=y
+CONFIG_S390_GUEST=y
+CONFIG_CMDLINE="console=ttysclp0 wg.success=vport0p1 panic_on_warn=1"
diff --git a/tools/testing/selftests/wireguard/qemu/arch/um.config b/tools/testing/selftests/wireguard/qemu/arch/um.config
new file mode 100644
index 000000000000..c8b229e0810e
--- /dev/null
+++ b/tools/testing/selftests/wireguard/qemu/arch/um.config
@@ -0,0 +1,3 @@
+CONFIG_64BIT=y
+CONFIG_CMDLINE="wg.success=tty1 panic_on_warn=1"
+CONFIG_FRAME_WARN=1280
diff --git a/tools/testing/selftests/wireguard/qemu/arch/x86_64.config b/tools/testing/selftests/wireguard/qemu/arch/x86_64.config
index 00a1ef4869d5..cf2d1376d121 100644
--- a/tools/testing/selftests/wireguard/qemu/arch/x86_64.config
+++ b/tools/testing/selftests/wireguard/qemu/arch/x86_64.config
@@ -1,5 +1,9 @@
CONFIG_SERIAL_8250=y
CONFIG_SERIAL_8250_CONSOLE=y
+CONFIG_VIRTIO_MENU=y
+CONFIG_VIRTIO_MMIO=y
+CONFIG_VIRTIO_CONSOLE=y
+CONFIG_VIRTIO_MMIO_CMDLINE_DEVICES=y
CONFIG_CMDLINE_BOOL=y
-CONFIG_CMDLINE="console=ttyS0 wg.success=ttyS1"
+CONFIG_CMDLINE="console=ttyS0 wg.success=vport0p1 panic_on_warn=1 reboot=t"
CONFIG_FRAME_WARN=1280
diff --git a/tools/testing/selftests/wireguard/qemu/debug.config b/tools/testing/selftests/wireguard/qemu/debug.config
index b50c2085c1ac..9d172210e2c6 100644
--- a/tools/testing/selftests/wireguard/qemu/debug.config
+++ b/tools/testing/selftests/wireguard/qemu/debug.config
@@ -1,5 +1,4 @@
CONFIG_LOCALVERSION="-debug"
-CONFIG_ENABLE_MUST_CHECK=y
CONFIG_FRAME_POINTER=y
CONFIG_STACK_VALIDATION=y
CONFIG_DEBUG_KERNEL=y
@@ -19,15 +18,12 @@ CONFIG_DEBUG_VM=y
CONFIG_DEBUG_MEMORY_INIT=y
CONFIG_HAVE_DEBUG_STACKOVERFLOW=y
CONFIG_DEBUG_STACKOVERFLOW=y
-CONFIG_HAVE_ARCH_KMEMCHECK=y
CONFIG_HAVE_ARCH_KASAN=y
CONFIG_KASAN=y
CONFIG_KASAN_INLINE=y
CONFIG_UBSAN=y
CONFIG_UBSAN_SANITIZE_ALL=y
-CONFIG_UBSAN_NULL=y
CONFIG_DEBUG_KMEMLEAK=y
-CONFIG_DEBUG_KMEMLEAK_EARLY_LOG_SIZE=8192
CONFIG_DEBUG_STACK_USAGE=y
CONFIG_DEBUG_SHIRQ=y
CONFIG_WQ_WATCHDOG=y
@@ -36,7 +32,6 @@ CONFIG_SCHED_INFO=y
CONFIG_SCHEDSTATS=y
CONFIG_SCHED_STACK_END_CHECK=y
CONFIG_DEBUG_TIMEKEEPING=y
-CONFIG_TIMER_STATS=y
CONFIG_DEBUG_PREEMPT=y
CONFIG_DEBUG_RT_MUTEXES=y
CONFIG_DEBUG_SPINLOCK=y
@@ -48,9 +43,8 @@ CONFIG_DEBUG_ATOMIC_SLEEP=y
CONFIG_TRACE_IRQFLAGS=y
CONFIG_DEBUG_BUGVERBOSE=y
CONFIG_DEBUG_LIST=y
-CONFIG_DEBUG_PI_LIST=y
+CONFIG_DEBUG_PLIST=y
CONFIG_PROVE_RCU=y
-CONFIG_SPARSE_RCU_POINTER=y
CONFIG_RCU_CPU_STALL_TIMEOUT=21
CONFIG_RCU_TRACE=y
CONFIG_RCU_EQS_DEBUG=y
diff --git a/tools/testing/selftests/wireguard/qemu/init.c b/tools/testing/selftests/wireguard/qemu/init.c
index c9698120ac9d..3e49924dd77e 100644
--- a/tools/testing/selftests/wireguard/qemu/init.c
+++ b/tools/testing/selftests/wireguard/qemu/init.c
@@ -11,6 +11,7 @@
#include <stdlib.h>
#include <stdbool.h>
#include <fcntl.h>
+#include <time.h>
#include <sys/wait.h>
#include <sys/mount.h>
#include <sys/stat.h>
@@ -21,6 +22,7 @@
#include <sys/utsname.h>
#include <sys/sendfile.h>
#include <sys/sysmacros.h>
+#include <sys/random.h>
#include <linux/random.h>
#include <linux/version.h>
@@ -56,29 +58,28 @@ static void print_banner(void)
static void seed_rng(void)
{
- int fd;
- struct {
- int entropy_count;
- int buffer_size;
- unsigned char buffer[256];
- } entropy = {
- .entropy_count = sizeof(entropy.buffer) * 8,
- .buffer_size = sizeof(entropy.buffer),
- .buffer = "Adding real entropy is not actually important for these tests. Don't try this at home, kids!"
- };
+ int bits = 256, fd;
- if (mknod("/dev/urandom", S_IFCHR | 0644, makedev(1, 9)))
- panic("mknod(/dev/urandom)");
- fd = open("/dev/urandom", O_WRONLY);
+ if (!getrandom(NULL, 0, GRND_NONBLOCK))
+ return;
+ pretty_message("[+] Fake seeding RNG...");
+ fd = open("/dev/random", O_WRONLY);
if (fd < 0)
- panic("open(urandom)");
- for (int i = 0; i < 256; ++i) {
- if (ioctl(fd, RNDADDENTROPY, &entropy) < 0)
- panic("ioctl(urandom)");
- }
+ panic("open(random)");
+ if (ioctl(fd, RNDADDTOENTCNT, &bits) < 0)
+ panic("ioctl(RNDADDTOENTCNT)");
close(fd);
}
+static void set_time(void)
+{
+ if (time(NULL))
+ return;
+ pretty_message("[+] Setting fake time...");
+ if (stime(&(time_t){1433512680}) < 0)
+ panic("settimeofday()");
+}
+
static void mount_filesystems(void)
{
pretty_message("[+] Mounting filesystems...");
@@ -122,12 +123,6 @@ static void enable_logging(void)
panic("write(exception-trace)");
close(fd);
}
- fd = open("/proc/sys/kernel/panic_on_warn", O_WRONLY);
- if (fd >= 0) {
- if (write(fd, "1\n", 2) != 2)
- panic("write(panic_on_warn)");
- close(fd);
- }
}
static void kmod_selftests(void)
@@ -270,10 +265,11 @@ static void check_leaks(void)
int main(int argc, char *argv[])
{
- seed_rng();
ensure_console();
print_banner();
mount_filesystems();
+ seed_rng();
+ set_time();
kmod_selftests();
enable_logging();
clear_leaks();
diff --git a/tools/testing/selftests/wireguard/qemu/kernel.config b/tools/testing/selftests/wireguard/qemu/kernel.config
index d531de13c95b..507555714b1d 100644
--- a/tools/testing/selftests/wireguard/qemu/kernel.config
+++ b/tools/testing/selftests/wireguard/qemu/kernel.config
@@ -18,10 +18,10 @@ CONFIG_NF_NAT=y
CONFIG_NETFILTER_XTABLES=y
CONFIG_NETFILTER_XT_NAT=y
CONFIG_NETFILTER_XT_MATCH_LENGTH=y
-CONFIG_NF_CONNTRACK_IPV4=y
-CONFIG_NF_NAT_IPV4=y
+CONFIG_NETFILTER_XT_MARK=y
CONFIG_IP_NF_IPTABLES=y
CONFIG_IP_NF_FILTER=y
+CONFIG_IP_NF_MANGLE=y
CONFIG_IP_NF_NAT=y
CONFIG_IP_ADVANCED_ROUTER=y
CONFIG_IP_MULTIPLE_TABLES=y
@@ -30,6 +30,7 @@ CONFIG_TTY=y
CONFIG_BINFMT_ELF=y
CONFIG_BINFMT_SCRIPT=y
CONFIG_VDSO=y
+CONFIG_STRICT_KERNEL_RWX=y
CONFIG_VIRTUALIZATION=y
CONFIG_HYPERVISOR_GUEST=y
CONFIG_PARAVIRT=y
@@ -40,7 +41,6 @@ CONFIG_KALLSYMS=y
CONFIG_BUG=y
CONFIG_CC_OPTIMIZE_FOR_PERFORMANCE=y
CONFIG_JUMP_LABEL=y
-CONFIG_EMBEDDED=n
CONFIG_BASE_FULL=y
CONFIG_FUTEX=y
CONFIG_SHMEM=y
@@ -56,7 +56,6 @@ CONFIG_NO_HZ_IDLE=y
CONFIG_NO_HZ_FULL=n
CONFIG_HZ_PERIODIC=n
CONFIG_HIGH_RES_TIMERS=y
-CONFIG_ARCH_RANDOM=y
CONFIG_FILE_LOCKING=y
CONFIG_POSIX_TIMERS=y
CONFIG_DEVTMPFS=y
@@ -65,6 +64,7 @@ CONFIG_PROC_SYSCTL=y
CONFIG_SYSFS=y
CONFIG_TMPFS=y
CONFIG_CONSOLE_LOGLEVEL_DEFAULT=15
+CONFIG_LOG_BUF_SHIFT=18
CONFIG_PRINTK_TIME=y
CONFIG_BLK_DEV_INITRD=y
CONFIG_LEGACY_VSYSCALL_NONE=y
diff --git a/tools/testing/selftests/x86/Makefile b/tools/testing/selftests/x86/Makefile
index 5f16821c7f63..0b872c0a42d2 100644
--- a/tools/testing/selftests/x86/Makefile
+++ b/tools/testing/selftests/x86/Makefile
@@ -6,18 +6,20 @@ include ../lib.mk
.PHONY: all all_32 all_64 warn_32bit_failure clean
UNAME_M := $(shell uname -m)
-CAN_BUILD_I386 := $(shell ./check_cc.sh $(CC) trivial_32bit_program.c -m32)
-CAN_BUILD_X86_64 := $(shell ./check_cc.sh $(CC) trivial_64bit_program.c)
-CAN_BUILD_WITH_NOPIE := $(shell ./check_cc.sh $(CC) trivial_program.c -no-pie)
+CAN_BUILD_I386 := $(shell ./check_cc.sh "$(CC)" trivial_32bit_program.c -m32)
+CAN_BUILD_X86_64 := $(shell ./check_cc.sh "$(CC)" trivial_64bit_program.c)
+CAN_BUILD_WITH_NOPIE := $(shell ./check_cc.sh "$(CC)" trivial_program.c -no-pie)
TARGETS_C_BOTHBITS := single_step_syscall sysret_ss_attrs syscall_nt test_mremap_vdso \
check_initial_reg_state sigreturn iopl ioperm \
- test_vdso test_vsyscall mov_ss_trap \
- syscall_arg_fault
+ test_vsyscall mov_ss_trap \
+ syscall_arg_fault fsgsbase_restore sigaltstack
+TARGETS_C_BOTHBITS += nx_stack
TARGETS_C_32BIT_ONLY := entry_from_vm86 test_syscall_vdso unwind_vdso \
test_FCMOV test_FCOMI test_FISTTP \
vdso_restorer
-TARGETS_C_64BIT_ONLY := fsgsbase sysret_rip syscall_numbering
+TARGETS_C_64BIT_ONLY := fsgsbase sysret_rip syscall_numbering \
+ corrupt_xstate_header amx lam test_shadow_stack
# Some selftests require 32bit support enabled also on 64bit systems
TARGETS_C_32BIT_NEEDED := ldt_gdt ptrace_syscall
@@ -33,7 +35,7 @@ BINARIES_64 := $(TARGETS_C_64BIT_ALL:%=%_64)
BINARIES_32 := $(patsubst %,$(OUTPUT)/%,$(BINARIES_32))
BINARIES_64 := $(patsubst %,$(OUTPUT)/%,$(BINARIES_64))
-CFLAGS := -O2 -g -std=gnu99 -pthread -Wall
+CFLAGS := -O2 -g -std=gnu99 -pthread -Wall $(KHDR_INCLUDES)
# call32_from_64 in thunks.S uses absolute addresses.
ifeq ($(CAN_BUILD_WITH_NOPIE),1)
@@ -70,10 +72,10 @@ all_64: $(BINARIES_64)
EXTRA_CLEAN := $(BINARIES_32) $(BINARIES_64)
-$(BINARIES_32): $(OUTPUT)/%_32: %.c
+$(BINARIES_32): $(OUTPUT)/%_32: %.c helpers.h
$(CC) -m32 -o $@ $(CFLAGS) $(EXTRA_CFLAGS) $^ -lrt -ldl -lm
-$(BINARIES_64): $(OUTPUT)/%_64: %.c
+$(BINARIES_64): $(OUTPUT)/%_64: %.c helpers.h
$(CC) -m64 -o $@ $(CFLAGS) $(EXTRA_CFLAGS) $^ -lrt -ldl
# x86_64 users should be encouraged to install 32-bit libraries
@@ -91,6 +93,10 @@ warn_32bit_failure:
echo "If you are using a Fedora-like distribution, try:"; \
echo ""; \
echo " yum install glibc-devel.*i686"; \
+ echo ""; \
+ echo "If you are using a SUSE-like distribution, try:"; \
+ echo ""; \
+ echo " zypper install gcc-32bit glibc-devel-static-32bit"; \
exit 0;
endif
@@ -104,3 +110,6 @@ $(OUTPUT)/test_syscall_vdso_32: thunks_32.S
# state.
$(OUTPUT)/check_initial_reg_state_32: CFLAGS += -Wl,-ereal_start -static
$(OUTPUT)/check_initial_reg_state_64: CFLAGS += -Wl,-ereal_start -static
+
+$(OUTPUT)/nx_stack_32: CFLAGS += -Wl,-z,noexecstack
+$(OUTPUT)/nx_stack_64: CFLAGS += -Wl,-z,noexecstack
diff --git a/tools/testing/selftests/x86/amx.c b/tools/testing/selftests/x86/amx.c
new file mode 100644
index 000000000000..d884fd69dd51
--- /dev/null
+++ b/tools/testing/selftests/x86/amx.c
@@ -0,0 +1,955 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#define _GNU_SOURCE
+#include <err.h>
+#include <errno.h>
+#include <pthread.h>
+#include <setjmp.h>
+#include <stdio.h>
+#include <string.h>
+#include <stdbool.h>
+#include <unistd.h>
+#include <x86intrin.h>
+
+#include <sys/auxv.h>
+#include <sys/mman.h>
+#include <sys/shm.h>
+#include <sys/ptrace.h>
+#include <sys/syscall.h>
+#include <sys/wait.h>
+#include <sys/uio.h>
+
+#include "../kselftest.h" /* For __cpuid_count() */
+
+#ifndef __x86_64__
+# error This test is 64-bit only
+#endif
+
+#define XSAVE_HDR_OFFSET 512
+#define XSAVE_HDR_SIZE 64
+
+struct xsave_buffer {
+ union {
+ struct {
+ char legacy[XSAVE_HDR_OFFSET];
+ char header[XSAVE_HDR_SIZE];
+ char extended[0];
+ };
+ char bytes[0];
+ };
+};
+
+static inline uint64_t xgetbv(uint32_t index)
+{
+ uint32_t eax, edx;
+
+ asm volatile("xgetbv;"
+ : "=a" (eax), "=d" (edx)
+ : "c" (index));
+ return eax + ((uint64_t)edx << 32);
+}
+
+static inline void xsave(struct xsave_buffer *xbuf, uint64_t rfbm)
+{
+ uint32_t rfbm_lo = rfbm;
+ uint32_t rfbm_hi = rfbm >> 32;
+
+ asm volatile("xsave (%%rdi)"
+ : : "D" (xbuf), "a" (rfbm_lo), "d" (rfbm_hi)
+ : "memory");
+}
+
+static inline void xrstor(struct xsave_buffer *xbuf, uint64_t rfbm)
+{
+ uint32_t rfbm_lo = rfbm;
+ uint32_t rfbm_hi = rfbm >> 32;
+
+ asm volatile("xrstor (%%rdi)"
+ : : "D" (xbuf), "a" (rfbm_lo), "d" (rfbm_hi));
+}
+
+/* err() exits and will not return */
+#define fatal_error(msg, ...) err(1, "[FAIL]\t" msg, ##__VA_ARGS__)
+
+static void sethandler(int sig, void (*handler)(int, siginfo_t *, void *),
+ int flags)
+{
+ struct sigaction sa;
+
+ memset(&sa, 0, sizeof(sa));
+ sa.sa_sigaction = handler;
+ sa.sa_flags = SA_SIGINFO | flags;
+ sigemptyset(&sa.sa_mask);
+ if (sigaction(sig, &sa, 0))
+ fatal_error("sigaction");
+}
+
+static void clearhandler(int sig)
+{
+ struct sigaction sa;
+
+ memset(&sa, 0, sizeof(sa));
+ sa.sa_handler = SIG_DFL;
+ sigemptyset(&sa.sa_mask);
+ if (sigaction(sig, &sa, 0))
+ fatal_error("sigaction");
+}
+
+#define XFEATURE_XTILECFG 17
+#define XFEATURE_XTILEDATA 18
+#define XFEATURE_MASK_XTILECFG (1 << XFEATURE_XTILECFG)
+#define XFEATURE_MASK_XTILEDATA (1 << XFEATURE_XTILEDATA)
+#define XFEATURE_MASK_XTILE (XFEATURE_MASK_XTILECFG | XFEATURE_MASK_XTILEDATA)
+
+#define CPUID_LEAF1_ECX_XSAVE_MASK (1 << 26)
+#define CPUID_LEAF1_ECX_OSXSAVE_MASK (1 << 27)
+static inline void check_cpuid_xsave(void)
+{
+ uint32_t eax, ebx, ecx, edx;
+
+ /*
+ * CPUID.1:ECX.XSAVE[bit 26] enumerates general
+ * support for the XSAVE feature set, including
+ * XGETBV.
+ */
+ __cpuid_count(1, 0, eax, ebx, ecx, edx);
+ if (!(ecx & CPUID_LEAF1_ECX_XSAVE_MASK))
+ fatal_error("cpuid: no CPU xsave support");
+ if (!(ecx & CPUID_LEAF1_ECX_OSXSAVE_MASK))
+ fatal_error("cpuid: no OS xsave support");
+}
+
+static uint32_t xbuf_size;
+
+static struct {
+ uint32_t xbuf_offset;
+ uint32_t size;
+} xtiledata;
+
+#define CPUID_LEAF_XSTATE 0xd
+#define CPUID_SUBLEAF_XSTATE_USER 0x0
+#define TILE_CPUID 0x1d
+#define TILE_PALETTE_ID 0x1
+
+static void check_cpuid_xtiledata(void)
+{
+ uint32_t eax, ebx, ecx, edx;
+
+ __cpuid_count(CPUID_LEAF_XSTATE, CPUID_SUBLEAF_XSTATE_USER,
+ eax, ebx, ecx, edx);
+
+ /*
+ * EBX enumerates the size (in bytes) required by the XSAVE
+ * instruction for an XSAVE area containing all the user state
+ * components corresponding to bits currently set in XCR0.
+ *
+ * Stash that off so it can be used to allocate buffers later.
+ */
+ xbuf_size = ebx;
+
+ __cpuid_count(CPUID_LEAF_XSTATE, XFEATURE_XTILEDATA,
+ eax, ebx, ecx, edx);
+ /*
+ * eax: XTILEDATA state component size
+ * ebx: XTILEDATA state component offset in user buffer
+ */
+ if (!eax || !ebx)
+ fatal_error("xstate cpuid: invalid tile data size/offset: %d/%d",
+ eax, ebx);
+
+ xtiledata.size = eax;
+ xtiledata.xbuf_offset = ebx;
+}
+
+/* The helpers for managing XSAVE buffer and tile states: */
+
+struct xsave_buffer *alloc_xbuf(void)
+{
+ struct xsave_buffer *xbuf;
+
+ /* XSAVE buffer should be 64B-aligned. */
+ xbuf = aligned_alloc(64, xbuf_size);
+ if (!xbuf)
+ fatal_error("aligned_alloc()");
+ return xbuf;
+}
+
+static inline void clear_xstate_header(struct xsave_buffer *buffer)
+{
+ memset(&buffer->header, 0, sizeof(buffer->header));
+}
+
+static inline uint64_t get_xstatebv(struct xsave_buffer *buffer)
+{
+ /* XSTATE_BV is at the beginning of the header: */
+ return *(uint64_t *)&buffer->header;
+}
+
+static inline void set_xstatebv(struct xsave_buffer *buffer, uint64_t bv)
+{
+ /* XSTATE_BV is at the beginning of the header: */
+ *(uint64_t *)(&buffer->header) = bv;
+}
+
+static void set_rand_tiledata(struct xsave_buffer *xbuf)
+{
+ int *ptr = (int *)&xbuf->bytes[xtiledata.xbuf_offset];
+ int data;
+ int i;
+
+ /*
+ * Ensure that 'data' is never 0. This ensures that
+ * the registers are never in their initial configuration
+ * and thus never tracked as being in the init state.
+ */
+ data = rand() | 1;
+
+ for (i = 0; i < xtiledata.size / sizeof(int); i++, ptr++)
+ *ptr = data;
+}
+
+struct xsave_buffer *stashed_xsave;
+
+static void init_stashed_xsave(void)
+{
+ stashed_xsave = alloc_xbuf();
+ if (!stashed_xsave)
+ fatal_error("failed to allocate stashed_xsave\n");
+ clear_xstate_header(stashed_xsave);
+}
+
+static void free_stashed_xsave(void)
+{
+ free(stashed_xsave);
+}
+
+/* See 'struct _fpx_sw_bytes' at sigcontext.h */
+#define SW_BYTES_OFFSET 464
+/* N.B. The struct's field name varies so read from the offset. */
+#define SW_BYTES_BV_OFFSET (SW_BYTES_OFFSET + 8)
+
+static inline struct _fpx_sw_bytes *get_fpx_sw_bytes(void *buffer)
+{
+ return (struct _fpx_sw_bytes *)(buffer + SW_BYTES_OFFSET);
+}
+
+static inline uint64_t get_fpx_sw_bytes_features(void *buffer)
+{
+ return *(uint64_t *)(buffer + SW_BYTES_BV_OFFSET);
+}
+
+/* Work around printf() being unsafe in signals: */
+#define SIGNAL_BUF_LEN 1000
+char signal_message_buffer[SIGNAL_BUF_LEN];
+void sig_print(char *msg)
+{
+ int left = SIGNAL_BUF_LEN - strlen(signal_message_buffer) - 1;
+
+ strncat(signal_message_buffer, msg, left);
+}
+
+static volatile bool noperm_signaled;
+static int noperm_errs;
+/*
+ * Signal handler for when AMX is used but
+ * permission has not been obtained.
+ */
+static void handle_noperm(int sig, siginfo_t *si, void *ctx_void)
+{
+ ucontext_t *ctx = (ucontext_t *)ctx_void;
+ void *xbuf = ctx->uc_mcontext.fpregs;
+ struct _fpx_sw_bytes *sw_bytes;
+ uint64_t features;
+
+ /* Reset the signal message buffer: */
+ signal_message_buffer[0] = '\0';
+ sig_print("\tAt SIGILL handler,\n");
+
+ if (si->si_code != ILL_ILLOPC) {
+ noperm_errs++;
+ sig_print("[FAIL]\tInvalid signal code.\n");
+ } else {
+ sig_print("[OK]\tValid signal code (ILL_ILLOPC).\n");
+ }
+
+ sw_bytes = get_fpx_sw_bytes(xbuf);
+ /*
+ * Without permission, the signal XSAVE buffer should not
+ * have room for AMX register state (aka. xtiledata).
+ * Check that the size does not overlap with where xtiledata
+ * will reside.
+ *
+ * This also implies that no state components *PAST*
+ * XTILEDATA (features >=19) can be present in the buffer.
+ */
+ if (sw_bytes->xstate_size <= xtiledata.xbuf_offset) {
+ sig_print("[OK]\tValid xstate size\n");
+ } else {
+ noperm_errs++;
+ sig_print("[FAIL]\tInvalid xstate size\n");
+ }
+
+ features = get_fpx_sw_bytes_features(xbuf);
+ /*
+ * Without permission, the XTILEDATA feature
+ * bit should not be set.
+ */
+ if ((features & XFEATURE_MASK_XTILEDATA) == 0) {
+ sig_print("[OK]\tValid xstate mask\n");
+ } else {
+ noperm_errs++;
+ sig_print("[FAIL]\tInvalid xstate mask\n");
+ }
+
+ noperm_signaled = true;
+ ctx->uc_mcontext.gregs[REG_RIP] += 3; /* Skip the faulting XRSTOR */
+}
+
+/* Return true if XRSTOR is successful; otherwise, false. */
+static inline bool xrstor_safe(struct xsave_buffer *xbuf, uint64_t mask)
+{
+ noperm_signaled = false;
+ xrstor(xbuf, mask);
+
+ /* Print any messages produced by the signal code: */
+ printf("%s", signal_message_buffer);
+ /*
+ * Reset the buffer to make sure any future printing
+ * only outputs new messages:
+ */
+ signal_message_buffer[0] = '\0';
+
+ if (noperm_errs)
+ fatal_error("saw %d errors in noperm signal handler\n", noperm_errs);
+
+ return !noperm_signaled;
+}
+
+/*
+ * Use XRSTOR to populate the XTILEDATA registers with
+ * random data.
+ *
+ * Return true if successful; otherwise, false.
+ */
+static inline bool load_rand_tiledata(struct xsave_buffer *xbuf)
+{
+ clear_xstate_header(xbuf);
+ set_xstatebv(xbuf, XFEATURE_MASK_XTILEDATA);
+ set_rand_tiledata(xbuf);
+ return xrstor_safe(xbuf, XFEATURE_MASK_XTILEDATA);
+}
+
+/* Return XTILEDATA to its initial configuration. */
+static inline void init_xtiledata(void)
+{
+ clear_xstate_header(stashed_xsave);
+ xrstor_safe(stashed_xsave, XFEATURE_MASK_XTILEDATA);
+}
+
+enum expected_result { FAIL_EXPECTED, SUCCESS_EXPECTED };
+
+/* arch_prctl() and sigaltstack() test */
+
+#define ARCH_GET_XCOMP_PERM 0x1022
+#define ARCH_REQ_XCOMP_PERM 0x1023
+
+static void req_xtiledata_perm(void)
+{
+ syscall(SYS_arch_prctl, ARCH_REQ_XCOMP_PERM, XFEATURE_XTILEDATA);
+}
+
+static void validate_req_xcomp_perm(enum expected_result exp)
+{
+ unsigned long bitmask, expected_bitmask;
+ long rc;
+
+ rc = syscall(SYS_arch_prctl, ARCH_GET_XCOMP_PERM, &bitmask);
+ if (rc) {
+ fatal_error("prctl(ARCH_GET_XCOMP_PERM) error: %ld", rc);
+ } else if (!(bitmask & XFEATURE_MASK_XTILECFG)) {
+ fatal_error("ARCH_GET_XCOMP_PERM returns XFEATURE_XTILECFG off.");
+ }
+
+ rc = syscall(SYS_arch_prctl, ARCH_REQ_XCOMP_PERM, XFEATURE_XTILEDATA);
+ if (exp == FAIL_EXPECTED) {
+ if (rc) {
+ printf("[OK]\tARCH_REQ_XCOMP_PERM saw expected failure..\n");
+ return;
+ }
+
+ fatal_error("ARCH_REQ_XCOMP_PERM saw unexpected success.\n");
+ } else if (rc) {
+ fatal_error("ARCH_REQ_XCOMP_PERM saw unexpected failure.\n");
+ }
+
+ expected_bitmask = bitmask | XFEATURE_MASK_XTILEDATA;
+
+ rc = syscall(SYS_arch_prctl, ARCH_GET_XCOMP_PERM, &bitmask);
+ if (rc) {
+ fatal_error("prctl(ARCH_GET_XCOMP_PERM) error: %ld", rc);
+ } else if (bitmask != expected_bitmask) {
+ fatal_error("ARCH_REQ_XCOMP_PERM set a wrong bitmask: %lx, expected: %lx.\n",
+ bitmask, expected_bitmask);
+ } else {
+ printf("\tARCH_REQ_XCOMP_PERM is successful.\n");
+ }
+}
+
+static void validate_xcomp_perm(enum expected_result exp)
+{
+ bool load_success = load_rand_tiledata(stashed_xsave);
+
+ if (exp == FAIL_EXPECTED) {
+ if (load_success) {
+ noperm_errs++;
+ printf("[FAIL]\tLoad tiledata succeeded.\n");
+ } else {
+ printf("[OK]\tLoad tiledata failed.\n");
+ }
+ } else if (exp == SUCCESS_EXPECTED) {
+ if (load_success) {
+ printf("[OK]\tLoad tiledata succeeded.\n");
+ } else {
+ noperm_errs++;
+ printf("[FAIL]\tLoad tiledata failed.\n");
+ }
+ }
+}
+
+#ifndef AT_MINSIGSTKSZ
+# define AT_MINSIGSTKSZ 51
+#endif
+
+static void *alloc_altstack(unsigned int size)
+{
+ void *altstack;
+
+ altstack = mmap(NULL, size, PROT_READ | PROT_WRITE,
+ MAP_PRIVATE | MAP_ANONYMOUS | MAP_STACK, -1, 0);
+
+ if (altstack == MAP_FAILED)
+ fatal_error("mmap() for altstack");
+
+ return altstack;
+}
+
+static void setup_altstack(void *addr, unsigned long size, enum expected_result exp)
+{
+ stack_t ss;
+ int rc;
+
+ memset(&ss, 0, sizeof(ss));
+ ss.ss_size = size;
+ ss.ss_sp = addr;
+
+ rc = sigaltstack(&ss, NULL);
+
+ if (exp == FAIL_EXPECTED) {
+ if (rc) {
+ printf("[OK]\tsigaltstack() failed.\n");
+ } else {
+ fatal_error("sigaltstack() succeeded unexpectedly.\n");
+ }
+ } else if (rc) {
+ fatal_error("sigaltstack()");
+ }
+}
+
+static void test_dynamic_sigaltstack(void)
+{
+ unsigned int small_size, enough_size;
+ unsigned long minsigstksz;
+ void *altstack;
+
+ minsigstksz = getauxval(AT_MINSIGSTKSZ);
+ printf("\tAT_MINSIGSTKSZ = %lu\n", minsigstksz);
+ /*
+ * getauxval() itself can return 0 for failure or
+ * success. But, in this case, AT_MINSIGSTKSZ
+ * will always return a >=0 value if implemented.
+ * Just check for 0.
+ */
+ if (minsigstksz == 0) {
+ printf("no support for AT_MINSIGSTKSZ, skipping sigaltstack tests\n");
+ return;
+ }
+
+ enough_size = minsigstksz * 2;
+
+ altstack = alloc_altstack(enough_size);
+ printf("\tAllocate memory for altstack (%u bytes).\n", enough_size);
+
+ /*
+ * Try setup_altstack() with a size which can not fit
+ * XTILEDATA. ARCH_REQ_XCOMP_PERM should fail.
+ */
+ small_size = minsigstksz - xtiledata.size;
+ printf("\tAfter sigaltstack() with small size (%u bytes).\n", small_size);
+ setup_altstack(altstack, small_size, SUCCESS_EXPECTED);
+ validate_req_xcomp_perm(FAIL_EXPECTED);
+
+ /*
+ * Try setup_altstack() with a size derived from
+ * AT_MINSIGSTKSZ. It should be more than large enough
+ * and thus ARCH_REQ_XCOMP_PERM should succeed.
+ */
+ printf("\tAfter sigaltstack() with enough size (%u bytes).\n", enough_size);
+ setup_altstack(altstack, enough_size, SUCCESS_EXPECTED);
+ validate_req_xcomp_perm(SUCCESS_EXPECTED);
+
+ /*
+ * Try to coerce setup_altstack() to again accept a
+ * too-small altstack. This ensures that big-enough
+ * sigaltstacks can not shrink to a too-small value
+ * once XTILEDATA permission is established.
+ */
+ printf("\tThen, sigaltstack() with small size (%u bytes).\n", small_size);
+ setup_altstack(altstack, small_size, FAIL_EXPECTED);
+}
+
+static void test_dynamic_state(void)
+{
+ pid_t parent, child, grandchild;
+
+ parent = fork();
+ if (parent < 0) {
+ /* fork() failed */
+ fatal_error("fork");
+ } else if (parent > 0) {
+ int status;
+ /* fork() succeeded. Now in the parent. */
+
+ wait(&status);
+ if (!WIFEXITED(status) || WEXITSTATUS(status))
+ fatal_error("arch_prctl test parent exit");
+ return;
+ }
+ /* fork() succeeded. Now in the child . */
+
+ printf("[RUN]\tCheck ARCH_REQ_XCOMP_PERM around process fork() and sigaltack() test.\n");
+
+ printf("\tFork a child.\n");
+ child = fork();
+ if (child < 0) {
+ fatal_error("fork");
+ } else if (child > 0) {
+ int status;
+
+ wait(&status);
+ if (!WIFEXITED(status) || WEXITSTATUS(status))
+ fatal_error("arch_prctl test child exit");
+ _exit(0);
+ }
+
+ /*
+ * The permission request should fail without an
+ * XTILEDATA-compatible signal stack
+ */
+ printf("\tTest XCOMP_PERM at child.\n");
+ validate_xcomp_perm(FAIL_EXPECTED);
+
+ /*
+ * Set up an XTILEDATA-compatible signal stack and
+ * also obtain permission to populate XTILEDATA.
+ */
+ printf("\tTest dynamic sigaltstack at child:\n");
+ test_dynamic_sigaltstack();
+
+ /* Ensure that XTILEDATA can be populated. */
+ printf("\tTest XCOMP_PERM again at child.\n");
+ validate_xcomp_perm(SUCCESS_EXPECTED);
+
+ printf("\tFork a grandchild.\n");
+ grandchild = fork();
+ if (grandchild < 0) {
+ /* fork() failed */
+ fatal_error("fork");
+ } else if (!grandchild) {
+ /* fork() succeeded. Now in the (grand)child. */
+ printf("\tTest XCOMP_PERM at grandchild.\n");
+
+ /*
+ * Ensure that the grandchild inherited
+ * permission and a compatible sigaltstack:
+ */
+ validate_xcomp_perm(SUCCESS_EXPECTED);
+ } else {
+ int status;
+ /* fork() succeeded. Now in the parent. */
+
+ wait(&status);
+ if (!WIFEXITED(status) || WEXITSTATUS(status))
+ fatal_error("fork test grandchild");
+ }
+
+ _exit(0);
+}
+
+static inline int __compare_tiledata_state(struct xsave_buffer *xbuf1, struct xsave_buffer *xbuf2)
+{
+ return memcmp(&xbuf1->bytes[xtiledata.xbuf_offset],
+ &xbuf2->bytes[xtiledata.xbuf_offset],
+ xtiledata.size);
+}
+
+/*
+ * Save current register state and compare it to @xbuf1.'
+ *
+ * Returns false if @xbuf1 matches the registers.
+ * Returns true if @xbuf1 differs from the registers.
+ */
+static inline bool __validate_tiledata_regs(struct xsave_buffer *xbuf1)
+{
+ struct xsave_buffer *xbuf2;
+ int ret;
+
+ xbuf2 = alloc_xbuf();
+ if (!xbuf2)
+ fatal_error("failed to allocate XSAVE buffer\n");
+
+ xsave(xbuf2, XFEATURE_MASK_XTILEDATA);
+ ret = __compare_tiledata_state(xbuf1, xbuf2);
+
+ free(xbuf2);
+
+ if (ret == 0)
+ return false;
+ return true;
+}
+
+static inline void validate_tiledata_regs_same(struct xsave_buffer *xbuf)
+{
+ int ret = __validate_tiledata_regs(xbuf);
+
+ if (ret != 0)
+ fatal_error("TILEDATA registers changed");
+}
+
+static inline void validate_tiledata_regs_changed(struct xsave_buffer *xbuf)
+{
+ int ret = __validate_tiledata_regs(xbuf);
+
+ if (ret == 0)
+ fatal_error("TILEDATA registers did not change");
+}
+
+/* tiledata inheritance test */
+
+static void test_fork(void)
+{
+ pid_t child, grandchild;
+
+ child = fork();
+ if (child < 0) {
+ /* fork() failed */
+ fatal_error("fork");
+ } else if (child > 0) {
+ /* fork() succeeded. Now in the parent. */
+ int status;
+
+ wait(&status);
+ if (!WIFEXITED(status) || WEXITSTATUS(status))
+ fatal_error("fork test child");
+ return;
+ }
+ /* fork() succeeded. Now in the child. */
+ printf("[RUN]\tCheck tile data inheritance.\n\tBefore fork(), load tiledata\n");
+
+ load_rand_tiledata(stashed_xsave);
+
+ grandchild = fork();
+ if (grandchild < 0) {
+ /* fork() failed */
+ fatal_error("fork");
+ } else if (grandchild > 0) {
+ /* fork() succeeded. Still in the first child. */
+ int status;
+
+ wait(&status);
+ if (!WIFEXITED(status) || WEXITSTATUS(status))
+ fatal_error("fork test grand child");
+ _exit(0);
+ }
+ /* fork() succeeded. Now in the (grand)child. */
+
+ /*
+ * TILEDATA registers are not preserved across fork().
+ * Ensure that their value has changed:
+ */
+ validate_tiledata_regs_changed(stashed_xsave);
+
+ _exit(0);
+}
+
+/* Context switching test */
+
+static struct _ctxtswtest_cfg {
+ unsigned int iterations;
+ unsigned int num_threads;
+} ctxtswtest_config;
+
+struct futex_info {
+ pthread_t thread;
+ int nr;
+ pthread_mutex_t mutex;
+ struct futex_info *next;
+};
+
+static void *check_tiledata(void *info)
+{
+ struct futex_info *finfo = (struct futex_info *)info;
+ struct xsave_buffer *xbuf;
+ int i;
+
+ xbuf = alloc_xbuf();
+ if (!xbuf)
+ fatal_error("unable to allocate XSAVE buffer");
+
+ /*
+ * Load random data into 'xbuf' and then restore
+ * it to the tile registers themselves.
+ */
+ load_rand_tiledata(xbuf);
+ for (i = 0; i < ctxtswtest_config.iterations; i++) {
+ pthread_mutex_lock(&finfo->mutex);
+
+ /*
+ * Ensure the register values have not
+ * diverged from those recorded in 'xbuf'.
+ */
+ validate_tiledata_regs_same(xbuf);
+
+ /* Load new, random values into xbuf and registers */
+ load_rand_tiledata(xbuf);
+
+ /*
+ * The last thread's last unlock will be for
+ * thread 0's mutex. However, thread 0 will
+ * have already exited the loop and the mutex
+ * will already be unlocked.
+ *
+ * Because this is not an ERRORCHECK mutex,
+ * that inconsistency will be silently ignored.
+ */
+ pthread_mutex_unlock(&finfo->next->mutex);
+ }
+
+ free(xbuf);
+ /*
+ * Return this thread's finfo, which is
+ * a unique value for this thread.
+ */
+ return finfo;
+}
+
+static int create_threads(int num, struct futex_info *finfo)
+{
+ int i;
+
+ for (i = 0; i < num; i++) {
+ int next_nr;
+
+ finfo[i].nr = i;
+ /*
+ * Thread 'i' will wait on this mutex to
+ * be unlocked. Lock it immediately after
+ * initialization:
+ */
+ pthread_mutex_init(&finfo[i].mutex, NULL);
+ pthread_mutex_lock(&finfo[i].mutex);
+
+ next_nr = (i + 1) % num;
+ finfo[i].next = &finfo[next_nr];
+
+ if (pthread_create(&finfo[i].thread, NULL, check_tiledata, &finfo[i]))
+ fatal_error("pthread_create()");
+ }
+ return 0;
+}
+
+static void affinitize_cpu0(void)
+{
+ cpu_set_t cpuset;
+
+ CPU_ZERO(&cpuset);
+ CPU_SET(0, &cpuset);
+
+ if (sched_setaffinity(0, sizeof(cpuset), &cpuset) != 0)
+ fatal_error("sched_setaffinity to CPU 0");
+}
+
+static void test_context_switch(void)
+{
+ struct futex_info *finfo;
+ int i;
+
+ /* Affinitize to one CPU to force context switches */
+ affinitize_cpu0();
+
+ req_xtiledata_perm();
+
+ printf("[RUN]\tCheck tiledata context switches, %d iterations, %d threads.\n",
+ ctxtswtest_config.iterations,
+ ctxtswtest_config.num_threads);
+
+
+ finfo = malloc(sizeof(*finfo) * ctxtswtest_config.num_threads);
+ if (!finfo)
+ fatal_error("malloc()");
+
+ create_threads(ctxtswtest_config.num_threads, finfo);
+
+ /*
+ * This thread wakes up thread 0
+ * Thread 0 will wake up 1
+ * Thread 1 will wake up 2
+ * ...
+ * the last thread will wake up 0
+ *
+ * ... this will repeat for the configured
+ * number of iterations.
+ */
+ pthread_mutex_unlock(&finfo[0].mutex);
+
+ /* Wait for all the threads to finish: */
+ for (i = 0; i < ctxtswtest_config.num_threads; i++) {
+ void *thread_retval;
+ int rc;
+
+ rc = pthread_join(finfo[i].thread, &thread_retval);
+
+ if (rc)
+ fatal_error("pthread_join() failed for thread %d err: %d\n",
+ i, rc);
+
+ if (thread_retval != &finfo[i])
+ fatal_error("unexpected thread retval for thread %d: %p\n",
+ i, thread_retval);
+
+ }
+
+ printf("[OK]\tNo incorrect case was found.\n");
+
+ free(finfo);
+}
+
+/* Ptrace test */
+
+/*
+ * Make sure the ptracee has the expanded kernel buffer on the first
+ * use. Then, initialize the state before performing the state
+ * injection from the ptracer.
+ */
+static inline void ptracee_firstuse_tiledata(void)
+{
+ load_rand_tiledata(stashed_xsave);
+ init_xtiledata();
+}
+
+/*
+ * Ptracer injects the randomized tile data state. It also reads
+ * before and after that, which will execute the kernel's state copy
+ * functions. So, the tester is advised to double-check any emitted
+ * kernel messages.
+ */
+static void ptracer_inject_tiledata(pid_t target)
+{
+ struct xsave_buffer *xbuf;
+ struct iovec iov;
+
+ xbuf = alloc_xbuf();
+ if (!xbuf)
+ fatal_error("unable to allocate XSAVE buffer");
+
+ printf("\tRead the init'ed tiledata via ptrace().\n");
+
+ iov.iov_base = xbuf;
+ iov.iov_len = xbuf_size;
+
+ memset(stashed_xsave, 0, xbuf_size);
+
+ if (ptrace(PTRACE_GETREGSET, target, (uint32_t)NT_X86_XSTATE, &iov))
+ fatal_error("PTRACE_GETREGSET");
+
+ if (!__compare_tiledata_state(stashed_xsave, xbuf))
+ printf("[OK]\tThe init'ed tiledata was read from ptracee.\n");
+ else
+ printf("[FAIL]\tThe init'ed tiledata was not read from ptracee.\n");
+
+ printf("\tInject tiledata via ptrace().\n");
+
+ load_rand_tiledata(xbuf);
+
+ memcpy(&stashed_xsave->bytes[xtiledata.xbuf_offset],
+ &xbuf->bytes[xtiledata.xbuf_offset],
+ xtiledata.size);
+
+ if (ptrace(PTRACE_SETREGSET, target, (uint32_t)NT_X86_XSTATE, &iov))
+ fatal_error("PTRACE_SETREGSET");
+
+ if (ptrace(PTRACE_GETREGSET, target, (uint32_t)NT_X86_XSTATE, &iov))
+ fatal_error("PTRACE_GETREGSET");
+
+ if (!__compare_tiledata_state(stashed_xsave, xbuf))
+ printf("[OK]\tTiledata was correctly written to ptracee.\n");
+ else
+ printf("[FAIL]\tTiledata was not correctly written to ptracee.\n");
+}
+
+static void test_ptrace(void)
+{
+ pid_t child;
+ int status;
+
+ child = fork();
+ if (child < 0) {
+ err(1, "fork");
+ } else if (!child) {
+ if (ptrace(PTRACE_TRACEME, 0, NULL, NULL))
+ err(1, "PTRACE_TRACEME");
+
+ ptracee_firstuse_tiledata();
+
+ raise(SIGTRAP);
+ _exit(0);
+ }
+
+ do {
+ wait(&status);
+ } while (WSTOPSIG(status) != SIGTRAP);
+
+ ptracer_inject_tiledata(child);
+
+ ptrace(PTRACE_DETACH, child, NULL, NULL);
+ wait(&status);
+ if (!WIFEXITED(status) || WEXITSTATUS(status))
+ err(1, "ptrace test");
+}
+
+int main(void)
+{
+ /* Check hardware availability at first */
+ check_cpuid_xsave();
+ check_cpuid_xtiledata();
+
+ init_stashed_xsave();
+ sethandler(SIGILL, handle_noperm, 0);
+
+ test_dynamic_state();
+
+ /* Request permission for the following tests */
+ req_xtiledata_perm();
+
+ test_fork();
+
+ ctxtswtest_config.iterations = 10;
+ ctxtswtest_config.num_threads = 5;
+ test_context_switch();
+
+ test_ptrace();
+
+ clearhandler(SIGILL);
+ free_stashed_xsave();
+
+ return 0;
+}
diff --git a/tools/testing/selftests/x86/check_cc.sh b/tools/testing/selftests/x86/check_cc.sh
index 3e2089c8cf54..8c669c0d662e 100755
--- a/tools/testing/selftests/x86/check_cc.sh
+++ b/tools/testing/selftests/x86/check_cc.sh
@@ -7,7 +7,7 @@ CC="$1"
TESTPROG="$2"
shift 2
-if "$CC" -o /dev/null "$TESTPROG" -O0 "$@" 2>/dev/null; then
+if [ -n "$CC" ] && $CC -o /dev/null "$TESTPROG" -O0 "$@" 2>/dev/null; then
echo 1
else
echo 0
diff --git a/tools/testing/selftests/x86/corrupt_xstate_header.c b/tools/testing/selftests/x86/corrupt_xstate_header.c
new file mode 100644
index 000000000000..cf9ce8fbb656
--- /dev/null
+++ b/tools/testing/selftests/x86/corrupt_xstate_header.c
@@ -0,0 +1,102 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Corrupt the XSTATE header in a signal frame
+ *
+ * Based on analysis and a test case from Thomas Gleixner.
+ */
+
+#define _GNU_SOURCE
+
+#include <stdlib.h>
+#include <stdio.h>
+#include <string.h>
+#include <sched.h>
+#include <signal.h>
+#include <err.h>
+#include <unistd.h>
+#include <stdint.h>
+#include <sys/wait.h>
+
+#include "../kselftest.h" /* For __cpuid_count() */
+
+static inline int xsave_enabled(void)
+{
+ unsigned int eax, ebx, ecx, edx;
+
+ __cpuid_count(0x1, 0x0, eax, ebx, ecx, edx);
+
+ /* Is CR4.OSXSAVE enabled ? */
+ return ecx & (1U << 27);
+}
+
+static void sethandler(int sig, void (*handler)(int, siginfo_t *, void *),
+ int flags)
+{
+ struct sigaction sa;
+
+ memset(&sa, 0, sizeof(sa));
+ sa.sa_sigaction = handler;
+ sa.sa_flags = SA_SIGINFO | flags;
+ sigemptyset(&sa.sa_mask);
+ if (sigaction(sig, &sa, 0))
+ err(1, "sigaction");
+}
+
+static void sigusr1(int sig, siginfo_t *info, void *uc_void)
+{
+ ucontext_t *uc = uc_void;
+ uint8_t *fpstate = (uint8_t *)uc->uc_mcontext.fpregs;
+ uint64_t *xfeatures = (uint64_t *)(fpstate + 512);
+
+ printf("\tWreck XSTATE header\n");
+ /* Wreck the first reserved bytes in the header */
+ *(xfeatures + 2) = 0xfffffff;
+}
+
+static void sigsegv(int sig, siginfo_t *info, void *uc_void)
+{
+ printf("\tGot SIGSEGV\n");
+}
+
+int main(void)
+{
+ cpu_set_t set;
+
+ sethandler(SIGUSR1, sigusr1, 0);
+ sethandler(SIGSEGV, sigsegv, 0);
+
+ if (!xsave_enabled()) {
+ printf("[SKIP] CR4.OSXSAVE disabled.\n");
+ return 0;
+ }
+
+ CPU_ZERO(&set);
+ CPU_SET(0, &set);
+
+ /*
+ * Enforce that the child runs on the same CPU
+ * which in turn forces a schedule.
+ */
+ sched_setaffinity(getpid(), sizeof(set), &set);
+
+ printf("[RUN]\tSend ourselves a signal\n");
+ raise(SIGUSR1);
+
+ printf("[OK]\tBack from the signal. Now schedule.\n");
+ pid_t child = fork();
+ if (child < 0)
+ err(1, "fork");
+ if (child == 0)
+ return 0;
+ if (child)
+ waitpid(child, NULL, 0);
+ printf("[OK]\tBack in the main thread.\n");
+
+ /*
+ * We could try to confirm that extended state is still preserved
+ * when we schedule. For now, the only indication of failure is
+ * a warning in the kernel logs.
+ */
+
+ return 0;
+}
diff --git a/tools/testing/selftests/x86/fsgsbase.c b/tools/testing/selftests/x86/fsgsbase.c
index 15a329da59fa..8c780cce941d 100644
--- a/tools/testing/selftests/x86/fsgsbase.c
+++ b/tools/testing/selftests/x86/fsgsbase.c
@@ -285,7 +285,8 @@ static unsigned short load_gs(void)
/* 32-bit set_thread_area */
long ret;
asm volatile ("int $0x80"
- : "=a" (ret) : "a" (243), "b" (low_desc)
+ : "=a" (ret), "+m" (*low_desc)
+ : "a" (243), "b" (low_desc)
: "r8", "r9", "r10", "r11");
memcpy(&desc, low_desc, sizeof(desc));
munmap(low_desc, sizeof(desc));
@@ -391,8 +392,8 @@ static void set_gs_and_switch_to(unsigned long local,
local = read_base(GS);
/*
- * Signal delivery seems to mess up weird selectors. Put it
- * back.
+ * Signal delivery is quite likely to change a selector
+ * of 1, 2, or 3 back to 0 due to IRET being defective.
*/
asm volatile ("mov %0, %%gs" : : "rm" (force_sel));
} else {
@@ -410,6 +411,14 @@ static void set_gs_and_switch_to(unsigned long local,
if (base == local && sel_pre_sched == sel_post_sched) {
printf("[OK]\tGS/BASE remained 0x%hx/0x%lx\n",
sel_pre_sched, local);
+ } else if (base == local && sel_pre_sched >= 1 && sel_pre_sched <= 3 &&
+ sel_post_sched == 0) {
+ /*
+ * IRET is misdesigned and will squash selectors 1, 2, or 3
+ * to zero. Don't fail the test just because this happened.
+ */
+ printf("[OK]\tGS/BASE changed from 0x%hx/0x%lx to 0x%hx/0x%lx because IRET is defective\n",
+ sel_pre_sched, local, sel_post_sched, base);
} else {
nerrs++;
printf("[FAIL]\tGS/BASE changed from 0x%hx/0x%lx to 0x%hx/0x%lx\n",
@@ -442,6 +451,68 @@ static void test_unexpected_base(void)
#define USER_REGS_OFFSET(r) offsetof(struct user_regs_struct, r)
+static void test_ptrace_write_gs_read_base(void)
+{
+ int status;
+ pid_t child = fork();
+
+ if (child < 0)
+ err(1, "fork");
+
+ if (child == 0) {
+ printf("[RUN]\tPTRACE_POKE GS, read GSBASE back\n");
+
+ printf("[RUN]\tARCH_SET_GS to 1\n");
+ if (syscall(SYS_arch_prctl, ARCH_SET_GS, 1) != 0)
+ err(1, "ARCH_SET_GS");
+
+ if (ptrace(PTRACE_TRACEME, 0, NULL, NULL) != 0)
+ err(1, "PTRACE_TRACEME");
+
+ raise(SIGTRAP);
+ _exit(0);
+ }
+
+ wait(&status);
+
+ if (WSTOPSIG(status) == SIGTRAP) {
+ unsigned long base;
+ unsigned long gs_offset = USER_REGS_OFFSET(gs);
+ unsigned long base_offset = USER_REGS_OFFSET(gs_base);
+
+ /* Read the initial base. It should be 1. */
+ base = ptrace(PTRACE_PEEKUSER, child, base_offset, NULL);
+ if (base == 1) {
+ printf("[OK]\tGSBASE started at 1\n");
+ } else {
+ nerrs++;
+ printf("[FAIL]\tGSBASE started at 0x%lx\n", base);
+ }
+
+ printf("[RUN]\tSet GS = 0x7, read GSBASE\n");
+
+ /* Poke an LDT selector into GS. */
+ if (ptrace(PTRACE_POKEUSER, child, gs_offset, 0x7) != 0)
+ err(1, "PTRACE_POKEUSER");
+
+ /* And read the base. */
+ base = ptrace(PTRACE_PEEKUSER, child, base_offset, NULL);
+
+ if (base == 0 || base == 1) {
+ printf("[OK]\tGSBASE reads as 0x%lx with invalid GS\n", base);
+ } else {
+ nerrs++;
+ printf("[FAIL]\tGSBASE=0x%lx (should be 0 or 1)\n", base);
+ }
+ }
+
+ ptrace(PTRACE_CONT, child, NULL, NULL);
+
+ wait(&status);
+ if (!WIFEXITED(status))
+ printf("[WARN]\tChild didn't exit cleanly.\n");
+}
+
static void test_ptrace_write_gsbase(void)
{
int status;
@@ -489,16 +560,36 @@ static void test_ptrace_write_gsbase(void)
* selector value is changed or not by the GSBASE write in
* a ptracer.
*/
- if (gs == 0 && base == 0xFF) {
- printf("[OK]\tGS was reset as expected\n");
- } else {
+ if (gs != *shared_scratch) {
nerrs++;
- printf("[FAIL]\tGS=0x%lx, GSBASE=0x%lx (should be 0, 0xFF)\n", gs, base);
+ printf("[FAIL]\tGS changed to %lx\n", gs);
+
+ /*
+ * On older kernels, poking a nonzero value into the
+ * base would zero the selector. On newer kernels,
+ * this behavior has changed -- poking the base
+ * changes only the base and, if FSGSBASE is not
+ * available, this may have no effect once the tracee
+ * is resumed.
+ */
+ if (gs == 0)
+ printf("\tNote: this is expected behavior on older kernels.\n");
+ } else if (have_fsgsbase && (base != 0xFF)) {
+ nerrs++;
+ printf("[FAIL]\tGSBASE changed to %lx\n", base);
+ } else {
+ printf("[OK]\tGS remained 0x%hx", *shared_scratch);
+ if (have_fsgsbase)
+ printf(" and GSBASE changed to 0xFF");
+ printf("\n");
}
}
END:
ptrace(PTRACE_CONT, child, NULL, NULL);
+ wait(&status);
+ if (!WIFEXITED(status))
+ printf("[WARN]\tChild didn't exit cleanly.\n");
}
int main()
@@ -508,6 +599,9 @@ int main()
shared_scratch = mmap(NULL, 4096, PROT_READ | PROT_WRITE,
MAP_ANONYMOUS | MAP_SHARED, -1, 0);
+ /* Do these tests before we have an LDT. */
+ test_ptrace_write_gs_read_base();
+
/* Probe FSGSBASE */
sethandler(SIGILL, sigill, 0);
if (sigsetjmp(jmpbuf, 1) == 0) {
diff --git a/tools/testing/selftests/x86/fsgsbase_restore.c b/tools/testing/selftests/x86/fsgsbase_restore.c
new file mode 100644
index 000000000000..6fffadc51579
--- /dev/null
+++ b/tools/testing/selftests/x86/fsgsbase_restore.c
@@ -0,0 +1,245 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * fsgsbase_restore.c, test ptrace vs fsgsbase
+ * Copyright (c) 2020 Andy Lutomirski
+ *
+ * This test case simulates a tracer redirecting tracee execution to
+ * a function and then restoring tracee state using PTRACE_GETREGS and
+ * PTRACE_SETREGS. This is similar to what gdb does when doing
+ * 'p func()'. The catch is that this test has the called function
+ * modify a segment register. This makes sure that ptrace correctly
+ * restores segment state when using PTRACE_SETREGS.
+ *
+ * This is not part of fsgsbase.c, because that test is 64-bit only.
+ */
+
+#define _GNU_SOURCE
+#include <stdio.h>
+#include <stdlib.h>
+#include <stdbool.h>
+#include <string.h>
+#include <sys/syscall.h>
+#include <unistd.h>
+#include <err.h>
+#include <sys/user.h>
+#include <asm/prctl.h>
+#include <sys/prctl.h>
+#include <asm/ldt.h>
+#include <sys/mman.h>
+#include <stddef.h>
+#include <sys/ptrace.h>
+#include <sys/wait.h>
+#include <stdint.h>
+
+#define EXPECTED_VALUE 0x1337f00d
+
+#ifdef __x86_64__
+# define SEG "%gs"
+#else
+# define SEG "%fs"
+#endif
+
+static unsigned int dereference_seg_base(void)
+{
+ int ret;
+ asm volatile ("mov %" SEG ":(0), %0" : "=rm" (ret));
+ return ret;
+}
+
+static void init_seg(void)
+{
+ unsigned int *target = mmap(
+ NULL, sizeof(unsigned int),
+ PROT_READ | PROT_WRITE,
+ MAP_PRIVATE | MAP_ANONYMOUS | MAP_32BIT, -1, 0);
+ if (target == MAP_FAILED)
+ err(1, "mmap");
+
+ *target = EXPECTED_VALUE;
+
+ printf("\tsegment base address = 0x%lx\n", (unsigned long)target);
+
+ struct user_desc desc = {
+ .entry_number = 0,
+ .base_addr = (unsigned int)(uintptr_t)target,
+ .limit = sizeof(unsigned int) - 1,
+ .seg_32bit = 1,
+ .contents = 0, /* Data, grow-up */
+ .read_exec_only = 0,
+ .limit_in_pages = 0,
+ .seg_not_present = 0,
+ .useable = 0
+ };
+ if (syscall(SYS_modify_ldt, 1, &desc, sizeof(desc)) == 0) {
+ printf("\tusing LDT slot 0\n");
+ asm volatile ("mov %0, %" SEG :: "rm" ((unsigned short)0x7));
+ } else {
+ /* No modify_ldt for us (configured out, perhaps) */
+
+ struct user_desc *low_desc = mmap(
+ NULL, sizeof(desc),
+ PROT_READ | PROT_WRITE,
+ MAP_PRIVATE | MAP_ANONYMOUS | MAP_32BIT, -1, 0);
+ memcpy(low_desc, &desc, sizeof(desc));
+
+ low_desc->entry_number = -1;
+
+ /* 32-bit set_thread_area */
+ long ret;
+ asm volatile ("int $0x80"
+ : "=a" (ret), "+m" (*low_desc)
+ : "a" (243), "b" (low_desc)
+#ifdef __x86_64__
+ : "r8", "r9", "r10", "r11"
+#endif
+ );
+ memcpy(&desc, low_desc, sizeof(desc));
+ munmap(low_desc, sizeof(desc));
+
+ if (ret != 0) {
+ printf("[NOTE]\tcould not create a segment -- can't test anything\n");
+ exit(0);
+ }
+ printf("\tusing GDT slot %d\n", desc.entry_number);
+
+ unsigned short sel = (unsigned short)((desc.entry_number << 3) | 0x3);
+ asm volatile ("mov %0, %" SEG :: "rm" (sel));
+ }
+}
+
+static void tracee_zap_segment(void)
+{
+ /*
+ * The tracer will redirect execution here. This is meant to
+ * work like gdb's 'p func()' feature. The tricky bit is that
+ * we modify a segment register in order to make sure that ptrace
+ * can correctly restore segment registers.
+ */
+ printf("\tTracee: in tracee_zap_segment()\n");
+
+ /*
+ * Write a nonzero selector with base zero to the segment register.
+ * Using a null selector would defeat the test on AMD pre-Zen2
+ * CPUs, as such CPUs don't clear the base when loading a null
+ * selector.
+ */
+ unsigned short sel;
+ asm volatile ("mov %%ss, %0\n\t"
+ "mov %0, %" SEG
+ : "=rm" (sel));
+
+ pid_t pid = getpid(), tid = syscall(SYS_gettid);
+
+ printf("\tTracee is going back to sleep\n");
+ syscall(SYS_tgkill, pid, tid, SIGSTOP);
+
+ /* Should not get here. */
+ while (true) {
+ printf("[FAIL]\tTracee hit unreachable code\n");
+ pause();
+ }
+}
+
+int main()
+{
+ printf("\tSetting up a segment\n");
+ init_seg();
+
+ unsigned int val = dereference_seg_base();
+ if (val != EXPECTED_VALUE) {
+ printf("[FAIL]\tseg[0] == %x; should be %x\n", val, EXPECTED_VALUE);
+ return 1;
+ }
+ printf("[OK]\tThe segment points to the right place.\n");
+
+ pid_t chld = fork();
+ if (chld < 0)
+ err(1, "fork");
+
+ if (chld == 0) {
+ prctl(PR_SET_PDEATHSIG, SIGKILL, 0, 0, 0, 0);
+
+ if (ptrace(PTRACE_TRACEME, 0, 0, 0) != 0)
+ err(1, "PTRACE_TRACEME");
+
+ pid_t pid = getpid(), tid = syscall(SYS_gettid);
+
+ printf("\tTracee will take a nap until signaled\n");
+ syscall(SYS_tgkill, pid, tid, SIGSTOP);
+
+ printf("\tTracee was resumed. Will re-check segment.\n");
+
+ val = dereference_seg_base();
+ if (val != EXPECTED_VALUE) {
+ printf("[FAIL]\tseg[0] == %x; should be %x\n", val, EXPECTED_VALUE);
+ exit(1);
+ }
+
+ printf("[OK]\tThe segment points to the right place.\n");
+ exit(0);
+ }
+
+ int status;
+
+ /* Wait for SIGSTOP. */
+ if (waitpid(chld, &status, 0) != chld || !WIFSTOPPED(status))
+ err(1, "waitpid");
+
+ struct user_regs_struct regs;
+
+ if (ptrace(PTRACE_GETREGS, chld, NULL, &regs) != 0)
+ err(1, "PTRACE_GETREGS");
+
+#ifdef __x86_64__
+ printf("\tChild GS=0x%lx, GSBASE=0x%lx\n", (unsigned long)regs.gs, (unsigned long)regs.gs_base);
+#else
+ printf("\tChild FS=0x%lx\n", (unsigned long)regs.xfs);
+#endif
+
+ struct user_regs_struct regs2 = regs;
+#ifdef __x86_64__
+ regs2.rip = (unsigned long)tracee_zap_segment;
+ regs2.rsp -= 128; /* Don't clobber the redzone. */
+#else
+ regs2.eip = (unsigned long)tracee_zap_segment;
+#endif
+
+ printf("\tTracer: redirecting tracee to tracee_zap_segment()\n");
+ if (ptrace(PTRACE_SETREGS, chld, NULL, &regs2) != 0)
+ err(1, "PTRACE_GETREGS");
+ if (ptrace(PTRACE_CONT, chld, NULL, NULL) != 0)
+ err(1, "PTRACE_GETREGS");
+
+ /* Wait for SIGSTOP. */
+ if (waitpid(chld, &status, 0) != chld || !WIFSTOPPED(status))
+ err(1, "waitpid");
+
+ printf("\tTracer: restoring tracee state\n");
+ if (ptrace(PTRACE_SETREGS, chld, NULL, &regs) != 0)
+ err(1, "PTRACE_GETREGS");
+ if (ptrace(PTRACE_DETACH, chld, NULL, NULL) != 0)
+ err(1, "PTRACE_GETREGS");
+
+ /* Wait for SIGSTOP. */
+ if (waitpid(chld, &status, 0) != chld)
+ err(1, "waitpid");
+
+ if (WIFSIGNALED(status)) {
+ printf("[FAIL]\tTracee crashed\n");
+ return 1;
+ }
+
+ if (!WIFEXITED(status)) {
+ printf("[FAIL]\tTracee stopped for an unexpected reason: %d\n", status);
+ return 1;
+ }
+
+ int exitcode = WEXITSTATUS(status);
+ if (exitcode != 0) {
+ printf("[FAIL]\tTracee reported failure\n");
+ return 1;
+ }
+
+ printf("[OK]\tAll is well.\n");
+ return 0;
+}
diff --git a/tools/testing/selftests/x86/helpers.h b/tools/testing/selftests/x86/helpers.h
new file mode 100644
index 000000000000..4ef42c4559a9
--- /dev/null
+++ b/tools/testing/selftests/x86/helpers.h
@@ -0,0 +1,25 @@
+// SPDX-License-Identifier: GPL-2.0-only
+#ifndef __SELFTESTS_X86_HELPERS_H
+#define __SELFTESTS_X86_HELPERS_H
+
+#include <asm/processor-flags.h>
+
+static inline unsigned long get_eflags(void)
+{
+#ifdef __x86_64__
+ return __builtin_ia32_readeflags_u64();
+#else
+ return __builtin_ia32_readeflags_u32();
+#endif
+}
+
+static inline void set_eflags(unsigned long eflags)
+{
+#ifdef __x86_64__
+ __builtin_ia32_writeeflags_u64(eflags);
+#else
+ __builtin_ia32_writeeflags_u32(eflags);
+#endif
+}
+
+#endif /* __SELFTESTS_X86_HELPERS_H */
diff --git a/tools/testing/selftests/x86/iopl.c b/tools/testing/selftests/x86/iopl.c
index bab2f6e06b63..7e3e09c1abac 100644
--- a/tools/testing/selftests/x86/iopl.c
+++ b/tools/testing/selftests/x86/iopl.c
@@ -85,48 +85,88 @@ static void expect_gp_outb(unsigned short port)
printf("[OK]\toutb to 0x%02hx failed\n", port);
}
-static bool try_cli(void)
+#define RET_FAULTED 0
+#define RET_FAIL 1
+#define RET_EMUL 2
+
+static int try_cli(void)
{
+ unsigned long flags;
+
sethandler(SIGSEGV, sigsegv, SA_RESETHAND);
if (sigsetjmp(jmpbuf, 1) != 0) {
- return false;
+ return RET_FAULTED;
} else {
- asm volatile ("cli");
- return true;
+ asm volatile("cli; pushf; pop %[flags]"
+ : [flags] "=rm" (flags));
+
+ /* X86_FLAGS_IF */
+ if (!(flags & (1 << 9)))
+ return RET_FAIL;
+ else
+ return RET_EMUL;
}
clearhandler(SIGSEGV);
}
-static bool try_sti(void)
+static int try_sti(bool irqs_off)
{
+ unsigned long flags;
+
sethandler(SIGSEGV, sigsegv, SA_RESETHAND);
if (sigsetjmp(jmpbuf, 1) != 0) {
- return false;
+ return RET_FAULTED;
} else {
- asm volatile ("sti");
- return true;
+ asm volatile("sti; pushf; pop %[flags]"
+ : [flags] "=rm" (flags));
+
+ /* X86_FLAGS_IF */
+ if (irqs_off && (flags & (1 << 9)))
+ return RET_FAIL;
+ else
+ return RET_EMUL;
}
clearhandler(SIGSEGV);
}
-static void expect_gp_sti(void)
+static void expect_gp_sti(bool irqs_off)
{
- if (try_sti()) {
+ int ret = try_sti(irqs_off);
+
+ switch (ret) {
+ case RET_FAULTED:
+ printf("[OK]\tSTI faulted\n");
+ break;
+ case RET_EMUL:
+ printf("[OK]\tSTI NOPped\n");
+ break;
+ default:
printf("[FAIL]\tSTI worked\n");
nerrs++;
- } else {
- printf("[OK]\tSTI faulted\n");
}
}
-static void expect_gp_cli(void)
+/*
+ * Returns whether it managed to disable interrupts.
+ */
+static bool test_cli(void)
{
- if (try_cli()) {
+ int ret = try_cli();
+
+ switch (ret) {
+ case RET_FAULTED:
+ printf("[OK]\tCLI faulted\n");
+ break;
+ case RET_EMUL:
+ printf("[OK]\tCLI NOPped\n");
+ break;
+ default:
printf("[FAIL]\tCLI worked\n");
nerrs++;
- } else {
- printf("[OK]\tCLI faulted\n");
+ return true;
}
+
+ return false;
}
int main(void)
@@ -152,8 +192,7 @@ int main(void)
}
/* Make sure that CLI/STI are blocked even with IOPL level 3 */
- expect_gp_cli();
- expect_gp_sti();
+ expect_gp_sti(test_cli());
expect_ok_outb(0x80);
/* Establish an I/O bitmap to test the restore */
@@ -204,8 +243,7 @@ int main(void)
printf("[RUN]\tparent: write to 0x80 (should fail)\n");
expect_gp_outb(0x80);
- expect_gp_cli();
- expect_gp_sti();
+ expect_gp_sti(test_cli());
/* Test the capability checks. */
printf("\tiopl(3)\n");
diff --git a/tools/testing/selftests/x86/lam.c b/tools/testing/selftests/x86/lam.c
new file mode 100644
index 000000000000..215b8150b7cc
--- /dev/null
+++ b/tools/testing/selftests/x86/lam.c
@@ -0,0 +1,1241 @@
+// SPDX-License-Identifier: GPL-2.0
+#define _GNU_SOURCE
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/syscall.h>
+#include <time.h>
+#include <signal.h>
+#include <setjmp.h>
+#include <sys/mman.h>
+#include <sys/utsname.h>
+#include <sys/wait.h>
+#include <sys/stat.h>
+#include <fcntl.h>
+#include <inttypes.h>
+#include <sched.h>
+
+#include <sys/uio.h>
+#include <linux/io_uring.h>
+#include "../kselftest.h"
+
+#ifndef __x86_64__
+# error This test is 64-bit only
+#endif
+
+/* LAM modes, these definitions were copied from kernel code */
+#define LAM_NONE 0
+#define LAM_U57_BITS 6
+
+#define LAM_U57_MASK (0x3fULL << 57)
+/* arch prctl for LAM */
+#define ARCH_GET_UNTAG_MASK 0x4001
+#define ARCH_ENABLE_TAGGED_ADDR 0x4002
+#define ARCH_GET_MAX_TAG_BITS 0x4003
+#define ARCH_FORCE_TAGGED_SVA 0x4004
+
+/* Specified test function bits */
+#define FUNC_MALLOC 0x1
+#define FUNC_BITS 0x2
+#define FUNC_MMAP 0x4
+#define FUNC_SYSCALL 0x8
+#define FUNC_URING 0x10
+#define FUNC_INHERITE 0x20
+#define FUNC_PASID 0x40
+
+#define TEST_MASK 0x7f
+
+#define LOW_ADDR (0x1UL << 30)
+#define HIGH_ADDR (0x3UL << 48)
+
+#define MALLOC_LEN 32
+
+#define PAGE_SIZE (4 << 10)
+
+#define STACK_SIZE 65536
+
+#define barrier() ({ \
+ __asm__ __volatile__("" : : : "memory"); \
+})
+
+#define URING_QUEUE_SZ 1
+#define URING_BLOCK_SZ 2048
+
+/* Pasid test define */
+#define LAM_CMD_BIT 0x1
+#define PAS_CMD_BIT 0x2
+#define SVA_CMD_BIT 0x4
+
+#define PAS_CMD(cmd1, cmd2, cmd3) (((cmd3) << 8) | ((cmd2) << 4) | ((cmd1) << 0))
+
+struct testcases {
+ unsigned int later;
+ int expected; /* 2: SIGSEGV Error; 1: other errors */
+ unsigned long lam;
+ uint64_t addr;
+ uint64_t cmd;
+ int (*test_func)(struct testcases *test);
+ const char *msg;
+};
+
+/* Used by CQ of uring, source file handler and file's size */
+struct file_io {
+ int file_fd;
+ off_t file_sz;
+ struct iovec iovecs[];
+};
+
+struct io_uring_queue {
+ unsigned int *head;
+ unsigned int *tail;
+ unsigned int *ring_mask;
+ unsigned int *ring_entries;
+ unsigned int *flags;
+ unsigned int *array;
+ union {
+ struct io_uring_cqe *cqes;
+ struct io_uring_sqe *sqes;
+ } queue;
+ size_t ring_sz;
+};
+
+struct io_ring {
+ int ring_fd;
+ struct io_uring_queue sq_ring;
+ struct io_uring_queue cq_ring;
+};
+
+int tests_cnt;
+jmp_buf segv_env;
+
+static void segv_handler(int sig)
+{
+ ksft_print_msg("Get segmentation fault(%d).", sig);
+
+ siglongjmp(segv_env, 1);
+}
+
+static inline int cpu_has_lam(void)
+{
+ unsigned int cpuinfo[4];
+
+ __cpuid_count(0x7, 1, cpuinfo[0], cpuinfo[1], cpuinfo[2], cpuinfo[3]);
+
+ return (cpuinfo[0] & (1 << 26));
+}
+
+/* Check 5-level page table feature in CPUID.(EAX=07H, ECX=00H):ECX.[bit 16] */
+static inline int cpu_has_la57(void)
+{
+ unsigned int cpuinfo[4];
+
+ __cpuid_count(0x7, 0, cpuinfo[0], cpuinfo[1], cpuinfo[2], cpuinfo[3]);
+
+ return (cpuinfo[2] & (1 << 16));
+}
+
+/*
+ * Set tagged address and read back untag mask.
+ * check if the untagged mask is expected.
+ *
+ * @return:
+ * 0: Set LAM mode successfully
+ * others: failed to set LAM
+ */
+static int set_lam(unsigned long lam)
+{
+ int ret = 0;
+ uint64_t ptr = 0;
+
+ if (lam != LAM_U57_BITS && lam != LAM_NONE)
+ return -1;
+
+ /* Skip check return */
+ syscall(SYS_arch_prctl, ARCH_ENABLE_TAGGED_ADDR, lam);
+
+ /* Get untagged mask */
+ syscall(SYS_arch_prctl, ARCH_GET_UNTAG_MASK, &ptr);
+
+ /* Check mask returned is expected */
+ if (lam == LAM_U57_BITS)
+ ret = (ptr != ~(LAM_U57_MASK));
+ else if (lam == LAM_NONE)
+ ret = (ptr != -1ULL);
+
+ return ret;
+}
+
+static unsigned long get_default_tag_bits(void)
+{
+ pid_t pid;
+ int lam = LAM_NONE;
+ int ret = 0;
+
+ pid = fork();
+ if (pid < 0) {
+ perror("Fork failed.");
+ } else if (pid == 0) {
+ /* Set LAM mode in child process */
+ if (set_lam(LAM_U57_BITS) == 0)
+ lam = LAM_U57_BITS;
+ else
+ lam = LAM_NONE;
+ exit(lam);
+ } else {
+ wait(&ret);
+ lam = WEXITSTATUS(ret);
+ }
+
+ return lam;
+}
+
+/*
+ * Set tagged address and read back untag mask.
+ * check if the untag mask is expected.
+ */
+static int get_lam(void)
+{
+ uint64_t ptr = 0;
+ int ret = -1;
+ /* Get untagged mask */
+ if (syscall(SYS_arch_prctl, ARCH_GET_UNTAG_MASK, &ptr) == -1)
+ return -1;
+
+ /* Check mask returned is expected */
+ if (ptr == ~(LAM_U57_MASK))
+ ret = LAM_U57_BITS;
+ else if (ptr == -1ULL)
+ ret = LAM_NONE;
+
+
+ return ret;
+}
+
+/* According to LAM mode, set metadata in high bits */
+static uint64_t set_metadata(uint64_t src, unsigned long lam)
+{
+ uint64_t metadata;
+
+ srand(time(NULL));
+
+ switch (lam) {
+ case LAM_U57_BITS: /* Set metadata in bits 62:57 */
+ /* Get a random non-zero value as metadata */
+ metadata = (rand() % ((1UL << LAM_U57_BITS) - 1) + 1) << 57;
+ metadata |= (src & ~(LAM_U57_MASK));
+ break;
+ default:
+ metadata = src;
+ break;
+ }
+
+ return metadata;
+}
+
+/*
+ * Set metadata in user pointer, compare new pointer with original pointer.
+ * both pointers should point to the same address.
+ *
+ * @return:
+ * 0: value on the pointer with metadate and value on original are same
+ * 1: not same.
+ */
+static int handle_lam_test(void *src, unsigned int lam)
+{
+ char *ptr;
+
+ strcpy((char *)src, "USER POINTER");
+
+ ptr = (char *)set_metadata((uint64_t)src, lam);
+ if (src == ptr)
+ return 0;
+
+ /* Copy a string into the pointer with metadata */
+ strcpy((char *)ptr, "METADATA POINTER");
+
+ return (!!strcmp((char *)src, (char *)ptr));
+}
+
+
+int handle_max_bits(struct testcases *test)
+{
+ unsigned long exp_bits = get_default_tag_bits();
+ unsigned long bits = 0;
+
+ if (exp_bits != LAM_NONE)
+ exp_bits = LAM_U57_BITS;
+
+ /* Get LAM max tag bits */
+ if (syscall(SYS_arch_prctl, ARCH_GET_MAX_TAG_BITS, &bits) == -1)
+ return 1;
+
+ return (exp_bits != bits);
+}
+
+/*
+ * Test lam feature through dereference pointer get from malloc.
+ * @return 0: Pass test. 1: Get failure during test 2: Get SIGSEGV
+ */
+static int handle_malloc(struct testcases *test)
+{
+ char *ptr = NULL;
+ int ret = 0;
+
+ if (test->later == 0 && test->lam != 0)
+ if (set_lam(test->lam) == -1)
+ return 1;
+
+ ptr = (char *)malloc(MALLOC_LEN);
+ if (ptr == NULL) {
+ perror("malloc() failure\n");
+ return 1;
+ }
+
+ /* Set signal handler */
+ if (sigsetjmp(segv_env, 1) == 0) {
+ signal(SIGSEGV, segv_handler);
+ ret = handle_lam_test(ptr, test->lam);
+ } else {
+ ret = 2;
+ }
+
+ if (test->later != 0 && test->lam != 0)
+ if (set_lam(test->lam) == -1 && ret == 0)
+ ret = 1;
+
+ free(ptr);
+
+ return ret;
+}
+
+static int handle_mmap(struct testcases *test)
+{
+ void *ptr;
+ unsigned int flags = MAP_PRIVATE | MAP_ANONYMOUS | MAP_FIXED;
+ int ret = 0;
+
+ if (test->later == 0 && test->lam != 0)
+ if (set_lam(test->lam) != 0)
+ return 1;
+
+ ptr = mmap((void *)test->addr, PAGE_SIZE, PROT_READ | PROT_WRITE,
+ flags, -1, 0);
+ if (ptr == MAP_FAILED) {
+ if (test->addr == HIGH_ADDR)
+ if (!cpu_has_la57())
+ return 3; /* unsupport LA57 */
+ return 1;
+ }
+
+ if (test->later != 0 && test->lam != 0)
+ if (set_lam(test->lam) != 0)
+ ret = 1;
+
+ if (ret == 0) {
+ if (sigsetjmp(segv_env, 1) == 0) {
+ signal(SIGSEGV, segv_handler);
+ ret = handle_lam_test(ptr, test->lam);
+ } else {
+ ret = 2;
+ }
+ }
+
+ munmap(ptr, PAGE_SIZE);
+ return ret;
+}
+
+static int handle_syscall(struct testcases *test)
+{
+ struct utsname unme, *pu;
+ int ret = 0;
+
+ if (test->later == 0 && test->lam != 0)
+ if (set_lam(test->lam) != 0)
+ return 1;
+
+ if (sigsetjmp(segv_env, 1) == 0) {
+ signal(SIGSEGV, segv_handler);
+ pu = (struct utsname *)set_metadata((uint64_t)&unme, test->lam);
+ ret = uname(pu);
+ if (ret < 0)
+ ret = 1;
+ } else {
+ ret = 2;
+ }
+
+ if (test->later != 0 && test->lam != 0)
+ if (set_lam(test->lam) != -1 && ret == 0)
+ ret = 1;
+
+ return ret;
+}
+
+int sys_uring_setup(unsigned int entries, struct io_uring_params *p)
+{
+ return (int)syscall(__NR_io_uring_setup, entries, p);
+}
+
+int sys_uring_enter(int fd, unsigned int to, unsigned int min, unsigned int flags)
+{
+ return (int)syscall(__NR_io_uring_enter, fd, to, min, flags, NULL, 0);
+}
+
+/* Init submission queue and completion queue */
+int mmap_io_uring(struct io_uring_params p, struct io_ring *s)
+{
+ struct io_uring_queue *sring = &s->sq_ring;
+ struct io_uring_queue *cring = &s->cq_ring;
+
+ sring->ring_sz = p.sq_off.array + p.sq_entries * sizeof(unsigned int);
+ cring->ring_sz = p.cq_off.cqes + p.cq_entries * sizeof(struct io_uring_cqe);
+
+ if (p.features & IORING_FEAT_SINGLE_MMAP) {
+ if (cring->ring_sz > sring->ring_sz)
+ sring->ring_sz = cring->ring_sz;
+
+ cring->ring_sz = sring->ring_sz;
+ }
+
+ void *sq_ptr = mmap(0, sring->ring_sz, PROT_READ | PROT_WRITE,
+ MAP_SHARED | MAP_POPULATE, s->ring_fd,
+ IORING_OFF_SQ_RING);
+
+ if (sq_ptr == MAP_FAILED) {
+ perror("sub-queue!");
+ return 1;
+ }
+
+ void *cq_ptr = sq_ptr;
+
+ if (!(p.features & IORING_FEAT_SINGLE_MMAP)) {
+ cq_ptr = mmap(0, cring->ring_sz, PROT_READ | PROT_WRITE,
+ MAP_SHARED | MAP_POPULATE, s->ring_fd,
+ IORING_OFF_CQ_RING);
+ if (cq_ptr == MAP_FAILED) {
+ perror("cpl-queue!");
+ munmap(sq_ptr, sring->ring_sz);
+ return 1;
+ }
+ }
+
+ sring->head = sq_ptr + p.sq_off.head;
+ sring->tail = sq_ptr + p.sq_off.tail;
+ sring->ring_mask = sq_ptr + p.sq_off.ring_mask;
+ sring->ring_entries = sq_ptr + p.sq_off.ring_entries;
+ sring->flags = sq_ptr + p.sq_off.flags;
+ sring->array = sq_ptr + p.sq_off.array;
+
+ /* Map a queue as mem map */
+ s->sq_ring.queue.sqes = mmap(0, p.sq_entries * sizeof(struct io_uring_sqe),
+ PROT_READ | PROT_WRITE, MAP_SHARED | MAP_POPULATE,
+ s->ring_fd, IORING_OFF_SQES);
+ if (s->sq_ring.queue.sqes == MAP_FAILED) {
+ munmap(sq_ptr, sring->ring_sz);
+ if (sq_ptr != cq_ptr) {
+ ksft_print_msg("failed to mmap uring queue!");
+ munmap(cq_ptr, cring->ring_sz);
+ return 1;
+ }
+ }
+
+ cring->head = cq_ptr + p.cq_off.head;
+ cring->tail = cq_ptr + p.cq_off.tail;
+ cring->ring_mask = cq_ptr + p.cq_off.ring_mask;
+ cring->ring_entries = cq_ptr + p.cq_off.ring_entries;
+ cring->queue.cqes = cq_ptr + p.cq_off.cqes;
+
+ return 0;
+}
+
+/* Init io_uring queues */
+int setup_io_uring(struct io_ring *s)
+{
+ struct io_uring_params para;
+
+ memset(&para, 0, sizeof(para));
+ s->ring_fd = sys_uring_setup(URING_QUEUE_SZ, &para);
+ if (s->ring_fd < 0)
+ return 1;
+
+ return mmap_io_uring(para, s);
+}
+
+/*
+ * Get data from completion queue. the data buffer saved the file data
+ * return 0: success; others: error;
+ */
+int handle_uring_cq(struct io_ring *s)
+{
+ struct file_io *fi = NULL;
+ struct io_uring_queue *cring = &s->cq_ring;
+ struct io_uring_cqe *cqe;
+ unsigned int head;
+ off_t len = 0;
+
+ head = *cring->head;
+
+ do {
+ barrier();
+ if (head == *cring->tail)
+ break;
+ /* Get the entry */
+ cqe = &cring->queue.cqes[head & *s->cq_ring.ring_mask];
+ fi = (struct file_io *)cqe->user_data;
+ if (cqe->res < 0)
+ break;
+
+ int blocks = (int)(fi->file_sz + URING_BLOCK_SZ - 1) / URING_BLOCK_SZ;
+
+ for (int i = 0; i < blocks; i++)
+ len += fi->iovecs[i].iov_len;
+
+ head++;
+ } while (1);
+
+ *cring->head = head;
+ barrier();
+
+ return (len != fi->file_sz);
+}
+
+/*
+ * Submit squeue. specify via IORING_OP_READV.
+ * the buffer need to be set metadata according to LAM mode
+ */
+int handle_uring_sq(struct io_ring *ring, struct file_io *fi, unsigned long lam)
+{
+ int file_fd = fi->file_fd;
+ struct io_uring_queue *sring = &ring->sq_ring;
+ unsigned int index = 0, cur_block = 0, tail = 0, next_tail = 0;
+ struct io_uring_sqe *sqe;
+
+ off_t remain = fi->file_sz;
+ int blocks = (int)(remain + URING_BLOCK_SZ - 1) / URING_BLOCK_SZ;
+
+ while (remain) {
+ off_t bytes = remain;
+ void *buf;
+
+ if (bytes > URING_BLOCK_SZ)
+ bytes = URING_BLOCK_SZ;
+
+ fi->iovecs[cur_block].iov_len = bytes;
+
+ if (posix_memalign(&buf, URING_BLOCK_SZ, URING_BLOCK_SZ))
+ return 1;
+
+ fi->iovecs[cur_block].iov_base = (void *)set_metadata((uint64_t)buf, lam);
+ remain -= bytes;
+ cur_block++;
+ }
+
+ next_tail = *sring->tail;
+ tail = next_tail;
+ next_tail++;
+
+ barrier();
+
+ index = tail & *ring->sq_ring.ring_mask;
+
+ sqe = &ring->sq_ring.queue.sqes[index];
+ sqe->fd = file_fd;
+ sqe->flags = 0;
+ sqe->opcode = IORING_OP_READV;
+ sqe->addr = (unsigned long)fi->iovecs;
+ sqe->len = blocks;
+ sqe->off = 0;
+ sqe->user_data = (uint64_t)fi;
+
+ sring->array[index] = index;
+ tail = next_tail;
+
+ if (*sring->tail != tail) {
+ *sring->tail = tail;
+ barrier();
+ }
+
+ if (sys_uring_enter(ring->ring_fd, 1, 1, IORING_ENTER_GETEVENTS) < 0)
+ return 1;
+
+ return 0;
+}
+
+/*
+ * Test LAM in async I/O and io_uring, read current binery through io_uring
+ * Set metadata in pointers to iovecs buffer.
+ */
+int do_uring(unsigned long lam)
+{
+ struct io_ring *ring;
+ struct file_io *fi;
+ struct stat st;
+ int ret = 1;
+ char path[PATH_MAX] = {0};
+
+ /* get current process path */
+ if (readlink("/proc/self/exe", path, PATH_MAX - 1) <= 0)
+ return 1;
+
+ int file_fd = open(path, O_RDONLY);
+
+ if (file_fd < 0)
+ return 1;
+
+ if (fstat(file_fd, &st) < 0)
+ return 1;
+
+ off_t file_sz = st.st_size;
+
+ int blocks = (int)(file_sz + URING_BLOCK_SZ - 1) / URING_BLOCK_SZ;
+
+ fi = malloc(sizeof(*fi) + sizeof(struct iovec) * blocks);
+ if (!fi)
+ return 1;
+
+ fi->file_sz = file_sz;
+ fi->file_fd = file_fd;
+
+ ring = malloc(sizeof(*ring));
+ if (!ring)
+ return 1;
+
+ memset(ring, 0, sizeof(struct io_ring));
+
+ if (setup_io_uring(ring))
+ goto out;
+
+ if (handle_uring_sq(ring, fi, lam))
+ goto out;
+
+ ret = handle_uring_cq(ring);
+
+out:
+ free(ring);
+
+ for (int i = 0; i < blocks; i++) {
+ if (fi->iovecs[i].iov_base) {
+ uint64_t addr = ((uint64_t)fi->iovecs[i].iov_base);
+
+ switch (lam) {
+ case LAM_U57_BITS: /* Clear bits 62:57 */
+ addr = (addr & ~(LAM_U57_MASK));
+ break;
+ }
+ free((void *)addr);
+ fi->iovecs[i].iov_base = NULL;
+ }
+ }
+
+ free(fi);
+
+ return ret;
+}
+
+int handle_uring(struct testcases *test)
+{
+ int ret = 0;
+
+ if (test->later == 0 && test->lam != 0)
+ if (set_lam(test->lam) != 0)
+ return 1;
+
+ if (sigsetjmp(segv_env, 1) == 0) {
+ signal(SIGSEGV, segv_handler);
+ ret = do_uring(test->lam);
+ } else {
+ ret = 2;
+ }
+
+ return ret;
+}
+
+static int fork_test(struct testcases *test)
+{
+ int ret, child_ret;
+ pid_t pid;
+
+ pid = fork();
+ if (pid < 0) {
+ perror("Fork failed.");
+ ret = 1;
+ } else if (pid == 0) {
+ ret = test->test_func(test);
+ exit(ret);
+ } else {
+ wait(&child_ret);
+ ret = WEXITSTATUS(child_ret);
+ }
+
+ return ret;
+}
+
+static int handle_execve(struct testcases *test)
+{
+ int ret, child_ret;
+ int lam = test->lam;
+ pid_t pid;
+
+ pid = fork();
+ if (pid < 0) {
+ perror("Fork failed.");
+ ret = 1;
+ } else if (pid == 0) {
+ char path[PATH_MAX] = {0};
+
+ /* Set LAM mode in parent process */
+ if (set_lam(lam) != 0)
+ return 1;
+
+ /* Get current binary's path and the binary was run by execve */
+ if (readlink("/proc/self/exe", path, PATH_MAX - 1) <= 0)
+ exit(-1);
+
+ /* run binary to get LAM mode and return to parent process */
+ if (execlp(path, path, "-t 0x0", NULL) < 0) {
+ perror("error on exec");
+ exit(-1);
+ }
+ } else {
+ wait(&child_ret);
+ ret = WEXITSTATUS(child_ret);
+ if (ret != LAM_NONE)
+ return 1;
+ }
+
+ return 0;
+}
+
+static int handle_inheritance(struct testcases *test)
+{
+ int ret, child_ret;
+ int lam = test->lam;
+ pid_t pid;
+
+ /* Set LAM mode in parent process */
+ if (set_lam(lam) != 0)
+ return 1;
+
+ pid = fork();
+ if (pid < 0) {
+ perror("Fork failed.");
+ return 1;
+ } else if (pid == 0) {
+ /* Set LAM mode in parent process */
+ int child_lam = get_lam();
+
+ exit(child_lam);
+ } else {
+ wait(&child_ret);
+ ret = WEXITSTATUS(child_ret);
+
+ if (lam != ret)
+ return 1;
+ }
+
+ return 0;
+}
+
+static int thread_fn_get_lam(void *arg)
+{
+ return get_lam();
+}
+
+static int thread_fn_set_lam(void *arg)
+{
+ struct testcases *test = arg;
+
+ return set_lam(test->lam);
+}
+
+static int handle_thread(struct testcases *test)
+{
+ char stack[STACK_SIZE];
+ int ret, child_ret;
+ int lam = 0;
+ pid_t pid;
+
+ /* Set LAM mode in parent process */
+ if (!test->later) {
+ lam = test->lam;
+ if (set_lam(lam) != 0)
+ return 1;
+ }
+
+ pid = clone(thread_fn_get_lam, stack + STACK_SIZE,
+ SIGCHLD | CLONE_FILES | CLONE_FS | CLONE_VM, NULL);
+ if (pid < 0) {
+ perror("Clone failed.");
+ return 1;
+ }
+
+ waitpid(pid, &child_ret, 0);
+ ret = WEXITSTATUS(child_ret);
+
+ if (lam != ret)
+ return 1;
+
+ if (test->later) {
+ if (set_lam(test->lam) != 0)
+ return 1;
+ }
+
+ return 0;
+}
+
+static int handle_thread_enable(struct testcases *test)
+{
+ char stack[STACK_SIZE];
+ int ret, child_ret;
+ int lam = test->lam;
+ pid_t pid;
+
+ pid = clone(thread_fn_set_lam, stack + STACK_SIZE,
+ SIGCHLD | CLONE_FILES | CLONE_FS | CLONE_VM, test);
+ if (pid < 0) {
+ perror("Clone failed.");
+ return 1;
+ }
+
+ waitpid(pid, &child_ret, 0);
+ ret = WEXITSTATUS(child_ret);
+
+ if (lam != ret)
+ return 1;
+
+ return 0;
+}
+static void run_test(struct testcases *test, int count)
+{
+ int i, ret = 0;
+
+ for (i = 0; i < count; i++) {
+ struct testcases *t = test + i;
+
+ /* fork a process to run test case */
+ tests_cnt++;
+ ret = fork_test(t);
+
+ /* return 3 is not support LA57, the case should be skipped */
+ if (ret == 3) {
+ ksft_test_result_skip("%s", t->msg);
+ continue;
+ }
+
+ if (ret != 0)
+ ret = (t->expected == ret);
+ else
+ ret = !(t->expected);
+
+ ksft_test_result(ret, "%s", t->msg);
+ }
+}
+
+static struct testcases uring_cases[] = {
+ {
+ .later = 0,
+ .lam = LAM_U57_BITS,
+ .test_func = handle_uring,
+ .msg = "URING: LAM_U57. Dereferencing pointer with metadata\n",
+ },
+ {
+ .later = 1,
+ .expected = 1,
+ .lam = LAM_U57_BITS,
+ .test_func = handle_uring,
+ .msg = "URING:[Negative] Disable LAM. Dereferencing pointer with metadata.\n",
+ },
+};
+
+static struct testcases malloc_cases[] = {
+ {
+ .later = 0,
+ .lam = LAM_U57_BITS,
+ .test_func = handle_malloc,
+ .msg = "MALLOC: LAM_U57. Dereferencing pointer with metadata\n",
+ },
+ {
+ .later = 1,
+ .expected = 2,
+ .lam = LAM_U57_BITS,
+ .test_func = handle_malloc,
+ .msg = "MALLOC:[Negative] Disable LAM. Dereferencing pointer with metadata.\n",
+ },
+};
+
+static struct testcases bits_cases[] = {
+ {
+ .test_func = handle_max_bits,
+ .msg = "BITS: Check default tag bits\n",
+ },
+};
+
+static struct testcases syscall_cases[] = {
+ {
+ .later = 0,
+ .lam = LAM_U57_BITS,
+ .test_func = handle_syscall,
+ .msg = "SYSCALL: LAM_U57. syscall with metadata\n",
+ },
+ {
+ .later = 1,
+ .expected = 1,
+ .lam = LAM_U57_BITS,
+ .test_func = handle_syscall,
+ .msg = "SYSCALL:[Negative] Disable LAM. Dereferencing pointer with metadata.\n",
+ },
+};
+
+static struct testcases mmap_cases[] = {
+ {
+ .later = 1,
+ .expected = 0,
+ .lam = LAM_U57_BITS,
+ .addr = HIGH_ADDR,
+ .test_func = handle_mmap,
+ .msg = "MMAP: First mmap high address, then set LAM_U57.\n",
+ },
+ {
+ .later = 0,
+ .expected = 0,
+ .lam = LAM_U57_BITS,
+ .addr = HIGH_ADDR,
+ .test_func = handle_mmap,
+ .msg = "MMAP: First LAM_U57, then High address.\n",
+ },
+ {
+ .later = 0,
+ .expected = 0,
+ .lam = LAM_U57_BITS,
+ .addr = LOW_ADDR,
+ .test_func = handle_mmap,
+ .msg = "MMAP: First LAM_U57, then Low address.\n",
+ },
+};
+
+static struct testcases inheritance_cases[] = {
+ {
+ .expected = 0,
+ .lam = LAM_U57_BITS,
+ .test_func = handle_inheritance,
+ .msg = "FORK: LAM_U57, child process should get LAM mode same as parent\n",
+ },
+ {
+ .expected = 0,
+ .lam = LAM_U57_BITS,
+ .test_func = handle_thread,
+ .msg = "THREAD: LAM_U57, child thread should get LAM mode same as parent\n",
+ },
+ {
+ .expected = 1,
+ .lam = LAM_U57_BITS,
+ .test_func = handle_thread_enable,
+ .msg = "THREAD: [NEGATIVE] Enable LAM in child.\n",
+ },
+ {
+ .expected = 1,
+ .later = 1,
+ .lam = LAM_U57_BITS,
+ .test_func = handle_thread,
+ .msg = "THREAD: [NEGATIVE] Enable LAM in parent after thread created.\n",
+ },
+ {
+ .expected = 0,
+ .lam = LAM_U57_BITS,
+ .test_func = handle_execve,
+ .msg = "EXECVE: LAM_U57, child process should get disabled LAM mode\n",
+ },
+};
+
+static void cmd_help(void)
+{
+ printf("usage: lam [-h] [-t test list]\n");
+ printf("\t-t test list: run tests specified in the test list, default:0x%x\n", TEST_MASK);
+ printf("\t\t0x1:malloc; 0x2:max_bits; 0x4:mmap; 0x8:syscall; 0x10:io_uring; 0x20:inherit;\n");
+ printf("\t-h: help\n");
+}
+
+/* Check for file existence */
+uint8_t file_Exists(const char *fileName)
+{
+ struct stat buffer;
+
+ uint8_t ret = (stat(fileName, &buffer) == 0);
+
+ return ret;
+}
+
+/* Sysfs idxd files */
+const char *dsa_configs[] = {
+ "echo 1 > /sys/bus/dsa/devices/dsa0/wq0.1/group_id",
+ "echo shared > /sys/bus/dsa/devices/dsa0/wq0.1/mode",
+ "echo 10 > /sys/bus/dsa/devices/dsa0/wq0.1/priority",
+ "echo 16 > /sys/bus/dsa/devices/dsa0/wq0.1/size",
+ "echo 15 > /sys/bus/dsa/devices/dsa0/wq0.1/threshold",
+ "echo user > /sys/bus/dsa/devices/dsa0/wq0.1/type",
+ "echo MyApp1 > /sys/bus/dsa/devices/dsa0/wq0.1/name",
+ "echo 1 > /sys/bus/dsa/devices/dsa0/engine0.1/group_id",
+ "echo dsa0 > /sys/bus/dsa/drivers/idxd/bind",
+ /* bind files and devices, generated a device file in /dev */
+ "echo wq0.1 > /sys/bus/dsa/drivers/user/bind",
+};
+
+/* DSA device file */
+const char *dsaDeviceFile = "/dev/dsa/wq0.1";
+/* file for io*/
+const char *dsaPasidEnable = "/sys/bus/dsa/devices/dsa0/pasid_enabled";
+
+/*
+ * DSA depends on kernel cmdline "intel_iommu=on,sm_on"
+ * return pasid_enabled (0: disable 1:enable)
+ */
+int Check_DSA_Kernel_Setting(void)
+{
+ char command[256] = "";
+ char buf[256] = "";
+ char *ptr;
+ int rv = -1;
+
+ snprintf(command, sizeof(command) - 1, "cat %s", dsaPasidEnable);
+
+ FILE *cmd = popen(command, "r");
+
+ if (cmd) {
+ while (fgets(buf, sizeof(buf) - 1, cmd) != NULL);
+
+ pclose(cmd);
+ rv = strtol(buf, &ptr, 16);
+ }
+
+ return rv;
+}
+
+/*
+ * Config DSA's sysfs files as shared DSA's WQ.
+ * Generated a device file /dev/dsa/wq0.1
+ * Return: 0 OK; 1 Failed; 3 Skip(SVA disabled).
+ */
+int Dsa_Init_Sysfs(void)
+{
+ uint len = ARRAY_SIZE(dsa_configs);
+ const char **p = dsa_configs;
+
+ if (file_Exists(dsaDeviceFile) == 1)
+ return 0;
+
+ /* check the idxd driver */
+ if (file_Exists(dsaPasidEnable) != 1) {
+ printf("Please make sure idxd driver was loaded\n");
+ return 3;
+ }
+
+ /* Check SVA feature */
+ if (Check_DSA_Kernel_Setting() != 1) {
+ printf("Please enable SVA.(Add intel_iommu=on,sm_on in kernel cmdline)\n");
+ return 3;
+ }
+
+ /* Check the idxd device file on /dev/dsa/ */
+ for (int i = 0; i < len; i++) {
+ if (system(p[i]))
+ return 1;
+ }
+
+ /* After config, /dev/dsa/wq0.1 should be generated */
+ return (file_Exists(dsaDeviceFile) != 1);
+}
+
+/*
+ * Open DSA device file, triger API: iommu_sva_alloc_pasid
+ */
+void *allocate_dsa_pasid(void)
+{
+ int fd;
+ void *wq;
+
+ fd = open(dsaDeviceFile, O_RDWR);
+ if (fd < 0) {
+ perror("open");
+ return MAP_FAILED;
+ }
+
+ wq = mmap(NULL, 0x1000, PROT_WRITE,
+ MAP_SHARED | MAP_POPULATE, fd, 0);
+ if (wq == MAP_FAILED)
+ perror("mmap");
+
+ return wq;
+}
+
+int set_force_svm(void)
+{
+ int ret = 0;
+
+ ret = syscall(SYS_arch_prctl, ARCH_FORCE_TAGGED_SVA);
+
+ return ret;
+}
+
+int handle_pasid(struct testcases *test)
+{
+ uint tmp = test->cmd;
+ uint runed = 0x0;
+ int ret = 0;
+ void *wq = NULL;
+
+ ret = Dsa_Init_Sysfs();
+ if (ret != 0)
+ return ret;
+
+ for (int i = 0; i < 3; i++) {
+ int err = 0;
+
+ if (tmp & 0x1) {
+ /* run set lam mode*/
+ if ((runed & 0x1) == 0) {
+ err = set_lam(LAM_U57_BITS);
+ runed = runed | 0x1;
+ } else
+ err = 1;
+ } else if (tmp & 0x4) {
+ /* run force svm */
+ if ((runed & 0x4) == 0) {
+ err = set_force_svm();
+ runed = runed | 0x4;
+ } else
+ err = 1;
+ } else if (tmp & 0x2) {
+ /* run allocate pasid */
+ if ((runed & 0x2) == 0) {
+ runed = runed | 0x2;
+ wq = allocate_dsa_pasid();
+ if (wq == MAP_FAILED)
+ err = 1;
+ } else
+ err = 1;
+ }
+
+ ret = ret + err;
+ if (ret > 0)
+ break;
+
+ tmp = tmp >> 4;
+ }
+
+ if (wq != MAP_FAILED && wq != NULL)
+ if (munmap(wq, 0x1000))
+ printf("munmap failed %d\n", errno);
+
+ if (runed != 0x7)
+ ret = 1;
+
+ return (ret != 0);
+}
+
+/*
+ * Pasid test depends on idxd and SVA, kernel should enable iommu and sm.
+ * command line(intel_iommu=on,sm_on)
+ */
+static struct testcases pasid_cases[] = {
+ {
+ .expected = 1,
+ .cmd = PAS_CMD(LAM_CMD_BIT, PAS_CMD_BIT, SVA_CMD_BIT),
+ .test_func = handle_pasid,
+ .msg = "PASID: [Negative] Execute LAM, PASID, SVA in sequence\n",
+ },
+ {
+ .expected = 0,
+ .cmd = PAS_CMD(LAM_CMD_BIT, SVA_CMD_BIT, PAS_CMD_BIT),
+ .test_func = handle_pasid,
+ .msg = "PASID: Execute LAM, SVA, PASID in sequence\n",
+ },
+ {
+ .expected = 1,
+ .cmd = PAS_CMD(PAS_CMD_BIT, LAM_CMD_BIT, SVA_CMD_BIT),
+ .test_func = handle_pasid,
+ .msg = "PASID: [Negative] Execute PASID, LAM, SVA in sequence\n",
+ },
+ {
+ .expected = 0,
+ .cmd = PAS_CMD(PAS_CMD_BIT, SVA_CMD_BIT, LAM_CMD_BIT),
+ .test_func = handle_pasid,
+ .msg = "PASID: Execute PASID, SVA, LAM in sequence\n",
+ },
+ {
+ .expected = 0,
+ .cmd = PAS_CMD(SVA_CMD_BIT, LAM_CMD_BIT, PAS_CMD_BIT),
+ .test_func = handle_pasid,
+ .msg = "PASID: Execute SVA, LAM, PASID in sequence\n",
+ },
+ {
+ .expected = 0,
+ .cmd = PAS_CMD(SVA_CMD_BIT, PAS_CMD_BIT, LAM_CMD_BIT),
+ .test_func = handle_pasid,
+ .msg = "PASID: Execute SVA, PASID, LAM in sequence\n",
+ },
+};
+
+int main(int argc, char **argv)
+{
+ int c = 0;
+ unsigned int tests = TEST_MASK;
+
+ tests_cnt = 0;
+
+ if (!cpu_has_lam()) {
+ ksft_print_msg("Unsupported LAM feature!\n");
+ return -1;
+ }
+
+ while ((c = getopt(argc, argv, "ht:")) != -1) {
+ switch (c) {
+ case 't':
+ tests = strtoul(optarg, NULL, 16);
+ if (tests && !(tests & TEST_MASK)) {
+ ksft_print_msg("Invalid argument!\n");
+ return -1;
+ }
+ break;
+ case 'h':
+ cmd_help();
+ return 0;
+ default:
+ ksft_print_msg("Invalid argument\n");
+ return -1;
+ }
+ }
+
+ /*
+ * When tests is 0, it is not a real test case;
+ * the option used by test case(execve) to check the lam mode in
+ * process generated by execve, the process read back lam mode and
+ * check with lam mode in parent process.
+ */
+ if (!tests)
+ return (get_lam());
+
+ /* Run test cases */
+ if (tests & FUNC_MALLOC)
+ run_test(malloc_cases, ARRAY_SIZE(malloc_cases));
+
+ if (tests & FUNC_BITS)
+ run_test(bits_cases, ARRAY_SIZE(bits_cases));
+
+ if (tests & FUNC_MMAP)
+ run_test(mmap_cases, ARRAY_SIZE(mmap_cases));
+
+ if (tests & FUNC_SYSCALL)
+ run_test(syscall_cases, ARRAY_SIZE(syscall_cases));
+
+ if (tests & FUNC_URING)
+ run_test(uring_cases, ARRAY_SIZE(uring_cases));
+
+ if (tests & FUNC_INHERITE)
+ run_test(inheritance_cases, ARRAY_SIZE(inheritance_cases));
+
+ if (tests & FUNC_PASID)
+ run_test(pasid_cases, ARRAY_SIZE(pasid_cases));
+
+ ksft_set_plan(tests_cnt);
+
+ return ksft_exit_pass();
+}
diff --git a/tools/testing/selftests/x86/ldt_gdt.c b/tools/testing/selftests/x86/ldt_gdt.c
index 1aef72df20a1..3a29346e1452 100644
--- a/tools/testing/selftests/x86/ldt_gdt.c
+++ b/tools/testing/selftests/x86/ldt_gdt.c
@@ -607,7 +607,7 @@ static void do_multicpu_tests(void)
failures++;
asm volatile ("mov %0, %%ss" : : "rm" (orig_ss));
- };
+ }
ftx = 100; /* Kill the thread. */
syscall(SYS_futex, &ftx, FUTEX_WAKE, 0, NULL, NULL, 0);
diff --git a/tools/testing/selftests/x86/mov_ss_trap.c b/tools/testing/selftests/x86/mov_ss_trap.c
index 6da0ac3f0135..cc3de6ff9fba 100644
--- a/tools/testing/selftests/x86/mov_ss_trap.c
+++ b/tools/testing/selftests/x86/mov_ss_trap.c
@@ -47,7 +47,6 @@
unsigned short ss;
extern unsigned char breakpoint_insn[];
sigjmp_buf jmpbuf;
-static unsigned char altstack_data[SIGSTKSZ];
static void enable_watchpoint(void)
{
@@ -250,13 +249,14 @@ int main()
if (sigsetjmp(jmpbuf, 1) == 0) {
printf("[RUN]\tMOV SS; SYSENTER\n");
stack_t stack = {
- .ss_sp = altstack_data,
+ .ss_sp = malloc(sizeof(char) * SIGSTKSZ),
.ss_size = SIGSTKSZ,
};
if (sigaltstack(&stack, NULL) != 0)
err(1, "sigaltstack");
sethandler(SIGSEGV, handle_and_longjmp, SA_RESETHAND | SA_ONSTACK);
nr = SYS_getpid;
+ free(stack.ss_sp);
/* Clear EBP first to make sure we segfault cleanly. */
asm volatile ("xorl %%ebp, %%ebp; mov %[ss], %%ss; SYSENTER" : "+a" (nr)
: [ss] "m" (ss) : "flags", "rcx"
diff --git a/tools/testing/selftests/x86/nx_stack.c b/tools/testing/selftests/x86/nx_stack.c
new file mode 100644
index 000000000000..ea4a4e246879
--- /dev/null
+++ b/tools/testing/selftests/x86/nx_stack.c
@@ -0,0 +1,212 @@
+/*
+ * Copyright (c) 2023 Alexey Dobriyan <adobriyan@gmail.com>
+ *
+ * Permission to use, copy, modify, and distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+ * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ */
+/*
+ * Test that userspace stack is NX. Requires linking with -Wl,-z,noexecstack
+ * because I don't want to bother with PT_GNU_STACK detection.
+ *
+ * Fill the stack with INT3's and then try to execute some of them:
+ * SIGSEGV -- good, SIGTRAP -- bad.
+ *
+ * Regular stack is completely overwritten before testing.
+ * Test doesn't exit SIGSEGV handler after first fault at INT3.
+ */
+#undef _GNU_SOURCE
+#define _GNU_SOURCE
+#undef NDEBUG
+#include <assert.h>
+#include <signal.h>
+#include <stdint.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <sys/mman.h>
+#include <sys/resource.h>
+#include <unistd.h>
+
+#define PAGE_SIZE 4096
+
+/*
+ * This is memset(rsp, 0xcc, -1); but down.
+ * It will SIGSEGV when bottom of the stack is reached.
+ * Byte-size access is important! (see rdi tweak in the signal handler).
+ */
+void make_stack1(void);
+asm(
+".pushsection .text\n"
+".globl make_stack1\n"
+".align 16\n"
+"make_stack1:\n"
+ "mov $0xcc, %al\n"
+#if defined __amd64__
+ "mov %rsp, %rdi\n"
+ "mov $-1, %rcx\n"
+#elif defined __i386__
+ "mov %esp, %edi\n"
+ "mov $-1, %ecx\n"
+#else
+#error
+#endif
+ "std\n"
+ "rep stosb\n"
+ /* unreachable */
+ "hlt\n"
+".type make_stack1,@function\n"
+".size make_stack1,.-make_stack1\n"
+".popsection\n"
+);
+
+/*
+ * memset(p, 0xcc, -1);
+ * It will SIGSEGV when top of the stack is reached.
+ */
+void make_stack2(uint64_t p);
+asm(
+".pushsection .text\n"
+".globl make_stack2\n"
+".align 16\n"
+"make_stack2:\n"
+ "mov $0xcc, %al\n"
+#if defined __amd64__
+ "mov $-1, %rcx\n"
+#elif defined __i386__
+ "mov $-1, %ecx\n"
+#else
+#error
+#endif
+ "cld\n"
+ "rep stosb\n"
+ /* unreachable */
+ "hlt\n"
+".type make_stack2,@function\n"
+".size make_stack2,.-make_stack2\n"
+".popsection\n"
+);
+
+static volatile int test_state = 0;
+static volatile unsigned long stack_min_addr;
+
+#if defined __amd64__
+#define RDI REG_RDI
+#define RIP REG_RIP
+#define RIP_STRING "rip"
+#elif defined __i386__
+#define RDI REG_EDI
+#define RIP REG_EIP
+#define RIP_STRING "eip"
+#else
+#error
+#endif
+
+static void sigsegv(int _, siginfo_t *__, void *uc_)
+{
+ /*
+ * Some Linux versions didn't clear DF before entering signal
+ * handler. make_stack1() doesn't have a chance to clear DF
+ * either so we clear it by hand here.
+ */
+ asm volatile ("cld" ::: "memory");
+
+ ucontext_t *uc = uc_;
+
+ if (test_state == 0) {
+ /* Stack is faulted and cleared from RSP to the lowest address. */
+ stack_min_addr = ++uc->uc_mcontext.gregs[RDI];
+ if (1) {
+ printf("stack min %lx\n", stack_min_addr);
+ }
+ uc->uc_mcontext.gregs[RIP] = (uintptr_t)&make_stack2;
+ test_state = 1;
+ } else if (test_state == 1) {
+ /* Stack has been cleared from top to bottom. */
+ unsigned long stack_max_addr = uc->uc_mcontext.gregs[RDI];
+ if (1) {
+ printf("stack max %lx\n", stack_max_addr);
+ }
+ /* Start faulting pages on stack and see what happens. */
+ uc->uc_mcontext.gregs[RIP] = stack_max_addr - PAGE_SIZE;
+ test_state = 2;
+ } else if (test_state == 2) {
+ /* Stack page is NX -- good, test next page. */
+ uc->uc_mcontext.gregs[RIP] -= PAGE_SIZE;
+ if (uc->uc_mcontext.gregs[RIP] == stack_min_addr) {
+ /* One more SIGSEGV and test ends. */
+ test_state = 3;
+ }
+ } else {
+ printf("PASS\tAll stack pages are NX\n");
+ _exit(EXIT_SUCCESS);
+ }
+}
+
+static void sigtrap(int _, siginfo_t *__, void *uc_)
+{
+ const ucontext_t *uc = uc_;
+ unsigned long rip = uc->uc_mcontext.gregs[RIP];
+ printf("FAIL\texecutable page on the stack: " RIP_STRING " %lx\n", rip);
+ _exit(EXIT_FAILURE);
+}
+
+int main(void)
+{
+ {
+ struct sigaction act = {};
+ sigemptyset(&act.sa_mask);
+ act.sa_flags = SA_SIGINFO;
+ act.sa_sigaction = &sigsegv;
+ int rv = sigaction(SIGSEGV, &act, NULL);
+ assert(rv == 0);
+ }
+ {
+ struct sigaction act = {};
+ sigemptyset(&act.sa_mask);
+ act.sa_flags = SA_SIGINFO;
+ act.sa_sigaction = &sigtrap;
+ int rv = sigaction(SIGTRAP, &act, NULL);
+ assert(rv == 0);
+ }
+ {
+ struct rlimit rlim;
+ int rv = getrlimit(RLIMIT_STACK, &rlim);
+ assert(rv == 0);
+ /* Cap stack at time-honored 8 MiB value. */
+ rlim.rlim_max = rlim.rlim_cur;
+ if (rlim.rlim_max > 8 * 1024 * 1024) {
+ rlim.rlim_max = 8 * 1024 * 1024;
+ }
+ rv = setrlimit(RLIMIT_STACK, &rlim);
+ assert(rv == 0);
+ }
+ {
+ /*
+ * We don't know now much stack SIGSEGV handler uses.
+ * Bump this by 1 page every time someone complains,
+ * or rewrite it in assembly.
+ */
+ const size_t len = SIGSTKSZ;
+ void *p = mmap(NULL, len, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0);
+ assert(p != MAP_FAILED);
+ stack_t ss = {};
+ ss.ss_sp = p;
+ ss.ss_size = len;
+ int rv = sigaltstack(&ss, NULL);
+ assert(rv == 0);
+ }
+ make_stack1();
+ /*
+ * Unreachable, but if _this_ INT3 is ever reached, it's a bug somewhere.
+ * Fold it into main SIGTRAP pathway.
+ */
+ __builtin_trap();
+}
diff --git a/tools/testing/selftests/x86/raw_syscall_helper_32.S b/tools/testing/selftests/x86/raw_syscall_helper_32.S
index 94410fa2b5ed..a10d36afdca0 100644
--- a/tools/testing/selftests/x86/raw_syscall_helper_32.S
+++ b/tools/testing/selftests/x86/raw_syscall_helper_32.S
@@ -45,3 +45,5 @@ int80_and_ret:
.type int80_and_ret, @function
.size int80_and_ret, .-int80_and_ret
+
+.section .note.GNU-stack,"",%progbits
diff --git a/tools/testing/selftests/x86/sigaltstack.c b/tools/testing/selftests/x86/sigaltstack.c
new file mode 100644
index 000000000000..f689af75e979
--- /dev/null
+++ b/tools/testing/selftests/x86/sigaltstack.c
@@ -0,0 +1,128 @@
+// SPDX-License-Identifier: GPL-2.0-only
+
+#define _GNU_SOURCE
+#include <signal.h>
+#include <stdio.h>
+#include <stdbool.h>
+#include <string.h>
+#include <err.h>
+#include <errno.h>
+#include <limits.h>
+#include <sys/mman.h>
+#include <sys/auxv.h>
+#include <sys/prctl.h>
+#include <sys/resource.h>
+#include <setjmp.h>
+
+/* sigaltstack()-enforced minimum stack */
+#define ENFORCED_MINSIGSTKSZ 2048
+
+#ifndef AT_MINSIGSTKSZ
+# define AT_MINSIGSTKSZ 51
+#endif
+
+static int nerrs;
+
+static bool sigalrm_expected;
+
+static unsigned long at_minstack_size;
+
+static void sethandler(int sig, void (*handler)(int, siginfo_t *, void *),
+ int flags)
+{
+ struct sigaction sa;
+
+ memset(&sa, 0, sizeof(sa));
+ sa.sa_sigaction = handler;
+ sa.sa_flags = SA_SIGINFO | flags;
+ sigemptyset(&sa.sa_mask);
+ if (sigaction(sig, &sa, 0))
+ err(1, "sigaction");
+}
+
+static void clearhandler(int sig)
+{
+ struct sigaction sa;
+
+ memset(&sa, 0, sizeof(sa));
+ sa.sa_handler = SIG_DFL;
+ sigemptyset(&sa.sa_mask);
+ if (sigaction(sig, &sa, 0))
+ err(1, "sigaction");
+}
+
+static int setup_altstack(void *start, unsigned long size)
+{
+ stack_t ss;
+
+ memset(&ss, 0, sizeof(ss));
+ ss.ss_size = size;
+ ss.ss_sp = start;
+
+ return sigaltstack(&ss, NULL);
+}
+
+static jmp_buf jmpbuf;
+
+static void sigsegv(int sig, siginfo_t *info, void *ctx_void)
+{
+ if (sigalrm_expected) {
+ printf("[FAIL]\tWrong signal delivered: SIGSEGV (expected SIGALRM).");
+ nerrs++;
+ } else {
+ printf("[OK]\tSIGSEGV signal delivered.\n");
+ }
+
+ siglongjmp(jmpbuf, 1);
+}
+
+static void sigalrm(int sig, siginfo_t *info, void *ctx_void)
+{
+ if (!sigalrm_expected) {
+ printf("[FAIL]\tWrong signal delivered: SIGALRM (expected SIGSEGV).");
+ nerrs++;
+ } else {
+ printf("[OK]\tSIGALRM signal delivered.\n");
+ }
+}
+
+static void test_sigaltstack(void *altstack, unsigned long size)
+{
+ if (setup_altstack(altstack, size))
+ err(1, "sigaltstack()");
+
+ sigalrm_expected = (size > at_minstack_size) ? true : false;
+
+ sethandler(SIGSEGV, sigsegv, 0);
+ sethandler(SIGALRM, sigalrm, SA_ONSTACK);
+
+ if (!sigsetjmp(jmpbuf, 1)) {
+ printf("[RUN]\tTest an alternate signal stack of %ssufficient size.\n",
+ sigalrm_expected ? "" : "in");
+ printf("\tRaise SIGALRM. %s is expected to be delivered.\n",
+ sigalrm_expected ? "It" : "SIGSEGV");
+ raise(SIGALRM);
+ }
+
+ clearhandler(SIGALRM);
+ clearhandler(SIGSEGV);
+}
+
+int main(void)
+{
+ void *altstack;
+
+ at_minstack_size = getauxval(AT_MINSIGSTKSZ);
+
+ altstack = mmap(NULL, at_minstack_size + SIGSTKSZ, PROT_READ | PROT_WRITE,
+ MAP_PRIVATE | MAP_ANONYMOUS | MAP_STACK, -1, 0);
+ if (altstack == MAP_FAILED)
+ err(1, "mmap()");
+
+ if ((ENFORCED_MINSIGSTKSZ + 1) < at_minstack_size)
+ test_sigaltstack(altstack, ENFORCED_MINSIGSTKSZ + 1);
+
+ test_sigaltstack(altstack, at_minstack_size + SIGSTKSZ);
+
+ return nerrs == 0 ? 0 : 1;
+}
diff --git a/tools/testing/selftests/x86/sigreturn.c b/tools/testing/selftests/x86/sigreturn.c
index 57c4f67f16ef..5d7961a5f7f6 100644
--- a/tools/testing/selftests/x86/sigreturn.c
+++ b/tools/testing/selftests/x86/sigreturn.c
@@ -138,9 +138,6 @@ static unsigned short LDT3(int idx)
return (idx << 3) | 7;
}
-/* Our sigaltstack scratch space. */
-static char altstack_data[SIGSTKSZ];
-
static void sethandler(int sig, void (*handler)(int, siginfo_t *, void *),
int flags)
{
@@ -771,7 +768,8 @@ int main()
setup_ldt();
stack_t stack = {
- .ss_sp = altstack_data,
+ /* Our sigaltstack scratch space. */
+ .ss_sp = malloc(sizeof(char) * SIGSTKSZ),
.ss_size = SIGSTKSZ,
};
if (sigaltstack(&stack, NULL) != 0)
@@ -872,5 +870,6 @@ int main()
total_nerrs += test_nonstrict_ss();
#endif
+ free(stack.ss_sp);
return total_nerrs ? 1 : 0;
}
diff --git a/tools/testing/selftests/x86/single_step_syscall.c b/tools/testing/selftests/x86/single_step_syscall.c
index 1063328e275c..9a30f443e928 100644
--- a/tools/testing/selftests/x86/single_step_syscall.c
+++ b/tools/testing/selftests/x86/single_step_syscall.c
@@ -31,6 +31,8 @@
#include <sys/ptrace.h>
#include <sys/user.h>
+#include "helpers.h"
+
static void sethandler(int sig, void (*handler)(int, siginfo_t *, void *),
int flags)
{
@@ -55,7 +57,6 @@ static void clearhandler(int sig)
static volatile sig_atomic_t sig_traps, sig_eflags;
sigjmp_buf jmpbuf;
-static unsigned char altstack_data[SIGSTKSZ];
#ifdef __x86_64__
# define REG_IP REG_RIP
@@ -67,21 +68,6 @@ static unsigned char altstack_data[SIGSTKSZ];
# define INT80_CLOBBERS
#endif
-static unsigned long get_eflags(void)
-{
- unsigned long eflags;
- asm volatile ("pushf" WIDTH "\n\tpop" WIDTH " %0" : "=rm" (eflags));
- return eflags;
-}
-
-static void set_eflags(unsigned long eflags)
-{
- asm volatile ("push" WIDTH " %0\n\tpopf" WIDTH
- : : "rm" (eflags) : "flags");
-}
-
-#define X86_EFLAGS_TF (1UL << 8)
-
static void sigtrap(int sig, siginfo_t *info, void *ctx_void)
{
ucontext_t *ctx = (ucontext_t*)ctx_void;
@@ -223,7 +209,7 @@ int main()
unsigned long nr = SYS_getpid;
printf("[RUN]\tSet TF and check SYSENTER\n");
stack_t stack = {
- .ss_sp = altstack_data,
+ .ss_sp = malloc(sizeof(char) * SIGSTKSZ),
.ss_size = SIGSTKSZ,
};
if (sigaltstack(&stack, NULL) != 0)
@@ -232,6 +218,7 @@ int main()
SA_RESETHAND | SA_ONSTACK);
sethandler(SIGILL, print_and_longjmp, SA_RESETHAND);
set_eflags(get_eflags() | X86_EFLAGS_TF);
+ free(stack.ss_sp);
/* Clear EBP first to make sure we segfault cleanly. */
asm volatile ("xorl %%ebp, %%ebp; SYSENTER" : "+a" (nr) :: "flags", "rcx"
#ifdef __x86_64__
diff --git a/tools/testing/selftests/x86/syscall_arg_fault.c b/tools/testing/selftests/x86/syscall_arg_fault.c
index bc0ecc2e862e..461fa41a4d02 100644
--- a/tools/testing/selftests/x86/syscall_arg_fault.c
+++ b/tools/testing/selftests/x86/syscall_arg_fault.c
@@ -15,29 +15,7 @@
#include <setjmp.h>
#include <errno.h>
-#ifdef __x86_64__
-# define WIDTH "q"
-#else
-# define WIDTH "l"
-#endif
-
-/* Our sigaltstack scratch space. */
-static unsigned char altstack_data[SIGSTKSZ];
-
-static unsigned long get_eflags(void)
-{
- unsigned long eflags;
- asm volatile ("pushf" WIDTH "\n\tpop" WIDTH " %0" : "=rm" (eflags));
- return eflags;
-}
-
-static void set_eflags(unsigned long eflags)
-{
- asm volatile ("push" WIDTH " %0\n\tpopf" WIDTH
- : : "rm" (eflags) : "flags");
-}
-
-#define X86_EFLAGS_TF (1UL << 8)
+#include "helpers.h"
static void sethandler(int sig, void (*handler)(int, siginfo_t *, void *),
int flags)
@@ -72,6 +50,7 @@ static void sigsegv_or_sigbus(int sig, siginfo_t *info, void *ctx_void)
if (ax != -EFAULT && ax != -ENOSYS) {
printf("[FAIL]\tAX had the wrong value: 0x%lx\n",
(unsigned long)ax);
+ printf("\tIP = 0x%lx\n", (unsigned long)ctx->uc_mcontext.gregs[REG_IP]);
n_errs++;
} else {
printf("[OK]\tSeems okay\n");
@@ -122,7 +101,8 @@ static void sigill(int sig, siginfo_t *info, void *ctx_void)
int main()
{
stack_t stack = {
- .ss_sp = altstack_data,
+ /* Our sigaltstack scratch space. */
+ .ss_sp = malloc(sizeof(char) * SIGSTKSZ),
.ss_size = SIGSTKSZ,
};
if (sigaltstack(&stack, NULL) != 0)
@@ -226,5 +206,31 @@ int main()
}
set_eflags(get_eflags() & ~X86_EFLAGS_TF);
+#ifdef __x86_64__
+ printf("[RUN]\tSYSENTER with TF, invalid state, and GSBASE < 0\n");
+
+ if (sigsetjmp(jmpbuf, 1) == 0) {
+ sigtrap_consecutive_syscalls = 0;
+
+ asm volatile ("wrgsbase %%rax\n\t"
+ :: "a" (0xffffffffffff0000UL));
+
+ set_eflags(get_eflags() | X86_EFLAGS_TF);
+ asm volatile (
+ "movl $-1, %%eax\n\t"
+ "movl $-1, %%ebx\n\t"
+ "movl $-1, %%ecx\n\t"
+ "movl $-1, %%edx\n\t"
+ "movl $-1, %%esi\n\t"
+ "movl $-1, %%edi\n\t"
+ "movl $-1, %%ebp\n\t"
+ "movl $-1, %%esp\n\t"
+ "sysenter"
+ : : : "memory", "flags");
+ }
+ set_eflags(get_eflags() & ~X86_EFLAGS_TF);
+#endif
+
+ free(stack.ss_sp);
return 0;
}
diff --git a/tools/testing/selftests/x86/syscall_nt.c b/tools/testing/selftests/x86/syscall_nt.c
index 02309a195041..a108b80dd082 100644
--- a/tools/testing/selftests/x86/syscall_nt.c
+++ b/tools/testing/selftests/x86/syscall_nt.c
@@ -13,29 +13,11 @@
#include <signal.h>
#include <err.h>
#include <sys/syscall.h>
-#include <asm/processor-flags.h>
-#ifdef __x86_64__
-# define WIDTH "q"
-#else
-# define WIDTH "l"
-#endif
+#include "helpers.h"
static unsigned int nerrs;
-static unsigned long get_eflags(void)
-{
- unsigned long eflags;
- asm volatile ("pushf" WIDTH "\n\tpop" WIDTH " %0" : "=rm" (eflags));
- return eflags;
-}
-
-static void set_eflags(unsigned long eflags)
-{
- asm volatile ("push" WIDTH " %0\n\tpopf" WIDTH
- : : "rm" (eflags) : "flags");
-}
-
static void sethandler(int sig, void (*handler)(int, siginfo_t *, void *),
int flags)
{
@@ -59,6 +41,7 @@ static void do_it(unsigned long extraflags)
set_eflags(get_eflags() | extraflags);
syscall(SYS_getpid);
flags = get_eflags();
+ set_eflags(X86_EFLAGS_IF | X86_EFLAGS_FIXED);
if ((flags & extraflags) == extraflags) {
printf("[OK]\tThe syscall worked and flags are still set\n");
} else {
@@ -73,6 +56,12 @@ int main(void)
printf("[RUN]\tSet NT and issue a syscall\n");
do_it(X86_EFLAGS_NT);
+ printf("[RUN]\tSet AC and issue a syscall\n");
+ do_it(X86_EFLAGS_AC);
+
+ printf("[RUN]\tSet NT|AC and issue a syscall\n");
+ do_it(X86_EFLAGS_NT | X86_EFLAGS_AC);
+
/*
* Now try it again with TF set -- TF forces returns via IRET in all
* cases except non-ptregs-using 64-bit full fast path syscalls.
@@ -80,8 +69,28 @@ int main(void)
sethandler(SIGTRAP, sigtrap, 0);
+ printf("[RUN]\tSet TF and issue a syscall\n");
+ do_it(X86_EFLAGS_TF);
+
printf("[RUN]\tSet NT|TF and issue a syscall\n");
do_it(X86_EFLAGS_NT | X86_EFLAGS_TF);
+ printf("[RUN]\tSet AC|TF and issue a syscall\n");
+ do_it(X86_EFLAGS_AC | X86_EFLAGS_TF);
+
+ printf("[RUN]\tSet NT|AC|TF and issue a syscall\n");
+ do_it(X86_EFLAGS_NT | X86_EFLAGS_AC | X86_EFLAGS_TF);
+
+ /*
+ * Now try DF. This is evil and it's plausible that we will crash
+ * glibc, but glibc would have to do something rather surprising
+ * for this to happen.
+ */
+ printf("[RUN]\tSet DF and issue a syscall\n");
+ do_it(X86_EFLAGS_DF);
+
+ printf("[RUN]\tSet TF|DF and issue a syscall\n");
+ do_it(X86_EFLAGS_TF | X86_EFLAGS_DF);
+
return nerrs == 0 ? 0 : 1;
}
diff --git a/tools/testing/selftests/x86/syscall_numbering.c b/tools/testing/selftests/x86/syscall_numbering.c
index d6b09cb1aa2c..991591718bb0 100644
--- a/tools/testing/selftests/x86/syscall_numbering.c
+++ b/tools/testing/selftests/x86/syscall_numbering.c
@@ -1,6 +1,8 @@
/* SPDX-License-Identifier: GPL-2.0 */
/*
- * syscall_arg_fault.c - tests faults 32-bit fast syscall stack args
+ * syscall_numbering.c - test calling the x86-64 kernel with various
+ * valid and invalid system call numbers.
+ *
* Copyright (c) 2018 Andrew Lutomirski
*/
@@ -11,79 +13,470 @@
#include <stdbool.h>
#include <errno.h>
#include <unistd.h>
-#include <syscall.h>
+#include <string.h>
+#include <fcntl.h>
+#include <limits.h>
+#include <signal.h>
+#include <sysexits.h>
-static int nerrs;
+#include <sys/ptrace.h>
+#include <sys/user.h>
+#include <sys/wait.h>
+#include <sys/mman.h>
-#define X32_BIT 0x40000000UL
+#include <linux/ptrace.h>
-static void check_enosys(unsigned long nr, bool *ok)
+/* Common system call numbers */
+#define SYS_READ 0
+#define SYS_WRITE 1
+#define SYS_GETPID 39
+/* x64-only system call numbers */
+#define X64_IOCTL 16
+#define X64_READV 19
+#define X64_WRITEV 20
+/* x32-only system call numbers (without X32_BIT) */
+#define X32_IOCTL 514
+#define X32_READV 515
+#define X32_WRITEV 516
+
+#define X32_BIT 0x40000000
+
+static int nullfd = -1; /* File descriptor for /dev/null */
+static bool with_x32; /* x32 supported on this kernel? */
+
+enum ptrace_pass {
+ PTP_NOTHING,
+ PTP_GETREGS,
+ PTP_WRITEBACK,
+ PTP_FUZZRET,
+ PTP_FUZZHIGH,
+ PTP_INTNUM,
+ PTP_DONE
+};
+
+static const char * const ptrace_pass_name[] =
{
- /* If this fails, a segfault is reasonably likely. */
- fflush(stdout);
+ [PTP_NOTHING] = "just stop, no data read",
+ [PTP_GETREGS] = "only getregs",
+ [PTP_WRITEBACK] = "getregs, unmodified setregs",
+ [PTP_FUZZRET] = "modifying the default return",
+ [PTP_FUZZHIGH] = "clobbering the top 32 bits",
+ [PTP_INTNUM] = "sign-extending the syscall number",
+};
- long ret = syscall(nr, 0, 0, 0, 0, 0, 0);
- if (ret == 0) {
- printf("[FAIL]\tsyscall %lu succeeded, but it should have failed\n", nr);
- *ok = false;
- } else if (errno != ENOSYS) {
- printf("[FAIL]\tsyscall %lu had error code %d, but it should have reported ENOSYS\n", nr, errno);
- *ok = false;
- }
+/*
+ * Shared memory block between tracer and test
+ */
+struct shared {
+ unsigned int nerr; /* Total error count */
+ unsigned int indent; /* Message indentation level */
+ enum ptrace_pass ptrace_pass;
+ bool probing_syscall; /* In probe_syscall() */
+};
+static volatile struct shared *sh;
+
+static inline unsigned int offset(void)
+{
+ unsigned int level = sh ? sh->indent : 0;
+
+ return 8 + level * 4;
}
-static void test_x32_without_x32_bit(void)
+#define msg(lvl, fmt, ...) printf("%-*s" fmt, offset(), "[" #lvl "]", \
+ ## __VA_ARGS__)
+
+#define run(fmt, ...) msg(RUN, fmt, ## __VA_ARGS__)
+#define info(fmt, ...) msg(INFO, fmt, ## __VA_ARGS__)
+#define ok(fmt, ...) msg(OK, fmt, ## __VA_ARGS__)
+
+#define fail(fmt, ...) \
+ do { \
+ msg(FAIL, fmt, ## __VA_ARGS__); \
+ sh->nerr++; \
+ } while (0)
+
+#define crit(fmt, ...) \
+ do { \
+ sh->indent = 0; \
+ msg(FAIL, fmt, ## __VA_ARGS__); \
+ msg(SKIP, "Unable to run test\n"); \
+ exit(EX_OSERR); \
+ } while (0)
+
+/* Sentinel for ptrace-modified return value */
+#define MODIFIED_BY_PTRACE -9999
+
+/*
+ * Directly invokes the given syscall with nullfd as the first argument
+ * and the rest zero. Avoids involving glibc wrappers in case they ever
+ * end up intercepting some system calls for some reason, or modify
+ * the system call number itself.
+ */
+static long long probe_syscall(int msb, int lsb)
{
- bool ok = true;
+ register long long arg1 asm("rdi") = nullfd;
+ register long long arg2 asm("rsi") = 0;
+ register long long arg3 asm("rdx") = 0;
+ register long long arg4 asm("r10") = 0;
+ register long long arg5 asm("r8") = 0;
+ register long long arg6 asm("r9") = 0;
+ long long nr = ((long long)msb << 32) | (unsigned int)lsb;
+ long long ret;
/*
- * Syscalls 512-547 are "x32" syscalls. They are intended to be
- * called with the x32 (0x40000000) bit set. Calling them without
- * the x32 bit set is nonsense and should not work.
+ * We pass in an extra copy of the extended system call number
+ * in %rbx, so we can examine it from the ptrace handler without
+ * worrying about it being possibly modified. This is to test
+ * the validity of struct user regs.orig_rax a.k.a.
+ * struct pt_regs.orig_ax.
*/
- printf("[RUN]\tChecking syscalls 512-547\n");
- for (int i = 512; i <= 547; i++)
- check_enosys(i, &ok);
+ sh->probing_syscall = true;
+ asm volatile("syscall"
+ : "=a" (ret)
+ : "a" (nr), "b" (nr),
+ "r" (arg1), "r" (arg2), "r" (arg3),
+ "r" (arg4), "r" (arg5), "r" (arg6)
+ : "rcx", "r11", "memory", "cc");
+ sh->probing_syscall = false;
+
+ return ret;
+}
+
+static const char *syscall_str(int msb, int start, int end)
+{
+ static char buf[64];
+ const char * const type = (start & X32_BIT) ? "x32" : "x64";
+ int lsb = start;
/*
- * Check that a handful of 64-bit-only syscalls are rejected if the x32
- * bit is set.
+ * Improve readability by stripping the x32 bit, but round
+ * toward zero so we don't display -1 as -1073741825.
*/
- printf("[RUN]\tChecking some 64-bit syscalls in x32 range\n");
- check_enosys(16 | X32_BIT, &ok); /* ioctl */
- check_enosys(19 | X32_BIT, &ok); /* readv */
- check_enosys(20 | X32_BIT, &ok); /* writev */
+ if (lsb < 0)
+ lsb |= X32_BIT;
+ else
+ lsb &= ~X32_BIT;
+
+ if (start == end)
+ snprintf(buf, sizeof buf, "%s syscall %d:%d",
+ type, msb, lsb);
+ else
+ snprintf(buf, sizeof buf, "%s syscalls %d:%d..%d",
+ type, msb, lsb, lsb + (end-start));
+
+ return buf;
+}
+
+static unsigned int _check_for(int msb, int start, int end, long long expect,
+ const char *expect_str)
+{
+ unsigned int err = 0;
+
+ sh->indent++;
+ if (start != end)
+ sh->indent++;
+
+ for (int nr = start; nr <= end; nr++) {
+ long long ret = probe_syscall(msb, nr);
+
+ if (ret != expect) {
+ fail("%s returned %lld, but it should have returned %s\n",
+ syscall_str(msb, nr, nr),
+ ret, expect_str);
+ err++;
+ }
+ }
+
+ if (start != end)
+ sh->indent--;
+
+ if (err) {
+ if (start != end)
+ fail("%s had %u failure%s\n",
+ syscall_str(msb, start, end),
+ err, err == 1 ? "s" : "");
+ } else {
+ ok("%s returned %s as expected\n",
+ syscall_str(msb, start, end), expect_str);
+ }
+
+ sh->indent--;
+
+ return err;
+}
+
+#define check_for(msb,start,end,expect) \
+ _check_for(msb,start,end,expect,#expect)
+
+static bool check_zero(int msb, int nr)
+{
+ return check_for(msb, nr, nr, 0);
+}
+
+static bool check_enosys(int msb, int nr)
+{
+ return check_for(msb, nr, nr, -ENOSYS);
+}
+
+/*
+ * Anyone diagnosing a failure will want to know whether the kernel
+ * supports x32. Tell them. This can also be used to conditionalize
+ * tests based on existence or nonexistence of x32.
+ */
+static bool test_x32(void)
+{
+ long long ret;
+ pid_t mypid = getpid();
+
+ run("Checking for x32 by calling x32 getpid()\n");
+ ret = probe_syscall(0, SYS_GETPID | X32_BIT);
+
+ sh->indent++;
+ if (ret == mypid) {
+ info("x32 is supported\n");
+ with_x32 = true;
+ } else if (ret == -ENOSYS) {
+ info("x32 is not supported\n");
+ with_x32 = false;
+ } else {
+ fail("x32 getpid() returned %lld, but it should have returned either %lld or -ENOSYS\n", ret, (long long)mypid);
+ with_x32 = false;
+ }
+ sh->indent--;
+ return with_x32;
+}
+
+static void test_syscalls_common(int msb)
+{
+ enum ptrace_pass pass = sh->ptrace_pass;
+
+ run("Checking some common syscalls as 64 bit\n");
+ check_zero(msb, SYS_READ);
+ check_zero(msb, SYS_WRITE);
+
+ run("Checking some 64-bit only syscalls as 64 bit\n");
+ check_zero(msb, X64_READV);
+ check_zero(msb, X64_WRITEV);
+
+ run("Checking out of range system calls\n");
+ check_for(msb, -64, -2, -ENOSYS);
+ if (pass >= PTP_FUZZRET)
+ check_for(msb, -1, -1, MODIFIED_BY_PTRACE);
+ else
+ check_for(msb, -1, -1, -ENOSYS);
+ check_for(msb, X32_BIT-64, X32_BIT-1, -ENOSYS);
+ check_for(msb, -64-X32_BIT, -1-X32_BIT, -ENOSYS);
+ check_for(msb, INT_MAX-64, INT_MAX-1, -ENOSYS);
+}
+static void test_syscalls_with_x32(int msb)
+{
/*
- * Check some syscalls with high bits set.
+ * Syscalls 512-547 are "x32" syscalls. They are
+ * intended to be called with the x32 (0x40000000) bit
+ * set. Calling them without the x32 bit set is
+ * nonsense and should not work.
*/
- printf("[RUN]\tChecking numbers above 2^32-1\n");
- check_enosys((1UL << 32), &ok);
- check_enosys(X32_BIT | (1UL << 32), &ok);
+ run("Checking x32 syscalls as 64 bit\n");
+ check_for(msb, 512, 547, -ENOSYS);
- if (!ok)
- nerrs++;
- else
- printf("[OK]\tThey all returned -ENOSYS\n");
+ run("Checking some common syscalls as x32\n");
+ check_zero(msb, SYS_READ | X32_BIT);
+ check_zero(msb, SYS_WRITE | X32_BIT);
+
+ run("Checking some x32 syscalls as x32\n");
+ check_zero(msb, X32_READV | X32_BIT);
+ check_zero(msb, X32_WRITEV | X32_BIT);
+
+ run("Checking some 64-bit syscalls as x32\n");
+ check_enosys(msb, X64_IOCTL | X32_BIT);
+ check_enosys(msb, X64_READV | X32_BIT);
+ check_enosys(msb, X64_WRITEV | X32_BIT);
}
-int main()
+static void test_syscalls_without_x32(int msb)
{
+ run("Checking for absence of x32 system calls\n");
+ check_for(msb, 0 | X32_BIT, 999 | X32_BIT, -ENOSYS);
+}
+
+static void test_syscall_numbering(void)
+{
+ static const int msbs[] = {
+ 0, 1, -1, X32_BIT-1, X32_BIT, X32_BIT-1, -X32_BIT, INT_MAX,
+ INT_MIN, INT_MIN+1
+ };
+
+ sh->indent++;
+
/*
- * Anyone diagnosing a failure will want to know whether the kernel
- * supports x32. Tell them.
+ * The MSB is supposed to be ignored, so we loop over a few
+ * to test that out.
*/
- printf("\tChecking for x32...");
- fflush(stdout);
- if (syscall(39 | X32_BIT, 0, 0, 0, 0, 0, 0) >= 0) {
- printf(" supported\n");
- } else if (errno == ENOSYS) {
- printf(" not supported\n");
+ for (size_t i = 0; i < sizeof(msbs)/sizeof(msbs[0]); i++) {
+ int msb = msbs[i];
+ run("Checking system calls with msb = %d (0x%x)\n",
+ msb, msb);
+
+ sh->indent++;
+
+ test_syscalls_common(msb);
+ if (with_x32)
+ test_syscalls_with_x32(msb);
+ else
+ test_syscalls_without_x32(msb);
+
+ sh->indent--;
+ }
+
+ sh->indent--;
+}
+
+static void syscall_numbering_tracee(void)
+{
+ enum ptrace_pass pass;
+
+ if (ptrace(PTRACE_TRACEME, 0, 0, 0)) {
+ crit("Failed to request tracing\n");
+ return;
+ }
+ raise(SIGSTOP);
+
+ for (sh->ptrace_pass = pass = PTP_NOTHING; pass < PTP_DONE;
+ sh->ptrace_pass = ++pass) {
+ run("Running tests under ptrace: %s\n", ptrace_pass_name[pass]);
+ test_syscall_numbering();
+ }
+}
+
+static void mess_with_syscall(pid_t testpid, enum ptrace_pass pass)
+{
+ struct user_regs_struct regs;
+
+ sh->probing_syscall = false; /* Do this on entry only */
+
+ /* For these, don't even getregs */
+ if (pass == PTP_NOTHING || pass == PTP_DONE)
+ return;
+
+ ptrace(PTRACE_GETREGS, testpid, NULL, &regs);
+
+ if (regs.orig_rax != regs.rbx) {
+ fail("orig_rax %#llx doesn't match syscall number %#llx\n",
+ (unsigned long long)regs.orig_rax,
+ (unsigned long long)regs.rbx);
+ }
+
+ switch (pass) {
+ case PTP_GETREGS:
+ /* Just read, no writeback */
+ return;
+ case PTP_WRITEBACK:
+ /* Write back the same register state verbatim */
+ break;
+ case PTP_FUZZRET:
+ regs.rax = MODIFIED_BY_PTRACE;
+ break;
+ case PTP_FUZZHIGH:
+ regs.rax = MODIFIED_BY_PTRACE;
+ regs.orig_rax = regs.orig_rax | 0xffffffff00000000ULL;
+ break;
+ case PTP_INTNUM:
+ regs.rax = MODIFIED_BY_PTRACE;
+ regs.orig_rax = (int)regs.orig_rax;
+ break;
+ default:
+ crit("invalid ptrace_pass\n");
+ break;
+ }
+
+ ptrace(PTRACE_SETREGS, testpid, NULL, &regs);
+}
+
+static void syscall_numbering_tracer(pid_t testpid)
+{
+ int wstatus;
+
+ do {
+ pid_t wpid = waitpid(testpid, &wstatus, 0);
+ if (wpid < 0 && errno != EINTR)
+ break;
+ if (wpid != testpid)
+ continue;
+ if (!WIFSTOPPED(wstatus))
+ break; /* Thread exited? */
+
+ if (sh->probing_syscall && WSTOPSIG(wstatus) == SIGTRAP)
+ mess_with_syscall(testpid, sh->ptrace_pass);
+ } while (sh->ptrace_pass != PTP_DONE &&
+ !ptrace(PTRACE_SYSCALL, testpid, NULL, NULL));
+
+ ptrace(PTRACE_DETACH, testpid, NULL, NULL);
+
+ /* Wait for the child process to terminate */
+ while (waitpid(testpid, &wstatus, 0) != testpid || !WIFEXITED(wstatus))
+ /* wait some more */;
+}
+
+static void test_traced_syscall_numbering(void)
+{
+ pid_t testpid;
+
+ /* Launch the test thread; this thread continues as the tracer thread */
+ testpid = fork();
+
+ if (testpid < 0) {
+ crit("Unable to launch tracer process\n");
+ } else if (testpid == 0) {
+ syscall_numbering_tracee();
+ _exit(0);
} else {
- printf(" confused\n");
+ syscall_numbering_tracer(testpid);
}
+}
- test_x32_without_x32_bit();
+int main(void)
+{
+ unsigned int nerr;
- return nerrs ? 1 : 0;
+ /*
+ * It is quite likely to get a segfault on a failure, so make
+ * sure the message gets out by setting stdout to nonbuffered.
+ */
+ setvbuf(stdout, NULL, _IONBF, 0);
+
+ /*
+ * Harmless file descriptor to work on...
+ */
+ nullfd = open("/dev/null", O_RDWR);
+ if (nullfd < 0) {
+ crit("Unable to open /dev/null: %s\n", strerror(errno));
+ }
+
+ /*
+ * Set up a block of shared memory...
+ */
+ sh = mmap(NULL, sysconf(_SC_PAGE_SIZE), PROT_READ|PROT_WRITE,
+ MAP_ANONYMOUS|MAP_SHARED, 0, 0);
+ if (sh == MAP_FAILED) {
+ crit("Unable to allocated shared memory block: %s\n",
+ strerror(errno));
+ }
+
+ with_x32 = test_x32();
+
+ run("Running tests without ptrace...\n");
+ test_syscall_numbering();
+
+ test_traced_syscall_numbering();
+
+ nerr = sh->nerr;
+ if (!nerr) {
+ ok("All system calls succeeded or failed as expected\n");
+ return 0;
+ } else {
+ fail("A total of %u system call%s had incorrect behavior\n",
+ nerr, nerr != 1 ? "s" : "");
+ return 1;
+ }
}
diff --git a/tools/testing/selftests/x86/test_shadow_stack.c b/tools/testing/selftests/x86/test_shadow_stack.c
new file mode 100644
index 000000000000..757e6527f67e
--- /dev/null
+++ b/tools/testing/selftests/x86/test_shadow_stack.c
@@ -0,0 +1,884 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * This program test's basic kernel shadow stack support. It enables shadow
+ * stack manual via the arch_prctl(), instead of relying on glibc. It's
+ * Makefile doesn't compile with shadow stack support, so it doesn't rely on
+ * any particular glibc. As a result it can't do any operations that require
+ * special glibc shadow stack support (longjmp(), swapcontext(), etc). Just
+ * stick to the basics and hope the compiler doesn't do anything strange.
+ */
+
+#define _GNU_SOURCE
+
+#include <sys/syscall.h>
+#include <asm/mman.h>
+#include <sys/mman.h>
+#include <sys/stat.h>
+#include <sys/wait.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <fcntl.h>
+#include <unistd.h>
+#include <string.h>
+#include <errno.h>
+#include <stdbool.h>
+#include <x86intrin.h>
+#include <asm/prctl.h>
+#include <sys/prctl.h>
+#include <stdint.h>
+#include <signal.h>
+#include <pthread.h>
+#include <sys/ioctl.h>
+#include <linux/userfaultfd.h>
+#include <setjmp.h>
+#include <sys/ptrace.h>
+#include <sys/signal.h>
+#include <linux/elf.h>
+
+/*
+ * Define the ABI defines if needed, so people can run the tests
+ * without building the headers.
+ */
+#ifndef __NR_map_shadow_stack
+#define __NR_map_shadow_stack 453
+
+#define SHADOW_STACK_SET_TOKEN (1ULL << 0)
+
+#define ARCH_SHSTK_ENABLE 0x5001
+#define ARCH_SHSTK_DISABLE 0x5002
+#define ARCH_SHSTK_LOCK 0x5003
+#define ARCH_SHSTK_UNLOCK 0x5004
+#define ARCH_SHSTK_STATUS 0x5005
+
+#define ARCH_SHSTK_SHSTK (1ULL << 0)
+#define ARCH_SHSTK_WRSS (1ULL << 1)
+
+#define NT_X86_SHSTK 0x204
+#endif
+
+#define SS_SIZE 0x200000
+#define PAGE_SIZE 0x1000
+
+#if (__GNUC__ < 8) || (__GNUC__ == 8 && __GNUC_MINOR__ < 5)
+int main(int argc, char *argv[])
+{
+ printf("[SKIP]\tCompiler does not support CET.\n");
+ return 0;
+}
+#else
+void write_shstk(unsigned long *addr, unsigned long val)
+{
+ asm volatile("wrssq %[val], (%[addr])\n"
+ : "=m" (addr)
+ : [addr] "r" (addr), [val] "r" (val));
+}
+
+static inline unsigned long __attribute__((always_inline)) get_ssp(void)
+{
+ unsigned long ret = 0;
+
+ asm volatile("xor %0, %0; rdsspq %0" : "=r" (ret));
+ return ret;
+}
+
+/*
+ * For use in inline enablement of shadow stack.
+ *
+ * The program can't return from the point where shadow stack gets enabled
+ * because there will be no address on the shadow stack. So it can't use
+ * syscall() for enablement, since it is a function.
+ *
+ * Based on code from nolibc.h. Keep a copy here because this can't pull in all
+ * of nolibc.h.
+ */
+#define ARCH_PRCTL(arg1, arg2) \
+({ \
+ long _ret; \
+ register long _num asm("eax") = __NR_arch_prctl; \
+ register long _arg1 asm("rdi") = (long)(arg1); \
+ register long _arg2 asm("rsi") = (long)(arg2); \
+ \
+ asm volatile ( \
+ "syscall\n" \
+ : "=a"(_ret) \
+ : "r"(_arg1), "r"(_arg2), \
+ "0"(_num) \
+ : "rcx", "r11", "memory", "cc" \
+ ); \
+ _ret; \
+})
+
+void *create_shstk(void *addr)
+{
+ return (void *)syscall(__NR_map_shadow_stack, addr, SS_SIZE, SHADOW_STACK_SET_TOKEN);
+}
+
+void *create_normal_mem(void *addr)
+{
+ return mmap(addr, SS_SIZE, PROT_READ | PROT_WRITE,
+ MAP_PRIVATE | MAP_ANONYMOUS, 0, 0);
+}
+
+void free_shstk(void *shstk)
+{
+ munmap(shstk, SS_SIZE);
+}
+
+int reset_shstk(void *shstk)
+{
+ return madvise(shstk, SS_SIZE, MADV_DONTNEED);
+}
+
+void try_shstk(unsigned long new_ssp)
+{
+ unsigned long ssp;
+
+ printf("[INFO]\tnew_ssp = %lx, *new_ssp = %lx\n",
+ new_ssp, *((unsigned long *)new_ssp));
+
+ ssp = get_ssp();
+ printf("[INFO]\tchanging ssp from %lx to %lx\n", ssp, new_ssp);
+
+ asm volatile("rstorssp (%0)\n":: "r" (new_ssp));
+ asm volatile("saveprevssp");
+ printf("[INFO]\tssp is now %lx\n", get_ssp());
+
+ /* Switch back to original shadow stack */
+ ssp -= 8;
+ asm volatile("rstorssp (%0)\n":: "r" (ssp));
+ asm volatile("saveprevssp");
+}
+
+int test_shstk_pivot(void)
+{
+ void *shstk = create_shstk(0);
+
+ if (shstk == MAP_FAILED) {
+ printf("[FAIL]\tError creating shadow stack: %d\n", errno);
+ return 1;
+ }
+ try_shstk((unsigned long)shstk + SS_SIZE - 8);
+ free_shstk(shstk);
+
+ printf("[OK]\tShadow stack pivot\n");
+ return 0;
+}
+
+int test_shstk_faults(void)
+{
+ unsigned long *shstk = create_shstk(0);
+
+ /* Read shadow stack, test if it's zero to not get read optimized out */
+ if (*shstk != 0)
+ goto err;
+
+ /* Wrss memory that was already read. */
+ write_shstk(shstk, 1);
+ if (*shstk != 1)
+ goto err;
+
+ /* Page out memory, so we can wrss it again. */
+ if (reset_shstk((void *)shstk))
+ goto err;
+
+ write_shstk(shstk, 1);
+ if (*shstk != 1)
+ goto err;
+
+ printf("[OK]\tShadow stack faults\n");
+ return 0;
+
+err:
+ return 1;
+}
+
+unsigned long saved_ssp;
+unsigned long saved_ssp_val;
+volatile bool segv_triggered;
+
+void __attribute__((noinline)) violate_ss(void)
+{
+ saved_ssp = get_ssp();
+ saved_ssp_val = *(unsigned long *)saved_ssp;
+
+ /* Corrupt shadow stack */
+ printf("[INFO]\tCorrupting shadow stack\n");
+ write_shstk((void *)saved_ssp, 0);
+}
+
+void segv_handler(int signum, siginfo_t *si, void *uc)
+{
+ printf("[INFO]\tGenerated shadow stack violation successfully\n");
+
+ segv_triggered = true;
+
+ /* Fix shadow stack */
+ write_shstk((void *)saved_ssp, saved_ssp_val);
+}
+
+int test_shstk_violation(void)
+{
+ struct sigaction sa = {};
+
+ sa.sa_sigaction = segv_handler;
+ sa.sa_flags = SA_SIGINFO;
+ if (sigaction(SIGSEGV, &sa, NULL))
+ return 1;
+
+ segv_triggered = false;
+
+ /* Make sure segv_triggered is set before violate_ss() */
+ asm volatile("" : : : "memory");
+
+ violate_ss();
+
+ signal(SIGSEGV, SIG_DFL);
+
+ printf("[OK]\tShadow stack violation test\n");
+
+ return !segv_triggered;
+}
+
+/* Gup test state */
+#define MAGIC_VAL 0x12345678
+bool is_shstk_access;
+void *shstk_ptr;
+int fd;
+
+void reset_test_shstk(void *addr)
+{
+ if (shstk_ptr)
+ free_shstk(shstk_ptr);
+ shstk_ptr = create_shstk(addr);
+}
+
+void test_access_fix_handler(int signum, siginfo_t *si, void *uc)
+{
+ printf("[INFO]\tViolation from %s\n", is_shstk_access ? "shstk access" : "normal write");
+
+ segv_triggered = true;
+
+ /* Fix shadow stack */
+ if (is_shstk_access) {
+ reset_test_shstk(shstk_ptr);
+ return;
+ }
+
+ free_shstk(shstk_ptr);
+ create_normal_mem(shstk_ptr);
+}
+
+bool test_shstk_access(void *ptr)
+{
+ is_shstk_access = true;
+ segv_triggered = false;
+ write_shstk(ptr, MAGIC_VAL);
+
+ asm volatile("" : : : "memory");
+
+ return segv_triggered;
+}
+
+bool test_write_access(void *ptr)
+{
+ is_shstk_access = false;
+ segv_triggered = false;
+ *(unsigned long *)ptr = MAGIC_VAL;
+
+ asm volatile("" : : : "memory");
+
+ return segv_triggered;
+}
+
+bool gup_write(void *ptr)
+{
+ unsigned long val;
+
+ lseek(fd, (unsigned long)ptr, SEEK_SET);
+ if (write(fd, &val, sizeof(val)) < 0)
+ return 1;
+
+ return 0;
+}
+
+bool gup_read(void *ptr)
+{
+ unsigned long val;
+
+ lseek(fd, (unsigned long)ptr, SEEK_SET);
+ if (read(fd, &val, sizeof(val)) < 0)
+ return 1;
+
+ return 0;
+}
+
+int test_gup(void)
+{
+ struct sigaction sa = {};
+ int status;
+ pid_t pid;
+
+ sa.sa_sigaction = test_access_fix_handler;
+ sa.sa_flags = SA_SIGINFO;
+ if (sigaction(SIGSEGV, &sa, NULL))
+ return 1;
+
+ segv_triggered = false;
+
+ fd = open("/proc/self/mem", O_RDWR);
+ if (fd == -1)
+ return 1;
+
+ reset_test_shstk(0);
+ if (gup_read(shstk_ptr))
+ return 1;
+ if (test_shstk_access(shstk_ptr))
+ return 1;
+ printf("[INFO]\tGup read -> shstk access success\n");
+
+ reset_test_shstk(0);
+ if (gup_write(shstk_ptr))
+ return 1;
+ if (test_shstk_access(shstk_ptr))
+ return 1;
+ printf("[INFO]\tGup write -> shstk access success\n");
+
+ reset_test_shstk(0);
+ if (gup_read(shstk_ptr))
+ return 1;
+ if (!test_write_access(shstk_ptr))
+ return 1;
+ printf("[INFO]\tGup read -> write access success\n");
+
+ reset_test_shstk(0);
+ if (gup_write(shstk_ptr))
+ return 1;
+ if (!test_write_access(shstk_ptr))
+ return 1;
+ printf("[INFO]\tGup write -> write access success\n");
+
+ close(fd);
+
+ /* COW/gup test */
+ reset_test_shstk(0);
+ pid = fork();
+ if (!pid) {
+ fd = open("/proc/self/mem", O_RDWR);
+ if (fd == -1)
+ exit(1);
+
+ if (gup_write(shstk_ptr)) {
+ close(fd);
+ exit(1);
+ }
+ close(fd);
+ exit(0);
+ }
+ waitpid(pid, &status, 0);
+ if (WEXITSTATUS(status)) {
+ printf("[FAIL]\tWrite in child failed\n");
+ return 1;
+ }
+ if (*(unsigned long *)shstk_ptr == MAGIC_VAL) {
+ printf("[FAIL]\tWrite in child wrote through to shared memory\n");
+ return 1;
+ }
+
+ printf("[INFO]\tCow gup write -> write access success\n");
+
+ free_shstk(shstk_ptr);
+
+ signal(SIGSEGV, SIG_DFL);
+
+ printf("[OK]\tShadow gup test\n");
+
+ return 0;
+}
+
+int test_mprotect(void)
+{
+ struct sigaction sa = {};
+
+ sa.sa_sigaction = test_access_fix_handler;
+ sa.sa_flags = SA_SIGINFO;
+ if (sigaction(SIGSEGV, &sa, NULL))
+ return 1;
+
+ segv_triggered = false;
+
+ /* mprotect a shadow stack as read only */
+ reset_test_shstk(0);
+ if (mprotect(shstk_ptr, SS_SIZE, PROT_READ) < 0) {
+ printf("[FAIL]\tmprotect(PROT_READ) failed\n");
+ return 1;
+ }
+
+ /* try to wrss it and fail */
+ if (!test_shstk_access(shstk_ptr)) {
+ printf("[FAIL]\tShadow stack access to read-only memory succeeded\n");
+ return 1;
+ }
+
+ /*
+ * The shadow stack was reset above to resolve the fault, make the new one
+ * read-only.
+ */
+ if (mprotect(shstk_ptr, SS_SIZE, PROT_READ) < 0) {
+ printf("[FAIL]\tmprotect(PROT_READ) failed\n");
+ return 1;
+ }
+
+ /* then back to writable */
+ if (mprotect(shstk_ptr, SS_SIZE, PROT_WRITE | PROT_READ) < 0) {
+ printf("[FAIL]\tmprotect(PROT_WRITE) failed\n");
+ return 1;
+ }
+
+ /* then wrss to it and succeed */
+ if (test_shstk_access(shstk_ptr)) {
+ printf("[FAIL]\tShadow stack access to mprotect() writable memory failed\n");
+ return 1;
+ }
+
+ free_shstk(shstk_ptr);
+
+ signal(SIGSEGV, SIG_DFL);
+
+ printf("[OK]\tmprotect() test\n");
+
+ return 0;
+}
+
+char zero[4096];
+
+static void *uffd_thread(void *arg)
+{
+ struct uffdio_copy req;
+ int uffd = *(int *)arg;
+ struct uffd_msg msg;
+ int ret;
+
+ while (1) {
+ ret = read(uffd, &msg, sizeof(msg));
+ if (ret > 0)
+ break;
+ else if (errno == EAGAIN)
+ continue;
+ return (void *)1;
+ }
+
+ req.dst = msg.arg.pagefault.address;
+ req.src = (__u64)zero;
+ req.len = 4096;
+ req.mode = 0;
+
+ if (ioctl(uffd, UFFDIO_COPY, &req))
+ return (void *)1;
+
+ return (void *)0;
+}
+
+int test_userfaultfd(void)
+{
+ struct uffdio_register uffdio_register;
+ struct uffdio_api uffdio_api;
+ struct sigaction sa = {};
+ pthread_t thread;
+ void *res;
+ int uffd;
+
+ sa.sa_sigaction = test_access_fix_handler;
+ sa.sa_flags = SA_SIGINFO;
+ if (sigaction(SIGSEGV, &sa, NULL))
+ return 1;
+
+ uffd = syscall(__NR_userfaultfd, O_CLOEXEC | O_NONBLOCK);
+ if (uffd < 0) {
+ printf("[SKIP]\tUserfaultfd unavailable.\n");
+ return 0;
+ }
+
+ reset_test_shstk(0);
+
+ uffdio_api.api = UFFD_API;
+ uffdio_api.features = 0;
+ if (ioctl(uffd, UFFDIO_API, &uffdio_api))
+ goto err;
+
+ uffdio_register.range.start = (__u64)shstk_ptr;
+ uffdio_register.range.len = 4096;
+ uffdio_register.mode = UFFDIO_REGISTER_MODE_MISSING;
+ if (ioctl(uffd, UFFDIO_REGISTER, &uffdio_register))
+ goto err;
+
+ if (pthread_create(&thread, NULL, &uffd_thread, &uffd))
+ goto err;
+
+ reset_shstk(shstk_ptr);
+ test_shstk_access(shstk_ptr);
+
+ if (pthread_join(thread, &res))
+ goto err;
+
+ if (test_shstk_access(shstk_ptr))
+ goto err;
+
+ free_shstk(shstk_ptr);
+
+ signal(SIGSEGV, SIG_DFL);
+
+ if (!res)
+ printf("[OK]\tUserfaultfd test\n");
+ return !!res;
+err:
+ free_shstk(shstk_ptr);
+ close(uffd);
+ signal(SIGSEGV, SIG_DFL);
+ return 1;
+}
+
+/* Simple linked list for keeping track of mappings in test_guard_gap() */
+struct node {
+ struct node *next;
+ void *mapping;
+};
+
+/*
+ * This tests whether mmap will place other mappings in a shadow stack's guard
+ * gap. The steps are:
+ * 1. Finds an empty place by mapping and unmapping something.
+ * 2. Map a shadow stack in the middle of the known empty area.
+ * 3. Map a bunch of PAGE_SIZE mappings. These will use the search down
+ * direction, filling any gaps until it encounters the shadow stack's
+ * guard gap.
+ * 4. When a mapping lands below the shadow stack from step 2, then all
+ * of the above gaps are filled. The search down algorithm will have
+ * looked at the shadow stack gaps.
+ * 5. See if it landed in the gap.
+ */
+int test_guard_gap(void)
+{
+ void *free_area, *shstk, *test_map = (void *)0xFFFFFFFFFFFFFFFF;
+ struct node *head = NULL, *cur;
+
+ free_area = mmap(0, SS_SIZE * 3, PROT_READ | PROT_WRITE,
+ MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
+ munmap(free_area, SS_SIZE * 3);
+
+ shstk = create_shstk(free_area + SS_SIZE);
+ if (shstk == MAP_FAILED)
+ return 1;
+
+ while (test_map > shstk) {
+ test_map = mmap(0, PAGE_SIZE, PROT_READ | PROT_WRITE,
+ MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
+ if (test_map == MAP_FAILED)
+ return 1;
+ cur = malloc(sizeof(*cur));
+ cur->mapping = test_map;
+
+ cur->next = head;
+ head = cur;
+ }
+
+ while (head) {
+ cur = head;
+ head = cur->next;
+ munmap(cur->mapping, PAGE_SIZE);
+ free(cur);
+ }
+
+ free_shstk(shstk);
+
+ if (shstk - test_map - PAGE_SIZE != PAGE_SIZE)
+ return 1;
+
+ printf("[OK]\tGuard gap test\n");
+
+ return 0;
+}
+
+/*
+ * Too complicated to pull it out of the 32 bit header, but also get the
+ * 64 bit one needed above. Just define a copy here.
+ */
+#define __NR_compat_sigaction 67
+
+/*
+ * Call 32 bit signal handler to get 32 bit signals ABI. Make sure
+ * to push the registers that will get clobbered.
+ */
+int sigaction32(int signum, const struct sigaction *restrict act,
+ struct sigaction *restrict oldact)
+{
+ register long syscall_reg asm("eax") = __NR_compat_sigaction;
+ register long signum_reg asm("ebx") = signum;
+ register long act_reg asm("ecx") = (long)act;
+ register long oldact_reg asm("edx") = (long)oldact;
+ int ret = 0;
+
+ asm volatile ("int $0x80;"
+ : "=a"(ret), "=m"(oldact)
+ : "r"(syscall_reg), "r"(signum_reg), "r"(act_reg),
+ "r"(oldact_reg)
+ : "r8", "r9", "r10", "r11"
+ );
+
+ return ret;
+}
+
+sigjmp_buf jmp_buffer;
+
+void segv_gp_handler(int signum, siginfo_t *si, void *uc)
+{
+ segv_triggered = true;
+
+ /*
+ * To work with old glibc, this can't rely on siglongjmp working with
+ * shadow stack enabled, so disable shadow stack before siglongjmp().
+ */
+ ARCH_PRCTL(ARCH_SHSTK_DISABLE, ARCH_SHSTK_SHSTK);
+ siglongjmp(jmp_buffer, -1);
+}
+
+/*
+ * Transition to 32 bit mode and check that a #GP triggers a segfault.
+ */
+int test_32bit(void)
+{
+ struct sigaction sa = {};
+ struct sigaction *sa32;
+
+ /* Create sigaction in 32 bit address range */
+ sa32 = mmap(0, 4096, PROT_READ | PROT_WRITE,
+ MAP_32BIT | MAP_PRIVATE | MAP_ANONYMOUS, 0, 0);
+ sa32->sa_flags = SA_SIGINFO;
+
+ sa.sa_sigaction = segv_gp_handler;
+ sa.sa_flags = SA_SIGINFO;
+ if (sigaction(SIGSEGV, &sa, NULL))
+ return 1;
+
+
+ segv_triggered = false;
+
+ /* Make sure segv_triggered is set before triggering the #GP */
+ asm volatile("" : : : "memory");
+
+ /*
+ * Set handler to somewhere in 32 bit address space
+ */
+ sa32->sa_handler = (void *)sa32;
+ if (sigaction32(SIGUSR1, sa32, NULL))
+ return 1;
+
+ if (!sigsetjmp(jmp_buffer, 1))
+ raise(SIGUSR1);
+
+ if (segv_triggered)
+ printf("[OK]\t32 bit test\n");
+
+ return !segv_triggered;
+}
+
+void segv_handler_ptrace(int signum, siginfo_t *si, void *uc)
+{
+ /* The SSP adjustment caused a segfault. */
+ exit(0);
+}
+
+int test_ptrace(void)
+{
+ unsigned long saved_ssp, ssp = 0;
+ struct sigaction sa= {};
+ struct iovec iov;
+ int status;
+ int pid;
+
+ iov.iov_base = &ssp;
+ iov.iov_len = sizeof(ssp);
+
+ pid = fork();
+ if (!pid) {
+ ssp = get_ssp();
+
+ sa.sa_sigaction = segv_handler_ptrace;
+ sa.sa_flags = SA_SIGINFO;
+ if (sigaction(SIGSEGV, &sa, NULL))
+ return 1;
+
+ ptrace(PTRACE_TRACEME, NULL, NULL, NULL);
+ /*
+ * The parent will tweak the SSP and return from this function
+ * will #CP.
+ */
+ raise(SIGTRAP);
+
+ exit(1);
+ }
+
+ while (waitpid(pid, &status, 0) != -1 && WSTOPSIG(status) != SIGTRAP);
+
+ if (ptrace(PTRACE_GETREGSET, pid, NT_X86_SHSTK, &iov)) {
+ printf("[INFO]\tFailed to PTRACE_GETREGS\n");
+ goto out_kill;
+ }
+
+ if (!ssp) {
+ printf("[INFO]\tPtrace child SSP was 0\n");
+ goto out_kill;
+ }
+
+ saved_ssp = ssp;
+
+ iov.iov_len = 0;
+ if (!ptrace(PTRACE_SETREGSET, pid, NT_X86_SHSTK, &iov)) {
+ printf("[INFO]\tToo small size accepted via PTRACE_SETREGS\n");
+ goto out_kill;
+ }
+
+ iov.iov_len = sizeof(ssp) + 1;
+ if (!ptrace(PTRACE_SETREGSET, pid, NT_X86_SHSTK, &iov)) {
+ printf("[INFO]\tToo large size accepted via PTRACE_SETREGS\n");
+ goto out_kill;
+ }
+
+ ssp += 1;
+ if (!ptrace(PTRACE_SETREGSET, pid, NT_X86_SHSTK, &iov)) {
+ printf("[INFO]\tUnaligned SSP written via PTRACE_SETREGS\n");
+ goto out_kill;
+ }
+
+ ssp = 0xFFFFFFFFFFFF0000;
+ if (!ptrace(PTRACE_SETREGSET, pid, NT_X86_SHSTK, &iov)) {
+ printf("[INFO]\tKernel range SSP written via PTRACE_SETREGS\n");
+ goto out_kill;
+ }
+
+ /*
+ * Tweak the SSP so the child with #CP when it resumes and returns
+ * from raise()
+ */
+ ssp = saved_ssp + 8;
+ iov.iov_len = sizeof(ssp);
+ if (ptrace(PTRACE_SETREGSET, pid, NT_X86_SHSTK, &iov)) {
+ printf("[INFO]\tFailed to PTRACE_SETREGS\n");
+ goto out_kill;
+ }
+
+ if (ptrace(PTRACE_DETACH, pid, NULL, NULL)) {
+ printf("[INFO]\tFailed to PTRACE_DETACH\n");
+ goto out_kill;
+ }
+
+ waitpid(pid, &status, 0);
+ if (WEXITSTATUS(status))
+ return 1;
+
+ printf("[OK]\tPtrace test\n");
+ return 0;
+
+out_kill:
+ kill(pid, SIGKILL);
+ return 1;
+}
+
+int main(int argc, char *argv[])
+{
+ int ret = 0;
+
+ if (ARCH_PRCTL(ARCH_SHSTK_ENABLE, ARCH_SHSTK_SHSTK)) {
+ printf("[SKIP]\tCould not enable Shadow stack\n");
+ return 1;
+ }
+
+ if (ARCH_PRCTL(ARCH_SHSTK_DISABLE, ARCH_SHSTK_SHSTK)) {
+ ret = 1;
+ printf("[FAIL]\tDisabling shadow stack failed\n");
+ }
+
+ if (ARCH_PRCTL(ARCH_SHSTK_ENABLE, ARCH_SHSTK_SHSTK)) {
+ printf("[SKIP]\tCould not re-enable Shadow stack\n");
+ return 1;
+ }
+
+ if (ARCH_PRCTL(ARCH_SHSTK_ENABLE, ARCH_SHSTK_WRSS)) {
+ printf("[SKIP]\tCould not enable WRSS\n");
+ ret = 1;
+ goto out;
+ }
+
+ /* Should have succeeded if here, but this is a test, so double check. */
+ if (!get_ssp()) {
+ printf("[FAIL]\tShadow stack disabled\n");
+ return 1;
+ }
+
+ if (test_shstk_pivot()) {
+ ret = 1;
+ printf("[FAIL]\tShadow stack pivot\n");
+ goto out;
+ }
+
+ if (test_shstk_faults()) {
+ ret = 1;
+ printf("[FAIL]\tShadow stack fault test\n");
+ goto out;
+ }
+
+ if (test_shstk_violation()) {
+ ret = 1;
+ printf("[FAIL]\tShadow stack violation test\n");
+ goto out;
+ }
+
+ if (test_gup()) {
+ ret = 1;
+ printf("[FAIL]\tShadow shadow stack gup\n");
+ goto out;
+ }
+
+ if (test_mprotect()) {
+ ret = 1;
+ printf("[FAIL]\tShadow shadow mprotect test\n");
+ goto out;
+ }
+
+ if (test_userfaultfd()) {
+ ret = 1;
+ printf("[FAIL]\tUserfaultfd test\n");
+ goto out;
+ }
+
+ if (test_guard_gap()) {
+ ret = 1;
+ printf("[FAIL]\tGuard gap test\n");
+ goto out;
+ }
+
+ if (test_ptrace()) {
+ ret = 1;
+ printf("[FAIL]\tptrace test\n");
+ }
+
+ if (test_32bit()) {
+ ret = 1;
+ printf("[FAIL]\t32 bit test\n");
+ goto out;
+ }
+
+ return ret;
+
+out:
+ /*
+ * Disable shadow stack before the function returns, or there will be a
+ * shadow stack violation.
+ */
+ if (ARCH_PRCTL(ARCH_SHSTK_DISABLE, ARCH_SHSTK_SHSTK)) {
+ ret = 1;
+ printf("[FAIL]\tDisabling shadow stack failed\n");
+ }
+
+ return ret;
+}
+#endif
diff --git a/tools/testing/selftests/x86/test_vsyscall.c b/tools/testing/selftests/x86/test_vsyscall.c
index a4f4d4cf22c3..47cab972807c 100644
--- a/tools/testing/selftests/x86/test_vsyscall.c
+++ b/tools/testing/selftests/x86/test_vsyscall.c
@@ -20,6 +20,8 @@
#include <setjmp.h>
#include <sys/uio.h>
+#include "helpers.h"
+
#ifdef __x86_64__
# define VSYS(x) (x)
#else
@@ -90,11 +92,8 @@ static void init_vdso(void)
printf("[WARN]\tfailed to find time in vDSO\n");
vdso_getcpu = (getcpu_t)dlsym(vdso, "__vdso_getcpu");
- if (!vdso_getcpu) {
- /* getcpu() was never wired up in the 32-bit vDSO. */
- printf("[%s]\tfailed to find getcpu in vDSO\n",
- sizeof(long) == 8 ? "WARN" : "NOTE");
- }
+ if (!vdso_getcpu)
+ printf("[WARN]\tfailed to find getcpu in vDSO\n");
}
static int init_vsys(void)
@@ -460,6 +459,17 @@ static int test_vsys_x(void)
return 0;
}
+/*
+ * Debuggers expect ptrace() to be able to peek at the vsyscall page.
+ * Use process_vm_readv() as a proxy for ptrace() to test this. We
+ * want it to work in the vsyscall=emulate case and to fail in the
+ * vsyscall=xonly case.
+ *
+ * It's worth noting that this ABI is a bit nutty. write(2) can't
+ * read from the vsyscall page on any kernel version or mode. The
+ * fact that ptrace() ever worked was a nice courtesy of old kernels,
+ * but the code to support it is fairly gross.
+ */
static int test_process_vm_readv(void)
{
#ifdef __x86_64__
@@ -475,17 +485,24 @@ static int test_process_vm_readv(void)
remote.iov_len = 4096;
ret = process_vm_readv(getpid(), &local, 1, &remote, 1, 0);
if (ret != 4096) {
- printf("[OK]\tprocess_vm_readv() failed (ret = %d, errno = %d)\n", ret, errno);
- return 0;
+ /*
+ * We expect process_vm_readv() to work if and only if the
+ * vsyscall page is readable.
+ */
+ printf("[%s]\tprocess_vm_readv() failed (ret = %d, errno = %d)\n", vsyscall_map_r ? "FAIL" : "OK", ret, errno);
+ return vsyscall_map_r ? 1 : 0;
}
if (vsyscall_map_r) {
- if (!memcmp(buf, (const void *)0xffffffffff600000, 4096)) {
+ if (!memcmp(buf, remote.iov_base, sizeof(buf))) {
printf("[OK]\tIt worked and read correct data\n");
} else {
printf("[FAIL]\tIt worked but returned incorrect data\n");
return 1;
}
+ } else {
+ printf("[FAIL]\tprocess_rm_readv() succeeded, but it should have failed in this configuration\n");
+ return 1;
}
#endif
@@ -493,21 +510,8 @@ static int test_process_vm_readv(void)
}
#ifdef __x86_64__
-#define X86_EFLAGS_TF (1UL << 8)
static volatile sig_atomic_t num_vsyscall_traps;
-static unsigned long get_eflags(void)
-{
- unsigned long eflags;
- asm volatile ("pushfq\n\tpopq %0" : "=rm" (eflags));
- return eflags;
-}
-
-static void set_eflags(unsigned long eflags)
-{
- asm volatile ("pushq %0\n\tpopfq" : : "rm" (eflags) : "flags");
-}
-
static void sigtrap(int sig, siginfo_t *info, void *ctx_void)
{
ucontext_t *ctx = (ucontext_t *)ctx_void;
diff --git a/tools/testing/selftests/x86/thunks.S b/tools/testing/selftests/x86/thunks.S
index 1bb5d62c16a4..a2d47d8344d4 100644
--- a/tools/testing/selftests/x86/thunks.S
+++ b/tools/testing/selftests/x86/thunks.S
@@ -57,3 +57,5 @@ call32_from_64:
ret
.size call32_from_64, .-call32_from_64
+
+.section .note.GNU-stack,"",%progbits
diff --git a/tools/testing/selftests/x86/thunks_32.S b/tools/testing/selftests/x86/thunks_32.S
index a71d92da8f46..f3f56e681e9f 100644
--- a/tools/testing/selftests/x86/thunks_32.S
+++ b/tools/testing/selftests/x86/thunks_32.S
@@ -45,3 +45,5 @@ call64_from_32:
ret
.size call64_from_32, .-call64_from_32
+
+.section .note.GNU-stack,"",%progbits
diff --git a/tools/testing/selftests/x86/unwind_vdso.c b/tools/testing/selftests/x86/unwind_vdso.c
index 0075ccd65407..4c311e1af4c7 100644
--- a/tools/testing/selftests/x86/unwind_vdso.c
+++ b/tools/testing/selftests/x86/unwind_vdso.c
@@ -11,6 +11,8 @@
#include <features.h>
#include <stdio.h>
+#include "helpers.h"
+
#if defined(__GLIBC__) && __GLIBC__ == 2 && __GLIBC_MINOR__ < 16
int main()
@@ -53,27 +55,6 @@ static void sethandler(int sig, void (*handler)(int, siginfo_t *, void *),
err(1, "sigaction");
}
-#ifdef __x86_64__
-# define WIDTH "q"
-#else
-# define WIDTH "l"
-#endif
-
-static unsigned long get_eflags(void)
-{
- unsigned long eflags;
- asm volatile ("pushf" WIDTH "\n\tpop" WIDTH " %0" : "=rm" (eflags));
- return eflags;
-}
-
-static void set_eflags(unsigned long eflags)
-{
- asm volatile ("push" WIDTH " %0\n\tpopf" WIDTH
- : : "rm" (eflags) : "flags");
-}
-
-#define X86_EFLAGS_TF (1UL << 8)
-
static volatile sig_atomic_t nerrs;
static unsigned long sysinfo;
static bool got_sysinfo = false;
diff --git a/tools/testing/selftests/zram/zram.sh b/tools/testing/selftests/zram/zram.sh
index 232e958ec454..b0b91d9b0dc2 100755
--- a/tools/testing/selftests/zram/zram.sh
+++ b/tools/testing/selftests/zram/zram.sh
@@ -2,9 +2,6 @@
# SPDX-License-Identifier: GPL-2.0
TCID="zram.sh"
-# Kselftest framework requirement - SKIP code is 4.
-ksft_skip=4
-
. ./zram_lib.sh
run_zram () {
@@ -18,14 +15,4 @@ echo ""
check_prereqs
-# check zram module exists
-MODULE_PATH=/lib/modules/`uname -r`/kernel/drivers/block/zram/zram.ko
-if [ -f $MODULE_PATH ]; then
- run_zram
-elif [ -b /dev/zram0 ]; then
- run_zram
-else
- echo "$TCID : No zram.ko module or /dev/zram0 device file not found"
- echo "$TCID : CONFIG_ZRAM is not set"
- exit $ksft_skip
-fi
+run_zram
diff --git a/tools/testing/selftests/zram/zram01.sh b/tools/testing/selftests/zram/zram01.sh
index 114863d9fb87..8f4affe34f3e 100755
--- a/tools/testing/selftests/zram/zram01.sh
+++ b/tools/testing/selftests/zram/zram01.sh
@@ -33,9 +33,7 @@ zram_algs="lzo"
zram_fill_fs()
{
- local mem_free0=$(free -m | awk 'NR==2 {print $4}')
-
- for i in $(seq 0 $(($dev_num - 1))); do
+ for i in $(seq $dev_start $dev_end); do
echo "fill zram$i..."
local b=0
while [ true ]; do
@@ -45,29 +43,17 @@ zram_fill_fs()
b=$(($b + 1))
done
echo "zram$i can be filled with '$b' KB"
- done
- local mem_free1=$(free -m | awk 'NR==2 {print $4}')
- local used_mem=$(($mem_free0 - $mem_free1))
+ local mem_used_total=`awk '{print $3}' "/sys/block/zram$i/mm_stat"`
+ local v=$((100 * 1024 * $b / $mem_used_total))
+ if [ "$v" -lt 100 ]; then
+ echo "FAIL compression ratio: 0.$v:1"
+ ERR_CODE=-1
+ return
+ fi
- local total_size=0
- for sm in $zram_sizes; do
- local s=$(echo $sm | sed 's/M//')
- total_size=$(($total_size + $s))
+ echo "zram compression ratio: $(echo "scale=2; $v / 100 " | bc):1: OK"
done
-
- echo "zram used ${used_mem}M, zram disk sizes ${total_size}M"
-
- local v=$((100 * $total_size / $used_mem))
-
- if [ "$v" -lt 100 ]; then
- echo "FAIL compression ratio: 0.$v:1"
- ERR_CODE=-1
- zram_cleanup
- return
- fi
-
- echo "zram compression ratio: $(echo "scale=2; $v / 100 " | bc):1: OK"
}
check_prereqs
@@ -81,7 +67,6 @@ zram_mount
zram_fill_fs
zram_cleanup
-zram_unload
if [ $ERR_CODE -ne 0 ]; then
echo "$TCID : [FAIL]"
diff --git a/tools/testing/selftests/zram/zram02.sh b/tools/testing/selftests/zram/zram02.sh
index e83b404807c0..2418b0c4ed13 100755
--- a/tools/testing/selftests/zram/zram02.sh
+++ b/tools/testing/selftests/zram/zram02.sh
@@ -36,7 +36,6 @@ zram_set_memlimit
zram_makeswap
zram_swapoff
zram_cleanup
-zram_unload
if [ $ERR_CODE -ne 0 ]; then
echo "$TCID : [FAIL]"
diff --git a/tools/testing/selftests/zram/zram_lib.sh b/tools/testing/selftests/zram/zram_lib.sh
index 6f872f266fd1..21ec1966de76 100755
--- a/tools/testing/selftests/zram/zram_lib.sh
+++ b/tools/testing/selftests/zram/zram_lib.sh
@@ -5,12 +5,17 @@
# Author: Alexey Kodanev <alexey.kodanev@oracle.com>
# Modified: Naresh Kamboju <naresh.kamboju@linaro.org>
-MODULE=0
dev_makeswap=-1
dev_mounted=-1
-
+dev_start=0
+dev_end=-1
+module_load=-1
+sys_control=-1
# Kselftest framework requirement - SKIP code is 4.
ksft_skip=4
+kernel_version=`uname -r | cut -d'.' -f1,2`
+kernel_major=${kernel_version%.*}
+kernel_minor=${kernel_version#*.}
trap INT
@@ -25,68 +30,104 @@ check_prereqs()
fi
}
+kernel_gte()
+{
+ major=${1%.*}
+ minor=${1#*.}
+
+ if [ $kernel_major -gt $major ]; then
+ return 0
+ elif [[ $kernel_major -eq $major && $kernel_minor -ge $minor ]]; then
+ return 0
+ fi
+
+ return 1
+}
+
zram_cleanup()
{
echo "zram cleanup"
local i=
- for i in $(seq 0 $dev_makeswap); do
+ for i in $(seq $dev_start $dev_makeswap); do
swapoff /dev/zram$i
done
- for i in $(seq 0 $dev_mounted); do
+ for i in $(seq $dev_start $dev_mounted); do
umount /dev/zram$i
done
- for i in $(seq 0 $(($dev_num - 1))); do
+ for i in $(seq $dev_start $dev_end); do
echo 1 > /sys/block/zram${i}/reset
rm -rf zram$i
done
-}
+ if [ $sys_control -eq 1 ]; then
+ for i in $(seq $dev_start $dev_end); do
+ echo $i > /sys/class/zram-control/hot_remove
+ done
+ fi
-zram_unload()
-{
- if [ $MODULE -ne 0 ] ; then
- echo "zram rmmod zram"
+ if [ $module_load -eq 1 ]; then
rmmod zram > /dev/null 2>&1
fi
}
zram_load()
{
- # check zram module exists
- MODULE_PATH=/lib/modules/`uname -r`/kernel/drivers/block/zram/zram.ko
- if [ -f $MODULE_PATH ]; then
- MODULE=1
- echo "create '$dev_num' zram device(s)"
- modprobe zram num_devices=$dev_num
- if [ $? -ne 0 ]; then
- echo "failed to insert zram module"
- exit 1
- fi
-
- dev_num_created=$(ls /dev/zram* | wc -w)
+ echo "create '$dev_num' zram device(s)"
+
+ # zram module loaded, new kernel
+ if [ -d "/sys/class/zram-control" ]; then
+ echo "zram modules already loaded, kernel supports" \
+ "zram-control interface"
+ dev_start=$(ls /dev/zram* | wc -w)
+ dev_end=$(($dev_start + $dev_num - 1))
+ sys_control=1
+
+ for i in $(seq $dev_start $dev_end); do
+ cat /sys/class/zram-control/hot_add > /dev/null
+ done
+
+ echo "all zram devices (/dev/zram$dev_start~$dev_end" \
+ "successfully created"
+ return 0
+ fi
- if [ "$dev_num_created" -ne "$dev_num" ]; then
- echo "unexpected num of devices: $dev_num_created"
- ERR_CODE=-1
+ # detect old kernel or built-in
+ modprobe zram num_devices=$dev_num
+ if [ ! -d "/sys/class/zram-control" ]; then
+ if grep -q '^zram' /proc/modules; then
+ rmmod zram > /dev/null 2>&1
+ if [ $? -ne 0 ]; then
+ echo "zram module is being used on old kernel" \
+ "without zram-control interface"
+ exit $ksft_skip
+ fi
else
- echo "zram load module successful"
+ echo "test needs CONFIG_ZRAM=m on old kernel without" \
+ "zram-control interface"
+ exit $ksft_skip
fi
- elif [ -b /dev/zram0 ]; then
- echo "/dev/zram0 device file found: OK"
- else
- echo "ERROR: No zram.ko module or no /dev/zram0 device found"
- echo "$TCID : CONFIG_ZRAM is not set"
- exit 1
+ modprobe zram num_devices=$dev_num
fi
+
+ module_load=1
+ dev_end=$(($dev_num - 1))
+ echo "all zram devices (/dev/zram0~$dev_end) successfully created"
}
zram_max_streams()
{
echo "set max_comp_streams to zram device(s)"
- local i=0
+ kernel_gte 4.7
+ if [ $? -eq 0 ]; then
+ echo "The device attribute max_comp_streams was"\
+ "deprecated in 4.7"
+ return 0
+ fi
+
+ local i=$dev_start
for max_s in $zram_max_streams; do
local sys_path="/sys/block/zram${i}/max_comp_streams"
echo $max_s > $sys_path || \
@@ -98,7 +139,7 @@ zram_max_streams()
echo "FAIL can't set max_streams '$max_s', get $max_stream"
i=$(($i + 1))
- echo "$sys_path = '$max_streams' ($i/$dev_num)"
+ echo "$sys_path = '$max_streams'"
done
echo "zram max streams: OK"
@@ -108,15 +149,16 @@ zram_compress_alg()
{
echo "test that we can set compression algorithm"
- local algs=$(cat /sys/block/zram0/comp_algorithm)
+ local i=$dev_start
+ local algs=$(cat /sys/block/zram${i}/comp_algorithm)
echo "supported algs: $algs"
- local i=0
+
for alg in $zram_algs; do
local sys_path="/sys/block/zram${i}/comp_algorithm"
echo "$alg" > $sys_path || \
echo "FAIL can't set '$alg' to $sys_path"
i=$(($i + 1))
- echo "$sys_path = '$alg' ($i/$dev_num)"
+ echo "$sys_path = '$alg'"
done
echo "zram set compression algorithm: OK"
@@ -125,14 +167,14 @@ zram_compress_alg()
zram_set_disksizes()
{
echo "set disk size to zram device(s)"
- local i=0
+ local i=$dev_start
for ds in $zram_sizes; do
local sys_path="/sys/block/zram${i}/disksize"
echo "$ds" > $sys_path || \
echo "FAIL can't set '$ds' to $sys_path"
i=$(($i + 1))
- echo "$sys_path = '$ds' ($i/$dev_num)"
+ echo "$sys_path = '$ds'"
done
echo "zram set disksizes: OK"
@@ -142,14 +184,14 @@ zram_set_memlimit()
{
echo "set memory limit to zram device(s)"
- local i=0
+ local i=$dev_start
for ds in $zram_mem_limits; do
local sys_path="/sys/block/zram${i}/mem_limit"
echo "$ds" > $sys_path || \
echo "FAIL can't set '$ds' to $sys_path"
i=$(($i + 1))
- echo "$sys_path = '$ds' ($i/$dev_num)"
+ echo "$sys_path = '$ds'"
done
echo "zram set memory limit: OK"
@@ -158,8 +200,8 @@ zram_set_memlimit()
zram_makeswap()
{
echo "make swap with zram device(s)"
- local i=0
- for i in $(seq 0 $(($dev_num - 1))); do
+ local i=$dev_start
+ for i in $(seq $dev_start $dev_end); do
mkswap /dev/zram$i > err.log 2>&1
if [ $? -ne 0 ]; then
cat err.log
@@ -182,7 +224,7 @@ zram_makeswap()
zram_swapoff()
{
local i=
- for i in $(seq 0 $dev_makeswap); do
+ for i in $(seq $dev_start $dev_end); do
swapoff /dev/zram$i > err.log 2>&1
if [ $? -ne 0 ]; then
cat err.log
@@ -196,7 +238,7 @@ zram_swapoff()
zram_makefs()
{
- local i=0
+ local i=$dev_start
for fs in $zram_filesystems; do
# if requested fs not supported default it to ext2
which mkfs.$fs > /dev/null 2>&1 || fs=ext2
@@ -215,7 +257,7 @@ zram_makefs()
zram_mount()
{
local i=0
- for i in $(seq 0 $(($dev_num - 1))); do
+ for i in $(seq $dev_start $dev_end); do
echo "mount /dev/zram$i"
mkdir zram$i
mount /dev/zram$i zram$i > /dev/null || \